Repository: k2-fsa/sherpa-onnx
Branch: master
Commit: e0ab4a8beb10
Files: 4274
Total size: 12.5 MB

Directory structure:
gitextract_zccx8fk8/

├── .clang-format
├── .clang-tidy
├── .flake8
├── .github/
│   ├── scripts/
│   │   ├── .gitignore
│   │   ├── as-cmake-sub-project/
│   │   │   ├── CMakeLists.txt
│   │   │   └── main.cc
│   │   ├── export-ascend/
│   │   │   ├── __init__.py
│   │   │   ├── generate_paraformer.py
│   │   │   ├── generate_sense_voice.py
│   │   │   ├── generate_whisper.py
│   │   │   └── generate_zipformer_ctc_20250703.py
│   │   ├── export-qnn/
│   │   │   ├── __init__.py
│   │   │   ├── generate_paraformer.py
│   │   │   ├── generate_sense_voice.py
│   │   │   └── generate_zipformer.py
│   │   ├── node-addon/
│   │   │   ├── README-optional.md
│   │   │   ├── README.md
│   │   │   ├── index.js
│   │   │   ├── notes.md
│   │   │   ├── package-optional.json
│   │   │   └── package.json
│   │   ├── test-audio-tagging.sh
│   │   ├── test-c-api.sh
│   │   ├── test-cxx-api.sh
│   │   ├── test-dart.sh
│   │   ├── test-dot-net.sh
│   │   ├── test-kws.sh
│   │   ├── test-nodejs-addon-npm.sh
│   │   ├── test-nodejs-npm.sh
│   │   ├── test-offline-ctc.sh
│   │   ├── test-offline-fire-red-asr.sh
│   │   ├── test-offline-moonshine.sh
│   │   ├── test-offline-punctuation.sh
│   │   ├── test-offline-source-separation.sh
│   │   ├── test-offline-speech-denoiser.sh
│   │   ├── test-offline-transducer.sh
│   │   ├── test-offline-tts.sh
│   │   ├── test-offline-whisper.sh
│   │   ├── test-online-ctc.sh
│   │   ├── test-online-paraformer.sh
│   │   ├── test-online-punctuation.sh
│   │   ├── test-online-transducer.sh
│   │   ├── test-python.sh
│   │   ├── test-rust.sh
│   │   ├── test-speaker-diarization.sh
│   │   ├── test-speaker-recognition-python.sh
│   │   ├── test-spoken-language-identification.sh
│   │   └── test-swift.sh
│   └── workflows/
│       ├── .gitignore
│       ├── aarch64-linux-gnu-shared.yaml
│       ├── aarch64-linux-gnu-static.yaml
│       ├── add-new-asr-models.yaml
│       ├── android-rknn.yaml
│       ├── android-static.yaml
│       ├── android.yaml
│       ├── apk-asr-2pass.yaml
│       ├── apk-asr.yaml
│       ├── apk-audio-tagging-wearos.yaml
│       ├── apk-audio-tagging.yaml
│       ├── apk-kws.yaml
│       ├── apk-qnn-vad-asr-simulated-streaming.yaml
│       ├── apk-speaker-diarization.yaml
│       ├── apk-speaker-identification.yaml
│       ├── apk-spoken-language-identification.yaml
│       ├── apk-tts-engine.yaml
│       ├── apk-tts.yaml
│       ├── apk-vad-asr-simulated-streaming.yaml
│       ├── apk-vad-asr.yaml
│       ├── apk-vad.yaml
│       ├── arm-linux-gnueabihf.yaml
│       ├── as_cmake_sub_project.yaml
│       ├── ascend.yaml
│       ├── axcl-linux-aarch64.yaml
│       ├── axera-linux-aarch64.yaml
│       ├── build-wheels-aarch64-cuda.yaml
│       ├── build-wheels-aarch64-rknn.yaml
│       ├── build-wheels-aarch64.yaml
│       ├── build-wheels-armv7l.yaml
│       ├── build-wheels-linux-cuda.yaml
│       ├── build-wheels-linux.yaml
│       ├── build-wheels-macos-arm64.yaml
│       ├── build-wheels-macos-universal2.yaml
│       ├── build-wheels-macos-x64.yaml
│       ├── build-wheels-win32.yaml
│       ├── build-wheels-win64-cuda.yaml
│       ├── build-wheels-win64.yaml
│       ├── build-xcframework.yaml
│       ├── c-api-from-buffer.yaml
│       ├── c-api.yaml
│       ├── checksum.yaml
│       ├── clang-tidy.yaml
│       ├── cxx-api.yaml
│       ├── dot-net.yaml
│       ├── export-3dspeaker-to-onnx.yaml
│       ├── export-ced-to-onnx.yaml
│       ├── export-dophin-ctc-to-onnx.yaml
│       ├── export-fire-red-asr.yaml
│       ├── export-gtcrn.yaml
│       ├── export-kitten.yaml
│       ├── export-kokoro.yaml
│       ├── export-libriheavy.yaml
│       ├── export-matcha-fa-en.yaml
│       ├── export-matcha-zh-en.yaml
│       ├── export-medasr-ctc-to-onnx.yaml
│       ├── export-melo-tts-to-onnx.yaml
│       ├── export-moonshine-to-onnx.yaml
│       ├── export-nemo-canary-180m-flash.yaml
│       ├── export-nemo-fast-conformer-hybrid-transducer-ctc-non-streaming.yaml
│       ├── export-nemo-fast-conformer-hybrid-transducer-ctc.yaml
│       ├── export-nemo-fast-conformer-hybrid-transducer-transducer-non-streaming.yaml
│       ├── export-nemo-fast-conformer-hybrid-transducer-transducer.yaml
│       ├── export-nemo-giga-am-to-onnx.yaml
│       ├── export-nemo-parakeet-tdt-0.6b-v2.yaml
│       ├── export-nemo-parakeet-tdt.yaml
│       ├── export-nemo-speaker-verification-to-onnx.yaml
│       ├── export-nemotron-speech-streaming-en-0.6b.yaml
│       ├── export-omnilingual-asr-to-onnx.yaml
│       ├── export-paraformer-to-ascend-npu.yaml
│       ├── export-paraformer-to-qnn.yaml
│       ├── export-paraformer-to-rknn.yaml
│       ├── export-peng-cheng-starling.yaml
│       ├── export-piper.yaml
│       ├── export-pocket-tts.yaml
│       ├── export-pyannote-segmentation-to-onnx.yaml
│       ├── export-revai-segmentation-to-onnx.yaml
│       ├── export-russian-onnx-models.yaml
│       ├── export-sense-voice-to-ascend-npu.yaml
│       ├── export-sense-voice-to-onnx.yaml
│       ├── export-sense-voice-to-qnn.yaml
│       ├── export-sense-voice-to-rknn.yaml
│       ├── export-silero-vad-rknn.yaml
│       ├── export-spleeter-to-onnx.yaml
│       ├── export-supertonic.yaml
│       ├── export-t-one-to-onnx.yaml
│       ├── export-telespeech-ctc.yaml
│       ├── export-uvr-to-onnx.yaml
│       ├── export-vits-ljspeech-to-onnx.yaml
│       ├── export-vocos.yaml
│       ├── export-wenet-to-onnx.yaml
│       ├── export-wespeaker-to-onnx.yaml
│       ├── export-whisper-to-ascend-npu.yaml
│       ├── export-whisper-to-onnx.yaml
│       ├── export-zipformer-ctc-to-ascend-20250703.yaml
│       ├── export-zipformer-ctc-to-qnn-20250703.yaml
│       ├── flutter-android.yaml
│       ├── flutter-linux.yaml
│       ├── flutter-macos.yaml
│       ├── flutter-windows-x64.yaml
│       ├── generate-tts-samples.yaml
│       ├── hap-vad-asr.yaml
│       ├── har.yaml
│       ├── harmony-os.yaml
│       ├── jar.yaml
│       ├── jni.yaml
│       ├── lazarus.yaml
│       ├── linux-gpu.yaml
│       ├── linux-jni-aarch64.yaml
│       ├── linux-jni.yaml
│       ├── linux.yaml
│       ├── macos-jni.yaml
│       ├── macos.yaml
│       ├── mfc.yaml
│       ├── mobile-asr-models.yaml
│       ├── mobile-kws-models.yaml
│       ├── nightly-wheel-arm.yaml
│       ├── npm-addon-linux-aarch64.yaml
│       ├── npm-addon-linux-x64.yaml
│       ├── npm-addon-macos.yaml
│       ├── npm-addon-win-x64.yaml
│       ├── npm-addon-win-x86.yaml
│       ├── npm-addon.yaml
│       ├── npm.yaml
│       ├── pascal.yaml
│       ├── pkg-config.yaml
│       ├── release-dart-package.yaml
│       ├── release-go.yaml
│       ├── release-rust.yaml
│       ├── riscv64-linux.yaml
│       ├── riscv64-spacemit-linux.yaml
│       ├── rknn-linux-aarch64.yaml
│       ├── run-java-test.yaml
│       ├── run-python-test-macos.yaml
│       ├── run-python-test.yaml
│       ├── sanitizer.yaml
│       ├── speaker-diarization.yaml
│       ├── style_check.yaml
│       ├── swift.yaml
│       ├── test-build-wheel.yaml
│       ├── test-dart-package.yaml
│       ├── test-dart.yaml
│       ├── test-dot-net-nuget.yaml
│       ├── test-dot-net.yaml
│       ├── test-go-package.yaml
│       ├── test-go.yaml
│       ├── test-nodejs-addon-api.yaml
│       ├── test-nodejs-addon-npm-aarch64.yaml
│       ├── test-nodejs-addon-npm-win-x86.yaml
│       ├── test-nodejs-addon-npm.yaml
│       ├── test-nodejs-npm.yaml
│       ├── test-nodejs.yaml
│       ├── test-onnxruntime-version.yaml
│       ├── test-pip-install.yaml
│       ├── test-piper-phonemize.yaml
│       ├── test-python-offline-websocket-server.yaml
│       ├── test-python-online-websocket-server.yaml
│       ├── test-rust-package.yaml
│       ├── test-rust.yaml
│       ├── upload-models.yaml
│       ├── upload-zipvoice-models.yaml
│       ├── wasm-simd-hf-space-en-asr-zipformer.yaml
│       ├── wasm-simd-hf-space-silero-vad.yaml
│       ├── wasm-simd-hf-space-speaker-diarization.yaml
│       ├── wasm-simd-hf-space-speech-enhancement-gtcrn.yaml
│       ├── wasm-simd-hf-space-ten-vad.yaml
│       ├── wasm-simd-hf-space-tts.yaml
│       ├── wasm-simd-hf-space-vad-asr.yaml
│       ├── wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml
│       ├── wasm-simd-hf-space-zh-en-asr-paraformer.yaml
│       ├── wasm-simd-hf-space-zh-en-asr-zipformer.yaml
│       ├── windows-arm64.yaml
│       ├── windows-x64-cuda.yaml
│       ├── windows-x64-jni.yaml
│       ├── windows-x64.yaml
│       └── windows-x86.yaml
├── .gitignore
├── CHANGELOG.md
├── CMakeLists.txt
├── CPPLINT.cfg
├── LICENSE
├── MANIFEST.in
├── README.md
├── android/
│   ├── .gitignore
│   ├── README.md
│   ├── SherpaOnnx/
│   │   ├── .gitignore
│   │   ├── app/
│   │   │   ├── .gitignore
│   │   │   ├── build.gradle
│   │   │   ├── proguard-rules.pro
│   │   │   └── src/
│   │   │       ├── androidTest/
│   │   │       │   └── java/
│   │   │       │       └── com/
│   │   │       │           └── k2fsa/
│   │   │       │               └── sherpa/
│   │   │       │                   └── onnx/
│   │   │       │                       └── ExampleInstrumentedTest.kt
│   │   │       ├── main/
│   │   │       │   ├── AndroidManifest.xml
│   │   │       │   ├── java/
│   │   │       │   │   └── com/
│   │   │       │   │       └── k2fsa/
│   │   │       │   │           └── sherpa/
│   │   │       │   │               └── onnx/
│   │   │       │   │                   └── MainActivity.kt
│   │   │       │   ├── jniLibs/
│   │   │       │   │   ├── .gitignore
│   │   │       │   │   ├── arm64-v8a/
│   │   │       │   │   │   └── .gitkeep
│   │   │       │   │   ├── armeabi-v7a/
│   │   │       │   │   │   └── .gitkeep
│   │   │       │   │   ├── x86/
│   │   │       │   │   │   └── .gitkeep
│   │   │       │   │   └── x86_64/
│   │   │       │   │       └── .gitkeep
│   │   │       │   └── res/
│   │   │       │       ├── drawable/
│   │   │       │       │   └── ic_launcher_background.xml
│   │   │       │       ├── drawable-v24/
│   │   │       │       │   └── ic_launcher_foreground.xml
│   │   │       │       ├── layout/
│   │   │       │       │   └── activity_main.xml
│   │   │       │       ├── mipmap-anydpi-v26/
│   │   │       │       │   ├── ic_launcher.xml
│   │   │       │       │   └── ic_launcher_round.xml
│   │   │       │       ├── values/
│   │   │       │       │   ├── colors.xml
│   │   │       │       │   ├── strings.xml
│   │   │       │       │   └── themes.xml
│   │   │       │       ├── values-night/
│   │   │       │       │   └── themes.xml
│   │   │       │       └── xml/
│   │   │       │           ├── backup_rules.xml
│   │   │       │           └── data_extraction_rules.xml
│   │   │       └── test/
│   │   │           └── java/
│   │   │               └── com/
│   │   │                   └── k2fsa/
│   │   │                       └── sherpa/
│   │   │                           └── onnx/
│   │   │                               └── ExampleUnitTest.kt
│   │   ├── build.gradle
│   │   ├── gradle/
│   │   │   └── wrapper/
│   │   │       └── gradle-wrapper.properties
│   │   ├── gradle.properties
│   │   ├── gradlew
│   │   ├── gradlew.bat
│   │   └── settings.gradle
│   ├── SherpaOnnx2Pass/
│   │   ├── .gitignore
│   │   ├── app/
│   │   │   ├── .gitignore
│   │   │   ├── build.gradle
│   │   │   ├── proguard-rules.pro
│   │   │   └── src/
│   │   │       ├── androidTest/
│   │   │       │   └── java/
│   │   │       │       └── com/
│   │   │       │           └── k2fsa/
│   │   │       │               └── sherpa/
│   │   │       │                   └── onnx/
│   │   │       │                       └── ExampleInstrumentedTest.kt
│   │   │       ├── main/
│   │   │       │   ├── .gitignore
│   │   │       │   ├── AndroidManifest.xml
│   │   │       │   ├── assets/
│   │   │       │   │   └── .gitkeep
│   │   │       │   ├── java/
│   │   │       │   │   └── com/
│   │   │       │   │       └── k2fsa/
│   │   │       │   │           └── sherpa/
│   │   │       │   │               └── onnx/
│   │   │       │   │                   └── MainActivity.kt
│   │   │       │   ├── jniLibs/
│   │   │       │   │   ├── .gitkeep
│   │   │       │   │   ├── arm64-v8a/
│   │   │       │   │   │   └── .gitkeep
│   │   │       │   │   ├── armeabi-v7a/
│   │   │       │   │   │   └── .gitkeep
│   │   │       │   │   ├── x86/
│   │   │       │   │   │   └── .gitkeep
│   │   │       │   │   └── x86_64/
│   │   │       │   │       └── .gitkeep
│   │   │       │   └── res/
│   │   │       │       ├── drawable/
│   │   │       │       │   └── ic_launcher_background.xml
│   │   │       │       ├── drawable-v24/
│   │   │       │       │   └── ic_launcher_foreground.xml
│   │   │       │       ├── layout/
│   │   │       │       │   └── activity_main.xml
│   │   │       │       ├── mipmap-anydpi-v26/
│   │   │       │       │   ├── ic_launcher.xml
│   │   │       │       │   └── ic_launcher_round.xml
│   │   │       │       ├── values/
│   │   │       │       │   ├── colors.xml
│   │   │       │       │   ├── strings.xml
│   │   │       │       │   └── themes.xml
│   │   │       │       ├── values-night/
│   │   │       │       │   └── themes.xml
│   │   │       │       └── xml/
│   │   │       │           ├── backup_rules.xml
│   │   │       │           └── data_extraction_rules.xml
│   │   │       └── test/
│   │   │           └── java/
│   │   │               └── com/
│   │   │                   └── k2fsa/
│   │   │                       └── sherpa/
│   │   │                           └── onnx/
│   │   │                               └── ExampleUnitTest.kt
│   │   ├── build.gradle
│   │   ├── gradle/
│   │   │   └── wrapper/
│   │   │       └── gradle-wrapper.properties
│   │   ├── gradle.properties
│   │   ├── gradlew
│   │   ├── gradlew.bat
│   │   └── settings.gradle
│   ├── SherpaOnnxAar/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── build.gradle.kts
│   │   ├── gradle/
│   │   │   ├── libs.versions.toml
│   │   │   └── wrapper/
│   │   │       └── gradle-wrapper.properties
│   │   ├── gradle.properties
│   │   ├── gradlew
│   │   ├── gradlew.bat
│   │   ├── settings.gradle.kts
│   │   └── sherpa_onnx/
│   │       ├── .gitignore
│   │       ├── build.gradle.kts
│   │       ├── consumer-rules.pro
│   │       ├── proguard-rules.pro
│   │       └── src/
│   │           ├── androidTest/
│   │           │   └── java/
│   │           │       └── com/
│   │           │           └── k2fsa/
│   │           │               └── sherpa/
│   │           │                   └── onnx/
│   │           │                       └── ExampleInstrumentedTest.kt
│   │           ├── main/
│   │           │   ├── AndroidManifest.xml
│   │           │   └── jniLibs/
│   │           │       ├── arm64-v8a/
│   │           │       │   └── .gitkeep
│   │           │       ├── armeabi-v7a/
│   │           │       │   └── .gitkeep
│   │           │       ├── x86/
│   │           │       │   └── .gitkeep
│   │           │       └── x86_64/
│   │           │           └── .gitkeep
│   │           └── test/
│   │               └── java/
│   │                   └── com/
│   │                       └── k2fsa/
│   │                           └── sherpa/
│   │                               └── onnx/
│   │                                   └── ExampleUnitTest.kt
│   ├── SherpaOnnxAudioTagging/
│   │   ├── .gitignore
│   │   ├── app/
│   │   │   ├── .gitignore
│   │   │   ├── build.gradle.kts
│   │   │   ├── proguard-rules.pro
│   │   │   └── src/
│   │   │       ├── androidTest/
│   │   │       │   └── java/
│   │   │       │       └── com/
│   │   │       │           └── k2fsa/
│   │   │       │               └── sherpa/
│   │   │       │                   └── onnx/
│   │   │       │                       └── audio/
│   │   │       │                           └── tagging/
│   │   │       │                               └── ExampleInstrumentedTest.kt
│   │   │       ├── main/
│   │   │       │   ├── AndroidManifest.xml
│   │   │       │   ├── assets/
│   │   │       │   │   └── .gitignore
│   │   │       │   ├── java/
│   │   │       │   │   └── com/
│   │   │       │   │       └── k2fsa/
│   │   │       │   │           └── sherpa/
│   │   │       │   │               └── onnx/
│   │   │       │   │                   └── audio/
│   │   │       │   │                       └── tagging/
│   │   │       │   │                           ├── Home.kt
│   │   │       │   │                           ├── MainActivity.kt
│   │   │       │   │                           ├── Tagger.kt
│   │   │       │   │                           └── ui/
│   │   │       │   │                               └── theme/
│   │   │       │   │                                   ├── Color.kt
│   │   │       │   │                                   ├── Theme.kt
│   │   │       │   │                                   └── Type.kt
│   │   │       │   ├── jniLibs/
│   │   │       │   │   ├── arm64-v8a/
│   │   │       │   │   │   └── .gitignore
│   │   │       │   │   ├── armeabi-v7a/
│   │   │       │   │   │   └── .gitignore
│   │   │       │   │   ├── x86/
│   │   │       │   │   │   └── .gitignore
│   │   │       │   │   └── x86_64/
│   │   │       │   │       └── .gitignore
│   │   │       │   └── res/
│   │   │       │       ├── drawable/
│   │   │       │       │   └── ic_launcher_background.xml
│   │   │       │       ├── drawable-v24/
│   │   │       │       │   └── ic_launcher_foreground.xml
│   │   │       │       ├── mipmap-anydpi-v26/
│   │   │       │       │   ├── ic_launcher.xml
│   │   │       │       │   └── ic_launcher_round.xml
│   │   │       │       ├── values/
│   │   │       │       │   ├── colors.xml
│   │   │       │       │   ├── strings.xml
│   │   │       │       │   └── themes.xml
│   │   │       │       └── xml/
│   │   │       │           ├── backup_rules.xml
│   │   │       │           └── data_extraction_rules.xml
│   │   │       └── test/
│   │   │           └── java/
│   │   │               └── com/
│   │   │                   └── k2fsa/
│   │   │                       └── sherpa/
│   │   │                           └── onnx/
│   │   │                               └── audio/
│   │   │                                   └── tagging/
│   │   │                                       └── ExampleUnitTest.kt
│   │   ├── build.gradle.kts
│   │   ├── gradle/
│   │   │   └── wrapper/
│   │   │       └── gradle-wrapper.properties
│   │   ├── gradle.properties
│   │   ├── gradlew
│   │   ├── gradlew.bat
│   │   └── settings.gradle.kts
│   ├── SherpaOnnxAudioTaggingWearOs/
│   │   ├── .gitignore
│   │   ├── app/
│   │   │   ├── .gitignore
│   │   │   ├── build.gradle.kts
│   │   │   ├── lint.xml
│   │   │   ├── proguard-rules.pro
│   │   │   └── src/
│   │   │       └── main/
│   │   │           ├── AndroidManifest.xml
│   │   │           ├── assets/
│   │   │           │   └── .gitignore
│   │   │           ├── java/
│   │   │           │   └── com/
│   │   │           │       └── k2fsa/
│   │   │           │           └── sherpa/
│   │   │           │               └── onnx/
│   │   │           │                   └── audio/
│   │   │           │                       └── tagging/
│   │   │           │                           └── wear/
│   │   │           │                               └── os/
│   │   │           │                                   └── presentation/
│   │   │           │                                       ├── HomeScreen.kt
│   │   │           │                                       ├── MainActivity.kt
│   │   │           │                                       └── theme/
│   │   │           │                                           └── Theme.kt
│   │   │           ├── jniLibs/
│   │   │           │   ├── arm64-v8a/
│   │   │           │   │   └── .gitignore
│   │   │           │   ├── armeabi-v7a/
│   │   │           │   │   └── .gitignore
│   │   │           │   ├── x86/
│   │   │           │   │   └── .gitignore
│   │   │           │   └── x86_64/
│   │   │           │       └── .gitignore
│   │   │           └── res/
│   │   │               ├── drawable/
│   │   │               │   └── splash_icon.xml
│   │   │               ├── values/
│   │   │               │   ├── strings.xml
│   │   │               │   └── styles.xml
│   │   │               └── values-round/
│   │   │                   └── strings.xml
│   │   ├── build.gradle.kts
│   │   ├── gradle/
│   │   │   └── wrapper/
│   │   │       └── gradle-wrapper.properties
│   │   ├── gradle.properties
│   │   ├── gradlew
│   │   ├── gradlew.bat
│   │   └── settings.gradle.kts
│   ├── SherpaOnnxJavaDemo/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── app/
│   │   │   ├── .gitignore
│   │   │   ├── build.gradle
│   │   │   ├── proguard-rules.pro
│   │   │   └── src/
│   │   │       └── main/
│   │   │           ├── AndroidManifest.xml
│   │   │           ├── assets/
│   │   │           │   └── .gitkeep
│   │   │           ├── java/
│   │   │           │   └── com/
│   │   │           │       └── k2fsa/
│   │   │           │           └── sherpa/
│   │   │           │               └── onnx/
│   │   │           │                   ├── AppViewModel.java
│   │   │           │                   ├── Application.java
│   │   │           │                   ├── MainActivity.java
│   │   │           │                   └── service/
│   │   │           │                       └── SpeechSherpaRecognitionService.java
│   │   │           └── res/
│   │   │               ├── drawable/
│   │   │               │   ├── ic_bg_mic_24.xml
│   │   │               │   └── ic_launcher_background.xml
│   │   │               ├── drawable-v24/
│   │   │               │   └── ic_launcher_foreground.xml
│   │   │               ├── layout/
│   │   │               │   └── activity_main.xml
│   │   │               ├── mipmap-anydpi-v26/
│   │   │               │   ├── ic_launcher.xml
│   │   │               │   └── ic_launcher_round.xml
│   │   │               ├── values/
│   │   │               │   ├── colors.xml
│   │   │               │   ├── strings.xml
│   │   │               │   └── themes.xml
│   │   │               ├── values-night/
│   │   │               │   └── themes.xml
│   │   │               └── xml/
│   │   │                   ├── backup_rules.xml
│   │   │                   └── data_extraction_rules.xml
│   │   ├── build.gradle
│   │   ├── gradle/
│   │   │   └── wrapper/
│   │   │       └── gradle-wrapper.properties
│   │   ├── gradle.properties
│   │   ├── gradlew
│   │   ├── gradlew.bat
│   │   └── settings.gradle
│   ├── SherpaOnnxKws/
│   │   ├── .gitignore
│   │   ├── app/
│   │   │   ├── .gitignore
│   │   │   ├── build.gradle
│   │   │   ├── proguard-rules.pro
│   │   │   └── src/
│   │   │       ├── androidTest/
│   │   │       │   └── java/
│   │   │       │       └── com/
│   │   │       │           └── k2fsa/
│   │   │       │               └── sherpa/
│   │   │       │                   └── onnx/
│   │   │       │                       └── ExampleInstrumentedTest.kt
│   │   │       ├── main/
│   │   │       │   ├── AndroidManifest.xml
│   │   │       │   ├── assets/
│   │   │       │   │   └── .gitkeep
│   │   │       │   ├── java/
│   │   │       │   │   └── com/
│   │   │       │   │       └── k2fsa/
│   │   │       │   │           └── sherpa/
│   │   │       │   │               └── onnx/
│   │   │       │   │                   └── MainActivity.kt
│   │   │       │   ├── jniLibs/
│   │   │       │   │   ├── .gitignore
│   │   │       │   │   ├── arm64-v8a/
│   │   │       │   │   │   └── .gitkeep
│   │   │       │   │   ├── armeabi-v7a/
│   │   │       │   │   │   └── .gitkeep
│   │   │       │   │   ├── x86/
│   │   │       │   │   │   └── .gitkeep
│   │   │       │   │   └── x86_64/
│   │   │       │   │       └── .gitkeep
│   │   │       │   └── res/
│   │   │       │       ├── drawable/
│   │   │       │       │   └── ic_launcher_background.xml
│   │   │       │       ├── drawable-v24/
│   │   │       │       │   └── ic_launcher_foreground.xml
│   │   │       │       ├── layout/
│   │   │       │       │   └── activity_main.xml
│   │   │       │       ├── mipmap-anydpi-v26/
│   │   │       │       │   ├── ic_launcher.xml
│   │   │       │       │   └── ic_launcher_round.xml
│   │   │       │       ├── values/
│   │   │       │       │   ├── colors.xml
│   │   │       │       │   ├── strings.xml
│   │   │       │       │   └── themes.xml
│   │   │       │       ├── values-night/
│   │   │       │       │   └── themes.xml
│   │   │       │       └── xml/
│   │   │       │           ├── backup_rules.xml
│   │   │       │           └── data_extraction_rules.xml
│   │   │       └── test/
│   │   │           └── java/
│   │   │               └── com/
│   │   │                   └── k2fsa/
│   │   │                       └── sherpa/
│   │   │                           └── onnx/
│   │   │                               └── ExampleUnitTest.kt
│   │   ├── build.gradle
│   │   ├── gradle/
│   │   │   └── wrapper/
│   │   │       └── gradle-wrapper.properties
│   │   ├── gradle.properties
│   │   ├── gradlew
│   │   ├── gradlew.bat
│   │   └── settings.gradle
│   ├── SherpaOnnxSimulateStreamingAsr/
│   │   ├── .gitignore
│   │   ├── app/
│   │   │   ├── .gitignore
│   │   │   ├── build.gradle.kts
│   │   │   ├── proguard-rules.pro
│   │   │   └── src/
│   │   │       ├── androidTest/
│   │   │       │   └── java/
│   │   │       │       └── com/
│   │   │       │           └── k2fsa/
│   │   │       │               └── sherpa/
│   │   │       │                   └── onnx/
│   │   │       │                       └── simulate/
│   │   │       │                           └── streaming/
│   │   │       │                               └── asr/
│   │   │       │                                   └── ExampleInstrumentedTest.kt
│   │   │       ├── main/
│   │   │       │   ├── AndroidManifest.xml
│   │   │       │   ├── assets/
│   │   │       │   │   └── .gitkeep
│   │   │       │   ├── java/
│   │   │       │   │   └── com/
│   │   │       │   │       └── k2fsa/
│   │   │       │   │           └── sherpa/
│   │   │       │   │               └── onnx/
│   │   │       │   │                   └── simulate/
│   │   │       │   │                       └── streaming/
│   │   │       │   │                           └── asr/
│   │   │       │   │                               ├── BarItem.kt
│   │   │       │   │                               ├── MainActivity.kt
│   │   │       │   │                               ├── NavBarItems.kt
│   │   │       │   │                               ├── NavRoutes.kt
│   │   │       │   │                               ├── SimulateStreamingAsr.kt
│   │   │       │   │                               ├── screens/
│   │   │       │   │                               │   ├── Help.kt
│   │   │       │   │                               │   └── Home.kt
│   │   │       │   │                               └── ui/
│   │   │       │   │                                   └── theme/
│   │   │       │   │                                       ├── Color.kt
│   │   │       │   │                                       ├── Theme.kt
│   │   │       │   │                                       └── Type.kt
│   │   │       │   ├── jniLibs/
│   │   │       │   │   ├── arm64-v8a/
│   │   │       │   │   │   └── .gitkeep
│   │   │       │   │   ├── armeabi-v7a/
│   │   │       │   │   │   └── .gitkeep
│   │   │       │   │   ├── x86/
│   │   │       │   │   │   └── .gitkeep
│   │   │       │   │   └── x86_64/
│   │   │       │   │       └── .gitkeep
│   │   │       │   └── res/
│   │   │       │       ├── drawable/
│   │   │       │       │   └── ic_launcher_background.xml
│   │   │       │       ├── drawable-v24/
│   │   │       │       │   └── ic_launcher_foreground.xml
│   │   │       │       ├── mipmap-anydpi-v26/
│   │   │       │       │   ├── ic_launcher.xml
│   │   │       │       │   └── ic_launcher_round.xml
│   │   │       │       ├── values/
│   │   │       │       │   ├── colors.xml
│   │   │       │       │   ├── strings.xml
│   │   │       │       │   └── themes.xml
│   │   │       │       └── xml/
│   │   │       │           ├── backup_rules.xml
│   │   │       │           └── data_extraction_rules.xml
│   │   │       └── test/
│   │   │           └── java/
│   │   │               └── com/
│   │   │                   └── k2fsa/
│   │   │                       └── sherpa/
│   │   │                           └── onnx/
│   │   │                               └── simulate/
│   │   │                                   └── streaming/
│   │   │                                       └── asr/
│   │   │                                           └── ExampleUnitTest.kt
│   │   ├── build.gradle.kts
│   │   ├── gradle/
│   │   │   ├── libs.versions.toml
│   │   │   └── wrapper/
│   │   │       └── gradle-wrapper.properties
│   │   ├── gradle.properties
│   │   ├── gradlew
│   │   ├── gradlew.bat
│   │   └── settings.gradle.kts
│   ├── SherpaOnnxSimulateStreamingAsrWearOs/
│   │   ├── .gitignore
│   │   ├── app/
│   │   │   ├── .gitignore
│   │   │   ├── build.gradle.kts
│   │   │   ├── lint.xml
│   │   │   ├── proguard-rules.pro
│   │   │   └── src/
│   │   │       └── main/
│   │   │           ├── AndroidManifest.xml
│   │   │           ├── assets/
│   │   │           │   └── .gitignore
│   │   │           ├── java/
│   │   │           │   └── com/
│   │   │           │       └── k2fsa/
│   │   │           │           └── sherpa/
│   │   │           │               └── onnx/
│   │   │           │                   └── simulate/
│   │   │           │                       └── streaming/
│   │   │           │                           └── asr/
│   │   │           │                               └── wear/
│   │   │           │                                   └── os/
│   │   │           │                                       └── presentation/
│   │   │           │                                           ├── HomeScreen.kt
│   │   │           │                                           ├── MainActivity.kt
│   │   │           │                                           ├── SimulateStreamingAsr.kt
│   │   │           │                                           └── theme/
│   │   │           │                                               └── Theme.kt
│   │   │           └── res/
│   │   │               ├── drawable/
│   │   │               │   └── splash_icon.xml
│   │   │               ├── values/
│   │   │               │   ├── strings.xml
│   │   │               │   └── styles.xml
│   │   │               └── values-round/
│   │   │                   └── strings.xml
│   │   ├── build.gradle.kts
│   │   ├── gradle/
│   │   │   ├── libs.versions.toml
│   │   │   └── wrapper/
│   │   │       └── gradle-wrapper.properties
│   │   ├── gradle.properties
│   │   ├── gradlew
│   │   ├── gradlew.bat
│   │   └── settings.gradle.kts
│   ├── SherpaOnnxSpeakerDiarization/
│   │   ├── .gitignore
│   │   ├── app/
│   │   │   ├── .gitignore
│   │   │   ├── build.gradle.kts
│   │   │   ├── proguard-rules.pro
│   │   │   └── src/
│   │   │       ├── androidTest/
│   │   │       │   └── java/
│   │   │       │       └── com/
│   │   │       │           └── k2fsa/
│   │   │       │               └── sherpa/
│   │   │       │                   └── onnx/
│   │   │       │                       └── speaker/
│   │   │       │                           └── diarization/
│   │   │       │                               └── ExampleInstrumentedTest.kt
│   │   │       ├── main/
│   │   │       │   ├── AndroidManifest.xml
│   │   │       │   ├── assets/
│   │   │       │   │   └── .gitkeep
│   │   │       │   ├── java/
│   │   │       │   │   └── com/
│   │   │       │   │       └── k2fsa/
│   │   │       │   │           └── sherpa/
│   │   │       │   │               └── onnx/
│   │   │       │   │                   └── speaker/
│   │   │       │   │                       └── diarization/
│   │   │       │   │                           ├── BarItem.kt
│   │   │       │   │                           ├── MainActivity.kt
│   │   │       │   │                           ├── NavBarItems.kt
│   │   │       │   │                           ├── NavRoutes.kt
│   │   │       │   │                           ├── ReadWaveFile.kt
│   │   │       │   │                           ├── SpeakerDiarizationObject.kt
│   │   │       │   │                           ├── screens/
│   │   │       │   │                           │   ├── Help.kt
│   │   │       │   │                           │   └── Home.kt
│   │   │       │   │                           └── ui/
│   │   │       │   │                               └── theme/
│   │   │       │   │                                   ├── Color.kt
│   │   │       │   │                                   ├── Theme.kt
│   │   │       │   │                                   └── Type.kt
│   │   │       │   ├── jniLibs/
│   │   │       │   │   ├── arm64-v8a/
│   │   │       │   │   │   └── .gitkeep
│   │   │       │   │   ├── armeabi-v7a/
│   │   │       │   │   │   └── .gitkeep
│   │   │       │   │   ├── x86/
│   │   │       │   │   │   └── .gitkeep
│   │   │       │   │   └── x86_64/
│   │   │       │   │       └── .gitkeep
│   │   │       │   └── res/
│   │   │       │       ├── drawable/
│   │   │       │       │   └── ic_launcher_background.xml
│   │   │       │       ├── drawable-v24/
│   │   │       │       │   └── ic_launcher_foreground.xml
│   │   │       │       ├── mipmap-anydpi-v26/
│   │   │       │       │   ├── ic_launcher.xml
│   │   │       │       │   └── ic_launcher_round.xml
│   │   │       │       ├── values/
│   │   │       │       │   ├── colors.xml
│   │   │       │       │   ├── strings.xml
│   │   │       │       │   └── themes.xml
│   │   │       │       └── xml/
│   │   │       │           ├── backup_rules.xml
│   │   │       │           └── data_extraction_rules.xml
│   │   │       └── test/
│   │   │           └── java/
│   │   │               └── com/
│   │   │                   └── k2fsa/
│   │   │                       └── sherpa/
│   │   │                           └── onnx/
│   │   │                               └── speaker/
│   │   │                                   └── diarization/
│   │   │                                       └── ExampleUnitTest.kt
│   │   ├── build.gradle.kts
│   │   ├── gradle/
│   │   │   ├── libs.versions.toml
│   │   │   └── wrapper/
│   │   │       └── gradle-wrapper.properties
│   │   ├── gradle.properties
│   │   ├── gradlew
│   │   ├── gradlew.bat
│   │   └── settings.gradle.kts
│   ├── SherpaOnnxSpeakerIdentification/
│   │   ├── .gitignore
│   │   ├── app/
│   │   │   ├── .gitignore
│   │   │   ├── build.gradle.kts
│   │   │   ├── proguard-rules.pro
│   │   │   └── src/
│   │   │       ├── androidTest/
│   │   │       │   └── java/
│   │   │       │       └── com/
│   │   │       │           └── k2fsa/
│   │   │       │               └── sherpa/
│   │   │       │                   └── onnx/
│   │   │       │                       └── speaker/
│   │   │       │                           └── identification/
│   │   │       │                               └── ExampleInstrumentedTest.kt
│   │   │       ├── main/
│   │   │       │   ├── AndroidManifest.xml
│   │   │       │   ├── assets/
│   │   │       │   │   └── .gitkeep
│   │   │       │   ├── java/
│   │   │       │   │   └── com/
│   │   │       │   │       └── k2fsa/
│   │   │       │   │           └── sherpa/
│   │   │       │   │               └── onnx/
│   │   │       │   │                   └── speaker/
│   │   │       │   │                       └── identification/
│   │   │       │   │                           ├── BarItem.kt
│   │   │       │   │                           ├── MainActivity.kt
│   │   │       │   │                           ├── NavBarItems.kt
│   │   │       │   │                           ├── NavRoutes.kt
│   │   │       │   │                           ├── screens/
│   │   │       │   │                           │   ├── Help.kt
│   │   │       │   │                           │   ├── Home.kt
│   │   │       │   │                           │   ├── Register.kt
│   │   │       │   │                           │   └── View.kt
│   │   │       │   │                           └── ui/
│   │   │       │   │                               └── theme/
│   │   │       │   │                                   ├── Color.kt
│   │   │       │   │                                   ├── Theme.kt
│   │   │       │   │                                   └── Type.kt
│   │   │       │   ├── jniLibs/
│   │   │       │   │   ├── arm64-v8a/
│   │   │       │   │   │   └── .gitkeep
│   │   │       │   │   ├── armeabi-v7a/
│   │   │       │   │   │   └── .gitkeep
│   │   │       │   │   ├── x86/
│   │   │       │   │   │   └── .gitkeep
│   │   │       │   │   └── x86_64/
│   │   │       │   │       └── .gitkeep
│   │   │       │   └── res/
│   │   │       │       ├── drawable/
│   │   │       │       │   └── ic_launcher_background.xml
│   │   │       │       ├── drawable-v24/
│   │   │       │       │   └── ic_launcher_foreground.xml
│   │   │       │       ├── mipmap-anydpi-v26/
│   │   │       │       │   ├── ic_launcher.xml
│   │   │       │       │   └── ic_launcher_round.xml
│   │   │       │       ├── values/
│   │   │       │       │   ├── colors.xml
│   │   │       │       │   ├── strings.xml
│   │   │       │       │   └── themes.xml
│   │   │       │       └── xml/
│   │   │       │           ├── backup_rules.xml
│   │   │       │           └── data_extraction_rules.xml
│   │   │       └── test/
│   │   │           └── java/
│   │   │               └── com/
│   │   │                   └── k2fsa/
│   │   │                       └── sherpa/
│   │   │                           └── onnx/
│   │   │                               └── speaker/
│   │   │                                   └── identification/
│   │   │                                       └── ExampleUnitTest.kt
│   │   ├── build.gradle.kts
│   │   ├── gradle/
│   │   │   └── wrapper/
│   │   │       └── gradle-wrapper.properties
│   │   ├── gradle.properties
│   │   ├── gradlew
│   │   ├── gradlew.bat
│   │   └── settings.gradle.kts
│   ├── SherpaOnnxSpokenLanguageIdentification/
│   │   ├── .gitignore
│   │   ├── app/
│   │   │   ├── .gitignore
│   │   │   ├── build.gradle.kts
│   │   │   ├── proguard-rules.pro
│   │   │   └── src/
│   │   │       ├── androidTest/
│   │   │       │   └── java/
│   │   │       │       └── com/
│   │   │       │           └── k2fsa/
│   │   │       │               └── sherpa/
│   │   │       │                   └── onnx/
│   │   │       │                       └── slid/
│   │   │       │                           └── ExampleInstrumentedTest.kt
│   │   │       ├── main/
│   │   │       │   ├── AndroidManifest.xml
│   │   │       │   ├── assets/
│   │   │       │   │   └── .gitignore
│   │   │       │   ├── java/
│   │   │       │   │   └── com/
│   │   │       │   │       └── k2fsa/
│   │   │       │   │           └── sherpa/
│   │   │       │   │               └── onnx/
│   │   │       │   │                   └── slid/
│   │   │       │   │                       ├── Home.kt
│   │   │       │   │                       ├── MainActivity.kt
│   │   │       │   │                       ├── slid.kt
│   │   │       │   │                       └── ui/
│   │   │       │   │                           └── theme/
│   │   │       │   │                               ├── Color.kt
│   │   │       │   │                               ├── Theme.kt
│   │   │       │   │                               └── Type.kt
│   │   │       │   ├── jniLibs/
│   │   │       │   │   ├── arm64-v8a/
│   │   │       │   │   │   └── .gitignore
│   │   │       │   │   ├── armeabi-v7a/
│   │   │       │   │   │   └── .gitignore
│   │   │       │   │   ├── x86/
│   │   │       │   │   │   └── .gitignore
│   │   │       │   │   └── x86_64/
│   │   │       │   │       └── .gitignore
│   │   │       │   └── res/
│   │   │       │       ├── drawable/
│   │   │       │       │   └── ic_launcher_background.xml
│   │   │       │       ├── drawable-v24/
│   │   │       │       │   └── ic_launcher_foreground.xml
│   │   │       │       ├── mipmap-anydpi-v26/
│   │   │       │       │   ├── ic_launcher.xml
│   │   │       │       │   └── ic_launcher_round.xml
│   │   │       │       ├── values/
│   │   │       │       │   ├── colors.xml
│   │   │       │       │   ├── strings.xml
│   │   │       │       │   └── themes.xml
│   │   │       │       └── xml/
│   │   │       │           ├── backup_rules.xml
│   │   │       │           └── data_extraction_rules.xml
│   │   │       └── test/
│   │   │           └── java/
│   │   │               └── com/
│   │   │                   └── k2fsa/
│   │   │                       └── sherpa/
│   │   │                           └── onnx/
│   │   │                               └── slid/
│   │   │                                   └── ExampleUnitTest.kt
│   │   ├── build.gradle.kts
│   │   ├── gradle/
│   │   │   └── wrapper/
│   │   │       └── gradle-wrapper.properties
│   │   ├── gradle.properties
│   │   ├── gradlew
│   │   ├── gradlew.bat
│   │   └── settings.gradle.kts
│   ├── SherpaOnnxTts/
│   │   ├── .gitignore
│   │   ├── app/
│   │   │   ├── .gitignore
│   │   │   ├── build.gradle
│   │   │   ├── proguard-rules.pro
│   │   │   └── src/
│   │   │       ├── androidTest/
│   │   │       │   └── java/
│   │   │       │       └── com/
│   │   │       │           └── k2fsa/
│   │   │       │               └── sherpa/
│   │   │       │                   └── onnx/
│   │   │       │                       └── ExampleInstrumentedTest.kt
│   │   │       ├── main/
│   │   │       │   ├── .gitignore
│   │   │       │   ├── AndroidManifest.xml
│   │   │       │   ├── assets/
│   │   │       │   │   └── .gitkeep
│   │   │       │   ├── java/
│   │   │       │   │   └── com/
│   │   │       │   │       └── k2fsa/
│   │   │       │   │           └── sherpa/
│   │   │       │   │               └── onnx/
│   │   │       │   │                   └── MainActivity.kt
│   │   │       │   ├── jniLibs/
│   │   │       │   │   ├── arm64-v8a/
│   │   │       │   │   │   └── .gitignore
│   │   │       │   │   ├── armeabi-v7a/
│   │   │       │   │   │   └── .gitignore
│   │   │       │   │   ├── x86/
│   │   │       │   │   │   └── .gitignore
│   │   │       │   │   └── x86_64/
│   │   │       │   │       └── .gitignore
│   │   │       │   └── res/
│   │   │       │       ├── drawable/
│   │   │       │       │   └── ic_launcher_background.xml
│   │   │       │       ├── drawable-v24/
│   │   │       │       │   └── ic_launcher_foreground.xml
│   │   │       │       ├── layout/
│   │   │       │       │   └── activity_main.xml
│   │   │       │       ├── mipmap-anydpi-v26/
│   │   │       │       │   ├── ic_launcher.xml
│   │   │       │       │   └── ic_launcher_round.xml
│   │   │       │       ├── values/
│   │   │       │       │   ├── colors.xml
│   │   │       │       │   ├── strings.xml
│   │   │       │       │   └── themes.xml
│   │   │       │       ├── values-night/
│   │   │       │       │   └── themes.xml
│   │   │       │       └── xml/
│   │   │       │           ├── backup_rules.xml
│   │   │       │           └── data_extraction_rules.xml
│   │   │       └── test/
│   │   │           └── java/
│   │   │               └── com/
│   │   │                   └── k2fsa/
│   │   │                       └── sherpa/
│   │   │                           └── onnx/
│   │   │                               └── ExampleUnitTest.kt
│   │   ├── build.gradle
│   │   ├── gradle/
│   │   │   └── wrapper/
│   │   │       └── gradle-wrapper.properties
│   │   ├── gradle.properties
│   │   ├── gradlew
│   │   ├── gradlew.bat
│   │   └── settings.gradle
│   ├── SherpaOnnxTtsEngine/
│   │   ├── .gitignore
│   │   ├── app/
│   │   │   ├── .gitignore
│   │   │   ├── build.gradle.kts
│   │   │   ├── proguard-rules.pro
│   │   │   └── src/
│   │   │       ├── androidTest/
│   │   │       │   └── java/
│   │   │       │       └── com/
│   │   │       │           └── k2fsa/
│   │   │       │               └── sherpa/
│   │   │       │                   └── onnx/
│   │   │       │                       └── tts/
│   │   │       │                           └── engine/
│   │   │       │                               └── ExampleInstrumentedTest.kt
│   │   │       ├── main/
│   │   │       │   ├── AndroidManifest.xml
│   │   │       │   ├── assets/
│   │   │       │   │   └── .gitkeep
│   │   │       │   ├── java/
│   │   │       │   │   └── com/
│   │   │       │   │       └── k2fsa/
│   │   │       │   │           └── sherpa/
│   │   │       │   │               └── onnx/
│   │   │       │   │                   └── tts/
│   │   │       │   │                       └── engine/
│   │   │       │   │                           ├── CheckVoiceData.kt
│   │   │       │   │                           ├── GetSampleText.kt
│   │   │       │   │                           ├── InstallVoiceData.kt
│   │   │       │   │                           ├── MainActivity.kt
│   │   │       │   │                           ├── PreferencesHelper.kt
│   │   │       │   │                           ├── TtsEngine.kt
│   │   │       │   │                           ├── TtsService.kt
│   │   │       │   │                           ├── TtsViewModel.kt
│   │   │       │   │                           └── ui/
│   │   │       │   │                               └── theme/
│   │   │       │   │                                   ├── Color.kt
│   │   │       │   │                                   ├── Theme.kt
│   │   │       │   │                                   └── Type.kt
│   │   │       │   ├── jniLibs/
│   │   │       │   │   ├── arm64-v8a/
│   │   │       │   │   │   └── .gitkeep
│   │   │       │   │   ├── armeabi-v7a/
│   │   │       │   │   │   └── .gitkeep
│   │   │       │   │   ├── x86/
│   │   │       │   │   │   └── .gitkeep
│   │   │       │   │   └── x86_64/
│   │   │       │   │       └── .gitkeep
│   │   │       │   └── res/
│   │   │       │       ├── drawable-v24/
│   │   │       │       │   └── ic_launcher_foreground.xml
│   │   │       │       ├── mipmap-anydpi-v26/
│   │   │       │       │   ├── ic_launcher.xml
│   │   │       │       │   └── ic_launcher_round.xml
│   │   │       │       ├── values/
│   │   │       │       │   ├── colors.xml
│   │   │       │       │   ├── ic_launcher_background.xml
│   │   │       │       │   ├── strings.xml
│   │   │       │       │   └── themes.xml
│   │   │       │       └── xml/
│   │   │       │           ├── backup_rules.xml
│   │   │       │           ├── data_extraction_rules.xml
│   │   │       │           └── tts_engine.xml
│   │   │       └── test/
│   │   │           └── java/
│   │   │               └── com/
│   │   │                   └── k2fsa/
│   │   │                       └── sherpa/
│   │   │                           └── onnx/
│   │   │                               └── tts/
│   │   │                                   └── engine/
│   │   │                                       └── ExampleUnitTest.kt
│   │   ├── build.gradle.kts
│   │   ├── gradle/
│   │   │   └── wrapper/
│   │   │       └── gradle-wrapper.properties
│   │   ├── gradle.properties
│   │   ├── gradlew
│   │   ├── gradlew.bat
│   │   └── settings.gradle.kts
│   ├── SherpaOnnxVad/
│   │   ├── .gitignore
│   │   ├── app/
│   │   │   ├── .gitignore
│   │   │   ├── build.gradle
│   │   │   ├── proguard-rules.pro
│   │   │   └── src/
│   │   │       ├── androidTest/
│   │   │       │   └── java/
│   │   │       │       └── com/
│   │   │       │           └── k2fsa/
│   │   │       │               └── sherpa/
│   │   │       │                   └── onnx/
│   │   │       │                       └── ExampleInstrumentedTest.kt
│   │   │       ├── main/
│   │   │       │   ├── AndroidManifest.xml
│   │   │       │   ├── assets/
│   │   │       │   │   └── .gitignore
│   │   │       │   ├── java/
│   │   │       │   │   └── com/
│   │   │       │   │       └── k2fsa/
│   │   │       │   │           └── sherpa/
│   │   │       │   │               └── onnx/
│   │   │       │   │                   └── MainActivity.kt
│   │   │       │   ├── jniLibs/
│   │   │       │   │   ├── .gitignore
│   │   │       │   │   ├── arm64-v8a/
│   │   │       │   │   │   └── .gitignore
│   │   │       │   │   ├── armeabi-v7a/
│   │   │       │   │   │   └── .gitignore
│   │   │       │   │   ├── x86/
│   │   │       │   │   │   └── .gitignore
│   │   │       │   │   └── x86_64/
│   │   │       │   │       └── .gitignore
│   │   │       │   └── res/
│   │   │       │       ├── drawable/
│   │   │       │       │   ├── black_circle.xml
│   │   │       │       │   ├── ic_launcher_background.xml
│   │   │       │       │   └── red_circle.xml
│   │   │       │       ├── drawable-v24/
│   │   │       │       │   └── ic_launcher_foreground.xml
│   │   │       │       ├── layout/
│   │   │       │       │   └── activity_main.xml
│   │   │       │       ├── mipmap-anydpi-v26/
│   │   │       │       │   ├── ic_launcher.xml
│   │   │       │       │   └── ic_launcher_round.xml
│   │   │       │       ├── values/
│   │   │       │       │   ├── colors.xml
│   │   │       │       │   ├── strings.xml
│   │   │       │       │   └── themes.xml
│   │   │       │       ├── values-night/
│   │   │       │       │   └── themes.xml
│   │   │       │       └── xml/
│   │   │       │           ├── backup_rules.xml
│   │   │       │           └── data_extraction_rules.xml
│   │   │       └── test/
│   │   │           └── java/
│   │   │               └── com/
│   │   │                   └── k2fsa/
│   │   │                       └── sherpa/
│   │   │                           └── onnx/
│   │   │                               └── ExampleUnitTest.kt
│   │   ├── build.gradle
│   │   ├── gradle/
│   │   │   └── wrapper/
│   │   │       └── gradle-wrapper.properties
│   │   ├── gradle.properties
│   │   ├── gradlew
│   │   ├── gradlew.bat
│   │   └── settings.gradle
│   ├── SherpaOnnxVadAsr/
│   │   ├── .gitignore
│   │   ├── app/
│   │   │   ├── .gitignore
│   │   │   ├── build.gradle
│   │   │   ├── proguard-rules.pro
│   │   │   └── src/
│   │   │       ├── androidTest/
│   │   │       │   └── java/
│   │   │       │       └── com/
│   │   │       │           └── k2fsa/
│   │   │       │               └── sherpa/
│   │   │       │                   └── onnx/
│   │   │       │                       └── ExampleInstrumentedTest.kt
│   │   │       ├── main/
│   │   │       │   ├── AndroidManifest.xml
│   │   │       │   ├── assets/
│   │   │       │   │   └── .gitignore
│   │   │       │   ├── java/
│   │   │       │   │   └── com/
│   │   │       │   │       └── k2fsa/
│   │   │       │   │           └── sherpa/
│   │   │       │   │               └── onnx/
│   │   │       │   │                   └── MainActivity.kt
│   │   │       │   ├── jniLibs/
│   │   │       │   │   ├── arm64-v8a/
│   │   │       │   │   │   └── .gitkeep
│   │   │       │   │   ├── armeabi-v7a/
│   │   │       │   │   │   └── .gitkeep
│   │   │       │   │   ├── x86/
│   │   │       │   │   │   └── .gitkeep
│   │   │       │   │   └── x86_64/
│   │   │       │   │       └── .gitkeep
│   │   │       │   └── res/
│   │   │       │       ├── drawable/
│   │   │       │       │   └── ic_launcher_background.xml
│   │   │       │       ├── drawable-v24/
│   │   │       │       │   └── ic_launcher_foreground.xml
│   │   │       │       ├── layout/
│   │   │       │       │   └── activity_main.xml
│   │   │       │       ├── mipmap-anydpi-v26/
│   │   │       │       │   ├── ic_launcher.xml
│   │   │       │       │   └── ic_launcher_round.xml
│   │   │       │       ├── values/
│   │   │       │       │   ├── colors.xml
│   │   │       │       │   ├── strings.xml
│   │   │       │       │   └── themes.xml
│   │   │       │       ├── values-night/
│   │   │       │       │   └── themes.xml
│   │   │       │       └── xml/
│   │   │       │           ├── backup_rules.xml
│   │   │       │           └── data_extraction_rules.xml
│   │   │       └── test/
│   │   │           └── java/
│   │   │               └── com/
│   │   │                   └── k2fsa/
│   │   │                       └── sherpa/
│   │   │                           └── onnx/
│   │   │                               └── ExampleUnitTest.kt
│   │   ├── build.gradle
│   │   ├── gradle/
│   │   │   └── wrapper/
│   │   │       └── gradle-wrapper.properties
│   │   ├── gradle.properties
│   │   ├── gradlew
│   │   ├── gradlew.bat
│   │   └── settings.gradle
│   └── SherpaOnnxWebSocket/
│       ├── .gitignore
│       ├── app/
│       │   ├── .gitignore
│       │   ├── build.gradle
│       │   ├── proguard-rules.pro
│       │   └── src/
│       │       ├── androidTest/
│       │       │   └── java/
│       │       │       └── com/
│       │       │           └── k2fsa/
│       │       │               └── sherpa/
│       │       │                   └── onnx/
│       │       │                       └── ExampleInstrumentedTest.kt
│       │       ├── main/
│       │       │   ├── AndroidManifest.xml
│       │       │   ├── assets/
│       │       │   │   └── .gitkeep
│       │       │   ├── java/
│       │       │   │   └── com/
│       │       │   │       └── k2fsa/
│       │       │   │           └── sherpa/
│       │       │   │               └── onnx/
│       │       │   │                   ├── MainActivity.kt
│       │       │   │                   ├── MyWebsocketClient.kt
│       │       │   │                   ├── SpeechContent.kt
│       │       │   │                   └── WaveReader.kt
│       │       │   ├── jniLibs/
│       │       │   │   ├── .gitignore
│       │       │   │   ├── arm64-v8a/
│       │       │   │   │   └── .gitkeep
│       │       │   │   ├── armeabi-v7a/
│       │       │   │   │   └── .gitkeep
│       │       │   │   ├── x86/
│       │       │   │   │   └── .gitkeep
│       │       │   │   └── x86_64/
│       │       │   │       └── .gitkeep
│       │       │   └── res/
│       │       │       ├── drawable/
│       │       │       │   └── ic_launcher_background.xml
│       │       │       ├── drawable-v24/
│       │       │       │   └── ic_launcher_foreground.xml
│       │       │       ├── layout/
│       │       │       │   └── activity_main.xml
│       │       │       ├── mipmap-anydpi-v26/
│       │       │       │   ├── ic_launcher.xml
│       │       │       │   └── ic_launcher_round.xml
│       │       │       ├── values/
│       │       │       │   ├── colors.xml
│       │       │       │   ├── strings.xml
│       │       │       │   └── themes.xml
│       │       │       ├── values-night/
│       │       │       │   └── themes.xml
│       │       │       └── xml/
│       │       │           ├── backup_rules.xml
│       │       │           └── data_extraction_rules.xml
│       │       └── test/
│       │           └── java/
│       │               └── com/
│       │                   └── k2fsa/
│       │                       └── sherpa/
│       │                           └── onnx/
│       │                               └── ExampleUnitTest.kt
│       ├── build.gradle
│       ├── gradle/
│       │   └── wrapper/
│       │       └── gradle-wrapper.properties
│       ├── gradle.properties
│       ├── gradlew
│       ├── gradlew.bat
│       └── settings.gradle
├── c-api-examples/
│   ├── CMakeLists.txt
│   ├── Makefile
│   ├── README.md
│   ├── add-punctuation-c-api.c
│   ├── add-punctuation-online-c-api.c
│   ├── asr-microphone-example/
│   │   ├── CMakeLists.txt
│   │   ├── CPPLINT.cfg
│   │   ├── README.md
│   │   └── c-api-alsa.cc
│   ├── audio-tagging-c-api.c
│   ├── decode-file-c-api.c
│   ├── dolphin-ctc-c-api.c
│   ├── fire-red-asr-c-api.c
│   ├── fire-red-asr-ctc-c-api.c
│   ├── funasr-nano-c-api.c
│   ├── keywords-spotter-buffered-tokens-keywords-c-api.c
│   ├── kitten-tts-en-c-api.c
│   ├── kokoro-tts-en-c-api.c
│   ├── kokoro-tts-zh-en-c-api.c
│   ├── kws-c-api.c
│   ├── matcha-tts-en-c-api.c
│   ├── matcha-tts-zh-c-api.c
│   ├── medasr-ctc-c-api.c
│   ├── moonshine-c-api.c
│   ├── moonshine-v2-c-api.c
│   ├── nemo-canary-c-api.c
│   ├── nemo-parakeet-c-api.c
│   ├── offline-speaker-diarization-c-api.c
│   ├── offline-tts-c-api.c
│   ├── omnilingual-asr-ctc-c-api.c
│   ├── online-speech-enhancement-dpdfnet-c-api.c
│   ├── online-speech-enhancement-gtcrn-c-api.c
│   ├── paraformer-c-api.c
│   ├── pocket-tts-en-c-api.c
│   ├── sense-voice-c-api.c
│   ├── sense-voice-with-hr-c-api.c
│   ├── speaker-identification-c-api.c
│   ├── speech-enhancement-dpdfnet-c-api.c
│   ├── speech-enhancement-gtcrn-c-api.c
│   ├── spoken-language-identification-c-api.c
│   ├── streaming-ctc-buffered-tokens-c-api.c
│   ├── streaming-hlg-decode-file-c-api.c
│   ├── streaming-paraformer-buffered-tokens-c-api.c
│   ├── streaming-paraformer-c-api.c
│   ├── streaming-t-one-ctc-c-api.c
│   ├── streaming-zipformer-buffered-tokens-hotwords-c-api.c
│   ├── streaming-zipformer-c-api.c
│   ├── streaming-zipformer-with-hr-c-api.c
│   ├── supertonic-tts-en-c-api.c
│   ├── telespeech-c-api.c
│   ├── vad-moonshine-c-api.c
│   ├── vad-sense-voice-c-api.c
│   ├── vad-whisper-c-api.c
│   ├── wenet-ctc-c-api.c
│   ├── whisper-c-api.c
│   ├── zipformer-c-api.c
│   └── zipvoice-tts-zh-en-c-api.c
├── cmake/
│   ├── .gitignore
│   ├── __init__.py
│   ├── asio.cmake
│   ├── cargs.cmake
│   ├── cmake_extension.py
│   ├── eigen.cmake
│   ├── espeak-ng-for-piper.cmake
│   ├── googletest.cmake
│   ├── hclust-cpp.cmake
│   ├── json.cmake
│   ├── kaldi-decoder.cmake
│   ├── kaldi-native-fbank.cmake
│   ├── kaldifst.cmake
│   ├── onnxruntime-linux-aarch64-gpu.cmake
│   ├── onnxruntime-linux-aarch64-static.cmake
│   ├── onnxruntime-linux-aarch64.cmake
│   ├── onnxruntime-linux-arm-static.cmake
│   ├── onnxruntime-linux-arm.cmake
│   ├── onnxruntime-linux-riscv64-spacemit.cmake
│   ├── onnxruntime-linux-riscv64-static.cmake
│   ├── onnxruntime-linux-riscv64.cmake
│   ├── onnxruntime-linux-x86_64-gpu.cmake
│   ├── onnxruntime-linux-x86_64-static.cmake
│   ├── onnxruntime-linux-x86_64.cmake
│   ├── onnxruntime-osx-arm64-static.cmake
│   ├── onnxruntime-osx-arm64.cmake
│   ├── onnxruntime-osx-universal-static.cmake
│   ├── onnxruntime-osx-universal.cmake
│   ├── onnxruntime-osx-x86_64-static.cmake
│   ├── onnxruntime-osx-x86_64.cmake
│   ├── onnxruntime-wasm-simd.cmake
│   ├── onnxruntime-win-arm64-static.cmake
│   ├── onnxruntime-win-arm64.cmake
│   ├── onnxruntime-win-x64-directml.cmake
│   ├── onnxruntime-win-x64-gpu.cmake
│   ├── onnxruntime-win-x64-static.cmake
│   ├── onnxruntime-win-x64.cmake
│   ├── onnxruntime-win-x86-static.cmake
│   ├── onnxruntime-win-x86.cmake
│   ├── onnxruntime.cmake
│   ├── openfst.cmake
│   ├── piper-phonemize.cmake
│   ├── portaudio.cmake
│   ├── pybind11.cmake
│   ├── sherpa-onnx-shared.pc.in
│   ├── sherpa-onnx-static-no-tts.pc.in
│   ├── sherpa-onnx-static.pc.in
│   ├── show-info.cmake
│   ├── simple-sentencepiece.cmake
│   └── websocketpp.cmake
├── cxx-api-examples/
│   ├── CMakeLists.txt
│   ├── audio-tagging-ced-cxx-api.cc
│   ├── audio-tagging-zipformer-cxx-api.cc
│   ├── dolphin-ctc-cxx-api.cc
│   ├── fire-red-asr-ctc-cxx-api.cc
│   ├── fire-red-asr-ctc-simulate-streaming-alsa-cxx-api.cc
│   ├── fire-red-asr-ctc-simulate-streaming-microphone-cxx-api.cc
│   ├── fire-red-asr-cxx-api.cc
│   ├── funasr-nano-cxx-api.cc
│   ├── kitten-tts-en-cxx-api.cc
│   ├── kokoro-tts-en-cxx-api.cc
│   ├── kokoro-tts-zh-en-cxx-api.cc
│   ├── kws-cxx-api.cc
│   ├── matcha-tts-en-cxx-api.cc
│   ├── matcha-tts-zh-cxx-api.cc
│   ├── medasr-ctc-cxx-api.cc
│   ├── moonshine-cxx-api.cc
│   ├── moonshine-v2-cxx-api.cc
│   ├── nemo-canary-cxx-api.cc
│   ├── offline-punctuation-cxx-api.cc
│   ├── omnilingual-asr-ctc-cxx-api.cc
│   ├── online-punctuation-cxx-api.cc
│   ├── online-speech-enhancement-dpdfnet-cxx-api.cc
│   ├── online-speech-enhancement-gtcrn-cxx-api.cc
│   ├── parakeet-tdt-ctc-simulate-streaming-microphone-cxx-api.cc
│   ├── parakeet-tdt-simulate-streaming-microphone-cxx-api.cc
│   ├── pocket-tts-en-cxx-api.cc
│   ├── sense-voice-cxx-api.cc
│   ├── sense-voice-simulate-streaming-alsa-cxx-api.cc
│   ├── sense-voice-simulate-streaming-microphone-cxx-api.cc
│   ├── sense-voice-with-hr-cxx-api.cc
│   ├── sherpa-display.h
│   ├── speech-enhancement-dpdfnet-cxx-api.cc
│   ├── speech-enhancement-gtcrn-cxx-api.cc
│   ├── streaming-t-one-ctc-cxx-api.cc
│   ├── streaming-zipformer-cxx-api.cc
│   ├── streaming-zipformer-rtf-cxx-api.cc
│   ├── streaming-zipformer-with-hr-cxx-api.cc
│   ├── supertonic-tts-en-cxx-api.cc
│   ├── vad-cxx-api.cc
│   ├── wenet-ctc-cxx-api.cc
│   ├── wenet-ctc-simulate-streaming-microphone-cxx-api.cc
│   ├── whisper-cxx-api.cc
│   ├── zipformer-ctc-simulate-streaming-alsa-cxx-api.cc
│   ├── zipformer-ctc-simulate-streaming-microphone-cxx-api.cc
│   ├── zipformer-transducer-simulate-streaming-microphone-cxx-api.cc
│   └── zipvoice-tts-zh-en-cxx-api.cc
├── dart-api-examples/
│   ├── .gitignore
│   ├── README.md
│   ├── add-punctuations/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── analysis_options.yaml
│   │   ├── bin/
│   │   │   └── punctuations.dart
│   │   ├── pubspec.yaml
│   │   └── run-ct-transformer.sh
│   ├── audio-tagging/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── analysis_options.yaml
│   │   ├── bin/
│   │   │   ├── ced.dart
│   │   │   └── zipformer.dart
│   │   ├── pubspec.yaml
│   │   ├── run-ced.sh
│   │   └── run-zipformer.sh
│   ├── keyword-spotter/
│   │   ├── .gitignore
│   │   ├── CHANGELOG.md
│   │   ├── README.md
│   │   ├── analysis_options.yaml
│   │   ├── bin/
│   │   │   └── zipformer-transducer.dart
│   │   ├── pubspec.yaml
│   │   └── run-zh.sh
│   ├── non-streaming-asr/
│   │   ├── .gitignore
│   │   ├── CHANGELOG.md
│   │   ├── README.md
│   │   ├── analysis_options.yaml
│   │   ├── bin/
│   │   │   ├── dolphin-ctc.dart
│   │   │   ├── fire-red-asr-ctc.dart
│   │   │   ├── fire-red-asr.dart
│   │   │   ├── funasr-nano.dart
│   │   │   ├── medasr-ctc.dart
│   │   │   ├── moonshine.dart
│   │   │   ├── moonshine_v2.dart
│   │   │   ├── nemo-canary.dart
│   │   │   ├── nemo-ctc.dart
│   │   │   ├── nemo-transducer.dart
│   │   │   ├── omnilingual-asr-ctc.dart
│   │   │   ├── paraformer-itn.dart
│   │   │   ├── paraformer.dart
│   │   │   ├── sense-voice-with-hr.dart
│   │   │   ├── sense-voice.dart
│   │   │   ├── telespeech-ctc.dart
│   │   │   ├── vad-with-paraformer.dart
│   │   │   ├── wenet-ctc.dart
│   │   │   ├── whisper.dart
│   │   │   ├── zipformer-ctc.dart
│   │   │   └── zipformer-transducer.dart
│   │   ├── pubspec.yaml
│   │   ├── run-dolphin-ctc.sh
│   │   ├── run-fire-red-asr-ctc.sh
│   │   ├── run-fire-red-asr.sh
│   │   ├── run-funasr-nano.sh
│   │   ├── run-medasr-ctc.sh
│   │   ├── run-moonshine-v2.sh
│   │   ├── run-moonshine.sh
│   │   ├── run-nemo-canary.sh
│   │   ├── run-nemo-ctc.sh
│   │   ├── run-nemo-transducer.sh
│   │   ├── run-omnilingual-asr-ctc.sh
│   │   ├── run-paraformer-itn.sh
│   │   ├── run-paraformer.sh
│   │   ├── run-sense-voice-with-hr.sh
│   │   ├── run-sense-voice.sh
│   │   ├── run-telespeech-ctc.sh
│   │   ├── run-vad-with-paraformer.sh
│   │   ├── run-wenet-ctc.sh
│   │   ├── run-whisper.sh
│   │   ├── run-zipformer-ctc.sh
│   │   └── run-zipformer-transducer.sh
│   ├── speaker-diarization/
│   │   ├── .gitignore
│   │   ├── CHANGELOG.md
│   │   ├── README.md
│   │   ├── analysis_options.yaml
│   │   ├── bin/
│   │   │   └── speaker-diarization.dart
│   │   ├── pubspec.yaml
│   │   └── run.sh
│   ├── speaker-identification/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── analysis_options.yaml
│   │   ├── bin/
│   │   │   └── speaker_id.dart
│   │   ├── pubspec.yaml
│   │   └── run-3d-speaker.sh
│   ├── speech-enhancement-dpdfnet/
│   │   ├── .gitignore
│   │   ├── CHANGELOG.md
│   │   ├── README.md
│   │   ├── analysis_options.yaml
│   │   ├── bin/
│   │   │   └── speech_enhancement_dpdfnet.dart
│   │   ├── pubspec.yaml
│   │   └── run.sh
│   ├── speech-enhancement-gtcrn/
│   │   ├── .gitignore
│   │   ├── CHANGELOG.md
│   │   ├── README.md
│   │   ├── analysis_options.yaml
│   │   ├── bin/
│   │   │   └── speech_enhancement_gtcrn.dart
│   │   ├── pubspec.yaml
│   │   └── run.sh
│   ├── spoken-language-identification/
│   │   ├── README.md
│   │   ├── analysis_options.yaml
│   │   ├── bin/
│   │   │   └── spoken_language_identification.dart
│   │   ├── pubspec.yaml
│   │   └── run-whisper.sh
│   ├── streaming-asr/
│   │   ├── .gitignore
│   │   ├── CHANGELOG.md
│   │   ├── README.md
│   │   ├── analysis_options.yaml
│   │   ├── bin/
│   │   │   ├── paraformer.dart
│   │   │   ├── t-one-ctc.dart
│   │   │   ├── zipformer-ctc-hlg.dart
│   │   │   ├── zipformer-ctc.dart
│   │   │   └── zipformer-transducer.dart
│   │   ├── pubspec.yaml
│   │   ├── run-nemo-transducer.sh
│   │   ├── run-paraformer.sh
│   │   ├── run-t-one-ctc.sh
│   │   ├── run-zipformer-ctc-hlg.sh
│   │   ├── run-zipformer-ctc.sh
│   │   ├── run-zipformer-transducer-itn.sh
│   │   └── run-zipformer-transducer.sh
│   ├── streaming-speech-enhancement-dpdfnet/
│   │   ├── README.md
│   │   ├── bin/
│   │   │   └── streaming_speech_enhancement_dpdfnet.dart
│   │   └── run.sh
│   ├── streaming-speech-enhancement-gtcrn/
│   │   ├── README.md
│   │   ├── bin/
│   │   │   └── streaming_speech_enhancement_gtcrn.dart
│   │   └── run.sh
│   ├── tts/
│   │   ├── .gitignore
│   │   ├── CHANGELOG.md
│   │   ├── README.md
│   │   ├── analysis_options.yaml
│   │   ├── bin/
│   │   │   ├── coqui.dart
│   │   │   ├── kitten-en.dart
│   │   │   ├── kokoro-en.dart
│   │   │   ├── kokoro-zh-en.dart
│   │   │   ├── matcha-en.dart
│   │   │   ├── matcha-zh.dart
│   │   │   ├── piper.dart
│   │   │   ├── pocket-en.dart
│   │   │   ├── supertonic-en.dart
│   │   │   ├── vits-zh.dart
│   │   │   └── zipvoice-zh-en.dart
│   │   ├── pubspec.yaml
│   │   ├── run-coqui.sh
│   │   ├── run-kitten-en.sh
│   │   ├── run-kokoro-en.sh
│   │   ├── run-kokoro-zh-en.sh
│   │   ├── run-matcha-en.sh
│   │   ├── run-matcha-zh.sh
│   │   ├── run-piper.sh
│   │   ├── run-pocket-en.sh
│   │   ├── run-supertonic-en.sh
│   │   ├── run-vits-zh.sh
│   │   └── run-zipvoice-zh-en.sh
│   ├── vad/
│   │   ├── .gitignore
│   │   ├── CHANGELOG.md
│   │   ├── README.md
│   │   ├── analysis_options.yaml
│   │   ├── bin/
│   │   │   ├── init.dart
│   │   │   ├── ten-vad.dart
│   │   │   └── vad.dart
│   │   ├── pubspec.yaml
│   │   ├── run-ten-vad.sh
│   │   └── run.sh
│   └── vad-with-non-streaming-asr/
│       ├── .gitignore
│       ├── README.md
│       ├── analysis_options.yaml
│       ├── bin/
│       │   ├── dolphin-ctc.dart
│       │   ├── moonshine.dart
│       │   ├── paraformer.dart
│       │   ├── sense-voice-2.dart
│       │   ├── sense-voice.dart
│       │   ├── telespeech-ctc.dart
│       │   ├── whisper.dart
│       │   ├── zipformer-ctc.dart
│       │   └── zipformer-transducer.dart
│       ├── pubspec.yaml
│       ├── run-dolphin-ctc.sh
│       ├── run-moonshine.sh
│       ├── run-paraformer.sh
│       ├── run-sense-voice-en.sh
│       ├── run-sense-voice-zh-2.sh
│       ├── run-sense-voice-zh.sh
│       ├── run-telespeech-ctc.sh
│       ├── run-whisper.sh
│       ├── run-zipformer-ctc.sh
│       └── run-zipformer-transducer.sh
├── dotnet-examples/
│   ├── .editorconfig
│   ├── .gitignore
│   ├── .notes
│   ├── Common/
│   │   ├── Common.csproj
│   │   └── WaveHeader.cs
│   ├── README.md
│   ├── keyword-spotting-from-files/
│   │   ├── Program.cs
│   │   ├── keyword-spotting-from-files.csproj
│   │   └── run.sh
│   ├── keyword-spotting-from-microphone/
│   │   ├── Program.cs
│   │   ├── keyword-spotting-from-microphone.csproj
│   │   └── run.sh
│   ├── kitten-tts/
│   │   ├── Program.cs
│   │   ├── kitten-tts.csproj
│   │   └── run-kitten.sh
│   ├── kitten-tts-play/
│   │   ├── Program.cs
│   │   ├── kitten-tts-play.csproj
│   │   └── run-kitten.sh
│   ├── kokoro-tts/
│   │   ├── Program.cs
│   │   ├── kokoro-tts.csproj
│   │   └── run-kokoro.sh
│   ├── kokoro-tts-play/
│   │   ├── Program.cs
│   │   ├── kokoro-tts-play.csproj
│   │   └── run-kokoro-en.sh
│   ├── non-streaming-canary-decode-files/
│   │   ├── Program.cs
│   │   ├── non-streaming-canary-decode-files.csproj
│   │   └── run.sh
│   ├── non-streaming-funasr-nano-decode-files/
│   │   ├── Program.cs
│   │   ├── non-streaming-funasr-nano-decode-files.csproj
│   │   └── run.sh
│   ├── non-streaming-moonshine-v2-decode-files/
│   │   ├── Program.cs
│   │   ├── non-streaming-moonshine-v2-decode-files.csproj
│   │   └── run.sh
│   ├── offline-audio-tagging/
│   │   ├── Program.cs
│   │   ├── offline-audio-tagging.csproj
│   │   └── run.sh
│   ├── offline-decode-files/
│   │   ├── Program.cs
│   │   ├── offline-decode-files.csproj
│   │   ├── run-dolphin-ctc.sh
│   │   ├── run-fire-red-asr-ctc.sh
│   │   ├── run-fire-red-asr.sh
│   │   ├── run-hotwords.sh
│   │   ├── run-medasr-ctc.sh
│   │   ├── run-moonshine.sh
│   │   ├── run-nemo-ctc.sh
│   │   ├── run-omnilingual-asr-ctc.sh
│   │   ├── run-paraformer-itn.sh
│   │   ├── run-paraformer.sh
│   │   ├── run-sense-voice-ctc.sh
│   │   ├── run-tdnn-yesno.sh
│   │   ├── run-telespeech-ctc.sh
│   │   ├── run-wenet-ctc.sh
│   │   ├── run-whisper-large-v3.sh
│   │   ├── run-whisper.sh
│   │   ├── run-zipformer-ctc.sh
│   │   └── run-zipformer.sh
│   ├── offline-punctuation/
│   │   ├── Program.cs
│   │   ├── offline-punctuation.csproj
│   │   └── run.sh
│   ├── offline-speaker-diarization/
│   │   ├── Program.cs
│   │   ├── offline-speaker-diarization.csproj
│   │   └── run.sh
│   ├── offline-tts/
│   │   ├── Program.cs
│   │   ├── offline-tts.csproj
│   │   ├── run-aishell3.sh
│   │   ├── run-hf-fanchen.sh
│   │   ├── run-matcha-en.sh
│   │   ├── run-matcha-zh.sh
│   │   └── run-piper.sh
│   ├── offline-tts-play/
│   │   ├── .gitignore
│   │   ├── Program.cs
│   │   ├── offline-tts-play.csproj
│   │   ├── run-hf-fanchen.sh
│   │   ├── run-matcha-en.sh
│   │   └── run-matcha-zh.sh
│   ├── online-decode-files/
│   │   ├── Program.cs
│   │   ├── online-decode-files.csproj
│   │   ├── run-paraformer.sh
│   │   ├── run-t-one-ctc.sh
│   │   ├── run-transducer-itn.sh
│   │   ├── run-transducer.sh
│   │   └── run-zipformer2-ctc.sh
│   ├── pocket-tts-zero-shot/
│   │   ├── Program.cs
│   │   ├── pocket-tts-zero-shot.csproj
│   │   └── run.sh
│   ├── pocket-tts-zero-shot-play/
│   │   ├── Program.cs
│   │   ├── pocket-tts-zero-shot-play.csproj
│   │   └── run.sh
│   ├── sherpa-onnx.sln
│   ├── speaker-identification/
│   │   ├── Program.cs
│   │   ├── run.sh
│   │   └── speaker-identification.csproj
│   ├── speech-enhancement-dpdfnet/
│   │   ├── Program.cs
│   │   ├── run.sh
│   │   └── speech-enhancement-dpdfnet.csproj
│   ├── speech-enhancement-gtcrn/
│   │   ├── Program.cs
│   │   ├── run.sh
│   │   └── speech-enhancement-gtcrn.csproj
│   ├── speech-recognition-from-microphone/
│   │   ├── Program.cs
│   │   ├── run-paraformer.sh
│   │   ├── run-transducer.sh
│   │   └── speech-recognition-from-microphone.csproj
│   ├── spoken-language-identification/
│   │   ├── Program.cs
│   │   ├── run.sh
│   │   └── spoken-language-identification.csproj
│   ├── streaming-hlg-decoding/
│   │   ├── Program.cs
│   │   ├── run.sh
│   │   └── streaming-hlg-decoding.csproj
│   ├── streaming-speech-enhancement-dpdfnet/
│   │   ├── Program.cs
│   │   ├── run.sh
│   │   └── streaming-speech-enhancement-dpdfnet.csproj
│   ├── streaming-speech-enhancement-gtcrn/
│   │   ├── Program.cs
│   │   ├── run.sh
│   │   └── streaming-speech-enhancement-gtcrn.csproj
│   ├── supertonic-tts/
│   │   ├── Program.cs
│   │   ├── run.sh
│   │   └── supertonic-tts.csproj
│   ├── vad-non-streaming-asr-paraformer/
│   │   ├── Program.cs
│   │   ├── run-ten-vad.sh
│   │   ├── run.sh
│   │   └── vad-non-streaming-asr-paraformer.csproj
│   ├── vad-non-streaming-funasr-nano/
│   │   ├── Program.cs
│   │   ├── run-ten-vad.sh
│   │   ├── run.sh
│   │   └── vad-non-streaming-funasr-nano.csproj
│   ├── version-test/
│   │   ├── Program.cs
│   │   ├── run.sh
│   │   └── version-test.csproj
│   ├── zipvoice-tts/
│   │   ├── Program.cs
│   │   ├── run.sh
│   │   └── zipvoice-tts.csproj
│   └── zipvoice-tts-play/
│       ├── Program.cs
│       ├── run.sh
│       └── zipvoice-tts-play.csproj
├── ffmpeg-examples/
│   ├── Makefile
│   ├── README.md
│   ├── how-to-fix-errors.md
│   └── sherpa-onnx-ffmpeg.c
├── flutter/
│   ├── .gitignore
│   ├── README.md
│   ├── notes.md
│   ├── notes2.md
│   ├── publish.md
│   ├── sherpa_onnx/
│   │   ├── .gitignore
│   │   ├── .metadata
│   │   ├── analysis_options.yaml
│   │   ├── example/
│   │   │   ├── .gitignore
│   │   │   ├── README.md
│   │   │   └── example.md
│   │   ├── lib/
│   │   │   ├── sherpa_onnx.dart
│   │   │   └── src/
│   │   │       ├── audio_tagging.dart
│   │   │       ├── feature_config.dart
│   │   │       ├── homophone_replacer_config.dart
│   │   │       ├── keyword_spotter.dart
│   │   │       ├── offline_punctuation.dart
│   │   │       ├── offline_recognizer.dart
│   │   │       ├── offline_speaker_diarization.dart
│   │   │       ├── offline_speech_denoiser.dart
│   │   │       ├── offline_stream.dart
│   │   │       ├── online_punctuation.dart
│   │   │       ├── online_recognizer.dart
│   │   │       ├── online_speech_denoiser.dart
│   │   │       ├── online_stream.dart
│   │   │       ├── sherpa_onnx_bindings.dart
│   │   │       ├── speaker_identification.dart
│   │   │       ├── spoken_language_identification.dart
│   │   │       ├── tts.dart
│   │   │       ├── utils.dart
│   │   │       ├── vad.dart
│   │   │       ├── version.dart
│   │   │       ├── wave_reader.dart
│   │   │       └── wave_writer.dart
│   │   └── pubspec.yaml
│   ├── sherpa_onnx_android/
│   │   ├── .gitignore
│   │   ├── .metadata
│   │   ├── README.md
│   │   ├── analysis_options.yaml
│   │   ├── android/
│   │   │   ├── .gitignore
│   │   │   ├── build.gradle
│   │   │   ├── settings.gradle
│   │   │   └── src/
│   │   │       └── main/
│   │   │           ├── AndroidManifest.xml
│   │   │           └── jniLibs/
│   │   │               ├── README.md
│   │   │               ├── arm64-v8a/
│   │   │               │   └── .gitkeep
│   │   │               ├── armeabi-v7a/
│   │   │               │   └── .gitkeep
│   │   │               ├── x86/
│   │   │               │   └── .gitkeep
│   │   │               └── x86_64/
│   │   │                   └── .gitkeep
│   │   ├── lib/
│   │   │   ├── .gitkeep
│   │   │   └── README.md
│   │   └── pubspec.yaml
│   ├── sherpa_onnx_ios/
│   │   ├── .gitignore
│   │   ├── .metadata
│   │   ├── README.md
│   │   ├── analysis_options.yaml
│   │   ├── ios/
│   │   │   └── sherpa_onnx_ios.podspec
│   │   ├── lib/
│   │   │   └── README.md
│   │   └── pubspec.yaml
│   ├── sherpa_onnx_linux/
│   │   ├── .gitignore
│   │   ├── .metadata
│   │   ├── README.md
│   │   ├── analysis_options.yaml
│   │   ├── lib/
│   │   │   ├── .gitkeep
│   │   │   └── README.md
│   │   ├── linux/
│   │   │   ├── CMakeLists.txt
│   │   │   ├── README.md
│   │   │   ├── aarch64/
│   │   │   │   └── .gitikeep
│   │   │   └── x64/
│   │   │       └── .gitikeep
│   │   └── pubspec.yaml
│   ├── sherpa_onnx_macos/
│   │   ├── .gitignore
│   │   ├── .metadata
│   │   ├── README.md
│   │   ├── analysis_options.yaml
│   │   ├── lib/
│   │   │   ├── .gitkeep
│   │   │   └── README.md
│   │   ├── macos/
│   │   │   ├── README.md
│   │   │   └── sherpa_onnx_macos.podspec
│   │   └── pubspec.yaml
│   └── sherpa_onnx_windows/
│       ├── .gitignore
│       ├── .metadata
│       ├── README.md
│       ├── analysis_options.yaml
│       ├── lib/
│       │   ├── .gitkeep
│       │   └── README.md
│       └── pubspec.yaml
├── flutter-examples/
│   ├── .gitignore
│   ├── README.md
│   ├── andriod-notes.md
│   ├── how-tts-is-created.md
│   ├── non_streaming_vad_asr/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── analysis_options.yaml
│   │   ├── lib/
│   │   │   ├── info.dart
│   │   │   ├── main.dart
│   │   │   ├── non_streaming_vad_asr.dart
│   │   │   ├── offline_model.dart
│   │   │   └── utils.dart
│   │   ├── macos/
│   │   │   ├── .gitignore
│   │   │   ├── Flutter/
│   │   │   │   ├── Flutter-Debug.xcconfig
│   │   │   │   └── Flutter-Release.xcconfig
│   │   │   ├── Runner/
│   │   │   │   ├── AppDelegate.swift
│   │   │   │   ├── Assets.xcassets/
│   │   │   │   │   └── AppIcon.appiconset/
│   │   │   │   │       └── Contents.json
│   │   │   │   ├── Base.lproj/
│   │   │   │   │   └── MainMenu.xib
│   │   │   │   ├── Configs/
│   │   │   │   │   ├── AppInfo.xcconfig
│   │   │   │   │   ├── Debug.xcconfig
│   │   │   │   │   ├── Release.xcconfig
│   │   │   │   │   └── Warnings.xcconfig
│   │   │   │   ├── DebugProfile.entitlements
│   │   │   │   ├── Info.plist
│   │   │   │   ├── MainFlutterWindow.swift
│   │   │   │   └── Release.entitlements
│   │   │   ├── Runner.xcodeproj/
│   │   │   │   ├── project.pbxproj
│   │   │   │   ├── project.xcworkspace/
│   │   │   │   │   └── xcshareddata/
│   │   │   │   │       └── IDEWorkspaceChecks.plist
│   │   │   │   └── xcshareddata/
│   │   │   │       └── xcschemes/
│   │   │   │           └── Runner.xcscheme
│   │   │   ├── Runner.xcworkspace/
│   │   │   │   ├── contents.xcworkspacedata
│   │   │   │   └── xcshareddata/
│   │   │   │       └── IDEWorkspaceChecks.plist
│   │   │   └── RunnerTests/
│   │   │       └── RunnerTests.swift
│   │   └── pubspec.yaml
│   ├── streaming_asr/
│   │   ├── .gitignore
│   │   ├── .metadata
│   │   ├── README.md
│   │   ├── analysis_options.yaml
│   │   ├── android/
│   │   │   ├── .gitignore
│   │   │   ├── app/
│   │   │   │   ├── build.gradle
│   │   │   │   └── src/
│   │   │   │       ├── debug/
│   │   │   │       │   └── AndroidManifest.xml
│   │   │   │       ├── main/
│   │   │   │       │   ├── AndroidManifest.xml
│   │   │   │       │   ├── kotlin/
│   │   │   │       │   │   └── com/
│   │   │   │       │   │       └── k2fsa/
│   │   │   │       │   │           └── sherpa/
│   │   │   │       │   │               └── onnx/
│   │   │   │       │   │                   └── streaming_asr/
│   │   │   │       │   │                       └── MainActivity.kt
│   │   │   │       │   └── res/
│   │   │   │       │       ├── drawable/
│   │   │   │       │       │   └── launch_background.xml
│   │   │   │       │       ├── drawable-v21/
│   │   │   │       │       │   └── launch_background.xml
│   │   │   │       │       ├── values/
│   │   │   │       │       │   └── styles.xml
│   │   │   │       │       └── values-night/
│   │   │   │       │           └── styles.xml
│   │   │   │       └── profile/
│   │   │   │           └── AndroidManifest.xml
│   │   │   ├── build.gradle
│   │   │   ├── gradle/
│   │   │   │   └── wrapper/
│   │   │   │       └── gradle-wrapper.properties
│   │   │   ├── gradle.properties
│   │   │   └── settings.gradle
│   │   ├── assets/
│   │   │   └── .gitignore
│   │   ├── ios/
│   │   │   ├── .gitignore
│   │   │   ├── Flutter/
│   │   │   │   ├── AppFrameworkInfo.plist
│   │   │   │   ├── Debug.xcconfig
│   │   │   │   └── Release.xcconfig
│   │   │   ├── Runner/
│   │   │   │   ├── AppDelegate.swift
│   │   │   │   ├── Assets.xcassets/
│   │   │   │   │   ├── AppIcon.appiconset/
│   │   │   │   │   │   └── Contents.json
│   │   │   │   │   └── LaunchImage.imageset/
│   │   │   │   │       ├── Contents.json
│   │   │   │   │       └── README.md
│   │   │   │   ├── Base.lproj/
│   │   │   │   │   ├── LaunchScreen.storyboard
│   │   │   │   │   └── Main.storyboard
│   │   │   │   ├── Info.plist
│   │   │   │   └── Runner-Bridging-Header.h
│   │   │   ├── Runner.xcodeproj/
│   │   │   │   ├── project.pbxproj
│   │   │   │   ├── project.xcworkspace/
│   │   │   │   │   ├── contents.xcworkspacedata
│   │   │   │   │   └── xcshareddata/
│   │   │   │   │       ├── IDEWorkspaceChecks.plist
│   │   │   │   │       └── WorkspaceSettings.xcsettings
│   │   │   │   └── xcshareddata/
│   │   │   │       └── xcschemes/
│   │   │   │           └── Runner.xcscheme
│   │   │   ├── Runner.xcworkspace/
│   │   │   │   ├── contents.xcworkspacedata
│   │   │   │   └── xcshareddata/
│   │   │   │       ├── IDEWorkspaceChecks.plist
│   │   │   │       └── WorkspaceSettings.xcsettings
│   │   │   └── RunnerTests/
│   │   │       └── RunnerTests.swift
│   │   ├── lib/
│   │   │   ├── info.dart
│   │   │   ├── main.dart
│   │   │   ├── online_model.dart
│   │   │   ├── streaming_asr.dart
│   │   │   └── utils.dart
│   │   ├── linux/
│   │   │   ├── .gitignore
│   │   │   ├── CMakeLists.txt
│   │   │   ├── flutter/
│   │   │   │   └── CMakeLists.txt
│   │   │   ├── main.cc
│   │   │   ├── my_application.cc
│   │   │   └── my_application.h
│   │   ├── macos/
│   │   │   ├── .gitignore
│   │   │   ├── Flutter/
│   │   │   │   ├── Flutter-Debug.xcconfig
│   │   │   │   └── Flutter-Release.xcconfig
│   │   │   ├── Runner/
│   │   │   │   ├── AppDelegate.swift
│   │   │   │   ├── Assets.xcassets/
│   │   │   │   │   └── AppIcon.appiconset/
│   │   │   │   │       └── Contents.json
│   │   │   │   ├── Base.lproj/
│   │   │   │   │   └── MainMenu.xib
│   │   │   │   ├── Configs/
│   │   │   │   │   ├── AppInfo.xcconfig
│   │   │   │   │   ├── Debug.xcconfig
│   │   │   │   │   ├── Release.xcconfig
│   │   │   │   │   └── Warnings.xcconfig
│   │   │   │   ├── DebugProfile.entitlements
│   │   │   │   ├── Info.plist
│   │   │   │   ├── MainFlutterWindow.swift
│   │   │   │   └── Release.entitlements
│   │   │   ├── Runner.xcodeproj/
│   │   │   │   ├── project.pbxproj
│   │   │   │   ├── project.xcworkspace/
│   │   │   │   │   └── xcshareddata/
│   │   │   │   │       └── IDEWorkspaceChecks.plist
│   │   │   │   └── xcshareddata/
│   │   │   │       └── xcschemes/
│   │   │   │           └── Runner.xcscheme
│   │   │   ├── Runner.xcworkspace/
│   │   │   │   ├── contents.xcworkspacedata
│   │   │   │   └── xcshareddata/
│   │   │   │       └── IDEWorkspaceChecks.plist
│   │   │   └── RunnerTests/
│   │   │       └── RunnerTests.swift
│   │   ├── pubspec.yaml
│   │   ├── test/
│   │   │   └── widget_test.dart
│   │   └── windows/
│   │       ├── .gitignore
│   │       ├── CMakeLists.txt
│   │       ├── flutter/
│   │       │   └── CMakeLists.txt
│   │       └── runner/
│   │           ├── CMakeLists.txt
│   │           ├── Runner.rc
│   │           ├── flutter_window.cpp
│   │           ├── flutter_window.h
│   │           ├── main.cpp
│   │           ├── resource.h
│   │           ├── runner.exe.manifest
│   │           ├── utils.cpp
│   │           ├── utils.h
│   │           ├── win32_window.cpp
│   │           └── win32_window.h
│   └── tts/
│       ├── .gitignore
│       ├── .metadata
│       ├── README.md
│       ├── analysis_options.yaml
│       ├── android/
│       │   ├── .gitignore
│       │   ├── app/
│       │   │   ├── build.gradle
│       │   │   └── src/
│       │   │       ├── debug/
│       │   │       │   └── AndroidManifest.xml
│       │   │       ├── main/
│       │   │       │   ├── AndroidManifest.xml
│       │   │       │   ├── kotlin/
│       │   │       │   │   └── com/
│       │   │       │   │       └── example/
│       │   │       │   │           └── tts/
│       │   │       │   │               └── MainActivity.kt
│       │   │       │   └── res/
│       │   │       │       ├── drawable/
│       │   │       │       │   └── launch_background.xml
│       │   │       │       ├── drawable-v21/
│       │   │       │       │   └── launch_background.xml
│       │   │       │       ├── values/
│       │   │       │       │   └── styles.xml
│       │   │       │       └── values-night/
│       │   │       │           └── styles.xml
│       │   │       └── profile/
│       │   │           └── AndroidManifest.xml
│       │   ├── build.gradle
│       │   ├── gradle/
│       │   │   └── wrapper/
│       │   │       └── gradle-wrapper.properties
│       │   ├── gradle.properties
│       │   └── settings.gradle
│       ├── assets/
│       │   └── .gitkeep
│       ├── generate-asset-list.py
│       ├── ios/
│       │   ├── .gitignore
│       │   ├── Flutter/
│       │   │   ├── AppFrameworkInfo.plist
│       │   │   ├── Debug.xcconfig
│       │   │   └── Release.xcconfig
│       │   ├── Runner/
│       │   │   ├── AppDelegate.swift
│       │   │   ├── Assets.xcassets/
│       │   │   │   ├── AppIcon.appiconset/
│       │   │   │   │   └── Contents.json
│       │   │   │   └── LaunchImage.imageset/
│       │   │   │       ├── Contents.json
│       │   │   │       └── README.md
│       │   │   ├── Base.lproj/
│       │   │   │   ├── LaunchScreen.storyboard
│       │   │   │   └── Main.storyboard
│       │   │   ├── Info.plist
│       │   │   └── Runner-Bridging-Header.h
│       │   ├── Runner.xcodeproj/
│       │   │   ├── project.pbxproj
│       │   │   ├── project.xcworkspace/
│       │   │   │   ├── contents.xcworkspacedata
│       │   │   │   └── xcshareddata/
│       │   │   │       ├── IDEWorkspaceChecks.plist
│       │   │   │       └── WorkspaceSettings.xcsettings
│       │   │   └── xcshareddata/
│       │   │       └── xcschemes/
│       │   │           └── Runner.xcscheme
│       │   ├── Runner.xcworkspace/
│       │   │   ├── contents.xcworkspacedata
│       │   │   └── xcshareddata/
│       │   │       ├── IDEWorkspaceChecks.plist
│       │   │       └── WorkspaceSettings.xcsettings
│       │   └── RunnerTests/
│       │       └── RunnerTests.swift
│       ├── lib/
│       │   ├── info.dart
│       │   ├── isolate_tts.dart
│       │   ├── main.dart
│       │   ├── model.dart
│       │   ├── tts.dart
│       │   └── utils.dart
│       ├── linux/
│       │   ├── .gitignore
│       │   ├── CMakeLists.txt
│       │   ├── flutter/
│       │   │   └── CMakeLists.txt
│       │   ├── main.cc
│       │   ├── my_application.cc
│       │   └── my_application.h
│       ├── macos/
│       │   ├── .gitignore
│       │   ├── Flutter/
│       │   │   ├── Flutter-Debug.xcconfig
│       │   │   └── Flutter-Release.xcconfig
│       │   ├── Runner/
│       │   │   ├── AppDelegate.swift
│       │   │   ├── Assets.xcassets/
│       │   │   │   └── AppIcon.appiconset/
│       │   │   │       └── Contents.json
│       │   │   ├── Base.lproj/
│       │   │   │   └── MainMenu.xib
│       │   │   ├── Configs/
│       │   │   │   ├── AppInfo.xcconfig
│       │   │   │   ├── Debug.xcconfig
│       │   │   │   ├── Release.xcconfig
│       │   │   │   └── Warnings.xcconfig
│       │   │   ├── DebugProfile.entitlements
│       │   │   ├── Info.plist
│       │   │   ├── MainFlutterWindow.swift
│       │   │   └── Release.entitlements
│       │   ├── Runner.xcodeproj/
│       │   │   ├── project.pbxproj
│       │   │   ├── project.xcworkspace/
│       │   │   │   └── xcshareddata/
│       │   │   │       └── IDEWorkspaceChecks.plist
│       │   │   └── xcshareddata/
│       │   │       └── xcschemes/
│       │   │           └── Runner.xcscheme
│       │   ├── Runner.xcworkspace/
│       │   │   ├── contents.xcworkspacedata
│       │   │   └── xcshareddata/
│       │   │       └── IDEWorkspaceChecks.plist
│       │   └── RunnerTests/
│       │       └── RunnerTests.swift
│       ├── pubspec.yaml
│       ├── test/
│       │   └── widget_test.dart
│       └── windows/
│           ├── .gitignore
│           ├── CMakeLists.txt
│           ├── flutter/
│           │   └── CMakeLists.txt
│           └── runner/
│               ├── CMakeLists.txt
│               ├── Runner.rc
│               ├── flutter_window.cpp
│               ├── flutter_window.h
│               ├── main.cpp
│               ├── resource.h
│               ├── runner.exe.manifest
│               ├── utils.cpp
│               ├── utils.h
│               ├── win32_window.cpp
│               └── win32_window.h
├── go-api-examples/
│   ├── .gitignore
│   ├── README.md
│   ├── add-punctuation/
│   │   ├── go.mod
│   │   ├── main.go
│   │   └── run.sh
│   ├── add-punctuation-online/
│   │   ├── go.mod
│   │   ├── main.go
│   │   └── run.sh
│   ├── audio-tagging/
│   │   ├── go.mod
│   │   ├── main.go
│   │   └── run.sh
│   ├── keyword-spotting-from-file/
│   │   ├── go.mod
│   │   ├── main.go
│   │   └── run.sh
│   ├── non-streaming-canary-decode-files/
│   │   ├── go.mod
│   │   ├── main.go
│   │   └── run.sh
│   ├── non-streaming-funasr-nano-decode-files/
│   │   ├── go.mod
│   │   ├── main.go
│   │   └── run.sh
│   ├── non-streaming-medasr-ctc-decode-files/
│   │   ├── go.mod
│   │   ├── main.go
│   │   └── run.sh
│   ├── non-streaming-moonshine-v2-decode-files/
│   │   ├── go.mod
│   │   ├── main.go
│   │   └── run.sh
│   ├── non-streaming-omnilingual-asr-ctc-decode-files/
│   │   ├── go.mod
│   │   ├── main.go
│   │   └── run.sh
│   ├── non-streaming-speaker-diarization/
│   │   ├── go.mod
│   │   ├── main.go
│   │   └── run.sh
│   ├── non-streaming-tts/
│   │   ├── go.mod
│   │   ├── main.go
│   │   ├── run-kitten-en.sh
│   │   ├── run-kokoro-en.sh
│   │   ├── run-kokoro-zh-en.sh
│   │   ├── run-matcha-en.sh
│   │   ├── run-matcha-zh.sh
│   │   ├── run-vits-ljs.sh
│   │   ├── run-vits-piper-en_US-lessac-medium.sh
│   │   ├── run-vits-vctk.sh
│   │   └── run-vits-zh-aishell3.sh
│   ├── offline-tts-play/
│   │   ├── go.mod
│   │   ├── main.go
│   │   ├── run-kitten-en.sh
│   │   ├── run-kokoro-en.sh
│   │   ├── run-kokoro-zh-en.sh
│   │   ├── run-matcha-en.sh
│   │   ├── run-matcha-zh.sh
│   │   ├── run-vits-ljs.sh
│   │   ├── run-vits-piper-en_US-lessac-medium.sh
│   │   ├── run-vits-vctk.sh
│   │   └── run-vits-zh-aishell3.sh
│   ├── speaker-identification/
│   │   ├── go.mod
│   │   ├── main.go
│   │   └── run.sh
│   ├── speech-enhancement-dpdfnet/
│   │   ├── go.mod
│   │   ├── main.go
│   │   └── run.sh
│   ├── speech-enhancement-gtcrn/
│   │   ├── go.mod
│   │   ├── main.go
│   │   └── run.sh
│   ├── streaming-hlg-decoding/
│   │   ├── go.mod
│   │   ├── main.go
│   │   └── run.sh
│   ├── streaming-speech-enhancement-dpdfnet/
│   │   ├── go.mod
│   │   ├── main.go
│   │   └── run.sh
│   ├── streaming-speech-enhancement-gtcrn/
│   │   ├── go.mod
│   │   ├── main.go
│   │   └── run.sh
│   ├── supertonic-tts/
│   │   ├── go.mod
│   │   ├── main.go
│   │   └── run.sh
│   ├── vad/
│   │   ├── go.mod
│   │   ├── main.go
│   │   └── run.sh
│   ├── vad-asr-whisper/
│   │   ├── go.mod
│   │   ├── main.go
│   │   └── run.sh
│   ├── vad-speaker-identification/
│   │   ├── go.mod
│   │   ├── main.go
│   │   └── run.sh
│   ├── vad-spoken-language-identification/
│   │   ├── go.mod
│   │   ├── main.go
│   │   └── run.sh
│   ├── zero-shot-pocket-tts/
│   │   ├── go.mod
│   │   ├── main.go
│   │   └── run.sh
│   ├── zero-shot-pocket-tts-play/
│   │   ├── go.mod
│   │   ├── main.go
│   │   └── run.sh
│   ├── zero-shot-zipvoice-tts/
│   │   ├── go.mod
│   │   ├── main.go
│   │   └── run.sh
│   └── zero-shot-zipvoice-tts-play/
│       ├── go.mod
│       ├── main.go
│       └── run.sh
├── harmony-os/
│   ├── .gitignore
│   ├── README.md
│   ├── SherpaOnnxHar/
│   │   ├── .gitignore
│   │   ├── AppScope/
│   │   │   ├── app.json5
│   │   │   └── resources/
│   │   │       └── base/
│   │   │           └── element/
│   │   │               └── string.json
│   │   ├── README.md
│   │   ├── build-profile.json5
│   │   ├── code-linter.json5
│   │   ├── entry/
│   │   │   ├── .gitignore
│   │   │   ├── build-profile.json5
│   │   │   ├── hvigorfile.ts
│   │   │   ├── obfuscation-rules.txt
│   │   │   ├── oh-package.json5
│   │   │   └── src/
│   │   │       ├── main/
│   │   │       │   ├── ets/
│   │   │       │   │   ├── entryability/
│   │   │       │   │   │   └── EntryAbility.ets
│   │   │       │   │   ├── entrybackupability/
│   │   │       │   │   │   └── EntryBackupAbility.ets
│   │   │       │   │   └── pages/
│   │   │       │   │       └── Index.ets
│   │   │       │   ├── module.json5
│   │   │       │   └── resources/
│   │   │       │       ├── base/
│   │   │       │       │   ├── element/
│   │   │       │       │   │   ├── color.json
│   │   │       │       │   │   └── string.json
│   │   │       │       │   ├── media/
│   │   │       │       │   │   └── layered_image.json
│   │   │       │       │   └── profile/
│   │   │       │       │       ├── backup_config.json
│   │   │       │       │       └── main_pages.json
│   │   │       │       ├── en_US/
│   │   │       │       │   └── element/
│   │   │       │       │       └── string.json
│   │   │       │       └── zh_CN/
│   │   │       │           └── element/
│   │   │       │               └── string.json
│   │   │       ├── ohosTest/
│   │   │       │   ├── ets/
│   │   │       │   │   └── test/
│   │   │       │   │       ├── Ability.test.ets
│   │   │       │   │       └── List.test.ets
│   │   │       │   └── module.json5
│   │   │       └── test/
│   │   │           ├── List.test.ets
│   │   │           └── LocalUnit.test.ets
│   │   ├── hvigor/
│   │   │   └── hvigor-config.json5
│   │   ├── hvigorfile.ts
│   │   ├── notes.md
│   │   ├── oh-package-lock.json5
│   │   ├── oh-package.json5
│   │   ├── release.sh
│   │   └── sherpa_onnx/
│   │       ├── .gitignore
│   │       ├── BuildProfile.ets
│   │       ├── Index.ets
│   │       ├── README.md
│   │       ├── build-profile.json5
│   │       ├── consumer-rules.txt
│   │       ├── hvigorfile.ts
│   │       ├── obfuscation-rules.txt
│   │       ├── oh-package-lock.json5
│   │       ├── oh-package.json5
│   │       └── src/
│   │           ├── main/
│   │           │   ├── cpp/
│   │           │   │   ├── CMakeLists.txt
│   │           │   │   ├── audio-tagging.cc
│   │           │   │   ├── include/
│   │           │   │   │   └── sherpa-onnx/
│   │           │   │   │       └── c-api/
│   │           │   │   │           └── README.md
│   │           │   │   ├── keyword-spotting.cc
│   │           │   │   ├── libs/
│   │           │   │   │   ├── .gitignore
│   │           │   │   │   ├── README.md
│   │           │   │   │   ├── arm64-v8a/
│   │           │   │   │   │   └── .gitkeep
│   │           │   │   │   ├── armeabi-v7a/
│   │           │   │   │   │   └── .gitkeep
│   │           │   │   │   └── x86_64/
│   │           │   │   │       └── .gitkeep
│   │           │   │   ├── macros.h
│   │           │   │   ├── my-patch.diff
│   │           │   │   ├── non-streaming-asr.cc
│   │           │   │   ├── non-streaming-speaker-diarization.cc
│   │           │   │   ├── non-streaming-speech-denoiser.cc
│   │           │   │   ├── non-streaming-tts.cc
│   │           │   │   ├── offline-punctuation.cc
│   │           │   │   ├── online-punctuation.cc
│   │           │   │   ├── sherpa-onnx-node-addon-api.cc
│   │           │   │   ├── speaker-identification.cc
│   │           │   │   ├── speech-denoiser.h
│   │           │   │   ├── spoken-language-identification.cc
│   │           │   │   ├── streaming-asr.cc
│   │           │   │   ├── streaming-speech-denoiser.cc
│   │           │   │   ├── types/
│   │           │   │   │   └── libsherpa_onnx/
│   │           │   │   │       ├── Index.d.ts
│   │           │   │   │       └── oh-package.json5
│   │           │   │   ├── utils.cc
│   │           │   │   ├── vad.cc
│   │           │   │   ├── version.cc
│   │           │   │   ├── wave-reader.cc
│   │           │   │   └── wave-writer.cc
│   │           │   ├── ets/
│   │           │   │   └── components/
│   │           │   │       ├── KeywordSpotting.ets
│   │           │   │       ├── MainPage.ets
│   │           │   │       ├── NonStreamingAsr.ets
│   │           │   │       ├── NonStreamingSpeakerDiarization.ets
│   │           │   │       ├── NonStreamingTts.ets
│   │           │   │       ├── OfflinePunctuation.ets
│   │           │   │       ├── OnlinePunctuation.ets
│   │           │   │       ├── SpeakerIdentification.ets
│   │           │   │       ├── StreamingAsr.ets
│   │           │   │       └── Vad.ets
│   │           │   ├── module.json5
│   │           │   └── resources/
│   │           │       ├── base/
│   │           │       │   └── element/
│   │           │       │       └── string.json
│   │           │       ├── en_US/
│   │           │       │   └── element/
│   │           │       │       └── string.json
│   │           │       └── zh_CN/
│   │           │           └── element/
│   │           │               └── string.json
│   │           ├── ohosTest/
│   │           │   ├── ets/
│   │           │   │   └── test/
│   │           │   │       ├── Ability.test.ets
│   │           │   │       └── List.test.ets
│   │           │   └── module.json5
│   │           └── test/
│   │               ├── List.test.ets
│   │               └── LocalUnit.test.ets
│   ├── SherpaOnnxSpeakerDiarization/
│   │   ├── .gitignore
│   │   ├── AppScope/
│   │   │   ├── app.json5
│   │   │   └── resources/
│   │   │       └── base/
│   │   │           └── element/
│   │   │               └── string.json
│   │   ├── build-profile.json5
│   │   ├── code-linter.json5
│   │   ├── entry/
│   │   │   ├── .gitignore
│   │   │   ├── build-profile.json5
│   │   │   ├── hvigorfile.ts
│   │   │   ├── obfuscation-rules.txt
│   │   │   ├── oh-package.json5
│   │   │   └── src/
│   │   │       ├── main/
│   │   │       │   ├── ets/
│   │   │       │   │   ├── entryability/
│   │   │       │   │   │   └── EntryAbility.ets
│   │   │       │   │   ├── entrybackupability/
│   │   │       │   │   │   └── EntryBackupAbility.ets
│   │   │       │   │   ├── pages/
│   │   │       │   │   │   └── Index.ets
│   │   │       │   │   └── workers/
│   │   │       │   │       └── SpeakerDiarizationWorker.ets
│   │   │       │   ├── module.json5
│   │   │       │   └── resources/
│   │   │       │       ├── base/
│   │   │       │       │   ├── element/
│   │   │       │       │   │   ├── color.json
│   │   │       │       │   │   └── string.json
│   │   │       │       │   ├── media/
│   │   │       │       │   │   └── layered_image.json
│   │   │       │       │   └── profile/
│   │   │       │       │       ├── backup_config.json
│   │   │       │       │       └── main_pages.json
│   │   │       │       ├── en_US/
│   │   │       │       │   └── element/
│   │   │       │       │       └── string.json
│   │   │       │       ├── rawfile/
│   │   │       │       │   └── .gitkeep
│   │   │       │       └── zh_CN/
│   │   │       │           └── element/
│   │   │       │               └── string.json
│   │   │       ├── ohosTest/
│   │   │       │   ├── ets/
│   │   │       │   │   └── test/
│   │   │       │   │       ├── Ability.test.ets
│   │   │       │   │       └── List.test.ets
│   │   │       │   └── module.json5
│   │   │       └── test/
│   │   │           ├── List.test.ets
│   │   │           └── LocalUnit.test.ets
│   │   ├── hvigor/
│   │   │   └── hvigor-config.json5
│   │   ├── hvigorfile.ts
│   │   ├── oh-package-lock.json5
│   │   └── oh-package.json5
│   ├── SherpaOnnxSpeakerIdentification/
│   │   ├── .gitignore
│   │   ├── AppScope/
│   │   │   ├── app.json5
│   │   │   └── resources/
│   │   │       └── base/
│   │   │           └── element/
│   │   │               └── string.json
│   │   ├── build-profile.json5
│   │   ├── code-linter.json5
│   │   ├── entry/
│   │   │   ├── .gitignore
│   │   │   ├── build-profile.json5
│   │   │   ├── hvigorfile.ts
│   │   │   ├── obfuscation-rules.txt
│   │   │   ├── oh-package-lock.json5
│   │   │   ├── oh-package.json5
│   │   │   └── src/
│   │   │       ├── main/
│   │   │       │   ├── ets/
│   │   │       │   │   ├── entryability/
│   │   │       │   │   │   └── EntryAbility.ets
│   │   │       │   │   ├── entrybackupability/
│   │   │       │   │   │   └── EntryBackupAbility.ets
│   │   │       │   │   ├── pages/
│   │   │       │   │   │   ├── Index.ets
│   │   │       │   │   │   └── Permission.ets
│   │   │       │   │   └── workers/
│   │   │       │   │       └── SpeakerIdentificationWorker.ets
│   │   │       │   ├── module.json5
│   │   │       │   └── resources/
│   │   │       │       ├── base/
│   │   │       │       │   ├── element/
│   │   │       │       │   │   ├── color.json
│   │   │       │       │   │   └── string.json
│   │   │       │       │   ├── media/
│   │   │       │       │   │   └── layered_image.json
│   │   │       │       │   └── profile/
│   │   │       │       │       ├── backup_config.json
│   │   │       │       │       └── main_pages.json
│   │   │       │       ├── en_US/
│   │   │       │       │   └── element/
│   │   │       │       │       └── string.json
│   │   │       │       ├── rawfile/
│   │   │       │       │   └── .gitkeep
│   │   │       │       └── zh_CN/
│   │   │       │           └── element/
│   │   │       │               └── string.json
│   │   │       ├── ohosTest/
│   │   │       │   ├── ets/
│   │   │       │   │   └── test/
│   │   │       │   │       ├── Ability.test.ets
│   │   │       │   │       └── List.test.ets
│   │   │       │   └── module.json5
│   │   │       └── test/
│   │   │           ├── List.test.ets
│   │   │           └── LocalUnit.test.ets
│   │   ├── hvigor/
│   │   │   └── hvigor-config.json5
│   │   ├── hvigorfile.ts
│   │   ├── oh-package-lock.json5
│   │   └── oh-package.json5
│   ├── SherpaOnnxStreamingAsr/
│   │   ├── .gitignore
│   │   ├── AppScope/
│   │   │   ├── app.json5
│   │   │   └── resources/
│   │   │       └── base/
│   │   │           └── element/
│   │   │               └── string.json
│   │   ├── build-profile.json5
│   │   ├── code-linter.json5
│   │   ├── entry/
│   │   │   ├── .gitignore
│   │   │   ├── build-profile.json5
│   │   │   ├── hvigorfile.ts
│   │   │   ├── obfuscation-rules.txt
│   │   │   ├── oh-package-lock.json5
│   │   │   ├── oh-package.json5
│   │   │   └── src/
│   │   │       ├── main/
│   │   │       │   ├── ets/
│   │   │       │   │   ├── entryability/
│   │   │       │   │   │   └── EntryAbility.ets
│   │   │       │   │   ├── entrybackupability/
│   │   │       │   │   │   └── EntryBackupAbility.ets
│   │   │       │   │   ├── pages/
│   │   │       │   │   │   ├── Index.ets
│   │   │       │   │   │   └── Permission.ets
│   │   │       │   │   └── workers/
│   │   │       │   │       └── StreamingAsrWorker.ets
│   │   │       │   ├── module.json5
│   │   │       │   └── resources/
│   │   │       │       ├── base/
│   │   │       │       │   ├── element/
│   │   │       │       │   │   ├── color.json
│   │   │       │       │   │   └── string.json
│   │   │       │       │   ├── media/
│   │   │       │       │   │   └── layered_image.json
│   │   │       │       │   └── profile/
│   │   │       │       │       ├── backup_config.json
│   │   │       │       │       └── main_pages.json
│   │   │       │       ├── en_US/
│   │   │       │       │   └── element/
│   │   │       │       │       └── string.json
│   │   │       │       ├── rawfile/
│   │   │       │       │   └── .gitkeep
│   │   │       │       └── zh_CN/
│   │   │       │           └── element/
│   │   │       │               └── string.json
│   │   │       ├── ohosTest/
│   │   │       │   ├── ets/
│   │   │       │   │   └── test/
│   │   │       │   │       ├── Ability.test.ets
│   │   │       │   │       └── List.test.ets
│   │   │       │   └── module.json5
│   │   │       └── test/
│   │   │           ├── List.test.ets
│   │   │           └── LocalUnit.test.ets
│   │   ├── hvigor/
│   │   │   └── hvigor-config.json5
│   │   ├── hvigorfile.ts
│   │   ├── oh-package-lock.json5
│   │   └── oh-package.json5
│   ├── SherpaOnnxTts/
│   │   ├── .gitignore
│   │   ├── AppScope/
│   │   │   ├── app.json5
│   │   │   └── resources/
│   │   │       └── base/
│   │   │           └── element/
│   │   │               └── string.json
│   │   ├── README.md
│   │   ├── build-profile.json5
│   │   ├── code-linter.json5
│   │   ├── entry/
│   │   │   ├── .gitignore
│   │   │   ├── build-profile.json5
│   │   │   ├── hvigorfile.ts
│   │   │   ├── obfuscation-rules.txt
│   │   │   ├── oh-package-lock.json5
│   │   │   ├── oh-package.json5
│   │   │   └── src/
│   │   │       ├── main/
│   │   │       │   ├── ets/
│   │   │       │   │   ├── entryability/
│   │   │       │   │   │   └── EntryAbility.ets
│   │   │       │   │   ├── entrybackupability/
│   │   │       │   │   │   └── EntryBackupAbility.ets
│   │   │       │   │   ├── pages/
│   │   │       │   │   │   └── Index.ets
│   │   │       │   │   └── workers/
│   │   │       │   │       └── NonStreamingTtsWorker.ets
│   │   │       │   ├── module.json5
│   │   │       │   └── resources/
│   │   │       │       ├── base/
│   │   │       │       │   ├── element/
│   │   │       │       │   │   ├── color.json
│   │   │       │       │   │   └── string.json
│   │   │       │       │   ├── media/
│   │   │       │       │   │   └── layered_image.json
│   │   │       │       │   └── profile/
│   │   │       │       │       ├── backup_config.json
│   │   │       │       │       └── main_pages.json
│   │   │       │       ├── en_US/
│   │   │       │       │   └── element/
│   │   │       │       │       └── string.json
│   │   │       │       ├── rawfile/
│   │   │       │       │   └── .gitkeep
│   │   │       │       └── zh_CN/
│   │   │       │           └── element/
│   │   │       │               └── string.json
│   │   │       ├── ohosTest/
│   │   │       │   ├── ets/
│   │   │       │   │   └── test/
│   │   │       │   │       ├── Ability.test.ets
│   │   │       │   │       └── List.test.ets
│   │   │       │   └── module.json5
│   │   │       └── test/
│   │   │           ├── List.test.ets
│   │   │           └── LocalUnit.test.ets
│   │   ├── hvigor/
│   │   │   └── hvigor-config.json5
│   │   ├── hvigorfile.ts
│   │   ├── oh-package-lock.json5
│   │   └── oh-package.json5
│   └── SherpaOnnxVadAsr/
│       ├── .gitignore
│       ├── AppScope/
│       │   ├── app.json5
│       │   └── resources/
│       │       └── base/
│       │           └── element/
│       │               └── string.json
│       ├── README.md
│       ├── build-profile.json5
│       ├── code-linter.json5
│       ├── entry/
│       │   ├── .gitignore
│       │   ├── README.md
│       │   ├── build-profile.json5
│       │   ├── hvigorfile.ts
│       │   ├── obfuscation-rules.txt
│       │   ├── oh-package-lock.json5
│       │   ├── oh-package.json5
│       │   └── src/
│       │       ├── main/
│       │       │   ├── ets/
│       │       │   │   ├── entryability/
│       │       │   │   │   └── EntryAbility.ets
│       │       │   │   ├── entrybackupability/
│       │       │   │   │   └── EntryBackupAbility.ets
│       │       │   │   ├── pages/
│       │       │   │   │   ├── Index.ets
│       │       │   │   │   ├── NonStreamingAsrModels.ets
│       │       │   │   │   └── Permission.ets
│       │       │   │   └── workers/
│       │       │   │       └── NonStreamingAsrWithVadWorker.ets
│       │       │   ├── module.json5
│       │       │   └── resources/
│       │       │       ├── base/
│       │       │       │   ├── element/
│       │       │       │   │   ├── color.json
│       │       │       │   │   └── string.json
│       │       │       │   ├── media/
│       │       │       │   │   └── layered_image.json
│       │       │       │   └── profile/
│       │       │       │       ├── backup_config.json
│       │       │       │       └── main_pages.json
│       │       │       ├── en_US/
│       │       │       │   └── element/
│       │       │       │       └── string.json
│       │       │       ├── rawfile/
│       │       │       │   └── .gitkeep
│       │       │       └── zh_CN/
│       │       │           └── element/
│       │       │               └── string.json
│       │       ├── ohosTest/
│       │       │   ├── ets/
│       │       │   │   └── test/
│       │       │   │       ├── Ability.test.ets
│       │       │   │       └── List.test.ets
│       │       │   └── module.json5
│       │       └── test/
│       │           ├── List.test.ets
│       │           └── LocalUnit.test.ets
│       ├── hvigor/
│       │   └── hvigor-config.json5
│       ├── hvigorfile.ts
│       ├── oh-package-lock.json5
│       └── oh-package.json5
├── ios-swift/
│   ├── .gitignore
│   └── SherpaOnnx/
│       ├── SherpaOnnx/
│       │   ├── AppDelegate.swift
│       │   ├── Assets.xcassets/
│       │   │   ├── AccentColor.colorset/
│       │   │   │   └── Contents.json
│       │   │   ├── AppIcon.appiconset/
│       │   │   │   └── Contents.json
│       │   │   └── Contents.json
│       │   ├── Base.lproj/
│       │   │   ├── LaunchScreen.storyboard
│       │   │   └── Main.storyboard
│       │   ├── Info.plist
│       │   ├── Model.swift
│       │   ├── SceneDelegate.swift
│       │   └── ViewController.swift
│       ├── SherpaOnnx.xcodeproj/
│       │   ├── project.pbxproj
│       │   └── project.xcworkspace/
│       │       ├── contents.xcworkspacedata
│       │       └── xcshareddata/
│       │           └── IDEWorkspaceChecks.plist
│       ├── SherpaOnnxTests/
│       │   └── SherpaOnnxTests.swift
│       └── SherpaOnnxUITests/
│           ├── SherpaOnnxUITests.swift
│           └── SherpaOnnxUITestsLaunchTests.swift
├── ios-swiftui/
│   ├── .gitignore
│   ├── SherpaOnnx/
│   │   ├── SherpaOnnx/
│   │   │   ├── Assets.xcassets/
│   │   │   │   ├── AccentColor.colorset/
│   │   │   │   │   └── Contents.json
│   │   │   │   ├── AppIcon.appiconset/
│   │   │   │   │   └── Contents.json
│   │   │   │   └── Contents.json
│   │   │   ├── ContentView.swift
│   │   │   ├── Extension.swift
│   │   │   ├── Info.plist
│   │   │   ├── Model.swift
│   │   │   ├── Preview Content/
│   │   │   │   └── Preview Assets.xcassets/
│   │   │   │       └── Contents.json
│   │   │   ├── SherpaOnnxApp.swift
│   │   │   └── SherpaOnnxViewModel.swift
│   │   ├── SherpaOnnx.xcodeproj/
│   │   │   ├── project.pbxproj
│   │   │   └── project.xcworkspace/
│   │   │       ├── contents.xcworkspacedata
│   │   │       └── xcshareddata/
│   │   │           └── IDEWorkspaceChecks.plist
│   │   ├── SherpaOnnxTests/
│   │   │   └── SherpaOnnxTests.swift
│   │   └── SherpaOnnxUITests/
│   │       ├── SherpaOnnxUITests.swift
│   │       └── SherpaOnnxUITestsLaunchTests.swift
│   ├── SherpaOnnx2Pass/
│   │   ├── SherpaOnnx2Pass/
│   │   │   ├── Assets.xcassets/
│   │   │   │   ├── AccentColor.colorset/
│   │   │   │   │   └── Contents.json
│   │   │   │   ├── AppIcon.appiconset/
│   │   │   │   │   └── Contents.json
│   │   │   │   └── Contents.json
│   │   │   ├── ContentView.swift
│   │   │   ├── Extension.swift
│   │   │   ├── Info.plist
│   │   │   ├── Model.swift
│   │   │   ├── Preview Content/
│   │   │   │   └── Preview Assets.xcassets/
│   │   │   │       └── Contents.json
│   │   │   ├── SherpaOnnx2PassApp.swift
│   │   │   └── SherpaOnnxViewModel.swift
│   │   └── SherpaOnnx2Pass.xcodeproj/
│   │       ├── project.pbxproj
│   │       └── project.xcworkspace/
│   │           ├── contents.xcworkspacedata
│   │           └── xcshareddata/
│   │               └── IDEWorkspaceChecks.plist
│   ├── SherpaOnnxLangID/
│   │   ├── SherpaOnnxLangID/
│   │   │   ├── Assets.xcassets/
│   │   │   │   ├── AccentColor.colorset/
│   │   │   │   │   └── Contents.json
│   │   │   │   ├── AppIcon 1.appiconset/
│   │   │   │   │   └── Contents.json
│   │   │   │   ├── AppIcon.appiconset/
│   │   │   │   │   └── Contents.json
│   │   │   │   └── Contents.json
│   │   │   ├── ContentView.swift
│   │   │   ├── Info.plist
│   │   │   ├── Preview Content/
│   │   │   │   └── Preview Assets.xcassets/
│   │   │   │       └── Contents.json
│   │   │   ├── SherpaOnnxLangIDApp.swift
│   │   │   └── ViewModel.swift
│   │   ├── SherpaOnnxLangID.xcodeproj/
│   │   │   ├── project.pbxproj
│   │   │   └── project.xcworkspace/
│   │   │       ├── contents.xcworkspacedata
│   │   │       └── xcshareddata/
│   │   │           └── IDEWorkspaceChecks.plist
│   │   ├── SherpaOnnxLangIDTests/
│   │   │   └── SherpaOnnxLangIDTests.swift
│   │   └── SherpaOnnxLangIDUITests/
│   │       ├── SherpaOnnxLangIDUITests.swift
│   │       └── SherpaOnnxLangIDUITestsLaunchTests.swift
│   ├── SherpaOnnxSubtitle/
│   │   ├── .gitignore
│   │   ├── SherpaOnnxSubtitle/
│   │   │   ├── Assets.xcassets/
│   │   │   │   ├── AccentColor.colorset/
│   │   │   │   │   └── Contents.json
│   │   │   │   ├── AppIcon.appiconset/
│   │   │   │   │   └── Contents.json
│   │   │   │   └── Contents.json
│   │   │   ├── ContentView.swift
│   │   │   ├── Extensions/
│   │   │   │   └── UTType.swift
│   │   │   ├── Info.plist
│   │   │   ├── Models/
│   │   │   │   ├── Audio.swift
│   │   │   │   ├── Document.swift
│   │   │   │   ├── Errors.swift
│   │   │   │   └── SpeechSegment.swift
│   │   │   ├── Preview Content/
│   │   │   │   └── Preview Assets.xcassets/
│   │   │   │       └── Contents.json
│   │   │   ├── SherpaOnnxSubtitleApp.swift
│   │   │   └── SubtitleViewModel.swift
│   │   └── SherpaOnnxSubtitle.xcodeproj/
│   │       ├── project.pbxproj
│   │       └── project.xcworkspace/
│   │           ├── contents.xcworkspacedata
│   │           └── xcshareddata/
│   │               └── IDEWorkspaceChecks.plist
│   └── SherpaOnnxTts/
│       ├── SherpaOnnxTts/
│       │   ├── Assets.xcassets/
│       │   │   ├── AccentColor.colorset/
│       │   │   │   └── Contents.json
│       │   │   ├── AppIcon.appiconset/
│       │   │   │   └── Contents.json
│       │   │   └── Contents.json
│       │   ├── ContentView.swift
│       │   ├── Info.plist
│       │   ├── Preview Content/
│       │   │   └── Preview Assets.xcassets/
│       │   │       └── Contents.json
│       │   ├── SherpaOnnxTtsApp.swift
│       │   └── ViewModel.swift
│       └── SherpaOnnxTts.xcodeproj/
│           ├── project.pbxproj
│           └── project.xcworkspace/
│               ├── contents.xcworkspacedata
│               └── xcshareddata/
│                   └── IDEWorkspaceChecks.plist
├── java-api-examples/
│   ├── .gitignore
│   ├── AudioTaggingCEDFromFile.java
│   ├── AudioTaggingZipformerFromFile.java
│   ├── InverseTextNormalizationNonStreamingParaformer.java
│   ├── InverseTextNormalizationStreamingTransducer.java
│   ├── KeywordSpotterFromFile.java
│   ├── NonStreamingDecodeFileDolphinCtc.java
│   ├── NonStreamingDecodeFileFireRedAsr.java
│   ├── NonStreamingDecodeFileFireRedAsrCtc.java
│   ├── NonStreamingDecodeFileFunAsrNano.java
│   ├── NonStreamingDecodeFileMedAsrCtc.java
│   ├── NonStreamingDecodeFileMoonshine.java
│   ├── NonStreamingDecodeFileMoonshineV2.java
│   ├── NonStreamingDecodeFileNemo.java
│   ├── NonStreamingDecodeFileNemoCanary.java
│   ├── NonStreamingDecodeFileOmnilingualAsrCtc.java
│   ├── NonStreamingDecodeFileParaformer.java
│   ├── NonStreamingDecodeFileSenseVoice.java
│   ├── NonStreamingDecodeFileSenseVoiceWithHr.java
│   ├── NonStreamingDecodeFileTeleSpeechCtc.java
│   ├── NonStreamingDecodeFileTransducer.java
│   ├── NonStreamingDecodeFileTransducerHotwords.java
│   ├── NonStreamingDecodeFileWenetCtc.java
│   ├── NonStreamingDecodeFileWhisper.java
│   ├── NonStreamingDecodeFileWhisperMultiple.java
│   ├── NonStreamingDecodeFileZipformerCtc.java
│   ├── NonStreamingSpeechEnhancementDpdfNet.java
│   ├── NonStreamingSpeechEnhancementGtcrn.java
│   ├── NonStreamingTtsCoquiDe.java
│   ├── NonStreamingTtsKittenEn.java
│   ├── NonStreamingTtsKokoroEn.java
│   ├── NonStreamingTtsKokoroZhEn.java
│   ├── NonStreamingTtsMatchaEn.java
│   ├── NonStreamingTtsMatchaZh.java
│   ├── NonStreamingTtsPiperEn.java
│   ├── NonStreamingTtsPiperEnWithCallback.java
│   ├── NonStreamingTtsVitsZh.java
│   ├── NonStreamingWebsocketClient.java
│   ├── OfflineAddPunctuation.java
│   ├── OfflineSpeakerDiarizationDemo.java
│   ├── OnlineAddPunctuation.java
│   ├── PocketTts.java
│   ├── README.md
│   ├── SpeakerIdentification.java
│   ├── SpokenLanguageIdentificationWhisper.java
│   ├── StreamingAsrFromMicTransducer.java
│   ├── StreamingDecodeFileCtc.java
│   ├── StreamingDecodeFileCtcHLG.java
│   ├── StreamingDecodeFileParaformer.java
│   ├── StreamingDecodeFileToneCtc.java
│   ├── StreamingDecodeFileTransducer.java
│   ├── StreamingSpeechEnhancementDpdfNet.java
│   ├── StreamingSpeechEnhancementGtcrn.java
│   ├── SupertonicTts.java
│   ├── TenVadRemoveSilence.java
│   ├── VadFromMic.java
│   ├── VadFromMicWithNonStreamingMoonshine.java
│   ├── VadFromMicWithNonStreamingParaformer.java
│   ├── VadFromMicWithNonStreamingSenseVoice.java
│   ├── VadFromMicWithNonStreamingWhisper.java
│   ├── VadNonStreamingDolphinCtc.java
│   ├── VadNonStreamingParaformer.java
│   ├── VadNonStreamingSenseVoice.java
│   ├── VadRemoveSilence.java
│   ├── VersionTest.java
│   ├── ZipVoiceTts.java
│   ├── run-audio-tagging-ced-from-file.sh
│   ├── run-audio-tagging-zipformer-from-file.sh
│   ├── run-inverse-text-normalization-paraformer.sh
│   ├── run-inverse-text-normalization-transducer.sh
│   ├── run-kws-from-file.sh
│   ├── run-non-streaming-decode-file-dolphin-ctc.sh
│   ├── run-non-streaming-decode-file-fire-red-asr-ctc.sh
│   ├── run-non-streaming-decode-file-fire-red-asr.sh
│   ├── run-non-streaming-decode-file-funasr-nano.sh
│   ├── run-non-streaming-decode-file-medasr-ctc.sh
│   ├── run-non-streaming-decode-file-moonshine-v2.sh
│   ├── run-non-streaming-decode-file-moonshine.sh
│   ├── run-non-streaming-decode-file-nemo-canary.sh
│   ├── run-non-streaming-decode-file-nemo.sh
│   ├── run-non-streaming-decode-file-omnilingual-asr-ctc.sh
│   ├── run-non-streaming-decode-file-paraformer.sh
│   ├── run-non-streaming-decode-file-sense-voice-with-hr.sh
│   ├── run-non-streaming-decode-file-sense-voice.sh
│   ├── run-non-streaming-decode-file-tele-speech-ctc.sh
│   ├── run-non-streaming-decode-file-transducer-hotwords.sh
│   ├── run-non-streaming-decode-file-transducer.sh
│   ├── run-non-streaming-decode-file-wenet-ctc.sh
│   ├── run-non-streaming-decode-file-whisper-multiple.sh
│   ├── run-non-streaming-decode-file-whisper.sh
│   ├── run-non-streaming-decode-file-zipformer-ctc.sh
│   ├── run-non-streaming-speech-enhancement-dpdfnet.sh
│   ├── run-non-streaming-speech-enhancement-gtcrn.sh
│   ├── run-non-streaming-tts-coqui-de.sh
│   ├── run-non-streaming-tts-kitten-en.sh
│   ├── run-non-streaming-tts-kokoro-en.sh
│   ├── run-non-streaming-tts-kokoro-zh-en.sh
│   ├── run-non-streaming-tts-matcha-en.sh
│   ├── run-non-streaming-tts-matcha-zh.sh
│   ├── run-non-streaming-tts-piper-en-with-callback.sh
│   ├── run-non-streaming-tts-piper-en.sh
│   ├── run-non-streaming-tts-vits-zh.sh
│   ├── run-non-streaming-websocket-client.sh
│   ├── run-offline-add-punctuation-zh-en.sh
│   ├── run-offline-speaker-diarization.sh
│   ├── run-online-add-punctuation-zh-en.sh
│   ├── run-pocket-tts.sh
│   ├── run-speaker-identification.sh
│   ├── run-spoken-language-identification-whisper.sh
│   ├── run-streaming-asr-from-mic-transducer.sh
│   ├── run-streaming-decode-file-ctc-hlg.sh
│   ├── run-streaming-decode-file-ctc.sh
│   ├── run-streaming-decode-file-paraformer.sh
│   ├── run-streaming-decode-file-tone-ctc.sh
│   ├── run-streaming-decode-file-transducer.sh
│   ├── run-streaming-speech-enhancement-dpdfnet.sh
│   ├── run-streaming-speech-enhancement-gtcrn.sh
│   ├── run-supertonic-tts.sh
│   ├── run-ten-vad-remove-silence.sh
│   ├── run-vad-from-mic-non-streaming-moonshine.sh
│   ├── run-vad-from-mic-non-streaming-paraformer.sh
│   ├── run-vad-from-mic-non-streaming-sense-voice.sh
│   ├── run-vad-from-mic-non-streaming-whisper.sh
│   ├── run-vad-from-mic.sh
│   ├── run-vad-non-streaming-dolphin-ctc.sh
│   ├── run-vad-non-streaming-paraformer.sh
│   ├── run-vad-non-streaming-sense-voice.sh
│   ├── run-vad-remove-silence.sh
│   ├── run-version-test.sh
│   ├── run-zipvoice-tts.sh
│   └── src/
│       └── websocketsrv/
│           ├── AsrWebsocketClient.java
│           ├── AsrWebsocketServer.java
│           ├── ConnectionData.java
│           ├── DecoderThreadHandler.java
│           └── StreamThreadHandler.java
├── jitpack.yml
├── kotlin-api-examples/
│   ├── .gitignore
│   ├── faked-asset-manager.kt
│   ├── faked-log.kt
│   ├── test_audio_tagging.kt
│   ├── test_itn_offline_asr.kt
│   ├── test_itn_online_asr.kt
│   ├── test_language_id.kt
│   ├── test_offline_asr.kt
│   ├── test_offline_fire_red_asr_ctc.kt
│   ├── test_offline_funasr_nano.kt
│   ├── test_offline_medasr_ctc.kt
│   ├── test_offline_moonshine_asr_v2.kt
│   ├── test_offline_nemo_canary.kt
│   ├── test_offline_omnilingual_asr_ctc.kt
│   ├── test_offline_punctuation.kt
│   ├── test_offline_sense_voice_with_hr.kt
│   ├── test_offline_speaker_diarization.kt
│   ├── test_offline_speech_denoiser.kt
│   ├── test_offline_speech_denoiser_dpdfnet.kt
│   ├── test_offline_wenet_ctc.kt
│   ├── test_online_asr.kt
│   ├── test_online_punctuation.kt
│   ├── test_online_speech_denoiser.kt
│   ├── test_pocket_tts.kt
│   ├── test_speaker_id.kt
│   ├── test_supertonic_tts.kt
│   ├── test_tts.kt
│   ├── test_version.kt
│   └── test_zipvoice_tts.kt
├── lazarus-examples/
│   ├── .gitignore
│   └── README.md
├── mfc-examples/
│   ├── .gitignore
│   ├── NonStreamingSpeechRecognition/
│   │   ├── NonStreamingSpeechRecognition.cpp
│   │   ├── NonStreamingSpeechRecognition.h
│   │   ├── NonStreamingSpeechRecognition.rc
│   │   ├── NonStreamingSpeechRecognition.vcxproj
│   │   ├── NonStreamingSpeechRecognition.vcxproj.filters
│   │   ├── NonStreamingSpeechRecognitionDlg.cpp
│   │   ├── NonStreamingSpeechRecognitionDlg.h
│   │   ├── Resource.h
│   │   ├── framework.h
│   │   ├── pch.cpp
│   │   ├── pch.h
│   │   ├── res/
│   │   │   └── NonStreamingSpeechRecognition.rc2
│   │   ├── sherpa-onnx-deps.props
│   │   └── targetver.h
│   ├── NonStreamingTextToSpeech/
│   │   ├── NonStreamingTextToSpeech.cpp
│   │   ├── NonStreamingTextToSpeech.h
│   │   ├── NonStreamingTextToSpeech.rc
│   │   ├── NonStreamingTextToSpeech.vcxproj
│   │   ├── NonStreamingTextToSpeech.vcxproj.filters
│   │   ├── NonStreamingTextToSpeechDlg.cpp
│   │   ├── NonStreamingTextToSpeechDlg.h
│   │   ├── Resource.h
│   │   ├── framework.h
│   │   ├── pch.cpp
│   │   ├── pch.h
│   │   ├── res/
│   │   │   └── NonStreamingTextToSpeech.rc2
│   │   ├── sherpa-onnx-deps.props
│   │   └── targetver.h
│   ├── README.md
│   ├── StreamingSpeechRecognition/
│   │   ├── Resource.h
│   │   ├── StreamingSpeechRecognition.cpp
│   │   ├── StreamingSpeechRecognition.h
│   │   ├── StreamingSpeechRecognition.rc
│   │   ├── StreamingSpeechRecognition.vcxproj
│   │   ├── StreamingSpeechRecognition.vcxproj.filters
│   │   ├── StreamingSpeechRecognitionDlg.cpp
│   │   ├── StreamingSpeechRecognitionDlg.h
│   │   ├── framework.h
│   │   ├── pch.cpp
│   │   ├── pch.h
│   │   ├── res/
│   │   │   └── StreamingSpeechRecognition.rc2
│   │   ├── sherpa-onnx-deps.props
│   │   └── targetver.h
│   └── mfc-examples.sln
├── new-release.sh
├── nodejs-addon-examples/
│   ├── .gitignore
│   ├── README.md
│   ├── package.json
│   ├── test_asr_non_streaming_dolphin_ctc.js
│   ├── test_asr_non_streaming_fire_red_asr.js
│   ├── test_asr_non_streaming_fire_red_asr_ctc.js
│   ├── test_asr_non_streaming_fire_red_asr_ctc_async.js
│   ├── test_asr_non_streaming_funasr_nano.js
│   ├── test_asr_non_streaming_funasr_nano_async.js
│   ├── test_asr_non_streaming_medasr_ctc.js
│   ├── test_asr_non_streaming_moonshine.js
│   ├── test_asr_non_streaming_moonshine_v2.js
│   ├── test_asr_non_streaming_nemo_canary.js
│   ├── test_asr_non_streaming_nemo_ctc.js
│   ├── test_asr_non_streaming_nemo_parakeet_tdt_v2.js
│   ├── test_asr_non_streaming_omnilingual_asr_ctc.js
│   ├── test_asr_non_streaming_paraformer.js
│   ├── test_asr_non_streaming_paraformer_itn.js
│   ├── test_asr_non_streaming_sense_voice.js
│   ├── test_asr_non_streaming_sense_voice_with_hr.js
│   ├── test_asr_non_streaming_transducer.js
│   ├── test_asr_non_streaming_wenet_ctc.js
│   ├── test_asr_non_streaming_whisper.js
│   ├── test_asr_non_streaming_zipformer_ctc.js
│   ├── test_asr_streaming_ctc.js
│   ├── test_asr_streaming_ctc_hlg.js
│   ├── test_asr_streaming_ctc_hlg_microphone.js
│   ├── test_asr_streaming_ctc_microphone.js
│   ├── test_asr_streaming_paraformer.js
│   ├── test_asr_streaming_paraformer_microphone.js
│   ├── test_asr_streaming_t_one_ctc.js
│   ├── test_asr_streaming_transducer.js
│   ├── test_asr_streaming_transducer_itn.js
│   ├── test_asr_streaming_transducer_microphone.js
│   ├── test_asr_streaming_transducer_microphone_itn.js
│   ├── test_asr_streaming_transducer_with_hr.js
│   ├── test_audio_tagging_ced.js
│   ├── test_audio_tagging_zipformer.js
│   ├── test_keyword_spotter_transducer.js
│   ├── test_keyword_spotter_transducer_microphone.js
│   ├── test_offline_punctuation.js
│   ├── test_offline_speaker_diarization.js
│   ├── test_offline_speech_enhancement_dpdfnet.js
│   ├── test_offline_speech_enhancement_gtcrn.js
│   ├── test_online_punctuation.js
│   ├── test_online_speech_enhancement_dpdfnet.js
│   ├── test_online_speech_enhancement_gtcrn.js
│   ├── test_speaker_identification.js
│   ├── test_spoken_language_identification.js
│   ├── test_tts_non_streaming_kitten_en.js
│   ├── test_tts_non_streaming_kokoro_en.js
│   ├── test_tts_non_streaming_kokoro_zh_en.js
│   ├── test_tts_non_streaming_matcha_icefall_en.js
│   ├── test_tts_non_streaming_matcha_icefall_zh.js
│   ├── test_tts_non_streaming_pocket_en.js
│   ├── test_tts_non_streaming_pocket_en_async.js
│   ├── test_tts_non_streaming_pocket_en_play_async.js
│   ├── test_tts_non_streaming_supertonic_en.js
│   ├── test_tts_non_streaming_supertonic_en_async.js
│   ├── test_tts_non_streaming_supertonic_en_play_async.js
│   ├── test_tts_non_streaming_vits_coqui_de.js
│   ├── test_tts_non_streaming_vits_piper_en.js
│   ├── test_tts_non_streaming_vits_zh_aishell3.js
│   ├── test_tts_non_streaming_vits_zh_ll.js
│   ├── test_tts_non_streaming_zipvoice_zh_en.js
│   ├── test_tts_non_streaming_zipvoice_zh_en_async.js
│   ├── test_tts_non_streaming_zipvoice_zh_en_play_async.js
│   ├── test_vad_asr_non_streaming_moonshine_microphone.js
│   ├── test_vad_asr_non_streaming_nemo_ctc_microphone.js
│   ├── test_vad_asr_non_streaming_paraformer_microphone.js
│   ├── test_vad_asr_non_streaming_sense_voice_microphone.js
│   ├── test_vad_asr_non_streaming_transducer_microphone.js
│   ├── test_vad_asr_non_streaming_whisper_microphone.js
│   ├── test_vad_asr_non_streaming_zipformer_ctc_microphone.js
│   ├── test_vad_microphone.js
│   ├── test_vad_spoken_language_identification_microphone.js
│   ├── test_vad_with_non_streaming_asr_moonshine.js
│   └── test_vad_with_non_streaming_asr_whisper.js
├── nodejs-examples/
│   ├── .gitignore
│   ├── README.md
│   ├── package.json
│   ├── test-keyword-spotter-transducer.js
│   ├── test-offline-dolphin-ctc.js
│   ├── test-offline-fire-red-asr-ctc.js
│   ├── test-offline-fire-red-asr.js
│   ├── test-offline-funasr-nano.js
│   ├── test-offline-medasr-ctc.js
│   ├── test-offline-moonshine-v2.js
│   ├── test-offline-moonshine.js
│   ├── test-offline-nemo-canary.js
│   ├── test-offline-nemo-ctc.js
│   ├── test-offline-omnilingual-asr-ctc.js
│   ├── test-offline-paraformer-itn.js
│   ├── test-offline-paraformer.js
│   ├── test-offline-sense-voice-with-hr.js
│   ├── test-offline-sense-voice.js
│   ├── test-offline-speaker-diarization.js
│   ├── test-offline-speech-enhancement-dpdfnet.js
│   ├── test-offline-speech-enhancement-gtcrn.js
│   ├── test-offline-transducer.js
│   ├── test-offline-tts-kitten-en.js
│   ├── test-offline-tts-kokoro-en.js
│   ├── test-offline-tts-kokoro-zh-en.js
│   ├── test-offline-tts-matcha-en.js
│   ├── test-offline-tts-matcha-zh.js
│   ├── test-offline-tts-pocket-en.js
│   ├── test-offline-tts-vits-en.js
│   ├── test-offline-tts-vits-zh.js
│   ├── test-offline-tts-zipvoice-zh-en.js
│   ├── test-offline-wenet-ctc.js
│   ├── test-offline-whisper.js
│   ├── test-offline-zipformer-ctc.js
│   ├── test-online-paraformer-microphone-mic.js
│   ├── test-online-paraformer-microphone.js
│   ├── test-online-paraformer.js
│   ├── test-online-speech-enhancement-dpdfnet.js
│   ├── test-online-speech-enhancement-gtcrn.js
│   ├── test-online-t-one-ctc.js
│   ├── test-online-transducer-itn.js
│   ├── test-online-transducer-microphone.js
│   ├── test-online-transducer.js
│   ├── test-online-zipformer2-ctc-hlg.js
│   ├── test-online-zipformer2-ctc.js
│   ├── test-vad-with-non-streaming-asr-moonshine.js
│   └── test-vad-with-non-streaming-asr-whisper.js
├── pascal-api-examples/
│   ├── .gitignore
│   ├── README.md
│   ├── non-streaming-asr/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── dolphin_ctc.pas
│   │   ├── fire_red_asr.pas
│   │   ├── fire_red_asr_ctc.pas
│   │   ├── funasr_nano.pas
│   │   ├── medasr_ctc.pas
│   │   ├── moonshine.pas
│   │   ├── moonshine_v2.pas
│   │   ├── nemo_canary.pas
│   │   ├── nemo_ctc.pas
│   │   ├── nemo_transducer.pas
│   │   ├── omnilingual_asr_ctc.pas
│   │   ├── paraformer.pas
│   │   ├── paraformer_itn.pas
│   │   ├── run-dolphin-ctc.sh
│   │   ├── run-fire-red-asr-ctc.sh
│   │   ├── run-fire-red-asr.sh
│   │   ├── run-funasr-nano.sh
│   │   ├── run-medasr-ctc.sh
│   │   ├── run-moonshine-v2.sh
│   │   ├── run-moonshine.sh
│   │   ├── run-nemo-canary.sh
│   │   ├── run-nemo-ctc.sh
│   │   ├── run-nemo-transducer.sh
│   │   ├── run-omnilingual-asr-ctc.sh
│   │   ├── run-paraformer-itn.sh
│   │   ├── run-paraformer.sh
│   │   ├── run-sense-voice.sh
│   │   ├── run-telespeech-ctc.sh
│   │   ├── run-wenet-ctc.sh
│   │   ├── run-whisper.sh
│   │   ├── run-zipformer-ctc.sh
│   │   ├── run-zipformer-transducer.sh
│   │   ├── sense_voice.pas
│   │   ├── telespeech_ctc.pas
│   │   ├── wenet_ctc.pas
│   │   ├── whisper.pas
│   │   ├── zipformer_ctc.pas
│   │   └── zipformer_transducer.pas
│   ├── portaudio-test/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── test-play.pas
│   │   └── test-record.pas
│   ├── read-wav/
│   │   ├── .gitignore
│   │   └── main.pas
│   ├── speaker-diarization/
│   │   └── main.pas
│   ├── speech-enhancement-dpdfnet/
│   │   ├── .gitignore
│   │   └── dpdfnet.pas
│   ├── speech-enhancement-gtcrn/
│   │   ├── .gitignore
│   │   └── gtcrn.pas
│   ├── streaming-asr/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── nemo_transducer.pas
│   │   ├── paraformer.pas
│   │   ├── run-nemo-transducer.sh
│   │   ├── run-paraformer.sh
│   │   ├── run-t-one-ctc.sh
│   │   ├── run-zipformer-ctc-hlg.sh
│   │   ├── run-zipformer-ctc.sh
│   │   ├── run-zipformer-transducer.sh
│   │   ├── t_one_ctc.pas
│   │   ├── zipformer_ctc.pas
│   │   ├── zipformer_ctc_hlg.pas
│   │   └── zipformer_transducer.pas
│   ├── streaming-speech-enhancement-dpdfnet/
│   │   ├── .gitignore
│   │   └── dpdfnet.pas
│   ├── streaming-speech-enhancement-gtcrn/
│   │   ├── .gitignore
│   │   └── gtcrn.pas
│   ├── tts/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── kitten-en-playback.pas
│   │   ├── kitten-en.pas
│   │   ├── kokoro-en-playback.pas
│   │   ├── kokoro-en.pas
│   │   ├── kokoro-zh-en-playback.pas
│   │   ├── kokoro-zh-en.pas
│   │   ├── matcha-en-playback.pas
│   │   ├── matcha-en.pas
│   │   ├── matcha-zh-playback.pas
│   │   ├── matcha-zh.pas
│   │   ├── piper-playback.pas
│   │   ├── piper.pas
│   │   ├── pocket-en.pas
│   │   ├── run-kitten-en-playback.sh
│   │   ├── run-kitten-en.sh
│   │   ├── run-kokoro-en-playback.sh
│   │   ├── run-kokoro-en.sh
│   │   ├── run-kokoro-zh-en-playback.sh
│   │   ├── run-kokoro-zh-en.sh
│   │   ├── run-matcha-en-playback.sh
│   │   ├── run-matcha-en.sh
│   │   ├── run-matcha-zh-playback.sh
│   │   ├── run-matcha-zh.sh
│   │   ├── run-piper-playback.sh
│   │   ├── run-piper.sh
│   │   ├── run-pocket-en.sh
│   │   ├── run-supertonic-en.sh
│   │   ├── run-zipvoice-zh-en.sh
│   │   ├── supertonic-en.pas
│   │   └── zipvoice-zh-en.pas
│   ├── vad/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── circular_buffer.pas
│   │   ├── remove_silence.pas
│   │   ├── remove_silence_ten_vad.pas
│   │   ├── run-circular-buffer.sh
│   │   ├── run-remove-silence-ten-vad.sh
│   │   └── run-remove-silence.sh
│   └── vad-with-non-streaming-asr/
│       ├── .gitignore
│       ├── README.md
│       ├── run-vad-with-dolphin-ctc.sh
│       ├── run-vad-with-moonshine.sh
│       ├── run-vad-with-sense-voice.sh
│       ├── run-vad-with-whisper.sh
│       ├── run-vad-with-zipformer-ctc.sh
│       ├── vad_with_dolphin.pas
│       ├── vad_with_moonshine.pas
│       ├── vad_with_sense_voice.pas
│       ├── vad_with_whisper.pas
│       └── vad_with_zipformer_ctc.pas
├── pom.xml
├── python-api-examples/
│   ├── README.md
│   ├── add-punctuation-online.py
│   ├── add-punctuation.py
│   ├── audio-tagging-from-a-file-ced.py
│   ├── audio-tagging-from-a-file.py
│   ├── generate-subtitles.py
│   ├── http_server.py
│   ├── inverse-text-normalization-offline-asr.py
│   ├── inverse-text-normalization-online-asr.py
│   ├── keyword-spotter-from-microphone.py
│   ├── keyword-spotter.py
│   ├── non_streaming_server.py
│   ├── offline-decode-files.py
│   ├── offline-dolphin-ctc-decode-files.py
│   ├── offline-fire-red-asr-ctc-decode-files.py
│   ├── offline-fire-red-asr-decode-files.py
│   ├── offline-funasr-nano-decode-files.py
│   ├── offline-medasr-ctc-decode-files.py
│   ├── offline-moonshine-decode-files-v2.py
│   ├── offline-moonshine-decode-files.py
│   ├── offline-nemo-canary-decode-files.py
│   ├── offline-nemo-ctc-decode-files.py
│   ├── offline-nemo-parakeet-decode-file.py
│   ├── offline-nemo-transducer-decode-files.py
│   ├── offline-omnilingual-asr-ctc-decode-files.py
│   ├── offline-omnilingual-asr-ctc-v2-decode-files.py
│   ├── offline-sense-voice-ctc-decode-files-with-hr.py
│   ├── offline-sense-voice-ctc-decode-files.py
│   ├── offline-source-separation-spleeter.py
│   ├── offline-source-separation-uvr.py
│   ├── offline-speaker-diarization.py
│   ├── offline-speech-enhancement-dpdfnet.py
│   ├── offline-speech-enhancement-gtcrn.py
│   ├── offline-telespeech-ctc-decode-files.py
│   ├── offline-tts-play.py
│   ├── offline-tts.py
│   ├── offline-websocket-client-decode-files-paralell.py
│   ├── offline-websocket-client-decode-files-sequential.py
│   ├── offline-whisper-decode-files.py
│   ├── offline-zipformer-ctc-decode-files.py
│   ├── online-decode-files.py
│   ├── online-nemo-ctc-decode-files.py
│   ├── online-speech-enhancement-dpdfnet.py
│   ├── online-speech-enhancement-gtcrn.py
│   ├── online-t-one-ctc-decode-files.py
│   ├── online-websocket-client-decode-file.py
│   ├── online-websocket-client-microphone.py
│   ├── online-zipformer-ctc-hlg-decode-file.py
│   ├── pocket-tts-play.py
│   ├── pocket-tts.py
│   ├── simulate-streaming-paraformer-microphone.py
│   ├── simulate-streaming-sense-voice-microphone.py
│   ├── speaker-identification-with-vad-dynamic.py
│   ├── speaker-identification-with-vad-non-streaming-asr-alsa.py
│   ├── speaker-identification-with-vad-non-streaming-asr.py
│   ├── speaker-identification-with-vad.py
│   ├── speaker-identification.py
│   ├── speech-recognition-from-microphone-with-endpoint-detection-alsa.py
│   ├── speech-recognition-from-microphone-with-endpoint-detection.py
│   ├── speech-recognition-from-microphone.py
│   ├── speech-recognition-from-url.py
│   ├── spoken-language-identification.py
│   ├── streaming-paraformer-asr-microphone.py
│   ├── streaming_server.py
│   ├── supertonic-tts.py
│   ├── test-sentence-piece-tokenizer.py
│   ├── test-whisper-timestamps.py
│   ├── two-pass-speech-recognition-from-microphone.py
│   ├── two-pass-wss.py
│   ├── vad-alsa.py
│   ├── vad-microphone.py
│   ├── vad-remove-non-speech-segments-alsa.py
│   ├── vad-remove-non-speech-segments-from-file.py
│   ├── vad-remove-non-speech-segments.py
│   ├── vad-with-non-streaming-asr.py
│   ├── web/
│   │   ├── .gitignore
│   │   ├── generate-certificate.py
│   │   ├── index.html
│   │   ├── js/
│   │   │   ├── offline_record.js
│   │   │   ├── streaming_record.js
│   │   │   └── upload.js
│   │   ├── nav-partial.html
│   │   ├── offline_record.html
│   │   ├── start-https-server.py
│   │   ├── streaming_record.html
│   │   └── upload.html
│   ├── zipvoice-tts-play.py
│   └── zipvoice-tts.py
├── release.sh
├── rust-api-examples/
│   ├── .gitignore
│   ├── Cargo.toml
│   ├── README.md
│   ├── examples/
│   │   ├── audio_tagging_ced.rs
│   │   ├── audio_tagging_zipformer.rs
│   │   ├── fire_red_asr_ctc.rs
│   │   ├── keyword_spotter.rs
│   │   ├── kitten_tts_en.rs
│   │   ├── kokoro_tts_en.rs
│   │   ├── kokoro_tts_zh_en.rs
│   │   ├── matcha_tts_en.rs
│   │   ├── matcha_tts_zh.rs
│   │   ├── moonshine_v2.rs
│   │   ├── nemo_parakeet.rs
│   │   ├── offline_punctuation.rs
│   │   ├── offline_speaker_diarization.rs
│   │   ├── offline_speech_enhancement_dpdfnet.rs
│   │   ├── offline_speech_enhancement_gtcrn.rs
│   │   ├── online_punctuation.rs
│   │   ├── pocket_tts.rs
│   │   ├── sense_voice.rs
│   │   ├── silero_vad_remove_silence.rs
│   │   ├── speaker_embedding_cosine_similarity.rs
│   │   ├── speaker_embedding_extractor.rs
│   │   ├── speaker_embedding_manager.rs
│   │   ├── spoken_language_identification.rs
│   │   ├── streaming_speech_enhancement_dpdfnet.rs
│   │   ├── streaming_speech_enhancement_gtcrn.rs
│   │   ├── streaming_zipformer.rs
│   │   ├── streaming_zipformer_microphone.rs
│   │   ├── supertonic_tts.rs
│   │   ├── version.rs
│   │   ├── vits_tts.rs
│   │   └── zipvoice_tts.rs
│   ├── run-audio-tagging-ced.sh
│   ├── run-audio-tagging-zipformer.sh
│   ├── run-fire-red-asr-ctc.sh
│   ├── run-keyword-spotter.sh
│   ├── run-kitten-tts-en.sh
│   ├── run-kokoro-tts-en.sh
│   ├── run-kokoro-tts-zh-en.sh
│   ├── run-matcha-tts-en.sh
│   ├── run-matcha-tts-zh.sh
│   ├── run-moonshine-v2.sh
│   ├── run-nemo-parakeet-en.sh
│   ├── run-offline-punctuation.sh
│   ├── run-offline-speaker-diarization.sh
│   ├── run-offline-speech-enhancement-dpdfnet.sh
│   ├── run-offline-speech-enhancement-gtcrn.sh
│   ├── run-online-punctuation.sh
│   ├── run-pocket-tts.sh
│   ├── run-sense-voice.sh
│   ├── run-silero-vad-remove-silence.sh
│   ├── run-speaker-embedding-cosine-similarity.sh
│   ├── run-speaker-embedding-extractor.sh
│   ├── run-speaker-embedding-manager.sh
│   ├── run-spoken-language-identification.sh
│   ├── run-streaming-speech-enhancement-dpdfnet.sh
│   ├── run-streaming-speech-enhancement-gtcrn.sh
│   ├── run-streaming-zipformer-en.sh
│   ├── run-streaming-zipformer-microphone-zh-en.sh
│   ├── run-streaming-zipformer-zh-en.sh
│   ├── run-supertonic-tts.sh
│   ├── run-version.sh
│   ├── run-vits-de.sh
│   ├── run-vits-en.sh
│   ├── run-zipformer-en.sh
│   ├── run-zipformer-vi.sh
│   ├── run-zipformer-zh-en.sh
│   └── run-zipvoice-tts.sh
├── scripts/
│   ├── 3dspeaker/
│   │   ├── README.md
│   │   ├── export-onnx.py
│   │   └── test-onnx.py
│   ├── apk/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── build-apk-asr-2pass.sh.in
│   │   ├── build-apk-asr.sh.in
│   │   ├── build-apk-audio-tagging-wearos.sh.in
│   │   ├── build-apk-audio-tagging.sh.in
│   │   ├── build-apk-qnn-vad-asr-simulate-streaming.sh.in
│   │   ├── build-apk-slid.sh.in
│   │   ├── build-apk-speaker-diarization.sh.in
│   │   ├── build-apk-speaker-identification.sh.in
│   │   ├── build-apk-tts-engine.sh.in
│   │   ├── build-apk-tts.sh.in
│   │   ├── build-apk-vad-asr-simulate-streaming.sh.in
│   │   ├── build-apk-vad-asr.sh.in
│   │   ├── generate-asr-2pass-apk-script.py
│   │   ├── generate-asr-apk-script.py
│   │   ├── generate-audio-tagging-apk-script.py
│   │   ├── generate-qnn-vad-asr-apk-script.py
│   │   ├── generate-slid-apk-script.py
│   │   ├── generate-speaker-diarization-apk-script.py
│   │   ├── generate-speaker-identification-apk-script.py
│   │   ├── generate-tts-apk-script.py
│   │   └── generate-vad-asr-apk-script.py
│   ├── bbpe/
│   │   ├── .gitignore
│   │   └── generate_bbpe_table.py
│   ├── benchmark/
│   │   ├── README.md
│   │   ├── download_librispeech_test_data.py
│   │   └── run_timestamp_benchmark.py
│   ├── check_style_cpplint.sh
│   ├── dart/
│   │   ├── add-punctuations-pubspec.yaml
│   │   ├── audio-tagging-pubspec.yaml
│   │   ├── kws-pubspec.yaml
│   │   ├── non-streaming-asr-pubspec.yaml
│   │   ├── release.sh
│   │   ├── sherpa-onnx-pubspec.yaml
│   │   ├── slid-pubspec.yaml
│   │   ├── speaker-diarization-pubspec.yaml
│   │   ├── speaker-id-pubspec.yaml
│   │   ├── speech-enhancement-dpdfnet-pubspec.yaml
│   │   ├── speech-enhancement-gtcrn-pubspec.yaml
│   │   ├── streaming-asr-pubspec.yaml
│   │   ├── streaming-speech-enhancement-dpdfnet-pubspec.yaml
│   │   ├── streaming-speech-enhancement-gtcrn-pubspec.yaml
│   │   ├── tts-pubspec.yaml
│   │   ├── vad-non-streaming-asr-pubspec.yaml
│   │   └── vad-pubspec.yaml
│   ├── dotnet/
│   │   ├── .gitignore
│   │   ├── AudioEvent.cs
│   │   ├── AudioTagging.cs
│   │   ├── AudioTaggingConfig.cs
│   │   ├── AudioTaggingModelConfig.cs
│   │   ├── CircularBuffer.cs
│   │   ├── DenoisedAudio.cs
│   │   ├── Dll.cs
│   │   ├── FastClusteringConfig.cs
│   │   ├── FeatureConfig.cs
│   │   ├── HomophoneReplacerConfig.cs
│   │   ├── KeywordResult.cs
│   │   ├── KeywordSpotter.cs
│   │   ├── KeywordSpotterConfig.cs
│   │   ├── OfflineCanaryModelConfig.cs
│   │   ├── OfflineDolphinModelConfig.cs
│   │   ├── OfflineFireRedAsrCtcModel.cs
│   │   ├── OfflineFireRedAsrModelConfig.cs
│   │   ├── OfflineFunAsrNanoModel.cs
│   │   ├── OfflineLMConfig.cs
│   │   ├── OfflineMedAsrCtcModel.cs
│   │   ├── OfflineModelConfig.cs
│   │   ├── OfflineMoonshineModelConfig.cs
│   │   ├── OfflineNemoEncDecCtcModelConfig.cs
│   │   ├── OfflineOmnilingualAsrCtcModel.cs
│   │   ├── OfflineParaformerModelConfig.cs
│   │   ├── OfflinePunctuation.cs
│   │   ├── OfflinePunctuationConfig.cs
│   │   ├── OfflinePunctuationModelConfig.cs
│   │   ├── OfflineRecognizer.cs
│   │   ├── OfflineRecognizerConfig.cs
│   │   ├── OfflineRecognizerResult.cs
│   │   ├── OfflineSenseVoiceModelConfig.cs
│   │   ├── OfflineSpeakerDiarization.cs
│   │   ├── OfflineSpeakerDiarizationConfig.cs
│   │   ├── OfflineSpeakerDiarizationSegment.cs
│   │   ├── OfflineSpeakerSegmentationModelConfig.cs
│   │   ├── OfflineSpeakerSegmentationPyannoteModelConfig.cs
│   │   ├── OfflineSpeechDenoiser.cs
│   │   ├── OfflineSpeechDenoiserConfig.cs
│   │   ├── OfflineSpeechDenoiserDpdfNetModelConfig.cs
│   │   ├── OfflineSpeechDenoiserGtcrnModelConfig.cs
│   │   ├── OfflineSpeechDenoiserModelConfig.cs
│   │   ├── OfflineStream.cs
│   │   ├── OfflineTdnnModelConfig.cs
│   │   ├── OfflineTransducerModelConfig.cs
│   │   ├── OfflineTts.cs
│   │   ├── OfflineTtsConfig.cs
│   │   ├── OfflineTtsGeneratedAudio.cs
│   │   ├── OfflineTtsGenerationConfig.cs
│   │   ├── OfflineTtsKittenModelConfig.cs
│   │   ├── OfflineTtsKokoroModelConfig.cs
│   │   ├── OfflineTtsMatchaModelConfig.cs
│   │   ├── OfflineTtsModelConfig.cs
│   │   ├── OfflineTtsPocketModelConfig.cs
│   │   ├── OfflineTtsSupertonicModelConfig.cs
│   │   ├── OfflineTtsVitsModelConfig.cs
│   │   ├── OfflineTtsZipVoiceModelConfig.cs
│   │   ├── OfflineWenetCtcModelConfig.cs
│   │   ├── OfflineWhisperModelConfig.cs
│   │   ├── OfflineZipformerAudioTaggingModelConfig.cs
│   │   ├── OfflineZipformerCtcModelConfig.cs
│   │   ├── OnlineCtcFstDecoderConfig.cs
│   │   ├── OnlineModelConfig.cs
│   │   ├── OnlineNemoCtcModelConfig.cs
│   │   ├── OnlineParaformerModelConfig.cs
│   │   ├── OnlineRecognizer.cs
│   │   ├── OnlineRecognizerConfig.cs
│   │   ├── OnlineRecognizerResult.cs
│   │   ├── OnlineSpeechDenoiser.cs
│   │   ├── OnlineSpeechDenoiserConfig.cs
│   │   ├── OnlineStream.cs
│   │   ├── OnlineToneCtcModelConfig.cs
│   │   ├── OnlineTransducerModelConfig.cs
│   │   ├── OnlineZipformer2CtcModelConfig.cs
│   │   ├── README.md
│   │   ├── SileroVadModelConfig.cs
│   │   ├── SpeakerEmbeddingExtractor.cs
│   │   ├── SpeakerEmbeddingExtractorConfig.cs
│   │   ├── SpeakerEmbeddingManager.cs
│   │   ├── SpeechSegment.cs
│   │   ├── SpokenLanguageIdentification.cs
│   │   ├── SpokenLanguageIdentificationConfig.cs
│   │   ├── SpokenLanguageIdentificationResult.cs
│   │   ├── SpokenLanguageIdentificationWhisperConfig.cs
│   │   ├── TenVadModelConfig.cs
│   │   ├── VadModelConfig.cs
│   │   ├── VersionInfo.cs
│   │   ├── VoiceActivityDetector.cs
│   │   ├── examples/
│   │   │   ├── Common.csproj
│   │   │   └── README.md
│   │   ├── generate.py
│   │   ├── sherpa-onnx.csproj.in
│   │   └── sherpa-onnx.csproj.runtime.in
│   ├── export_bpe_vocab.py
│   ├── flutter/
│   │   ├── .gitignore
│   │   ├── build-android-streaming-asr.sh.in
│   │   ├── build-android-tts.sh.in
│   │   ├── build-linux-streaming-asr.sh.in
│   │   ├── build-linux-tts.sh.in
│   │   ├── build-macos-streaming-asr.sh.in
│   │   ├── build-macos-tts.sh.in
│   │   ├── build-windows-streaming-asr.sh.in
│   │   ├── build-windows-tts.sh.in
│   │   ├── generate-streaming-asr.py
│   │   └── generate-tts.py
│   ├── go/
│   │   ├── README.md
│   │   ├── _internal/
│   │   │   ├── .gitignore
│   │   │   ├── add-punctuation/
│   │   │   │   └── go.mod
│   │   │   ├── add-punctuation-online/
│   │   │   │   └── go.mod
│   │   │   ├── build_darwin_amd64.go
│   │   │   ├── build_darwin_arm64.go
│   │   │   ├── build_linux_amd64.go
│   │   │   ├── build_linux_arm.go
│   │   │   ├── build_linux_arm64.go
│   │   │   ├── build_windows_386.go
│   │   │   ├── build_windows_amd64.go
│   │   │   ├── go.mod
│   │   │   ├── lib/
│   │   │   │   └── x86_64-pc-windows-gnu/
│   │   │   │       └── .gitkeep
│   │   │   ├── non-streaming-canary-decode-files/
│   │   │   │   └── go.mod
│   │   │   ├── non-streaming-funasr-nano-decode-files/
│   │   │   │   └── go.mod
│   │   │   ├── non-streaming-omnilingual-asr-ctc-decode-files/
│   │   │   │   └── go.mod
│   │   │   ├── non-streaming-speaker-diarization/
│   │   │   │   └── go.mod
│   │   │   ├── supertonic-tts/
│   │   │   │   └── go.mod
│   │   │   ├── vad-speaker-identification/
│   │   │   │   └── go.mod
│   │   │   ├── zero-shot-zipvoice-tts/
│   │   │   │   └── go.mod
│   │   │   └── zero-shot-zipvoice-tts-play/
│   │   │       └── go.mod
│   │   ├── defines.go.jinja
│   │   ├── generate.py
│   │   ├── release.sh
│   │   ├── sherpa_onnx.go
│   │   └── ssh_config
│   ├── gtcrn/
│   │   ├── README.md
│   │   ├── add_meta_data.py
│   │   ├── show.py
│   │   └── test.py
│   ├── hap/
│   │   ├── .gitignore
│   │   └── build-hap-vad-asr.sh.in
│   ├── kitten-tts/
│   │   ├── README.md
│   │   ├── mini_v0_1/
│   │   │   ├── add_meta_data.py
│   │   │   ├── convert_opset.py
│   │   │   └── generate_samples.py
│   │   ├── nano_v0_1/
│   │   │   ├── add_meta_data.py
│   │   │   ├── convert_opset.py
│   │   │   ├── generate_samples.py
│   │   │   ├── generate_tokens.py
│   │   │   ├── generate_voices_bin.py
│   │   │   ├── show.py
│   │   │   └── test.py
│   │   └── nano_v0_2/
│   │       ├── add_meta_data.py
│   │       ├── convert_opset.py
│   │       └── generate_samples.py
│   ├── kokoro/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── v0.19/
│   │   │   ├── .gitignore
│   │   │   ├── __init__.py
│   │   │   ├── add_meta_data.py
│   │   │   ├── dynamic_quantization.py
│   │   │   ├── generate_samples.py
│   │   │   ├── generate_tokens.py
│   │   │   ├── generate_voices_bin.py
│   │   │   └── test.py
│   │   ├── v1.0/
│   │   │   ├── .gitignore
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── add_meta_data.py
│   │   │   ├── dynamic_quantization.py
│   │   │   ├── export_onnx.py
│   │   │   ├── generate_lexicon_en.py
│   │   │   ├── generate_lexicon_zh.py
│   │   │   ├── generate_samples.py
│   │   │   ├── generate_tokens.py
│   │   │   ├── generate_voices_bin.py
│   │   │   └── test.py
│   │   └── v1.1-zh/
│   │       ├── README.md
│   │       ├── add_meta_data.py
│   │       ├── dynamic_quantization.py
│   │       ├── export_onnx.py
│   │       ├── generate_lexicon_zh.py
│   │       ├── generate_samples.py
│   │       ├── generate_voices_bin.py
│   │       └── test.py
│   ├── lazarus/
│   │   └── generate-subtitles.py
│   ├── matcha-tts/
│   │   ├── README.md
│   │   ├── en/
│   │   │   └── generate_samples.py
│   │   ├── fa-en/
│   │   │   ├── .gitignore
│   │   │   ├── README.md
│   │   │   ├── add_meta_data.py
│   │   │   └── test.py
│   │   ├── zh/
│   │   │   └── generate_samples.py
│   │   └── zh-en/
│   │       ├── .gitignore
│   │       ├── README.md
│   │       ├── generate_lexicon.py
│   │       ├── generate_samples.py
│   │       ├── generate_tokens.py
│   │       └── test.py
│   ├── medasr/
│   │   ├── README.md
│   │   ├── export_onnx.py
│   │   └── test_onnx.py
│   ├── melo-tts/
│   │   ├── README.md
│   │   ├── export-onnx-en.py
│   │   ├── export-onnx.py
│   │   ├── show-info.py
│   │   └── test.py
│   ├── mobile-asr-models/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── dynamic_quantization.py
│   │   ├── generate-asr.py
│   │   ├── generate-kws.py
│   │   ├── parse_options.sh
│   │   └── run2.sh.in
│   ├── moonshine/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── export-onnx.py
│   │   ├── test.py
│   │   └── v2/
│   │       ├── README.md
│   │       ├── generate_tokens.py
│   │       └── test.py
│   ├── nemo/
│   │   ├── .gitignore
│   │   ├── GigaAM/
│   │   │   ├── README.md
│   │   │   ├── export-onnx-ctc-v2.py
│   │   │   ├── export-onnx-ctc-v3-punct.py
│   │   │   ├── export-onnx-ctc-v3.py
│   │   │   ├── export-onnx-ctc.py
│   │   │   ├── export-onnx-rnnt-v2.py
│   │   │   ├── export-onnx-rnnt-v3-punct.py
│   │   │   ├── export-onnx-rnnt-v3.py
│   │   │   ├── export-onnx-rnnt.py
│   │   │   ├── run-ctc-v2.sh
│   │   │   ├── run-ctc-v3-punct.sh
│   │   │   ├── run-ctc-v3.sh
│   │   │   ├── run-ctc.sh
│   │   │   ├── run-rnnt-v2.sh
│   │   │   ├── run-rnnt-v3-punct.sh
│   │   │   ├── run-rnnt-v3.sh
│   │   │   ├── run-rnnt.sh
│   │   │   ├── test-onnx-ctc.py
│   │   │   └── test-onnx-rnnt.py
│   │   ├── README.md
│   │   ├── canary/
│   │   │   ├── export_onnx_180m_flash.py
│   │   │   ├── run_180m_flash.sh
│   │   │   └── test_180m_flash.py
│   │   ├── fast-conformer-hybrid-transducer-ctc/
│   │   │   ├── README.md
│   │   │   ├── export-onnx-ctc-non-streaming.py
│   │   │   ├── export-onnx-ctc.py
│   │   │   ├── export-onnx-transducer-non-streaming.py
│   │   │   ├── export-onnx-transducer.py
│   │   │   ├── run-ctc-non-streaming-2.sh
│   │   │   ├── run-ctc-non-streaming.sh
│   │   │   ├── run-ctc.sh
│   │   │   ├── run-transducer-non-streaming-2.sh
│   │   │   ├── run-transducer-non-streaming.sh
│   │   │   ├── run-transducer.sh
│   │   │   ├── show-onnx-transudcer.py
│   │   │   ├── test-onnx-ctc-non-streaming.py
│   │   │   ├── test-onnx-ctc.py
│   │   │   ├── test-onnx-transducer-non-streaming.py
│   │   │   └── test-onnx-transducer.py
│   │   ├── generate_bpe_vocab.py
│   │   ├── nemotron-speech-streaming-en-0.6b/
│   │   │   └── export_onnx.py
│   │   ├── parakeet-tdt-0.6b-v2/
│   │   │   ├── export_onnx.py
│   │   │   └── test_onnx.py
│   │   ├── parakeet-tdt-0.6b-v3/
│   │   │   └── export_onnx.py
│   │   ├── parakeet-tdt_ctc-0.6b-ja/
│   │   │   ├── export-onnx-ctc.py
│   │   │   └── run-ctc.sh
│   │   └── speaker-verification/
│   │       ├── README.md
│   │       ├── export-onnx.py
│   │       └── test-onnx.py
│   ├── node-addon-api/
│   │   ├── .gitignore
│   │   ├── CMakeLists.txt
│   │   ├── README.md
│   │   ├── lib/
│   │   │   ├── addon-static-import.js
│   │   │   ├── addon.js
│   │   │   ├── audio-tagg.js
│   │   │   ├── keyword-spotter.js
│   │   │   ├── non-streaming-asr.js
│   │   │   ├── non-streaming-speaker-diarization.js
│   │   │   ├── non-streaming-speech-denoiser.js
│   │   │   ├── non-streaming-tts.js
│   │   │   ├── online-speech-denoiser.js
│   │   │   ├── punctuation.js
│   │   │   ├── sherpa-onnx.js
│   │   │   ├── speaker-identification.js
│   │   │   ├── spoken-language-identification.js
│   │   │   ├── streaming-asr.js
│   │   │   ├── types.js
│   │   │   └── vad.js
│   │   ├── package.json
│   │   ├── test/
│   │   │   ├── test_asr_streaming_transducer.js
│   │   │   └── test_binding.js
│   │   └── tsconfig.json
│   ├── nodejs/
│   │   ├── README.md
│   │   ├── index.js
│   │   └── package.json
│   ├── omnilingual-asr/
│   │   ├── README.md
│   │   ├── export-onnx.py
│   │   └── test.py
│   ├── paraformer/
│   │   ├── .gitignore
│   │   ├── ascend-npu/
│   │   │   ├── export_decoder_onnx.py
│   │   │   ├── export_encoder_onnx.py
│   │   │   ├── export_predictor_onnx.py
│   │   │   └── test_om.py
│   │   ├── qnn/
│   │   │   ├── .gitignore
│   │   │   ├── convert_decoder.sh
│   │   │   ├── convert_encoder.sh
│   │   │   ├── convert_predictor.sh
│   │   │   ├── generate_decoder_data.py
│   │   │   ├── generate_encoder_data.py
│   │   │   ├── generate_predictor_data.py
│   │   │   └── test_qnn.py
│   │   └── rknn/
│   │       ├── download-example-model.sh
│   │       ├── export_decoder_onnx.py
│   │       ├── export_encoder_onnx.py
│   │       ├── export_predictor_onnx.py
│   │       ├── export_rknn.py
│   │       ├── test_onnx.py
│   │       └── torch_model.py
│   ├── peng-cheng-starling/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   └── quantize_models.py
│   ├── piper/
│   │   ├── .gitignore
│   │   ├── add_meta_data.py
│   │   ├── dynamic_quantization.py
│   │   ├── generate.py
│   │   ├── generate.sh.in
│   │   └── generate_samples.py.in
│   ├── pocket-tts/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── convert_tokenizer.py
│   │   └── test_tokenizer.py
│   ├── pyannote/
│   │   └── segmentation/
│   │       ├── .gitignore
│   │       ├── README.md
│   │       ├── export-onnx.py
│   │       ├── notes.md
│   │       ├── preprocess.sh
│   │       ├── show-onnx.py
│   │       ├── speaker-diarization-onnx.py
│   │       ├── speaker-diarization-torch.py
│   │       ├── vad-onnx.py
│   │       └── vad-torch.py
│   ├── qnn/
│   │   ├── __init__.py
│   │   ├── device_info.py
│   │   └── generate_config.py
│   ├── sense-voice/
│   │   ├── README-nano.md
│   │   ├── README.md
│   │   ├── ascend-npu/
│   │   │   ├── export_onnx.py
│   │   │   ├── export_onnx_static_shape.py
│   │   │   ├── test_om.py
│   │   │   └── test_om_static.py
│   │   ├── export-onnx.py
│   │   ├── export_onnx_nano.py
│   │   ├── qnn/
│   │   │   ├── .gitignore
│   │   │   ├── decode_logits.py
│   │   │   └── generate_test_data.py
│   │   ├── rknn/
│   │   │   ├── adaptor.py
│   │   │   ├── export-onnx.py
│   │   │   ├── export-rknn.py
│   │   │   ├── nano.py
│   │   │   ├── test_nano_torch.py
│   │   │   ├── test_onnx.py
│   │   │   └── torch_model.py
│   │   ├── show-info.py
│   │   ├── test.py
│   │   └── test_onnx_nano.py
│   ├── silero_vad/
│   │   └── v4/
│   │       ├── README.md
│   │       ├── export-onnx.py
│   │       ├── export-rknn.py
│   │       ├── show.py
│   │       ├── test-on-rk3588-board.py
│   │       └── test-onnx.py
│   ├── spleeter/
│   │   ├── .gitignore
│   │   ├── __init__.py
│   │   ├── convert_to_pb.py
│   │   ├── convert_to_torch.py
│   │   ├── export_onnx.py
│   │   ├── separate.py
│   │   ├── separate_onnx.py
│   │   └── unet.py
│   ├── supertonic/
│   │   ├── README.md
│   │   ├── convert.py
│   │   ├── dump_inputs.py
│   │   ├── gen_calib_configs.py
│   │   ├── generate_indexer_bin.py
│   │   └── generate_voices_bin.py
│   ├── t-one/
│   │   ├── README.md
│   │   ├── add_meta_data.py
│   │   ├── generate_tokens.py
│   │   └── test.py
│   ├── tele-speech/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── add-metadata.py
│   │   └── test.py
│   ├── text2token.py
│   ├── utils.sh
│   ├── uvr_mdx/
│   │   ├── READEME.md
│   │   ├── add_meta_data_and_quantize.py
│   │   ├── show.py
│   │   └── test.py
│   ├── vits/
│   │   ├── .gitignore
│   │   ├── __init__.py
│   │   ├── export-onnx-ljs.py
│   │   └── export-onnx-vctk.py
│   ├── vocos/
│   │   ├── README.md
│   │   ├── add_meta_data.py
│   │   └── test.py
│   ├── wasm/
│   │   ├── generate-tts.py
│   │   ├── generate-vad-asr.py
│   │   ├── run-tts.sh.in
│   │   └── run-vad-asr.sh.in
│   ├── wenet/
│   │   ├── README.md
│   │   ├── export-onnx-streaming.py
│   │   ├── export-onnx.py
│   │   ├── test-onnx-streaming.py
│   │   └── test-onnx.py
│   ├── wespeaker/
│   │   ├── README.md
│   │   ├── add_meta_data.py
│   │   └── test.py
│   ├── wheel/
│   │   ├── README.md
│   │   ├── patch_wheel.py
│   │   ├── sherpa-onnx-bin/
│   │   │   └── setup.py
│   │   └── sherpa-onnx-core/
│   │       ├── .gitignore
│   │       ├── MANIFEST.in
│   │       ├── setup.py
│   │       └── sherpa_onnx/
│   │           ├── __main__.py
│   │           └── _info.py
│   ├── whisper/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── ascend-npu/
│   │   │   └── test_om.py
│   │   ├── export-onnx-with-attention.py
│   │   ├── export-onnx.py
│   │   ├── find_alignment_heads.py
│   │   ├── model-info.md
│   │   ├── requirements.txt
│   │   ├── rknn/
│   │   │   ├── README.md
│   │   │   ├── export_onnx.py
│   │   │   ├── export_rknn.py
│   │   │   ├── generate_decoder_data.py
│   │   │   ├── generate_encoder_data.py
│   │   │   ├── notes.md
│   │   │   ├── test_on_rk3588_board.py
│   │   │   ├── test_onnx.py
│   │   │   ├── test_qnn.py
│   │   │   ├── test_torch.py
│   │   │   └── tiny-en-onnx-info.md
│   │   ├── test.py
│   │   ├── test_torch.py
│   │   └── tools/
│   │       ├── timestamp_viewer.html
│   │       └── whisper_timestamps_csv.py
│   ├── zipformer-ctc/
│   │   └── ascend/
│   │       └── 2025-07-03/
│   │           ├── onnx_test.py
│   │           └── test_om.py
│   └── zipvoice/
│       └── zh-en/
│           ├── generate_lexicon.py
│           └── test_onnx.py
├── setup.py
├── sherpa-onnx/
│   ├── CMakeLists.txt
│   ├── c-api/
│   │   ├── CMakeLists.txt
│   │   ├── Doxyfile
│   │   ├── README.md
│   │   ├── c-api.cc
│   │   ├── c-api.h
│   │   ├── cxx-api.cc
│   │   ├── cxx-api.h
│   │   ├── generate.sh
│   │   ├── mainpage.md
│   │   ├── sherpa-onnx-symbols-c.exp
│   │   └── sherpa-onnx-symbols-c.lds
│   ├── csrc/
│   │   ├── .gitignore
│   │   ├── CMakeLists.txt
│   │   ├── CPPLINT.cfg
│   │   ├── README.md
│   │   ├── alsa-play.cc
│   │   ├── alsa-play.h
│   │   ├── alsa.cc
│   │   ├── alsa.h
│   │   ├── ascend/
│   │   │   ├── macros.h
│   │   │   ├── offline-paraformer-model-ascend.cc
│   │   │   ├── offline-paraformer-model-ascend.h
│   │   │   ├── offline-recognizer-zipformer-ctc-ascend-impl.h
│   │   │   ├── offline-sense-voice-model-ascend.cc
│   │   │   ├── offline-sense-voice-model-ascend.h
│   │   │   ├── offline-whisper-model-ascend.cc
│   │   │   ├── offline-whisper-model-ascend.h
│   │   │   ├── offline-zipformer-ctc-model-ascend.cc
│   │   │   ├── offline-zipformer-ctc-model-ascend.h
│   │   │   ├── utils.cc
│   │   │   └── utils.h
│   │   ├── audio-tagging-ced-impl.h
│   │   ├── audio-tagging-impl.cc
│   │   ├── audio-tagging-impl.h
│   │   ├── audio-tagging-label-file.cc
│   │   ├── audio-tagging-label-file.h
│   │   ├── audio-tagging-model-config.cc
│   │   ├── audio-tagging-model-config.h
│   │   ├── audio-tagging-zipformer-impl.h
│   │   ├── audio-tagging.cc
│   │   ├── audio-tagging.h
│   │   ├── axcl/
│   │   │   ├── axcl-engine-guard.cc
│   │   │   ├── axcl-engine-guard.h
│   │   │   ├── axcl-engine-io-guard.cc
│   │   │   ├── axcl-engine-io-guard.h
│   │   │   ├── axcl-engine-io-info-guard.cc
│   │   │   ├── axcl-engine-io-info-guard.h
│   │   │   ├── axcl-manager.cc
│   │   │   ├── axcl-manager.h
│   │   │   ├── axcl-model.cc
│   │   │   ├── axcl-model.h
│   │   │   ├── offline-sense-voice-model-axcl.cc
│   │   │   ├── offline-sense-voice-model-axcl.h
│   │   │   ├── utils.cc
│   │   │   └── utils.h
│   │   ├── axera/
│   │   │   ├── ax-engine-guard.cc
│   │   │   ├── ax-engine-guard.h
│   │   │   ├── offline-sense-voice-model-axera.cc
│   │   │   ├── offline-sense-voice-model-axera.h
│   │   │   ├── utils.cc
│   │   │   └── utils.h
│   │   ├── base64-decode.cc
│   │   ├── base64-decode.h
│   │   ├── bbpe.cc
│   │   ├── bbpe.h
│   │   ├── cat-test.cc
│   │   ├── cat.cc
│   │   ├── cat.h
│   │   ├── character-lexicon.cc
│   │   ├── character-lexicon.h
│   │   ├── circular-buffer-test.cc
│   │   ├── circular-buffer.cc
│   │   ├── circular-buffer.h
│   │   ├── context-graph-test.cc
│   │   ├── context-graph.cc
│   │   ├── context-graph.h
│   │   ├── display.h
│   │   ├── endpoint.cc
│   │   ├── endpoint.h
│   │   ├── fast-clustering-config.cc
│   │   ├── fast-clustering-config.h
│   │   ├── fast-clustering-test.cc
│   │   ├── fast-clustering.cc
│   │   ├── fast-clustering.h
│   │   ├── features.cc
│   │   ├── features.h
│   │   ├── file-utils.cc
│   │   ├── file-utils.h
│   │   ├── fst-utils.cc
│   │   ├── fst-utils.h
│   │   ├── funasr-nano-tokenizer.cc
│   │   ├── funasr-nano-tokenizer.h
│   │   ├── hifigan-vocoder.cc
│   │   ├── hifigan-vocoder.h
│   │   ├── homophone-replacer.cc
│   │   ├── homophone-replacer.h
│   │   ├── hypothesis.cc
│   │   ├── hypothesis.h
│   │   ├── keyword-spotter-impl.cc
│   │   ├── keyword-spotter-impl.h
│   │   ├── keyword-spotter-transducer-impl.h
│   │   ├── keyword-spotter.cc
│   │   ├── keyword-spotter.h
│   │   ├── kokoro-multi-lang-lexicon.cc
│   │   ├── kokoro-multi-lang-lexicon.h
│   │   ├── lexicon.cc
│   │   ├── lexicon.h
│   │   ├── lodr-fst.cc
│   │   ├── lodr-fst.h
│   │   ├── log.cc
│   │   ├── log.h
│   │   ├── macros.h
│   │   ├── matcha-tts-lexicon.cc
│   │   ├── matcha-tts-lexicon.h
│   │   ├── math-test.cc
│   │   ├── math.cc
│   │   ├── math.h
│   │   ├── melo-tts-lexicon.cc
│   │   ├── melo-tts-lexicon.h
│   │   ├── microphone.cc
│   │   ├── microphone.h
│   │   ├── normal-data-generator.cc
│   │   ├── normal-data-generator.h
│   │   ├── offline-canary-model-config.cc
│   │   ├── offline-canary-model-config.h
│   │   ├── offline-canary-model-meta-data.h
│   │   ├── offline-canary-model.cc
│   │   ├── offline-canary-model.h
│   │   ├── offline-ced-model.cc
│   │   ├── offline-ced-model.h
│   │   ├── offline-ct-transformer-model-meta-data.h
│   │   ├── offline-ct-transformer-model.cc
│   │   ├── offline-ct-transformer-model.h
│   │   ├── offline-ctc-decoder.h
│   │   ├── offline-ctc-fst-decoder-config.cc
│   │   ├── offline-ctc-fst-decoder-config.h
│   │   ├── offline-ctc-fst-decoder.cc
│   │   ├── offline-ctc-fst-decoder.h
│   │   ├── offline-ctc-greedy-search-decoder.cc
│   │   ├── offline-ctc-greedy-search-decoder.h
│   │   ├── offline-ctc-model.cc
│   │   ├── offline-ctc-model.h
│   │   ├── offline-dolphin-model-config.cc
│   │   ├── offline-dolphin-model-config.h
│   │   ├── offline-dolphin-model-meta-data.h
│   │   ├── offline-dolphin-model.cc
│   │   ├── offline-dolphin-model.h
│   │   ├── offline-fire-red-asr-ctc-model-config.cc
│   │   ├── offline-fire-red-asr-ctc-model-config.h
│   │   ├── offline-fire-red-asr-ctc-model.cc
│   │   ├── offline-fire-red-asr-ctc-model.h
│   │   ├── offline-fire-red-asr-decoder.h
│   │   ├── offline-fire-red-asr-greedy-search-decoder.cc
│   │   ├── offline-fire-red-asr-greedy-search-decoder.h
│   │   ├── offline-fire-red-asr-model-config.cc
│   │   ├── offline-fire-red-asr-model-config.h
│   │   ├── offline-fire-red-asr-model-meta-data.h
│   │   ├── offline-fire-red-asr-model.cc
│   │   ├── offline-fire-red-asr-model.h
│   │   ├── offline-funasr-nano-model-config.cc
│   │   ├── offline-funasr-nano-model-config.h
│   │   ├── offline-funasr-nano-model.cc
│   │   ├── offline-funasr-nano-model.h
│   │   ├── offline-lm-config.cc
│   │   ├── offline-lm-config.h
│   │   ├── offline-lm.cc
│   │   ├── offline-lm.h
│   │   ├── offline-medasr-ctc-model-config.cc
│   │   ├── offline-medasr-ctc-model-config.h
│   │   ├── offline-medasr-ctc-model.cc
│   │   ├── offline-medasr-ctc-model.h
│   │   ├── offline-model-config.cc
│   │   ├── offline-model-config.h
│   │   ├── offline-moonshine-decoder.h
│   │   ├── offline-moonshine-greedy-search-decoder.cc
│   │   ├── offline-moonshine-greedy-search-decoder.h
│   │   ├── offline-moonshine-model-config.cc
│   │   ├── offline-moonshine-model-config.h
│   │   ├── offline-moonshine-model-v2.cc
│   │   ├── offline-moonshine-model-v2.h
│   │   ├── offline-moonshine-model.cc
│   │   ├── offline-moonshine-model.h
│   │   ├── offline-moonshine-v2-greedy-search-decoder.cc
│   │   ├── offline-moonshine-v2-greedy-search-decoder.h
│   │   ├── offline-nemo-enc-dec-ctc-model-config.cc
│   │   ├── offline-nemo-enc-dec-ctc-model-config.h
│   │   ├── offline-nemo-enc-dec-ctc-model.cc
│   │   ├── offline-nemo-enc-dec-ctc-model.h
│   │   ├── offline-omnilingual-asr-ctc-model-config.cc
│   │   ├── offline-omnilingual-asr-ctc-model-config.h
│   │   ├── offline-omnilingual-asr-ctc-model.cc
│   │   ├── offline-omnilingual-asr-ctc-model.h
│   │   ├── offline-paraformer-decoder.h
│   │   ├── offline-paraformer-greedy-search-decoder.cc
│   │   ├── offline-paraformer-greedy-search-decoder.h
│   │   ├── offline-paraformer-model-config.cc
│   │   ├── offline-paraformer-model-config.h
│   │   ├── offline-paraformer-model.cc
│   │   ├── offline-paraformer-model.h
│   │   ├── offline-punctuation-ct-transformer-impl.h
│   │   ├── offline-punctuation-impl.cc
│   │   ├── offline-punctuation-impl.h
│   │   ├── offline-punctuation-model-config.cc
│   │   ├── offline-punctuation-model-config.h
│   │   ├── offline-punctuation.cc
│   │   ├── offline-punctuation.h
│   │   ├── offline-recognizer-canary-impl.h
│   │   ├── offline-recognizer-ctc-impl.h
│   │   ├── offline-recognizer-fire-red-asr-impl.h
│   │   ├── offline-recognizer-funasr-nano-impl.cc
│   │   ├── offline-recognizer-funasr-nano-impl.h
│   │   ├── offline-recognizer-impl.cc
│   │   ├── offline-recognizer-impl.h
│   │   ├── offline-recognizer-moonshine-impl.h
│   │   ├── offline-recognizer-moonshine-v2-impl.h
│   │   ├── offline-recognizer-paraformer-impl.h
│   │   ├── offline-recognizer-paraformer-tpl-impl.h
│   │   ├── offline-recognizer-sense-voice-impl.h
│   │   ├── offline-recognizer-sense-voice-tpl-impl.h
│   │   ├── offline-recognizer-transducer-impl.h
│   │   ├── offline-recognizer-transducer-nemo-impl.h
│   │   ├── offline-recognizer-whisper-impl.h
│   │   ├── offline-recognizer-whisper-tpl-impl.h
│   │   ├── offline-recognizer.cc
│   │   ├── offline-recognizer.h
│   │   ├── offline-rnn-lm.cc
│   │   ├── offline-rnn-lm.h
│   │   ├── offline-sense-voice-model-config.cc
│   │   ├── offline-sense-voice-model-config.h
│   │   ├── offline-sense-voice-model-meta-data.h
│   │   ├── offline-sense-voice-model.cc
│   │   ├── offline-sense-voice-model.h
│   │   ├── offline-source-separation-impl.cc
│   │   ├── offline-source-separation-impl.h
│   │   ├── offline-source-separation-model-config.cc
│   │   ├── offline-source-separation-model-config.h
│   │   ├── offline-source-separation-spleeter-impl.h
│   │   ├── offline-source-separation-spleeter-model-config.cc
│   │   ├── offline-source-separation-spleeter-model-config.h
│   │   ├── offline-source-separation-spleeter-model-meta-data.h
│   │   ├── offline-source-separation-spleeter-model.cc
│   │   ├── offline-source-separation-spleeter-model.h
│   │   ├── offline-source-separation-uvr-impl.h
│   │   ├── offline-source-separation-uvr-model-config.cc
│   │   ├── offline-source-separation-uvr-model-config.h
│   │   ├── offline-source-separation-uvr-model-meta-data.h
│   │   ├── offline-source-separation-uvr-model.cc
│   │   ├── offline-source-separation-uvr-model.h
│   │   ├── offline-source-separation.cc
│   │   ├── offline-source-separation.h
│   │   ├── offline-speaker-diarization-impl.cc
│   │   ├── offline-speaker-diarization-impl.h
│   │   ├── offline-speaker-diarization-pyannote-impl.h
│   │   ├── offline-speaker-diarization-result.cc
│   │   ├── offline-speaker-diarization-result.h
│   │   ├── offline-speaker-diarization.cc
│   │   ├── offline-speaker-diarization.h
│   │   ├── offline-speaker-segmentation-model-config.cc
│   │   ├── offline-speaker-segmentation-model-config.h
│   │   ├── offline-speaker-segmentation-pyannote-model-config.cc
│   │   ├── offline-speaker-segmentation-pyannote-model-config.h
│   │   ├── offline-speaker-segmentation-pyannote-model-meta-data.h
│   │   ├── offline-speaker-segmentation-pyannote-model.cc
│   │   ├── offline-speaker-segmentation-pyannote-model.h
│   │   ├── offline-speech-denoiser-dpdfnet-impl.h
│   │   ├── offline-speech-denoiser-dpdfnet-model-config.cc
│   │   ├── offline-speech-denoiser-dpdfnet-model-config.h
│   │   ├── offline-speech-denoiser-dpdfnet-model-meta-data.h
│   │   ├── offline-speech-denoiser-dpdfnet-model.cc
│   │   ├── offline-speech-denoiser-dpdfnet-model.h
│   │   ├── offline-speech-denoiser-gtcrn-impl.h
│   │   ├── offline-speech-denoiser-gtcrn-model-config.cc
│   │   ├── offline-speech-denoiser-gtcrn-model-config.h
│   │   ├── offline-speech-denoiser-gtcrn-model-meta-data.h
│   │   ├── offline-speech-denoiser-gtcrn-model.cc
│   │   ├── offline-speech-denoiser-gtcrn-model.h
│   │   ├── offline-speech-denoiser-impl.cc
│   │   ├── offline-speech-denoiser-impl.h
│   │   ├── offline-speech-denoiser-model-config.cc
│   │   ├── offline-speech-denoiser-model-config.h
│   │   ├── offline-speech-denoiser.cc
│   │   ├── offline-speech-denoiser.h
│   │   ├── offline-stream.cc
│   │   ├── offline-stream.h
│   │   ├── offline-tdnn-ctc-model.cc
│   │   ├── offline-tdnn-ctc-model.h
│   │   ├── offline-tdnn-model-config.cc
│   │   ├── offline-tdnn-model-config.h
│   │   ├── offline-telespeech-ctc-model.cc
│   │   ├── offline-telespeech-ctc-model.h
│   │   ├── offline-transducer-decoder.h
│   │   ├── offline-transducer-greedy-search-decoder.cc
│   │   ├── offline-transducer-greedy-search-decoder.h
│   │   ├── offline-transducer-greedy-search-nemo-decoder.cc
│   │   ├── offline-transducer-greedy-search-nemo-decoder.h
│   │   ├── offline-transducer-model-config.cc
│   │   ├── offline-transducer-model-config.h
│   │   ├── offline-transducer-model.cc
│   │   ├── offline-transducer-model.h
│   │   ├── offline-transducer-modified-beam-search-decoder.cc
│   │   ├── offline-transducer-modified-beam-search-decoder.h
│   │   ├── offline-transducer-modified-beam-search-nemo-decoder.cc
│   │   ├── offline-transducer-modified-beam-search-nemo-decoder.h
│   │   ├── offline-transducer-nemo-model.cc
│   │   ├── offline-transducer-nemo-model.h
│   │   ├── offline-tts-character-frontend.cc
│   │   ├── offline-tts-character-frontend.h
│   │   ├── offline-tts-frontend.cc
│   │   ├── offline-tts-frontend.h
│   │   ├── offline-tts-impl.cc
│   │   ├── offline-tts-impl.h
│   │   ├── offline-tts-kitten-impl.h
│   │   ├── offline-tts-kitten-model-config.cc
│   │   ├── offline-tts-kitten-model-config.h
│   │   ├── offline-tts-kitten-model-meta-data.h
│   │   ├── offline-tts-kitten-model.cc
│   │   ├── offline-tts-kitten-model.h
│   │   ├── offline-tts-kokoro-impl.h
│   │   ├── offline-tts-kokoro-model-config.cc
│   │   ├── offline-tts-kokoro-model-config.h
│   │   ├── offline-tts-kokoro-model-meta-data.h
│   │   ├── offline-tts-kokoro-model.cc
│   │   ├── offline-tts-kokoro-model.h
│   │   ├── offline-tts-matcha-impl.h
│   │   ├── offline-tts-matcha-model-config.cc
│   │   ├── offline-tts-matcha-model-config.h
│   │   ├── offline-tts-matcha-model-meta-data.h
│   │   ├── offline-tts-matcha-model.cc
│   │   ├── offline-tts-matcha-model.h
│   │   ├── offline-tts-model-config.cc
│   │   ├── offline-tts-model-config.h
│   │   ├── offline-tts-pocket-impl.h
│   │   ├── offline-tts-pocket-model-config.cc
│   │   ├── offline-tts-pocket-model-config.h
│   │   ├── offline-tts-pocket-model.cc
│   │   ├── offline-tts-pocket-model.h
│   │   ├── offline-tts-supertonic-impl.cc
│   │   ├── offline-tts-supertonic-impl.h
│   │   ├── offline-tts-supertonic-model-config.cc
│   │   ├── offline-tts-supertonic-model-config.h
│   │   ├── offline-tts-supertonic-model.cc
│   │   ├── offline-tts-supertonic-model.h
│   │   ├── offline-tts-supertonic-unicode-processor.cc
│   │   ├── offline-tts-supertonic-unicode-processor.h
│   │   ├── offline-tts-vits-impl.h
│   │   ├── offline-tts-vits-model-config.cc
│   │   ├── offline-tts-vits-model-config.h
│   │   ├── offline-tts-vits-model-meta-data.h
│   │   ├── offline-tts-vits-model.cc
│   │   ├── offline-tts-vits-model.h
│   │   ├── offline-tts-zipvoice-impl.h
│   │   ├── offline-tts-zipvoice-model-config.cc
│   │   ├── offline-tts-zipvoice-model-config.h
│   │   ├── offline-tts-zipvoice-model-meta-data.h
│   │   ├── offline-tts-zipvoice-model.cc
│   │   ├── offline-tts-zipvoice-model.h
│   │   ├── offline-tts.cc
│   │   ├── offline-tts.h
│   │   ├── offline-websocket-server-impl.cc
│   │   ├── offline-websocket-server-impl.h
│   │   ├── offline-websocket-server.cc
│   │   ├── offline-wenet-ctc-model-config.cc
│   │   ├── offline-wenet-ctc-model-config.h
│   │   ├── offline-wenet-ctc-model.cc
│   │   ├── offline-wenet-ctc-model.h
│   │   ├── offline-whisper-decoder.h
│   │   ├── offline-whisper-dtw.cc
│   │   ├── offline-whisper-dtw.h
│   │   ├── offline-whisper-greedy-search-decoder.cc
│   │   ├── offline-whisper-greedy-search-decoder.h
│   │   ├── offline-whisper-model-config.cc
│   │   ├── offline-whisper-model-config.h
│   │   ├── offline-whisper-model.cc
│   │   ├── offline-whisper-model.h
│   │   ├── offline-whisper-timestamp-rules-test.cc
│   │   ├── offline-whisper-timestamp-rules.cc
│   │   ├── offline-whisper-timestamp-rules.h
│   │   ├── offline-zipformer-audio-tagging-model-config.cc
│   │   ├── offline-zipformer-audio-tagging-model-config.h
│   │   ├── offline-zipformer-audio-tagging-model.cc
│   │   ├── offline-zipformer-audio-tagging-model.h
│   │   ├── offline-zipformer-ctc-model-config.cc
│   │   ├── offline-zipformer-ctc-model-config.h
│   │   ├── offline-zipformer-ctc-model.cc
│   │   ├── offline-zipformer-ctc-model.h
│   │   ├── online-cnn-bilstm-model-meta-data.h
│   │   ├── online-cnn-bilstm-model.cc
│   │   ├── online-cnn-bilstm-model.h
│   │   ├── online-conformer-transducer-model.cc
│   │   ├── online-conformer-transducer-model.h
│   │   ├── online-ctc-decoder.h
│   │   ├── online-ctc-fst-decoder-config.cc
│   │   ├── online-ctc-fst-decoder-config.h
│   │   ├── online-ctc-fst-decoder.cc
│   │   ├── online-ctc-fst-decoder.h
│   │   ├── online-ctc-greedy-search-decoder.cc
│   │   ├── online-ctc-greedy-search-decoder.h
│   │   ├── online-ctc-model.cc
│   │   ├── online-ctc-model.h
│   │   ├── online-ebranchformer-transducer-model.cc
│   │   ├── online-ebranchformer-transducer-model.h
│   │   ├── online-lm-config.cc
│   │   ├── online-lm-config.h
│   │   ├── online-lm.cc
│   │   ├── online-lm.h
│   │   ├── online-lstm-transducer-model.cc
│   │   ├── online-lstm-transducer-model.h
│   │   ├── online-model-config.cc
│   │   ├── online-model-config.h
│   │   ├── online-nemo-ctc-model-config.cc
│   │   ├── online-nemo-ctc-model-config.h
│   │   ├── online-nemo-ctc-model.cc
│   │   ├── online-nemo-ctc-model.h
│   │   ├── online-paraformer-decoder.h
│   │   ├── online-paraformer-model-config.cc
│   │   ├── online-paraformer-model-config.h
│   │   ├── online-paraformer-model.cc
│   │   ├── online-paraformer-model.h
│   │   ├── online-punctuation-cnn-bilstm-impl.h
│   │   ├── online-punctuation-impl.cc
│   │   ├── online-punctuation-impl.h
│   │   ├── online-punctuation-model-config.cc
│   │   ├── online-punctuation-model-config.h
│   │   ├── online-punctuation.cc
│   │   ├── online-punctuation.h
│   │   ├── online-recognizer-ctc-impl.h
│   │   ├── online-recognizer-impl.cc
│   │   ├── online-recognizer-impl.h
│   │   ├── online-recognizer-paraformer-impl.h
│   │   ├── online-recognizer-transducer-impl.h
│   │   ├── online-recognizer-transducer-nemo-impl.h
│   │   ├── online-recognizer.cc
│   │   ├── online-recognizer.h
│   │   ├── online-rnn-lm.cc
│   │   ├── online-rnn-lm.h
│   │   ├── online-speech-denoiser-dpdfnet-impl.h
│   │   ├── online-speech-denoiser-gtcrn-impl.h
│   │   ├── online-speech-denoiser-impl.cc
│   │   ├── online-speech-denoiser-impl.h
│   │   ├── online-speech-denoiser-stft-impl.h
│   │   ├── online-speech-denoiser.cc
│   │   ├── online-speech-denoiser.h
│   │   ├── online-stream.cc
│   │   ├── online-stream.h
│   │   ├── online-t-one-ctc-model-config.cc
│   │   ├── online-t-one-ctc-model-config.h
│   │   ├── online-t-one-ctc-model.cc
│   │   ├── online-t-one-ctc-model.h
│   │   ├── online-transducer-decoder.cc
│   │   ├── online-transducer-decoder.h
│   │   ├── online-transducer-greedy-search-decoder.cc
│   │   ├── online-transducer-greedy-search-decoder.h
│   │   ├── online-transducer-greedy-search-nemo-decoder.cc
│   │   ├── online-transducer-greedy-search-nemo-decoder.h
│   │   ├── online-transducer-model-config.cc
│   │   ├── online-transducer-model-config.h
│   │   ├── online-transducer-model.cc
│   │   ├── online-transducer-model.h
│   │   ├── online-transducer-modified-beam-search-decoder.cc
│   │   ├── online-transducer-modified-beam-search-decoder.h
│   │   ├── online-transducer-nemo-model.cc
│   │   ├── online-transducer-nemo-model.h
│   │   ├── online-websocket-client.cc
│   │   ├── online-websocket-server-impl.cc
│   │   ├── online-websocket-server-impl.h
│   │   ├── online-websocket-server.cc
│   │   ├── online-wenet-ctc-model-config.cc
│   │   ├── online-wenet-ctc-model-config.h
│   │   ├── online-wenet-ctc-model.cc
│   │   ├── online-wenet-ctc-model.h
│   │   ├── online-zipformer-transducer-model.cc
│   │   ├── online-zipformer-transducer-model.h
│   │   ├── online-zipformer2-ctc-model-config.cc
│   │   ├── online-zipformer2-ctc-model-config.h
│   │   ├── online-zipformer2-ctc-model.cc
│   │   ├── online-zipformer2-ctc-model.h
│   │   ├── online-zipformer2-transducer-model.cc
│   │   ├── online-zipformer2-transducer-model.h
│   │   ├── onnx-utils.cc
│   │   ├── onnx-utils.h
│   │   ├── packed-sequence-test.cc
│   │   ├── packed-sequence.cc
│   │   ├── packed-sequence.h
│   │   ├── pad-sequence-test.cc
│   │   ├── pad-sequence.cc
│   │   ├── pad-sequence.h
│   │   ├── parse-options.cc
│   │   ├── parse-options.h
│   │   ├── phrase-matcher.cc
│   │   ├── phrase-matcher.h
│   │   ├── piper-phonemize-lexicon.cc
│   │   ├── piper-phonemize-lexicon.h
│   │   ├── piper-phonemize-test.cc
│   │   ├── provider-config.cc
│   │   ├── provider-config.h
│   │   ├── provider.cc
│   │   ├── provider.h
│   │   ├── qnn/
│   │   │   ├── macros.h
│   │   │   ├── offline-paraformer-model-qnn.cc
│   │   │   ├── offline-paraformer-model-qnn.h
│   │   │   ├── offline-recognizer-zipformer-ctc-qnn-impl.h
│   │   │   ├── offline-sense-voice-model-qnn.cc
│   │   │   ├── offline-sense-voice-model-qnn.h
│   │   │   ├── offline-zipformer-ctc-model-qnn.cc
│   │   │   ├── offline-zipformer-ctc-model-qnn.h
│   │   │   ├── qnn-backend.cc
│   │   │   ├── qnn-backend.h
│   │   │   ├── qnn-model.cc
│   │   │   ├── qnn-model.h
│   │   │   ├── utils.cc
│   │   │   └── utils.h
│   │   ├── qnn-config.cc
│   │   ├── qnn-config.h
│   │   ├── regex-lang-test.cc
│   │   ├── resample.cc
│   │   ├── resample.h
│   │   ├── rknn/
│   │   │   ├── context-blocking-queue-rknn.cc
│   │   │   ├── context-blocking-queue-rknn.h
│   │   │   ├── keyword-spotter-transducer-rknn-impl.h
│   │   │   ├── macros.h
│   │   │   ├── offline-ctc-greedy-search-decoder-rknn.cc
│   │   │   ├── offline-ctc-greedy-search-decoder-rknn.h
│   │   │   ├── offline-paraformer-model-rknn.cc
│   │   │   ├── offline-paraformer-model-rknn.h
│   │   │   ├── offline-sense-voice-model-rknn.cc
│   │   │   ├── offline-sense-voice-model-rknn.h
│   │   │   ├── online-recognizer-ctc-rknn-impl.h
│   │   │   ├── online-recognizer-transducer-rknn-impl.h
│   │   │   ├── online-stream-rknn.cc
│   │   │   ├── online-stream-rknn.h
│   │   │   ├── online-transducer-decoder-rknn.h
│   │   │   ├── online-transducer-greedy-search-decoder-rknn.cc
│   │   │   ├── online-transducer-greedy-search-decoder-rknn.h
│   │   │   ├── online-transducer-modified-beam-search-decoder-rknn.cc
│   │   │   ├── online-transducer-modified-beam-search-decoder-rknn.h
│   │   │   ├── online-zipformer-ctc-model-rknn.cc
│   │   │   ├── online-zipformer-ctc-model-rknn.h
│   │   │   ├── online-zipformer-transducer-model-rknn.cc
│   │   │   ├── online-zipformer-transducer-model-rknn.h
│   │   │   ├── silero-vad-model-rknn.cc
│   │   │   ├── silero-vad-model-rknn.h
│   │   │   ├── transducer-keyword-decoder-rknn.cc
│   │   │   ├── transducer-keyword-decoder-rknn.h
│   │   │   ├── utils.cc
│   │   │   └── utils.h
│   │   ├── sentence-piece-tokenizer-test.cc
│   │   ├── sentence-piece-tokenizer.cc
│   │   ├── sentence-piece-tokenizer.h
│   │   ├── session.cc
│   │   ├── session.h
│   │   ├── sherpa-display.h
│   │   ├── sherpa-onnx-alsa-offline-audio-tagging.cc
│   │   ├── sherpa-onnx-alsa-offline-speaker-identification.cc
│   │   ├── sherpa-onnx-alsa-offline.cc
│   │   ├── sherpa-onnx-alsa.cc
│   │   ├── sherpa-onnx-keyword-spotter-alsa.cc
│   │   ├── sherpa-onnx-keyword-spotter-microphone.cc
│   │   ├── sherpa-onnx-keyword-spotter.cc
│   │   ├── sherpa-onnx-microphone-offline-audio-tagging.cc
│   │   ├── sherpa-onnx-microphone-offline-speaker-identification.cc
│   │   ├── sherpa-onnx-microphone-offline.cc
│   │   ├── sherpa-onnx-microphone.cc
│   │   ├── sherpa-onnx-offline-audio-tagging.cc
│   │   ├── sherpa-onnx-offline-denoiser.cc
│   │   ├── sherpa-onnx-offline-language-identification.cc
│   │   ├── sherpa-onnx-offline-parallel.cc
│   │   ├── sherpa-onnx-offline-punctuation.cc
│   │   ├── sherpa-onnx-offline-source-separation.cc
│   │   ├── sherpa-onnx-offline-speaker-diarization.cc
│   │   ├── sherpa-onnx-offline-tts-play-alsa.cc
│   │   ├── sherpa-onnx-offline-tts-play.cc
│   │   ├── sherpa-onnx-offline-tts.cc
│   │   ├── sherpa-onnx-offline.cc
│   │   ├── sherpa-onnx-online-denoiser.cc
│   │   ├── sherpa-onnx-online-punctuation.cc
│   │   ├── sherpa-onnx-vad-alsa-offline-asr.cc
│   │   ├── sherpa-onnx-vad-alsa.cc
│   │   ├── sherpa-onnx-vad-microphone-offline-asr.cc
│   │   ├── sherpa-onnx-vad-microphone-simulated-streaming-asr.cc
│   │   ├── sherpa-onnx-vad-microphone.cc
│   │   ├── sherpa-onnx-vad-with-offline-asr.cc
│   │   ├── sherpa-onnx-vad-with-online-asr.cc
│   │   ├── sherpa-onnx-vad.cc
│   │   ├── sherpa-onnx-version.cc
│   │   ├── sherpa-onnx.cc
│   │   ├── silero-vad-model-config.cc
│   │   ├── silero-vad-model-config.h
│   │   ├── silero-vad-model.cc
│   │   ├── silero-vad-model.h
│   │   ├── slice-test.cc
│   │   ├── slice.cc
│   │   ├── slice.h
│   │   ├── speaker-embedding-extractor-general-impl.h
│   │   ├── speaker-embedding-extractor-impl.cc
│   │   ├── speaker-embedding-extractor-impl.h
│   │   ├── speaker-embedding-extractor-model-meta-data.h
│   │   ├── speaker-embedding-extractor-model.cc
│   │   ├── speaker-embedding-extractor-model.h
│   │   ├── speaker-embedding-extractor-nemo-impl.h
│   │   ├── speaker-embedding-extractor-nemo-model-meta-data.h
│   │   ├── speaker-embedding-extractor-nemo-model.cc
│   │   ├── speaker-embedding-extractor-nemo-model.h
│   │   ├── speaker-embedding-extractor.cc
│   │   ├── speaker-embedding-extractor.h
│   │   ├── speaker-embedding-manager-test.cc
│   │   ├── speaker-embedding-manager.cc
│   │   ├── speaker-embedding-manager.h
│   │   ├── spoken-language-identification-impl.cc
│   │   ├── spoken-language-identification-impl.h
│   │   ├── spoken-language-identification-whisper-impl.h
│   │   ├── spoken-language-identification.cc
│   │   ├── spoken-language-identification.h
│   │   ├── stack-test.cc
│   │   ├── stack.cc
│   │   ├── stack.h
│   │   ├── symbol-table.cc
│   │   ├── symbol-table.h
│   │   ├── tee-stream.h
│   │   ├── ten-vad-model-config.cc
│   │   ├── ten-vad-model-config.h
│   │   ├── ten-vad-model.cc
│   │   ├── ten-vad-model.h
│   │   ├── text-utils-test.cc
│   │   ├── text-utils.cc
│   │   ├── text-utils.h
│   │   ├── text2token-test.cc
│   │   ├── timer.cc
│   │   ├── timer.h
│   │   ├── transducer-keyword-decoder.cc
│   │   ├── transducer-keyword-decoder.h
│   │   ├── transpose-test.cc
│   │   ├── transpose.cc
│   │   ├── transpose.h
│   │   ├── unbind-test.cc
│   │   ├── unbind.cc
│   │   ├── unbind.h
│   │   ├── utfcpp-test.cc
│   │   ├── utils.cc
│   │   ├── utils.h
│   │   ├── vad-model-config.cc
│   │   ├── vad-model-config.h
│   │   ├── vad-model.cc
│   │   ├── vad-model.h
│   │   ├── version.cc
│   │   ├── version.h
│   │   ├── vocoder.cc
│   │   ├── vocoder.h
│   │   ├── vocos-vocoder.cc
│   │   ├── vocos-vocoder.h
│   │   ├── voice-activity-detector.cc
│   │   ├── voice-activity-detector.h
│   │   ├── wave-reader-test.cc
│   │   ├── wave-reader.cc
│   │   ├── wave-reader.h
│   │   ├── wave-writer.cc
│   │   └── wave-writer.h
│   ├── java-api/
│   │   ├── .build.txt
│   │   ├── .gitignore
│   │   ├── MANIFEST.MF
│   │   ├── Makefile
│   │   ├── pom.xml
│   │   ├── readme.md
│   │   ├── readme.zh.md
│   │   └── src/
│   │       └── main/
│   │           ├── java/
│   │           │   └── com/
│   │           │       └── k2fsa/
│   │           │           └── sherpa/
│   │           │               └── onnx/
│   │           │                   ├── AudioEvent.java
│   │           │                   ├── AudioTagging.java
│   │           │                   ├── AudioTaggingConfig.java
│   │           │                   ├── AudioTaggingModelConfig.java
│   │           │                   ├── DenoisedAudio.java
│   │           │                   ├── EndpointConfig.java
│   │           │                   ├── EndpointRule.java
│   │           │                   ├── FastClusteringConfig.java
│   │           │                   ├── FeatureConfig.java
│   │           │                   ├── GeneratedAudio.java
│   │           │                   ├── GenerationConfig.java
│   │           │                   ├── HomophoneReplacerConfig.java
│   │           │                   ├── KeywordSpotter.java
│   │           │                   ├── KeywordSpotterConfig.java
│   │           │                   ├── KeywordSpotterResult.java
│   │           │                   ├── LibraryLoader.java
│   │           │                   ├── LibraryUtils.java
│   │           │                   ├── OfflineCanaryModelConfig.java
│   │           │                   ├── OfflineDolphinModelConfig.java
│   │           │                   ├── OfflineFireRedAsrCtcModelConfig.java
│   │           │                   ├── OfflineFireRedAsrModelConfig.java
│   │           │                   ├── OfflineFunAsrNanoModelConfig.java
│   │           │                   ├── OfflineMedAsrCtcModelConfig.java
│   │           │                   ├── OfflineModelConfig.java
│   │           │                   ├── OfflineMoonshineModelConfig.java
│   │           │                   ├── OfflineNemoEncDecCtcModelConfig.java
│   │           │                   ├── OfflineOmnilingualAsrCtcModelConfig.java
│   │           │                   ├── OfflineParaformerModelConfig.java
│   │           │                   ├── OfflinePunctuation.java
│   │           │                   ├── OfflinePunctuationConfig.java
│   │           │                   ├── OfflinePunctuationModelConfig.java
│   │           │                   ├── OfflineRecognizer.java
│   │           │                   ├── OfflineRecognizerConfig.java
│   │           │                   ├── OfflineRecognizerResult.java
│   │           │                   ├── OfflineSenseVoiceModelConfig.java
│   │           │                   ├── OfflineSpeakerDiarization.java
│   │           │                   ├── OfflineSpeakerDiarizationCallback.java
│   │           │                   ├── OfflineSpeakerDiarizationConfig.java
│   │           │                   ├── OfflineSpeakerDiarizationSegment.java
│   │           │                   ├── OfflineSpeakerSegmentationModelConfig.java
│   │           │                   ├── OfflineSpeakerSegmentationPyannoteModelConfig.java
│   │           │                   ├── OfflineSpeechDenoiser.java
│   │           │                   ├── OfflineSpeechDenoiserConfig.java
│   │           │                   ├── OfflineSpeechDenoiserDpdfNetModelConfig.java
│   │           │                   ├── OfflineSpeechDenoiserGtcrnModelConfig.java
│   │           │                   ├── OfflineSpeechDenoiserModelConfig.java
│   │           │                   ├── OfflineStream.java
│   │           │                   ├── OfflineTransducerModelConfig.java
│   │           │                   ├── OfflineTts.java
│   │           │                   ├── OfflineTtsCallback.java
│   │           │                   ├── OfflineTtsConfig.java
│   │           │                   ├── OfflineTtsKittenModelConfig.java
│   │           │                   ├── OfflineTtsKokoroModelConfig.java
│   │           │                   ├── OfflineTtsMatchaModelConfig.java
│   │           │                   ├── OfflineTtsModelConfig.java
│   │           │                   ├── OfflineTtsPocketModelConfig.java
│   │           │                   ├── OfflineTtsSupertonicModelConfig.java
│   │           │                   ├── OfflineTtsVitsModelConfig.java
│   │           │                   ├── OfflineTtsZipVoiceModelConfig.java
│   │           │                   ├── OfflineWenetCtcModelConfig.java
│   │           │                   ├── OfflineWhisperModelConfig.java
│   │           │                   ├── OfflineZipformerAudioTaggingModelConfig.java
│   │           │                   ├── OfflineZipformerCtcModelConfig.java
│   │           │                   ├── OnlineCtcFstDecoderConfig.java
│   │           │                   ├── OnlineLMConfig.java
│   │           │                   ├── OnlineModelConfig.java
│   │           │                   ├── OnlineNeMoCtcModelConfig.java
│   │           │                   ├── OnlineParaformerModelConfig.java
│   │           │                   ├── OnlinePunctuation.java
│   │           │                   ├── OnlinePunctuationConfig.java
│   │           │                   ├── OnlinePunctuationModelConfig.java
│   │           │                   ├── OnlineRecognizer.java
│   │           │                   ├── OnlineRecognizerConfig.java
│   │           │                   ├── OnlineRecognizerResult.java
│   │           │                   ├── OnlineSpeechDenoiser.java
│   │           │                   ├── OnlineSpeechDenoiserConfig.java
│   │           │                   ├── OnlineStream.java
│   │           │                   ├── OnlineToneCtcModelConfig.java
│   │           │                   ├── OnlineTransducerModelConfig.java
│   │           │                   ├── OnlineZipformer2CtcModelConfig.java
│   │           │                   ├── QnnConfig.java
│   │           │                   ├── SileroVadModelConfig.java
│   │           │                   ├── SpeakerEmbeddingExtractor.java
│   │           │                   ├── SpeakerEmbeddingExtractorConfig.java
│   │           │                   ├── SpeakerEmbeddingManager.java
│   │           │                   ├── SpeechSegment.java
│   │           │                   ├── SpokenLanguageIdentification.java
│   │           │                   ├── SpokenLanguageIdentificationConfig.java
│   │           │                   ├── SpokenLanguageIdentificationWhisperConfig.java
│   │           │                   ├── TenVadModelConfig.java
│   │           │                   ├── Vad.java
│   │           │                   ├── VadModelConfig.java
│   │           │                   ├── VersionInfo.java
│   │           │                   ├── WaveData.java
│   │           │                   ├── WaveReader.java
│   │           │                   └── WaveWriter.java
│   │           └── resources/
│   │               ├── .gitignore
│   │               └── readme.md
│   ├── jni/
│   │   ├── CMakeLists.txt
│   │   ├── audio-tagging.cc
│   │   ├── common.cc
│   │   ├── common.h
│   │   ├── generate.sh
│   │   ├── jni.cc
│   │   ├── keyword-spotter.cc
│   │   ├── offline-punctuation.cc
│   │   ├── offline-recognizer.cc
│   │   ├── offline-speaker-diarization.cc
│   │   ├── offline-speech-denoiser.cc
│   │   ├── offline-stream.cc
│   │   ├── offline-tts.cc
│   │   ├── online-punctuation.cc
│   │   ├── online-recognizer.cc
│   │   ├── online-speech-denoiser.cc
│   │   ├── online-stream.cc
│   │   ├── sherpa-onnx-symbols.exp
│   │   ├── sherpa-onnx-symbols.lds
│   │   ├── speaker-embedding-extractor.cc
│   │   ├── speaker-embedding-manager.cc
│   │   ├── speech-denoiser.cc
│   │   ├── speech-denoiser.h
│   │   ├── spoken-language-identification.cc
│   │   ├── version.cc
│   │   ├── voice-activity-detector.cc
│   │   ├── wave-reader.cc
│   │   └── wave-writer.cc
│   ├── kotlin-api/
│   │   ├── AudioTagging.kt
│   │   ├── DenoisedAudio.kt
│   │   ├── FeatureConfig.kt
│   │   ├── HomophoneReplacerConfig.kt
│   │   ├── KeywordSpotter.kt
│   │   ├── OfflinePunctuation.kt
│   │   ├── OfflineRecognizer.kt
│   │   ├── OfflineSpeakerDiarization.kt
│   │   ├── OfflineSpeechDenoiser.kt
│   │   ├── OfflineStream.kt
│   │   ├── OnlinePunctuation.kt
│   │   ├── OnlineRecognizer.kt
│   │   ├── OnlineSpeechDenoiser.kt
│   │   ├── OnlineStream.kt
│   │   ├── QnnConfig.kt
│   │   ├── Speaker.kt
│   │   ├── SpeakerEmbeddingExtractorConfig.kt
│   │   ├── SpokenLanguageIdentification.kt
│   │   ├── Tts.kt
│   │   ├── Vad.kt
│   │   ├── VersionInfo.kt
│   │   └── WaveReader.kt
│   ├── pascal-api/
│   │   ├── README.md
│   │   ├── portaudio.pas
│   │   └── sherpa_onnx.pas
│   ├── python/
│   │   ├── CMakeLists.txt
│   │   ├── csrc/
│   │   │   ├── CMakeLists.txt
│   │   │   ├── alsa.cc
│   │   │   ├── alsa.h
│   │   │   ├── audio-tagging.cc
│   │   │   ├── audio-tagging.h
│   │   │   ├── circular-buffer.cc
│   │   │   ├── circular-buffer.h
│   │   │   ├── cuda-config.cc
│   │   │   ├── cuda-config.h
│   │   │   ├── display.cc
│   │   │   ├── display.h
│   │   │   ├── endpoint.cc
│   │   │   ├── endpoint.h
│   │   │   ├── faked-alsa.cc
│   │   │   ├── fast-clustering.cc
│   │   │   ├── fast-clustering.h
│   │   │   ├── features.cc
│   │   │   ├── features.h
│   │   │   ├── homophone-replacer.cc
│   │   │   ├── homophone-replacer.h
│   │   │   ├── keyword-spotter.cc
│   │   │   ├── keyword-spotter.h
│   │   │   ├── offline-canary-model-config.cc
│   │   │   ├── offline-canary-model-config.h
│   │   │   ├── offline-ctc-fst-decoder-config.cc
│   │   │   ├── offline-ctc-fst-decoder-config.h
│   │   │   ├── offline-dolphin-model-config.cc
│   │   │   ├── offline-dolphin-model-config.h
│   │   │   ├── offline-fire-red-asr-ctc-model-config.cc
│   │   │   ├── offline-fire-red-asr-ctc-model-config.h
│   │   │   ├── offline-fire-red-asr-model-config.cc
│   │   │   ├── offline-fire-red-asr-model-config.h
│   │   │   ├── offline-funasr-nano-model-config.cc
│   │   │   ├── offline-funasr-nano-model-config.h
│   │   │   ├── offline-lm-config.cc
│   │   │   ├── offline-lm-config.h
│   │   │   ├── offline-medasr-ctc-model-config.cc
│   │   │   ├── offline-medasr-ctc-model-config.h
│   │   │   ├── offline-model-config.cc
│   │   │   ├── offline-model-config.h
│   │   │   ├── offline-moonshine-model-config.cc
│   │   │   ├── offline-moonshine-model-config.h
│   │   │   ├── offline-nemo-enc-dec-ctc-model-config.cc
│   │   │   ├── offline-nemo-enc-dec-ctc-model-config.h
│   │   │   ├── offline-omnilingual-asr-ctc-model-config.cc
│   │   │   ├── offline-omnilingual-asr-ctc-model-config.h
│   │   │   ├── offline-paraformer-model-config.cc
│   │   │   ├── offline-paraformer-model-config.h
│   │   │   ├── offline-punctuation.cc
│   │   │   ├── offline-punctuation.h
│   │   │   ├── offline-recognizer.cc
│   │   │   ├── offline-recognizer.h
│   │   │   ├── offline-sense-voice-model-config.cc
│   │   │   ├── offline-sense-voice-model-config.h
│   │   │   ├── offline-source-separation-model-config.cc
│   │   │   ├── offline-source-separation-model-config.h
│   │   │   ├── offline-source-separation-spleeter-model-config.cc
│   │   │   ├── offline-source-separation-spleeter-model-config.h
│   │   │   ├── offline-source-separation-uvr-model-config.cc
│   │   │   ├── offline-source-separation-uvr-model-config.h
│   │   │   ├── offline-source-separation.cc
│   │   │   ├── offline-source-separation.h
│   │   │   ├── offline-speaker-diarization-result.cc
│   │   │   ├── offline-speaker-diarization-result.h
│   │   │   ├── offline-speaker-diarization.cc
│   │   │   ├── offline-speaker-diarization.h
│   │   │   ├── offline-speech-denoiser-dpdfnet-model-config.cc
│   │   │   ├── offline-speech-denoiser-dpdfnet-model-config.h
│   │   │   ├── offline-speech-denoiser-gtcrn-model-config.cc
│   │   │   ├── offline-speech-denoiser-gtcrn-model-config.h
│   │   │   ├── offline-speech-denoiser-model-config.cc
│   │   │   ├── offline-speech-denoiser-model-config.h
│   │   │   ├── offline-speech-denoiser.cc
│   │   │   ├── offline-speech-denoiser.h
│   │   │   ├── offline-stream.cc
│   │   │   ├── offline-stream.h
│   │   │   ├── offline-tdnn-model-config.cc
│   │   │   ├── offline-tdnn-model-config.h
│   │   │   ├── offline-transducer-model-config.cc
│   │   │   ├── offline-transducer-model-config.h
│   │   │   ├── offline-tts-kitten-model-config.cc
│   │   │   ├── offline-tts-kitten-model-config.h
│   │   │   ├── offline-tts-kokoro-model-config.cc
│   │   │   ├── offline-tts-kokoro-model-config.h
│   │   │   ├── offline-tts-matcha-model-config.cc
│   │   │   ├── offline-tts-matcha-model-config.h
│   │   │   ├── offline-tts-model-config.cc
│   │   │   ├── offline-tts-model-config.h
│   │   │   ├── offline-tts-pocket-model-config.cc
│   │   │   ├── offline-tts-pocket-model-config.h
│   │   │   ├── offline-tts-supertonic-model-config.cc
│   │   │   ├── offline-tts-supertonic-model-config.h
│   │   │   ├── offline-tts-vits-model-config.cc
│   │   │   ├── offline-tts-vits-model-config.h
│   │   │   ├── offline-tts-zipvoice-model-config.cc
│   │   │   ├── offline-tts-zipvoice-model-config.h
│   │   │   ├── offline-tts.cc
│   │   │   ├── offline-tts.h
│   │   │   ├── offline-wenet-ctc-model-config.cc
│   │   │   ├── offline-wenet-ctc-model-config.h
│   │   │   ├── offline-whisper-model-config.cc
│   │   │   ├── offline-whisper-model-config.h
│   │   │   ├── offline-zipformer-ctc-model-config.cc
│   │   │   ├── offline-zipformer-ctc-model-config.h
│   │   │   ├── online-ctc-fst-decoder-config.cc
│   │   │   ├── online-ctc-fst-decoder-config.h
│   │   │   ├── online-lm-config.cc
│   │   │   ├── online-lm-config.h
│   │   │   ├── online-model-config.cc
│   │   │   ├── online-model-config.h
│   │   │   ├── online-nemo-ctc-model-config.cc
│   │   │   ├── online-nemo-ctc-model-config.h
│   │   │   ├── online-paraformer-model-config.cc
│   │   │   ├── online-paraformer-model-config.h
│   │   │   ├── online-punctuation.cc
│   │   │   ├── online-punctuation.h
│   │   │   ├── online-recognizer.cc
│   │   │   ├── online-recognizer.h
│   │   │   ├── online-speech-denoiser.cc
│   │   │   ├── online-speech-denoiser.h
│   │   │   ├── online-stream.cc
│   │   │   ├── online-stream.h
│   │   │   ├── online-t-one-ctc-model-config.cc
│   │   │   ├── online-t-one-ctc-model-config.h
│   │   │   ├── online-transducer-model-config.cc
│   │   │   ├── online-transducer-model-config.h
│   │   │   ├── online-wenet-ctc-model-config.cc
│   │   │   ├── online-wenet-ctc-model-config.h
│   │   │   ├── online-zipformer2-ctc-model-config.cc
│   │   │   ├── online-zipformer2-ctc-model-config.h
│   │   │   ├── provider-config.cc
│   │   │   ├── provider-config.h
│   │   │   ├── sentence-piece-tokenizer.cc
│   │   │   ├── sentence-piece-tokenizer.h
│   │   │   ├── sherpa-onnx.cc
│   │   │   ├── sherpa-onnx.h
│   │   │   ├── silero-vad-model-config.cc
│   │   │   ├── silero-vad-model-config.h
│   │   │   ├── speaker-embedding-extractor.cc
│   │   │   ├── speaker-embedding-extractor.h
│   │   │   ├── speaker-embedding-manager.cc
│   │   │   ├── speaker-embedding-manager.h
│   │   │   ├── spoken-language-identification.cc
│   │   │   ├── spoken-language-identification.h
│   │   │   ├── ten-vad-model-config.cc
│   │   │   ├── ten-vad-model-config.h
│   │   │   ├── tensorrt-config.cc
│   │   │   ├── tensorrt-config.h
│   │   │   ├── vad-model-config.cc
│   │   │   ├── vad-model-config.h
│   │   │   ├── vad-model.cc
│   │   │   ├── vad-model.h
│   │   │   ├── version.cc
│   │   │   ├── version.h
│   │   │   ├── voice-activity-detector.cc
│   │   │   ├── voice-activity-detector.h
│   │   │   ├── wave-writer.cc
│   │   │   └── wave-writer.h
│   │   ├── sherpa_onnx/
│   │   │   ├── __init__.py
│   │   │   ├── cli.py
│   │   │   ├── display.py
│   │   │   ├── keyword_spotter.py
│   │   │   ├── offline_recognizer.py
│   │   │   ├── online_recognizer.py
│   │   │   └── utils.py
│   │   └── tests/
│   │       ├── CMakeLists.txt
│   │       ├── test_fast_clustering.py
│   │       ├── test_feature_extractor_config.py
│   │       ├── test_keyword_spotter.py
│   │       ├── test_offline_recognizer.py
│   │       ├── test_online_recognizer.py
│   │       ├── test_online_transducer_model_config.py
│   │       ├── test_speaker_recognition.py
│   │       └── test_text2token.py
│   └── rust/
│       ├── .gitignore
│       ├── .rustfmt.toml
│       ├── Cargo.toml
│       ├── check.sh
│       ├── publish.sh
│       ├── sherpa-onnx/
│       │   ├── Cargo.toml
│       │   └── src/
│       │       ├── audio_tagging.rs
│       │       ├── display.rs
│       │       ├── kws.rs
│       │       ├── lib.rs
│       │       ├── offline_asr.rs
│       │       ├── offline_punctuation.rs
│       │       ├── offline_speaker_diarization.rs
│       │       ├── offline_speech_denoiser.rs
│       │       ├── online_asr.rs
│       │       ├── online_punctuation.rs
│       │       ├── online_speech_denoiser.rs
│       │       ├── speaker_embedding.rs
│       │       ├── spoken_language_identification.rs
│       │       ├── tts.rs
│       │       ├── utils.rs
│       │       ├── vad.rs
│       │       └── wave.rs
│       └── sherpa-onnx-sys/
│           ├── Cargo.toml
│           ├── build.rs
│           └── src/
│               ├── audio_tagging.rs
│               ├── kws.rs
│               ├── lib.rs
│               ├── offline_asr.rs
│               ├── offline_punctuation.rs
│               ├── offline_speaker_diarization.rs
│               ├── online_asr.rs
│               ├── online_punctuation.rs
│               ├── speaker_embedding.rs
│               ├── speech_denoiser.rs
│               ├── spoken_language_identification.rs
│               ├── tts.rs
│               ├── vad.rs
│               └── wave.rs
├── swift-api-examples/
│   ├── .gitignore
│   ├── SherpaOnnx-Bridging-Header.h
│   ├── SherpaOnnx.swift
│   ├── add-punctuation-online.swift
│   ├── add-punctuations.swift
│   ├── compute-speaker-embeddings.swift
│   ├── decode-file-non-streaming.swift
│   ├── decode-file-sense-voice-with-hr.swift
│   ├── decode-file-t-one-streaming.swift
│   ├── decode-file.swift
│   ├── dolphin-ctc-asr.swift
│   ├── fire-red-asr-ctc.swift
│   ├── fire-red-asr.swift
│   ├── funasr-nano.swift
│   ├── generate-subtitles.swift
│   ├── keyword-spotting-from-file.swift
│   ├── medasr-ctc.swift
│   ├── moonshine-v2-asr.swift
│   ├── omnilingual-asr-ctc.swift
│   ├── online-speech-enhancement-dpdfnet.swift
│   ├── online-speech-enhancement-gtcrn.swift
│   ├── run-add-punctuations-online.sh
│   ├── run-add-punctuations.sh
│   ├── run-compute-speaker-embeddings.sh
│   ├── run-decode-file-non-streaming.sh
│   ├── run-decode-file-sense-voice-with-hr.sh
│   ├── run-decode-file-t-one-streaming.sh
│   ├── run-decode-file.sh
│   ├── run-dolphin-ctc-asr.sh
│   ├── run-fire-red-asr-ctc.sh
│   ├── run-fire-red-asr.sh
│   ├── run-funasr-nano-asr.sh
│   ├── run-generate-subtitles-ten-vad.sh
│   ├── run-generate-subtitles.sh
│   ├── run-keyword-spotting-from-file.sh
│   ├── run-medasr-ctc-asr.sh
│   ├── run-moonshine-v2-asr.sh
│   ├── run-omnilingual-asr-ctc-asr.sh
│   ├── run-online-speech-enhancement-dpdfnet.sh
│   ├── run-online-speech-enhancement-gtcrn.sh
│   ├── run-speaker-diarization.sh
│   ├── run-speech-enhancement-dpdfnet.sh
│   ├── run-speech-enhancement-gtcrn.sh
│   ├── run-spoken-language-identification.sh
│   ├── run-streaming-hlg-decode-file.sh
│   ├── run-test-version.sh
│   ├── run-tts-kitten-en.sh
│   ├── run-tts-kokoro-en.sh
│   ├── run-tts-kokoro-zh-en.sh
│   ├── run-tts-matcha-en.sh
│   ├── run-tts-matcha-zh.sh
│   ├── run-tts-pocket-en.sh
│   ├── run-tts-supertonic-en.sh
│   ├── run-tts-vits.sh
│   ├── run-tts-zipvoice.sh
│   ├── run-wenet-ctc-asr.sh
│   ├── run-zipformer-ctc-asr.sh
│   ├── speaker-diarization.swift
│   ├── speech-enhancement-dpdfnet.swift
│   ├── speech-enhancement-gtcrn.swift
│   ├── spoken-language-identification.swift
│   ├── streaming-hlg-decode-file.swift
│   ├── test-version.swift
│   ├── tts-kitten-en.swift
│   ├── tts-kokoro-en.swift
│   ├── tts-kokoro-zh-en.swift
│   ├── tts-matcha-en.swift
│   ├── tts-matcha-zh.swift
│   ├── tts-pocket-en.swift
│   ├── tts-supertonic-en.swift
│   ├── tts-vits.swift
│   ├── tts-zipvoice.swift
│   ├── wenet-ctc-asr.swift
│   └── zipformer-ctc-asr.swift
├── toolchains/
│   ├── aarch64-linux-gnu.toolchain.cmake
│   ├── arm-linux-gnueabihf.toolchain.cmake
│   ├── ios.toolchain.cmake
│   ├── riscv64-linux-gnu-spacemit.toolchain.cmake
│   └── riscv64-linux-gnu.toolchain.cmake
└── wasm/
    ├── CMakeLists.txt
    ├── asr/
    │   ├── .gitignore
    │   ├── CMakeLists.txt
    │   ├── app-asr.js
    │   ├── assets/
    │   │   ├── .gitignore
    │   │   └── README.md
    │   ├── index.html
    │   ├── sherpa-onnx-asr.js
    │   └── sherpa-onnx-wasm-main-asr.cc
    ├── kws/
    │   ├── CMakeLists.txt
    │   ├── app.js
    │   ├── assets/
    │   │   └── README.md
    │   ├── index.html
    │   ├── sherpa-onnx-kws.js
    │   └── sherpa-onnx-wasm-main-kws.cc
    ├── nodejs/
    │   ├── CMakeLists.txt
    │   ├── sherpa-onnx-wasm-nodejs.cc
    │   └── sherpa-onnx-wave.js
    ├── speaker-diarization/
    │   ├── CMakeLists.txt
    │   ├── app-speaker-diarization.js
    │   ├── assets/
    │   │   └── README.md
    │   ├── index.html
    │   ├── sherpa-onnx-speaker-diarization.js
    │   └── sherpa-onnx-wasm-main-speaker-diarization.cc
    ├── speech-enhancement/
    │   ├── CMakeLists.txt
    │   ├── app-speech-enhancement.js
    │   ├── assets/
    │   │   └── README.md
    │   ├── index.html
    │   ├── sherpa-onnx-speech-enhancement.js
    │   └── sherpa-onnx-wasm-main-speech-enhancement.cc
    ├── tts/
    │   ├── CMakeLists.txt
    │   ├── app-tts.js
    │   ├── assets/
    │   │   ├── .gitignore
    │   │   └── README.md
    │   ├── index.html
    │   ├── sherpa-onnx-tts.js
    │   ├── sherpa-onnx-tts.worker.js
    │   └── sherpa-onnx-wasm-main-tts.cc
    ├── vad/
    │   ├── CMakeLists.txt
    │   ├── app-vad.js
    │   ├── assets/
    │   │   └── README.md
    │   ├── index.html
    │   ├── sherpa-onnx-vad.js
    │   └── sherpa-onnx-wasm-main-vad.cc
    └── vad-asr/
        ├── CMakeLists.txt
        ├── app-vad-asr.js
        ├── assets/
        │   └── README.md
        ├── index.html
        └── sherpa-onnx-wasm-main-vad-asr.cc

================================================
FILE CONTENTS
================================================

================================================
FILE: .clang-format
================================================
---
BasedOnStyle: Google
---
Language:               Cpp
Cpp11BracedListStyle:   true
Standard:               Cpp11
DerivePointerAlignment: false
PointerAlignment:       Right
---
Language: Java
JavaImportGroups: [ 'java', 'javax', 'javafx', 'org', 'io', 'com', 'de.gsi' ]
AccessModifierOffset: -4
AlignAfterOpenBracket: DontAlign
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlines: DontAlign
AlignTrailingComments: false
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortLambdasOnASingleLine: None
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: None
AllowShortIfStatementsOnASingleLine: Never
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: false
AlwaysBreakTemplateDeclarations: Yes
BinPackArguments: true
BinPackParameters: true
BraceWrapping:
  AfterClass: false
  AfterControlStatement: Never
  AfterEnum: false
  AfterFunction: false
  AfterNamespace: false
  AfterObjCDeclaration: false
  AfterStruct: false
  AfterUnion: false
  BeforeCatch: false
  BeforeElse: false
  IndentBraces: false
  SplitEmptyFunction: true
  SplitEmptyRecord: true
  SplitEmptyNamespace: true
BreakBeforeBinaryOperators: All
BreakBeforeBraces: Custom
BreakBeforeInheritanceComma: false
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeComma
BreakAfterJavaFieldAnnotations: true
BreakStringLiterals: true
ColumnLimit: 0
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: false
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 8
Cpp11BracedListStyle: false
DerivePointerAlignment: false
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
ForEachMacros:
  - forever # avoids { wrapped to next line
  - foreach
  - Q_FOREACH
  - BOOST_FOREACH
IncludeCategories:
  - Regex: '^<Q.*'
    Priority: 200
IncludeIsMainRegex: '(Test)?$'
IndentCaseLabels: false
IndentWidth: 4
IndentWrappedFunctionNames: false
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
# Do not add QT_BEGIN_NAMESPACE/QT_END_NAMESPACE as this will indent lines in between.
MacroBlockBegin: ""
MacroBlockEnd: ""
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBlockIndentWidth: 4
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PenaltyBreakAssignment: 150
PenaltyBreakBeforeFirstCallParameter: 300
PenaltyBreakComment: 500
PenaltyBreakFirstLessLess: 400
PenaltyBreakString: 600
PenaltyExcessCharacter: 50
PenaltyReturnTypeOnItsOwnLine: 300
PointerAlignment: Right
ReflowComments: true
SortIncludes: true
SortUsingDeclarations: true
SpaceAfterCStyleCast: true
SpaceAfterTemplateKeyword: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles: false
SpacesInContainerLiterals: false
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: c++17
TabWidth: 4
UseTab: Never

================================================
FILE: .clang-tidy
================================================
---
# NOTE there must be no spaces before the '-', so put the comma last.
# The check bugprone-unchecked-optional-access is also turned off atm
# because it causes clang-tidy to hang randomly. The tracking issue
# can be found at https://github.com/llvm/llvm-project/issues/69369.
#
# Modified from
# https://github.com/pytorch/pytorch/blob/main/.clang-tidy
InheritParentConfig: true
Checks: '
bugprone-*,
-bugprone-easily-swappable-parameters,
-bugprone-forward-declaration-namespace,
-bugprone-implicit-widening-of-multiplication-result,
-bugprone-macro-parentheses,
-bugprone-lambda-function-name,
-bugprone-narrowing-conversions,
-bugprone-reserved-identifier,
-bugprone-swapped-arguments,
-bugprone-unchecked-optional-access,
clang-diagnostic-missing-prototypes,
cppcoreguidelines-*,
-cppcoreguidelines-avoid-const-or-ref-data-members,
-cppcoreguidelines-avoid-do-while,
-cppcoreguidelines-avoid-magic-numbers,
-cppcoreguidelines-avoid-non-const-global-variables,
-cppcoreguidelines-interfaces-global-init,
-cppcoreguidelines-macro-usage,
-cppcoreguidelines-narrowing-conversions,
-cppcoreguidelines-owning-memory,
-cppcoreguidelines-pro-bounds-array-to-pointer-decay,
-cppcoreguidelines-pro-bounds-constant-array-index,
-cppcoreguidelines-pro-bounds-pointer-arithmetic,
-cppcoreguidelines-pro-type-const-cast,
-cppcoreguidelines-pro-type-cstyle-cast,
-cppcoreguidelines-pro-type-reinterpret-cast,
-cppcoreguidelines-pro-type-static-cast-downcast,
-cppcoreguidelines-pro-type-union-access,
-cppcoreguidelines-pro-type-vararg,
-cppcoreguidelines-special-member-functions,
-cppcoreguidelines-non-private-member-variables-in-classes,
-facebook-hte-RelativeInclude,
hicpp-exception-baseclass,
hicpp-avoid-goto,
misc-*,
-misc-const-correctness,
-misc-include-cleaner,
-misc-use-anonymous-namespace,
-misc-unused-parameters,
-misc-no-recursion,
-misc-non-private-member-variables-in-classes,
-misc-confusable-identifiers,
modernize-*,
-modernize-macro-to-enum,
-modernize-pass-by-value,
-modernize-return-braced-init-list,
-modernize-use-auto,
-modernize-use-default-member-init,
-modernize-use-using,
-modernize-use-trailing-return-type,
-modernize-use-nodiscard,
performance-*,
readability-container-size-empty,
readability-delete-null-pointer,
readability-duplicate-include
readability-misplaced-array-index,
readability-redundant-function-ptr-dereference,
readability-redundant-smartptr-get,
readability-simplify-subscript-expr,
readability-string-compare,
'
WarningsAsErrors: '*'
...


================================================
FILE: .flake8
================================================
[flake8]
show-source=true
statistics=true
max-line-length = 120

exclude =
  .git,
  ./cmake,


================================================
FILE: .github/scripts/.gitignore
================================================
Makefile
*.jar
hs_err_pid*.log


================================================
FILE: .github/scripts/as-cmake-sub-project/CMakeLists.txt
================================================
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)

project(use-of-sherpa-onnx-as-a-sub-project)

if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/sherpa-onnx/setup.py")
  message(FATAL_ERROR "Please download the source code of sherpa-onnx and put it inside this directory")
endif()

set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")

include_directories(./sherpa-onnx)
add_subdirectory(./sherpa-onnx)

add_executable(main main.cc)
target_link_libraries(main sherpa-onnx-core)


================================================
FILE: .github/scripts/as-cmake-sub-project/main.cc
================================================
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/parse-options.h"

int main(int32_t argc, char *argv[]) {
  sherpa_onnx::ParseOptions po("help info");
  sherpa_onnx::OfflineRecognizerConfig config;
  config.Register(&po);
  po.PrintUsage();
  return 0;
}


================================================
FILE: .github/scripts/export-ascend/__init__.py
================================================


================================================
FILE: .github/scripts/export-ascend/generate_paraformer.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import itertools
import json
from dataclasses import asdict, dataclass

from generate_zipformer_ctc_20250703 import get_cann_version, get_image, get_soc_version


@dataclass
class Config:
    # 7.0, 8.0, 8.1, 8.2
    cann: str

    # 910B, 910B2, 910B3, 310P3
    soc_version: str

    # FunASR, WSChuan-ASR
    framework: str

    image: str = ""

    def __post_init__(self):
        self.image = get_image(self.cann, soc_version=self.soc_version)


def main():
    cann_version = get_cann_version()
    soc_version = get_soc_version()
    framework_list = ["FunASR", "WSChuan-ASR"]

    configs = [
        Config(cann=cann, soc_version=soc, framework=framework)
        for cann, soc, framework in itertools.product(
            cann_version, soc_version, framework_list
        )
    ]

    ans = [asdict(c) for c in configs]

    print(json.dumps({"include": ans}))


if __name__ == "__main__":
    main()


================================================
FILE: .github/scripts/export-ascend/generate_sense_voice.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import itertools
import json
from dataclasses import asdict, dataclass

from generate_zipformer_ctc_20250703 import get_image, get_soc_version, get_cann_version


@dataclass
class Config:
    # 7.0, 8.0, 8.1, 8.2
    cann: str

    # 910B, 910B2, 910B3, 310P3
    soc_version: str

    # FunASR, WSYue-ASR
    framework: str

    image: str = ""

    def __post_init__(self):
        self.image = get_image(self.cann, soc_version=self.soc_version)


def main():
    cann_version = get_cann_version()
    soc_version = get_soc_version()
    framework_list = ["FunASR", "WSYue-ASR"]

    configs = [
        Config(cann=cann, soc_version=soc, framework=framework)
        for cann, soc, framework in itertools.product(
            cann_version, soc_version, framework_list
        )
    ]

    ans = [asdict(c) for c in configs]

    print(json.dumps({"include": ans}))


if __name__ == "__main__":
    main()


================================================
FILE: .github/scripts/export-ascend/generate_whisper.py
================================================
#!/usr/bin/env python3
# Copyright    2026  Xiaomi Corp.        (authors: Fangjun Kuang)

import itertools
import json
from dataclasses import asdict, dataclass

from generate_zipformer_ctc_20250703 import get_image, get_soc_version, get_cann_version


@dataclass
class Config:
    # 7.0, 8.0, 8.1, 8.2
    cann: str

    # 910B, 910B2, 910B3, 310P3
    soc_version: str

    model: str

    image: str = ""

    def __post_init__(self):
        self.image = get_image(self.cann, soc_version=self.soc_version)


def main():
    cann_version = get_cann_version()
    soc_version = get_soc_version()
    model_list = [
        "turbo",
        "distil-medium.en",
        "distil-small.en",
        "tiny.en",
        "base.en",
        "small.en",
        "medium.en",
        "tiny",
        "base",
        "small",
        "medium",
        "medium-aishell",
    ]

    configs = [
        Config(cann=cann, soc_version=soc, model=model)
        for cann, soc, model in itertools.product(cann_version, soc_version, model_list)
    ]

    ans = [asdict(c) for c in configs]

    print(json.dumps({"include": ans}))


if __name__ == "__main__":
    main()


================================================
FILE: .github/scripts/export-ascend/generate_zipformer_ctc_20250703.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import itertools
import json
from dataclasses import asdict, dataclass


# image: ascendai/cann:latest
# image: ascendai/cann:8.1.rc1-910b-ubuntu22.04-py3.10
# see https://hub.docker.com/r/gpustack/ascendai-cann/tags?name=8.0
# see https://hub.docker.com/r/gpustack/devel-ascendai-cann/tags?name=310p
# and
# https://quay.io/repository/ascend/cann?tab=tags
def get_image(cann: str, soc_version: str):
    cann2image_910 = {
        "7.0": "quay.io/ascend/cann:7.0.1.beta1-910b-ubuntu22.04-py3.8",
        "8.0": "gpustack/ascendai-cann:8.0.RC3-910b-ubuntu20.04-py3.9",
        "8.1": "gpustack/devel-ascendai-cann:8.1.rc1.beta1-910b-ubuntu20.04-v2",
        "8.2": "gpustack/devel-ascendai-cann:8.2.rc1-910b-ubuntu20.04-v2",
        "8.3": "quay.io/ascend/cann:8.3.rc2-910b-ubuntu22.04-py3.11",
        "8.5": "quay.io/ascend/cann:8.5.0-910b-ubuntu22.04-py3.11",
    }

    cann2image_310 = {
        "7.0": "quay.io/ascend/cann:7.0.1-310p-ubuntu22.04-py3.9",
        "8.0": "gpustack/devel-ascendai-cann:8.0.rc3.beta1-310p-ubuntu20.04-v2",
        "8.1": "gpustack/devel-ascendai-cann:8.1.rc1.beta1-310p-ubuntu20.04-v2",
        "8.2": "gpustack/devel-ascendai-cann:8.2.rc1-310p-ubuntu20.04-v2",
        "8.3": "quay.io/ascend/cann:8.3.rc2-310p-ubuntu22.04-py3.11",
        "8.5": "quay.io/ascend/cann:8.5.0-310p-ubuntu22.04-py3.11",
    }

    if "910" in soc_version:
        return cann2image_910[cann]
    elif "310" in soc_version:
        return cann2image_310[cann]
    else:
        raise ValueError(f"Unsupported soc_version {soc_version}")


def get_soc_version():
    soc_version = ["910B", "910B2", "910B3", "910B4", "310P3"]
    return soc_version


def get_cann_version():
    cann_version = ["7.0", "8.0", "8.1", "8.2", "8.3", "8.5"]
    return cann_version


@dataclass
class Config:
    # 7.0, 8.0, 8.1, 8.2
    cann: str

    # 910B, 910B2, 910B3, 310P3
    soc_version: str

    num_seconds: str

    image: str = ""

    def __post_init__(self):
        self.image = get_image(self.cann, soc_version=self.soc_version)


def main():
    cann_version = get_cann_version()
    soc_version = get_soc_version()
    input_in_seconds = ["5", "8", "10", "13", "15", "18", "20", "23", "25", "28", "30"]

    configs = [
        Config(cann=cann, soc_version=soc, num_seconds=sec)
        for cann, soc, sec in itertools.product(
            cann_version, soc_version, input_in_seconds
        )
    ]

    ans = [asdict(c) for c in configs]

    print(json.dumps({"include": ans}))


if __name__ == "__main__":
    main()


================================================
FILE: .github/scripts/export-qnn/__init__.py
================================================


================================================
FILE: .github/scripts/export-qnn/generate_paraformer.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import json

from device_info import soc_info_dict
from dataclasses import asdict, dataclass
import itertools


@dataclass
class Config:
    soc: str  # SM8850
    soc_id: int  # 87
    arch: str  # v81
    input_in_seconds: str
    framework: str


def main():

    input_in_seconds = ["5", "8", "10", "13", "15", "18", "20", "23", "25", "28", "30"]
    framework_list = ["FunASR", "WSChuan-ASR"]

    configs = []

    for name, soc in soc_info_dict.items():
        for num_seconds, framework in itertools.product(
            input_in_seconds, framework_list
        ):
            configs.append(
                Config(
                    soc=name,
                    soc_id=soc.model.value,
                    arch=soc.info.arch.name,
                    input_in_seconds=num_seconds,
                    framework=framework,
                )
            )

    ans = [asdict(c) for c in configs]

    print(json.dumps({"include": ans}))


if __name__ == "__main__":
    main()


================================================
FILE: .github/scripts/export-qnn/generate_sense_voice.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import json

from device_info import soc_info_dict
from dataclasses import asdict, dataclass
import itertools


@dataclass
class Config:
    soc: str  # SM8850
    soc_id: int  # 87
    arch: str  # v81
    input_in_seconds: str
    framework: str


def main():

    input_in_seconds = ["5", "8", "10", "13", "15", "18", "20", "23", "25", "28", "30"]
    framework_list = ["FunASR", "WSYue-ASR"]

    configs = []

    for name, soc in soc_info_dict.items():
        for num_seconds, framework in itertools.product(
            input_in_seconds, framework_list
        ):
            configs.append(
                Config(
                    soc=name,
                    soc_id=soc.model.value,
                    arch=soc.info.arch.name,
                    input_in_seconds=num_seconds,
                    framework=framework,
                )
            )

    ans = [asdict(c) for c in configs]

    print(json.dumps({"include": ans}))


if __name__ == "__main__":
    main()


================================================
FILE: .github/scripts/export-qnn/generate_zipformer.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import json

from device_info import soc_info_dict
from dataclasses import asdict, dataclass
import itertools


@dataclass
class Config:
    soc: str  # SM8850
    soc_id: int  # 87
    arch: str  # v81
    input_in_seconds: str
    model_name: str


def main():

    input_in_seconds = ["5", "8", "10", "13", "15", "18", "20", "23", "25", "28", "30"]
    model_name_list = ["20250703", "20251222"]

    configs = []

    for name, soc in soc_info_dict.items():
        for num_seconds, model_name in itertools.product(
            input_in_seconds, model_name_list
        ):
            if model_name == "20251222":
                if num_seconds not in ["5"]:
                    # TODO(fangjun): We only upload model-5-seconds.onnx right now
                    continue

            configs.append(
                Config(
                    soc=name,
                    soc_id=soc.model.value,
                    arch=soc.info.arch.name,
                    input_in_seconds=num_seconds,
                    model_name=model_name,
                )
            )

    ans = [asdict(c) for c in configs]

    print(json.dumps({"include": ans}))


if __name__ == "__main__":
    main()


================================================
FILE: .github/scripts/node-addon/README-optional.md
================================================
# Introduction

Please see [sherpa-onnx-node](https://www.npmjs.com/package/sherpa-onnx-node)


================================================
FILE: .github/scripts/node-addon/README.md
================================================
# Introduction

Please see
https://github.com/k2-fsa/sherpa-onnx/blob/master/nodejs-addon-examples/README.md
for usages.


||Method|Support multiple threads|Minimum required node version|
|---|---|---|---|
|this package| https://github.com/nodejs/node-addon-api | Yes | v16|
|https://www.npmjs.com/package/sherpa-onnx| WebAssembly | No | v18|


================================================
FILE: .github/scripts/node-addon/index.js
================================================
module.exports = require('./sherpa-onnx.node');


================================================
FILE: .github/scripts/node-addon/notes.md
================================================
# Introduction

See also

  - https://github.com/WonderInventions/node-webrtc/blob/develop/package.json
  - https://stackoverflow.com/questions/15176082/npm-package-json-os-specific-dependency
  - https://github.com/WonderInventions/node-webrtc/blob/develop/lib/binding.js
  - cross-compiling https://github.com/nodejs/node-gyp/issues/829#issuecomment-665527032
  - https://nodejs.github.io/node-addon-examples/build-tools/cmake-js


================================================
FILE: .github/scripts/node-addon/package-optional.json
================================================
{
  "name": "sherpa-onnx-PLATFORM2-ARCH",
  "version": "SHERPA_ONNX_VERSION",
  "description": "Speech-to-text, text-to-speech, speaker diarization, and speech enhancement using Next-gen Kaldi without internet connection",
  "main": "index.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "repository": {
    "type": "git",
    "url": "git+https://github.com/k2-fsa/sherpa-onnx.git"
  },
  "keywords": [
    "speech to text",
    "text to speech",
    "transcription",
    "real-time speech recognition",
    "without internet connection",
    "locally",
    "local",
    "embedded systems",
    "open source",
    "diarization",
    "speaker diarization",
    "speaker recognition",
    "speaker",
    "speaker segmentation",
    "speaker verification",
    "spoken language identification",
    "sherpa",
    "zipformer",
    "asr",
    "tts",
    "stt",
    "c++",
    "onnxruntime",
    "onnx",
    "ai",
    "next-gen kaldi",
    "offline",
    "privacy",
    "open source",
    "streaming speech recognition",
    "speech",
    "recognition",
    "vad",
    "node-addon-api",
    "speaker id",
    "language id",
    "speech enhancement",
    "denoising"
  ],
  "author": "The next-gen Kaldi team",
  "license": "Apache-2.0",
  "bugs": {
    "url": "https://github.com/k2-fsa/sherpa-onnx/issues"
  },
  "homepage": "https://github.com/k2-fsa/sherpa-onnx#readme",
   "os": [
    "PLATFORM"
  ],
  "cpu": [
    "ARCH"
  ]
}


================================================
FILE: .github/scripts/node-addon/package.json
================================================
{
  "name": "sherpa-onnx-node",
  "version": "SHERPA_ONNX_VERSION",
  "description": "Speech-to-text, text-to-speech, speaker diarization, and speech enhancement using Next-gen Kaldi without internet connection",
  "main": "sherpa-onnx.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "repository": {
    "type": "git",
    "url": "git+https://github.com/k2-fsa/sherpa-onnx.git"
  },
  "keywords": [
    "speech to text",
    "text to speech",
    "transcription",
    "real-time speech recognition",
    "without internet connection",
    "locally",
    "local",
    "embedded systems",
    "open source",
    "diarization",
    "speaker diarization",
    "speaker recognition",
    "speaker",
    "speaker segmentation",
    "speaker verification",
    "spoken language identification",
    "sherpa",
    "zipformer",
    "asr",
    "tts",
    "stt",
    "c++",
    "onnxruntime",
    "onnx",
    "ai",
    "next-gen kaldi",
    "offline",
    "privacy",
    "open source",
    "streaming speech recognition",
    "speech",
    "recognition",
    "vad",
    "node-addon-api",
    "speaker id",
    "language id",
    "speech enhancement",
    "denoising"
  ],
  "author": "The next-gen Kaldi team",
  "license": "Apache-2.0",
  "bugs": {
    "url": "https://github.com/k2-fsa/sherpa-onnx/issues"
  },
  "homepage": "https://github.com/k2-fsa/sherpa-onnx#readme",
  "optionalDependencies": {
    "sherpa-onnx-darwin-arm64": "^SHERPA_ONNX_VERSION",
    "sherpa-onnx-darwin-x64": "^SHERPA_ONNX_VERSION",
    "sherpa-onnx-linux-x64": "^SHERPA_ONNX_VERSION",
    "sherpa-onnx-linux-arm64": "^SHERPA_ONNX_VERSION",
    "sherpa-onnx-win-x64": "^SHERPA_ONNX_VERSION",
    "sherpa-onnx-win-ia32": "^SHERPA_ONNX_VERSION"
  }
}


================================================
FILE: .github/scripts/test-audio-tagging.sh
================================================
#!/usr/bin/env bash

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

echo "EXE is $EXE"
echo "PATH: $PATH"

which $EXE

log "------------------------------------------------------------"
log "Run zipformer for audio tagging                             "
log "------------------------------------------------------------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
tar xvf sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
rm sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
repo=sherpa-onnx-zipformer-audio-tagging-2024-04-09
ls -lh $repo

for w in 1.wav 2.wav 3.wav 4.wav; do
  $EXE \
    --zipformer-model=$repo/model.onnx \
    --labels=$repo/class_labels_indices.csv \
    $repo/test_wavs/$w
done
rm -rf $repo


================================================
FILE: .github/scripts/test-c-api.sh
================================================
#!/usr/bin/env bash

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

echo "SLID_EXE is $SLID_EXE"
echo "SID_EXE is $SID_EXE"
echo "AT_EXE is $AT_EXE"
echo "PUNCT_EXE is $PUNCT_EXE"
echo "PATH: $PATH"

log "------------------------------------------------------------"
log "Test adding punctuations                                    "
log "------------------------------------------------------------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
ls -lh
tar xf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
ls -lh sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12
rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
$PUNCT_EXE
rm -rf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12

log "------------------------------------------------------------"
log "Test audio tagging                                          "
log "------------------------------------------------------------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
tar xvf sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
rm sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2

$AT_EXE

rm -rf sherpa-onnx-zipformer-audio-tagging-2024-04-09


log "------------------------------------------------------------"
log "Download whisper tiny for spoken language identification    "
log "------------------------------------------------------------"

rm -rf sherpa-onnx-whisper-tiny*
curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.tar.bz2
rm sherpa-onnx-whisper-tiny.tar.bz2

$SLID_EXE

rm -rf sherpa-onnx-whisper-tiny*

log "------------------------------------------------------------"
log "Download file for speaker identification and verification   "
log "------------------------------------------------------------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx
git clone https://github.com/csukuangfj/sr-data

$SID_EXE

rm -fv *.onnx
rm -rf sr-data


================================================
FILE: .github/scripts/test-cxx-api.sh
================================================
#!/usr/bin/env bash

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

echo "CXX_STREAMING_ZIPFORMER_EXE is $CXX_STREAMING_ZIPFORMER_EXE"
echo "CXX_WHISPER_EXE is $CXX_WHISPER_EXE"
echo "CXX_SENSE_VOICE_EXE is $CXX_SENSE_VOICE_EXE"
echo "PATH: $PATH"

log "------------------------------------------------------------"
log "Test streaming zipformer CXX API"
log "------------------------------------------------------------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
$CXX_STREAMING_ZIPFORMER_EXE
rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20

log "------------------------------------------------------------"
log "Test Whisper CXX API"
log "------------------------------------------------------------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
rm sherpa-onnx-whisper-tiny.en.tar.bz2
$CXX_WHISPER_EXE
rm -rf sherpa-onnx-whisper-tiny.en

log "------------------------------------------------------------"
log "Test SenseVoice CXX API"
log "------------------------------------------------------------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2

$CXX_SENSE_VOICE_EXE
rm -rf sherpa-onnx-sense-voice-*


================================================
FILE: .github/scripts/test-dart.sh
================================================
#!/usr/bin/env bash

set -ex

cd dart-api-examples

pushd speech-enhancement-gtcrn
echo "speech enhancement with gtcrn models"
./run.sh
ls -lh
popd

pushd speech-enhancement-dpdfnet
echo "speech enhancement with dpdfnet models"
./run.sh
ls -lh
popd

pushd streaming-speech-enhancement-gtcrn
echo "streaming speech enhancement with gtcrn models"
./run.sh
ls -lh
popd

pushd streaming-speech-enhancement-dpdfnet
echo "streaming speech enhancement with dpdfnet models"
./run.sh
ls -lh
popd

pushd non-streaming-asr

echo '----------Moonshine v2----------'
./run-moonshine-v2.sh
rm -rf sherpa-onnx-*

echo '----------FireRedASR CTC----------'
./run-fire-red-asr-ctc.sh
rm -rf sherpa-onnx-*

echo '----------FunASR Nano----------'
./run-funasr-nano.sh
rm -rf sherpa-onnx-*

echo '----------MedASR CTC----------'
./run-medasr-ctc.sh
rm -rf sherpa-onnx-*

echo '----------Omnilingual ASR CTC----------'
./run-omnilingual-asr-ctc.sh
rm -rf sherpa-onnx-*

echo '----------Wenet CTC----------'
./run-wenet-ctc.sh
rm -rf sherpa-onnx-*

echo '----------Zipformer CTC----------'
./run-zipformer-ctc.sh
rm -rf sherpa-onnx-*

echo '----------SenseVoice----------'
./run-sense-voice-with-hr.sh
./run-sense-voice.sh
rm -rf sherpa-onnx-*

echo '----------FireRedAsr----------'
./run-fire-red-asr.sh
rm -rf sherpa-onnx-fire-red-asr-*

echo '----------NeMo transducer----------'
./run-nemo-transducer.sh
rm -rf sherpa-onnx-*

echo '----------Dolphin CTC----------'
./run-dolphin-ctc.sh
rm -rf sherpa-onnx-*

echo '----------NeMo CTC----------'
./run-nemo-ctc.sh
rm -rf sherpa-onnx-*

echo '----------TeleSpeech CTC----------'
./run-telespeech-ctc.sh
rm -rf sherpa-onnx-*

echo '----------moonshine----------'
./run-moonshine.sh
rm -rf sherpa-onnx-*

echo '----------whisper----------'
./run-whisper.sh
rm -rf sherpa-onnx-*

echo '----------zipformer transducer----------'
./run-zipformer-transducer.sh
rm -rf sherpa-onnx-*

echo '----------paraformer itn----------'
./run-paraformer-itn.sh

echo '----------paraformer----------'
./run-paraformer.sh
rm -rf sherpa-onnx-*

echo '----------VAD with paraformer----------'
./run-vad-with-paraformer.sh
rm -rf sherpa-onnx-*

popd # non-streaming-asr

pushd tts

echo '----------tts----------'
./run-pocket-en.sh
./run-kitten-en.sh
./run-supertonic-en.sh
./run-kokoro-zh-en.sh
./run-kokoro-en.sh
./run-matcha-zh.sh
./run-matcha-en.sh
./run-zipvoice-zh-en.sh
ls -lh *.wav
rm -rf matcha-icefall-*
rm -rf sherpa-onnx-zipvoice-*
rm *.onnx

echo '----------piper tts----------'
./run-piper.sh
rm -rf vits-piper-*

echo '----------coqui tts----------'
./run-coqui.sh
rm -rf vits-coqui-*

echo '----------zh tts----------'
./run-vits-zh.sh
rm -rf sherpa-onnx-*

ls -lh *.wav

popd # tts

pushd spoken-language-identification
./run-whisper.sh
popd

pushd streaming-asr

echo '----------streaming T-one ctc----------'
./run-t-one-ctc.sh
rm -rf sherpa-onnx-*

echo '----------streaming zipformer ctc HLG----------'
./run-zipformer-ctc-hlg.sh
rm -rf sherpa-onnx-*

echo '----------streaming zipformer ctc----------'
./run-zipformer-ctc.sh
rm -rf sherpa-onnx-*

echo '----------streaming zipformer transducer----------'
./run-zipformer-transducer-itn.sh
./run-zipformer-transducer.sh
rm -f itn*
rm -rf sherpa-onnx-*

echo '----------streaming NeMo transducer----------'
./run-nemo-transducer.sh
rm -rf sherpa-onnx-*

echo '----------streaming paraformer----------'
./run-paraformer.sh
rm -rf sherpa-onnx-*

popd # streaming-asr

pushd vad
./run-ten-vad.sh
./run.sh
rm *.onnx
popd

pushd speaker-diarization
echo '----------speaker diarization----------'
./run.sh
popd

pushd speaker-identification
echo '----------3d speaker----------'
./run-3d-speaker.sh
popd

pushd add-punctuations
echo '----------CT Transformer----------'
./run-ct-transformer.sh
popd

pushd audio-tagging
echo '----------zipformer----------'
./run-zipformer.sh

echo '----------ced----------'
./run-ced.sh
popd

pushd vad-with-non-streaming-asr

echo '----------Zipformer CTC----------'
./run-zipformer-ctc.sh
rm -rf sherpa-onnx-*

echo '----------Dolphin CTC----------'
./run-dolphin-ctc.sh
rm -rf sherpa-onnx-*

echo '----------TeleSpeech CTC----------'
./run-telespeech-ctc.sh
rm -rf sherpa-onnx-*

echo "----zipformer transducer----"
./run-zipformer-transducer.sh
rm -rf sherpa-onnx-*

echo "----moonshine----"
./run-moonshine.sh
rm -rf sherpa-onnx-*

echo "----whisper----"
./run-whisper.sh
rm -rf sherpa-onnx-*

echo "----paraformer----"
./run-paraformer.sh
rm -rf sherpa-onnx-*

echo "----SenseVoice zh----"
./run-sense-voice-zh-2.sh
./run-sense-voice-zh.sh
rm -rf sherpa-onnx-*

echo "----SenseVoice en----"
./run-sense-voice-en.sh
rm -rf sherpa-onnx-*

popd

pushd keyword-spotter
./run-zh.sh
popd


================================================
FILE: .github/scripts/test-dot-net.sh
================================================
#!/usr/bin/env bash

set -ex

cd dotnet-examples/

cd ./supertonic-tts
./run.sh
ls -lh
rm -rf sherpa-onnx-supertonic-*

cd ../non-streaming-moonshine-v2-decode-files
./run.sh
rm -rf sherpa-onnx-moonshine-*

cd ../offline-decode-files

./run-fire-red-asr-ctc.sh
rm -rf sherpa-onnx-fire-*

./run-medasr-ctc.sh
rm -rf sherpa-onnx-*

./run-omnilingual-asr-ctc.sh
rm -rf sherpa-onnx-*

./run-wenet-ctc.sh
rm -rf sherpa-onnx-*

./run-zipformer-ctc.sh
rm -rf sherpa-onnx-*

./run-dolphin-ctc.sh
rm -rf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02

./run-fire-red-asr.sh
rm -rf sherpa-onnx-fire-red-asr-*

./run-moonshine.sh
rm -rf sherpa-onnx-*

./run-sense-voice-ctc.sh
rm -rf sherpa-onnx-*

./run-paraformer-itn.sh
rm -rf sherpa-onnx-*

./run-telespeech-ctc.sh
rm -rf sherpa-onnx-*

./run-nemo-ctc.sh
rm -rf sherpa-onnx-*

./run-paraformer.sh
rm -rf sherpa-onnx-*

./run-zipformer.sh
rm -rf sherpa-onnx-*

./run-hotwords.sh
rm -rf sherpa-onnx-*

./run-whisper.sh
rm -rf sherpa-onnx-*

# ./run-whisper-large-v3.sh
# rm -rf sherpa-onnx-*

./run-tdnn-yesno.sh
rm -rf sherpa-onnx-*

cd ../pocket-tts-zero-shot
./run.sh
ls -lh
rm -rf sherpa-onnx-pocket-*

cd ../zipvoice-tts
./run.sh
ls -lh
rm -rf sherpa-onnx-zipvoice-*
rm -f vocos_24khz.onnx

cd ../vad-non-streaming-funasr-nano
./run-ten-vad.sh
rm -fv *.onnx

./run.sh
rm -fv *.onnx

cd ../non-streaming-funasr-nano-decode-files
./run.sh
ls -lh
rm -rf sherpa-onnx-funasr-*

cd ../version-test
./run.sh
ls -lh

cd ../offline-audio-tagging
./run.sh
ls -lh
rm -rf sherpa-onnx-*

cd ../kitten-tts
./run-kitten.sh
ls -lh
rm -rf kitten-nano-en-v0_1-fp16

cd ../vad-non-streaming-asr-paraformer
./run-ten-vad.sh
rm -fv *.onnx

./run.sh
rm -fv *.onnx

cd ../non-streaming-canary-decode-files

./run.sh
ls -lh
rm -rf sherpa-onnx-nemo-*


cd ../speech-enhancement-gtcrn
./run.sh
ls -lh

cd ../speech-enhancement-dpdfnet
./run.sh
ls -lh

cd ../streaming-speech-enhancement-gtcrn
./run.sh
ls -lh

cd ../streaming-speech-enhancement-dpdfnet
./run.sh
ls -lh

cd ../kokoro-tts
./run-kokoro.sh
ls -lh

cd ../offline-tts
./run-matcha-zh.sh
ls -lh *.wav
./run-matcha-en.sh
ls -lh *.wav
./run-aishell3.sh
ls -lh *.wav
./run-piper.sh
ls -lh *.wav
./run-hf-fanchen.sh
ls -lh *.wav
ls -lh

pushd ../..

mkdir tts

cp -v dotnet-examples/kokoro-tts/*.wav ./tts
cp -v dotnet-examples/offline-tts/*.wav ./tts
cp -v dotnet-examples/supertonic-tts/*.wav ./tts
cp -v dotnet-examples/zipvoice-tts/*.wav ./tts
popd

cd ../offline-speaker-diarization
./run.sh
rm -rfv *.onnx
rm -fv *.wav
rm -rfv sherpa-onnx-pyannote-*

cd ../keyword-spotting-from-files
./run.sh

cd ../online-decode-files
./run-t-one-ctc.sh
rm -rf sherpa-onnx-*

./run-transducer-itn.sh
rm -rf sherpa-onnx-*

./run-zipformer2-ctc.sh
rm -rf sherpa-onnx-*

./run-transducer.sh
rm -rf sherpa-onnx-*

./run-paraformer.sh
rm -rf sherpa-onnx-*

cd ../offline-punctuation
./run.sh
rm -rf sherpa-onnx-*

cd ../speaker-identification
./run.sh

cd ../streaming-hlg-decoding/
./run.sh
rm -rf sherpa-onnx-*

cd ../spoken-language-identification
./run.sh
rm -rf sherpa-onnx-*


================================================
FILE: .github/scripts/test-kws.sh
================================================
#!/usr/bin/env bash

set -e

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

echo "EXE is $EXE"
echo "PATH: $PATH"

which $EXE

log "------------------------------------------------------------"
log "Run Chinese keyword spotting (Wenetspeech）"
log "------------------------------------------------------------"

repo_url=https://github.com/pkufool/keyword-spotting-models/releases/download/v0.1/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz
log "Start testing ${repo_url}"
repo=sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01

log "Download pretrained model and test-data from $repo_url"
curl -SL -O $repo_url
tar jxvf ${repo}.tar.bz

time $EXE \
  --tokens=$repo/tokens.txt \
  --encoder=$repo/encoder-epoch-12-avg-2-chunk-16-left-64.onnx \
  --decoder=$repo/decoder-epoch-12-avg-2-chunk-16-left-64.onnx \
  --joiner=$repo/joiner-epoch-12-avg-2-chunk-16-left-64.onnx \
  --keywords-file=$repo/test_wavs/test_keywords.txt \
  --max-active-paths=4 \
  --num-threads=4 \
  $repo/test_wavs/3.wav $repo/test_wavs/4.wav $repo/test_wavs/5.wav $repo/test_wavs/6.wav

rm -rf $repo
rm -rf ${repo}.tar.bz

log "------------------------------------------------------------"
log "Run English keyword spotting (Gigaspeech）"
log "------------------------------------------------------------"

repo_url=https://github.com/pkufool/keyword-spotting-models/releases/download/v0.1/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01.tar.bz
log "Start testing ${repo_url}"
repo=sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01

log "Download pretrained model and test-data from $repo_url"
curl -SL -O $repo_url
tar jxvf ${repo}.tar.bz

time $EXE \
  --tokens=$repo/tokens.txt \
  --encoder=$repo/encoder-epoch-12-avg-2-chunk-16-left-64.onnx \
  --decoder=$repo/decoder-epoch-12-avg-2-chunk-16-left-64.onnx \
  --joiner=$repo/joiner-epoch-12-avg-2-chunk-16-left-64.onnx \
  --keywords-file=$repo/test_wavs/test_keywords.txt \
  --max-active-paths=4 \
  --num-threads=4 \
  $repo/test_wavs/0.wav $repo/test_wavs/1.wav

rm -rf $repo
rm -rf ${repo}.tar.bz


================================================
FILE: .github/scripts/test-nodejs-addon-npm.sh
================================================
#!/usr/bin/env bash

set -ex

d=nodejs-addon-examples
echo "dir: $d"
cd $d

arch=$(node -p "require('os').arch()")
platform=$(node -p "require('os').platform()")
node_version=$(node -p "process.versions.node.split('.')[0]")

echo "----------Moonshine v2----------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
tar xvf sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
rm sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2

node ./test_asr_non_streaming_moonshine_v2.js

rm -rf sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27

echo "----------FireRedAsr CTC----------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
tar xvf sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
rm sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2

node ./test_asr_non_streaming_fire_red_asr_ctc.js
node ./test_asr_non_streaming_fire_red_asr_ctc_async.js

rm -rf sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25

echo "----------PocketTTS----------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
tar xf sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
rm sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2

node ./test_tts_non_streaming_pocket_en.js
node ./test_tts_non_streaming_pocket_en_async.js

rm -rf sherpa-onnx-pocket-tts-int8-2026-01-26

echo "----------ZipVoice----------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
tar xf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
rm sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos_24khz.onnx

node ./test_tts_non_streaming_zipvoice_zh_en.js
node ./test_tts_non_streaming_zipvoice_zh_en_async.js

rm -rf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia
rm -f vocos_24khz.onnx

echo "----------non-streaming ASR FunASR Nano----------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
tar xvf sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
rm sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2

node ./test_asr_non_streaming_funasr_nano.js
node ./test_asr_non_streaming_funasr_nano_async.js

rm -rf sherpa-onnx-funasr-nano-int8-2025-12-30

echo "----------non-streaming ASR Google MedASR CTC----------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
tar xvf sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
rm sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2

node ./test_asr_non_streaming_medasr_ctc.js

rm -rf sherpa-onnx-medasr-ctc-en-int8-2025-12-25

echo "----------non-streaming ASR Omnilingual ASR CTC----------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
tar xvf sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
rm sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2

node ./test_asr_non_streaming_omnilingual_asr_ctc.js

rm -rf sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12

echo "----------non-streaming ASR WeNet CTC----------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2

node ./test_asr_non_streaming_wenet_ctc.js
rm -rf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10

echo "----------streaming ASR T-one CTC----------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2

node ./test_asr_streaming_t_one_ctc.js

rm -rf sherpa-onnx-streaming-t-one-russian-2025-09-08

echo "----------KittenTTS----------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2
tar xf kitten-nano-en-v0_1-fp16.tar.bz2
rm kitten-nano-en-v0_1-fp16.tar.bz2

node ./test_tts_non_streaming_kitten_en.js

rm -rf kitten-nano-en-v0_1-fp16

echo "----------SupertonicTTS----------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
tar xf sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
rm sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2

node ./test_tts_non_streaming_supertonic_en.js
node ./test_tts_non_streaming_supertonic_en_async.js

rm -rf sherpa-onnx-supertonic-tts-int8-2026-03-06

echo "----------non-streaming ASR NeMo Canary----------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2

node ./test_asr_non_streaming_nemo_canary.js

rm -rf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8

echo "----------non-streaming ASR Zipformer CTC----------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2

tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2

node ./test_asr_non_streaming_zipformer_ctc.js
rm -rf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03

echo "----------non-streaming ASR NeMo parakeet tdt----------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2
tar xvf sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2
rm sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2

node ./test_asr_non_streaming_nemo_parakeet_tdt_v2.js
rm -rf sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8

echo "----------non-streaming ASR dolphin CTC----------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2

node ./test_asr_non_streaming_dolphin_ctc.js

rm -rf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02

echo "----------non-streaming speech denoiser----------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet_baseline.onnx
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav

node ./test_offline_speech_enhancement_gtcrn.js
node ./test_offline_speech_enhancement_dpdfnet.js
node ./test_online_speech_enhancement_gtcrn.js
node ./test_online_speech_enhancement_dpdfnet.js
rm gtcrn_simple.onnx
rm dpdfnet_baseline.onnx
ls -lh *.wav

echo "----------non-streaming asr FireRedAsr----------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2

node ./test_asr_non_streaming_fire_red_asr.js
rm -rf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16

echo "----------non-streaming asr moonshine + vad----------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

node ./test_vad_with_non_streaming_asr_moonshine.js
rm -rf sherpa-onnx-*
rm *.wav
rm *.onnx

echo "----------non-streaming speaker diarization----------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav

node ./test_offline_speaker_diarization.js

rm -rfv *.onnx *.wav sherpa-onnx-pyannote-*

echo "----------non-streaming asr whisper + vad----------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
rm sherpa-onnx-whisper-tiny.en.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

node ./test_vad_with_non_streaming_asr_whisper.js
rm -rf sherpa-onnx-whisper*
rm *.wav
rm *.onnx

echo "----------asr----------"

if [[ $arch != "ia32" && $platform != "win32" ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
  tar xvf sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
  rm sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2

  node ./test_asr_non_streaming_nemo_ctc.js
  rm -rf sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k

  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2

  node ./test_asr_non_streaming_sense_voice.js

  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
  tar xf dict.tar.bz2

  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt

  node ./test_asr_non_streaming_sense_voice_with_hr.js

  rm -rf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17
  rm -rf dict replace.fst test-hr.wav lexicon.txt

  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
  tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
  rm sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2

  node ./test_asr_non_streaming_paraformer.js

  rm -f itn*

  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav

  node ./test_asr_non_streaming_paraformer_itn.js

  rm -rf sherpa-onnx-paraformer-zh-2023-09-14
fi

echo "----------tts----------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
tar xf kokoro-multi-lang-v1_0.tar.bz2
rm kokoro-multi-lang-v1_0.tar.bz2

node ./test_tts_non_streaming_kokoro_zh_en.js
ls -lh *.wav
rm -rf kokoro-multi-lang-v1_0

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
tar xf kokoro-en-v0_19.tar.bz2
rm kokoro-en-v0_19.tar.bz2

node ./test_tts_non_streaming_kokoro_en.js
ls -lh *.wav
rm -rf kokoro-en-v0_19

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
rm matcha-icefall-en_US-ljspeech.tar.bz2
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx

node ./test_tts_non_streaming_matcha_icefall_en.js
rm vocos-22khz-univ.onnx
rm -rf matcha-icefall-en_US-ljspeech

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
tar xvf matcha-icefall-zh-baker.tar.bz2
rm matcha-icefall-zh-baker.tar.bz2
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx

node ./test_tts_non_streaming_matcha_icefall_zh.js
rm vocos-22khz-univ.onnx
rm -rf matcha-icefall-zh-baker
ls -lh *.wav

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_GB-cori-medium.tar.bz2
tar xf vits-piper-en_GB-cori-medium.tar.bz2
rm vits-piper-en_GB-cori-medium.tar.bz2

node ./test_tts_non_streaming_vits_piper_en.js
rm -rf vits-piper-en_GB-cori-medium

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2
tar xvf vits-coqui-de-css10.tar.bz2
rm vits-coqui-de-css10.tar.bz2

node ./test_tts_non_streaming_vits_coqui_de.js
rm -rf vits-coqui-de-css10

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-vits-zh-ll.tar.bz2
tar xvf sherpa-onnx-vits-zh-ll.tar.bz2
rm sherpa-onnx-vits-zh-ll.tar.bz2

node ./test_tts_non_streaming_vits_zh_ll.js
rm -rf sherpa-onnx-vits-zh-ll

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
tar xvf vits-icefall-zh-aishell3.tar.bz2
rm vits-icefall-zh-aishell3.tar.bz2

node ./test_tts_non_streaming_vits_zh_aishell3.js
rm -rf vits-icefall-zh-aishell3

echo "----------keyword spotting----------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2

node ./test_keyword_spotter_transducer.js
rm -rf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01

if [[ $arch != "ia32" && $platform != "win32" && $node_version != 21 ]]; then
  # The punctuation model is so large that it cause memory allocation failure on windows x86
  # 2024-07-17 03:24:34.2388391 [E:onnxruntime:, inference_session.cc:1981
  # onnxruntime::InferenceSession::Initialize::<lambda_d603a8c74863bd6b58a1c7996295ed04>::operator ()]
  # Exception during initialization: bad allocation
  # Error: Process completed with exit code 127.
  #
  # Node 21 does not have such an issue
  echo "----------add punctuations----------"

  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
  tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
  rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2

  node ./test_offline_punctuation.js
  rm -rf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12


  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
  tar xvf sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
  rm sherpa-onnx-online-punct-en-2024-08-06.tar.bz2

  node ./test_online_punctuation.js
  rm -rf sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
fi

echo "----------audio tagging----------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
tar xvf sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
rm sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2

node ./test_audio_tagging_zipformer.js
rm -rf sherpa-onnx-zipformer-small-audio-tagging-2024-04-15

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2
tar xvf sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2
rm sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2

node ./test_audio_tagging_ced.js
rm -rf sherpa-onnx-ced-mini-audio-tagging-2024-04-19

echo "----------speaker identification----------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

git clone https://github.com/csukuangfj/sr-data

node ./test_speaker_identification.js

rm *.onnx
rm -rf sr-data

echo "----------spoken language identification----------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.tar.bz2
rm sherpa-onnx-whisper-tiny.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/spoken-language-identification-test-wavs.tar.bz2
tar xvf spoken-language-identification-test-wavs.tar.bz2
rm spoken-language-identification-test-wavs.tar.bz2

node ./test_spoken_language_identification.js
rm -rf sherpa-onnx-whisper-tiny
rm -rf spoken-language-identification-test-wavs

echo "----------streaming asr----------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2

rm -f itn*

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
tar xf dict.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt

if [[ $arch != "ia32" && $platform != "win32" ]]; then
  node test_asr_streaming_transducer_itn.js
  node test_asr_streaming_transducer.js
  node test_asr_streaming_transducer_with_hr.js
fi

rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
rm -rf dict lexicon.txt replace.fst test-hr.wav

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2

node ./test_asr_streaming_ctc.js

# To decode with HLG.fst
node ./test_asr_streaming_ctc_hlg.js
rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2

node ./test_asr_streaming_paraformer.js
rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en

echo "----------non-streaming asr----------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
tar xvf sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
rm sherpa-onnx-zipformer-en-2023-04-01.tar.bz2

node ./test_asr_non_streaming_transducer.js
rm -rf sherpa-onnx-zipformer-en-2023-04-01

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
rm sherpa-onnx-whisper-tiny.en.tar.bz2

node ./test_asr_non_streaming_whisper.js
rm -rf sherpa-onnx-whisper-tiny.en

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2

node ./test_asr_non_streaming_moonshine.js
rm -rf sherpa-onnx-*

ls -lh


================================================
FILE: .github/scripts/test-nodejs-npm.sh
================================================
#!/usr/bin/env bash

set -ex

echo "dir: $d"
cd $d
npm install
git status
ls -lh
ls -lh node_modules

echo "---test moonshine v2---"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
tar xvf sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
rm sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2

node ./test-offline-moonshine-v2.js

rm -rf sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27

echo "---test FireRedASR CTC---"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
tar xvf sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
rm sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2

node ./test-offline-fire-red-asr-ctc.js

rm -rf sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
tar xvf sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
rm sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2

node ./test-offline-funasr-nano.js

rm -rf sherpa-onnx-funasr-nano-int8-2025-12-30

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
tar xvf sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
rm sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2

node ./test-offline-medasr-ctc.js

rm -rf sherpa-onnx-medasr-ctc-en-int8-2025-12-25

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
tar xvf sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
rm sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2

node ./test-offline-omnilingual-asr-ctc.js

rm -rf sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2

node ./test-offline-wenet-ctc.js
rm -rf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
node ./test-online-t-one-ctc.js

rm -rf sherpa-onnx-streaming-t-one-russian-2025-09-08

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2
tar xf kitten-nano-en-v0_1-fp16.tar.bz2
rm kitten-nano-en-v0_1-fp16.tar.bz2

node ./test-offline-tts-kitten-en.js
ls -lh *.wav
rm -rf kitten-nano-en-v0_1-fp16

# online asr
curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
node ./test-online-paraformer.js
rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en

curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2

rm -f itn*
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst

node ./test-online-transducer-itn.js

node ./test-online-transducer.js

rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20

curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
rm sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2

node ./test-online-zipformer2-ctc.js
rm -rf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13

curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
node ./test-online-zipformer2-ctc-hlg.js
rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18

echo "----------keyword spotting----------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2

node ./test-keyword-spotter-transducer.js
rm -rf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01

# asr with offline nemo canary
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2

node ./test-offline-nemo-canary.js
rm -rf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8

# asr with offline zipformer ctc
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2

tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2

node ./test-offline-zipformer-ctc.js
rm -rf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03

# asr with offline dolphin ctc
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
node ./test-offline-dolphin-ctc.js
rm -rf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02

# speech enhancement
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet_baseline.onnx
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
node ./test-offline-speech-enhancement-gtcrn.js
node ./test-offline-speech-enhancement-dpdfnet.js
node ./test-online-speech-enhancement-gtcrn.js
node ./test-online-speech-enhancement-dpdfnet.js
ls -lh *.wav
rm gtcrn_simple.onnx
rm dpdfnet_baseline.onnx
rm -fv inp_16k.wav
rm -fv enhanced*.wav

# offline tts

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
tar xf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
rm sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos_24khz.onnx

node ./test-offline-tts-zipvoice-zh-en.js
ls -lh *.wav
rm -rf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia
rm -f vocos_24khz.onnx

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
tar xf kokoro-multi-lang-v1_0.tar.bz2
rm kokoro-multi-lang-v1_0.tar.bz2

node ./test-offline-tts-kokoro-zh-en.js
ls -lh *.wav
rm -rf kokoro-multi-lang-v1_0

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
tar xf kokoro-en-v0_19.tar.bz2
rm kokoro-en-v0_19.tar.bz2

node ./test-offline-tts-kokoro-en.js
rm -rf kokoro-en-v0_19

ls -lh

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
tar xvf matcha-icefall-zh-baker.tar.bz2
rm matcha-icefall-zh-baker.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx

node ./test-offline-tts-matcha-zh.js

rm -rf matcha-icefall-zh-baker
rm vocos-22khz-univ.onnx


echo "---"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
rm matcha-icefall-en_US-ljspeech.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx

node ./test-offline-tts-matcha-en.js

rm -rf matcha-icefall-en_US-ljspeech
rm vocos-22khz-univ.onnx

echo "---"

curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
tar xf vits-piper-en_US-amy-low.tar.bz2
node ./test-offline-tts-vits-en.js
rm -rf vits-piper-en_US-amy-low*

echo "---"

curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
tar xvf vits-icefall-zh-aishell3.tar.bz2
node ./test-offline-tts-vits-zh.js
rm -rf vits-icefall-zh-aishell3*

ls -lh *.wav

echo '-----speaker diarization----------'
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav

node ./test-offline-speaker-diarization.js
rm -rfv *.wav *.onnx sherpa-onnx-pyannote-*

echo '-----vad+moonshine----------'

curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
rm sherpa-onnx-whisper-tiny.en.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
node ./test-vad-with-non-streaming-asr-whisper.js
rm Obama.wav
rm silero_vad.onnx
rm -rf sherpa-onnx-moonshine-*

echo '-----vad+whisper----------'

curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
rm sherpa-onnx-whisper-tiny.en.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
node ./test-vad-with-non-streaming-asr-whisper.js
rm Obama.wav
rm silero_vad.onnx
rm -rf sherpa-onnx-whisper-tiny.en

# offline asr
#
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17.tar.bz2
tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17.tar.bz2
rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17.tar.bz2

node ./test-offline-sense-voice.js

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
tar xf dict.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt

node ./test-offline-sense-voice-with-hr.js

rm -rf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17
rm -rf dict replace.fst test-hr.wav lexicon.txt

curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
ls -lh
tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
rm sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2

rm -f itn*
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
node ./test-offline-paraformer-itn.js
rm -rf sherpa-onnx-paraformer-zh-2023-09-14

curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-ctc-en-conformer-small.tar.bz2
ls -lh
tar xvf sherpa-onnx-nemo-ctc-en-conformer-small.tar.bz2
rm sherpa-onnx-nemo-ctc-en-conformer-small.tar.bz2
node ./test-offline-nemo-ctc.js
rm -rf sherpa-onnx-nemo-ctc-en-conformer-small

curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
ls -lh
tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
rm sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
node ./test-offline-paraformer.js
rm -rf sherpa-onnx-paraformer-zh-2023-09-14

curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-06-26.tar.bz2
ls -lh
tar xvf sherpa-onnx-zipformer-en-2023-06-26.tar.bz2
rm sherpa-onnx-zipformer-en-2023-06-26.tar.bz2
node ./test-offline-transducer.js
rm -rf sherpa-onnx-zipformer-en-2023-06-26

curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
rm sherpa-onnx-whisper-tiny.en.tar.bz2
node ./test-offline-whisper.js
rm -rf sherpa-onnx-whisper-tiny.en

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2

node ./test-offline-moonshine.js
rm -rf sherpa-onnx-moonshine-*


================================================
FILE: .github/scripts/test-offline-ctc.sh
================================================
#!/usr/bin/env bash

set -e

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

export GIT_CLONE_PROTECTION_ACTIVE=false

echo "EXE is $EXE"
echo "PATH: $PATH"

which $EXE

for type in base small; do
  log "------------------------------------------------------------"
  log "Run Dolphin CTC models ($type int8)"
  log "------------------------------------------------------------"
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02.tar.bz2
  tar xvf sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02.tar.bz2
  rm sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02.tar.bz2

  $EXE \
    --dolphin-model=./sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02/model.int8.onnx \
    --tokens=./sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02/tokens.txt \
    --debug=1 \
    ./sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav

  rm -rf sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02

  log "------------------------------------------------------------"
  log "Run Dolphin CTC models ($type)"
  log "------------------------------------------------------------"
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02.tar.bz2
  tar xvf sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02.tar.bz2
  rm sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02.tar.bz2

  $EXE \
    --dolphin-model=./sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02/model.onnx \
    --tokens=./sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02/tokens.txt \
    --debug=1 \
    ./sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02/test_wavs/0.wav

  rm -rf sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02
done

log "------------------------------------------------------------"
log "Run NeMo GigaAM Russian models v2"
log "------------------------------------------------------------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-ctc-giga-am-v2-russian-2025-04-19.tar.bz2
tar xvf sherpa-onnx-nemo-ctc-giga-am-v2-russian-2025-04-19.tar.bz2
rm sherpa-onnx-nemo-ctc-giga-am-v2-russian-2025-04-19.tar.bz2

$EXE \
  --nemo-ctc-model=./sherpa-onnx-nemo-ctc-giga-am-v2-russian-2025-04-19/model.int8.onnx \
  --tokens=./sherpa-onnx-nemo-ctc-giga-am-v2-russian-2025-04-19/tokens.txt \
  --debug=1 \
  ./sherpa-onnx-nemo-ctc-giga-am-v2-russian-2025-04-19/test_wavs/example.wav

rm -rf sherpa-onnx-nemo-ctc-giga-am-v2-russian-2025-04-19

log "------------------------------------------------------------"
log "Run NeMo GigaAM Russian models v1"
log "------------------------------------------------------------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24.tar.bz2
tar xvf sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24.tar.bz2
rm sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24.tar.bz2

$EXE \
  --nemo-ctc-model=./sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24/model.int8.onnx \
  --tokens=./sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24/tokens.txt \
  --debug=1 \
  ./sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24/test_wavs/example.wav

rm -rf sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24

log "------------------------------------------------------------"
log "Run SenseVoice models"
log "------------------------------------------------------------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17.tar.bz2
tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17.tar.bz2
rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17.tar.bz2
repo=sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17

for m in model.int8.onnx; do
  for w in zh en yue ja ko; do
    for use_itn in 0 1; do
      echo "$m $w $use_itn"
      time $EXE \
        --tokens=$repo/tokens.txt \
        --sense-voice-model=$repo/$m \
        --sense-voice-use-itn=$use_itn \
        $repo/test_wavs/$w.wav
    done
  done
done

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
tar xf dict.tar.bz2
rm dict.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt

for m in model.int8.onnx; do
  for use_itn in 0 1; do
    echo "$m $w $use_itn"
    time $EXE \
      --tokens=$repo/tokens.txt \
      --sense-voice-model=$repo/$m \
      --sense-voice-use-itn=$use_itn \
      --hr-lexicon=./lexicon.txt \
      --hr-rule-fsts=./replace.fst \
      ./test-hr.wav
  done
done

rm -rf dict replace.fst test-hr.wav lexicon.txt

# test wav reader for non-standard wav files
waves=(
  naudio.wav
  junk-padding.wav
  int8-1-channel-zh.wav
  int8-2-channel-zh.wav
  int8-4-channel-zh.wav
  int16-1-channel-zh.wav
  int16-2-channel-zh.wav
  int32-1-channel-zh.wav
  int32-2-channel-zh.wav
  float32-1-channel-zh.wav
  float32-2-channel-zh.wav
)
for w in ${waves[@]}; do
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$w

  time $EXE \
    --tokens=$repo/tokens.txt \
    --sense-voice-model=$repo/model.int8.onnx \
    $w
  rm -v $w
done

rm -rf $repo

if true; then
  # It has problems with onnxruntime 1.18
  log "------------------------------------------------------------"
  log "Run Wenet models"
  log "------------------------------------------------------------"
  wenet_models=(
  sherpa-onnx-zh-wenet-aishell
  # sherpa-onnx-zh-wenet-aishell2
  # sherpa-onnx-zh-wenet-wenetspeech
  # sherpa-onnx-zh-wenet-multi-cn
  sherpa-onnx-en-wenet-librispeech
  # sherpa-onnx-en-wenet-gigaspeech
  )
  for name in ${wenet_models[@]}; do
    repo_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$name.tar.bz2
    log "Start testing ${repo_url}"
    repo=$name
    log "Download pretrained model and test-data from $repo_url"
    curl -SL -O $repo_url
    tar xvf $name.tar.bz2
    rm $name.tar.bz2

    log "test float32 models"
    time $EXE \
      --tokens=$repo/tokens.txt \
      --wenet-ctc-model=$repo/model.onnx \
      $repo/test_wavs/0.wav \
      $repo/test_wavs/1.wav \
      $repo/test_wavs/8k.wav

    log "test int8 models"
    time $EXE \
      --tokens=$repo/tokens.txt \
      --wenet-ctc-model=$repo/model.int8.onnx \
      $repo/test_wavs/0.wav \
      $repo/test_wavs/1.wav \
      $repo/test_wavs/8k.wav

    rm -rf $repo
  done
fi


log "test offline TeleSpeech CTC"
url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
name=$(basename $url)
repo=$(basename -s .tar.bz2 $name)

curl -SL -O $url
tar xvf $name
rm $name
ls -lh $repo

test_wavs=(
3-sichuan.wav
4-tianjin.wav
5-henan.wav
)
for w in ${test_wavs[@]}; do
  time $EXE \
    --tokens=$repo/tokens.txt \
    --telespeech-ctc=$repo/model.int8.onnx \
    --debug=1 \
    $repo/test_wavs/$w
done

time $EXE \
  --tokens=$repo/tokens.txt \
  --telespeech-ctc=$repo/model.int8.onnx \
  --debug=1 \
  $repo/test_wavs/3-sichuan.wav \
  $repo/test_wavs/4-tianjin.wav \
  $repo/test_wavs/5-henan.wav

rm -rf $repo

log "-----------------------------------------------------------------"
log "Run Nemo fast conformer hybrid transducer ctc models (CTC branch)"
log "-----------------------------------------------------------------"

url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
name=$(basename $url)
curl -SL -O $url
tar xvf $name
rm $name
repo=$(basename -s .tar.bz2 $name)
ls -lh $repo

log "test $repo"
test_wavs=(
de-german.wav
es-spanish.wav
hr-croatian.wav
po-polish.wav
uk-ukrainian.wav
en-english.wav
fr-french.wav
it-italian.wav
ru-russian.wav
)
for w in ${test_wavs[@]}; do
  time $EXE \
    --tokens=$repo/tokens.txt \
    --nemo-ctc-model=$repo/model.onnx \
    --debug=1 \
    $repo/test_wavs/$w
done

rm -rf $repo

url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-en-24500.tar.bz2
name=$(basename $url)
curl -SL -O $url
tar xvf $name
rm $name
repo=$(basename -s .tar.bz2 $name)
ls -lh $repo

log "Test $repo"

time $EXE \
  --tokens=$repo/tokens.txt \
  --nemo-ctc-model=$repo/model.onnx \
  --debug=1 \
  $repo/test_wavs/en-english.wav

rm -rf $repo

url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-es-1424.tar.bz2
name=$(basename $url)
curl -SL -O $url
tar xvf $name
rm $name
repo=$(basename -s .tar.bz2 $name)
ls -lh $repo

log "test $repo"

time $EXE \
  --tokens=$repo/tokens.txt \
  --nemo-ctc-model=$repo/model.onnx \
  --debug=1 \
  $repo/test_wavs/es-spanish.wav

rm -rf $repo

url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288.tar.bz2
name=$(basename $url)
curl -SL -O $url
tar xvf $name
rm $name
repo=$(basename -s .tar.bz2 $name)
ls -lh $repo

log "Test $repo"

test_wavs=(
en-english.wav
de-german.wav
fr-french.wav
es-spanish.wav
)

for w in ${test_wavs[@]}; do
  time $EXE \
    --tokens=$repo/tokens.txt \
    --nemo-ctc-model=$repo/model.onnx \
    --debug=1 \
    $repo/test_wavs/$w
done

rm -rf $repo


log "------------------------------------------------------------"
log "Run tdnn yesno (Hebrew)"
log "------------------------------------------------------------"
url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-tdnn-yesno.tar.bz2
curl -SL -O $url
tar xvf sherpa-onnx-tdnn-yesno.tar.bz2
rm sherpa-onnx-tdnn-yesno.tar.bz2
log "Start testing ${url}"
repo=sherpa-onnx-tdnn-yesno
log "Download pretrained model and test-data from $url"

log "test float32 models"
time $EXE \
  --sample-rate=8000 \
  --feat-dim=23 \
  \
  --tokens=$repo/tokens.txt \
  --tdnn-model=$repo/model-epoch-14-avg-2.onnx \
  $repo/test_wavs/0_0_0_1_0_0_0_1.wav \
  $repo/test_wavs/0_0_1_0_0_0_1_0.wav \
  $repo/test_wavs/0_0_1_0_0_1_1_1.wav \
  $repo/test_wavs/0_0_1_0_1_0_0_1.wav \
  $repo/test_wavs/0_0_1_1_0_0_0_1.wav \
  $repo/test_wavs/0_0_1_1_0_1_1_0.wav

log "test int8 models"
time $EXE \
  --sample-rate=8000 \
  --feat-dim=23 \
  \
  --tokens=$repo/tokens.txt \
  --tdnn-model=$repo/model-epoch-14-avg-2.int8.onnx \
  $repo/test_wavs/0_0_0_1_0_0_0_1.wav \
  $repo/test_wavs/0_0_1_0_0_0_1_0.wav \
  $repo/test_wavs/0_0_1_0_0_1_1_1.wav \
  $repo/test_wavs/0_0_1_0_1_0_0_1.wav \
  $repo/test_wavs/0_0_1_1_0_0_0_1.wav \
  $repo/test_wavs/0_0_1_1_0_1_1_0.wav

rm -rf $repo

log "------------------------------------------------------------"
log "Run Citrinet (stt_en_citrinet_512, English)"
log "------------------------------------------------------------"

repo_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-ctc-en-citrinet-512.tar.bz2
curl -SL -O $repo_url
tar xvf sherpa-onnx-nemo-ctc-en-citrinet-512.tar.bz2
rm sherpa-onnx-nemo-ctc-en-citrinet-512.tar.bz2
log "Start testing ${repo_url}"
repo=sherpa-onnx-nemo-ctc-en-citrinet-512
log "Download pretrained model and test-data from $repo_url"

time $EXE \
  --tokens=$repo/tokens.txt \
  --nemo-ctc-model=$repo/model.onnx \
  --num-threads=2 \
  $repo/test_wavs/0.wav \
  $repo/test_wavs/1.wav \
  $repo/test_wavs/8k.wav

time $EXE \
  --tokens=$repo/tokens.txt \
  --nemo-ctc-model=$repo/model.int8.onnx \
  --num-threads=2 \
  $repo/test_wavs/0.wav \
  $repo/test_wavs/1.wav \
  $repo/test_wavs/8k.wav

rm -rf $repo

log "------------------------------------------------------------"
log "Run Librispeech zipformer CTC H/HL/HLG decoding (English)   "
log "------------------------------------------------------------"
repo_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-en-2023-10-02.tar.bz2
curl -SL -O $repo_url
log "Start testing ${repo_url}"
tar xvf sherpa-onnx-zipformer-ctc-en-2023-10-02.tar.bz2
rm sherpa-onnx-zipformer-ctc-en-2023-10-02.tar.bz2
repo=sherpa-onnx-zipformer-ctc-en-2023-10-02
log "Download pretrained model and test-data from $repo_url"

graphs=(
$repo/H.fst
$repo/HL.fst
$repo/HLG.fst
)

for graph in ${graphs[@]}; do
  log "test float32 models with $graph"
  time $EXE \
    --model-type=zipformer2_ctc \
    --ctc.graph=$graph \
    --zipformer-ctc-model=$repo/model.onnx \
    --tokens=$repo/tokens.txt \
    $repo/test_wavs/0.wav \
    $repo/test_wavs/1.wav \
    $repo/test_wavs/2.wav

  log "test int8 models with $graph"
  time $EXE \
    --model-type=zipformer2_ctc \
    --ctc.graph=$graph \
    --zipformer-ctc-model=$repo/model.int8.onnx \
    --tokens=$repo/tokens.txt \
    $repo/test_wavs/0.wav \
    $repo/test_wavs/1.wav \
    $repo/test_wavs/2.wav
done

rm -rf $repo


================================================
FILE: .github/scripts/test-offline-fire-red-asr.sh
================================================
#!/usr/bin/env bash

set -e

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

export GIT_CLONE_PROTECTION_ACTIVE=false

echo "EXE is $EXE"
echo "PATH: $PATH"

which $EXE

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
tar xvf sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
rm sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2

for w in 0.wav 1.wav 2.wav 3-sichuan.wav 3.wav 4-tianjin.wav 5-henan.wav 8k.wav; do
$EXE \
  --fire-red-asr-ctc=./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/model.int8.onnx \
  --tokens=./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/tokens.txt \
  ./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/test_wavs/$w
done

rm -rf sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25


================================================
FILE: .github/scripts/test-offline-moonshine.sh
================================================
#!/usr/bin/env bash

set -e

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

export GIT_CLONE_PROTECTION_ACTIVE=false

echo "EXE is $EXE"
echo "PATH: $PATH"

which $EXE

names=(
tiny
base
)

for name in ${names[@]}; do
  log "------------------------------------------------------------"
  log "Run $name"
  log "------------------------------------------------------------"

  repo_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-$name.tar.bz2
  repo_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-$name-en-int8.tar.bz2
  curl -SL -O $repo_url
  tar xvf sherpa-onnx-moonshine-$name-en-int8.tar.bz2
  rm sherpa-onnx-moonshine-$name-en-int8.tar.bz2
  repo=sherpa-onnx-moonshine-$name-en-int8
  log "Start testing ${repo_url}"

  log "test int8 onnx"

  time $EXE \
    --moonshine-preprocessor=$repo/preprocess.onnx \
    --moonshine-encoder=$repo/encode.int8.onnx \
    --moonshine-uncached-decoder=$repo/uncached_decode.int8.onnx \
    --moonshine-cached-decoder=$repo/cached_decode.int8.onnx \
    --tokens=$repo/tokens.txt \
    --num-threads=2 \
    $repo/test_wavs/0.wav \
    $repo/test_wavs/1.wav \
    $repo/test_wavs/8k.wav

  rm -rf $repo
done


================================================
FILE: .github/scripts/test-offline-punctuation.sh
================================================
#!/usr/bin/env bash

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

echo "EXE is $EXE"
echo "PATH: $PATH"

which $EXE

log "------------------------------------------------------------"
log "Download the punctuation model                             "
log "------------------------------------------------------------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
repo=sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12
ls -lh $repo

$EXE \
 --debug=1 \
 --ct-transformer=$repo/model.onnx \
 "这是一个测试你好吗How are you我很好thank you are you ok谢谢你"

$EXE \
 --debug=1 \
 --ct-transformer=$repo/model.onnx \
 "我们都是木头人不会说话不会动"

$EXE \
 --debug=1 \
 --ct-transformer=$repo/model.onnx \
 "The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry"

rm -rf $repo


================================================
FILE: .github/scripts/test-offline-source-separation.sh
================================================
#!/usr/bin/env bash

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

if [ -z $EXE ]; then
  EXE=./build/bin/sherpa-onnx-offline-source-separation
fi

echo "EXE is $EXE"
echo "PATH: $PATH"

which $EXE

log "------------------------------------------------------------"
log "Run spleeter"
log "------------------------------------------------------------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/sherpa-onnx-spleeter-2stems-fp16.tar.bz2
tar xvf sherpa-onnx-spleeter-2stems-fp16.tar.bz2
rm sherpa-onnx-spleeter-2stems-fp16.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/qi-feng-le-zh.wav

$EXE \
  --spleeter-vocals=sherpa-onnx-spleeter-2stems-fp16/vocals.fp16.onnx \
  --spleeter-accompaniment=sherpa-onnx-spleeter-2stems-fp16/accompaniment.fp16.onnx \
  --num-threads=2 \
  --debug=1 \
  --input-wav=./qi-feng-le-zh.wav \
  --output-vocals-wav=spleeter_output_vocals.wav \
  --output-accompaniment-wav=spleeter_output_accompaniment.wav

rm -rf sherpa-onnx-spleeter-2stems-fp16

log "------------------------------------------------------------"
log "Run UVR"
log "------------------------------------------------------------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/UVR-MDX-NET-Voc_FT.onnx

$EXE \
  --debug=1 \
  --num-threads=2 \
  --uvr-model=./UVR-MDX-NET-Voc_FT.onnx \
  --input-wav=./qi-feng-le-zh.wav \
  --output-vocals-wav=uvr_output_vocals.wav \
  --output-accompaniment-wav=uvr_output_non_vocals.wav

rm ./UVR-MDX-NET-Voc_FT.onnx \

mkdir source-separation-wavs
mv qi-feng-le-zh.wav source-separation-wavs
mv spleeter_*.wav ./source-separation-wavs
mv uvr_*.wav ./source-separation-wavs


================================================
FILE: .github/scripts/test-offline-speech-denoiser.sh
================================================
#!/usr/bin/env bash

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

if [ -z $EXE ]; then
  EXE=./build/bin/sherpa-onnx-offline-denoiser
fi

echo "EXE is $EXE"
echo "PATH: $PATH"

which $EXE

log "------------------------------------------------------------"
log "Run gtcrn"
log "------------------------------------------------------------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/speech_with_noise.wav

$EXE \
  --debug=1 \
  --speech-denoiser-gtcrn-model=./gtcrn_simple.onnx \
  --input-wav=./speech_with_noise.wav \
  --output-wav=./enhanced_speech_16k.wav

rm ./gtcrn_simple.onnx


================================================
FILE: .github/scripts/test-offline-transducer.sh
================================================
#!/usr/bin/env bash

set -e

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

export GIT_CLONE_PROTECTION_ACTIVE=false

echo "EXE is $EXE"
echo "PATH: $PATH"

which $EXE

log "------------------------------------------------------------"
log "Run NeMo GigaAM Russian models v2"
log "------------------------------------------------------------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-transducer-giga-am-v2-russian-2025-04-19.tar.bz2
tar xvf sherpa-onnx-nemo-transducer-giga-am-v2-russian-2025-04-19.tar.bz2
rm sherpa-onnx-nemo-transducer-giga-am-v2-russian-2025-04-19.tar.bz2

$EXE \
  --encoder=./sherpa-onnx-nemo-transducer-giga-am-v2-russian-2025-04-19/encoder.int8.onnx \
  --decoder=./sherpa-onnx-nemo-transducer-giga-am-v2-russian-2025-04-19/decoder.onnx \
  --joiner=./sherpa-onnx-nemo-transducer-giga-am-v2-russian-2025-04-19/joiner.onnx \
  --tokens=./sherpa-onnx-nemo-transducer-giga-am-v2-russian-2025-04-19/tokens.txt \
  --model-type=nemo_transducer \
  ./sherpa-onnx-nemo-transducer-giga-am-v2-russian-2025-04-19/test_wavs/example.wav

rm -rf sherpa-onnx-nemo-transducer-giga-am-v2-russian-2025-04-19


log "------------------------------------------------------------------------"
log "Run zipformer transducer models (Russian)                              "
log "------------------------------------------------------------------------"
for type in small-zipformer zipformer; do
  url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-$type-ru-2024-09-18.tar.bz2
  name=$(basename $url)
  curl -SL -O $url
  tar xvf $name
  rm $name
  repo=$(basename -s .tar.bz2 $name)
  ls -lh $repo

  log "test $repo"
  test_wavs=(
  0.wav
  1.wav
  )

  for w in ${test_wavs[@]}; do
    time $EXE \
      --tokens=$repo/tokens.txt \
      --encoder=$repo/encoder.onnx \
      --decoder=$repo/decoder.onnx \
      --joiner=$repo/joiner.onnx \
      --debug=1 \
      $repo/test_wavs/$w
  done

  for w in ${test_wavs[@]}; do
    time $EXE \
      --tokens=$repo/tokens.txt \
      --encoder=$repo/encoder.int8.onnx \
      --decoder=$repo/decoder.onnx \
      --joiner=$repo/joiner.int8.onnx \
      --debug=1 \
      $repo/test_wavs/$w
  done
  rm -rf $repo
done

log "------------------------------------------------------------------------"
log "Run zipformer transducer models (Japanese from ReazonSpeech)                              "
log "------------------------------------------------------------------------"
url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01.tar.bz2

name=$(basename $url)
curl -SL -O $url
tar xvf $name
rm $name
repo=$(basename -s .tar.bz2 $name)
ls -lh $repo

cat $repo/test_wavs/*.txt

log "test $repo"
test_wavs=(
1.wav
2.wav
3.wav
4.wav
5.wav
)

for w in ${test_wavs[@]}; do
  time $EXE \
    --tokens=$repo/tokens.txt \
    --encoder=$repo/encoder-epoch-99-avg-1.onnx \
    --decoder=$repo/decoder-epoch-99-avg-1.onnx \
    --joiner=$repo/joiner-epoch-99-avg-1.onnx \
    --debug=1 \
    $repo/test_wavs/$w
done

for w in ${test_wavs[@]}; do
  time $EXE \
    --tokens=$repo/tokens.txt \
    --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \
    --decoder=$repo/decoder-epoch-99-avg-1.onnx \
    --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \
    --debug=1 \
    $repo/test_wavs/$w
done
rm -rf $repo

log "------------------------------------------------------------------------"
log "Run Nemo fast conformer hybrid transducer ctc models (transducer branch)"
log "------------------------------------------------------------------------"

url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
name=$(basename $url)
curl -SL -O $url
tar xvf $name
rm $name
repo=$(basename -s .tar.bz2 $name)
ls -lh $repo

log "test $repo"
test_wavs=(
de-german.wav
es-spanish.wav
hr-croatian.wav
po-polish.wav
uk-ukrainian.wav
en-english.wav
fr-french.wav
it-italian.wav
ru-russian.wav
)
for w in ${test_wavs[@]}; do
  time $EXE \
    --tokens=$repo/tokens.txt \
    --encoder=$repo/encoder.onnx \
    --decoder=$repo/decoder.onnx \
    --joiner=$repo/joiner.onnx \
    --debug=1 \
    $repo/test_wavs/$w
done

rm -rf $repo

url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-transducer-en-24500.tar.bz2
name=$(basename $url)
curl -SL -O $url
tar xvf $name
rm $name
repo=$(basename -s .tar.bz2 $name)
ls -lh $repo

log "Test $repo"

time $EXE \
  --tokens=$repo/tokens.txt \
  --encoder=$repo/encoder.onnx \
  --decoder=$repo/decoder.onnx \
  --joiner=$repo/joiner.onnx \
  --debug=1 \
  $repo/test_wavs/en-english.wav

rm -rf $repo

url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-transducer-es-1424.tar.bz2
name=$(basename $url)
curl -SL -O $url
tar xvf $name
rm $name
repo=$(basename -s .tar.bz2 $name)
ls -lh $repo

log "test $repo"

time $EXE \
  --tokens=$repo/tokens.txt \
  --encoder=$repo/encoder.onnx \
  --decoder=$repo/decoder.onnx \
  --joiner=$repo/joiner.onnx \
  --debug=1 \
  $repo/test_wavs/es-spanish.wav

rm -rf $repo

url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288.tar.bz2
name=$(basename $url)
curl -SL -O $url
tar xvf $name
rm $name
repo=$(basename -s .tar.bz2 $name)
ls -lh $repo

log "Test $repo"

time $EXE \
  --tokens=$repo/tokens.txt \
  --encoder=$repo/encoder.onnx \
  --decoder=$repo/decoder.onnx \
  --joiner=$repo/joiner.onnx \
  --debug=1 \
  $repo/test_wavs/en-english.wav \
  $repo/test_wavs/de-german.wav \
  $repo/test_wavs/fr-french.wav \
  $repo/test_wavs/es-spanish.wav

rm -rf $repo

log "------------------------------------------------------------"
log "Run Conformer transducer (English)"
log "------------------------------------------------------------"

repo_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-conformer-en-2023-03-18.tar.bz2
curl -SL -O $repo_url
tar xvf sherpa-onnx-conformer-en-2023-03-18.tar.bz2
rm sherpa-onnx-conformer-en-2023-03-18.tar.bz2
log "Start testing ${repo_url}"
repo=sherpa-onnx-conformer-en-2023-03-18
log "Download pretrained model and test-data from $repo_url"

time $EXE \
  --tokens=$repo/tokens.txt \
  --encoder=$repo/encoder-epoch-99-avg-1.onnx \
  --decoder=$repo/decoder-epoch-99-avg-1.onnx \
  --joiner=$repo/joiner-epoch-99-avg-1.onnx \
  --num-threads=2 \
  $repo/test_wavs/0.wav \
  $repo/test_wavs/1.wav \
  $repo/test_wavs/8k.wav

time $EXE \
  --tokens=$repo/tokens.txt \
  --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \
  --decoder=$repo/decoder-epoch-99-avg-1.onnx \
  --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \
  --num-threads=2 \
  $repo/test_wavs/0.wav \
  $repo/test_wavs/1.wav \
  $repo/test_wavs/8k.wav

rm -rf $repo

log "------------------------------------------------------------"
log "Run Zipformer transducer (English)"
log "------------------------------------------------------------"

repo_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-03-30.tar.bz2
curl -SL -O $repo_url
tar xvf sherpa-onnx-zipformer-en-2023-03-30.tar.bz2
rm sherpa-onnx-zipformer-en-2023-03-30.tar.bz2
repo=sherpa-onnx-zipformer-en-2023-03-30
log "Start testing ${repo_url}"

time $EXE \
  --tokens=$repo/tokens.txt \
  --encoder=$repo/encoder-epoch-99-avg-1.onnx \
  --decoder=$repo/decoder-epoch-99-avg-1.onnx \
  --joiner=$repo/joiner-epoch-99-avg-1.onnx \
  --num-threads=2 \
  $repo/test_wavs/0.wav \
  $repo/test_wavs/1.wav \
  $repo/test_wavs/8k.wav

time $EXE \
  --tokens=$repo/tokens.txt \
  --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \
  --decoder=$repo/decoder-epoch-99-avg-1.onnx \
  --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \
  --num-threads=2 \
  $repo/test_wavs/0.wav \
  $repo/test_wavs/1.wav \
  $repo/test_wavs/8k.wav

lm_repo_url=https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm
log "Download pre-trained RNN-LM model from ${lm_repo_url}"
GIT_LFS_SKIP_SMUDGE=1 git clone $lm_repo_url
lm_repo=$(basename $lm_repo_url)
pushd $lm_repo
git lfs pull --include "exp/no-state-epoch-99-avg-1.onnx"
popd

bigram_repo_url=https://huggingface.co/vsd-vector/librispeech_bigram_sherpa-onnx-zipformer-large-en-2023-06-26
log "Download bi-gram LM from ${bigram_repo_url}"
GIT_LFS_SKIP_SMUDGE=1 git clone $bigram_repo_url
bigramlm_repo=$(basename $bigram_repo_url)
pushd $bigramlm_repo
git lfs pull --include "2gram.fst"
popd

log "Start testing with LM and bi-gram LODR"
# TODO: find test examples that change with the LODR
time $EXE \
  --tokens=$repo/tokens.txt \
  --encoder=$repo/encoder-epoch-99-avg-1.onnx \
  --decoder=$repo/decoder-epoch-99-avg-1.onnx \
  --joiner=$repo/joiner-epoch-99-avg-1.onnx \
  --num-threads=2 \
  --decoding_method="modified_beam_search" \
  --lm=$lm_repo/exp/no-state-epoch-99-avg-1.onnx \
  --lodr-fst=$bigramlm_repo/2gram.fst \
  --lodr-scale=-0.5  \
  $repo/test_wavs/0.wav \
  $repo/test_wavs/1.wav \
  $repo/test_wavs/8k.wav

rm -rf $repo $lm_repo $bigramlm_repo

log "------------------------------------------------------------"
log "Run Paraformer (Chinese)"
log "------------------------------------------------------------"
# For onnxruntime 1.18.0, sherpa-onnx-paraformer-zh-2023-03-28 throws the following error
# libc++abi: terminating with uncaught exception of type Ort::Exception: Node (Loop_5471)
# Op (Loop) [TypeInferenceError] Graph attribute inferencing failed: Node (Concat_5490)
# Op (Concat) [ShapeInferenceError] All inputs to Concat must have same rank. Input 1 has rank 2 != 1
#
# See https://github.com/microsoft/onnxruntime/issues/8115
# We need to re-export this model using a recent version of onnxruntime and onnx

log "------------------------------------------------------------"
log "Run Paraformer (Chinese) with timestamps"
log "------------------------------------------------------------"

repo_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
curl -SL -O $repo_url
tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
rm sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
repo=sherpa-onnx-paraformer-zh-2023-09-14

log "Start testing ${repo_url}"

time $EXE \
  --tokens=$repo/tokens.txt \
  --paraformer=$repo/model.int8.onnx \
  --num-threads=2 \
  --decoding-method=greedy_search \
  $repo/test_wavs/0.wav \
  $repo/test_wavs/1.wav \
  $repo/test_wavs/2.wav \
  $repo/test_wavs/8k.wav

rm -rf $repo

log "------------------------------------------------------------"
log "Run NeMo transducer (modified_beam_search + hotwords)"
log "------------------------------------------------------------"

url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-transducer-en-24500.tar.bz2
name=$(basename $url)
curl -SL -O $url
tar xvf $name
rm $name
repo=$(basename -s .tar.bz2 $name)
ls -lh $repo

log "Test NeMo transducer with modified_beam_search (no hotwords)"

time $EXE \
  --tokens=$repo/tokens.txt \
  --encoder=$repo/encoder.onnx \
  --decoder=$repo/decoder.onnx \
  --joiner=$repo/joiner.onnx \
  --model-type=nemo_transducer \
  --decoding-method=modified_beam_search \
  --debug=1 \
  $repo/test_wavs/en-english.wav

log "Test NeMo transducer with modified_beam_search and hotwords"

# Create hotwords file (BPE tokens for common English words)
cat > $repo/hotwords.txt << EOF
▁THE
▁AND
▁THAT
EOF

time $EXE \
  --tokens=$repo/tokens.txt \
  --encoder=$repo/encoder.onnx \
  --decoder=$repo/decoder.onnx \
  --joiner=$repo/joiner.onnx \
  --model-type=nemo_transducer \
  --decoding-method=modified_beam_search \
  --hotwords-file=$repo/hotwords.txt \
  --hotwords-score=1.5 \
  --debug=1 \
  $repo/test_wavs/en-english.wav

rm -rf $repo


================================================
FILE: .github/scripts/test-offline-tts.sh
================================================
#!/usr/bin/env bash

set -e

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

export GIT_CLONE_PROTECTION_ACTIVE=false

echo "EXE is $EXE"
echo "PATH: $PATH"

which $EXE

# test waves are saved in ./tts
mkdir ./tts

log "------------------------------------------------------------"
log "sherpa-onnx-pocket-tts-int8-2026-01-26"
log "------------------------------------------------------------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
tar xvf sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
rm sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2

$EXE \
  --pocket-lm-flow=./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_flow.int8.onnx \
  --pocket-lm-main=./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_main.int8.onnx \
  --pocket-encoder=./sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx \
  --pocket-decoder=./sherpa-onnx-pocket-tts-int8-2026-01-26/decoder.int8.onnx \
  --pocket-text-conditioner=./sherpa-onnx-pocket-tts-int8-2026-01-26/text_conditioner.onnx \
  --pocket-vocab-json=./sherpa-onnx-pocket-tts-int8-2026-01-26/vocab.json \
  --pocket-token-scores-json=./sherpa-onnx-pocket-tts-int8-2026-01-26/token_scores.json \
  --reference-audio=./sherpa-onnx-pocket-tts-int8-2026-01-26/test_wavs/bria.wav \
  --num-threads=2 \
  --debug=1 \
  --num-steps=5 \
  --output-filename="./tts/pocket-tts-out-bria.wav" \
    "I am happy to join with you today in what will go down in history as the greatest demonstration for freedom in the history of our nation. Five score years ago, a great American, in whose symbolic shadow we stand today, signed the Emancipation Proclamation. This momentous decree came as a great beacon light of hope to millions of Negro slaves who had been seared in the flames of withering injustice. It came as a joyous daybreak to end the long night of their captivity. But one hundred years later, the Negro still is not free. One hundred years later, the life of the Negro is still sadly crippled by the manacles of segregation and the chains of discrimination. One hundred years later, the Negro lives on a lonely island of poverty in the midst of a vast ocean of material prosperity. One hundred years later, the Negro is still languished in the corners of American society and finds himself an exile in his own land. And so we've come here today to dramatize a shameful condition. In a sense we've come to our nation's capital to cash a check. When the architects of our republic wrote the magnificent words of the Constitution and the Declaration of Independence, they were signing a promissory note to which every American was to fall heir. This note was a promise that all men, yes, black men as well as white men, would be guaranteed the unalienable Rights of Life, Liberty and the pursuit of Happiness. It is obvious today that America has defaulted on this promissory note, insofar as her citizens of color are concerned. Instead of honoring this sacred obligation, America has given the Negro people a bad check, a check which has come back marked insufficient funds."

rm -rf sherpa-onnx-pocket-tts-int8-2026-01-26

log "------------------------------------------------------------"
log "kokoro-en-v0_19"
log "------------------------------------------------------------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
tar xf kokoro-en-v0_19.tar.bz2
rm kokoro-en-v0_19.tar.bz2

# mapping of sid to voice name
# 0->af, 1->af_bella, 2->af_nicole, 3->af_sarah, 4->af_sky, 5->am_adam
# 6->am_michael, 7->bf_emma, 8->bf_isabella, 9->bm_george, 10->bm_lewis

for sid in $(seq 0 10); do
  $EXE \
    --debug=1 \
    --kokoro-model=./kokoro-en-v0_19/model.onnx \
    --kokoro-voices=./kokoro-en-v0_19/voices.bin \
    --kokoro-tokens=./kokoro-en-v0_19/tokens.txt \
    --kokoro-data-dir=./kokoro-en-v0_19/espeak-ng-data \
    --num-threads=2 \
    --sid=$sid \
    --output-filename="./tts/kokoro-$sid.wav" \
    "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be  a statesman, a businessman, an official, or a scholar."
done
rm -rf kokoro-en-v0_19

log "------------------------------------------------------------"
log "matcha-tts-fa_en-musa"
log "------------------------------------------------------------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-tts-fa_en-musa.tar.bz2
tar xvf matcha-tts-fa_en-musa.tar.bz2
rm matcha-tts-fa_en-musa.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx


$EXE \
  --matcha-acoustic-model=./matcha-tts-fa_en-musa/model.onnx \
  --matcha-vocoder=./vocos-22khz-univ.onnx \
  --matcha-tokens=./matcha-tts-fa_en-musa/tokens.txt \
  --matcha-data-dir=./matcha-tts-fa_en-musa/espeak-ng-data \
  --output-filename=./tts/test-matcha-fa-en-musa.wav \
  --num-threads=2 \
  "How are you doing today?  این یک نمونه ی تست فارسی است. This is a test."

rm -rf matcha-tts-fa_en-musa
rm vocos-22khz-univ.onnx
ls -lh tts/*.wav

log "------------------------------------------------------------"
log "matcha-icefall-en_US-ljspeech"
log "------------------------------------------------------------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
rm matcha-icefall-en_US-ljspeech.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx


$EXE \
  --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
  --matcha-vocoder=./vocos-22khz-univ.onnx \
  --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
  --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
  --num-threads=2 \
  --output-filename=./tts/matcha-ljspeech-1.wav \
  --debug=1 \
 "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar."

rm vocos-22khz-univ.onnx
rm -rf matcha-icefall-en_US-ljspeech
ls -lh tts/*.wav

log "------------------------------------------------------------"
log "matcha-icefall-zh-baker"
log "------------------------------------------------------------"
curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
tar xvf matcha-icefall-zh-baker.tar.bz2
rm matcha-icefall-zh-baker.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx

$EXE \
  --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
  --matcha-vocoder=./vocos-22khz-univ.onnx \
  --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \
  --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \
  --num-threads=2 \
  --debug=1 \
  --output-filename=./tts/matcha-baker-zh-1.wav \
  '小米的使命是，始终坚持做"感动人心、价格厚道"的好产品，让全球每个人都能享受科技带来的美好生活'

$EXE \
  --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
  --matcha-vocoder=./vocos-22khz-univ.onnx \
  --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \
  --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \
  --num-threads=2 \
  --debug=1 \
  --output-filename=./tts/matcha-baker-zh-2.wav \
  "当夜幕降临，星光点点，伴随着微风拂面，我在静谧中感受着时光的流转，思念如涟漪荡漾，梦境如画卷展开，我与自然融为一体，沉静在这片宁静的美丽之中，感受着生命的奇迹与温柔。"

rm vocos-22khz-univ.onnx
rm -rf matcha-icefall-zh-baker

log "------------------------------------------------------------"
log "vits-piper-en_US-amy-low"
log "------------------------------------------------------------"
curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
tar xf vits-piper-en_US-amy-low.tar.bz2
rm vits-piper-en_US-amy-low.tar.bz2

$EXE \
  --vits-model=./vits-piper-en_US-amy-low/en_US-amy-low.onnx \
  --vits-tokens=./vits-piper-en_US-amy-low/tokens.txt \
  --vits-data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
  --debug=1 \
  --output-filename=./tts/amy.wav \
  "“Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.” The sun shone bleakly in the sky, its meager light struggling to penetrate the thick foliage of the forest. Birds sang their songs up in the crowns of the trees, fluttering from one branch to the other. A blanket of total tranquility lied over the forest. The peace was only broken by the steady gallop of the horses of the soldiers who were traveling to their upcoming knighting the morrow at Camelot, and rowdy conversation. “Finally we will get what we deserve,” “It’s been about time,” Perceval agreed. “We’ve been risking our arses for the past two years. It’s the least they could give us.” Merlin remained ostensibly silent, refusing to join the verbal parade of self-aggrandizing his fellow soldiers have engaged in. He found it difficult to happy about anything, when even if they had won the war, he had lost everything else in the process."

file ./tts/amy.wav
rm -rf vits-piper-en_US-amy-low

log "------------------------------------------------------------"
log "vits-ljs test"
log "------------------------------------------------------------"

repo_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-ljs.tar.bz2
curl -SL -O $repo_url
tar xvf vits-ljs.tar.bz2
rm vits-ljs.tar.bz2
repo=vits-ljs

log "Start testing ${repo_url}"

$EXE \
  --vits-model=$repo/vits-ljs.onnx \
  --vits-lexicon=$repo/lexicon.txt \
  --vits-tokens=$repo/tokens.txt \
  --output-filename=./tts/vits-ljs.wav \
  'liliana, the most beautiful and lovely assistant of our team!'

ls -lh ./tts

rm -rfv $repo

log "------------------------------------------------------------"
log "vits-vctk test"
log "------------------------------------------------------------"

repo_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-vctk.tar.bz2
curl -SL -O $repo_url
tar xvf vits-vctk.tar.bz2
rm vits-vctk.tar.bz2
repo=vits-vctk

log "Start testing ${repo_url}"

for sid in 0 10 90; do
  $EXE \
    --vits-model=$repo/vits-vctk.onnx \
    --vits-lexicon=$repo/lexicon.txt \
    --vits-tokens=$repo/tokens.txt \
    --sid=$sid \
    --output-filename=./tts/vits-vctk-${sid}.wav \
    'liliana, the most beautiful and lovely assistant of our team!'
done

rm -rfv $repo

ls -lh tts/

log "------------------------------------------------------------"
log "vits-zh-aishell3"
log "------------------------------------------------------------"

repo_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2
curl -SL -O $repo_url
tar xvf vits-zh-aishell3.tar.bz2
rm vits-zh-aishell3.tar.bz2
repo=vits-zh-aishell3

log "Start testing ${repo_url}"

for sid in 0 10 90; do
  $EXE \
    --vits-model=$repo/vits-aishell3.onnx \
    --vits-lexicon=$repo/lexicon.txt \
    --vits-tokens=$repo/tokens.txt \
    --sid=$sid \
    --output-filename=./tts/vits-aishell3-${sid}.wav \
    '林美丽最美丽'
done

rm -rfv $repo

ls -lh ./tts/


================================================
FILE: .github/scripts/test-offline-whisper.sh
================================================
#!/usr/bin/env bash

set -e

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

export GIT_CLONE_PROTECTION_ACTIVE=false

echo "EXE is $EXE"
echo "PATH: $PATH"

which $EXE

names=(
tiny.en
base.en
small.en
medium.en
tiny
base
small
medium
distil-medium.en
distil-small.en
)

for name in ${names[@]}; do
  log "------------------------------------------------------------"
  log "Run $name"
  log "------------------------------------------------------------"

  repo_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-$name.tar.bz2
  curl -SL -O $repo_url
  tar xvf sherpa-onnx-whisper-$name.tar.bz2
  rm sherpa-onnx-whisper-$name.tar.bz2
  repo=sherpa-onnx-whisper-$name
  log "Start testing ${repo_url}"

  log "test fp32 onnx"

  time $EXE \
    --tokens=$repo/${name}-tokens.txt \
    --whisper-encoder=$repo/${name}-encoder.onnx \
    --whisper-decoder=$repo/${name}-decoder.onnx \
    --whisper-tail-paddings=500 \
    --num-threads=2 \
    $repo/test_wavs/0.wav \
    $repo/test_wavs/1.wav \
    $repo/test_wavs/8k.wav

  log "test int8 onnx"

  time $EXE \
    --tokens=$repo/${name}-tokens.txt \
    --whisper-encoder=$repo/${name}-encoder.int8.onnx \
    --whisper-decoder=$repo/${name}-decoder.int8.onnx \
    --whisper-tail-paddings=500 \
    --num-threads=2 \
    $repo/test_wavs/0.wav \
    $repo/test_wavs/1.wav \
    $repo/test_wavs/8k.wav

  rm -rf $repo
done


================================================
FILE: .github/scripts/test-online-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

export GIT_CLONE_PROTECTION_ACTIVE=false

echo "EXE is $EXE"
echo "PATH: $PATH"

which $EXE

log "------------------------------------------------------------"
log "Run streaming NeMo CTC                                      "
log "------------------------------------------------------------"

url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms.tar.bz2
name=$(basename $url)
repo=$(basename -s .tar.bz2 $name)

curl -SL -O $url
tar xvf $name
rm $name
ls -lh $repo

$EXE \
  --nemo-ctc-model=$repo/model.onnx \
  --tokens=$repo/tokens.txt \
  $repo/test_wavs/0.wav \
  $repo/test_wavs/1.wav \
  $repo/test_wavs/8k.wav

rm -rf $repo

log "------------------------------------------------------------"
log "Run streaming Zipformer2 CTC HLG decoding                   "
log "------------------------------------------------------------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
repo=$PWD/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18
ls -lh $repo
echo "pwd: $PWD"

$EXE \
  --zipformer2-ctc-model=$repo/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx \
  --ctc-graph=$repo/HLG.fst \
  --tokens=$repo/tokens.txt \
  $repo/test_wavs/0.wav \
  $repo/test_wavs/1.wav \
  $repo/test_wavs/8k.wav

rm -rf $repo

log "------------------------------------------------------------"
log "Run streaming Zipformer2 CTC                                "
log "------------------------------------------------------------"

url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
repo=$(basename -s .tar.bz2 $url)
curl -SL -O $url
tar xvf $repo.tar.bz2
rm $repo.tar.bz2

log "test fp32"

time $EXE \
  --debug=1 \
  --zipformer2-ctc-model=$repo/ctc-epoch-20-avg-1-chunk-16-left-128.onnx \
  --tokens=$repo/tokens.txt \
  $repo/test_wavs/DEV_T0000000000.wav \
  $repo/test_wavs/DEV_T0000000001.wav \
  $repo/test_wavs/DEV_T0000000002.wav

rm -rf $repo

log "------------------------------------------------------------"
log "Run streaming Conformer CTC from WeNet"
log "------------------------------------------------------------"
wenet_models=(
sherpa-onnx-zh-wenet-aishell
# sherpa-onnx-zh-wenet-aishell2
# sherpa-onnx-zh-wenet-wenetspeech
# sherpa-onnx-zh-wenet-multi-cn
sherpa-onnx-en-wenet-librispeech
# sherpa-onnx-en-wenet-gigaspeech
)
for name in ${wenet_models[@]}; do
  repo_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$name.tar.bz2
  curl -SL -O $repo_url
  tar xvf $name.tar.bz2
  rm $name.tar.bz2
  repo=$name
  log "Start testing ${repo_url}"

  log "test float32 models"
  time $EXE \
    --tokens=$repo/tokens.txt \
    --wenet-ctc-model=$repo/model-streaming.onnx \
    $repo/test_wavs/0.wav \
    $repo/test_wavs/1.wav \
    $repo/test_wavs/8k.wav

  log "test int8 models"
  time $EXE \
    --tokens=$repo/tokens.txt \
    --wenet-ctc-model=$repo/model-streaming.int8.onnx \
    $repo/test_wavs/0.wav \
    $repo/test_wavs/1.wav \
    $repo/test_wavs/8k.wav

  rm -rf $repo
done


================================================
FILE: .github/scripts/test-online-paraformer.sh
================================================
#!/usr/bin/env bash

set -e

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

export GIT_CLONE_PROTECTION_ACTIVE=false

echo "EXE is $EXE"
echo "PATH: $PATH"

which $EXE

log "------------------------------------------------------------"
log "Run streaming Paraformer"
log "------------------------------------------------------------"

repo_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
curl -SL -O $repo_url
tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
repo=sherpa-onnx-streaming-paraformer-bilingual-zh-en

log "Start testing ${repo_url}"

time $EXE \
  --tokens=$repo/tokens.txt \
  --paraformer-encoder=$repo/encoder.onnx \
  --paraformer-decoder=$repo/decoder.onnx \
  --num-threads=2 \
  $repo/test_wavs/0.wav \
  $repo/test_wavs/1.wav \
  $repo/test_wavs/2.wav \
  $repo/test_wavs/3.wav \
  $repo/test_wavs/8k.wav

time $EXE \
  --tokens=$repo/tokens.txt \
  --paraformer-encoder=$repo/encoder.int8.onnx \
  --paraformer-decoder=$repo/decoder.int8.onnx \
  --num-threads=2 \
  $repo/test_wavs/0.wav \
  $repo/test_wavs/1.wav \
  $repo/test_wavs/2.wav \
  $repo/test_wavs/3.wav \
  $repo/test_wavs/8k.wav

rm -rf $repo


================================================
FILE: .github/scripts/test-online-punctuation.sh
================================================
#!/usr/bin/env bash

set -ex

echo "TODO(fangjun): Skip this test since the sanitizer test is failed. We need to fix it"
exit 0

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

echo "EXE is $EXE"
echo "PATH: $PATH"

which $EXE

log "------------------------------------------------------------"
log "Download the punctuation model                             "
log "------------------------------------------------------------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-online-punct-en-2024-08-06.tar.bz2

tar xvf sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
rm sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
repo=sherpa-onnx-online-punct-en-2024-08-06
ls -lh $repo

for m in model.onnx model.int8.onnx; do
  $EXE \
   --debug=1 \
   --cnn-bilstm=$repo/$m \
   --bpe-vocab=$repo/bpe.vocab \
   "How are you i am fine thank you"

  $EXE \
   --debug=1 \
   --cnn-bilstm=$repo/$m \
   --bpe-vocab=$repo/bpe.vocab \
   "The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry"
done

rm -rf $repo


================================================
FILE: .github/scripts/test-online-transducer.sh
================================================
#!/usr/bin/env bash

set -e

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

export GIT_CLONE_PROTECTION_ACTIVE=false

echo "EXE is $EXE"
echo "PATH: $PATH"

which $EXE

log "------------------------------------------------------------"
log "Run NeMo transducer (English)"
log "------------------------------------------------------------"
repo_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms.tar.bz2
curl -SL -O $repo_url
tar xvf sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms.tar.bz2
rm sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms.tar.bz2
repo=sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms

log "Start testing ${repo_url}"

waves=(
$repo/test_wavs/0.wav
$repo/test_wavs/1.wav
$repo/test_wavs/8k.wav
)

for wave in ${waves[@]}; do
  time $EXE \
  --tokens=$repo/tokens.txt \
  --encoder=$repo/encoder.onnx \
  --decoder=$repo/decoder.onnx \
  --joiner=$repo/joiner.onnx \
  --num-threads=2 \
  $wave
done

time $EXE \
  --tokens=$repo/tokens.txt \
  --encoder=$repo/encoder.onnx \
  --decoder=$repo/decoder.onnx \
  --joiner=$repo/joiner.onnx \
  --num-threads=2 \
  $repo/test_wavs/0.wav \
  $repo/test_wavs/1.wav \
  $repo/test_wavs/8k.wav

rm -rf $repo

log "------------------------------------------------------------"
log "Run LSTM transducer (English)"
log "------------------------------------------------------------"

repo_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-lstm-en-2023-02-17.tar.bz2
curl -SL -O $repo_url
tar xvf sherpa-onnx-lstm-en-2023-02-17.tar.bz2
rm sherpa-onnx-lstm-en-2023-02-17.tar.bz2
repo=sherpa-onnx-lstm-en-2023-02-17

log "Start testing ${repo_url}"

waves=(
$repo/test_wavs/0.wav
$repo/test_wavs/1.wav
$repo/test_wavs/8k.wav
)

for wave in ${waves[@]}; do
  time $EXE \
  --tokens=$repo/tokens.txt \
  --encoder=$repo/encoder-epoch-99-avg-1.onnx \
  --decoder=$repo/decoder-epoch-99-avg-1.onnx \
  --joiner=$repo/joiner-epoch-99-avg-1.onnx \
  --num-threads=2 \
  $wave
done

for wave in ${waves[@]}; do
  time $EXE \
  --tokens=$repo/tokens.txt \
  --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \
  --decoder=$repo/decoder-epoch-99-avg-1.onnx \
  --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \
  --num-threads=2 \
  $wave
done

rm -rf $repo

log "------------------------------------------------------------"
log "Run LSTM transducer (Chinese)"
log "------------------------------------------------------------"

repo_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-lstm-zh-2023-02-20.tar.bz2
curl -SL -O $repo_url
tar xvf sherpa-onnx-lstm-zh-2023-02-20.tar.bz2
rm sherpa-onnx-lstm-zh-2023-02-20.tar.bz2
repo=sherpa-onnx-lstm-zh-2023-02-20

log "Start testing ${repo_url}"

waves=(
$repo/test_wavs/0.wav
$repo/test_wavs/1.wav
$repo/test_wavs/8k.wav
)

for wave in ${waves[@]}; do
  time $EXE \
  --tokens=$repo/tokens.txt \
  --encoder=$repo/encoder-epoch-11-avg-1.onnx \
  --decoder=$repo/decoder-epoch-11-avg-1.onnx \
  --joiner=$repo/joiner-epoch-11-avg-1.onnx \
  --num-threads=2 \
  $wave
done

for wave in ${waves[@]}; do
  time $EXE \
  --tokens=$repo/tokens.txt \
  --encoder=$repo/encoder-epoch-11-avg-1.int8.onnx \
  --decoder=$repo/decoder-epoch-11-avg-1.onnx \
  --joiner=$repo/joiner-epoch-11-avg-1.int8.onnx \
  --num-threads=2 \
  $wave
done

rm -rf $repo

log "------------------------------------------------------------"
log "Run streaming Zipformer transducer (English)"
log "------------------------------------------------------------"

repo_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-en-2023-02-21.tar.bz2
curl -SL -O $repo_url
tar xvf sherpa-onnx-streaming-zipformer-en-2023-02-21.tar.bz2
rm sherpa-onnx-streaming-zipformer-en-2023-02-21.tar.bz2
repo=sherpa-onnx-streaming-zipformer-en-2023-02-21

log "Start testing ${repo_url}"

waves=(
$repo/test_wavs/0.wav
$repo/test_wavs/1.wav
$repo/test_wavs/8k.wav
)

for wave in ${waves[@]}; do
  time $EXE \
  --tokens=$repo/tokens.txt \
  --encoder=$repo/encoder-epoch-99-avg-1.onnx \
  --decoder=$repo/decoder-epoch-99-avg-1.onnx \
  --joiner=$repo/joiner-epoch-99-avg-1.onnx \
  --num-threads=2 \
  $wave
done

# test int8
#
for wave in ${waves[@]}; do
  time $EXE \
  --tokens=$repo/tokens.txt \
  --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \
  --decoder=$repo/decoder-epoch-99-avg-1.onnx \
  --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \
  --num-threads=2 \
  $wave
done

lm_repo_url=https://huggingface.co/vsd-vector/icefall-librispeech-rnn-lm
log "Download pre-trained RNN-LM model from ${lm_repo_url}"
GIT_LFS_SKIP_SMUDGE=1 git clone $lm_repo_url
lm_repo=$(basename $lm_repo_url)
pushd $lm_repo
git lfs pull --include "with-state-epoch-99-avg-1.onnx"
popd

bigram_repo_url=https://huggingface.co/vsd-vector/librispeech_bigram_sherpa-onnx-zipformer-large-en-2023-06-26
log "Download bi-gram LM from ${bigram_repo_url}"
GIT_LFS_SKIP_SMUDGE=1 git clone $bigram_repo_url
bigramlm_repo=$(basename $bigram_repo_url)
pushd $bigramlm_repo
git lfs pull --include "2gram.fst"
popd

log "Start testing LODR"

waves=(
$repo/test_wavs/0.wav
$repo/test_wavs/1.wav
$repo/test_wavs/8k.wav
)

for wave in ${waves[@]}; do
  time $EXE \
  --tokens=$repo/tokens.txt \
  --encoder=$repo/encoder-epoch-99-avg-1.onnx \
  --decoder=$repo/decoder-epoch-99-avg-1.onnx \
  --joiner=$repo/joiner-epoch-99-avg-1.onnx \
  --num-threads=2 \
  --decoding_method="modified_beam_search" \
  --lm=$lm_repo/with-state-epoch-99-avg-1.onnx \
  --lodr-fst=$bigramlm_repo/2gram.fst \
  --lodr-scale=-0.5  \
  $wave
done

for wave in ${waves[@]}; do
  time $EXE \
  --tokens=$repo/tokens.txt \
  --encoder=$repo/encoder-epoch-99-avg-1.onnx \
  --decoder=$repo/decoder-epoch-99-avg-1.onnx \
  --joiner=$repo/joiner-epoch-99-avg-1.onnx \
  --num-threads=2 \
  --decoding_method="modified_beam_search" \
  --lm=$lm_repo/with-state-epoch-99-avg-1.onnx \
  --lodr-fst=$bigramlm_repo/2gram.fst \
  --lodr-scale=-0.5  \
  --lm-shallow-fusion=true \
  $wave
done

rm -rf $repo $bigramlm_repo $lm_repo

log "------------------------------------------------------------"
log "Run streaming Zipformer transducer (Bilingual, Chinese + English)"
log "------------------------------------------------------------"

repo_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
curl -SL -O $repo_url
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
repo=sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20

log "Start testing ${repo_url}"

waves=(
$repo/test_wavs/0.wav
$repo/test_wavs/1.wav
$repo/test_wavs/2.wav
$repo/test_wavs/3.wav
$repo/test_wavs/8k.wav
)

for wave in ${waves[@]}; do
  time $EXE \
  --tokens=$repo/tokens.txt \
  --encoder=$repo/encoder-epoch-99-avg-1.onnx \
  --decoder=$repo/decoder-epoch-99-avg-1.onnx \
  --joiner=$repo/joiner-epoch-99-avg-1.onnx \
  --num-threads=2 \
  $wave
done

for wave in ${waves[@]}; do
  time $EXE \
  --tokens=$repo/tokens.txt \
  --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \
  --decoder=$repo/decoder-epoch-99-avg-1.onnx \
  --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \
  --num-threads=2 \
  $wave
done

# Decode a URL
if [ $EXE == "sherpa-onnx-ffmpeg" ]; then
  time $EXE \
  $repo/tokens.txt \
  $repo/encoder-epoch-99-avg-1.onnx \
  $repo/decoder-epoch-99-avg-1.onnx \
  $repo/joiner-epoch-99-avg-1.onnx \
  https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/resolve/main/test_wavs/4.wav \
  2
fi

if [ $EXE == "sherpa-onnx-ffmpeg" ]; then
  time $EXE \
  $repo/tokens.txt \
  $repo/encoder-epoch-99-avg-1.int8.onnx \
  $repo/decoder-epoch-99-avg-1.onnx \
  $repo/joiner-epoch-99-avg-1.int8.onnx \
  https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/resolve/main/test_wavs/4.wav \
  2
fi

rm -rf $repo

log "------------------------------------------------------------"
log "Run streaming Conformer transducer (English)"
log "------------------------------------------------------------"

repo_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-conformer-en-2023-05-09.tar.bz2
curl -SL -O $repo_url
tar xvf sherpa-onnx-streaming-conformer-en-2023-05-09.tar.bz2
rm sherpa-onnx-streaming-conformer-en-2023-05-09.tar.bz2
repo=sherpa-onnx-streaming-conformer-en-2023-05-09

log "Start testing ${repo_url}"

waves=(
$repo/test_wavs/0.wav
$repo/test_wavs/1.wav
$repo/test_wavs/2.wav
)

for wave in ${waves[@]}; do
  time $EXE \
  --tokens=$repo/tokens.txt \
  --encoder=$repo/encoder-epoch-99-avg-1.onnx \
  --decoder=$repo/decoder-epoch-99-avg-1.onnx \
  --joiner=$repo/joiner-epoch-99-avg-1.onnx \
  --num-threads=2 \
  $wave
done

for wave in ${waves[@]}; do
  time $EXE \
  --tokens=$repo/tokens.txt \
  --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \
  --decoder=$repo/decoder-epoch-99-avg-1.onnx \
  --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \
  --num-threads=2 \
  $wave
done

rm -rf $repo


================================================
FILE: .github/scripts/test-python.sh
================================================
#!/usr/bin/env bash

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

log "test Supertonic TTS"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
tar xvf sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
rm sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2

python3 python-api-examples/supertonic-tts.py

rm -rf sherpa-onnx-supertonic-tts-int8-2026-03-06

mkdir -p tts
cp supertonic-en.wav tts/
ls -lh tts

log "test Moonshine v2"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
tar xvf sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
rm sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2

ls -lh sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27

python3 ./python-api-examples/offline-moonshine-decode-files-v2.py

rm -rf  sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27

log "test FireRedASR CTC"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
tar xvf sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
rm sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2

python3 ./python-api-examples/offline-fire-red-asr-ctc-decode-files.py

rm -rf sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25

log "test FireRedASR AED"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2

python3 ./python-api-examples/offline-fire-red-asr-decode-files.py

rm -rf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16

log "test PocketTTS"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
tar xvf sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
rm sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2

python3 ./python-api-examples/pocket-tts.py

rm -rf sherpa-onnx-pocket-tts-int8-2026-01-26

log "test ZipVoice TTS"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
tar xvf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
rm sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos_24khz.onnx

python3 ./python-api-examples/zipvoice-tts.py

cp generated-zipvoice-zh-en-python.wav tts/

rm -rf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia
rm -f vocos_24khz.onnx

log "test Google MedASR"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
tar xvf sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
rm sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
ls -lh sherpa-onnx-medasr-ctc-en-int8-2025-12-25

ls -lh sherpa-onnx-medasr-ctc-en-int8-2025-12-25/test_wavs

python3 ./python-api-examples/offline-medasr-ctc-decode-files.py
rm -rf sherpa-onnx-medasr-ctc-en-int8-2025-12-25

log "test omnilingual ASR"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
tar xvf sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
rm sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
ls -lh sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12

python3 ./python-api-examples/offline-omnilingual-asr-ctc-decode-files.py

rm -rf sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12

log "test T-one"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2

python3 ./python-api-examples/online-t-one-ctc-decode-files.py

rm -rf sherpa-onnx-streaming-t-one-russian-2025-09-08

log "test nemo canary"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
python3 ./python-api-examples/offline-nemo-canary-decode-files.py
rm -rf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8

log "test spleeter"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/sherpa-onnx-spleeter-2stems-fp16.tar.bz2
tar xvf sherpa-onnx-spleeter-2stems-fp16.tar.bz2
rm sherpa-onnx-spleeter-2stems-fp16.tar.bz2
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/qi-feng-le-zh.wav
./python-api-examples/offline-source-separation-spleeter.py
rm -rf sherpa-onnx-spleeter-2stems-fp16
rm qi-feng-le-zh.wav

log "test UVR"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/UVR_MDXNET_9482.onnx
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/qi-feng-le-zh.wav
./python-api-examples/offline-source-separation-uvr.py
rm UVR_MDXNET_9482.onnx
rm qi-feng-le-zh.wav

mkdir source-separation

mv spleeter-*.wav source-separation
mv uvr-*.wav source-separation

ls -lh source-separation


log "test offline dolphin ctc"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2

python3 ./python-api-examples/offline-dolphin-ctc-decode-files.py

rm -rf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02

log "test offline speech enhancement (GTCRN)"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet_baseline.onnx
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/speech_with_noise.wav
python3 ./python-api-examples/offline-speech-enhancement-gtcrn.py
python3 ./python-api-examples/offline-speech-enhancement-dpdfnet.py
python3 ./python-api-examples/online-speech-enhancement-gtcrn.py
python3 ./python-api-examples/online-speech-enhancement-dpdfnet.py
ls -lh *.wav

log "test offline zipformer (byte-level bpe, Chinese+English)"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-zh-en-2023-11-22.tar.bz2
tar xvf sherpa-onnx-zipformer-zh-en-2023-11-22.tar.bz2
rm sherpa-onnx-zipformer-zh-en-2023-11-22.tar.bz2

repo=sherpa-onnx-zipformer-zh-en-2023-11-22

./python-api-examples/offline-decode-files.py  \
  --tokens=$repo/tokens.txt \
  --encoder=$repo/encoder-epoch-34-avg-19.int8.onnx \
  --decoder=$repo/decoder-epoch-34-avg-19.onnx \
  --joiner=$repo/joiner-epoch-34-avg-19.int8.onnx \
  --num-threads=2 \
  --decoding-method=greedy_search \
  --debug=true \
  $repo/test_wavs/0.wav \
  $repo/test_wavs/1.wav \
  $repo/test_wavs/2.wav

rm -rf sherpa-onnx-zipformer-zh-en-2023-11-22

log "test offline Moonshine"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2

python3 ./python-api-examples/offline-moonshine-decode-files.py

rm -rf sherpa-onnx-moonshine-tiny-en-int8

log "test offline speaker diarization"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav

python3 ./python-api-examples/offline-speaker-diarization.py

rm -rf *.wav *.onnx ./sherpa-onnx-pyannote-segmentation-3-0


log "test_clustering"
pushd /tmp/
mkdir test-cluster
cd test-cluster
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
git clone https://github.com/csukuangfj/sr-data
popd

python3 ./sherpa-onnx/python/tests/test_fast_clustering.py

rm -rf /tmp/test-cluster

export GIT_CLONE_PROTECTION_ACTIVE=false

log "test offline SenseVoice CTC"
url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
name=$(basename $url)
repo=$(basename -s .tar.bz2 $name)

curl -SL -O $url
tar xvf $name
rm $name
ls -lh $repo
python3 ./python-api-examples/offline-sense-voice-ctc-decode-files.py

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
tar xf dict.tar.bz2
rm dict.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt

python3 ./python-api-examples/offline-sense-voice-ctc-decode-files-with-hr.py

rm -rf dict replace.fst test-hr.wav lexicon.txt

if [[ $(uname) == Linux ]]; then
  # It needs ffmpeg
  log  "generate subtitles (Chinese)"
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav

  python3 ./python-api-examples/generate-subtitles.py \
    --silero-vad-model=./silero_vad.onnx \
    --sense-voice=$repo/model.onnx \
    --tokens=$repo/tokens.txt \
    --num-threads=2 \
    ./lei-jun-test.wav

  cat lei-jun-test.srt

  rm lei-jun-test.wav

  log  "generate subtitles (English)"
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav

  python3 ./python-api-examples/generate-subtitles.py \
    --silero-vad-model=./silero_vad.onnx \
    --sense-voice=$repo/model.onnx \
    --tokens=$repo/tokens.txt \
    --num-threads=2 \
    ./Obama.wav

  cat Obama.srt
  rm Obama.wav
  rm silero_vad.onnx
fi
rm -rf $repo

log "test offline TeleSpeech CTC"
url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
name=$(basename $url)
repo=$(basename -s .tar.bz2 $name)

curl -SL -O $url
tar xvf $name
rm $name
ls -lh $repo
python3 ./python-api-examples/offline-telespeech-ctc-decode-files.py
rm -rf $repo

log "test online NeMo CTC"

url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms.tar.bz2
name=$(basename $url)
repo=$(basename -s .tar.bz2 $name)

curl -SL -O $url
tar xvf $name
rm $name
ls -lh $repo
python3 ./python-api-examples/online-nemo-ctc-decode-files.py
rm -rf $repo

log "test offline punctuation"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
repo=sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12
ls -lh $repo

python3 ./python-api-examples/add-punctuation.py

rm -rf $repo

log "test online punctuation"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
tar xvf sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
rm sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
repo=sherpa-onnx-online-punct-en-2024-08-06
ls -lh $repo

python3 ./python-api-examples/add-punctuation-online.py

rm -rf $repo

log "test audio tagging"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
tar xvf sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
rm sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
 python3 ./python-api-examples/audio-tagging-from-a-file.py
rm -rf sherpa-onnx-zipformer-audio-tagging-2024-04-09


log "test streaming zipformer2 ctc HLG decoding"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
repo=sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18

python3 ./python-api-examples/online-zipformer-ctc-hlg-decode-file.py \
  --debug 1 \
  --tokens ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt \
  --graph ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst \
  --model ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx \
  ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/0.wav

rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18


mkdir -p /tmp/icefall-models
dir=/tmp/icefall-models

pushd $dir

repo=$dir/icefall-asr-librispeech-streaming-zipformer-small-2024-03-18
mkdir -p $repo
cd $repo
mkdir exp-ctc-rnnt-small
cd exp-ctc-rnnt-small
curl -LS -O https://huggingface.co/csukuangfj/icefall-asr-librispeech-streaming-zipformer-small-2024-03-18/resolve/main/exp-ctc-rnnt-small/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx
cd ..
mkdir -p data/lang_bpe_500
cd data/lang_bpe_500
curl -LS -O https://huggingface.co/csukuangfj/icefall-asr-librispeech-streaming-zipformer-small-2024-03-18/resolve/main/data/lang_bpe_500/tokens.txt
cd ../..
mkdir test_wavs
cd test_wavs

curl -LS -O https://huggingface.co/csukuangfj/icefall-asr-librispeech-streaming-zipformer-small-2024-03-18/resolve/main/test_wavs/0.wav
curl -LS -O https://huggingface.co/csukuangfj/icefall-asr-librispeech-streaming-zipformer-small-2024-03-18/resolve/main/test_wavs/1.wav
curl -LS -O https://huggingface.co/csukuangfj/icefall-asr-librispeech-streaming-zipformer-small-2024-03-18/resolve/main/test_wavs/8k.wav
popd

python3 ./python-api-examples/online-decode-files.py \
  --tokens=$repo/data/lang_bpe_500/tokens.txt \
  --zipformer2-ctc=$repo/exp-ctc-rnnt-small/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx \
  $repo/test_wavs/0.wav \
  $repo/test_wavs/1.wav \
  $repo/test_wavs/8k.wav

rm -rf $repo

python3 sherpa-onnx/python/tests/test_offline_recognizer.py --verbose

wenet_models=(
# sherpa-onnx-zh-wenet-aishell
# sherpa-onnx-zh-wenet-aishell2
# sherpa-onnx-zh-wenet-wenetspeech
# sherpa-onnx-zh-wenet-multi-cn
sherpa-onnx-en-wenet-librispeech
# sherpa-onnx-en-wenet-gigaspeech
)

for name in ${wenet_models[@]}; do
  repo_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$name.tar.bz2
  curl -SL -O $repo_url
  tar xvf $name.tar.bz2
  rm $name.tar.bz2
  repo=$name
  log "Start testing ${repo_url}"

  if false; then
    # offline wenet ctc models are not supported by onnxruntime >= 1.18
    python3 ./python-api-examples/offline-decode-files.py \
      --tokens=$repo/tokens.txt \
      --wenet-ctc=$repo/model.onnx \
      $repo/test_wavs/0.wav \
      $repo/test_wavs/1.wav \
      $repo/test_wavs/8k.wav
  fi

  python3 ./python-api-examples/online-decode-files.py \
    --tokens=$repo/tokens.txt \
    --wenet-ctc=$repo/model-streaming.onnx \
    $repo/test_wavs/0.wav \
    $repo/test_wavs/1.wav \
    $repo/test_wavs/8k.wav

  python3 sherpa-onnx/python/tests/test_offline_recognizer.py --verbose

  python3 sherpa-onnx/python/tests/test_online_recognizer.py --verbose

  rm -rf $repo
done

log "Offline TTS test"
# test waves are saved in ./tts
mkdir -p ./tts

log "test kitten tts"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2
tar xf kitten-nano-en-v0_1-fp16.tar.bz2
rm kitten-nano-en-v0_1-fp16.tar.bz2

python3 ./python-api-examples/offline-tts.py \
  --debug=1 \
  --kitten-model=./kitten-nano-en-v0_1-fp16/model.fp16.onnx \
  --kitten-voices=./kitten-nano-en-v0_1-fp16/voices.bin \
  --kitten-tokens=./kitten-nano-en-v0_1-fp16/tokens.txt \
  --kitten-data-dir=./kitten-nano-en-v0_1-fp16/espeak-ng-data \
  --num-threads=2 \
  --sid=0 \
  --output-filename="./tts/kitten-0.wav" \
  "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar."

rm -rf kitten-nano-en-v0_1-fp16

log "kokoro-multi-lang-v1_0 test"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
tar xf kokoro-multi-lang-v1_0.tar.bz2
rm kokoro-multi-lang-v1_0.tar.bz2

python3 ./python-api-examples/offline-tts.py \
  --debug=1 \
  --kokoro-model=./kokoro-multi-lang-v1_0/model.onnx \
  --kokoro-voices=./kokoro-multi-lang-v1_0/voices.bin \
  --kokoro-tokens=./kokoro-multi-lang-v1_0/tokens.txt \
  --kokoro-data-dir=./kokoro-multi-lang-v1_0/espeak-ng-data \
  --kokoro-lexicon=./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt \
  --num-threads=2 \
  --sid=18 \
  --output-filename="./tts/kokoro-18-zh-en.wav" \
  "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢？"

rm -rf kokoro-multi-lang-v1_0

log "kokoro-en-v0_19 test"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
tar xf kokoro-en-v0_19.tar.bz2
rm kokoro-en-v0_19.tar.bz2

python3 ./python-api-examples/offline-tts.py \
  --debug=1 \
  --kokoro-model=./kokoro-en-v0_19/model.onnx \
  --kokoro-voices=./kokoro-en-v0_19/voices.bin \
  --kokoro-tokens=./kokoro-en-v0_19/tokens.txt \
  --kokoro-data-dir=./kokoro-en-v0_19/espeak-ng-data \
  --num-threads=2 \
  --sid=10 \
  --output-filename="./tts/kokoro-10.wav" \
  "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be  a statesman, a businessman, an official, or a scholar."

rm -rf kokoro-en-v0_19

log "matcha-ljspeech-en test"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
rm matcha-icefall-en_US-ljspeech.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx

python3 ./python-api-examples/offline-tts.py \
  --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
  --matcha-vocoder=./vocos-22khz-univ.onnx \
  --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
  --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
  --output-filename=./tts/test-matcha-ljspeech-en.wav \
  --num-threads=2 \
 "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar."

rm vocos-22khz-univ.onnx
rm -rf matcha-icefall-en_US-ljspeech

log "matcha-baker-zh test"

curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
tar xvf matcha-icefall-zh-baker.tar.bz2
rm matcha-icefall-zh-baker.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx

python3 ./python-api-examples/offline-tts.py \
 --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
 --matcha-vocoder=./vocos-22khz-univ.onnx \
 --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \
 --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \
 --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
 --output-filename=./tts/test-matcha-baker-zh.wav \
 "某某银行的副行长和一些行政领导表示，他们去过长江和长白山; 经济不断增长。2024年12月31号，拨打110或者18920240511。123456块钱。"

rm -rf matcha-icefall-zh-baker
rm vocos-22khz-univ.onnx

log "vits-ljs test"

curl -LS -O https://huggingface.co/csukuangfj/vits-ljs/resolve/main/vits-ljs.onnx
curl -LS -O https://huggingface.co/csukuangfj/vits-ljs/resolve/main/lexicon.txt
curl -LS -O https://huggingface.co/csukuangfj/vits-ljs/resolve/main/tokens.txt

python3 ./python-api-examples/offline-tts.py \
  --vits-model=./vits-ljs.onnx \
  --vits-lexicon=./lexicon.txt \
  --vits-tokens=./tokens.txt \
  --output-filename=./tts/vits-ljs.wav \
  'liliana, the most beautiful and lovely assistant of our team!'

ls -lh ./tts

rm -v vits-ljs.onnx ./lexicon.txt ./tokens.txt

log "vits-vctk test"
curl -LS -O https://huggingface.co/csukuangfj/vits-vctk/resolve/main/vits-vctk.onnx
curl -LS -O https://huggingface.co/csukuangfj/vits-vctk/resolve/main/lexicon.txt
curl -LS -O https://huggingface.co/csukuangfj/vits-vctk/resolve/main/tokens.txt

for sid in 0 10 90; do
  python3 ./python-api-examples/offline-tts.py \
    --vits-model=./vits-vctk.onnx \
    --vits-lexicon=./lexicon.txt \
    --vits-tokens=./tokens.txt \
    --sid=$sid \
    --output-filename=./tts/vits-vctk-${sid}.wav \
    'liliana, the most beautiful and lovely assistant of our team!'
done

rm -v vits-vctk.onnx ./lexicon.txt ./tokens.txt

if [[ x$OS != x'windows-latest' ]]; then
  echo "OS: $OS"

  log "vits-zh-aishell3"

  curl -LS -O https://huggingface.co/csukuangfj/vits-zh-aishell3/resolve/main/vits-aishell3.onnx
  curl -LS -O https://huggingface.co/csukuangfj/vits-zh-aishell3/resolve/main/lexicon.txt
  curl -LS -O https://huggingface.co/csukuangfj/vits-zh-aishell3/resolve/main/tokens.txt

  for sid in 0 10 90; do
    python3 ./python-api-examples/offline-tts.py \
      --vits-model=./vits-aishell3.onnx \
      --vits-lexicon=./lexicon.txt \
      --vits-tokens=./tokens.txt \
      --sid=$sid \
      --output-filename=./tts/vits-aishell3-${sid}.wav \
      '林美丽最美丽'
  done

  rm -v vits-aishell3.onnx ./lexicon.txt ./tokens.txt
fi

mkdir -p /tmp/icefall-models
dir=/tmp/icefall-models

log "Test streaming transducer models"

if [[ x$OS != x'windows-latest' ]]; then
  echo "OS: $OS"
  pushd $dir
  repo_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  curl -SL -O $repo_url
  tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  repo=sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20

  log "Start testing ${repo_url}"
  repo=$dir/$repo

  python3 -c "import sherpa_onnx; print(sherpa_onnx.__file__)"
  sherpa_onnx_version=$(python3 -c "import sherpa_onnx; print(sherpa_onnx.__version__)")

  echo "sherpa_onnx version: $sherpa_onnx_version"

  pwd
  ls -lh

  ls -lh $repo
  popd

  python3 ./python-api-examples/online-decode-files.py \
    --tokens=$repo/tokens.txt \
    --encoder=$repo/encoder-epoch-99-avg-1.onnx \
    --decoder=$repo/decoder-epoch-99-avg-1.onnx \
    --joiner=$repo/joiner-epoch-99-avg-1.onnx \
    $repo/test_wavs/0.wav \
    $repo/test_wavs/1.wav \
    $repo/test_wavs/2.wav \
    $repo/test_wavs/3.wav \
    $repo/test_wavs/8k.wav

  python3 ./python-api-examples/online-decode-files.py \
    --tokens=$repo/tokens.txt \
    --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \
    --decoder=$repo/decoder-epoch-99-avg-1.onnx \
    --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \
    $repo/test_wavs/0.wav \
    $repo/test_wavs/1.wav \
    $repo/test_wavs/2.wav \
    $repo/test_wavs/3.wav \
    $repo/test_wavs/8k.wav

  ln -s $repo $PWD/

  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav

  python3 ./python-api-examples/inverse-text-normalization-online-asr.py

  python3 sherpa-onnx/python/tests/test_online_recognizer.py --verbose

  rm -rfv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20

  rm -rf $repo
fi

log "Test non-streaming transducer models"

pushd $dir
repo_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
log "Download pretrained model and test-data from $repo_url"

curl -SL -O $repo_url
tar xvf sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
rm sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
repo=$dir/sherpa-onnx-zipformer-en-2023-04-01

popd

ls -lh $repo

python3 ./python-api-examples/offline-decode-files.py \
  --tokens=$repo/tokens.txt \
  --encoder=$repo/encoder-epoch-99-avg-1.onnx \
  --decoder=$repo/decoder-epoch-99-avg-1.onnx \
  --joiner=$repo/joiner-epoch-99-avg-1.onnx \
  $repo/test_wavs/0.wav \
  $repo/test_wavs/1.wav \
  $repo/test_wavs/8k.wav

python3 ./python-api-examples/offline-decode-files.py \
  --tokens=$repo/tokens.txt \
  --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \
  --decoder=$repo/decoder-epoch-99-avg-1.onnx \
  --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \
  $repo/test_wavs/0.wav \
  $repo/test_wavs/1.wav \
  $repo/test_wavs/8k.wav

lm_repo_url=https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm
log "Download pre-trained RNN-LM model from ${lm_repo_url}"
GIT_LFS_SKIP_SMUDGE=1 git clone $lm_repo_url
lm_repo=$(basename $lm_repo_url)
pushd $lm_repo
git lfs pull --include "exp/no-state-epoch-99-avg-1.onnx"
popd

bigram_repo_url=https://huggingface.co/vsd-vector/librispeech_bigram_sherpa-onnx-zipformer-large-en-2023-06-26
log "Download bi-gram LM from ${bigram_repo_url}"
GIT_LFS_SKIP_SMUDGE=1 git clone $bigram_repo_url
bigramlm_repo=$(basename $bigram_repo_url)
pushd $bigramlm_repo
git lfs pull --include "2gram.fst"
popd

log "Perform offline decoding with RNN-LM and LODR"
python3 ./python-api-examples/offline-decode-files.py \
  --tokens=$repo/tokens.txt \
  --encoder=$repo/encoder-epoch-99-avg-1.onnx \
  --decoder=$repo/decoder-epoch-99-avg-1.onnx \
  --joiner=$repo/joiner-epoch-99-avg-1.onnx \
  --decoding-method=modified_beam_search \
  --lm=$lm_repo/exp/no-state-epoch-99-avg-1.onnx \
  --lodr-fst=$bigramlm_repo/2gram.fst \
  --lodr-scale=-0.5 \
  $repo/test_wavs/0.wav \
  $repo/test_wavs/1.wav \
  $repo/test_wavs/8k.wav

python3 sherpa-onnx/python/tests/test_offline_recognizer.py --verbose

rm -rf $repo $lm_repo $bigramlm_repo

log "Test non-streaming paraformer models"

if [[ x$OS != x'windows-latest' ]]; then
  echo "OS: $OS"
  pushd $dir
  repo_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
  curl -SL -O $repo_url
  tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
  rm sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2

  log "Start testing ${repo_url}"
  repo=$dir/sherpa-onnx-paraformer-zh-2023-09-14

  ls -lh $repo
  popd

  python3 ./python-api-examples/offline-decode-files.py \
    --tokens=$repo/tokens.txt \
    --paraformer=$repo/model.int8.onnx \
    $repo/test_wavs/0.wav \
    $repo/test_wavs/1.wav \
    $repo/test_wavs/2.wav \
    $repo/test_wavs/8k.wav

  python3 sherpa-onnx/python/tests/test_offline_recognizer.py --verbose

  ln -s $repo $PWD/

  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav

  python3 ./python-api-examples/inverse-text-normalization-offline-asr.py

  rm -rfv sherpa-onnx-paraformer-zh-2023-09-14

  rm -rf $repo
fi

log "Test non-streaming NeMo CTC models"

pushd $dir
repo_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-ctc-en-citrinet-512.tar.bz2
curl -SL -O $repo_url
tar xvf sherpa-onnx-nemo-ctc-en-citrinet-512.tar.bz2
rm sherpa-onnx-nemo-ctc-en-citrinet-512.tar.bz2

log "Start testing ${repo_url}"
repo=$dir/sherpa-onnx-nemo-ctc-en-citrinet-512

ls -lh $repo
popd

python3 ./python-api-examples/offline-decode-files.py \
  --tokens=$repo/tokens.txt \
  --nemo-ctc=$repo/model.onnx \
  $repo/test_wavs/0.wav \
  $repo/test_wavs/1.wav \
  $repo/test_wavs/8k.wav

python3 ./python-api-examples/offline-decode-files.py \
  --tokens=$repo/tokens.txt \
  --nemo-ctc=$repo/model.int8.onnx \
  $repo/test_wavs/0.wav \
  $repo/test_wavs/1.wav \
  $repo/test_wavs/8k.wav

python3 sherpa-onnx/python/tests/test_offline_recognizer.py --verbose

rm -rf $repo

# test text2token
git clone https://github.com/pkufool/sherpa-test-data /tmp/sherpa-test-data

python3 sherpa-onnx/python/tests/test_text2token.py --verbose

rm -rf /tmp/sherpa-test-data

dir=/tmp/onnx-models
mkdir -p $dir

log "Test keyword spotting models"

python3 -c "import sherpa_onnx; print(sherpa_onnx.__file__)"
sherpa_onnx_version=$(python3 -c "import sherpa_onnx; print(sherpa_onnx.__version__)")

echo "sherpa_onnx version: $sherpa_onnx_version"

pwd
ls -lh

if [[ x$OS != x'windows-latest' ]]; then
  echo "OS: $OS"

  repo=sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01
  log "Start testing ${repo}"

  curl -LS -O https://github.com/pkufool/keyword-spotting-models/releases/download/v0.1/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz
  tar xf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz
  rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz

  ls -lh $repo

  python3 ./python-api-examples/keyword-spotter.py

  python3 sherpa-onnx/python/tests/test_keyword_spotter.py --verbose

  rm -rf $repo
fi

rm -r $dir


================================================
FILE: .github/scripts/test-rust.sh
================================================
#!/usr/bin/env bash

set -ex

cd rust-api-examples

./run-audio-tagging-zipformer.sh
rm -rf sherpa-onnx-zipformer-small-audio-tagging-2024-04-15

./run-audio-tagging-ced.sh
rm -rf sherpa-onnx-ced-mini-audio-tagging-2024-04-19

./run-speaker-embedding-extractor.sh
./run-speaker-embedding-manager.sh
rm -f 3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx
rm -rf sr-data

./run-speaker-embedding-cosine-similarity.sh
rm -f wespeaker_zh_cnceleb_resnet34.onnx fangjun-sr-1.wav fangjun-sr-2.wav leijun-sr-1.wav

./run-offline-speaker-diarization.sh
rm -rf sherpa-onnx-pyannote-segmentation-3-0
rm -f 3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx 0-four-speakers-zh.wav

./run-vits-en.sh
rm -rf vits-piper-en_US-amy-low

./run-vits-de.sh
rm -rf vits-piper-de_DE-glados-high

./run-matcha-tts-en.sh
./run-matcha-tts-zh.sh
rm -rf matcha-icefall-en_US-ljspeech matcha-icefall-zh-baker
rm -f vocos-22khz-univ.onnx

./run-kokoro-tts-en.sh
rm -rf kokoro-en-v0_19

./run-kokoro-tts-zh-en.sh
rm -rf kokoro-multi-lang-v1_0

./run-kitten-tts-en.sh
rm -rf kitten-nano-en-v0_1-fp16

./run-pocket-tts.sh
rm -rf sherpa-onnx-pocket-*

./run-supertonic-tts.sh
rm -rf sherpa-onnx-supertonic-*

./run-zipvoice-tts.sh
rm -rf sherpa-onnx-zipvoice-*
rm -f vocos_24khz.onnx

./run-online-punctuation.sh
rm -rf sherpa-onnx-online-punct-*

./run-keyword-spotter.sh
rm -rf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile

./run-spoken-language-identification.sh
rm -rf sherpa-onnx-whisper-tiny spoken-language-identification-test-wavs

./run-offline-punctuation.sh
rm -rf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12-int8

./run-version.sh

./run-moonshine-v2.sh

./run-fire-red-asr-ctc.sh

./run-silero-vad-remove-silence.sh

./run-nemo-parakeet-en.sh
./run-zipformer-vi.sh
./run-zipformer-zh-en.sh
./run-zipformer-en.sh

./run-sense-voice.sh

./run-streaming-zipformer-en.sh
./run-streaming-zipformer-zh-en.sh

./run-offline-speech-enhancement-gtcrn.sh
./run-offline-speech-enhancement-dpdfnet.sh
./run-streaming-speech-enhancement-gtcrn.sh
./run-streaming-speech-enhancement-dpdfnet.sh


================================================
FILE: .github/scripts/test-speaker-diarization.sh
================================================
#!/usr/bin/env bash

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

echo "EXE is $EXE"
echo "PATH: $PATH"

which $EXE

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav

log "specify number of clusters"
$EXE \
  --clustering.num-clusters=4 \
  --segmentation.pyannote-model=./sherpa-onnx-pyannote-segmentation-3-0/model.onnx \
  --embedding.model=./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx \
  ./0-four-speakers-zh.wav

log "specify threshold for clustering"

$EXE \
  --clustering.cluster-threshold=0.90 \
  --segmentation.pyannote-model=./sherpa-onnx-pyannote-segmentation-3-0/model.onnx \
  --embedding.model=./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx \
  ./0-four-speakers-zh.wav

rm -rf sherpa-onnx-pyannote-*
rm -fv *.onnx
rm -fv *.wav


================================================
FILE: .github/scripts/test-speaker-recognition-python.sh
================================================
#!/usr/bin/env bash

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

d=/tmp/sr-models
mkdir -p $d

pushd $d
log "Download test waves"
git clone https://github.com/csukuangfj/sr-data
popd

log "Download wespeaker models"
model_dir=$d/wespeaker
mkdir -p $model_dir
pushd $model_dir
models=(
wespeaker_en_voxceleb_CAM++.onnx
wespeaker_en_voxceleb_CAM++_LM.onnx
wespeaker_en_voxceleb_resnet152_LM.onnx
wespeaker_en_voxceleb_resnet221_LM.onnx
wespeaker_en_voxceleb_resnet293_LM.onnx
wespeaker_en_voxceleb_resnet34.onnx
wespeaker_en_voxceleb_resnet34_LM.onnx
wespeaker_zh_cnceleb_resnet34.onnx
wespeaker_zh_cnceleb_resnet34_LM.onnx
)
for m in ${models[@]}; do
  curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/$m
  curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/wespeaker_en_voxceleb_CAM++_LM.onnx
done
ls -lh
popd

log "Download 3d-speaker models"
model_dir=$d/3dspeaker
mkdir -p $model_dir
pushd $model_dir
models=(
3dspeaker_speech_campplus_sv_en_voxceleb_16k.onnx
3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx
3dspeaker_speech_eres2net_base_200k_sv_zh-cn_16k-common.onnx
3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx
3dspeaker_speech_eres2net_sv_en_voxceleb_16k.onnx
3dspeaker_speech_eres2net_sv_zh-cn_16k-common.onnx
)
for m in ${models[@]}; do
  curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/$m
done
ls -lh
popd

log "Download NeMo models"
model_dir=$d/nemo
mkdir -p $model_dir
pushd $model_dir
models=(
nemo_en_titanet_large.onnx
nemo_en_titanet_small.onnx
nemo_en_speakerverification_speakernet.onnx
)
for m in ${models[@]}; do
  curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/$m
done
ls -lh
popd

python3 sherpa-onnx/python/tests/test_speaker_recognition.py --verbose


================================================
FILE: .github/scripts/test-spoken-language-identification.sh
================================================
#!/usr/bin/env bash

set -e

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

export GIT_CLONE_PROTECTION_ACTIVE=false

echo "EXE is $EXE"
echo "PATH: $PATH"

which $EXE

names=(
tiny
base
small
medium
)

# all_language_codes=bo,ml,tt,fa,sl,bg,sn,sr,tl,km,ln,mr,hr,eu,ro,ba,bs,pl,as,nn,sk,ko,oc,ar,uz,pa,tg,mk,kk,hi,ha,uk,is,de,el,ja,yo,be,so,tk,id,sa,ru,yi,en,am,cs,ne,la,sv,su,pt,mi,ca,sd,hy,haw,fi,et,kn,da,lt,it,nl,he,mg,ur,tr,af,br,bn,ta,no,my,si,mt,th,gl,sw,mn,jw,ms,ps,fo,ka,hu,zh,ht,az,fr,lo,sq,gu,cy,lv,es,lb,te,vi

log "Download test waves"
waves=(
ar-arabic.wav
bg-bulgarian.wav
cs-czech.wav
da-danish.wav
# de-german.wav
# el-greek.wav
# en-english.wav
# es-spanish.wav
# fa-persian.wav
# fi-finnish.wav
# fr-french.wav
# hi-hindi.wav
# hr-croatian.wav
# id-indonesian.wav
# it-italian.wav
# ja-japanese.wav
# ko-korean.wav
# nl-dutch.wav
# no-norwegian.wav
# po-polish.wav
# pt-portuguese.wav
# ro-romanian.wav
# ru-russian.wav
# sk-slovak.wav
# sv-swedish.wav
# ta-tamil.wav
# tl-tagalog.wav
# tr-turkish.wav
# uk-ukrainian.wav
# zh-chinese.wav
)

for wav in ${waves[@]}; do
  echo "Downloading $wav"
  curl -SL -O https://hf-mirror.com/spaces/k2-fsa/spoken-language-identification/resolve/main/test_wavs/$wav
  ls -lh *.wav
done

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/spoken-language-identification-test-wavs.tar.bz2
tar xvf spoken-language-identification-test-wavs.tar.bz2
rm spoken-language-identification-test-wavs.tar.bz2
data=spoken-language-identification-test-wavs

for name in ${names[@]}; do
  log "------------------------------------------------------------"
  log "Run $name"
  log "------------------------------------------------------------"
  repo_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-$name.tar.bz2
  curl -SL -O $repo_url
  tar xvf sherpa-onnx-whisper-$name.tar.bz2
  rm sherpa-onnx-whisper-$name.tar.bz2

  log "Start testing ${repo_url}"
  repo=sherpa-onnx-whisper-$name

  for wav in ${waves[@]}; do
    log "test fp32 onnx"

    time $EXE \
      --whisper-encoder=$repo/${name}-encoder.onnx \
      --whisper-decoder=$repo/${name}-decoder.onnx \
      $data/$wav

    log "test int8 onnx"

    time $EXE \
      --whisper-encoder=$repo/${name}-encoder.int8.onnx \
      --whisper-decoder=$repo/${name}-decoder.int8.onnx \
      $data/$wav
  done
  rm -rf $repo
done


================================================
FILE: .github/scripts/test-swift.sh
================================================
#!/usr/bin/env bash

set -ex

echo "pwd: $PWD"

cd swift-api-examples
ls -lh

./run-test-version.sh

./run-moonshine-v2-asr.sh
rm -rf sherpa-onnx-moonshine-*

./run-fire-red-asr-ctc.sh
rm -rf sherpa-onnx-fire-red-*

./run-tts-pocket-en.sh
ls -lh
rm -rf sherpa-onnx-pocket-*

./run-tts-supertonic-en.sh
ls -lh
rm -rf sherpa-onnx-supertonic-*

./run-medasr-ctc-asr.sh
rm -rf sherpa-onnx-medasr-*

./run-funasr-nano-asr.sh
rm -rf sherpa-onnx-funasr-nano-*

./run-omnilingual-asr-ctc-asr.sh
rm -rf sherpa-onnx-omnilingual-*

./run-decode-file-t-one-streaming.sh
rm -rf sherpa-onnx-streaming-*

./run-compute-speaker-embeddings.sh
rm -fv *.wav *.onnx

./run-tts-kitten-en.sh
ls -lh
rm -rf kitten-*

./run-wenet-ctc-asr.sh
rm -rf sherpa-onnx-*

./run-zipformer-ctc-asr.sh
rm -rf sherpa-onnx-zipformer-*

./run-decode-file-sense-voice-with-hr.sh
rm -rf sherpa-onnx-sense-voice-*
rm -rf dict lexicon.txt replace.fst test-hr.wav

./run-dolphin-ctc-asr.sh
rm -rf sherpa-onnx-dolphin-*

./run-speech-enhancement-gtcrn.sh
./run-speech-enhancement-dpdfnet.sh
./run-online-speech-enhancement-gtcrn.sh
./run-online-speech-enhancement-dpdfnet.sh
ls -lh *.wav

./run-fire-red-asr.sh
rm -rf sherpa-onnx-fire-red-asr-*

./run-tts-vits.sh
ls -lh
rm -rf vits-piper-*

./run-tts-kokoro-zh-en.sh
ls -lh
rm -rf kokoro-multi-*

./run-tts-kokoro-en.sh
ls -lh
rm -rf kokoro-en-*

./run-tts-matcha-zh.sh
ls -lh
rm -rf matcha-icefall-*

./run-tts-matcha-en.sh
ls -lh
rm -rf matcha-icefall-*

./run-tts-zipvoice.sh
ls -lh
rm -rf sherpa-onnx-zipvoice-*
rm -f vocos_24khz.onnx

./run-speaker-diarization.sh
rm -rf *.onnx
rm -rf sherpa-onnx-pyannote-segmentation-3-0
rm -fv *.wav

./run-add-punctuations.sh
rm ./add-punctuations
rm -rf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12

./run-keyword-spotting-from-file.sh
rm ./keyword-spotting-from-file
rm -rf sherpa-onnx-kws-*

./run-streaming-hlg-decode-file.sh
rm ./streaming-hlg-decode-file
rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18

./run-spoken-language-identification.sh
rm -rf sherpa-onnx-whisper*

mkdir -p /Users/fangjun/Desktop
pushd /Users/fangjun/Desktop
curl -SL -O https://huggingface.co/csukuangfj/test-data/resolve/main/Obama.wav
ls -lh
popd

./run-generate-subtitles-ten-vad.sh
rm -rf *.onnx

./run-generate-subtitles.sh
rm -rf *.onnx

ls -lh /Users/fangjun/Desktop
cat /Users/fangjun/Desktop/Obama.srt

rm -rf sherpa-onnx-whisper*
rm -f *.onnx
rm /Users/fangjun/Desktop/Obama.wav

./run-decode-file.sh
rm decode-file
sed -i.bak  '20d' ./decode-file.swift
./run-decode-file.sh

./run-decode-file-non-streaming.sh

ls -lh


================================================
FILE: .github/workflows/.gitignore
================================================
!*.yaml


================================================
FILE: .github/workflows/aarch64-linux-gnu-shared.yaml
================================================
# Modified from https://github.com/Tencent/ncnn/blob/master/.github/workflows/linux-arm-cpu-gcc.yml
name: aarch64-linux-gnu-shared

on:
  push:
    branches:
      - master
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'
    paths:
      - '.github/workflows/aarch64-linux-gnu-shared.yaml'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/c-api/*'
      - 'toolchains/aarch64-linux-gnu.toolchain.cmake'

  workflow_dispatch:

concurrency:
  group: aarch64-linux-gnu-shared-${{ github.ref }}
  cancel-in-progress: true

jobs:
  aarch64_linux_gnu_shared:
    runs-on: ${{ matrix.os }}
    name: aarch64 shared GPU ${{ matrix.gpu }} ${{ matrix.onnxruntime_version }}
    strategy:
      fail-fast: false
      matrix:
        include:
          - os: ubuntu-22.04-arm
            gpu: ON
            onnxruntime_version: "1.11.0"
          - os: ubuntu-22.04-arm
            gpu: ON
            onnxruntime_version: "1.16.0"
          - os: ubuntu-22.04-arm
            gpu: ON
            onnxruntime_version: "1.18.0"
          - os: ubuntu-22.04-arm
            gpu: ON
            onnxruntime_version: "1.18.1"
          - os: ubuntu-22.04-arm
            gpu: OFF
            onnxruntime_version: ""

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Build sherpa-onnx
        if: matrix.gpu == 'ON'
        shell: bash
        run: |
          onnxruntime_version=${{ matrix.onnxruntime_version }}

          git clone --depth 1 --branch v1.2.12 https://github.com/alsa-project/alsa-lib
          pushd alsa-lib
          ./gitcompile
          popd

          p=$PWD

          export CPLUS_INCLUDE_PATH=$PWD/alsa-lib/include:$CPLUS_INCLUDE_PATH
          export C_INCLUDE_PATH=$PWD/alsa-lib/include:$C_INCLUDE_PATH
          export SHERPA_ONNX_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs

          mkdir build
          cd build
          cmake \
            -DALSA_INCLUDE_DIR=$p/alsa-lib/include \
            -DALSA_LIBRARY=$p/alsa-lib/src/.libs/libasound.so \
            -DBUILD_SHARED_LIBS=ON \
            -DCMAKE_INSTALL_PREFIX=./install \
            -DSHERPA_ONNX_ENABLE_GPU=ON \
            -DSHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION=$onnxruntime_version \
            ..
          make -j4 install

          cp -v bin/sense-voice-simulate-streaming-alsa-cxx-api install/bin
          cp -v bin/zipformer-ctc-simulate-streaming-alsa-cxx-api install/bin

          rm -rf install/lib/pkgconfig
          rm -fv install/lib/cargs.h
          rm -fv install/lib/libcargs.so

      - name: Build sherpa-onnx
        if: matrix.gpu == 'OFF'
        shell: bash
        run: |
            docker run --rm \
              --volume ${{ github.workspace }}/:/k2-fsa/sherpa-onnx \
              quay.io/pypa/manylinux2014_aarch64 \
            bash -c '
              echo "config: ${{ matrix.config }}"
              uname -a
              which gcc

              gcc --version
              g++ --version

              echo "pwd"

              ls -lh

              cd /k2-fsa/sherpa-onnx/

              git clone --depth 1 --branch v1.2.12 https://github.com/alsa-project/alsa-lib
              pushd alsa-lib
              ./gitcompile
              popd

              p=$PWD

              export CPLUS_INCLUDE_PATH=$PWD/alsa-lib/include:$CPLUS_INCLUDE_PATH
              export C_INCLUDE_PATH=$PWD/alsa-lib/include:$C_INCLUDE_PATH
              export SHERPA_ONNX_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs

              mkdir build
              cd build

              cmake \
                -DALSA_INCLUDE_DIR=$p/alsa-lib/include \
                -DALSA_LIBRARY=$p/alsa-lib/src/.libs/libasound.so \
                -DBUILD_SHARED_LIBS=ON \
                -DCMAKE_INSTALL_PREFIX=./install \
                ..

              make -j4 install

              cp -v bin/sense-voice-simulate-streaming-alsa-cxx-api install/bin
              cp -v bin/zipformer-ctc-simulate-streaming-alsa-cxx-api install/bin

              rm -rf install/lib/pkgconfig
              rm -fv install/lib/cargs.h
              rm -fv install/lib/libcargs.so
            '

      - name: Display system info
        shell: bash
        run: |
          uname -a
          gcc --version
          g++ --version

      - name: Display generated files
        shell: bash
        run: |
          cd build/install

          ls -lh bin

          echo "---"

          ls -lh lib

          file bin/sherpa-onnx

          readelf -d bin/sherpa-onnx

          ldd bin/sherpa-onnx

          ./bin/sherpa-onnx --help

      - name: Copy files
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-linux-aarch64-shared
          if [[ ${{ matrix.gpu }} == OFF ]]; then
            dst=${dst}-cpu
          else
            dst=${dst}-gpu-onnxruntime-${{ matrix.onnxruntime_version }}
          fi
          mkdir $dst

          cp -a build/install/bin $dst/
          cp -a build/install/lib $dst/

          ls -lh build/install/lib
          ls -lh build/install/bin

          ls -lh $dst/bin/
          echo "strip"
          strip $dst/bin/*

          echo "after strip"
          ls -lh $dst/bin/

          tree $dst

          tar cjvf ${dst}.tar.bz2 $dst

      - uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-linux-aarch64-shared-gpu-${{ matrix.gpu }}-onnxruntime-${{ matrix.onnxruntime_version }}
          path: sherpa-onnx-*linux-aarch64-shared*.tar.bz2

      # https://huggingface.co/docs/hub/spaces-github-actions
      - name: Publish to huggingface
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_CLONE_PROTECTION_ACTIVE=false
            GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs huggingface

            cd huggingface
            dst=aarch64/$SHERPA_ONNX_VERSION
            mkdir -p $dst

            cp -v ../sherpa-onnx-*-shared*.tar.bz2 $dst/

            git status
            git lfs track "*.bz2"

            git add .

            git commit -m "upload sherpa-onnx-${SHERPA_ONNX_VERSION}-linux-aarch64-shared.tar.bz2"

            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs main

      - name: Release pre-compiled binaries and libs for aarch64 linux
        if: github.repository_owner == 'k2-fsa' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*linux-aarch64*.tar.bz2

      - name: Release pre-compiled binaries and libs for aarch64 linux
        if: github.repository_owner == 'csukuangfj' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*linux-aarch64*.tar.bz2
          # repo_name: k2-fsa/sherpa-onnx
          # repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          # tag: v1.12.17

      - name: Test offline Moonshine
        if: matrix.build_type != 'Debug'
        shell: bash
        run: |
          du -h -d1 .
          export PATH=$PWD/build/install/bin:$PATH
          export EXE=sherpa-onnx-offline

          ls -lh build/bin/sherpa-onnx-offline

          readelf -d build/bin/sherpa-onnx-offline

          strings build/bin/sherpa-onnx-offline | grep ^GLIBC

          .github/scripts/test-offline-moonshine.sh


================================================
FILE: .github/workflows/aarch64-linux-gnu-static.yaml
================================================
# Modified from https://github.com/Tencent/ncnn/blob/master/.github/workflows/linux-arm-cpu-gcc.yml
name: aarch64-linux-gnu-static

on:
  push:
    branches:
      - master
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'
    paths:
      - '.github/workflows/aarch64-linux-gnu-static.yaml'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/c-api/*'
      - 'toolchains/aarch64-linux-gnu.toolchain.cmake'

  workflow_dispatch:

concurrency:
  group: aarch64-linux-gnu-static-${{ github.ref }}
  cancel-in-progress: true

jobs:
  aarch64_linux_gnu_static:
    runs-on: ${{ matrix.os }}
    name: aarch64 static lib test
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-22.04-arm]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Build sherpa-onnx
        shell: bash
        run: |
            docker run --rm \
              --volume ${{ github.workspace }}/:/k2-fsa/sherpa-onnx \
              ghcr.io/csukuangfj/manylinux2014-aarch64-gcc11:latest \
            bash -c '
              echo "config: ${{ matrix.config }}"
              uname -a
              which gcc

              gcc --version
              g++ --version

              ldd --version

              export GCC_ROOT=/opt/gcc-11.4.0
              export CC=$GCC_ROOT/bin/gcc
              export CXX=$GCC_ROOT/bin/g++
              export PATH=$GCC_ROOT/bin:$PATH

              gcc --version
              which gcc

              g++ --version
              which g++

              ldd --version

              echo "pwd"

              ls -lh

              cd /k2-fsa/sherpa-onnx/

              git clone --depth 1 --branch v1.2.12 https://github.com/alsa-project/alsa-lib
              pushd alsa-lib
              ./gitcompile
              popd

              export CPLUS_INCLUDE_PATH=$PWD/alsa-lib/include:$CPLUS_INCLUDE_PATH
              export C_INCLUDE_PATH=$PWD/alsa-lib/include:$C_INCLUDE_PATH
              export SHERPA_ONNX_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs

              p=$PWD

              mkdir build
              cd build
              cmake \
                -DALSA_INCLUDE_DIR=$p/alsa-lib/include \
                -DALSA_LIBRARY=$p/alsa-lib/src/.libs/libasound.so \
                -DBUILD_SHARED_LIBS=OFF \
                -DCMAKE_INSTALL_PREFIX=./install \
                ..

              make -j 4

              make install

              cp bin/sense-voice-simulate-streaming-alsa-cxx-api install/bin
              cp bin/zipformer-ctc-simulate-streaming-alsa-cxx-api install/bin

              ls -lh install/lib

              rm -rf install/lib/pkgconfig
              rm -fv install/lib/cargs.h
            '

      - name: Copy files
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-linux-aarch64-static
          mkdir $dst

          ls -lh build/install/lib

          cp -a build/install/bin $dst/
          ls -lh $dst/bin/
          echo "strip"
          strip $dst/bin/*
          ls -lh $dst/bin/

          tree $dst

          tar cjvf ${dst}.tar.bz2 $dst

      - uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-linux-aarch64-static
          path: sherpa-onnx-*linux-aarch64-static.tar.bz2

      # https://huggingface.co/docs/hub/spaces-github-actions
      - name: Publish to huggingface
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_CLONE_PROTECTION_ACTIVE=false
            GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs huggingface

            cd huggingface
            dst=aarch64/$SHERPA_ONNX_VERSION
            mkdir -p $dst

            cp -v ../sherpa-onnx-*-static.tar.bz2 $dst/

            git status
            git lfs track "*.bz2"

            git add .

            git commit -m "upload sherpa-onnx-${SHERPA_ONNX_VERSION}-linux-aarch64-static.tar.bz2"

            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs main

      - name: Release pre-compiled binaries and libs for aarch64 linux
        if: github.repository_owner == 'k2-fsa' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*linux-aarch64*.tar.bz2

      - name: Release pre-compiled binaries and libs for aarch64 linux
        if: github.repository_owner == 'csukuangfj' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*linux-aarch64*.tar.bz2
          # repo_name: k2-fsa/sherpa-onnx
          # repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          # tag: v1.11.5

      - name: Test offline Moonshine
        if: matrix.build_type != 'Debug'
        shell: bash
        run: |
          du -h -d1 .
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline

          ls -lh build/bin/sherpa-onnx-offline

          readelf -d build/bin/sherpa-onnx-offline

          strings build/bin/sherpa-onnx-offline | grep ^GLIBC

          .github/scripts/test-offline-moonshine.sh


================================================
FILE: .github/workflows/add-new-asr-models.yaml
================================================
name: add-new-asr-models

on:
  # push:
  #   branches:
  #     - new-asr-models
  workflow_dispatch:

concurrency:
  group: add-new-asr-models-${{ github.ref }}
  cancel-in-progress: true

jobs:
  add-new-asr-models:
    runs-on: ${{ matrix.os }}
    name: New asr models
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Download icefall-asr-zipformer-multi-zh-en-2023-11-22
        shell: bash
        run: |
          d=sherpa-onnx-zipformer-zh-en-2023-11-22
          mkdir $d
          pushd $d

          wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/data/lang_bbpe_2000/tokens.txt
          wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/data/lang_bbpe_2000/bbpe.model
          wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/exp/decoder-epoch-34-avg-19.onnx
          wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/exp/encoder-epoch-34-avg-19.int8.onnx
          wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/exp/encoder-epoch-34-avg-19.onnx
          wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/exp/joiner-epoch-34-avg-19.int8.onnx
          wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/exp/joiner-epoch-34-avg-19.onnx

          mkdir test_wavs
          cd test_wavs
          wget -O 0.wav -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/test_wavs/_1634_210_2577_1_1525157964032_3712259_29.wav
          wget -O 1.wav -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/test_wavs/_1634_210_2577_1_1525157964032_3712259_55.wav

          wget -O 2.wav -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/test_wavs/_1634_210_2577_1_1525157964032_3712259_75.wav
          popd
          tar cvjf $d.tar.bz2 $d
          ls -lh $d
          rm -rf $d

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models


================================================
FILE: .github/workflows/android-rknn.yaml
================================================
name: android-rknn

on:
  push:
    branches:
      - master
    paths:
      - '.github/workflows/android-rknn.yaml'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/jni/*'
      - 'build-android*.sh'
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'

  workflow_dispatch:

concurrency:
  group: android-rknn-${{ github.ref }}
  cancel-in-progress: true

jobs:
  build-android-rknn-libs:
    name: Android rknn libs
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-android-rknn

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Display NDK HOME
        shell: bash
        run: |
          echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
          ls -lh ${ANDROID_NDK_LATEST_HOME}

      - name: build android arm64-v8a
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"

          export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
          export SHERPA_ONNX_ENABLE_C_API=ON
          export SHERPA_ONNX_ENABLE_RKNN=ON
          ./build-android-arm64-v8a.sh
          mkdir -p jniLibs/arm64-v8a/
          cp -v ./build-android-arm64-v8a/install/lib/*.so ./jniLibs/arm64-v8a/
          cp -v ./build-android-arm64-v8a/install/lib/README.md ./jniLibs/arm64-v8a/
          rm -rf  ./build-android-arm64-v8a/

      - name: build android armv7-eabi
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"

          export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
          export SHERPA_ONNX_ENABLE_C_API=ON
          export SHERPA_ONNX_ENABLE_RKNN=ON
          ./build-android-armv7-eabi.sh
          mkdir -p ./jniLibs/armeabi-v7a/
          cp -v ./build-android-armv7-eabi/install/lib/*.so ./jniLibs/armeabi-v7a/
          cp -v ./build-android-armv7-eabi/install/lib/README.md ./jniLibs/armeabi-v7a/
          rm -rf ./build-android-armv7-eabi

      - name: Copy files
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
          echo "SHERPA_ONNX_VERSION=$SHERPA_ONNX_VERSION" >> "$GITHUB_ENV"

          filename=sherpa-onnx-${SHERPA_ONNX_VERSION}-android-rknn.tar.bz2

          tar cjvf $filename ./jniLibs

          ls -lh

      - uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-android-libs-rknn
          path: ./jniLibs

      # https://huggingface.co/docs/hub/spaces-github-actions
      - name: Publish to huggingface
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            du -h -d1 .
            ls -lh

            rm -rf huggingface
            export GIT_CLONE_PROTECTION_ACTIVE=false
            GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs huggingface

            cd huggingface

            cp -v ../sherpa-onnx-*-android-rknn.tar.bz2 ./

            git status
            git lfs track "*.bz2"

            git add .

            git commit -m "upload sherpa-onnx-${SHERPA_ONNX_VERSION}-android.tar.bz2"

            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs main

      - name: Release android libs
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*-android-rknn.tar.bz2
          # repo_name: k2-fsa/sherpa-onnx
          # repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          # tag: v1.12.17

  build-android-aar-rknn:
    needs: [build-android-rknn-libs]
    name: Android rknn AAR
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      # https://github.com/actions/setup-java
      - uses: actions/setup-java@v4
        with:
          distribution: 'temurin' # See 'Supported distributions' for available options
          java-version: '21'

      - name: Display NDK HOME
        shell: bash
        run: |
          echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
          ls -lh ${ANDROID_NDK_LATEST_HOME}

      - name: Retrieve artifact
        uses: actions/download-artifact@v4
        with:
          name: sherpa-onnx-android-libs-rknn
          path: /tmp/jniLibs

      - name: Show jni libs
        shell: bash
        run: |
          ls -lh /tmp/jniLibs

          # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 arm64-v8a
          # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 armeabi-v7a

      - name: Copy libs
        shell: bash
        run: |
          for arch in arm64-v8a armeabi-v7a; do
            cp -v /tmp/jniLibs/$arch/* android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/$arch/
          done

          rm -rf android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/x86
          rm -rf android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/x86_64

      - name: Check libs
        shell: bash
        run: |
          ls -lh android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/*

      - name: Build aar
        shell: bash
        run: |
          cd android/SherpaOnnxAar

          ./gradlew :sherpa_onnx:assembleRelease

      - name: Display aar
        shell: bash
        run: |
          cd android/SherpaOnnxAar

          ls -lh ./sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar
          cp ./sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar ../../


      - name: Rename aar
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
          echo "SHERPA_ONNX_VERSION=$SHERPA_ONNX_VERSION" >> "$GITHUB_ENV"

          mv sherpa_onnx-release.aar sherpa-onnx-${SHERPA_ONNX_VERSION}-rknn.aar

      - uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-android-aar
          path: ./*.aar

      # https://huggingface.co/docs/hub/spaces-github-actions
      - name: Publish to huggingface
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            du -h -d1 .
            ls -lh

            rm -rf huggingface
            export GIT_CLONE_PROTECTION_ACTIVE=false
            GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs huggingface

            cd huggingface
            dst=android/aar
            mkdir -p $dst

            cp -v ../*.aar $dst

            git status
            git lfs track "*.aar"

            git add .

            git commit -m "upload sherpa-onnx-${SHERPA_ONNX_VERSION}-rknn.aar"

            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs main
      - name: Release android aar
        if: github.repository_owner == 'csukuangfj' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: ./*.aar
          # repo_name: k2-fsa/sherpa-onnx
          # repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          # tag: v1.12.17

      - name: Release android aar
        if: github.repository_owner == 'k2-fsa' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: ./*.aar


================================================
FILE: .github/workflows/android-static.yaml
================================================
# static means we link onnxruntime statically
# but we still have libsherpa-onnx-jni.so
name: android-static

on:
  push:
    branches:
      - master
      - android-link-onnxruntime-statically
    paths:
      - '.github/workflows/android-static.yaml'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/jni/*'
      - 'build-android*.sh'
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'

  workflow_dispatch:

concurrency:
  group: android-static-${{ github.ref }}
  cancel-in-progress: true

jobs:
  build-android-static-libs:
    name: Android static libs
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-android-jni-static

      - name: Display NDK HOME
        shell: bash
        run: |
          echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
          ls -lh ${ANDROID_NDK_LATEST_HOME}

      - name: build android arm64-v8a
        shell: bash
        run: |
          export BUILD_SHARED_LIBS=OFF

          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"

          export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
          ./build-android-arm64-v8a.sh
          mkdir -p jniLibs/arm64-v8a/
          cp -v ./build-android-arm64-v8a-static/install/lib/*.so ./jniLibs/arm64-v8a/
          rm -rf  ./build-android-arm64-v8a-static/

      - name: build android armv7-eabi
        shell: bash
        run: |
          export BUILD_SHARED_LIBS=OFF

          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"

          export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
          ./build-android-armv7-eabi.sh
          mkdir -p ./jniLibs/armeabi-v7a/
          cp -v ./build-android-armv7-eabi-static/install/lib/*.so ./jniLibs/armeabi-v7a/
          rm -rf ./build-android-armv7-eabi-static

      - name: build android x86_64
        shell: bash
        run: |
          export BUILD_SHARED_LIBS=OFF

          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"

          export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
          ./build-android-x86-64.sh
          mkdir -p ./jniLibs/x86_64
          cp -v ./build-android-x86-64-static/install/lib/*.so ./jniLibs/x86_64
          rm -rf ./build-android-x86-64-static

      - name: build android x86
        shell: bash
        run: |
          export BUILD_SHARED_LIBS=OFF

          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"

          export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
          ./build-android-x86.sh
          mkdir -p ./jniLibs/x86
          cp -v ./build-android-x86/install/lib/*.so ./jniLibs/x86
          rm -rf ./build-android-x86

      - name: Copy files
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
          echo "SHERPA_ONNX_VERSION=$SHERPA_ONNX_VERSION" >> "$GITHUB_ENV"

          filename=sherpa-onnx-${SHERPA_ONNX_VERSION}-android-static-link-onnxruntime.tar.bz2

          tar cjvf $filename ./jniLibs

          ls -lh

      - uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-android-libs-static
          path: ./jniLibs

      # https://huggingface.co/docs/hub/spaces-github-actions
      - name: Publish to huggingface
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            du -h -d1 .
            ls -lh

            rm -rf huggingface
            export GIT_CLONE_PROTECTION_ACTIVE=false
            GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs huggingface

            cd huggingface
            dst=$SHERPA_ONNX_VERSION
            mkdir -p $dst

            cp -v ../sherpa-onnx-*-android*.tar.bz2 $dst/

            git status
            git lfs track "*.bz2"

            git add .

            git commit -m "upload sherpa-onnx-${SHERPA_ONNX_VERSION}-android.tar.bz2"

            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs main

      - name: Release android libs
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*-android*.tar.bz2
          # repo_name: k2-fsa/sherpa-onnx
          # repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          # tag: v1.12.17

  build-android-aar-static:
    needs: [build-android-static-libs]
    name: Android AAR
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      # https://github.com/actions/setup-java
      - uses: actions/setup-java@v4
        with:
          distribution: 'temurin' # See 'Supported distributions' for available options
          java-version: '21'

      - name: Display NDK HOME
        shell: bash
        run: |
          echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
          ls -lh ${ANDROID_NDK_LATEST_HOME}

      - name: Retrieve artifact
        uses: actions/download-artifact@v4
        with:
          name: sherpa-onnx-android-libs-static
          path: /tmp/jniLibs

      - name: Show jni libs
        shell: bash
        run: |
          ls -lh /tmp/jniLibs

          # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 arm64-v8a
          # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 armeabi-v7a
          # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 x86
          # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 x86_64
          #
      - name: Copy libs
        shell: bash
        run: |
          for arch in arm64-v8a armeabi-v7a x86 x86_64; do
            cp -v /tmp/jniLibs/$arch/* android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/$arch/
          done

      - name: Check libs
        shell: bash
        run: |
          ls -lh android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/*

      - name: Build aar
        shell: bash
        run: |
          cd android/SherpaOnnxAar

          ./gradlew :sherpa_onnx:assembleRelease

      - name: Display aar
        shell: bash
        run: |
          cd android/SherpaOnnxAar

          ls -lh ./sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar
          cp ./sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar ../../

      - name: Rename aar
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
          echo "SHERPA_ONNX_VERSION=$SHERPA_ONNX_VERSION" >> "$GITHUB_ENV"

          mv sherpa_onnx-release.aar sherpa-onnx-static-link-onnxruntime-${SHERPA_ONNX_VERSION}.aar

      - uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-android-aar-static
          path: ./*.aar

      # https://huggingface.co/docs/hub/spaces-github-actions
      - name: Publish to huggingface
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            du -h -d1 .
            ls -lh

            rm -rf huggingface
            export GIT_CLONE_PROTECTION_ACTIVE=false
            GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs huggingface

            cd huggingface
            dst=android/aar/$SHERPA_ONNX_VERSION
            mkdir -p $dst

            cp -v ../*.aar $dst

            git status
            git lfs track "*.aar"

            git add .

            git commit -m "upload sherpa-onnx-${SHERPA_ONNX_VERSION}.aar"

            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs main

      - name: Release android aar
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: ./*.aar
          # repo_name: k2-fsa/sherpa-onnx
          # repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          # tag: v1.12.17


================================================
FILE: .github/workflows/android.yaml
================================================
name: android

on:
  push:
    branches:
      - master
    paths:
      - '.github/workflows/android.yaml'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/jni/*'
      - 'build-android*.sh'
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'

  workflow_dispatch:

concurrency:
  group: android-${{ github.ref }}
  cancel-in-progress: true

jobs:
  build-android-libs:
    name: Android libs
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-android-jni

      - name: Display NDK HOME
        shell: bash
        run: |
          echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
          ls -lh ${ANDROID_NDK_LATEST_HOME}

      - name: build android arm64-v8a
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"

          export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
          export SHERPA_ONNX_ENABLE_C_API=ON
          ./build-android-arm64-v8a.sh

          readelf -l ./build-android-arm64-v8a/install/lib/*.so

          mkdir -p jniLibs/arm64-v8a/
          cp -v ./build-android-arm64-v8a/install/lib/*.so ./jniLibs/arm64-v8a/
          cp -v ./build-android-arm64-v8a/install/lib/README.md ./jniLibs/arm64-v8a/
          rm -rf  ./build-android-arm64-v8a/

      - name: build android armv7-eabi
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"

          export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
          export SHERPA_ONNX_ENABLE_C_API=ON
          ./build-android-armv7-eabi.sh
          mkdir -p ./jniLibs/armeabi-v7a/

          readelf -l ./build-android-armv7-eabi/install/lib/*.so

          cp -v ./build-android-armv7-eabi/install/lib/*.so ./jniLibs/armeabi-v7a/
          cp -v ./build-android-armv7-eabi/install/lib/README.md ./jniLibs/armeabi-v7a/
          rm -rf ./build-android-armv7-eabi

      - name: build android x86_64
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"

          export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
          export SHERPA_ONNX_ENABLE_C_API=ON
          ./build-android-x86-64.sh

          readelf -l ./build-android-x86-64/install/lib/*.so

          mkdir -p ./jniLibs/x86_64
          cp -v ./build-android-x86-64/install/lib/*.so ./jniLibs/x86_64
          cp -v ./build-android-x86-64/install/lib/README.md ./jniLibs/x86_64
          rm -rf ./build-android-x86-64

      - name: build android x86
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"

          export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
          export SHERPA_ONNX_ENABLE_C_API=ON
          ./build-android-x86.sh

          readelf -l ./build-android-x86/install/lib/*.so

          mkdir -p ./jniLibs/x86
          cp -v ./build-android-x86/install/lib/*.so ./jniLibs/x86
          cp -v ./build-android-x86/install/lib/README.md ./jniLibs/x86
          rm -rf ./build-android-x86

      - name: Copy files
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
          echo "SHERPA_ONNX_VERSION=$SHERPA_ONNX_VERSION" >> "$GITHUB_ENV"

          filename=sherpa-onnx-${SHERPA_ONNX_VERSION}-android.tar.bz2

          tar cjvf $filename ./jniLibs

          ls -lh

      - uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-android-libs
          path: ./jniLibs

      # https://huggingface.co/docs/hub/spaces-github-actions
      - name: Publish to huggingface
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            du -h -d1 .
            ls -lh

            rm -rf huggingface
            export GIT_CLONE_PROTECTION_ACTIVE=false
            GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs huggingface

            cd huggingface

            cp -v ../sherpa-onnx-*-android.tar.bz2 ./

            git status
            git lfs track "*.bz2"

            git add .

            git commit -m "upload sherpa-onnx-${SHERPA_ONNX_VERSION}-android.tar.bz2"

            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs main

      - name: Release android libs
        if: github.repository_owner == 'csukuangfj' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*-android.tar.bz2
          # repo_name: k2-fsa/sherpa-onnx
          # repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          # tag: v1.12.17

      - name: Release android libs
        if: github.repository_owner == 'k2-fsa' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*-android.tar.bz2

  build-android-aar:
    needs: [build-android-libs]
    name: Android AAR
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      # https://github.com/actions/setup-java
      - uses: actions/setup-java@v4
        with:
          distribution: 'temurin' # See 'Supported distributions' for available options
          java-version: '21'

      - name: Display NDK HOME
        shell: bash
        run: |
          echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
          ls -lh ${ANDROID_NDK_LATEST_HOME}

      - name: Retrieve artifact
        uses: actions/download-artifact@v4
        with:
          name: sherpa-onnx-android-libs
          path: /tmp/jniLibs

      - name: Show jni libs
        shell: bash
        run: |
          ls -lh /tmp/jniLibs

          # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 arm64-v8a
          # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 armeabi-v7a
          # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 x86
          # drwxr-xr-x 2 runner docker 4.0K Dec 12 06:56 x86_64
          #
      - name: Copy libs
        shell: bash
        run: |
          for arch in arm64-v8a armeabi-v7a x86 x86_64; do
            cp -v /tmp/jniLibs/$arch/* android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/$arch/
          done

      - name: Check libs
        shell: bash
        run: |
          ls -lh android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/*

      - name: Build aar
        shell: bash
        run: |
          cd android/SherpaOnnxAar

          ./gradlew :sherpa_onnx:assembleRelease

      - name: Display aar
        shell: bash
        run: |
          cd android/SherpaOnnxAar

          ls -lh ./sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar
          cp ./sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar ../../


      - name: Rename aar
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
          echo "SHERPA_ONNX_VERSION=$SHERPA_ONNX_VERSION" >> "$GITHUB_ENV"

          mv sherpa_onnx-release.aar sherpa-onnx-${SHERPA_ONNX_VERSION}.aar

      - uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-android-aar
          path: ./*.aar

      # https://huggingface.co/docs/hub/spaces-github-actions
      - name: Publish to huggingface
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            du -h -d1 .
            ls -lh

            rm -rf huggingface
            export GIT_CLONE_PROTECTION_ACTIVE=false
            GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs huggingface

            cd huggingface
            dst=android/aar
            mkdir -p $dst

            cp -v ../*.aar $dst

            git status
            git lfs track "*.aar"

            git add .

            git commit -m "upload sherpa-onnx-${SHERPA_ONNX_VERSION}.aar"

            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs main

      - name: Release android aar
        if: github.repository_owner == 'csukuangfj' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: ./*.aar
          # repo_name: k2-fsa/sherpa-onnx
          # repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          # tag: v1.12.17

      - name: Release android aar
        if: github.repository_owner == 'k2-fsa' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: ./*.aar


================================================
FILE: .github/workflows/apk-asr-2pass.yaml
================================================
name: apk-asr-2pass

on:
  push:
    branches:
      - apk

  workflow_dispatch:

concurrency:
  group: apk-asr-2pass-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: write

jobs:
  apk_asr_2pass:
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    runs-on: ${{ matrix.os }}
    name: apk for asr ${{ matrix.index }}/${{ matrix.total }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        total: ["16"]
        index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      # https://github.com/actions/setup-java
      - uses: actions/setup-java@v4
        with:
          distribution: 'temurin' # See 'Supported distributions' for available options
          java-version: '21'

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-android

      - name: Display NDK HOME
        shell: bash
        run: |
          echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
          ls -lh ${ANDROID_NDK_LATEST_HOME}

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip jinja2

      - name: Setup build tool version variable
        shell: bash
        run: |
          echo "---"
          ls -lh /usr/local/lib/android/
          echo "---"

          ls -lh /usr/local/lib/android/sdk
          echo "---"

          ls -lh /usr/local/lib/android/sdk/build-tools
          echo "---"

          BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1)
          echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV
          echo "Last build tool version is: $BUILD_TOOL_VERSION"

      - name: Generate build script
        shell: bash
        run: |
          cd scripts/apk

          total=${{ matrix.total }}
          index=${{ matrix.index }}

          ./generate-asr-2pass-apk-script.py --total $total --index $index

          chmod +x build-apk-asr-2pass.sh
          mv -v ./build-apk-asr-2pass.sh ../..

      - name: build APK
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
          ./build-apk-asr-2pass.sh

      - name: Display APK
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      # https://github.com/marketplace/actions/sign-android-release
      - uses: r0adkll/sign-android-release@v1
        name: Sign app APK
        with:
          releaseDirectory: ./apks
          signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }}
          alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }}
          keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }}
        env:
          BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }}

      - name: Display APK after signing
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      - name: Rename APK after signing
        shell: bash
        run: |
          cd apks
          rm -fv signingKey.jks
          rm -fv *.apk.idsig
          rm -fv *-aligned.apk

          all_apks=$(ls -1 *-signed.apk)
          echo "----"
          echo $all_apks
          echo "----"
          for apk in ${all_apks[@]}; do
            n=$(echo $apk | sed -e s/-signed//)
            mv -v $apk $n
          done

          cd ..

          ls -lh ./apks/
          du -h -d1 .

      - name: Display APK after rename
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-apk huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            d=asr-2pass/$SHERPA_ONNX_VERSION
            mkdir -p $d

            cp -v ../apks/*.apk $d/
            git status
            git lfs track "*.apk"
            git add .
            git commit -m "add more apks"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-apk main


================================================
FILE: .github/workflows/apk-asr.yaml
================================================
name: apk-asr

on:
  push:
    branches:
      - apk

  workflow_dispatch:

concurrency:
  group: apk-asr-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: write

jobs:
  apk_asr:
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    runs-on: ${{ matrix.os }}
    name: apk for asr ${{ matrix.index }}/${{ matrix.total }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        total: ["15"]
        index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      # https://github.com/actions/setup-java
      - uses: actions/setup-java@v4
        with:
          distribution: 'temurin' # See 'Supported distributions' for available options
          java-version: '21'

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-android

      - name: Display NDK HOME
        shell: bash
        run: |
          echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
          ls -lh ${ANDROID_NDK_LATEST_HOME}

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip jinja2

      - name: Setup build tool version variable
        shell: bash
        run: |
          echo "---"
          ls -lh /usr/local/lib/android/
          echo "---"

          ls -lh /usr/local/lib/android/sdk
          echo "---"

          ls -lh /usr/local/lib/android/sdk/build-tools
          echo "---"

          BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1)
          echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV
          echo "Last build tool version is: $BUILD_TOOL_VERSION"

      - name: Generate build script
        shell: bash
        run: |
          cd scripts/apk

          total=${{ matrix.total }}
          index=${{ matrix.index }}

          ./generate-asr-apk-script.py --total $total --index $index

          chmod +x build-apk-asr.sh
          mv -v ./build-apk-asr.sh ../..

      - name: build APK
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
          ./build-apk-asr.sh

      - name: Display APK
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      # https://github.com/marketplace/actions/sign-android-release
      - uses: r0adkll/sign-android-release@v1
        name: Sign app APK
        with:
          releaseDirectory: ./apks
          signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }}
          alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }}
          keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }}
        env:
          BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }}

      - name: Display APK after signing
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      - name: Rename APK after signing
        shell: bash
        run: |
          cd apks
          rm -fv signingKey.jks
          rm -fv *.apk.idsig
          rm -fv *-aligned.apk

          all_apks=$(ls -1 *-signed.apk)
          echo "----"
          echo $all_apks
          echo "----"
          for apk in ${all_apks[@]}; do
            n=$(echo $apk | sed -e s/-signed//)
            mv -v $apk $n
          done

          cd ..

          ls -lh ./apks/
          du -h -d1 .

      - name: Display APK after rename
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-apk huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            d=asr/$SHERPA_ONNX_VERSION
            mkdir -p $d
            cp -v ../apks/*.apk $d/
            git status
            git lfs track "*.apk"
            git add .
            git commit -m "add more apks"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-apk main


================================================
FILE: .github/workflows/apk-audio-tagging-wearos.yaml
================================================
name: apk-audio-tagging-wearos

on:
  push:
    branches:
      - apk

  workflow_dispatch:

concurrency:
  group: apk-audio-tagging-wearos-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: write

jobs:
  apk_audio_tagging_wearos:
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    runs-on: ${{ matrix.os }}
    name: apk for WearOS ${{ matrix.index }}/${{ matrix.total }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        total: ["1"]
        index: ["0"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      # https://github.com/actions/setup-java
      - uses: actions/setup-java@v4
        with:
          distribution: 'temurin' # See 'Supported distributions' for available options
          java-version: '21'

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-android

      - name: Display NDK HOME
        shell: bash
        run: |
          echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
          ls -lh ${ANDROID_NDK_LATEST_HOME}

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip jinja2

      - name: Setup build tool version variable
        shell: bash
        run: |
          echo "---"
          ls -lh /usr/local/lib/android/
          echo "---"

          ls -lh /usr/local/lib/android/sdk
          echo "---"

          ls -lh /usr/local/lib/android/sdk/build-tools
          echo "---"

          BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1)
          echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV
          echo "Last build tool version is: $BUILD_TOOL_VERSION"

      - name: Generate build script
        shell: bash
        run: |
          cd scripts/apk

          total=${{ matrix.total }}
          index=${{ matrix.index }}

          ./generate-audio-tagging-apk-script.py --total $total --index $index

          chmod +x build-apk-audio-tagging-wearos.sh
          mv -v ./build-apk-audio-tagging-wearos.sh ../..

      - name: build APK
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
          ./build-apk-audio-tagging-wearos.sh

      - name: Display APK
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      # https://github.com/marketplace/actions/sign-android-release
      - uses: r0adkll/sign-android-release@v1
        name: Sign app APK
        with:
          releaseDirectory: ./apks
          signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }}
          alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }}
          keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }}
        env:
          BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }}

      - name: Display APK for audio tagging after signing
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      - name: Rename APK for audio tagging after signing
        shell: bash
        run: |
          cd apks
          rm -fv signingKey.jks
          rm -fv *.apk.idsig
          rm -fv *-aligned.apk

          all_apks=$(ls -1 *-signed.apk)
          echo "----"
          echo $all_apks
          echo "----"
          for apk in ${all_apks[@]}; do
            n=$(echo $apk | sed -e s/-signed//)
            mv -v $apk $n
          done

          cd ..

          ls -lh ./apks/
          du -h -d1 .

      - name: Display APK after rename
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-apk huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            d=audio-tagging-wearos/$SHERPA_ONNX_VERSION
            mkdir -p $d
            cp -v ../apks/*.apk $d/
            git status
            git lfs track "*.apk"
            git add .
            git commit -m "add more apks"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-apk main


================================================
FILE: .github/workflows/apk-audio-tagging.yaml
================================================
name: apk-audio-tagging

on:
  push:
    branches:
      - apk

  workflow_dispatch:

concurrency:
  group: apk-audio-tagging-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: write

jobs:
  apk_audio_tagging:
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    runs-on: ${{ matrix.os }}
    name: apk for audio tagging ${{ matrix.index }}/${{ matrix.total }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        total: ["1"]
        index: ["0"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      # https://github.com/actions/setup-java
      - uses: actions/setup-java@v4
        with:
          distribution: 'temurin' # See 'Supported distributions' for available options
          java-version: '21'

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-android

      - name: Display NDK HOME
        shell: bash
        run: |
          echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
          ls -lh ${ANDROID_NDK_LATEST_HOME}

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip jinja2

      - name: Setup build tool version variable
        shell: bash
        run: |
          echo "---"
          ls -lh /usr/local/lib/android/
          echo "---"

          ls -lh /usr/local/lib/android/sdk
          echo "---"

          ls -lh /usr/local/lib/android/sdk/build-tools
          echo "---"

          BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1)
          echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV
          echo "Last build tool version is: $BUILD_TOOL_VERSION"

      - name: Generate build script
        shell: bash
        run: |
          cd scripts/apk

          total=${{ matrix.total }}
          index=${{ matrix.index }}

          ./generate-audio-tagging-apk-script.py --total $total --index $index

          chmod +x build-apk-audio-tagging.sh
          mv -v ./build-apk-audio-tagging.sh ../..

      - name: build APK
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
          ./build-apk-audio-tagging.sh

      - name: Display APK
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      # https://github.com/marketplace/actions/sign-android-release
      - uses: r0adkll/sign-android-release@v1
        name: Sign app APK
        with:
          releaseDirectory: ./apks
          signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }}
          alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }}
          keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }}
        env:
          BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }}

      - name: Display APK for audio tagging after signing
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      - name: Rename APK for audio tagging after signing
        shell: bash
        run: |
          cd apks
          rm -fv signingKey.jks
          rm -fv *.apk.idsig
          rm -fv *-aligned.apk

          all_apks=$(ls -1 *-signed.apk)
          echo "----"
          echo $all_apks
          echo "----"
          for apk in ${all_apks[@]}; do
            n=$(echo $apk | sed -e s/-signed//)
            mv -v $apk $n
          done

          cd ..

          ls -lh ./apks/
          du -h -d1 .

      - name: Display APK after rename
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-apk huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            mkdir -p audio-tagging
            d=audio-tagging/$SHERPA_ONNX_VERSION
            mkdir -p $d
            cp -v ../apks/*.apk ./$d

            git status
            git lfs track "*.apk"
            git add .
            git commit -m "add more apks"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-apk main


================================================
FILE: .github/workflows/apk-kws.yaml
================================================
name: apk-kws

on:
  push:
    branches:
      - apk

  workflow_dispatch:

concurrency:
  group: apk-kws-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: write

jobs:
  apk_kws:
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    runs-on: ${{ matrix.os }}
    name: apk for kws ${{ matrix.index }}/${{ matrix.total }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        total: ["1"]
        index: ["0"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      # https://github.com/actions/setup-java
      - uses: actions/setup-java@v4
        with:
          distribution: 'temurin' # See 'Supported distributions' for available options
          java-version: '21'

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-android

      - name: Display NDK HOME
        shell: bash
        run: |
          echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
          ls -lh ${ANDROID_NDK_LATEST_HOME}

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip jinja2

      - name: Setup build tool version variable
        shell: bash
        run: |
          echo "---"
          ls -lh /usr/local/lib/android/
          echo "---"

          ls -lh /usr/local/lib/android/sdk
          echo "---"

          ls -lh /usr/local/lib/android/sdk/build-tools
          echo "---"

          BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1)
          echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV
          echo "Last build tool version is: $BUILD_TOOL_VERSION"

      - name: Generate build script
        shell: bash
        run: |
          cd scripts/apk

          total=${{ matrix.total }}
          index=${{ matrix.index }}

          mv -v ./build-apk-kws.sh ../..

      - name: build APK
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
          ./build-apk-kws.sh

      - name: Display APK
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      # https://github.com/marketplace/actions/sign-android-release
      - uses: r0adkll/sign-android-release@v1
        name: Sign app APK
        with:
          releaseDirectory: ./apks
          signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }}
          alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }}
          keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }}
        env:
          BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }}

      - name: Display APK after signing
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      - name: Rename APK after signing
        shell: bash
        run: |
          cd apks
          rm -fv signingKey.jks
          rm -fv *.apk.idsig
          rm -fv *-aligned.apk

          all_apks=$(ls -1 *-signed.apk)
          echo "----"
          echo $all_apks
          echo "----"
          for apk in ${all_apks[@]}; do
            n=$(echo $apk | sed -e s/-signed//)
            mv -v $apk $n
          done

          cd ..

          ls -lh ./apks/
          du -h -d1 .

      - name: Display APK after rename
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-apk huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            d=kws/$SHERPA_ONNX_VERSION
            mkdir -p $d
            cp -v ../apks/*.apk $d/
            git status
            git lfs track "*.apk"
            git add .
            git commit -m "add more apks"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-apk main


================================================
FILE: .github/workflows/apk-qnn-vad-asr-simulated-streaming.yaml
================================================
name: apk-qnn-vad-asr-simulated-streaming

on:
  push:
    branches:
      - apk
      - zipformer-ctc-qnn-2

  workflow_dispatch:

concurrency:
  group: apk-qnn-vad-asr-simulated-streaming-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: write

jobs:
  simulated_streaming_asr:
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    runs-on: ${{ matrix.os }}
    name: ${{ matrix.index }}/${{ matrix.total }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        total: ["10"]
        index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      # https://github.com/actions/setup-java
      - uses: actions/setup-java@v4
        with:
          distribution: 'temurin' # See 'Supported distributions' for available options
          java-version: '21'

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-android-qnn

      - name: Display NDK HOME
        shell: bash
        run: |
          echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
          ls -lh ${ANDROID_NDK_LATEST_HOME}

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip jinja2

      - name: Setup build tool version variable
        shell: bash
        run: |
          echo "---"
          ls -lh /usr/local/lib/android/
          echo "---"

          ls -lh /usr/local/lib/android/sdk
          echo "---"

          ls -lh /usr/local/lib/android/sdk/build-tools
          echo "---"

          BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1)
          echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV
          echo "Last build tool version is: $BUILD_TOOL_VERSION"

      - name: Generate build script
        shell: bash
        run: |
          cd scripts/apk

          total=${{ matrix.total }}
          index=${{ matrix.index }}

          ./generate-qnn-vad-asr-apk-script.py --total $total --index $index

          chmod +x build-apk-qnn-vad-asr-simulate-streaming.sh
          mv -v ./build-apk-qnn-vad-asr-simulate-streaming.sh ../..

      - uses: actions/upload-artifact@v4
        with:
          name: build-script-${{ matrix.total }}-${{ matrix.index }}
          path: ./build-apk-qnn-vad-asr-simulate-streaming.sh

      - name: build APK
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
          ./build-apk-qnn-vad-asr-simulate-streaming.sh

      - name: Display APK
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      # https://github.com/marketplace/actions/sign-android-release
      - uses: r0adkll/sign-android-release@v1
        name: Sign app APK
        with:
          releaseDirectory: ./apks
          signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }}
          alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }}
          keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }}
        env:
          BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }}

      - name: Display APK after signing
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      - name: Rename APK after signing
        shell: bash
        run: |
          cd apks
          rm -fv signingKey.jks
          rm -fv *.apk.idsig
          rm -fv *-aligned.apk

          all_apks=$(ls -1 *-signed.apk)
          echo "----"
          echo $all_apks
          echo "----"
          for apk in ${all_apks[@]}; do
            n=$(echo $apk | sed -e s/-signed//)
            mv -v $apk $n
          done

          cd ..

          ls -lh ./apks/
          du -h -d1 .

      - name: Display APK after rename
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-apk huggingface
            cd huggingface
            du -h -d1 .
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            d=qnn-vad-asr-simulated-streaming/$SHERPA_ONNX_VERSION
            mkdir -p $d
            cp -v ../apks/*.apk $d/
            git status
            git lfs track "*.apk"
            git add .
            git commit -m "add more apks for qnn"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-apk main


================================================
FILE: .github/workflows/apk-speaker-diarization.yaml
================================================
name: apk-speaker-diarization

on:
  push:
    branches:
      - apk

  workflow_dispatch:

concurrency:
  group: apk-speaker-diarization-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: write

jobs:
  apk_speaker_identification:
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    runs-on: ${{ matrix.os }}
    name: apk for speaker diarization ${{ matrix.index }}/${{ matrix.total }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        total: ["1"]
        index: ["0"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      # https://github.com/actions/setup-java
      - uses: actions/setup-java@v4
        with:
          distribution: 'temurin' # See 'Supported distributions' for available options
          java-version: '21'

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-android

      - name: Display NDK HOME
        shell: bash
        run: |
          echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
          ls -lh ${ANDROID_NDK_LATEST_HOME}

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip jinja2

      - name: Setup build tool version variable
        shell: bash
        run: |
          echo "---"
          ls -lh /usr/local/lib/android/
          echo "---"

          ls -lh /usr/local/lib/android/sdk
          echo "---"

          ls -lh /usr/local/lib/android/sdk/build-tools
          echo "---"

          BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1)
          echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV
          echo "Last build tool version is: $BUILD_TOOL_VERSION"

      - name: Generate build script
        shell: bash
        run: |
          cd scripts/apk

          total=${{ matrix.total }}
          index=${{ matrix.index }}

          python3 ./generate-speaker-diarization-apk-script.py --total $total --index $index

          chmod +x build-apk-speaker-diarization.sh
          mv -v ./build-apk-speaker-diarization.sh ../..

      - name: build APK
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
          ./build-apk-speaker-diarization.sh

      - name: Display APK
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      # https://github.com/marketplace/actions/sign-android-release
      - uses: r0adkll/sign-android-release@v1
        name: Sign app APK
        with:
          releaseDirectory: ./apks
          signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }}
          alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }}
          keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }}
        env:
          BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }}

      - name: Display APK after signing
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      - name: Rename APK after signing
        shell: bash
        run: |
          cd apks
          rm -fv signingKey.jks
          rm -fv *.apk.idsig
          rm -fv *-aligned.apk

          all_apks=$(ls -1 *-signed.apk)
          echo "----"
          echo $all_apks
          echo "----"
          for apk in ${all_apks[@]}; do
            n=$(echo $apk | sed -e s/-signed//)
            mv -v $apk $n
          done

          cd ..

          ls -lh ./apks/
          du -h -d1 .

      - name: Display APK after rename
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-apk huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            d=speaker-diarization/$SHERPA_ONNX_VERSION
            mkdir -p $d/
            cp -v ../apks/*.apk $d/
            git status
            git lfs track "*.apk"
            git add .
            git commit -m "add more apks"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-apk main


================================================
FILE: .github/workflows/apk-speaker-identification.yaml
================================================
name: apk-speaker-identification

on:
  push:
    branches:
      - apk

  workflow_dispatch:

concurrency:
  group: apk-speaker-identification-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: write

jobs:
  apk_speaker_identification:
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    runs-on: ${{ matrix.os }}
    name: apk for speaker identification ${{ matrix.index }}/${{ matrix.total }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        total: ["10"]
        index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      # https://github.com/actions/setup-java
      - uses: actions/setup-java@v4
        with:
          distribution: 'temurin' # See 'Supported distributions' for available options
          java-version: '21'

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-android

      - name: Display NDK HOME
        shell: bash
        run: |
          echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
          ls -lh ${ANDROID_NDK_LATEST_HOME}

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip jinja2

      - name: Setup build tool version variable
        shell: bash
        run: |
          echo "---"
          ls -lh /usr/local/lib/android/
          echo "---"

          ls -lh /usr/local/lib/android/sdk
          echo "---"

          ls -lh /usr/local/lib/android/sdk/build-tools
          echo "---"

          BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1)
          echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV
          echo "Last build tool version is: $BUILD_TOOL_VERSION"

      - name: Generate build script
        shell: bash
        run: |
          cd scripts/apk

          total=${{ matrix.total }}
          index=${{ matrix.index }}

          ./generate-speaker-identification-apk-script.py --total $total --index $index

          chmod +x build-apk-speaker-identification.sh
          mv -v ./build-apk-speaker-identification.sh ../..

      - name: build APK
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
          ./build-apk-speaker-identification.sh

      - name: Display APK
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      # https://github.com/marketplace/actions/sign-android-release
      - uses: r0adkll/sign-android-release@v1
        name: Sign app APK
        with:
          releaseDirectory: ./apks
          signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }}
          alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }}
          keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }}
        env:
          BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }}

      - name: Display APK after signing
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      - name: Rename APK after signing
        shell: bash
        run: |
          cd apks
          rm -fv signingKey.jks
          rm -fv *.apk.idsig
          rm -fv *-aligned.apk

          all_apks=$(ls -1 *-signed.apk)
          echo "----"
          echo $all_apks
          echo "----"
          for apk in ${all_apks[@]}; do
            n=$(echo $apk | sed -e s/-signed//)
            mv -v $apk $n
          done

          cd ..

          ls -lh ./apks/
          du -h -d1 .

      - name: Display APK after rename
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-apk huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            d=speaker-identification/$SHERPA_ONNX_VERSION
            mkdir -p $d/
            cp -v ../apks/*.apk $d/
            git status
            git lfs track "*.apk"
            git add .
            git commit -m "add more apks"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-apk main


================================================
FILE: .github/workflows/apk-spoken-language-identification.yaml
================================================
name: apk-slid

on:
  push:
    branches:
      - apk

  workflow_dispatch:

concurrency:
  group: apk-slid-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: write

jobs:
  apk_slid:
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    runs-on: ${{ matrix.os }}
    name: apk for slid ${{ matrix.index }}/${{ matrix.total }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        total: ["1"]
        index: ["0"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      # https://github.com/actions/setup-java
      - uses: actions/setup-java@v4
        with:
          distribution: 'temurin' # See 'Supported distributions' for available options
          java-version: '21'

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-android

      - name: Display NDK HOME
        shell: bash
        run: |
          echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
          ls -lh ${ANDROID_NDK_LATEST_HOME}

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip jinja2

      - name: Setup build tool version variable
        shell: bash
        run: |
          echo "---"
          ls -lh /usr/local/lib/android/
          echo "---"

          ls -lh /usr/local/lib/android/sdk
          echo "---"

          ls -lh /usr/local/lib/android/sdk/build-tools
          echo "---"

          BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1)
          echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV
          echo "Last build tool version is: $BUILD_TOOL_VERSION"

      - name: Generate build script
        shell: bash
        run: |
          cd scripts/apk

          total=${{ matrix.total }}
          index=${{ matrix.index }}

          ./generate-slid-apk-script.py --total $total --index $index

          chmod +x build-apk-slid.sh
          mv -v ./build-apk-slid.sh ../..

      - name: build APK
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
          ./build-apk-slid.sh

      - name: Display APK
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      # https://github.com/marketplace/actions/sign-android-release
      - uses: r0adkll/sign-android-release@v1
        name: Sign app APK
        with:
          releaseDirectory: ./apks
          signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }}
          alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }}
          keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }}
        env:
          BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }}

      - name: Display APK for slid after signing
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      - name: Rename APK for slid after signing
        shell: bash
        run: |
          cd apks
          rm -fv signingKey.jks
          rm -fv *.apk.idsig
          rm -fv *-aligned.apk

          all_apks=$(ls -1 *-signed.apk)
          echo "----"
          echo $all_apks
          echo "----"
          for apk in ${all_apks[@]}; do
            n=$(echo $apk | sed -e s/-signed//)
            mv -v $apk $n
          done

          cd ..

          ls -lh ./apks/
          du -h -d1 .

      - name: Display APK after rename
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-apk huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            d=slid/$SHERPA_ONNX_VERSION
            mkdir -p $d/
            cp -v ../apks/*.apk $d/
            git status
            git lfs track "*.apk"
            git add .
            git commit -m "add more apks"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-apk main


================================================
FILE: .github/workflows/apk-tts-engine.yaml
================================================
name: apk-tts-engine

on:
  push:
    branches:
      - apk

  workflow_dispatch:

concurrency:
  group: apk-tts-engine-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: write

jobs:
  apk_tts_engine:
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    runs-on: ${{ matrix.os }}
    name: apk for tts engine ${{ matrix.index }}/${{ matrix.total }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        total: ["40"]
        index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      # https://github.com/actions/setup-java
      - uses: actions/setup-java@v4
        with:
          distribution: 'temurin' # See 'Supported distributions' for available options
          java-version: '21'

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-android

      - name: Display NDK HOME
        shell: bash
        run: |
          echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
          ls -lh ${ANDROID_NDK_LATEST_HOME}

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip jinja2 iso639-lang

      - name: Setup build tool version variable
        shell: bash
        run: |
          echo "---"
          ls -lh /usr/local/lib/android/
          echo "---"

          ls -lh /usr/local/lib/android/sdk
          echo "---"

          ls -lh /usr/local/lib/android/sdk/build-tools
          echo "---"

          BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1)
          echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV
          echo "Last build tool version is: $BUILD_TOOL_VERSION"

      - name: Generate build script
        shell: bash
        run: |
          cd scripts/apk

          total=${{ matrix.total }}
          index=${{ matrix.index }}

          ./generate-tts-apk-script.py --total $total --index $index

          chmod +x build-apk-tts-engine.sh
          mv -v ./build-apk-tts-engine.sh ../..

      - name: build APK for TTS engine
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
          ./build-apk-tts-engine.sh

      - name: Display APK for TTS engine
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      # https://github.com/marketplace/actions/sign-android-release
      - uses: r0adkll/sign-android-release@v1
        name: Sign app APK
        with:
          releaseDirectory: ./apks
          signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }}
          alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }}
          keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }}
        env:
          BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }}

      - name: Display APK for TTS engine after signing
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      - name: Rename APK for TTS engine after signing
        shell: bash
        run: |
          cd apks
          rm -fv signingKey.jks
          rm -fv *.apk.idsig
          rm -fv *-aligned.apk

          all_apks=$(ls -1 *-signed.apk)
          echo "----"
          echo $all_apks
          echo "----"
          for apk in ${all_apks[@]}; do
            n=$(echo $apk | sed -e s/-signed//)
            mv -v $apk $n
          done

          cd ..

          ls -lh ./apks/
          du -h -d1 .

      - uses: actions/upload-artifact@v4
        if: false
        with:
          name: tts-engine-apk-${{ matrix.index }}
          path: ./apks/*.apk

      - name: Publish to huggingface
        if: true
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-apk huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            d=tts-engine-new/$SHERPA_ONNX_VERSION
            mkdir -p $d
            cp -v ../apks/*.apk $d/
            git status
            git lfs track "*.apk"
            git add .
            git commit -m "add more tts engine apks"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-apk main


================================================
FILE: .github/workflows/apk-tts.yaml
================================================
name: apk-tts

on:
  push:
    branches:
      - apk

  workflow_dispatch:

concurrency:
  group: apk-tts-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: write

jobs:
  apk_tts:
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    runs-on: ${{ matrix.os }}
    name: apk for tts ${{ matrix.index }}/${{ matrix.total }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        total: ["40"]
        index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      # https://github.com/actions/setup-java
      - uses: actions/setup-java@v4
        with:
          distribution: 'temurin' # See 'Supported distributions' for available options
          java-version: '21'

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-android

      - name: Display NDK HOME
        shell: bash
        run: |
          echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
          ls -lh ${ANDROID_NDK_LATEST_HOME}

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip jinja2 iso639-lang

      - name: Setup build tool version variable
        shell: bash
        run: |
          echo "---"
          ls -lh /usr/local/lib/android/
          echo "---"

          ls -lh /usr/local/lib/android/sdk
          echo "---"

          ls -lh /usr/local/lib/android/sdk/build-tools
          echo "---"

          BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1)
          echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV
          echo "Last build tool version is: $BUILD_TOOL_VERSION"

      - name: Generate build script
        shell: bash
        run: |
          cd scripts/apk

          total=${{ matrix.total }}
          index=${{ matrix.index }}

          ./generate-tts-apk-script.py --total $total --index $index

          chmod +x build-apk-tts.sh
          mv -v ./build-apk-tts.sh ../..

      - name: build APK
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
          ./build-apk-tts.sh

      - name: Display APK
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      # https://github.com/marketplace/actions/sign-android-release
      - uses: r0adkll/sign-android-release@v1
        name: Sign app APK
        with:
          releaseDirectory: ./apks
          signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }}
          alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }}
          keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }}
        env:
          BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }}

      - name: Display APK for TTS engine after signing
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      - name: Rename APK for TTS engine after signing
        shell: bash
        run: |
          cd apks
          rm -fv signingKey.jks
          rm -fv *.apk.idsig
          rm -fv *-aligned.apk

          all_apks=$(ls -1 *-signed.apk)
          echo "----"
          echo $all_apks
          echo "----"
          for apk in ${all_apks[@]}; do
            n=$(echo $apk | sed -e s/-signed//)
            mv -v $apk $n
          done

          cd ..

          ls -lh ./apks/
          du -h -d1 .

      - uses: actions/upload-artifact@v4
        if: false
        with:
          name: tts-apk-${{ matrix.index }}
          path: ./apks/*.apk

      - name: Publish to huggingface
        if: true
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-apk huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            d=tts-new/$SHERPA_ONNX_VERSION
            mkdir -p $d
            cp -v ../apks/*.apk $d/
            git status
            git lfs track "*.apk"
            git add .
            git commit -m "add more apks"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-apk main


================================================
FILE: .github/workflows/apk-vad-asr-simulated-streaming.yaml
================================================
name: apk-vad-asr-simulated-streaming

on:
  push:
    branches:
      - apk

  workflow_dispatch:

concurrency:
  group: apk-vad-asr-simulated-streaming-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: write

jobs:
  simulated_streaming_asr:
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    runs-on: ${{ matrix.os }}
    name: ${{ matrix.index }}/${{ matrix.total }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        total: ["25"]
        index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      # https://github.com/actions/setup-java
      - uses: actions/setup-java@v4
        with:
          distribution: 'temurin' # See 'Supported distributions' for available options
          java-version: '21'

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-android

      - name: Display NDK HOME
        shell: bash
        run: |
          echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
          ls -lh ${ANDROID_NDK_LATEST_HOME}

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip jinja2

      - name: Setup build tool version variable
        shell: bash
        run: |
          echo "---"
          ls -lh /usr/local/lib/android/
          echo "---"

          ls -lh /usr/local/lib/android/sdk
          echo "---"

          ls -lh /usr/local/lib/android/sdk/build-tools
          echo "---"

          BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1)
          echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV
          echo "Last build tool version is: $BUILD_TOOL_VERSION"

      - name: Generate build script
        shell: bash
        run: |
          cd scripts/apk

          total=${{ matrix.total }}
          index=${{ matrix.index }}

          ./generate-vad-asr-apk-script.py --total $total --index $index

          chmod +x build-apk-vad-asr-simulate-streaming.sh
          mv -v ./build-apk-vad-asr-simulate-streaming.sh ../..

      - name: build APK
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
          ./build-apk-vad-asr-simulate-streaming.sh

      - name: Display APK
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      # https://github.com/marketplace/actions/sign-android-release
      - uses: r0adkll/sign-android-release@v1
        name: Sign app APK
        with:
          releaseDirectory: ./apks
          signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }}
          alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }}
          keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }}
        env:
          BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }}

      - name: Display APK after signing
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      - name: Rename APK after signing
        shell: bash
        run: |
          cd apks
          rm -fv signingKey.jks
          rm -fv *.apk.idsig
          rm -fv *-aligned.apk

          all_apks=$(ls -1 *-signed.apk)
          echo "----"
          echo $all_apks
          echo "----"
          for apk in ${all_apks[@]}; do
            n=$(echo $apk | sed -e s/-signed//)
            mv -v $apk $n
          done

          cd ..

          ls -lh ./apks/
          du -h -d1 .

      - name: Display APK after rename
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-apk huggingface
            cd huggingface
            du -h -d1 .
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            d=vad-asr-simulated-streaming/$SHERPA_ONNX_VERSION
            mkdir -p $d
            cp -v ../apks/*.apk $d/
            git status
            git lfs track "*.apk"
            git add .
            git commit -m "add more apks"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-apk main


================================================
FILE: .github/workflows/apk-vad-asr.yaml
================================================
name: apk-vad-asr

on:
  push:
    branches:
      - apk

  workflow_dispatch:

concurrency:
  group: apk-vad-asr-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: write

jobs:
  apk_vad_asr:
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    runs-on: ${{ matrix.os }}
    name: apk for asr ${{ matrix.index }}/${{ matrix.total }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        total: ["25"]
        index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      # https://github.com/actions/setup-java
      - uses: actions/setup-java@v4
        with:
          distribution: 'temurin' # See 'Supported distributions' for available options
          java-version: '21'

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-android

      - name: Display NDK HOME
        shell: bash
        run: |
          echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
          ls -lh ${ANDROID_NDK_LATEST_HOME}

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip jinja2

      - name: Setup build tool version variable
        shell: bash
        run: |
          echo "---"
          ls -lh /usr/local/lib/android/
          echo "---"

          ls -lh /usr/local/lib/android/sdk
          echo "---"

          ls -lh /usr/local/lib/android/sdk/build-tools
          echo "---"

          BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1)
          echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV
          echo "Last build tool version is: $BUILD_TOOL_VERSION"

      - name: Generate build script
        shell: bash
        run: |
          cd scripts/apk

          total=${{ matrix.total }}
          index=${{ matrix.index }}

          ./generate-vad-asr-apk-script.py --total $total --index $index

          chmod +x build-apk-vad-asr.sh
          mv -v ./build-apk-vad-asr.sh ../..

      - name: build APK
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
          ./build-apk-vad-asr.sh

      - name: Display APK
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      # https://github.com/marketplace/actions/sign-android-release
      - uses: r0adkll/sign-android-release@v1
        name: Sign app APK
        with:
          releaseDirectory: ./apks
          signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }}
          alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }}
          keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }}
        env:
          BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }}

      - name: Display APK after signing
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      - name: Rename APK after signing
        shell: bash
        run: |
          cd apks
          rm -fv signingKey.jks
          rm -fv *.apk.idsig
          rm -fv *-aligned.apk

          all_apks=$(ls -1 *-signed.apk)
          echo "----"
          echo $all_apks
          echo "----"
          for apk in ${all_apks[@]}; do
            n=$(echo $apk | sed -e s/-signed//)
            mv -v $apk $n
          done

          cd ..

          ls -lh ./apks/
          du -h -d1 .

      - name: Display APK after rename
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-apk huggingface
            cd huggingface
            du -h -d1 .
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            d=vad-asr/$SHERPA_ONNX_VERSION
            mkdir -p $d
            cp -v ../apks/*.apk $d/
            git status
            git lfs track "*.apk"
            git add .
            git commit -m "add more apks"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-apk main


================================================
FILE: .github/workflows/apk-vad.yaml
================================================
name: apk-vad

on:
  push:
    branches:
      - apk

  workflow_dispatch:

concurrency:
  group: apk-vad-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: write

jobs:
  apk_vad:
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    runs-on: ${{ matrix.os }}
    name: apk for vad ${{ matrix.index }}/${{ matrix.total }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        total: ["1"]
        index: ["0"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      # https://github.com/actions/setup-java
      - uses: actions/setup-java@v4
        with:
          distribution: 'temurin' # See 'Supported distributions' for available options
          java-version: '21'

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-android

      - name: Display NDK HOME
        shell: bash
        run: |
          echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
          ls -lh ${ANDROID_NDK_LATEST_HOME}

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip jinja2

      - name: Setup build tool version variable
        shell: bash
        run: |
          echo "---"
          ls -lh /usr/local/lib/android/
          echo "---"

          ls -lh /usr/local/lib/android/sdk
          echo "---"

          ls -lh /usr/local/lib/android/sdk/build-tools
          echo "---"

          BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1)
          echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV
          echo "Last build tool version is: $BUILD_TOOL_VERSION"

      - name: Generate build script
        shell: bash
        run: |
          cd scripts/apk

          total=${{ matrix.total }}
          index=${{ matrix.index }}

          mv -v ./build-apk-vad.sh ../..

      - name: build APK
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
          ./build-apk-vad.sh

      - name: Display APK
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      # https://github.com/marketplace/actions/sign-android-release
      - uses: r0adkll/sign-android-release@v1
        name: Sign app APK
        with:
          releaseDirectory: ./apks
          signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }}
          alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }}
          keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }}
        env:
          BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }}

      - name: Display APK after signing
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      - name: Rename APK after signing
        shell: bash
        run: |
          cd apks
          rm -fv signingKey.jks
          rm -fv *.apk.idsig
          rm -fv *-aligned.apk

          all_apks=$(ls -1 *-signed.apk)
          echo "----"
          echo $all_apks
          echo "----"
          for apk in ${all_apks[@]}; do
            n=$(echo $apk | sed -e s/-signed//)
            mv -v $apk $n
          done

          cd ..

          ls -lh ./apks/
          du -h -d1 .

      - name: Display APK after rename
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-apk huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            d=vad/$SHERPA_ONNX_VERSION
            mkdir -p $d
            cp -v ../apks/*.apk $d/
            git status
            git lfs track "*.apk"
            git add .
            git commit -m "add more apks"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-apk main


================================================
FILE: .github/workflows/arm-linux-gnueabihf.yaml
================================================
# Modified from https://github.com/Tencent/ncnn/blob/master/.github/workflows/linux-arm-cpu-gcc.yml
name: arm-linux-gnueabihf

on:
  push:
    branches:
      - master
    paths:
      - '.github/workflows/arm-linux-gnueabihf.yaml'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/c-api/*'
      - 'toolchains/arm-linux-gnueabihf.toolchain.cmake'
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'

  workflow_dispatch:

concurrency:
  group: arm-linux-gnueabihf-${{ github.ref }}
  cancel-in-progress: true

jobs:
  arm_linux_gnueabihf:
    runs-on: ${{ matrix.os }}
    name: ${{ matrix.os }} ${{ matrix.lib_type }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        lib_type: [static, shared]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-arm-${{ matrix.lib_type }}

      - name: cache-toolchain
        id: cache-toolchain
        uses: actions/cache@v4
        with:
          path: toolchain
          key: gcc-arm-11.2-2022.02-x86_64-arm-none-linux-gnueabihf

      - name: Download toolchain
        if: steps.cache-toolchain.outputs.cache-hit != 'true'
        shell: bash
        run: |
          curl -SL -O https://huggingface.co/csukuangfj/arm-linux-gcc/resolve/main/gcc-arm-11.2-2022.02-x86_64-arm-none-linux-gnueabihf.tar.xz
          mkdir $GITHUB_WORKSPACE/toolchain
          tar xvf ./gcc-arm-11.2-2022.02-x86_64-arm-none-linux-gnueabihf.tar.xz --strip-components 1 -C $GITHUB_WORKSPACE/toolchain
          rm -v gcc-arm-11.2-2022.02-x86_64-arm-none-linux-gnueabihf.tar.xz

      - name: Display toolchain info
        shell: bash
        run: |
          export PATH=$GITHUB_WORKSPACE/toolchain/bin:$PATH
          arm-none-linux-gnueabihf-gcc --version

      - name: build arm-linux-gnueabihf
        shell: bash
        run: |
          export PATH=$GITHUB_WORKSPACE/toolchain/bin:$PATH

          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"

          cmake --version

          lib_type=${{ matrix.lib_type }}

          if [[ $lib_type == "shared" ]]; then
            export BUILD_SHARED_LIBS=ON
          else
            export BUILD_SHARED_LIBS=OFF
          fi

          ./build-arm-linux-gnueabihf.sh

          ls -lh build-arm-linux-gnueabihf/bin
          ls -lh build-arm-linux-gnueabihf/lib

          file build-arm-linux-gnueabihf/bin/sherpa-onnx

          strings build-arm-linux-gnueabihf/bin/sherpa-onnx | grep ^GLIBC

      - name: Copy files
        shell: bash
        run: |
          export PATH=$GITHUB_WORKSPACE/toolchain/bin:$PATH
          arm-none-linux-gnueabihf-strip --version

          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-linux-arm-gnueabihf-${{ matrix.lib_type }}
          mkdir $dst

          ls -lh build-arm-linux-gnueabihf/install/lib

          cp -a build-arm-linux-gnueabihf/install/bin $dst/
          ls -lh $dst/bin/*
          arm-none-linux-gnueabihf-strip $dst/bin/*
          ls -lh $dst

          lib_type=${{ matrix.lib_type }}
          if [[ $lib_type == "shared" ]]; then
            cp -a build-arm-linux-gnueabihf/install/lib $dst/
            rm -v $dst/lib/libasound.so
          fi

          tree $dst

          tar cjvf ${dst}.tar.bz2 $dst

      - uses: actions/upload-artifact@v4
        if: matrix.lib_type == 'shared'
        with:
          name: sherpa-onnx-linux-arm-gnueabihf-shared
          path: sherpa-onnx-*linux-arm-gnueabihf-shared.tar.bz2

      - uses: actions/upload-artifact@v4
        if: matrix.lib_type == 'static'
        with:
          name: sherpa-onnx-linux-arm-gnueabihf-static
          path: sherpa-onnx-*linux-arm-gnueabihf-static.tar.bz2

      # https://huggingface.co/docs/hub/spaces-github-actions
      - name: Publish to huggingface
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs huggingface

            cd huggingface
            dst=arm32/$SHERPA_ONNX_VERSION
            mkdir -p $dst

            cp -v ../sherpa-onnx-*.tar.bz2 $dst/

            git status
            git lfs track "*.bz2"

            git add .

            git commit -m "upload sherpa-onnx-${SHERPA_ONNX_VERSION}"

            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs main

      - name: Release pre-compiled binaries and libs for arm linux gnueabihf ${{ matrix.lib_type }}
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*linux-arm-gnueabihf*.tar.bz2
          # repo_name: k2-fsa/sherpa-onnx
          # repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          # tag: v1.12.0


================================================
FILE: .github/workflows/as_cmake_sub_project.yaml
================================================
name: as_cmake_sub_project

on:
  push:
    branches:
      - master

  workflow_dispatch:

concurrency:
  group: as-cmake-sub-project-${{ github.ref }}
  cancel-in-progress: true

jobs:
  as_cmake_sub_project:
    name: ${{ matrix.os }} shared ${{ matrix.shared_lib }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        shared_lib: [ON, OFF]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-${{ matrix.shared_lib }}-cmake-sub-project

      - name: Display PWD
        shell: bash
        run: |
          echo "pwd: $PWD"
          ls -lh
          du -h -d1 .

      - name: Build
        shell: bash
        run: |
          mv .github/scripts/as-cmake-sub-project ..
          cd ../as-cmake-sub-project
          ln -s $PWD/../sherpa-onnx .
          mkdir build
          cd build
          cmake -DBUILD_SHARED_LIBS=${{ matrix.shared_lib }} ..
          make -j2 main

      - name: Test
        shell: bash
        run: |
          cd ../as-cmake-sub-project

          cd build
          ls -lh lib
          echo "----"
          ls -lh bin

          readelf -d ./bin/main
          ./bin/main


================================================
FILE: .github/workflows/ascend.yaml
================================================
name: ascend

on:
  push:
    branches:
      - master

  workflow_dispatch:

concurrency:
  group: ascend-${{ github.ref }}
  cancel-in-progress: true

jobs:
  linux:
    name: ascend
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        include:
          # - image: "gpustack/ascendai-cann:8.0.RC3-910b-ubuntu20.04-py3.9"
          #   name: "8.0.0-10b"
          - image: "gpustack/devel-ascendai-cann:8.0.rc3.beta1-310p-ubuntu20.04-v2"
            name: "8.0.0-310p"
    container:
      # image: ascendai/cann:latest
      # image: ascendai/cann:8.1.rc1-910b-ubuntu22.04-py3.10
      # see https://hub.docker.com/r/gpustack/ascendai-cann/tags?name=8.0
      # see https://hub.docker.com/r/gpustack/devel-ascendai-cann/tags?name=310p
      # and
      # https://quay.io/repository/ascend/cann?tab=tags
      image: ${{ matrix.image }}

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Setup Python 3.8
        uses: actions/setup-python@v5
        with:
          python-version: "3.8"

      - name: Install dependencies
        shell: bash
        run: |
          apt-get update && apt-get install -y git curl cmake gcc g++

      - name: Show GCC version
        shell: bash
        run: |
          gcc --version
          g++ --version
          which gcc
          which g++

      - name: Build sherpa-onnx
        shell: bash
        run: |
          ls -lh /usr/local/Ascend/ascend-toolkit/set_env.sh
          find /usr/local/Ascend -name "libascend*.so" 2>/dev/null
          source /usr/local/Ascend/ascend-toolkit/set_env.sh
          export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib/linux/x86_64:$LD_LIBRARY_PATH

          mkdir build
          cd build
          cmake -DSHERPA_ONNX_ENABLE_ASCEND_NPU=ON ..

          make -j2

      - name: Show results
        shell: bash
        run: |
          cd build

          source /usr/local/Ascend/ascend-toolkit/set_env.sh
          export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib/linux/x86_64:$LD_LIBRARY_PATH

          ldd ./bin/sherpa-onnx-offline


================================================
FILE: .github/workflows/axcl-linux-aarch64.yaml
================================================
name: axcl-linux-aarch64

on:
  push:
    branches:
      - master
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'
    paths:
      - '.github/workflows/axcl-linux-aarch64.yaml'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/csrc/axcl/*'
      - 'sherpa-onnx/c-api/*'
      - 'toolchains/aarch64-linux-gnu.toolchain.cmake'

  workflow_dispatch:

concurrency:
  group: axcl-linux-aarch64-${{ github.ref }}
  cancel-in-progress: true

jobs:
  axcl_linux_aarch64:
    runs-on: ubuntu-22.04-arm
    name: axcl npu
    strategy:
      fail-fast: false

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Download SDK
        shell: bash
        run: |
          git clone --depth 1 https://github.com/Abandon-ht/axcl_bsp_sdk
          mv axcl_bsp_sdk/out sdk_dir

          ls -lh sdk_dir/include
          echo "---"
          ls -lh sdk_dir/bsp
          echo "---"
          ls -lh sdk_dir/lib

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: axcl-linux-aarch64

      - name: Build sherpa-onnx
        shell: bash
        run: |
            docker run --rm \
              --volume ${{ github.workspace }}/:/k2-fsa/sherpa-onnx \
              quay.io/pypa/manylinux_2_28_aarch64 \
            bash -c '
              uname -a
              which gcc

              gcc --version
              g++ --version

              cmake --version


              cd /k2-fsa/sherpa-onnx/

              export AXCL_SDK_ROOT=$PWD/sdk_dir
              echo "AXCL_SDK_ROOT: $AXCL_SDK_ROOT"
              export CPLUS_INCLUDE_PATH="$AXCL_SDK_ROOT/include:$AXCL_SDK_ROOT/bsp:$CPLUS_INCLUDE_PATH"
              export SHERPA_ONNX_AXCL_LIB_DIR="$AXCL_SDK_ROOT/lib"

              echo "pwd"

              ls -lh

              git clone --depth 1 --branch v1.2.12 https://github.com/alsa-project/alsa-lib
              pushd alsa-lib
              ./gitcompile
              popd

              ls -lh $PWD/alsa-lib/src/.libs

              strings $PWD/alsa-lib/src/.libs/libasound.so.2.0.0 | grep "^GLIBC"

              export CPLUS_INCLUDE_PATH=$PWD/alsa-lib/include:$CPLUS_INCLUDE_PATH
              export C_INCLUDE_PATH=$PWD/alsa-lib/include:$C_INCLUDE_PATH
              export SHERPA_ONNX_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs
              p=$PWD

              export SHERPA_ONNX_ENABLE_ALSA=1

              mkdir build
              cd build

              cmake \
                -DALSA_INCLUDE_DIR=$p/alsa-lib/include \
                -DALSA_LIBRARY=$p/alsa-lib/src/.libs/libasound.so \
                -DBUILD_SHARED_LIBS=ON \
                -DCMAKE_INSTALL_PREFIX=./install \
                -DSHERPA_ONNX_ENABLE_AXCL=ON \
                ..

              make -j4 install

              rm -rf install/lib/pkgconfig
              rm -fv install/lib/cargs.h
              rm -fv install/lib/libcargs.so
            '

      - name: Display system info
        shell: bash
        run: |
          uname -a
          gcc --version
          g++ --version

      - name: Display generated files
        shell: bash
        run: |
          export AXCL_SDK_ROOT=$PWD/sdk_dir
          export LD_LIBRARY_PATH=$AXCL_SDK_ROOT/lib:$LD_LIBRARY_PATH

          ls -lh $AXCL_SDK_ROOT/lib/

          cd build/install

          ls -lh bin

          echo "---"

          ls -lh lib

          file bin/sherpa-onnx

          readelf -d bin/sherpa-onnx

          ldd bin/sherpa-onnx

          echo "---"
          strings bin/sherpa-onnx | grep "^GLIBC"

      - name: Copy files
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          suffix=shared

          dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-axcl-linux-aarch64-$suffix
          mkdir $dst

          cp -a build/install/bin $dst/

          mkdir -p $dst/lib
          cp -v build/install/lib/lib*.so $dst/lib/

          ls -lh build/install/lib
          ls -lh build/install/bin

          ls -lh $dst/bin/
          echo "strip"
          strip $dst/bin/*

          echo "after strip"
          ls -lh $dst/bin/

          tree $dst

          tar cjvf ${dst}.tar.bz2 $dst

      - uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-axcl-linux-aarch64-shared
          path: sherpa-onnx-*linux-aarch64*.tar.bz2

      # https://huggingface.co/docs/hub/spaces-github-actions
      - name: Publish to huggingface
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_CLONE_PROTECTION_ACTIVE=false
            GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs huggingface

            cd huggingface
            dst=axcl-linux-aarch64/$SHERPA_ONNX_VERSION
            mkdir -p $dst

            cp -v ../sherpa-onnx-*axcl*-*.tar.bz2 $dst

            git status
            git lfs track "*.bz2"

            git add .

            git commit -m "upload sherpa-onnx-${SHERPA_ONNX_VERSION}-axcl-linux-aarch64.tar.bz2"

            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs main

      - name: Release pre-compiled binaries and libs for linux aarch64
        if: github.repository_owner == 'k2-fsa' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*linux-aarch64*.tar.bz2

      - name: Release pre-compiled binaries and libs for linux aarch64
        if: github.repository_owner == 'csukuangfj' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*linux-aarch64*.tar.bz2
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: v1.12.19


================================================
FILE: .github/workflows/axera-linux-aarch64.yaml
================================================
name: axera-linux-aarch64

on:
  push:
    branches:
      - master
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'
    paths:
      - '.github/workflows/axera-linux-aarch64.yaml'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/csrc/axera/*'
      - 'sherpa-onnx/c-api/*'
      - 'toolchains/aarch64-linux-gnu.toolchain.cmake'

  workflow_dispatch:

concurrency:
  group: axera-linux-aarch64-${{ github.ref }}
  cancel-in-progress: true

jobs:
  axera_linux_aarch64:
    runs-on: ubuntu-22.04-arm
    name: axera npu
    strategy:
      fail-fast: false
      matrix:
        include:
          - soc: ax650
          - soc: ax630c
          - soc: ax620q

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Download SDK
        shell: bash
        run: |
          soc=${{ matrix.soc }}
          if [[ $soc == ax650 ]]; then
            version=1.45.0_p39
            curl -SL -O https://github.com/AXERA-TECH/ax650n_bsp_sdk/archive/refs/tags/v$version.zip
            unzip -qq v$version.zip

            mv $PWD/ax650n_bsp_sdk-$version/msp/out sdk_dir
          elif [[ $soc == ax630c || $soc == ax620q ]]; then
            version=2.0.0_P7
            curl -SL -O https://github.com/AXERA-TECH/ax620e_bsp_sdk/archive/refs/tags/v2.0.0_P7.zip
            unzip -qq v$version.zip
            mv $PWD/ax620e_bsp_sdk-$version/msp/out/arm64_glibc sdk_dir

          fi

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: axera-${{ matrix.soc }}-linux-aarch64

      - name: Build sherpa-onnx
        shell: bash
        run: |
            docker run --rm \
              --volume ${{ github.workspace }}/:/k2-fsa/sherpa-onnx \
              quay.io/pypa/manylinux_2_28_aarch64 \
            bash -c '
              uname -a
              which gcc

              gcc --version
              g++ --version

              cmake --version


              cd /k2-fsa/sherpa-onnx/

              export AXERA_SDK_ROOT=$PWD/sdk_dir
              echo "AXERA_SDK_ROOT: $AXERA_SDK_ROOT"
              export CPLUS_INCLUDE_PATH="$AXERA_SDK_ROOT/include:$CPLUS_INCLUDE_PATH"
              export SHERPA_ONNX_AXERA_LIB_DIR="$AXERA_SDK_ROOT/lib"

              echo "pwd"

              ls -lh

              git clone --depth 1 --branch v1.2.12 https://github.com/alsa-project/alsa-lib
              pushd alsa-lib
              ./gitcompile
              popd

              ls -lh $PWD/alsa-lib/src/.libs

              strings $PWD/alsa-lib/src/.libs/libasound.so.2.0.0 | grep "^GLIBC"

              export CPLUS_INCLUDE_PATH=$PWD/alsa-lib/include:$CPLUS_INCLUDE_PATH
              export C_INCLUDE_PATH=$PWD/alsa-lib/include:$C_INCLUDE_PATH
              export SHERPA_ONNX_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs
              p=$PWD

              export SHERPA_ONNX_ENABLE_ALSA=1

              mkdir build
              cd build

              cmake \
                -DALSA_INCLUDE_DIR=$p/alsa-lib/include \
                -DALSA_LIBRARY=$p/alsa-lib/src/.libs/libasound.so \
                -DBUILD_SHARED_LIBS=ON \
                -DCMAKE_INSTALL_PREFIX=./install \
                -DSHERPA_ONNX_ENABLE_AXERA=ON \
                ..

              make -j4 install

              rm -rf install/lib/pkgconfig
              rm -fv install/lib/cargs.h
              rm -fv install/lib/libcargs.so
            '

      - name: Display system info
        shell: bash
        run: |
          uname -a
          gcc --version
          g++ --version

      - name: Display generated files
        shell: bash
        run: |
          export AXERA_SDK_ROOT=$PWD/sdk_dir
          export LD_LIBRARY_PATH=$AXERA_SDK_ROOT/lib:$LD_LIBRARY_PATH

          ls -lh $AXERA_SDK_ROOT/lib/

          cd build/install

          ls -lh bin

          echo "---"

          ls -lh lib

          file bin/sherpa-onnx

          readelf -d bin/sherpa-onnx

          ldd bin/sherpa-onnx

          echo "---"
          strings bin/sherpa-onnx | grep "^GLIBC"

      - name: Copy files
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          suffix=shared

          soc=${{ matrix.soc }}

          dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-axera-$soc-linux-aarch64-$suffix
          mkdir $dst

          cp -a build/install/bin $dst/

          mkdir -p $dst/lib
          cp -v build/install/lib/lib*.so $dst/lib/

          ls -lh build/install/lib
          ls -lh build/install/bin

          ls -lh $dst/bin/
          echo "strip"
          strip $dst/bin/*

          echo "after strip"
          ls -lh $dst/bin/

          tree $dst

          tar cjvf ${dst}.tar.bz2 $dst

      - uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-axera-${{ matrix.soc }}-linux-aarch64-shared
          path: sherpa-onnx-*linux-aarch64*.tar.bz2

      # https://huggingface.co/docs/hub/spaces-github-actions
      - name: Publish to huggingface
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_CLONE_PROTECTION_ACTIVE=false
            GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs huggingface

            cd huggingface
            dst=axera-linux-aarch64/$SHERPA_ONNX_VERSION/${{ matrix.soc }}
            mkdir -p $dst

            cp -v ../sherpa-onnx-*axera*-*.tar.bz2 $dst

            git status
            git lfs track "*.bz2"

            git add .

            git commit -m "upload sherpa-onnx-${SHERPA_ONNX_VERSION}-axera-${{ matrix.soc }}-linux-aarch64.tar.bz2"

            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs main

      - name: Release pre-compiled binaries and libs for linux aarch64
        if: github.repository_owner == 'k2-fsa' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*linux-aarch64*.tar.bz2

      - name: Release pre-compiled binaries and libs for linux aarch64
        if: github.repository_owner == 'csukuangfj' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*linux-aarch64*.tar.bz2
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: v1.12.19


================================================
FILE: .github/workflows/build-wheels-aarch64-cuda.yaml
================================================
name: build-wheels-aarch64-cuda

on:
  push:
    branches:
      - wheel
  workflow_dispatch:

env:
  SHERPA_ONNX_IS_IN_GITHUB_ACTIONS: 1

concurrency:
  group: build-wheels-aarch64-cuda-${{ github.ref }}
  cancel-in-progress: true

jobs:
  build_wheels_aarch64_cuda:
    name: ${{ matrix.manylinux }} ${{ matrix.python-version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-22.04-arm]
        python-version: ["cp38", "cp39", "cp310", "cp311", "cp312", "cp313", "cp314"]
        # manylinux: [manylinux2014] #, manylinux_2_28]
        manylinux: [manylinux_2_28] #, manylinux_2_28]

    steps:
      - uses: actions/checkout@v4

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      # see https://cibuildwheel.readthedocs.io/en/stable/changelog/
      # for a list of versions
      - name: Build wheels
        uses: pypa/cibuildwheel@v3.3.1
        env:
          CIBW_BEFORE_ALL: |
            git clone --depth 1 --branch v1.2.12 https://github.com/alsa-project/alsa-lib
            cd alsa-lib
            ./gitcompile
            cd ..
            echo "PWD"
            ls -lh /project/alsa-lib/src/.libs

          CIBW_ENVIRONMENT: >
            CPLUS_INCLUDE_PATH=/project/alsa-lib/include:$CPLUS_INCLUDE_PATH
            C_INCLUDE_PATH=/project/alsa-lib/include:$C_INCLUDE_PATH
            SHERPA_ONNX_ALSA_LIB_DIR=/project/alsa-lib/src/.libs
            LD_LIBRARY_PATH=/project/build/bdist.linux-x86_64/wheel/sherpa_onnx/lib:$SHERPA_ONNX_ALSA_LIB_DIR:$LD_LIBRARY_PATH
            SHERPA_ONNX_MAKE_ARGS="VERBOSE=1"
            SHERPA_ONNX_ENABLE_ALSA=1
            SHERPA_ONNX_ENABLE_GPU=ON
            SHERPA_ONNX_CMAKE_ARGS="-DSHERPA_ONNX_ENABLE_GPU=ON -DALSA_INCLUDE_DIR=/project/alsa-lib/include -DALSA_LIBRARY=/project/alsa-lib/src/.libs/libasound.so"
          CIBW_BUILD: "${{ matrix.python-version}}-* "
          CIBW_SKIP: "cp27-* cp35-* cp36-* *-win32 pp* *-musllinux* *-manylinux_i686"
          CIBW_BUILD_VERBOSITY: 3
          CIBW_ARCHS_LINUX: aarch64
          CIBW_MANYLINUX_AARCH64_IMAGE: quay.io/pypa/${{ matrix.manylinux }}_aarch64
          #  Don't repair Linux wheels
          CIBW_REPAIR_WHEEL_COMMAND_LINUX: ""
          # From onnxruntime >= 1.17.0, it drops support for CentOS 7.0 and it supports only manylinux_2_28.
          # manylinux_2_24 is no longer supported

      - name: Display wheels
        shell: bash
        run: |
          ls -lh ./wheelhouse/

      - name: Install patchelf
        shell: bash
        run: |
          sudo apt-get update -q
          sudo apt-get install -q -y patchelf
          patchelf --help

      - name: Patch wheels
        shell: bash
        run: |
          mkdir ./wheels
          sudo ./scripts/wheel/patch_wheel.py --in-dir ./wheelhouse --out-dir ./wheels

          ls -lh ./wheels/
          rm -rf ./wheelhouse
          mv ./wheels ./wheelhouse

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            d=cuda/$SHERPA_ONNX_VERSION

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            mkdir -p $d

            cp -v ../wheelhouse/*.whl $d/

            git status
            git add .
            git commit -m "add more wheels"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels main

      - uses: actions/upload-artifact@v4
        with:
          name: wheel-${{ matrix.python-version }}-${{ matrix.manylinux }}
          path: ./wheelhouse/*.whl


================================================
FILE: .github/workflows/build-wheels-aarch64-rknn.yaml
================================================
name: build-wheels-aarch64-rknn

on:
  push:
    branches:
      - wheel
  workflow_dispatch:

concurrency:
  group: build-wheels-aarch64-rknn-${{ github.ref }}
  cancel-in-progress: true

jobs:
  build_wheels_aarch64_rknn:
    runs-on: ${{ matrix.os }}
    name: ${{ matrix.os }} ${{ matrix.python-version }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-22.04-arm]
        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Download rknn-toolkit2
        shell: bash
        run: |
          git clone --depth 1 https://github.com/airockchip/rknn-toolkit2

      - name: Build sherpa-onnx
        shell: bash
        run: |
            docker run --rm \
              --volume ${{ github.workspace }}/:/k2-fsa/sherpa-onnx \
              quay.io/pypa/manylinux_2_28_aarch64 \
            bash -c '
              uname -a
              which gcc

              gcc --version
              g++ --version

              find /opt -name "python*"

              py=${{ matrix.python-version }}

              for v in $(seq 0 99); do
                if [ -f /opt/_internal/cpython-$py.$v/bin/python3 ]; then
                  py=/opt/_internal/cpython-$py.$v/bin/python3
                  break
                fi
              done

              # there is
              # py=/opt/_internal/cpython-3.13.3-nogil/bin/python3
              #
              echo "py: $py"

              $py --version

              $py -m venv my-py

              python3 --version
              which python3

              source ./my-py/bin/activate

              python3 --version
              which python3

              python3 -m pip install wheel twine setuptools

              echo "pwd"

              cd /k2-fsa/sherpa-onnx/

              ls -lh

              cmake --version

              uname -a
              echo "pwd"

              git clone --depth 1 --branch v1.2.12 https://github.com/alsa-project/alsa-lib
              pushd alsa-lib
              ./gitcompile
              popd

              ls -lh $PWD/alsa-lib/src/.libs

              strings $PWD/alsa-lib/src/.libs/libasound.so.2.0.0 | grep "^GLIBC"

              export CPLUS_INCLUDE_PATH=$PWD/alsa-lib/include:$CPLUS_INCLUDE_PATH
              export C_INCLUDE_PATH=$PWD/alsa-lib/include:$C_INCLUDE_PATH
              export SHERPA_ONNX_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs

              export SHERPA_ONNX_RKNN_TOOLKIT2_PATH=$PWD/rknn-toolkit2
              export SHERPA_ONNX_RKNN_TOOLKIT2_LIB_DIR=$SHERPA_ONNX_RKNN_TOOLKIT2_PATH/rknpu2/runtime/Linux/librknn_api/aarch64
              export CPLUS_INCLUDE_PATH=$SHERPA_ONNX_RKNN_TOOLKIT2_PATH/rknpu2/runtime/Linux/librknn_api/include:$CPLUS_INCLUDE_PATH

              export SHERPA_ONNX_ENABLE_ALSA=1

              p=$PWD

              export SHERPA_ONNX_CMAKE_ARGS="-DSHERPA_ONNX_ENABLE_RKNN=ON -DALSA_INCLUDE_DIR=$p/alsa-lib/include -DALSA_LIBRARY=$p/alsa-lib/src/.libs/libasound.so"
              python3 setup.py bdist_wheel

              mv dist wheelhouse
            '

      - name: Display results
        shell: bash
        run: |
          ls -lh wheelhouse

      - name: Fix wheel name
        shell: bash
        run: |
          python3 -m pip install auditwheel

          auditwheel show ./wheelhouse/*.whl

          auditwheel repair --help

          auditwheel --verbose repair --plat manylinux_2_27_aarch64 \
            --exclude librknnrt.so \
            --exclude libasound.so.2 \
            -w ./dist ./wheelhouse/*.whl

          ls -lh dist/*.whl

      - name: Show glibc versions
        shell: bash
        run: |
          mkdir t
          cp dist/*.whl t
          cd t
          unzip ./*.whl
          strings sherpa_onnx-*.data/data/bin/sherpa-onnx | grep GLIBC

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            d=rknn/$SHERPA_ONNX_VERSION/

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            mkdir -p $d

            cp -v ../dist/*.whl $d/

            git status
            git add .
            git commit -m "add more wheels"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels main

      - uses: actions/upload-artifact@v4
        with:
          name: wheel-${{ matrix.python-version }}
          path: ./dist/*.whl


================================================
FILE: .github/workflows/build-wheels-aarch64.yaml
================================================
name: build-wheels-aarch64

on:
  push:
    branches:
      - wheel
  workflow_dispatch:
    inputs:
      publish_sherpa_onnx_bin:
        description: "Publish sherpa-onnx-bin"
        required: false
        default: "true"
        type: boolean

env:
  SHERPA_ONNX_IS_IN_GITHUB_ACTIONS: 1

concurrency:
  group: build-wheels-aarch64-${{ github.ref }}
  cancel-in-progress: true

jobs:
  core:
    name: core
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-24.04-arm]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Display PWD
        shell: bash
        run: |
          echo "pwd: $PWD"
          ls -lh
          du -h -d1 .

      - name: Build sherpa-onnx
        shell: bash
        run: |
            docker run --rm \
              --volume ${{ github.workspace }}/:/home/runner/work/sherpa-onnx/sherpa-onnx \
              quay.io/pypa/manylinux2014_aarch64 \
            bash -c '
              uname -a
              gcc --version
              cmake --version
              cat /etc/*release
              id
              pwd

              cd /home/runner/work/sherpa-onnx/sherpa-onnx

              find /opt -name "python*"

              echo "--------------------"
              PY_PATH=$(echo /opt/_internal/cpython-3.10*/bin)
              export PATH=$PY_PATH:$PATH
              which python3
              python3 --version

              python3 -m venv my

              source ./my/bin/activate

              python3 -m pip install setuptools wheel twine

              git clone --depth 1 --branch v1.2.12 https://github.com/alsa-project/alsa-lib
              pushd alsa-lib
              ./gitcompile
              popd

              export CPLUS_INCLUDE_PATH=$PWD/alsa-lib/include:$CPLUS_INCLUDE_PATH
              export SHERPA_ONNX_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs

              mkdir build
              pushd build

              cmake \
                -D SHERPA_ONNX_ENABLE_TTS=ON \
                -D CMAKE_BUILD_TYPE=Release \
                -D BUILD_SHARED_LIBS=ON \
                -D SHERPA_ONNX_BUILD_C_API_EXAMPLES=OFF \
                -D CMAKE_INSTALL_PREFIX=./install \
                ..

              make -j2
              make install

              ls -lh lib
              ls -lh bin

              echo "----"
              ls -lh install/lib

              rm -fv install/lib/libcargs.so

              echo "----"
              ls -lh install/bin

              echo "sherpa-onnx-core"
              mkdir -p ../scripts/wheel/sherpa-onnx-core/sherpa_onnx/lib
              cp -v ./install/lib/lib*.so ../scripts/wheel/sherpa-onnx-core/sherpa_onnx/lib

              mkdir -p ../scripts/wheel/sherpa-onnx-core/sherpa_onnx/include/sherpa-onnx/c-api
              cp -v ./install/include/sherpa-onnx/c-api/*.h ../scripts/wheel/sherpa-onnx-core/sherpa_onnx/include/sherpa-onnx/c-api

              pushd ../scripts/wheel/sherpa-onnx-core
              python3 setup.py bdist_wheel --plat-name=manylinux2014_aarch64

              ls -lh dist

              popd

              echo "sherpa-onnx-bin"

              mkdir -p ../scripts/wheel/sherpa-onnx-bin/bin
              cp -v ./install/bin/sherpa-onnx* ../scripts/wheel/sherpa-onnx-bin/bin

              pushd ../scripts/wheel/sherpa-onnx-bin
              python3 setup.py bdist_wheel --plat-name=manylinux2014_aarch64

              ls -lh dist

              popd
            '

      - name: Collect wheels
        shell: bash
        run: |
          sudo chown -R $USER ./scripts/wheel
          mkdir wheelhouse
          cp -v ./scripts/wheel/sherpa-onnx-core/dist/*.whl ./wheelhouse
          cp -v ./scripts/wheel/sherpa-onnx-bin/dist/*.whl ./wheelhouse

      - uses: actions/upload-artifact@v4
        with:
          name: wheels-core-linux-aarch64
          path: ./wheelhouse/*.whl

      - name: Show wheels
        shell: bash
        run: |
          sudo chown -R $USER ./scripts/wheel
          ls -lh ./scripts/wheel/sherpa-onnx-core/dist
          ls -lh ./scripts/wheel/sherpa-onnx-bin/dist

          unzip -l ./scripts/wheel/sherpa-onnx-core/dist/*.whl
          echo "---"
          unzip -l ./scripts/wheel/sherpa-onnx-bin/dist/*.whl

      - name: Install patchelf
        shell: bash
        run: |
          sudo apt-get update -q
          sudo apt-get install -q -y patchelf
          patchelf --help

      - name: Patch wheels
        shell: bash
        run: |
          mkdir ./wheels
          sudo ./scripts/wheel/patch_wheel.py --in-dir ./wheelhouse --out-dir ./wheels

          ls -lh ./wheels/
          rm -rf ./wheelhouse
          mv ./wheels ./wheelhouse

      - uses: actions/upload-artifact@v4
        with:
          name: wheels-core-linux-aarch64-patched
          path: ./wheelhouse/*.whl

  test:
    name: test
    needs: [core]
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-24.04-arm]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Retrieve artifact from Linux x64
        uses: actions/download-artifact@v4
        with:
          name: wheels-core-linux-aarch64-patched
          path: /tmp/wheels

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Show
        shell: bash
        run: |
          ls -lh /tmp/wheels

      - name: Install
        shell: bash
        run: |
          python3 -m pip install /tmp/wheels/*.whl

      - name: Show version
        shell: bash
        run: |
          sherpa-onnx-version

      - name: Show help
        shell: bash
        run: |
          sherpa-onnx --help

          echo "---"

          ls -lh $(which sherpa-onnx)
          file $(which sherpa-onnx)
          readelf -d $(which sherpa-onnx)

          ldd $(which sherpa-onnx)

          sherpa-onnx-offline --help

          echo "---"

          sherpa-onnx-vad --help

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            d=cpu/$SHERPA_ONNX_VERSION

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            mkdir -p $d

            cp -v /tmp/wheels/*.whl $d/

            git status
            git add .
            git commit -m "add more wheels"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels main

      - name: Publish wheels to PyPI ${{ github.event.inputs.publish_sherpa_onnx_bin }}
        if: ${{ (github.event.inputs.publish_sherpa_onnx_bin || 'true') == 'true' }}
        env:
          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
        shell: bash
        run: |
          python3 -m pip install --upgrade pip
          python3 -m pip install wheel twine==5.0.0 setuptools

          twine upload /tmp/wheels/*.whl


  build_wheels_aarch64:
    needs: [core, test]
    name: ${{ matrix.manylinux }} ${{ matrix.python-version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        # see https://github.com/pypa/cibuildwheel/issues/2257
        # we don't use qemu from now on
        os: [ubuntu-24.04-arm]
        python-version: ["cp38", "cp39", "cp310", "cp311", "cp312", "cp313", "cp314"]
        manylinux: [manylinux2014] #, manylinux_2_28]

    steps:
      - uses: actions/checkout@v4

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      # see https://cibuildwheel.readthedocs.io/en/stable/changelog/
      # for a list of versions
      - name: Build wheels
        uses: pypa/cibuildwheel@v3.3.1
        env:
          CIBW_BEFORE_ALL: |
            git clone --depth 1 --branch v1.2.12 https://github.com/alsa-project/alsa-lib
            cd alsa-lib
            ./gitcompile
            cd ..
            echo "PWD"
            ls -lh /project/alsa-lib/src/.libs

          CIBW_ENVIRONMENT: >
            SHERPA_ONNX_SPLIT_PYTHON_PACKAGE=ON
            C_INCLUDE_PATH=/project/alsa-lib/include:$C_INCLUDE_PATH
            CPLUS_INCLUDE_PATH=/project/alsa-lib/include:$CPLUS_INCLUDE_PATH
            SHERPA_ONNX_ALSA_LIB_DIR=/project/alsa-lib/src/.libs
            LD_LIBRARY_PATH=/project/build/bdist.linux-aarch64/wheel/sherpa_onnx/lib:$SHERPA_ONNX_ALSA_LIB_DIR
            SHERPA_ONNX_MAKE_ARGS="VERBOSE=1"
            SHERPA_ONNX_ENABLE_ALSA=1
            SHERPA_ONNX_CMAKE_ARGS="-DSHERPA_ONNX_ENABLE_BINARY=OFF -DSHERPA_ONNX_BUILD_C_API_EXAMPLES=OFF -DSHERPA_ONNX_ENABLE_C_API=OFF -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF -DALSA_INCLUDE_DIR=$p/alsa-lib/include -DALSA_LIBRARY=$p/alsa-lib/src/.libs/libasound.so"

          CIBW_BUILD: "${{ matrix.python-version}}-* "
          CIBW_SKIP: "cp27-* cp35-* cp36-* *-win32 pp* *-musllinux* *-manylinux_i686"
          CIBW_BUILD_VERBOSITY: 3
          CIBW_ARCHS_LINUX: aarch64
          # https://quay.io/repository/pypa/manylinux2014_aarch64?tab=tags
          CIBW_MANYLINUX_AARCH64_IMAGE: quay.io/pypa/${{ matrix.manylinux }}_aarch64
          # From onnxruntime >= 1.17.0, it drops support for CentOS 7.0 and it supports only manylinux_2_28.
          # manylinux_2_24 is no longer supported
          CIBW_REPAIR_WHEEL_COMMAND: >
            auditwheel repair -w {dest_dir}
            --exclude libonnxruntime.so
            {wheel}

      - uses: actions/upload-artifact@v4
        with:
          name: wheel-${{ matrix.python-version }}-${{ matrix.manylinux }}-linux-aarch64
          path: ./wheelhouse/*.whl

      - name: Display wheels
        shell: bash
        run: |
          ls -lh ./wheelhouse/

      - name: Show wheels
        shell: bash
        run: |
          ls -lh wheelhouse/*.whl

          unzip -l wheelhouse/*.whl

          echo "---"

          mkdir t
          cp wheelhouse/*.whl ./t
          cd ./t
          unzip ./*.whl
          ls -lh
          echo "---"

          readelf -d sherpa_onnx/lib/*.so

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            d=cpu/$SHERPA_ONNX_VERSION

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            mkdir -p $d

            cp -v ../wheelhouse/*.whl $d/

            git status
            git add .
            git commit -m "add more wheels"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels main

      - name: Publish wheels to PyPI
        env:
          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
        run: |
          python3 -m pip install --upgrade pip
          python3 -m pip install wheel twine==5.0.0 setuptools

          twine upload ./wheelhouse/*.whl


================================================
FILE: .github/workflows/build-wheels-armv7l.yaml
================================================
name: build-wheels-armv7l

on:
  push:
    branches:
      - wheel
  workflow_dispatch:
    inputs:
      publish_sherpa_onnx_bin:
        description: "Publish sherpa-onnx-bin"
        required: false
        default: "true"
        type: boolean

env:
  SHERPA_ONNX_IS_IN_GITHUB_ACTIONS: 1

concurrency:
  group: build-wheels-armv7-${{ github.ref }}
  cancel-in-progress: true

jobs:
  core:
    name: core
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v2
        with:
          platforms: arm

      - name: Display PWD
        shell: bash
        run: |
          echo "pwd: $PWD"
          ls -lh
          du -h -d1 .

      - name: Build sherpa-onnx
        shell: bash
        run: |
          docker run --rm \
            --platform linux/arm/v7 \
            --volume ${{ github.workspace }}/:/home/runner/work/sherpa-onnx/sherpa-onnx \
            quay.io/pypa/manylinux_2_35_armv7l \
            bash -c '
              find / -name "*gcc*" 2>/dev/null

              uname -a
              gcc --version
              cmake --version
              cat /etc/*release
              id
              pwd

              cd /home/runner/work/sherpa-onnx/sherpa-onnx

              # find /opt -name "python*"

              echo "--------------------"
              PY_PATH=$(echo /opt/_internal/cpython-3.10*/bin)
              export PATH=$PY_PATH:$PATH
              which python3
              python3 --version

              python3 -m venv my

              source ./my/bin/activate

              python3 -m pip install setuptools wheel twine

              git clone --depth 1 --branch v1.2.12 https://github.com/alsa-project/alsa-lib
              pushd alsa-lib
              ./gitcompile
              popd

              export CPLUS_INCLUDE_PATH=$PWD/alsa-lib/include:$CPLUS_INCLUDE_PATH
              export SHERPA_ONNX_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs

              mkdir build
              pushd build

              cmake \
                -D SHERPA_ONNX_ENABLE_TTS=ON \
                -D CMAKE_BUILD_TYPE=Release \
                -D BUILD_SHARED_LIBS=ON \
                -D SHERPA_ONNX_BUILD_C_API_EXAMPLES=OFF \
                -D CMAKE_INSTALL_PREFIX=./install \
                ..

              make -j2
              make install

              ls -lh lib
              ls -lh bin

              echo "----"
              ls -lh install/lib

              file install/lib/*

              rm -fv install/lib/libcargs.so

              echo "----"
              ls -lh install/bin

              file install/bin/*

              ./install/bin/sherpa-onnx --help

              echo "sherpa-onnx-core"
              mkdir -p ../scripts/wheel/sherpa-onnx-core/sherpa_onnx/lib
              cp -v ./install/lib/lib*.so ../scripts/wheel/sherpa-onnx-core/sherpa_onnx/lib

              mkdir -p ../scripts/wheel/sherpa-onnx-core/sherpa_onnx/include/sherpa-onnx/c-api
              cp -v ./install/include/sherpa-onnx/c-api/*.h ../scripts/wheel/sherpa-onnx-core/sherpa_onnx/include/sherpa-onnx/c-api

              pushd ../scripts/wheel/sherpa-onnx-core
              python3 setup.py bdist_wheel --plat-name=manylinux_2_35_armv7l

              ls -lh dist

              popd

              echo "sherpa-onnx-bin"

              mkdir -p ../scripts/wheel/sherpa-onnx-bin/bin
              cp -v ./install/bin/sherpa-onnx* ../scripts/wheel/sherpa-onnx-bin/bin

              pushd ../scripts/wheel/sherpa-onnx-bin
              python3 setup.py bdist_wheel --plat-name=manylinux_2_35_armv7l

              ls -lh dist

              popd
            '

      - name: Collect wheels
        shell: bash
        run: |
          sudo chown -R $USER ./scripts/wheel
          mkdir wheelhouse
          cp -v ./scripts/wheel/sherpa-onnx-core/dist/*.whl ./wheelhouse
          cp -v ./scripts/wheel/sherpa-onnx-bin/dist/*.whl ./wheelhouse

      - uses: actions/upload-artifact@v4
        with:
          name: wheels-core-linux-armv7l
          path: ./wheelhouse/*.whl

      - name: Show wheels
        shell: bash
        run: |
          sudo chown -R $USER ./scripts/wheel
          ls -lh ./scripts/wheel/sherpa-onnx-core/dist
          ls -lh ./scripts/wheel/sherpa-onnx-bin/dist

          unzip -l ./scripts/wheel/sherpa-onnx-core/dist/*.whl
          echo "---"
          unzip -l ./scripts/wheel/sherpa-onnx-bin/dist/*.whl

      - name: Install patchelf
        shell: bash
        run: |
          sudo apt-get update -q
          sudo apt-get install -q -y patchelf
          patchelf --help

      - name: Patch wheels
        shell: bash
        run: |
          mkdir ./wheels
          sudo ./scripts/wheel/patch_wheel.py --in-dir ./wheelhouse --out-dir ./wheels

          ls -lh ./wheels/
          rm -rf ./wheelhouse
          mv ./wheels ./wheelhouse

      - uses: actions/upload-artifact@v4
        with:
          name: wheels-core-linux-armv7l-patched
          path: ./wheelhouse/*.whl

  test:
    name: test
    needs: [core]
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Retrieve artifact from Linux
        uses: actions/download-artifact@v4
        with:
          name: wheels-core-linux-armv7l-patched
          path: /tmp/wheels

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Show
        shell: bash
        run: |
          ls -lh /tmp/wheels

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            d=cpu/$SHERPA_ONNX_VERSION

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            mkdir -p $d

            cp -v /tmp/wheels/*.whl $d/

            git status
            git add .
            git commit -m "add more wheels"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels main

      - name: Publish wheels to PyPI ${{ github.event.inputs.publish_sherpa_onnx_bin }}
        if: ${{ (github.event.inputs.publish_sherpa_onnx_bin || 'true') == 'true' }}
        env:
          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
        shell: bash
        run: |
          python3 -m pip install --upgrade pip
          python3 -m pip install wheel twine==5.0.0 setuptools

          twine upload /tmp/wheels/*.whl


  build_wheels_armv7l:
    name: ${{ matrix.manylinux }} ${{ matrix.python-version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        # see https://github.com/pypa/cibuildwheel/issues/2257
        # we don't use qemu from now on
        os: [ubuntu-latest]
        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
        manylinux: [manylinux_2_35] #, manylinux_2_28]

    steps:
      - uses: actions/checkout@v4

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v2
        with:
          platforms: arm

      - name: Build sherpa-onnx
        shell: bash
        run: |
          docker run --rm \
            --platform linux/arm/v7 \
            --volume ${{ github.workspace }}/:/home/runner/work/sherpa-onnx/sherpa-onnx \
            quay.io/pypa/manylinux_2_35_armv7l \
            bash -c '
              find / -name "*gcc*" 2>/dev/null

              uname -a
              gcc --version
              cmake --version
              cat /etc/*release
              id
              pwd

              python_version=${{ matrix.python-version }}

              cd /home/runner/work/sherpa-onnx/sherpa-onnx

              # find /opt -name "python*"

              echo "--------------------"
              # Construct glob pattern
              PY_GLOB="/opt/_internal/cpython-${python_version}*/bin"

              # Expand the glob safely
              shopt -s nullglob  # Avoid literal string if no match
              matches=($PY_GLOB)
              shopt -u nullglob

              if [[ ${#matches[@]} -eq 0 ]]; then
                echo "No Python installation found for version $python_version"
                exit 1
              elif [[ ${#matches[@]} -gt 1 ]]; then
                echo "Multiple Python installations found for version $python_version:"
                printf "  %s\n" "${matches[@]}"
                echo "Using the first one: ${matches[0]}"
              fi

              PY_PATH="${matches[0]}"

              echo "$PY_PATH"
              export PATH="$PY_PATH:$PATH"
              echo $PY_PATH
              export PATH=$PY_PATH:$PATH
              which python3
              python3 --version

              python3 -m venv my

              source ./my/bin/activate

              python3 -m pip install setuptools wheel twine

              git clone --depth 1 --branch v1.2.12 https://github.com/alsa-project/alsa-lib
              pushd alsa-lib
              ./gitcompile
              popd

              export SHERPA_ONNX_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs
              echo "SHERPA_ONNX_ALSA_LIB_DIR: $SHERPA_ONNX_ALSA_LIB_DIR"

              export LD_LIBRARY_PATH=$PWD/build/bdist.linux-aarch64/wheel/sherpa_onnx/lib:$SHERPA_ONNX_ALSA_LIB_DIR:$LD_LIBRARY_PATH
              export LIBRARY_PATH=$PWD/build/bdist.linux-aarch64/wheel/sherpa_onnx/lib:$SHERPA_ONNX_ALSA_LIB_DIR:$LIBRARY_PATH

              echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"
              echo "LIBRARY_PATH: $LIBRARY_PATH"

              export C_INCLUDE_PATH=$PWD/alsa-lib/include:$C_INCLUDE_PATH
              export CPLUS_INCLUDE_PATH=$PWD/alsa-lib/include:$CPLUS_INCLUDE_PATH
              export SHERPA_ONNX_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs

              export SHERPA_ONNX_MAKE_ARGS="VERBOSE=1"
              export SHERPA_ONNX_ENABLE_ALSA=1
              export SHERPA_ONNX_CMAKE_ARGS="-DCMAKE_C_FLAGS=\"-march=armv7-a -mfloat-abi=hard -mfpu=neon\" -DCMAKE_CXX_FLAGS=\"-march=armv7-a -mfloat-abi=hard -mfpu=neon\" -DSHERPA_ONNX_ENABLE_BINARY=OFF -DSHERPA_ONNX_BUILD_C_API_EXAMPLES=OFF -DSHERPA_ONNX_ENABLE_C_API=ON -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF -DALSA_INCLUDE_DIR=$p/alsa-lib/include -DALSA_LIBRARY=$p/alsa-lib/src/.libs/libasound.so"
              python3 setup.py bdist_wheel
              ls -lh dist

              mkdir wheelhouse
              cp -v dist/* wheelhouse/
            '

      - uses: actions/upload-artifact@v4
        with:
          name: wheel-${{ matrix.python-version }}-${{ matrix.manylinux }}-linux-armv7l
          path: ./wheelhouse/*.whl

      - name: Display wheels
        shell: bash
        run: |
          ls -lh ./wheelhouse/

      - name: Show wheels
        shell: bash
        run: |
          ls -lh wheelhouse/*.whl

          unzip -l wheelhouse/*.whl

          echo "---"

          mkdir t
          cp wheelhouse/*.whl ./t
          cd ./t
          unzip ./*.whl
          ls -lh
          echo "---"

          readelf -d sherpa_onnx/lib/*.so

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            d=cpu/$SHERPA_ONNX_VERSION

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            mkdir -p $d

            cp -v ../wheelhouse/*.whl $d/

            git status
            git add .
            git commit -m "add more wheels"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels main

      - name: Publish wheels to PyPI
        env:
          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
        run: |
          python3 -m pip install --upgrade pip
          python3 -m pip install wheel twine==5.0.0 setuptools

          twine upload ./wheelhouse/*.whl


================================================
FILE: .github/workflows/build-wheels-linux-cuda.yaml
================================================
name: build-wheels-linux-cuda

on:
  push:
    branches:
      - wheel
  workflow_dispatch:

env:
  SHERPA_ONNX_IS_IN_GITHUB_ACTIONS: 1

concurrency:
  group: build-wheels-linux-cuda-${{ github.ref }}
  cancel-in-progress: true

jobs:
  build_wheels_linux_cuda:
    name: ${{ matrix.manylinux }} ${{ matrix.python-version }} ${{ matrix.onnxruntime_version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-22.04]
        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
        onnxruntime_version: ["1.17.1", "1.23.2"]

    steps:
      - uses: actions/checkout@v4

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Python dependencies
        shell: bash
        run: |
          if [[ ${{ matrix.python-version }} == "3.7" ]]; then
            pip install -U pip wheel setuptools twine
          else
            pip install -U pip wheel setuptools twine==5.0.0
          fi

      - name: Build alsa-lib
        shell: bash
        run: |
          git clone --depth 1 --branch v1.2.12 https://github.com/alsa-project/alsa-lib
          cd alsa-lib
          ./gitcompile

      - name: Build sherpa-onnx
        shell: bash
        run: |
          export CPLUS_INCLUDE_PATH=$PWD/alsa-lib/include:$CPLUS_INCLUDE_PATH
          export C_INCLUDE_PATH=$PWD/alsa-lib/include:$C_INCLUDE_PATH
          export SHERPA_ONNX_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs
          export LD_LIBRARY_PATH=$SHERPA_ONNX_ALSA_LIB_DIR:$LD_LIBRARY_PATH

          echo "CPLUS_INCLUDE_PATH: $CPLUS_INCLUDE_PATH"
          ls -lh $PWD/alsa-lib/include
          echo "---"
          ls -lh $PWD/alsa-lib/src/.libs

          p=$PWD

          export SHERPA_ONNX_MAKE_ARGS="VERBOSE=1"
          export SHERPA_ONNX_ENABLE_ALSA=1
          export SHERPA_ONNX_CMAKE_ARGS="-DSHERPA_ONNX_ENABLE_GPU=ON"

          onnxruntime_version=${{ matrix.onnxruntime_version }}
          curl -SL -O https://github.com/csukuangfj/onnxruntime-libs/releases/download/v$onnxruntime_version/onnxruntime-linux-x64-gpu-$onnxruntime_version-patched.zip
          unzip  onnxruntime-linux-x64-gpu-$onnxruntime_version-patched.zip

          export SHERPA_ONNXRUNTIME_LIB_DIR=$PWD/onnxruntime-linux-x64-gpu-$onnxruntime_version-patched/lib
          export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$PWD/onnxruntime-linux-x64-gpu-$onnxruntime_version-patched/include

          if [[ $onnxruntime_version == "1.23.2" ]]; then
            export SHERPA_ONNX_CUDA_VERSION="12.cudnn9"
          fi

          python3 setup.py bdist_wheel

          ls -lh dist

          mv dist wheelhouse

      - name: Display wheels
        shell: bash
        run: |
          ls -lh ./wheelhouse/

          unzip -l ./wheelhouse/*.whl

      - name: Install patchelf
        shell: bash
        run: |
          sudo apt-get update -q
          sudo apt-get install -q -y patchelf
          patchelf --help

      - name: Patch wheels
        shell: bash
        run: |
          mkdir ./wheels
          sudo ./scripts/wheel/patch_wheel.py --in-dir ./wheelhouse --out-dir ./wheels

          ls -lh ./wheels/
          rm -rf ./wheelhouse
          mv ./wheels ./wheelhouse


      - uses: actions/upload-artifact@v4
        with:
          name: wheel-cuda-${{ matrix.python-version }}-${{ matrix.onnxruntime_version }}
          path: ./wheelhouse/*.whl

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            d=cuda/$SHERPA_ONNX_VERSION

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            mkdir -p $d

            cp -v ../wheelhouse/*.whl $d/

            git status
            git add .
            git commit -m "add more wheels"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels main


================================================
FILE: .github/workflows/build-wheels-linux.yaml
================================================
name: build-wheels-linux

on:
  push:
    branches:
      - wheel
  workflow_dispatch:
    inputs:
      publish_sherpa_onnx_bin:
        description: "Publish sherpa-onnx-bin"
        required: false
        default: "true"
        type: boolean

env:
  SHERPA_ONNX_IS_IN_GITHUB_ACTIONS: 1

concurrency:
  group: build-wheels-linux-${{ github.ref }}
  cancel-in-progress: true

jobs:
  core:
    name: core
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Display PWD
        shell: bash
        run: |
          echo "pwd: $PWD"
          ls -lh
          du -h -d1 .

      - name: Build sherpa-onnx (docker manually)
        shell: bash
        run: |
          docker run --rm \
            -v ${{ github.workspace }}:/workspace \
            -w /workspace \
            quay.io/pypa/manylinux2014_x86_64 \
            bash -c '
              uname -a
              gcc --version
              cmake --version
              cat /etc/*release
              id
              pwd

              cd /workspace

              echo pwd
              echo $PWD

              find /opt -name "python*"

              echo "--------------------"
              PY_PATH=$(echo /opt/_internal/cpython-3.10*/bin)
              echo "PY_PATH: $PY_PATH"

              export PATH=$PY_PATH:$PATH

              echo "path $PATH"

              which python3
              python3 --version

              python3 -m venv my

              source ./my/bin/activate

              python3 -m pip install setuptools wheel twine

              git clone --depth 1 --branch v1.2.12 https://github.com/alsa-project/alsa-lib
              pushd alsa-lib
              ./gitcompile
              popd

              export CPLUS_INCLUDE_PATH=$PWD/alsa-lib/include:$CPLUS_INCLUDE_PATH
              export C_INCLUDE_PATH=$PWD/alsa-lib/include:$C_INCLUDE_PATH
              export SHERPA_ONNX_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs

              mkdir build
              pushd build

              cmake \
                -D SHERPA_ONNX_ENABLE_TTS=ON \
                -D CMAKE_BUILD_TYPE=Release \
                -D BUILD_SHARED_LIBS=ON \
                -D SHERPA_ONNX_BUILD_C_API_EXAMPLES=OFF \
                -D CMAKE_INSTALL_PREFIX=./install \
                ..

              make -j2
              make install

              ls -lh lib
              ls -lh bin

              echo "----"
              ls -lh install/lib

              rm -fv install/lib/libcargs.so

              echo "----"
              ls -lh install/bin

              echo sherpa-onnx-core
              mkdir -p ../scripts/wheel/sherpa-onnx-core/sherpa_onnx/lib
              cp -v ./install/lib/lib*.so ../scripts/wheel/sherpa-onnx-core/sherpa_onnx/lib

              mkdir -p ../scripts/wheel/sherpa-onnx-core/sherpa_onnx/include/sherpa-onnx/c-api
              cp -v ./install/include/sherpa-onnx/c-api/*.h ../scripts/wheel/sherpa-onnx-core/sherpa_onnx/include/sherpa-onnx/c-api

              pushd ../scripts/wheel/sherpa-onnx-core
              python3 setup.py bdist_wheel --plat-name=manylinux2014_x86_64

              ls -lh dist
              unzip -l dist/*.whl

              popd

              echo "sherpa-onnx-bin"

              mkdir -p ../scripts/wheel/sherpa-onnx-bin/bin
              cp -v ./install/bin/sherpa-onnx* ../scripts/wheel/sherpa-onnx-bin/bin

              pushd ../scripts/wheel/sherpa-onnx-bin
              python3 setup.py bdist_wheel --plat-name=manylinux2014_x86_64

              ls -lh dist
              unzip -l dist/*.whl

              popd
            '

      - name: Collect wheels
        shell: bash
        run: |
          sudo chown -R $USER ./scripts/wheel
          mkdir wheelhouse
          cp -v ./scripts/wheel/sherpa-onnx-core/dist/*.whl ./wheelhouse
          cp -v ./scripts/wheel/sherpa-onnx-bin/dist/*.whl ./wheelhouse

      - uses: actions/upload-artifact@v4
        with:
          name: wheels-core-linux-x64
          path: ./wheelhouse/*.whl

      - name: Show wheels
        shell: bash
        run: |
          sudo chown -R $USER ./scripts/wheel
          ls -lh ./scripts/wheel/sherpa-onnx-core/dist
          ls -lh ./scripts/wheel/sherpa-onnx-bin/dist

          unzip -l ./scripts/wheel/sherpa-onnx-core/dist/*.whl
          echo "---"
          unzip -l ./scripts/wheel/sherpa-onnx-bin/dist/*.whl

      - name: Install patchelf
        shell: bash
        run: |
          sudo apt-get update -q
          sudo apt-get install -q -y patchelf
          patchelf --help

      - name: Patch wheels
        shell: bash
        run: |
          mkdir ./wheels
          sudo ./scripts/wheel/patch_wheel.py --in-dir ./wheelhouse --out-dir ./wheels

          ls -lh ./wheels/
          rm -rf ./wheelhouse
          mv ./wheels ./wheelhouse

      - uses: actions/upload-artifact@v4
        with:
          name: wheels-core-linux-x64-patched
          path: ./wheelhouse/*.whl

  test:
    name: test
    needs: [core]
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Retrieve artifact from Linux x64
        uses: actions/download-artifact@v4
        with:
          name: wheels-core-linux-x64-patched
          path: /tmp/wheels

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Show
        shell: bash
        run: |
          ls -lh /tmp/wheels

      - name: Install
        shell: bash
        run: |
          python3 -m pip install /tmp/wheels/*.whl

      - name: Show version
        shell: bash
        run: |
          sherpa-onnx-version

      - name: Show help
        shell: bash
        run: |
          sherpa-onnx --help

          echo "---"

          ls -lh $(which sherpa-onnx)
          file $(which sherpa-onnx)
          readelf -d $(which sherpa-onnx)

          ldd $(which sherpa-onnx)

          sherpa-onnx-offline --help

          echo "---"

          sherpa-onnx-vad --help

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            d=cpu/$SHERPA_ONNX_VERSION

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            mkdir -p $d

            cp -v /tmp/wheels/*.whl $d/

            git status
            git add .
            git commit -m "add more wheels"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels main

      - name: Publish wheels to PyPI ${{ github.event.inputs.publish_sherpa_onnx_bin }}
        if: ${{ (github.event.inputs.publish_sherpa_onnx_bin || 'true') == 'true' }}
        env:
          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
        shell: bash
        run: |
          python3 -m pip install --upgrade pip
          python3 -m pip install wheel twine==5.0.0 setuptools

          twine upload /tmp/wheels/*.whl

  build_wheels_linux:
    needs: [core, test]
    name: ${{ matrix.manylinux }} ${{ matrix.python-version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["cp38", "cp39", "cp310", "cp311", "cp312", "cp313", "cp314"]
        manylinux: [manylinux2014] #, manylinux_2_28]

    steps:
      - uses: actions/checkout@v4

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      # see https://cibuildwheel.readthedocs.io/en/stable/changelog/
      # for a list of versions
      - name: Build wheels
        uses: pypa/cibuildwheel@v3.3.1
        env:
          CIBW_BEFORE_ALL: |
            git clone --depth 1 --branch v1.2.12 https://github.com/alsa-project/alsa-lib
            cd alsa-lib
            ./gitcompile
            cd ..
            echo "PWD"
            ls -lh /project/alsa-lib/src/.libs

          CIBW_ENVIRONMENT: >
            SHERPA_ONNX_SPLIT_PYTHON_PACKAGE=ON
            CPLUS_INCLUDE_PATH=/project/alsa-lib/include:$CPLUS_INCLUDE_PATH
            C_INCLUDE_PATH=/project/alsa-lib/include:$C_INCLUDE_PATH
            SHERPA_ONNX_ALSA_LIB_DIR=/project/alsa-lib/src/.libs
            LD_LIBRARY_PATH=/project/build/bdist.linux-x86_64/wheel/sherpa_onnx/lib:$SHERPA_ONNX_ALSA_LIB_DIR
            SHERPA_ONNX_MAKE_ARGS="VERBOSE=1"
            SHERPA_ONNX_ENABLE_ALSA=1
            SHERPA_ONNX_CMAKE_ARGS="-DSHERPA_ONNX_ENABLE_BINARY=OFF -DSHERPA_ONNX_BUILD_C_API_EXAMPLES=OFF -DSHERPA_ONNX_ENABLE_C_API=OFF -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF -DALSA_INCLUDE_DIR=$p/alsa-lib/include -DALSA_LIBRARY=$p/alsa-lib/src/.libs/libasound.so"

          CIBW_BUILD: "${{ matrix.python-version}}-* "
          CIBW_SKIP: "cp27-* cp35-* cp36-* *-win32 pp* *-musllinux* *-manylinux_i686"
          CIBW_BUILD_VERBOSITY: 3
          CIBW_MANYLINUX_X86_64_IMAGE: quay.io/pypa/${{ matrix.manylinux }}_x86_64
          CIBW_REPAIR_WHEEL_COMMAND: >
            auditwheel repair -w {dest_dir}
            --exclude libonnxruntime.so
            {wheel}

      - uses: actions/upload-artifact@v4
        with:
          name: wheel-${{ matrix.python-version }}-${{ matrix.manylinux }}
          path: ./wheelhouse/*.whl

      - name: Show wheels
        shell: bash
        run: |
          ls -lh wheelhouse/*.whl
          unzip -l wheelhouse/*.whl

          echo "---"

          mkdir t
          cp wheelhouse/*.whl ./t
          cd ./t
          unzip ./*.whl
          ls -lh
          echo "---"

          readelf -d sherpa_onnx/lib/*.so

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            d=cpu/$SHERPA_ONNX_VERSION

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            mkdir -p $d

            cp -v ../wheelhouse/*.whl $d/

            git status
            git add .
            git commit -m "add more wheels"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels main

      - name: Publish wheels to PyPI
        env:
          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
        shell: bash
        run: |
          python3 -m pip install --upgrade pip
          python3 -m pip install wheel twine==5.0.0 setuptools

          twine upload ./wheelhouse/*.whl

      - name: Build sdist
        if: matrix.python-version == 'cp38' && matrix.manylinux == 'manylinux2014'
        shell: bash
        run: |
          python3 setup.py sdist
          ls -l dist/*

      - name: Publish sdist to PyPI
        if: matrix.python-version == 'cp38' && matrix.manylinux == 'manylinux2014'
        env:
          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
        shell: bash
        run: |
          twine upload dist/sherpa*.tar.gz


================================================
FILE: .github/workflows/build-wheels-macos-arm64.yaml
================================================
name: build-wheels-macos-arm64

on:
  push:
    branches:
      - wheel
  workflow_dispatch:
    inputs:
      publish_sherpa_onnx_bin:
        description: "Publish sherpa-onnx-bin"
        required: false
        default: "true"
        type: boolean

env:
  SHERPA_ONNX_IS_IN_GITHUB_ACTIONS: 1

concurrency:
  group: build-wheels-macos-arm64-${{ github.ref }}
  cancel-in-progress: true

jobs:
  core:
    runs-on: ${{ matrix.os }}
    name: core
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Install deps
        shell: bash
        run: |
          python3 -m pip install setuptools wheel twine

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: macos-latest-sherpa-onnx-core-arm64

      - name: Configure CMake
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          mkdir build
          cd build

          cmake \
            -DSHERPA_ONNX_SPLIT_PYTHON_PACKAGE=ON \
            -D BUILD_SHARED_LIBS=ON \
            -D SHERPA_ONNX_BUILD_C_API_EXAMPLES=OFF \
            -D CMAKE_BUILD_TYPE=Release \
            -D CMAKE_OSX_ARCHITECTURES='arm64' \
            -D CMAKE_INSTALL_PREFIX=./install \
            ..

      - name: Build sherpa-onnx for macos
        shell: bash
        run: |
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"

          cd build
          make -j2
          make install

          ls -lh lib
          ls -lh bin

          file ./bin/sherpa-onnx

          rm -fv ./install/include/cargs.h
          rm -fv ./install/lib/cargs.h
          rm -fv ./install/lib/libcargs.dylib
          rm -fv ./install/lib/libcargs.a
          rm -rfv ./install/lib/pkgconfig

      - name: Copy files
        shell: bash
        run: |
          echo 'sherpa-onnx-core'
          mkdir -p scripts/wheel/sherpa-onnx-core/sherpa_onnx/lib
          cp -v ./build/install/lib/lib* ./scripts/wheel/sherpa-onnx-core/sherpa_onnx/lib

          mkdir -p ./scripts/wheel/sherpa-onnx-core/sherpa_onnx/include/sherpa-onnx/c-api
          cp -v ./build/install/include/sherpa-onnx/c-api/*.h ./scripts/wheel/sherpa-onnx-core/sherpa_onnx/include/sherpa-onnx/c-api

          echo 'sherpa-onnx-bin'

          mkdir -p ./scripts/wheel/sherpa-onnx-bin/bin
          cp -v ./build/install/bin/sherpa-onnx* ./scripts/wheel/sherpa-onnx-bin/bin

      - name: Build sherpa-onnx-core
        shell: bash
        run: |
          pushd ./scripts/wheel/sherpa-onnx-core
          python3 setup.py bdist_wheel --plat-name=macosx_11_0_arm64

          ls -lh dist
          unzip -l dist/*.whl

          popd

      - name: Build sherpa-onnx-bin
        shell: bash
        run: |
          pushd ./scripts/wheel/sherpa-onnx-bin
          python3 setup.py bdist_wheel --plat-name=macosx_11_0_arm64

          ls -lh dist
          unzip -l dist/*.whl

          popd

      - name: Collect wheels
        shell: bash
        run: |
          cp -v ./scripts/wheel/sherpa-onnx-core/dist/*.whl .
          cp -v ./scripts/wheel/sherpa-onnx-bin/dist/*.whl .

          ls -lh *.whl

      - uses: actions/upload-artifact@v4
        with:
          name: wheels-core-macos-arm64
          path: ./*.whl

  test:
    name: test
    needs: [core]
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Retrieve artifact from macos arm64
        uses: actions/download-artifact@v4
        with:
          name: wheels-core-macos-arm64
          path: /tmp/wheels

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Show
        shell: bash
        run: |
          ls -lh /tmp/wheels

      - name: Install
        shell: bash
        run: |
          python3 -m pip install /tmp/wheels/*.whl

      - name: Show version
        shell: bash
        run: |
          sherpa-onnx-version

      - name: Show help
        shell: bash
        run: |
          sherpa-onnx --help

          ls -lh $(which sherpa-onnx)
          file $(which sherpa-onnx)

          otool -L $(which sherpa-onnx)
          otool -l $(which sherpa-onnx)

          echo "---"

          sherpa-onnx-offline --help

          echo "---"

          sherpa-onnx-vad --help

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            d=cpu/$SHERPA_ONNX_VERSION

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            mkdir -p $d

            cp -v /tmp/wheels/*.whl $d/

            git status
            git add .
            git commit -m "add more wheels"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels main

      - name: Publish wheels to PyPI ${{ github.event.inputs.publish_sherpa_onnx_bin }}
        if: ${{ (github.event.inputs.publish_sherpa_onnx_bin || 'true') == 'true' }}
        env:
          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
        shell: bash
        run: |
          opts='--break-system-packages'

          python3 -m pip install $opts wheel twine==5.0.0 setuptools

          twine upload /tmp/wheels/*.whl

  build_wheels_macos_arm64:
    needs: [core, test]
    name: ${{ matrix.python-version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        python-version: ["cp38", "cp39", "cp310", "cp311", "cp312", "cp313", "cp314"]

    steps:
      - uses: actions/checkout@v4

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Build wheels
        uses: pypa/cibuildwheel@v3.3.1
        env:
          CIBW_BUILD: "${{ matrix.python-version}}-* "
          CIBW_ENVIRONMENT: >
            SHERPA_ONNX_SPLIT_PYTHON_PACKAGE=ON
            SHERPA_ONNX_CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES='arm64' -DSHERPA_ONNX_ENABLE_BINARY=OFF -DSHERPA_ONNX_BUILD_C_API_EXAMPLES=OFF -DSHERPA_ONNX_ENABLE_C_API=OFF -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF"
          CIBW_ARCHS: "arm64"
          CIBW_BUILD_VERBOSITY: 3

          #  Don't repair macOS wheels
          CIBW_REPAIR_WHEEL_COMMAND_MACOS: ""

      - name: Display wheels
        shell: bash
        run: |
          ls -lh ./wheelhouse/
          unzip -l ./wheelhouse/*.whl

      - uses: actions/upload-artifact@v4
        with:
          name: wheel-${{ matrix.python-version }}
          path: ./wheelhouse/*.whl

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            d=cpu/$SHERPA_ONNX_VERSION

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            mkdir -p $d

            cp -v ../wheelhouse/*.whl $d/

            git status
            git add .
            git commit -m "add more wheels"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels main

      - name: Publish wheels to PyPI
        env:
          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
        run: |
          opts='--break-system-packages'

          python3 -m pip install $opts wheel twine==5.0.0 setuptools

          twine upload ./wheelhouse/*.whl


================================================
FILE: .github/workflows/build-wheels-macos-universal2.yaml
================================================
name: build-wheels-macos-universal2

on:
  push:
    branches:
      - wheel
  workflow_dispatch:
    inputs:
      publish_sherpa_onnx_bin:
        description: "Publish sherpa-onnx-bin"
        required: false
        default: "true"
        type: boolean

env:
  SHERPA_ONNX_IS_IN_GITHUB_ACTIONS: 1

concurrency:
  group: build-wheels-macos-universal2-${{ github.ref }}
  cancel-in-progress: true

jobs:
  core:
    runs-on: ${{ matrix.os }}
    name: core
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Install deps
        shell: bash
        run: |
          python3 -m pip install setuptools wheel twine

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: macos-latest-sherpa-onnx-core-universal2

      - name: Configure CMake
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          mkdir build
          cd build

          cmake \
            -DSHERPA_ONNX_SPLIT_PYTHON_PACKAGE=ON \
            -D BUILD_SHARED_LIBS=ON \
            -D SHERPA_ONNX_BUILD_C_API_EXAMPLES=OFF \
            -D CMAKE_BUILD_TYPE=Release \
            -D CMAKE_OSX_ARCHITECTURES='arm64;x86_64' \
            -D CMAKE_INSTALL_PREFIX=./install \
            ..

      - name: Build sherpa-onnx for macos
        shell: bash
        run: |
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"

          cd build
          make -j2
          make install

          ls -lh lib
          ls -lh bin

          file ./bin/sherpa-onnx

          rm -fv ./install/include/cargs.h
          rm -fv ./install/lib/cargs.h
          rm -fv ./install/lib/libcargs.dylib
          rm -fv ./install/lib/libcargs.a
          rm -rfv ./install/lib/pkgconfig

      - name: Copy files
        shell: bash
        run: |
          echo 'sherpa-onnx-core'
          mkdir -p scripts/wheel/sherpa-onnx-core/sherpa_onnx/lib
          cp -v ./build/install/lib/lib* ./scripts/wheel/sherpa-onnx-core/sherpa_onnx/lib

          mkdir -p ./scripts/wheel/sherpa-onnx-core/sherpa_onnx/include/sherpa-onnx/c-api
          cp -v ./build/install/include/sherpa-onnx/c-api/*.h ./scripts/wheel/sherpa-onnx-core/sherpa_onnx/include/sherpa-onnx/c-api

          echo 'sherpa-onnx-bin'

          mkdir -p ./scripts/wheel/sherpa-onnx-bin/bin
          cp -v ./build/install/bin/sherpa-onnx* ./scripts/wheel/sherpa-onnx-bin/bin

      - name: Build sherpa-onnx-core
        shell: bash
        run: |
          pushd ./scripts/wheel/sherpa-onnx-core
          python3 setup.py bdist_wheel --plat-name=macosx_10_15_universal2

          ls -lh dist
          unzip -l dist/*.whl

          popd

      - name: Build sherpa-onnx-bin
        shell: bash
        run: |
          pushd ./scripts/wheel/sherpa-onnx-bin
          python3 setup.py bdist_wheel --plat-name=macosx_10_15_universal2

          ls -lh dist
          unzip -l dist/*.whl

          popd

      - name: Collect wheels
        shell: bash
        run: |
          cp -v ./scripts/wheel/sherpa-onnx-core/dist/*.whl .
          cp -v ./scripts/wheel/sherpa-onnx-bin/dist/*.whl .

          ls -lh *.whl

      - uses: actions/upload-artifact@v4
        with:
          name: wheels-core-macos-universal
          path: ./*.whl

  test:
    name: test ${{ matrix.os }}
    needs: [core]
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest, macos-15-intel]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Retrieve artifact from macos universal
        uses: actions/download-artifact@v4
        with:
          name: wheels-core-macos-universal
          path: /tmp/wheels

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Show
        shell: bash
        run: |
          ls -lh /tmp/wheels

      - name: Install
        shell: bash
        run: |
          python3 -m pip install /tmp/wheels/*.whl

      - name: Show version
        shell: bash
        run: |
          sherpa-onnx-version

      - name: Show help
        shell: bash
        run: |
          sherpa-onnx --help

          ls -lh $(which sherpa-onnx)
          file $(which sherpa-onnx)

          otool -L $(which sherpa-onnx)
          otool -l $(which sherpa-onnx)

          echo "---"

          sherpa-onnx-offline --help

          echo "---"

          sherpa-onnx-vad --help

      - name: Publish to huggingface
        if: matrix.os == 'macos-latest'
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            d=cpu/$SHERPA_ONNX_VERSION

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            mkdir -p $d

            cp -v /tmp/wheels/*.whl $d/

            git status
            git add .
            git commit -m "add more wheels"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels main

      - name: Publish wheels to PyPI ${{ github.event.inputs.publish_sherpa_onnx_bin }}
        if: ${{ matrix.os == 'macos-latest' && (github.event.inputs.publish_sherpa_onnx_bin || 'true') == 'true' }}
        env:
          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
        shell: bash
        run: |
          opts='--break-system-packages'

          python3 -m pip install $opts wheel twine==5.0.0 setuptools

          twine upload /tmp/wheels/*.whl

  build_wheels_macos_universal2:
    needs: [core, test]
    name: ${{ matrix.python-version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        python-version: ["cp38", "cp39", "cp310", "cp311", "cp312", "cp313", "cp314"]

    steps:
      - uses: actions/checkout@v4

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Set macOS deployment target
        run: echo "MACOSX_DEPLOYMENT_TARGET=10.15" >> $GITHUB_ENV

      - name: Build wheels
        uses: pypa/cibuildwheel@v3.3.1
        env:
          CIBW_BUILD: "${{ matrix.python-version}}-* "
          CIBW_ENVIRONMENT: >
            MACOSX_DEPLOYMENT_TARGET=10.15
            SHERPA_ONNX_SPLIT_PYTHON_PACKAGE=ON
            SHERPA_ONNX_CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES='arm64;x86_64' -DSHERPA_ONNX_ENABLE_BINARY=OFF -DCMAKE_OSX_DEPLOYMENT_TARGET='10.15' -DSHERPA_ONNX_BUILD_C_API_EXAMPLES=OFF -DSHERPA_ONNX_ENABLE_C_API=OFF -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF"
          CIBW_ARCHS: "universal2"
          CIBW_BUILD_VERBOSITY: 3

          #  Don't repair macOS wheels
          CIBW_REPAIR_WHEEL_COMMAND_MACOS: ""

      - name: Display wheels
        shell: bash
        run: |
          ls -lh ./wheelhouse/
          unzip -l ./wheelhouse/*.whl

      - uses: actions/upload-artifact@v4
        with:
          name: wheel-${{ matrix.python-version }}
          path: ./wheelhouse/*.whl

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            d=cpu/$SHERPA_ONNX_VERSION

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            mkdir -p $d

            cp -v ../wheelhouse/*.whl $d/

            git status
            git add .
            git commit -m "add more wheels"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels main

      - name: Publish wheels to PyPI
        env:
          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
        run: |
          opts='--break-system-packages'

          python3 -m pip install $opts wheel twine==5.0.0 setuptools

          twine upload ./wheelhouse/*.whl


================================================
FILE: .github/workflows/build-wheels-macos-x64.yaml
================================================
name: build-wheels-macos-x64

on:
  push:
    branches:
      - wheel
  workflow_dispatch:
    inputs:
      publish_sherpa_onnx_bin:
        description: "Publish sherpa-onnx-bin"
        required: false
        default: "true"
        type: boolean

env:
  SHERPA_ONNX_IS_IN_GITHUB_ACTIONS: 1

concurrency:
  group: build-wheels-macos-x64-${{ github.ref }}
  cancel-in-progress: true

jobs:
  core:
    runs-on: ${{ matrix.os }}
    name: core
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Install deps
        shell: bash
        run: |
          python3 -m pip install setuptools wheel twine

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: macos-latest-sherpa-onnx-core-x64

      - name: Configure CMake
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          mkdir build
          cd build

          cmake \
            -DSHERPA_ONNX_SPLIT_PYTHON_PACKAGE=ON \
            -DCMAKE_OSX_DEPLOYMENT_TARGET=10.15 \
            -D BUILD_SHARED_LIBS=ON \
            -D SHERPA_ONNX_BUILD_C_API_EXAMPLES=OFF \
            -D CMAKE_BUILD_TYPE=Release \
            -D CMAKE_OSX_ARCHITECTURES='x86_64' \
            -D CMAKE_INSTALL_PREFIX=./install \
            ..

      - name: Build sherpa-onnx for macos
        shell: bash
        run: |
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"

          cd build
          make -j2
          make install

          ls -lh lib
          ls -lh bin

          file ./bin/sherpa-onnx

          rm -fv ./install/include/cargs.h
          rm -fv ./install/lib/cargs.h
          rm -fv ./install/lib/libcargs.dylib
          rm -fv ./install/lib/libcargs.a
          rm -rfv ./install/lib/pkgconfig

      - name: Copy files
        shell: bash
        run: |
          echo 'sherpa-onnx-core'
          mkdir -p scripts/wheel/sherpa-onnx-core/sherpa_onnx/lib
          cp -v ./build/install/lib/lib* ./scripts/wheel/sherpa-onnx-core/sherpa_onnx/lib

          mkdir -p ./scripts/wheel/sherpa-onnx-core/sherpa_onnx/include/sherpa-onnx/c-api
          cp -v ./build/install/include/sherpa-onnx/c-api/*.h ./scripts/wheel/sherpa-onnx-core/sherpa_onnx/include/sherpa-onnx/c-api

          echo 'sherpa-onnx-bin'

          mkdir -p ./scripts/wheel/sherpa-onnx-bin/bin
          cp -v ./build/install/bin/sherpa-onnx* ./scripts/wheel/sherpa-onnx-bin/bin

      - name: Build sherpa-onnx-core
        shell: bash
        run: |
          pushd ./scripts/wheel/sherpa-onnx-core
          python3 setup.py bdist_wheel --plat-name=macosx_10_15_x86_64

          ls -lh dist
          unzip -l dist/*.whl

          popd

      - name: Build sherpa-onnx-bin
        shell: bash
        run: |
          pushd ./scripts/wheel/sherpa-onnx-bin
          python3 setup.py bdist_wheel --plat-name=macosx_10_15_x86_64

          ls -lh dist
          unzip -l dist/*.whl

          popd

      - name: Collect wheels
        shell: bash
        run: |
          cp -v ./scripts/wheel/sherpa-onnx-core/dist/*.whl .
          cp -v ./scripts/wheel/sherpa-onnx-bin/dist/*.whl .

          ls -lh *.whl

      - uses: actions/upload-artifact@v4
        with:
          name: wheels-core-macos-x64
          path: ./*.whl

  test:
    name: test
    needs: [core]
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-15-intel]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Retrieve artifact from macos x64
        uses: actions/download-artifact@v4
        with:
          name: wheels-core-macos-x64
          path: /tmp/wheels

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Show
        shell: bash
        run: |
          ls -lh /tmp/wheels

      - name: Install
        shell: bash
        run: |
          python3 -m pip install /tmp/wheels/*.whl

      - name: Show version
        shell: bash
        run: |
          sherpa-onnx-version

      - name: Show help
        shell: bash
        run: |
          sherpa-onnx --help

          ls -lh $(which sherpa-onnx)
          file $(which sherpa-onnx)
          otool -L $(which sherpa-onnx)
          otool -l $(which sherpa-onnx)

          echo "---"

          sherpa-onnx-offline --help

          echo "---"

          sherpa-onnx-vad --help

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            d=cpu/$SHERPA_ONNX_VERSION

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            mkdir -p $d

            cp -v /tmp/wheels/*.whl $d/

            git status
            git add .
            git commit -m "add more wheels"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels main

      - name: Publish wheels to PyPI ${{ github.event.inputs.publish_sherpa_onnx_bin }}
        if: ${{ (github.event.inputs.publish_sherpa_onnx_bin || 'true') == 'true' }}
        env:
          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
        shell: bash
        run: |
          python3 -m pip install wheel twine==5.0.0 setuptools

          twine upload /tmp/wheels/*.whl

  build_wheels_macos_x64:
    needs: [core, test]
    name: ${{ matrix.python-version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        python-version: ["cp38", "cp39", "cp310", "cp311", "cp312", "cp313", "cp314"]

    steps:
      - uses: actions/checkout@v4

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Set macOS deployment target
        run: echo "MACOSX_DEPLOYMENT_TARGET=10.15" >> $GITHUB_ENV

      - name: Build wheels
        uses: pypa/cibuildwheel@v3.3.1
        env:
          CIBW_BUILD: "${{ matrix.python-version}}-* "
          CIBW_ENVIRONMENT: >
            MACOSX_DEPLOYMENT_TARGET=10.15
            SHERPA_ONNX_SPLIT_PYTHON_PACKAGE=ON
            SHERPA_ONNX_CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES='x86_64' -DSHERPA_ONNX_ENABLE_BINARY=OFF -DCMAKE_OSX_DEPLOYMENT_TARGET='10.15' -DSHERPA_ONNX_BUILD_C_API_EXAMPLES=OFF -DSHERPA_ONNX_ENABLE_C_API=OFF -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF"

          CIBW_ARCHS: "x86_64"
          CIBW_BUILD_VERBOSITY: 3

          #  Don't repair macOS wheels
          CIBW_REPAIR_WHEEL_COMMAND_MACOS: ""

      - name: Display wheels
        shell: bash
        run: |
          ls -lh ./wheelhouse/
          unzip -l ./wheelhouse/*.whl

      - uses: actions/upload-artifact@v4
        with:
          name: wheel-macos-x64-${{ matrix.python-version }}
          path: ./wheelhouse/*.whl

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            d=cpu/$SHERPA_ONNX_VERSION

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            mkdir -p $d

            cp -v ../wheelhouse/*.whl $d/

            git status
            git add .
            git commit -m "add more wheels"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels main

      - name: Publish wheels to PyPI
        env:
          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
        run: |
          opts='--break-system-packages'

          python3 -m pip install $opts wheel twine==5.0.0 setuptools

          twine upload ./wheelhouse/*.whl


================================================
FILE: .github/workflows/build-wheels-win32.yaml
================================================
name: build-wheels-win32

on:
  push:
    branches:
      - wheel
  workflow_dispatch:
    inputs:
      publish_sherpa_onnx_bin:
        description: "Publish sherpa-onnx-bin"
        required: false
        default: "true"
        type: boolean

env:
  SHERPA_ONNX_IS_IN_GITHUB_ACTIONS: 1

concurrency:
  group: build-wheels-win32-${{ github.ref }}
  cancel-in-progress: true

jobs:
  core:
    name: core
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [windows-2022]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install wheel twine==5.0.0 setuptools

      - name: Install sccache
        run: choco install sccache -y

      - name: Cache sccache
        uses: actions/cache@v3
        with:
          path: C:\Users\runneradmin\AppData\Local\Mozilla\sccache
          key: ${{ matrix.os }}-sccache-core-win32
          restore-keys: |
            ${{ matrix.os }}-sccache-core-win32

      - name: Configure CMake
        shell: bash
        run: |
          mkdir build
          cd build
          cmake \
            -D CMAKE_C_COMPILER_LAUNCHER=sccache \
            -D CMAKE_CXX_COMPILER_LAUNCHER=sccache \
            -A Win32 \
            -D CMAKE_BUILD_TYPE=Release \
            -D BUILD_SHARED_LIBS=ON \
            -D SHERPA_ONNX_BUILD_C_API_EXAMPLES=OFF \
            -DCMAKE_INSTALL_PREFIX=./install \
            ..

      - name: Build sherpa-onnx for windows
        shell: bash
        run: |
          cd build
          cmake --build . --config Release  -- -m:2
          cmake --build . --config Release --target install -- -m:2

          ls -lh ./bin/Release/sherpa-onnx.exe

      - name: Show sccache stats
        run: sccache --show-stats

      - name: Show
        shell: bash
        run: |
          echo "---bin---"
          ls -lh build/install/bin
          echo "---lib---"
          ls -lh build/install/lib
          echo "---include---"
          ls -lh build/install/include

      - name: Copy files
        shell: bash
        run: |
          cd build
          echo 'sherpa-onnx-core'
          mkdir -p ../scripts/wheel/sherpa-onnx-core/sherpa_onnx/lib
          cp -v ./install/lib/onnxruntime.dll ../scripts/wheel/sherpa-onnx-core/sherpa_onnx/lib
          cp -v ./install/lib/sherpa-onnx-*.dll ../scripts/wheel/sherpa-onnx-core/sherpa_onnx/lib
          # keep the *.lib file so users can write code to link with our dll
          cp -v ./install/lib/sherpa-onnx-*.lib ../scripts/wheel/sherpa-onnx-core/sherpa_onnx/lib

          mkdir -p ../scripts/wheel/sherpa-onnx-core/sherpa_onnx/include/sherpa-onnx/c-api
          cp -v ./install/include/sherpa-onnx/c-api/*.h ../scripts/wheel/sherpa-onnx-core/sherpa_onnx/include/sherpa-onnx/c-api

          pushd ../scripts/wheel/sherpa-onnx-core
          python3 setup.py bdist_wheel --plat-name=win32

          ls -lh dist

          popd

          echo 'sherpa-onnx-bin'

          mkdir -p ../scripts/wheel/sherpa-onnx-bin/bin
          cp -v ./install/bin/sherpa-onnx* ../scripts/wheel/sherpa-onnx-bin/bin

          pushd ../scripts/wheel/sherpa-onnx-bin
          python3 setup.py bdist_wheel --plat-name=win32

          ls -lh dist

          popd

      - name: Collect wheels
        shell: bash
        run: |
          mkdir wheelhouse
          cp -v ./scripts/wheel/sherpa-onnx-core/dist/*.whl ./wheelhouse
          cp -v ./scripts/wheel/sherpa-onnx-bin/dist/*.whl ./wheelhouse

      - uses: actions/upload-artifact@v4
        with:
          name: wheels-core-win-x86
          path: ./wheelhouse/*.whl

      - name: Show wheels
        shell: bash
        run: |
          ls -lh ./scripts/wheel/sherpa-onnx-core/dist
          ls -lh ./scripts/wheel/sherpa-onnx-bin/dist

          unzip -l ./scripts/wheel/sherpa-onnx-core/dist/*.whl
          echo "---"
          unzip -l ./scripts/wheel/sherpa-onnx-bin/dist/*.whl

  test:
    name: test
    needs: [core]
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [windows-2022]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Retrieve artifact from Windows x86
        uses: actions/download-artifact@v4
        with:
          name: wheels-core-win-x86
          path: /tmp/wheels

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"
          architecture: x86

      - name: Show
        shell: bash
        run: |
          ls -lh /d/tmp/wheels

      - name: Install
        shell: bash
        run: |
          python3 -m pip install /d/tmp/wheels/*.whl

      - name: Show version
        shell: bash
        run: |
          sherpa-onnx-version

          which sherpa-onnx-version

      - name: Show help
        shell: bash
        run: |
          sherpa-onnx --help

          echo "---"

          sherpa-onnx-offline --help

          echo "---"

          sherpa-onnx-vad --help

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            d=cpu/$SHERPA_ONNX_VERSION

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            mkdir -p $d

            cp -v /d/tmp/wheels/*.whl $d/

            git status
            git add .
            git commit -m "add more wheels"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels main

      - name: Publish wheels to PyPI ${{ github.event.inputs.publish_sherpa_onnx_bin }}
        if: ${{ (github.event.inputs.publish_sherpa_onnx_bin || 'true') == 'true' }}
        env:
          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
        shell: bash
        run: |
          python3 -m pip install --upgrade pip
          python3 -m pip install wheel twine==5.0.0 setuptools

          twine upload /d/tmp/wheels/*.whl

  build_wheels_win32:
    needs: [core, test]
    name: ${{ matrix.python-version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [windows-2022]
        python-version: ["cp38", "cp39", "cp310", "cp311", "cp312", "cp313", "cp314"]

    steps:
      - uses: actions/checkout@v4

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      # see https://cibuildwheel.readthedocs.io/en/stable/changelog/
      # for a list of versions
      - name: Build wheels (cibuildwheel)
        uses: pypa/cibuildwheel@v3.1.4
        env:
          CIBW_BUILD: "${{ matrix.python-version}}-* "
          CIBW_SKIP: "*-win_amd64"
          CIBW_BUILD_VERBOSITY: 3
          CIBW_ENVIRONMENT: >
            SHERPA_ONNX_SPLIT_PYTHON_PACKAGE=ON
            SHERPA_ONNX_CMAKE_ARGS="-A Win32 -DSHERPA_ONNX_ENABLE_BINARY=OFF -DSHERPA_ONNX_ENABLE_C_API=OFF -DSHERPA_ONNX_ENABLE_C_API=OFF -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF"

      - name: Display wheels
        shell: bash
        run: |
          ls -lh ./wheelhouse/

          unzip -l ./wheelhouse/*.whl

      - uses: actions/upload-artifact@v4
        with:
          name: wheel-${{ matrix.python-version }}
          path: ./wheelhouse/*.whl

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            d=cpu/$SHERPA_ONNX_VERSION

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            mkdir -p $d

            cp -v ../wheelhouse/*.whl $d/

            git status
            git add .
            git commit -m "add more wheels"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels main

      - name: Publish wheels to PyPI
        env:
          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
        run: |
          python3 -m pip install --upgrade pip
          python3 -m pip install wheel twine==5.0.0 setuptools

          twine upload ./wheelhouse/*.whl


================================================
FILE: .github/workflows/build-wheels-win64-cuda.yaml
================================================
name: build-wheels-win64-cuda

on:
  push:
    branches:
      - wheel
  workflow_dispatch:

env:
  SHERPA_ONNX_IS_IN_GITHUB_ACTIONS: 1

concurrency:
  group: build-wheels-win64-cuda-${{ github.ref }}
  cancel-in-progress: true

jobs:
  build_wheels_win64_cuda:
    name: ${{ matrix.python-version }} ${{ matrix.onnxruntime_version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [windows-2022]
        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
        onnxruntime_version: ["1.17.1", "1.23.2"]

    steps:
      - uses: actions/checkout@v4

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Build wheels
        shell: bash
        run: |
          pip install setuptools wheel

          export SHERPA_ONNX_CMAKE_ARGS="-DSHERPA_ONNX_ENABLE_GPU=ON"

          onnxruntime_version=${{ matrix.onnxruntime_version }}
          curl -SL -O https://github.com/microsoft/onnxruntime/releases/download/v$onnxruntime_version/onnxruntime-win-x64-gpu-$onnxruntime_version.zip
          unzip onnxruntime-win-x64-gpu-$onnxruntime_version.zip

          export SHERPA_ONNXRUNTIME_LIB_DIR=$PWD/onnxruntime-win-x64-gpu-$onnxruntime_version/lib
          export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$PWD/onnxruntime-win-x64-gpu-$onnxruntime_version/include

          if [[ $onnxruntime_version == "1.23.2" ]]; then
            export SHERPA_ONNX_CUDA_VERSION="12.cudnn9"
          fi

          python3 setup.py bdist_wheel

          ls -lh ./dist/

          mv dist wheelhouse

      - name: Display wheels
        shell: bash
        run: |
          ls -lh ./wheelhouse/
          unzip -l ./wheelhouse/*.whl

      - uses: actions/upload-artifact@v4
        with:
          name: wheel-${{ matrix.python-version }}-${{ matrix.onnxruntime_version }}
          path: ./wheelhouse/*.whl

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            d=cuda/$SHERPA_ONNX_VERSION

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            mkdir -p $d

            cp -v ../wheelhouse/*.whl $d/

            git status
            git add .
            git commit -m "add more wheels"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels main


================================================
FILE: .github/workflows/build-wheels-win64.yaml
================================================
name: build-wheels-win64

on:
  push:
    branches:
      - wheel
  workflow_dispatch:
    inputs:
      publish_sherpa_onnx_bin:
        description: "Publish sherpa-onnx-bin"
        required: false
        default: "true"
        type: boolean

env:
  SHERPA_ONNX_IS_IN_GITHUB_ACTIONS: 1

concurrency:
  group: build-wheels-win64-${{ github.ref }}
  cancel-in-progress: true

jobs:
  core:
    name: core
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [windows-2022]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install wheel twine==5.0.0 setuptools

      - name: Install sccache
        run: choco install sccache -y

      - name: Cache sccache
        uses: actions/cache@v3
        with:
          path: C:\Users\runneradmin\AppData\Local\Mozilla\sccache
          key: ${{ matrix.os }}-sccache-core
          restore-keys: |
            ${{ matrix.os }}-sccache-core-

      - name: Configure CMake
        shell: bash
        run: |
          mkdir build
          cd build
          cmake \
            -D CMAKE_C_COMPILER_LAUNCHER=sccache \
            -D CMAKE_CXX_COMPILER_LAUNCHER=sccache \
            -A x64 \
            -D SHERPA_ONNX_ENABLE_TTS=ON \
            -D CMAKE_BUILD_TYPE=Release \
            -D BUILD_SHARED_LIBS=ON \
            -D SHERPA_ONNX_BUILD_C_API_EXAMPLES=OFF \
            -DCMAKE_INSTALL_PREFIX=./install \
            ..

      - name: Build sherpa-onnx for windows
        shell: bash
        run: |
          cd build
          cmake --build . --config Release  -- -m:2
          cmake --build . --config Release --target install -- -m:2

          ls -lh ./bin/Release/sherpa-onnx.exe

      - name: Show sccache stats
        run: sccache --show-stats

      - name: Show
        shell: bash
        run: |
          echo "---bin---"
          ls -lh build/install/bin
          echo "---lib---"
          ls -lh build/install/lib
          echo "---include---"
          ls -lh build/install/include

      - name: Copy files
        shell: bash
        run: |
          cd build
          echo 'sherpa-onnx-core'
          mkdir -p ../scripts/wheel/sherpa-onnx-core/sherpa_onnx/lib
          cp -v ./install/lib/onnxruntime.dll ../scripts/wheel/sherpa-onnx-core/sherpa_onnx/lib
          cp -v ./install/lib/sherpa-onnx-*.dll ../scripts/wheel/sherpa-onnx-core/sherpa_onnx/lib
          # keep the *.lib file so users can write code to link with our dll
          cp -v ./install/lib/sherpa-onnx-*.lib ../scripts/wheel/sherpa-onnx-core/sherpa_onnx/lib

          mkdir -p ../scripts/wheel/sherpa-onnx-core/sherpa_onnx/include/sherpa-onnx/c-api
          cp -v ./install/include/sherpa-onnx/c-api/*.h ../scripts/wheel/sherpa-onnx-core/sherpa_onnx/include/sherpa-onnx/c-api

          pushd ../scripts/wheel/sherpa-onnx-core
          python3 setup.py bdist_wheel --plat-name=win_amd64

          ls -lh dist

          popd

          echo 'sherpa-onnx-bin'

          mkdir -p ../scripts/wheel/sherpa-onnx-bin/bin
          cp -v ./install/bin/sherpa-onnx* ../scripts/wheel/sherpa-onnx-bin/bin

          pushd ../scripts/wheel/sherpa-onnx-bin
          python3 setup.py bdist_wheel --plat-name=win_amd64

          ls -lh dist

          popd

      - name: Collect wheels
        shell: bash
        run: |
          mkdir wheelhouse
          cp -v ./scripts/wheel/sherpa-onnx-core/dist/*.whl ./wheelhouse
          cp -v ./scripts/wheel/sherpa-onnx-bin/dist/*.whl ./wheelhouse

      - uses: actions/upload-artifact@v4
        with:
          name: wheels-core-win-x64
          path: ./wheelhouse/*.whl

      - name: Show wheels
        shell: bash
        run: |
          ls -lh ./scripts/wheel/sherpa-onnx-core/dist
          ls -lh ./scripts/wheel/sherpa-onnx-bin/dist

          unzip -l ./scripts/wheel/sherpa-onnx-core/dist/*.whl
          echo "---"
          unzip -l ./scripts/wheel/sherpa-onnx-bin/dist/*.whl

  test:
    name: test
    needs: [core]
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [windows-2022]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Retrieve artifact from Windows x64
        uses: actions/download-artifact@v4
        with:
          name: wheels-core-win-x64
          path: /tmp/wheels

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Show
        shell: bash
        run: |
          ls -lh /d/tmp/wheels

      - name: Install
        shell: bash
        run: |
          python3 -m pip install /d/tmp/wheels/*.whl

      - name: Show version
        shell: bash
        run: |
          sherpa-onnx-version

          which sherpa-onnx-version

      - name: Show help
        shell: bash
        run: |
          sherpa-onnx --help

          echo "---"

          sherpa-onnx-offline --help

          echo "---"

          sherpa-onnx-vad --help

          which sherpa-onnx-vad

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            d=cpu/$SHERPA_ONNX_VERSION

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            mkdir -p $d

            cp -v /d/tmp/wheels/*.whl $d/

            git status
            git add .
            git commit -m "add more wheels"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels main

      - name: Publish wheels to PyPI ${{ github.event.inputs.publish_sherpa_onnx_bin }}
        if: ${{ (github.event.inputs.publish_sherpa_onnx_bin || 'true') == 'true' }}
        env:
          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
        shell: bash
        run: |
          python3 -m pip install --upgrade pip
          python3 -m pip install wheel twine==5.0.0 setuptools

          twine upload /d/tmp/wheels/*.whl

  build_wheels_win64:
    needs: [core, test]
    name: ${{ matrix.python-version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [windows-2022]
        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]

    steps:
      - uses: actions/checkout@v4

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Build wheels (cmd)
        shell: bash
        run: |
          python3 -m pip install setuptools wheel twine

          export SHERPA_ONNX_SPLIT_PYTHON_PACKAGE=ON

          export SHERPA_ONNX_CMAKE_ARGS="-DSHERPA_ONNX_ENABLE_BINARY=OFF -DSHERPA_ONNX_BUILD_C_API_EXAMPLES=OFF -DSHERPA_ONNX_ENABLE_C_API=OFF -DSHERPA_ONNX_ENABLE_C_API=OFF -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF"

          python3 setup.py bdist_wheel

          ls -lh ./dist/

          mv dist wheelhouse

      - name: Display wheels
        shell: bash
        run: |
          ls -lh ./wheelhouse/
          unzip -l ./wheelhouse/*.whl

      - uses: actions/upload-artifact@v4
        with:
          name: wheel-${{ matrix.python-version }}
          path: ./wheelhouse/*.whl

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            d=cpu/$SHERPA_ONNX_VERSION

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            mkdir -p $d

            cp -v ../wheelhouse/*.whl $d/

            git status
            git add .
            git commit -m "add more wheels"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-wheels main

      - name: Publish wheels to PyPI
        shell: bash
        env:
          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
        run: |
          python3 -m pip install --upgrade pip
          if [[ ${{ matrix.python-version }} == "3.7" ]]; then
            python3 -m pip install wheel twine setuptools
          else
            python3 -m pip install wheel twine==5.0.0 setuptools
          fi

          twine upload ./wheelhouse/*.whl


================================================
FILE: .github/workflows/build-xcframework.yaml
================================================
name: build-xcframework

on:
  push:
    branches:
      - master
    paths:
      - './build-ios.sh'
      - '.github/workflows/build-xcframework.yaml'
      - 'CMakeLists.txt'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/c-api/*'
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'

  workflow_dispatch:

concurrency:
  group: build-xcframework-${{ github.ref }}
  cancel-in-progress: true

jobs:
  build_xcframework:
    name: tts-${{ matrix.with_tts }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        with_tts: [ON, OFF]

    steps:
      - uses: actions/checkout@v4

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Build iOS shared
        if: matrix.with_tts == 'ON'
        shell: bash
        run: |
          export CMAKE_VERBOSE_MAKEFILE=ON
          ./build-ios-shared.sh

      - name: Build iOS
        if: matrix.with_tts == 'ON'
        shell: bash
        run: |
          ./build-ios.sh

      - name: Build iOS (No tts)
        if: matrix.with_tts == 'OFF'
        shell: bash
        run: |
          ./build-ios-no-tts.sh

      - name: Display artifacts
        if: matrix.with_tts == 'ON'
        shell: bash
        run: |
          brew install tree
          tree -L 2 ./build-ios

      - name: Display artifacts
        if: matrix.with_tts == 'OFF'
        shell: bash
        run: |
          brew install tree
          tree -L 2 ./build-ios-no-tts

      - name: Package artifacts
        if: matrix.with_tts == 'ON'
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
          echo "SHERPA_ONNX_VERSION=$SHERPA_ONNX_VERSION" >> "$GITHUB_ENV"

          rm -rf build-ios/build
          rm -rf build-ios/install
          rm -rf build-ios/ios-onnxruntime/.git

          tree build-ios

          filename=sherpa-onnx-${SHERPA_ONNX_VERSION}-ios.tar.bz2

          tar cjvf $filename ./build-ios

          ls -lh

      - name: Package artifacts
        if: matrix.with_tts == 'OFF'
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
          echo "SHERPA_ONNX_VERSION=$SHERPA_ONNX_VERSION" >> "$GITHUB_ENV"

          rm -rf build-ios-no-tts/build
          rm -rf build-ios-no-tts/install
          rm -rf build-ios-no-tts/ios-onnxruntime/.git

          tree build-ios-no-tts

          filename=sherpa-onnx-${SHERPA_ONNX_VERSION}-ios-no-tts.tar.bz2

          tar cjvf $filename ./build-ios-no-tts

          ls -lh

      - uses: actions/upload-artifact@v4
        if: matrix.with_tts == 'ON'
        with:
          name: sherpa-onnx-ios-libs
          path: ./build-ios

      - uses: actions/upload-artifact@v4
        if: matrix.with_tts == 'OFF'
        with:
          name: sherpa-onnx-ios-libs-no-tts
          path: ./build-ios-no-tts

      # https://huggingface.co/docs/hub/spaces-github-actions
      - name: Publish to huggingface
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_CLONE_PROTECTION_ACTIVE=false
            GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs huggingface

            cd huggingface

            cp -v ../sherpa-onnx-*.tar.bz2 ./

            git status
            git lfs track "*.bz2"

            git add .

            git commit -m "upload sherpa-onnx-${SHERPA_ONNX_VERSION}-ios.tar.bz2"

            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs main

      - name: Release xcframework
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*.tar.bz2


================================================
FILE: .github/workflows/c-api-from-buffer.yaml
================================================
name: c-api-from-memory

on:
  push:
    branches:
      - master
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'
    paths:
      - '.github/workflows/c-api-from-buffer.yaml'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/c-api/*'
      - 'c-api-examples/**'
      - 'ffmpeg-examples/**'

  workflow_dispatch:

concurrency:
  group: c-api-from-buffer-${{ github.ref }}
  cancel-in-progress: true

jobs:
  c_api_from_buffer:
    name: ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-c-api-shared

      - name: Build sherpa-onnx
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          mkdir build
          cd build

          cmake \
            -D CMAKE_BUILD_TYPE=Release \
            -D BUILD_SHARED_LIBS=ON \
            -D CMAKE_INSTALL_PREFIX=./install \
            -D SHERPA_ONNX_ENABLE_BINARY=OFF \
            ..

          make -j2 install

          ls -lh install/lib
          ls -lh install/include

          if [[ ${{ matrix.os }} == ubuntu-latest ]]; then
            ldd ./install/lib/libsherpa-onnx-c-api.so
            echo "---"
            readelf -d ./install/lib/libsherpa-onnx-c-api.so
          fi

          if [[ ${{ matrix.os }} == macos-latest ]]; then
            otool -L ./install/lib/libsherpa-onnx-c-api.dylib
          fi

      - name: Test streaming zipformer with tokens and hotwords loaded from buffers
        shell: bash
        run: |
          gcc -o streaming-zipformer-buffered-tokens-hotwords-c-api ./c-api-examples/streaming-zipformer-buffered-tokens-hotwords-c-api.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh streaming-zipformer-buffered-tokens-hotwords-c-api

          if [[ ${{ matrix.os }} == ubuntu-latest ]]; then
            ldd ./streaming-zipformer-buffered-tokens-hotwords-c-api
            echo "----"
            readelf -d ./streaming-zipformer-buffered-tokens-hotwords-c-api
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17.tar.bz2
          tar xvf sherpa-onnx-streaming-zipformer-en-20M-2023-02-17.tar.bz2
          rm sherpa-onnx-streaming-zipformer-en-20M-2023-02-17.tar.bz2
          curl -SL -O https://huggingface.co/desh2608/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-small/blob/main/data/lang_bpe_500/bpe.model
          cp bpe.model sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/
          rm bpe.model

          printf "▁A ▁T ▁P :1.5\n▁A ▁B ▁C :3.0" > hotwords.txt
          mv hotwords.txt ./sherpa-onnx-streaming-zipformer-en-20M-2023-02-17

          ls -lh sherpa-onnx-streaming-zipformer-en-20M-2023-02-17
          echo "---"
          ls -lh sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/test_wavs

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./streaming-zipformer-buffered-tokens-hotwords-c-api

          rm -rf sherpa-onnx-streaming-zipformer-*

      - name: Test streaming paraformer with tokens loaded from buffers
        shell: bash
        run: |
          gcc -o streaming-paraformer-buffered-tokens-c-api ./c-api-examples/streaming-paraformer-buffered-tokens-c-api.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh streaming-paraformer-buffered-tokens-c-api

          if [[ ${{ matrix.os }} == ubuntu-latest ]]; then
            ldd ./streaming-paraformer-buffered-tokens-c-api
            echo "----"
            readelf -d ./streaming-paraformer-buffered-tokens-c-api
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
          tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
          rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2

          ls -lh sherpa-onnx-streaming-paraformer-bilingual-zh-en
          echo "---"
          ls -lh sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./streaming-paraformer-buffered-tokens-c-api

          rm -rf sherpa-onnx-streaming-paraformer-*

      - name: Test streaming ctc with tokens loaded from buffers
        shell: bash
        run: |
          gcc -o streaming-ctc-buffered-tokens-c-api ./c-api-examples/streaming-ctc-buffered-tokens-c-api.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh streaming-ctc-buffered-tokens-c-api

          if [[ ${{ matrix.os }} == ubuntu-latest ]]; then
            ldd ./streaming-ctc-buffered-tokens-c-api
            echo "----"
            readelf -d ./streaming-ctc-buffered-tokens-c-api
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
          tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
          rm sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2

          ls -lh sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13
          echo "---"
          ls -lh sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./streaming-ctc-buffered-tokens-c-api

          rm -rf sherpa-onnx-streaming-ctc-*

      - name: Test keywords spotting with tokens and keywords loaded from buffers
        shell: bash
        run: |
          gcc -o keywords-spotter-buffered-tokens-keywords-c-api ./c-api-examples/keywords-spotter-buffered-tokens-keywords-c-api.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh keywords-spotter-buffered-tokens-keywords-c-api

          if [[ ${{ matrix.os }} == ubuntu-latest ]]; then
            ldd ./keywords-spotter-buffered-tokens-keywords-c-api
            echo "----"
            readelf -d ./keywords-spotter-buffered-tokens-keywords-c-api
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
          tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
          rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2

          ls -lh sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile
          echo "---"
          ls -lh sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/test_wavs

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./keywords-spotter-buffered-tokens-keywords-c-api

          rm -rf sherpa-onnx-kws-zipformer-*


================================================
FILE: .github/workflows/c-api.yaml
================================================
name: c-api

on:
  push:
    branches:
      - master
    paths:
      - '.github/workflows/c-api.yaml'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/c-api/*'
      - 'c-api-examples/**'
      - 'ffmpeg-examples/**'

  workflow_dispatch:

concurrency:
  group: c-api-${{ github.ref }}
  cancel-in-progress: true

jobs:
  c_api:
    name: ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest, ubuntu-22.04-arm]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-c-api-shared

      - name: Build sherpa-onnx
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          mkdir build
          cd build

          cmake \
            -D CMAKE_BUILD_TYPE=Release \
            -D BUILD_SHARED_LIBS=ON \
            -D CMAKE_INSTALL_PREFIX=./install \
            -D SHERPA_ONNX_ENABLE_BINARY=OFF \
            ..

          make -j2 install

          ls -lh install/lib
          ls -lh install/include

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./install/lib/libsherpa-onnx-c-api.so
            echo "---"
            readelf -d ./install/lib/libsherpa-onnx-c-api.so
          fi

          if [[ ${{ matrix.os }} == macos-latest ]]; then
            otool -L ./install/lib/libsherpa-onnx-c-api.dylib
          fi

      - name: Test Moonshine v2
        shell: bash
        run: |
          name=moonshine-v2-c-api
          gcc -o $name ./c-api-examples/$name.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
          tar xvf sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
          rm sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name

          rm $name
          rm -rf sherpa-onnx-moonshine-*

      - name: Test FireRedASR CTC
        shell: bash
        run: |
          name=fire-red-asr-ctc-c-api
          gcc -o $name ./c-api-examples/$name.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
          tar xvf sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
          rm sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name

          rm $name
          rm -rf sherpa-onnx-fire-*

      - name: Test online punctuation
        shell: bash
        run: |
          name=add-punctuation-online-c-api
          gcc -o $name ./c-api-examples/$name.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
          tar xvf sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
          rm sherpa-onnx-online-punct-en-2024-08-06.tar.bz2

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name

          rm $name
          rm -rf sherpa-onnx-online-punct-en-2024-08-06

      - name: Test PocketTTS
        shell: bash
        run: |
          name=pocket-tts-en-c-api
          gcc -o $name ./c-api-examples/$name.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
          tar xf sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
          rm sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name

          rm $name
          rm -rf sherpa-onnx-pocket-tts-int8-2026-01-26

      - uses: actions/upload-artifact@v4
        with:
          name: pocket-tts-wavs-${{ matrix.os }}
          path: ./generated-pocket-en.wav

      - name: Test SupertonicTTS
        shell: bash
        run: |
          name=supertonic-tts-en-c-api
          gcc -o $name ./c-api-examples/$name.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
          tar xf sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
          rm sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name

          rm $name
          rm -rf sherpa-onnx-supertonic-tts-int8-2026-03-06

          ls -lh ./generated-supertonic-en-c.wav

      - uses: actions/upload-artifact@v4
        with:
          name: supertonic-tts-wavs-${{ matrix.os }}
          path: ./generated-supertonic-en-c.wav

      - name: Test ZipVoiceTTS
        shell: bash
        run: |
          name=zipvoice-tts-zh-en-c-api
          gcc -o $name ./c-api-examples/$name.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
          tar xf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
          rm sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos_24khz.onnx

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name

          rm $name
          rm -rf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia
          rm -f vocos_24khz.onnx

          ls -lh ./generated-zipvoice-zh-en-c.wav

      - uses: actions/upload-artifact@v4
        with:
          name: zipvoice-tts-wavs-${{ matrix.os }}
          path: ./generated-zipvoice-zh-en-c.wav

      - name: Test FunASR Nano
        shell: bash
        run: |
          name=funasr-nano-c-api
          gcc -o $name ./c-api-examples/$name.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
          tar xvf sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
          rm sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name

          rm $name
          rm -rf sherpa-onnx-funasr-*

      - name: Test MedASR CTC
        shell: bash
        run: |
          name=medasr-ctc-c-api
          gcc -o $name ./c-api-examples/$name.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
          tar xvf sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
          rm sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name

          rm $name
          rm -rf sherpa-onnx-medasr-*

      - name: Test Omnilingual ASR CTC
        shell: bash
        run: |
          name=omnilingual-asr-ctc-c-api
          gcc -o $name ./c-api-examples/$name.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
          tar xvf sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
          rm sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name

          rm $name
          rm -rf sherpa-onnx-omnilingual-*

      - name: Test Wenet CTC
        shell: bash
        run: |
          name=wenet-ctc-c-api
          gcc -o $name ./c-api-examples/$name.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
          tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
          rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name

          rm $name
          rm -rf sherpa-onnx-wenetspeech-*

      - name: Test T-one
        shell: bash
        run: |
          name=streaming-t-one-ctc-c-api
          gcc -o $name ./c-api-examples/$name.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
          tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
          rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name

          rm $name
          rm -rf sherpa-onnx-streaming-t-one-russian-2025-09-08

      - name: Test KittenTTS
        shell: bash
        run: |
          name=kitten-tts-en-c-api
          gcc -o $name ./c-api-examples/$name.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2
          tar xf kitten-nano-en-v0_1-fp16.tar.bz2
          rm kitten-nano-en-v0_1-fp16.tar.bz2

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name

          rm $name
          rm -rf kitten-nano-en-v0_1-fp16

      - uses: actions/upload-artifact@v4
        with:
          name: kitten-tts-wavs-${{ matrix.os }}
          path: ./generated-kitten-en.wav

      - name: Test streaming zipformer with homophone replacer
        shell: bash
        run: |
          name=streaming-zipformer-with-hr-c-api
          gcc -o $name ./c-api-examples/$name.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
          tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
          rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2

          ls -lh sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
          echo "---"

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
          tar xf dict.tar.bz2
          rm dict.tar.bz2

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name

          rm -rf sherpa-onnx-streaming-zipformer-*
          rm -rf dict lexicon.txt test-hr.wav replace.fst
          rm -v $name

      - name: Test NeMo Canary
        shell: bash
        run: |
          name=nemo-canary-c-api
          gcc -o $name ./c-api-examples/$name.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
          tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
          rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name

          rm $name
          rm -rf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8

      - name: Test Dolphin CTC
        shell: bash
        run: |
          name=dolphin-ctc-c-api
          gcc -o $name ./c-api-examples/$name.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
          tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
          rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name

          rm $name
          rm -rf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02

      - name: Test speech enhancement (GTCRN)
        shell: bash
        run: |
          name=speech-enhancement-gtcrn-c-api
          gcc -o $name ./c-api-examples/$name.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name
          rm -rf denoised-wavs
          mkdir -p denoised-wavs
          cp -v inp_16k.wav denoised-wavs
          cp -v enhanced.wav denoised-wavs/enhanced-gtcrn.wav
          rm -fv *.onnx enhanced.wav

          rm $name

      - name: Test speech enhancement (DPDFNet)
        shell: bash
        run: |
          name=speech-enhancement-dpdfnet-c-api
          gcc -o $name ./c-api-examples/$name.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet_baseline.onnx
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name

          mkdir -p denoised-wavs
          cp -v enhanced.wav denoised-wavs/enhanced-dpdfnet.wav
          rm -fv *.onnx enhanced.wav

          rm $name

      - name: Test online speech enhancement (GTCRN)
        shell: bash
        run: |
          name=online-speech-enhancement-gtcrn-c-api
          gcc -o $name ./c-api-examples/$name.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name
          mkdir -p denoised-wavs
          cp -v enhanced-online-gtcrn.wav denoised-wavs/
          rm -fv *.onnx enhanced-online-gtcrn.wav

          rm $name

      - name: Test online speech enhancement (DPDFNet)
        shell: bash
        run: |
          name=online-speech-enhancement-dpdfnet-c-api
          gcc -o $name ./c-api-examples/$name.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet_baseline.onnx
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name
          mkdir -p denoised-wavs
          cp -v enhanced-online-dpdfnet.wav denoised-wavs/
          rm -fv *.onnx enhanced-online-dpdfnet.wav

          rm $name

      - uses: actions/upload-artifact@v4
        with:
          name: denoised-wavs-${{ matrix.os }}
          path: ./denoised-wavs/*.wav

      - name: Test FireRedAsr
        shell: bash
        run: |
          gcc -o fire-red-asr-c-api ./c-api-examples/fire-red-asr-c-api.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh fire-red-asr-c-api

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./fire-red-asr-c-api
            echo "----"
            readelf -d ./fire-red-asr-c-api
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
          tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
          rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2

          ls -lh sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16
          echo "---"
          ls -lh sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./fire-red-asr-c-api

          rm -rf sherpa-onnx-fire-red-asr-*

      - name: Test kws (zh)
        shell: bash
        run: |
          gcc -o kws-c-api ./c-api-examples/kws-c-api.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
          tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
          rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./kws-c-api

          rm ./kws-c-api
          rm -rf sherpa-onnx-kws-*

      - name: Test Kokoro TTS (zh+en)
        shell: bash
        run: |
          gcc -o kokoro-tts-zh-en-c-api ./c-api-examples/kokoro-tts-zh-en-c-api.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
          tar xf kokoro-multi-lang-v1_0.tar.bz2
          rm kokoro-multi-lang-v1_0.tar.bz2

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./kokoro-tts-zh-en-c-api

          rm ./kokoro-tts-zh-en-c-api
          rm -rf kokoro-zh-en-*

      - name: Test Kokoro TTS (en)
        shell: bash
        run: |
          gcc -o kokoro-tts-en-c-api ./c-api-examples/kokoro-tts-en-c-api.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
          tar xf kokoro-en-v0_19.tar.bz2
          rm kokoro-en-v0_19.tar.bz2

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./kokoro-tts-en-c-api

          rm ./kokoro-tts-en-c-api
          rm -rf kokoro-en-*

      - uses: actions/upload-artifact@v4
        with:
          name: kokoro-tts-${{ matrix.os }}
          path: ./generated-kokoro-*.wav

      - name: Test Matcha TTS (zh)
        shell: bash
        run: |
          gcc -o matcha-tts-zh-c-api ./c-api-examples/matcha-tts-zh-c-api.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
          tar xvf matcha-icefall-zh-baker.tar.bz2
          rm matcha-icefall-zh-baker.tar.bz2

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./matcha-tts-zh-c-api

          rm ./matcha-tts-zh-c-api
          rm -rf matcha-icefall-*
          rm vocos-22khz-univ.onnx

      - name: Test Matcha TTS (en)
        shell: bash
        run: |
          gcc -o matcha-tts-en-c-api ./c-api-examples/matcha-tts-en-c-api.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
          tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
          rm matcha-icefall-en_US-ljspeech.tar.bz2

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./matcha-tts-en-c-api

          rm ./matcha-tts-en-c-api
          rm -rf matcha-icefall-*
          rm vocos-22khz-univ.onnx

      - uses: actions/upload-artifact@v4
        with:
          name: matcha-tts-${{ matrix.os }}
          path: ./generated-matcha-*.wav

      - name: Test silero-vad + Whisper tiny.en
        shell: bash
        run: |
          gcc -o vad-whisper-c-api ./c-api-examples/vad-whisper-c-api.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          # Now download models
          #
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
          tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
          rm sherpa-onnx-whisper-tiny.en.tar.bz2

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./vad-whisper-c-api

          rm -rf sherpa-onnx-*
          rm -rf *.onnx
          rm *.wav

      - name: Test ten-vad + Whisper tiny.en
        shell: bash
        run: |
          gcc -o vad-whisper-c-api ./c-api-examples/vad-whisper-c-api.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          # Now download models
          #
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
          tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
          rm sherpa-onnx-whisper-tiny.en.tar.bz2

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./vad-whisper-c-api

          rm -rf sherpa-onnx-*
          rm -rf *.onnx
          rm *.wav

      - name: Test silero-vad + Moonshine
        shell: bash
        run: |
          gcc -o vad-moonshine-c-api ./c-api-examples/vad-moonshine-c-api.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          # Now download models
          #
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
          tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
          rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./vad-moonshine-c-api

          rm -rf sherpa-onnx-*
          rm -rf *.onnx
          rm *.wav

      - name: Test ten-vad + Moonshine
        shell: bash
        run: |
          gcc -o vad-moonshine-c-api ./c-api-examples/vad-moonshine-c-api.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          # Now download models
          #
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
          tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
          rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./vad-moonshine-c-api

          rm -rf sherpa-onnx-*
          rm -rf *.onnx
          rm *.wav

      - name: Test Moonshine
        shell: bash
        run: |
          gcc -o moonshine-c-api ./c-api-examples/moonshine-c-api.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
          tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
          rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./moonshine-c-api

          rm -rf sherpa-onnx-*

      - name: Test ffmpeg
        # if: matrix.os == 'macos-latest'
        if: false
        shell: bash
        run: |
          brew install ffmpeg

          cd ffmpeg-examples
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
          tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
          rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2

          make
          ls -lh
          ./run.sh
          rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20

      - name: Test silero-vad + sense-voice
        shell: bash
        run: |
          gcc -o vad-sense-voice-c-api ./c-api-examples/vad-sense-voice-c-api.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh vad-sense-voice-c-api

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./vad-sense-voice-c-api
            echo "----"
            readelf -d ./vad-sense-voice-c-api
          fi

          # Now download models
          #
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
          tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
          rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2

          ls -lh sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17
          echo "---"
          ls -lh sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./vad-sense-voice-c-api

          rm -rf sherpa-onnx-sense-voice-*
          rm -rf *.onnx
          rm *.wav

      - name: Test ten-vad + sense-voice
        shell: bash
        run: |
          gcc -o vad-sense-voice-c-api ./c-api-examples/vad-sense-voice-c-api.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh vad-sense-voice-c-api

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./vad-sense-voice-c-api
            echo "----"
            readelf -d ./vad-sense-voice-c-api
          fi

          # Now download models
          #
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
          tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
          rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2

          ls -lh sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17
          echo "---"
          ls -lh sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./vad-sense-voice-c-api

          rm -rf sherpa-onnx-sense-voice-*
          rm -rf *.onnx
          rm *.wav

      - name: Test sense-voice
        shell: bash
        run: |
          gcc -o sense-voice-c-api ./c-api-examples/sense-voice-c-api.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh sense-voice-c-api

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./sense-voice-c-api
            echo "----"
            readelf -d ./sense-voice-c-api
          fi

          # Now download models
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
          tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
          rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2

          ls -lh sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17
          echo "---"
          ls -lh sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./sense-voice-c-api

          rm -rf sherpa-onnx-sense-voice-*

      - name: Test whisper
        shell: bash
        run: |
          gcc -o whisper-c-api ./c-api-examples/whisper-c-api.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh whisper-c-api

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./whisper-c-api
            echo "----"
            readelf -d ./whisper-c-api
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
          tar xvf sherpa-onnx-whisper-tiny.tar.bz2
          rm sherpa-onnx-whisper-tiny.tar.bz2

          ls -lh sherpa-onnx-whisper-tiny
          echo "---"
          ls -lh sherpa-onnx-whisper-tiny/test_wavs

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./whisper-c-api

          rm -rf sherpa-onnx-whisper-*

      - name: Test non-streaming zipformer
        shell: bash
        run: |
          gcc -o zipformer-c-api ./c-api-examples/zipformer-c-api.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh zipformer-c-api

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./zipformer-c-api
            echo "----"
            readelf -d ./zipformer-c-api
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-small-en-2023-06-26.tar.bz2
          tar xvf sherpa-onnx-zipformer-small-en-2023-06-26.tar.bz2
          rm sherpa-onnx-zipformer-small-en-2023-06-26.tar.bz2

          ls -lh sherpa-onnx-zipformer-small-en-2023-06-26
          echo "---"
          ls -lh sherpa-onnx-zipformer-small-en-2023-06-26/test_wavs

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./zipformer-c-api

          rm -rf sherpa-onnx-zipformer-*

      - name: Test streaming zipformer
        shell: bash
        run: |
          gcc -o streaming-zipformer-c-api ./c-api-examples/streaming-zipformer-c-api.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh streaming-zipformer-c-api

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./streaming-zipformer-c-api
            echo "----"
            readelf -d ./streaming-zipformer-c-api
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17.tar.bz2
          tar xvf sherpa-onnx-streaming-zipformer-en-20M-2023-02-17.tar.bz2
          rm sherpa-onnx-streaming-zipformer-en-20M-2023-02-17.tar.bz2

          ls -lh sherpa-onnx-streaming-zipformer-en-20M-2023-02-17
          echo "---"
          ls -lh sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/test_wavs

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./streaming-zipformer-c-api

          rm -rf sherpa-onnx-streaming-zipformer-*

      - name: Test non-streaming paraformer
        shell: bash
        run: |
          gcc -o paraformer-c-api ./c-api-examples/paraformer-c-api.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh paraformer-c-api

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./paraformer-c-api
            echo "----"
            readelf -d ./paraformer-c-api
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-small-2024-03-09.tar.bz2
          tar xvf sherpa-onnx-paraformer-zh-small-2024-03-09.tar.bz2
          rm sherpa-onnx-paraformer-zh-small-2024-03-09.tar.bz2

          ls -lh sherpa-onnx-paraformer-zh-small-2024-03-09
          echo "---"
          ls -lh sherpa-onnx-paraformer-zh-small-2024-03-09/test_wavs

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./paraformer-c-api

          rm -rf sherpa-onnx-paraformer-*

      - name: Test streaming paraformer
        shell: bash
        run: |
          gcc -o streaming-paraformer-c-api ./c-api-examples/streaming-paraformer-c-api.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh streaming-paraformer-c-api

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./streaming-paraformer-c-api
            echo "----"
            readelf -d ./streaming-paraformer-c-api
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
          tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
          rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2

          ls -lh sherpa-onnx-streaming-paraformer-bilingual-zh-en
          echo "---"
          ls -lh sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./streaming-paraformer-c-api

          rm -rf sherpa-onnx-streaming-paraformer-*

      - name: Test telespeech
        shell: bash
        run: |
          gcc -o telespeech-c-api ./c-api-examples/telespeech-c-api.c \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh telespeech-c-api

          if [[ ${{ matrix.os }} == ubuntu-latest ]]; then
            ldd ./telespeech-c-api
            echo "----"
            readelf -d ./telespeech-c-api
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
          tar xvf sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
          rm sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2

          ls -lh sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04
          echo "---"
          ls -lh sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/test_wavs

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./telespeech-c-api

          rm -rf sherpa-onnx-telespeech-*


================================================
FILE: .github/workflows/checksum.yaml
================================================
name: Create checksum

on:
  schedule:
    - cron: "0 1 * * *" # Runs at 1:00 AM UTC daily
  workflow_dispatch:

jobs:
  checksum:
    if: github.repository_owner == 'k2-fsa'
    runs-on: macos-latest
    strategy:
      matrix:
        tag: [null, asr-models, tts-models, kws-models, speaker-recongition-models, audio-tagging-models, punctuation-models]
    steps:
      - name: Run checksum action
        uses: thewh1teagle/checksum@v1
        with:
          tag: ${{ matrix.tag }}
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}


================================================
FILE: .github/workflows/clang-tidy.yaml
================================================
name: clang-tidy

on:
  push:
    branches:
      - master
      - clang-tidy
    paths:
      - 'sherpa-onnx/csrc/**'


  workflow_dispatch:

concurrency:
  group: clang-tidy-${{ github.ref }}
  cancel-in-progress: true

jobs:
  clang-tidy:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: [3.8]
      fail-fast: false

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install clang-tidy
        shell: bash
        run: |
          pip install clang-tidy

      - name: Configure
        shell: bash
        run: |
          mkdir build
          cd build
          cmake -DSHERPA_ONNX_ENABLE_PYTHON=ON -DCMAKE_EXPORT_COMPILE_COMMANDS=ON ..

      - name: Check with clang-tidy
        shell: bash
        run: |
          cd build
          make check


================================================
FILE: .github/workflows/cxx-api.yaml
================================================
name: cxx-api

on:
  push:
    branches:
      - master
      - cxx-api-asr-non-streaming
    paths:
      - '.github/workflows/cxx-api.yaml'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/c-api/*'
      - 'cxx-api-examples/**'

  workflow_dispatch:

concurrency:
  group: cxx-api-${{ github.ref }}
  cancel-in-progress: true

jobs:
  cxx_api:
    name: ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest, ubuntu-22.04-arm]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-cxx-api-shared

      - name: Build sherpa-onnx
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          mkdir build
          cd build

          cmake \
            -D CMAKE_BUILD_TYPE=Release \
            -D BUILD_SHARED_LIBS=ON \
            -D CMAKE_INSTALL_PREFIX=./install \
            -D SHERPA_ONNX_ENABLE_BINARY=OFF \
            ..

          make -j2 install

          ls -lh install/lib
          ls -lh install/include

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./install/lib/libsherpa-onnx-c-api.so
            ldd ./install/lib/libsherpa-onnx-cxx-api.so
            echo "---"
            readelf -d ./install/lib/libsherpa-onnx-c-api.so
            readelf -d ./install/lib/libsherpa-onnx-cxx-api.so
          fi

          if [[ ${{ matrix.os }} == macos-latest ]]; then
            otool -L ./install/lib/libsherpa-onnx-c-api.dylib
            otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib
          fi

      - name: Test Moonshine v2
        shell: bash
        run: |
          name=moonshine-v2-cxx-api
          g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ls -lh ./$name
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
          tar xvf sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
          rm sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2

          echo "---"

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name

          rm -rf sherpa-onnx-moonshine-*
          rm -v ./$name

      - name: Test FireRedASR CTC
        shell: bash
        run: |
          name=fire-red-asr-ctc-cxx-api
          g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ls -lh ./$name
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
          tar xvf sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
          rm sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2

          echo "---"

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name

          rm -rf sherpa-onnx-fire-red-*
          rm -v ./$name

      - name: Test PocketTTS
        shell: bash
        run: |
          name=pocket-tts-en-cxx-api
          g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ls -lh ./$name
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
          tar xf sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
          rm sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2

          echo "---"

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name

          rm -rf sherpa-onnx-pocket-tts-int8-2026-01-26
          rm -v ./$name

          ls -lh ./generated-pocket-en-cxx.wav

      - uses: actions/upload-artifact@v4
        with:
          name: pocket-tts-wavs-${{ matrix.os }}
          path: ./generated-pocket-en-cxx.wav

      - name: Test SupertonicTTS
        shell: bash
        run: |
          name=supertonic-tts-en-cxx-api
          g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ls -lh ./$name
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
          tar xf sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
          rm sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2

          echo "---"

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name

          rm -rf sherpa-onnx-supertonic-tts-int8-2026-03-06
          rm -v ./$name

          ls -lh ./generated-supertonic-en-cxx.wav

      - uses: actions/upload-artifact@v4
        with:
          name: supertonic-tts-wavs-${{ matrix.os }}
          path: ./generated-supertonic-en-cxx.wav

      - name: Test ZipVoiceTTS
        shell: bash
        run: |
          name=zipvoice-tts-zh-en-cxx-api
          g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ls -lh ./$name
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
          tar xf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
          rm sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos_24khz.onnx

          echo "---"

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name

          rm -rf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia
          rm -f vocos_24khz.onnx
          rm -v ./$name

          ls -lh ./generated-zipvoice-zh-en-cxx.wav

      - uses: actions/upload-artifact@v4
        with:
          name: zipvoice-tts-wavs-${{ matrix.os }}
          path: ./generated-zipvoice-zh-en-cxx.wav

      - name: Test FunASR Nano
        shell: bash
        run: |
          name=funasr-nano-cxx-api
          g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ls -lh ./$name
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
          tar xvf sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
          rm sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2

          echo "---"

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name

          rm -rf sherpa-onnx-funasr-*
          rm -v ./$name

      - name: Test MedASR CTC
        shell: bash
        run: |
          name=medasr-ctc-cxx-api
          g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ls -lh ./$name
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
          tar xvf sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
          rm sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2

          echo "---"

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name

          rm -rf sherpa-onnx-medasr-*
          rm -v ./$name

      - name: Test Omnilingual ASR CTC
        shell: bash
        run: |
          name=omnilingual-asr-ctc-cxx-api
          g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ls -lh ./$name
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
          tar xvf sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
          rm sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2

          echo "---"

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name

          rm -rf sherpa-onnx-omnilingual-*
          rm -v ./$name

      - name: Test Online punctuation
        shell: bash
        run: |
          name=online-punctuation-cxx-api
          g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ls -lh ./$name
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
          tar xvf sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
          rm sherpa-onnx-online-punct-en-2024-08-06.tar.bz2

          echo "---"

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name

          rm -rf sherpa-onnx-online-punct-*
          rm -v ./$name

      - name: Test Offline punctuation
        shell: bash
        run: |
          name=offline-punctuation-cxx-api
          g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ls -lh ./$name
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12-int8.tar.bz2
          tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12-int8.tar.bz2
          rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12-int8.tar.bz2

          echo "---"

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name

          rm -rf sherpa-onnx-punct-*
          rm -v ./$name

      - name: Test CED audio tagging
        shell: bash
        run: |
          name=audio-tagging-ced-cxx-api
          g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ls -lh ./$name
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2
          tar xvf sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2
          rm sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2

          echo "---"

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name

          rm -rf sherpa-onnx-ced-*
          rm -v ./$name

      - name: Test Zipformer audio tagging
        shell: bash
        run: |
          name=audio-tagging-zipformer-cxx-api
          g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ls -lh ./$name
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
          tar xvf sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
          rm sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2

          echo "---"

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name

          rm -rf sherpa-onnx-zipformer-*
          rm -v ./$name

      - name: Test Wenet CTC
        shell: bash
        run: |
          name=wenet-ctc-cxx-api
          g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ls -lh ./$name
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
          tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
          rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2

          echo "---"

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name

          rm -rf sherpa-onnx-wenetspeech-*
          rm -v ./$name

      - name: Test T-one
        shell: bash
        run: |
          name=streaming-t-one-ctc-cxx-api
          g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ls -lh ./$name
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
          tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
          rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2

          echo "---"

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name

          rm -rf sherpa-onnx-streaming-t-one-russian-2025-09-08
          rm -v ./$name

      - name: Test KittenTTS
        shell: bash
        run: |
          name=kitten-tts-en-cxx-api
          g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ls -lh ./$name
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2
          tar xf kitten-nano-en-v0_1-fp16.tar.bz2
          rm kitten-nano-en-v0_1-fp16.tar.bz2

          echo "---"

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name

          rm -rf kitten-nano-en-v0_1-fp16
          rm -v ./$name

      - uses: actions/upload-artifact@v4
        with:
          name: kitten-tts-wavs-${{ matrix.os }}
          path: ./generated-kitten-en-cxx.wav

      - name: Test NeMo Canary
        shell: bash
        run: |
          name=nemo-canary-cxx-api
          g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
          tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
          rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2

          ls -lh sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8
          echo "---"

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name

          rm -rf sherpa-onnx-nemo-canary-*
          rm -v ./$name

      - name: Test streaming zipformer with Homophone replacer
        shell: bash
        run: |
          name=streaming-zipformer-with-hr-cxx-api
          g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
          tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
          rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2

          ls -lh sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
          echo "---"

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
          tar xf dict.tar.bz2
          rm dict.tar.bz2

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./$name

          rm -rf sherpa-onnx-streaming-zipformer-*
          rm -rf dict lexicon.txt test-hr.wav replace.fst
          rm -v ./$name

      - name: Test Dolphin CTC
        shell: bash
        run: |
          name=dolphin-ctc-cxx-api
          g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
          tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
          rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2

          ./$name

          rm -rf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02

          rm $name

      - name: Test silero-vad
        shell: bash
        run: |
          name=vad-cxx-api
          g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

          ./$name

          mkdir vad-test-silero-vad
          cp -v lei-jun-test*.wav vad-test-silero-vad

          ls -lh vad-test-silero-vad

          rm $name
          rm -fv *.onnx
          rm -fv *.wav

      - uses: actions/upload-artifact@v4
        with:
          name: silero-vad-test-wavs-cxx-${{ matrix.os }}
          path: ./vad-test-silero-vad/*.wav

      - name: Test ten-vad
        shell: bash
        run: |
          name=vad-cxx-api
          g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $name

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./$name
            echo "----"
            readelf -d ./$name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx

          ./$name

          mkdir vad-test-ten-vad
          cp -v lei-jun-test*.wav vad-test-ten-vad

          ls -lh vad-test-ten-vad

          rm $name
          rm -fv *.onnx
          rm -rf *.wav

      - uses: actions/upload-artifact@v4
        with:
          name: ten-vad-test-wavs-cxx-${{ matrix.os }}
          path: ./vad-test-ten-vad/*.wav

      - name: Test Speech Enhancement
        shell: bash
        run: |
          gtcrn_name=speech-enhancement-gtcrn-cxx-api
          g++ -std=c++17 -o $gtcrn_name ./cxx-api-examples/$gtcrn_name.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          dpdfnet_name=speech-enhancement-dpdfnet-cxx-api
          g++ -std=c++17 -o $dpdfnet_name ./cxx-api-examples/$dpdfnet_name.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          online_gtcrn_name=online-speech-enhancement-gtcrn-cxx-api
          g++ -std=c++17 -o $online_gtcrn_name ./cxx-api-examples/$online_gtcrn_name.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          online_dpdfnet_name=online-speech-enhancement-dpdfnet-cxx-api
          g++ -std=c++17 -o $online_dpdfnet_name ./cxx-api-examples/$online_dpdfnet_name.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh $gtcrn_name $dpdfnet_name $online_gtcrn_name $online_dpdfnet_name

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./$gtcrn_name
            echo "----"
            readelf -d ./$gtcrn_name
            echo "----"
            ldd ./$dpdfnet_name
            echo "----"
            readelf -d ./$dpdfnet_name
            echo "----"
            ldd ./$online_gtcrn_name
            echo "----"
            readelf -d ./$online_gtcrn_name
            echo "----"
            ldd ./$online_dpdfnet_name
            echo "----"
            readelf -d ./$online_dpdfnet_name
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet_baseline.onnx
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav

          ./$gtcrn_name
          ./$dpdfnet_name
          ./$online_gtcrn_name
          ./$online_dpdfnet_name

          mkdir denoised-wavs
          cp -v inp_16k.wav denoised-wavs
          cp -v enhanced-gtcrn.wav denoised-wavs
          cp -v enhanced-dpdfnet.wav denoised-wavs
          cp -v enhanced-online-gtcrn.wav denoised-wavs
          cp -v enhanced-online-dpdfnet.wav denoised-wavs

          rm $gtcrn_name $dpdfnet_name $online_gtcrn_name $online_dpdfnet_name

      - uses: actions/upload-artifact@v4
        with:
          name: denoised-wavs-cxx-${{ matrix.os }}
          path: ./denoised-wavs/*.wav

      - name: Test FireRedAsr
        shell: bash
        run: |
          g++ -std=c++17 -o fire-red-asr-cxx-api ./cxx-api-examples/fire-red-asr-cxx-api.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh fire-red-asr-cxx-api

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./fire-red-asr-cxx-api
            echo "----"
            readelf -d ./fire-red-asr-cxx-api
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
          tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
          rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2

          ls -lh sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16
          echo "---"
          ls -lh sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs

          ./fire-red-asr-cxx-api

          rm -rf sherpa-onnx-fire-red-asr-*

      - name: Test KWS (zh)
        shell: bash
        run: |
          g++ -std=c++17 -o kws-cxx-api ./cxx-api-examples/kws-cxx-api.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
          tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
          rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./kws-cxx-api

          rm kws-cxx-api
          rm -rf sherpa-onnx-kws-*

      - name: Test Kokoro TTS (zh+en)
        shell: bash
        run: |
          g++ -std=c++17 -o kokoro-tts-zh-en-cxx-api ./cxx-api-examples/kokoro-tts-zh-en-cxx-api.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
          tar xf kokoro-multi-lang-v1_0.tar.bz2
          rm kokoro-multi-lang-v1_0.tar.bz2

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./kokoro-tts-zh-en-cxx-api

          rm kokoro-tts-zh-en-cxx-api
          rm -rf kokoro-*

      - name: Test Kokoro TTS (en)
        shell: bash
        run: |
          g++ -std=c++17 -o kokoro-tts-en-cxx-api ./cxx-api-examples/kokoro-tts-en-cxx-api.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
          tar xf kokoro-en-v0_19.tar.bz2
          rm kokoro-en-v0_19.tar.bz2

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./kokoro-tts-en-cxx-api

          rm kokoro-tts-en-cxx-api
          rm -rf kokoro-en-*

      - uses: actions/upload-artifact@v4
        with:
          name: kokoro-tts-${{ matrix.os }}
          path: ./generated-kokoro-*.wav

      - name: Test Matcha TTS (zh)
        shell: bash
        run: |
          g++ -std=c++17 -o matcha-tts-zh-cxx-api ./cxx-api-examples/matcha-tts-zh-cxx-api.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
          tar xvf matcha-icefall-zh-baker.tar.bz2
          rm matcha-icefall-zh-baker.tar.bz2

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./matcha-tts-zh-cxx-api

          rm -rf matcha-icefall-*
          rm vocos-22khz-univ.onnx
          rm matcha-tts-zh-cxx-api

      - name: Test Matcha TTS (en)
        shell: bash
        run: |
          g++ -std=c++17 -o matcha-tts-en-cxx-api ./cxx-api-examples/matcha-tts-en-cxx-api.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
          tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
          rm matcha-icefall-en_US-ljspeech.tar.bz2

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./matcha-tts-en-cxx-api

          rm matcha-tts-en-cxx-api
          rm -rf matcha-icefall-*
          rm vocos-22khz-univ.onnx

      - uses: actions/upload-artifact@v4
        with:
          name: matcha-tts-${{ matrix.os }}
          path: ./generated-matcha-*.wav

      - name: Test Moonshine tiny
        shell: bash
        run: |
          g++ -std=c++17 -o moonshine-cxx-api ./cxx-api-examples/moonshine-cxx-api.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
          tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
          rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./moonshine-cxx-api

          rm -rf sherpa-onnx-*
          rm ./moonshine-cxx-api

      - name: Test whisper
        shell: bash
        run: |
          g++ -std=c++17 -o whisper-cxx-api ./cxx-api-examples/whisper-cxx-api.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh whisper-cxx-api

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./whisper-cxx-api
            echo "----"
            readelf -d ./whisper-cxx-api
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
          tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
          rm sherpa-onnx-whisper-tiny.en.tar.bz2

          ls -lh sherpa-onnx-whisper-tiny.en
          echo "---"
          ls -lh sherpa-onnx-whisper-tiny.en/test_wavs

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./whisper-cxx-api

          rm -rf sherpa-onnx-whisper-*
          rm ./whisper-cxx-api

      - name: Test SenseVoice
        shell: bash
        run: |
          g++ -std=c++17 -o sense-voice-cxx-api ./cxx-api-examples/sense-voice-cxx-api.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh sense-voice-cxx-api

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./sense-voice-cxx-api
            echo "----"
            readelf -d ./sense-voice-cxx-api
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
          tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
          rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2

          ls -lh sherpa-onnx-sense-voice-*
          echo "---"
          ls -lh sherpa-onnx-sense-voice-*/test_wavs

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./sense-voice-cxx-api

          rm -rf sherpa-onnx-sense-voice-*
          rm ./sense-voice-cxx-api

      - name: Test streaming zipformer
        shell: bash
        run: |
          g++ -std=c++17 -o streaming-zipformer-cxx-api ./cxx-api-examples/streaming-zipformer-cxx-api.cc \
            -I ./build/install/include \
            -L ./build/install/lib/ \
            -l sherpa-onnx-cxx-api \
            -l sherpa-onnx-c-api \
            -l onnxruntime

          ls -lh streaming-zipformer-cxx-api

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            ldd ./streaming-zipformer-cxx-api
            echo "----"
            readelf -d ./streaming-zipformer-cxx-api
          fi

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
          tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
          rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2

          ls -lh sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
          echo "---"
          ls -lh sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs

          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

          ./streaming-zipformer-cxx-api

          rm -rf sherpa-onnx-streaming-zipformer-*
          rm ./streaming-zipformer-cxx-api


================================================
FILE: .github/workflows/dot-net.yaml
================================================
name: release-nuget-package

on:
  workflow_dispatch:

concurrency:
  group: release-nuget-package
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  build-libs:
    name: ${{ matrix.os }} ${{ matrix.arch }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [windows-2022]
        arch: [x64, x86, arm64]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Build sherpa-onnx
        shell: bash
        run: |
          arch=${{ matrix.arch }}
          opts=""
          if [ $arch == x86 ]; then
            opts="-A Win32"
          elif [ $arch == arm64 ]; then
            opts="-A ARM64"
          fi

          mkdir build
          cd build
          cmake \
            $opts \
            -DBUILD_SHARED_LIBS=ON \
            -DCMAKE_INSTALL_PREFIX=./install \
            -DCMAKE_BUILD_TYPE=Release \
            -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
            -DBUILD_ESPEAK_NG_EXE=OFF \
            -DSHERPA_ONNX_BUILD_C_API_EXAMPLES=OFF  \
            -DSHERPA_ONNX_ENABLE_BINARY=ON \
            ..

          cmake --build . --target install --config Release
          rm -rf install/pkgconfig

      - name: Create tar file
        shell: bash
        run: |
          arch=${{ matrix.arch }}

          cd build

          SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ../CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          dst=sherpa-onnx-$SHERPA_ONNX_VERSION-win-$arch
          mv install/lib $dst
          tar cjvf $dst.tar.bz2 $dst
          ls -lh *.tar.bz2
          mv *.tar.bz2 ../

      - uses: actions/upload-artifact@v4
        with:
          name: windows-${{ matrix.arch }}
          path: ./*.tar.bz2

      # https://huggingface.co/docs/hub/spaces-github-actions
      - name: Publish to huggingface
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_CLONE_PROTECTION_ACTIVE=false
            export GIT_LFS_SKIP_SMUDGE=1

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs huggingface

            cd huggingface
            git fetch
            git pull
            dst=windows-for-dotnet/$SHERPA_ONNX_VERSION
            mkdir -p $dst

            cp -v ../sherpa-onnx-*.tar.bz2 $dst/

            git status
            git lfs track "*.bz2"

            git add .

            git commit -m "add more files"

            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs main
            rm -rf huggingface

  release-nuget-package:
    runs-on: ${{ matrix.os }}
    needs: [build-libs]
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Setup .NET
        uses: actions/setup-dotnet@v4
        with:
          dotnet-version: 8.0.x

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip Jinja2

      - name: Retrieve artifact from windows x64
        uses: actions/download-artifact@v4
        with:
          name: windows-x64
          path: /tmp/windows-x64

      - name: Retrieve artifact from windows x86
        uses: actions/download-artifact@v4
        with:
          name: windows-x86
          path: /tmp/windows-x86

      - name: Retrieve artifact from windows arm64
        uses: actions/download-artifact@v4
        with:
          name: windows-arm64
          path: /tmp/windows-arm64

      - name: Check dotnet
        run: dotnet --info

      - name: Build
        shell: bash
        run: |
          sudo apt-get install -y tree
          ls -lh /tmp/

          tree /tmp/windows*
          echo "----"

          rm -fv /tmp/windows*/*.lib
          tree /tmp/windows*

      - name: Build
        shell: bash
        run: |
          cd scripts/dotnet
          ./run.sh

          ls -lh /tmp/packages

      - name: publish .Net packages to nuget.org
        if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
        shell: bash
        env:
          API_KEY: ${{ secrets.NUGET_API_KEY }}
        run: |
          # API_KEY is valid until 2025.04.26
          cd /tmp/packages
          dotnet nuget push ./org.k2fsa.sherpa.onnx.*.nupkg --skip-duplicate --api-key $API_KEY --source https://api.nuget.org/v3/index.json


================================================
FILE: .github/workflows/export-3dspeaker-to-onnx.yaml
================================================
name: export-3dspeaker-to-onnx

on:
  workflow_dispatch:

concurrency:
  group: export-3dspeaker-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-3dspeaker-to-onnx:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: export 3d-speaker to ONNX
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.8"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Run
        shell: bash
        run: |
          cd scripts/3dspeaker
          ./run.sh

          mv -v *.onnx ../..

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.onnx
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: speaker-recongition-models

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            d=speaker-embedding-models
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false
            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
            mv -v ./*.onnx ./huggingface
            cd huggingface
            git lfs track "*.onnx"
            git status
            git add .
            git status
            git commit -m "add models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main


================================================
FILE: .github/workflows/export-ced-to-onnx.yaml
================================================
name: export-ced-to-onnx

on:
  workflow_dispatch:

concurrency:
  group: export-ced-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-ced-to-onnx:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: export ced
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.8"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Run
        shell: bash
        run: |
          cd scripts/ced
          ./run.sh

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: audio-tagging-models

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            models=(
              tiny
              mini
              small
              base
            )

            for m in ${models[@]}; do
              rm -rf huggingface
              export GIT_LFS_SKIP_SMUDGE=1
              d=sherpa-onnx-ced-$m-audio-tagging-2024-04-19
              export GIT_CLONE_PROTECTION_ACTIVE=false
              git clone https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/$d huggingface
              mv -v $d/* huggingface
              cd huggingface
              git lfs track "*.onnx"
              git status
              git add .
              git status
              git commit -m "first commit"
              git push https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/$d main
              cd ..
            done


================================================
FILE: .github/workflows/export-dophin-ctc-to-onnx.yaml
================================================
name: export-dolphin-ctc-to-onnx

on:
  workflow_dispatch:

concurrency:
  group: export-dolphin-ctc-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-dolphin-ctc-to-onnx:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: ${{ matrix.model_type }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        model_type: [small, base]

    steps:
      - uses: actions/checkout@v4

      - name: Download ${{ matrix.model_type }}
        shell: bash
        run: |
          git lfs install
          type=${{ matrix.model_type }}

          git clone https://huggingface.co/csukuangfj/sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02
          git clone https://huggingface.co/csukuangfj/sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02

          rm -rf sherpa-onnx-dolphin-*/.git*

          ls -lha sherpa-onnx-dolphin-*/

          tar cjfv sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02.tar.bz2 sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02
          tar cjfv sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02.tar.bz2 sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models


================================================
FILE: .github/workflows/export-fire-red-asr.yaml
================================================
name: export-fire-red-asr-to-onnx

on:
  push:
    branches:
      - export-fire-red-asr

  workflow_dispatch:

concurrency:
  group: export-fire-red-asr-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-fire-red-asr-to-onnx:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: export FireRedAsr ${{ matrix.version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Python dependencies
        shell: bash
        run: |
          pip install "numpy<=1.26.4" onnx==1.16.0 onnxruntime==1.17.1

      - name: Download exported ONNX model from ModelScope
        env:
          MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
        shell: bash
        run: |
          git clone https://oauth2:${MS_TOKEN}@www.modelscope.cn/csukuangfj/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.git ms
          ls -lh ms


      - name: Collect results
        shell: bash
        run: |
          src=ms
          d=sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16
          mkdir $d

          mv -v $src/*.onnx $d
          cp -v $src/README.md $d
          cp -v $src/tokens.txt $d
          cp -av $src/test_wavs $d

          ls -lh $d/
          tar cjfv $d.tar.bz2 $d

          ls -lh $d.tar.bz2
          rm -rf ms

      - name: Publish to huggingface ${{ matrix.version }}
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            src=sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16

            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$src huggingface
            cd huggingface
            rm -rf ./*
            git fetch
            git pull

            cp -av ../$src/* ./

            git lfs track "*.onnx"
            git add .

            ls -lh

            git status

            git commit -m "add models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$src main || true

      - name: Release
        if: github.repository_owner == 'csukuangfj'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models

      - name: Release
        if: github.repository_owner == 'k2-fsa'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          tag: asr-models


================================================
FILE: .github/workflows/export-gtcrn.yaml
================================================
name: export-gtcrn-to-onnx

on:
  push:
    branches:
      - export-gtcrn

  workflow_dispatch:

concurrency:
  group: export-gtcrn-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-gtcrn-to-onnx:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: export gtcrn ${{ matrix.version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Python dependencies
        shell: bash
        run: |
          pip install "numpy<=1.26.4" onnx==1.16.0 onnxruntime==1.17.1 librosa soundfile torch==2.6.0+cpu -f https://download.pytorch.org/whl/torch "kaldi-native-fbank>=1.21.1"

      - name: Run
        shell: bash
        run: |
          cd scripts/gtcrn
          ./run.sh
          ./test.py
          ls -lh

      - name: Collect results
        shell: bash
        run: |
          src=scripts/gtcrn
          cp -v $src/*.onnx ./
          ls -lh *.onnx

      - name: Publish to huggingface 0.19
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/speech-enhancement-models huggingface
            cd huggingface
            git fetch
            git pull

            cp -v ../gtcrn_simple.onnx ./

            git lfs track "*.onnx"
            git add .

            ls -lh

            git status

            git commit -m "add models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/speech-enhancement-models main || true

      - name: Release
        if: github.repository_owner == 'csukuangfj'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.onnx
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: speech-enhancement-models

      - name: Release
        if: github.repository_owner == 'k2-fsa'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.onnx
          overwrite: true
          tag: speech-enhancement-models


================================================
FILE: .github/workflows/export-kitten.yaml
================================================
name: export-kitten-to-onnx

on:
  push:
    branches:
      - kitten-0.2

  workflow_dispatch:

concurrency:
  group: export-kitten-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-kitten-to-onnx:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: export kitten ${{ matrix.version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        version: ["nano_v0_1", "nano_v0_2", "mini_v0_1"]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Python dependencies
        shell: bash
        run: |
          pip install "numpy<=1.26.4" onnx==1.16.0 onnxruntime==1.17.1 librosa soundfile piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html

      - name: Run
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        shell: bash
        run: |
          cd scripts/kitten-tts/${{ matrix.version }}
          ./run.sh

      - name: Collect results
        shell: bash
        run: |
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
          tar xf espeak-ng-data.tar.bz2
          rm espeak-ng-data.tar.bz2

          version=${{ matrix.version }}

          src=scripts/kitten-tts/$version

          if [[ $version == "nano_v0_1" ]]; then
            d=kitten-nano-en-v0_1-fp16
          elif [[ $version == "nano_v0_2" ]]; then
            d=kitten-nano-en-v0_2-fp16
          elif [[ $version == "mini_v0_1" ]]; then
            d=kitten-mini-en-v0_1-fp16
          else
            echo "version $version"
            exit 1
          fi

          mkdir $d
          cp -a LICENSE $d/LICENSE
          cp -a espeak-ng-data $d/
          cp -v $src/model.fp16.onnx $d/model.fp16.onnx
          cp -v $src/voices.bin $d/
          cp -v $src/tokens.txt $d/
          cp -v $src/../README.md $d/README.md
          ls -lh $d/
          tar cjfv $d.tar.bz2 $d

          ls -lh $d.tar.bz2

      - name: Release
        if: github.repository_owner == 'csukuangfj'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: tts-models

      - name: Release
        if: github.repository_owner == 'k2-fsa'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          tag: tts-models

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            dirs=(
              kitten-nano-en-v0_1-fp16
              kitten-nano-en-v0_2-fp16
              kitten-mini-en-v0_1-fp16
            )

            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            for d in ${dirs[@]}; do
              echo "d $d"
              if [[ ! -d $d ]]; then
                echo "$d does not exist"
                continue
              fi

              echo "$d exists"
              rm -rf huggingface

              git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
              cd huggingface
              rm -rf ./*

              git lfs track "*.onnx"
              git lfs track af_dict
              git lfs track ar_dict
              git lfs track cmn_dict
              git lfs track da_dict en_dict fa_dict hu_dict ia_dict it_dict lb_dict phondata ru_dict ta_dict
              git lfs track ur_dict yue_dict

              cp -a ../$d/* ./

              git add .

              ls -lh

              git status

              git commit -m "add models"
              git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main || true
            done


================================================
FILE: .github/workflows/export-kokoro.yaml
================================================
name: export-kokoro-to-onnx

on:
  push:
    branches:
      - refactor-kokoro-2

  workflow_dispatch:

concurrency:
  group: export-kokoro-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-kokoro-to-onnx:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: export kokoro ${{ matrix.version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        version: ["0.19", "1.0", "1.1-zh"]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Python dependencies
        shell: bash
        run: |
          pip install kokoro "numpy<=1.26.4" onnx==1.16.0 onnxruntime==1.17.1 librosa soundfile piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html misaki[en] misaki[zh] torch==2.6.0+cpu -f https://download.pytorch.org/whl/torch sherpa-onnx

      - name: Run
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        shell: bash
        run: |
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
          tar xf espeak-ng-data.tar.bz2
          rm espeak-ng-data.tar.bz2
          cp -a ./espeak-ng-data ./scripts/kokoro/v0.19
          cp -a ./espeak-ng-data ./scripts/kokoro/v1.0
          cp -a ./espeak-ng-data ./scripts/kokoro/v1.1-zh

          git config --global user.email "csukuangfj@gmail.com"
          git config --global user.name "Fangjun Kuang"

          cd scripts/kokoro
          v=${{ matrix.version }}
          if [[ $v = "0.19" ]]; then
            cd v0.19
            ./run.sh

            if false; then
              # generate samples
              git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples hf
              mkdir -p hf/kokoro/v0.19/mp3
              ./generate_samples.py
              pushd hf
              git pull
              git add .
              git commit -m 'add kokoro samples for v0.19'
              git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples main
              popd
              rm -rf hf
            fi

          elif [[ $v == "1.0" ]]; then
            cd v1.0
            ./run.sh

            if false; then
              git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples hf
              mkdir -p hf/kokoro/v1.0/mp3

              curl -SL -O https://github.com/csukuangfj/cppjieba/releases/download/sherpa-onnx-2024-04-19/dict.tar.bz2
              tar xvf dict.tar.bz2
              rm dict.tar.bz2

              curl -SL -o date-zh.fst https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/date.fst
              curl -SL -o number-zh.fst  https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/number.fst
              curl -SL -o phone-zh.fst https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/phone.fst

              ./generate_samples.py
              pushd hf
              git pull
              git add .
              git commit -m 'add kokoro samples for v1.0'
              git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples main
              popd
              rm -rf hf
            fi

          elif [[ $v == "1.1-zh" ]]; then
            cd v1.1-zh
            ./run.sh

            if false; then
              git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples hf
              mkdir -p hf/kokoro/v1.1-zh/mp3

              curl -SL -O https://github.com/csukuangfj/cppjieba/releases/download/sherpa-onnx-2024-04-19/dict.tar.bz2
              tar xvf dict.tar.bz2
              rm dict.tar.bz2

              curl -SL -o date-zh.fst https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/date.fst
              curl -SL -o number-zh.fst  https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/number.fst
              curl -SL -o phone-zh.fst https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/phone.fst

              ./generate_samples.py
              pushd hf
              git pull
              git add .
              git commit -m 'add kokoro samples for v1.1-zh'
              git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples main
              popd
              rm -rf hf
            fi
          else
            echo "Unknown version $v"
            exit 1
          fi

      - name: Collect results 0.19
        if: matrix.version == '0.19'
        shell: bash
        run: |
          src=scripts/kokoro/v0.19

          d=kokoro-en-v0_19

          mkdir $d
          cp -a LICENSE $d/LICENSE
          cp -a espeak-ng-data $d/
          cp -v $src/model.onnx $d/model.onnx
          cp -v $src/voices.bin $d/
          cp -v $src/tokens.txt $d/
          cp -v $src/../README.md $d/README.md
          ls -lh $d/
          tar cjfv $d.tar.bz2 $d

          ls -lh $d.tar.bz2

      - name: Collect results 0.19 (int8)
        if: matrix.version == '0.19'
        shell: bash
        run: |
          src=scripts/kokoro/v0.19

          d=kokoro-int8-en-v0_19

          mkdir $d
          cp -a LICENSE $d/LICENSE
          cp -a espeak-ng-data $d/
          cp -v $src/model.int8.onnx $d/model.int8.onnx
          cp -v $src/voices.bin $d/
          cp -v $src/tokens.txt $d/
          cp -v $src/../README.md $d/README.md
          ls -lh $d/
          tar cjfv $d.tar.bz2 $d

          ls -lh $d.tar.bz2

      - name: Collect results 1.0
        if: matrix.version == '1.0'
        shell: bash
        run: |
          curl -SL -O https://github.com/csukuangfj/cppjieba/releases/download/sherpa-onnx-2024-04-19/dict.tar.bz2
          tar xvf dict.tar.bz2
          rm dict.tar.bz2

          curl -SL -o date-zh.fst https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/date.fst
          curl -SL -o number-zh.fst  https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/number.fst
          curl -SL -o phone-zh.fst https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/phone.fst

          src=scripts/kokoro/v1.0

          d=kokoro-multi-lang-v1_0
          mkdir $d
          cp -v LICENSE $d/LICENSE
          cp -a espeak-ng-data $d/
          cp -v $src/kokoro.onnx $d/model.onnx
          cp -v $src/voices.bin $d/
          cp -v $src/tokens.txt $d/
          cp -v $src/lexicon*.txt $d/
          cp -v $src/README.md $d/README.md
          cp -av dict $d/
          cp -v ./*.fst $d/
          ls -lh $d/
          echo "---"
          ls -lh $d/dict

          tar cjfv $d.tar.bz2 $d
          rm -rf $d

          ls -lh $d.tar.bz2

          d=kokoro-int8-multi-lang-v1_0
          mkdir $d
          cp -v LICENSE $d/LICENSE
          cp -a espeak-ng-data $d/
          cp -v $src/kokoro.int8.onnx $d/model.int8.onnx
          cp -v $src/voices.bin $d/
          cp -v $src/tokens.txt $d/
          cp -v $src/lexicon*.txt $d/
          cp -v $src/README.md $d/README.md
          cp -av dict $d/
          cp -v ./*.fst $d/
          ls -lh $d/
          echo "---"
          ls -lh $d/dict

          tar cjfv $d.tar.bz2 $d
          rm -rf $d

          ls -lh $d.tar.bz2

      - name: Collect results 1.1-zh
        if: matrix.version == '1.1-zh'
        shell: bash
        run: |
          curl -SL -O https://github.com/csukuangfj/cppjieba/releases/download/sherpa-onnx-2024-04-19/dict.tar.bz2
          tar xvf dict.tar.bz2
          rm dict.tar.bz2

          curl -SL -o date-zh.fst https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/date.fst
          curl -SL -o number-zh.fst  https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/number.fst
          curl -SL -o phone-zh.fst https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/phone.fst

          src=scripts/kokoro/v1.1-zh

          d=kokoro-multi-lang-v1_1
          mkdir $d
          cp -v LICENSE $d/LICENSE
          cp -a espeak-ng-data $d/
          cp -v $src/kokoro.onnx $d/model.onnx
          cp -v $src/voices.bin $d/
          cp -v $src/tokens.txt $d/
          cp -v $src/lexicon*.txt $d/
          cp -v $src/README.md $d/README.md
          cp -av dict $d/
          cp -v ./*.fst $d/
          ls -lh $d/
          echo "---"
          ls -lh $d/dict

          tar cjfv $d.tar.bz2 $d
          rm -rf $d
          ls -lh $d.tar.bz2

          d=kokoro-int8-multi-lang-v1_1
          mkdir $d
          cp -v LICENSE $d/LICENSE
          cp -a espeak-ng-data $d/
          cp -v $src/kokoro.int8.onnx $d/model.int8.onnx
          cp -v $src/voices.bin $d/
          cp -v $src/tokens.txt $d/
          cp -v $src/lexicon*.txt $d/
          cp -v $src/README.md $d/README.md
          cp -av dict $d/
          cp -v ./*.fst $d/
          ls -lh $d/
          echo "---"
          ls -lh $d/dict

          tar cjfv $d.tar.bz2 $d
          rm -rf $d
          ls -lh $d.tar.bz2

          echo "---"
          ls -lh *.tar.bz2

      - name: Release
        if: github.repository_owner == 'csukuangfj'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: tts-models

      - name: Release
        if: github.repository_owner == 'k2-fsa'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          tag: tts-models

      - name: Publish to huggingface 0.19
        if: matrix.version == '0.19'
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            dirs=(
              kokoro-en-v0_19
              # kokoro-int8-en-v0_19
            )

            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false
            for d in ${dirs[@]}; do
              rm -rf huggingface

              git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-en-v0_19 huggingface
              cd huggingface
              rm -rf ./*

              git lfs track "*.onnx"
              git lfs track af_dict
              git lfs track ar_dict
              git lfs track cmn_dict
              git lfs track da_dict en_dict fa_dict hu_dict ia_dict it_dict lb_dict phondata ru_dict ta_dict
              git lfs track ur_dict yue_dict


              cp -a ../$d/* ./

              git add .

              ls -lh

              git status

              git commit -m "add models"
              git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-en-v0_19 main || true
            done

      - name: Publish to huggingface 1.0 float32
        if: matrix.version == '1.0'
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-multi-lang-v1_0 huggingface
            cd huggingface
            rm -rf ./*
            git fetch
            git pull

            git lfs track "cmn_dict"
            git lfs track "ru_dict"
            git lfs track "*.wav"
            git lfs track "lexicon*.txt"

            cp -a ../espeak-ng-data ./

            cp -v ../scripts/kokoro/v1.0/kokoro.onnx ./model.onnx


            cp -v ../scripts/kokoro/v1.0/tokens.txt .
            cp -v ../scripts/kokoro/v1.0/voices.bin .
            cp -v ../scripts/kokoro/v1.0/lexicon*.txt .
            cp -v ../scripts/kokoro/v1.0/README.md ./README.md
            cp -v ../LICENSE ./
            cp -av ../dict ./
            cp -v ../*.fst ./

            git lfs track "*.onnx"
            git add .

            ls -lh

            git status

            git commit -m "add models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-multi-lang-v1_0 main || true

      - name: Publish to huggingface 1.0 int8
        if: matrix.version == '1.0'
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-int8-multi-lang-v1_0 huggingface
            cd huggingface
            rm -rf ./*
            git fetch
            git pull

            git lfs track "cmn_dict"
            git lfs track "ru_dict"
            git lfs track "af_dict"
            git lfs track "ar_dict"
            git lfs track "da_dict"
            git lfs track "en_dict"
            git lfs track "fa_dict"
            git lfs track "hu_dict"
            git lfs track "ia_dict"
            git lfs track "it_dict"
            git lfs track "lb_dict"
            git lfs track "phondata"
            git lfs track "ta_dict"
            git lfs track "ur_dict"
            git lfs track "yue_dict"
            git lfs track "*.wav"
            git lfs track "lexicon*.txt"

            cp -a ../espeak-ng-data ./

            cp -v ../scripts/kokoro/v1.0/kokoro.int8.onnx ./model.int8.onnx

            cp -v ../scripts/kokoro/v1.0/tokens.txt .
            cp -v ../scripts/kokoro/v1.0/voices.bin .
            cp -v ../scripts/kokoro/v1.0/lexicon*.txt .
            cp -v ../scripts/kokoro/v1.0/README.md ./README.md
            cp -v ../LICENSE ./
            cp -av ../dict ./
            cp -v ../*.fst ./

            git lfs track "*.onnx"
            git add .

            ls -lh

            git status

            git commit -m "add models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-int8-multi-lang-v1_0 main || true

      - name: Publish to huggingface 1.1-zh
        if: matrix.version == '1.1-zh'
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-multi-lang-v1_1 huggingface
            cd huggingface
            rm -rf ./*
            git fetch
            git pull

            git lfs track "cmn_dict"
            git lfs track "ru_dict"
            git lfs track "*.wav"
            git lfs track "lexicon*.txt"

            cp -a ../espeak-ng-data ./

            cp -v ../scripts/kokoro/v1.1-zh/kokoro.onnx ./model.onnx

            cp -v ../scripts/kokoro/v1.1-zh/tokens.txt .
            cp -v ../scripts/kokoro/v1.1-zh/voices.bin .
            cp -v ../scripts/kokoro/v1.1-zh/lexicon*.txt .
            cp -v ../scripts/kokoro/v1.1-zh/README.md ./README.md
            cp -v ../LICENSE ./
            cp -av ../dict ./
            cp -v ../*.fst ./

            git lfs track "*.onnx"
            git add .

            ls -lh

            git status

            git commit -m "add models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-multi-lang-v1_1 main || true

      - name: Publish to huggingface 1.1-zh-int8
        if: matrix.version == '1.1-zh'
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-int8-multi-lang-v1_1 huggingface
            cd huggingface
            rm -rf ./*
            git fetch
            git pull

            git lfs track "cmn_dict"
            git lfs track "ru_dict"
            git lfs track "*.wav"
            git lfs track "lexicon*.txt"

            cp -a ../espeak-ng-data ./

            cp -v ../scripts/kokoro/v1.1-zh/kokoro.int8.onnx ./model.int8.onnx

            cp -v ../scripts/kokoro/v1.1-zh/tokens.txt .
            cp -v ../scripts/kokoro/v1.1-zh/voices.bin .
            cp -v ../scripts/kokoro/v1.1-zh/lexicon*.txt .
            cp -v ../scripts/kokoro/v1.1-zh/README.md ./README.md
            cp -v ../LICENSE ./
            cp -av ../dict ./
            cp -v ../*.fst ./

            git lfs track "*.onnx"
            git add .

            ls -lh

            git status

            git commit -m "add models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-int8-multi-lang-v1_1 main || true


================================================
FILE: .github/workflows/export-libriheavy.yaml
================================================
name: export-libriheavy-to-onnx

on:
  push:
    branches:
      - libriheavy-model
  workflow_dispatch:

concurrency:
  group: export-libriheavy-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-libriheavy-to-onnx:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: export libriheavy
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.8"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Run
        shell: bash
        run: |
          cd scripts/icefall
          ./run-libriheavy.sh
          ./run-libriheavy-punct-case.sh

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            for m in large medium small; do
              rm -rf huggingface
              export GIT_LFS_SKIP_SMUDGE=1
              export GIT_CLONE_PROTECTION_ACTIVE=false

              src=sherpa-onnx-zipformer-en-libriheavy-20230926-$m
              echo "Process $src"

              git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$src huggingface
              cd huggingface
              git fetch
              git pull

              cp -av ../scripts/icefall/$src/* .

              git lfs track "*.onnx"
              git add .

              git commit -m "add large"
              git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$src main || true

              cd ..

              rm -rf huggingface/.git*

              mv huggingface $src

              tar cjvf $src.tar.bz2 $src
              rm -rf $src
              ls -lh
            done

      - name: Publish to huggingface (case and punct)
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            for m in large medium small; do
              rm -rf huggingface
              export GIT_LFS_SKIP_SMUDGE=1
              export GIT_CLONE_PROTECTION_ACTIVE=false

              src=sherpa-onnx-zipformer-en-libriheavy-20230830-$m-punct-case
              echo "Process $src"

              git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$src huggingface
              cd huggingface
              git fetch
              git pull

              cp -av ../scripts/icefall/$src/* .

              git lfs track "*.onnx"
              git add .

              git commit -m "add large"
              git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$src main || true

              cd ..

              rm -rf huggingface/.git*

              mv huggingface $src

              tar cjvf $src.tar.bz2 $src
              rm -rf $src
              ls -lh
            done

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models


================================================
FILE: .github/workflows/export-matcha-fa-en.yaml
================================================
name: export-matcha-fa-en-to-onnx

on:
  push:
    branches:
      - tts-matcha-samples

  workflow_dispatch:

concurrency:
  group: export-matcha-fa-en-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-matcha-fa-en-to-onnx:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: export matcha fa-en ${{ matrix.version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Python dependencies
        shell: bash
        run: |
          pip install "numpy<=1.26.4" onnx==1.16.0 onnxruntime==1.17.1 soundfile piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html sherpa-onnx

      - name: Run
        if: false
        shell: bash
        run: |
          cd scripts/matcha-tts/fa-en
          ./run.sh

      - name: Generate samples
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        shell: bash
        run: |
          cd scripts/matcha-tts/zh

          git config --global user.email "csukuangfj@gmail.com"
          git config --global user.name "Fangjun Kuang"

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
          tar xvf matcha-icefall-zh-baker.tar.bz2
          rm matcha-icefall-zh-baker.tar.bz2

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx

          git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples hf
          mkdir -p ./hf/matcha/icefall-zh/mp3

          ./generate_samples.py

          pushd hf
          git pull
          git add .
          git commit -m 'add kokoro samples for matcha tts zh'
          git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples main
          popd
          rm -rf hf

          ls -lh

      - name: Collect results ${{ matrix.version }}
        if: false
        shell: bash
        run: |
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
          tar xf espeak-ng-data.tar.bz2
          rm espeak-ng-data.tar.bz2

          src=scripts/matcha-tts/fa-en
          dst1=matcha-tts-fa_en-musa # male
          dst2=matcha-tts-fa_en-khadijah # female

          mkdir $dst1 $dst2

          cp -a espeak-ng-data $dst1/
          cp -a espeak-ng-data $dst2/

          cp -v $src/male/* $dst1
          cp -v $src/female/* $dst2

          cp -v $src/README.md $dst1/
          cp -v $src/README.md $dst2/

          ls -lh $dst1/
          echo "---"
          ls -lh $dst2/
          tar cjfv $dst1.tar.bz2 $dst1
          tar cjfv $dst2.tar.bz2 $dst2

          ls -lh $dst1.tar.bz2
          ls -lh $dst2.tar.bz2

      - name: Publish to huggingface male (musa)
        if: false
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/matcha-tts-fa_en-musa huggingface
            cd huggingface
            rm -rf ./*
            git fetch
            git pull

            git lfs track "cmn_dict"
            git lfs track "ru_dict"

            cp -a ../matcha-tts-fa_en-musa/* ./

            git lfs track "*.onnx"
            git add .

            ls -lh

            git status

            git commit -m "add models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/matcha-tts-fa_en-musa main || true

      - name: Publish to huggingface female (khadijah)
        if: false
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/matcha-tts-fa_en-khadijah huggingface
            cd huggingface
            rm -rf ./*
            git fetch
            git pull

            git lfs track "cmn_dict"
            git lfs track "ru_dict"

            cp -a ../matcha-tts-fa_en-khadijah/* ./

            git lfs track "*.onnx"
            git add .

            ls -lh

            git status

            git commit -m "add models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/matcha-tts-fa_en-khadijah main || true

      - name: Release
        # if: github.repository_owner == 'csukuangfj'
        if: false
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: tts-models

      - name: Release
        # if: github.repository_owner == 'k2-fsa'
        if: false
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          tag: tts-models


================================================
FILE: .github/workflows/export-matcha-zh-en.yaml
================================================
name: export-matcha-zh-en-to-onnx

on:
  push:
    branches:
      - matcha-zh-en

  workflow_dispatch:

concurrency:
  group: export-matcha-zh-en-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-matcha-zh-en-to-onnx:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: export matcha zh-en ${{ matrix.version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Python dependencies
        shell: bash
        run: |
          pip install "numpy<=1.26.4" pypinyin soundfile \
            sherpa-onnx -f https://k2-fsa.github.io/sherpa/onnx/cpu.html

      - name: Generate samples
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        shell: bash
        run: |
          cd scripts/matcha-tts/zh-en

          git config --global user.email "csukuangfj@gmail.com"
          git config --global user.name "Fangjun Kuang"

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-en.tar.bz2
          tar xvf matcha-icefall-zh-en.tar.bz2
          rm matcha-icefall-zh-en.tar.bz2

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-16khz-univ.onnx

          git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples hf
          mkdir -p ./hf/matcha/icefall-zh-en/mp3

          ./generate_samples.py

          pushd hf
          git pull
          git add .
          git commit -m 'add samples for matcha tts zh en'
          git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples main
          popd
          rm -rf hf

          ls -lh

      - name: Run
        shell: bash
        run: |
          cd scripts/matcha-tts/zh-en
          curl -SL -O https://modelscope.cn/models/dengcunqin/matcha_tts_zh_en_20251010/resolve/master/model-steps-3.onnx
          curl -SL -O https://modelscope.cn/models/dengcunqin/matcha_tts_zh_en_20251010/resolve/master/vocab_tts.txt

          ./generate_tokens.py
          ./generate_lexicon.py

          curl -SL -o date-zh.fst https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/date.fst
          curl -SL -o number-zh.fst  https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/number.fst
          curl -SL -o phone-zh.fst https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/phone.fst

      - name: Collect results ${{ matrix.version }}
        shell: bash
        run: |
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
          tar xf espeak-ng-data.tar.bz2
          rm espeak-ng-data.tar.bz2

          src=scripts/matcha-tts/zh-en
          dst=matcha-icefall-zh-en

          mkdir $dst

          cp -a espeak-ng-data $dst/

          cp -v $src/tokens.txt $dst
          cp -v $src/lexicon.txt $dst
          cp -v $src/model-steps-3.onnx $dst
          cp -v $src/README.md $dst
          cp -v $src/*.fst $dst

          tar cjfv $dst.tar.bz2 $dst

          ls -lh $dst.tar.bz2

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/matcha-icefall-zh-en huggingface
            cd huggingface
            rm -rf ./*
            git fetch
            git pull

            git lfs track "cmn_dict"
            git lfs track "ru_dict" af_dict ar_dict da_dict en_dict fa_dict hu_dict ia_dict it_dict lb_dict phondata ta_dict ur_dict yue_dict

            cp -a ../matcha-icefall-zh-en/* ./

            git lfs track "*.onnx"
            git add .

            ls -lh

            git status

            git commit -m "add models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/matcha-icefall-zh-en main || true

      - name: Release
        if: github.repository_owner == 'csukuangfj'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: tts-models

      - name: Release
        if: github.repository_owner == 'k2-fsa'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          tag: tts-models


================================================
FILE: .github/workflows/export-medasr-ctc-to-onnx.yaml
================================================
name: export-medasr-ctc-to-onnx

on:
  push:
    branches:
      - cpp-medasr-2
  workflow_dispatch:

concurrency:
  group: export-medasr-ctc-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-medasr-ctc-to-onnx:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: export medasr ctc
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Run
        shell: bash
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          cd scripts/medasr
          ./run.sh

      - name: Download test data
        shell: bash
        run: |
          cd scripts/medasr

          for i in $(seq 0 5); do
            curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-medasr-ctc-en-int8-2025-12-25/resolve/main/test_wavs/$i.wav
          done

          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-medasr-ctc-en-int8-2025-12-25/resolve/main/test_wavs/transcript.txt

          ls -lh

      - name: Test fp32
        shell: bash
        run: |
          cd scripts/medasr

          for i in $(seq 0 5); do
            python3 test_onnx.py --model ./model.onnx --tokens ./tokens.txt --wav ./$i.wav
          done

          cat transcript.txt

      - name: Test int8
        shell: bash
        run: |
          cd scripts/medasr

          for i in $(seq 0 5); do
            python3 test_onnx.py --model ./model.int8.onnx --tokens ./tokens.txt --wav ./$i.wav
          done

          cat transcript.txt

      - name: Collect fp32 files
        shell: bash
        run: |
          cd scripts/medasr

          d=sherpa-onnx-medasr-ctc-en-2025-12-25
          mkdir -p $d
          mkdir -p $d/test_wavs

          cp -v model.onnx $d
          cp -v README.md $d
          cp -v tokens.txt $d
          cp -v *.wav $d/test_wavs/
          cp -v transcript.txt $d/test_wavs/

          tar cjvf $d.tar.bz2 $d

          ls -lh $d
          ls -lh *.tar.bz2

          mv $d ../..
          mv $d.tar.bz2 ../..

      - name: Collect int8 files
        shell: bash
        run: |
          cd scripts/medasr

          d=sherpa-onnx-medasr-ctc-en-int8-2025-12-25
          mkdir -p $d
          mkdir -p $d/test_wavs

          cp -v model.int8.onnx $d
          cp -v README.md $d
          cp -v tokens.txt $d
          cp -v *.wav $d/test_wavs/
          cp -v transcript.txt $d/test_wavs/

          tar cjvf $d.tar.bz2 $d

          ls -lh $d
          ls -lh *.tar.bz2

          mv $d ../..
          mv $d.tar.bz2 ../..

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 5
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            names=(
             sherpa-onnx-medasr-ctc-en-2025-12-25
             sherpa-onnx-medasr-ctc-en-int8-2025-12-25
            )
            for d in ${names[@]}; do
              if [ ! -d $d ]; then
                echo "$d does not exist - skip it"
                continue;
              fi

              export GIT_LFS_SKIP_SMUDGE=1
              export GIT_CLONE_PROTECTION_ACTIVE=false
              rm -rf huggingface
              git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
              cp -av $d/* ./huggingface
              cd huggingface
              git lfs track "*.onnx"
              git lfs track "*.wav"
              git status
              git add .
              git status
              git commit -m "add models"
              git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main
              cd ..
            done


================================================
FILE: .github/workflows/export-melo-tts-to-onnx.yaml
================================================
name: export-melo-tts-to-onnx

on:
  push:
    branches:
      - export-melo-tts-onnx
  workflow_dispatch:

concurrency:
  group: export-melo-tts-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-melo-tts-to-onnx:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: export melo-tts
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Run
        shell: bash
        run: |
          cd scripts/melo-tts
          ./run.sh

      - uses: actions/upload-artifact@v4
        with:
          name: test.wav
          path: scripts/melo-tts/test.wav

      - name: Publish to huggingface (Chinese + English)
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/vits-melo-tts-zh_en huggingface
            cd huggingface
            git fetch
            git pull
            echo "pwd: $PWD"
            ls -lh ../scripts/melo-tts/zh_en

            rm -rf ./

            cp -v ../scripts/melo-tts/zh_en/*.onnx .
            cp -v ../scripts/melo-tts/zh_en/lexicon.txt .
            cp -v ../scripts/melo-tts/zh_en/tokens.txt .
            cp -v ../scripts/melo-tts/zh_en/README.md .

            curl -SL -O https://raw.githubusercontent.com/myshell-ai/MeloTTS/main/LICENSE

            curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/new_heteronym.fst
            curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/date.fst
            curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/number.fst
            curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/phone.fst
            curl -SL -O https://github.com/csukuangfj/cppjieba/releases/download/sherpa-onnx-2024-04-19/dict.tar.bz2
            tar xvf dict.tar.bz2
            rm dict.tar.bz2

            git lfs track "*.onnx"
            git add .

            ls -lh

            git status

            git diff

            git commit -m "add models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/vits-melo-tts-zh_en main || true

            cd ..

            rm -rf huggingface/.git*
            dst=vits-melo-tts-zh_en

            mv huggingface $dst

            tar cjvf $dst.tar.bz2 $dst
            rm -rf $dst

      - name: Publish to huggingface (English)
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/vits-melo-tts-en huggingface
            cd huggingface
            git fetch
            git pull
            echo "pwd: $PWD"
            ls -lh ../scripts/melo-tts/en

            rm -rf ./

            cp -v ../scripts/melo-tts/en/*.onnx .
            cp -v ../scripts/melo-tts/en/lexicon.txt .
            cp -v ../scripts/melo-tts/en/tokens.txt .
            cp -v ../scripts/melo-tts/en/README.md .

            curl -SL -O https://raw.githubusercontent.com/myshell-ai/MeloTTS/main/LICENSE

            git lfs track "*.onnx"
            git add .

            ls -lh

            git status

            git diff

            git commit -m "add models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/vits-melo-tts-en main || true

            cd ..

            rm -rf huggingface/.git*
            dst=vits-melo-tts-en

            mv huggingface $dst

            tar cjvf $dst.tar.bz2 $dst
            rm -rf $dst

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: tts-models


================================================
FILE: .github/workflows/export-moonshine-to-onnx.yaml
================================================
name: export-moonshine-to-onnx

on:
  push:
    branches:
      - jni-moonshine-v2-2
  workflow_dispatch:

concurrency:
  group: export-moonshine-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-moonshine-to-onnx:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: export moonshine ${{ matrix.version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        python-version: ["3.10"]
        version: [v2]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Python dependencies
        shell: bash
        run: |
          pip install -q onnx onnxruntime librosa tokenizers soundfile moonshine-voice

      - name: Run v1
        if: matrix.version == 'v1'
        shell: bash
        run: |
          pushd scripts/moonshine
          ./run.sh
          popd

          mv -v scripts/moonshine/*.tar.bz2 .
          mv -v scripts/moonshine/sherpa-onnx-* ./

      - name: Run v2
        if: matrix.version == 'v2'
        shell: bash
        run: |
          pushd scripts/moonshine/v2
          ./run.sh
          popd

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models

      - name: Publish to huggingface
        if: true
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            models=(
              sherpa-onnx-moonshine-tiny-en-int8
              sherpa-onnx-moonshine-base-en-int8
              sherpa-onnx-moonshine-base-ar-quantized-2026-02-27
              sherpa-onnx-moonshine-base-en-quantized-2026-02-27
              sherpa-onnx-moonshine-base-es-quantized-2026-02-27
              sherpa-onnx-moonshine-base-ja-quantized-2026-02-27
              sherpa-onnx-moonshine-base-uk-quantized-2026-02-27
              sherpa-onnx-moonshine-base-vi-quantized-2026-02-27
              sherpa-onnx-moonshine-base-zh-quantized-2026-02-27
              sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27
              sherpa-onnx-moonshine-tiny-ja-quantized-2026-02-27
              sherpa-onnx-moonshine-tiny-ko-quantized-2026-02-27
            )
            for d in ${models[@]}; do
              if [ ! -d $d ]; then
                continue;
              fi

              export GIT_LFS_SKIP_SMUDGE=1
              export GIT_CLONE_PROTECTION_ACTIVE=false
              rm -rf huggingface
              git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/$d huggingface

              rm -rf huggingface/*.onnx
              rm -rf huggingface/*/*.wav

              cp -av $d/* huggingface

              pushd huggingface
              git lfs track "*.onnx"
              git lfs track "*.ort"
              git lfs track "*.data"
              git lfs track "*.weights"
              git lfs track "bpe.model"
              git lfs track "*.wav"
              git lfs track "*.json"
              git status
              git add .

              git commit -m "add models"
              git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/$d main

              popd
            done

            rm -rf huggingface

      - name: Publish to modelscope
        if: true
        env:
          MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            rm -rf ms
            git clone https://oauth2:${MS_TOKEN}@www.modelscope.cn/csukuangfj/asr-models.git ms

            cp -av *.tar.bz2 ms/

            pushd ms
            git lfs track "*.tar.bz2"
            git status
            ls -lh
            git add .

            git commit -m "add models"
            git push https://oauth2:${MS_TOKEN}@www.modelscope.cn/csukuangfj/asr-models.git
            popd

            rm -rf ms


================================================
FILE: .github/workflows/export-nemo-canary-180m-flash.yaml
================================================
name: export-nemo-canary-180m-flash

on:
  push:
    branches:
      - export-nemo-canary
  workflow_dispatch:

concurrency:
  group: export-nemo-canary-180m-flash-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-nemo-canary-180m-flash:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: parakeet nemo canary 180m flash
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Run
        shell: bash
        run: |
          cd scripts/nemo/canary
          ./run_180m_flash.sh

          ls -lh *.onnx
          mv -v *.onnx ../../..
          mv -v tokens.txt ../../..
          mv de.wav ../../../
          mv en.wav ../../../

      - name: Collect files (fp32)
        shell: bash
        run: |
          d=sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr
          mkdir -p $d
          cp encoder.onnx $d
          cp decoder.onnx $d
          cp tokens.txt $d

          mkdir $d/test_wavs
          cp de.wav $d/test_wavs
          cp en.wav $d/test_wavs

          tar cjfv $d.tar.bz2 $d

      - name: Collect files (int8)
        shell: bash
        run: |
          d=sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8
          mkdir -p $d
          cp encoder.int8.onnx $d
          cp decoder.int8.onnx $d
          cp tokens.txt $d

          mkdir $d/test_wavs
          cp de.wav $d/test_wavs
          cp en.wav $d/test_wavs

          tar cjfv $d.tar.bz2 $d

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            models=(
              sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr
              sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8
            )

            for m in ${models[@]}; do
              rm -rf huggingface
              export GIT_LFS_SKIP_SMUDGE=1
              export GIT_CLONE_PROTECTION_ACTIVE=false
              git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface
              cp -av $m/* huggingface
              cd huggingface
              git lfs track "*.onnx"
              git lfs track "*.wav"
              git status
              git add .
              git status
              git commit -m "first commit"
              git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m main
              cd ..
            done

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models


================================================
FILE: .github/workflows/export-nemo-fast-conformer-hybrid-transducer-ctc-non-streaming.yaml
================================================
name: export-nemo-fast-conformer-ctc-non-streaming

on:
  workflow_dispatch:

concurrency:
  group: export-nemo-fast-conformer-hybrid-transducer-ctc-non-streaming-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-nemo-fast-conformer-hybrid-transducer-ctc-non-streaming:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: Hybrid ctc non-streaming
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install NeMo
        shell: bash
        run: |
          BRANCH='main'
          pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]
          pip install onnxruntime ipython
          pip install kaldi-native-fbank
          pip install soundfile librosa

      - name: Run
        shell: bash
        run: |
          cd scripts/nemo/fast-conformer-hybrid-transducer-ctc
          ./run-ctc-non-streaming-2.sh
          ./run-ctc-non-streaming.sh

          mv -v sherpa-onnx-nemo* ../../..

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            models=(
              sherpa-onnx-nemo-fast-conformer-ctc-en-24500
              sherpa-onnx-nemo-fast-conformer-ctc-es-1424
              sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288
              sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k
              sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000
              sherpa-onnx-nemo-fast-conformer-ctc-en-24500-int8
              sherpa-onnx-nemo-fast-conformer-ctc-es-1424-int8
              sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288-int8
              sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8
              sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8
              sherpa-onnx-nemo-stt_pt_fastconformer_hybrid_large_pc
              sherpa-onnx-nemo-stt_pt_fastconformer_hybrid_large_pc-int8
              sherpa-onnx-nemo-stt_de_fastconformer_hybrid_large_pc
              sherpa-onnx-nemo-stt_de_fastconformer_hybrid_large_pc-int8
            )

            for m in ${models[@]}; do
              rm -rf huggingface
              export GIT_LFS_SKIP_SMUDGE=1
              export GIT_CLONE_PROTECTION_ACTIVE=false
              git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface
              cp -av $m/* huggingface
              cd huggingface
              git lfs track "*.onnx" "*.wav"
              git status
              git add .
              git status
              git commit -m "first commit"
              git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m main
              cd ..
              rm -rf huggingface
            done

      - name: Compress files
        shell: bash
        run: |
          dirs=(
            sherpa-onnx-nemo-fast-conformer-ctc-en-24500
            sherpa-onnx-nemo-fast-conformer-ctc-es-1424
            sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288
            sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k
            sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000
            sherpa-onnx-nemo-fast-conformer-ctc-en-24500-int8
            sherpa-onnx-nemo-fast-conformer-ctc-es-1424-int8
            sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288-int8
            sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8
            sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8
            sherpa-onnx-nemo-stt_pt_fastconformer_hybrid_large_pc
            sherpa-onnx-nemo-stt_pt_fastconformer_hybrid_large_pc-int8
            sherpa-onnx-nemo-stt_de_fastconformer_hybrid_large_pc
            sherpa-onnx-nemo-stt_de_fastconformer_hybrid_large_pc-int8
          )
          for d in ${dirs[@]}; do
            tar cjvf ${d}.tar.bz2 ./$d
          done

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models


================================================
FILE: .github/workflows/export-nemo-fast-conformer-hybrid-transducer-ctc.yaml
================================================
name: export-nemo-fast-conformer-ctc-to-onnx

on:
  workflow_dispatch:

concurrency:
  group: export-nemo-fast-conformer-hybrid-transducer-ctc-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-nemo-fast-conformer-hybrid-transducer-ctc-to-onnx:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: Hybrid ctc streaming
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install NeMo
        shell: bash
        run: |
          BRANCH='main'
          pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]
          pip install onnxruntime ipython
          pip install kaldi-native-fbank
          pip install soundfile librosa

      - name: Run
        shell: bash
        run: |
          cd scripts/nemo/fast-conformer-hybrid-transducer-ctc
          ./run-ctc.sh

          mv -v sherpa-onnx-nemo* ../../..

      - name: Download test waves
        shell: bash
        run: |
          mkdir test_wavs
          pushd test_wavs
          curl -SL -O https://hf-mirror.com/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-small/resolve/main/test_wavs/0.wav
          curl -SL -O https://hf-mirror.com/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-small/resolve/main/test_wavs/1.wav
          curl -SL -O https://hf-mirror.com/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-small/resolve/main/test_wavs/8k.wav
          curl -SL -O https://hf-mirror.com/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-small/resolve/main/test_wavs/trans.txt
          popd

          names=(
            sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms
            sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms
            sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms
            sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms-int8
            sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms-int8
            sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms-int8
          )
          for d in ${names[@]}; do
            cp -av test_wavs $d/
            tar cjvf $d.tar.bz2 $d
          done

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            models=(
              sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms
              sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms
              sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms
              sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms-int8
              sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms-int8
              sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms-int8
            )

            for m in ${models[@]}; do
              rm -rf huggingface
              export GIT_LFS_SKIP_SMUDGE=1
              export GIT_CLONE_PROTECTION_ACTIVE=false
              git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface
              cp -av $m/* huggingface
              cd huggingface
              git lfs track "*.onnx"
              git lfs track "*.wav"
              git status
              git add .
              git status
              git commit -m "first commit"
              git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m main
              cd ..
            done


================================================
FILE: .github/workflows/export-nemo-fast-conformer-hybrid-transducer-transducer-non-streaming.yaml
================================================
name: export-nemo-fast-conformer-transducer-non-streaming

on:
  workflow_dispatch:

concurrency:
  group: export-nemo-fast-conformer-hybrid-transducer-transducer-non-streaming-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-nemo-fast-conformer-hybrid-transducer-transducer-non-streaming:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: Hybrid transducer non-streaming
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install NeMo
        shell: bash
        run: |
          BRANCH='main'
          pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]
          pip install onnxruntime ipython
          pip install kaldi-native-fbank
          pip install soundfile librosa

      - name: Run
        shell: bash
        run: |
          cd scripts/nemo/fast-conformer-hybrid-transducer-ctc
          ./run-transducer-non-streaming-2.sh
          ./run-transducer-non-streaming.sh

          mv -v sherpa-onnx-nemo* ../../..

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            models=(
              sherpa-onnx-nemo-fast-conformer-transducer-en-24500
              sherpa-onnx-nemo-fast-conformer-transducer-es-1424
              sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288
              sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k
              sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000
              sherpa-onnx-nemo-fast-conformer-transducer-en-24500-int8
              sherpa-onnx-nemo-fast-conformer-transducer-es-1424-int8
              sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288-int8
              sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8
              sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000-int8
              sherpa-onnx-nemo-transducer-stt_pt_fastconformer_hybrid_large_pc
              sherpa-onnx-nemo-transducer-stt_pt_fastconformer_hybrid_large_pc-int8
              sherpa-onnx-nemo-transducer-stt_de_fastconformer_hybrid_large_pc
              sherpa-onnx-nemo-transducer-stt_de_fastconformer_hybrid_large_pc-int8
            )

            for m in ${models[@]}; do
              rm -rf huggingface
              export GIT_LFS_SKIP_SMUDGE=1
              export GIT_CLONE_PROTECTION_ACTIVE=false
              git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface
              cp -av $m/* huggingface
              cd huggingface
              git lfs track "*.onnx" "*.wav"
              git status
              git add .
              git status
              git commit -m "first commit"
              git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m main
              cd ..
            done

      - name: Compress files
        shell: bash
        run: |
          dirs=(
            sherpa-onnx-nemo-fast-conformer-transducer-en-24500
            sherpa-onnx-nemo-fast-conformer-transducer-es-1424
            sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288
            sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k
            sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000
            sherpa-onnx-nemo-fast-conformer-transducer-en-24500-int8
            sherpa-onnx-nemo-fast-conformer-transducer-es-1424-int8
            sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288-int8
            sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8
            sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000-int8
            sherpa-onnx-nemo-transducer-stt_pt_fastconformer_hybrid_large_pc
            sherpa-onnx-nemo-transducer-stt_pt_fastconformer_hybrid_large_pc-int8
            sherpa-onnx-nemo-transducer-stt_de_fastconformer_hybrid_large_pc
            sherpa-onnx-nemo-transducer-stt_de_fastconformer_hybrid_large_pc-int8
          )
          for d in ${dirs[@]}; do
            tar cjvf ${d}.tar.bz2 ./$d
          done

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models


================================================
FILE: .github/workflows/export-nemo-fast-conformer-hybrid-transducer-transducer.yaml
================================================
name: export-nemo-fast-conformer-transducer-to-onnx

on:
  workflow_dispatch:

concurrency:
  group: export-nemo-fast-conformer-hybrid-transducer-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-nemo-fast-conformer-hybrid-transducer-to-onnx:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: Hybrid transducer streaming
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install NeMo
        shell: bash
        run: |
          BRANCH='main'
          pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]
          pip install onnxruntime ipython
          pip install kaldi-native-fbank
          pip install soundfile librosa

      - name: Run
        shell: bash
        run: |
          cd scripts/nemo/fast-conformer-hybrid-transducer-ctc
          ./run-transducer.sh

          mv -v sherpa-onnx-nemo* ../../..

      - name: Download test waves
        shell: bash
        run: |
          mkdir test_wavs
          pushd test_wavs
          curl -SL -O https://hf-mirror.com/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-small/resolve/main/test_wavs/0.wav
          curl -SL -O https://hf-mirror.com/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-small/resolve/main/test_wavs/1.wav
          curl -SL -O https://hf-mirror.com/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-small/resolve/main/test_wavs/8k.wav
          curl -SL -O https://hf-mirror.com/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-small/resolve/main/test_wavs/trans.txt
          popd

          models=(
            sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms
            sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms
            sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms
            sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms-int8
            sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms-int8
            sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms-int8
          )
          for m in ${models[@]}; do
            cp -av test_wavs $m
            tar cjvf $m.tar.bz2 $m
          done

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            models=(
              sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms
              sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms
              sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms
              sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms-int8
              sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms-int8
              sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms-int8
            )

            for m in ${models[@]}; do
              rm -rf huggingface
              export GIT_LFS_SKIP_SMUDGE=1
              export GIT_CLONE_PROTECTION_ACTIVE=false
              git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface
              cp -av $m/* huggingface
              cd huggingface
              git lfs track "*.onnx"
              git lfs track "*.wav"
              git status
              git add .
              git status
              git commit -m "first commit"
              git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m main
              cd ..
            done


================================================
FILE: .github/workflows/export-nemo-giga-am-to-onnx.yaml
================================================
name: export-nemo-giga-am-to-onnx

on:
  push:
    branches:
      - export-giga-am-v3
  workflow_dispatch:

concurrency:
  group: export-nemo-giga-am-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-nemo-am-giga-to-onnx:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: export nemo GigaAM models to ONNX
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Run CTC
        if: false
        shell: bash
        run: |
          pushd scripts/nemo/GigaAM
          ./run-ctc.sh
          popd

          d=sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24
          mkdir $d
          mkdir $d/test_wavs
          rm scripts/nemo/GigaAM/model.onnx
          mv -v scripts/nemo/GigaAM/*.int8.onnx $d/
          cp -v scripts/nemo/GigaAM/*.md $d/
          mv -v scripts/nemo/GigaAM/*.pdf $d/
          mv -v scripts/nemo/GigaAM/tokens.txt $d/
          mv -v scripts/nemo/GigaAM/*.wav $d/test_wavs/
          mv -v scripts/nemo/GigaAM/run-ctc.sh $d/
          mv -v scripts/nemo/GigaAM/export-onnx-ctc.py $d/
          cp -v scripts/nemo/GigaAM/test-onnx-ctc.py $d/

          ls -lh scripts/nemo/GigaAM/

          ls -lh $d

          tar cjvf ${d}.tar.bz2 $d

      - name: Run Transducer
        if: false
        shell: bash
        run: |
          pushd scripts/nemo/GigaAM
          ./run-rnnt.sh
          popd

          d=sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24
          mkdir $d
          mkdir $d/test_wavs

          mv -v scripts/nemo/GigaAM/encoder.int8.onnx $d/
          mv -v scripts/nemo/GigaAM/decoder.onnx $d/
          mv -v scripts/nemo/GigaAM/joiner.onnx $d/

          cp -v scripts/nemo/GigaAM/*.md $d/
          mv -v scripts/nemo/GigaAM/*.pdf $d/
          mv -v scripts/nemo/GigaAM/tokens.txt $d/
          mv -v scripts/nemo/GigaAM/*.wav $d/test_wavs/
          mv -v scripts/nemo/GigaAM/run-rnnt.sh $d/
          mv -v scripts/nemo/GigaAM/export-onnx-rnnt.py $d/
          cp -v scripts/nemo/GigaAM/test-onnx-rnnt.py $d/

          ls -lh scripts/nemo/GigaAM/

          ls -lh $d

          tar cjvf ${d}.tar.bz2 $d

      - name: Run CTC v2
        if: false
        shell: bash
        run: |
          pushd scripts/nemo/GigaAM
          ./run-ctc-v2.sh
          popd

          d=sherpa-onnx-nemo-ctc-giga-am-v2-russian-2025-04-19
          mkdir $d
          mkdir $d/test_wavs
          rm scripts/nemo/GigaAM/v2_ctc.onnx
          mv -v scripts/nemo/GigaAM/*.int8.onnx $d/
          cp -v scripts/nemo/GigaAM/LICENSE $d/
          mv -v scripts/nemo/GigaAM/tokens.txt $d/
          mv -v scripts/nemo/GigaAM/*.wav $d/test_wavs/
          mv -v scripts/nemo/GigaAM/run-ctc-v2.sh $d/
          mv -v scripts/nemo/GigaAM/*-ctc-v2.py $d/
          cp -v scripts/nemo/GigaAM/test-onnx-ctc.py $d/

          ls -lh scripts/nemo/GigaAM/

          ls -lh $d

          tar cjvf ${d}.tar.bz2 $d

      - name: Run Transducer v2
        if: false
        shell: bash
        run: |
          pushd scripts/nemo/GigaAM
          ./run-rnnt-v2.sh
          popd

          d=sherpa-onnx-nemo-transducer-giga-am-v2-russian-2025-04-19
          mkdir $d
          mkdir $d/test_wavs

          mv -v scripts/nemo/GigaAM/encoder.int8.onnx $d/
          mv -v scripts/nemo/GigaAM/decoder.onnx $d/
          mv -v scripts/nemo/GigaAM/joiner.onnx $d/

          cp -v scripts/nemo/GigaAM/*.md $d/
          cp -v scripts/nemo/GigaAM/LICENSE $d/
          mv -v scripts/nemo/GigaAM/tokens.txt $d/
          mv -v scripts/nemo/GigaAM/*.wav $d/test_wavs/
          mv -v scripts/nemo/GigaAM/run-rnnt-v2.sh $d/
          cp -v scripts/nemo/GigaAM/test-onnx-rnnt.py $d/

          ls -lh scripts/nemo/GigaAM/

          ls -lh $d

          tar cjvf ${d}.tar.bz2 $d

      - name: Run CTC v3
        if: true
        shell: bash
        run: |
          pushd scripts/nemo/GigaAM
          ./run-ctc-v3.sh
          popd

          d=sherpa-onnx-nemo-ctc-giga-am-v3-russian-2025-12-16
          mkdir $d
          mkdir $d/test_wavs
          ls -lh scripts/nemo/GigaAM/v3_ctc.onnx
          rm scripts/nemo/GigaAM/v3_ctc.onnx
          cp -v scripts/nemo/GigaAM/*.md $d/
          mv -v scripts/nemo/GigaAM/*.int8.onnx $d/
          cp -v scripts/nemo/GigaAM/LICENSE $d/
          mv -v scripts/nemo/GigaAM/tokens.txt $d/
          mv -v scripts/nemo/GigaAM/*.wav $d/test_wavs/
          mv -v scripts/nemo/GigaAM/run-ctc-v3.sh $d/
          mv -v scripts/nemo/GigaAM/*-ctc-v3.py $d/
          cp -v scripts/nemo/GigaAM/test-onnx-ctc.py $d/

          ls -lh scripts/nemo/GigaAM/

          ls -lh $d

          tar cjvf ${d}.tar.bz2 $d

          ls -lh *.tar.bz2

      - name: Run CTC v3 with punctuations
        if: true
        shell: bash
        run: |
          pushd scripts/nemo/GigaAM
          ./run-ctc-v3-punct.sh
          popd

          d=sherpa-onnx-nemo-ctc-punct-giga-am-v3-russian-2025-12-16
          mkdir $d
          mkdir $d/test_wavs
          rm scripts/nemo/GigaAM/v3_e2e_ctc.onnx
          cp -v scripts/nemo/GigaAM/*.md $d/
          mv -v scripts/nemo/GigaAM/*.int8.onnx $d/
          cp -v scripts/nemo/GigaAM/LICENSE $d/
          mv -v scripts/nemo/GigaAM/tokens.txt $d/
          mv -v scripts/nemo/GigaAM/*.wav $d/test_wavs/
          mv -v scripts/nemo/GigaAM/run-ctc-v3-punct.sh $d/
          mv -v scripts/nemo/GigaAM/*-ctc-v3-punct.py $d/
          cp -v scripts/nemo/GigaAM/test-onnx-ctc.py $d/

          ls -lh scripts/nemo/GigaAM/

          ls -lh $d

          tar cjvf ${d}.tar.bz2 $d

          ls -lh *.tar.bz2

      - name: Run Transducer v3
        if: false
        shell: bash
        run: |
          pushd scripts/nemo/GigaAM
          ./run-rnnt-v3.sh
          popd

          d=sherpa-onnx-nemo-transducer-giga-am-v3-russian-2025-12-16
          mkdir $d
          mkdir $d/test_wavs

          mv -v scripts/nemo/GigaAM/encoder.int8.onnx $d/
          mv -v scripts/nemo/GigaAM/decoder.onnx $d/
          mv -v scripts/nemo/GigaAM/joiner.onnx $d/

          cp -v scripts/nemo/GigaAM/*.md $d/
          cp -v scripts/nemo/GigaAM/LICENSE $d/
          mv -v scripts/nemo/GigaAM/tokens.txt $d/
          mv -v scripts/nemo/GigaAM/*.wav $d/test_wavs/
          mv -v scripts/nemo/GigaAM/run-rnnt-v3.sh $d/
          cp -v scripts/nemo/GigaAM/test-onnx-rnnt.py $d/

          ls -lh scripts/nemo/GigaAM/

          ls -lh $d

          tar cjvf ${d}.tar.bz2 $d

      - name: Run Transducer v3 with punctuations
        if: false
        shell: bash
        run: |
          pushd scripts/nemo/GigaAM
          ./run-rnnt-v3-punct.sh
          popd

          d=sherpa-onnx-nemo-transducer-punct-giga-am-v3-russian-2025-12-16
          mkdir $d
          mkdir $d/test_wavs

          mv -v scripts/nemo/GigaAM/encoder.int8.onnx $d/
          mv -v scripts/nemo/GigaAM/decoder.onnx $d/
          mv -v scripts/nemo/GigaAM/joiner.onnx $d/

          cp -v scripts/nemo/GigaAM/*.md $d/
          cp -v scripts/nemo/GigaAM/LICENSE $d/
          mv -v scripts/nemo/GigaAM/tokens.txt $d/
          mv -v scripts/nemo/GigaAM/*.wav $d/test_wavs/
          mv -v scripts/nemo/GigaAM/run-rnnt-v3-punct.sh $d/
          cp -v scripts/nemo/GigaAM/test-onnx-rnnt.py $d/

          ls -lh scripts/nemo/GigaAM/

          ls -lh $d

          tar cjvf ${d}.tar.bz2 $d

      - name: Release
        if: github.repository_owner == 'csukuangfj'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models

      - name: Release
        if: github.repository_owner == 'k2-fsa'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          tag: asr-models

      - name: Publish to huggingface (CTC)
        if: false
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            d=sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24/
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false
            rm -rf huggingface
            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
            cp -av $d/* ./huggingface
            cd huggingface
            git lfs track "*.onnx"
            git lfs track "*.wav"
            git status
            git add .
            git status
            git commit -m "add models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main

      - name: Publish to huggingface (Transducer)
        if: false
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 5
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            d=sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24/
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false
            rm -rf huggingface
            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
            cp -av $d/* ./huggingface
            cd huggingface
            git lfs track "*.onnx"
            git lfs track "*.wav"
            git status
            git add .
            git status
            git commit -m "add models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main

      - name: Publish v2 to huggingface (CTC)
        if: false
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 5
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            d=sherpa-onnx-nemo-ctc-giga-am-v2-russian-2025-04-19/
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false
            rm -rf huggingface
            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
            cp -av $d/* ./huggingface
            cd huggingface
            git lfs track "*.onnx"
            git lfs track "*.wav"
            git status
            git add .
            git status
            git commit -m "add models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main

      - name: Publish v2 to huggingface (Transducer)
        if: false
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 5
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            d=sherpa-onnx-nemo-transducer-giga-am-v2-russian-2025-04-19/
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false
            rm -rf huggingface
            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
            cp -av $d/* ./huggingface
            cd huggingface
            git lfs track "*.onnx"
            git lfs track "*.wav"
            git status
            git add .
            git status
            git commit -m "add models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main

      - name: Publish v3 to huggingface
        if: true
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 5
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            names=(
             sherpa-onnx-nemo-ctc-giga-am-v3-russian-2025-12-16
             sherpa-onnx-nemo-ctc-punct-giga-am-v3-russian-2025-12-16
             sherpa-onnx-nemo-transducer-giga-am-v3-russian-2025-12-16
             sherpa-onnx-nemo-transducer-punct-giga-am-v3-russian-2025-12-16
            )
            for d in ${names[@]}; do
              if [ ! -d $d ]; then
                echo "$d does not exist - skip it"
                continue;
              fi

              export GIT_LFS_SKIP_SMUDGE=1
              export GIT_CLONE_PROTECTION_ACTIVE=false
              rm -rf huggingface
              git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
              cp -av $d/* ./huggingface
              cd huggingface
              git lfs track "*.onnx"
              git lfs track "*.wav"
              git status
              git add .
              git status
              git commit -m "add models"
              git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main
              cd ..
            done


================================================
FILE: .github/workflows/export-nemo-parakeet-tdt-0.6b-v2.yaml
================================================
name: export-nemo-parakeet-tdt-0.6b

on:
  push:
    branches:
      - export-nemo-parakeet-tdt-0.6b-v2
  workflow_dispatch:

concurrency:
  group: export-nemo-parakeet-tdt-0.6b-v2-${{ github.ref }}
  cancel-in-progress: true

env:
  HF_HUB_ENABLE_HF_TRANSFER: "0"

jobs:
  export-nemo-parakeet-tdt-0_6b:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: parakeet tdt 0.6b ${{ matrix.version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        python-version: ["3.10"]
        version: ["v2", "v3"]

    steps:
      - uses: actions/checkout@v4

      - name: Show disk space
        run: |
          df -h

      # See https://github.com/vlayer-xyz/vlayer/pull/543/files
      # Free up disk space as the macOS runners end up using most for Xcode
      # versions we don't need and use iOS simulators.
      - name: Free up disk space
        run: |
          echo '*** Delete iOS simulators and their caches'
          xcrun simctl delete all
          sudo rm -rf ~/Library/Developer/CoreSimulator/Caches/*

      - name: Show disk space
        run: |
          df -h

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Run ${{ matrix.version }}
        if: matrix.version == 'v2'
        shell: bash
        run: |
          cd scripts/nemo/parakeet-tdt-0.6b-v2
          ./run.sh

          ls -lh *.onnx
          ls -lh *.weights

          mv -v *.onnx ../../..
          mv -v *.weights ../../..
          mv -v tokens.txt ../../..
          mv 2086-149220-0033.wav ../../../0.wav

      - name: Run ${{ matrix.version }}
        if: matrix.version == 'v3'
        shell: bash
        run: |
          cd scripts/nemo/parakeet-tdt-0.6b-v3
          ./run.sh

          ls -lh *.onnx
          mv -v *.onnx ../../..
          mv -v *.weights ../../..
          mv -v tokens.txt ../../..
          mv *.wav ../../../

      - name: Collect files (fp32)
        shell: bash
        run: |
          version=${{ matrix.version }}
          d=sherpa-onnx-nemo-parakeet-tdt-0.6b-$version
          mkdir -p $d
          cp -v encoder.onnx $d
          cp -v encoder.weights $d
          cp -v decoder.onnx $d
          cp -v joiner.onnx $d
          cp -v tokens.txt $d

          mkdir $d/test_wavs
          cp -v *.wav $d/test_wavs

          # tar cjfv $d.tar.bz2 $d

          # ls -lh *.tar.bz2

      - name: Collect files (int8)
        shell: bash
        run: |
          version=${{ matrix.version }}
          d=sherpa-onnx-nemo-parakeet-tdt-0.6b-$version-int8
          mkdir -p $d
          cp -v encoder.int8.onnx $d
          cp -v decoder.int8.onnx $d
          cp -v joiner.int8.onnx $d
          cp -v tokens.txt $d

          mkdir $d/test_wavs
          cp -v *.wav $d/test_wavs

          tar cjfv $d.tar.bz2 $d

          ls -lh *.tar.bz2

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            version=${{ matrix.version }}
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            models=(
              sherpa-onnx-nemo-parakeet-tdt-0.6b-$version
              sherpa-onnx-nemo-parakeet-tdt-0.6b-$version-int8
            )

            for m in ${models[@]}; do
              rm -rf huggingface
              export GIT_LFS_SKIP_SMUDGE=1
              export GIT_CLONE_PROTECTION_ACTIVE=false
              git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface
              cp -av $m/* huggingface
              cd huggingface
              git lfs track "*.onnx"
              git lfs track "*.wav"
              git lfs track "*.weights"
              git status
              git add .
              git status
              git commit -m "first commit"
              git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m main
              cd ..
            done

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models


================================================
FILE: .github/workflows/export-nemo-parakeet-tdt.yaml
================================================
name: export-nemo-parakeet-tdt

on:
  push:
    branches:
      - refactor-export-nemo
  workflow_dispatch:

concurrency:
  group: export-nemo-parakeet-tdt-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-nemo-parakeet-tdt-0_6b-v2:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: parakeet tdt
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install python dependencies
        shell: bash
        run: |
          pip install \
            nemo_toolkit['asr'] \
            "numpy<2" \
            ipython \
            kaldi-native-fbank \
            librosa \
            onnx==1.17.0 \
            onnxmltools==1.13.0 \
            onnxruntime==1.17.1 \
            soundfile

      - name: Run
        shell: bash
        run: |
          cd scripts/nemo/parakeet-tdt_ctc-0.6b-ja
          ./run-ctc.sh

      - name: Collect files
        shell: bash
        run: |
          models=(
            sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8
          )
          for m in ${models[@]}; do
            mv -v scripts/nemo/parakeet-tdt_ctc-0.6b-ja/$m .
            tar cjfv $m.tar.bz2 $m
          done


      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            models=(
              sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8
            )

            for m in ${models[@]}; do
              rm -rf huggingface
              export GIT_LFS_SKIP_SMUDGE=1
              export GIT_CLONE_PROTECTION_ACTIVE=false
              git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface
              cp -av $m/* huggingface
              cd huggingface
              git lfs track "*.onnx"
              git lfs track "*.wav"
              git status
              git add .
              git status
              git commit -m "first commit"
              git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m main
              cd ..
            done

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models


================================================
FILE: .github/workflows/export-nemo-speaker-verification-to-onnx.yaml
================================================
name: export-nemo-speaker-verification-to-onnx

on:
  workflow_dispatch:

concurrency:
  group: export-nemo-speaker-verification-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-nemo-speaker-verification-to-onnx:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: export nemo speaker verification models to ONNX
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Run
        shell: bash
        run: |
          cd scripts/nemo/speaker-verification
          ./run.sh

          mv -v *.onnx ../../..

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.onnx
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: speaker-recongition-models

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            d=speaker-embedding-models
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false
            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
            mv -v ./*.onnx ./huggingface
            cd huggingface
            git lfs track "*.onnx"
            git status
            git add .
            git status
            git commit -m "add models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main


================================================
FILE: .github/workflows/export-nemotron-speech-streaming-en-0.6b.yaml
================================================
name: export-nemotron-speech-streaming-en-06b

on:
  push:
    branches:
      - export-nemotron-streaming-2
  workflow_dispatch:

concurrency:
  group: export-nemotron-streaming-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-nemotron-speech-streaming-en-0-6b-to-onnx:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: nemotron-speech-streaming-en-0-6b-to-onnx
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install NeMo
        shell: bash
        run: |
          BRANCH='main'
          pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]
          pip install onnxruntime ipython
          pip install kaldi-native-fbank
          pip install soundfile librosa

      - name: Run
        shell: bash
        run: |
          cd scripts/nemo/nemotron-speech-streaming-en-0.6b

          python3 ./export_onnx.py

          ls -lh *.onnx
          echo "---"
          ls -lh encoder.*

      - name: Collect results
        shell: bash
        run: |
          src=scripts/nemo/nemotron-speech-streaming-en-0.6b
          d=sherpa-onnx-nemotron-speech-streaming-en-0.6b-2026-01-14
          mkdir -p $d

          cp -av $src/encoder.onnx $d/
          cp -av $src/encoder.data $d/
          cp -av $src/decoder.onnx $d/
          cp -av $src/joiner.onnx $d/
          cp -av $src/tokens.txt $d/
          cat >$d/README.md <<EOF
          # Introduction
          This model is from https://huggingface.co/nvidia/nemotron-speech-streaming-en-0.6b
          EOF

          ls -lh $d

          d=sherpa-onnx-nemotron-speech-streaming-en-0.6b-int8-2026-01-14
          mkdir -p $d

          cp -av $src/encoder.int8.onnx $d/
          cp -av $src/decoder.int8.onnx $d/
          cp -av $src/joiner.int8.onnx $d/
          cp -av $src/tokens.txt $d/
          cat >$d/README.md <<EOF
          # Introduction
          This model is from https://huggingface.co/nvidia/nemotron-speech-streaming-en-0.6b
          EOF

          ls -lh $d


      - name: Download test waves
        if: true
        shell: bash
        run: |
          mkdir test_wavs
          pushd test_wavs
          curl -SL -O https://hf-mirror.com/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-small/resolve/main/test_wavs/0.wav
          curl -SL -O https://hf-mirror.com/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-small/resolve/main/test_wavs/1.wav
          curl -SL -O https://hf-mirror.com/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-small/resolve/main/test_wavs/8k.wav
          curl -SL -O https://hf-mirror.com/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-small/resolve/main/test_wavs/trans.txt
          popd

          models=(
            sherpa-onnx-nemotron-speech-streaming-en-0.6b-int8-2026-01-14
            sherpa-onnx-nemotron-speech-streaming-en-0.6b-2026-01-14
          )
          for m in ${models[@]}; do
            cp -av test_wavs $m
            tar cjvf $m.tar.bz2 $m
          done

          ls -lh *.tar.bz2

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          # fp32 models is 2.2GB > 2GB
          file: sherpa-onnx-nemotron-speech-streaming-en-0.6b-int8-2026-01-14.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            models=(
              sherpa-onnx-nemotron-speech-streaming-en-0.6b-int8-2026-01-14
              sherpa-onnx-nemotron-speech-streaming-en-0.6b-2026-01-14
            )

            for m in ${models[@]}; do
              if [ ! -d $m ]; then
                echo "skip $m"
                continue
              fi

              rm -rf huggingface
              export GIT_LFS_SKIP_SMUDGE=1
              export GIT_CLONE_PROTECTION_ACTIVE=false
              git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface
              cp -av $m/* huggingface
              cd huggingface
              git lfs track "*.onnx"
              git lfs track "*.data"
              git lfs track "*.wav"
              git status
              git add .
              git status
              git commit -m "first commit"
              git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m main
              cd ..
            done

            rm -rf huggingface

      - name: Publish to modelscope
        env:
          MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            for m in "*.tar.bz2"; do
              export GIT_LFS_SKIP_SMUDGE=1
              export GIT_CLONE_PROTECTION_ACTIVE=false

              rm -rf ms
              git clone https://oauth2:${MS_TOKEN}@www.modelscope.cn/csukuangfj/asr-models.git ms

              mkdir ms/nemo
              cp -av $m ms/nemo

              pushd ms
              git lfs track "*.tar.bz2"
              git status
              ls -lh
              git add .

              git commit -m "add models"
              git push https://oauth2:${MS_TOKEN}@www.modelscope.cn/csukuangfj/asr-models.git

              popd
            done
            rm -rf ms

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            for m in "*.tar.bz2"; do
              export GIT_LFS_SKIP_SMUDGE=1
              export GIT_CLONE_PROTECTION_ACTIVE=false
              rm -rf huggingface
              git clone https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-models huggingface

              d=asr-models/nemo
              mkdir -p huggingface/$d

              cp -v $m huggingface/$d/

              pushd huggingface
              git lfs track "*.tar.bz2"
              ls -lh $d/$m

              ls -lh $d

              pushd $d
              git lfs track "*.tar.bz2"
              popd

              git status
              git add .

              git commit -m "add $m"
              git push https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-models main
              popd
            done
            rm -rf huggingface


================================================
FILE: .github/workflows/export-omnilingual-asr-to-onnx.yaml
================================================
name: export-omnilingual-asr-to-onnx

on:
  push:
    branches:
      - omnilingual-1b
  workflow_dispatch:

concurrency:
  group: export-omnilingual-asr-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-omnilingual-asr-to-onnx:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: ${{ matrix.model_card }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.10"]
        model_card: ["omniASR_CTC_300M", "omniASR_CTC_300M_v2", "omniASR_CTC_1B", "omniASR_CTC_1B_v2"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        shell: bash
        run: |
          sudo apt install libsndfile1

      - name: Install Python dependencies
        shell: bash
        run: |
          pip install fairseq2 \
            --extra-index-url https://fair.pkg.atmeta.com/fairseq2/whl/pt2.8.0/cpu \
            torch==2.8.0+cpu -f https://download.pytorch.org/whl/torch \
            torchaudio==2.8.0+cpu -f https://download.pytorch.org/whl/torchaudio \
            onnx==1.17.0 \
            onnxruntime==1.17.1 \
            soundfile \
            librosa

          pip install --no-deps omnilingual_asr

          pip install retrying pandas polars pyarrow xxhash

      - name: Setup tmate session
        if: false
        uses: mxschmitt/action-tmate@v3

      - name: Run
        shell: bash
        run: |
          cd scripts/omnilingual-asr
          model_card=${{ matrix.model_card }}
          python3 ./export-onnx.py --model-card $model_card

          ls -lh *.onnx
          ls -lh *.weights || true

          rm README.md

          curl -SL -O https://raw.githubusercontent.com/facebookresearch/omnilingual-asr/refs/heads/main/README.md
          curl -SL -O https://raw.githubusercontent.com/facebookresearch/omnilingual-asr/refs/heads/main/LICENSE

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/en.wav
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/es.wav
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/fr.wav
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/de.wav

          echo "---test----"
          python3 ./test.py

          echo "---collect files----"

          if [[ $model_card == omniASR_CTC_300M ]]; then
            d=sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-2025-11-12
          elif [[ $model_card == omniASR_CTC_300M_v2 ]]; then
            d=sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-v2-2026-02-05
          elif [[ $model_card == omniASR_CTC_1B ]]; then
            d=sherpa-onnx-omnilingual-asr-1600-languages-1B-ctc-2025-11-12
          elif [[ $model_card == omniASR_CTC_1B_v2 ]]; then
            d=sherpa-onnx-omnilingual-asr-1600-languages-1B-ctc-v2-2026-02-05
          else
            echo "Unknown model: $model_card"
            exit 1
          fi

          mkdir -p $d
          mkdir -p $d/test_wavs

          mv -v model.onnx $d
          mv -v model.weights $d || true
          cp -v tokens.txt $d
          cp -v README.md $d
          cp -v LICENSE* $d
          cp -v *.wav $d/test_wavs

          ls -lh $d

          tar cjfv $d.tar.bz2 $d
          mv $d ../..

          if [[ $model_card == omniASR_CTC_300M ]]; then
            d=sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12
          elif [[ $model_card == omniASR_CTC_300M_v2 ]]; then
            d=sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-v2-int8-2026-02-05
          elif [[ $model_card == omniASR_CTC_1B ]]; then
            d=sherpa-onnx-omnilingual-asr-1600-languages-1B-ctc-int8-2025-11-12
          elif [[ $model_card == omniASR_CTC_1B_v2 ]]; then
            d=sherpa-onnx-omnilingual-asr-1600-languages-1B-ctc-v2-int8-2026-02-05
          else
            echo "Unknown model: $model_card"
            exit 1
          fi

          mkdir -p $d
          mkdir -p $d/test_wavs

          mv -v model.int8.onnx $d
          cp -v tokens.txt $d
          cp -v README.md $d
          cp -v LICENSE* $d
          cp -v *.wav $d/test_wavs
          ls -lh $d

          tar cjfv $d.tar.bz2 $d

          mv $d ../..

          mv *.tar.bz2 ../../

          cd ../..

          ls -lh *.tar.bz2

          df -h
          rm -fv onnx_* model.encoder* model.final*

          ls -lh ~/.cache/fairseq2/assets/*

          rm -rf ~/.cache/fairseq2/assets/
          rm -rf ~/.cache

          df -h

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            model_card=${{ matrix.model_card }}

            dirs=(
              sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-2025-11-12
              sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12
              sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-v2-2026-02-05
              sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-v2-int8-2026-02-05
              sherpa-onnx-omnilingual-asr-1600-languages-1B-ctc-2025-11-12
              sherpa-onnx-omnilingual-asr-1600-languages-1B-ctc-int8-2025-11-12
              sherpa-onnx-omnilingual-asr-1600-languages-1B-ctc-v2-2026-02-05
              sherpa-onnx-omnilingual-asr-1600-languages-1B-ctc-v2-int8-2026-02-05
            )

            for d in ${dirs[@]}; do
              if [[ ! -d $d ]]; then
                continue;
              fi
              rm -rf huggingface
              git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/$d huggingface
              pushd huggingface

              git fetch
              git pull
              echo "pwd: $PWD"
              rm -fv ./*.weights
              mv -v ../$d/* .

              git lfs track "*.onnx"
              git lfs track "*.weights"
              git lfs track "*.wav"
              ls -lh
              git add .

              ls -lh

              git status

              git commit -m "add models"
              git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/$d main || true
              popd
            done

      - name: Publish to modelscope
        if: true
        env:
          MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            for m in *.tar.bz2; do
              export GIT_LFS_SKIP_SMUDGE=1
              export GIT_CLONE_PROTECTION_ACTIVE=false

              rm -rf ms
              git clone https://oauth2:${MS_TOKEN}@www.modelscope.cn/csukuangfj/asr-models.git ms

              cp -av $m ms/

              pushd ms
              git lfs track "*.tar.bz2"
              git status
              ls -lh
              git add .

              git commit -m "add models"
              git push https://oauth2:${MS_TOKEN}@www.modelscope.cn/csukuangfj/asr-models.git

              popd
            done

      # List large files first (safe)
      - name: List .tar.bz2 files larger than 2GB
        run: |
          ls -lh *.tar.bz2
          echo "----"
          find . -type f -name "*.tar.bz2" -size +2G -print

      # Delete large files
      - name: Delete .tar.bz2 files larger than 2GB
        run: |
          find . -type f -name "*.tar.bz2" -size +2G -delete

          ls -lh *.tar.bz2

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models


================================================
FILE: .github/workflows/export-paraformer-to-ascend-npu.yaml
================================================
name: export-paraformer-to-ascend-npu

on:
  push:
    branches:
      - fix-ascend-2
  workflow_dispatch:

concurrency:
  group: export-paraformer-to-ascend-nput-${{ github.ref }}
  cancel-in-progress: true

jobs:
  generate_build_matrix:
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    # see https://github.com/pytorch/pytorch/pull/50633
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Generating build matrix
        id: set-matrix
        run: |
          # outputting for debugging purposes
          python3 .github/scripts/export-ascend/generate_paraformer.py
          MATRIX=$(python3 .github/scripts/export-ascend/generate_paraformer.py)

          # deprecated
          # echo "::set-output name=matrix::${MATRIX}"
          echo "matrix=$MATRIX" >> $GITHUB_OUTPUT

  export-paraformer-to-rknn:
    needs: generate_build_matrix
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: ${{ matrix.framework }} ${{ matrix.soc_version }} ${{ matrix.cann }}
    runs-on: ubuntu-latest

    strategy:
      fail-fast: false
      matrix:
        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}

    container:
      image: ${{ matrix.image }}

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python 3.8
        uses: actions/setup-python@v5
        with:
          python-version: "3.8"

      - name: Show Python
        shell: bash
        run: |
          python3 --version
          which python3

      - name: Install curl
        shell: bash
        run: apt-get update && apt-get install -y curl bzip2 git git-lfs

      - name: Verify environment
        shell: bash
        run: |
          ls -lh /usr/local/Ascend/ascend-toolkit/set_env.sh

          find /usr/local/Ascend -name "libascend*.so" 2>/dev/null

          source /usr/local/Ascend/ascend-toolkit/set_env.sh
          export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib/linux/x86_64:$LD_LIBRARY_PATH

          # for cann 7.0.0
          export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib/x86_64:$LD_LIBRARY_PATH

          echo "CANN environment:"
          which atc || echo "atc not found"
          atc --help

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install "numpy<2" \
                  onnx==1.17.0 \
                  torch==2.0.0+cpu -f https://download.pytorch.org/whl/torch \
                  attrs psutil scipy decorator cloudpickle ml-dtypes tornado \
                  pyyaml

      - name: Setup tmate session
        if: false
        uses: mxschmitt/action-tmate@v3

      - name: Run Paraformer from FunAsr
        if: matrix.framework == 'FunASR'
        shell: bash
        run: |
          cd scripts/paraformer/ascend-npu

          curl -SL -O https://www.modelscope.cn/models/iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/resolve/master/am.mvn
          curl -SL -O https://www.modelscope.cn/models/iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/resolve/master/config.yaml
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/tokens.txt

          curl -SL -O https://www.modelscope.cn/models/iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/resolve/master/model.pt
          mv model.pt model_state_dict.pt

          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/test_wavs/0.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/test_wavs/1.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/test_wavs/2.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/test_wavs/3-sichuan.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/test_wavs/4-tianjin.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/test_wavs/5-henan.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/test_wavs/6-zh-en.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/test_wavs/8k.wav

          rm -f README.md || true

          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/README.md

          echo "export to onnx"

          python3 ./export_encoder_onnx.py
          python3 ./export_decoder_onnx.py
          python3 ./export_predictor_onnx.py

          rm -v *.pt

          ls -lh *.onnx

          source /usr/local/Ascend/ascend-toolkit/set_env.sh
          export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib/linux/x86_64:$LD_LIBRARY_PATH

          # for cann 7.0.0
          export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib/x86_64:$LD_LIBRARY_PATH

          soc_version=${{ matrix.soc_version }}
          cann=${{ matrix.cann }}

          atc --model=./predictor.onnx \
            --framework=5 \
            --host_env_os=linux \
            --host_env_cpu=aarch64 \
            --output=predictor \
            --input_format=ND \
            --input_shape="encoder_out:1,-1,512" \
            --soc_version="Ascend${soc_version}"

          ls -lh *.om

          atc --model=./decoder.onnx \
            --framework=5 \
            --host_env_os=linux \
            --host_env_cpu=aarch64 \
            --output=decoder \
            --input_format=ND \
            --input_shape="encoder_out:1,-1,512;acoustic_embedding:1,-1,512" \
            --soc_version="Ascend${soc_version}"

          ls -lh *.om

          atc --model=./encoder.onnx \
            --framework=5 \
            --host_env_os=linux \
            --host_env_cpu=aarch64 \
            --output=encoder \
            --input_format=ND \
            --input_shape="x:1,-1,560" \
            --soc_version="Ascend${soc_version}"

          ls -lh *.om

          rm -v *.onnx


          echo "collect results"
          d=sherpa-onnx-ascend-${soc_version}-cann-$cann-paraformer-zh-2023-03-28

          mkdir -p $d
          mkdir -p $d/test_wavs

          cp -v README.md $d
          cp -v encoder_linux_aarch64.om $d/encoder.om
          cp -v decoder_linux_aarch64.om $d/decoder.om
          cp -v predictor_linux_aarch64.om $d/predictor.om
          cp -v test_om.py $d/

          cp -v tokens.txt $d
          cp -v *.wav $d/test_wavs
          ls -lh $d
          tar cjfv $d.tar.bz2 $d
          ls -lh *.tar.bz2
          rm -rf $d

          rm -v *.om

          echo "----show---"
          ls -lh *.tar.bz2

          mv *.tar.bz2 ../../..

      - name: Run Paraformer from WSChuan-ASR
        if: matrix.framework == 'WSChuan-ASR'
        shell: bash
        run: |
          cd scripts/paraformer/ascend-npu

          curl -SL -O https://hf-mirror.com/csukuangfj/WSChuan-ASR/resolve/main/Paraformer-large-Chuan/am.mvn
          curl -SL -O https://hf-mirror.com/csukuangfj/WSChuan-ASR/resolve/main/Paraformer-large-Chuan/config.yaml
          curl -SL -O https://hf-mirror.com/csukuangfj/WSChuan-ASR/resolve/main/Paraformer-large-Chuan/tokens.json
          curl -SL -O https://hf-mirror.com/csukuangfj/WSChuan-ASR/resolve/main/Paraformer-large-Chuan/model_state_dict.pt

          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-int8-2025-10-07/resolve/main/tokens.txt


          for i in $(seq 1 16); do
            curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-int8-2025-10-07/resolve/main/test_wavs/$i.wav
          done

          rm -f README.md || true
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-int8-2025-10-07/resolve/main/README.md

          echo "export to onnx"

          python3 ./export_encoder_onnx.py
          python3 ./export_decoder_onnx.py
          python3 ./export_predictor_onnx.py

          ls -lh *.onnx

          source /usr/local/Ascend/ascend-toolkit/set_env.sh
          export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib/linux/x86_64:$LD_LIBRARY_PATH

          # for cann 7.0.0
          export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib/x86_64:$LD_LIBRARY_PATH

          soc_version=${{ matrix.soc_version }}
          cann=${{ matrix.cann }}

          atc --model=./predictor.onnx \
            --framework=5 \
            --host_env_os=linux \
            --host_env_cpu=aarch64 \
            --output=predictor \
            --input_format=ND \
            --input_shape="encoder_out:1,-1,512" \
            --soc_version="Ascend${soc_version}"

          ls -lh *.om

          atc --model=./decoder.onnx \
            --framework=5 \
            --host_env_os=linux \
            --host_env_cpu=aarch64 \
            --output=decoder \
            --input_format=ND \
            --input_shape="encoder_out:1,-1,512;acoustic_embedding:1,-1,512" \
            --soc_version="Ascend${soc_version}"

          ls -lh *.om

          atc --model=./encoder.onnx \
            --framework=5 \
            --host_env_os=linux \
            --host_env_cpu=aarch64 \
            --output=encoder \
            --input_format=ND \
            --input_shape="x:1,-1,560" \
            --soc_version="Ascend${soc_version}"

          ls -lh *.om

          rm -v *.onnx

          echo "collect results"
          d=sherpa-onnx-ascend-${soc_version}-cann-$cann-paraformer-zh-2025-10-07

          mkdir -p $d
          mkdir -p $d/test_wavs

          cp -v README.md $d
          cp -v encoder_linux_aarch64.om $d/encoder.om
          cp -v decoder_linux_aarch64.om $d/decoder.om
          cp -v predictor_linux_aarch64.om $d/predictor.om
          cp -v test_om.py $d/

          cp -v tokens.txt $d
          cp -v *.wav $d/test_wavs
          ls -lh $d
          tar cjfv $d.tar.bz2 $d
          ls -lh *.tar.bz2
          rm -rf $d

          rm -v *.om

          echo "----show---"
          ls -lh *.tar.bz2

          mv *.tar.bz2 ../../..

      - name: Release
        if: github.repository_owner == 'csukuangfj'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models-ascend

      - name: Release
        if: github.repository_owner == 'k2-fsa'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          tag: asr-models-ascend

      - name: Publish to huggingface
        if: true
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            for m in "*.tar.bz2"; do
              export GIT_LFS_SKIP_SMUDGE=1
              export GIT_CLONE_PROTECTION_ACTIVE=false
              rm -rf huggingface
              git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-models huggingface

              d=asr-models/ascend-npu/paraformer
              mkdir -p huggingface/$d

              cp -v $m huggingface/$d/

              pushd huggingface
              git lfs track "*.tar.bz2"
              ls -lh $d
              pushd $d
              git lfs track "*.tar.bz2"
              popd

              git status
              git add .

              git commit -m "add $m"
              git push https://csukuangfj2:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-models main
              popd
            done
            rm -rf huggingface

      - name: Publish to modelscope
        if: true
        env:
          MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            for m in "*.tar.bz2"; do
              export GIT_LFS_SKIP_SMUDGE=1
              export GIT_CLONE_PROTECTION_ACTIVE=false

              rm -rf ms
              git clone https://oauth2:${MS_TOKEN}@www.modelscope.cn/csukuangfj/asr-models.git ms

              d=ascend-npu/paraformer
              mkdir -p ms/$d

              cp -av $m ms/$d/

              pushd ms
              git lfs track "*.tar.bz2"
              git status
              ls -lh $d/$m

              ls -lh $d
              git add .

              git commit -m "add $m"
              git push https://oauth2:${MS_TOKEN}@www.modelscope.cn/csukuangfj/asr-models.git

              popd
            done
            rm -rf ms


================================================
FILE: .github/workflows/export-paraformer-to-qnn.yaml
================================================
name: export-paraformer-to-qnn

on:
  push:
    branches:
      - export-paraformer-qnn-2
  workflow_dispatch:

concurrency:
  group: export-paraformer-to-qnn-${{ github.ref }}
  cancel-in-progress: true

jobs:
  generate_build_matrix:
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    # see https://github.com/pytorch/pytorch/pull/50633
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Generating build matrix
        id: set-matrix
        run: |
          # outputting for debugging purposes
          python3 .github/scripts/export-qnn/generate_paraformer.py
          MATRIX=$(python3 .github/scripts/export-qnn/generate_paraformer.py)

          # deprecated
          # echo "::set-output name=matrix::${MATRIX}"
          echo "matrix=$MATRIX" >> $GITHUB_OUTPUT

  export-paraformer-to-qnn:
    needs: generate_build_matrix
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: ${{ matrix.framework }} ${{ matrix.input_in_seconds }} ${{ matrix.soc }}
    runs-on: ubuntu-22.04
    strategy:
      fail-fast: false
      matrix:
        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python 3.10
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Display NDK HOME
        shell: bash
        run: |
          echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
          ls -lh ${ANDROID_NDK_LATEST_HOME}

      - name: Create directories
        shell: bash
        run: |
          mkdir so binary

      - name: Create Python virtual environment
        shell: bash
        run: |
          python3 -m venv py310
          which python3
          source py310/bin/activate
          which python3

      - name: Show ndk-build help
        shell: bash
        run: |
          export PATH=${ANDROID_NDK_LATEST_HOME}:$PATH
          ndk-build --help

      - name: Download toolkit
        shell: bash
        run: |
          curl -SL -O https://huggingface.co/csukuangfj/qnn-toolkit/resolve/main/v2.40.0.251030.zip
          ls -lh v2.40.0.251030.zip

      - name: Unzip toolkit
        shell: bash
        run: |
          unzip v2.40.0.251030.zip

      - name: Show
        shell: bash
        run: |
          ls -lh

          echo "---ls -lh qairt---"

          ls -lh qairt

          echo "---"

      - name: Install linux dependencies
        shell: bash
        run: |
          ls -lh

          echo "---"

          ls -lh qairt

          cd qairt/2.40.0.251030/bin
          source envsetup.sh

          yes | sudo ${QNN_SDK_ROOT}/bin/check-linux-dependency.sh || true

      - name: Install Python dependencies
        shell: bash
        run: |
          source py310/bin/activate

          cd qairt/2.40.0.251030/bin
          source envsetup.sh

          python3 -m pip install \
            mock \
            numpy \
            opencv-python \
            optuna \
            packaging \
            pandas \
            paramiko \
            pathlib2 \
            pillow \
            plotly \
            protobuf \
            psutil \
            pydantic \
            pytest \
            pyyaml \
            rich \
            scikit-optimize \
            scipy \
            six \
            tabulate \
            typing-extensions \
            xlsxwriter

          python3 "${QNN_SDK_ROOT}/bin/check-python-dependency" || true

          which python3

      - name: Install onnx dependencies
        shell: bash
        run: |
          source py310/bin/activate
          python3 -m pip install --upgrade \
            torch==2.0.0+cpu -f https://download.pytorch.org/whl/torch \
            kaldi_native_fbank \
            pip \
            "numpy<2" \
            onnx==1.17.0 \
            onnxruntime==1.17.1 \
            soundfile \
            librosa \
            onnxsim \
            sentencepiece \
            pyyaml

          which python3

      - name: Show qnn-onnx-converter help
        shell: bash
        run: |
          source py310/bin/activate

          pushd qairt/2.40.0.251030/bin
          source envsetup.sh
          popd

          qnn-onnx-converter --help

      - name: Show qnn-model-lib-generator help
        shell: bash
        run: |
          source py310/bin/activate

          pushd qairt/2.40.0.251030/bin
          source envsetup.sh
          popd

          qnn-model-lib-generator --help

      - name: Show qnn-net-run help
        shell: bash
        run: |
          source py310/bin/activate

          pushd qairt/2.40.0.251030/bin
          source envsetup.sh
          popd

          qnn-net-run --help

      - name: Run Paraformer from FunAsr
        if: matrix.framework == 'FunASR'
        shell: bash
        run: |
          source py310/bin/activate

          pushd qairt/2.40.0.251030/bin
          source envsetup.sh
          popd


          export PATH=${ANDROID_NDK_LATEST_HOME}:$PATH
          export LDFLAGS="-Wl,-z,max-page-size=16384"

          export t=${{ matrix.input_in_seconds }}
          export soc=${{ matrix.soc }}

          dir=$PWD

          cd scripts/paraformer/qnn

          curl -SL -O https://www.modelscope.cn/models/iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/resolve/master/am.mvn
          curl -SL -O https://www.modelscope.cn/models/iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/resolve/master/config.yaml
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/tokens.txt

          curl -SL -O https://www.modelscope.cn/models/iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/resolve/master/model.pt
          mv model.pt model_state_dict.pt

          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/test_wavs/0.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/test_wavs/1.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/test_wavs/2.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/test_wavs/3-sichuan.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/test_wavs/4-tianjin.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/test_wavs/5-henan.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/test_wavs/6-zh-en.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/test_wavs/8k.wav

          rm -f README.md || true

          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/README.md


          ./convert_decoder.sh

          ./convert_predictor.sh

          ./convert_encoder.sh

          ls -lh model_libs/*/lib*.so

          ls -lh binary

          readelf -lW model_libs/*/lib*.so

          echo "collect results"

          d=sherpa-onnx-qnn-${{ matrix.soc}}-binary-$t-seconds-paraformer-zh-2023-03-28-int8

          mkdir -p $d
          mkdir -p $d/test_wavs

          cp -v README.md $d
          cp -v binary/encoder.bin $d/
          cp -v binary/predictor.bin $d/
          cp -v binary/decoder.bin $d/

          cp -v tokens.txt $d
          cp -v *.wav $d/test_wavs
          ls -lh $d
          tar cjfv $d.tar.bz2 $d
          ls -lh *.tar.bz2
          rm -rf $d

          mv *.tar.bz2 ../../../binary/


          for p in x86_64-linux-clang aarch64-android; do
            if [[ $p == x86_64-linux-clang ]]; then

              d=sherpa-onnx-qnn-$t-seconds-paraformer-zh-2023-03-28-int8-linux-x64
            elif [[ $p == aarch64-android ]]; then
              d=sherpa-onnx-qnn-$t-seconds-paraformer-zh-2023-03-28-int8-android-aarch64
            else
              echo "Unknown $p"
              exit -1
            fi

            mkdir -p $d
            mkdir -p $d/test_wavs

            cp -v README.md $d

            cp -v model_libs/$p/libencoder*.so $d/libencoder.so
            cp -v model_libs/$p/libpredictor*.so $d/libpredictor.so
            cp -v model_libs/$p/libdecoder*.so $d/libdecoder.so

            cp -v tokens.txt $d
            cp -v *.wav $d/test_wavs
            ls -lh $d
            tar cjfv $d.tar.bz2 $d
            ls -lh *.tar.bz2
            rm -rf $d
          done

          echo "----show---"
          ls -lh *.tar.bz2

          mv *.tar.bz2 ../../../so/


      - name: Run Paraformer from WSChuan-ASR
        if: matrix.framework == 'WSChuan-ASR'
        shell: bash
        run: |
          dir=$PWD
          source py310/bin/activate

          pushd qairt/2.40.0.251030/bin
          source envsetup.sh
          popd

          export PATH=${ANDROID_NDK_LATEST_HOME}:$PATH
          export LDFLAGS="-Wl,-z,max-page-size=16384"
          export t=${{ matrix.input_in_seconds }}
          export soc=${{ matrix.soc }}

          cd scripts/paraformer/qnn

          curl -SL -O https://hf-mirror.com/csukuangfj/WSChuan-ASR/resolve/main/Paraformer-large-Chuan/am.mvn
          curl -SL -O https://hf-mirror.com/csukuangfj/WSChuan-ASR/resolve/main/Paraformer-large-Chuan/config.yaml
          curl -SL -O https://hf-mirror.com/csukuangfj/WSChuan-ASR/resolve/main/Paraformer-large-Chuan/tokens.json
          curl -SL -O https://hf-mirror.com/csukuangfj/WSChuan-ASR/resolve/main/Paraformer-large-Chuan/model_state_dict.pt

          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-int8-2025-10-07/resolve/main/tokens.txt


          for i in $(seq 1 16); do
            curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-int8-2025-10-07/resolve/main/test_wavs/$i.wav
          done

          rm -f README.md || true
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-int8-2025-10-07/resolve/main/README.md

          ./convert_decoder.sh

          ./convert_predictor.sh

          ./convert_encoder.sh

          ls -lh model_libs/*/lib*.so

          ls -lh binary

          readelf -lW model_libs/*/lib*.so

          echo "collect results"

          d=sherpa-onnx-qnn-${{ matrix.soc}}-binary-$t-seconds-paraformer-zh-2025-10-07-int8

          mkdir -p $d
          mkdir -p $d/test_wavs

          cp -v README.md $d
          cp -v binary/encoder.bin $d/
          cp -v binary/predictor.bin $d/
          cp -v binary/decoder.bin $d/

          cp -v tokens.txt $d
          cp -v *.wav $d/test_wavs
          ls -lh $d
          tar cjfv $d.tar.bz2 $d
          ls -lh *.tar.bz2
          rm -rf $d

          mv *.tar.bz2 ../../../binary/


          for p in x86_64-linux-clang aarch64-android; do
            if [[ $p == x86_64-linux-clang ]]; then

              d=sherpa-onnx-qnn-$t-seconds-paraformer-zh-2025-10-07-int8-linux-x64
            elif [[ $p == aarch64-android ]]; then
              d=sherpa-onnx-qnn-$t-seconds-paraformer-zh-2025-10-07-int8-android-aarch64
            else
              echo "Unknown $p"
              exit -1
            fi

            mkdir -p $d
            mkdir -p $d/test_wavs

            cp -v README.md $d

            cp -v model_libs/$p/libencoder*.so $d/libencoder.so
            cp -v model_libs/$p/libpredictor*.so $d/libpredictor.so
            cp -v model_libs/$p/libdecoder*.so $d/libdecoder.so

            cp -v tokens.txt $d
            cp -v *.wav $d/test_wavs
            ls -lh $d
            tar cjfv $d.tar.bz2 $d
            ls -lh *.tar.bz2
            rm -rf $d
          done

          echo "----show---"
          ls -lh *.tar.bz2

          mv *.tar.bz2 ../../../so/


      - uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.framework }}-${{ matrix.soc }}-${{ matrix.input_in_seconds }}-seconds
          path: ./scripts/paraformer/qnn/my-config*/*.json

      - name: Release
        if: github.repository_owner == 'csukuangfj' && matrix.soc == 'SM8850'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./so/*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models-qnn

      - name: Release
        if: github.repository_owner == 'csukuangfj'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./binary/*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models-qnn-binary

      - name: Release
        if: github.repository_owner == 'k2-fsa' && matrix.soc == 'SM8850'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./so/*.tar.bz2
          overwrite: true
          tag: asr-models-qnn

      - name: Release
        if: github.repository_owner == 'k2-fsa'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./binary/*.tar.bz2
          overwrite: true
          tag: asr-models-qnn-binary


================================================
FILE: .github/workflows/export-paraformer-to-rknn.yaml
================================================
name: export-paraformer-to-rknn

on:
  push:
    branches:
      - ci-paraformer-rknn
  workflow_dispatch:

concurrency:
  group: export-paraformer-to-rknn-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-paraformer-to-rknn:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: ${{ matrix.framework }} ${{ matrix.platform }} ${{ matrix.input_in_seconds }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.10"]
        platform: ["rk3562", "rk3566", "rk3568", "rk3576", "rk3588"]
        input_in_seconds: ["5", "10", "15", "20", "25", "30"]
        framework: ["FunASR", "WSChuan-ASR"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade \
            pip \
            "numpy<2" \
            torch==2.0.0+cpu -f https://download.pytorch.org/whl/torch \
            onnx==1.17.0 \
            onnxruntime==1.17.1 \
            librosa \
            soundfile \
            pyyaml \
            onnxsim \
            sentencepiece \
            kaldi_native_fbank

          curl -SL -O https://huggingface.co/csukuangfj/rknn-toolkit2/resolve/main/rknn_toolkit2-2.1.0%2B708089d1-cp310-cp310-linux_x86_64.whl
          pip install ./*.whl "numpy<=1.26.4"

      - name: Run Paraformer from FunAsr
        if: matrix.framework == 'FunASR'
        shell: bash
        run: |
          cd scripts/paraformer/rknn

          curl -SL -O https://www.modelscope.cn/models/iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/resolve/master/am.mvn
          curl -SL -O https://www.modelscope.cn/models/iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/resolve/master/config.yaml
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/tokens.txt

          curl -SL -O https://www.modelscope.cn/models/iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/resolve/master/model.pt
          mv model.pt model_state_dict.pt

          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/test_wavs/0.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/test_wavs/1.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/test_wavs/2.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/test_wavs/3-sichuan.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/test_wavs/4-tianjin.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/test_wavs/5-henan.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/test_wavs/6-zh-en.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/test_wavs/8k.wav

          rm -f README.md || true

          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/README.md

          echo "export to onnx"
          t=${{ matrix.input_in_seconds }}
          p=${{ matrix.platform }}

          export url="https://www.modelscope.cn/models/iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
          export model_author="iic"
          export comment="iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"

          echo "----$t---"
          python3 ./export_encoder_onnx.py  --input-len-in-seconds $t
          python3 ./export_rknn.py --target-platform $p --in-model ./encoder-$t-seconds.onnx --out-model ./encoder-$t-seconds.rknn >/dev/null 2>&1

          python3 ./export_predictor_onnx.py  --input-len-in-seconds $t
          python3 ./export_rknn.py --target-platform $p --in-model ./predictor-$t-seconds.onnx --out-model ./predictor-$t-seconds.rknn >/dev/null 2>&1

          python3 ./export_decoder_onnx.py  --input-len-in-seconds $t
          python3 ./export_rknn.py --target-platform $p --in-model ./decoder-$t-seconds.onnx --out-model ./decoder-$t-seconds.rknn >/dev/null 2>&1

          ls -lh *.onnx
          echo "---"
          ls -lh *.rknn

          echo "collect results"
          d=sherpa-onnx-$p-$t-seconds-paraformer-zh-2023-03-28

          mkdir -p $d
          mkdir -p $d/test_wavs

          cp -v README.md $d
          cp -v encoder-$t-seconds.rknn $d/encoder.rknn
          cp -v decoder-$t-seconds.rknn $d/decoder.rknn
          cp -v predictor-$t-seconds.rknn $d/predictor.rknn

          cp -v tokens.txt $d
          cp -v *.wav $d/test_wavs
          ls -lh $d
          tar cjfv $d.tar.bz2 $d
          ls -lh *.tar.bz2
          rm -rf $d

          echo "----show---"
          ls -lh *.tar.bz2

          mv *.tar.bz2 ../../..

      - name: Run Paraformer from WSChuan-ASR
        if: matrix.framework == 'WSChuan-ASR'
        shell: bash
        run: |
          cd scripts/paraformer/rknn

          curl -SL -O https://hf-mirror.com/csukuangfj/WSChuan-ASR/resolve/main/Paraformer-large-Chuan/am.mvn
          curl -SL -O https://hf-mirror.com/csukuangfj/WSChuan-ASR/resolve/main/Paraformer-large-Chuan/config.yaml
          curl -SL -O https://hf-mirror.com/csukuangfj/WSChuan-ASR/resolve/main/Paraformer-large-Chuan/tokens.json
          curl -SL -O https://hf-mirror.com/csukuangfj/WSChuan-ASR/resolve/main/Paraformer-large-Chuan/model_state_dict.pt

          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-int8-2025-10-07/resolve/main/tokens.txt


          for i in $(seq 1 16); do
            curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-int8-2025-10-07/resolve/main/test_wavs/$i.wav
          done

          rm -f README.md || true
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-int8-2025-10-07/resolve/main/README.md

          echo "export to onnx"
          t=${{ matrix.input_in_seconds }}
          p=${{ matrix.platform }}

          export model_author="ASLP-lab"
          export comment="ASLP-lab/WSChuan-ASR"
          export url="https://huggingface.co/ASLP-lab/WSChuan-ASR/tree/main/Paraformer-large-Chuan"

          echo "----$t---"
          python3 ./export_encoder_onnx.py  --input-len-in-seconds $t
          python3 ./export_rknn.py --target-platform $p --in-model ./encoder-$t-seconds.onnx --out-model ./encoder-$t-seconds.rknn >/dev/null 2>&1

          python3 ./export_predictor_onnx.py  --input-len-in-seconds $t
          python3 ./export_rknn.py --target-platform $p --in-model ./predictor-$t-seconds.onnx --out-model ./predictor-$t-seconds.rknn >/dev/null 2>&1

          python3 ./export_decoder_onnx.py  --input-len-in-seconds $t
          python3 ./export_rknn.py --target-platform $p --in-model ./decoder-$t-seconds.onnx --out-model ./decoder-$t-seconds.rknn >/dev/null 2>&1

          ls -lh *.onnx
          echo "---"
          ls -lh *.rknn

          echo "collect results"
          d=sherpa-onnx-$p-$t-seconds-paraformer-zh-2025-10-07

          mkdir -p $d
          mkdir -p $d/test_wavs

          cp -v README.md $d
          cp -v encoder-$t-seconds.rknn $d/encoder.rknn
          cp -v decoder-$t-seconds.rknn $d/decoder.rknn
          cp -v predictor-$t-seconds.rknn $d/predictor.rknn

          cp -v tokens.txt $d
          cp -v *.wav $d/test_wavs
          ls -lh $d
          tar cjfv $d.tar.bz2 $d
          ls -lh *.tar.bz2
          rm -rf $d

          echo "----show---"
          ls -lh *.tar.bz2

          mv *.tar.bz2 ../../..

      - name: Release
        if: github.repository_owner == 'csukuangfj'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models

      - name: Release
        if: github.repository_owner == 'k2-fsa'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          tag: asr-models


================================================
FILE: .github/workflows/export-peng-cheng-starling.yaml
================================================
name: export-peng-cheng-starling-to-onnx

on:
  push:
    branches:
      - fix-ci-2

  workflow_dispatch:

concurrency:
  group: export-peng-cheng-starling-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-peng-cheng-starling-to-onnx:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: export peng cheng starling ${{ matrix.version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Python dependencies
        shell: bash
        run: |
          pip install "numpy<=1.26.4" onnx==1.16.0 onnxruntime==1.17.1

      - name: Run
        shell: bash
        run: |
          cd scripts/peng-cheng-starling
          ./run.sh
          python3 ./quantize_models.py

          ls -lh
          rm encoder-epoch-75-avg-11-chunk-16-left-128.onnx
          rm joiner-epoch-75-avg-11-chunk-16-left-128.onnx
          echo "----"
          ls -lh


      - name: Collect results ${{ matrix.version }}
        shell: bash
        run: |
          src=scripts/peng-cheng-starling
          d=sherpa-onnx-streaming-zipformer-ar_en_id_ja_ru_th_vi_zh-2025-02-10
          mkdir $d

          mv -v $src/*.onnx $d
          cp -v $src/README.md $d
          cp -v $src/bpe.model $d
          cp -v $src/tokens.txt $d
          cp -av $src/test_wavs $d

          ls -lh $d/
          tar cjfv $d.tar.bz2 $d

          ls -lh $d.tar.bz2

      - name: Publish to huggingface ${{ matrix.version }}
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            src=sherpa-onnx-streaming-zipformer-ar_en_id_ja_ru_th_vi_zh-2025-02-10

            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$src huggingface
            cd huggingface
            rm -rf ./*
            git fetch
            git pull

            cp -av ../$src/* ./

            git lfs track "*.onnx"
            git add .

            ls -lh

            git status

            git commit -m "add models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$src main || true

      - name: Release
        if: github.repository_owner == 'csukuangfj'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models

      - name: Release
        if: github.repository_owner == 'k2-fsa'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          tag: asr-models


================================================
FILE: .github/workflows/export-piper.yaml
================================================
name: export-piper

on:
  push:
    branches:
      - export-piper
  workflow_dispatch:

concurrency:
  group: export-piper-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-piper:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: ${{ matrix.index }}/${{ matrix.total }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.10"]
        total: ["20"]
        index: [
          "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
          "10", "11", "12", "13", "14", "15", "16", "17", "18", "19",
           ]
        # total: ["2"]
        # index: ["0", "1"]
        # total: ["1"]
        # index: ["0"]
        # total: ["5"]
        # index: ["0", "1", "2", "3", "4"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip jinja2 iso639-lang onnx==1.17.0 onnxruntime==1.17.1 sherpa-onnx onnxmltools==1.13.0
          python3 -m pip install "numpy<2" soundfile

      - name: Generate script
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        shell: bash
        run: |
          cd scripts/piper

          total=${{ matrix.total }}
          index=${{ matrix.index }}

          git config --global user.email "csukuangfj@gmail.com"
          git config --global user.name "Fangjun Kuang"

          export GIT_LFS_SKIP_SMUDGE=1
          export GIT_CLONE_PROTECTION_ACTIVE=false

          git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples hf

          python3 ./generate.py --total $total --index $index
          chmod +x ./generate.sh
          ls -lh

      - name: Show script
        shell: bash
        run: |
          cd scripts/piper
          cat ./generate.sh

      - name: Run script
        shell: bash
        run: |
          cd scripts/piper
          ./generate.sh

      - name: Show generated mp3 files
        shell: bash
        run: |
          cd scripts/piper
          ls -lh hf/piper/mp3/*
          echo "----"
          ls -lh hf/piper/mp3/*/*

      - name: Push generated mp3 files
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            cd scripts/piper/hf
            git pull --rebase
            git lfs track "*.mp3"
            git status .
            git add .
            git commit -m 'Add mp3 files'
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples main

      - name: Show generated model files
        shell: bash
        run: |
          cd scripts/piper
          ls -lh *.tar.bz2

      - name: Show generated model files(2)
        shell: bash
        run: |
          cd scripts/piper
          ls -lh release/

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            dirs=(
              vits-piper-de_DE-glados-high
              vits-piper-de_DE-glados-low
              vits-piper-de_DE-glados-medium
              vits-piper-de_DE-glados_turret-high
              vits-piper-de_DE-glados_turret-low
              vits-piper-de_DE-glados_turret-medium
              vits-piper-en_US-glados-high
              vits-piper-fa_IR-ganji-medium
              vits-piper-fa_IR-ganji_adabi-medium
              vits-piper-fa_IR-reza_ibrahim-medium
              vits-piper-hi_IN-pratham-medium
              vits-piper-hi_IN-priyamvada-medium
              vits-piper-es_AR-daniela-high
              vits-piper-en_GB-miro-high
              vits-piper-en_GB-dii-high
              vits-piper-pt_PT-miro-high
              vits-piper-pt_PT-dii-high
              vits-piper-pt_BR-miro-high
              vits-piper-pt_BR-dii-high
              vits-piper-es_ES-miro-high
              vits-piper-it_IT-miro-high
              vits-piper-it_IT-dii-high
              vits-piper-nl_NL-miro-high
              vits-piper-nl_NL-dii-high
              vits-piper-de_DE-miro-high
              vits-piper-de_DE-dii-high
              vits-piper-fr_FR-miro-high
              vits-piper-en_US-miro-high
              vits-piper-pl_PL-jarvis_wg_glos-medium
              vits-piper-pl_PL-justyna_wg_glos-medium
              vits-piper-pl_PL-meski_wg_glos-medium
              vits-piper-pl_PL-zenski_wg_glos-medium
              vits-piper-id_ID-news_tts-medium
              vits-piper-hi_IN-rohan-medium
              vits-piper-ar_JO-SA_miro-high-int8
              vits-piper-ar_JO-SA_miro-high-fp16
              vits-piper-ar_JO-SA_miro-high
              vits-piper-ar_JO-SA_dii-high-int8
              vits-piper-ar_JO-SA_dii-high-fp16
              vits-piper-ar_JO-SA_dii-high
              vits-piper-ar_JO-SA_miro_V2-high-int8
              vits-piper-ar_JO-SA_miro_V2-high-fp16
              vits-piper-ar_JO-SA_miro_V2-high
            )
            for d in ${dirs[@]}; do
              src=scripts/piper/release/$d
              if [ ! -d $src ]; then
                continue;
              fi

              rm -rf huggingface
              git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
              cp -a $src/* ./huggingface
              pushd huggingface
              git lfs track "*.onnx"
              git lfs track af_dict
              git lfs track ar_dict
              git lfs track cmn_dict
              git lfs track da_dict en_dict fa_dict hu_dict ia_dict it_dict lb_dict phondata ru_dict ta_dict
              git lfs track ur_dict yue_dict

              git status
              git add .
              git status
              git commit -m "add models"
              git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main
              popd

            done

      - name: Release
        if: github.repository_owner == 'csukuangfj'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./scripts/piper/vits-piper-*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: tts-models

      - name: Release
        if: github.repository_owner == 'k2-fsa'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./scripts/piper/vits-piper-*.tar.bz2
          overwrite: true
          tag: tts-models


================================================
FILE: .github/workflows/export-pocket-tts.yaml
================================================
name: export-pocket-to-onnx

on:
  push:
    branches:
      - export-pocket-tts-2

  workflow_dispatch:

concurrency:
  group: export-pocket-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-pocket-to-onnx:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: export PocketTTS ${{ matrix.version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4

      - name: Install ffmpeg
        shell: bash
        run: brew install ffmpeg

      - name: Verify ffmpeg
        shell: bash
        run: ffmpeg -version

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Python dependencies
        shell: bash
        run: |
          pip install "numpy<=1.26.4" onnx==1.17.0 onnxruntime==1.17.1 librosa soundfile \
            torch==2.8.0

      - name: Run
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        shell: bash
        run: |
          git clone https://github.com/csukuangfj/pocket-tts-onnx-export
          cd pocket-tts-onnx-export
          pip install -r requirements.txt
          pip install onnx==1.17.0 torch==2.8.0
          pip list

          git grep 'opset_version'
          echo "---"
          sed -i '' 's/opset_version=17/opset_version=14/g' scripts/*.py
          echo "---"
          git grep 'opset_version'

          python export.py
          python export.py --quantize

          ls -lh onnx/

          python3 ../scripts/pyannote//segmentation/show-onnx.py --filename ./onnx/flow_lm_flow.onnx
          python3 ../scripts/pyannote//segmentation/show-onnx.py --filename ./onnx/flow_lm_main.onnx
          python3 ../scripts/pyannote//segmentation/show-onnx.py --filename ./onnx/mimi_encoder.onnx
          python3 ../scripts/pyannote//segmentation/show-onnx.py --filename ./onnx/mimi_decoder.onnx
          python3 ../scripts/pyannote//segmentation/show-onnx.py --filename ./onnx/text_conditioner.onnx

          cd onnx
          mv flow_lm_flow_int8.onnx lm_flow.int8.onnx
          mv flow_lm_flow.onnx lm_flow.onnx

          mv flow_lm_main_int8.onnx lm_main.int8.onnx
          mv flow_lm_main.onnx lm_main.onnx

          mv mimi_encoder_int8.onnx encoder.int8.onnx
          mv mimi_encoder.onnx encoder.onnx

          mv mimi_decoder_int8.onnx decoder.int8.onnx
          mv mimi_decoder.onnx decoder.onnx

          mv text_conditioner_int8.onnx text_conditioner.int8.onnx
          cd ..

          mv onnx ..

          # bash-3.2$ ls -lh onnx/
          # total 1318368
          # -rw-r--r--  1 runner  staff   9.5M Feb 10 09:29 flow_lm_flow_int8.onnx
          # -rw-r--r--  1 runner  staff    37M Feb 10 09:29 flow_lm_flow.onnx
          # -rw-r--r--  1 runner  staff    73M Feb 10 09:29 flow_lm_main_int8.onnx
          # -rw-r--r--  1 runner  staff   289M Feb 10 09:29 flow_lm_main.onnx
          # -rw-r--r--  1 runner  staff    22M Feb 10 09:29 mimi_decoder_int8.onnx
          # -rw-r--r--  1 runner  staff    40M Feb 10 09:29 mimi_decoder.onnx
          # -rw-r--r--  1 runner  staff    71M Feb 10 09:29 mimi_encoder_int8.onnx
          # -rw-r--r--  1 runner  staff    71M Feb 10 09:29 mimi_encoder.onnx
          # -rw-r--r--  1 runner  staff    16M Feb 10 09:29 text_conditioner_int8.onnx
          # -rw-r--r--  1 runner  staff    16M Feb 10 09:29 text_conditioner.onnx

      - name: Setup tmate session
        # if: true
        if: failure()
        uses: mxschmitt/action-tmate@v3

      - name: Generate json files
        if: true
        shell: bash
        run: |
          cp -v onnx/*.onnx scripts/pocket-tts

          pushd scripts/pocket-tts
          curl -SsL -O https://huggingface.co/KevinAHM/pocket-tts-onnx/resolve/main/onnx/LICENSE
          curl -SsL -O https://huggingface.co/KevinAHM/pocket-tts-onnx/resolve/main/tokenizer.model

          wget https://github.com/kyutai-labs/delayed-streams-modeling/raw/refs/heads/main/audio/bria.mp3
          wget https://github.com/kyutai-labs/delayed-streams-modeling/raw/refs/heads/main/audio/loona.mp3
          wget https://github.com/kyutai-labs/delayed-streams-modeling/raw/refs/heads/main/audio/sample_fr_hibiki_crepes.mp3
          for f in *.mp3; do
            ffmpeg -y -i "$f" -ac 1 -ar 24000 "${f%.mp3}.wav"
          done
          rm -v *.mp3

          ls -lh

          ./convert_tokenizer.py

          ls -lh
          rm README.md
          cat >README.md <<EOF
          # Introduction
          See also https://github.com/kyutai-labs/pocket-tts
          Onnx files are exported using https://github.com/KevinAHM/pocket-tts-onnx-export
          Files in test_wav are from https://github.com/kyutai-labs/delayed-streams-modeling/tree/main/audio
          Before you use it, please read its [LICENSE](https://huggingface.co/KevinAHM/pocket-tts-onnx/blob/main/onnx/LICENSE)
          It is for non-commercial.
          EOF

      - name: Collect results
        if: true
        shell: bash
        run: |
          d=sherpa-onnx-pocket-tts-2026-01-26
          mkdir -p $d
          mkdir -p $d/test_wavs
          src=scripts/pocket-tts
          cp -v $src/*.onnx $d
          rm $d/*int8.onnx
          cp -v $src/README.md $d
          cp -v $src/LICENSE $d
          cp -v $src/*.json $d
          cp -v $src/*.wav $d/test_wavs
          ls -lh $d/
          tar cjfv $d.tar.bz2 $d
          ls -lh $d.tar.bz2

      - name: Collect results (int8)
        if: true
        shell: bash
        run: |
          d=sherpa-onnx-pocket-tts-int8-2026-01-26
          mkdir -p $d
          mkdir -p $d/test_wavs
          src=scripts/pocket-tts
          cp -v $src/*.onnx $d
          rm $d/lm_flow.onnx
          rm $d/lm_main.onnx
          rm $d/decoder.onnx
          rm $d/encoder.int8.onnx
          rm $d/text_conditioner.int8.onnx
          cp -v $src/README.md $d
          cp -v $src/LICENSE $d
          cp -v $src/*.json $d
          cp -v $src/*.wav $d/test_wavs
          ls -lh $d/
          tar cjfv $d.tar.bz2 $d
          ls -lh $d.tar.bz2

      - name: Release
        if: github.repository_owner == 'csukuangfj'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: tts-models


      - name: Release
        if: github.repository_owner == 'k2-fsa'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          tag: tts-models


      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            dirs=(
              sherpa-onnx-pocket-tts-2026-01-26
              sherpa-onnx-pocket-tts-int8-2026-01-26
            )

            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            for d in ${dirs[@]}; do
              echo "d $d"
              if [[ ! -d $d ]]; then
                echo "$d does not exist"
                continue
              fi

              echo "$d exists"
              rm -rf huggingface

              git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/$d huggingface
              cd huggingface
              rm -rf ./*

              git lfs track "*.onnx"
              git lfs track "*.wav"

              cp -a ../$d/* ./

              git add .

              ls -lh

              git status

              git commit -m "add models"
              git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/$d main || true
              cd ..
            done

      - name: Publish to modelscope
        if: true
        env:
          MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            for m in *.tar.bz2; do
              export GIT_LFS_SKIP_SMUDGE=1
              export GIT_CLONE_PROTECTION_ACTIVE=false

              rm -rf ms
              git clone https://oauth2:${MS_TOKEN}@www.modelscope.cn/csukuangfj/tts-models.git ms

              cp -av $m ms/

              pushd ms
              git lfs track "*.tar.bz2"
              git status
              ls -lh
              git add .

              git commit -m "add models"
              git push https://oauth2:${MS_TOKEN}@www.modelscope.cn/csukuangfj/tts-models.git

              popd
            done


================================================
FILE: .github/workflows/export-pyannote-segmentation-to-onnx.yaml
================================================
name: export-pyannote-segmentation-to-onnx

on:
  workflow_dispatch:

concurrency:
  group: export-pyannote-segmentation-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-pyannote-segmentation-to-onnx:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: export Pyannote segmentation models to ONNX
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install pyannote
        shell: bash
        run: |
          pip install pyannote.audio onnx==1.15.0 onnxruntime==1.16.3

      - name: Run
        shell: bash
        run: |
          d=sherpa-onnx-pyannote-segmentation-3-0
          src=$PWD/$d
          mkdir -p $src

          pushd scripts/pyannote/segmentation
          ./run.sh
          cp ./*.onnx $src/
          cp ./README.md $src/
          cp ./LICENSE $src/
          cp ./run.sh $src/
          cp ./*.py $src/

          popd
          ls -lh $d
          tar cjfv $d.tar.bz2 $d

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: speaker-segmentation-models

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            d=sherpa-onnx-pyannote-segmentation-3-0
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false
            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
            cp -v $d/* ./huggingface
            cd huggingface
            git lfs track "*.onnx"
            git status
            git add .
            git status
            git commit -m "add models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main


================================================
FILE: .github/workflows/export-revai-segmentation-to-onnx.yaml
================================================
name: export-revai-segmentation-to-onnx

on:
  workflow_dispatch:

concurrency:
  group: export-revai-segmentation-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-revai-segmentation-to-onnx:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: export revai segmentation models to ONNX
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install pyannote
        shell: bash
        run: |
          pip install pyannote.audio onnx==1.15.0 onnxruntime==1.16.3

      - name: Run
        shell: bash
        run: |
          d=sherpa-onnx-reverb-diarization-v1
          src=$PWD/$d
          mkdir -p $src

          pushd scripts/pyannote/segmentation
          ./run-revai.sh
          cp ./*.onnx $src/
          cp ./README.md $src/
          cp ./LICENSE $src/
          cp ./run-revai.sh $src/run.sh
          cp ./*.py $src/

          popd
          ls -lh $d
          tar cjfv $d.tar.bz2 $d

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: speaker-segmentation-models

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            d=sherpa-onnx-reverb-diarization-v1
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false
            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
            cp -v $d/* ./huggingface
            cd huggingface
            git lfs track "*.onnx"
            git status
            git add .
            git status
            git commit -m "add models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main


================================================
FILE: .github/workflows/export-russian-onnx-models.yaml
================================================
name: export-russian-onnx-models

on:
  workflow_dispatch:

concurrency:
  group: export-russian-onnx-models-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-russian-onnx-models:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: export Russian onnx models
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        python-version: ["3.8"]

    steps:
      - uses: actions/checkout@v4

      - name: vosk-model-ru (zipformer v1)
        shell: bash
        run: |
          cat >README.md <<EOF
          # Introduction
          Models in this directory are from
          https://huggingface.co/alphacep/vosk-model-ru/tree/main
          EOF

          cat README.md

          d=sherpa-onnx-zipformer-ru-2024-09-18
          mkdir $d
          pushd $d
          curl -SL -O https://huggingface.co/alphacep/vosk-model-ru/resolve/main/lang/bpe.model
          curl -SL -O https://huggingface.co/alphacep/vosk-model-ru/resolve/main/lang/tokens.txt
          curl -SL -O https://huggingface.co/alphacep/vosk-model-ru/resolve/main/am-onnx/encoder.int8.onnx
          curl -SL -O https://huggingface.co/alphacep/vosk-model-ru/resolve/main/am-onnx/decoder.int8.onnx
          curl -SL -O https://huggingface.co/alphacep/vosk-model-ru/resolve/main/am-onnx/joiner.int8.onnx

          curl -SL -O https://huggingface.co/alphacep/vosk-model-ru/resolve/main/am-onnx/encoder.onnx
          curl -SL -O https://huggingface.co/alphacep/vosk-model-ru/resolve/main/am-onnx/decoder.onnx
          curl -SL -O https://huggingface.co/alphacep/vosk-model-ru/resolve/main/am-onnx/joiner.onnx

          mkdir test_wavs
          cd test_wavs
          curl -SL -O https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition/resolve/main/test_wavs/russian/russian-i-love-you.wav
          curl -SL -O https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition/resolve/main/test_wavs/russian/test.wav

          mv russian-i-love-you.wav 0.wav
          mv test.wav 1.wav
          popd

          ls -lh $d

          tar cjvf $d.tar.bz2 $d
          rm -rf $d

      - name: vosk-model-ru-small (zipformer v1)
        shell: bash
        run: |
          cat >README.md <<EOF
          # Introduction
          Models in this directory are from
          https://huggingface.co/alphacep/vosk-model-small-ru/tree/main
          EOF

          cat README.md

          d=sherpa-onnx-small-zipformer-ru-2024-09-18
          mkdir $d
          pushd $d
          curl -SL -O https://huggingface.co/alphacep/vosk-model-small-ru/resolve/main/lang/bpe.model
          curl -SL -O https://huggingface.co/alphacep/vosk-model-small-ru/resolve/main/lang/tokens.txt
          curl -SL -O https://huggingface.co/alphacep/vosk-model-small-ru/resolve/main/am/encoder.int8.onnx
          curl -SL -O https://huggingface.co/alphacep/vosk-model-small-ru/resolve/main/am/decoder.int8.onnx
          curl -SL -O https://huggingface.co/alphacep/vosk-model-small-ru/resolve/main/am/joiner.int8.onnx

          curl -SL -O https://huggingface.co/alphacep/vosk-model-small-ru/resolve/main/am/encoder.onnx
          curl -SL -O https://huggingface.co/alphacep/vosk-model-small-ru/resolve/main/am/decoder.onnx
          curl -SL -O https://huggingface.co/alphacep/vosk-model-small-ru/resolve/main/am/joiner.onnx

          mkdir test_wavs
          cd test_wavs
          curl -SL -O https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition/resolve/main/test_wavs/russian/russian-i-love-you.wav
          curl -SL -O https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition/resolve/main/test_wavs/russian/test.wav

          mv russian-i-love-you.wav 0.wav
          mv test.wav 1.wav
          popd

          ls -lh $d

          tar cjvf $d.tar.bz2 $d
          rm -rf $d

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models


================================================
FILE: .github/workflows/export-sense-voice-to-ascend-npu.yaml
================================================
name: export-sense-voice-to-ascend-npu

on:
  push:
    branches:
      - fix-ascend-2
  workflow_dispatch:

concurrency:
  group: export-sense-voice-to-ascend-npu-${{ github.ref }}
  cancel-in-progress: true

jobs:
  generate_build_matrix:
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    # see https://github.com/pytorch/pytorch/pull/50633
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Generating build matrix
        id: set-matrix
        run: |
          # outputting for debugging purposes
          python3 .github/scripts/export-ascend/generate_sense_voice.py
          MATRIX=$(python3 .github/scripts/export-ascend/generate_sense_voice.py)

          # deprecated
          # echo "::set-output name=matrix::${MATRIX}"
          echo "matrix=$MATRIX" >> $GITHUB_OUTPUT

  export-sense-voice-to-ascend-npu:
    needs: generate_build_matrix
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: ${{ matrix.framework }} ${{ matrix.soc_version }} ${{ matrix.cann }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}

    container:
      image: ${{ matrix.image }}

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python 3.8
        uses: actions/setup-python@v5
        with:
          python-version: "3.8"

      - name: Show Python
        shell: bash
        run: |
          python3 --version
          which python3

      - name: Install curl
        shell: bash
        run: apt-get update && apt-get install -y curl bzip2 git git-lfs

      - name: Verify environment
        shell: bash
        run: |
          ls -lh /usr/local/Ascend/ascend-toolkit/set_env.sh

          find /usr/local/Ascend -name "libascend*.so" 2>/dev/null


          source /usr/local/Ascend/ascend-toolkit/set_env.sh
          export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib/linux/x86_64:$LD_LIBRARY_PATH

          # for cann 7.0.0
          export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib/x86_64:$LD_LIBRARY_PATH

          echo "CANN environment:"
          which atc || echo "atc not found"
          atc --help

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install "numpy<2" \
                  onnx==1.17.0 \
                  torch==2.0.0+cpu -f https://download.pytorch.org/whl/torch \
                  attrs psutil scipy decorator cloudpickle ml-dtypes tornado \
                  sentencepiece \
                  pyyaml

      - name: Run SenseVoice from FunAsr
        if: matrix.framework == 'FunASR'
        shell: bash
        run: |
          cd scripts/sense-voice/ascend-npu

          curl -SL -O https://hf-mirror.com/FunAudioLLM/SenseVoiceSmall/resolve/main/am.mvn
          curl -SL -O https://hf-mirror.com/FunAudioLLM/SenseVoiceSmall/resolve/main/model.pt
          curl -SL -O https://hf-mirror.com/FunAudioLLM/SenseVoiceSmall/resolve/main/chn_jpn_yue_eng_ko_spectok.bpe.model

          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/en.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/ja.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/ko.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/yue.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/zh.wav

          rm -f README.md || true

          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/README.md
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/LICENSE

          echo "export to onnx"

          python3 ./export_onnx.py
          rm -v *.pt

          ls -lh *.onnx

          source /usr/local/Ascend/ascend-toolkit/set_env.sh
          export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib/linux/x86_64:$LD_LIBRARY_PATH

          # for cann 7.0.0
          export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib/x86_64:$LD_LIBRARY_PATH

          soc_version=${{ matrix.soc_version }}
          cann=${{ matrix.cann }}

          atc --model=./model.onnx \
            --framework=5 \
            --host_env_os=linux \
            --host_env_cpu=aarch64 \
            --output=model \
            --input_format=ND \
            --input_shape="x:1,-1,560;prompt:4" \
            --soc_version="Ascend${soc_version}"

          rm -v *.onnx

          ls -lh *.om

          echo "collect results"
          d=sherpa-onnx-ascend-${soc_version}-cann-${cann}-sense-voice-zh-en-ja-ko-yue-2024-07-17

          mkdir -p $d
          mkdir -p $d/test_wavs

          cp -v README.md $d
          cp -v LICENSE $d
          cp -v model_linux_aarch64.om $d/model.om
          cp -v tokens.txt $d
          cp -v test_om.py $d
          cp -v *.wav $d/test_wavs
          ls -lh $d
          tar cjfv $d.tar.bz2 $d
          ls -lh *.tar.bz2
          rm -rf $d

          rm -v *.om

          echo "----show---"
          ls -lh *.tar.bz2

          mv *.tar.bz2 ../../..

      - name: Run SenseVoice from WSYue-ASR
        if: matrix.framework == 'WSYue-ASR'
        shell: bash
        run: |
          cd scripts/sense-voice/ascend-npu

          curl -SL -O https://huggingface.co/ASLP-lab/WSYue-ASR/resolve/main/sensevoice_small_yue/model.pt

          curl -SL -O https://hf-mirror.com/FunAudioLLM/SenseVoiceSmall/resolve/main/am.mvn
          curl -SL -O https://hf-mirror.com/FunAudioLLM/SenseVoiceSmall/resolve/main/chn_jpn_yue_eng_ko_spectok.bpe.model

          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/en.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/yue.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/zh.wav

          for i in $(seq 0 17); do
            curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09/resolve/main/test_wavs/yue-$i.wav
          done

          rm -f README.md || true

          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09/resolve/main/README.md

          echo "export to onnx"
          python3 ./export_onnx.py
          rm -v *.pt

          ls -lh *.onnx

          source /usr/local/Ascend/ascend-toolkit/set_env.sh
          export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib/linux/x86_64:$LD_LIBRARY_PATH

          # for cann 7.0.0
          export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib/x86_64:$LD_LIBRARY_PATH

          soc_version=${{ matrix.soc_version }}
          cann=${{ matrix.cann }}

          atc --model=./model.onnx \
            --framework=5 \
            --host_env_os=linux \
            --host_env_cpu=aarch64 \
            --output=model \
            --input_format=ND \
            --input_shape="x:1,-1,560;prompt:4" \
            --soc_version="Ascend${soc_version}"

          rm -v *.onnx
          ls -lh *.om

          echo "collect results"
          d=sherpa-onnx-ascend-${soc_version}-cann-${cann}-sense-voice-zh-en-ja-ko-yue-2025-09-09

          mkdir -p $d
          mkdir -p $d/test_wavs

          cp -v README.md $d
          cp -v model_linux_aarch64.om $d/model.om
          cp -v tokens.txt $d
          cp -v test_om.py $d
          cp -v *.wav $d/test_wavs
          ls -lh $d
          tar cjfv $d.tar.bz2 $d
          ls -lh *.tar.bz2
          rm -rf $d

          rm -v *.om

          echo "----show---"
          ls -lh *.tar.bz2

          mv *.tar.bz2 ../../..

      - name: Release
        if: github.repository_owner == 'csukuangfj'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models-ascend

      - name: Release
        if: github.repository_owner == 'k2-fsa'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          tag: asr-models-ascend

      - name: Publish to huggingface
        if: true
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            for m in "*.tar.bz2"; do
              export GIT_LFS_SKIP_SMUDGE=1
              export GIT_CLONE_PROTECTION_ACTIVE=false
              rm -rf huggingface
              git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-models huggingface

              d=asr-models/ascend-npu/sense-voice
              mkdir -p huggingface/$d

              cp -v $m huggingface/$d/

              pushd huggingface
              git lfs track "*.tar.bz2"
              ls -lh $d/$m

              ls -lh $d

              pushd $d
              git lfs track "*.tar.bz2"
              popd

              git status
              git add .

              git commit -m "add $m"
              git push https://csukuangfj2:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-models main
              popd
            done

            rm -rf huggingface

      - name: Publish to modelscope
        if: true
        env:
          MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            for m in "*.tar.bz2"; do
              export GIT_LFS_SKIP_SMUDGE=1
              export GIT_CLONE_PROTECTION_ACTIVE=false

              rm -rf ms
              git clone https://oauth2:${MS_TOKEN}@www.modelscope.cn/csukuangfj/asr-models.git ms

              d=ascend-npu/sense-voice
              mkdir -p ms/$d

              cp -av $m ms/$d/

              pushd ms
              git lfs track "*.tar.bz2"
              git status
              ls -lh $d/$m

              ls -lh $d
              git add .

              git commit -m "add $m"
              git push https://oauth2:${MS_TOKEN}@www.modelscope.cn/csukuangfj/asr-models.git

              popd
            done
            rm -rf ms


================================================
FILE: .github/workflows/export-sense-voice-to-onnx.yaml
================================================
name: export-sense-voice-to-onnx

on:
  workflow_dispatch:

concurrency:
  group: export-sense-voice-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-sense-voice-to-onnx:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: export sense-voice
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        shell: bash
        run: |
          sudo apt-get install -y -qq sox libsox-fmt-mp3

      - name: Install Python dependencies
        shell: bash
        run: |
          pip install \
            torch==2.0.0+cpu -f https://download.pytorch.org/whl/torch \
            onnx==1.17.0 \
            onnxruntime==1.17.1 \
            soundfile \
            kaldi-native-fbank \
            librosa

          pip install  "numpy<2"

      - name: Download test_wavs
        shell: bash
        run: |
          sudo apt-get install -y -qq sox libsox-fmt-mp3

          cd scripts/sense-voice

          curl -SL -O https://huggingface.co/FunAudioLLM/SenseVoiceSmall/resolve/main/example/zh.mp3
          curl -SL -O https://huggingface.co/FunAudioLLM/SenseVoiceSmall/resolve/main/example/en.mp3
          curl -SL -O https://huggingface.co/FunAudioLLM/SenseVoiceSmall/resolve/main/example/ja.mp3
          curl -SL -O https://huggingface.co/FunAudioLLM/SenseVoiceSmall/resolve/main/example/ko.mp3
          curl -SL -O https://huggingface.co/FunAudioLLM/SenseVoiceSmall/resolve/main/example/yue.mp3

          soxi *.mp3

          sox zh.mp3 -r 16k zh.wav
          sox en.mp3 -r 16k en.wav
          sox ja.mp3 -r 16k ja.wav
          sox ko.mp3 -r 16k ko.wav
          sox yue.mp3 -r 16k yue.wav


      - name: Run
        shell: bash
        run: |
          cd scripts/sense-voice
          curl -SL -O https://huggingface.co/csukuangfj/funasr-nano-with-ctc/resolve/main/model.pt
          curl -SL -O https://huggingface.co/csukuangfj/funasr-nano-with-ctc/resolve/main/tokens.txt
          ls -lh
          ./export_onnx_nano.py

          ls -lh

          d=sherpa-onnx-sense-voice-funasr-nano-2025-12-17
          d2=sherpa-onnx-sense-voice-funasr-nano-int8-2025-12-17
          mkdir -p $d $d2

          cp README-nano.md $d/README.md
          cp README-nano.md $d2/README.md

          mv model.onnx $d/
          mv model.int8.onnx $d2/

          for m in $d $d2; do
            mkdir -p $m/test_wavs
            cp -v *.wav $m/test_wavs
            cp -v tokens.txt $m/

            ls -lh $m

            tar cjfv $m.tar.bz2 $m

            ls -lh $m.tar.bz2
            mv $m.tar.bz2 ../../
            mv $m ../../
          done

      - name: Publish to huggingface
        if: true
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 5
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            names=(
              sherpa-onnx-sense-voice-funasr-nano-2025-12-17
              sherpa-onnx-sense-voice-funasr-nano-int8-2025-12-17
            )
            for d in ${names[@]}; do
              if [ ! -d $d ]; then
                echo "$d does not exist - skip it"
                continue;
              fi

              export GIT_LFS_SKIP_SMUDGE=1
              export GIT_CLONE_PROTECTION_ACTIVE=false
              rm -rf huggingface
              git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
              cp -av $d/* ./huggingface
              cd huggingface
              git lfs track "*.onnx"
              git lfs track "*.wav"
              git status
              git add .
              git status
              git commit -m "add models"
              git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main
              cd ..
            done

      - name: Run
        shell: bash
        if: false
        run: |
          cd scripts/sense-voice
          ./run.sh

      - name: Publish to huggingface
        if: false
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17 huggingface
            cd huggingface
            git fetch
            git pull
            echo "pwd: $PWD"
            ls -lh ../scripts/sense-voice

            rm -rf ./*

            cp -v ../scripts/sense-voice/*.onnx .
            cp -v ../scripts/sense-voice/tokens.txt .
            cp -v ../scripts/sense-voice/README.md .
            cp -v ../scripts/sense-voice/export-onnx.py .

            mkdir test_wavs
            cp -v ../*.wav ./test_wavs/

            curl -SL -O https://raw.githubusercontent.com/FunAudioLLM/SenseVoice/main/LICENSE

            git lfs track "*.onnx"
            git add .

            ls -lh

            git status

            git commit -m "add models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17 main || true

            cd ..

            rm -rf huggingface/.git*
            dst=sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17

            mv huggingface $dst

            tar cjvf $dst.tar.bz2 $dst
            rm -rf $dst

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models


================================================
FILE: .github/workflows/export-sense-voice-to-qnn.yaml
================================================
name: export-sense-voice-to-qnn

on:
  push:
    branches:
      - qnn-binary-2
  workflow_dispatch:

concurrency:
  group: export-sense-voice-to-qnn-${{ github.ref }}
  cancel-in-progress: true

jobs:
  generate_build_matrix:
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    # see https://github.com/pytorch/pytorch/pull/50633
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Generating build matrix
        id: set-matrix
        run: |
          # outputting for debugging purposes
          python3 .github/scripts/export-qnn/generate_sense_voice.py
          MATRIX=$(python3 .github/scripts/export-qnn/generate_sense_voice.py)

          # deprecated
          # echo "::set-output name=matrix::${MATRIX}"
          echo "matrix=$MATRIX" >> $GITHUB_OUTPUT

  export-sense-voice-to-qnn:
    needs: generate_build_matrix
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: ${{ matrix.framework }} ${{ matrix.input_in_seconds }} ${{ matrix.soc }}
    runs-on: ubuntu-22.04
    strategy:
      fail-fast: false
      matrix:
        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python 3.10
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Display NDK HOME
        shell: bash
        run: |
          echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
          ls -lh ${ANDROID_NDK_LATEST_HOME}

      - name: Create directories
        shell: bash
        run: |
          mkdir so binary

      - name: Create Python virtual environment
        shell: bash
        run: |
          python3 -m venv py310
          which python3
          source py310/bin/activate
          which python3

      - name: Show ndk-build help
        shell: bash
        run: |
          export PATH=${ANDROID_NDK_LATEST_HOME}:$PATH
          ndk-build --help

      - name: Download toolkit
        shell: bash
        run: |
          curl -SL -O https://huggingface.co/csukuangfj/qnn-toolkit/resolve/main/v2.40.0.251030.zip
          ls -lh v2.40.0.251030.zip

      - name: Unzip toolkit
        shell: bash
        run: |
          unzip v2.40.0.251030.zip

      - name: Show
        shell: bash
        run: |
          ls -lh

          echo "---ls -lh qairt---"

          ls -lh qairt

          echo "---"

      - name: Install linux dependencies
        shell: bash
        run: |
          ls -lh

          echo "---"

          ls -lh qairt

          cd qairt/2.40.0.251030/bin
          source envsetup.sh

          yes | sudo ${QNN_SDK_ROOT}/bin/check-linux-dependency.sh || true

      - name: Install Python dependencies
        shell: bash
        run: |
          source py310/bin/activate

          cd qairt/2.40.0.251030/bin
          source envsetup.sh

          python3 -m pip install \
            mock \
            numpy \
            opencv-python \
            optuna \
            packaging \
            pandas \
            paramiko \
            pathlib2 \
            pillow \
            plotly \
            protobuf \
            psutil \
            pydantic \
            pytest \
            pyyaml \
            rich \
            scikit-optimize \
            scipy \
            six \
            tabulate \
            typing-extensions \
            xlsxwriter

          python3 "${QNN_SDK_ROOT}/bin/check-python-dependency" || true

          which python3

      - name: Install onnx dependencies
        shell: bash
        run: |
          source py310/bin/activate
          python3 -m pip install --upgrade \
            torch==2.0.0+cpu -f https://download.pytorch.org/whl/torch \
            kaldi_native_fbank \
            pip \
            "numpy<2" \
            onnx==1.17.0 \
            onnxruntime==1.17.1 \
            soundfile \
            librosa \
            onnxsim \
            sentencepiece \
            pyyaml

          which python3

      - name: Show qnn-onnx-converter help
        shell: bash
        run: |
          source py310/bin/activate

          pushd qairt/2.40.0.251030/bin
          source envsetup.sh
          popd

          qnn-onnx-converter --help

      - name: Show qnn-model-lib-generator help
        shell: bash
        run: |
          source py310/bin/activate

          pushd qairt/2.40.0.251030/bin
          source envsetup.sh
          popd

          qnn-model-lib-generator --help

      - name: Show qnn-net-run help
        shell: bash
        run: |
          source py310/bin/activate

          pushd qairt/2.40.0.251030/bin
          source envsetup.sh
          popd

          qnn-net-run --help

      - name: Run SenseVoice from FunAsr
        if: matrix.framework == 'FunASR'
        shell: bash
        run: |
          source py310/bin/activate

          pushd qairt/2.40.0.251030/bin
          source envsetup.sh
          popd

          export PATH=${ANDROID_NDK_LATEST_HOME}:$PATH
          export LDFLAGS="-Wl,-z,max-page-size=16384"
          dir=$PWD

          cd scripts/sense-voice/qnn

          curl -SL -O https://hf-mirror.com/FunAudioLLM/SenseVoiceSmall/resolve/main/am.mvn
          curl -SL -O https://hf-mirror.com/FunAudioLLM/SenseVoiceSmall/resolve/main/model.pt
          curl -SL -O https://hf-mirror.com/FunAudioLLM/SenseVoiceSmall/resolve/main/chn_jpn_yue_eng_ko_spectok.bpe.model

          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/en.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/ja.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/ko.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/yue.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/zh.wav

          rm -f README.md || true

          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/README.md
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/LICENSE

          echo "export to onnx"
          t=${{ matrix.input_in_seconds }}

          echo "----$t---"
          python3 ./export-onnx.py --input-len-in-seconds $t --opset-version 17

          ls -lh *.onnx

          python3 ../../pyannote/segmentation/show-onnx.py --filename ./model-$t-seconds.onnx

          echo "test exported onnx models"

          echo "----------$t----------"
          python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./en.wav
          python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./ja.wav
          python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./ko.wav
          python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./yue.wav
          python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./zh.wav

          echo "export to qnn"
          echo "----------$t----------"
          num_frames=$(python3 -c "print(int($t*100 / 6 + 0.5))")

          echo "num_frames: $num_frames"

          ./generate_test_data.py  --num-frames $num_frames --wav ./zh.wav
          mv input0.raw zh-input0.raw
          mv input1.raw zh-input1.raw
          echo "zh-input0.raw zh-input1.raw" > input_list.txt

          for w in ja ko en yue; do
            ./generate_test_data.py  --num-frames $num_frames --wav ./$w.wav
            mv input0.raw $w-input0.raw
            mv input1.raw $w-input1.raw
            echo "$w-input0.raw $w-input1.raw" >> input_list.txt
          done

          cat ./input_list.txt

          qnn-onnx-converter \
            --input_network model-$t-seconds.onnx \
            --output_path ./model-$t-seconds-quantized \
            --out_node logits \
            --input_list ./input_list.txt \
            --use_native_input_files  \
            --input_dtype x float32 \
            --input_dtype prompt int32 \
            --act_bitwidth 16 \
            --bias_bitwidth 32 \
            --input_layout x NTF
          ls -lh
          mv model-$t-seconds-quantized model-$t-seconds-quantized.cpp
          echo "----"
          ls -lh

          python3 "${QNN_SDK_ROOT}/bin/x86_64-linux-clang/qnn-model-lib-generator" \
            -c "model-$t-seconds-quantized.cpp" \
            -b "model-$t-seconds-quantized.bin" \
            -o model_libs > /dev/null 2>&1

          ls -lh model_libs/*/

          readelf -lW model_libs/*/lib*.so

          echo "Generate context binary"

          $dir/scripts/qnn/generate_config.py  \
            --soc ${{ matrix.soc }} \
            --graph-name "model_${t}_seconds_quantized" \
            --output-dir ./my-config \
            --qnn-sdk-root $QNN_SDK_ROOT

          ls -lh my-config

          head -n 1000 my-config/*.json

          $QNN_SDK_ROOT/bin/x86_64-linux-clang/qnn-context-binary-generator \
            --backend $QNN_SDK_ROOT/lib/x86_64-linux-clang/libQnnHtp.so \
            --model ./model_libs/x86_64-linux-clang/libmodel-$t-seconds-quantized.so \
            --output_dir ./binary \
            --binary_file model \
            --config_file ./my-config/htp_backend_extensions.json

          ls -lh binary/

          echo "collect results"

          d=sherpa-onnx-qnn-${{ matrix.soc}}-binary-$t-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8
          mkdir -p $d
          mkdir -p $d/test_wavs

          cp -v README.md $d
          cp -v LICENSE $d
          cp -v binary/model.bin $d/
          cp -v tokens.txt $d
          cp -v *.wav $d/test_wavs

          echo "num_frames=$num_frames" > $d/info.txt

          ls -lh $d
          tar cjfv $d.tar.bz2 $d
          ls -lh *.tar.bz2
          rm -rf $d
          mv *.tar.bz2 ../../../binary/


          for p in x86_64-linux-clang aarch64-android; do
            if [[ $p == x86_64-linux-clang ]]; then
              d=sherpa-onnx-qnn-$t-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8-linux-x64
            elif [[ $p == aarch64-android ]]; then
              d=sherpa-onnx-qnn-$t-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8-android-aarch64
            else
              echo "Unknown $p"
              exit -1
            fi

            mkdir -p $d
            mkdir -p $d/test_wavs

            cp -v README.md $d
            cp -v LICENSE $d
            cp -v model_libs/$p/lib*.so $d/libmodel.so
            cp -v tokens.txt $d
            cp -v *.wav $d/test_wavs

            echo "num_frames=$num_frames" > $d/info.txt
            echo "target=$p" >> $d/info.txt

            ls -lh $d
            tar cjfv $d.tar.bz2 $d
            ls -lh *.tar.bz2
            rm -rf $d
          done

          echo "----show---"
          ls -lh *.tar.bz2

          mv *.tar.bz2 ../../../so/


      - name: Run SenseVoice from WSYue-ASR
        if: matrix.framework == 'WSYue-ASR'
        shell: bash
        run: |
          dir=$PWD
          source py310/bin/activate

          pushd qairt/2.40.0.251030/bin
          source envsetup.sh
          popd

          export PATH=${ANDROID_NDK_LATEST_HOME}:$PATH
          export LDFLAGS="-Wl,-z,max-page-size=16384"

          cd scripts/sense-voice/qnn

          curl -SL -O https://huggingface.co/ASLP-lab/WSYue-ASR/resolve/main/sensevoice_small_yue/model.pt

          curl -SL -O https://hf-mirror.com/FunAudioLLM/SenseVoiceSmall/resolve/main/am.mvn
          curl -SL -O https://hf-mirror.com/FunAudioLLM/SenseVoiceSmall/resolve/main/chn_jpn_yue_eng_ko_spectok.bpe.model

          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/en.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/yue.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/zh.wav

          for i in $(seq 0 17); do
            curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09/resolve/main/test_wavs/yue-$i.wav
          done

          rm -f README.md || true

          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09/resolve/main/README.md

          echo "export to onnx"
          t=${{ matrix.input_in_seconds }}

          echo "----$t---"

          export model_author="ASLP-lab"
          export comment="ASLP-lab/WSYue-ASR"
          export url="https://huggingface.co/ASLP-lab/WSYue-ASR/tree/main/sensevoice_small_yue"

          python3 ./export-onnx.py --input-len-in-seconds $t --opset-version 17

          ls -lh *.onnx

          python3 ../../pyannote/segmentation/show-onnx.py --filename ./model-$t-seconds.onnx

          echo "test exported onnx models"

          echo "----------$t----------"
          python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./en.wav
          python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./yue.wav
          python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./zh.wav

          for i in $(seq 0 17); do
            echo "yue-$i.wav"
            python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./yue-$i.wav
          done

          echo "export to qnn"
          echo "----------$t----------"
          num_frames=$(python3 -c "print(int($t*100 / 6 + 0.5))")

          echo "num_frames: $num_frames"

          ./generate_test_data.py  --num-frames $num_frames --wav ./zh.wav
          mv input0.raw zh-input0.raw
          mv input1.raw zh-input1.raw
          echo "zh-input0.raw zh-input1.raw" > input_list.txt

          for w in en yue; do
            ./generate_test_data.py  --num-frames $num_frames --wav ./$w.wav
            mv input0.raw $w-input0.raw
            mv input1.raw $w-input1.raw
            echo "$w-input0.raw $w-input1.raw" >> input_list.txt
          done

          for i in $(seq 0 17); do
            echo "yue-$i.wav"
            ./generate_test_data.py  --num-frames $num_frames --wav ./yue-$i.wav
            mv input0.raw $i-input0.raw
            mv input1.raw $i-input1.raw
            echo "$i-input0.raw $i-input1.raw" >> input_list.txt
          done

          cat ./input_list.txt

          qnn-onnx-converter \
            --input_network model-$t-seconds.onnx \
            --output_path ./model-$t-seconds-quantized \
            --out_node logits \
            --input_list ./input_list.txt \
            --use_native_input_files  \
            --input_dtype x float32 \
            --input_dtype prompt int32 \
            --act_bitwidth 16 \
            --bias_bitwidth 32 \
            --input_layout x NTF
          ls -lh
          mv model-$t-seconds-quantized model-$t-seconds-quantized.cpp
          echo "----"
          ls -lh

          python3 "${QNN_SDK_ROOT}/bin/x86_64-linux-clang/qnn-model-lib-generator" \
            -c "model-$t-seconds-quantized.cpp" \
            -b "model-$t-seconds-quantized.bin" \
            -o model_libs > /dev/null 2>&1

          ls -lh model_libs/*/

          readelf -lW model_libs/*/lib*.so

          $dir/scripts/qnn/generate_config.py  \
            --soc ${{ matrix.soc }} \
            --graph-name "model_${t}_seconds_quantized" \
            --output-dir ./my-config \
            --qnn-sdk-root $QNN_SDK_ROOT

          ls -lh my-config

          head -n 1000 my-config/*.json

          $QNN_SDK_ROOT/bin/x86_64-linux-clang/qnn-context-binary-generator \
            --backend $QNN_SDK_ROOT/lib/x86_64-linux-clang/libQnnHtp.so \
            --model ./model_libs/x86_64-linux-clang/libmodel-$t-seconds-quantized.so \
            --output_dir ./binary \
            --binary_file model \
            --config_file ./my-config/htp_backend_extensions.json

          ls -lh binary/

          echo "collect results"

          d=sherpa-onnx-qnn-${{ matrix.soc }}-binary-$t-seconds-sense-voice-zh-en-ja-ko-yue-2025-09-09-int8
          mkdir -p $d
          mkdir -p $d/test_wavs

          cp -v README.md $d
          cp -v binary/model.bin $d/
          cp -v tokens.txt $d
          cp -v *.wav $d/test_wavs

          echo "num_frames=$num_frames" > $d/info.txt

          ls -lh $d
          tar cjfv $d.tar.bz2 $d
          ls -lh *.tar.bz2
          rm -rf $d
          mv *.tar.bz2 ../../../binary/

          echo "collect results"
          for p in x86_64-linux-clang aarch64-android; do
            if [[ $p == x86_64-linux-clang ]]; then
              d=sherpa-onnx-qnn-$t-seconds-sense-voice-zh-en-ja-ko-yue-2025-09-09-int8-linux-x64
            elif [[ $p == aarch64-android ]]; then
              d=sherpa-onnx-qnn-$t-seconds-sense-voice-zh-en-ja-ko-yue-2025-09-09-int8-android-aarch64
            else
              echo "Unknown $p"
              exit -1
            fi

            mkdir -p $d
            mkdir -p $d/test_wavs

            cp -v README.md $d
            cp -v model_libs/$p/lib*.so $d/libmodel.so
            cp -v tokens.txt $d
            cp -v *.wav $d/test_wavs

            echo "num_frames=$num_frames" > $d/info.txt
            echo "target=$p" >> $d/info.txt

            ls -lh $d
            tar cjfv $d.tar.bz2 $d
            ls -lh *.tar.bz2
            rm -rf $d
          done

          echo "----show---"
          ls -lh *.tar.bz2

          mv *.tar.bz2 ../../../so/

      - uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.framework }}-${{ matrix.soc }}-${{ matrix.input_in_seconds }}-seconds
          path: ./scripts/sense-voice/qnn/*.json

      - name: Release
        if: github.repository_owner == 'csukuangfj' && matrix.soc == 'SM8850'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./so/*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models-qnn

      - name: Release
        if: github.repository_owner == 'csukuangfj'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./binary/*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models-qnn-binary

      - name: Release
        if: github.repository_owner == 'k2-fsa' && matrix.soc == 'SM8850'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./so/*.tar.bz2
          overwrite: true
          tag: asr-models-qnn

      - name: Release
        if: github.repository_owner == 'k2-fsa'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./binary/*.tar.bz2
          overwrite: true
          tag: asr-models-qnn-binary


================================================
FILE: .github/workflows/export-sense-voice-to-rknn.yaml
================================================
name: export-sense-voice-to-rknn

on:
  push:
    branches:
      - export-sense-voice-rknn-ci-2
  workflow_dispatch:

concurrency:
  group: export-sense-voice-to-rknn-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-sense-voice-to-rknn:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: ${{ matrix.framework }} ${{ matrix.platform }} ${{ matrix.input_in_seconds }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.10"]
        platform: ["rk3562", "rk3566", "rk3568", "rk3576", "rk3588"]
        input_in_seconds: ["5", "10", "15", "20", "25", "30"]
        framework: ["FunASR", "WSYue-ASR"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade \
            pip \
            "numpy<2" \
            torch==2.0.0+cpu -f https://download.pytorch.org/whl/torch \
            onnx==1.17.0 \
            onnxruntime==1.17.1 \
            librosa \
            soundfile \
            onnxsim \
            sentencepiece \
            kaldi_native_fbank

          curl -SL -O https://huggingface.co/csukuangfj/rknn-toolkit2/resolve/main/rknn_toolkit2-2.1.0%2B708089d1-cp310-cp310-linux_x86_64.whl
          pip install ./*.whl "numpy<=1.26.4"

      - name: Run SenseVoice from FunAsr
        if: matrix.framework == 'FunASR'
        shell: bash
        run: |
          cd scripts/sense-voice/rknn

          curl -SL -O https://hf-mirror.com/FunAudioLLM/SenseVoiceSmall/resolve/main/am.mvn
          curl -SL -O https://hf-mirror.com/FunAudioLLM/SenseVoiceSmall/resolve/main/model.pt
          curl -SL -O https://hf-mirror.com/FunAudioLLM/SenseVoiceSmall/resolve/main/chn_jpn_yue_eng_ko_spectok.bpe.model

          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/en.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/ja.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/ko.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/yue.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/zh.wav

          rm -f README.md || true

          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/README.md
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/LICENSE

          echo "export to onnx"
          t=${{ matrix.input_in_seconds }}
          p=${{ matrix.platform }}

          echo "----$t---"
          python3 ./export-onnx.py --input-len-in-seconds $t

          ls -lh *.onnx

          echo "test exported onnx models"

          echo "----------$t----------"
          python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./en.wav
          python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./ja.wav
          python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./ko.wav
          python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./yue.wav
          python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./zh.wav

          echo "export to rknn"
          echo "----------$t----------"
          echo "----------$p----------"
          python3 export-rknn.py --target-platform $p --in-model model-$t-seconds.onnx --out-model model-$p-$t-seconds.rknn >/dev/null  2>&1

          ls -lh *.rknn

          echo "collect results"
          d=sherpa-onnx-$p-$t-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17

          mkdir -p $d
          mkdir -p $d/test_wavs

          cp -v README.md $d
          cp -v LICENSE $d
          cp -v model-$p-$t-seconds.rknn $d/model.rknn
          cp -v tokens.txt $d
          cp -v *.wav $d/test_wavs
          ls -lh $d
          tar cjfv $d.tar.bz2 $d
          ls -lh *.tar.bz2
          rm -rf $d

          echo "----show---"
          ls -lh *.tar.bz2

          mv *.tar.bz2 ../../..

      - name: Run SenseVoice from WSYue-ASR
        if: matrix.framework == 'WSYue-ASR'
        shell: bash
        run: |
          cd scripts/sense-voice/rknn

          curl -SL -O https://huggingface.co/ASLP-lab/WSYue-ASR/resolve/main/sensevoice_small_yue/model.pt

          curl -SL -O https://hf-mirror.com/FunAudioLLM/SenseVoiceSmall/resolve/main/am.mvn
          curl -SL -O https://hf-mirror.com/FunAudioLLM/SenseVoiceSmall/resolve/main/chn_jpn_yue_eng_ko_spectok.bpe.model

          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/en.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/yue.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/zh.wav

          for i in $(seq 0 17); do
            curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09/resolve/main/test_wavs/yue-$i.wav
          done

          rm -f README.md || true

          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09/resolve/main/README.md

          echo "export to onnx"
          t=${{ matrix.input_in_seconds }}
          p=${{ matrix.platform }}

          echo "----$t---"

          export model_author="ASLP-lab"
          export comment="ASLP-lab/WSYue-ASR"
          export url="https://huggingface.co/ASLP-lab/WSYue-ASR/tree/main/sensevoice_small_yue"

          python3 ./export-onnx.py --input-len-in-seconds $t

          ls -lh *.onnx

          echo "test exported onnx models"

          echo "----------$t----------"
          python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./en.wav
          python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./yue.wav
          python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./zh.wav
          for i in $(seq 0 17); do
            echo "yue-$i.wav"
            python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./yue-$i.wav
          done

          echo "export to rknn"
          echo "----------$t----------"
          echo "----------$p----------"
          python3 export-rknn.py --target-platform $p --in-model model-$t-seconds.onnx --out-model model-$p-$t-seconds.rknn >/dev/null  2>&1

          ls -lh *.rknn

          echo "collect results"
          d=sherpa-onnx-$p-$t-seconds-sense-voice-zh-en-ja-ko-yue-2025-09-09

          mkdir -p $d
          mkdir -p $d/test_wavs

          cp -v README.md $d
          cp -v model-$p-$t-seconds.rknn $d/model.rknn
          cp -v tokens.txt $d
          cp -v *.wav $d/test_wavs
          ls -lh $d
          tar cjfv $d.tar.bz2 $d
          ls -lh *.tar.bz2
          rm -rf $d

          echo "----show---"
          ls -lh *.tar.bz2

          mv *.tar.bz2 ../../..

      - name: Release
        if: github.repository_owner == 'csukuangfj'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models

      - name: Release
        if: github.repository_owner == 'k2-fsa'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          tag: asr-models


================================================
FILE: .github/workflows/export-silero-vad-rknn.yaml
================================================
name: export-silero-vad-to-rknn

on:
  workflow_dispatch:

concurrency:
  group: export-silero-vad-to-rknn-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-silero-vad-to-rknn:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: export silero-vad to rknn
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade \
            pip \
            "numpy<2" \
            torch==2.0.0+cpu -f https://download.pytorch.org/whl/torch \
            onnx \
            onnxruntime==1.17.1 \
            librosa \
            soundfile \
            onnxsim

          curl -SL -O https://huggingface.co/csukuangfj/rknn-toolkit2/resolve/main/rknn_toolkit2-2.1.0%2B708089d1-cp310-cp310-linux_x86_64.whl
          pip install ./*.whl "numpy<=1.26.4"

      - name: Run
        shell: bash
        run: |
          cd scripts/silero_vad/v4
          curl -SL -O https://github.com/snakers4/silero-vad/raw/refs/tags/v4.0/files/silero_vad.jit
          ./export-onnx.py
          ./show.py

          ls -lh m.onnx

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
          ./test-onnx.py  --model ./m.onnx --wav ./lei-jun-test.wav

          for platform in rk3588 rk3576 rk3568 rk3566 rk3562; do
          echo "Platform: $platform"
            ./export-rknn.py --in-model ./m.onnx --out-model silero-vad-v4-$platform.rknn  --target-platform $platform
            ls -lh silero-vad-v4-$platform.rknn
          done

      - name: Collect files
        shell: bash
        run: |
          cd scripts/silero_vad/v4
          ls -lh
          mv *.rknn ../../..

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.rknn
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models

      - name: Upload model to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1

            git clone https://huggingface.co/csukuangfj/sherpa-onnx-rknn-models huggingface
            cd huggingface

            git fetch
            git pull
            git lfs track "*.rknn"
            git merge -m "merge remote" --ff origin main
            dst=vad
            mkdir -p $dst
            cp ../*.rknn $dst/ || true

            ls -lh $dst
            git add .
            git status
            git commit -m "update models"
            git status

            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-rknn-models main || true
            rm -rf huggingface


================================================
FILE: .github/workflows/export-spleeter-to-onnx.yaml
================================================
name: export-spleeter-to-onnx

on:
  push:
    branches:
      - spleeter-cpp-2
  workflow_dispatch:

concurrency:
  group: export-spleeter-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-spleeter-to-onnx:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: export spleeter to ONNX
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        shell: bash
        run: |
          pip install tensorflow torch "numpy<2" onnx==1.17.0 onnxruntime==1.17.1 onnxmltools

      - name: Run
        shell: bash
        run: |
          cd scripts/spleeter
          ./run.sh

          echo "---"
          ls -lh 2stems
          echo "---"
          ls -lh 2stems/*.onnx
          echo "---"

          mv -v 2stems/*.onnx ../..

      - name: Collect models
        shell: bash
        run: |
          mkdir sherpa-onnx-spleeter-2stems
          mkdir sherpa-onnx-spleeter-2stems-int8
          mkdir sherpa-onnx-spleeter-2stems-fp16

          mv -v vocals.onnx sherpa-onnx-spleeter-2stems/
          mv -v accompaniment.onnx sherpa-onnx-spleeter-2stems/

          mv -v vocals.int8.onnx sherpa-onnx-spleeter-2stems-int8/
          mv -v accompaniment.int8.onnx sherpa-onnx-spleeter-2stems-int8/

          mv -v vocals.fp16.onnx sherpa-onnx-spleeter-2stems-fp16/
          mv -v accompaniment.fp16.onnx sherpa-onnx-spleeter-2stems-fp16/

          tar cjvf sherpa-onnx-spleeter-2stems.tar.bz2 sherpa-onnx-spleeter-2stems
          tar cjvf sherpa-onnx-spleeter-2stems-int8.tar.bz2 sherpa-onnx-spleeter-2stems-int8
          tar cjvf sherpa-onnx-spleeter-2stems-fp16.tar.bz2 sherpa-onnx-spleeter-2stems-fp16

          ls -lh *.tar.bz2

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: source-separation-models

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            names=(
              sherpa-onnx-spleeter-2stems
              sherpa-onnx-spleeter-2stems-int8
              sherpa-onnx-spleeter-2stems-fp16
            )
            for d in ${names[@]}; do
              rm -rf huggingface
              git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
              cp -v $d/*onnx huggingface

              cd huggingface
              git lfs track "*.onnx"
              git status
              git add .
              ls -lh
              git status
              git commit -m "add models"
              git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main
              cd ..
            done


================================================
FILE: .github/workflows/export-supertonic.yaml
================================================
name: export-supertonic-to-int8-onnx

on:
  push:
    branches:
      - ci-supertonic

  workflow_dispatch:

concurrency:
  group: export-supertonic-to-int8-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-supertonic-to-onnx:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: export supertonic int8
    runs-on: macos-latest

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Install Python dependencies
        shell: bash
        run: |
          brew install git-xet
          git xet install

          pip install numpy onnx onnxruntime

      - name: Run
        shell: bash
        run: |
          cd scripts/supertonic
          ./run.sh

          wget https://raw.githubusercontent.com/supertone-inc/supertonic/refs/heads/main/LICENSE
          rm README.md
          wget https://raw.githubusercontent.com/supertone-inc/supertonic/refs/heads/main/README.md

      - name: Collect results
        shell: bash
        run: |
          src=scripts/supertonic
          d=sherpa-onnx-supertonic-tts-int8-2026-03-06

          mkdir $d
          cp -a $src/LICENSE $d/
          cp -a $src/README.md $d/
          cp -v $src/onnx_int8/*.int8.onnx $d/
          [ -f $src/assets/onnx/unicode_indexer.bin ] && cp -v $src/assets/onnx/unicode_indexer.bin $d/
          [ -f $src/assets/onnx/tts.json ] && cp -v $src/assets/onnx/tts.json $d/
          [ -f $src/assets/voice_styles/voice.bin ] && cp -v $src/assets/voice_styles/voice.bin $d/voice.bin
          ls -lh $d/
          tar cjfv $d.tar.bz2 $d

          ls -lh $d.tar.bz2

      - name: Release
        if: github.repository_owner == 'csukuangfj'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: tts-models

      - name: Release
        if: github.repository_owner == 'k2-fsa'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          tag: tts-models

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            dirs=(
              sherpa-onnx-supertonic-tts-int8-2026-03-06
            )

            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            for d in ${dirs[@]}; do
              echo "d $d"
              if [[ ! -d $d ]]; then
                echo "$d does not exist"
                continue
              fi

              echo "$d exists"
              rm -rf huggingface

              git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/$d huggingface
              cd huggingface
              rm -rf ./*

              git lfs track "*.onnx"
              git lfs track "*.wav"

              cp -a ../$d/* ./

              git add .

              ls -lh

              git status

              git commit -m "add models"
              git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/$d main || true
              cd ..
            done

      - name: Publish to modelscope
        if: true
        env:
          MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            for m in *.tar.bz2; do
              export GIT_LFS_SKIP_SMUDGE=1
              export GIT_CLONE_PROTECTION_ACTIVE=false

              rm -rf ms
              git clone https://oauth2:${MS_TOKEN}@www.modelscope.cn/csukuangfj/tts-models.git ms

              cp -av $m ms/

              pushd ms
              git lfs track "*.tar.bz2"
              git status
              ls -lh
              git add .

              git commit -m "add models"
              git push https://oauth2:${MS_TOKEN}@www.modelscope.cn/csukuangfj/tts-models.git

              popd
            done


================================================
FILE: .github/workflows/export-t-one-to-onnx.yaml
================================================
name: export-t-one-to-onnx

on:
  workflow_dispatch:

concurrency:
  group: export-t-one-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-t-one-to-onnx:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: export t-one
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Python dependencies
        shell: bash
        run: |
          pip install onnx==1.17.0 onnxruntime==1.17.1 soundfile librosa kaldi_native_fbank "numpy<2"

      - name: Run
        shell: bash
        run: |
          cd scripts/t-one

          wget https://raw.githubusercontent.com/voicekit-team/T-one/refs/heads/main/LICENSE
          ./run.sh

          d=sherpa-onnx-streaming-t-one-russian-2025-09-08
          mkdir $d
          cp -v ./tokens.txt $d
          cp -v ./model.onnx $d
          cp -v ./russian_test_short_from_t_one.wav $d/0.wav
          cp -v ./LICENSE $d
          cp -v ./README.md $d

          ls -lh $d

          tar cjfv $d.tar.bz2 $d

          ls -lh $d.tar.bz2

          mv $d.tar.bz2 ../..
          mv $d ../..

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            m=sherpa-onnx-streaming-t-one-russian-2025-09-08

            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface
            cd huggingface
            git fetch
            git pull
            echo "pwd: $PWD"
            ls -lh ../$m
            git lfs track "*.wav"

            rm -rf ./*

            cp -v ../$m/* ./

            git lfs track "*.onnx"
            git add .

            ls -lh

            git status

            git commit -m "add models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m main || true

            cd ..

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models


================================================
FILE: .github/workflows/export-telespeech-ctc.yaml
================================================
name: export-telespeech-ctc-to-onnx

on:
  workflow_dispatch:

concurrency:
  group: export-telespeech-ctc-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-telespeech-ctc-to-onnx:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: telespeech
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Python dependencies
        shell: bash
        run: |
          pip install onnx onnxruntime soundfile librosa numpy kaldi-native-fbank

      - name: Run
        shell: bash
        run: |
          cd scripts/tele-speech
          ./run.sh

          ./test.py

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models

      - name: Publish float32 model to huggingface
        shell: bash
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          src=scripts/tele-speech/sherpa-onnx-telespeech-ctc-zh-2024-06-04
          git config --global user.email "csukuangfj@gmail.com"
          git config --global user.name "Fangjun Kuang"

          export GIT_CLONE_PROTECTION_ACTIVE=false

          GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-telespeech-ctc-zh-2024-06-04 hf
          cp -a $src/* hf/
          cd hf
          git lfs track "*.pdf"
          git lfs track "*.onnx"
          git add .
          git commit -m 'add model files' || true
          git status
          ls -lh
          git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-telespeech-ctc-zh-2024-06-04 main || true
          rm -rf hf

      - name: Publish int8 model to huggingface
        shell: bash
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          src=scripts/tele-speech/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04
          git config --global user.email "csukuangfj@gmail.com"
          git config --global user.name "Fangjun Kuang"

          export GIT_CLONE_PROTECTION_ACTIVE=false

          rm -rf hf
          GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04 hf
          cp -a $src/* hf/
          cd hf
          git lfs track "*.pdf"
          git lfs track "*.onnx"
          git add .
          git commit -m 'add model files' || true
          git status
          ls -lh
          git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04 main || true


================================================
FILE: .github/workflows/export-uvr-to-onnx.yaml
================================================
name: export-uvr-to-onnx

on:
  push:
    branches:
      - uvr
  workflow_dispatch:

concurrency:
  group: export-uvr-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-uvr-to-onnx:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: export UVR to ONNX
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        shell: bash
        run: |
          pip install "numpy<2" onnx==1.17.0 onnxruntime==1.17.1 onnxmltools kaldi-native-fbank librosa soundfile

      - name: Run
        shell: bash
        run: |
          cd scripts/uvr_mdx
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/audio_example.wav
          ls -lh audio_example.wav
          ./run.sh

      - name: Collect mp3 files
        shell: bash
        run: |
          mv -v scripts/uvr_mdx/*.mp3 ./
          ls -lh *.mp3

      - uses: actions/upload-artifact@v4
        with:
          name: generated-mp3
          path: ./*.mp3

      - name: Collect models
        shell: bash
        run: |
          mv -v scripts/uvr_mdx/models/*.onnx ./
          ls -lh *.onnx

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.onnx
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: source-separation-models

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            rm -rf huggingface
            git clone https://huggingface.co/k2-fsa/sherpa-onnx-models huggingface
            cd huggingface
            mkdir -p source-separation-models
            cp -av ../*.onnx ./source-separation-models
            git lfs track "*.onnx"
            git status
            git add .
            ls -lh
            git status
            git commit -m "add source separation models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-models main


================================================
FILE: .github/workflows/export-vits-ljspeech-to-onnx.yaml
================================================
name: export-vits-ljspeech-to-onnx

on:
  push:
    branches:
      - master
    paths:
      - 'scripts/vits/**'
      - '.github/workflows/export-vits-ljspeech-to-onnx.yaml'
  pull_request:
    paths:
      - 'scripts/vits/**'
      - '.github/workflows/export-vits-ljspeech-to-onnx.yaml'

  workflow_dispatch:

concurrency:
  group: export-vits-ljspeech-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-vits-ljspeech-onnx:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: vits ljspeech
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        torch: ["1.13.0"]

    steps:
      - uses: actions/checkout@v4

      - name: Install dependencies
        shell: bash
        run: |
          python3 -m pip install -qq torch==${{ matrix.torch }}+cpu -f https://download.pytorch.org/whl/torch_stable.html numpy
          python3 -m pip install onnxruntime onnx soundfile
          python3 -m pip install scipy cython unidecode phonemizer

          # required by phonemizer
          # See https://bootphon.github.io/phonemizer/install.html
          # To fix the following error: RuntimeError: espeak not installed on your system
          #
          sudo apt-get install festival espeak-ng mbrola


      - name: export vits ljspeech
        shell: bash
        run: |
          cd scripts/vits

          echo "Downloading vits"
          git clone https://github.com/jaywalnut310/vits
          pushd vits/monotonic_align
          python3 setup.py build
          ls -lh build/
          ls -lh build/lib*/
          ls -lh build/lib*/*/

          cp build/lib*/monotonic_align/core*.so .
          sed -i.bak s/.monotonic_align.core/.core/g ./__init__.py
          git diff
          popd

          export PYTHONPATH=$PWD/vits:$PYTHONPATH

          echo "Download models"

          wget -qq https://huggingface.co/csukuangfj/vits-ljs/resolve/main/pretrained_ljs.pth
          wget -qq https://huggingface.co/csukuangfj/vits-ljs/resolve/main/lexicon.txt
          wget -qq https://huggingface.co/csukuangfj/vits-ljs/resolve/main/tokens.txt
          wget -qq https://huggingface.co/csukuangfj/vits-ljs/resolve/main/test.py

          python3 ./export-onnx-ljs.py --config vits/configs/ljs_base.json --checkpoint ./pretrained_ljs.pth
          python3 ./test.py
          ls -lh *.wav

      - uses: actions/upload-artifact@v4
        with:
          name: test-0.wav
          path: scripts/vits/test-0.wav

      - uses: actions/upload-artifact@v4
        with:
          name: test-1.wav
          path: scripts/vits/test-1.wav

      - uses: actions/upload-artifact@v4
        with:
          name: test-2.wav
          path: scripts/vits/test-2.wav


================================================
FILE: .github/workflows/export-vocos.yaml
================================================
name: export-vocos-to-onnx

on:
  push:
    branches:
      - export-vocos

  workflow_dispatch:

concurrency:
  group: export-vocos-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-vocos-to-onnx:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: export vocos ${{ matrix.version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Python dependencies
        shell: bash
        run: |
          pip install "numpy<=1.26.4" onnx==1.16.0 onnxruntime==1.17.1 soundfile piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html kaldi_native_fbank

      - name: Run
        shell: bash
        run: |
          cd scripts/vocos
          ./run.sh
          ls -lh

      - name: Collect results
        shell: bash
        run: |
          cp -v scripts/vocos/vocos-22khz-univ.onnx .
          cp -v scripts/vocos/*.wav .

      - uses: actions/upload-artifact@v4
        with:
          name: generated-waves
          path: ./*.wav

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-models huggingface
            cd huggingface
            git fetch
            git pull

            d=vocoder-models
            mkdir -p $d

            cp -a ../vocos-22khz-univ.onnx $d/

            git lfs track "*.onnx"
            git add .

            ls -lh

            git status

            git commit -m "add models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-models main || true

      - name: Release
        if: github.repository_owner == 'csukuangfj'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.onnx
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: vocoder-models

      - name: Release
        if: github.repository_owner == 'k2-fsa'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.onnx
          overwrite: true
          tag: vocoder-models


================================================
FILE: .github/workflows/export-wenet-to-onnx.yaml
================================================
name: export-wenet-to-onnx

on:
  workflow_dispatch:

concurrency:
  group: export-wenet-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-wenet-to-onnx:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: export wenet
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.8"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Run
        shell: bash
        run: |
          sudo apt-get install tree sox
          cd scripts/wenet
          ./run.sh

      - name: Publish to huggingface (aishell)
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-zh-wenet-aishell huggingface
            cd huggingface
            git fetch
            git pull

            cp -v ../scripts/wenet/aishell_u2pp_conformer_exp/*.onnx .
            cp -v ../scripts/wenet/aishell_u2pp_conformer_exp/units.txt tokens.txt
            cp -v ../scripts/wenet/aishell_u2pp_conformer_exp/README.md .

            if [ ! -d test_wavs ]; then
              mkdir test_wavs
              cd test_wavs
              wget -q https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23/resolve/main/test_wavs/0.wav
              wget -q https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23/resolve/main/test_wavs/1.wav
              wget -q https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23/resolve/main/test_wavs/8k.wav
              cd ..
            fi
            git lfs track "*.onnx"
            git add .

            git commit -m "add aishell models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-zh-wenet-aishell main || true

            cd ..

            rm -rf huggingface/.git
            dst=sherpa-onnx-zh-wenet-aishell

            mv huggingface $dst

            tar cjvf $dst.tar.bz2 $dst
            rm -rf $dst

      - name: Publish to huggingface (aishell2)
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v2
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-zh-wenet-aishell2 huggingface
            cd huggingface
            git fetch
            git pull

            cp -v ../scripts/wenet/aishell2_u2pp_conformer_exp/*.onnx .
            cp -v ../scripts/wenet/aishell2_u2pp_conformer_exp/units.txt tokens.txt
            cp -v ../scripts/wenet/aishell2_u2pp_conformer_exp/README.md .

            if [ ! -d test_wavs ]; then
              mkdir test_wavs
              cd test_wavs
              wget -q https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23/resolve/main/test_wavs/0.wav
              wget -q https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23/resolve/main/test_wavs/1.wav
              wget -q https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23/resolve/main/test_wavs/8k.wav
              cd ..
            fi
            git lfs track "*.onnx"
            git add .

            git commit -m "add aishell2 models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-zh-wenet-aishell2 main || true

            cd ..

            rm -rf huggingface/.git
            dst=sherpa-onnx-zh-wenet-aishell2

            mv huggingface $dst

            tar cjvf $dst.tar.bz2 $dst
            rm -rf $dst

      - name: Publish to huggingface (multi_cn)
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v2
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-zh-wenet-multi-cn huggingface
            cd huggingface
            git fetch
            git pull

            cp -v ../scripts/wenet/multi_cn_unified_conformer_exp/*.onnx .
            cp -v ../scripts/wenet/multi_cn_unified_conformer_exp/units.txt tokens.txt
            cp -v ../scripts/wenet/multi_cn_unified_conformer_exp/README.md .

            if [ ! -d test_wavs ]; then
              mkdir test_wavs
              cd test_wavs
              wget -q https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23/resolve/main/test_wavs/0.wav
              wget -q https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23/resolve/main/test_wavs/1.wav
              wget -q https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23/resolve/main/test_wavs/8k.wav
              cd ..
            fi
            git lfs track "*.onnx"
            git add .

            git commit -m "add multi_cn models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-zh-wenet-multi-cn main || true

            cd ..

            rm -rf huggingface/.git
            dst=sherpa-onnx-zh-wenet-multi-cn

            mv huggingface $dst

            tar cjvf $dst.tar.bz2 $dst
            rm -rf $dst

      - name: Publish to huggingface (wenetspeech)
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v2
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-zh-wenet-wenetspeech huggingface
            cd huggingface
            git fetch
            git pull

            cp -v ../scripts/wenet/20220506_u2pp_conformer_exp/*.onnx .
            cp -v ../scripts/wenet/20220506_u2pp_conformer_exp/units.txt tokens.txt
            cp -v ../scripts/wenet/20220506_u2pp_conformer_exp/README.md .

            if [ ! -d test_wavs ]; then
              mkdir test_wavs
              cd test_wavs
              wget -q https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23/resolve/main/test_wavs/0.wav
              wget -q https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23/resolve/main/test_wavs/1.wav
              wget -q https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23/resolve/main/test_wavs/8k.wav
              cd ..
            fi
            git lfs track "*.onnx"
            git add .

            git commit -m "add wenetspeech models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-zh-wenet-wenetspeech main || true

            cd ..

            rm -rf huggingface/.git
            dst=sherpa-onnx-zh-wenet-wenetspeech

            mv huggingface $dst

            tar cjvf $dst.tar.bz2 $dst
            rm -rf $dst

      - name: Publish to huggingface (librispeech)
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v2
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-en-wenet-librispeech huggingface
            cd huggingface
            git fetch
            git pull

            cp -v ../scripts/wenet/librispeech_u2pp_conformer_exp/*.onnx .
            cp -v ../scripts/wenet/librispeech_u2pp_conformer_exp/units.txt tokens.txt
            cp -v ../scripts/wenet/librispeech_u2pp_conformer_exp/README.md .

            if [ ! -d test_wavs ]; then
              mkdir test_wavs
              cd test_wavs
              wget -q https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21/resolve/main/test_wavs/0.wav
              wget -q https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21/resolve/main/test_wavs/1.wav
              wget -q https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21/resolve/main/test_wavs/8k.wav
              wget -q https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21/resolve/main/test_wavs/trans.txt
              cd ..
            fi
            git lfs track "*.onnx"
            git add .

            git commit -m "add librispeech models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-en-wenet-librispeech main || true

            cd ..

            rm -rf huggingface/.git
            dst=sherpa-onnx-en-wenet-librispeech

            mv huggingface $dst

            tar cjvf $dst.tar.bz2 $dst
            rm -rf $dst

      - name: Publish to huggingface (gigaspeech)
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v2
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-en-wenet-gigaspeech huggingface
            cd huggingface
            git fetch
            git pull

            cp -v ../scripts/wenet/20210728_u2pp_conformer_exp/*.onnx .
            cp -v ../scripts/wenet/20210728_u2pp_conformer_exp/units.txt tokens.txt
            cp -v ../scripts/wenet/20210728_u2pp_conformer_exp/README.md .

            if [ ! -d test_wavs ]; then
              mkdir test_wavs
              cd test_wavs
              wget -q https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21/resolve/main/test_wavs/0.wav
              wget -q https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21/resolve/main/test_wavs/1.wav
              wget -q https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21/resolve/main/test_wavs/8k.wav
              wget -q https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21/resolve/main/test_wavs/trans.txt
              cd ..
            fi
            git lfs track "*.onnx"
            git add .

            git commit -m "add gigaspeech models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-en-wenet-gigaspeech main || true

            cd ..

            rm -rf huggingface/.git
            dst=sherpa-onnx-en-wenet-gigaspeech

            mv huggingface $dst

            tar cjvf $dst.tar.bz2 $dst
            rm -rf $dst

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models


================================================
FILE: .github/workflows/export-wespeaker-to-onnx.yaml
================================================
name: export-wespeaker-to-onnx

on:
  workflow_dispatch:

concurrency:
  group: export-wespeaker-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-wespeaker-to-onnx:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: export wespeaker
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.8"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Python dependencies
        shell: bash
        run: |
          pip install kaldi-native-fbank numpy onnx onnxruntime

      - name: Run
        shell: bash
        run: |
          cd scripts/wespeaker
          ./run.sh

          mv -v *.onnx ../..

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.onnx
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: speaker-recongition-models

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            d=speaker-embedding-models
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false
            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
            mv -v ./*.onnx ./huggingface
            cd huggingface
            git lfs track "*.onnx"
            git status
            git add .
            git status
            git commit -m "add models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main


================================================
FILE: .github/workflows/export-whisper-to-ascend-npu.yaml
================================================
name: export-whisper-to-ascend-npu

on:
  push:
    branches:
      - fix-ascend-2
  workflow_dispatch:

concurrency:
  group: export-whisper-to-ascend-npu-${{ github.ref }}
  cancel-in-progress: true

jobs:
  generate_build_matrix:
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    # see https://github.com/pytorch/pytorch/pull/50633
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Generating build matrix
        id: set-matrix
        run: |
          # outputting for debugging purposes
          python3 .github/scripts/export-ascend/generate_whisper.py
          MATRIX=$(python3 .github/scripts/export-ascend/generate_whisper.py)

          # deprecated
          # echo "::set-output name=matrix::${MATRIX}"
          echo "matrix=$MATRIX" >> $GITHUB_OUTPUT

  export-whisper-to-ascend-npu:
    needs: generate_build_matrix
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: ${{ matrix.model }} ${{ matrix.soc_version }} ${{ matrix.cann }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}

    container:
      image: ${{ matrix.image }}

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python 3.8
        uses: actions/setup-python@v5
        with:
          python-version: "3.8"

      - name: Show Python
        shell: bash
        run: |
          python3 --version
          which python3

      - name: Install curl
        shell: bash
        run: |
          apt-get update && apt-get install -y curl bzip2 git git-lfs

      - name: Verify environment
        shell: bash
        run: |
          ls -lh /usr/local/Ascend/ascend-toolkit/set_env.sh

          find /usr/local/Ascend -name "libascend*.so" 2>/dev/null


          source /usr/local/Ascend/ascend-toolkit/set_env.sh
          export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib/linux/x86_64:$LD_LIBRARY_PATH

          # for cann 7.0.0
          export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib/x86_64:$LD_LIBRARY_PATH

          echo "CANN environment:"
          which atc || echo "atc not found"
          atc --help

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install "numpy<2" \
                  onnx==1.17.0 \
                  onnxruntime==1.17.1 \
                  torch==2.0.0+cpu -f https://download.pytorch.org/whl/torch \
                  torchaudio==2.0.0+cpu -f https://download.pytorch.org/whl/torchaudio \
                  openai-whisper \
                  attrs psutil scipy decorator cloudpickle ml-dtypes tornado \
                  sentencepiece \
                  pyyaml

      - name: export ${{ matrix.model }} to ONNX
        shell: bash
        run: |
          cd scripts/whisper/ascend-npu
          model=${{ matrix.model }}
          echo "model: $model"
          if [[ $model == distil-medium.en ]]; then
            curl -L -s -o distil-medium-en-original-model.bin https://huggingface.co/distil-whisper/distil-medium.en/resolve/main/original-model.bin
            ls -lh
          elif [[ $model == distil-large-v2 ]]; then
            curl -L -s -o distil-large-v2-original-model.bin https://huggingface.co/distil-whisper/distil-large-v2/resolve/main/original-model.bin
            ls -lh
          elif [[ $model == distil-large-v3 ]]; then
            curl -L -s -o distil-large-v3-original-model.bin https://huggingface.co/distil-whisper/distil-large-v3-openai/resolve/main/model.bin
            ls -lh
          elif [[ $model == distil-large-v3.5 ]]; then
            curl -L -s -o distil-large-v3.5-original-model.bin https://huggingface.co/distil-whisper/distil-large-v3.5-openai/resolve/main/model.bin
            ls -lh
          elif [[ $model == distil-small.en ]]; then
            curl -L -s -o distil-small-en-original-model.bin https://huggingface.co/distil-whisper/distil-small.en/resolve/main/original-model.bin
            ls -lh
          elif [[ $model == medium-aishell ]]; then
            curl -L -s -o medium-aishell.pt https://huggingface.co/yuekai/icefall_asr_aishell_whisper/resolve/main/exp_medium/whisper-medium-aishell1-epoch-10-avg-4.pt
            ls -lh
          fi
          python3 ./export_onnx.py --model ${{ matrix.model }}


          ls -lh

          ls -lh ~/.cache/whisper || true
          ls -lh distil*original-model.bin || true
          rm -rf ~/.cache/whisper
          rm -f distil*original-model.bin
          rm -f medium-aishell.pt

      - name: export ${{ matrix.model }} ONNX to Ascend OM
        shell: bash
        run: |
          cd scripts/whisper/ascend-npu
          ls -lh *.onnx

          source /usr/local/Ascend/ascend-toolkit/set_env.sh
          export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib/linux/x86_64:$LD_LIBRARY_PATH

          # for cann 7.0.0
          export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib/x86_64:$LD_LIBRARY_PATH

          soc_version=${{ matrix.soc_version }}
          cann=${{ matrix.cann }}

          model=${{ matrix.model }}

          atc --model=./${model}-encoder.onnx \
            --framework=5 \
            --host_env_os=linux \
            --host_env_cpu=aarch64 \
            --output=${model}-encoder \
            --input_format=ND \
            --soc_version="Ascend${soc_version}"

          ls -lh *.om

          atc --model=./${model}-decoder.onnx \
            --framework=5 \
            --host_env_os=linux \
            --host_env_cpu=aarch64 \
            --output=${model}-decoder \
            --input_format=ND \
            --soc_version="Ascend${soc_version}"

          ls -lh *.om

          rm -v *.onnx

          echo "collect results"
          d=sherpa-onnx-ascend-${soc_version}-cann-${cann}-whisper-$model

          mkdir -p $d
          mkdir -p $d/test_wavs

          pushd $d/test_wavs
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-whisper-medium.en/resolve/main/test_wavs/0.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-whisper-medium.en/resolve/main/test_wavs/1.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-whisper-medium.en/resolve/main/test_wavs/8k.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-whisper-medium.en/resolve/main/test_wavs/trans.txt
          popd

          cp -v $model-encoder*.om $d/${model}-encoder.om
          cp -v $model-decoder*.om $d/${model}-decoder.om
          cp -v $model-tokens.txt $d/
          cp -v test_om.py $d
          ls -lh $d

          tar cjfv $d.tar.bz2 $d
          ls -lh *.tar.bz2
          rm -rf $d

          rm -v *.om

          echo "----show---"
          ls -lh *.tar.bz2

          mv *.tar.bz2 ../../..

      - name: Release
        if: github.repository_owner == 'csukuangfj'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models-ascend

      - name: Release
        if: github.repository_owner == 'k2-fsa'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          tag: asr-models-ascend

      - name: Publish to huggingface
        if: true
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            for m in "*.tar.bz2"; do
              export GIT_LFS_SKIP_SMUDGE=1
              export GIT_CLONE_PROTECTION_ACTIVE=false
              rm -rf huggingface
              git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-models huggingface

              d=asr-models/ascend-npu/whisper
              mkdir -p huggingface/$d

              cp -v $m huggingface/$d/

              pushd huggingface
              git lfs track "*.tar.bz2"
              ls -lh $d/$m

              ls -lh $d

              pushd $d
              git lfs track "*.tar.bz2"
              popd

              git status
              git add .

              git commit -m "add $m"
              git push https://csukuangfj2:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-models main
              popd
            done
            rm -rf huggingface

      - name: Publish to modelscope
        if: true
        env:
          MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            models=(
              sherpa-onnx-ascend-${{ matrix.soc_version }}-cann-${{ matrix.cann }}-whisper-${{ matrix.model }}.tar.bz2
            )
            for m in "*.tar.bz2"; do
              export GIT_LFS_SKIP_SMUDGE=1
              export GIT_CLONE_PROTECTION_ACTIVE=false

              rm -rf ms
              git clone https://oauth2:${MS_TOKEN}@www.modelscope.cn/csukuangfj/asr-models.git ms

              d=ascend-npu/whisper
              mkdir -p ms/$d

              cp -av $m ms/$d/

              pushd ms
              git lfs track "*.tar.bz2"
              git status
              ls -lh $d/$m

              ls -lh $d

              git add .

              git commit -m "add $m"
              git push https://oauth2:${MS_TOKEN}@www.modelscope.cn/csukuangfj/asr-models.git

              popd
            done
            rm -rf ms


================================================
FILE: .github/workflows/export-whisper-to-onnx.yaml
================================================
name: export-whisper-to-onnx

on:
  workflow_dispatch:

concurrency:
  group: release-whisper-${{ github.ref }}
  cancel-in-progress: true

jobs:
  release-whisper-models:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: ${{ matrix.model }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        model: ["turbo", "distil-medium.en", "distil-small.en",  "tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "medium-aishell", "large", "large-v1", "large-v2", "large-v3", "distil-large-v2", "distil-large-v3", "distil-large-v3.5"]
        # model: ["large", "large-v1", "large-v2", "large-v3", "distil-large-v2"]
        # model: ["distil-large-v3.5", "distil-large-v3"]
        python-version: ["3.8"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        shell: bash
        run: |
          python3 -m pip install torch==1.13.0 torchaudio==0.13.0 -f https://download.pytorch.org/whl/cpu/torch_stable.html
          python3 -m pip install -U openai-whisper
          python3 -m pip install onnxruntime onnx soundfile librosa

      - name: export ${{ matrix.model }}
        shell: bash
        run: |
          cd scripts/whisper
          model=${{ matrix.model }}
          echo "model: $model"
          if [[ $model == distil-medium.en ]]; then
            wget -q -O distil-medium-en-original-model.bin https://huggingface.co/distil-whisper/distil-medium.en/resolve/main/original-model.bin
            ls -lh
          elif [[ $model == distil-large-v2 ]]; then
            wget -q -O distil-large-v2-original-model.bin https://huggingface.co/distil-whisper/distil-large-v2/resolve/main/original-model.bin
            ls -lh
          elif [[ $model == distil-large-v3 ]]; then
            wget -q -O distil-large-v3-original-model.bin https://huggingface.co/distil-whisper/distil-large-v3-openai/resolve/main/model.bin
            ls -lh
          elif [[ $model == distil-large-v3.5 ]]; then
            wget -q -O distil-large-v3.5-original-model.bin https://huggingface.co/distil-whisper/distil-large-v3.5-openai/resolve/main/model.bin
            ls -lh
          elif [[ $model == distil-small.en ]]; then
            wget -q -O distil-small-en-original-model.bin https://huggingface.co/distil-whisper/distil-small.en/resolve/main/original-model.bin
            ls -lh
          elif [[ $model == medium-aishell ]]; then
            wget -q -O medium-aishell.pt https://huggingface.co/yuekai/icefall_asr_aishell_whisper/resolve/main/exp_medium/whisper-medium-aishell1-epoch-10-avg-4.pt
            ls -lh
          fi
          python3 ./export-onnx.py --model ${{ matrix.model }}
          # python3 -m onnxruntime.tools.convert_onnx_models_to_ort --optimization_style=Fixed ./
          #


          ls -lh

          ls -lh ~/.cache/whisper || true
          ls -lh distil*original-model.bin || true
          rm -rf ~/.cache/whisper
          rm -f distil*original-model.bin
          rm -f medium-aishell.pt

          src=sherpa-onnx-whisper-${{ matrix.model }}

          cd ..
          mkdir $src
          mv -v whisper/$model* $src/

          echo "------------------------------"

          cd $src
          du -h -d1 .
          ls -lh
          mkdir -p test_wavs
          cd test_wavs
          wget -q https://huggingface.co/csukuangfj/sherpa-onnx-whisper-medium.en/resolve/main/test_wavs/0.wav
          wget -q https://huggingface.co/csukuangfj/sherpa-onnx-whisper-medium.en/resolve/main/test_wavs/1.wav
          wget -q https://huggingface.co/csukuangfj/sherpa-onnx-whisper-medium.en/resolve/main/test_wavs/8k.wav
          wget -q https://huggingface.co/csukuangfj/sherpa-onnx-whisper-medium.en/resolve/main/test_wavs/trans.txt
          cd ../..
          mv $src ../
          echo "pwd: $PWD"

          cd ../
          echo "--------------------"
          ls -lh
          ls -lh $src
          echo "--------------------"

          if [[ $model == medium-aishell ]]; then
            ls -lh *.onnx # the float32 onnx model for medium-aishell is too large to be uploaded to GitHub
            mkdir -p bak
            mv -v $src/$model-encoder.onnx ./bak
            mv -v $src/$model-decoder.onnx ./bak
            ls -lh $src

            tar cvjf $src.tar.bz2 $src
            mv -v ./bak/* $src/
            rm -rf bak
          elif [[ -f $src/$model-encoder.weights ]]; then
            # we only publish int8 models to GitHub for large Whisper models
            mkdir -p bak
            mv -v $src/*weights ./bak
            mv -v $src/$model-encoder.onnx ./bak
            mv -v $src/$model-decoder.onnx ./bak
            ls -lh $src

            tar cvjf $src.tar.bz2 $src
            mv -v ./bak/* $src/
            rm -rf bak
          else
            tar cvjf $src.tar.bz2 $src
          fi

          ls -lh *.tar.bz2

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar*
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models

      - name: Publish ${{ matrix.model }} to huggingface
        shell: bash
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          src=sherpa-onnx-whisper-${{ matrix.model }}

          git config --global user.email "csukuangfj@gmail.com"
          git config --global user.name "Fangjun Kuang"

          export GIT_CLONE_PROTECTION_ACTIVE=false

          export GIT_LFS_SKIP_SMUDGE=1

          git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-whisper-${{ matrix.model }} huggingface

          rm -rf huggingface/*

          cp -av $src/* ./huggingface/

          cd huggingface

          git status
          ls -lh
          git lfs track "*.wav*"
          git lfs track "*onnx*"
          git lfs track "*weights*"

          git add .
          git commit -m "upload ${{ matrix.model }}"
          git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-whisper-${{ matrix.model }} main

      - name: Test float32 ${{ matrix.model }}
        shell: bash
        run: |
          python3 -m pip install kaldi-native-fbank
          model=${{ matrix.model }}
          src=sherpa-onnx-whisper-$model
          time python3 scripts/whisper/test.py \
            --encoder $src/$model-encoder.onnx \
            --decoder $src/$model-decoder.onnx \
            --tokens $src/$model-tokens.txt \
            $src/test_wavs/0.wav

      - name: Test int8 ${{ matrix.model }}
        shell: bash
        run: |
          model=${{ matrix.model }}
          src=sherpa-onnx-whisper-$model
          time python3 scripts/whisper/test.py \
            --encoder $src/$model-encoder.int8.onnx \
            --decoder $src/$model-decoder.int8.onnx \
            --tokens $src/$model-tokens.txt \
            $src/test_wavs/0.wav


================================================
FILE: .github/workflows/export-zipformer-ctc-to-ascend-20250703.yaml
================================================
name: export-zipformer-ctc-to-ascend-npu-20250703

on:
  push:
    branches:
      - fix-ascend-2
  workflow_dispatch:

concurrency:
  group: export-zipformer-ctc-to-ascend-npu-20250703-${{ github.ref }}
  cancel-in-progress: true

jobs:
  generate_build_matrix:
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    # see https://github.com/pytorch/pytorch/pull/50633
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Generating build matrix
        id: set-matrix
        run: |
          # outputting for debugging purposes
          python3 .github/scripts/export-ascend/generate_zipformer_ctc_20250703.py
          MATRIX=$(python3 .github/scripts/export-ascend/generate_zipformer_ctc_20250703.py)

          # deprecated
          # echo "::set-output name=matrix::${MATRIX}"
          echo "matrix=$MATRIX" >> $GITHUB_OUTPUT

  export-zipformer-ctc-to-ascend-npu-20250703:
    needs: generate_build_matrix
    name: ${{ matrix.soc_version }} ${{ matrix.cann }} ${{ matrix.num_seconds }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}

    container:
      image: ${{ matrix.image }}

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python 3.8
        uses: actions/setup-python@v5
        with:
          python-version: "3.8"

      - name: Show Python
        shell: bash
        run: |
          python3 --version
          which python3

      - name: Install curl
        shell: bash
        run: apt-get update && apt-get install -y curl bzip2 git git-lfs

      - name: Verify environment
        shell: bash
        run: |
          ls -lh /usr/local/Ascend/ascend-toolkit/set_env.sh

          find /usr/local/Ascend -name "libascend*.so" 2>/dev/null


          source /usr/local/Ascend/ascend-toolkit/set_env.sh
          export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib/linux/x86_64:$LD_LIBRARY_PATH

          # for cann 7.0.0
          export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib/x86_64:$LD_LIBRARY_PATH

          echo "CANN environment:"
          which atc || echo "atc not found"
          atc --help

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install "numpy<2" \
                  onnx==1.17.0 \
                  torch==2.0.0+cpu -f https://download.pytorch.org/whl/torch \
                  attrs psutil scipy decorator cloudpickle ml-dtypes tornado \
                  sentencepiece \
                  pyyaml

      - name: Export ${{ matrix.num_seconds }}
        shell: bash
        run: |
          mkdir tmp
          cd tmp

          t=${{ matrix.num_seconds }}
          num_frames=$(($t*100))

          echo "num_frames: $num_frames"

          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-ctc-zh-2025-07-03-source-models/resolve/main/generate_test_data.py
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-ctc-zh-2025-07-03-source-models/resolve/main/test.py
          chmod +x generate_test_data.py

          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-ctc-zh-2025-07-03-source-models/resolve/main/0.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-ctc-zh-2025-07-03-source-models/resolve/main/1.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-ctc-zh-2025-07-03-source-models/resolve/main/8k.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-ctc-zh-2025-07-03-source-models/resolve/main/tokens.txt


          source /usr/local/Ascend/ascend-toolkit/set_env.sh
          export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib/linux/x86_64:$LD_LIBRARY_PATH

          # for cann 7.0.0
          export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib/x86_64:$LD_LIBRARY_PATH

          soc_version=${{ matrix.soc_version }}
          cann=${{ matrix.cann }}

          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-ctc-zh-2025-07-03-source-models/resolve/main/model-$t-seconds.onnx
          mv model-$t-seconds.onnx model.onnx

          atc --model=./model.onnx \
            --framework=5 \
            --host_env_os=linux \
            --host_env_cpu=aarch64 \
            --output=model \
            --input_format=ND \
            --input_shape="x:1,${num_frames},80" \
            --soc_version="Ascend${soc_version}"

          rm -v *.onnx

          ls -lh *.om

          echo "collect results"
          d=sherpa-onnx-ascend-${soc_version}-cann-${cann}-$t-seconds-zipformer-ctc-zh-2025-07-03

          mkdir -p $d
          mkdir -p $d/test_wavs

          cp -v model_linux_aarch64.om $d/model.om || cp -v model.om $d/model.om
          cp -v tokens.txt $d
          cp -v ../scripts/zipformer-ctc/ascend/2025-07-03/onnx_test.py $d
          cp -v ../scripts/zipformer-ctc/ascend/2025-07-03/test_om.py $d
          cp -v *.wav $d/test_wavs
          ls -lh $d
          tar cjfv $d.tar.bz2 $d
          ls -lh *.tar.bz2
          rm -rf $d

          rm -v *.om

          echo "----show---"
          ls -lh *.tar.bz2

          mv *.tar.bz2 ../

      - name: Release
        if: github.repository_owner == 'csukuangfj'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models-ascend

      - name: Release
        if: github.repository_owner == 'k2-fsa'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          tag: asr-models-ascend

      - name: Publish to huggingface
        if: true
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            for m in "*.tar.bz2"; do
              export GIT_LFS_SKIP_SMUDGE=1
              export GIT_CLONE_PROTECTION_ACTIVE=false
              rm -rf huggingface
              git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-models huggingface

              d=asr-models/ascend-npu/zipformer-ctc
              mkdir -p huggingface/$d

              cp -v $m huggingface/$d/

              pushd huggingface
              git lfs track "*.tar.bz2"
              ls -lh $d
              pushd $d
              git lfs track "*.tar.bz2"
              popd

              git status
              git add .

              git commit -m "add $m"
              git push https://csukuangfj2:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-models main
              popd
            done
            rm -rf huggingface

      - name: Publish to modelscope
        if: true
        env:
          MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            for m in "*.tar.bz2"; do
              export GIT_LFS_SKIP_SMUDGE=1
              export GIT_CLONE_PROTECTION_ACTIVE=false

              rm -rf ms
              git clone https://oauth2:${MS_TOKEN}@www.modelscope.cn/csukuangfj/asr-models.git ms

              d=ascend-npu/zipformer-ctc
              mkdir -p ms/$d

              cp -av $m ms/$d/

              pushd ms
              git lfs track "*.tar.bz2"
              git status
              ls -lh $d/$m

              ls -lh $d
              git add .

              git commit -m "add $m"
              git push https://oauth2:${MS_TOKEN}@www.modelscope.cn/csukuangfj/asr-models.git

              popd
            done
            rm -rf ms


================================================
FILE: .github/workflows/export-zipformer-ctc-to-qnn-20250703.yaml
================================================
name: export-zipformer-ctc-to-qnn-20250703

on:
  push:
    branches:
      - zipformer-qnn-model-2
  workflow_dispatch:

concurrency:
  group: export-zipformer-ctc-to-qnn-20250703-${{ github.ref }}
  cancel-in-progress: true

jobs:
  generate_build_matrix:
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    # see https://github.com/pytorch/pytorch/pull/50633
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Generating build matrix
        id: set-matrix
        run: |
          # outputting for debugging purposes
          python3 .github/scripts/export-qnn/generate_zipformer.py
          MATRIX=$(python3 .github/scripts/export-qnn/generate_zipformer.py)

          # deprecated
          # echo "::set-output name=matrix::${MATRIX}"
          echo "matrix=$MATRIX" >> $GITHUB_OUTPUT

  export-zipformer-ctc-to-qnn-20250703:
    needs: generate_build_matrix
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: ${{ matrix.model_name }} ${{ matrix.input_in_seconds }} ${{ matrix.soc }}
    runs-on: ubuntu-22.04
    strategy:
      fail-fast: false
      matrix:
        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python 3.10
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Create directories
        shell: bash
        run: |
          mkdir so binary

      - name: Display NDK HOME
        shell: bash
        run: |
          echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
          ls -lh ${ANDROID_NDK_LATEST_HOME}

      - name: Create Python virtual environment
        shell: bash
        run: |
          python3 -m venv py310
          which python3
          source py310/bin/activate
          which python3

      - name: Show ndk-build help
        shell: bash
        run: |
          export PATH=${ANDROID_NDK_LATEST_HOME}:$PATH
          ndk-build --help

      - name: Download toolkit
        shell: bash
        run: |
          curl -SL -O https://huggingface.co/csukuangfj/qnn-toolkit/resolve/main/v2.33.0.250327.zip
          ls -lh v2.33.0.250327.zip

      - name: Unzip toolkit
        shell: bash
        run: |
          unzip v2.33.0.250327.zip

      - name: Show
        shell: bash
        run: |
          ls -lh

          echo "---ls -lh qairt---"

          ls -lh qairt

          echo "---"

      - name: Install linux dependencies
        shell: bash
        run: |
          ls -lh

          echo "---"

          ls -lh qairt

          cd qairt/2.33.0.250327/bin
          source envsetup.sh

          yes | sudo ${QNN_SDK_ROOT}/bin/check-linux-dependency.sh || true

      - name: Install Python dependencies
        shell: bash
        run: |
          source py310/bin/activate

          cd qairt/2.33.0.250327/bin
          source envsetup.sh

          python3 -m pip install \
            mock \
            numpy \
            opencv-python \
            optuna \
            packaging \
            pandas \
            paramiko \
            pathlib2 \
            pillow \
            plotly \
            protobuf \
            psutil \
            pydantic \
            pytest \
            pyyaml \
            rich \
            scikit-optimize \
            scipy \
            six \
            tabulate \
            typing-extensions \
            xlsxwriter

          python3 "${QNN_SDK_ROOT}/bin/check-python-dependency" || true

          which python3

      - name: Install onnx dependencies
        shell: bash
        run: |
          source py310/bin/activate
          python3 -m pip install --upgrade \
            torch==2.0.0+cpu -f https://download.pytorch.org/whl/torch \
            kaldi_native_fbank \
            pip \
            "numpy<2" \
            onnx==1.17.0 \
            onnxruntime==1.17.1 \
            soundfile \
            librosa \
            onnxsim \
            sentencepiece \
            pyyaml

          which python3

      - name: Show qnn-onnx-converter help
        shell: bash
        run: |
          source py310/bin/activate

          pushd qairt/2.33.0.250327/bin
          source envsetup.sh
          popd

          qnn-onnx-converter --help

      - name: Show qnn-model-lib-generator help
        shell: bash
        run: |
          source py310/bin/activate

          pushd qairt/2.33.0.250327/bin
          source envsetup.sh
          popd

          qnn-model-lib-generator --help

      - name: Show qnn-net-run help
        shell: bash
        run: |
          source py310/bin/activate

          pushd qairt/2.33.0.250327/bin
          source envsetup.sh
          popd

          qnn-net-run --help

      - name: Run ${{ matrix.input_in_seconds }}
        if: matrix.model_name == '20250703'
        shell: bash
        run: |
          source py310/bin/activate

          pushd qairt/2.33.0.250327/bin
          source envsetup.sh
          popd

          export PATH=${ANDROID_NDK_LATEST_HOME}:$PATH
          export LDFLAGS="-Wl,-z,max-page-size=16384"
          dir=$PWD

          mkdir tmp

          cd tmp

          t=${{ matrix.input_in_seconds }}
          num_frames=$(($t*100))

          echo "num_frames: $num_frames"

          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-ctc-zh-2025-07-03-source-models/resolve/main/generate_test_data.py
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-ctc-zh-2025-07-03-source-models/resolve/main/test.py
          chmod +x generate_test_data.py

          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-ctc-zh-2025-07-03-source-models/resolve/main/0.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-ctc-zh-2025-07-03-source-models/resolve/main/1.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-ctc-zh-2025-07-03-source-models/resolve/main/8k.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-ctc-zh-2025-07-03-source-models/resolve/main/tokens.txt

          ./generate_test_data.py --num-frames $num_frames --wav 0.wav
          ./generate_test_data.py --num-frames $num_frames --wav 1.wav
          ./generate_test_data.py --num-frames $num_frames --wav 8k.wav

          echo -e "0.raw\n1.raw\n8k.raw" > input_list.txt


          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-ctc-zh-2025-07-03-source-models/resolve/main/model-$t-seconds.onnx

          python3 ../scripts/pyannote/segmentation/show-onnx.py --filename ./model-$t-seconds.onnx


          echo "export to qnn"
          echo "----------$t----------"

          qnn-onnx-converter \
            --input_network model-$t-seconds.onnx \
            --output_path ./model-$t-seconds-quantized \
            --out_node log_probs \
            --input_list ./input_list.txt \
            --use_native_input_files  \
            --input_dtype x float32 \
            --act_bitwidth 16 \
            --bias_bitwidth 32 \
            --input_layout x NTF

          ls -lh
          mv model-$t-seconds-quantized model-$t-seconds-quantized.cpp
          echo "----"
          ls -lh

          python3 "${QNN_SDK_ROOT}/bin/x86_64-linux-clang/qnn-model-lib-generator" \
            -c "model-$t-seconds-quantized.cpp" \
            -b "model-$t-seconds-quantized.bin" \
            -o model_libs > /dev/null 2>&1

          ls -lh model_libs/*/

          readelf -lW model_libs/*/lib*.so

          echo "Generate context binary"

          $dir/scripts/qnn/generate_config.py  \
            --soc ${{ matrix.soc }} \
            --graph-name "model_${t}_seconds_quantized" \
            --output-dir ./my-config \
            --qnn-sdk-root $QNN_SDK_ROOT

          ls -lh my-config

          head -n 1000 my-config/*.json

          $QNN_SDK_ROOT/bin/x86_64-linux-clang/qnn-context-binary-generator \
            --backend $QNN_SDK_ROOT/lib/x86_64-linux-clang/libQnnHtp.so \
            --model ./model_libs/x86_64-linux-clang/libmodel-$t-seconds-quantized.so \
            --output_dir ./binary \
            --binary_file model \
            --config_file ./my-config/htp_backend_extensions.json

          ls -lh binary/

          echo "collect results"

          d=sherpa-onnx-qnn-${{ matrix.soc}}-binary-$t-seconds-zipformer-ctc-zh-2025-07-03-int8
          mkdir -p $d
          mkdir -p $d/test_wavs
          cp -v binary/model.bin $d/
          cp -v tokens.txt $d
          cp -v *.wav $d/test_wavs
          echo "num_frames=$num_frames" > $d/info.txt

          ls -lh $d
          tar cjfv $d.tar.bz2 $d
          ls -lh *.tar.bz2
          rm -rf $d
          mv *.tar.bz2 ../binary/

          for p in x86_64-linux-clang aarch64-android; do
            if [[ $p == x86_64-linux-clang ]]; then
              d=sherpa-onnx-qnn-$t-seconds-zipformer-ctc-zh-2025-07-03-int8-linux-x64
            elif [[ $p == aarch64-android ]]; then
              d=sherpa-onnx-qnn-$t-seconds-zipformer-ctc-zh-2025-07-03-int8-android-aarch64
            else
              echo "Unknown $p"
              exit -1
            fi

            mkdir -p $d
            mkdir -p $d/test_wavs

            cp -v model_libs/$p/lib*.so $d/libmodel.so
            cp -v tokens.txt $d
            cp -v *.wav $d/test_wavs

            echo "num_frames=$num_frames" > $d/info.txt
            echo "target=$p" >> $d/info.txt

            ls -lh $d
            tar cjfv $d.tar.bz2 $d
            ls -lh *.tar.bz2
            rm -rf $d
          done

          echo "----show---"
          ls -lh *.tar.bz2

          mv *.tar.bz2 ../so

      - name: Run ${{ matrix.input_in_seconds }}
        if: matrix.model_name == '20251222'
        shell: bash
        run: |
          source py310/bin/activate

          pushd qairt/2.33.0.250327/bin
          source envsetup.sh
          popd

          export PATH=${ANDROID_NDK_LATEST_HOME}:$PATH
          export LDFLAGS="-Wl,-z,max-page-size=16384"
          dir=$PWD

          mkdir tmp

          cd tmp

          t=${{ matrix.input_in_seconds }}
          num_frames=$(($t*100))

          echo "num_frames: $num_frames"

          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-ctc-zh-2025-07-03-source-models/resolve/main/generate_test_data.py
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-ctc-zh-2025-07-03-source-models/resolve/main/test.py
          chmod +x generate_test_data.py

          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-ctc-zh-2025-07-03-source-models/resolve/main/0.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-ctc-zh-2025-07-03-source-models/resolve/main/1.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-ctc-zh-2025-07-03-source-models/resolve/main/8k.wav
          curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-ctc-zh-2025-07-03-source-models/resolve/main/tokens.txt

          ./generate_test_data.py --num-frames $num_frames --wav 0.wav
          ./generate_test_data.py --num-frames $num_frames --wav 1.wav
          ./generate_test_data.py --num-frames $num_frames --wav 8k.wav

          echo -e "0.raw\n1.raw\n8k.raw" > input_list.txt

          curl -SL -O https://huggingface.co/csukuangfj/2025-12-22/resolve/main/zipformer-ctc-models/model-$t-seconds.onnx

          python3 ../scripts/pyannote/segmentation/show-onnx.py --filename ./model-$t-seconds.onnx


          echo "export to qnn"
          echo "----------$t----------"

          qnn-onnx-converter \
            --input_network model-$t-seconds.onnx \
            --output_path ./model-$t-seconds-quantized \
            --out_node log_probs \
            --input_list ./input_list.txt \
            --use_native_input_files  \
            --input_dtype x float32 \
            --act_bitwidth 16 \
            --bias_bitwidth 32 \
            --input_layout x NTF

          ls -lh
          mv model-$t-seconds-quantized model-$t-seconds-quantized.cpp
          echo "----"
          ls -lh

          python3 "${QNN_SDK_ROOT}/bin/x86_64-linux-clang/qnn-model-lib-generator" \
            -c "model-$t-seconds-quantized.cpp" \
            -b "model-$t-seconds-quantized.bin" \
            -o model_libs > /dev/null 2>&1

          ls -lh model_libs/*/

          readelf -lW model_libs/*/lib*.so

          echo "Generate context binary"

          $dir/scripts/qnn/generate_config.py  \
            --soc ${{ matrix.soc }} \
            --graph-name "model_${t}_seconds_quantized" \
            --output-dir ./my-config \
            --qnn-sdk-root $QNN_SDK_ROOT

          ls -lh my-config

          head -n 1000 my-config/*.json

          $QNN_SDK_ROOT/bin/x86_64-linux-clang/qnn-context-binary-generator \
            --backend $QNN_SDK_ROOT/lib/x86_64-linux-clang/libQnnHtp.so \
            --model ./model_libs/x86_64-linux-clang/libmodel-$t-seconds-quantized.so \
            --output_dir ./binary \
            --binary_file model \
            --config_file ./my-config/htp_backend_extensions.json

          ls -lh binary/

          d=sherpa-onnx-qnn-${{ matrix.soc}}-binary-$t-seconds-zipformer-ctc-zh-2025-12-22-int8
          mkdir -p $d
          mkdir -p $d/test_wavs
          cp -v binary/model.bin $d/
          cp -v tokens.txt $d
          cp -v *.wav $d/test_wavs
          echo "num_frames=$num_frames" > $d/info.txt

          ls -lh $d
          tar cjfv $d.tar.bz2 $d
          ls -lh *.tar.bz2
          rm -rf $d
          mv *.tar.bz2 ../binary/

          echo "collect results"

          for p in x86_64-linux-clang aarch64-android; do
            if [[ $p == x86_64-linux-clang ]]; then
              d=sherpa-onnx-qnn-$t-seconds-zipformer-ctc-zh-2025-12-22-int8-linux-x64
            elif [[ $p == aarch64-android ]]; then
              d=sherpa-onnx-qnn-$t-seconds-zipformer-ctc-zh-2025-12-22-int8-android-aarch64
            else
              echo "Unknown $p"
              exit -1
            fi

            mkdir -p $d
            mkdir -p $d/test_wavs

            cp -v model_libs/$p/lib*.so $d/libmodel.so
            cp -v tokens.txt $d
            cp -v *.wav $d/test_wavs

            echo "num_frames=$num_frames" > $d/info.txt
            echo "target=$p" >> $d/info.txt

            ls -lh $d
            tar cjfv $d.tar.bz2 $d
            ls -lh *.tar.bz2
            rm -rf $d
          done

          echo "----show---"
          ls -lh *.tar.bz2

          mv *.tar.bz2 ../so

      - uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.model_name }}-${{ matrix.soc }}-${{ matrix.input_in_seconds }}-seconds
          path: ./tmp/*.json

      - name: Release
        if: github.repository_owner == 'csukuangfj' && matrix.soc == 'SM8850'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./so/*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models-qnn

      - name: Release
        if: github.repository_owner == 'csukuangfj'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./binary/*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models-qnn-binary

      - name: Release
        if: github.repository_owner == 'k2-fsa' && matrix.soc == 'SM8850'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./so/*.tar.bz2
          overwrite: true
          tag: asr-models-qnn

      - name: Release
        if: github.repository_owner == 'k2-fsa'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./binary/*.tar.bz2
          overwrite: true
          tag: asr-models-qnn-binary


================================================
FILE: .github/workflows/flutter-android.yaml
================================================
name: flutter-android

on:
  push:
    branches:
      - flutter
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'

  workflow_dispatch:

concurrency:
  group: flutter-android-${{ github.ref }}
  cancel-in-progress: true

jobs:
  asr:
    name: asr ${{ matrix.index }}/${{ matrix.total }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        total: ["3"]
        index: ["0", "1", "2"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up JDK 17
        uses: actions/setup-java@v3
        with:
          distribution: 'temurin'
          java-version: '17'

      - name: Check Java version
        run: |
          java -version
          echo $JAVA_HOME

      - name: Set JAVA_HOME for Gradle
        run: echo "JAVA_HOME=$JAVA_HOME" >> $GITHUB_ENV

      - name: Check Java version
        run: |
          java -version
          echo $JAVA_HOME

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Display NDK HOME
        shell: bash
        run: |
          echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
          ls -lh ${ANDROID_NDK_LATEST_HOME}

      - name: Setup build tool version variable
        shell: bash
        run: |
          echo "---"
          ls -lh /usr/local/lib/android/
          echo "---"

          ls -lh /usr/local/lib/android/sdk
          echo "---"

          ls -lh /usr/local/lib/android/sdk/build-tools
          echo "---"

          BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1)
          echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV
          echo "Last build tool version is: $BUILD_TOOL_VERSION"

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip jinja2 iso639-lang

      - name: Install deps
        shell: bash
        run: |
          sudo apt-get update -y
          sudo apt-get install -y build-essential jq git cmake
          sudo apt-get install -y curl

      - name: Setup Flutter SDK
        uses: flutter-actions/setup-flutter@v4
        with:
          channel: stable
          version: 3.29.0
          cache: true

      - name: Install ninja
        shell: bash
        run: |
          sudo apt-get install -y ninja-build

      - name: Display ninja version
        shell: bash
        run: |
          ninja --version
          ninja --help || true
          which ninja

      - name: Display PWD
        shell: bash
        run: |
          echo "pwd: $PWD"
          ls -lh

      - name: Display machine info
        shell: bash
        run: |
          uname -a

      - name: Display flutter info
        shell: bash
        run: |
          which flutter
          which dart

          flutter --version

          git config --global --add safe.directory /__t/flutter-Linux-*/flutter || true

          flutter --version

          dart --version
          flutter doctor

      - name: Install libgtk-3-dev
        shell: bash
        run: |
          sudo apt install -y libgtk-3-dev tree clang pkg-config

      - name: Accept Android licenses
        run: yes | $ANDROID_SDK_ROOT/cmdline-tools/latest/bin/sdkmanager --licenses

      - name: Install Android SDK Components
        run: |
          $ANDROID_SDK_ROOT/cmdline-tools/latest/bin/sdkmanager "platforms;android-35" "build-tools;35.0.0"

      - name: Install NDK 27
        run: |
          $ANDROID_SDK_ROOT/cmdline-tools/latest/bin/sdkmanager "ndk;27.0.12077973"

      - name: Display flutter info (2)
        shell: bash
        run: |
          which flutter
          which dart

          flutter --version
          dart --version
          flutter doctor

          cd ..

      - name: Build flutter
        shell: bash
        run: |
          cd scripts/flutter

          total=${{ matrix.total }}
          index=${{ matrix.index }}

          ./generate-streaming-asr.py --total $total --index $index

          chmod +x *.sh
          ./build-android-streaming-asr.sh

          cd ../../

          ls -lh *.apk

      - name: Display generated files
        shell: bash
        run: |
          ls -lh *.apk

          mkdir apks

          mv -v *.apk ./apks

      # https://github.com/marketplace/actions/sign-android-release
      - uses: r0adkll/sign-android-release@v1
        name: Sign app APK
        with:
          releaseDirectory: ./apks
          signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }}
          alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }}
          keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }}
        env:
          BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }}

      - name: Display APK after signing
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      - name: Rename APK after signing
        shell: bash
        run: |
          cd apks
          rm -fv signingKey.jks
          rm -fv *.apk.idsig
          rm -fv *-aligned.apk

          all_apks=$(ls -1 *-signed.apk)
          echo "----"
          echo $all_apks
          echo "----"
          for apk in ${all_apks[@]}; do
            n=$(echo $apk | sed -e s/-signed//)
            mv -v $apk $n
          done

          cd ..

          ls -lh ./apks/
          du -h -d1 .

      - name: Display APK after rename
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      - name: Publish to huggingface
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa' || github.repository_owner == 'csu-fangjun') && ((github.event_name == 'push' || github.event_name == 'workflow_dispatch') || contains(github.ref, 'refs/tags/'))
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            dst=flutter/asr/android/$SHERPA_ONNX_VERSION
            mkdir -p $dst

            cp -v ../apks/*.apk $dst

            git status
            git lfs track "*.apk"
            git add .
            git commit -m "add more files"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter main

  tts:
    name: tts ${{ matrix.index }}/${{ matrix.total }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        total: ["15"]
        index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up JDK 17
        uses: actions/setup-java@v3
        with:
          distribution: 'temurin'
          java-version: '17'

      - name: Check Java version
        run: |
          java -version
          echo $JAVA_HOME

      - name: Set JAVA_HOME for Gradle
        run: echo "JAVA_HOME=$JAVA_HOME" >> $GITHUB_ENV

      - name: Check Java version
        run: |
          java -version
          echo $JAVA_HOME

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Display NDK HOME
        shell: bash
        run: |
          echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
          ls -lh ${ANDROID_NDK_LATEST_HOME}

      - name: Setup build tool version variable
        shell: bash
        run: |
          echo "---"
          ls -lh /usr/local/lib/android/
          echo "---"

          ls -lh /usr/local/lib/android/sdk
          echo "---"

          ls -lh /usr/local/lib/android/sdk/build-tools
          echo "---"

          BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1)
          echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV
          echo "Last build tool version is: $BUILD_TOOL_VERSION"

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip jinja2 iso639-lang

      - name: Install deps
        shell: bash
        run: |
          sudo apt-get update -y
          sudo apt-get install -y build-essential jq git cmake
          sudo apt-get install -y curl

      - name: Setup Flutter SDK
        uses: flutter-actions/setup-flutter@v4
        with:
          channel: stable
          version: 3.29.0
          cache: true

      - name: Install ninja
        shell: bash
        run: |
          sudo apt-get install -y ninja-build

      - name: Display ninja version
        shell: bash
        run: |
          ninja --version
          ninja --help || true
          which ninja

      - name: Display PWD
        shell: bash
        run: |
          echo "pwd: $PWD"
          ls -lh

      - name: Display machine info
        shell: bash
        run: |
          uname -a

      - name: Display flutter info
        shell: bash
        run: |
          which flutter
          which dart

          flutter --version

          git config --global --add safe.directory /__t/flutter-Linux-*/flutter || true

          flutter --version

          dart --version
          flutter doctor

      - name: Install libgtk-3-dev
        shell: bash
        run: |
          sudo apt install -y libgtk-3-dev tree clang pkg-config

      - name: Accept Android licenses
        run: yes | $ANDROID_SDK_ROOT/cmdline-tools/latest/bin/sdkmanager --licenses

      - name: Install Android SDK Components
        run: |
          $ANDROID_SDK_ROOT/cmdline-tools/latest/bin/sdkmanager "platforms;android-35" "build-tools;35.0.0"

      - name: Display flutter info (2)
        shell: bash
        run: |
          which flutter
          which dart

          flutter --version
          dart --version
          flutter doctor

          cd ..

      - name: Build flutter
        shell: bash
        run: |
          cd scripts/flutter

          total=${{ matrix.total }}
          index=${{ matrix.index }}

          ./generate-tts.py --total $total --index $index

          chmod +x *.sh
          ./build-android-tts.sh

          cd ../../

          ls -lh *.apk

      - name: Display generated files
        shell: bash
        run: |
          ls -lh *.apk

          mkdir apks

          mv -v *.apk ./apks

      # https://github.com/marketplace/actions/sign-android-release
      - uses: r0adkll/sign-android-release@v1
        name: Sign app APK
        with:
          releaseDirectory: ./apks
          signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }}
          alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }}
          keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }}
        env:
          BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }}

      - name: Display APK after signing
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      - name: Rename APK after signing
        shell: bash
        run: |
          cd apks
          rm -fv signingKey.jks
          rm -fv *.apk.idsig
          rm -fv *-aligned.apk

          all_apks=$(ls -1 *-signed.apk)
          echo "----"
          echo $all_apks
          echo "----"
          for apk in ${all_apks[@]}; do
            n=$(echo $apk | sed -e s/-signed//)
            mv -v $apk $n
          done

          cd ..

          ls -lh ./apks/
          du -h -d1 .

      - name: Display APK after rename
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      - name: Publish to huggingface
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa' || github.repository_owner == 'csu-fangjun') && ((github.event_name == 'push' || github.event_name == 'workflow_dispatch') || contains(github.ref, 'refs/tags/'))
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            dst=flutter/tts/android/$SHERPA_ONNX_VERSION
            mkdir -p $dst

            cp -v ../apks/*.apk $dst

            git status
            git lfs track "*.apk"
            git add .
            git commit -m "add more files"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter main


================================================
FILE: .github/workflows/flutter-linux.yaml
================================================
name: flutter-linux

on:
  push:
    branches:
      - flutter
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'

  workflow_dispatch:

concurrency:
  group: flutter-linux-${{ github.ref }}
  cancel-in-progress: true

# See https://github.com/actions/checkout/issues/1590#issuecomment-2207052044
# and
# https://github.blog/changelog/2023-06-13-github-actions-all-actions-will-run-on-node16-instead-of-node12-by-default/
env:
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true

jobs:
  asr:
    name: asr ${{ matrix.arch }} ${{ matrix.index }}/${{ matrix.total }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        arch: [x86_64]
        total: ["3"]
        index: ["0", "1", "2"]


    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --break-system-packages --upgrade pip jinja2

      - name: Setup Flutter SDK
        uses: flutter-actions/setup-flutter@v4
        with:
          channel: stable
          version: 3.24.3
          cache: true

      - name: Display PWD
        shell: bash
        run: |
          echo "pwd: $PWD"
          ls -lh

      - name: Display machine info
        shell: bash
        run: |
          uname -a

      - name: Display flutter info
        shell: bash
        run: |
          which flutter
          which dart

          flutter --version
          dart --version
          flutter doctor

      - name: Install libgtk-3-dev
        shell: bash
        run: |
          sudo apt install -y libgtk-3-dev tree clang pkg-config

      - name: Display flutter info
        shell: bash
        run: |
          which flutter
          which dart

          flutter --version
          dart --version
          flutter doctor

      - name: Build flutter
        shell: bash
        run: |
          export arch=${{ matrix.arch }}
          cd scripts/flutter

          total=${{ matrix.total }}
          index=${{ matrix.index }}

          ./generate-streaming-asr.py --total $total --index $index

          chmod +x *.sh
          ./build-linux-streaming-asr.sh
          cd ../../
          ls -lh *.tar.bz2

      - name: Display generated files
        shell: bash
        run: |
          ls -lh *.tar.bz2

      - name: Publish to huggingface
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa' || github.repository_owner == 'csu-fangjun') && ((github.event_name == 'push' || github.event_name == 'workflow_dispatch') || contains(github.ref, 'refs/tags/'))
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            dst=flutter/asr/linux/$SHERPA_ONNX_VERSION
            mkdir -p $dst

            cp -v ../*.tar.bz2 $dst

            git status
            git lfs track "*.bz2"
            git add .
            git commit -m "add more files"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter main

  tts:
    name: tts ${{ matrix.arch }} ${{ matrix.index }}/${{ matrix.total }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        arch: [x86_64]
        total: ["20"]
        index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19"]


    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --break-system-packages --upgrade pip jinja2

      - name: Setup Flutter SDK
        uses: flutter-actions/setup-flutter@v4
        with:
          channel: stable
          version: 3.24.3
          cache: true

      - name: Display PWD
        shell: bash
        run: |
          echo "pwd: $PWD"
          ls -lh

      - name: Display machine info
        shell: bash
        run: |
          uname -a

      - name: Display flutter info
        shell: bash
        run: |
          which flutter
          which dart

          flutter --version
          dart --version
          flutter doctor

      - name: Install libgtk-3-dev
        shell: bash
        run: |
          sudo apt install -y libgtk-3-dev tree clang pkg-config

      - name: Install deps
        shell: bash
        run: |
          sudo apt-get update -y
          sudo apt-get install -y build-essential jq git python3-pip
          sudo apt-get install -y curl
          sudo apt-get install -y libgstreamer1.0-dev libgstreamer-plugins-base1.0-dev libunwind-dev

      - name: Display flutter info
        shell: bash
        run: |
          which flutter
          which dart

          flutter --version
          dart --version
          flutter doctor

      - name: Build flutter
        shell: bash
        run: |
          export arch=${{ matrix.arch }}
          cd scripts/flutter

          total=${{ matrix.total }}
          index=${{ matrix.index }}

          ./generate-tts.py --total $total --index $index

          chmod +x *.sh
          ./build-linux-tts.sh
          cd ../../
          ls -lh *.tar.bz2

      - name: Display generated files
        shell: bash
        run: |
          ls -lh *.tar.bz2

      - name: Publish to huggingface
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa' || github.repository_owner == 'csu-fangjun') && ((github.event_name == 'push' || github.event_name == 'workflow_dispatch') || contains(github.ref, 'refs/tags/'))
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            dst=flutter/tts/linux/$SHERPA_ONNX_VERSION
            mkdir -p $dst

            cp -v ../*.tar.bz2 $dst

            git status
            git lfs track "*.bz2"
            git add .
            git commit -m "add more files"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter main


================================================
FILE: .github/workflows/flutter-macos.yaml
================================================
name: flutter-macos

on:
  push:
    branches:
      - flutter
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'

  workflow_dispatch:

concurrency:
  group: flutter-macos-${{ github.ref }}
  cancel-in-progress: true

jobs:
  asr:
    name: asr ${{ matrix.arch }}/${{ matrix.total }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        arch: [x86_64, arm64]
        total: ["3"]
        index: ["0", "1", "2"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --break-system-packages --upgrade pip jinja2

      - name: Setup Flutter SDK
        uses: flutter-actions/setup-flutter@v4
        with:
          channel: stable
          version: 3.24.3
          cache: true

      - name: Display PWD
        shell: bash
        run: |
          echo "pwd: $PWD"
          ls -lh

      - name: Display machine info
        shell: bash
        run: |
          uname -a

      - name: Display flutter info
        shell: bash
        run: |
          which flutter
          which dart

          flutter --version
          dart --version
          flutter doctor

      - name: Build flutter
        shell: bash
        run: |
          export arch=${{ matrix.arch }}
          cd scripts/flutter

          total=${{ matrix.total }}
          index=${{ matrix.index }}

          ./generate-streaming-asr.py --total $total --index $index

          chmod +x *.sh
          ./build-macos-streaming-asr.sh
          cd ../../
          ls -lh *.tar.bz2

      - name: Display generated files
        shell: bash
        run: |
          ls -lh *.tar.bz2

      - name: Publish to huggingface
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa' || github.repository_owner == 'csu-fangjun') && ((github.event_name == 'push' || github.event_name == 'workflow_dispatch') || contains(github.ref, 'refs/tags/'))
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            dst=flutter/asr/macos/$SHERPA_ONNX_VERSION
            mkdir -p $dst

            cp -v ../*.tar.bz2 $dst

            git status
            git lfs track "*.bz2"
            git add .
            git commit -m "add more files"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter main

  tts:
    name: tts ${{ matrix.arch }} ${{ matrix.index }}/${{ matrix.total }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        arch: [x86_64, arm64]
        total: ["10"]
        index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --break-system-packages --upgrade pip jinja2 iso639-lang

      - name: Setup Flutter SDK
        uses: flutter-actions/setup-flutter@v4
        with:
          channel: stable
          version: 3.24.3
          cache: true

      - name: Display PWD
        shell: bash
        run: |
          echo "pwd: $PWD"
          ls -lh

      - name: Display machine info
        shell: bash
        run: |
          uname -a

      - name: Display flutter info
        shell: bash
        run: |
          which flutter
          which dart

          flutter --version
          dart --version
          flutter doctor

      - name: Build flutter
        shell: bash
        run: |
          export arch=${{ matrix.arch }}
          cd scripts/flutter

          total=${{ matrix.total }}
          index=${{ matrix.index }}

          ./generate-tts.py --total $total --index $index

          chmod +x *.sh
          ./build-macos-tts.sh
          cd ../../
          ls -lh *.tar.bz2

      - name: Display generated files
        shell: bash
        run: |
          ls -lh *.tar.bz2

      - name: Publish to huggingface
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa' || github.repository_owner == 'csu-fangjun') && ((github.event_name == 'push' || github.event_name == 'workflow_dispatch') || contains(github.ref, 'refs/tags/'))
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main
            dst=flutter/tts/macos/$SHERPA_ONNX_VERSION
            mkdir -p $dst
            cp -v ../*.tar.bz2 $dst

            git status
            git lfs track "*.bz2"
            git add .
            git commit -m "add more files"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter main


================================================
FILE: .github/workflows/flutter-windows-x64.yaml
================================================
name: flutter-windows-x64

on:
  push:
    branches:
      - flutter
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'

  workflow_dispatch:

concurrency:
  group: flutter-windows-x64-${{ github.ref }}
  cancel-in-progress: true

jobs:
  asr:
    name: asr ${{ matrix.index }}/${{ matrix.total }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [windows-2022]
        total: ["3"]
        index: ["0", "1", "2"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip jinja2

      - name: Setup Flutter SDK
        uses: flutter-actions/setup-flutter@v4
        with:
          channel: stable
          version: 3.24.3
          cache: true

      - name: Display PWD
        shell: bash
        run: |
          echo "pwd: $PWD"
          ls -lh

      - name: Display machine info
        shell: bash
        run: |
          uname -a

      - name: Display flutter info
        shell: bash
        run: |
          which flutter
          which dart

          flutter --version
          dart --version
          flutter doctor

      - name: Build flutter
        shell: bash
        run: |
          cd scripts/flutter

          total=${{ matrix.total }}
          index=${{ matrix.index }}

          ./generate-streaming-asr.py --total $total --index $index

          chmod +x *.sh
          ./build-windows-streaming-asr.sh
          cd ../../
          ls -lh *.tar.bz2

      - name: Publish to huggingface
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa' || github.repository_owner == 'csu-fangjun') && ((github.event_name == 'push' || github.event_name == 'workflow_dispatch') || contains(github.ref, 'refs/tags/'))
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            dst=flutter/asr/windows/$SHERPA_ONNX_VERSION
            mkdir -p $dst
            cp -v ../*.tar.bz2 $dst

            git status
            git lfs track "*.bz2"
            git add .
            git commit -m "add more files"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter main

  tts:
    name: tts ${{ matrix.index }}/${{ matrix.total }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [windows-2022]
        total: ["20"]
        index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "11", "12", "13", "14", "15", "16", "17", "18", "19"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip jinja2 iso639-lang

      - name: Setup Flutter SDK
        uses: flutter-actions/setup-flutter@v3
        with:
          channel: stable
          version: latest

      - name: Display PWD
        shell: bash
        run: |
          echo "pwd: $PWD"
          ls -lh

      - name: Display machine info
        shell: bash
        run: |
          uname -a

      - name: Display flutter info
        shell: bash
        run: |
          which flutter
          which dart

          flutter --version
          dart --version
          flutter doctor

      - name: Build flutter
        shell: bash
        run: |
          cd scripts/flutter

          total=${{ matrix.total }}
          index=${{ matrix.index }}

          ./generate-tts.py --total $total --index $index

          chmod +x *.sh
          ./build-windows-tts.sh
          cd ../../
          ls -lh *.tar.bz2

      - name: Publish to huggingface
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa' || github.repository_owner == 'csu-fangjun') && ((github.event_name == 'push' || github.event_name == 'workflow_dispatch') || contains(github.ref, 'refs/tags/'))
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            dst=flutter/tts/windows/$SHERPA_ONNX_VERSION
            mkdir -p $dst
            cp -v ../*.tar.bz2 $dst

            git status
            git lfs track "*.bz2"
            git add .
            git commit -m "add more files"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-flutter main


================================================
FILE: .github/workflows/generate-tts-samples.yaml
================================================
name: generate-tts-samples

on:
  push:
    branches:
      - tts-samples-2

  workflow_dispatch:

concurrency:
  group: generate-tts-samples-${{ github.ref }}
  cancel-in-progress: true

jobs:
  generate_tts_samples:
    name: ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Install Python dependencies
        shell: bash
        run: |
          pip install "numpy<=1.26.4" sherpa-onnx soundfile

      - name: kitten
        if: true
        shell: bash
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          git config --global user.email "csukuangfj@gmail.com"
          git config --global user.name "Fangjun Kuang"

          cd scripts/kitten-tts
          pwd=$PWD

          export GIT_LFS_SKIP_SMUDGE=1
          export GIT_CLONE_PROTECTION_ACTIVE=false
          git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples hf
          mkdir -p ./hf/kitten/v0.1-nano/mp3
          mkdir -p ./hf/kitten/v0.2-nano/mp3
          mkdir -p ./hf/kitten/v0.1-mini/mp3

          for v in 1 2; do
            pushd nano_v0_$v
            curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_$v-fp16.tar.bz2
            tar xf kitten-nano-en-v0_$v-fp16.tar.bz2
            rm kitten-nano-en-v0_$v-fp16.tar.bz2

            ln -s ../hf .
            python3 ./generate_samples.py
            rm -rf kitten-nano-en-v0_$v-fp16
            popd
          done

          for v in 1; do
            pushd mini_v0_$v
            curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-mini-en-v0_$v-fp16.tar.bz2
            tar xf kitten-mini-en-v0_$v-fp16.tar.bz2
            rm kitten-mini-en-v0_$v-fp16.tar.bz2

            ln -s ../hf .
            python3 ./generate_samples.py
            rm -rf kitten-mini-en-v0_$v-fp16
            popd
          done

          pushd hf
          git pull
          git add .
          git commit -m 'add kitten tts samples'
          git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples main
          popd
          rm -rf hf

      - name: matcha en (ljspeech)
        if: false
        shell: bash
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          git config --global user.email "csukuangfj@gmail.com"
          git config --global user.name "Fangjun Kuang"

          cd scripts/matcha-tts/en/
          pwd=$PWD

          export GIT_LFS_SKIP_SMUDGE=1
          export GIT_CLONE_PROTECTION_ACTIVE=false
          git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples hf

          mkdir -p ./hf/matcha/icefall-en-ljspeech/mp3
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
          tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
          rm matcha-icefall-en_US-ljspeech.tar.bz2

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx

          python3 ./generate_samples.py

          pushd hf
          git pull
          git add .
          git commit -m 'add matcha tts en (ljspeech) samples'
          git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples main
          popd

          rm -rf hf


================================================
FILE: .github/workflows/hap-vad-asr.yaml
================================================
name: hap-vad-asr

on:
  push:
    branches:
      - hap
      - hap-ci

  workflow_dispatch:

concurrency:
  group: hap-vad-asr-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: write

jobs:
  hap_vad_asr:
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    runs-on: ${{ matrix.os }}
    name: Haps for vad asr ${{ matrix.index }}/${{ matrix.total }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        total: ["10"]
        index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      # https://github.com/actions/setup-java
      - uses: actions/setup-java@v4
        with:
          distribution: 'temurin' # See 'Supported distributions' for available options
          java-version: '17' # it requires jdk 17 to sigh the hap

      - name: Show java version
        shell: bash
        run: |
          which java
          java --version

      - name: cache-toolchain
        id: cache-toolchain-ohos
        uses: actions/cache@v4
        with:
          path: command-line-tools
          key: commandline-tools-linux-x64-5.0.5.200.zip

      - name: Download toolchain
        if: steps.cache-toolchain-ohos.outputs.cache-hit != 'true'
        shell: bash
        run: |
          curl -SL -O https://huggingface.co/csukuangfj/harmonyos-commandline-tools/resolve/main/commandline-tools-linux-x64-5.0.5.200.zip
          unzip commandline-tools-linux-x64-5.0.5.200.zip
          rm commandline-tools-linux-x64-5.0.5.200.zip

      - name: Set environment variable
        shell: bash
        run: |
          echo "$GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/build-tools/cmake/bin"  >> "$GITHUB_PATH"
          which cmake

          cmake --version

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip jinja2

      - name: Generate build script
        shell: bash
        run: |
          cd scripts/hap

          total=${{ matrix.total }}
          index=${{ matrix.index }}

          ./generate-vad-asr-hap-script.py --total $total --index $index
          ls -lh

          chmod +x build-hap-vad-asr.sh
          mv -v ./build-hap-vad-asr.sh ../..

      - name: Generate secrets
        shell: bash
        run: |
          echo "${{ secrets.HAP_SHERPA_ONNX_CER }}" > /tmp/sherpa_onnx.cer
          shasum -a 256 /tmp/sherpa_onnx.cer
          ls -lh /tmp/sherpa_onnx.cer

          # macos
          # base64 -i sherpa_onnx_profileRelease.p7b -o sherpa_onnx_profileRelease.p7b.base64
          #
          # linux
          # base64 -w 0 sherpa_onnx_profileRelease.p7b > sherpa_onnx_profileRelease.p7b.base64
          #
          # cat sherpa_onnx_profileRelease.p7b.base64 | base64 --decode > sherpa_onnx_profileRelease.p7b
          #
          echo "${{ secrets.HAP_SHERPA_ONNX_PROFILE }}"   | base64 --decode > /tmp/sherpa_onnx_profileRelease.p7b
          echo "${{ secrets.HAP_SHERPA_ONNX_KEY_STORE }}" > ./sherpa_onnx_ohos_key.p12.base64
          echo "${{ secrets.HAP_SHERPA_ONNX_KEY_STORE }}" | base64 --decode > /tmp/sherpa_onnx_ohos_key.p12

          ls -l /tmp/sherpa_onnx_profileRelease.p7b
          ls -l /tmp/sherpa_onnx_ohos_key.p12

          ls -lh ./sherpa_onnx_ohos_key.p12.base64
          shasum -a 256 ./sherpa_onnx_ohos_key.p12.base64
          wc ./sherpa_onnx_ohos_key.p12.base64
          rm ./sherpa_onnx_ohos_key.p12.base64

          shasum -a 256 /tmp/sherpa_onnx_profileRelease.p7b
          shasum -a 256 /tmp/sherpa_onnx_ohos_key.p12

      - name: build HAP
        env:
          HAP_KEY_ALIAS: ${{ secrets.HAP_KEY_ALIAS }}
          HAP_KEY_PWD: ${{ secrets.HAP_KEY_PWD }}
          HAP_KEY_STORE_PWD: ${{ secrets.HAP_KEY_STORE_PWD }}
        shell: bash
        run: |
          export COMMANDLINE_TOOLS_DIR=$GITHUB_WORKSPACE/command-line-tools
          ./build-hap-vad-asr.sh

          # remove secrets
          rm /tmp/sherpa_onnx.cer
          rm /tmp/sherpa_onnx_profileRelease.p7b
          rm /tmp/sherpa_onnx_ohos_key.p12

      - name: Display HAPs
        shell: bash
        run: |
          ls -lh ./haps/
          du -h -d1 .

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-harmony-os huggingface
            cd huggingface
            du -h -d1 .
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            d=hap/vad-asr/$SHERPA_ONNX_VERSION
            mkdir -p $d
            cp -v ../haps/*.hap $d/
            git status
            git lfs track "*.hap"
            git add .
            git commit -m "add more HAPs"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-harmony-os main


================================================
FILE: .github/workflows/har.yaml
================================================
name: har

on:
  push:
    branches:
      - master
      # - ohos-har
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'

  workflow_dispatch:

concurrency:
  group: har-${{ github.ref }}
  cancel-in-progress: true

jobs:
  har:
    name: Har
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: har-linux

      - name: cache-toolchain
        id: cache-toolchain-ohos
        uses: actions/cache@v4
        with:
          path: command-line-tools
          key: commandline-tools-linux-x64-5.0.5.200.zip

      - name: Download toolchain
        if: steps.cache-toolchain-ohos.outputs.cache-hit != 'true'
        shell: bash
        run: |
          curl -SL -O https://huggingface.co/csukuangfj/harmonyos-commandline-tools/resolve/main/commandline-tools-linux-x64-5.0.5.200.zip
          unzip commandline-tools-linux-x64-5.0.5.200.zip
          rm commandline-tools-linux-x64-5.0.5.200.zip

      - name: Set environment variable
        shell: bash
        run: |
          echo "$GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/build-tools/cmake/bin"  >> "$GITHUB_PATH"
          which cmake

          cmake --version

          ls -lh $GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/build/cmake/ohos.toolchain.cmake

          echo "===="
          cat $GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/build/cmake/ohos.toolchain.cmake
          echo "===="

          # echo "$GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/llvm/bin"  >> "$GITHUB_PATH"

          ls -lh $GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/llvm/bin/
          echo "--"
          ls -lh $GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/llvm/bin/*unknown*

          cat $GITHUB_PATH

          # /home/runner/work/onnxruntime-libs/onnxruntime-libs/command-line-tools/sdk/default/openharmony/native/llvm/bin/aarch64-unknown-linux-ohos-clang -v || true
          export PATH=$PWD/command-line-tools/sdk/default/openharmony/native/llvm/bin:$PATH
          echo "path: $PATH"

          which aarch64-unknown-linux-ohos-clang++ || true
          which aarch64-unknown-linux-ohos-clang || true

          aarch64-unknown-linux-ohos-clang++ --version || true
          aarch64-unknown-linux-ohos-clang --version || true

          which armv7-unknown-linux-ohos-clang++
          which armv7-unknown-linux-ohos-clang

          armv7-unknown-linux-ohos-clang++ --version
          armv7-unknown-linux-ohos-clang --version

          which x86_64-unknown-linux-ohos-clang++
          which x86_64-unknown-linux-ohos-clang

          x86_64-unknown-linux-ohos-clang++ --version
          x86_64-unknown-linux-ohos-clang --version

      - name: Install tree
        shell: bash
        run: |
          sudo apt-get update -q
          sudo apt-get install -y -q tree

      - name: Build libraries
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          export OHOS_SDK_NATIVE_DIR="$GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native"

          ./build-ohos-arm64-v8a.sh
          ./build-ohos-x86-64.sh

      - name: Build Har
        shell: bash
        run: |
          export PATH="$GITHUB_WORKSPACE/command-line-tools/bin:$PATH"

          which hvigorw

          pushd harmony-os/SherpaOnnxHar

          cp -fv ../../LICENSE ./sherpa_onnx
          cp -fv ../../CHANGELOG.md ./sherpa_onnx

          hvigorw --mode module -p product=default -p module=sherpa_onnx@default assembleHar --analyze=normal --parallel --incremental --no-daemon
          ls -lh ./sherpa_onnx/build/default/outputs/default/sherpa_onnx.har
          cp -v ./sherpa_onnx/build/default/outputs/default/sherpa_onnx.har ../../

          popd

          ls -lh *.har

      - name: View Har
        shell: bash
        run: |
          file sherpa_onnx.har
          tar xvf sherpa_onnx.har

          cd package
          ls -lh

          ls -lh libs
          echo "---libs/x86_64---"
          ls -lh libs/x86_64

          echo "---libs/arm64-v8a---"
          ls -lh libs/arm64-v8a

          echo "---src/main/ets/components---"
          ls -lh src/main/ets/components/

          echo "---src/main/cpp/types/libsherpa_onnx/---"
          ls -lh src/main/cpp/types/libsherpa_onnx/

          tree .

      - name: Collect result
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
          echo "SHERPA_ONNX_VERSION=$SHERPA_ONNX_VERSION" >> "$GITHUB_ENV"

          mv sherpa_onnx.har sherpa_onnx-$SHERPA_ONNX_VERSION.har

      - uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-har
          path: ./sherpa_onnx*.har

      - name: Release har
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: ./*.har
          # repo_name: k2-fsa/sherpa-onnx
          # repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          # tag: v1.12.30

      - name: Publish to huggingface
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-harmony-os huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            d=har
            mkdir -p $d
            cp -v ../*.har $d/
            git status
            git lfs track "*.har"
            git add .
            git commit -m "add more hars"
            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-harmony-os main


================================================
FILE: .github/workflows/harmony-os.yaml
================================================
name: harmony-os

on:
  push:
    branches:
      - master
      - ohos
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'

  workflow_dispatch:

concurrency:
  group: harmony-os-${{ github.ref }}
  cancel-in-progress: true

jobs:
  harmony_os:
    name: Harmony OS ${{ matrix.arch }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        arch: [arm64-v8a, armeabi-v7a, x86_64]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ohos-${{ matrix.arch }}

      - name: cache-toolchain
        id: cache-toolchain-ohos
        uses: actions/cache@v4
        with:
          path: command-line-tools
          key: commandline-tools-linux-x64-5.0.5.200.zip

      - name: Download toolchain
        if: steps.cache-toolchain-ohos.outputs.cache-hit != 'true'
        shell: bash
        run: |
          curl -SL -O https://huggingface.co/csukuangfj/harmonyos-commandline-tools/resolve/main/commandline-tools-linux-x64-5.0.5.200.zip
          unzip commandline-tools-linux-x64-5.0.5.200.zip
          rm commandline-tools-linux-x64-5.0.5.200.zip

      - name: Set environment variable
        shell: bash
        run: |
          echo "$GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/build-tools/cmake/bin"  >> "$GITHUB_PATH"
          which cmake

          cmake --version

          ls -lh $GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/build/cmake/ohos.toolchain.cmake

          echo "===="
          cat $GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/build/cmake/ohos.toolchain.cmake
          echo "===="

          # echo "$GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/llvm/bin"  >> "$GITHUB_PATH"

          ls -lh $GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/llvm/bin/
          echo "--"
          ls -lh $GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/llvm/bin/*unknown*

          cat $GITHUB_PATH

          # /home/runner/work/onnxruntime-libs/onnxruntime-libs/command-line-tools/sdk/default/openharmony/native/llvm/bin/aarch64-unknown-linux-ohos-clang -v || true
          export PATH=$PWD/command-line-tools/sdk/default/openharmony/native/llvm/bin:$PATH
          echo "path: $PATH"

          which aarch64-unknown-linux-ohos-clang++ || true
          which aarch64-unknown-linux-ohos-clang || true

          aarch64-unknown-linux-ohos-clang++ --version || true
          aarch64-unknown-linux-ohos-clang --version || true

          which armv7-unknown-linux-ohos-clang++
          which armv7-unknown-linux-ohos-clang

          armv7-unknown-linux-ohos-clang++ --version
          armv7-unknown-linux-ohos-clang --version

          which x86_64-unknown-linux-ohos-clang++
          which x86_64-unknown-linux-ohos-clang

          x86_64-unknown-linux-ohos-clang++ --version
          x86_64-unknown-linux-ohos-clang --version

      - name: Build ${{ matrix.arch }}
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          arch=${{ matrix.arch }}

          echo "arch: $arch"

          export OHOS_SDK_NATIVE_DIR="$GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native"

          if [[ $arch == arm64-v8a ]]; then
            ./build-ohos-arm64-v8a.sh
          elif [[ $arch == armeabi-v7a ]]; then
            ./build-ohos-armeabi-v7a.sh
          elif [[ $arch == x86_64 ]]; then
            ./build-ohos-x86-64.sh
          else
            echo "Unknown arch $arch"
          fi

      - name: Collect result for ${{ matrix.arch }}
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
          echo "SHERPA_ONNX_VERSION=$SHERPA_ONNX_VERSION" >> "$GITHUB_ENV"

          arch=${{ matrix.arch }}
          d=sherpa-onnx-$SHERPA_ONNX_VERSION-ohos-$arch
          if [[ $arch == x86_64 ]]; then
            cd ./build-ohos-x86-64
          else
            cd ./build-ohos-$arch
          fi

          mv install $d
          tar cjfv $d.tar.bz2 $d

          ls -lh $d/lib


          file $d/lib/*

          readelf -d $d/lib/libsherpa-onnx-c-api.so

          mv $d.tar.bz2 ../

      - uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-ohos-${{ matrix.arch }}
          path: ./*.tar.bz2

      - name: Release jar
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: ./*.tar.bz2
          # repo_name: k2-fsa/sherpa-onnx
          # repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          # tag: v1.10.23


================================================
FILE: .github/workflows/jar.yaml
================================================
name: jar

on:
  push:
    branches:
      - refactor-jar
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'

  workflow_dispatch:

concurrency:
  group: jar-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: write
jobs:
  jar:
    runs-on: ${{ matrix.os }}
    name: ${{ matrix.os }} ${{ matrix.arch }}
    strategy:
      fail-fast: false
      matrix:
        include:
          - os: ubuntu-24.04-arm
            arch: "arm64"

          - os: ubuntu-latest
            arch: "x64"

          - os: macos-latest
            arch: "arm64"

          - os: macos-15-intel
            arch: "x64"

          - os: windows-2022
            arch: "x64"

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - uses: actions/setup-java@v4
        with:
          distribution: 'temurin' # See 'Supported distributions' for available options
          java-version: '21'

      - name: Show java version
        shell: bash
        run: |
          java --version

      - name: Download libs ${{ matrix.os }} ${{ matrix.arch }}
        if: ${{ matrix.os == 'ubuntu-24.04-arm' && matrix.arch == 'arm64' }}
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/v$SHERPA_ONNX_VERSION/sherpa-onnx-v$SHERPA_ONNX_VERSION-linux-aarch64-jni.tar.bz2
          tar xvf ./*.tar.bz2

          src=sherpa-onnx-v$SHERPA_ONNX_VERSION-linux-aarch64-jni
          dst=sherpa-onnx/java-api/resources/sherpa-onnx/native/linux-aarch64

          mkdir -p $dst
          cp -v $src/lib/libsherpa-onnx-jni.so $dst/
          cp -v $src/lib/libonnxruntime.so $dst/

          ls -lh $dst
          rm -rf $src*

      - name: Download libs ${{ matrix.os }} ${{ matrix.arch }}
        if: ${{ matrix.os == 'ubuntu-latest' && matrix.arch == 'x64' }}
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/v$SHERPA_ONNX_VERSION/sherpa-onnx-v$SHERPA_ONNX_VERSION-linux-x64-jni.tar.bz2
          tar xvf ./*.tar.bz2

          src=sherpa-onnx-v$SHERPA_ONNX_VERSION-linux-x64-jni
          dst=sherpa-onnx/java-api/resources/sherpa-onnx/native/linux-x64

          mkdir -p $dst
          cp -v $src/lib/libsherpa-onnx-jni.so $dst/
          cp -v $src/lib/libonnxruntime.so $dst/

          ls -lh $dst
          rm -rf $src*

      - name: Download libs ${{ matrix.os }} ${{ matrix.arch }}
        if: ${{ matrix.os == 'macos-latest' && matrix.arch == 'arm64' }}
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/v$SHERPA_ONNX_VERSION/sherpa-onnx-v$SHERPA_ONNX_VERSION-osx-arm64-jni.tar.bz2
          tar xvf ./*.tar.bz2

          src=sherpa-onnx-v$SHERPA_ONNX_VERSION-osx-arm64-jni
          dst=sherpa-onnx/java-api/resources/sherpa-onnx/native/osx-aarch64

          mkdir -p $dst
          cp -v $src/lib/libonnxruntime.1.23.2.dylib $dst/
          cp -v $src/lib/libsherpa-onnx-jni.dylib $dst/

          ls -lh $dst
          rm -rf $src*

      - name: Download libs ${{ matrix.os }} ${{ matrix.arch }}
        if: ${{ matrix.os == 'macos-15-intel' && matrix.arch == 'x64' }}
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/v$SHERPA_ONNX_VERSION/sherpa-onnx-v$SHERPA_ONNX_VERSION-osx-x86_64-jni.tar.bz2
          tar xvf ./*.tar.bz2

          src=sherpa-onnx-v$SHERPA_ONNX_VERSION-osx-x86_64-jni
          dst=sherpa-onnx/java-api/resources/sherpa-onnx/native/osx-x64

          mkdir -p $dst
          cp -v $src/lib/libonnxruntime.1.23.2.dylib $dst/
          cp -v $src/lib/libsherpa-onnx-jni.dylib $dst/

          ls -lh $dst
          rm -rf $src*

      - name: Download libs ${{ matrix.os }} ${{ matrix.arch }}
        if: ${{ matrix.os == 'windows-2022' && matrix.arch == 'x64' }}
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/v$SHERPA_ONNX_VERSION/sherpa-onnx-v$SHERPA_ONNX_VERSION-win-x64-jni.tar.bz2
          tar xvf ./*.tar.bz2

          src=sherpa-onnx-v$SHERPA_ONNX_VERSION-win-x64-jni
          ls -lh $src
          ls -lh $src/lib
          dst=sherpa-onnx/java-api/resources/sherpa-onnx/native/win-x64

          mkdir -p $dst
          cp -v $src/lib/onnxruntime.dll $dst/
          cp -v $src/lib/sherpa-onnx-jni.dll $dst/

          ls -lh $dst
          rm -rf $src*

      - name: Create java jar (source code)
        shell: bash
        run: |
          cd sherpa-onnx/java-api
          make

          ls -lh build

      - name: Create java jar (native lib)
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          cd sherpa-onnx/java-api

          ls -lh resources/sherpa-onnx/native

          echo "--"

          ls -lh resources/sherpa-onnx/native/*/

          jar cfvm ./sherpa-onnx-native.jar MANIFEST.MF -C ./resources .

          ls -lh *.jar

          os=${{ matrix.os }}
          arch=${{ matrix.arch }}

          if [[ $os == "ubuntu-24.04-arm" && $arch == "arm64" ]]; then
            mv -v sherpa-onnx-native.jar sherpa-onnx-native-lib-linux-aarch64-$SHERPA_ONNX_VERSION.jar
          elif [[ $os == "ubuntu-latest" && $arch == "x64" ]]; then
            mv -v sherpa-onnx-native.jar sherpa-onnx-native-lib-linux-x64-$SHERPA_ONNX_VERSION.jar
          elif [[ $os == "macos-latest" && $arch == "arm64" ]]; then
            mv -v sherpa-onnx-native.jar sherpa-onnx-native-lib-osx-aarch64-$SHERPA_ONNX_VERSION.jar
          elif [[ $os == "macos-15-intel" && $arch == "x64" ]]; then
            mv -v sherpa-onnx-native.jar sherpa-onnx-native-lib-osx-x64-$SHERPA_ONNX_VERSION.jar
          elif [[ $os == "windows-2022" && $arch == "x64" ]]; then
            mv -v sherpa-onnx-native.jar sherpa-onnx-native-lib-win-x64-$SHERPA_ONNX_VERSION.jar
          else
            echo "Unknown os $os with arch $arch"
          fi

      - name: Show java jar (source code)
        shell: bash
        run: |
          cd sherpa-onnx/java-api

          unzip -l build/sherpa-onnx.jar

      - name: Show java jar (native lib)
        shell: bash
        run: |
          cd sherpa-onnx/java-api

          unzip -l sherpa-onnx*.jar

      - name: Release jar
        if: github.repository_owner == 'k2-fsa' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: ./sherpa-onnx/java-api/sherpa-onnx-native-*.jar

      - name: Release jar
        if: github.repository_owner == 'csukuangfj' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: ./sherpa-onnx/java-api/sherpa-onnx-native-*.jar
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: v1.12.15

      - name: Test KittenTTS
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          os=${{ matrix.os }}
          arch=${{ matrix.arch }}

          if [[ $os == "ubuntu-24.04-arm" && $arch == "arm64" ]]; then
            native_jar=sherpa-onnx-native-lib-linux-aarch64-$SHERPA_ONNX_VERSION.jar
          elif [[ $os == "ubuntu-latest" && $arch == "x64" ]]; then
            native_jar=sherpa-onnx-native-lib-linux-x64-$SHERPA_ONNX_VERSION.jar
          elif [[ $os == "macos-latest" && $arch == "arm64" ]]; then
            native_jar=sherpa-onnx-native-lib-osx-aarch64-$SHERPA_ONNX_VERSION.jar
          elif [[ $os == "macos-15-intel" && $arch == "x64" ]]; then
            native_jar=sherpa-onnx-native-lib-osx-x64-$SHERPA_ONNX_VERSION.jar
          elif [[ $os == "windows-2022" && $arch == "x64" ]]; then
            native_jar=sherpa-onnx-native-lib-win-x64-$SHERPA_ONNX_VERSION.jar
          else
            echo "Unknown os $os with arch $arch"
          fi

          echo "native_jar: $native_jar"
          ls -lh sherpa-onnx/java-api/$native_jar

          if [[ ${{ matrix.os }} == "windows-2022" ]]; then
            SEP=";"
          else
            SEP=":"
          fi
          cd java-api-examples

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2
          tar xf kitten-nano-en-v0_1-fp16.tar.bz2
          rm kitten-nano-en-v0_1-fp16.tar.bz2

          java \
            -cp "../sherpa-onnx/java-api/build/sherpa-onnx.jar${SEP}../sherpa-onnx/java-api/$native_jar" \
            NonStreamingTtsKittenEn.java


================================================
FILE: .github/workflows/jni.yaml
================================================
name: jni

on:
  push:
    branches:
      - master
    paths:
      - '.github/workflows/jni.yaml'
      - 'cmake/**'
      - 'kotlin-api-examples/**'
      - 'sherpa-onnx/kotlin-api/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/jni/*'

  workflow_dispatch:

concurrency:
  group: jni-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  jni:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest, macos-15-intel]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}

      - name: OS info
        shell: bash
        run: |
          uname -a

      - name: Display kotlin version
        shell: bash
        run: |
          kotlinc -version

      - name: Display java version
        shell: bash
        run: |
          java -version
          javac -help
          echo "JAVA_HOME is: ${JAVA_HOME}"

      - name:  Run JNI test
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          cd ./kotlin-api-examples
          ./run.sh

      - uses: actions/upload-artifact@v4
        with:
          name: tts-files-${{ matrix.os }}
          path: kotlin-api-examples/test-*.wav


================================================
FILE: .github/workflows/lazarus.yaml
================================================
name: lazarus

on:
  push:
    branches:
      - master
      - lazarus
    paths:
      - '.github/workflows/lazarus.yaml'
      - 'cmake/**'
      - 'lazarus-examples/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/c-api/*'
      - 'sherpa-onnx/pascal-api/*'
      - 'scripts/lazarus/*'

  workflow_dispatch:

concurrency:
  group: lazarus-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  build:
    name: ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-22.04, macos-latest, macos-15-intel, windows-2022]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}

      # See https://github.com/gcarreno/setup-lazarus
      - uses: gcarreno/setup-lazarus@v3.3.1
        with:
          lazarus-version: "stable"
          with-cache: false

      - name: Lazarus info
        shell: bash
        run: |
          which lazbuild
          lazbuild --help

      - name: FPC info
        shell: bash
        run: |
          which fpc
          fpc -i

      - name: OS info
        shell: bash
        run: |
          uname -a

      - name: Install patchelf for ubuntu
        if: matrix.os == 'ubuntu-22.04'
        shell: bash
        run: |
          sudo apt-get update -q
          sudo apt-get install -q -y patchelf

      - name: Show Patchelf version (ubuntu)
        if: matrix.os == 'ubuntu-22.04'
        shell: bash
        run: |
          patchelf --version
          patchelf --help
          which patchelf

      - name: Configure CMake
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          mkdir build
          cd build
          os=${{ matrix.os }}

          if [[ $os == 'windows-2022' || $os == 'ubuntu-22.04' ]]; then
            BUILD_SHARED_LIBS=ON
          else
            BUILD_SHARED_LIBS=OFF
          fi

          cmake \
            -DCMAKE_INSTALL_PREFIX=./install \
            -D BUILD_SHARED_LIBS=$BUILD_SHARED_LIBS \
            -D SHERPA_ONNX_ENABLE_BINARY=OFF \
            -D CMAKE_BUILD_TYPE=Release \
            ..

      - name: Build sherpa-onnx
        shell: bash
        run: |
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"

          cd build
          cmake --build . --target install --config Release -j 2

          ls -lh install/lib/

          cp -v install/lib/*.dll ../lazarus-examples/generate_subtitles/ || true
          cp -v install/lib/*.so* ../lazarus-examples/generate_subtitles/ || true

      - name: Build generating subtitles
        shell: bash
        run: |
          cd lazarus-examples/generate_subtitles
          os=${{ matrix.os }}
          if [[ $os == macos-15-intel ]]; then
            lazbuild --verbose --build-mode=Release --widgetset=cocoa ./generate_subtitles.lpi
          elif [[ $os == macos-latest ]]; then
            lazbuild --verbose --build-mode=Release --widgetset=cocoa --cpu=aarch64 ./generate_subtitles.lpi
          elif [[ $os == 'ubuntu-22.04' ]]; then
            lazbuild --verbose --build-mode=Release-Linux ./generate_subtitles.lpi
          else
            lazbuild --verbose --build-mode=Release ./generate_subtitles.lpi
          fi

      - name: Display generating subtitles
        shell: bash
        run: |
          cd lazarus-examples/generate_subtitles
          ls -lh

      - name: Collect generating subtitles (Ubuntu)
        if: matrix.os == 'ubuntu-22.04'
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
          cd lazarus-examples/generate_subtitles
          ls -lh
          readelf -d ./generate_subtitles
          echo '----------'
          ldd ./generate_subtitles

          d=generate_subtitles-linux-x64-$SHERPA_ONNX_VERSION
          echo "---before running patchelf---"
          readelf -d ./generate_subtitles

          patchelf --set-rpath '$ORIGIN' ./generate_subtitles

          echo "---after running patchelf---"
          readelf -d ./generate_subtitles

          mkdir -p $d
          cp -v ./generate_subtitles $d/
          cp -v *.so $d/

          mv -v $d /tmp/linux-x64

          ls -lh /tmp/linux-x64

      - name: Collect generating subtitles (windows)
        if: matrix.os == 'windows-2022'
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
          cd lazarus-examples/generate_subtitles
          ls -lh

          d=generate-subtitles-windows-x64-$SHERPA_ONNX_VERSION
          mkdir -p $d
          cp -v ./generate_subtitles.exe $d/
          cp -v onnxruntime.dll $d/
          cp -v sherpa-onnx-c-api.dll $d/
          mv $d ../../windows-x64
          cd ../..

          ls -lh windows-x64

      - name: Collect generating subtitles (macos)
        if: matrix.os == 'macos-15-intel' || matrix.os == 'macos-latest'
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
          cd lazarus-examples/generate_subtitles
          ls -lh
          file ./generate_subtitles
          echo '----------'
          otool -L ./generate_subtitles
          rm -v generate_subtitles.app/Contents/MacOS/generate_subtitles
          cp -v ./generate_subtitles generate_subtitles.app/Contents/MacOS/generate_subtitles
          chmod +x generate_subtitles.app/Contents/MacOS/generate_subtitles

          if [[ ${{ matrix.os }} == 'macos-latest' ]]; then
            mv generate_subtitles.app /tmp/macos-arm64
          else
            mv generate_subtitles.app /tmp/macos-x64
            d=generate-subtitles-macos-x64-$SHERPA_ONNX_VERSION.app
          fi

          ls -lh /tmp
          echo "---"
          ls -lh /tmp/macos-*

      - uses: actions/upload-artifact@v4
        if: matrix.os == 'ubuntu-22.04'
        with:
          name: linux-x64
          path: /tmp/linux-x64

      - uses: actions/upload-artifact@v4
        if: matrix.os == 'macos-latest'
        with:
          name: macos-arm64
          path: /tmp/macos-arm64

      - uses: actions/upload-artifact@v4
        if: matrix.os == 'macos-15-intel'
        with:
          name: macos-x64
          path: /tmp/macos-x64

      - uses: actions/upload-artifact@v4
        if: matrix.os == 'windows-2022'
        with:
          name: windows-x64
          path: ./windows-x64

  release:
    runs-on: ${{ matrix.os }}
    needs: [build]
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        total: ["2"]
        index: ["0", "1"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Retrieve artifact from windows x64
        uses: actions/download-artifact@v4
        with:
          name: windows-x64
          path: /tmp/windows-x64

      - name: Retrieve artifact from linux x64
        uses: actions/download-artifact@v4
        with:
          name: linux-x64
          path: /tmp/linux-x64

      - name: Retrieve artifact from macos x64
        uses: actions/download-artifact@v4
        with:
          name: macos-x64
          path: /tmp/macos-x64

      - name: Retrieve artifact from macos arm64
        uses: actions/download-artifact@v4
        with:
          name: macos-arm64
          path: /tmp/macos-arm64

      - name: Display build files
        shell: bash
        run: |
          ls -lh /tmp
          echo "---linux-x64---"
          ls -lh /tmp/linux-x64/
          readelf -d /tmp/linux-x64/generate_subtitles
          echo "---"
          ldd /tmp/linux-x64/generate_subtitles

          echo "---macos-x64---"
          ls -lh /tmp/macos-x64/
          mkdir -p /tmp/macos-x64/Contents/Resources
          chmod +x /tmp/macos-x64/Contents/MacOS/generate_subtitles

          echo "---macos-arm64---"
          ls -lh /tmp/macos-arm64/
          mkdir -p /tmp/macos-arm64/Contents/Resources
          chmod +x /tmp/macos-arm64/Contents/MacOS/generate_subtitles

          echo "---windows-x64---"
          ls -lh /tmp/windows-x64/

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip jinja2

      - name: Generate build script
        shell: bash
        run: |
          cd scripts/lazarus

          total=${{ matrix.total }}
          index=${{ matrix.index }}

          ./generate-subtitles.py --total $total --index $index

          chmod +x build-generate-subtitles.sh
          mv -v ./build-generate-subtitles.sh ../..

      - name: Generate tar files
        shell: bash
        run: |
          ./build-generate-subtitles.sh

      - name: Display tar files
        shell: bash
        run: |
          ls -lh /tmp/out

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-bin huggingface
            cd huggingface
            git remote set-url origin https://csukuangfj:$HF_TOKEN@huggingface.co/sherpa-onnx-bin
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            d=generate-subtitles/$SHERPA_ONNX_VERSION
            mkdir -p $d

            cp -v /tmp/out/*.tar.bz2 $d/
            git status
            git lfs track "*.tar.bz2"
            git add .
            git commit -m "add more files"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-bin main


================================================
FILE: .github/workflows/linux-gpu.yaml
================================================
name: linux-gpu

on:
  push:
    branches:
      - master
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'
    paths:
      - '.github/workflows/linux-gpu.yaml'
      - '.github/scripts/test-online-transducer.sh'
      - '.github/scripts/test-online-paraformer.sh'
      - '.github/scripts/test-offline-transducer.sh'
      - '.github/scripts/test-offline-ctc.sh'
      - '.github/scripts/test-online-ctc.sh'
      - '.github/scripts/test-offline-tts.sh'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/c-api/*'
      - 'c-api-examples/**'

  workflow_dispatch:

concurrency:
  group: linux-gpu-${{ github.ref }}
  cancel-in-progress: true

jobs:
  linux_gpu:
    runs-on: ${{ matrix.os }}
    name: ${{ matrix.build_type }} ${{ matrix.onnxruntime_version }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        # build_type: [Release, Debug]
        build_type: [Release]
        onnxruntime_version: ["1.17.1", "1.23.2"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Build sherpa-onnx
        shell: bash
        run: |
            docker run --rm \
              --volume ${{ github.workspace }}/:/home/runner/work/sherpa-onnx/sherpa-onnx \
              quay.io/pypa/manylinux_2_28_x86_64 \
            bash -c '
              uname -a
              gcc --version
              cmake --version
              cat /etc/*release
              id
              pwd

              cd /home/runner/work/sherpa-onnx/sherpa-onnx

              onnxruntime_version=${{ matrix.onnxruntime_version }}
              curl -SL -O https://github.com/csukuangfj/onnxruntime-libs/releases/download/v$onnxruntime_version/onnxruntime-linux-x64-gpu-$onnxruntime_version-patched.zip
              unzip  onnxruntime-linux-x64-gpu-$onnxruntime_version-patched.zip

              export SHERPA_ONNXRUNTIME_LIB_DIR=$PWD/onnxruntime-linux-x64-gpu-$onnxruntime_version-patched/lib
              export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$PWD/onnxruntime-linux-x64-gpu-$onnxruntime_version-patched/include

              ls -lh /home/runner/work/sherpa-onnx/sherpa-onnx/onnxruntime-linux-x64-gpu-$onnxruntime_version-patched/lib/libonnxruntime.so

              git clone --depth 1 --branch v1.2.12 https://github.com/alsa-project/alsa-lib
              pushd alsa-lib
              ./gitcompile
              popd

              export CPLUS_INCLUDE_PATH=$PWD/alsa-lib/include:$CPLUS_INCLUDE_PATH
              export C_INCLUDE_PATH=$PWD/alsa-lib/include:$C_INCLUDE_PATH
              export SHERPA_ONNX_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs

              p=$PWD

              mkdir build
              cd build

              cmake \
                -DALSA_INCLUDE_DIR=$p/alsa-lib/include \
                -DALSA_LIBRARY=$p/alsa-lib/src/.libs/libasound.so \
                -D CMAKE_BUILD_TYPE=${{ matrix.build_type }} \
                -D CMAKE_INSTALL_PREFIX=./install \
                -D BUILD_SHARED_LIBS=ON \
                -D SHERPA_ONNX_ENABLE_GPU=ON \
                ..

              make -j2
              make install

              ls -lh lib
              ls -lh bin

              echo "----"
              ls -lh install/lib

              echo "----"
              ls -lh install/bin
            '

      - name: Display dependencies of sherpa-onnx for linux
        shell: bash
        run: |
          du -h -d1 .
          sudo chown -R $USER ./build
          ls -lh build/bin
          ls -lh build/_deps/onnxruntime-src/lib/ || true

          echo "strip"
          strip build/bin/*
          echo "after strip"
          ls -lh build/bin

          file build/bin/sherpa-onnx
          file build/bin/sherpa-onnx
          ls -lh build/bin/sherpa-onnx
          readelf -d build/bin/sherpa-onnx

          rm -fv build/install/include/cargs.h
          rm -fv build/install/lib/cargs.h
          rm -fv build/install/lib/libcargs.so
          rm -rfv build/install/lib/pkgconfig

          strings build/install/lib/*.so | grep "^GLIBC_"

      - name: Copy files
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-linux-x64-gpu

          onnxruntime_version=${{ matrix.onnxruntime_version }}
          if [[ $onnxruntime_version == "1.23.2" ]]; then
            dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-cuda-12.x-cudnn-9.x-linux-x64-gpu
          fi

          mkdir $dst

          cp -a build/install/bin $dst/
          cp -a build/install/lib $dst/
          cp -a build/install/include $dst/

          tree $dst

          tar cjvf ${dst}.tar.bz2 $dst

      - name: Release pre-compiled binaries and libs for linux x64
        if: github.repository_owner == 'csukuangfj' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*gpu.tar.bz2
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: v1.12.13

      - name: Release pre-compiled binaries and libs for linux x64
        if: github.repository_owner == 'k2-fsa' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*gpu.tar.bz2

      - name: Display dependencies of sherpa-onnx for linux
        shell: bash
        run: |
          file build/bin/sherpa-onnx
          readelf -d build/bin/sherpa-onnx

      - name: Test spoken language identification
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline-language-identification

          .github/scripts/test-spoken-language-identification.sh

      - name: Test online CTC
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx

          .github/scripts/test-online-ctc.sh

      - name: Test offline TTS
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline-tts

          .github/scripts/test-offline-tts.sh

      - name: Test online paraformer
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx

          .github/scripts/test-online-paraformer.sh


      - name: Test offline Whisper
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline

          .github/scripts/test-offline-whisper.sh

      - name: Test offline CTC
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline

          .github/scripts/test-offline-ctc.sh

      - name: Test offline transducer
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline

          .github/scripts/test-offline-transducer.sh

      - name: Test online transducer
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx

          .github/scripts/test-online-transducer.sh

      - name: Test online transducer (C API)
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=decode-file-c-api

          .github/scripts/test-online-transducer.sh


================================================
FILE: .github/workflows/linux-jni-aarch64.yaml
================================================
name: linux-jni-aarch64

on:
  push:
    branches:
      - jni
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'
  workflow_dispatch:

concurrency:
  group: linux-jni-aarch64-${{ github.ref }}
  cancel-in-progress: true

jobs:
  linux-jni-aarch64:
    name: linux jni aarch64
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-22.04-arm]
        # java-version: ['8', '11', '16', '17', '21']
        java-version: ['21']

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - uses: actions/setup-java@v4
        with:
          distribution: 'temurin' # See 'Supported distributions' for available options
          java-version: ${{ matrix.java-version }}

      - name: Display PWD
        shell: bash
        run: |
          echo "pwd: $PWD"
          ls -lh
          du -h -d1 .

      - name: Build sherpa-onnx
        if: matrix.java-version == '21'
        shell: bash
        run: |
            docker run --rm \
              --volume ${{ github.workspace }}/:/home/runner/work/sherpa-onnx/sherpa-onnx \
              quay.io/pypa/manylinux2014_aarch64 \
            bash -c '
              uname -a
              gcc --version
              cmake --version
              cat /etc/*release
              id
              pwd

              yum install -y java-11-openjdk-devel
              java -version
              which java
              ls -lh $(which java)
              ls -lrt /etc/alternatives/java

              export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-11.0.23.0.9-2.el7_9.aarch64
              echo "JAVA_HOME: $JAVA_HOME"
              find $JAVA_HOME -name jni.h

              cd /home/runner/work/sherpa-onnx/sherpa-onnx

              git clone --depth 1 --branch v1.2.12 https://github.com/alsa-project/alsa-lib
              pushd alsa-lib
              ./gitcompile
              popd

              export CPLUS_INCLUDE_PATH=$PWD/alsa-lib/include:$CPLUS_INCLUDE_PATH
              export C_INCLUDE_PATH=$PWD/alsa-lib/include:$C_INCLUDE_PATH
              export SHERPA_ONNX_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs
              p=$PWD

              mkdir build
              cd build

              cmake \
                -DALSA_INCLUDE_DIR=$p/alsa-lib/include \
                -DALSA_LIBRARY=$p/alsa-lib/src/.libs/libasound.so \
                -D SHERPA_ONNX_ENABLE_TTS=ON \
                -D CMAKE_BUILD_TYPE=Release \
                -D BUILD_SHARED_LIBS=ON \
                -D CMAKE_INSTALL_PREFIX=./install \
                -D SHERPA_ONNX_ENABLE_BINARY=OFF \
                -D SHERPA_ONNX_ENABLE_JNI=ON \
                ..

              make -j2
              make install

              ls -lh lib
              rm -rf ./install/lib/pkgconfig
              rm -rf ./install/lib/share
              rm -rf ./install/lib/cargs.h
              rm -rf ./install/include/cargs.h
              rm -rf ./install/lib/libcargs.so
              rm -rf ./install/lib/libsherpa-onnx-c-api.so

              echo "----"
              ls -lh install/lib

              echo "----"
            '

      - uses: actions/upload-artifact@v4
        if: matrix.java-version == '21'
        with:
          name: release-jni-linux-${{ matrix.java-version }}
          path: build/install/*

      - name: Copy files
        if: matrix.java-version == '21'
        shell: bash
        run: |
          du -h -d1 .
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-linux-aarch64-jni
          mkdir $dst

          cp -a build/install/lib $dst/
          cp -a build/install/include $dst/

          tree $dst

          tar cjvf ${dst}.tar.bz2 $dst
          du -h -d1 .

      - name: Publish to huggingface
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch') && matrix.java-version == '21'
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_CLONE_PROTECTION_ACTIVE=false
            GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj2/sherpa-onnx-libs huggingface

            cd huggingface
            dst=jni/$SHERPA_ONNX_VERSION
            mkdir -p $dst

            cp -v ../sherpa-onnx-*.tar.bz2 $dst/
            cp -v ../*.jar $dst/

            git status
            git lfs track "*.bz2"

            git add .

            git commit -m "add more files"

            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs main

      - name: Release pre-compiled binaries and libs for linux aarch64
        if: github.repository_owner == 'csukuangfj' && github.event_name == 'push' && contains(github.ref, 'refs/tags/') && matrix.java-version == '21'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*.tar.bz2
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: v1.12.25

      - name: Release pre-compiled binaries and libs for linux aarch64
        if: github.repository_owner == 'k2-fsa' && github.event_name == 'push' && contains(github.ref, 'refs/tags/') && matrix.java-version == '21'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*.tar.bz2


================================================
FILE: .github/workflows/linux-jni.yaml
================================================
name: linux-jni

on:
  push:
    branches:
      - jni
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'
  workflow_dispatch:

concurrency:
  group: linux-jni-${{ github.ref }}
  cancel-in-progress: true

jobs:
  linux-jni:
    name: linux jni
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        java-version: ['24']

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - uses: actions/setup-java@v4
        with:
          distribution: 'temurin' # See 'Supported distributions' for available options
          java-version: ${{ matrix.java-version }}

      - name: Display PWD
        shell: bash
        run: |
          echo "pwd: $PWD"
          ls -lh
          du -h -d1 .

      - name: Build jar ${{ matrix.java-version }}
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
          cd sherpa-onnx/java-api
          make
          ls -lh build/
          cp build/sherpa-onnx.jar ../../sherpa-onnx-$SHERPA_ONNX_VERSION.jar
          cd ../..
          ls -lh *.jar

      - uses: actions/upload-artifact@v4
        with:
          name: release-jni-linux-jar-${{ matrix.java-version }}
          path: ./*.jar

      - name: Release jar
        if: github.repository_owner == 'csukuangfj' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: ./*.jar
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: v1.12.25

      - name: Release jar
        if: github.repository_owner == 'k2-fsa' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: ./*.jar

      - name: Build sherpa-onnx
        shell: bash
        run: |
            docker run --rm \
              --volume ${{ github.workspace }}/:/home/runner/work/sherpa-onnx/sherpa-onnx \
              quay.io/pypa/manylinux2014_x86_64 \
            bash -c '
              uname -a
              gcc --version
              cmake --version
              cat /etc/*release
              id
              pwd

              yum install -y java-11-openjdk-devel
              java -version
              which java
              ls -lh $(which java)
              ls -lrt /etc/alternatives/java

              export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-11.0.23.0.9-2.el7_9.x86_64
              echo "JAVA_HOME: $JAVA_HOME"
              find $JAVA_HOME -name jni.h

              cd /home/runner/work/sherpa-onnx/sherpa-onnx

              git clone --depth 1 --branch v1.2.12 https://github.com/alsa-project/alsa-lib
              pushd alsa-lib
              ./gitcompile
              popd

              export CPLUS_INCLUDE_PATH=$PWD/alsa-lib/include:$CPLUS_INCLUDE_PATH
              export C_INCLUDE_PATH=$PWD/alsa-lib/include:$C_INCLUDE_PATH
              export SHERPA_ONNX_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs

              p=$PWD

              mkdir build
              cd build

              cmake \
                -DALSA_INCLUDE_DIR=$p/alsa-lib/include \
                -DALSA_LIBRARY=$p/alsa-lib/src/.libs/libasound.so \
                -D SHERPA_ONNX_ENABLE_TTS=ON \
                -D CMAKE_BUILD_TYPE=Release \
                -D BUILD_SHARED_LIBS=ON \
                -D CMAKE_INSTALL_PREFIX=./install \
                -D SHERPA_ONNX_ENABLE_JNI=ON \
                ..

              make -j2
              make install

              ls -lh lib
              ls -lh bin
              rm -rf ./install/lib/pkgconfig
              rm -rf ./install/lib/share
              rm -rf ./install/lib/cargs.h
              rm -rf ./install/include/cargs.h
              rm -rf ./install/lib/libcargs.so
              rm -rf ./install/lib/libsherpa-onnx-c-api.so
              rm -rf ./install/lib/libsherpa-onnx-cxx-api.so

              echo "----"
              ls -lh install/lib

              echo "----"
              ls -lh install/bin
            '

      - name: Display dependencies of sherpa-onnx for linux
        shell: bash
        run: |
          du -h -d1 .
          sudo chown -R $USER ./build
          ls -lh build/bin
          ls -lh build/_deps/onnxruntime-src/lib/

          echo "strip"
          strip build/bin/*
          echo "after strip"
          ls -lh build/bin

          file build/bin/sherpa-onnx
          file build/bin/sherpa-onnx
          ls -lh build/bin/sherpa-onnx
          readelf -d build/bin/sherpa-onnx

      - uses: actions/upload-artifact@v4
        with:
          name: release-jni-linux-${{ matrix.java-version }}
          path: build/install/*

      - name: Copy files
        shell: bash
        run: |
          du -h -d1 .
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-linux-x64-jni
          mkdir $dst

          cp -a build/install/bin $dst/
          cp -a build/install/lib $dst/
          cp -a build/install/include $dst/

          tree $dst

          tar cjvf ${dst}.tar.bz2 $dst
          du -h -d1 .

      - name: Release pre-compiled binaries and libs for linux x64
        if: github.repository_owner == 'csukuangfj' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*.tar.bz2
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: v1.12.11

      - name: Release pre-compiled binaries and libs for linux x64
        if: github.repository_owner == 'k2-fsa' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*.tar.bz2

      - name: Publish to huggingface
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_CLONE_PROTECTION_ACTIVE=false
            GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj2/sherpa-onnx-libs huggingface

            cd huggingface
            dst=jni/$SHERPA_ONNX_VERSION
            mkdir -p $dst
            git lfs track "*.jar"

            cp -v ../sherpa-onnx-*.tar.bz2 $dst/
            cp -v ../*.jar $dst/

            git status
            git lfs track "*.bz2"

            git add .

            git commit -m "add more files"

            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs main


================================================
FILE: .github/workflows/linux.yaml
================================================
name: linux

on:
  push:
    branches:
      - master
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'
    paths:
      - '.github/workflows/linux.yaml'
      - '.github/scripts/test-kws.sh'
      - '.github/scripts/test-online-transducer.sh'
      - '.github/scripts/test-offline-speech-denoiser.sh'
      - '.github/scripts/test-offline-source-separation.sh'
      - '.github/scripts/test-online-paraformer.sh'
      - '.github/scripts/test-offline-transducer.sh'
      - '.github/scripts/test-offline-ctc.sh'
      - '.github/scripts/test-online-ctc.sh'
      - '.github/scripts/test-offline-tts.sh'
      - '.github/scripts/test-audio-tagging.sh'
      - '.github/scripts/test-offline-punctuation.sh'
      - '.github/scripts/test-online-punctuation.sh'
      - '.github/scripts/test-speaker-diarization.sh'
      - '.github/scripts/test-c-api.sh'
      - '.github/scripts/test-cxx-api.sh'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/c-api/*'
      - 'c-api-examples/**'
  pull_request:
    branches:
      - master
    paths:
      - '.github/workflows/linux.yaml'
      - '.github/scripts/test-kws.sh'
      - '.github/scripts/test-offline-speech-denoiser.sh'
      - '.github/scripts/test-offline-source-separation.sh'
      - '.github/scripts/test-online-transducer.sh'
      - '.github/scripts/test-online-paraformer.sh'
      - '.github/scripts/test-offline-transducer.sh'
      - '.github/scripts/test-offline-ctc.sh'
      - '.github/scripts/test-online-ctc.sh'
      - '.github/scripts/test-offline-tts.sh'
      - '.github/scripts/test-audio-tagging.sh'
      - '.github/scripts/test-offline-punctuation.sh'
      - '.github/scripts/test-online-punctuation.sh'
      - '.github/scripts/test-speaker-diarization.sh'
      - '.github/scripts/test-c-api.sh'
      - '.github/scripts/test-cxx-api.sh'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/c-api/*'

  workflow_dispatch:

concurrency:
  group: linux-${{ github.ref }}
  cancel-in-progress: true

jobs:
  linux:
    name: ${{ matrix.build_type }} shared-${{ matrix.shared_lib }} tts-${{ matrix.with_tts }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        build_type: [Release, Debug]
        shared_lib: [ON, OFF]
        with_tts: [ON, OFF]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Display PWD
        shell: bash
        run: |
          echo "pwd: $PWD"
          ls -lh
          du -h -d1 .

      - name: Build sherpa-onnx
        shell: bash
        run: |
          docker run --rm \
            --volume ${{ github.workspace }}/:/home/runner/work/sherpa-onnx/sherpa-onnx \
            quay.io/pypa/manylinux2014_x86_64 \
            bash -c '
              uname -a
              gcc --version

              # use gcc 11. the default is gcc 10

              # See https://github.com/nealef/clefos/issues/9
              echo "multilib_policy=best" >> /etc/yum.conf
              echo "skip_missing_names_on_install=False" >> /etc/yum.conf
              sed -i "/^override_install_langs=/d" /etc/yum.conf
              yum -y update
              yum -y install yum-utils curl
              yum-config-manager --enable extras
              yum -y install centos-release-scl-rh
              yum -y install devtoolset-11-binutils devtoolset-11-gcc devtoolset-11-gcc-c++ devtoolset-11-gcc-gfortran

              # see https://stackoverflow.com/questions/72904802/can-not-find-required-gcc-version-after-devtoolset-installation
              ls -lh /opt/rh/devtoolset-11

              source /opt/rh/devtoolset-11/enable

              echo "which gcc"
              which gcc

              echo "gcc --version"
              gcc --version

              cmake --version
              cat /etc/*release
              id
              pwd

              cd /home/runner/work/sherpa-onnx/sherpa-onnx

              git clone --depth 1 --branch v1.2.12 https://github.com/alsa-project/alsa-lib
              pushd alsa-lib
              ./gitcompile
              popd

              export CPLUS_INCLUDE_PATH=$PWD/alsa-lib/include:$CPLUS_INCLUDE_PATH
              export C_INCLUDE_PATH=$PWD/alsa-lib/include:$C_INCLUDE_PATH
              export SHERPA_ONNX_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs

              p=$PWD

              mkdir build
              cd build

              cmake \
                -DALSA_INCLUDE_DIR=$p/alsa-lib/include \
                -DALSA_LIBRARY=$p/alsa-lib/src/.libs/libasound.so \
                -D SHERPA_ONNX_ENABLE_TTS=${{ matrix.with_tts }} \
                -D CMAKE_BUILD_TYPE=${{ matrix.build_type }} \
                -D BUILD_SHARED_LIBS=${{ matrix.shared_lib }} \
                -D CMAKE_INSTALL_PREFIX=./install \
                ..

              make -j2
              make install

              ls -lh lib
              ls -lh bin

              echo "----"
              ls -lh install/lib

              echo "----"
              ls -lh install/bin
            '

      - name: Display dependencies of sherpa-onnx for linux
        shell: bash
        run: |
          du -h -d1 .
          sudo chown -R $USER ./build
          ls -lh build/bin
          ls -lh build/_deps/onnxruntime-src/lib/

          echo "strip"
          strip build/bin/*
          echo "after strip"
          ls -lh build/bin

          file build/bin/sherpa-onnx
          file build/bin/sherpa-onnx
          ls -lh build/bin/sherpa-onnx
          readelf -d build/bin/sherpa-onnx

          rm -fv build/install/include/cargs.h
          rm -fv build/install/lib/cargs.h
          rm -fv build/install/lib/libcargs.so
          rm -rfv build/install/lib/pkgconfig

      - uses: actions/upload-artifact@v4
        with:
          name: release-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
          path: install/*

      - name: Copy files
        shell: bash
        if: matrix.build_type == 'Release'
        run: |
          du -h -d1 .
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          if [[ ${{ matrix.shared_lib }} == 'ON' ]]; then
            suffix=shared
          else
            suffix=static
          fi

          if [[ ${{ matrix.with_tts }} == ON ]]; then
            dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-linux-x64-$suffix
          else
            dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-linux-x64-$suffix-no-tts
          fi
          mkdir $dst

          cp -a build/install/bin $dst/
          if [[ ${{ matrix.shared_lib }} == ON ]]; then
            mkdir $dst/lib
            cp -av build/install/lib/*.so* $dst/lib/
          fi
          cp -a build/install/include $dst/

          tree $dst

          tar cjvf ${dst}.tar.bz2 $dst
          du -h -d1 .

      - name: Release pre-compiled binaries and libs for linux x64
        if: github.repository_owner == 'csukuangfj' && github.event_name == 'push' && contains(github.ref, 'refs/tags/') && matrix.build_type == 'Release'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*.tar.bz2
          # repo_name: k2-fsa/sherpa-onnx
          # repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          # tag: v1.12.25

      - name: Release pre-compiled binaries and libs for linux x64
        if: github.repository_owner == 'k2-fsa' && github.event_name == 'push' && contains(github.ref, 'refs/tags/') && matrix.build_type == 'Release'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*.tar.bz2

      - name: Test offline source separation
        shell: bash
        run: |
          du -h -d1 .
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline-source-separation

          .github/scripts/test-offline-source-separation.sh

      - uses: actions/upload-artifact@v4
        with:
          name: source-separation-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
          path: ./source-separation-wavs/*.wav

      - name: Test offline CTC
        shell: bash
        run: |
          du -h -d1 .
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline

          .github/scripts/test-offline-ctc.sh
          du -h -d1 .

      - name: Test offline speech denoiser
        shell: bash
        run: |
          du -h -d1 .
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline-denoiser

          .github/scripts/test-offline-speech-denoiser.sh

      - name: Test offline TTS
        if: matrix.with_tts == 'ON'
        shell: bash
        run: |
          du -h -d1 .
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline-tts

          .github/scripts/test-offline-tts.sh
          du -h -d1 .

      - uses: actions/upload-artifact@v4
        with:
          name: speech-denoiser-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
          path: ./*speech*.wav

      - uses: actions/upload-artifact@v4
        if: matrix.with_tts == 'ON'
        with:
          name: tts-generated-test-files-${{ matrix.build_type }}-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
          path: tts

      - name: Test offline FireRedASR
        if: matrix.build_type != 'Debug'
        shell: bash
        run: |
          du -h -d1 .
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline

          readelf -d build/bin/sherpa-onnx-offline

          .github/scripts/test-offline-fire-red-asr.sh
          du -h -d1 .

      - name: Test offline Moonshine
        if: matrix.build_type != 'Debug'
        shell: bash
        run: |
          du -h -d1 .
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline

          readelf -d build/bin/sherpa-onnx-offline

          .github/scripts/test-offline-moonshine.sh
          du -h -d1 .

      - name: Test C++ API
        shell: bash
        run: |
          du -h -d1 .
          export PATH=$PWD/build/bin:$PATH
          export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api
          export CXX_WHISPER_EXE=whisper-cxx-api
          export CXX_SENSE_VOICE_EXE=sense-voice-cxx-api

          .github/scripts/test-cxx-api.sh
          du -h -d1 .

      - name: Test offline speaker diarization
        shell: bash
        run: |
          du -h -d1 .
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline-speaker-diarization

          .github/scripts/test-speaker-diarization.sh

      - name: Test offline transducer
        shell: bash
        run: |
          du -h -d1 .
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline

          .github/scripts/test-offline-transducer.sh
          du -h -d1 .

      - name: Test online punctuation
        shell: bash
        run: |
          du -h -d1 .
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-online-punctuation

          .github/scripts/test-online-punctuation.sh
          du -h -d1 .

      - name: Test online transducer
        shell: bash
        run: |
          du -h -d1 .
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx

          .github/scripts/test-online-transducer.sh
          du -h -d1 .

      - name: Test online transducer (C API)
        shell: bash
        run: |
          du -h -d1 .
          export PATH=$PWD/build/bin:$PATH
          export EXE=decode-file-c-api

          .github/scripts/test-online-transducer.sh
          du -h -d1 .

      - name: Test spoken language identification (C++ API)
        shell: bash
        run: |
          du -h -d1 .
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline-language-identification

          .github/scripts/test-spoken-language-identification.sh
          du -h -d1 .

      - name: Test online CTC
        shell: bash
        run: |
          du -h -d1 .
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx

          .github/scripts/test-online-ctc.sh
          du -h -d1 .

      - name: Test C API
        shell: bash
        run: |
          du -h -d1 .
          export PATH=$PWD/build/bin:$PATH
          export SLID_EXE=spoken-language-identification-c-api
          export SID_EXE=speaker-identification-c-api
          export AT_EXE=audio-tagging-c-api
          export PUNCT_EXE=add-punctuation-c-api

          .github/scripts/test-c-api.sh
          du -h -d1 .

      - name: Test offline punctuation
        shell: bash
        run: |
          du -h -d1 .
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline-punctuation

          .github/scripts/test-offline-punctuation.sh
          du -h -d1 .

      - name: Test Audio tagging
        shell: bash
        run: |
          du -h -d1 .
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline-audio-tagging

          .github/scripts/test-audio-tagging.sh
          du -h -d1 .

      - name: Test transducer kws
        shell: bash
        run: |
          du -h -d1 .
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-keyword-spotter

          .github/scripts/test-kws.sh
          du -h -d1 .

      - name: Test offline Whisper
        if: matrix.build_type != 'Debug'
        shell: bash
        run: |
          du -h -d1 .
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline

          readelf -d build/bin/sherpa-onnx-offline

          .github/scripts/test-offline-whisper.sh
          du -h -d1 .

      - name: Test online paraformer
        shell: bash
        run: |
          du -h -d1 .
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx

          .github/scripts/test-online-paraformer.sh
          du -h -d1 .


================================================
FILE: .github/workflows/macos-jni.yaml
================================================
name: macos-jni

on:
  push:
    branches:
      - jni
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'

  workflow_dispatch:

concurrency:
  group: macos-jni-${{ github.ref }}
  cancel-in-progress: true

jobs:
  macos_jni:
    runs-on: ${{ matrix.os }}
    name: ${{ matrix.arch }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        arch: [arm64, x86_64]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - uses: actions/setup-java@v4
        with:
          distribution: 'temurin' # See 'Supported distributions' for available options
          java-version: '21'

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-${{ matrix.arch }}

      - name: Configure CMake
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          mkdir build
          cd build
          arch=${{ matrix.arch }}

          cmake \
            -D BUILD_SHARED_LIBS=ON \
            -D CMAKE_BUILD_TYPE=Release \
            -D CMAKE_OSX_ARCHITECTURES=$arch \
            -D SHERPA_ONNX_ENABLE_JNI=ON \
            -DCMAKE_INSTALL_PREFIX=./install \
            ..

      - name: Build sherpa-onnx for macos
        shell: bash
        run: |
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"

          cd build
          make -j2
          make install

          ls -lh lib
          ls -lh bin

          file ./bin/sherpa-onnx

          rm -rf ./install/lib/pkgconfig
          rm -rf ./install/lib/share
          rm -rf ./install/lib/cargs.h
          rm -rf ./install/include/cargs.h
          rm -rf ./install/lib/libcargs.dylib

      - uses: actions/upload-artifact@v4
        with:
          name: release-jni-macos-${{ matrix.arch }}
          path: build/install/*

      - name: Copy files
        shell: bash
        run: |
          du -h -d1 .
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          arch=${{ matrix.arch }}
          dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-osx-$arch-jni
          mkdir -p $dst

          cp -a build/install/bin $dst/
          cp -a build/install/lib $dst/
          cp -a build/install/include $dst/

          brew install tree

          tree $dst

          tar cjvf ${dst}.tar.bz2 $dst
          du -h -d1 .

      - name: Publish to huggingface
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_CLONE_PROTECTION_ACTIVE=false
            GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj2/sherpa-onnx-libs huggingface

            cd huggingface
            dst=jni/$SHERPA_ONNX_VERSION
            mkdir -p $dst

            cp -v ../sherpa-onnx-*.tar.bz2 $dst

            git status
            git lfs track "*.bz2"

            git add .

            git commit -m "add more files"

            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs main

      - name: Release pre-compiled binaries and libs for linux x64
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*.tar.bz2


================================================
FILE: .github/workflows/macos.yaml
================================================
name: macos

on:
  push:
    branches:
      - master
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'
    paths:
      - '.github/scripts/test-offline-speech-denoiser.sh'
      - '.github/workflows/macos.yaml'
      - '.github/scripts/test-kws.sh'
      - '.github/scripts/test-online-transducer.sh'
      - '.github/scripts/test-online-paraformer.sh'
      - '.github/scripts/test-offline-transducer.sh'
      - '.github/scripts/test-offline-ctc.sh'
      - '.github/scripts/test-offline-tts.sh'
      - '.github/scripts/test-online-ctc.sh'
      - '.github/scripts/test-audio-tagging.sh'
      - '.github/scripts/test-offline-punctuation.sh'
      - '.github/scripts/test-online-punctuation.sh'
      - '.github/scripts/test-speaker-diarization.sh'
      - '.github/scripts/test-c-api.sh'
      - '.github/scripts/test-cxx-api.sh'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/c-api/*'
  pull_request:
    branches:
      - master
    paths:
      - '.github/scripts/test-offline-speech-denoiser.sh'
      - '.github/workflows/macos.yaml'
      - '.github/scripts/test-kws.sh'
      - '.github/scripts/test-online-transducer.sh'
      - '.github/scripts/test-online-paraformer.sh'
      - '.github/scripts/test-offline-transducer.sh'
      - '.github/scripts/test-offline-ctc.sh'
      - '.github/scripts/test-offline-tts.sh'
      - '.github/scripts/test-online-ctc.sh'
      - '.github/scripts/test-audio-tagging.sh'
      - '.github/scripts/test-offline-punctuation.sh'
      - '.github/scripts/test-online-punctuation.sh'
      - '.github/scripts/test-speaker-diarization.sh'
      - '.github/scripts/test-c-api.sh'
      - '.github/scripts/test-cxx-api.sh'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/c-api/*'

  workflow_dispatch:

concurrency:
  group: macos-${{ github.ref }}
  cancel-in-progress: true

jobs:
  macos:
    runs-on: ${{ matrix.os }}
    name: ${{ matrix.build_type }} ${{ matrix.lib_type }} tts-${{ matrix.with_tts }} ${{ matrix.os }} ${{ matrix.arch }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        build_type: [Release, Debug]
        lib_type: [static, shared]
        with_tts: [ON, OFF]
        arch: ["arm64;x86_64"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-${{ matrix.build_type }}-${{ matrix.lib_type }}-tts-${{ matrix.with_tts }}

      - name: Configure CMake
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          mkdir build
          cd build
          lib_type=${{ matrix.lib_type }}
          if [[ $lib_type == "static" ]]; then
            BUILD_SHARED_LIBS=OFF
          else
            BUILD_SHARED_LIBS=ON
          fi

          arch="${{ matrix.arch }}"

          cmake \
            -DSHERPA_ONNX_ENABLE_TTS=${{ matrix.with_tts }} \
            -D BUILD_SHARED_LIBS=$BUILD_SHARED_LIBS \
            -D CMAKE_BUILD_TYPE=${{ matrix.build_type }} \
            -D CMAKE_OSX_ARCHITECTURES="$arch" \
            -D CMAKE_INSTALL_PREFIX=./install \
            ..

      - name: Build sherpa-onnx for macos
        shell: bash
        run: |
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"

          cd build
          make -j2
          make install

          ls -lh lib
          ls -lh bin

          file ./bin/sherpa-onnx

          rm -fv ./install/include/cargs.h
          rm -fv ./install/lib/cargs.h
          rm -fv ./install/lib/libcargs.dylib
          rm -fv ./install/lib/libcargs.a
          rm -rfv ./install/lib/pkgconfig

      - name: Display dependencies of sherpa-onnx for macos
        shell: bash
        run: |
          file bin/sherpa-onnx
          otool -L build/bin/sherpa-onnx
          otool -l build/bin/sherpa-onnx

      - name: Copy files
        if: matrix.build_type == 'Release'
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          if [[ ${{ matrix.with_tts }} == ON ]]; then
            dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-osx-universal2-${{ matrix.lib_type }}
          else
            dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-osx-universal2-${{ matrix.lib_type }}-no-tts
          fi
          mkdir $dst

          cp -a build/install/bin $dst/
          if [[ ${{ matrix.lib_type }} == shared ]]; then
            mkdir $dst/lib
            cp -a build/install/lib/*.dylib* $dst/lib/
          else
            cp -a build/install/lib $dst/
          fi
          cp -a build/install/include $dst/

          brew install tree
          tree $dst

          tar cjvf ${dst}.tar.bz2 $dst

      - name: Release pre-compiled binaries and libs for macOS
        if: matrix.build_type == 'Release' && (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*osx-universal2*.tar.bz2
          # repo_name: k2-fsa/sherpa-onnx
          # repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          # tag: v1.12.24

      - name: Test offline FireRedASR
        if: matrix.build_type != 'Debug'
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline

          .github/scripts/test-offline-fire-red-asr.sh

      - name: Test offline CTC
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline

          .github/scripts/test-offline-ctc.sh

      - name: Test offline speech denoiser
        shell: bash
        run: |
          du -h -d1 .
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline-denoiser

          .github/scripts/test-offline-speech-denoiser.sh

      - name: Test offline TTS
        if: matrix.with_tts == 'ON'
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline-tts

          .github/scripts/test-offline-tts.sh

      - name: Test offline Moonshine
        if: matrix.build_type != 'Debug'
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline

          .github/scripts/test-offline-moonshine.sh

      - name: Test C++ API
        shell: bash
        run: |
          du -h -d1 .
          export PATH=$PWD/build/bin:$PATH
          export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api
          export CXX_WHISPER_EXE=whisper-cxx-api
          export CXX_SENSE_VOICE_EXE=sense-voice-cxx-api

          .github/scripts/test-cxx-api.sh
          du -h -d1 .

      - name: Test offline speaker diarization
        shell: bash
        run: |
          du -h -d1 .
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline-speaker-diarization

          .github/scripts/test-speaker-diarization.sh

      - name: Test offline transducer
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline

          .github/scripts/test-offline-transducer.sh

      - name: Test online punctuation
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-online-punctuation

          .github/scripts/test-online-punctuation.sh

      - name: Test online CTC
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx

          .github/scripts/test-online-ctc.sh

      - name: Test offline punctuation
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline-punctuation

          .github/scripts/test-offline-punctuation.sh

      - name: Test C API
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export SLID_EXE=spoken-language-identification-c-api
          export SID_EXE=speaker-identification-c-api
          export AT_EXE=audio-tagging-c-api
          export PUNCT_EXE=add-punctuation-c-api

          .github/scripts/test-c-api.sh

      - name: Test Audio tagging
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline-audio-tagging

          .github/scripts/test-audio-tagging.sh

      - name: Test spoken language identification (C++ API)
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline-language-identification

          .github/scripts/test-spoken-language-identification.sh

      - name: Test transducer kws
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-keyword-spotter

          .github/scripts/test-kws.sh

      - name: Test online paraformer
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx

          .github/scripts/test-online-paraformer.sh

      - name: Test offline Whisper
        if: matrix.build_type != 'Debug'
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline

          .github/scripts/test-offline-whisper.sh

      - name: Test online transducer
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx

          .github/scripts/test-online-transducer.sh

      - name: Test online transducer (C API)
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=decode-file-c-api

          .github/scripts/test-online-transducer.sh


================================================
FILE: .github/workflows/mfc.yaml
================================================
name: mfc

on:
  push:
    branches:
      - master
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'
    paths:
      - '.github/workflows/mfc.yaml'
      - 'cmake/**'
      - 'mfc-examples/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/c-api/*'

  workflow_dispatch:

concurrency:
  group: mfc-${{ github.ref }}
  cancel-in-progress: true

jobs:
  mfc:
    name: MFC for ${{ matrix.arch }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [windows-2022]
        arch: [x64, x86]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Display MSBuild info
        shell: cmd
        run: |
          set path="C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Current\Bin"
          msbuild -help

      - name: Configure CMake
        shell: bash
        run: |
          mkdir build
          cd build
          arch=${{ matrix.arch }}
          if [[ $arch == "x86" ]]; then
            arch=Win32
          fi
          cmake -A $arch -D CMAKE_BUILD_TYPE=Release -D BUILD_SHARED_LIBS=OFF -DCMAKE_INSTALL_PREFIX=./install ..

      - name: Build sherpa-onnx for windows
        shell: bash
        run: |
          cd build
          cmake --build . --config Release -- -m:2
          cmake --build . --config Release --target install -- -m:2

          ls -lh install/*

          ls -lh install/lib
          ls -lh install/bin

      - name: Build MFC
        shell: cmd
        run: |
          set path="C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Current\Bin"

          cd mfc-examples

          msbuild .\mfc-examples.sln /property:Configuration=Release /property:Platform=${{ matrix.arch }}

      - name: Copy files
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
          arch=${{ matrix.arch }}
          if [[ $arch == "x86" ]]; then
            src=mfc-examples/Release
            ls -h $src
            dst=mfc-examples/$arch/Release

            mkdir -p $dst
            cp $src/* $dst
          fi

          cd mfc-examples/$arch/Release
          ls -lh

          cp -v StreamingSpeechRecognition.exe sherpa-onnx-streaming-asr-$arch-${SHERPA_ONNX_VERSION}.exe
          cp -v NonStreamingSpeechRecognition.exe sherpa-onnx-non-streaming-asr-$arch-${SHERPA_ONNX_VERSION}.exe
          cp -v NonStreamingTextToSpeech.exe ../sherpa-onnx-non-streaming-tts-$arch-${SHERPA_ONNX_VERSION}.exe
          ls -lh

      - name: Upload artifact tts
        uses: actions/upload-artifact@v4
        with:
          name: non-streaming-tts-${{ matrix.arch }}
          path: ./mfc-examples/${{ matrix.arch }}/sherpa-onnx-non-streaming-tts-*.exe

      - name: Upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: streaming-speech-recognition-${{ matrix.arch }}
          path: ./mfc-examples/${{ matrix.arch }}/Release/sherpa-onnx-streaming-asr-*.exe

      - name: Upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: non-streaming-speech-recognition-${{ matrix.arch }}
          path: ./mfc-examples/${{ matrix.arch }}/Release/sherpa-onnx-non-streaming-asr-*.exe

      - name: Release pre-compiled binaries and libs for Windows ${{ matrix.arch }}
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: ./mfc-examples/${{ matrix.arch }}/Release/sherpa-onnx-streaming-*.exe
          # repo_name: k2-fsa/sherpa-onnx
          # repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          # tag: v1.12.24

      - name: Release pre-compiled binaries and libs for Windows ${{ matrix.arch }}
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: ./mfc-examples/${{ matrix.arch }}/Release/sherpa-onnx-non-streaming-*.exe
          # repo_name: k2-fsa/sherpa-onnx
          # repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          # tag: v1.12.24

      - name: Release pre-compiled binaries and libs for Windows ${{ matrix.arch }}
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: ./mfc-examples/${{ matrix.arch }}/sherpa-onnx-non-streaming-*.exe
          # repo_name: k2-fsa/sherpa-onnx
          # repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          # tag: v1.12.24


================================================
FILE: .github/workflows/mobile-asr-models.yaml
================================================
name: mobile-asr-models

on:
  push:
    branches:
      - asr-mobile

  workflow_dispatch:

concurrency:
  group: mobile-asr-models-${{ github.ref }}
  cancel-in-progress: true

jobs:
  mobile-asr-models:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj' || github.repository_owner == 'csu-fangjun'
    runs-on: ${{ matrix.os }}
    name: ${{ matrix.index }}/${{ matrix.total }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.8"]
        total: ["11"]
        index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]

    steps:
      - uses: actions/checkout@v4

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        shell: bash
        run: |
          python3 -m pip install onnxruntime==1.16.3 onnx==1.15.0 jinja2

      - name: Generate build script
        shell: bash
        run: |
          cd scripts/mobile-asr-models

          total=${{ matrix.total }}
          index=${{ matrix.index }}

          ./generate-asr.py --total $total --index $index
          chmod +x run2.sh
          mv run2.sh run.sh
          ls -lh

      - name: Run
        shell: bash
        run: |
          cd scripts/mobile-asr-models
          ./run.sh

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models


================================================
FILE: .github/workflows/mobile-kws-models.yaml
================================================
name: mobile-kws-models

on:
  push:
    branches:
      - asr-mobile

  workflow_dispatch:

concurrency:
  group: mobile-kws-models-${{ github.ref }}
  cancel-in-progress: true

jobs:
  mobile-kws-models:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj' || github.repository_owner == 'csu-fangjun'
    runs-on: ${{ matrix.os }}
    name: ${{ matrix.index }}/${{ matrix.total }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.8"]
        total: ["2"]
        index: ["0", "1"]

    steps:
      - uses: actions/checkout@v4

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        shell: bash
        run: |
          python3 -m pip install onnxruntime==1.16.3 onnx==1.15.0 jinja2

      - name: Generate build script
        shell: bash
        run: |
          cd scripts/mobile-asr-models

          total=${{ matrix.total }}
          index=${{ matrix.index }}

          ./generate-kws.py --total $total --index $index
          chmod +x run2.sh
          mv run2.sh run.sh
          ls -lh

      - name: Run
        shell: bash
        run: |
          cd scripts/mobile-asr-models
          ./run.sh

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./kws/*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: kws-models


================================================
FILE: .github/workflows/nightly-wheel-arm.yaml
================================================
name: nightly-wheel-arm

on:
  schedule:
    # minute (0-59)
    # hour (0-23)
    # day of the month (1-31)
    # month (1-12)
    # day of the week (0-6)
    # nightly build at 23:50 UTC time every day
    - cron: "50 23 * * *"

  workflow_dispatch:

concurrency:
  group: nightly-wheel-armv7l-${{ github.ref }}
  cancel-in-progress: true

jobs:
  nightly-wheel-arm:
    name: ${{ matrix.python-version }}
    # see https://github.com/actions/virtual-environments/blob/win19/20210525.0/images/win/Windows2019-Readme.md
    runs-on: ${{ matrix.os}}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v2
        with:
          platforms: arm

      - name: Run docker
        shell: bash
        run: |
            docker run --rm \
              --platform linux/arm/v7 \
              --volume ${{ github.workspace }}/:/workspace \
              balenalib/raspberrypi3-python:${{ matrix.python-version }}-bullseye-build \
            bash -c '
              uname -a
              cd /workspace
              ls -lh

              v=${{ matrix.python-version }}
              PYTHON_VERSION=${v/./}
              echo PYTHON_VERSION=$PYTHON_VERSION >> $GITHUB_ENV
              extra=""
              if [[ ${PYTHON_VERSION} == "37" ]]; then
                extra="m"
              fi

              # pip install -i https://www.piwheels.org/simple numpy sentencepiece click
              pip install https://huggingface.co/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/sentencepiece-0.2.0-cp${PYTHON_VERSION}-cp${PYTHON_VERSION}${extra}-linux_armv7l.whl
              pip install --no-deps sherpa-onnx
              python3 -c "import sherpa_onnx; print(sherpa_onnx.__file__, sherpa_onnx.__version__); print(dir(sherpa_onnx)); print(help(sherpa_onnx))"
            '


================================================
FILE: .github/workflows/npm-addon-linux-aarch64.yaml
================================================
name: npm-addon-linux-aarch64

on:
  push:
    branches:
      - node-addon
  workflow_dispatch:

concurrency:
  group: npm-addon-linux-aarch64-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read
  id-token: write

jobs:
  npm-addon-linux-aarch64:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.8"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v2
        with:
          platforms: arm64

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Show .npmrc
        shell: bash
        run: |
          echo $PWD
          echo $HOME

          find $HOME -name .npmrc

      - uses: actions/setup-node@v4
        with:
          node-version: '24'
          registry-url: 'https://registry.npmjs.org'

      - name: Show .npmrc
        shell: bash
        run: |
          echo $PWD
          echo $HOME

          find $HOME -name .npmrc

          cat /home/runner/work/_temp/.npmrc
          cp -v /home/runner/work/_temp/.npmrc ./

      - name: Build sherpa-onnx (docker manually)
        shell: bash
        run: |
          docker run --rm \
              --volume ${{ github.workspace }}/:/shared/ \
              -w /shared \
              --platform linux/arm64 \
              quay.io/pypa/manylinux2014_aarch64 \
            bash -c '
              cp /shared/.npmrc ~/

              cat ~/.npmrc

              echo $HOME
              uname -a
              cat /etc/*release
              gcc --version
              cmake --version

              curl -sL https://rpm.nodesource.com/setup_16.x | bash -
              yum install -y nodejs

              node --version

              cd /shared

              mkdir build
              cd build
              cmake \
                -DCMAKE_INSTALL_PREFIX=./install \
                -DBUILD_SHARED_LIBS=ON \
                -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
                -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
                -DSHERPA_ONNX_ENABLE_BINARY=OFF \
                ..

              make -j2
              make install
              cd ..

              d=$PWD
              export SHERPA_ONNX_INSTALL_DIR=$d/build/install

              ls -lh /shared/build

              pushd scripts/node-addon-api/
              npm i

              ./node_modules/.bin/cmake-js compile --log-level verbose
              popd

              owner=${{ github.repository_owner }}
              export owner

              echo "---"
              ls -lh build/install/lib/
              sudo chown -R runner ./build
              echo "---"
              ls -lh build/install/lib/
              echo "---"

              .github/scripts/node-addon/run.sh

              ls -lh ./sherpa-onnx-node

              tar czvf sherpa-onnx-linux-arm64.tgz sherpa-onnx-node
            '

      - name: Publish
        shell: bash
        run: |
          cd sherpa-onnx-node
          ls -lh
          npm publish --access public

          # cd ./sherpa-onnx-node
          # cp -v /shared/.npmrc ./
          # # https://docs.npmjs.com/trusted-publishers
          # ls -lh
          # npm publish --access public


================================================
FILE: .github/workflows/npm-addon-linux-x64.yaml
================================================
name: npm-addon-linux-x64

on:
  push:
    branches:
      - node-addon
  workflow_dispatch:

concurrency:
  group: npm-addon-linux-x64-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read
  id-token: write

jobs:
  npm-addon-linux-x64:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.8"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - uses: actions/setup-node@v4
        with:
          node-version: '24'
          registry-url: 'https://registry.npmjs.org'

      - name: Display node version
        shell: bash
        run: |
          node --version

      - name: Build sherpa-onnx
        shell: bash
        run: |
          docker run --rm \
            --volume ${{ github.workspace }}/:/shared/ \
            quay.io/pypa/manylinux2014_x86_64 \
            bash -c '
              uname -a
              gcc --version
              cmake --version
              cd /shared

              mkdir build
              cd build
              cmake \
                -DCMAKE_INSTALL_PREFIX=./install \
                -DBUILD_SHARED_LIBS=ON \
                -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
                -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
                -DSHERPA_ONNX_ENABLE_BINARY=OFF \
                ..
              make -j1 install
            '

      - name: Build sherpa-onnx node-addon
        shell: bash
        run: |
          d=$PWD
          export SHERPA_ONNX_INSTALL_DIR=$d/build/install

          sudo mkdir /shared
          sudo ln -s $PWD/build /shared/

          ls -lh /shared/build

          cd scripts/node-addon-api/

          npm i

          ./node_modules/.bin/cmake-js compile --log-level verbose

      - name: Prepare for publish
        shell: bash
        run: |
          owner=${{ github.repository_owner }}
          export owner

          echo "---"
          ls -lh build/install/lib/
          sudo chown -R runner ./build
          echo "---"
          ls -lh build/install/lib/
          echo "---"

          # find build/install/lib/ -maxdepth 1 -type l
          # find build/install/lib/ -maxdepth 1 -type l -delete
          #
          # echo "---"
          # ls -lh build/install/lib/

          .github/scripts/node-addon/run.sh

      - name: Display files to be published
        shell: bash
        run: |
          ls -lh ./sherpa-onnx-node
          tar cjvf ./sherpa-onnx-node.tar.bz2 ./sherpa-onnx-node

      - uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-linux-x64
          path: ./sherpa-onnx-node.tar.bz2

      - name: Publish
        shell: bash
        run: |
          cd ./sherpa-onnx-node
          # https://docs.npmjs.com/trusted-publishers
          npm publish --access public


================================================
FILE: .github/workflows/npm-addon-macos.yaml
================================================
name: npm-addon-macos

on:
  push:
    branches:
      - node-addon
  workflow_dispatch:

concurrency:
  group: npm-addon-macos-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read
  id-token: write

jobs:
  npm-addon-macos:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-15-intel, macos-14]
        python-version: ["3.8"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Update pip
        shell: bash
        run: |
          pip install -U pip

      - uses: actions/setup-node@v4
        with:
          node-version: '24'
          registry-url: 'https://registry.npmjs.org'

      - name: Display node version
        shell: bash
        run: |
          node --version

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-release-shared

      - name: Build sherpa-onnx
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"

          mkdir build
          cd build
          cmake \
            -DCMAKE_INSTALL_PREFIX=./install \
            -DBUILD_SHARED_LIBS=ON \
            -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
            -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
            -DSHERPA_ONNX_ENABLE_BINARY=OFF \
            ..
          make -j install

      - name: Build sherpa-onnx node-addon
        shell: bash
        run: |
          d=$PWD
          export SHERPA_ONNX_INSTALL_DIR=$d/build/install

          cd scripts/node-addon-api/

          npm i

          ./node_modules/.bin/cmake-js compile --log-level verbose

      - name: Prepare for publish
        shell: bash
        run: |
          owner=${{ github.repository_owner }}
          export owner

          ls -lh build/install/lib/
          echo "---"

          # find build/install/lib/ -maxdepth 1 -type l
          # find build/install/lib/ -maxdepth 1 -type l -delete

          # echo "---"
          # ls -lh build/install/lib/

          .github/scripts/node-addon/run.sh

      - name: Display files to be published
        shell: bash
        run: |
          ls -lh ./sherpa-onnx-node
          tar cjvf ./sherpa-onnx-node.tar.bz2 ./sherpa-onnx-node

      - uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-${{ matrix.os }}
          path: ./sherpa-onnx-node.tar.bz2

      - name: Publish
        shell: bash
        run: |
          cd ./sherpa-onnx-node
          # https://docs.npmjs.com/trusted-publishers
          npm publish --access public


================================================
FILE: .github/workflows/npm-addon-win-x64.yaml
================================================
name: npm-addon-win-x64

on:
  push:
    branches:
      - node-addon
  workflow_dispatch:

concurrency:
  group: npm-addon-win-x64-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read
  id-token: write

jobs:
  npm-addon-win-x64:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [windows-2022]
        python-version: ["3.8"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - uses: actions/setup-node@v4
        with:
          node-version: '24'
          registry-url: 'https://registry.npmjs.org'

      - name: Display node version
        shell: bash
        run: |
          node --version

      - name: Build sherpa-onnx
        shell: bash
        run: |
          mkdir build
          cd build
          cmake \
            -DCMAKE_BUILD_TYPE=Release \
            -DCMAKE_INSTALL_PREFIX=./install \
            -DBUILD_SHARED_LIBS=ON \
            -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
            -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
            -DSHERPA_ONNX_ENABLE_BINARY=OFF \
            -DBUILD_ESPEAK_NG_EXE=OFF \
            -DSHERPA_ONNX_BUILD_C_API_EXAMPLES=OFF  \
            ..

          ls -lh  _deps/onnxruntime-src/lib/

          cmake --build . --config Release --target install -- -m:6

          ls -lh install/lib

          echo "----------"

          cp -v  _deps/onnxruntime-src/lib/*.lib ./install/lib
          cp -v  _deps/onnxruntime-src/lib/*.dll ./install/lib

          echo "----------"

          ls -lh install/lib

      - name: Build sherpa-onnx node-addon
        shell: bash
        run: |
          d=$PWD
          export SHERPA_ONNX_INSTALL_DIR=$d/build/install

          cd scripts/node-addon-api/

          npm i

          ./node_modules/.bin/cmake-js compile --log-level verbose

      - name: Prepare for publish
        shell: bash
        run: |
          owner=${{ github.repository_owner }}
          export owner

          echo "---"
          ls -lh build/install/lib/
          echo "---"
          ls -lh build/install/lib/
          echo "---"

          .github/scripts/node-addon/run.sh

      - name: Display files to be published
        shell: bash
        run: |
          ls -lh ./sherpa-onnx-node
          tar cjvf ./sherpa-onnx-node.tar.bz2 ./sherpa-onnx-node

      - uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-win-x64
          path: ./sherpa-onnx-node.tar.bz2

      - name: Publish
        shell: bash
        run: |
          cd ./sherpa-onnx-node
          # https://docs.npmjs.com/trusted-publishers
          npm publish --access public


================================================
FILE: .github/workflows/npm-addon-win-x86.yaml
================================================
name: npm-addon-win-x86

on:
  push:
    branches:
      - node-addon
  workflow_dispatch:

concurrency:
  group: npm-addon-win-x86-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read
  id-token: write

jobs:
  build:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [windows-2022]
        python-version: ["3.8"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - uses: actions/setup-node@v4
        with:
          registry-url: 'https://registry.npmjs.org'
          architecture: 'x86'
          node-version: '16'

      - name: Display node version
        shell: bash
        run: |
          node --version

      - name: Show node-addon
        shell: bash
        run: |
          cd scripts/node-addon-api/

          npm i || true
          cat node_modules/node-addon-api/package.json
          cd node_modules/
          tar cjf node-addon-api.tar.bz2 ./node-addon-api

      - uses: actions/upload-artifact@v4
        with:
          name: node-addon-api
          path: ./scripts/node-addon-api/node_modules/node-addon-api.tar.bz2

      - name: Build sherpa-onnx
        shell: bash
        run: |
          mkdir build
          cd build
          cmake \
            -A Win32 \
            -DCMAKE_BUILD_TYPE=Release \
            -DCMAKE_INSTALL_PREFIX=./install \
            -DBUILD_SHARED_LIBS=ON \
            -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
            -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
            -DSHERPA_ONNX_ENABLE_BINARY=OFF \
            -DBUILD_ESPEAK_NG_EXE=OFF \
            -DSHERPA_ONNX_BUILD_C_API_EXAMPLES=OFF  \
            ..

          ls -lh  _deps/onnxruntime-src/lib/

          cmake --build . --config Release --target install -- -m:6

          ls -lh install/lib

          echo "----------"

          cp -v  _deps/onnxruntime-src/lib/*.lib ./install/lib
          cp -v  _deps/onnxruntime-src/lib/*.dll ./install/lib

          echo "----------"

          ls -lh install/lib

      - name: Build sherpa-onnx node-addon
        shell: bash
        run: |
          d=$PWD
          export SHERPA_ONNX_INSTALL_DIR=$d/build/install

          cd scripts/node-addon-api/

          npm i

          npm config set cmake_js_A "Win32"
          ./node_modules/.bin/cmake-js compile -A Win32 --log-level verbose

      - name: Prepare for publish
        shell: bash
        run: |
          owner=${{ github.repository_owner }}
          export owner

          echo "---"
          ls -lh build/install/lib/
          echo "---"
          ls -lh build/install/lib/
          echo "---"

          .github/scripts/node-addon/run.sh

      - name: Display files to be published
        shell: bash
        run: |
          ls -lh ./sherpa-onnx-node
          tar cjvf ./sherpa-onnx-node.tar.bz2 ./sherpa-onnx-node

      - uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-win-ia32
          path: ./sherpa-onnx-node.tar.bz2

  upload:
    needs: [build]
    name: upload
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        python-version: ["3.8"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - uses: actions/setup-node@v4
        with:
          node-version: '24'
          registry-url: 'https://registry.npmjs.org'

      - name: Display node version
        shell: bash
        run: |
          node --version

      - name: Retrieve artifact
        uses: actions/download-artifact@v4
        with:
          name: sherpa-onnx-win-ia32
          path: /tmp/files/

      - name: Unzip
        shell: bash
        run: |
          cd /tmp/files
          tar xvf sherpa-onnx-node.tar.bz2

      - name: Publish
        shell: bash
        run: |
          cd /tmp/files/sherpa-onnx-node
          # https://docs.npmjs.com/trusted-publishers
          npm publish --access public


================================================
FILE: .github/workflows/npm-addon.yaml
================================================
name: npm-addon

on:
  push:
    branches:
      - node-addon
  workflow_dispatch:

concurrency:
  group: npm-addon-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read
  id-token: write

jobs:
  npm-addon:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.8"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - uses: actions/setup-node@v4
        with:
          registry-url: 'https://registry.npmjs.org'
          node-version: '24'

      - name: Display node version
        shell: bash
        run: |
          node --version

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-release-shared

      - name: Prepare for publish
        shell: bash
        run: |
          owner=${{ github.repository_owner }}
          export owner

          SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
          echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
          # SHERPA_ONNX_VERSION=1.0.30

          src_dir=.github/scripts/node-addon
          sed -i.bak s/SHERPA_ONNX_VERSION/$SHERPA_ONNX_VERSION/g $src_dir/package.json
          sed -i.bak s/k2-fsa/$owner/g $src_dir/package.json

          dst=sherpa-onnx-node
          mkdir $dst
          cp $src_dir/package.json $dst/
          cp $src_dir/README.md $dst/
          cp scripts/node-addon-api/lib/*.js $dst/

      - name: Display files to be published
        shell: bash
        run: |
          ls -lh ./sherpa-onnx-node
          tar cjvf ./sherpa-onnx-node.tar.bz2 ./sherpa-onnx-node

      - uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-node
          path: ./sherpa-onnx-node.tar.bz2

      - name: Publish
        shell: bash
        run: |
          cd ./sherpa-onnx-node
          # https://docs.npmjs.com/trusted-publishers
          npm publish --access public


================================================
FILE: .github/workflows/npm.yaml
================================================
name: npm

on:
  push:
    branches:
      - npm
  workflow_dispatch:

concurrency:
  group: npm-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read
  id-token: write

jobs:
  nodejs:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.8"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Install emsdk
        uses: mymindstorm/setup-emsdk@v14
        with:
          version: 3.1.51
          actions-cache-folder: 'emsdk-cache'

      - name: View emsdk version
        shell: bash
        run: |
          emcc -v
          echo "--------------------"
          emcc --check

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - uses: actions/setup-node@v4
        with:
          node-version: '24'
          registry-url: 'https://registry.npmjs.org'

      - name: Display node version
        shell: bash
        run: |
          node --version

      - name: Build nodejs package
        shell: bash
        run: |
          ./build-wasm-simd-nodejs.sh
          cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.js ./scripts/nodejs/
          cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.wasm ./scripts/nodejs/

          SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
          echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

          cd scripts/nodejs

          owner=${{ github.repository_owner }}
          echo "owner: $owner"

          sed -i.bak s/SHERPA_ONNX_VERSION/$SHERPA_ONNX_VERSION/g ./package.json
          sed -i.bak s/k2-fsa/$owner/g ./package.json

          rm package.json.bak

      - name: Collect files
        shell: bash
        run: |
          dst=sherpa-onnx-wasm-nodejs
          mkdir $dst
          cp -v scripts/nodejs/* $dst
          tar cvjf $dst.tar.bz2 $dst

          echo "---"
          ls -h $dst

      - uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-wasm-nodejs
          path: ./*.tar.bz2

      - name: Build nodejs package
        shell: bash
        run: |
          cd scripts/nodejs

          git diff

          # https://docs.npmjs.com/trusted-publishers
          npm publish --provenance --access public


================================================
FILE: .github/workflows/pascal.yaml
================================================
name: pascal

on:
  push:
    branches:
      - master
    paths:
      - '.github/workflows/pascal.yaml'
      - 'cmake/**'
      - 'pascal-api-examples/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/c-api/*'
      - 'sherpa-onnx/pascal-api/*'

  workflow_dispatch:

concurrency:
  group: pascal-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  pascal:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest, macos-15-intel, windows-2022, ubuntu-22.04-arm]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}

      - name: Install Free pascal compiler (ubuntu)
        if: matrix.os == 'ubuntu-latest' || matrix.os == 'ubuntu-22.04-arm'
        shell: bash
        run: |
          sudo apt-get update
          sudo apt-get install -q -y fpc

      - name: Install Free pascal compiler (macos)
        if: matrix.os == 'macos-latest' || matrix.os == 'macos-15-intel'
        shell: bash
        run: |
          brew install fpc
          # brew install --cask lazarus
          #
      - name: Install Free pascal compiler (windows)
        if: matrix.os == 'windows-2022'
        shell: bash
        run: |
          choco install lazarus

          ls -lh /c/lazarus/fpc/3.2.2/bin/x86_64-win64/

      - name: FPC info
        shell: bash
        run: |
          export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH
          which fpc
          fpc -i

      - name: OS info
        shell: bash
        run: |
          uname -a

      - name: Configure CMake
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          mkdir build
          cd build

          cmake \
            -DCMAKE_INSTALL_PREFIX=./install \
            -D BUILD_SHARED_LIBS=ON \
            -D SHERPA_ONNX_ENABLE_BINARY=OFF \
            -D CMAKE_BUILD_TYPE=Release \
            ..

      - name: Build sherpa-onnx
        shell: bash
        run: |
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"

          cd build
          cmake --build . --target install --config Release

          ls -lh install/lib/

          if [[ ${{ matrix.os }} == 'windows-2022' ]]; then
            cp -v install/lib/*.dll ../pascal-api-examples/non-streaming-asr
            cp -v install/lib/*.dll ../pascal-api-examples/read-wav
            cp -v install/lib/*.dll ../pascal-api-examples/speaker-diarization
            cp -v install/lib/*.dll ../pascal-api-examples/speech-enhancement-gtcrn
            cp -v install/lib/*.dll ../pascal-api-examples/speech-enhancement-dpdfnet
            cp -v install/lib/*.dll ../pascal-api-examples/streaming-asr
            cp -v install/lib/*.dll ../pascal-api-examples/streaming-speech-enhancement-gtcrn
            cp -v install/lib/*.dll ../pascal-api-examples/streaming-speech-enhancement-dpdfnet
            cp -v install/lib/*.dll ../pascal-api-examples/tts
            cp -v install/lib/*.dll ../pascal-api-examples/vad
            cp -v install/lib/*.dll ../pascal-api-examples/vad-with-non-streaming-asr

            cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/non-streaming-asr
            cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/read-wav
            cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/speaker-diarization
            cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/speech-enhancement-gtcrn
            cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/speech-enhancement-dpdfnet
            cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/streaming-asr
            cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/streaming-speech-enhancement-gtcrn
            cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/streaming-speech-enhancement-dpdfnet
            cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/tts
            cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/vad
            cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/vad-with-non-streaming-asr
          fi

      - name:  Run Speech Enhancement test
        shell: bash
        run: |
          export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH

          cd ./pascal-api-examples

          pushd speech-enhancement-gtcrn
          ./run-gtcrn.sh
          ls -lh
          popd

          pushd speech-enhancement-dpdfnet
          ./run-dpdfnet.sh
          ls -lh
          popd

          pushd streaming-speech-enhancement-gtcrn
          ./run-gtcrn.sh
          ls -lh
          popd

          pushd streaming-speech-enhancement-dpdfnet
          ./run-dpdfnet.sh
          ls -lh
          popd

      - name:  Run Pascal test (TTS)
        shell: bash
        run: |
          export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH

          cd ./pascal-api-examples
          pushd tts

          ./run-pocket-en.sh
          rm -rf sherpa-onnx-pocket-*

          ./run-piper.sh
          rm -rf vits-piper-*
          rm piper
          ls -lh
          echo "---"

          ./run-kokoro-zh-en.sh
          rm -rf kokoro-multi-*
          rm kokoro-zh-en
          ls -lh
          echo "---"

          ./run-kokoro-en.sh
          rm -rf kokoro-en-*
          rm kokoro-en
          ls -lh
          echo "---"

          ./run-matcha-zh.sh
          rm -rf matcha-icefall-*
          rm matcha-zh
          ls -lh
          echo "---"

          ./run-matcha-en.sh
          rm -rf matcha-icefall-*
          rm matcha-en
          ls -lh
          echo "---"

          ./run-supertonic-en.sh
          rm -rf sherpa-onnx-supertonic-*
          rm supertonic-en
          ls -lh
          echo "---"

          ./run-zipvoice-zh-en.sh
          rm -rf sherpa-onnx-zipvoice-*
          rm -f vocos_24khz.onnx
          rm zipvoice-zh-en
          ls -lh
          echo "---"

          popd

      - uses: actions/upload-artifact@v4
        with:
          name: tts-${{ matrix.os }}
          path: ./pascal-api-examples/tts/*.wav

      - name:  Run Pascal test (Non Streaming ASR)
        shell: bash
        run: |
          export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH

          cd ./pascal-api-examples

          pushd non-streaming-asr

          ./run-funasr-nano.sh
          rm -rf sherpa-onnx-*
          echo "---"

          ./run-wenet-ctc.sh
          rm -rf sherpa-onnx-*
          echo "---"

          ./run-medasr-ctc.sh
          rm -rf sherpa-onnx-*
          echo "---"

          ./run-omnilingual-asr-ctc.sh
          rm -rf sherpa-onnx-*
          echo "---"

          ./run-nemo-canary.sh
          rm -rf sherpa-onnx-*
          echo "---"

          ./run-zipformer-ctc.sh
          rm -rf sherpa-onnx-*
          echo "---"

          ./run-dolphin-ctc.sh
          rm -rf sherpa-onnx-*
          echo "---"

          ./run-zipformer-transducer.sh
          rm -rf sherpa-onnx-*
          echo "---"

          ./run-moonshine-v2.sh

          ./run-moonshine.sh
          rm -rf sherpa-onnx-*
          echo "---"

          ./run-fire-red-asr-ctc.sh
          rm -rf sherpa-onnx-fire-red-asr*
          echo "---"

          ./run-fire-red-asr.sh
          rm -rf sherpa-onnx-fire-red-asr*
          echo "---"

          ./run-whisper.sh
          rm -rf sherpa-onnx-*
          echo "---"

          ./run-nemo-transducer.sh
          rm -rf sherpa-onnx-*
          echo "---"

          ./run-nemo-ctc.sh
          rm -rf sherpa-onnx-*
          echo "---"

          ./run-sense-voice.sh
          rm -rf sherpa-onnx-*
          echo "---"

          ./run-telespeech-ctc.sh
          rm -rf sherpa-onnx-*
          echo "---"

          ./run-paraformer.sh

          ./run-paraformer-itn.sh

          rm -rf sherpa-onnx-*
          echo "---"

          ls -lh
          popd

      - name:  Run Pascal test (Streaming ASR)
        shell: bash
        run: |
          export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH

          cd ./pascal-api-examples

          pushd streaming-asr

          ./run-t-one-ctc.sh
          rm -rf sherpa-onnx-*
          echo "---"

          ./run-zipformer-transducer.sh
          rm -rf sherpa-onnx-*
          echo "---"

          ./run-nemo-transducer.sh
          rm -rf sherpa-onnx-*
          echo "---"

          if [[ ${{ matrix.os }} != 'windows-2022' ]]; then
            ./run-paraformer.sh
            rm -rf sherpa-onnx-*
            echo "---"

            ./run-zipformer-ctc.sh
            echo "---"

            ./run-zipformer-ctc-hlg.sh
            rm -rf sherpa-onnx-*
            echo "---"
          fi

          ls -lh
          popd

      - name:  Run Pascal test (VAD test)
        shell: bash
        run: |
          export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH

          cd ./pascal-api-examples

          pushd vad
          ./run-circular-buffer.sh
          echo "---"

          time ./run-remove-silence-ten-vad.sh
          echo "---"

          time ./run-remove-silence.sh
          echo "---"

          ls -lh

          popd

      - name:  Run Pascal test (Speaker diarization)
        shell: bash
        run: |
          export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH

          cd ./pascal-api-examples
          pushd speaker-diarization

          ./run.sh
          rm -rfv *.onnx *.wav sherpa-onnx-*
          ls -lh
          echo "---"

          popd

      - name:  Run Pascal test (VAD + non-streaming ASR)
        shell: bash
        run: |
          export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH

          cd ./pascal-api-examples

          pushd vad-with-non-streaming-asr

          time ./run-vad-with-zipformer-ctc.sh
          rm -rf sherpa-onnx-*
          echo "---"

          time ./run-vad-with-dolphin-ctc.sh
          rm -rf sherpa-onnx-*
          echo "---"

          time ./run-vad-with-moonshine.sh
          rm -rf sherpa-onnx-*
          echo "---"

          time ./run-vad-with-whisper.sh
          rm -rf sherpa-onnx-*
          echo "---"

          time ./run-vad-with-sense-voice.sh
          rm -rf sherpa-onnx-*
          echo "---"

          ls -lh

          popd

      - name:  Run Pascal test (Read wav test)
        shell: bash
        run: |
          export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH

          cd ./pascal-api-examples

          pushd read-wav
          ./run.sh
          echo "---"
          ls -lh
          popd


================================================
FILE: .github/workflows/pkg-config.yaml
================================================
name: pkg-config

on:
  push:
    branches:
      - master
      - pkg-config
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'

  workflow_dispatch:

concurrency:
  group: pkg-config-${{ github.ref }}
  cancel-in-progress: true

jobs:
  pkg_config:
    runs-on: ${{ matrix.os }}
    name: ${{ matrix.os }} ${{ matrix.build_type }} ${{ matrix.lib_type }} tts-${{ matrix.tts }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest]
        build_type: [Release, Debug]
        lib_type: [shared, static]
        tts: [ON, OFF]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-${{ matrix.build_type }}-lib-${{ matrix.lib_type }}

      - name: Configure CMake
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          mkdir build
          cd build
          if [[ ${{ matrix.lib_type }} == "shared" ]]; then
            cmake -DSHERPA_ONNX_ENABLE_TTS=${{ matrix.tts }} -DSHERPA_ONNX_ENABLE_C_API=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DCMAKE_INSTALL_PREFIX=./install -DBUILD_SHARED_LIBS=ON ..
          else
            cmake -DSHERPA_ONNX_ENABLE_TTS=${{ matrix.tts }} -DSHERPA_ONNX_ENABLE_C_API=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DCMAKE_INSTALL_PREFIX=./install -DBUILD_SHARED_LIBS=OFF ..
          fi

      - name: Build sherpa-onnx for ${{ matrix.os }} ${{ matrix.build_type }} ${{ matrix.lib_type }} tts-${{ matrix.tts }}
        shell: bash
        run: |
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"

          cd build
          make -j2
          make install

          ls -lh lib
          ls -lh bin

      - name: Install tree
        if: matrix.os == 'ubuntu-latest'
        shell: bash
        run: |
          sudo apt-get install tree

      - name: Install tree
        if: matrix.os == 'macos-latest'
        shell: bash
        run: |
          brew install tree

      - name: Display generated files of sherpa-onnx for ${{ matrix.os }} ${{ matrix.build_type }} ${{ matrix.lib_type }}
        shell: bash
        run: |
          tree build/install
          ls -lh build/install

          cat build/install/sherpa-onnx.pc

      - name: Show pkg-config
        shell: bash
        run: |
          export PKG_CONFIG_PATH=$PWD/build/install:$PKG_CONFIG_PATH
          pkg-config --cflags sherpa-onnx
          pkg-config --libs sherpa-onnx

      - name: Build C API example
        shell: bash
        run: |
          export PKG_CONFIG_PATH=$PWD/build/install:$PKG_CONFIG_PATH
          cd c-api-examples

          pkg-config --cflags sherpa-onnx

          gcc -o decode-file-c-api $(pkg-config --cflags sherpa-onnx) ./decode-file-c-api.c $(pkg-config --libs sherpa-onnx)

          ./decode-file-c-api --help

      - name: Build C API example (tts)
        if: matrix.tts == 'ON'
        shell: bash
        run: |
          export PKG_CONFIG_PATH=$PWD/build/install:$PKG_CONFIG_PATH
          cd c-api-examples

          pkg-config --cflags sherpa-onnx

          gcc -o offline-tts-c-api $(pkg-config --cflags sherpa-onnx) ./offline-tts-c-api.c $(pkg-config --libs sherpa-onnx)

          ./offline-tts-c-api --help

      - name: Test online transducer (C API)
        shell: bash
        run: |
          export PATH=$PWD/c-api-examples:$PATH
          export EXE=decode-file-c-api

          .github/scripts/test-online-transducer.sh

      - uses: actions/upload-artifact@v4
        with:
          name: tts-generated-test-files-${{ matrix.os }}-${{ matrix.build_type }}-${{ matrix.lib_type }}-tts-${{ matrix.tts }}
          path: tts


================================================
FILE: .github/workflows/release-dart-package.yaml
================================================
name: release-dart

on:
  push:
    branches:
      - ci-pub-dart
    tags:
      - 'dart-v[0-9]+.[0-9]+.[0-9]+*' # tag-pattern on pub.dev: 'v{{version}}'

  workflow_dispatch:

concurrency:
  group: release-dart-${{ github.ref }}
  cancel-in-progress: true

jobs:
  build_linux_libs_x64:
    name: ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]

    steps:
      - uses: actions/checkout@v4

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Build sherpa-onnx
        shell: bash
        run: |
          docker run --rm \
            --volume ${{ github.workspace }}/:/home/runner/work/sherpa-onnx/sherpa-onnx \
            quay.io/pypa/manylinux2014_x86_64 \
            bash -c '
              uname -a
              gcc --version
              cmake --version
              cat /etc/*release
              id
              pwd

              cd /home/runner/work/sherpa-onnx/sherpa-onnx

              mkdir build
              cd build

              cmake \
                -D SHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
                -D SHERPA_ONNX_ENABLE_TTS=ON \
                -D CMAKE_BUILD_TYPE=Release \
                -D BUILD_SHARED_LIBS=ON \
                -D CMAKE_INSTALL_PREFIX=./install \
                -D SHERPA_ONNX_ENABLE_JNI=OFF \
                -D SHERPA_ONNX_ENABLE_BINARY=OFF \
                ..

              make -j2
              make install

              ls -lh ./install/lib
            '

      - name: Create tar file
        shell: bash
        run: |
          mkdir x64
          dst=x64
          cp -v build/install/lib/lib* $dst
          tar cjvf $dst.tar.bz2 $dst
          ls -lh *.tar.bz2

      - uses: actions/upload-artifact@v4
        with:
          name: linux-x64
          path: ./*.tar.bz2

  build_linux_libs_aarch64:
    # if: false
    name: ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-22.04-arm]

    steps:
      - uses: actions/checkout@v4

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Build sherpa-onnx
        shell: bash
        run: |
          docker run --rm \
            --volume ${{ github.workspace }}/:/home/runner/work/sherpa-onnx/sherpa-onnx \
            quay.io/pypa/manylinux2014_aarch64 \
            bash -c '
              uname -a
              gcc --version
              cmake --version
              cat /etc/*release
              id
              pwd

              cd /home/runner/work/sherpa-onnx/sherpa-onnx

              mkdir build
              cd build

              cmake \
                -D SHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
                -D SHERPA_ONNX_ENABLE_TTS=ON \
                -D CMAKE_BUILD_TYPE=Release \
                -D BUILD_SHARED_LIBS=ON \
                -D CMAKE_INSTALL_PREFIX=./install \
                -D SHERPA_ONNX_ENABLE_JNI=OFF \
                -D SHERPA_ONNX_ENABLE_BINARY=OFF \
                ..

              make -j2
              make install

              ls -lh ./install/lib
            '

      - name: Create tar file
        shell: bash
        run: |
          mkdir aarch64
          dst=aarch64
          cp -v build/install/lib/lib* $dst
          tar cjvf $dst.tar.bz2 $dst
          ls -lh *.tar.bz2

      - uses: actions/upload-artifact@v4
        with:
          name: linux-aarch64
          path: ./*.tar.bz2

  sherpa_onnx_linux:
    needs: [build_linux_libs_x64, build_linux_libs_aarch64]
    # if: false
    permissions:
      id-token: write # Required for authentication using OIDC
    name: sherpa_onnx_linux
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Fix version
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          src_dir=$PWD/flutter/sherpa_onnx_linux
          pushd $src_dir
          v="version: $SHERPA_ONNX_VERSION"
          echo "v: $v"
          sed -i.bak s"/^version: .*/$v/" ./pubspec.yaml
          rm *.bak
          git status
          git diff

      - name: Retrieve artifact from linux x64
        uses: actions/download-artifact@v4
        with:
          name: linux-x64
          path: /tmp

      - name: Retrieve artifact from linux aarch64
        uses: actions/download-artifact@v4
        with:
          name: linux-aarch64
          path: /tmp

      - name: Show files
        shell: bash
        run: |
          cd /tmp
          tar xvf x64.tar.bz2
          tar xvf aarch64.tar.bz2

          echo "----x64---"
          ls -lh /tmp/x64/
          echo "----aarch64---"
          ls -lh /tmp/aarch64/

      - name: Copy extra files
        shell: bash
        run: |
          dst=flutter/sherpa_onnx_linux

          mkdir $dst/example

          cp -v flutter/sherpa_onnx/example/* $dst/example
          cp -v LICENSE $dst/
          cp -v CHANGELOG.md $dst/

          git status

      - name: Copy pre-built libs
        shell: bash
        run: |
          cp -v /tmp/x64/lib*.so* flutter/sherpa_onnx_linux/linux/x64
          cp -v /tmp/aarch64/lib*.so* flutter/sherpa_onnx_linux/linux/aarch64

          mv -v flutter/sherpa_onnx_linux /tmp/to_be_published

          ls -lh /tmp/to_be_published/linux

      - name: Setup Flutter SDK
        uses: flutter-actions/setup-flutter@v3
        with:
          channel: master
          version: 3.24.0

      - uses: dart-lang/setup-dart@v1

      - name: Release
        shell: bash
        run: |
          cd /tmp/to_be_published
          flutter pub get
          flutter pub publish --dry-run
          flutter pub publish --force

  sherpa_onnx_macos:
    # if: false
    permissions:
      id-token: write # Required for authentication using OIDC
    name: sherpa_onnx_macos
    runs-on: macos-latest

    steps:
      - uses: actions/checkout@v4

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-flutter-release-package

      - name: Fix version
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          src_dir=$PWD/flutter/sherpa_onnx_macos
          pushd $src_dir
          v="version: $SHERPA_ONNX_VERSION"
          echo "v: $v"
          sed -i.bak s"/^version: .*/$v/" ./pubspec.yaml
          rm *.bak
          git status
          git diff

      - name: Copy extra files
        shell: bash
        run: |
          dst=flutter/sherpa_onnx_macos

          mkdir $dst/example

          cp -v flutter/sherpa_onnx/example/* $dst/example
          cp -v LICENSE $dst/
          cp -v CHANGELOG.md $dst/

          git status

      - name: Configure CMake
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          mkdir build
          cd build
          cmake \
            -D SHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
            -D SHERPA_ONNX_ENABLE_TTS=ON \
            -D CMAKE_BUILD_TYPE=Release \
            -D BUILD_SHARED_LIBS=ON \
            -D CMAKE_INSTALL_PREFIX=./install \
            -D SHERPA_ONNX_ENABLE_JNI=OFF \
            -D SHERPA_ONNX_ENABLE_BINARY=OFF \
            -D CMAKE_OSX_ARCHITECTURES="x86_64;arm64" \
            ..

      - name: Build sherpa-onnx
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          cd build
          make -j2 install

          ls -lh install/lib/libsherpa-onnx-c-api.dylib
          file install/lib/libsherpa-onnx-c-api.dylib
          rm -v install/lib/libonnxruntime.dylib

      - name: Copy pre-built libs
        shell: bash
        run: |
          cp -v build/install/lib/lib*.dylib* flutter/sherpa_onnx_macos/macos/

          mv -v flutter/sherpa_onnx_macos /tmp/to_be_published

          ls -lh /tmp/to_be_published/macos

      - name: Setup Flutter SDK
        uses: flutter-actions/setup-flutter@v3
        with:
          channel: stable
          version: latest

      - uses: dart-lang/setup-dart@v1

      - name: Release
        shell: bash
        run: |
          cd /tmp/to_be_published
          du -h -d1 .
          flutter pub get
          flutter pub publish --dry-run
          flutter pub publish --force

  sherpa_onnx_windows:
    # if: false
    permissions:
      id-token: write # Required for authentication using OIDC
    name: sherpa_onnx_windows
    runs-on: windows-2022

    steps:
      - uses: actions/checkout@v4

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Fix version
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          src_dir=$PWD/flutter/sherpa_onnx_windows
          pushd $src_dir
          v="version: $SHERPA_ONNX_VERSION"
          echo "v: $v"
          sed -i.bak s"/^version: .*/$v/" ./pubspec.yaml
          rm *.bak
          git status
          git diff

      - name: Copy extra files
        shell: bash
        run: |
          dst=flutter/sherpa_onnx_windows

          mkdir $dst/example

          cp -v flutter/sherpa_onnx/example/* $dst/example
          cp -v LICENSE $dst/
          cp -v CHANGELOG.md $dst/

          git status

      - name: Configure CMake
        shell: bash
        run: |
          mkdir build
          cd build
          cmake \
            -D SHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
            -D SHERPA_ONNX_ENABLE_TTS=ON \
            -D CMAKE_BUILD_TYPE=Release \
            -D BUILD_SHARED_LIBS=ON \
            -D CMAKE_INSTALL_PREFIX=./install \
            -D SHERPA_ONNX_ENABLE_JNI=OFF \
            -D SHERPA_ONNX_ENABLE_BINARY=OFF \
            ..

      - name: Build sherpa-onnx
        shell: bash
        run: |
          cd build
          cmake --build . --target install --config Release -- -m:2

          ls -lh install/lib/*.dll

      - name: Copy pre-built libs
        shell: bash
        run: |
          cp -v build/install/lib/*.dll flutter/sherpa_onnx_windows/windows/
          mv -v flutter/sherpa_onnx_windows /tmp/to_be_published

          ls -lh /tmp/to_be_published/windows

      - name: Setup Flutter SDK
        uses: flutter-actions/setup-flutter@v3
        with:
          channel: stable
          version: latest

      - uses: dart-lang/setup-dart@v1

      - name: Release
        shell: bash
        run: |
          cd /tmp/to_be_published
          flutter pub get
          flutter pub publish --dry-run
          flutter pub publish --force

  sherpa_onnx_android:
    # if: false
    permissions:
      id-token: write # Required for authentication using OIDC
    name: sherpa_onnx_android
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-flutter-release-package-android

      - name: Fix version
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          src_dir=$PWD/flutter/sherpa_onnx_android
          pushd $src_dir
          v="version: $SHERPA_ONNX_VERSION"
          echo "v: $v"
          sed -i.bak s"/^version: .*/$v/" ./pubspec.yaml
          rm *.bak
          git status
          git diff

      - name: Copy extra files
        shell: bash
        run: |
          dst=flutter/sherpa_onnx_android

          mkdir $dst/example

          cp -v flutter/sherpa_onnx/example/* $dst/example
          cp -v LICENSE $dst/
          cp -v CHANGELOG.md $dst/

          git status

      - name: Build android-arm64-v8a
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          export SHERPA_ONNX_ENABLE_C_API=ON
          export SHERPA_ONNX_ENABLE_JNI=OFF
          export SHERPA_ONNX_ENABLE_BINARY=OFF

          ./build-android-arm64-v8a.sh

      - name: Build android-armv7-eabi
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          export SHERPA_ONNX_ENABLE_C_API=ON
          export SHERPA_ONNX_ENABLE_JNI=OFF
          export SHERPA_ONNX_ENABLE_BINARY=OFF

          ./build-android-armv7-eabi.sh

      - name: Build android-x86
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          export SHERPA_ONNX_ENABLE_C_API=ON
          export SHERPA_ONNX_ENABLE_JNI=OFF
          export SHERPA_ONNX_ENABLE_BINARY=OFF

          ./build-android-x86.sh

      - name: Build android-x86-64
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          export SHERPA_ONNX_ENABLE_C_API=ON
          export SHERPA_ONNX_ENABLE_JNI=OFF
          export SHERPA_ONNX_ENABLE_BINARY=OFF

          ./build-android-x86-64.sh

      - name: Copy pre-built libs
        shell: bash
        run: |
          echo "----arm64-v8a----"
          cp -v build-android-arm64-v8a/install/lib/lib*.so flutter/sherpa_onnx_android/android/src/main/jniLibs/arm64-v8a/

          echo "----armv7-eabi----"
          cp -v build-android-armv7-eabi/install/lib/lib*.so flutter/sherpa_onnx_android/android/src/main/jniLibs/armeabi-v7a

          echo "----x86----"
          cp -v build-android-x86/install/lib/lib*.so flutter/sherpa_onnx_android/android/src/main/jniLibs/x86

          echo "----x86_64----"
          cp -v build-android-x86-64/install/lib/lib*.so flutter/sherpa_onnx_android/android/src/main/jniLibs/x86_64

          mv -v flutter/sherpa_onnx_android /tmp/to_be_published

          ls -lh /tmp/to_be_published

      - name: Setup Flutter SDK
        uses: flutter-actions/setup-flutter@v3
        with:
          channel: stable
          version: latest

      - uses: dart-lang/setup-dart@v1

      - name: Release
        shell: bash
        run: |
          cd /tmp/to_be_published
          du -h -d1 .

          flutter pub get
          flutter pub publish --dry-run
          flutter pub publish --force

  sherpa_onnx_ios:
    # if: false
    permissions:
      id-token: write # Required for authentication using OIDC
    name: sherpa_onnx_ios
    runs-on: macos-latest

    steps:
      - uses: actions/checkout@v4

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-flutter-release-package-ios

      - name: Fix version
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          src_dir=$PWD/flutter/sherpa_onnx_ios
          pushd $src_dir
          v="version: $SHERPA_ONNX_VERSION"
          echo "v: $v"
          sed -i.bak s"/^version: .*/$v/" ./pubspec.yaml
          rm *.bak
          git status
          git diff

      - name: Copy extra files
        shell: bash
        run: |
          dst=flutter/sherpa_onnx_ios

          mkdir $dst/example

          cp -v flutter/sherpa_onnx/example/* $dst/example
          cp -v LICENSE $dst/
          cp -v CHANGELOG.md $dst/

          git status

      - name: Build ios
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version
          ./build-ios-shared.sh

      - name: Copy pre-built libs
        shell: bash
        run: |
          echo "----ios arm64 and arm64_x64_simulator----"
          cp -av build-ios-shared/sherpa_onnx.xcframework flutter/sherpa_onnx_ios/ios/

          mv -v flutter/sherpa_onnx_ios /tmp/to_be_published

          ls -lh /tmp/to_be_published

      - name: Setup Flutter SDK
        uses: flutter-actions/setup-flutter@v3
        with:
          channel: stable
          version: latest

      - uses: dart-lang/setup-dart@v1

      - name: Release
        shell: bash
        run: |
          cd /tmp/to_be_published
          du -h -d1 .

          flutter pub get
          flutter pub publish --dry-run
          flutter pub publish --force

  sherpa_onnx:
    needs: [sherpa_onnx_linux, sherpa_onnx_macos, sherpa_onnx_windows, sherpa_onnx_android, sherpa_onnx_ios]
    # if: false
    permissions:
      id-token: write # Required for authentication using OIDC
    name: sherpa_onnx
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Setup Flutter SDK
        uses: flutter-actions/setup-flutter@v3
        with:
          channel: stable
          version: latest

      - uses: dart-lang/setup-dart@v1

      - name: Fix version
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          src_dir=$PWD/flutter/sherpa_onnx
          pushd $src_dir
          v="version: $SHERPA_ONNX_VERSION"
          echo "v: $v"
          sed -i.bak s"/^version: .*/$v/" ./pubspec.yaml
          rm *.bak
          git status
          git diff

      - name: Copy extra files
        shell: bash
        run: |
          dst=flutter/sherpa_onnx

          cp -v LICENSE $dst/
          cp -v CHANGELOG.md $dst/
          cp -v README.md $dst/

          git status

          mv -v flutter/sherpa_onnx /tmp/to_be_published

          ls -lh /tmp/to_be_published

      - name: Release
        shell: bash
        run: |
          cd /tmp/to_be_published
          du -h -d1 .

          flutter pub get
          flutter pub publish --dry-run
          flutter pub publish --force


================================================
FILE: .github/workflows/release-go.yaml
================================================
name: release-go

on:
  workflow_dispatch:

concurrency:
  group: release-go-${{ github.ref }}
  cancel-in-progress: true

jobs:
  release_go:
    name: Release go
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Add SSH key
        run: |
          mkdir -p ~/.ssh/
          cp scripts/go/ssh_config ~/.ssh/config
          echo "${{ secrets.MY_GITHUB_SSH_KEY }}" > ~/.ssh/github && chmod 600 ~/.ssh/github
          ssh github.com || true

      - name: Release
        shell: bash
        run: |
          cd scripts/go
          ./release.sh


================================================
FILE: .github/workflows/release-rust.yaml
================================================
name: Publish Rust Crates

on:
  push:
    branches:
      - release-rust

  workflow_dispatch:

jobs:
  publish:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions-rust-lang/setup-rust-toolchain@v1
        with:
          toolchain: stable

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Update
        shell: bash
        run: |
          cd sherpa-onnx/rust
          ./publish.sh

      - name: Login to crates.io
        run: cargo login ${{ secrets.CARGO_REGISTRY_TOKEN }}

      - name: Publish sherpa-onnx-sys
        shell: bash
        env:
          CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
        run: |
          cargo publish --allow-dirty --manifest-path=sherpa-onnx/rust/sherpa-onnx-sys/Cargo.toml
          sleep 30  # Wait for crates.io to index

      - name: Publish sherpa-onnx
        shell: bash
        env:
          CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
        run: |
          cargo publish --allow-dirty --manifest-path=sherpa-onnx/rust/sherpa-onnx/Cargo.toml


================================================
FILE: .github/workflows/riscv64-linux.yaml
================================================
name: riscv64-linux

on:
  push:
    branches:
      - master
    paths:
      - '.github/workflows/riscv64-linux.yaml'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/c-api/*'
      - 'toolchains/riscv64-linux-gnu.toolchain.cmake'
      - 'build-riscv64-linux-gnu.sh'
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'

  workflow_dispatch:

concurrency:
  group: riscv64-linux-${{ github.ref }}
  cancel-in-progress: true

jobs:
  riscv64_linux:
    runs-on: ${{ matrix.os }}
    name: ${{ matrix.os }} ${{ matrix.lib_type }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        lib_type: [shared] #, static]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-riscv64-${{ matrix.lib_type }}

      - name: cache-qemu
        id: cache-qemu
        uses: actions/cache@v4
        with:
          path: qemu-install
          key: qemu-riscv-xuantie-install-20240306

      - name: qemu
        if: steps.cache-qemu.outputs.cache-hit != 'true'
        run: |
          # https://pypi.org/project/xuantie-qemu/#files
          wget -q https://files.pythonhosted.org/packages/21/f4/733f29c435987e8bb264a6504c7a4ea4c04d0d431b38a818ab63eef082b9/xuantie_qemu-20230825-py3-none-manylinux1_x86_64.whl
          unzip xuantie_qemu-20230825-py3-none-manylinux1_x86_64.whl
          mkdir -p qemu-install/bin

          cp -v ./qemu/qemu-riscv64 ./qemu-install/bin

      - name: cache-toolchain
        id: cache-toolchain
        uses: actions/cache@v4
        with:
          path: toolchain
          key: Xuantie-900-gcc-linux-5.10.4-glibc-x86_64-V2.6.1-20220906.tar.gz

      - name: Download toolchain
        if: steps.cache-toolchain.outputs.cache-hit != 'true'
        shell: bash
        run: |
          wget -q https://occ-oss-prod.oss-cn-hangzhou.aliyuncs.com/resource//1663142514282/Xuantie-900-gcc-linux-5.10.4-glibc-x86_64-V2.6.1-20220906.tar.gz

          mkdir $GITHUB_WORKSPACE/toolchain

          tar xvf ./Xuantie-900-gcc-linux-5.10.4-glibc-x86_64-V2.6.1-20220906.tar.gz --strip-components 1 -C $GITHUB_WORKSPACE/toolchain
          ls -lh $GITHUB_WORKSPACE/toolchain/bin

      - name: Display toolchain info
        shell: bash
        run: |
          export PATH=$GITHUB_WORKSPACE/toolchain/bin:$PATH
          riscv64-unknown-linux-gnu-gcc --version

      - name: Display qemu-riscv64 -h
        shell: bash
        run: |
          export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
          export QEMU_LD_PREFIX=$GITHUB_WORKSPACE/toolchain/sysroot
          qemu-riscv64 -h

      - name: build riscv64-linux
        shell: bash
        run: |
          export PATH=$GITHUB_WORKSPACE/toolchain/bin:$PATH

          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"

          cmake --version

          lib_type=${{ matrix.lib_type }}

          if [[ $lib_type == "shared" ]]; then
            export BUILD_SHARED_LIBS=ON
          else
            export BUILD_SHARED_LIBS=OFF
          fi

          ./build-riscv64-linux-gnu.sh

          ls -lh build-riscv64-linux-gnu/bin
          ls -lh build-riscv64-linux-gnu/lib

          echo "---install/lib---"
          ls -lh build-riscv64-linux-gnu/install/lib

          echo "---install/bin---"
          ls -lh build-riscv64-linux-gnu/install/bin

          file build-riscv64-linux-gnu/bin/sherpa-onnx

          readelf -d build-riscv64-linux-gnu/bin/sherpa-onnx

      - name: Copy files
        shell: bash
        run: |
          export PATH=$GITHUB_WORKSPACE/toolchain/bin:$PATH
          riscv64-unknown-linux-gnu-strip --version

          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-linux-riscv64-${{ matrix.lib_type }}
          mkdir $dst

          cp -v $GITHUB_WORKSPACE/toolchain/sysroot/lib/ld-linux-riscv64xthead-lp64d.so.1 build-riscv64-linux-gnu/install/lib/

          ls -lh build-riscv64-linux-gnu/install/lib

          cp -a build-riscv64-linux-gnu/install/bin $dst/
          ls -lh $dst/bin/*
          riscv64-unknown-linux-gnu-strip $dst/bin/*
          ls -lh $dst

          lib_type=${{ matrix.lib_type }}
          if [[ $lib_type == "shared" ]]; then
            cp -a build-riscv64-linux-gnu/install/lib $dst/
            rm -fv $dst/lib/libasound.so
            rm -fv $dst/lib/libonnxruntime.so
          fi

          tree $dst

          tar cjvf ${dst}.tar.bz2 $dst

      - uses: actions/upload-artifact@v4
        if: matrix.lib_type == 'shared'
        with:
          name: sherpa-onnx-linux-riscv64-shared
          path: sherpa-onnx-*linux-riscv64-shared.tar.bz2

      # https://huggingface.co/docs/hub/spaces-github-actions
      - name: Publish to huggingface
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_CLONE_PROTECTION_ACTIVE=false

            GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj2/sherpa-onnx-libs huggingface

            cd huggingface
            dst=riscv64/$SHERPA_ONNX_VERSION
            mkdir -p $dst

            cp -v ../sherpa-onnx-*-shared.tar.bz2 $dst/

            git status
            git lfs track "*.bz2"

            git add .

            git commit -m "upload sherpa-onnx-${SHERPA_ONNX_VERSION}-linux-riscv64-shared.tar.bz2"

            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs main

      - uses: actions/upload-artifact@v4
        if: matrix.lib_type == 'static'
        with:
          name: sherpa-onnx-linux-riscv64-static
          path: sherpa-onnx-*linux-riscv64-static.tar.bz2

      - name: Release pre-compiled binaries and libs for riscv64 linux ${{ matrix.lib_type }}
        if: github.repository_owner == 'csukuangfj' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*linux-riscv64*.tar.bz2
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: v1.12.11

      - name: Release pre-compiled binaries and libs for riscv64 linux ${{ matrix.lib_type }}
        if: github.repository_owner == 'k2-fsa' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*linux-riscv64*.tar.bz2

      - name: Test sherpa-onnx
        shell: bash
        run: |
          export PATH=$GITHUB_WORKSPACE/toolchain/bin:$PATH
          export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
          export QEMU_LD_PREFIX=$GITHUB_WORKSPACE/toolchain/sysroot
          export LD_LIBRARY_PATH=$GITHUB_WORKSPACE/toolchain/sysroot/lib

          ls -lh ./build-riscv64-linux-gnu/bin

          echo "----------sherpa-onnx----------"
          qemu-riscv64 ./build-riscv64-linux-gnu/bin/sherpa-onnx --help
          readelf -d ./build-riscv64-linux-gnu/bin/sherpa-onnx

          echo "----------sherpa-onnx-offline----------"
          qemu-riscv64 ./build-riscv64-linux-gnu/bin/sherpa-onnx-offline --help
          readelf -d ./build-riscv64-linux-gnu/bin/sherpa-onnx-offline

          echo "----------sherpa-onnx-offline-tts----------"
          qemu-riscv64 ./build-riscv64-linux-gnu/bin/sherpa-onnx-offline-tts --help
          readelf -d ./build-riscv64-linux-gnu/bin/sherpa-onnx-offline-tts

      - name: Test streaming speech recognition
        shell: bash
        run: |
          export PATH=$GITHUB_WORKSPACE/toolchain/bin:$PATH
          export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
          export QEMU_LD_PREFIX=$GITHUB_WORKSPACE/toolchain/sysroot
          export LD_LIBRARY_PATH=$GITHUB_WORKSPACE/toolchain/sysroot/lib

          wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23.tar.bz2
          tar xvf sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23.tar.bz2
          rm sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23.tar.bz2

          qemu-riscv64 ./build-riscv64-linux-gnu/bin/sherpa-onnx \
            --tokens=./sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23/tokens.txt \
            --encoder=./sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23/encoder-epoch-99-avg-1.onnx \
            --decoder=./sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23/decoder-epoch-99-avg-1.onnx \
            --joiner=./sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23/joiner-epoch-99-avg-1.onnx \
            ./sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23/test_wavs/0.wav

      - name: Test offline tts
        shell: bash
        run: |
          export PATH=$GITHUB_WORKSPACE/toolchain/bin:$PATH
          export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
          export QEMU_LD_PREFIX=$GITHUB_WORKSPACE/toolchain/sysroot
          export LD_LIBRARY_PATH=$GITHUB_WORKSPACE/toolchain/sysroot/lib

          wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-lessac-medium.tar.bz2
          tar xf vits-piper-en_US-lessac-medium.tar.bz2
          rm vits-piper-en_US-lessac-medium.tar.bz2

          qemu-riscv64 ./build-riscv64-linux-gnu/bin/sherpa-onnx-offline-tts \
            --vits-model=./vits-piper-en_US-lessac-medium/en_US-lessac-medium.onnx \
            --vits-data-dir=./vits-piper-en_US-lessac-medium/espeak-ng-data \
            --vits-tokens=./vits-piper-en_US-lessac-medium/tokens.txt \
            --output-filename=./liliana-piper-en_US-lessac-medium.wav \
            'liliana, the most beautiful and lovely assistant of our team!'

      - uses: actions/upload-artifact@v4
        if: matrix.lib_type == 'shared'
        with:
          name: wave
          path: ./*.wav


================================================
FILE: .github/workflows/riscv64-spacemit-linux.yaml
================================================
name: riscv64-spacemit-linux

on:
  push:
    branches:
      - master
    paths:
      - '.github/workflows/riscv64-spacemit-linux.yaml'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/c-api/*'
      - 'toolchains/riscv64-linux-gnu-spacemit.toolchain.cmake'
      - 'build-riscv64-linux-gnu-spacemit.sh'
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'

  workflow_dispatch:

concurrency:
  group: riscv64-spacemit-linux-${{ github.ref }}
  cancel-in-progress: true

jobs:
  riscv64_spacemit_linux:
    runs-on: ${{ matrix.os }}
    name: ${{ matrix.os }} ${{ matrix.lib_type }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        lib_type: [shared] #, static]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-riscv64-spacemit-${{ matrix.lib_type }}

      - name: cache-qemu
        id: cache-qemu
        uses: actions/cache@v4
        with:
          path: qemu-install
          key: qemu-riscv-spacemit-install-20250818

      - name: qemu
        if: steps.cache-qemu.outputs.cache-hit != 'true'
        run: |
          wget -q https://archive.spacemit.com/spacemit-ai/qemu/jdsk-qemu-v10.0.2.tar.gz
          tar -xf jdsk-qemu-v10.0.2.tar.gz
          mkdir -p qemu-install/bin

          cp -v ./jdsk-qemu/bin/qemu-riscv64 ./qemu-install/bin

      - name: cache-toolchain
        id: cache-toolchain
        uses: actions/cache@v4
        with:
          path: toolchain
          key: https://archive.spacemit.com/toolchain/spacemit-toolchain-linux-glibc-x86_64-v1.1.2.tar.xz

      - name: Download toolchain
        if: steps.cache-toolchain.outputs.cache-hit != 'true'
        shell: bash
        run: |
          wget -q https://archive.spacemit.com/toolchain/spacemit-toolchain-linux-glibc-x86_64-v1.1.2.tar.xz

          mkdir $GITHUB_WORKSPACE/toolchain

          tar xvf spacemit-toolchain-linux-glibc-x86_64-v1.1.2.tar.xz --strip-components 1 -C $GITHUB_WORKSPACE/toolchain
          ls -lh $GITHUB_WORKSPACE/toolchain/bin

      - name: Display toolchain info
        shell: bash
        run: |
          export PATH=$GITHUB_WORKSPACE/toolchain/bin:$PATH
          riscv64-unknown-linux-gnu-gcc --version

      - name: Display qemu-riscv64 -h
        shell: bash
        run: |
          export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
          export QEMU_LD_PREFIX=$GITHUB_WORKSPACE/toolchain/sysroot
          qemu-riscv64 -h

      - name: build riscv64-spacemit-linux
        shell: bash
        run: |
          export PATH=$GITHUB_WORKSPACE/toolchain/bin:$PATH

          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"

          cmake --version

          lib_type=${{ matrix.lib_type }}

          if [[ $lib_type == "shared" ]]; then
            export BUILD_SHARED_LIBS=ON
          else
            export BUILD_SHARED_LIBS=OFF
          fi

          export RISCV_ROOT_PATH=$GITHUB_WORKSPACE/toolchain
          ./build-riscv64-linux-gnu-spacemit.sh

          ls -lh build-riscv64-linux-gnu-spacemit/bin
          ls -lh build-riscv64-linux-gnu-spacemit/lib

          echo "---install/lib---"
          ls -lh build-riscv64-linux-gnu-spacemit/install/lib

          echo "---install/bin---"
          ls -lh build-riscv64-linux-gnu-spacemit/install/bin

          file build-riscv64-linux-gnu-spacemit/bin/sherpa-onnx

          readelf -d build-riscv64-linux-gnu-spacemit/bin/sherpa-onnx

      - name: Copy files
        shell: bash
        run: |
          export PATH=$GITHUB_WORKSPACE/toolchain/bin:$PATH
          riscv64-unknown-linux-gnu-strip --version

          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-linux-riscv64-spacemit-${{ matrix.lib_type }}
          mkdir $dst

          cp -v $GITHUB_WORKSPACE/toolchain/sysroot/lib/ld-linux-riscv64-lp64d.so.1 build-riscv64-linux-gnu-spacemit/install/lib/

          ls -lh build-riscv64-linux-gnu-spacemit/install/lib

          cp -a build-riscv64-linux-gnu-spacemit/install/bin $dst/
          ls -lh $dst/bin/*
          riscv64-unknown-linux-gnu-strip $dst/bin/*
          ls -lh $dst

          lib_type=${{ matrix.lib_type }}
          if [[ $lib_type == "shared" ]]; then
            cp -a build-riscv64-linux-gnu-spacemit/install/lib $dst/
            rm -fv $dst/lib/libasound.so
          fi

          tree $dst

          tar cjvf ${dst}.tar.bz2 $dst

      - uses: actions/upload-artifact@v4
        if: matrix.lib_type == 'shared'
        with:
          name: sherpa-onnx-linux-riscv64-spacemit-shared
          path: sherpa-onnx-*linux-riscv64-spacemit-shared.tar.bz2

      # https://huggingface.co/docs/hub/spaces-github-actions
      - name: Publish to huggingface
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_CLONE_PROTECTION_ACTIVE=false

            GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj2/sherpa-onnx-libs huggingface

            cd huggingface
            dst=riscv64-spacemit/$SHERPA_ONNX_VERSION
            mkdir -p $dst

            cp -v ../sherpa-onnx-*-shared.tar.bz2 $dst/

            git status
            git lfs track "*.bz2"

            git add .

            git commit -m "upload sherpa-onnx-${SHERPA_ONNX_VERSION}-linux-riscv64-spacemit-shared.tar.bz2"

            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs main

      - uses: actions/upload-artifact@v4
        if: matrix.lib_type == 'static'
        with:
          name: sherpa-onnx-linux-riscv64-spacemit-static
          path: sherpa-onnx-*linux-riscv64-spacemit-static.tar.bz2

      - name: Release pre-compiled binaries and libs for riscv64 linux ${{ matrix.lib_type }}
        if: github.repository_owner == 'csukuangfj' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*linux-riscv64*.tar.bz2
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: v1.12.11

      - name: Release pre-compiled binaries and libs for riscv64 linux ${{ matrix.lib_type }}
        if: github.repository_owner == 'k2-fsa' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*linux-riscv64*.tar.bz2

      - name: Test sherpa-onnx
        shell: bash
        run: |
          export PATH=$GITHUB_WORKSPACE/toolchain/bin:$PATH
          export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
          export QEMU_LD_PREFIX=$GITHUB_WORKSPACE/toolchain/sysroot
          export LD_LIBRARY_PATH=$GITHUB_WORKSPACE/toolchain/sysroot/lib
          export QEMU_ARGS="-cpu max,vlen=256,elen=64,vext_spec=v1.0"

          ls -lh ./build-riscv64-linux-gnu-spacemit/bin

          echo "----------sherpa-onnx----------"
          qemu-riscv64 ${QEMU_ARGS} ./build-riscv64-linux-gnu-spacemit/bin/sherpa-onnx --help
          readelf -d ./build-riscv64-linux-gnu-spacemit/bin/sherpa-onnx

          echo "----------sherpa-onnx-offline----------"
          qemu-riscv64 ${QEMU_ARGS} ./build-riscv64-linux-gnu-spacemit/bin/sherpa-onnx-offline --help
          readelf -d ./build-riscv64-linux-gnu-spacemit/bin/sherpa-onnx-offline

          echo "----------sherpa-onnx-offline-tts----------"
          qemu-riscv64 ${QEMU_ARGS} ./build-riscv64-linux-gnu-spacemit/bin/sherpa-onnx-offline-tts --help
          readelf -d ./build-riscv64-linux-gnu-spacemit/bin/sherpa-onnx-offline-tts

      - name: Test streaming speech recognition
        shell: bash
        run: |
          export PATH=$GITHUB_WORKSPACE/toolchain/bin:$PATH
          export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
          export QEMU_LD_PREFIX=$GITHUB_WORKSPACE/toolchain/sysroot
          export LD_LIBRARY_PATH=$GITHUB_WORKSPACE/toolchain/sysroot/lib
          export QEMU_ARGS="-cpu max,vlen=256,elen=64,vext_spec=v1.0"
          echo "Some mistakes in ep graph partition, disable op Gather for spacemit-ep now, will be fixed soon."
          export SPACEMIT_EP_DISABLE_OP_TYPE_FILTER="Gather"

          wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23.tar.bz2
          tar xvf sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23.tar.bz2
          rm sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23.tar.bz2

          qemu-riscv64 ${QEMU_ARGS} ./build-riscv64-linux-gnu-spacemit/bin/sherpa-onnx \
            --provider=spacemit \
            --tokens=./sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23/tokens.txt \
            --encoder=./sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23/encoder-epoch-99-avg-1.onnx \
            --decoder=./sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23/decoder-epoch-99-avg-1.onnx \
            --joiner=./sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23/joiner-epoch-99-avg-1.onnx \
            ./sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23/test_wavs/0.wav

      - name: Test offline tts
        shell: bash
        run: |
          export PATH=$GITHUB_WORKSPACE/toolchain/bin:$PATH
          export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
          export QEMU_LD_PREFIX=$GITHUB_WORKSPACE/toolchain/sysroot
          export LD_LIBRARY_PATH=$GITHUB_WORKSPACE/toolchain/sysroot/lib
          export QEMU_ARGS="-cpu max,vlen=256,elen=64,vext_spec=v1.0"
          echo "Some mistakes in ep graph partition, disable op Gather;Cast;ConvTranspose for spacemit-ep now, will be fixed soon."
          export SPACEMIT_EP_DISABLE_OP_TYPE_FILTER="Gather;Cast;ConvTranspose"

          wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-lessac-medium.tar.bz2
          tar xf vits-piper-en_US-lessac-medium.tar.bz2
          rm vits-piper-en_US-lessac-medium.tar.bz2

          qemu-riscv64 ${QEMU_ARGS} ./build-riscv64-linux-gnu-spacemit/bin/sherpa-onnx-offline-tts \
            --provider=spacemit \
            --vits-model=./vits-piper-en_US-lessac-medium/en_US-lessac-medium.onnx \
            --vits-data-dir=./vits-piper-en_US-lessac-medium/espeak-ng-data \
            --vits-tokens=./vits-piper-en_US-lessac-medium/tokens.txt \
            --output-filename=./liliana-piper-en_US-lessac-medium.wav \
            'liliana, the most beautiful and lovely assistant of our team!'

      - uses: actions/upload-artifact@v4
        if: matrix.lib_type == 'shared'
        with:
          name: wave
          path: ./*.wav


================================================
FILE: .github/workflows/rknn-linux-aarch64.yaml
================================================
name: rknn-linux-aarch64

on:
  push:
    branches:
      - master
      - ci-rknn-bins
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'
    paths:
      - '.github/workflows/rknn-linux-aarch64.yaml'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/csrc/rknn/*'
      - 'sherpa-onnx/c-api/*'
      - 'toolchains/aarch64-linux-gnu.toolchain.cmake'
  pull_request:
    branches:
      - master
    paths:
      - '.github/workflows/rknn-linux-aarch64.yaml'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/csrc/rknn/*'
      - 'sherpa-onnx/c-api/*'
      - 'toolchains/aarch64-linux-gnu.toolchain.cmake'

  workflow_dispatch:

concurrency:
  group: rknn-linux-aarch64-${{ github.ref }}
  cancel-in-progress: true

jobs:
  rknn_linux_aarch64:
    runs-on: ${{ matrix.os }}
    name: rknn shared ${{ matrix.shared }}
    strategy:
      fail-fast: false
      matrix:
        include:
          - os: ubuntu-22.04-arm
            shared: ON
          - os: ubuntu-22.04-arm
            shared: OFF

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-${{ matrix.shared }}-rknn-linux-aarch64

      - name: Download rknn-toolkit2
        shell: bash
        run: |
          git clone --depth 1 https://github.com/airockchip/rknn-toolkit2

      - name: Build sherpa-onnx
        shell: bash
        run: |
            docker run --rm \
              --volume ${{ github.workspace }}/:/k2-fsa/sherpa-onnx \
              quay.io/pypa/manylinux_2_28_aarch64 \
            bash -c '
              uname -a
              which gcc

              gcc --version
              g++ --version


              cmake --version


              cd /k2-fsa/sherpa-onnx/

              echo "pwd"

              ls -lh

              git clone --depth 1 --branch v1.2.12 https://github.com/alsa-project/alsa-lib
              pushd alsa-lib
              ./gitcompile
              popd

              ls -lh $PWD/alsa-lib/src/.libs

              strings $PWD/alsa-lib/src/.libs/libasound.so.2.0.0 | grep "^GLIBC"

              export CPLUS_INCLUDE_PATH=$PWD/alsa-lib/include:$CPLUS_INCLUDE_PATH
              export C_INCLUDE_PATH=$PWD/alsa-lib/include:$C_INCLUDE_PATH
              export SHERPA_ONNX_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs
              p=$PWD

              export SHERPA_ONNX_RKNN_TOOLKIT2_PATH=$PWD/rknn-toolkit2
              export SHERPA_ONNX_RKNN_TOOLKIT2_LIB_DIR=$SHERPA_ONNX_RKNN_TOOLKIT2_PATH/rknpu2/runtime/Linux/librknn_api/aarch64
              export CPLUS_INCLUDE_PATH=$SHERPA_ONNX_RKNN_TOOLKIT2_PATH/rknpu2/runtime/Linux/librknn_api/include:$CPLUS_INCLUDE_PATH

              export SHERPA_ONNX_ENABLE_ALSA=1

              mkdir build
              cd build

              BUILD_SHARED_LIBS=${{ matrix.shared }}

              cmake \
                -DALSA_INCLUDE_DIR=$p/alsa-lib/include \
                -DALSA_LIBRARY=$p/alsa-lib/src/.libs/libasound.so \
                -DBUILD_SHARED_LIBS=ON \
                -DCMAKE_INSTALL_PREFIX=./install \
                -DSHERPA_ONNX_ENABLE_RKNN=ON \
                -DBUILD_SHARED_LIBS=$BUILD_SHARED_LIBS \
                ..

              make -j4 install

              rm -rf install/lib/pkgconfig
              rm -fv install/lib/cargs.h
              rm -fv install/lib/libcargs.so
            '

      - name: Display system info
        shell: bash
        run: |
          uname -a
          gcc --version
          g++ --version

      - name: Display generated files
        shell: bash
        run: |
          export SHERPA_ONNX_RKNN_TOOLKIT2_PATH=$PWD/rknn-toolkit2
          export LD_LIBRARY_PATH=$SHERPA_ONNX_RKNN_TOOLKIT2_PATH/rknpu2/runtime/Linux/librknn_api/aarch64:$LD_LIBRARY_PATH

          cd build/install

          ls -lh bin

          echo "---"

          ls -lh lib

          file bin/sherpa-onnx

          readelf -d bin/sherpa-onnx

          ldd bin/sherpa-onnx

          ./bin/sherpa-onnx --help

          echo "---"
          strings bin/sherpa-onnx | grep "^GLIBC"

      - name: Copy files
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          if [[ ${{ matrix.shared }} == ON ]]; then
            suffix=shared
          else
            suffix=static
          fi

          dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-rknn-linux-aarch64-$suffix
          mkdir $dst

          cp -a build/install/bin $dst/

          if [[ ${{ matrix.shared }} == ON ]]; then
            mkdir -p $dst/lib
            cp -v build/install/lib/lib*.so $dst/lib/
          fi

          ls -lh build/install/lib
          ls -lh build/install/bin

          ls -lh $dst/bin/
          echo "strip"
          strip $dst/bin/*

          echo "after strip"
          ls -lh $dst/bin/

          tree $dst

          tar cjvf ${dst}.tar.bz2 $dst

      - uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-linux-linux-aarch64-shared-${{ matrix.shared }}
          path: sherpa-onnx-*linux-aarch64*.tar.bz2

      # https://huggingface.co/docs/hub/spaces-github-actions
      - name: Publish to huggingface
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_CLONE_PROTECTION_ACTIVE=false
            GIT_LFS_SKIP_SMUDGE=1 git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs huggingface

            cd huggingface
            dst=rknn-linux-aarch64/$SHERPA_ONNX_VERSION
            mkdir -p $dst

            cp -v ../sherpa-onnx-*rknn*-*.tar.bz2 $dst

            git status
            git lfs track "*.bz2"

            git add .

            git commit -m "upload sherpa-onnx-${SHERPA_ONNX_VERSION}-rknn-linux-aarch64.tar.bz2"

            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs main

      - name: Release pre-compiled binaries and libs for rknn linux aarch64
        if: github.repository_owner == 'k2-fsa' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*linux-aarch64*.tar.bz2

      - name: Release pre-compiled binaries and libs for rknn linux aarch64
        if: github.repository_owner == 'csukuangfj' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*linux-aarch64*.tar.bz2
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: v1.12.13

      - name: Test offline Moonshine
        if: matrix.build_type != 'Debug'
        shell: bash
        run: |
          du -h -d1 .

          export SHERPA_ONNX_RKNN_TOOLKIT2_PATH=$PWD/rknn-toolkit2
          export LD_LIBRARY_PATH=$SHERPA_ONNX_RKNN_TOOLKIT2_PATH/rknpu2/runtime/Linux/librknn_api/aarch64:$LD_LIBRARY_PATH

          export PATH=$PWD/build/install/bin:$PATH
          export EXE=sherpa-onnx-offline

          readelf -d build/bin/sherpa-onnx-offline

          .github/scripts/test-offline-moonshine.sh


================================================
FILE: .github/workflows/run-java-test.yaml
================================================
name: run-java-test

on:
  push:
    branches:
      - master
    paths:
      - '.github/workflows/run-java-test.yaml'
      - 'cmake/**'
      - 'java-api-examples/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/jni/*'
      - 'sherpa-onnx/java-api/**'

  workflow_dispatch:

concurrency:
  group: run-java-test-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  run_java_test:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest, macos-14]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-java

      - name: OS info
        shell: bash
        run: |
          uname -a

      - uses: actions/setup-java@v4
        with:
          distribution: 'temurin' # See 'Supported distributions' for available options
          java-version: '21'

      - name: Display java version
        shell: bash
        run: |
          java -version
          java -help
          echo "----"
          javac -version
          javac -help
          echo "JAVA_HOME is: ${JAVA_HOME}"

          cmake --version

      - name:  Build sherpa-onnx (jar)
        shell: bash
        run: |
          cd sherpa-onnx/java-api/
          make
          ls -lh

      - uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-jar-${{ matrix.os }}
          path: sherpa-onnx/java-api/build

      - name:  Build sherpa-onnx (C++)
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"

          mkdir build
          cd build

          cmake \
            -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
            -DSHERPA_ONNX_ENABLE_TESTS=OFF \
            -DSHERPA_ONNX_ENABLE_CHECK=OFF \
            -DBUILD_SHARED_LIBS=ON \
            -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
            -DSHERPA_ONNX_ENABLE_BINARY=OFF \
            -DBUILD_ESPEAK_NG_EXE=OFF \
            -DSHERPA_ONNX_ENABLE_JNI=ON \
            ..

            make -j4
            ls -lh lib

      - name:  Run java version test
        shell: bash
        run: |
          cd ./java-api-examples
          ./run-version-test.sh

      - name:  Run java test (Non-Streaming ASR)
        shell: bash
        run: |
          cd ./java-api-examples

          ./run-non-streaming-decode-file-fire-red-asr-ctc.sh

          ./run-non-streaming-decode-file-zipformer-ctc.sh
          rm -rf sherpa-onnx-zipformer-ctc-*

          ./run-non-streaming-decode-file-dolphin-ctc.sh
          rm -rf sherpa-onnx-dolphin-*

          ./run-non-streaming-decode-file-moonshine-v2.sh
          ./run-non-streaming-decode-file-moonshine.sh
          rm -rf sherpa-onnx-moonshine-*

          ./run-non-streaming-decode-file-sense-voice.sh
          rm -rf sherpa-onnx-sense-voice-*

          ./run-inverse-text-normalization-paraformer.sh

          ./run-non-streaming-decode-file-paraformer.sh
          rm -rf sherpa-onnx-paraformer-zh-*

          ./run-non-streaming-decode-file-transducer.sh
          rm -rf sherpa-onnx-zipformer-*

          ./run-non-streaming-decode-file-fire-red-asr.sh
          rm -rf sherpa-onnx-fire-red-*

          ./run-non-streaming-decode-file-whisper.sh

          ./run-non-streaming-decode-file-whisper-multiple.sh
          rm -rf sherpa-onnx-whisper-*

          ./run-non-streaming-decode-file-nemo.sh
          rm -rf sherpa-onnx-nemo-*

      - name:  Run java test (FunASR Nano)
        shell: bash
        run: |
          cd ./java-api-examples
          ./run-non-streaming-decode-file-funasr-nano.sh
          rm -rf sherpa-onnx-funasr-*

      - name:  Run java test (MedASR CTC)
        shell: bash
        run: |
          cd ./java-api-examples
          ./run-non-streaming-decode-file-medasr-ctc.sh
          rm -rf sherpa-onnx-medasr-*

      - name:  Run java test (Omnilingual ASR CTC)
        shell: bash
        run: |
          cd ./java-api-examples
          ./run-non-streaming-decode-file-omnilingual-asr-ctc.sh
          rm -rf sherpa-onnx-omnilingual-*

      - name:  Run java test (WeNet CTC)
        shell: bash
        run: |
          cd ./java-api-examples
          ./run-non-streaming-decode-file-wenet-ctc.sh
          rm -rf sherpa-onnx-wenet*

      - name:  Run java test (Streaming T-one)
        shell: bash
        run: |
          cd ./java-api-examples
          ./run-streaming-decode-file-tone-ctc.sh
          rm -rf sherpa-onnx-streaming-t-one-*

      - name:  Run java test (Nemo Canary)
        shell: bash
        run: |
          cd ./java-api-examples
          ./run-non-streaming-decode-file-nemo-canary.sh
          rm -rf sherpa-onnx-nemo-*

      - name:  Run java test (Non-streaming SenseVoice with homophone replacer)
        shell: bash
        run: |
          cd ./java-api-examples
          ./run-non-streaming-decode-file-sense-voice-with-hr.sh
          rm -rf sherpa-onnx-sense-*
          rm -rf dict lexicon.txt replace.fst

      - name:  Run java test (VAD + Non-streaming Dolphin CTC)
        shell: bash
        run: |
          cd ./java-api-examples
          ./run-vad-non-streaming-dolphin-ctc.sh
          rm *.onnx
          ls -lh *.wav
          rm *.wav
          rm -rf sherpa-onnx-dolphin-*

      - name:  Run speech enhancement
        shell: bash
        run: |
          cd ./java-api-examples
          ./run-non-streaming-speech-enhancement-gtcrn.sh
          ./run-non-streaming-speech-enhancement-dpdfnet.sh
          ./run-streaming-speech-enhancement-gtcrn.sh
          ./run-streaming-speech-enhancement-dpdfnet.sh
          ls -lh *.wav

          rm -fv gtcrn_simple.onnx dpdfnet_baseline.onnx *.wav

      - name:  Run java test (Online add punctuations)
        shell: bash
        run: |
          cd ./java-api-examples
          ./run-online-add-punctuation-zh-en.sh
          # Delete model files to save space
          rm -rf sherpa-onnx-online-*

      - name:  Run java test (Offline add punctuations)
        shell: bash
        run: |
          cd ./java-api-examples
          ./run-offline-add-punctuation-zh-en.sh
          # Delete model files to save space
          rm -rf sherpa-onnx-punct-*

      - name:  Run java test (speaker diarization)
        shell: bash
        run: |
          cd ./java-api-examples
          ./run-offline-speaker-diarization.sh
          rm -rfv *.onnx *.wav sherpa-onnx-pyannote-*

      - name:  Run java test (kws)
        shell: bash
        run: |
          cd ./java-api-examples
          ./run-kws-from-file.sh
          rm -rf sherpa-onnx-*

      - name:  Run java test (VAD + Non-streaming SenseVoice)
        shell: bash
        run: |
          cd ./java-api-examples
          ./run-vad-non-streaming-sense-voice.sh
          rm *.onnx
          ls -lh *.wav
          rm *.wav
          rm -rf sherpa-onnx-*

      - name:  Run java test (VAD + Non-streaming Paraformer)
        shell: bash
        run: |
          cd ./java-api-examples
          ./run-vad-non-streaming-paraformer.sh
          rm *.onnx
          ls -lh *.wav
          rm *.wav
          rm -rf sherpa-onnx-*

      - name:  Run java test (ten-vad remove silence)
        shell: bash
        run: |
          cd ./java-api-examples
          ./run-ten-vad-remove-silence.sh
          rm *.onnx
          ls -lh *.wav
          rm *.wav

      - name:  Run java test (silero-vad remove silence)
        shell: bash
        run: |
          cd ./java-api-examples
          ./run-vad-remove-silence.sh
          rm *.onnx
          ls -lh *.wav
          rm *.wav

      - name:  Run java test (speaker identification)
        shell: bash
        run: |
          cd ./java-api-examples
          ./run-speaker-identification.sh
          # Delete model files to save space
          rm -rf *.onnx
          rm -rf sr-data

      - name:  Run java test (audio tagging)
        shell: bash
        run: |
          cd ./java-api-examples
          ./run-audio-tagging-zipformer-from-file.sh
          # Delete model files to save space
          rm -rf sherpa-onnx-zipformer-*

          ./run-audio-tagging-ced-from-file.sh
          rm -rf sherpa-onnx-ced-*


      - name:  Run java test (Spoken language identification)
        shell: bash
        run: |
          cd ./java-api-examples
          ./run-spoken-language-identification-whisper.sh
          # Delete model files to save space
          rm -rf sherpa-onnx-whisper-*

      - name:  Run java test (Streaming ASR)
        shell: bash
        run: |
          cd ./java-api-examples
          ./run-inverse-text-normalization-transducer.sh
          rm -rf sherpa-onnx-streaming-*

          ./run-streaming-decode-file-ctc.sh
          # Delete model files to save space
          rm -rf sherpa-onnx-streaming-*

          ./run-streaming-decode-file-ctc-hlg.sh
          rm -rf sherpa-onnx-streaming-*

          ./run-streaming-decode-file-paraformer.sh
          rm -rf sherpa-onnx-streaming-*

          ./run-streaming-decode-file-transducer.sh
          rm -rf sherpa-onnx-streaming-*

      - name:  Run java test (Non-Streaming TTS)
        shell: bash
        run: |
          cd ./java-api-examples

           ./run-pocket-tts.sh
           ./run-zipvoice-tts.sh
           ./run-supertonic-tts.sh
           ./run-non-streaming-tts-kitten-en.sh
           ./run-non-streaming-tts-kokoro-zh-en.sh
           ./run-non-streaming-tts-kokoro-en.sh
           ./run-non-streaming-tts-matcha-zh.sh
          ./run-non-streaming-tts-matcha-en.sh
          ls -lh

           rm -rf sherpa-onnx-pocket-tts-*
           rm -rf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia
           rm -rf sherpa-onnx-supertonic-tts-*
           rm -rf kitten-nano-en-*
           rm -rf kokoro-multi-*
           rm -rf kokoro-en-*

           rm -rf matcha-icefall-*
           rm vocos-22khz-univ.onnx
           rm vocos_24khz.onnx

          ./run-non-streaming-tts-piper-en.sh
          rm -rf vits-piper-*

          ./run-non-streaming-tts-coqui-de.sh
          rm -rf vits-coqui-*

          ./run-non-streaming-tts-vits-zh.sh
          rm -rf vits-zh-*

      - uses: actions/upload-artifact@v4
        with:
          name: tts-wav-files-${{ matrix.os }}
          path: java-api-examples/*.wav


================================================
FILE: .github/workflows/run-python-test-macos.yaml
================================================
name: run-python-test-macos

on:
  push:
    branches:
      - master
    paths:
      - '.github/workflows/run-python-test-macos.yaml'
      - '.github/scripts/test-python.sh'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'python-api-examples/**'

  workflow_dispatch:

concurrency:
  group: run-python-test-macos-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  run-python-test:
    name: ${{ matrix.os }} ${{ matrix.python-version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        # See https://github.com/actions/runner-images
        # macos-14 is for arm64
        # macos-14-large is for x64
        include:
          - os: macos-15-intel
            python-version: "3.8"

          - os: macos-15-intel
            python-version: "3.9"
          - os: macos-14
            python-version: "3.10"
          - os: macos-14
            python-version: "3.11"

          - os: macos-latest
            python-version: "3.12"

          - os: macos-latest
            python-version: "3.13"

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Display OS version
        shell: bash
        run: |
          uname -a
          sw_vers

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-python-${{ matrix.python-version }}

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip numpy pypinyin sentencepiece>=0.1.96 soundfile setuptools wheel librosa

      - name: Install sherpa-onnx
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          python3 -m pip install .

      - name: Test sherpa-onnx
        shell: bash
        run: |
          export OS=${{ matrix.os }}
          .github/scripts/test-python.sh
          .github/scripts/test-speaker-recognition-python.sh

      - uses: actions/upload-artifact@v4
        with:
          name: source-separation-${{ matrix.os }}-${{ matrix.python-version }}
          path: ./source-separation

      - uses: actions/upload-artifact@v4
        with:
          name: tts-generated-test-files-${{ matrix.os }}-${{ matrix.python-version }}
          path: tts


================================================
FILE: .github/workflows/run-python-test.yaml
================================================
name: run-python-test

on:
  push:
    branches:
      - master
    paths:
      - '.github/workflows/run-python-test.yaml'
      - '.github/scripts/test-python.sh'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'python-api-examples/**'
  pull_request:
    branches:
      - master
    paths:
      - '.github/workflows/run-python-test.yaml'
      - '.github/scripts/test-python.sh'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'python-api-examples/**'
  workflow_dispatch:

concurrency:
  group: run-python-test-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  run-python-test:
    name: ${{ matrix.os }} ${{ matrix.python-version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        include:
          - os: ubuntu-24.04
            python-version: "3.8"
          - os: ubuntu-24.04
            python-version: "3.9"

          - os: ubuntu-24.04
            python-version: "3.10"
          - os: ubuntu-24.04
            python-version: "3.11"
          - os: ubuntu-24.04
            python-version: "3.12"
          - os: ubuntu-24.04
            python-version: "3.13"

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Display OS version
        shell: bash
        run: |
          uname -a
          find "/etc" -maxdepth 1 -type f -name "*version" -exec head -n 100 {} \;

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-python-${{ matrix.python-version }}

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip numpy pypinyin sentencepiece>=0.1.96 soundfile librosa
          python3 -m pip install wheel twine setuptools

      - uses: afoley587/setup-ffmpeg@main
        id: setup-ffmpeg
        with:
          ffmpeg-version: release
          architecture: ''
          github-token: ${{ github.server_url == 'https://github.com' && github.token || '' }}

      - name: Install ninja
        shell: bash
        run: |
          sudo apt-get install ninja-build

      - name: Display ninja version
        shell: bash
        run: |
          ninja --version
          ninja --help || true
          which ninja

      - name: Display site packages dir
        shell: bash
        run: |
          python3 -c 'import site; print(site.getsitepackages())'
          p=$(python3 -c 'import site; print(site.getsitepackages())')
          echo "p: $p"

      - name: Install patchelf
        shell: bash
        run: |
          sudo apt-get update -q
          sudo apt-get install -q -y patchelf
          patchelf --help

      - name: Build sherpa-onnx
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version
          export SHERPA_ONNX_CMAKE_ARGS="-G Ninja -DCMAKE_BUILD_TYPE=Release"
          export SHERPA_ONNX_MAKE_ARGS="-j 6"

          python3 setup.py bdist_wheel

      - name: Patch wheels
        shell: bash
        run: |
          mkdir ./dist2
          sudo ./scripts/wheel/patch_wheel.py --in-dir ./dist --out-dir ./dist2

      - name: Install sherpa-onnx
        shell: bash
        run: |
          ls -lh dist2

          python3 -m pip install ./dist2/*.whl

      - uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.os }}-${{ matrix.python-version }}-whl
          path: ./dist

      - uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.os }}-${{ matrix.python-version }}-whl-patched
          path: ./dist2

      - name: Show dependencies
        shell: bash
        run: |
          cd dist
          mkdir t
          cd t
          unzip ../*.whl
          readelf -d sherpa_onnx/lib/_sherpa_onnx*.so

          echo "----"

          readelf -d sherpa_onnx-*.data/data/bin/sherpa-onnx

      - name: Show dependencies (patched)
        shell: bash
        run: |
          cd dist2
          mkdir t
          cd t
          unzip ../*.whl
          readelf -d sherpa_onnx/lib/_sherpa_onnx*.so

          echo "----"

          readelf -d sherpa_onnx-*.data/data/bin/sherpa-onnx

      - name: Test sherpa-onnx
        shell: bash
        run: |
          export OS=${{ matrix.os }}

          p=$(python3 -c 'import site; print(site.getsitepackages()[0])')
          echo "p: $p"
          p=$p/sherpa_onnx/lib
          echo "p: $p"
          ls -lh $p

          export LD_LIBRARY_PATH=$p:$LD_LIBRARY_PATH
          echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"

          .github/scripts/test-python.sh
          .github/scripts/test-speaker-recognition-python.sh

      - uses: actions/upload-artifact@v4
        with:
          name: source-separation-${{ matrix.os }}-${{ matrix.python-version }}-whl
          path: ./source-separation

      - uses: actions/upload-artifact@v4
        with:
          name: tts-generated-test-files-${{ matrix.os }}-${{ matrix.python-version }}
          path: tts


================================================
FILE: .github/workflows/sanitizer.yaml
================================================
name: sanitizer

on:
  workflow_dispatch:

  schedule:
    # minute (0-59)
    # hour (0-23)
    # day of the month (1-31)
    # month (1-12)
    # day of the week (0-6)
    # nightly build at 22:50 UTC time every day
    - cron: "50 22 * * *"

concurrency:
  group: sanitizer-${{ github.ref }}
  cancel-in-progress: true

jobs:
  sanitizer:
    runs-on: ${{ matrix.os }}
    name: sanitizer
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-sanitizer

      - name: Configure CMake
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          mkdir build
          cd build

          cmake \
            -DSHERPA_ONNX_ENABLE_PYTHON=ON \
            -DSHERPA_ONNX_ENABLE_TESTS=ON \
            -DSHERPA_ONNX_ENABLE_JNI=ON \
            -DSHERPA_ONNX_ENABLE_SANITIZER=ON \
            -D BUILD_SHARED_LIBS=ON \
            -D CMAKE_BUILD_TYPE=Release \
            -DCMAKE_INSTALL_PREFIX=./install \
            ..

      - name: Build sherpa-onnx
        shell: bash
        run: |
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"

          cd build
          make -j2
          make install

          ls -lh lib
          ls -lh bin

          file ./bin/sherpa-onnx

      - name: Display dependencies of sherpa-onnx for macos
        shell: bash
        run: |
          file bin/sherpa-onnx
          otool -L build/bin/sherpa-onnx
          otool -l build/bin/sherpa-onnx

      - name: Test C++ API
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api
          export CXX_WHISPER_EXE=whisper-cxx-api

          .github/scripts/test-cxx-api.sh

      - name: Test online punctuation
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-online-punctuation

          .github/scripts/test-online-punctuation.sh

      - name: Test offline punctuation
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline-punctuation

          .github/scripts/test-offline-punctuation.sh

      - name: Test offline transducer
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline

          .github/scripts/test-offline-transducer.sh

      - name: Test online CTC
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx

          .github/scripts/test-online-ctc.sh


      - name: Test C API
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export SLID_EXE=spoken-language-identification-c-api
          export SID_EXE=speaker-identification-c-api
          export AT_EXE=audio-tagging-c-api
          export PUNCT_EXE=add-punctuation-c-api

          .github/scripts/test-c-api.sh

      - name: Test Audio tagging
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline-audio-tagging

          .github/scripts/test-audio-tagging.sh

      - name: Test spoken language identification (C++ API)
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline-language-identification

          .github/scripts/test-spoken-language-identification.sh

      - name: Test transducer kws
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-keyword-spotter

          .github/scripts/test-kws.sh

      - name: Test offline TTS
        if: matrix.with_tts == 'ON'
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline-tts

          .github/scripts/test-offline-tts.sh

      - name: Test online paraformer
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx

          .github/scripts/test-online-paraformer.sh

      - name: Test offline Whisper
        if: matrix.build_type != 'Debug'
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline

          .github/scripts/test-offline-whisper.sh

      - name: Test offline CTC
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline

          .github/scripts/test-offline-ctc.sh

      - name: Test online transducer
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx

          .github/scripts/test-online-transducer.sh

      - name: Test online transducer (C API)
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=decode-file-c-api

          .github/scripts/test-online-transducer.sh


================================================
FILE: .github/workflows/speaker-diarization.yaml
================================================
name: speaker-diarization

on:
  push:
    branches:
      - speaker-diarization
  workflow_dispatch:

concurrency:
  group: speaker-diarization-${{ github.ref }}
  cancel-in-progress: true

jobs:
  linux:
    name: speaker diarization
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-speaker-diarization

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install pyannote
        shell: bash
        run: |
          pip install pyannote.audio onnx onnxruntime

      - name: Install sherpa-onnx from source
        shell: bash
        run: |
          python3 -m pip install --upgrade pip
          python3 -m pip install wheel twine setuptools

          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"

          cat sherpa-onnx/python/sherpa_onnx/__init__.py

          python3 setup.py bdist_wheel
          ls -lh dist
          pip install ./dist/*.whl

      - name: Run tests
        shell: bash
        run: |
          pushd scripts/pyannote/segmentation

          python3 -c "import sherpa_onnx; print(sherpa_onnx.__file__)"
          python3 -c "import sherpa_onnx; print(sherpa_onnx.__version__)"
          python3 -c "import sherpa_onnx; print(dir(sherpa_onnx))"

          curl -SL -O https://huggingface.co/csukuangfj/pyannote-models/resolve/main/segmentation-3.0/pytorch_model.bin

          test_wavs=(
            0-four-speakers-zh.wav
            1-two-speakers-en.wav
            2-two-speakers-en.wav
            3-two-speakers-en.wav
          )

          for w in ${test_wavs[@]}; do
            curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/$w
          done

          soxi *.wav

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
          tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
          rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
          ls -lh sherpa-onnx-pyannote-segmentation-3-0

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

          for w in ${test_wavs[@]}; do
            echo "---------test $w (onnx)----------"
            time ./speaker-diarization-onnx.py \
              --seg-model ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx \
              --speaker-embedding-model ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx \
              --wav $w

            echo "---------test $w (torch)----------"
            time ./speaker-diarization-torch.py  --wav $w
          done


================================================
FILE: .github/workflows/style_check.yaml
================================================
# Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
#
# See ../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: style_check

on:
  push:
    branches:
      - master
    paths:
      - '.github/workflows/style_check.yaml'
      - 'sherpa-onnx/**'

  workflow_dispatch:

concurrency:
  group: style_check-${{ github.ref }}
  cancel-in-progress: true

jobs:
  style_check:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: [3.8]
      fail-fast: false

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Check style with cpplint
        shell: bash
        working-directory: ${{github.workspace}}
        run: ./scripts/check_style_cpplint.sh


================================================
FILE: .github/workflows/swift.yaml
================================================
name: swift

on:
  push:
    branches:
      - master
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'
    paths:
      - './build-swift-macos.sh'
      - '.github/workflows/swift.yaml'
      - 'cmake/**'
      - 'swift-api-examples/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/c-api/**'
      - '.github/scripts/test-swift.sh'

  pull_request:
    branches:
      - master
    paths:
      - './build-swift-macos.sh'
      - '.github/workflows/swift.yaml'
      - 'cmake/**'
      - 'swift-api-examples/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/c-api/**'
      - '.github/scripts/test-swift.sh'

  workflow_dispatch:

concurrency:
  group: swift-${{ github.ref }}
  cancel-in-progress: true

jobs:
  swift:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest, macos-15-intel]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-swift

      - name: Build
        shell: bash
        run: |
          sudo mkdir -p /Users/fangjun/Desktop
          sudo chmod a=rwx /Users/fangjun/Desktop
          ls -lhd /Users/fangjun/Desktop
          ls -lh /Users/fangjun/Desktop

          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          ./build-swift-macos.sh

      - name: Copy files
        if: matrix.os == 'macos-15-intel' && (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-macos-xcframework-static
          mkdir $dst

          mv -v build-swift-macos/sherpa-onnx.xcframework $dst

          brew install tree
          tree $dst

          tar cjvf ${dst}.tar.bz2 $dst

      - name: Release pre-compiled binaries and libs for macOS
        if: matrix.os == 'macos-15-intel' && (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*macos-xcframework-static.tar.bz2

      - name: test
        shell: bash
        run: |
          .github/scripts/test-swift.sh


================================================
FILE: .github/workflows/test-build-wheel.yaml
================================================
name: test-build-wheel

on:
  push:
    branches:
      - master
    paths:
      - 'setup.py'
      - '.github/workflows/test-build-wheel.yaml'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/python/**'

  workflow_dispatch:

concurrency:
  group: test-build-wheel-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test-build-wheel:
    name: ${{ matrix.os }} ${{ matrix.python-version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        # See https://github.com/actions/runner-images
        include:
          - os: ubuntu-latest
            python-version: "3.8"
          - os: ubuntu-latest
            python-version: "3.9"
          - os: ubuntu-latest
            python-version: "3.10"
          - os: ubuntu-latest
            python-version: "3.11"
          - os: ubuntu-latest
            python-version: "3.12"
          - os: ubuntu-latest
            python-version: "3.13"

          - os: ubuntu-24.04-arm
            python-version: "3.8"
          - os: ubuntu-24.04-arm
            python-version: "3.9"
          - os: ubuntu-24.04-arm
            python-version: "3.10"
          - os: ubuntu-24.04-arm
            python-version: "3.11"
          - os: ubuntu-24.04-arm
            python-version: "3.12"
          - os: ubuntu-24.04-arm
            python-version: "3.13"

          - os: macos-15-intel
            python-version: "3.8"

          - os: macos-15-intel
            python-version: "3.9"
          - os: macos-15-intel
            python-version: "3.10"
          - os: macos-15-intel
            python-version: "3.11"

          - os: macos-latest
            python-version: "3.12"
          - os: macos-latest
            python-version: "3.13"

          - os: windows-2022
            python-version: "3.7"
          - os: windows-2022
            python-version: "3.8"
          - os: windows-2022
            python-version: "3.9"

          - os: windows-2022
            python-version: "3.10"
          - os: windows-2022
            python-version: "3.11"
          - os: windows-2022
            python-version: "3.12"
          - os: windows-2022
            python-version: "3.13"

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-${{ matrix.python_version }}

      - name: Install python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip
          python3 -m pip install wheel twine setuptools

      - name: Build
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          export SHERPA_ONNX_MAKE_ARGS="VERBOSE=1 -j2"

          python3 setup.py bdist_wheel
          ls -lh dist

      - uses: actions/upload-artifact@v4
        with:
          name: wheel-${{ matrix.os }}-${{ matrix.python-version }}
          path: ./dist/*.whl

      - name: Display wheel
        shell: bash
        run: |
          ls -lh dist
          cd dist

          mkdir t
          cd t
          unzip ../*.whl

          ls -lh sherpa_onnx/lib

          file sherpa_onnx/lib/*

      - name: Install wheel
        shell: bash
        run: |
          pip install --verbose ./dist/*.whl

      - name: Test
        shell: bash
        run: |
          which sherpa-onnx
          sherpa-onnx --help


================================================
FILE: .github/workflows/test-dart-package.yaml
================================================
name: test-dart-package

on:
  schedule:
    # minute (0-59)
    # hour (0-23)
    # day of the month (1-31)
    # month (1-12)
    # day of the week (0-6)
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"

  workflow_dispatch:

concurrency:
  group: test-dart-package-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test_dart_package:
    name: ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest, ubuntu-24.04-arm] #, windows-2022]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      # see https://github.com/subosito/flutter-action/issues/345
      - name: Set up Flutter
        uses: subosito/flutter-action@v2
        with:
          channel: master
          flutter-version: 3.24.0

      - name: Display flutter info
        shell: bash
        run: |
          which flutter
          which dart

          flutter --version
          dart --version
          flutter doctor

      - name: Display sherpa-onnx package info
        shell: bash
        run: |
          cd dart-api-examples/vad
          flutter pub get

          if false; then

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-24.04-arm ]]; then
            echo "-----"
            ls -lh /home/runner/work/_temp/pub-cache/hosted/pub.dev

            echo "-----"
            ls -lh /home/runner/work/_temp/pub-cache/hosted/pub.dev/sherpa_onnx*

            echo "-----"
            ls -lh /home/runner/work/_temp/pub-cache/hosted/pub.dev/sherpa_onnx*/*

            echo "-----"
            ls -lh /home/runner/work/_temp/pub-cache/hosted/pub.dev/sherpa_onnx_linux-*

            # sudo mkdir /home/runner/work/_temp/pub-cache/hosted/pub.dev/sherpa_onnx_linux-1.10.7/lib
            # sudo touch /home/runner/work/_temp/pub-cache/hosted/pub.dev/sherpa_onnx_linux-1.10.7/lib/.gitkeep

            echo "-----"
            ls -lh /home/runner/work/_temp/pub-cache/hosted/pub.dev/sherpa_onnx_linux-*/linux/*
          elif [[ ${{ matrix.os }} == macos-latest ]]; then
            echo "-----"
            ls -lh /Users/runner/work/_temp/pub-cache/hosted/pub.dev

            echo "-----"
            ls -lh /Users/runner/work/_temp/pub-cache/hosted/pub.dev/sherpa_onnx*

            echo "-----"
            ls -lh /Users/runner/work/_temp/pub-cache/hosted/pub.dev/sherpa_onnx*/*

            echo "-----"
            ls -lh /Users/runner/work/_temp/pub-cache/hosted/pub.dev/sherpa_onnx_macos-*/

            echo "-----"
            ls -lh /Users/runner/work/_temp/pub-cache/hosted/pub.dev/sherpa_onnx_macos-*/macos

            # sudo mkdir /Users/runner/work/_temp/pub-cache/hosted/pub.dev/sherpa_onnx_macos-1.10.7/lib
            # sudo touch /Users/runner/work/_temp/pub-cache/hosted/pub.dev/sherpa_onnx_macos-1.10.7/lib/.gitkeep
          fi
          fi


      - name: Run tests
        shell: bash
        run: |
          .github/scripts/test-dart.sh


================================================
FILE: .github/workflows/test-dart.yaml
================================================
name: test-dart

on:
  push:
    branches:
      - master
      - dart
    paths:
      - '.github/workflows/test-dart.yaml'
      - '.github/scripts/test-dart.sh'
      - 'dart-api-examples/**'
      - 'flutter/**'

  workflow_dispatch:

concurrency:
  group: test-dart-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test_dart:
    name: ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest, ubuntu-24.04-arm] #, windows-2022]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-dart

      # see https://github.com/subosito/flutter-action/issues/345
      - name: Set up Flutter
        uses: subosito/flutter-action@v2
        with:
          channel: master
          flutter-version: 3.24.0

      - name: Display flutter info
        shell: bash
        run: |
          which flutter
          which dart

          flutter --version
          dart --version
          flutter doctor

      - name: Build sherpa-onnx
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version
          mkdir build

          cd build

          cmake \
            -DBUILD_SHARED_LIBS=ON \
            -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
            -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
            -DBUILD_ESPEAK_NG_EXE=OFF \
            -DSHERPA_ONNX_ENABLE_BINARY=OFF \
            -DCMAKE_INSTALL_PREFIX=./install \
            ..

          cmake --build . --target install --config Release

      - name: Copy libs
        shell: bash
        run: |
          if [[ ${{ matrix.os }} == ubuntu-latest ]]; then
            os=linux-x64
          elif [[ ${{ matrix.os }} == ubuntu-24.04-arm ]]; then
            os=linux-aarch64
          elif [[ ${{ matrix.os }} == macos-latest ]]; then
            os=macos
          elif [[ ${{ matrix.os }} == windows-2022 ]]; then
            os=windows
          fi

          echo "os: $os"

          if [[ $os == windows ]]; then
            cp -fv build/install/lib/*.dll ./flutter/sherpa_onnx_$os/$os
          elif [[ $os == linux-x64 ]]; then
            cp -fv build/install/lib/lib* ./flutter/sherpa_onnx_linux/linux/x64
          elif [[ $os == linux-aarch64 ]]; then
            cp -fv build/install/lib/lib* ./flutter/sherpa_onnx_linux/linux/aarch64
          else
            cp -fv build/install/lib/lib* ./flutter/sherpa_onnx_$os/$os
          fi

          echo "--------------------"

          if [[ $os == linux-x64 || $os == linux-aarch64 ]]; then
            ls -lh ./flutter/sherpa_onnx_linux/linux/*
          else
            ls -lh ./flutter/sherpa_onnx_$os/$os
          fi

      - name: Run tests
        shell: bash
        run: |
          cp scripts/dart/vad-pubspec.yaml dart-api-examples/vad/pubspec.yaml
          cp scripts/dart/non-streaming-asr-pubspec.yaml dart-api-examples/non-streaming-asr/pubspec.yaml
          cp scripts/dart/streaming-asr-pubspec.yaml dart-api-examples/streaming-asr/pubspec.yaml
          cp scripts/dart/tts-pubspec.yaml dart-api-examples/tts/pubspec.yaml
          cp scripts/dart/kws-pubspec.yaml dart-api-examples/keyword-spotter/pubspec.yaml
          cp scripts/dart/vad-non-streaming-asr-pubspec.yaml dart-api-examples/vad-with-non-streaming-asr/pubspec.yaml
          cp scripts/dart/audio-tagging-pubspec.yaml dart-api-examples/audio-tagging/pubspec.yaml
          cp scripts/dart/add-punctuations-pubspec.yaml dart-api-examples/add-punctuations/pubspec.yaml
          cp scripts/dart/speaker-id-pubspec.yaml dart-api-examples/speaker-identification/pubspec.yaml
          cp scripts/dart/speaker-diarization-pubspec.yaml dart-api-examples/speaker-diarization/pubspec.yaml
          cp scripts/dart/speech-enhancement-gtcrn-pubspec.yaml dart-api-examples/speech-enhancement-gtcrn/pubspec.yaml
          cp scripts/dart/speech-enhancement-dpdfnet-pubspec.yaml dart-api-examples/speech-enhancement-dpdfnet/pubspec.yaml
          cp scripts/dart/streaming-speech-enhancement-gtcrn-pubspec.yaml dart-api-examples/streaming-speech-enhancement-gtcrn/pubspec.yaml
          cp scripts/dart/streaming-speech-enhancement-dpdfnet-pubspec.yaml dart-api-examples/streaming-speech-enhancement-dpdfnet/pubspec.yaml
          cp scripts/dart/slid-pubspec.yaml dart-api-examples/spoken-language-identification/pubspec.yaml

          cp scripts/dart/sherpa-onnx-pubspec.yaml flutter/sherpa_onnx/pubspec.yaml


          .github/scripts/test-dart.sh


================================================
FILE: .github/workflows/test-dot-net-nuget.yaml
================================================
name: test-dot-net-nuget

on:
  workflow_dispatch:

  schedule:
    # minute (0-59)
    # hour (0-23)
    # day of the month (1-31)
    # month (1-12)
    # day of the week (0-6)
    # nightly build at 23:50 UTC time every day
    - cron: "50 23 * * *"

concurrency:
  group: test-dot-net-nuget-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  test-dot-net-nuget:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest, windows-2022]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Free space
        if: matrix.os == 'ubuntu-latest'
        shell: bash
        run: |
          df -h
          rm -rf /opt/hostedtoolcache
          df -h

      - name: Free more space
        if: matrix.os == 'ubuntu-latest'
        shell: bash
        run: |
          # https://github.com/orgs/community/discussions/25678
          cd /opt
          find . -maxdepth 1 -mindepth 1 '!' -path ./containerd '!' -path ./actionarchivecache '!' -path ./runner '!' -path ./runner-cache -exec rm -rf '{}' ';'

          sudo rm -rf /usr/share/dotnet
          sudo rm -rf "/usr/local/share/boost"
          sudo rm -rf "$AGENT_TOOLSDIRECTORY"

      - name: Free Disk Space (Ubuntu)
        if: matrix.os == 'ubuntu-latest'
        uses: jlumbroso/free-disk-space@main
        with:
          # this might remove tools that are actually needed,
          # if set to "true" but frees about 6 GB
          tool-cache: false

          # all of these default to true, but feel free to set to
          # "false" if necessary for your workflow
          android: true
          dotnet: false
          haskell: true
          large-packages: true
          docker-images: false
          swap-storage: true

      - name: Check space
        if: matrix.os == 'ubuntu-latest'
        shell: bash
        run: |
          df -h

      - name: Setup .NET 8.0
        uses: actions/setup-dotnet@v4
        with:
          dotnet-version: 8.0.x

      - name: Check dotnet
        run: dotnet --info

      - name: Run tests
        shell: bash
        run: |
          .github/scripts/test-dot-net.sh

      - uses: actions/upload-artifact@v4
        with:
          name: dot-net-tts-generated-test-files-${{ matrix.os }}
          path: tts


================================================
FILE: .github/workflows/test-dot-net.yaml
================================================
name: test-dot-net

on:
  push:
    branches:
      - master
    paths:
      - '.github/workflows/test-dot-net.yaml'
      - '.github/scripts/test-dot-net.sh'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'dotnet-examples/**'
      - 'scripts/dotnet/**'

  workflow_dispatch:

concurrency:
  group: test-dot-net-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  build-libs:
    name: ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.8"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-dotnet-release-shared

      - name: Build sherpa-onnx
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          mkdir build
          cd build
          cmake \
            -DBUILD_SHARED_LIBS=ON \
            -DCMAKE_INSTALL_PREFIX=./install \
            -DCMAKE_BUILD_TYPE=Release \
            -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
            -DBUILD_ESPEAK_NG_EXE=OFF \
            -DSHERPA_ONNX_ENABLE_BINARY=OFF \
            ..

          cmake --build . --target install --config Release

          rm -rf install/share
          rm -rf install/lib/pkg*

          ls -lh ./install/lib

      - uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.os }}
          path: ./build/install/lib/

  test-dot-net:
    runs-on: ${{ matrix.os }}
    needs: [build-libs]
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.8"]

    steps:
      - name: Check space
        shell: bash
        run: |
          df -h

      - name: Free space
        if: false
        shell: bash
        run: |
          df -h
          rm -rf /opt/hostedtoolcache
          df -h

      - name: Free more space
        if: false
        shell: bash
        run: |
          # https://github.com/orgs/community/discussions/25678
          cd /opt
          find . -maxdepth 1 -mindepth 1 '!' -path ./containerd '!' -path ./actionarchivecache '!' -path ./runner '!' -path ./runner-cache -exec rm -rf '{}' ';'

          sudo rm -rf /usr/share/dotnet
          sudo rm -rf "/usr/local/share/boost"
          sudo rm -rf "$AGENT_TOOLSDIRECTORY"

      - name: Free Disk Space (Ubuntu)
        if: true
        uses: jlumbroso/free-disk-space@main
        with:
          # this might remove tools that are actually needed,
          # if set to "true" but frees about 6 GB
          tool-cache: false

          # all of these default to true, but feel free to set to
          # "false" if necessary for your workflow
          android: true
          dotnet: false
          haskell: true
          large-packages: true
          docker-images: false
          swap-storage: true

      - name: Check space
        if: true
        shell: bash
        run: |
          df -h

      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip Jinja2

      - name: Retrieve artifact from ubuntu-latest
        uses: actions/download-artifact@v4
        with:
          name: ubuntu-latest
          path: /tmp/linux-x64

      - name: Setup .NET
        uses: actions/setup-dotnet@v4
        with:
          dotnet-version: 8.0.x

      - name: Check dotnet
        run: dotnet --info

      - name: Display files
        shell: bash
        run: |
          echo "----------/tmp----------"
          ls -lh /tmp

          echo "----------/tmp/linux-x64----------"
          ls -lh /tmp/linux-x64
          df -h

      - name: Build
        shell: bash
        run: |
          cd scripts/dotnet
          ./run.sh
          df -h

          ls -lh /tmp/packages

      - name: Copy files
        shell: bash
        run: |
          cp -v scripts/dotnet/examples/Common.csproj dotnet-examples/Common/

          ls -lh /tmp

          df -h

      - name: Run tests
        shell: bash
        run: |
          dotnet nuget locals all --clear
          df -h

          .github/scripts/test-dot-net.sh

      - uses: actions/upload-artifact@v4
        with:
          name: dot-net-tts-generated-test-files-${{ matrix.os }}
          path: tts


================================================
FILE: .github/workflows/test-go-package.yaml
================================================
name: test-go-package

on:
  schedule:
    # minute (0-59)
    # hour (0-23)
    # day of the month (1-31)
    # month (1-12)
    # day of the week (0-6)
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"

  workflow_dispatch:

concurrency:
  group: test-go-package-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test-go-package:
    name: ${{ matrix.os }} ${{matrix.arch }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        include:
          - os: ubuntu-latest
            arch: amd64
          - os: ubuntu-22.04-arm
            arch: arm64
          - os: macos-15-intel
            arch: amd64
          - os: macos-14
            arch: arm64
          - os: windows-2022
            arch: x64
          - os: windows-2022
            arch: x86 # use 386 for GOARCH

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - uses: actions/setup-go@v5
        with:
          go-version: '>=1.17'

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Display go version
        shell: bash
        run: |
          go version
          go env GOPATH
          go env GOARCH

      - name: Set up MinGW for x64
        if: matrix.os == 'windows-2022' && matrix.arch == 'x64'
        uses: csukuangfj/setup-mingw@v2.2.1
        with:
          platform: ${{ matrix.arch }}

      - name: Set up MinGW for x86
        if: matrix.os == 'windows-2022' && matrix.arch == 'x86'
        uses: csukuangfj/setup-mingw@v2.2.1
        with:
          platform: ${{ matrix.arch }}
          version: '12.2.0'

      - name: Show gcc
        if: matrix.os == 'windows-2022'
        run: |
          gcc --version

      - name: Test NeMo Canary ASR
        if: matrix.os != 'windows-2022'
        shell: bash
        run: |
          cd go-api-examples/non-streaming-canary-decode-files
          ./run.sh
          rm -rf sherpa-onnx-nemo-*

      - name: Test speech enhancement (GTCRN)
        if: matrix.os != 'windows-2022'
        shell: bash
        run: |
          cd go-api-examples/speech-enhancement-gtcrn/
          ./run.sh

      - name: Test speech enhancement (DPDFNet)
        if: matrix.os != 'windows-2022'
        shell: bash
        run: |
          cd go-api-examples/speech-enhancement-dpdfnet/
          ./run.sh

      - name: Test streaming speech enhancement (GTCRN)
        if: matrix.os != 'windows-2022'
        shell: bash
        run: |
          cd go-api-examples/streaming-speech-enhancement-gtcrn/
          ./run.sh

      - name: Test streaming speech enhancement (DPDFNet)
        if: matrix.os != 'windows-2022'
        shell: bash
        run: |
          cd go-api-examples/streaming-speech-enhancement-dpdfnet/
          ./run.sh

      - name: Test Keyword spotting
        if: matrix.os != 'windows-2022'
        shell: bash
        run: |
          cd go-api-examples/keyword-spotting-from-file/
          ./run.sh

      - name: Test adding punctuation
        if: matrix.os != 'windows-2022'
        shell: bash
        run: |
          cd go-api-examples/add-punctuation/
          ./run.sh

      - name: Test non-streaming speaker diarization
        if: matrix.os != 'windows-2022'
        shell: bash
        run: |
          cd go-api-examples/non-streaming-speaker-diarization/
          ./run.sh

      - name: Test non-streaming speaker diarization
        if: matrix.os == 'windows-2022' && matrix.arch == 'x64'
        shell: bash
        run: |
          cd go-api-examples/non-streaming-speaker-diarization/
          go mod tidy
          cat go.mod
          go build

          echo $PWD
          ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
          ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/*
          cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/x86_64-pc-windows-gnu/*.dll .

          ./run.sh

      - name: Test non-streaming speaker diarization
        if: matrix.os == 'windows-2022' && matrix.arch == 'x86'
        shell: bash
        run: |
          cd go-api-examples/non-streaming-speaker-diarization/

          go env GOARCH
          go env -w GOARCH=386
          go env -w CGO_ENABLED=1

          go mod tidy
          cat go.mod
          go build

          echo $PWD
          ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
          ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/*
          cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll .

          ./run.sh

      - name: Test streaming HLG decoding (Linux/macOS)
        if: matrix.os != 'windows-2022'
        shell: bash
        run: |
          cd go-api-examples/streaming-hlg-decoding/
          ./run.sh

      - name: Test speaker identification (Linux/macOS)
        if: matrix.os != 'windows-2022'
        shell: bash
        run: |
          cd go-api-examples/speaker-identification
          ./run.sh

      - name: Test speaker identification (Win64)
        if: matrix.os == 'windows-2022' && matrix.arch == 'x64'
        shell: bash
        run: |
          cd go-api-examples/speaker-identification
          go mod tidy
          cat go.mod
          go build

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx
          git clone https://github.com/csukuangfj/sr-data
          ls -lh
          echo $PWD
          ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
          ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/*
          cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/x86_64-pc-windows-gnu/*.dll .
          ls -lh
          go mod tidy
          go build
          go run ./main.go

      - name: Test speaker identification (Win32)
        if: matrix.os == 'windows-2022' && matrix.arch == 'x86'
        shell: bash
        run: |
          cd go-api-examples/speaker-identification
          go mod tidy
          cat go.mod
          ls -lh

          go env GOARCH
          go env
          echo "------------------------------"
          go env -w GOARCH=386
          go env -w CGO_ENABLED=1
          go env

          go clean
          go build

          echo $PWD

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx
          git clone https://github.com/csukuangfj/sr-data
          ls -lh
          echo $PWD
          ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
          ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/*
          cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll .
          ls -lh
          go mod tidy
          go build
          go run ./main.go

          rm -rf sr-data
          rm -rf *.onnx

      - name: Test non-streaming TTS (Linux/macOS)
        if: matrix.os != 'windows-2022'
        shell: bash
        run: |
          mkdir tts-waves
          cd go-api-examples/non-streaming-tts
          ls -lh
          go mod tidy
          cat go.mod
          go build
          ls -lh

          echo "Test kokoro zh+en"
          ./run-kokoro-zh-en.sh
          rm -rf kokoro-multi-*
          ls -lh

          echo "Test kokoro en"
          ./run-kokoro-en.sh
          rm -rf kokoro-en-*
          ls -lh

          echo "Test matcha zh"
          ./run-matcha-zh.sh
          rm -rf matcha-icefall-*

          echo "Test matcha en"
          ./run-matcha-en.sh
          rm -rf matcha-icefall-*
          ls -lh *.wav

          echo "Test vits-ljs"
          ./run-vits-ljs.sh
          rm -rf vits-ljs

          echo "Test vits-vctk"
          ./run-vits-vctk.sh
          rm -rf vits-vctk

          echo "Test vits-icefall-zh-aishell3"
          ./run-vits-zh-aishell3.sh
          rm -rf vits-icefall-zh-aishell3

          echo "Test vits-piper-en_US-lessac-medium"
          ./run-vits-piper-en_US-lessac-medium.sh
          rm -rf vits-piper-en_US-lessac-medium

          ls -lh *.wav
          cp *.wav ../../tts-waves/

      - name: Test zero-shot ZipVoice TTS (Linux/macOS)
        if: matrix.os != 'windows-2022'
        shell: bash
        run: |
          mkdir -p tts-waves
          cd go-api-examples/zero-shot-zipvoice-tts
          ls -lh
          go mod tidy
          cat go.mod
          go build
          ls -lh

          ./run.sh
          rm -rf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia
          rm -f vocos_24khz.onnx
          ls -lh *.wav
          cp *.wav ../../tts-waves/

      - name: Test non-streaming TTS (Win64)
        if: matrix.os == 'windows-2022' && matrix.arch == 'x64'
        shell: bash
        run: |
          mkdir tts-waves
          cd go-api-examples/non-streaming-tts
          ls -lh
          go mod tidy
          cat go.mod
          go build
          ls -lh

          echo $PWD
          ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
          ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/*
          cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/x86_64-pc-windows-gnu/*.dll .
          ls -lh

          echo "Test matcha zh"
          ./run-matcha-zh.sh
          rm -rf matcha-icefall-*

          echo "Test matcha en"
          ./run-matcha-en.sh
          rm -rf matcha-icefall-*
          ls -lh *.wav

          echo "Test vits-ljs"
          ./run-vits-ljs.sh
          rm -rf vits-ljs

          echo "Test vits-vctk"
          ./run-vits-vctk.sh
          rm -rf vits-vctk

          echo "Test vits-zh-aishell3"
          ./run-vits-zh-aishell3.sh
          rm -rf vits-icefall-zh-aishell3

          echo "Test vits-piper-en_US-lessac-medium"
          ./run-vits-piper-en_US-lessac-medium.sh
          rm -rf vits-piper-en_US-lessac-medium

          ls -lh *.wav
          cp *.wav ../../tts-waves/

      - name: Test zero-shot ZipVoice TTS (Win64)
        if: matrix.os == 'windows-2022' && matrix.arch == 'x64'
        shell: bash
        run: |
          mkdir -p tts-waves
          cd go-api-examples/zero-shot-zipvoice-tts
          ls -lh
          go mod tidy
          cat go.mod
          go build
          ls -lh

          echo $PWD
          cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/x86_64-pc-windows-gnu/*.dll .
          ls -lh

          ./run.sh
          rm -rf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia
          rm -f vocos_24khz.onnx
          ls -lh *.wav
          cp *.wav ../../tts-waves/

      - name: Test non-streaming TTS (Win32)
        if: matrix.os == 'windows-2022' && matrix.arch == 'x86'
        shell: bash
        run: |
          mkdir tts-waves
          cd go-api-examples/non-streaming-tts
          ls -lh
          go mod tidy
          cat go.mod
          ls -lh

          go env GOARCH
          go env
          echo "------------------------------"
          go env -w GOARCH=386
          go env -w CGO_ENABLED=1
          go env

          go clean
          go build

          echo $PWD
          ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
          cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll .
          ls -lh

          echo "Test matcha zh"
          ./run-matcha-zh.sh
          rm -rf matcha-icefall-*

          echo "Test matcha en"
          ./run-matcha-en.sh
          rm -rf matcha-icefall-*
          ls -lh *.wav

          echo "Test vits-ljs"
          ./run-vits-ljs.sh
          rm -rf vits-ljs

          echo "Test vits-vctk"
          ./run-vits-vctk.sh
          rm -rf vits-vctk

          echo "Test vits-zh-aishell3"
          ./run-vits-zh-aishell3.sh
          rm -rf vits-zh-aishell3

          echo "Test vits-piper-en_US-lessac-medium"
          ./run-vits-piper-en_US-lessac-medium.sh
          rm -rf vits-piper-en_US-lessac-medium

          ls -lh *.wav
          cp *.wav ../../tts-waves/

      - name: Test zero-shot ZipVoice TTS (Win32)
        if: matrix.os == 'windows-2022' && matrix.arch == 'x86'
        shell: bash
        run: |
          mkdir -p tts-waves
          cd go-api-examples/zero-shot-zipvoice-tts
          ls -lh
          go mod tidy
          cat go.mod
          ls -lh

          go env -w GOARCH=386
          go env -w CGO_ENABLED=1
          go clean
          go build

          cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll .
          ls -lh

          ./run.sh
          rm -rf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia
          rm -f vocos_24khz.onnx
          ls -lh *.wav
          cp *.wav ../../tts-waves/

      - name: Test non-streaming decoding files (Linux/macOS)
        if: matrix.os != 'windows-2022'
        shell: bash
        run: |
          cd go-api-examples/non-streaming-decode-files
          ls -lh
          go mod tidy
          cat go.mod
          go build
          ls -lh

          echo "Test transducer"
          ./run-transducer.sh
          rm -rf sherpa-onnx-zipformer-en-2023-06-26

          echo "Test paraformer"
          ./run-paraformer.sh
          rm -rf sherpa-onnx-paraformer-zh-2023-09-14

          echo "Test NeMo CTC"
          ./run-nemo-ctc.sh
          rm -rf sherpa-onnx-nemo-ctc-en-conformer-medium

          echo "Test Whisper tiny.en"
          ./run-whisper.sh
          rm -rf sherpa-onnx-whisper-tiny.en

          echo "Test Tdnn yesno"
          ./run-tdnn-yesno.sh
          rm -rf sherpa-onnx-tdnn-yesno

      - name: Test non-streaming decoding files (Win64)
        if: matrix.os == 'windows-2022' && matrix.arch == 'x64'
        shell: bash
        run: |
          cd go-api-examples/non-streaming-decode-files
          ls -lh
          go mod tidy
          cat go.mod
          go build
          ls -lh

          echo $PWD
          ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
          ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/*
          cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/x86_64-pc-windows-gnu/*.dll .
          ls -lh

          echo "Test transducer"
          ./run-transducer.sh
          rm -rf sherpa-onnx-zipformer-en-2023-06-26

          echo "Test paraformer"
          ./run-paraformer.sh
          rm -rf sherpa-onnx-paraformer-zh-2023-09-14

          echo "Test NeMo CTC"
          ./run-nemo-ctc.sh
          rm -rf sherpa-onnx-nemo-ctc-en-conformer-medium

          echo "Test Whisper tiny.en"
          ./run-whisper.sh
          rm -rf sherpa-onnx-whisper-tiny.en

          echo "Test Tdnn yesno"
          ./run-tdnn-yesno.sh
          rm -rf sherpa-onnx-tdnn-yesno

      - name: Test non-streaming decoding files (Win32)
        if: matrix.os == 'windows-2022' && matrix.arch == 'x86'
        shell: bash
        run: |
          cd go-api-examples/non-streaming-decode-files
          ls -lh
          go mod tidy
          cat go.mod
          ls -lh

          go env GOARCH
          go env
          echo "------------------------------"
          go env -w GOARCH=386
          go env -w CGO_ENABLED=1
          go env

          go clean
          go build

          echo $PWD
          ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
          cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll .
          ls -lh

          echo "Test transducer"
          ./run-transducer.sh
          rm -rf sherpa-onnx-zipformer-en-2023-06-26

          echo "Test paraformer"
          ./run-paraformer.sh
          rm -rf sherpa-onnx-paraformer-zh-2023-09-14

          echo "Test NeMo CTC"
          ./run-nemo-ctc.sh
          rm -rf sherpa-onnx-nemo-ctc-en-conformer-medium

          echo "Test Whisper tiny.en"
          ./run-whisper.sh
          rm -rf sherpa-onnx-whisper-tiny.en

          echo "Test Tdnn yesno"
          ./run-tdnn-yesno.sh
          rm -rf sherpa-onnx-tdnn-yesno

      - name: Test audio tagging (Linux/macOS)
        if: matrix.os != 'windows-2022'
        shell: bash
        run: |
          cd go-api-examples/audio-tagging
          ls -lh
          go mod tidy
          cat go.mod
          go build
          ls -lh

          ./run.sh

      - name: Test streaming decoding files (Linux/macOS)
        if: matrix.os != 'windows-2022'
        shell: bash
        run: |
          cd go-api-examples/streaming-decode-files
          ls -lh
          go mod tidy
          cat go.mod
          go build
          ls -lh

          echo "Test transducer"
          ./run-transducer.sh
          rm -rf sherpa-onnx-streaming-zipformer-en-2023-06-26

          echo "Test paraformer"
          ./run-paraformer.sh
          rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en

      - name: Test streaming decoding files (Win64)
        if: matrix.os == 'windows-2022' && matrix.arch == 'x64'
        shell: bash
        run: |
          cd go-api-examples/streaming-decode-files
          ls -lh
          go mod tidy
          cat go.mod
          go build
          ls -lh

          echo $PWD
          ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
          ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/*
          cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/x86_64-pc-windows-gnu/*.dll .
          ls -lh

          echo "Test transducer"
          ./run-transducer.sh
          rm -rf sherpa-onnx-streaming-zipformer-en-2023-06-26

          echo "Test paraformer"
          ./run-paraformer.sh
          rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en

      - name: Test streaming decoding files (Win32)
        if: matrix.os == 'windows-2022' && matrix.arch == 'x86'
        shell: bash
        run: |
          cd go-api-examples/streaming-decode-files
          ls -lh
          go mod tidy
          cat go.mod
          ls -lh

          go env GOARCH
          go env
          echo "------------------------------"
          go env -w GOARCH=386
          go env -w CGO_ENABLED=1
          go env

          go clean
          go build

          echo $PWD
          ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
          ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/*
          cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll .
          ls -lh

          echo "Test transducer"
          ./run-transducer.sh
          rm -rf sherpa-onnx-streaming-zipformer-en-2023-06-26

          echo "Test paraformer"
          ./run-paraformer.sh
          rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en

      - uses: actions/upload-artifact@v4
        with:
          name: tts-waves-${{ matrix.os }}-${{ matrix.arch }}
          path: tts-waves


================================================
FILE: .github/workflows/test-go.yaml
================================================
name: test-go

on:
  push:
    branches:
      - master
    paths:
      - '.github/workflows/test-go.yaml'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'go-api-examples/**'
      - 'scripts/go/**'

  workflow_dispatch:

concurrency:
  group: test-go-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test-go:
    name: ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest, macos-15-intel, ubuntu-latest, windows-2022, ubuntu-22.04-arm]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-go

      - uses: actions/setup-go@v5
        with:
          go-version: '>=1.17'

      - name: Display go version
        shell: bash
        run: |
          go version
          go env GOPATH
          go env GOARCH
          go env CGO_ENABLED

      - name: Display go env
        shell: bash
        run: |
          go env

      - name: Build sherpa-onnx
        shell: bash
        run: |
          upload_dir=$PWD/to-upload
          mkdir -p $upload_dir
          echo "upload_dir"

          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          mkdir build
          cd build
          cmake \
            -DCMAKE_C_COMPILER_LAUNCHER=ccache \
            -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
            -DBUILD_SHARED_LIBS=ON \
            -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
            -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
            -DCMAKE_INSTALL_PREFIX=./install \
            ..

          if [[ ${{ matrix.os }} == windows-2022 ]]; then
            cmake --build . --target install --config Release -- -m:2
          else
            make -j2 install
          fi

          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
            cp -v ./lib/*.so $upload_dir
            cp -v _deps/onnxruntime-src/lib/libonnxruntime*so* $upload_dir

            cp -v _deps/onnxruntime-src/lib/libonnxruntime*so* ./lib/

            rm -v ./lib/*.a
            ls -h ./lib
          elif [[ ${{ matrix.os }} == windows-2022 ]]; then
            cp -v ./install/lib/sherpa-onnx-c-api.dll ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/
            cp -v ./install/lib/onnxruntime.dll ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/
            ls -lh ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/

            cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/add-punctuation
            cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/add-punctuation-online
            cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/audio-tagging
            cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/keyword-spotting-from-file/
            cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-canary-decode-files/
            cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-decode-files/
            cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-fire-red-asr-ctc-decode-files
            cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-funasr-nano-decode-files
            cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-medasr-ctc-decode-files
            cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-moonshine-v2-decode-files
            cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-omnilingual-asr-ctc-decode-files
            cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-speaker-diarization/
            cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-tts/
            cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/speaker-identification/
            cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/speech-enhancement-gtcrn
            cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/speech-enhancement-dpdfnet
            cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/streaming-decode-files/
            cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/streaming-speech-enhancement-gtcrn
            cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/streaming-speech-enhancement-dpdfnet
            cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/streaming-hlg-decoding/
            cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/vad
            cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/vad-asr-paraformer
            cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/vad-asr-whisper
            cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/vad-speaker-identification
            cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/vad-spoken-language-identification
            cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/zero-shot-pocket-tts
            cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/supertonic-tts

            cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll $upload_dir
          else
            cp -v _deps/onnxruntime-src/lib/libonnxruntime*dylib $upload_dir/
            cp -v lib/*.dylib $upload_dir

            cp -v _deps/onnxruntime-src/lib/libonnxruntime*dylib ./lib/
            rm ./lib/*.a
            rm ./lib/libonnxruntime.dylib
            cd lib
            ln -s libonnxruntime.1.23.2.dylib libonnxruntime.dylib
            cd ..
          fi

          cd ../scripts/go/_internal/
          ls -lh lib
          echo "-----"
          ls -lh lib/*/
          echo "-----"

          go mod tidy
          go build

      - uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.os }}-libs
          path: to-upload/

      - name: Test SupertonicTTS
        shell: bash
        run: |
          cd scripts/go/_internal/supertonic-tts
          ls -lh
          go mod tidy
          cat go.mod
          go build
          ls -lh

          ./run.sh
          rm -rf sherpa-onnx-*
          ls -lh *.wav

      - name: Test non-streaming decoding files with Moonshine v2
        shell: bash
        run: |
          cd scripts/go/_internal/non-streaming-moonshine-v2-decode-files
          ls -lh
          go mod tidy
          cat go.mod
          go build
          ls -lh

          ./run.sh
          rm -rf sherpa-onnx-moonshine-*

      - name: Test non-streaming decoding files with FireRedAsrCtc
        shell: bash
        run: |
          cd scripts/go/_internal/non-streaming-fire-red-asr-ctc-decode-files
          ls -lh
          go mod tidy
          cat go.mod
          go build
          ls -lh

          ./run.sh
          rm -rf sherpa-onnx-fire-red-*

      - name: Test ZeroShot TTS with PocketTTS
        shell: bash
        run: |
          cd scripts/go/_internal/zero-shot-pocket-tts
          ls -lh
          go mod tidy
          cat go.mod
          go build
          ls -lh

          ./run.sh
          rm -rf sherpa-onnx-*
          ls -lh *.wav

      - name: Test ZeroShot TTS with ZipVoice
        shell: bash
        run: |
          cd scripts/go/_internal/zero-shot-zipvoice-tts
          ls -lh
          go mod tidy
          cat go.mod
          go build
          ls -lh

          ./run.sh
          rm -rf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia
          rm -f vocos_24khz.onnx
          ls -lh *.wav

      - name: Test non-streaming decoding files with FunASR Nano
        shell: bash
        run: |
          cd scripts/go/_internal/non-streaming-funasr-nano-decode-files
          ls -lh
          go mod tidy
          cat go.mod
          go build
          ls -lh

          ./run.sh
          rm -rf sherpa-onnx-funasr-*

      - name: Test non-streaming decoding files with MedASR
        shell: bash
        run: |
          cd scripts/go/_internal/non-streaming-medasr-ctc-decode-files
          ls -lh
          go mod tidy
          cat go.mod
          go build
          ls -lh

          ./run.sh
          rm -rf sherpa-onnx-medasr-*

      - name: Test non-streaming decoding files with Omnilingual ASR
        shell: bash
        run: |
          cd scripts/go/_internal/non-streaming-omnilingual-asr-ctc-decode-files
          ls -lh
          go mod tidy
          cat go.mod
          go build
          ls -lh

          ./run.sh
          rm -rf sherpa-onnx-omnilingual-*

      - name: Test non-streaming TTS
        shell: bash
        run: |
          mkdir tts-waves

          cd scripts/go/_internal/non-streaming-tts/
          ls -lh
          go mod tidy
          cat go.mod
          go build
          ls -lh

          echo "Test kitten en"
          ./run-kitten-en.sh
          rm -rf kitten-*
          ls -lh

          echo "Test kokoro zh+en"
          ./run-kokoro-zh-en.sh
          rm -rf kokoro-multi-*
          ls -lh

          echo "Test kokoro en"
          ./run-kokoro-en.sh
          rm -rf kokoro-en-*
          ls -lh

          echo "Test matcha zh"
          ./run-matcha-zh.sh
          rm -rf matcha-icefall-*

          echo "Test matcha en"
          ./run-matcha-en.sh
          rm -rf matcha-icefall-*
          ls -lh *.wav

          echo "Test vits-ljs"
          ./run-vits-ljs.sh
          rm -rf vits-ljs

          echo "Test vits-vctk"
          ./run-vits-vctk.sh
          rm -rf vits-vctk

          echo "Test vits-zh-aishell3"
          ./run-vits-zh-aishell3.sh
          rm -rf vits-icefall-zh-aishell3

          echo "Test vits-piper-en_US-lessac-medium"
          ./run-vits-piper-en_US-lessac-medium.sh
          rm -rf vits-piper-en_US-lessac-medium

          cp *.wav ../../../../tts-waves/

      - uses: actions/upload-artifact@v4
        with:
          name: tts-waves-${{ matrix.os }}
          path: tts-waves

      - name: Test streaming decoding files
        shell: bash
        run: |
          cd scripts/go/_internal/streaming-decode-files
          ls -lh
          go mod tidy
          cat go.mod
          go build
          ls -lh

          echo "Test T-one CTC"
          ./run-t-one-ctc.sh

          echo "Test zipformer2 CTC"
          ./run-zipformer2-ctc-with-hr.sh
          ./run-zipformer2-ctc.sh
          rm -rf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13

          echo "Test transducer"
          ./run-transducer.sh
          rm -rf sherpa-onnx-streaming-zipformer-en-2023-06-26

          ./run-transducer-itn.sh
          rm -rf sherpa-onnx-streaming-*

          echo "Test paraformer"
          ./run-paraformer.sh
          rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en

      - name: Test non-streaming decoding files with NeMo Canary
        shell: bash
        run: |
          cd scripts/go/_internal/non-streaming-canary-decode-files/
          ls -lh
          go mod tidy
          cat go.mod
          go build
          ls -lh

          ./run.sh
          rm -rf sherpa-onnx-nemo-*

      - name: Test non-streaming decoding files
        shell: bash
        run: |
          cd scripts/go/_internal/non-streaming-decode-files/
          ls -lh
          go mod tidy
          cat go.mod
          go build
          ls -lh

          echo "Test Wenet CTC"
          ./run-wenet-ctc.sh
          rm -rf sherpa-onnx-wenet*

          echo "Test Zipformer CTC"
          ./run-zipformer-ctc.sh
          rm -rf sherpa-onnx-zipformer-*

          echo "Test SenseVoice ctc"
          ./run-sense-voice-small-with-hr.sh
          ./run-sense-voice-small.sh
          rm -rf sherpa-onnx-sense-*

          echo "Test Dolphin CTC"
          ./run-dolphin-ctc-base.sh
          rm -rf sherpa-onnx-dolphin-*

          echo "Test FireRedAsr"
          ./run-fire-red-asr.sh
          rm -rf sherpa-onnx-fire-red-asr-*

          echo "Test Moonshine"
          ./run-moonshine.sh
          rm -rf sherpa-onnx-*

          echo "Test telespeech ctc"
          ./run-telespeech-ctc.sh
          rm -rf sherpa-onnx-telespeech-ctc-*

          echo "Test transducer"
          ./run-transducer.sh
          rm -rf sherpa-onnx-zipformer-en-2023-06-26

          echo "Test transducer"
          ./run-transducer.sh
          rm -rf sherpa-onnx-zipformer-en-2023-06-26

          echo "Test paraformer"
          ./run-paraformer.sh
          ./run-paraformer-itn.sh
          rm -rf sherpa-onnx-paraformer-zh-2023-09-14

          echo "Test NeMo CTC"
          ./run-nemo-ctc.sh
          rm -rf sherpa-onnx-nemo-ctc-en-conformer-medium

          echo "Test Whisper tiny.en"
          ./run-whisper.sh
          rm -rf sherpa-onnx-whisper-tiny.en

          echo "Test Tdnn yesno"
          ./run-tdnn-yesno.sh
          rm -rf sherpa-onnx-tdnn-yesno

      - name: Test speech enhancement (GTCRN)
        shell: bash
        run: |
          cd scripts/go/_internal/speech-enhancement-gtcrn/

          ./run.sh

          ls -lh

      - name: Test speech enhancement (DPDFNet)
        shell: bash
        run: |
          cd scripts/go/_internal/speech-enhancement-dpdfnet/

          ./run.sh

          ls -lh

      - name: Test streaming speech enhancement (GTCRN)
        shell: bash
        run: |
          cd scripts/go/_internal/streaming-speech-enhancement-gtcrn/

          ./run.sh

          ls -lh

      - name: Test streaming speech enhancement (DPDFNet)
        shell: bash
        run: |
          cd scripts/go/_internal/streaming-speech-enhancement-dpdfnet/

          ./run.sh

          ls -lh

      - name: Test audio tagging
        shell: bash
        run: |
          cd scripts/go/_internal/audio-tagging/

          ./run.sh

          ls -lh

      - name: Test Keyword spotting
        shell: bash
        run: |
          cd scripts/go/_internal/keyword-spotting-from-file/

          ./run.sh

          ls -lh

      - name: Test adding punctuation
        shell: bash
        run: |
          cd scripts/go/_internal/add-punctuation/
          ./run.sh

      - name: Test adding online punctuation
        shell: bash
        run: |
          cd scripts/go/_internal/add-punctuation-online/
          ./run.sh

      - name: Test non-streaming speaker diarization
        shell: bash
        run: |
          cd scripts/go/_internal/non-streaming-speaker-diarization/
          ./run.sh

      - name: Test speaker identification
        shell: bash
        run: |
          cd scripts/go/_internal/speaker-identification/
          ./run.sh

      - name: Test streaming HLG decoding
        shell: bash
        run: |
          cd scripts/go/_internal/streaming-hlg-decoding/
          ./run.sh


================================================
FILE: .github/workflows/test-nodejs-addon-api.yaml
================================================
name: test-node-addon-api

on:
  push:
    branches:
      - master
    paths:
      - '.github/workflows/test-nodejs-addon-api.yaml'
      - '.github/scripts/test-nodejs-addon-npm.sh'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/c-api/*'
      - 'scripts/node-addon-api/**'
      - 'nodejs-addon-examples/**'

  workflow_dispatch:

concurrency:
  group: test-node-addon-api-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  test-node-addon-api:
    name: ${{ matrix.os }} ${{ matrix.node-version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest, ubuntu-latest]
        node-version: ["16", "22"]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Python dependencies
        shell: bash
        run: |
          pip install ninja

      - name: Show ninja help
        shell: bash
        run: |
          ninja --help || true

      - uses: actions/setup-node@v4
        with:
          registry-url: 'https://registry.npmjs.org'
          node-version: ${{ matrix.node-version }}

      - name: Display node version
        shell: bash
        run: |
          node --version

      - name: Display npm help
        shell: bash
        run: |
          npm help

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-release-shared

      - name: Build sherpa-onnx
        if: matrix.os == 'windows-2022'
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"

          mkdir build
          cd build
          cmake \
            -DCMAKE_BUILD_TYPE=Release \
            -DCMAKE_INSTALL_PREFIX=./install \
            -DBUILD_SHARED_LIBS=ON \
            -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
            -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
            -DSHERPA_ONNX_ENABLE_BINARY=OFF \
            ..

          ls -lh  _deps/onnxruntime-src/lib/

          cmake --build . --config Release --target install -- -m:6

          ls -lh install/lib

          echo "----------"

          cp -v  _deps/onnxruntime-src/lib/*.lib ./install/lib

          echo "----------"

          ls -lh install/lib

      - name: Build sherpa-onnx
        if: matrix.os != 'windows-2022'
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"

          mkdir build
          cd build
          cmake \
            -G Ninja \
            -DCMAKE_BUILD_TYPE=Release \
            -DCMAKE_INSTALL_PREFIX=./install \
            -DBUILD_SHARED_LIBS=ON \
            -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
            -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
            -DSHERPA_ONNX_ENABLE_BINARY=OFF \
            ..

          cmake --build . --config Release --target install -- -j 6

      - name: Build node-addon-api package
        shell: bash
        run: |
          d=$PWD
          export SHERPA_ONNX_INSTALL_DIR=$d/build/install

          cd scripts/node-addon-api

          echo $d/build/install

          ls -lh $d/build/install

          npm i

          ./node_modules/.bin/cmake-js compile --log-level verbose

      - name: Run tests
        shell: bash
        run: |
          export PATH=$PWD/build/install/lib:$PATH
          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          d=nodejs-addon-examples
          cd $d
          files=$(ls *.js)
          echo $files
          for f in ${files[@]}; do
            echo $f
            sed -i.bak s%sherpa-onnx-node%./sherpa-onnx% ./$f
          done
          cd ..

          cp -v scripts/node-addon-api/build/Release/sherpa-onnx.node $d/
          cp -v scripts/node-addon-api/lib/*.js $d/
          cp -v ./build/install/lib/lib*  $d/

          .github/scripts/test-nodejs-addon-npm.sh


================================================
FILE: .github/workflows/test-nodejs-addon-npm-aarch64.yaml
================================================
name: test-node-addon-npm-aarch64

on:
  push:
    branches:
      - master
    paths:
      - '.github/workflows/test-nodejs-addon-npm-aarch64.yaml'
      - '.github/scripts/test-nodejs-addon-npm.sh'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/c-api/*'
      - 'scripts/node-addon-api/**'
      - 'scripts/node-addon-api/*.js'
      - 'nodejs-addon-examples/**'

  workflow_dispatch:

concurrency:
  group: test-node-addon-npm-aarch64-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  test-node-addon-npm-aarch64:
    name: ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v2
        with:
          platforms: arm64

      - name: Test sherpa-onnx
        shell: bash
        run: |
            docker run --rm \
              --platform linux/arm64 \
              --volume ${{ github.workspace }}/:/shared/ \
              quay.io/pypa/manylinux2014_aarch64 \
            bash -c '
              git config --global --add safe.directory /shared

              echo $HOME
              uname -a
              cat /etc/*release
              cmake --version

              curl -sL https://rpm.nodesource.com/setup_16.x | bash -
              yum install -y nodejs

              node --version

              cd /shared

              d=nodejs-addon-examples
              echo "dir: $d"
              cd $d
              npm install --verbose
              git status
              ls -lh
              ls -lh node_modules

              export DYLD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-x64:$DYLD_LIBRARY_PATH
              export DYLD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-arm64:$DYLD_LIBRARY_PATH
              export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-x64:$LD_LIBRARY_PATH
              export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-arm64:$LD_LIBRARY_PATH

              cd ../

              .github/scripts/test-nodejs-addon-npm.sh
            '


================================================
FILE: .github/workflows/test-nodejs-addon-npm-win-x86.yaml
================================================
name: test-node-addon-npm-win-x86

on:
  push:
    branches:
      - master
    paths:
      - '.github/workflows/test-nodejs-addon-npm-win-x86.yaml'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/c-api/*'
      - 'scripts/node-addon-api/**'
      - 'scripts/node-addon-api/*.js'
      - 'nodejs-addon-examples/**'
      - '.github/scripts/test-nodejs-addon-npm.sh'

  workflow_dispatch:

concurrency:
  group: test-node-addon-npm-win-x86-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  test-node-addon-npm-win-x86:
    name: ${{ matrix.os }} node v${{ matrix.node-version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [windows-2022]
        node-version: ["16", "17", "18", "19", "21", "22"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - uses: actions/setup-node@v4
        with:
          registry-url: 'https://registry.npmjs.org'
          node-version: ${{ matrix.node-version }}
          architecture: 'x86'

      - name: Display node version
        shell: bash
        run: |
          node --version

      - name: Run tests
        shell: bash
        run: |
          d=nodejs-addon-examples
          echo "dir: $d"
          cd $d
          npm install --verbose
          git status
          ls -lh
          ls -lh node_modules

          export DYLD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-x64:$DYLD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-arm64:$DYLD_LIBRARY_PATH
          export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-x64:$DYLD_LIBRARY_PATH
          export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-arm64:$DYLD_LIBRARY_PATH
          export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-x64:$LD_LIBRARY_PATH
          export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-arm64:$LD_LIBRARY_PATH

          cd ../

          .github/scripts/test-nodejs-addon-npm.sh


================================================
FILE: .github/workflows/test-nodejs-addon-npm.yaml
================================================
name: test-node-addon-npm

on:
  push:
    branches:
      - master
    paths:
      - '.github/workflows/test-nodejs-addon-npm.yaml'
      - '.github/scripts/test-nodejs-addon-npm.sh'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/c-api/*'
      - 'scripts/node-addon-api/**'
      - 'scripts/node-addon-api/*.js'
      - 'nodejs-addon-examples/**'

  workflow_dispatch:

concurrency:
  group: test-node-addon-npm-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  test-node-addon-npm:
    name: ${{ matrix.os }} node v${{ matrix.node-version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest, macos-14, ubuntu-latest, ubuntu-22.04, windows-2022]
        node-version: ["16", "17", "18", "19", "21", "22"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - uses: actions/setup-node@v4
        with:
          registry-url: 'https://registry.npmjs.org'
          node-version: ${{ matrix.node-version }}

      - name: Display node version
        shell: bash
        run: |
          node --version

      - name: Run tests
        shell: bash
        run: |
          d=nodejs-addon-examples
          echo "dir: $d"
          cd $d
          npm install --verbose
          git status
          ls -lh
          ls -lh node_modules

          export DYLD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-x64:$DYLD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-arm64:$DYLD_LIBRARY_PATH
          export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-x64:$DYLD_LIBRARY_PATH
          export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-arm64:$DYLD_LIBRARY_PATH
          export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-x64:$LD_LIBRARY_PATH
          export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-arm64:$LD_LIBRARY_PATH

          cd ../

          .github/scripts/test-nodejs-addon-npm.sh


================================================
FILE: .github/workflows/test-nodejs-npm.yaml
================================================
name: test-nodejs-npm

on:
  workflow_dispatch:

  schedule:
    # minute (0-59)
    # hour (0-23)
    # day of the month (1-31)
    # month (1-12)
    # day of the week (0-6)
    # nightly build at 23:50 UTC time every day
    - cron: "50 23 * * *"

concurrency:
  group: test-nodejs-npm-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  test-nodejs-npm:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest, windows-2022]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - uses: actions/setup-node@v4
        with:
          registry-url: 'https://registry.npmjs.org'

      - name: Display node version
        shell: bash
        run: |
          node --version
          npm --version

      - name: Run tests
        shell: bash
        run: |
          node --version
          npm --version

          export d=nodejs-examples
          ./.github/scripts/test-nodejs-npm.sh


================================================
FILE: .github/workflows/test-nodejs.yaml
================================================
name: test-nodejs

on:
  push:
    branches:
      - master
    paths:
      - '.github/workflows/test-nodejs.yaml'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/c-api/*'
      - 'scripts/nodejs/**'
      - 'nodejs-examples/**'

  workflow_dispatch:

concurrency:
  group: test-nodejs-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  test-nodejs:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest] #, macos-latest] #, windows-2022]
        python-version: ["3.8"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-${{ matrix.build_type }}-wasm-nodejs

      - name: Install emsdk
        uses: mymindstorm/setup-emsdk@v14
        with:
          version: 3.1.51
          actions-cache-folder: 'emsdk-cache'

      - name: View emsdk version
        shell: bash
        run: |
          emcc -v
          echo "--------------------"
          emcc --check

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - uses: actions/setup-node@v4
        with:
          registry-url: 'https://registry.npmjs.org'

      - name: Display node version
        shell: bash
        run: |
          node --version

      - name: Build nodejs package
        shell: bash
        env:
          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          ./build-wasm-simd-nodejs.sh
          cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.js ./scripts/nodejs/
          cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.wasm ./scripts/nodejs/

      - name: replace files
        shell: bash
        run: |
          cd nodejs-examples
          files=$(ls -1 *.js)
          for f in ${files[@]}; do
            echo $f
            sed -i.bak s%\'sherpa-onnx\'%\'./index.js\'% $f
            git status
          done
          git diff
          cp *.js ../scripts/nodejs

      - name: Run tests
        shell: bash
        run: |
          node --version
          npm --version
          export d=scripts/nodejs
          cat $d/index.js

          pushd $d
          npm install
          npm install wav
          popd

          ./.github/scripts/test-nodejs-npm.sh


================================================
FILE: .github/workflows/test-onnxruntime-version.yaml
================================================
name: test-onnxruntime-version

on:
  push:
    branches:
      - master
      - test-onnxruntime
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'

  workflow_dispatch:

concurrency:
  group: test-onnxrntime-version-${{ github.ref }}
  cancel-in-progress: true

jobs:
  macos:
    runs-on: ${{ matrix.os }}
    name: onnxruntime ${{ matrix.version }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        version: ["1.11.0", "1.11.1", "1.12.0", "1.12.1", "1.13.1", "1.14.0", "1.14.1", "1.15.0", "1.15.1", "1.16.1", "1.16.2", "1.17.0", "1.17.1", "1.17.3", "1.18.0", "1.18.1", "1.19.0", "1.19.2", "1.20.0", "1.20.1", "1.20.2", "1.21.0", "1.21.1", "1.22.0", "1.22.1", "1.22.2", "1.23.0", "1.23.1", "1.23.2"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-onnxruntime-${{ matrix.version }}

      - name: Download onnxruntime ${{ matrix.version }}
        shell: bash
        run: |
          version=${{ matrix.version }}
          curl -SL -O https://github.com/microsoft/onnxruntime/releases/download/v${version}/onnxruntime-osx-universal2-${version}.tgz
          tar xvf onnxruntime-osx-universal2-${version}.tgz
          ls -lh onnxruntime-osx-universal2-${version}

          ls -lh onnxruntime-osx-universal2-${version}
          echo "---"
          ls -lh onnxruntime-osx-universal2-${version}/include
          echo "---"
          ls -lh onnxruntime-osx-universal2-${version}/lib

      - name: Configure CMake
        shell: bash
        run: |
          version=${{ matrix.version }}
          onnxruntime_dir=$PWD/onnxruntime-osx-universal2-${version}
          export SHERPA_ONNXRUNTIME_LIB_DIR=$onnxruntime_dir/lib/
          export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$onnxruntime_dir/include/

          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          mkdir build
          cd build

          cmake \
            -D BUILD_SHARED_LIBS=ON \
            -D CMAKE_OSX_ARCHITECTURES='arm64;x86_64' \
            -D CMAKE_INSTALL_PREFIX=./install \
            ..

      - name: Build sherpa-onnx for macos
        shell: bash
        run: |
          version=${{ matrix.version }}
          onnxruntime_dir=$PWD/onnxruntime-osx-universal2-${version}
          export SHERPA_ONNXRUNTIME_LIB_DIR=$onnxruntime_dir/lib/
          export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$onnxruntime_dir/include/

          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"

          cd build
          make -j2
          make install

          ls -lh lib
          ls -lh bin

          file ./bin/sherpa-onnx

          rm -fv ./install/include/cargs.h
          rm -fv ./install/lib/cargs.h
          rm -fv ./install/lib/libcargs.dylib
          rm -fv ./install/lib/libcargs.a
          rm -rfv ./install/lib/pkgconfig

      - name: Display dependencies of sherpa-onnx for macos
        shell: bash
        run: |
          file bin/sherpa-onnx
          otool -L build/bin/sherpa-onnx
          otool -l build/bin/sherpa-onnx

      - name: Copy files
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-onnxruntime-${{ matrix.version }}-osx-universal2-shared
          mkdir $dst

          cp -a build/install/bin $dst/
          mkdir $dst/lib
          cp -a build/install/lib/*.dylib* $dst/lib/
          cp -a build/install/include $dst/

          brew install tree
          tree $dst

          tar cjvf ${dst}.tar.bz2 $dst

      - name: Release pre-compiled binaries and libs for macOS
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*osx-universal2*.tar.bz2

      - name: Test offline CTC
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline

          .github/scripts/test-offline-ctc.sh

      - name: Test offline speech denoiser
        shell: bash
        run: |
          du -h -d1 .
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline-denoiser

          .github/scripts/test-offline-speech-denoiser.sh

      - name: Test offline TTS
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline-tts

          .github/scripts/test-offline-tts.sh

      - name: Test offline Moonshine
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline

          .github/scripts/test-offline-moonshine.sh

      - name: Test C++ API
        shell: bash
        run: |
          du -h -d1 .
          export PATH=$PWD/build/bin:$PATH
          export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api
          export CXX_WHISPER_EXE=whisper-cxx-api
          export CXX_SENSE_VOICE_EXE=sense-voice-cxx-api

          .github/scripts/test-cxx-api.sh
          du -h -d1 .

      - name: Test offline speaker diarization
        shell: bash
        run: |
          du -h -d1 .
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline-speaker-diarization

          .github/scripts/test-speaker-diarization.sh

      - name: Test offline transducer
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline

          .github/scripts/test-offline-transducer.sh

      - name: Test online punctuation
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-online-punctuation

          .github/scripts/test-online-punctuation.sh

      - name: Test online CTC
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx

          .github/scripts/test-online-ctc.sh

      - name: Test offline punctuation
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline-punctuation

          .github/scripts/test-offline-punctuation.sh

      - name: Test C API
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export SLID_EXE=spoken-language-identification-c-api
          export SID_EXE=speaker-identification-c-api
          export AT_EXE=audio-tagging-c-api
          export PUNCT_EXE=add-punctuation-c-api

          .github/scripts/test-c-api.sh

      - name: Test Audio tagging
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline-audio-tagging

          .github/scripts/test-audio-tagging.sh

      - name: Test spoken language identification (C++ API)
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline-language-identification

          .github/scripts/test-spoken-language-identification.sh

      - name: Test transducer kws
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-keyword-spotter

          .github/scripts/test-kws.sh

      - name: Test online paraformer
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx

          .github/scripts/test-online-paraformer.sh

      - name: Test offline Whisper
        if: matrix.build_type != 'Debug'
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx-offline

          .github/scripts/test-offline-whisper.sh

      - name: Test online transducer
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=sherpa-onnx

          .github/scripts/test-online-transducer.sh

      - name: Test online transducer (C API)
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
          export EXE=decode-file-c-api

          .github/scripts/test-online-transducer.sh


================================================
FILE: .github/workflows/test-pip-install.yaml
================================================
name: test-pip-install

on:
  push:
    branches:
      - test-pip-install
  schedule:
    # minute (0-59)
    # hour (0-23)
    # day of the month (1-31)
    # month (1-12)
    # day of the week (0-6)
    # nightly build at 23:50 UTC time every day
    - cron: "50 23 * * *"
  workflow_dispatch:

concurrency:
  group: test-pip-install-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  test_pip_install:
    runs-on: ${{ matrix.os }}
    name: ${{ matrix.os }} ${{ matrix.python-version }}
    strategy:
      fail-fast: false
      matrix:
        # See https://github.com/actions/runner-images
        include:
          - os: ubuntu-22.04
            python-version: "3.8"
          - os: ubuntu-22.04
            python-version: "3.9"
          - os: ubuntu-22.04
            python-version: "3.10"
          - os: ubuntu-latest
            python-version: "3.11"
          - os: ubuntu-24.04
            python-version: "3.12"
          - os: ubuntu-latest
            python-version: "3.13"

          - os: ubuntu-24.04-arm
            python-version: "3.8"
          - os: ubuntu-24.04-arm
            python-version: "3.9"
          - os: ubuntu-24.04-arm
            python-version: "3.10"
          - os: ubuntu-24.04-arm
            python-version: "3.11"
          - os: ubuntu-24.04-arm
            python-version: "3.12"
          - os: ubuntu-24.04-arm
            python-version: "3.13"

          - os: macos-15-intel
            python-version: "3.8"

          - os: macos-15-intel
            python-version: "3.9"
          - os: macos-15-intel
            python-version: "3.10"
          - os: macos-15-intel
            python-version: "3.11"

          - os: macos-14
            python-version: "3.12"
          - os: macos-14
            python-version: "3.13"

          - os: windows-2022
            python-version: "3.8"
          - os: windows-2022
            python-version: "3.9"

          - os: windows-2022
            python-version: "3.10"
          - os: windows-2022
            python-version: "3.11"
          - os: windows-2022
            python-version: "3.12"
          - os: windows-2022
            python-version: "3.13"

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install sherpa-onnx
        shell: bash
        run: |
          pip install --verbose -U sherpa-onnx sherpa-onnx-core sherpa-onnx-bin

      - name: Test sherpa-onnx-bin
        shell: bash
        run: |
          sherpa-onnx-version

          sherpa-onnx --help
          sherpa-onnx-keyword-spotter --help
          sherpa-onnx-offline --help
          sherpa-onnx-offline-tts --help

          sherpa-onnx-microphone --help
          sherpa-onnx-microphone-offline --help

          sherpa-onnx-offline-websocket-server --help

          sherpa-onnx-online-websocket-server --help
          sherpa-onnx-online-websocket-client --help

      - name: Test sherpa-onnx-core
        shell: bash
        run: |
          python3 -m sherpa_onnx --cflags
          python3 -m sherpa_onnx --c-api-libs
          python3 -m sherpa_onnx --c-api-libs-only-L
          python3 -m sherpa_onnx --c-api-libs-only-l

          python3 -m sherpa_onnx --cxx-api-libs
          python3 -m sherpa_onnx --cxx-api-libs-only-L
          python3 -m sherpa_onnx --cxx-api-libs-only-l

      - name: Test sherpa-onnx
        shell: bash
        run: |
          python3 -c "import sherpa_onnx; print(sherpa_onnx.__file__)"
          python3 -c "import sherpa_onnx; print(sherpa_onnx.__version__)"
          python3 -c "import sherpa_onnx; print(sherpa_onnx.OnlineRecognizer)"
          python3 -c "import sherpa_onnx; print(sherpa_onnx.OfflineRecognizer)"


================================================
FILE: .github/workflows/test-piper-phonemize.yaml
================================================
name: test-piper-phonemize
on:
  push:
    branches:
      - master
    paths:
      - '.github/workflows/test-piper-phonemize.yaml'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'

  workflow_dispatch:

concurrency:
  group: test-piper-phonemize-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test_piper_phonemize:
    name: ${{ matrix.os }} ${{ matrix.build_type }} ${{ matrix.shared_lib }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest, windows-2022]
        build_type: [Release, Debug]
        shared_lib: [ON, OFF]
        exclude:
          - os: windows-2022
            build_type: Debug
            shared_lib: OFF

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-${{ matrix.build_type }}-shared-${{ matrix.shared_lib }}

      - name: Configure CMake
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          mkdir build
          cd build
          cmake -DSHERPA_ONNX_ENABLE_EPSEAK_NG_EXE=ON -DBUILD_ESPEAK_NG_EXE=ON -DCMAKE_VERBOSE_MAKEFILE=ON -D SHERPA_ONNX_ENABLE_TESTS=ON -D CMAKE_BUILD_TYPE=${{ matrix.build_type }} -D BUILD_SHARED_LIBS=${{ matrix.shared_lib }} -DCMAKE_INSTALL_PREFIX=./install ..

      - name: Build
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          cd build
          cmake --build . --target install --config ${{ matrix.build_type }}

      - name: run test
        if: matrix.os != 'windows-2022'
        shell: bash
        run: |
          cd build

          ls -lh install/
          ls -lh install/share
          ls -lh install/share/espeak-ng-data/

          ./bin/piper-phonemize-test

      - name: run test
        if: matrix.os == 'windows-2022'
        shell: bash
        run: |
          cd build

          ls -lh install/
          ls -lh install/share
          ls -lh install/share/espeak-ng-data/

          ./bin/${{ matrix.build_type }}/piper-phonemize-test


================================================
FILE: .github/workflows/test-python-offline-websocket-server.yaml
================================================
name: Python offline websocket server

on:
  push:
    branches:
      - master
    paths:
      - '.github/workflows/test-python-offline-websocket-server.yaml'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/python/**'

  workflow_dispatch:

concurrency:
  group: python-offline-websocket-server-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  python_offline_websocket_server:
    runs-on: ${{ matrix.os }}
    name: ${{ matrix.os }} ${{ matrix.python-version }} ${{ matrix.model_type }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, ubuntu-22.04, windows-2022, macos-latest, macos-14]
        python-version: ["3.10"]
        model_type: ["transducer", "paraformer", "nemo_ctc", "whisper", "tdnn"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-python-${{ matrix.python-version }}

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip numpy pypinyin sentencepiece setuptools wheel

      - name: Install sherpa-onnx
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          python3 -m pip install .
          python3 -m pip install websockets

      - name: Start server for transducer models
        if: matrix.model_type == 'transducer'
        shell: bash
        run: |
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-06-26.tar.bz2
          tar xvf sherpa-onnx-zipformer-en-2023-06-26.tar.bz2
          rm sherpa-onnx-zipformer-en-2023-06-26.tar.bz2

          python3 ./python-api-examples/non_streaming_server.py \
            --encoder ./sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.onnx \
            --decoder ./sherpa-onnx-zipformer-en-2023-06-26/decoder-epoch-99-avg-1.onnx \
            --joiner ./sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.onnx \
            --tokens ./sherpa-onnx-zipformer-en-2023-06-26/tokens.txt &

          echo "sleep 10 seconds to wait the server start"
          sleep 10

      - name: Start client for transducer models
        if: matrix.model_type == 'transducer'
        shell: bash
        run: |
          python3 ./python-api-examples/offline-websocket-client-decode-files-paralell.py \
            ./sherpa-onnx-zipformer-en-2023-06-26/test_wavs/0.wav \
            ./sherpa-onnx-zipformer-en-2023-06-26/test_wavs/1.wav \
            ./sherpa-onnx-zipformer-en-2023-06-26/test_wavs/8k.wav

          python3 ./python-api-examples/offline-websocket-client-decode-files-sequential.py \
            ./sherpa-onnx-zipformer-en-2023-06-26/test_wavs/0.wav \
            ./sherpa-onnx-zipformer-en-2023-06-26/test_wavs/1.wav \
            ./sherpa-onnx-zipformer-en-2023-06-26/test_wavs/8k.wav

      - name: Start server for paraformer models
        if: matrix.model_type == 'paraformer' && matrix.os != 'windows-2022'
        shell: bash
        run: |
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
          tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
          rm sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2

          python3 ./python-api-examples/non_streaming_server.py \
            --paraformer ./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx \
            --tokens ./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt &

          echo "sleep 10 seconds to wait the server start"
          sleep 10

      - name: Start client for paraformer models
        if: matrix.model_type == 'paraformer' && matrix.os != 'windows-2022'
        shell: bash
        run: |
          python3 ./python-api-examples/offline-websocket-client-decode-files-paralell.py \
            ./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/0.wav \
            ./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/1.wav \
            ./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/2.wav \
            ./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/8k.wav

          python3 ./python-api-examples/offline-websocket-client-decode-files-sequential.py \
            ./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/0.wav \
            ./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/1.wav \
            ./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/2.wav \
            ./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/8k.wav

      - name: Start server for nemo_ctc models
        if: matrix.model_type == 'nemo_ctc'
        shell: bash
        run: |
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-ctc-en-conformer-medium.tar.bz2
          tar xvf sherpa-onnx-nemo-ctc-en-conformer-medium.tar.bz2
          rm sherpa-onnx-nemo-ctc-en-conformer-medium.tar.bz2

          python3 ./python-api-examples/non_streaming_server.py \
            --nemo-ctc ./sherpa-onnx-nemo-ctc-en-conformer-medium/model.onnx \
            --tokens ./sherpa-onnx-nemo-ctc-en-conformer-medium/tokens.txt &

          echo "sleep 10 seconds to wait the server start"
          sleep 10

      - name: Start client for nemo_ctc models
        if: matrix.model_type == 'nemo_ctc'
        shell: bash
        run: |
          python3 ./python-api-examples/offline-websocket-client-decode-files-paralell.py \
            ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/0.wav \
            ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/1.wav \
            ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/8k.wav

          python3 ./python-api-examples/offline-websocket-client-decode-files-sequential.py \
            ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/0.wav \
            ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/1.wav \
            ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/8k.wav

      - name: Start server for whisper models
        if: matrix.model_type == 'whisper'
        shell: bash
        run: |
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
          tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
          rm sherpa-onnx-whisper-tiny.en.tar.bz2

          python3 ./python-api-examples/non_streaming_server.py \
            --whisper-encoder=./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.onnx \
            --whisper-decoder=./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.onnx \
            --tokens=./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt &

          echo "sleep 10 seconds to wait the server start"
          sleep 10

      - name: Start client for whisper models
        if: matrix.model_type == 'whisper'
        shell: bash
        run: |
          python3 ./python-api-examples/offline-websocket-client-decode-files-paralell.py \
            ./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav \
            ./sherpa-onnx-whisper-tiny.en/test_wavs/1.wav \
            ./sherpa-onnx-whisper-tiny.en/test_wavs/8k.wav

          python3 ./python-api-examples/offline-websocket-client-decode-files-sequential.py \
            ./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav \
            ./sherpa-onnx-whisper-tiny.en/test_wavs/1.wav \
            ./sherpa-onnx-whisper-tiny.en/test_wavs/8k.wav

      - name: Start server for tdnn models
        if: matrix.model_type == 'tdnn'
        shell: bash
        run: |
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-tdnn-yesno.tar.bz2
          tar xvf sherpa-onnx-tdnn-yesno.tar.bz2
          rm sherpa-onnx-tdnn-yesno.tar.bz2

          python3 ./python-api-examples/non_streaming_server.py \
            --tdnn-model=./sherpa-onnx-tdnn-yesno/model-epoch-14-avg-2.onnx \
            --tokens=./sherpa-onnx-tdnn-yesno/tokens.txt \
            --sample-rate=8000 \
            --feat-dim=23 &

          echo "sleep 10 seconds to wait the server start"
          sleep 10

      - name: Start client for tdnn models
        if: matrix.model_type == 'tdnn'
        shell: bash
        run: |
          python3 ./python-api-examples/offline-websocket-client-decode-files-paralell.py \
            ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_0_1_0_0_0_1.wav \
            ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_0_0_1_0.wav \
            ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_0_1_1_1.wav \
            ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_1_0_0_1.wav \
            ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_1_0_0_0_1.wav \
            ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_1_0_1_1_0.wav

          python3 ./python-api-examples/offline-websocket-client-decode-files-sequential.py \
            ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_0_1_0_0_0_1.wav \
            ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_0_0_1_0.wav \
            ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_0_1_1_1.wav \
            ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_1_0_0_1.wav \
            ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_1_0_0_0_1.wav \
            ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_1_0_1_1_0.wav


================================================
FILE: .github/workflows/test-python-online-websocket-server.yaml
================================================
name: Python online websocket server

on:
  push:
    branches:
      - master
    paths:
      - '.github/workflows/test-python-online-websocket-server.yaml'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
      - 'sherpa-onnx/python/**'

  workflow_dispatch:

concurrency:
  group: python-online-websocket-server-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  python_online_websocket_server:
    runs-on: ${{ matrix.os }}
    name: ${{ matrix.os }} ${{ matrix.python-version }} ${{ matrix.model_type }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, ubuntu-22.04, windows-2022, macos-latest, macos-14]
        python-version: ["3.10"]
        model_type: ["transducer", "paraformer", "zipformer2-ctc"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-python-${{ matrix.python-version }}

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip numpy pypinyin sentencepiece setuptools wheel

      - name: Install sherpa-onnx
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          python3 -m pip install .
          python3 -m pip install websockets

      - name: Start server for zipformer2 CTC models
        if: matrix.model_type == 'zipformer2-ctc'
        shell: bash
        run: |
          curl -O -L https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
          tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
          rm sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2

          python3 ./python-api-examples/streaming_server.py \
            --zipformer2-ctc ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx \
            --tokens=./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt &
          echo "sleep 10 seconds to wait the server start"
          sleep 10

      - name: Start client for zipformer2 CTC models
        if: matrix.model_type == 'zipformer2-ctc'
        shell: bash
        run: |
          python3 ./python-api-examples/online-websocket-client-decode-file.py \
            ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000000.wav

      - name: Start server for transducer models
        if: matrix.model_type == 'transducer'
        shell: bash
        run: |
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-en-2023-06-26.tar.bz2
          tar xvf sherpa-onnx-streaming-zipformer-en-2023-06-26.tar.bz2
          rm sherpa-onnx-streaming-zipformer-en-2023-06-26.tar.bz2

          python3 ./python-api-examples/streaming_server.py \
            --encoder ./sherpa-onnx-streaming-zipformer-en-2023-06-26/encoder-epoch-99-avg-1-chunk-16-left-128.onnx \
            --decoder ./sherpa-onnx-streaming-zipformer-en-2023-06-26/decoder-epoch-99-avg-1-chunk-16-left-128.onnx \
            --joiner ./sherpa-onnx-streaming-zipformer-en-2023-06-26/joiner-epoch-99-avg-1-chunk-16-left-128.onnx \
            --tokens ./sherpa-onnx-streaming-zipformer-en-2023-06-26/tokens.txt &
          echo "sleep 10 seconds to wait the server start"
          sleep 10

      - name: Start client for transducer models
        if: matrix.model_type == 'transducer'
        shell: bash
        run: |
          python3 ./python-api-examples/online-websocket-client-decode-file.py \
            ./sherpa-onnx-streaming-zipformer-en-2023-06-26/test_wavs/0.wav

      - name: Start server for paraformer models
        if: matrix.model_type == 'paraformer'
        shell: bash
        run: |
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
          tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
          rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2

          python3 ./python-api-examples/streaming_server.py \
            --tokens ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt \
            --paraformer-encoder ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx \
            --paraformer-decoder ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx &

          echo "sleep 10 seconds to wait the server start"
          sleep 10

      - name: Start client for paraformer models
        if: matrix.model_type == 'paraformer'
        shell: bash
        run: |
          python3 ./python-api-examples/online-websocket-client-decode-file.py \
            ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav

          python3 ./python-api-examples/online-websocket-client-decode-file.py \
            ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/1.wav

          python3 ./python-api-examples/online-websocket-client-decode-file.py \
            ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/2.wav

          python3 ./python-api-examples/online-websocket-client-decode-file.py \
            ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/3.wav


================================================
FILE: .github/workflows/test-rust-package.yaml
================================================
name: Test rust package

on:
  push:
    branches:
      - rust-api
  workflow_dispatch:

concurrency:
  group: test-rust-package-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test-rust-package:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest, macos-15-intel, ubuntu-22.04-arm]

    env:
      # Placeholder, will be overwritten per OS
      SHERPA_ONNX_LIB_DIR: ""
      RUSTFLAGS: ""

    steps:
      # Checkout the repository
      - uses: actions/checkout@v4

      # Install Rust stable
      - uses: actions-rust-lang/setup-rust-toolchain@v1
        with:
          toolchain: stable

      # Download prebuilt libraries depending on OS
      - name: Download prebuilt Sherpa-ONNX libraries
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          if [[ "${{ matrix.os }}" == "macos-latest" ]]; then
            d=sherpa-onnx-v$SHERPA_ONNX_VERSION-osx-universal2-shared
          elif [[ "${{ matrix.os }}" == "macos-15-intel" ]]; then
            d=sherpa-onnx-v$SHERPA_ONNX_VERSION-osx-universal2-shared
          elif [[ "${{ matrix.os }}" == "ubuntu-latest" ]]; then
            d=sherpa-onnx-v$SHERPA_ONNX_VERSION-linux-x64-shared
          elif [[ "${{ matrix.os }}" == "ubuntu-22.04-arm" ]]; then
            d=sherpa-onnx-v$SHERPA_ONNX_VERSION-linux-aarch64-shared-cpu
          else
            echo "Unknown ${{ matrix.os }}"
            exit 1
          fi

          LIB_URL="https://github.com/k2-fsa/sherpa-onnx/releases/download/v$SHERPA_ONNX_VERSION/$d.tar.bz2"

          curl -SsL -O  "$LIB_URL"
          tar -xvf $d.tar.bz2

          ls -lh $d/lib

          # Export environment variables for this step
          echo "SHERPA_ONNX_LIB_DIR=$PWD/$d/lib" >> $GITHUB_ENV
          echo "RUSTFLAGS=-C link-arg=-Wl,-rpath,$PWD/$d/lib" >> $GITHUB_ENV

      - name: Show libs
        shell: bash
        run: |
          echo "SHERPA_ONNX_LIB_DIR: $SHERPA_ONNX_LIB_DIR"
          ls -lh $SHERPA_ONNX_LIB_DIR

          echo "RUSTFLAGS: $RUSTFLAGS"

      - name: Run test
        shell: bash
        run: |
          ./.github/scripts/test-rust.sh


================================================
FILE: .github/workflows/test-rust.yaml
================================================
name: Test rust

on:
  push:
    branches:
      - rust-api
  workflow_dispatch:

concurrency:
  group: test-rust-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test-rust:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest, macos-15-intel, ubuntu-22.04-arm]

    env:
      # Placeholder, will be overwritten per OS
      SHERPA_ONNX_LIB_DIR: ""
      RUSTFLAGS: ""

    steps:
      # Checkout the repository
      - uses: actions/checkout@v4

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-rust

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Configure sherpa-onnx
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          mkdir build
          cd build
          cmake \
            -D SHERPA_ONNX_ENABLE_BINARY=OFF \
            -D BUILD_SHARED_LIBS=ON \
            -D CMAKE_INSTALL_PREFIX=./install \
            ..

      - name: Build sherpa-onnx
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          cd build
          make -j2
          make install
          ls -lh install/lib

          echo "SHERPA_ONNX_LIB_DIR=$PWD/install/lib" >> $GITHUB_ENV
          echo "RUSTFLAGS=-C link-arg=-Wl,-rpath,$PWD/install/lib" >> $GITHUB_ENV

      # Install Rust stable
      - uses: actions-rust-lang/setup-rust-toolchain@v1
        with:
          toolchain: stable

      - name: Show libs
        shell: bash
        run: |
          echo "SHERPA_ONNX_LIB_DIR: $SHERPA_ONNX_LIB_DIR"
          ls -lh $SHERPA_ONNX_LIB_DIR

          echo "RUSTFLAGS: $RUSTFLAGS"

      - name: Test locally
        shell: bash
        run: |
          cd rust-api-examples

          sed -i.bak 's|^sherpa-onnx *=.*|sherpa-onnx = { path = "../sherpa-onnx/rust/sherpa-onnx" }|' Cargo.toml

          git diff .

          cargo clean
          cargo run --example version

      - name: Run test
        shell: bash
        run: |
          ./.github/scripts/test-rust.sh


================================================
FILE: .github/workflows/upload-models.yaml
================================================
name: upload-models

on:
  push:
    branches:
      - upload-models
  workflow_dispatch:

concurrency:
  group: upload-models-${{ github.ref }}
  cancel-in-progress: true

jobs:
  upload-models:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: upload models
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4

      - name: Upload DPDFNet
        shell: bash
        run: |

          models=(
            baseline.onnx
            dpdfnet2.onnx
            dpdfnet2_48khz_hr.onnx
            dpdfnet4.onnx
            dpdfnet8.onnx
          )
          for m in ${models[@]}; do
            wget https://huggingface.co/Ceva-IP/DPDFNet/resolve/main/onnx/$m
          done

          mv baseline.onnx dpdfnet_baseline.onnx

      - name: Install ffmpeg
        if: false
        shell: bash
        run: |
          sudo apt-get update
          sudo apt-get install -y ffmpeg

      - name: Verify ffmpeg
        if: false
        shell: bash
        run: |
          ffmpeg -version

      - name: git config
        shell: bash
        run: |
          git config --global user.email "csukuangfj@gmail.com"
          git config --global user.name "Fangjun Kuang"

      - name: FireRedASR2 CTC (int8)
        if: false
        shell: bash
        run: |
          d=sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25
          mkdir $d

          pushd $d

          cat >README.md <<EOF
          # Introduction
          Model files are converted from
          https://www.modelscope.cn/models/FireRedTeam/FireRedASR2-AED

          We export only the encoder and the CTC branch. The attention decoder
          is not used.
          EOF

          curl -SL -O https://www.modelscope.cn/models/csukuangfj/FireRedASR2-AED-onnx/resolve/master/ctc/model.int8.onnx
          curl -SL -O https://www.modelscope.cn/models/csukuangfj/FireRedASR2-AED-onnx/resolve/master/ctc/tokens.txt
          mkdir test_wavs
          cd test_wavs
          for w in 0.wav 1.wav 2.wav 3-sichuan.wav 3.wav 4-tianjin.wav 5-henan.wav 8k.wav; do
            curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-fire-red-asr-large-zh_en-fp16-2025-02-16/resolve/main/test_wavs/$w
          done

          popd

          ls -lh $d

          tar cjvf $d.tar.bz2 $d

          ls -lh *.tar.bz2

      - name: FireRedASR2 CTC (fp32)
        if: false
        shell: bash
        run: |
          d=sherpa-onnx-fire-red-asr2-ctc-zh_en-2026-02-25
          mkdir $d

          pushd $d

          cat >README.md <<EOF
          # Introduction
          Model files are converted from
          https://www.modelscope.cn/models/FireRedTeam/FireRedASR2-AED

          We export only the encoder and the CTC branch. The attention decoder
          is not used.
          EOF

          curl -SL -O https://www.modelscope.cn/models/csukuangfj/FireRedASR2-AED-onnx/resolve/master/ctc/model.onnx
          curl -SL -O https://www.modelscope.cn/models/csukuangfj/FireRedASR2-AED-onnx/resolve/master/ctc/model.weights
          curl -SL -O https://www.modelscope.cn/models/csukuangfj/FireRedASR2-AED-onnx/resolve/master/ctc/tokens.txt
          mkdir test_wavs
          cd test_wavs
          for w in 0.wav 1.wav 2.wav 3-sichuan.wav 3.wav 4-tianjin.wav 5-henan.wav 8k.wav; do
            curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-fire-red-asr-large-zh_en-fp16-2025-02-16/resolve/main/test_wavs/$w
          done

          popd

          ls -lh $d

          tar cjvf $d.tar.bz2 $d

          ls -lh *.tar.bz2

      - name: FireRedASR2 AED (int8)
        if: false
        shell: bash
        run: |
          d=sherpa-onnx-fire-red-asr2-zh_en-int8-2026-02-26
          mkdir $d

          pushd $d

          cat >README.md <<EOF
          # Introduction
          Model files are converted from
          https://www.modelscope.cn/models/FireRedTeam/FireRedASR2-AED
          EOF

          curl -SL -O https://www.modelscope.cn/models/csukuangfj/FireRedASR2-AED-onnx/resolve/master/aed/encoder.int8.onnx
          curl -SL -O https://www.modelscope.cn/models/csukuangfj/FireRedASR2-AED-onnx/resolve/master/aed/decoder.int8.onnx
          curl -SL -O https://www.modelscope.cn/models/csukuangfj/FireRedASR2-AED-onnx/resolve/master/aed/tokens.txt
          mkdir test_wavs
          cd test_wavs
          for w in 0.wav 1.wav 2.wav 3-sichuan.wav 3.wav 4-tianjin.wav 5-henan.wav 8k.wav; do
            curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-fire-red-asr-large-zh_en-fp16-2025-02-16/resolve/main/test_wavs/$w
          done

          popd

          ls -lh $d

          tar cjvf $d.tar.bz2 $d

          ls -lh *.tar.bz2

      - name: FireRedASR2 AED (fp32)
        if: false
        shell: bash
        run: |
          d=sherpa-onnx-fire-red-asr2-zh_en-2026-02-26
          mkdir $d

          pushd $d

          cat >README.md <<EOF
          # Introduction
          Model files are converted from
          https://www.modelscope.cn/models/FireRedTeam/FireRedASR2-AED
          EOF

          curl -SL -O https://www.modelscope.cn/models/csukuangfj/FireRedASR2-AED-onnx/resolve/master/aed/encoder.onnx
          curl -SL -O https://www.modelscope.cn/models/csukuangfj/FireRedASR2-AED-onnx/resolve/master/aed/encoder.weights
          curl -SL -O https://www.modelscope.cn/models/csukuangfj/FireRedASR2-AED-onnx/resolve/master/aed/decoder.onnx
          curl -SL -O https://www.modelscope.cn/models/csukuangfj/FireRedASR2-AED-onnx/resolve/master/aed/tokens.txt
          mkdir test_wavs
          cd test_wavs
          for w in 0.wav 1.wav 2.wav 3-sichuan.wav 3.wav 4-tianjin.wav 5-henan.wav 8k.wav; do
            curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-fire-red-asr-large-zh_en-fp16-2025-02-16/resolve/main/test_wavs/$w
          done

          popd

          ls -lh $d

          tar cjvf $d.tar.bz2 $d

          ls -lh *.tar.bz2

      - name: Zipformer-30M-RNNT-6000h
        if: false
        shell: bash
        run: |
          git lfs install
          repo=Zipformer-30M-RNNT-6000h
          git clone https://huggingface.co/hynt/$repo
          pushd $repo
          mkdir test_wavs
          cd test_wavs
          wget https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-vi-2025-04-20/resolve/main/test_wavs/0.wav
          wget https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-vi-2025-04-20/resolve/main/test_wavs/1.wav
          wget https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-vi-2025-04-20/resolve/main/test_wavs/2.wav
          wget https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-vi-2025-04-20/resolve/main/test_wavs/README.md
          popd

          d=sherpa-onnx-zipformer-vi-30M-2026-02-09
          mkdir -p $d
          cat >$d/README.md <<EOF
          # Introduction
          Model files are from
          https://huggingface.co/hynt/Zipformer-30M-RNNT-6000h
          EOF

          cp -v $repo/encoder-epoch-20-avg-10.onnx $d/encoder.onnx
          cp -v $repo/decoder-epoch-20-avg-10.onnx $d/decoder.onnx
          cp -v $repo/joiner-epoch-20-avg-10.onnx $d/joiner.onnx
          cp -v $repo/bpe.model $d/
          cp -v $repo/config.json $d/tokens.txt
          cp -av $repo/test_wavs $d/

          tar cjfv $d.tar.bz2 $d

          d=sherpa-onnx-zipformer-vi-30M-int8-2026-02-09
          mkdir -p $d
          cat >$d/README.md <<EOF
          # Introduction
          Model files are from
          https://huggingface.co/hynt/Zipformer-30M-RNNT-6000h
          EOF

          cp -v $repo/encoder-epoch-20-avg-10.int8.onnx $d/encoder.int8.onnx
          cp -v $repo/decoder-epoch-20-avg-10.onnx $d/decoder.onnx
          cp -v $repo/joiner-epoch-20-avg-10.int8.onnx $d/joiner.int8.onnx
          cp -v $repo/bpe.model $d/
          cp -v $repo/config.json $d/tokens.txt
          cp -av $repo/test_wavs $d/

          tar cjfv $d.tar.bz2 $d

      - name: vosk-model-small-streaming-bn
        if: false
        shell: bash
        run: |
          git lfs install
          repo=vosk-model-small-streaming-bn
          git clone https://huggingface.co/alphacep/$repo
          cd $repo
          mv test.wav 0.wav
          wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/bn.wav
          mv bn.wav 1.wav
          cd ..

          d=sherpa-onnx-streaming-zipformer-bn-vosk-2026-02-09
          mkdir $d
          cat >$d/README.md <<EOF
          # Introduction
          Model files are from
          https://huggingface.co/alphacep/vosk-model-small-streaming-bn
          EOF

          mv $repo/am-onnx/*.onnx $d/
          mv $repo/lang/* $d/
          mkdir $d/test_wavs
          mv $repo/*.wav $d/test_wavs

          tar cfjv $d.tar.bz2 $d

      - name: WenetSpeech Wu
        if: false
        shell: bash
        run: |
          git lfs install
          git clone https://huggingface.co/csukuangfj2/sherpa-onnx-wenetspeech-wu-u2pp-conformer-ctc-zh-int8-2026-02-03
          git clone https://huggingface.co/csukuangfj2/sherpa-onnx-wenetspeech-wu-u2pp-conformer-ctc-zh-2026-02-03

          d=sherpa-onnx-wenetspeech-wu-u2pp-conformer-ctc-zh-int8-2026-02-03
          rm -rf $d/.git*

          tar cjfv $d.tar.bz2 $d

          rm -rf $d

          d=sherpa-onnx-wenetspeech-wu-u2pp-conformer-ctc-zh-2026-02-03
          rm -rf $d/.git*

          tar cjfv $d.tar.bz2 $d
          rm -rf $d

          ls -lh *.tar.bz2

      - name: Setup tmate session
        if: false
        uses: mxschmitt/action-tmate@v3

      - name: Collect funasr-nano with LLM
        if: false
        shell: bash
        run: |
          git lfs install
          models=(
            sherpa-onnx-funasr-nano-int8-2025-12-30
            sherpa-onnx-funasr-nano-fp16-2025-12-30
            sherpa-onnx-funasr-nano-2025-12-30
          )
          for d in ${models[@]}; do
            git clone https://huggingface.co/csukuangfj/$d
            rm -rf $d/.git
            tar cjfv $d.tar.bz2 $d
            ls -lh $d.tar.bz2
            ls -lh $d
            rm -rf $d
          done

      - name: Collect funasr-nano with LLM int8
        if: false
        shell: bash
        run: |
          d=sherpa-onnx-funasr-nano-int8-2025-12-30
          mkdir $d
          pushd $d

          curl -SL -O https://www.modelscope.cn/models/zengshuishui/FunASR-nano-onnx/resolve/master/llm_int8/llm.int8.onnx
          curl -SL -O https://www.modelscope.cn/models/zengshuishui/FunASR-nano-onnx/resolve/master/embedding.int8.onnx
          curl -SL -O https://www.modelscope.cn/models/zengshuishui/FunASR-nano-onnx/resolve/master/encoder_adaptor.int8.onnx

          mkdir Qwen3-0.6B
          cd Qwen3-0.6B
          curl -SL -O https://www.modelscope.cn/models/zengshuishui/FunASR-nano-onnx/resolve/master/Qwen3-0.6B/merges.txt
          curl -SL -O https://www.modelscope.cn/models/zengshuishui/FunASR-nano-onnx/resolve/master/Qwen3-0.6B/tokenizer.json
          curl -SL -O https://www.modelscope.cn/models/zengshuishui/FunASR-nano-onnx/resolve/master/Qwen3-0.6B/vocab.json

          ls -lh
          cd ..
          mkdir test_wavs
          cd test_wavs

          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/dia_hunan.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/dia_minnan.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/dia_sh.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/dia_yue.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/far_2.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/far_3.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/far_4.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/far_5.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/ja.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/ja_en_codeswitch.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/lyrics.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/lyrics_2.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/lyrics_3.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/lyrics_en_2.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/noise_en.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/rag_biochemistry.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/rag_chemistry.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/rag_history.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/rag_math.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/rag_medical.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/rag_physics.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/yuenan.wav

          mv yuenan.wav vietnamese.wav

          for f in *.wav; do
            ffmpeg -y -loglevel error -i "$f" \
              -ac 1 -ar 16000 -sample_fmt s16 \
              "${f}.tmp.wav" \
            && mv "${f}.tmp.wav" "$f"
          done

          curl -SL -O https://modelscope.cn/models/csukuangfj/sherpa-doc-files/resolve/master/source/_static/fun-asr-nano-2025-12-30/lyrics_en_1.wav
          curl -SL -O https://modelscope.cn/models/csukuangfj/sherpa-doc-files/resolve/master/source/_static/fun-asr-nano-2025-12-30/lyrics_en_3.wav

          cat >README.md <<EOF
          Audio files in this directory are downloaded from
          https://github.com/FunAudioLLM/FunAudioLLM.github.io/tree/master/funasr/static/audios

          | Filename| Trascript|
          |---------|----------|
          |湖南方言[dia_hunan.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-int8-2025-12-30/resolve/main/test_wavs/dia_hunan.wav)|但总来讲孙膑对兵法的理解运用比庞涓略胜一筹。|
          |闽南语[dia_minnan.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-int8-2025-12-30/resolve/main/test_wavs/dia_minnan.wav)|嗯，下摆若有机会吧，因为即久吼开了吼卷啊遮厉害，会倒贴钱啊。|
          |上海话[dia_sh.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-int8-2025-12-30/resolve/main/test_wavs/dia_sh.wav)|人跟狗，包括人跟动物接触长了，全有感情。葛末随了阿拉社会个富裕。|
          |粤语[dia_yue.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-int8-2025-12-30/resolve/main/test_wavs/dia_yue.wav)|啲身体好劲啊，跟住咧佢哋有一个人咧就突然可能就有高原反应啦，突然间就啊窒息咗，即系晕晕咗。|
          |中文歌曲[lyrics.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-int8-2025-12-30/resolve/main/test_wavs/lyrics.wav)|我看到我的身后盯着我的人群，喜欢或恨不一样的神情，我知道这可能就是所谓的成名，我知道必须往前一步也不能停。|
          |中文歌曲[lyrics_2.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-int8-2025-12-30/resolve/main/test_wavs/lyrics_2.wav)|明明那么远，为何却感觉离他那么近？闭上眼，你甚至能背出他所有押韵。虽然不听说唱了，但你已学会自信。我代表所有中文说唱歌手向你致敬。如今面对困难的你，早已不再抱怨。|
          |中文歌曲[lyrics_3.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-int8-2025-12-30/resolve/main/test_wavs/lyrics_3.wav)|你听啊秋末的落叶，你听它叹息着离别，只剩我独自领略海与山风和月，你听啊。|
          |英文歌曲[lyrics_en_1.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-int8-2025-12-30/resolve/main/test_wavs/lyrics_en_1.wav)|When I was young I'd listen to the radio. Waiting for my favorite songs. When they played I'd sing along. It made me smile.|
          |英文歌曲[lyrics_en_2.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-int8-2025-12-30/resolve/main/test_wavs/lyrics_en_2.wav)|I see your monsters. I see your pain. Tell me your problems; I'll chase them away. I'll be your lighthouse. I'll make it okay. When I see your monsters, I'll stand there so brave and chase them all away.|
          |英文歌曲[lyrics_en_3.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-int8-2025-12-30/resolve/main/test_wavs/lyrics_en_3.wav)|An empty street, an empty house, a hole inside my heart. I'm all alone and the rooms are getting smaller. I wonder how, I wonder why, I wonder where they are. The days we had, the songs we sang together.|
          |英文[noise_en.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-int8-2025-12-30/resolve/main/test_wavs/noise_en.wav)|So what's interesting here is I feel that you know brands knowing this when people sort of speak to the voice assistance at home and if you want to be the brand.|
          |[far_2.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-int8-2025-12-30/resolve/main/test_wavs/far_2.wav)|然后被冠以了渣男线的称号，好了，不管这个，那么前方即将到达沈杜公路站，左边是8号线。|
          |[far_3.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-int8-2025-12-30/resolve/main/test_wavs/far_3.wav)|周末要不要去露营，最近天气超舒服，露营？我怕虫子咬，而且晚上睡帐篷会不会很冷啊？放心，我借了专业装备还有暖宝宝，再带点火锅食材，边吃边看星星超惬意。|
          |[far_4.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-int8-2025-12-30/resolve/main/test_wavs/far_4.wav)|<music>唯一的遗憾就是他那个八宝鸭还有烤鸭都没吃上 估计得提前预定吧 <impact_sounds></impact_sounds>只能怪我自己没有做好功课</music>|
          |[far_5.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-int8-2025-12-30/resolve/main/test_wavs/far_5.wav)|别紧张<breathing></breathing>我只是我是在这边逛街 然后看到你们在这边拍照 想跟你交个朋友<impact_sounds> 认识</impact_sounds>一下|
          |日语[ja.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-int8-2025-12-30/resolve/main/test_wavs/ja.wav)|人民たちは、金欲しさに王をのけ者にしてしまって、何でもすべて商人のところへ持って行ってしまいました。|
          |日英混合[ja_en_codeswitch.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-int8-2025-12-30/resolve/main/test_wavs/ja_en_codeswitch.wav)|このカフェのwi-fiがアン ステーブル 過ぎて、google meetでディスコネクトされて クライエントに悪い印象を与えてしまった。|
          |越南语[vietnamese.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-int8-2025-12-30/resolve/main/test_wavs/vietnamese.wav)|Đi cùng với tiếp tục kêu gọi người dân đã qua lại các ổ dịch này, khai báo y tế và yêu cầu liên hệ để được xét nghiệm.|
          |[rag_biochemistry.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-int8-2025-12-30/resolve/main/test_wavs/rag_biochemistry.wav)|利用三磷酸腺苷的水解所产生的能量来驱动其他化学反应|
          |[rag_chemistry.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-int8-2025-12-30/resolve/main/test_wavs/rag_chemistry.wav)|比如说酯在当时被认为是一种含氧酸盐|
          |[rag_history.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-int8-2025-12-30/resolve/main/test_wavs/rag_history.wav)|由罗马皇帝钦点的犹地亚王大希律王统治期间|
          |[rag_math.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-int8-2025-12-30/resolve/main/test_wavs/rag_math.wav)|对微分形式的积分是微分几何中的基本概念|
          |[rag_medical.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-int8-2025-12-30/resolve/main/test_wavs/rag_medical.wav)|肾脏中肾小球囊上的细胞膜孔隙很小|
          |[rag_physics.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-int8-2025-12-30/resolve/main/test_wavs/rag_physics.wav)|根据碰撞理论月面样本缺少挥发性物质|
          EOF
          cd ..

          cat >README.md <<EOF

          # Introduction
          Models in this directory are downloaded from
          https://www.modelscope.cn/models/zengshuishui/FunASR-nano-onnx/files

          Export script can be found at
          https://github.com/Wasser1462/FunASR-nano-onnx

          The author is https://github.com/Wasser1462
          EOF

          popd
          ls -lh $d
          tar cjvf $d.tar.bz2 $d

      - name: Collect funasr-nano with LLM float32
        if: false
        shell: bash
        run: |
          d=sherpa-onnx-funasr-nano-2025-12-30
          mkdir $d
          pushd $d

          curl -SL -O https://www.modelscope.cn/models/zengshuishui/FunASR-nano-onnx/resolve/master/llm_fp32/llm.fp32.onnx
          curl -SL -O https://www.modelscope.cn/models/zengshuishui/FunASR-nano-onnx/resolve/master/llm_fp32/llm.fp32.data
          curl -SL -O https://www.modelscope.cn/models/zengshuishui/FunASR-nano-onnx/resolve/master/embedding.onnx
          curl -SL -O https://www.modelscope.cn/models/zengshuishui/FunASR-nano-onnx/resolve/master/encoder_adaptor.onnx

          mkdir Qwen3-0.6B
          cd Qwen3-0.6B
          curl -SL -O https://www.modelscope.cn/models/zengshuishui/FunASR-nano-onnx/resolve/master/Qwen3-0.6B/merges.txt
          curl -SL -O https://www.modelscope.cn/models/zengshuishui/FunASR-nano-onnx/resolve/master/Qwen3-0.6B/tokenizer.json
          curl -SL -O https://www.modelscope.cn/models/zengshuishui/FunASR-nano-onnx/resolve/master/Qwen3-0.6B/vocab.json

          ls -lh
          cd ..
          mkdir test_wavs
          cd test_wavs

          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/dia_hunan.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/dia_minnan.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/dia_sh.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/dia_yue.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/far_2.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/far_3.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/far_4.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/far_5.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/ja.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/ja_en_codeswitch.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/lyrics.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/lyrics_2.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/lyrics_3.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/lyrics_en_2.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/noise_en.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/rag_biochemistry.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/rag_chemistry.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/rag_history.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/rag_math.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/rag_medical.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/rag_physics.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/yuenan.wav

          mv yuenan.wav vietnamese.wav

          for f in *.wav; do
            ffmpeg -y -loglevel error -i "$f" \
              -ac 1 -ar 16000 -sample_fmt s16 \
              "${f}.tmp.wav" \
            && mv "${f}.tmp.wav" "$f"
          done

          curl -SL -O https://modelscope.cn/models/csukuangfj/sherpa-doc-files/resolve/master/source/_static/fun-asr-nano-2025-12-30/lyrics_en_1.wav
          curl -SL -O https://modelscope.cn/models/csukuangfj/sherpa-doc-files/resolve/master/source/_static/fun-asr-nano-2025-12-30/lyrics_en_3.wav

          cat >README.md <<EOF
          Audio files in this directory are downloaded from
          https://github.com/FunAudioLLM/FunAudioLLM.github.io/tree/master/funasr/static/audios

          | Filename| Trascript|
          |---------|----------|
          |湖南方言[dia_hunan.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-2025-12-30/resolve/main/test_wavs/dia_hunan.wav)|但总来讲孙膑对兵法的理解运用比庞涓略胜一筹。|
          |闽南语[dia_minnan.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-2025-12-30/resolve/main/test_wavs/dia_minnan.wav)|嗯，下摆若有机会吧，因为即久吼开了吼卷啊遮厉害，会倒贴钱啊。|
          |上海话[dia_sh.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-2025-12-30/resolve/main/test_wavs/dia_sh.wav)|人跟狗，包括人跟动物接触长了，全有感情。葛末随了阿拉社会个富裕。|
          |粤语[dia_yue.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-2025-12-30/resolve/main/test_wavs/dia_yue.wav)|啲身体好劲啊，跟住咧佢哋有一个人咧就突然可能就有高原反应啦，突然间就啊窒息咗，即系晕晕咗。|
          |中文歌曲[lyrics.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-2025-12-30/resolve/main/test_wavs/lyrics.wav)|我看到我的身后盯着我的人群，喜欢或恨不一样的神情，我知道这可能就是所谓的成名，我知道必须往前一步也不能停。|
          |中文歌曲[lyrics_2.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-2025-12-30/resolve/main/test_wavs/lyrics_2.wav)|明明那么远，为何却感觉离他那么近？闭上眼，你甚至能背出他所有押韵。虽然不听说唱了，但你已学会自信。我代表所有中文说唱歌手向你致敬。如今面对困难的你，早已不再抱怨。|
          |中文歌曲[lyrics_3.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-2025-12-30/resolve/main/test_wavs/lyrics_3.wav)|你听啊秋末的落叶，你听它叹息着离别，只剩我独自领略海与山风和月，你听啊。|
          |英文歌曲[lyrics_en_1.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-int8-2025-12-30/resolve/main/test_wavs/lyrics_en_1.wav)|When I was young I'd listen to the radio. Waiting for my favorite songs. When they played I'd sing along. It made me smile.|
          |英文歌曲[lyrics_en_2.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-2025-12-30/resolve/main/test_wavs/lyrics_en_2.wav)|I see your monsters. I see your pain. Tell me your problems; I'll chase them away. I'll be your lighthouse. I'll make it okay. When I see your monsters, I'll stand there so brave and chase them all away.|
          |英文歌曲[lyrics_en_3.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-int8-2025-12-30/resolve/main/test_wavs/lyrics_en_3.wav)|An empty street, an empty house, a hole inside my heart. I'm all alone and the rooms are getting smaller. I wonder how, I wonder why, I wonder where they are. The days we had, the songs we sang together.|
          |英文[noise_en.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-2025-12-30/resolve/main/test_wavs/noise_en.wav)|So what's interesting here is I feel that you know brands knowing this when people sort of speak to the voice assistance at home and if you want to be the brand.|
          |[far_2.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-2025-12-30/resolve/main/test_wavs/far_2.wav)|然后被冠以了渣男线的称号，好了，不管这个，那么前方即将到达沈杜公路站，左边是8号线。|
          |[far_3.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-2025-12-30/resolve/main/test_wavs/far_3.wav)|周末要不要去露营，最近天气超舒服，露营？我怕虫子咬，而且晚上睡帐篷会不会很冷啊？放心，我借了专业装备还有暖宝宝，再带点火锅食材，边吃边看星星超惬意。|
          |[far_4.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-2025-12-30/resolve/main/test_wavs/far_4.wav)|<music>唯一的遗憾就是他那个八宝鸭还有烤鸭都没吃上 估计得提前预定吧 <impact_sounds></impact_sounds>只能怪我自己没有做好功课</music>|
          |[far_5.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-2025-12-30/resolve/main/test_wavs/far_5.wav)|别紧张<breathing></breathing>我只是我是在这边逛街 然后看到你们在这边拍照 想跟你交个朋友<impact_sounds> 认识</impact_sounds>一下|
          |日语[ja.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-2025-12-30/resolve/main/test_wavs/ja.wav)|人民たちは、金欲しさに王をのけ者にしてしまって、何でもすべて商人のところへ持って行ってしまいました。|
          |日英混合[ja_en_codeswitch.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-2025-12-30/resolve/main/test_wavs/ja_en_codeswitch.wav)|このカフェのwi-fiがアン ステーブル 過ぎて、google meetでディスコネクトされて クライエントに悪い印象を与えてしまった。|
          |越南语[vietnamese.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-2025-12-30/resolve/main/test_wavs/vietnamese.wav)|Đi cùng với tiếp tục kêu gọi người dân đã qua lại các ổ dịch này, khai báo y tế và yêu cầu liên hệ để được xét nghiệm.|
          |[rag_biochemistry.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-2025-12-30/resolve/main/test_wavs/rag_biochemistry.wav)|利用三磷酸腺苷的水解所产生的能量来驱动其他化学反应|
          |[rag_chemistry.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-2025-12-30/resolve/main/test_wavs/rag_chemistry.wav)|比如说酯在当时被认为是一种含氧酸盐|
          |[rag_history.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-2025-12-30/resolve/main/test_wavs/rag_history.wav)|由罗马皇帝钦点的犹地亚王大希律王统治期间|
          |[rag_math.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-2025-12-30/resolve/main/test_wavs/rag_math.wav)|对微分形式的积分是微分几何中的基本概念|
          |[rag_medical.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-2025-12-30/resolve/main/test_wavs/rag_medical.wav)|肾脏中肾小球囊上的细胞膜孔隙很小|
          |[rag_physics.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-2025-12-30/resolve/main/test_wavs/rag_physics.wav)|根据碰撞理论月面样本缺少挥发性物质|
          EOF
          cd ..

          cat >README.md <<EOF

          # Introduction
          Models in this directory are downloaded from
          https://www.modelscope.cn/models/zengshuishui/FunASR-nano-onnx/files

          Export script can be found at
          https://github.com/Wasser1462/FunASR-nano-onnx

          The author is https://github.com/Wasser1462
          EOF

          popd
          ls -lh $d
          tar cjvf $d.tar.bz2 $d
          ls -lh *.tar.bz2

      - name: Collect funasr-nano with LLM fp16
        if: false
        shell: bash
        run: |
          d=sherpa-onnx-funasr-nano-fp16-2025-12-30
          mkdir $d
          pushd $d

          curl -SL -O https://www.modelscope.cn/models/zengshuishui/FunASR-nano-onnx/resolve/master/llm_fp16/llm.fp16.onnx
          curl -SL -O https://www.modelscope.cn/models/zengshuishui/FunASR-nano-onnx/resolve/master/embedding.int8.onnx
          curl -SL -O https://www.modelscope.cn/models/zengshuishui/FunASR-nano-onnx/resolve/master/encoder_adaptor.int8.onnx

          mkdir Qwen3-0.6B
          cd Qwen3-0.6B
          curl -SL -O https://www.modelscope.cn/models/zengshuishui/FunASR-nano-onnx/resolve/master/Qwen3-0.6B/merges.txt
          curl -SL -O https://www.modelscope.cn/models/zengshuishui/FunASR-nano-onnx/resolve/master/Qwen3-0.6B/tokenizer.json
          curl -SL -O https://www.modelscope.cn/models/zengshuishui/FunASR-nano-onnx/resolve/master/Qwen3-0.6B/vocab.json

          ls -lh
          cd ..
          mkdir test_wavs
          cd test_wavs

          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/dia_hunan.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/dia_minnan.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/dia_sh.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/dia_yue.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/far_2.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/far_3.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/far_4.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/far_5.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/ja.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/ja_en_codeswitch.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/lyrics.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/lyrics_2.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/lyrics_3.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/lyrics_en_2.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/noise_en.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/rag_biochemistry.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/rag_chemistry.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/rag_history.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/rag_math.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/rag_medical.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/rag_physics.wav
          curl -SL -O https://github.com/FunAudioLLM/FunAudioLLM.github.io/raw/refs/heads/master/funasr/static/audios/yuenan.wav

          mv yuenan.wav vietnamese.wav

          for f in *.wav; do
            ffmpeg -y -loglevel error -i "$f" \
              -ac 1 -ar 16000 -sample_fmt s16 \
              "${f}.tmp.wav" \
            && mv "${f}.tmp.wav" "$f"
          done

          curl -SL -O https://modelscope.cn/models/csukuangfj/sherpa-doc-files/resolve/master/source/_static/fun-asr-nano-2025-12-30/lyrics_en_1.wav
          curl -SL -O https://modelscope.cn/models/csukuangfj/sherpa-doc-files/resolve/master/source/_static/fun-asr-nano-2025-12-30/lyrics_en_3.wav

          cat >README.md <<EOF
          Audio files in this directory are downloaded from
          https://github.com/FunAudioLLM/FunAudioLLM.github.io/tree/master/funasr/static/audios

          | Filename| Trascript|
          |---------|----------|
          |湖南方言[dia_hunan.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-fp16-2025-12-30/resolve/main/test_wavs/dia_hunan.wav)|但总来讲孙膑对兵法的理解运用比庞涓略胜一筹。|
          |闽南语[dia_minnan.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-fp16-2025-12-30/resolve/main/test_wavs/dia_minnan.wav)|嗯，下摆若有机会吧，因为即久吼开了吼卷啊遮厉害，会倒贴钱啊。|
          |上海话[dia_sh.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-fp16-2025-12-30/resolve/main/test_wavs/dia_sh.wav)|人跟狗，包括人跟动物接触长了，全有感情。葛末随了阿拉社会个富裕。|
          |粤语[dia_yue.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-fp16-2025-12-30/resolve/main/test_wavs/dia_yue.wav)|啲身体好劲啊，跟住咧佢哋有一个人咧就突然可能就有高原反应啦，突然间就啊窒息咗，即系晕晕咗。|
          |中文歌曲[lyrics.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-fp16-2025-12-30/resolve/main/test_wavs/lyrics.wav)|我看到我的身后盯着我的人群，喜欢或恨不一样的神情，我知道这可能就是所谓的成名，我知道必须往前一步也不能停。|
          |中文歌曲[lyrics_2.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-fp16-2025-12-30/resolve/main/test_wavs/lyrics_2.wav)|明明那么远，为何却感觉离他那么近？闭上眼，你甚至能背出他所有押韵。虽然不听说唱了，但你已学会自信。我代表所有中文说唱歌手向你致敬。如今面对困难的你，早已不再抱怨。|
          |中文歌曲[lyrics_3.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-fp16-2025-12-30/resolve/main/test_wavs/lyrics_3.wav)|你听啊秋末的落叶，你听它叹息着离别，只剩我独自领略海与山风和月，你听啊。|
          |英文歌曲[lyrics_en_1.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-int8-2025-12-30/resolve/main/test_wavs/lyrics_en_1.wav)|When I was young I'd listen to the radio. Waiting for my favorite songs. When they played I'd sing along. It made me smile.|
          |英文歌曲[lyrics_en_2.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-fp16-2025-12-30/resolve/main/test_wavs/lyrics_en_2.wav)|I see your monsters. I see your pain. Tell me your problems; I'll chase them away. I'll be your lighthouse. I'll make it okay. When I see your monsters, I'll stand there so brave and chase them all away.|
          |英文歌曲[lyrics_en_3.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-int8-2025-12-30/resolve/main/test_wavs/lyrics_en_3.wav)|An empty street, an empty house, a hole inside my heart. I'm all alone and the rooms are getting smaller. I wonder how, I wonder why, I wonder where they are. The days we had, the songs we sang together.|
          |英文[noise_en.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-fp16-2025-12-30/resolve/main/test_wavs/noise_en.wav)|So what's interesting here is I feel that you know brands knowing this when people sort of speak to the voice assistance at home and if you want to be the brand.|
          |[far_2.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-fp16-2025-12-30/resolve/main/test_wavs/far_2.wav)|然后被冠以了渣男线的称号，好了，不管这个，那么前方即将到达沈杜公路站，左边是8号线。|
          |[far_3.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-fp16-2025-12-30/resolve/main/test_wavs/far_3.wav)|周末要不要去露营，最近天气超舒服，露营？我怕虫子咬，而且晚上睡帐篷会不会很冷啊？放心，我借了专业装备还有暖宝宝，再带点火锅食材，边吃边看星星超惬意。|
          |[far_4.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-fp16-2025-12-30/resolve/main/test_wavs/far_4.wav)|<music>唯一的遗憾就是他那个八宝鸭还有烤鸭都没吃上 估计得提前预定吧 <impact_sounds></impact_sounds>只能怪我自己没有做好功课</music>|
          |[far_5.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-fp16-2025-12-30/resolve/main/test_wavs/far_5.wav)|别紧张<breathing></breathing>我只是我是在这边逛街 然后看到你们在这边拍照 想跟你交个朋友<impact_sounds> 认识</impact_sounds>一下|
          |日语[ja.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-fp16-2025-12-30/resolve/main/test_wavs/ja.wav)|人民たちは、金欲しさに王をのけ者にしてしまって、何でもすべて商人のところへ持って行ってしまいました。|
          |日英混合[ja_en_codeswitch.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-fp16-2025-12-30/resolve/main/test_wavs/ja_en_codeswitch.wav)|このカフェのwi-fiがアン ステーブル 過ぎて、google meetでディスコネクトされて クライエントに悪い印象を与えてしまった。|
          |越南语[vietnamese.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-fp16-2025-12-30/resolve/main/test_wavs/vietnamese.wav)|Đi cùng với tiếp tục kêu gọi người dân đã qua lại các ổ dịch này, khai báo y tế và yêu cầu liên hệ để được xét nghiệm.|
          |[rag_biochemistry.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-fp16-2025-12-30/resolve/main/test_wavs/rag_biochemistry.wav)|利用三磷酸腺苷的水解所产生的能量来驱动其他化学反应|
          |[rag_chemistry.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-fp16-2025-12-30/resolve/main/test_wavs/rag_chemistry.wav)|比如说酯在当时被认为是一种含氧酸盐|
          |[rag_history.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-fp16-2025-12-30/resolve/main/test_wavs/rag_history.wav)|由罗马皇帝钦点的犹地亚王大希律王统治期间|
          |[rag_math.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-fp16-2025-12-30/resolve/main/test_wavs/rag_math.wav)|对微分形式的积分是微分几何中的基本概念|
          |[rag_medical.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-fp16-2025-12-30/resolve/main/test_wavs/rag_medical.wav)|肾脏中肾小球囊上的细胞膜孔隙很小|
          |[rag_physics.wav](https://huggingface.co/csukuangfj/sherpa-onnx-funasr-nano-fp16-2025-12-30/resolve/main/test_wavs/rag_physics.wav)|根据碰撞理论月面样本缺少挥发性物质|
          EOF
          cd ..

          cat >README.md <<EOF

          # Introduction
          Models in this directory are downloaded from
          https://www.modelscope.cn/models/zengshuishui/FunASR-nano-onnx/files

          Export script can be found at
          https://github.com/Wasser1462/FunASR-nano-onnx

          The author is https://github.com/Wasser1462
          EOF

          popd
          ls -lh $d
          tar cjvf $d.tar.bz2 $d
          ls -lh *.tar.bz2

      - name: Streaming zipformer from Banafo/Kroko-ASR
        if: false
        shell: bash
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          git lfs install
          git clone https://csukuangfj:$HF_TOKEN@huggingface.co/Banafo/Kroko-ASR src
          pushd src
          curl -SL -O https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Python/resolve/main/de_encoder.onnx
          curl -SL -O https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Python/resolve/main/de_decoder.onnx
          curl -SL -O https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Python/resolve/main/de_joiner.onnx
          curl -SL -O https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Python/resolve/main/de_tokens.txt
          popd

          for lang in en es fr de; do
            repo=sherpa-onnx-streaming-zipformer-$lang-kroko-2025-08-06
            git clone https://huggingface.co/csukuangfj/$repo
            cp src/${lang}_encoder.onnx $repo/encoder.onnx
            cp src/${lang}_decoder.onnx $repo/decoder.onnx
            cp src/${lang}_joiner.onnx $repo/joiner.onnx
            cp src/${lang}_tokens.txt $repo/tokens.txt

            pushd $repo

            echo "See license at https://huggingface.co/Banafo/Kroko-ASR" > README.md

            mkdir -p test_wavs
            pushd test_wavs
            curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$lang.wav
            mv $lang.wav 0.wav
            popd

            git lfs track "*.onnx" "*.wav"
            git status
            ls -lh
            git add .
            git commit -m 'add model files' || true
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$repo main || true

            popd

            rm -rf $repo/.git*

            tar cjfv $repo.tar.bz2 $repo

            ls -lh *.tar.bz2
          done

      - name: FireRed ASR fp16
        if: false
        shell: bash
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16 hf

          git lfs install
          git clone https://www.modelscope.cn/csukuangfj/sherpa-onnx-fire-red-asr-large-zh_en-fp16-2025-02-16.git ms

          d=sherpa-onnx-fire-red-asr-large-zh_en-fp16-2025-02-16
          git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d
          mv -v hf/test_wavs $d
          mv -v hf/README.md $d
          mv -v hf/tokens.txt $d
          mv -v ms/*.onnx $d

          pushd $d
          git lfs track "*.onnx"
          git lfs track "*.wav"
          git status
          git add .
          git commit -m "add models"
          ls -lh
          git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main
          popd

          rm -rf $d/.git
          rm -rf $d/.gitattributes
          tar cjvf $d.tar.bz2 $d

      - name: Zipformer CTC (non-streaming)
        if: false
        shell: bash
        env:
          MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          git lfs install
          names=(
            sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03
            sherpa-onnx-zipformer-ctc-zh-2025-07-03
            sherpa-onnx-zipformer-ctc-zh-fp16-2025-07-03
            sherpa-onnx-zipformer-ctc-small-zh-int8-2025-07-16
            sherpa-onnx-zipformer-ctc-small-zh-fp16-2025-07-16
            sherpa-onnx-zipformer-ctc-small-zh-2025-07-16
          )
          for name in ${names[@]}; do
            rm -rf ms
            git clone https://oauth2:${MS_TOKEN}@www.modelscope.cn/csukuangfj/$name.git ms
            git clone https://huggingface.co/csukuangfj/$name

            cp -av ms/test_wavs $name
            cp -v ms/*.onnx $name
            cp -v ms/tokens.txt $name
            cp -v ms/bbpe.model $name

            pushd $name
            git lfs track "*.wav" "*.onnx" "*.model"
            git add .
            git status
            git commit -m 'add models' || true
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$name main || true

            # git lfs pull
            rm -rf .git
            rm -rfv .gitattributes
            ls -lh
            popd

            tar cjfv $name.tar.bz2 $name
            rm -rf $name
            ls -lh *.tar.bz2
          done

      - name: sense-voice
        if: false
        shell: bash
        run: |
          git lfs install
          d=sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2025-09-09
          f=sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09
          git clone https://huggingface.co/csukuangfj/$d
          git clone https://huggingface.co/csukuangfj/$f

          rm -rf $d/.git
          rm -rf $f/.gi*

          rm -rf $d/.gitattributes
          rm -rf $f/.gitattributes

          tar cjfv $d.tar.bz2 $d
          tar cjfv $f.tar.bz2 $f

          ls -lh *.tar.bz2

      - name: wenetspeech chuan paraformer
        if: false
        shell: bash
        run: |
          git lfs install
          d=sherpa-onnx-paraformer-zh-int8-2025-10-07
          f=sherpa-onnx-paraformer-zh-2025-10-07
          git clone https://huggingface.co/csukuangfj/$d
          git clone https://huggingface.co/csukuangfj/$f

          rm -rf $d/.git
          rm -rf $f/.gi*

          rm -rf $d/.gitattributes
          rm -rf $f/.gitattributes

          tar cjfv $d.tar.bz2 $d
          tar cjfv $f.tar.bz2 $f

          ls -lh *.tar.bz2

      - name: u2ppconformer
        if: false
        shell: bash
        run: |
          git lfs install

          d=sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-2025-09-10
          f=sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10

          git clone https://huggingface.co/csukuangfj/$d
          git clone https://huggingface.co/csukuangfj/$f

          rm -rf $d/.git
          rm -rf $f/.gi*

          rm -rf $d/.gitattributes
          rm -rf $f/.gitattributes

          tar cjfv $d.tar.bz2 $d
          tar cjfv $f.tar.bz2 $f

          ls -lh *.tar.bz2

      - name: Vietnamese (zipformer)
        if: false
        shell: bash
        run: |
          rm -rf models
          mkdir models
          cd models
          cat >README.md <<EOF
          # Introduction
          Models in this directory are from
          https://huggingface.co/zzasdf/viet_iter3_pseudo_label
          which are trained on about 70k hours of data.
          EOF

          git lfs install
          git clone https://huggingface.co/csukuangfj/viet_iter3_pseudo_label hf

          ls -lh

          d=sherpa-onnx-zipformer-vi-2025-04-20
          mkdir -p $d
          cp -v hf/exp/encoder-epoch-12-avg-8.onnx $d/
          cp -v hf/exp/decoder-epoch-12-avg-8.onnx $d/
          cp -v hf/exp/joiner-epoch-12-avg-8.onnx $d/
          cp -v hf/data/Vietnam_bpe_2000_new/bpe.model $d/
          cp -v hf/data/Vietnam_bpe_2000_new/tokens.txt $d/
          cp -av hf/test_wavs $d
          cp -v README.md $d

          tar cjfv $d.tar.bz2 $d

          d=sherpa-onnx-zipformer-vi-int8-2025-04-20
          mkdir -p $d

          cp -v hf/exp/encoder-epoch-12-avg-8.int8.onnx $d/
          cp -v hf/exp/decoder-epoch-12-avg-8.onnx $d/
          cp -v hf/exp/joiner-epoch-12-avg-8.int8.onnx $d/
          cp -v hf/data/Vietnam_bpe_2000_new/bpe.model $d/
          cp -v hf/data/Vietnam_bpe_2000_new/tokens.txt $d/
          cp -av hf/test_wavs $d
          cp -v README.md $d

          tar cjfv $d.tar.bz2 $d

          rm -rf hf

          ls -lh

          cd ..

          mv models/* .

      - name: Publish to huggingface (Vietnamese zipformer)
        if: false
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            models=(
              sherpa-onnx-zipformer-vi-2025-04-20
              sherpa-onnx-zipformer-vi-int8-2025-04-20
            )
            for d in ${models[@]}; do
              export GIT_LFS_SKIP_SMUDGE=1
              export GIT_CLONE_PROTECTION_ACTIVE=false
              rm -rf huggingface
              git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
              cp -av $d/* huggingface

              pushd huggingface
              git lfs track "*.onnx"
              git lfs track "bpe.model"
              git lfs track "*.wav"
              git status
              git add .

              git commit -m "add models"
              git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main

              popd
            done

      - name: vosk-model-ru (stream zipformer)
        if: false
        shell: bash
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          git config --global user.email "csukuangfj@gmail.com"
          git config --global user.name "Fangjun Kuang"

          cat >README.md <<EOF
          # Introduction
          Models in this directory are from
          https://huggingface.co/alphacep/vosk-model-small-streaming-ru
          EOF

          git lfs install
          git clone https://huggingface.co/alphacep/vosk-model-small-streaming-ru hf

          git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ru-vosk-int8-2025-08-16 int8
          git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ru-vosk-2025-08-16 fp32

          rm -fv int8/*.onnx
          rm -fv fp32/*.onnx

          mkdir -p int8/test_wavs
          mkdir -p fp32/test_wavs

          curl -SL -O https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition/resolve/main/test_wavs/russian/russian-i-love-you.wav
          curl -SL -O https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition/resolve/main/test_wavs/russian/test.wav

          mv russian-i-love-you.wav 0.wav
          mv test.wav 1.wav

          cp -v README.md int8/
          cp -v README.md fp32/

          cp -v *.wav int8/test_wavs
          cp -v *.wav fp32/test_wavs

          cp -v hf/am-onnx/{encoder,decoder,joiner}.onnx fp32/

          cp -v hf/am-onnx/{encoder,joiner}.int8.onnx int8/
          cp -v hf/am-onnx/decoder.onnx int8/

          cp -v hf/lang/tokens.txt int8/
          cp -v hf/lang/bpe.model int8/

          cp -v hf/lang/tokens.txt fp32/
          cp -v hf/lang/bpe.model fp32/

          mv int8 sherpa-onnx-streaming-zipformer-small-ru-vosk-int8-2025-08-16
          mv fp32 sherpa-onnx-streaming-zipformer-small-ru-vosk-2025-08-16

          models=(
            sherpa-onnx-streaming-zipformer-small-ru-vosk-2025-08-16
            sherpa-onnx-streaming-zipformer-small-ru-vosk-int8-2025-08-16
          )

          for d in ${models[@]}; do
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false
            pushd $d
            git lfs track "*.onnx"
            git lfs track "bpe.model"
            git lfs track "*.wav"
            git status
            git add .

            git commit -m "add models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main
            popd

            rm -rf $d/.git*

            tar cjfv $d.tar.bz2 $d
          done
          ls -lh *.tar.bz2

      - name: vosk-model-ru (zipformer)
        if: false
        shell: bash
        run: |
          rm -rf models
          mkdir models
          cd models
          cat >README.md <<EOF
          # Introduction
          Models in this directory are from
          https://huggingface.co/alphacep/vosk-model-ru/tree/main
          EOF

          git lfs install
          git clone https://huggingface.co/alphacep/vosk-model-ru hf

          ls -lh

          mkdir test_wavs
          pushd test_wavs
          curl -SL -O https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition/resolve/main/test_wavs/russian/russian-i-love-you.wav
          curl -SL -O https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition/resolve/main/test_wavs/russian/test.wav

          mv russian-i-love-you.wav 0.wav
          mv test.wav 1.wav
          popd

          d=sherpa-onnx-zipformer-ru-2025-04-20
          mkdir $d
          cp -v hf/am-onnx/encoder.onnx $d
          cp -v hf/am-onnx/decoder.onnx $d
          cp -v hf/am-onnx/joiner.onnx $d
          cp -v hf/lang/bpe.model $d
          cp -v hf/lang/tokens.txt $d
          cp -av test_wavs $d/
          cp -v README.md $d

          tar cjfv $d.tar.bz2 $d

          d=sherpa-onnx-zipformer-ru-int8-2025-04-20
          mkdir $d
          cp -v hf/am-onnx/encoder.int8.onnx $d
          cp -v hf/am-onnx/decoder.onnx $d
          cp -v hf/am-onnx/joiner.int8.onnx $d
          cp -v hf/lang/bpe.model $d
          cp -v hf/lang/tokens.txt $d
          cp -av test_wavs $d
          cp -v README.md $d

          tar cjfv $d.tar.bz2 $d

          rm -rf hf

          ls -lh

          cd ..

          mv models/* .

      - name: Publish to huggingface
        if: true
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            models=(
              sherpa-onnx-zipformer-ru-2025-04-20
              sherpa-onnx-zipformer-ru-int8-2025-04-20
              sherpa-onnx-funasr-nano-int8-2025-12-30
              sherpa-onnx-funasr-nano-fp16-2025-12-30
              sherpa-onnx-funasr-nano-2025-12-30
              sherpa-onnx-streaming-zipformer-bn-vosk-2026-02-09
              sherpa-onnx-zipformer-vi-30M-int8-2026-02-09
              sherpa-onnx-zipformer-vi-30M-2026-02-09
              sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25
              sherpa-onnx-fire-red-asr2-ctc-zh_en-2026-02-25
              sherpa-onnx-fire-red-asr2-zh_en-int8-2026-02-26
              sherpa-onnx-fire-red-asr2-zh_en-2026-02-26
            )
            for d in ${models[@]}; do
              if [ ! -d $d ]; then
                continue;
              fi

              export GIT_LFS_SKIP_SMUDGE=1
              export GIT_CLONE_PROTECTION_ACTIVE=false
              rm -rf huggingface
              git clone https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/$d huggingface

              rm -rf huggingface/*.onnx
              rm -rf huggingface/*/*.wav

              cp -av $d/* huggingface

              pushd huggingface
              git lfs track "*.onnx"
              git lfs track "*.data"
              git lfs track "*.weights"
              git lfs track "bpe.model"
              git lfs track "*.wav"
              git lfs track "*.json"
              git status
              git add .

              git commit -m "add models"
              git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/$d main

              popd
            done

            rm -rf huggingface

      - name: Publish to modelscope
        if: true
        env:
          MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            for m in *.tar.bz2; do
              export GIT_LFS_SKIP_SMUDGE=1
              export GIT_CLONE_PROTECTION_ACTIVE=false

              rm -rf ms
              git clone https://oauth2:${MS_TOKEN}@www.modelscope.cn/csukuangfj/asr-models.git ms

              cp -av $m ms/

              pushd ms
              git lfs track "*.tar.bz2"
              git status
              ls -lh
              git add .

              git commit -m "add models"
              git push https://oauth2:${MS_TOKEN}@www.modelscope.cn/csukuangfj/asr-models.git

              popd
            done

      - name: Release
        if: true
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models

      - name: Release
        if: false
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.onnx
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: speech-enhancement-models


================================================
FILE: .github/workflows/upload-zipvoice-models.yaml
================================================
name: upload-zipvoice-models

on:
  push:
    branches:
      - upload-zipvoice-onnx-models
  workflow_dispatch:

concurrency:
  group: upload-zipvoice-models-${{ github.ref }}
  cancel-in-progress: true

jobs:
  upload-zipvoice-models:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: upload zipvoice models
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4

      - name: git config
        shell: bash
        run: |
          git config --global user.email "csukuangfj@gmail.com"
          git config --global user.name "Fangjun Kuang"

      - name: Setup Python 3.10
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip pypinyin

      - name: sherpa-onnx-zipvoice-distill-zh-en-emilia-int8
        shell: bash
        run: |
          echo "Generate lexicon.txt"

          python3 ./scripts/zipvoice/zh-en/generate_lexicon.py

          d=sherpa-onnx-zipvoice-distill-int8-zh-en-emilia
          mkdir $d

          cp lexicon.txt $d

          pushd $d

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/prompt.txt
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/news-female.wav
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/news-female-2.wav
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/leijun-1.wav


          curl -SL -O https://huggingface.co/k2-fsa/ZipVoice/resolve/main/zipvoice_distill/fm_decoder_int8.onnx
          curl -SL -O https://huggingface.co/k2-fsa/ZipVoice/resolve/main/zipvoice_distill/text_encoder_int8.onnx

          mv fm_decoder_int8.onnx decoder.int8.onnx
          mv text_encoder_int8.onnx encoder.int8.onnx

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
          tar xf espeak-ng-data.tar.bz2
          rm espeak-ng-data.tar.bz2

          curl -SL -O https://huggingface.co/k2-fsa/ZipVoice/resolve/main/zipvoice_distill/tokens.txt
          mkdir test_wavs
          mv *.wav test_wavs

          mv prompt.txt test_wavs

          ls -lh
          popd
          tar cjfv $d.tar.bz2 $d
          rm -rf $d
          ls -lh $d.tar.bz2

      - name: sherpa-onnx-zipvoice-distill-zh-en-emilia-fp32
        shell: bash
        run: |
          echo "Generate lexicon.txt"

          python3 ./scripts/zipvoice/zh-en/generate_lexicon.py

          d=sherpa-onnx-zipvoice-distill-fp32-zh-en-emilia
          mkdir $d

          cp lexicon.txt $d

          pushd $d

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/prompt.txt
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/news-female.wav
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/news-female-2.wav
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/leijun-1.wav


          curl -SL -O https://huggingface.co/k2-fsa/ZipVoice/resolve/main/zipvoice_distill/fm_decoder.onnx
          curl -SL -O https://huggingface.co/k2-fsa/ZipVoice/resolve/main/zipvoice_distill/text_encoder.onnx

          mv fm_decoder.onnx decoder.onnx
          mv text_encoder.onnx encoder.onnx

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
          tar xf espeak-ng-data.tar.bz2
          rm espeak-ng-data.tar.bz2

          curl -SL -O https://huggingface.co/k2-fsa/ZipVoice/resolve/main/zipvoice_distill/tokens.txt
          mkdir test_wavs
          mv *.wav test_wavs

          mv prompt.txt test_wavs

          ls -lh
          popd
          tar cjfv $d.tar.bz2 $d
          rm -rf $d
          ls -lh $d.tar.bz2

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: tts-models


================================================
FILE: .github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml
================================================
name: wasm-simd-hf-space-en-asr-zipformer

on:
  push:
    branches:
      - wasm
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'

  workflow_dispatch:

concurrency:
  group: wasm-simd-hf-space-en-asr-zipformer-${{ github.ref }}
  cancel-in-progress: true

jobs:
  wasm-simd-hf-space-en-asr-zipformer:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Install emsdk
        uses: mymindstorm/setup-emsdk@v14
        with:
          version: 3.1.53
          actions-cache-folder: 'emsdk-cache'

      - name: View emsdk version
        shell: bash
        run: |
          emcc -v
          echo "--------------------"
          emcc --check

      - name: Download model files
        shell: bash
        run: |
          cd wasm/asr/assets
          ls -lh
          echo "----------"

          wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-en-2023-06-21.tar.bz2
          tar xvf sherpa-onnx-streaming-zipformer-en-2023-06-21.tar.bz2
          rm sherpa-onnx-streaming-zipformer-en-2023-06-21.tar.bz2
          mv sherpa-onnx-streaming-zipformer-en-2023-06-21/encoder-epoch-99-avg-1.int8.onnx encoder.onnx
          mv sherpa-onnx-streaming-zipformer-en-2023-06-21/decoder-epoch-99-avg-1.onnx decoder.onnx
          mv sherpa-onnx-streaming-zipformer-en-2023-06-21/joiner-epoch-99-avg-1.onnx joiner.onnx
          mv sherpa-onnx-streaming-zipformer-en-2023-06-21/tokens.txt ./

          rm -rf sherpa-onnx-streaming-zipformer-en-2023-06-21

          ls -lh

      - name: Build sherpa-onnx for WebAssembly (ASR)
        shell: bash
        run: |
          ./build-wasm-simd-asr.sh

      - name: collect files
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          dst=sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-en-asr-zipformer
          mv build-wasm-simd-asr/install/bin/wasm/asr $dst
          ls -lh $dst
          tar cjfv ${dst}.tar.bz2 ./${dst}

      - name: Upload wasm files
        uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-wasm-simd-en-asr-zipformer
          path: ./sherpa-onnx-wasm-simd-*.tar.bz2

      - name: Release
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: ./*.tar.bz2

      - name: Publish to ModelScope
        # if: false
        env:
          MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
        uses: nick-fields/retry@v2
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf ms
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://www.modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-en.git ms
            cd ms
            rm -fv *.js
            rm -fv *.data
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-*/* .

            git status
            git lfs track "*.data"
            git lfs track "*.wasm"
            ls -lh

            git add .
            git commit -m "update model"
            git push https://oauth2:${MS_TOKEN}@www.modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-en.git

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v2
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-en huggingface
            cd huggingface
            rm -rf ./*
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-*/* .

            git status
            git lfs track "*.data"
            git lfs track "*.wasm"
            ls -lh

            git add .
            git commit -m "update model"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-en main


================================================
FILE: .github/workflows/wasm-simd-hf-space-silero-vad.yaml
================================================
name: wasm-simd-hf-space-silero-vad

on:
  push:
    branches:
      - wasm
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'

  workflow_dispatch:

concurrency:
  group: wasm-simd-hf-space-silero-vad-${{ github.ref }}
  cancel-in-progress: true

jobs:
  wasm-simd-hf-space-silero-vad:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Install emsdk
        uses: mymindstorm/setup-emsdk@v14
        with:
          version: 3.1.53
          actions-cache-folder: 'emsdk-cache'

      - name: View emsdk version
        shell: bash
        run: |
          emcc -v
          echo "--------------------"
          emcc --check

      - name: Download model files
        shell: bash
        run: |
          cd wasm/vad/assets
          ls -lh
          echo "----------"
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
          ls -lh

      - name: Build sherpa-onnx for WebAssembly
        shell: bash
        run: |
          ./build-wasm-simd-vad.sh

      - name: collect files
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          dst=sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-vad
          mv build-wasm-simd-vad/install/bin/wasm/vad $dst
          ls -lh $dst
          tar cjfv $dst.tar.bz2 ./$dst

      - name: Upload wasm files
        uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-wasm-simd-vad
          path: ./sherpa-onnx-wasm-simd-*.tar.bz2

      - name: Release
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: ./*.tar.bz2

      - name: Publish to ModelScope
        # if: false
        env:
          MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
        uses: nick-fields/retry@v2
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf ms
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-sherpa-onnx.git ms
            cd ms
            rm -fv *.js
            rm -fv *.data
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-vad/* .

            git status
            git lfs track "*.data"
            git lfs track "*.wasm"
            ls -lh

            git add .
            git commit -m "update model"
            git push https://oauth2:${MS_TOKEN}@www.modelscope.cn/studios/csukuangfj/web-assembly-vad-sherpa-onnx.git

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v2
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://huggingface.co/spaces/k2-fsa/web-assembly-vad-sherpa-onnx huggingface
            cd huggingface
            rm -fv *.js
            rm -fv *.data
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-vad/* .

            git status
            git lfs track "*.data"
            git lfs track "*.wasm"
            ls -lh

            git add .
            git commit -m "update model"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/spaces/k2-fsa/web-assembly-vad-sherpa-onnx main


================================================
FILE: .github/workflows/wasm-simd-hf-space-speaker-diarization.yaml
================================================
name: wasm-simd-hf-space-speaker-diarization

on:
  push:
    branches:
      - wasm
      - wasm-speaker-diarization
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'

  workflow_dispatch:

concurrency:
  group: wasm-simd-hf-space-speaker-diarization-${{ github.ref }}
  cancel-in-progress: true

jobs:
  wasm-simd-hf-space-speaker-diarization:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Install emsdk
        uses: mymindstorm/setup-emsdk@v14
        with:
          version: 3.1.53
          actions-cache-folder: 'emsdk-cache'

      - name: View emsdk version
        shell: bash
        run: |
          emcc -v
          echo "--------------------"
          emcc --check

      - name: Download model files
        shell: bash
        run: |
          cd wasm/speaker-diarization/assets/
          ls -lh
          echo "----------"

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
          tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
          rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
          mv sherpa-onnx-pyannote-segmentation-3-0/model.onnx ./segmentation.onnx
          rm -rf sherpa-onnx-pyannote-segmentation-3-0

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
          mv 3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ./embedding.onnx

          echo "----------"

          ls -lh

      - name: Build sherpa-onnx for WebAssembly
        shell: bash
        run: |
          ./build-wasm-simd-speaker-diarization.sh

      - name: collect files
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          dst=sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-speaker-diarization
          mv build-wasm-simd-speaker-diarization/install/bin/wasm/speaker-diarization $dst
          ls -lh $dst
          tar cjfv $dst.tar.bz2 ./$dst

      - name: Upload wasm files
        uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-wasm-simd-speaker-diarization
          path: ./sherpa-onnx-wasm-simd-*.tar.bz2

      - name: Release
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: ./*.tar.bz2

      - name: Publish to ModelScope
        # if: false
        env:
          MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
        uses: nick-fields/retry@v2
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf ms
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://www.modelscope.cn/studios/csukuangfj/web-assembly-speaker-diarization-sherpa-onnx.git ms
            cd ms
            rm -fv *.js
            rm -fv *.data
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-*/* .

            git status
            git lfs track "*.data"
            git lfs track "*.wasm"
            ls -lh

            git add .
            git commit -m "update model"
            git push https://oauth2:${MS_TOKEN}@www.modelscope.cn/studios/csukuangfj/web-assembly-speaker-diarization-sherpa-onnx.git

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v2
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/spaces/k2-fsa/web-assembly-speaker-diarization-sherpa-onnx huggingface
            ls -lh

            cd huggingface
            rm -fv *.js
            rm -fv *.data
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-*/* .

            git status
            git lfs track "*.data"
            git lfs track "*.wasm"
            ls -lh

            git add .
            git commit -m "update model"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/spaces/k2-fsa/web-assembly-speaker-diarization-sherpa-onnx main


================================================
FILE: .github/workflows/wasm-simd-hf-space-speech-enhancement-gtcrn.yaml
================================================
name: wasm-simd-hf-space-speech-enhancement-gtcrn

on:
  push:
    branches:
      - wasm
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'

  workflow_dispatch:

concurrency:
  group: wasm-simd-hf-space-speech-enhancement-gtcrn-${{ github.ref }}
  cancel-in-progress: true

jobs:
  wasm-simd-hf-space-speech-enhancement-gtcrn:
    name: wasm gtcrn
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Install emsdk
        uses: mymindstorm/setup-emsdk@v14
        with:
          version: 3.1.53
          actions-cache-folder: 'emsdk-cache'

      - name: View emsdk version
        shell: bash
        run: |
          emcc -v
          echo "--------------------"
          emcc --check

      - name: Download model
        shell: bash
        run: |
          cd wasm/speech-enhancement/assets
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
          mv gtcrn_simple.onnx gtcrn.onnx

      - name: build
        shell: bash
        run: |
          ./build-wasm-simd-speech-enhancement.sh

      - name: collect files
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          d=sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-speech-enhancement-gtcrn
          mv build-wasm-simd-speech-enhancement/install/bin/wasm/speech-enhancement $d
          ls -lh $d
          tar cjfv $d.tar.bz2 $d

          echo "---"

          ls -lh *.tar.bz2

      - uses: actions/upload-artifact@v4
        with:
          name: wasm-speech-enhancement-gtcrn
          path: ./*.tar.bz2

      - name: Release
        # if: github.repository_owner == 'csukuangfj' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: ./*.tar.bz2
          # repo_name: k2-fsa/sherpa-onnx
          # repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          # tag: v1.10.46

      - name: Release
        if: github.repository_owner == 'k2-fsa' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: ./*.tar.bz2

      - name: Publish to ModelScope
        # if: false
        env:
          MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
        uses: nick-fields/retry@v2
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf ms
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone http://www.modelscope.cn/studios/csukuangfj/wasm-speech-enhancement-gtcrn.git ms

            cd ms
            rm -fv *.js
            rm -fv *.data

            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-*/* .

            git status
            git lfs track "*.data"
            git lfs track "*.wasm"
            ls -lh

            git add .
            git commit -m "update model"
            git push http://oauth2:${MS_TOKEN}@www.modelscope.cn/studios/csukuangfj/wasm-speech-enhancement-gtcrn.git

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v2
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://huggingface.co/spaces/k2-fsa/wasm-speech-enhancement-gtcrn huggingface
            cd huggingface
            rm -fv *.js
            rm -fv *.data
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-*/* .

            git status
            git lfs track "*.data"
            git lfs track "*.wasm"
            ls -lh

            git add .
            git commit -m "update model"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/spaces/k2-fsa/wasm-speech-enhancement-gtcrn main


================================================
FILE: .github/workflows/wasm-simd-hf-space-ten-vad.yaml
================================================
name: wasm-simd-hf-space-ten-vad

on:
  push:
    branches:
      - wasm
      - wasm-ten-vad
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'

  workflow_dispatch:

concurrency:
  group: wasm-simd-hf-space-ten-vad-${{ github.ref }}
  cancel-in-progress: true

jobs:
  wasm-simd-hf-space-ten-vad:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Install emsdk
        uses: mymindstorm/setup-emsdk@v14
        with:
          version: 3.1.53
          actions-cache-folder: 'emsdk-cache'

      - name: View emsdk version
        shell: bash
        run: |
          emcc -v
          echo "--------------------"
          emcc --check

      - name: Download model files
        shell: bash
        run: |
          cd wasm/vad/assets
          ls -lh
          echo "----------"
          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
          ls -lh
          cd ..
          sed -i.bak "s|.*(with <a .*|    (with <a href="https://github.com/TEN-framework/ten-vad">ten-vad</a>)|" ./index.html
          git diff .

      - name: Build sherpa-onnx for WebAssembly
        shell: bash
        run: |
          ./build-wasm-simd-vad.sh

      - name: collect files
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          dst=sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-ten-vad
          mv build-wasm-simd-vad/install/bin/wasm/vad $dst
          ls -lh $dst
          tar cjfv $dst.tar.bz2 ./$dst

      - name: Upload wasm files
        uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-wasm-simd-ten-vad
          path: ./sherpa-onnx-wasm-simd-*.tar.bz2

      - name: Release
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: ./*.tar.bz2

      - name: Publish to ModelScope
        # if: false
        env:
          MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
        uses: nick-fields/retry@v2
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf ms
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://www.modelscope.cn/studios/csukuangfj/web-assembly-ten-vad-sherpa-onnx.git ms
            cd ms
            rm -fv *.js
            rm -fv *.data
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-ten-vad/* .

            git status
            git lfs track "*.data"
            git lfs track "*.wasm"
            ls -lh

            git add .
            git commit -m "update model"
            git push https://oauth2:${MS_TOKEN}@www.modelscope.cn/studios/csukuangfj/web-assembly-ten-vad-sherpa-onnx.git

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v2
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://huggingface.co/spaces/k2-fsa/web-assembly-ten-vad-sherpa-onnx huggingface
            cd huggingface
            rm -fv *.js
            rm -fv *.data
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-ten-vad/* .

            git status
            git lfs track "*.data"
            git lfs track "*.wasm"
            ls -lh

            git add .
            git commit -m "update model"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/spaces/k2-fsa/web-assembly-ten-vad-sherpa-onnx main


================================================
FILE: .github/workflows/wasm-simd-hf-space-tts.yaml
================================================
name: wasm-simd-hf-space-tts

on:
  push:
    branches:
      - wasm
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'

  workflow_dispatch:

concurrency:
  group: wasm-simd-hf-space-tts${{ github.ref }}
  cancel-in-progress: true

jobs:
  wasm-simd-hf-space-tts:
    name: ${{ matrix.index }}/${{ matrix.total }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        total: ["7"]
        index: ["0", "1", "2", "3", "4", "5", "6"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip jinja2

      - name: Install emsdk
        uses: mymindstorm/setup-emsdk@v14
        with:
          version: 3.1.53
          actions-cache-folder: 'emsdk-cache'

      - name: View emsdk version
        shell: bash
        run: |
          emcc -v
          echo "--------------------"
          emcc --check

      - name: Generate build script
        shell: bash
        run: |
          cd scripts/wasm

          total=${{ matrix.total }}
          index=${{ matrix.index }}

          ./generate-tts.py --total $total --index $index

          chmod +x run-tts.sh
          mv -v ./run-tts.sh ../..

      - name: Show build scripts
        shell: bash
        run: |
          cat ./run-tts.sh

      - uses: actions/upload-artifact@v4
        with:
          name: run-tts-${{ matrix.index }}
          path: ./run-tts.sh

      - name: Build sherpa-onnx for WebAssembly
        shell: bash
        env:
          MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          ./run-tts.sh

      - name: Release
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: ./*.tar.bz2
          # repo_name: k2-fsa/sherpa-onnx
          # repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          # tag: v1.12.19

      - name: Upload wasm files
        uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-wasm-simd-tts-${{ matrix.index }}
          path: ./sherpa-onnx-wasm-simd-*.tar.bz2


================================================
FILE: .github/workflows/wasm-simd-hf-space-vad-asr.yaml
================================================
name: wasm-simd-hf-space-vad-asr

on:
  push:
    branches:
      - wasm
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'

  workflow_dispatch:

concurrency:
  group: wasm-simd-hf-space-vad-asr${{ github.ref }}
  cancel-in-progress: true

jobs:
  wasm-simd-hf-space-vad-asr:
    name: ${{ matrix.index }}/${{ matrix.total }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        total: ["15"]
        index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip jinja2

      - name: Install emsdk
        uses: mymindstorm/setup-emsdk@v14
        with:
          version: 3.1.53
          actions-cache-folder: 'emsdk-cache'

      - name: View emsdk version
        shell: bash
        run: |
          emcc -v
          echo "--------------------"
          emcc --check

      - name: Generate build script
        shell: bash
        run: |
          cd scripts/wasm

          total=${{ matrix.total }}
          index=${{ matrix.index }}

          ./generate-vad-asr.py --total $total --index $index

          chmod +x run-vad-asr.sh
          mv -v ./run-vad-asr.sh ../..

      - name: Show build scripts
        shell: bash
        run: |
          cat ./run-vad-asr.sh

      - uses: actions/upload-artifact@v4
        with:
          name: run-vad-asr-${{ matrix.index }}
          path: ./run-vad-asr.sh

      - name: Build sherpa-onnx for WebAssembly
        shell: bash
        env:
          MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          ./run-vad-asr.sh

      - name: Release
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: ./*.tar.bz2
          # repo_name: k2-fsa/sherpa-onnx
          # repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          # tag: v1.10.23

      - name: Upload wasm files
        uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-wasm-simd-vad-asr-${{ matrix.index }}
          path: ./sherpa-onnx-wasm-simd-*.tar.bz2


================================================
FILE: .github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml
================================================
name: wasm-simd-hf-space-zh-cantonese-en-asr-paraformer

on:
  push:
    branches:
      - wasm
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'

  workflow_dispatch:

concurrency:
  group: wasm-simd-hf-space-zh-cantonese-en-asr-paraformer-${{ github.ref }}
  cancel-in-progress: true

jobs:
  wasm-simd-hf-space-zh-cantonese-en-asr-paraformer:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Install emsdk
        uses: mymindstorm/setup-emsdk@v14
        with:
          version: 3.1.53
          actions-cache-folder: 'emsdk-cache'

      - name: View emsdk version
        shell: bash
        run: |
          emcc -v
          echo "--------------------"
          emcc --check

      - name: Download model files
        shell: bash
        run: |
          cd wasm/asr/assets
          ls -lh
          echo "----------"

          wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en.tar.bz2
          tar xvf sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en.tar.bz2
          rm sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en.tar.bz2

          mv sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/encoder.int8.onnx encoder.onnx
          mv sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/decoder.int8.onnx decoder.onnx
          mv sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/tokens.txt ./

          rm -rf sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en

          ls -lh

          cd ../

          sed -i.bak s/"type = 0"/"type = 1"/g ./sherpa-onnx-asr.js
          sed -i.bak s/Zipformer/Paraformer/g ./index.html

          git diff

      - name: Build sherpa-onnx for WebAssembly (ASR)
        shell: bash
        run: |
          ./build-wasm-simd-asr.sh

      - name: collect files
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          dst=sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-zh-cantonese-en-asr-paraformer
          mv build-wasm-simd-asr/install/bin/wasm/asr $dst
          ls -lh $dst
          tar cjfv ${dst}.tar.bz2 ./${dst}

      - name: Upload wasm files
        uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-wasm-simd-zh-cantonese-en-asr-paraformer
          path: ./sherpa-onnx-wasm-simd-*.tar.bz2

      - name: Release
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: ./*.tar.bz2

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v2
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer huggingface
            cd huggingface
            rm -fv *.js
            rm -fv *.data
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-*/* .

            git status
            git lfs track "*.data"
            git lfs track "*.wasm"
            ls -lh

            git add .
            git commit -m "update model"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer main

      - name: Publish to ModelScope
        # if: false
        env:
          MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
        uses: nick-fields/retry@v2
        with:
          max_attempts: 10
          timeout_seconds: 600
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf ms
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://www.modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer.git ms
            cd ms
            rm -fv *.js
            rm -fv *.data
            git config lfs.locksverify true
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-*/* .

            git status
            git lfs track "*.data"
            git lfs track "*.wasm"
            ls -lh

            git add .
            git commit -m "update model"
            git push https://oauth2:${MS_TOKEN}@www.modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer.git


================================================
FILE: .github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml
================================================
name: wasm-simd-hf-space-zh-en-asr-paraformer

on:
  push:
    branches:
      - wasm
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'

  workflow_dispatch:

concurrency:
  group: wasm-simd-hf-space-zh-en-asr-paraformer-${{ github.ref }}
  cancel-in-progress: true

jobs:
  wasm-simd-hf-space-zh-en-asr-paraformer:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Install emsdk
        uses: mymindstorm/setup-emsdk@v14
        with:
          version: 3.1.53
          actions-cache-folder: 'emsdk-cache'

      - name: View emsdk version
        shell: bash
        run: |
          emcc -v
          echo "--------------------"
          emcc --check

      - name: Download model files
        shell: bash
        run: |
          cd wasm/asr/assets
          ls -lh
          echo "----------"

          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
          tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
          rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2

          mv sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx encoder.onnx
          mv sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx decoder.onnx
          mv sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt ./

          rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en

          ls -lh

          cd ../

          sed -i.bak s/"type = 0"/"type = 1"/g ./sherpa-onnx-asr.js
          sed -i.bak s/Zipformer/Paraformer/g ./index.html

          git diff

      - name: Build sherpa-onnx for WebAssembly (ASR)
        shell: bash
        run: |
          ./build-wasm-simd-asr.sh

      - name: collect files
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          dst=sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-zh-en-asr-paraformer
          mv build-wasm-simd-asr/install/bin/wasm/asr $dst
          ls -lh $dst
          tar cjfv ${dst}.tar.bz2 ./${dst}

      - name: Upload wasm files
        uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-wasm-simd-zh-en-asr-paraformer
          path: ./sherpa-onnx-wasm-simd-*.tar.bz2

      - name: Release
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: ./*.tar.bz2

      - name: Publish to ModelScope
        # if: false
        env:
          MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
        uses: nick-fields/retry@v2
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf ms
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://www.modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en-paraformer.git ms
            cd ms
            rm -fv *.js
            rm -fv *.data
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-*/* .

            git status
            git lfs track "*.data"
            git lfs track "*.wasm"
            ls -lh

            git add .
            git commit -m "update model"
            git push https://oauth2:${MS_TOKEN}@www.modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en-paraformer.git

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v2
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en-paraformer huggingface
            cd huggingface
            rm -fv *.js
            rm -fv *.data
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-*/* .

            git status
            git lfs track "*.data"
            git lfs track "*.wasm"
            ls -lh

            git add .
            git commit -m "update model"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en-paraformer main


================================================
FILE: .github/workflows/wasm-simd-hf-space-zh-en-asr-zipformer.yaml
================================================
name: wasm-simd-hf-space-zh-en-asr-zipformer

on:
  push:
    branches:
      - wasm
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'

  workflow_dispatch:

concurrency:
  group: wasm-simd-hf-space-zh-en-asr-zipformer-${{ github.ref }}
  cancel-in-progress: true

jobs:
  wasm-simd-hf-space-zh-en-asr-zipformer:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Install emsdk
        uses: mymindstorm/setup-emsdk@v14
        with:
          version: 3.1.53
          actions-cache-folder: 'emsdk-cache'

      - name: View emsdk version
        shell: bash
        run: |
          emcc -v
          echo "--------------------"
          emcc --check

      - name: Download model files
        shell: bash
        run: |
          cd wasm/asr/assets
          ls -lh
          echo "----------"
          wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
          tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
          rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
          mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx encoder.onnx
          mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx decoder.onnx
          mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx joiner.onnx
          mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ./
          rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/

          ls -lh

      - name: Build sherpa-onnx for WebAssembly (ASR)
        shell: bash
        run: |
          ./build-wasm-simd-asr.sh

      - name: collect files
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          dst=sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-zh-en-asr-zipformer
          mv build-wasm-simd-asr/install/bin/wasm/asr $dst
          ls -lh $dst
          tar cjfv ${dst}.tar.bz2 ./${dst}

      - name: Upload wasm files
        uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-wasm-simd-zh-en-asr-zipformer
          path: ./sherpa-onnx-wasm-simd-*.tar.bz2

      - name: Release
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: ./*.tar.bz2

      - name: Publish to ModelScope
        # if: false
        env:
          MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
        uses: nick-fields/retry@v2
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf ms
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://www.modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en.git ms
            cd ms
            rm -fv *.js
            rm -fv *.data
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-*/* .

            git status
            git lfs track "*.data"
            git lfs track "*.wasm"
            ls -lh

            git add .
            git commit -m "update model"
            git push https://oauth2:${MS_TOKEN}@www.modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en.git

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v2
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en huggingface
            cd huggingface
            rm -fv *.js
            rm -fv *.data
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-*/* .

            git status
            git lfs track "*.data"
            git lfs track "*.wasm"
            ls -lh

            git add .
            git commit -m "update model"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en main


================================================
FILE: .github/workflows/windows-arm64.yaml
================================================
name: windows-arm64

on:
  push:
    branches:
      - master
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'
    paths:
      - '.github/workflows/windows-arm64.yaml'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'

  workflow_dispatch:

concurrency:
  group: windows-arm64-${{ github.ref }}
  cancel-in-progress: true

jobs:
  windows_arm64:
    name: shared-${{ matrix.shared_lib }} tts-${{ matrix.with_tts }} static CRT ${{ matrix.use_static_crt }} ${{ matrix.build_type }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [windows-2022]
        build_type: [Release, Debug, MinSizeRel, RelWithDebInfo]
        shared_lib: [ON, OFF]
        with_tts: [ON, OFF]
        use_static_crt: [ON, OFF]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up MSVC
        uses: ilammy/msvc-dev-cmd@v1

      - name: find dumpbin
        shell: bash
        run: |
          which dumpbin

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Configure CMake
        shell: bash
        run: |
          mkdir build
          cd build
          cmake \
            -A ARM64 \
            -DSHERPA_ONNX_ENABLE_TTS=${{ matrix.with_tts }} \
            -DSHERPA_ONNX_USE_STATIC_CRT=${{ matrix.use_static_crt }} \
            -D CMAKE_BUILD_TYPE=${{ matrix.build_type }} \
            -D SHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
            -D BUILD_SHARED_LIBS=${{ matrix.shared_lib }} \
            -D CMAKE_INSTALL_PREFIX=./install \
            -D BUILD_ESPEAK_NG_EXE=OFF \
            ..

      - name: Check 1
        shell: bash
        run: |
          cd build

          cat sherpa-onnx/csrc/sherpa-onnx.vcxproj

      - name: Check 2
        shell: cmd
        run: |
          cd build

          findstr /R /C:"<RuntimeLibrary>" sherpa-onnx\csrc\sherpa-onnx.vcxproj

      - uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-vcxproj-release-windows-arm64-${{ matrix.shared_lib }}-${{ matrix.with_tts }}-static-crt-${{ matrix.use_static_crt }}-${{ matrix.build_type }}
          path: build/sherpa-onnx/csrc/sherpa-onnx.vcxproj

      - name: Check 3
        shell: bash
        run: |
          cd build

          cat c-api-examples/vad-whisper-c-api.vcxproj

      - name: Check 4
        shell: cmd
        run: |
          cd build

          findstr /R /C:"<RuntimeLibrary>" "c-api-examples\vad-whisper-c-api.vcxproj"

      - uses: actions/upload-artifact@v4
        with:
          name: vad-whisper-c-api-vcxproj-release-windows-arm64-${{ matrix.shared_lib }}-${{ matrix.with_tts }}-static-crt-${{ matrix.use_static_crt }}-${{ matrix.build_type }}
          path: build/c-api-examples/vad-whisper-c-api.vcxproj

      - name: Build sherpa-onnx for windows
        shell: bash
        run: |
          cd build
          cmake --build . --config ${{ matrix.build_type }} -- -m:2
          cmake --build . --config ${{ matrix.build_type }} --target install -- -m:2

          ls -lh ./bin/${{ matrix.build_type }}/sherpa-onnx.exe

      - name: Show exe
        shell: bash
        run: |
          ls -lh $PWD/build/bin/${{ matrix.build_type }}

      - name: Dump CRT dependencies
        shell: cmd
        run: |
          dumpbin /dependents build\bin\${{ matrix.build_type }}\sherpa-onnx.exe

      - uses: actions/upload-artifact@v4
        with:
          name: release-windows-arm64-${{ matrix.shared_lib }}-${{ matrix.with_tts }}-static-crt-${{ matrix.use_static_crt }}-${{ matrix.build_type }}
          path: build/install/*

      - name: Copy files
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          shared_lib=${{ matrix.shared_lib }}
          use_static_crt=${{ matrix.use_static_crt }}
          if [[ $shared_lib == "ON" ]]; then
            if [[ $use_static_crt == ON ]]; then
              suffix=shared-MT-${{ matrix.build_type }}
            else
              suffix=shared-MD-${{ matrix.build_type }}
            fi
          else
            if [[ $use_static_crt == ON ]]; then
              suffix=static-MT-${{ matrix.build_type }}
            else
              suffix=static-MD-${{ matrix.build_type }}
            fi
          fi

          if [[ ${{ matrix.with_tts }} == ON ]]; then
            dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-win-arm64-$suffix
          else
            dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-win-arm64-$suffix-no-tts
          fi

          if [[ "${{ matrix.build_type }}" == "Debug" || "${{ matrix.build_type }}" == "RelWithDebInfo" ]]; then
            echo "Copy matching PDB files..."

            build_bin_dir=build/bin/${{ matrix.build_type }}
            install_bin_dir=build/install/bin

            for exe in ${install_bin_dir}/*.exe; do
              base=$(basename "$exe" .exe)
              pdb=${build_bin_dir}/${base}.pdb

              if [[ -f "$pdb" ]]; then
                echo "Copying $pdb"
                cp "$pdb" ${install_bin_dir}/
              else
                echo "No PDB found for $base"
              fi
            done
          fi

          mkdir $dst

          cp -a build/install/bin $dst/
          cp -a build/install/lib $dst/
          cp -a build/install/include $dst/

          ls -lh $dst/bin/
          echo "---"
          ls -lh $dst/lib/

          tar cjvf ${dst}.tar.bz2 $dst

          ls -lh $dst/

          ls -lh *.tar.bz2

      # https://huggingface.co/docs/hub/spaces-github-actions
      - name: Publish to huggingface
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_CLONE_PROTECTION_ACTIVE=false
            GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj2/sherpa-onnx-libs huggingface

            cd huggingface
            dst=win64/$SHERPA_ONNX_VERSION
            mkdir -p $dst

            cp -v ../sherpa-onnx-*.tar.bz2 $dst

            git status
            git lfs track "*.bz2"

            git add .

            git commit -m "upload sherpa-onnx-${SHERPA_ONNX_VERSION}"

            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs main

      - name: Release pre-compiled binaries and libs for Windows arm64
        if: github.repository_owner == 'csukuangfj' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*-win-arm64*.tar.bz2
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: v1.12.28

      - name: Release pre-compiled binaries and libs for Windows arm64
        if: github.repository_owner == 'k2-fsa'&& github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*-win-arm64*.tar.bz2


================================================
FILE: .github/workflows/windows-x64-cuda.yaml
================================================
name: windows-x64-cuda

on:
  push:
    branches:
      - master
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'
    paths:
      - '.github/workflows/windows-x64-cuda.yaml'
      - '.github/scripts/test-online-transducer.sh'
      - '.github/scripts/test-online-paraformer.sh'
      - '.github/scripts/test-offline-transducer.sh'
      - '.github/scripts/test-offline-ctc.sh'
      - '.github/scripts/test-online-ctc.sh'
      - '.github/scripts/test-offline-tts.sh'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'

  workflow_dispatch:

concurrency:
  group: windows-x64-cuda-${{ github.ref }}
  cancel-in-progress: true

jobs:
  windows_x64_cuda:
    name: Windows x64 CUDA ${{ matrix.onnxruntime_version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [windows-2022]
        onnxruntime_version: ["1.17.1", "1.23.2"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Configure CMake
        shell: bash
        run: |
          onnxruntime_version=${{ matrix.onnxruntime_version }}
          curl -SL -O https://github.com/microsoft/onnxruntime/releases/download/v$onnxruntime_version/onnxruntime-win-x64-gpu-$onnxruntime_version.zip
          unzip onnxruntime-win-x64-gpu-$onnxruntime_version.zip

          export SHERPA_ONNXRUNTIME_LIB_DIR=$PWD/onnxruntime-win-x64-gpu-$onnxruntime_version/lib
          export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$PWD/onnxruntime-win-x64-gpu-$onnxruntime_version/include

          mkdir build
          cd build
          cmake \
          -A x64 \
          -D CMAKE_BUILD_TYPE=Release \
          -D BUILD_SHARED_LIBS=ON \
          -D CMAKE_INSTALL_PREFIX=./install \
          -D SHERPA_ONNX_ENABLE_GPU=ON \
          ..

      - name: Build sherpa-onnx for windows
        shell: bash
        run: |
          cd build
          cmake --build . --config Release -- -m:2
          cmake --build . --config Release --target install -- -m:2

          ls -lh ./bin/Release/sherpa-onnx.exe

          onnxruntime_version=${{ matrix.onnxruntime_version }}

      - name: Copy files
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-win-x64-cuda

          onnxruntime_version=${{ matrix.onnxruntime_version }}
          if [[ $onnxruntime_version == "1.23.2" ]]; then
            dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-cuda-12.x-cudnn-9.x-win-x64-cuda
          fi

          mkdir $dst

          cp -a build/install/bin $dst/
          cp -a build/install/lib $dst/
          cp -a build/install/include $dst/

          tar cjvf ${dst}.tar.bz2 $dst

      - name: Release pre-compiled binaries and libs for windows x64
        if: github.repository_owner == 'csukuangfj' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*cuda.tar.bz2
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: v1.12.15

      - name: Release pre-compiled binaries and libs for windows x64
        if: github.repository_owner == 'k2-fsa' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*cuda.tar.bz2

      - name: Test spoken language identification
        shell: bash
        run: |
          export PATH=$PWD/build/bin/Release:$PATH
          export EXE=sherpa-onnx-offline-language-identification.exe

          .github/scripts/test-spoken-language-identification.sh

      - name: Test online CTC
        shell: bash
        run: |
          export PATH=$PWD/build/bin/Release:$PATH
          export EXE=sherpa-onnx.exe

          .github/scripts/test-online-ctc.sh

      - name: Test offline TTS
        shell: bash
        run: |
          export PATH=$PWD/build/bin/Release:$PATH
          export EXE=sherpa-onnx-offline-tts.exe

          .github/scripts/test-offline-tts.sh

      - name: Test online paraformer for windows x64
        shell: bash
        run: |
          export PATH=$PWD/build/bin/Release:$PATH
          export EXE=sherpa-onnx.exe

          .github/scripts/test-online-paraformer.sh

      - name: Test offline Whisper for windows x64
        shell: bash
        run: |
          export PATH=$PWD/build/bin/Release:$PATH
          export EXE=sherpa-onnx-offline.exe

          .github/scripts/test-offline-whisper.sh

      - name: Test offline CTC for windows x64
        shell: bash
        run: |
          export PATH=$PWD/build/bin/Release:$PATH
          export EXE=sherpa-onnx-offline.exe

          .github/scripts/test-offline-ctc.sh

      - name: Test offline transducer for Windows x64
        shell: bash
        run: |
          export PATH=$PWD/build/bin/Release:$PATH
          export EXE=sherpa-onnx-offline.exe

          .github/scripts/test-offline-transducer.sh

      - name: Test online transducer for Windows x64
        shell: bash
        run: |
          export PATH=$PWD/build/bin/Release:$PATH
          export EXE=sherpa-onnx.exe

          .github/scripts/test-online-transducer.sh

      - name: Test online transducer (C API)
        shell: bash
        run: |
          export PATH=$PWD/build/bin/Release:$PATH
          export EXE=decode-file-c-api.exe

          .github/scripts/test-online-transducer.sh


================================================
FILE: .github/workflows/windows-x64-jni.yaml
================================================
name: windows-x64-jni

on:
  push:
    branches:
      - jni
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'

  workflow_dispatch:

concurrency:
  group: windows-x64-jni-${{ github.ref }}
  cancel-in-progress: true

jobs:
  windows_x64_jni:
    name: windows x64 jni
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [windows-2022]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - uses: actions/setup-java@v4
        with:
          distribution: 'temurin' # See 'Supported distributions' for available options
          java-version: '21'

      - name: Configure CMake
        shell: bash
        run: |
          mkdir build
          cd build
          cmake \
            -A x64 \
            -DBUILD_SHARED_LIBS=ON \
            -D SHERPA_ONNX_ENABLE_JNI=ON \
            -DCMAKE_INSTALL_PREFIX=./install \
            -DCMAKE_BUILD_TYPE=Release \
            -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
            -DBUILD_ESPEAK_NG_EXE=OFF \
            -DSHERPA_ONNX_BUILD_C_API_EXAMPLES=OFF  \
            -DSHERPA_ONNX_ENABLE_BINARY=OFF \
            -DSHERPA_ONNX_ENABLE_C_API=OFF \
            ..

      - name: Build sherpa-onnx for windows
        shell: bash
        run: |
          cd build
          cmake --build . --config Release -- -m:2
          cmake --build . --config Release --target install -- -m:2

          rm -rf install/share
          rm -rf install/lib/share
          rm -rf install/lib/pkgconfig
          rm -rf install/lib/sherpa-onnx-c-api.*

      - uses: actions/upload-artifact@v4
        with:
          name: release-jni-windows-x64
          path: build/install/*

      - name: Copy files
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-win-x64-jni
          mkdir -p $dst

          cp -a build/install/lib $dst/ || true

          tar cjvf ${dst}.tar.bz2 $dst

      # https://huggingface.co/docs/hub/spaces-github-actions
      - name: Publish to huggingface
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_CLONE_PROTECTION_ACTIVE=false
            GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj2/sherpa-onnx-libs huggingface

            cd huggingface
            dst=jni/$SHERPA_ONNX_VERSION
            mkdir -p $dst

            cp -v ../sherpa-onnx-*.tar.bz2 $dst

            git status
            git lfs track "*.bz2"

            git add .

            git commit -m "upload sherpa-onnx-${SHERPA_ONNX_VERSION}"

            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs main

      - name: Release pre-compiled binaries and libs for Windows x64
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*.tar.bz2
          # repo_name: k2-fsa/sherpa-onnx
          # repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          # tag: v1.12.18


================================================
FILE: .github/workflows/windows-x64.yaml
================================================
name: windows-x64

on:
  push:
    branches:
      - master
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'
    paths:
      - '.github/workflows/windows-x64.yaml'
      - '.github/scripts/test-online-transducer.sh'
      - '.github/scripts/test-online-paraformer.sh'
      - '.github/scripts/test-offline-transducer.sh'
      - '.github/scripts/test-offline-ctc.sh'
      - '.github/scripts/test-online-ctc.sh'
      - '.github/scripts/test-offline-tts.sh'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'

  workflow_dispatch:

concurrency:
  group: windows-x64-${{ github.ref }}
  cancel-in-progress: true

jobs:
  windows_x64:
    name: shared-${{ matrix.shared_lib }} tts-${{ matrix.with_tts }} static CRT ${{ matrix.use_static_crt }} ${{ matrix.build_type }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [windows-2022]
        build_type: [Release, Debug, MinSizeRel, RelWithDebInfo]
        shared_lib: [ON, OFF]
        with_tts: [ON, OFF]
        use_static_crt: [ON, OFF]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Set up MSVC
        uses: ilammy/msvc-dev-cmd@v1

      - name: find dumpbin
        shell: bash
        run: |
          which dumpbin

      - name: Configure CMake
        shell: bash
        run: |
          mkdir build

          cmake --version

          cd build
          cmake \
            -A x64 \
            -DSHERPA_ONNX_ENABLE_TTS=${{ matrix.with_tts }} \
            -DSHERPA_ONNX_USE_STATIC_CRT=${{ matrix.use_static_crt }} \
            -D CMAKE_BUILD_TYPE=${{ matrix.build_type }} \
            -D SHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
            -D BUILD_SHARED_LIBS=${{ matrix.shared_lib }} \
            -D CMAKE_INSTALL_PREFIX=./install \
            -D BUILD_ESPEAK_NG_EXE=OFF \
            ..

      - name: Check 1
        shell: bash
        run: |
          cd build

          cat sherpa-onnx/csrc/sherpa-onnx.vcxproj

      - name: Check 2
        shell: cmd
        run: |
          cd build

          findstr /R /C:"<RuntimeLibrary>" sherpa-onnx\csrc\sherpa-onnx.vcxproj

      - uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-vcxproj-windows-x64-${{ matrix.shared_lib }}-${{ matrix.with_tts }}-static-crt-${{ matrix.use_static_crt }}-${{ matrix.build_type }}
          path: build/sherpa-onnx/csrc/sherpa-onnx.vcxproj

      - name: Check 3
        shell: bash
        run: |
          cd build

          cat c-api-examples/vad-whisper-c-api.vcxproj

      - name: Check 4
        shell: cmd
        run: |
          cd build

          findstr /R /C:"<RuntimeLibrary>" "c-api-examples\vad-whisper-c-api.vcxproj"

      - uses: actions/upload-artifact@v4
        with:
          name: vad-whisper-c-api-vcxproj-windows-x64-${{ matrix.shared_lib }}-${{ matrix.with_tts }}-static-crt-${{ matrix.use_static_crt }}-${{ matrix.build_type }}
          path: build/c-api-examples/vad-whisper-c-api.vcxproj

      - name: Build sherpa-onnx for windows
        shell: bash
        run: |
          cd build

          cmake --version

          cmake --build . --config ${{ matrix.build_type }} -- -m:2
          cmake --build . --config ${{ matrix.build_type }} --target install -- -m:2

          ls -lh ./bin/${{ matrix.build_type }}/sherpa-onnx.exe

      - name: Show exe
        shell: bash
        run: |
          ls -lh $PWD/build/bin/${{ matrix.build_type }}

      - name: Dump CRT dependencies
        shell: cmd
        run: |
          dumpbin /dependents build\bin\${{ matrix.build_type }}\sherpa-onnx.exe

      - uses: actions/upload-artifact@v4
        with:
          name: windows-x64-${{ matrix.shared_lib }}-${{ matrix.with_tts }}-static-crt-${{ matrix.use_static_crt }}-${{ matrix.build_type }}
          path: build/install/*

      - name: Copy files
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          shared_lib=${{ matrix.shared_lib }}
          use_static_crt=${{ matrix.use_static_crt }}
          if [[ $shared_lib == "ON" ]]; then
            if [[ $use_static_crt == ON ]]; then
              suffix=shared-MT-${{ matrix.build_type }}
            else
              suffix=shared-MD-${{ matrix.build_type }}
            fi
          else
            if [[ $use_static_crt == ON ]]; then
              suffix=static-MT-${{ matrix.build_type }}
            else
              suffix=static-MD-${{ matrix.build_type }}
            fi
          fi

          if [[ ${{ matrix.with_tts }} == ON ]]; then
            dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-win-x64-$suffix
          else
            dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-win-x64-$suffix-no-tts
          fi

          mkdir $dst

          if [[ "${{ matrix.build_type }}" == "Debug" || "${{ matrix.build_type }}" == "RelWithDebInfo" ]]; then
            echo "Copy matching PDB files..."

            build_bin_dir=build/bin/${{ matrix.build_type }}
            install_bin_dir=build/install/bin

            for exe in ${install_bin_dir}/*.exe; do
              base=$(basename "$exe" .exe)
              pdb=${build_bin_dir}/${base}.pdb

              if [[ -f "$pdb" ]]; then
                echo "Copying $pdb"
                cp "$pdb" ${install_bin_dir}/
              else
                echo "No PDB found for $base"
              fi
            done
          fi

          cp -a build/install/bin $dst/
          cp -a build/install/lib $dst/
          cp -a build/install/include $dst/

          ls -lh $dst/bin/
          echo "---"
          ls -lh $dst/lib/

          tar cjvf ${dst}.tar.bz2 $dst

          ls -lh $dst/

          ls -lh *.tar.bz2

      # https://huggingface.co/docs/hub/spaces-github-actions
      - name: Publish to huggingface
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_CLONE_PROTECTION_ACTIVE=false
            GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj2/sherpa-onnx-libs huggingface

            cd huggingface
            dst=win64/$SHERPA_ONNX_VERSION
            mkdir -p $dst

            cp -v ../sherpa-onnx-*.tar.bz2 $dst

            git status
            git lfs track "*.bz2"

            git add .

            git commit -m "upload sherpa-onnx-${SHERPA_ONNX_VERSION}"

            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs main

      - name: Release pre-compiled binaries and libs for Windows x64
        if: github.repository_owner == 'csukuangfj' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*-win-x64*.tar.bz2
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: v1.12.28

      - name: Release pre-compiled binaries and libs for Windows x64
        if: github.repository_owner == 'k2-fsa'&& github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*-win-x64*.tar.bz2

      - name: Test offline Moonshine for windows x64
        shell: bash
        run: |
          export PATH=$PWD/build/bin/${{ matrix.build_type }}:$PATH
          export EXE=sherpa-onnx-offline.exe

          .github/scripts/test-offline-moonshine.sh

      - name: Test C++ API
        shell: bash
        run: |
          export PATH=$PWD/build/bin/${{ matrix.build_type }}:$PATH
          export CXX_STREAMING_ZIPFORMER_EXE=streaming-zipformer-cxx-api.exe
          export CXX_WHISPER_EXE=whisper-cxx-api.exe
          export CXX_SENSE_VOICE_EXE=sense-voice-cxx-api.exe

          .github/scripts/test-cxx-api.sh

      - name: Test offline speaker diarization
        shell: bash
        run: |
          export PATH=$PWD/build/bin/${{ matrix.build_type }}:$PATH
          export EXE=sherpa-onnx-offline-speaker-diarization.exe

          .github/scripts/test-speaker-diarization.sh

      - name: Test online punctuation
        shell: bash
        run: |
          export PATH=$PWD/build/bin/${{ matrix.build_type }}:$PATH
          export EXE=sherpa-onnx-online-punctuation.exe

          .github/scripts/test-online-punctuation.sh

      - name: Test offline punctuation
        shell: bash
        run: |
          export PATH=$PWD/build/bin/${{ matrix.build_type }}:$PATH
          export EXE=sherpa-onnx-offline-punctuation.exe

          .github/scripts/test-offline-punctuation.sh

      - name: Test C API
        shell: bash
        run: |
          export PATH=$PWD/build/bin/${{ matrix.build_type }}:$PATH
          export SLID_EXE=spoken-language-identification-c-api.exe
          export SID_EXE=speaker-identification-c-api.exe
          export AT_EXE=audio-tagging-c-api.exe
          export PUNCT_EXE=add-punctuation-c-api.exe

          .github/scripts/test-c-api.sh

      - name: Test Audio tagging
        shell: bash
        run: |
          export PATH=$PWD/build/bin/${{ matrix.build_type }}:$PATH
          export EXE=sherpa-onnx-offline-audio-tagging.exe

          .github/scripts/test-audio-tagging.sh

      - name: Test spoken language identification (C++ API)
        shell: bash
        run: |
          export PATH=$PWD/build/bin/${{ matrix.build_type }}:$PATH
          export EXE=sherpa-onnx-offline-language-identification.exe

          .github/scripts/test-spoken-language-identification.sh

      - name: Test online CTC
        shell: bash
        run: |
          export PATH=$PWD/build/bin/${{ matrix.build_type }}:$PATH
          export EXE=sherpa-onnx.exe

          .github/scripts/test-online-ctc.sh

      - name: Test offline TTS
        if: matrix.with_tts == 'ON'
        shell: bash
        run: |
          export PATH=$PWD/build/bin/${{ matrix.build_type }}:$PATH
          export EXE=sherpa-onnx-offline-tts.exe

          .github/scripts/test-offline-tts.sh

      - name: Test online paraformer for windows x64
        shell: bash
        run: |
          export PATH=$PWD/build/bin/${{ matrix.build_type }}:$PATH
          export EXE=sherpa-onnx.exe

          .github/scripts/test-online-paraformer.sh

      - name: Test offline Whisper for windows x64
        shell: bash
        run: |
          export PATH=$PWD/build/bin/${{ matrix.build_type }}:$PATH
          export EXE=sherpa-onnx-offline.exe

          .github/scripts/test-offline-whisper.sh

      - name: Test offline CTC for windows x64
        shell: bash
        run: |
          export PATH=$PWD/build/bin/${{ matrix.build_type }}:$PATH
          export EXE=sherpa-onnx-offline.exe

          .github/scripts/test-offline-ctc.sh

      - name: Test offline transducer for Windows x64
        shell: bash
        run: |
          export PATH=$PWD/build/bin/${{ matrix.build_type }}:$PATH
          export EXE=sherpa-onnx-offline.exe

          .github/scripts/test-offline-transducer.sh

      - name: Test online transducer for Windows x64
        shell: bash
        run: |
          export PATH=$PWD/build/bin/${{ matrix.build_type }}:$PATH
          export EXE=sherpa-onnx.exe

          .github/scripts/test-online-transducer.sh

      - name: Test online transducer (C API)
        shell: bash
        run: |
          export PATH=$PWD/build/bin/${{ matrix.build_type }}:$PATH
          export EXE=decode-file-c-api.exe

          .github/scripts/test-online-transducer.sh


================================================
FILE: .github/workflows/windows-x86.yaml
================================================
name: windows-x86

on:
  push:
    branches:
      - master
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'
    paths:
      - '.github/workflows/windows-x86.yaml'
      - '.github/scripts/test-online-transducer.sh'
      - '.github/scripts/test-online-paraformer.sh'
      - '.github/scripts/test-offline-transducer.sh'
      - '.github/scripts/test-offline-ctc.sh'
      - '.github/scripts/test-offline-tts.sh'
      - '.github/scripts/test-online-ctc.sh'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'

  workflow_dispatch:

concurrency:
  group: windows-x86-${{ github.ref }}
  cancel-in-progress: true

jobs:
  windows_x86:
    name: shared-${{ matrix.shared_lib }} tts-${{ matrix.with_tts }} static CRT ${{ matrix.use_static_crt }} ${{ matrix.build_type }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [windows-2022]
        build_type: [Release, Debug, MinSizeRel, RelWithDebInfo]
        shared_lib: [OFF, ON]
        with_tts: [ON, OFF]
        use_static_crt: [ON, OFF]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Update version
        shell: bash
        run: |
          ./new-release.sh
          git diff .

      - name: Set up MSVC
        uses: ilammy/msvc-dev-cmd@v1

      - name: find dumpbin
        shell: bash
        run: |
          which dumpbin

      - name: Configure CMake
        shell: bash
        run: |
          mkdir build
          cd build
          cmake \
            -A Win32 \
            -DSHERPA_ONNX_ENABLE_TTS=${{ matrix.with_tts }} \
            -DSHERPA_ONNX_USE_STATIC_CRT=${{ matrix.use_static_crt }} \
            -D CMAKE_BUILD_TYPE=${{ matrix.build_type }} \
            -D SHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
            -D BUILD_SHARED_LIBS=${{ matrix.shared_lib }} \
            -D CMAKE_INSTALL_PREFIX=./install \
            -D BUILD_ESPEAK_NG_EXE=OFF \
            ..

      - name: Check 1
        shell: bash
        run: |
          cd build

          cat sherpa-onnx/csrc/sherpa-onnx.vcxproj

      - name: Check 2
        shell: cmd
        run: |
          cd build

          findstr /R /C:"<RuntimeLibrary>" sherpa-onnx\csrc\sherpa-onnx.vcxproj

      - uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-vcxproj-release-windows-x86-${{ matrix.shared_lib }}-${{ matrix.with_tts }}-static-crt-${{ matrix.use_static_crt }}-${{ matrix.build_type }}
          path: build/sherpa-onnx/csrc/sherpa-onnx.vcxproj

      - name: Check 3
        shell: bash
        run: |
          cd build

          cat c-api-examples/vad-whisper-c-api.vcxproj

      - name: Check 4
        shell: cmd
        run: |
          cd build

          findstr /R /C:"<RuntimeLibrary>" "c-api-examples\vad-whisper-c-api.vcxproj"

      - uses: actions/upload-artifact@v4
        with:
          name: vad-whisper-c-api-vcxproj-release-windows-x86-${{ matrix.shared_lib }}-${{ matrix.with_tts }}-static-crt-${{ matrix.use_static_crt }}-${{ matrix.build_type }}
          path: build/c-api-examples/vad-whisper-c-api.vcxproj

      - name: Build sherpa-onnx for windows
        shell: bash
        run: |
          cd build
          cmake --build . --config ${{ matrix.build_type }} -- -m:2
          cmake --build . --config ${{ matrix.build_type }} --target install -- -m:2

          ls -lh ./bin/${{ matrix.build_type }}/sherpa-onnx.exe

      - name: Show exe
        shell: bash
        run: |
          ls -lh $PWD/build/bin/${{ matrix.build_type }}

      - name: Dump CRT dependencies
        shell: cmd
        run: |
          dumpbin /dependents build\bin\${{ matrix.build_type }}\sherpa-onnx.exe

      - uses: actions/upload-artifact@v4
        with:
          name: release-windows-x86-${{ matrix.shared_lib }}-${{ matrix.with_tts }}-static-crt-${{ matrix.use_static_crt }}-${{ matrix.build_type }}
          path: build/install/*

      - name: Copy files
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

          shared_lib=${{ matrix.shared_lib }}
          use_static_crt=${{ matrix.use_static_crt }}
          if [[ $shared_lib == "ON" ]]; then
            if [[ $use_static_crt == ON ]]; then
              suffix=shared-MT-${{ matrix.build_type }}
            else
              suffix=shared-MD-${{ matrix.build_type }}
            fi
          else
            if [[ $use_static_crt == ON ]]; then
              suffix=static-MT-${{ matrix.build_type }}
            else
              suffix=static-MD-${{ matrix.build_type }}
            fi
          fi

          if [[ ${{ matrix.with_tts }} == ON ]]; then
            dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-win-x86-$suffix
          else
            dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-win-x86-$suffix-no-tts
          fi

          if [[ "${{ matrix.build_type }}" == "Debug" || "${{ matrix.build_type }}" == "RelWithDebInfo" ]]; then
            echo "Copy matching PDB files..."

            build_bin_dir=build/bin/${{ matrix.build_type }}
            install_bin_dir=build/install/bin

            for exe in ${install_bin_dir}/*.exe; do
              base=$(basename "$exe" .exe)
              pdb=${build_bin_dir}/${base}.pdb

              if [[ -f "$pdb" ]]; then
                echo "Copying $pdb"
                cp "$pdb" ${install_bin_dir}/
              else
                echo "No PDB found for $base"
              fi
            done
          fi

          mkdir $dst

          cp -a build/install/bin $dst/
          cp -a build/install/lib $dst/
          cp -a build/install/include $dst/

          ls -lh $dst/bin/
          echo "---"
          ls -lh $dst/lib/

          tar cjvf ${dst}.tar.bz2 $dst

          ls -lh $dst/

          ls -lh *.tar.bz2

      # https://huggingface.co/docs/hub/spaces-github-actions
      - name: Publish to huggingface
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_CLONE_PROTECTION_ACTIVE=false
            GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj2/sherpa-onnx-libs huggingface

            cd huggingface
            dst=win64/$SHERPA_ONNX_VERSION
            mkdir -p $dst

            cp -v ../sherpa-onnx-*.tar.bz2 $dst

            git status
            git lfs track "*.bz2"

            git add .

            git commit -m "upload sherpa-onnx-${SHERPA_ONNX_VERSION}"

            git push https://csukuangfj2:$HF_TOKEN@huggingface.co/csukuangfj2/sherpa-onnx-libs main

      - name: Release pre-compiled binaries and libs for Windows x86
        if: github.repository_owner == 'csukuangfj' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*-win-x86*.tar.bz2
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: v1.12.28

      - name: Release pre-compiled binaries and libs for Windows x86
        if: github.repository_owner == 'k2-fsa'&& github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*-win-x86*.tar.bz2

      - name: Test online CTC
        shell: bash
        run: |
          export PATH=$PWD/build/bin/${{ matrix.build_type }}:$PATH
          export EXE=sherpa-onnx.exe

          .github/scripts/test-online-ctc.sh


================================================
FILE: .gitignore
================================================
build
*.zip
*.tgz
*.sw?
onnxruntime-*
icefall-*
run.sh
__pycache__
dist/
sherpa_onnx.egg-info/
.DS_Store
build-aarch64-linux-gnu
build-arm-linux-gnueabihf
sherpa-onnx-streaming-zipformer-*
sherpa-onnx-lstm-en-*
sherpa-onnx-lstm-zh-*
build-android-arm64-v8a/
build-android-armv7-eabi/
build-android-x86-64/
a.txt
run-bilingual*.sh
run-*-zipformer.sh
run-zh.sh
decode-file-c-api
offline-tts-c-api
run-decode-file-c-api.sh
sherpa-onnx-ffmpeg
build-ios
build-swift-macos
aa.sh
client-2.sh
ffmpeg-examples/run-3.sh
python-api-examples/decode-file-multiple-bak-2.py
run-en-zipformer-microphone*
run-websocket-server*
decode-file
*.dylib
tokens.txt
*.onnx
log.txt
tags
run-decode-file-python.sh
android/SherpaOnnx/app/src/main/assets/
*.ncnn.*
run-sherpa-onnx-offline.sh
sherpa-onnx-conformer-en-2023-03-18
paraformer-onnxruntime-python-example
run-sherpa-onnx-offline-paraformer.sh
run-sherpa-onnx-offline-transducer.sh
sherpa-onnx-paraformer-zh-2023-03-28
sherpa-onnx-paraformer-zh-2023-09-14
run-offline-websocket-server-paraformer.sh
run-*int8.sh
a.sh
run-offline-websocket-client-*.sh
run-sherpa-onnx-*.sh
sherpa-onnx-zipformer-en-2023-03-30
sherpa-onnx-zipformer-en-2023-04-01
run-offline-decode-files.sh
sherpa-onnx-nemo-ctc-en-citrinet-512
sherpa-onnx-streaming-paraformer-bilingual-zh-en
run-offline-decode-files-nemo-ctc.sh
sherpa-onnx-nemo-ctc-*
*.wav
sherpa-onnx-zipformer-*
sherpa-onnx-conformer-*
sherpa-onnx-whisper-*
swift-api-examples/k2fsa-*
run-*.sh
two-pass-*.sh
build-*

## User settings
xcuserdata/

## Xcode 8 and earlier
*.xcscmblueprint
*.xccheckout
vits-vctk
vits-zh-aishell3
jslint.mjs
vits-piper-en_US-amy-low
vits-piper-*-*-*
log
*.exe
vits-piper-*
vits-coqui-*
vits-mms-*
*.tar.bz2
sherpa-onnx-paraformer-trilingual-zh-cantonese-en
sr-data
*xcworkspace/xcuserdata/*

vits-icefall-*
sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12
spoken-language-identification-test-wavs
my-release-key*
vits-zh-hf-fanchen-C
sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01
*.dll
*.lib
*.tar.gz
*.tar.bz2
*.zip
sherpa-onnx-ced-*
node_modules
package-lock.json
pubspec.lock
sherpa-onnx-nemo-*
sherpa-onnx-vits-*
sherpa-onnx-telespeech-ctc-*
*.fst
.ccache
lib*.a
sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17
*.bak
vits-melo-tts-zh_en
*.o
*.ppu
sherpa-onnx-online-punct-en-2024-08-06
*.mp4
*.mp3
sherpa-onnx-pyannote-segmentation-3-0
sherpa-onnx-moonshine-tiny-en-int8
sherpa-onnx-moonshine-base-en-int8
harmony-os/SherpaOnnxHar/sherpa_onnx/LICENSE
harmony-os/SherpaOnnxHar/sherpa_onnx/CHANGELOG.md
matcha-icefall-zh-baker
matcha-icefall-en_US-ljspeech
kokoro-en-v0_19
*.pt
lexicon.txt
us_gold.json
us_silver.json
kokoro-multi-lang-v1_0
sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16
cmake-build-debug
cmake-build-release
README-DEV.txt
*.rknn
*.jit
##clion
.idea
sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02
dict
*.npz
voices.bin
kitten-nano-en-v0_1-fp16
*.egg-info
*.jar
vocab.json
*.so
sherpa-onnx-streaming-t-one-russian-2025-09-08
sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10
am.mvn
*bpe.model
config.yaml
configuration.json
sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12
sherpa-onnx-qnn-10-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8-android-aarch64
sherpa-onnx-paraformer-zh-int8-2025-10-07
sherpa-onnx-qnn-5-seconds-zipformer-ctc-zh-2025-07-03-int8-android-aarch64
sherpa-onnx-qnn-10-seconds-zipformer-ctc-zh-2025-07-03-int8-android-aarch64
build-riscv64-linux-gnu-spacemit/
spacemit-toolchain*
sherpa-onnx-qnn-*
matcha-icefall-*
sherpa-onnx-medasr-ctc-en-int8-2025-12-25
sherpa-onnx-funasr-nano-int8-2025-12-30
*.raw
*-input-list.txt
sherpa-onnx-funasr-nano*2025-12-30
sherpa-onnx-pocket-tts-int8-2026-01-26
sherpa-onnx-pocket-tts-2026-01-26
sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17
sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25
non-streaming-fire-red-asr-ctc-decode-files
sherpa-onnx-moonshine-*-quantized-2026-02-27
sherpa-onnx-supertonic-tts-int8-2026-03-06
token_scores.json
sherpa-onnx-zipvoice-distill-int8-zh-en-emilia
sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile
sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12-int8
doxygen-docs

================================================
FILE: CHANGELOG.md
================================================
## 1.12.31

* Fix building har for OHOS (#3361)
* Refactor MatchaTTS to use the new Generate API (#3362)
* Refactor Kokoro TTS to use the new Generate API (#3363)
* Refactor KittenTTS to use the new Generate API (#3364)
* Refactor VITS to use the new Generate API (#3365)
* Add Rust API examples for TTS (#3366)
* Fix Swift tests (#3367)
* Add Rust API for audio tagging (#3368)
* Add Rust API for speaker embedding extractor and manager (#3369)
* Add Rust API for speaker diarization (#3370)
* Refactor Rust API for speech denoiser (#3371)
* Add Rust API for KWS, offline punctuation and spoken language identification (#3372)
* Add doc for c api and cxx api (#3374)
* Add link to C API doc (#3375)
* Add doc for Rust API (#3376)
* Add doc for Dart API (#3377)
* Add more doc for Rust API (#3378)

## 1.12.30

* Fix typos in the project (#3293)
* Fix WebAssembly JavaScript API (#3294)
* Remove unnecessary SHERPA_ONNX_API from C/C++ APIs (#3295)
* Fix bugs in CXX APIs (#3296)
* Result goes to stdout (#3274)
* Small fix to online recognizer C++ code (#3297)
* Small fixes to JNI wrappers (#3298)
* Add SetOption/GetOption to OnlineStream and OfflineStream (#3307)
* Add SetOption/GetOption C API and export symbols (#3308)
* Add SetOption/GetOption CXX wrapper for OnlineStream and OfflineStream (#3309)
* Migrate Paraformer is_final to use SetOption mechanism (#3310)
* Add SetOption/GetOption Python bindings for OnlineStream and OfflineStream (#3311)
* Add SetOption/GetOption Java, Kotlin, and JNI bindings (#3312)
* Add SetOption/GetOption Go bindings for OnlineStream and OfflineStream (#3313)
* Add SetOption/GetOption C# bindings for OnlineStream and OfflineStream (#3314)
* Add SetOption/GetOption WASM/JavaScript bindings (#3315)
* Fix padding bug in test-onnx-streaming.py (#3318)
* Fix style issues (#3321)
* Add DPDFNet speech denoiser support for offline and streaming (#3276)
* Upload DPDFNet models (#3322)
* Add C API example for online punctuation (#3323)
* Add online speech denoiser for GTCRN and examples (#3324)
* Add Go API example for online punctuation (#3325)
* Release Rust package for offline/online speech denoiser (#3328)
* Refactor Dart API to check for nullptr (#3329)
* Use onnxruntime v1.23.2 for Android (#3330)
* Refactor ZipVoice TTS to support callback (#3332)
* Add C and CXX API examples for ZipVoice (#3333)
* Add Go API examples for ZipVoice (#3334)
* Add Python API examples for ZipVoice TTS (#3335)
* Add WebAssembly example for ZipVoice (#3337)
* Update WebAssembly download progress text to show MB (#3338)
* Add WebAssembly example for PocketTTS (#3340)
* Add JavaScript (WebAssembly) example for ZipVoice TTS (#3341)
* Add JavaScript (node-addon) example for ZipVoice TTS (#3342)
* Add JavaScript playback examples for Pocket and Supertonic TTS (#3343)
* Add Kotlin and Java API for ZipVoice models (#3344)
* Add C# API examples for ZipVoice models (#3345)
* Add Swift API examples for ZipVoice models (#3346)
* Add Dart API examples for ZipVoice models (#3347)
* Add Rust API example for online punctuation (#3348)
* Add fcitx5-vinput to projects using sherpa-onnx (#3350)
* Add Pascal API examples for ZipVoice models (#3351)
* Add Rust API examples for ZipVoice models (#3352)
* Add SetOption/GetOption/HasOption Kotlin bindings (#3354)
* Fix building Python wheels for Windows (#3355)
* Fix OHOS APIs for TTS and ASR (#3356)
* Add HarmonyOS APIs for online punctuation (#3357)
* Add HarmonyOS APIs for offline punctuation (#3359)

## 1.12.29

* Add Supertonic TTS support (#3094)
* Upload supertonic tts models (#3263)
* Add Python API examples for Supertonic TTS (#3264)
* Support dynamic decoder layers in canary model runtime (#3268)
* Add CXX API for Supertonic TTS (#3280)
* Add C# API for Supertonic TTS (#3283)
* Add Go API for Supertonic TTS (#3284)
* Add Rust API for Supertonic TTS (#3285)
* Add Swift API for Supertonic TTS (#3286)
* Add JavaScript API for Supertonic TTS (#3287)
* Add Dart API for Supertonic TTS (#3288)
* Add Java and Kotlin API for Supertonic TTS (#3289)
* Add Pascal API and example for Supertonic TTS (#3290)
* Publish pdb files for Debug build on Windows (#3252)
* Fix memory leak in WebAssembly for TTS (#3259)
* Refactor WebAssembly TTS API (#3260)

## 1.12.28

* Add C++ runtime support for Moonshine v2 (#3232)
* Export Moonshine v2 models to sherpa-onnx (#3234)
* Update Python APIs for Moonshine v2 models (#3235)
* Add Kotlin and Java APIs for Moonshine v2 models (#3237)
* Add C and C++ API for Moonshine v2 models (#3238)
* Add Swift API for Moonshine v2 models (#3240)
* Add JavaScript API (WebAssembly) for Moonshine v2 models (#3241)
* Add JavaScript API (node-addon) for Moonshine v2 models (#3242)
* Add C# API for Moonshine v2 (#3243)
* Add Go API for Moonshine v2 (#3244)
* Add Dart API for Moonshine v2 (#3245)
* Add Rust API for Moonshine v2 (#3247)
* Add Pascal API for Moonshine v2 (#3248)
* Build huggingface spaces for Moonshine v2 with WebAssembly (#3249)

## 1.12.27

* Add Rust API for VAD (#3213)
* Replace deprecated std::istrstream with std::istringstream (#3214)
* Replace deprecated std::wstring_convert with manual UTF-8 codec (#3215)
* Fix CMake warnings: optional feature message level + policy version minimum (#3217)
* Upload FireRedASR2 CTC model (#3220)
* Bump hclust-cpp to 2026-02-25 release and modernize FetchContent (#3216)
* Support FireRedASR CTC models (#3221)
* Update language bindings for FireRedASR CTC models (#3224)

## 1.12.26

* Fix CI (#3192)
* Fix heap-buffer-overflow in ReadWaveImpl when data chunk size is odd (#3195)
* [PocketTTS] Add seed support and voice embedding caching for consiste… (#3189)
* Feat/pocket tts cache config (#3200)
* 3197: enhanced java binding for voice_embedding_cache_capacity (#3201)
* Dart, flutter, go, c-api binding and example (#3202)
* Begin to add Rust API (#3203)
* Add Rust API for streaming speech recognition (#3204)
* Add a real-time speech recognition example with microphone for Rust API. (#3205)
* Add Rust API for offline ASR (#3207)
* feat: Add PocketTTS cache & seed support to Node.js Addon and WASM APIs (#3206)
* Add more examples for offline ASR models with Rust API. (#3209)
* Update C#/Swift/Pascal API for PocketTTS' VoiceEmbeddingCacheCapacity. (#3211)

## 1.12.25

* Fix building without tts (#3168)
* Fix publishing npm packages for Linux aarch64 and wheels for macOS (#3169)
* Export PocketTTS for earlier versions of onnxruntime (#3170)
* Fix building wheels for Python 3.14 (#3182)
* Update Eigen from 3.4.0 to 3.4.1 (#3178)
* fix(flutter): add missing FFI struct fields for OfflineWhisper and FunAsrNano (#3186)
* Fix building wheels for Windows (#3187)

## 1.12.24

* Fix UnicodeDecodeError when accessing tokens in FunASR-nano tokenizer (#3058)
* Use more jobs for building VAD ASR APKs (#3068)
* Add export CGO_ENABLED=1 to all GO examples. (#3069)
* Support BPE tokenizer (#3078)
* Add C++ runtime and Python support PocketTTS for streaming voice cloning on CPU (#3083)
* Refactor addon loading logic and add static import for platform-specific binaries (#3075)
* Update C++ binary for PocketTTS (#3087)
* Add Python API examples for PocketTTS (#3088)
* Limit text length for PocketTTS. (#3089)
* Add CI for PocketTTS. (#3090)
* Fix Python CI (#3091)
* Fix build error (#3096)
* Add Java and Kotlin API for PocketTTS (#3095)
* Refactor JNI to remove casting. (#3103)
* Refactor JNI (#3107)
* Support MD and MT MSVC runtime libraries (CRT) for Windows x64 static build (#3111)
* Fix MSVC CRT for Windows x64 shared build. (#3114)
* Fix MSVC CRT for Windows arm64 (#3117)
* Fix MSVC CRT for Windows x86 (#3118)
* Refactor CI for Windows x64 (#3119)
* Fix CI for Windows x64 (#3123)
* Upload WenetSpeech-Wu u2pp ASR models. (#3125)
* Add TTS generation with GenerationConfig params C API (#3115)
* Refactor TTS C API (#3127)
* Add CXX API for PocketTTS (#3128)
* Add Swift API for PocketTTS (#3129)
* fix(android): Optimize UI updates and remove dead code in MainActivity (#3130)
* Change RPATH for sherpa-onnx.node (#3131)
* Add async js API for tts generate. (#3133)
* fix(android): Initialize models in background coroutine to avoid UI blocking (#3132)
* Add hotword support for FunASR-Nano (#3122)
* Provide async JS API to create TTS. (#3134)
* feat: Add a WebAssembly Text-to-Speech (TTS) demo with UI and worker-based audio generation using sherpa-onnx. (#3120)
* feat: add support for Meta Omnilingual ASR v2 models (#3138)
* Export omnilingualASR v2 (#3140)
* feat: Add ys_log_probs to NeMo transducer greedy search decoder (#3105)
* Add modified beam search and hotwords support for NeMo transducer models (#3077)
* Fix ORT Value default construction for Android build (#3141)
* Whisper timestamps (#2945)
* Add node-addon JavaScript API for PocketTTS (#3139)
* Update lifecycle-runtime-ktx version to 2.5.1 (#3143)
* Enable return value in callback for TTS in Go API. (#3150)
* Refactor Go API for TTS (#3151)
* Export models for CANN 8.1 (#3152)
* Add Go API for PocketTTS (#3153)
* Export models for CANN 8.3 and 8.5 (#3156)
* Add https://huggingface.co/alphacep/vosk-model-small-streaming-bn (#3158)
* Add Pascal API for Pocket TTS (#3157)
* Upload Vietnamese ASR models (#3159)
* Refactor Pascal API (#3160)
* Add C# API for PocketTTS. (#3162)
* Add JavaScript (WebAssembly) API for PocketTTS (#3163)
* Add Dart API for PocketTTS (#3164)
* Add GeneratedAudio ToBuffer() to the GO API (#3136)
* fix: resolve high vulnerability python.lang.security.audit.dangerous-system-call-tainted-env-args.dangerous-system-call-tainted-env-args (#3155)
* Fix various language bindings (#3166)

## 1.12.23

* Node addon api jsdoc (#3005)
* Add JavaScript async api for OfflineRecognizer decodeStream. (#3049)
* Support creating OfflineRecognizer asynchronously in JavaScript. (#3050)
* Fix uploading files to huggingface (#3054)
* Add Dart API for FunASR Nano (#3055)
* Fix uploading APK files (#3056)

## 1.12.22

* Update wav files for FunASR Nano (#3038)
* cmake: fix sha256 for onnxruntime linux x86_64 gpu package (#3042)
* Fix checking funasr nano tokenizer on Windows (#3043)
* Support nemotron-speech-streaming-en-0.6b (#3044)
* Build APK for nemotron-speech-streaming-en-0.6b (#3045)
* Fix building Linux arm wheels (#3047)

## 1.12.21

* Fix publishing NPM packages (#2909)
* Refactor ZipVoice C++ code (#2911)
* Export more zipformer ctc models to qnn (#2921)
* [KWS] Add phone+ppinyin tokenization with lexicon support (for zh-en model) (#2922)
* Export Paraformer ASR models to QNN (#2925)
* Add Transpose for a 2-D matrix. (#2926)
* Optimize computation with Eigen. (#2928)
* Add C++ runtime for Paraformer ASR models with Qualcomm NPU using QNN (#2931)
* Add Android demo for Paraformer ASR with Qualcomm NPU. (#2932)
* Export Google MedASR to sherpa-onnx (#2934)
* Add C++ runtime and Python API for Google MedASR models (#2935)
* Fix creating a view of an Ort::Value tensor. (#2939)
* Add C and CXX API for Google MedASR model (#2946)
* [TTS Engine] Fix engine speed (#2895)
* Add Swift API for Google MedASR model (#2947)
* Add C# API for Google MedASR model (#2949)
* Add Pascal API for Google MedASR model (#2950)
* Add Go API for Google MedAsr model (#2952)
* Add Dart API for Google MedAsr model (#2953)
* Add JavaScript API (WebAssembly) for Google MedAsr model (#2954)
* Add JavaScript API (node-addon) for Google MedAsr model (#2955)
* Add Kotlin and Java API for Google MedAsr model (#2956)
* Add funASR-Nano with LLM support (#2936)
* Fix building for Windows (#2964)
* Fix building for HarmonyOS (#2972)
* [feature] add FunASRNano config into golang api (#2974)
* Update FunAsr-Nano CTC model (#2978)
* [opt] opt free pointer function in Go API (#2975)
* [feature] use jinja2 to generate sherpa-onnx-go lib (#2976)
* Reformat Go API code (#2979)
* Fix building for onnxruntime >= 1.11.0 (#2981)
* Export Whisper to RK NPU (#2983)
* Test Whisper on Ascend NPU using ACL Python API (#2986)
* FunASR-nano: switch to unified KV-cache LLM (#2995)
* Remove filesystem header (#2998)
* Fix(csrc/melotts): Fix V-words pronunciation on MeloTTS_en (#3002)
* Upload FunASR Nano ASR models with LLM (#3003)
* Fix download test wav files (#3004)
* Use onnxruntime 1.23.2 for Windows (#3007)
* Add CI to export Whisper models to Ascend NPU (#3008)
* Add C++ runtime for Whisper with Ascend NPU (#3009)
* Use onnxruntime v1.23.2 for Linux aarch64 (#3016)
* Use onnxruntime v1.23.2 for Linux arm (#3017)
* Start to switch from onnxruntime 1.17.1 to v1.23.2 (#2993)
* Use onnxruntime 1.23.2 for Linux x64 + NVIDIA GPU (#3018)
* Update CI test for FunASR Nano C/C++ API (#3021)
* [feature] add FunASRNano Swift api (#2994)
* swift: add FunASR nano Swift API (#3022)
* Add Go API test for FunASR Nano (#3025)
* Add JavaScript API for FunASR Nano (node-addon) (#3026)
* Add Pascal API for FunASR Nano (#3029)
* Add C# API for FunASR Nano (#3031)
* Add Kotlin and Java API for FunASR Nano models (#3030)
* Fire-Red-ASR: enable ORT I/O binding for encoder/decoder (#3011)
* whisper: improve ORT IO binding execution (#3023)
* Add JavaScript API for FunASR Nano (WebAssembly) (#3027)
* Fix CI test for nodejs (#3033)

## 1.12.20

* Refactor axcl examples. (#2867)
* Update README to include Axera NPU (#2870)
* Add CI for Axera NPU (#2872)
* Refactor sense voice impl (#2873)
* Refactor Paraformer Impl (#2874)
* Remove unused lock file (#2875)
* Load QNN context binary for faster startup (#2877)
* Export models to Ascend 910B4 (#2878)
* Optimize streaming output results when VAD does not detect human voice for a long time (#2876)
* Build APKs for MatchaTTS Chinese+English (#2882)
* Publish WASM spaces for MatchaTTS Chinese+English model (#2885)
* Add script for testing zipvoice onnx models (#2887)
* upload zipvoice onnx models (#2890)
* Remove cppinyin from zipvoice (#2892)
* Fix building errors (#2893)
* Use a shorter name for Zipvoice models. (#2894)
* Export GigaAM v3 to sherpa-onnx (#2901)
* Fix typos in URL (#2905)
* Support Fun-ASR-Nano-2512 (#2906)

## 1.12.19

* Fix building without TTS for C API (#2838)
* [ZipVoice] Fix english tokenization error (#2834)
* Add simulate streaming ASR Python example for Paraformer (#2839)
* Fix building JNI for Windows (#2840)
* Avoid NaN in NeMo speaker embedding models. (#2844)
* Add spacemit ort ep for spacemit riscv cpus (#2837)
* Add token-level confidence scores (ys_probs) for offline transducer models (#2843)
* Fix token log probabilities in offline transducer modified beam search decoder (#2846)
* Support AXERA ax630, ax650, and axcl backends. (#2849)
* Refactor axera npu examples (#2850)
* Fix matcha tts zh-en model (#2851)
* Fix the English part for Matcha TTS. (#2853)
* Refactor text-utils (#2855)
* Fix matcha tts (#2856)
* Add a space between English words for Matcha zh-en TTS (#2858)
* Fix punctuations in matcha zh-en tts (#2859)
* Upload matcha tts zh-en model (#2865)
* Fix the discrepancy with the Silero VAD isSpeech logic (#2863)

## 1.12.18

* Fix building wheels (#2786)
* export omniASR_CTC_1B (#2788)
* Add C++ QNN support for SenseVoice (#2793)
* Export models for CANN toolkit 7.0 (#2795)
* Support hotwords with byte level bpe (#2802)
* Add Android demo with QNN (Qualcomm NPU) for SenseVoice ASR (#2803)
* Export zipformer ctc models to QNN (#2815)
* Add spaces between English words for Homophone replacer. (#2817)
* Add C++ QNN support for Zipformer CTC models. (#2809)
* Limit symbol visibility in the shared libraries (#2822)
* Fix warnings for initializing tts lexicon. (#2823)
* Export zipformer ctc models to Ascend NPU (#2824)
* Refactor scripts for exporting models to Ascend NPU. (#2825)
* Add C++ support for Zipformer CTC on Ascend NPU (#2826)
* Fix segfault when non-wav file is passed to ReadWave (#2821)
* Avoid calling rknn_dup_context(). (#2828)
* Add C++ support for Paraformer with RK NPU (#2829)
* Update README to include NPU support (#2830)
* Support running whisper large v3 with external data weight (#2807)

## 1.12.17

* Fix releasing

## 1.12.16

* Support exporting SenseVoice and Paraformer to Ascend 310P3 NPU. (#2716)
* Demo for no stream vad asr with flutter (#2705)
* Fix crashing in Android KWS demo (#2719)
* Add C++ API with ACL C API for SenseVoice ASR on Ascend NPU (#2728)
* Allow up to 30 seconds ASR for sense-voice on Ascend NPU (#2729)
* Fix compilation error for Ascend NPU (#2731)
* docs: fix Flutter TTS macOS mirror link targets; fix speech-enhancement link typo (#2723)
* Export models for Ascend910B2 (#2740)
* Add C++ runtime for Paraformer on Ascend NPU. (#2741)
* Expose ys probs to JNI, Kotlin and Java API (#2736)
* Add CI for Ascend NPU (#2743)
* Export models for CANN 8.2 (#2745)
* Fix validating model config for Paraformer. (#2749)
* Add cxx API for online punctuation models (#2759)
* Export sense voice to qnn (#2760)
* Export models to Ascend 910B3 (#2761)
* Support MatchaTTS models for Chinese+English. (#2763)
* Fix zipvoice. (#2764)
* Support passing multiple lexicon files for matcha tts models. (#2765)
* Begin to add qnn C API (#2766)
* Add QnnConfig. (#2768)
* Fix missing includes. (#2769)
* Begin to export omnilingual-asr to sherpa-onnx (#2770)
* Add C++ and Python API for Omnilingual ASR models. (#2772)
* Add C API for Omnilingual ASR CTC models (#2773)
* Add CXX API for Omnilingual ASR CTC models (#2774)
* Add C# API for Omnilingual ASR CTC models (#2775)
* Add Swift API for Omnilingual ASR CTC models (#2776)
* Add Go API for Omnilingual ASR CTC models (#2778)
* Add JavaScript (node-addon) API for Omnilingual ASR CTC models (#2780)
* Add Dart API for Omnilingual ASR CTC models (#2779)
* Add JavaScript (WebAssembly) API for Omnilingual ASR CTC models (#2781)
* Add Pascal API for Omnilingual ASR CTC models (#2782)
* Add Kotlin and Java API for Omnilingual ASR CTC models (#2783)

## 1.12.15

* Exposing online punctuation model support in node-addon-api (#2609)
* Fix building wheels (#2619)
* Export one more Piper Arabic TTS model (#2623)
* fix: hot update language for sencevoice (#2627)
* Add C API and Go API for Zipvoice (#2628)
* Add CI tests for Zipvoice Go API (#2630)
* Remove hardcoded dithering value in NeMo transducer recognizer (#2639)
* Reduce verbose output about reading lexicon for TTS (#2648)
* Add Parakeet TDT model for generating subtitles (#2649)
* Add more Piper TTS models (#2651)
* Add CXX API for audio tagging (#2652)
* Add C# API for audio tagging (#2653)
* Support KWS + RKNN. (#2190)
* Support https://github.com/ASLP-lab/WenetSpeech-Chuan (#2656)
* Fix building for android (#2657)
* fix ios build script (#2645)
* Update kaldi-native-fbank (#2659)
* Add missing python class definitions for builds without TTS support (#2660)
* Remove jieba from kokoro and matcha tts. (#2662)
* add flet_sherpa_onnx in readme (#2663)
* Remove cppjieba (#2664)
* Add phrase matcher to merge words into phrases for TTS. (#2668)
* Limit number of tokens per sentence in MatchaTTS. (#2671)
* Update README to include a ROS2 project using sherpa-onnx (#2672)
* Fix building Flutter APPs (#2673)
* Export Paraformer to RKNN (#2689)
* Update README.md add achatbot-go Projects using sherpa-onnx link (#2691)
* Add CI to export Paraformer to RKNN (#2692)
* Support MatchaTTS with English and Chinese (#2695)
* Export Paraformer ASR models from FunASR to Ascend NPU 910B (#2697)
* Update README to include Ascend NPU (#2698)
* Fix WASM (JS) after adding zipvoice. (#2702)
* Export SenseVoice ASR models to Ascend NPU 910B (#2707)
* Fix building for various language bindings after adding zipvoice (#2709)

## 1.12.14

* Fix setting rknn core mask (#2594)
* Add Dart API for spoken language identification (#2596)
* Add CI tests for dart spoken language identification example (#2598)
* Provide pre-compiled sherpa-onnx libs/binaries for CUDA 12.x + onnxruntime 1.22.0 (#2599)
* Provide pre-compiled whls for cuda 12.x on Linux x64 and Windows x64 (#2601)
* Fix TDT decoding for NeMo TDT transducers (#2606)
* Add a C++ example for simulated streaming ASR (#2607)

## 1.12.13

* Fix initializing symbol table for OnlineRecognizer. (#2590)
* Support RK NPU for SenseVoice non-streaming ASR models (#2589)
* Upload RKNN models for sense-voice (#2592)

## 1.12.12

* Fix building for risc-v (#2549)
* Fix using sherpa-onnx as a cmake sub-project. (#2550)
* Update kaldifst and kaldi-decoder (#2551)
* Support armv8l in Java API (#2556)
* Disable loading libs from jar on Android. (#2557)
* Fix cantonese vits tts (#2558)
* Avoid appending blanks for Cantonese vits tts. (#2559)
* Add hint for loading model files from SD card on Android. (#2564)
* Update README to include https://github.com/Mentra-Community/MentraOS (#2565)
* Export models from https://github.com/voicekit-team/T-one to sherpa-onnx (#2571)
* Add C++ and Python support for T-one streaming Russian ASR models (#2575)
* Add various language bindings for streaming T-one Russian ASR models (#2576)
* Fix the missing online punctuation in android aar (#2577)
* Export KittenTTS mini v0.1 to sherpa-onnx (#2578)
* Upload new sense-voice models (#2580)
* Export ASLP-lab/WSYue-ASR/tree/main/u2pp_conformer_yue to sherpa-onnx (#2582)
* Add various language bindings for Wenet non-streaming CTC models (#2584)

## 1.12.11

* Add two more Piper tts models (#2525)
* Generate tts samples for MatchaTTS (English). (#2527)
* Fix releasing go packages (#2529)
* Add license info about tts models from OpenVoiceOS (#2530)
* Support BPE models with byte fallback. (#2531)
* Simplify the usage of our non-Android Java API (#2533)
* Fix wasm for kws (#2535)
* Add one more German tts model from OpenVoiceOS. (#2536)
* Fix uploading win32 libs to huggingface (#2537)
* Add Zipvoice (#2487)
* Fix c api (#2545)
* Fix linking (#2546)

## 1.12.10

* Add VOSK streaming Russian ASR models and Kroko streaming German ASR models (#2502)
* Refactor CI tests (#2504)
* Update APK versions (#2505)
* Export whisper distil-large-v3 and distil-large-v3.5 to sherpa-onnx (#2506)
* Support specifying pronunciations of phrases in Chinese TTS. (#2507)
* fix(flutter): fix unicode problem in windows path (#2508)
* feat: add punctuation C++ API (#2510)
* Fix ctrl+c may lead to coredump (#2511)
* Add kitten tts nano v0.2 (#2512)
* Scripts to generate tts samples (#2513)
* Add tdt duration to APIs (#2514)
* Support 16KB page size for Android (#2520)
* Split sherpa-onnx Python package (#2521)
* Fix kokoro tts for punctuations (#2522)

## 1.12.9

* Add more piper tts models (#2480)
* Fix ASR for UE (#2483)
* push to maven center (#2463)
* Specify ABIs when building APKs (#2488)
* Add more debug info for vits tts (#2491)
* Add Swift API for computing speaker embeddings (#2492)
* Alex/feat add python example (#2490)
* Support TDT transducer decoding (#2495)
* Fix java test (#2496)
* Refactor Swift API (#2493)
* add TtsReader app to README.md (#2498)
* Export https://huggingface.co/nvidia/parakeet-tdt-0.6b-v3 to sherpa-onnx (#2500)
* Fix building apk (#2499)

## 1.12.8

* Expose JNI to compute probability of chunk in VAD (#2433)
* Add https://huggingface.co/Banafo/Kroko-ASR (#2453)
* Add APIs for Online NeMo CTC models (#2454)
* Export https://github.com/KittenML/KittenTTS to sherpa-onnx (#2456)
* Fix punctuations in kokoro tts. (#2458)
* Limit number of tokens in fire red asr decoding. (#2459)
* Add C++ runtime for kitten-tts (#2460)
* Add Kotlin and Java API for KittenTTS (#2461)
* Add Android TTS Engine APK for KittenTTS (#2465)
* Add Python API for KittenTTS. (#2466)
* Add C API for KittenTTS (#2467)
* Add CXX API for KittenTTS (#2469)
* Add JavaScript API (node-addon) for KittenTTS (#2470)
* Add JavaScript API (WebAssembly) for KittenTTS (#2471)
* Add Pascal API for KittenTTS (#2474)
* Add Dart API for KittenTTS (#2475)
* Add Swift API for KittenTTS (#2476)
* Add C# API for KittenTTS (#2477)
* Add Go API for KittenTTS (#2478)

## 1.12.7

* Support Portuguese and German ASR models from NeMo (#2394)
* Support returning the current speech segment for VAD. (#2397)
* Add more piper tts polish models (#2403)
* Support VAD+ASR for WearOS (#2404)
* Support test long audio with streaming-model & vad (#2405)
* Fix typo in sherpa-onnx-vad-with-online-asr.cc (#2407)
* Add tail padding for sherpa-onnx-vad-with-online-asr (#2408)
* Add more French TTS models (#2424)
* Add more piper tts models (#2425)
* Implement max_symbols_per_frame for GigaAM2 accurate decoding since model uses char tokens instead of BPE. (#2423)
* Fix GigaAM transducer encoder output length data type (#2426)
* Add friendly log messages for Android and HarmonyOS TTS users. (#2427)
* Fix setGraph in OnlineCtcFstDecoderConfig Java API (#2411)


## 1.12.6

* Support silero-vad v4 exported by k2-fsa (#2372)
* Add C++ and Python support for ten-vad (#2377)
* Fix compile errors for Linux (#2378)
* Add C API for ten-vad (#2379)
* Add CXX API examples for ten-vad. (#2380)
* Add JavaScript (WebAssembly) API for ten-vad (#2382)
* Add JavaScript (node-addon) API for ten-vad (#2383)
* Add Go API for ten-vad (#2384)
* Add C# API for ten-vad (#2385)
* Add Dart API for ten-vad (#2386)
* Add Swift API for ten-vad (#2387)
* Add Pascal API for ten-vad (#2388)
* Add Java/Kotlin API and Android support for ten-vad (#2389)

## 1.12.5

* Fix typo CMAKE_EXECUTBLE_LINKER_FLAGS -> CMAKE_EXECUTABLE_LINKER_FLAGS (#2344)
* Fix testing dart packages (#2345)
* fix(canary): use dynamo export, single input_ids and avoid 0/1 specialization (#2348)
* Fix TTS for Unreal Engine (#2349)
* Update readme to include https://github.com/mawwalker/stt-server (#2350)
* Add meta data to NeMo canary ONNX models (#2351)
* Update README to include https://github.com/bbeyondllove/asr_server (#2353)
* Add C++ runtime and Python API for NeMo Canary models (#2352)
* Add C/CXX/JavaScript API for NeMo Canary models (#2357)
* Add Java and Kotlin API for NeMo Canary models (#2359)
* Upload fp16 onnx model files for FireRedASR (#2360)
* Fix nemo feature normalization in test code (#2361)
* Refactor exporting NeMo models (#2362)
* Add LODR support to online and offline recognizers (#2026)
* Add CXX examples for NeMo TDT ASR. (#2363)
* Add Pascal/Go/C#/Dart API for NeMo Canary ASR models (#2367)

## 1.12.4

* Refactor release scripts. (#2323)
* Add TTS engine APKs for more models (#2327)
* Fix static link without tts (#2328)
* Fix VAD+ASR C++ example. (#2335)
* Add sherpa-onnx-streaming-zipformer-zh-int8-2025-06-30 to android ASR apk (#2336)
* Support non-streaming zipformer CTC ASR models (#2340)
* Support linux aarch64 for Dart and Flutter (#2342)

## 1.12.3

* Show CMake debug information. (#2316)
* Remove portaudio-go in Go API examples. (#2317)
* Support Zipformer CTC ASR with whisper features. (#2319)
* Support Zipformer transducer ASR with whisper features. (#2321)

## 1.12.2

* Fix CI for windows (#2279)
* Add jar for Java 24. (#2280)
* Add Python API for source separation (#2283)
* Add link to huggingface space for source separation. (#2284)
* Fix isspace on windows in debug build (#2042)
* Update wasm/vad-asr/assets/README.md for more clear (#2297)
* Update TTS Engine APK to support multi-lang (#2294)
* Add scripts for exporting Piper TTS models to sherpa-onnx (#2299)
* Update sherpa-onnx-shared.pc.in (#2300)
* Fixes #2172 (#2301)
* Refactor kokoro export (#2302)
* Fix building for Pascal (#2305)
* Support extra languages in multi-lang kokoro tts (#2303)
* Update readme to include BreezeApp from MediaTek Research. (#2313)
* Add API to get version information (#2309)


## 1.12.1

* Use jlong explicitly in jni. (#2229)
* Fix building RKNN wheels (#2233)
* Fix publishing binaries for RKNN (#2234)
* Export spleeter model to onnx for source separation (#2237)
* Add C++ runtime for spleeter about source separation (#2242)
* Add include headers for __ANDROID_API__,__OHOS__ (#2251)
* JAVA-API: Manual Library Loading Support for Restricted Environments (#2253)
* Build APK with replace.fst (#2254)
* repair rknn wheels (#2257)
* Update kaldi-native-fbank. (#2259)
* Fix building sherpa-onnx (#2262)
* Fix building MFC examples (#2263)
* Add UVR models for source separation. (#2266)
* move portaudio common record code to microphone (#2264)
* fixed mfc build error (#2267)
* Add C++ support for UVR models (#2269)
* Export nvidia/canary-180m-flash to sherpa-onnx (#2272)
* Update utils.dart (#2275)
* Fix rknn for multi-threads (#2274)
* Fix 32-bit arm CI (#2276)

## 1.12.0

* Fix building wheels for macOS (#2192)
* Show verbose logs in homophone replacer (#2194)
* Fix displaying streaming speech recognition results for Python. (#2196)
* Add real-time speech recognition example for SenseVoice. (#2197)
* docs: add Open-XiaoAI KWS project (#2198)
* Add C++ example for streaming ASR with SenseVoice. (#2199)
* Add C++ example for real-time ASR with nvidia/parakeet-tdt-0.6b-v2. (#2201)
* Add a link to YouTube video including sherpa-onnx. (#2202)
* Support sending is_eof for online websocket server. (#2204)
* Add alsa-based streaming ASR example for sense voice. (#2207)
* Support homophone replacer in Android asr demo. (#2210)
* Add Go implementation of the TTS generation callback (#2213)
* Add Android demo for real-time ASR with non-streaming ASR models. (#2214)
* Expose dither for JNI (#2215)
* Add nodejs example for parakeet-tdt-0.6b-v2. (#2219)
* Add script to build APK for simulated-streaming-asr. (#2220)


## 1.11.5

* export parakeet-tdt-0.6b-v2 to sherpa-onnx (#2180)
* Add C++ runtime for parakeet-tdt-0.6b-v2. (#2181)
* Avoid NaN in feature normalization. (#2186)

## 1.11.4

* Disable strict hotword matching mode for offline transducer (#1837)
* Comment refinement: Add note about vocoder file for matcha TTS config (#2106)
* Fix a typo in the JNI for Android. (#2108)
* Generate subtitles with FireRedAsr models (#2112)
* Use manylinux_2_28_x86_64 to build linux gpu for sherpa-onnx (#2123)
* Support running sherpa-onnx with RK NPU on Android (#2124)
* Fix building for HarmonyOS (#2125)
* cmake build, configurable from env (#2115)
* Expose dither in python API (#2127)
* Add support for GigaAM-CTC-v2 (#2135)
* Support Giga AM transducer V2 (#2136)
* Export kokoro 1.0 int8 models (#2137)
* Upload more onnx ASR models (#2141)
* Fix building for open harmonyOS (#2142)
* online-transducer: reset the encoder together with 2 previous output symbols (non-blank) (#2129)
* Fix punctuations for kokoro tts 1.1-zh. (#2146)
* Fix setting OnlineModelConfig in Java API (#2147)
* Support decoding multiple streams in Java API. (#2149)
* Support replacing homophonic phrases (#2153)
* Add C and CXX API for homophone replacer (#2156)
* Add JavaScript API (WASM) for homophone replacer (#2157)
* Add JavaScript API (node-addon) for homophone replacer (#2158)
* Fix building without TTS (#2159)
* Add homophone replacer example for Python API. (#2161)
* More fix for building without tts (#2162)
* Add Swift API for homophone replacer. (#2164)
* Add C# API for homophone replacer (#2165)
* Add Kotlin and Java API for homophone replacer (#2166)
* Add Dart API for homophone replacer (#2167)
* Add Go API for homophone replacer (#2168)

## 1.11.3

* fix vits dict dir config (#2036)
* fix case (#2037)
* Fix building wheels for RKNN (#2041)
* Change scale factor to 32767 (#2056)
* Fix length scale for kokoro tts (#2060)
* Allow building repository as CMake subdirectory (#2059)
* Export silero_vad v4 to RKNN (#2067)
* fix dml with preinstall ort (#2066)
* Fix building aar to include speech denoiser (#2069)
* Add CXX API for VAD (#2077)
* Add C++ runtime for silero_vad with RKNN (#2078)
* Refactor rknn code (#2079)
* Fix building for android (#2081)
* Add C++ and Python API for Dolphin CTC models (#2085)
* Add Kotlin and Java API for Dolphin CTC models (#2086)
* Add C and CXX API for Dolphin CTC models (#2088)
* Preserve more context after endpointing in transducer (#2061)
* Add C# API for Dolphin CTC models (#2089)
* Add Go API for Dolphin CTC models (#2090)
* Add Swift API for Dolphin CTC models (#2091)
* Add Javascript (WebAssembly) API for Dolphin CTC models (#2093)
* Add Javascript (node-addon) API for Dolphin CTC models (#2094)
* Add Dart API for Dolphin CTC models (#2095)
* Add Pascal API for Dolphin CTC models (#2096)

## 1.11.2

* Fix CI (#2016)
* Publish jar for more java versions (#2017)
* add alsa example for vad+offline asr (#2020)
* Support cuda12 and cudnn8 for Linux aarch64. (#2021)
* Update README to include more projects using sherpa-onnx (#2022)
* Fix a bug in vad.reset() (#2023)
* Fix Matcha + vocos for Android (#2024)
* Fix crash in Android tts engine demo. (#2029)
* Fix build script: add 'cd build' after 'mkdir build' to ensure the correct working directory for CMake (#2033)
* fix static linking (#2032)

## 1.11.1

* Export vocos to sherpa-onnx (#2012)
* Add C++ runtime for vocos (#2014)

## 1.11.0

* Fix building wheels for Python 3.7 (#1933)
* Add Kotlin and Java API for online punctuation models (#1936)
* Add Kokoro v1.1-zh (#1942)
* Support RKNN for Zipformer CTC models. (#1948)
* Add transducer modified_beam_search for RKNN. (#1949)
* Update README to include projects that is using sherpa-onnx (#1956)
* Limit number of tokens per second for whisper. (#1958)
* Ebranchformer (#1951)
* Test using sherpa-onnx as a cmake subproject (#1961)
* Add C++ demo for VAD+non-streaming ASR (#1964)
* Export gtcrn models to sherpa-onnx (#1975)
* c-api add wave write to buffer. (#1962)
* add SherpaOnnxOfflineRecognizerSetConfig binding for go, and optimize the new/free for C.struct_SherpaOnnxOfflineRecognizerConfig ptr (#1976)
* Add C++ runtime for speech enhancement GTCRN models (#1977)
* Add Python API for speech enhancement GTCRN models (#1978)
* Add C API for speech enhancement GTCRN models (#1984)
* Add CXX API for speech enhancement GTCRN models (#1986)
* Add Swift API for speech enhancement GTCRN models (#1989)
* Add C# API for speech enhancement GTCRN models (#1990)
* Add Go API for speech enhancement GTCRN models (#1991)
* Add Pascal API for speech enhancement GTCRN models (#1992)
* Add Dart API for speech enhancement GTCRN models (#1993)
* Add JavaScript (node-addon) API for speech enhancement GTCRN models (#1996)
* Add WebAssembly (WASM) for speech enhancement GTCRN models (#2002)
* Add JavaScript API (wasm) for speech enhancement GTCRN models (#2007)
* Add Kotlin API for speech enhancement GTCRN models (#2008)
* Add Java API for speech enhancement GTCRN models (#2009)


## 1.10.46

* Fix kokoro lexicon. (#1886)
* speaker-identification-with-vad-non-streaming-asr.py Lack of support for sense_voice. (#1884)
* Fix generating Chinese lexicon for Kokoro TTS 1.0 (#1888)
* Reduce vad-whisper-c-api example code. (#1891)
* JNI Exception Handling (#1452)
* Fix #1901: UnicodeEncodeError running export_bpe_vocab.py (#1902)
* Fix publishing pre-built windows libraries (#1905)
* Fixing Whisper Model Token Normalization (#1904)
* feat: add mic example for better compatibility (#1909)
* Add onnxruntime 1.18.1 for Linux aarch64 GPU (#1914)
* Add C++ API for streaming zipformer ASR on RK NPU (#1908)
* change [1<<28] to [1<<10], to fix build issues on GOARCH=386 that [1<<28] too large (#1916)
* Flutter Config toJson/fromJson (#1893)
* Fix publishing linux pre-built artifacts (#1919)
* go.mod set to use go 1.17, and use unsafe.Slice to optimize the code (#1920)
* fix: AddPunct panic for Go(#1921)
* Fix publishing macos pre-built artifacts (#1922)
* Minor fixes for rknn (#1925)
* Build wheels for rknn linux aarch64 (#1928)

## 1.10.45

* [update] fixed bug: create golang instance succeed while the c struct create failed (#1860)
* fixed typo in RTF calculations (#1861)
* Export FireRedASR to sherpa-onnx. (#1865)
* Add C++ and Python API for FireRedASR AED models (#1867)
* Add Kotlin and Java API for FireRedAsr AED model (#1870)
* Add C API for FireRedAsr AED model. (#1871)
* Add CXX API for FireRedAsr (#1872)
* Add JavaScript API (node-addon) for FireRedAsr (#1873)
* Add JavaScript API (WebAssembly) for FireRedAsr model. (#1874)
* Add C# API for FireRedAsr Model (#1875)
* Add C# API for FireRedAsr Model (#1875)
* Add Swift API for FireRedAsr AED Model (#1876)
* Add Dart API for FireRedAsr AED Model (#1877)
* Add Go API for FireRedAsr AED Model (#1879)
* Add Pascal API for FireRedAsr AED Model (#1880)

## 1.10.44

* Export MatchaTTS fa-en model to sherpa-onnx (#1832)
* Add C++ support for MatchaTTS models not from icefall. (#1834)
* OfflineRecognizer supports create stream with hotwords (#1833)
* Add PengChengStarling models to sherpa-onnx (#1835)
* Support specifying voice in espeak-ng for kokoro tts models. (#1836)
* Fix: made print sherpa_onnx_loge when it is in debug mode (#1838)
* Add Go API for audio tagging (#1840)
* Fix CI (#1841)
* Update readme to contain links for pre-built Apps (#1853)
* Modify the model used (#1855)
* Flutter OnlinePunctuation (#1854)
* Fix spliting text by languages for kokoro tts. (#1849)

## 1.10.43

* Add MFC example for Kokoro TTS 1.0 (#1815)
* Update sherpa-onnx-tts.js VitsModelConfig.model can be none (#1817)
* Fix passing gb2312 encoded strings to tts on Windows (#1819)
* Support scaling the duration of a pause in TTS. (#1820)
* Fix building wheels for linux aarch64. (#1821)
* Fix CI for Linux aarch64. (#1822)

## 1.10.42

* Fix publishing wheels (#1746)
* Update README to include https://github.com/xinhecuican/QSmartAssistant (#1755)
* Add Kokoro TTS to MFC examples (#1760)
* Refactor node-addon C++ code. (#1768)
* Add keyword spotter C API for HarmonyOS (#1769)
* Add ArkTS API for Keyword spotting. (#1775)
* Add Flutter example for Kokoro TTS (#1776)
* Initialize the audio session for iOS ASR example (#1786)
* Fix: Prepend 0 to tokenization to prevent word skipping for Kokoro. (#1787)
* Export Kokoro 1.0 to sherpa-onnx (#1788)
* Add C++ and Python API for Kokoro 1.0 multilingual TTS model (#1795)
* Add Java and Kotlin API for Kokoro TTS 1.0 (#1798)
* Add Android demo for Kokoro TTS 1.0 (#1799)
* Add C API for Kokoro TTS 1.0 (#1801)
* Add CXX API for Kokoro TTS 1.0 (#1802)
* Add Swift API for Kokoro TTS 1.0 (#1803)
* Add Go API for Kokoro TTS 1.0 (#1804)
* Add C# API for Kokoro TTS 1.0 (#1805)
* Add Dart API for Kokoro TTS 1.0 (#1806)
* Add Pascal API for Kokoro TTS 1.0 (#1807)
* Add JavaScript API (node-addon) for Kokoro TTS 1.0 (#1808)
* Add JavaScript API (WebAssembly) for Kokoro TTS 1.0 (#1809)
* Add Flutter example for Kokoro TTS 1.0 (#1810)
* Add iOS demo for Kokoro TTS 1.0 (#1812)
* Add HarmonyOS demo for Kokoro TTS 1.0 (#1813)

## 1.10.41

* Fix UI for Android TTS Engine. (#1735)
* Add iOS TTS example for MatchaTTS (#1736)
* Add iOS example for Kokoro TTS (#1737)
* Fix dither binding in Pybind11 to ensure independence from high_freq in FeatureExtractorConfig (#1739)
* Fix keyword spotting. (#1689)
* Update readme to include https://github.com/hfyydd/sherpa-onnx-server (#1741)
* Reduce vad-moonshine-c-api example code. (#1742)
* Support Kokoro TTS for HarmonyOS. (#1743)

## 1.10.40

* Fix building wheels (#1703)
* Export kokoro to sherpa-onnx (#1713)
* Add C++ and Python API for Kokoro TTS models. (#1715)
* Add C API for Kokoro TTS models (#1717)
* Fix style issues (#1718)
* Add C# API for Kokoro TTS models (#1720)
* Add Swift API for Kokoro TTS models (#1721)
* Add Go API for Kokoro TTS models (#1722)
* Add Dart API for Kokoro TTS models (#1723)
* Add Pascal API for Kokoro TTS models (#1724)
* Add JavaScript API (node-addon) for Kokoro TTS models (#1725)
* Add JavaScript (WebAssembly) API for Kokoro TTS models. (#1726)
* Add Kotlin and Java API for Kokoro TTS models (#1728)
* Update README.md for KWS to not use git lfs. (#1729)


## 1.10.39

* Fix building without TTS (#1691)
* Add README for android libs. (#1693)
* Fix: export-onnx.py(expected all tensors to be on the same device) (#1699)
* Fix passing strings from C# to C. (#1701)

## 1.10.38

* Fix initializing TTS in Python. (#1664)
* Remove spaces after punctuations for TTS (#1666)
* Add constructor fromPtr() for all flutter class with factory ctor. (#1667)
* Add Kotlin API for Matcha-TTS models. (#1668)
* Support Matcha-TTS models using espeak-ng (#1672)
* Add Java API for Matcha-TTS models. (#1673)
* Avoid adding tail padding for VAD in generate-subtitles.py (#1674)
* Add C API for MatchaTTS models (#1675)
* Add CXX API for MatchaTTS models (#1676)
* Add JavaScript API (node-addon-api) for MatchaTTS models. (#1677)
* Add HarmonyOS examples for MatchaTTS. (#1678)
* Upgraded to .NET 8 and made code style a little more internally consistent. (#1680)
* Update workflows to use .NET 8.0 also. (#1681)
* Add C# and JavaScript (wasm) API for MatchaTTS models (#1682)
* Add Android demo for MatchaTTS models. (#1683)
* Add Swift API for MatchaTTS models. (#1684)
* Add Go API for MatchaTTS models (#1685)
* Add Pascal API for MatchaTTS models. (#1686)
* Add Dart API for MatchaTTS models (#1687)

## 1.10.37

* Add new tts models for Latvia and Persian+English (#1644)
* Add a byte-level BPE Chinese+English non-streaming zipformer model (#1645)
* Support removing invalid utf-8 sequences. (#1648)
* Add TeleSpeech CTC to non_streaming_server.py (#1649)
* Fix building macOS libs (#1656)
* Add Go API for Keyword spotting (#1662)
* Add Swift online punctuation (#1661)
* Add C++ runtime for Matcha-TTS (#1627)

## 1.10.36

* Update AAR version in Android Java demo (#1618)
* Support linking onnxruntime statically for Android (#1619)
* Update readme to include Open-LLM-VTuber (#1622)
* Rename maxNumStences to maxNumSentences (#1625)
* Support using onnxruntime 1.16.0 with CUDA 11.4 on Jetson Orin NX (Linux arm64 GPU). (#1630)
* Update readme to include jetson orin nx and nano b01 (#1631)
* feat: add checksum action (#1632)
* Support decoding with byte-level BPE (bbpe) models. (#1633)
* feat: enable c api for android ci (#1635)
* Update README.md (#1640)
* SherpaOnnxVadAsr: Offload runSecondPass to background thread for improved real-time audio processing (#1638)
* Fix GitHub actions. (#1642)


## 1.10.35

* Add missing changes about speaker identification demo for HarmonyOS (#1612)
* Provide sherpa-onnx.aar for Android (#1615)
* Use aar in Android Java demo. (#1616)

## 1.10.34

* Fix building node-addon package (#1598)
* Update doc links for HarmonyOS (#1601)
* Add on-device real-time ASR demo for HarmonyOS (#1606)
* Add speaker identification APIs for HarmonyOS (#1607)
* Add speaker identification demo for HarmonyOS (#1608)
* Add speaker diarization API for HarmonyOS. (#1609)
* Add speaker diarization demo for HarmonyOS (#1610)

## 1.10.33

* Add non-streaming ASR support for HarmonyOS. (#1564)
* Add streaming ASR support for HarmonyOS. (#1565)
* Fix building for Android (#1568)
* Publish `sherpa_onnx.har` for HarmonyOS (#1572)
* Add VAD+ASR demo for HarmonyOS (#1573)
* Fix publishing har packages for HarmonyOS (#1576)
* Add CI to build HAPs for HarmonyOS (#1578)
* Add microphone demo about VAD+ASR for HarmonyOS (#1581)
* Fix getting microphone permission for HarmonyOS VAD+ASR example (#1582)
* Add HarmonyOS support for text-to-speech. (#1584)
* Fix: support both old and new websockets request headers format (#1588)
* Add on-device text-to-speech (TTS) demo for HarmonyOS (#1590)

## 1.10.32

* Support cross-compiling for HarmonyOS (#1553)
* HarmonyOS support for VAD. (#1561)
* Fix publishing flutter iOS app to appstore (#1563).

## 1.10.31

* Publish pre-built wheels for Python 3.13 (#1485)
* Publish pre-built macos xcframework (#1490)
* Fix reading tokens.txt on Windows. (#1497)
* Add two-pass ASR Android APKs for Moonshine models. (#1499)
* Support building GPU-capable sherpa-onnx on Linux aarch64. (#1500)
* Publish pre-built wheels with CUDA support for Linux aarch64. (#1507)
* Export the English TTS model from MeloTTS (#1509)
* Add Lazarus example for Moonshine models. (#1532)
* Add isolate_tts demo (#1529)
* Add WebAssembly example for VAD + Moonshine models. (#1535)
* Add Android APK for streaming Paraformer ASR (#1538)
* Support static build for windows arm64. (#1539)
* Use xcframework for Flutter iOS plugin to support iOS simulators.

## 1.10.30

* Fix building node-addon for Windows x86. (#1469)
* Begin to support https://github.com/usefulsensors/moonshine (#1470)
* Publish pre-built JNI libs for Linux aarch64 (#1472)
* Add C++ runtime and Python APIs for Moonshine models (#1473)
* Add Kotlin and Java API for Moonshine models (#1474)
* Add C and C++ API for Moonshine models (#1476)
* Add Swift API for Moonshine models. (#1477)
* Add Go API examples for adding punctuations to text. (#1478)
* Add Go API for Moonshine models (#1479)
* Add JavaScript API for Moonshine models (#1480)
* Add Dart API for Moonshine models. (#1481)
* Add Pascal API for Moonshine models (#1482)
* Add C# API for Moonshine models. (#1483)

## 1.10.29

* Add Go API for offline punctuation models (#1434)
* Support https://huggingface.co/Revai/reverb-diarization-v1 (#1437)
* Add more models for speaker diarization (#1440)
* Add Java API example for hotwords. (#1442)
* Add java android demo (#1454)
* Add C++ API for streaming ASR. (#1455)
* Add C++ API for non-streaming ASR (#1456)
* Handle NaN embeddings in speaker diarization. (#1461)
* Add speaker identification with VAD and non-streaming ASR using ALSA (#1463)
* Support GigaAM CTC models for Russian ASR (#1464)
* Add GigaAM NeMo transducer model for Russian ASR (#1467)

## 1.10.28

* Fix swift example for generating subtitles. (#1362)
* Allow more online models to load tokens file from the memory (#1352)
* Fix CI errors introduced by supporting loading keywords from buffers (#1366)
* Fix running MeloTTS models on GPU. (#1379)
* Support Parakeet models from NeMo (#1381)
* Export Pyannote speaker segmentation models to onnx (#1382)
* Support Agglomerative clustering. (#1384)
* Add Python API for clustering (#1385)
* support whisper turbo (#1390)
* context_state is not set correctly when previous context is passed after reset (#1393)
* Speaker diarization example with onnxruntime Python API (#1395)
* C++ API for speaker diarization (#1396)
* Python API for speaker diarization. (#1400)
* C API for speaker diarization (#1402)
* docs(nodejs-addon-examples): add guide for pnpm user (#1401)
* Go API for speaker diarization (#1403)
* Swift API for speaker diarization (#1404)
* Update readme to include more external projects using sherpa-onnx (#1405)
* C# API for speaker diarization (#1407)
* JavaScript API (node-addon) for speaker diarization (#1408)
* WebAssembly example for speaker diarization (#1411)
* Handle audio files less than 10s long for speaker diarization. (#1412)
* JavaScript API with WebAssembly for speaker diarization (#1414)
* Kotlin API for speaker diarization (#1415)
* Java API for speaker diarization (#1416)
* Dart API for speaker diarization (#1418)
* Pascal API for speaker diarization (#1420)
* Android JNI support for speaker diarization (#1421)
* Android demo for speaker diarization (#1423)

## 1.10.27

* Add non-streaming ONNX models for Russian ASR (#1358)
* Fix building Flutter TTS examples for Linux (#1356)
* Support passing utf-8 strings from JavaScript to C++. (#1355)
* Fix sherpa_onnx.go to support returning empty recognition results (#1353)

## 1.10.26

* Add links to projects using sherpa-onnx. (#1345)
* Support lang/emotion/event results from SenseVoice in Swift API. (#1346)
* Support specifying max speech duration for VAD. (#1348)
* Add APIs about max speech duration in VAD for various programming languages (#1349)

## 1.10.25

* Allow tokens and hotwords to be loaded from buffered string directly (#1339)
* Fix computing features for CED audio tagging models. (#1341)
* Preserve previous result as context for next segment (#1335)
* Add Python binding for online punctuation models (#1312)
* Fix vad.Flush(). (#1329)
* Fix wasm app for streaming paraformer (#1328)
* Build websocket related binaries for embedded systems. (#1327)
* Fixed the C api calls and created the TTS project file (#1324)
* Re-implement LM rescore for online transducer (#1231)

## 1.10.24

* Add VAD and keyword spotting for the Node package with WebAssembly (#1286)
* Fix releasing npm package and fix building Android VAD+ASR example (#1288)
* add Tokens []string, Timestamps []float32, Lang string, Emotion string, Event string (#1277)
* add vad+sense voice example for C API (#1291)
* ADD VAD+ASR example for dart with CircularBuffer. (#1293)
* Fix VAD+ASR example for Dart API. (#1294)
* Avoid SherpaOnnxSpeakerEmbeddingManagerFreeBestMatches freeing null. (#1296)
* Fix releasing wasm app for vad+asr (#1300)
* remove extra files from linux/macos/windows jni libs (#1301)
* two-pass Android APK for SenseVoice (#1302)
* Downgrade flutter sdk versions. (#1305)
* Reduce onnxruntime log output. (#1306)
* Provide prebuilt .jar files for different java versions. (#1307)


## 1.10.23

* flutter: add lang, emotion, event to OfflineRecognizerResult (#1268)
* Use a separate thread to initialize models for lazarus examples. (#1270)
* Object pascal examples for recording and playing audio with portaudio. (#1271)
* Text to speech API for Object Pascal. (#1273)
* update kotlin api for better release native object and add user-friendly apis. (#1275)
* Update wave-reader.cc to support 8/16/32-bit waves (#1278)
* Add WebAssembly for VAD (#1281)
* WebAssembly example for VAD + Non-streaming ASR (#1284)

## 1.10.22

* Add Pascal API for reading wave files (#1243)
* Pascal API for streaming ASR (#1246)
* Pascal API for non-streaming ASR (#1247)
* Pascal API for VAD (#1249)
* Add more C API examples (#1255)
* Add emotion, event of SenseVoice. (#1257)
* Support reading multi-channel wave files with 8/16/32-bit encoded samples (#1258)
* Enable IPO only for Release build. (#1261)
* Add Lazarus example for generating subtitles using Silero VAD with non-streaming ASR (#1251)
* Fix looking up OOVs in lexicon.txt for MeloTTS models. (#1266)


## 1.10.21

* Fix ffmpeg c api example (#1185)
* Fix splitting sentences for MeloTTS (#1186)
* Non-streaming WebSocket client for Java. (#1190)
* Fix copying asset files for flutter examples. (#1191)
* Add Chinese+English tts example for flutter (#1192)
* Add speaker identification and verification example for Dart API (#1194)
* Fix reading non-standard wav files. (#1199)
* Add ReazonSpeech Japanese pre-trained model (#1203)
* Describe how to add new words for MeloTTS models (#1209)
* Remove libonnxruntime_providers_cuda.so as a dependency. (#1210)
* Fix setting SenseVoice language. (#1214)
* Support passing TTS callback in Swift API (#1218)
* Add MeloTTS example for ios (#1223)
* Add online punctuation and casing prediction model for English language (#1224)
* Fix python two pass ASR examples (#1230)
* Add blank penalty for various language bindings

## 1.10.20

* Add Dart API for audio tagging
* Add Dart API for adding punctuations to text

## 1.10.19

* Prefix all C API functions with SherpaOnnx

## 1.10.18

* Fix the case when recognition results contain the symbol `"`. It caused
  issues when converting results to a json string.

## 1.10.17

* Support SenseVoice CTC models.
* Add Dart API for keyword spotter.

## 1.10.16

* Support zh-en TTS model from MeloTTS.

## 1.10.15

* Downgrade onnxruntime from v1.18.1 to v1.17.1

## 1.10.14

* Support whisper large v3
* Update onnxruntime from v1.18.0 to v1.18.1
* Fix invalid utf8 sequence from Whisper for Dart API.

## 1.10.13

* Update onnxruntime from 1.17.1 to 1.18.0
* Add C# API for Keyword spotting

## 1.10.12

* Add Flush to VAD so that the last speech segment can be detected. See also
  https://github.com/k2-fsa/sherpa-onnx/discussions/1077#discussioncomment-9979740

## 1.10.11

* Support the iOS platform for Flutter.

## 1.10.10

* Build sherpa-onnx into a single shared library.

## 1.10.9

* Fix released packages. piper-phonemize was not included in v1.10.8.

## 1.10.8

* Fix released packages. There should be a lib directory.

## 1.10.7

* Support Android for Flutter.

## 1.10.2

* Fix passing C# string to C++

## 1.10.1

* Enable to stop TTS generation

## 1.10.0

* Add inverse text normalization

## 1.9.30

* Add TTS

## 1.9.29

* Publish with CI

## 0.0.3

* Fix path separator on Windows.

## 0.0.2

* Support specifying lib path.

## 0.0.1

* Initial release.


================================================
FILE: CMakeLists.txt
================================================
if (CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
  set(CMAKE_POLICY_VERSION_MINIMUM 3.10)
endif()

cmake_minimum_required(VERSION 3.15 FATAL_ERROR)

# https://cmake.org/cmake/help/latest/prop_tgt/MSVC_RUNTIME_LIBRARY.html
cmake_policy(SET CMP0091 NEW)

message(STATUS "CMake version: ${CMAKE_VERSION}")

set(CMAKE_OSX_DEPLOYMENT_TARGET "10.14" CACHE STRING "Minimum OS X deployment version. Used only for macOS")

set(CMAKE_POLICY_DEFAULT_CMP0063 NEW)
set(CMAKE_POLICY_DEFAULT_CMP0069 NEW)

project(sherpa-onnx)

# Remember to update
# ./CHANGELOG.md
# ./new-release.sh
set(SHERPA_ONNX_VERSION "1.12.31")

# Disable warning about
#
# "The DOWNLOAD_EXTRACT_TIMESTAMP option was not given and policy CMP0135 is
#  not set.
if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0")
  cmake_policy(SET CMP0135 NEW)
endif()


if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
  set(SUGGEST_BUILD_BINARIES ON)
else()
  set(SUGGEST_BUILD_BINARIES OFF)
endif()

option(SHERPA_ONNX_ENABLE_PYTHON "Whether to build Python" OFF)
option(SHERPA_ONNX_ENABLE_TESTS "Whether to build tests" OFF)
option(SHERPA_ONNX_ENABLE_CHECK "Whether to build with assert" OFF)
option(BUILD_SHARED_LIBS "Whether to build shared libraries" OFF)
option(SHERPA_ONNX_ENABLE_PORTAUDIO "Whether to build with portaudio" ON)
option(SHERPA_ONNX_ENABLE_JNI "Whether to build JNI internface" OFF)
option(SHERPA_ONNX_ENABLE_C_API "Whether to build C API" ON)
option(SHERPA_ONNX_ENABLE_WEBSOCKET "Whether to build webscoket server/client" ON)
option(SHERPA_ONNX_ENABLE_GPU "Enable ONNX Runtime GPU support" OFF)
option(SHERPA_ONNX_ENABLE_DIRECTML "Enable ONNX Runtime DirectML support" OFF)
option(SHERPA_ONNX_LINK_D3D "Whether static ONNX runtime lib with DML" OFF)

option(SHERPA_ONNX_ENABLE_WASM "Whether to enable WASM" OFF)
option(SHERPA_ONNX_ENABLE_WASM_SPEAKER_DIARIZATION "Whether to enable WASM for speaker diarization" OFF)
option(SHERPA_ONNX_ENABLE_WASM_TTS "Whether to enable WASM for TTS" OFF)
option(SHERPA_ONNX_ENABLE_WASM_ASR "Whether to enable WASM for ASR" OFF)
option(SHERPA_ONNX_ENABLE_WASM_KWS "Whether to enable WASM for KWS" OFF)
option(SHERPA_ONNX_ENABLE_WASM_VAD "Whether to enable WASM for VAD" OFF)
option(SHERPA_ONNX_ENABLE_WASM_VAD_ASR "Whether to enable WASM for VAD+ASR" OFF)
option(SHERPA_ONNX_ENABLE_WASM_NODEJS "Whether to enable WASM for NodeJS" OFF)
option(SHERPA_ONNX_ENABLE_WASM_SPEECH_ENHANCEMENT "Whether to enable WASM for speech enhancement" OFF)
option(SHERPA_ONNX_ENABLE_BINARY "Whether to build binaries" ${SUGGEST_BUILD_BINARIES})
option(SHERPA_ONNX_ENABLE_TTS "Whether to build TTS related code" ON)
option(SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION "Whether to build speaker diarization related code" ON)
option(SHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY "True to link libstdc++ statically. Used only when BUILD_SHARED_LIBS is OFF on Linux" ON)
option(SHERPA_ONNX_USE_PRE_INSTALLED_ONNXRUNTIME_IF_AVAILABLE "True to use pre-installed onnxruntime if available" ON)
option(SHERPA_ONNX_ENABLE_SANITIZER "Whether to enable ubsan and asan" OFF)
option(SHERPA_ONNX_BUILD_C_API_EXAMPLES "Whether to enable C API examples" ${SUGGEST_BUILD_BINARIES})
option(SHERPA_ONNX_ENABLE_RKNN "Whether to build for RKNN NPU " OFF)
option(SHERPA_ONNX_ENABLE_AXERA "Whether to build for Axera NPU " OFF)
option(SHERPA_ONNX_ENABLE_AXCL "Whether to build for Axcl NPU " OFF)
option(SHERPA_ONNX_ENABLE_ASCEND_NPU "Whether to build for Ascend NPU " OFF)
option(SHERPA_ONNX_ENABLE_QNN "Whether to build for Qualcomm NPU" OFF)
option(SHERPA_ONNX_ENABLE_SPACEMIT "Whether to build for SpacemiT CPUs " OFF)
set(SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION "1.11.0" CACHE STRING "Used only for Linux ARM64 GPU. Set to 1.11.0 if you use CUDA 10.2 and cudnn8. Set it to 1.16.0 if you use CUDA 11.4 and cudnn8. Set it to 1.18.0 if you use CUDA 12.2 and cudnn8. Set it to 1.18.1 if you use CUDA 12.6 and cudnn9")

# SHERPA_ONNX_USE_STATIC_CRT controls whether we use:
#   - Static CRT:   /MT  (Release), /MTd (Debug)
#   - Dynamic CRT:  /MD  (Release), /MDd (Debug)
option(SHERPA_ONNX_USE_STATIC_CRT "For Windows only. ON to use static CRT (/MT /MTd); OFF to use dynamic (/MD /MDd)" ON)


# On Windows with MSVC, explicitly control which C runtime (CRT) to use.
#
# We rely on CMAKE_MSVC_RUNTIME_LIBRARY (CMake >= 3.15) instead of manually
# appending /MT, /MTd, /MD, or /MDd to compiler flags.
#
# Benefits:
#   - Correct behavior for multi-config generators (Visual Studio, Ninja Multi-Config)
#   - No reliance on CMAKE_BUILD_TYPE (which is empty for multi-config generators)
#   - Cleaner interaction with subprojects and FetchContent dependencies
#
# The generator expression automatically selects:
#   - Debug   -> /MTd or /MDd
#   - Release -> /MT  or /MD
#   - RelWithDebInfo / MinSizeRel -> /MT or /MD
if (MSVC AND NOT DEFINED CMAKE_MSVC_RUNTIME_LIBRARY)
  if(DEFINED CMAKE_BUILD_TYPE AND NOT CMAKE_BUILD_TYPE STREQUAL "")
    if (SHERPA_ONNX_USE_STATIC_CRT)
      # Use static CRT: /MT (Release) and /MTd (Debug)
      if(CMAKE_BUILD_TYPE MATCHES Debug)
        set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDebug")
      else()
        set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded")
      endif()
    else()
      # Use dynamic CRT: /MD (Release) and /MDd (Debug)
      if(CMAKE_BUILD_TYPE MATCHES Debug)
        set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDebugDLL")
      else()
        set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDLL")
      endif()
    endif()
  else()
    if (SHERPA_ONNX_USE_STATIC_CRT)
      # Use static CRT: /MT (Release) and /MTd (Debug)
      set(CMAKE_MSVC_RUNTIME_LIBRARY
          "MultiThreaded$<$<CONFIG:Debug>:Debug>")
    else()
      # Use dynamic CRT: /MD (Release) and /MDd (Debug)
      set(CMAKE_MSVC_RUNTIME_LIBRARY
          "MultiThreadedDLL$<$<CONFIG:Debug>:Debug>")
    endif()
  endif()
endif()

set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")

if(NOT WIN32)
  set(CMAKE_SKIP_BUILD_RPATH FALSE)
  set(BUILD_RPATH_USE_ORIGIN TRUE)
  set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
endif()

if(NOT APPLE)
  set(SHERPA_ONNX_RPATH_ORIGIN "$ORIGIN")
else()
  set(SHERPA_ONNX_RPATH_ORIGIN "@loader_path")
endif()

if(NOT WIN32)
  set(CMAKE_INSTALL_RPATH ${SHERPA_ONNX_RPATH_ORIGIN})
  set(CMAKE_BUILD_RPATH ${SHERPA_ONNX_RPATH_ORIGIN})
endif()

if(NOT CMAKE_BUILD_TYPE)
  message(STATUS "No CMAKE_BUILD_TYPE given, default to Release")
  set(CMAKE_BUILD_TYPE Release)
endif()

if(DEFINED ANDROID_ABI AND NOT SHERPA_ONNX_ENABLE_JNI AND NOT SHERPA_ONNX_ENABLE_C_API)
  message(STATUS "Set SHERPA_ONNX_ENABLE_JNI to ON for Android")
  set(SHERPA_ONNX_ENABLE_JNI ON CACHE BOOL "" FORCE)
endif()

if(SHERPA_ONNX_ENABLE_PYTHON AND NOT BUILD_SHARED_LIBS)
  message(STATUS "Set BUILD_SHARED_LIBS to ON since SHERPA_ONNX_ENABLE_PYTHON is ON")
  set(BUILD_SHARED_LIBS ON CACHE BOOL "" FORCE)
endif()

if(SHERPA_ONNX_ENABLE_GPU)
  message(WARNING "\
Compiling for NVIDIA GPU is enabled. Please make sure cudatoolkit
is installed on your system. Otherwise, you will get errors at runtime.
Hint: You don't need sudo permission to install CUDA toolkit. Please refer to
  https://k2-fsa.github.io/k2/installation/cuda-cudnn.html
to install CUDA toolkit if you have not installed it.")
  if(NOT BUILD_SHARED_LIBS)
    message(STATUS "Set BUILD_SHARED_LIBS to ON since SHERPA_ONNX_ENABLE_GPU is ON")
    set(BUILD_SHARED_LIBS ON CACHE BOOL "" FORCE)
  endif()
endif()

if(SHERPA_ONNX_ENABLE_DIRECTML)
  message(WARNING "\
Compiling with DirectML enabled. Please make sure Windows 10 SDK
is installed on your system. Otherwise, you will get errors at runtime.
Please refer to
  https://onnxruntime.ai/docs/execution-providers/DirectML-ExecutionProvider.html#requirements
to install Windows 10 SDK if you have not installed it.")
  if(NOT BUILD_SHARED_LIBS)
    message(STATUS "Set BUILD_SHARED_LIBS to ON since SHERPA_ONNX_ENABLE_DIRECTML is ON")
    set(BUILD_SHARED_LIBS ON CACHE BOOL "" FORCE)
  endif()
endif()

if(CMAKE_SYSTEM_NAME STREQUAL OHOS)
  set(CMAKE_CXX_FLAGS "-Wno-unused-command-line-argument ${CMAKE_CXX_FLAGS}")
  set(CMAKE_C_FLAGS "-Wno-unused-command-line-argument ${CMAKE_C_FLAGS}")
endif()

if(ANDROID)
  # see https://github.com/microsoft/onnxruntime/pull/22076
  # https://github.com/k2-fsa/sherpa-onnx/issues/2413
  set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-z,max-page-size=16384")
endif()

message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
message(STATUS "CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}")
message(STATUS "BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}")
message(STATUS "SHERPA_ONNX_ENABLE_PYTHON ${SHERPA_ONNX_ENABLE_PYTHON}")
message(STATUS "SHERPA_ONNX_ENABLE_TESTS ${SHERPA_ONNX_ENABLE_TESTS}")
message(STATUS "SHERPA_ONNX_ENABLE_CHECK ${SHERPA_ONNX_ENABLE_CHECK}")
message(STATUS "SHERPA_ONNX_ENABLE_PORTAUDIO ${SHERPA_ONNX_ENABLE_PORTAUDIO}")
message(STATUS "SHERPA_ONNX_ENABLE_JNI ${SHERPA_ONNX_ENABLE_JNI}")
message(STATUS "SHERPA_ONNX_ENABLE_C_API ${SHERPA_ONNX_ENABLE_C_API}")
message(STATUS "SHERPA_ONNX_ENABLE_WEBSOCKET ${SHERPA_ONNX_ENABLE_WEBSOCKET}")
message(STATUS "SHERPA_ONNX_ENABLE_GPU ${SHERPA_ONNX_ENABLE_GPU}")
message(STATUS "SHERPA_ONNX_ENABLE_WASM ${SHERPA_ONNX_ENABLE_WASM}")
message(STATUS "SHERPA_ONNX_ENABLE_WASM_SPEAKER_DIARIZATION ${SHERPA_ONNX_ENABLE_WASM_SPEAKER_DIARIZATION}")
message(STATUS "SHERPA_ONNX_ENABLE_WASM_TTS ${SHERPA_ONNX_ENABLE_WASM_TTS}")
message(STATUS "SHERPA_ONNX_ENABLE_WASM_ASR ${SHERPA_ONNX_ENABLE_WASM_ASR}")
message(STATUS "SHERPA_ONNX_ENABLE_WASM_KWS ${SHERPA_ONNX_ENABLE_WASM_KWS}")
message(STATUS "SHERPA_ONNX_ENABLE_WASM_VAD ${SHERPA_ONNX_ENABLE_WASM_VAD}")
message(STATUS "SHERPA_ONNX_ENABLE_WASM_VAD_ASR ${SHERPA_ONNX_ENABLE_WASM_VAD_ASR}")
message(STATUS "SHERPA_ONNX_ENABLE_WASM_NODEJS ${SHERPA_ONNX_ENABLE_WASM_NODEJS}")
message(STATUS "SHERPA_ONNX_ENABLE_WASM_SPEECH_ENHANCEMENT ${SHERPA_ONNX_ENABLE_WASM_SPEECH_ENHANCEMENT}")
message(STATUS "SHERPA_ONNX_ENABLE_BINARY ${SHERPA_ONNX_ENABLE_BINARY}")
message(STATUS "SHERPA_ONNX_ENABLE_TTS ${SHERPA_ONNX_ENABLE_TTS}")
message(STATUS "SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION ${SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION}")
message(STATUS "SHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY ${SHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY}")
message(STATUS "SHERPA_ONNX_USE_PRE_INSTALLED_ONNXRUNTIME_IF_AVAILABLE ${SHERPA_ONNX_USE_PRE_INSTALLED_ONNXRUNTIME_IF_AVAILABLE}")
message(STATUS "SHERPA_ONNX_ENABLE_SANITIZER: ${SHERPA_ONNX_ENABLE_SANITIZER}")
message(STATUS "SHERPA_ONNX_BUILD_C_API_EXAMPLES: ${SHERPA_ONNX_BUILD_C_API_EXAMPLES}")
message(STATUS "SHERPA_ONNX_ENABLE_RKNN: ${SHERPA_ONNX_ENABLE_RKNN}")
message(STATUS "SHERPA_ONNX_ENABLE_AXERA: ${SHERPA_ONNX_ENABLE_AXERA}")
message(STATUS "SHERPA_ONNX_ENABLE_AXCL: ${SHERPA_ONNX_ENABLE_AXCL}")
message(STATUS "SHERPA_ONNX_ENABLE_ASCEND_NPU: ${SHERPA_ONNX_ENABLE_ASCEND_NPU}")
message(STATUS "SHERPA_ONNX_ENABLE_QNN: ${SHERPA_ONNX_ENABLE_QNN}")
message(STATUS "SHERPA_ONNX_ENABLE_SPACEMIT: ${SHERPA_ONNX_ENABLE_SPACEMIT}")
message(STATUS "SHERPA_ONNX_LINK_D3D: ${SHERPA_ONNX_LINK_D3D}")
message(STATUS "SHERPA_ONNX_USE_STATIC_CRT: ${SHERPA_ONNX_USE_STATIC_CRT}")
if(MSVC)
  message(STATUS "CMAKE_MSVC_RUNTIME_LIBRARY ${CMAKE_MSVC_RUNTIME_LIBRARY}")
endif()

if(BUILD_SHARED_LIBS OR SHERPA_ONNX_ENABLE_JNI)
  set(CMAKE_CXX_VISIBILITY_PRESET hidden)
  set(CMAKE_VISIBILITY_INLINES_HIDDEN 1)
  set(CMAKE_POSITION_INDEPENDENT_CODE ON)
endif()

if(BUILD_SHARED_LIBS AND NOT CMAKE_SYSTEM_NAME STREQUAL iOS AND CMAKE_BUILD_TYPE STREQUAL Release)
  # Don't use LTO for iOS since it causes the following error
  # error: unable to find any architecture information in the binary
  # at '/Users/fangjun/open-source/sherpa-onnx/build-ios/build/os64/sherpa-onnx.a':
  # Unknown header: 0xb17c0de
  # See also https://forums.developer.apple.com/forums/thread/714324

  include(CheckIPOSupported)
  check_ipo_supported(RESULT ipo)
  if(ipo)
    message(STATUS "IPO is enabled")
    set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON)
  else()
    message(STATUS "IPO is not available")
  endif()
endif()

if(SHERPA_ONNX_ENABLE_TTS)
  message(STATUS "TTS is enabled")
  add_definitions(-DSHERPA_ONNX_ENABLE_TTS=1)
else()
  message(STATUS "TTS is disabled")
  add_definitions(-DSHERPA_ONNX_ENABLE_TTS=0)
endif()

if(SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION)
  message(STATUS "speaker diarization is enabled")
  add_definitions(-DSHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION=1)
else()
  message(STATUS "speaker diarization is disabled")
  add_definitions(-DSHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION=0)
endif()

if(SHERPA_ONNX_ENABLE_DIRECTML)
  message(STATUS "DirectML is enabled")
  add_definitions(-DSHERPA_ONNX_ENABLE_DIRECTML=1)
else()
  message(STATUS "DirectML is disabled")
  add_definitions(-DSHERPA_ONNX_ENABLE_DIRECTML=0)
endif()

if(SHERPA_ONNX_ENABLE_WASM_SPEAKER_DIARIZATION)
  if(NOT SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION)
    message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION to ON if you want to build WASM for speaker diarization")
  endif()

  if(NOT SHERPA_ONNX_ENABLE_WASM)
    message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for speaker diarization")
  endif()
endif()

if(SHERPA_ONNX_ENABLE_WASM_TTS)
  if(NOT SHERPA_ONNX_ENABLE_TTS)
    message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_TTS to ON if you want to build WASM for TTS")
  endif()

  if(NOT SHERPA_ONNX_ENABLE_WASM)
    message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for TTS")
  endif()
endif()

if(SHERPA_ONNX_ENABLE_WASM_ASR)
  if(NOT SHERPA_ONNX_ENABLE_WASM)
    message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for ASR")
  endif()
endif()

if(SHERPA_ONNX_ENABLE_WASM_NODEJS)
  if(NOT SHERPA_ONNX_ENABLE_WASM)
    message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for NodeJS")
  endif()
  add_definitions(-DSHERPA_ONNX_ENABLE_WASM_KWS=1)
endif()

if(SHERPA_ONNX_ENABLE_WASM)
  add_definitions(-DSHERPA_ONNX_ENABLE_WASM=1)
endif()

if(SHERPA_ONNX_ENABLE_WASM_KWS)
  if(NOT SHERPA_ONNX_ENABLE_WASM)
    message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for KWS")
  endif()
  add_definitions(-DSHERPA_ONNX_ENABLE_WASM_KWS=1)
endif()

if(SHERPA_ONNX_ENABLE_WASM_VAD)
  if(NOT SHERPA_ONNX_ENABLE_WASM)
    message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for VAD")
  endif()
endif()

if(SHERPA_ONNX_ENABLE_WASM_VAD_ASR)
  if(NOT SHERPA_ONNX_ENABLE_WASM)
    message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for VAD+ASR")
  endif()
endif()

if(SHERPA_ONNX_ENABLE_WASM_SPEECH_ENHANCEMENT)
  if(NOT SHERPA_ONNX_ENABLE_WASM)
    message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for speech enhancement")
  endif()
endif()

if(NOT CMAKE_CXX_STANDARD)
  set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ version to be used.")
endif()
set(CMAKE_CXX_EXTENSIONS OFF)
message(STATUS "C++ Standard version: ${CMAKE_CXX_STANDARD}")

include(CheckIncludeFileCXX)

if(SHERPA_ONNX_ENABLE_RKNN)
  add_definitions(-DSHERPA_ONNX_ENABLE_RKNN=1)
endif()

if(SHERPA_ONNX_ENABLE_AXERA)
  add_definitions(-DSHERPA_ONNX_ENABLE_AXERA=1)
endif()

if(SHERPA_ONNX_ENABLE_AXCL)
  add_definitions(-DSHERPA_ONNX_ENABLE_AXCL=1)
endif()

if(SHERPA_ONNX_ENABLE_QNN)
  add_definitions(-DSHERPA_ONNX_ENABLE_QNN=1)
endif()

if(SHERPA_ONNX_ENABLE_SPACEMIT)
  add_definitions(-DSHERPA_ONNX_ENABLE_SPACEMIT=1)
endif()

if(SHERPA_ONNX_ENABLE_ASCEND_NPU)
  set(ASCEND_TOOLKIT_HOME)
  if(NOT DEFINED ENV{ASCEND_TOOLKIT_HOME})
    if(EXISTS /usr/local/Ascend/ascend-toolkit/latest)
      set(ASCEND_TOOLKIT_HOME /usr/local/Ascend/ascend-toolkit/latest)
    else()
      message(FATAL_ERROR "\
      Please specify the installation directory of the ascend toolkit.
      For instance, if it is installed in

        /usr/local/Ascend/ascend-toolkit/latest

      You can run

        export ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
      ")
    endif()
  else()
    set(ASCEND_TOOLKIT_HOME $ENV{ASCEND_TOOLKIT_HOME})
  endif()

  message(STATUS "ASCEND_TOOLKIT_HOME: ${ASCEND_TOOLKIT_HOME}")

  if(NOT EXISTS ${ASCEND_TOOLKIT_HOME}/include/acl/acl.h)
    message(FATAL_ERROR "${ASCEND_TOOLKIT_HOME}/include/acl/acl.h does not exist")
  endif()

  if(NOT EXISTS ${ASCEND_TOOLKIT_HOME}/lib64/libascendcl.so)
    message(FATAL_ERROR "${ASCEND_TOOLKIT_HOME}/lib64/libascendcl.so does not exist")
  endif()

  add_definitions(-DSHERPA_ONNX_ENABLE_ASCEND_NPU=1)
  message(STATUS "Build with Ascend NPU")
endif()

if(SHERPA_ONNX_ENABLE_QNN)
  if(NOT DEFINED ENV{QNN_SDK_ROOT})
      message(FATAL_ERROR "\
      Please specify the installation directory of the QNN SDK toolkit.
      For instance, if it is installed in

        /mnt/sdb/open-source/qairt/2.33.0.250327

      You can run

        source /mnt/sdb/open-source/qairt/2.33.0.250327/bin/envsetup.sh

      which will give you the following output

      [INFO] AISW SDK environment set
      [INFO] QNN_SDK_ROOT: /mnt/sdb/open-source/qairt/2.33.0.250327
      [INFO] SNPE_ROOT: /mnt/sdb/open-source/qairt/2.33.0.250327

      Then run

        echo $QNN_SDK_ROOT

      It should print:

        /mnt/sdb/open-source/qairt/2.33.0.250327

      You can choose a version of QNN SDK by yourself. You don't need
      to use 2.33.0.250327
      ")
  endif()

  set(QNN_SDK_ROOT $ENV{QNN_SDK_ROOT})

  if(NOT EXISTS ${QNN_SDK_ROOT}/include/QNN/QnnInterface.h)
    message(FATAL_ERROR "${QNN_SDK_ROOT}/include/QNN/QnnInterface.h does not exist")
  endif()
endif()

if(UNIX AND NOT APPLE AND NOT SHERPA_ONNX_ENABLE_WASM AND NOT CMAKE_SYSTEM_NAME STREQUAL Android AND NOT CMAKE_SYSTEM_NAME STREQUAL OHOS)
  check_include_file_cxx(alsa/asoundlib.h SHERPA_ONNX_HAS_ALSA)
  if(SHERPA_ONNX_HAS_ALSA)
    message(STATUS "With Alsa")
    add_definitions(-DSHERPA_ONNX_ENABLE_ALSA=1)
  else()
    message(WARNING "\
Could not find alsa/asoundlib.h !
We won't build sherpa-onnx-alsa
To fix that, please do:
  (1) sudo apt-get install alsa-utils libasound2-dev pkg-config
  (2) rm -rf build
  (3) re-try
  ")
  endif()
endif()

check_include_file_cxx(cxxabi.h SHERPA_ONNX_HAVE_CXXABI_H)
check_include_file_cxx(execinfo.h SHERPA_ONNX_HAVE_EXECINFO_H)

if(WIN32)
  add_definitions(-DNOMINMAX) # Otherwise, std::max() and std::min() won't work
endif()


if(WIN32 AND MSVC)
  # disable various warnings for MSVC
  # 4244: 'return': conversion from 'unsigned __int64' to 'int', possible loss of data
  # 4267: 'initializing': conversion from 'size_t' to 'int', possible loss of data
  # 4305: 'argument': truncation from 'double' to 'const float'
  # 4334: '<<': result of 32-bit shift implicitly converted to 64 bits
  # 4800: 'int': forcing value to bool 'true' or 'false'
  # 4996: 'fopen': This function or variable may be unsafe
  set(disabled_warnings
      /wd4244
      /wd4267
      /wd4305
      /wd4334
      /wd4800
      /wd4996
  )
  message(STATUS "Disabled warnings: ${disabled_warnings}")
  foreach(w IN LISTS disabled_warnings)
    string(APPEND CMAKE_CXX_FLAGS " ${w} ")
  endforeach()

  add_compile_options("$<$<C_COMPILER_ID:MSVC>:/utf-8>")
  add_compile_options("$<$<CXX_COMPILER_ID:MSVC>:/utf-8>")
endif()

list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules)
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)

include(show-info)

if(SHERPA_ONNX_ENABLE_WASM)
  # Enable it for debugging in case there is something wrong.
  # string(APPEND CMAKE_CXX_FLAGS " -g4 -s ASSERTIONS=2 -s SAFE_HEAP=1 -s STACK_OVERFLOW_CHECK=1 ")
endif()

if(NOT BUILD_SHARED_LIBS AND CMAKE_SYSTEM_NAME STREQUAL Linux)
  if(SHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY)
    message(STATUS "Link libstdc++ statically")
    set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -static-libstdc++ -static-libgcc ")
  else()
    message(STATUS "Link libstdc++ dynamically")
  endif()
endif()

include(kaldi-native-fbank)
include(kaldi-decoder)
include(onnxruntime)
include(simple-sentencepiece)
set(ONNXRUNTIME_DIR ${onnxruntime_SOURCE_DIR})
message(STATUS "ONNXRUNTIME_DIR: ${ONNXRUNTIME_DIR}")

if(SHERPA_ONNX_ENABLE_PORTAUDIO AND SHERPA_ONNX_ENABLE_BINARY)
  # portaudio is used only in building demo binaries and the sherpa-onnx-core
  # library does not depend on it.
  include(portaudio)
endif()

if(SHERPA_ONNX_ENABLE_PYTHON)
  include(pybind11)
endif()

if(SHERPA_ONNX_ENABLE_TESTS)
  enable_testing()
  include(googletest)
endif()

if(SHERPA_ONNX_ENABLE_WEBSOCKET)
  include(websocketpp)
  include(asio)
endif()

include(json)

if(SHERPA_ONNX_ENABLE_TTS)
  include(espeak-ng-for-piper)
  set(ESPEAK_NG_DIR ${espeak_ng_SOURCE_DIR})
  message(STATUS "ESPEAK_NG_DIR: ${ESPEAK_NG_DIR}")
  include(piper-phonemize)
endif()

if(SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION)
  include(hclust-cpp)
endif()

# if(NOT MSVC AND CMAKE_BUILD_TYPE STREQUAL Debug AND (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang"))
if(SHERPA_ONNX_ENABLE_SANITIZER)
  message(WARNING "enable ubsan and asan")
  set(CMAKE_REQUIRED_LIBRARIES -lubsan -lasan)
  include(CheckCCompilerFlag)

  set(flags -fsanitize=undefined )
  string(APPEND flags " -fno-sanitize-recover=undefined ")
  string(APPEND flags " -fsanitize=integer ")
  string(APPEND flags " -fsanitize=nullability ")
  string(APPEND flags " -fsanitize=implicit-conversion ")
  string(APPEND flags " -fsanitize=bounds ")
  string(APPEND flags " -fsanitize=address ")

  if(OFF)
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${flags} -Wall -Wextra")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${flags} -Wall -Wextra")
  else()
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${flags}")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${flags}")
  endif()

  set(CMAKE_EXECUTABLE_LINKER_FLAGS "${CMAKE_EXECUTABLE_LINKER_FLAGS} ${flags}")

  add_compile_options(-fno-omit-frame-pointer)
endif()

add_subdirectory(sherpa-onnx)

if(SHERPA_ONNX_ENABLE_C_API AND SHERPA_ONNX_ENABLE_BINARY AND SHERPA_ONNX_BUILD_C_API_EXAMPLES)
  set(SHERPA_ONNX_PKG_WITH_CARGS "-lcargs")
  add_subdirectory(c-api-examples)
  add_subdirectory(cxx-api-examples)
endif()

if(SHERPA_ONNX_ENABLE_WASM)
  add_subdirectory(wasm)
endif()

message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")

if(NOT BUILD_SHARED_LIBS)
  if(APPLE)
    set(SHERPA_ONNX_PKG_CONFIG_EXTRA_LIBS "-lc++ -framework Foundation")
  endif()

  if(UNIX AND NOT APPLE)
    set(SHERPA_ONNX_PKG_CONFIG_EXTRA_LIBS "-lstdc++ -lm -pthread -ldl")
  endif()
endif()

if(NOT BUILD_SHARED_LIBS)
# See https://people.freedesktop.org/~dbn/pkg-config-guide.html
  if(SHERPA_ONNX_ENABLE_TTS)
    configure_file(cmake/sherpa-onnx-static.pc.in ${PROJECT_BINARY_DIR}/sherpa-onnx.pc @ONLY)
  else()
    configure_file(cmake/sherpa-onnx-static-no-tts.pc.in ${PROJECT_BINARY_DIR}/sherpa-onnx.pc @ONLY)
  endif()
else()
  configure_file(cmake/sherpa-onnx-shared.pc.in ${PROJECT_BINARY_DIR}/sherpa-onnx.pc @ONLY)
endif()

install(
  FILES
    ${PROJECT_BINARY_DIR}/sherpa-onnx.pc
  DESTINATION
    ./
)
message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")


================================================
FILE: CPPLINT.cfg
================================================
filter=-./mfc-examples


================================================
FILE: LICENSE
================================================

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: MANIFEST.in
================================================
include LICENSE
include README.md
include CMakeLists.txt
recursive-include c-api-examples *.*
recursive-include sherpa-onnx *.*
recursive-include cmake *.*
prune */__pycache__
prune android
prune sherpa-onnx/java-api
prune ios-swift
prune ios-swiftui


================================================
FILE: README.md
================================================
 ### Supported functions

|Speech recognition| [Speech synthesis][tts-url] | [Source separation][ss-url] |
|------------------|------------------|-------------------|
|   ✔️              |         ✔️        |       ✔️           |

|Speaker identification| [Speaker diarization][sd-url] | Speaker verification |
|----------------------|-------------------- |------------------------|
|   ✔️                  |         ✔️           |            ✔️           |

| [Spoken Language identification][slid-url] | [Audio tagging][at-url] | [Voice activity detection][vad-url] |
|--------------------------------|---------------|--------------------------|
|                 ✔️              |          ✔️    |                ✔️         |

| [Keyword spotting][kws-url] | [Add punctuation][punct-url] | [Speech enhancement][se-url] |
|------------------|-----------------|--------------------|
|     ✔️            |       ✔️         |      ✔️             |


### Supported platforms

|Architecture| Android | iOS     | Windows    | macOS | linux | HarmonyOS |
|------------|---------|---------|------------|-------|-------|-----------|
|   x64      |  ✔️      |         |   ✔️      | ✔️    |  ✔️    |   ✔️   |
|   x86      |  ✔️      |         |   ✔️      |       |        |        |
|   arm64    |  ✔️      | ✔️      |   ✔️      | ✔️    |  ✔️    |   ✔️   |
|   arm32    |  ✔️      |         |           |       |  ✔️    |   ✔️   |
|   riscv64  |          |         |           |       |  ✔️    |        |

### Supported programming languages

| 1. C++ | 2. C  | 3. Python | 4. JavaScript |
|--------|-------|-----------|---------------|
|   ✔️    | ✔️     | ✔️         |    ✔️          |

|5. Java | 6. C# | 7. Kotlin | 8. Swift |
|--------|-------|-----------|----------|
| ✔️      |  ✔️    | ✔️         |  ✔️       |

| 9. Go | 10. Dart | 11. Rust | 12. Pascal |
|-------|----------|----------|------------|
| ✔️     |  ✔️       |   ✔️      |    ✔️       |


It also supports WebAssembly.

### Supported NPUs

| [1. Rockchip NPU (RKNN)][rknpu-doc] | [2. Qualcomm NPU (QNN)][qnn-doc]  | [3. Ascend NPU][ascend-doc] |
|-------------------------------------|-----------------------------------|-----------------------------|
|     ✔️                              |                  ✔️               |     ✔️                      |

| [4. Axera NPU][axera-npu] |
|---------------------------|
|     ✔️                    |

[Join our discord](https://discord.gg/fJdxzg2VbG)


## Introduction

This repository supports running the following functions **locally**

  - Speech-to-text (i.e., ASR); both streaming and non-streaming are supported
  - Text-to-speech (i.e., TTS)
  - Speaker diarization
  - Speaker identification
  - Speaker verification
  - Spoken language identification
  - Audio tagging
  - VAD (e.g., [silero-vad][silero-vad])
  - Speech enhancement (e.g., [gtcrn][gtcrn], [DPDFNet](https://github.com/ceva-ip/DPDFNet))
  - Keyword spotting
  - Source separation (e.g., [spleeter][spleeter], [UVR][UVR])

on the following platforms and operating systems:

  - x86, ``x86_64``, 32-bit ARM, 64-bit ARM (arm64, aarch64), RISC-V (riscv64), **RK NPU**, **Ascend NPU**
  - Linux, macOS, Windows, openKylin
  - Android, WearOS
  - iOS
  - HarmonyOS
  - NodeJS
  - WebAssembly
  - [NVIDIA Jetson Orin NX][NVIDIA Jetson Orin NX] (Support running on both CPU and GPU)
  - [NVIDIA Jetson Nano B01][NVIDIA Jetson Nano B01] (Support running on both CPU and GPU)
  - [Raspberry Pi][Raspberry Pi]
  - [RV1126][RV1126]
  - [LicheePi4A][LicheePi4A]
  - [VisionFive 2][VisionFive 2]
  - [旭日X3派][旭日X3派]
  - [爱芯派][爱芯派]
  - [RK3588][RK3588]
  - etc

with the following APIs

  - C++, C, Python, Go, ``C#``
  - Java, Kotlin, JavaScript
  - Swift, Rust
  - Dart, Object Pascal

### Links for Huggingface Spaces

<details>
<summary>You can visit the following Huggingface spaces to try sherpa-onnx without
installing anything. All you need is a browser.</summary>

| Description                                           | URL                                     | 中国镜像                               |
|-------------------------------------------------------|-----------------------------------------|----------------------------------------|
| Speaker diarization                                   | [Click me][hf-space-speaker-diarization]| [镜像][hf-space-speaker-diarization-cn]|
| Speech recognition                                    | [Click me][hf-space-asr]                | [镜像][hf-space-asr-cn]                |
| Speech recognition with [Whisper][Whisper]            | [Click me][hf-space-asr-whisper]        | [镜像][hf-space-asr-whisper-cn]        |
| Speech synthesis                                      | [Click me][hf-space-tts]                | [镜像][hf-space-tts-cn]                |
| Generate subtitles                                    | [Click me][hf-space-subtitle]           | [镜像][hf-space-subtitle-cn]           |
| Audio tagging                                         | [Click me][hf-space-audio-tagging]      | [镜像][hf-space-audio-tagging-cn]      |
| Source separation                                     | [Click me][hf-space-source-separation]  | [镜像][hf-space-source-separation-cn]  |
| Spoken language identification with [Whisper][Whisper]| [Click me][hf-space-slid-whisper]       | [镜像][hf-space-slid-whisper-cn]       |

We also have spaces built using WebAssembly. They are listed below:

| Description                                                                              | Huggingface space| ModelScope space|
|------------------------------------------------------------------------------------------|------------------|-----------------|
|Voice activity detection with [silero-vad][silero-vad]                                    | [Click me][wasm-hf-vad]|[地址][wasm-ms-vad]|
|Real-time speech recognition (Chinese + English) with Zipformer                           | [Click me][wasm-hf-streaming-asr-zh-en-zipformer]|[地址][wasm-hf-streaming-asr-zh-en-zipformer]|
|Real-time speech recognition (Chinese + English) with Paraformer                          |[Click me][wasm-hf-streaming-asr-zh-en-paraformer]| [地址][wasm-ms-streaming-asr-zh-en-paraformer]|
|Real-time speech recognition (Chinese + English + Cantonese) with [Paraformer-large][Paraformer-large]|[Click me][wasm-hf-streaming-asr-zh-en-yue-paraformer]| [地址][wasm-ms-streaming-asr-zh-en-yue-paraformer]|
|Real-time speech recognition (English) |[Click me][wasm-hf-streaming-asr-en-zipformer]    |[地址][wasm-ms-streaming-asr-en-zipformer]|
|VAD + speech recognition (Chinese) with [Zipformer CTC](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/icefall/zipformer.html#sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03-chinese)|[Click me][wasm-hf-vad-asr-zh-zipformer-ctc-07-03]| [地址][wasm-ms-vad-asr-zh-zipformer-ctc-07-03]|
|VAD + speech recognition (Chinese + English + Korean + Japanese + Cantonese) with [SenseVoice][SenseVoice]|[Click me][wasm-hf-vad-asr-zh-en-ko-ja-yue-sense-voice]| [地址][wasm-ms-vad-asr-zh-en-ko-ja-yue-sense-voice]|
|VAD + speech recognition (English) with [Whisper][Whisper] tiny.en|[Click me][wasm-hf-vad-asr-en-whisper-tiny-en]| [地址][wasm-ms-vad-asr-en-whisper-tiny-en]|
|VAD + speech recognition (English) with [Moonshine tiny][Moonshine tiny]|[Click me][wasm-hf-vad-asr-en-moonshine-tiny-en]| [地址][wasm-ms-vad-asr-en-moonshine-tiny-en]|
|VAD + speech recognition (English) with Zipformer trained with [GigaSpeech][GigaSpeech]    |[Click me][wasm-hf-vad-asr-en-zipformer-gigaspeech]| [地址][wasm-ms-vad-asr-en-zipformer-gigaspeech]|
|VAD + speech recognition (Chinese) with Zipformer trained with [WenetSpeech][WenetSpeech]  |[Click me][wasm-hf-vad-asr-zh-zipformer-wenetspeech]| [地址][wasm-ms-vad-asr-zh-zipformer-wenetspeech]|
|VAD + speech recognition (Japanese) with Zipformer trained with [ReazonSpeech][ReazonSpeech]|[Click me][wasm-hf-vad-asr-ja-zipformer-reazonspeech]| [地址][wasm-ms-vad-asr-ja-zipformer-reazonspeech]|
|VAD + speech recognition (Thai) with Zipformer trained with [GigaSpeech2][GigaSpeech2]      |[Click me][wasm-hf-vad-asr-th-zipformer-gigaspeech2]| [地址][wasm-ms-vad-asr-th-zipformer-gigaspeech2]|
|VAD + speech recognition (Chinese 多种方言) with a [TeleSpeech-ASR][TeleSpeech-ASR] CTC model|[Click me][wasm-hf-vad-asr-zh-telespeech]| [地址][wasm-ms-vad-asr-zh-telespeech]|
|VAD + speech recognition (English + Chinese, 及多种中文方言) with Paraformer-large          |[Click me][wasm-hf-vad-asr-zh-en-paraformer-large]| [地址][wasm-ms-vad-asr-zh-en-paraformer-large]|
|VAD + speech recognition (English + Chinese, 及多种中文方言) with Paraformer-small          |[Click me][wasm-hf-vad-asr-zh-en-paraformer-small]| [地址][wasm-ms-vad-asr-zh-en-paraformer-small]|
|VAD + speech recognition (多语种及多种中文方言) with [Dolphin][Dolphin]-base          |[Click me][wasm-hf-vad-asr-multi-lang-dolphin-base]| [地址][wasm-ms-vad-asr-multi-lang-dolphin-base]|
|Speech synthesis (Piper, English)                                                                  |[Click me][wasm-hf-tts-piper-en]| [地址][wasm-ms-tts-piper-en]|
|Speech synthesis (Piper, German)                                                                   |[Click me][wasm-hf-tts-piper-de]| [地址][wasm-ms-tts-piper-de]|
|Speech synthesis (Matcha, Chinese)                                                                  |[Click me][wasm-hf-tts-matcha-zh]| [地址][wasm-ms-tts-matcha-zh]|
|Speech synthesis (Matcha, English)                                                                  |[Click me][wasm-hf-tts-matcha-en]| [地址][wasm-ms-tts-matcha-en]|
|Speech synthesis (Matcha, Chinese+English)                                                          |[Click me][wasm-hf-tts-matcha-zh-en]| [地址][wasm-ms-tts-matcha-zh-en]|
|Speaker diarization                                                                         |[Click me][wasm-hf-speaker-diarization]|[地址][wasm-ms-speaker-diarization]|
|Voice cloning with ZipVoice (Chinese+English)                                               |[Click me][wasm-hf-voice-cloning-zipvoice]|[地址][wasm-ms-voice-cloning-zipvoice]|
|Voice cloning with Pocket TTS (English)                                               |[Click me][wasm-hf-voice-cloning-pocket]|[地址][wasm-ms-voice-cloning-pocket]|

</details>

### Links for pre-built Android APKs

<details>

<summary>You can find pre-built Android APKs for this repository in the following table</summary>

| Description                            | URL                                | 中国用户                          |
|----------------------------------------|------------------------------------|-----------------------------------|
| Speaker diarization                    | [Address][apk-speaker-diarization] | [点此][apk-speaker-diarization-cn]|
| Streaming speech recognition           | [Address][apk-streaming-asr]       | [点此][apk-streaming-asr-cn]      |
| Simulated-streaming speech recognition | [Address][apk-simula-streaming-asr]| [点此][apk-simula-streaming-asr-cn]|
| Text-to-speech                         | [Address][apk-tts]                 | [点此][apk-tts-cn]                |
| Voice activity detection (VAD)         | [Address][apk-vad]                 | [点此][apk-vad-cn]                |
| VAD + non-streaming speech recognition | [Address][apk-vad-asr]             | [点此][apk-vad-asr-cn]            |
| Two-pass speech recognition            | [Address][apk-2pass]               | [点此][apk-2pass-cn]              |
| Audio tagging                          | [Address][apk-at]                  | [点此][apk-at-cn]                 |
| Audio tagging (WearOS)                 | [Address][apk-at-wearos]           | [点此][apk-at-wearos-cn]          |
| Speaker identification                 | [Address][apk-sid]                 | [点此][apk-sid-cn]                |
| Spoken language identification         | [Address][apk-slid]                | [点此][apk-slid-cn]               |
| Keyword spotting                       | [Address][apk-kws]                 | [点此][apk-kws-cn]                |

</details>

### Links for pre-built Flutter APPs

<details>

#### Real-time speech recognition

| Description                    | URL                                 | 中国用户                            |
|--------------------------------|-------------------------------------|-------------------------------------|
| Streaming speech recognition   | [Address][apk-flutter-streaming-asr]| [点此][apk-flutter-streaming-asr-cn]|

#### Text-to-speech

| Description                              | URL                                | 中国用户                           |
|------------------------------------------|------------------------------------|------------------------------------|
| Android (arm64-v8a, armeabi-v7a, x86_64) | [Address][flutter-tts-android]     | [点此][flutter-tts-android-cn]     |
| Linux (x64)                              | [Address][flutter-tts-linux]       | [点此][flutter-tts-linux-cn]       |
| macOS (x64)                              | [Address][flutter-tts-macos-x64]   | [点此][flutter-tts-macos-x64-cn] |
| macOS (arm64)                            | [Address][flutter-tts-macos-arm64] | [点此][flutter-tts-macos-arm64-cn]   |
| Windows (x64)                            | [Address][flutter-tts-win-x64]     | [点此][flutter-tts-win-x64-cn]     |

> Note: You need to build from source for iOS.

</details>

### Links for pre-built Lazarus APPs

<details>

#### Generating subtitles

| Description                    | URL                        | 中国用户                   |
|--------------------------------|----------------------------|----------------------------|
| Generate subtitles (生成字幕)  | [Address][lazarus-subtitle]| [点此][lazarus-subtitle-cn]|

</details>

### Links for pre-trained models

<details>

| Description                                 | URL                                                                                   |
|---------------------------------------------|---------------------------------------------------------------------------------------|
| Speech recognition (speech to text, ASR)    | [Address][asr-models]                                                                 |
| Text-to-speech (TTS)                        | [Address][tts-models]                                                                 |
| VAD                                         | [Address][vad-models]                                                                 |
| Keyword spotting                            | [Address][kws-models]                                                                 |
| Audio tagging                               | [Address][at-models]                                                                  |
| Speaker identification (Speaker ID)         | [Address][sid-models]                                                                 |
| Spoken language identification (Language ID)| See multi-lingual [Whisper][Whisper] ASR models from  [Speech recognition][asr-models]|
| Punctuation                                 | [Address][punct-models]                                                               |
| Speaker segmentation                        | [Address][speaker-segmentation-models]                                                |
| Speech enhancement                          | [Address][speech-enhancement-models]                                                  |
| Source separation                           | [Address][source-separation-models]                                                  |

</details>

#### Some pre-trained ASR models (Streaming)

<details>

Please see

  - <https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html>
  - <https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html>
  - <https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-ctc/index.html>

for more models. The following table lists only **SOME** of them.


|Name | Supported Languages| Description|
|-----|-----|----|
|[sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20][sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20]| Chinese, English| See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english)|
|[sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16][sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16]| Chinese, English| See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16-bilingual-chinese-english)|
|[sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23][sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23]|Chinese| Suitable for Cortex A7 CPU. See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-zh-14m-2023-02-23)|
|[sherpa-onnx-streaming-zipformer-en-20M-2023-02-17][sherpa-onnx-streaming-zipformer-en-20M-2023-02-17]|English|Suitable for Cortex A7 CPU. See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-en-20m-2023-02-17)|
|[sherpa-onnx-streaming-zipformer-korean-2024-06-16][sherpa-onnx-streaming-zipformer-korean-2024-06-16]|Korean| See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-korean-2024-06-16-korean)|
|[sherpa-onnx-streaming-zipformer-fr-2023-04-14][sherpa-onnx-streaming-zipformer-fr-2023-04-14]|French| See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#shaojieli-sherpa-onnx-streaming-zipformer-fr-2023-04-14-french)|

</details>


#### Some pre-trained ASR models (Non-Streaming)

<details>

Please see

  - <https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/index.html>
  - <https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html>
  - <https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/index.html>
  - <https://k2-fsa.github.io/sherpa/onnx/pretrained_models/telespeech/index.html>
  - <https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/index.html>

for more models. The following table lists only **SOME** of them.

|Name | Supported Languages| Description|
|-----|-----|----|
|[sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/nemo-transducer-models.html#sherpa-onnx-nemo-parakeet-tdt-0-6b-v2-int8-english)| English | It is converted from <https://huggingface.co/nvidia/parakeet-tdt-0.6b-v2>|
|[Whisper tiny.en](https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2)|English| See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html)|
|[Moonshine tiny][Moonshine tiny]|English|See [also](https://github.com/usefulsensors/moonshine)|
|[sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/icefall/zipformer.html#sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03-chinese)|Chinese| A Zipformer CTC model|
|[sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17][sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17]|Chinese, Cantonese, English, Korean, Japanese| 支持多种中文方言. See [also](https://k2-fsa.github.io/sherpa/onnx/sense-voice/index.html)|
|[sherpa-onnx-paraformer-zh-2024-03-09][sherpa-onnx-paraformer-zh-2024-03-09]|Chinese, English| 也支持多种中文方言. See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2024-03-09-chinese-english)|
|[sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01][sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01]|Japanese|See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01-japanese)|
|[sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24][sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24]|Russian|See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/nemo-transducer-models.html#sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24-russian)|
|[sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24][sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24]|Russian| See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/nemo/russian.html#sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24)|
|[sherpa-onnx-zipformer-ru-2024-09-18][sherpa-onnx-zipformer-ru-2024-09-18]|Russian|See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#sherpa-onnx-zipformer-ru-2024-09-18-russian)|
|[sherpa-onnx-zipformer-korean-2024-06-24][sherpa-onnx-zipformer-korean-2024-06-24]|Korean|See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#sherpa-onnx-zipformer-korean-2024-06-24-korean)|
|[sherpa-onnx-zipformer-thai-2024-06-20][sherpa-onnx-zipformer-thai-2024-06-20]|Thai| See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#sherpa-onnx-zipformer-thai-2024-06-20-thai)|
|[sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04][sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04]|Chinese| 支持多种方言. See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/telespeech/models.html#sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04)|

</details>

### Useful links

- Documentation: https://k2-fsa.github.io/sherpa/onnx/
- Bilibili 演示视频: https://search.bilibili.com/all?keyword=%E6%96%B0%E4%B8%80%E4%BB%A3Kaldi

### How to reach us

Please see
https://k2-fsa.github.io/sherpa/social-groups.html
for 新一代 Kaldi **微信交流群** and **QQ 交流群**.

## Projects using sherpa-onnx

### [BreezeApp](https://github.com/mtkresearch/BreezeApp) from [MediaTek Research](https://github.com/mtkresearch)

> BreezeAPP is a mobile AI application developed for both Android and iOS platforms.
> Users can download it directly from the App Store and enjoy a variety of features
> offline, including speech-to-text, text-to-speech, text-based chatbot interactions,
> and image question-answering

  - [Download APK for BreezeAPP](https://huggingface.co/MediaTek-Research/BreezeApp/resolve/main/BreezeApp.apk)
  - [APK 中国镜像](https://hf-mirror.com/MediaTek-Research/BreezeApp/blob/main/BreezeApp.apk)

| 1 | 2 | 3 |
|---|---|---|
|![](https://github.com/user-attachments/assets/1cdbc057-b893-4de6-9e9c-f1d7dfd1d992)|![](https://github.com/user-attachments/assets/d77cd98e-b057-442f-860d-d5befd5c769b)|![](https://github.com/user-attachments/assets/57e546bf-3d39-45b9-b392-b48ca4fb3c58)|

### [Open-LLM-VTuber](https://github.com/t41372/Open-LLM-VTuber)

Talk to any LLM with hands-free voice interaction, voice interruption, and Live2D taking
face running locally across platforms

See also <https://github.com/t41372/Open-LLM-VTuber/pull/50>

### [voiceapi](https://github.com/ruzhila/voiceapi)

<details>
  <summary>Streaming ASR and TTS based on FastAPI</summary>


It shows how to use the ASR and TTS Python APIs with FastAPI.
</details>

### [腾讯会议摸鱼工具 TMSpeech](https://github.com/jxlpzqc/TMSpeech)

Uses streaming ASR in C# with graphical user interface.

Video demo in Chinese: [【开源】Windows实时字幕软件（网课/开会必备）](https://www.bilibili.com/video/BV1rX4y1p7Nx)

### [lol互动助手](https://github.com/l1veIn/lol-wom-electron)

It uses the JavaScript API of sherpa-onnx along with [Electron](https://electronjs.org/)

Video demo in Chinese: [爆了！炫神教你开打字挂！真正影响胜率的英雄联盟工具！英雄联盟的最后一块拼图！和游戏中的每个人无障碍沟通！](https://www.bilibili.com/video/BV142tje9E74)

### [Sherpa-ONNX 语音识别服务器](https://github.com/hfyydd/sherpa-onnx-server)

A server based on nodejs providing Restful API for speech recognition.

### [QSmartAssistant](https://github.com/xinhecuican/QSmartAssistant)

一个模块化，全过程可离线，低占用率的对话机器人/智能音箱

It uses QT. Both [ASR](https://github.com/xinhecuican/QSmartAssistant/blob/master/doc/%E5%AE%89%E8%A3%85.md#asr)
and [TTS](https://github.com/xinhecuican/QSmartAssistant/blob/master/doc/%E5%AE%89%E8%A3%85.md#tts)
are used.

### [Flutter-EasySpeechRecognition](https://github.com/Jason-chen-coder/Flutter-EasySpeechRecognition)

It extends [./flutter-examples/streaming_asr](./flutter-examples/streaming_asr) by
downloading models inside the app to reduce the size of the app.

Note: [[Team B] Sherpa AI backend](https://github.com/umgc/spring2025/pull/82) also uses
sherpa-onnx in a Flutter APP.

### [sherpa-onnx-unity](https://github.com/xue-fei/sherpa-onnx-unity)

sherpa-onnx in Unity. See also [#1695](https://github.com/k2-fsa/sherpa-onnx/issues/1695),
[#1892](https://github.com/k2-fsa/sherpa-onnx/issues/1892), and [#1859](https://github.com/k2-fsa/sherpa-onnx/issues/1859)

### [xiaozhi-esp32-server](https://github.com/xinnan-tech/xiaozhi-esp32-server)

本项目为xiaozhi-esp32提供后端服务，帮助您快速搭建ESP32设备控制服务器
Backend service for xiaozhi-esp32, helps you quickly build an ESP32 device control server.

See also

  - [ASR新增轻量级sherpa-onnx-asr](https://github.com/xinnan-tech/xiaozhi-esp32-server/issues/315)
  - [feat: ASR增加sherpa-onnx模型](https://github.com/xinnan-tech/xiaozhi-esp32-server/pull/379)

### [KaithemAutomation](https://github.com/EternityForest/KaithemAutomation)

Pure Python, GUI-focused home automation/consumer grade SCADA.

It uses TTS from sherpa-onnx. See also [✨ Speak command that uses the new globally configured TTS model.](https://github.com/EternityForest/KaithemAutomation/commit/8e64d2b138725e426532f7d66bb69dd0b4f53693)

### [Open-XiaoAI KWS](https://github.com/idootop/open-xiaoai-kws)

Enable custom wake word for XiaoAi Speakers. 让小爱音箱支持自定义唤醒词。

Video demo in Chinese: [小爱同学启动～˶╹ꇴ╹˶！](https://www.bilibili.com/video/BV1YfVUz5EMj)

### [C++ WebSocket ASR Server](https://github.com/mawwalker/stt-server)

It provides a WebSocket server based on C++ for ASR using sherpa-onnx.

### [Go WebSocket Server](https://github.com/bbeyondllove/asr_server)

It provides a WebSocket server based on the Go programming language for sherpa-onnx.

### [Making robot Paimon, Ep10 "The AI Part 1"](https://www.youtube.com/watch?v=KxPKkwxGWZs)

It is a [YouTube video](https://www.youtube.com/watch?v=KxPKkwxGWZs),
showing how the author tried to use AI so he can have a conversation with Paimon.

It uses sherpa-onnx for speech-to-text and text-to-speech.
|1|
|---|
|![](https://github.com/user-attachments/assets/f6eea2d5-1807-42cb-9160-be8da2971e1f)|

### [TtsReader - Desktop application](https://github.com/ys-pro-duction/TtsReader)

A desktop text-to-speech application built using Kotlin Multiplatform.

### [MentraOS](https://github.com/Mentra-Community/MentraOS)

> Smart glasses OS, with dozens of built-in apps. Users get AI assistant, notifications,
> translation, screen mirror, captions, and more. Devs get to write 1 app that runs on
> any pair of smart glasses.

It uses sherpa-onnx for real-time speech recognition on iOS and Android devices.
See also <https://github.com/Mentra-Community/MentraOS/pull/861>

It uses Swift for iOS and Java for Android.

### [flet_sherpa_onnx](https://github.com/SamYuan1990/flet_sherpa_onnx)

Flet ASR/STT component based on sherpa-onnx.
Example [a chat box agent](https://github.com/SamYuan1990/i18n-agent-action)

### [achatbot-go](https://github.com/ai-bot-pro/achatbot-go)

a multimodal chatbot based on go with sherpa-onnx's speech lib api.

### [fcitx5-vinput](https://github.com/xifan2333/fcitx5-vinput)

Local offline voice input plugin for [Fcitx5](https://github.com/fcitx/fcitx5) (Linux input method framework).
It uses C++ with offline ASR for speech recognition, supporting push-to-talk,
command mode, and optional LLM post-processing.

Video demo in Chinese: [fcitx5-vinput](https://www.bilibili.com/video/BV1a6cUzVE6F)

[silero-vad]: https://github.com/snakers4/silero-vad
[Raspberry Pi]: https://www.raspberrypi.com/
[RV1126]: https://www.rock-chips.com/uploads/pdf/2022.8.26/191/RV1126%20Brief%20Datasheet.pdf
[LicheePi4A]: https://sipeed.com/licheepi4a
[VisionFive 2]: https://www.starfivetech.com/en/site/boards
[旭日X3派]: https://developer.horizon.ai/api/v1/fileData/documents_pi/index.html
[爱芯派]: https://wiki.sipeed.com/hardware/zh/maixIII/ax-pi/axpi.html
[hf-space-speaker-diarization]: https://huggingface.co/spaces/k2-fsa/speaker-diarization
[hf-space-speaker-diarization-cn]: https://hf.qhduan.com/spaces/k2-fsa/speaker-diarization
[hf-space-asr]: https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition
[hf-space-asr-cn]: https://hf.qhduan.com/spaces/k2-fsa/automatic-speech-recognition
[Whisper]: https://github.com/openai/whisper
[hf-space-asr-whisper]: https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition-with-whisper
[hf-space-asr-whisper-cn]: https://hf.qhduan.com/spaces/k2-fsa/automatic-speech-recognition-with-whisper
[hf-space-tts]: https://huggingface.co/spaces/k2-fsa/text-to-speech
[hf-space-tts-cn]: https://hf.qhduan.com/spaces/k2-fsa/text-to-speech
[hf-space-subtitle]: https://huggingface.co/spaces/k2-fsa/generate-subtitles-for-videos
[hf-space-subtitle-cn]: https://hf.qhduan.com/spaces/k2-fsa/generate-subtitles-for-videos
[hf-space-audio-tagging]: https://huggingface.co/spaces/k2-fsa/audio-tagging
[hf-space-audio-tagging-cn]: https://hf.qhduan.com/spaces/k2-fsa/audio-tagging
[hf-space-source-separation]: https://huggingface.co/spaces/k2-fsa/source-separation
[hf-space-source-separation-cn]: https://hf.qhduan.com/spaces/k2-fsa/source-separation
[hf-space-slid-whisper]: https://huggingface.co/spaces/k2-fsa/spoken-language-identification
[hf-space-slid-whisper-cn]: https://hf.qhduan.com/spaces/k2-fsa/spoken-language-identification
[wasm-hf-vad]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-sherpa-onnx
[wasm-ms-vad]: https://modelscope.cn/studios/csukuangfj/web-assembly-vad-sherpa-onnx
[wasm-hf-streaming-asr-zh-en-zipformer]: https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en
[wasm-ms-streaming-asr-zh-en-zipformer]: https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en
[wasm-hf-streaming-asr-zh-en-paraformer]: https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en-paraformer
[wasm-ms-streaming-asr-zh-en-paraformer]: https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en-paraformer
[Paraformer-large]: https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary
[wasm-hf-streaming-asr-zh-en-yue-paraformer]: https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer
[wasm-ms-streaming-asr-zh-en-yue-paraformer]: https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer
[wasm-hf-streaming-asr-en-zipformer]: https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-en
[wasm-ms-streaming-asr-en-zipformer]: https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-en
[SenseVoice]: https://github.com/FunAudioLLM/SenseVoice
[wasm-hf-vad-asr-zh-zipformer-ctc-07-03]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-ctc
[wasm-ms-vad-asr-zh-zipformer-ctc-07-03]: https://modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-ctc/summary
[wasm-hf-vad-asr-zh-en-ko-ja-yue-sense-voice]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-ja-ko-cantonese-sense-voice
[wasm-ms-vad-asr-zh-en-ko-ja-yue-sense-voice]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-zh-en-jp-ko-cantonese-sense-voice
[wasm-hf-vad-asr-en-whisper-tiny-en]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-whisper-tiny
[wasm-ms-vad-asr-en-whisper-tiny-en]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-en-whisper-tiny
[wasm-hf-vad-asr-en-moonshine-tiny-en]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-moonshine-tiny
[wasm-ms-vad-asr-en-moonshine-tiny-en]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-en-moonshine-tiny
[wasm-hf-vad-asr-en-zipformer-gigaspeech]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-zipformer-gigaspeech
[wasm-ms-vad-asr-en-zipformer-gigaspeech]: https://www.modelscope.cn/studios/k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-zipformer-gigaspeech
[wasm-hf-vad-asr-zh-zipformer-wenetspeech]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-wenetspeech
[wasm-ms-vad-asr-zh-zipformer-wenetspeech]: https://www.modelscope.cn/studios/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-wenetspeech
[reazonspeech]: https://research.reazon.jp/_static/reazonspeech_nlp2023.pdf
[wasm-hf-vad-asr-ja-zipformer-reazonspeech]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-ja-zipformer
[wasm-ms-vad-asr-ja-zipformer-reazonspeech]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-ja-zipformer
[gigaspeech2]: https://github.com/speechcolab/gigaspeech2
[wasm-hf-vad-asr-th-zipformer-gigaspeech2]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-th-zipformer
[wasm-ms-vad-asr-th-zipformer-gigaspeech2]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-th-zipformer
[telespeech-asr]: https://github.com/tele-ai/telespeech-asr
[wasm-hf-vad-asr-zh-telespeech]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-telespeech
[wasm-ms-vad-asr-zh-telespeech]: https://www.modelscope.cn/studios/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-telespeech
[wasm-hf-vad-asr-zh-en-paraformer-large]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer
[wasm-ms-vad-asr-zh-en-paraformer-large]: https://www.modelscope.cn/studios/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer
[wasm-hf-vad-asr-zh-en-paraformer-small]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer-small
[wasm-ms-vad-asr-zh-en-paraformer-small]: https://www.modelscope.cn/studios/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer-small
[dolphin]: https://github.com/dataoceanai/dolphin
[wasm-ms-vad-asr-multi-lang-dolphin-base]: https://modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-multi-lang-dophin-ctc
[wasm-hf-vad-asr-multi-lang-dolphin-base]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-multi-lang-dophin-ctc

[wasm-hf-tts-matcha-zh-en]: https://huggingface.co/spaces/k2-fsa/web-assembly-zh-en-tts-matcha
[wasm-hf-tts-matcha-zh]: https://huggingface.co/spaces/k2-fsa/web-assembly-zh-tts-matcha
[wasm-ms-tts-matcha-zh-en]: https://modelscope.cn/studios/csukuangfj/web-assembly-zh-en-tts-matcha
[wasm-ms-tts-matcha-zh]: https://modelscope.cn/studios/csukuangfj/web-assembly-zh-tts-matcha
[wasm-hf-tts-matcha-en]: https://huggingface.co/spaces/k2-fsa/web-assembly-en-tts-matcha
[wasm-ms-tts-matcha-en]: https://modelscope.cn/studios/csukuangfj/web-assembly-en-tts-matcha
[wasm-hf-tts-piper-en]: https://huggingface.co/spaces/k2-fsa/web-assembly-tts-sherpa-onnx-en
[wasm-ms-tts-piper-en]: https://modelscope.cn/studios/k2-fsa/web-assembly-tts-sherpa-onnx-en
[wasm-hf-tts-piper-de]: https://huggingface.co/spaces/k2-fsa/web-assembly-tts-sherpa-onnx-de
[wasm-ms-tts-piper-de]: https://modelscope.cn/studios/k2-fsa/web-assembly-tts-sherpa-onnx-de
[wasm-hf-speaker-diarization]: https://huggingface.co/spaces/k2-fsa/web-assembly-speaker-diarization-sherpa-onnx
[wasm-ms-speaker-diarization]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-speaker-diarization-sherpa-onnx
[wasm-hf-voice-cloning-zipvoice]: https://huggingface.co/spaces/k2-fsa/web-assembly-zh-en-tts-zipvoice
[wasm-ms-voice-cloning-zipvoice]: https://modelscope.cn/studios/csukuangfj/web-assembly-zh-en-tts-zipvoice
[wasm-hf-voice-cloning-pocket]: https://huggingface.co/spaces/k2-fsa/web-assembly-en-tts-pocket
[wasm-ms-voice-cloning-pocket]: https://modelscope.cn/studios/csukuangfj/web-assembly-en-tts-pocket
[apk-speaker-diarization]: https://k2-fsa.github.io/sherpa/onnx/speaker-diarization/apk.html
[apk-speaker-diarization-cn]: https://k2-fsa.github.io/sherpa/onnx/speaker-diarization/apk-cn.html
[apk-streaming-asr]: https://k2-fsa.github.io/sherpa/onnx/android/apk.html
[apk-streaming-asr-cn]: https://k2-fsa.github.io/sherpa/onnx/android/apk-cn.html
[apk-simula-streaming-asr]: https://k2-fsa.github.io/sherpa/onnx/android/apk-simulate-streaming-asr.html
[apk-simula-streaming-asr-cn]: https://k2-fsa.github.io/sherpa/onnx/android/apk-simulate-streaming-asr-cn.html
[apk-tts]: https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine.html
[apk-tts-cn]: https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine-cn.html
[apk-vad]: https://k2-fsa.github.io/sherpa/onnx/vad/apk.html
[apk-vad-cn]: https://k2-fsa.github.io/sherpa/onnx/vad/apk-cn.html
[apk-vad-asr]: https://k2-fsa.github.io/sherpa/onnx/vad/apk-asr.html
[apk-vad-asr-cn]: https://k2-fsa.github.io/sherpa/onnx/vad/apk-asr-cn.html
[apk-2pass]: https://k2-fsa.github.io/sherpa/onnx/android/apk-2pass.html
[apk-2pass-cn]: https://k2-fsa.github.io/sherpa/onnx/android/apk-2pass-cn.html
[apk-at]: https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk.html
[apk-at-cn]: https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk-cn.html
[apk-at-wearos]: https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk-wearos.html
[apk-at-wearos-cn]: https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk-wearos-cn.html
[apk-sid]: https://k2-fsa.github.io/sherpa/onnx/speaker-identification/apk.html
[apk-sid-cn]: https://k2-fsa.github.io/sherpa/onnx/speaker-identification/apk-cn.html
[apk-slid]: https://k2-fsa.github.io/sherpa/onnx/spoken-language-identification/apk.html
[apk-slid-cn]: https://k2-fsa.github.io/sherpa/onnx/spoken-language-identification/apk-cn.html
[apk-kws]: https://k2-fsa.github.io/sherpa/onnx/kws/apk.html
[apk-kws-cn]: https://k2-fsa.github.io/sherpa/onnx/kws/apk-cn.html
[apk-flutter-streaming-asr]: https://k2-fsa.github.io/sherpa/onnx/flutter/pre-built-app.html#streaming-speech-recognition-stt-asr
[apk-flutter-streaming-asr-cn]: https://k2-fsa.github.io/sherpa/onnx/flutter/pre-built-app.html#streaming-speech-recognition-stt-asr
[flutter-tts-android]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-android.html
[flutter-tts-android-cn]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-android-cn.html
[flutter-tts-linux]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-linux.html
[flutter-tts-linux-cn]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-linux-cn.html
[flutter-tts-macos-x64]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-x64.html
[flutter-tts-macos-arm64-cn]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-arm64-cn.html
[flutter-tts-macos-arm64]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-arm64.html
[flutter-tts-macos-x64-cn]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-x64-cn.html
[flutter-tts-win-x64]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-win.html
[flutter-tts-win-x64-cn]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-win-cn.html
[lazarus-subtitle]: https://k2-fsa.github.io/sherpa/onnx/lazarus/download-generated-subtitles.html
[lazarus-subtitle-cn]: https://k2-fsa.github.io/sherpa/onnx/lazarus/download-generated-subtitles-cn.html
[asr-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
[tts-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
[vad-models]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
[kws-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models
[at-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models
[sid-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
[slid-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
[punct-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models
[speaker-segmentation-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
[GigaSpeech]: https://github.com/SpeechColab/GigaSpeech
[WenetSpeech]: https://github.com/wenet-e2e/WenetSpeech
[sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
[sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16.tar.bz2
[sherpa-onnx-streaming-zipformer-korean-2024-06-16]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-korean-2024-06-16.tar.bz2
[sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23.tar.bz2
[sherpa-onnx-streaming-zipformer-en-20M-2023-02-17]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17.tar.bz2
[sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01.tar.bz2
[sherpa-onnx-zipformer-ru-2024-09-18]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ru-2024-09-18.tar.bz2
[sherpa-onnx-zipformer-korean-2024-06-24]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-korean-2024-06-24.tar.bz2
[sherpa-onnx-zipformer-thai-2024-06-20]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-thai-2024-06-20.tar.bz2
[sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24.tar.bz2
[sherpa-onnx-paraformer-zh-2024-03-09]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2024-03-09.tar.bz2
[sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24.tar.bz2
[sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
[sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
[sherpa-onnx-streaming-zipformer-fr-2023-04-14]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-fr-2023-04-14.tar.bz2
[Moonshine tiny]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
[NVIDIA Jetson Orin NX]: https://developer.download.nvidia.com/assets/embedded/secure/jetson/orin_nx/docs/Jetson_Orin_NX_DS-10712-001_v0.5.pdf?RCPGu9Q6OVAOv7a7vgtwc9-BLScXRIWq6cSLuditMALECJ_dOj27DgnqAPGVnT2VpiNpQan9SyFy-9zRykR58CokzbXwjSA7Gj819e91AXPrWkGZR3oS1VLxiDEpJa_Y0lr7UT-N4GnXtb8NlUkP4GkCkkF_FQivGPrAucCUywL481GH_WpP_p7ziHU1Wg==&t=eyJscyI6ImdzZW8iLCJsc2QiOiJodHRwczovL3d3dy5nb29nbGUuY29tLmhrLyJ9
[NVIDIA Jetson Nano B01]: https://www.seeedstudio.com/blog/2020/01/16/new-revision-of-jetson-nano-dev-kit-now-supports-new-jetson-nano-module/
[speech-enhancement-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
[source-separation-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/source-separation-models
[RK3588]: https://www.rock-chips.com/uploads/pdf/2022.8.26/192/RK3588%20Brief%20Datasheet.pdf
[spleeter]: https://github.com/deezer/spleeter
[UVR]: https://github.com/Anjok07/ultimatevocalremovergui
[gtcrn]: https://github.com/Xiaobin-Rong/gtcrn
[tts-url]: https://k2-fsa.github.io/sherpa/onnx/tts/all-in-one.html
[ss-url]: https://k2-fsa.github.io/sherpa/onnx/source-separation/index.html
[sd-url]: https://k2-fsa.github.io/sherpa/onnx/speaker-diarization/index.html
[slid-url]: https://k2-fsa.github.io/sherpa/onnx/spoken-language-identification/index.html
[at-url]: https://k2-fsa.github.io/sherpa/onnx/audio-tagging/index.html
[vad-url]: https://k2-fsa.github.io/sherpa/onnx/vad/index.html
[kws-url]: https://k2-fsa.github.io/sherpa/onnx/kws/index.html
[punct-url]: https://k2-fsa.github.io/sherpa/onnx/punctuation/index.html
[se-url]: https://k2-fsa.github.io/sherpa/onnx/speech-enhancement/index.html
[rknpu-doc]: https://k2-fsa.github.io/sherpa/onnx/rknn/index.html
[qnn-doc]: https://k2-fsa.github.io/sherpa/onnx/qnn/index.html
[ascend-doc]: https://k2-fsa.github.io/sherpa/onnx/ascend/index.html
[axera-npu]: https://axera-tech.com/Skill/166.html


================================================
FILE: android/.gitignore
================================================
# Gradle files
.gradle/
build/

# Local configuration file (sdk path, etc)
local.properties

# Log/OS Files
*.log

# Android Studio generated files and folders
captures/
.externalNativeBuild/
.cxx/
*.apk
output.json

# IntelliJ
*.iml
.idea/
misc.xml
deploymentTargetDropDown.xml
render.experimental.xml

# Keystore files
*.jks
*.keystore

# Google Services (e.g. APIs or Firebase)
google-services.json

# Android Profiling
*.hprof
*.so


================================================
FILE: android/README.md
================================================
# Introduction

Please refer to
https://k2-fsa.github.io/sherpa/onnx/android/index.html
for usage.

|Folder| Pre-built APK | Description|
|------|---------------|-------------|
|[SherpaOnnxSpeakerDiarization](./SherpaOnnxSpeakerDiarization)| | It is for speaker diarization.|
|[SherpaOnnx](./SherpaOnnx)| [URL](https://k2-fsa.github.io/sherpa/onnx/android/apk.html)| It uses a streaming ASR model.|
|[SherpaOnnx2Pass](./SherpaOnnx2Pass)|[URL](https://k2-fsa.github.io/sherpa/onnx/android/apk-2pass.html)| It uses a streaming ASR model for the first pass and use a non-streaming ASR model for the second pass|
|[SherpaOnnxKws](./SherpaOnnxKws)|[URL](https://k2-fsa.github.io/sherpa/onnx/kws/apk.html)| It demonstrates how to use keyword spotting|
|[SherpaOnnxSpeakerIdentification](./SherpaOnnxSpeakerIdentification)|[URL](https://k2-fsa.github.io/sherpa/onnx/speaker-identification/apk.html)| It demonstrates how to use speaker identification|
|[SherpaOnnxTts](./SherpaOnnxTts)|[URL](https://k2-fsa.github.io/sherpa/onnx/tts/apk.html)| It is for standalone text-to-speech.|
|[SherpaOnnxTtsEngine](./SherpaOnnxTtsEngine)|[URL](https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine.html)| It is for text-to-speech engine; you can use it to replace the system TTS engine, e.g., use it in a e-book reader app|
|[SherpaOnnxVad](./SherpaOnnxVad)|[URL](https://k2-fsa.github.io/sherpa/onnx/vad/apk.html)| It demonstrates how to use a VAD|
|[SherpaOnnxVadAsr](./SherpaOnnxVadAsr)|[URL](https://k2-fsa.github.io/sherpa/onnx/vad/apk-asr.html)| It uses a VAD with a non-streaming ASR model.|
|[SherpaOnnxWebSocket](./SherpaOnnxWebSocket)| |It shows how to write a websocket client for the [Python streaming websocket server](https://github.com/k2-fsa/sherpa-onnx/blob/master/python-api-examples/streaming_server.py).|
|[SherpaOnnxAudioTagging](./SherpaOnnxAudioTagging)|[URL](https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk.html)| It shows how to use audio tagging.|
|[SherpaOnnxAudioTaggingWearOS](./SherpaOnnxAudioTagging)|[URL](https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk-wearos.html)| It shows how to use audio tagging on WearOS.|
|[SherpaOnnxSimulateStreamingAsr](./SherpaOnnxSimulateStreamingAsr)|| It shows how to use a non-streaming ASR model for streaming speech recognition.|
|[SherpaOnnxSimulateStreamingAsrWearOs](./SherpaOnnxSimulateStreamingAsrWearOs)|| It shows how to use a non-streaming ASR model for streaming speech recognition with WearOS.|


================================================
FILE: android/SherpaOnnx/.gitignore
================================================
*.iml
.gradle
/local.properties
/.idea/caches
/.idea/libraries
/.idea/modules.xml
/.idea/workspace.xml
/.idea/navEditor.xml
/.idea/assetWizardSettings.xml
.DS_Store
/build
/captures
.externalNativeBuild
.cxx
local.properties


================================================
FILE: android/SherpaOnnx/app/.gitignore
================================================
/build

================================================
FILE: android/SherpaOnnx/app/build.gradle
================================================
plugins {
    id 'com.android.application'
    id 'org.jetbrains.kotlin.android'
}

android {
    namespace 'com.k2fsa.sherpa.onnx'
    compileSdk 32

    defaultConfig {
        applicationId "com.k2fsa.sherpa.onnx"
        minSdk 21
        targetSdk 32
        versionCode 20260320
        versionName "1.12.31"

        testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
    }

    buildTypes {
        release {
            minifyEnabled false
            proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
        }
    }
    compileOptions {
        sourceCompatibility JavaVersion.VERSION_1_8
        targetCompatibility JavaVersion.VERSION_1_8
    }
    kotlinOptions {
        jvmTarget = '1.8'
    }
}

dependencies {

    implementation 'androidx.core:core-ktx:1.7.0'
    implementation 'androidx.appcompat:appcompat:1.5.1'
    implementation 'com.google.android.material:material:1.7.0'
    implementation 'androidx.constraintlayout:constraintlayout:2.1.4'
    testImplementation 'junit:junit:4.13.2'
    androidTestImplementation 'androidx.test.ext:junit:1.1.4'
    androidTestImplementation 'androidx.test.espresso:espresso-core:3.5.0'
}

================================================
FILE: android/SherpaOnnx/app/proguard-rules.pro
================================================
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
#   http://developer.android.com/guide/developing/tools/proguard.html

# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
#   public *;
#}

# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable

# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile

================================================
FILE: android/SherpaOnnx/app/src/androidTest/java/com/k2fsa/sherpa/onnx/ExampleInstrumentedTest.kt
================================================
package com.k2fsa.sherpa.onnx

import androidx.test.platform.app.InstrumentationRegistry
import androidx.test.ext.junit.runners.AndroidJUnit4

import org.junit.Test
import org.junit.runner.RunWith

import org.junit.Assert.*

/**
 * Instrumented test, which will execute on an Android device.
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
@RunWith(AndroidJUnit4::class)
class ExampleInstrumentedTest {
    @Test
    fun useAppContext() {
        // Context of the app under test.
        val appContext = InstrumentationRegistry.getInstrumentation().targetContext
        assertEquals("com.k2fsa.sherpa.onnx", appContext.packageName)
    }
}

================================================
FILE: android/SherpaOnnx/app/src/main/AndroidManifest.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:tools="http://schemas.android.com/tools">

    <uses-permission android:name="android.permission.RECORD_AUDIO" />

    <application
        android:allowBackup="true"
        android:dataExtractionRules="@xml/data_extraction_rules"
        android:fullBackupContent="@xml/backup_rules"
        android:icon="@mipmap/ic_launcher"
        android:label="@string/app_name"
        android:roundIcon="@mipmap/ic_launcher_round"
        android:supportsRtl="true"
        android:theme="@style/Theme.SherpaOnnx"
        tools:targetApi="31">
        <activity
            android:name=".MainActivity"
            android:label="ASR: Next-gen Kaldi"
            android:exported="true">
            <intent-filter>
                <action android:name="android.intent.action.MAIN" />

                <category android:name="android.intent.category.LAUNCHER" />
            </intent-filter>

            <meta-data
                android:name="android.app.lib_name"
                android:value="" />
        </activity>
    </application>

</manifest>


================================================
FILE: android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
================================================
package com.k2fsa.sherpa.onnx

import android.Manifest
import android.content.pm.PackageManager
import android.media.AudioFormat
import android.media.AudioRecord
import android.media.MediaRecorder
import android.os.Bundle
import android.text.method.ScrollingMovementMethod
import android.util.Log
import android.widget.Button
import android.widget.TextView
import androidx.appcompat.app.AppCompatActivity
import androidx.core.app.ActivityCompat
import java.io.File
import java.io.FileOutputStream
import java.io.IOException
import kotlin.concurrent.thread

private const val TAG = "sherpa-onnx"
private const val REQUEST_RECORD_AUDIO_PERMISSION = 200

// To enable microphone in android emulator, use
//
// adb emu avd hostmicon

class MainActivity : AppCompatActivity() {
    private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)

    private lateinit var recognizer: OnlineRecognizer
    private var audioRecord: AudioRecord? = null
    private lateinit var recordButton: Button
    private lateinit var textView: TextView
    private var recordingThread: Thread? = null

    private val audioSource = MediaRecorder.AudioSource.MIC
    private val sampleRateInHz = 16000
    private val channelConfig = AudioFormat.CHANNEL_IN_MONO

    // Note: We don't use AudioFormat.ENCODING_PCM_FLOAT
    // since the AudioRecord.read(float[]) needs API level >= 23
    // but we are targeting API level >= 21
    private val audioFormat = AudioFormat.ENCODING_PCM_16BIT
    private var idx: Int = 0
    private var lastText: String = ""

    @Volatile
    private var isRecording: Boolean = false

    override fun onRequestPermissionsResult(
        requestCode: Int, permissions: Array<String>, grantResults: IntArray
    ) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults)
        val permissionToRecordAccepted = if (requestCode == REQUEST_RECORD_AUDIO_PERMISSION) {
            grantResults[0] == PackageManager.PERMISSION_GRANTED
        } else {
            false
        }

        if (!permissionToRecordAccepted) {
            Log.e(TAG, "Audio record is disallowed")
            finish()
        }

        Log.i(TAG, "Audio record is permitted")
    }

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        setContentView(R.layout.activity_main)

        ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)

        Log.i(TAG, "Start to initialize model")
        initModel()
        Log.i(TAG, "Finished initializing model")

        recordButton = findViewById(R.id.record_button)
        recordButton.setOnClickListener { onclick() }

        textView = findViewById(R.id.my_text)
        textView.movementMethod = ScrollingMovementMethod()
    }

    private fun onclick() {
        if (!isRecording) {
            val ret = initMicrophone()
            if (!ret) {
                Log.e(TAG, "Failed to initialize microphone")
                return
            }
            Log.i(TAG, "state: ${audioRecord?.state}")
            audioRecord!!.startRecording()
            recordButton.setText(R.string.stop)
            isRecording = true
            textView.text = ""
            lastText = ""
            idx = 0

            recordingThread = thread(true) {
                processSamples()
            }
            Log.i(TAG, "Started recording")
        } else {
            isRecording = false
            audioRecord!!.stop()
            audioRecord!!.release()
            audioRecord = null
            recordButton.setText(R.string.start)
            Log.i(TAG, "Stopped recording")
        }
    }

    private fun processSamples() {
        Log.i(TAG, "processing samples")
        val stream = recognizer.createStream()

        val interval = 0.1 // i.e., 100 ms
        val bufferSize = (interval * sampleRateInHz).toInt() // in samples
        val buffer = ShortArray(bufferSize)

        while (isRecording) {
            val ret = audioRecord?.read(buffer, 0, buffer.size)
            if (ret != null && ret > 0) {
                val samples = FloatArray(ret) { buffer[it] / 32768.0f }
                stream.acceptWaveform(samples, sampleRate = sampleRateInHz)
                while (recognizer.isReady(stream)) {
                    recognizer.decode(stream)
                }

                val isEndpoint = recognizer.isEndpoint(stream)
                var text = recognizer.getResult(stream).text

                // For streaming parformer, we need to manually add some
                // paddings so that it has enough right context to
                // recognize the last word of this segment
                if (isEndpoint && recognizer.config.modelConfig.paraformer.encoder.isNotBlank()) {
                    val tailPaddings = FloatArray((0.8 * sampleRateInHz).toInt())
                    stream.acceptWaveform(tailPaddings, sampleRate = sampleRateInHz)
                    while (recognizer.isReady(stream)) {
                        recognizer.decode(stream)
                    }
                    text = recognizer.getResult(stream).text
                }

                var textToDisplay = lastText

                if (text.isNotBlank()) {
                    textToDisplay = if (lastText.isBlank()) {
                        "${idx}: $text"
                    } else {
                        "${lastText}\n${idx}: $text"
                    }
                }

                if (isEndpoint) {
                    recognizer.reset(stream)
                    if (text.isNotBlank()) {
                        lastText = "${lastText}\n${idx}: $text"
                        textToDisplay = lastText
                        idx += 1
                    }
                }

                runOnUiThread {
                    textView.text = textToDisplay
                }
            }
        }
        stream.release()
    }

    private fun initMicrophone(): Boolean {
        if (ActivityCompat.checkSelfPermission(
                this, Manifest.permission.RECORD_AUDIO
            ) != PackageManager.PERMISSION_GRANTED
        ) {
            ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)
            return false
        }

        val numBytes = AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat)
        Log.i(
            TAG, "buffer size in milliseconds: ${numBytes * 1000.0f / sampleRateInHz}"
        )

        audioRecord = AudioRecord(
            audioSource,
            sampleRateInHz,
            channelConfig,
            audioFormat,
            numBytes * 2 // a sample has two bytes as we are using 16-bit PCM
        )
        return true
    }

    private fun initModel() {
        // Please change getModelConfig() to add new models
        // See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
        // for a list of available models
        val type = 0
        var ruleFsts : String?
        ruleFsts = null

        val useHr = false
        val hr =  HomophoneReplacerConfig(
            // Used only when useHr is true
            // Please download the following 3 files from
            // https://github.com/k2-fsa/sherpa-onnx/releases/tag/hr-files
            //
            // dict and lexicon.txt can be shared by different apps
            //
            // replace.fst is specific for an app
            lexicon = "lexicon.txt",
            ruleFsts = "replace.fst",
        )

        Log.i(TAG, "Select model type $type")
        var config = OnlineRecognizerConfig(
            featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
            modelConfig = getModelConfig(type = type)!!,
            // lmConfig = getOnlineLMConfig(type = type),
            endpointConfig = getEndpointConfig(),
            enableEndpoint = true,
        )

        if (ruleFsts != null) {
            config.ruleFsts = ruleFsts
        }

        if (useHr) {
            config.hr = hr
        }

        recognizer = OnlineRecognizer(
            assetManager = application.assets,
            config = config,
        )
    }
}


================================================
FILE: android/SherpaOnnx/app/src/main/jniLibs/.gitignore
================================================
*.so
*.txt
*.onnx
*.wav


================================================
FILE: android/SherpaOnnx/app/src/main/jniLibs/arm64-v8a/.gitkeep
================================================


================================================
FILE: android/SherpaOnnx/app/src/main/jniLibs/armeabi-v7a/.gitkeep
================================================


================================================
FILE: android/SherpaOnnx/app/src/main/jniLibs/x86/.gitkeep
================================================


================================================
FILE: android/SherpaOnnx/app/src/main/jniLibs/x86_64/.gitkeep
================================================


================================================
FILE: android/SherpaOnnx/app/src/main/res/drawable/ic_launcher_background.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">
    <path
        android:fillColor="#3DDC84"
        android:pathData="M0,0h108v108h-108z" />
    <path
        android:fillColor="#00000000"
        android:pathData="M9,0L9,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,0L19,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M29,0L29,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M39,0L39,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M49,0L49,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M59,0L59,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M69,0L69,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M79,0L79,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M89,0L89,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M99,0L99,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,9L108,9"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,19L108,19"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,29L108,29"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,39L108,39"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,49L108,49"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,59L108,59"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,69L108,69"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,79L108,79"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,89L108,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,99L108,99"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,29L89,29"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,39L89,39"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,49L89,49"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,59L89,59"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,69L89,69"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,79L89,79"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M29,19L29,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M39,19L39,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M49,19L49,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M59,19L59,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M69,19L69,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M79,19L79,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
</vector>


================================================
FILE: android/SherpaOnnx/app/src/main/res/drawable-v24/ic_launcher_foreground.xml
================================================
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:aapt="http://schemas.android.com/aapt"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">
    <path android:pathData="M31,63.928c0,0 6.4,-11 12.1,-13.1c7.2,-2.6 26,-1.4 26,-1.4l38.1,38.1L107,108.928l-32,-1L31,63.928z">
        <aapt:attr name="android:fillColor">
            <gradient
                android:endX="85.84757"
                android:endY="92.4963"
                android:startX="42.9492"
                android:startY="49.59793"
                android:type="linear">
                <item
                    android:color="#44000000"
                    android:offset="0.0" />
                <item
                    android:color="#00000000"
                    android:offset="1.0" />
            </gradient>
        </aapt:attr>
    </path>
    <path
        android:fillColor="#FFFFFF"
        android:fillType="nonZero"
        android:pathData="M65.3,45.828l3.8,-6.6c0.2,-0.4 0.1,-0.9 -0.3,-1.1c-0.4,-0.2 -0.9,-0.1 -1.1,0.3l-3.9,6.7c-6.3,-2.8 -13.4,-2.8 -19.7,0l-3.9,-6.7c-0.2,-0.4 -0.7,-0.5 -1.1,-0.3C38.8,38.328 38.7,38.828 38.9,39.228l3.8,6.6C36.2,49.428 31.7,56.028 31,63.928h46C76.3,56.028 71.8,49.428 65.3,45.828zM43.4,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2c-0.3,-0.7 -0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C45.3,56.528 44.5,57.328 43.4,57.328L43.4,57.328zM64.6,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2s-0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C66.5,56.528 65.6,57.328 64.6,57.328L64.6,57.328z"
        android:strokeWidth="1"
        android:strokeColor="#00000000" />
</vector>

================================================
FILE: android/SherpaOnnx/app/src/main/res/layout/activity_main.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:app="http://schemas.android.com/apk/res-auto"
    xmlns:tools="http://schemas.android.com/tools"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    tools:context=".MainActivity">

    <LinearLayout
        android:layout_width="match_parent"
        android:layout_height="match_parent"
        android:gravity="center"
        android:orientation="vertical">

        <TextView
            android:id="@+id/my_text"
            android:layout_width="match_parent"
            android:layout_height="match_parent"
            android:layout_weight="2.5"
            android:padding="24dp"
            android:scrollbars="vertical"
            android:singleLine="false"
            android:text="@string/hint"
            app:layout_constraintBottom_toBottomOf="parent"
            app:layout_constraintEnd_toEndOf="parent"
            app:layout_constraintStart_toStartOf="parent"
            app:layout_constraintTop_toTopOf="parent" />

        <Button
            android:id="@+id/record_button"
            android:layout_width="wrap_content"
            android:layout_height="wrap_content"
            android:layout_weight="0.5"
            android:text="@string/start" />
    </LinearLayout>


</androidx.constraintlayout.widget.ConstraintLayout>

================================================
FILE: android/SherpaOnnx/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@drawable/ic_launcher_background" />
    <foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

================================================
FILE: android/SherpaOnnx/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@drawable/ic_launcher_background" />
    <foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

================================================
FILE: android/SherpaOnnx/app/src/main/res/values/colors.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<resources>
    <color name="purple_200">#FFBB86FC</color>
    <color name="purple_500">#FF6200EE</color>
    <color name="purple_700">#FF3700B3</color>
    <color name="teal_200">#FF03DAC5</color>
    <color name="teal_700">#FF018786</color>
    <color name="black">#FF000000</color>
    <color name="white">#FFFFFFFF</color>
</resources>

================================================
FILE: android/SherpaOnnx/app/src/main/res/values/strings.xml
================================================
<resources>
    <string name="app_name">ASR</string>
    <string name="hint">Click the Start button to play speech-to-text with Next-gen Kaldi.
        \n
        \n\n\n
        The source code and pre-trained models are publicly available.
        Please see https://github.com/k2-fsa/sherpa-onnx for details.
    </string>
    <string name="start">Start</string>
    <string name="stop">Stop</string>
</resources>

================================================
FILE: android/SherpaOnnx/app/src/main/res/values/themes.xml
================================================
<resources xmlns:tools="http://schemas.android.com/tools">
    <!-- Base application theme. -->
    <style name="Theme.SherpaOnnx" parent="Theme.MaterialComponents.DayNight.DarkActionBar">
        <!-- Primary brand color. -->
        <item name="colorPrimary">@color/purple_500</item>
        <item name="colorPrimaryVariant">@color/purple_700</item>
        <item name="colorOnPrimary">@color/white</item>
        <!-- Secondary brand color. -->
        <item name="colorSecondary">@color/teal_200</item>
        <item name="colorSecondaryVariant">@color/teal_700</item>
        <item name="colorOnSecondary">@color/black</item>
        <!-- Status bar color. -->
        <item name="android:statusBarColor">?attr/colorPrimaryVariant</item>
        <!-- Customize your theme here. -->
    </style>
</resources>

================================================
FILE: android/SherpaOnnx/app/src/main/res/values-night/themes.xml
================================================
<resources xmlns:tools="http://schemas.android.com/tools">
    <!-- Base application theme. -->
    <style name="Theme.SherpaOnnx" parent="Theme.MaterialComponents.DayNight.DarkActionBar">
        <!-- Primary brand color. -->
        <item name="colorPrimary">@color/purple_200</item>
        <item name="colorPrimaryVariant">@color/purple_700</item>
        <item name="colorOnPrimary">@color/black</item>
        <!-- Secondary brand color. -->
        <item name="colorSecondary">@color/teal_200</item>
        <item name="colorSecondaryVariant">@color/teal_200</item>
        <item name="colorOnSecondary">@color/black</item>
        <!-- Status bar color. -->
        <item name="android:statusBarColor">?attr/colorPrimaryVariant</item>
        <!-- Customize your theme here. -->
    </style>
</resources>

================================================
FILE: android/SherpaOnnx/app/src/main/res/xml/backup_rules.xml
================================================
<?xml version="1.0" encoding="utf-8"?><!--
   Sample backup rules file; uncomment and customize as necessary.
   See https://developer.android.com/guide/topics/data/autobackup
   for details.
   Note: This file is ignored for devices older that API 31
   See https://developer.android.com/about/versions/12/backup-restore
-->
<full-backup-content>
    <!--
   <include domain="sharedpref" path="."/>
   <exclude domain="sharedpref" path="device.xml"/>
-->
</full-backup-content>

================================================
FILE: android/SherpaOnnx/app/src/main/res/xml/data_extraction_rules.xml
================================================
<?xml version="1.0" encoding="utf-8"?><!--
   Sample data extraction rules file; uncomment and customize as necessary.
   See https://developer.android.com/about/versions/12/backup-restore#xml-changes
   for details.
-->
<data-extraction-rules>
    <cloud-backup>
        <!-- TODO: Use <include> and <exclude> to control what is backed up.
        <include .../>
        <exclude .../>
        -->
    </cloud-backup>
    <!--
    <device-transfer>
        <include .../>
        <exclude .../>
    </device-transfer>
    -->
</data-extraction-rules>

================================================
FILE: android/SherpaOnnx/app/src/test/java/com/k2fsa/sherpa/onnx/ExampleUnitTest.kt
================================================
package com.k2fsa.sherpa.onnx

import org.junit.Test

import org.junit.Assert.*

/**
 * Example local unit test, which will execute on the development machine (host).
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
class ExampleUnitTest {
    @Test
    fun addition_isCorrect() {
        assertEquals(4, 2 + 2)
    }
}

================================================
FILE: android/SherpaOnnx/build.gradle
================================================
// Top-level build file where you can add configuration options common to all sub-projects/modules.
plugins {
    id 'com.android.application' version '7.3.1' apply false
    id 'com.android.library' version '7.3.1' apply false
    id 'org.jetbrains.kotlin.android' version '1.7.20' apply false
}

================================================
FILE: android/SherpaOnnx/gradle/wrapper/gradle-wrapper.properties
================================================
#Thu Feb 23 11:09:06 CST 2023
distributionBase=GRADLE_USER_HOME
distributionUrl=https\://services.gradle.org/distributions/gradle-8.2-bin.zip
distributionPath=wrapper/dists
zipStorePath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME


================================================
FILE: android/SherpaOnnx/gradle.properties
================================================
# Project-wide Gradle settings.
# IDE (e.g. Android Studio) users:
# Gradle settings configured through the IDE *will override*
# any settings specified in this file.
# For more details on how to configure your build environment visit
# http://www.gradle.org/docs/current/userguide/build_environment.html
# Specifies the JVM arguments used for the daemon process.
# The setting is particularly useful for tweaking memory settings.
org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
# When configured, Gradle will run in incubating parallel mode.
# This option should only be used with decoupled projects. More details, visit
# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
# org.gradle.parallel=true
# AndroidX package structure to make it clearer which packages are bundled with the
# Android operating system, and which are packaged with your app's APK
# https://developer.android.com/topic/libraries/support-library/androidx-rn
android.useAndroidX=true
# Kotlin code style for this project: "official" or "obsolete":
kotlin.code.style=official
# Enables namespacing of each library's R class so that its R class includes only the
# resources declared in the library itself and none from the library's dependencies,
# thereby reducing the size of the R class for that library
android.nonTransitiveRClass=true

================================================
FILE: android/SherpaOnnx/gradlew
================================================
#!/usr/bin/env sh

#
# Copyright 2015 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

##############################################################################
##
##  Gradle start up script for UN*X
##
##############################################################################

# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
    ls=`ls -ld "$PRG"`
    link=`expr "$ls" : '.*-> \(.*\)$'`
    if expr "$link" : '/.*' > /dev/null; then
        PRG="$link"
    else
        PRG=`dirname "$PRG"`"/$link"
    fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null

APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`

# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"

warn () {
    echo "$*"
}

die () {
    echo
    echo "$*"
    echo
    exit 1
}

# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
  CYGWIN* )
    cygwin=true
    ;;
  Darwin* )
    darwin=true
    ;;
  MINGW* )
    msys=true
    ;;
  NONSTOP* )
    nonstop=true
    ;;
esac

CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar


# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
        # IBM's JDK on AIX uses strange locations for the executables
        JAVACMD="$JAVA_HOME/jre/sh/java"
    else
        JAVACMD="$JAVA_HOME/bin/java"
    fi
    if [ ! -x "$JAVACMD" ] ; then
        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
else
    JAVACMD="java"
    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi

# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
    MAX_FD_LIMIT=`ulimit -H -n`
    if [ $? -eq 0 ] ; then
        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
            MAX_FD="$MAX_FD_LIMIT"
        fi
        ulimit -n $MAX_FD
        if [ $? -ne 0 ] ; then
            warn "Could not set maximum file descriptor limit: $MAX_FD"
        fi
    else
        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
    fi
fi

# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi

# For Cygwin or MSYS, switch paths to Windows format before running java
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`

    JAVACMD=`cygpath --unix "$JAVACMD"`

    # We build the pattern for arguments to be converted via cygpath
    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
    SEP=""
    for dir in $ROOTDIRSRAW ; do
        ROOTDIRS="$ROOTDIRS$SEP$dir"
        SEP="|"
    done
    OURCYGPATTERN="(^($ROOTDIRS))"
    # Add a user-defined pattern to the cygpath arguments
    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
    fi
    # Now convert the arguments - kludge to limit ourselves to /bin/sh
    i=0
    for arg in "$@" ; do
        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
        CHECK2=`echo "$arg"|egrep -c "^-"`                                 ### Determine if an option

        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Added a condition
            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
        else
            eval `echo args$i`="\"$arg\""
        fi
        i=`expr $i + 1`
    done
    case $i in
        0) set -- ;;
        1) set -- "$args0" ;;
        2) set -- "$args0" "$args1" ;;
        3) set -- "$args0" "$args1" "$args2" ;;
        4) set -- "$args0" "$args1" "$args2" "$args3" ;;
        5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
        6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
        7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
        8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
        9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
    esac
fi

# Escape application args
save () {
    for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
    echo " "
}
APP_ARGS=`save "$@"`

# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"

exec "$JAVACMD" "$@"


================================================
FILE: android/SherpaOnnx/gradlew.bat
================================================
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem      https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem

@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem  Gradle startup script for Windows
@rem
@rem ##########################################################################

@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal

set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"

@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome

set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto execute

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto execute

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar


@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*

:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if  not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1

:mainEnd
if "%OS%"=="Windows_NT" endlocal

:omega


================================================
FILE: android/SherpaOnnx/settings.gradle
================================================
pluginManagement {
    repositories {
        gradlePluginPortal()
        google()
        mavenCentral()
    }
}
dependencyResolutionManagement {
    repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
    repositories {
        google()
        mavenCentral()
    }
}
rootProject.name = "SherpaOnnx"
include ':app'


================================================
FILE: android/SherpaOnnx2Pass/.gitignore
================================================
*.iml
.gradle
/local.properties
/.idea/caches
/.idea/libraries
/.idea/modules.xml
/.idea/workspace.xml
/.idea/navEditor.xml
/.idea/assetWizardSettings.xml
.DS_Store
/build
/captures
.externalNativeBuild
.cxx
local.properties


================================================
FILE: android/SherpaOnnx2Pass/app/.gitignore
================================================
/build

================================================
FILE: android/SherpaOnnx2Pass/app/build.gradle
================================================
plugins {
    id 'com.android.application'
    id 'org.jetbrains.kotlin.android'
}

android {
    namespace 'com.k2fsa.sherpa.onnx'
    compileSdk 32

    defaultConfig {
        applicationId "com.k2fsa.sherpa.onnx"
        minSdk 21
        targetSdk 32
        versionCode 20260320
        versionName "1.12.31"

        testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
    }

    buildTypes {
        release {
            minifyEnabled false
            proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
        }
    }
    compileOptions {
        sourceCompatibility JavaVersion.VERSION_1_8
        targetCompatibility JavaVersion.VERSION_1_8
    }
    kotlinOptions {
        jvmTarget = '1.8'
    }
}

dependencies {

    implementation 'androidx.core:core-ktx:1.7.0'
    implementation 'androidx.appcompat:appcompat:1.5.1'
    implementation 'com.google.android.material:material:1.7.0'
    implementation 'androidx.constraintlayout:constraintlayout:2.1.4'
    testImplementation 'junit:junit:4.13.2'
    androidTestImplementation 'androidx.test.ext:junit:1.1.4'
    androidTestImplementation 'androidx.test.espresso:espresso-core:3.5.0'
}

================================================
FILE: android/SherpaOnnx2Pass/app/proguard-rules.pro
================================================
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
#   http://developer.android.com/guide/developing/tools/proguard.html

# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
#   public *;
#}

# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable

# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile

================================================
FILE: android/SherpaOnnx2Pass/app/src/androidTest/java/com/k2fsa/sherpa/onnx/ExampleInstrumentedTest.kt
================================================
package com.k2fsa.sherpa.onnx

import androidx.test.platform.app.InstrumentationRegistry
import androidx.test.ext.junit.runners.AndroidJUnit4

import org.junit.Test
import org.junit.runner.RunWith

import org.junit.Assert.*

/**
 * Instrumented test, which will execute on an Android device.
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
@RunWith(AndroidJUnit4::class)
class ExampleInstrumentedTest {
    @Test
    fun useAppContext() {
        // Context of the app under test.
        val appContext = InstrumentationRegistry.getInstrumentation().targetContext
        assertEquals("com.k2fsa.sherpa.onnx", appContext.packageName)
    }
}

================================================
FILE: android/SherpaOnnx2Pass/app/src/main/.gitignore
================================================
*.so


================================================
FILE: android/SherpaOnnx2Pass/app/src/main/AndroidManifest.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:tools="http://schemas.android.com/tools">

    <uses-permission android:name="android.permission.RECORD_AUDIO" />

    <application
        android:allowBackup="true"
        android:dataExtractionRules="@xml/data_extraction_rules"
        android:fullBackupContent="@xml/backup_rules"
        android:icon="@mipmap/ic_launcher"
        android:label="@string/app_name"
        android:roundIcon="@mipmap/ic_launcher_round"
        android:supportsRtl="true"
        android:theme="@style/Theme.SherpaOnnx2Pass"
        tools:targetApi="31">
        <activity
            android:name=".MainActivity"
            android:label="2pass ASR: Next-gen Kaldi"
            android:exported="true">
            <intent-filter>
                <action android:name="android.intent.action.MAIN" />

                <category android:name="android.intent.category.LAUNCHER" />
            </intent-filter>

            <meta-data
                android:name="android.app.lib_name"
                android:value="" />
        </activity>
    </application>

</manifest>


================================================
FILE: android/SherpaOnnx2Pass/app/src/main/assets/.gitkeep
================================================


================================================
FILE: android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
================================================
package com.k2fsa.sherpa.onnx

import android.Manifest
import android.content.pm.PackageManager
import android.media.AudioFormat
import android.media.AudioRecord
import android.media.MediaRecorder
import android.os.Bundle
import android.text.method.ScrollingMovementMethod
import android.util.Log
import android.widget.Button
import android.widget.TextView
import androidx.appcompat.app.AppCompatActivity
import androidx.core.app.ActivityCompat
import kotlin.concurrent.thread

private const val TAG = "sherpa-onnx"
private const val REQUEST_RECORD_AUDIO_PERMISSION = 200

// adb emu avd hostmicon
// to enable microphone inside the emulator
class MainActivity : AppCompatActivity() {
    private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)

    private lateinit var onlineRecognizer: OnlineRecognizer
    private lateinit var offlineRecognizer: OfflineRecognizer
    private var audioRecord: AudioRecord? = null
    private lateinit var recordButton: Button
    private lateinit var textView: TextView
    private var recordingThread: Thread? = null

    private val audioSource = MediaRecorder.AudioSource.MIC
    private val sampleRateInHz = 16000
    private val channelConfig = AudioFormat.CHANNEL_IN_MONO

    private var samplesBuffer = arrayListOf<FloatArray>()

    // Note: We don't use AudioFormat.ENCODING_PCM_FLOAT
    // since the AudioRecord.read(float[]) needs API level >= 23
    // but we are targeting API level >= 21
    private val audioFormat = AudioFormat.ENCODING_PCM_16BIT
    private var idx: Int = 0
    private var lastText: String = ""

    @Volatile
    private var isRecording: Boolean = false

    override fun onRequestPermissionsResult(
        requestCode: Int, permissions: Array<String>, grantResults: IntArray
    ) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults)
        val permissionToRecordAccepted = if (requestCode == REQUEST_RECORD_AUDIO_PERMISSION) {
            grantResults[0] == PackageManager.PERMISSION_GRANTED
        } else {
            false
        }

        if (!permissionToRecordAccepted) {
            Log.e(TAG, "Audio record is disallowed")
            finish()
        }

        Log.i(TAG, "Audio record is permitted")
    }

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        setContentView(R.layout.activity_main)

        ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)

        Log.i(TAG, "Start to initialize first-pass recognizer")
        initOnlineRecognizer()
        Log.i(TAG, "Finished initializing first-pass recognizer")

        Log.i(TAG, "Start to initialize second-pass recognizer")
        initOfflineRecognizer()
        Log.i(TAG, "Finished initializing second-pass recognizer")

        recordButton = findViewById(R.id.record_button)
        recordButton.setOnClickListener { onclick() }

        textView = findViewById(R.id.my_text)
        textView.movementMethod = ScrollingMovementMethod()
    }

    private fun onclick() {
        if (!isRecording) {
            val ret = initMicrophone()
            if (!ret) {
                Log.e(TAG, "Failed to initialize microphone")
                return
            }
            Log.i(TAG, "state: ${audioRecord?.state}")
            audioRecord!!.startRecording()
            recordButton.setText(R.string.stop)
            isRecording = true
            samplesBuffer.clear()
            textView.text = ""
            lastText = ""
            idx = 0

            recordingThread = thread(true) {
                processSamples()
            }
            Log.i(TAG, "Started recording")
        } else {
            isRecording = false
            audioRecord!!.stop()
            audioRecord!!.release()
            audioRecord = null
            recordButton.setText(R.string.start)
            Log.i(TAG, "Stopped recording")
        }
    }

    private fun processSamples() {
        Log.i(TAG, "processing samples")
        val stream = onlineRecognizer.createStream()

        val interval = 0.1 // i.e., 100 ms
        val bufferSize = (interval * sampleRateInHz).toInt() // in samples
        val buffer = ShortArray(bufferSize)

        while (isRecording) {
            val ret = audioRecord?.read(buffer, 0, buffer.size)
            if (ret != null && ret > 0) {
                val samples = FloatArray(ret) { buffer[it] / 32768.0f }
                samplesBuffer.add(samples)

                stream.acceptWaveform(samples, sampleRate = sampleRateInHz)
                while (onlineRecognizer.isReady(stream)) {
                    onlineRecognizer.decode(stream)
                }
                val isEndpoint = onlineRecognizer.isEndpoint(stream)
                var textToDisplay = lastText

                var text = onlineRecognizer.getResult(stream).text
                if (text.isNotBlank()) {
                    textToDisplay = if (lastText.isBlank()) {
                        // textView.text = "${idx}: ${text}"
                        "${idx}: $text"
                    } else {
                        "${lastText}\n${idx}: $text"
                    }
                }

                if (isEndpoint) {
                    onlineRecognizer.reset(stream)

                    if (text.isNotBlank()) {
                        text = runSecondPass()
                        lastText = "${lastText}\n${idx}: $text"
                        idx += 1
                    } else {
                        samplesBuffer.clear()
                    }
                }

                runOnUiThread {
                    textView.text = textToDisplay.lowercase()
                }
            }
        }
        stream.release()
    }

    private fun initMicrophone(): Boolean {
        if (ActivityCompat.checkSelfPermission(
                this, Manifest.permission.RECORD_AUDIO
            ) != PackageManager.PERMISSION_GRANTED
        ) {
            ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)
            return false
        }

        val numBytes = AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat)
        Log.i(
            TAG, "buffer size in milliseconds: ${numBytes * 1000.0f / sampleRateInHz}"
        )

        audioRecord = AudioRecord(
            audioSource,
            sampleRateInHz,
            channelConfig,
            audioFormat,
            numBytes * 2 // a sample has two bytes as we are using 16-bit PCM
        )
        return true
    }

    private fun initOnlineRecognizer() {
        // Please change getModelConfig() to add new models
        // See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
        // for a list of available models
        val firstType = 9
        val firstRuleFsts: String?
        firstRuleFsts = null
        Log.i(TAG, "Select model type $firstType for the first pass")
        val config = OnlineRecognizerConfig(
            featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
            modelConfig = getModelConfig(type = firstType)!!,
            endpointConfig = getEndpointConfig(),
            enableEndpoint = true,
        )
        if (firstRuleFsts != null) {
            config.ruleFsts = firstRuleFsts;
        }

        onlineRecognizer = OnlineRecognizer(
            assetManager = application.assets,
            config = config,
        )
    }

    private fun initOfflineRecognizer() {
        // Please change getOfflineModelConfig() to add new models
        // See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
        // for a list of available models
        val secondType = 0
        var secondRuleFsts: String?
        secondRuleFsts = null
        Log.i(TAG, "Select model type $secondType for the second pass")

        val config = OfflineRecognizerConfig(
            featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
            modelConfig = getOfflineModelConfig(type = secondType)!!,
        )

        if (secondRuleFsts != null) {
            config.ruleFsts = secondRuleFsts
        }

        offlineRecognizer = OfflineRecognizer(
            assetManager = application.assets,
            config = config,
        )
    }

    private fun runSecondPass(): String {
        var totalSamples = 0
        for (a in samplesBuffer) {
            totalSamples += a.size
        }
        var i = 0

        val samples = FloatArray(totalSamples)

        // todo(fangjun): Make it more efficient
        for (a in samplesBuffer) {
            for (s in a) {
                samples[i] = s
                i += 1
            }
        }


        val n = maxOf(0, samples.size - 8000)

        samplesBuffer.clear()
        samplesBuffer.add(samples.sliceArray(n until samples.size))

        val stream = offlineRecognizer.createStream()
        stream.acceptWaveform(samples.sliceArray(0..n), sampleRateInHz)
        offlineRecognizer.decode(stream)
        val result = offlineRecognizer.getResult(stream)

        stream.release()

        return result.text
    }
}


================================================
FILE: android/SherpaOnnx2Pass/app/src/main/jniLibs/.gitkeep
================================================


================================================
FILE: android/SherpaOnnx2Pass/app/src/main/jniLibs/arm64-v8a/.gitkeep
================================================


================================================
FILE: android/SherpaOnnx2Pass/app/src/main/jniLibs/armeabi-v7a/.gitkeep
================================================


================================================
FILE: android/SherpaOnnx2Pass/app/src/main/jniLibs/x86/.gitkeep
================================================


================================================
FILE: android/SherpaOnnx2Pass/app/src/main/jniLibs/x86_64/.gitkeep
================================================


================================================
FILE: android/SherpaOnnx2Pass/app/src/main/res/drawable/ic_launcher_background.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">
    <path
        android:fillColor="#3DDC84"
        android:pathData="M0,0h108v108h-108z" />
    <path
        android:fillColor="#00000000"
        android:pathData="M9,0L9,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,0L19,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M29,0L29,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M39,0L39,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M49,0L49,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M59,0L59,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M69,0L69,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M79,0L79,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M89,0L89,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M99,0L99,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,9L108,9"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,19L108,19"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,29L108,29"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,39L108,39"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,49L108,49"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,59L108,59"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,69L108,69"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,79L108,79"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,89L108,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,99L108,99"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,29L89,29"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,39L89,39"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,49L89,49"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,59L89,59"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,69L89,69"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,79L89,79"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M29,19L29,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M39,19L39,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M49,19L49,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M59,19L59,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M69,19L69,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M79,19L79,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
</vector>


================================================
FILE: android/SherpaOnnx2Pass/app/src/main/res/drawable-v24/ic_launcher_foreground.xml
================================================
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:aapt="http://schemas.android.com/aapt"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">
    <path android:pathData="M31,63.928c0,0 6.4,-11 12.1,-13.1c7.2,-2.6 26,-1.4 26,-1.4l38.1,38.1L107,108.928l-32,-1L31,63.928z">
        <aapt:attr name="android:fillColor">
            <gradient
                android:endX="85.84757"
                android:endY="92.4963"
                android:startX="42.9492"
                android:startY="49.59793"
                android:type="linear">
                <item
                    android:color="#44000000"
                    android:offset="0.0" />
                <item
                    android:color="#00000000"
                    android:offset="1.0" />
            </gradient>
        </aapt:attr>
    </path>
    <path
        android:fillColor="#FFFFFF"
        android:fillType="nonZero"
        android:pathData="M65.3,45.828l3.8,-6.6c0.2,-0.4 0.1,-0.9 -0.3,-1.1c-0.4,-0.2 -0.9,-0.1 -1.1,0.3l-3.9,6.7c-6.3,-2.8 -13.4,-2.8 -19.7,0l-3.9,-6.7c-0.2,-0.4 -0.7,-0.5 -1.1,-0.3C38.8,38.328 38.7,38.828 38.9,39.228l3.8,6.6C36.2,49.428 31.7,56.028 31,63.928h46C76.3,56.028 71.8,49.428 65.3,45.828zM43.4,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2c-0.3,-0.7 -0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C45.3,56.528 44.5,57.328 43.4,57.328L43.4,57.328zM64.6,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2s-0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C66.5,56.528 65.6,57.328 64.6,57.328L64.6,57.328z"
        android:strokeWidth="1"
        android:strokeColor="#00000000" />
</vector>

================================================
FILE: android/SherpaOnnx2Pass/app/src/main/res/layout/activity_main.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:app="http://schemas.android.com/apk/res-auto"
    xmlns:tools="http://schemas.android.com/tools"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    tools:context=".MainActivity">

    <LinearLayout
        android:layout_width="match_parent"
        android:layout_height="match_parent"
        android:gravity="center"
        android:orientation="vertical">

        <TextView
            android:id="@+id/my_text"
            android:layout_width="match_parent"
            android:layout_height="match_parent"
            android:layout_weight="2.5"
            android:padding="24dp"
            android:scrollbars="vertical"
            android:singleLine="false"
            android:text="@string/hint"
            app:layout_constraintBottom_toBottomOf="parent"
            app:layout_constraintEnd_toEndOf="parent"
            app:layout_constraintStart_toStartOf="parent"
            android:gravity="bottom"
            app:layout_constraintTop_toTopOf="parent" />

        <Button
            android:id="@+id/record_button"
            android:layout_width="wrap_content"
            android:layout_height="wrap_content"
            android:layout_weight="0.5"
            android:text="@string/start" />
    </LinearLayout>


</androidx.constraintlayout.widget.ConstraintLayout>

================================================
FILE: android/SherpaOnnx2Pass/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@drawable/ic_launcher_background" />
    <foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

================================================
FILE: android/SherpaOnnx2Pass/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@drawable/ic_launcher_background" />
    <foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

================================================
FILE: android/SherpaOnnx2Pass/app/src/main/res/values/colors.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<resources>
    <color name="purple_200">#FFBB86FC</color>
    <color name="purple_500">#FF6200EE</color>
    <color name="purple_700">#FF3700B3</color>
    <color name="teal_200">#FF03DAC5</color>
    <color name="teal_700">#FF018786</color>
    <color name="black">#FF000000</color>
    <color name="white">#FFFFFFFF</color>
</resources>

================================================
FILE: android/SherpaOnnx2Pass/app/src/main/res/values/strings.xml
================================================
<resources>
    <string name="app_name">ASR2pass </string>
    <string name="hint">Click the Start button to play speech-to-text with Next-gen Kaldi.
        \n
        \n\n\n
        The source code and pre-trained models are publicly available.
        Please see https://github.com/k2-fsa/sherpa-onnx for details.
        \n\n
        Two-pass speech recognition with Next-gen Kaldi.
    </string>
    <string name="start">Start</string>
    <string name="stop">Stop</string>
</resources>

================================================
FILE: android/SherpaOnnx2Pass/app/src/main/res/values/themes.xml
================================================
<resources xmlns:tools="http://schemas.android.com/tools">
    <!-- Base application theme. -->
    <style name="Theme.SherpaOnnx2Pass" parent="Theme.MaterialComponents.DayNight.DarkActionBar">
        <!-- Primary brand color. -->
        <item name="colorPrimary">@color/purple_500</item>
        <item name="colorPrimaryVariant">@color/purple_700</item>
        <item name="colorOnPrimary">@color/white</item>
        <!-- Secondary brand color. -->
        <item name="colorSecondary">@color/teal_200</item>
        <item name="colorSecondaryVariant">@color/teal_700</item>
        <item name="colorOnSecondary">@color/black</item>
        <!-- Status bar color. -->
        <item name="android:statusBarColor">?attr/colorPrimaryVariant</item>
        <!-- Customize your theme here. -->
    </style>
</resources>


================================================
FILE: android/SherpaOnnx2Pass/app/src/main/res/values-night/themes.xml
================================================
<resources xmlns:tools="http://schemas.android.com/tools">
    <!-- Base application theme. -->
    <style name="Theme.SherpaOnnx2Pass" parent="Theme.MaterialComponents.DayNight.DarkActionBar">
        <!-- Primary brand color. -->
        <item name="colorPrimary">@color/purple_200</item>
        <item name="colorPrimaryVariant">@color/purple_700</item>
        <item name="colorOnPrimary">@color/black</item>
        <!-- Secondary brand color. -->
        <item name="colorSecondary">@color/teal_200</item>
        <item name="colorSecondaryVariant">@color/teal_200</item>
        <item name="colorOnSecondary">@color/black</item>
        <!-- Status bar color. -->
        <item name="android:statusBarColor">?attr/colorPrimaryVariant</item>
        <!-- Customize your theme here. -->
    </style>
</resources>


================================================
FILE: android/SherpaOnnx2Pass/app/src/main/res/xml/backup_rules.xml
================================================
<?xml version="1.0" encoding="utf-8"?><!--
   Sample backup rules file; uncomment and customize as necessary.
   See https://developer.android.com/guide/topics/data/autobackup
   for details.
   Note: This file is ignored for devices older that API 31
   See https://developer.android.com/about/versions/12/backup-restore
-->
<full-backup-content>
    <!--
   <include domain="sharedpref" path="."/>
   <exclude domain="sharedpref" path="device.xml"/>
-->
</full-backup-content>

================================================
FILE: android/SherpaOnnx2Pass/app/src/main/res/xml/data_extraction_rules.xml
================================================
<?xml version="1.0" encoding="utf-8"?><!--
   Sample data extraction rules file; uncomment and customize as necessary.
   See https://developer.android.com/about/versions/12/backup-restore#xml-changes
   for details.
-->
<data-extraction-rules>
    <cloud-backup>
        <!-- TODO: Use <include> and <exclude> to control what is backed up.
        <include .../>
        <exclude .../>
        -->
    </cloud-backup>
    <!--
    <device-transfer>
        <include .../>
        <exclude .../>
    </device-transfer>
    -->
</data-extraction-rules>

================================================
FILE: android/SherpaOnnx2Pass/app/src/test/java/com/k2fsa/sherpa/onnx/ExampleUnitTest.kt
================================================
package com.k2fsa.sherpa.onnx

import org.junit.Test

import org.junit.Assert.*

/**
 * Example local unit test, which will execute on the development machine (host).
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
class ExampleUnitTest {
    @Test
    fun addition_isCorrect() {
        assertEquals(4, 2 + 2)
    }
}

================================================
FILE: android/SherpaOnnx2Pass/build.gradle
================================================
// Top-level build file where you can add configuration options common to all sub-projects/modules.
plugins {
    id 'com.android.application' version '7.3.1' apply false
    id 'com.android.library' version '7.3.1' apply false
    id 'org.jetbrains.kotlin.android' version '1.7.20' apply false
}

================================================
FILE: android/SherpaOnnx2Pass/gradle/wrapper/gradle-wrapper.properties
================================================
#Sun Sep 10 18:03:03 CST 2023
distributionBase=GRADLE_USER_HOME
distributionUrl=https\://services.gradle.org/distributions/gradle-8.2-bin.zip
distributionPath=wrapper/dists
zipStorePath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME


================================================
FILE: android/SherpaOnnx2Pass/gradle.properties
================================================
# Project-wide Gradle settings.
# IDE (e.g. Android Studio) users:
# Gradle settings configured through the IDE *will override*
# any settings specified in this file.
# For more details on how to configure your build environment visit
# http://www.gradle.org/docs/current/userguide/build_environment.html
# Specifies the JVM arguments used for the daemon process.
# The setting is particularly useful for tweaking memory settings.
org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
# When configured, Gradle will run in incubating parallel mode.
# This option should only be used with decoupled projects. More details, visit
# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
# org.gradle.parallel=true
# AndroidX package structure to make it clearer which packages are bundled with the
# Android operating system, and which are packaged with your app's APK
# https://developer.android.com/topic/libraries/support-library/androidx-rn
android.useAndroidX=true
# Kotlin code style for this project: "official" or "obsolete":
kotlin.code.style=official
# Enables namespacing of each library's R class so that its R class includes only the
# resources declared in the library itself and none from the library's dependencies,
# thereby reducing the size of the R class for that library
android.nonTransitiveRClass=true

================================================
FILE: android/SherpaOnnx2Pass/gradlew
================================================
#!/usr/bin/env sh

#
# Copyright 2015 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

##############################################################################
##
##  Gradle start up script for UN*X
##
##############################################################################

# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
    ls=`ls -ld "$PRG"`
    link=`expr "$ls" : '.*-> \(.*\)$'`
    if expr "$link" : '/.*' > /dev/null; then
        PRG="$link"
    else
        PRG=`dirname "$PRG"`"/$link"
    fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null

APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`

# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"

warn () {
    echo "$*"
}

die () {
    echo
    echo "$*"
    echo
    exit 1
}

# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
  CYGWIN* )
    cygwin=true
    ;;
  Darwin* )
    darwin=true
    ;;
  MINGW* )
    msys=true
    ;;
  NONSTOP* )
    nonstop=true
    ;;
esac

CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar


# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
        # IBM's JDK on AIX uses strange locations for the executables
        JAVACMD="$JAVA_HOME/jre/sh/java"
    else
        JAVACMD="$JAVA_HOME/bin/java"
    fi
    if [ ! -x "$JAVACMD" ] ; then
        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
else
    JAVACMD="java"
    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi

# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
    MAX_FD_LIMIT=`ulimit -H -n`
    if [ $? -eq 0 ] ; then
        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
            MAX_FD="$MAX_FD_LIMIT"
        fi
        ulimit -n $MAX_FD
        if [ $? -ne 0 ] ; then
            warn "Could not set maximum file descriptor limit: $MAX_FD"
        fi
    else
        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
    fi
fi

# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi

# For Cygwin or MSYS, switch paths to Windows format before running java
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`

    JAVACMD=`cygpath --unix "$JAVACMD"`

    # We build the pattern for arguments to be converted via cygpath
    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
    SEP=""
    for dir in $ROOTDIRSRAW ; do
        ROOTDIRS="$ROOTDIRS$SEP$dir"
        SEP="|"
    done
    OURCYGPATTERN="(^($ROOTDIRS))"
    # Add a user-defined pattern to the cygpath arguments
    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
    fi
    # Now convert the arguments - kludge to limit ourselves to /bin/sh
    i=0
    for arg in "$@" ; do
        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
        CHECK2=`echo "$arg"|egrep -c "^-"`                                 ### Determine if an option

        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Added a condition
            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
        else
            eval `echo args$i`="\"$arg\""
        fi
        i=`expr $i + 1`
    done
    case $i in
        0) set -- ;;
        1) set -- "$args0" ;;
        2) set -- "$args0" "$args1" ;;
        3) set -- "$args0" "$args1" "$args2" ;;
        4) set -- "$args0" "$args1" "$args2" "$args3" ;;
        5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
        6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
        7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
        8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
        9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
    esac
fi

# Escape application args
save () {
    for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
    echo " "
}
APP_ARGS=`save "$@"`

# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"

exec "$JAVACMD" "$@"


================================================
FILE: android/SherpaOnnx2Pass/gradlew.bat
================================================
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem      https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem

@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem  Gradle startup script for Windows
@rem
@rem ##########################################################################

@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal

set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"

@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome

set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto execute

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto execute

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar


@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*

:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if  not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1

:mainEnd
if "%OS%"=="Windows_NT" endlocal

:omega


================================================
FILE: android/SherpaOnnx2Pass/settings.gradle
================================================
pluginManagement {
    repositories {
        gradlePluginPortal()
        google()
        mavenCentral()
    }
}
dependencyResolutionManagement {
    repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
    repositories {
        google()
        mavenCentral()
    }
}
rootProject.name = "SherpaOnnx2Pass"
include ':app'


================================================
FILE: android/SherpaOnnxAar/.gitignore
================================================
*.iml
.gradle
/local.properties
/.idea/caches
/.idea/libraries
/.idea/modules.xml
/.idea/workspace.xml
/.idea/navEditor.xml
/.idea/assetWizardSettings.xml
.DS_Store
/build
/captures
.externalNativeBuild
.cxx
local.properties


================================================
FILE: android/SherpaOnnxAar/README.md
================================================
# Usage of this project

```
git clone https://github.com/k2-fsa/sherpa-onnx
cd sherpa-onnx

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.31/sherpa-onnx-v1.12.31-android.tar.bz2
tar xvf sherpa-onnx-v1.12.31-android.tar.bz2

cp -v jniLibs/arm64-v8a/* android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/arm64-v8a/
cp -v jniLibs/armeabi-v7a/* android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/armeabi-v7a/
cp -v jniLibs/x86/* android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/x86/
cp -v jniLibs/x86_64/* android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/x86_64/

cd android/SherpaOnnxAar

./gradlew :sherpa_onnx:assembleRelease
ls -lh ./sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar
cp ./sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar ../../sherpa-onnx-1.12.31.aar
```


================================================
FILE: android/SherpaOnnxAar/build.gradle.kts
================================================
// Top-level build file where you can add configuration options common to all sub-projects/modules.
plugins {
    alias(libs.plugins.android.application) apply false
    alias(libs.plugins.jetbrains.kotlin.android) apply false
    alias(libs.plugins.android.library) apply false
}

================================================
FILE: android/SherpaOnnxAar/gradle/libs.versions.toml
================================================
[versions]
agp = "8.4.0"
kotlin = "1.7.20"
coreKtx = "1.15.0"
junit = "4.13.2"
junitVersion = "1.2.1"
espressoCore = "3.6.1"
appcompat = "1.7.0"
material = "1.12.0"

[libraries]
androidx-core-ktx = { group = "androidx.core", name = "core-ktx", version.ref = "coreKtx" }
junit = { group = "junit", name = "junit", version.ref = "junit" }
androidx-junit = { group = "androidx.test.ext", name = "junit", version.ref = "junitVersion" }
androidx-espresso-core = { group = "androidx.test.espresso", name = "espresso-core", version.ref = "espressoCore" }
androidx-appcompat = { group = "androidx.appcompat", name = "appcompat", version.ref = "appcompat" }
material = { group = "com.google.android.material", name = "material", version.ref = "material" }

[plugins]
android-application = { id = "com.android.application", version.ref = "agp" }
jetbrains-kotlin-android = { id = "org.jetbrains.kotlin.android", version.ref = "kotlin" }
android-library = { id = "com.android.library", version.ref = "agp" }


================================================
FILE: android/SherpaOnnxAar/gradle/wrapper/gradle-wrapper.properties
================================================
#Thu Dec 12 14:02:30 CST 2024
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.6-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists


================================================
FILE: android/SherpaOnnxAar/gradle.properties
================================================
# Project-wide Gradle settings.
# IDE (e.g. Android Studio) users:
# Gradle settings configured through the IDE *will override*
# any settings specified in this file.
# For more details on how to configure your build environment visit
# http://www.gradle.org/docs/current/userguide/build_environment.html
# Specifies the JVM arguments used for the daemon process.
# The setting is particularly useful for tweaking memory settings.
org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
# When configured, Gradle will run in incubating parallel mode.
# This option should only be used with decoupled projects. For more details, visit
# https://developer.android.com/r/tools/gradle-multi-project-decoupled-projects
# org.gradle.parallel=true
# AndroidX package structure to make it clearer which packages are bundled with the
# Android operating system, and which are packaged with your app's APK
# https://developer.android.com/topic/libraries/support-library/androidx-rn
android.useAndroidX=true
# Kotlin code style for this project: "official" or "obsolete":
kotlin.code.style=official
# Enables namespacing of each library's R class so that its R class includes only the
# resources declared in the library itself and none from the library's dependencies,
# thereby reducing the size of the R class for that library
android.nonTransitiveRClass=true

================================================
FILE: android/SherpaOnnxAar/gradlew
================================================
#!/usr/bin/env sh

#
# Copyright 2015 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

##############################################################################
##
##  Gradle start up script for UN*X
##
##############################################################################

# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
    ls=`ls -ld "$PRG"`
    link=`expr "$ls" : '.*-> \(.*\)$'`
    if expr "$link" : '/.*' > /dev/null; then
        PRG="$link"
    else
        PRG=`dirname "$PRG"`"/$link"
    fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null

APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`

# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"

warn () {
    echo "$*"
}

die () {
    echo
    echo "$*"
    echo
    exit 1
}

# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
  CYGWIN* )
    cygwin=true
    ;;
  Darwin* )
    darwin=true
    ;;
  MINGW* )
    msys=true
    ;;
  NONSTOP* )
    nonstop=true
    ;;
esac

CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar


# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
        # IBM's JDK on AIX uses strange locations for the executables
        JAVACMD="$JAVA_HOME/jre/sh/java"
    else
        JAVACMD="$JAVA_HOME/bin/java"
    fi
    if [ ! -x "$JAVACMD" ] ; then
        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
else
    JAVACMD="java"
    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi

# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
    MAX_FD_LIMIT=`ulimit -H -n`
    if [ $? -eq 0 ] ; then
        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
            MAX_FD="$MAX_FD_LIMIT"
        fi
        ulimit -n $MAX_FD
        if [ $? -ne 0 ] ; then
            warn "Could not set maximum file descriptor limit: $MAX_FD"
        fi
    else
        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
    fi
fi

# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi

# For Cygwin or MSYS, switch paths to Windows format before running java
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`

    JAVACMD=`cygpath --unix "$JAVACMD"`

    # We build the pattern for arguments to be converted via cygpath
    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
    SEP=""
    for dir in $ROOTDIRSRAW ; do
        ROOTDIRS="$ROOTDIRS$SEP$dir"
        SEP="|"
    done
    OURCYGPATTERN="(^($ROOTDIRS))"
    # Add a user-defined pattern to the cygpath arguments
    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
    fi
    # Now convert the arguments - kludge to limit ourselves to /bin/sh
    i=0
    for arg in "$@" ; do
        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
        CHECK2=`echo "$arg"|egrep -c "^-"`                                 ### Determine if an option

        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Added a condition
            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
        else
            eval `echo args$i`="\"$arg\""
        fi
        i=`expr $i + 1`
    done
    case $i in
        0) set -- ;;
        1) set -- "$args0" ;;
        2) set -- "$args0" "$args1" ;;
        3) set -- "$args0" "$args1" "$args2" ;;
        4) set -- "$args0" "$args1" "$args2" "$args3" ;;
        5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
        6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
        7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
        8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
        9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
    esac
fi

# Escape application args
save () {
    for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
    echo " "
}
APP_ARGS=`save "$@"`

# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"

exec "$JAVACMD" "$@"


================================================
FILE: android/SherpaOnnxAar/gradlew.bat
================================================
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem      https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem

@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem  Gradle startup script for Windows
@rem
@rem ##########################################################################

@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal

set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"

@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome

set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto execute

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto execute

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar


@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*

:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if  not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1

:mainEnd
if "%OS%"=="Windows_NT" endlocal

:omega


================================================
FILE: android/SherpaOnnxAar/settings.gradle.kts
================================================
pluginManagement {
    repositories {
        google {
            content {
                includeGroupByRegex("com\\.android.*")
                includeGroupByRegex("com\\.google.*")
                includeGroupByRegex("androidx.*")
            }
        }
        mavenCentral()
        gradlePluginPortal()
    }
}
dependencyResolutionManagement {
    repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
    repositories {
        google()
        mavenCentral()
    }
}

rootProject.name = "SherpaOnnxAar"
include(":sherpa_onnx")


================================================
FILE: android/SherpaOnnxAar/sherpa_onnx/.gitignore
================================================
/build

================================================
FILE: android/SherpaOnnxAar/sherpa_onnx/build.gradle.kts
================================================
plugins {
    alias(libs.plugins.android.library)
    alias(libs.plugins.jetbrains.kotlin.android)
}

android {
    namespace = "com.k2fsa.sherpa.onnx"
    compileSdk = 34

    defaultConfig {
        minSdk = 21

        testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
        consumerProguardFiles("consumer-rules.pro")
    }

    buildTypes {
        release {
            isMinifyEnabled = false
            proguardFiles(
                getDefaultProguardFile("proguard-android-optimize.txt"),
                "proguard-rules.pro"
            )
        }
    }
    compileOptions {
        sourceCompatibility = JavaVersion.VERSION_1_8
        targetCompatibility = JavaVersion.VERSION_1_8
    }
    kotlinOptions {
        jvmTarget = "1.8"
    }
}

dependencies {

    implementation(libs.androidx.core.ktx)
    implementation(libs.androidx.appcompat)
    implementation(libs.material)
    testImplementation(libs.junit)
    androidTestImplementation(libs.androidx.junit)
    androidTestImplementation(libs.androidx.espresso.core)
}

================================================
FILE: android/SherpaOnnxAar/sherpa_onnx/consumer-rules.pro
================================================


================================================
FILE: android/SherpaOnnxAar/sherpa_onnx/proguard-rules.pro
================================================
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
#   http://developer.android.com/guide/developing/tools/proguard.html

# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
#   public *;
#}

# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable

# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile

================================================
FILE: android/SherpaOnnxAar/sherpa_onnx/src/androidTest/java/com/k2fsa/sherpa/onnx/ExampleInstrumentedTest.kt
================================================
package com.k2fsa.sherpa.onnx

import androidx.test.platform.app.InstrumentationRegistry
import androidx.test.ext.junit.runners.AndroidJUnit4

import org.junit.Test
import org.junit.runner.RunWith

import org.junit.Assert.*

/**
 * Instrumented test, which will execute on an Android device.
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
@RunWith(AndroidJUnit4::class)
class ExampleInstrumentedTest {
    @Test
    fun useAppContext() {
        // Context of the app under test.
        val appContext = InstrumentationRegistry.getInstrumentation().targetContext
        assertEquals("com.k2fsa.sherpa.onnx.test", appContext.packageName)
    }
}

================================================
FILE: android/SherpaOnnxAar/sherpa_onnx/src/main/AndroidManifest.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android">

</manifest>

================================================
FILE: android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/arm64-v8a/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/armeabi-v7a/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/x86/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxAar/sherpa_onnx/src/main/jniLibs/x86_64/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxAar/sherpa_onnx/src/test/java/com/k2fsa/sherpa/onnx/ExampleUnitTest.kt
================================================
package com.k2fsa.sherpa.onnx

import org.junit.Test

import org.junit.Assert.*

/**
 * Example local unit test, which will execute on the development machine (host).
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
class ExampleUnitTest {
    @Test
    fun addition_isCorrect() {
        assertEquals(4, 2 + 2)
    }
}

================================================
FILE: android/SherpaOnnxAudioTagging/.gitignore
================================================
*.iml
.gradle
/local.properties
/.idea/caches
/.idea/libraries
/.idea/modules.xml
/.idea/workspace.xml
/.idea/navEditor.xml
/.idea/assetWizardSettings.xml
.DS_Store
/build
/captures
.externalNativeBuild
.cxx
local.properties


================================================
FILE: android/SherpaOnnxAudioTagging/app/.gitignore
================================================
/build

================================================
FILE: android/SherpaOnnxAudioTagging/app/build.gradle.kts
================================================
plugins {
    id("com.android.application")
    id("org.jetbrains.kotlin.android")
}

android {
    namespace = "com.k2fsa.sherpa.onnx.audio.tagging"
    compileSdk = 34

    defaultConfig {
        applicationId = "com.k2fsa.sherpa.onnx.audio.tagging"
        minSdk = 21
        targetSdk = 34
        versionCode = 20260320
        versionName = "1.12.31"

        testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
        vectorDrawables {
            useSupportLibrary = true
        }
    }

    buildTypes {
        release {
            isMinifyEnabled = false
            proguardFiles(
                getDefaultProguardFile("proguard-android-optimize.txt"),
                "proguard-rules.pro"
            )
        }
    }
    compileOptions {
        sourceCompatibility = JavaVersion.VERSION_1_8
        targetCompatibility = JavaVersion.VERSION_1_8
    }
    kotlinOptions {
        jvmTarget = "1.8"
    }
    buildFeatures {
        compose = true
    }
    composeOptions {
        kotlinCompilerExtensionVersion = "1.5.1"
    }
    packaging {
        resources {
            excludes += "/META-INF/{AL2.0,LGPL2.1}"
        }
    }
}

dependencies {

    implementation("androidx.core:core-ktx:1.12.0")
    implementation("androidx.lifecycle:lifecycle-runtime-ktx:2.7.0")
    implementation("androidx.activity:activity-compose:1.8.2")
    implementation(platform("androidx.compose:compose-bom:2023.08.00"))
    implementation("androidx.compose.ui:ui")
    implementation("androidx.compose.ui:ui-graphics")
    implementation("androidx.compose.ui:ui-tooling-preview")
    implementation("androidx.compose.material3:material3")
    testImplementation("junit:junit:4.13.2")
    androidTestImplementation("androidx.test.ext:junit:1.1.5")
    androidTestImplementation("androidx.test.espresso:espresso-core:3.5.1")
    androidTestImplementation(platform("androidx.compose:compose-bom:2023.08.00"))
    androidTestImplementation("androidx.compose.ui:ui-test-junit4")
    debugImplementation("androidx.compose.ui:ui-tooling")
    debugImplementation("androidx.compose.ui:ui-test-manifest")
}

================================================
FILE: android/SherpaOnnxAudioTagging/app/proguard-rules.pro
================================================
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
#   http://developer.android.com/guide/developing/tools/proguard.html

# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
#   public *;
#}

# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable

# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile

================================================
FILE: android/SherpaOnnxAudioTagging/app/src/androidTest/java/com/k2fsa/sherpa/onnx/audio/tagging/ExampleInstrumentedTest.kt
================================================
package com.k2fsa.sherpa.onnx.audio.tagging

import androidx.test.platform.app.InstrumentationRegistry
import androidx.test.ext.junit.runners.AndroidJUnit4

import org.junit.Test
import org.junit.runner.RunWith

import org.junit.Assert.*

/**
 * Instrumented test, which will execute on an Android device.
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
@RunWith(AndroidJUnit4::class)
class ExampleInstrumentedTest {
    @Test
    fun useAppContext() {
        // Context of the app under test.
        val appContext = InstrumentationRegistry.getInstrumentation().targetContext
        assertEquals("com.k2fsa.sherpa.onnx.audio.tagging", appContext.packageName)
    }
}

================================================
FILE: android/SherpaOnnxAudioTagging/app/src/main/AndroidManifest.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:tools="http://schemas.android.com/tools">

    <uses-permission android:name="android.permission.RECORD_AUDIO" />

    <application
        android:allowBackup="true"
        android:dataExtractionRules="@xml/data_extraction_rules"
        android:fullBackupContent="@xml/backup_rules"
        android:icon="@mipmap/ic_launcher"
        android:label="@string/app_name"
        android:roundIcon="@mipmap/ic_launcher_round"
        android:supportsRtl="true"
        android:theme="@style/Theme.SherpaOnnxAudioTagging"
        tools:targetApi="31">
        <activity
            android:name=".MainActivity"
            android:exported="true"
            android:label="@string/app_name"
            android:theme="@style/Theme.SherpaOnnxAudioTagging">
            <intent-filter>
                <action android:name="android.intent.action.MAIN" />

                <category android:name="android.intent.category.LAUNCHER" />
            </intent-filter>
        </activity>
    </application>

</manifest>

================================================
FILE: android/SherpaOnnxAudioTagging/app/src/main/assets/.gitignore
================================================


================================================
FILE: android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/Home.kt
================================================
@file:OptIn(ExperimentalMaterial3Api::class, ExperimentalFoundationApi::class)

package com.k2fsa.sherpa.onnx.audio.tagging

import android.Manifest
import android.app.Activity
import android.content.pm.PackageManager
import android.media.AudioFormat
import android.media.AudioRecord
import android.media.MediaRecorder
import android.util.Log
import androidx.compose.foundation.ExperimentalFoundationApi
import androidx.compose.foundation.layout.Arrangement
import androidx.compose.foundation.layout.Box
import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.PaddingValues
import androidx.compose.foundation.layout.Row
import androidx.compose.foundation.layout.Spacer
import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.foundation.layout.fillMaxWidth
import androidx.compose.foundation.layout.height
import androidx.compose.foundation.layout.padding
import androidx.compose.foundation.lazy.LazyColumn
import androidx.compose.foundation.lazy.items
import androidx.compose.material3.Button
import androidx.compose.material3.CenterAlignedTopAppBar
import androidx.compose.material3.ExperimentalMaterial3Api
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.Scaffold
import androidx.compose.material3.Slider
import androidx.compose.material3.Surface
import androidx.compose.material3.Text
import androidx.compose.material3.TopAppBarDefaults
import androidx.compose.runtime.Composable
import androidx.compose.runtime.getValue
import androidx.compose.runtime.mutableStateListOf
import androidx.compose.runtime.mutableStateOf
import androidx.compose.runtime.remember
import androidx.compose.runtime.setValue
import androidx.compose.ui.Alignment
import androidx.compose.ui.Modifier
import androidx.compose.ui.platform.LocalContext
import androidx.compose.ui.text.font.FontWeight
import androidx.compose.ui.text.style.TextAlign
import androidx.compose.ui.unit.dp
import androidx.compose.ui.unit.sp
import androidx.core.app.ActivityCompat
import com.k2fsa.sherpa.onnx.AudioEvent
import kotlin.concurrent.thread


@Composable
fun Home() {
    Scaffold(
        topBar = {
            CenterAlignedTopAppBar(
                colors = TopAppBarDefaults.topAppBarColors(
                    containerColor = MaterialTheme.colorScheme.primaryContainer,
                    titleContentColor = MaterialTheme.colorScheme.primary,
                ),
                title = {
                    Text(
                        "Next-gen Kaldi: Audio tagging",
                        fontWeight = FontWeight.Bold,
                        fontSize = 15.sp,
                    )
                },
            )
        },
        content = {
            MyApp(it)
        },
    )
}

private var audioRecord: AudioRecord? = null
private val sampleRateInHz = 16000

@Composable
fun MyApp(padding: PaddingValues) {
    val activity = LocalContext.current as Activity
    var threshold by remember { mutableStateOf<Float>(0.6F) }
    var isStarted by remember { mutableStateOf(false) }
    val result = remember { mutableStateListOf<AudioEvent>() }


    val onButtonClick: () -> Unit = {
        isStarted = !isStarted
        if (isStarted) {
            result.clear()
            if (ActivityCompat.checkSelfPermission(
                    activity,
                    Manifest.permission.RECORD_AUDIO
                ) != PackageManager.PERMISSION_GRANTED
            ) {
                Log.i(TAG, "Recording is not allowed")
            } else {
                val audioSource = MediaRecorder.AudioSource.MIC
                val channelConfig = AudioFormat.CHANNEL_IN_MONO
                val audioFormat = AudioFormat.ENCODING_PCM_16BIT
                val numBytes =
                    AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat)

                audioRecord = AudioRecord(
                    audioSource,
                    sampleRateInHz,
                    AudioFormat.CHANNEL_IN_MONO,
                    AudioFormat.ENCODING_PCM_16BIT,
                    numBytes * 2 // a sample has two bytes as we are using 16-bit PCM
                )

                thread(true) {
                    Log.i(TAG, "processing samples")
                    val interval = 0.1 // i.e., 100 ms
                    val bufferSize = (interval * sampleRateInHz).toInt() // in samples
                    val buffer = ShortArray(bufferSize)
                    val sampleList = ArrayList<FloatArray>()
                    audioRecord?.let {
                        it.startRecording()
                        while (isStarted) {
                            val ret = it.read(buffer, 0, buffer.size)
                            ret.let { n ->
                                val samples = FloatArray(n) { buffer[it] / 32768.0f }
                                sampleList.add(samples)
                            }
                        }
                    }
                    Log.i(TAG, "Stop recording")
                    Log.i(TAG, "Start recognition")
                    val samples = Flatten(sampleList)
                    val stream = Tagger.tagger.createStream()
                    stream.acceptWaveform(samples, sampleRateInHz)
                    val events = Tagger.tagger.compute(stream)
                    stream.release()
                    for (e in events) {
                        if (e.prob > threshold) {
                            result.add(e)
                        }

                    }

                }
            }
        }
    }

    Box(
        modifier = Modifier.fillMaxSize(),
        contentAlignment = Alignment.TopCenter
    ) {
        Column(
            Modifier.padding(padding),
            horizontalAlignment = Alignment.CenterHorizontally,
        ) {
            Spacer(modifier = Modifier.height(16.dp))
            Text("Threshold " + String.format("%.1f", threshold))
            Slider(
                value = threshold,
                onValueChange = { threshold = it },
                valueRange = 0.1F..1.0F,
                modifier = Modifier.fillMaxWidth()
            )

            Button(onClick = onButtonClick) {
                if (isStarted) {
                    Text("Stop")
                } else {
                    Text("Start")
                }
            }

            Spacer(modifier = Modifier.height(16.dp))
            LazyColumn(modifier = Modifier.fillMaxSize()) {
                if (!result.isEmpty()) {

                    item {
                        Row(
                            modifier = Modifier.fillMaxWidth(),
                            horizontalArrangement = Arrangement.SpaceEvenly
                        ) {
                            Text(
                                text = "Event name",
                            )
                            Text(
                                text = "Probability",
                            )
                        }
                    }
                }

                items(result) { event: AudioEvent ->
                    ViewRow(event = event)
                }
            }
        }
    }
}

@Composable
fun ShowResult(result: String) {
    Text(
        modifier = Modifier.fillMaxWidth(),
        textAlign = TextAlign.Center,
        color = MaterialTheme.colorScheme.primary,
        text = result,
    )
}

@Composable
fun ViewRow(
    modifier: Modifier = Modifier,
    event: AudioEvent
) {
    Surface(
        modifier = modifier
            .fillMaxWidth()
            .padding(8.dp),
        color = MaterialTheme.colorScheme.inversePrimary,
    ) {
        Row(
            modifier = modifier,
            horizontalArrangement = Arrangement.Center,
            verticalAlignment = Alignment.CenterVertically,
        ) {
            Text(
                text = event.name,
                modifier = modifier.weight(1.0F),
            )
            Text(
                text = "%.2f".format(event.prob),
                modifier = modifier.weight(1.0F),
            )
        }
    }
}

fun Flatten(sampleList: ArrayList<FloatArray>): FloatArray {
    var totalSamples = 0
    for (a in sampleList) {
        totalSamples += a.size
    }
    var i = 0
    val samples = FloatArray(totalSamples)
    for (a in sampleList) {
        for (s in a) {
            samples[i] = s
            i += 1
        }
    }
    Log.i(TAG, "$i, $totalSamples")

    return samples
}

================================================
FILE: android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/MainActivity.kt
================================================
package com.k2fsa.sherpa.onnx.audio.tagging

import android.Manifest
import android.content.pm.PackageManager
import android.os.Bundle
import android.util.Log
import android.widget.Toast
import androidx.activity.ComponentActivity
import androidx.activity.compose.setContent
import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.Surface
import androidx.compose.runtime.Composable
import androidx.compose.ui.Modifier
import androidx.core.app.ActivityCompat
import com.k2fsa.sherpa.onnx.audio.tagging.ui.theme.SherpaOnnxAudioTaggingTheme

const val TAG = "sherpa-onnx"

private const val REQUEST_RECORD_AUDIO_PERMISSION = 200

// adb emu avd hostmicon
// to enable mic inside the emulator
class MainActivity : ComponentActivity() {
    private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
    override fun onCreate(savedInstanceState: Bundle?) {

        super.onCreate(savedInstanceState)
        setContent {
            AudioTaggingApp()
        }
        ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)
        Tagger.initTagger(this.assets)
    }

    @Suppress("DEPRECATION")
    @Deprecated("Deprecated in Java")
    override fun onRequestPermissionsResult(
        requestCode: Int,
        permissions: Array<out String>,
        grantResults: IntArray
    ) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults)
        val permissionToRecordAccepted = if (requestCode == REQUEST_RECORD_AUDIO_PERMISSION) {
            grantResults[0] == PackageManager.PERMISSION_GRANTED
        } else {
            false
        }

        if (!permissionToRecordAccepted) {
            Log.e(TAG, "Audio record is disallowed")
            Toast.makeText(
                this,
                "This App needs access to the microphone",
                Toast.LENGTH_SHORT
            )
                .show()
            finish()
        }
        Log.i(TAG, "Audio record is permitted")
    }
}

@Composable
fun AudioTaggingApp() {
    SherpaOnnxAudioTaggingTheme {
        // A surface container using the 'background' color from the theme
        Surface(
            modifier = Modifier.fillMaxSize(),
            color = MaterialTheme.colorScheme.background
        ) {
            Home()
        }
    }
}

================================================
FILE: android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/Tagger.kt
================================================
package com.k2fsa.sherpa.onnx.audio.tagging

import android.content.res.AssetManager
import android.util.Log
import com.k2fsa.sherpa.onnx.AudioTagging
import com.k2fsa.sherpa.onnx.getAudioTaggingConfig


object Tagger {
    private var _tagger: AudioTagging? = null
    val tagger: AudioTagging
        get() {
            return _tagger!!
        }

    fun initTagger(assetManager: AssetManager? = null, numThreads: Int = 1) {
        synchronized(this) {
            if (_tagger != null) {
                return
            }

            Log.i("sherpa-onnx", "Initializing audio tagger")
            val config = getAudioTaggingConfig(type = 0, numThreads = numThreads)!!
            _tagger = AudioTagging(assetManager, config)
        }
    }
}

================================================
FILE: android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/ui/theme/Color.kt
================================================
package com.k2fsa.sherpa.onnx.audio.tagging.ui.theme

import androidx.compose.ui.graphics.Color

val Purple80 = Color(0xFFD0BCFF)
val PurpleGrey80 = Color(0xFFCCC2DC)
val Pink80 = Color(0xFFEFB8C8)

val Purple40 = Color(0xFF6650a4)
val PurpleGrey40 = Color(0xFF625b71)
val Pink40 = Color(0xFF7D5260)

================================================
FILE: android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/ui/theme/Theme.kt
================================================
package com.k2fsa.sherpa.onnx.audio.tagging.ui.theme

import android.app.Activity
import android.os.Build
import androidx.compose.foundation.isSystemInDarkTheme
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.darkColorScheme
import androidx.compose.material3.dynamicDarkColorScheme
import androidx.compose.material3.dynamicLightColorScheme
import androidx.compose.material3.lightColorScheme
import androidx.compose.runtime.Composable
import androidx.compose.runtime.SideEffect
import androidx.compose.ui.graphics.toArgb
import androidx.compose.ui.platform.LocalContext
import androidx.compose.ui.platform.LocalView
import androidx.core.view.WindowCompat

private val DarkColorScheme = darkColorScheme(
    primary = Purple80,
    secondary = PurpleGrey80,
    tertiary = Pink80
)

private val LightColorScheme = lightColorScheme(
    primary = Purple40,
    secondary = PurpleGrey40,
    tertiary = Pink40

    /* Other default colors to override
    background = Color(0xFFFFFBFE),
    surface = Color(0xFFFFFBFE),
    onPrimary = Color.White,
    onSecondary = Color.White,
    onTertiary = Color.White,
    onBackground = Color(0xFF1C1B1F),
    onSurface = Color(0xFF1C1B1F),
    */
)

@Composable
fun SherpaOnnxAudioTaggingTheme(
    darkTheme: Boolean = isSystemInDarkTheme(),
    // Dynamic color is available on Android 12+
    dynamicColor: Boolean = true,
    content: @Composable () -> Unit
) {
    val colorScheme = when {
        dynamicColor && Build.VERSION.SDK_INT >= Build.VERSION_CODES.S -> {
            val context = LocalContext.current
            if (darkTheme) dynamicDarkColorScheme(context) else dynamicLightColorScheme(context)
        }

        darkTheme -> DarkColorScheme
        else -> LightColorScheme
    }
    val view = LocalView.current
    if (!view.isInEditMode) {
        SideEffect {
            val window = (view.context as Activity).window
            window.statusBarColor = colorScheme.primary.toArgb()
            WindowCompat.getInsetsController(window, view).isAppearanceLightStatusBars = darkTheme
        }
    }

    MaterialTheme(
        colorScheme = colorScheme,
        typography = Typography,
        content = content
    )
}

================================================
FILE: android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/ui/theme/Type.kt
================================================
package com.k2fsa.sherpa.onnx.audio.tagging.ui.theme

import androidx.compose.material3.Typography
import androidx.compose.ui.text.TextStyle
import androidx.compose.ui.text.font.FontFamily
import androidx.compose.ui.text.font.FontWeight
import androidx.compose.ui.unit.sp

// Set of Material typography styles to start with
val Typography = Typography(
    bodyLarge = TextStyle(
        fontFamily = FontFamily.Default,
        fontWeight = FontWeight.Normal,
        fontSize = 16.sp,
        lineHeight = 24.sp,
        letterSpacing = 0.5.sp
    )
    /* Other default text styles to override
    titleLarge = TextStyle(
        fontFamily = FontFamily.Default,
        fontWeight = FontWeight.Normal,
        fontSize = 22.sp,
        lineHeight = 28.sp,
        letterSpacing = 0.sp
    ),
    labelSmall = TextStyle(
        fontFamily = FontFamily.Default,
        fontWeight = FontWeight.Medium,
        fontSize = 11.sp,
        lineHeight = 16.sp,
        letterSpacing = 0.5.sp
    )
    */
)

================================================
FILE: android/SherpaOnnxAudioTagging/app/src/main/jniLibs/arm64-v8a/.gitignore
================================================


================================================
FILE: android/SherpaOnnxAudioTagging/app/src/main/jniLibs/armeabi-v7a/.gitignore
================================================


================================================
FILE: android/SherpaOnnxAudioTagging/app/src/main/jniLibs/x86/.gitignore
================================================


================================================
FILE: android/SherpaOnnxAudioTagging/app/src/main/jniLibs/x86_64/.gitignore
================================================


================================================
FILE: android/SherpaOnnxAudioTagging/app/src/main/res/drawable/ic_launcher_background.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">
    <path
        android:fillColor="#3DDC84"
        android:pathData="M0,0h108v108h-108z" />
    <path
        android:fillColor="#00000000"
        android:pathData="M9,0L9,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,0L19,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M29,0L29,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M39,0L39,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M49,0L49,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M59,0L59,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M69,0L69,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M79,0L79,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M89,0L89,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M99,0L99,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,9L108,9"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,19L108,19"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,29L108,29"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,39L108,39"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,49L108,49"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,59L108,59"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,69L108,69"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,79L108,79"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,89L108,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,99L108,99"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,29L89,29"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,39L89,39"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,49L89,49"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,59L89,59"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,69L89,69"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,79L89,79"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M29,19L29,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M39,19L39,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M49,19L49,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M59,19L59,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M69,19L69,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M79,19L79,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
</vector>


================================================
FILE: android/SherpaOnnxAudioTagging/app/src/main/res/drawable-v24/ic_launcher_foreground.xml
================================================
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:aapt="http://schemas.android.com/aapt"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">
    <path android:pathData="M31,63.928c0,0 6.4,-11 12.1,-13.1c7.2,-2.6 26,-1.4 26,-1.4l38.1,38.1L107,108.928l-32,-1L31,63.928z">
        <aapt:attr name="android:fillColor">
            <gradient
                android:endX="85.84757"
                android:endY="92.4963"
                android:startX="42.9492"
                android:startY="49.59793"
                android:type="linear">
                <item
                    android:color="#44000000"
                    android:offset="0.0" />
                <item
                    android:color="#00000000"
                    android:offset="1.0" />
            </gradient>
        </aapt:attr>
    </path>
    <path
        android:fillColor="#FFFFFF"
        android:fillType="nonZero"
        android:pathData="M65.3,45.828l3.8,-6.6c0.2,-0.4 0.1,-0.9 -0.3,-1.1c-0.4,-0.2 -0.9,-0.1 -1.1,0.3l-3.9,6.7c-6.3,-2.8 -13.4,-2.8 -19.7,0l-3.9,-6.7c-0.2,-0.4 -0.7,-0.5 -1.1,-0.3C38.8,38.328 38.7,38.828 38.9,39.228l3.8,6.6C36.2,49.428 31.7,56.028 31,63.928h46C76.3,56.028 71.8,49.428 65.3,45.828zM43.4,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2c-0.3,-0.7 -0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C45.3,56.528 44.5,57.328 43.4,57.328L43.4,57.328zM64.6,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2s-0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C66.5,56.528 65.6,57.328 64.6,57.328L64.6,57.328z"
        android:strokeWidth="1"
        android:strokeColor="#00000000" />
</vector>

================================================
FILE: android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@drawable/ic_launcher_background" />
    <foreground android:drawable="@drawable/ic_launcher_foreground" />
    <monochrome android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

================================================
FILE: android/SherpaOnnxAudioTagging/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@drawable/ic_launcher_background" />
    <foreground android:drawable="@drawable/ic_launcher_foreground" />
    <monochrome android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

================================================
FILE: android/SherpaOnnxAudioTagging/app/src/main/res/values/colors.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<resources>
    <color name="purple_200">#FFBB86FC</color>
    <color name="purple_500">#FF6200EE</color>
    <color name="purple_700">#FF3700B3</color>
    <color name="teal_200">#FF03DAC5</color>
    <color name="teal_700">#FF018786</color>
    <color name="black">#FF000000</color>
    <color name="white">#FFFFFFFF</color>
</resources>

================================================
FILE: android/SherpaOnnxAudioTagging/app/src/main/res/values/strings.xml
================================================
<resources>
    <string name="app_name">Audio Tagging</string>
</resources>


================================================
FILE: android/SherpaOnnxAudioTagging/app/src/main/res/values/themes.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<resources>

    <style name="Theme.SherpaOnnxAudioTagging" parent="android:Theme.Material.Light.NoActionBar" />
</resources>

================================================
FILE: android/SherpaOnnxAudioTagging/app/src/main/res/xml/backup_rules.xml
================================================
<?xml version="1.0" encoding="utf-8"?><!--
   Sample backup rules file; uncomment and customize as necessary.
   See https://developer.android.com/guide/topics/data/autobackup
   for details.
   Note: This file is ignored for devices older that API 31
   See https://developer.android.com/about/versions/12/backup-restore
-->
<full-backup-content>
    <!--
   <include domain="sharedpref" path="."/>
   <exclude domain="sharedpref" path="device.xml"/>
-->
</full-backup-content>

================================================
FILE: android/SherpaOnnxAudioTagging/app/src/main/res/xml/data_extraction_rules.xml
================================================
<?xml version="1.0" encoding="utf-8"?><!--
   Sample data extraction rules file; uncomment and customize as necessary.
   See https://developer.android.com/about/versions/12/backup-restore#xml-changes
   for details.
-->
<data-extraction-rules>
    <cloud-backup>
        <!-- TODO: Use <include> and <exclude> to control what is backed up.
        <include .../>
        <exclude .../>
        -->
    </cloud-backup>
    <!--
    <device-transfer>
        <include .../>
        <exclude .../>
    </device-transfer>
    -->
</data-extraction-rules>

================================================
FILE: android/SherpaOnnxAudioTagging/app/src/test/java/com/k2fsa/sherpa/onnx/audio/tagging/ExampleUnitTest.kt
================================================
package com.k2fsa.sherpa.onnx.audio.tagging

import org.junit.Test

import org.junit.Assert.*

/**
 * Example local unit test, which will execute on the development machine (host).
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
class ExampleUnitTest {
    @Test
    fun addition_isCorrect() {
        assertEquals(4, 2 + 2)
    }
}

================================================
FILE: android/SherpaOnnxAudioTagging/build.gradle.kts
================================================
// Top-level build file where you can add configuration options common to all sub-projects/modules.
plugins {
    id("com.android.application") version "8.2.0" apply false
    id("org.jetbrains.kotlin.android") version "1.9.0" apply false
}

================================================
FILE: android/SherpaOnnxAudioTagging/gradle/wrapper/gradle-wrapper.properties
================================================
#Tue Apr 16 10:10:01 CST 2024
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.2-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists


================================================
FILE: android/SherpaOnnxAudioTagging/gradle.properties
================================================
# Project-wide Gradle settings.
# IDE (e.g. Android Studio) users:
# Gradle settings configured through the IDE *will override*
# any settings specified in this file.
# For more details on how to configure your build environment visit
# http://www.gradle.org/docs/current/userguide/build_environment.html
# Specifies the JVM arguments used for the daemon process.
# The setting is particularly useful for tweaking memory settings.
org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
# When configured, Gradle will run in incubating parallel mode.
# This option should only be used with decoupled projects. More details, visit
# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
# org.gradle.parallel=true
# AndroidX package structure to make it clearer which packages are bundled with the
# Android operating system, and which are packaged with your app's APK
# https://developer.android.com/topic/libraries/support-library/androidx-rn
android.useAndroidX=true
# Kotlin code style for this project: "official" or "obsolete":
kotlin.code.style=official
# Enables namespacing of each library's R class so that its R class includes only the
# resources declared in the library itself and none from the library's dependencies,
# thereby reducing the size of the R class for that library
android.nonTransitiveRClass=true

================================================
FILE: android/SherpaOnnxAudioTagging/gradlew
================================================
#!/usr/bin/env sh

#
# Copyright 2015 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

##############################################################################
##
##  Gradle start up script for UN*X
##
##############################################################################

# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
    ls=`ls -ld "$PRG"`
    link=`expr "$ls" : '.*-> \(.*\)$'`
    if expr "$link" : '/.*' > /dev/null; then
        PRG="$link"
    else
        PRG=`dirname "$PRG"`"/$link"
    fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null

APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`

# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"

warn () {
    echo "$*"
}

die () {
    echo
    echo "$*"
    echo
    exit 1
}

# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
  CYGWIN* )
    cygwin=true
    ;;
  Darwin* )
    darwin=true
    ;;
  MINGW* )
    msys=true
    ;;
  NONSTOP* )
    nonstop=true
    ;;
esac

CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar


# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
        # IBM's JDK on AIX uses strange locations for the executables
        JAVACMD="$JAVA_HOME/jre/sh/java"
    else
        JAVACMD="$JAVA_HOME/bin/java"
    fi
    if [ ! -x "$JAVACMD" ] ; then
        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
else
    JAVACMD="java"
    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi

# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
    MAX_FD_LIMIT=`ulimit -H -n`
    if [ $? -eq 0 ] ; then
        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
            MAX_FD="$MAX_FD_LIMIT"
        fi
        ulimit -n $MAX_FD
        if [ $? -ne 0 ] ; then
            warn "Could not set maximum file descriptor limit: $MAX_FD"
        fi
    else
        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
    fi
fi

# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi

# For Cygwin or MSYS, switch paths to Windows format before running java
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`

    JAVACMD=`cygpath --unix "$JAVACMD"`

    # We build the pattern for arguments to be converted via cygpath
    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
    SEP=""
    for dir in $ROOTDIRSRAW ; do
        ROOTDIRS="$ROOTDIRS$SEP$dir"
        SEP="|"
    done
    OURCYGPATTERN="(^($ROOTDIRS))"
    # Add a user-defined pattern to the cygpath arguments
    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
    fi
    # Now convert the arguments - kludge to limit ourselves to /bin/sh
    i=0
    for arg in "$@" ; do
        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
        CHECK2=`echo "$arg"|egrep -c "^-"`                                 ### Determine if an option

        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Added a condition
            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
        else
            eval `echo args$i`="\"$arg\""
        fi
        i=`expr $i + 1`
    done
    case $i in
        0) set -- ;;
        1) set -- "$args0" ;;
        2) set -- "$args0" "$args1" ;;
        3) set -- "$args0" "$args1" "$args2" ;;
        4) set -- "$args0" "$args1" "$args2" "$args3" ;;
        5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
        6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
        7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
        8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
        9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
    esac
fi

# Escape application args
save () {
    for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
    echo " "
}
APP_ARGS=`save "$@"`

# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"

exec "$JAVACMD" "$@"


================================================
FILE: android/SherpaOnnxAudioTagging/gradlew.bat
================================================
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem      https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem

@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem  Gradle startup script for Windows
@rem
@rem ##########################################################################

@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal

set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"

@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome

set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto execute

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto execute

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar


@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*

:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if  not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1

:mainEnd
if "%OS%"=="Windows_NT" endlocal

:omega


================================================
FILE: android/SherpaOnnxAudioTagging/settings.gradle.kts
================================================
pluginManagement {
    repositories {
        google()
        mavenCentral()
        gradlePluginPortal()
    }
}
dependencyResolutionManagement {
    repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
    repositories {
        google()
        mavenCentral()
    }
}

rootProject.name = "SherpaOnnxAudioTagging"
include(":app")


================================================
FILE: android/SherpaOnnxAudioTaggingWearOs/.gitignore
================================================
*.iml
.gradle
/local.properties
/.idea/caches
/.idea/libraries
/.idea/modules.xml
/.idea/workspace.xml
/.idea/navEditor.xml
/.idea/assetWizardSettings.xml
.DS_Store
/build
/captures
.externalNativeBuild
.cxx
local.properties


================================================
FILE: android/SherpaOnnxAudioTaggingWearOs/app/.gitignore
================================================
/build

================================================
FILE: android/SherpaOnnxAudioTaggingWearOs/app/build.gradle.kts
================================================
plugins {
    id("com.android.application")
    id("org.jetbrains.kotlin.android")
}

android {
    namespace = "com.k2fsa.sherpa.onnx.audio.tagging.wear.os"
    compileSdk = 34

    defaultConfig {
        applicationId = "com.k2fsa.sherpa.onnx.audio.tagging.wear.os"
        minSdk = 26
        targetSdk = 34
        versionCode = 20260320
        versionName = "1.12.31"
        vectorDrawables {
            useSupportLibrary = true
        }

    }

    buildTypes {
        release {
            isMinifyEnabled = false
            proguardFiles(
                getDefaultProguardFile("proguard-android-optimize.txt"),
                "proguard-rules.pro"
            )
        }
    }
    compileOptions {
        sourceCompatibility = JavaVersion.VERSION_1_8
        targetCompatibility = JavaVersion.VERSION_1_8
    }
    kotlinOptions {
        jvmTarget = "1.8"
    }
    buildFeatures {
        compose = true
    }
    composeOptions {
        kotlinCompilerExtensionVersion = "1.5.1"
    }
    packaging {
        resources {
            excludes += "/META-INF/{AL2.0,LGPL2.1}"
        }
    }
}

dependencies {

    implementation("com.google.android.gms:play-services-wearable:18.1.0")
    implementation(platform("androidx.compose:compose-bom:2023.08.00"))
    implementation("androidx.compose.ui:ui")
    implementation("androidx.compose.ui:ui-tooling-preview")
    implementation("androidx.wear.compose:compose-material:1.1.2")
    implementation("androidx.wear.compose:compose-foundation:1.1.2")
    implementation("androidx.activity:activity-compose:1.7.2")
    implementation("androidx.core:core-splashscreen:1.0.1")
    implementation("androidx.compose.material3:material3-android:1.2.1")
    androidTestImplementation(platform("androidx.compose:compose-bom:2023.08.00"))
    androidTestImplementation("androidx.compose.ui:ui-test-junit4")
    debugImplementation("androidx.compose.ui:ui-tooling")
    debugImplementation("androidx.compose.ui:ui-test-manifest")
}

================================================
FILE: android/SherpaOnnxAudioTaggingWearOs/app/lint.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<lint>
    <!-- Ignore the IconLocation for the Tile preview images -->
    <issue id="IconLocation">
        <ignore path="res/drawable/tile_preview.png" />
        <ignore path="res/drawable-round/tile_preview.png" />
    </issue>
</lint>

================================================
FILE: android/SherpaOnnxAudioTaggingWearOs/app/proguard-rules.pro
================================================
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
#   http://developer.android.com/guide/developing/tools/proguard.html

# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
#   public *;
#}

# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable

# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile

================================================
FILE: android/SherpaOnnxAudioTaggingWearOs/app/src/main/AndroidManifest.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android">

    <uses-permission android:name="android.permission.WAKE_LOCK" />
    <uses-permission android:name="android.permission.RECORD_AUDIO" />

    <uses-feature android:name="android.hardware.type.watch" />

    <application
        android:allowBackup="true"
        android:icon="@mipmap/ic_launcher"
        android:label="@string/app_name"
        android:supportsRtl="true"
        android:theme="@android:style/Theme.DeviceDefault">
        <uses-library
            android:name="com.google.android.wearable"
            android:required="true" />

        <!--
               Set to true if your app is Standalone, that is, it does not require the handheld
               app to run.
        -->
        <meta-data
            android:name="com.google.android.wearable.standalone"
            android:value="true" />

        <activity
            android:name=".presentation.MainActivity"
            android:exported="true"
            android:taskAffinity=""
            android:theme="@style/MainActivityTheme.Starting">
            <intent-filter>
                <action android:name="android.intent.action.MAIN" />

                <category android:name="android.intent.category.LAUNCHER" />
            </intent-filter>
        </activity>
    </application>

</manifest>

================================================
FILE: android/SherpaOnnxAudioTaggingWearOs/app/src/main/assets/.gitignore
================================================


================================================
FILE: android/SherpaOnnxAudioTaggingWearOs/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/wear/os/presentation/HomeScreen.kt
================================================
package com.k2fsa.sherpa.onnx.audio.tagging.wear.os.presentation

import android.Manifest
import android.app.Activity
import android.content.pm.PackageManager
import android.media.AudioFormat
import android.media.AudioRecord
import android.media.MediaRecorder
import android.util.Log
import androidx.compose.foundation.background
import androidx.compose.foundation.layout.Arrangement
import androidx.compose.foundation.layout.Box
import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.Row
import androidx.compose.foundation.layout.Spacer
import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.foundation.layout.fillMaxWidth
import androidx.compose.foundation.layout.height
import androidx.compose.material3.Slider
import androidx.compose.runtime.Composable
import androidx.compose.runtime.getValue
import androidx.compose.runtime.mutableStateOf
import androidx.compose.runtime.remember
import androidx.compose.runtime.setValue
import androidx.compose.ui.Alignment
import androidx.compose.ui.Modifier
import androidx.compose.ui.platform.LocalContext
import androidx.compose.ui.text.style.TextAlign
import androidx.compose.ui.unit.dp
import androidx.compose.ui.unit.sp
import androidx.core.app.ActivityCompat
import androidx.wear.compose.material.Button
import androidx.wear.compose.material.MaterialTheme
import androidx.wear.compose.material.Text
import com.k2fsa.sherpa.onnx.AudioEvent
import com.k2fsa.sherpa.onnx.audio.tagging.Tagger
import com.k2fsa.sherpa.onnx.audio.tagging.wear.os.presentation.theme.SherpaOnnxAudioTaggingWearOsTheme
import kotlin.concurrent.thread

private var audioRecord: AudioRecord? = null
private val sampleRateInHz = 16000

@Composable
fun HomeScreen() {
    val activity = LocalContext.current as Activity
    var threshold by remember { mutableStateOf<Float>(0.6F) }
    var firstTime by remember { mutableStateOf(true) }
    var isStarted by remember { mutableStateOf(false) }
    var result by remember { mutableStateOf("") }
    val onButtonClick: () -> Unit = {
        firstTime = false

        isStarted = !isStarted
        if (isStarted) {
            result = ""
            if (ActivityCompat.checkSelfPermission(
                    activity,
                    Manifest.permission.RECORD_AUDIO
                ) != PackageManager.PERMISSION_GRANTED
            ) {
                Log.i(TAG, "Recording is not allowed")
            } else {
                val audioSource = MediaRecorder.AudioSource.MIC
                val channelConfig = AudioFormat.CHANNEL_IN_MONO
                val audioFormat = AudioFormat.ENCODING_PCM_16BIT
                val numBytes =
                    AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat)

                audioRecord = AudioRecord(
                    audioSource,
                    sampleRateInHz,
                    AudioFormat.CHANNEL_IN_MONO,
                    AudioFormat.ENCODING_PCM_16BIT,
                    numBytes * 2 // a sample has two bytes as we are using 16-bit PCM
                )

                thread(true) {
                    Log.i(TAG, "processing samples")
                    val interval = 0.1 // i.e., 100 ms
                    val bufferSize = (interval * sampleRateInHz).toInt() // in samples
                    val buffer = ShortArray(bufferSize)
                    val sampleList = ArrayList<FloatArray>()
                    audioRecord?.let {
                        it.startRecording()
                        while (isStarted) {
                            val ret = it.read(buffer, 0, buffer.size)
                            ret.let { n ->
                                val samples = FloatArray(n) { buffer[it] / 32768.0f }
                                sampleList.add(samples)
                            }
                        }
                    }
                    Log.i(TAG, "Stop recording")
                    Log.i(TAG, "Start recognition")
                    val samples = Flatten(sampleList)
                    val stream = Tagger.tagger.createStream()
                    stream.acceptWaveform(samples, sampleRateInHz)
                    val events = Tagger.tagger.compute(stream)
                    stream.release()

                    var str: String = ""
                    for (e in events) {
                        if (e.prob > threshold) {
                            str += "%s (%.2f)\n".format(e.name, e.prob)
                        }
                    }
                    result = str
                }
            }
        }
    }


    SherpaOnnxAudioTaggingWearOsTheme {
        Box(
            modifier = Modifier
                .fillMaxSize()
                .background(MaterialTheme.colors.background),
            contentAlignment = Alignment.Center
        ) {
            Column(
                horizontalAlignment = Alignment.CenterHorizontally
            ) {
                Spacer(modifier = Modifier.height(16.dp))
                if (firstTime) {
                    ShowMessage()
                }

                Spacer(modifier = Modifier.height(16.dp))
                Text(
                    result,
                    fontSize = 12.sp,
                )

                Text(
                    "Threshold " + String.format("%.1f", threshold),
                    fontSize = 12.sp
                )
                Slider(
                    value = threshold,
                    onValueChange = { threshold = it },
                    valueRange = 0.1F..1.0F,
                    modifier = Modifier.fillMaxWidth()
                )
                Button(
                    onClick = onButtonClick,
                ) {
                    if (isStarted) {
                        Text("Stop")
                    } else {
                        Text("Start")
                    }
                }
            }
        }
    }
}

@Composable
fun ShowMessage() {
    val msg = "Audio tagging\nwith\nNext-gen Kaldi"
    Text(
        modifier = Modifier.fillMaxWidth(),
        textAlign = TextAlign.Center,
        color = MaterialTheme.colors.primary,
        text = msg,
    )
}

@Composable
fun ViewRow(
    modifier: Modifier = Modifier,
    event: AudioEvent
) {
    Row(
        modifier = modifier,
        horizontalArrangement = Arrangement.Center,
        verticalAlignment = Alignment.CenterVertically,
    ) {
        Text(
            text = event.name,
            modifier = modifier.weight(1.0F),
        )
        Text(
            text = "%.2f".format(event.prob),
            modifier = modifier.weight(1.0F),
        )
    }

}


fun Flatten(sampleList: ArrayList<FloatArray>): FloatArray {
    var totalSamples = 0
    for (a in sampleList) {
        totalSamples += a.size
    }
    var i = 0
    val samples = FloatArray(totalSamples)
    for (a in sampleList) {
        for (s in a) {
            samples[i] = s
            i += 1
        }
    }
    Log.i(TAG, "$i, $totalSamples")

    return samples
}

================================================
FILE: android/SherpaOnnxAudioTaggingWearOs/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/wear/os/presentation/MainActivity.kt
================================================
/* While this template provides a good starting point for using Wear Compose, you can always
 * take a look at https://github.com/android/wear-os-samples/tree/main/ComposeStarter and
 * https://github.com/android/wear-os-samples/tree/main/ComposeAdvanced to find the most up to date
 * changes to the libraries and their usages.
 */

package com.k2fsa.sherpa.onnx.audio.tagging.wear.os.presentation

import android.Manifest
import android.content.pm.PackageManager
import android.os.Bundle
import android.util.Log
import android.view.WindowManager
import android.widget.Toast
import androidx.activity.ComponentActivity
import androidx.activity.compose.setContent
import androidx.compose.runtime.Composable
import androidx.core.app.ActivityCompat
import androidx.core.splashscreen.SplashScreen.Companion.installSplashScreen
import com.k2fsa.sherpa.onnx.audio.tagging.Tagger

const val TAG = "sherpa-onnx"
private const val REQUEST_RECORD_AUDIO_PERMISSION = 200

// adb emu avd hostmicon
// to enable mic inside the emulator

class MainActivity : ComponentActivity() {
    private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
    override fun onCreate(savedInstanceState: Bundle?) {
        installSplashScreen()

        super.onCreate(savedInstanceState)

        // Keep the screen always on
        // https://developer.android.com/develop/background-work/background-tasks/scheduling/wakelock
        window.addFlags(WindowManager.LayoutParams.FLAG_KEEP_SCREEN_ON)

        setTheme(android.R.style.Theme_DeviceDefault)

        setContent {
            WearApp()
        }

        ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)
        Tagger.initTagger(this.assets, numThreads = 2)
    }

    @Suppress("DEPRECATION")
    override fun onRequestPermissionsResult(
        requestCode: Int,
        permissions: Array<out String>,
        grantResults: IntArray
    ) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults)
        val permissionToRecordAccepted = if (requestCode == REQUEST_RECORD_AUDIO_PERMISSION) {
            grantResults[0] == PackageManager.PERMISSION_GRANTED
        } else {
            false
        }

        if (!permissionToRecordAccepted) {
            Log.e(TAG, "Audio record is disallowed")
            Toast.makeText(
                this,
                "This App needs access to the microphone",
                Toast.LENGTH_SHORT
            )
                .show()
            finish()
        }
        Log.i(TAG, "Audio record is permitted")
    }
}

@Composable
fun WearApp() {
    HomeScreen()
}

================================================
FILE: android/SherpaOnnxAudioTaggingWearOs/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/wear/os/presentation/theme/Theme.kt
================================================
package com.k2fsa.sherpa.onnx.audio.tagging.wear.os.presentation.theme

import androidx.compose.runtime.Composable
import androidx.wear.compose.material.MaterialTheme

@Composable
fun SherpaOnnxAudioTaggingWearOsTheme(
    content: @Composable () -> Unit
) {
    /**
     * Empty theme to customize for your app.
     * See: https://developer.android.com/jetpack/compose/designsystems/custom
     */
    MaterialTheme(
        content = content
    )
}

================================================
FILE: android/SherpaOnnxAudioTaggingWearOs/app/src/main/jniLibs/arm64-v8a/.gitignore
================================================


================================================
FILE: android/SherpaOnnxAudioTaggingWearOs/app/src/main/jniLibs/armeabi-v7a/.gitignore
================================================


================================================
FILE: android/SherpaOnnxAudioTaggingWearOs/app/src/main/jniLibs/x86/.gitignore
================================================


================================================
FILE: android/SherpaOnnxAudioTaggingWearOs/app/src/main/jniLibs/x86_64/.gitignore
================================================


================================================
FILE: android/SherpaOnnxAudioTaggingWearOs/app/src/main/res/drawable/splash_icon.xml
================================================
<?xml version="1.0" encoding="utf-8"?>

<layer-list xmlns:android="http://schemas.android.com/apk/res/android">
    <item
        android:width="48dp"
        android:height="48dp"
        android:gravity="center">
        <shape android:shape="oval">
            <solid android:color="#FFFFFF" />
        </shape>
    </item>
    <item
        android:width="40dp"
        android:height="40dp"
        android:gravity="center">
        <vector
            android:width="24dp"
            android:height="24dp"
            android:tint="#000000"
            android:viewportWidth="24"
            android:viewportHeight="24">
            <path
                android:fillColor="#FF000000"
                android:pathData="M17.6,11.48 L19.44,8.3a0.63,0.63 0,0 0,-1.09 -0.63l-1.88,3.24a11.43,11.43 0,0 0,-8.94 0L5.65,7.67a0.63,0.63 0,0 0,-1.09 0.63L6.4,11.48A10.81,10.81 0,0 0,1 20L23,20A10.81,10.81 0,0 0,17.6 11.48ZM7,17.25A1.25,1.25 0,1 1,8.25 16,1.25 1.25,0 0,1 7,17.25ZM17,17.25A1.25,1.25 0,1 1,18.25 16,1.25 1.25,0 0,1 17,17.25Z" />
        </vector>
    </item>
</layer-list>


================================================
FILE: android/SherpaOnnxAudioTaggingWearOs/app/src/main/res/values/strings.xml
================================================
<resources>
    <string name="app_name">Audio Tagging</string>
    <!--
    This string is used for square devices and overridden by hello_world in
    values-round/strings.xml for round devices.
    -->
    <string name="hello_world">From the Square world,\nHello, %1$s!</string>
</resources>

================================================
FILE: android/SherpaOnnxAudioTaggingWearOs/app/src/main/res/values/styles.xml
================================================
<resources>

    <style name="MainActivityTheme.Starting" parent="Theme.SplashScreen">
        <item name="windowSplashScreenBackground">@android:color/black</item>
        <item name="windowSplashScreenAnimatedIcon">@drawable/splash_icon</item>
        <item name="postSplashScreenTheme">@android:style/Theme.DeviceDefault</item>
    </style>
</resources>

================================================
FILE: android/SherpaOnnxAudioTaggingWearOs/app/src/main/res/values-round/strings.xml
================================================
<resources>
    <string name="hello_world">From the Round world,\nHello, %1$s!</string>
</resources>

================================================
FILE: android/SherpaOnnxAudioTaggingWearOs/build.gradle.kts
================================================
// Top-level build file where you can add configuration options common to all sub-projects/modules.
plugins {
    id("com.android.application") version "8.2.0" apply false
    id("org.jetbrains.kotlin.android") version "1.9.0" apply false
}

================================================
FILE: android/SherpaOnnxAudioTaggingWearOs/gradle/wrapper/gradle-wrapper.properties
================================================
#Tue Apr 16 20:57:10 CST 2024
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.2-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists


================================================
FILE: android/SherpaOnnxAudioTaggingWearOs/gradle.properties
================================================
# Project-wide Gradle settings.
# IDE (e.g. Android Studio) users:
# Gradle settings configured through the IDE *will override*
# any settings specified in this file.
# For more details on how to configure your build environment visit
# http://www.gradle.org/docs/current/userguide/build_environment.html
# Specifies the JVM arguments used for the daemon process.
# The setting is particularly useful for tweaking memory settings.
org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
# When configured, Gradle will run in incubating parallel mode.
# This option should only be used with decoupled projects. More details, visit
# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
# org.gradle.parallel=true
# AndroidX package structure to make it clearer which packages are bundled with the
# Android operating system, and which are packaged with your app's APK
# https://developer.android.com/topic/libraries/support-library/androidx-rn
android.useAndroidX=true
# Kotlin code style for this project: "official" or "obsolete":
kotlin.code.style=official
# Enables namespacing of each library's R class so that its R class includes only the
# resources declared in the library itself and none from the library's dependencies,
# thereby reducing the size of the R class for that library
android.nonTransitiveRClass=true

================================================
FILE: android/SherpaOnnxAudioTaggingWearOs/gradlew
================================================
#!/usr/bin/env sh

#
# Copyright 2015 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

##############################################################################
##
##  Gradle start up script for UN*X
##
##############################################################################

# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
    ls=`ls -ld "$PRG"`
    link=`expr "$ls" : '.*-> \(.*\)$'`
    if expr "$link" : '/.*' > /dev/null; then
        PRG="$link"
    else
        PRG=`dirname "$PRG"`"/$link"
    fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null

APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`

# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"

warn () {
    echo "$*"
}

die () {
    echo
    echo "$*"
    echo
    exit 1
}

# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
  CYGWIN* )
    cygwin=true
    ;;
  Darwin* )
    darwin=true
    ;;
  MINGW* )
    msys=true
    ;;
  NONSTOP* )
    nonstop=true
    ;;
esac

CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar


# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
        # IBM's JDK on AIX uses strange locations for the executables
        JAVACMD="$JAVA_HOME/jre/sh/java"
    else
        JAVACMD="$JAVA_HOME/bin/java"
    fi
    if [ ! -x "$JAVACMD" ] ; then
        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
else
    JAVACMD="java"
    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi

# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
    MAX_FD_LIMIT=`ulimit -H -n`
    if [ $? -eq 0 ] ; then
        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
            MAX_FD="$MAX_FD_LIMIT"
        fi
        ulimit -n $MAX_FD
        if [ $? -ne 0 ] ; then
            warn "Could not set maximum file descriptor limit: $MAX_FD"
        fi
    else
        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
    fi
fi

# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi

# For Cygwin or MSYS, switch paths to Windows format before running java
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`

    JAVACMD=`cygpath --unix "$JAVACMD"`

    # We build the pattern for arguments to be converted via cygpath
    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
    SEP=""
    for dir in $ROOTDIRSRAW ; do
        ROOTDIRS="$ROOTDIRS$SEP$dir"
        SEP="|"
    done
    OURCYGPATTERN="(^($ROOTDIRS))"
    # Add a user-defined pattern to the cygpath arguments
    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
    fi
    # Now convert the arguments - kludge to limit ourselves to /bin/sh
    i=0
    for arg in "$@" ; do
        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
        CHECK2=`echo "$arg"|egrep -c "^-"`                                 ### Determine if an option

        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Added a condition
            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
        else
            eval `echo args$i`="\"$arg\""
        fi
        i=`expr $i + 1`
    done
    case $i in
        0) set -- ;;
        1) set -- "$args0" ;;
        2) set -- "$args0" "$args1" ;;
        3) set -- "$args0" "$args1" "$args2" ;;
        4) set -- "$args0" "$args1" "$args2" "$args3" ;;
        5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
        6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
        7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
        8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
        9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
    esac
fi

# Escape application args
save () {
    for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
    echo " "
}
APP_ARGS=`save "$@"`

# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"

exec "$JAVACMD" "$@"


================================================
FILE: android/SherpaOnnxAudioTaggingWearOs/gradlew.bat
================================================
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem      https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem

@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem  Gradle startup script for Windows
@rem
@rem ##########################################################################

@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal

set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"

@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome

set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto execute

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto execute

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar


@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*

:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if  not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1

:mainEnd
if "%OS%"=="Windows_NT" endlocal

:omega


================================================
FILE: android/SherpaOnnxAudioTaggingWearOs/settings.gradle.kts
================================================
pluginManagement {
    repositories {
        google()
        mavenCentral()
        gradlePluginPortal()
    }
}
dependencyResolutionManagement {
    repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
    repositories {
        google()
        mavenCentral()
    }
}

rootProject.name = "SherpaOnnxAudioTaggingWearOs"
include(":app")
 

================================================
FILE: android/SherpaOnnxJavaDemo/.gitignore
================================================
*.iml
.gradle
/local.properties
/.idea/caches
/.idea/libraries
/.idea/modules.xml
/.idea/workspace.xml
/.idea/navEditor.xml
/.idea/assetWizardSettings.xml
.DS_Store
/build
/captures
.externalNativeBuild
.cxx
local.properties


================================================
FILE: android/SherpaOnnxJavaDemo/README.md
================================================
# Introduction

Please run the following commands to download model files before you run this Android demo:

```bash
# Assume we are inside
# /Users/fangjun/open-source/sherpa-onnx/android/SherpaOnnxJavaDemo

cd app/src/main/assets/
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2

tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2

mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx ./
mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx ./
mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx ./
mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ./

rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/*

mv encoder-epoch-99-avg-1.int8.onnx sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/
mv decoder-epoch-99-avg-1.onnx sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/
mv joiner-epoch-99-avg-1.int8.onnx sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/
mv tokens.txt sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/
```

You should have the following directory structure:
```
(py38) fangjuns-MacBook-Pro:assets fangjun$ pwd
/Users/fangjun/open-source/sherpa-onnx/android/SherpaOnnxJavaDemo/app/src/main/assets

(py38) fangjuns-MacBook-Pro:assets fangjun$ tree .
.
└── sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
    ├── decoder-epoch-99-avg-1.onnx
    ├── encoder-epoch-99-avg-1.int8.onnx
    ├── joiner-epoch-99-avg-1.int8.onnx
    └── tokens.txt

1 directory, 4 files
```

Remember to remove unused files to reduce the file size of the final APK.


================================================
FILE: android/SherpaOnnxJavaDemo/app/.gitignore
================================================
/build

================================================
FILE: android/SherpaOnnxJavaDemo/app/build.gradle
================================================
plugins {
    id 'com.android.application'
}

android {
    compileSdk 34

    defaultConfig {
        applicationId "com.k2fsa.sherpa.onnx"
        minSdk 28
        targetSdk 34
        versionCode 20260320
        versionName "1.12.31"

        testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
    }

    buildTypes {
        release {
            minifyEnabled false
            proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
        }
    }
    compileOptions {
        sourceCompatibility JavaVersion.VERSION_1_8
        targetCompatibility JavaVersion.VERSION_1_8
    }
}

dependencies {
    implementation 'androidx.appcompat:appcompat:1.3.1'
    implementation 'com.google.android.material:material:1.3.0'
    implementation 'androidx.constraintlayout:constraintlayout:1.1.3'
    implementation 'pub.devrel:easypermissions:3.0.0'
    implementation 'androidx.core:core-ktx:1.7.0'
    // implementation files('/Users/fangjun/open-source/sherpa-onnx/android/SherpaOnnxAar/sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar')
    implementation 'com.github.k2-fsa:sherpa-onnx:v1.12.31'
}


================================================
FILE: android/SherpaOnnxJavaDemo/app/proguard-rules.pro
================================================
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
#   http://developer.android.com/guide/developing/tools/proguard.html

# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
#   public *;
#}

# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable

# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile

================================================
FILE: android/SherpaOnnxJavaDemo/app/src/main/AndroidManifest.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:tools="http://schemas.android.com/tools"
    package="com.k2fsa.sherpa.onnx">
    <uses-permission android:name="android.permission.FOREGROUND_SERVICE" />
    <uses-permission android:name="android.permission.RECORD_AUDIO" />

    <application
        android:name=".Application"
        android:allowBackup="true"
        android:dataExtractionRules="@xml/data_extraction_rules"
        android:fullBackupContent="@xml/backup_rules"
        android:icon="@mipmap/ic_launcher"
        android:label="@string/app_name"
        android:roundIcon="@mipmap/ic_launcher_round"
        android:supportsRtl="true"
        android:theme="@style/Theme.SherpaOnnxJavaDemo"
        tools:targetApi="31">
        <activity
            android:name=".MainActivity"
            android:exported="true">
            <intent-filter>
                <action android:name="android.intent.action.MAIN" />
                <category android:name="android.intent.category.LAUNCHER" />
            </intent-filter>
        </activity>
        <service
            android:name=".service.SpeechSherpaRecognitionService"
            android:exported="false"/>
    </application>

</manifest>

================================================
FILE: android/SherpaOnnxJavaDemo/app/src/main/assets/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/AppViewModel.java
================================================
package com.k2fsa.sherpa.onnx;

import androidx.lifecycle.LiveData;
import androidx.lifecycle.MutableLiveData;
import androidx.lifecycle.ViewModel;

public class AppViewModel extends ViewModel {
    private final MutableLiveData<String> speechRecognitionResult = new MutableLiveData<>();

    public LiveData<String> getSpeechRecognitionResult() {
        return speechRecognitionResult;
    }

    public void setSpeechRecognitionResult(String result) {
        speechRecognitionResult.postValue(result);
    }

}


================================================
FILE: android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/Application.java
================================================
package com.k2fsa.sherpa.onnx;

import androidx.annotation.NonNull;
import androidx.lifecycle.ViewModelProvider;
import androidx.lifecycle.ViewModelStore;
import androidx.lifecycle.ViewModelStoreOwner;


public class Application extends android.app.Application implements ViewModelStoreOwner {
    public static Application sApplication;


    private AppViewModel viewModel;
    private ViewModelStore viewModelStore;

    public static Application getInstance() {
        return sApplication;
    }

    @Override
    public void onCreate() {
        super.onCreate();
        sApplication = this;
        viewModelStore = new ViewModelStore();
        viewModel = new ViewModelProvider(this).get(AppViewModel.class);
    }

    @NonNull
    @Override
    public ViewModelStore getViewModelStore() {
        return viewModelStore;
    }

    public AppViewModel getViewModel() {
        return viewModel;
    }


}


================================================
FILE: android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.java
================================================
package com.k2fsa.sherpa.onnx;

import androidx.appcompat.app.AppCompatActivity;
import androidx.core.content.ContextCompat;
import androidx.lifecycle.ViewModelProvider;

import android.Manifest;
import android.content.Intent;
import android.os.Bundle;
import android.util.Log;
import android.widget.TextView;

import com.k2fsa.sherpa.onnx.service.SpeechSherpaRecognitionService;

import pub.devrel.easypermissions.EasyPermissions;

public class MainActivity extends AppCompatActivity {
    private AppViewModel appViewModel;
    private TextView tvText;
    private static final int RC_AUDIO_PERM = 123;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);
        tvText = findViewById(R.id.text);
        requestMicrophonePermission();
    }


    private void startSpeechService() {
        Intent serviceIntent = new Intent(this, SpeechSherpaRecognitionService.class);
        ContextCompat.startForegroundService(this, serviceIntent);
        appViewModel = new ViewModelProvider(Application.getInstance()).get(AppViewModel.class);
        appViewModel.getSpeechRecognitionResult().observe(this, this::handleSpeechRecognitionResult);
    }

    private void handleSpeechRecognitionResult(String result) {
        tvText.setText(result);
    }

    private void requestMicrophonePermission() {
        String[] perms = {Manifest.permission.RECORD_AUDIO};
        if (EasyPermissions.hasPermissions(this, perms)) {
            startSpeechService();
        } else {
            EasyPermissions.requestPermissions(MainActivity.this,
                    "We need access to your microphone for voice recognition",
                    RC_AUDIO_PERM, perms);
        }
    }
}

================================================
FILE: android/SherpaOnnxJavaDemo/app/src/main/java/com/k2fsa/sherpa/onnx/service/SpeechSherpaRecognitionService.java
================================================
package com.k2fsa.sherpa.onnx.service;

import android.Manifest;
import android.annotation.SuppressLint;
import android.app.Notification;
import android.app.NotificationChannel;
import android.app.NotificationManager;
import android.app.Service;
import android.content.Intent;
import android.content.pm.PackageManager;
import android.content.res.AssetManager;
import android.media.AudioFormat;
import android.media.AudioRecord;
import android.media.MediaRecorder;
import android.os.Build;
import android.os.IBinder;
import android.text.TextUtils;
import android.util.Log;

import androidx.core.app.ActivityCompat;
import androidx.core.app.NotificationCompat;


import com.k2fsa.sherpa.onnx.AppViewModel;
import com.k2fsa.sherpa.onnx.Application;

import com.k2fsa.sherpa.onnx.OnlineModelConfig;
import com.k2fsa.sherpa.onnx.OnlineRecognizer;

import com.k2fsa.sherpa.onnx.OnlineRecognizerConfig;
import com.k2fsa.sherpa.onnx.OnlineStream;
import com.k2fsa.sherpa.onnx.OnlineTransducerModelConfig;
import com.k2fsa.sherpa.onnx.R;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import java.util.Objects;

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;


public class SpeechSherpaRecognitionService extends Service {

    private AppViewModel appViewModel;
    private OnlineRecognizer recognizer;
    private final int sampleRateInHz = 16000;

    private Thread recordingThread;
    private boolean isRecording = false;
    private int audioSource = MediaRecorder.AudioSource.MIC;
    private int channelConfig = AudioFormat.CHANNEL_IN_MONO;
    private int audioFormat = AudioFormat.ENCODING_PCM_16BIT;
    private AudioRecord audioRecord;
    private int idx = 0;
    private String lastText = "";
    private ExecutorService executor;

    @Override
    public void onCreate() {
        super.onCreate();
        startForegroundService();
        // 获取 ViewModel
        appViewModel = Application.getInstance().getViewModel();
        int numBytes = AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat);

        if (ActivityCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO) != PackageManager.PERMISSION_GRANTED) {
            // TODO: Consider calling
            //    ActivityCompat#requestPermissions
            // here to request the missing permissions, and then overriding
            //   public void onRequestPermissionsResult(int requestCode, String[] permissions,
            //                                          int[] grantResults)
            // to handle the case where the user grants the permission. See the documentation
            // for ActivityCompat#requestPermissions for more details.
            return;
        }
        audioRecord = new AudioRecord(
                audioSource,
                sampleRateInHz,
                channelConfig,
                audioFormat,
                numBytes * 2 // a sample has two bytes as we are using 16-bit PCM
        );
        executor = Executors.newSingleThreadExecutor();
        executor.execute(this::initializeSherpa);
    }


    private void initializeSherpa() {
        Log.d("Current Directory", System.getProperty("user.dir"));
        String modelDir = "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20";
        initializeSherpaDir(modelDir, modelDir);
        OnlineTransducerModelConfig onlineTransducerModelConfig = new OnlineTransducerModelConfig();
        onlineTransducerModelConfig.setEncoder(modelDir + "/encoder-epoch-99-avg-1.int8.onnx");
        onlineTransducerModelConfig.setDecoder(modelDir + "/decoder-epoch-99-avg-1.onnx");
        onlineTransducerModelConfig.setJoiner(modelDir + "/joiner-epoch-99-avg-1.int8.onnx");

        OnlineModelConfig onlineModelConfig = new OnlineModelConfig();
        onlineModelConfig.setTransducer(onlineTransducerModelConfig);
        onlineModelConfig.setTokens(modelDir + "/tokens.txt");
        onlineModelConfig.setModelType("zipformer");
        onlineModelConfig.setDebug(true);

        OnlineRecognizerConfig config = new OnlineRecognizerConfig();
        config.setModelConfig(onlineModelConfig);
        recognizer = new OnlineRecognizer(getAssets(), config);

        audioRecord.startRecording();
        startRecognition();
    }

    private void startRecognition() {
        isRecording = true;
        recordingThread = new Thread(this::processSamples);
        recordingThread.start();
    }

    private void processSamples() {
        OnlineStream stream = recognizer.createStream("");
        double interval = 0.1;
        int bufferSize = (int) (interval * sampleRateInHz);
        short[] buffer = new short[bufferSize];

        while (isRecording) {
            int ret = audioRecord != null ? audioRecord.read(buffer, 0, buffer.length) : -1;
            if (ret > 0) {
                float[] samples = new float[ret];
                for (int i = 0; i < ret; i++) {
                    samples[i] = buffer[i] / 32768.0f;
                }
                stream.acceptWaveform(samples, sampleRateInHz);
                while (recognizer.isReady(stream)) {
                    recognizer.decode(stream);
                }

                boolean isEndpoint = recognizer.isEndpoint(stream);
                String text = recognizer.getResult(stream).getText();
                if (isEndpoint) {
                    float[] tailPaddings = new float[(int) (0.8 * sampleRateInHz)];
                    stream.acceptWaveform(tailPaddings, sampleRateInHz);
                    while (recognizer.isReady(stream)) {
                        recognizer.decode(stream);
                    }
                    text = recognizer.getResult(stream).getText();
                }

                String textToDisplay = lastText;

                if (!TextUtils.isEmpty(text)) {
                    textToDisplay = TextUtils.isEmpty(text) ? idx + ": " + text : lastText + "\n" + idx + ": " + text;
                }

                if (isEndpoint) {
                    recognizer.reset(stream);
                    if (!TextUtils.isEmpty(text)) {
                        lastText = lastText + "\n" + idx + ": " + text;
                        textToDisplay = lastText;
                        idx += 1;
                    }
                    appViewModel.setSpeechRecognitionResult(textToDisplay);
                }
            }

        }
        stream.release();

    }


    @Override
    public int onStartCommand(Intent intent, int flags, int startId) {

        return START_STICKY;
    }

    @Override
    public void onDestroy() {
        super.onDestroy();
        audioRecord.stop();
        audioRecord.release();
        executor.shutdown();
        stopForeground(true);
    }

    @Override
    public IBinder onBind(Intent intent) {
        return null;
    }


    @SuppressLint("ForegroundServiceType")
    private void startForegroundService() {
        String channelId = createNotificationChannel();

        Notification notification = new NotificationCompat.Builder(this, channelId)
                .setContentTitle("Foreground Service")
                .setContentText("Running in the foreground")
                .setSmallIcon(R.drawable.ic_bg_mic_24)
                .build();

        startForeground(1, notification);
    }

    // 创建通知渠道 (针对 Android 8.0 及以上版本)
    private String createNotificationChannel() {
        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
            String channelId = "speech_channel";
            String channelName = "Speech Channel";
            NotificationChannel channel = new NotificationChannel(channelId, channelName, NotificationManager.IMPORTANCE_LOW);
            NotificationManager manager = getSystemService(NotificationManager.class);
            if (manager != null) {
                manager.createNotificationChannel(channel);
            }
            return channelId;
        } else {
            return "";
        }
    }

    private void initializeSherpaDir(String assetDir, String internalDir) {
        AssetManager assetManager = getAssets();
        File outDir = new File(getFilesDir(), internalDir);

        if (!outDir.exists()) {
            outDir.mkdirs();
        }

        try {
            String[] assets = assetManager.list(assetDir);
            if (assets != null) {
                for (String asset : assets) {
                    String assetPath = assetDir.isEmpty() ? asset : assetDir + "/" + asset;
                    File outFile = new File(outDir, asset);
                    if (Objects.requireNonNull(assetManager.list(assetPath)).length > 0) {
                        outFile.mkdirs();
                        initializeSherpaDir(assetPath, internalDir + "/" + asset); // 递归复制子目录
                    } else {
                        InputStream in = assetManager.open(assetPath);
                        OutputStream out = new FileOutputStream(outFile);

                        byte[] buffer = new byte[1024];
                        int read;
                        while ((read = in.read(buffer)) != -1) {
                            out.write(buffer, 0, read);
                        }

                        in.close();
                        out.flush();
                        out.close();
                    }
                }
            }
        } catch (IOException e) {
            Log.e("ModelCopy", "Failed to copy assets", e);
        }
    }
}


================================================
FILE: android/SherpaOnnxJavaDemo/app/src/main/res/drawable/ic_bg_mic_24.xml
================================================
<vector android:height="24dp" android:tint="#000000"
    android:viewportHeight="24" android:viewportWidth="24"
    android:width="24dp" xmlns:android="http://schemas.android.com/apk/res/android">
    <path android:fillColor="@android:color/white" android:pathData="M12,14c1.66,0 2.99,-1.34 2.99,-3L15,5c0,-1.66 -1.34,-3 -3,-3S9,3.34 9,5v6c0,1.66 1.34,3 3,3zM17.3,11c0,3 -2.54,5.1 -5.3,5.1S6.7,14 6.7,11L5,11c0,3.41 2.72,6.23 6,6.72L11,21h2v-3.28c3.28,-0.48 6,-3.3 6,-6.72h-1.7z"/>
</vector>


================================================
FILE: android/SherpaOnnxJavaDemo/app/src/main/res/drawable/ic_launcher_background.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">
    <path
        android:fillColor="#3DDC84"
        android:pathData="M0,0h108v108h-108z" />
    <path
        android:fillColor="#00000000"
        android:pathData="M9,0L9,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,0L19,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M29,0L29,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M39,0L39,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M49,0L49,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M59,0L59,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M69,0L69,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M79,0L79,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M89,0L89,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M99,0L99,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,9L108,9"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,19L108,19"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,29L108,29"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,39L108,39"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,49L108,49"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,59L108,59"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,69L108,69"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,79L108,79"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,89L108,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,99L108,99"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,29L89,29"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,39L89,39"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,49L89,49"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,59L89,59"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,69L89,69"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,79L89,79"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M29,19L29,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M39,19L39,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M49,19L49,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M59,19L59,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M69,19L69,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M79,19L79,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
</vector>


================================================
FILE: android/SherpaOnnxJavaDemo/app/src/main/res/drawable-v24/ic_launcher_foreground.xml
================================================
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:aapt="http://schemas.android.com/aapt"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">
    <path android:pathData="M31,63.928c0,0 6.4,-11 12.1,-13.1c7.2,-2.6 26,-1.4 26,-1.4l38.1,38.1L107,108.928l-32,-1L31,63.928z">
        <aapt:attr name="android:fillColor">
            <gradient
                android:endX="85.84757"
                android:endY="92.4963"
                android:startX="42.9492"
                android:startY="49.59793"
                android:type="linear">
                <item
                    android:color="#44000000"
                    android:offset="0.0" />
                <item
                    android:color="#00000000"
                    android:offset="1.0" />
            </gradient>
        </aapt:attr>
    </path>
    <path
        android:fillColor="#FFFFFF"
        android:fillType="nonZero"
        android:pathData="M65.3,45.828l3.8,-6.6c0.2,-0.4 0.1,-0.9 -0.3,-1.1c-0.4,-0.2 -0.9,-0.1 -1.1,0.3l-3.9,6.7c-6.3,-2.8 -13.4,-2.8 -19.7,0l-3.9,-6.7c-0.2,-0.4 -0.7,-0.5 -1.1,-0.3C38.8,38.328 38.7,38.828 38.9,39.228l3.8,6.6C36.2,49.428 31.7,56.028 31,63.928h46C76.3,56.028 71.8,49.428 65.3,45.828zM43.4,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2c-0.3,-0.7 -0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C45.3,56.528 44.5,57.328 43.4,57.328L43.4,57.328zM64.6,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2s-0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C66.5,56.528 65.6,57.328 64.6,57.328L64.6,57.328z"
        android:strokeWidth="1"
        android:strokeColor="#00000000" />
</vector>

================================================
FILE: android/SherpaOnnxJavaDemo/app/src/main/res/layout/activity_main.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:app="http://schemas.android.com/apk/res-auto"
    xmlns:tools="http://schemas.android.com/tools"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    tools:context=".MainActivity">

    <TextView
        android:id="@+id/text"
        android:layout_width="wrap_content"
        android:layout_height="wrap_content"
        android:text="Hello World!"

        app:layout_constraintStart_toStartOf="parent"
        app:layout_constraintTop_toTopOf="parent" />

</androidx.constraintlayout.widget.ConstraintLayout>

================================================
FILE: android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@drawable/ic_launcher_background" />
    <foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

================================================
FILE: android/SherpaOnnxJavaDemo/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@drawable/ic_launcher_background" />
    <foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

================================================
FILE: android/SherpaOnnxJavaDemo/app/src/main/res/values/colors.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<resources>
    <color name="purple_200">#FFBB86FC</color>
    <color name="purple_500">#FF6200EE</color>
    <color name="purple_700">#FF3700B3</color>
    <color name="teal_200">#FF03DAC5</color>
    <color name="teal_700">#FF018786</color>
    <color name="black">#FF000000</color>
    <color name="white">#FFFFFFFF</color>
</resources>

================================================
FILE: android/SherpaOnnxJavaDemo/app/src/main/res/values/strings.xml
================================================
<resources>
    <string name="app_name">SherpaOnnxJavaDemo</string>
</resources>

================================================
FILE: android/SherpaOnnxJavaDemo/app/src/main/res/values/themes.xml
================================================
<resources xmlns:tools="http://schemas.android.com/tools">
    <!-- Base application theme. -->
    <style name="Theme.SherpaOnnxJavaDemo" parent="Theme.MaterialComponents.DayNight.DarkActionBar">
        <!-- Primary brand color. -->
        <item name="colorPrimary">@color/purple_500</item>
        <item name="colorPrimaryVariant">@color/purple_700</item>
        <item name="colorOnPrimary">@color/white</item>
        <!-- Secondary brand color. -->
        <item name="colorSecondary">@color/teal_200</item>
        <item name="colorSecondaryVariant">@color/teal_700</item>
        <item name="colorOnSecondary">@color/black</item>
        <!-- Status bar color. -->
        <item name="android:statusBarColor" tools:targetApi="l">?attr/colorPrimaryVariant</item>
        <!-- Customize your theme here. -->
    </style>
</resources>

================================================
FILE: android/SherpaOnnxJavaDemo/app/src/main/res/values-night/themes.xml
================================================
<resources xmlns:tools="http://schemas.android.com/tools">
    <!-- Base application theme. -->
    <style name="Theme.SherpaOnnxJavaDemo" parent="Theme.MaterialComponents.DayNight.DarkActionBar">
        <!-- Primary brand color. -->
        <item name="colorPrimary">@color/purple_200</item>
        <item name="colorPrimaryVariant">@color/purple_700</item>
        <item name="colorOnPrimary">@color/black</item>
        <!-- Secondary brand color. -->
        <item name="colorSecondary">@color/teal_200</item>
        <item name="colorSecondaryVariant">@color/teal_200</item>
        <item name="colorOnSecondary">@color/black</item>
        <!-- Status bar color. -->
        <item name="android:statusBarColor" tools:targetApi="l">?attr/colorPrimaryVariant</item>
        <!-- Customize your theme here. -->
    </style>
</resources>

================================================
FILE: android/SherpaOnnxJavaDemo/app/src/main/res/xml/backup_rules.xml
================================================
<?xml version="1.0" encoding="utf-8"?><!--
   Sample backup rules file; uncomment and customize as necessary.
   See https://developer.android.com/guide/topics/data/autobackup
   for details.
   Note: This file is ignored for devices older that API 31
   See https://developer.android.com/about/versions/12/backup-restore
-->
<full-backup-content>
    <!--
   <include domain="sharedpref" path="."/>
   <exclude domain="sharedpref" path="device.xml"/>
-->
</full-backup-content>

================================================
FILE: android/SherpaOnnxJavaDemo/app/src/main/res/xml/data_extraction_rules.xml
================================================
<?xml version="1.0" encoding="utf-8"?><!--
   Sample data extraction rules file; uncomment and customize as necessary.
   See https://developer.android.com/about/versions/12/backup-restore#xml-changes
   for details.
-->
<data-extraction-rules>
    <cloud-backup>
        <!-- TODO: Use <include> and <exclude> to control what is backed up.
        <include .../>
        <exclude .../>
        -->
    </cloud-backup>
    <!--
    <device-transfer>
        <include .../>
        <exclude .../>
    </device-transfer>
    -->
</data-extraction-rules>

================================================
FILE: android/SherpaOnnxJavaDemo/build.gradle
================================================
// Top-level build file where you can add configuration options common to all sub-projects/modules.
plugins {
    id 'com.android.application' version '7.2.2' apply false
    id 'com.android.library' version '7.2.2' apply false
}

task clean(type: Delete) {
    delete rootProject.buildDir
}

================================================
FILE: android/SherpaOnnxJavaDemo/gradle/wrapper/gradle-wrapper.properties
================================================
#Tue Oct 22 10:59:18 CST 2024
distributionBase=GRADLE_USER_HOME
distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.3-bin.zip
distributionPath=wrapper/dists
zipStorePath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME


================================================
FILE: android/SherpaOnnxJavaDemo/gradle.properties
================================================
# Project-wide Gradle settings.
# IDE (e.g. Android Studio) users:
# Gradle settings configured through the IDE *will override*
# any settings specified in this file.
# For more details on how to configure your build environment visit
# http://www.gradle.org/docs/current/userguide/build_environment.html
# Specifies the JVM arguments used for the daemon process.
# The setting is particularly useful for tweaking memory settings.
org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
# When configured, Gradle will run in incubating parallel mode.
# This option should only be used with decoupled projects. More details, visit
# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
# org.gradle.parallel=true
# AndroidX package structure to make it clearer which packages are bundled with the
# Android operating system, and which are packaged with your app"s APK
# https://developer.android.com/topic/libraries/support-library/androidx-rn
android.useAndroidX=true
# Enables namespacing of each library's R class so that its R class includes only the
# resources declared in the library itself and none from the library's dependencies,
# thereby reducing the size of the R class for that library
android.nonTransitiveRClass=true

================================================
FILE: android/SherpaOnnxJavaDemo/gradlew
================================================
#!/usr/bin/env sh

#
# Copyright 2015 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

##############################################################################
##
##  Gradle start up script for UN*X
##
##############################################################################

# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
    ls=`ls -ld "$PRG"`
    link=`expr "$ls" : '.*-> \(.*\)$'`
    if expr "$link" : '/.*' > /dev/null; then
        PRG="$link"
    else
        PRG=`dirname "$PRG"`"/$link"
    fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null

APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`

# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"

warn () {
    echo "$*"
}

die () {
    echo
    echo "$*"
    echo
    exit 1
}

# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
  CYGWIN* )
    cygwin=true
    ;;
  Darwin* )
    darwin=true
    ;;
  MINGW* )
    msys=true
    ;;
  NONSTOP* )
    nonstop=true
    ;;
esac

CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar


# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
        # IBM's JDK on AIX uses strange locations for the executables
        JAVACMD="$JAVA_HOME/jre/sh/java"
    else
        JAVACMD="$JAVA_HOME/bin/java"
    fi
    if [ ! -x "$JAVACMD" ] ; then
        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
else
    JAVACMD="java"
    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi

# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
    MAX_FD_LIMIT=`ulimit -H -n`
    if [ $? -eq 0 ] ; then
        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
            MAX_FD="$MAX_FD_LIMIT"
        fi
        ulimit -n $MAX_FD
        if [ $? -ne 0 ] ; then
            warn "Could not set maximum file descriptor limit: $MAX_FD"
        fi
    else
        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
    fi
fi

# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi

# For Cygwin or MSYS, switch paths to Windows format before running java
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`

    JAVACMD=`cygpath --unix "$JAVACMD"`

    # We build the pattern for arguments to be converted via cygpath
    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
    SEP=""
    for dir in $ROOTDIRSRAW ; do
        ROOTDIRS="$ROOTDIRS$SEP$dir"
        SEP="|"
    done
    OURCYGPATTERN="(^($ROOTDIRS))"
    # Add a user-defined pattern to the cygpath arguments
    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
    fi
    # Now convert the arguments - kludge to limit ourselves to /bin/sh
    i=0
    for arg in "$@" ; do
        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
        CHECK2=`echo "$arg"|egrep -c "^-"`                                 ### Determine if an option

        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Added a condition
            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
        else
            eval `echo args$i`="\"$arg\""
        fi
        i=`expr $i + 1`
    done
    case $i in
        0) set -- ;;
        1) set -- "$args0" ;;
        2) set -- "$args0" "$args1" ;;
        3) set -- "$args0" "$args1" "$args2" ;;
        4) set -- "$args0" "$args1" "$args2" "$args3" ;;
        5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
        6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
        7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
        8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
        9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
    esac
fi

# Escape application args
save () {
    for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
    echo " "
}
APP_ARGS=`save "$@"`

# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"

exec "$JAVACMD" "$@"


================================================
FILE: android/SherpaOnnxJavaDemo/gradlew.bat
================================================
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem      https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem

@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem  Gradle startup script for Windows
@rem
@rem ##########################################################################

@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal

set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"

@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome

set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto execute

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto execute

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar


@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*

:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if  not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1

:mainEnd
if "%OS%"=="Windows_NT" endlocal

:omega


================================================
FILE: android/SherpaOnnxJavaDemo/settings.gradle
================================================
pluginManagement {
    repositories {
        gradlePluginPortal()
        google()
        mavenCentral()
    }
}
dependencyResolutionManagement {
    repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
    repositories {
        google()
        mavenCentral()
        maven { url 'https://jitpack.io' }
    }
}
rootProject.name = "SherpaOnnxJavaDemo"
include ':app'


================================================
FILE: android/SherpaOnnxKws/.gitignore
================================================
*.iml
.gradle
/local.properties
/.idea/caches
/.idea/libraries
/.idea/modules.xml
/.idea/workspace.xml
/.idea/navEditor.xml
/.idea/assetWizardSettings.xml
.DS_Store
/build
/captures
.externalNativeBuild
.cxx
local.properties


================================================
FILE: android/SherpaOnnxKws/app/.gitignore
================================================
/build

================================================
FILE: android/SherpaOnnxKws/app/build.gradle
================================================
plugins {
    id 'com.android.application'
    id 'org.jetbrains.kotlin.android'
}

android {
    namespace 'com.k2fsa.sherpa.onnx'
    compileSdk 32

    defaultConfig {
        applicationId "com.k2fsa.sherpa.onnx"
        minSdk 21
        targetSdk 32
        versionCode 20260320
        versionName "1.12.31"

        testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
    }

    buildTypes {
        release {
            minifyEnabled false
            proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
        }
    }
    compileOptions {
        sourceCompatibility JavaVersion.VERSION_1_8
        targetCompatibility JavaVersion.VERSION_1_8
    }
    kotlinOptions {
        jvmTarget = '1.8'
    }
}

dependencies {

    implementation 'androidx.core:core-ktx:1.7.0'
    implementation 'androidx.appcompat:appcompat:1.5.1'
    implementation 'com.google.android.material:material:1.7.0'
    implementation 'androidx.constraintlayout:constraintlayout:2.1.4'
    testImplementation 'junit:junit:4.13.2'
    androidTestImplementation 'androidx.test.ext:junit:1.1.4'
    androidTestImplementation 'androidx.test.espresso:espresso-core:3.5.0'
}

================================================
FILE: android/SherpaOnnxKws/app/proguard-rules.pro
================================================
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
#   http://developer.android.com/guide/developing/tools/proguard.html

# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
#   public *;
#}

# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable

# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile

================================================
FILE: android/SherpaOnnxKws/app/src/androidTest/java/com/k2fsa/sherpa/onnx/ExampleInstrumentedTest.kt
================================================
package com.k2fsa.sherpa.onnx

import androidx.test.platform.app.InstrumentationRegistry
import androidx.test.ext.junit.runners.AndroidJUnit4

import org.junit.Test
import org.junit.runner.RunWith

import org.junit.Assert.*

/**
 * Instrumented test, which will execute on an Android device.
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
@RunWith(AndroidJUnit4::class)
class ExampleInstrumentedTest {
    @Test
    fun useAppContext() {
        // Context of the app under test.
        val appContext = InstrumentationRegistry.getInstrumentation().targetContext
        assertEquals("com.k2fsa.sherpa.onnx", appContext.packageName)
    }
}

================================================
FILE: android/SherpaOnnxKws/app/src/main/AndroidManifest.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:tools="http://schemas.android.com/tools">

    <uses-permission android:name="android.permission.RECORD_AUDIO" />

    <application
        android:allowBackup="true"
        android:dataExtractionRules="@xml/data_extraction_rules"
        android:fullBackupContent="@xml/backup_rules"
        android:icon="@mipmap/ic_launcher"
        android:label="@string/app_name"
        android:roundIcon="@mipmap/ic_launcher_round"
        android:supportsRtl="true"
        android:theme="@style/Theme.SherpaOnnx"
        tools:targetApi="31">
        <activity
            android:name=".kws.MainActivity"
            android:label="Keyword-spotter"
            android:exported="true">
            <intent-filter>
                <action android:name="android.intent.action.MAIN" />

                <category android:name="android.intent.category.LAUNCHER" />
            </intent-filter>

            <meta-data
                android:name="android.app.lib_name"
                android:value="" />
        </activity>
    </application>

</manifest>


================================================
FILE: android/SherpaOnnxKws/app/src/main/assets/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxKws/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
================================================
package com.k2fsa.sherpa.onnx.kws

import android.Manifest
import android.content.pm.PackageManager
import android.media.AudioFormat
import android.media.AudioRecord
import android.media.MediaRecorder
import android.os.Bundle
import android.text.method.ScrollingMovementMethod
import android.util.Log
import android.widget.Button
import android.widget.EditText
import android.widget.TextView
import android.widget.Toast
import androidx.appcompat.app.AppCompatActivity
import androidx.core.app.ActivityCompat
import com.k2fsa.sherpa.onnx.KeywordSpotter
import com.k2fsa.sherpa.onnx.KeywordSpotterConfig
import com.k2fsa.sherpa.onnx.OnlineStream
import com.k2fsa.sherpa.onnx.R
import com.k2fsa.sherpa.onnx.getFeatureConfig
import com.k2fsa.sherpa.onnx.getKeywordsFile
import com.k2fsa.sherpa.onnx.getKwsModelConfig
import kotlin.concurrent.thread

private const val TAG = "sherpa-onnx"
private const val REQUEST_RECORD_AUDIO_PERMISSION = 200

class MainActivity : AppCompatActivity() {
    private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)

    private lateinit var kws: KeywordSpotter
    private lateinit var stream: OnlineStream
    private var audioRecord: AudioRecord? = null
    private lateinit var recordButton: Button
    private lateinit var textView: TextView
    private lateinit var inputText: EditText
    private var recordingThread: Thread? = null

    private val audioSource = MediaRecorder.AudioSource.MIC
    private val sampleRateInHz = 16000
    private val channelConfig = AudioFormat.CHANNEL_IN_MONO

    // Note: We don't use AudioFormat.ENCODING_PCM_FLOAT
    // since the AudioRecord.read(float[]) needs API level >= 23
    // but we are targeting API level >= 21
    private val audioFormat = AudioFormat.ENCODING_PCM_16BIT
    private var idx: Int = 0
    private var lastText: String = ""

    @Volatile
    private var isRecording: Boolean = false

    override fun onRequestPermissionsResult(
        requestCode: Int, permissions: Array<String>, grantResults: IntArray
    ) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults)
        val permissionToRecordAccepted = if (requestCode == REQUEST_RECORD_AUDIO_PERMISSION) {
            grantResults[0] == PackageManager.PERMISSION_GRANTED
        } else {
            false
        }

        if (!permissionToRecordAccepted) {
            Log.e(TAG, "Audio record is disallowed")
            finish()
        }

        Log.i(TAG, "Audio record is permitted")
    }

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        setContentView(R.layout.activity_main)

        ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)

        Log.i(TAG, "Start to initialize model")
        initModel()
        Log.i(TAG, "Finished initializing model")

        recordButton = findViewById(R.id.record_button)
        recordButton.setOnClickListener { onclick() }

        textView = findViewById(R.id.my_text)
        textView.movementMethod = ScrollingMovementMethod()

        inputText = findViewById(R.id.input_text)
    }

    private fun onclick() {
        if (!isRecording) {
            val ret = initMicrophone()
            if (!ret) {
                Log.e(TAG, "Failed to initialize microphone")
                return
            }
            Log.i(TAG, "state: ${audioRecord?.state}")
            audioRecord!!.startRecording()
            recordButton.setText(R.string.stop)
            isRecording = true
            textView.text = ""
            lastText = ""
            idx = 0

            var keywords = inputText.text.toString()
            Log.i(TAG, "Raw keywords: $keywords")

            keywords = keywords.replace("\n", "/")
            keywords = keywords.trim()

            Log.i(TAG, "Normalized keywords: $keywords")

            stream = kws.createStream(keywords)
            if (stream.ptr == 0L) {
                Log.i(TAG, "Failed to create stream with keywords: $keywords")

                Toast.makeText(this, "Failed to set keywords to $keywords.", Toast.LENGTH_LONG)
                    .show()

                audioRecord?.let {
                  it.stop()
                  it.release()
                }
                audioRecord = null

                return
            }

            Log.i(TAG, "Created stream. Running ...")

            recordingThread = thread(true) {
                processSamples()
            }

            Log.i(TAG, "Started recording")
        } else {
            isRecording = false

            recordButton.setText(R.string.start)
            Log.i(TAG, "Stopped recording")
        }
    }

    private fun processSamples() {
        Log.i(TAG, "processing samples")

        val interval = 0.1 // i.e., 100 ms
        val bufferSize = (interval * sampleRateInHz).toInt() // in samples
        val buffer = ShortArray(bufferSize)

        while (isRecording) {
            val ret = audioRecord?.read(buffer, 0, buffer.size)
            if (ret != null && ret > 0) {
                val samples = FloatArray(ret) { buffer[it] / 32768.0f }
                stream.acceptWaveform(samples, sampleRate = sampleRateInHz)
                while (kws.isReady(stream)) {
                    kws.decode(stream)

                    val text = kws.getResult(stream).keyword

                    var textToDisplay = lastText

                    if (text.isNotBlank()) {
                        // Remember to reset the stream right after detecting a keyword

                        kws.reset(stream)
                        if (lastText.isBlank()) {
                            textToDisplay = "$idx: $text"
                        } else {
                            textToDisplay = "$idx: $text\n$lastText"
                        }
                        lastText = "$idx: $text\n$lastText"
                        idx += 1
                    }

                    runOnUiThread {
                        textView.text = textToDisplay
                    }
                }
            }
        }

        stream.release()
        Log.i(TAG, "Released stream. Stopped")

        audioRecord?.let {
          it.stop()
          it.release()
        }

        audioRecord = null
    }

    private fun initMicrophone(): Boolean {
        if (ActivityCompat.checkSelfPermission(
                this, Manifest.permission.RECORD_AUDIO
            ) != PackageManager.PERMISSION_GRANTED
        ) {
            ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)
            return false
        }

        val numBytes = AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat)
        Log.i(
            TAG, "buffer size in milliseconds: ${numBytes * 1000.0f / sampleRateInHz}"
        )

        audioRecord = AudioRecord(
            audioSource,
            sampleRateInHz,
            channelConfig,
            audioFormat,
            numBytes * 2 // a sample has two bytes as we are using 16-bit PCM
        )
        return true
    }

    private fun initModel() {
        // Please change getKwsModelConfig() to add new models
        // See https://k2-fsa.github.io/sherpa/onnx/kws/pretrained_models/index.html
        // for a list of available models
        val type = 0
        Log.i(TAG, "Select model type $type")
        val config = KeywordSpotterConfig(
            featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
            modelConfig = getKwsModelConfig(type = type)!!,
            keywordsFile = getKeywordsFile(type = type),
        )

        kws = KeywordSpotter(
            assetManager = application.assets,
            config = config,
        )
    }
}


================================================
FILE: android/SherpaOnnxKws/app/src/main/jniLibs/.gitignore
================================================
*.so
*.txt
*.onnx
*.wav


================================================
FILE: android/SherpaOnnxKws/app/src/main/jniLibs/arm64-v8a/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxKws/app/src/main/jniLibs/armeabi-v7a/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxKws/app/src/main/jniLibs/x86/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxKws/app/src/main/jniLibs/x86_64/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxKws/app/src/main/res/drawable/ic_launcher_background.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">
    <path
        android:fillColor="#3DDC84"
        android:pathData="M0,0h108v108h-108z" />
    <path
        android:fillColor="#00000000"
        android:pathData="M9,0L9,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,0L19,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M29,0L29,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M39,0L39,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M49,0L49,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M59,0L59,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M69,0L69,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M79,0L79,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M89,0L89,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M99,0L99,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,9L108,9"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,19L108,19"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,29L108,29"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,39L108,39"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,49L108,49"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,59L108,59"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,69L108,69"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,79L108,79"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,89L108,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,99L108,99"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,29L89,29"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,39L89,39"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,49L89,49"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,59L89,59"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,69L89,69"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,79L89,79"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M29,19L29,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M39,19L39,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M49,19L49,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M59,19L59,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M69,19L69,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M79,19L79,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
</vector>


================================================
FILE: android/SherpaOnnxKws/app/src/main/res/drawable-v24/ic_launcher_foreground.xml
================================================
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:aapt="http://schemas.android.com/aapt"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">
    <path android:pathData="M31,63.928c0,0 6.4,-11 12.1,-13.1c7.2,-2.6 26,-1.4 26,-1.4l38.1,38.1L107,108.928l-32,-1L31,63.928z">
        <aapt:attr name="android:fillColor">
            <gradient
                android:endX="85.84757"
                android:endY="92.4963"
                android:startX="42.9492"
                android:startY="49.59793"
                android:type="linear">
                <item
                    android:color="#44000000"
                    android:offset="0.0" />
                <item
                    android:color="#00000000"
                    android:offset="1.0" />
            </gradient>
        </aapt:attr>
    </path>
    <path
        android:fillColor="#FFFFFF"
        android:fillType="nonZero"
        android:pathData="M65.3,45.828l3.8,-6.6c0.2,-0.4 0.1,-0.9 -0.3,-1.1c-0.4,-0.2 -0.9,-0.1 -1.1,0.3l-3.9,6.7c-6.3,-2.8 -13.4,-2.8 -19.7,0l-3.9,-6.7c-0.2,-0.4 -0.7,-0.5 -1.1,-0.3C38.8,38.328 38.7,38.828 38.9,39.228l3.8,6.6C36.2,49.428 31.7,56.028 31,63.928h46C76.3,56.028 71.8,49.428 65.3,45.828zM43.4,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2c-0.3,-0.7 -0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C45.3,56.528 44.5,57.328 43.4,57.328L43.4,57.328zM64.6,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2s-0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C66.5,56.528 65.6,57.328 64.6,57.328L64.6,57.328z"
        android:strokeWidth="1"
        android:strokeColor="#00000000" />
</vector>

================================================
FILE: android/SherpaOnnxKws/app/src/main/res/layout/activity_main.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:app="http://schemas.android.com/apk/res-auto"
    xmlns:tools="http://schemas.android.com/tools"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    tools:context=".MainActivity">

    <LinearLayout
        android:layout_width="match_parent"
        android:layout_height="match_parent"
        android:gravity="center"
        android:orientation="vertical">

        <EditText
            android:id="@+id/input_text"
            android:layout_width="match_parent"
            android:layout_height="320dp"
            android:layout_weight="2.5"
            android:hint="@string/keyword_hint"
            android:scrollbars="vertical"
            android:text=""
            android:textSize="15dp" />

        <TextView
            android:id="@+id/my_text"
            android:layout_width="match_parent"
            android:layout_height="443dp"
            android:layout_weight="2.5"
            android:padding="24dp"
            android:scrollbars="vertical"
            android:singleLine="false"
            android:text="@string/hint"
            android:textSize="15dp" />

        <Button
            android:id="@+id/record_button"
            android:layout_width="wrap_content"
            android:layout_height="wrap_content"
            android:layout_weight="0.5"
            android:text="@string/start" />

    </LinearLayout>


</androidx.constraintlayout.widget.ConstraintLayout>

================================================
FILE: android/SherpaOnnxKws/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@drawable/ic_launcher_background" />
    <foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

================================================
FILE: android/SherpaOnnxKws/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@drawable/ic_launcher_background" />
    <foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

================================================
FILE: android/SherpaOnnxKws/app/src/main/res/values/colors.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<resources>
    <color name="purple_200">#FFBB86FC</color>
    <color name="purple_500">#FF6200EE</color>
    <color name="purple_700">#FF3700B3</color>
    <color name="teal_200">#FF03DAC5</color>
    <color name="teal_700">#FF018786</color>
    <color name="black">#FF000000</color>
    <color name="white">#FFFFFFFF</color>
</resources>

================================================
FILE: android/SherpaOnnxKws/app/src/main/res/values/strings.xml
================================================
<resources>
    <string name="app_name">Keyword spotting</string>
    <string name="hint">Click the Start button to play keyword spotting with Next-gen Kaldi.
        \n
        \n\n\n
        The source code and pre-trained models are publicly available.
        Please see https://github.com/k2-fsa/sherpa-onnx for details.
    </string>
    <string name="keyword_hint">Input your keywords here, one keyword per line.\nTwo example keywords are given below:\n\nn ǐ h ǎo @你好\nd àn g ē d àn g ē @蛋哥蛋哥</string>
    <string name="start">Start</string>
    <string name="stop">Stop</string>
</resources>


================================================
FILE: android/SherpaOnnxKws/app/src/main/res/values/themes.xml
================================================
<resources xmlns:tools="http://schemas.android.com/tools">
    <!-- Base application theme. -->
    <style name="Theme.SherpaOnnx" parent="Theme.MaterialComponents.DayNight.DarkActionBar">
        <!-- Primary brand color. -->
        <item name="colorPrimary">@color/purple_500</item>
        <item name="colorPrimaryVariant">@color/purple_700</item>
        <item name="colorOnPrimary">@color/white</item>
        <!-- Secondary brand color. -->
        <item name="colorSecondary">@color/teal_200</item>
        <item name="colorSecondaryVariant">@color/teal_700</item>
        <item name="colorOnSecondary">@color/black</item>
        <!-- Status bar color. -->
        <item name="android:statusBarColor">?attr/colorPrimaryVariant</item>
        <!-- Customize your theme here. -->
    </style>
</resources>

================================================
FILE: android/SherpaOnnxKws/app/src/main/res/values-night/themes.xml
================================================
<resources xmlns:tools="http://schemas.android.com/tools">
    <!-- Base application theme. -->
    <style name="Theme.SherpaOnnx" parent="Theme.MaterialComponents.DayNight.DarkActionBar">
        <!-- Primary brand color. -->
        <item name="colorPrimary">@color/purple_200</item>
        <item name="colorPrimaryVariant">@color/purple_700</item>
        <item name="colorOnPrimary">@color/black</item>
        <!-- Secondary brand color. -->
        <item name="colorSecondary">@color/teal_200</item>
        <item name="colorSecondaryVariant">@color/teal_200</item>
        <item name="colorOnSecondary">@color/black</item>
        <!-- Status bar color. -->
        <item name="android:statusBarColor">?attr/colorPrimaryVariant</item>
        <!-- Customize your theme here. -->
    </style>
</resources>

================================================
FILE: android/SherpaOnnxKws/app/src/main/res/xml/backup_rules.xml
================================================
<?xml version="1.0" encoding="utf-8"?><!--
   Sample backup rules file; uncomment and customize as necessary.
   See https://developer.android.com/guide/topics/data/autobackup
   for details.
   Note: This file is ignored for devices older that API 31
   See https://developer.android.com/about/versions/12/backup-restore
-->
<full-backup-content>
    <!--
   <include domain="sharedpref" path="."/>
   <exclude domain="sharedpref" path="device.xml"/>
-->
</full-backup-content>

================================================
FILE: android/SherpaOnnxKws/app/src/main/res/xml/data_extraction_rules.xml
================================================
<?xml version="1.0" encoding="utf-8"?><!--
   Sample data extraction rules file; uncomment and customize as necessary.
   See https://developer.android.com/about/versions/12/backup-restore#xml-changes
   for details.
-->
<data-extraction-rules>
    <cloud-backup>
        <!-- TODO: Use <include> and <exclude> to control what is backed up.
        <include .../>
        <exclude .../>
        -->
    </cloud-backup>
    <!--
    <device-transfer>
        <include .../>
        <exclude .../>
    </device-transfer>
    -->
</data-extraction-rules>

================================================
FILE: android/SherpaOnnxKws/app/src/test/java/com/k2fsa/sherpa/onnx/ExampleUnitTest.kt
================================================
package com.k2fsa.sherpa.onnx

import org.junit.Test

import org.junit.Assert.*

/**
 * Example local unit test, which will execute on the development machine (host).
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
class ExampleUnitTest {
    @Test
    fun addition_isCorrect() {
        assertEquals(4, 2 + 2)
    }
}

================================================
FILE: android/SherpaOnnxKws/build.gradle
================================================
// Top-level build file where you can add configuration options common to all sub-projects/modules.
plugins {
    id 'com.android.application' version '7.3.1' apply false
    id 'com.android.library' version '7.3.1' apply false
    id 'org.jetbrains.kotlin.android' version '1.7.20' apply false
}

================================================
FILE: android/SherpaOnnxKws/gradle/wrapper/gradle-wrapper.properties
================================================
#Thu Feb 23 11:09:06 CST 2023
distributionBase=GRADLE_USER_HOME
distributionUrl=https\://services.gradle.org/distributions/gradle-8.2-bin.zip
distributionPath=wrapper/dists
zipStorePath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME


================================================
FILE: android/SherpaOnnxKws/gradle.properties
================================================
# Project-wide Gradle settings.
# IDE (e.g. Android Studio) users:
# Gradle settings configured through the IDE *will override*
# any settings specified in this file.
# For more details on how to configure your build environment visit
# http://www.gradle.org/docs/current/userguide/build_environment.html
# Specifies the JVM arguments used for the daemon process.
# The setting is particularly useful for tweaking memory settings.
org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
# When configured, Gradle will run in incubating parallel mode.
# This option should only be used with decoupled projects. More details, visit
# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
# org.gradle.parallel=true
# AndroidX package structure to make it clearer which packages are bundled with the
# Android operating system, and which are packaged with your app's APK
# https://developer.android.com/topic/libraries/support-library/androidx-rn
android.useAndroidX=true
# Kotlin code style for this project: "official" or "obsolete":
kotlin.code.style=official
# Enables namespacing of each library's R class so that its R class includes only the
# resources declared in the library itself and none from the library's dependencies,
# thereby reducing the size of the R class for that library
android.nonTransitiveRClass=true

================================================
FILE: android/SherpaOnnxKws/gradlew
================================================
#!/usr/bin/env sh

#
# Copyright 2015 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

##############################################################################
##
##  Gradle start up script for UN*X
##
##############################################################################

# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
    ls=`ls -ld "$PRG"`
    link=`expr "$ls" : '.*-> \(.*\)$'`
    if expr "$link" : '/.*' > /dev/null; then
        PRG="$link"
    else
        PRG=`dirname "$PRG"`"/$link"
    fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null

APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`

# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"

warn () {
    echo "$*"
}

die () {
    echo
    echo "$*"
    echo
    exit 1
}

# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
  CYGWIN* )
    cygwin=true
    ;;
  Darwin* )
    darwin=true
    ;;
  MINGW* )
    msys=true
    ;;
  NONSTOP* )
    nonstop=true
    ;;
esac

CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar


# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
        # IBM's JDK on AIX uses strange locations for the executables
        JAVACMD="$JAVA_HOME/jre/sh/java"
    else
        JAVACMD="$JAVA_HOME/bin/java"
    fi
    if [ ! -x "$JAVACMD" ] ; then
        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
else
    JAVACMD="java"
    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi

# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
    MAX_FD_LIMIT=`ulimit -H -n`
    if [ $? -eq 0 ] ; then
        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
            MAX_FD="$MAX_FD_LIMIT"
        fi
        ulimit -n $MAX_FD
        if [ $? -ne 0 ] ; then
            warn "Could not set maximum file descriptor limit: $MAX_FD"
        fi
    else
        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
    fi
fi

# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi

# For Cygwin or MSYS, switch paths to Windows format before running java
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`

    JAVACMD=`cygpath --unix "$JAVACMD"`

    # We build the pattern for arguments to be converted via cygpath
    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
    SEP=""
    for dir in $ROOTDIRSRAW ; do
        ROOTDIRS="$ROOTDIRS$SEP$dir"
        SEP="|"
    done
    OURCYGPATTERN="(^($ROOTDIRS))"
    # Add a user-defined pattern to the cygpath arguments
    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
    fi
    # Now convert the arguments - kludge to limit ourselves to /bin/sh
    i=0
    for arg in "$@" ; do
        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
        CHECK2=`echo "$arg"|egrep -c "^-"`                                 ### Determine if an option

        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Added a condition
            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
        else
            eval `echo args$i`="\"$arg\""
        fi
        i=`expr $i + 1`
    done
    case $i in
        0) set -- ;;
        1) set -- "$args0" ;;
        2) set -- "$args0" "$args1" ;;
        3) set -- "$args0" "$args1" "$args2" ;;
        4) set -- "$args0" "$args1" "$args2" "$args3" ;;
        5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
        6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
        7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
        8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
        9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
    esac
fi

# Escape application args
save () {
    for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
    echo " "
}
APP_ARGS=`save "$@"`

# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"

exec "$JAVACMD" "$@"


================================================
FILE: android/SherpaOnnxKws/gradlew.bat
================================================
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem      https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem

@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem  Gradle startup script for Windows
@rem
@rem ##########################################################################

@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal

set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"

@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome

set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto execute

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto execute

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar


@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*

:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if  not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1

:mainEnd
if "%OS%"=="Windows_NT" endlocal

:omega


================================================
FILE: android/SherpaOnnxKws/settings.gradle
================================================
pluginManagement {
    repositories {
        gradlePluginPortal()
        google()
        mavenCentral()
    }
}
dependencyResolutionManagement {
    repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
    repositories {
        google()
        mavenCentral()
    }
}
rootProject.name = "SherpaOnnxKws"
include ':app'


================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/.gitignore
================================================
*.iml
.gradle
/local.properties
/.idea/caches
/.idea/libraries
/.idea/modules.xml
/.idea/workspace.xml
/.idea/navEditor.xml
/.idea/assetWizardSettings.xml
.DS_Store
/build
/captures
.externalNativeBuild
.cxx
local.properties


================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/app/.gitignore
================================================
/build

================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/app/build.gradle.kts
================================================
plugins {
    alias(libs.plugins.android.application)
    alias(libs.plugins.jetbrains.kotlin.android)
}

android {
    namespace = "com.k2fsa.sherpa.onnx.simulate.streaming.asr"
    compileSdk = 34

    defaultConfig {
        applicationId = "com.k2fsa.sherpa.onnx.simulate.streaming.asr"
        minSdk = 21
        targetSdk = 34
        versionCode = 20260320
        versionName = "1.12.31"

        testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
        vectorDrawables {
            useSupportLibrary = true
        }
    }

    buildTypes {
        release {
            isMinifyEnabled = false
            proguardFiles(
                getDefaultProguardFile("proguard-android-optimize.txt"),
                "proguard-rules.pro"
            )
        }
    }
    compileOptions {
        sourceCompatibility = JavaVersion.VERSION_1_8
        targetCompatibility = JavaVersion.VERSION_1_8
    }
    kotlinOptions {
        jvmTarget = "1.8"
    }
    buildFeatures {
        compose = true
    }
    composeOptions {
        kotlinCompilerExtensionVersion = "1.5.1"
    }
    packaging {
        resources {
            excludes += "/META-INF/{AL2.0,LGPL2.1}"
        }
    }
}

dependencies {
    implementation(libs.androidx.core.ktx)
    implementation(libs.androidx.lifecycle.runtime.ktx)
    implementation(libs.androidx.activity.compose)
    implementation(platform(libs.androidx.compose.bom))
    implementation(libs.androidx.ui)
    implementation(libs.androidx.ui.graphics)
    implementation(libs.androidx.ui.tooling.preview)
    implementation(libs.androidx.material3)
    implementation(libs.androidx.navigation.compose)
    testImplementation(libs.junit)
    androidTestImplementation(libs.androidx.junit)
    androidTestImplementation(libs.androidx.espresso.core)
    androidTestImplementation(platform(libs.androidx.compose.bom))
    androidTestImplementation(libs.androidx.ui.test.junit4)
    debugImplementation(libs.androidx.ui.tooling)
    debugImplementation(libs.androidx.ui.test.manifest)
}

================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/app/proguard-rules.pro
================================================
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
#   http://developer.android.com/guide/developing/tools/proguard.html

# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
#   public *;
#}

# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable

# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile

================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/app/src/androidTest/java/com/k2fsa/sherpa/onnx/simulate/streaming/asr/ExampleInstrumentedTest.kt
================================================
package com.k2fsa.sherpa.onnx.simulate.streaming.asr

import androidx.test.platform.app.InstrumentationRegistry
import androidx.test.ext.junit.runners.AndroidJUnit4

import org.junit.Test
import org.junit.runner.RunWith

import org.junit.Assert.*

/**
 * Instrumented test, which will execute on an Android device.
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
@RunWith(AndroidJUnit4::class)
class ExampleInstrumentedTest {
    @Test
    fun useAppContext() {
        // Context of the app under test.
        val appContext = InstrumentationRegistry.getInstrumentation().targetContext
        assertEquals("com.k2fsa.sherpa.onnx.simulate.streaming.asr", appContext.packageName)
    }
}

================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/app/src/main/AndroidManifest.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:tools="http://schemas.android.com/tools">

    <uses-permission android:name="android.permission.RECORD_AUDIO" />

    <application
        android:allowBackup="true"
        android:dataExtractionRules="@xml/data_extraction_rules"
        android:fullBackupContent="@xml/backup_rules"
        android:icon="@mipmap/ic_launcher"
        android:label="@string/app_name"
        android:roundIcon="@mipmap/ic_launcher_round"
        android:supportsRtl="true"
        android:theme="@style/Theme.SimulateStreamingAsr"
        tools:targetApi="31">

        <!--
        required by qnn

        If you don't add it, you would get an error from the deviceCreate() API
        and the error code is 14001

        It is located at /vendor/lib64/libcdsprpc.so on your Phone
        -->
        <uses-native-library
            android:name="libcdsprpc.so"
            android:required="false"/>

        <activity
            android:name=".MainActivity"
            android:exported="true"
            android:label="@string/app_name"
            android:theme="@style/Theme.SimulateStreamingAsr">
            <intent-filter>
                <action android:name="android.intent.action.MAIN" />

                <category android:name="android.intent.category.LAUNCHER" />
            </intent-filter>
        </activity>
    </application>

</manifest>

================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/app/src/main/assets/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/app/src/main/java/com/k2fsa/sherpa/onnx/simulate/streaming/asr/BarItem.kt
================================================
package com.k2fsa.sherpa.onnx.simulate.streaming.asr

import androidx.compose.ui.graphics.vector.ImageVector

data class BarItem(
    val title: String,

    // see https://www.composables.com/icons
    // and
    // https://developer.android.com/reference/kotlin/androidx/compose/material/icons/filled/package-summary
    val image: ImageVector,
    val route: String,
)


================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/app/src/main/java/com/k2fsa/sherpa/onnx/simulate/streaming/asr/MainActivity.kt
================================================
package com.k2fsa.sherpa.onnx.simulate.streaming.asr

import android.Manifest
import android.content.pm.PackageManager
import android.os.Bundle
import android.util.Log
import android.widget.Toast
import androidx.activity.ComponentActivity
import androidx.activity.compose.setContent
import androidx.activity.enableEdgeToEdge
import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.foundation.layout.padding
import androidx.compose.material3.CenterAlignedTopAppBar
import androidx.compose.material3.ExperimentalMaterial3Api
import androidx.compose.material3.Icon
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.NavigationBar
import androidx.compose.material3.NavigationBarItem
import androidx.compose.material3.Scaffold
import androidx.compose.material3.Surface
import androidx.compose.material3.Text
import androidx.compose.material3.TopAppBarDefaults
import androidx.compose.runtime.Composable
import androidx.compose.runtime.getValue
import androidx.compose.ui.Modifier
import androidx.compose.ui.text.font.FontWeight
import androidx.core.app.ActivityCompat
import androidx.navigation.NavGraph.Companion.findStartDestination
import androidx.navigation.NavHostController
import androidx.navigation.compose.NavHost
import androidx.navigation.compose.composable
import androidx.navigation.compose.currentBackStackEntryAsState
import androidx.navigation.compose.rememberNavController
import com.k2fsa.sherpa.onnx.simulate.streaming.asr.screens.HelpScreen
import com.k2fsa.sherpa.onnx.simulate.streaming.asr.screens.HomeScreen
import com.k2fsa.sherpa.onnx.simulate.streaming.asr.ui.theme.SimulateStreamingAsrTheme

const val TAG = "sherpa-onnx-sim-asr"
private const val REQUEST_RECORD_AUDIO_PERMISSION = 200

@Suppress("DEPRECATION")
class MainActivity : ComponentActivity() {
    private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        enableEdgeToEdge()
        setContent {
            SimulateStreamingAsrTheme {
                Surface(
                    modifier = Modifier.fillMaxSize(),
                    color = MaterialTheme.colorScheme.background
                ) {
                    MainScreen()
                }
            }
        }
        ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)
    }

    @Deprecated("Deprecated in Java")
    override fun onRequestPermissionsResult(
        requestCode: Int,
        permissions: Array<out String>,
        grantResults: IntArray
    ) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults)
        val permissionToRecordAccepted = if (requestCode == REQUEST_RECORD_AUDIO_PERMISSION) {
            grantResults[0] == PackageManager.PERMISSION_GRANTED
        } else {
            false
        }

        if (!permissionToRecordAccepted) {
            Log.e(TAG, "Audio record is disallowed")
            Toast.makeText(
                this,
                "This App needs to access the microphone",
                Toast.LENGTH_SHORT
            )
                .show()
            finish()
        }

        Log.i(TAG, "Audio record is permitted")
    }
}

@OptIn(ExperimentalMaterial3Api::class)
@Composable
fun MainScreen(modifier: Modifier = Modifier) {
    val navController = rememberNavController()

    Scaffold(
        topBar = {
            CenterAlignedTopAppBar(
                colors = TopAppBarDefaults.topAppBarColors(
                    containerColor = MaterialTheme.colorScheme.primaryContainer,
                    titleContentColor = MaterialTheme.colorScheme.primary,
                ),
                title = {
                    Text(
                        "Next-gen Kaldi: Simulate real-time speech recognition",
                        fontWeight = FontWeight.Bold,
                    )
                },
            )
        },
        content = { padding ->
            Column(Modifier.padding(padding)) {
                NavigationHost(navController = navController)

            }
        },
        bottomBar = {
            BottomNavigationBar(navController = navController)
        }
    )
}

@Composable
fun NavigationHost(navController: NavHostController) {
    NavHost(navController = navController, startDestination = NavRoutes.Home.route) {
        composable(NavRoutes.Home.route) {
            HomeScreen()
        }

        composable(NavRoutes.Help.route) {
            HelpScreen()
        }
    }
}

@Composable
fun BottomNavigationBar(navController: NavHostController) {
    NavigationBar {
        val backStackEntry by navController.currentBackStackEntryAsState()
        val currentRoute = backStackEntry?.destination?.route

        NavBarItems.BarItems.forEach { navItem ->
            NavigationBarItem(selected = currentRoute == navItem.route,
                onClick = {
                    navController.navigate(navItem.route) {
                        popUpTo(navController.graph.findStartDestination().id) {
                            saveState = true
                        }
                        launchSingleTop = true
                        restoreState = true
                    }
                },
                icon = {
                    Icon(imageVector = navItem.image, contentDescription = navItem.title)
                }, label = {
                    Text(text = navItem.title)
                })
        }
    }
}

================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/app/src/main/java/com/k2fsa/sherpa/onnx/simulate/streaming/asr/NavBarItems.kt
================================================
package com.k2fsa.sherpa.onnx.simulate.streaming.asr

import androidx.compose.material.icons.Icons
import androidx.compose.material.icons.filled.Home
import androidx.compose.material.icons.filled.Info

object NavBarItems {
    val BarItems = listOf(
        BarItem(
            title = "Home",
            image = Icons.Filled.Home,
            route = "home",
        ),
        BarItem(
            title = "Help",
            image = Icons.Filled.Info,
            route = "help",
        ),
    )
}

================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/app/src/main/java/com/k2fsa/sherpa/onnx/simulate/streaming/asr/NavRoutes.kt
================================================
package com.k2fsa.sherpa.onnx.simulate.streaming.asr

sealed class NavRoutes(val route: String) {
    object Home : NavRoutes("home")
    object Help : NavRoutes("help")
}

================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/app/src/main/java/com/k2fsa/sherpa/onnx/simulate/streaming/asr/SimulateStreamingAsr.kt
================================================
package com.k2fsa.sherpa.onnx.simulate.streaming.asr

import android.content.Context
import android.content.res.AssetManager
import android.util.Log
import com.k2fsa.sherpa.onnx.HomophoneReplacerConfig
import com.k2fsa.sherpa.onnx.OfflineRecognizer
import com.k2fsa.sherpa.onnx.OfflineRecognizerConfig
import com.k2fsa.sherpa.onnx.Vad
import com.k2fsa.sherpa.onnx.getOfflineModelConfig
import com.k2fsa.sherpa.onnx.getVadModelConfig
import java.io.File
import java.io.FileOutputStream
import java.io.InputStream
import java.io.OutputStream


fun assetExists(assetManager: AssetManager, path: String): Boolean {
    val dir = path.substringBeforeLast('/', "")
    val fileName = path.substringAfterLast('/')

    val files = assetManager.list(dir) ?: return false
    return files.contains(fileName)
}

fun assetListExists(
    assetManager: AssetManager,
    paths: String
): Boolean {
    if (paths.isBlank()) return false

    val pathList = paths.split(",")
        .map { it.trim() }
        .filter { it.isNotEmpty() }

    if (pathList.isEmpty()) return false

    return pathList.all { path ->
        assetExists(assetManager, path)
    }
}

fun copyAssetToInternalStorage(path: String, context: Context): String {
    val targetRoot = context.filesDir
    val outFile = File(targetRoot, path)

    if (!assetExists(context.assets, path = path)) {
        // for context binary, if it is does not exist, we return a path
        // that can be written to
        outFile.parentFile?.mkdirs()
        Log.i(TAG, "$path does not exist, return ${outFile.absolutePath}")
        return outFile.absolutePath
    }

    if (outFile.exists()) {
        val assetSize = context.assets.open(path).use { it.available() }
        if (outFile.length() == assetSize.toLong()) {
            Log.i(TAG, "$targetRoot/$path already exists, skip copying, return $targetRoot/$path")

            return "$targetRoot/$path"
        }
    }

    outFile.parentFile?.mkdirs()

    context.assets.open(path).use { input: InputStream ->
        FileOutputStream(outFile).use { output: OutputStream ->
            input.copyTo(output)
        }
    }
    Log.i(TAG, "Copied $path to $targetRoot/$path")

    return outFile.absolutePath
}

fun copyAssetListToInternalStorage(
    paths: String,
    context: Context
): String {
    if (paths.isBlank()) return paths

    val pathList = paths.split(",")
        .map { it.trim() }
        .filter { it.isNotEmpty() }

    val copiedPaths = pathList.map { path ->
        copyAssetToInternalStorage(path, context)
    }

    return copiedPaths.joinToString(",")
}


object SimulateStreamingAsr {
    private var _recognizer: OfflineRecognizer? = null
    val recognizer: OfflineRecognizer
        get() {
            return _recognizer!!
        }

    private var _vad: Vad? = null
    val vad: Vad
        get() {
            return _vad!!
        }

    fun initOfflineRecognizer(context: Context, asrModelType: Int) {
        synchronized(this) {
            if (_recognizer != null) {
                return
            }
            Log.i(TAG, "Initializing sherpa-onnx offline recognizer")
            // Please change getOfflineModelConfig() to add new models
            // See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
            // for a list of available models
            val asrRuleFsts: String?
            asrRuleFsts = null
            Log.i(TAG, "Select model type $asrModelType for ASR")

            val useHr = false
            val hr = HomophoneReplacerConfig(
                // Used only when useHr is true
                // Please download the following 2 files from
                // https://github.com/k2-fsa/sherpa-onnx/releases/tag/hr-files
                //
                // lexicon.txt can be shared by different apps
                //
                // replace.fst is specific for an app
                lexicon = "lexicon.txt",
                ruleFsts = "replace.fst",
            )

            val config = OfflineRecognizerConfig(
                modelConfig = getOfflineModelConfig(type = asrModelType)!!,
            )

            if (config.modelConfig.numThreads == 1) {
                config.modelConfig.numThreads = 2
            }

            if (asrRuleFsts != null) {
                config.ruleFsts = asrRuleFsts
            }

            if (useHr) {
                config.hr = hr
            }

            var assetManager: AssetManager? = context.assets

            if (config.modelConfig.provider == "qnn") {
                // We assume you have copied files like libQnnHtpV81Skel.so to jniLibs/arm64-v8a
                Log.i(TAG, "nativelibdir: ${context.applicationInfo.nativeLibraryDir}")

                // If we don't set the environment variable for ADSP_LIBRARY_PATH, we will see
                // the error code 1008 from qnn_interface.deviceCreate()
                // See also
                // https://workbench.aihub.qualcomm.com/docs/hub/faq.html#why-am-i-seeing-error-1008-when-trying-to-use-htp
                OfflineRecognizer.prependAdspLibraryPath(context.applicationInfo.nativeLibraryDir)

                // for qnn, we need to copy *.so files from assets folder to sd card
                if (config.modelConfig.senseVoice.qnnConfig.backendLib.isEmpty()
                    && config.modelConfig.zipformerCtc.qnnConfig.backendLib.isEmpty()
                    && config.modelConfig.paraformer.qnnConfig.backendLib.isEmpty()
                ) {
                    Log.e(TAG, "You should provide libQnnHtp.so for qnn")
                    throw IllegalArgumentException("You should provide libQnnHtp.so for qnn")
                }
                config.modelConfig.tokens =
                    copyAssetToInternalStorage(config.modelConfig.tokens, context)

                if (config.modelConfig.senseVoice.model.isNotEmpty() || assetExists(
                        context.assets,
                        path = config.modelConfig.senseVoice.qnnConfig.contextBinary
                    )
                ) {
                    if (config.modelConfig.senseVoice.model.isNotEmpty()) {
                        config.modelConfig.senseVoice.model =
                            copyAssetToInternalStorage(config.modelConfig.senseVoice.model, context)
                    }

                    config.modelConfig.senseVoice.qnnConfig.contextBinary =
                        copyAssetToInternalStorage(
                            config.modelConfig.senseVoice.qnnConfig.contextBinary,
                            context
                        )
                } else if (config.modelConfig.zipformerCtc.model.isNotEmpty() ||
                    assetExists(
                        context.assets,
                        path = config.modelConfig.zipformerCtc.qnnConfig.contextBinary
                    )
                ) {
                    if (config.modelConfig.zipformerCtc.model.isNotEmpty()) {
                        config.modelConfig.zipformerCtc.model =
                            copyAssetToInternalStorage(
                                config.modelConfig.zipformerCtc.model,
                                context
                            )
                    }

                    config.modelConfig.zipformerCtc.qnnConfig.contextBinary =
                        copyAssetToInternalStorage(
                            config.modelConfig.zipformerCtc.qnnConfig.contextBinary,
                            context
                        )
                } else if (config.modelConfig.paraformer.model.isNotEmpty()
                    || assetListExists(
                        context.assets,
                        config.modelConfig.paraformer.qnnConfig.contextBinary
                    )
                ) {
                    if (config.modelConfig.paraformer.model.isNotEmpty()) {
                        config.modelConfig.paraformer.model =
                            copyAssetListToInternalStorage(
                                config.modelConfig.paraformer.model,
                                context
                            )
                    }

                    config.modelConfig.paraformer.qnnConfig.contextBinary =
                        copyAssetListToInternalStorage(
                            config.modelConfig.paraformer.qnnConfig.contextBinary,
                            context
                        )
                }

                if (config.hr.lexicon.isNotEmpty()) {
                    config.hr.lexicon = copyAssetToInternalStorage(config.hr.lexicon, context)
                }

                if (config.hr.ruleFsts.isNotEmpty()) {
                    // it assumes there is only one fst. otherwise, you need to copy each fst separately
                    config.hr.ruleFsts = copyAssetToInternalStorage(config.hr.ruleFsts, context)
                }

                assetManager = null
            }

            _recognizer = OfflineRecognizer(
                assetManager = assetManager,
                config = config,
            )

            Log.i(TAG, "sherpa-onnx offline recognizer initialized")
        }
    }

    fun initVad(assetManager: AssetManager? = null) {
        if (_vad != null) {
            return
        }
        val type = 0
        Log.i(TAG, "Select VAD model type $type")
        val config = getVadModelConfig(type)

        _vad = Vad(
            assetManager = assetManager,
            config = config!!,
        )
        Log.i(TAG, "sherpa-onnx vad initialized")
    }
}


================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/app/src/main/java/com/k2fsa/sherpa/onnx/simulate/streaming/asr/screens/Help.kt
================================================
package com.k2fsa.sherpa.onnx.simulate.streaming.asr.screens

import androidx.compose.runtime.Composable
import androidx.compose.foundation.layout.Box
import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.Spacer
import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.foundation.layout.height
import androidx.compose.foundation.layout.padding
import androidx.compose.material3.Text
import androidx.compose.ui.Modifier
import androidx.compose.ui.unit.dp
import androidx.compose.ui.unit.sp

@Composable
fun HelpScreen() {
    Box(modifier = Modifier.fillMaxSize()) {
        Column(
            modifier = Modifier.padding(8.dp)
        ) {
            Text(
                "This app uses a non-streaming ASR model together with silero-vad " +
                        "for streaming/real-time speech recognition. ",
                fontSize=10.sp
            )
            Spacer(modifier = Modifier.height(10.dp))
            Text("Please see http://github.com/k2-fsa/sherpa-onnx ")

            Spacer(modifier = Modifier.height(10.dp))
            Text("Everything is open-sourced!", fontSize = 20.sp)
        }
    }
}


================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/app/src/main/java/com/k2fsa/sherpa/onnx/simulate/streaming/asr/screens/Home.kt
================================================
package com.k2fsa.sherpa.onnx.simulate.streaming.asr.screens

import android.Manifest
import android.annotation.SuppressLint
import android.app.Activity
import android.content.pm.PackageManager
import android.media.AudioFormat
import android.media.AudioRecord
import android.media.MediaRecorder
import android.util.Log
import android.widget.Toast
import androidx.compose.foundation.layout.Arrangement
import androidx.compose.foundation.layout.Box
import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.PaddingValues
import androidx.compose.foundation.layout.Row
import androidx.compose.foundation.layout.Spacer
import androidx.compose.foundation.layout.fillMaxHeight
import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.foundation.layout.fillMaxWidth
import androidx.compose.foundation.layout.width
import androidx.compose.foundation.lazy.LazyColumn
import androidx.compose.foundation.lazy.itemsIndexed
import androidx.compose.foundation.lazy.rememberLazyListState
import androidx.compose.material3.Button
import androidx.compose.material3.Text
import androidx.compose.runtime.Composable
import androidx.compose.runtime.LaunchedEffect
import androidx.compose.runtime.getValue
import androidx.compose.runtime.mutableStateListOf
import androidx.compose.runtime.mutableStateOf
import androidx.compose.runtime.remember
import androidx.compose.runtime.rememberCoroutineScope
import androidx.compose.runtime.setValue
import androidx.compose.ui.Alignment
import androidx.compose.ui.Modifier
import androidx.compose.ui.platform.LocalClipboardManager
import androidx.compose.ui.platform.LocalContext
import androidx.compose.ui.res.stringResource
import androidx.compose.ui.text.AnnotatedString
import androidx.compose.ui.unit.dp
import androidx.core.app.ActivityCompat
import com.k2fsa.sherpa.onnx.simulate.streaming.asr.R
import com.k2fsa.sherpa.onnx.simulate.streaming.asr.SimulateStreamingAsr
import com.k2fsa.sherpa.onnx.simulate.streaming.asr.TAG
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.channels.Channel
import kotlinx.coroutines.launch
import kotlinx.coroutines.withContext

private var audioRecord: AudioRecord? = null

private const val sampleRateInHz = 16000
private var samplesChannel = Channel<FloatArray>(capacity = Channel.UNLIMITED)

@Composable
fun HomeScreen() {
    val context = LocalContext.current
    val clipboardManager = LocalClipboardManager.current

    val activity = LocalContext.current as Activity
    var isStarted by remember { mutableStateOf(false) }
    val resultList: MutableList<String> = remember { mutableStateListOf() }
    val lazyColumnListState = rememberLazyListState()
    val coroutineScope = rememberCoroutineScope()

    var isInitialized by remember { mutableStateOf(false) }

    // we change asrModelType in github actions
    val asrModelType = 15

    LaunchedEffect(Unit) {
        if (asrModelType >= 9000) {
            resultList.add("Using QNN for Qualcomm NPU (HTP backend)")
            resultList.add("It takes about 10s for the first run to start")
            resultList.add("Later runs require less than 1 second")
        }

        withContext(Dispatchers.Default) {
            // Call your heavy initialization off the main thread
            SimulateStreamingAsr.initOfflineRecognizer(activity, asrModelType)
            SimulateStreamingAsr.initVad(activity.assets)
        }

        // Back on the Main thread: update UI state
        isInitialized = true
        resultList.clear()
    }

    val onRecordingButtonClick: () -> Unit = {
        isStarted = !isStarted
        if (isStarted) {
            if (ActivityCompat.checkSelfPermission(
                    activity,
                    Manifest.permission.RECORD_AUDIO
                ) != PackageManager.PERMISSION_GRANTED
            ) {
                Log.i(TAG, "Recording is not allowed")
            } else {
                // recording is allowed
                val audioSource = MediaRecorder.AudioSource.MIC
                val channelConfig = AudioFormat.CHANNEL_IN_MONO
                val audioFormat = AudioFormat.ENCODING_PCM_16BIT
                val numBytes =
                    AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat)
                audioRecord = AudioRecord(
                    audioSource,
                    sampleRateInHz,
                    AudioFormat.CHANNEL_IN_MONO,
                    AudioFormat.ENCODING_PCM_16BIT,
                    numBytes * 2 // a sample has two bytes as we are using 16-bit PCM
                )

                SimulateStreamingAsr.vad.reset()

                CoroutineScope(Dispatchers.IO).launch {
                    Log.i(TAG, "processing samples")
                    val interval = 0.1 // i.e., 100 ms
                    val bufferSize = (interval * sampleRateInHz).toInt() // in samples
                    val buffer = ShortArray(bufferSize)

                    audioRecord?.let { it ->
                        it.startRecording()

                        while (isStarted) {
                            val ret = audioRecord?.read(buffer, 0, buffer.size)
                            ret?.let { n ->
                                val samples = FloatArray(n) { buffer[it] / 32768.0f }
                                samplesChannel.send(samples)
                            }
                        }
                        val samples = FloatArray(0)
                        samplesChannel.send(samples)
                    }
                }

                CoroutineScope(Dispatchers.Default).launch {
                    var buffer = arrayListOf<Float>()
                    var offset = 0
                    val windowSize = 512
                    var isSpeechStarted = false
                    var startTime = System.currentTimeMillis()
                    var lastText = ""
                    var added = false
                    var speechStartOffset = 0


                    while (isStarted) {
                        for (s in samplesChannel) {
                            if (s.isEmpty()) {
                                break
                            }

                            buffer.addAll(s.toList())
                            while (offset + windowSize < buffer.size) {
                                SimulateStreamingAsr.vad.acceptWaveform(
                                    buffer.subList(
                                        offset,
                                        offset + windowSize
                                    ).toFloatArray()
                                )
                                offset += windowSize
                                if (!isSpeechStarted && SimulateStreamingAsr.vad.isSpeechDetected()) {
                                    isSpeechStarted = true
                                    // offset 0.4s
                                    speechStartOffset = offset - 6400
                                    if(speechStartOffset < 0) {
                                        speechStartOffset = 0
                                    }
                                    startTime = System.currentTimeMillis()
                                }
                            }

                            val elapsed = System.currentTimeMillis() - startTime
                            if (isSpeechStarted && elapsed > 200) {
                                // Run ASR every 0.2 seconds == 200 milliseconds
                                // You can change it to some other value
                                val stream = SimulateStreamingAsr.recognizer.createStream()
                                stream.acceptWaveform(
                                    buffer.subList(speechStartOffset, offset).toFloatArray(),
                                    sampleRateInHz
                                )
                                SimulateStreamingAsr.recognizer.decode(stream)
                                val result = SimulateStreamingAsr.recognizer.getResult(stream)
                                stream.release()

                                lastText = result.text

                                if (lastText.isNotBlank()) {
                                    if (!added || resultList.isEmpty()) {
                                        resultList.add(lastText)
                                        added = true
                                    } else {
                                        resultList[resultList.size - 1] = lastText
                                    }

                                    coroutineScope.launch {
                                        lazyColumnListState.animateScrollToItem(resultList.size - 1)
                                    }
                                }

                                startTime = System.currentTimeMillis()
                            }


                            while (!SimulateStreamingAsr.vad.empty()) {
                                val stream = SimulateStreamingAsr.recognizer.createStream()
                                stream.acceptWaveform(
                                    SimulateStreamingAsr.vad.front().samples,
                                    sampleRateInHz
                                )
                                SimulateStreamingAsr.recognizer.decode(stream)
                                val result = SimulateStreamingAsr.recognizer.getResult(stream)
                                stream.release()

                                isSpeechStarted = false
                                SimulateStreamingAsr.vad.pop()

                                buffer = arrayListOf()
                                offset = 0
                                if (lastText.isNotBlank()) {
                                    if (added && resultList.isNotEmpty()) {
                                        resultList[resultList.size - 1] = result.text
                                    } else {
                                        resultList.add(result.text)
                                    }

                                    coroutineScope.launch {
                                        lazyColumnListState.animateScrollToItem(resultList.size - 1)
                                    }
                                    added = false
                                }
                            }
                        }
                    }
                }
            }
        } else {
            audioRecord?.stop()
            audioRecord?.release()
            audioRecord = null
        }
    }

    Box(
        modifier = Modifier.fillMaxSize(),
        contentAlignment = Alignment.TopCenter,
    ) {
        Column(modifier = Modifier) {
            if (!isInitialized) {
                Row(
                    modifier = Modifier.fillMaxWidth(),
                    horizontalArrangement = Arrangement.Center,
                ) {
                    Text(text = "Initializing... Please wait")
                }
            }
            if (asrModelType >= 9000) {
                Row(
                    modifier = Modifier.fillMaxWidth(),
                    horizontalArrangement = Arrangement.Center,
                ) {
                    Text(text = "Qualcomm NPU (HTP backend with QNN)")
                }
            }

            HomeButtonRow(
                isStarted = isStarted,
                isInitialized = isInitialized,
                onRecordingButtonClick = onRecordingButtonClick,
                onCopyButtonClick = {
                    if (resultList.isNotEmpty()) {
                        val s = resultList.mapIndexed { i, s -> "${i + 1}: $s" }
                            .joinToString(separator = "\n")
                        clipboardManager.setText(AnnotatedString(s))

                        Toast.makeText(
                            context,
                            "Copied to clipboard",
                            Toast.LENGTH_SHORT
                        )
                            .show()
                    } else {
                        Toast.makeText(
                            context,
                            "Nothing to copy",
                            Toast.LENGTH_SHORT
                        )
                            .show()

                    }
                },
                onClearButtonClick = {
                    resultList.clear()
                }
            )

            if (resultList.size > 0) {
                LazyColumn(
                    modifier = Modifier
                        .fillMaxWidth()
                        .fillMaxHeight(),
                    contentPadding = PaddingValues(16.dp),
                    state = lazyColumnListState
                ) {
                    itemsIndexed(resultList) { index, line ->
                        Text(text = "${index + 1}: $line")
                    }
                }
            }

        }
    }
}

@SuppressLint("UnrememberedMutableState")
@Composable
private fun HomeButtonRow(
    modifier: Modifier = Modifier,
    isStarted: Boolean,
    isInitialized: Boolean,
    onRecordingButtonClick: () -> Unit,
    onCopyButtonClick: () -> Unit,
    onClearButtonClick: () -> Unit,
) {
    Row(
        modifier = modifier.fillMaxWidth(),
        horizontalArrangement = Arrangement.Center,
    ) {
        Button(
            onClick = onRecordingButtonClick,
            enabled = isInitialized,
        ) {
            Text(text = stringResource(if (isStarted) R.string.stop else R.string.start))
        }

        Spacer(modifier = Modifier.width(24.dp))

        Button(
            onClick = onCopyButtonClick,
            enabled = isInitialized,
        ) {
            Text(text = stringResource(id = R.string.copy))
        }

        Spacer(modifier = Modifier.width(24.dp))

        Button(
            onClick = onClearButtonClick,
            enabled = isInitialized,
        ) {
            Text(text = stringResource(id = R.string.clear))
        }
    }
}


================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/app/src/main/java/com/k2fsa/sherpa/onnx/simulate/streaming/asr/ui/theme/Color.kt
================================================
package com.k2fsa.sherpa.onnx.simulate.streaming.asr.ui.theme

import androidx.compose.ui.graphics.Color

val Purple80 = Color(0xFFD0BCFF)
val PurpleGrey80 = Color(0xFFCCC2DC)
val Pink80 = Color(0xFFEFB8C8)

val Purple40 = Color(0xFF6650a4)
val PurpleGrey40 = Color(0xFF625b71)
val Pink40 = Color(0xFF7D5260)

================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/app/src/main/java/com/k2fsa/sherpa/onnx/simulate/streaming/asr/ui/theme/Theme.kt
================================================
package com.k2fsa.sherpa.onnx.simulate.streaming.asr.ui.theme

import android.app.Activity
import android.os.Build
import androidx.compose.foundation.isSystemInDarkTheme
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.darkColorScheme
import androidx.compose.material3.dynamicDarkColorScheme
import androidx.compose.material3.dynamicLightColorScheme
import androidx.compose.material3.lightColorScheme
import androidx.compose.runtime.Composable
import androidx.compose.ui.platform.LocalContext

private val DarkColorScheme = darkColorScheme(
    primary = Purple80,
    secondary = PurpleGrey80,
    tertiary = Pink80
)

private val LightColorScheme = lightColorScheme(
    primary = Purple40,
    secondary = PurpleGrey40,
    tertiary = Pink40

    /* Other default colors to override
    background = Color(0xFFFFFBFE),
    surface = Color(0xFFFFFBFE),
    onPrimary = Color.White,
    onSecondary = Color.White,
    onTertiary = Color.White,
    onBackground = Color(0xFF1C1B1F),
    onSurface = Color(0xFF1C1B1F),
    */
)

@Composable
fun SimulateStreamingAsrTheme(
    darkTheme: Boolean = isSystemInDarkTheme(),
    // Dynamic color is available on Android 12+
    dynamicColor: Boolean = true,
    content: @Composable () -> Unit
) {
    val colorScheme = when {
        dynamicColor && Build.VERSION.SDK_INT >= Build.VERSION_CODES.S -> {
            val context = LocalContext.current
            if (darkTheme) dynamicDarkColorScheme(context) else dynamicLightColorScheme(context)
        }

        darkTheme -> DarkColorScheme
        else -> LightColorScheme
    }

    MaterialTheme(
        colorScheme = colorScheme,
        typography = Typography,
        content = content
    )
}

================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/app/src/main/java/com/k2fsa/sherpa/onnx/simulate/streaming/asr/ui/theme/Type.kt
================================================
package com.k2fsa.sherpa.onnx.simulate.streaming.asr.ui.theme

import androidx.compose.material3.Typography
import androidx.compose.ui.text.TextStyle
import androidx.compose.ui.text.font.FontFamily
import androidx.compose.ui.text.font.FontWeight
import androidx.compose.ui.unit.sp

// Set of Material typography styles to start with
val Typography = Typography(
    bodyLarge = TextStyle(
        fontFamily = FontFamily.Default,
        fontWeight = FontWeight.Normal,
        fontSize = 16.sp,
        lineHeight = 24.sp,
        letterSpacing = 0.5.sp
    )
    /* Other default text styles to override
    titleLarge = TextStyle(
        fontFamily = FontFamily.Default,
        fontWeight = FontWeight.Normal,
        fontSize = 22.sp,
        lineHeight = 28.sp,
        letterSpacing = 0.sp
    ),
    labelSmall = TextStyle(
        fontFamily = FontFamily.Default,
        fontWeight = FontWeight.Medium,
        fontSize = 11.sp,
        lineHeight = 16.sp,
        letterSpacing = 0.5.sp
    )
    */
)

================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/app/src/main/jniLibs/arm64-v8a/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/app/src/main/jniLibs/armeabi-v7a/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/app/src/main/jniLibs/x86/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/app/src/main/jniLibs/x86_64/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/app/src/main/res/drawable/ic_launcher_background.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">
    <path
        android:fillColor="#3DDC84"
        android:pathData="M0,0h108v108h-108z" />
    <path
        android:fillColor="#00000000"
        android:pathData="M9,0L9,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,0L19,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M29,0L29,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M39,0L39,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M49,0L49,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M59,0L59,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M69,0L69,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M79,0L79,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M89,0L89,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M99,0L99,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,9L108,9"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,19L108,19"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,29L108,29"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,39L108,39"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,49L108,49"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,59L108,59"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,69L108,69"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,79L108,79"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,89L108,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,99L108,99"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,29L89,29"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,39L89,39"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,49L89,49"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,59L89,59"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,69L89,69"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,79L89,79"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M29,19L29,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M39,19L39,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M49,19L49,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M59,19L59,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M69,19L69,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M79,19L79,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
</vector>


================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/app/src/main/res/drawable-v24/ic_launcher_foreground.xml
================================================
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:aapt="http://schemas.android.com/aapt"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">
    <path android:pathData="M31,63.928c0,0 6.4,-11 12.1,-13.1c7.2,-2.6 26,-1.4 26,-1.4l38.1,38.1L107,108.928l-32,-1L31,63.928z">
        <aapt:attr name="android:fillColor">
            <gradient
                android:endX="85.84757"
                android:endY="92.4963"
                android:startX="42.9492"
                android:startY="49.59793"
                android:type="linear">
                <item
                    android:color="#44000000"
                    android:offset="0.0" />
                <item
                    android:color="#00000000"
                    android:offset="1.0" />
            </gradient>
        </aapt:attr>
    </path>
    <path
        android:fillColor="#FFFFFF"
        android:fillType="nonZero"
        android:pathData="M65.3,45.828l3.8,-6.6c0.2,-0.4 0.1,-0.9 -0.3,-1.1c-0.4,-0.2 -0.9,-0.1 -1.1,0.3l-3.9,6.7c-6.3,-2.8 -13.4,-2.8 -19.7,0l-3.9,-6.7c-0.2,-0.4 -0.7,-0.5 -1.1,-0.3C38.8,38.328 38.7,38.828 38.9,39.228l3.8,6.6C36.2,49.428 31.7,56.028 31,63.928h46C76.3,56.028 71.8,49.428 65.3,45.828zM43.4,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2c-0.3,-0.7 -0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C45.3,56.528 44.5,57.328 43.4,57.328L43.4,57.328zM64.6,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2s-0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C66.5,56.528 65.6,57.328 64.6,57.328L64.6,57.328z"
        android:strokeWidth="1"
        android:strokeColor="#00000000" />
</vector>

================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@drawable/ic_launcher_background" />
    <foreground android:drawable="@drawable/ic_launcher_foreground" />
    <monochrome android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@drawable/ic_launcher_background" />
    <foreground android:drawable="@drawable/ic_launcher_foreground" />
    <monochrome android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/app/src/main/res/values/colors.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<resources>
    <color name="purple_200">#FFBB86FC</color>
    <color name="purple_500">#FF6200EE</color>
    <color name="purple_700">#FF3700B3</color>
    <color name="teal_200">#FF03DAC5</color>
    <color name="teal_700">#FF018786</color>
    <color name="black">#FF000000</color>
    <color name="white">#FFFFFFFF</color>
</resources>

================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/app/src/main/res/values/strings.xml
================================================
<resources>
    <string name="app_name">SimulateStreamingAsr</string>
    <string name="start">Start</string>
    <string name="stop">Stop</string>
    <string name="copy">Copy</string>
    <string name="clear">Clear</string>
</resources>

================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/app/src/main/res/values/themes.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<resources>

    <style name="Theme.SimulateStreamingAsr" parent="android:Theme.Material.Light.NoActionBar" />
</resources>

================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/app/src/main/res/xml/backup_rules.xml
================================================
<?xml version="1.0" encoding="utf-8"?><!--
   Sample backup rules file; uncomment and customize as necessary.
   See https://developer.android.com/guide/topics/data/autobackup
   for details.
   Note: This file is ignored for devices older that API 31
   See https://developer.android.com/about/versions/12/backup-restore
-->
<full-backup-content>
    <!--
   <include domain="sharedpref" path="."/>
   <exclude domain="sharedpref" path="device.xml"/>
-->
</full-backup-content>

================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/app/src/main/res/xml/data_extraction_rules.xml
================================================
<?xml version="1.0" encoding="utf-8"?><!--
   Sample data extraction rules file; uncomment and customize as necessary.
   See https://developer.android.com/about/versions/12/backup-restore#xml-changes
   for details.
-->
<data-extraction-rules>
    <cloud-backup>
        <!-- TODO: Use <include> and <exclude> to control what is backed up.
        <include .../>
        <exclude .../>
        -->
    </cloud-backup>
    <!--
    <device-transfer>
        <include .../>
        <exclude .../>
    </device-transfer>
    -->
</data-extraction-rules>

================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/app/src/test/java/com/k2fsa/sherpa/onnx/simulate/streaming/asr/ExampleUnitTest.kt
================================================
package com.k2fsa.sherpa.onnx.simulate.streaming.asr

import org.junit.Test

import org.junit.Assert.*

/**
 * Example local unit test, which will execute on the development machine (host).
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
class ExampleUnitTest {
    @Test
    fun addition_isCorrect() {
        assertEquals(4, 2 + 2)
    }
}

================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/build.gradle.kts
================================================
// Top-level build file where you can add configuration options common to all sub-projects/modules.
plugins {
    alias(libs.plugins.android.application) apply false
    alias(libs.plugins.jetbrains.kotlin.android) apply false
}

================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/gradle/libs.versions.toml
================================================
[versions]
agp = "8.4.0"
kotlin = "1.9.0"
coreKtx = "1.10.0"
junit = "4.13.2"
junitVersion = "1.1.5"
espressoCore = "3.5.1"
lifecycleRuntimeKtx = "2.6.1"
activityCompose = "1.8.0"
composeBom = "2023.08.00"
navigationCompose = "2.8.2"

[libraries]
androidx-core-ktx = { group = "androidx.core", name = "core-ktx", version.ref = "coreKtx" }
junit = { group = "junit", name = "junit", version.ref = "junit" }
androidx-junit = { group = "androidx.test.ext", name = "junit", version.ref = "junitVersion" }
androidx-espresso-core = { group = "androidx.test.espresso", name = "espresso-core", version.ref = "espressoCore" }
androidx-lifecycle-runtime-ktx = { group = "androidx.lifecycle", name = "lifecycle-runtime-ktx", version.ref = "lifecycleRuntimeKtx" }
androidx-activity-compose = { group = "androidx.activity", name = "activity-compose", version.ref = "activityCompose" }
androidx-compose-bom = { group = "androidx.compose", name = "compose-bom", version.ref = "composeBom" }
androidx-ui = { group = "androidx.compose.ui", name = "ui" }
androidx-ui-graphics = { group = "androidx.compose.ui", name = "ui-graphics" }
androidx-ui-tooling = { group = "androidx.compose.ui", name = "ui-tooling" }
androidx-ui-tooling-preview = { group = "androidx.compose.ui", name = "ui-tooling-preview" }
androidx-ui-test-manifest = { group = "androidx.compose.ui", name = "ui-test-manifest" }
androidx-ui-test-junit4 = { group = "androidx.compose.ui", name = "ui-test-junit4" }
androidx-material3 = { group = "androidx.compose.material3", name = "material3" }
androidx-navigation-compose = { group = "androidx.navigation", name = "navigation-compose", version.ref = "navigationCompose" }


[plugins]
android-application = { id = "com.android.application", version.ref = "agp" }
jetbrains-kotlin-android = { id = "org.jetbrains.kotlin.android", version.ref = "kotlin" }


================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/gradle/wrapper/gradle-wrapper.properties
================================================
#Wed May 14 11:10:06 CST 2025
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.6-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists


================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/gradle.properties
================================================
# Project-wide Gradle settings.
# IDE (e.g. Android Studio) users:
# Gradle settings configured through the IDE *will override*
# any settings specified in this file.
# For more details on how to configure your build environment visit
# http://www.gradle.org/docs/current/userguide/build_environment.html
# Specifies the JVM arguments used for the daemon process.
# The setting is particularly useful for tweaking memory settings.
org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
# When configured, Gradle will run in incubating parallel mode.
# This option should only be used with decoupled projects. For more details, visit
# https://developer.android.com/r/tools/gradle-multi-project-decoupled-projects
# org.gradle.parallel=true
# AndroidX package structure to make it clearer which packages are bundled with the
# Android operating system, and which are packaged with your app's APK
# https://developer.android.com/topic/libraries/support-library/androidx-rn
android.useAndroidX=true
# Kotlin code style for this project: "official" or "obsolete":
kotlin.code.style=official
# Enables namespacing of each library's R class so that its R class includes only the
# resources declared in the library itself and none from the library's dependencies,
# thereby reducing the size of the R class for that library
android.nonTransitiveRClass=true

================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/gradlew
================================================
#!/usr/bin/env sh

#
# Copyright 2015 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

##############################################################################
##
##  Gradle start up script for UN*X
##
##############################################################################

# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
    ls=`ls -ld "$PRG"`
    link=`expr "$ls" : '.*-> \(.*\)$'`
    if expr "$link" : '/.*' > /dev/null; then
        PRG="$link"
    else
        PRG=`dirname "$PRG"`"/$link"
    fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null

APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`

# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"

warn () {
    echo "$*"
}

die () {
    echo
    echo "$*"
    echo
    exit 1
}

# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
  CYGWIN* )
    cygwin=true
    ;;
  Darwin* )
    darwin=true
    ;;
  MINGW* )
    msys=true
    ;;
  NONSTOP* )
    nonstop=true
    ;;
esac

CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar


# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
        # IBM's JDK on AIX uses strange locations for the executables
        JAVACMD="$JAVA_HOME/jre/sh/java"
    else
        JAVACMD="$JAVA_HOME/bin/java"
    fi
    if [ ! -x "$JAVACMD" ] ; then
        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
else
    JAVACMD="java"
    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi

# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
    MAX_FD_LIMIT=`ulimit -H -n`
    if [ $? -eq 0 ] ; then
        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
            MAX_FD="$MAX_FD_LIMIT"
        fi
        ulimit -n $MAX_FD
        if [ $? -ne 0 ] ; then
            warn "Could not set maximum file descriptor limit: $MAX_FD"
        fi
    else
        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
    fi
fi

# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi

# For Cygwin or MSYS, switch paths to Windows format before running java
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`

    JAVACMD=`cygpath --unix "$JAVACMD"`

    # We build the pattern for arguments to be converted via cygpath
    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
    SEP=""
    for dir in $ROOTDIRSRAW ; do
        ROOTDIRS="$ROOTDIRS$SEP$dir"
        SEP="|"
    done
    OURCYGPATTERN="(^($ROOTDIRS))"
    # Add a user-defined pattern to the cygpath arguments
    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
    fi
    # Now convert the arguments - kludge to limit ourselves to /bin/sh
    i=0
    for arg in "$@" ; do
        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
        CHECK2=`echo "$arg"|egrep -c "^-"`                                 ### Determine if an option

        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Added a condition
            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
        else
            eval `echo args$i`="\"$arg\""
        fi
        i=`expr $i + 1`
    done
    case $i in
        0) set -- ;;
        1) set -- "$args0" ;;
        2) set -- "$args0" "$args1" ;;
        3) set -- "$args0" "$args1" "$args2" ;;
        4) set -- "$args0" "$args1" "$args2" "$args3" ;;
        5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
        6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
        7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
        8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
        9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
    esac
fi

# Escape application args
save () {
    for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
    echo " "
}
APP_ARGS=`save "$@"`

# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"

exec "$JAVACMD" "$@"


================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/gradlew.bat
================================================
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem      https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem

@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem  Gradle startup script for Windows
@rem
@rem ##########################################################################

@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal

set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"

@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome

set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto execute

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto execute

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar


@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*

:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if  not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1

:mainEnd
if "%OS%"=="Windows_NT" endlocal

:omega


================================================
FILE: android/SherpaOnnxSimulateStreamingAsr/settings.gradle.kts
================================================
pluginManagement {
    repositories {
        google {
            content {
                includeGroupByRegex("com\\.android.*")
                includeGroupByRegex("com\\.google.*")
                includeGroupByRegex("androidx.*")
            }
        }
        mavenCentral()
        gradlePluginPortal()
    }
}
dependencyResolutionManagement {
    repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
    repositories {
        google()
        mavenCentral()
    }
}

rootProject.name = "SimulateStreamingAsr"
include(":app")


================================================
FILE: android/SherpaOnnxSimulateStreamingAsrWearOs/.gitignore
================================================
*.iml
.gradle
/local.properties
/.idea/caches
/.idea/libraries
/.idea/modules.xml
/.idea/workspace.xml
/.idea/navEditor.xml
/.idea/assetWizardSettings.xml
.DS_Store
/build
/captures
.externalNativeBuild
.cxx
local.properties


================================================
FILE: android/SherpaOnnxSimulateStreamingAsrWearOs/app/.gitignore
================================================
/build

================================================
FILE: android/SherpaOnnxSimulateStreamingAsrWearOs/app/build.gradle.kts
================================================
plugins {
    alias(libs.plugins.android.application)
    alias(libs.plugins.jetbrains.kotlin.android)
}

android {
    namespace = "com.k2fsa.sherpa.onnx.simulate.streaming.asr.wear.os"
    compileSdk = 34

    defaultConfig {
        applicationId = "com.k2fsa.sherpa.onnx.simulate.streaming.asr.wear.os"
        minSdk = 28
        targetSdk = 34
        versionCode = 20260320
        versionName = "1.12.31"
        vectorDrawables {
            useSupportLibrary = true
        }

    }

    buildTypes {
        release {
            isMinifyEnabled = false
            proguardFiles(
                getDefaultProguardFile("proguard-android-optimize.txt"),
                "proguard-rules.pro"
            )
        }
    }
    compileOptions {
        sourceCompatibility = JavaVersion.VERSION_1_8
        targetCompatibility = JavaVersion.VERSION_1_8
    }
    kotlinOptions {
        jvmTarget = "1.8"
    }
    buildFeatures {
        compose = true
    }
    composeOptions {
        kotlinCompilerExtensionVersion = "1.5.1"
    }
    packaging {
        resources {
            excludes += "/META-INF/{AL2.0,LGPL2.1}"
        }
    }
}

dependencies {

    implementation(libs.play.services.wearable)
    implementation(platform(libs.compose.bom))
    implementation(libs.ui)
    implementation(libs.ui.tooling.preview)
    implementation(libs.compose.material)
    implementation(libs.compose.foundation)
    implementation(libs.activity.compose)
    implementation(libs.core.splashscreen)
    implementation("com.github.k2-fsa:sherpa-onnx:v1.12.31")
    androidTestImplementation(platform(libs.compose.bom))
    androidTestImplementation(libs.ui.test.junit4)
    debugImplementation(libs.ui.tooling)
    debugImplementation(libs.ui.test.manifest)
}

================================================
FILE: android/SherpaOnnxSimulateStreamingAsrWearOs/app/lint.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<lint>
    <!-- Ignore the IconLocation for the Tile preview images -->
    <issue id="IconLocation">
        <ignore path="res/drawable/tile_preview.png" />
        <ignore path="res/drawable-round/tile_preview.png" />
    </issue>
</lint>

================================================
FILE: android/SherpaOnnxSimulateStreamingAsrWearOs/app/proguard-rules.pro
================================================
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
#   http://developer.android.com/guide/developing/tools/proguard.html

# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
#   public *;
#}

# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable

# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile

================================================
FILE: android/SherpaOnnxSimulateStreamingAsrWearOs/app/src/main/AndroidManifest.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android">

    <uses-permission android:name="android.permission.WAKE_LOCK" />

    <uses-permission android:name="android.permission.RECORD_AUDIO" />

    <uses-feature android:name="android.hardware.type.watch" />

    <application
        android:allowBackup="true"
        android:icon="@mipmap/ic_launcher"
        android:label="@string/app_name"
        android:supportsRtl="true"
        android:theme="@android:style/Theme.DeviceDefault">
        <uses-library
            android:name="com.google.android.wearable"
            android:required="true" />

        <!--
               Set to true if your app is Standalone, that is, it does not require the handheld
               app to run.
        -->
        <meta-data
            android:name="com.google.android.wearable.standalone"
            android:value="true" />

        <activity
            android:name=".presentation.MainActivity"
            android:exported="true"
            android:taskAffinity=""
            android:theme="@style/MainActivityTheme.Starting">
            <intent-filter>
                <action android:name="android.intent.action.MAIN" />

                <category android:name="android.intent.category.LAUNCHER" />
            </intent-filter>
        </activity>
    </application>

</manifest>

================================================
FILE: android/SherpaOnnxSimulateStreamingAsrWearOs/app/src/main/assets/.gitignore
================================================


================================================
FILE: android/SherpaOnnxSimulateStreamingAsrWearOs/app/src/main/java/com/k2fsa/sherpa/onnx/simulate/streaming/asr/wear/os/presentation/HomeScreen.kt
================================================
package com.k2fsa.sherpa.onnx.simulate.streaming.asr.wear.os.presentation

import android.Manifest
import android.app.Activity
import android.content.pm.PackageManager
import android.media.AudioFormat
import android.media.AudioRecord
import android.media.MediaRecorder
import android.util.Log
import androidx.compose.foundation.background
import androidx.compose.foundation.layout.Box
import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.Spacer
import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.foundation.layout.fillMaxWidth
import androidx.compose.foundation.layout.height
import androidx.compose.runtime.Composable
import androidx.compose.runtime.getValue
import androidx.compose.runtime.mutableStateOf
import androidx.compose.runtime.remember
import androidx.compose.runtime.rememberCoroutineScope
import androidx.compose.runtime.setValue
import androidx.compose.ui.Alignment
import androidx.compose.ui.Modifier
import androidx.compose.ui.platform.LocalContext
import androidx.compose.ui.text.style.TextAlign
import androidx.compose.ui.unit.dp
import androidx.core.app.ActivityCompat
import androidx.wear.compose.material.Button
import androidx.wear.compose.material.MaterialTheme
import androidx.wear.compose.material.Text
import com.k2fsa.sherpa.onnx.simulate.streaming.asr.wear.os.presentation.theme.SherpaOnnxSimulateStreamingAsrWearOsTheme
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.channels.Channel
import kotlinx.coroutines.launch


private var audioRecord: AudioRecord? = null

private const val sampleRateInHz = 16000
private var samplesChannel = Channel<FloatArray>(capacity = Channel.UNLIMITED)

@Composable
fun HomeScreen() {
    val activity = LocalContext.current as Activity

    var firstTime by remember { mutableStateOf(true) }
    var isStarted by remember { mutableStateOf(false) }
    var result by remember { mutableStateOf("") }

    val coroutineScope = rememberCoroutineScope()

    val onButtonClick: () -> Unit = {
        firstTime = false
        isStarted = !isStarted


        if (isStarted) {
            if (ActivityCompat.checkSelfPermission(
                    activity, Manifest.permission.RECORD_AUDIO
                ) != PackageManager.PERMISSION_GRANTED
            ) {
                Log.i(TAG, "Recording is not allowed")
            } else {
                // recording is allowed
                val audioSource = MediaRecorder.AudioSource.MIC
                val channelConfig = AudioFormat.CHANNEL_IN_MONO
                val audioFormat = AudioFormat.ENCODING_PCM_16BIT
                val numBytes =
                    AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat)

                audioRecord = AudioRecord(
                    audioSource,
                    sampleRateInHz,
                    AudioFormat.CHANNEL_IN_MONO,
                    AudioFormat.ENCODING_PCM_16BIT,
                    numBytes * 2 // a sample has two bytes as we are using 16-bit PCM
                )

                SimulateStreamingAsr.vad.reset()

                result = "Started! Please speak"

                CoroutineScope(Dispatchers.IO).launch {
                    Log.i(TAG, "processing samples")
                    val interval = 0.2 // i.e., 200 ms
                    val bufferSize = (interval * sampleRateInHz).toInt() // in samples
                    val buffer = ShortArray(bufferSize)

                    audioRecord?.let { it ->
                        it.startRecording()

                        while (isStarted) {
                            val ret = audioRecord?.read(buffer, 0, buffer.size)
                            ret?.let { n ->
                                val samples = FloatArray(n) { buffer[it] / 32768.0f }
                                samplesChannel.send(samples)
                            }
                        }
                        val samples = FloatArray(0)
                        samplesChannel.send(samples)
                    }
                }

                CoroutineScope(Dispatchers.Default).launch {
                    var buffer = arrayListOf<Float>()
                    var offset = 0
                    val windowSize = 512 // change it for ten-vad

                    while (isStarted) {
                        for (s in samplesChannel) {
                            if (s.isEmpty()) {
                                break
                            }

                            buffer.addAll(s.toList())
                            while (offset + windowSize < buffer.size) {
                                SimulateStreamingAsr.vad.acceptWaveform(
                                    buffer.subList(
                                        offset, offset + windowSize
                                    ).toFloatArray()
                                )

                                offset += windowSize
                            }

                            while (!SimulateStreamingAsr.vad.empty()) {
                                val duration = SimulateStreamingAsr.vad.front().samples.count().toFloat() / 16000

                                val s0 = System.currentTimeMillis()
                                val stream = SimulateStreamingAsr.recognizer.createStream()
                                stream.acceptWaveform(
                                    SimulateStreamingAsr.vad.front().samples,
                                    sampleRateInHz
                                )
                                SimulateStreamingAsr.recognizer.decode(stream)

                                val s1 = System.currentTimeMillis()
                                val diff = (s1 - s0).toFloat() / 1000
                                val rtf = diff / duration
                                Log.i(TAG, "rtf: ${rtf}, elapsed: ${diff}, duration: ${duration}")
                                val r = SimulateStreamingAsr.recognizer.getResult(stream)
                                stream.release()

                                Log.i(TAG, "result: ${r.text}")

                                coroutineScope.launch {
                                    result = r.text
                                }

                                SimulateStreamingAsr.vad.pop()
                                buffer = arrayListOf()
                                offset = 0
                            }
                        }
                    }
                }
            }
        } else {
            audioRecord?.stop()
            audioRecord?.release()
            audioRecord = null

            result = "Click Start and speak"
        }
    }

    SherpaOnnxSimulateStreamingAsrWearOsTheme {
        Box(
            modifier = Modifier
                .fillMaxSize()
                .background(MaterialTheme.colors.background),
            contentAlignment = Alignment.Center
        ) {
            Column(
                horizontalAlignment = Alignment.CenterHorizontally
            ) {
                Spacer(modifier = Modifier.height(16.dp))
                if (firstTime) {
                    ShowMessage()
                } else {
                    ShowResult(result)
                }

                Spacer(modifier = Modifier.height(32.dp))

                Button(
                    onClick = onButtonClick
                ) {
                    if (isStarted) {
                        Text("Stop")
                    } else {
                        Text("Start")
                    }
                }
            }
        }
    }

}

@Composable
fun ShowMessage() {
    val msg = "Real-time\nspeech recognition\nwith\nNext-gen Kaldi"
    Text(
        modifier = Modifier.fillMaxWidth(),
        textAlign = TextAlign.Center,
        color = MaterialTheme.colors.primary,
        text = msg,
    )
}

@Composable
fun ShowResult(result: String) {
    var msg: String = result
    if (msg.length > 10) {
        val n = 5
        val first = result.take(n)
        val last = result.takeLast(result.length - n)
        msg = "${first}\n${last}"
    }
    Text(
        modifier = Modifier.fillMaxWidth(),
        textAlign = TextAlign.Center,
        color = MaterialTheme.colors.primary,
        text = msg,
    )
}

================================================
FILE: android/SherpaOnnxSimulateStreamingAsrWearOs/app/src/main/java/com/k2fsa/sherpa/onnx/simulate/streaming/asr/wear/os/presentation/MainActivity.kt
================================================
/* While this template provides a good starting point for using Wear Compose, you can always
 * take a look at https://github.com/android/wear-os-samples/tree/main/ComposeStarter and
 * https://github.com/android/wear-os-samples/tree/main/ComposeAdvanced to find the most up to date
 * changes to the libraries and their usages.
 */

package com.k2fsa.sherpa.onnx.simulate.streaming.asr.wear.os.presentation

import android.Manifest
import android.content.pm.PackageManager
import android.os.Bundle
import android.util.Log
import android.widget.Toast
import androidx.activity.ComponentActivity
import androidx.activity.compose.setContent
import androidx.compose.foundation.background
import androidx.compose.foundation.layout.Box
import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.foundation.layout.fillMaxWidth
import androidx.compose.runtime.Composable
import androidx.compose.ui.Alignment
import androidx.compose.ui.Modifier
import androidx.compose.ui.res.stringResource
import androidx.compose.ui.text.style.TextAlign
import androidx.compose.ui.tooling.preview.Devices
import androidx.compose.ui.tooling.preview.Preview
import androidx.core.app.ActivityCompat
import androidx.core.splashscreen.SplashScreen.Companion.installSplashScreen
import androidx.wear.compose.material.MaterialTheme
import androidx.wear.compose.material.Text
import androidx.wear.compose.material.TimeText
import com.k2fsa.sherpa.onnx.simulate.streaming.asr.wear.os.R
import com.k2fsa.sherpa.onnx.simulate.streaming.asr.wear.os.presentation.theme.SherpaOnnxSimulateStreamingAsrWearOsTheme

const val TAG = "sherpa-onnx"
private const val REQUEST_RECORD_AUDIO_PERMISSION = 200

class MainActivity : ComponentActivity() {
    private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
    override fun onCreate(savedInstanceState: Bundle?) {
        installSplashScreen()

        super.onCreate(savedInstanceState)

        setTheme(android.R.style.Theme_DeviceDefault)

        setContent {
            WearApp("Android")
        }

        ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)
        SimulateStreamingAsr.initOfflineRecognizer(this.assets, this.application)
        SimulateStreamingAsr.initVad(this.assets)
    }

    override fun onRequestPermissionsResult(
        requestCode: Int,
        permissions: Array<out String>,
        grantResults: IntArray
    ) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults)

        val permissionToRecordAccepted = if (requestCode == REQUEST_RECORD_AUDIO_PERMISSION) {
            grantResults[0] == PackageManager.PERMISSION_GRANTED
        } else {
            false
        }

        if (!permissionToRecordAccepted) {
            Log.e(TAG, "Audio record is disallowed")
            Toast.makeText(
                this,
                "This App needs access to the microphone",
                Toast.LENGTH_SHORT
            )
                .show()
            finish()
        }
        Log.i(TAG, "Audio record is permitted")
    }
}

@Composable
fun WearApp(greetingName: String) {
    HomeScreen()
}

@Composable
fun Greeting(greetingName: String) {
    Text(
        modifier = Modifier.fillMaxWidth(),
        textAlign = TextAlign.Center,
        color = MaterialTheme.colors.primary,
        text = stringResource(R.string.hello_world, greetingName)
    )
}

================================================
FILE: android/SherpaOnnxSimulateStreamingAsrWearOs/app/src/main/java/com/k2fsa/sherpa/onnx/simulate/streaming/asr/wear/os/presentation/SimulateStreamingAsr.kt
================================================
package com.k2fsa.sherpa.onnx.simulate.streaming.asr.wear.os.presentation

import android.app.Application
import android.content.res.AssetManager
import android.util.Log
import com.k2fsa.sherpa.onnx.HomophoneReplacerConfig
import com.k2fsa.sherpa.onnx.OfflineRecognizer
import com.k2fsa.sherpa.onnx.OfflineRecognizerConfig
import com.k2fsa.sherpa.onnx.Vad
import com.k2fsa.sherpa.onnx.getOfflineModelConfig
import com.k2fsa.sherpa.onnx.getVadModelConfig
import java.io.File
import java.io.FileOutputStream
import java.io.IOException


object SimulateStreamingAsr {
    private var _recognizer: OfflineRecognizer? = null
    val recognizer: OfflineRecognizer
        get() {
            return _recognizer!!
        }

    private var _vad: Vad? = null
    val vad: Vad
        get() {
            return _vad!!
        }

    fun initOfflineRecognizer(assetManager: AssetManager? = null, application: Application) {
        synchronized(this) {
            if (_recognizer != null) {
                return
            }
            Log.i(TAG, "Initializing sherpa-onnx offline recognizer")
            // Please change getOfflineModelConfig() to add new models
            // See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
            // for a list of available models
            val asrModelType = 39
            val asrRuleFsts: String?
            asrRuleFsts = null
            Log.i(TAG, "Select model type $asrModelType for ASR")

            val useHr = false
            val hr = HomophoneReplacerConfig(
                // Used only when useHr is true
                // Please download the following 2 files from
                // https://github.com/k2-fsa/sherpa-onnx/releases/tag/hr-files
                //
                // lexicon.txt can be shared by different apps
                //
                // replace.fst is specific for an app
                lexicon = "lexicon.txt",
                ruleFsts = "replace.fst",
            )

            val config = OfflineRecognizerConfig(
                modelConfig = getOfflineModelConfig(type = asrModelType)!!,
            )

            if (config.modelConfig.numThreads == 1) {
                config.modelConfig.numThreads = 2
            }
            config.modelConfig.debug = true

            if (asrRuleFsts != null) {
                config.ruleFsts = asrRuleFsts
            }

            if (useHr) {
                config.hr = hr
            }

            _recognizer = OfflineRecognizer(
                assetManager = assetManager,
                config = config,
            )

            Log.i(TAG, "sherpa-onnx offline recognizer initialized")
        }
    }

    fun initVad(assetManager: AssetManager? = null) {
        if (_vad != null) {
            return
        }
        val type = 0
        Log.i(TAG, "Select VAD model type $type")
        val config = getVadModelConfig(type)

        _vad = Vad(
            assetManager = assetManager,
            config = config!!,
        )
        Log.i(TAG, "sherpa-onnx vad initialized")
    }
}


================================================
FILE: android/SherpaOnnxSimulateStreamingAsrWearOs/app/src/main/java/com/k2fsa/sherpa/onnx/simulate/streaming/asr/wear/os/presentation/theme/Theme.kt
================================================
package com.k2fsa.sherpa.onnx.simulate.streaming.asr.wear.os.presentation.theme

import androidx.compose.runtime.Composable
import androidx.wear.compose.material.MaterialTheme

@Composable
fun SherpaOnnxSimulateStreamingAsrWearOsTheme(
    content: @Composable () -> Unit
) {
    /**
     * Empty theme to customize for your app.
     * See: https://developer.android.com/jetpack/compose/designsystems/custom
     */
    MaterialTheme(
        content = content
    )
}

================================================
FILE: android/SherpaOnnxSimulateStreamingAsrWearOs/app/src/main/res/drawable/splash_icon.xml
================================================
<?xml version="1.0" encoding="utf-8"?>

<layer-list xmlns:android="http://schemas.android.com/apk/res/android">
    <item
        android:width="48dp"
        android:height="48dp"
        android:gravity="center">
        <shape android:shape="oval">
            <solid android:color="#FFFFFF" />
        </shape>
    </item>
    <item
        android:width="40dp"
        android:height="40dp"
        android:gravity="center">
        <vector
            android:width="24dp"
            android:height="24dp"
            android:tint="#000000"
            android:viewportWidth="24"
            android:viewportHeight="24">
            <path
                android:fillColor="#FF000000"
                android:pathData="M17.6,11.48 L19.44,8.3a0.63,0.63 0,0 0,-1.09 -0.63l-1.88,3.24a11.43,11.43 0,0 0,-8.94 0L5.65,7.67a0.63,0.63 0,0 0,-1.09 0.63L6.4,11.48A10.81,10.81 0,0 0,1 20L23,20A10.81,10.81 0,0 0,17.6 11.48ZM7,17.25A1.25,1.25 0,1 1,8.25 16,1.25 1.25,0 0,1 7,17.25ZM17,17.25A1.25,1.25 0,1 1,18.25 16,1.25 1.25,0 0,1 17,17.25Z" />
        </vector>
    </item>
</layer-list>


================================================
FILE: android/SherpaOnnxSimulateStreamingAsrWearOs/app/src/main/res/values/strings.xml
================================================
<resources>
    <string name="app_name">SherpaOnnxSimulateStreamingAsrWearOs</string>
    <!--
    This string is used for square devices and overridden by hello_world in
    values-round/strings.xml for round devices.
    -->
    <string name="hello_world">From the Square world,\nHello, %1$s!</string>
</resources>

================================================
FILE: android/SherpaOnnxSimulateStreamingAsrWearOs/app/src/main/res/values/styles.xml
================================================
<resources>

    <style name="MainActivityTheme.Starting" parent="Theme.SplashScreen">
        <item name="windowSplashScreenBackground">@android:color/black</item>
        <item name="windowSplashScreenAnimatedIcon">@drawable/splash_icon</item>
        <item name="postSplashScreenTheme">@android:style/Theme.DeviceDefault</item>
    </style>
</resources>

================================================
FILE: android/SherpaOnnxSimulateStreamingAsrWearOs/app/src/main/res/values-round/strings.xml
================================================
<resources>
    <string name="hello_world">From the Round world,\nHello, %1$s!</string>
</resources>

================================================
FILE: android/SherpaOnnxSimulateStreamingAsrWearOs/build.gradle.kts
================================================
// Top-level build file where you can add configuration options common to all sub-projects/modules.
plugins {
    alias(libs.plugins.android.application) apply false
    alias(libs.plugins.jetbrains.kotlin.android) apply false
}

================================================
FILE: android/SherpaOnnxSimulateStreamingAsrWearOs/gradle/libs.versions.toml
================================================
[versions]
agp = "8.4.0"
kotlin = "1.9.0"
playServicesWearable = "18.0.0"
composeBom = "2023.08.00"
composeMaterial = "1.2.1"
composeFoundation = "1.2.1"
activityCompose = "1.7.2"
coreSplashscreen = "1.0.1"

[libraries]
play-services-wearable = { group = "com.google.android.gms", name = "play-services-wearable", version.ref = "playServicesWearable" }
compose-bom = { group = "androidx.compose", name = "compose-bom", version.ref = "composeBom" }
ui = { group = "androidx.compose.ui", name = "ui" }
ui-tooling-preview = { group = "androidx.compose.ui", name = "ui-tooling-preview" }
ui-tooling = { group = "androidx.compose.ui", name = "ui-tooling" }
ui-test-manifest = { group = "androidx.compose.ui", name = "ui-test-manifest" }
ui-test-junit4 = { group = "androidx.compose.ui", name = "ui-test-junit4" }
compose-material = { group = "androidx.wear.compose", name = "compose-material", version.ref = "composeMaterial" }
compose-foundation = { group = "androidx.wear.compose", name = "compose-foundation", version.ref = "composeFoundation" }
activity-compose = { group = "androidx.activity", name = "activity-compose", version.ref = "activityCompose" }
core-splashscreen = { group = "androidx.core", name = "core-splashscreen", version.ref = "coreSplashscreen" }

[plugins]
android-application = { id = "com.android.application", version.ref = "agp" }
jetbrains-kotlin-android = { id = "org.jetbrains.kotlin.android", version.ref = "kotlin" }


================================================
FILE: android/SherpaOnnxSimulateStreamingAsrWearOs/gradle/wrapper/gradle-wrapper.properties
================================================
#Tue Jul 15 18:18:24 CST 2025
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.6-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists


================================================
FILE: android/SherpaOnnxSimulateStreamingAsrWearOs/gradle.properties
================================================
# Project-wide Gradle settings.
# IDE (e.g. Android Studio) users:
# Gradle settings configured through the IDE *will override*
# any settings specified in this file.
# For more details on how to configure your build environment visit
# http://www.gradle.org/docs/current/userguide/build_environment.html
# Specifies the JVM arguments used for the daemon process.
# The setting is particularly useful for tweaking memory settings.
org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
# When configured, Gradle will run in incubating parallel mode.
# This option should only be used with decoupled projects. For more details, visit
# https://developer.android.com/r/tools/gradle-multi-project-decoupled-projects
# org.gradle.parallel=true
# AndroidX package structure to make it clearer which packages are bundled with the
# Android operating system, and which are packaged with your app's APK
# https://developer.android.com/topic/libraries/support-library/androidx-rn
android.useAndroidX=true
# Kotlin code style for this project: "official" or "obsolete":
kotlin.code.style=official
# Enables namespacing of each library's R class so that its R class includes only the
# resources declared in the library itself and none from the library's dependencies,
# thereby reducing the size of the R class for that library
android.nonTransitiveRClass=true

================================================
FILE: android/SherpaOnnxSimulateStreamingAsrWearOs/gradlew
================================================
#!/usr/bin/env sh

#
# Copyright 2015 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

##############################################################################
##
##  Gradle start up script for UN*X
##
##############################################################################

# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
    ls=`ls -ld "$PRG"`
    link=`expr "$ls" : '.*-> \(.*\)$'`
    if expr "$link" : '/.*' > /dev/null; then
        PRG="$link"
    else
        PRG=`dirname "$PRG"`"/$link"
    fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null

APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`

# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"

warn () {
    echo "$*"
}

die () {
    echo
    echo "$*"
    echo
    exit 1
}

# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
  CYGWIN* )
    cygwin=true
    ;;
  Darwin* )
    darwin=true
    ;;
  MINGW* )
    msys=true
    ;;
  NONSTOP* )
    nonstop=true
    ;;
esac

CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar


# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
        # IBM's JDK on AIX uses strange locations for the executables
        JAVACMD="$JAVA_HOME/jre/sh/java"
    else
        JAVACMD="$JAVA_HOME/bin/java"
    fi
    if [ ! -x "$JAVACMD" ] ; then
        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
else
    JAVACMD="java"
    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi

# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
    MAX_FD_LIMIT=`ulimit -H -n`
    if [ $? -eq 0 ] ; then
        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
            MAX_FD="$MAX_FD_LIMIT"
        fi
        ulimit -n $MAX_FD
        if [ $? -ne 0 ] ; then
            warn "Could not set maximum file descriptor limit: $MAX_FD"
        fi
    else
        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
    fi
fi

# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi

# For Cygwin or MSYS, switch paths to Windows format before running java
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`

    JAVACMD=`cygpath --unix "$JAVACMD"`

    # We build the pattern for arguments to be converted via cygpath
    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
    SEP=""
    for dir in $ROOTDIRSRAW ; do
        ROOTDIRS="$ROOTDIRS$SEP$dir"
        SEP="|"
    done
    OURCYGPATTERN="(^($ROOTDIRS))"
    # Add a user-defined pattern to the cygpath arguments
    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
    fi
    # Now convert the arguments - kludge to limit ourselves to /bin/sh
    i=0
    for arg in "$@" ; do
        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
        CHECK2=`echo "$arg"|egrep -c "^-"`                                 ### Determine if an option

        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Added a condition
            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
        else
            eval `echo args$i`="\"$arg\""
        fi
        i=`expr $i + 1`
    done
    case $i in
        0) set -- ;;
        1) set -- "$args0" ;;
        2) set -- "$args0" "$args1" ;;
        3) set -- "$args0" "$args1" "$args2" ;;
        4) set -- "$args0" "$args1" "$args2" "$args3" ;;
        5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
        6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
        7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
        8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
        9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
    esac
fi

# Escape application args
save () {
    for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
    echo " "
}
APP_ARGS=`save "$@"`

# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"

exec "$JAVACMD" "$@"


================================================
FILE: android/SherpaOnnxSimulateStreamingAsrWearOs/gradlew.bat
================================================
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem      https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem

@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem  Gradle startup script for Windows
@rem
@rem ##########################################################################

@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal

set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"

@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome

set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto execute

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto execute

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar


@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*

:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if  not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1

:mainEnd
if "%OS%"=="Windows_NT" endlocal

:omega


================================================
FILE: android/SherpaOnnxSimulateStreamingAsrWearOs/settings.gradle.kts
================================================
pluginManagement {
    repositories {
        google {
            content {
                includeGroupByRegex("com\\.android.*")
                includeGroupByRegex("com\\.google.*")
                includeGroupByRegex("androidx.*")
            }
        }
        mavenCentral()
        gradlePluginPortal()
    }
}
dependencyResolutionManagement {
    repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
    repositories {
        google()
        mavenCentral()
        maven { url = uri("https://jitpack.io") }
    }
}

rootProject.name = "SherpaOnnxSimulateStreamingAsrWearOs"
include(":app")


================================================
FILE: android/SherpaOnnxSpeakerDiarization/.gitignore
================================================
*.iml
.gradle
/local.properties
/.idea/caches
/.idea/libraries
/.idea/modules.xml
/.idea/workspace.xml
/.idea/navEditor.xml
/.idea/assetWizardSettings.xml
.DS_Store
/build
/captures
.externalNativeBuild
.cxx
local.properties


================================================
FILE: android/SherpaOnnxSpeakerDiarization/app/.gitignore
================================================
/build

================================================
FILE: android/SherpaOnnxSpeakerDiarization/app/build.gradle.kts
================================================
plugins {
    alias(libs.plugins.android.application)
    alias(libs.plugins.jetbrains.kotlin.android)
}

android {
    namespace = "com.k2fsa.sherpa.onnx.speaker.diarization"
    compileSdk = 34

    defaultConfig {
        applicationId = "com.k2fsa.sherpa.onnx.speaker.diarization"
        minSdk = 21
        targetSdk = 34
        versionCode = 20260320
        versionName = "1.12.31"

        testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
        vectorDrawables {
            useSupportLibrary = true
        }
    }

    buildTypes {
        release {
            isMinifyEnabled = false
            proguardFiles(
                getDefaultProguardFile("proguard-android-optimize.txt"),
                "proguard-rules.pro"
            )
        }
    }
    compileOptions {
        sourceCompatibility = JavaVersion.VERSION_1_8
        targetCompatibility = JavaVersion.VERSION_1_8
    }
    kotlinOptions {
        jvmTarget = "1.8"
    }
    buildFeatures {
        compose = true
    }
    composeOptions {
        kotlinCompilerExtensionVersion = "1.5.1"
    }
    packaging {
        resources {
            excludes += "/META-INF/{AL2.0,LGPL2.1}"
        }
    }
}

dependencies {

    implementation(libs.androidx.core.ktx)
    implementation(libs.androidx.lifecycle.runtime.ktx)
    implementation(libs.androidx.activity.compose)
    implementation(platform(libs.androidx.compose.bom))
    implementation(libs.androidx.ui)
    implementation(libs.androidx.ui.graphics)
    implementation(libs.androidx.ui.tooling.preview)
    implementation(libs.androidx.material3)
    implementation(libs.androidx.navigation.compose)
    implementation(libs.androidx.documentfile)
    testImplementation(libs.junit)
    androidTestImplementation(libs.androidx.junit)
    androidTestImplementation(libs.androidx.espresso.core)
    androidTestImplementation(platform(libs.androidx.compose.bom))
    androidTestImplementation(libs.androidx.ui.test.junit4)
    debugImplementation(libs.androidx.ui.tooling)
    debugImplementation(libs.androidx.ui.test.manifest)
}

================================================
FILE: android/SherpaOnnxSpeakerDiarization/app/proguard-rules.pro
================================================
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
#   http://developer.android.com/guide/developing/tools/proguard.html

# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
#   public *;
#}

# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable

# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile

================================================
FILE: android/SherpaOnnxSpeakerDiarization/app/src/androidTest/java/com/k2fsa/sherpa/onnx/speaker/diarization/ExampleInstrumentedTest.kt
================================================
package com.k2fsa.sherpa.onnx.speaker.diarization

import androidx.test.platform.app.InstrumentationRegistry
import androidx.test.ext.junit.runners.AndroidJUnit4

import org.junit.Test
import org.junit.runner.RunWith

import org.junit.Assert.*

/**
 * Instrumented test, which will execute on an Android device.
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
@RunWith(AndroidJUnit4::class)
class ExampleInstrumentedTest {
    @Test
    fun useAppContext() {
        // Context of the app under test.
        val appContext = InstrumentationRegistry.getInstrumentation().targetContext
        assertEquals("com.k2fsa.sherpa.onnx.speaker.diarization", appContext.packageName)
    }
}

================================================
FILE: android/SherpaOnnxSpeakerDiarization/app/src/main/AndroidManifest.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:tools="http://schemas.android.com/tools">

    <uses-permission
        android:name="android.permission.READ_EXTERNAL_STORAGE"
        android:maxSdkVersion="32" />

    <application
        android:allowBackup="true"
        android:dataExtractionRules="@xml/data_extraction_rules"
        android:fullBackupContent="@xml/backup_rules"
        android:icon="@mipmap/ic_launcher"
        android:label="@string/app_name"
        android:roundIcon="@mipmap/ic_launcher_round"
        android:supportsRtl="true"
        android:theme="@style/Theme.SherpaOnnxSpeakerDiarization"
        tools:targetApi="31">
        <activity
            android:name=".MainActivity"
            android:exported="true"
            android:label="@string/app_name"
            android:theme="@style/Theme.SherpaOnnxSpeakerDiarization">
            <intent-filter>
                <action android:name="android.intent.action.MAIN" />

                <category android:name="android.intent.category.LAUNCHER" />
            </intent-filter>
        </activity>
    </application>

</manifest>

================================================
FILE: android/SherpaOnnxSpeakerDiarization/app/src/main/assets/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/BarItem.kt
================================================
package com.k2fsa.sherpa.onnx.speaker.diarization

import androidx.compose.ui.graphics.vector.ImageVector

data class BarItem(
    val title: String,

    // see https://www.composables.com/icons
    // and
    // https://developer.android.com/reference/kotlin/androidx/compose/material/icons/filled/package-summary
    val image: ImageVector,
    val route: String,
)

================================================
FILE: android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/MainActivity.kt
================================================
package com.k2fsa.sherpa.onnx.speaker.diarization

import android.os.Bundle
import androidx.activity.ComponentActivity
import androidx.activity.compose.setContent
import androidx.activity.enableEdgeToEdge
import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.foundation.layout.padding
import androidx.compose.material3.CenterAlignedTopAppBar
import androidx.compose.material3.ExperimentalMaterial3Api
import androidx.compose.material3.Icon
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.NavigationBar
import androidx.compose.material3.NavigationBarItem
import androidx.compose.material3.Scaffold
import androidx.compose.material3.Surface
import androidx.compose.material3.Text
import androidx.compose.material3.TopAppBarDefaults
import androidx.compose.runtime.Composable
import androidx.compose.runtime.getValue
import androidx.compose.ui.Modifier
import androidx.compose.ui.text.font.FontWeight
import androidx.compose.ui.tooling.preview.Preview
import androidx.navigation.NavGraph.Companion.findStartDestination
import androidx.navigation.NavHostController
import androidx.navigation.compose.NavHost
import androidx.navigation.compose.composable
import androidx.navigation.compose.currentBackStackEntryAsState
import androidx.navigation.compose.rememberNavController
import com.k2fsa.sherpa.onnx.speaker.diarization.screens.HelpScreen
import com.k2fsa.sherpa.onnx.speaker.diarization.screens.HomeScreen
import com.k2fsa.sherpa.onnx.speaker.diarization.ui.theme.SherpaOnnxSpeakerDiarizationTheme

const val TAG = "sherpa-onnx-sd"

class MainActivity : ComponentActivity() {
    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        enableEdgeToEdge()
        setContent {
            SherpaOnnxSpeakerDiarizationTheme {
                // A surface container using the 'background' color from the theme
                Surface(
                    modifier = Modifier.fillMaxSize(),
                    color = MaterialTheme.colorScheme.background
                ) {
                    MainScreen()
                }
            }
        }
        SpeakerDiarizationObject.initSpeakerDiarization(this.assets)
    }
}

@OptIn(ExperimentalMaterial3Api::class)
@Composable
fun MainScreen(modifier: Modifier = Modifier) {
    val navController = rememberNavController()
    Scaffold(
        topBar = {
            CenterAlignedTopAppBar(
                colors = TopAppBarDefaults.topAppBarColors(
                    containerColor = MaterialTheme.colorScheme.primaryContainer,
                    titleContentColor = MaterialTheme.colorScheme.primary,
                ),
                title = {
                    Text(
                        "Next-gen Kaldi: Speaker Diarization",
                        fontWeight = FontWeight.Bold,
                    )
                },
            )
        },
        content = { padding ->
            Column(Modifier.padding(padding)) {
                NavigationHost(navController = navController)

            }
        },
        bottomBar = {
            BottomNavigationBar(navController = navController)
        }
    )
}

@Composable
fun NavigationHost(navController: NavHostController) {
    NavHost(navController = navController, startDestination = NavRoutes.Home.route) {
        composable(NavRoutes.Home.route) {
            HomeScreen()
        }

        composable(NavRoutes.Help.route) {
            HelpScreen()
        }
    }
}

@Composable
fun BottomNavigationBar(navController: NavHostController) {
    NavigationBar {
        val backStackEntry by navController.currentBackStackEntryAsState()
        val currentRoute = backStackEntry?.destination?.route

        NavBarItems.BarItems.forEach { navItem ->
            NavigationBarItem(selected = currentRoute == navItem.route,
                onClick = {
                    navController.navigate(navItem.route) {
                        popUpTo(navController.graph.findStartDestination().id) {
                            saveState = true
                        }
                        launchSingleTop = true
                        restoreState = true
                    }
                },
                icon = {
                    Icon(imageVector = navItem.image, contentDescription = navItem.title)
                }, label = {
                    Text(text = navItem.title)
                })
        }
    }
}

@Preview(showBackground = true)
@Composable
fun MainScreenPreview() {
    SherpaOnnxSpeakerDiarizationTheme {
        MainScreen()
    }
}

================================================
FILE: android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/NavBarItems.kt
================================================
package com.k2fsa.sherpa.onnx.speaker.diarization

import androidx.compose.material.icons.Icons
import androidx.compose.material.icons.filled.Home
import androidx.compose.material.icons.filled.Info

object NavBarItems {
    val BarItems = listOf(
        BarItem(
            title = "Home",
            image = Icons.Filled.Home,
            route = "home",
        ),
        BarItem(
            title = "Help",
            image = Icons.Filled.Info,
            route = "help",
        ),
    )
}

================================================
FILE: android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/NavRoutes.kt
================================================
package com.k2fsa.sherpa.onnx.speaker.diarization

sealed class NavRoutes(val route: String) {
    object Home : NavRoutes("home")
    object Help : NavRoutes("help")
}

================================================
FILE: android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ReadWaveFile.kt
================================================
package com.k2fsa.sherpa.onnx.speaker.diarization.screens

import android.content.Context
import android.media.AudioFormat
import android.media.MediaCodec
import android.media.MediaExtractor
import android.media.MediaFormat
import android.net.Uri

data class WaveData(
    val sampleRate: Int? = null,
    val samples: FloatArray? = null,
    val msg: String? = null
)

// It supports only 16-bit encoded wave files
//
// References
// - https://gist.github.com/a-m-s/1991ab18fbcb0fcc2cf9
// - https://github.com/taehwandev/MediaCodecExample/blob/master/app/src/main/java/tech/thdev/mediacodecexample/audio/AACAudioDecoderThread.kt
fun readUri(context: Context, uri: Uri): WaveData {
    val extractor = MediaExtractor()
    extractor.setDataSource(context, uri, null)

    val samplesList: MutableList<FloatArray> = ArrayList()

    for (i in 0 until extractor.trackCount) {
        val format = extractor.getTrackFormat(i)
        val mime = format.getString(MediaFormat.KEY_MIME)
        if (mime?.startsWith("audio/") == true) {
            extractor.selectTrack(i)

            var encoding: Int = -1
            try {
                encoding = format.getInteger(MediaFormat.KEY_PCM_ENCODING)
            } catch (_: Exception) {
            }

            if (encoding != AudioFormat.ENCODING_PCM_16BIT) {
                return WaveData(msg = "We support only 16-bit encoded wave files")
            }

            val sampleRate = format.getInteger(MediaFormat.KEY_SAMPLE_RATE)
            val decoder = MediaCodec.createDecoderByType(mime)
            decoder.configure(format, null, null, 0)
            decoder.start()

            val inputBuffers = decoder.inputBuffers
            var outputBuffers = decoder.outputBuffers

            val info = MediaCodec.BufferInfo()
            var eof = false

            var outputBufferIndex = -1

            while (true) {
                if (!eof) {
                    val inputBufferIndex = decoder.dequeueInputBuffer(10000)
                    if (inputBufferIndex > 0) {
                        val size = extractor.readSampleData(inputBuffers[inputBufferIndex], 0)
                        if (size < 0) {
                            decoder.queueInputBuffer(
                                inputBufferIndex,
                                0,
                                0,
                                0,
                                MediaCodec.BUFFER_FLAG_END_OF_STREAM
                            )
                            eof = true
                        } else {
                            decoder.queueInputBuffer(
                                inputBufferIndex,
                                0,
                                size,
                                extractor.sampleTime,
                                0
                            )
                            extractor.advance()
                        }
                    }
                } // if (!eof)

                if (outputBufferIndex >= 0) {
                    outputBuffers[outputBufferIndex].position(0)
                }

                outputBufferIndex = decoder.dequeueOutputBuffer(info, 10000)
                if (outputBufferIndex >= 0) {
                    if (info.flags != 0) {
                        decoder.stop()
                        decoder.release()

                        var k = 0
                        for (s in samplesList) {
                            k += s.size
                        }
                        if (k == 0) {
                            return WaveData(msg = "Failed to read selected file")
                        }

                        val ans = FloatArray(k)
                        k = 0
                        for (s in samplesList) {
                            s.copyInto(ans, k)
                            k += s.size
                        }

                        return WaveData(sampleRate = sampleRate, samples = ans)
                    }

                    val buffer = outputBuffers[outputBufferIndex]
                    val chunk = ByteArray(info.size)
                    buffer[chunk]
                    buffer.clear()

                    val numSamples = info.size / 2

                    val samples = FloatArray(numSamples)
                    for (k in 0 until numSamples) {
                        // assume little endian
                        val s = chunk[2 * k] + (chunk[2 * k + 1] * 256.0f)

                        samples[k] = s / 32768.0f
                    }
                    samplesList.add(samples)

                    decoder.releaseOutputBuffer(outputBufferIndex, false)
                } else if (outputBufferIndex == MediaCodec.INFO_OUTPUT_BUFFERS_CHANGED) {
                    outputBuffers = decoder.outputBuffers
                }
            }
        }
    }

    extractor.release()
    return WaveData(msg = "not an audio file")
}

================================================
FILE: android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/SpeakerDiarizationObject.kt
================================================
package com.k2fsa.sherpa.onnx.speaker.diarization

import android.content.res.AssetManager
import android.util.Log
import com.k2fsa.sherpa.onnx.FastClusteringConfig
import com.k2fsa.sherpa.onnx.OfflineSpeakerDiarization
import com.k2fsa.sherpa.onnx.OfflineSpeakerDiarizationConfig
import com.k2fsa.sherpa.onnx.OfflineSpeakerSegmentationModelConfig
import com.k2fsa.sherpa.onnx.OfflineSpeakerSegmentationPyannoteModelConfig
import com.k2fsa.sherpa.onnx.SpeakerEmbeddingExtractorConfig

// Please download
// https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
// then unzip it, rename model.onnx to segmentation.onnx, and mv
// segmentation.onnx to the assets folder
val segmentationModel = "segmentation.onnx"

// please download it from
// https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
// and rename it to embedding.onnx
// and move it to the assets folder
val embeddingModel = "embedding.onnx"

// in the end, your assets folder should look like below
/*
(py38) fangjuns-MacBook-Pro:assets fangjun$ pwd
/Users/fangjun/open-source/sherpa-onnx/android/SherpaOnnxSpeakerDiarization/app/src/main/assets
(py38) fangjuns-MacBook-Pro:assets fangjun$ ls -lh
total 89048
-rw-r--r--  1 fangjun  staff    38M Oct 12 20:28 embedding.onnx
-rw-r--r--  1 fangjun  staff   5.7M Oct 12 20:28 segmentation.onnx
 */

object SpeakerDiarizationObject {
    var _sd: OfflineSpeakerDiarization? = null
    val sd: OfflineSpeakerDiarization
        get() {
            return _sd!!
        }

    fun initSpeakerDiarization(assetManager: AssetManager? = null) {
        synchronized(this) {
            if (_sd != null) {
                return
            }
            Log.i(TAG, "Initializing sherpa-onnx speaker diarization")

            val config = OfflineSpeakerDiarizationConfig(
                segmentation = OfflineSpeakerSegmentationModelConfig(
                    pyannote = OfflineSpeakerSegmentationPyannoteModelConfig(
                        segmentationModel
                    ),
                    debug = true,
                ),
                embedding = SpeakerEmbeddingExtractorConfig(
                    model = embeddingModel,
                    debug = true,
                    numThreads = 2,
                ),
                clustering = FastClusteringConfig(numClusters = -1, threshold = 0.5f),
                minDurationOn = 0.2f,
                minDurationOff = 0.5f,
            )
            _sd = OfflineSpeakerDiarization(assetManager = assetManager, config = config)
        }
    }
}


================================================
FILE: android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/screens/Help.kt
================================================
package com.k2fsa.sherpa.onnx.speaker.diarization.screens

import androidx.compose.foundation.layout.Box
import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.Spacer
import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.foundation.layout.height
import androidx.compose.foundation.layout.padding
import androidx.compose.material3.Text
import androidx.compose.runtime.Composable
import androidx.compose.ui.Modifier
import androidx.compose.ui.unit.dp
import androidx.compose.ui.unit.sp

@Composable
fun HelpScreen() {
    Box(modifier = Modifier.fillMaxSize()) {
        Column(
            modifier = Modifier.padding(8.dp)
        ) {
            Text(
                "This app accepts only 16kHz 16-bit 1-channel *.wav files. " +
                        "It has two arguments: Number of speakers and clustering threshold. " +
                        "If you know the actual number of speakers in the file, please set it. " +
                        "Otherwise, please set it to 0. In that case, you have to set the threshold. " +
                        "A larger threshold leads to fewer segmented speakers."
            )
            Spacer(modifier = Modifier.height(5.dp))
            Text("The speaker segmentation model is from " +
                "pyannote-audio (https://huggingface.co/pyannote/segmentation-3.0), "+
                 "whereas the embedding extractor model is from 3D-Speaker (https://github.com/modelscope/3D-Speaker)")
            Spacer(modifier = Modifier.height(5.dp))
            Text("Please see http://github.com/k2-fsa/sherpa-onnx ")
            Spacer(modifier = Modifier.height(5.dp))
            Text("Everything is open-sourced!", fontSize = 20.sp)
        }
    }
}


================================================
FILE: android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/screens/Home.kt
================================================
package com.k2fsa.sherpa.onnx.speaker.diarization.screens

import android.util.Log
import androidx.activity.compose.rememberLauncherForActivityResult
import androidx.activity.result.contract.ActivityResultContracts
import androidx.compose.foundation.layout.Arrangement
import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.Row
import androidx.compose.foundation.layout.Spacer
import androidx.compose.foundation.layout.fillMaxWidth
import androidx.compose.foundation.layout.padding
import androidx.compose.foundation.layout.size
import androidx.compose.foundation.rememberScrollState
import androidx.compose.foundation.verticalScroll
import androidx.compose.material3.Button
import androidx.compose.material3.OutlinedTextField
import androidx.compose.material3.Text
import androidx.compose.runtime.Composable
import androidx.compose.runtime.getValue
import androidx.compose.runtime.mutableStateOf
import androidx.compose.runtime.remember
import androidx.compose.runtime.setValue
import androidx.compose.ui.Alignment
import androidx.compose.ui.Modifier
import androidx.compose.ui.platform.LocalClipboardManager
import androidx.compose.ui.platform.LocalContext
import androidx.compose.ui.text.AnnotatedString
import androidx.compose.ui.unit.dp
import androidx.compose.ui.unit.sp
import androidx.documentfile.provider.DocumentFile
import com.k2fsa.sherpa.onnx.speaker.diarization.SpeakerDiarizationObject
import com.k2fsa.sherpa.onnx.speaker.diarization.TAG
import kotlin.concurrent.thread


private var samples: FloatArray? = null

@Composable
fun HomeScreen() {
    val context = LocalContext.current

    var sampleRate: Int
    var filename by remember { mutableStateOf("") }
    var status by remember { mutableStateOf("") }
    var progress by remember { mutableStateOf("") }
    val clipboardManager = LocalClipboardManager.current
    var done by remember { mutableStateOf(false) }
    var fileIsOk by remember { mutableStateOf(false) }
    var started by remember { mutableStateOf(false) }
    var numSpeakers by remember { mutableStateOf(0) }
    var threshold by remember { mutableStateOf(0.5f) }


    val callback = here@{ numProcessedChunks: Int, numTotalChunks: Int, arg: Long ->
        Int
        val percent = 100.0 * numProcessedChunks / numTotalChunks
        progress = "%.2f%%".format(percent)
        Log.i(TAG, progress)
        return@here 0
    }

    val launcher = rememberLauncherForActivityResult(ActivityResultContracts.OpenDocument()) {
        it?.let {
            val documentFile = DocumentFile.fromSingleUri(context, it)
            filename = documentFile?.name ?: ""

            progress = ""
            done = false
            fileIsOk = false

            if (filename.isNotEmpty()) {
                val data = readUri(context, it)
                Log.i(TAG, "sample rate: ${data.sampleRate}")
                Log.i(TAG, "numSamples: ${data.samples?.size ?: 0}")
                if (data.msg != null) {
                    Log.i(TAG, "failed to read $filename")
                    status = data.msg
                } else if (data.sampleRate != SpeakerDiarizationObject.sd.sampleRate()) {
                    status =
                        "Expected sample rate: ${SpeakerDiarizationObject.sd.sampleRate()}. Given wave file with sample rate: ${data.sampleRate}"
                } else {
                    samples = data.samples!!
                    fileIsOk = true
                }
            }
        }
    }

    Column(
        modifier = Modifier.padding(10.dp),
        verticalArrangement = Arrangement.Top,
    ) {
        Row(
            modifier = Modifier.fillMaxWidth(),
            horizontalArrangement = Arrangement.SpaceEvenly,
            verticalAlignment = Alignment.CenterVertically
        ) {

            Button(onClick = {
                launcher.launch(arrayOf("audio/*"))
            }) {
                Text("Select a .wav file")
            }

            Button(enabled = fileIsOk && !started,
                onClick = {
                    Log.i(TAG, "started")
                    Log.i(TAG, "num samples: ${samples?.size}")
                    started = true
                    progress = ""

                    val config = SpeakerDiarizationObject.sd.config
                    config.clustering.numClusters = numSpeakers
                    config.clustering.threshold = threshold

                    SpeakerDiarizationObject.sd.setConfig(config)

                    thread(true) {
                        done = false
                        status = "Started! Please wait"
                        val segments = SpeakerDiarizationObject.sd.processWithCallback(
                            samples!!,
                            callback = callback,
                        )
                        done = true
                        started = false
                        status = ""
                        for (s in segments) {
                            val start = "%.2f".format(s.start)
                            val end = "%.2f".format(s.end)
                            val speaker = "speaker_%02d".format(s.speaker)
                            status += "$start -- $end $speaker\n"
                            Log.i(TAG, "$start -- $end $speaker")
                        }

                        Log.i(TAG, status)
                    }
                }) {
                Text("Start")
            }
            if (progress.isNotEmpty()) {
                Text(progress, fontSize = 25.sp)
            }
        }

        Row(
            modifier = Modifier.fillMaxWidth(),
            horizontalArrangement = Arrangement.SpaceEvenly,
            verticalAlignment = Alignment.CenterVertically
        ) {
            OutlinedTextField(
                value = numSpeakers.toString(),
                onValueChange = {
                    if (it.isEmpty() || it.isBlank()) {
                        numSpeakers = 0
                    } else {
                        numSpeakers = it.toIntOrNull() ?: 0
                    }
                },
                label = {
                    Text("Number of Speakers")
                },
            )
        }

        Row(
            modifier = Modifier.fillMaxWidth(),
            horizontalArrangement = Arrangement.SpaceEvenly,
            verticalAlignment = Alignment.CenterVertically
        ) {
            OutlinedTextField(
                value = threshold.toString(),
                onValueChange = {
                    if (it.isEmpty() || it.isBlank()) {
                        threshold = 0.5f
                    } else {
                        threshold = it.toFloatOrNull() ?: 0.5f
                    }
                },
                label = {
                    Text("Clustering threshold")
                },
            )
        }

        if (filename.isNotEmpty()) {
            Text(text = "Selected $filename")
            Spacer(Modifier.size(20.dp))
        }

        if (done) {
            Button(onClick = {
                clipboardManager.setText(AnnotatedString(status))
                progress = "Copied!"
            }) {
                Text("Copy result")
            }
            Spacer(Modifier.size(20.dp))
        }

        if (status.isNotEmpty()) {
            Text(
                status,
                modifier = Modifier.verticalScroll(rememberScrollState()),
            )
        }


    }
}

================================================
FILE: android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Color.kt
================================================
package com.k2fsa.sherpa.onnx.speaker.diarization.ui.theme

import androidx.compose.ui.graphics.Color

val Purple80 = Color(0xFFD0BCFF)
val PurpleGrey80 = Color(0xFFCCC2DC)
val Pink80 = Color(0xFFEFB8C8)

val Purple40 = Color(0xFF6650a4)
val PurpleGrey40 = Color(0xFF625b71)
val Pink40 = Color(0xFF7D5260)

================================================
FILE: android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Theme.kt
================================================
package com.k2fsa.sherpa.onnx.speaker.diarization.ui.theme

import android.app.Activity
import android.os.Build
import androidx.compose.foundation.isSystemInDarkTheme
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.darkColorScheme
import androidx.compose.material3.dynamicDarkColorScheme
import androidx.compose.material3.dynamicLightColorScheme
import androidx.compose.material3.lightColorScheme
import androidx.compose.runtime.Composable
import androidx.compose.ui.platform.LocalContext

private val DarkColorScheme = darkColorScheme(
    primary = Purple80,
    secondary = PurpleGrey80,
    tertiary = Pink80
)

private val LightColorScheme = lightColorScheme(
    primary = Purple40,
    secondary = PurpleGrey40,
    tertiary = Pink40

    /* Other default colors to override
    background = Color(0xFFFFFBFE),
    surface = Color(0xFFFFFBFE),
    onPrimary = Color.White,
    onSecondary = Color.White,
    onTertiary = Color.White,
    onBackground = Color(0xFF1C1B1F),
    onSurface = Color(0xFF1C1B1F),
    */
)

@Composable
fun SherpaOnnxSpeakerDiarizationTheme(
    darkTheme: Boolean = isSystemInDarkTheme(),
    // Dynamic color is available on Android 12+
    dynamicColor: Boolean = true,
    content: @Composable () -> Unit
) {
    val colorScheme = when {
        dynamicColor && Build.VERSION.SDK_INT >= Build.VERSION_CODES.S -> {
            val context = LocalContext.current
            if (darkTheme) dynamicDarkColorScheme(context) else dynamicLightColorScheme(context)
        }

        darkTheme -> DarkColorScheme
        else -> LightColorScheme
    }

    MaterialTheme(
        colorScheme = colorScheme,
        typography = Typography,
        content = content
    )
}

================================================
FILE: android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Type.kt
================================================
package com.k2fsa.sherpa.onnx.speaker.diarization.ui.theme

import androidx.compose.material3.Typography
import androidx.compose.ui.text.TextStyle
import androidx.compose.ui.text.font.FontFamily
import androidx.compose.ui.text.font.FontWeight
import androidx.compose.ui.unit.sp

// Set of Material typography styles to start with
val Typography = Typography(
    bodyLarge = TextStyle(
        fontFamily = FontFamily.Default,
        fontWeight = FontWeight.Normal,
        fontSize = 16.sp,
        lineHeight = 24.sp,
        letterSpacing = 0.5.sp
    )
    /* Other default text styles to override
    titleLarge = TextStyle(
        fontFamily = FontFamily.Default,
        fontWeight = FontWeight.Normal,
        fontSize = 22.sp,
        lineHeight = 28.sp,
        letterSpacing = 0.sp
    ),
    labelSmall = TextStyle(
        fontFamily = FontFamily.Default,
        fontWeight = FontWeight.Medium,
        fontSize = 11.sp,
        lineHeight = 16.sp,
        letterSpacing = 0.5.sp
    )
    */
)

================================================
FILE: android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/arm64-v8a/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/armeabi-v7a/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/x86/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/x86_64/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxSpeakerDiarization/app/src/main/res/drawable/ic_launcher_background.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">
    <path
        android:fillColor="#3DDC84"
        android:pathData="M0,0h108v108h-108z" />
    <path
        android:fillColor="#00000000"
        android:pathData="M9,0L9,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,0L19,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M29,0L29,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M39,0L39,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M49,0L49,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M59,0L59,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M69,0L69,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M79,0L79,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M89,0L89,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M99,0L99,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,9L108,9"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,19L108,19"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,29L108,29"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,39L108,39"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,49L108,49"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,59L108,59"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,69L108,69"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,79L108,79"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,89L108,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,99L108,99"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,29L89,29"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,39L89,39"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,49L89,49"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,59L89,59"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,69L89,69"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,79L89,79"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M29,19L29,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M39,19L39,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M49,19L49,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M59,19L59,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M69,19L69,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M79,19L79,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
</vector>


================================================
FILE: android/SherpaOnnxSpeakerDiarization/app/src/main/res/drawable-v24/ic_launcher_foreground.xml
================================================
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:aapt="http://schemas.android.com/aapt"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">
    <path android:pathData="M31,63.928c0,0 6.4,-11 12.1,-13.1c7.2,-2.6 26,-1.4 26,-1.4l38.1,38.1L107,108.928l-32,-1L31,63.928z">
        <aapt:attr name="android:fillColor">
            <gradient
                android:endX="85.84757"
                android:endY="92.4963"
                android:startX="42.9492"
                android:startY="49.59793"
                android:type="linear">
                <item
                    android:color="#44000000"
                    android:offset="0.0" />
                <item
                    android:color="#00000000"
                    android:offset="1.0" />
            </gradient>
        </aapt:attr>
    </path>
    <path
        android:fillColor="#FFFFFF"
        android:fillType="nonZero"
        android:pathData="M65.3,45.828l3.8,-6.6c0.2,-0.4 0.1,-0.9 -0.3,-1.1c-0.4,-0.2 -0.9,-0.1 -1.1,0.3l-3.9,6.7c-6.3,-2.8 -13.4,-2.8 -19.7,0l-3.9,-6.7c-0.2,-0.4 -0.7,-0.5 -1.1,-0.3C38.8,38.328 38.7,38.828 38.9,39.228l3.8,6.6C36.2,49.428 31.7,56.028 31,63.928h46C76.3,56.028 71.8,49.428 65.3,45.828zM43.4,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2c-0.3,-0.7 -0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C45.3,56.528 44.5,57.328 43.4,57.328L43.4,57.328zM64.6,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2s-0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C66.5,56.528 65.6,57.328 64.6,57.328L64.6,57.328z"
        android:strokeWidth="1"
        android:strokeColor="#00000000" />
</vector>

================================================
FILE: android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@drawable/ic_launcher_background" />
    <foreground android:drawable="@drawable/ic_launcher_foreground" />
    <monochrome android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

================================================
FILE: android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@drawable/ic_launcher_background" />
    <foreground android:drawable="@drawable/ic_launcher_foreground" />
    <monochrome android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

================================================
FILE: android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/colors.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<resources>
    <color name="purple_200">#FFBB86FC</color>
    <color name="purple_500">#FF6200EE</color>
    <color name="purple_700">#FF3700B3</color>
    <color name="teal_200">#FF03DAC5</color>
    <color name="teal_700">#FF018786</color>
    <color name="black">#FF000000</color>
    <color name="white">#FFFFFFFF</color>
</resources>

================================================
FILE: android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/strings.xml
================================================
<resources>
    <string name="app_name">SherpaOnnxSpeakerDiarization</string>
</resources>

================================================
FILE: android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/themes.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<resources>

    <style name="Theme.SherpaOnnxSpeakerDiarization" parent="android:Theme.Material.Light.NoActionBar" />
</resources>

================================================
FILE: android/SherpaOnnxSpeakerDiarization/app/src/main/res/xml/backup_rules.xml
================================================
<?xml version="1.0" encoding="utf-8"?><!--
   Sample backup rules file; uncomment and customize as necessary.
   See https://developer.android.com/guide/topics/data/autobackup
   for details.
   Note: This file is ignored for devices older that API 31
   See https://developer.android.com/about/versions/12/backup-restore
-->
<full-backup-content>
    <!--
   <include domain="sharedpref" path="."/>
   <exclude domain="sharedpref" path="device.xml"/>
-->
</full-backup-content>

================================================
FILE: android/SherpaOnnxSpeakerDiarization/app/src/main/res/xml/data_extraction_rules.xml
================================================
<?xml version="1.0" encoding="utf-8"?><!--
   Sample data extraction rules file; uncomment and customize as necessary.
   See https://developer.android.com/about/versions/12/backup-restore#xml-changes
   for details.
-->
<data-extraction-rules>
    <cloud-backup>
        <!-- TODO: Use <include> and <exclude> to control what is backed up.
        <include .../>
        <exclude .../>
        -->
    </cloud-backup>
    <!--
    <device-transfer>
        <include .../>
        <exclude .../>
    </device-transfer>
    -->
</data-extraction-rules>

================================================
FILE: android/SherpaOnnxSpeakerDiarization/app/src/test/java/com/k2fsa/sherpa/onnx/speaker/diarization/ExampleUnitTest.kt
================================================
package com.k2fsa.sherpa.onnx.speaker.diarization

import org.junit.Test

import org.junit.Assert.*

/**
 * Example local unit test, which will execute on the development machine (host).
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
class ExampleUnitTest {
    @Test
    fun addition_isCorrect() {
        assertEquals(4, 2 + 2)
    }
}

================================================
FILE: android/SherpaOnnxSpeakerDiarization/build.gradle.kts
================================================
// Top-level build file where you can add configuration options common to all sub-projects/modules.
plugins {
    alias(libs.plugins.android.application) apply false
    alias(libs.plugins.jetbrains.kotlin.android) apply false
}

================================================
FILE: android/SherpaOnnxSpeakerDiarization/gradle/libs.versions.toml
================================================
[versions]
agp = "8.4.0"
kotlin = "1.9.0"
coreKtx = "1.10.1"
junit = "4.13.2"
junitVersion = "1.1.5"
espressoCore = "3.5.1"
lifecycleRuntimeKtx = "2.6.1"
activityCompose = "1.8.0"
composeBom = "2023.08.00"
navigationCompose = "2.8.2"
documentfile = "1.0.1"

[libraries]
androidx-core-ktx = { group = "androidx.core", name = "core-ktx", version.ref = "coreKtx" }
junit = { group = "junit", name = "junit", version.ref = "junit" }
androidx-junit = { group = "androidx.test.ext", name = "junit", version.ref = "junitVersion" }
androidx-espresso-core = { group = "androidx.test.espresso", name = "espresso-core", version.ref = "espressoCore" }
androidx-lifecycle-runtime-ktx = { group = "androidx.lifecycle", name = "lifecycle-runtime-ktx", version.ref = "lifecycleRuntimeKtx" }
androidx-activity-compose = { group = "androidx.activity", name = "activity-compose", version.ref = "activityCompose" }
androidx-compose-bom = { group = "androidx.compose", name = "compose-bom", version.ref = "composeBom" }
androidx-ui = { group = "androidx.compose.ui", name = "ui" }
androidx-ui-graphics = { group = "androidx.compose.ui", name = "ui-graphics" }
androidx-ui-tooling = { group = "androidx.compose.ui", name = "ui-tooling" }
androidx-ui-tooling-preview = { group = "androidx.compose.ui", name = "ui-tooling-preview" }
androidx-ui-test-manifest = { group = "androidx.compose.ui", name = "ui-test-manifest" }
androidx-ui-test-junit4 = { group = "androidx.compose.ui", name = "ui-test-junit4" }
androidx-material3 = { group = "androidx.compose.material3", name = "material3" }
androidx-navigation-compose = { group = "androidx.navigation", name = "navigation-compose", version.ref = "navigationCompose" }
androidx-documentfile = { group = "androidx.documentfile", name = "documentfile", version.ref = "documentfile" }

[plugins]
android-application = { id = "com.android.application", version.ref = "agp" }
jetbrains-kotlin-android = { id = "org.jetbrains.kotlin.android", version.ref = "kotlin" }


================================================
FILE: android/SherpaOnnxSpeakerDiarization/gradle/wrapper/gradle-wrapper.properties
================================================
#Sat Oct 12 14:27:04 CST 2024
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.6-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists


================================================
FILE: android/SherpaOnnxSpeakerDiarization/gradle.properties
================================================
# Project-wide Gradle settings.
# IDE (e.g. Android Studio) users:
# Gradle settings configured through the IDE *will override*
# any settings specified in this file.
# For more details on how to configure your build environment visit
# http://www.gradle.org/docs/current/userguide/build_environment.html
# Specifies the JVM arguments used for the daemon process.
# The setting is particularly useful for tweaking memory settings.
org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
# When configured, Gradle will run in incubating parallel mode.
# This option should only be used with decoupled projects. For more details, visit
# https://developer.android.com/r/tools/gradle-multi-project-decoupled-projects
# org.gradle.parallel=true
# AndroidX package structure to make it clearer which packages are bundled with the
# Android operating system, and which are packaged with your app's APK
# https://developer.android.com/topic/libraries/support-library/androidx-rn
android.useAndroidX=true
# Kotlin code style for this project: "official" or "obsolete":
kotlin.code.style=official
# Enables namespacing of each library's R class so that its R class includes only the
# resources declared in the library itself and none from the library's dependencies,
# thereby reducing the size of the R class for that library
android.nonTransitiveRClass=true

================================================
FILE: android/SherpaOnnxSpeakerDiarization/gradlew
================================================
#!/usr/bin/env sh

#
# Copyright 2015 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

##############################################################################
##
##  Gradle start up script for UN*X
##
##############################################################################

# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
    ls=`ls -ld "$PRG"`
    link=`expr "$ls" : '.*-> \(.*\)$'`
    if expr "$link" : '/.*' > /dev/null; then
        PRG="$link"
    else
        PRG=`dirname "$PRG"`"/$link"
    fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null

APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`

# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"

warn () {
    echo "$*"
}

die () {
    echo
    echo "$*"
    echo
    exit 1
}

# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
  CYGWIN* )
    cygwin=true
    ;;
  Darwin* )
    darwin=true
    ;;
  MINGW* )
    msys=true
    ;;
  NONSTOP* )
    nonstop=true
    ;;
esac

CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar


# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
        # IBM's JDK on AIX uses strange locations for the executables
        JAVACMD="$JAVA_HOME/jre/sh/java"
    else
        JAVACMD="$JAVA_HOME/bin/java"
    fi
    if [ ! -x "$JAVACMD" ] ; then
        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
else
    JAVACMD="java"
    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi

# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
    MAX_FD_LIMIT=`ulimit -H -n`
    if [ $? -eq 0 ] ; then
        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
            MAX_FD="$MAX_FD_LIMIT"
        fi
        ulimit -n $MAX_FD
        if [ $? -ne 0 ] ; then
            warn "Could not set maximum file descriptor limit: $MAX_FD"
        fi
    else
        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
    fi
fi

# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi

# For Cygwin or MSYS, switch paths to Windows format before running java
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`

    JAVACMD=`cygpath --unix "$JAVACMD"`

    # We build the pattern for arguments to be converted via cygpath
    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
    SEP=""
    for dir in $ROOTDIRSRAW ; do
        ROOTDIRS="$ROOTDIRS$SEP$dir"
        SEP="|"
    done
    OURCYGPATTERN="(^($ROOTDIRS))"
    # Add a user-defined pattern to the cygpath arguments
    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
    fi
    # Now convert the arguments - kludge to limit ourselves to /bin/sh
    i=0
    for arg in "$@" ; do
        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
        CHECK2=`echo "$arg"|egrep -c "^-"`                                 ### Determine if an option

        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Added a condition
            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
        else
            eval `echo args$i`="\"$arg\""
        fi
        i=`expr $i + 1`
    done
    case $i in
        0) set -- ;;
        1) set -- "$args0" ;;
        2) set -- "$args0" "$args1" ;;
        3) set -- "$args0" "$args1" "$args2" ;;
        4) set -- "$args0" "$args1" "$args2" "$args3" ;;
        5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
        6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
        7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
        8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
        9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
    esac
fi

# Escape application args
save () {
    for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
    echo " "
}
APP_ARGS=`save "$@"`

# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"

exec "$JAVACMD" "$@"


================================================
FILE: android/SherpaOnnxSpeakerDiarization/gradlew.bat
================================================
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem      https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem

@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem  Gradle startup script for Windows
@rem
@rem ##########################################################################

@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal

set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"

@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome

set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto execute

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto execute

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar


@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*

:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if  not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1

:mainEnd
if "%OS%"=="Windows_NT" endlocal

:omega


================================================
FILE: android/SherpaOnnxSpeakerDiarization/settings.gradle.kts
================================================
pluginManagement {
    repositories {
        google {
            content {
                includeGroupByRegex("com\\.android.*")
                includeGroupByRegex("com\\.google.*")
                includeGroupByRegex("androidx.*")
            }
        }
        mavenCentral()
        gradlePluginPortal()
    }
}
dependencyResolutionManagement {
    repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
    repositories {
        google()
        mavenCentral()
    }
}

rootProject.name = "SherpaOnnxSpeakerDiarization"
include(":app")


================================================
FILE: android/SherpaOnnxSpeakerIdentification/.gitignore
================================================
*.iml
.gradle
/local.properties
/.idea/caches
/.idea/libraries
/.idea/modules.xml
/.idea/workspace.xml
/.idea/navEditor.xml
/.idea/assetWizardSettings.xml
.DS_Store
/build
/captures
.externalNativeBuild
.cxx
local.properties


================================================
FILE: android/SherpaOnnxSpeakerIdentification/app/.gitignore
================================================
/build

================================================
FILE: android/SherpaOnnxSpeakerIdentification/app/build.gradle.kts
================================================
plugins {
    id("com.android.application")
    id("org.jetbrains.kotlin.android")
}

android {
    namespace = "com.k2fsa.sherpa.onnx.speaker.identification"
    compileSdk = 34

    defaultConfig {
        applicationId = "com.k2fsa.sherpa.onnx.speaker.identification"
        minSdk = 21
        targetSdk = 34
        versionCode = 20260320
        versionName = "1.12.31"

        testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
        vectorDrawables {
            useSupportLibrary = true
        }
    }

    buildTypes {
        release {
            isMinifyEnabled = false
            proguardFiles(
                getDefaultProguardFile("proguard-android-optimize.txt"),
                "proguard-rules.pro"
            )
        }
    }
    compileOptions {
        sourceCompatibility = JavaVersion.VERSION_1_8
        targetCompatibility = JavaVersion.VERSION_1_8
    }
    kotlinOptions {
        jvmTarget = "1.8"
    }
    buildFeatures {
        compose = true
    }
    composeOptions {
        kotlinCompilerExtensionVersion = "1.5.1"
    }
    packaging {
        resources {
            excludes += "/META-INF/{AL2.0,LGPL2.1}"
        }
    }
}

dependencies {

    implementation("androidx.core:core-ktx:1.12.0")
    implementation("androidx.lifecycle:lifecycle-runtime-ktx:2.7.0")
    implementation("androidx.activity:activity-compose:1.8.2")
    implementation(platform("androidx.compose:compose-bom:2023.08.00"))
    implementation("androidx.compose.ui:ui")
    implementation("androidx.compose.ui:ui-graphics")
    implementation("androidx.compose.ui:ui-tooling-preview")
    implementation("androidx.compose.material3:material3")
    implementation("androidx.navigation:navigation-compose:2.7.6")
    testImplementation("junit:junit:4.13.2")
    androidTestImplementation("androidx.test.ext:junit:1.1.5")
    androidTestImplementation("androidx.test.espresso:espresso-core:3.5.1")
    androidTestImplementation(platform("androidx.compose:compose-bom:2023.08.00"))
    androidTestImplementation("androidx.compose.ui:ui-test-junit4")
    debugImplementation("androidx.compose.ui:ui-tooling")
    debugImplementation("androidx.compose.ui:ui-test-manifest")
}

================================================
FILE: android/SherpaOnnxSpeakerIdentification/app/proguard-rules.pro
================================================
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
#   http://developer.android.com/guide/developing/tools/proguard.html

# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
#   public *;
#}

# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable

# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile

================================================
FILE: android/SherpaOnnxSpeakerIdentification/app/src/androidTest/java/com/k2fsa/sherpa/onnx/speaker/identification/ExampleInstrumentedTest.kt
================================================
package com.k2fsa.sherpa.onnx.speaker.identification

import androidx.test.platform.app.InstrumentationRegistry
import androidx.test.ext.junit.runners.AndroidJUnit4

import org.junit.Test
import org.junit.runner.RunWith

import org.junit.Assert.*

/**
 * Instrumented test, which will execute on an Android device.
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
@RunWith(AndroidJUnit4::class)
class ExampleInstrumentedTest {
    @Test
    fun useAppContext() {
        // Context of the app under test.
        val appContext = InstrumentationRegistry.getInstrumentation().targetContext
        assertEquals("com.k2fsa.sherpa.onnx.speaker.identification", appContext.packageName)
    }
}

================================================
FILE: android/SherpaOnnxSpeakerIdentification/app/src/main/AndroidManifest.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:tools="http://schemas.android.com/tools">

    <uses-permission android:name="android.permission.RECORD_AUDIO" />

    <application
        android:allowBackup="true"
        android:dataExtractionRules="@xml/data_extraction_rules"
        android:fullBackupContent="@xml/backup_rules"
        android:icon="@mipmap/ic_launcher"
        android:label="@string/app_name"
        android:roundIcon="@mipmap/ic_launcher_round"
        android:supportsRtl="true"
        android:theme="@style/Theme.SherpaOnnxSpeakerIdentification"
        tools:targetApi="31">
        <activity
            android:name=".MainActivity"
            android:exported="true"
            android:label="@string/app_name"
            android:theme="@style/Theme.SherpaOnnxSpeakerIdentification">
            <intent-filter>
                <action android:name="android.intent.action.MAIN" />

                <category android:name="android.intent.category.LAUNCHER" />
            </intent-filter>
        </activity>
    </application>

</manifest>

================================================
FILE: android/SherpaOnnxSpeakerIdentification/app/src/main/assets/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/BarItem.kt
================================================
package com.k2fsa.sherpa.onnx.speaker.identification

import androidx.compose.ui.graphics.vector.ImageVector

data class BarItem(
    val title: String,

    // see https://www.composables.com/icons
    // and
    // https://developer.android.com/reference/kotlin/androidx/compose/material/icons/filled/package-summary
    val image: ImageVector,
    val route: String,
)

================================================
FILE: android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/MainActivity.kt
================================================
package com.k2fsa.sherpa.onnx.speaker.identification

import android.Manifest
import android.content.pm.PackageManager
import android.os.Bundle
import android.util.Log
import android.widget.Toast
import androidx.activity.ComponentActivity
import androidx.activity.compose.setContent
import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.foundation.layout.padding
import androidx.compose.material3.CenterAlignedTopAppBar
import androidx.compose.material3.ExperimentalMaterial3Api
import androidx.compose.material3.Icon
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.NavigationBar
import androidx.compose.material3.NavigationBarItem
import androidx.compose.material3.Scaffold
import androidx.compose.material3.Surface
import androidx.compose.material3.Text
import androidx.compose.material3.TopAppBarDefaults
import androidx.compose.runtime.Composable
import androidx.compose.runtime.getValue
import androidx.compose.ui.Modifier
import androidx.compose.ui.text.font.FontWeight
import androidx.compose.ui.tooling.preview.Preview
import androidx.core.app.ActivityCompat
import androidx.navigation.NavGraph.Companion.findStartDestination
import androidx.navigation.NavHostController
import androidx.navigation.compose.NavHost
import androidx.navigation.compose.composable
import androidx.navigation.compose.currentBackStackEntryAsState
import androidx.navigation.compose.rememberNavController
import com.k2fsa.sherpa.onnx.SpeakerRecognition
import com.k2fsa.sherpa.onnx.speaker.identification.screens.HelpScreen
import com.k2fsa.sherpa.onnx.speaker.identification.screens.HomeScreen
import com.k2fsa.sherpa.onnx.speaker.identification.screens.RegisterScreen
import com.k2fsa.sherpa.onnx.speaker.identification.screens.ViewScreen
import com.k2fsa.sherpa.onnx.speaker.identification.ui.theme.SherpaOnnxSpeakerIdentificationTheme

const val TAG = "sherpa-onnx-speaker"
private const val REQUEST_RECORD_AUDIO_PERMISSION = 200

class MainActivity : ComponentActivity() {
    private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        setContent {
            SherpaOnnxSpeakerIdentificationTheme {
                // A surface container using the 'background' color from the theme
                Surface(
                    modifier = Modifier.fillMaxSize(),
                    color = MaterialTheme.colorScheme.background
                ) {
                    MainScreen()
                }
            }
        }

        ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)

        SpeakerRecognition.initExtractor(this.assets)
    }

    @Deprecated("Deprecated in Java")
    override fun onRequestPermissionsResult(
        requestCode: Int,
        permissions: Array<out String>,
        grantResults: IntArray
    ) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults)
        val permissionToRecordAccepted = if (requestCode == REQUEST_RECORD_AUDIO_PERMISSION) {
            grantResults[0] == PackageManager.PERMISSION_GRANTED
        } else {
            false
        }

        if (!permissionToRecordAccepted) {
            Log.e(TAG, "Audio record is disallowed")
            Toast.makeText(
                this,
                "This App needs access to the microphone",
                Toast.LENGTH_SHORT
            )
                .show()
            finish()
        }

        Log.i(TAG, "Audio record is permitted")
    }
}

@OptIn(ExperimentalMaterial3Api::class)
@Composable
fun MainScreen(modifier: Modifier = Modifier) {
    val navController = rememberNavController()

    Scaffold(
        topBar = {
            CenterAlignedTopAppBar(
                colors = TopAppBarDefaults.topAppBarColors(
                    containerColor = MaterialTheme.colorScheme.primaryContainer,
                    titleContentColor = MaterialTheme.colorScheme.primary,
                ),
                title = {
                    Text(
                        "Next-gen Kaldi: Speaker Identification",
                        fontWeight = FontWeight.Bold,
                    )
                },
            )
        },
        content = { padding ->
            Column(Modifier.padding(padding)) {
                NavigationHost(navController = navController)

            }
        },
        bottomBar = {
            BottomNavigationBar(navController = navController)
        }
    )
}

@Composable
fun NavigationHost(navController: NavHostController) {
    NavHost(navController = navController, startDestination = NavRoutes.Home.route) {
        composable(NavRoutes.Home.route) {
            HomeScreen()
        }

        composable(NavRoutes.Register.route) {
            RegisterScreen()
        }

        composable(NavRoutes.View.route) {
            ViewScreen()
        }

        composable(NavRoutes.Help.route) {
            HelpScreen()
        }
    }
}

@Composable
fun BottomNavigationBar(navController: NavHostController) {
    NavigationBar {
        val backStackEntry by navController.currentBackStackEntryAsState()
        val currentRoute = backStackEntry?.destination?.route

        NavBarItems.BarItems.forEach { navItem ->
            NavigationBarItem(selected = currentRoute == navItem.route,
                onClick = {
                    navController.navigate(navItem.route) {
                        popUpTo(navController.graph.findStartDestination().id) {
                            saveState = true
                        }
                        launchSingleTop = true
                        restoreState = true
                    }
                },
                icon = {
                    Icon(imageVector = navItem.image, contentDescription = navItem.title)
                }, label = {
                    Text(text = navItem.title)
                })
        }
    }
}

@Preview(showBackground = true)
@Composable
fun MainScreenPreview() {
    SherpaOnnxSpeakerIdentificationTheme {
        MainScreen()
    }
}

================================================
FILE: android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/NavBarItems.kt
================================================
package com.k2fsa.sherpa.onnx.speaker.identification

import androidx.compose.material.icons.Icons
import androidx.compose.material.icons.filled.AccountCircle
import androidx.compose.material.icons.filled.Add
import androidx.compose.material.icons.filled.Home
import androidx.compose.material.icons.filled.Info


object NavBarItems {
    val BarItems = listOf(
        BarItem(
            title = "Home",
            image = Icons.Filled.Home,
            route = "home",
        ),
        BarItem(
            title = "Register",
            image = Icons.Filled.Add,
            route = "register",
        ),
        BarItem(
            title = "View",
            image = Icons.Filled.AccountCircle,
            route = "view",
        ),
        BarItem(
            title = "Help",
            image = Icons.Filled.Info,
            route = "help",
        ),
    )
}

================================================
FILE: android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/NavRoutes.kt
================================================
package com.k2fsa.sherpa.onnx.speaker.identification

sealed class NavRoutes(val route: String) {
    object Home : NavRoutes("home")
    object Register : NavRoutes("register")
    object View : NavRoutes("view")
    object Help : NavRoutes("help")
}

================================================
FILE: android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/screens/Help.kt
================================================
package com.k2fsa.sherpa.onnx.speaker.identification.screens

import androidx.compose.foundation.layout.Box
import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.Spacer
import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.foundation.layout.height
import androidx.compose.foundation.layout.padding
import androidx.compose.material3.Text
import androidx.compose.runtime.Composable
import androidx.compose.ui.Modifier
import androidx.compose.ui.unit.dp

@Composable
fun HelpScreen() {
    Box(modifier= Modifier.fillMaxSize()) {
        Column(
            modifier = Modifier.padding(16.dp)
        ) {
            Text("Please see http://github.com/k2-fsa/sherpa-onnx ")
            Spacer(modifier = Modifier.height(16.dp))
            Text("https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models")
            Spacer(modifier = Modifier.height(16.dp))
            Text("https://k2-fsa.github.io/sherpa/social-groups.html")
            Spacer(modifier = Modifier.height(16.dp))
            Text("Everything is open-sourced!")
        }
    }
}


================================================
FILE: android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/screens/Home.kt
================================================
package com.k2fsa.sherpa.onnx.speaker.identification.screens

import android.Manifest
import android.annotation.SuppressLint
import android.app.Activity
import android.content.pm.PackageManager
import android.media.AudioFormat
import android.media.AudioRecord
import android.media.MediaRecorder
import android.util.Log
import androidx.compose.foundation.layout.Arrangement
import androidx.compose.foundation.layout.Box
import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.Row
import androidx.compose.foundation.layout.Spacer
import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.foundation.layout.fillMaxWidth
import androidx.compose.foundation.layout.height
import androidx.compose.foundation.layout.padding
import androidx.compose.foundation.layout.width
import androidx.compose.material3.Button
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.Slider
import androidx.compose.material3.Text
import androidx.compose.runtime.Composable
import androidx.compose.runtime.getValue
import androidx.compose.runtime.mutableStateOf
import androidx.compose.runtime.remember
import androidx.compose.runtime.setValue
import androidx.compose.ui.Alignment
import androidx.compose.ui.Modifier
import androidx.compose.ui.platform.LocalContext
import androidx.compose.ui.res.stringResource
import androidx.compose.ui.text.font.FontWeight
import androidx.compose.ui.unit.dp
import androidx.core.app.ActivityCompat
import com.k2fsa.sherpa.onnx.SpeakerRecognition
import com.k2fsa.sherpa.onnx.speaker.identification.R
import com.k2fsa.sherpa.onnx.speaker.identification.TAG
import kotlin.concurrent.thread

private var audioRecord: AudioRecord? = null
private var sampleList: MutableList<FloatArray>? = null

private val clearedResult = "-cleared-"
@Composable
fun HomeScreen() {
    val activity = LocalContext.current as Activity
    var threshold by remember {
        mutableStateOf(0.5F)
    }

    var detectedName by remember {
        mutableStateOf(clearedResult)
    }

    var isStarted by remember { mutableStateOf(false) }
    val onRecordingButtonClick: () -> Unit = {
        isStarted = !isStarted

        if (isStarted) {
            if (ActivityCompat.checkSelfPermission(
                    activity,
                    Manifest.permission.RECORD_AUDIO
                ) != PackageManager.PERMISSION_GRANTED
            ) {
                Log.i(TAG, "Recording is not allowed")
            } else {
                // recording is allowed
                val audioSource = MediaRecorder.AudioSource.MIC
                val channelConfig = AudioFormat.CHANNEL_IN_MONO
                val audioFormat = AudioFormat.ENCODING_PCM_16BIT
                val numBytes =
                    AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat)

                audioRecord = AudioRecord(
                    audioSource,
                    sampleRateInHz,
                    AudioFormat.CHANNEL_IN_MONO,
                    AudioFormat.ENCODING_PCM_16BIT,
                    numBytes * 2 // a sample has two bytes as we are using 16-bit PCM
                )

                sampleList = null
                detectedName = clearedResult

                // recording is started here
                thread(true) {
                    Log.i(TAG, "processing samples")

                    val interval = 0.1 // i.e., 100 ms
                    val bufferSize = (interval * sampleRateInHz).toInt() // in samples
                    val buffer = ShortArray(bufferSize)
                    audioRecord?.let {
                        it.startRecording()

                        while (isStarted) {
                            val ret = audioRecord?.read(buffer, 0, buffer.size)
                            ret?.let { n ->
                                val samples = FloatArray(n) { buffer[it] / 32768.0f }
                                if (sampleList == null) {
                                    sampleList = mutableListOf(samples)
                                } else {
                                    sampleList?.add(samples)
                                }
                            }
                        }
                    }

                    Log.i(TAG, "Home: Recording is stopped. ${sampleList?.count()}")
                }
            }
        } else {
            // recording is stopped here
            audioRecord?.stop()
            audioRecord?.release()
            audioRecord = null

            sampleList?.let {
                val stream = SpeakerRecognition.extractor.createStream()
                for (samples in it) {
                    stream.acceptWaveform(samples = samples, sampleRate = sampleRateInHz)
                }
                stream.inputFinished()
                if (SpeakerRecognition.extractor.isReady(stream)) {
                    val embedding = SpeakerRecognition.extractor.compute(stream)
                    detectedName = SpeakerRecognition.manager.search(
                        embedding = embedding,
                        threshold = threshold,
                    )
                }
            }
        }
    }

    val onThresholdChange = { newValue: Float ->
        threshold = newValue
    }

    Box(
        modifier = Modifier.fillMaxSize(),
        contentAlignment = Alignment.TopCenter,
    ) {
        Column(
            horizontalAlignment = Alignment.CenterHorizontally,
        ) {
            HomeThresholdRow(
                threshold = threshold,
                onValueChange = onThresholdChange,
            )
            HomeButtonRow(
                isStarted = isStarted,
                onRecordingButtonClick = onRecordingButtonClick,
                onClearButtonClick = {
                    detectedName = clearedResult
                },
            )

            Spacer(modifier = Modifier.height(48.dp))

            if(detectedName == clearedResult) {
                // do nothing
            } else if (detectedName.length > 0) {
                Text(
                    text = "Speaker: ${detectedName}",
                    style = MaterialTheme.typography.headlineLarge,
                    fontWeight = FontWeight.Bold,
                )
            } else {
                Text(
                    text = "Unknown speaker",
                    style = MaterialTheme.typography.headlineLarge,
                    fontWeight = FontWeight.Bold,
                )
            }
        }
    }
}

@SuppressLint("UnrememberedMutableState")
@Composable
private fun HomeButtonRow(
    modifier: Modifier = Modifier,
    isStarted: Boolean,
    onRecordingButtonClick: () -> Unit,
    onClearButtonClick: () -> Unit,
) {
    val numSpeakers: Int by mutableStateOf(SpeakerRecognition.manager.numSpeakers())
    Row(
        modifier = modifier.fillMaxWidth(),
        horizontalArrangement = Arrangement.Center,
    ) {
        Button(
            enabled = numSpeakers > 0,
            onClick = onRecordingButtonClick
        ) {
            Text(text = stringResource(if (isStarted) R.string.stop else R.string.start))
        }

        Spacer(modifier = Modifier.width(24.dp))

        Button(onClick = onClearButtonClick) {
            Text(text = stringResource(id = R.string.clear))
        }
    }
}

@Composable
fun HomeThresholdRow(
    modifier: Modifier = Modifier,
    threshold: Float,
    onValueChange: (Float) -> Unit,
) {
    Column(modifier = Modifier) {
        Text(
            text = "Threshold: " + String.format("%.2f", threshold),
            style = MaterialTheme.typography.headlineMedium,
            fontWeight = FontWeight.Bold,
            modifier = modifier.padding(bottom = 8.dp, top = 8.dp),
        )
        Slider(
            value = threshold,
            onValueChange = onValueChange,
            valueRange = 0.1F..1.0F,
            modifier = modifier.fillMaxWidth(),
        )
    }
}

================================================
FILE: android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/screens/Register.kt
================================================
package com.k2fsa.sherpa.onnx.speaker.identification.screens

import android.Manifest
import android.annotation.SuppressLint
import android.app.Activity
import android.content.pm.PackageManager
import android.media.AudioFormat
import android.media.AudioRecord
import android.media.MediaRecorder
import android.util.Log
import android.widget.Toast
import androidx.compose.foundation.layout.Arrangement
import androidx.compose.foundation.layout.Box
import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.Row
import androidx.compose.foundation.layout.Spacer
import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.foundation.layout.fillMaxWidth
import androidx.compose.foundation.layout.padding
import androidx.compose.foundation.layout.width
import androidx.compose.material3.Button
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.OutlinedTextField
import androidx.compose.material3.Text
import androidx.compose.runtime.Composable
import androidx.compose.runtime.getValue
import androidx.compose.runtime.mutableStateOf
import androidx.compose.runtime.remember
import androidx.compose.runtime.setValue
import androidx.compose.ui.Alignment
import androidx.compose.ui.Modifier
import androidx.compose.ui.platform.LocalContext
import androidx.compose.ui.res.stringResource
import androidx.compose.ui.text.font.FontWeight
import androidx.compose.ui.tooling.preview.Preview
import androidx.compose.ui.unit.dp
import androidx.core.app.ActivityCompat
import com.k2fsa.sherpa.onnx.SpeakerRecognition
import com.k2fsa.sherpa.onnx.speaker.identification.R
import com.k2fsa.sherpa.onnx.speaker.identification.TAG
import kotlin.concurrent.thread

private var audioRecord: AudioRecord? = null

private var sampleList: MutableList<FloatArray>? = null

private var embeddingList: MutableList<FloatArray>? = null

val sampleRateInHz = 16000

@SuppressLint("UnrememberedMutableState")
@Preview
@Composable
fun RegisterScreen(modifier: Modifier = Modifier) {
    val activity = LocalContext.current as Activity

    var firstTime by remember { mutableStateOf(true) }
    if (firstTime) {
        firstTime = false
        // clear states
        embeddingList = null
    }

    val numberAudio: Int by mutableStateOf(embeddingList?.count() ?: 0)

    Box(
        modifier = Modifier.fillMaxSize(),
        contentAlignment = Alignment.TopCenter
    ) {
        var speakerName by remember { mutableStateOf("") }
        val onSpeakerNameChange = { newName: String -> speakerName = newName }

        var isStarted by remember { mutableStateOf(false) }
        val onRecordingButtonClick: () -> Unit = {
            isStarted = !isStarted

            if (isStarted) {
                if (ActivityCompat.checkSelfPermission(
                        activity,
                        Manifest.permission.RECORD_AUDIO
                    ) != PackageManager.PERMISSION_GRANTED
                ) {
                    Log.i(TAG, "Recording is not allowed")
                } else {
                    // recording is allowed
                    val audioSource = MediaRecorder.AudioSource.MIC
                    val channelConfig = AudioFormat.CHANNEL_IN_MONO
                    val audioFormat = AudioFormat.ENCODING_PCM_16BIT
                    val numBytes =
                        AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat)

                    audioRecord = AudioRecord(
                        audioSource,
                        sampleRateInHz,
                        AudioFormat.CHANNEL_IN_MONO,
                        AudioFormat.ENCODING_PCM_16BIT,
                        numBytes * 2 // a sample has two bytes as we are using 16-bit PCM
                    )

                    sampleList = null

                    // recording is started here
                    thread(true) {
                        Log.i(TAG, "processing samples")

                        val interval = 0.1 // i.e., 100 ms
                        val bufferSize = (interval * sampleRateInHz).toInt() // in samples
                        val buffer = ShortArray(bufferSize)
                        audioRecord?.let {
                            it.startRecording()

                            while (isStarted) {
                                val ret = audioRecord?.read(buffer, 0, buffer.size)
                                ret?.let { n ->
                                    val samples = FloatArray(n) { buffer[it] / 32768.0f }
                                    if (sampleList == null) {
                                        sampleList = mutableListOf(samples)
                                    } else {
                                        sampleList?.add(samples)
                                    }
                                }
                            }
                        }

                        Log.i(TAG, "Recording is stopped. ${sampleList?.count()}")

                    }
                }
            } else {
                // recording is stopped here
                audioRecord?.stop()
                audioRecord?.release()
                audioRecord = null

                sampleList?.let {
                    val stream = SpeakerRecognition.extractor.createStream()
                    for (samples in it) {
                        stream.acceptWaveform(samples=samples, sampleRate=sampleRateInHz)
                    }
                    stream.inputFinished()
                    if(SpeakerRecognition.extractor.isReady(stream)) {
                        val embedding = SpeakerRecognition.extractor.compute(stream)
                        if(embeddingList == null) {
                            embeddingList = mutableListOf(embedding)
                        } else {
                            embeddingList?.add(embedding)
                        }
                    }
                }
            }
        }

        val onAddButtonClick: () -> Unit = {
            if(speakerName.isEmpty() || speakerName.isBlank()) {
                Toast.makeText(
                    activity,
                    "please input a speaker name",
                    Toast.LENGTH_SHORT
                ).show()
            } else if(SpeakerRecognition.manager.contains(speakerName.trim())) {
                Toast.makeText(
                    activity,
                    "A speaker with $speakerName already exists. Please choose a new name",
                    Toast.LENGTH_SHORT
                ).show()
            } else {
                val ok = SpeakerRecognition.manager.add(speakerName.trim(), embedding = embeddingList!!.toTypedArray())
                if(ok) {
                    Log.i(TAG, "Added ${speakerName.trim()} successfully")
                    Toast.makeText(
                        activity,
                        "Added ${speakerName.trim()}",
                        Toast.LENGTH_SHORT
                    ).show()

                    embeddingList = null
                    sampleList = null
                    speakerName = ""
                    firstTime = true
                } else {
                    Log.i(TAG, "Failed to add ${speakerName.trim()}")
                    Toast.makeText(
                        activity,
                        "Failed to add ${speakerName.trim()}",
                        Toast.LENGTH_SHORT
                    ).show()
                }
            }
        }

        Column(horizontalAlignment = Alignment.CenterHorizontally) {
            SpeakerNameRow(speakerName = speakerName, onValueChange = onSpeakerNameChange)
            Text(
                "Number of recordings: ${numberAudio}",
                modifier = modifier.padding(24.dp),
                style = MaterialTheme.typography.headlineMedium,
                fontWeight = FontWeight.Bold,
            )
            RegisterSpeakerButtonRow(
                modifier,
                isStarted = isStarted,
                onRecordingButtonClick = onRecordingButtonClick,
                onAddButtonClick = onAddButtonClick,
            )
        }
    }
}

@Composable
fun SpeakerNameRow(
    modifier: Modifier = Modifier,
    speakerName: String,
    onValueChange: (String) -> Unit
) {
    OutlinedTextField(
        value = speakerName,
        onValueChange = onValueChange,
        label = {
            Text("Please input the speaker name")
        },
        singleLine = true,
        modifier = modifier
            .fillMaxWidth()
            .padding(8.dp)
    )
}

@SuppressLint("UnrememberedMutableState")
@Composable
fun RegisterSpeakerButtonRow(
    modifier: Modifier = Modifier,
    isStarted: Boolean,
    onRecordingButtonClick: () -> Unit,
    onAddButtonClick: () -> Unit,
) {
    val numberAudio: Int by mutableStateOf(embeddingList?.count() ?: 0)
    Row(
        modifier = modifier.fillMaxWidth(),
        horizontalArrangement = Arrangement.Center,
    ) {
        Button(onClick = onRecordingButtonClick) {
            Text(text = stringResource(if (isStarted) R.string.stop else R.string.start))
        }

        Spacer(modifier = Modifier.width(24.dp))

        Button(
            enabled = numberAudio > 0,
            onClick = onAddButtonClick,
        ) {
            Text(text = stringResource(id = R.string.add))
        }
    }
}


================================================
FILE: android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/screens/View.kt
================================================
package com.k2fsa.sherpa.onnx.speaker.identification.screens

import android.annotation.SuppressLint
import androidx.compose.foundation.ExperimentalFoundationApi
import androidx.compose.foundation.layout.Arrangement
import androidx.compose.foundation.layout.Box
import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.Row
import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.foundation.layout.fillMaxWidth
import androidx.compose.foundation.layout.padding
import androidx.compose.foundation.lazy.LazyColumn
import androidx.compose.foundation.lazy.items
import androidx.compose.material3.Button
import androidx.compose.material3.Checkbox
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.Surface
import androidx.compose.material3.Text
import androidx.compose.runtime.Composable
import androidx.compose.runtime.getValue
import androidx.compose.runtime.mutableStateOf
import androidx.compose.runtime.remember
import androidx.compose.runtime.setValue
import androidx.compose.runtime.toMutableStateList
import androidx.compose.ui.Alignment
import androidx.compose.ui.Modifier
import androidx.compose.ui.unit.dp
import com.k2fsa.sherpa.onnx.SpeakerRecognition

class SpeakerName(val name: String) {
    val nameState = mutableStateOf(name)
    val checked = mutableStateOf(false)

    fun onCheckedChange(newValue: Boolean) {
        checked.value = newValue
    }
}

@SuppressLint("UnrememberedMutableState")
@OptIn(ExperimentalFoundationApi::class)
@Composable
fun ViewScreen() {
    val allSpeakerNames = SpeakerRecognition.manager.allSpeakerNames()
    val allSpeakerNameList = remember {
        MutableList(
            allSpeakerNames.size
        ) {
            SpeakerName(allSpeakerNames[it])
        }.toMutableStateList()
    }

    var enabled by remember {
        mutableStateOf(SpeakerRecognition.manager.numSpeakers() > 0)
    }

    Box(
        modifier = Modifier.fillMaxSize(),
        contentAlignment = Alignment.TopCenter
    ) {
        Column(
            modifier = Modifier.padding(16.dp),
            horizontalAlignment = Alignment.CenterHorizontally,
        ) {
            Button(
                enabled = enabled,
                onClick = {
                    val toRemove: MutableList<SpeakerName> = mutableListOf()
                    for (s in allSpeakerNameList) {
                        if (s.checked.value) {
                            SpeakerRecognition.manager.remove(s.name)
                            toRemove.add(s)
                        }
                    }
                    allSpeakerNameList.removeAll(toRemove)
                    enabled = SpeakerRecognition.manager.numSpeakers() > 0
                }) {
                Text("Delete selected")
            }
            LazyColumn(modifier = Modifier.fillMaxSize()) {
                items(allSpeakerNameList) { s: SpeakerName ->
                    ViewRow(speakerName = s)
                }
            }
        }
    }
}

@Composable
fun ViewRow(
    modifier: Modifier = Modifier,
    speakerName: SpeakerName
) {
    Surface(
        modifier = modifier
            .fillMaxWidth()
            .padding(8.dp),
        color = MaterialTheme.colorScheme.inversePrimary,
    ) {
        Row(
            modifier = modifier,
            horizontalArrangement = Arrangement.Center,
            verticalAlignment = Alignment.CenterVertically,
        ) {
            Text(
                text = speakerName.name,
                modifier = modifier.weight(1.0F),
            )
            Checkbox(checked = speakerName.checked.value,
                onCheckedChange = { speakerName.onCheckedChange(it) }
            )
        }
    }
}

================================================
FILE: android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/ui/theme/Color.kt
================================================
package com.k2fsa.sherpa.onnx.speaker.identification.ui.theme

import androidx.compose.ui.graphics.Color

val Purple80 = Color(0xFFD0BCFF)
val PurpleGrey80 = Color(0xFFCCC2DC)
val Pink80 = Color(0xFFEFB8C8)

val Purple40 = Color(0xFF6650a4)
val PurpleGrey40 = Color(0xFF625b71)
val Pink40 = Color(0xFF7D5260)

================================================
FILE: android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/ui/theme/Theme.kt
================================================
package com.k2fsa.sherpa.onnx.speaker.identification.ui.theme

import android.app.Activity
import android.os.Build
import androidx.compose.foundation.isSystemInDarkTheme
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.darkColorScheme
import androidx.compose.material3.dynamicDarkColorScheme
import androidx.compose.material3.dynamicLightColorScheme
import androidx.compose.material3.lightColorScheme
import androidx.compose.runtime.Composable
import androidx.compose.runtime.SideEffect
import androidx.compose.ui.graphics.toArgb
import androidx.compose.ui.platform.LocalContext
import androidx.compose.ui.platform.LocalView
import androidx.core.view.WindowCompat

private val DarkColorScheme = darkColorScheme(
    primary = Purple80,
    secondary = PurpleGrey80,
    tertiary = Pink80
)

private val LightColorScheme = lightColorScheme(
    primary = Purple40,
    secondary = PurpleGrey40,
    tertiary = Pink40

    /* Other default colors to override
    background = Color(0xFFFFFBFE),
    surface = Color(0xFFFFFBFE),
    onPrimary = Color.White,
    onSecondary = Color.White,
    onTertiary = Color.White,
    onBackground = Color(0xFF1C1B1F),
    onSurface = Color(0xFF1C1B1F),
    */
)

@Composable
fun SherpaOnnxSpeakerIdentificationTheme(
    darkTheme: Boolean = isSystemInDarkTheme(),
    // Dynamic color is available on Android 12+
    dynamicColor: Boolean = true,
    content: @Composable () -> Unit
) {
    val colorScheme = when {
        dynamicColor && Build.VERSION.SDK_INT >= Build.VERSION_CODES.S -> {
            val context = LocalContext.current
            if (darkTheme) dynamicDarkColorScheme(context) else dynamicLightColorScheme(context)
        }

        darkTheme -> DarkColorScheme
        else -> LightColorScheme
    }
    val view = LocalView.current
    if (!view.isInEditMode) {
        SideEffect {
            val window = (view.context as Activity).window
            window.statusBarColor = colorScheme.primary.toArgb()
            WindowCompat.getInsetsController(window, view).isAppearanceLightStatusBars = darkTheme
        }
    }

    MaterialTheme(
        colorScheme = colorScheme,
        typography = Typography,
        content = content
    )
}

================================================
FILE: android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/ui/theme/Type.kt
================================================
package com.k2fsa.sherpa.onnx.speaker.identification.ui.theme

import androidx.compose.material3.Typography
import androidx.compose.ui.text.TextStyle
import androidx.compose.ui.text.font.FontFamily
import androidx.compose.ui.text.font.FontWeight
import androidx.compose.ui.unit.sp

// Set of Material typography styles to start with
val Typography = Typography(
    bodyLarge = TextStyle(
        fontFamily = FontFamily.Default,
        fontWeight = FontWeight.Normal,
        fontSize = 16.sp,
        lineHeight = 24.sp,
        letterSpacing = 0.5.sp
    )
    /* Other default text styles to override
    titleLarge = TextStyle(
        fontFamily = FontFamily.Default,
        fontWeight = FontWeight.Normal,
        fontSize = 22.sp,
        lineHeight = 28.sp,
        letterSpacing = 0.sp
    ),
    labelSmall = TextStyle(
        fontFamily = FontFamily.Default,
        fontWeight = FontWeight.Medium,
        fontSize = 11.sp,
        lineHeight = 16.sp,
        letterSpacing = 0.5.sp
    )
    */
)

================================================
FILE: android/SherpaOnnxSpeakerIdentification/app/src/main/jniLibs/arm64-v8a/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxSpeakerIdentification/app/src/main/jniLibs/armeabi-v7a/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxSpeakerIdentification/app/src/main/jniLibs/x86/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxSpeakerIdentification/app/src/main/jniLibs/x86_64/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxSpeakerIdentification/app/src/main/res/drawable/ic_launcher_background.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">
    <path
        android:fillColor="#3DDC84"
        android:pathData="M0,0h108v108h-108z" />
    <path
        android:fillColor="#00000000"
        android:pathData="M9,0L9,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,0L19,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M29,0L29,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M39,0L39,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M49,0L49,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M59,0L59,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M69,0L69,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M79,0L79,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M89,0L89,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M99,0L99,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,9L108,9"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,19L108,19"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,29L108,29"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,39L108,39"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,49L108,49"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,59L108,59"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,69L108,69"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,79L108,79"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,89L108,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,99L108,99"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,29L89,29"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,39L89,39"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,49L89,49"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,59L89,59"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,69L89,69"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,79L89,79"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M29,19L29,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M39,19L39,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M49,19L49,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M59,19L59,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M69,19L69,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M79,19L79,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
</vector>


================================================
FILE: android/SherpaOnnxSpeakerIdentification/app/src/main/res/drawable-v24/ic_launcher_foreground.xml
================================================
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:aapt="http://schemas.android.com/aapt"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">
    <path android:pathData="M31,63.928c0,0 6.4,-11 12.1,-13.1c7.2,-2.6 26,-1.4 26,-1.4l38.1,38.1L107,108.928l-32,-1L31,63.928z">
        <aapt:attr name="android:fillColor">
            <gradient
                android:endX="85.84757"
                android:endY="92.4963"
                android:startX="42.9492"
                android:startY="49.59793"
                android:type="linear">
                <item
                    android:color="#44000000"
                    android:offset="0.0" />
                <item
                    android:color="#00000000"
                    android:offset="1.0" />
            </gradient>
        </aapt:attr>
    </path>
    <path
        android:fillColor="#FFFFFF"
        android:fillType="nonZero"
        android:pathData="M65.3,45.828l3.8,-6.6c0.2,-0.4 0.1,-0.9 -0.3,-1.1c-0.4,-0.2 -0.9,-0.1 -1.1,0.3l-3.9,6.7c-6.3,-2.8 -13.4,-2.8 -19.7,0l-3.9,-6.7c-0.2,-0.4 -0.7,-0.5 -1.1,-0.3C38.8,38.328 38.7,38.828 38.9,39.228l3.8,6.6C36.2,49.428 31.7,56.028 31,63.928h46C76.3,56.028 71.8,49.428 65.3,45.828zM43.4,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2c-0.3,-0.7 -0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C45.3,56.528 44.5,57.328 43.4,57.328L43.4,57.328zM64.6,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2s-0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C66.5,56.528 65.6,57.328 64.6,57.328L64.6,57.328z"
        android:strokeWidth="1"
        android:strokeColor="#00000000" />
</vector>

================================================
FILE: android/SherpaOnnxSpeakerIdentification/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@drawable/ic_launcher_background" />
    <foreground android:drawable="@drawable/ic_launcher_foreground" />
    <monochrome android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

================================================
FILE: android/SherpaOnnxSpeakerIdentification/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@drawable/ic_launcher_background" />
    <foreground android:drawable="@drawable/ic_launcher_foreground" />
    <monochrome android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

================================================
FILE: android/SherpaOnnxSpeakerIdentification/app/src/main/res/values/colors.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<resources>
    <color name="purple_200">#FFBB86FC</color>
    <color name="purple_500">#FF6200EE</color>
    <color name="purple_700">#FF3700B3</color>
    <color name="teal_200">#FF03DAC5</color>
    <color name="teal_700">#FF018786</color>
    <color name="black">#FF000000</color>
    <color name="white">#FFFFFFFF</color>
</resources>

================================================
FILE: android/SherpaOnnxSpeakerIdentification/app/src/main/res/values/strings.xml
================================================
<resources>
    <string name="app_name">Speaker ID</string>
    <string name="start">Start recording</string>
    <string name="stop">Stop recording</string>
    <string name="add">Add speaker</string>
    <string name="clear">Clear result</string>
</resources>

================================================
FILE: android/SherpaOnnxSpeakerIdentification/app/src/main/res/values/themes.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<resources>

    <style name="Theme.SherpaOnnxSpeakerIdentification" parent="android:Theme.Material.Light.NoActionBar" />
</resources>

================================================
FILE: android/SherpaOnnxSpeakerIdentification/app/src/main/res/xml/backup_rules.xml
================================================
<?xml version="1.0" encoding="utf-8"?><!--
   Sample backup rules file; uncomment and customize as necessary.
   See https://developer.android.com/guide/topics/data/autobackup
   for details.
   Note: This file is ignored for devices older that API 31
   See https://developer.android.com/about/versions/12/backup-restore
-->
<full-backup-content>
    <!--
   <include domain="sharedpref" path="."/>
   <exclude domain="sharedpref" path="device.xml"/>
-->
</full-backup-content>

================================================
FILE: android/SherpaOnnxSpeakerIdentification/app/src/main/res/xml/data_extraction_rules.xml
================================================
<?xml version="1.0" encoding="utf-8"?><!--
   Sample data extraction rules file; uncomment and customize as necessary.
   See https://developer.android.com/about/versions/12/backup-restore#xml-changes
   for details.
-->
<data-extraction-rules>
    <cloud-backup>
        <!-- TODO: Use <include> and <exclude> to control what is backed up.
        <include .../>
        <exclude .../>
        -->
    </cloud-backup>
    <!--
    <device-transfer>
        <include .../>
        <exclude .../>
    </device-transfer>
    -->
</data-extraction-rules>

================================================
FILE: android/SherpaOnnxSpeakerIdentification/app/src/test/java/com/k2fsa/sherpa/onnx/speaker/identification/ExampleUnitTest.kt
================================================
package com.k2fsa.sherpa.onnx.speaker.identification

import org.junit.Test

import org.junit.Assert.*

/**
 * Example local unit test, which will execute on the development machine (host).
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
class ExampleUnitTest {
    @Test
    fun addition_isCorrect() {
        assertEquals(4, 2 + 2)
    }
}

================================================
FILE: android/SherpaOnnxSpeakerIdentification/build.gradle.kts
================================================
// Top-level build file where you can add configuration options common to all sub-projects/modules.
plugins {
    id("com.android.application") version "8.2.0" apply false
    id("org.jetbrains.kotlin.android") version "1.9.0" apply false
}

================================================
FILE: android/SherpaOnnxSpeakerIdentification/gradle/wrapper/gradle-wrapper.properties
================================================
#Sun Jan 21 18:37:37 CST 2024
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.2-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists


================================================
FILE: android/SherpaOnnxSpeakerIdentification/gradle.properties
================================================
# Project-wide Gradle settings.
# IDE (e.g. Android Studio) users:
# Gradle settings configured through the IDE *will override*
# any settings specified in this file.
# For more details on how to configure your build environment visit
# http://www.gradle.org/docs/current/userguide/build_environment.html
# Specifies the JVM arguments used for the daemon process.
# The setting is particularly useful for tweaking memory settings.
org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
# When configured, Gradle will run in incubating parallel mode.
# This option should only be used with decoupled projects. More details, visit
# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
# org.gradle.parallel=true
# AndroidX package structure to make it clearer which packages are bundled with the
# Android operating system, and which are packaged with your app's APK
# https://developer.android.com/topic/libraries/support-library/androidx-rn
android.useAndroidX=true
# Kotlin code style for this project: "official" or "obsolete":
kotlin.code.style=official
# Enables namespacing of each library's R class so that its R class includes only the
# resources declared in the library itself and none from the library's dependencies,
# thereby reducing the size of the R class for that library
android.nonTransitiveRClass=true

================================================
FILE: android/SherpaOnnxSpeakerIdentification/gradlew
================================================
#!/usr/bin/env sh

#
# Copyright 2015 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

##############################################################################
##
##  Gradle start up script for UN*X
##
##############################################################################

# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
    ls=`ls -ld "$PRG"`
    link=`expr "$ls" : '.*-> \(.*\)$'`
    if expr "$link" : '/.*' > /dev/null; then
        PRG="$link"
    else
        PRG=`dirname "$PRG"`"/$link"
    fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null

APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`

# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"

warn () {
    echo "$*"
}

die () {
    echo
    echo "$*"
    echo
    exit 1
}

# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
  CYGWIN* )
    cygwin=true
    ;;
  Darwin* )
    darwin=true
    ;;
  MINGW* )
    msys=true
    ;;
  NONSTOP* )
    nonstop=true
    ;;
esac

CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar


# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
        # IBM's JDK on AIX uses strange locations for the executables
        JAVACMD="$JAVA_HOME/jre/sh/java"
    else
        JAVACMD="$JAVA_HOME/bin/java"
    fi
    if [ ! -x "$JAVACMD" ] ; then
        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
else
    JAVACMD="java"
    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi

# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
    MAX_FD_LIMIT=`ulimit -H -n`
    if [ $? -eq 0 ] ; then
        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
            MAX_FD="$MAX_FD_LIMIT"
        fi
        ulimit -n $MAX_FD
        if [ $? -ne 0 ] ; then
            warn "Could not set maximum file descriptor limit: $MAX_FD"
        fi
    else
        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
    fi
fi

# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi

# For Cygwin or MSYS, switch paths to Windows format before running java
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`

    JAVACMD=`cygpath --unix "$JAVACMD"`

    # We build the pattern for arguments to be converted via cygpath
    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
    SEP=""
    for dir in $ROOTDIRSRAW ; do
        ROOTDIRS="$ROOTDIRS$SEP$dir"
        SEP="|"
    done
    OURCYGPATTERN="(^($ROOTDIRS))"
    # Add a user-defined pattern to the cygpath arguments
    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
    fi
    # Now convert the arguments - kludge to limit ourselves to /bin/sh
    i=0
    for arg in "$@" ; do
        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
        CHECK2=`echo "$arg"|egrep -c "^-"`                                 ### Determine if an option

        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Added a condition
            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
        else
            eval `echo args$i`="\"$arg\""
        fi
        i=`expr $i + 1`
    done
    case $i in
        0) set -- ;;
        1) set -- "$args0" ;;
        2) set -- "$args0" "$args1" ;;
        3) set -- "$args0" "$args1" "$args2" ;;
        4) set -- "$args0" "$args1" "$args2" "$args3" ;;
        5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
        6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
        7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
        8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
        9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
    esac
fi

# Escape application args
save () {
    for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
    echo " "
}
APP_ARGS=`save "$@"`

# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"

exec "$JAVACMD" "$@"


================================================
FILE: android/SherpaOnnxSpeakerIdentification/gradlew.bat
================================================
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem      https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem

@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem  Gradle startup script for Windows
@rem
@rem ##########################################################################

@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal

set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"

@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome

set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto execute

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto execute

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar


@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*

:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if  not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1

:mainEnd
if "%OS%"=="Windows_NT" endlocal

:omega


================================================
FILE: android/SherpaOnnxSpeakerIdentification/settings.gradle.kts
================================================
pluginManagement {
    repositories {
        google()
        mavenCentral()
        gradlePluginPortal()
    }
}
dependencyResolutionManagement {
    repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
    repositories {
        google()
        mavenCentral()
    }
}

rootProject.name = "SherpaOnnxSpeakerIdentification"
include(":app")


================================================
FILE: android/SherpaOnnxSpokenLanguageIdentification/.gitignore
================================================
*.iml
.gradle
/local.properties
/.idea/caches
/.idea/libraries
/.idea/modules.xml
/.idea/workspace.xml
/.idea/navEditor.xml
/.idea/assetWizardSettings.xml
.DS_Store
/build
/captures
.externalNativeBuild
.cxx
local.properties


================================================
FILE: android/SherpaOnnxSpokenLanguageIdentification/app/.gitignore
================================================
/build

================================================
FILE: android/SherpaOnnxSpokenLanguageIdentification/app/build.gradle.kts
================================================
plugins {
    id("com.android.application")
    id("org.jetbrains.kotlin.android")
}

android {
    namespace = "com.k2fsa.sherpa.onnx.slid"
    compileSdk = 34

    defaultConfig {
        applicationId = "com.k2fsa.sherpa.onnx.slid"
        minSdk = 21
        targetSdk = 34
        versionCode = 20260320
        versionName = "1.12.31"

        testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
        vectorDrawables {
            useSupportLibrary = true
        }
    }

    buildTypes {
        release {
            isMinifyEnabled = false
            proguardFiles(
                getDefaultProguardFile("proguard-android-optimize.txt"),
                "proguard-rules.pro"
            )
        }
    }
    compileOptions {
        sourceCompatibility = JavaVersion.VERSION_1_8
        targetCompatibility = JavaVersion.VERSION_1_8
    }
    kotlinOptions {
        jvmTarget = "1.8"
    }
    buildFeatures {
        compose = true
    }
    composeOptions {
        kotlinCompilerExtensionVersion = "1.5.1"
    }
    packaging {
        resources {
            excludes += "/META-INF/{AL2.0,LGPL2.1}"
        }
    }
}

dependencies {

    implementation("androidx.core:core-ktx:1.12.0")
    implementation("androidx.lifecycle:lifecycle-runtime-ktx:2.7.0")
    implementation("androidx.activity:activity-compose:1.8.2")
    implementation(platform("androidx.compose:compose-bom:2023.08.00"))
    implementation("androidx.compose.ui:ui")
    implementation("androidx.compose.ui:ui-graphics")
    implementation("androidx.compose.ui:ui-tooling-preview")
    implementation("androidx.compose.material3:material3")
    testImplementation("junit:junit:4.13.2")
    androidTestImplementation("androidx.test.ext:junit:1.1.5")
    androidTestImplementation("androidx.test.espresso:espresso-core:3.5.1")
    androidTestImplementation(platform("androidx.compose:compose-bom:2023.08.00"))
    androidTestImplementation("androidx.compose.ui:ui-test-junit4")
    debugImplementation("androidx.compose.ui:ui-tooling")
    debugImplementation("androidx.compose.ui:ui-test-manifest")
}

================================================
FILE: android/SherpaOnnxSpokenLanguageIdentification/app/proguard-rules.pro
================================================
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
#   http://developer.android.com/guide/developing/tools/proguard.html

# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
#   public *;
#}

# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable

# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile

================================================
FILE: android/SherpaOnnxSpokenLanguageIdentification/app/src/androidTest/java/com/k2fsa/sherpa/onnx/slid/ExampleInstrumentedTest.kt
================================================
package com.k2fsa.sherpa.onnx.slid

import androidx.test.platform.app.InstrumentationRegistry
import androidx.test.ext.junit.runners.AndroidJUnit4

import org.junit.Test
import org.junit.runner.RunWith

import org.junit.Assert.*

/**
 * Instrumented test, which will execute on an Android device.
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
@RunWith(AndroidJUnit4::class)
class ExampleInstrumentedTest {
    @Test
    fun useAppContext() {
        // Context of the app under test.
        val appContext = InstrumentationRegistry.getInstrumentation().targetContext
        assertEquals("com.k2fsa.sherpa.onnx.slid", appContext.packageName)
    }
}

================================================
FILE: android/SherpaOnnxSpokenLanguageIdentification/app/src/main/AndroidManifest.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:tools="http://schemas.android.com/tools">

    <uses-permission android:name="android.permission.RECORD_AUDIO" />

    <application
        android:allowBackup="true"
        android:dataExtractionRules="@xml/data_extraction_rules"
        android:fullBackupContent="@xml/backup_rules"
        android:icon="@mipmap/ic_launcher"
        android:label="@string/app_name"
        android:roundIcon="@mipmap/ic_launcher_round"
        android:supportsRtl="true"
        android:theme="@style/Theme.SherpaOnnxSpokenLanguageIdentification"
        tools:targetApi="31">
        <activity
            android:name=".MainActivity"
            android:exported="true"
            android:label="@string/app_name"
            android:theme="@style/Theme.SherpaOnnxSpokenLanguageIdentification">
            <intent-filter>
                <action android:name="android.intent.action.MAIN" />

                <category android:name="android.intent.category.LAUNCHER" />
            </intent-filter>
        </activity>
    </application>

</manifest>

================================================
FILE: android/SherpaOnnxSpokenLanguageIdentification/app/src/main/assets/.gitignore
================================================


================================================
FILE: android/SherpaOnnxSpokenLanguageIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/slid/Home.kt
================================================
@file:OptIn(ExperimentalMaterial3Api::class)

package com.k2fsa.sherpa.onnx.slid

import android.Manifest
import android.app.Activity
import android.content.pm.PackageManager
import android.media.AudioFormat
import android.media.AudioRecord
import android.media.MediaRecorder
import android.util.Log
import androidx.compose.foundation.layout.Box
import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.PaddingValues
import androidx.compose.foundation.layout.Spacer
import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.foundation.layout.height
import androidx.compose.foundation.layout.padding
import androidx.compose.material3.Button
import androidx.compose.material3.CenterAlignedTopAppBar
import androidx.compose.material3.ExperimentalMaterial3Api
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.Scaffold
import androidx.compose.material3.Text
import androidx.compose.material3.TopAppBarDefaults
import androidx.compose.runtime.Composable
import androidx.compose.runtime.getValue
import androidx.compose.runtime.mutableStateOf
import androidx.compose.runtime.remember
import androidx.compose.runtime.setValue
import androidx.compose.ui.Alignment
import androidx.compose.ui.Modifier
import androidx.compose.ui.platform.LocalContext
import androidx.compose.ui.text.font.FontWeight
import androidx.compose.ui.unit.dp
import androidx.compose.ui.unit.sp
import androidx.core.app.ActivityCompat
import kotlin.concurrent.thread

@Composable
fun Home() {
    Scaffold(
        topBar = {
            CenterAlignedTopAppBar(
                colors = TopAppBarDefaults.topAppBarColors(
                    containerColor = MaterialTheme.colorScheme.primaryContainer,
                    titleContentColor = MaterialTheme.colorScheme.primary,
                ),
                title = {
                    Text(
                        "Next-gen Kaldi: Spoken language identification",
                        fontWeight = FontWeight.Bold,
                        fontSize = 13.sp,
                    )
                },
            )
        },
        content = {
            MyApp(it)
        },
    )
}

private var audioRecord: AudioRecord? = null
private const val sampleRateInHz = 16000

@Composable
fun MyApp(padding: PaddingValues) {
    val activity = LocalContext.current as Activity
    var isStarted by remember { mutableStateOf(false) }
    var result by remember { mutableStateOf("") }

    val onButtonClick: () -> Unit = {
        isStarted = !isStarted
        if (isStarted) {
            result = ""
            if (ActivityCompat.checkSelfPermission(
                    activity,
                    Manifest.permission.RECORD_AUDIO
                ) != PackageManager.PERMISSION_GRANTED
            ) {
                Log.i(TAG, "Recording is not allowed")
            } else {
                val audioSource = MediaRecorder.AudioSource.MIC
                val channelConfig = AudioFormat.CHANNEL_IN_MONO
                val audioFormat = AudioFormat.ENCODING_PCM_16BIT
                val numBytes =
                    AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat)

                audioRecord = AudioRecord(
                    audioSource,
                    sampleRateInHz,
                    AudioFormat.CHANNEL_IN_MONO,
                    AudioFormat.ENCODING_PCM_16BIT,
                    numBytes * 2 // a sample has two bytes as we are using 16-bit PCM
                )

                thread(true) {
                    Log.i(TAG, "processing samples")
                    val interval = 0.1 // i.e., 100 ms
                    val bufferSize = (interval * sampleRateInHz).toInt() // in samples
                    val buffer = ShortArray(bufferSize)
                    val sampleList = ArrayList<FloatArray>()
                    audioRecord?.let {
                        it.startRecording()
                        while (isStarted) {
                            val ret = it.read(buffer, 0, buffer.size)
                            ret.let { n ->
                                val samples = FloatArray(n) { buffer[it] / 32768.0f }
                                sampleList.add(samples)
                            }
                        }
                    }
                    Log.i(TAG, "Stop recording")
                    Log.i(TAG, "Start recognition")
                    val samples = flatten(sampleList)
                    val stream = Slid.slid.createStream()
                    stream.acceptWaveform(samples, sampleRateInHz)
                    val lang = Slid.slid.compute(stream)

                    result = Slid.localeMap[lang] ?: lang

                    stream.release()
                }
            }
        }
    }

    Box(
        modifier = Modifier.fillMaxSize(),
        contentAlignment = Alignment.TopCenter
    ) {
        Column(
            Modifier.padding(padding),
            horizontalAlignment = Alignment.CenterHorizontally,
        ) {
            Spacer(modifier = Modifier.height(16.dp))
            Button(onClick = onButtonClick) {
                if (isStarted) {
                    Text("Stop")
                } else {
                    Text("Start")
                }
            }

            Spacer(modifier = Modifier.height(16.dp))
            if (result.isNotEmpty() && result.isNotBlank()) {
                Text("Detected language: $result")
            }
        }
    }
}

fun flatten(sampleList: ArrayList<FloatArray>): FloatArray {
    var totalSamples = 0
    for (a in sampleList) {
        totalSamples += a.size
    }
    var i = 0
    val samples = FloatArray(totalSamples)
    for (a in sampleList) {
        for (s in a) {
            samples[i] = s
            i += 1
        }
    }
    Log.i(TAG, "$i, $totalSamples")

    return samples
}

================================================
FILE: android/SherpaOnnxSpokenLanguageIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/slid/MainActivity.kt
================================================
package com.k2fsa.sherpa.onnx.slid

import android.Manifest
import android.content.pm.PackageManager
import android.os.Bundle
import android.util.Log
import android.widget.Toast
import androidx.activity.ComponentActivity
import androidx.activity.compose.setContent
import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.Surface
import androidx.compose.runtime.Composable
import androidx.compose.ui.Modifier
import androidx.core.app.ActivityCompat
import com.k2fsa.sherpa.onnx.slid.ui.theme.SherpaOnnxSpokenLanguageIdentificationTheme

const val TAG = "sherpa-onnx"
private const val REQUEST_RECORD_AUDIO_PERMISSION = 200

class MainActivity : ComponentActivity() {
    private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        setContent {
            SpokenLanguageIdentificationApp()
        }
        ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)
        Slid.initSlid(this.assets)
    }

    @Suppress("DEPRECATION")
    @Deprecated("Deprecated in Java")
    override fun onRequestPermissionsResult(
        requestCode: Int,
        permissions: Array<out String>,
        grantResults: IntArray
    ) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults)
        val permissionToRecordAccepted = if (requestCode == REQUEST_RECORD_AUDIO_PERMISSION) {
            grantResults[0] == PackageManager.PERMISSION_GRANTED
        } else {
            false
        }

        if (!permissionToRecordAccepted) {
            Log.e(TAG, "Audio record is disallowed")
            Toast.makeText(
                this,
                "This App needs access to the microphone",
                Toast.LENGTH_SHORT
            )
                .show()
            finish()
        }
        Log.i(TAG, "Audio record is permitted")
    }
}

@Composable
fun SpokenLanguageIdentificationApp() {
    SherpaOnnxSpokenLanguageIdentificationTheme {
        // A surface container using the 'background' color from the theme
        Surface(
            modifier = Modifier.fillMaxSize(),
            color = MaterialTheme.colorScheme.background
        ) {
            Home()
        }
    }
}

================================================
FILE: android/SherpaOnnxSpokenLanguageIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/slid/slid.kt
================================================
package com.k2fsa.sherpa.onnx.slid

import android.content.res.AssetManager
import android.util.Log
import com.k2fsa.sherpa.onnx.SpokenLanguageIdentification
import com.k2fsa.sherpa.onnx.getSpokenLanguageIdentificationConfig
import java.util.Locale


object Slid {
    private var _slid: SpokenLanguageIdentification? = null

    private var _localeMap = mutableMapOf<String, String>()
    val slid: SpokenLanguageIdentification
        get() {
            return _slid!!
        }
    val localeMap: Map<String, String>
        get() {
            return _localeMap
        }

    fun initSlid(assetManager: AssetManager? = null, numThreads: Int = 1) {
        synchronized(this) {
            if (_slid == null) {

                Log.i(TAG, "Initializing slid")
                val config =
                    getSpokenLanguageIdentificationConfig(type = 0, numThreads = numThreads)!!
                _slid = SpokenLanguageIdentification(assetManager, config)
            }

            if (_localeMap.isEmpty()) {
                val allLang = Locale.getISOLanguages()
                for (lang in allLang) {
                    val locale = Locale(lang)
                    _localeMap[lang] = locale.displayName
                }
            }
        }
    }
}

================================================
FILE: android/SherpaOnnxSpokenLanguageIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/slid/ui/theme/Color.kt
================================================
package com.k2fsa.sherpa.onnx.slid.ui.theme

import androidx.compose.ui.graphics.Color

val Purple80 = Color(0xFFD0BCFF)
val PurpleGrey80 = Color(0xFFCCC2DC)
val Pink80 = Color(0xFFEFB8C8)

val Purple40 = Color(0xFF6650a4)
val PurpleGrey40 = Color(0xFF625b71)
val Pink40 = Color(0xFF7D5260)

================================================
FILE: android/SherpaOnnxSpokenLanguageIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/slid/ui/theme/Theme.kt
================================================
package com.k2fsa.sherpa.onnx.slid.ui.theme

import android.app.Activity
import android.os.Build
import androidx.compose.foundation.isSystemInDarkTheme
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.darkColorScheme
import androidx.compose.material3.dynamicDarkColorScheme
import androidx.compose.material3.dynamicLightColorScheme
import androidx.compose.material3.lightColorScheme
import androidx.compose.runtime.Composable
import androidx.compose.runtime.SideEffect
import androidx.compose.ui.graphics.toArgb
import androidx.compose.ui.platform.LocalContext
import androidx.compose.ui.platform.LocalView
import androidx.core.view.WindowCompat

private val DarkColorScheme = darkColorScheme(
    primary = Purple80,
    secondary = PurpleGrey80,
    tertiary = Pink80
)

private val LightColorScheme = lightColorScheme(
    primary = Purple40,
    secondary = PurpleGrey40,
    tertiary = Pink40

    /* Other default colors to override
    background = Color(0xFFFFFBFE),
    surface = Color(0xFFFFFBFE),
    onPrimary = Color.White,
    onSecondary = Color.White,
    onTertiary = Color.White,
    onBackground = Color(0xFF1C1B1F),
    onSurface = Color(0xFF1C1B1F),
    */
)

@Composable
fun SherpaOnnxSpokenLanguageIdentificationTheme(
    darkTheme: Boolean = isSystemInDarkTheme(),
    // Dynamic color is available on Android 12+
    dynamicColor: Boolean = true,
    content: @Composable () -> Unit
) {
    val colorScheme = when {
        dynamicColor && Build.VERSION.SDK_INT >= Build.VERSION_CODES.S -> {
            val context = LocalContext.current
            if (darkTheme) dynamicDarkColorScheme(context) else dynamicLightColorScheme(context)
        }

        darkTheme -> DarkColorScheme
        else -> LightColorScheme
    }
    val view = LocalView.current
    if (!view.isInEditMode) {
        SideEffect {
            val window = (view.context as Activity).window
            window.statusBarColor = colorScheme.primary.toArgb()
            WindowCompat.getInsetsController(window, view).isAppearanceLightStatusBars = darkTheme
        }
    }

    MaterialTheme(
        colorScheme = colorScheme,
        typography = Typography,
        content = content
    )
}

================================================
FILE: android/SherpaOnnxSpokenLanguageIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/slid/ui/theme/Type.kt
================================================
package com.k2fsa.sherpa.onnx.slid.ui.theme

import androidx.compose.material3.Typography
import androidx.compose.ui.text.TextStyle
import androidx.compose.ui.text.font.FontFamily
import androidx.compose.ui.text.font.FontWeight
import androidx.compose.ui.unit.sp

// Set of Material typography styles to start with
val Typography = Typography(
    bodyLarge = TextStyle(
        fontFamily = FontFamily.Default,
        fontWeight = FontWeight.Normal,
        fontSize = 16.sp,
        lineHeight = 24.sp,
        letterSpacing = 0.5.sp
    )
    /* Other default text styles to override
    titleLarge = TextStyle(
        fontFamily = FontFamily.Default,
        fontWeight = FontWeight.Normal,
        fontSize = 22.sp,
        lineHeight = 28.sp,
        letterSpacing = 0.sp
    ),
    labelSmall = TextStyle(
        fontFamily = FontFamily.Default,
        fontWeight = FontWeight.Medium,
        fontSize = 11.sp,
        lineHeight = 16.sp,
        letterSpacing = 0.5.sp
    )
    */
)

================================================
FILE: android/SherpaOnnxSpokenLanguageIdentification/app/src/main/jniLibs/arm64-v8a/.gitignore
================================================


================================================
FILE: android/SherpaOnnxSpokenLanguageIdentification/app/src/main/jniLibs/armeabi-v7a/.gitignore
================================================


================================================
FILE: android/SherpaOnnxSpokenLanguageIdentification/app/src/main/jniLibs/x86/.gitignore
================================================


================================================
FILE: android/SherpaOnnxSpokenLanguageIdentification/app/src/main/jniLibs/x86_64/.gitignore
================================================


================================================
FILE: android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/drawable/ic_launcher_background.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">
    <path
        android:fillColor="#3DDC84"
        android:pathData="M0,0h108v108h-108z" />
    <path
        android:fillColor="#00000000"
        android:pathData="M9,0L9,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,0L19,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M29,0L29,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M39,0L39,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M49,0L49,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M59,0L59,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M69,0L69,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M79,0L79,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M89,0L89,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M99,0L99,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,9L108,9"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,19L108,19"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,29L108,29"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,39L108,39"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,49L108,49"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,59L108,59"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,69L108,69"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,79L108,79"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,89L108,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,99L108,99"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,29L89,29"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,39L89,39"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,49L89,49"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,59L89,59"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,69L89,69"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,79L89,79"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M29,19L29,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M39,19L39,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M49,19L49,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M59,19L59,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M69,19L69,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M79,19L79,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
</vector>


================================================
FILE: android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/drawable-v24/ic_launcher_foreground.xml
================================================
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:aapt="http://schemas.android.com/aapt"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">
    <path android:pathData="M31,63.928c0,0 6.4,-11 12.1,-13.1c7.2,-2.6 26,-1.4 26,-1.4l38.1,38.1L107,108.928l-32,-1L31,63.928z">
        <aapt:attr name="android:fillColor">
            <gradient
                android:endX="85.84757"
                android:endY="92.4963"
                android:startX="42.9492"
                android:startY="49.59793"
                android:type="linear">
                <item
                    android:color="#44000000"
                    android:offset="0.0" />
                <item
                    android:color="#00000000"
                    android:offset="1.0" />
            </gradient>
        </aapt:attr>
    </path>
    <path
        android:fillColor="#FFFFFF"
        android:fillType="nonZero"
        android:pathData="M65.3,45.828l3.8,-6.6c0.2,-0.4 0.1,-0.9 -0.3,-1.1c-0.4,-0.2 -0.9,-0.1 -1.1,0.3l-3.9,6.7c-6.3,-2.8 -13.4,-2.8 -19.7,0l-3.9,-6.7c-0.2,-0.4 -0.7,-0.5 -1.1,-0.3C38.8,38.328 38.7,38.828 38.9,39.228l3.8,6.6C36.2,49.428 31.7,56.028 31,63.928h46C76.3,56.028 71.8,49.428 65.3,45.828zM43.4,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2c-0.3,-0.7 -0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C45.3,56.528 44.5,57.328 43.4,57.328L43.4,57.328zM64.6,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2s-0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C66.5,56.528 65.6,57.328 64.6,57.328L64.6,57.328z"
        android:strokeWidth="1"
        android:strokeColor="#00000000" />
</vector>

================================================
FILE: android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@drawable/ic_launcher_background" />
    <foreground android:drawable="@drawable/ic_launcher_foreground" />
    <monochrome android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

================================================
FILE: android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@drawable/ic_launcher_background" />
    <foreground android:drawable="@drawable/ic_launcher_foreground" />
    <monochrome android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

================================================
FILE: android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/values/colors.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<resources>
    <color name="purple_200">#FFBB86FC</color>
    <color name="purple_500">#FF6200EE</color>
    <color name="purple_700">#FF3700B3</color>
    <color name="teal_200">#FF03DAC5</color>
    <color name="teal_700">#FF018786</color>
    <color name="black">#FF000000</color>
    <color name="white">#FFFFFFFF</color>
</resources>

================================================
FILE: android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/values/strings.xml
================================================
<resources>
    <string name="app_name">Language ID</string>
</resources>

================================================
FILE: android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/values/themes.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<resources>

    <style name="Theme.SherpaOnnxSpokenLanguageIdentification" parent="android:Theme.Material.Light.NoActionBar" />
</resources>

================================================
FILE: android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/xml/backup_rules.xml
================================================
<?xml version="1.0" encoding="utf-8"?><!--
   Sample backup rules file; uncomment and customize as necessary.
   See https://developer.android.com/guide/topics/data/autobackup
   for details.
   Note: This file is ignored for devices older that API 31
   See https://developer.android.com/about/versions/12/backup-restore
-->
<full-backup-content>
    <!--
   <include domain="sharedpref" path="."/>
   <exclude domain="sharedpref" path="device.xml"/>
-->
</full-backup-content>

================================================
FILE: android/SherpaOnnxSpokenLanguageIdentification/app/src/main/res/xml/data_extraction_rules.xml
================================================
<?xml version="1.0" encoding="utf-8"?><!--
   Sample data extraction rules file; uncomment and customize as necessary.
   See https://developer.android.com/about/versions/12/backup-restore#xml-changes
   for details.
-->
<data-extraction-rules>
    <cloud-backup>
        <!-- TODO: Use <include> and <exclude> to control what is backed up.
        <include .../>
        <exclude .../>
        -->
    </cloud-backup>
    <!--
    <device-transfer>
        <include .../>
        <exclude .../>
    </device-transfer>
    -->
</data-extraction-rules>

================================================
FILE: android/SherpaOnnxSpokenLanguageIdentification/app/src/test/java/com/k2fsa/sherpa/onnx/slid/ExampleUnitTest.kt
================================================
package com.k2fsa.sherpa.onnx.slid

import org.junit.Test

import org.junit.Assert.*

/**
 * Example local unit test, which will execute on the development machine (host).
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
class ExampleUnitTest {
    @Test
    fun addition_isCorrect() {
        assertEquals(4, 2 + 2)
    }
}

================================================
FILE: android/SherpaOnnxSpokenLanguageIdentification/build.gradle.kts
================================================
// Top-level build file where you can add configuration options common to all sub-projects/modules.
plugins {
    id("com.android.application") version "8.2.0" apply false
    id("org.jetbrains.kotlin.android") version "1.9.0" apply false
}

================================================
FILE: android/SherpaOnnxSpokenLanguageIdentification/gradle/wrapper/gradle-wrapper.properties
================================================
#Wed Apr 17 19:48:00 CST 2024
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.2-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists


================================================
FILE: android/SherpaOnnxSpokenLanguageIdentification/gradle.properties
================================================
# Project-wide Gradle settings.
# IDE (e.g. Android Studio) users:
# Gradle settings configured through the IDE *will override*
# any settings specified in this file.
# For more details on how to configure your build environment visit
# http://www.gradle.org/docs/current/userguide/build_environment.html
# Specifies the JVM arguments used for the daemon process.
# The setting is particularly useful for tweaking memory settings.
org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
# When configured, Gradle will run in incubating parallel mode.
# This option should only be used with decoupled projects. More details, visit
# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
# org.gradle.parallel=true
# AndroidX package structure to make it clearer which packages are bundled with the
# Android operating system, and which are packaged with your app's APK
# https://developer.android.com/topic/libraries/support-library/androidx-rn
android.useAndroidX=true
# Kotlin code style for this project: "official" or "obsolete":
kotlin.code.style=official
# Enables namespacing of each library's R class so that its R class includes only the
# resources declared in the library itself and none from the library's dependencies,
# thereby reducing the size of the R class for that library
android.nonTransitiveRClass=true

================================================
FILE: android/SherpaOnnxSpokenLanguageIdentification/gradlew
================================================
#!/usr/bin/env sh

#
# Copyright 2015 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

##############################################################################
##
##  Gradle start up script for UN*X
##
##############################################################################

# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
    ls=`ls -ld "$PRG"`
    link=`expr "$ls" : '.*-> \(.*\)$'`
    if expr "$link" : '/.*' > /dev/null; then
        PRG="$link"
    else
        PRG=`dirname "$PRG"`"/$link"
    fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null

APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`

# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"

warn () {
    echo "$*"
}

die () {
    echo
    echo "$*"
    echo
    exit 1
}

# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
  CYGWIN* )
    cygwin=true
    ;;
  Darwin* )
    darwin=true
    ;;
  MINGW* )
    msys=true
    ;;
  NONSTOP* )
    nonstop=true
    ;;
esac

CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar


# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
        # IBM's JDK on AIX uses strange locations for the executables
        JAVACMD="$JAVA_HOME/jre/sh/java"
    else
        JAVACMD="$JAVA_HOME/bin/java"
    fi
    if [ ! -x "$JAVACMD" ] ; then
        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
else
    JAVACMD="java"
    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi

# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
    MAX_FD_LIMIT=`ulimit -H -n`
    if [ $? -eq 0 ] ; then
        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
            MAX_FD="$MAX_FD_LIMIT"
        fi
        ulimit -n $MAX_FD
        if [ $? -ne 0 ] ; then
            warn "Could not set maximum file descriptor limit: $MAX_FD"
        fi
    else
        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
    fi
fi

# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi

# For Cygwin or MSYS, switch paths to Windows format before running java
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`

    JAVACMD=`cygpath --unix "$JAVACMD"`

    # We build the pattern for arguments to be converted via cygpath
    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
    SEP=""
    for dir in $ROOTDIRSRAW ; do
        ROOTDIRS="$ROOTDIRS$SEP$dir"
        SEP="|"
    done
    OURCYGPATTERN="(^($ROOTDIRS))"
    # Add a user-defined pattern to the cygpath arguments
    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
    fi
    # Now convert the arguments - kludge to limit ourselves to /bin/sh
    i=0
    for arg in "$@" ; do
        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
        CHECK2=`echo "$arg"|egrep -c "^-"`                                 ### Determine if an option

        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Added a condition
            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
        else
            eval `echo args$i`="\"$arg\""
        fi
        i=`expr $i + 1`
    done
    case $i in
        0) set -- ;;
        1) set -- "$args0" ;;
        2) set -- "$args0" "$args1" ;;
        3) set -- "$args0" "$args1" "$args2" ;;
        4) set -- "$args0" "$args1" "$args2" "$args3" ;;
        5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
        6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
        7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
        8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
        9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
    esac
fi

# Escape application args
save () {
    for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
    echo " "
}
APP_ARGS=`save "$@"`

# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"

exec "$JAVACMD" "$@"


================================================
FILE: android/SherpaOnnxSpokenLanguageIdentification/gradlew.bat
================================================
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem      https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem

@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem  Gradle startup script for Windows
@rem
@rem ##########################################################################

@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal

set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"

@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome

set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto execute

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto execute

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar


@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*

:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if  not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1

:mainEnd
if "%OS%"=="Windows_NT" endlocal

:omega


================================================
FILE: android/SherpaOnnxSpokenLanguageIdentification/settings.gradle.kts
================================================
pluginManagement {
    repositories {
        google()
        mavenCentral()
        gradlePluginPortal()
    }
}
dependencyResolutionManagement {
    repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
    repositories {
        google()
        mavenCentral()
    }
}

rootProject.name = "SherpaOnnxSpokenLanguageIdentification"
include(":app")


================================================
FILE: android/SherpaOnnxTts/.gitignore
================================================
*.iml
.gradle
/local.properties
/.idea/caches
/.idea/libraries
/.idea/modules.xml
/.idea/workspace.xml
/.idea/navEditor.xml
/.idea/assetWizardSettings.xml
.DS_Store
/build
/captures
.externalNativeBuild
.cxx
local.properties


================================================
FILE: android/SherpaOnnxTts/app/.gitignore
================================================
/build

================================================
FILE: android/SherpaOnnxTts/app/build.gradle
================================================
plugins {
    id 'com.android.application'
    id 'org.jetbrains.kotlin.android'
}

android {
    namespace 'com.k2fsa.sherpa.onnx'
    compileSdk 32

    defaultConfig {
        applicationId "com.k2fsa.sherpa.onnx"
        minSdk 21
        targetSdk 32
        versionCode 20260320
        versionName "1.12.31"

        testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
    }

    buildTypes {
        release {
            minifyEnabled false
            proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
        }
    }
    compileOptions {
        sourceCompatibility JavaVersion.VERSION_1_8
        targetCompatibility JavaVersion.VERSION_1_8
    }
    kotlinOptions {
        jvmTarget = '1.8'
    }
}

dependencies {

    implementation 'com.android.support.constraint:constraint-layout:1.1.3'
    implementation 'androidx.core:core-ktx:1.7.0'
    implementation 'com.google.android.material:material:1.9.0'
    implementation 'androidx.constraintlayout:constraintlayout:2.1.4'
    testImplementation 'junit:junit:4.13.2'
    androidTestImplementation 'androidx.test.ext:junit:1.1.5'
    androidTestImplementation 'androidx.test.espresso:espresso-core:3.5.1'
}

================================================
FILE: android/SherpaOnnxTts/app/proguard-rules.pro
================================================
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
#   http://developer.android.com/guide/developing/tools/proguard.html

# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
#   public *;
#}

# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable

# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile

================================================
FILE: android/SherpaOnnxTts/app/src/androidTest/java/com/k2fsa/sherpa/onnx/ExampleInstrumentedTest.kt
================================================
package com.k2fsa.sherpa.onnx

import androidx.test.platform.app.InstrumentationRegistry
import androidx.test.ext.junit.runners.AndroidJUnit4

import org.junit.Test
import org.junit.runner.RunWith

import org.junit.Assert.*

/**
 * Instrumented test, which will execute on an Android device.
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
@RunWith(AndroidJUnit4::class)
class ExampleInstrumentedTest {
    @Test
    fun useAppContext() {
        // Context of the app under test.
        val appContext = InstrumentationRegistry.getInstrumentation().targetContext
        assertEquals("com.k2fsa.sherpa.onnx", appContext.packageName)
    }
}

================================================
FILE: android/SherpaOnnxTts/app/src/main/.gitignore
================================================
vits-zh-aishell3
vits-vctk


================================================
FILE: android/SherpaOnnxTts/app/src/main/AndroidManifest.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:tools="http://schemas.android.com/tools">

    <uses-permission android:name="android.permission.WRITE_INTERNAL_STORAGE" />

    <application
        android:allowBackup="true"
        android:dataExtractionRules="@xml/data_extraction_rules"
        android:fullBackupContent="@xml/backup_rules"
        android:icon="@mipmap/ic_launcher"
        android:label="@string/app_name"
        android:roundIcon="@mipmap/ic_launcher_round"
        android:supportsRtl="true"
        android:theme="@style/Theme.SherpaOnnxTts"
        tools:targetApi="31">
        <activity
            android:name=".MainActivity"
            android:exported="true">
            <intent-filter>
                <action android:name="android.intent.action.MAIN" />

                <category android:name="android.intent.category.LAUNCHER" />
            </intent-filter>

            <meta-data
                android:name="android.app.lib_name"
                android:value="" />
        </activity>
    </application>

</manifest>

================================================
FILE: android/SherpaOnnxTts/app/src/main/assets/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
================================================
package com.k2fsa.sherpa.onnx

import android.content.res.AssetManager
import android.media.AudioAttributes
import android.media.AudioFormat
import android.media.AudioManager
import android.media.AudioTrack
import android.media.MediaPlayer
import android.net.Uri
import android.os.Bundle
import android.util.Log
import android.widget.Button
import android.widget.EditText
import android.widget.Toast
import androidx.appcompat.app.AppCompatActivity
import java.io.File
import java.io.FileOutputStream
import java.io.IOException

const val TAG = "sherpa-onnx"

class MainActivity : AppCompatActivity() {
    private lateinit var tts: OfflineTts
    private lateinit var text: EditText
    private lateinit var sid: EditText
    private lateinit var speed: EditText
    private lateinit var generate: Button
    private lateinit var play: Button
    private lateinit var stop: Button
    private var stopped: Boolean = false
    private var mediaPlayer: MediaPlayer? = null

    // see
    // https://developer.android.com/reference/kotlin/android/media/AudioTrack
    private lateinit var track: AudioTrack

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        setContentView(R.layout.activity_main)

        Log.i(TAG, "Start to initialize TTS")
        initTts()
        Log.i(TAG, "Finish initializing TTS")

        Log.i(TAG, "Start to initialize AudioTrack")
        initAudioTrack()
        Log.i(TAG, "Finish initializing AudioTrack")

        text = findViewById(R.id.text)
        sid = findViewById(R.id.sid)
        speed = findViewById(R.id.speed)

        generate = findViewById(R.id.generate)
        play = findViewById(R.id.play)
        stop = findViewById(R.id.stop)

        generate.setOnClickListener { onClickGenerate() }
        play.setOnClickListener { onClickPlay() }
        stop.setOnClickListener { onClickStop() }

        sid.setText("0")
        speed.setText("1.0")

        // we will change sampleText here in the CI
        val sampleText = ""
        text.setText(sampleText)

        play.isEnabled = false
    }

    private fun initAudioTrack() {
        val sampleRate = tts.sampleRate()
        val bufLength = AudioTrack.getMinBufferSize(
            sampleRate,
            AudioFormat.CHANNEL_OUT_MONO,
            AudioFormat.ENCODING_PCM_FLOAT
        )
        Log.i(TAG, "sampleRate: $sampleRate, buffLength: $bufLength")

        val attr = AudioAttributes.Builder().setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
            .setUsage(AudioAttributes.USAGE_MEDIA)
            .build()

        val format = AudioFormat.Builder()
            .setEncoding(AudioFormat.ENCODING_PCM_FLOAT)
            .setChannelMask(AudioFormat.CHANNEL_OUT_MONO)
            .setSampleRate(sampleRate)
            .build()

        track = AudioTrack(
            attr, format, bufLength, AudioTrack.MODE_STREAM,
            AudioManager.AUDIO_SESSION_ID_GENERATE
        )
        track.play()
    }

    // this function is called from C++
    private fun callback(samples: FloatArray): Int {
        if (!stopped) {
            track.write(samples, 0, samples.size, AudioTrack.WRITE_BLOCKING)
            return 1
        } else {
            track.stop()
            return 0
        }
    }

    private fun onClickGenerate() {
        val sidInt = sid.text.toString().toIntOrNull()
        if (sidInt == null || sidInt < 0) {
            Toast.makeText(
                applicationContext,
                "Please input a non-negative integer for speaker ID!",
                Toast.LENGTH_SHORT
            ).show()
            return
        }

        val speedFloat = speed.text.toString().toFloatOrNull()
        if (speedFloat == null || speedFloat <= 0) {
            Toast.makeText(
                applicationContext,
                "Please input a positive number for speech speed!",
                Toast.LENGTH_SHORT
            ).show()
            return
        }

        val textStr = text.text.toString().trim()
        if (textStr.isBlank() || textStr.isEmpty()) {
            Toast.makeText(applicationContext, "Please input a non-empty text!", Toast.LENGTH_SHORT)
                .show()
            return
        }

        track.pause()
        track.flush()
        track.play()

        play.isEnabled = false
        generate.isEnabled = false
        stopped = false
        Thread {
            val audio = tts.generateWithCallback(
                text = textStr,
                sid = sidInt,
                speed = speedFloat,
                callback = this::callback
            )

            val filename = application.filesDir.absolutePath + "/generated.wav"
            val ok = audio.samples.size > 0 && audio.save(filename)
            if (ok) {
                runOnUiThread {
                    play.isEnabled = true
                    generate.isEnabled = true
                    track.stop()
                }
            }
        }.start()
    }

    private fun onClickPlay() {
        val filename = application.filesDir.absolutePath + "/generated.wav"
        mediaPlayer?.stop()
        mediaPlayer = MediaPlayer.create(
            applicationContext,
            Uri.fromFile(File(filename))
        )
        mediaPlayer?.start()
    }

    private fun onClickStop() {
        stopped = true
        play.isEnabled = true
        generate.isEnabled = true
        track.pause()
        track.flush()
        mediaPlayer?.stop()
        mediaPlayer = null
    }

    private fun initTts() {
        var modelDir: String?
        var modelName: String?
        var acousticModelName: String?
        var vocoder: String?
        var voices: String?
        var ruleFsts: String?
        var ruleFars: String?
        var lexicon: String?
        var dataDir: String?
        var assets: AssetManager? = application.assets
        var isKitten = false

        // The purpose of such a design is to make the CI test easier
        // Please see
        // https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/apk/generate-tts-apk-script.py

        // VITS -- begin
        modelName = null
        // VITS -- end

        // Matcha -- begin
        acousticModelName = null
        vocoder = null
        // Matcha -- end

        // For Kokoro -- begin
        voices = null
        // For Kokoro -- end


        modelDir = null
        ruleFsts = null
        ruleFars = null
        lexicon = null
        dataDir = null

        // Example 1:
        // modelDir = "vits-vctk"
        // modelName = "vits-vctk.onnx"
        // lexicon = "lexicon.txt"

        // Example 2:
        // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
        // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
        // modelDir = "vits-piper-en_US-amy-low"
        // modelName = "en_US-amy-low.onnx"
        // dataDir = "vits-piper-en_US-amy-low/espeak-ng-data"

        // Example 3:
        // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
        // modelDir = "vits-icefall-zh-aishell3"
        // modelName = "model.onnx"
        // ruleFars = "vits-icefall-zh-aishell3/rule.far"
        // lexicon = "lexicon.txt"

        // Example 4:
        // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#csukuangfj-vits-zh-hf-fanchen-c-chinese-187-speakers
        // modelDir = "vits-zh-hf-fanchen-C"
        // modelName = "vits-zh-hf-fanchen-C.onnx"
        // lexicon = "lexicon.txt"

        // Example 5:
        // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2
        // modelDir = "vits-coqui-de-css10"
        // modelName = "model.onnx"

        // Example 6
        // vits-melo-tts-zh_en
        // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-melo-tts-zh-en-chinese-english-1-speaker
        // modelDir = "vits-melo-tts-zh_en"
        // modelName = "model.onnx"
        // lexicon = "lexicon.txt"

        // Example 7
        // matcha-icefall-zh-baker
        // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
        // modelDir = "matcha-icefall-zh-baker"
        // acousticModelName = "model-steps-3.onnx"
        // vocoder = "vocos-22khz-univ.onnx"    // Vocoder should be downloaded separately; place in the **root directory of your resources folder**, not under modelDir.
        // lexicon = "lexicon.txt"

        // Example 8
        // matcha-icefall-en_US-ljspeech
        // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
        // modelDir = "matcha-icefall-en_US-ljspeech"
        // acousticModelName = "model-steps-3.onnx"
        // vocoder = "vocos-22khz-univ.onnx"
        // dataDir = "matcha-icefall-en_US-ljspeech/espeak-ng-data"

        // Example 9
        // kokoro-en-v0_19
        // modelDir = "kokoro-en-v0_19"
        // modelName = "model.onnx"
        // voices = "voices.bin"
        // dataDir = "kokoro-en-v0_19/espeak-ng-data"

        // Example 10
        // kokoro-multi-lang-v1_0
        // modelDir = "kokoro-multi-lang-v1_0"
        // modelName = "model.onnx"
        // voices = "voices.bin"
        // dataDir = "kokoro-multi-lang-v1_0/espeak-ng-data"
        // lexicon = "kokoro-multi-lang-v1_0/lexicon-us-en.txt,kokoro-multi-lang-v1_0/lexicon-zh.txt"
        // ruleFsts = "$modelDir/phone-zh.fst,$modelDir/date-zh.fst,$modelDir/number-zh.fst"

        // Example 11
        // kitten-nano-en-v0_1-fp16
        // modelDir = "kitten-nano-en-v0_1-fp16"
        // modelName = "model.fp16.onnx"
        // voices = "voices.bin"
        // dataDir = "kokoro-multi-lang-v1_0/espeak-ng-data"
        // isKitten = true

        // Example 12
        // matcha-icefall-zh-en
        // https://k2-fsa.github.io/sherpa/onnx/tts/all/Chinese-English/matcha-icefall-zh-en.html
        // modelDir = "matcha-icefall-zh-en"
        // acousticModelName = "model-steps-3.onnx"
        // vocoder = "vocos-16khz-univ.onnx"    // Vocoder should be downloaded separately; place in the **root directory of your resources folder**, not under modelDir.
        // dataDir = "matcha-icefall-zh-en/espeak-ng-data"
        // lexicon = "lexicon.txt"

        if (dataDir != null) {
            val newDir = copyDataDir(dataDir!!)
            dataDir = "$newDir/$dataDir"
        }

        val config = getOfflineTtsConfig(
            modelDir = modelDir!!,
            modelName = modelName ?: "",
            acousticModelName = acousticModelName ?: "",
            vocoder = vocoder ?: "",
            voices = voices ?: "",
            lexicon = lexicon ?: "",
            dataDir = dataDir ?: "",
            dictDir = "",
            ruleFsts = ruleFsts ?: "",
            ruleFars = ruleFars ?: "",
            isKitten = isKitten,
        )!!

        tts = OfflineTts(assetManager = assets, config = config)
    }


    private fun copyDataDir(dataDir: String): String {
        Log.i(TAG, "data dir is $dataDir")
        copyAssets(dataDir)

        val newDataDir = application.getExternalFilesDir(null)!!.absolutePath
        Log.i(TAG, "newDataDir: $newDataDir")
        return newDataDir
    }

    private fun copyAssets(path: String) {
        val assets: Array<String>?
        try {
            assets = application.assets.list(path)
            if (assets!!.isEmpty()) {
                copyFile(path)
            } else {
                val fullPath = "${application.getExternalFilesDir(null)}/$path"
                val dir = File(fullPath)
                dir.mkdirs()
                for (asset in assets.iterator()) {
                    val p: String = if (path == "") "" else path + "/"
                    copyAssets(p + asset)
                }
            }
        } catch (ex: IOException) {
            Log.e(TAG, "Failed to copy $path. $ex")
        }
    }

    private fun copyFile(filename: String) {
        try {
            val istream = application.assets.open(filename)
            val newFilename = application.getExternalFilesDir(null).toString() + "/" + filename
            val ostream = FileOutputStream(newFilename)
            // Log.i(TAG, "Copying $filename to $newFilename")
            val buffer = ByteArray(1024)
            var read = 0
            while (read != -1) {
                ostream.write(buffer, 0, read)
                read = istream.read(buffer)
            }
            istream.close()
            ostream.flush()
            ostream.close()
        } catch (ex: Exception) {
            Log.e(TAG, "Failed to copy $filename, $ex")
        }
    }
}


================================================
FILE: android/SherpaOnnxTts/app/src/main/jniLibs/arm64-v8a/.gitignore
================================================


================================================
FILE: android/SherpaOnnxTts/app/src/main/jniLibs/armeabi-v7a/.gitignore
================================================


================================================
FILE: android/SherpaOnnxTts/app/src/main/jniLibs/x86/.gitignore
================================================


================================================
FILE: android/SherpaOnnxTts/app/src/main/jniLibs/x86_64/.gitignore
================================================


================================================
FILE: android/SherpaOnnxTts/app/src/main/res/drawable/ic_launcher_background.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">
    <path
        android:fillColor="#3DDC84"
        android:pathData="M0,0h108v108h-108z" />
    <path
        android:fillColor="#00000000"
        android:pathData="M9,0L9,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,0L19,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M29,0L29,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M39,0L39,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M49,0L49,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M59,0L59,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M69,0L69,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M79,0L79,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M89,0L89,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M99,0L99,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,9L108,9"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,19L108,19"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,29L108,29"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,39L108,39"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,49L108,49"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,59L108,59"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,69L108,69"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,79L108,79"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,89L108,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,99L108,99"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,29L89,29"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,39L89,39"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,49L89,49"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,59L89,59"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,69L89,69"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,79L89,79"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M29,19L29,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M39,19L39,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M49,19L49,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M59,19L59,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M69,19L69,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M79,19L79,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
</vector>


================================================
FILE: android/SherpaOnnxTts/app/src/main/res/drawable-v24/ic_launcher_foreground.xml
================================================
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:aapt="http://schemas.android.com/aapt"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">
    <path android:pathData="M31,63.928c0,0 6.4,-11 12.1,-13.1c7.2,-2.6 26,-1.4 26,-1.4l38.1,38.1L107,108.928l-32,-1L31,63.928z">
        <aapt:attr name="android:fillColor">
            <gradient
                android:endX="85.84757"
                android:endY="92.4963"
                android:startX="42.9492"
                android:startY="49.59793"
                android:type="linear">
                <item
                    android:color="#44000000"
                    android:offset="0.0" />
                <item
                    android:color="#00000000"
                    android:offset="1.0" />
            </gradient>
        </aapt:attr>
    </path>
    <path
        android:fillColor="#FFFFFF"
        android:fillType="nonZero"
        android:pathData="M65.3,45.828l3.8,-6.6c0.2,-0.4 0.1,-0.9 -0.3,-1.1c-0.4,-0.2 -0.9,-0.1 -1.1,0.3l-3.9,6.7c-6.3,-2.8 -13.4,-2.8 -19.7,0l-3.9,-6.7c-0.2,-0.4 -0.7,-0.5 -1.1,-0.3C38.8,38.328 38.7,38.828 38.9,39.228l3.8,6.6C36.2,49.428 31.7,56.028 31,63.928h46C76.3,56.028 71.8,49.428 65.3,45.828zM43.4,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2c-0.3,-0.7 -0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C45.3,56.528 44.5,57.328 43.4,57.328L43.4,57.328zM64.6,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2s-0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C66.5,56.528 65.6,57.328 64.6,57.328L64.6,57.328z"
        android:strokeWidth="1"
        android:strokeColor="#00000000" />
</vector>

================================================
FILE: android/SherpaOnnxTts/app/src/main/res/layout/activity_main.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:app="http://schemas.android.com/apk/res-auto"
    xmlns:tools="http://schemas.android.com/tools"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    tools:context=".MainActivity">

    <TextView
        android:id="@+id/sid_label_hint"
        android:layout_width="match_parent"
        android:layout_height="wrap_content"
        android:text="@string/sid_label"
        android:gravity="center"
        app:layout_constraintLeft_toLeftOf="parent"
        app:layout_constraintRight_toRightOf="parent"
        app:layout_constraintTop_toTopOf="parent"
        />
    <EditText
        android:id="@+id/sid"
        android:layout_width="match_parent"
        android:layout_height="60dp"
        android:layout_marginTop="0dp"
        android:hint="@string/sid_hint"
        android:gravity="center"
        app:layout_constraintLeft_toLeftOf="parent"
        app:layout_constraintRight_toRightOf="parent"
        app:layout_constraintTop_toBottomOf="@id/sid_label_hint" />

    <TextView
        android:id="@+id/speed_label_hint"
        android:layout_width="match_parent"
        android:layout_height="wrap_content"
        android:layout_marginTop="3dp"
        android:text="@string/speed_label"
        android:gravity="center"
        app:layout_constraintLeft_toLeftOf="parent"
        app:layout_constraintRight_toRightOf="parent"
        app:layout_constraintTop_toBottomOf="@id/sid"/>
    <EditText
        android:id="@+id/speed"
        android:layout_width="match_parent"
        android:layout_height="60dp"
        android:layout_marginTop="0dp"
        android:hint="@string/speed_hint"
        android:gravity="center"
        app:layout_constraintLeft_toLeftOf="parent"
        app:layout_constraintRight_toRightOf="parent"
        app:layout_constraintTop_toBottomOf="@id/speed_label_hint" />

    <EditText
        android:id="@+id/text"
        android:inputType="textMultiLine"
        android:lines="8"
        android:minLines="10"
        android:gravity="top|start"
        android:maxLines="30"
        android:layout_height="wrap_content"
        android:layout_width="match_parent"
        android:scrollbars="vertical"
        android:hint="@string/text_hint"
        app:layout_constraintLeft_toLeftOf="parent"
        app:layout_constraintRight_toRightOf="parent"
        app:layout_constraintTop_toBottomOf="@id/speed" />

    <Button
        android:id="@+id/generate"
        android:textAllCaps="false"
        android:layout_width="match_parent"
        android:layout_height="50dp"
        android:layout_marginTop="4dp"
        android:text="@string/generate"
        app:layout_constraintLeft_toLeftOf="parent"
        app:layout_constraintRight_toRightOf="parent"
        app:layout_constraintTop_toBottomOf="@id/text" />

    <Button
        android:id="@+id/play"
        android:textAllCaps="false"
        android:layout_width="match_parent"
        android:layout_height="50dp"
        android:layout_marginTop="4dp"
        android:text="@string/play"
        app:layout_constraintLeft_toLeftOf="parent"
        app:layout_constraintRight_toRightOf="parent"
        app:layout_constraintTop_toBottomOf="@id/generate" />

    <Button
        android:id="@+id/stop"
        android:textAllCaps="false"
        android:layout_width="match_parent"
        android:layout_height="50dp"
        android:layout_marginTop="4dp"
        android:text="@string/stop"
        app:layout_constraintLeft_toLeftOf="parent"
        app:layout_constraintRight_toRightOf="parent"
        app:layout_constraintTop_toBottomOf="@id/play" />

</androidx.constraintlayout.widget.ConstraintLayout>

================================================
FILE: android/SherpaOnnxTts/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@drawable/ic_launcher_background" />
    <foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

================================================
FILE: android/SherpaOnnxTts/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@drawable/ic_launcher_background" />
    <foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

================================================
FILE: android/SherpaOnnxTts/app/src/main/res/values/colors.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<resources>
    <color name="purple_200">#FFBB86FC</color>
    <color name="purple_500">#FF6200EE</color>
    <color name="purple_700">#FF3700B3</color>
    <color name="teal_200">#FF03DAC5</color>
    <color name="teal_700">#FF018786</color>
    <color name="black">#FF000000</color>
    <color name="white">#FFFFFFFF</color>
</resources>

================================================
FILE: android/SherpaOnnxTts/app/src/main/res/values/strings.xml
================================================
<resources>
    <string name="app_name">TTS</string>
    <string name="sid_label">Speaker ID</string>
    <string name="sid_hint">0</string>
    <string name="speed_label">Speech speed (large->fast)</string>
    <string name="speed_hint">1.0</string>
    <string name="text_hint">Please input your text here</string>
    <string name="generate">Generate</string>
    <string name="play">Play</string>
    <string name="stop">Stop</string>
</resources>

================================================
FILE: android/SherpaOnnxTts/app/src/main/res/values/themes.xml
================================================
<resources xmlns:tools="http://schemas.android.com/tools">
    <!-- Base application theme. -->
    <style name="Theme.SherpaOnnxTts" parent="Theme.MaterialComponents.DayNight.DarkActionBar">
        <!-- Primary brand color. -->
        <item name="colorPrimary">@color/purple_500</item>
        <item name="colorPrimaryVariant">@color/purple_700</item>
        <item name="colorOnPrimary">@color/white</item>
        <!-- Secondary brand color. -->
        <item name="colorSecondary">@color/teal_200</item>
        <item name="colorSecondaryVariant">@color/teal_700</item>
        <item name="colorOnSecondary">@color/black</item>
        <!-- Status bar color. -->
        <item name="android:statusBarColor">?attr/colorPrimaryVariant</item>
        <!-- Customize your theme here. -->
    </style>
</resources>

================================================
FILE: android/SherpaOnnxTts/app/src/main/res/values-night/themes.xml
================================================
<resources xmlns:tools="http://schemas.android.com/tools">
    <!-- Base application theme. -->
    <style name="Theme.SherpaOnnxTts" parent="Theme.MaterialComponents.DayNight.DarkActionBar">
        <!-- Primary brand color. -->
        <item name="colorPrimary">@color/purple_200</item>
        <item name="colorPrimaryVariant">@color/purple_700</item>
        <item name="colorOnPrimary">@color/black</item>
        <!-- Secondary brand color. -->
        <item name="colorSecondary">@color/teal_200</item>
        <item name="colorSecondaryVariant">@color/teal_200</item>
        <item name="colorOnSecondary">@color/black</item>
        <!-- Status bar color. -->
        <item name="android:statusBarColor">?attr/colorPrimaryVariant</item>
        <!-- Customize your theme here. -->
    </style>
</resources>

================================================
FILE: android/SherpaOnnxTts/app/src/main/res/xml/backup_rules.xml
================================================
<?xml version="1.0" encoding="utf-8"?><!--
   Sample backup rules file; uncomment and customize as necessary.
   See https://developer.android.com/guide/topics/data/autobackup
   for details.
   Note: This file is ignored for devices older that API 31
   See https://developer.android.com/about/versions/12/backup-restore
-->
<full-backup-content>
    <!--
   <include domain="sharedpref" path="."/>
   <exclude domain="sharedpref" path="device.xml"/>
-->
</full-backup-content>

================================================
FILE: android/SherpaOnnxTts/app/src/main/res/xml/data_extraction_rules.xml
================================================
<?xml version="1.0" encoding="utf-8"?><!--
   Sample data extraction rules file; uncomment and customize as necessary.
   See https://developer.android.com/about/versions/12/backup-restore#xml-changes
   for details.
-->
<data-extraction-rules>
    <cloud-backup>
        <!-- TODO: Use <include> and <exclude> to control what is backed up.
        <include .../>
        <exclude .../>
        -->
    </cloud-backup>
    <!--
    <device-transfer>
        <include .../>
        <exclude .../>
    </device-transfer>
    -->
</data-extraction-rules>

================================================
FILE: android/SherpaOnnxTts/app/src/test/java/com/k2fsa/sherpa/onnx/ExampleUnitTest.kt
================================================
package com.k2fsa.sherpa.onnx

import org.junit.Test

import org.junit.Assert.*

/**
 * Example local unit test, which will execute on the development machine (host).
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
class ExampleUnitTest {
    @Test
    fun addition_isCorrect() {
        assertEquals(4, 2 + 2)
    }
}

================================================
FILE: android/SherpaOnnxTts/build.gradle
================================================
// Top-level build file where you can add configuration options common to all sub-projects/modules.
plugins {
    id 'com.android.application' version '7.3.1' apply false
    id 'com.android.library' version '7.3.1' apply false
    id 'org.jetbrains.kotlin.android' version '1.7.20' apply false
}

================================================
FILE: android/SherpaOnnxTts/gradle/wrapper/gradle-wrapper.properties
================================================
#Mon Oct 23 15:40:58 CST 2023
distributionBase=GRADLE_USER_HOME
distributionUrl=https\://services.gradle.org/distributions/gradle-8.2-bin.zip
distributionPath=wrapper/dists
zipStorePath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME


================================================
FILE: android/SherpaOnnxTts/gradle.properties
================================================
# Project-wide Gradle settings.
# IDE (e.g. Android Studio) users:
# Gradle settings configured through the IDE *will override*
# any settings specified in this file.
# For more details on how to configure your build environment visit
# http://www.gradle.org/docs/current/userguide/build_environment.html
# Specifies the JVM arguments used for the daemon process.
# The setting is particularly useful for tweaking memory settings.
org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
# When configured, Gradle will run in incubating parallel mode.
# This option should only be used with decoupled projects. More details, visit
# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
# org.gradle.parallel=true
# AndroidX package structure to make it clearer which packages are bundled with the
# Android operating system, and which are packaged with your app's APK
# https://developer.android.com/topic/libraries/support-library/androidx-rn
android.useAndroidX=true
# Kotlin code style for this project: "official" or "obsolete":
kotlin.code.style=official
# Enables namespacing of each library's R class so that its R class includes only the
# resources declared in the library itself and none from the library's dependencies,
# thereby reducing the size of the R class for that library
android.nonTransitiveRClass=true

================================================
FILE: android/SherpaOnnxTts/gradlew
================================================
#!/usr/bin/env sh

#
# Copyright 2015 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

##############################################################################
##
##  Gradle start up script for UN*X
##
##############################################################################

# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
    ls=`ls -ld "$PRG"`
    link=`expr "$ls" : '.*-> \(.*\)$'`
    if expr "$link" : '/.*' > /dev/null; then
        PRG="$link"
    else
        PRG=`dirname "$PRG"`"/$link"
    fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null

APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`

# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"

warn () {
    echo "$*"
}

die () {
    echo
    echo "$*"
    echo
    exit 1
}

# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
  CYGWIN* )
    cygwin=true
    ;;
  Darwin* )
    darwin=true
    ;;
  MINGW* )
    msys=true
    ;;
  NONSTOP* )
    nonstop=true
    ;;
esac

CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar


# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
        # IBM's JDK on AIX uses strange locations for the executables
        JAVACMD="$JAVA_HOME/jre/sh/java"
    else
        JAVACMD="$JAVA_HOME/bin/java"
    fi
    if [ ! -x "$JAVACMD" ] ; then
        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
else
    JAVACMD="java"
    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi

# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
    MAX_FD_LIMIT=`ulimit -H -n`
    if [ $? -eq 0 ] ; then
        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
            MAX_FD="$MAX_FD_LIMIT"
        fi
        ulimit -n $MAX_FD
        if [ $? -ne 0 ] ; then
            warn "Could not set maximum file descriptor limit: $MAX_FD"
        fi
    else
        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
    fi
fi

# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi

# For Cygwin or MSYS, switch paths to Windows format before running java
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`

    JAVACMD=`cygpath --unix "$JAVACMD"`

    # We build the pattern for arguments to be converted via cygpath
    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
    SEP=""
    for dir in $ROOTDIRSRAW ; do
        ROOTDIRS="$ROOTDIRS$SEP$dir"
        SEP="|"
    done
    OURCYGPATTERN="(^($ROOTDIRS))"
    # Add a user-defined pattern to the cygpath arguments
    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
    fi
    # Now convert the arguments - kludge to limit ourselves to /bin/sh
    i=0
    for arg in "$@" ; do
        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
        CHECK2=`echo "$arg"|egrep -c "^-"`                                 ### Determine if an option

        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Added a condition
            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
        else
            eval `echo args$i`="\"$arg\""
        fi
        i=`expr $i + 1`
    done
    case $i in
        0) set -- ;;
        1) set -- "$args0" ;;
        2) set -- "$args0" "$args1" ;;
        3) set -- "$args0" "$args1" "$args2" ;;
        4) set -- "$args0" "$args1" "$args2" "$args3" ;;
        5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
        6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
        7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
        8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
        9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
    esac
fi

# Escape application args
save () {
    for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
    echo " "
}
APP_ARGS=`save "$@"`

# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"

exec "$JAVACMD" "$@"


================================================
FILE: android/SherpaOnnxTts/gradlew.bat
================================================
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem      https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem

@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem  Gradle startup script for Windows
@rem
@rem ##########################################################################

@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal

set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"

@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome

set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto execute

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto execute

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar


@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*

:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if  not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1

:mainEnd
if "%OS%"=="Windows_NT" endlocal

:omega


================================================
FILE: android/SherpaOnnxTts/settings.gradle
================================================
pluginManagement {
    repositories {
        gradlePluginPortal()
        google()
        mavenCentral()
    }
}
dependencyResolutionManagement {
    repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
    repositories {
        google()
        mavenCentral()
    }
}
rootProject.name = "SherpaOnnxTts"
include ':app'


================================================
FILE: android/SherpaOnnxTtsEngine/.gitignore
================================================
*.iml
.gradle
/local.properties
/.idea/caches
/.idea/libraries
/.idea/modules.xml
/.idea/workspace.xml
/.idea/navEditor.xml
/.idea/assetWizardSettings.xml
.DS_Store
/build
/captures
.externalNativeBuild
.cxx
local.properties


================================================
FILE: android/SherpaOnnxTtsEngine/app/.gitignore
================================================
/build

================================================
FILE: android/SherpaOnnxTtsEngine/app/build.gradle.kts
================================================
plugins {
    id("com.android.application")
    id("org.jetbrains.kotlin.android")
}

android {
    namespace = "com.k2fsa.sherpa.onnx.tts.engine"
    compileSdk = 34

    defaultConfig {
        applicationId = "com.k2fsa.sherpa.onnx.tts.engine"
        minSdk = 21
        targetSdk = 34
        versionCode = 20260320
        versionName = "1.12.31"

        testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
        vectorDrawables {
            useSupportLibrary = true
        }
    }

    buildTypes {
        release {
            isMinifyEnabled = false
            proguardFiles(
                getDefaultProguardFile("proguard-android-optimize.txt"),
                "proguard-rules.pro"
            )
        }
    }
    compileOptions {
        sourceCompatibility = JavaVersion.VERSION_1_8
        targetCompatibility = JavaVersion.VERSION_1_8
    }
    kotlinOptions {
        jvmTarget = "1.8"
    }
    buildFeatures {
        compose = true
    }
    composeOptions {
        kotlinCompilerExtensionVersion = "1.5.1"
    }
    packaging {
        resources {
            excludes += "/META-INF/{AL2.0,LGPL2.1}"
        }
    }
}

dependencies {

    implementation("androidx.core:core-ktx:1.12.0")
    implementation("androidx.lifecycle:lifecycle-runtime-ktx:2.6.2")
    implementation("androidx.activity:activity-compose:1.8.2")
    implementation(platform("androidx.compose:compose-bom:2023.08.00"))
    implementation("androidx.compose.ui:ui")
    implementation("androidx.compose.ui:ui-graphics")
    implementation("androidx.compose.ui:ui-tooling-preview")
    implementation("androidx.compose.material3:material3")
    implementation("androidx.appcompat:appcompat:1.6.1")
    implementation("com.google.android.material:material:1.9.0")
    testImplementation("junit:junit:4.13.2")
    androidTestImplementation("androidx.test.ext:junit:1.1.5")
    androidTestImplementation("androidx.test.espresso:espresso-core:3.5.1")
    androidTestImplementation(platform("androidx.compose:compose-bom:2023.08.00"))
    androidTestImplementation("androidx.compose.ui:ui-test-junit4")
    debugImplementation("androidx.compose.ui:ui-tooling")
    debugImplementation("androidx.compose.ui:ui-test-manifest")
}

================================================
FILE: android/SherpaOnnxTtsEngine/app/proguard-rules.pro
================================================
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
#   http://developer.android.com/guide/developing/tools/proguard.html

# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
#   public *;
#}

# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable

# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile

================================================
FILE: android/SherpaOnnxTtsEngine/app/src/androidTest/java/com/k2fsa/sherpa/onnx/tts/engine/ExampleInstrumentedTest.kt
================================================
package com.k2fsa.sherpa.onnx.tts.engine

import androidx.test.platform.app.InstrumentationRegistry
import androidx.test.ext.junit.runners.AndroidJUnit4

import org.junit.Test
import org.junit.runner.RunWith

import org.junit.Assert.*

/**
 * Instrumented test, which will execute on an Android device.
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
@RunWith(AndroidJUnit4::class)
class ExampleInstrumentedTest {
    @Test
    fun useAppContext() {
        // Context of the app under test.
        val appContext = InstrumentationRegistry.getInstrumentation().targetContext
        assertEquals("com.k2fsa.sherpa.onnx.tts.engine", appContext.packageName)
    }
}

================================================
FILE: android/SherpaOnnxTtsEngine/app/src/main/AndroidManifest.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:tools="http://schemas.android.com/tools"
    package="com.k2fsa.sherpa.onnx.tts.engine">

    <application
        android:allowBackup="true"
        android:dataExtractionRules="@xml/data_extraction_rules"
        android:fullBackupContent="@xml/backup_rules"
        android:icon="@mipmap/ic_launcher"
        android:label="@string/app_name"
        android:roundIcon="@mipmap/ic_launcher_round"
        android:supportsRtl="true"
        android:theme="@style/Theme.SherpaOnnxTtsEngine"
        tools:targetApi="31">
        <activity
            android:name=".GetSampleText"
            android:exported="true"
            android:theme="@android:style/Theme.Translucent.NoTitleBar">
            <intent-filter>
                <action android:name="android.speech.tts.engine.GET_SAMPLE_TEXT" />

                <category android:name="android.intent.category.DEFAULT" />
            </intent-filter>
        </activity>
        <activity
            android:name=".CheckVoiceData"
            android:exported="true">
            <intent-filter>
                <action android:name="android.speech.tts.engine.CHECK_TTS_DATA" />

                <category android:name="android.intent.category.DEFAULT" />
            </intent-filter>
        </activity>
        <activity
            android:name=".InstallVoiceData"
            android:exported="true">
            <intent-filter>
                <action android:name="android.speech.tts.engine.INSTALL_TTS_DATA" />

                <category android:name="android.intent.category.DEFAULT" />
            </intent-filter>
        </activity>

        <service
            android:name=".TtsService"
            android:enabled="true"
            android:exported="true"
            android:label="@string/app_name">
            <intent-filter>
                <action android:name="android.intent.action.TTS_SERVICE" />

                <category android:name="android.intent.category.DEFAULT" />
            </intent-filter>

            <meta-data
                android:name="android.speech.tts"
                android:resource="@xml/tts_engine" />
        </service>

        <activity
            android:name=".MainActivity"
            android:exported="true"
            android:label="@string/app_name"
            android:theme="@style/Theme.SherpaOnnxTtsEngine">
            <intent-filter>
                <action android:name="android.intent.action.MAIN" />

                <category android:name="android.intent.category.LAUNCHER" />
            </intent-filter>
            <intent-filter>
                <action android:name="android.speech.tts.engine.CONFIGURE_ENGINE" />

                <category android:name="android.intent.category.DEFAULT" />
            </intent-filter>
        </activity>
    </application>

</manifest>

================================================
FILE: android/SherpaOnnxTtsEngine/app/src/main/assets/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/CheckVoiceData.kt
================================================
package com.k2fsa.sherpa.onnx.tts.engine

import android.content.Intent
import android.os.Bundle
import android.speech.tts.TextToSpeech
import androidx.appcompat.app.AppCompatActivity

class CheckVoiceData : AppCompatActivity() {
    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        val intent = Intent().apply {
            putStringArrayListExtra(
                TextToSpeech.Engine.EXTRA_AVAILABLE_VOICES,
                arrayListOf(TtsEngine.lang)
            )
            putStringArrayListExtra(TextToSpeech.Engine.EXTRA_UNAVAILABLE_VOICES, arrayListOf())
        }
        setResult(TextToSpeech.Engine.CHECK_VOICE_DATA_PASS, intent)
        finish()
    }
}

================================================
FILE: android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/GetSampleText.kt
================================================
package com.k2fsa.sherpa.onnx.tts.engine

import android.app.Activity
import android.content.Intent
import android.os.Bundle
import android.speech.tts.TextToSpeech

fun getSampleText(lang: String): String {
    var text = ""
    when (lang) {
        "ara" -> {
            text = "هذا هو محرك تحويل النص إلى كلام باستخدام الجيل القادم من كالدي"
        }

        "ben" -> {
            text = "এটি একটি টেক্সট-টু-স্পীচ ইঞ্জিন যা পরবর্তী প্রজন্মের কালডি ব্যবহার করে"
        }

        "bul" -> {
            text =
                "Това е машина за преобразуване на текст в реч, използваща Kaldi от следващо поколение"
        }

        "cat" -> {
            text = "Aquest és un motor de text a veu que utilitza Kaldi de nova generació"
        }

        "cym" -> {
            text = "Peiriant testun-i-lais yw hwn sy'n defnyddio Kaldi'r genhedlaeth nesaf"
        }

        "ces" -> {
            text = "Toto je převodník textu na řeč využívající novou generaci kaldi"
        }

        "dan" -> {
            text = "Dette er en tekst til tale-motor, der bruger næste generation af kaldi"
        }

        "deu" -> {
            text =
                "Dies ist eine Text-to-Speech-Engine, die Kaldi der nächsten Generation verwendet"
        }

        "ell" -> {
            text = "Αυτή είναι μια μηχανή κειμένου σε ομιλία που χρησιμοποιεί kaldi επόμενης γενιάς"
        }

        "eng" -> {
            text = "How are you doing today? This is a text-to-speech engine using next generation Kaldi"
        }

        "est" -> {
            text = "See on teksti kõneks muutmise mootor, mis kasutab järgmise põlvkonna Kaldi"
        }

        "fin" -> {
            text = "Tämä on tekstistä puheeksi -moottori, joka käyttää seuraavan sukupolven kaldia"
        }

        "fra" -> {
            text = "Il s'agit d'un moteur de synthèse vocale utilisant Kaldi de nouvelle génération"
        }

        "gle" -> {
            text = "Is inneall téacs-go-hurlabhra é seo a úsáideann Kaldi den chéad ghlúin eile"
        }

        "hrv" -> {
            text =
                "Ovo je mehanizam za pretvaranje teksta u govor koji koristi Kaldi sljedeće generacije"
        }

        "hun" -> {
            text = "Ez egy szövegfelolvasó motor a következő generációs kaldi használatával"
        }

        "isl" -> {
            text = "Þetta er texta í tal vél sem notar næstu kynslóð kaldi"
        }

        "ita" -> {
            text = "Questo è un motore di sintesi vocale che utilizza kaldi di nuova generazione"
        }

        "kat" -> {
            text = "ეს არის ტექსტიდან მეტყველების ძრავა შემდეგი თაობის კალდის გამოყენებით"
        }

        "kaz" -> {
            text = "Бұл келесі буын kaldi көмегімен мәтіннен сөйлеуге арналған қозғалтқыш"
        }

        "mlt" -> {
            text = "Din hija magna text-to-speech li tuża Kaldi tal-ġenerazzjoni li jmiss"
        }

        "lav" -> {
            text = "Šis ir teksta pārvēršanas runā dzinējs, kas izmanto nākamās paaudzes Kaldi"
        }

        "lit" -> {
            text = "Tai teksto į kalbą variklis, kuriame naudojamas naujos kartos Kaldi"
        }

        "ltz" -> {
            text = "Dëst ass en Text-zu-Speech-Motor mat der nächster Generatioun Kaldi"
        }

        "nep" -> {
            text = "यो अर्को पुस्ता काल्डी प्रयोग गरेर स्पीच इन्जिनको पाठ हो"
        }

        "nld" -> {
            text =
                "Dit is een tekst-naar-spraak-engine die gebruik maakt van Kaldi van de volgende generatie"
        }

        "nor" -> {
            text = "Dette er en tekst til tale-motor som bruker neste generasjons kaldi"
        }

        "pol" -> {
            text = "Jest to silnik syntezatora mowy wykorzystujący Kaldi nowej generacji"
        }

        "por" -> {
            text =
                "Este é um mecanismo de conversão de texto em fala usando Kaldi de próxima geração"
        }

        "ron" -> {
            text = "Acesta este un motor text to speech care folosește generația următoare de kadi"
        }

        "rus" -> {
            text =
                "Это движок преобразования текста в речь, использующий Kaldi следующего поколения."
        }

        "slk" -> {
            text = "Toto je nástroj na prevod textu na reč využívajúci kaldi novej generácie"
        }

        "slv" -> {
            text =
                "To je mehanizem za pretvorbo besedila v govor, ki uporablja Kaldi naslednje generacije"
        }

        "spa" -> {
            text = "Este es un motor de texto a voz que utiliza kaldi de próxima generación."
        }

        "srp" -> {
            text =
                "Ово је механизам за претварање текста у говор који користи калди следеће генерације"
        }

        "swa" -> {
            text = "Haya ni maandishi kwa injini ya hotuba kwa kutumia kizazi kijacho kaldi"
        }

        "swe" -> {
            text = "Detta är en text till tal-motor som använder nästa generations kaldi"
        }

        "tur" -> {
            text = "Bu, yeni nesil kaldi'yi kullanan bir metinden konuşmaya motorudur"
        }

        "ukr" -> {
            text =
                "Це механізм перетворення тексту на мовлення, який використовує kaldi нового покоління"
        }

        "vie" -> {
            text = "Đây là công cụ chuyển văn bản thành giọng nói sử dụng kaldi thế hệ tiếp theo"
        }

        "zho", "cmn" -> {
            text = "使用新一代卡尔迪的语音合成引擎"
        }
    }
    return text
}

class GetSampleText : Activity() {
    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        var result = TextToSpeech.LANG_AVAILABLE
        val text: String = getSampleText(TtsEngine.lang ?: "")
        if (text.isEmpty()) {
            result = TextToSpeech.LANG_NOT_SUPPORTED
        }

        val intent = Intent().apply {
            if (result == TextToSpeech.LANG_AVAILABLE) {
                putExtra(TextToSpeech.Engine.EXTRA_SAMPLE_TEXT, text)
            } else {
                putExtra("sampleText", text)
            }
        }

        setResult(result, intent)
        finish()
    }
}

================================================
FILE: android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/InstallVoiceData.kt
================================================
package com.k2fsa.sherpa.onnx.tts.engine

import android.app.Activity
import android.os.Bundle
import android.view.Window

class InstallVoiceData : Activity() {
    override fun onCreate(savedInstanceState: Bundle?) {
        requestWindowFeature(Window.FEATURE_NO_TITLE)
        super.onCreate(savedInstanceState)
    }
}

================================================
FILE: android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/MainActivity.kt
================================================
@file:OptIn(ExperimentalMaterial3Api::class)

package com.k2fsa.sherpa.onnx.tts.engine

import PreferenceHelper
import android.media.AudioAttributes
import android.media.AudioFormat
import android.media.AudioManager
import android.media.AudioTrack
import android.media.MediaPlayer
import android.net.Uri
import android.os.Bundle
import android.util.Log
import android.widget.Toast
import androidx.activity.ComponentActivity
import androidx.activity.compose.setContent
import androidx.activity.viewModels
import androidx.compose.foundation.layout.Box
import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.Row
import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.foundation.layout.fillMaxWidth
import androidx.compose.foundation.layout.padding
import androidx.compose.foundation.layout.wrapContentHeight
import androidx.compose.foundation.rememberScrollState
import androidx.compose.foundation.text.KeyboardOptions
import androidx.compose.foundation.verticalScroll
import androidx.compose.material3.Button
import androidx.compose.material3.ExperimentalMaterial3Api
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.OutlinedTextField
import androidx.compose.material3.Scaffold
import androidx.compose.material3.Slider
import androidx.compose.material3.Surface
import androidx.compose.material3.Text
import androidx.compose.material3.TopAppBar
import androidx.compose.runtime.getValue
import androidx.compose.runtime.mutableStateOf
import androidx.compose.runtime.remember
import androidx.compose.runtime.setValue
import androidx.compose.ui.Modifier
import androidx.compose.ui.text.input.KeyboardType
import androidx.compose.ui.unit.dp
import com.k2fsa.sherpa.onnx.tts.engine.ui.theme.SherpaOnnxTtsEngineTheme
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.SupervisorJob
import kotlinx.coroutines.channels.Channel
import kotlinx.coroutines.launch
import kotlinx.coroutines.withContext
import java.io.File
import kotlin.time.TimeSource

const val TAG = "sherpa-onnx-tts-engine"

class MainActivity : ComponentActivity() {
    // TODO(fangjun): Save settings in ttsViewModel
    private val ttsViewModel: TtsViewModel by viewModels()

    private var mediaPlayer: MediaPlayer? = null

    // see
    // https://developer.android.com/reference/kotlin/android/media/AudioTrack
    private lateinit var track: AudioTrack

    private var stopped: Boolean = false

    private var samplesChannel = Channel<FloatArray>(capacity = 128)
    private val scope = CoroutineScope(Dispatchers.IO + SupervisorJob())


    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)

        Log.i(TAG, "Start to initialize TTS")
        TtsEngine.createTts(this)
        Log.i(TAG, "Finish initializing TTS")

        Log.i(TAG, "Start to initialize AudioTrack")
        initAudioTrack()
        Log.i(TAG, "Finish initializing AudioTrack")

        val preferenceHelper = PreferenceHelper(this)
        setContent {
            SherpaOnnxTtsEngineTheme {
                // A surface container using the 'background' color from the theme
                Surface(
                    modifier = Modifier.fillMaxSize(),
                    color = MaterialTheme.colorScheme.background
                ) {
                    Scaffold(topBar = {
                        TopAppBar(title = { Text("Next-gen Kaldi: TTS Engine") })
                    }) {
                        Box(modifier = Modifier.padding(it)) {
                            Column(modifier = Modifier.padding(16.dp)) {
                                Column {
                                    Text("Speed " + String.format("%.1f", TtsEngine.speed))
                                    Slider(
                                        value = TtsEngine.speedState.value,
                                        onValueChange = {
                                            TtsEngine.speed = it
                                            preferenceHelper.setSpeed(it)
                                        },
                                        valueRange = MIN_TTS_SPEED..MAX_TTS_SPEED,
                                        modifier = Modifier.fillMaxWidth()
                                    )
                                }

                                val testTextContent = getSampleText(TtsEngine.lang ?: "")

                                var testText by remember { mutableStateOf(testTextContent) }
                                var startEnabled by remember { mutableStateOf(true) }
                                var playEnabled by remember { mutableStateOf(false) }
                                var rtfText by remember {
                                    mutableStateOf("")
                                }
                                val scrollState = rememberScrollState(0)

                                val numSpeakers = TtsEngine.tts!!.numSpeakers()
                                if (numSpeakers > 1) {
                                    OutlinedTextField(
                                        value = TtsEngine.speakerIdState.value.toString(),
                                        onValueChange = {
                                            if (it.isEmpty() || it.isBlank()) {
                                                TtsEngine.speakerId = 0
                                            } else {
                                                try {
                                                    TtsEngine.speakerId = it.toString().toInt()
                                                } catch (ex: NumberFormatException) {
                                                    Log.i(TAG, "Invalid input: $it")
                                                    TtsEngine.speakerId = 0
                                                }
                                            }
                                            preferenceHelper.setSid(TtsEngine.speakerId)
                                        },
                                        label = {
                                            Text("Speaker ID: (0-${numSpeakers - 1})")
                                        },
                                        keyboardOptions = KeyboardOptions(keyboardType = KeyboardType.Number),
                                        modifier = Modifier
                                            .fillMaxWidth()
                                            .padding(bottom = 16.dp)
                                            .wrapContentHeight(),
                                    )
                                }

                                OutlinedTextField(
                                    value = testText,
                                    onValueChange = { testText = it },
                                    label = { Text("Please input your text here") },
                                    maxLines = 10,
                                    modifier = Modifier
                                        .fillMaxWidth()
                                        .padding(bottom = 16.dp)
                                        .verticalScroll(scrollState)
                                        .wrapContentHeight(),
                                    singleLine = false,
                                )

                                Row {
                                    Button(
                                        enabled = startEnabled,
                                        modifier = Modifier.padding(5.dp),
                                        onClick = {
                                            Log.i(TAG, "Clicked, text: $testText")
                                            if (testText.isBlank() || testText.isEmpty()) {
                                                Toast.makeText(
                                                    applicationContext,
                                                    "Please input some text to generate",
                                                    Toast.LENGTH_SHORT
                                                ).show()
                                            } else {
                                                startEnabled = false
                                                playEnabled = false
                                                stopped = false

                                                track.pause()
                                                track.flush()
                                                track.play()
                                                rtfText = ""
                                                Log.i(TAG, "Started with text $testText")

                                                scope.launch {
                                                    for (samples in samplesChannel) {
                                                        if (samples.isEmpty()) {
                                                            break
                                                        }

                                                        Log.i(
                                                            TAG,
                                                            "Received ${samples.count()} samples"
                                                        )
                                                        track.write(
                                                            samples,
                                                            0,
                                                            samples.size,
                                                            AudioTrack.WRITE_BLOCKING
                                                        )
                                                        if (stopped) {
                                                            break
                                                        }
                                                    }
                                                    Log.i(TAG, "Draining the channel")

                                                    // drain remaining
                                                    while (!samplesChannel.isEmpty) {
                                                        samplesChannel.tryReceive().getOrNull()
                                                    }
                                                    Log.i(TAG, "Channel drained")

                                                }

                                                CoroutineScope(Dispatchers.Default).launch {
                                                    val timeSource = TimeSource.Monotonic
                                                    val startTime = timeSource.markNow()

                                                    val audio =
                                                        TtsEngine.tts!!.generateWithCallback(
                                                            text = testText,
                                                            sid = TtsEngine.speakerId,
                                                            speed = TtsEngine.speed,
                                                            callback = ::callback,
                                                        )

                                                    val elapsed =
                                                        startTime.elapsedNow().inWholeMilliseconds.toFloat() / 1000;
                                                    val audioDuration =
                                                        audio.samples.size / TtsEngine.tts!!.sampleRate()
                                                            .toFloat()
                                                    val RTF = String.format(
                                                        "Number of threads: %d\nElapsed: %.3f s\nAudio duration: %.3f s\nRTF: %.3f/%.3f = %.3f",
                                                        TtsEngine.tts!!.config.model.numThreads,
                                                        elapsed,
                                                        audioDuration,
                                                        elapsed,
                                                        audioDuration,
                                                        elapsed / audioDuration
                                                    )

                                                    scope.launch {
                                                        Log.i(TAG, "send 0 samples")
                                                            samplesChannel.send(FloatArray(0))
                                                        Log.i(TAG, "send 0 samples done")
                                                    }

                                                    val filename =
                                                        application.filesDir.absolutePath + "/generated.wav"


                                                    val ok =
                                                        audio.samples.isNotEmpty() && audio.save(
                                                            filename
                                                        )

                                                    if (ok) {
                                                        withContext(Dispatchers.Main) {
                                                            startEnabled = true
                                                            playEnabled = true
                                                            rtfText = RTF
                                                        }


                                                    }
                                                }
                                            }
                                        }) {
                                        Text("Start")
                                    }

                                    Button(
                                        modifier = Modifier.padding(5.dp),
                                        enabled = playEnabled,
                                        onClick = {
                                            stopped = true
                                            track.pause()
                                            track.flush()
                                            onClickPlay()
                                        }) {
                                        Text("Play")
                                    }

                                    Button(
                                        modifier = Modifier.padding(5.dp),
                                        onClick = {
                                            onClickStop()
                                            startEnabled = true
                                        }) {
                                        Text("Stop")
                                    }
                                }
                                if (rtfText.isNotEmpty()) {
                                    Row {
                                        Text(rtfText)
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    override fun onDestroy() {
        stopMediaPlayer()
        super.onDestroy()
    }

    private fun stopMediaPlayer() {
        mediaPlayer?.stop()
        mediaPlayer?.release()
        mediaPlayer = null
    }

    private fun onClickPlay() {
        val filename = application.filesDir.absolutePath + "/generated.wav"
        stopMediaPlayer()
        mediaPlayer = MediaPlayer.create(
            applicationContext,
            Uri.fromFile(File(filename))
        )
        mediaPlayer?.start()
    }

    private fun onClickStop() {
        stopped = true
        track.pause()
        track.flush()

        stopMediaPlayer()
    }

    // this function is called from C++
    private fun callback(samples: FloatArray): Int {
        if (!stopped) {
            val samplesCopy = samples.copyOf()
            scope.launch {
                Log.i(TAG, "callback called with ${samplesCopy.count()} samples")
                val ok = samplesChannel.trySend(samplesCopy).isSuccess
                Log.i(TAG, "callback called with $ok")
            }
            return 1
        } else {
            track.stop()
            Log.i(TAG, " return 0")
            return 0
        }
    }

    private fun initAudioTrack() {
        val sampleRate = TtsEngine.tts!!.sampleRate()
        val bufLength = AudioTrack.getMinBufferSize(
            sampleRate,
            AudioFormat.CHANNEL_OUT_MONO,
            AudioFormat.ENCODING_PCM_FLOAT
        )
        Log.i(TAG, "sampleRate: $sampleRate, buffLength: $bufLength")

        val attr = AudioAttributes.Builder().setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
            .setUsage(AudioAttributes.USAGE_MEDIA)
            .build()

        val format = AudioFormat.Builder()
            .setEncoding(AudioFormat.ENCODING_PCM_FLOAT)
            .setChannelMask(AudioFormat.CHANNEL_OUT_MONO)
            .setSampleRate(sampleRate)
            .build()

        track = AudioTrack(
            attr, format, bufLength, AudioTrack.MODE_STREAM,
            AudioManager.AUDIO_SESSION_ID_GENERATE
        )
        track.play()
    }
}


================================================
FILE: android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/PreferencesHelper.kt
================================================
import android.content.Context
import android.content.SharedPreferences

class PreferenceHelper(context: Context) {

    private val PREFS_NAME = "com.k2fsa.sherpa.onnx.tts.engine"
    private val SPEED_KEY = "speed"
    private val SID_KEY = "speaker_id"

    private val sharedPreferences: SharedPreferences =
        context.getSharedPreferences(PREFS_NAME, Context.MODE_PRIVATE)

    fun setSpeed(value: Float) {
        val editor = sharedPreferences.edit()
        editor.putFloat(SPEED_KEY, value)
        editor.apply()
    }

    fun getSpeed(): Float {
        return sharedPreferences.getFloat(SPEED_KEY, 1.0f)
    }

    fun setSid(value: Int) {
        val editor = sharedPreferences.edit()
        editor.putInt(SID_KEY, value)
        editor.apply()
    }

    fun getSid(): Int {
        return sharedPreferences.getInt(SID_KEY, 0)
    }
}

================================================
FILE: android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsEngine.kt
================================================
package com.k2fsa.sherpa.onnx.tts.engine

import PreferenceHelper
import android.content.Context
import android.content.res.AssetManager
import android.util.Log
import androidx.compose.runtime.MutableState
import androidx.compose.runtime.mutableFloatStateOf
import androidx.compose.runtime.mutableIntStateOf
import com.k2fsa.sherpa.onnx.OfflineTts
import com.k2fsa.sherpa.onnx.getOfflineTtsConfig
import java.io.File
import java.io.FileOutputStream
import java.io.IOException

const val MIN_TTS_SPEED = 0.1f
const val MAX_TTS_SPEED = 5.0f

object TtsEngine {
    var tts: OfflineTts? = null

    // https://en.wikipedia.org/wiki/ISO_639-3
    // Example:
    // eng for English,
    // deu for German
    // cmn for Mandarin
    var lang: String? = null

    // if a model supports two languages, set also lang2
    var lang2: String? = null


    val speedState: MutableState<Float> = mutableFloatStateOf(1.0F)
    val speakerIdState: MutableState<Int> = mutableIntStateOf(0)

    var speed: Float
        get() = speedState.value
        set(value) {
            speedState.value = value
        }

    var speakerId: Int
        get() = speakerIdState.value
        set(value) {
            speakerIdState.value = value
        }

    private var modelDir: String? = null
    private var modelName: String? = null
    private var acousticModelName: String? = null // for matcha tts
    private var vocoder: String? = null // for matcha tts
    private var voices: String? = null // for kokoro
    private var ruleFsts: String? = null
    private var ruleFars: String? = null
    private var lexicon: String? = null
    private var dataDir: String? = null
    private var assets: AssetManager? = null
    private var isKitten = false

    init {
        // The purpose of such a design is to make the CI test easier
        // Please see
        // https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/apk/generate-tts-apk-script.py
        //
        // For VITS -- begin
        modelName = null
        // For VITS -- end

        // For Matcha -- begin
        acousticModelName = null
        vocoder = null
        // For Matcha -- end

        // For Kokoro -- begin
        voices = null
        // For Kokoro -- end

        modelDir = null
        ruleFsts = null
        ruleFars = null
        lexicon = null
        dataDir = null
        lang = null
        lang2 = null

        // Please enable one and only one of the examples below

        // Example 1:
        // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-vctk.tar.bz2
        // modelDir = "vits-vctk"
        // modelName = "vits-vctk.onnx"
        // lexicon = "lexicon.txt"
        // lang = "eng"

        // Example 2:
        // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
        // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
        // modelDir = "vits-piper-en_US-amy-low"
        // modelName = "en_US-amy-low.onnx"
        // dataDir = "vits-piper-en_US-amy-low/espeak-ng-data"
        // lang = "eng"

        // Example 3:
        // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
        // modelDir = "vits-icefall-zh-aishell3"
        // modelName = "model.onnx"
        // ruleFars = "vits-icefall-zh-aishell3/rule.far"
        // lexicon = "lexicon.txt"
        // lang = "zho"

        // Example 4:
        // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#csukuangfj-vits-zh-hf-fanchen-c-chinese-187-speakers
        // modelDir = "vits-zh-hf-fanchen-C"
        // modelName = "vits-zh-hf-fanchen-C.onnx"
        // lexicon = "lexicon.txt"
        // lang = "zho"

        // Example 5:
        // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2
        // This model does not need lexicon or dataDir
        // modelDir = "vits-coqui-de-css10"
        // modelName = "model.onnx"
        // lang = "deu"

        // Example 6
        // vits-melo-tts-zh_en
        // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-melo-tts-zh-en-chinese-english-1-speaker
        // modelDir = "vits-melo-tts-zh_en"
        // modelName = "model.onnx"
        // lexicon = "lexicon.txt"
        // lang = "zho"
        // lang2 = "eng"

        // Example 7
        // matcha-icefall-zh-baker
        // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
        // modelDir = "matcha-icefall-zh-baker"
        // acousticModelName = "model-steps-3.onnx"
        // vocoder = "vocos-22khz-univ.onnx"
        // lexicon = "lexicon.txt"
        // lang = "zho"

        // Example 8
        // matcha-icefall-en_US-ljspeech
        // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
        // modelDir = "matcha-icefall-en_US-ljspeech"
        // acousticModelName = "model-steps-3.onnx"
        // vocoder = "vocos-22khz-univ.onnx"
        // dataDir = "matcha-icefall-en_US-ljspeech/espeak-ng-data"
        // lang = "eng"

        // Example 9
        // kokoro-en-v0_19
        // modelDir = "kokoro-en-v0_19"
        // modelName = "model.onnx"
        // voices = "voices.bin"
        // dataDir = "kokoro-en-v0_19/espeak-ng-data"
        // lang = "eng"

        // Example 10
        // kokoro-multi-lang-v1_0
        // modelDir = "kokoro-multi-lang-v1_0"
        // modelName = "model.onnx"
        // voices = "voices.bin"
        // dataDir = "kokoro-multi-lang-v1_0/espeak-ng-data"
        // lexicon = "kokoro-multi-lang-v1_0/lexicon-us-en.txt,kokoro-multi-lang-v1_0/lexicon-zh.txt"
        // lang = "eng"
        // lang2 = "zho"
        // ruleFsts = "$modelDir/phone-zh.fst,$modelDir/date-zh.fst,$modelDir/number-zh.fst"
        //
        // This model supports many languages, e.g., English, Chinese, etc.
        // We set lang to eng here.

        // Example 11
        // kitten-nano-en-v0_1-fp16
        // modelDir = "kitten-nano-en-v0_1-fp16"
        // modelName = "model.fp16.onnx"
        // voices = "voices.bin"
        // dataDir = "kitten-nano-en-v0_1-fp16/espeak-ng-data"
        // lang = "eng"
        // isKitten = true

        // Example 12
        // matcha-icefall-zh-en
        // https://k2-fsa.github.io/sherpa/onnx/tts/all/Chinese-English/matcha-icefall-zh-en.html
        // modelDir = "matcha-icefall-zh-en"
        // acousticModelName = "model-steps-3.onnx"
        // vocoder = "vocos-16khz-univ.onnx"
        // dataDir = "matcha-icefall-zh-en/espeak-ng-data"
        // lexicon = "lexicon.txt"
        // lang = "zho"
    }

    fun createTts(context: Context) {
        Log.i(TAG, "Init Next-gen Kaldi TTS")
        if (tts == null) {
            initTts(context)
        }
    }

    private fun initTts(context: Context) {
        assets = context.assets

        if (dataDir != null) {
            val newDir = copyDataDir(context, dataDir!!)
            dataDir = "$newDir/$dataDir"
        }

        val config = getOfflineTtsConfig(
            modelDir = modelDir!!,
            modelName = modelName ?: "",
            acousticModelName = acousticModelName ?: "",
            vocoder = vocoder ?: "",
            voices = voices ?: "",
            lexicon = lexicon ?: "",
            dataDir = dataDir ?: "",
            dictDir = "",
            ruleFsts = ruleFsts ?: "",
            ruleFars = ruleFars ?: "",
            isKitten = isKitten,
        )

        speed = PreferenceHelper(context).getSpeed()
        speakerId = PreferenceHelper(context).getSid()

        tts = OfflineTts(assetManager = assets, config = config)
    }


    private fun copyDataDir(context: Context, dataDir: String): String {
        Log.i(TAG, "data dir is $dataDir")
        copyAssets(context, dataDir)

        val newDataDir = context.getExternalFilesDir(null)!!.absolutePath
        Log.i(TAG, "newDataDir: $newDataDir")
        return newDataDir
    }

    private fun copyAssets(context: Context, path: String) {
        val assets: Array<String>?
        try {
            assets = context.assets.list(path)
            if (assets!!.isEmpty()) {
                copyFile(context, path)
            } else {
                val fullPath = "${context.getExternalFilesDir(null)}/$path"
                val dir = File(fullPath)
                dir.mkdirs()
                for (asset in assets.iterator()) {
                    val p: String = if (path == "") "" else "$path/"
                    copyAssets(context, p + asset)
                }
            }
        } catch (ex: IOException) {
            Log.e(TAG, "Failed to copy $path. $ex")
        }
    }

    private fun copyFile(context: Context, filename: String) {
        try {
            val istream = context.assets.open(filename)
            val newFilename = context.getExternalFilesDir(null).toString() + "/" + filename
            val ostream = FileOutputStream(newFilename)
            // Log.i(TAG, "Copying $filename to $newFilename")
            val buffer = ByteArray(1024)
            var read = 0
            while (read != -1) {
                ostream.write(buffer, 0, read)
                read = istream.read(buffer)
            }
            istream.close()
            ostream.flush()
            ostream.close()
        } catch (ex: Exception) {
            Log.e(TAG, "Failed to copy $filename, $ex")
        }
    }
}


================================================
FILE: android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsService.kt
================================================
package com.k2fsa.sherpa.onnx.tts.engine

import android.media.AudioFormat
import android.speech.tts.SynthesisCallback
import android.speech.tts.SynthesisRequest
import android.speech.tts.TextToSpeech
import android.speech.tts.TextToSpeechService
import android.util.Log

/*
https://developer.android.com/reference/java/util/Locale#getISO3Language()
https://developer.android.com/reference/java/util/Locale#getISO3Country()

eng, USA,
eng, USA, POSIX
eng,
eng, GBR
afr,
afr, NAM
afr, ZAF
agq
agq, CMR
aka,
aka, GHA
amh,
amh, ETH
ara,
ara, 001
ara, ARE
ara, BHR,
deu
deu, AUT
deu, BEL
deu, CHE
deu, ITA
deu, ITA
deu, LIE
deu, LUX
spa,
spa, 419
spa, ARG,
spa, BRA
fra,
fra, BEL,
fra, FRA,

E  Failed to check TTS data, no activity found for Intent
{ act=android.speech.tts.engine.CHECK_TTS_DATA pkg=com.k2fsa.sherpa.chapter5 })

E Failed to get default language from engine com.k2fsa.sherpa.chapter5
Engine failed voice data integrity check (null return)com.k2fsa.sherpa.chapter5
Failed to get default language from engine com.k2fsa.sherpa.chapter5

*/

class TtsService : TextToSpeechService() {
    override fun onCreate() {
        Log.i(TAG, "onCreate tts service")
        super.onCreate()

        // see https://github.com/Miserlou/Android-SDK-Samples/blob/master/TtsEngine/src/com/example/android/ttsengine/RobotSpeakTtsService.java#L68
        onLoadLanguage(TtsEngine.lang, "", "")
        if (TtsEngine.lang2 != null) {
            onLoadLanguage(TtsEngine.lang2, "", "")
        }
    }

    override fun onDestroy() {
        Log.i(TAG, "onDestroy tts service")
        super.onDestroy()
    }

    // https://developer.android.com/reference/kotlin/android/speech/tts/TextToSpeechService#onislanguageavailable
    override fun onIsLanguageAvailable(_lang: String?, _country: String?, _variant: String?): Int {
        val lang = _lang ?: ""

        if (lang == TtsEngine.lang || lang == TtsEngine.lang2) {
            return TextToSpeech.LANG_AVAILABLE
        }

        return TextToSpeech.LANG_NOT_SUPPORTED
    }

    override fun onGetLanguage(): Array<String> {
        return arrayOf(TtsEngine.lang!!, "", "")
    }

    // https://developer.android.com/reference/kotlin/android/speech/tts/TextToSpeechService#onLoadLanguage(kotlin.String,%20kotlin.String,%20kotlin.String)
    override fun onLoadLanguage(_lang: String?, _country: String?, _variant: String?): Int {
        Log.i(TAG, "onLoadLanguage: $_lang, $_country")
        val lang = _lang ?: ""

        return if (lang == TtsEngine.lang || lang == TtsEngine.lang2) {
            Log.i(TAG, "creating tts, lang :$lang")
            TtsEngine.createTts(application)
            TextToSpeech.LANG_AVAILABLE
        } else {
            Log.i(TAG, "lang $lang not supported, tts engine lang: ${TtsEngine.lang}, ${TtsEngine.lang2}")
            TextToSpeech.LANG_NOT_SUPPORTED
        }
    }

    override fun onStop() {}

    override fun onSynthesizeText(request: SynthesisRequest?, callback: SynthesisCallback?) {
        if (request == null || callback == null) {
            return
        }
        val language = request.language
        val country = request.country
        val variant = request.variant
        val text = request.charSequenceText.toString()
        // Map Android TTS speech rate (where 100 == normal) to engine speed (1.0 == normal)
        // Allow per-request override from external apps; fallback to engine default if absent.
        val rate = runCatching { request.speechRate }.getOrDefault(-1)
        val engineSpeed = if (rate > 0) {
            // Map 100 -> 1.0f
            val mapped = rate / 100.0f
            mapped.coerceIn(MIN_TTS_SPEED, MAX_TTS_SPEED)
        } else {
            // Fallback to current engine/global setting
            TtsEngine.speed
        }

        val ret = onIsLanguageAvailable(language, country, variant)
        if (ret == TextToSpeech.LANG_NOT_SUPPORTED) {
            callback.error()
            return
        }
        Log.i(TAG, "text: $text, engineSpeed: $engineSpeed")
        val tts = TtsEngine.tts!!

        // Note that AudioFormat.ENCODING_PCM_FLOAT requires API level >= 24
        // callback.start(tts.sampleRate(), AudioFormat.ENCODING_PCM_FLOAT, 1)

        callback.start(tts.sampleRate(), AudioFormat.ENCODING_PCM_16BIT, 1)

        if (text.isBlank() || text.isEmpty()) {
            callback.done()
            return
        }

        val ttsCallback: (FloatArray) -> Int = fun(floatSamples): Int {
            // convert FloatArray to ByteArray
            val samples = floatArrayToByteArray(floatSamples)
            val maxBufferSize: Int = callback.maxBufferSize
            var offset = 0
            while (offset < samples.size) {
                val bytesToWrite = Math.min(maxBufferSize, samples.size - offset)
                callback.audioAvailable(samples, offset, bytesToWrite)
                offset += bytesToWrite
            }

            // 1 means to continue
            // 0 means to stop
            return 1
        }

        Log.i(TAG, "text: $text")
        tts.generateWithCallback(
            text = text,
            sid = TtsEngine.speakerId,
            speed = engineSpeed,
            callback = ttsCallback,
        )

        callback.done()
    }

    private fun floatArrayToByteArray(audio: FloatArray): ByteArray {
        // byteArray is actually a ShortArray
        val byteArray = ByteArray(audio.size * 2)
        for (i in audio.indices) {
            val sample = (audio[i] * 32767).toInt()
            byteArray[2 * i] = sample.toByte()
            byteArray[2 * i + 1] = (sample shr 8).toByte()
        }
        return byteArray
    }
}


================================================
FILE: android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsViewModel.kt
================================================
package com.k2fsa.sherpa.onnx.tts.engine

import android.app.Application
import android.speech.tts.TextToSpeech
import android.speech.tts.TextToSpeech.OnInitListener
import android.speech.tts.UtteranceProgressListener
import android.util.Log
import androidx.lifecycle.ViewModel
import java.util.Locale

class TtsApp : Application() {
    companion object {
        lateinit var instance: TtsApp
    }

    override fun onCreate() {
        super.onCreate()
        instance = this
    }

}

class TtsViewModel : ViewModel() {

    // https://developer.android.com/reference/kotlin/android/speech/tts/TextToSpeech.OnInitListener
    private val onInitListener = object : OnInitListener {
        override fun onInit(status: Int) {
            when (status) {
                TextToSpeech.SUCCESS -> Log.i(TAG, "Init tts succeeded")
                TextToSpeech.ERROR -> Log.i(TAG, "Init tts failed")
                else -> Log.i(TAG, "Unknown status $status")
            }
        }
    }

    // https://developer.android.com/reference/kotlin/android/speech/tts/UtteranceProgressListener
    private val utteranceProgressListener = object : UtteranceProgressListener() {
        override fun onStart(utteranceId: String?) {
            Log.i(TAG, "onStart: $utteranceId")
        }

        override fun onStop(utteranceId: String?, interrupted: Boolean) {
            Log.i(TAG, "onStop: $utteranceId, $interrupted")
            super.onStop(utteranceId, interrupted)
        }

        override fun onError(utteranceId: String?, errorCode: Int) {
            Log.i(TAG, "onError: $utteranceId, $errorCode")
            super.onError(utteranceId, errorCode)
        }

        override fun onDone(utteranceId: String?) {
            Log.i(TAG, "onDone: $utteranceId")
        }

        @Deprecated("Deprecated in Java")
        override fun onError(utteranceId: String?) {
            Log.i(TAG, "onError: $utteranceId")
        }
    }

    val tts = TextToSpeech(TtsApp.instance, onInitListener, "com.k2fsa.sherpa.onnx.tts.engine")

    init {
        tts.setLanguage(Locale(TtsEngine.lang!!))
        tts.setOnUtteranceProgressListener(utteranceProgressListener)
    }

    override fun onCleared() {
        super.onCleared()
        tts.shutdown()
    }
}

================================================
FILE: android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/ui/theme/Color.kt
================================================
package com.k2fsa.sherpa.onnx.tts.engine.ui.theme

import androidx.compose.ui.graphics.Color

val Purple80 = Color(0xFFD0BCFF)
val PurpleGrey80 = Color(0xFFCCC2DC)
val Pink80 = Color(0xFFEFB8C8)

val Purple40 = Color(0xFF6650a4)
val PurpleGrey40 = Color(0xFF625b71)
val Pink40 = Color(0xFF7D5260)

================================================
FILE: android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/ui/theme/Theme.kt
================================================
package com.k2fsa.sherpa.onnx.tts.engine.ui.theme

import android.app.Activity
import android.os.Build
import androidx.compose.foundation.isSystemInDarkTheme
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.darkColorScheme
import androidx.compose.material3.dynamicDarkColorScheme
import androidx.compose.material3.dynamicLightColorScheme
import androidx.compose.material3.lightColorScheme
import androidx.compose.runtime.Composable
import androidx.compose.runtime.SideEffect
import androidx.compose.ui.graphics.toArgb
import androidx.compose.ui.platform.LocalContext
import androidx.compose.ui.platform.LocalView
import androidx.core.view.WindowCompat

private val DarkColorScheme = darkColorScheme(
    primary = Purple80,
    secondary = PurpleGrey80,
    tertiary = Pink80
)

private val LightColorScheme = lightColorScheme(
    primary = Purple40,
    secondary = PurpleGrey40,
    tertiary = Pink40

    /* Other default colors to override
    background = Color(0xFFFFFBFE),
    surface = Color(0xFFFFFBFE),
    onPrimary = Color.White,
    onSecondary = Color.White,
    onTertiary = Color.White,
    onBackground = Color(0xFF1C1B1F),
    onSurface = Color(0xFF1C1B1F),
    */
)

@Composable
fun SherpaOnnxTtsEngineTheme(
    darkTheme: Boolean = isSystemInDarkTheme(),
    // Dynamic color is available on Android 12+
    dynamicColor: Boolean = true,
    content: @Composable () -> Unit
) {
    val colorScheme = when {
        dynamicColor && Build.VERSION.SDK_INT >= Build.VERSION_CODES.S -> {
            val context = LocalContext.current
            if (darkTheme) dynamicDarkColorScheme(context) else dynamicLightColorScheme(context)
        }

        darkTheme -> DarkColorScheme
        else -> LightColorScheme
    }
    val view = LocalView.current
    if (!view.isInEditMode) {
        SideEffect {
            val window = (view.context as Activity).window
            window.statusBarColor = colorScheme.primary.toArgb()
            WindowCompat.getInsetsController(window, view).isAppearanceLightStatusBars = darkTheme
        }
    }

    MaterialTheme(
        colorScheme = colorScheme,
        typography = Typography,
        content = content
    )
}

================================================
FILE: android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/ui/theme/Type.kt
================================================
package com.k2fsa.sherpa.onnx.tts.engine.ui.theme

import androidx.compose.material3.Typography
import androidx.compose.ui.text.TextStyle
import androidx.compose.ui.text.font.FontFamily
import androidx.compose.ui.text.font.FontWeight
import androidx.compose.ui.unit.sp

// Set of Material typography styles to start with
val Typography = Typography(
    bodyLarge = TextStyle(
        fontFamily = FontFamily.Default,
        fontWeight = FontWeight.Normal,
        fontSize = 16.sp,
        lineHeight = 24.sp,
        letterSpacing = 0.5.sp
    )
    /* Other default text styles to override
    titleLarge = TextStyle(
        fontFamily = FontFamily.Default,
        fontWeight = FontWeight.Normal,
        fontSize = 22.sp,
        lineHeight = 28.sp,
        letterSpacing = 0.sp
    ),
    labelSmall = TextStyle(
        fontFamily = FontFamily.Default,
        fontWeight = FontWeight.Medium,
        fontSize = 11.sp,
        lineHeight = 16.sp,
        letterSpacing = 0.5.sp
    )
    */
)

================================================
FILE: android/SherpaOnnxTtsEngine/app/src/main/jniLibs/arm64-v8a/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxTtsEngine/app/src/main/jniLibs/armeabi-v7a/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxTtsEngine/app/src/main/jniLibs/x86/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxTtsEngine/app/src/main/jniLibs/x86_64/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxTtsEngine/app/src/main/res/drawable-v24/ic_launcher_foreground.xml
================================================
<vector android:height="108dp" android:viewportHeight="12267"
    android:viewportWidth="12267" android:width="108dp" xmlns:android="http://schemas.android.com/apk/res/android">
    <path android:fillColor="#ffffff"
        android:pathData="m4121,10338c-0,-105 3,-209 2,-313 -2,-26 -15,-55 -40,-66 -11,-14 -35,-7 -22,12 10,48 20,96 30,143 6,22 8,44 14,65 1,13 14,35 3,42 -19,0 -40,-17 -56,-7 -4,17 5,35 8,52 8,28 17,57 15,87 -1,15 -3,33 -13,45 -14,-5 -17,-25 -32,-30 -21,-7 -39,12 -60,12 -17,3 -31,12 -48,14 -19,11 -20,-16 -25,-28 -8,-21 -17,-43 -23,-65 -2,-24 -28,-31 -47,-37 -28,-11 -57,-21 -85,-32 -21,1 -31,-21 -17,-36 18,-51 37,-102 55,-153 5,-25 -10,-49 -29,-62 -80,-72 -157,-148 -234,-223 3,-20 27,-27 38,-43 40,-35 89,-60 120,-104 21,-29 32,-65 35,-100 3,-26 28,-39 45,-55 61,-50 122,-101 189,-143 18,-12 35,-29 34,-52 2,-22 -18,-31 -34,-39 -26,-20 -38,-53 -40,-84 -6,-26 -21,9 -34,3 -5,-19 -1,-40 -2,-60 -0,-20 6,-47 2,-63 -19,8 -36,18 -54,28 -10,-9 6,-27 6,-39 47,-166 93,-333 140,-499 -17,14 -28,34 -42,51 -23,31 -50,61 -55,101 -5,27 -4,54 -6,81 -15,5 -27,-19 -35,-31 -17,-33 -16,-74 2,-107 21,-42 44,-82 67,-123 -16,-18 -44,1 -60,-18 -8,-21 22,-31 31,-47 23,-25 49,-48 78,-66 20,-13 23,-38 20,-60 -3,-38 -5,-76 -8,-114 -3,-38 -7,-76 -13,-113 -10,16 -11,35 -18,52 -14,45 -27,89 -42,134 -25,6 -8,-31 -20,-42 -8,8 -20,18 -29,5 -14,-15 -12,-37 -21,-54 -14,-42 -25,-87 -18,-131 4,-35 16,-69 20,-103 -20,-10 -44,-11 -64,-22 -31,-13 -65,-35 -72,-71 -4,-22 4,-45 15,-63 18,-34 55,-57 59,-98 6,-33 -10,-65 -8,-97 1,-71 28,-141 75,-193 20,-21 44,-36 70,-50 25,-16 46,-43 44,-74 -0,-18 -5,-35 -10,-52 -20,-11 -43,-0 -64,-4 -26,0 -53,0 -79,2 -36,-45 -91,-74 -119,-126 -15,-30 -14,-64 -19,-96 -8,-65 -16,-130 -17,-196 -1,-41 19,-79 31,-117 -2,-21 -28,-28 -39,-43 -18,-16 -36,-40 -28,-66 8,-28 34,-46 54,-66 43,-38 98,-59 148,-86 121,-62 241,-125 365,-181 38,-13 80,-8 119,-2 55,11 105,35 157,54 16,-12 31,-35 54,-30 29,3 52,24 80,30 13,-18 22,-40 43,-51 35,-22 77,-19 116,-23 26,-1 51,-7 75,-15 108,-27 218,-49 327,-72 21,-6 41,7 38,29 53,242 82,490 155,728 8,26 17,52 26,78 2,-22 -7,-44 -9,-66 -20,-106 -39,-213 -59,-319 12,-21 22,16 24,26 47,128 93,257 141,385 7,18 26,-5 16,-17 -17,-54 -32,-108 -48,-163 -40,-136 -79,-272 -119,-407 13,-10 23,17 36,21 161,132 322,264 483,395 17,-8 9,-33 18,-48 19,-73 40,-147 72,-215 13,-19 39,-21 60,-19 24,10 28,-20 29,-36 7,-34 14,-67 19,-101 -21,-13 -47,-8 -69,-12 -26,-2 -25,-31 -24,-50 2,-15 5,-51 -19,-47 -16,5 -18,-16 -27,-25 -14,-24 -43,-32 -69,-35 -24,-3 -48,-1 -71,-4 -8,-7 -4,-34 -20,-19 -12,6 -30,29 -41,20 -0,-11 15,-41 -7,-30 -24,4 -31,-24 -41,-39 -17,-27 -36,-59 -27,-92 5,-20 16,-40 10,-61 -11,-22 16,-34 31,-43 16,-15 -32,-44 -1,-45 20,2 39,-5 53,-20 20,-21 30,-49 52,-69 14,-15 29,-29 44,-43 12,2 14,34 25,11 12,-13 17,-33 35,-40 -1,16 15,29 27,15 57,-22 114,-45 171,-69 31,-12 64,-18 97,-16 41,1 78,-21 107,-49 36,-35 61,-82 67,-131 9,-60 30,-118 51,-175 22,-60 54,-116 82,-174 11,-20 28,-36 43,-53 18,-16 44,-17 66,-25 71,-21 142,-39 216,-45 48,-5 96,-8 144,-13 21,5 -9,26 -15,34 -8,17 18,-1 25,-2 38,-15 77,-32 118,-31 51,-1 103,1 154,-2 34,-9 69,-28 105,-16 35,14 52,50 73,79 15,23 30,46 45,69 4,22 -25,17 -38,12 -35,-10 -70,-18 -105,-27 10,15 31,17 44,28 57,33 112,72 154,124 15,10 0,17 -11,13 -36,-0 -71,-6 -107,-8 -16,-5 -8,16 4,13 76,28 151,56 227,85 8,-18 30,-25 47,-15 23,5 49,18 55,43 -1,12 8,31 17,12 5,-14 6,-46 29,-33 60,16 121,31 181,47 16,16 34,33 57,36 27,5 55,-2 79,-14 19,3 25,29 30,42 -3,11 -31,13 -24,23 22,5 45,5 68,8 17,2 33,4 50,7 -1,11 -28,36 -1,31 14,-3 33,-16 43,-1 10,20 11,47 33,59 20,11 31,34 26,56 -6,12 -14,34 7,33 20,3 21,25 7,36 -13,22 11,36 22,51 7,18 4,39 10,58 4,22 5,48 -12,65 -11,13 -46,18 -28,41 7,9 8,31 -9,19 -20,-12 -30,12 -36,27 -17,30 -33,61 -49,91 -14,17 -38,17 -57,26 -38,14 -76,26 -113,40 -6,9 10,30 -11,25 -31,-0 -61,1 -92,1 -14,1 -29,-0 -42,3 -42,25 -95,22 -140,8 -36,-10 -74,-12 -111,-8 -28,10 -37,42 -43,68 -7,42 0,84 -4,125 -2,31 -2,62 1,93 0,29 -18,60 -48,65 -24,5 -50,-2 -71,-13 -22,2 -22,29 -26,46 -9,62 -50,111 -92,155 -14,14 -29,29 -31,50 -3,23 -7,48 -23,67 -30,38 -77,61 -125,66 -25,3 -47,-10 -70,-17 -49,-19 -96,-42 -144,-63 -19,9 -29,29 -43,43 9,21 34,28 47,46 19,19 40,40 48,67 3,28 0,59 17,83 22,38 52,71 76,108 80,108 160,217 239,325 11,17 25,34 31,53 -7,21 -33,24 -46,39 -36,27 -74,51 -110,78 -15,14 -20,37 -11,56 15,39 32,77 48,116 75,176 151,352 198,539 43,167 71,337 105,505 6,20 5,42 18,59 8,-4 1,-28 3,-40 -0,-99 -2,-199 -23,-297 0,-16 -12,-33 -4,-49 38,37 65,85 75,137 16,79 2,161 -18,238 -6,23 -13,46 -20,69 -20,8 -30,-19 -47,-25 -42,-32 -87,-62 -139,-75 -28,-8 -57,-14 -84,-24 -11,-21 -12,-46 -29,-64 -32,-44 -82,-68 -130,-91 -21,-14 -22,-43 -37,-62 -29,-44 -76,-70 -121,-94 -38,-23 -69,-60 -74,-105 -6,-40 -9,-81 -21,-120 -16,-60 -44,-115 -74,-168 -19,-43 -22,-91 -12,-136 23,-147 34,-296 32,-445 -0,-87 -3,-174 -12,-261 -4,-29 -14,-56 -25,-83 -5,23 2,46 2,70 3,32 5,65 7,97 -13,20 -27,-12 -30,-24 -11,-24 -23,-47 -33,-71 -22,-2 0,29 1,41 22,66 42,133 55,202 11,53 17,108 15,162 -0,67 -7,134 -20,200 -8,36 -18,72 -32,106 -17,-8 -12,-34 -22,-49 -15,-41 -35,-79 -53,-118 -17,-35 -39,-69 -69,-95 -87,-85 -182,-163 -277,-240 -12,-3 7,21 8,28 76,162 188,303 294,447 38,51 75,103 110,155 0,16 -20,-7 -28,-7 -77,-42 -154,-85 -231,-127 -20,15 10,31 20,41 128,112 229,251 336,383 -6,25 -36,11 -46,-2 -45,-31 -90,-63 -135,-94 -8,11 5,38 -20,33 -12,4 -41,2 -42,11 84,54 170,105 255,159 22,14 45,28 68,41 12,19 -19,30 -25,46 -12,20 22,15 33,20 42,10 84,20 126,29 4,18 -14,30 -29,20 -65,-12 -129,-23 -194,-34 -11,22 25,22 38,31 104,42 207,83 312,122 18,8 31,31 20,50 -12,29 -33,53 -49,80 -17,27 -35,53 -51,80 21,-3 39,-16 59,-22 62,-24 125,-49 187,-74 20,6 7,30 -1,41 -9,17 -18,33 -25,51 31,-1 61,-8 91,-11 25,-3 51,-7 76,-9 12,18 -23,24 -31,36 -8,7 -32,18 -29,27 14,-3 43,0 26,20 -3,11 -23,34 -9,39 28,-5 56,-13 84,-18 20,2 2,29 2,42 -1,23 25,-1 35,-4 12,-12 43,-10 31,12 -12,14 -21,31 -32,46 -4,8 -25,31 -5,22 14,-6 33,-24 46,-6 18,19 34,43 59,54 19,-0 33,-23 53,-9 31,8 62,13 93,21 7,13 8,43 29,27 17,-6 42,-1 46,18 6,12 -3,38 10,41 19,-9 32,-27 52,-32 24,1 25,34 45,41 20,3 41,-23 60,-6 21,21 9,58 33,77 7,6 40,6 21,19 -21,7 -27,32 -10,47 10,11 13,36 27,39 12,-12 12,-31 21,-45 7,-17 14,-34 20,-52 -6,-4 -33,-16 -12,-16 61,-1 111,-41 167,-59 54,-17 111,-11 166,-5 21,1 44,3 55,24 13,16 -3,22 -17,20 -22,2 -45,1 -67,4 -24,8 5,25 18,24 66,18 133,36 200,52 19,6 46,12 46,36 0,24 -23,38 -43,46 -41,14 -83,-1 -123,-8 -12,0 -40,-15 -31,6 7,18 6,44 28,51 20,6 40,-5 61,-5 31,-1 61,5 91,13 16,3 37,14 31,34 -7,25 -19,51 -41,67 -10,18 19,35 12,56 -1,23 -19,40 -38,49 -20,17 -17,45 -24,68 -5,21 -11,42 -19,62 10,25 9,59 -14,76 -21,18 -49,26 -74,37 0,115 0,230 0,345 -40,-0 -80,1 -119,-1 -7,-105 -14,-210 -24,-315 3,-19 -19,-24 -34,-25 -18,-7 -46,-9 -52,14 -2,42 3,85 5,127 2,30 4,61 5,91 3,26 3,51 6,77 -3,10 9,35 -7,33 -47,0 -94,0 -141,0 -5,-40 -7,-80 -11,-120 -4,-56 -8,-111 -15,-167 -7,-16 -11,-53 -35,-43 -33,12 -64,30 -91,53 -17,13 -37,22 -56,33 -17,9 -34,19 -51,27 -21,0 -25,-31 -47,-29 -17,-3 -46,16 -56,-2 -12,-20 -48,-6 -54,-31 -1,-26 -29,-28 -47,-38 -161,-77 -322,-156 -483,-234 -38,-18 -80,-30 -122,-33 -31,-1 -62,-2 -92,5 6,13 28,6 40,13 47,12 94,28 139,46 23,11 47,21 71,32 45,21 88,46 132,70 47,27 93,56 140,85 42,25 88,44 121,81 17,18 31,40 41,63 16,39 27,81 34,122 2,20 5,40 1,60 -6,14 -17,-2 -26,3 -19,-8 -24,-31 -36,-46 -25,-42 -49,-84 -74,-126 -3,-14 -22,-11 -15,4 7,18 10,37 17,56 4,15 -5,11 -12,2 -55,-42 -109,-85 -164,-127 -8,-4 -21,-21 -27,-16 5,15 17,27 23,41 42,67 97,125 148,185 4,9 31,26 17,31 -502,2 -1004,1 -1506,1 -310,-0 -621,0 -931,-1 -16,-3 4,-24 4,-34 49,-136 105,-271 184,-393 4,-9 31,-33 7,-31 -56,2 -113,7 -169,10 -16,1 -32,2 -49,2 -19,9 -14,36 -21,53 -10,45 -21,90 -31,135 2,11 -9,22 -11,6 -7,-11 -9,-39 -24,-37 -16,21 -30,45 -45,67 -5,9 -18,34 -20,13 -12,-13 -3,-34 -6,-50 -1,-58 -0,-116 -2,-173 -11,-16 -34,-0 -50,-3 -47,4 -95,8 -142,13 -15,12 2,33 -1,48 14,100 28,200 42,300 4,24 12,47 7,71 3,1 5,-11 3,0 -14,11 -36,-0 -53,4 -16,4 -38,-0 -34,-21 -11,-120 -20,-240 -32,-360 -5,-14 3,-46 -20,-38 -37,4 -74,8 -111,12 -13,16 -1,38 -6,57 0,92 -1,183 -5,275 -0,25 -0,50 -0,76 -26,0 -52,0 -79,0 -2,-68 -7,-136 -9,-205 -3,-57 -5,-114 -8,-171 1,-13 0,-35 -19,-27 -71,5 -141,8 -212,14 -7,21 9,41 11,62 14,52 27,105 45,156 7,25 5,52 -6,76 -7,23 4,47 8,70 10,11 6,34 -12,26 -60,1 -120,0 -180,0 0,-40 0,-80 1,-120zM4842,10439c-2,-11 -1,13 0,0zM7749,10377c-4,-6 0,6 0,0zM7147,10366c-55,-141 -148,-268 -268,-362 -63,-52 -136,-91 -210,-126 -33,-15 -66,-28 -100,-40 -4,10 22,21 29,30 41,35 82,69 123,104 24,12 2,-36 23,-32 14,10 26,22 39,34 16,20 27,44 43,64 46,66 103,123 161,178 35,33 70,65 104,98 20,17 36,37 57,52zM7743,10343c-3,-7 -2,6 0,0zM7512,10338c-6,0 5,7 0,0zM7617,10337c-3,-0 3,6 0,0zM7502,10329c-4,-5 2,6 0,0zM7247,10156c7,-22 -27,-25 -37,-40 -34,-26 -68,-51 -103,-76 -18,14 13,26 22,36 33,29 67,58 102,86 6,4 13,2 16,-5zM7470,10136c19,-16 -7,-32 -22,-38 -61,-38 -122,-75 -182,-113 -9,-17 -44,-8 -29,11 19,14 40,24 59,38 41,27 83,53 125,80 17,5 30,31 50,23zM7149,8470c-12,-125 -46,-252 -119,-355 -11,-15 -28,-39 -38,-45 9,31 23,60 33,90 40,103 79,206 119,308 1,3 7,10 5,2zM7319,8389c-10,-208 -49,-416 -133,-608 -39,-89 -86,-173 -142,-252 -12,-16 -2,15 1,21 44,142 88,283 133,425 5,14 9,28 13,42 11,-16 3,-46 24,-53 22,2 14,30 20,44 28,138 56,275 84,413 5,-11 0,-22 1,-34zM7169,8174c-25,-131 -70,-261 -149,-371 -29,-38 -62,-74 -102,-101 -16,-5 4,16 5,23 79,146 158,291 237,437 2,2 9,20 8,12zM3893,7961c13,-35 22,-75 5,-110 -10,-16 -6,14 -7,20 -0,31 -0,62 1,94l1,-2zM6762,7794c-2,-119 -38,-235 -82,-344 -47,-111 -108,-218 -188,-308 -22,-24 -45,-46 -71,-66 -12,11 14,34 1,40 -19,-7 -35,-21 -53,-28 -7,-4 -25,-17 -28,-9 111,145 235,282 320,445 48,90 83,188 97,289 8,24 3,-13 4,-19zM3934,7658c-0,-21 1,-43 -1,-64 -26,17 -44,46 -51,77 -8,43 11,87 36,121 3,8 18,22 14,5 1,-46 1,-92 2,-139zM7001,7695c-34,-83 -68,-167 -102,-250 -5,-18 -9,-1 -7,8 0,72 30,141 72,199 12,17 23,34 38,48 1,-2 -0,-4 -1,-5zM6552,7567c23,-7 6,-30 -4,-41 -55,-86 -109,-172 -164,-257 -19,3 -21,24 -6,33 56,88 112,176 168,264 1,2 4,3 5,1zM5349,6953c-5,-2 3,10 0,0zM5561,6729c-16,-22 -39,-38 -57,-58 -36,-35 -72,-70 -109,-103 -6,16 8,33 11,49 11,29 22,61 45,83 30,27 72,35 111,30zM9053,4618c-20,0 -42,-3 -51,-23 -86,-94 -172,-190 -241,-298 -15,-23 -27,-48 -38,-74 -14,-20 -41,-19 -62,-15 -20,4 -40,1 -60,1 -19,2 -38,8 -58,5 -20,3 -39,-7 -58,-6 -21,-0 -41,-7 -61,-6 -35,-3 -70,-7 -104,-14 -22,-10 -48,-7 -70,-18 -15,-5 -30,-10 -44,-16 -26,13 -48,33 -74,45 -20,-1 -35,10 -43,28 -11,21 -31,35 -43,55 -14,19 -38,25 -57,38 -18,8 -34,-9 -47,-20 -13,-13 -23,-29 -31,-45 -22,-9 -11,-35 -15,-53 -18,-2 -27,-30 -4,-30 7,1 20,2 16,-9 16,6 20,-23 17,-28 -14,-3 -2,21 -17,13 -8,-14 -25,-9 -34,-0 7,-0 33,-7 26,6 -18,11 -47,7 -50,-17 -11,-23 12,-37 27,-49 25,-21 51,-42 74,-65 11,-16 33,-28 31,-50 -7,-17 13,-24 26,-21 1,-10 4,-20 12,-23 4,-18 39,-12 24,-35 -3,-11 -31,-30 -13,-35 11,-2 25,20 29,11 -5,-19 -29,-29 -26,-51 8,-3 28,15 19,-3 5,-20 21,5 33,3 12,14 26,-1 38,-9 12,-10 25,-22 27,-38 9,-22 26,-40 36,-62 24,-43 42,-90 62,-135 12,-19 18,-41 31,-58 5,-17 10,-33 19,-48 49,-100 83,-207 108,-316 6,-24 11,-48 21,-70 15,-15 28,17 34,28 2,11 2,31 18,18 18,-6 27,-21 35,-38 15,-21 29,16 31,30 6,34 22,65 33,98 9,23 18,46 27,69 -5,23 18,38 23,58 13,29 20,60 38,87 48,87 90,178 135,266 55,110 113,218 170,327 23,48 47,96 77,140 34,53 73,102 111,152 18,31 32,64 43,98 -2,19 -24,34 -10,54 9,21 -5,40 -14,58 -5,14 -19,34 -36,24 -15,-14 -31,4 -19,20 5,16 -10,29 -21,35 -11,-9 -22,-21 -33,-31 -11,-12 -26,-21 -36,-34 -6,-13 -17,-47 -35,-35 5,18 -12,2 -18,1 2,13 -23,22 -5,31 17,0 34,-3 44,15 18,18 33,38 45,60 0,7 -10,7 -13,3zM9028,4599c-6,-13 -7,10 -1,2zM9019,4587c17,-11 -17,-32 -12,-10 3,4 6,10 12,10zM9001,4553c-10,-10 4,12 0,0zM8965,4513c-8,-9 -21,-37 -32,-28 4,13 18,33 32,28zM8993,4490c-1,-8 -19,-33 -20,-15 2,5 14,29 20,15zM8672,4185c2,-14 -25,-0 -7,2 2,0 5,-0 7,-2zM8623,4179c-8,-23 -35,-29 -56,-23 -17,-5 -37,-11 -53,1 -18,9 4,19 14,11 19,-5 32,16 50,5 16,-8 29,7 43,9l1,-1zM8661,4166c3,-18 -37,-1 -10,3 3,1 9,2 10,-3zM7945,4150c13,-14 -24,-19 -10,-1 2,4 8,3 10,1zM8611,4145c-11,-12 -15,13 0,0zM8589,4141c-6,-11 -7,11 0,0zM8664,4136c-9,-20 -35,-19 -52,-13 -18,-10 -35,2 -55,-6 -29,-3 -58,-1 -86,-9 -22,1 -45,3 -66,-2 -29,-5 -57,-13 -87,-13 -6,-4 -29,-7 -18,3 15,10 32,20 48,8 21,-2 38,11 58,16 21,-9 42,8 63,0 23,-1 44,13 68,8 19,1 41,-7 57,4 17,-0 33,-2 50,3 7,2 15,3 22,0zM7914,4120c10,-6 5,-33 22,-16 8,5 31,13 26,-3 10,-10 22,-21 19,-35 9,-7 44,2 32,-17 -12,-10 -25,-27 -42,-20 -20,13 -31,34 -46,52 -12,10 -30,36 -18,47 3,-2 4,-5 6,-7zM7985,4105c-3,-13 -18,7 -3,4 2,2 4,-2 3,-4zM8430,4033c2,-18 -36,-17 -25,-2 8,3 17,4 25,2zM8368,4022c-2,-12 -24,3 -7,2 2,0 5,0 7,-2zM8567,3929c1,-21 -15,-38 -21,-58 -16,-35 -34,-69 -54,-101 -18,-18 -34,6 -39,22 -12,22 -20,47 -32,70 -14,16 -22,49 6,56 19,13 42,15 63,9 25,-0 50,8 75,6 1,-1 2,-2 2,-3zM8357,3887c-0,-11 1,-22 11,-26 -1,-12 1,-24 13,-25 9,-17 16,-35 21,-53 2,-19 18,-30 23,-48 4,-16 31,-13 21,-32 -7,-12 4,-19 11,-23 2,-23 -14,-43 -28,-59 -19,-13 2,-32 -12,-47 -7,-14 -8,-40 -31,-29 -12,9 -25,13 -38,4 -16,5 -22,31 -13,44 14,7 27,22 12,36 -10,18 -19,39 -13,59 -8,17 -7,47 -31,48 -14,1 -18,15 -8,24 6,22 -21,40 -13,60 10,3 17,18 18,20 13,-11 32,-33 50,-18 13,9 2,23 -10,22 -12,12 -8,42 10,44 3,2 9,3 10,-2zM8364,3718c-6,-19 24,7 2,3zM8346,3591c-20,-16 31,-30 16,-6 -3,6 -10,12 -16,6zM8275,3746c14,-18 -25,-31 -22,-12 6,5 13,16 22,12zM8317,3712c-1,-11 -13,7 0,0zM8326,3624c6,-16 -25,-5 -5,1 2,1 4,1 5,-1zM8323,3591c-10,-6 3,13 0,0zM8438,3219c-3,-9 -27,-23 -21,-4 3,9 14,7 21,4zM8427,3134c2,-12 -6,9 0,0zM5128,4517c-15,-4 -21,-41 -29,-38 5,24 -28,24 -34,4 -2,-7 -22,-20 -11,-7 1,22 -17,-2 -20,-10 -7,-21 -27,-29 -42,-42 -13,-9 -15,-24 -5,-35 -3,-21 -32,-26 -33,-50 -32,-79 -54,-162 -68,-245 -2,-19 -7,-37 -7,-56 -1,-22 -9,-43 -9,-65 -2,-25 -3,-49 -4,-74 -12,-22 -40,-10 -60,-13 -62,-1 -124,-9 -186,-14 -17,-2 -19,14 -17,26 -4,12 17,17 8,32 -8,20 -10,40 -10,61 -6,17 -26,25 -33,43 -12,20 -20,42 -27,63 -3,16 8,42 -14,48 -16,8 -3,-26 -15,-12 -4,15 -17,26 -21,39 11,2 28,5 21,21 -5,19 -27,29 -27,50 -8,17 -14,39 -31,50 -17,4 -45,-12 -28,-31 0,-22 -9,13 -13,20 -2,10 -23,37 -25,17 4,-12 -0,-19 -11,-10 -5,-8 -9,-13 -18,-13 -11,-13 7,-23 17,-24 9,-7 7,-27 -7,-23 -6,-15 9,-42 -2,-51 -7,17 -20,-9 -26,9 -10,22 -5,47 -15,69 -8,11 -1,37 -18,37 1,-20 7,-41 10,-62 5,-22 8,-45 13,-67 4,-19 20,-10 30,-4 12,-5 4,-15 -4,-14 -2,-7 -11,-25 -13,-8 -4,15 -17,-0 -10,-9 4,-39 12,-78 7,-117 -9,-118 -20,-237 -38,-354 -10,-61 -23,-122 -46,-179 -22,-60 -49,-118 -76,-176 -16,-2 -17,29 -32,35 -20,10 -41,-4 -62,-7 -54,-13 -111,-25 -167,-13 -23,3 -46,-5 -69,2 -16,4 -32,5 -48,1 -24,-3 -46,7 -69,11 -19,5 -35,17 -55,19 -15,6 -19,27 -39,21 -19,1 -32,20 -53,17 -15,9 -32,5 -48,10 -12,17 -34,16 -50,25 -50,23 -95,55 -141,84 -31,14 -60,34 -78,64 -15,23 -20,51 -23,79 -2,17 -14,37 2,52 19,17 46,17 70,21 68,6 137,-2 204,-9 69,-7 137,-17 206,-25 49,-3 99,-1 148,-0 79,4 162,16 230,60 19,14 39,29 51,49 9,25 13,52 20,77 3,28 -16,52 -26,77 -17,34 -37,66 -56,98 -8,13 -24,16 -19,33 0,19 -18,20 -31,20 -10,12 5,22 14,11 19,-1 6,25 -6,26 -10,17 -21,36 -43,38 -17,14 -30,35 -55,36 -17,-0 -12,14 -9,23 -8,5 -30,-2 -15,11 6,22 -25,20 -35,32 -15,6 -39,-3 -46,17 -10,16 -26,-4 -32,-8 -14,-0 -33,-19 -36,4 -11,12 -29,25 -46,18 -12,-24 -33,2 -50,5 -23,8 -49,11 -70,24 -20,13 -47,7 -66,23 -21,14 -44,-10 -63,9 -16,15 -37,17 -57,11 -19,3 -36,16 -55,14 -27,-5 -3,-31 12,-35 14,-4 9,-16 -4,-12 -13,-1 -32,16 -40,4 16,-10 17,-35 36,-41 139,-84 282,-160 429,-230 37,-18 76,-34 108,-61 14,-15 39,-29 39,-50 -13,-8 -30,-5 -45,-11 -36,-9 -73,-17 -109,-27 -6,-4 -5,-29 -14,-12 -19,10 -10,-22 -27,-20 -18,-8 -48,-1 -59,-19 6,-27 35,-9 53,-11 15,-2 38,-0 41,-16 19,1 38,2 56,-3 6,5 28,10 23,0 -14,-7 12,-15 15,-3 6,9 25,18 24,-0 18,-14 36,10 51,17 18,-1 3,-28 -11,-25 -22,-1 -41,-10 -61,-15 -26,1 -50,-7 -75,-10 -21,-3 -43,-8 -65,-5 -15,2 -30,0 -45,-3 -27,-2 -52,-16 -79,-10 -19,-0 -36,-12 -56,-7 -16,-2 -24,9 -28,22 -20,8 10,15 20,13 10,4 35,4 32,18 -17,4 -35,1 -52,1 -18,1 -38,-5 -52,7 -22,-4 -43,-12 -66,-7 -16,1 -30,-6 -46,-2 -16,-10 -36,-12 -52,-21 -10,-17 -36,-5 -51,-18 -14,-3 -27,-6 -40,-13 -34,-15 -70,-26 -99,-50 -22,-26 -48,-49 -65,-78 -9,-12 -0,-28 -9,-41 -6,-13 -10,-26 -8,-40 -2,-22 8,-45 -5,-63 3,-20 1,-40 2,-60 6,-21 20,-40 27,-61 3,-17 18,-26 27,-40 11,-18 35,-12 48,-28 14,-15 12,-39 35,-46 18,-8 9,-37 33,-37 13,-7 25,-7 37,1 6,-18 30,-0 39,-15 3,-4 -25,-3 -14,-14 37,-27 80,-43 123,-56 109,-33 221,-53 332,-72 37,-5 74,-10 111,-15 49,1 98,-3 145,12 51,14 100,36 145,64 6,12 15,19 28,11 23,-7 35,-29 55,-41 10,-6 25,-3 22,-18 9,-16 31,-15 45,-22 19,-7 43,-10 60,4 38,28 63,70 87,110 40,71 73,146 103,222 5,18 13,35 20,52 6,20 3,41 11,60 2,19 21,25 36,28 77,14 155,25 233,30 23,5 26,-18 24,-35 6,-44 14,-87 27,-130 11,-33 21,-68 44,-96 18,-23 44,-39 73,-47 25,-13 46,-32 72,-44 16,-9 34,-15 53,-19 20,-8 39,-17 61,-19 2,23 -28,33 -31,56 -12,24 -19,50 -27,75 -13,16 -5,40 -6,57 11,19 3,42 10,62 2,21 -5,43 3,64 7,30 -8,59 -7,89 -0,21 2,42 2,64 -24,15 14,21 17,36 4,18 1,36 0,54 -2,19 -6,38 -14,56 -3,20 -3,43 -3,61 -14,12 -11,30 -7,45 3,19 19,10 17,-5 10,-23 19,9 18,20 -4,23 7,44 7,66 0,30 10,60 14,89 -1,14 12,38 1,47 -7,-4 -18,-28 -13,-7 8,20 13,40 17,61 11,20 3,41 -4,60 4,21 14,42 20,63 3,10 12,29 14,32 -5,-17 15,-15 13,1 3,20 23,13 15,-4 -6,-21 -18,-41 -19,-64 -3,-8 -3,-35 10,-26 21,58 31,120 46,180 3,17 -23,-4 -18,17 6,19 -12,37 -31,39 -14,-2 -24,-0 -31,14 -13,21 -39,10 -53,-2 -8,-4 -19,-29 -17,-8 0,9 2,32 -14,19zM5277,4422c3,-15 -23,-14 -9,0 2,3 8,5 9,-0zM5170,4358c3,-9 -14,-25 -8,-6 0,5 9,22 8,6zM5124,4349c-7,-20 -5,15 0,0zM3765,4294c5,-13 -9,7 -0,0zM3784,4283c-7,-17 -3,13 0,0zM3820,4276c3,-18 -27,6 -5,5 2,-1 4,-3 5,-5zM4403,4261c17,7 13,-19 9,-28 -11,-6 -10,18 -15,25 -1,4 3,5 6,3zM3864,4257c-5,-12 -6,11 0,0zM3911,4245c-8,-18 -4,18 0,0zM4435,4238c6,-12 -8,0 -1,4l1,-2zM4495,4205c-5,-16 -5,14 0,0zM3968,4173c-9,-10 -8,13 0,0zM4369,4112c21,-22 -35,-11 -4,1zM4389,4097c2,-17 -11,2 -1,6l1,-3zM4532,4097c-4,-15 -3,15 0,0zM5165,4055c2,-11 1,-40 -8,-38 0,12 -5,33 8,38zM3881,3935c9,-22 -33,-13 -16,2 5,1 11,-0 16,-2zM4149,3928c1,-20 -11,16 0,0zM4587,3903c3,-8 3,-31 -2,-26 -0,13 -13,9 -16,7 -8,10 9,35 19,19zM4011,3892c-3,-12 -19,-44 -26,-18 -13,22 12,26 26,18zM3141,3594c4,-6 15,-34 0,-25 -2,3 -10,31 -0,25zM3191,3547c-8,-19 -12,18 0,0zM3197,3533c-9,-14 -3,16 0,0zM3211,3508c6,-15 -8,-2 -3,3l1,-1zM3180,3505c-3,-0 3,6 0,0zM3194,3490c15,-18 32,-34 49,-50 3,-21 -21,5 -26,12 -14,15 -27,30 -36,49 1,7 11,-10 13,-11zM3238,3483c9,-14 35,-37 28,-48 -17,10 -28,28 -43,41 -14,16 7,23 15,8zM3301,3411c19,-18 -19,11 -2,2zM3323,3375c-1,-15 -16,10 -0,1zM5031,4494c5,-25 16,15 0,0zM7095,4489c-14,-4 -27,-11 -42,-4 -20,1 -38,-12 -56,-19 -17,-12 -21,-37 -41,-45 -27,-15 -55,-31 -75,-55 -20,-19 -41,-38 -57,-60 -17,-19 -36,-37 -50,-59 -29,-37 -61,-72 -85,-113 -14,-21 -34,-38 -48,-59 -7,-19 -24,-31 -38,-44 -25,-23 -49,-47 -72,-73 -14,-18 -38,-3 -27,17 2,11 -15,13 -5,22 5,21 -12,39 -7,60 -4,22 -6,43 -13,64 -8,10 1,38 -18,36 -3,-5 -4,-25 -8,-8 -4,14 -22,25 -10,41 -18,36 -44,71 -82,88 -18,6 -44,0 -51,22 -13,10 -28,-11 -35,8 -10,11 -24,-2 -32,13 -11,12 -26,7 -38,12 -15,-12 -2,-36 -4,-53 11,-99 25,-198 37,-297 4,-81 7,-162 -6,-242 -16,-97 -42,-191 -70,-285 -9,-28 -17,-58 -33,-83 -2,-17 -20,-21 -29,-6 -13,12 -30,18 -46,27 -49,23 -101,40 -155,45 -53,3 -106,2 -159,1 -16,-4 -41,0 -34,23 6,21 5,43 6,64 8,40 10,80 16,120 20,8 42,-3 62,5 31,1 62,0 93,0 18,1 35,-2 52,-4 18,7 41,2 55,17 13,11 33,27 17,46 -6,15 -35,17 -28,36 8,10 9,20 -4,27 -16,11 -19,33 -36,42 -8,4 -19,9 -9,17 -7,20 -22,40 -42,50 -28,18 -61,23 -93,24 -41,4 -83,9 -123,17 -10,14 -10,34 -20,50 -10,24 -19,49 -29,73 4,22 32,25 51,26 22,-1 44,-3 66,-6 20,4 41,-1 62,-2 21,-5 42,-11 64,-8 22,0 45,0 67,-4 16,1 -8,27 13,22 18,-2 51,-4 49,23 -7,19 -22,32 -31,50 -12,15 -24,31 -42,38 -22,2 -34,23 -25,42 -14,18 -36,-14 -17,-25 12,-11 -29,-7 -23,9 19,13 6,33 -2,49 -11,21 -43,15 -58,33 -4,11 -14,14 -17,-0 -20,-15 -41,14 -63,5 -23,-2 -46,-7 -67,5 -14,10 -39,-15 -42,6 5,4 32,-6 23,10 -20,1 -40,-1 -60,-0 -20,3 -39,-3 -56,-12 -27,-15 -51,-35 -74,-56 -11,-19 4,-43 -5,-62 -14,-13 -2,-28 3,-42 6,-17 6,-35 5,-53 1,-16 20,-40 3,-51 -21,8 0,-20 -6,-30 4,-20 1,-38 -6,-57 -3,-23 12,-42 16,-63 0,-22 -1,-44 1,-66 4,-31 -2,-62 3,-92 4,-36 4,-73 -3,-109 -9,-63 -25,-125 -44,-186 -11,-42 -22,-83 -34,-125 -9,-20 -21,-38 -26,-59 4,-24 -17,-41 -24,-61 -6,-15 -33,-37 -12,-50 28,-22 61,-38 87,-63 22,-26 45,-56 80,-67 23,1 32,28 48,41 14,17 30,35 53,39 80,23 161,46 244,62 25,5 51,6 77,5 31,5 63,8 94,-0 21,2 42,6 63,4 21,-2 41,-10 56,-24 18,-11 44,-2 55,-25 14,-18 36,-28 57,-33 21,-6 44,-10 63,4 31,2 62,-3 93,-4 33,-3 67,5 100,2 16,-3 31,-2 46,-0 18,-10 -4,24 15,18 18,5 29,-11 40,-19 18,-1 37,15 52,-1 22,-1 44,8 61,22 17,16 41,20 61,33 21,11 40,25 56,43 9,13 22,3 31,6 4,7 -5,27 10,17 18,2 20,25 19,38 6,10 6,16 18,13 12,3 9,20 -3,19 6,17 24,43 7,58 -10,-13 2,-36 -13,-47 -7,-9 -28,-6 -12,5 14,11 11,32 19,47 1,14 22,7 17,22 -3,19 21,21 12,39 -5,20 -13,40 -24,58 -11,17 -16,38 -32,52 -20,19 -37,40 -57,59 -6,11 -26,9 -31,13 13,10 -1,12 -10,11 -13,8 -26,27 -42,20 5,-18 -31,8 -14,14 -16,6 -33,-10 -49,-1 -10,-0 -26,18 -9,19 14,-6 33,-24 46,-8 -12,18 -42,18 -50,38 1,21 19,36 31,52 24,26 46,54 68,82 48,58 101,111 145,173 13,25 38,39 51,63 -14,16 13,13 19,25 14,14 28,28 42,43 19,20 34,43 52,64 15,15 32,28 45,45 -7,22 40,13 29,36 -11,22 24,7 29,24 7,10 18,28 16,5 6,-73 15,-146 26,-219 3,-22 6,-45 4,-67 8,-22 -18,-28 -33,-18 -8,9 -34,8 -19,-7 18,-17 38,-32 53,-52 18,-18 12,-46 10,-68 -1,-16 3,-33 -1,-49 -3,-15 -5,-30 -3,-45 -4,-37 -10,-74 -16,-111 -13,-79 -26,-158 -54,-233 -12,-28 -23,-58 -26,-88 4,-23 28,-33 43,-47 24,-18 50,-35 74,-53 12,-21 41,-23 53,-44 23,-29 49,-58 83,-74 19,-6 39,-8 57,-16 47,-16 98,-18 148,-9 59,10 117,34 165,71 10,12 40,25 28,41 14,11 20,31 41,31 13,2 39,16 29,30 -9,6 -32,-22 -19,-2 12,15 8,38 24,50 17,15 7,-14 1,-21 -12,-12 2,-30 14,-15 21,12 18,36 27,54 11,14 14,29 19,46 -2,24 9,45 15,68 7,14 31,27 15,43 -9,3 -3,-23 -9,-8 -7,20 21,29 21,43 -8,9 -21,-8 -24,10 -4,18 22,11 17,29 1,19 -14,30 -25,43 -26,29 -51,59 -76,89 -18,11 -24,35 -45,42 -17,13 -39,23 -46,45 -5,10 -11,4 -13,-2 -10,13 -23,22 -36,32 -7,9 -1,20 -17,19 -33,8 -56,36 -88,46 -15,16 -42,14 -53,33 5,15 1,26 -15,31 -16,8 -33,16 -42,32 -6,23 -2,49 -14,70 -2,12 -27,24 -7,29 15,15 -7,32 -10,48 -12,25 -22,52 -36,76 -18,19 -22,46 -37,68 -11,20 -24,39 -39,57 -14,5 -23,-33 -31,-7 -9,19 -34,10 -44,21 -4,7 -18,34 -22,19 3,-9 -1,-18 -5,-5 -5,21 -16,40 -21,60 1,14 4,44 -14,44 -13,-5 -28,-4 -35,10 -13,2 -20,12 -21,23 -9,14 -26,21 -42,17 -3,-0 -5,-2 -7,-4zM7204,4352c13,-19 -12,-5 0,0zM7209,4322c9,-11 -14,-40 -13,-38 -4,12 -2,45 12,39zM7152,4307c-1,-14 -27,-6 -8,0 2,1 5,2 8,-0zM6226,4270c4,-7 -0,-31 -3,-11 -4,5 -3,36 2,17l0,-3zM5815,4243c7,-3 31,-3 25,-15 -15,12 -30,-24 -40,-6 9,8 1,13 -1,21 4,4 11,1 16,-0zM5768,4234c7,-11 5,-30 -11,-18 -25,5 -10,34 9,23l2,-3 0,-3zM5673,4230c9,-21 -29,6 -6,3l3,-1zM5834,4219c10,-16 -17,-1 0,0zM6228,4212c-6,3 4,6 0,0zM5819,4205c-1,-12 -6,9 0,0zM6955,4116c1,-13 -26,-9 -8,-1 2,2 6,4 8,1zM6466,4110c11,-13 -15,-11 -4,1l1,0zM6902,4044c1,-10 -28,-26 -15,-7 3,4 11,18 15,7zM6477,4042c2,-16 -7,-0 -2,4l1,-1zM6478,4016c10,-6 -4,-22 -4,-6 -2,4 -1,14 4,6zM6857,3999c-12,-17 -29,-30 -39,-49 0,22 22,35 35,49l2,0zM7408,3805c5,-13 28,10 26,-13 -0,-13 -11,-48 13,-40 13,12 6,53 34,41 62,-21 125,-41 179,-78 33,-26 52,-66 62,-106 7,-31 -0,-63 -7,-93 -13,-52 -45,-98 -87,-131 -32,-25 -68,-44 -107,-57 -39,-10 -79,-13 -119,-14 -18,15 8,36 7,54 13,41 21,84 23,127 3,16 -0,33 -0,48 -8,15 5,41 -10,49 -7,-5 -18,-30 -17,-8 -0,23 0,47 6,69 -6,21 10,42 4,63 3,21 12,44 3,65 -6,7 -25,22 -12,28 1,-1 2,-3 3,-5zM7427,3680c0,-25 24,20 -0,10 -1,-3 1,-6 0,-10zM7424,3615c-9,-15 20,-7 3,1l-2,-0zM7324,3698c-4,-17 -4,18 0,0zM6494,3625c28,-16 58,-28 90,-32 20,-10 41,-20 61,-31 31,-24 56,-57 69,-93 7,-48 -14,-101 -58,-125 -20,-14 -40,-30 -65,-31 -27,-1 -53,3 -80,4 -36,3 -72,7 -107,18 -21,9 -2,25 3,37 7,19 13,38 22,56 19,49 27,102 46,151 9,15 -4,43 16,48l2,-1zM7892,3598c8,-19 -16,7 0,0zM7897,3413c5,-9 -11,-35 -5,-12 -2,9 5,38 5,15zM6905,3421c-2,-12 -9,9 0,0zM7889,3381c-6,-16 -3,11 0,0zM6878,3355c7,-9 -6,-19 -4,-4 -1,5 0,14 4,4zM5881,3353c-7,-8 -6,11 0,0zM6101,3352c20,-4 -6,-29 -6,-8 1,5 -3,19 6,8zM6866,3330c16,-3 -14,-10 -5,0 1,-0 4,-2 5,-0zM6861,3295c8,-12 -21,-25 -8,-5 1,2 4,11 8,5zM6861,3251c-3,-7 -3,6 0,0zM6829,3236c-8,-4 -2,11 0,0zM6821,3222c-0,-13 25,14 17,-3 -32,-34 -66,-67 -108,-88 -26,-8 -52,-14 -78,-22 -18,6 14,15 21,16 41,11 82,28 113,57 1,20 23,31 31,46 3,0 5,-3 4,-6zM7764,3211c-3,-14 -22,-4 -7,3 3,1 5,-1 7,-3zM6628,3106c-5,-6 -24,3 -8,1 3,0 6,2 8,-1zM6600,3104c-13,-10 -10,10 0,0zM6524,3098c-4,-4 -6,2 0,0zM3794,4322c-1,-17 43,-22 35,-4 -9,8 -23,3 -35,4zM4365,4322c-10,-13 26,-24 11,-6 -2,4 -6,9 -11,6zM5747,4316c-19,-19 31,-15 14,0 -4,2 -10,2 -14,-0zM5219,4314c-10,-9 4,-25 7,-7 2,5 -2,12 -7,7zM5208,4278c-12,-15 19,-6 5,2l-3,-1zM6448,4174c-3,-16 22,4 2,0zM5196,4163c-10,-18 25,-12 7,1 -2,0 -5,0 -7,-1zM5961,4160c-4,-18 18,8 0,0zM4552,4121c-9,-14 19,-9 7,0l-4,0zM4088,4107c-12,-14 22,-14 7,-1 -2,1 -5,2 -7,1zM5419,4083c-9,-24 22,-1 0,0zM3677,3891c6,-8 -5,-29 11,-21 14,10 5,28 -11,21zM3752,3864c-4,-20 21,6 4,2l-2,-1zM3145,3332c-2,-14 32,-19 15,-2 -4,3 -10,5 -15,2zM6797,3155c-17,1 -23,-33 -2,-18 7,2 28,31 2,18zM6763,3132c-12,-14 21,0 2,1z" android:strokeWidth="1.33333337"/>
</vector>


================================================
FILE: android/SherpaOnnxTtsEngine/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@color/ic_launcher_background"/>
    <foreground android:drawable="@drawable/ic_launcher_foreground"/>
    <monochrome android:drawable="@drawable/ic_launcher_foreground"/>
</adaptive-icon>

================================================
FILE: android/SherpaOnnxTtsEngine/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@color/ic_launcher_background"/>
    <foreground android:drawable="@drawable/ic_launcher_foreground"/>
    <monochrome android:drawable="@drawable/ic_launcher_foreground"/>
</adaptive-icon>

================================================
FILE: android/SherpaOnnxTtsEngine/app/src/main/res/values/colors.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<resources>
    <color name="purple_200">#FFBB86FC</color>
    <color name="purple_500">#FF6200EE</color>
    <color name="purple_700">#FF3700B3</color>
    <color name="teal_200">#FF03DAC5</color>
    <color name="teal_700">#FF018786</color>
    <color name="black">#FF000000</color>
    <color name="white">#FFFFFFFF</color>
</resources>

================================================
FILE: android/SherpaOnnxTtsEngine/app/src/main/res/values/ic_launcher_background.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<resources>
    <color name="ic_launcher_background">#0b62c2</color>
</resources>

================================================
FILE: android/SherpaOnnxTtsEngine/app/src/main/res/values/strings.xml
================================================
<resources>
    <string name="app_name">TTS Engine: Next-gen Kaldi</string>
</resources>

================================================
FILE: android/SherpaOnnxTtsEngine/app/src/main/res/values/themes.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<resources>

    <style name="Theme.SherpaOnnxTtsEngine" parent="android:Theme.Material.Light.NoActionBar" />
</resources>

================================================
FILE: android/SherpaOnnxTtsEngine/app/src/main/res/xml/backup_rules.xml
================================================
<?xml version="1.0" encoding="utf-8"?><!--
   Sample backup rules file; uncomment and customize as necessary.
   See https://developer.android.com/guide/topics/data/autobackup
   for details.
   Note: This file is ignored for devices older that API 31
   See https://developer.android.com/about/versions/12/backup-restore
-->
<full-backup-content>
    <!--
   <include domain="sharedpref" path="."/>
   <exclude domain="sharedpref" path="device.xml"/>
-->
</full-backup-content>

================================================
FILE: android/SherpaOnnxTtsEngine/app/src/main/res/xml/data_extraction_rules.xml
================================================
<?xml version="1.0" encoding="utf-8"?><!--
   Sample data extraction rules file; uncomment and customize as necessary.
   See https://developer.android.com/about/versions/12/backup-restore#xml-changes
   for details.
-->
<data-extraction-rules>
    <cloud-backup>
        <!-- TODO: Use <include> and <exclude> to control what is backed up.
        <include .../>
        <exclude .../>
        -->
    </cloud-backup>
    <!--
    <device-transfer>
        <include .../>
        <exclude .../>
    </device-transfer>
    -->
</data-extraction-rules>

================================================
FILE: android/SherpaOnnxTtsEngine/app/src/main/res/xml/tts_engine.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<tts-engine xmlns:android="http://schemas.android.com/apk/res/android"
    android:settingsActivity="com.k2fsa.sherpa.onnx.tts.engine.MainActivity"
    >
</tts-engine>

================================================
FILE: android/SherpaOnnxTtsEngine/app/src/test/java/com/k2fsa/sherpa/onnx/tts/engine/ExampleUnitTest.kt
================================================
package com.k2fsa.sherpa.onnx.tts.engine

import org.junit.Test

import org.junit.Assert.*

/**
 * Example local unit test, which will execute on the development machine (host).
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
class ExampleUnitTest {
    @Test
    fun addition_isCorrect() {
        assertEquals(4, 2 + 2)
    }
}

================================================
FILE: android/SherpaOnnxTtsEngine/build.gradle.kts
================================================
// Top-level build file where you can add configuration options common to all sub-projects/modules.
plugins {
    id("com.android.application") version "8.2.0" apply false
    id("org.jetbrains.kotlin.android") version "1.9.0" apply false
}

================================================
FILE: android/SherpaOnnxTtsEngine/gradle/wrapper/gradle-wrapper.properties
================================================
#Sun Dec 31 18:47:53 CST 2023
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.2-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists


================================================
FILE: android/SherpaOnnxTtsEngine/gradle.properties
================================================
# Project-wide Gradle settings.
# IDE (e.g. Android Studio) users:
# Gradle settings configured through the IDE *will override*
# any settings specified in this file.
# For more details on how to configure your build environment visit
# http://www.gradle.org/docs/current/userguide/build_environment.html
# Specifies the JVM arguments used for the daemon process.
# The setting is particularly useful for tweaking memory settings.
org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
# When configured, Gradle will run in incubating parallel mode.
# This option should only be used with decoupled projects. More details, visit
# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
# org.gradle.parallel=true
# AndroidX package structure to make it clearer which packages are bundled with the
# Android operating system, and which are packaged with your app's APK
# https://developer.android.com/topic/libraries/support-library/androidx-rn
android.useAndroidX=true
# Kotlin code style for this project: "official" or "obsolete":
kotlin.code.style=official
# Enables namespacing of each library's R class so that its R class includes only the
# resources declared in the library itself and none from the library's dependencies,
# thereby reducing the size of the R class for that library
android.nonTransitiveRClass=true

================================================
FILE: android/SherpaOnnxTtsEngine/gradlew
================================================
#!/usr/bin/env sh

#
# Copyright 2015 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

##############################################################################
##
##  Gradle start up script for UN*X
##
##############################################################################

# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
    ls=`ls -ld "$PRG"`
    link=`expr "$ls" : '.*-> \(.*\)$'`
    if expr "$link" : '/.*' > /dev/null; then
        PRG="$link"
    else
        PRG=`dirname "$PRG"`"/$link"
    fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null

APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`

# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"

warn () {
    echo "$*"
}

die () {
    echo
    echo "$*"
    echo
    exit 1
}

# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
  CYGWIN* )
    cygwin=true
    ;;
  Darwin* )
    darwin=true
    ;;
  MINGW* )
    msys=true
    ;;
  NONSTOP* )
    nonstop=true
    ;;
esac

CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar


# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
        # IBM's JDK on AIX uses strange locations for the executables
        JAVACMD="$JAVA_HOME/jre/sh/java"
    else
        JAVACMD="$JAVA_HOME/bin/java"
    fi
    if [ ! -x "$JAVACMD" ] ; then
        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
else
    JAVACMD="java"
    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi

# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
    MAX_FD_LIMIT=`ulimit -H -n`
    if [ $? -eq 0 ] ; then
        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
            MAX_FD="$MAX_FD_LIMIT"
        fi
        ulimit -n $MAX_FD
        if [ $? -ne 0 ] ; then
            warn "Could not set maximum file descriptor limit: $MAX_FD"
        fi
    else
        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
    fi
fi

# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi

# For Cygwin or MSYS, switch paths to Windows format before running java
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`

    JAVACMD=`cygpath --unix "$JAVACMD"`

    # We build the pattern for arguments to be converted via cygpath
    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
    SEP=""
    for dir in $ROOTDIRSRAW ; do
        ROOTDIRS="$ROOTDIRS$SEP$dir"
        SEP="|"
    done
    OURCYGPATTERN="(^($ROOTDIRS))"
    # Add a user-defined pattern to the cygpath arguments
    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
    fi
    # Now convert the arguments - kludge to limit ourselves to /bin/sh
    i=0
    for arg in "$@" ; do
        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
        CHECK2=`echo "$arg"|egrep -c "^-"`                                 ### Determine if an option

        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Added a condition
            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
        else
            eval `echo args$i`="\"$arg\""
        fi
        i=`expr $i + 1`
    done
    case $i in
        0) set -- ;;
        1) set -- "$args0" ;;
        2) set -- "$args0" "$args1" ;;
        3) set -- "$args0" "$args1" "$args2" ;;
        4) set -- "$args0" "$args1" "$args2" "$args3" ;;
        5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
        6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
        7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
        8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
        9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
    esac
fi

# Escape application args
save () {
    for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
    echo " "
}
APP_ARGS=`save "$@"`

# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"

exec "$JAVACMD" "$@"


================================================
FILE: android/SherpaOnnxTtsEngine/gradlew.bat
================================================
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem      https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem

@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem  Gradle startup script for Windows
@rem
@rem ##########################################################################

@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal

set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"

@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome

set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto execute

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto execute

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar


@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*

:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if  not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1

:mainEnd
if "%OS%"=="Windows_NT" endlocal

:omega


================================================
FILE: android/SherpaOnnxTtsEngine/settings.gradle.kts
================================================
pluginManagement {
    repositories {
        google()
        mavenCentral()
        gradlePluginPortal()
    }
}
dependencyResolutionManagement {
    repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
    repositories {
        google()
        mavenCentral()
    }
}

rootProject.name = "SherpaOnnxTtsEngine"
include(":app")


================================================
FILE: android/SherpaOnnxVad/.gitignore
================================================
*.iml
.gradle
/local.properties
/.idea/caches
/.idea/libraries
/.idea/modules.xml
/.idea/workspace.xml
/.idea/navEditor.xml
/.idea/assetWizardSettings.xml
.DS_Store
/build
/captures
.externalNativeBuild
.cxx
local.properties


================================================
FILE: android/SherpaOnnxVad/app/.gitignore
================================================
/build

================================================
FILE: android/SherpaOnnxVad/app/build.gradle
================================================
plugins {
    id 'com.android.application'
    id 'org.jetbrains.kotlin.android'
}

android {
    namespace 'com.k2fsa.sherpa.onnx'
    compileSdk 33

    defaultConfig {
        applicationId "com.k2fsa.sherpa.onnx"
        minSdk 21
        targetSdk 33
        versionCode 20260320
        versionName "1.12.31"

        testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
    }

    buildTypes {
        release {
            minifyEnabled false
            proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
        }
    }
    compileOptions {
        sourceCompatibility JavaVersion.VERSION_1_8
        targetCompatibility JavaVersion.VERSION_1_8
    }
    kotlinOptions {
        jvmTarget = '1.8'
    }
}

dependencies {

    implementation 'androidx.core:core-ktx:1.7.0'
    implementation 'androidx.appcompat:appcompat:1.6.1'
    implementation 'com.google.android.material:material:1.9.0'
    implementation 'androidx.constraintlayout:constraintlayout:2.1.4'
    testImplementation 'junit:junit:4.13.2'
    androidTestImplementation 'androidx.test.ext:junit:1.1.5'
    androidTestImplementation 'androidx.test.espresso:espresso-core:3.5.1'
}

================================================
FILE: android/SherpaOnnxVad/app/proguard-rules.pro
================================================
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
#   http://developer.android.com/guide/developing/tools/proguard.html

# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
#   public *;
#}

# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable

# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile

================================================
FILE: android/SherpaOnnxVad/app/src/androidTest/java/com/k2fsa/sherpa/onnx/ExampleInstrumentedTest.kt
================================================
package com.k2fsa.sherpa.onnx

import androidx.test.platform.app.InstrumentationRegistry
import androidx.test.ext.junit.runners.AndroidJUnit4

import org.junit.Test
import org.junit.runner.RunWith

import org.junit.Assert.*

/**
 * Instrumented test, which will execute on an Android device.
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
@RunWith(AndroidJUnit4::class)
class ExampleInstrumentedTest {
    @Test
    fun useAppContext() {
        // Context of the app under test.
        val appContext = InstrumentationRegistry.getInstrumentation().targetContext
        assertEquals("com.k2fsa.sherpa.onnx", appContext.packageName)
    }
}

================================================
FILE: android/SherpaOnnxVad/app/src/main/AndroidManifest.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:tools="http://schemas.android.com/tools">

    <uses-permission android:name="android.permission.RECORD_AUDIO" />

    <application
        android:allowBackup="true"
        android:dataExtractionRules="@xml/data_extraction_rules"
        android:fullBackupContent="@xml/backup_rules"
        android:icon="@mipmap/ic_launcher"
        android:label="@string/app_name"
        android:roundIcon="@mipmap/ic_launcher_round"
        android:supportsRtl="true"
        android:theme="@style/Theme.SherpaOnnxVad"
        tools:targetApi="31">
        <activity
            android:name="com.k2fsa.sherpa.onnx.vad.MainActivity"
            android:exported="true">
            <intent-filter>
                <action android:name="android.intent.action.MAIN" />

                <category android:name="android.intent.category.LAUNCHER" />
            </intent-filter>

            <meta-data
                android:name="android.app.lib_name"
                android:value="" />
        </activity>
    </application>

</manifest>

================================================
FILE: android/SherpaOnnxVad/app/src/main/assets/.gitignore
================================================
*.onnx


================================================
FILE: android/SherpaOnnxVad/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
================================================
package com.k2fsa.sherpa.onnx.vad

import android.Manifest
import android.content.pm.PackageManager
import android.media.AudioFormat
import android.media.AudioRecord
import android.media.MediaRecorder
import android.os.Bundle
import android.util.Log
import android.view.View
import android.widget.Button
import androidx.appcompat.app.AppCompatActivity
import androidx.core.app.ActivityCompat
import com.k2fsa.sherpa.onnx.R
import com.k2fsa.sherpa.onnx.Vad
import com.k2fsa.sherpa.onnx.getVadModelConfig
import kotlin.concurrent.thread


private const val TAG = "sherpa-onnx"
private const val REQUEST_RECORD_AUDIO_PERMISSION = 200

class MainActivity : AppCompatActivity() {

    private lateinit var recordButton: Button
    private lateinit var circle: View

    private lateinit var vad: Vad

    private var audioRecord: AudioRecord? = null
    private var recordingThread: Thread? = null
    private val audioSource = MediaRecorder.AudioSource.MIC
    private val sampleRateInHz = 16000
    private val channelConfig = AudioFormat.CHANNEL_IN_MONO

    // Note: We don't use AudioFormat.ENCODING_PCM_FLOAT
    // since the AudioRecord.read(float[]) needs API level >= 23
    // but we are targeting API level >= 21
    private val audioFormat = AudioFormat.ENCODING_PCM_16BIT

    private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)

    @Volatile
    private var isRecording: Boolean = false

    override fun onRequestPermissionsResult(
        requestCode: Int, permissions: Array<String>, grantResults: IntArray
    ) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults)
        val permissionToRecordAccepted = if (requestCode == REQUEST_RECORD_AUDIO_PERMISSION) {
            grantResults[0] == PackageManager.PERMISSION_GRANTED
        } else {
            false
        }

        if (!permissionToRecordAccepted) {
            Log.e(TAG, "Audio record is disallowed")
            finish()
        }

        Log.i(TAG, "Audio record is permitted")
    }

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        setContentView(R.layout.activity_main)

        ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)

        Log.i(TAG, "Start to initialize model")
        initVadModel()
        Log.i(TAG, "Finished initializing model")

        circle= findViewById(R.id.powerCircle)

        recordButton = findViewById(R.id.record_button)
        recordButton.setOnClickListener { onclick() }
    }

    private fun onclick() {
        if (!isRecording) {
            val ret = initMicrophone()
            if (!ret) {
                Log.e(TAG, "Failed to initialize microphone")
                return
            }
            Log.i(TAG, "state: ${audioRecord?.state}")
            audioRecord!!.startRecording()
            recordButton.setText(R.string.stop)
            isRecording = true

            vad.reset()
            recordingThread = thread(true) {
                processSamples()
            }
            Log.i(TAG, "Started recording")
            onVad(false)

        } else {
            isRecording = false

            audioRecord!!.stop()
            audioRecord!!.release()
            audioRecord = null

            recordButton.setText(R.string.start)
            onVad(false)
            Log.i(TAG, "Stopped recording")
        }
    }

    private fun onVad(isSpeech: Boolean) {
        if(isSpeech) {
            circle.background = resources.getDrawable(R.drawable.red_circle)
        } else {
            circle.background = resources.getDrawable(R.drawable.black_circle)
        }
    }

    private  fun initVadModel() {
        val type = 0
        Log.i(TAG, "Select VAD model type ${type}")
        val config = getVadModelConfig(type)

        vad = Vad(
            assetManager = application.assets,
            config = config!!,
        )
    }

    private fun initMicrophone(): Boolean {
        if (ActivityCompat.checkSelfPermission(
                this, Manifest.permission.RECORD_AUDIO
            ) != PackageManager.PERMISSION_GRANTED
        ) {
            ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)
            return false
        }

        val numBytes = AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat)
        Log.i(
            TAG, "buffer size in milliseconds: ${numBytes * 1000.0f / sampleRateInHz}"
        )

        audioRecord = AudioRecord(
            audioSource,
            sampleRateInHz,
            channelConfig,
            audioFormat,
            numBytes * 2 // a sample has two bytes as we are using 16-bit PCM
        )
        return true
    }

    private fun processSamples() {
        Log.i(TAG, "processing samples")

        val bufferSize = 512 // in samples
        val buffer = ShortArray(bufferSize)

        while (isRecording) {
            val ret = audioRecord?.read(buffer, 0, buffer.size)
            if (ret != null && ret > 0) {
                val samples = FloatArray(ret) { buffer[it] / 32768.0f }

                vad.acceptWaveform(samples)

                val isSpeechDetected = vad.isSpeechDetected()
                vad.clear()

                runOnUiThread {
                    onVad(isSpeechDetected)
                }
            }
        }
    }
}


================================================
FILE: android/SherpaOnnxVad/app/src/main/jniLibs/.gitignore
================================================
*.so


================================================
FILE: android/SherpaOnnxVad/app/src/main/jniLibs/arm64-v8a/.gitignore
================================================


================================================
FILE: android/SherpaOnnxVad/app/src/main/jniLibs/armeabi-v7a/.gitignore
================================================


================================================
FILE: android/SherpaOnnxVad/app/src/main/jniLibs/x86/.gitignore
================================================


================================================
FILE: android/SherpaOnnxVad/app/src/main/jniLibs/x86_64/.gitignore
================================================


================================================
FILE: android/SherpaOnnxVad/app/src/main/res/drawable/black_circle.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<selector xmlns:android="http://schemas.android.com/apk/res/android">
  <item>
    <shape  android:shape="oval">

    <solid  android:color="#FF000000"/>

    <size
        android:width="300dp"
        android:height="300dp"/>
    </shape>
  </item>
</selector>

================================================
FILE: android/SherpaOnnxVad/app/src/main/res/drawable/ic_launcher_background.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">
    <path
        android:fillColor="#3DDC84"
        android:pathData="M0,0h108v108h-108z" />
    <path
        android:fillColor="#00000000"
        android:pathData="M9,0L9,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,0L19,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M29,0L29,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M39,0L39,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M49,0L49,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M59,0L59,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M69,0L69,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M79,0L79,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M89,0L89,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M99,0L99,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,9L108,9"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,19L108,19"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,29L108,29"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,39L108,39"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,49L108,49"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,59L108,59"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,69L108,69"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,79L108,79"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,89L108,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,99L108,99"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,29L89,29"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,39L89,39"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,49L89,49"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,59L89,59"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,69L89,69"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,79L89,79"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M29,19L29,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M39,19L39,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M49,19L49,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M59,19L59,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M69,19L69,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M79,19L79,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
</vector>


================================================
FILE: android/SherpaOnnxVad/app/src/main/res/drawable/red_circle.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<selector xmlns:android="http://schemas.android.com/apk/res/android">
  <item>
    <shape  android:shape="oval">

    <solid  android:color="#FFFF0000"/>

    <size
        android:width="300dp"
        android:height="300dp"/>
    </shape>
  </item>
</selector>

================================================
FILE: android/SherpaOnnxVad/app/src/main/res/drawable-v24/ic_launcher_foreground.xml
================================================
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:aapt="http://schemas.android.com/aapt"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">
    <path android:pathData="M31,63.928c0,0 6.4,-11 12.1,-13.1c7.2,-2.6 26,-1.4 26,-1.4l38.1,38.1L107,108.928l-32,-1L31,63.928z">
        <aapt:attr name="android:fillColor">
            <gradient
                android:endX="85.84757"
                android:endY="92.4963"
                android:startX="42.9492"
                android:startY="49.59793"
                android:type="linear">
                <item
                    android:color="#44000000"
                    android:offset="0.0" />
                <item
                    android:color="#00000000"
                    android:offset="1.0" />
            </gradient>
        </aapt:attr>
    </path>
    <path
        android:fillColor="#FFFFFF"
        android:fillType="nonZero"
        android:pathData="M65.3,45.828l3.8,-6.6c0.2,-0.4 0.1,-0.9 -0.3,-1.1c-0.4,-0.2 -0.9,-0.1 -1.1,0.3l-3.9,6.7c-6.3,-2.8 -13.4,-2.8 -19.7,0l-3.9,-6.7c-0.2,-0.4 -0.7,-0.5 -1.1,-0.3C38.8,38.328 38.7,38.828 38.9,39.228l3.8,6.6C36.2,49.428 31.7,56.028 31,63.928h46C76.3,56.028 71.8,49.428 65.3,45.828zM43.4,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2c-0.3,-0.7 -0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C45.3,56.528 44.5,57.328 43.4,57.328L43.4,57.328zM64.6,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2s-0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C66.5,56.528 65.6,57.328 64.6,57.328L64.6,57.328z"
        android:strokeWidth="1"
        android:strokeColor="#00000000" />
</vector>

================================================
FILE: android/SherpaOnnxVad/app/src/main/res/layout/activity_main.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:app="http://schemas.android.com/apk/res-auto"
    xmlns:tools="http://schemas.android.com/tools"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    tools:context="com.k2fsa.sherpa.onnx.vad.MainActivity">
    <LinearLayout
        android:layout_width="match_parent"
        android:layout_height="match_parent"
        android:gravity="bottom"
        android:orientation="vertical"
        >

        <Space
            android:layout_width="match_parent"
            android:layout_height="10dp" />

        <LinearLayout
            android:id="@+id/powerCircle"
            android:layout_width="wrap_content"
            android:layout_height="wrap_content"
            android:layout_gravity="center_horizontal"
            android:background="@drawable/black_circle"
            android:orientation="vertical" />

        <Space
            android:layout_width="match_parent"
            android:layout_height="200dp" />

        <Button
            android:id="@+id/record_button"
            android:layout_width="match_parent"
            android:layout_height="wrap_content"
            android:text="@string/start" />


    </LinearLayout>


</androidx.constraintlayout.widget.ConstraintLayout>


================================================
FILE: android/SherpaOnnxVad/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@drawable/ic_launcher_background" />
    <foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

================================================
FILE: android/SherpaOnnxVad/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@drawable/ic_launcher_background" />
    <foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

================================================
FILE: android/SherpaOnnxVad/app/src/main/res/values/colors.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<resources>
    <color name="purple_200">#FFBB86FC</color>
    <color name="purple_500">#FF6200EE</color>
    <color name="purple_700">#FF3700B3</color>
    <color name="teal_200">#FF03DAC5</color>
    <color name="teal_700">#FF018786</color>
    <color name="black">#FF000000</color>
    <color name="white">#FFFFFFFF</color>
</resources>

================================================
FILE: android/SherpaOnnxVad/app/src/main/res/values/strings.xml
================================================
<resources>
    <string name="app_name">VAD: Next-gen Kaldi</string>

    <string name="hint">Click the Start button to play Silero VAD with Next-gen Kaldi.</string>
    <string name="start">Start</string>
    <string name="stop">Stop</string>
</resources>

================================================
FILE: android/SherpaOnnxVad/app/src/main/res/values/themes.xml
================================================
<resources xmlns:tools="http://schemas.android.com/tools">
    <!-- Base application theme. -->
    <style name="Theme.SherpaOnnxVad" parent="Theme.MaterialComponents.DayNight.DarkActionBar">
        <!-- Primary brand color. -->
        <item name="colorPrimary">@color/purple_500</item>
        <item name="colorPrimaryVariant">@color/purple_700</item>
        <item name="colorOnPrimary">@color/white</item>
        <!-- Secondary brand color. -->
        <item name="colorSecondary">@color/teal_200</item>
        <item name="colorSecondaryVariant">@color/teal_700</item>
        <item name="colorOnSecondary">@color/black</item>
        <!-- Status bar color. -->
        <item name="android:statusBarColor">?attr/colorPrimaryVariant</item>
        <!-- Customize your theme here. -->
    </style>
</resources>

================================================
FILE: android/SherpaOnnxVad/app/src/main/res/values-night/themes.xml
================================================
<resources xmlns:tools="http://schemas.android.com/tools">
    <!-- Base application theme. -->
    <style name="Theme.SherpaOnnxVad" parent="Theme.MaterialComponents.DayNight.DarkActionBar">
        <!-- Primary brand color. -->
        <item name="colorPrimary">@color/purple_200</item>
        <item name="colorPrimaryVariant">@color/purple_700</item>
        <item name="colorOnPrimary">@color/black</item>
        <!-- Secondary brand color. -->
        <item name="colorSecondary">@color/teal_200</item>
        <item name="colorSecondaryVariant">@color/teal_200</item>
        <item name="colorOnSecondary">@color/black</item>
        <!-- Status bar color. -->
        <item name="android:statusBarColor">?attr/colorPrimaryVariant</item>
        <!-- Customize your theme here. -->
    </style>
</resources>

================================================
FILE: android/SherpaOnnxVad/app/src/main/res/xml/backup_rules.xml
================================================
<?xml version="1.0" encoding="utf-8"?><!--
   Sample backup rules file; uncomment and customize as necessary.
   See https://developer.android.com/guide/topics/data/autobackup
   for details.
   Note: This file is ignored for devices older that API 31
   See https://developer.android.com/about/versions/12/backup-restore
-->
<full-backup-content>
    <!--
   <include domain="sharedpref" path="."/>
   <exclude domain="sharedpref" path="device.xml"/>
-->
</full-backup-content>

================================================
FILE: android/SherpaOnnxVad/app/src/main/res/xml/data_extraction_rules.xml
================================================
<?xml version="1.0" encoding="utf-8"?><!--
   Sample data extraction rules file; uncomment and customize as necessary.
   See https://developer.android.com/about/versions/12/backup-restore#xml-changes
   for details.
-->
<data-extraction-rules>
    <cloud-backup>
        <!-- TODO: Use <include> and <exclude> to control what is backed up.
        <include .../>
        <exclude .../>
        -->
    </cloud-backup>
    <!--
    <device-transfer>
        <include .../>
        <exclude .../>
    </device-transfer>
    -->
</data-extraction-rules>

================================================
FILE: android/SherpaOnnxVad/app/src/test/java/com/k2fsa/sherpa/onnx/ExampleUnitTest.kt
================================================
package com.k2fsa.sherpa.onnx

import org.junit.Test

import org.junit.Assert.*

/**
 * Example local unit test, which will execute on the development machine (host).
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
class ExampleUnitTest {
    @Test
    fun addition_isCorrect() {
        assertEquals(4, 2 + 2)
    }
}

================================================
FILE: android/SherpaOnnxVad/build.gradle
================================================
// Top-level build file where you can add configuration options common to all sub-projects/modules.
plugins {
    id 'com.android.application' version '7.3.1' apply false
    id 'com.android.library' version '7.3.1' apply false
    id 'org.jetbrains.kotlin.android' version '1.7.20' apply false
}

================================================
FILE: android/SherpaOnnxVad/gradle/wrapper/gradle-wrapper.properties
================================================
#Sat Sep 23 10:24:21 CST 2023
distributionBase=GRADLE_USER_HOME
distributionUrl=https\://services.gradle.org/distributions/gradle-8.2-bin.zip
distributionPath=wrapper/dists
zipStorePath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME


================================================
FILE: android/SherpaOnnxVad/gradle.properties
================================================
# Project-wide Gradle settings.
# IDE (e.g. Android Studio) users:
# Gradle settings configured through the IDE *will override*
# any settings specified in this file.
# For more details on how to configure your build environment visit
# http://www.gradle.org/docs/current/userguide/build_environment.html
# Specifies the JVM arguments used for the daemon process.
# The setting is particularly useful for tweaking memory settings.
org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
# When configured, Gradle will run in incubating parallel mode.
# This option should only be used with decoupled projects. More details, visit
# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
# org.gradle.parallel=true
# AndroidX package structure to make it clearer which packages are bundled with the
# Android operating system, and which are packaged with your app's APK
# https://developer.android.com/topic/libraries/support-library/androidx-rn
android.useAndroidX=true
# Kotlin code style for this project: "official" or "obsolete":
kotlin.code.style=official
# Enables namespacing of each library's R class so that its R class includes only the
# resources declared in the library itself and none from the library's dependencies,
# thereby reducing the size of the R class for that library
android.nonTransitiveRClass=true

================================================
FILE: android/SherpaOnnxVad/gradlew
================================================
#!/usr/bin/env sh

#
# Copyright 2015 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

##############################################################################
##
##  Gradle start up script for UN*X
##
##############################################################################

# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
    ls=`ls -ld "$PRG"`
    link=`expr "$ls" : '.*-> \(.*\)$'`
    if expr "$link" : '/.*' > /dev/null; then
        PRG="$link"
    else
        PRG=`dirname "$PRG"`"/$link"
    fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null

APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`

# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"

warn () {
    echo "$*"
}

die () {
    echo
    echo "$*"
    echo
    exit 1
}

# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
  CYGWIN* )
    cygwin=true
    ;;
  Darwin* )
    darwin=true
    ;;
  MINGW* )
    msys=true
    ;;
  NONSTOP* )
    nonstop=true
    ;;
esac

CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar


# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
        # IBM's JDK on AIX uses strange locations for the executables
        JAVACMD="$JAVA_HOME/jre/sh/java"
    else
        JAVACMD="$JAVA_HOME/bin/java"
    fi
    if [ ! -x "$JAVACMD" ] ; then
        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
else
    JAVACMD="java"
    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi

# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
    MAX_FD_LIMIT=`ulimit -H -n`
    if [ $? -eq 0 ] ; then
        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
            MAX_FD="$MAX_FD_LIMIT"
        fi
        ulimit -n $MAX_FD
        if [ $? -ne 0 ] ; then
            warn "Could not set maximum file descriptor limit: $MAX_FD"
        fi
    else
        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
    fi
fi

# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi

# For Cygwin or MSYS, switch paths to Windows format before running java
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`

    JAVACMD=`cygpath --unix "$JAVACMD"`

    # We build the pattern for arguments to be converted via cygpath
    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
    SEP=""
    for dir in $ROOTDIRSRAW ; do
        ROOTDIRS="$ROOTDIRS$SEP$dir"
        SEP="|"
    done
    OURCYGPATTERN="(^($ROOTDIRS))"
    # Add a user-defined pattern to the cygpath arguments
    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
    fi
    # Now convert the arguments - kludge to limit ourselves to /bin/sh
    i=0
    for arg in "$@" ; do
        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
        CHECK2=`echo "$arg"|egrep -c "^-"`                                 ### Determine if an option

        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Added a condition
            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
        else
            eval `echo args$i`="\"$arg\""
        fi
        i=`expr $i + 1`
    done
    case $i in
        0) set -- ;;
        1) set -- "$args0" ;;
        2) set -- "$args0" "$args1" ;;
        3) set -- "$args0" "$args1" "$args2" ;;
        4) set -- "$args0" "$args1" "$args2" "$args3" ;;
        5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
        6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
        7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
        8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
        9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
    esac
fi

# Escape application args
save () {
    for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
    echo " "
}
APP_ARGS=`save "$@"`

# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"

exec "$JAVACMD" "$@"


================================================
FILE: android/SherpaOnnxVad/gradlew.bat
================================================
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem      https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem

@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem  Gradle startup script for Windows
@rem
@rem ##########################################################################

@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal

set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"

@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome

set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto execute

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto execute

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar


@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*

:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if  not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1

:mainEnd
if "%OS%"=="Windows_NT" endlocal

:omega


================================================
FILE: android/SherpaOnnxVad/settings.gradle
================================================
pluginManagement {
    repositories {
        gradlePluginPortal()
        google()
        mavenCentral()
    }
}
dependencyResolutionManagement {
    repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
    repositories {
        google()
        mavenCentral()
    }
}
rootProject.name = "SherpaOnnxVad"
include ':app'


================================================
FILE: android/SherpaOnnxVadAsr/.gitignore
================================================
*.iml
.gradle
/local.properties
/.idea/caches
/.idea/libraries
/.idea/modules.xml
/.idea/workspace.xml
/.idea/navEditor.xml
/.idea/assetWizardSettings.xml
.DS_Store
/build
/captures
.externalNativeBuild
.cxx
local.properties


================================================
FILE: android/SherpaOnnxVadAsr/app/.gitignore
================================================
/build

================================================
FILE: android/SherpaOnnxVadAsr/app/build.gradle
================================================
plugins {
    id 'com.android.application'
    id 'org.jetbrains.kotlin.android'
}

android {
    namespace 'com.k2fsa.sherpa.onnx'
    compileSdk 33

    defaultConfig {
        applicationId "com.k2fsa.sherpa.onnx"
        minSdk 21
        targetSdk 33
        versionCode 20260320
        versionName "1.12.31"

        testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
    }

    buildTypes {
        release {
            minifyEnabled false
            proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
        }
    }
    compileOptions {
        sourceCompatibility JavaVersion.VERSION_1_8
        targetCompatibility JavaVersion.VERSION_1_8
    }
    kotlinOptions {
        jvmTarget = '1.8'
    }
}

dependencies {

    implementation 'androidx.core:core-ktx:1.7.0'
    implementation 'androidx.appcompat:appcompat:1.6.1'
    implementation 'com.google.android.material:material:1.9.0'
    implementation 'androidx.constraintlayout:constraintlayout:2.1.4'
    implementation 'androidx.lifecycle:lifecycle-runtime-ktx:2.5.1'
    
    testImplementation 'junit:junit:4.13.2'
    androidTestImplementation 'androidx.test.ext:junit:1.1.5'
    androidTestImplementation 'androidx.test.espresso:espresso-core:3.5.1'
}


================================================
FILE: android/SherpaOnnxVadAsr/app/proguard-rules.pro
================================================
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
#   http://developer.android.com/guide/developing/tools/proguard.html

# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
#   public *;
#}

# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable

# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile

================================================
FILE: android/SherpaOnnxVadAsr/app/src/androidTest/java/com/k2fsa/sherpa/onnx/ExampleInstrumentedTest.kt
================================================
package com.k2fsa.sherpa.onnx

import androidx.test.platform.app.InstrumentationRegistry
import androidx.test.ext.junit.runners.AndroidJUnit4

import org.junit.Test
import org.junit.runner.RunWith

import org.junit.Assert.*

/**
 * Instrumented test, which will execute on an Android device.
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
@RunWith(AndroidJUnit4::class)
class ExampleInstrumentedTest {
    @Test
    fun useAppContext() {
        // Context of the app under test.
        val appContext = InstrumentationRegistry.getInstrumentation().targetContext
        assertEquals("com.k2fsa.sherpa.onnx", appContext.packageName)
    }
}

================================================
FILE: android/SherpaOnnxVadAsr/app/src/main/AndroidManifest.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:tools="http://schemas.android.com/tools">

    <uses-permission android:name="android.permission.RECORD_AUDIO" />

    <application
        android:allowBackup="true"
        android:dataExtractionRules="@xml/data_extraction_rules"
        android:fullBackupContent="@xml/backup_rules"
        android:icon="@mipmap/ic_launcher"
        android:label="@string/app_name"
        android:roundIcon="@mipmap/ic_launcher_round"
        android:supportsRtl="true"
        android:theme="@style/Theme.SherpaOnnxVadAsr"
        tools:targetApi="31">
        <activity
            android:name=".vad.asr.MainActivity"
            android:exported="true">
            <intent-filter>
                <action android:name="android.intent.action.MAIN" />

                <category android:name="android.intent.category.LAUNCHER" />
            </intent-filter>

            <meta-data
                android:name="android.app.lib_name"
                android:value="" />
        </activity>
    </application>

</manifest>

================================================
FILE: android/SherpaOnnxVadAsr/app/src/main/assets/.gitignore
================================================
*.onnx


================================================
FILE: android/SherpaOnnxVadAsr/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
================================================
package com.k2fsa.sherpa.onnx.vad.asr

import android.Manifest
import android.content.pm.PackageManager
import android.media.AudioFormat
import android.media.AudioRecord
import android.media.MediaRecorder
import android.os.Bundle
import android.text.method.ScrollingMovementMethod
import android.util.Log
import android.widget.Button
import android.widget.TextView
import androidx.appcompat.app.AppCompatActivity
import androidx.core.app.ActivityCompat
import com.k2fsa.sherpa.onnx.OfflineRecognizer
import com.k2fsa.sherpa.onnx.OfflineRecognizerConfig
import com.k2fsa.sherpa.onnx.R
import com.k2fsa.sherpa.onnx.Vad
import com.k2fsa.sherpa.onnx.getFeatureConfig
import com.k2fsa.sherpa.onnx.getOfflineModelConfig
import com.k2fsa.sherpa.onnx.getVadModelConfig
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.cancel
import kotlinx.coroutines.launch
import kotlinx.coroutines.withContext
import kotlin.concurrent.thread
import androidx.lifecycle.lifecycleScope


private const val TAG = "sherpa-onnx"
private const val REQUEST_RECORD_AUDIO_PERMISSION = 200

class MainActivity : AppCompatActivity() {

    private lateinit var recordButton: Button
    private lateinit var textView: TextView

    private lateinit var vad: Vad

    private var audioRecord: AudioRecord? = null
    private var recordingThread: Thread? = null
    private val audioSource = MediaRecorder.AudioSource.MIC
    private val sampleRateInHz = 16000
    private val channelConfig = AudioFormat.CHANNEL_IN_MONO

    // Note: We don't use AudioFormat.ENCODING_PCM_FLOAT
    // since the AudioRecord.read(float[]) needs API level >= 23
    // but we are targeting API level >= 21
    private val audioFormat = AudioFormat.ENCODING_PCM_16BIT

    private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)

    // Non-streaming ASR
    private lateinit var offlineRecognizer: OfflineRecognizer

    private var idx: Int = 0
    private var lastText: String = ""

    @Volatile
    private var isRecording: Boolean = false

    override fun onRequestPermissionsResult(
        requestCode: Int, permissions: Array<String>, grantResults: IntArray
    ) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults)
        val permissionToRecordAccepted = if (requestCode == REQUEST_RECORD_AUDIO_PERMISSION) {
            grantResults[0] == PackageManager.PERMISSION_GRANTED
        } else {
            false
        }

        if (!permissionToRecordAccepted) {
            Log.e(TAG, "Audio record is disallowed")
            finish()
        }

        Log.i(TAG, "Audio record is permitted")
    }

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        setContentView(R.layout.activity_main)

        ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)

        textView = findViewById(R.id.my_text)
        textView.movementMethod = ScrollingMovementMethod()

        recordButton = findViewById(R.id.record_button)
        recordButton.isEnabled = false
        recordButton.setOnClickListener { onclick() }

        textView.text = "Initializing models... Please wait."

        lifecycleScope.launch(Dispatchers.IO) {
            Log.i(TAG, "Start to initialize model")
            initVadModel()
            Log.i(TAG, "Finished initializing model")

            Log.i(TAG, "Start to initialize non-streaming recognizer")
            initOfflineRecognizer()
            Log.i(TAG, "Finished initializing non-streaming recognizer")

            withContext(Dispatchers.Main) {
                recordButton.isEnabled = true
                textView.text = "" 
                Log.i(TAG, "Model initialization completed, button enabled")
            }
        }
    }

    private fun onclick() {
        if (!isRecording) {
            val ret = initMicrophone()
            if (!ret) {
                Log.e(TAG, "Failed to initialize microphone")
                return
            }
            Log.i(TAG, "state: ${audioRecord?.state}")
            audioRecord!!.startRecording()
            recordButton.setText(R.string.stop)
            isRecording = true

            textView.text = ""
            lastText = ""
            idx = 0

            vad.reset()
            recordingThread = thread(true) {
                processSamples()
            }
            Log.i(TAG, "Started recording")
        } else {
            isRecording = false

            audioRecord!!.stop()
            audioRecord!!.release()
            audioRecord = null

            recordButton.setText(R.string.start)
            Log.i(TAG, "Stopped recording")
        }
    }

    private  fun initVadModel() {
        val type = 0
        Log.i(TAG, "Select VAD model type ${type}")
        val config = getVadModelConfig(type)

        vad = Vad(
            assetManager = application.assets,
            config = config!!,
        )
    }

    private fun initMicrophone(): Boolean {
        if (ActivityCompat.checkSelfPermission(
                this, Manifest.permission.RECORD_AUDIO
            ) != PackageManager.PERMISSION_GRANTED
        ) {
            ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)
            return false
        }

        val numBytes = AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat)
        Log.i(
            TAG, "buffer size in milliseconds: ${numBytes * 1000.0f / sampleRateInHz}"
        )

        audioRecord = AudioRecord(
            audioSource,
            sampleRateInHz,
            channelConfig,
            audioFormat,
            numBytes * 2 // a sample has two bytes as we are using 16-bit PCM
        )
        return true
    }

    private fun processSamples() {
        Log.i(TAG, "processing samples")

        val bufferSize = 512 // in samples
        val buffer = ShortArray(bufferSize)
        val coroutineScope = CoroutineScope(Dispatchers.IO)


        while (isRecording) {
            val ret = audioRecord?.read(buffer, 0, buffer.size)
            if (ret != null && ret > 0) {
                val samples = FloatArray(ret) { buffer[it] / 32768.0f }

                vad.acceptWaveform(samples)
                while(!vad.empty()) {
                    var segment = vad.front()
                    coroutineScope.launch {
                        val text = runSecondPass(segment.samples)
                        if (text.isNotBlank()) {
                            withContext(Dispatchers.Main) {
                                lastText = "${lastText}\n${idx}: ${text}"
                                idx += 1
                                textView.text = lastText.lowercase()
                            }
                        }
                    }

                    vad.pop();
                }
            }
        }

        // Clean up the coroutine scope when done
        coroutineScope.cancel()
    }

    private fun initOfflineRecognizer() {
        // Please change getOfflineModelConfig() to add new models
        // See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
        // for a list of available models
        val asrModelType = 0
        val asrRuleFsts: String?
        asrRuleFsts = null
        Log.i(TAG, "Select model type ${asrModelType} for ASR")

        val config = OfflineRecognizerConfig(
            featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
            modelConfig = getOfflineModelConfig(type = asrModelType)!!,
        )
        if (asrRuleFsts != null) {
            config.ruleFsts = asrRuleFsts;
        }

        offlineRecognizer = OfflineRecognizer(
            assetManager = application.assets,
            config = config,
        )
    }

    private fun runSecondPass(samples: FloatArray): String {
        val stream = offlineRecognizer.createStream()
        stream.acceptWaveform(samples, sampleRateInHz)
        offlineRecognizer.decode(stream)
        val result = offlineRecognizer.getResult(stream)
        stream.release()
        return result.text
    }
}


================================================
FILE: android/SherpaOnnxVadAsr/app/src/main/jniLibs/arm64-v8a/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxVadAsr/app/src/main/jniLibs/armeabi-v7a/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxVadAsr/app/src/main/jniLibs/x86/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxVadAsr/app/src/main/jniLibs/x86_64/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxVadAsr/app/src/main/res/drawable/ic_launcher_background.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">
    <path
        android:fillColor="#3DDC84"
        android:pathData="M0,0h108v108h-108z" />
    <path
        android:fillColor="#00000000"
        android:pathData="M9,0L9,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,0L19,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M29,0L29,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M39,0L39,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M49,0L49,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M59,0L59,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M69,0L69,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M79,0L79,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M89,0L89,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M99,0L99,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,9L108,9"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,19L108,19"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,29L108,29"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,39L108,39"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,49L108,49"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,59L108,59"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,69L108,69"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,79L108,79"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,89L108,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,99L108,99"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,29L89,29"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,39L89,39"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,49L89,49"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,59L89,59"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,69L89,69"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,79L89,79"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M29,19L29,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M39,19L39,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M49,19L49,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M59,19L59,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M69,19L69,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M79,19L79,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
</vector>


================================================
FILE: android/SherpaOnnxVadAsr/app/src/main/res/drawable-v24/ic_launcher_foreground.xml
================================================
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:aapt="http://schemas.android.com/aapt"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">
    <path android:pathData="M31,63.928c0,0 6.4,-11 12.1,-13.1c7.2,-2.6 26,-1.4 26,-1.4l38.1,38.1L107,108.928l-32,-1L31,63.928z">
        <aapt:attr name="android:fillColor">
            <gradient
                android:endX="85.84757"
                android:endY="92.4963"
                android:startX="42.9492"
                android:startY="49.59793"
                android:type="linear">
                <item
                    android:color="#44000000"
                    android:offset="0.0" />
                <item
                    android:color="#00000000"
                    android:offset="1.0" />
            </gradient>
        </aapt:attr>
    </path>
    <path
        android:fillColor="#FFFFFF"
        android:fillType="nonZero"
        android:pathData="M65.3,45.828l3.8,-6.6c0.2,-0.4 0.1,-0.9 -0.3,-1.1c-0.4,-0.2 -0.9,-0.1 -1.1,0.3l-3.9,6.7c-6.3,-2.8 -13.4,-2.8 -19.7,0l-3.9,-6.7c-0.2,-0.4 -0.7,-0.5 -1.1,-0.3C38.8,38.328 38.7,38.828 38.9,39.228l3.8,6.6C36.2,49.428 31.7,56.028 31,63.928h46C76.3,56.028 71.8,49.428 65.3,45.828zM43.4,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2c-0.3,-0.7 -0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C45.3,56.528 44.5,57.328 43.4,57.328L43.4,57.328zM64.6,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2s-0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C66.5,56.528 65.6,57.328 64.6,57.328L64.6,57.328z"
        android:strokeWidth="1"
        android:strokeColor="#00000000" />
</vector>

================================================
FILE: android/SherpaOnnxVadAsr/app/src/main/res/layout/activity_main.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:app="http://schemas.android.com/apk/res-auto"
    xmlns:tools="http://schemas.android.com/tools"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    tools:context=".vad.asr.MainActivity">

    <LinearLayout
        android:layout_width="match_parent"
        android:layout_height="match_parent"
        android:gravity="center"
        android:orientation="vertical">

        <TextView
            android:id="@+id/my_text"
            android:layout_width="match_parent"
            android:layout_height="match_parent"
            android:layout_weight="2.5"
            android:padding="24dp"
            android:scrollbars="vertical"
            android:singleLine="false"
            android:text="@string/hint"
            app:layout_constraintBottom_toBottomOf="parent"
            app:layout_constraintEnd_toEndOf="parent"
            app:layout_constraintStart_toStartOf="parent"
            android:gravity="bottom"
            app:layout_constraintTop_toTopOf="parent" />

        <Button
            android:id="@+id/record_button"
            android:layout_width="wrap_content"
            android:layout_height="wrap_content"
            android:layout_weight="0.5"
            android:text="@string/start" />
    </LinearLayout>


</androidx.constraintlayout.widget.ConstraintLayout>

================================================
FILE: android/SherpaOnnxVadAsr/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@drawable/ic_launcher_background" />
    <foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

================================================
FILE: android/SherpaOnnxVadAsr/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@drawable/ic_launcher_background" />
    <foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

================================================
FILE: android/SherpaOnnxVadAsr/app/src/main/res/values/colors.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<resources>
    <color name="purple_200">#FFBB86FC</color>
    <color name="purple_500">#FF6200EE</color>
    <color name="purple_700">#FF3700B3</color>
    <color name="teal_200">#FF03DAC5</color>
    <color name="teal_700">#FF018786</color>
    <color name="black">#FF000000</color>
    <color name="white">#FFFFFFFF</color>
</resources>

================================================
FILE: android/SherpaOnnxVadAsr/app/src/main/res/values/strings.xml
================================================
<resources>
    <string name="app_name">VAD+ASR: Next-gen Kaldi</string>
    <string name="hint">Click the Start button to play speech-to-text with Next-gen Kaldi.
        \n
        \n\n\n
        The source code and pre-trained models are publicly available.
        Please see https://github.com/k2-fsa/sherpa-onnx for details.
        \n\n
        Speech recognition with Next-gen Kaldi using VAD and non-streaming ASR models.
    </string>
    <string name="start">Start</string>
    <string name="stop">Stop</string>
</resources>

================================================
FILE: android/SherpaOnnxVadAsr/app/src/main/res/values/themes.xml
================================================
<resources xmlns:tools="http://schemas.android.com/tools">
    <!-- Base application theme. -->
    <style name="Theme.SherpaOnnxVadAsr" parent="Theme.MaterialComponents.DayNight.DarkActionBar">
        <!-- Primary brand color. -->
        <item name="colorPrimary">@color/purple_500</item>
        <item name="colorPrimaryVariant">@color/purple_700</item>
        <item name="colorOnPrimary">@color/white</item>
        <!-- Secondary brand color. -->
        <item name="colorSecondary">@color/teal_200</item>
        <item name="colorSecondaryVariant">@color/teal_700</item>
        <item name="colorOnSecondary">@color/black</item>
        <!-- Status bar color. -->
        <item name="android:statusBarColor">?attr/colorPrimaryVariant</item>
        <!-- Customize your theme here. -->
    </style>
</resources>

================================================
FILE: android/SherpaOnnxVadAsr/app/src/main/res/values-night/themes.xml
================================================
<resources xmlns:tools="http://schemas.android.com/tools">
    <!-- Base application theme. -->
    <style name="Theme.SherpaOnnxVadAsr" parent="Theme.MaterialComponents.DayNight.DarkActionBar">
        <!-- Primary brand color. -->
        <item name="colorPrimary">@color/purple_200</item>
        <item name="colorPrimaryVariant">@color/purple_700</item>
        <item name="colorOnPrimary">@color/black</item>
        <!-- Secondary brand color. -->
        <item name="colorSecondary">@color/teal_200</item>
        <item name="colorSecondaryVariant">@color/teal_200</item>
        <item name="colorOnSecondary">@color/black</item>
        <!-- Status bar color. -->
        <item name="android:statusBarColor">?attr/colorPrimaryVariant</item>
        <!-- Customize your theme here. -->
    </style>
</resources>

================================================
FILE: android/SherpaOnnxVadAsr/app/src/main/res/xml/backup_rules.xml
================================================
<?xml version="1.0" encoding="utf-8"?><!--
   Sample backup rules file; uncomment and customize as necessary.
   See https://developer.android.com/guide/topics/data/autobackup
   for details.
   Note: This file is ignored for devices older that API 31
   See https://developer.android.com/about/versions/12/backup-restore
-->
<full-backup-content>
    <!--
   <include domain="sharedpref" path="."/>
   <exclude domain="sharedpref" path="device.xml"/>
-->
</full-backup-content>

================================================
FILE: android/SherpaOnnxVadAsr/app/src/main/res/xml/data_extraction_rules.xml
================================================
<?xml version="1.0" encoding="utf-8"?><!--
   Sample data extraction rules file; uncomment and customize as necessary.
   See https://developer.android.com/about/versions/12/backup-restore#xml-changes
   for details.
-->
<data-extraction-rules>
    <cloud-backup>
        <!-- TODO: Use <include> and <exclude> to control what is backed up.
        <include .../>
        <exclude .../>
        -->
    </cloud-backup>
    <!--
    <device-transfer>
        <include .../>
        <exclude .../>
    </device-transfer>
    -->
</data-extraction-rules>

================================================
FILE: android/SherpaOnnxVadAsr/app/src/test/java/com/k2fsa/sherpa/onnx/ExampleUnitTest.kt
================================================
package com.k2fsa.sherpa.onnx

import org.junit.Test

import org.junit.Assert.*

/**
 * Example local unit test, which will execute on the development machine (host).
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
class ExampleUnitTest {
    @Test
    fun addition_isCorrect() {
        assertEquals(4, 2 + 2)
    }
}

================================================
FILE: android/SherpaOnnxVadAsr/build.gradle
================================================
// Top-level build file where you can add configuration options common to all sub-projects/modules.
plugins {
    id 'com.android.application' version '7.3.1' apply false
    id 'com.android.library' version '7.3.1' apply false
    id 'org.jetbrains.kotlin.android' version '1.7.20' apply false
}

================================================
FILE: android/SherpaOnnxVadAsr/gradle/wrapper/gradle-wrapper.properties
================================================
#Sat Sep 23 20:50:52 CST 2023
distributionBase=GRADLE_USER_HOME
distributionUrl=https\://services.gradle.org/distributions/gradle-8.2-bin.zip
distributionPath=wrapper/dists
zipStorePath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME


================================================
FILE: android/SherpaOnnxVadAsr/gradle.properties
================================================
# Project-wide Gradle settings.
# IDE (e.g. Android Studio) users:
# Gradle settings configured through the IDE *will override*
# any settings specified in this file.
# For more details on how to configure your build environment visit
# http://www.gradle.org/docs/current/userguide/build_environment.html
# Specifies the JVM arguments used for the daemon process.
# The setting is particularly useful for tweaking memory settings.
org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
# When configured, Gradle will run in incubating parallel mode.
# This option should only be used with decoupled projects. More details, visit
# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
# org.gradle.parallel=true
# AndroidX package structure to make it clearer which packages are bundled with the
# Android operating system, and which are packaged with your app's APK
# https://developer.android.com/topic/libraries/support-library/androidx-rn
android.useAndroidX=true
# Kotlin code style for this project: "official" or "obsolete":
kotlin.code.style=official
# Enables namespacing of each library's R class so that its R class includes only the
# resources declared in the library itself and none from the library's dependencies,
# thereby reducing the size of the R class for that library
android.nonTransitiveRClass=true

================================================
FILE: android/SherpaOnnxVadAsr/gradlew
================================================
#!/usr/bin/env sh

#
# Copyright 2015 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

##############################################################################
##
##  Gradle start up script for UN*X
##
##############################################################################

# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
    ls=`ls -ld "$PRG"`
    link=`expr "$ls" : '.*-> \(.*\)$'`
    if expr "$link" : '/.*' > /dev/null; then
        PRG="$link"
    else
        PRG=`dirname "$PRG"`"/$link"
    fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null

APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`

# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"

warn () {
    echo "$*"
}

die () {
    echo
    echo "$*"
    echo
    exit 1
}

# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
  CYGWIN* )
    cygwin=true
    ;;
  Darwin* )
    darwin=true
    ;;
  MINGW* )
    msys=true
    ;;
  NONSTOP* )
    nonstop=true
    ;;
esac

CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar


# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
        # IBM's JDK on AIX uses strange locations for the executables
        JAVACMD="$JAVA_HOME/jre/sh/java"
    else
        JAVACMD="$JAVA_HOME/bin/java"
    fi
    if [ ! -x "$JAVACMD" ] ; then
        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
else
    JAVACMD="java"
    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi

# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
    MAX_FD_LIMIT=`ulimit -H -n`
    if [ $? -eq 0 ] ; then
        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
            MAX_FD="$MAX_FD_LIMIT"
        fi
        ulimit -n $MAX_FD
        if [ $? -ne 0 ] ; then
            warn "Could not set maximum file descriptor limit: $MAX_FD"
        fi
    else
        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
    fi
fi

# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi

# For Cygwin or MSYS, switch paths to Windows format before running java
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`

    JAVACMD=`cygpath --unix "$JAVACMD"`

    # We build the pattern for arguments to be converted via cygpath
    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
    SEP=""
    for dir in $ROOTDIRSRAW ; do
        ROOTDIRS="$ROOTDIRS$SEP$dir"
        SEP="|"
    done
    OURCYGPATTERN="(^($ROOTDIRS))"
    # Add a user-defined pattern to the cygpath arguments
    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
    fi
    # Now convert the arguments - kludge to limit ourselves to /bin/sh
    i=0
    for arg in "$@" ; do
        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
        CHECK2=`echo "$arg"|egrep -c "^-"`                                 ### Determine if an option

        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Added a condition
            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
        else
            eval `echo args$i`="\"$arg\""
        fi
        i=`expr $i + 1`
    done
    case $i in
        0) set -- ;;
        1) set -- "$args0" ;;
        2) set -- "$args0" "$args1" ;;
        3) set -- "$args0" "$args1" "$args2" ;;
        4) set -- "$args0" "$args1" "$args2" "$args3" ;;
        5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
        6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
        7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
        8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
        9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
    esac
fi

# Escape application args
save () {
    for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
    echo " "
}
APP_ARGS=`save "$@"`

# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"

exec "$JAVACMD" "$@"


================================================
FILE: android/SherpaOnnxVadAsr/gradlew.bat
================================================
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem      https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem

@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem  Gradle startup script for Windows
@rem
@rem ##########################################################################

@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal

set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"

@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome

set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto execute

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto execute

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar


@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*

:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if  not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1

:mainEnd
if "%OS%"=="Windows_NT" endlocal

:omega


================================================
FILE: android/SherpaOnnxVadAsr/settings.gradle
================================================
pluginManagement {
    repositories {
        gradlePluginPortal()
        google()
        mavenCentral()
    }
}
dependencyResolutionManagement {
    repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
    repositories {
        google()
        mavenCentral()
    }
}
rootProject.name = "SherpaOnnxVadAsr"
include ':app'


================================================
FILE: android/SherpaOnnxWebSocket/.gitignore
================================================
*.iml
.gradle
/local.properties
/.idea/caches
/.idea/libraries
/.idea/modules.xml
/.idea/workspace.xml
/.idea/navEditor.xml
/.idea/assetWizardSettings.xml
.DS_Store
/build
/captures
.externalNativeBuild
.cxx
local.properties


================================================
FILE: android/SherpaOnnxWebSocket/app/.gitignore
================================================
/build

================================================
FILE: android/SherpaOnnxWebSocket/app/build.gradle
================================================
plugins {
    id 'com.android.application'
    id 'org.jetbrains.kotlin.android'
}

android {
    namespace 'com.k2fsa.sherpa.onnx'
    compileSdk 32

    defaultConfig {
        applicationId "com.k2fsa.sherpa.onnx"
        minSdk 21
        targetSdk 32
        versionCode 20260320
        versionName "1.12.31"

        testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
    }

    buildTypes {
        release {
            minifyEnabled false
            proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
        }
    }
    compileOptions {
        sourceCompatibility JavaVersion.VERSION_1_8
        targetCompatibility JavaVersion.VERSION_1_8
    }
    kotlinOptions {
        jvmTarget = '1.8'
    }
}

dependencies {

    implementation 'androidx.core:core-ktx:1.7.0'
    implementation 'androidx.appcompat:appcompat:1.5.1'
    implementation 'com.google.android.material:material:1.7.0'
    implementation 'androidx.constraintlayout:constraintlayout:2.1.4'
    testImplementation 'junit:junit:4.13.2'
    androidTestImplementation 'androidx.test.ext:junit:1.1.4'
    androidTestImplementation 'androidx.test.espresso:espresso-core:3.5.0'

    implementation 'org.java-websocket:Java-WebSocket:1.4.0'
    implementation 'com.google.code.gson:gson:2.10.1'
}

================================================
FILE: android/SherpaOnnxWebSocket/app/proguard-rules.pro
================================================
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
#   http://developer.android.com/guide/developing/tools/proguard.html

# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
#   public *;
#}

# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable

# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile

================================================
FILE: android/SherpaOnnxWebSocket/app/src/androidTest/java/com/k2fsa/sherpa/onnx/ExampleInstrumentedTest.kt
================================================
package com.k2fsa.sherpa.onnx

import androidx.test.platform.app.InstrumentationRegistry
import androidx.test.ext.junit.runners.AndroidJUnit4

import org.junit.Test
import org.junit.runner.RunWith

import org.junit.Assert.*

/**
 * Instrumented test, which will execute on an Android device.
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
@RunWith(AndroidJUnit4::class)
class ExampleInstrumentedTest {
    @Test
    fun useAppContext() {
        // Context of the app under test.
        val appContext = InstrumentationRegistry.getInstrumentation().targetContext
        assertEquals("com.k2fsa.sherpa.onnx", appContext.packageName)
    }
}

================================================
FILE: android/SherpaOnnxWebSocket/app/src/main/AndroidManifest.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:tools="http://schemas.android.com/tools">

    <uses-permission android:name="android.permission.RECORD_AUDIO" />
    <uses-permission android:name="android.permission.INTERNET"/>

    <application
        android:allowBackup="true"
        android:dataExtractionRules="@xml/data_extraction_rules"
        android:fullBackupContent="@xml/backup_rules"
        android:icon="@mipmap/ic_launcher"
        android:label="@string/app_name"
        android:roundIcon="@mipmap/ic_launcher_round"
        android:supportsRtl="true"
        android:theme="@style/Theme.SherpaOnnx"
        tools:targetApi="31">
        <activity
            android:name=".MainActivity"
            android:exported="true">
            <intent-filter>
                <action android:name="android.intent.action.MAIN" />

                <category android:name="android.intent.category.LAUNCHER" />
            </intent-filter>

            <meta-data
                android:name="android.app.lib_name"
                android:value="" />
        </activity>
    </application>

</manifest>


================================================
FILE: android/SherpaOnnxWebSocket/app/src/main/assets/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxWebSocket/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
================================================
// add by longsm at 2023/10/13
package com.k2fsa.sherpa.onnx

import android.Manifest
import android.content.pm.PackageManager
import android.media.AudioFormat
import android.media.AudioRecord
import android.media.MediaRecorder
import android.os.Bundle
import android.text.TextUtils
import android.text.method.ScrollingMovementMethod
import android.util.Log
import android.widget.Button
import android.widget.EditText
import android.widget.TextView
import androidx.appcompat.app.AppCompatActivity
import androidx.core.app.ActivityCompat
import com.google.gson.Gson
import com.google.gson.reflect.TypeToken
import org.java_websocket.handshake.ServerHandshake
import java.net.URI
import java.net.URISyntaxException
import java.nio.ByteBuffer
import java.nio.ByteOrder
import kotlin.concurrent.thread

private const val TAG = "sherpa-onnx"
private const val REQUEST_RECORD_AUDIO_PERMISSION = 200

class MainActivity : AppCompatActivity(), MyWebsocketClient.WebsocketClientCallback {
    private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)

    private var audioRecord: AudioRecord? = null
    private lateinit var recordButton: Button
    private lateinit var connectButton: Button
    private lateinit var textView: TextView
    private lateinit var etUrl: EditText
    private var recordingThread: Thread? = null

    private var websocketClient: MyWebsocketClient? = null

    private val audioSource = MediaRecorder.AudioSource.MIC
    private val sampleRateInHz = 16000
    private val channelConfig = AudioFormat.CHANNEL_IN_MONO

    // Note: We don't use AudioFormat.ENCODING_PCM_FLOAT
    // since the AudioRecord.read(float[]) needs API level >= 23
    // but we are targeting API level >= 21
    private val audioFormat = AudioFormat.ENCODING_PCM_16BIT
    private var idx: Long = 0
    private var lastText: String = ""

    @Volatile
    private var isRecording: Boolean = false

    @Volatile
    private var isConnected: Boolean = false

    override fun onRequestPermissionsResult(
        requestCode: Int, permissions: Array<String>, grantResults: IntArray
    ) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults)
        val permissionToRecordAccepted = if (requestCode == REQUEST_RECORD_AUDIO_PERMISSION) {
            grantResults[0] == PackageManager.PERMISSION_GRANTED
        } else {
            false
        }

        if (!permissionToRecordAccepted) {
            Log.e(TAG, "Audio record is disallowed")
            finish()
        }

        Log.i(TAG, "Audio record is permitted")
    }

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        setContentView(R.layout.activity_main)

        ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)

        recordButton = findViewById(R.id.record_button)
        recordButton.setOnClickListener { onclick() }

        connectButton = findViewById(R.id.connect_button)
        connectButton.setOnClickListener { onclickConnect() }

        textView = findViewById(R.id.my_text)
        textView.movementMethod = ScrollingMovementMethod()

        recordButton.isEnabled = false

        etUrl = findViewById(R.id.et_uri)
    }

    private fun onclickConnect() {
        if (!isConnected) {
            val etUrlStr = etUrl.text.toString().trim()
            var uriStr = "ws://172.28.13.167:6006"
            if (!TextUtils.isEmpty(etUrlStr)) {
                uriStr = etUrlStr
            }
            try {
                val uri = URI(uriStr)
                websocketClient = MyWebsocketClient(uri)
                websocketClient?.setClientCallback(this)
                websocketClient?.connect()
            } catch (e: URISyntaxException) {
                Log.e(TAG, "URISyntaxException === >> $e")
            }
        } else {
            Log.e(TAG, "onclick disconnect")
            websocketClient?.close()
            websocketClient = null
        }

    }

    private fun onclick() {

        if (!isRecording) {
            val ret = initMicrophone()
            if (!ret) {
                Log.e(TAG, "Failed to initialize microphone")
                return
            }
            Log.i(TAG, "state: ${audioRecord?.state}")
            audioRecord!!.startRecording()
            recordButton.setText(R.string.stop)
            isRecording = true
            textView.text = ""
            lastText = ""
            idx = 0

            recordingThread = thread(true) {
                processSamples()
            }
            connectButton.isEnabled = false
            Log.i(TAG, "Started recording")
        } else {
            isRecording = false
            audioRecord!!.stop()
            audioRecord!!.release()
            audioRecord = null
            recordButton.setText(R.string.start)
            connectButton.isEnabled = true
            Log.i(TAG, "Stopped recording")
        }
    }

    private fun processSamples() {
        Log.i(TAG, "processing samples")

        val interval = 0.1 // i.e., 100 ms
        val bufferSize = (interval * sampleRateInHz).toInt() // in samples
        val buffer = ShortArray(bufferSize)

        while (isRecording) {
            val ret = audioRecord?.read(buffer, 0, buffer.size)
            if (ret != null && ret > 0) {
                val samples = FloatArray(ret) { buffer[it] / 32768.0f }

                val buffer = ByteBuffer.allocate(4 * samples.size)
                    .order(ByteOrder.LITTLE_ENDIAN) // float is sizeof 4. allocate enough buffer


                for (f in samples) {
                    buffer.putFloat(f)
                }
                buffer.rewind()
                buffer.flip()
                buffer.order(ByteOrder.LITTLE_ENDIAN)

                if (isConnected) {
                    websocketClient?.send(buffer.array()) // send buf to server
                }

            }
        }
    }

    private fun initMicrophone(): Boolean {
        if (ActivityCompat.checkSelfPermission(
                this, Manifest.permission.RECORD_AUDIO
            ) != PackageManager.PERMISSION_GRANTED
        ) {
            ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)
            return false
        }

        val numBytes = AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat)
        Log.i(
            TAG, "buffer size in milliseconds: ${numBytes * 1000.0f / sampleRateInHz}"
        )

        audioRecord = AudioRecord(
            audioSource,
            sampleRateInHz,
            channelConfig,
            audioFormat,
            numBytes * 2 // a sample has two bytes as we are using 16-bit PCM
        )
        return true
    }

    override fun onOpen(handshakedata: ServerHandshake?) {
        Log.i(TAG, "onOpen === >>")
        isConnected = true
        runOnUiThread {
            recordButton.isEnabled = true
            connectButton.text = getString(R.string.disconnect)
        }
    }

    private val gson = Gson()
    private val recognitionText = hashMapOf<Long, String>()

    private fun getDisplayResult(): String {
        var i = 0
        var ans = ""
        for ((key,value) in recognitionText){
            if (value == ""){
                continue
            }
            ans += " $i : ${recognitionText[key]}\n"
            i += 1
        }
        return ans

    }

    override fun onMessage(message: String?) {
        Log.i(TAG, "onMessage === >> $message")
        val speechContent = gson.fromJson<SpeechContent>(
            message,
            object : TypeToken<SpeechContent?>() {}.type
        )

        val text = speechContent.text
        val segment = speechContent.segment
        Log.i(TAG, "text === >> $text")

        recognitionText[segment] = text
        runOnUiThread {
            textView.text = getDisplayResult()
        }
    }

    override fun onClose(code: Int, reason: String?, remote: Boolean?) {
        Log.i(TAG, "onClose === >> code$code reason$reason remote$remote")
        isConnected = false
        runOnUiThread {
            recordButton.isEnabled = false
            connectButton.text = getString(R.string.connect)
            textView.text = getString(R.string.hint)
        }

    }

    override fun onError(ex: Exception?) {
        Log.i(TAG, "onError === >> $ex")
        runOnUiThread {
            textView.text = "onError === >> $ex"
        }

    }
}


================================================
FILE: android/SherpaOnnxWebSocket/app/src/main/java/com/k2fsa/sherpa/onnx/MyWebsocketClient.kt
================================================
package com.k2fsa.sherpa.onnx

import org.java_websocket.client.WebSocketClient
import org.java_websocket.handshake.ServerHandshake
import java.net.URI

class MyWebsocketClient(serverUri: URI?) : WebSocketClient(serverUri) {

    override fun onOpen(handshakedata: ServerHandshake) {
        clientCallback?.onOpen(handshakedata)

    }
    override fun onMessage(message: String) {
        clientCallback?.onMessage(message)
    }

    override fun onClose(code: Int, reason: String, remote: Boolean) {
        clientCallback?.onClose(code,reason,remote)
    }

    override fun onError(ex: Exception) {
        clientCallback?.onError(ex)
    }

    private var clientCallback: WebsocketClientCallback? = null

    fun setClientCallback(clientCallback: WebsocketClientCallback?) {
        this.clientCallback = clientCallback
    }

    interface WebsocketClientCallback {
        fun onOpen(handshakedata: ServerHandshake?)
        fun onMessage(message: String?)
        fun onClose(code: Int, reason: String?, remote: Boolean?)
        fun onError(ex: Exception?)
    }


}

================================================
FILE: android/SherpaOnnxWebSocket/app/src/main/java/com/k2fsa/sherpa/onnx/SpeechContent.kt
================================================
package com.k2fsa.sherpa.onnx

data class SpeechContent(val text:String,val segment:Long)


================================================
FILE: android/SherpaOnnxWebSocket/app/src/main/java/com/k2fsa/sherpa/onnx/WaveReader.kt
================================================
// Copyright (c)  2023  Xiaomi Corporation
package com.k2fsa.sherpa.onnx

import android.content.res.AssetManager

class WaveReader {
    companion object {
        // Read a mono wave file asset
        // The returned array has two entries:
        //  - the first entry contains an 1-D float array
        //  - the second entry is the sample rate
        external fun readWaveFromAsset(
            assetManager: AssetManager,
            filename: String,
        ): Array<Any>

        // Read a mono wave file from disk
        // The returned array has two entries:
        //  - the first entry contains an 1-D float array
        //  - the second entry is the sample rate
        external fun readWaveFromFile(
            filename: String,
        ): Array<Any>

        init {
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}


================================================
FILE: android/SherpaOnnxWebSocket/app/src/main/jniLibs/.gitignore
================================================
*.so
*.txt
*.onnx
*.wav


================================================
FILE: android/SherpaOnnxWebSocket/app/src/main/jniLibs/arm64-v8a/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxWebSocket/app/src/main/jniLibs/armeabi-v7a/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxWebSocket/app/src/main/jniLibs/x86/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxWebSocket/app/src/main/jniLibs/x86_64/.gitkeep
================================================


================================================
FILE: android/SherpaOnnxWebSocket/app/src/main/res/drawable/ic_launcher_background.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">
    <path
        android:fillColor="#3DDC84"
        android:pathData="M0,0h108v108h-108z" />
    <path
        android:fillColor="#00000000"
        android:pathData="M9,0L9,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,0L19,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M29,0L29,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M39,0L39,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M49,0L49,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M59,0L59,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M69,0L69,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M79,0L79,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M89,0L89,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M99,0L99,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,9L108,9"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,19L108,19"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,29L108,29"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,39L108,39"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,49L108,49"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,59L108,59"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,69L108,69"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,79L108,79"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,89L108,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,99L108,99"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,29L89,29"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,39L89,39"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,49L89,49"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,59L89,59"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,69L89,69"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,79L89,79"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M29,19L29,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M39,19L39,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M49,19L49,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M59,19L59,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M69,19L69,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M79,19L79,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
</vector>


================================================
FILE: android/SherpaOnnxWebSocket/app/src/main/res/drawable-v24/ic_launcher_foreground.xml
================================================
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:aapt="http://schemas.android.com/aapt"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">
    <path android:pathData="M31,63.928c0,0 6.4,-11 12.1,-13.1c7.2,-2.6 26,-1.4 26,-1.4l38.1,38.1L107,108.928l-32,-1L31,63.928z">
        <aapt:attr name="android:fillColor">
            <gradient
                android:endX="85.84757"
                android:endY="92.4963"
                android:startX="42.9492"
                android:startY="49.59793"
                android:type="linear">
                <item
                    android:color="#44000000"
                    android:offset="0.0" />
                <item
                    android:color="#00000000"
                    android:offset="1.0" />
            </gradient>
        </aapt:attr>
    </path>
    <path
        android:fillColor="#FFFFFF"
        android:fillType="nonZero"
        android:pathData="M65.3,45.828l3.8,-6.6c0.2,-0.4 0.1,-0.9 -0.3,-1.1c-0.4,-0.2 -0.9,-0.1 -1.1,0.3l-3.9,6.7c-6.3,-2.8 -13.4,-2.8 -19.7,0l-3.9,-6.7c-0.2,-0.4 -0.7,-0.5 -1.1,-0.3C38.8,38.328 38.7,38.828 38.9,39.228l3.8,6.6C36.2,49.428 31.7,56.028 31,63.928h46C76.3,56.028 71.8,49.428 65.3,45.828zM43.4,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2c-0.3,-0.7 -0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C45.3,56.528 44.5,57.328 43.4,57.328L43.4,57.328zM64.6,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2s-0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C66.5,56.528 65.6,57.328 64.6,57.328L64.6,57.328z"
        android:strokeWidth="1"
        android:strokeColor="#00000000" />
</vector>

================================================
FILE: android/SherpaOnnxWebSocket/app/src/main/res/layout/activity_main.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:app="http://schemas.android.com/apk/res-auto"
    xmlns:tools="http://schemas.android.com/tools"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    tools:context=".MainActivity">

    <TextView
        android:id="@+id/text_hint"
        android:layout_width="match_parent"
        android:layout_height="wrap_content"
        android:text="@string/uri_format"
        android:gravity="center"
        app:layout_constraintLeft_toLeftOf="parent"
        app:layout_constraintRight_toRightOf="parent"
        app:layout_constraintTop_toTopOf="parent" />

    <EditText
        android:id="@+id/et_uri"
        android:layout_width="match_parent"
        android:layout_height="56dp"
        android:layout_marginTop="4dp"
        android:hint="@string/uri_hint"
        android:gravity="center"
        app:layout_constraintLeft_toLeftOf="parent"
        app:layout_constraintRight_toRightOf="parent"
        app:layout_constraintTop_toBottomOf="@id/text_hint" />
    <Button
        android:id="@+id/connect_button"
        android:layout_width="wrap_content"
        android:layout_height="wrap_content"
        android:layout_marginTop="4dp"
        android:textAllCaps="false"
        app:layout_constraintLeft_toLeftOf="parent"
        app:layout_constraintRight_toRightOf="parent"
        app:layout_constraintTop_toBottomOf="@id/et_uri"
        android:text="@string/connect" />
    <Button
        android:id="@+id/record_button"
        android:layout_width="wrap_content"
        android:layout_height="wrap_content"
        android:layout_marginTop="4dp"
        android:textAllCaps="false"
        app:layout_constraintLeft_toLeftOf="parent"
        app:layout_constraintRight_toRightOf="parent"
        app:layout_constraintTop_toBottomOf="@id/connect_button"
        android:text="@string/start" />

    <TextView
        android:id="@+id/my_text"
        android:layout_width="match_parent"
        android:layout_height="0dp"
        android:padding="24dp"
        android:scrollbars="vertical"
        android:singleLine="false"
        android:text="@string/hint"
        app:layout_constraintBottom_toBottomOf="parent"
        app:layout_constraintEnd_toEndOf="parent"
        app:layout_constraintStart_toStartOf="parent"
        app:layout_constraintTop_toBottomOf="@id/record_button" />


</androidx.constraintlayout.widget.ConstraintLayout>

================================================
FILE: android/SherpaOnnxWebSocket/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@drawable/ic_launcher_background" />
    <foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

================================================
FILE: android/SherpaOnnxWebSocket/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@drawable/ic_launcher_background" />
    <foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

================================================
FILE: android/SherpaOnnxWebSocket/app/src/main/res/values/colors.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<resources>
    <color name="purple_200">#FFBB86FC</color>
    <color name="purple_500">#FF6200EE</color>
    <color name="purple_700">#FF3700B3</color>
    <color name="teal_200">#FF03DAC5</color>
    <color name="teal_700">#FF018786</color>
    <color name="black">#FF000000</color>
    <color name="white">#FFFFFFFF</color>
</resources>

================================================
FILE: android/SherpaOnnxWebSocket/app/src/main/res/values/strings.xml
================================================
<resources>
    <string name="app_name">ASR with Next-gen Kaldi</string>
    <string name="hint">
        Click the connect button to connect websocket.
        \n
        \n\n\n
        Click the Start button to play speech-to-text with Next-gen Kaldi.
        \n
        \n\n\n
        The source code and pre-trained models are publicly available.
        Please see https://github.com/k2-fsa/sherpa-onnx for details.
    </string>
    <string name="start">Start</string>
    <string name="stop">Stop</string>
    <string name="connect">connect</string>
    <string name="disconnect">disconnect</string>
    <string name="uri_format">please input uri first,format as follows:\n
        ws://ip:port or wss://ip:port</string>
    <string name="uri_hint">please input uri first</string>
</resources>

================================================
FILE: android/SherpaOnnxWebSocket/app/src/main/res/values/themes.xml
================================================
<resources xmlns:tools="http://schemas.android.com/tools">
    <!-- Base application theme. -->
    <style name="Theme.SherpaOnnx" parent="Theme.MaterialComponents.DayNight.DarkActionBar">
        <!-- Primary brand color. -->
        <item name="colorPrimary">@color/purple_500</item>
        <item name="colorPrimaryVariant">@color/purple_700</item>
        <item name="colorOnPrimary">@color/white</item>
        <!-- Secondary brand color. -->
        <item name="colorSecondary">@color/teal_200</item>
        <item name="colorSecondaryVariant">@color/teal_700</item>
        <item name="colorOnSecondary">@color/black</item>
        <!-- Status bar color. -->
        <item name="android:statusBarColor">?attr/colorPrimaryVariant</item>
        <!-- Customize your theme here. -->
    </style>
</resources>

================================================
FILE: android/SherpaOnnxWebSocket/app/src/main/res/values-night/themes.xml
================================================
<resources xmlns:tools="http://schemas.android.com/tools">
    <!-- Base application theme. -->
    <style name="Theme.SherpaOnnx" parent="Theme.MaterialComponents.DayNight.DarkActionBar">
        <!-- Primary brand color. -->
        <item name="colorPrimary">@color/purple_200</item>
        <item name="colorPrimaryVariant">@color/purple_700</item>
        <item name="colorOnPrimary">@color/black</item>
        <!-- Secondary brand color. -->
        <item name="colorSecondary">@color/teal_200</item>
        <item name="colorSecondaryVariant">@color/teal_200</item>
        <item name="colorOnSecondary">@color/black</item>
        <!-- Status bar color. -->
        <item name="android:statusBarColor">?attr/colorPrimaryVariant</item>
        <!-- Customize your theme here. -->
    </style>
</resources>

================================================
FILE: android/SherpaOnnxWebSocket/app/src/main/res/xml/backup_rules.xml
================================================
<?xml version="1.0" encoding="utf-8"?><!--
   Sample backup rules file; uncomment and customize as necessary.
   See https://developer.android.com/guide/topics/data/autobackup
   for details.
   Note: This file is ignored for devices older that API 31
   See https://developer.android.com/about/versions/12/backup-restore
-->
<full-backup-content>
    <!--
   <include domain="sharedpref" path="."/>
   <exclude domain="sharedpref" path="device.xml"/>
-->
</full-backup-content>

================================================
FILE: android/SherpaOnnxWebSocket/app/src/main/res/xml/data_extraction_rules.xml
================================================
<?xml version="1.0" encoding="utf-8"?><!--
   Sample data extraction rules file; uncomment and customize as necessary.
   See https://developer.android.com/about/versions/12/backup-restore#xml-changes
   for details.
-->
<data-extraction-rules>
    <cloud-backup>
        <!-- TODO: Use <include> and <exclude> to control what is backed up.
        <include .../>
        <exclude .../>
        -->
    </cloud-backup>
    <!--
    <device-transfer>
        <include .../>
        <exclude .../>
    </device-transfer>
    -->
</data-extraction-rules>

================================================
FILE: android/SherpaOnnxWebSocket/app/src/test/java/com/k2fsa/sherpa/onnx/ExampleUnitTest.kt
================================================
package com.k2fsa.sherpa.onnx

import org.junit.Test

import org.junit.Assert.*

/**
 * Example local unit test, which will execute on the development machine (host).
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
class ExampleUnitTest {
    @Test
    fun addition_isCorrect() {
        assertEquals(4, 2 + 2)
    }
}

================================================
FILE: android/SherpaOnnxWebSocket/build.gradle
================================================
// Top-level build file where you can add configuration options common to all sub-projects/modules.
plugins {
    id 'com.android.application' version '7.3.1' apply false
    id 'com.android.library' version '7.3.1' apply false
    id 'org.jetbrains.kotlin.android' version '1.7.20' apply false
}

================================================
FILE: android/SherpaOnnxWebSocket/gradle/wrapper/gradle-wrapper.properties
================================================
#Thu Feb 23 11:09:06 CST 2023
distributionBase=GRADLE_USER_HOME
distributionUrl=https\://services.gradle.org/distributions/gradle-8.2-bin.zip
distributionPath=wrapper/dists
zipStorePath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME


================================================
FILE: android/SherpaOnnxWebSocket/gradle.properties
================================================
# Project-wide Gradle settings.
# IDE (e.g. Android Studio) users:
# Gradle settings configured through the IDE *will override*
# any settings specified in this file.
# For more details on how to configure your build environment visit
# http://www.gradle.org/docs/current/userguide/build_environment.html
# Specifies the JVM arguments used for the daemon process.
# The setting is particularly useful for tweaking memory settings.
org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
# When configured, Gradle will run in incubating parallel mode.
# This option should only be used with decoupled projects. More details, visit
# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
# org.gradle.parallel=true
# AndroidX package structure to make it clearer which packages are bundled with the
# Android operating system, and which are packaged with your app's APK
# https://developer.android.com/topic/libraries/support-library/androidx-rn
android.useAndroidX=true
# Kotlin code style for this project: "official" or "obsolete":
kotlin.code.style=official
# Enables namespacing of each library's R class so that its R class includes only the
# resources declared in the library itself and none from the library's dependencies,
# thereby reducing the size of the R class for that library
android.nonTransitiveRClass=true

================================================
FILE: android/SherpaOnnxWebSocket/gradlew
================================================
#!/usr/bin/env sh

#
# Copyright 2015 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

##############################################################################
##
##  Gradle start up script for UN*X
##
##############################################################################

# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
    ls=`ls -ld "$PRG"`
    link=`expr "$ls" : '.*-> \(.*\)$'`
    if expr "$link" : '/.*' > /dev/null; then
        PRG="$link"
    else
        PRG=`dirname "$PRG"`"/$link"
    fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null

APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`

# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"

warn () {
    echo "$*"
}

die () {
    echo
    echo "$*"
    echo
    exit 1
}

# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
  CYGWIN* )
    cygwin=true
    ;;
  Darwin* )
    darwin=true
    ;;
  MINGW* )
    msys=true
    ;;
  NONSTOP* )
    nonstop=true
    ;;
esac

CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar


# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
        # IBM's JDK on AIX uses strange locations for the executables
        JAVACMD="$JAVA_HOME/jre/sh/java"
    else
        JAVACMD="$JAVA_HOME/bin/java"
    fi
    if [ ! -x "$JAVACMD" ] ; then
        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
else
    JAVACMD="java"
    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi

# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
    MAX_FD_LIMIT=`ulimit -H -n`
    if [ $? -eq 0 ] ; then
        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
            MAX_FD="$MAX_FD_LIMIT"
        fi
        ulimit -n $MAX_FD
        if [ $? -ne 0 ] ; then
            warn "Could not set maximum file descriptor limit: $MAX_FD"
        fi
    else
        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
    fi
fi

# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi

# For Cygwin or MSYS, switch paths to Windows format before running java
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`

    JAVACMD=`cygpath --unix "$JAVACMD"`

    # We build the pattern for arguments to be converted via cygpath
    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
    SEP=""
    for dir in $ROOTDIRSRAW ; do
        ROOTDIRS="$ROOTDIRS$SEP$dir"
        SEP="|"
    done
    OURCYGPATTERN="(^($ROOTDIRS))"
    # Add a user-defined pattern to the cygpath arguments
    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
    fi
    # Now convert the arguments - kludge to limit ourselves to /bin/sh
    i=0
    for arg in "$@" ; do
        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
        CHECK2=`echo "$arg"|egrep -c "^-"`                                 ### Determine if an option

        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Added a condition
            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
        else
            eval `echo args$i`="\"$arg\""
        fi
        i=`expr $i + 1`
    done
    case $i in
        0) set -- ;;
        1) set -- "$args0" ;;
        2) set -- "$args0" "$args1" ;;
        3) set -- "$args0" "$args1" "$args2" ;;
        4) set -- "$args0" "$args1" "$args2" "$args3" ;;
        5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
        6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
        7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
        8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
        9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
    esac
fi

# Escape application args
save () {
    for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
    echo " "
}
APP_ARGS=`save "$@"`

# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"

exec "$JAVACMD" "$@"


================================================
FILE: android/SherpaOnnxWebSocket/gradlew.bat
================================================
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem      https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem

@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem  Gradle startup script for Windows
@rem
@rem ##########################################################################

@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal

set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"

@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome

set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto execute

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto execute

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar


@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*

:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if  not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1

:mainEnd
if "%OS%"=="Windows_NT" endlocal

:omega


================================================
FILE: android/SherpaOnnxWebSocket/settings.gradle
================================================
pluginManagement {
    repositories {
        gradlePluginPortal()
        google()
        mavenCentral()
    }
}
dependencyResolutionManagement {
    repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
    repositories {
        google()
        mavenCentral()
    }
}
rootProject.name = "SherpaOnnx"
include ':app'


================================================
FILE: c-api-examples/CMakeLists.txt
================================================
include(cargs)

include_directories(${PROJECT_SOURCE_DIR})
add_executable(decode-file-c-api decode-file-c-api.c)
target_link_libraries(decode-file-c-api sherpa-onnx-c-api cargs)

add_executable(kws-c-api kws-c-api.c)
target_link_libraries(kws-c-api sherpa-onnx-c-api)

add_executable(speech-enhancement-gtcrn-c-api speech-enhancement-gtcrn-c-api.c)
target_link_libraries(speech-enhancement-gtcrn-c-api sherpa-onnx-c-api)

add_executable(speech-enhancement-dpdfnet-c-api speech-enhancement-dpdfnet-c-api.c)
target_link_libraries(speech-enhancement-dpdfnet-c-api sherpa-onnx-c-api)

add_executable(online-speech-enhancement-gtcrn-c-api
               online-speech-enhancement-gtcrn-c-api.c)
target_link_libraries(online-speech-enhancement-gtcrn-c-api sherpa-onnx-c-api)

add_executable(online-speech-enhancement-dpdfnet-c-api
               online-speech-enhancement-dpdfnet-c-api.c)
target_link_libraries(online-speech-enhancement-dpdfnet-c-api sherpa-onnx-c-api)

if(SHERPA_ONNX_ENABLE_TTS)
  add_executable(offline-tts-c-api offline-tts-c-api.c)
  target_link_libraries(offline-tts-c-api sherpa-onnx-c-api cargs)

  add_executable(matcha-tts-zh-c-api matcha-tts-zh-c-api.c)
  target_link_libraries(matcha-tts-zh-c-api sherpa-onnx-c-api)

  add_executable(matcha-tts-en-c-api matcha-tts-en-c-api.c)
  target_link_libraries(matcha-tts-en-c-api sherpa-onnx-c-api)

  add_executable(kokoro-tts-en-c-api kokoro-tts-en-c-api.c)
  target_link_libraries(kokoro-tts-en-c-api sherpa-onnx-c-api)

  add_executable(kitten-tts-en-c-api kitten-tts-en-c-api.c)
  target_link_libraries(kitten-tts-en-c-api sherpa-onnx-c-api)

  add_executable(kokoro-tts-zh-en-c-api kokoro-tts-zh-en-c-api.c)
  target_link_libraries(kokoro-tts-zh-en-c-api sherpa-onnx-c-api)

  add_executable(pocket-tts-en-c-api pocket-tts-en-c-api.c)
  target_link_libraries(pocket-tts-en-c-api sherpa-onnx-c-api)

  add_executable(supertonic-tts-en-c-api supertonic-tts-en-c-api.c)
  target_link_libraries(supertonic-tts-en-c-api sherpa-onnx-c-api)

  add_executable(zipvoice-tts-zh-en-c-api zipvoice-tts-zh-en-c-api.c)
  target_link_libraries(zipvoice-tts-zh-en-c-api sherpa-onnx-c-api)
endif()

if(SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION)
  add_executable(offline-speaker-diarization-c-api offline-speaker-diarization-c-api.c)
  target_link_libraries(offline-speaker-diarization-c-api sherpa-onnx-c-api)
endif()

add_executable(spoken-language-identification-c-api spoken-language-identification-c-api.c)
target_link_libraries(spoken-language-identification-c-api sherpa-onnx-c-api)

add_executable(speaker-identification-c-api speaker-identification-c-api.c)
target_link_libraries(speaker-identification-c-api sherpa-onnx-c-api)

add_executable(streaming-hlg-decode-file-c-api streaming-hlg-decode-file-c-api.c)
target_link_libraries(streaming-hlg-decode-file-c-api sherpa-onnx-c-api)

add_executable(streaming-t-one-ctc-c-api streaming-t-one-ctc-c-api.c)
target_link_libraries(streaming-t-one-ctc-c-api sherpa-onnx-c-api)

add_executable(audio-tagging-c-api audio-tagging-c-api.c)
target_link_libraries(audio-tagging-c-api sherpa-onnx-c-api)

add_executable(add-punctuation-c-api add-punctuation-c-api.c)
target_link_libraries(add-punctuation-c-api sherpa-onnx-c-api)

add_executable(add-punctuation-online-c-api add-punctuation-online-c-api.c)
target_link_libraries(add-punctuation-online-c-api sherpa-onnx-c-api)

add_executable(whisper-c-api whisper-c-api.c)
target_link_libraries(whisper-c-api sherpa-onnx-c-api)

add_executable(fire-red-asr-c-api fire-red-asr-c-api.c)
target_link_libraries(fire-red-asr-c-api sherpa-onnx-c-api)

add_executable(nemo-canary-c-api nemo-canary-c-api.c)
target_link_libraries(nemo-canary-c-api sherpa-onnx-c-api)

add_executable(nemo-parakeet-c-api nemo-parakeet-c-api.c)
target_link_libraries(nemo-parakeet-c-api sherpa-onnx-c-api)

add_executable(sense-voice-c-api sense-voice-c-api.c)
target_link_libraries(sense-voice-c-api sherpa-onnx-c-api)

add_executable(funasr-nano-c-api funasr-nano-c-api.c)
target_link_libraries(funasr-nano-c-api sherpa-onnx-c-api)

add_executable(sense-voice-with-hr-c-api sense-voice-with-hr-c-api.c)
target_link_libraries(sense-voice-with-hr-c-api sherpa-onnx-c-api)

add_executable(dolphin-ctc-c-api dolphin-ctc-c-api.c)
target_link_libraries(dolphin-ctc-c-api sherpa-onnx-c-api)

add_executable(moonshine-c-api moonshine-c-api.c)
target_link_libraries(moonshine-c-api sherpa-onnx-c-api)

add_executable(moonshine-v2-c-api moonshine-v2-c-api.c)
target_link_libraries(moonshine-v2-c-api sherpa-onnx-c-api)

add_executable(zipformer-c-api zipformer-c-api.c)
target_link_libraries(zipformer-c-api sherpa-onnx-c-api)

add_executable(wenet-ctc-c-api wenet-ctc-c-api.c)
target_link_libraries(wenet-ctc-c-api sherpa-onnx-c-api)

add_executable(omnilingual-asr-ctc-c-api omnilingual-asr-ctc-c-api.c)
target_link_libraries(omnilingual-asr-ctc-c-api sherpa-onnx-c-api)

add_executable(medasr-ctc-c-api medasr-ctc-c-api.c)
target_link_libraries(medasr-ctc-c-api sherpa-onnx-c-api)

add_executable(fire-red-asr-ctc-c-api fire-red-asr-ctc-c-api.c)
target_link_libraries(fire-red-asr-ctc-c-api sherpa-onnx-c-api)

add_executable(streaming-zipformer-c-api streaming-zipformer-c-api.c)
target_link_libraries(streaming-zipformer-c-api sherpa-onnx-c-api)

add_executable(streaming-zipformer-with-hr-c-api streaming-zipformer-with-hr-c-api.c)
target_link_libraries(streaming-zipformer-with-hr-c-api sherpa-onnx-c-api)

add_executable(paraformer-c-api paraformer-c-api.c)
target_link_libraries(paraformer-c-api sherpa-onnx-c-api)

add_executable(streaming-paraformer-c-api streaming-paraformer-c-api.c)
target_link_libraries(streaming-paraformer-c-api sherpa-onnx-c-api)

add_executable(telespeech-c-api telespeech-c-api.c)
target_link_libraries(telespeech-c-api sherpa-onnx-c-api)

add_executable(vad-sense-voice-c-api vad-sense-voice-c-api.c)
target_link_libraries(vad-sense-voice-c-api sherpa-onnx-c-api)

add_executable(vad-whisper-c-api vad-whisper-c-api.c)
target_link_libraries(vad-whisper-c-api sherpa-onnx-c-api)

add_executable(vad-moonshine-c-api vad-moonshine-c-api.c)
target_link_libraries(vad-moonshine-c-api sherpa-onnx-c-api)

add_executable(streaming-zipformer-buffered-tokens-hotwords-c-api
               streaming-zipformer-buffered-tokens-hotwords-c-api.c)
target_link_libraries(streaming-zipformer-buffered-tokens-hotwords-c-api sherpa-onnx-c-api)

add_executable(streaming-paraformer-buffered-tokens-c-api
               streaming-paraformer-buffered-tokens-c-api.c)
target_link_libraries(streaming-paraformer-buffered-tokens-c-api sherpa-onnx-c-api)

add_executable(streaming-ctc-buffered-tokens-c-api
               streaming-ctc-buffered-tokens-c-api.c)
target_link_libraries(streaming-ctc-buffered-tokens-c-api sherpa-onnx-c-api)

add_executable(keywords-spotter-buffered-tokens-keywords-c-api
               keywords-spotter-buffered-tokens-keywords-c-api.c)
target_link_libraries(keywords-spotter-buffered-tokens-keywords-c-api sherpa-onnx-c-api)

if(SHERPA_ONNX_HAS_ALSA)
  add_subdirectory(./asr-microphone-example)
elseif((UNIX AND NOT APPLE) OR LINUX)
  message(WARNING "Not include ./asr-microphone-example since alsa is not available")
endif()


================================================
FILE: c-api-examples/Makefile
================================================

CUR_DIR :=$(shell pwd)

CFLAGS := -I ../ -I ../build/_deps/cargs-src/include/
LDFLAGS := -L ../build/lib
LDFLAGS += -L ../build/_deps/onnxruntime-src/lib
LDFLAGS += -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fstfar -lsherpa-onnx-fst -lkaldi-native-fbank-core -lkissfft-float -lpiper_phonemize -lespeak-ng -lucd -lcargs -lonnxruntime
LDFLAGS += -framework Foundation
LDFLAGS += -lc++
LDFLAGS += -Wl,-rpath,${CUR_DIR}/../build/lib
LDFLAGS += -Wl,-rpath,${CUR_DIR}/../build/_deps/onnxruntime-src/lib

.PHONY: all clean

all: decode-file-c-api offline-tts-c-api

decode-file-c-api: decode-file-c-api.c
	$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)

offline-tts-c-api: offline-tts-c-api.c
	$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)

clean:
	$(RM) ./decode-file-c-api ./offline-tts-c-api


================================================
FILE: c-api-examples/README.md
================================================
# Introduction

This folder contains C API examples for [sherpa-onnx][sherpa-onnx].

Please refer to the documentation
https://k2-fsa.github.io/sherpa/onnx/c-api/index.html
for details.


## File descriptions

- [decode-file-c-api.c](./decode-file-c-api.c) This file shows how to use the C API
  for speech recognition with a streaming model.

- [offline-tts-c-api.c](./offline-tts-c-api.c) This file shows how to use the C API
  to convert text to speech with a non-streaming model.

- [speech-enhancement-gtcrn-c-api.c](./speech-enhancement-gtcrn-c-api.c)
  This file shows how to use the C API for speech enhancement with GTCRN
  models.

- [speech-enhancement-dpdfnet-c-api.c](./speech-enhancement-dpdfnet-c-api.c)
  This file shows how to use the C API for speech enhancement with DPDFNet
  models. Use 16 kHz DPDFNet models such as `dpdfnet_baseline.onnx`,
  `dpdfnet2.onnx`, `dpdfnet4.onnx`, or `dpdfnet8.onnx` for downstream ASR and
  `dpdfnet2_48khz_hr.onnx` for 48 kHz enhancement output.

- [online-speech-enhancement-gtcrn-c-api.c](./online-speech-enhancement-gtcrn-c-api.c)
  This file shows how to use the C API for online speech enhancement with
  GTCRN models.

- [online-speech-enhancement-dpdfnet-c-api.c](./online-speech-enhancement-dpdfnet-c-api.c)
  This file shows how to use the C API for online speech enhancement with
  DPDFNet models. Use `dpdfnet_baseline.onnx`, `dpdfnet2.onnx`,
  `dpdfnet4.onnx`, or `dpdfnet8.onnx` for 16 kHz output.

[sherpa-onnx]: https://github.com/k2-fsa/sherpa-onnx


================================================
FILE: c-api-examples/add-punctuation-c-api.c
================================================
// c-api-examples/add-punctuation-c-api.c
//
// Copyright (c)  2024  Xiaomi Corporation

// We assume you have pre-downloaded the model files for testing
// from https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models
//
// An example is given below:
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
// tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
// rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

int32_t main() {
  SherpaOnnxOfflinePunctuationConfig config;
  memset(&config, 0, sizeof(config));

  // clang-format off
  config.model.ct_transformer = "./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx";
  // clang-format on
  config.model.num_threads = 1;
  config.model.debug = 1;
  config.model.provider = "cpu";

  const SherpaOnnxOfflinePunctuation *punct =
      SherpaOnnxCreateOfflinePunctuation(&config);
  if (!punct) {
    fprintf(stderr,
            "Failed to create OfflinePunctuation. Please check your config");
    return -1;
  }

  const char *texts[] = {
      "这是一个测试你好吗How are you我很好thank you are you ok谢谢你",
      "我们都是木头人不会说话不会动",
      ("The African blogosphere is rapidly expanding bringing more voices "
       "online in the form of commentaries opinions analyses rants and poetry"),
  };

  int32_t n = sizeof(texts) / sizeof(const char *);
  fprintf(stderr, "n: %d\n", n);

  fprintf(stderr, "--------------------\n");
  for (int32_t i = 0; i != n; ++i) {
    const char *text_with_punct =
        SherpaOfflinePunctuationAddPunct(punct, texts[i]);

    fprintf(stderr, "Input text: %s\n", texts[i]);
    fprintf(stderr, "Output text: %s\n", text_with_punct);
    SherpaOfflinePunctuationFreeText(text_with_punct);
    fprintf(stderr, "--------------------\n");
  }

  SherpaOnnxDestroyOfflinePunctuation(punct);

  return 0;
};


================================================
FILE: c-api-examples/add-punctuation-online-c-api.c
================================================
// c-api-examples/add-punctuation-online-c-api.c
//
// Copyright (c)  zengyw

// We assume you have pre-downloaded the model files for testing
// from https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models
//
// An example is given below:
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
// tar xvf sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
// rm sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

int32_t main() {
  SherpaOnnxOnlinePunctuationConfig config;
  memset(&config, 0, sizeof(config));

  // clang-format off
  config.model.cnn_bilstm = "./sherpa-onnx-online-punct-en-2024-08-06/model.int8.onnx";
  config.model.bpe_vocab = "./sherpa-onnx-online-punct-en-2024-08-06/bpe.vocab";
  // clang-format on
  config.model.num_threads = 1;
  config.model.debug = 1;
  config.model.provider = "cpu";

  const SherpaOnnxOnlinePunctuation *punct =
      SherpaOnnxCreateOnlinePunctuation(&config);
  if (!punct) {
    fprintf(stderr,
            "Failed to create OnlinePunctuation. Please check your config\n");
    return -1;
  }

  const char *texts[] = {
      "how are you i am fine thank you",
      ("The African blogosphere is rapidly expanding bringing more voices "
       "online in the form of commentaries opinions analyses rants and poetry"),
  };

  int32_t n = sizeof(texts) / sizeof(const char *);
  fprintf(stderr, "n: %d\n", n);

  fprintf(stderr, "--------------------\n");
  for (int32_t i = 0; i != n; ++i) {
    const char *text_with_punct =
        SherpaOnnxOnlinePunctuationAddPunct(punct, texts[i]);
    if (!text_with_punct) {
      fprintf(stderr, "Failed to add punctuation for: %s\n", texts[i]);
      continue;
    }

    fprintf(stderr, "Input text: %s\n", texts[i]);
    fprintf(stderr, "Output text: %s\n", text_with_punct);
    SherpaOnnxOnlinePunctuationFreeText(text_with_punct);
    fprintf(stderr, "--------------------\n");
  }

  SherpaOnnxDestroyOnlinePunctuation(punct);

  return 0;
}


================================================
FILE: c-api-examples/asr-microphone-example/CMakeLists.txt
================================================

add_executable(c-api-alsa c-api-alsa.cc alsa.cc)
target_link_libraries(c-api-alsa sherpa-onnx-c-api cargs)

if(DEFINED ENV{SHERPA_ONNX_ALSA_LIB_DIR})
  target_link_libraries(c-api-alsa -L$ENV{SHERPA_ONNX_ALSA_LIB_DIR} -lasound)
else()
  target_link_libraries(c-api-alsa asound)
endif()


================================================
FILE: c-api-examples/asr-microphone-example/CPPLINT.cfg
================================================
exclude_files=alsa.cc|alsa.h


================================================
FILE: c-api-examples/asr-microphone-example/README.md
================================================
# Introduction

This folder contains examples for real-time speech recognition from a microphone
using sherpa-onnx C API.

**Note**: You can call C API from C++ files.


## ./c-api-alsa.cc

This file uses alsa to read a microphone. It runs only on Linux. This file
does not support macOS or Windows.


================================================
FILE: c-api-examples/asr-microphone-example/c-api-alsa.cc
================================================
// c-api-examples/asr-microphone-example/c-api-alsa.cc
// Copyright (c)  2022-2024  Xiaomi Corporation

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <algorithm>
#include <cctype>  // std::tolower
#include <cstdint>
#include <string>
#include <vector>

#include "c-api-examples/asr-microphone-example/alsa.h"

// NOTE: You don't need to use cargs.h in your own project.
// We use it in this file to parse commandline arguments
#include "cargs.h"  // NOLINT
#include "sherpa-onnx/c-api/c-api.h"

static struct cag_option options[] = {
    {/*.identifier =*/'h',
     /*.access_letters =*/"h",
     /*.access_name =*/"help",
     /*.value_name =*/"help",
     /*.description =*/"Show help"},
    {/*.identifier =*/'t',
     /*.access_letters =*/NULL,
     /*.access_name =*/"tokens",
     /*.value_name =*/"tokens",
     /*.description =*/"Tokens file"},
    {/*.identifier =*/'e',
     /*.access_letters =*/NULL,
     /*.access_name =*/"encoder",
     /*.value_name =*/"encoder",
     /*.description =*/"Encoder ONNX file"},
    {/*.identifier =*/'d',
     /*.access_letters =*/NULL,
     /*.access_name =*/"decoder",
     /*.value_name =*/"decoder",
     /*.description =*/"Decoder ONNX file"},
    {/*.identifier =*/'j',
     /*.access_letters =*/NULL,
     /*.access_name =*/"joiner",
     /*.value_name =*/"joiner",
     /*.description =*/"Joiner ONNX file"},
    {/*.identifier =*/'n',
     /*.access_letters =*/NULL,
     /*.access_name =*/"num-threads",
     /*.value_name =*/"num-threads",
     /*.description =*/"Number of threads"},
    {/*.identifier =*/'p',
     /*.access_letters =*/NULL,
     /*.access_name =*/"provider",
     /*.value_name =*/"provider",
     /*.description =*/"Provider: cpu (default), cuda, coreml"},
    {/*.identifier =*/'m',
     /*.access_letters =*/NULL,
     /*.access_name =*/"decoding-method",
     /*.value_name =*/"decoding-method",
     /*.description =*/
     "Decoding method: greedy_search (default), modified_beam_search"},
    {/*.identifier =*/'f',
     /*.access_letters =*/NULL,
     /*.access_name =*/"hotwords-file",
     /*.value_name =*/"hotwords-file",
     /*.description =*/
     "The file containing hotwords, one words/phrases per line, and for each "
     "phrase the bpe/cjkchar are separated by a space. For example: ▁HE LL O "
     "▁WORLD, 你 好 世 界"},
    {/*.identifier =*/'s',
     /*.access_letters =*/NULL,
     /*.access_name =*/"hotwords-score",
     /*.value_name =*/"hotwords-score",
     /*.description =*/
     "The bonus score for each token in hotwords. Used only when "
     "decoding_method is modified_beam_search"},
};

const char *kUsage =
    R"(
Usage:
  ./bin/c-api-alsa \
    --tokens=/path/to/tokens.txt \
    --encoder=/path/to/encoder.onnx \
    --decoder=/path/to/decoder.onnx \
    --joiner=/path/to/decoder.onnx \
    device_name

The device name specifies which microphone to use in case there are several
on your system. You can use

  arecord -l

to find all available microphones on your computer. For instance, if it outputs

**** List of CAPTURE Hardware Devices ****
card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
  Subdevices: 1/1
  Subdevice #0: subdevice #0

and if you want to select card 3 and device 0 on that card, please use:

  plughw:3,0

as the device_name.
)";

bool stop = false;

static void Handler(int sig) {
  stop = true;
  fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n");
}

int32_t main(int32_t argc, char *argv[]) {
  if (argc < 6) {
    fprintf(stderr, "%s\n", kUsage);
    exit(0);
  }

  signal(SIGINT, Handler);

  SherpaOnnxOnlineRecognizerConfig config;
  memset(&config, 0, sizeof(config));

  config.model_config.debug = 0;
  config.model_config.num_threads = 1;
  config.model_config.provider = "cpu";

  config.decoding_method = "greedy_search";

  config.max_active_paths = 4;

  config.feat_config.sample_rate = 16000;
  config.feat_config.feature_dim = 80;

  config.enable_endpoint = 1;
  config.rule1_min_trailing_silence = 2.4;
  config.rule2_min_trailing_silence = 1.2;
  config.rule3_min_utterance_length = 300;

  cag_option_context context;
  char identifier;
  const char *value;

  cag_option_prepare(&context, options, CAG_ARRAY_SIZE(options), argc, argv);

  while (cag_option_fetch(&context)) {
    identifier = cag_option_get(&context);
    value = cag_option_get_value(&context);
    switch (identifier) {
      case 't':
        config.model_config.tokens = value;
        break;
      case 'e':
        config.model_config.transducer.encoder = value;
        break;
      case 'd':
        config.model_config.transducer.decoder = value;
        break;
      case 'j':
        config.model_config.transducer.joiner = value;
        break;
      case 'n':
        config.model_config.num_threads = atoi(value);
        break;
      case 'p':
        config.model_config.provider = value;
        break;
      case 'm':
        config.decoding_method = value;
        break;
      case 'f':
        config.hotwords_file = value;
        break;
      case 's':
        config.hotwords_score = atof(value);
        break;
      case 'h': {
        fprintf(stderr, "%s\n", kUsage);
        exit(0);
        break;
      }
      default:
        // do nothing as config already has valid default values
        break;
    }
  }

  const SherpaOnnxOnlineRecognizer *recognizer =
      SherpaOnnxCreateOnlineRecognizer(&config);
  const SherpaOnnxOnlineStream *stream =
      SherpaOnnxCreateOnlineStream(recognizer);

  const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50);
  int32_t segment_id = 0;

  const char *device_name = argv[context.index];
  sherpa_onnx::Alsa alsa(device_name);
  fprintf(stderr, "Use recording device: %s\n", device_name);
  fprintf(stderr,
          "Please \033[32m\033[1mspeak\033[0m! Press \033[31m\033[1mCtrl + "
          "C\033[0m to exit\n");

  int32_t expected_sample_rate = 16000;

  if (alsa.GetExpectedSampleRate() != expected_sample_rate) {
    fprintf(stderr, "sample rate: %d != %d\n", alsa.GetExpectedSampleRate(),
            expected_sample_rate);
    exit(-1);
  }

  int32_t chunk = 0.1 * alsa.GetActualSampleRate();

  std::string last_text;

  int32_t segment_index = 0;

  while (!stop) {
    const std::vector<float> &samples = alsa.Read(chunk);
    SherpaOnnxOnlineStreamAcceptWaveform(stream, expected_sample_rate,
                                         samples.data(), samples.size());
    while (SherpaOnnxIsOnlineStreamReady(recognizer, stream)) {
      SherpaOnnxDecodeOnlineStream(recognizer, stream);
    }

    const SherpaOnnxOnlineRecognizerResult *r =
        SherpaOnnxGetOnlineStreamResult(recognizer, stream);

    std::string text = r->text;
    SherpaOnnxDestroyOnlineRecognizerResult(r);

    if (!text.empty() && last_text != text) {
      last_text = text;

      std::transform(text.begin(), text.end(), text.begin(),
                     [](auto c) { return std::tolower(c); });

      SherpaOnnxPrint(display, segment_index, text.c_str());
      fflush(stderr);
    }

    if (SherpaOnnxOnlineStreamIsEndpoint(recognizer, stream)) {
      if (!text.empty()) {
        ++segment_index;
      }
      SherpaOnnxOnlineStreamReset(recognizer, stream);
    }
  }

  // free allocated resources
  SherpaOnnxDestroyDisplay(display);
  SherpaOnnxDestroyOnlineStream(stream);
  SherpaOnnxDestroyOnlineRecognizer(recognizer);
  fprintf(stderr, "\n");

  return 0;
}


================================================
FILE: c-api-examples/audio-tagging-c-api.c
================================================
// c-api-examples/audio-tagging-c-api.c
//
// Copyright (c)  2024  Xiaomi Corporation

// We assume you have pre-downloaded the model files for testing
// from https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models
//
// An example is given below:
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
// tar xvf sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
// rm sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

int32_t main() {
  SherpaOnnxAudioTaggingConfig config;
  memset(&config, 0, sizeof(config));

  config.model.zipformer.model =
      "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/model.int8.onnx";
  config.model.num_threads = 1;
  config.model.debug = 1;
  config.model.provider = "cpu";
  // clang-format off
  config.labels = "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/class_labels_indices.csv";
  // clang-format on

  const SherpaOnnxAudioTagging *tagger = SherpaOnnxCreateAudioTagging(&config);
  if (!tagger) {
    fprintf(stderr, "Failed to create audio tagger. Please check your config");
    return -1;
  }

  // You can find more test waves from
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
  const char *wav_filename =
      "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/1.wav";

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  const SherpaOnnxOfflineStream *stream =
      SherpaOnnxAudioTaggingCreateOfflineStream(tagger);

  SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
                                  wave->num_samples);

  int32_t top_k = 5;
  const SherpaOnnxAudioEvent *const *results =
      SherpaOnnxAudioTaggingCompute(tagger, stream, top_k);

  fprintf(stderr, "--------------------------------------------------\n");
  fprintf(stderr, "Index\t\tProbability\t\tEvent name\n");
  fprintf(stderr, "--------------------------------------------------\n");
  for (int32_t i = 0; i != top_k; ++i) {
    fprintf(stderr, "%d\t\t%.3f\t\t\t%s\n", i, results[i]->prob,
            results[i]->name);
  }
  fprintf(stderr, "--------------------------------------------------\n");

  SherpaOnnxAudioTaggingFreeResults(results);
  SherpaOnnxDestroyOfflineStream(stream);
  SherpaOnnxFreeWave(wave);
  SherpaOnnxDestroyAudioTagging(tagger);

  return 0;
};


================================================
FILE: c-api-examples/decode-file-c-api.c
================================================
// c-api-examples/decode-file-c-api.c
//
// Copyright (c)  2023  Xiaomi Corporation

// This file shows how to use sherpa-onnx C API
// to decode a file.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "cargs.h"
#include "sherpa-onnx/c-api/c-api.h"

static struct cag_option options[] = {
    {.identifier = 'h',
     .access_letters = "h",
     .access_name = "help",
     .description = "Show help"},
    {.identifier = 't',
     .access_letters = NULL,
     .access_name = "tokens",
     .value_name = "tokens",
     .description = "Tokens file"},
    {.identifier = 'e',
     .access_letters = NULL,
     .access_name = "encoder",
     .value_name = "encoder",
     .description = "Encoder ONNX file"},
    {.identifier = 'd',
     .access_letters = NULL,
     .access_name = "decoder",
     .value_name = "decoder",
     .description = "Decoder ONNX file"},
    {.identifier = 'j',
     .access_letters = NULL,
     .access_name = "joiner",
     .value_name = "joiner",
     .description = "Joiner ONNX file"},
    {.identifier = 'n',
     .access_letters = NULL,
     .access_name = "num-threads",
     .value_name = "num-threads",
     .description = "Number of threads"},
    {.identifier = 'p',
     .access_letters = NULL,
     .access_name = "provider",
     .value_name = "provider",
     .description = "Provider: cpu (default), cuda, coreml"},
    {.identifier = 'm',
     .access_letters = NULL,
     .access_name = "decoding-method",
     .value_name = "decoding-method",
     .description =
         "Decoding method: greedy_search (default), modified_beam_search"},
    {.identifier = 'f',
     .access_letters = NULL,
     .access_name = "hotwords-file",
     .value_name = "hotwords-file",
     .description = "The file containing hotwords, one words/phrases per line, "
                    "and for each phrase the bpe/cjkchar are separated by a "
                    "space. For example: ▁HE LL O ▁WORLD, 你 好 世 界"},
    {.identifier = 's',
     .access_letters = NULL,
     .access_name = "hotwords-score",
     .value_name = "hotwords-score",
     .description = "The bonus score for each token in hotwords. Used only "
                    "when decoding_method is modified_beam_search"},
};

const char *kUsage =
    "\n"
    "Usage:\n "
    "  ./bin/decode-file-c-api \\\n"
    "    --tokens=/path/to/tokens.txt \\\n"
    "    --encoder=/path/to/encoder.onnx \\\n"
    "    --decoder=/path/to/decoder.onnx \\\n"
    "    --joiner=/path/to/joiner.onnx \\\n"
    "    --provider=cpu \\\n"
    "    /path/to/foo.wav\n"
    "\n\n"
    "Default num_threads is 1.\n"
    "Valid decoding_method: greedy_search (default), modified_beam_search\n\n"
    "Valid provider: cpu (default), cuda, coreml\n\n"
    "Please refer to \n"
    "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/"
    "index.html\n"
    "for a list of pre-trained models to download.\n"
    "\n"
    "Note that this file supports only streaming transducer models.\n";

int32_t main(int32_t argc, char *argv[]) {
  if (argc < 6) {
    fprintf(stderr, "%s\n", kUsage);
    exit(0);
  }

  SherpaOnnxOnlineRecognizerConfig config;
  memset(&config, 0, sizeof(config));

  config.model_config.debug = 0;
  config.model_config.num_threads = 1;
  config.model_config.provider = "cpu";

  config.decoding_method = "greedy_search";

  config.max_active_paths = 4;

  config.feat_config.sample_rate = 16000;
  config.feat_config.feature_dim = 80;

  config.enable_endpoint = 1;
  config.rule1_min_trailing_silence = 2.4;
  config.rule2_min_trailing_silence = 1.2;
  config.rule3_min_utterance_length = 300;

  cag_option_context context;
  char identifier;
  const char *value;

  cag_option_prepare(&context, options, CAG_ARRAY_SIZE(options), argc, argv);

  while (cag_option_fetch(&context)) {
    identifier = cag_option_get(&context);
    value = cag_option_get_value(&context);
    switch (identifier) {
      case 't':
        config.model_config.tokens = value;
        break;
      case 'e':
        config.model_config.transducer.encoder = value;
        break;
      case 'd':
        config.model_config.transducer.decoder = value;
        break;
      case 'j':
        config.model_config.transducer.joiner = value;
        break;
      case 'n':
        config.model_config.num_threads = atoi(value);
        break;
      case 'p':
        config.model_config.provider = value;
        break;
      case 'm':
        config.decoding_method = value;
        break;
      case 'f':
        config.hotwords_file = value;
        break;
      case 's':
        config.hotwords_score = atof(value);
        break;
      case 'h': {
        fprintf(stderr, "%s\n", kUsage);
        exit(0);
        break;
      }
      default:
        // do nothing as config already has valid default values
        break;
    }
  }

  const SherpaOnnxOnlineRecognizer *recognizer =
      SherpaOnnxCreateOnlineRecognizer(&config);
  const SherpaOnnxOnlineStream *stream =
      SherpaOnnxCreateOnlineStream(recognizer);

  const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50);
  int32_t segment_id = 0;

  const char *wav_filename = argv[context.index];
  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }
  // simulate streaming

#define N 3200  // 0.2 s. Sample rate is fixed to 16 kHz

  fprintf(stderr, "sample rate: %d, num samples: %d, duration: %.2f s\n",
          wave->sample_rate, wave->num_samples,
          (float)wave->num_samples / wave->sample_rate);

  int32_t k = 0;
  while (k < wave->num_samples) {
    int32_t start = k;
    int32_t end =
        (start + N > wave->num_samples) ? wave->num_samples : (start + N);
    k += N;

    SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate,
                                         wave->samples + start, end - start);
    while (SherpaOnnxIsOnlineStreamReady(recognizer, stream)) {
      SherpaOnnxDecodeOnlineStream(recognizer, stream);
    }

    const SherpaOnnxOnlineRecognizerResult *r =
        SherpaOnnxGetOnlineStreamResult(recognizer, stream);

    if (strlen(r->text)) {
      SherpaOnnxPrint(display, segment_id, r->text);
    }

    if (SherpaOnnxOnlineStreamIsEndpoint(recognizer, stream)) {
      if (strlen(r->text)) {
        ++segment_id;
      }
      SherpaOnnxOnlineStreamReset(recognizer, stream);
    }

    SherpaOnnxDestroyOnlineRecognizerResult(r);
  }

  // add some tail padding
  float tail_paddings[4800] = {0};  // 0.3 seconds at 16 kHz sample rate
  SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, tail_paddings,
                                       4800);

  SherpaOnnxFreeWave(wave);

  SherpaOnnxOnlineStreamInputFinished(stream);
  while (SherpaOnnxIsOnlineStreamReady(recognizer, stream)) {
    SherpaOnnxDecodeOnlineStream(recognizer, stream);
  }

  const SherpaOnnxOnlineRecognizerResult *r =
      SherpaOnnxGetOnlineStreamResult(recognizer, stream);

  if (strlen(r->text)) {
    SherpaOnnxPrint(display, segment_id, r->text);
  }

  SherpaOnnxDestroyOnlineRecognizerResult(r);

  SherpaOnnxDestroyDisplay(display);
  SherpaOnnxDestroyOnlineStream(stream);
  SherpaOnnxDestroyOnlineRecognizer(recognizer);
  fprintf(stderr, "\n");

  return 0;
}


================================================
FILE: c-api-examples/dolphin-ctc-c-api.c
================================================
// c-api-examples/dolphin-ctc-c-api.c
//
// Copyright (c)  2025  Xiaomi Corporation

//
// This file demonstrates how to use Dolphin CTC model with sherpa-onnx's C API.
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
// tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
// rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

int32_t main() {
  // clang-format off
  const char *wav_filename = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav";
  const char *model_filename = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx";
  const char *tokens_filename = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt";
  // clang-format on

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  SherpaOnnxOfflineModelConfig offline_model_config;
  memset(&offline_model_config, 0, sizeof(offline_model_config));
  offline_model_config.debug = 1;
  offline_model_config.num_threads = 1;
  offline_model_config.provider = "cpu";
  offline_model_config.tokens = tokens_filename;
  offline_model_config.dolphin.model = model_filename;

  // Recognizer config
  SherpaOnnxOfflineRecognizerConfig recognizer_config;
  memset(&recognizer_config, 0, sizeof(recognizer_config));
  recognizer_config.decoding_method = "greedy_search";
  recognizer_config.model_config = offline_model_config;

  const SherpaOnnxOfflineRecognizer *recognizer =
      SherpaOnnxCreateOfflineRecognizer(&recognizer_config);

  if (recognizer == NULL) {
    fprintf(stderr, "Please check your config!\n");
    SherpaOnnxFreeWave(wave);
    return -1;
  }

  const SherpaOnnxOfflineStream *stream =
      SherpaOnnxCreateOfflineStream(recognizer);

  SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
                                  wave->num_samples);
  SherpaOnnxDecodeOfflineStream(recognizer, stream);
  const SherpaOnnxOfflineRecognizerResult *result =
      SherpaOnnxGetOfflineStreamResult(stream);

  fprintf(stderr, "Decoded text: %s\n", result->text);

  SherpaOnnxDestroyOfflineRecognizerResult(result);
  SherpaOnnxDestroyOfflineStream(stream);
  SherpaOnnxDestroyOfflineRecognizer(recognizer);
  SherpaOnnxFreeWave(wave);

  return 0;
}


================================================
FILE: c-api-examples/fire-red-asr-c-api.c
================================================
// c-api-examples/fire-red-asr-c-api.c
//
// Copyright (c)  2025  Xiaomi Corporation

// We assume you have pre-downloaded the FireRedAsr model
// from https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
// An example is given below:
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
// tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
// rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

int32_t main() {
  const char *wav_filename =
      "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/0.wav";
  const char *encoder_filename =
      "sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx";
  const char *decoder_filename =
      "sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/decoder.int8.onnx";
  const char *tokens_filename =
      "sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/tokens.txt";
  const char *provider = "cpu";

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  // Offline model config
  SherpaOnnxOfflineModelConfig offline_model_config;
  memset(&offline_model_config, 0, sizeof(offline_model_config));
  offline_model_config.debug = 1;
  offline_model_config.num_threads = 1;
  offline_model_config.provider = provider;
  offline_model_config.tokens = tokens_filename;
  offline_model_config.fire_red_asr.encoder = encoder_filename;
  offline_model_config.fire_red_asr.decoder = decoder_filename;

  // Recognizer config
  SherpaOnnxOfflineRecognizerConfig recognizer_config;
  memset(&recognizer_config, 0, sizeof(recognizer_config));
  recognizer_config.decoding_method = "greedy_search";
  recognizer_config.model_config = offline_model_config;

  const SherpaOnnxOfflineRecognizer *recognizer =
      SherpaOnnxCreateOfflineRecognizer(&recognizer_config);

  if (recognizer == NULL) {
    fprintf(stderr, "Please check your config!\n");

    SherpaOnnxFreeWave(wave);

    return -1;
  }

  const SherpaOnnxOfflineStream *stream =
      SherpaOnnxCreateOfflineStream(recognizer);

  SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
                                  wave->num_samples);
  SherpaOnnxDecodeOfflineStream(recognizer, stream);
  const SherpaOnnxOfflineRecognizerResult *result =
      SherpaOnnxGetOfflineStreamResult(stream);

  fprintf(stderr, "Decoded text: %s\n", result->text);

  SherpaOnnxDestroyOfflineRecognizerResult(result);
  SherpaOnnxDestroyOfflineStream(stream);
  SherpaOnnxDestroyOfflineRecognizer(recognizer);
  SherpaOnnxFreeWave(wave);

  return 0;
}


================================================
FILE: c-api-examples/fire-red-asr-ctc-c-api.c
================================================
// c-api-examples/fire-red-asr-ctc-c-api.c
//
// Copyright (c)  2026  Xiaomi Corporation

//
// This file demonstrates how to use FireRedASR with sherpa-onnx's C API.
// clang-format off
/*
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
tar xvf sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
rm sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
*/
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

int32_t main() {
  // clang-format off
  const char *wav_filename = "./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/test_wavs/1.wav";
  const char *model_filename = "./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/model.int8.onnx";
  const char *tokens_filename = "./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/tokens.txt";
  // clang-format on

  const char *provider = "cpu";

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  SherpaOnnxOfflineFireRedAsrCtcModelConfig fire_red_asr_ctc;
  memset(&fire_red_asr_ctc, 0, sizeof(fire_red_asr_ctc));
  fire_red_asr_ctc.model = model_filename;

  // Offline model config
  SherpaOnnxOfflineModelConfig offline_model_config;
  memset(&offline_model_config, 0, sizeof(offline_model_config));
  offline_model_config.debug = 1;
  offline_model_config.num_threads = 1;
  offline_model_config.provider = provider;
  offline_model_config.tokens = tokens_filename;
  offline_model_config.fire_red_asr_ctc = fire_red_asr_ctc;

  // Recognizer config
  SherpaOnnxOfflineRecognizerConfig recognizer_config;
  memset(&recognizer_config, 0, sizeof(recognizer_config));
  recognizer_config.decoding_method = "greedy_search";
  recognizer_config.model_config = offline_model_config;

  const SherpaOnnxOfflineRecognizer *recognizer =
      SherpaOnnxCreateOfflineRecognizer(&recognizer_config);

  if (recognizer == NULL) {
    fprintf(stderr, "Please check your config!\n");
    SherpaOnnxFreeWave(wave);
    return -1;
  }

  const SherpaOnnxOfflineStream *stream =
      SherpaOnnxCreateOfflineStream(recognizer);

  SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
                                  wave->num_samples);
  SherpaOnnxDecodeOfflineStream(recognizer, stream);
  const SherpaOnnxOfflineRecognizerResult *result =
      SherpaOnnxGetOfflineStreamResult(stream);

  fprintf(stderr, "Decoded text: %s\n", result->text);

  SherpaOnnxDestroyOfflineRecognizerResult(result);
  SherpaOnnxDestroyOfflineStream(stream);
  SherpaOnnxDestroyOfflineRecognizer(recognizer);
  SherpaOnnxFreeWave(wave);

  return 0;
}


================================================
FILE: c-api-examples/funasr-nano-c-api.c
================================================
// c-api-examples/funasr-nano-c-api.c
//
// Copyright (c)  2026  Xiaomi Corporation

//
// This file demonstrates how to use FunASR Nano with sherpa-onnx's C API.
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
// tar xvf sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
// rm sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

int32_t main() {
  // clang-format off
  const char *wav_filename = "./sherpa-onnx-funasr-nano-int8-2025-12-30/test_wavs/dia_yue.wav";
  const char *encoder_adaptor = "./sherpa-onnx-funasr-nano-int8-2025-12-30/encoder_adaptor.int8.onnx";
  const char *embedding = "./sherpa-onnx-funasr-nano-int8-2025-12-30/embedding.int8.onnx";
  const char *llm = "./sherpa-onnx-funasr-nano-int8-2025-12-30/llm.int8.onnx";
  const char *tokenizer = "./sherpa-onnx-funasr-nano-int8-2025-12-30/Qwen3-0.6B";
  // clang-format on

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  SherpaOnnxOfflineFunASRNanoModelConfig funasr_nano;
  memset(&funasr_nano, 0, sizeof(funasr_nano));
  funasr_nano.encoder_adaptor = encoder_adaptor;
  funasr_nano.embedding = embedding;
  funasr_nano.llm = llm;
  funasr_nano.tokenizer = tokenizer;

  // Offline model config
  SherpaOnnxOfflineModelConfig offline_model_config;
  memset(&offline_model_config, 0, sizeof(offline_model_config));
  offline_model_config.debug = 1;
  offline_model_config.num_threads = 2;
  offline_model_config.provider = "cpu";
  offline_model_config.funasr_nano = funasr_nano;

  // Recognizer config
  SherpaOnnxOfflineRecognizerConfig recognizer_config;
  memset(&recognizer_config, 0, sizeof(recognizer_config));
  recognizer_config.decoding_method = "greedy_search";
  recognizer_config.model_config = offline_model_config;

  const SherpaOnnxOfflineRecognizer *recognizer =
      SherpaOnnxCreateOfflineRecognizer(&recognizer_config);

  if (recognizer == NULL) {
    fprintf(stderr, "Please check your config!\n");
    SherpaOnnxFreeWave(wave);
    return -1;
  }

  const SherpaOnnxOfflineStream *stream =
      SherpaOnnxCreateOfflineStream(recognizer);

  SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
                                  wave->num_samples);
  SherpaOnnxDecodeOfflineStream(recognizer, stream);
  const SherpaOnnxOfflineRecognizerResult *result =
      SherpaOnnxGetOfflineStreamResult(stream);

  fprintf(stderr, "Decoded text: %s\n", result->text);

  SherpaOnnxDestroyOfflineRecognizerResult(result);
  SherpaOnnxDestroyOfflineStream(stream);
  SherpaOnnxDestroyOfflineRecognizer(recognizer);
  SherpaOnnxFreeWave(wave);

  return 0;
}


================================================
FILE: c-api-examples/keywords-spotter-buffered-tokens-keywords-c-api.c
================================================
// c-api-examples/keywords-spotter-buffered-tokens-keywords-c-api.c
//
// Copyright (c)  2024  Xiaomi Corporation
// Copyright (c)  2024  Luo Xiao

//
// This file demonstrates how to use keywords spotter with sherpa-onnx's C
// API and with tokens and keywords loaded from buffered strings instead of from
// external files API.
// clang-format off
// 
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
// tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
// rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

static size_t ReadFile(const char *filename, const char **buffer_out) {
  FILE *file = fopen(filename, "r");
  if (file == NULL) {
    fprintf(stderr, "Failed to open %s\n", filename);
    return -1;
  }
  fseek(file, 0L, SEEK_END);
  long size = ftell(file);
  rewind(file);
  *buffer_out = malloc(size);
  if (*buffer_out == NULL) {
    fclose(file);
    fprintf(stderr, "Memory error\n");
    return -1;
  }
  size_t read_bytes = fread((void *)*buffer_out, 1, size, file);
  if (read_bytes != size) {
    printf("Errors occurred in reading the file %s\n", filename);
    free((void *)*buffer_out);
    *buffer_out = NULL;
    fclose(file);
    return -1;
  }
  fclose(file);
  return read_bytes;
}

int32_t main() {
  const char *wav_filename =
      "sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/test_wavs/"
      "6.wav";
  const char *encoder_filename =
      "sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
      "encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx";
  const char *decoder_filename =
      "sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
      "decoder-epoch-12-avg-2-chunk-16-left-64.onnx";
  const char *joiner_filename =
      "sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
      "joiner-epoch-12-avg-2-chunk-16-left-64.int8.onnx";
  const char *provider = "cpu";
  const char *tokens_filename =
      "sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/tokens.txt";
  const char *keywords_filename =
      "sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/test_wavs/"
      "test_keywords.txt";
  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  // reading tokens and keywords to buffers
  const char *tokens_buf;
  size_t token_buf_size = ReadFile(tokens_filename, &tokens_buf);
  if (token_buf_size < 1) {
    fprintf(stderr, "Please check your tokens.txt!\n");
    free((void *)tokens_buf);
    return -1;
  }
  const char *keywords_buf;
  size_t keywords_buf_size = ReadFile(keywords_filename, &keywords_buf);
  if (keywords_buf_size < 1) {
    fprintf(stderr, "Please check your keywords.txt!\n");
    free((void *)keywords_buf);
    return -1;
  }

  // Zipformer config
  SherpaOnnxOnlineTransducerModelConfig zipformer_config;
  memset(&zipformer_config, 0, sizeof(zipformer_config));
  zipformer_config.encoder = encoder_filename;
  zipformer_config.decoder = decoder_filename;
  zipformer_config.joiner = joiner_filename;

  // Online model config
  SherpaOnnxOnlineModelConfig online_model_config;
  memset(&online_model_config, 0, sizeof(online_model_config));
  online_model_config.debug = 1;
  online_model_config.num_threads = 1;
  online_model_config.provider = provider;
  online_model_config.tokens_buf = tokens_buf;
  online_model_config.tokens_buf_size = token_buf_size;
  online_model_config.transducer = zipformer_config;

  // Keywords-spotter config
  SherpaOnnxKeywordSpotterConfig keywords_spotter_config;
  memset(&keywords_spotter_config, 0, sizeof(keywords_spotter_config));
  keywords_spotter_config.max_active_paths = 4;
  keywords_spotter_config.keywords_threshold = 0.1;
  keywords_spotter_config.keywords_score = 3.0;
  keywords_spotter_config.model_config = online_model_config;
  keywords_spotter_config.keywords_buf = keywords_buf;
  keywords_spotter_config.keywords_buf_size = keywords_buf_size;

  const SherpaOnnxKeywordSpotter *keywords_spotter =
      SherpaOnnxCreateKeywordSpotter(&keywords_spotter_config);

  free((void *)tokens_buf);
  tokens_buf = NULL;
  free((void *)keywords_buf);
  keywords_buf = NULL;

  if (keywords_spotter == NULL) {
    fprintf(stderr, "Please check your config!\n");
    SherpaOnnxFreeWave(wave);
    return -1;
  }

  const SherpaOnnxOnlineStream *stream =
      SherpaOnnxCreateKeywordStream(keywords_spotter);

  const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50);
  int32_t segment_id = 0;

// simulate streaming. You can choose an arbitrary N
#define N 3200

  fprintf(stderr, "sample rate: %d, num samples: %d, duration: %.2f s\n",
          wave->sample_rate, wave->num_samples,
          (float)wave->num_samples / wave->sample_rate);

  int32_t k = 0;
  while (k < wave->num_samples) {
    int32_t start = k;
    int32_t end =
        (start + N > wave->num_samples) ? wave->num_samples : (start + N);
    k += N;

    SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate,
                                         wave->samples + start, end - start);
    while (SherpaOnnxIsKeywordStreamReady(keywords_spotter, stream)) {
      SherpaOnnxDecodeKeywordStream(keywords_spotter, stream);
    }

    const SherpaOnnxKeywordResult *r =
        SherpaOnnxGetKeywordResult(keywords_spotter, stream);

    if (strlen(r->keyword)) {
      SherpaOnnxPrint(display, segment_id, r->keyword);
    }

    SherpaOnnxDestroyKeywordResult(r);
  }

  // add some tail padding
  float tail_paddings[4800] = {0};  // 0.3 seconds at 16 kHz sample rate
  SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, tail_paddings,
                                       4800);

  SherpaOnnxFreeWave(wave);

  SherpaOnnxOnlineStreamInputFinished(stream);
  while (SherpaOnnxIsKeywordStreamReady(keywords_spotter, stream)) {
    SherpaOnnxDecodeKeywordStream(keywords_spotter, stream);
  }

  const SherpaOnnxKeywordResult *r =
      SherpaOnnxGetKeywordResult(keywords_spotter, stream);

  if (strlen(r->keyword)) {
    SherpaOnnxPrint(display, segment_id, r->keyword);
  }

  SherpaOnnxDestroyKeywordResult(r);

  SherpaOnnxDestroyDisplay(display);
  SherpaOnnxDestroyOnlineStream(stream);
  SherpaOnnxDestroyKeywordSpotter(keywords_spotter);
  fprintf(stderr, "\n");

  return 0;
}


================================================
FILE: c-api-examples/kitten-tts-en-c-api.c
================================================
// c-api-examples/kitten-tts-en-c-api.c
//
// Copyright (c)  2025  Xiaomi Corporation

// This file shows how to use sherpa-onnx C API
// for English TTS with Kitten.
//
// clang-format off
/*
Usage

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2
tar xf kitten-nano-en-v0_1-fp16.tar.bz2
rm kitten-nano-en-v0_1-fp16.tar.bz2

./kitten-tts-en-c-api

 */
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

static int32_t ProgressCallback(const float *samples, int32_t num_samples,
                                float progress, void *arg) {
  fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
  // return 1 to continue generating
  // return 0 to stop generating
  return 1;
}

int32_t main(int32_t argc, char *argv[]) {
  SherpaOnnxOfflineTtsConfig config;
  memset(&config, 0, sizeof(config));
  config.model.kitten.model = "./kitten-nano-en-v0_1-fp16/model.fp16.onnx";
  config.model.kitten.voices = "./kitten-nano-en-v0_1-fp16/voices.bin";
  config.model.kitten.tokens = "./kitten-nano-en-v0_1-fp16/tokens.txt";
  config.model.kitten.data_dir = "./kitten-nano-en-v0_1-fp16/espeak-ng-data";

  config.model.num_threads = 2;

  // If you don't want to see debug messages, please set it to 0
  config.model.debug = 1;

  const char *filename = "./generated-kitten-en.wav";
  const char *text =
      "Today as always, men fall into two groups: slaves and free men. Whoever "
      "does not have two-thirds of his day for himself, is a slave, whatever "
      "he may be: a statesman, a businessman, an official, or a scholar. "
      "Friends fell out often because life was changing so fast. The easiest "
      "thing in the world was to lose touch with someone.";

  const SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
  // mapping of sid to voice name
  // 0->expr-voice-2-m, 1->expr-voice-2-f, 2->expr-voice-3-m
  // 3->expr-voice-3-f, 4->expr-voice-4-m, 5->expr-voice-4-f
  // 6->expr-voice-5-m, 7->expr-voice-5-f
  int32_t sid = 0;
  float speed = 1.0;  // larger -> faster in speech speed
  SherpaOnnxGenerationConfig cfg = {0};
  cfg.silence_scale = 0.2f;
  cfg.sid = sid;
  cfg.speed = speed;

#if 0
  // If you don't want to use a callback, then please enable this branch
  const SherpaOnnxGeneratedAudio *audio =
      SherpaOnnxOfflineTtsGenerateWithConfig(tts, text, &cfg, NULL, NULL);
#else
  const SherpaOnnxGeneratedAudio *audio =
      SherpaOnnxOfflineTtsGenerateWithConfig(tts, text, &cfg, ProgressCallback,
                                             NULL);
#endif

  SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate, filename);

  SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
  SherpaOnnxDestroyOfflineTts(tts);

  fprintf(stderr, "Input text is: %s\n", text);
  fprintf(stderr, "Speaker ID is: %d\n", sid);
  fprintf(stderr, "Saved to: %s\n", filename);

  return 0;
}


================================================
FILE: c-api-examples/kokoro-tts-en-c-api.c
================================================
// c-api-examples/kokoro-tts-en-c-api.c
//
// Copyright (c)  2025  Xiaomi Corporation

// This file shows how to use sherpa-onnx C API
// for English TTS with Kokoro.
//
// clang-format off
/*
Usage


wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
tar xf kokoro-en-v0_19.tar.bz2
rm kokoro-en-v0_19.tar.bz2

./kokoro-tts-en-c-api

 */
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

static int32_t ProgressCallback(const float *samples, int32_t num_samples,
                                float progress, void *arg) {
  fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
  // return 1 to continue generating
  // return 0 to stop generating
  return 1;
}

int32_t main(int32_t argc, char *argv[]) {
  SherpaOnnxOfflineTtsConfig config;
  memset(&config, 0, sizeof(config));
  config.model.kokoro.model = "./kokoro-en-v0_19/model.onnx";
  config.model.kokoro.voices = "./kokoro-en-v0_19/voices.bin";
  config.model.kokoro.tokens = "./kokoro-en-v0_19/tokens.txt";
  config.model.kokoro.data_dir = "./kokoro-en-v0_19/espeak-ng-data";

  config.model.num_threads = 2;

  // If you don't want to see debug messages, please set it to 0
  config.model.debug = 1;

  const char *filename = "./generated-kokoro-en.wav";
  const char *text =
      "Today as always, men fall into two groups: slaves and free men. Whoever "
      "does not have two-thirds of his day for himself, is a slave, whatever "
      "he may be: a statesman, a businessman, an official, or a scholar. "
      "Friends fell out often because life was changing so fast. The easiest "
      "thing in the world was to lose touch with someone.";

  const SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
  // mapping of sid to voice name
  // 0->af, 1->af_bella, 2->af_nicole, 3->af_sarah, 4->af_sky, 5->am_adam
  // 6->am_michael, 7->bf_emma, 8->bf_isabella, 9->bm_george, 10->bm_lewis
  int32_t sid = 0;
  float speed = 1.0;  // larger -> faster in speech speed
  SherpaOnnxGenerationConfig cfg = {0};
  cfg.silence_scale = 0.2f;
  cfg.sid = sid;
  cfg.speed = speed;

#if 0
  // If you don't want to use a callback, then please enable this branch
  const SherpaOnnxGeneratedAudio *audio =
      SherpaOnnxOfflineTtsGenerateWithConfig(tts, text, &cfg, NULL, NULL);
#else
  const SherpaOnnxGeneratedAudio *audio =
      SherpaOnnxOfflineTtsGenerateWithConfig(tts, text, &cfg, ProgressCallback,
                                             NULL);
#endif

  SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate, filename);

  SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
  SherpaOnnxDestroyOfflineTts(tts);

  fprintf(stderr, "Input text is: %s\n", text);
  fprintf(stderr, "Speaker ID is: %d\n", sid);
  fprintf(stderr, "Saved to: %s\n", filename);

  return 0;
}


================================================
FILE: c-api-examples/kokoro-tts-zh-en-c-api.c
================================================
// c-api-examples/kokoro-tts-zh-en-c-api.c
//
// Copyright (c)  2025  Xiaomi Corporation

// This file shows how to use sherpa-onnx C API
// for English + Chinese TTS with Kokoro.
//
// clang-format off
/*
Usage


wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
tar xf kokoro-multi-lang-v1_0.tar.bz2
rm kokoro-multi-lang-v1_0.tar.bz2

./kokoro-tts-zh-en-c-api

 */
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

static int32_t ProgressCallback(const float *samples, int32_t num_samples,
                                float progress, void *arg) {
  fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
  // return 1 to continue generating
  // return 0 to stop generating
  return 1;
}

int32_t main(int32_t argc, char *argv[]) {
  SherpaOnnxOfflineTtsConfig config;
  memset(&config, 0, sizeof(config));
  config.model.kokoro.model = "./kokoro-multi-lang-v1_0/model.onnx";
  config.model.kokoro.voices = "./kokoro-multi-lang-v1_0/voices.bin";
  config.model.kokoro.tokens = "./kokoro-multi-lang-v1_0/tokens.txt";
  config.model.kokoro.data_dir = "./kokoro-multi-lang-v1_0/espeak-ng-data";
  config.model.kokoro.dict_dir = "./kokoro-multi-lang-v1_0/dict";
  config.model.kokoro.lexicon =
      "./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/"
      "lexicon-zh.txt";

  config.model.num_threads = 2;

  // If you don't want to see debug messages, please set it to 0
  config.model.debug = 1;

  const char *filename = "./generated-kokoro-zh-en.wav";
  const char *text =
      "中英文语音合成测试。This is generated by next generation Kaldi using "
      "Kokoro without Misaki. 你觉得中英文说的如何呢？";

  const SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
  int32_t sid = 0;    // there are 53 speakers
  float speed = 1.0;  // larger -> faster in speech speed
  SherpaOnnxGenerationConfig cfg = {0};
  cfg.silence_scale = 0.2f;
  cfg.sid = sid;
  cfg.speed = speed;

#if 0
  // If you don't want to use a callback, then please enable this branch
  const SherpaOnnxGeneratedAudio *audio =
      SherpaOnnxOfflineTtsGenerateWithConfig(tts, text, &cfg, NULL, NULL);
#else
  const SherpaOnnxGeneratedAudio *audio =
      SherpaOnnxOfflineTtsGenerateWithConfig(tts, text, &cfg, ProgressCallback,
                                             NULL);
#endif

  SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate, filename);

  SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
  SherpaOnnxDestroyOfflineTts(tts);

  fprintf(stderr, "Input text is: %s\n", text);
  fprintf(stderr, "Speaker ID is: %d\n", sid);
  fprintf(stderr, "Saved to: %s\n", filename);

  return 0;
}


================================================
FILE: c-api-examples/kws-c-api.c
================================================
// c-api-examples/kws-c-api.c
//
// Copyright (c)  2025  Xiaomi Corporation
//
// This file demonstrates how to use keywords spotter with sherpa-onnx's C
// clang-format off
//
// Usage
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
// tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
// rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
//
// ./kws-c-api
//
// clang-format on
#include <stdio.h>
#include <stdlib.h>  // exit
#include <string.h>  // memset

#include "sherpa-onnx/c-api/c-api.h"

int32_t main() {
  SherpaOnnxKeywordSpotterConfig config;

  memset(&config, 0, sizeof(config));
  config.model_config.transducer.encoder =
      "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
      "encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx";

  config.model_config.transducer.decoder =
      "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
      "decoder-epoch-12-avg-2-chunk-16-left-64.onnx";

  config.model_config.transducer.joiner =
      "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
      "joiner-epoch-12-avg-2-chunk-16-left-64.int8.onnx";

  config.model_config.tokens =
      "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
      "tokens.txt";

  config.model_config.provider = "cpu";
  config.model_config.num_threads = 1;
  config.model_config.debug = 1;

  config.keywords_file =
      "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
      "test_wavs/test_keywords.txt";

  const SherpaOnnxKeywordSpotter *kws = SherpaOnnxCreateKeywordSpotter(&config);
  if (!kws) {
    fprintf(stderr, "Please check your config");
    exit(-1);
  }

  fprintf(stderr,
          "--Test pre-defined keywords from test_wavs/test_keywords.txt--\n");

  const char *wav_filename =
      "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
      "test_wavs/3.wav";

  float tail_paddings[8000] = {0};  // 0.5 seconds

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    exit(-1);
  }

  const SherpaOnnxOnlineStream *stream = SherpaOnnxCreateKeywordStream(kws);
  if (!stream) {
    fprintf(stderr, "Failed to create stream\n");
    exit(-1);
  }

  SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, wave->samples,
                                       wave->num_samples);

  SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, tail_paddings,
                                       sizeof(tail_paddings) / sizeof(float));
  SherpaOnnxOnlineStreamInputFinished(stream);
  while (SherpaOnnxIsKeywordStreamReady(kws, stream)) {
    SherpaOnnxDecodeKeywordStream(kws, stream);
    const SherpaOnnxKeywordResult *r = SherpaOnnxGetKeywordResult(kws, stream);
    if (r && r->json && strlen(r->keyword)) {
      fprintf(stderr, "Detected keyword: %s\n", r->json);

      // Remember to reset the keyword stream right after a keyword is detected
      SherpaOnnxResetKeywordStream(kws, stream);
    }
    SherpaOnnxDestroyKeywordResult(r);
  }
  SherpaOnnxDestroyOnlineStream(stream);

  // --------------------------------------------------------------------------

  fprintf(stderr, "--Use pre-defined keywords + add a new keyword--\n");

  stream = SherpaOnnxCreateKeywordStreamWithKeywords(kws, "y ǎn y uán @演员");

  SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, wave->samples,
                                       wave->num_samples);

  SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, tail_paddings,
                                       sizeof(tail_paddings) / sizeof(float));
  SherpaOnnxOnlineStreamInputFinished(stream);
  while (SherpaOnnxIsKeywordStreamReady(kws, stream)) {
    SherpaOnnxDecodeKeywordStream(kws, stream);
    const SherpaOnnxKeywordResult *r = SherpaOnnxGetKeywordResult(kws, stream);
    if (r && r->json && strlen(r->keyword)) {
      fprintf(stderr, "Detected keyword: %s\n", r->json);

      // Remember to reset the keyword stream
      SherpaOnnxResetKeywordStream(kws, stream);
    }
    SherpaOnnxDestroyKeywordResult(r);
  }
  SherpaOnnxDestroyOnlineStream(stream);

  // --------------------------------------------------------------------------

  fprintf(stderr, "--Use pre-defined keywords + add two new keywords--\n");

  stream = SherpaOnnxCreateKeywordStreamWithKeywords(
      kws, "y ǎn y uán @演员/zh ī m íng @知名");

  SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, wave->samples,
                                       wave->num_samples);

  SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, tail_paddings,
                                       sizeof(tail_paddings) / sizeof(float));
  SherpaOnnxOnlineStreamInputFinished(stream);
  while (SherpaOnnxIsKeywordStreamReady(kws, stream)) {
    SherpaOnnxDecodeKeywordStream(kws, stream);
    const SherpaOnnxKeywordResult *r = SherpaOnnxGetKeywordResult(kws, stream);
    if (r && r->json && strlen(r->keyword)) {
      fprintf(stderr, "Detected keyword: %s\n", r->json);

      // Remember to reset the keyword stream
      SherpaOnnxResetKeywordStream(kws, stream);
    }
    SherpaOnnxDestroyKeywordResult(r);
  }
  SherpaOnnxDestroyOnlineStream(stream);

  SherpaOnnxFreeWave(wave);
  SherpaOnnxDestroyKeywordSpotter(kws);

  return 0;
}


================================================
FILE: c-api-examples/matcha-tts-en-c-api.c
================================================
// c-api-examples/matcha-tts-en-c-api.c
//
// Copyright (c)  2025  Xiaomi Corporation

// This file shows how to use sherpa-onnx C API
// for English TTS with MatchaTTS.
//
// clang-format off
/*
Usage

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
rm matcha-icefall-en_US-ljspeech.tar.bz2

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx

./matcha-tts-en-c-api

 */
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

static int32_t ProgressCallback(const float *samples, int32_t num_samples,
                                float progress, void *arg) {
  fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
  // return 1 to continue generating
  // return 0 to stop generating
  return 1;
}

int32_t main(int32_t argc, char *argv[]) {
  SherpaOnnxOfflineTtsConfig config;
  memset(&config, 0, sizeof(config));
  config.model.matcha.acoustic_model =
      "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx";

  config.model.matcha.vocoder = "./vocos-22khz-univ.onnx";

  config.model.matcha.tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt";

  config.model.matcha.data_dir =
      "./matcha-icefall-en_US-ljspeech/espeak-ng-data";

  config.model.num_threads = 1;

  // If you don't want to see debug messages, please set it to 0
  config.model.debug = 1;

  const char *filename = "./generated-matcha-en.wav";
  const char *text =
      "Today as always, men fall into two groups: slaves and free men. Whoever "
      "does not have two-thirds of his day for himself, is a slave, whatever "
      "he may be: a statesman, a businessman, an official, or a scholar. "
      "Friends fell out often because life was changing so fast. The easiest "
      "thing in the world was to lose touch with someone.";

  const SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
  SherpaOnnxGenerationConfig cfg = {0};
  cfg.sid = 0;
  cfg.speed = 1.0f;  // larger -> faster in speech speed
  cfg.silence_scale = config.silence_scale;

#if 0
  // If you don't want to use a callback, then please enable this branch
  const SherpaOnnxGeneratedAudio *audio =
      SherpaOnnxOfflineTtsGenerateWithConfig(tts, text, &cfg, NULL, NULL);
#else
  const SherpaOnnxGeneratedAudio *audio =
      SherpaOnnxOfflineTtsGenerateWithConfig(tts, text, &cfg, ProgressCallback,
                                             NULL);
#endif

  SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate, filename);

  SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
  SherpaOnnxDestroyOfflineTts(tts);

  fprintf(stderr, "Input text is: %s\n", text);
  fprintf(stderr, "Speaker ID is: %d\n", cfg.sid);
  fprintf(stderr, "Saved to: %s\n", filename);

  return 0;
}


================================================
FILE: c-api-examples/matcha-tts-zh-c-api.c
================================================
// c-api-examples/matcha-tts-zh-c-api.c
//
// Copyright (c)  2025  Xiaomi Corporation

// This file shows how to use sherpa-onnx C API
// for Chinese TTS with MatchaTTS.
//
// clang-format off
/*
Usage

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
tar xvf matcha-icefall-zh-baker.tar.bz2
rm matcha-icefall-zh-baker.tar.bz2

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx

./matcha-tts-zh-c-api

 */
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

static int32_t ProgressCallback(const float *samples, int32_t num_samples,
                                float progress, void *arg) {
  fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
  // return 1 to continue generating
  // return 0 to stop generating
  return 1;
}

int32_t main(int32_t argc, char *argv[]) {
  SherpaOnnxOfflineTtsConfig config;
  memset(&config, 0, sizeof(config));
  config.model.matcha.acoustic_model =
      "./matcha-icefall-zh-baker/model-steps-3.onnx";
  config.model.matcha.vocoder = "./vocos-22khz-univ.onnx";
  config.model.matcha.lexicon = "./matcha-icefall-zh-baker/lexicon.txt";
  config.model.matcha.tokens = "./matcha-icefall-zh-baker/tokens.txt";
  config.model.matcha.dict_dir = "./matcha-icefall-zh-baker/dict";
  config.model.num_threads = 1;

  // If you don't want to see debug messages, please set it to 0
  config.model.debug = 1;

  // clang-format off
  config.rule_fsts = "./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst";
  // clang-format on

  const char *filename = "./generated-matcha-zh.wav";
  const char *text =
      "当夜幕降临，星光点点，伴随着微风拂面，我在静谧中感受着时光的流转，思念如"
      "涟漪荡漾，梦境如画卷展开，我与自然融为一体，沉静在这片宁静的美丽之中，感"
      "受着生命的奇迹与温柔."
      "某某银行的副行长和一些行政领导表示，他们去过长江和长白山; "
      "经济不断增长。2024年12月31号，拨打110或者18920240511。123456块钱。";

  const SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
  SherpaOnnxGenerationConfig cfg = {0};
  cfg.sid = 0;
  cfg.speed = 1.0f;  // larger -> faster in speech speed
  cfg.silence_scale = config.silence_scale;

#if 0
  // If you don't want to use a callback, then please enable this branch
  const SherpaOnnxGeneratedAudio *audio =
      SherpaOnnxOfflineTtsGenerateWithConfig(tts, text, &cfg, NULL, NULL);
#else
  const SherpaOnnxGeneratedAudio *audio =
      SherpaOnnxOfflineTtsGenerateWithConfig(tts, text, &cfg, ProgressCallback,
                                             NULL);
#endif

  SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate, filename);

  SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
  SherpaOnnxDestroyOfflineTts(tts);

  fprintf(stderr, "Input text is: %s\n", text);
  fprintf(stderr, "Speaker ID is: %d\n", cfg.sid);
  fprintf(stderr, "Saved to: %s\n", filename);

  return 0;
}


================================================
FILE: c-api-examples/medasr-ctc-c-api.c
================================================
// c-api-examples/medasr-ctc-c-api.c
//
// Copyright (c)  2025  Xiaomi Corporation

//
// This file demonstrates how to use MedASR with sherpa-onnx's C API.
// clang-format off
/*
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
tar xvf sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
rm sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
*/
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

int32_t main() {
  // clang-format off
  const char *wav_filename = "./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/test_wavs/0.wav";
  const char *model_filename = "./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/model.int8.onnx";
  const char *tokens_filename = "./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/tokens.txt";
  // clang-format on

  const char *provider = "cpu";

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  SherpaOnnxOfflineMedAsrCtcModelConfig medasr;
  memset(&medasr, 0, sizeof(medasr));
  medasr.model = model_filename;

  // Offline model config
  SherpaOnnxOfflineModelConfig offline_model_config;
  memset(&offline_model_config, 0, sizeof(offline_model_config));
  offline_model_config.debug = 1;
  offline_model_config.num_threads = 1;
  offline_model_config.provider = provider;
  offline_model_config.tokens = tokens_filename;
  offline_model_config.medasr = medasr;

  // Recognizer config
  SherpaOnnxOfflineRecognizerConfig recognizer_config;
  memset(&recognizer_config, 0, sizeof(recognizer_config));
  recognizer_config.decoding_method = "greedy_search";
  recognizer_config.model_config = offline_model_config;

  const SherpaOnnxOfflineRecognizer *recognizer =
      SherpaOnnxCreateOfflineRecognizer(&recognizer_config);

  if (recognizer == NULL) {
    fprintf(stderr, "Please check your config!\n");
    SherpaOnnxFreeWave(wave);
    return -1;
  }

  const SherpaOnnxOfflineStream *stream =
      SherpaOnnxCreateOfflineStream(recognizer);

  SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
                                  wave->num_samples);
  SherpaOnnxDecodeOfflineStream(recognizer, stream);
  const SherpaOnnxOfflineRecognizerResult *result =
      SherpaOnnxGetOfflineStreamResult(stream);

  fprintf(stderr, "Decoded text: %s\n", result->text);

  SherpaOnnxDestroyOfflineRecognizerResult(result);
  SherpaOnnxDestroyOfflineStream(stream);
  SherpaOnnxDestroyOfflineRecognizer(recognizer);
  SherpaOnnxFreeWave(wave);

  return 0;
}


================================================
FILE: c-api-examples/moonshine-c-api.c
================================================
// c-api-examples/moonshine-c-api.c
//
// Copyright (c)  2024  Xiaomi Corporation

//
// This file demonstrates how to use Moonshine tiny with sherpa-onnx's C API.
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
// tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
// rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

int32_t main() {
  const char *wav_filename =
      "./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav";
  const char *preprocessor =
      "./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx";
  const char *encoder = "./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx";
  const char *uncached_decoder =
      "./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx";
  const char *cached_decoder =
      "./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx";
  const char *tokens = "./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt";

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  // Offline model config
  SherpaOnnxOfflineModelConfig offline_model_config;
  memset(&offline_model_config, 0, sizeof(offline_model_config));
  offline_model_config.debug = 1;
  offline_model_config.num_threads = 1;
  offline_model_config.provider = "cpu";
  offline_model_config.tokens = tokens;
  offline_model_config.moonshine.preprocessor = preprocessor;
  offline_model_config.moonshine.encoder = encoder;
  offline_model_config.moonshine.uncached_decoder = uncached_decoder;
  offline_model_config.moonshine.cached_decoder = cached_decoder;

  // Recognizer config
  SherpaOnnxOfflineRecognizerConfig recognizer_config;
  memset(&recognizer_config, 0, sizeof(recognizer_config));
  recognizer_config.decoding_method = "greedy_search";
  recognizer_config.model_config = offline_model_config;

  const SherpaOnnxOfflineRecognizer *recognizer =
      SherpaOnnxCreateOfflineRecognizer(&recognizer_config);

  if (recognizer == NULL) {
    fprintf(stderr, "Please check your config!\n");
    SherpaOnnxFreeWave(wave);
    return -1;
  }

  const SherpaOnnxOfflineStream *stream =
      SherpaOnnxCreateOfflineStream(recognizer);

  SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
                                  wave->num_samples);
  SherpaOnnxDecodeOfflineStream(recognizer, stream);
  const SherpaOnnxOfflineRecognizerResult *result =
      SherpaOnnxGetOfflineStreamResult(stream);

  fprintf(stderr, "Decoded text: %s\n", result->text);

  SherpaOnnxDestroyOfflineRecognizerResult(result);
  SherpaOnnxDestroyOfflineStream(stream);
  SherpaOnnxDestroyOfflineRecognizer(recognizer);
  SherpaOnnxFreeWave(wave);

  return 0;
}


================================================
FILE: c-api-examples/moonshine-v2-c-api.c
================================================
// c-api-examples/moonshine-v2-c-api.c
//
// Copyright (c)  2024-2026  Xiaomi Corporation

//
// This file demonstrates how to use Moonshine v2 with sherpa-onnx's C API.
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
// tar xvf sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
// rm sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

int32_t main() {
  // clang-format off
  const char *wav_filename = "./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/test_wavs/0.wav";
  const char *encoder = "./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/encoder_model.ort";
  const char *merged_decoder = "./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/decoder_model_merged.ort";
  const char *tokens = "./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/tokens.txt";
  // clang-format on

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  // Offline model config
  SherpaOnnxOfflineModelConfig offline_model_config;
  memset(&offline_model_config, 0, sizeof(offline_model_config));
  offline_model_config.debug = 1;
  offline_model_config.num_threads = 1;
  offline_model_config.provider = "cpu";
  offline_model_config.tokens = tokens;
  offline_model_config.moonshine.encoder = encoder;
  offline_model_config.moonshine.merged_decoder = merged_decoder;

  // Recognizer config
  SherpaOnnxOfflineRecognizerConfig recognizer_config;
  memset(&recognizer_config, 0, sizeof(recognizer_config));
  recognizer_config.decoding_method = "greedy_search";
  recognizer_config.model_config = offline_model_config;

  const SherpaOnnxOfflineRecognizer *recognizer =
      SherpaOnnxCreateOfflineRecognizer(&recognizer_config);

  if (recognizer == NULL) {
    fprintf(stderr, "Please check your config!\n");
    SherpaOnnxFreeWave(wave);
    return -1;
  }

  const SherpaOnnxOfflineStream *stream =
      SherpaOnnxCreateOfflineStream(recognizer);

  SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
                                  wave->num_samples);
  SherpaOnnxDecodeOfflineStream(recognizer, stream);
  const SherpaOnnxOfflineRecognizerResult *result =
      SherpaOnnxGetOfflineStreamResult(stream);

  fprintf(stderr, "Decoded text: %s\n", result->text);

  SherpaOnnxDestroyOfflineRecognizerResult(result);
  SherpaOnnxDestroyOfflineStream(stream);
  SherpaOnnxDestroyOfflineRecognizer(recognizer);
  SherpaOnnxFreeWave(wave);

  return 0;
}


================================================
FILE: c-api-examples/nemo-canary-c-api.c
================================================
// c-api-examples/nemo-canary-c-api.c
//
// Copyright (c)  2025  Xiaomi Corporation

// We assume you have pre-downloaded the Nemo Canary model
// from https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
// An example is given below:
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
// tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
// rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
//
// clang-format on
//
// see https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html
// for details

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

int32_t main() {
  const char *wav_filename =
      "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/de.wav";
  const char *encoder_filename =
      "sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx";
  const char *decoder_filename =
      "sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx";
  const char *tokens_filename =
      "sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt";
  const char *provider = "cpu";

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  // Offline model config
  SherpaOnnxOfflineModelConfig offline_model_config;
  memset(&offline_model_config, 0, sizeof(offline_model_config));

  // set debug to 1 to view more logs
  offline_model_config.debug = 0;

  offline_model_config.num_threads = 1;
  offline_model_config.provider = provider;
  offline_model_config.tokens = tokens_filename;
  offline_model_config.canary.encoder = encoder_filename;
  offline_model_config.canary.decoder = decoder_filename;

  // so it output punctuations and cases
  offline_model_config.canary.use_pnc = 1;

  offline_model_config.canary.src_lang = "de";

  // since there is a German audio, you can set tgt_lang to en or de
  offline_model_config.canary.tgt_lang = "en";

  // Recognizer config
  SherpaOnnxOfflineRecognizerConfig recognizer_config;
  memset(&recognizer_config, 0, sizeof(recognizer_config));
  recognizer_config.decoding_method = "greedy_search";
  recognizer_config.model_config = offline_model_config;

  const SherpaOnnxOfflineRecognizer *recognizer =
      SherpaOnnxCreateOfflineRecognizer(&recognizer_config);

  if (recognizer == NULL) {
    fprintf(stderr, "Please check your config!\n");

    SherpaOnnxFreeWave(wave);

    return -1;
  }

  const SherpaOnnxOfflineStream *stream =
      SherpaOnnxCreateOfflineStream(recognizer);

  SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
                                  wave->num_samples);
  SherpaOnnxDecodeOfflineStream(recognizer, stream);
  const SherpaOnnxOfflineRecognizerResult *result =
      SherpaOnnxGetOfflineStreamResult(stream);

  fprintf(stderr, "Decoded text (English): %s\n", result->text);

  SherpaOnnxDestroyOfflineRecognizerResult(result);
  SherpaOnnxDestroyOfflineStream(stream);

  // now output German text
  recognizer_config.model_config.canary.tgt_lang = "de";
  SherpaOnnxOfflineRecognizerSetConfig(recognizer, &recognizer_config);

  stream = SherpaOnnxCreateOfflineStream(recognizer);

  SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
                                  wave->num_samples);
  SherpaOnnxDecodeOfflineStream(recognizer, stream);
  result = SherpaOnnxGetOfflineStreamResult(stream);

  fprintf(stderr, "Decoded text (German): %s\n", result->text);

  SherpaOnnxDestroyOfflineRecognizerResult(result);
  SherpaOnnxDestroyOfflineStream(stream);

  SherpaOnnxDestroyOfflineRecognizer(recognizer);
  SherpaOnnxFreeWave(wave);

  return 0;
}


================================================
FILE: c-api-examples/nemo-parakeet-c-api.c
================================================
// c-api-examples/nemo-parakeet-c-api.c
// Example using the C API and sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8 model
// Prints recognized text, per-token timestamps, and durations

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

int32_t main() {
  const char *wav_filename =
      "./sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8/test_wavs/en.wav";
  const char *encoder_filename =
      "sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8/encoder.int8.onnx";
  const char *decoder_filename =
      "sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8/decoder.int8.onnx";
  const char *joiner_filename =
      "sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8/joiner.int8.onnx";
  const char *tokens_filename =
      "sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8/tokens.txt";
  const char *provider = "cpu";

  if (!SherpaOnnxFileExists(wav_filename)) {
    fprintf(stderr, "File not found: %s\n", wav_filename);
    return -1;
  }
  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read or parse %s (not a valid mono 16-bit WAVE file)\n", wav_filename);
    return -1;
  }

  SherpaOnnxOfflineModelConfig offline_model_config;
  memset(&offline_model_config, 0, sizeof(offline_model_config));
  offline_model_config.debug = 0;
  offline_model_config.num_threads = 1;
  offline_model_config.provider = provider;
  offline_model_config.tokens = tokens_filename;
  offline_model_config.transducer.encoder = encoder_filename;
  offline_model_config.transducer.decoder = decoder_filename;
  offline_model_config.transducer.joiner = joiner_filename;

  SherpaOnnxOfflineRecognizerConfig recognizer_config;
  memset(&recognizer_config, 0, sizeof(recognizer_config));
  recognizer_config.decoding_method = "greedy_search";
  recognizer_config.model_config = offline_model_config;

  const SherpaOnnxOfflineRecognizer *recognizer =
      SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
  if (recognizer == NULL) {
    fprintf(stderr, "Please check your config!\n");
    SherpaOnnxFreeWave(wave);
    return -1;
  }

  const SherpaOnnxOfflineStream *stream =
      SherpaOnnxCreateOfflineStream(recognizer);
  if (stream == NULL) {
    fprintf(stderr, "Failed to create offline stream.\n");
    SherpaOnnxDestroyOfflineRecognizer(recognizer);
    SherpaOnnxFreeWave(wave);
    return -1;
  }

  SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
                                  wave->num_samples);
  SherpaOnnxDecodeOfflineStream(recognizer, stream);
  const SherpaOnnxOfflineRecognizerResult *result =
      SherpaOnnxGetOfflineStreamResult(stream);

  printf("Recognized text: %s\n", result->text);

  if (result->tokens_arr && result->timestamps && result->durations) {
    printf("Token\tTimestamp\tDuration\n");
    for (int32_t i = 0; i < result->count; ++i) {
      printf("%s\t%.2f\t%.2f\n", result->tokens_arr[i], result->timestamps[i], result->durations[i]);
    }
  } else {
    printf("Timestamps or durations not available.\n");
  }

  SherpaOnnxDestroyOfflineRecognizerResult(result);
  SherpaOnnxDestroyOfflineStream(stream);
  SherpaOnnxDestroyOfflineRecognizer(recognizer);
  SherpaOnnxFreeWave(wave);

  return 0;
}


================================================
FILE: c-api-examples/offline-speaker-diarization-c-api.c
================================================
// c-api-examples/offline-sepaker-diarization-c-api.c
//
// Copyright (c)  2024  Xiaomi Corporation

//
// This file demonstrates how to implement speaker diarization with
// sherpa-onnx's C API.

// clang-format off
/*
Usage:

Step 1: Download a speaker segmentation model

Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
for a list of available models. The following is an example

  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2

Step 2: Download a speaker embedding extractor model

Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
for a list of available models. The following is an example

  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

Step 3. Download test wave files

Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
for a list of available test wave files. The following is an example

  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav

Step 4. Run it

 */
// clang-format on

#include <stdio.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

static int32_t ProgressCallback(int32_t num_processed_chunks,
                                int32_t num_total_chunks, void *arg) {
  float progress = 100.0 * num_processed_chunks / num_total_chunks;
  fprintf(stderr, "progress %.2f%%\n", progress);

  // the return value is currently ignored
  return 0;
}

int main() {
  // Please see the comments at the start of this file for how to download
  // the .onnx file and .wav files below
  const char *segmentation_model =
      "./sherpa-onnx-pyannote-segmentation-3-0/model.onnx";

  const char *embedding_extractor_model =
      "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx";

  const char *wav_filename = "./0-four-speakers-zh.wav";

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  SherpaOnnxOfflineSpeakerDiarizationConfig config;
  memset(&config, 0, sizeof(config));

  config.segmentation.pyannote.model = segmentation_model;
  config.embedding.model = embedding_extractor_model;

  // the test wave ./0-four-speakers-zh.wav has 4 speakers, so
  // we set num_clusters to 4
  //
  config.clustering.num_clusters = 4;
  // If you don't know the number of speakers in the test wave file, please
  // use
  // config.clustering.threshold = 0.5; // You need to tune this threshold

  const SherpaOnnxOfflineSpeakerDiarization *sd =
      SherpaOnnxCreateOfflineSpeakerDiarization(&config);

  if (!sd) {
    fprintf(stderr, "Failed to initialize offline speaker diarization\n");
    return -1;
  }

  if (SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(sd) !=
      wave->sample_rate) {
    fprintf(
        stderr,
        "Expected sample rate: %d. Actual sample rate from the wave file: %d\n",
        SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(sd),
        wave->sample_rate);
    goto failed;
  }

  const SherpaOnnxOfflineSpeakerDiarizationResult *result =
      SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback(
          sd, wave->samples, wave->num_samples, ProgressCallback, NULL);
  if (!result) {
    fprintf(stderr, "Failed to do speaker diarization");
    goto failed;
  }

  int32_t num_segments =
      SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(result);

  const SherpaOnnxOfflineSpeakerDiarizationSegment *segments =
      SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(result);

  for (int32_t i = 0; i != num_segments; ++i) {
    fprintf(stderr, "%.3f -- %.3f speaker_%02d\n", segments[i].start,
            segments[i].end, segments[i].speaker);
  }

failed:

  SherpaOnnxOfflineSpeakerDiarizationDestroySegment(segments);
  SherpaOnnxOfflineSpeakerDiarizationDestroyResult(result);
  SherpaOnnxDestroyOfflineSpeakerDiarization(sd);
  SherpaOnnxFreeWave(wave);

  return 0;
}


================================================
FILE: c-api-examples/offline-tts-c-api.c
================================================
// c-api-examples/offline-tts-c-api.c
//
// Copyright (c)  2023  Xiaomi Corporation

// This file shows how to use sherpa-onnx C API
// to convert text to speech using an offline model.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "cargs.h"
#include "sherpa-onnx/c-api/c-api.h"

static struct cag_option options[] = {
    {.identifier = 'h',
     .access_letters = "h",
     .access_name = "help",
     .description = "Show help"},
    {.access_name = "vits-model",
     .value_name = "/path/to/xxx.onnx",
     .identifier = '0',
     .description = "Path to VITS model"},
    {.access_name = "vits-lexicon",
     .value_name = "/path/to/lexicon.txt",
     .identifier = '1',
     .description = "Path to lexicon.txt for VITS models"},
    {.access_name = "vits-tokens",
     .value_name = "/path/to/tokens.txt",
     .identifier = '2',
     .description = "Path to tokens.txt for VITS models"},
    {.access_name = "vits-noise-scale",
     .value_name = "0.667",
     .identifier = '3',
     .description = "noise_scale for VITS models"},
    {.access_name = "vits-noise-scale-w",
     .value_name = "0.8",
     .identifier = '4',
     .description = "noise_scale_w for VITS models"},
    {.access_name = "vits-length-scale",
     .value_name = "1.0",
     .identifier = '5',
     .description =
         "length_scale for VITS models. Default to 1. You can tune it "
         "to change the speech speed. small -> faster; large -> slower. "},
    {.access_name = "num-threads",
     .value_name = "1",
     .identifier = '6',
     .description = "Number of threads"},
    {.access_name = "provider",
     .value_name = "cpu",
     .identifier = '7',
     .description = "Provider: cpu (default), cuda, coreml"},
    {.access_name = "debug",
     .value_name = "0",
     .identifier = '8',
     .description = "1 to show debug messages while loading the model"},
    {.access_name = "sid",
     .value_name = "0",
     .identifier = '9',
     .description = "Speaker ID. Default to 0. Note it is not used for "
                    "single-speaker models."},
    {.access_name = "output-filename",
     .value_name = "./generated.wav",
     .identifier = 'a',
     .description =
         "Filename to save the generated audio. Default to ./generated.wav"},

    {.access_name = "tts-rule-fsts",
     .value_name = "/path/to/rule.fst",
     .identifier = 'b',
     .description = "It not empty, it contains a list of rule FST filenames."
                    "Multiple filenames are separated by a comma and they are "
                    "applied from left to right. An example value: "
                    "rule1.fst,rule2,fst,rule3.fst"},

    {.access_name = "max-num-sentences",
     .value_name = "2",
     .identifier = 'c',
     .description = "Maximum number of sentences that we process at a time. "
                    "This is to avoid OOM for very long input text. "
                    "If you set it to -1, then we process all sentences in a "
                    "single batch."},

    {.access_name = "vits-data-dir",
     .value_name = "/path/to/espeak-ng-data",
     .identifier = 'd',
     .description =
         "Path to espeak-ng-data. If it is given, --vits-lexicon is ignored"},

};

static void ShowUsage() {
  const char *kUsageMessage =
      "Offline text-to-speech with sherpa-onnx C API"
      "\n"
      "./offline-tts-c-api \\\n"
      " --vits-model=/path/to/model.onnx \\\n"
      " --vits-lexicon=/path/to/lexicon.txt \\\n"
      " --vits-tokens=/path/to/tokens.txt \\\n"
      " --sid=0 \\\n"
      " --output-filename=./generated.wav \\\n"
      " 'some text within single quotes on linux/macos or use double quotes on "
      "windows'\n"
      "\n"
      "It will generate a file ./generated.wav as specified by "
      "--output-filename.\n"
      "\n"
      "You can download a test model from\n"
      "https://huggingface.co/csukuangfj/vits-ljs\n"
      "\n"
      "For instance, you can use:\n"
      "wget "
      "https://huggingface.co/csukuangfj/vits-ljs/resolve/main/vits-ljs.onnx\n"
      "wget "
      "https://huggingface.co/csukuangfj/vits-ljs/resolve/main/lexicon.txt\n"
      "wget "
      "https://huggingface.co/csukuangfj/vits-ljs/resolve/main/tokens.txt\n"
      "\n"
      "./offline-tts-c-api \\\n"
      "  --vits-model=./vits-ljs.onnx \\\n"
      "  --vits-lexicon=./lexicon.txt \\\n"
      "  --vits-tokens=./tokens.txt \\\n"
      "  --sid=0 \\\n"
      "  --output-filename=./generated.wav \\\n"
      "  'liliana, the most beautiful and lovely assistant of our team!'\n"
      "\n"
      "Please see\n"
      "https://k2-fsa.github.io/sherpa/onnx/tts/index.html\n"
      "or details.\n\n";

  fprintf(stderr, "%s", kUsageMessage);
  cag_option_print(options, CAG_ARRAY_SIZE(options), stderr);
  exit(0);
}

int32_t main(int32_t argc, char *argv[]) {
  cag_option_context context;
  char identifier;
  const char *value;

  cag_option_prepare(&context, options, CAG_ARRAY_SIZE(options), argc, argv);

  SherpaOnnxOfflineTtsConfig config;
  memset(&config, 0, sizeof(config));

  int32_t sid = 0;
  const char *filename = strdup("./generated.wav");
  const char *text;

  while (cag_option_fetch(&context)) {
    identifier = cag_option_get(&context);
    value = cag_option_get_value(&context);
    switch (identifier) {
      case '0':
        config.model.vits.model = value;
        break;
      case '1':
        config.model.vits.lexicon = value;
        break;
      case '2':
        config.model.vits.tokens = value;
        break;
      case '3':
        config.model.vits.noise_scale = atof(value);
        break;
      case '4':
        config.model.vits.noise_scale_w = atof(value);
        break;
      case '5':
        config.model.vits.length_scale = atof(value);
        break;
      case '6':
        config.model.num_threads = atoi(value);
        break;
      case '7':
        config.model.provider = value;
        break;
      case '8':
        config.model.debug = atoi(value);
        break;
      case '9':
        sid = atoi(value);
        break;
      case 'a':
        free((void *)filename);
        filename = strdup(value);
        break;
      case 'b':
        config.rule_fsts = value;
        break;
      case 'c':
        config.max_num_sentences = atoi(value);
        break;
      case 'd':
        config.model.vits.data_dir = value;
        break;
      case '?':
        fprintf(stderr, "Unknown option\n");
        // fall through
      case 'h':
        // fall through
      default:
        ShowUsage();
    }
  }
  fprintf(stderr, "here\n");

  if (!config.model.vits.model) {
    fprintf(stderr, "Please provide --vits-model\n");
    ShowUsage();
  }

  if (!config.model.vits.tokens) {
    fprintf(stderr, "Please provide --vits-tokens\n");
    ShowUsage();
  }

  if (!config.model.vits.data_dir && !config.model.vits.lexicon) {
    fprintf(stderr, "Please provide --vits-data-dir or --vits-lexicon\n");
    ShowUsage();
  }

  // the last arg is the text
  text = argv[argc - 1];
  if (text[0] == '-') {
    fprintf(stderr, "\n***Please input your text!***\n\n");
    fprintf(stderr, "\n---------------Usage---------------\n\n");
    ShowUsage();
  }

  const SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);

  SherpaOnnxGenerationConfig cfg = {0};
  cfg.silence_scale = 0.2f;
  cfg.sid = sid;
  cfg.speed = 1.0f;

  const SherpaOnnxGeneratedAudio *audio =
      SherpaOnnxOfflineTtsGenerateWithConfig(tts, text, &cfg, NULL, NULL);

  SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate, filename);

  SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
  SherpaOnnxDestroyOfflineTts(tts);

  fprintf(stderr, "Input text is: %s\n", text);
  fprintf(stderr, "Speaker ID is: %d\n", sid);
  fprintf(stderr, "Saved to: %s\n", filename);

  free((void *)filename);

  return 0;
}


================================================
FILE: c-api-examples/omnilingual-asr-ctc-c-api.c
================================================
// c-api-examples/omnilingual-asr-ctc-c-api.c
//
// Copyright (c)  2025  Xiaomi Corporation

//
// This file demonstrates how to use Omnilingual ASR with sherpa-onnx's C API.
// clang-format off
/*
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
tar xvf sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
rm sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
*/
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

int32_t main() {
  // clang-format off
  const char *wav_filename = "./sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/test_wavs/en.wav";
  const char *model_filename = "./sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/model.int8.onnx";
  const char *tokens_filename = "./sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/tokens.txt";
  // clang-format on

  const char *provider = "cpu";

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  SherpaOnnxOfflineOmnilingualAsrCtcModelConfig omnilingual;
  memset(&omnilingual, 0, sizeof(omnilingual));
  omnilingual.model = model_filename;

  // Offline model config
  SherpaOnnxOfflineModelConfig offline_model_config;
  memset(&offline_model_config, 0, sizeof(offline_model_config));
  offline_model_config.debug = 1;
  offline_model_config.num_threads = 1;
  offline_model_config.provider = provider;
  offline_model_config.tokens = tokens_filename;
  offline_model_config.omnilingual = omnilingual;

  // Recognizer config
  SherpaOnnxOfflineRecognizerConfig recognizer_config;
  memset(&recognizer_config, 0, sizeof(recognizer_config));
  recognizer_config.decoding_method = "greedy_search";
  recognizer_config.model_config = offline_model_config;

  const SherpaOnnxOfflineRecognizer *recognizer =
      SherpaOnnxCreateOfflineRecognizer(&recognizer_config);

  if (recognizer == NULL) {
    fprintf(stderr, "Please check your config!\n");
    SherpaOnnxFreeWave(wave);
    return -1;
  }

  const SherpaOnnxOfflineStream *stream =
      SherpaOnnxCreateOfflineStream(recognizer);

  SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
                                  wave->num_samples);
  SherpaOnnxDecodeOfflineStream(recognizer, stream);
  const SherpaOnnxOfflineRecognizerResult *result =
      SherpaOnnxGetOfflineStreamResult(stream);

  fprintf(stderr, "Decoded text: %s\n", result->text);

  SherpaOnnxDestroyOfflineRecognizerResult(result);
  SherpaOnnxDestroyOfflineStream(stream);
  SherpaOnnxDestroyOfflineRecognizer(recognizer);
  SherpaOnnxFreeWave(wave);

  return 0;
}


================================================
FILE: c-api-examples/online-speech-enhancement-dpdfnet-c-api.c
================================================
// c-api-examples/online-speech-enhancement-dpdfnet-c-api.c
//
// Copyright (c)  2026  Xiaomi Corporation
//
// We assume you have pre-downloaded model
// from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
// or
// https://huggingface.co/Ceva-IP/DPDFNet
//
// An example command to download
// clang-format off
/*
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet_baseline.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
*/
// clang-format on

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

static int32_t AppendSamples(float **samples, int32_t *num_samples,
                             const SherpaOnnxDenoisedAudio *audio) {
  float *p = NULL;

  if (!audio || audio->n == 0) {
    return 1;
  }

  p = (float *)realloc(*samples, sizeof(float) * (*num_samples + audio->n));
  if (!p) {
    fprintf(stderr, "Failed to allocate memory for output samples\n");
    return 0;
  }

  memcpy(p + *num_samples, audio->samples, sizeof(float) * audio->n);
  *samples = p;
  *num_samples += audio->n;
  return 1;
}

int32_t main() {
  SherpaOnnxOnlineSpeechDenoiserConfig config;
  const char *model_filename = "./dpdfnet_baseline.onnx";
  const char *wav_filename = "./inp_16k.wav";
  const char *out_wave_filename = "./enhanced-online-dpdfnet.wav";
  float *samples = NULL;
  int32_t num_samples = 0;

  memset(&config, 0, sizeof(config));
  config.model.dpdfnet.model = model_filename;

  const SherpaOnnxOnlineSpeechDenoiser *sd =
      SherpaOnnxCreateOnlineSpeechDenoiser(&config);
  if (!sd) {
    fprintf(stderr, "Please check your config\n");
    return -1;
  }

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (!wave) {
    SherpaOnnxDestroyOnlineSpeechDenoiser(sd);
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  int32_t frame_shift = SherpaOnnxOnlineSpeechDenoiserGetFrameShiftInSamples(sd);
  for (int32_t start = 0; start < wave->num_samples; start += frame_shift) {
    int32_t n = frame_shift;
    if (start + n > wave->num_samples) {
      n = wave->num_samples - start;
    }

    const SherpaOnnxDenoisedAudio *audio = SherpaOnnxOnlineSpeechDenoiserRun(
        sd, wave->samples + start, n, wave->sample_rate);
    int32_t ok = AppendSamples(&samples, &num_samples, audio);
    SherpaOnnxDestroyDenoisedAudio(audio);
    if (!ok) {
      free(samples);
      SherpaOnnxFreeWave(wave);
      SherpaOnnxDestroyOnlineSpeechDenoiser(sd);
      return -1;
    }
  }

  const SherpaOnnxDenoisedAudio *tail = SherpaOnnxOnlineSpeechDenoiserFlush(sd);
  int32_t sample_rate = tail ? tail->sample_rate
                             : SherpaOnnxOnlineSpeechDenoiserGetSampleRate(sd);
  int32_t ok = AppendSamples(&samples, &num_samples, tail);
  SherpaOnnxDestroyDenoisedAudio(tail);
  if (!ok) {
    free(samples);
    SherpaOnnxFreeWave(wave);
    SherpaOnnxDestroyOnlineSpeechDenoiser(sd);
    return -1;
  }

  if (num_samples == 0) {
    fprintf(stderr, "No denoised samples were produced\n");
    free(samples);
    SherpaOnnxFreeWave(wave);
    SherpaOnnxDestroyOnlineSpeechDenoiser(sd);
    return -1;
  }

  SherpaOnnxWriteWave(samples, num_samples, sample_rate, out_wave_filename);

  free(samples);
  SherpaOnnxFreeWave(wave);
  SherpaOnnxDestroyOnlineSpeechDenoiser(sd);

  fprintf(stdout, "Saved to %s\n", out_wave_filename);
  return 0;
}


================================================
FILE: c-api-examples/online-speech-enhancement-gtcrn-c-api.c
================================================
// c-api-examples/online-speech-enhancement-gtcrn-c-api.c
//
// Copyright (c)  2026  Xiaomi Corporation
//
// We assume you have pre-downloaded model
// from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
//
// An example command to download
// clang-format off
/*
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
*/
// clang-format on

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

static int32_t AppendSamples(float **samples, int32_t *num_samples,
                             const SherpaOnnxDenoisedAudio *audio) {
  float *p = NULL;

  if (!audio || audio->n == 0) {
    return 1;
  }

  p = (float *)realloc(*samples, sizeof(float) * (*num_samples + audio->n));
  if (!p) {
    fprintf(stderr, "Failed to allocate memory for output samples\n");
    return 0;
  }

  memcpy(p + *num_samples, audio->samples, sizeof(float) * audio->n);
  *samples = p;
  *num_samples += audio->n;
  return 1;
}

int32_t main() {
  SherpaOnnxOnlineSpeechDenoiserConfig config;
  const char *model_filename = "./gtcrn_simple.onnx";
  const char *wav_filename = "./inp_16k.wav";
  const char *out_wave_filename = "./enhanced-online-gtcrn.wav";
  float *samples = NULL;
  int32_t num_samples = 0;

  memset(&config, 0, sizeof(config));
  config.model.gtcrn.model = model_filename;

  const SherpaOnnxOnlineSpeechDenoiser *sd =
      SherpaOnnxCreateOnlineSpeechDenoiser(&config);
  if (!sd) {
    fprintf(stderr, "Please check your config\n");
    return -1;
  }

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (!wave) {
    SherpaOnnxDestroyOnlineSpeechDenoiser(sd);
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  int32_t frame_shift = SherpaOnnxOnlineSpeechDenoiserGetFrameShiftInSamples(sd);
  for (int32_t start = 0; start < wave->num_samples; start += frame_shift) {
    int32_t n = frame_shift;
    if (start + n > wave->num_samples) {
      n = wave->num_samples - start;
    }

    const SherpaOnnxDenoisedAudio *audio = SherpaOnnxOnlineSpeechDenoiserRun(
        sd, wave->samples + start, n, wave->sample_rate);
    int32_t ok = AppendSamples(&samples, &num_samples, audio);
    SherpaOnnxDestroyDenoisedAudio(audio);
    if (!ok) {
      free(samples);
      SherpaOnnxFreeWave(wave);
      SherpaOnnxDestroyOnlineSpeechDenoiser(sd);
      return -1;
    }
  }

  const SherpaOnnxDenoisedAudio *tail = SherpaOnnxOnlineSpeechDenoiserFlush(sd);
  int32_t sample_rate = tail ? tail->sample_rate
                             : SherpaOnnxOnlineSpeechDenoiserGetSampleRate(sd);
  int32_t ok = AppendSamples(&samples, &num_samples, tail);
  SherpaOnnxDestroyDenoisedAudio(tail);
  if (!ok) {
    free(samples);
    SherpaOnnxFreeWave(wave);
    SherpaOnnxDestroyOnlineSpeechDenoiser(sd);
    return -1;
  }

  if (num_samples == 0) {
    fprintf(stderr, "No denoised samples were produced\n");
    free(samples);
    SherpaOnnxFreeWave(wave);
    SherpaOnnxDestroyOnlineSpeechDenoiser(sd);
    return -1;
  }

  SherpaOnnxWriteWave(samples, num_samples, sample_rate, out_wave_filename);

  free(samples);
  SherpaOnnxFreeWave(wave);
  SherpaOnnxDestroyOnlineSpeechDenoiser(sd);

  fprintf(stdout, "Saved to %s\n", out_wave_filename);
  return 0;
}


================================================
FILE: c-api-examples/paraformer-c-api.c
================================================
// c-api-examples/paraformer-c-api.c
//
// Copyright (c)  2024  Xiaomi Corporation

//
// This file demonstrates how to use non-streaming Paraformer with sherpa-onnx's
// C API.
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-small-2024-03-09.tar.bz2
// tar xvf sherpa-onnx-paraformer-zh-small-2024-03-09.tar.bz2
// rm sherpa-onnx-paraformer-zh-small-2024-03-09.tar.bz2
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

int32_t main() {
  const char *wav_filename =
      "sherpa-onnx-paraformer-zh-small-2024-03-09/test_wavs/0.wav";
  const char *model_filename =
      "sherpa-onnx-paraformer-zh-small-2024-03-09/model.int8.onnx";
  const char *tokens_filename =
      "sherpa-onnx-paraformer-zh-small-2024-03-09/tokens.txt";
  const char *provider = "cpu";

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  // Paraformer config
  SherpaOnnxOfflineParaformerModelConfig paraformer_config;
  memset(&paraformer_config, 0, sizeof(paraformer_config));
  paraformer_config.model = model_filename;

  // Offline model config
  SherpaOnnxOfflineModelConfig offline_model_config;
  memset(&offline_model_config, 0, sizeof(offline_model_config));
  offline_model_config.debug = 1;
  offline_model_config.num_threads = 1;
  offline_model_config.provider = provider;
  offline_model_config.tokens = tokens_filename;
  offline_model_config.paraformer = paraformer_config;

  // Recognizer config
  SherpaOnnxOfflineRecognizerConfig recognizer_config;
  memset(&recognizer_config, 0, sizeof(recognizer_config));
  recognizer_config.decoding_method = "greedy_search";
  recognizer_config.model_config = offline_model_config;

  const SherpaOnnxOfflineRecognizer *recognizer =
      SherpaOnnxCreateOfflineRecognizer(&recognizer_config);

  if (recognizer == NULL) {
    fprintf(stderr, "Please check your config!\n");
    SherpaOnnxFreeWave(wave);
    return -1;
  }

  const SherpaOnnxOfflineStream *stream =
      SherpaOnnxCreateOfflineStream(recognizer);

  SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
                                  wave->num_samples);
  SherpaOnnxDecodeOfflineStream(recognizer, stream);
  const SherpaOnnxOfflineRecognizerResult *result =
      SherpaOnnxGetOfflineStreamResult(stream);

  fprintf(stderr, "Decoded text: %s\n", result->text);

  SherpaOnnxDestroyOfflineRecognizerResult(result);
  SherpaOnnxDestroyOfflineStream(stream);
  SherpaOnnxDestroyOfflineRecognizer(recognizer);
  SherpaOnnxFreeWave(wave);

  return 0;
}


================================================
FILE: c-api-examples/pocket-tts-en-c-api.c
================================================
// c-api-examples/pocket-tts-en-c-api.c
//
// Copyright (c)  2026  Xiaoyingtao Corporation

// This file shows how to use sherpa-onnx C API
// for English TTS with Pocket TTS.
//
// clang-format off
/*
Usage

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
tar xf sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
rm sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2

./pocket-tts-en-c-api

 */
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

static int32_t ProgressCallback(const float *samples, int32_t num_samples,
                                float progress, void *arg) {
  fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
  // return 1 to continue generating
  // return 0 to stop generating
  return 1;
}

int32_t main(int32_t argc, char *argv[]) {
  SherpaOnnxOfflineTtsConfig config;
  memset(&config, 0, sizeof(config));
  config.model.pocket.lm_flow =
      "./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_flow.int8.onnx";
  config.model.pocket.lm_main =
      "./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_main.int8.onnx";
  config.model.pocket.encoder =
      "./sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx";
  config.model.pocket.decoder =
      "./sherpa-onnx-pocket-tts-int8-2026-01-26/decoder.int8.onnx";
  config.model.pocket.text_conditioner =
      "./sherpa-onnx-pocket-tts-int8-2026-01-26/text_conditioner.onnx";
  config.model.pocket.vocab_json =
      "./sherpa-onnx-pocket-tts-int8-2026-01-26/vocab.json";
  config.model.pocket.token_scores_json =
      "./sherpa-onnx-pocket-tts-int8-2026-01-26/token_scores.json";
  // Voice embedding cache capacity (default: 50)
  // Increase this if you have many different reference audios to avoid
  // recomputing voice embeddings
  config.model.pocket.voice_embedding_cache_capacity = 50;

  config.model.num_threads = 2;

  // If you don't want to see debug messages, please set it to 0
  config.model.debug = 1;

  const char *filename = "./generated-pocket-en.wav";
  const char *text =
      "Today as always, men fall into two groups: slaves and free men. Whoever "
      "does not have two-thirds of his day for himself, is a slave, whatever "
      "he may be: a statesman, a businessman, an official, or a scholar. "
      "Friends fell out often because life was changing so fast. The easiest "
      "thing in the world was to lose touch with someone.";

  const SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
  if (!tts) {
    fprintf(stderr, "Error create Offline TTS\n");
    return -1;
  }
  float speed = 1.0;  // larger -> faster in speech speed
  SherpaOnnxGenerationConfig cfg = {0};
  const char *reference_audio_file =
      "./sherpa-onnx-pocket-tts-int8-2026-01-26/test_wavs/bria.wav";
  const SherpaOnnxWave *wave = NULL;
  wave = SherpaOnnxReadWave(reference_audio_file);
  if (!wave) {
    fprintf(stderr, "Failed to read %s\n", reference_audio_file);
    SherpaOnnxDestroyOfflineTts(tts);
    return -1;
  }
  cfg.reference_audio = wave->samples;
  cfg.reference_audio_len = wave->num_samples;
  cfg.reference_sample_rate = wave->sample_rate;
  // Extra parameters passed as JSON string
  // - max_reference_audio_len: maximum length of reference audio in seconds
  // - seed: random seed for reproducibility (optional, -1 for random)
  cfg.extra = "{\"max_reference_audio_len\": 10.0, \"seed\": 42}";

#if 0
  // If you don't want to use a callback, then please enable this branch
  const SherpaOnnxGeneratedAudio *audio =
      SherpaOnnxOfflineTtsGenerateWithConfig(tts, text, &cfg, NULL, NULL);
#else
  const SherpaOnnxGeneratedAudio *audio =
      SherpaOnnxOfflineTtsGenerateWithConfig(tts, text, &cfg, ProgressCallback,
                                             NULL);
#endif

  if (wave) SherpaOnnxFreeWave(wave);

  fprintf(stderr, "Input text is: %s\n", text);

  if (audio) {
    SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate, filename);
    fprintf(stderr, "Saved to: %s\n", filename);
    SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
  }

  SherpaOnnxDestroyOfflineTts(tts);

  return 0;
}


================================================
FILE: c-api-examples/sense-voice-c-api.c
================================================
// c-api-examples/sense-voice-c-api.c
//
// Copyright (c)  2024  Xiaomi Corporation

//
// This file demonstrates how to use SenseVoice with sherpa-onnx's C API.
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
// tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
// rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

int32_t main() {
  const char *wav_filename =
      "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/en.wav";
  const char *model_filename =
      "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx";
  const char *tokens_filename =
      "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt";
  const char *language = "auto";
  const char *provider = "cpu";
  int32_t use_inverse_text_normalization = 1;

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  SherpaOnnxOfflineSenseVoiceModelConfig sense_voice_config;
  memset(&sense_voice_config, 0, sizeof(sense_voice_config));
  sense_voice_config.model = model_filename;
  sense_voice_config.language = language;
  sense_voice_config.use_itn = use_inverse_text_normalization;

  // Offline model config
  SherpaOnnxOfflineModelConfig offline_model_config;
  memset(&offline_model_config, 0, sizeof(offline_model_config));
  offline_model_config.debug = 1;
  offline_model_config.num_threads = 1;
  offline_model_config.provider = provider;
  offline_model_config.tokens = tokens_filename;
  offline_model_config.sense_voice = sense_voice_config;

  // Recognizer config
  SherpaOnnxOfflineRecognizerConfig recognizer_config;
  memset(&recognizer_config, 0, sizeof(recognizer_config));
  recognizer_config.decoding_method = "greedy_search";
  recognizer_config.model_config = offline_model_config;

  const SherpaOnnxOfflineRecognizer *recognizer =
      SherpaOnnxCreateOfflineRecognizer(&recognizer_config);

  if (recognizer == NULL) {
    fprintf(stderr, "Please check your config!\n");
    SherpaOnnxFreeWave(wave);
    return -1;
  }

  const SherpaOnnxOfflineStream *stream =
      SherpaOnnxCreateOfflineStream(recognizer);

  SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
                                  wave->num_samples);
  SherpaOnnxDecodeOfflineStream(recognizer, stream);
  const SherpaOnnxOfflineRecognizerResult *result =
      SherpaOnnxGetOfflineStreamResult(stream);

  fprintf(stderr, "Decoded text: %s\n", result->text);

  SherpaOnnxDestroyOfflineRecognizerResult(result);
  SherpaOnnxDestroyOfflineStream(stream);
  SherpaOnnxDestroyOfflineRecognizer(recognizer);
  SherpaOnnxFreeWave(wave);

  return 0;
}


================================================
FILE: c-api-examples/sense-voice-with-hr-c-api.c
================================================
// c-api-examples/sense-voice-with-hr-c-api.c
//
// Copyright (c)  2024-2025  Xiaomi Corporation

//
// This file demonstrates how to use SenseVoice with sherpa-onnx's C API
// with homophone replacer.
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
// tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
// rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
// tar xf dict.tar.bz2
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

int32_t main() {
  const char *wav_filename = "./test-hr.wav";
  const char *model_filename =
      "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx";
  const char *tokens_filename =
      "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt";
  const char *language = "auto";
  const char *provider = "cpu";
  int32_t use_inverse_text_normalization = 1;

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  SherpaOnnxOfflineSenseVoiceModelConfig sense_voice_config;
  memset(&sense_voice_config, 0, sizeof(sense_voice_config));
  sense_voice_config.model = model_filename;
  sense_voice_config.language = language;
  sense_voice_config.use_itn = use_inverse_text_normalization;

  // Offline model config
  SherpaOnnxOfflineModelConfig offline_model_config;
  memset(&offline_model_config, 0, sizeof(offline_model_config));
  offline_model_config.debug = 1;
  offline_model_config.num_threads = 1;
  offline_model_config.provider = provider;
  offline_model_config.tokens = tokens_filename;
  offline_model_config.sense_voice = sense_voice_config;

  // Recognizer config
  SherpaOnnxOfflineRecognizerConfig recognizer_config;
  memset(&recognizer_config, 0, sizeof(recognizer_config));
  recognizer_config.decoding_method = "greedy_search";
  recognizer_config.model_config = offline_model_config;
  recognizer_config.hr.dict_dir = "./dict";
  recognizer_config.hr.lexicon = "./lexicon.txt";

  // Please see
  // https://colab.research.google.com/drive/1jEaS3s8FbRJIcVQJv2EQx19EM_mnuARi?usp=sharing
  // for how to generate your own replace.fst
  recognizer_config.hr.rule_fsts = "./replace.fst";

  const SherpaOnnxOfflineRecognizer *recognizer =
      SherpaOnnxCreateOfflineRecognizer(&recognizer_config);

  if (recognizer == NULL) {
    fprintf(stderr, "Please check your config!\n");
    SherpaOnnxFreeWave(wave);
    return -1;
  }

  const SherpaOnnxOfflineStream *stream =
      SherpaOnnxCreateOfflineStream(recognizer);

  SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
                                  wave->num_samples);
  SherpaOnnxDecodeOfflineStream(recognizer, stream);
  const SherpaOnnxOfflineRecognizerResult *result =
      SherpaOnnxGetOfflineStreamResult(stream);

  fprintf(stderr, "Decoded text: %s\n", result->text);

  SherpaOnnxDestroyOfflineRecognizerResult(result);
  SherpaOnnxDestroyOfflineStream(stream);
  SherpaOnnxDestroyOfflineRecognizer(recognizer);
  SherpaOnnxFreeWave(wave);

  return 0;
}


================================================
FILE: c-api-examples/speaker-identification-c-api.c
================================================
// c-api-examples/speaker-identification-c-api.c
//
// Copyright (c)  2024  Xiaomi Corporation

// We assume you have pre-downloaded the speaker embedding extractor model
// from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
//
// An example command to download
// "3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx"
// is given below:
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx
//
// clang-format on
//
// Also, please download the test wave files from
//
// https://github.com/csukuangfj/sr-data

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

static const float *ComputeEmbedding(
    const SherpaOnnxSpeakerEmbeddingExtractor *ex, const char *wav_filename) {
  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    exit(-1);
  }

  const SherpaOnnxOnlineStream *stream =
      SherpaOnnxSpeakerEmbeddingExtractorCreateStream(ex);

  SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, wave->samples,
                                       wave->num_samples);
  SherpaOnnxOnlineStreamInputFinished(stream);

  if (!SherpaOnnxSpeakerEmbeddingExtractorIsReady(ex, stream)) {
    fprintf(stderr, "The input wave file %s is too short!\n", wav_filename);
    exit(-1);
  }

  // we will free `v` outside of this function
  const float *v =
      SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding(ex, stream);

  SherpaOnnxDestroyOnlineStream(stream);
  SherpaOnnxFreeWave(wave);

  // Remember to free v to avoid memory leak
  return v;
}

int32_t main() {
  SherpaOnnxSpeakerEmbeddingExtractorConfig config;

  memset(&config, 0, sizeof(config));

  // please download the model from
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
  config.model = "./3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx";

  config.num_threads = 1;
  config.debug = 0;
  config.provider = "cpu";

  const SherpaOnnxSpeakerEmbeddingExtractor *ex =
      SherpaOnnxCreateSpeakerEmbeddingExtractor(&config);
  if (!ex) {
    fprintf(stderr, "Failed to create speaker embedding extractor");
    return -1;
  }

  int32_t dim = SherpaOnnxSpeakerEmbeddingExtractorDim(ex);

  const SherpaOnnxSpeakerEmbeddingManager *manager =
      SherpaOnnxCreateSpeakerEmbeddingManager(dim);

  // Please download the test data from
  // https://github.com/csukuangfj/sr-data
  const char *spk1_1 = "./sr-data/enroll/fangjun-sr-1.wav";
  const char *spk1_2 = "./sr-data/enroll/fangjun-sr-2.wav";
  const char *spk1_3 = "./sr-data/enroll/fangjun-sr-3.wav";

  const char *spk2_1 = "./sr-data/enroll/leijun-sr-1.wav";
  const char *spk2_2 = "./sr-data/enroll/leijun-sr-2.wav";

  const float *spk1_vec[4] = {NULL};
  spk1_vec[0] = ComputeEmbedding(ex, spk1_1);
  spk1_vec[1] = ComputeEmbedding(ex, spk1_2);
  spk1_vec[2] = ComputeEmbedding(ex, spk1_3);

  const float *spk2_vec[3] = {NULL};
  spk2_vec[0] = ComputeEmbedding(ex, spk2_1);
  spk2_vec[1] = ComputeEmbedding(ex, spk2_2);

  if (!SherpaOnnxSpeakerEmbeddingManagerAddList(manager, "fangjun", spk1_vec)) {
    fprintf(stderr, "Failed to register fangjun\n");
    exit(-1);
  }

  if (!SherpaOnnxSpeakerEmbeddingManagerContains(manager, "fangjun")) {
    fprintf(stderr, "Failed to find fangjun\n");
    exit(-1);
  }

  if (!SherpaOnnxSpeakerEmbeddingManagerAddList(manager, "leijun", spk2_vec)) {
    fprintf(stderr, "Failed to register leijun\n");
    exit(-1);
  }

  if (!SherpaOnnxSpeakerEmbeddingManagerContains(manager, "leijun")) {
    fprintf(stderr, "Failed to find leijun\n");
    exit(-1);
  }

  if (SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(manager) != 2) {
    fprintf(stderr, "There should be two speakers: fangjun and leijun\n");
    exit(-1);
  }

  const char *const *all_speakers =
      SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(manager);
  const char *const *p = all_speakers;
  fprintf(stderr, "list of registered speakers\n-----\n");
  while (p[0]) {
    fprintf(stderr, "speaker: %s\n", p[0]);
    ++p;
  }
  fprintf(stderr, "----\n");

  SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(all_speakers);

  const char *test1 = "./sr-data/test/fangjun-test-sr-1.wav";
  const char *test2 = "./sr-data/test/leijun-test-sr-1.wav";
  const char *test3 = "./sr-data/test/liudehua-test-sr-1.wav";

  const float *v1 = ComputeEmbedding(ex, test1);
  const float *v2 = ComputeEmbedding(ex, test2);
  const float *v3 = ComputeEmbedding(ex, test3);

  float threshold = 0.6;

  const char *name1 =
      SherpaOnnxSpeakerEmbeddingManagerSearch(manager, v1, threshold);
  if (name1) {
    fprintf(stderr, "%s: Found %s\n", test1, name1);
    SherpaOnnxSpeakerEmbeddingManagerFreeSearch(name1);
  } else {
    fprintf(stderr, "%s: Not found\n", test1);
  }

  const char *name2 =
      SherpaOnnxSpeakerEmbeddingManagerSearch(manager, v2, threshold);
  if (name2) {
    fprintf(stderr, "%s: Found %s\n", test2, name2);
    SherpaOnnxSpeakerEmbeddingManagerFreeSearch(name2);
  } else {
    fprintf(stderr, "%s: Not found\n", test2);
  }

  const char *name3 =
      SherpaOnnxSpeakerEmbeddingManagerSearch(manager, v3, threshold);
  if (name3) {
    fprintf(stderr, "%s: Found %s\n", test3, name3);
    SherpaOnnxSpeakerEmbeddingManagerFreeSearch(name3);
  } else {
    fprintf(stderr, "%s: Not found\n", test3);
  }

  int32_t ok = SherpaOnnxSpeakerEmbeddingManagerVerify(manager, "fangjun", v1,
                                                       threshold);
  if (ok) {
    fprintf(stderr, "%s matches fangjun\n", test1);
  } else {
    fprintf(stderr, "%s does NOT match fangjun\n", test1);
  }

  ok = SherpaOnnxSpeakerEmbeddingManagerVerify(manager, "fangjun", v2,
                                               threshold);
  if (ok) {
    fprintf(stderr, "%s matches fangjun\n", test2);
  } else {
    fprintf(stderr, "%s does NOT match fangjun\n", test2);
  }

  fprintf(stderr, "Removing fangjun\n");
  if (!SherpaOnnxSpeakerEmbeddingManagerRemove(manager, "fangjun")) {
    fprintf(stderr, "Failed to remove fangjun\n");
    exit(-1);
  }

  if (SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(manager) != 1) {
    fprintf(stderr, "There should be only 1 speaker left\n");
    exit(-1);
  }

  name1 = SherpaOnnxSpeakerEmbeddingManagerSearch(manager, v1, threshold);
  if (name1) {
    fprintf(stderr, "%s: Found %s\n", test1, name1);
    SherpaOnnxSpeakerEmbeddingManagerFreeSearch(name1);
  } else {
    fprintf(stderr, "%s: Not found\n", test1);
  }

  fprintf(stderr, "Removing leijun\n");
  if (!SherpaOnnxSpeakerEmbeddingManagerRemove(manager, "leijun")) {
    fprintf(stderr, "Failed to remove leijun\n");
    exit(-1);
  }

  if (SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(manager) != 0) {
    fprintf(stderr, "There should be only 1 speaker left\n");
    exit(-1);
  }

  name2 = SherpaOnnxSpeakerEmbeddingManagerSearch(manager, v2, threshold);
  if (name2) {
    fprintf(stderr, "%s: Found %s\n", test2, name2);
    SherpaOnnxSpeakerEmbeddingManagerFreeSearch(name2);
  } else {
    fprintf(stderr, "%s: Not found\n", test2);
  }

  all_speakers = SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(manager);

  p = all_speakers;
  fprintf(stderr, "list of registered speakers\n-----\n");
  while (p[0]) {
    fprintf(stderr, "speaker: %s\n", p[0]);
    ++p;
  }
  fprintf(stderr, "----\n");

  SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(all_speakers);
  SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(v1);
  SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(v2);
  SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(v3);

  SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(spk1_vec[0]);
  SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(spk1_vec[1]);
  SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(spk1_vec[2]);

  SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(spk2_vec[0]);
  SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(spk2_vec[1]);

  SherpaOnnxDestroySpeakerEmbeddingManager(manager);
  SherpaOnnxDestroySpeakerEmbeddingExtractor(ex);

  return 0;
}


================================================
FILE: c-api-examples/speech-enhancement-dpdfnet-c-api.c
================================================
// c-api-examples/speech-enhancement-dpdfnet-c-api.c
//
// Copyright (c)  2025  Xiaomi Corporation
//
// We assume you have pre-downloaded model
// from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
// or
// https://huggingface.co/Ceva-IP/DPDFNet
//
//
// An example command to download
// clang-format off
/*
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet_baseline.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet2.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet4.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet8.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet2_48khz_hr.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
*/
// clang-format on
//
// Use dpdfnet_baseline.onnx, dpdfnet2.onnx, dpdfnet4.onnx, or dpdfnet8.onnx
// for 16 kHz downstream ASR or speech recognition.
// Use dpdfnet2_48khz_hr.onnx for 48 kHz enhancement output.
#include <stdio.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

int32_t main() {
  SherpaOnnxOfflineSpeechDenoiserConfig config;
  const char *model_filename = "./dpdfnet_baseline.onnx";
  const char *wav_filename = "./inp_16k.wav";
  const char *out_wave_filename = "./enhanced.wav";

  memset(&config, 0, sizeof(config));
  config.model.dpdfnet.model = model_filename;

  const SherpaOnnxOfflineSpeechDenoiser *sd =
      SherpaOnnxCreateOfflineSpeechDenoiser(&config);
  if (!sd) {
    fprintf(stderr, "Please check your config");
    return -1;
  }

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    SherpaOnnxDestroyOfflineSpeechDenoiser(sd);
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  const SherpaOnnxDenoisedAudio *denoised = SherpaOnnxOfflineSpeechDenoiserRun(
      sd, wave->samples, wave->num_samples, wave->sample_rate);

  SherpaOnnxWriteWave(denoised->samples, denoised->n, denoised->sample_rate,
                      out_wave_filename);

  SherpaOnnxDestroyDenoisedAudio(denoised);
  SherpaOnnxFreeWave(wave);
  SherpaOnnxDestroyOfflineSpeechDenoiser(sd);

  fprintf(stdout, "Saved to %s\n", out_wave_filename);
}


================================================
FILE: c-api-examples/speech-enhancement-gtcrn-c-api.c
================================================
// c-api-examples/speech-enhancement-gtcrn-c-api.c
//
// Copyright (c)  2025  Xiaomi Corporation
//
// We assume you have pre-downloaded model
// from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
//
//
// An example command to download
// clang-format off
/*
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
*/
// clang-format on
#include <stdio.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

int32_t main() {
  SherpaOnnxOfflineSpeechDenoiserConfig config;
  const char *model_filename = "./gtcrn_simple.onnx";
  const char *wav_filename = "./inp_16k.wav";
  const char *out_wave_filename = "./enhanced.wav";

  memset(&config, 0, sizeof(config));
  config.model.gtcrn.model = model_filename;

  const SherpaOnnxOfflineSpeechDenoiser *sd =
      SherpaOnnxCreateOfflineSpeechDenoiser(&config);
  if (!sd) {
    fprintf(stderr, "Please check your config");
    return -1;
  }

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    SherpaOnnxDestroyOfflineSpeechDenoiser(sd);
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  const SherpaOnnxDenoisedAudio *denoised = SherpaOnnxOfflineSpeechDenoiserRun(
      sd, wave->samples, wave->num_samples, wave->sample_rate);

  SherpaOnnxWriteWave(denoised->samples, denoised->n, denoised->sample_rate,
                      out_wave_filename);

  SherpaOnnxDestroyDenoisedAudio(denoised);
  SherpaOnnxFreeWave(wave);
  SherpaOnnxDestroyOfflineSpeechDenoiser(sd);

  fprintf(stdout, "Saved to %s\n", out_wave_filename);
}


================================================
FILE: c-api-examples/spoken-language-identification-c-api.c
================================================
// c-api-examples/spoken-language-identification-c-api.c
//
// Copyright (c)  2024  Xiaomi Corporation

// We assume you have pre-downloaded the whisper multi-lingual models
// from https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
// An example command to download the "tiny" whisper model is given below:
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
// tar xvf sherpa-onnx-whisper-tiny.tar.bz2
// rm sherpa-onnx-whisper-tiny.tar.bz2
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

int32_t main() {
  SherpaOnnxSpokenLanguageIdentificationConfig config;

  memset(&config, 0, sizeof(config));

  config.whisper.encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx";
  config.whisper.decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx";
  config.num_threads = 1;
  config.debug = 1;
  config.provider = "cpu";

  const SherpaOnnxSpokenLanguageIdentification *slid =
      SherpaOnnxCreateSpokenLanguageIdentification(&config);
  if (!slid) {
    fprintf(stderr, "Failed to create spoken language identifier");
    return -1;
  }

  // You can find more test waves from
  // https://hf-mirror.com/spaces/k2-fsa/spoken-language-identification/tree/main/test_wavs
  const char *wav_filename = "./sherpa-onnx-whisper-tiny/test_wavs/0.wav";
  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  SherpaOnnxOfflineStream *stream =
      SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(slid);

  SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
                                  wave->num_samples);

  const SherpaOnnxSpokenLanguageIdentificationResult *result =
      SherpaOnnxSpokenLanguageIdentificationCompute(slid, stream);

  fprintf(stderr, "wav_filename: %s\n", wav_filename);
  fprintf(stderr, "Detected language: %s\n", result->lang);

  SherpaOnnxDestroySpokenLanguageIdentificationResult(result);
  SherpaOnnxDestroyOfflineStream(stream);
  SherpaOnnxFreeWave(wave);
  SherpaOnnxDestroySpokenLanguageIdentification(slid);

  return 0;
}


================================================
FILE: c-api-examples/streaming-ctc-buffered-tokens-c-api.c
================================================
// c-api-examples/streaming-ctc-buffered-tokens-c-api.c
//
// Copyright (c)  2024  Xiaomi Corporation
// Copyright (c)  2024  Luo Xiao

//
// This file demonstrates how to use streaming Zipformer2 Ctc with sherpa-onnx's
// C API and with tokens loaded from buffered strings instead of
// from external files API.
// clang-format off
// 
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
// tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
// rm sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

static size_t ReadFile(const char *filename, const char **buffer_out) {
  FILE *file = fopen(filename, "r");
  if (file == NULL) {
    fprintf(stderr, "Failed to open %s\n", filename);
    return -1;
  }
  fseek(file, 0L, SEEK_END);
  long size = ftell(file);
  rewind(file);
  *buffer_out = malloc(size);
  if (*buffer_out == NULL) {
    fclose(file);
    fprintf(stderr, "Memory error\n");
    return -1;
  }
  size_t read_bytes = fread((void *)*buffer_out, 1, size, file);
  if (read_bytes != size) {
    printf("Errors occurred in reading the file %s\n", filename);
    free((void *)*buffer_out);
    *buffer_out = NULL;
    fclose(file);
    return -1;
  }
  fclose(file);
  return read_bytes;
}

int32_t main() {
  const char *wav_filename =
      "sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/"
      "DEV_T0000000000.wav";
  const char *model_filename =
      "sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/"
      "ctc-epoch-20-avg-1-chunk-16-left-128.onnx";
  const char *tokens_filename =
      "sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt";
  const char *provider = "cpu";

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  // reading tokens to buffers
  const char *tokens_buf;
  size_t token_buf_size = ReadFile(tokens_filename, &tokens_buf);
  if (token_buf_size < 1) {
    fprintf(stderr, "Please check your tokens.txt!\n");
    free((void *)tokens_buf);
    return -1;
  }

  // Zipformer2Ctc config
  SherpaOnnxOnlineZipformer2CtcModelConfig zipformer2_ctc_config;
  memset(&zipformer2_ctc_config, 0, sizeof(zipformer2_ctc_config));
  zipformer2_ctc_config.model = model_filename;

  // Online model config
  SherpaOnnxOnlineModelConfig online_model_config;
  memset(&online_model_config, 0, sizeof(online_model_config));
  online_model_config.debug = 1;
  online_model_config.num_threads = 1;
  online_model_config.provider = provider;
  online_model_config.tokens_buf = tokens_buf;
  online_model_config.tokens_buf_size = token_buf_size;
  online_model_config.zipformer2_ctc = zipformer2_ctc_config;

  // Recognizer config
  SherpaOnnxOnlineRecognizerConfig recognizer_config;
  memset(&recognizer_config, 0, sizeof(recognizer_config));
  recognizer_config.decoding_method = "greedy_search";
  recognizer_config.model_config = online_model_config;

  const SherpaOnnxOnlineRecognizer *recognizer =
      SherpaOnnxCreateOnlineRecognizer(&recognizer_config);

  free((void *)tokens_buf);
  tokens_buf = NULL;

  if (recognizer == NULL) {
    fprintf(stderr, "Please check your config!\n");
    SherpaOnnxFreeWave(wave);
    return -1;
  }

  const SherpaOnnxOnlineStream *stream =
      SherpaOnnxCreateOnlineStream(recognizer);

  const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50);
  int32_t segment_id = 0;

// simulate streaming. You can choose an arbitrary N
#define N 3200

  fprintf(stderr, "sample rate: %d, num samples: %d, duration: %.2f s\n",
          wave->sample_rate, wave->num_samples,
          (float)wave->num_samples / wave->sample_rate);

  int32_t k = 0;
  while (k < wave->num_samples) {
    int32_t start = k;
    int32_t end =
        (start + N > wave->num_samples) ? wave->num_samples : (start + N);
    k += N;

    SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate,
                                         wave->samples + start, end - start);
    while (SherpaOnnxIsOnlineStreamReady(recognizer, stream)) {
      SherpaOnnxDecodeOnlineStream(recognizer, stream);
    }

    const SherpaOnnxOnlineRecognizerResult *r =
        SherpaOnnxGetOnlineStreamResult(recognizer, stream);

    if (strlen(r->text)) {
      SherpaOnnxPrint(display, segment_id, r->text);
    }

    if (SherpaOnnxOnlineStreamIsEndpoint(recognizer, stream)) {
      if (strlen(r->text)) {
        ++segment_id;
      }
      SherpaOnnxOnlineStreamReset(recognizer, stream);
    }

    SherpaOnnxDestroyOnlineRecognizerResult(r);
  }

  // add some tail padding
  float tail_paddings[4800] = {0};  // 0.3 seconds at 16 kHz sample rate
  SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, tail_paddings,
                                       4800);

  SherpaOnnxFreeWave(wave);

  SherpaOnnxOnlineStreamInputFinished(stream);
  while (SherpaOnnxIsOnlineStreamReady(recognizer, stream)) {
    SherpaOnnxDecodeOnlineStream(recognizer, stream);
  }

  const SherpaOnnxOnlineRecognizerResult *r =
      SherpaOnnxGetOnlineStreamResult(recognizer, stream);

  if (strlen(r->text)) {
    SherpaOnnxPrint(display, segment_id, r->text);
  }

  SherpaOnnxDestroyOnlineRecognizerResult(r);

  SherpaOnnxDestroyDisplay(display);
  SherpaOnnxDestroyOnlineStream(stream);
  SherpaOnnxDestroyOnlineRecognizer(recognizer);
  fprintf(stderr, "\n");

  return 0;
}


================================================
FILE: c-api-examples/streaming-hlg-decode-file-c-api.c
================================================
// c-api-examples/streaming-hlg-decode-file-c-api.c
//
// Copyright (c)  2024  Xiaomi Corporation
/*
We use the following model as an example

// clang-format off

Download the model from
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2

tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2

build/bin/streaming-hlg-decode-file-c-api

(The above model is from https://github.com/k2-fsa/icefall/pull/1557)
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

int32_t main() {
  // clang-format off
  //
  // Please download the model from
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  const char *model = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx";
  const char *tokens = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt";
  const char *graph = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst";
  const char *wav_filename = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav";
  // clang-format on

  SherpaOnnxOnlineRecognizerConfig config;

  memset(&config, 0, sizeof(config));
  config.feat_config.sample_rate = 16000;
  config.feat_config.feature_dim = 80;
  config.model_config.zipformer2_ctc.model = model;
  config.model_config.tokens = tokens;
  config.model_config.num_threads = 1;
  config.model_config.provider = "cpu";
  config.model_config.debug = 0;
  config.ctc_fst_decoder_config.graph = graph;
  const SherpaOnnxOnlineRecognizer *recognizer =
      SherpaOnnxCreateOnlineRecognizer(&config);
  if (!recognizer) {
    fprintf(stderr, "Failed to create recognizer");
    exit(-1);
  }

  const SherpaOnnxOnlineStream *stream =
      SherpaOnnxCreateOnlineStream(recognizer);

  const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50);
  int32_t segment_id = 0;

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    exit(-1);
  }

// simulate streaming. You can choose an arbitrary N
#define N 3200

  fprintf(stderr, "sample rate: %d, num samples: %d, duration: %.2f s\n",
          wave->sample_rate, wave->num_samples,
          (float)wave->num_samples / wave->sample_rate);

  int32_t k = 0;
  while (k < wave->num_samples) {
    int32_t start = k;
    int32_t end =
        (start + N > wave->num_samples) ? wave->num_samples : (start + N);
    k += N;

    SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate,
                                         wave->samples + start, end - start);
    while (SherpaOnnxIsOnlineStreamReady(recognizer, stream)) {
      SherpaOnnxDecodeOnlineStream(recognizer, stream);
    }

    const SherpaOnnxOnlineRecognizerResult *r =
        SherpaOnnxGetOnlineStreamResult(recognizer, stream);

    if (strlen(r->text)) {
      SherpaOnnxPrint(display, segment_id, r->text);
    }

    if (SherpaOnnxOnlineStreamIsEndpoint(recognizer, stream)) {
      if (strlen(r->text)) {
        ++segment_id;
      }
      SherpaOnnxOnlineStreamReset(recognizer, stream);
    }

    SherpaOnnxDestroyOnlineRecognizerResult(r);
  }

  // add some tail padding
  float tail_paddings[4800] = {0};  // 0.3 seconds at 16 kHz sample rate
  SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, tail_paddings,
                                       4800);

  SherpaOnnxFreeWave(wave);

  SherpaOnnxOnlineStreamInputFinished(stream);
  while (SherpaOnnxIsOnlineStreamReady(recognizer, stream)) {
    SherpaOnnxDecodeOnlineStream(recognizer, stream);
  }

  const SherpaOnnxOnlineRecognizerResult *r =
      SherpaOnnxGetOnlineStreamResult(recognizer, stream);

  if (strlen(r->text)) {
    SherpaOnnxPrint(display, segment_id, r->text);
  }

  SherpaOnnxDestroyOnlineRecognizerResult(r);

  SherpaOnnxDestroyDisplay(display);
  SherpaOnnxDestroyOnlineStream(stream);
  SherpaOnnxDestroyOnlineRecognizer(recognizer);
  fprintf(stderr, "\n");

  return 0;
}


================================================
FILE: c-api-examples/streaming-paraformer-buffered-tokens-c-api.c
================================================
// c-api-examples/streaming-paraformer-buffered-tokens-c-api.c
//
// Copyright (c)  2024  Xiaomi Corporation
// Copyright (c)  2024  Luo Xiao

//
// This file demonstrates how to use streaming Paraformer with sherpa-onnx's C
// API and with tokens loaded from buffered strings instead of from
// external files API.
// clang-format off
// 
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
// tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
// rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

static size_t ReadFile(const char *filename, const char **buffer_out) {
  FILE *file = fopen(filename, "r");
  if (file == NULL) {
    fprintf(stderr, "Failed to open %s\n", filename);
    return -1;
  }
  fseek(file, 0L, SEEK_END);
  long size = ftell(file);
  rewind(file);
  *buffer_out = malloc(size);
  if (*buffer_out == NULL) {
    fclose(file);
    fprintf(stderr, "Memory error\n");
    return -1;
  }
  size_t read_bytes = fread((void *)*buffer_out, 1, size, file);
  if (read_bytes != size) {
    printf("Errors occurred in reading the file %s\n", filename);
    free((void *)*buffer_out);
    *buffer_out = NULL;
    fclose(file);
    return -1;
  }
  fclose(file);
  return read_bytes;
}

int32_t main() {
  const char *wav_filename =
      "sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav";
  const char *encoder_filename =
      "sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx";
  const char *decoder_filename =
      "sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx";
  const char *tokens_filename =
      "sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt";
  const char *provider = "cpu";

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  // reading tokens to buffers
  const char *tokens_buf;
  size_t token_buf_size = ReadFile(tokens_filename, &tokens_buf);
  if (token_buf_size < 1) {
    fprintf(stderr, "Please check your tokens.txt!\n");
    free((void *)tokens_buf);
    return -1;
  }

  // Paraformer config
  SherpaOnnxOnlineParaformerModelConfig paraformer_config;
  memset(&paraformer_config, 0, sizeof(paraformer_config));
  paraformer_config.encoder = encoder_filename;
  paraformer_config.decoder = decoder_filename;

  // Online model config
  SherpaOnnxOnlineModelConfig online_model_config;
  memset(&online_model_config, 0, sizeof(online_model_config));
  online_model_config.debug = 1;
  online_model_config.num_threads = 1;
  online_model_config.provider = provider;
  online_model_config.tokens_buf = tokens_buf;
  online_model_config.tokens_buf_size = token_buf_size;
  online_model_config.paraformer = paraformer_config;

  // Recognizer config
  SherpaOnnxOnlineRecognizerConfig recognizer_config;
  memset(&recognizer_config, 0, sizeof(recognizer_config));
  recognizer_config.decoding_method = "greedy_search";
  recognizer_config.model_config = online_model_config;

  const SherpaOnnxOnlineRecognizer *recognizer =
      SherpaOnnxCreateOnlineRecognizer(&recognizer_config);

  free((void *)tokens_buf);
  tokens_buf = NULL;

  if (recognizer == NULL) {
    fprintf(stderr, "Please check your config!\n");
    SherpaOnnxFreeWave(wave);
    return -1;
  }

  const SherpaOnnxOnlineStream *stream =
      SherpaOnnxCreateOnlineStream(recognizer);

  const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50);
  int32_t segment_id = 0;

// simulate streaming. You can choose an arbitrary N
#define N 3200

  fprintf(stderr, "sample rate: %d, num samples: %d, duration: %.2f s\n",
          wave->sample_rate, wave->num_samples,
          (float)wave->num_samples / wave->sample_rate);

  int32_t k = 0;
  while (k < wave->num_samples) {
    int32_t start = k;
    int32_t end =
        (start + N > wave->num_samples) ? wave->num_samples : (start + N);
    k += N;

    SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate,
                                         wave->samples + start, end - start);
    while (SherpaOnnxIsOnlineStreamReady(recognizer, stream)) {
      SherpaOnnxDecodeOnlineStream(recognizer, stream);
    }

    const SherpaOnnxOnlineRecognizerResult *r =
        SherpaOnnxGetOnlineStreamResult(recognizer, stream);

    if (strlen(r->text)) {
      SherpaOnnxPrint(display, segment_id, r->text);
    }

    if (SherpaOnnxOnlineStreamIsEndpoint(recognizer, stream)) {
      if (strlen(r->text)) {
        ++segment_id;
      }
      SherpaOnnxOnlineStreamReset(recognizer, stream);
    }

    SherpaOnnxDestroyOnlineRecognizerResult(r);
  }

  // add some tail padding
  float tail_paddings[4800] = {0};  // 0.3 seconds at 16 kHz sample rate
  SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, tail_paddings,
                                       4800);

  SherpaOnnxFreeWave(wave);

  SherpaOnnxOnlineStreamInputFinished(stream);
  while (SherpaOnnxIsOnlineStreamReady(recognizer, stream)) {
    SherpaOnnxDecodeOnlineStream(recognizer, stream);
  }

  const SherpaOnnxOnlineRecognizerResult *r =
      SherpaOnnxGetOnlineStreamResult(recognizer, stream);

  if (strlen(r->text)) {
    SherpaOnnxPrint(display, segment_id, r->text);
  }

  SherpaOnnxDestroyOnlineRecognizerResult(r);

  SherpaOnnxDestroyDisplay(display);
  SherpaOnnxDestroyOnlineStream(stream);
  SherpaOnnxDestroyOnlineRecognizer(recognizer);
  fprintf(stderr, "\n");

  return 0;
}


================================================
FILE: c-api-examples/streaming-paraformer-c-api.c
================================================
// c-api-examples/streaming-paraformer-c-api.c
//
// Copyright (c)  2024  Xiaomi Corporation

//
// This file demonstrates how to use streaming Paraformer with sherpa-onnx's C
// API.
// clang-format off
// 
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
// tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
// rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

int32_t main() {
  const char *wav_filename =
      "sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav";
  const char *encoder_filename =
      "sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx";
  const char *decoder_filename =
      "sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx";
  const char *tokens_filename =
      "sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt";
  const char *provider = "cpu";

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  // Paraformer config
  SherpaOnnxOnlineParaformerModelConfig paraformer_config;
  memset(&paraformer_config, 0, sizeof(paraformer_config));
  paraformer_config.encoder = encoder_filename;
  paraformer_config.decoder = decoder_filename;

  // Online model config
  SherpaOnnxOnlineModelConfig online_model_config;
  memset(&online_model_config, 0, sizeof(online_model_config));
  online_model_config.debug = 1;
  online_model_config.num_threads = 1;
  online_model_config.provider = provider;
  online_model_config.tokens = tokens_filename;
  online_model_config.paraformer = paraformer_config;

  // Recognizer config
  SherpaOnnxOnlineRecognizerConfig recognizer_config;
  memset(&recognizer_config, 0, sizeof(recognizer_config));
  recognizer_config.decoding_method = "greedy_search";
  recognizer_config.model_config = online_model_config;

  const SherpaOnnxOnlineRecognizer *recognizer =
      SherpaOnnxCreateOnlineRecognizer(&recognizer_config);

  if (recognizer == NULL) {
    fprintf(stderr, "Please check your config!\n");
    SherpaOnnxFreeWave(wave);
    return -1;
  }

  const SherpaOnnxOnlineStream *stream =
      SherpaOnnxCreateOnlineStream(recognizer);

  const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50);
  int32_t segment_id = 0;

// simulate streaming. You can choose an arbitrary N
#define N 3200

  fprintf(stderr, "sample rate: %d, num samples: %d, duration: %.2f s\n",
          wave->sample_rate, wave->num_samples,
          (float)wave->num_samples / wave->sample_rate);

  int32_t k = 0;
  while (k < wave->num_samples) {
    int32_t start = k;
    int32_t end =
        (start + N > wave->num_samples) ? wave->num_samples : (start + N);
    k += N;

    SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate,
                                         wave->samples + start, end - start);
    while (SherpaOnnxIsOnlineStreamReady(recognizer, stream)) {
      SherpaOnnxDecodeOnlineStream(recognizer, stream);
    }

    const SherpaOnnxOnlineRecognizerResult *r =
        SherpaOnnxGetOnlineStreamResult(recognizer, stream);

    if (strlen(r->text)) {
      SherpaOnnxPrint(display, segment_id, r->text);
    }

    if (SherpaOnnxOnlineStreamIsEndpoint(recognizer, stream)) {
      if (strlen(r->text)) {
        ++segment_id;
      }
      SherpaOnnxOnlineStreamReset(recognizer, stream);
    }

    SherpaOnnxDestroyOnlineRecognizerResult(r);
  }

  // add some tail padding
  float tail_paddings[4800] = {0};  // 0.3 seconds at 16 kHz sample rate
  SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, tail_paddings,
                                       4800);

  SherpaOnnxFreeWave(wave);

  SherpaOnnxOnlineStreamInputFinished(stream);
  while (SherpaOnnxIsOnlineStreamReady(recognizer, stream)) {
    SherpaOnnxDecodeOnlineStream(recognizer, stream);
  }

  const SherpaOnnxOnlineRecognizerResult *r =
      SherpaOnnxGetOnlineStreamResult(recognizer, stream);

  if (strlen(r->text)) {
    SherpaOnnxPrint(display, segment_id, r->text);
  }

  SherpaOnnxDestroyOnlineRecognizerResult(r);

  SherpaOnnxDestroyDisplay(display);
  SherpaOnnxDestroyOnlineStream(stream);
  SherpaOnnxDestroyOnlineRecognizer(recognizer);
  fprintf(stderr, "\n");

  return 0;
}


================================================
FILE: c-api-examples/streaming-t-one-ctc-c-api.c
================================================
// c-api-examples/streaming-t-one-ctc-c-api.c
//
// Copyright (c)  2025  Xiaomi Corporation

//
// This file demonstrates how to use streaming T-one with sherpa-onnx's C
// API.
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
// tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
// rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

int32_t main() {
  const char *wav_filename =
      "sherpa-onnx-streaming-t-one-russian-2025-09-08/0.wav";
  const char *model =
      "sherpa-onnx-streaming-t-one-russian-2025-09-08/model.onnx";
  const char *tokens =
      "sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt";
  const char *provider = "cpu";

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  // Zipformer config
  SherpaOnnxOnlineToneCtcModelConfig t_one_ctc;
  memset(&t_one_ctc, 0, sizeof(t_one_ctc));
  t_one_ctc.model = model;

  // Online model config
  SherpaOnnxOnlineModelConfig online_model_config;
  memset(&online_model_config, 0, sizeof(online_model_config));
  online_model_config.debug = 1;
  online_model_config.num_threads = 1;
  online_model_config.provider = provider;
  online_model_config.tokens = tokens;
  online_model_config.t_one_ctc = t_one_ctc;

  // Recognizer config
  SherpaOnnxOnlineRecognizerConfig recognizer_config;
  memset(&recognizer_config, 0, sizeof(recognizer_config));
  recognizer_config.decoding_method = "greedy_search";
  recognizer_config.model_config = online_model_config;

  const SherpaOnnxOnlineRecognizer *recognizer =
      SherpaOnnxCreateOnlineRecognizer(&recognizer_config);

  if (recognizer == NULL) {
    fprintf(stderr, "Please check your config!\n");
    SherpaOnnxFreeWave(wave);
    return -1;
  }

  const SherpaOnnxOnlineStream *stream =
      SherpaOnnxCreateOnlineStream(recognizer);

  const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50);
  int32_t segment_id = 0;

// simulate streaming. You can choose an arbitrary N
#define N 3200

  fprintf(stderr, "sample rate: %d, num samples: %d, duration: %.2f s\n",
          wave->sample_rate, wave->num_samples,
          (float)wave->num_samples / wave->sample_rate);

  float left_paddings[2400] = {0};  // 0.3 seconds at 8 kHz sample rate
  SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, left_paddings,
                                       2400);

  int32_t k = 0;
  while (k < wave->num_samples) {
    int32_t start = k;
    int32_t end =
        (start + N > wave->num_samples) ? wave->num_samples : (start + N);
    k += N;

    SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate,
                                         wave->samples + start, end - start);
    while (SherpaOnnxIsOnlineStreamReady(recognizer, stream)) {
      SherpaOnnxDecodeOnlineStream(recognizer, stream);
    }

    const SherpaOnnxOnlineRecognizerResult *r =
        SherpaOnnxGetOnlineStreamResult(recognizer, stream);

    if (strlen(r->text)) {
      SherpaOnnxPrint(display, segment_id, r->text);
    }

    if (SherpaOnnxOnlineStreamIsEndpoint(recognizer, stream)) {
      if (strlen(r->text)) {
        ++segment_id;
      }
      SherpaOnnxOnlineStreamReset(recognizer, stream);
    }

    SherpaOnnxDestroyOnlineRecognizerResult(r);
  }

  // add some tail padding
  float tail_paddings[4800] = {0};  // 0.6 seconds at 8 kHz sample rate
  SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, tail_paddings,
                                       4800);

  SherpaOnnxOnlineStreamInputFinished(stream);
  while (SherpaOnnxIsOnlineStreamReady(recognizer, stream)) {
    SherpaOnnxDecodeOnlineStream(recognizer, stream);
  }

  SherpaOnnxFreeWave(wave);

  const SherpaOnnxOnlineRecognizerResult *r =
      SherpaOnnxGetOnlineStreamResult(recognizer, stream);

  if (strlen(r->text)) {
    SherpaOnnxPrint(display, segment_id, r->text);
  }

  SherpaOnnxDestroyOnlineRecognizerResult(r);

  SherpaOnnxDestroyDisplay(display);
  SherpaOnnxDestroyOnlineStream(stream);
  SherpaOnnxDestroyOnlineRecognizer(recognizer);
  fprintf(stderr, "\n");

  return 0;
}


================================================
FILE: c-api-examples/streaming-zipformer-buffered-tokens-hotwords-c-api.c
================================================
// c-api-examples/streaming-zipformer-buffered-tokens-hotwords-c-api.c
//
// Copyright (c)  2024  Xiaomi Corporation
// Copyright (c)  2024  Luo Xiao

//
// This file demonstrates how to use streaming Zipformer with sherpa-onnx's C
// API and with tokens and hotwords loaded from buffered strings instead of from
// external files API.
// clang-format off
// 
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17.tar.bz2
// tar xvf sherpa-onnx-streaming-zipformer-en-20M-2023-02-17.tar.bz2
// rm sherpa-onnx-streaming-zipformer-en-20M-2023-02-17.tar.bz2
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

static size_t ReadFile(const char *filename, const char **buffer_out) {
  FILE *file = fopen(filename, "r");
  if (file == NULL) {
    fprintf(stderr, "Failed to open %s\n", filename);
    return -1;
  }
  fseek(file, 0L, SEEK_END);
  long size = ftell(file);
  rewind(file);
  *buffer_out = malloc(size);
  if (*buffer_out == NULL) {
    fclose(file);
    fprintf(stderr, "Memory error\n");
    return -1;
  }
  size_t read_bytes = fread((void *)*buffer_out, 1, size, file);
  if (read_bytes != size) {
    printf("Errors occurred in reading the file %s\n", filename);
    free((void *)*buffer_out);
    *buffer_out = NULL;
    fclose(file);
    return -1;
  }
  fclose(file);
  return read_bytes;
}

int32_t main() {
  const char *wav_filename =
      "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/test_wavs/0.wav";
  const char *encoder_filename =
      "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/"
      "encoder-epoch-99-avg-1.onnx";
  const char *decoder_filename =
      "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/"
      "decoder-epoch-99-avg-1.onnx";
  const char *joiner_filename =
      "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/"
      "joiner-epoch-99-avg-1.onnx";
  const char *provider = "cpu";
  const char *modeling_unit = "bpe";
  const char *tokens_filename =
      "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/tokens.txt";
  const char *hotwords_filename =
      "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/hotwords.txt";
  const char *bpe_vocab =
      "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/"
      "bpe.vocab";
  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  // reading tokens and hotwords to buffers
  const char *tokens_buf;
  size_t token_buf_size = ReadFile(tokens_filename, &tokens_buf);
  if (token_buf_size < 1) {
    fprintf(stderr, "Please check your tokens.txt!\n");
    free((void *)tokens_buf);
    return -1;
  }
  const char *hotwords_buf;
  size_t hotwords_buf_size = ReadFile(hotwords_filename, &hotwords_buf);
  if (hotwords_buf_size < 1) {
    fprintf(stderr, "Please check your hotwords.txt!\n");
    free((void *)hotwords_buf);
    return -1;
  }

  // Zipformer config
  SherpaOnnxOnlineTransducerModelConfig zipformer_config;
  memset(&zipformer_config, 0, sizeof(zipformer_config));
  zipformer_config.encoder = encoder_filename;
  zipformer_config.decoder = decoder_filename;
  zipformer_config.joiner = joiner_filename;

  // Online model config
  SherpaOnnxOnlineModelConfig online_model_config;
  memset(&online_model_config, 0, sizeof(online_model_config));
  online_model_config.debug = 1;
  online_model_config.num_threads = 1;
  online_model_config.provider = provider;
  online_model_config.tokens_buf = tokens_buf;
  online_model_config.tokens_buf_size = token_buf_size;
  online_model_config.transducer = zipformer_config;

  // Recognizer config
  SherpaOnnxOnlineRecognizerConfig recognizer_config;
  memset(&recognizer_config, 0, sizeof(recognizer_config));
  recognizer_config.decoding_method = "modified_beam_search";
  recognizer_config.model_config = online_model_config;
  recognizer_config.hotwords_buf = hotwords_buf;
  recognizer_config.hotwords_buf_size = hotwords_buf_size;

  const SherpaOnnxOnlineRecognizer *recognizer =
      SherpaOnnxCreateOnlineRecognizer(&recognizer_config);

  free((void *)tokens_buf);
  tokens_buf = NULL;
  free((void *)hotwords_buf);
  hotwords_buf = NULL;

  if (recognizer == NULL) {
    fprintf(stderr, "Please check your config!\n");
    SherpaOnnxFreeWave(wave);
    return -1;
  }

  const SherpaOnnxOnlineStream *stream =
      SherpaOnnxCreateOnlineStream(recognizer);

  const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50);
  int32_t segment_id = 0;

// simulate streaming. You can choose an arbitrary N
#define N 3200

  fprintf(stderr, "sample rate: %d, num samples: %d, duration: %.2f s\n",
          wave->sample_rate, wave->num_samples,
          (float)wave->num_samples / wave->sample_rate);

  int32_t k = 0;
  while (k < wave->num_samples) {
    int32_t start = k;
    int32_t end =
        (start + N > wave->num_samples) ? wave->num_samples : (start + N);
    k += N;

    SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate,
                                         wave->samples + start, end - start);
    while (SherpaOnnxIsOnlineStreamReady(recognizer, stream)) {
      SherpaOnnxDecodeOnlineStream(recognizer, stream);
    }

    const SherpaOnnxOnlineRecognizerResult *r =
        SherpaOnnxGetOnlineStreamResult(recognizer, stream);

    if (strlen(r->text)) {
      SherpaOnnxPrint(display, segment_id, r->text);
    }

    if (SherpaOnnxOnlineStreamIsEndpoint(recognizer, stream)) {
      if (strlen(r->text)) {
        ++segment_id;
      }
      SherpaOnnxOnlineStreamReset(recognizer, stream);
    }

    SherpaOnnxDestroyOnlineRecognizerResult(r);
  }

  // add some tail padding
  float tail_paddings[4800] = {0};  // 0.3 seconds at 16 kHz sample rate
  SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, tail_paddings,
                                       4800);

  SherpaOnnxFreeWave(wave);

  SherpaOnnxOnlineStreamInputFinished(stream);
  while (SherpaOnnxIsOnlineStreamReady(recognizer, stream)) {
    SherpaOnnxDecodeOnlineStream(recognizer, stream);
  }

  const SherpaOnnxOnlineRecognizerResult *r =
      SherpaOnnxGetOnlineStreamResult(recognizer, stream);

  if (strlen(r->text)) {
    SherpaOnnxPrint(display, segment_id, r->text);
  }

  SherpaOnnxDestroyOnlineRecognizerResult(r);

  SherpaOnnxDestroyDisplay(display);
  SherpaOnnxDestroyOnlineStream(stream);
  SherpaOnnxDestroyOnlineRecognizer(recognizer);
  fprintf(stderr, "\n");

  return 0;
}


================================================
FILE: c-api-examples/streaming-zipformer-c-api.c
================================================
// c-api-examples/streaming-zipformer-c-api.c
//
// Copyright (c)  2024  Xiaomi Corporation

//
// This file demonstrates how to use streaming Zipformer with sherpa-onnx's C
// API.
// clang-format off
// 
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17.tar.bz2
// tar xvf sherpa-onnx-streaming-zipformer-en-20M-2023-02-17.tar.bz2
// rm sherpa-onnx-streaming-zipformer-en-20M-2023-02-17.tar.bz2
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

int32_t main() {
  const char *wav_filename =
      "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/test_wavs/0.wav";
  const char *encoder_filename =
      "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/"
      "encoder-epoch-99-avg-1.onnx";
  const char *decoder_filename =
      "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/"
      "decoder-epoch-99-avg-1.onnx";
  const char *joiner_filename =
      "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/"
      "joiner-epoch-99-avg-1.onnx";
  const char *tokens_filename =
      "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/tokens.txt";
  const char *provider = "cpu";

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  // Zipformer config
  SherpaOnnxOnlineTransducerModelConfig zipformer_config;
  memset(&zipformer_config, 0, sizeof(zipformer_config));
  zipformer_config.encoder = encoder_filename;
  zipformer_config.decoder = decoder_filename;
  zipformer_config.joiner = joiner_filename;

  // Online model config
  SherpaOnnxOnlineModelConfig online_model_config;
  memset(&online_model_config, 0, sizeof(online_model_config));
  online_model_config.debug = 1;
  online_model_config.num_threads = 1;
  online_model_config.provider = provider;
  online_model_config.tokens = tokens_filename;
  online_model_config.transducer = zipformer_config;

  // Recognizer config
  SherpaOnnxOnlineRecognizerConfig recognizer_config;
  memset(&recognizer_config, 0, sizeof(recognizer_config));
  recognizer_config.decoding_method = "greedy_search";
  recognizer_config.model_config = online_model_config;
  recognizer_config.enable_endpoint = 1;

  const SherpaOnnxOnlineRecognizer *recognizer =
      SherpaOnnxCreateOnlineRecognizer(&recognizer_config);

  if (recognizer == NULL) {
    fprintf(stderr, "Please check your config!\n");
    SherpaOnnxFreeWave(wave);
    return -1;
  }

  const SherpaOnnxOnlineStream *stream =
      SherpaOnnxCreateOnlineStream(recognizer);

  const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50);
  int32_t segment_id = 0;

// simulate streaming. You can choose an arbitrary N
#define N 3200

  fprintf(stderr, "sample rate: %d, num samples: %d, duration: %.2f s\n",
          wave->sample_rate, wave->num_samples,
          (float)wave->num_samples / wave->sample_rate);

  int32_t k = 0;
  while (k < wave->num_samples) {
    int32_t start = k;
    int32_t end =
        (start + N > wave->num_samples) ? wave->num_samples : (start + N);
    k += N;

    SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate,
                                         wave->samples + start, end - start);
    while (SherpaOnnxIsOnlineStreamReady(recognizer, stream)) {
      SherpaOnnxDecodeOnlineStream(recognizer, stream);
    }

    const SherpaOnnxOnlineRecognizerResult *r =
        SherpaOnnxGetOnlineStreamResult(recognizer, stream);

    if (strlen(r->text)) {
      SherpaOnnxPrint(display, segment_id, r->text);
    }

    if (SherpaOnnxOnlineStreamIsEndpoint(recognizer, stream)) {
      if (strlen(r->text)) {
        ++segment_id;
      }
      SherpaOnnxOnlineStreamReset(recognizer, stream);
    }

    SherpaOnnxDestroyOnlineRecognizerResult(r);
  }

  // add some tail padding
  float tail_paddings[4800] = {0};  // 0.3 seconds at 16 kHz sample rate
  SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, tail_paddings,
                                       4800);

  SherpaOnnxFreeWave(wave);

  SherpaOnnxOnlineStreamInputFinished(stream);
  while (SherpaOnnxIsOnlineStreamReady(recognizer, stream)) {
    SherpaOnnxDecodeOnlineStream(recognizer, stream);
  }

  const SherpaOnnxOnlineRecognizerResult *r =
      SherpaOnnxGetOnlineStreamResult(recognizer, stream);

  if (strlen(r->text)) {
    SherpaOnnxPrint(display, segment_id, r->text);
  }

  SherpaOnnxDestroyOnlineRecognizerResult(r);

  SherpaOnnxDestroyDisplay(display);
  SherpaOnnxDestroyOnlineStream(stream);
  SherpaOnnxDestroyOnlineRecognizer(recognizer);
  fprintf(stderr, "\n");

  return 0;
}


================================================
FILE: c-api-examples/streaming-zipformer-with-hr-c-api.c
================================================
// c-api-examples/streaming-zipformer-with-hr-c-api.c
//
// Copyright (c)  2025  Xiaomi Corporation

//
// This file demonstrates how to use streaming Zipformer with sherpa-onnx's C
// API.
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
// tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
// rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
// tar xf dict.tar.bz2
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

int32_t main() {
  const char *wav_filename = "test-hr.wav";

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  // Online model config
  SherpaOnnxOnlineModelConfig online_model_config;
  memset(&online_model_config, 0, sizeof(online_model_config));
  online_model_config.debug = 0;
  online_model_config.num_threads = 1;
  online_model_config.provider = "cpu";
  online_model_config.tokens =
      "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt";

  online_model_config.transducer.encoder =
      "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
      "encoder-epoch-99-avg-1.int8.onnx";

  // Note: We recommend not using int8.onnx for the decoder.
  online_model_config.transducer.decoder =
      "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
      "decoder-epoch-99-avg-1.onnx";

  online_model_config.transducer.joiner =
      "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
      "joiner-epoch-99-avg-1.int8.onnx";

  online_model_config.tokens =
      "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt";

  online_model_config.num_threads = 1;

  // Recognizer config
  SherpaOnnxOnlineRecognizerConfig recognizer_config;
  memset(&recognizer_config, 0, sizeof(recognizer_config));
  recognizer_config.decoding_method = "greedy_search";
  recognizer_config.model_config = online_model_config;

  recognizer_config.hr.dict_dir = "./dict";
  recognizer_config.hr.lexicon = "./lexicon.txt";

  // Please see
  // https://colab.research.google.com/drive/1jEaS3s8FbRJIcVQJv2EQx19EM_mnuARi?usp=sharing
  // for how to generate your own replace.fst
  recognizer_config.hr.rule_fsts = "./replace.fst";

  const SherpaOnnxOnlineRecognizer *recognizer =
      SherpaOnnxCreateOnlineRecognizer(&recognizer_config);

  if (recognizer == NULL) {
    fprintf(stderr, "Please check your config!\n");
    SherpaOnnxFreeWave(wave);
    return -1;
  }

  const SherpaOnnxOnlineStream *stream =
      SherpaOnnxCreateOnlineStream(recognizer);

  const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50);
  int32_t segment_id = 0;

// simulate streaming. You can choose an arbitrary N
#define N 3200

  fprintf(stderr, "sample rate: %d, num samples: %d, duration: %.2f s\n",
          wave->sample_rate, wave->num_samples,
          (float)wave->num_samples / wave->sample_rate);

  int32_t k = 0;
  while (k < wave->num_samples) {
    int32_t start = k;
    int32_t end =
        (start + N > wave->num_samples) ? wave->num_samples : (start + N);
    k += N;

    SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate,
                                         wave->samples + start, end - start);
    while (SherpaOnnxIsOnlineStreamReady(recognizer, stream)) {
      SherpaOnnxDecodeOnlineStream(recognizer, stream);
    }

    const SherpaOnnxOnlineRecognizerResult *r =
        SherpaOnnxGetOnlineStreamResult(recognizer, stream);

    if (strlen(r->text)) {
      SherpaOnnxPrint(display, segment_id, r->text);
    }

    if (SherpaOnnxOnlineStreamIsEndpoint(recognizer, stream)) {
      if (strlen(r->text)) {
        ++segment_id;
      }
      SherpaOnnxOnlineStreamReset(recognizer, stream);
    }

    SherpaOnnxDestroyOnlineRecognizerResult(r);
  }

  // add some tail padding
  float tail_paddings[4800] = {0};  // 0.3 seconds at 16 kHz sample rate
  SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, tail_paddings,
                                       4800);

  SherpaOnnxFreeWave(wave);

  SherpaOnnxOnlineStreamInputFinished(stream);
  while (SherpaOnnxIsOnlineStreamReady(recognizer, stream)) {
    SherpaOnnxDecodeOnlineStream(recognizer, stream);
  }

  const SherpaOnnxOnlineRecognizerResult *r =
      SherpaOnnxGetOnlineStreamResult(recognizer, stream);

  if (strlen(r->text)) {
    SherpaOnnxPrint(display, segment_id, r->text);
  }

  SherpaOnnxDestroyOnlineRecognizerResult(r);

  SherpaOnnxDestroyDisplay(display);
  SherpaOnnxDestroyOnlineStream(stream);
  SherpaOnnxDestroyOnlineRecognizer(recognizer);
  fprintf(stderr, "\n");

  return 0;
}


================================================
FILE: c-api-examples/supertonic-tts-en-c-api.c
================================================
// c-api-examples/supertonic-tts-en-c-api.c
//
// Copyright (c)  2026  zengyw

// This file shows how to use sherpa-onnx C API
// for English TTS with Supertonic.
//
// clang-format off
/*
Usage

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
tar xf sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
rm sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2

./supertonic-tts-en-c-api

*/
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

static int32_t ProgressCallback(const float* samples, int32_t num_samples,
                                float progress, void* arg) {
  fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
  // return 1 to continue generating
  // return 0 to stop generating
  return 1;
}

int32_t main(int32_t argc, char* argv[]) {
  SherpaOnnxOfflineTtsConfig config;
  memset(&config, 0, sizeof(config));
  config.model.supertonic.duration_predictor =
      "./sherpa-onnx-supertonic-tts-int8-2026-03-06/"
      "duration_predictor.int8.onnx";
  config.model.supertonic.text_encoder =
      "./sherpa-onnx-supertonic-tts-int8-2026-03-06/text_encoder.int8.onnx";
  config.model.supertonic.vector_estimator =
      "./sherpa-onnx-supertonic-tts-int8-2026-03-06/vector_estimator.int8.onnx";
  config.model.supertonic.vocoder =
      "./sherpa-onnx-supertonic-tts-int8-2026-03-06/vocoder.int8.onnx";
  config.model.supertonic.tts_json =
      "./sherpa-onnx-supertonic-tts-int8-2026-03-06/tts.json";
  config.model.supertonic.unicode_indexer =
      "./sherpa-onnx-supertonic-tts-int8-2026-03-06/unicode_indexer.bin";
  config.model.supertonic.voice_style =
      "./sherpa-onnx-supertonic-tts-int8-2026-03-06/voice.bin";

  config.model.num_threads = 2;

  // If you don't want to see debug messages, please set it to 0
  config.model.debug = 1;

  const char* filename = "./generated-supertonic-en-c.wav";
  const char* text =
      "Today as always, men fall into two groups: slaves and free men. Whoever "
      "does not have two-thirds of his day for himself, is a slave, whatever "
      "he may be: a statesman, a businessman, an official, or a scholar.";

  const SherpaOnnxOfflineTts* tts = SherpaOnnxCreateOfflineTts(&config);
  if (!tts) {
    fprintf(stderr, "Error create Offline TTS\n");
    return -1;
  }

  SherpaOnnxGenerationConfig cfg = {0};
  cfg.sid = 6;
  cfg.num_steps = 5;
  cfg.speed = 1.25f;  // larger -> faster
  cfg.extra = "{\"lang\": \"en\"}";

  const SherpaOnnxGeneratedAudio* audio =
      SherpaOnnxOfflineTtsGenerateWithConfig(tts, text, &cfg, ProgressCallback,
                                             NULL);

  fprintf(stderr, "Input text is: %s\n", text);

  if (audio) {
    SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate, filename);
    fprintf(stderr, "Saved to: %s\n", filename);
    SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
  }

  SherpaOnnxDestroyOfflineTts(tts);

  return 0;
}


================================================
FILE: c-api-examples/telespeech-c-api.c
================================================
// c-api-examples/telespeech-c-api.c
//
// Copyright (c)  2024  Xiaomi Corporation

//
// This file demonstrates how to use TeleSpeech-ASR CTC model with sherpa-onnx's
// C API.
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
// tar xvf sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
// rm sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

int32_t main() {
  const char *wav_filename =
      "sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/test_wavs/3-sichuan.wav";
  const char *model_filename =
      "sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/model.int8.onnx";
  const char *tokens_filename =
      "sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt";
  const char *provider = "cpu";

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  // Offline model config
  SherpaOnnxOfflineModelConfig offline_model_config;
  memset(&offline_model_config, 0, sizeof(offline_model_config));
  offline_model_config.debug = 1;
  offline_model_config.num_threads = 1;
  offline_model_config.provider = provider;
  offline_model_config.tokens = tokens_filename;
  offline_model_config.telespeech_ctc = model_filename;

  // Recognizer config
  SherpaOnnxOfflineRecognizerConfig recognizer_config;
  memset(&recognizer_config, 0, sizeof(recognizer_config));
  recognizer_config.decoding_method = "greedy_search";
  recognizer_config.model_config = offline_model_config;

  const SherpaOnnxOfflineRecognizer *recognizer =
      SherpaOnnxCreateOfflineRecognizer(&recognizer_config);

  if (recognizer == NULL) {
    fprintf(stderr, "Please check your config!\n");
    SherpaOnnxFreeWave(wave);
    return -1;
  }

  const SherpaOnnxOfflineStream *stream =
      SherpaOnnxCreateOfflineStream(recognizer);

  SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
                                  wave->num_samples);
  SherpaOnnxDecodeOfflineStream(recognizer, stream);
  const SherpaOnnxOfflineRecognizerResult *result =
      SherpaOnnxGetOfflineStreamResult(stream);

  fprintf(stderr, "Decoded text: %s\n", result->text);

  SherpaOnnxDestroyOfflineRecognizerResult(result);
  SherpaOnnxDestroyOfflineStream(stream);
  SherpaOnnxDestroyOfflineRecognizer(recognizer);
  SherpaOnnxFreeWave(wave);

  return 0;
}


================================================
FILE: c-api-examples/vad-moonshine-c-api.c
================================================
// c-api-examples/vad-moonshine-c-api.c
//
// Copyright (c)  2024  Xiaomi Corporation

//
// This file demonstrates how to use VAD + Moonshine with sherpa-onnx's C API.
// clang-format off
//
// To use silero-vad:
//  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
//
// To use ten-vad:
//  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
// tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
// rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

int32_t main() {
  const char *wav_filename = "./Obama.wav";
  if (!SherpaOnnxFileExists(wav_filename)) {
    fprintf(stderr, "Please download %s\n", wav_filename);
    return -1;
  }

  const char *vad_filename;
  int32_t use_silero_vad = 0;
  int32_t use_ten_vad = 0;

  if (SherpaOnnxFileExists("./silero_vad.onnx")) {
    printf("Use silero-vad\n");
    vad_filename = "./silero_vad.onnx";
    use_silero_vad = 1;
  } else if (SherpaOnnxFileExists("./ten-vad.onnx")) {
    printf("Use ten-vad\n");
    vad_filename = "./ten-vad.onnx";
    use_ten_vad = 1;
  } else {
    fprintf(stderr, "Please provide either silero_vad.onnx or ten-vad.onnx\n");
    return -1;
  }

  const char *preprocessor =
      "./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx";
  const char *encoder = "./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx";
  const char *uncached_decoder =
      "./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx";
  const char *cached_decoder =
      "./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx";
  const char *tokens = "./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt";

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  if (wave->sample_rate != 16000) {
    fprintf(stderr, "Expect the sample rate to be 16000. Given: %d\n",
            wave->sample_rate);
    SherpaOnnxFreeWave(wave);
    return -1;
  }

  // Offline model config
  SherpaOnnxOfflineModelConfig offline_model_config;
  memset(&offline_model_config, 0, sizeof(offline_model_config));
  offline_model_config.debug = 0;
  offline_model_config.num_threads = 1;
  offline_model_config.provider = "cpu";
  offline_model_config.tokens = tokens;
  offline_model_config.moonshine.preprocessor = preprocessor;
  offline_model_config.moonshine.encoder = encoder;
  offline_model_config.moonshine.uncached_decoder = uncached_decoder;
  offline_model_config.moonshine.cached_decoder = cached_decoder;

  // Recognizer config
  SherpaOnnxOfflineRecognizerConfig recognizer_config;
  memset(&recognizer_config, 0, sizeof(recognizer_config));
  recognizer_config.decoding_method = "greedy_search";
  recognizer_config.model_config = offline_model_config;

  const SherpaOnnxOfflineRecognizer *recognizer =
      SherpaOnnxCreateOfflineRecognizer(&recognizer_config);

  if (recognizer == NULL) {
    fprintf(stderr, "Please check your recognizer config!\n");
    SherpaOnnxFreeWave(wave);
    return -1;
  }

  SherpaOnnxVadModelConfig vadConfig;
  memset(&vadConfig, 0, sizeof(vadConfig));
  if (use_silero_vad) {
    vadConfig.silero_vad.model = vad_filename;
    vadConfig.silero_vad.threshold = 0.25;
    vadConfig.silero_vad.min_silence_duration = 0.5;
    vadConfig.silero_vad.min_speech_duration = 0.5;
    vadConfig.silero_vad.max_speech_duration = 10;
    vadConfig.silero_vad.window_size = 512;
  } else if (use_ten_vad) {
    vadConfig.ten_vad.model = vad_filename;
    vadConfig.ten_vad.threshold = 0.25;
    vadConfig.ten_vad.min_silence_duration = 0.5;
    vadConfig.ten_vad.min_speech_duration = 0.5;
    vadConfig.ten_vad.max_speech_duration = 10;
    vadConfig.ten_vad.window_size = 256;
  }

  vadConfig.sample_rate = 16000;
  vadConfig.num_threads = 1;
  vadConfig.debug = 1;

  const SherpaOnnxVoiceActivityDetector *vad =
      SherpaOnnxCreateVoiceActivityDetector(&vadConfig, 30);

  if (vad == NULL) {
    fprintf(stderr, "Please check your recognizer config!\n");
    SherpaOnnxFreeWave(wave);
    SherpaOnnxDestroyOfflineRecognizer(recognizer);
    return -1;
  }

  int32_t window_size = use_silero_vad ? vadConfig.silero_vad.window_size
                                       : vadConfig.ten_vad.window_size;

  int32_t i = 0;
  int is_eof = 0;

  while (!is_eof) {
    if (i + window_size < wave->num_samples) {
      SherpaOnnxVoiceActivityDetectorAcceptWaveform(vad, wave->samples + i,
                                                    window_size);
    } else {
      SherpaOnnxVoiceActivityDetectorFlush(vad);
      is_eof = 1;
    }
    while (!SherpaOnnxVoiceActivityDetectorEmpty(vad)) {
      const SherpaOnnxSpeechSegment *segment =
          SherpaOnnxVoiceActivityDetectorFront(vad);

      const SherpaOnnxOfflineStream *stream =
          SherpaOnnxCreateOfflineStream(recognizer);

      SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate,
                                      segment->samples, segment->n);

      SherpaOnnxDecodeOfflineStream(recognizer, stream);

      const SherpaOnnxOfflineRecognizerResult *result =
          SherpaOnnxGetOfflineStreamResult(stream);

      float start = segment->start / 16000.0f;
      float duration = segment->n / 16000.0f;
      float stop = start + duration;

      fprintf(stderr, "%.3f -- %.3f: %s\n", start, stop, result->text);

      SherpaOnnxDestroyOfflineRecognizerResult(result);
      SherpaOnnxDestroyOfflineStream(stream);

      SherpaOnnxDestroySpeechSegment(segment);
      SherpaOnnxVoiceActivityDetectorPop(vad);
    }
    i += window_size;
  }

  SherpaOnnxDestroyOfflineRecognizer(recognizer);
  SherpaOnnxDestroyVoiceActivityDetector(vad);
  SherpaOnnxFreeWave(wave);

  return 0;
}


================================================
FILE: c-api-examples/vad-sense-voice-c-api.c
================================================
// c-api-examples/vad-sense-voice-c-api.c
//
// Copyright (c)  2024  Xiaomi Corporation

//
// This file demonstrates how to use VAD + SenseVoice with sherpa-onnx's C API.
// clang-format off
//
// To use silero-vad:
//  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
//
// To use ten-vad:
//  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
// tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
// rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

int32_t main() {
  const char *wav_filename = "./lei-jun-test.wav";
  if (!SherpaOnnxFileExists(wav_filename)) {
    fprintf(stderr, "Please download %s\n", wav_filename);
    return -1;
  }

  const char *vad_filename;
  int32_t use_silero_vad = 0;
  int32_t use_ten_vad = 0;

  if (SherpaOnnxFileExists("./silero_vad.onnx")) {
    printf("Use silero-vad\n");
    vad_filename = "./silero_vad.onnx";
    use_silero_vad = 1;
  } else if (SherpaOnnxFileExists("./ten-vad.onnx")) {
    printf("Use ten-vad\n");
    vad_filename = "./ten-vad.onnx";
    use_ten_vad = 1;
  } else {
    fprintf(stderr, "Please provide either silero_vad.onnx or ten-vad.onnx\n");
    return -1;
  }

  const char *model_filename =
      "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx";
  const char *tokens_filename =
      "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt";
  const char *language = "auto";
  const char *provider = "cpu";
  int32_t use_inverse_text_normalization = 1;

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  if (wave->sample_rate != 16000) {
    fprintf(stderr, "Expect the sample rate to be 16000. Given: %d\n",
            wave->sample_rate);
    SherpaOnnxFreeWave(wave);
    return -1;
  }

  SherpaOnnxOfflineSenseVoiceModelConfig sense_voice_config;
  memset(&sense_voice_config, 0, sizeof(sense_voice_config));
  sense_voice_config.model = model_filename;
  sense_voice_config.language = language;
  sense_voice_config.use_itn = use_inverse_text_normalization;

  // Offline model config
  SherpaOnnxOfflineModelConfig offline_model_config;
  memset(&offline_model_config, 0, sizeof(offline_model_config));
  offline_model_config.debug = 0;
  offline_model_config.num_threads = 1;
  offline_model_config.provider = provider;
  offline_model_config.tokens = tokens_filename;
  offline_model_config.sense_voice = sense_voice_config;

  // Recognizer config
  SherpaOnnxOfflineRecognizerConfig recognizer_config;
  memset(&recognizer_config, 0, sizeof(recognizer_config));
  recognizer_config.decoding_method = "greedy_search";
  recognizer_config.model_config = offline_model_config;

  const SherpaOnnxOfflineRecognizer *recognizer =
      SherpaOnnxCreateOfflineRecognizer(&recognizer_config);

  if (recognizer == NULL) {
    fprintf(stderr, "Please check your recognizer config!\n");
    SherpaOnnxFreeWave(wave);
    return -1;
  }

  SherpaOnnxVadModelConfig vadConfig;
  memset(&vadConfig, 0, sizeof(vadConfig));

  if (use_silero_vad) {
    vadConfig.silero_vad.model = vad_filename;
    vadConfig.silero_vad.threshold = 0.25;
    vadConfig.silero_vad.min_silence_duration = 0.5;
    vadConfig.silero_vad.min_speech_duration = 0.5;
    vadConfig.silero_vad.max_speech_duration = 10;
    vadConfig.silero_vad.window_size = 512;
  } else if (use_ten_vad) {
    vadConfig.ten_vad.model = vad_filename;
    vadConfig.ten_vad.threshold = 0.25;
    vadConfig.ten_vad.min_silence_duration = 0.5;
    vadConfig.ten_vad.min_speech_duration = 0.5;
    vadConfig.ten_vad.max_speech_duration = 10;
    vadConfig.ten_vad.window_size = 256;
  }

  vadConfig.sample_rate = 16000;
  vadConfig.num_threads = 1;
  vadConfig.debug = 1;

  const SherpaOnnxVoiceActivityDetector *vad =
      SherpaOnnxCreateVoiceActivityDetector(&vadConfig, 30);

  if (vad == NULL) {
    fprintf(stderr, "Please check your recognizer config!\n");
    SherpaOnnxFreeWave(wave);
    SherpaOnnxDestroyOfflineRecognizer(recognizer);
    return -1;
  }

  int32_t window_size = use_silero_vad ? vadConfig.silero_vad.window_size
                                       : vadConfig.ten_vad.window_size;
  int32_t i = 0;
  int is_eof = 0;

  while (!is_eof) {
    if (i + window_size < wave->num_samples) {
      SherpaOnnxVoiceActivityDetectorAcceptWaveform(vad, wave->samples + i,
                                                    window_size);
    } else {
      SherpaOnnxVoiceActivityDetectorFlush(vad);
      is_eof = 1;
    }

    while (!SherpaOnnxVoiceActivityDetectorEmpty(vad)) {
      const SherpaOnnxSpeechSegment *segment =
          SherpaOnnxVoiceActivityDetectorFront(vad);

      const SherpaOnnxOfflineStream *stream =
          SherpaOnnxCreateOfflineStream(recognizer);

      SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate,
                                      segment->samples, segment->n);

      SherpaOnnxDecodeOfflineStream(recognizer, stream);

      const SherpaOnnxOfflineRecognizerResult *result =
          SherpaOnnxGetOfflineStreamResult(stream);

      float start = segment->start / 16000.0f;
      float duration = segment->n / 16000.0f;
      float stop = start + duration;

      fprintf(stderr, "%.3f -- %.3f: %s\n", start, stop, result->text);

      SherpaOnnxDestroyOfflineRecognizerResult(result);
      SherpaOnnxDestroyOfflineStream(stream);

      SherpaOnnxDestroySpeechSegment(segment);
      SherpaOnnxVoiceActivityDetectorPop(vad);
    }
    i += window_size;
  }

  SherpaOnnxDestroyOfflineRecognizer(recognizer);
  SherpaOnnxDestroyVoiceActivityDetector(vad);
  SherpaOnnxFreeWave(wave);

  return 0;
}


================================================
FILE: c-api-examples/vad-whisper-c-api.c
================================================
// c-api-examples/vad-whisper-c-api.c
//
// Copyright (c)  2024  Xiaomi Corporation

//
// This file demonstrates how to use VAD + Whisper tiny.en with
// sherpa-onnx's C API.
//
// clang-format off
//
// To use silero-vad:
//  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
//
// To use ten-vad:
//  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
// tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
// rm sherpa-onnx-whisper-tiny.en.tar.bz2
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

int32_t main() {
  const char *wav_filename = "./Obama.wav";

  if (!SherpaOnnxFileExists(wav_filename)) {
    fprintf(stderr, "Please download %s\n", wav_filename);
    return -1;
  }

  const char *vad_filename;
  int32_t use_silero_vad = 0;
  int32_t use_ten_vad = 0;

  if (SherpaOnnxFileExists("./silero_vad.onnx")) {
    printf("Use silero-vad\n");
    vad_filename = "./silero_vad.onnx";
    use_silero_vad = 1;
  } else if (SherpaOnnxFileExists("./ten-vad.onnx")) {
    printf("Use ten-vad\n");
    vad_filename = "./ten-vad.onnx";
    use_ten_vad = 1;
  } else {
    fprintf(stderr, "Please provide either silero_vad.onnx or ten-vad.onnx\n");
    return -1;
  }

  const char *encoder = "sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx";
  const char *decoder = "sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx";
  const char *tokens = "sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt";

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  if (wave->sample_rate != 16000) {
    fprintf(stderr, "Expect the sample rate to be 16000. Given: %d\n",
            wave->sample_rate);
    SherpaOnnxFreeWave(wave);
    return -1;
  }

  // Offline model config
  SherpaOnnxOfflineModelConfig offline_model_config;
  memset(&offline_model_config, 0, sizeof(offline_model_config));
  offline_model_config.debug = 0;
  offline_model_config.num_threads = 1;
  offline_model_config.provider = "cpu";
  offline_model_config.tokens = tokens;
  offline_model_config.whisper.encoder = encoder;
  offline_model_config.whisper.decoder = decoder;
  offline_model_config.whisper.language = "en";
  offline_model_config.whisper.tail_paddings = 0;
  offline_model_config.whisper.task = "transcribe";

  // Recognizer config
  SherpaOnnxOfflineRecognizerConfig recognizer_config;
  memset(&recognizer_config, 0, sizeof(recognizer_config));
  recognizer_config.decoding_method = "greedy_search";
  recognizer_config.model_config = offline_model_config;

  const SherpaOnnxOfflineRecognizer *recognizer =
      SherpaOnnxCreateOfflineRecognizer(&recognizer_config);

  if (recognizer == NULL) {
    fprintf(stderr, "Please check your recognizer config!\n");
    SherpaOnnxFreeWave(wave);
    return -1;
  }

  SherpaOnnxVadModelConfig vadConfig;
  memset(&vadConfig, 0, sizeof(vadConfig));

  if (use_silero_vad) {
    vadConfig.silero_vad.model = vad_filename;
    vadConfig.silero_vad.threshold = 0.25;
    vadConfig.silero_vad.min_silence_duration = 0.5;
    vadConfig.silero_vad.min_speech_duration = 0.5;
    vadConfig.silero_vad.max_speech_duration = 10;
    vadConfig.silero_vad.window_size = 512;
  } else if (use_ten_vad) {
    vadConfig.ten_vad.model = vad_filename;
    vadConfig.ten_vad.threshold = 0.25;
    vadConfig.ten_vad.min_silence_duration = 0.5;
    vadConfig.ten_vad.min_speech_duration = 0.5;
    vadConfig.ten_vad.max_speech_duration = 10;
    vadConfig.ten_vad.window_size = 256;
  }

  vadConfig.sample_rate = 16000;
  vadConfig.num_threads = 1;
  vadConfig.debug = 1;

  const SherpaOnnxVoiceActivityDetector *vad =
      SherpaOnnxCreateVoiceActivityDetector(&vadConfig, 30);

  if (vad == NULL) {
    fprintf(stderr, "Please check your recognizer config!\n");
    SherpaOnnxFreeWave(wave);
    SherpaOnnxDestroyOfflineRecognizer(recognizer);
    return -1;
  }

  int32_t window_size = use_silero_vad ? vadConfig.silero_vad.window_size
                                       : vadConfig.ten_vad.window_size;
  int32_t i = 0;
  int is_eof = 0;

  while (!is_eof) {
    if (i + window_size < wave->num_samples) {
      SherpaOnnxVoiceActivityDetectorAcceptWaveform(vad, wave->samples + i,
                                                    window_size);
    } else {
      SherpaOnnxVoiceActivityDetectorFlush(vad);
      is_eof = 1;
    }
    while (!SherpaOnnxVoiceActivityDetectorEmpty(vad)) {
      const SherpaOnnxSpeechSegment *segment =
          SherpaOnnxVoiceActivityDetectorFront(vad);

      const SherpaOnnxOfflineStream *stream =
          SherpaOnnxCreateOfflineStream(recognizer);

      SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate,
                                      segment->samples, segment->n);

      SherpaOnnxDecodeOfflineStream(recognizer, stream);

      const SherpaOnnxOfflineRecognizerResult *result =
          SherpaOnnxGetOfflineStreamResult(stream);

      float start = segment->start / 16000.0f;
      float duration = segment->n / 16000.0f;
      float stop = start + duration;

      fprintf(stderr, "%.3f -- %.3f: %s\n", start, stop, result->text);

      SherpaOnnxDestroyOfflineRecognizerResult(result);
      SherpaOnnxDestroyOfflineStream(stream);

      SherpaOnnxDestroySpeechSegment(segment);
      SherpaOnnxVoiceActivityDetectorPop(vad);
    }
    i += window_size;
  }

  SherpaOnnxDestroyOfflineRecognizer(recognizer);
  SherpaOnnxDestroyVoiceActivityDetector(vad);
  SherpaOnnxFreeWave(wave);

  return 0;
}


================================================
FILE: c-api-examples/wenet-ctc-c-api.c
================================================
// c-api-examples/wenet-ctc-c-api.c
//
// Copyright (c)  2025  Xiaomi Corporation

//
// This file demonstrates how to use non-streaming Wenet CTC model with
// sherpa-onnx's C API.
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
// tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
// rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

int32_t main() {
  // clang-format off
  const char *wav_filename = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav";
  const char *model = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx";
  const char *tokens = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt";
  // clang-format on
  const char *provider = "cpu";

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  // Zipformer config
  SherpaOnnxOfflineWenetCtcModelConfig wenet_ctc_config;
  memset(&wenet_ctc_config, 0, sizeof(wenet_ctc_config));
  wenet_ctc_config.model = model;

  // Offline model config
  SherpaOnnxOfflineModelConfig offline_model_config;
  memset(&offline_model_config, 0, sizeof(offline_model_config));
  offline_model_config.debug = 1;
  offline_model_config.num_threads = 1;
  offline_model_config.provider = provider;
  offline_model_config.tokens = tokens;
  offline_model_config.wenet_ctc = wenet_ctc_config;

  // Recognizer config
  SherpaOnnxOfflineRecognizerConfig recognizer_config;
  memset(&recognizer_config, 0, sizeof(recognizer_config));
  recognizer_config.decoding_method = "greedy_search";
  recognizer_config.model_config = offline_model_config;

  const SherpaOnnxOfflineRecognizer *recognizer =
      SherpaOnnxCreateOfflineRecognizer(&recognizer_config);

  if (recognizer == NULL) {
    fprintf(stderr, "Please check your config!\n");
    SherpaOnnxFreeWave(wave);
    return -1;
  }

  const SherpaOnnxOfflineStream *stream =
      SherpaOnnxCreateOfflineStream(recognizer);

  SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
                                  wave->num_samples);
  SherpaOnnxDecodeOfflineStream(recognizer, stream);
  const SherpaOnnxOfflineRecognizerResult *result =
      SherpaOnnxGetOfflineStreamResult(stream);

  fprintf(stderr, "Decoded text: %s\n", result->text);

  SherpaOnnxDestroyOfflineRecognizerResult(result);
  SherpaOnnxDestroyOfflineStream(stream);
  SherpaOnnxDestroyOfflineRecognizer(recognizer);
  SherpaOnnxFreeWave(wave);

  return 0;
}


================================================
FILE: c-api-examples/whisper-c-api.c
================================================
// c-api-examples/whisper-c-api.c
//
// Copyright (c)  2024  Xiaomi Corporation

// We assume you have pre-downloaded the whisper multi-lingual models
// from https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
// An example command to download the "tiny" whisper model is given below:
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
// tar xvf sherpa-onnx-whisper-tiny.tar.bz2
// rm sherpa-onnx-whisper-tiny.tar.bz2
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

int32_t main() {
  const char *wav_filename = "./sherpa-onnx-whisper-tiny/test_wavs/0.wav";
  const char *encoder_filename = "sherpa-onnx-whisper-tiny/tiny-encoder.onnx";
  const char *decoder_filename = "sherpa-onnx-whisper-tiny/tiny-decoder.onnx";
  const char *tokens_filename = "sherpa-onnx-whisper-tiny/tiny-tokens.txt";
  const char *language = "en";
  const char *provider = "cpu";

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  // Whisper config
  SherpaOnnxOfflineWhisperModelConfig whisper_config;
  memset(&whisper_config, 0, sizeof(whisper_config));
  whisper_config.decoder = decoder_filename;
  whisper_config.encoder = encoder_filename;
  whisper_config.language = language;
  whisper_config.tail_paddings = 0;
  whisper_config.task = "transcribe";

  // Offline model config
  SherpaOnnxOfflineModelConfig offline_model_config;
  memset(&offline_model_config, 0, sizeof(offline_model_config));
  offline_model_config.debug = 1;
  offline_model_config.num_threads = 1;
  offline_model_config.provider = provider;
  offline_model_config.tokens = tokens_filename;
  offline_model_config.whisper = whisper_config;

  // Recognizer config
  SherpaOnnxOfflineRecognizerConfig recognizer_config;
  memset(&recognizer_config, 0, sizeof(recognizer_config));
  recognizer_config.decoding_method = "greedy_search";
  recognizer_config.model_config = offline_model_config;

  const SherpaOnnxOfflineRecognizer *recognizer =
      SherpaOnnxCreateOfflineRecognizer(&recognizer_config);

  if (recognizer == NULL) {
    fprintf(stderr, "Please check your config!\n");

    SherpaOnnxFreeWave(wave);

    return -1;
  }

  const SherpaOnnxOfflineStream *stream =
      SherpaOnnxCreateOfflineStream(recognizer);

  SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
                                  wave->num_samples);
  SherpaOnnxDecodeOfflineStream(recognizer, stream);
  const SherpaOnnxOfflineRecognizerResult *result =
      SherpaOnnxGetOfflineStreamResult(stream);

  fprintf(stderr, "Decoded text: %s\n", result->text);

  SherpaOnnxDestroyOfflineRecognizerResult(result);
  SherpaOnnxDestroyOfflineStream(stream);
  SherpaOnnxDestroyOfflineRecognizer(recognizer);
  SherpaOnnxFreeWave(wave);

  return 0;
}


================================================
FILE: c-api-examples/zipformer-c-api.c
================================================
// c-api-examples/zipformer-c-api.c
//
// Copyright (c)  2024  Xiaomi Corporation

//
// This file demonstrates how to use non-streaming Zipformer with sherpa-onnx's
// C API.
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-small-en-2023-06-26.tar.bz2
// tar xvf sherpa-onnx-zipformer-small-en-2023-06-26.tar.bz2
// rm sherpa-onnx-zipformer-small-en-2023-06-26.tar.bz2
//
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

int32_t main() {
  const char *wav_filename =
      "sherpa-onnx-zipformer-small-en-2023-06-26/test_wavs/0.wav";
  const char *encoder_filename =
      "sherpa-onnx-zipformer-small-en-2023-06-26/encoder-epoch-99-avg-1.onnx";
  const char *decoder_filename =
      "sherpa-onnx-zipformer-small-en-2023-06-26/decoder-epoch-99-avg-1.onnx";
  const char *joiner_filename =
      "sherpa-onnx-zipformer-small-en-2023-06-26/joiner-epoch-99-avg-1.onnx";
  const char *tokens_filename =
      "sherpa-onnx-zipformer-small-en-2023-06-26/tokens.txt";
  const char *provider = "cpu";

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  // Zipformer config
  SherpaOnnxOfflineTransducerModelConfig zipformer_config;
  memset(&zipformer_config, 0, sizeof(zipformer_config));
  zipformer_config.encoder = encoder_filename;
  zipformer_config.decoder = decoder_filename;
  zipformer_config.joiner = joiner_filename;

  // Offline model config
  SherpaOnnxOfflineModelConfig offline_model_config;
  memset(&offline_model_config, 0, sizeof(offline_model_config));
  offline_model_config.debug = 1;
  offline_model_config.num_threads = 1;
  offline_model_config.provider = provider;
  offline_model_config.tokens = tokens_filename;
  offline_model_config.transducer = zipformer_config;

  // Recognizer config
  SherpaOnnxOfflineRecognizerConfig recognizer_config;
  memset(&recognizer_config, 0, sizeof(recognizer_config));
  recognizer_config.decoding_method = "greedy_search";
  recognizer_config.model_config = offline_model_config;

  const SherpaOnnxOfflineRecognizer *recognizer =
      SherpaOnnxCreateOfflineRecognizer(&recognizer_config);

  if (recognizer == NULL) {
    fprintf(stderr, "Please check your config!\n");
    SherpaOnnxFreeWave(wave);
    return -1;
  }

  const SherpaOnnxOfflineStream *stream =
      SherpaOnnxCreateOfflineStream(recognizer);

  SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
                                  wave->num_samples);
  SherpaOnnxDecodeOfflineStream(recognizer, stream);
  const SherpaOnnxOfflineRecognizerResult *result =
      SherpaOnnxGetOfflineStreamResult(stream);

  fprintf(stderr, "Decoded text: %s\n", result->text);

  SherpaOnnxDestroyOfflineRecognizerResult(result);
  SherpaOnnxDestroyOfflineStream(stream);
  SherpaOnnxDestroyOfflineRecognizer(recognizer);
  SherpaOnnxFreeWave(wave);

  return 0;
}


================================================
FILE: c-api-examples/zipvoice-tts-zh-en-c-api.c
================================================
// c-api-examples/zipvoice-tts-zh-en-c-api.c
//
// Copyright (c)  2026  Xiaomi Corporation

// This file shows how to use sherpa-onnx C API
// for Chinese/English zero-shot TTS with ZipVoice.
//
// clang-format off
/*
Usage

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
tar xf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
rm sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos_24khz.onnx

./zipvoice-tts-zh-en-c-api
*/
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

static int32_t ProgressCallback(const float *samples, int32_t num_samples,
                                float progress, void *arg) {
  fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
  return 1;
}

int32_t main(int32_t argc, char *argv[]) {
  SherpaOnnxOfflineTtsConfig config;
  memset(&config, 0, sizeof(config));
  config.model.zipvoice.encoder =
      "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/encoder.int8.onnx";
  config.model.zipvoice.decoder =
      "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/decoder.int8.onnx";
  config.model.zipvoice.data_dir =
      "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/espeak-ng-data";
  config.model.zipvoice.lexicon =
      "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/lexicon.txt";
  config.model.zipvoice.tokens =
      "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/tokens.txt";
  config.model.zipvoice.vocoder = "./vocos_24khz.onnx";

  config.model.num_threads = 2;

  // If you want to see more debug messages, please set it to 1
  config.model.debug = 0;

  const char *filename = "./generated-zipvoice-zh-en-c.wav";
  const char *text =
      "小米的价值观是真诚, 热爱. 真诚，就是不欺人也不自欺. 热爱, "
      "就是全心投入并享受其中.";
  const char *reference_text =
      "那还是三十六年前, 一九八七年. 我呢考上了武汉大学的计算机系.";
  const char *reference_audio_file =
      "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/test_wavs/leijun-1.wav";

  const SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
  if (!tts) {
    fprintf(stderr, "Error create Offline TTS\n");
    return -1;
  }

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(reference_audio_file);
  if (!wave) {
    fprintf(stderr, "Failed to read %s\n", reference_audio_file);
    SherpaOnnxDestroyOfflineTts(tts);
    return -1;
  }

  SherpaOnnxGenerationConfig cfg = {0};
  cfg.speed = 1.0f;
  cfg.num_steps = 4;
  cfg.reference_audio = wave->samples;
  cfg.reference_audio_len = wave->num_samples;
  cfg.reference_sample_rate = wave->sample_rate;
  cfg.reference_text = reference_text;
  cfg.extra = "{\"min_char_in_sentence\": 10}";

#if 0
  // If you don't want to use a callback, then please enable this branch
  const SherpaOnnxGeneratedAudio *audio =
      SherpaOnnxOfflineTtsGenerateWithConfig(tts, text, &cfg, NULL, NULL);
#else
  const SherpaOnnxGeneratedAudio *audio =
      SherpaOnnxOfflineTtsGenerateWithConfig(tts, text, &cfg, ProgressCallback,
                                             NULL);
#endif

  SherpaOnnxFreeWave(wave);

  fprintf(stderr, "Input text is: %s\n", text);

  if (audio) {
    SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate, filename);
    fprintf(stderr, "Saved to: %s\n", filename);
    SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
  }

  SherpaOnnxDestroyOfflineTts(tts);

  return 0;
}


================================================
FILE: cmake/.gitignore
================================================
!*.cmake


================================================
FILE: cmake/__init__.py
================================================


================================================
FILE: cmake/asio.cmake
================================================
function(download_asio)
  include(FetchContent)

  set(asio_URL  "https://github.com/chriskohlhoff/asio/archive/refs/tags/asio-1-24-0.tar.gz")
  set(asio_URL2  "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/asio-asio-1-24-0.tar.gz")
  set(asio_HASH "SHA256=cbcaaba0f66722787b1a7c33afe1befb3a012b5af3ad7da7ff0f6b8c9b7a8a5b")

  # If you don't have access to the Internet,
  # please pre-download asio
  set(possible_file_locations
    $ENV{HOME}/Downloads/asio-asio-1-24-0.tar.gz
    ${CMAKE_SOURCE_DIR}/asio-asio-1-24-0.tar.gz
    ${CMAKE_BINARY_DIR}/asio-asio-1-24-0.tar.gz
    /tmp/asio-asio-1-24-0.tar.gz
    /star-fj/fangjun/download/github/asio-asio-1-24-0.tar.gz
  )

  foreach(f IN LISTS possible_file_locations)
    if(EXISTS ${f})
      set(asio_URL  "${f}")
      file(TO_CMAKE_PATH "${asio_URL}" asio_URL)
      message(STATUS "Found local downloaded asio: ${asio_URL}")
      set(asio_URL2)
      break()
    endif()
  endforeach()

  FetchContent_Declare(asio
    URL
      ${asio_URL}
      ${asio_URL2}
    URL_HASH          ${asio_HASH}
  )

  FetchContent_GetProperties(asio)
  if(NOT asio_POPULATED)
    message(STATUS "Downloading asio ${asio_URL}")
    FetchContent_Populate(asio)
  endif()
  message(STATUS "asio is downloaded to ${asio_SOURCE_DIR}")
  # add_subdirectory(${asio_SOURCE_DIR} ${asio_BINARY_DIR} EXCLUDE_FROM_ALL)
  include_directories(${asio_SOURCE_DIR}/asio/include)
endfunction()

download_asio()


================================================
FILE: cmake/cargs.cmake
================================================
function(download_cargs)
  include(FetchContent)

  set(cargs_URL "https://github.com/likle/cargs/archive/refs/tags/v1.0.3.tar.gz")
  set(cargs_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/cargs-1.0.3.tar.gz")
  set(cargs_HASH "SHA256=ddba25bd35e9c6c75bc706c126001b8ce8e084d40ef37050e6aa6963e836eb8b")

  # If you don't have access to the Internet,
  # please pre-download cargs
  set(possible_file_locations
    $ENV{HOME}/Downloads/cargs-1.0.3.tar.gz
    ${CMAKE_SOURCE_DIR}/cargs-1.0.3.tar.gz
    ${CMAKE_BINARY_DIR}/cargs-1.0.3.tar.gz
    /tmp/cargs-1.0.3.tar.gz
    /star-fj/fangjun/download/github/cargs-1.0.3.tar.gz
  )

  foreach(f IN LISTS possible_file_locations)
    if(EXISTS ${f})
      set(cargs_URL  "${f}")
      file(TO_CMAKE_PATH "${cargs_URL}" cargs_URL)
      message(STATUS "Found local downloaded cargs: ${cargs_URL}")
      set(cargs_URL2)
      break()
    endif()
  endforeach()

  FetchContent_Declare(cargs
    URL
      ${cargs_URL}
      ${cargs_URL2}
    URL_HASH
      ${cargs_HASH}
  )

  FetchContent_GetProperties(cargs)
  if(NOT cargs_POPULATED)
    message(STATUS "Downloading cargs ${cargs_URL}")
    FetchContent_Populate(cargs)
  endif()
  message(STATUS "cargs is downloaded to ${cargs_SOURCE_DIR}")
  add_subdirectory(${cargs_SOURCE_DIR} ${cargs_BINARY_DIR} EXCLUDE_FROM_ALL)

  install(TARGETS cargs DESTINATION lib)
  install(FILES ${cargs_SOURCE_DIR}/include/cargs.h
    DESTINATION include
  )
endfunction()

download_cargs()


================================================
FILE: cmake/cmake_extension.py
================================================
# cmake/cmake_extension.py
# Copyright (c)  2023  Xiaomi Corporation
#
# flake8: noqa

import os
import platform
import shlex
import shutil
import subprocess
import sys
from pathlib import Path

import glob
import setuptools
from setuptools.command.build_ext import build_ext


def need_split_package():
    ans = os.environ.get("SHERPA_ONNX_SPLIT_PYTHON_PACKAGE", None)
    return ans is not None


def is_for_pypi():
    ans = os.environ.get("SHERPA_ONNX_IS_FOR_PYPI", None)
    return ans is not None


def is_macos():
    return platform.system() == "Darwin"


def is_windows():
    return platform.system() == "Windows"


def is_linux():
    return platform.system() == "Linux"


def is_arm64():
    return platform.machine() in ["arm64", "aarch64"]


def is_x86():
    return platform.machine() in ["i386", "i686", "x86_64"]


def enable_alsa():
    build_alsa = os.environ.get("SHERPA_ONNX_ENABLE_ALSA", None)
    return build_alsa and is_linux() and (is_arm64() or is_x86())


def get_binaries():
    binaries = [
        "sherpa-onnx",
        "sherpa-onnx-keyword-spotter",
        "sherpa-onnx-microphone",
        "sherpa-onnx-microphone-offline",
        "sherpa-onnx-microphone-offline-audio-tagging",
        "sherpa-onnx-microphone-offline-speaker-identification",
        "sherpa-onnx-offline",
        "sherpa-onnx-offline-audio-tagging",
        "sherpa-onnx-offline-denoiser",
        "sherpa-onnx-offline-language-identification",
        "sherpa-onnx-offline-punctuation",
        "sherpa-onnx-offline-source-separation",
        "sherpa-onnx-offline-speaker-diarization",
        "sherpa-onnx-offline-tts",
        "sherpa-onnx-offline-tts-play",
        "sherpa-onnx-offline-websocket-server",
        "sherpa-onnx-online-denoiser",
        "sherpa-onnx-online-punctuation",
        "sherpa-onnx-online-websocket-client",
        "sherpa-onnx-online-websocket-server",
        "sherpa-onnx-vad",
        "sherpa-onnx-vad-microphone",
        "sherpa-onnx-vad-microphone-offline-asr",
        "sherpa-onnx-vad-microphone-simulated-streaming-asr",
        "sherpa-onnx-vad-with-offline-asr",
        "sherpa-onnx-vad-with-online-asr",
        "sherpa-onnx-version",
        "sherpa-onnx-pa-devs",
    ]

    if enable_alsa():
        binaries += [
            "sherpa-onnx-alsa",
            "sherpa-onnx-alsa-offline",
            "sherpa-onnx-alsa-offline-audio-tagging",
            "sherpa-onnx-alsa-offline-speaker-identification",
            "sherpa-onnx-offline-tts-play-alsa",
            "sherpa-onnx-vad-alsa",
            "sherpa-onnx-vad-alsa-offline-asr",
        ]

    if is_windows():
        binaries += [
            "onnxruntime.dll",
            "sherpa-onnx-c-api.dll",
            "sherpa-onnx-cxx-api.dll",
        ]

    return binaries


try:
    from wheel.bdist_wheel import bdist_wheel as _bdist_wheel

    class bdist_wheel(_bdist_wheel):
        def finalize_options(self):
            _bdist_wheel.finalize_options(self)
            # In this case, the generated wheel has a name in the form
            # sherpa-xxx-pyxx-none-any.whl
            if is_for_pypi() and not is_macos():
                self.root_is_pure = True
            else:
                # The generated wheel has a name ending with
                # -linux_x86_64.whl
                self.root_is_pure = False

except ImportError:
    bdist_wheel = None


def cmake_extension(name, *args, **kwargs) -> setuptools.Extension:
    kwargs["language"] = "c++"
    sources = []
    return setuptools.Extension(name, sources, *args, **kwargs)


class BuildExtension(build_ext):
    def build_extension(self, ext: setuptools.extension.Extension):
        # build/temp.linux-x86_64-3.8
        os.makedirs(self.build_temp, exist_ok=True)

        # build/lib.linux-x86_64-3.8
        os.makedirs(self.build_lib, exist_ok=True)

        out_bin_dir = Path(self.build_lib).resolve().parent / "sherpa_onnx" / "bin"
        install_dir = Path(self.build_lib).resolve() / "sherpa_onnx"

        sherpa_onnx_dir = Path(__file__).parent.parent.resolve()

        cmake_args = os.environ.get("SHERPA_ONNX_CMAKE_ARGS", "")
        make_args = os.environ.get("SHERPA_ONNX_MAKE_ARGS", "")
        system_make_args = os.environ.get("MAKEFLAGS", "")

        if cmake_args == "":
            cmake_args = "-DCMAKE_BUILD_TYPE=Release"

        extra_cmake_args = ""
        if not need_split_package():
            extra_cmake_args += f" -DCMAKE_INSTALL_PREFIX={install_dir} "
        extra_cmake_args += " -DBUILD_SHARED_LIBS=ON "
        extra_cmake_args += " -DBUILD_PIPER_PHONMIZE_EXE=OFF "
        extra_cmake_args += " -DBUILD_PIPER_PHONMIZE_TESTS=OFF "
        extra_cmake_args += " -DBUILD_ESPEAK_NG_EXE=OFF "
        extra_cmake_args += " -DBUILD_ESPEAK_NG_TESTS=OFF "

        if not need_split_package():
            extra_cmake_args += " -DSHERPA_ONNX_ENABLE_C_API=ON "

        extra_cmake_args += " -DSHERPA_ONNX_BUILD_C_API_EXAMPLES=OFF "
        extra_cmake_args += " -DSHERPA_ONNX_ENABLE_CHECK=OFF "
        extra_cmake_args += " -DSHERPA_ONNX_ENABLE_PYTHON=ON "
        extra_cmake_args += " -DSHERPA_ONNX_ENABLE_PORTAUDIO=ON "
        if not need_split_package():
            extra_cmake_args += " -DSHERPA_ONNX_ENABLE_WEBSOCKET=ON "

        if "PYTHON_EXECUTABLE" not in cmake_args:
            print(f"Setting PYTHON_EXECUTABLE to {sys.executable}")
            cmake_args += f" -DPYTHON_EXECUTABLE={sys.executable}"

        # putting `cmake_args` from env variable ${SHERPA_ONNX_CMAKE_ARGS} last,
        # so they can onverride the "defaults" stored in `extra_cmake_args`
        cmake_args = extra_cmake_args + cmake_args

        if is_windows():
            if not need_split_package():
                build_cmd = f"""
             cmake {cmake_args} -B {self.build_temp} -S {sherpa_onnx_dir}
             cmake --build {self.build_temp} --target install --config Release -- -m:2
                """
            else:
                build_cmd = f"""
             cmake {cmake_args} -B {self.build_temp} -S {sherpa_onnx_dir}
             cmake --build {self.build_temp} --target _sherpa_onnx --config Release -- -m:2
                """

            print(f"build command is:\n{build_cmd}")

            cmake_configure_cmd = (
                f'cmake {cmake_args} -B "{self.build_temp}" -S "{sherpa_onnx_dir}"'
            )
            print("cmake_configure_cmd", cmake_configure_cmd)

            ret = subprocess.run(cmake_configure_cmd, shell=True).returncode

            if ret != 0:
                raise Exception("Failed to configure sherpa-onnx")

            if not need_split_package():
                cmake_build_cmd = [
                    "cmake",
                    "--build",
                    str(self.build_temp),
                    "--target",
                    "install",
                    "--config",
                    "Release",
                    "--",
                    "-m:2",
                ]
                print("cmake_build_cmd", cmake_build_cmd)
                ret = subprocess.run(cmake_build_cmd, shell=False).returncode
            else:
                cmake_build_cmd = [
                    "cmake",
                    "--build",
                    str(self.build_temp),
                    "--target",
                    "_sherpa_onnx",
                    "--config",
                    "Release",
                    "--",
                    "-m:2",
                ]
                print("cmake_build_cmd", cmake_build_cmd)
                ret = subprocess.run(cmake_build_cmd, shell=False).returncode

            if ret != 0:
                raise Exception("Failed to build and install sherpa")
        else:
            if make_args == "" and system_make_args == "":
                print("for fast compilation, run:")
                print('export SHERPA_ONNX_MAKE_ARGS="-j"; python setup.py install')
                print('Setting make_args to "-j4"')
                make_args = "-j4"

            if "-G Ninja" in cmake_args:
                if not need_split_package():
                    build_cmd = f"""
                        cd {self.build_temp}
                        cmake {cmake_args} {sherpa_onnx_dir}
                        ninja {make_args} install
                    """
                else:
                    build_cmd = f"""
                        cd {self.build_temp}
                        cmake {cmake_args} {sherpa_onnx_dir}
                        ninja {make_args} _sherpa_onnx
                    """
            else:
                if not need_split_package():
                    build_cmd = f"""
                        cd {self.build_temp}

                        cmake {cmake_args} {sherpa_onnx_dir}

                        make {make_args} install/strip
                    """
                else:
                    build_cmd = f"""
                        cd {self.build_temp}

                        cmake {cmake_args} {sherpa_onnx_dir}

                        make {make_args} _sherpa_onnx
                    """
            print(f"build command is:\n{build_cmd}")

            # Parse cmake_args and make_args into lists for safer execution
            # Use shlex.split() for safer parsing of user-provided arguments
            cmake_args_list = shlex.split(cmake_args)
            make_args_list = shlex.split(make_args) if make_args else []

            # Change to build_temp directory and execute commands
            original_dir = os.getcwd()
            try:
                os.chdir(self.build_temp)

                # Run cmake configuration
                cmake_cmd = ["cmake"] + cmake_args_list + [str(sherpa_onnx_dir)]
                ret = subprocess.run(cmake_cmd, shell=False).returncode
                if ret != 0:
                    raise Exception("Failed to configure sherpa")

                # Run build command
                if "-G Ninja" in cmake_args:
                    if not need_split_package():
                        build_cmd_list = ["ninja"] + make_args_list + ["install"]
                    else:
                        build_cmd_list = ["ninja"] + make_args_list + ["_sherpa_onnx"]
                else:
                    if not need_split_package():
                        build_cmd_list = ["make"] + make_args_list + ["install/strip"]
                    else:
                        build_cmd_list = ["make"] + make_args_list + ["_sherpa_onnx"]

                ret = subprocess.run(build_cmd_list, shell=False).returncode
            finally:
                os.chdir(original_dir)

            if ret != 0:
                raise Exception(
                    "\nBuild sherpa-onnx failed. Please check the error message.\n"
                    "You can ask for help by creating an issue on GitHub.\n"
                    "\nClick:\n\thttps://github.com/k2-fsa/sherpa-onnx/issues/new\n"  # noqa
                )

        if need_split_package():
            dst = os.path.join(f"{self.build_lib}", "sherpa_onnx", "lib")
            os.makedirs(dst, exist_ok=True)
            # Directory listing for debugging - safe with shell=False
            if is_windows():
                # On Windows, use PowerShell's Get-ChildItem or just skip the listing
                # since 'dir' is a shell built-in. For safety, we'll just skip it.
                pass
            else:
                subprocess.run(["ls", "-la", dst], shell=False)

            ext = "pyd" if sys.platform.startswith("win") else "so"
            pattern = os.path.join(self.build_temp, "**", f"_sherpa_onnx.*.{ext}")
            matches = glob.glob(pattern, recursive=True)
            print("matches", list(matches))

            for f in matches:
                print(f, os.path.join(f"{self.build_lib}", "sherpa_onnx", "lib"))
                shutil.copy(f"{f}", dst)
                # Directory listing for debugging - safe with shell=False
                if is_windows():
                    # On Windows, use PowerShell's Get-ChildItem or just skip the listing
                    # since 'dir' is a shell built-in. For safety, we'll just skip it.
                    pass
                else:
                    subprocess.run(["ls", "-la", dst], shell=False)

            return

        suffix = ".exe" if is_windows() else ""
        # Remember to also change setup.py

        binaries = get_binaries()

        for f in binaries:
            suffix = "" if ".dll" in f else suffix
            src_file = install_dir / "bin" / (f + suffix)
            if not src_file.is_file():
                src_file = install_dir / "lib" / (f + suffix)
            if not src_file.is_file():
                src_file = install_dir / ".." / (f + suffix)

            if not src_file.is_file():
                continue

            print(f"Copying {src_file} to {out_bin_dir}/")
            shutil.copy(f"{src_file}", f"{out_bin_dir}/")

        if Path(f"{install_dir}/bin").is_dir():
            shutil.rmtree(f"{install_dir}/bin")
        if Path(f"{install_dir}/share").is_dir():
            shutil.rmtree(f"{install_dir}/share")
        if Path(f"{install_dir}/lib/pkgconfig").is_dir():
            shutil.rmtree(f"{install_dir}/lib/pkgconfig")

        if is_macos():
            os.remove(f"{install_dir}/lib/libonnxruntime.dylib")


================================================
FILE: cmake/eigen.cmake
================================================
function(download_eigen)
  include(FetchContent)

  set(eigen_URL  "https://gitlab.com/libeigen/eigen/-/archive/3.4.1/eigen-3.4.1.tar.gz")
  set(eigen_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/eigen-3.4.1.tar.gz")
  set(eigen_HASH "SHA256=b93c667d1b69265cdb4d9f30ec21f8facbbe8b307cf34c0b9942834c6d4fdbe2")

  # If you don't have access to the Internet,
  # please pre-download eigen
  set(possible_file_locations
    $ENV{HOME}/Downloads/eigen-3.4.1.tar.gz
    ${CMAKE_SOURCE_DIR}/eigen-3.4.1.tar.gz
    ${CMAKE_BINARY_DIR}/eigen-3.4.1.tar.gz
    /tmp/eigen-3.4.1.tar.gz
    /star-fj/fangjun/download/github/eigen-3.4.1.tar.gz
  )

  foreach(f IN LISTS possible_file_locations)
    if(EXISTS ${f})
      set(eigen_URL  "${f}")
      file(TO_CMAKE_PATH "${eigen_URL}" eigen_URL)
      message(STATUS "Found local downloaded eigen: ${eigen_URL}")
      set(eigen_URL2)
      break()
    endif()
  endforeach()

  set(BUILD_TESTING OFF CACHE BOOL "" FORCE)
  set(EIGEN_BUILD_DOC OFF CACHE BOOL "" FORCE)

  FetchContent_Declare(eigen
    URL               ${eigen_URL} ${eigen_URL2}
    URL_HASH          ${eigen_HASH}
  )

  FetchContent_GetProperties(eigen)
  if(NOT eigen_POPULATED)
    message(STATUS "Downloading eigen from ${eigen_URL}")
    FetchContent_Populate(eigen)
  endif()
  message(STATUS "eigen is downloaded to ${eigen_SOURCE_DIR}")
  message(STATUS "eigen's binary dir is ${eigen_BINARY_DIR}")

  add_subdirectory(${eigen_SOURCE_DIR} ${eigen_BINARY_DIR} EXCLUDE_FROM_ALL)
endfunction()

download_eigen()


================================================
FILE: cmake/espeak-ng-for-piper.cmake
================================================
function(download_espeak_ng_for_piper)
  include(FetchContent)

  set(espeak_ng_URL  "https://github.com/csukuangfj/espeak-ng/archive/f6fed6c58b5e0998b8e68c6610125e2d07d595a7.zip")
  set(espeak_ng_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/espeak-ng-f6fed6c58b5e0998b8e68c6610125e2d07d595a7.zip")
  set(espeak_ng_HASH "SHA256=70cbf4050e7a014aae19140b05e57249da4720f56128459fbe3a93beaf971ae6")

  set(BUILD_ESPEAK_NG_TESTS OFF CACHE BOOL "" FORCE)
  set(USE_ASYNC OFF CACHE BOOL "" FORCE)
  set(USE_MBROLA OFF CACHE BOOL "" FORCE)
  set(USE_LIBSONIC OFF CACHE BOOL "" FORCE)
  set(USE_LIBPCAUDIO OFF CACHE BOOL "" FORCE)
  set(USE_KLATT OFF CACHE BOOL "" FORCE)
  set(USE_SPEECHPLAYER OFF CACHE BOOL "" FORCE)
  set(EXTRA_cmn ON CACHE BOOL "" FORCE)
  set(EXTRA_ru ON CACHE BOOL "" FORCE)
  if (NOT SHERPA_ONNX_ENABLE_EPSEAK_NG_EXE)
    set(BUILD_ESPEAK_NG_EXE OFF CACHE BOOL "" FORCE)
  endif()

  # If you don't have access to the Internet,
  # please pre-download kaldi-decoder
  set(possible_file_locations
    $ENV{HOME}/Downloads/espeak-ng-f6fed6c58b5e0998b8e68c6610125e2d07d595a7.zip
    ${CMAKE_SOURCE_DIR}/espeak-ng-f6fed6c58b5e0998b8e68c6610125e2d07d595a7.zip
    ${CMAKE_BINARY_DIR}/espeak-ng-f6fed6c58b5e0998b8e68c6610125e2d07d595a7.zip
    /tmp/espeak-ng-f6fed6c58b5e0998b8e68c6610125e2d07d595a7.zip
    /star-fj/fangjun/download/github/espeak-ng-f6fed6c58b5e0998b8e68c6610125e2d07d595a7.zip
  )

  foreach(f IN LISTS possible_file_locations)
    if(EXISTS ${f})
      set(espeak_ng_URL  "${f}")
      file(TO_CMAKE_PATH "${espeak_ng_URL}" espeak_ng_URL)
      message(STATUS "Found local downloaded espeak-ng: ${espeak_ng_URL}")
      set(espeak_ng_URL2 )
      break()
    endif()
  endforeach()

  FetchContent_Declare(espeak_ng
    URL
      ${espeak_ng_URL}
      ${espeak_ng_URL2}
    URL_HASH          ${espeak_ng_HASH}
  )

  FetchContent_GetProperties(espeak_ng)
  if(NOT espeak_ng_POPULATED)
    message(STATUS "Downloading espeak-ng from ${espeak_ng_URL}")
    FetchContent_Populate(espeak_ng)
  endif()
  message(STATUS "espeak-ng is downloaded to ${espeak_ng_SOURCE_DIR}")
  message(STATUS "espeak-ng binary dir is ${espeak_ng_BINARY_DIR}")

  if(BUILD_SHARED_LIBS)
    set(_build_shared_libs_bak ${BUILD_SHARED_LIBS})
    set(BUILD_SHARED_LIBS OFF)
  endif()

  add_subdirectory(${espeak_ng_SOURCE_DIR} ${espeak_ng_BINARY_DIR})

  if(_build_shared_libs_bak)
    set_target_properties(espeak-ng
      PROPERTIES
        POSITION_INDEPENDENT_CODE ON
        C_VISIBILITY_PRESET hidden
        CXX_VISIBILITY_PRESET hidden
    )
    set(BUILD_SHARED_LIBS ON)
  endif()

  set(espeak_ng_SOURCE_DIR ${espeak_ng_SOURCE_DIR} PARENT_SCOPE)

  if(WIN32 AND MSVC)
    target_compile_options(ucd PUBLIC
      /wd4309
    )

    target_compile_options(espeak-ng PUBLIC
      /wd4005
      /wd4018
      /wd4067
      /wd4068
      /wd4090
      /wd4101
      /wd4244
      /wd4267
      /wd4996
    )

    if(TARGET espeak-ng-bin)
      target_compile_options(espeak-ng-bin PRIVATE
        /wd4244
        /wd4024
        /wd4047
        /wd4067
        /wd4267
        /wd4996
      )
    endif()
  endif()

  if(UNIX AND NOT APPLE)
    target_compile_options(espeak-ng PRIVATE
      -Wno-unused-result
      -Wno-format-overflow
      -Wno-format-truncation
      -Wno-uninitialized
      -Wno-format
    )

    if(TARGET espeak-ng-bin)
      target_compile_options(espeak-ng-bin PRIVATE
        -Wno-unused-result
      )
    endif()
  endif()

  target_include_directories(espeak-ng
    INTERFACE
      ${espeak_ng_SOURCE_DIR}/src/include
      ${espeak_ng_SOURCE_DIR}/src/ucd-tools/src/include
  )

  if(NOT BUILD_SHARED_LIBS)
    install(TARGETS
      espeak-ng
      ucd
    DESTINATION lib)
  endif()
endfunction()

download_espeak_ng_for_piper()


================================================
FILE: cmake/googletest.cmake
================================================
function(download_googltest)
  include(FetchContent)

  set(googletest_URL  "https://github.com/google/googletest/archive/refs/tags/v1.13.0.tar.gz")
  set(googletest_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/googletest-1.13.0.tar.gz")
  set(googletest_HASH "SHA256=ad7fdba11ea011c1d925b3289cf4af2c66a352e18d4c7264392fead75e919363")

  # If you don't have access to the Internet,
  # please pre-download googletest
  set(possible_file_locations
    $ENV{HOME}/Downloads/googletest-1.13.0.tar.gz
    ${CMAKE_SOURCE_DIR}/googletest-1.13.0.tar.gz
    ${CMAKE_BINARY_DIR}/googletest-1.13.0.tar.gz
    /tmp/googletest-1.13.0.tar.gz
    /star-fj/fangjun/download/github/googletest-1.13.0.tar.gz
  )

  foreach(f IN LISTS possible_file_locations)
    if(EXISTS ${f})
      set(googletest_URL  "${f}")
      file(TO_CMAKE_PATH "${googletest_URL}" googletest_URL)
      message(STATUS "Found local downloaded googletest: ${googletest_URL}")
      set(googletest_URL2)
      break()
    endif()
  endforeach()

  set(BUILD_GMOCK ON CACHE BOOL "" FORCE)
  set(INSTALL_GTEST OFF CACHE BOOL "" FORCE)
  set(gtest_disable_pthreads ON CACHE BOOL "" FORCE)
  set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)

  FetchContent_Declare(googletest
    URL
      ${googletest_URL}
      ${googletest_URL2}
    URL_HASH          ${googletest_HASH}
  )

  FetchContent_GetProperties(googletest)
  if(NOT googletest_POPULATED)
    message(STATUS "Downloading googletest from ${googletest_URL}")
    FetchContent_Populate(googletest)
  endif()
  message(STATUS "googletest is downloaded to ${googletest_SOURCE_DIR}")
  message(STATUS "googletest's binary dir is ${googletest_BINARY_DIR}")

  if(APPLE)
    set(CMAKE_MACOSX_RPATH ON) # to solve the following warning on macOS
  endif()
  #[==[
  -- Generating done
    Policy CMP0042 is not set: MACOSX_RPATH is enabled by default.  Run "cmake
    --help-policy CMP0042" for policy details.  Use the cmake_policy command to
    set the policy and suppress this warning.

    MACOSX_RPATH is not specified for the following targets:

      gmock
      gmock_main
      gtest
      gtest_main

  This warning is for project developers.  Use -Wno-dev to suppress it.
  ]==]

  add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR} EXCLUDE_FROM_ALL)

  target_include_directories(gtest
    INTERFACE
      ${googletest_SOURCE_DIR}/googletest/include
      ${googletest_SOURCE_DIR}/googlemock/include
  )
endfunction()

download_googltest()


================================================
FILE: cmake/hclust-cpp.cmake
================================================
function(download_hclust_cpp)
  include(FetchContent)

  # The latest release as of 2026.02.25
  set(hclust_cpp_URL  "https://github.com/csukuangfj/hclust-cpp/archive/refs/tags/2026-02-25.tar.gz")
  set(hclust_cpp_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/hclust-cpp-2026-02-25.tar.gz")
  set(hclust_cpp_HASH "SHA256=8f14e024c709d73afb40ae69cb22de4b73dba67cbce40f2e518813da8139ab56")

  # If you don't have access to the Internet,
  # please pre-download hclust-cpp
  set(possible_file_locations
    $ENV{HOME}/Downloads/hclust-cpp-2026-02-25.tar.gz
    ${CMAKE_SOURCE_DIR}/hclust-cpp-2026-02-25.tar.gz
    ${CMAKE_BINARY_DIR}/hclust-cpp-2026-02-25.tar.gz
    /tmp/hclust-cpp-2026-02-25.tar.gz
    /star-fj/fangjun/download/github/hclust-cpp-2026-02-25.tar.gz
  )

  foreach(f IN LISTS possible_file_locations)
    if(EXISTS ${f})
      set(hclust_cpp_URL  "${f}")
      file(TO_CMAKE_PATH "${hclust_cpp_URL}" hclust_cpp_URL)
      message(STATUS "Found local downloaded hclust_cpp: ${hclust_cpp_URL}")
      set(hclust_cpp_URL2)
      break()
    endif()
  endforeach()

  FetchContent_Declare(hclust_cpp
    URL
      ${hclust_cpp_URL}
      ${hclust_cpp_URL2}
    URL_HASH          ${hclust_cpp_HASH}
  )

  # hclust-cpp is header-only with no CMakeLists.txt, so we just need the
  # source directory populated. Use FetchContent_MakeAvailable on CMake 3.24+
  # (which handles missing CMakeLists.txt gracefully and avoids the
  # FetchContent_Populate deprecation warning on CMake 3.28+). Fall back to
  # the older FetchContent_Populate pattern on CMake < 3.24.
  if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.24")
    FetchContent_MakeAvailable(hclust_cpp)
  else()
    FetchContent_GetProperties(hclust_cpp)
    if(NOT hclust_cpp_POPULATED)
      message(STATUS "Downloading hclust_cpp from ${hclust_cpp_URL}")
      FetchContent_Populate(hclust_cpp)
    endif()
  endif()

  message(STATUS "hclust_cpp is downloaded to ${hclust_cpp_SOURCE_DIR}")
  message(STATUS "hclust_cpp's binary dir is ${hclust_cpp_BINARY_DIR}")
  include_directories(${hclust_cpp_SOURCE_DIR})
endfunction()

download_hclust_cpp()


================================================
FILE: cmake/json.cmake
================================================
function(download_json)
  include(FetchContent)

  set(json_URL  "https://github.com/nlohmann/json/archive/refs/tags/v3.12.0.tar.gz")
  set(json_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/json-3.12.0.tar.gz")
  set(json_HASH "SHA256=4b92eb0c06d10683f7447ce9406cb97cd4b453be18d7279320f7b2f025c10187")

  # If you don't have access to the Internet,
  # please pre-download json
  set(possible_file_locations
    $ENV{HOME}/Downloads/json-3.12.0.tar.gz
    ${CMAKE_SOURCE_DIR}/json-3.12.0.tar.gz
    ${CMAKE_BINARY_DIR}/json-3.12.0.tar.gz
    /tmp/json-3.12.0.tar.gz
    /star-fj/fangjun/download/github/json-3.12.0.tar.gz
  )

  foreach(f IN LISTS possible_file_locations)
    if(EXISTS ${f})
      set(json_URL  "${f}")
      file(TO_CMAKE_PATH "${json_URL}" json_URL)
      message(STATUS "Found local downloaded json: ${json_URL}")
      set(json_URL2)
      break()
    endif()
  endforeach()

  FetchContent_Declare(json
    URL               ${json_URL} ${json_URL2}
    URL_HASH          ${json_HASH}
  )

  FetchContent_GetProperties(json)
  if(NOT json_POPULATED)
    message(STATUS "Downloading json from ${json_URL}")
    FetchContent_Populate(json)
  endif()
  message(STATUS "json is downloaded to ${json_SOURCE_DIR}")
  message(STATUS "json's binary dir is ${json_BINARY_DIR}")
  include_directories(${json_SOURCE_DIR}/include)

  add_subdirectory(${json_SOURCE_DIR} ${json_BINARY_DIR} EXCLUDE_FROM_ALL)
endfunction()

download_json()


================================================
FILE: cmake/kaldi-decoder.cmake
================================================
function(download_kaldi_decoder)
  include(FetchContent)

  set(kaldi_decoder_URL  "https://github.com/k2-fsa/kaldi-decoder/archive/refs/tags/v0.2.11.tar.gz")
  set(kaldi_decoder_HASH "SHA256=85ca462535592541eb5ba6d21843009cf34738f51b28b71f84882a3694b528bf")

  set(KALDI_DECODER_BUILD_PYTHON OFF CACHE BOOL "" FORCE)
  set(KALDI_DECODER_ENABLE_TESTS OFF CACHE BOOL "" FORCE)
  set(KALDIFST_BUILD_PYTHON OFF CACHE BOOL "" FORCE)

  # If you don't have access to the Internet,
  # please pre-download kaldi-decoder
  set(possible_file_locations
    $ENV{HOME}/Downloads/kaldi-decoder-0.2.11.tar.gz
    ${CMAKE_SOURCE_DIR}/kaldi-decoder-0.2.11.tar.gz
    ${CMAKE_BINARY_DIR}/kaldi-decoder-0.2.11.tar.gz
    /tmp/kaldi-decoder-0.2.11.tar.gz
    /star-fj/fangjun/download/github/kaldi-decoder-0.2.11.tar.gz
  )

  foreach(f IN LISTS possible_file_locations)
    if(EXISTS ${f})
      set(kaldi_decoder_URL  "${f}")
      file(TO_CMAKE_PATH "${kaldi_decoder_URL}" kaldi_decoder_URL)
      message(STATUS "Found local downloaded kaldi-decoder: ${kaldi_decoder_URL}")
      break()
    endif()
  endforeach()

  FetchContent_Declare(kaldi_decoder
    URL
      ${kaldi_decoder_URL}
    URL_HASH          ${kaldi_decoder_HASH}
  )

  FetchContent_GetProperties(kaldi_decoder)
  if(NOT kaldi_decoder_POPULATED)
    message(STATUS "Downloading kaldi-decoder from ${kaldi_decoder_URL}")
    FetchContent_Populate(kaldi_decoder)
  endif()
  message(STATUS "kaldi-decoder is downloaded to ${kaldi_decoder_SOURCE_DIR}")
  message(STATUS "kaldi-decoder's binary dir is ${kaldi_decoder_BINARY_DIR}")

  include_directories(${kaldi_decoder_SOURCE_DIR})

  if(BUILD_SHARED_LIBS)
    set(_build_shared_libs_bak ${BUILD_SHARED_LIBS})
    set(BUILD_SHARED_LIBS OFF)
  endif()

  add_subdirectory(${kaldi_decoder_SOURCE_DIR} ${kaldi_decoder_BINARY_DIR} EXCLUDE_FROM_ALL)

  if(_build_shared_libs_bak)
    set_target_properties(
        kaldi-decoder-core
      PROPERTIES
        POSITION_INDEPENDENT_CODE ON
        C_VISIBILITY_PRESET hidden
        CXX_VISIBILITY_PRESET hidden
    )
    set(BUILD_SHARED_LIBS ON)
  endif()

  if(WIN32 AND MSVC)
    target_compile_options(kaldi-decoder-core PUBLIC
      /wd4018
      /wd4291
    )
  endif()

  target_include_directories(kaldi-decoder-core
    INTERFACE
      ${kaldi-decoder_SOURCE_DIR}/
  )
  if(NOT BUILD_SHARED_LIBS)
    install(TARGETS
      kaldi-decoder-core
      kaldifst_core
      fst
      fstfar
    DESTINATION lib)
  endif()
endfunction()

download_kaldi_decoder()


================================================
FILE: cmake/kaldi-native-fbank.cmake
================================================
function(download_kaldi_native_fbank)
  include(FetchContent)

  set(kaldi_native_fbank_URL   "https://github.com/csukuangfj/kaldi-native-fbank/archive/refs/tags/v1.22.3.tar.gz")
  set(kaldi_native_fbank_URL2  "https://hf-mirror.com/csukuangfj/sherpa-ncnn-cmake-deps/resolve/main/kaldi-native-fbank-1.22.3.tar.gz")
  set(kaldi_native_fbank_HASH "SHA256=9176cc66fc7ce1edf85cf355b06e320c57db6297df74277f575183468893cf61")

  set(KALDI_NATIVE_FBANK_BUILD_TESTS OFF CACHE BOOL "" FORCE)
  set(KALDI_NATIVE_FBANK_BUILD_PYTHON OFF CACHE BOOL "" FORCE)
  set(KALDI_NATIVE_FBANK_ENABLE_CHECK OFF CACHE BOOL "" FORCE)

  # If you don't have access to the Internet,
  # please pre-download kaldi-native-fbank
  set(possible_file_locations
    $ENV{HOME}/Downloads/kaldi-native-fbank-1.22.3.tar.gz
    ${CMAKE_SOURCE_DIR}/kaldi-native-fbank-1.22.3.tar.gz
    ${CMAKE_BINARY_DIR}/kaldi-native-fbank-1.22.3.tar.gz
    /tmp/kaldi-native-fbank-1.22.3.tar.gz
    /star-fj/fangjun/download/github/kaldi-native-fbank-1.22.3.tar.gz
  )

  foreach(f IN LISTS possible_file_locations)
    if(EXISTS ${f})
      set(kaldi_native_fbank_URL  "${f}")
      file(TO_CMAKE_PATH "${kaldi_native_fbank_URL}" kaldi_native_fbank_URL)
      message(STATUS "Found local downloaded kaldi-native-fbank: ${kaldi_native_fbank_URL}")
      set(kaldi_native_fbank_URL2 )
      break()
    endif()
  endforeach()

  FetchContent_Declare(kaldi_native_fbank
    URL
      ${kaldi_native_fbank_URL}
      ${kaldi_native_fbank_URL2}
    URL_HASH          ${kaldi_native_fbank_HASH}
  )

  FetchContent_GetProperties(kaldi_native_fbank)
  if(NOT kaldi_native_fbank_POPULATED)
    message(STATUS "Downloading kaldi-native-fbank from ${kaldi_native_fbank_URL}")
    FetchContent_Populate(kaldi_native_fbank)
  endif()
  message(STATUS "kaldi-native-fbank is downloaded to ${kaldi_native_fbank_SOURCE_DIR}")
  message(STATUS "kaldi-native-fbank's binary dir is ${kaldi_native_fbank_BINARY_DIR}")

  if(BUILD_SHARED_LIBS)
    set(_build_shared_libs_bak ${BUILD_SHARED_LIBS})
    set(BUILD_SHARED_LIBS OFF)
  endif()

  add_subdirectory(${kaldi_native_fbank_SOURCE_DIR} ${kaldi_native_fbank_BINARY_DIR} EXCLUDE_FROM_ALL)

  if(_build_shared_libs_bak)
    set_target_properties(kaldi-native-fbank-core
      PROPERTIES
        POSITION_INDEPENDENT_CODE ON
        C_VISIBILITY_PRESET hidden
        CXX_VISIBILITY_PRESET hidden
    )
    set(BUILD_SHARED_LIBS ON)
  endif()

  target_include_directories(kaldi-native-fbank-core
    INTERFACE
      ${kaldi_native_fbank_SOURCE_DIR}/
  )

  if(NOT BUILD_SHARED_LIBS)
    install(TARGETS kaldi-native-fbank-core kissfft DESTINATION lib)
  endif()
endfunction()

download_kaldi_native_fbank()


================================================
FILE: cmake/kaldifst.cmake
================================================
function(download_kaldifst)
  include(FetchContent)

  set(kaldifst_URL  "https://github.com/k2-fsa/kaldifst/archive/refs/tags/v1.7.17.tar.gz")
  set(kaldifst_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/kaldifst-1.7.17.tar.gz")
  set(kaldifst_HASH "SHA256=c4b701a23a400bda8032586b02c7e0d5e813a765832df60c23e6df9e62b010f4")

  # If you don't have access to the Internet,
  # please pre-download kaldifst
  set(possible_file_locations
    $ENV{HOME}/Downloads/kaldifst-1.7.17.tar.gz
    ${CMAKE_SOURCE_DIR}/kaldifst-1.7.17.tar.gz
    ${CMAKE_BINARY_DIR}/kaldifst-1.7.17.tar.gz
    /tmp/kaldifst-1.7.17.tar.gz
    /star-fj/fangjun/download/github/kaldifst-1.7.17.tar.gz
  )

  foreach(f IN LISTS possible_file_locations)
    if(EXISTS ${f})
      set(kaldifst_URL  "${f}")
      file(TO_CMAKE_PATH "${kaldifst_URL}" kaldifst_URL)
      message(STATUS "Found local downloaded kaldifst: ${kaldifst_URL}")
      set(kaldifst_URL2)
      break()
    endif()
  endforeach()

  set(KALDIFST_BUILD_TESTS OFF CACHE BOOL "" FORCE)
  set(KALDIFST_BUILD_PYTHON OFF CACHE BOOL "" FORCE)

  FetchContent_Declare(kaldifst
    URL               ${kaldifst_URL} ${kaldifst_URL2}
    URL_HASH          ${kaldifst_HASH}
  )

  FetchContent_GetProperties(kaldifst)
  if(NOT kaldifst_POPULATED)
    message(STATUS "Downloading kaldifst from ${kaldifst_URL}")
    FetchContent_Populate(kaldifst)
  endif()
  message(STATUS "kaldifst is downloaded to ${kaldifst_SOURCE_DIR}")
  message(STATUS "kaldifst's binary dir is ${kaldifst_BINARY_DIR}")

  list(APPEND CMAKE_MODULE_PATH ${kaldifst_SOURCE_DIR}/cmake)

  if(BUILD_SHARED_LIBS)
    set(_build_shared_libs_bak ${BUILD_SHARED_LIBS})
    set(BUILD_SHARED_LIBS OFF)
  endif()

  add_subdirectory(${kaldifst_SOURCE_DIR} ${kaldifst_BINARY_DIR} EXCLUDE_FROM_ALL)

  if(_build_shared_libs_bak)
    set_target_properties(kaldifst_core
      PROPERTIES
        POSITION_INDEPENDENT_CODE ON
        C_VISIBILITY_PRESET hidden
        CXX_VISIBILITY_PRESET hidden
    )
    set(BUILD_SHARED_LIBS ON)
  endif()

  target_include_directories(kaldifst_core
    PUBLIC
      ${kaldifst_SOURCE_DIR}/
  )

  set_target_properties(kaldifst_core PROPERTIES OUTPUT_NAME "sherpa-onnx-kaldifst-core")
  # installed in ./kaldi-decoder.cmake
endfunction()

download_kaldifst()


================================================
FILE: cmake/onnxruntime-linux-aarch64-gpu.cmake
================================================
# Copyright (c)  2022-2024  Xiaomi Corporation
message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")

if(NOT CMAKE_SYSTEM_NAME STREQUAL Linux)
  message(FATAL_ERROR "This file is for Linux only. Given: ${CMAKE_SYSTEM_NAME}")
endif()

if(NOT CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64)
  message(FATAL_ERROR "This file is for aarch64 only. Given: ${CMAKE_SYSTEM_PROCESSOR}")
endif()

if(NOT BUILD_SHARED_LIBS)
  message(FATAL_ERROR "This file is for building shared libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
endif()

if(NOT SHERPA_ONNX_ENABLE_GPU)
  message(FATAL_ERROR "This file is for NVIDIA GPU only. Given SHERPA_ONNX_ENABLE_GPU: ${SHERPA_ONNX_ENABLE_GPU}")
endif()

message(WARNING "\
SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION: ${SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION}
If you use Jetson nano b01, then please pass
   -DSHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION=1.11.0
to cmake (You need to make sure CUDA 10.2 is available on your board).

If you use Jetson Orin NX, then please pass
   -DSHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION=1.16.0
to cmake (You need to make sure CUDA 11.4 is available on your board).

If you use NVIDIA Jetson Orin Nano Engineering Reference Developer Kit
Super - Jetpack 6.2 [L4T 36.4.3], then please pass
   -DSHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION=1.18.1
to cmake (You need to make sure CUDA 12.6 is available on your board).
")

set(v ${SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION})

set(onnxruntime_URL  "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${v}/onnxruntime-linux-aarch64-gpu-${v}.tar.bz2")
set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-aarch64-gpu-${v}.tar.bz2")

if(v STREQUAL "1.11.0")
  set(onnxruntime_HASH "SHA256=36eded935551e23aead09d4173bdf0bd1e7b01fdec15d77f97d6e34029aa60d7")
elseif(v STREQUAL "1.16.0")
  set(onnxruntime_HASH "SHA256=4c09d5acf2c2682b4eab1dc2f1ad98fc1fde5f5f1960063e337983ba59379a4b")
elseif(v STREQUAL "1.18.0")
  set(onnxruntime_URL  "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.18.0/onnxruntime-linux-aarch64-gpu-cuda12.2-cudnn8.9.4-trt8.6.2-1.18.0.tar.bz2")
  set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-aarch64-gpu-cuda12.2-cudnn8.9.4-trt8.6.2-1.18.0.tar.bz2")
  set(onnxruntime_HASH "SHA256=da437a69be982fc28ca7d60d0c5ccce2f48d027fa888cc76458cdc05410f4e2d")
elseif(v STREQUAL "1.18.1")
  set(onnxruntime_URL  "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.18.1/onnxruntime-linux-aarch64-gpu-cuda12-1.18.1.tar.bz2")
  set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-aarch64-gpu-cuda12-1.18.1.tar.bz2")
  set(onnxruntime_HASH "SHA256=1e91064ec13a6fabb6b670da8a2da4f369c1dbd50a5be77a879b2473e7afc0a6")
else()
  message(FATAL_ERROR "Unuspported onnxruntime version ${v} for Linux aarch64")
endif()

# If you don't have access to the Internet,
# please download onnxruntime to one of the following locations.
# You can add more if you want.
set(possible_file_locations
  $ENV{HOME}/Downloads/onnxruntime-linux-aarch64-gpu-${v}.tar.bz2
  ${CMAKE_SOURCE_DIR}/onnxruntime-linux-aarch64-gpu-${v}.tar.bz2
  ${CMAKE_BINARY_DIR}/onnxruntime-linux-aarch64-gpu-${v}.tar.bz2
  /tmp/onnxruntime-linux-aarch64-gpu-${v}.tar.bz2
  /star-fj/fangjun/download/github/onnxruntime-linux-aarch64-gpu-${v}.tar.bz2
  #
  $ENV{HOME}/Downloads/onnxruntime-linux-aarch64-gpu-cuda12.2-cudnn8.9.4-trt8.6.2-${v}.tar.bz2
  ${CMAKE_SOURCE_DIR}/onnxruntime-linux-aarch64-gpu-cuda12.2-cudnn8.9.4-trt8.6.2-${v}.tar.bz2
  ${CMAKE_BINARY_DIR}/onnxruntime-linux-aarch64-gpu-cuda12.2-cudnn8.9.4-trt8.6.2-${v}.tar.bz2
  /tmp/onnxruntime-linux-aarch64-gpu-cuda12.2-cudnn8.9.4-trt8.6.2-${v}.tar.bz2
  /star-fj/fangjun/download/github/onnxruntime-linux-aarch64-gpu-cuda12.2-cudnn8.9.4-trt8.6.2-${v}.tar.bz2
  #
  $ENV{HOME}/Downloads/onnxruntime-linux-aarch64-gpu-cuda12-${v}.tar.bz2
  ${CMAKE_SOURCE_DIR}/onnxruntime-linux-aarch64-gpu-cuda12-${v}.tar.bz2
  ${CMAKE_BINARY_DIR}/onnxruntime-linux-aarch64-gpu-cuda12-${v}.tar.bz2
  /tmp/onnxruntime-linux-aarch64-gpu-cuda12-${v}.tar.bz2
  /star-fj/fangjun/download/github/onnxruntime-linux-aarch64-gpu-cuda12-${v}.tar.bz2
)

foreach(f IN LISTS possible_file_locations)
  if(EXISTS ${f})
    set(onnxruntime_URL  "${f}")
    file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL)
    message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}")
    set(onnxruntime_URL2)
    break()
  endif()
endforeach()

FetchContent_Declare(onnxruntime
  URL
    ${onnxruntime_URL}
    ${onnxruntime_URL2}
  URL_HASH          ${onnxruntime_HASH}
)

FetchContent_GetProperties(onnxruntime)
if(NOT onnxruntime_POPULATED)
  message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}")
  FetchContent_Populate(onnxruntime)
endif()
message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")

find_library(location_onnxruntime onnxruntime
  PATHS
  "${onnxruntime_SOURCE_DIR}/lib"
  NO_CMAKE_SYSTEM_PATH
)

message(STATUS "location_onnxruntime: ${location_onnxruntime}")

add_library(onnxruntime SHARED IMPORTED)

set_target_properties(onnxruntime PROPERTIES
  IMPORTED_LOCATION ${location_onnxruntime}
  INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_SOURCE_DIR}/include"
)

file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/lib/libonnxruntime*")
message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}")
install(FILES ${onnxruntime_lib_files} DESTINATION lib)


================================================
FILE: cmake/onnxruntime-linux-aarch64-static.cmake
================================================
# Copyright (c)  2022-2023  Xiaomi Corporation
message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")

if(NOT CMAKE_SYSTEM_NAME STREQUAL Linux)
  message(FATAL_ERROR "This file is for Linux only. Given: ${CMAKE_SYSTEM_NAME}")
endif()

if(NOT CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64)
  message(FATAL_ERROR "This file is for aarch64 only. Given: ${CMAKE_SYSTEM_PROCESSOR}")
endif()

if(BUILD_SHARED_LIBS)
  message(FATAL_ERROR "This file is for building static libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
endif()

set(onnxruntime_URL  "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.23.2/onnxruntime-linux-aarch64-static_lib-1.23.2-glibc2_17.zip")
set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/1.23.2/onnxruntime-linux-aarch64-static_lib-1.23.2-glibc2_17.zip")
set(onnxruntime_HASH "SHA256=7a603d836aa27d37197eb76f055d3c9e4e81d3a5a343c60000d7b6345bc6c80f")

# If you don't have access to the Internet,
# please download onnxruntime to one of the following locations.
# You can add more if you want.
set(possible_file_locations
  $ENV{HOME}/Downloads/onnxruntime-linux-aarch64-static_lib-1.23.2-glibc2_17.zip
  ${CMAKE_SOURCE_DIR}/onnxruntime-linux-aarch64-static_lib-1.23.2-glibc2_17.zip
  ${CMAKE_BINARY_DIR}/onnxruntime-linux-aarch64-static_lib-1.23.2-glibc2_17.zip
  /tmp/onnxruntime-linux-aarch64-static_lib-1.23.2-glibc2_17.zip
  /star-fj/fangjun/download/github/onnxruntime-linux-aarch64-static_lib-1.23.2-glibc2_17.zip
)

foreach(f IN LISTS possible_file_locations)
  if(EXISTS ${f})
    set(onnxruntime_URL  "${f}")
    file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL)
    message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}")
    set(onnxruntime_URL2)
    break()
  endif()
endforeach()

FetchContent_Declare(onnxruntime
  URL
    ${onnxruntime_URL}
    ${onnxruntime_URL2}
  URL_HASH          ${onnxruntime_HASH}
)

FetchContent_GetProperties(onnxruntime)
if(NOT onnxruntime_POPULATED)
  message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}")
  FetchContent_Populate(onnxruntime)
endif()
message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")

# for static libraries, we use onnxruntime_lib_files directly below
include_directories(${onnxruntime_SOURCE_DIR}/include)

file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/lib/lib*.a")

set(onnxruntime_lib_files ${onnxruntime_lib_files} PARENT_SCOPE)

message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}")
install(FILES ${onnxruntime_lib_files} DESTINATION lib)


================================================
FILE: cmake/onnxruntime-linux-aarch64.cmake
================================================
# Copyright (c)  2022-2023  Xiaomi Corporation
message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")

if(NOT CMAKE_SYSTEM_NAME STREQUAL Linux)
  message(FATAL_ERROR "This file is for Linux only. Given: ${CMAKE_SYSTEM_NAME}")
endif()

if(NOT CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64)
  message(FATAL_ERROR "This file is for aarch64 only. Given: ${CMAKE_SYSTEM_PROCESSOR}")
endif()

if(NOT BUILD_SHARED_LIBS)
  message(FATAL_ERROR "This file is for building shared libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
endif()

set(onnxruntime_URL  "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.23.2/onnxruntime-linux-aarch64-glibc2_17-Release-1.23.2.zip")
set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/1.23.2/onnxruntime-linux-aarch64-glibc2_17-Release-1.23.2.zip")
set(onnxruntime_HASH "SHA256=2a40a5323827bc59844d00ffdd3697d5e30dccb691233054bace0dc61cfa8341")

# If you don't have access to the Internet,
# please download onnxruntime to one of the following locations.
# You can add more if you want.
set(possible_file_locations
  $ENV{HOME}/Downloads/onnxruntime-linux-aarch64-glibc2_17-Release-1.23.2.zip
  ${CMAKE_SOURCE_DIR}/onnxruntime-linux-aarch64-glibc2_17-Release-1.23.2.zip
  ${CMAKE_BINARY_DIR}/onnxruntime-linux-aarch64-glibc2_17-Release-1.23.2.zip
  /tmp/onnxruntime-linux-aarch64-glibc2_17-Release-1.23.2.zip
  /star-fj/fangjun/download/github/onnxruntime-linux-aarch64-glibc2_17-Release-1.23.2.zip
)

foreach(f IN LISTS possible_file_locations)
  if(EXISTS ${f})
    set(onnxruntime_URL  "${f}")
    file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL)
    message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}")
    set(onnxruntime_URL2)
    break()
  endif()
endforeach()

FetchContent_Declare(onnxruntime
  URL
    ${onnxruntime_URL}
    ${onnxruntime_URL2}
  URL_HASH          ${onnxruntime_HASH}
)

FetchContent_GetProperties(onnxruntime)
if(NOT onnxruntime_POPULATED)
  message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}")
  FetchContent_Populate(onnxruntime)
endif()
message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")

set(location_onnxruntime "${onnxruntime_SOURCE_DIR}/lib/libonnxruntime.so")

message(STATUS "location_onnxruntime: ${location_onnxruntime}")

add_library(onnxruntime SHARED IMPORTED)

set_target_properties(onnxruntime PROPERTIES
  IMPORTED_LOCATION ${location_onnxruntime}
  INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_SOURCE_DIR}/include"
)

file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/lib/libonnxruntime*")
message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}")
install(FILES ${onnxruntime_lib_files} DESTINATION lib)


================================================
FILE: cmake/onnxruntime-linux-arm-static.cmake
================================================
# Copyright (c)  2022-2023  Xiaomi Corporation
message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")

if(NOT CMAKE_SYSTEM_NAME STREQUAL Linux)
  message(FATAL_ERROR "This file is for Linux only. Given: ${CMAKE_SYSTEM_NAME}")
endif()

if(NOT (CMAKE_SYSTEM_PROCESSOR STREQUAL arm OR CMAKE_SYSTEM_PROCESSOR STREQUAL armv7l))
  message(FATAL_ERROR "This file is for arm only. Given: ${CMAKE_SYSTEM_PROCESSOR}")
endif()

if(BUILD_SHARED_LIBS)
  message(FATAL_ERROR "This file is for building static libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
endif()

# requires gcc 11
set(onnxruntime_URL  "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.23.2/onnxruntime-linux-arm-static_lib-1.23.2.zip")
set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-arm-static_lib-1.23.2.zip")
set(onnxruntime_HASH "SHA256=334a51dbdc6812f91ee88356cedca14b097ed2907c80aa2b91670680e155ad9f")

# If you don't have access to the Internet,
# please download onnxruntime to one of the following locations.
# You can add more if you want.
set(possible_file_locations
  $ENV{HOME}/Downloads/onnxruntime-linux-arm-static_lib-1.23.2.zip
  ${CMAKE_SOURCE_DIR}/onnxruntime-linux-arm-static_lib-1.23.2.zip
  ${CMAKE_BINARY_DIR}/onnxruntime-linux-arm-static_lib-1.23.2.zip
  /tmp/onnxruntime-linux-arm-static_lib-1.23.2.zip
  /star-fj/fangjun/download/github/onnxruntime-linux-arm-static_lib-1.23.2.zip
)

foreach(f IN LISTS possible_file_locations)
  if(EXISTS ${f})
    set(onnxruntime_URL  "${f}")
    file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL)
    message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}")
    set(onnxruntime_URL2)
    break()
  endif()
endforeach()

FetchContent_Declare(onnxruntime
  URL
    ${onnxruntime_URL}
    ${onnxruntime_URL2}
  URL_HASH          ${onnxruntime_HASH}
)

FetchContent_GetProperties(onnxruntime)
if(NOT onnxruntime_POPULATED)
  message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}")
  FetchContent_Populate(onnxruntime)
endif()
message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")

# for static libraries, we use onnxruntime_lib_files directly below
include_directories(${onnxruntime_SOURCE_DIR}/include)

file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/lib/lib*.a")

set(onnxruntime_lib_files ${onnxruntime_lib_files} PARENT_SCOPE)

message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}")
install(FILES ${onnxruntime_lib_files} DESTINATION lib)


================================================
FILE: cmake/onnxruntime-linux-arm.cmake
================================================
# Copyright (c)  2022-2023  Xiaomi Corporation
message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")

if(NOT CMAKE_SYSTEM_NAME STREQUAL Linux)
  message(FATAL_ERROR "This file is for Linux only. Given: ${CMAKE_SYSTEM_NAME}")
endif()

if(NOT (CMAKE_SYSTEM_PROCESSOR STREQUAL arm OR CMAKE_SYSTEM_PROCESSOR STREQUAL armv7l))
  message(FATAL_ERROR "This file is for arm only. Given: ${CMAKE_SYSTEM_PROCESSOR}")
endif()

if(NOT BUILD_SHARED_LIBS)
  message(FATAL_ERROR "This file is for building shared libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
endif()

# requires gcc 11
set(onnxruntime_URL  "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.23.2/onnxruntime-linux-arm-1.23.2.zip")
set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/1.23.2/onnxruntime-linux-arm-1.23.2.zip")
set(onnxruntime_HASH "SHA256=c00aae409731930433badaf7d629499b9a1dcfac4dd67ad6b6a4838349bd6ba5")

# If you don't have access to the Internet,
# please download onnxruntime to one of the following locations.
# You can add more if you want.
set(possible_file_locations
  $ENV{HOME}/Downloads/onnxruntime-linux-arm-1.23.2.zip
  ${CMAKE_SOURCE_DIR}/onnxruntime-linux-arm-1.23.2.zip
  ${CMAKE_BINARY_DIR}/onnxruntime-linux-arm-1.23.2.zip
  /tmp/onnxruntime-linux-arm-1.23.2.zip
  /star-fj/fangjun/download/github/onnxruntime-linux-arm-1.23.2.zip
)

foreach(f IN LISTS possible_file_locations)
  if(EXISTS ${f})
    set(onnxruntime_URL  "${f}")
    file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL)
    message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}")
    set(onnxruntime_URL2)
    break()
  endif()
endforeach()

FetchContent_Declare(onnxruntime
  URL
    ${onnxruntime_URL}
    ${onnxruntime_URL2}
  URL_HASH          ${onnxruntime_HASH}
)

FetchContent_GetProperties(onnxruntime)
if(NOT onnxruntime_POPULATED)
  message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}")
  FetchContent_Populate(onnxruntime)
endif()
message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")

find_library(location_onnxruntime onnxruntime
  PATHS
  "${onnxruntime_SOURCE_DIR}/lib"
  NO_CMAKE_SYSTEM_PATH
)

message(STATUS "location_onnxruntime: ${location_onnxruntime}")

add_library(onnxruntime SHARED IMPORTED)

set_target_properties(onnxruntime PROPERTIES
  IMPORTED_LOCATION ${location_onnxruntime}
  INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_SOURCE_DIR}/include"
)

file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/lib/libonnxruntime*")
message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}")
install(FILES ${onnxruntime_lib_files} DESTINATION lib)


================================================
FILE: cmake/onnxruntime-linux-riscv64-spacemit.cmake
================================================
message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")

if(NOT CMAKE_SYSTEM_NAME STREQUAL Linux)
  message(FATAL_ERROR "This file is for Linux only. Given: ${CMAKE_SYSTEM_NAME}")
endif()

if(NOT CMAKE_SYSTEM_PROCESSOR STREQUAL riscv64)
  message(FATAL_ERROR "This file is for riscv64 only. Given: ${CMAKE_SYSTEM_PROCESSOR}")
endif()

if(NOT BUILD_SHARED_LIBS)
  message(FATAL_ERROR "This file is for building shared libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}, SHERPA_ONNX_ENABLE_SPACEMIT: ${SHERPA_ONNX_ENABLE_SPACEMIT}")
endif()

set(onnxruntime_pkg_name "spacemit-ort.riscv64.2.0.1.tar.gz")
set(onnxruntime_URL  "https://archive.spacemit.com/spacemit-ai/onnxruntime/${onnxruntime_pkg_name}")
set(onnxruntime_HASH "SHA256=8a15035aca34d5fd95f24444d4c7843265c1a81f49d84ec6fe9c6d0fdf5b55cf")

# If you don't have access to the Internet,
# please download onnxruntime to one of the following locations.
# You can add more if you want.
set(possible_file_locations
  $ENV{HOME}/Downloads/${onnxruntime_pkg_name}
  ${CMAKE_SOURCE_DIR}/${onnxruntime_pkg_name}
  ${CMAKE_BINARY_DIR}/${onnxruntime_pkg_name}
  /tmp/${onnxruntime_pkg_name}
  /star-fj/fangjun/download/github/${onnxruntime_pkg_name}
)

foreach(f IN LISTS possible_file_locations)
  if(EXISTS ${f})
    set(onnxruntime_URL  "${f}")
    file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL)
    message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}")
    set(onnxruntime_URL2)
    break()
  endif()
endforeach()

FetchContent_Declare(onnxruntime
  URL
    ${onnxruntime_URL}
    ${onnxruntime_URL2}
  URL_HASH          ${onnxruntime_HASH}
)

FetchContent_GetProperties(onnxruntime)
if(NOT onnxruntime_POPULATED)
  message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}")
  FetchContent_Populate(onnxruntime)
endif()
message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")

find_library(location_onnxruntime
  NAMES onnxruntime
  PATHS "${onnxruntime_SOURCE_DIR}/lib"
  NO_CMAKE_SYSTEM_PATH
)

message(STATUS "location_onnxruntime: ${location_onnxruntime}")

find_library(location_spacemit_ep
  NAMES spacemit_ep
  PATHS "${onnxruntime_SOURCE_DIR}/lib"
  NO_CMAKE_SYSTEM_PATH
)

message(STATUS "location_spacemit_ep: ${location_spacemit_ep}")

add_library(onnxruntime SHARED IMPORTED)
add_library(spacemit_ep SHARED IMPORTED)

set_target_properties(onnxruntime PROPERTIES
  IMPORTED_LOCATION ${location_onnxruntime}
  IMPORTED_LOCATION "${onnxruntime_SOURCE_DIR}/lib/libonnxruntime.so"
  INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_SOURCE_DIR}/include/"
)

set_target_properties(spacemit_ep PROPERTIES
  IMPORTED_LOCATION ${location_spacemit_ep}
  IMPORTED_LOCATION "${onnxruntime_SOURCE_DIR}/lib/libspacemit_ep.so"
  INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_SOURCE_DIR}/include/"
)

file(GLOB onnxruntime_lib_files
  "${onnxruntime_SOURCE_DIR}/lib/libonnxruntime*")
message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}")
install(FILES ${onnxruntime_lib_files} DESTINATION lib)

file(GLOB spacemit_ep_lib_files
  "${onnxruntime_SOURCE_DIR}/lib/libspacemit_ep*")
message(STATUS "spacemit_ep lib files: ${spacemit_ep_lib_files}")
install(FILES ${spacemit_ep_lib_files} DESTINATION lib)


================================================
FILE: cmake/onnxruntime-linux-riscv64-static.cmake
================================================
# Copyright (c)  2022-2024  Xiaomi Corporation
message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")

if(NOT CMAKE_SYSTEM_NAME STREQUAL Linux)
  message(FATAL_ERROR "This file is for Linux only. Given: ${CMAKE_SYSTEM_NAME}")
endif()

if(NOT CMAKE_SYSTEM_PROCESSOR STREQUAL riscv64)
  message(FATAL_ERROR "This file is for riscv64 only. Given: ${CMAKE_SYSTEM_PROCESSOR}")
endif()

if(BUILD_SHARED_LIBS)
  message(FATAL_ERROR "This file is for building static libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
endif()

set(onnxruntime_URL  "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.18.0/onnxruntime-linux-riscv64-static_lib-1.18.0.zip")
set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-riscv64-static_lib-1.18.0.zip")
set(onnxruntime_HASH "SHA256=77ecc51d8caf0953755db6edcdec2fc03bce3f6d379bedd635be50bb95f88da5")

# If you don't have access to the Internet,
# please download onnxruntime to one of the following locations.
# You can add more if you want.
set(possible_file_locations
  $ENV{HOME}/Downloads/onnxruntime-linux-riscv64-static_lib-1.18.0.zip
  ${CMAKE_SOURCE_DIR}/onnxruntime-linux-riscv64-static_lib-1.18.0.zip
  ${CMAKE_BINARY_DIR}/onnxruntime-linux-riscv64-static_lib-1.18.0.zip
  /tmp/onnxruntime-linux-riscv64-static_lib-1.18.0.zip
  /star-fj/fangjun/download/github/onnxruntime-linux-riscv64-static_lib-1.18.0.zip
)

foreach(f IN LISTS possible_file_locations)
  if(EXISTS ${f})
    set(onnxruntime_URL  "${f}")
    file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL)
    message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}")
    set(onnxruntime_URL2)
    break()
  endif()
endforeach()

FetchContent_Declare(onnxruntime
  URL
    ${onnxruntime_URL}
    ${onnxruntime_URL2}
  URL_HASH          ${onnxruntime_HASH}
)

FetchContent_GetProperties(onnxruntime)
if(NOT onnxruntime_POPULATED)
  message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}")
  FetchContent_Populate(onnxruntime)
endif()
message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")

# for static libraries, we use onnxruntime_lib_files directly below
include_directories(${onnxruntime_SOURCE_DIR}/include)

file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/lib/lib*.a")

set(onnxruntime_lib_files ${onnxruntime_lib_files} PARENT_SCOPE)

message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}")
install(FILES ${onnxruntime_lib_files} DESTINATION lib)


================================================
FILE: cmake/onnxruntime-linux-riscv64.cmake
================================================
# Copyright (c)  2022-2024  Xiaomi Corporation
message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")

if(NOT CMAKE_SYSTEM_NAME STREQUAL Linux)
  message(FATAL_ERROR "This file is for Linux only. Given: ${CMAKE_SYSTEM_NAME}")
endif()

if(NOT CMAKE_SYSTEM_PROCESSOR STREQUAL riscv64)
  message(FATAL_ERROR "This file is for riscv64 only. Given: ${CMAKE_SYSTEM_PROCESSOR}")
endif()

if(NOT BUILD_SHARED_LIBS)
  message(FATAL_ERROR "This file is for building shared libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
endif()

set(onnxruntime_URL  "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.14.1/onnxruntime-linux-riscv64-glibc2_17-Release-1.14.1.zip")
set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-riscv64-glibc2_17-Release-1.14.1.zip")
set(onnxruntime_HASH "SHA256=c2cbc5af081ff82f46640befd85433811486daaf28e702163c6e4e75020fde81")

# If you don't have access to the Internet,
# please download onnxruntime to one of the following locations.
# You can add more if you want.
set(possible_file_locations
  $ENV{HOME}/Downloads/onnxruntime-linux-riscv64-glibc2_17-Release-1.14.1.zip
  ${CMAKE_SOURCE_DIR}/onnxruntime-linux-riscv64-glibc2_17-Release-1.14.1.zip
  ${CMAKE_BINARY_DIR}/onnxruntime-linux-riscv64-glibc2_17-Release-1.14.1.zip
  /tmp/onnxruntime-linux-riscv64-glibc2_17-Release-1.14.1.zip
  /star-fj/fangjun/download/github/onnxruntime-linux-riscv64-glibc2_17-Release-1.14.1.zip
)

foreach(f IN LISTS possible_file_locations)
  if(EXISTS ${f})
    set(onnxruntime_URL  "${f}")
    file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL)
    message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}")
    set(onnxruntime_URL2)
    break()
  endif()
endforeach()

FetchContent_Declare(onnxruntime
  URL
    ${onnxruntime_URL}
    ${onnxruntime_URL2}
  URL_HASH          ${onnxruntime_HASH}
)

FetchContent_GetProperties(onnxruntime)
if(NOT onnxruntime_POPULATED)
  message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}")
  FetchContent_Populate(onnxruntime)
endif()
message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")

find_library(location_onnxruntime onnxruntime
  PATHS
  "${onnxruntime_SOURCE_DIR}/lib"
  NO_CMAKE_SYSTEM_PATH
)

message(STATUS "location_onnxruntime: ${location_onnxruntime}")

add_library(onnxruntime SHARED IMPORTED)

set_target_properties(onnxruntime PROPERTIES
  IMPORTED_LOCATION ${location_onnxruntime}
  INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_SOURCE_DIR}/include/"
)

file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/lib/libonnxruntime*")
message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}")
install(FILES ${onnxruntime_lib_files} DESTINATION lib)


================================================
FILE: cmake/onnxruntime-linux-x86_64-gpu.cmake
================================================
# Copyright (c)  2022-2023  Xiaomi Corporation
message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")

if(NOT CMAKE_SYSTEM_NAME STREQUAL Linux)
  message(FATAL_ERROR "This file is for Linux only. Given: ${CMAKE_SYSTEM_NAME}")
endif()

if(NOT CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64)
  message(FATAL_ERROR "This file is for x86_64 only. Given: ${CMAKE_SYSTEM_PROCESSOR}")
endif()

if(NOT BUILD_SHARED_LIBS)
  message(FATAL_ERROR "This file is for building shared libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
endif()

if(NOT SHERPA_ONNX_ENABLE_GPU)
  message(FATAL_ERROR "This file is for NVIDIA GPU only. Given SHERPA_ONNX_ENABLE_GPU: ${SHERPA_ONNX_ENABLE_GPU}")
endif()


# Requires CUDA 12, cudnn 9
set(onnxruntime_URL  "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.23.2/onnxruntime-linux-x64-gpu-1.23.2-patched.zip")
set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/1.23.2/onnxruntime-linux-x64-gpu-1.23.2-patched.zip")
set(onnxruntime_HASH "SHA256=e2f622513212304447e34512b99ae4eabb4fd8870dd1baac895f222179dede19")

# If you don't have access to the Internet,
# please download onnxruntime to one of the following locations.
# You can add more if you want.
set(possible_file_locations
  $ENV{HOME}/Downloads/onnxruntime-linux-x64-gpu-1.23.2-patched.zip
  ${CMAKE_SOURCE_DIR}/onnxruntime-linux-x64-gpu-1.23.2-patched.zip
  ${CMAKE_BINARY_DIR}/onnxruntime-linux-x64-gpu-1.23.2-patched.zip
  /tmp/onnxruntime-linux-x64-gpu-1.23.2-patched.zip
  /star-fj/fangjun/download/github/onnxruntime-linux-x64-gpu-1.23.2-patched.zip
)

foreach(f IN LISTS possible_file_locations)
  if(EXISTS ${f})
    set(onnxruntime_URL  "${f}")
    file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL)
    message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}")
    set(onnxruntime_URL2)
    break()
  endif()
endforeach()

FetchContent_Declare(onnxruntime
  URL
    ${onnxruntime_URL}
    ${onnxruntime_URL2}
  URL_HASH          ${onnxruntime_HASH}
)

FetchContent_GetProperties(onnxruntime)
if(NOT onnxruntime_POPULATED)
  message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}")
  FetchContent_Populate(onnxruntime)
endif()
message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")

find_library(location_onnxruntime onnxruntime
  PATHS
  "${onnxruntime_SOURCE_DIR}/lib"
  NO_CMAKE_SYSTEM_PATH
)

message(STATUS "location_onnxruntime: ${location_onnxruntime}")

add_library(onnxruntime SHARED IMPORTED)

set_target_properties(onnxruntime PROPERTIES
  IMPORTED_LOCATION ${location_onnxruntime}
  INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_SOURCE_DIR}/include"
)

file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/lib/libonnxruntime*")
message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}")
install(FILES ${onnxruntime_lib_files} DESTINATION lib)


================================================
FILE: cmake/onnxruntime-linux-x86_64-static.cmake
================================================
# Copyright (c)  2022-2023  Xiaomi Corporation
message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")

if(NOT CMAKE_SYSTEM_NAME STREQUAL Linux)
  message(FATAL_ERROR "This file is for Linux only. Given: ${CMAKE_SYSTEM_NAME}")
endif()

if(NOT CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64)
  message(FATAL_ERROR "This file is for x86_64 only. Given: ${CMAKE_SYSTEM_PROCESSOR}")
endif()

if(BUILD_SHARED_LIBS)
  message(FATAL_ERROR "This file is for building static libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
endif()

set(onnxruntime_URL  "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.23.2/onnxruntime-linux-x64-static_lib-1.23.2-glibc2_17.zip")
set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/1.23.2/onnxruntime-linux-x64-static_lib-1.23.2-glibc2_17.zip")
set(onnxruntime_HASH "SHA256=93a52b9d93a0932259a03090291be861ba21ad4b1b58057d3a0f57a4c4108671")

# If you don't have access to the Internet,
# please download onnxruntime to one of the following locations.
# You can add more if you want.
set(possible_file_locations
  $ENV{HOME}/Downloads/onnxruntime-linux-x64-static_lib-1.23.2-glibc2_17.zip
  ${CMAKE_SOURCE_DIR}/onnxruntime-linux-x64-static_lib-1.23.2-glibc2_17.zip
  ${CMAKE_BINARY_DIR}/onnxruntime-linux-x64-static_lib-1.23.2-glibc2_17.zip
  /tmp/onnxruntime-linux-x64-static_lib-1.23.2-glibc2_17.zip
  /star-fj/fangjun/download/github/onnxruntime-linux-x64-static_lib-1.23.2-glibc2_17.zip
)

foreach(f IN LISTS possible_file_locations)
  if(EXISTS ${f})
    set(onnxruntime_URL  "${f}")
    file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL)
    message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}")
    set(onnxruntime_URL2)
    break()
  endif()
endforeach()

FetchContent_Declare(onnxruntime
  URL
    ${onnxruntime_URL}
    ${onnxruntime_URL2}
  URL_HASH          ${onnxruntime_HASH}
)

FetchContent_GetProperties(onnxruntime)
if(NOT onnxruntime_POPULATED)
  message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}")
  FetchContent_Populate(onnxruntime)
endif()
message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")

# for static libraries, we use onnxruntime_lib_files directly below
include_directories(${onnxruntime_SOURCE_DIR}/include)

file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/lib/lib*.a")

set(onnxruntime_lib_files ${onnxruntime_lib_files} PARENT_SCOPE)

message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}")
install(FILES ${onnxruntime_lib_files} DESTINATION lib)


================================================
FILE: cmake/onnxruntime-linux-x86_64.cmake
================================================
# Copyright (c)  2022-2023  Xiaomi Corporation
message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")

if(NOT CMAKE_SYSTEM_NAME STREQUAL Linux)
  message(FATAL_ERROR "This file is for Linux only. Given: ${CMAKE_SYSTEM_NAME}")
endif()

if(NOT CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64)
  message(FATAL_ERROR "This file is for x86_64 only. Given: ${CMAKE_SYSTEM_PROCESSOR}")
endif()

if(NOT BUILD_SHARED_LIBS)
  message(FATAL_ERROR "This file is for building shared libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
endif()

set(onnxruntime_URL  "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.23.2/onnxruntime-linux-x64-glibc2_17-Release-1.23.2.zip")
set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/1.23.2/onnxruntime-linux-x64-glibc2_17-Release-1.23.2.zip")
set(onnxruntime_HASH "SHA256=77ea3532dfdd8d5c66918429f7eacd80c1fea834941a14746adf3109f8e7b830")

# If you don't have access to the Internet,
# please download onnxruntime to one of the following locations.
# You can add more if you want.
set(possible_file_locations
  $ENV{HOME}/Downloads/onnxruntime-linux-x64-glibc2_17-Release-1.23.2.zip
  ${CMAKE_SOURCE_DIR}/onnxruntime-linux-x64-glibc2_17-Release-1.23.2.zip
  ${CMAKE_BINARY_DIR}/onnxruntime-linux-x64-glibc2_17-Release-1.23.2.zip
  /tmp/onnxruntime-linux-x64-glibc2_17-Release-1.23.2.zip
  /star-fj/fangjun/download/github/onnxruntime-linux-x64-glibc2_17-Release-1.23.2.zip
)

foreach(f IN LISTS possible_file_locations)
  if(EXISTS ${f})
    set(onnxruntime_URL  "${f}")
    file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL)
    message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}")
    set(onnxruntime_URL2)
    break()
  endif()
endforeach()

FetchContent_Declare(onnxruntime
  URL
    ${onnxruntime_URL}
    ${onnxruntime_URL2}
  URL_HASH          ${onnxruntime_HASH}
)

FetchContent_GetProperties(onnxruntime)
if(NOT onnxruntime_POPULATED)
  message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}")
  FetchContent_Populate(onnxruntime)
endif()
message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")

find_library(location_onnxruntime onnxruntime
  PATHS
  "${onnxruntime_SOURCE_DIR}/lib"
  NO_CMAKE_SYSTEM_PATH
)

message(STATUS "location_onnxruntime: ${location_onnxruntime}")

add_library(onnxruntime SHARED IMPORTED)

set_target_properties(onnxruntime PROPERTIES
  IMPORTED_LOCATION ${location_onnxruntime}
  INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_SOURCE_DIR}/include"
)

file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/lib/libonnxruntime*")
message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}")
install(FILES ${onnxruntime_lib_files} DESTINATION lib)


================================================
FILE: cmake/onnxruntime-osx-arm64-static.cmake
================================================
# Copyright (c)  2022-2023  Xiaomi Corporation
message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
message(STATUS "CMAKE_OSX_ARCHITECTURES: ${CMAKE_OSX_ARCHITECTURES}")
message(STATUS "CMAKE_APPLE_SILICON_PROCESSOR : ${CMAKE_APPLE_SILICON_PROCESSOR}")

if(NOT CMAKE_SYSTEM_NAME STREQUAL Darwin)
  message(FATAL_ERROR "This file is for macOS only. Given: ${CMAKE_SYSTEM_NAME}")
endif()

if(BUILD_SHARED_LIBS)
  message(FATAL_ERROR "This file is for building static libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
endif()

set(onnxruntime_URL  "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.23.2/onnxruntime-osx-arm64-static_lib-1.23.2.zip")
set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/1.23.2/onnxruntime-osx-arm64-static_lib-1.23.2.zip")
set(onnxruntime_HASH "SHA256=febeb7116f075409c554434a317cd51a2efb26abbf364c2ed77191f728a56633")

# If you don't have access to the Internet,
# please download onnxruntime to one of the following locations.
# You can add more if you want.
set(possible_file_locations
  $ENV{HOME}/Downloads/onnxruntime-osx-arm64-static_lib-1.23.2.zip
  ${CMAKE_SOURCE_DIR}/onnxruntime-osx-arm64-static_lib-1.23.2.zip
  ${CMAKE_BINARY_DIR}/onnxruntime-osx-arm64-static_lib-1.23.2.zip
  /tmp/onnxruntime-osx-arm64-static_lib-1.23.2.zip
)

foreach(f IN LISTS possible_file_locations)
  if(EXISTS ${f})
    set(onnxruntime_URL  "${f}")
    file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL)
    message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}")
    set(onnxruntime_URL2)
    break()
  endif()
endforeach()

FetchContent_Declare(onnxruntime
  URL
    ${onnxruntime_URL}
    ${onnxruntime_URL2}
  URL_HASH          ${onnxruntime_HASH}
)

FetchContent_GetProperties(onnxruntime)
if(NOT onnxruntime_POPULATED)
  message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}")
  FetchContent_Populate(onnxruntime)
endif()
message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")

# for static libraries, we use onnxruntime_lib_files directly below
include_directories(${onnxruntime_SOURCE_DIR}/include)

file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/lib/lib*.a")

set(onnxruntime_lib_files ${onnxruntime_lib_files} PARENT_SCOPE)

message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}")
install(FILES ${onnxruntime_lib_files} DESTINATION lib)

# disable coreml when using static onnxruntime lib
add_definitions(-DSHERPA_ONNX_DISABLE_COREML)


================================================
FILE: cmake/onnxruntime-osx-arm64.cmake
================================================
# Copyright (c)  2022-2023  Xiaomi Corporation
message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
message(STATUS "CMAKE_OSX_ARCHITECTURES: ${CMAKE_OSX_ARCHITECTURES}")
message(STATUS "CMAKE_APPLE_SILICON_PROCESSOR : ${CMAKE_APPLE_SILICON_PROCESSOR}")

if(NOT CMAKE_SYSTEM_NAME STREQUAL Darwin)
  message(FATAL_ERROR "This file is for macOS only. Given: ${CMAKE_SYSTEM_NAME}")
endif()

if(NOT BUILD_SHARED_LIBS)
  message(FATAL_ERROR "This file is for building shared libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
endif()

set(onnxruntime_URL  "https://github.com/microsoft/onnxruntime/releases/download/v1.23.2/onnxruntime-osx-arm64-1.23.2.tgz")
set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/1.23.2/onnxruntime-osx-arm64-1.23.2.tgz")
set(onnxruntime_HASH "SHA256=b4d513ab2b26f088c66891dbbc1408166708773d7cc4163de7bdca0e9bbb7856")

# If you don't have access to the Internet,
# please download onnxruntime to one of the following locations.
# You can add more if you want.
set(possible_file_locations
  $ENV{HOME}/Downloads/onnxruntime-osx-arm64-1.23.2.tgz
  ${CMAKE_SOURCE_DIR}/onnxruntime-osx-arm64-1.23.2.tgz
  ${CMAKE_BINARY_DIR}/onnxruntime-osx-arm64-1.23.2.tgz
  /tmp/onnxruntime-osx-arm64-1.23.2.tgz
)

foreach(f IN LISTS possible_file_locations)
  if(EXISTS ${f})
    set(onnxruntime_URL  "${f}")
    file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL)
    message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}")
    set(onnxruntime_URL2)
    break()
  endif()
endforeach()

FetchContent_Declare(onnxruntime
  URL
    ${onnxruntime_URL}
    ${onnxruntime_URL2}
  URL_HASH          ${onnxruntime_HASH}
)

FetchContent_GetProperties(onnxruntime)
if(NOT onnxruntime_POPULATED)
  message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}")
  FetchContent_Populate(onnxruntime)
endif()
message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")

find_library(location_onnxruntime onnxruntime
  PATHS
  "${onnxruntime_SOURCE_DIR}/lib"
  NO_CMAKE_SYSTEM_PATH
)

message(STATUS "location_onnxruntime: ${location_onnxruntime}")

add_library(onnxruntime SHARED IMPORTED)

set_target_properties(onnxruntime PROPERTIES
  IMPORTED_LOCATION ${location_onnxruntime}
  INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_SOURCE_DIR}/include"
)

file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/lib/libonnxruntime*dylib")
message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}")
install(FILES ${onnxruntime_lib_files} DESTINATION lib)


================================================
FILE: cmake/onnxruntime-osx-universal-static.cmake
================================================
# Possible values for CMAKE_SYSTEM_NAME: Linux, Windows, Darwin

message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
message(STATUS "CMAKE_OSX_ARCHITECTURES: ${CMAKE_OSX_ARCHITECTURES}")
message(STATUS "CMAKE_APPLE_SILICON_PROCESSOR : ${CMAKE_APPLE_SILICON_PROCESSOR}")

if(NOT CMAKE_SYSTEM_NAME STREQUAL Darwin)
  message(FATAL_ERROR "This file is for macOS only. Given: ${CMAKE_SYSTEM_NAME}")
endif()

if(BUILD_SHARED_LIBS)
  message(FATAL_ERROR "This file is for building static libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
endif()

set(onnxruntime_URL  "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.23.2/onnxruntime-osx-universal2-static_lib-1.23.2.zip")
set(onnxruntime_URL2  "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/1.23.2/onnxruntime-osx-universal2-static_lib-1.23.2.zip")
set(onnxruntime_HASH "SHA256=9ea206a621d6e5550ddb9de0b96c4f666b074620f5c685b0479b5fa02c0bba76")

# If you don't have access to the Internet,
# please download onnxruntime to one of the following locations.
# You can add more if you want.
set(possible_file_locations
  $ENV{HOME}/Downloads/onnxruntime-osx-universal2-static_lib-1.23.2.zip
  ${CMAKE_SOURCE_DIR}/onnxruntime-osx-universal2-static_lib-1.23.2.zip
  ${CMAKE_BINARY_DIR}/onnxruntime-osx-universal2-static_lib-1.23.2.zip
  /tmp/onnxruntime-osx-universal2-static_lib-1.23.2.zip
)

foreach(f IN LISTS possible_file_locations)
  if(EXISTS ${f})
    set(onnxruntime_URL  "${f}")
    file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL)
    message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}")
    set(onnxruntime_URL2)
    break()
  endif()
endforeach()

FetchContent_Declare(onnxruntime
  URL
    ${onnxruntime_URL}
    ${onnxruntime_URL2}
  URL_HASH          ${onnxruntime_HASH}
)

FetchContent_GetProperties(onnxruntime)
if(NOT onnxruntime_POPULATED)
  message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}")
  FetchContent_Populate(onnxruntime)
endif()
message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")

# for static libraries, we use onnxruntime_lib_files directly below
include_directories(${onnxruntime_SOURCE_DIR}/include)

file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/lib/lib*.a")

set(onnxruntime_lib_files ${onnxruntime_lib_files} PARENT_SCOPE)

message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}")
install(FILES ${onnxruntime_lib_files} DESTINATION lib)

# disable coreml when using static onnxruntime lib
add_definitions(-DSHERPA_ONNX_DISABLE_COREML)


================================================
FILE: cmake/onnxruntime-osx-universal.cmake
================================================
# Possible values for CMAKE_SYSTEM_NAME: Linux, Windows, Darwin

message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
message(STATUS "CMAKE_OSX_ARCHITECTURES: ${CMAKE_OSX_ARCHITECTURES}")
message(STATUS "CMAKE_APPLE_SILICON_PROCESSOR : ${CMAKE_APPLE_SILICON_PROCESSOR}")

if(NOT CMAKE_SYSTEM_NAME STREQUAL Darwin)
  message(FATAL_ERROR "This file is for macOS only. Given: ${CMAKE_SYSTEM_NAME}")
endif()

if(NOT BUILD_SHARED_LIBS)
  message(FATAL_ERROR "This file is for building shared libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
endif()

set(onnxruntime_URL  "https://github.com/microsoft/onnxruntime/releases/download/v1.23.2/onnxruntime-osx-universal2-1.23.2.tgz")
set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/1.23.2/onnxruntime-osx-universal2-1.23.2.tgz")
set(onnxruntime_HASH "SHA256=49ae8e3a66ccb18d98ad3fe7f5906b6d7887df8a5edd40f49eb2b14e20885809")

# If you don't have access to the Internet,
# please download onnxruntime to one of the following locations.
# You can add more if you want.
set(possible_file_locations
  $ENV{HOME}/Downloads/onnxruntime-osx-universal2-1.23.2.tgz
  ${CMAKE_SOURCE_DIR}/onnxruntime-osx-universal2-1.23.2.tgz
  ${CMAKE_BINARY_DIR}/onnxruntime-osx-universal2-1.23.2.tgz
  /tmp/onnxruntime-osx-universal2-1.23.2.tgz
)

foreach(f IN LISTS possible_file_locations)
  if(EXISTS ${f})
    set(onnxruntime_URL  "${f}")
    file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL)
    message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}")
    set(onnxruntime_URL2)
    break()
  endif()
endforeach()

FetchContent_Declare(onnxruntime
  URL
    ${onnxruntime_URL}
    ${onnxruntime_URL2}
  URL_HASH          ${onnxruntime_HASH}
)

FetchContent_GetProperties(onnxruntime)
if(NOT onnxruntime_POPULATED)
  message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}")
  FetchContent_Populate(onnxruntime)
endif()
message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")

find_library(location_onnxruntime onnxruntime
  PATHS
  "${onnxruntime_SOURCE_DIR}/lib"
  NO_CMAKE_SYSTEM_PATH
)

message(STATUS "location_onnxruntime: ${location_onnxruntime}")

add_library(onnxruntime SHARED IMPORTED)

set_target_properties(onnxruntime PROPERTIES
  IMPORTED_LOCATION ${location_onnxruntime}
  INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_SOURCE_DIR}/include"
)

file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/lib/libonnxruntime*dylib")
message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}")
install(FILES ${onnxruntime_lib_files} DESTINATION lib)


================================================
FILE: cmake/onnxruntime-osx-x86_64-static.cmake
================================================
# Copyright (c)  2022-2023  Xiaomi Corporation
message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
message(STATUS "CMAKE_OSX_ARCHITECTURES: ${CMAKE_OSX_ARCHITECTURES}")
message(STATUS "CMAKE_APPLE_SILICON_PROCESSOR : ${CMAKE_APPLE_SILICON_PROCESSOR}")

if(NOT CMAKE_SYSTEM_NAME STREQUAL Darwin)
  message(FATAL_ERROR "This file is for macOS only. Given: ${CMAKE_SYSTEM_NAME}")
endif()

if(BUILD_SHARED_LIBS)
  message(FATAL_ERROR "This file is for building static libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
endif()

set(onnxruntime_URL  "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.23.2/onnxruntime-osx-x86_64-static_lib-1.23.2.zip")
set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/1.23.2/onnxruntime-osx-x86_64-static_lib-1.23.2.zip")
set(onnxruntime_HASH "SHA256=dc632688d5b48e478742ba1ae2d9ebc78ab6cee18fa6eb61e2fb03b8a80d1b66")

# If you don't have access to the Internet,
# please download onnxruntime to one of the following locations.
# You can add more if you want.
set(possible_file_locations
  $ENV{HOME}/Downloads/onnxruntime-osx-x86_64-static_lib-1.23.2.zip
  ${CMAKE_SOURCE_DIR}/onnxruntime-osx-x86_64-static_lib-1.23.2.zip
  ${CMAKE_BINARY_DIR}/onnxruntime-osx-x86_64-static_lib-1.23.2.zip
  /tmp/onnxruntime-osx-x86_64-static_lib-1.23.2.zip
)

foreach(f IN LISTS possible_file_locations)
  if(EXISTS ${f})
    set(onnxruntime_URL  "${f}")
    file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL)
    message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}")
    set(onnxruntime_URL2)
    break()
  endif()
endforeach()

FetchContent_Declare(onnxruntime
  URL
    ${onnxruntime_URL}
    ${onnxruntime_URL2}
  URL_HASH          ${onnxruntime_HASH}
)

FetchContent_GetProperties(onnxruntime)
if(NOT onnxruntime_POPULATED)
  message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}")
  FetchContent_Populate(onnxruntime)
endif()
message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")

# for static libraries, we use onnxruntime_lib_files directly below
include_directories(${onnxruntime_SOURCE_DIR}/include)

file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/lib/lib*.a")

set(onnxruntime_lib_files ${onnxruntime_lib_files} PARENT_SCOPE)

message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}")
install(FILES ${onnxruntime_lib_files} DESTINATION lib)

# disable coreml when using static onnxruntime lib
add_definitions(-DSHERPA_ONNX_DISABLE_COREML)


================================================
FILE: cmake/onnxruntime-osx-x86_64.cmake
================================================
# Copyright (c)  2022-2023  Xiaomi Corporation
message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
message(STATUS "CMAKE_OSX_ARCHITECTURES: ${CMAKE_OSX_ARCHITECTURES}")
message(STATUS "CMAKE_APPLE_SILICON_PROCESSOR : ${CMAKE_APPLE_SILICON_PROCESSOR}")

if(NOT CMAKE_SYSTEM_NAME STREQUAL Darwin)
  message(FATAL_ERROR "This file is for macOS only. Given: ${CMAKE_SYSTEM_NAME}")
endif()

if(NOT BUILD_SHARED_LIBS)
  message(FATAL_ERROR "This file is for building shared libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
endif()

set(onnxruntime_URL  "https://github.com/microsoft/onnxruntime/releases/download/v1.23.2/onnxruntime-osx-x86_64-1.23.2.tgz")
set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/1.23.2/onnxruntime-osx-x86_64-1.23.2.tgz")
set(onnxruntime_HASH "SHA256=d10359e16347b57d9959f7e80a225a5b4a66ed7d7e007274a15cae86836485a6")

# If you don't have access to the Internet,
# please download onnxruntime to one of the following locations.
# You can add more if you want.
set(possible_file_locations
  $ENV{HOME}/Downloads/onnxruntime-osx-x86_64-1.23.2.tgz
  ${CMAKE_SOURCE_DIR}/onnxruntime-osx-x86_64-1.23.2.tgz
  ${CMAKE_BINARY_DIR}/onnxruntime-osx-x86_64-1.23.2.tgz
  /tmp/onnxruntime-osx-x86_64-1.23.2.tgz
)

foreach(f IN LISTS possible_file_locations)
  if(EXISTS ${f})
    set(onnxruntime_URL  "${f}")
    file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL)
    message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}")
    set(onnxruntime_URL2)
    break()
  endif()
endforeach()

FetchContent_Declare(onnxruntime
  URL
    ${onnxruntime_URL}
    ${onnxruntime_URL2}
  URL_HASH          ${onnxruntime_HASH}
)

FetchContent_GetProperties(onnxruntime)
if(NOT onnxruntime_POPULATED)
  message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}")
  FetchContent_Populate(onnxruntime)
endif()
message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")

find_library(location_onnxruntime onnxruntime
  PATHS
  "${onnxruntime_SOURCE_DIR}/lib"
  NO_CMAKE_SYSTEM_PATH
)

message(STATUS "location_onnxruntime: ${location_onnxruntime}")

add_library(onnxruntime SHARED IMPORTED)

set_target_properties(onnxruntime PROPERTIES
  IMPORTED_LOCATION ${location_onnxruntime}
  INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_SOURCE_DIR}/include"
)

file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/lib/libonnxruntime*dylib")
message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}")
install(FILES ${onnxruntime_lib_files} DESTINATION lib)


================================================
FILE: cmake/onnxruntime-wasm-simd.cmake
================================================
# Copyright (c)  2022-2024  Xiaomi Corporation
message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")

if(NOT SHERPA_ONNX_ENABLE_WASM)
  message(FATAL_ERROR "This file is for WebAssembly.")
endif()

if(BUILD_SHARED_LIBS)
  message(FATAL_ERROR "BUILD_SHARED_LIBS should be OFF for WebAssembly")
endif()

set(onnxruntime_URL  "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-wasm-static_lib-simd-1.17.1.zip")
set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-wasm-static_lib-simd-1.17.1.zip")
set(onnxruntime_HASH "SHA256=8f07778e4233cf5a61a9d0795d90c5497177fbe8a46b701fda2d8d4e2b11cef8")

# If you don't have access to the Internet,
# please download onnxruntime to one of the following locations.
# You can add more if you want.
set(possible_file_locations
  $ENV{HOME}/Downloads/onnxruntime-wasm-static_lib-simd-1.17.1.zip
  ${CMAKE_SOURCE_DIR}/onnxruntime-wasm-static_lib-simd-1.17.1.zip
  ${CMAKE_BINARY_DIR}/onnxruntime-wasm-static_lib-simd-1.17.1.zip
  /tmp/onnxruntime-wasm-static_lib-simd-1.17.1.zip
  /star-fj/fangjun/download/github/onnxruntime-wasm-static_lib-simd-1.17.1.zip
)

foreach(f IN LISTS possible_file_locations)
  if(EXISTS ${f})
    set(onnxruntime_URL  "${f}")
    file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL)
    message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}")
    set(onnxruntime_URL2)
    break()
  endif()
endforeach()

FetchContent_Declare(onnxruntime
  URL
    ${onnxruntime_URL}
    ${onnxruntime_URL2}
  URL_HASH          ${onnxruntime_HASH}
)

FetchContent_GetProperties(onnxruntime)
if(NOT onnxruntime_POPULATED)
  message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}")
  FetchContent_Populate(onnxruntime)
endif()
message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")

# for static libraries, we use onnxruntime_lib_files directly below
include_directories(${onnxruntime_SOURCE_DIR}/include)

file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/lib/lib*.a")

set(onnxruntime_lib_files ${onnxruntime_lib_files} PARENT_SCOPE)

message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}")
install(FILES ${onnxruntime_lib_files} DESTINATION lib)


================================================
FILE: cmake/onnxruntime-win-arm64-static.cmake
================================================
# Copyright (c)  2022-2023  Xiaomi Corporation
message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
message(STATUS "CMAKE_VS_PLATFORM_NAME: ${CMAKE_VS_PLATFORM_NAME}")

if(NOT CMAKE_SYSTEM_NAME STREQUAL Windows)
  message(FATAL_ERROR "This file is for Windows only. Given: ${CMAKE_SYSTEM_NAME}")
endif()

if(NOT (CMAKE_VS_PLATFORM_NAME STREQUAL ARM64 OR CMAKE_VS_PLATFORM_NAME STREQUAL arm64))
  message(FATAL_ERROR "This file is for Windows arm64 only. Given: ${CMAKE_VS_PLATFORM_NAME}")
endif()

if(BUILD_SHARED_LIBS)
  message(FATAL_ERROR "This file is for building static libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
endif()

# Hashes for static CRT (/MT)
set(ONNXRUNTIME_HASH_MT_Release "SHA256=03166e5c7a830586b8772ff166611f0806fbc0ca76bcd177113fe3275a8af59b")
set(ONNXRUNTIME_HASH_MT_Debug "SHA256=61941c8d3058ebb6f9b83c9a99e4ae840708cc99702921d7c950a071f04f26ed")
set(ONNXRUNTIME_HASH_MT_RelWithDebInfo "SHA256=304bd830680b773ed1fce35758f317f2b50b2278359ede292e33bd67b57290b7")
set(ONNXRUNTIME_HASH_MT_MinSizeRel "SHA256=c24f46f689f9dbb8d8cd86d4c1d83f091da82d78548371d59446566d787f8cf4")

# Hashes for dynamic CRT (/MD)
set(ONNXRUNTIME_HASH_MD_Release "SHA256=26bae6d13335ecb229baa545d8c3b910998ace4f3617a4046640b9e6ef208dd7")
set(ONNXRUNTIME_HASH_MD_Debug "SHA256=f3e6d4550ac00c9f8f7ef647974087627f41d063e9899b67a028cca6c34521ab")
set(ONNXRUNTIME_HASH_MD_RelWithDebInfo "SHA256=ddef98c48243b0d7209edb9d416566405fd793551f63962fbb4f049b899136b0")
set(ONNXRUNTIME_HASH_MD_MinSizeRel "SHA256=4b28704e04f25b0839004ca828306f387814ada953750c4103f7076768fcf8a1")

if(NOT CMAKE_BUILD_TYPE MATCHES "^(Release|Debug|RelWithDebInfo|MinSizeRel)$")
  message(FATAL_ERROR "Supported CMAKE_BUILD_TYPE values are: Release, Debug, RelWithDebInfo, MinSizeRel. Given ${CMAKE_BUILD_TYPE}")
endif()

if(SHERPA_ONNX_USE_STATIC_CRT)
  set(onnxruntime_crt "MT")
else()
  set(onnxruntime_crt "MD")
endif()

message(STATUS "Use MSVC CRT: ${onnxruntime_crt}")

set(onnxruntime_HASH "${ONNXRUNTIME_HASH_${onnxruntime_crt}_${CMAKE_BUILD_TYPE}}")
set(onnxruntime_filename "onnxruntime-win-arm64-static_lib-${onnxruntime_crt}-${CMAKE_BUILD_TYPE}-1.23.2.tar.bz2")
set(onnxruntime_URL  "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.23.2/${onnxruntime_filename}")

# If you don't have access to the Internet,
# please download onnxruntime to one of the following locations.
# You can add more if you want.
set(possible_file_locations
  $ENV{HOME}/Downloads/${onnxruntime_filename}
  ${CMAKE_SOURCE_DIR}/${onnxruntime_filename}
  ${CMAKE_BINARY_DIR}/${onnxruntime_filename}
  $ENV{TMP}/${onnxruntime_filename}
  $ENV{TEMP}/${onnxruntime_filename}
)

foreach(f IN LISTS possible_file_locations)
  if(EXISTS ${f})
    set(onnxruntime_URL  "${f}")
    file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL)
    message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}")
    break()
  endif()
endforeach()

FetchContent_Declare(onnxruntime
  URL
    ${onnxruntime_URL}
  URL_HASH          ${onnxruntime_HASH}
)

FetchContent_GetProperties(onnxruntime)
if(NOT onnxruntime_POPULATED)
  message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}")
  FetchContent_Populate(onnxruntime)
endif()
message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")

# for static libraries, we use onnxruntime_lib_files directly below
include_directories(${onnxruntime_SOURCE_DIR}/include)

file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/lib/*.lib")

set(onnxruntime_lib_files ${onnxruntime_lib_files} PARENT_SCOPE)

message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}")
if(SHERPA_ONNX_ENABLE_PYTHON)
  install(FILES ${onnxruntime_lib_files} DESTINATION ..)
else()
  install(FILES ${onnxruntime_lib_files} DESTINATION lib)
endif()


================================================
FILE: cmake/onnxruntime-win-arm64.cmake
================================================
# Copyright (c)  2022-2024  Xiaomi Corporation
message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
message(STATUS "CMAKE_VS_PLATFORM_NAME: ${CMAKE_VS_PLATFORM_NAME}")

if(NOT CMAKE_SYSTEM_NAME STREQUAL Windows)
  message(FATAL_ERROR "This file is for Windows only. Given: ${CMAKE_SYSTEM_NAME}")
endif()

if(NOT (CMAKE_VS_PLATFORM_NAME STREQUAL ARM64 OR CMAKE_VS_PLATFORM_NAME STREQUAL arm64))
  message(FATAL_ERROR "This file is for Windows arm64 only. Given: ${CMAKE_VS_PLATFORM_NAME}")
endif()

if(NOT BUILD_SHARED_LIBS)
  message(FATAL_ERROR "This file is for building shared libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
endif()

if(NOT CMAKE_BUILD_TYPE MATCHES "^(Release|Debug|RelWithDebInfo|MinSizeRel)$")
  message(FATAL_ERROR "Please set CMAKE_BUILD_TYPE to Release, Debug, RelWithDebInfo or MinSizeRel")
endif()

# Hashes for static CRT (/MT)
set(ONNXRUNTIME_HASH_MT_Debug "SHA256=c9329ff4e8acdd0a07b40465d6521e15bb21eb0a4d9bc7843803e460dc4d02f0")
set(ONNXRUNTIME_HASH_MT_RelWithDebInfo "SHA256=342470bef5681452fb9add5668debc5e79b5cd01a8c8866fc7c47f81dbd8eb70")
set(ONNXRUNTIME_HASH_MT_MinSizeRel "SHA256=7bcbc8fd66fa1b0783dfdbd66eeb9ad4c023a1080ccc01e80412c2347aeddfa1")
set(ONNXRUNTIME_HASH_MT_Release "SHA256=ee3c257f4c56f91a0a6aa3cce9a29dcd654f432fe5a399246c8bdb86c9bb5900")

# Hashes for dynamic CRT (/MD)
set(ONNXRUNTIME_HASH_MD_Debug "SHA256=722f044c84947e37fec7941f5dc38aa8f71cdf0b4bfa57cb590a4ed634b36ddc")
set(ONNXRUNTIME_HASH_MD_RelWithDebInfo "SHA256=025b0dd682309482b3146b5c3a80e814ad9dec1e93ee8139954857b97d5798a9")
set(ONNXRUNTIME_HASH_MD_MinSizeRel "SHA256=b8d5d508b21b4604d241ac11384fa6906daf556c6667011d9a8c6806d9549b74")
set(ONNXRUNTIME_HASH_MD_Release "SHA256=08ed42a71fbce04e10a3192510a2c578a20c1d3a00652187f85002c54db84548")

if(SHERPA_ONNX_USE_STATIC_CRT)
  set(onnxruntime_crt "MT")
else()
  set(onnxruntime_crt "MD")
endif()

message(STATUS "Use MSVC CRT: ${onnxruntime_crt}")

set(onnxruntime_filename "onnxruntime-win-arm64-${onnxruntime_crt}-${CMAKE_BUILD_TYPE}-1.23.2.tar.bz2")
set(onnxruntime_URL  "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.23.2/${onnxruntime_filename}")
set(onnxruntime_HASH "${ONNXRUNTIME_HASH_${onnxruntime_crt}_${CMAKE_BUILD_TYPE}}")

# If you don't have access to the Internet,
# please download onnxruntime to one of the following locations.
# You can add more if you want.
set(possible_file_locations
  $ENV{HOME}/Downloads/${onnxruntime_filename}
  ${CMAKE_SOURCE_DIR}/${onnxruntime_filename}
  ${CMAKE_BINARY_DIR}/${onnxruntime_filename}
  $ENV{TMP}/${onnxruntime_filename}
  $ENV{TEMP}/${onnxruntime_filename}
)

foreach(f IN LISTS possible_file_locations)
  if(EXISTS ${f})
    set(onnxruntime_URL  "${f}")
    file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL)
    message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}")
    break()
  endif()
endforeach()

FetchContent_Declare(onnxruntime
  URL
    ${onnxruntime_URL}
  URL_HASH          ${onnxruntime_HASH}
)

FetchContent_GetProperties(onnxruntime)
if(NOT onnxruntime_POPULATED)
  message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}")
  FetchContent_Populate(onnxruntime)
endif()
message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")

find_library(location_onnxruntime onnxruntime
  PATHS
  "${onnxruntime_SOURCE_DIR}/lib"
  NO_CMAKE_SYSTEM_PATH
)

message(STATUS "location_onnxruntime: ${location_onnxruntime}")

add_library(onnxruntime SHARED IMPORTED)

set_target_properties(onnxruntime PROPERTIES
  IMPORTED_LOCATION ${location_onnxruntime}
  INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_SOURCE_DIR}/include"
)

set_property(TARGET onnxruntime
  PROPERTY
    IMPORTED_IMPLIB "${onnxruntime_SOURCE_DIR}/lib/onnxruntime.lib"
)

file(COPY ${onnxruntime_SOURCE_DIR}/lib/onnxruntime.dll
  DESTINATION
    ${CMAKE_BINARY_DIR}/bin/${CMAKE_BUILD_TYPE}
)

file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/lib/*.dll")

message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}")

install(FILES ${onnxruntime_lib_files} DESTINATION lib)
install(FILES ${onnxruntime_lib_files} DESTINATION bin)


================================================
FILE: cmake/onnxruntime-win-x64-directml.cmake
================================================
# Copyright (c)  2022-2023  Xiaomi Corporation
message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
message(STATUS "CMAKE_VS_PLATFORM_NAME: ${CMAKE_VS_PLATFORM_NAME}")

if(NOT CMAKE_SYSTEM_NAME STREQUAL Windows)
  message(FATAL_ERROR "This file is for Windows only. Given: ${CMAKE_SYSTEM_NAME}")
endif()

if(NOT (CMAKE_VS_PLATFORM_NAME STREQUAL X64 OR CMAKE_VS_PLATFORM_NAME STREQUAL x64))
  message(FATAL_ERROR "This file is for Windows x64 only. Given: ${CMAKE_VS_PLATFORM_NAME}")
endif()

if(NOT BUILD_SHARED_LIBS)
  message(FATAL_ERROR "This file is for building shared libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
endif()

if(NOT SHERPA_ONNX_ENABLE_DIRECTML)
  message(FATAL_ERROR "This file is for DirectML. Given SHERPA_ONNX_ENABLE_DIRECTML: ${SHERPA_ONNX_ENABLE_DIRECTML}")
endif()

if(location_onnxruntime_header_dir AND location_onnxruntime_lib)
    message("Use preinstall onnxruntime with directml: ${location_onnxruntime_lib}")
else()

    set(onnxruntime_URL  "https://globalcdn.nuget.org/packages/microsoft.ml.onnxruntime.directml.1.14.1.nupkg")
    set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/microsoft.ml.onnxruntime.directml.1.14.1.nupkg")
    set(onnxruntime_HASH "SHA256=c8ae7623385b19cd5de968d0df5383e13b97d1b3a6771c9177eac15b56013a5a")

    # If you don't have access to the Internet,
    # please download onnxruntime to one of the following locations.
    # You can add more if you want.
    set(possible_file_locations
        $ENV{HOME}/Downloads/microsoft.ml.onnxruntime.directml.1.14.1.nupkg
        ${PROJECT_SOURCE_DIR}/microsoft.ml.onnxruntime.directml.1.14.1.nupkg
        ${PROJECT_BINARY_DIR}/microsoft.ml.onnxruntime.directml.1.14.1.nupkg
        /tmp/microsoft.ml.onnxruntime.directml.1.14.1.nupkg
    )

    foreach(f IN LISTS possible_file_locations)
      if(EXISTS ${f})
        set(onnxruntime_URL  "${f}")
        file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL)
        message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}")
        set(onnxruntime_URL2)
        break()
      endif()
    endforeach()

    FetchContent_Declare(onnxruntime
      URL
        ${onnxruntime_URL}
        ${onnxruntime_URL2}
      URL_HASH          ${onnxruntime_HASH}
    )

    FetchContent_GetProperties(onnxruntime)
    if(NOT onnxruntime_POPULATED)
      message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}")
      FetchContent_Populate(onnxruntime)
    endif()
    message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")

    find_library(location_onnxruntime onnxruntime
      PATHS
      "${onnxruntime_SOURCE_DIR}/runtimes/win-x64/native"
      NO_CMAKE_SYSTEM_PATH
    )

    message(STATUS "location_onnxruntime: ${location_onnxruntime}")

    add_library(onnxruntime SHARED IMPORTED)

    set_target_properties(onnxruntime PROPERTIES
      IMPORTED_LOCATION ${location_onnxruntime}
      INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_SOURCE_DIR}/build/native/include"
    )

    set_property(TARGET onnxruntime
      PROPERTY
        IMPORTED_IMPLIB "${onnxruntime_SOURCE_DIR}/runtimes/win-x64/native/onnxruntime.lib"
    )

    file(COPY ${onnxruntime_SOURCE_DIR}/runtimes/win-x64/native/onnxruntime.dll
      DESTINATION
        ${CMAKE_BINARY_DIR}/bin/${CMAKE_BUILD_TYPE}
    )

    file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/runtimes/win-x64/native/onnxruntime.*")

    message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}")

    if(SHERPA_ONNX_ENABLE_PYTHON)
      install(FILES ${onnxruntime_lib_files} DESTINATION ..)
    else()
      install(FILES ${onnxruntime_lib_files} DESTINATION lib)
    endif()

    install(FILES ${onnxruntime_lib_files} DESTINATION bin)

endif()

# Setup DirectML

set(directml_URL "https://www.nuget.org/api/v2/package/Microsoft.AI.DirectML/1.15.0")
set(directml_HASH "SHA256=10d175f8e97447712b3680e3ac020bbb8eafdf651332b48f09ffee2eec801c23")

set(possible_directml_file_locations
    $ENV{HOME}/Downloads/Microsoft.AI.DirectML.1.15.0.nupkg
    ${PROJECT_SOURCE_DIR}/Microsoft.AI.DirectML.1.15.0.nupkg
    ${PROJECT_BINARY_DIR}/Microsoft.AI.DirectML.1.15.0.nupkg
    /tmp/Microsoft.AI.DirectML.1.15.0.nupkg
)

foreach(f IN LISTS possible_directml_file_locations)
  if(EXISTS ${f})
    set(directml_URL  "${f}")
    file(TO_CMAKE_PATH "${directml_URL}" directml_URL)
    message(STATUS "Found local downloaded DirectML: ${directml_URL}")
    break()
  endif()
endforeach()

FetchContent_Declare(directml
  URL
    ${directml_URL}
  URL_HASH ${directml_HASH}
)

FetchContent_GetProperties(directml)
if(NOT directml_POPULATED)
  message(STATUS "Downloading DirectML from ${directml_URL}")
  FetchContent_Populate(directml)
endif()
message(STATUS "DirectML is downloaded to ${directml_SOURCE_DIR}")

find_library(location_directml DirectML
  PATHS
  "${directml_SOURCE_DIR}/bin/x64-win"
  NO_CMAKE_SYSTEM_PATH
)

message(STATUS "location_directml: ${location_directml}")

add_library(directml SHARED IMPORTED)

set_target_properties(directml PROPERTIES
  IMPORTED_LOCATION ${location_directml}
  INTERFACE_INCLUDE_DIRECTORIES "${directml_SOURCE_DIR}/bin/x64-win"
)

set_property(TARGET directml
  PROPERTY
    IMPORTED_IMPLIB "${directml_SOURCE_DIR}/bin/x64-win/DirectML.lib"
)

file(COPY ${directml_SOURCE_DIR}/bin/x64-win/DirectML.dll
  DESTINATION
    ${CMAKE_BINARY_DIR}/bin/${CMAKE_BUILD_TYPE}
)

file(GLOB directml_lib_files "${directml_SOURCE_DIR}/bin/x64-win/DirectML.*")

message(STATUS "DirectML lib files: ${directml_lib_files}")

install(FILES ${directml_lib_files} DESTINATION lib)
install(FILES ${directml_lib_files} DESTINATION bin)


================================================
FILE: cmake/onnxruntime-win-x64-gpu.cmake
================================================
# Copyright (c)  2022-2023  Xiaomi Corporation
message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
message(STATUS "CMAKE_VS_PLATFORM_NAME: ${CMAKE_VS_PLATFORM_NAME}")

if(NOT CMAKE_SYSTEM_NAME STREQUAL Windows)
  message(FATAL_ERROR "This file is for Windows only. Given: ${CMAKE_SYSTEM_NAME}")
endif()

if(NOT (CMAKE_VS_PLATFORM_NAME STREQUAL X64 OR CMAKE_VS_PLATFORM_NAME STREQUAL x64))
  message(FATAL_ERROR "This file is for Windows x64 only. Given: ${CMAKE_VS_PLATFORM_NAME}")
endif()

if(NOT BUILD_SHARED_LIBS)
  message(FATAL_ERROR "This file is for building shared libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
endif()

if(NOT SHERPA_ONNX_ENABLE_GPU)
  message(FATAL_ERROR "This file is for NVIDIA GPU only. Given SHERPA_ONNX_ENABLE_GPU: ${SHERPA_ONNX_ENABLE_GPU}")
endif()

# Requires cuda 12.x, cudnn 9.x
set(onnxruntime_URL  "https://github.com/microsoft/onnxruntime/releases/download/v1.23.2/onnxruntime-win-x64-gpu-1.23.2.zip")
set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/1.23.2/onnxruntime-win-x64-gpu-1.23.2.zip")
set(onnxruntime_HASH "SHA256=e77afdbbc2b8cb6da4e5a50d89841b48c44f3e47dce4fb87b15a2743786d0bb9")

# If you don't have access to the Internet,
# please download onnxruntime to one of the following locations.
# You can add more if you want.
set(possible_file_locations
  $ENV{HOME}/Downloads/onnxruntime-win-x64-gpu-1.23.2.zip
  ${CMAKE_SOURCE_DIR}/onnxruntime-win-x64-gpu-1.23.2.zip
  ${CMAKE_BINARY_DIR}/onnxruntime-win-x64-gpu-1.23.2.zip
  /tmp/onnxruntime-win-x64-gpu-1.23.2.zip
)

foreach(f IN LISTS possible_file_locations)
  if(EXISTS ${f})
    set(onnxruntime_URL  "${f}")
    file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL)
    message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}")
    set(onnxruntime_URL2)
    break()
  endif()
endforeach()

FetchContent_Declare(onnxruntime
  URL
    ${onnxruntime_URL}
    ${onnxruntime_URL2}
  URL_HASH          ${onnxruntime_HASH}
)

FetchContent_GetProperties(onnxruntime)
if(NOT onnxruntime_POPULATED)
  message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}")
  FetchContent_Populate(onnxruntime)
endif()
message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")

find_library(location_onnxruntime onnxruntime
  PATHS
  "${onnxruntime_SOURCE_DIR}/lib"
  NO_CMAKE_SYSTEM_PATH
)

message(STATUS "location_onnxruntime: ${location_onnxruntime}")

add_library(onnxruntime SHARED IMPORTED)

set_target_properties(onnxruntime PROPERTIES
  IMPORTED_LOCATION ${location_onnxruntime}
  INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_SOURCE_DIR}/include"
)

set_property(TARGET onnxruntime
  PROPERTY
    IMPORTED_IMPLIB "${onnxruntime_SOURCE_DIR}/lib/onnxruntime.lib"
)

file(COPY ${onnxruntime_SOURCE_DIR}/lib/onnxruntime.dll
  DESTINATION
    ${CMAKE_BINARY_DIR}/bin/${CMAKE_BUILD_TYPE}
)

file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/lib/*.dll")

message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}")

install(FILES ${onnxruntime_lib_files} DESTINATION lib)
install(FILES ${onnxruntime_lib_files} DESTINATION bin)


================================================
FILE: cmake/onnxruntime-win-x64-static.cmake
================================================
# Copyright (c)  2022-2023  Xiaomi Corporation
message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
message(STATUS "CMAKE_VS_PLATFORM_NAME: ${CMAKE_VS_PLATFORM_NAME}")

if(NOT CMAKE_SYSTEM_NAME STREQUAL Windows)
  message(FATAL_ERROR "This file is for Windows only. Given: ${CMAKE_SYSTEM_NAME}")
endif()

if(NOT (CMAKE_VS_PLATFORM_NAME STREQUAL X64 OR CMAKE_VS_PLATFORM_NAME STREQUAL x64))
  message(FATAL_ERROR "This file is for Windows x64 only. Given: ${CMAKE_VS_PLATFORM_NAME}")
endif()

if(BUILD_SHARED_LIBS)
  message(FATAL_ERROR "This file is for building static libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
endif()

if(NOT CMAKE_BUILD_TYPE MATCHES "^(Release|Debug|RelWithDebInfo|MinSizeRel)$")
  message(FATAL_ERROR "Supported CMAKE_BUILD_TYPE values are: Release, Debug, RelWithDebInfo, MinSizeRel. Given ${CMAKE_BUILD_TYPE}")
endif()

# Hashes for static CRT (/MT)
set(ONNXRUNTIME_HASH_MT_Release "SHA256=c853a7646f9ebb0bf900e547141ef3e68d3ec888b27756ecb5f32476a6472391")
set(ONNXRUNTIME_HASH_MT_Debug "SHA256=efd7c3aa9fa10a380e5534ead76627790dd533142307e3fd1de2d1fba533dd90")
set(ONNXRUNTIME_HASH_MT_RelWithDebInfo "SHA256=4cf1733121eee79c9f18b048d1f5e9603079931e62af1c878c0d873ecd48900e")
set(ONNXRUNTIME_HASH_MT_MinSizeRel "SHA256=2d362a781ff98731423688ff5a50a08e1dd0e863e2de5b1d66c6595945a60735")

# Hashes for dynamic CRT (/MD)
set(ONNXRUNTIME_HASH_MD_Release "SHA256=f4596146f3aea7d9c557e466eb55af1cf8bb8e9f2a291ce4c428dd93d0501e33")
set(ONNXRUNTIME_HASH_MD_Debug "SHA256=68aa603aa25fd1cbe7ebef465395d0b685aa66fc8fd2df0b6d6f5a1e88621c60")
set(ONNXRUNTIME_HASH_MD_RelWithDebInfo "SHA256=ba5ae7bf3b5a29ea348f38516e7c46ff49921eb2a2e81e391f36bc932c4a7a20")
set(ONNXRUNTIME_HASH_MD_MinSizeRel "SHA256=e57978b5811fcf795e07c33eb69f32fac5cac8b848d32acf1154ce13c9cbcfd7")

if(SHERPA_ONNX_USE_STATIC_CRT)
  set(onnxruntime_crt "MT")
else()
  set(onnxruntime_crt "MD")
endif()

message(STATUS "Use MSVC CRT: ${onnxruntime_crt}")

set(onnxruntime_filename "onnxruntime-win-x64-static_lib-${onnxruntime_crt}-${CMAKE_BUILD_TYPE}-1.23.2.tar.bz2")
set(onnxruntime_HASH "${ONNXRUNTIME_HASH_${onnxruntime_crt}_${CMAKE_BUILD_TYPE}}")
set(onnxruntime_URL  "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.23.2/${onnxruntime_filename}")

# If you don't have access to the Internet,
# please download onnxruntime to one of the following locations.
# You can add more if you want.
set(possible_file_locations
  $ENV{HOME}/Downloads/${onnxruntime_filename}
  ${CMAKE_SOURCE_DIR}/${onnxruntime_filename}
  ${CMAKE_BINARY_DIR}/${onnxruntime_filename}
  $ENV{TMP}/${onnxruntime_filename}
  $ENV{TEMP}/${onnxruntime_filename}
)

foreach(f IN LISTS possible_file_locations)
  if(EXISTS ${f})
    set(onnxruntime_URL  "${f}")
    file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL)
    message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}")
    break()
  endif()
endforeach()

FetchContent_Declare(onnxruntime
  URL
    ${onnxruntime_URL}
  URL_HASH          ${onnxruntime_HASH}
)

FetchContent_GetProperties(onnxruntime)
if(NOT onnxruntime_POPULATED)
  message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}")
  FetchContent_Populate(onnxruntime)
endif()
message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")

# for static libraries, we use onnxruntime_lib_files directly below
include_directories(${onnxruntime_SOURCE_DIR}/include)

file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/lib/*.lib")

set(onnxruntime_lib_files ${onnxruntime_lib_files} PARENT_SCOPE)

message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}")
if(SHERPA_ONNX_ENABLE_PYTHON)
  install(FILES ${onnxruntime_lib_files} DESTINATION ..)
else()
  install(FILES ${onnxruntime_lib_files} DESTINATION lib)
endif()


================================================
FILE: cmake/onnxruntime-win-x64.cmake
================================================
# Copyright (c)  2022-2023  Xiaomi Corporation
message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
message(STATUS "CMAKE_VS_PLATFORM_NAME: ${CMAKE_VS_PLATFORM_NAME}")

if(NOT CMAKE_SYSTEM_NAME STREQUAL Windows)
  message(FATAL_ERROR "This file is for Windows only. Given: ${CMAKE_SYSTEM_NAME}")
endif()

if(NOT (CMAKE_VS_PLATFORM_NAME STREQUAL X64 OR CMAKE_VS_PLATFORM_NAME STREQUAL x64))
  message(FATAL_ERROR "This file is for Windows x64 only. Given: ${CMAKE_VS_PLATFORM_NAME}")
endif()

if(NOT BUILD_SHARED_LIBS)
  message(FATAL_ERROR "This file is for building shared libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
endif()

if(NOT CMAKE_BUILD_TYPE MATCHES "^(Release|Debug|RelWithDebInfo|MinSizeRel)$")
  message(FATAL_ERROR "Supported CMAKE_BUILD_TYPE values are: Release, Debug, RelWithDebInfo, MinSizeRel. Given ${CMAKE_BUILD_TYPE}")
endif()

# Hashes for static CRT (/MT)
set(ONNXRUNTIME_HASH_MT_Debug "SHA256=f63a1dafd63bd911135a47ccc75bd04c06a717de21b96c0d8ddb351714551124")
set(ONNXRUNTIME_HASH_MT_RelWithDebInfo "SHA256=b5363e34544b1d6bf27161843a72dfc853d80e5f14369242378b2e244d2af632")
set(ONNXRUNTIME_HASH_MT_MinSizeRel "SHA256=d1d4c76747020eb7ccafd9180da1a5dda0cc7d01b8cfc153fa88a9c205291c93")
set(ONNXRUNTIME_HASH_MT_Release "SHA256=a5c917196ef3356c343a69cee919a84f40ada6e9bf756b3e6edf3d07afc8a257")

# Hashes for dynamic CRT (/MD)
set(ONNXRUNTIME_HASH_MD_Debug "SHA256=422d9aeed64c6a5fa8daf3286a2ff39485cb8eceafb0d264179dd250e240f2f0")
set(ONNXRUNTIME_HASH_MD_RelWithDebInfo "SHA256=1bb3ca8ea37f9ca3bb6417da1756aadd984a76990bafa50950da1b679c7a1e65")
set(ONNXRUNTIME_HASH_MD_MinSizeRel "SHA256=c1d74a28463eee3297cebf5d6ec06fc7cf207e720dcf81259c3acb0e53534ac3")
set(ONNXRUNTIME_HASH_MD_Release "SHA256=0fffad34226a8b5bc33e7a130f77a57757f5d6623ca8e2495bc529ec3e959dd1")

if(SHERPA_ONNX_USE_STATIC_CRT)
  set(onnxruntime_crt "MT")
else()
  set(onnxruntime_crt "MD")
endif()

message(STATUS "Use MSVC CRT: ${onnxruntime_crt}")

set(onnxruntime_filename "onnxruntime-win-x64-${onnxruntime_crt}-${CMAKE_BUILD_TYPE}-1.23.2.tar.bz2")
set(onnxruntime_URL  "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.23.2/${onnxruntime_filename}")
set(onnxruntime_HASH "${ONNXRUNTIME_HASH_${onnxruntime_crt}_${CMAKE_BUILD_TYPE}}")

# If you don't have access to the Internet,
# please download onnxruntime to one of the following locations.
# You can add more if you want.
set(possible_file_locations
  $ENV{HOME}/Downloads/${onnxruntime_filename}
  ${CMAKE_SOURCE_DIR}/${onnxruntime_filename}
  ${CMAKE_BINARY_DIR}/${onnxruntime_filename}
  $ENV{TMP}/${onnxruntime_filename}
  $ENV{TEMP}/${onnxruntime_filename}
)

foreach(f IN LISTS possible_file_locations)
  if(EXISTS ${f})
    set(onnxruntime_URL  "${f}")
    file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL)
    message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}")
    break()
  endif()
endforeach()

FetchContent_Declare(onnxruntime
  URL
    ${onnxruntime_URL}
  URL_HASH          ${onnxruntime_HASH}
)

FetchContent_GetProperties(onnxruntime)
if(NOT onnxruntime_POPULATED)
  message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}")
  FetchContent_Populate(onnxruntime)
endif()
message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")

find_library(location_onnxruntime onnxruntime
  PATHS
  "${onnxruntime_SOURCE_DIR}/lib"
  NO_CMAKE_SYSTEM_PATH
)

message(STATUS "location_onnxruntime: ${location_onnxruntime}")

add_library(onnxruntime SHARED IMPORTED)

set_target_properties(onnxruntime PROPERTIES
  IMPORTED_LOCATION ${location_onnxruntime}
  INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_SOURCE_DIR}/include"
)

set_property(TARGET onnxruntime
  PROPERTY
    IMPORTED_IMPLIB "${onnxruntime_SOURCE_DIR}/lib/onnxruntime.lib"
)

file(COPY ${onnxruntime_SOURCE_DIR}/lib/onnxruntime.dll
  DESTINATION
    ${CMAKE_BINARY_DIR}/bin/${CMAKE_BUILD_TYPE}
)

file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/lib/*.dll")

message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}")

install(FILES ${onnxruntime_lib_files} DESTINATION lib)
install(FILES ${onnxruntime_lib_files} DESTINATION bin)


================================================
FILE: cmake/onnxruntime-win-x86-static.cmake
================================================
# Copyright (c)  2022-2023  Xiaomi Corporation
message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
message(STATUS "CMAKE_VS_PLATFORM_NAME: ${CMAKE_VS_PLATFORM_NAME}")

if(NOT CMAKE_SYSTEM_NAME STREQUAL Windows)
  message(FATAL_ERROR "This file is for Windows only. Given: ${CMAKE_SYSTEM_NAME}")
endif()

if(NOT (CMAKE_VS_PLATFORM_NAME STREQUAL Win32 OR CMAKE_VS_PLATFORM_NAME STREQUAL win32))
  message(FATAL_ERROR "This file is for Windows x86 only. Given: ${CMAKE_VS_PLATFORM_NAME}")
endif()

if(BUILD_SHARED_LIBS)
  message(FATAL_ERROR "This file is for building static libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
endif()

# Hashes for static CRT (/MT)
set(ONNXRUNTIME_HASH_MT_Release "SHA256=2e4ecb02d37dfb2d0ed4b4e970b9f0b0a0352a6d7cbcd95fdd693a2a2ba7a0db")
set(ONNXRUNTIME_HASH_MT_Debug "SHA256=18b1030d47f1b0ea744b82b4f6829e991d17b4206c8059d1b2e5393bf7f29b4f")
set(ONNXRUNTIME_HASH_MT_RelWithDebInfo "SHA256=400b6cff390fb36669abe681d34d307746c2ec0309471fe6046dc5def7ccf17e")
set(ONNXRUNTIME_HASH_MT_MinSizeRel "SHA256=10e9faf9f22f5c784b00db1fe907ef99af845aa211add13fc8ff8dec6ed1a665")

# Hashes for dynamic CRT (/MD)
set(ONNXRUNTIME_HASH_MD_Release "SHA256=8793c5ddd6ac44d784005c05ffc8498c15c7a0f26c6b61c4689b5098823b6dad")
set(ONNXRUNTIME_HASH_MD_Debug "SHA256=f43082bcc1f34fce1222fa5b68011d30702182e45198ac553e35add6090f3a3c")
set(ONNXRUNTIME_HASH_MD_RelWithDebInfo "SHA256=96a0be8f1b82c5eff82a8060928dd0a27d1a8a8a94926098bdd6539655393353")
set(ONNXRUNTIME_HASH_MD_MinSizeRel "SHA256=0fe7fc4cb4dba7afc6c1f622168700b4c98a5c01bcfd64ebe72a9c4bb3db4cc2")

if(NOT CMAKE_BUILD_TYPE MATCHES "^(Release|Debug|RelWithDebInfo|MinSizeRel)$")
  message(FATAL_ERROR "Supported CMAKE_BUILD_TYPE values are: Release, Debug, RelWithDebInfo, MinSizeRel. Given ${CMAKE_BUILD_TYPE}")
endif()

if(SHERPA_ONNX_USE_STATIC_CRT)
  set(onnxruntime_crt "MT")
else()
  set(onnxruntime_crt "MD")
endif()

message(STATUS "Use MSVC CRT: ${onnxruntime_crt}")

set(onnxruntime_HASH "${ONNXRUNTIME_HASH_${onnxruntime_crt}_${CMAKE_BUILD_TYPE}}")
set(onnxruntime_filename "onnxruntime-win-x86-static_lib-${onnxruntime_crt}-${CMAKE_BUILD_TYPE}-1.23.2.tar.bz2")
set(onnxruntime_URL  "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.23.2/${onnxruntime_filename}")


# If you don't have access to the Internet,
# please download onnxruntime to one of the following locations.
# You can add more if you want.
set(possible_file_locations
  $ENV{HOME}/Downloads/${onnxruntime_filename}
  ${CMAKE_SOURCE_DIR}/${onnxruntime_filename}
  ${CMAKE_BINARY_DIR}/${onnxruntime_filename}
  $ENV{TMP}/${onnxruntime_filename}
  $ENV{TEMP}/${onnxruntime_filename}
)

foreach(f IN LISTS possible_file_locations)
  if(EXISTS ${f})
    set(onnxruntime_URL  "${f}")
    file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL)
    message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}")
    break()
  endif()
endforeach()

FetchContent_Declare(onnxruntime
  URL
    ${onnxruntime_URL}
  URL_HASH          ${onnxruntime_HASH}
)

FetchContent_GetProperties(onnxruntime)
if(NOT onnxruntime_POPULATED)
  message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}")
  FetchContent_Populate(onnxruntime)
endif()
message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")

# for static libraries, we use onnxruntime_lib_files directly below
include_directories(${onnxruntime_SOURCE_DIR}/include)

file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/lib/*.lib")
set(onnxruntime_lib_files ${onnxruntime_lib_files} PARENT_SCOPE)

message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}")
if(SHERPA_ONNX_ENABLE_PYTHON)
  install(FILES ${onnxruntime_lib_files} DESTINATION ..)
else()
  install(FILES ${onnxruntime_lib_files} DESTINATION lib)
endif()


================================================
FILE: cmake/onnxruntime-win-x86.cmake
================================================
# Copyright (c)  2022-2023  Xiaomi Corporation
message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
message(STATUS "CMAKE_VS_PLATFORM_NAME: ${CMAKE_VS_PLATFORM_NAME}")

if(NOT CMAKE_SYSTEM_NAME STREQUAL Windows)
  message(FATAL_ERROR "This file is for Windows only. Given: ${CMAKE_SYSTEM_NAME}")
endif()

if(NOT (CMAKE_VS_PLATFORM_NAME STREQUAL Win32 OR CMAKE_VS_PLATFORM_NAME STREQUAL win32))
  message(FATAL_ERROR "This file is for Windows x86 only. Given: ${CMAKE_VS_PLATFORM_NAME}")
endif()

if(NOT BUILD_SHARED_LIBS)
  message(FATAL_ERROR "This file is for building shared libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
endif()

# Hashes for static CRT (/MT)
set(ONNXRUNTIME_HASH_MT_Release "SHA256=07536b6b0c3929df8a41352331357daac99deecf49a7856493c7d24dc036b071")
set(ONNXRUNTIME_HASH_MT_Debug "SHA256=6f9146c969f2db41049d604a3a6f922a61a7a18e2116d539bce9578da7092037")
set(ONNXRUNTIME_HASH_MT_RelWithDebInfo "SHA256=d9a40560ca39425fc19c599e2b33b39459660deb030c0dc5ef83ddb7bf5f58d0")
set(ONNXRUNTIME_HASH_MT_MinSizeRel "SHA256=c5dc5570a8144b152592de69ecf4d2aaa4557a62dc4c47e24f480e26c831bd65")

# Hashes for dynamic CRT (/MD)
set(ONNXRUNTIME_HASH_MD_Release "SHA256=ce1519a7934204cbf9f3431ba1d67ea1a8e838f743245ac96db9faaaf150581c")
set(ONNXRUNTIME_HASH_MD_Debug "SHA256=d35d5b5bee5a0483f16e845783902c686c9a186c71c17bcf20d6887a734a6ad9")
set(ONNXRUNTIME_HASH_MD_RelWithDebInfo "SHA256=e4df40832040419d9b5e7d983420c51e3095987bd20314c9d7a90b4759df2991")
set(ONNXRUNTIME_HASH_MD_MinSizeRel "SHA256=4ea6d745466f3623a13c0159f09dcf50d6b34e302b219cfc396c6af7122b7b39")

if(NOT CMAKE_BUILD_TYPE MATCHES "^(Release|Debug|RelWithDebInfo|MinSizeRel)$")
  message(FATAL_ERROR "Supported CMAKE_BUILD_TYPE values are: Release, Debug, RelWithDebInfo, MinSizeRel. Given ${CMAKE_BUILD_TYPE}")
endif()

if(SHERPA_ONNX_USE_STATIC_CRT)
  set(onnxruntime_crt "MT")
else()
  set(onnxruntime_crt "MD")
endif()

message(STATUS "Use MSVC CRT: ${onnxruntime_crt}")

set(onnxruntime_HASH "${ONNXRUNTIME_HASH_${onnxruntime_crt}_${CMAKE_BUILD_TYPE}}")
set(onnxruntime_filename "onnxruntime-win-x86-${onnxruntime_crt}-${CMAKE_BUILD_TYPE}-1.23.2.tar.bz2")
set(onnxruntime_URL  "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.23.2/${onnxruntime_filename}")

# If you don't have access to the Internet,
# please download onnxruntime to one of the following locations.
# You can add more if you want.
set(possible_file_locations
  $ENV{HOME}/Downloads/${onnxruntime_filename}
  ${CMAKE_SOURCE_DIR}/${onnxruntime_filename}
  ${CMAKE_BINARY_DIR}/${onnxruntime_filename}
  $ENV{TMP}/${onnxruntime_filename}
  $ENV{TEMP}/${onnxruntime_filename}
)

foreach(f IN LISTS possible_file_locations)
  if(EXISTS ${f})
    set(onnxruntime_URL  "${f}")
    file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL)
    message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}")
    break()
  endif()
endforeach()

FetchContent_Declare(onnxruntime
  URL
    ${onnxruntime_URL}
  URL_HASH          ${onnxruntime_HASH}
)

FetchContent_GetProperties(onnxruntime)
if(NOT onnxruntime_POPULATED)
  message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}")
  FetchContent_Populate(onnxruntime)
endif()
message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")

find_library(location_onnxruntime onnxruntime
  PATHS
  "${onnxruntime_SOURCE_DIR}/lib"
  NO_CMAKE_SYSTEM_PATH
)

message(STATUS "location_onnxruntime: ${location_onnxruntime}")

add_library(onnxruntime SHARED IMPORTED)

set_target_properties(onnxruntime PROPERTIES
  IMPORTED_LOCATION ${location_onnxruntime}
  INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_SOURCE_DIR}/include"
)

set_property(TARGET onnxruntime
  PROPERTY
    IMPORTED_IMPLIB "${onnxruntime_SOURCE_DIR}/lib/onnxruntime.lib"
)

file(COPY ${onnxruntime_SOURCE_DIR}/lib/onnxruntime.dll
  DESTINATION
    ${CMAKE_BINARY_DIR}/bin/${CMAKE_BUILD_TYPE}
)

file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/lib/*.dll")

message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}")

install(FILES ${onnxruntime_lib_files} DESTINATION lib)
install(FILES ${onnxruntime_lib_files} DESTINATION bin)


================================================
FILE: cmake/onnxruntime.cmake
================================================
# Copyright (c)  2022-2023  Xiaomi Corporation
function(download_onnxruntime)
  include(FetchContent)

  message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
  message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
  if(SHERPA_ONNX_ENABLE_WASM)
    include(onnxruntime-wasm-simd)
  elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL riscv64)
    if(SHERPA_ONNX_ENABLE_SPACEMIT)
      include(onnxruntime-linux-riscv64-spacemit)
    elseif(BUILD_SHARED_LIBS)
      include(onnxruntime-linux-riscv64)
    else()
      include(onnxruntime-linux-riscv64-static)
    endif()
  elseif(CMAKE_SYSTEM_NAME STREQUAL Linux AND CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64)
    if(SHERPA_ONNX_ENABLE_GPU)
      include(onnxruntime-linux-aarch64-gpu)
    elseif(BUILD_SHARED_LIBS)
      include(onnxruntime-linux-aarch64)
    else()
      include(onnxruntime-linux-aarch64-static)
    endif()
  elseif(CMAKE_SYSTEM_NAME STREQUAL Linux AND (CMAKE_SYSTEM_PROCESSOR STREQUAL arm OR CMAKE_SYSTEM_PROCESSOR STREQUAL armv7l))
    if(BUILD_SHARED_LIBS)
      include(onnxruntime-linux-arm)
    else()
      include(onnxruntime-linux-arm-static)
    endif()
  elseif(CMAKE_SYSTEM_NAME STREQUAL Linux AND CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64)
    if(SHERPA_ONNX_ENABLE_GPU)
      include(onnxruntime-linux-x86_64-gpu)
    elseif(BUILD_SHARED_LIBS)
      include(onnxruntime-linux-x86_64)
    else()
      include(onnxruntime-linux-x86_64-static)
    endif()
  elseif(CMAKE_SYSTEM_NAME STREQUAL Darwin)
    if (arm64 IN_LIST CMAKE_OSX_ARCHITECTURES AND x86_64 IN_LIST CMAKE_OSX_ARCHITECTURES)
      if(BUILD_SHARED_LIBS)
        include(onnxruntime-osx-universal)
      else()
        include(onnxruntime-osx-universal-static)
      endif()
    elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64 AND CMAKE_OSX_ARCHITECTURES STREQUAL "arm64")
      # cross compiling
      if(BUILD_SHARED_LIBS)
        include(onnxruntime-osx-arm64)
      else()
        include(onnxruntime-osx-arm64-static)
      endif()
    elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL arm64 AND CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64")
      # cross compiling
      if(BUILD_SHARED_LIBS)
        include(onnxruntime-osx-x86_64)
      else()
        include(onnxruntime-osx-x86_64-static)
      endif()
    elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL arm64)
      if(BUILD_SHARED_LIBS)
        include(onnxruntime-osx-arm64)
      else()
        include(onnxruntime-osx-arm64-static)
      endif()
    elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64)
      if(BUILD_SHARED_LIBS)
        include(onnxruntime-osx-x86_64)
      else()
        include(onnxruntime-osx-x86_64-static)
      endif()
    else()
      message(FATAL_ERROR "Unsupported processor ${CMAKE_SYSTEM_PROCESSOR} for Darwin")
    endif()
  elseif(WIN32)
    message(STATUS "CMAKE_VS_PLATFORM_NAME: ${CMAKE_VS_PLATFORM_NAME}")

    if(CMAKE_VS_PLATFORM_NAME STREQUAL Win32 OR CMAKE_VS_PLATFORM_NAME STREQUAL win32)
      if(BUILD_SHARED_LIBS)
        include(onnxruntime-win-x86)
      else()
        include(onnxruntime-win-x86-static)
      endif()

      if(SHERPA_ONNX_ENABLE_GPU)
        message(FATAL_ERROR "GPU support for Win32 is not supported!")
      endif()
    elseif(CMAKE_VS_PLATFORM_NAME STREQUAL ARM64 OR CMAKE_VS_PLATFORM_NAME STREQUAL arm64)
      # for 64-bit windows (arm64)
      if(BUILD_SHARED_LIBS)
        include(onnxruntime-win-arm64)
      else()
        include(onnxruntime-win-arm64-static)
      endif()
    else()
      # for 64-bit windows (x64)
      if(SHERPA_ONNX_ENABLE_DIRECTML)
        message(STATUS "Use DirectML")
        include(onnxruntime-win-x64-directml)
      elseif(BUILD_SHARED_LIBS)
        message(STATUS "Use dynamic onnxruntime libraries")
        if(SHERPA_ONNX_ENABLE_GPU)
          include(onnxruntime-win-x64-gpu)
        else()
          include(onnxruntime-win-x64)
        endif()
      else()
        # static libraries for windows x64
        message(STATUS "Use static onnxruntime libraries")
        include(onnxruntime-win-x64-static)
      endif()
    endif()
  else()
    message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
    message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
    message(FATAL_ERROR "Only support Linux, macOS, and Windows at present. Will support other OSes later")
  endif()
  set(onnxruntime_SOURCE_DIR ${onnxruntime_SOURCE_DIR} PARENT_SCOPE)
endfunction()

if(SHERPA_ONNX_USE_PRE_INSTALLED_ONNXRUNTIME_IF_AVAILABLE)
  # First, we try to locate the header and the lib if the user has already
  # installed onnxruntime. Otherwise, we will download the pre-compiled lib

  message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
  message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")

  if(DEFINED ENV{SHERPA_ONNXRUNTIME_INCLUDE_DIR})
    set(location_onnxruntime_header_dir $ENV{SHERPA_ONNXRUNTIME_INCLUDE_DIR})

    include_directories(${location_onnxruntime_header_dir})
  else()
    find_path(location_onnxruntime_header_dir onnxruntime_cxx_api.h
      PATHS
        /usr/include/onnxruntime
        /usr/local/include/onnxruntime
    )
  endif()

  message(STATUS "location_onnxruntime_header_dir: ${location_onnxruntime_header_dir}")

  if(DEFINED ENV{SHERPA_ONNXRUNTIME_LIB_DIR})
    if(APPLE)
      set(location_onnxruntime_lib $ENV{SHERPA_ONNXRUNTIME_LIB_DIR}/libonnxruntime.dylib)
    elseif(WIN32)
      if(SHERPA_ONNX_ENABLE_GPU)
        set(location_onnxruntime_lib $ENV{SHERPA_ONNXRUNTIME_LIB_DIR}/onnxruntime.dll)
        set(location_onnxruntime_lib2 $ENV{SHERPA_ONNXRUNTIME_LIB_DIR}/onnxruntime.lib)
      else()
        set(location_onnxruntime_lib $ENV{SHERPA_ONNXRUNTIME_LIB_DIR}/onnxruntime.lib)
        if(SHERPA_ONNX_ENABLE_DIRECTML)
          include(onnxruntime-win-x64-directml)
        endif()
      endif()
    else()
      set(location_onnxruntime_lib $ENV{SHERPA_ONNXRUNTIME_LIB_DIR}/libonnxruntime.so)
    endif()

    if(NOT EXISTS ${location_onnxruntime_lib})
      message(STATUS "${location_onnxruntime_lib} does not exist. Try static lib")

      set(location_onnxruntime_lib $ENV{SHERPA_ONNXRUNTIME_LIB_DIR}/libonnxruntime.a)
      if(NOT EXISTS ${location_onnxruntime_lib})
        message(FATAL_ERROR "${location_onnxruntime_lib} cannot be found")
      endif()
      set(onnxruntime_lib_files $ENV{SHERPA_ONNXRUNTIME_LIB_DIR}/libonnxruntime.a)
      message("Use static lib: ${onnxruntime_lib_files}")
    endif()
  else()
    find_library(location_onnxruntime_lib onnxruntime
      PATHS
        /lib
        /usr/lib
        /usr/local/lib
    )
  endif()

  message(STATUS "location_onnxruntime_lib: ${location_onnxruntime_lib}")
endif()

if(location_onnxruntime_header_dir AND location_onnxruntime_lib)
  if(NOT DEFINED onnxruntime_lib_files)
    add_library(onnxruntime SHARED IMPORTED)

    if(WIN32)
      set_target_properties(onnxruntime PROPERTIES
        IMPORTED_LOCATION ${location_onnxruntime_lib}
        IMPORTED_IMPLIB ${location_onnxruntime_lib2}
        INTERFACE_INCLUDE_DIRECTORIES "${location_onnxruntime_header_dir}"
      )
    else()
      set_target_properties(onnxruntime PROPERTIES
        IMPORTED_LOCATION ${location_onnxruntime_lib}
        INTERFACE_INCLUDE_DIRECTORIES "${location_onnxruntime_header_dir}"
      )
    endif()

    if(WIN32)
      file(GLOB onnxruntime_lib_files "$ENV{SHERPA_ONNXRUNTIME_LIB_DIR}/*.dll")
    else()
      if(DEFINED ANDROID_ABI)
        file(GLOB onnxruntime_lib_files "$ENV{SHERPA_ONNXRUNTIME_LIB_DIR}/libonnxruntime.so")
      else()
        file(GLOB _onnxruntime_all "$ENV{SHERPA_ONNXRUNTIME_LIB_DIR}/libonnxruntime*")
        set(onnxruntime_lib_files "")

        foreach(f ${_onnxruntime_all})
          if (NOT IS_DIRECTORY "${f}")
            list(APPEND onnxruntime_lib_files "${f}")
          endif()
        endforeach()
      endif()
    endif()

    message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}")

    install(FILES ${onnxruntime_lib_files} DESTINATION lib)

    if(WIN32)
      install(FILES ${onnxruntime_lib_files} DESTINATION bin)
    endif()
  endif()
else()
  if(SHERPA_ONNX_USE_PRE_INSTALLED_ONNXRUNTIME_IF_AVAILABLE)
    message(STATUS "Could not find a pre-installed onnxruntime.")
  endif()
  message(STATUS "Downloading pre-compiled onnxruntime")

  download_onnxruntime()
endif()


================================================
FILE: cmake/openfst.cmake
================================================
# Copyright (c)  2020  Xiaomi Corporation (author: Fangjun Kuang)

function(download_openfst)
  include(FetchContent)

  set(openfst_URL  "https://github.com/csukuangfj/openfst/archive/refs/tags/sherpa-onnx-2024-06-19.tar.gz")
  set(openfst_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/openfst-sherpa-onnx-2024-06-19.tar.gz")
  set(openfst_HASH "SHA256=5c98e82cc509c5618502dde4860b8ea04d843850ed57e6d6b590b644b268853d")

  # If you don't have access to the Internet,
  # please pre-download it
  set(possible_file_locations
    $ENV{HOME}/Downloads/openfst-sherpa-onnx-2024-06-19.tar.gz
    ${CMAKE_SOURCE_DIR}/openfst-sherpa-onnx-2024-06-19.tar.gz
    ${CMAKE_BINARY_DIR}/openfst-sherpa-onnx-2024-06-19.tar.gz
    /tmp/openfst-sherpa-onnx-2024-06-19.tar.gz
    /star-fj/fangjun/download/github/openfst-sherpa-onnx-2024-06-19.tar.gz
  )

  foreach(f IN LISTS possible_file_locations)
    if(EXISTS ${f})
      set(openfst_URL  "${f}")
      file(TO_CMAKE_PATH "${openfst_URL}" openfst_URL)
      set(openfst_URL2)
      break()
    endif()
  endforeach()

  set(HAVE_BIN OFF CACHE BOOL "" FORCE)
  set(HAVE_SCRIPT OFF CACHE BOOL "" FORCE)
  set(HAVE_COMPACT OFF CACHE BOOL "" FORCE)
  set(HAVE_COMPRESS OFF CACHE BOOL "" FORCE)
  set(HAVE_CONST OFF CACHE BOOL "" FORCE)
  set(HAVE_FAR ON CACHE BOOL "" FORCE)
  set(HAVE_GRM OFF CACHE BOOL "" FORCE)
  set(HAVE_PDT OFF CACHE BOOL "" FORCE)
  set(HAVE_MPDT OFF CACHE BOOL "" FORCE)
  set(HAVE_LINEAR OFF CACHE BOOL "" FORCE)
  set(HAVE_LOOKAHEAD OFF CACHE BOOL "" FORCE)
  set(HAVE_NGRAM OFF CACHE BOOL "" FORCE)
  set(HAVE_PYTHON OFF CACHE BOOL "" FORCE)
  set(HAVE_SPECIAL OFF CACHE BOOL "" FORCE)

  if(NOT WIN32)
    FetchContent_Declare(openfst
      URL
        ${openfst_URL}
        ${openfst_URL2}
      URL_HASH          ${openfst_HASH}
      PATCH_COMMAND
        sed -i.bak s/enable_testing\(\)//g "src/CMakeLists.txt" &&
        sed -i.bak s/add_subdirectory\(test\)//g "src/CMakeLists.txt" &&
        sed -i.bak /message/d "src/script/CMakeLists.txt"
        # sed -i.bak s/add_subdirectory\(script\)//g "src/CMakeLists.txt" &&
        # sed -i.bak s/add_subdirectory\(extensions\)//g "src/CMakeLists.txt"
    )
  else()
    FetchContent_Declare(openfst
      URL               ${openfst_URL}
      URL_HASH          ${openfst_HASH}
    )
  endif()

  FetchContent_GetProperties(openfst)
  if(NOT openfst_POPULATED)
    message(STATUS "Downloading openfst from ${openfst_URL}")
    FetchContent_Populate(openfst)
  endif()
  message(STATUS "openfst is downloaded to ${openfst_SOURCE_DIR}")

  if(_build_shared_libs_bak)
    set(_build_shared_libs_bak ${BUILD_SHARED_LIBS})
    set(BUILD_SHARED_LIBS OFF)
  endif()

  add_subdirectory(${openfst_SOURCE_DIR} ${openfst_BINARY_DIR} EXCLUDE_FROM_ALL)

  if(_build_shared_libs_bak)
    set_target_properties(fst fstfar
      PROPERTIES
        POSITION_INDEPENDENT_CODE ON
        C_VISIBILITY_PRESET hidden
        CXX_VISIBILITY_PRESET hidden
    )
    set(BUILD_SHARED_LIBS ON)
  endif()

  set(openfst_SOURCE_DIR ${openfst_SOURCE_DIR} PARENT_SCOPE)

  set_target_properties(fst PROPERTIES OUTPUT_NAME "sherpa-onnx-fst")
  set_target_properties(fstfar PROPERTIES OUTPUT_NAME "sherpa-onnx-fstfar")

  if(LINUX AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 11)
    target_compile_options(fst PUBLIC -Wno-missing-template-keyword)
  endif()

  target_include_directories(fst
    PUBLIC
      ${openfst_SOURCE_DIR}/src/include
  )

  target_include_directories(fstfar
    PUBLIC
      ${openfst_SOURCE_DIR}/src/include
  )
  # installed in ./kaldi-decoder.cmake
endfunction()

download_openfst()


================================================
FILE: cmake/piper-phonemize.cmake
================================================
function(download_piper_phonemize)
  include(FetchContent)

  set(piper_phonemize_URL  "https://github.com/csukuangfj/piper-phonemize/archive/78a788e0b719013401572d70fef372e77bff8e43.zip")
  set(piper_phonemize_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/piper-phonemize-78a788e0b719013401572d70fef372e77bff8e43.zip")
  set(piper_phonemize_HASH "SHA256=89641a46489a4898754643ce57bda9c9b54b4ca46485fdc02bf0dc84b866645d")

  # If you don't have access to the Internet,
  # please pre-download kaldi-decoder
  set(possible_file_locations
    $ENV{HOME}/Downloads/piper-phonemize-78a788e0b719013401572d70fef372e77bff8e43.zip
    ${CMAKE_SOURCE_DIR}/piper-phonemize-78a788e0b719013401572d70fef372e77bff8e43.zip
    ${CMAKE_BINARY_DIR}/piper-phonemize-78a788e0b719013401572d70fef372e77bff8e43.zip
    /tmp/piper-phonemize-78a788e0b719013401572d70fef372e77bff8e43.zip
    /star-fj/fangjun/download/github/piper-phonemize-78a788e0b719013401572d70fef372e77bff8e43.zip
  )

  foreach(f IN LISTS possible_file_locations)
    if(EXISTS ${f})
      set(piper_phonemize_URL  "${f}")
      file(TO_CMAKE_PATH "${piper_phonemize_URL}" piper_phonemize_URL)
      message(STATUS "Found local downloaded espeak-ng: ${piper_phonemize_URL}")
      set(piper_phonemize_URL2 )
      break()
    endif()
  endforeach()

  FetchContent_Declare(piper_phonemize
    URL
      ${piper_phonemize_URL}
      ${piper_phonemize_URL2}
    URL_HASH          ${piper_phonemize_HASH}
  )

  FetchContent_GetProperties(piper_phonemize)
  if(NOT piper_phonemize_POPULATED)
    message(STATUS "Downloading piper-phonemize from ${piper_phonemize_URL}")
    FetchContent_Populate(piper_phonemize)
  endif()
  message(STATUS "piper-phonemize is downloaded to ${piper_phonemize_SOURCE_DIR}")
  message(STATUS "piper-phonemize binary dir is ${piper_phonemize_BINARY_DIR}")

  if(BUILD_SHARED_LIBS)
    set(_build_shared_libs_bak ${BUILD_SHARED_LIBS})
    set(BUILD_SHARED_LIBS OFF)
  endif()

  add_subdirectory(${piper_phonemize_SOURCE_DIR} ${piper_phonemize_BINARY_DIR} EXCLUDE_FROM_ALL)

  if(_build_shared_libs_bak)
    set_target_properties(piper_phonemize
      PROPERTIES
        POSITION_INDEPENDENT_CODE ON
        C_VISIBILITY_PRESET hidden
        CXX_VISIBILITY_PRESET hidden
    )
    set(BUILD_SHARED_LIBS ON)
  endif()

  if(WIN32 AND MSVC)
    target_compile_options(piper_phonemize PUBLIC
      /wd4309
    )
  endif()

  target_include_directories(piper_phonemize
    INTERFACE
      ${piper_phonemize_SOURCE_DIR}/src/include
  )

  if(NOT BUILD_SHARED_LIBS)
    install(TARGETS
      piper_phonemize
    DESTINATION lib)
  endif()
endfunction()

download_piper_phonemize()


================================================
FILE: cmake/portaudio.cmake
================================================
function(download_portaudio)
  include(FetchContent)

  set(portaudio_URL  "http://files.portaudio.com/archives/pa_stable_v190700_20210406.tgz")
  set(portaudio_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/pa_stable_v190700_20210406.tgz")
  set(portaudio_HASH "SHA256=47efbf42c77c19a05d22e627d42873e991ec0c1357219c0d74ce6a2948cb2def")

  # If you don't have access to the Internet, please download it to your
  # local drive and modify the following line according to your needs.
  set(possible_file_locations
    $ENV{HOME}/Downloads/pa_stable_v190700_20210406.tgz
    $ENV{HOME}/asr/pa_stable_v190700_20210406.tgz
    ${CMAKE_SOURCE_DIR}/pa_stable_v190700_20210406.tgz
    ${CMAKE_BINARY_DIR}/pa_stable_v190700_20210406.tgz
    /tmp/pa_stable_v190700_20210406.tgz
    /star-fj/fangjun/download/github/pa_stable_v190700_20210406.tgz
  )

  foreach(f IN LISTS possible_file_locations)
    if(EXISTS ${f})
      set(portaudio_URL  "${f}")
      file(TO_CMAKE_PATH "${portaudio_URL}" portaudio_URL)
      message(STATUS "Found local downloaded portaudio: ${portaudio_URL}")
      set(portaudio_URL2)
      break()
    endif()
  endforeach()

  # Always use static build
  set(PA_BUILD_SHARED OFF CACHE BOOL "" FORCE)
  set(PA_BUILD_STATIC ON CACHE BOOL "" FORCE)
  set(PA_BUILD_EXAMPLES ON CACHE BOOL "" FORCE)
  set(PA_USE_WDMKS OFF CACHE BOOL "" FORCE)

  FetchContent_Declare(portaudio
    URL
      ${portaudio_URL}
      ${portaudio_URL2}
    URL_HASH          ${portaudio_HASH}
  )

  FetchContent_GetProperties(portaudio)
  if(NOT portaudio_POPULATED)
    message(STATUS "Downloading portaudio from ${portaudio_URL}")
    FetchContent_Populate(portaudio)
  endif()
  message(STATUS "portaudio is downloaded to ${portaudio_SOURCE_DIR}")
  message(STATUS "portaudio's binary dir is ${portaudio_BINARY_DIR}")

  if(APPLE)
    set(CMAKE_MACOSX_RPATH ON) # to solve the following warning on macOS
  endif()

  add_subdirectory(${portaudio_SOURCE_DIR} ${portaudio_BINARY_DIR} EXCLUDE_FROM_ALL)
  if(CMAKE_SYSTEM_NAME STREQUAL Linux)
    if(PA_USE_ALSA)
      message(STATUS "portaudio with ALSA")
    else()
      message(STATUS "portaudio without ALSA")
    endif()
  endif()

  set_target_properties(pa_devs PROPERTIES OUTPUT_NAME "sherpa-onnx-pa-devs")

  set_target_properties(portaudio_static PROPERTIES OUTPUT_NAME "sherpa-onnx-portaudio_static")
  if(NOT WIN32)
    target_compile_options(portaudio_static PRIVATE "-Wno-deprecated-declarations")
  endif()

  if(NOT BUILD_SHARED_LIBS AND SHERPA_ONNX_ENABLE_BINARY)
    install(TARGETS
      portaudio_static
    DESTINATION lib)
  endif()

  install(TARGETS
    pa_devs
  DESTINATION bin)
  add_custom_target(build_pa_devs ALL DEPENDS pa_devs)

endfunction()

download_portaudio()

# Note
# See http://portaudio.com/docs/v19-doxydocs/tutorial_start.html
# for how to use portaudio


================================================
FILE: cmake/pybind11.cmake
================================================
function(download_pybind11)
  include(FetchContent)

  set(pybind11_URL  "https://github.com/pybind/pybind11/archive/refs/tags/v3.0.0.tar.gz")
  set(pybind11_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/pybind11-3.0.0.tar.gz")
  set(pybind11_HASH "SHA256=453b1a3e2b266c3ae9da872411cadb6d693ac18063bd73226d96cfb7015a200c")

  # If you don't have access to the Internet,
  # please pre-download pybind11
  set(possible_file_locations
    $ENV{HOME}/Downloads/pybind11-3.0.0.tar.gz
    ${CMAKE_SOURCE_DIR}/pybind11-3.0.0.tar.gz
    ${CMAKE_BINARY_DIR}/pybind11-3.0.0.tar.gz
    /tmp/pybind11-3.0.0.tar.gz
    /star-fj/fangjun/download/github/pybind11-3.0.0.tar.gz
  )

  foreach(f IN LISTS possible_file_locations)
    if(EXISTS ${f})
      set(pybind11_URL  "${f}")
      file(TO_CMAKE_PATH "${pybind11_URL}" pybind11_URL)
      message(STATUS "Found local downloaded pybind11: ${pybind11_URL}")
      set(pybind11_URL2)
      break()
    endif()
  endforeach()

  FetchContent_Declare(pybind11
    URL
      ${pybind11_URL}
      ${pybind11_URL2}
    URL_HASH          ${pybind11_HASH}
  )

  FetchContent_GetProperties(pybind11)
  if(NOT pybind11_POPULATED)
    message(STATUS "Downloading pybind11 from ${pybind11_URL}")
    FetchContent_Populate(pybind11)
  endif()
  message(STATUS "pybind11 is downloaded to ${pybind11_SOURCE_DIR}")
  add_subdirectory(${pybind11_SOURCE_DIR} ${pybind11_BINARY_DIR} EXCLUDE_FROM_ALL)
endfunction()

download_pybind11()


================================================
FILE: cmake/sherpa-onnx-shared.pc.in
================================================
# Note: If you use Python, then the prefix might not be correct.
#
# You need to either manually modify this file to change the prefix to the location
# where this sherpa-onnx.pc file actually resides
# or
# you can use
#
#   pkg-config --define-variable=prefix=/path/to/the/dir/containing/this/file --cflags sherpa-onnx

prefix="@CMAKE_INSTALL_PREFIX@"
exec_prefix="${prefix}"
includedir="${prefix}/include"
libdir="${exec_prefix}/lib"

Name: sherpa-onnx
Description: pkg-config for sherpa-onnx
URL: https://github.com/k2-fsa/sherpa-onnx

Version: @SHERPA_ONNX_VERSION@
Cflags: -I"${includedir}"

# Note: -lcargs is required only for the following file
# https://github.com/k2-fsa/sherpa-onnx/blob/master/c-api-examples/decode-file-c-api.c
# We add it here so that users don't need to specify -lcargs when compiling decode-file-c-api.c
Libs: -L"${libdir}" -lsherpa-onnx-cxx-api -lsherpa-onnx-c-api -lonnxruntime -Wl,-rpath,${libdir} @SHERPA_ONNX_PKG_WITH_CARGS@ @SHERPA_ONNX_PKG_CONFIG_EXTRA_LIBS@


================================================
FILE: cmake/sherpa-onnx-static-no-tts.pc.in
================================================
# Note: If you use Python, then the prefix might not be correct.
#
# You need to either manually modify this file to change the prefix to the location
# where this sherpa-onnx.pc file actually resides
# or
# you can use
#
#   pkg-config --define-variable=prefix=/path/to/the/dir/containing/this/file --cflags sherpa-onnx

prefix="@CMAKE_INSTALL_PREFIX@"
exec_prefix="${prefix}"
includedir="${prefix}/include"
libdir="${exec_prefix}/lib"

Name: sherpa-onnx
Description: pkg-config for sherpa-onnx with TTS support
URL: https://github.com/k2-fsa/sherpa-onnx

Version: @SHERPA_ONNX_VERSION@
Cflags: -I"${includedir}"

# Note: -lcargs is required only for the following file
# https://github.com/k2-fsa/sherpa-onnx/blob/master/c-api-examples/decode-file-c-api.c
# We add it here so that users don't need to specify -lcargs when compiling decode-file-c-api.c
Libs: -L"${libdir}" -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fstfar -lsherpa-onnx-fst -lkaldi-native-fbank-core -lkissfft-float -lonnxruntime -lssentencepiece_core -Wl,-rpath,${libdir} @SHERPA_ONNX_PKG_WITH_CARGS@ @SHERPA_ONNX_PKG_CONFIG_EXTRA_LIBS@


================================================
FILE: cmake/sherpa-onnx-static.pc.in
================================================
# Note: If you use Python, then the prefix might not be correct.
#
# You need to either manually modify this file to change the prefix to the location
# where this sherpa-onnx.pc file actually resides
# or
# you can use
#
#   pkg-config --define-variable=prefix=/path/to/the/dir/containing/this/file --cflags sherpa-onnx

prefix="@CMAKE_INSTALL_PREFIX@"
exec_prefix="${prefix}"
includedir="${prefix}/include"
libdir="${exec_prefix}/lib"

Name: sherpa-onnx
Description: pkg-config for sherpa-onnx
URL: https://github.com/k2-fsa/sherpa-onnx

Version: @SHERPA_ONNX_VERSION@
Cflags: -I"${includedir}"

# Note: -lcargs is required only for the following file
# https://github.com/k2-fsa/sherpa-onnx/blob/master/c-api-examples/decode-file-c-api.c
# We add it here so that users don't need to specify -lcargs when compiling decode-file-c-api.c
Libs: -L"${libdir}" -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fstfar -lsherpa-onnx-fst -lkaldi-native-fbank-core -lkissfft-float -lpiper_phonemize -lespeak-ng -lucd -lonnxruntime -lssentencepiece_core -Wl,-rpath,${libdir} @SHERPA_ONNX_PKG_WITH_CARGS@ @SHERPA_ONNX_PKG_CONFIG_EXTRA_LIBS@


================================================
FILE: cmake/show-info.cmake
================================================
message(STATUS "CMAKE_SOURCE_DIR: ${CMAKE_SOURCE_DIR}")
message(STATUS "CMAKE_BINARY_DIR: ${CMAKE_BINARY_DIR}")
message(STATUS "PROJECT_SOURCE_DIR: ${PROJECT_SOURCE_DIR}")
message(STATUS "PROJECT_BINARY_DIR: ${PROJECT_BINARY_DIR}")
message(STATUS "CMake version: ${CMAKE_VERSION}")
message(STATUS "CMAKE_SYSTEM: ${CMAKE_SYSTEM}")
message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
message(STATUS "CMAKE_SYSTEM_VERSION: ${CMAKE_SYSTEM_VERSION}")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")

find_package(Git QUIET)
if(Git_FOUND)
  execute_process(COMMAND
    "${GIT_EXECUTABLE}" describe --always --abbrev=40
    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
    OUTPUT_VARIABLE SHERPA_ONNX_GIT_SHA1
    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE
  )

  execute_process(COMMAND
    "${GIT_EXECUTABLE}" log -1 --format=%ad --date=local
    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
    OUTPUT_VARIABLE SHERPA_ONNX_GIT_DATE
    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE
  )
  message(STATUS "sherpa-onnx git sha1: ${SHERPA_ONNX_GIT_SHA1}")
  message(STATUS "sherpa-onnx git date: ${SHERPA_ONNX_GIT_DATE}")
else()
  message(WARNING "git is not found")
endif()

if(UNIX AND NOT APPLE)
  execute_process(COMMAND
    lsb_release -sd
    OUTPUT_VARIABLE SHERPA_ONNX_OS
    OUTPUT_STRIP_TRAILING_WHITESPACE
  )
elseif(APPLE)
  execute_process(COMMAND
    sw_vers -productName
    OUTPUT_VARIABLE _product_name
    OUTPUT_STRIP_TRAILING_WHITESPACE
  )

  execute_process(COMMAND
    sw_vers -productVersion
    OUTPUT_VARIABLE _product_version
    OUTPUT_STRIP_TRAILING_WHITESPACE
  )

  execute_process(COMMAND
    sw_vers -buildVersion
    OUTPUT_VARIABLE _build_version
    OUTPUT_STRIP_TRAILING_WHITESPACE
  )
  set(SHERPA_ONNX_OS "${_product_name} ${_product_version} ${_build_version}")
elseif(WIN32)
  # Try PowerShell first to get OS name + version
  execute_process(
    COMMAND powershell -NoProfile -Command "(Get-CimInstance Win32_OperatingSystem).Caption + ' ' + (Get-CimInstance Win32_OperatingSystem).Version"
    OUTPUT_VARIABLE SHERPA_ONNX_OS
    OUTPUT_STRIP_TRAILING_WHITESPACE
    ERROR_QUIET
  )

  if(NOT SHERPA_ONNX_OS)
    message(WARNING "PowerShell not available, falling back to cmd /c ver")
    # Fallback: cmd.exe /c ver (only version info, less detailed)
    execute_process(
      COMMAND cmd /c ver
      OUTPUT_VARIABLE _cmd_out
      OUTPUT_STRIP_TRAILING_WHITESPACE
      ERROR_QUIET
    )
    string(REPLACE "\r" "" _cmd_out "${_cmd_out}")
    if(_cmd_out)
      set(SHERPA_ONNX_OS "Windows ${_cmd_out}")
    else()
      set(SHERPA_ONNX_OS "Windows (version unknown)")
    endif()
  endif()
else()
  set(SHERPA_ONNX_OS "Unknown")
endif()
message(STATUS "OS used to build sherpa-onnx: ${SHERPA_ONNX_OS}")

if(CMAKE_CXX_COMPILER)
  message(STATUS "C++ compiler: ${CMAKE_CXX_COMPILER}")
  if(CMAKE_CXX_COMPILER_ID)
    message(STATUS "C++ compiler ID: ${CMAKE_CXX_COMPILER_ID}")
    message(STATUS "C++ compiler version: ${CMAKE_CXX_COMPILER_VERSION}")
  endif()
endif()

if(CMAKE_C_COMPILER)
  message(STATUS "C compiler: ${CMAKE_C_COMPILER}")
  if(CMAKE_C_COMPILER_ID)
    message(STATUS "C compiler ID: ${CMAKE_C_COMPILER_ID}")
    message(STATUS "C compiler version: ${CMAKE_C_COMPILER_VERSION}")
  endif()
endif()


================================================
FILE: cmake/simple-sentencepiece.cmake
================================================
function(download_simple_sentencepiece)
  include(FetchContent)

  set(simple-sentencepiece_URL  "https://github.com/pkufool/simple-sentencepiece/archive/refs/tags/v0.7.tar.gz")
  set(simple-sentencepiece_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/simple-sentencepiece-0.7.tar.gz")
  set(simple-sentencepiece_HASH "SHA256=1748a822060a35baa9f6609f84efc8eb54dc0e74b9ece3d82367b7119fdc75af")

  # If you don't have access to the Internet,
  # please pre-download simple-sentencepiece
  set(possible_file_locations
    $ENV{HOME}/Downloads/simple-sentencepiece-0.7.tar.gz
    ${CMAKE_SOURCE_DIR}/simple-sentencepiece-0.7.tar.gz
    ${CMAKE_BINARY_DIR}/simple-sentencepiece-0.7.tar.gz
    /tmp/simple-sentencepiece-0.7.tar.gz
    /star-fj/fangjun/download/github/simple-sentencepiece-0.7.tar.gz
  )

  foreach(f IN LISTS possible_file_locations)
    if(EXISTS ${f})
      set(simple-sentencepiece_URL  "${f}")
      file(TO_CMAKE_PATH "${simple-sentencepiece_URL}" simple-sentencepiece_URL)
      message(STATUS "Found local downloaded simple-sentencepiece: ${simple-sentencepiece_URL}")
      set(simple-sentencepiece_URL2)
      break()
    endif()
  endforeach()

  set(SBPE_ENABLE_TESTS OFF CACHE BOOL "" FORCE)
  set(SBPE_BUILD_PYTHON OFF CACHE BOOL "" FORCE)

  FetchContent_Declare(simple-sentencepiece
    URL
      ${simple-sentencepiece_URL}
      ${simple-sentencepiece_URL2}
    URL_HASH
      ${simple-sentencepiece_HASH}
  )

  FetchContent_GetProperties(simple-sentencepiece)
  if(NOT simple-sentencepiece_POPULATED)
    message(STATUS "Downloading simple-sentencepiece ${simple-sentencepiece_URL}")
    FetchContent_Populate(simple-sentencepiece)
  endif()
  message(STATUS "simple-sentencepiece is downloaded to ${simple-sentencepiece_SOURCE_DIR}")

  if(BUILD_SHARED_LIBS)
    set(_build_shared_libs_bak ${BUILD_SHARED_LIBS})
    set(BUILD_SHARED_LIBS OFF)
  endif()

  add_subdirectory(${simple-sentencepiece_SOURCE_DIR} ${simple-sentencepiece_BINARY_DIR} EXCLUDE_FROM_ALL)

  if(_build_shared_libs_bak)
    set_target_properties(ssentencepiece_core
      PROPERTIES
        POSITION_INDEPENDENT_CODE ON
        C_VISIBILITY_PRESET hidden
        CXX_VISIBILITY_PRESET hidden
    )
    set(BUILD_SHARED_LIBS ON)
  endif()

  target_include_directories(ssentencepiece_core
    PUBLIC
      ${simple-sentencepiece_SOURCE_DIR}/
  )

  if(NOT BUILD_SHARED_LIBS)
    install(TARGETS ssentencepiece_core DESTINATION lib)
  endif()
endfunction()

download_simple_sentencepiece()


================================================
FILE: cmake/websocketpp.cmake
================================================
function(download_websocketpp)
  include(FetchContent)

  # The latest commit on the develop branch os as 2022-10-22
  set(websocketpp_URL  "https://github.com/zaphoyd/websocketpp/archive/b9aeec6eaf3d5610503439b4fae3581d9aff08e8.zip")
  set(websocketpp_URL2  "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/websocketpp-b9aeec6eaf3d5610503439b4fae3581d9aff08e8.zip")
  set(websocketpp_HASH "SHA256=1385135ede8191a7fbef9ec8099e3c5a673d48df0c143958216cd1690567f583")

  # If you don't have access to the Internet,
  # please pre-download websocketpp
  set(possible_file_locations
    $ENV{HOME}/Downloads/websocketpp-b9aeec6eaf3d5610503439b4fae3581d9aff08e8.zip
    ${CMAKE_SOURCE_DIR}/websocketpp-b9aeec6eaf3d5610503439b4fae3581d9aff08e8.zip
    ${CMAKE_BINARY_DIR}/websocketpp-b9aeec6eaf3d5610503439b4fae3581d9aff08e8.zip
    /tmp/websocketpp-b9aeec6eaf3d5610503439b4fae3581d9aff08e8.zip
    /star-fj/fangjun/download/github/websocketpp-b9aeec6eaf3d5610503439b4fae3581d9aff08e8.zip
  )

  foreach(f IN LISTS possible_file_locations)
    if(EXISTS ${f})
      set(websocketpp_URL  "${f}")
      file(TO_CMAKE_PATH "${websocketpp_URL}" websocketpp_URL)
      message(STATUS "Found local downloaded websocketpp: ${websocketpp_URL}")
      set(websocketpp_URL2)
      break()
    endif()
  endforeach()

  FetchContent_Declare(websocketpp
    URL
      ${websocketpp_URL}
      ${websocketpp_URL2}
    URL_HASH          ${websocketpp_HASH}
  )

  FetchContent_GetProperties(websocketpp)
  if(NOT websocketpp_POPULATED)
    message(STATUS "Downloading websocketpp from ${websocketpp_URL}")
    FetchContent_Populate(websocketpp)
  endif()
  message(STATUS "websocketpp is downloaded to ${websocketpp_SOURCE_DIR}")
  # add_subdirectory(${websocketpp_SOURCE_DIR} ${websocketpp_BINARY_DIR} EXCLUDE_FROM_ALL)
  include_directories(${websocketpp_SOURCE_DIR})
endfunction()

download_websocketpp()


================================================
FILE: cxx-api-examples/CMakeLists.txt
================================================
include_directories(${PROJECT_SOURCE_DIR})

add_executable(streaming-zipformer-cxx-api ./streaming-zipformer-cxx-api.cc)
target_link_libraries(streaming-zipformer-cxx-api sherpa-onnx-cxx-api)

add_executable(streaming-zipformer-with-hr-cxx-api ./streaming-zipformer-with-hr-cxx-api.cc)
target_link_libraries(streaming-zipformer-with-hr-cxx-api sherpa-onnx-cxx-api)

add_executable(speech-enhancement-gtcrn-cxx-api ./speech-enhancement-gtcrn-cxx-api.cc)
target_link_libraries(speech-enhancement-gtcrn-cxx-api sherpa-onnx-cxx-api)

add_executable(speech-enhancement-dpdfnet-cxx-api ./speech-enhancement-dpdfnet-cxx-api.cc)
target_link_libraries(speech-enhancement-dpdfnet-cxx-api sherpa-onnx-cxx-api)

add_executable(online-speech-enhancement-gtcrn-cxx-api
               ./online-speech-enhancement-gtcrn-cxx-api.cc)
target_link_libraries(online-speech-enhancement-gtcrn-cxx-api
                      sherpa-onnx-cxx-api)

add_executable(online-speech-enhancement-dpdfnet-cxx-api
               ./online-speech-enhancement-dpdfnet-cxx-api.cc)
target_link_libraries(online-speech-enhancement-dpdfnet-cxx-api
                      sherpa-onnx-cxx-api)

add_executable(kws-cxx-api ./kws-cxx-api.cc)
target_link_libraries(kws-cxx-api sherpa-onnx-cxx-api)

add_executable(audio-tagging-ced-cxx-api ./audio-tagging-ced-cxx-api.cc)
target_link_libraries(audio-tagging-ced-cxx-api sherpa-onnx-cxx-api)

add_executable(audio-tagging-zipformer-cxx-api ./audio-tagging-zipformer-cxx-api.cc)
target_link_libraries(audio-tagging-zipformer-cxx-api sherpa-onnx-cxx-api)

add_executable(streaming-zipformer-rtf-cxx-api ./streaming-zipformer-rtf-cxx-api.cc)
target_link_libraries(streaming-zipformer-rtf-cxx-api sherpa-onnx-cxx-api)

add_executable(streaming-t-one-ctc-cxx-api   streaming-t-one-ctc-cxx-api.cc)
target_link_libraries(streaming-t-one-ctc-cxx-api sherpa-onnx-cxx-api)

add_executable(whisper-cxx-api ./whisper-cxx-api.cc)
target_link_libraries(whisper-cxx-api sherpa-onnx-cxx-api)

add_executable(fire-red-asr-cxx-api ./fire-red-asr-cxx-api.cc)
target_link_libraries(fire-red-asr-cxx-api sherpa-onnx-cxx-api)

add_executable(fire-red-asr-ctc-cxx-api ./fire-red-asr-ctc-cxx-api.cc)
target_link_libraries(fire-red-asr-ctc-cxx-api sherpa-onnx-cxx-api)

add_executable(moonshine-cxx-api ./moonshine-cxx-api.cc)
target_link_libraries(moonshine-cxx-api sherpa-onnx-cxx-api)

add_executable(moonshine-v2-cxx-api ./moonshine-v2-cxx-api.cc)
target_link_libraries(moonshine-v2-cxx-api sherpa-onnx-cxx-api)

add_executable(sense-voice-cxx-api ./sense-voice-cxx-api.cc)
target_link_libraries(sense-voice-cxx-api sherpa-onnx-cxx-api)

add_executable(wenet-ctc-cxx-api ./wenet-ctc-cxx-api.cc)
target_link_libraries(wenet-ctc-cxx-api sherpa-onnx-cxx-api)

add_executable(omnilingual-asr-ctc-cxx-api ./omnilingual-asr-ctc-cxx-api.cc)
target_link_libraries(omnilingual-asr-ctc-cxx-api sherpa-onnx-cxx-api)

add_executable(medasr-ctc-cxx-api ./medasr-ctc-cxx-api.cc)
target_link_libraries(medasr-ctc-cxx-api sherpa-onnx-cxx-api)

add_executable(nemo-canary-cxx-api ./nemo-canary-cxx-api.cc)
target_link_libraries(nemo-canary-cxx-api sherpa-onnx-cxx-api)

add_executable(offline-punctuation-cxx-api ./offline-punctuation-cxx-api.cc)
target_link_libraries(offline-punctuation-cxx-api sherpa-onnx-cxx-api)

add_executable(online-punctuation-cxx-api ./online-punctuation-cxx-api.cc)
target_link_libraries(online-punctuation-cxx-api sherpa-onnx-cxx-api)

if(SHERPA_ONNX_ENABLE_PORTAUDIO)
  add_executable(sense-voice-simulate-streaming-microphone-cxx-api
    ./sense-voice-simulate-streaming-microphone-cxx-api.cc
    ${CMAKE_CURRENT_LIST_DIR}/../sherpa-onnx/csrc/microphone.cc
  )
  target_link_libraries(sense-voice-simulate-streaming-microphone-cxx-api
    sherpa-onnx-cxx-api
    portaudio_static
  )

  add_executable(fire-red-asr-ctc-simulate-streaming-microphone-cxx-api
    ./fire-red-asr-ctc-simulate-streaming-microphone-cxx-api.cc
    ${CMAKE_CURRENT_LIST_DIR}/../sherpa-onnx/csrc/microphone.cc
  )
  target_link_libraries(fire-red-asr-ctc-simulate-streaming-microphone-cxx-api
    sherpa-onnx-cxx-api
    portaudio_static
  )

  add_executable(wenet-ctc-simulate-streaming-microphone-cxx-api
    ./wenet-ctc-simulate-streaming-microphone-cxx-api.cc
    ${CMAKE_CURRENT_LIST_DIR}/../sherpa-onnx/csrc/microphone.cc
  )
  target_link_libraries(wenet-ctc-simulate-streaming-microphone-cxx-api
    sherpa-onnx-cxx-api
    portaudio_static
  )

  add_executable(parakeet-tdt-simulate-streaming-microphone-cxx-api
    ./parakeet-tdt-simulate-streaming-microphone-cxx-api.cc
    ${CMAKE_CURRENT_LIST_DIR}/../sherpa-onnx/csrc/microphone.cc
  )
  target_link_libraries(parakeet-tdt-simulate-streaming-microphone-cxx-api
    sherpa-onnx-cxx-api
    portaudio_static
  )

  add_executable(parakeet-tdt-ctc-simulate-streaming-microphone-cxx-api
    ./parakeet-tdt-ctc-simulate-streaming-microphone-cxx-api.cc
    ${CMAKE_CURRENT_LIST_DIR}/../sherpa-onnx/csrc/microphone.cc
  )
  target_link_libraries(parakeet-tdt-ctc-simulate-streaming-microphone-cxx-api
    sherpa-onnx-cxx-api
    portaudio_static
  )

  add_executable(zipformer-ctc-simulate-streaming-microphone-cxx-api
    ./zipformer-ctc-simulate-streaming-microphone-cxx-api.cc
    ${CMAKE_CURRENT_LIST_DIR}/../sherpa-onnx/csrc/microphone.cc
  )
  target_link_libraries(zipformer-ctc-simulate-streaming-microphone-cxx-api
    sherpa-onnx-cxx-api
    portaudio_static
  )

  add_executable(zipformer-transducer-simulate-streaming-microphone-cxx-api
    ./zipformer-transducer-simulate-streaming-microphone-cxx-api.cc
    ${CMAKE_CURRENT_LIST_DIR}/../sherpa-onnx/csrc/microphone.cc
  )
  target_link_libraries(zipformer-transducer-simulate-streaming-microphone-cxx-api
    sherpa-onnx-cxx-api
    portaudio_static
  )
endif()

if(SHERPA_ONNX_HAS_ALSA)
  add_executable(sense-voice-simulate-streaming-alsa-cxx-api
    ./sense-voice-simulate-streaming-alsa-cxx-api.cc
    ${CMAKE_CURRENT_LIST_DIR}/../sherpa-onnx/csrc/alsa.cc
  )
  target_link_libraries(sense-voice-simulate-streaming-alsa-cxx-api
    sherpa-onnx-cxx-api
  )

  add_executable(fire-red-asr-ctc-simulate-streaming-alsa-cxx-api
    ./fire-red-asr-ctc-simulate-streaming-alsa-cxx-api.cc
    ${CMAKE_CURRENT_LIST_DIR}/../sherpa-onnx/csrc/alsa.cc
  )
  target_link_libraries(fire-red-asr-ctc-simulate-streaming-alsa-cxx-api
    sherpa-onnx-cxx-api
  )

  add_executable(zipformer-ctc-simulate-streaming-alsa-cxx-api
    ./zipformer-ctc-simulate-streaming-alsa-cxx-api.cc
    ${CMAKE_CURRENT_LIST_DIR}/../sherpa-onnx/csrc/alsa.cc
  )
  target_link_libraries(zipformer-ctc-simulate-streaming-alsa-cxx-api
    sherpa-onnx-cxx-api
  )

  if(DEFINED ENV{SHERPA_ONNX_ALSA_LIB_DIR})
    target_link_libraries(sense-voice-simulate-streaming-alsa-cxx-api -L$ENV{SHERPA_ONNX_ALSA_LIB_DIR} -lasound)
    target_link_libraries(fire-red-asr-ctc-simulate-streaming-alsa-cxx-api -L$ENV{SHERPA_ONNX_ALSA_LIB_DIR} -lasound)
    target_link_libraries(zipformer-ctc-simulate-streaming-alsa-cxx-api -L$ENV{SHERPA_ONNX_ALSA_LIB_DIR} -lasound)
  else()
    target_link_libraries(sense-voice-simulate-streaming-alsa-cxx-api asound)
    target_link_libraries(fire-red-asr-ctc-simulate-streaming-alsa-cxx-api asound)
    target_link_libraries(zipformer-ctc-simulate-streaming-alsa-cxx-api asound)
  endif()
endif()

add_executable(sense-voice-with-hr-cxx-api ./sense-voice-with-hr-cxx-api.cc)
target_link_libraries(sense-voice-with-hr-cxx-api sherpa-onnx-cxx-api)

add_executable(dolphin-ctc-cxx-api ./dolphin-ctc-cxx-api.cc)
target_link_libraries(dolphin-ctc-cxx-api sherpa-onnx-cxx-api)

add_executable(vad-cxx-api ./vad-cxx-api.cc)
target_link_libraries(vad-cxx-api sherpa-onnx-cxx-api)

add_executable(funasr-nano-cxx-api ./funasr-nano-cxx-api.cc)
target_link_libraries(funasr-nano-cxx-api sherpa-onnx-cxx-api)

if(SHERPA_ONNX_ENABLE_TTS)
  add_executable(matcha-tts-zh-cxx-api ./matcha-tts-zh-cxx-api.cc)
  target_link_libraries(matcha-tts-zh-cxx-api sherpa-onnx-cxx-api)

  add_executable(matcha-tts-en-cxx-api ./matcha-tts-en-cxx-api.cc)
  target_link_libraries(matcha-tts-en-cxx-api sherpa-onnx-cxx-api)

  add_executable(kokoro-tts-en-cxx-api ./kokoro-tts-en-cxx-api.cc)
  target_link_libraries(kokoro-tts-en-cxx-api sherpa-onnx-cxx-api)

  add_executable(kitten-tts-en-cxx-api ./kitten-tts-en-cxx-api.cc)
  target_link_libraries(kitten-tts-en-cxx-api sherpa-onnx-cxx-api)

  add_executable(pocket-tts-en-cxx-api ./pocket-tts-en-cxx-api.cc)
  target_link_libraries(pocket-tts-en-cxx-api sherpa-onnx-cxx-api)

  add_executable(kokoro-tts-zh-en-cxx-api ./kokoro-tts-zh-en-cxx-api.cc)
  target_link_libraries(kokoro-tts-zh-en-cxx-api sherpa-onnx-cxx-api)

  add_executable(supertonic-tts-en-cxx-api ./supertonic-tts-en-cxx-api.cc)
  target_link_libraries(supertonic-tts-en-cxx-api sherpa-onnx-cxx-api)

  add_executable(zipvoice-tts-zh-en-cxx-api ./zipvoice-tts-zh-en-cxx-api.cc)
  target_link_libraries(zipvoice-tts-zh-en-cxx-api sherpa-onnx-cxx-api)
endif()


================================================
FILE: cxx-api-examples/audio-tagging-ced-cxx-api.cc
================================================
// cxx-api-examples/audio-tagging-ced-cxx-api.cc
// Copyright (c)  2025  Xiaomi Corporation

//
// This file demonstrates how to use CED with sherpa-onnx's C++
// API for audio tagging.
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2
// tar xvf sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2
// rm sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2
//
// clang-format on
#include <chrono>  // NOLINT
#include <cstdio>
#include <iostream>
#include <string>
#include <vector>

#include "sherpa-onnx/c-api/cxx-api.h"

int32_t main() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  AudioTaggingConfig config;

  config.model.ced =
      "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/model.int8.onnx";
  config.model.num_threads = 1;
  config.model.debug = true;
  config.labels =
      "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/"
      "class_labels_indices.csv";

  config.top_k = 5;

  std::cout << "Loading model\n";
  AudioTagging tagger = AudioTagging::Create(config);
  if (!tagger.Get()) {
    std::cerr << "Please check your config\n";
    return -1;
  }

  std::string wave_filename =
      "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/1.wav";

  Wave wave = ReadWave(wave_filename);
  if (wave.samples.empty()) {
    std::cerr << "Failed to read: '" << wave_filename << "'\n";
    return -1;
  }

  std::cout << "Started\n";
  const auto begin = std::chrono::steady_clock::now();

  OfflineStream stream = tagger.CreateStream();
  stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
                        wave.samples.size());
  std::vector<AudioEvent> events = tagger.Compute(&stream);

  const auto end = std::chrono::steady_clock::now();
  std::cout << "Done\n";

  const float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;
  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
  float rtf = elapsed_seconds / duration;

  int32_t i = 0;

  for (const auto &event : events) {
    fprintf(stderr, "%d: AudioEvent(name='%s', index=%d, prob=%.3f)\n", i,
            event.name.c_str(), event.index, event.prob);
    i += 1;
  }

  printf("Number of threads: %d\n", config.model.num_threads);
  printf("Duration: %.3fs\n", duration);
  printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
  printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
         duration, rtf);
}


================================================
FILE: cxx-api-examples/audio-tagging-zipformer-cxx-api.cc
================================================
// cxx-api-examples/audio-tagging-zipformer-cxx-api.cc
// Copyright (c)  2025  Xiaomi Corporation

//
// This file demonstrates how to use Zipformer with sherpa-onnx's C++
// API for audio tagging.
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
// tar xvf sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
// rm sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
//
//
// clang-format on
#include <chrono>  // NOLINT
#include <cstdio>
#include <iostream>
#include <string>
#include <vector>

#include "sherpa-onnx/c-api/cxx-api.h"

int32_t main() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  AudioTaggingConfig config;

  config.model.zipformer.model =
      "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/model.onnx";
  config.model.num_threads = 1;
  config.model.debug = true;
  config.labels =
      "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/"
      "class_labels_indices.csv";

  config.top_k = 5;

  std::cout << "Loading model\n";
  AudioTagging tagger = AudioTagging::Create(config);
  if (!tagger.Get()) {
    std::cerr << "Please check your config\n";
    return -1;
  }

  std::string wave_filename =
      "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/1.wav";

  Wave wave = ReadWave(wave_filename);
  if (wave.samples.empty()) {
    std::cerr << "Failed to read: '" << wave_filename << "'\n";
    return -1;
  }

  std::cout << "Started\n";
  const auto begin = std::chrono::steady_clock::now();

  OfflineStream stream = tagger.CreateStream();
  stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
                        wave.samples.size());
  std::vector<AudioEvent> events = tagger.Compute(&stream);

  const auto end = std::chrono::steady_clock::now();
  std::cout << "Done\n";

  const float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;
  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
  float rtf = elapsed_seconds / duration;

  int32_t i = 0;

  for (const auto &event : events) {
    fprintf(stderr, "%d: AudioEvent(name='%s', index=%d, prob=%.3f)\n", i,
            event.name.c_str(), event.index, event.prob);
    i += 1;
  }

  printf("Number of threads: %d\n", config.model.num_threads);
  printf("Duration: %.3fs\n", duration);
  printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
  printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
         duration, rtf);
}


================================================
FILE: cxx-api-examples/dolphin-ctc-cxx-api.cc
================================================
// cxx-api-examples/dolphin-ctc-cxx-api.cc
// Copyright (c)  2025  Xiaomi Corporation

//
// This file demonstrates how to use Dolphini CTC model with sherpa-onnx's C++
// API.
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
// tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
// rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
//
// clang-format on

#include <chrono>  // NOLINT
#include <cstdio>
#include <iostream>
#include <string>

#include "sherpa-onnx/c-api/cxx-api.h"

int32_t main() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OfflineRecognizerConfig config;

  // clang-format off
  config.model_config.dolphin.model = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx"; // NOLINT
  config.model_config.tokens = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt"; // NOLINT

  std::string wave_filename = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav"; // NOLINT
  // clang-format on

  config.model_config.num_threads = 1;

  std::cout << "Loading model\n";
  OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
  if (!recognizer.Get()) {
    std::cerr << "Please check your config\n";
    return -1;
  }
  std::cout << "Loading model done\n";

  Wave wave = ReadWave(wave_filename);
  if (wave.samples.empty()) {
    std::cerr << "Failed to read: '" << wave_filename << "'\n";
    return -1;
  }

  std::cout << "Start recognition\n";
  const auto begin = std::chrono::steady_clock::now();

  OfflineStream stream = recognizer.CreateStream();
  stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
                        wave.samples.size());

  recognizer.Decode(&stream);

  OfflineRecognizerResult result = recognizer.GetResult(&stream);

  const auto end = std::chrono::steady_clock::now();
  const float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;
  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
  float rtf = elapsed_seconds / duration;

  std::cout << "text: " << result.text << "\n";
  printf("Number of threads: %d\n", config.model_config.num_threads);
  printf("Duration: %.3fs\n", duration);
  printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
  printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
         duration, rtf);

  return 0;
}


================================================
FILE: cxx-api-examples/fire-red-asr-ctc-cxx-api.cc
================================================
// cxx-api-examples/fire-red-asr-ctc-cxx-api.cc
// Copyright (c)  2025  Xiaomi Corporation

//
// This file demonstrates how to use FireRedASR CTC with sherpa-onnx's C++ API.
// clang-format off
/*
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
tar xvf sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
rm sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
*/
//
// clang-format on

#include <chrono>  // NOLINT
#include <cstdio>
#include <iostream>
#include <string>

#include "sherpa-onnx/c-api/cxx-api.h"

int32_t main() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OfflineRecognizerConfig config;

  // clang-format off
  config.model_config.fire_red_asr_ctc.model = "./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/model.int8.onnx";
  config.model_config.tokens = "./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/tokens.txt";

  config.model_config.num_threads = 1;

  std::cout << "Loading model\n";
  OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
  if (!recognizer.Get()) {
    std::cerr << "Please check your config\n";
    return -1;
  }
  std::cout << "Loading model done\n";

  std::string wave_filename ="./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/test_wavs/1.wav";
  // clang-format on

  Wave wave = ReadWave(wave_filename);
  if (wave.samples.empty()) {
    std::cerr << "Failed to read: '" << wave_filename << "'\n";
    return -1;
  }

  std::cout << "Start recognition\n";
  const auto begin = std::chrono::steady_clock::now();

  OfflineStream stream = recognizer.CreateStream();
  stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
                        wave.samples.size());

  recognizer.Decode(&stream);

  OfflineRecognizerResult result = recognizer.GetResult(&stream);

  const auto end = std::chrono::steady_clock::now();
  const float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;
  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
  float rtf = elapsed_seconds / duration;

  std::cout << "text: " << result.text << "\n";
  printf("Number of threads: %d\n", config.model_config.num_threads);
  printf("Duration: %.3fs\n", duration);
  printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
  printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
         duration, rtf);

  return 0;
}


================================================
FILE: cxx-api-examples/fire-red-asr-ctc-simulate-streaming-alsa-cxx-api.cc
================================================
// cxx-api-examples/fire-red-asr-ctc-simulate-streaming-alsa-cxx-api.cc
// Copyright (c)  2025  Xiaomi Corporation

//
// This file demonstrates how to use FireRedASR CTC models with sherpa-onnx's
// C++ API for streaming speech recognition from a microphone.
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
// tar xvf sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
// rm sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
//
// clang-format on

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

#include <chrono>              // NOLINT
#include <condition_variable>  // NOLINT
#include <iostream>
#include <mutex>  // NOLINT
#include <queue>
#include <string>
#include <thread>  // NOLINT
#include <utility>
#include <vector>

#include "sherpa-display.h"  // NOLINT
#include "sherpa-onnx/c-api/cxx-api.h"
#include "sherpa-onnx/csrc/alsa.h"

std::queue<std::vector<float>> samples_queue;
std::condition_variable condition_variable;
std::mutex mutex;
bool stop = false;

static void Handler(int32_t /*sig*/) {
  stop = true;
  condition_variable.notify_one();
  fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n");
}

static void RecordCallback(sherpa_onnx::Alsa *alsa) {
  int32_t chunk = 0.1 * alsa->GetActualSampleRate();
  while (!stop) {
    std::vector<float> samples = alsa->Read(chunk);

    std::lock_guard<std::mutex> lock(mutex);
    samples_queue.emplace(std::move(samples));
    condition_variable.notify_one();
  }
}

static sherpa_onnx::cxx::VoiceActivityDetector CreateVad() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  VadModelConfig config;
  config.silero_vad.model = "./silero_vad.onnx";
  config.silero_vad.threshold = 0.5;
  config.silero_vad.min_silence_duration = 0.1;
  config.silero_vad.min_speech_duration = 0.25;
  config.silero_vad.max_speech_duration = 8;
  config.sample_rate = 16000;
  config.debug = false;

  VoiceActivityDetector vad = VoiceActivityDetector::Create(config, 20);
  if (!vad.Get()) {
    std::cerr << "Failed to create VAD. Please check your config\n";
    exit(-1);
  }

  return vad;
}

static sherpa_onnx::cxx::OfflineRecognizer CreateOfflineRecognizer() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OfflineRecognizerConfig config;

  config.model_config.fire_red_asr_ctc.model =
      "./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/model.int8.onnx";
  config.model_config.tokens =
      "./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/tokens.txt";

  config.model_config.num_threads = 2;
  config.model_config.debug = false;

  std::cout << "Loading model\n";
  OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
  if (!recognizer.Get()) {
    std::cerr << "Please check your config\n";
    exit(-1);
  }
  std::cout << "Loading model done\n";
  return recognizer;
}

int32_t main(int32_t argc, const char *argv[]) {
  const char *kUsageMessage = R"usage(
Usage:

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
tar xvf sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
rm sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2

./fire-red-asr-ctc-simulate-streaming-alsa-cxx-api device_name

The device name specifies which microphone to use in case there are several
on your system. You can use

  arecord -l

to find all available microphones on your computer. For instance, if it outputs

**** List of CAPTURE Hardware Devices ****
card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
  Subdevices: 1/1
  Subdevice #0: subdevice #0

and if you want to select card 3 and device 0 on that card, please use:

  plughw:3,0

as the device_name.
)usage";

  if (argc != 2) {
    fprintf(stderr, "%s\n", kUsageMessage);
    return -1;
  }

  signal(SIGINT, Handler);

  using namespace sherpa_onnx::cxx;  // NOLINT

  auto vad = CreateVad();
  auto recognizer = CreateOfflineRecognizer();

  int32_t expected_sample_rate = 16000;

  std::string device_name = argv[1];
  sherpa_onnx::Alsa alsa(device_name.c_str());
  fprintf(stderr, "Use recording device: %s\n", device_name.c_str());

  if (alsa.GetExpectedSampleRate() != expected_sample_rate) {
    fprintf(stderr, "sample rate: %d != %d\n", alsa.GetExpectedSampleRate(),
            expected_sample_rate);
    exit(-1);
  }

  int32_t window_size = 512;  // samples, please don't change

  int32_t offset = 0;
  std::vector<float> buffer;
  bool speech_started = false;

  auto started_time = std::chrono::steady_clock::now();

  SherpaDisplay display;

  std::thread record_thread(RecordCallback, &alsa);

  std::cout << "Started! Please speak\n";

  while (!stop) {
    {
      std::unique_lock<std::mutex> lock(mutex);
      while (samples_queue.empty() && !stop) {
        condition_variable.wait(lock);
      }
      if (stop) {
        break;
      }

      const auto &s = samples_queue.front();
      buffer.insert(buffer.end(), s.begin(), s.end());

      samples_queue.pop();
    }

    for (; offset + window_size < buffer.size(); offset += window_size) {
      vad.AcceptWaveform(buffer.data() + offset, window_size);
      if (!speech_started && vad.IsDetected()) {
        speech_started = true;
        started_time = std::chrono::steady_clock::now();
      }
    }
    if (!speech_started) {
      if (buffer.size() > 10 * window_size) {
        offset -= buffer.size() - 10 * window_size;
        buffer = {buffer.end() - 10 * window_size, buffer.end()};
      }
    }

    auto current_time = std::chrono::steady_clock::now();
    const float elapsed_seconds =
        std::chrono::duration_cast<std::chrono::milliseconds>(current_time -
                                                              started_time)
            .count() /
        1000.;

    if (speech_started && elapsed_seconds > 0.2) {
      OfflineStream stream = recognizer.CreateStream();
      stream.AcceptWaveform(expected_sample_rate, buffer.data(), buffer.size());

      recognizer.Decode(&stream);

      OfflineRecognizerResult result = recognizer.GetResult(&stream);
      display.UpdateText(result.text);
      display.Display();

      started_time = std::chrono::steady_clock::now();
    }

    while (!vad.IsEmpty()) {
      auto segment = vad.Front();

      vad.Pop();

      OfflineStream stream = recognizer.CreateStream();
      stream.AcceptWaveform(expected_sample_rate, segment.samples.data(),
                            segment.samples.size());

      recognizer.Decode(&stream);

      OfflineRecognizerResult result = recognizer.GetResult(&stream);

      display.UpdateText(result.text);
      display.FinalizeCurrentSentence();
      display.Display();

      buffer.clear();
      offset = 0;
      speech_started = false;
    }
  }

  record_thread.join();

  return 0;
}


================================================
FILE: cxx-api-examples/fire-red-asr-ctc-simulate-streaming-microphone-cxx-api.cc
================================================
// cxx-api-examples/fire-red-asr-ctc-simulate-streaming-microphone-cxx-api.cc
// Copyright (c)  2026  Xiaomi Corporation

//
// This file demonstrates how to use FireRedASR CTC models with sherpa-onnx's
// C++ API for streaming speech recognition from a microphone.
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
// tar xvf sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
// rm sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
//
// clang-format on

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <chrono>              // NOLINT
#include <condition_variable>  // NOLINT
#include <iostream>
#include <mutex>  // NOLINT
#include <queue>
#include <vector>

#include "portaudio.h"       // NOLINT
#include "sherpa-display.h"  // NOLINT
#include "sherpa-onnx/c-api/cxx-api.h"
#include "sherpa-onnx/csrc/microphone.h"

std::queue<std::vector<float>> samples_queue;
std::condition_variable condition_variable;
std::mutex mutex;
bool stop = false;

static void Handler(int32_t /*sig*/) {
  stop = true;
  condition_variable.notify_one();
  fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n");
}

static int32_t RecordCallback(const void *input_buffer,
                              void * /*output_buffer*/,
                              unsigned long frames_per_buffer,  // NOLINT
                              const PaStreamCallbackTimeInfo * /*time_info*/,
                              PaStreamCallbackFlags /*status_flags*/,
                              void * /*user_data*/) {
  std::lock_guard<std::mutex> lock(mutex);
  samples_queue.emplace(
      reinterpret_cast<const float *>(input_buffer),
      reinterpret_cast<const float *>(input_buffer) + frames_per_buffer);
  condition_variable.notify_one();

  return stop ? paComplete : paContinue;
}

static sherpa_onnx::cxx::VoiceActivityDetector CreateVad() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  VadModelConfig config;
  config.silero_vad.model = "./silero_vad.onnx";
  config.silero_vad.threshold = 0.5;
  config.silero_vad.min_silence_duration = 0.1;
  config.silero_vad.min_speech_duration = 0.25;
  config.silero_vad.max_speech_duration = 8;
  config.sample_rate = 16000;
  config.debug = false;

  VoiceActivityDetector vad = VoiceActivityDetector::Create(config, 20);
  if (!vad.Get()) {
    std::cerr << "Failed to create VAD. Please check your config\n";
    exit(-1);
  }

  return vad;
}

static sherpa_onnx::cxx::OfflineRecognizer CreateOfflineRecognizer() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OfflineRecognizerConfig config;

  config.model_config.fire_red_asr_ctc.model =
      "./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/model.int8.onnx";
  config.model_config.tokens =
      "./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/tokens.txt";

  config.model_config.num_threads = 2;
  config.model_config.debug = false;

  std::cout << "Loading model\n";
  OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
  if (!recognizer.Get()) {
    std::cerr << "Please check your config\n";
    exit(-1);
  }
  std::cout << "Loading model done\n";
  return recognizer;
}

int32_t main() {
  signal(SIGINT, Handler);

  using namespace sherpa_onnx::cxx;  // NOLINT

  auto vad = CreateVad();
  auto recognizer = CreateOfflineRecognizer();

  sherpa_onnx::Microphone mic;

  PaDeviceIndex num_devices = Pa_GetDeviceCount();
  if (num_devices == 0) {
    std::cerr
        << "  If you are using Linux, please try "
           "./build/bin/fire-red-asr-ctc-simulate-streaming-alsa-cxx-api\n";
    return -1;
  }

  int32_t device_index = Pa_GetDefaultInputDevice();
  const char *pDeviceIndex = std::getenv("SHERPA_ONNX_MIC_DEVICE");
  if (pDeviceIndex) {
    fprintf(stderr, "Use specified device: %s\n", pDeviceIndex);
    device_index = atoi(pDeviceIndex);
  }
  mic.PrintDevices(device_index);

  float mic_sample_rate = 16000;
  const char *sample_rate_str = std::getenv("SHERPA_ONNX_MIC_SAMPLE_RATE");
  if (sample_rate_str) {
    fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
    mic_sample_rate = atof(sample_rate_str);
  }
  float sample_rate = 16000;
  LinearResampler resampler;
  if (mic_sample_rate != sample_rate) {
    float min_freq = std::min(mic_sample_rate, sample_rate);
    float lowpass_cutoff = 0.99 * 0.5 * min_freq;

    int32_t lowpass_filter_width = 6;
    resampler = LinearResampler::Create(mic_sample_rate, sample_rate,
                                        lowpass_cutoff, lowpass_filter_width);
  }
  if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
                      nullptr)) {
    std::cerr << "Failed to open microphone device\n";
    return -1;
  }

  int32_t window_size = 512;  // samples, please don't change

  int32_t offset = 0;
  std::vector<float> buffer;
  bool speech_started = false;

  auto started_time = std::chrono::steady_clock::now();

  SherpaDisplay display;

  std::cout << "Started! Please speak\n";

  while (!stop) {
    {
      std::unique_lock<std::mutex> lock(mutex);
      while (samples_queue.empty() && !stop) {
        condition_variable.wait(lock);
      }

      if (stop) {
        break;
      }

      const auto &s = samples_queue.front();
      if (!resampler.Get()) {
        buffer.insert(buffer.end(), s.begin(), s.end());
      } else {
        auto resampled = resampler.Resample(s.data(), s.size(), false);
        buffer.insert(buffer.end(), resampled.begin(), resampled.end());
      }

      samples_queue.pop();
    }

    for (; offset + window_size < buffer.size(); offset += window_size) {
      vad.AcceptWaveform(buffer.data() + offset, window_size);
      if (!speech_started && vad.IsDetected()) {
        speech_started = true;
        started_time = std::chrono::steady_clock::now();
      }
    }
    if (!speech_started) {
      if (buffer.size() > 10 * window_size) {
        offset -= buffer.size() - 10 * window_size;
        buffer = {buffer.end() - 10 * window_size, buffer.end()};
      }
    }

    auto current_time = std::chrono::steady_clock::now();
    const float elapsed_seconds =
        std::chrono::duration_cast<std::chrono::milliseconds>(current_time -
                                                              started_time)
            .count() /
        1000.;

    if (speech_started && elapsed_seconds > 0.2) {
      OfflineStream stream = recognizer.CreateStream();
      stream.AcceptWaveform(sample_rate, buffer.data(), buffer.size());

      recognizer.Decode(&stream);

      OfflineRecognizerResult result = recognizer.GetResult(&stream);
      display.UpdateText(result.text);
      display.Display();

      started_time = std::chrono::steady_clock::now();
    }

    while (!vad.IsEmpty()) {
      auto segment = vad.Front();

      vad.Pop();

      OfflineStream stream = recognizer.CreateStream();
      stream.AcceptWaveform(sample_rate, segment.samples.data(),
                            segment.samples.size());

      recognizer.Decode(&stream);

      OfflineRecognizerResult result = recognizer.GetResult(&stream);

      display.UpdateText(result.text);
      display.FinalizeCurrentSentence();
      display.Display();

      buffer.clear();
      offset = 0;
      speech_started = false;
    }
  }

  return 0;
}


================================================
FILE: cxx-api-examples/fire-red-asr-cxx-api.cc
================================================
// cxx-api-examples/fire-red-asr-cxx-api.cc
// Copyright (c)  2025  Xiaomi Corporation

//
// This file demonstrates how to use FireRedAsr AED with sherpa-onnx's C++ API.
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
// tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
// rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
//
// clang-format on

#include <chrono>  // NOLINT
#include <cstdio>
#include <iostream>
#include <string>

#include "sherpa-onnx/c-api/cxx-api.h"

int32_t main() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OfflineRecognizerConfig config;

  config.model_config.fire_red_asr.encoder =
      "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx";
  config.model_config.fire_red_asr.decoder =
      "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/decoder.int8.onnx";
  config.model_config.tokens =
      "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/tokens.txt";

  config.model_config.num_threads = 1;

  std::cout << "Loading model\n";
  OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
  if (!recognizer.Get()) {
    std::cerr << "Please check your config\n";
    return -1;
  }
  std::cout << "Loading model done\n";

  std::string wave_filename =
      "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/0.wav";
  Wave wave = ReadWave(wave_filename);
  if (wave.samples.empty()) {
    std::cerr << "Failed to read: '" << wave_filename << "'\n";
    return -1;
  }

  std::cout << "Start recognition\n";
  const auto begin = std::chrono::steady_clock::now();

  OfflineStream stream = recognizer.CreateStream();
  stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
                        wave.samples.size());

  recognizer.Decode(&stream);

  OfflineRecognizerResult result = recognizer.GetResult(&stream);

  const auto end = std::chrono::steady_clock::now();
  const float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;
  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
  float rtf = elapsed_seconds / duration;

  std::cout << "text: " << result.text << "\n";
  printf("Number of threads: %d\n", config.model_config.num_threads);
  printf("Duration: %.3fs\n", duration);
  printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
  printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
         duration, rtf);

  return 0;
}


================================================
FILE: cxx-api-examples/funasr-nano-cxx-api.cc
================================================
// cxx-api-examples/funasr-nano-cxx-api.cc
//
// Copyright (c)  2025  zengyw
//
// This file demonstrates how to use FunASR-nano with sherpa-onnx's C++ API.
//
//
// clang-format off
//
// Usage:
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
// tar xvf sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
// rm sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
//
// clang-format on

#include <chrono>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <string>

#include "sherpa-onnx/c-api/cxx-api.h"

int32_t main(int32_t argc, char *argv[]) {
  using namespace sherpa_onnx::cxx;

  OfflineRecognizerConfig config;
  config.model_config.num_threads = 2;
  config.model_config.debug = false;
  config.model_config.provider = "cpu";

  // clang-format off
  config.model_config.funasr_nano.encoder_adaptor = "./sherpa-onnx-funasr-nano-int8-2025-12-30/encoder_adaptor.int8.onnx";
  config.model_config.funasr_nano.llm = "./sherpa-onnx-funasr-nano-int8-2025-12-30/llm.int8.onnx";
  config.model_config.funasr_nano.embedding = "./sherpa-onnx-funasr-nano-int8-2025-12-30/embedding.int8.onnx";
  config.model_config.funasr_nano.tokenizer = "./sherpa-onnx-funasr-nano-int8-2025-12-30/Qwen3-0.6B";

  // clang-format on

  std::cout << "Loading model\n";
  OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
  if (!recognizer.Get()) {
    std::cerr << "Please check your config\n";
    return -1;
  }
  std::cout << "Loading model done\n";

  std::string wave_filename =
      "./sherpa-onnx-funasr-nano-int8-2025-12-30/test_wavs/dia_yue.wav";

  Wave wave = ReadWave(wave_filename);
  if (wave.samples.empty()) {
    std::cerr << "Failed to read: '" << wave_filename << "'\n";
    return -1;
  }

  std::cout << "Start recognition\n";
  const auto begin = std::chrono::steady_clock::now();

  OfflineStream stream = recognizer.CreateStream();
  stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
                        wave.samples.size());

  recognizer.Decode(&stream);

  OfflineRecognizerResult result = recognizer.GetResult(&stream);

  const auto end = std::chrono::steady_clock::now();
  const float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;
  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
  float rtf = elapsed_seconds / duration;

  std::cout << "text: " << result.text << "\n";
  printf("Number of threads: %d\n", config.model_config.num_threads);
  printf("Duration: %.3fs\n", duration);
  printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
  printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
         duration, rtf);

  return 0;
}


================================================
FILE: cxx-api-examples/kitten-tts-en-cxx-api.cc
================================================
// cxx-api-examples/kitten-tts-en-cxx-api.cc
//
// Copyright (c)  2025  Xiaomi Corporation

// This file shows how to use sherpa-onnx CXX API
// for English TTS with Kitten.
//
// clang-format off
/*
Usage

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2
tar xf kitten-nano-en-v0_1-fp16.tar.bz2
rm kitten-nano-en-v0_1-fp16.tar.bz2

./kitten-tts-en-cxx-api

 */
// clang-format on

#include <cstdint>
#include <cstdio>
#include <string>

#include "sherpa-onnx/c-api/cxx-api.h"

static int32_t ProgressCallback(const float *samples, int32_t num_samples,
                                float progress, void *arg) {
  fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
  // return 1 to continue generating
  // return 0 to stop generating
  return 1;
}

int32_t main(int32_t argc, char *argv[]) {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OfflineTtsConfig config;

  config.model.kitten.model = "./kitten-nano-en-v0_1-fp16/model.fp16.onnx";
  config.model.kitten.voices = "./kitten-nano-en-v0_1-fp16/voices.bin";
  config.model.kitten.tokens = "./kitten-nano-en-v0_1-fp16/tokens.txt";
  config.model.kitten.data_dir = "./kitten-nano-en-v0_1-fp16/espeak-ng-data";

  config.model.num_threads = 2;

  // If you don't want to see debug messages, please set it to 0
  config.model.debug = 1;

  std::string filename = "./generated-kitten-en-cxx.wav";
  std::string text =
      "Today as always, men fall into two groups: slaves and free men. Whoever "
      "does not have two-thirds of his day for himself, is a slave, whatever "
      "he may be: a statesman, a businessman, an official, or a scholar. "
      "Friends fell out often because life was changing so fast. The easiest "
      "thing in the world was to lose touch with someone.";

  auto tts = OfflineTts::Create(config);
  int32_t sid = 0;
  float speed = 1.0;  // larger -> faster in speech speed
  GenerationConfig gen_config;
  gen_config.sid = sid;
  gen_config.speed = speed;
  gen_config.silence_scale = 0.2f;

#if 0
  // If you don't want to use a callback, then please enable this branch
  GeneratedAudio audio = tts.Generate(text, gen_config);
#else
  GeneratedAudio audio = tts.Generate(text, gen_config, ProgressCallback);
#endif

  WriteWave(filename, {audio.samples, audio.sample_rate});

  fprintf(stderr, "Input text is: %s\n", text.c_str());
  fprintf(stderr, "Speaker ID is: %d\n", sid);
  fprintf(stderr, "Saved to: %s\n", filename.c_str());

  return 0;
}


================================================
FILE: cxx-api-examples/kokoro-tts-en-cxx-api.cc
================================================
// cxx-api-examples/kokoro-tts-en-cxx-api.cc
//
// Copyright (c)  2025  Xiaomi Corporation

// This file shows how to use sherpa-onnx CXX API
// for English TTS with Kokoro.
//
// clang-format off
/*
Usage

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
tar xf kokoro-en-v0_19.tar.bz2
rm kokoro-en-v0_19.tar.bz2

./kokoro-tts-en-cxx-api

 */
// clang-format on

#include <cstdint>
#include <cstdio>
#include <string>

#include "sherpa-onnx/c-api/cxx-api.h"

static int32_t ProgressCallback(const float *samples, int32_t num_samples,
                                float progress, void *arg) {
  fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
  // return 1 to continue generating
  // return 0 to stop generating
  return 1;
}

int32_t main(int32_t argc, char *argv[]) {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OfflineTtsConfig config;

  config.model.kokoro.model = "./kokoro-en-v0_19/model.onnx";
  config.model.kokoro.voices = "./kokoro-en-v0_19/voices.bin";
  config.model.kokoro.tokens = "./kokoro-en-v0_19/tokens.txt";
  config.model.kokoro.data_dir = "./kokoro-en-v0_19/espeak-ng-data";

  config.model.num_threads = 2;

  // If you don't want to see debug messages, please set it to 0
  config.model.debug = 1;

  std::string filename = "./generated-kokoro-en-cxx.wav";
  std::string text =
      "Today as always, men fall into two groups: slaves and free men. Whoever "
      "does not have two-thirds of his day for himself, is a slave, whatever "
      "he may be: a statesman, a businessman, an official, or a scholar. "
      "Friends fell out often because life was changing so fast. The easiest "
      "thing in the world was to lose touch with someone.";

  auto tts = OfflineTts::Create(config);
  int32_t sid = 0;
  float speed = 1.0;  // larger -> faster in speech speed
  GenerationConfig gen_config;
  gen_config.sid = sid;
  gen_config.speed = speed;
  gen_config.silence_scale = 0.2f;

#if 0
  // If you don't want to use a callback, then please enable this branch
  GeneratedAudio audio = tts.Generate(text, gen_config);
#else
  GeneratedAudio audio = tts.Generate(text, gen_config, ProgressCallback);
#endif

  WriteWave(filename, {audio.samples, audio.sample_rate});

  fprintf(stderr, "Input text is: %s\n", text.c_str());
  fprintf(stderr, "Speaker ID is: %d\n", sid);
  fprintf(stderr, "Saved to: %s\n", filename.c_str());

  return 0;
}


================================================
FILE: cxx-api-examples/kokoro-tts-zh-en-cxx-api.cc
================================================
// cxx-api-examples/kokoro-tts-zh-en-cxx-api.cc
//
// Copyright (c)  2025  Xiaomi Corporation

// This file shows how to use sherpa-onnx CXX API
// for Chinese + English TTS with Kokoro.
//
// clang-format off
/*
Usage

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
tar xf kokoro-multi-lang-v1_0.tar.bz2
rm kokoro-multi-lang-v1_0.tar.bz2

./kokoro-tts-zh-en-cxx-api

 */
// clang-format on

#include <cstdint>
#include <cstdio>
#include <string>

#include "sherpa-onnx/c-api/cxx-api.h"

static int32_t ProgressCallback(const float *samples, int32_t num_samples,
                                float progress, void *arg) {
  fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
  // return 1 to continue generating
  // return 0 to stop generating
  return 1;
}

int32_t main(int32_t argc, char *argv[]) {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OfflineTtsConfig config;

  config.model.kokoro.model = "./kokoro-multi-lang-v1_0/model.onnx";
  config.model.kokoro.voices = "./kokoro-multi-lang-v1_0/voices.bin";
  config.model.kokoro.tokens = "./kokoro-multi-lang-v1_0/tokens.txt";
  config.model.kokoro.data_dir = "./kokoro-multi-lang-v1_0/espeak-ng-data";
  config.model.kokoro.dict_dir = "./kokoro-multi-lang-v1_0/dict";
  config.model.kokoro.lexicon =
      "./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/"
      "lexicon-zh.txt";

  config.model.num_threads = 2;

  // If you don't want to see debug messages, please set it to 0
  config.model.debug = 1;

  std::string filename = "./generated-kokoro-zh-en-cxx.wav";
  std::string text =
      "中英文语音合成测试。This is generated by next generation Kaldi using "
      "Kokoro without Misaki. 你觉得中英文说的如何呢？";

  auto tts = OfflineTts::Create(config);
  int32_t sid = 50;
  float speed = 1.0;  // larger -> faster in speech speed
  GenerationConfig gen_config;
  gen_config.sid = sid;
  gen_config.speed = speed;
  gen_config.silence_scale = 0.2f;

#if 0
  // If you don't want to use a callback, then please enable this branch
  GeneratedAudio audio = tts.Generate(text, gen_config);
#else
  GeneratedAudio audio = tts.Generate(text, gen_config, ProgressCallback);
#endif

  WriteWave(filename, {audio.samples, audio.sample_rate});

  fprintf(stderr, "Input text is: %s\n", text.c_str());
  fprintf(stderr, "Speaker ID is: %d\n", sid);
  fprintf(stderr, "Saved to: %s\n", filename.c_str());

  return 0;
}


================================================
FILE: cxx-api-examples/kws-cxx-api.cc
================================================
// cxx-api-examples/kws-cxx-api.cc
//
// Copyright (c)  2025  Xiaomi Corporation
//
// This file demonstrates how to use keywords spotter with sherpa-onnx's C
// clang-format off
//
// Usage
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
// tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
// rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
//
// ./kws-cxx-api
//
// clang-format on
#include <array>
#include <iostream>
#include <string>

#include "sherpa-onnx/c-api/cxx-api.h"

int32_t main() {
  using namespace sherpa_onnx::cxx;  // NOLINT

  KeywordSpotterConfig config;
  config.model_config.transducer.encoder =
      "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
      "encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx";

  config.model_config.transducer.decoder =
      "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
      "decoder-epoch-12-avg-2-chunk-16-left-64.onnx";

  config.model_config.transducer.joiner =
      "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
      "joiner-epoch-12-avg-2-chunk-16-left-64.int8.onnx";

  config.model_config.tokens =
      "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
      "tokens.txt";

  config.model_config.provider = "cpu";
  config.model_config.num_threads = 1;
  config.model_config.debug = 1;

  config.keywords_file =
      "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
      "test_wavs/test_keywords.txt";

  KeywordSpotter kws = KeywordSpotter::Create(config);
  if (!kws.Get()) {
    std::cerr << "Please check your config\n";
    return -1;
  }

  std::cout
      << "--Test pre-defined keywords from test_wavs/test_keywords.txt--\n";

  std::string wave_filename =
      "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
      "test_wavs/3.wav";

  std::array<float, 8000> tail_paddings = {0};  // 0.5 seconds

  Wave wave = ReadWave(wave_filename);
  if (wave.samples.empty()) {
    std::cerr << "Failed to read: '" << wave_filename << "'\n";
    return -1;
  }

  OnlineStream stream = kws.CreateStream();
  if (!stream.Get()) {
    std::cerr << "Failed to create stream\n";
    return -1;
  }

  stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
                        wave.samples.size());

  stream.AcceptWaveform(wave.sample_rate, tail_paddings.data(),
                        tail_paddings.size());
  stream.InputFinished();

  while (kws.IsReady(&stream)) {
    kws.Decode(&stream);
    auto r = kws.GetResult(&stream);
    if (!r.keyword.empty()) {
      std::cout << "Detected keyword: " << r.json << "\n";

      // Remember to reset the keyword stream right after a keyword is detected
      kws.Reset(&stream);
    }
  }

  // --------------------------------------------------------------------------

  std::cout << "--Use pre-defined keywords + add a new keyword--\n";

  stream = kws.CreateStream("y ǎn y uán @演员");

  stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
                        wave.samples.size());

  stream.AcceptWaveform(wave.sample_rate, tail_paddings.data(),
                        tail_paddings.size());
  stream.InputFinished();

  while (kws.IsReady(&stream)) {
    kws.Decode(&stream);
    auto r = kws.GetResult(&stream);
    if (!r.keyword.empty()) {
      std::cout << "Detected keyword: " << r.json << "\n";

      // Remember to reset the keyword stream right after a keyword is detected
      kws.Reset(&stream);
    }
  }

  // --------------------------------------------------------------------------

  std::cout << "--Use pre-defined keywords + add two new keywords--\n";

  stream = kws.CreateStream("y ǎn y uán @演员/zh ī m íng @知名");

  stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
                        wave.samples.size());

  stream.AcceptWaveform(wave.sample_rate, tail_paddings.data(),
                        tail_paddings.size());
  stream.InputFinished();

  while (kws.IsReady(&stream)) {
    kws.Decode(&stream);
    auto r = kws.GetResult(&stream);
    if (!r.keyword.empty()) {
      std::cout << "Detected keyword: " << r.json << "\n";

      // Remember to reset the keyword stream right after a keyword is detected
      kws.Reset(&stream);
    }
  }
  return 0;
}


================================================
FILE: cxx-api-examples/matcha-tts-en-cxx-api.cc
================================================
// cxx-api-examples/matcha-tts-en-cxx-api.cc
//
// Copyright (c)  2025  Xiaomi Corporation

// This file shows how to use sherpa-onnx CXX API
// for Chinese TTS with MatchaTTS.
//
// clang-format off
/*
Usage

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
rm matcha-icefall-en_US-ljspeech.tar.bz2

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx

./matcha-tts-en-cxx-api

 */
// clang-format on

#include <cstdint>
#include <cstdio>
#include <string>

#include "sherpa-onnx/c-api/cxx-api.h"

static int32_t ProgressCallback(const float *samples, int32_t num_samples,
                                float progress, void *arg) {
  fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
  // return 1 to continue generating
  // return 0 to stop generating
  return 1;
}

int32_t main(int32_t argc, char *argv[]) {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OfflineTtsConfig config;

  config.model.matcha.acoustic_model =
      "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx";

  config.model.matcha.vocoder = "./vocos-22khz-univ.onnx";

  config.model.matcha.tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt";

  config.model.matcha.data_dir =
      "./matcha-icefall-en_US-ljspeech/espeak-ng-data";

  config.model.num_threads = 1;

  // If you don't want to see debug messages, please set it to 0
  config.model.debug = 1;

  std::string filename = "./generated-matcha-en-cxx.wav";
  std::string text =
      "Today as always, men fall into two groups: slaves and free men. Whoever "
      "does not have two-thirds of his day for himself, is a slave, whatever "
      "he may be: a statesman, a businessman, an official, or a scholar. "
      "Friends fell out often because life was changing so fast. The easiest "
      "thing in the world was to lose touch with someone.";

  auto tts = OfflineTts::Create(config);
  GenerationConfig gen_config;
  gen_config.sid = 0;
  gen_config.speed = 1.0;  // larger -> faster in speech speed
  gen_config.silence_scale = config.silence_scale;

#if 0
  // If you don't want to use a callback, then please enable this branch
  GeneratedAudio audio = tts.Generate(text, gen_config);
#else
  GeneratedAudio audio = tts.Generate(text, gen_config, ProgressCallback);
#endif

  WriteWave(filename, {audio.samples, audio.sample_rate});

  fprintf(stderr, "Input text is: %s\n", text.c_str());
  fprintf(stderr, "Speaker ID is: %d\n", gen_config.sid);
  fprintf(stderr, "Saved to: %s\n", filename.c_str());

  return 0;
}


================================================
FILE: cxx-api-examples/matcha-tts-zh-cxx-api.cc
================================================
// cxx-api-examples/matcha-tts-zh-cxx-api.cc
//
// Copyright (c)  2025  Xiaomi Corporation

// This file shows how to use sherpa-onnx CXX API
// for Chinese TTS with MatchaTTS.
//
// clang-format off
/*
Usage

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
tar xvf matcha-icefall-zh-baker.tar.bz2
rm matcha-icefall-zh-baker.tar.bz2

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx

./matcha-tts-zh-cxx-api

 */
// clang-format on

#include <cstdint>
#include <cstdio>
#include <string>

#include "sherpa-onnx/c-api/cxx-api.h"

static int32_t ProgressCallback(const float *samples, int32_t num_samples,
                                float progress, void *arg) {
  fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
  // return 1 to continue generating
  // return 0 to stop generating
  return 1;
}

int32_t main(int32_t argc, char *argv[]) {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OfflineTtsConfig config;
  config.model.matcha.acoustic_model =
      "./matcha-icefall-zh-baker/model-steps-3.onnx";
  config.model.matcha.vocoder = "./vocos-22khz-univ.onnx";
  config.model.matcha.lexicon = "./matcha-icefall-zh-baker/lexicon.txt";
  config.model.matcha.tokens = "./matcha-icefall-zh-baker/tokens.txt";
  config.model.matcha.dict_dir = "./matcha-icefall-zh-baker/dict";
  config.model.num_threads = 1;

  // If you don't want to see debug messages, please set it to 0
  config.model.debug = 1;

  // clang-format off
  config.rule_fsts = "./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst";  // NOLINT
  // clang-format on

  std::string filename = "./generated-matcha-zh-cxx.wav";
  std::string text =
      "当夜幕降临，星光点点，伴随着微风拂面，我在静谧中感受着时光的流转，思念如"
      "涟漪荡漾，梦境如画卷展开，我与自然融为一体，沉静在这片宁静的美丽之中，感"
      "受着生命的奇迹与温柔."
      "某某银行的副行长和一些行政领导表示，他们去过长江和长白山; "
      "经济不断增长。2024年12月31号，拨打110或者18920240511。123456块钱。";

  auto tts = OfflineTts::Create(config);
  GenerationConfig gen_config;
  gen_config.sid = 0;
  gen_config.speed = 1.0;  // larger -> faster in speech speed
  gen_config.silence_scale = config.silence_scale;

#if 0
  // If you don't want to use a callback, then please enable this branch
  GeneratedAudio audio = tts.Generate(text, gen_config);
#else
  GeneratedAudio audio = tts.Generate(text, gen_config, ProgressCallback);
#endif

  WriteWave(filename, {audio.samples, audio.sample_rate});

  fprintf(stderr, "Input text is: %s\n", text.c_str());
  fprintf(stderr, "Speaker ID is: %d\n", gen_config.sid);
  fprintf(stderr, "Saved to: %s\n", filename.c_str());

  return 0;
}


================================================
FILE: cxx-api-examples/medasr-ctc-cxx-api.cc
================================================
// cxx-api-examples/medasr-ctc-cxx-api.cc
// Copyright (c)  2025  Xiaomi Corporation

//
// This file demonstrates how to use MedASR with sherpa-onnx's C++ API.
// clang-format off
/*
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
tar xvf sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
rm sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
*/
//
// clang-format on

#include <chrono>  // NOLINT
#include <cstdio>
#include <iostream>
#include <string>

#include "sherpa-onnx/c-api/cxx-api.h"

int32_t main() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OfflineRecognizerConfig config;

  // clang-format off
  config.model_config.medasr.model = "./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/model.int8.onnx";
  config.model_config.tokens = "./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/tokens.txt";

  config.model_config.num_threads = 1;

  std::cout << "Loading model\n";
  OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
  if (!recognizer.Get()) {
    std::cerr << "Please check your config\n";
    return -1;
  }
  std::cout << "Loading model done\n";

  std::string wave_filename = "./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/test_wavs/0.wav";
  // clang-format on

  Wave wave = ReadWave(wave_filename);
  if (wave.samples.empty()) {
    std::cerr << "Failed to read: '" << wave_filename << "'\n";
    return -1;
  }

  std::cout << "Start recognition\n";
  const auto begin = std::chrono::steady_clock::now();

  OfflineStream stream = recognizer.CreateStream();
  stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
                        wave.samples.size());

  recognizer.Decode(&stream);

  OfflineRecognizerResult result = recognizer.GetResult(&stream);

  const auto end = std::chrono::steady_clock::now();
  const float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;
  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
  float rtf = elapsed_seconds / duration;

  std::cout << "text: " << result.text << "\n";
  printf("Number of threads: %d\n", config.model_config.num_threads);
  printf("Duration: %.3fs\n", duration);
  printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
  printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
         duration, rtf);

  return 0;
}


================================================
FILE: cxx-api-examples/moonshine-cxx-api.cc
================================================
// cxx-api-examples/moonshine-cxx-api.cc
// Copyright (c)  2024  Xiaomi Corporation

//
// This file demonstrates how to use Moonshine with sherpa-onnx's C++ API.
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
// tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
// rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
//
// clang-format on

#include <chrono>  // NOLINT
#include <cstdio>
#include <iostream>
#include <string>

#include "sherpa-onnx/c-api/cxx-api.h"

int32_t main() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OfflineRecognizerConfig config;

  config.model_config.moonshine.preprocessor =
      "./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx";
  config.model_config.moonshine.encoder =
      "./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx";
  config.model_config.moonshine.uncached_decoder =
      "./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx";
  config.model_config.moonshine.cached_decoder =
      "./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx";
  config.model_config.tokens =
      "./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt";

  config.model_config.num_threads = 1;

  std::cout << "Loading model\n";
  OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
  if (!recognizer.Get()) {
    std::cerr << "Please check your config\n";
    return -1;
  }
  std::cout << "Loading model done\n";

  std::string wave_filename =
      "./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav";
  Wave wave = ReadWave(wave_filename);
  if (wave.samples.empty()) {
    std::cerr << "Failed to read: '" << wave_filename << "'\n";
    return -1;
  }

  std::cout << "Start recognition\n";
  const auto begin = std::chrono::steady_clock::now();

  OfflineStream stream = recognizer.CreateStream();
  stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
                        wave.samples.size());

  recognizer.Decode(&stream);

  OfflineRecognizerResult result = recognizer.GetResult(&stream);

  const auto end = std::chrono::steady_clock::now();
  const float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;
  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
  float rtf = elapsed_seconds / duration;

  std::cout << "text: " << result.text << "\n";
  printf("Number of threads: %d\n", config.model_config.num_threads);
  printf("Duration: %.3fs\n", duration);
  printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
  printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
         duration, rtf);

  return 0;
}


================================================
FILE: cxx-api-examples/moonshine-v2-cxx-api.cc
================================================
// cxx-api-examples/moonshine-v2-cxx-api.cc
// Copyright (c)  2024-2026  Xiaomi Corporation

//
// This file demonstrates how to use Moonshine v2 with sherpa-onnx's C++ API.
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
// tar xvf sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
// rm sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
//
// clang-format on

#include <chrono>  // NOLINT
#include <cstdio>
#include <iostream>
#include <string>

#include "sherpa-onnx/c-api/cxx-api.h"

int32_t main() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OfflineRecognizerConfig config;

  // clang-format off
  config.model_config.moonshine.encoder = "./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/encoder_model.ort";
  config.model_config.moonshine.merged_decoder = "./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/decoder_model_merged.ort";
  config.model_config.tokens = "./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/tokens.txt";
  // clang-format on

  config.model_config.num_threads = 2;

  std::cout << "Loading model\n";
  OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
  if (!recognizer.Get()) {
    std::cerr << "Please check your config\n";
    return -1;
  }
  std::cout << "Loading model done\n";

  std::string wave_filename =
      "./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/test_wavs/0.wav";
  Wave wave = ReadWave(wave_filename);
  if (wave.samples.empty()) {
    std::cerr << "Failed to read: '" << wave_filename << "'\n";
    return -1;
  }

  std::cout << "Start recognition\n";
  const auto begin = std::chrono::steady_clock::now();

  OfflineStream stream = recognizer.CreateStream();
  stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
                        wave.samples.size());

  recognizer.Decode(&stream);

  OfflineRecognizerResult result = recognizer.GetResult(&stream);

  const auto end = std::chrono::steady_clock::now();
  const float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;
  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
  float rtf = elapsed_seconds / duration;

  std::cout << "text: " << result.text << "\n";
  printf("Number of threads: %d\n", config.model_config.num_threads);
  printf("Duration: %.3fs\n", duration);
  printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
  printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
         duration, rtf);

  return 0;
}


================================================
FILE: cxx-api-examples/nemo-canary-cxx-api.cc
================================================
// cxx-api-examples/nemo-canary-cxx-api.cc
//
// Copyright (c)  2025  Xiaomi Corporation

//
// This file demonstrates how to use NeMo Canary models with
// sherpa-onnx's C++ API.
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
// tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
// rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
//
// clang-format on
//
// see https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html
// for details

#include <chrono>  // NOLINT
#include <cstdio>
#include <iostream>
#include <string>

#include "sherpa-onnx/c-api/cxx-api.h"

int32_t main() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OfflineRecognizerConfig config;

  config.model_config.canary.encoder =
      "sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx";
  config.model_config.canary.decoder =
      "sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx";

  // our input audio is German, so we set src_lang to "de"
  config.model_config.canary.src_lang = "de";

  // we can set tgt_lang either to de or en in this specific case
  config.model_config.canary.tgt_lang = "en";
  config.model_config.tokens =
      "sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt";

  config.model_config.num_threads = 1;

  std::cout << "Loading model\n";
  OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
  if (!recognizer.Get()) {
    std::cerr << "Please check your config\n";
    return -1;
  }
  std::cout << "Loading model done\n";

  std::string wave_filename =
      "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/de.wav";

  Wave wave = ReadWave(wave_filename);
  if (wave.samples.empty()) {
    std::cerr << "Failed to read: '" << wave_filename << "'\n";
    return -1;
  }

  std::cout << "Start recognition\n";
  const auto begin = std::chrono::steady_clock::now();

  OfflineStream stream = recognizer.CreateStream();
  stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
                        wave.samples.size());

  recognizer.Decode(&stream);

  OfflineRecognizerResult result = recognizer.GetResult(&stream);

  const auto end = std::chrono::steady_clock::now();
  const float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;
  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
  float rtf = elapsed_seconds / duration;

  std::cout << "text (English): " << result.text << "\n";
  printf("Number of threads: %d\n", config.model_config.num_threads);
  printf("Duration: %.3fs\n", duration);
  printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
  printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
         duration, rtf);

  // now output text in German
  config.model_config.canary.tgt_lang = "de";
  recognizer.SetConfig(config);
  stream = recognizer.CreateStream();
  stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
                        wave.samples.size());

  recognizer.Decode(&stream);

  result = recognizer.GetResult(&stream);
  std::cout << "text (German): " << result.text << "\n";

  return 0;
}


================================================
FILE: cxx-api-examples/offline-punctuation-cxx-api.cc
================================================
// cxx-api-examples/offline-punctuation-cxx-api.cc
// Copyright (c)  2025  Xiaomi Corporation

// To use punctuation model:
// clang-format off
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12-int8.tar.bz2
// tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12-int8.tar.bz2
// rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12-int8.tar.bz2
// clang-format on

#include <iostream>
#include <string>

#include "sherpa-onnx/c-api/cxx-api.h"

int32_t main() {
  using namespace sherpa_onnx::cxx;  // NOLINT

  OfflinePunctuationConfig punctuation_config;
  punctuation_config.model.ct_transformer =
      "./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12-int8/"
      "model.int8.onnx";
  punctuation_config.model.num_threads = 1;
  punctuation_config.model.debug = false;
  punctuation_config.model.provider = "cpu";

  OfflinePunctuation punct = OfflinePunctuation::Create(punctuation_config);
  if (!punct.Get()) {
    std::cerr
        << "Failed to create punctuation model. Please check your config\n";
    return -1;
  }

  std::string text = "你好吗how are you Fantasitic 谢谢我很好你怎么样呢";
  std::string text_with_punct = punct.AddPunctuation(text);
  std::cout << "Original text: " << text << std::endl;
  std::cout << "With punctuation: " << text_with_punct << std::endl;

  return 0;
}


================================================
FILE: cxx-api-examples/omnilingual-asr-ctc-cxx-api.cc
================================================
// cxx-api-examples/omnilingual-asr-ctc-cxx-api.cc
// Copyright (c)  2025  Xiaomi Corporation

//
// This file demonstrates how to use Omnilingual ASR with sherpa-onnx's C++ API.
// clang-format off
/*
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
tar xvf sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
rm sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
*/
//
// clang-format on

#include <chrono>  // NOLINT
#include <cstdio>
#include <iostream>
#include <string>

#include "sherpa-onnx/c-api/cxx-api.h"

int32_t main() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OfflineRecognizerConfig config;

  // clang-format off
  config.model_config.omnilingual.model = "./sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/model.int8.onnx";
  config.model_config.tokens = "./sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/tokens.txt";

  config.model_config.num_threads = 1;

  std::cout << "Loading model\n";
  OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
  if (!recognizer.Get()) {
    std::cerr << "Please check your config\n";
    return -1;
  }
  std::cout << "Loading model done\n";

  std::string wave_filename = "./sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/test_wavs/en.wav";
  // clang-format on

  Wave wave = ReadWave(wave_filename);
  if (wave.samples.empty()) {
    std::cerr << "Failed to read: '" << wave_filename << "'\n";
    return -1;
  }

  std::cout << "Start recognition\n";
  const auto begin = std::chrono::steady_clock::now();

  OfflineStream stream = recognizer.CreateStream();
  stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
                        wave.samples.size());

  recognizer.Decode(&stream);

  OfflineRecognizerResult result = recognizer.GetResult(&stream);

  const auto end = std::chrono::steady_clock::now();
  const float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;
  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
  float rtf = elapsed_seconds / duration;

  std::cout << "text: " << result.text << "\n";
  printf("Number of threads: %d\n", config.model_config.num_threads);
  printf("Duration: %.3fs\n", duration);
  printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
  printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
         duration, rtf);

  return 0;
}


================================================
FILE: cxx-api-examples/online-punctuation-cxx-api.cc
================================================
// cxx-api-examples/online-punctuation-cxx-api.cc
// Copyright (c)  2025  Xiaomi Corporation

// To use punctuation model:
// clang-format off
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
// tar xvf sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
// rm sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
// clang-format on

#include <iostream>
#include <string>

#include "sherpa-onnx/c-api/cxx-api.h"

int32_t main() {
  using namespace sherpa_onnx::cxx;  // NOLINT

  OnlinePunctuationConfig punctuation_config;
  punctuation_config.model.cnn_bilstm =
      "sherpa-onnx-online-punct-en-2024-08-06/model.int8.onnx";
  punctuation_config.model.bpe_vocab =
      "sherpa-onnx-online-punct-en-2024-08-06/bpe.vocab";
  punctuation_config.model.num_threads = 1;
  punctuation_config.model.debug = false;
  punctuation_config.model.provider = "cpu";

  OnlinePunctuation punct = OnlinePunctuation::Create(punctuation_config);
  if (!punct.Get()) {
    std::cerr
        << "Failed to create punctuation model. Please check your config\n";
    return -1;
  }

  std::string text = "how are you i am fine thank you";
  std::string text_with_punct = punct.AddPunctuation(text);
  std::cout << "Original text: " << text << std::endl;
  std::cout << "With punctuation: " << text_with_punct << std::endl;

  return 0;
}


================================================
FILE: cxx-api-examples/online-speech-enhancement-dpdfnet-cxx-api.cc
================================================
// cxx-api-examples/online-speech-enhancement-dpdfnet-cxx-api.cc
//
// Copyright (c)  2026  Xiaomi Corporation
//
// We assume you have pre-downloaded the DPDFNet model and sample test wave.
// DPDFNet models are available from either:
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
// https://huggingface.co/Ceva-IP/DPDFNet
//
// An example command to download:
// clang-format off
/*
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet_baseline.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
*/
// clang-format on
//
// Use dpdfnet_baseline.onnx, dpdfnet2.onnx, dpdfnet4.onnx, or dpdfnet8.onnx
// for 16 kHz downstream ASR or speech recognition.
// Use dpdfnet2_48khz_hr.onnx for 48 kHz enhancement output.

#include <chrono>  // NOLINT
#include <cstdio>
#include <iostream>
#include <string>
#include <vector>

#include "sherpa-onnx/c-api/cxx-api.h"

int32_t main() {
  using namespace sherpa_onnx::cxx;  // NOLINT

  OnlineSpeechDenoiserConfig config;
  std::string model_filename = "./dpdfnet_baseline.onnx";
  std::string wav_filename = "./inp_16k.wav";
  std::string out_wave_filename = "./enhanced-online-dpdfnet.wav";
  config.model.dpdfnet.model = model_filename;

  auto sd = OnlineSpeechDenoiser::Create(config);
  if (!sd.Get()) {
    std::cerr << "Please check your config\n";
    return -1;
  }

  Wave wave = ReadWave(wav_filename);
  if (wave.samples.empty()) {
    std::cerr << "Failed to read: '" << wav_filename << "'\n";
    return -1;
  }

  std::vector<float> samples;
  auto frame_shift = sd.GetFrameShiftInSamples();

  std::cout << "Started\n";
  const auto begin = std::chrono::steady_clock::now();

  for (int32_t start = 0; start < static_cast<int32_t>(wave.samples.size());
       start += frame_shift) {
    int32_t n = std::min<int32_t>(frame_shift, wave.samples.size() - start);
    auto denoised = sd.Run(wave.samples.data() + start, n, wave.sample_rate);
    samples.insert(samples.end(), denoised.samples.begin(),
                   denoised.samples.end());
  }

  auto tail = sd.Flush();
  samples.insert(samples.end(), tail.samples.begin(), tail.samples.end());

  const auto end = std::chrono::steady_clock::now();
  std::cout << "Done\n";

  WriteWave(out_wave_filename, {samples, sd.GetSampleRate()});

  const float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;
  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
  float rtf = elapsed_seconds / duration;

  std::cout << "Saved to " << out_wave_filename << "\n";
  printf("Duration: %.3fs\n", duration);
  printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
  printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
         duration, rtf);
  return 0;
}


================================================
FILE: cxx-api-examples/online-speech-enhancement-gtcrn-cxx-api.cc
================================================
// cxx-api-examples/online-speech-enhancement-gtcrn-cxx-api.cc
//
// Copyright (c)  2026  Xiaomi Corporation
//
// We assume you have pre-downloaded the GTCRN model and sample test wave from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
//
// An example command to download:
// clang-format off
/*
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
*/
// clang-format on

#include <chrono>  // NOLINT
#include <cstdio>
#include <iostream>
#include <string>
#include <vector>

#include "sherpa-onnx/c-api/cxx-api.h"

int32_t main() {
  using namespace sherpa_onnx::cxx;  // NOLINT

  OnlineSpeechDenoiserConfig config;
  std::string model_filename = "./gtcrn_simple.onnx";
  std::string wav_filename = "./inp_16k.wav";
  std::string out_wave_filename = "./enhanced-online-gtcrn.wav";
  config.model.gtcrn.model = model_filename;

  auto sd = OnlineSpeechDenoiser::Create(config);
  if (!sd.Get()) {
    std::cerr << "Please check your config\n";
    return -1;
  }

  Wave wave = ReadWave(wav_filename);
  if (wave.samples.empty()) {
    std::cerr << "Failed to read: '" << wav_filename << "'\n";
    return -1;
  }

  std::vector<float> samples;
  auto frame_shift = sd.GetFrameShiftInSamples();

  std::cout << "Started\n";
  const auto begin = std::chrono::steady_clock::now();

  for (int32_t start = 0; start < static_cast<int32_t>(wave.samples.size());
       start += frame_shift) {
    int32_t n = std::min<int32_t>(frame_shift, wave.samples.size() - start);
    auto denoised = sd.Run(wave.samples.data() + start, n, wave.sample_rate);
    samples.insert(samples.end(), denoised.samples.begin(),
                   denoised.samples.end());
  }

  auto tail = sd.Flush();
  samples.insert(samples.end(), tail.samples.begin(), tail.samples.end());

  const auto end = std::chrono::steady_clock::now();
  std::cout << "Done\n";

  WriteWave(out_wave_filename, {samples, sd.GetSampleRate()});

  const float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;
  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
  float rtf = elapsed_seconds / duration;

  std::cout << "Saved to " << out_wave_filename << "\n";
  printf("Duration: %.3fs\n", duration);
  printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
  printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
         duration, rtf);
  return 0;
}


================================================
FILE: cxx-api-examples/parakeet-tdt-ctc-simulate-streaming-microphone-cxx-api.cc
================================================
// cxx-api-examples/parakeet-tdt-simulate-streaming-microphone-cxx-api.cc
// Copyright (c)  2025  Xiaomi Corporation

//
// This file demonstrates how to use parakeet-tdt with sherpa-onnx's C++ API
// for streaming speech recognition from a microphone.
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8.tar.bz2
// tar xvf sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8.tar.bz2
// rm sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8.tar.bz2
//
// clang-format on

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <chrono>              // NOLINT
#include <condition_variable>  // NOLINT
#include <iostream>
#include <mutex>  // NOLINT
#include <queue>
#include <vector>

#include "portaudio.h"       // NOLINT
#include "sherpa-display.h"  // NOLINT
#include "sherpa-onnx/c-api/cxx-api.h"
#include "sherpa-onnx/csrc/microphone.h"

std::queue<std::vector<float>> samples_queue;
std::condition_variable condition_variable;
std::mutex mutex;
bool stop = false;

static void Handler(int32_t /*sig*/) {
  stop = true;
  condition_variable.notify_one();
  fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n");
}

static int32_t RecordCallback(const void *input_buffer,
                              void * /*output_buffer*/,
                              unsigned long frames_per_buffer,  // NOLINT
                              const PaStreamCallbackTimeInfo * /*time_info*/,
                              PaStreamCallbackFlags /*status_flags*/,
                              void * /*user_data*/) {
  std::lock_guard<std::mutex> lock(mutex);
  samples_queue.emplace(
      reinterpret_cast<const float *>(input_buffer),
      reinterpret_cast<const float *>(input_buffer) + frames_per_buffer);
  condition_variable.notify_one();

  return stop ? paComplete : paContinue;
}

static sherpa_onnx::cxx::VoiceActivityDetector CreateVad() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  VadModelConfig config;
  config.silero_vad.model = "./silero_vad.onnx";
  config.silero_vad.threshold = 0.25;
  config.silero_vad.min_silence_duration = 0.25;
  config.silero_vad.min_speech_duration = 0.25;
  config.silero_vad.max_speech_duration = 5;
  config.sample_rate = 16000;
  config.debug = false;

  VoiceActivityDetector vad = VoiceActivityDetector::Create(config, 60);
  if (!vad.Get()) {
    std::cerr << "Failed to create VAD. Please check your config\n";
    exit(-1);
  }

  return vad;
}

static sherpa_onnx::cxx::OfflineRecognizer CreateOfflineRecognizer() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OfflineRecognizerConfig config;

  config.model_config.nemo_ctc.model =
      "./sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8/model.int8.onnx";
  config.model_config.tokens =
      "./sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8/tokens.txt";

  config.model_config.num_threads = 2;
  config.model_config.debug = false;

  std::cout << "Loading model\n";
  OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
  if (!recognizer.Get()) {
    std::cerr << "Please check your config\n";
    exit(-1);
  }
  std::cout << "Loading model done\n";
  return recognizer;
}

int32_t main() {
  signal(SIGINT, Handler);

  using namespace sherpa_onnx::cxx;  // NOLINT

  auto vad = CreateVad();
  auto recognizer = CreateOfflineRecognizer();

  sherpa_onnx::Microphone mic;

  PaDeviceIndex num_devices = Pa_GetDeviceCount();
  if (num_devices == 0) {
    std::cerr << "  If you are using Linux, please try to modify "
                 "./build/bin/sense-voice-simulate-streaming-alsa-cxx-api\n";
    return -1;
  }

  int32_t device_index = Pa_GetDefaultInputDevice();
  const char *pDeviceIndex = std::getenv("SHERPA_ONNX_MIC_DEVICE");
  if (pDeviceIndex) {
    fprintf(stderr, "Use specified device: %s\n", pDeviceIndex);
    device_index = atoi(pDeviceIndex);
  }
  mic.PrintDevices(device_index);

  float mic_sample_rate = 16000;
  const char *sample_rate_str = std::getenv("SHERPA_ONNX_MIC_SAMPLE_RATE");
  if (sample_rate_str) {
    mic_sample_rate = atof(sample_rate_str);
    fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
  }

  float sample_rate = 16000;
  LinearResampler resampler;
  if (mic_sample_rate != sample_rate) {
    float min_freq = std::min(mic_sample_rate, sample_rate);
    float lowpass_cutoff = 0.99 * 0.5 * min_freq;

    int32_t lowpass_filter_width = 6;
    resampler = LinearResampler::Create(mic_sample_rate, sample_rate,
                                        lowpass_cutoff, lowpass_filter_width);
  }
  if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
                      nullptr)) {
    std::cerr << "Failed to open microphone device\n";
    return -1;
  }

  int32_t window_size = 512;  // samples, please don't change

  int32_t offset = 0;
  std::vector<float> buffer;
  bool speech_started = false;

  auto started_time = std::chrono::steady_clock::now();

  SherpaDisplay display;

  std::cout << "Started! Please speak\n";

  while (!stop) {
    {
      std::unique_lock<std::mutex> lock(mutex);
      while (samples_queue.empty() && !stop) {
        condition_variable.wait(lock);
      }
      if (stop) {
        break;
      }

      const auto &s = samples_queue.front();
      if (!resampler.Get()) {
        buffer.insert(buffer.end(), s.begin(), s.end());
      } else {
        auto resampled = resampler.Resample(s.data(), s.size(), false);
        buffer.insert(buffer.end(), resampled.begin(), resampled.end());
      }

      samples_queue.pop();
    }

    for (; offset + window_size < buffer.size(); offset += window_size) {
      vad.AcceptWaveform(buffer.data() + offset, window_size);
      if (!speech_started && vad.IsDetected()) {
        speech_started = true;
        started_time = std::chrono::steady_clock::now();
      }
    }
    if (!speech_started) {
      if (buffer.size() > 10 * window_size) {
        offset -= buffer.size() - 10 * window_size;
        buffer = {buffer.end() - 10 * window_size, buffer.end()};
      }
    }

    auto current_time = std::chrono::steady_clock::now();
    const float elapsed_seconds =
        std::chrono::duration_cast<std::chrono::milliseconds>(current_time -
                                                              started_time)
            .count() /
        1000.;

    if (speech_started && elapsed_seconds > 0.2) {
      OfflineStream stream = recognizer.CreateStream();
      stream.AcceptWaveform(sample_rate, buffer.data(), buffer.size());

      recognizer.Decode(&stream);

      OfflineRecognizerResult result = recognizer.GetResult(&stream);
      display.UpdateText(result.text);
      display.Display();

      started_time = std::chrono::steady_clock::now();
    }

    while (!vad.IsEmpty()) {
      auto segment = vad.Front();

      vad.Pop();

      OfflineStream stream = recognizer.CreateStream();
      stream.AcceptWaveform(sample_rate, segment.samples.data(),
                            segment.samples.size());

      recognizer.Decode(&stream);

      OfflineRecognizerResult result = recognizer.GetResult(&stream);

      display.UpdateText(result.text);
      display.FinalizeCurrentSentence();
      display.Display();

      buffer.clear();
      offset = 0;
      speech_started = false;
    }
  }

  return 0;
}


================================================
FILE: cxx-api-examples/parakeet-tdt-simulate-streaming-microphone-cxx-api.cc
================================================
// cxx-api-examples/parakeet-tdt-simulate-streaming-microphone-cxx-api.cc
// Copyright (c)  2025  Xiaomi Corporation

//
// This file demonstrates how to use parakeet-tdt with sherpa-onnx's C++ API
// for streaming speech recognition from a microphone.
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2
// tar xvf sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2
// rm sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2
//
// clang-format on

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <chrono>              // NOLINT
#include <condition_variable>  // NOLINT
#include <iostream>
#include <mutex>  // NOLINT
#include <queue>
#include <vector>

#include "portaudio.h"       // NOLINT
#include "sherpa-display.h"  // NOLINT
#include "sherpa-onnx/c-api/cxx-api.h"
#include "sherpa-onnx/csrc/microphone.h"

std::queue<std::vector<float>> samples_queue;
std::condition_variable condition_variable;
std::mutex mutex;
bool stop = false;

static void Handler(int32_t /*sig*/) {
  stop = true;
  condition_variable.notify_one();
  fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n");
}

static int32_t RecordCallback(const void *input_buffer,
                              void * /*output_buffer*/,
                              unsigned long frames_per_buffer,  // NOLINT
                              const PaStreamCallbackTimeInfo * /*time_info*/,
                              PaStreamCallbackFlags /*status_flags*/,
                              void * /*user_data*/) {
  std::lock_guard<std::mutex> lock(mutex);
  samples_queue.emplace(
      reinterpret_cast<const float *>(input_buffer),
      reinterpret_cast<const float *>(input_buffer) + frames_per_buffer);
  condition_variable.notify_one();

  return stop ? paComplete : paContinue;
}

static sherpa_onnx::cxx::VoiceActivityDetector CreateVad() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  VadModelConfig config;
  config.silero_vad.model = "./silero_vad.onnx";
  config.silero_vad.threshold = 0.5;
  config.silero_vad.min_silence_duration = 0.25;
  config.silero_vad.min_speech_duration = 0.25;
  config.silero_vad.max_speech_duration = 5;
  config.sample_rate = 16000;
  config.debug = false;

  VoiceActivityDetector vad = VoiceActivityDetector::Create(config, 60);
  if (!vad.Get()) {
    std::cerr << "Failed to create VAD. Please check your config\n";
    exit(-1);
  }

  return vad;
}

static sherpa_onnx::cxx::OfflineRecognizer CreateOfflineRecognizer() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OfflineRecognizerConfig config;

  config.model_config.transducer.encoder =
      "./sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/encoder.int8.onnx";
  config.model_config.transducer.decoder =
      "./sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/decoder.int8.onnx";
  config.model_config.transducer.joiner =
      "./sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/joiner.int8.onnx";
  config.model_config.tokens =
      "./sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/tokens.txt";

  config.model_config.model_type = "nemo_transducer";

  config.model_config.num_threads = 2;
  config.model_config.debug = false;

  std::cout << "Loading model\n";
  OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
  if (!recognizer.Get()) {
    std::cerr << "Please check your config\n";
    exit(-1);
  }
  std::cout << "Loading model done\n";
  return recognizer;
}

int32_t main() {
  signal(SIGINT, Handler);

  using namespace sherpa_onnx::cxx;  // NOLINT

  auto vad = CreateVad();
  auto recognizer = CreateOfflineRecognizer();

  sherpa_onnx::Microphone mic;

  PaDeviceIndex num_devices = Pa_GetDeviceCount();
  if (num_devices == 0) {
    std::cerr << "  If you are using Linux, please try "
                 "./build/bin/sense-voice-simulate-streaming-alsa-cxx-api\n";
    return -1;
  }

  int32_t device_index = Pa_GetDefaultInputDevice();
  const char *pDeviceIndex = std::getenv("SHERPA_ONNX_MIC_DEVICE");
  if (pDeviceIndex) {
    fprintf(stderr, "Use specified device: %s\n", pDeviceIndex);
    device_index = atoi(pDeviceIndex);
  }
  mic.PrintDevices(device_index);

  float mic_sample_rate = 16000;
  const char *sample_rate_str = std::getenv("SHERPA_ONNX_MIC_SAMPLE_RATE");
  if (sample_rate_str) {
    fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
    mic_sample_rate = atof(sample_rate_str);
  }

  float sample_rate = 16000;
  LinearResampler resampler;
  if (mic_sample_rate != sample_rate) {
    float min_freq = std::min(mic_sample_rate, sample_rate);
    float lowpass_cutoff = 0.99 * 0.5 * min_freq;

    int32_t lowpass_filter_width = 6;
    resampler = LinearResampler::Create(mic_sample_rate, sample_rate,
                                        lowpass_cutoff, lowpass_filter_width);
  }

  if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
                      nullptr)) {
    std::cerr << "Failed to open microphone device\n";
    return -1;
  }

  int32_t window_size = 512;  // samples, please don't change

  int32_t offset = 0;
  std::vector<float> buffer;
  bool speech_started = false;

  auto started_time = std::chrono::steady_clock::now();

  SherpaDisplay display;

  std::cout << "Started! Please speak\n";

  while (!stop) {
    {
      std::unique_lock<std::mutex> lock(mutex);
      while (samples_queue.empty() && !stop) {
        condition_variable.wait(lock);
      }
      if (stop) {
        break;
      }

      const auto &s = samples_queue.front();
      if (!resampler.Get()) {
        buffer.insert(buffer.end(), s.begin(), s.end());
      } else {
        auto resampled = resampler.Resample(s.data(), s.size(), false);
        buffer.insert(buffer.end(), resampled.begin(), resampled.end());
      }

      samples_queue.pop();
    }

    for (; offset + window_size < buffer.size(); offset += window_size) {
      vad.AcceptWaveform(buffer.data() + offset, window_size);
      if (!speech_started && vad.IsDetected()) {
        speech_started = true;
        started_time = std::chrono::steady_clock::now();
      }
    }
    if (!speech_started) {
      if (buffer.size() > 10 * window_size) {
        offset -= buffer.size() - 10 * window_size;
        buffer = {buffer.end() - 10 * window_size, buffer.end()};
      }
    }

    auto current_time = std::chrono::steady_clock::now();
    const float elapsed_seconds =
        std::chrono::duration_cast<std::chrono::milliseconds>(current_time -
                                                              started_time)
            .count() /
        1000.;

    if (speech_started && elapsed_seconds > 0.2) {
      OfflineStream stream = recognizer.CreateStream();
      stream.AcceptWaveform(sample_rate, buffer.data(), buffer.size());

      recognizer.Decode(&stream);

      OfflineRecognizerResult result = recognizer.GetResult(&stream);
      display.UpdateText(result.text);
      display.Display();

      started_time = std::chrono::steady_clock::now();
    }

    while (!vad.IsEmpty()) {
      auto segment = vad.Front();

      vad.Pop();

      OfflineStream stream = recognizer.CreateStream();
      stream.AcceptWaveform(sample_rate, segment.samples.data(),
                            segment.samples.size());

      recognizer.Decode(&stream);

      OfflineRecognizerResult result = recognizer.GetResult(&stream);

      display.UpdateText(result.text);
      display.FinalizeCurrentSentence();
      display.Display();

      buffer.clear();
      offset = 0;
      speech_started = false;
    }
  }

  return 0;
}


================================================
FILE: cxx-api-examples/pocket-tts-en-cxx-api.cc
================================================
// cxx-api-examples/pocket-tts-en-cxx-api.cc
//
// Copyright (c)  2026  Xiaomi Corporation

// This file shows how to use sherpa-onnx CXX API
// for English TTS with PocketTTS.
//
// clang-format off
/*
Usage

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
tar xf sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
rm sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2

./pocket-tts-en-cxx-api

 */
// clang-format on

#include <cstdint>
#include <cstdio>
#include <string>
#include <utility>

#include "sherpa-onnx/c-api/cxx-api.h"

static int32_t ProgressCallback(const float *samples, int32_t num_samples,
                                float progress, void *arg) {
  fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
  // return 1 to continue generating
  // return 0 to stop generating
  return 1;
}

int32_t main(int32_t argc, char *argv[]) {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OfflineTtsConfig config;

  config.model.pocket.lm_flow =
      "./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_flow.int8.onnx";
  config.model.pocket.lm_main =
      "./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_main.int8.onnx";
  config.model.pocket.encoder =
      "./sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx";
  config.model.pocket.decoder =
      "./sherpa-onnx-pocket-tts-int8-2026-01-26/decoder.int8.onnx";
  config.model.pocket.text_conditioner =
      "./sherpa-onnx-pocket-tts-int8-2026-01-26/text_conditioner.onnx";
  config.model.pocket.vocab_json =
      "./sherpa-onnx-pocket-tts-int8-2026-01-26/vocab.json";
  config.model.pocket.token_scores_json =
      "./sherpa-onnx-pocket-tts-int8-2026-01-26/token_scores.json";

  config.model.num_threads = 2;

  // If you don't want to see debug messages, please set it to 0
  config.model.debug = 1;

  std::string filename = "./generated-pocket-en-cxx.wav";
  std::string text =
      "Today as always, men fall into two groups: slaves and free men. Whoever "
      "does not have two-thirds of his day for himself, is a slave, whatever "
      "he may be: a statesman, a businessman, an official, or a scholar. "
      "Friends fell out often because life was changing so fast. The easiest "
      "thing in the world was to lose touch with someone.";

  auto tts = OfflineTts::Create(config);
  GenerationConfig cfg;
  cfg.speed = 1.0;

  std::string reference_audio_file =
      "./sherpa-onnx-pocket-tts-int8-2026-01-26/test_wavs/bria.wav";

  Wave wave = ReadWave(reference_audio_file);
  cfg.reference_audio = std::move(wave.samples);
  cfg.reference_sample_rate = wave.sample_rate;
  cfg.extra["max_reference_audio_len"] = "10";

#if 0
  // If you don't want to use a callback, then please enable this branch
  GeneratedAudio audio = tts.Generate(text, cfg);
#else
  GeneratedAudio audio = tts.Generate(text, cfg, ProgressCallback);
#endif

  WriteWave(filename, {audio.samples, audio.sample_rate});

  fprintf(stderr, "Input text is: %s\n", text.c_str());
  fprintf(stderr, "Saved to: %s\n", filename.c_str());

  return 0;
}


================================================
FILE: cxx-api-examples/sense-voice-cxx-api.cc
================================================
// cxx-api-examples/sense-voice-cxx-api.cc
// Copyright (c)  2024  Xiaomi Corporation

//
// This file demonstrates how to use sense voice with sherpa-onnx's C++ API.
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
// tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
// rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
//
// clang-format on

#include <chrono>  // NOLINT
#include <cstdio>
#include <iostream>
#include <string>

#include "sherpa-onnx/c-api/cxx-api.h"

int32_t main() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OfflineRecognizerConfig config;

  config.model_config.sense_voice.model =
      "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx";
  config.model_config.sense_voice.use_itn = true;
  config.model_config.sense_voice.language = "auto";
  config.model_config.tokens =
      "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt";

  config.model_config.num_threads = 1;

  std::cout << "Loading model\n";
  OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
  if (!recognizer.Get()) {
    std::cerr << "Please check your config\n";
    return -1;
  }
  std::cout << "Loading model done\n";

  std::string wave_filename =
      "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/en.wav";

  Wave wave = ReadWave(wave_filename);
  if (wave.samples.empty()) {
    std::cerr << "Failed to read: '" << wave_filename << "'\n";
    return -1;
  }

  std::cout << "Start recognition\n";
  const auto begin = std::chrono::steady_clock::now();

  OfflineStream stream = recognizer.CreateStream();
  stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
                        wave.samples.size());

  recognizer.Decode(&stream);

  OfflineRecognizerResult result = recognizer.GetResult(&stream);

  const auto end = std::chrono::steady_clock::now();
  const float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;
  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
  float rtf = elapsed_seconds / duration;

  std::cout << "text: " << result.text << "\n";
  printf("Number of threads: %d\n", config.model_config.num_threads);
  printf("Duration: %.3fs\n", duration);
  printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
  printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
         duration, rtf);

  return 0;
}


================================================
FILE: cxx-api-examples/sense-voice-simulate-streaming-alsa-cxx-api.cc
================================================
// cxx-api-examples/sense-voice-simulate-streaming-alsa-cxx-api.cc
// Copyright (c)  2025  Xiaomi Corporation

//
// This file demonstrates how to use sense voice with sherpa-onnx's C++ API
// for streaming speech recognition from a microphone.
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
// tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
// rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
//
// clang-format on

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

#include <chrono>              // NOLINT
#include <condition_variable>  // NOLINT
#include <iostream>
#include <mutex>  // NOLINT
#include <queue>
#include <string>
#include <thread>  // NOLINT
#include <utility>
#include <vector>

#include "sherpa-display.h"  // NOLINT
#include "sherpa-onnx/c-api/cxx-api.h"
#include "sherpa-onnx/csrc/alsa.h"

std::queue<std::vector<float>> samples_queue;
std::condition_variable condition_variable;
std::mutex mutex;
bool stop = false;

static void Handler(int32_t /*sig*/) {
  stop = true;
  condition_variable.notify_one();
  fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n");
}

static void RecordCallback(sherpa_onnx::Alsa *alsa) {
  int32_t chunk = 0.1 * alsa->GetActualSampleRate();
  while (!stop) {
    std::vector<float> samples = alsa->Read(chunk);

    std::lock_guard<std::mutex> lock(mutex);
    samples_queue.emplace(std::move(samples));
    condition_variable.notify_one();
  }
}

static sherpa_onnx::cxx::VoiceActivityDetector CreateVad() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  VadModelConfig config;
  config.silero_vad.model = "./silero_vad.onnx";
  config.silero_vad.threshold = 0.5;
  config.silero_vad.min_silence_duration = 0.1;
  config.silero_vad.min_speech_duration = 0.25;
  config.silero_vad.max_speech_duration = 8;
  config.sample_rate = 16000;
  config.debug = false;

  VoiceActivityDetector vad = VoiceActivityDetector::Create(config, 20);
  if (!vad.Get()) {
    std::cerr << "Failed to create VAD. Please check your config\n";
    exit(-1);
  }

  return vad;
}

static sherpa_onnx::cxx::OfflineRecognizer CreateOfflineRecognizer() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OfflineRecognizerConfig config;

  config.model_config.sense_voice.model =
      "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx";
  config.model_config.sense_voice.use_itn = false;
  config.model_config.sense_voice.language = "auto";
  config.model_config.tokens =
      "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt";

  config.model_config.num_threads = 2;
  config.model_config.debug = false;

  std::cout << "Loading model\n";
  OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
  if (!recognizer.Get()) {
    std::cerr << "Please check your config\n";
    exit(-1);
  }
  std::cout << "Loading model done\n";
  return recognizer;
}

int32_t main(int32_t argc, const char *argv[]) {
  const char *kUsageMessage = R"usage(
Usage:

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2

./sense-voice-simulate-streaming-alsa-cxx-api device_name

The device name specifies which microphone to use in case there are several
on your system. You can use

  arecord -l

to find all available microphones on your computer. For instance, if it outputs

**** List of CAPTURE Hardware Devices ****
card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
  Subdevices: 1/1
  Subdevice #0: subdevice #0

and if you want to select card 3 and device 0 on that card, please use:

  plughw:3,0

as the device_name.
)usage";

  if (argc != 2) {
    fprintf(stderr, "%s\n", kUsageMessage);
    return -1;
  }

  signal(SIGINT, Handler);

  using namespace sherpa_onnx::cxx;  // NOLINT

  auto vad = CreateVad();
  auto recognizer = CreateOfflineRecognizer();

  int32_t expected_sample_rate = 16000;

  std::string device_name = argv[1];
  sherpa_onnx::Alsa alsa(device_name.c_str());
  fprintf(stderr, "Use recording device: %s\n", device_name.c_str());

  if (alsa.GetExpectedSampleRate() != expected_sample_rate) {
    fprintf(stderr, "sample rate: %d != %d\n", alsa.GetExpectedSampleRate(),
            expected_sample_rate);
    exit(-1);
  }

  int32_t window_size = 512;  // samples, please don't change

  int32_t offset = 0;
  std::vector<float> buffer;
  bool speech_started = false;

  auto started_time = std::chrono::steady_clock::now();

  SherpaDisplay display;

  std::thread record_thread(RecordCallback, &alsa);

  std::cout << "Started! Please speak\n";

  while (!stop) {
    {
      std::unique_lock<std::mutex> lock(mutex);
      while (samples_queue.empty() && !stop) {
        condition_variable.wait(lock);
      }
      if (stop) {
        break;
      }

      const auto &s = samples_queue.front();
      buffer.insert(buffer.end(), s.begin(), s.end());

      samples_queue.pop();
    }

    for (; offset + window_size < buffer.size(); offset += window_size) {
      vad.AcceptWaveform(buffer.data() + offset, window_size);
      if (!speech_started && vad.IsDetected()) {
        speech_started = true;
        started_time = std::chrono::steady_clock::now();
      }
    }
    if (!speech_started) {
      if (buffer.size() > 10 * window_size) {
        offset -= buffer.size() - 10 * window_size;
        buffer = {buffer.end() - 10 * window_size, buffer.end()};
      }
    }

    auto current_time = std::chrono::steady_clock::now();
    const float elapsed_seconds =
        std::chrono::duration_cast<std::chrono::milliseconds>(current_time -
                                                              started_time)
            .count() /
        1000.;

    if (speech_started && elapsed_seconds > 0.2) {
      OfflineStream stream = recognizer.CreateStream();
      stream.AcceptWaveform(expected_sample_rate, buffer.data(), buffer.size());

      recognizer.Decode(&stream);

      OfflineRecognizerResult result = recognizer.GetResult(&stream);
      display.UpdateText(result.text);
      display.Display();

      started_time = std::chrono::steady_clock::now();
    }

    while (!vad.IsEmpty()) {
      auto segment = vad.Front();

      vad.Pop();

      OfflineStream stream = recognizer.CreateStream();
      stream.AcceptWaveform(expected_sample_rate, segment.samples.data(),
                            segment.samples.size());

      recognizer.Decode(&stream);

      OfflineRecognizerResult result = recognizer.GetResult(&stream);

      display.UpdateText(result.text);
      display.FinalizeCurrentSentence();
      display.Display();

      buffer.clear();
      offset = 0;
      speech_started = false;
    }
  }

  record_thread.join();

  return 0;
}


================================================
FILE: cxx-api-examples/sense-voice-simulate-streaming-microphone-cxx-api.cc
================================================
// cxx-api-examples/sense-voice-simulate-streaming-microphone-cxx-api.cc
// Copyright (c)  2025  Xiaomi Corporation

//
// This file demonstrates how to use sense voice with sherpa-onnx's C++ API
// for streaming speech recognition from a microphone.
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
// tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
// rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
//
// clang-format on

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <chrono>              // NOLINT
#include <condition_variable>  // NOLINT
#include <iostream>
#include <mutex>  // NOLINT
#include <queue>
#include <vector>

#include "portaudio.h"       // NOLINT
#include "sherpa-display.h"  // NOLINT
#include "sherpa-onnx/c-api/cxx-api.h"
#include "sherpa-onnx/csrc/microphone.h"

std::queue<std::vector<float>> samples_queue;
std::condition_variable condition_variable;
std::mutex mutex;
bool stop = false;

static void Handler(int32_t /*sig*/) {
  stop = true;
  condition_variable.notify_one();
  fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n");
}

static int32_t RecordCallback(const void *input_buffer,
                              void * /*output_buffer*/,
                              unsigned long frames_per_buffer,  // NOLINT
                              const PaStreamCallbackTimeInfo * /*time_info*/,
                              PaStreamCallbackFlags /*status_flags*/,
                              void * /*user_data*/) {
  std::lock_guard<std::mutex> lock(mutex);
  samples_queue.emplace(
      reinterpret_cast<const float *>(input_buffer),
      reinterpret_cast<const float *>(input_buffer) + frames_per_buffer);
  condition_variable.notify_one();

  return stop ? paComplete : paContinue;
}

static sherpa_onnx::cxx::VoiceActivityDetector CreateVad() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  VadModelConfig config;
  config.silero_vad.model = "./silero_vad.onnx";
  config.silero_vad.threshold = 0.5;
  config.silero_vad.min_silence_duration = 0.1;
  config.silero_vad.min_speech_duration = 0.25;
  config.silero_vad.max_speech_duration = 8;
  config.sample_rate = 16000;
  config.debug = false;

  VoiceActivityDetector vad = VoiceActivityDetector::Create(config, 20);
  if (!vad.Get()) {
    std::cerr << "Failed to create VAD. Please check your config\n";
    exit(-1);
  }

  return vad;
}

static sherpa_onnx::cxx::OfflineRecognizer CreateOfflineRecognizer() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OfflineRecognizerConfig config;

  config.model_config.sense_voice.model =
      "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx";
  config.model_config.sense_voice.use_itn = false;
  config.model_config.sense_voice.language = "auto";
  config.model_config.tokens =
      "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt";

  config.model_config.num_threads = 2;
  config.model_config.debug = false;

  std::cout << "Loading model\n";
  OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
  if (!recognizer.Get()) {
    std::cerr << "Please check your config\n";
    exit(-1);
  }
  std::cout << "Loading model done\n";
  return recognizer;
}

int32_t main() {
  signal(SIGINT, Handler);

  using namespace sherpa_onnx::cxx;  // NOLINT

  auto vad = CreateVad();
  auto recognizer = CreateOfflineRecognizer();

  sherpa_onnx::Microphone mic;

  PaDeviceIndex num_devices = Pa_GetDeviceCount();
  if (num_devices == 0) {
    std::cerr << "  If you are using Linux, please try "
                 "./build/bin/sense-voice-simulate-streaming-alsa-cxx-api\n";
    return -1;
  }

  int32_t device_index = Pa_GetDefaultInputDevice();
  const char *pDeviceIndex = std::getenv("SHERPA_ONNX_MIC_DEVICE");
  if (pDeviceIndex) {
    fprintf(stderr, "Use specified device: %s\n", pDeviceIndex);
    device_index = atoi(pDeviceIndex);
  }
  mic.PrintDevices(device_index);

  float mic_sample_rate = 16000;
  const char *sample_rate_str = std::getenv("SHERPA_ONNX_MIC_SAMPLE_RATE");
  if (sample_rate_str) {
    fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
    mic_sample_rate = atof(sample_rate_str);
  }
  float sample_rate = 16000;
  LinearResampler resampler;
  if (mic_sample_rate != sample_rate) {
    float min_freq = std::min(mic_sample_rate, sample_rate);
    float lowpass_cutoff = 0.99 * 0.5 * min_freq;

    int32_t lowpass_filter_width = 6;
    resampler = LinearResampler::Create(mic_sample_rate, sample_rate,
                                        lowpass_cutoff, lowpass_filter_width);
  }
  if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
                      nullptr)) {
    std::cerr << "Failed to open microphone device\n";
    return -1;
  }

  int32_t window_size = 512;  // samples, please don't change

  int32_t offset = 0;
  std::vector<float> buffer;
  bool speech_started = false;

  auto started_time = std::chrono::steady_clock::now();

  SherpaDisplay display;

  std::cout << "Started! Please speak\n";

  while (!stop) {
    {
      std::unique_lock<std::mutex> lock(mutex);
      while (samples_queue.empty() && !stop) {
        condition_variable.wait(lock);
      }

      if (stop) {
        break;
      }

      const auto &s = samples_queue.front();
      if (!resampler.Get()) {
        buffer.insert(buffer.end(), s.begin(), s.end());
      } else {
        auto resampled = resampler.Resample(s.data(), s.size(), false);
        buffer.insert(buffer.end(), resampled.begin(), resampled.end());
      }

      samples_queue.pop();
    }

    for (; offset + window_size < buffer.size(); offset += window_size) {
      vad.AcceptWaveform(buffer.data() + offset, window_size);
      if (!speech_started && vad.IsDetected()) {
        speech_started = true;
        started_time = std::chrono::steady_clock::now();
      }
    }
    if (!speech_started) {
      if (buffer.size() > 10 * window_size) {
        offset -= buffer.size() - 10 * window_size;
        buffer = {buffer.end() - 10 * window_size, buffer.end()};
      }
    }

    auto current_time = std::chrono::steady_clock::now();
    const float elapsed_seconds =
        std::chrono::duration_cast<std::chrono::milliseconds>(current_time -
                                                              started_time)
            .count() /
        1000.;

    if (speech_started && elapsed_seconds > 0.2) {
      OfflineStream stream = recognizer.CreateStream();
      stream.AcceptWaveform(sample_rate, buffer.data(), buffer.size());

      recognizer.Decode(&stream);

      OfflineRecognizerResult result = recognizer.GetResult(&stream);
      display.UpdateText(result.text);
      display.Display();

      started_time = std::chrono::steady_clock::now();
    }

    while (!vad.IsEmpty()) {
      auto segment = vad.Front();

      vad.Pop();

      OfflineStream stream = recognizer.CreateStream();
      stream.AcceptWaveform(sample_rate, segment.samples.data(),
                            segment.samples.size());

      recognizer.Decode(&stream);

      OfflineRecognizerResult result = recognizer.GetResult(&stream);

      display.UpdateText(result.text);
      display.FinalizeCurrentSentence();
      display.Display();

      buffer.clear();
      offset = 0;
      speech_started = false;
    }
  }

  return 0;
}


================================================
FILE: cxx-api-examples/sense-voice-with-hr-cxx-api.cc
================================================
// cxx-api-examples/sense-voice-with-hr-cxx-api.cc
//
// Copyright (c)  2024-2025  Xiaomi Corporation

//
// This file demonstrates how to use sense voice with sherpa-onnx's C++ API.
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
// tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
// rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
// tar xf dict.tar.bz2
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
//
// clang-format on

#include <chrono>  // NOLINT
#include <cstdio>
#include <iostream>
#include <string>

#include "sherpa-onnx/c-api/cxx-api.h"

int32_t main() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OfflineRecognizerConfig config;

  config.model_config.sense_voice.model =
      "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx";
  config.model_config.sense_voice.use_itn = true;
  config.model_config.sense_voice.language = "auto";
  config.model_config.tokens =
      "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt";
  config.hr.dict_dir = "./dict";
  config.hr.lexicon = "./lexicon.txt";

  // Please see
  // https://colab.research.google.com/drive/1jEaS3s8FbRJIcVQJv2EQx19EM_mnuARi?usp=sharing
  // for how to generate your own replace.fst
  config.hr.rule_fsts = "./replace.fst";

  config.model_config.num_threads = 1;

  std::cout << "Loading model\n";
  OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
  if (!recognizer.Get()) {
    std::cerr << "Please check your config\n";
    return -1;
  }
  std::cout << "Loading model done\n";

  std::string wave_filename = "./test-hr.wav";

  Wave wave = ReadWave(wave_filename);
  if (wave.samples.empty()) {
    std::cerr << "Failed to read: '" << wave_filename << "'\n";
    return -1;
  }

  std::cout << "Start recognition\n";
  const auto begin = std::chrono::steady_clock::now();

  OfflineStream stream = recognizer.CreateStream();
  stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
                        wave.samples.size());

  recognizer.Decode(&stream);

  OfflineRecognizerResult result = recognizer.GetResult(&stream);

  const auto end = std::chrono::steady_clock::now();
  const float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;
  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
  float rtf = elapsed_seconds / duration;

  std::cout << "text: " << result.text << "\n";
  printf("Number of threads: %d\n", config.model_config.num_threads);
  printf("Duration: %.3fs\n", duration);
  printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
  printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
         duration, rtf);

  return 0;
}


================================================
FILE: cxx-api-examples/sherpa-display.h
================================================
// cxx-api-examples/sherpa-display.cc
// Copyright (c)  2025  Xiaomi Corporation
#pragma once

#include <stdlib.h>

#include <cstdio>
#include <ctime>
#include <iomanip>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

namespace sherpa_onnx::cxx {

class SherpaDisplay {
 public:
  void UpdateText(const std::string &text) { current_text_ = text; }

  void FinalizeCurrentSentence() {
    if (!current_text_.empty() &&
        (current_text_[0] != ' ' || current_text_.size() > 1)) {
      sentences_.push_back({GetCurrentDateTime(), std::move(current_text_)});
    }
  }

  void Display() const {
    if (!sentences_.empty() || !current_text_.empty()) {
      ClearScreen();
    }

    printf("=== Speech Recognition with Next-gen Kaldi ===\n");
    printf("------------------------------\n");
    if (!sentences_.empty()) {
      int32_t i = 1;
      for (const auto &p : sentences_) {
        printf("[%s] %d. %s\n", p.first.c_str(), i, p.second.c_str());
        i += 1;
      }

      printf("------------------------------\n");
    }

    if (!current_text_.empty()) {
      printf("Recognizing: %s\n", current_text_.c_str());
    }
  }

 private:
  static void ClearScreen() {
#ifdef _MSC_VER
    auto ret = system("cls");
#else
    auto ret = system("clear");
#endif
    (void)ret;
  }

  static std::string GetCurrentDateTime() {
    std::ostringstream os;
    auto t = std::time(nullptr);
    auto tm = std::localtime(&t);
    os << std::put_time(tm, "%Y-%m-%d %H:%M:%S");
    return os.str();
  }

 private:
  std::vector<std::pair<std::string, std::string>> sentences_;
  std::string current_text_;
};

}  // namespace sherpa_onnx::cxx


================================================
FILE: cxx-api-examples/speech-enhancement-dpdfnet-cxx-api.cc
================================================
// cxx-api-examples/speech-enhancement-dpdfnet-cxx-api.cc
//
// Copyright (c)  2026  Xiaomi Corporation
//
// We assume you have pre-downloaded the DPDFNet model and sample test wave.
// DPDFNet models are available from either:
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
// https://huggingface.co/Ceva-IP/DPDFNet
//
// An example command to download:
// clang-format off
/*
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet_baseline.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
*/
// clang-format on
//
// Use dpdfnet_baseline.onnx, dpdfnet2.onnx, dpdfnet4.onnx, or dpdfnet8.onnx
// for 16 kHz downstream ASR or speech recognition.
// Use dpdfnet2_48khz_hr.onnx for 48 kHz enhancement output.

#include <chrono>  // NOLINT
#include <cstdio>
#include <iostream>
#include <string>

#include "sherpa-onnx/c-api/cxx-api.h"

int32_t main() {
  using namespace sherpa_onnx::cxx;  // NOLINT

  OfflineSpeechDenoiserConfig config;
  std::string model_filename = "./dpdfnet_baseline.onnx";
  std::string wav_filename = "./inp_16k.wav";
  std::string out_wave_filename = "./enhanced-dpdfnet.wav";
  config.model.dpdfnet.model = model_filename;

  auto sd = OfflineSpeechDenoiser::Create(config);
  if (!sd.Get()) {
    std::cerr << "Please check your config\n";
    return -1;
  }

  Wave wave = ReadWave(wav_filename);
  if (wave.samples.empty()) {
    std::cerr << "Failed to read: '" << wav_filename << "'\n";
    return -1;
  }

  std::cout << "Started\n";
  const auto begin = std::chrono::steady_clock::now();
  auto denoised =
      sd.Run(wave.samples.data(), wave.samples.size(), wave.sample_rate);
  const auto end = std::chrono::steady_clock::now();
  std::cout << "Done\n";

  WriteWave(out_wave_filename, {denoised.samples, denoised.sample_rate});

  const float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;
  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
  float rtf = elapsed_seconds / duration;

  std::cout << "Saved to " << out_wave_filename << "\n";
  printf("Duration: %.3fs\n", duration);
  printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
  printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
         duration, rtf);
  return 0;
}


================================================
FILE: cxx-api-examples/speech-enhancement-gtcrn-cxx-api.cc
================================================
// cxx-api-examples/speech-enhancement-gtcrn-cxx-api.cc
//
// Copyright (c)  2026  Xiaomi Corporation
//
// We assume you have pre-downloaded the GTCRN model and sample test wave from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
//
// An example command to download:
// clang-format off
/*
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
*/
// clang-format on

#include <chrono>  // NOLINT
#include <cstdio>
#include <iostream>
#include <string>

#include "sherpa-onnx/c-api/cxx-api.h"

int32_t main() {
  using namespace sherpa_onnx::cxx;  // NOLINT

  OfflineSpeechDenoiserConfig config;
  std::string model_filename = "./gtcrn_simple.onnx";
  std::string wav_filename = "./inp_16k.wav";
  std::string out_wave_filename = "./enhanced-gtcrn.wav";
  config.model.gtcrn.model = model_filename;

  auto sd = OfflineSpeechDenoiser::Create(config);
  if (!sd.Get()) {
    std::cerr << "Please check your config\n";
    return -1;
  }

  Wave wave = ReadWave(wav_filename);
  if (wave.samples.empty()) {
    std::cerr << "Failed to read: '" << wav_filename << "'\n";
    return -1;
  }

  std::cout << "Started\n";
  const auto begin = std::chrono::steady_clock::now();
  auto denoised =
      sd.Run(wave.samples.data(), wave.samples.size(), wave.sample_rate);
  const auto end = std::chrono::steady_clock::now();
  std::cout << "Done\n";

  WriteWave(out_wave_filename, {denoised.samples, denoised.sample_rate});

  const float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;
  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
  float rtf = elapsed_seconds / duration;

  std::cout << "Saved to " << out_wave_filename << "\n";
  printf("Duration: %.3fs\n", duration);
  printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
  printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
         duration, rtf);
  return 0;
}


================================================
FILE: cxx-api-examples/streaming-t-one-ctc-cxx-api.cc
================================================
// cxx-api-examples/streaming-t-one-ctc-cxx-api.cc
// Copyright (c)  2025  Xiaomi Corporation

//
// This file demonstrates how to use streaming T-one
// with sherpa-onnx's C++ API.
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
// tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
// rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
//
// clang-format on

#include <chrono>  // NOLINT
#include <cstdio>
#include <iostream>
#include <string>
#include <vector>

#include "sherpa-onnx/c-api/cxx-api.h"

int32_t main() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OnlineRecognizerConfig config;

  // please see
  config.model_config.t_one_ctc.model =
      "sherpa-onnx-streaming-t-one-russian-2025-09-08/model.onnx";

  config.model_config.tokens =
      "sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt";

  config.model_config.num_threads = 1;

  std::cout << "Loading model\n";
  OnlineRecognizer recognizer = OnlineRecognizer::Create(config);
  if (!recognizer.Get()) {
    std::cerr << "Please check your config\n";
    return -1;
  }
  std::cout << "Loading model done\n";

  std::string wave_filename =
      "sherpa-onnx-streaming-t-one-russian-2025-09-08/0.wav";

  Wave wave = ReadWave(wave_filename);
  if (wave.samples.empty()) {
    std::cerr << "Failed to read: '" << wave_filename << "'\n";
    return -1;
  }

  std::cout << "Start recognition\n";
  const auto begin = std::chrono::steady_clock::now();

  OnlineStream stream = recognizer.CreateStream();
  std::vector<float> left_padding(2400);  // 0.3 seconds at 8kHz
  std::vector<float> tail_padding(4800);  // 0.6 seconds at 8kHz

  stream.AcceptWaveform(wave.sample_rate, left_padding.data(),
                        left_padding.size());
  stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
                        wave.samples.size());
  stream.AcceptWaveform(wave.sample_rate, tail_padding.data(),
                        tail_padding.size());
  stream.InputFinished();

  while (recognizer.IsReady(&stream)) {
    recognizer.Decode(&stream);
  }

  OnlineRecognizerResult result = recognizer.GetResult(&stream);

  const auto end = std::chrono::steady_clock::now();
  const float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;
  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
  float rtf = elapsed_seconds / duration;

  std::cout << "text: " << result.text << "\n";
  printf("Number of threads: %d\n", config.model_config.num_threads);
  printf("Duration: %.3fs\n", duration);
  printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
  printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
         duration, rtf);

  return 0;
}


================================================
FILE: cxx-api-examples/streaming-zipformer-cxx-api.cc
================================================
// cxx-api-examples/streaming-zipformer-cxx-api.cc
// Copyright (c)  2024  Xiaomi Corporation

//
// This file demonstrates how to use streaming Zipformer
// with sherpa-onnx's C++ API.
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
// tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
// rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
//
// clang-format on

#include <chrono>  // NOLINT
#include <cstdio>
#include <iostream>
#include <string>

#include "sherpa-onnx/c-api/cxx-api.h"

int32_t main() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OnlineRecognizerConfig config;

  // please see
  // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
  config.model_config.transducer.encoder =
      "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
      "encoder-epoch-99-avg-1.int8.onnx";

  // Note: We recommend not using int8.onnx for the decoder.
  config.model_config.transducer.decoder =
      "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
      "decoder-epoch-99-avg-1.onnx";

  config.model_config.transducer.joiner =
      "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
      "joiner-epoch-99-avg-1.int8.onnx";

  config.model_config.tokens =
      "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt";

  config.model_config.num_threads = 1;

  std::cout << "Loading model\n";
  OnlineRecognizer recognizer = OnlineRecognizer::Create(config);
  if (!recognizer.Get()) {
    std::cerr << "Please check your config\n";
    return -1;
  }
  std::cout << "Loading model done\n";

  std::string wave_filename =
      "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/"
      "0.wav";
  Wave wave = ReadWave(wave_filename);
  if (wave.samples.empty()) {
    std::cerr << "Failed to read: '" << wave_filename << "'\n";
    return -1;
  }

  std::cout << "Start recognition\n";
  const auto begin = std::chrono::steady_clock::now();

  OnlineStream stream = recognizer.CreateStream();
  stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
                        wave.samples.size());
  stream.InputFinished();

  while (recognizer.IsReady(&stream)) {
    recognizer.Decode(&stream);
  }

  OnlineRecognizerResult result = recognizer.GetResult(&stream);

  const auto end = std::chrono::steady_clock::now();
  const float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;
  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
  float rtf = elapsed_seconds / duration;

  std::cout << "text: " << result.text << "\n";
  printf("Number of threads: %d\n", config.model_config.num_threads);
  printf("Duration: %.3fs\n", duration);
  printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
  printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
         duration, rtf);

  return 0;
}


================================================
FILE: cxx-api-examples/streaming-zipformer-rtf-cxx-api.cc
================================================
// cxx-api-examples/streaming-zipformer-rtf-cxx-api.cc
// Copyright (c)  2024  Xiaomi Corporation

//
// This file demonstrates how to use streaming Zipformer
// with sherpa-onnx's C++ API.
//
// clang-format off
//
// cd /path/sherpa-onnx/
// mkdir build
// cd build
// cmake ..
// make
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
// tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
// rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
//
// #  1. Test on CPU, run once
//
// ./bin/streaming-zipformer-rtf-cxx-api
//
// #  2. Test on CPU, run 10 times
//
// ./bin/streaming-zipformer-rtf-cxx-api 10
//
// #  3. Test on GPU, run 10 times
//
// ./bin/streaming-zipformer-rtf-cxx-api 10 cuda
//
// clang-format on

#include <chrono>  // NOLINT
#include <cstdio>
#include <iostream>
#include <string>

#include "sherpa-onnx/c-api/cxx-api.h"

int32_t main(int argc, char *argv[]) {
  int32_t num_runs = 1;
  if (argc >= 2) {
    num_runs = atoi(argv[1]);
    if (num_runs < 0) {
      num_runs = 1;
    }
  }

  bool use_gpu = (argc == 3);

  using namespace sherpa_onnx::cxx;  // NOLINT
  OnlineRecognizerConfig config;

  // please see
  // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
  config.model_config.transducer.encoder =
      "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
      "encoder-epoch-99-avg-1.int8.onnx";

  // Note: We recommend not using int8.onnx for the decoder.
  config.model_config.transducer.decoder =
      "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
      "decoder-epoch-99-avg-1.onnx";

  config.model_config.transducer.joiner =
      "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
      "joiner-epoch-99-avg-1.int8.onnx";

  config.model_config.tokens =
      "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt";

  config.model_config.num_threads = 1;
  config.model_config.provider = use_gpu ? "cuda" : "cpu";

  std::cout << "Loading model\n";
  OnlineRecognizer recognizer = OnlineRecognizer::Create(config);
  if (!recognizer.Get()) {
    std::cerr << "Please check your config\n";
    return -1;
  }
  std::cout << "Loading model done\n";

  std::string wave_filename =
      "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/"
      "0.wav";
  Wave wave = ReadWave(wave_filename);
  if (wave.samples.empty()) {
    std::cerr << "Failed to read: '" << wave_filename << "'\n";
    return -1;
  }

  std::cout << "Start recognition\n";
  float total_elapsed_seconds = 0;
  OnlineRecognizerResult result;
  for (int32_t i = 0; i < num_runs; ++i) {
    const auto begin = std::chrono::steady_clock::now();

    OnlineStream stream = recognizer.CreateStream();
    stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
                          wave.samples.size());
    stream.InputFinished();

    while (recognizer.IsReady(&stream)) {
      recognizer.Decode(&stream);
    }

    result = recognizer.GetResult(&stream);

    auto end = std::chrono::steady_clock::now();
    float elapsed_seconds =
        std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
            .count() /
        1000.;
    printf("Run %d/%d, elapsed seconds: %.3f\n", i, num_runs, elapsed_seconds);
    total_elapsed_seconds += elapsed_seconds;
  }
  float average_elapsed_secodns = total_elapsed_seconds / num_runs;
  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
  float rtf = total_elapsed_seconds / num_runs / duration;

  std::cout << "text: " << result.text << "\n";
  printf("Number of threads: %d\n", config.model_config.num_threads);
  printf("Duration: %.3fs\n", duration);
  printf("Total Elapsed seconds: %.3fs\n", total_elapsed_seconds);
  printf("Num runs: %d\n", num_runs);
  printf("Elapsed seconds per run: %.3f/%d=%.3f\n", total_elapsed_seconds,
         num_runs, average_elapsed_secodns);
  printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n",
         average_elapsed_secodns, duration, rtf);

  return 0;
}


================================================
FILE: cxx-api-examples/streaming-zipformer-with-hr-cxx-api.cc
================================================
// cxx-api-examples/streaming-zipformer-with-hr-cxx-api.cc
// Copyright (c)  2024-2025  Xiaomi Corporation

//
// This file demonstrates how to use streaming Zipformer
// with sherpa-onnx's C++ API.
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
// tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
// rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
// tar xf dict.tar.bz2
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
//
// clang-format on

#include <chrono>  // NOLINT
#include <cstdio>
#include <iostream>
#include <string>

#include "sherpa-onnx/c-api/cxx-api.h"

int32_t main() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OnlineRecognizerConfig config;

  // please see
  // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
  config.model_config.transducer.encoder =
      "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
      "encoder-epoch-99-avg-1.int8.onnx";

  // Note: We recommend not using int8.onnx for the decoder.
  config.model_config.transducer.decoder =
      "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
      "decoder-epoch-99-avg-1.onnx";

  config.model_config.transducer.joiner =
      "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
      "joiner-epoch-99-avg-1.int8.onnx";

  config.model_config.tokens =
      "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt";

  config.model_config.num_threads = 1;

  config.hr.dict_dir = "./dict";
  config.hr.lexicon = "./lexicon.txt";

  // Please see
  // https://colab.research.google.com/drive/1jEaS3s8FbRJIcVQJv2EQx19EM_mnuARi?usp=sharing
  // for how to generate your own replace.fst
  config.hr.rule_fsts = "./replace.fst";

  std::cout << "Loading model\n";
  OnlineRecognizer recognizer = OnlineRecognizer::Create(config);
  if (!recognizer.Get()) {
    std::cerr << "Please check your config\n";
    return -1;
  }
  std::cout << "Loading model done\n";

  std::string wave_filename = "./test-hr.wav";
  Wave wave = ReadWave(wave_filename);
  if (wave.samples.empty()) {
    std::cerr << "Failed to read: '" << wave_filename << "'\n";
    return -1;
  }

  std::cout << "Start recognition\n";
  const auto begin = std::chrono::steady_clock::now();

  OnlineStream stream = recognizer.CreateStream();
  stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
                        wave.samples.size());
  stream.InputFinished();

  while (recognizer.IsReady(&stream)) {
    recognizer.Decode(&stream);
  }

  OnlineRecognizerResult result = recognizer.GetResult(&stream);

  const auto end = std::chrono::steady_clock::now();
  const float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;
  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
  float rtf = elapsed_seconds / duration;

  std::cout << "text: " << result.text << "\n";
  printf("Number of threads: %d\n", config.model_config.num_threads);
  printf("Duration: %.3fs\n", duration);
  printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
  printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
         duration, rtf);

  return 0;
}


================================================
FILE: cxx-api-examples/supertonic-tts-en-cxx-api.cc
================================================
// cxx-api-examples/supertonic-tts-en-cxx-api.cc
//
// Copyright (c)  2026  zengyw

// This file shows how to use sherpa-onnx CXX API
// for English TTS with Supertonic.
//
// clang-format off
/*
Usage

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
tar xf sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
rm sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2

./supertonic-tts-en-cxx-api

*/
// clang-format on

#include <cstdint>
#include <cstdio>
#include <string>

#include "sherpa-onnx/c-api/cxx-api.h"

static int32_t ProgressCallback(const float *samples, int32_t num_samples,
                                float progress, void *arg) {
  fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
  // return 1 to continue generating
  // return 0 to stop generating
  return 1;
}

int32_t main(int32_t argc, char *argv[]) {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OfflineTtsConfig config;

  config.model.supertonic.duration_predictor =
      "./sherpa-onnx-supertonic-tts-int8-2026-03-06/"
      "duration_predictor.int8.onnx";
  config.model.supertonic.text_encoder =
      "./sherpa-onnx-supertonic-tts-int8-2026-03-06/text_encoder.int8.onnx";
  config.model.supertonic.vector_estimator =
      "./sherpa-onnx-supertonic-tts-int8-2026-03-06/vector_estimator.int8.onnx";
  config.model.supertonic.vocoder =
      "./sherpa-onnx-supertonic-tts-int8-2026-03-06/vocoder.int8.onnx";
  config.model.supertonic.tts_json =
      "./sherpa-onnx-supertonic-tts-int8-2026-03-06/tts.json";
  config.model.supertonic.unicode_indexer =
      "./sherpa-onnx-supertonic-tts-int8-2026-03-06/unicode_indexer.bin";
  config.model.supertonic.voice_style =
      "./sherpa-onnx-supertonic-tts-int8-2026-03-06/voice.bin";

  config.model.num_threads = 2;

  // If you don't want to see debug messages, please set it to 0
  config.model.debug = 1;

  std::string filename = "./generated-supertonic-en-cxx.wav";
  std::string text =
      "Today as always, men fall into two groups: slaves and free men. Whoever "
      "does not have two-thirds of his day for himself, is a slave, whatever "
      "he may be: a statesman, a businessman, an official, or a scholar.";

  auto tts = OfflineTts::Create(config);

  GenerationConfig gen_config;
  gen_config.sid = 6;
  gen_config.num_steps = 5;
  gen_config.speed = 1.25;  // larger -> faster
  gen_config.extra["lang"] = "en";

  // Use GenerationConfig for Supertonic.
  GeneratedAudio audio = tts.Generate(text, gen_config, ProgressCallback);

  WriteWave(filename, {audio.samples, audio.sample_rate});

  fprintf(stderr, "Input text is: %s\n", text.c_str());
  fprintf(stderr, "Saved to: %s\n", filename.c_str());

  return 0;
}


================================================
FILE: cxx-api-examples/vad-cxx-api.cc
================================================
// cxx-api-examples/vad-cxx-api.cc
//
// Copyright (c)  2025  Xiaomi Corporation

//
// This file demonstrates how to use VAD to remove silences from a file
// clang-format off
//
// To use silero-vad:
//  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
//
// To use ten-vad:
//  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
//
// clang-format on
#include <cstdio>
#include <iostream>
#include <string>
#include <vector>

#include "sherpa-onnx/c-api/cxx-api.h"

int32_t main() {
  using namespace sherpa_onnx::cxx;  // NOLINT

  std::string wave_filename = "./lei-jun-test.wav";
  if (!FileExists(wave_filename)) {
    fprintf(stderr, "Please download %s\n", wave_filename.c_str());
    return -1;
  }

  std::string vad_filename;
  bool use_silero_vad = false;
  bool use_ten_vad = false;

  if (FileExists("./silero_vad.onnx")) {
    printf("Use silero-vad\n");
    vad_filename = "./silero_vad.onnx";
    use_silero_vad = true;
  } else if (FileExists("./ten-vad.onnx")) {
    printf("Use ten-vad\n");
    vad_filename = "./ten-vad.onnx";
    use_ten_vad = true;
  } else {
    fprintf(stderr, "Please provide either silero_vad.onnx or ten-vad.onnx\n");
    return -1;
  }

  VadModelConfig config;
  if (use_silero_vad) {
    config.silero_vad.model = vad_filename;
    config.silero_vad.threshold = 0.3;
    config.silero_vad.min_silence_duration = 0.5;
    config.silero_vad.min_speech_duration = 0.25;
    config.silero_vad.max_speech_duration = 20;
    config.silero_vad.window_size = 512;
  } else if (use_ten_vad) {
    config.ten_vad.model = vad_filename;
    config.ten_vad.threshold = 0.3;
    config.ten_vad.min_silence_duration = 0.5;
    config.ten_vad.min_speech_duration = 0.25;
    config.ten_vad.max_speech_duration = 20;
    config.ten_vad.window_size = 256;
  }

  config.sample_rate = 16000;
  config.debug = true;

  VoiceActivityDetector vad = VoiceActivityDetector::Create(config, 20);
  if (!vad.Get()) {
    std::cerr << "Failed to create VAD. Please check your config\n";
    return -1;
  }

  Wave wave = ReadWave(wave_filename);
  if (wave.samples.empty()) {
    std::cerr << "Failed to read: '" << wave_filename << "'\n";
    return -1;
  }
  bool is_eof = false;
  int32_t i = 0;
  int32_t window_size = use_silero_vad ? config.silero_vad.window_size
                                       : config.ten_vad.window_size;

  int32_t sample_rate = config.sample_rate;

  std::vector<float> samples_without_silence;

  while (!is_eof) {
    if (i + window_size < wave.samples.size()) {
      vad.AcceptWaveform(wave.samples.data() + i, window_size);
      i += window_size;
    } else {
      is_eof = true;
      vad.Flush();
    }

    while (!vad.IsEmpty()) {
      auto segment = vad.Front();
      float start_time = segment.start / static_cast<float>(sample_rate);
      float end_time =
          start_time + segment.samples.size() / static_cast<float>(sample_rate);
      printf("%.3f -- %.3f\n", start_time, end_time);

      samples_without_silence.insert(samples_without_silence.end(),
                                     segment.samples.begin(),
                                     segment.samples.end());

      vad.Pop();
    }
  }

  bool ok = WriteWave("./lei-jun-test-no-silence.wav",
                      {samples_without_silence, sample_rate});
  if (ok) {
    std::cout << "Saved to ./lei-jun-test-no-silence.wav\n";
  } else {
    std::cerr << "Failed to write ./lei-jun-test-no-silence.wav\n";
  }

  return 0;
}


================================================
FILE: cxx-api-examples/wenet-ctc-cxx-api.cc
================================================
// cxx-api-examples/wenet-cxx-api.cc
// Copyright (c)  2025  Xiaomi Corporation

//
// This file demonstrates how to use Wenet CTC with sherpa-onnx's C++ API.
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
// tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
// rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
//
// clang-format on

#include <chrono>  // NOLINT
#include <cstdio>
#include <iostream>
#include <string>

#include "sherpa-onnx/c-api/cxx-api.h"

int32_t main() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OfflineRecognizerConfig config;

  // clang-format off
  config.model_config.wenet_ctc.model = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx";
  config.model_config.tokens = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt";

  config.model_config.num_threads = 1;

  std::cout << "Loading model\n";
  OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
  if (!recognizer.Get()) {
    std::cerr << "Please check your config\n";
    return -1;
  }
  std::cout << "Loading model done\n";

  std::string wave_filename = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav";
  // clang-format on

  Wave wave = ReadWave(wave_filename);
  if (wave.samples.empty()) {
    std::cerr << "Failed to read: '" << wave_filename << "'\n";
    return -1;
  }

  std::cout << "Start recognition\n";
  const auto begin = std::chrono::steady_clock::now();

  OfflineStream stream = recognizer.CreateStream();
  stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
                        wave.samples.size());

  recognizer.Decode(&stream);

  OfflineRecognizerResult result = recognizer.GetResult(&stream);

  const auto end = std::chrono::steady_clock::now();
  const float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;
  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
  float rtf = elapsed_seconds / duration;

  std::cout << "text: " << result.text << "\n";
  printf("Number of threads: %d\n", config.model_config.num_threads);
  printf("Duration: %.3fs\n", duration);
  printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
  printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
         duration, rtf);

  return 0;
}


================================================
FILE: cxx-api-examples/wenet-ctc-simulate-streaming-microphone-cxx-api.cc
================================================
// cxx-api-examples/wenet-ctc-simulate-streaming-microphone-cxx-api.cc
// Copyright (c)  2025  Xiaomi Corporation

//
// This file demonstrates how to use Wenet CTC with sherpa-onnx's C++ API
// for streaming speech recognition from a microphone.
//
// clang-format off
//
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
// tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
// rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
//
// clang-format on

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <chrono>              // NOLINT
#include <condition_variable>  // NOLINT
#include <iostream>
#include <mutex>  // NOLINT
#include <queue>
#include <vector>

#include "portaudio.h"       // NOLINT
#include "sherpa-display.h"  // NOLINT
#include "sherpa-onnx/c-api/cxx-api.h"
#include "sherpa-onnx/csrc/microphone.h"

std::queue<std::vector<float>> samples_queue;
std::condition_variable condition_variable;
std::mutex mutex;
bool stop = false;

static void Handler(int32_t /*sig*/) {
  stop = true;
  condition_variable.notify_one();
  fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n");
}

static int32_t RecordCallback(const void *input_buffer,
                              void * /*output_buffer*/,
                              unsigned long frames_per_buffer,  // NOLINT
                              const PaStreamCallbackTimeInfo * /*time_info*/,
                              PaStreamCallbackFlags /*status_flags*/,
                              void * /*user_data*/) {
  std::lock_guard<std::mutex> lock(mutex);
  samples_queue.emplace(
      reinterpret_cast<const float *>(input_buffer),
      reinterpret_cast<const float *>(input_buffer) + frames_per_buffer);
  condition_variable.notify_one();

  return stop ? paComplete : paContinue;
}

static sherpa_onnx::cxx::VoiceActivityDetector CreateVad() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  VadModelConfig config;
  config.silero_vad.model = "./silero_vad.onnx";
  config.silero_vad.threshold = 0.5;
  config.silero_vad.min_silence_duration = 0.1;
  config.silero_vad.min_speech_duration = 0.25;
  config.silero_vad.max_speech_duration = 8;
  config.sample_rate = 16000;
  config.debug = false;

  VoiceActivityDetector vad = VoiceActivityDetector::Create(config, 20);
  if (!vad.Get()) {
    std::cerr << "Failed to create VAD. Please check your config\n";
    exit(-1);
  }

  return vad;
}

static sherpa_onnx::cxx::OfflineRecognizer CreateOfflineRecognizer() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OfflineRecognizerConfig config;

  // clang-format off
  config.model_config.wenet_ctc.model = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx";
  config.model_config.tokens = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt";
  // clang-format on

  config.model_config.num_threads = 2;
  config.model_config.debug = false;

  std::cout << "Loading model\n";
  OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
  if (!recognizer.Get()) {
    std::cerr << "Please check your config\n";
    exit(-1);
  }
  std::cout << "Loading model done\n";
  return recognizer;
}

int32_t main() {
  signal(SIGINT, Handler);

  using namespace sherpa_onnx::cxx;  // NOLINT

  auto vad = CreateVad();
  auto recognizer = CreateOfflineRecognizer();

  sherpa_onnx::Microphone mic;

  PaDeviceIndex num_devices = Pa_GetDeviceCount();
  if (num_devices == 0) {
    std::cerr << "  If you are using Linux, please try "
                 "./build/bin/sense-voice-simulate-streaming-alsa-cxx-api\n";
    return -1;
  }

  int32_t device_index = Pa_GetDefaultInputDevice();
  const char *pDeviceIndex = std::getenv("SHERPA_ONNX_MIC_DEVICE");
  if (pDeviceIndex) {
    fprintf(stderr, "Use specified device: %s\n", pDeviceIndex);
    device_index = atoi(pDeviceIndex);
  }
  mic.PrintDevices(device_index);

  float mic_sample_rate = 16000;
  const char *sample_rate_str = std::getenv("SHERPA_ONNX_MIC_SAMPLE_RATE");
  if (sample_rate_str) {
    fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
    mic_sample_rate = atof(sample_rate_str);
  }
  float sample_rate = 16000;
  LinearResampler resampler;
  if (mic_sample_rate != sample_rate) {
    float min_freq = std::min(mic_sample_rate, sample_rate);
    float lowpass_cutoff = 0.99 * 0.5 * min_freq;

    int32_t lowpass_filter_width = 6;
    resampler = LinearResampler::Create(mic_sample_rate, sample_rate,
                                        lowpass_cutoff, lowpass_filter_width);
  }
  if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
                      nullptr)) {
    std::cerr << "Failed to open microphone device\n";
    return -1;
  }

  int32_t window_size = 512;  // samples, please don't change

  int32_t offset = 0;
  std::vector<float> buffer;
  bool speech_started = false;

  auto started_time = std::chrono::steady_clock::now();

  SherpaDisplay display;

  std::cout << "Started! Please speak\n";

  while (!stop) {
    {
      std::unique_lock<std::mutex> lock(mutex);
      while (samples_queue.empty() && !stop) {
        condition_variable.wait(lock);
      }

      if (stop) {
        break;
      }

      const auto &s = samples_queue.front();
      if (!resampler.Get()) {
        buffer.insert(buffer.end(), s.begin(), s.end());
      } else {
        auto resampled = resampler.Resample(s.data(), s.size(), false);
        buffer.insert(buffer.end(), resampled.begin(), resampled.end());
      }

      samples_queue.pop();
    }

    for (; offset + window_size < buffer.size(); offset += window_size) {
      vad.AcceptWaveform(buffer.data() + offset, window_size);
      if (!speech_started && vad.IsDetected()) {
        speech_started = true;
        started_time = std::chrono::steady_clock::now();
      }
    }
    if (!speech_started) {
      if (buffer.size() > 10 * window_size) {
        offset -= buffer.size() - 10 * window_size;
        buffer = {buffer.end() - 10 * window_size, buffer.end()};
      }
    }

    auto current_time = std::chrono::steady_clock::now();
    const float elapsed_seconds =
        std::chrono::duration_cast<std::chrono::milliseconds>(current_time -
                                                              started_time)
            .count() /
        1000.;

    if (speech_started && elapsed_seconds > 0.2) {
      OfflineStream stream = recognizer.CreateStream();
      stream.AcceptWaveform(sample_rate, buffer.data(), buffer.size());

      recognizer.Decode(&stream);

      OfflineRecognizerResult result = recognizer.GetResult(&stream);
      display.UpdateText(result.text);
      display.Display();

      started_time = std::chrono::steady_clock::now();
    }

    while (!vad.IsEmpty()) {
      auto segment = vad.Front();

      vad.Pop();

      OfflineStream stream = recognizer.CreateStream();
      stream.AcceptWaveform(sample_rate, segment.samples.data(),
                            segment.samples.size());

      recognizer.Decode(&stream);

      OfflineRecognizerResult result = recognizer.GetResult(&stream);

      display.UpdateText(result.text);
      display.FinalizeCurrentSentence();
      display.Display();

      buffer.clear();
      offset = 0;
      speech_started = false;
    }
  }

  return 0;
}


================================================
FILE: cxx-api-examples/whisper-cxx-api.cc
================================================
// cxx-api-examples/whisper-cxx-api.cc
// Copyright (c)  2024  Xiaomi Corporation

//
// This file demonstrates how to use whisper with sherpa-onnx's C++ API.
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
// tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
// rm sherpa-onnx-whisper-tiny.en.tar.bz2
//
// clang-format on

#include <chrono>  // NOLINT
#include <cstdio>
#include <iostream>
#include <string>

#include "sherpa-onnx/c-api/cxx-api.h"

int32_t main() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OfflineRecognizerConfig config;

  config.model_config.whisper.encoder =
      "./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx";
  config.model_config.whisper.decoder =
      "./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx";
  config.model_config.tokens =
      "./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt";

  config.model_config.num_threads = 1;

  std::cout << "Loading model\n";
  OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
  if (!recognizer.Get()) {
    std::cerr << "Please check your config\n";
    return -1;
  }
  std::cout << "Loading model done\n";

  std::string wave_filename = "./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav";
  Wave wave = ReadWave(wave_filename);
  if (wave.samples.empty()) {
    std::cerr << "Failed to read: '" << wave_filename << "'\n";
    return -1;
  }

  std::cout << "Start recognition\n";
  const auto begin = std::chrono::steady_clock::now();

  OfflineStream stream = recognizer.CreateStream();
  stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
                        wave.samples.size());

  recognizer.Decode(&stream);

  OfflineRecognizerResult result = recognizer.GetResult(&stream);

  const auto end = std::chrono::steady_clock::now();
  const float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;
  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
  float rtf = elapsed_seconds / duration;

  std::cout << "text: " << result.text << "\n";
  printf("Number of threads: %d\n", config.model_config.num_threads);
  printf("Duration: %.3fs\n", duration);
  printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
  printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
         duration, rtf);

  return 0;
}


================================================
FILE: cxx-api-examples/zipformer-ctc-simulate-streaming-alsa-cxx-api.cc
================================================
// cxx-api-examples/zipformer-ctc-simulate-streaming-alsa-cxx-api.cc
// Copyright (c)  2025  Xiaomi Corporation

//
// This file demonstrates how to use zipformer CTC with sherpa-onnx's C++ API
// for streaming speech recognition from a microphone.
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
// tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
// rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
//
// clang-format on

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

#include <chrono>              // NOLINT
#include <condition_variable>  // NOLINT
#include <iostream>
#include <mutex>  // NOLINT
#include <queue>
#include <string>
#include <thread>  // NOLINT
#include <utility>
#include <vector>

#include "sherpa-display.h"  // NOLINT
#include "sherpa-onnx/c-api/cxx-api.h"
#include "sherpa-onnx/csrc/alsa.h"

std::queue<std::vector<float>> samples_queue;
std::condition_variable condition_variable;
std::mutex mutex;
bool stop = false;

static void Handler(int32_t /*sig*/) {
  stop = true;
  condition_variable.notify_one();
  fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n");
}

static void RecordCallback(sherpa_onnx::Alsa *alsa) {
  int32_t chunk = 0.1 * alsa->GetActualSampleRate();
  while (!stop) {
    std::vector<float> samples = alsa->Read(chunk);

    std::lock_guard<std::mutex> lock(mutex);
    samples_queue.emplace(std::move(samples));
    condition_variable.notify_one();
  }
}

static sherpa_onnx::cxx::VoiceActivityDetector CreateVad() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  VadModelConfig config;
  config.silero_vad.model = "./silero_vad.onnx";
  config.silero_vad.threshold = 0.5;
  config.silero_vad.min_silence_duration = 0.1;
  config.silero_vad.min_speech_duration = 0.25;
  config.silero_vad.max_speech_duration = 8;
  config.sample_rate = 16000;
  config.debug = false;

  VoiceActivityDetector vad = VoiceActivityDetector::Create(config, 20);
  if (!vad.Get()) {
    std::cerr << "Failed to create VAD. Please check your config\n";
    exit(-1);
  }

  return vad;
}

static sherpa_onnx::cxx::OfflineRecognizer CreateOfflineRecognizer() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OfflineRecognizerConfig config;

  config.model_config.zipformer_ctc.model =
      "./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx";
  config.model_config.tokens =
      "./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt";

  config.model_config.num_threads = 2;
  config.model_config.debug = false;

  std::cout << "Loading model\n";
  OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
  if (!recognizer.Get()) {
    std::cerr << "Please check your config\n";
    exit(-1);
  }
  std::cout << "Loading model done\n";
  return recognizer;
}

int32_t main(int32_t argc, const char *argv[]) {
  const char *kUsageMessage = R"usage(
Usage:

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2

./zipformer-ctc-simulate-streaming-alsa-cxx-api device_name

The device name specifies which microphone to use in case there are several
on your system. You can use

  arecord -l

to find all available microphones on your computer. For instance, if it outputs

**** List of CAPTURE Hardware Devices ****
card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
  Subdevices: 1/1
  Subdevice #0: subdevice #0

and if you want to select card 3 and device 0 on that card, please use:

  plughw:3,0

as the device_name.
)usage";

  if (argc != 2) {
    fprintf(stderr, "%s\n", kUsageMessage);
    return -1;
  }

  signal(SIGINT, Handler);

  using namespace sherpa_onnx::cxx;  // NOLINT

  auto vad = CreateVad();
  auto recognizer = CreateOfflineRecognizer();

  int32_t expected_sample_rate = 16000;

  std::string device_name = argv[1];
  sherpa_onnx::Alsa alsa(device_name.c_str());
  fprintf(stderr, "Use recording device: %s\n", device_name.c_str());

  if (alsa.GetExpectedSampleRate() != expected_sample_rate) {
    fprintf(stderr, "sample rate: %d != %d\n", alsa.GetExpectedSampleRate(),
            expected_sample_rate);
    exit(-1);
  }

  int32_t window_size = 512;  // samples, please don't change

  int32_t offset = 0;
  std::vector<float> buffer;
  bool speech_started = false;

  auto started_time = std::chrono::steady_clock::now();

  SherpaDisplay display;

  std::thread record_thread(RecordCallback, &alsa);

  std::cout << "Started! Please speak\n";

  while (!stop) {
    {
      std::unique_lock<std::mutex> lock(mutex);
      while (samples_queue.empty() && !stop) {
        condition_variable.wait(lock);
      }
      if (stop) {
        break;
      }

      const auto &s = samples_queue.front();
      buffer.insert(buffer.end(), s.begin(), s.end());

      samples_queue.pop();
    }

    for (; offset + window_size < buffer.size(); offset += window_size) {
      vad.AcceptWaveform(buffer.data() + offset, window_size);
      if (!speech_started && vad.IsDetected()) {
        speech_started = true;
        started_time = std::chrono::steady_clock::now();
      }
    }
    if (!speech_started) {
      if (buffer.size() > 10 * window_size) {
        offset -= buffer.size() - 10 * window_size;
        buffer = {buffer.end() - 10 * window_size, buffer.end()};
      }
    }

    auto current_time = std::chrono::steady_clock::now();
    const float elapsed_seconds =
        std::chrono::duration_cast<std::chrono::milliseconds>(current_time -
                                                              started_time)
            .count() /
        1000.;

    if (speech_started && elapsed_seconds > 0.2) {
      OfflineStream stream = recognizer.CreateStream();
      stream.AcceptWaveform(expected_sample_rate, buffer.data(), buffer.size());

      recognizer.Decode(&stream);

      OfflineRecognizerResult result = recognizer.GetResult(&stream);
      display.UpdateText(result.text);
      display.Display();

      started_time = std::chrono::steady_clock::now();
    }

    while (!vad.IsEmpty()) {
      auto segment = vad.Front();

      vad.Pop();

      OfflineStream stream = recognizer.CreateStream();
      stream.AcceptWaveform(expected_sample_rate, segment.samples.data(),
                            segment.samples.size());

      recognizer.Decode(&stream);

      OfflineRecognizerResult result = recognizer.GetResult(&stream);

      display.UpdateText(result.text);
      display.FinalizeCurrentSentence();
      display.Display();

      buffer.clear();
      offset = 0;
      speech_started = false;
    }
  }

  record_thread.join();

  return 0;
}


================================================
FILE: cxx-api-examples/zipformer-ctc-simulate-streaming-microphone-cxx-api.cc
================================================
// cxx-api-examples/zipformer-ctc-simulate-streaming-microphone-cxx-api.cc
// Copyright (c)  2025  Xiaomi Corporation

//
// This file demonstrates how to use Zipformer CTC with sherpa-onnx's C++ API
// for streaming speech recognition from a microphone.
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
// tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
// rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
//
// clang-format on

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <chrono>              // NOLINT
#include <condition_variable>  // NOLINT
#include <iostream>
#include <mutex>  // NOLINT
#include <queue>
#include <vector>

#include "portaudio.h"       // NOLINT
#include "sherpa-display.h"  // NOLINT
#include "sherpa-onnx/c-api/cxx-api.h"
#include "sherpa-onnx/csrc/microphone.h"

std::queue<std::vector<float>> samples_queue;
std::condition_variable condition_variable;
std::mutex mutex;
bool stop = false;

static void Handler(int32_t /*sig*/) {
  stop = true;
  condition_variable.notify_one();
  fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n");
}

static int32_t RecordCallback(const void *input_buffer,
                              void * /*output_buffer*/,
                              unsigned long frames_per_buffer,  // NOLINT
                              const PaStreamCallbackTimeInfo * /*time_info*/,
                              PaStreamCallbackFlags /*status_flags*/,
                              void * /*user_data*/) {
  std::lock_guard<std::mutex> lock(mutex);
  samples_queue.emplace(
      reinterpret_cast<const float *>(input_buffer),
      reinterpret_cast<const float *>(input_buffer) + frames_per_buffer);
  condition_variable.notify_one();

  return stop ? paComplete : paContinue;
}

static sherpa_onnx::cxx::VoiceActivityDetector CreateVad() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  VadModelConfig config;
  config.silero_vad.model = "./silero_vad.onnx";
  config.silero_vad.threshold = 0.5;
  config.silero_vad.min_silence_duration = 0.1;
  config.silero_vad.min_speech_duration = 0.25;
  config.silero_vad.max_speech_duration = 8;
  config.sample_rate = 16000;
  config.debug = false;

  VoiceActivityDetector vad = VoiceActivityDetector::Create(config, 20);
  if (!vad.Get()) {
    std::cerr << "Failed to create VAD. Please check your config\n";
    exit(-1);
  }

  return vad;
}

static sherpa_onnx::cxx::OfflineRecognizer CreateOfflineRecognizer() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OfflineRecognizerConfig config;

  config.model_config.zipformer_ctc.model =
      "./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx";
  config.model_config.tokens =
      "./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt";

  config.model_config.num_threads = 2;
  config.model_config.debug = false;

  std::cout << "Loading model\n";
  OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
  if (!recognizer.Get()) {
    std::cerr << "Please check your config\n";
    exit(-1);
  }
  std::cout << "Loading model done\n";
  return recognizer;
}

int32_t main() {
  signal(SIGINT, Handler);

  using namespace sherpa_onnx::cxx;  // NOLINT

  auto vad = CreateVad();
  auto recognizer = CreateOfflineRecognizer();

  sherpa_onnx::Microphone mic;

  PaDeviceIndex num_devices = Pa_GetDeviceCount();
  if (num_devices == 0) {
    std::cerr << "  If you are using Linux, please try "
                 "./build/bin/zipformer-ctc-simulate-streaming-alsa-cxx-api\n";
    return -1;
  }

  int32_t device_index = Pa_GetDefaultInputDevice();
  const char *pDeviceIndex = std::getenv("SHERPA_ONNX_MIC_DEVICE");
  if (pDeviceIndex) {
    fprintf(stderr, "Use specified device: %s\n", pDeviceIndex);
    device_index = atoi(pDeviceIndex);
  }
  mic.PrintDevices(device_index);

  float mic_sample_rate = 16000;
  const char *sample_rate_str = std::getenv("SHERPA_ONNX_MIC_SAMPLE_RATE");
  if (sample_rate_str) {
    fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
    mic_sample_rate = atof(sample_rate_str);
  }
  float sample_rate = 16000;
  LinearResampler resampler;
  if (mic_sample_rate != sample_rate) {
    float min_freq = std::min(mic_sample_rate, sample_rate);
    float lowpass_cutoff = 0.99 * 0.5 * min_freq;

    int32_t lowpass_filter_width = 6;
    resampler = LinearResampler::Create(mic_sample_rate, sample_rate,
                                        lowpass_cutoff, lowpass_filter_width);
  }
  if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
                      nullptr)) {
    std::cerr << "Failed to open microphone device\n";
    return -1;
  }

  int32_t window_size = 512;  // samples, please don't change

  int32_t offset = 0;
  std::vector<float> buffer;
  bool speech_started = false;

  auto started_time = std::chrono::steady_clock::now();

  SherpaDisplay display;

  std::cout << "Started! Please speak\n";

  while (!stop) {
    {
      std::unique_lock<std::mutex> lock(mutex);
      while (samples_queue.empty() && !stop) {
        condition_variable.wait(lock);
      }
      if (stop) {
        break;
      }

      const auto &s = samples_queue.front();
      if (!resampler.Get()) {
        buffer.insert(buffer.end(), s.begin(), s.end());
      } else {
        auto resampled = resampler.Resample(s.data(), s.size(), false);
        buffer.insert(buffer.end(), resampled.begin(), resampled.end());
      }

      samples_queue.pop();
    }

    for (; offset + window_size < buffer.size(); offset += window_size) {
      vad.AcceptWaveform(buffer.data() + offset, window_size);
      if (!speech_started && vad.IsDetected()) {
        speech_started = true;
        started_time = std::chrono::steady_clock::now();
      }
    }
    if (!speech_started) {
      if (buffer.size() > 10 * window_size) {
        offset -= buffer.size() - 10 * window_size;
        buffer = {buffer.end() - 10 * window_size, buffer.end()};
      }
    }

    auto current_time = std::chrono::steady_clock::now();
    const float elapsed_seconds =
        std::chrono::duration_cast<std::chrono::milliseconds>(current_time -
                                                              started_time)
            .count() /
        1000.;

    if (speech_started && elapsed_seconds > 0.2) {
      OfflineStream stream = recognizer.CreateStream();
      stream.AcceptWaveform(sample_rate, buffer.data(), buffer.size());

      recognizer.Decode(&stream);

      OfflineRecognizerResult result = recognizer.GetResult(&stream);
      display.UpdateText(result.text);
      display.Display();

      started_time = std::chrono::steady_clock::now();
    }

    while (!vad.IsEmpty()) {
      auto segment = vad.Front();

      vad.Pop();

      OfflineStream stream = recognizer.CreateStream();
      stream.AcceptWaveform(sample_rate, segment.samples.data(),
                            segment.samples.size());

      recognizer.Decode(&stream);

      OfflineRecognizerResult result = recognizer.GetResult(&stream);

      display.UpdateText(result.text);
      display.FinalizeCurrentSentence();
      display.Display();

      buffer.clear();
      offset = 0;
      speech_started = false;
    }
  }

  return 0;
}


================================================
FILE: cxx-api-examples/zipformer-transducer-simulate-streaming-microphone-cxx-api.cc
================================================
// cxx-api-examples/zipformer-transducer-simulate-streaming-microphone-cxx-api.cc
// Copyright (c)  2025  Xiaomi Corporation
//
// This file demonstrates how to use Zipformer transducer with sherpa-onnx's C++
// API for streaming speech recognition from a microphone.
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01.tar.bz2
// tar xvf sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01.tar.bz2
// rm sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01.tar.bz2
//
// clang-format on

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <chrono>              // NOLINT
#include <condition_variable>  // NOLINT
#include <iostream>
#include <mutex>  // NOLINT
#include <queue>
#include <vector>

#include "portaudio.h"       // NOLINT
#include "sherpa-display.h"  // NOLINT
#include "sherpa-onnx/c-api/cxx-api.h"
#include "sherpa-onnx/csrc/microphone.h"

std::queue<std::vector<float>> samples_queue;
std::condition_variable condition_variable;
std::mutex mutex;
bool stop = false;

static void Handler(int32_t /*sig*/) {
  stop = true;
  condition_variable.notify_one();
  fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n");
}

static int32_t RecordCallback(const void *input_buffer,
                              void * /*output_buffer*/,
                              unsigned long frames_per_buffer,  // NOLINT
                              const PaStreamCallbackTimeInfo * /*time_info*/,
                              PaStreamCallbackFlags /*status_flags*/,
                              void * /*user_data*/) {
  std::lock_guard<std::mutex> lock(mutex);
  samples_queue.emplace(
      reinterpret_cast<const float *>(input_buffer),
      reinterpret_cast<const float *>(input_buffer) + frames_per_buffer);
  condition_variable.notify_one();

  return stop ? paComplete : paContinue;
}

static sherpa_onnx::cxx::VoiceActivityDetector CreateVad() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  VadModelConfig config;
  config.silero_vad.model = "./silero_vad.onnx";
  config.silero_vad.threshold = 0.5;
  config.silero_vad.min_silence_duration = 0.1;
  config.silero_vad.min_speech_duration = 0.25;
  config.silero_vad.max_speech_duration = 8;
  config.sample_rate = 16000;
  config.debug = false;

  VoiceActivityDetector vad = VoiceActivityDetector::Create(config, 20);
  if (!vad.Get()) {
    std::cerr << "Failed to create VAD. Please check your config\n";
    exit(-1);
  }

  return vad;
}

static sherpa_onnx::cxx::OfflineRecognizer CreateOfflineRecognizer() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OfflineRecognizerConfig config;

  config.model_config.transducer.encoder =
      "./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/"
      "encoder-epoch-99-avg-1.int8.onnx";

  config.model_config.transducer.decoder =
      "./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/"
      "decoder-epoch-99-avg-1.onnx";

  config.model_config.transducer.joiner =
      "./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/"
      "joiner-epoch-99-avg-1.int8.onnx";
  config.model_config.tokens =
      "./sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01/tokens.txt";

  config.model_config.num_threads = 2;
  config.model_config.debug = false;

  std::cout << "Loading model\n";
  OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
  if (!recognizer.Get()) {
    std::cerr << "Please check your config\n";
    exit(-1);
  }
  std::cout << "Loading model done\n";
  return recognizer;
}

int32_t main() {
  signal(SIGINT, Handler);

  using namespace sherpa_onnx::cxx;  // NOLINT

  auto vad = CreateVad();
  auto recognizer = CreateOfflineRecognizer();

  sherpa_onnx::Microphone mic;

  PaDeviceIndex num_devices = Pa_GetDeviceCount();
  if (num_devices == 0) {
    std::cerr << "  If you are using Linux, please try "
                 "./build/bin/zipformer-ctc-simulate-streaming-alsa-cxx-api\n";
    return -1;
  }

  int32_t device_index = Pa_GetDefaultInputDevice();
  const char *pDeviceIndex = std::getenv("SHERPA_ONNX_MIC_DEVICE");
  if (pDeviceIndex) {
    fprintf(stderr, "Use specified device: %s\n", pDeviceIndex);
    device_index = atoi(pDeviceIndex);
  }
  mic.PrintDevices(device_index);

  float mic_sample_rate = 16000;
  const char *sample_rate_str = std::getenv("SHERPA_ONNX_MIC_SAMPLE_RATE");
  if (sample_rate_str) {
    fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
    mic_sample_rate = atof(sample_rate_str);
  }
  float sample_rate = 16000;
  LinearResampler resampler;
  if (mic_sample_rate != sample_rate) {
    float min_freq = std::min(mic_sample_rate, sample_rate);
    float lowpass_cutoff = 0.99 * 0.5 * min_freq;

    int32_t lowpass_filter_width = 6;
    resampler = LinearResampler::Create(mic_sample_rate, sample_rate,
                                        lowpass_cutoff, lowpass_filter_width);
  }
  if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
                      nullptr)) {
    std::cerr << "Failed to open microphone device\n";
    return -1;
  }

  int32_t window_size = 512;  // samples, please don't change

  int32_t offset = 0;
  std::vector<float> buffer;
  bool speech_started = false;

  auto started_time = std::chrono::steady_clock::now();

  SherpaDisplay display;

  std::cout << "Started! Please speak\n";

  while (!stop) {
    {
      std::unique_lock<std::mutex> lock(mutex);
      while (samples_queue.empty() && !stop) {
        condition_variable.wait(lock);
      }
      if (stop) {
        break;
      }

      const auto &s = samples_queue.front();
      if (!resampler.Get()) {
        buffer.insert(buffer.end(), s.begin(), s.end());
      } else {
        auto resampled = resampler.Resample(s.data(), s.size(), false);
        buffer.insert(buffer.end(), resampled.begin(), resampled.end());
      }

      samples_queue.pop();
    }

    for (; offset + window_size < buffer.size(); offset += window_size) {
      vad.AcceptWaveform(buffer.data() + offset, window_size);
      if (!speech_started && vad.IsDetected()) {
        speech_started = true;
        started_time = std::chrono::steady_clock::now();
      }
    }
    if (!speech_started) {
      if (buffer.size() > 10 * window_size) {
        offset -= buffer.size() - 10 * window_size;
        buffer = {buffer.end() - 10 * window_size, buffer.end()};
      }
    }

    auto current_time = std::chrono::steady_clock::now();
    const float elapsed_seconds =
        std::chrono::duration_cast<std::chrono::milliseconds>(current_time -
                                                              started_time)
            .count() /
        1000.;

    if (speech_started && elapsed_seconds > 0.2) {
      OfflineStream stream = recognizer.CreateStream();
      stream.AcceptWaveform(sample_rate, buffer.data(), buffer.size());

      recognizer.Decode(&stream);

      OfflineRecognizerResult result = recognizer.GetResult(&stream);
      display.UpdateText(result.text);
      display.Display();

      started_time = std::chrono::steady_clock::now();
    }

    while (!vad.IsEmpty()) {
      auto segment = vad.Front();

      vad.Pop();

      OfflineStream stream = recognizer.CreateStream();
      stream.AcceptWaveform(sample_rate, segment.samples.data(),
                            segment.samples.size());

      recognizer.Decode(&stream);

      OfflineRecognizerResult result = recognizer.GetResult(&stream);

      display.UpdateText(result.text);
      display.FinalizeCurrentSentence();
      display.Display();

      buffer.clear();
      offset = 0;
      speech_started = false;
    }
  }

  return 0;
}


================================================
FILE: cxx-api-examples/zipvoice-tts-zh-en-cxx-api.cc
================================================
// cxx-api-examples/zipvoice-tts-zh-en-cxx-api.cc
//
// Copyright (c)  2026  Xiaomi Corporation

// This file shows how to use sherpa-onnx CXX API
// for Chinese/English zero-shot TTS with ZipVoice.
//
// clang-format off
/*
Usage

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
tar xf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
rm sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos_24khz.onnx

./zipvoice-tts-zh-en-cxx-api
*/
// clang-format on

#include <cstdint>
#include <cstdio>
#include <string>
#include <utility>

#include "sherpa-onnx/c-api/cxx-api.h"

static int32_t ProgressCallback(const float *samples, int32_t num_samples,
                                float progress, void *arg) {
  fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
  // return 1 to continue generating
  // return 0 to stop generating
  return 1;
}

int32_t main(int32_t argc, char *argv[]) {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OfflineTtsConfig config;

  config.model.zipvoice.encoder =
      "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/encoder.int8.onnx";
  config.model.zipvoice.decoder =
      "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/decoder.int8.onnx";
  config.model.zipvoice.data_dir =
      "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/espeak-ng-data";
  config.model.zipvoice.lexicon =
      "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/lexicon.txt";
  config.model.zipvoice.tokens =
      "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/tokens.txt";
  config.model.zipvoice.vocoder = "./vocos_24khz.onnx";

  config.model.num_threads = 2;

  // If you want to see debug messages, please set it to 1
  config.model.debug = 0;

  std::string filename = "./generated-zipvoice-zh-en-cxx.wav";
  std::string text =
      "小米的价值观是真诚, 热爱. 真诚，就是不欺人也不自欺. 热爱, "
      "就是全心投入并享受其中.";
  std::string reference_text =
      "那还是三十六年前, 一九八七年. 我呢考上了武汉大学的计算机系.";
  std::string reference_audio_file =
      "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/test_wavs/leijun-1.wav";

  auto tts = OfflineTts::Create(config);

  GenerationConfig gen_config;
  gen_config.speed = 1.0;
  gen_config.num_steps = 4;
  gen_config.reference_text = reference_text;
  gen_config.extra["min_char_in_sentence"] = "10";

  Wave wave = ReadWave(reference_audio_file);
  gen_config.reference_audio = std::move(wave.samples);
  gen_config.reference_sample_rate = wave.sample_rate;

#if 0
  // If you don't want to use a callback, then please enable this branch
  GeneratedAudio audio = tts.Generate(text, gen_config);
#else
  GeneratedAudio audio = tts.Generate(text, gen_config, ProgressCallback);
#endif

  WriteWave(filename, {audio.samples, audio.sample_rate});

  fprintf(stderr, "Input text is: %s\n", text.c_str());
  fprintf(stderr, "Saved to: %s\n", filename.c_str());

  return 0;
}


================================================
FILE: dart-api-examples/.gitignore
================================================
!run*.sh
# See https://www.dartlang.org/guides/libraries/private-files

# Files and directories created by pub
.dart_tool/
.packages
build/
# If you're building an application, you may want to check-in your pubspec.lock
pubspec.lock

# Directory created by dartdoc
# If you don't generate documentation locally you can remove this line.
doc/api/

# dotenv environment variables file
.env*

# Avoid committing generated Javascript files:
*.dart.js
*.info.json      # Produced by the --dump-info flag.
*.js             # When generated by dart2js. Don't specify *.js if your
                 # project includes source files written in JavaScript.
*.js_
*.js.deps
*.js.map

.flutter-plugins
.flutter-plugins-dependencies


================================================
FILE: dart-api-examples/README.md
================================================
# Introduction

This directory contains examples for Dart API.

You can find the package at
https://pub.dev/packages/sherpa_onnx

## Description

| Directory | Description |
|-----------|-------------|
| [./speaker-diarization](./speaker-diarization)| Example for speaker diarization.|
| [./add-punctuations](./add-punctuations)| Example for adding punctuations to text.|
| [./audio-tagging](./audio-tagging)| Example for audio tagging.|
| [./keyword-spotter](./keyword-spotter)| Example for keyword spotting|
| [./non-streaming-asr](./non-streaming-asr)| Example for non-streaming speech recognition|
| [./speaker-identification](./speaker-identification)| Example for speaker identification and verification.|
| [./streaming-asr](./streaming-asr)| Example for streaming speech recognition|
| [./tts](./tts)| Example for text to speech|
| [./vad-with-non-streaming-asr](./vad-with-non-streaming-asr)| Example for voice activity detection with non-streaming speech recognition. You can use it to generate subtitles.|
| [./vad](./vad)| Example for voice activity detection|
| [./speech-enhancement-gtcrn](./speech-enhancement-gtcrn)| Example for speech enhancement/denoising with GTCRN.|
| [./speech-enhancement-dpdfnet](./speech-enhancement-dpdfnet)| Example for speech enhancement/denoising with DPDFNet, including the 16 kHz family (`dpdfnet_baseline`, `dpdfnet2`, `dpdfnet4`, `dpdfnet8`).|
| [./streaming-speech-enhancement-gtcrn](./streaming-speech-enhancement-gtcrn)| Example for streaming speech enhancement/denoising with GTCRN.|
| [./streaming-speech-enhancement-dpdfnet](./streaming-speech-enhancement-dpdfnet)| Example for streaming speech enhancement/denoising with DPDFNet.|

## How to create an example in this folder

```bash
dart create vad
cd vad

# Edit pubspec.yaml and add sherpa_onnx to dependencies

dart pub get
dart run
```


================================================
FILE: dart-api-examples/add-punctuations/.gitignore
================================================
# https://dart.dev/guides/libraries/private-files
# Created by `dart pub`
.dart_tool/


================================================
FILE: dart-api-examples/add-punctuations/README.md
================================================
# Introduction

This example shows how to use the Dart API from sherpa-onnx to add punctuations to text.

| File | Description|
|------|------------|
|[./bin/punctuations.dart](./bin/punctuations.dart)| Use a [CT Transformer model](https://modelscope.cn/models/iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/summary) to add punctuations to text. See [./run-ct-transformer.sh](./run-ct-transformer.sh)|


================================================
FILE: dart-api-examples/add-punctuations/analysis_options.yaml
================================================
# This file configures the static analysis results for your project (errors,
# warnings, and lints).
#
# This enables the 'recommended' set of lints from `package:lints`.
# This set helps identify many issues that may lead to problems when running
# or consuming Dart code, and enforces writing Dart using a single, idiomatic
# style and format.
#
# If you want a smaller set of lints you can change this to specify
# 'package:lints/core.yaml'. These are just the most critical lints
# (the recommended set includes the core lints).
# The core lints are also what is used by pub.dev for scoring packages.

include: package:lints/recommended.yaml

# Uncomment the following section to specify additional rules.

# linter:
#   rules:
#     - camel_case_types

# analyzer:
#   exclude:
#     - path/to/excluded/files/**

# For more information about the core and recommended set of lints, see
# https://dart.dev/go/core-lints

# For additional information about configuring this file, see
# https://dart.dev/guides/language/analysis-options


================================================
FILE: dart-api-examples/add-punctuations/bin/punctuations.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()..addOption('model', help: 'Path to model.onnx');

  final res = parser.parse(arguments);
  if (res['model'] == null) {
    print(parser.usage);
    exit(1);
  }

  final modelFile = res['model'] as String;
  final modelConfig = sherpa_onnx.OfflinePunctuationModelConfig(
    ctTransformer: modelFile,
    numThreads: 1,
    provider: 'cpu',
    debug: false,
  );

  final config = sherpa_onnx.OfflinePunctuationConfig(model: modelConfig);

  final punct = sherpa_onnx.OfflinePunctuation(config: config);

  final texts = [
    '这是一个测试你好吗How are you我很好thank you are you ok谢谢你',
    '我们都是木头人不会说话不会动',
    'The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry',
  ];

  for (final t in texts) {
    final textWithPunct = punct.addPunct(t);
    print('----------');
    print('Before: $t');
    print('After: $textWithPunct');
  }
  print('----------');

  punct.free();
}


================================================
FILE: dart-api-examples/add-punctuations/pubspec.yaml
================================================
name: add_punctuations

description: >
  This example demonstrates how to use the Dart API to add punctuations to text.

version: 1.0.0

environment:
  sdk: ">=3.0.0 <4.0.0"

dependencies:
  sherpa_onnx: ^1.12.31
  path: ^1.9.0
  args: ^2.5.0

dev_dependencies:
  lints: ^3.0.0


================================================
FILE: dart-api-examples/add-punctuations/run-ct-transformer.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [[ ! -f ./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
  tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
  rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
fi

dart run \
  ./bin/punctuations.dart \
  --model ./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx


================================================
FILE: dart-api-examples/audio-tagging/.gitignore
================================================
# https://dart.dev/guides/libraries/private-files
# Created by `dart pub`
.dart_tool/


================================================
FILE: dart-api-examples/audio-tagging/README.md
================================================
# Introduction

This example shows how to use the Dart API from sherpa-onnx for audio tagging.

| File | Description|
|------|------------|
|[./bin/zipformer.dart](./bin/zipformer.dart)| Use a Zipformer model for audio tagging. See [./run-zipformer.sh](./run-zipformer.sh)|
|[./bin/ced.dart](./bin/ced.dart)| Use a [CED](https://github.com/RicherMans/CED) model for audio tagging. See [./run-ced.sh](./run-ced.sh)|


================================================
FILE: dart-api-examples/audio-tagging/analysis_options.yaml
================================================
# This file configures the static analysis results for your project (errors,
# warnings, and lints).
#
# This enables the 'recommended' set of lints from `package:lints`.
# This set helps identify many issues that may lead to problems when running
# or consuming Dart code, and enforces writing Dart using a single, idiomatic
# style and format.
#
# If you want a smaller set of lints you can change this to specify
# 'package:lints/core.yaml'. These are just the most critical lints
# (the recommended set includes the core lints).
# The core lints are also what is used by pub.dev for scoring packages.

include: package:lints/recommended.yaml

# Uncomment the following section to specify additional rules.

# linter:
#   rules:
#     - camel_case_types

# analyzer:
#   exclude:
#     - path/to/excluded/files/**

# For more information about the core and recommended set of lints, see
# https://dart.dev/go/core-lints

# For additional information about configuring this file, see
# https://dart.dev/guides/language/analysis-options


================================================
FILE: dart-api-examples/audio-tagging/bin/ced.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the zipformer model')
    ..addOption('labels', help: 'Path to class_labels_indices.csv')
    ..addOption('top-k', help: 'topK events to be returned', defaultsTo: '5')
    ..addOption('wav', help: 'Path to test.wav to be tagged');

  final res = parser.parse(arguments);
  if (res['model'] == null || res['labels'] == null || res['wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final labels = res['labels'] as String;
  final topK = int.tryParse(res['top-k'] as String) ?? 5;
  final wav = res['wav'] as String;

  final modelConfig = sherpa_onnx.AudioTaggingModelConfig(
    ced: model,
    numThreads: 1,
    debug: true,
    provider: 'cpu',
  );

  final config = sherpa_onnx.AudioTaggingConfig(
    model: modelConfig,
    labels: labels,
  );

  final at = sherpa_onnx.AudioTagging(config: config);

  final waveData = sherpa_onnx.readWave(wav);

  final stream = at.createStream();
  stream.acceptWaveform(
      samples: waveData.samples, sampleRate: waveData.sampleRate);

  final events = at.compute(stream: stream, topK: topK);

  print(events);

  stream.free();
  at.free();
}


================================================
FILE: dart-api-examples/audio-tagging/bin/zipformer.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the zipformer model')
    ..addOption('labels', help: 'Path to class_labels_indices.csv')
    ..addOption('top-k', help: 'topK events to be returned', defaultsTo: '5')
    ..addOption('wav', help: 'Path to test.wav to be tagged');

  final res = parser.parse(arguments);
  if (res['model'] == null || res['labels'] == null || res['wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final labels = res['labels'] as String;
  final topK = int.tryParse(res['top-k'] as String) ?? 5;
  final wav = res['wav'] as String;

  final zipformerModelConfig =
      sherpa_onnx.OfflineZipformerAudioTaggingModelConfig(
    model: model,
  );

  final modelConfig = sherpa_onnx.AudioTaggingModelConfig(
    zipformer: zipformerModelConfig,
    numThreads: 1,
    debug: true,
    provider: 'cpu',
  );

  final config = sherpa_onnx.AudioTaggingConfig(
    model: modelConfig,
    labels: labels,
  );

  final at = sherpa_onnx.AudioTagging(config: config);

  final waveData = sherpa_onnx.readWave(wav);

  final stream = at.createStream();
  stream.acceptWaveform(
      samples: waveData.samples, sampleRate: waveData.sampleRate);

  final events = at.compute(stream: stream, topK: topK);

  print(events);

  stream.free();
  at.free();
}


================================================
FILE: dart-api-examples/audio-tagging/pubspec.yaml
================================================
name: audio_tagging

description: >
  This example demonstrates how to use the Dart API for audio tagging.

version: 1.0.0

environment:
  sdk: ">=3.0.0 <4.0.0"

dependencies:
  sherpa_onnx: ^1.12.31
  path: ^1.9.0
  args: ^2.5.0

dev_dependencies:
  lints: ^3.0.0


================================================
FILE: dart-api-examples/audio-tagging/run-ced.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [[ ! -f ./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/model.onnx ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2
  tar xvf sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2
  rm sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2
fi

for w in 1 2 3 4 5 6; do
  dart run \
    ./bin/ced.dart \
    --model ./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/model.int8.onnx \
    --labels ./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/class_labels_indices.csv \
    --wav ./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/$w.wav
done


================================================
FILE: dart-api-examples/audio-tagging/run-zipformer.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [[ ! -f ./sherpa-onnx-zipformer-audio-tagging-2024-04-09/model.onnx ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
  tar xvf sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
  rm sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
fi

for w in 1 2 3 4 5 6; do
  dart run \
    ./bin/zipformer.dart \
    --model ./sherpa-onnx-zipformer-audio-tagging-2024-04-09/model.int8.onnx \
    --labels ./sherpa-onnx-zipformer-audio-tagging-2024-04-09/class_labels_indices.csv \
    --wav ./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/$w.wav
done


================================================
FILE: dart-api-examples/keyword-spotter/.gitignore
================================================
# https://dart.dev/guides/libraries/private-files
# Created by `dart pub`
.dart_tool/


================================================
FILE: dart-api-examples/keyword-spotter/CHANGELOG.md
================================================
## 1.0.0

- Initial version.


================================================
FILE: dart-api-examples/keyword-spotter/README.md
================================================
# Introduction

This directory contains keyword spotting examples using
Dart API from [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx)


================================================
FILE: dart-api-examples/keyword-spotter/analysis_options.yaml
================================================
# This file configures the static analysis results for your project (errors,
# warnings, and lints).
#
# This enables the 'recommended' set of lints from `package:lints`.
# This set helps identify many issues that may lead to problems when running
# or consuming Dart code, and enforces writing Dart using a single, idiomatic
# style and format.
#
# If you want a smaller set of lints you can change this to specify
# 'package:lints/core.yaml'. These are just the most critical lints
# (the recommended set includes the core lints).
# The core lints are also what is used by pub.dev for scoring packages.

include: package:lints/recommended.yaml

# Uncomment the following section to specify additional rules.

# linter:
#   rules:
#     - camel_case_types

# analyzer:
#   exclude:
#     - path/to/excluded/files/**

# For more information about the core and recommended set of lints, see
# https://dart.dev/go/core-lints

# For additional information about configuring this file, see
# https://dart.dev/guides/language/analysis-options


================================================
FILE: dart-api-examples/keyword-spotter/bin/zipformer-transducer.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('encoder', help: 'Path to the encoder model')
    ..addOption('decoder', help: 'Path to decoder model')
    ..addOption('joiner', help: 'Path to joiner model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('keywords-file', help: 'Path to keywords.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['encoder'] == null ||
      res['decoder'] == null ||
      res['joiner'] == null ||
      res['tokens'] == null ||
      res['keywords-file'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final encoder = res['encoder'] as String;
  final decoder = res['decoder'] as String;
  final joiner = res['joiner'] as String;
  final tokens = res['tokens'] as String;
  final keywordsFile = res['keywords-file'] as String;
  final inputWav = res['input-wav'] as String;

  final transducer = sherpa_onnx.OnlineTransducerModelConfig(
    encoder: encoder,
    decoder: decoder,
    joiner: joiner,
  );

  final modelConfig = sherpa_onnx.OnlineModelConfig(
    transducer: transducer,
    tokens: tokens,
    debug: true,
    numThreads: 1,
  );
  final config = sherpa_onnx.KeywordSpotterConfig(
    model: modelConfig,
    keywordsFile: keywordsFile,
  );
  final spotter = sherpa_onnx.KeywordSpotter(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  var stream = spotter.createStream();

  // simulate streaming. You can choose an arbitrary chunk size.
  // chunkSize of a single sample is also ok, i.e, chunkSize = 1
  final chunkSize = 1600; // 0.1 second for 16kHz
  final numChunks = waveData.samples.length ~/ chunkSize;

  for (int i = 0; i != numChunks; ++i) {
    int start = i * chunkSize;
    stream.acceptWaveform(
      samples:
          Float32List.sublistView(waveData.samples, start, start + chunkSize),
      sampleRate: waveData.sampleRate,
    );
    while (spotter.isReady(stream)) {
      spotter.decode(stream);
      final result = spotter.getResult(stream);
      if (result.keyword != '') {
        // Remember to reset the stream right after detecting a keyword
        spotter.reset(stream);
        print('Detected: ${result.keyword}');
      }
    }
  }

  // 0.5 seconds, assume sampleRate is 16kHz
  final tailPaddings = Float32List(8000);
  stream.acceptWaveform(
    samples: tailPaddings,
    sampleRate: waveData.sampleRate,
  );

  while (spotter.isReady(stream)) {
    spotter.decode(stream);
    final result = spotter.getResult(stream);
    if (result.keyword != '') {
      print('Detected: ${result.keyword}');
    }
  }

  stream.free();
  spotter.free();
}


================================================
FILE: dart-api-examples/keyword-spotter/pubspec.yaml
================================================
name: keyword_spotter

description: >
  This example demonstrates how to use the Dart API for keyword spotting

version: 1.0.0

environment:
  sdk: ">=3.0.0 <4.0.0"

dependencies:
  sherpa_onnx: ^1.12.31
  # sherpa_onnx:
  #   path: ../../flutter/sherpa_onnx
  path: ^1.9.0
  args: ^2.5.0

dev_dependencies:
  lints: ^3.0.0


================================================
FILE: dart-api-examples/keyword-spotter/run-zh.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
fi

dart run \
  ./bin/zipformer-transducer.dart \
  --encoder ./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx \
  --decoder ./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx \
  --joiner ./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx \
  --tokens ./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt \
  --keywords-file ./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/test_keywords.txt \
  --input-wav ./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav


================================================
FILE: dart-api-examples/non-streaming-asr/.gitignore
================================================
# https://dart.dev/guides/libraries/private-files
# Created by `dart pub`
.dart_tool/


================================================
FILE: dart-api-examples/non-streaming-asr/CHANGELOG.md
================================================
## 1.0.0

- Initial version.


================================================
FILE: dart-api-examples/non-streaming-asr/README.md
================================================
# Introduction

This folder contains examples for non-streaming ASR with Dart API.

| File | Description|
|------|------------|
|[./bin/dolphin-ctc.dart](./bin/dolphin-ctc.dart)| Use a [Dolphin](https://github.com/DataoceanAI/Dolphin) Ctc model for speech recognition. See [./run-dolphin-ctc.sh](./run-dolphin-ctc.sh)|
|[./bin/nemo-ctc.dart](./bin/nemo-ctc.dart)| Use a NeMo Ctc model for speech recognition. See [./run-nemo-ctc.sh](./run-nemo-ctc.sh)|
|[./bin/nemo-transducer.dart](./bin/nemo-transducer.dart)| Use a NeMo transducer model for speech recognition. See [./run-nemo-transducer.sh](./run-nemo-transducer.sh)|
|[./bin/paraformer.dart](./bin/paraformer.dart)|Use a paraformer model for speech recognition. See [./run-paraformer.sh](./run-paraformer.sh)|
|[./bin/telespeech-ctc.dart](./bin/telespeech-ctc.dart)| Use models from [Tele-AI/TeleSpeech-ASR](https://github.com/Tele-AI/TeleSpeech-ASR) for speech recognition. See [./run-telespeech-ctc.sh](./run-telespeech-ctc.sh)|
|[./bin/whisper.dart](./bin/whisper.dart)| Use whisper for speech recognition. See [./run-whisper.sh](./run-whisper.sh)|
|[./bin/zipformer-transducer.dart](./bin/zipformer-transducer.dart)| Use a zipformer transducer for speech recognition. See [./run-zipformer-transducer.sh](./run-zipformer-transducer.sh)|
|[./bin/vad-with-paraformer.dart](./bin/vad-with-paraformer.dart)| Use a [silero-vad](https://github.com/snakers4/silero-vad) with paraformer for speech recognition. See [./run-vad-with-paraformer.sh](./run-vad-with-paraformer.sh)|
|[./bin/sense-voice.dart](./bin/sense-voice.dart)| Use a SenseVoice CTC model for speech recognition. See [./run-sense-voice.sh](./run-sense-voice.sh)|


================================================
FILE: dart-api-examples/non-streaming-asr/analysis_options.yaml
================================================
# This file configures the static analysis results for your project (errors,
# warnings, and lints).
#
# This enables the 'recommended' set of lints from `package:lints`.
# This set helps identify many issues that may lead to problems when running
# or consuming Dart code, and enforces writing Dart using a single, idiomatic
# style and format.
#
# If you want a smaller set of lints you can change this to specify
# 'package:lints/core.yaml'. These are just the most critical lints
# (the recommended set includes the core lints).
# The core lints are also what is used by pub.dev for scoring packages.

include: package:lints/recommended.yaml

# Uncomment the following section to specify additional rules.

# linter:
#   rules:
#     - camel_case_types

# analyzer:
#   exclude:
#     - path/to/excluded/files/**

# For more information about the core and recommended set of lints, see
# https://dart.dev/go/core-lints

# For additional information about configuring this file, see
# https://dart.dev/guides/language/analysis-options


================================================
FILE: dart-api-examples/non-streaming-asr/bin/dolphin-ctc.dart
================================================
// Copyright (c)  2025  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the Dolphin CTC model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final dolphin = sherpa_onnx.OfflineDolphinModelConfig(model: model);

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    dolphin: dolphin,
    tokens: tokens,
    debug: true,
    numThreads: 1,
  );
  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  final stream = recognizer.createStream();

  stream.acceptWaveform(
      samples: waveData.samples, sampleRate: waveData.sampleRate);
  recognizer.decode(stream);

  final result = recognizer.getResult(stream);
  print(result.text);

  stream.free();
  recognizer.free();
}


================================================
FILE: dart-api-examples/non-streaming-asr/bin/fire-red-asr-ctc.dart
================================================
// Copyright (c)  2025  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the FireRedASR CTC model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final fireRedAsrCtc = sherpa_onnx.OfflineFireRedAsrCtcModelConfig(
    model: model,
  );

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    fireRedAsrCtc: fireRedAsrCtc,
    tokens: tokens,
    debug: true,
    numThreads: 1,
  );
  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  final stream = recognizer.createStream();

  stream.acceptWaveform(
    samples: waveData.samples,
    sampleRate: waveData.sampleRate,
  );
  recognizer.decode(stream);

  final result = recognizer.getResult(stream);
  print(result.text);

  stream.free();
  recognizer.free();
}


================================================
FILE: dart-api-examples/non-streaming-asr/bin/fire-red-asr.dart
================================================
// Copyright (c)  2025  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('encoder', help: 'Path to the FireRedAsr encoder model')
    ..addOption('decoder', help: 'Path to FireRedAsr decoder model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['encoder'] == null ||
      res['decoder'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final encoder = res['encoder'] as String;
  final decoder = res['decoder'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final fireRedAsr = sherpa_onnx.OfflineFireRedAsrModelConfig(
    encoder: encoder,
    decoder: decoder,
  );

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    fireRedAsr: fireRedAsr,
    tokens: tokens,
    debug: false,
    numThreads: 1,
  );
  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  final stream = recognizer.createStream();

  stream.acceptWaveform(
      samples: waveData.samples, sampleRate: waveData.sampleRate);
  recognizer.decode(stream);

  final result = recognizer.getResult(stream);
  print(result.text);

  stream.free();
  recognizer.free();
}


================================================
FILE: dart-api-examples/non-streaming-asr/bin/funasr-nano.dart
================================================
// Copyright (c)  2026  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('encoder-adaptor', help: 'Path to the encoder adaptor model')
    ..addOption('llm', help: 'Path to the llm model')
    ..addOption('embedding', help: 'Path to the embedding model')
    ..addOption('tokenizer', help: 'Path to the tokenizer directory')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['encoder-adaptor'] == null ||
      res['llm'] == null ||
      res['embedding'] == null ||
      res['tokenizer'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final encoderAdaptor = res['encoder-adaptor'] as String;
  final llm = res['llm'] as String;
  final embedding = res['embedding'] as String;
  final tokenizer = res['tokenizer'] as String;
  final inputWav = res['input-wav'] as String;

  final funasrNano = sherpa_onnx.OfflineFunAsrNanoModelConfig(
    encoderAdaptor: encoderAdaptor,
    llm: llm,
    embedding: embedding,
    tokenizer: tokenizer,
  );

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    funasrNano: funasrNano,
    tokens: '',
    debug: true,
    numThreads: 1,
  );
  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  final stream = recognizer.createStream();

  stream.acceptWaveform(
    samples: waveData.samples,
    sampleRate: waveData.sampleRate,
  );
  recognizer.decode(stream);

  final result = recognizer.getResult(stream);
  print(result.text);

  stream.free();
  recognizer.free();
}


================================================
FILE: dart-api-examples/non-streaming-asr/bin/medasr-ctc.dart
================================================
// Copyright (c)  2025  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the MedASR CTC model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final medasr = sherpa_onnx.OfflineMedAsrCtcModelConfig(model: model);

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    medasr: medasr,
    tokens: tokens,
    debug: true,
    numThreads: 1,
  );
  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  final stream = recognizer.createStream();

  stream.acceptWaveform(
    samples: waveData.samples,
    sampleRate: waveData.sampleRate,
  );
  recognizer.decode(stream);

  final result = recognizer.getResult(stream);
  print(result.text);

  stream.free();
  recognizer.free();
}


================================================
FILE: dart-api-examples/non-streaming-asr/bin/moonshine.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('preprocessor',
        help: 'Path to the moonshine preprocessor model')
    ..addOption('encoder', help: 'Path to the moonshine encoder model')
    ..addOption('uncached-decoder',
        help: 'Path to moonshine uncached decoder model')
    ..addOption('cached-decoder',
        help: 'Path to moonshine cached decoder model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['preprocessor'] == null ||
      res['encoder'] == null ||
      res['uncached-decoder'] == null ||
      res['cached-decoder'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final preprocessor = res['preprocessor'] as String;
  final encoder = res['encoder'] as String;
  final uncachedDecoder = res['uncached-decoder'] as String;
  final cachedDecoder = res['cached-decoder'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final moonshine = sherpa_onnx.OfflineMoonshineModelConfig(
    preprocessor: preprocessor,
    encoder: encoder,
    uncachedDecoder: uncachedDecoder,
    cachedDecoder: cachedDecoder,
  );

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    moonshine: moonshine,
    tokens: tokens,
    debug: false,
    numThreads: 1,
  );
  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  final stream = recognizer.createStream();

  stream.acceptWaveform(
      samples: waveData.samples, sampleRate: waveData.sampleRate);
  recognizer.decode(stream);

  final result = recognizer.getResult(stream);
  print(result.text);

  stream.free();
  recognizer.free();
}


================================================
FILE: dart-api-examples/non-streaming-asr/bin/moonshine_v2.dart
================================================
// Copyright (c)  2024-2026  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('encoder', help: 'Path to the moonshine v2 encoder model')
    ..addOption('decoder', help: 'Path to moonshine v2 decoder model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['encoder'] == null ||
      res['decoder'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final encoder = res['encoder'] as String;
  final decoder = res['decoder'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final moonshine = sherpa_onnx.OfflineMoonshineModelConfig(
    encoder: encoder,
    mergedDecoder: decoder,
  );

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    moonshine: moonshine,
    tokens: tokens,
    debug: false,
    numThreads: 1,
  );
  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  final stream = recognizer.createStream();

  stream.acceptWaveform(
    samples: waveData.samples,
    sampleRate: waveData.sampleRate,
  );
  recognizer.decode(stream);

  final result = recognizer.getResult(stream);
  print(result.text);

  stream.free();
  recognizer.free();
}


================================================
FILE: dart-api-examples/non-streaming-asr/bin/nemo-canary.dart
================================================
// Copyright (c)  2025  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('encoder', help: 'Path to the NeMo Canary encoder model')
    ..addOption('decoder', help: 'Path to the NeMo Canary decoder model')
    ..addOption('src-lang', help: 'Language of the input audio')
    ..addOption('tgt-lang', help: 'Language of the recognition result')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['encoder'] == null ||
      res['decoder'] == null ||
      res['src-lang'] == null ||
      res['tgt-lang'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final encoder = res['encoder'] as String;
  final decoder = res['decoder'] as String;
  final srcLang = res['src-lang'] as String;
  final tgtLang = res['tgt-lang'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final canary = sherpa_onnx.OfflineCanaryModelConfig(
      encoder: encoder, decoder: decoder, srcLang: srcLang, tgtLang: tgtLang);

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    canary: canary,
    tokens: tokens,
    debug: false,
    numThreads: 1,
  );
  var config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  final stream = recognizer.createStream();

  stream.acceptWaveform(
      samples: waveData.samples, sampleRate: waveData.sampleRate);
  recognizer.decode(stream);

  final result = recognizer.getResult(stream);
  print('Result in $tgtLang: ${result.text}');

  stream.free();

  // Example to change the target language to de
  if (tgtLang != 'en') {
    var json = config.toJson();

    ((json['model'] as Map<String, dynamic>)!['canary']
        as Map<String, dynamic>)!['tgtLang'] = 'en';

    config = sherpa_onnx.OfflineRecognizerConfig.fromJson(json);
    recognizer.setConfig(config);

    final stream = recognizer.createStream();

    stream.acceptWaveform(
        samples: waveData.samples, sampleRate: waveData.sampleRate);
    recognizer.decode(stream);

    final result = recognizer.getResult(stream);
    print('Result in English: ${result.text}');
    stream.free();
  }

  recognizer.free();
}


================================================
FILE: dart-api-examples/non-streaming-asr/bin/nemo-ctc.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the NeMo CTC model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final nemo = sherpa_onnx.OfflineNemoEncDecCtcModelConfig(model: model);

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    nemoCtc: nemo,
    tokens: tokens,
    debug: true,
    numThreads: 1,
  );
  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  final stream = recognizer.createStream();

  stream.acceptWaveform(
      samples: waveData.samples, sampleRate: waveData.sampleRate);
  recognizer.decode(stream);

  final result = recognizer.getResult(stream);
  print(result.text);

  stream.free();
  recognizer.free();
}


================================================
FILE: dart-api-examples/non-streaming-asr/bin/nemo-transducer.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('encoder', help: 'Path to the encoder model')
    ..addOption('decoder', help: 'Path to decoder model')
    ..addOption('joiner', help: 'Path to joiner model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['encoder'] == null ||
      res['decoder'] == null ||
      res['joiner'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final encoder = res['encoder'] as String;
  final decoder = res['decoder'] as String;
  final joiner = res['joiner'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final transducer = sherpa_onnx.OfflineTransducerModelConfig(
    encoder: encoder,
    decoder: decoder,
    joiner: joiner,
  );

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    transducer: transducer,
    tokens: tokens,
    debug: true,
    numThreads: 1,
  );
  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  final stream = recognizer.createStream();

  stream.acceptWaveform(
      samples: waveData.samples, sampleRate: waveData.sampleRate);
  recognizer.decode(stream);

  final result = recognizer.getResult(stream);
  print(result.text);

  stream.free();
  recognizer.free();
}


================================================
FILE: dart-api-examples/non-streaming-asr/bin/omnilingual-asr-ctc.dart
================================================
// Copyright (c)  2025  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the Omnilingual ASR CTC model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final omnilingual = sherpa_onnx.OfflineOmnilingualAsrCtcModelConfig(model: model);

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    omnilingual: omnilingual,
    tokens: tokens,
    debug: true,
    numThreads: 1,
  );
  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  final stream = recognizer.createStream();

  stream.acceptWaveform(
      samples: waveData.samples, sampleRate: waveData.sampleRate);
  recognizer.decode(stream);

  final result = recognizer.getResult(stream);
  print(result.text);

  stream.free();
  recognizer.free();
}


================================================
FILE: dart-api-examples/non-streaming-asr/bin/paraformer-itn.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the paraformer model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('rule-fsts',
        help: 'Path to rule fsts for inverse text normalization')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['tokens'] == null ||
      res['rule-fsts'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final tokens = res['tokens'] as String;
  final ruleFsts = res['rule-fsts'] as String;
  final inputWav = res['input-wav'] as String;

  final paraformer = sherpa_onnx.OfflineParaformerModelConfig(
    model: model,
  );

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    paraformer: paraformer,
    tokens: tokens,
    debug: true,
    numThreads: 1,
    modelType: 'paraformer',
  );
  final config = sherpa_onnx.OfflineRecognizerConfig(
    model: modelConfig,
    ruleFsts: ruleFsts,
  );
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  final stream = recognizer.createStream();

  stream.acceptWaveform(
      samples: waveData.samples, sampleRate: waveData.sampleRate);
  recognizer.decode(stream);

  final result = recognizer.getResult(stream);
  print(result.text);

  stream.free();
  recognizer.free();
}


================================================
FILE: dart-api-examples/non-streaming-asr/bin/paraformer.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the paraformer model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final paraformer = sherpa_onnx.OfflineParaformerModelConfig(
    model: model,
  );

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    paraformer: paraformer,
    tokens: tokens,
    debug: true,
    numThreads: 1,
    modelType: 'paraformer',
  );
  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  final stream = recognizer.createStream();

  stream.acceptWaveform(
      samples: waveData.samples, sampleRate: waveData.sampleRate);
  recognizer.decode(stream);

  final result = recognizer.getResult(stream);
  print(result.text);

  stream.free();
  recognizer.free();
}


================================================
FILE: dart-api-examples/non-streaming-asr/bin/sense-voice-with-hr.dart
================================================
// Copyright (c)  2025  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  print('sherpa-onnx version: ${sherpa_onnx.getVersion()}');
  print('sherpa-onnx gitSha1: ${sherpa_onnx.getGitSha1()}');
  print('sherpa-onnx gitDate: ${sherpa_onnx.getGitDate()}');

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the SenseVoice model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('language',
        help: 'auto, zh, en, ja, ko, yue, or leave it empty to use auto',
        defaultsTo: '')
    ..addOption('use-itn',
        help: 'true to use inverse text normalization', defaultsTo: 'false')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe')
    ..addOption('hr-lexicon',
        help: 'Path to lexicon.txt for homophone replacer')
    ..addOption('hr-rule-fsts',
        help: 'Path to replace.fst for homophone replacer');

  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['tokens'] == null ||
      res['hr-lexicon'] == null ||
      res['hr-rule-fsts'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;
  final language = res['language'] as String;
  final useItn = (res['use-itn'] as String).toLowerCase() == 'true';
  final hrLexicon = res['hr-lexicon'] as String;
  final hrRuleFsts = res['hr-rule-fsts'] as String;

  final senseVoice = sherpa_onnx.OfflineSenseVoiceModelConfig(
      model: model, language: language, useInverseTextNormalization: useItn);

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    senseVoice: senseVoice,
    tokens: tokens,
    debug: true,
    numThreads: 1,
  );

  final hr = sherpa_onnx.HomophoneReplacerConfig(
      lexicon: hrLexicon, ruleFsts: hrRuleFsts);

  final config =
      sherpa_onnx.OfflineRecognizerConfig(model: modelConfig, hr: hr);

  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  final stream = recognizer.createStream();

  stream.acceptWaveform(
      samples: waveData.samples, sampleRate: waveData.sampleRate);
  recognizer.decode(stream);

  final result = recognizer.getResult(stream);
  print(result.text);

  stream.free();
  recognizer.free();
}


================================================
FILE: dart-api-examples/non-streaming-asr/bin/sense-voice.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the SenseVoice model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('language',
        help: 'auto, zh, en, ja, ko, yue, or leave it empty to use auto',
        defaultsTo: '')
    ..addOption('use-itn',
        help: 'true to use inverse text normalization', defaultsTo: 'false')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;
  final language = res['language'] as String;
  final useItn = (res['use-itn'] as String).toLowerCase() == 'true';

  final senseVoice = sherpa_onnx.OfflineSenseVoiceModelConfig(
      model: model, language: language, useInverseTextNormalization: useItn);

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    senseVoice: senseVoice,
    tokens: tokens,
    debug: true,
    numThreads: 1,
  );
  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  final stream = recognizer.createStream();

  stream.acceptWaveform(
      samples: waveData.samples, sampleRate: waveData.sampleRate);
  recognizer.decode(stream);

  final result = recognizer.getResult(stream);
  print(result.text);

  stream.free();
  recognizer.free();
}


================================================
FILE: dart-api-examples/non-streaming-asr/bin/telespeech-ctc.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the telespeech CTC model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    telespeechCtc: model,
    tokens: tokens,
    debug: true,
    numThreads: 1,
    modelType: 'telespeech_ctc',
  );
  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  final stream = recognizer.createStream();

  stream.acceptWaveform(
      samples: waveData.samples, sampleRate: waveData.sampleRate);
  recognizer.decode(stream);

  final result = recognizer.getResult(stream);
  print(result.text);

  stream.free();
  recognizer.free();
}


================================================
FILE: dart-api-examples/non-streaming-asr/bin/vad-with-paraformer.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('silero-vad', help: 'Path to silero_vad.onnx')
    ..addOption('model', help: 'Path to the paraformer model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['silero-vad'] == null ||
      res['model'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final sileroVad = res['silero-vad'] as String;
  final model = res['model'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final paraformer = sherpa_onnx.OfflineParaformerModelConfig(
    model: model,
  );

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    paraformer: paraformer,
    tokens: tokens,
    debug: true,
    numThreads: 1,
    modelType: 'paraformer',
  );
  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final sileroVadConfig = sherpa_onnx.SileroVadModelConfig(
    model: sileroVad,
    minSilenceDuration: 0.25,
    minSpeechDuration: 0.5,
  );

  final vadConfig = sherpa_onnx.VadModelConfig(
    sileroVad: sileroVadConfig,
    numThreads: 1,
    debug: true,
  );

  final vad = sherpa_onnx.VoiceActivityDetector(
      config: vadConfig, bufferSizeInSeconds: 10);

  final waveData = sherpa_onnx.readWave(inputWav);

  int numSamples = waveData.samples.length;
  int numIter = numSamples ~/ vadConfig.sileroVad.windowSize;

  for (int i = 0; i != numIter; ++i) {
    int start = i * vadConfig.sileroVad.windowSize;
    vad.acceptWaveform(Float32List.sublistView(
        waveData.samples, start, start + vadConfig.sileroVad.windowSize));

    while (!vad.isEmpty()) {
      final stream = recognizer.createStream();
      final segment = vad.front();
      stream.acceptWaveform(
          samples: segment.samples, sampleRate: waveData.sampleRate);
      recognizer.decode(stream);

      final result = recognizer.getResult(stream);

      final startTime = segment.start * 1.0 / waveData.sampleRate;
      final duration = segment.samples.length * 1.0 / waveData.sampleRate;
      final stopTime = startTime + duration;
      if (result.text != '') {
        print(
            '${startTime.toStringAsPrecision(4)} -- ${stopTime.toStringAsPrecision(4)}: ${result.text}');
      }

      stream.free();
      vad.pop();
    }
  }

  vad.flush();
  while (!vad.isEmpty()) {
    final stream = recognizer.createStream();
    final segment = vad.front();
    stream.acceptWaveform(
        samples: segment.samples, sampleRate: waveData.sampleRate);
    recognizer.decode(stream);

    final result = recognizer.getResult(stream);

    final startTime = segment.start * 1.0 / waveData.sampleRate;
    final duration = segment.samples.length * 1.0 / waveData.sampleRate;
    final stopTime = startTime + duration;
    if (result.text != '') {
      print(
          '${startTime.toStringAsPrecision(4)} -- ${stopTime.toStringAsPrecision(4)}: ${result.text}');
    }

    stream.free();
    vad.pop();
  }

  vad.free();
  recognizer.free();
}


================================================
FILE: dart-api-examples/non-streaming-asr/bin/wenet-ctc.dart
================================================
// Copyright (c)  2025  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the Wenet CTC model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final wenetCtc = sherpa_onnx.OfflineWenetCtcModelConfig(model: model);

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    wenetCtc: wenetCtc,
    tokens: tokens,
    debug: true,
    numThreads: 1,
  );
  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  final stream = recognizer.createStream();

  stream.acceptWaveform(
      samples: waveData.samples, sampleRate: waveData.sampleRate);
  recognizer.decode(stream);

  final result = recognizer.getResult(stream);
  print(result.text);

  stream.free();
  recognizer.free();
}


================================================
FILE: dart-api-examples/non-streaming-asr/bin/whisper.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('encoder', help: 'Path to the whisper encoder model')
    ..addOption('decoder', help: 'Path to whisper decoder model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['encoder'] == null ||
      res['decoder'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final encoder = res['encoder'] as String;
  final decoder = res['decoder'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final whisper = sherpa_onnx.OfflineWhisperModelConfig(
    encoder: encoder,
    decoder: decoder,
  );

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    whisper: whisper,
    tokens: tokens,
    modelType: 'whisper',
    debug: false,
    numThreads: 1,
  );
  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  final stream = recognizer.createStream();

  stream.acceptWaveform(
      samples: waveData.samples, sampleRate: waveData.sampleRate);
  recognizer.decode(stream);

  final result = recognizer.getResult(stream);
  print(result.text);

  stream.free();
  recognizer.free();
}


================================================
FILE: dart-api-examples/non-streaming-asr/bin/zipformer-ctc.dart
================================================
// Copyright (c)  2025  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the Zipformer CTC model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final zipformerCtc = sherpa_onnx.OfflineZipformerCtcModelConfig(model: model);

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    zipformerCtc: zipformerCtc,
    tokens: tokens,
    debug: true,
    numThreads: 1,
  );
  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  final stream = recognizer.createStream();

  stream.acceptWaveform(
      samples: waveData.samples, sampleRate: waveData.sampleRate);
  recognizer.decode(stream);

  final result = recognizer.getResult(stream);
  print(result.text);

  stream.free();
  recognizer.free();
}


================================================
FILE: dart-api-examples/non-streaming-asr/bin/zipformer-transducer.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('encoder', help: 'Path to the encoder model')
    ..addOption('decoder', help: 'Path to decoder model')
    ..addOption('joiner', help: 'Path to joiner model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['encoder'] == null ||
      res['decoder'] == null ||
      res['joiner'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final encoder = res['encoder'] as String;
  final decoder = res['decoder'] as String;
  final joiner = res['joiner'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final transducer = sherpa_onnx.OfflineTransducerModelConfig(
    encoder: encoder,
    decoder: decoder,
    joiner: joiner,
  );

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    transducer: transducer,
    tokens: tokens,
    debug: true,
    numThreads: 1,
  );
  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  final stream = recognizer.createStream();

  stream.acceptWaveform(
      samples: waveData.samples, sampleRate: waveData.sampleRate);
  recognizer.decode(stream);

  final result = recognizer.getResult(stream);
  print(result.text);

  stream.free();
  recognizer.free();
}


================================================
FILE: dart-api-examples/non-streaming-asr/pubspec.yaml
================================================
name: non_streaming_asr
description: >
  This example demonstrates how to use the Dart API for Non-streaming speech recognition. Specifically, we use the following models as examples, whisper, zipformer, and paraformer.

version: 1.0.0
# repository: https://github.com/my_org/my_repo

environment:
  sdk: ">=3.0.0 <4.0.0"

# Add regular dependencies here.
dependencies:
  sherpa_onnx: ^1.12.31
  path: ^1.9.0
  args: ^2.5.0

dev_dependencies:
  lints: ^3.0.0


================================================
FILE: dart-api-examples/non-streaming-asr/run-dolphin-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  ls -lh sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02
fi

dart run \
  ./bin/dolphin-ctc.dart \
  --model ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx \
  --tokens ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt \
  --input-wav ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav


================================================
FILE: dart-api-examples/non-streaming-asr/run-fire-red-asr-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
  tar xvf sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
  rm sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
fi

dart run \
  ./bin/fire-red-asr-ctc.dart \
  --model ./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/model.int8.onnx \
  --tokens ./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/tokens.txt \
  --input-wav ./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/test_wavs/1.wav


================================================
FILE: dart-api-examples/non-streaming-asr/run-fire-red-asr.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -f ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  ls -lh sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16
fi

dart pub get

dart run \
  ./bin/fire-red-asr.dart \
  --encoder ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx \
  --decoder ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/decoder.int8.onnx \
  --tokens ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/tokens.txt \
  --input-wav ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/0.wav


================================================
FILE: dart-api-examples/non-streaming-asr/run-funasr-nano.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-funasr-nano-int8-2025-12-30/embedding.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
  tar xvf sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
  rm sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
fi

dart run \
  ./bin/funasr-nano.dart \
  --encoder-adaptor ./sherpa-onnx-funasr-nano-int8-2025-12-30/encoder_adaptor.int8.onnx \
  --llm ./sherpa-onnx-funasr-nano-int8-2025-12-30/llm.int8.onnx \
  --embedding ./sherpa-onnx-funasr-nano-int8-2025-12-30/embedding.int8.onnx \
  --tokenizer ./sherpa-onnx-funasr-nano-int8-2025-12-30/Qwen3-0.6B \
  --input-wav ./sherpa-onnx-funasr-nano-int8-2025-12-30/test_wavs/lyrics.wav


================================================
FILE: dart-api-examples/non-streaming-asr/run-medasr-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
  tar xvf sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
  rm sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
fi

dart run \
  ./bin/medasr-ctc.dart \
  --model ./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/model.int8.onnx \
  --tokens ./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/tokens.txt \
  --input-wav ./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/test_wavs/0.wav


================================================
FILE: dart-api-examples/non-streaming-asr/run-moonshine-v2.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/encoder_model.ort ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
  tar xvf sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
  rm sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
fi

dart run \
  ./bin/moonshine_v2.dart \
  --encoder ./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/encoder_model.ort \
  --decoder ./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/decoder_model_merged.ort \
  --tokens ./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/tokens.txt \
  --input-wav ./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/test_wavs/0.wav


================================================
FILE: dart-api-examples/non-streaming-asr/run-moonshine.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
fi

dart run \
  ./bin/moonshine.dart \
  --preprocessor ./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx \
  --encoder ./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx \
  --uncached-decoder ./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx \
  --cached-decoder ./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx \
  --tokens ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt \
  --input-wav ./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav


================================================
FILE: dart-api-examples/non-streaming-asr/run-nemo-canary.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
fi

for tgt_lang in en de es fr; do
  dart run \
    ./bin/nemo-canary.dart \
    --encoder ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx \
    --decoder ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx \
    --tokens ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt \
    --src-lang en \
    --tgt-lang $tgt_lang \
    --input-wav ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav
done

for tgt_lang in en de; do
  dart run \
    ./bin/nemo-canary.dart \
    --encoder ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx \
    --decoder ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx \
    --tokens ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt \
    --src-lang de \
    --tgt-lang $tgt_lang \
    --input-wav ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/de.wav
done


================================================
FILE: dart-api-examples/non-streaming-asr/run-nemo-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
  tar xvf sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
  rm sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
fi

dart run \
  ./bin/nemo-ctc.dart \
  --model ./sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/model.onnx \
  --tokens ./sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt \
  --input-wav ./sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/test_wavs/de-german.wav


================================================
FILE: dart-api-examples/non-streaming-asr/run-nemo-transducer.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2

  tar xvf sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
  rm sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
fi

dart run \
  ./bin/nemo-transducer.dart \
  --encoder ./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/encoder.onnx \
  --decoder ./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/decoder.onnx \
  --joiner ./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/joiner.onnx \
  --tokens ./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt \
  --input-wav ./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/test_wavs/de-german.wav


================================================
FILE: dart-api-examples/non-streaming-asr/run-omnilingual-asr-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
  tar xvf sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
  rm sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
fi

dart run \
  ./bin/omnilingual-asr-ctc.dart \
  --model ./sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/model.int8.onnx \
  --tokens ./sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/tokens.txt \
  --input-wav ./sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/test_wavs/en.wav


================================================
FILE: dart-api-examples/non-streaming-asr/run-paraformer-itn.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2

  tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
  rm sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
fi

if [ ! -f ./itn-zh-number.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
fi

if [ ! -f ./itn_zh_number.fst ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi

dart run \
  ./bin/paraformer-itn.dart \
  --model ./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx \
  --tokens ./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt \
  --rule-fsts ./itn_zh_number.fst \
  --input-wav ./itn-zh-number.wav


================================================
FILE: dart-api-examples/non-streaming-asr/run-paraformer.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2

  tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
  rm sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
fi

dart run \
  ./bin/paraformer.dart \
  --model ./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx \
  --tokens ./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt \
  --input-wav ./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/3-sichuan.wav


================================================
FILE: dart-api-examples/non-streaming-asr/run-sense-voice-with-hr.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
fi

if [ ! -d dict ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
  tar xf dict.tar.bz2
  rm dict.tar.bz2

  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
fi

dart run \
  ./bin/sense-voice-with-hr.dart \
  --model ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx \
  --tokens ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt \
  --use-itn true \
  --hr-lexicon ./lexicon.txt \
  --hr-rule-fsts ./replace.fst \
  --input-wav ./test-hr.wav


================================================
FILE: dart-api-examples/non-streaming-asr/run-sense-voice.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
fi

dart run \
  ./bin/sense-voice.dart \
  --model ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx \
  --tokens ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt \
  --use-itn true \
  --input-wav ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav


================================================
FILE: dart-api-examples/non-streaming-asr/run-telespeech-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2

  tar xvf sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
  rm sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
fi

dart run \
  ./bin/telespeech-ctc.dart \
  --model ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/model.int8.onnx \
  --tokens ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt \
  --input-wav ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/test_wavs/3-sichuan.wav


================================================
FILE: dart-api-examples/non-streaming-asr/run-vad-with-paraformer.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [[ ! -f ./silero_vad.onnx ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [[ ! -f ./lei-jun-test.wav ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

if [ ! -f ./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2

  tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
  rm sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
fi

dart run \
  ./bin/vad-with-paraformer.dart \
  --silero-vad ./silero_vad.onnx \
  --model ./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx \
  --tokens ./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt \
  --input-wav ./lei-jun-test.wav


================================================
FILE: dart-api-examples/non-streaming-asr/run-wenet-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2

  rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
fi

dart run \
  ./bin/wenet-ctc.dart \
  --model ./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx \
  --tokens ./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt \
  --input-wav ./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav


================================================
FILE: dart-api-examples/non-streaming-asr/run-whisper.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2

  tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
  rm sherpa-onnx-whisper-tiny.en.tar.bz2
fi

dart run \
  ./bin/whisper.dart \
  --encoder ./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx \
  --decoder ./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx \
  --tokens ./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt \
  --input-wav ./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav


================================================
FILE: dart-api-examples/non-streaming-asr/run-zipformer-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2

  tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
  rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
fi

dart run \
  ./bin/zipformer-ctc.dart \
  --model ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx \
  --tokens ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt \
  --input-wav ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/test_wavs/0.wav


================================================
FILE: dart-api-examples/non-streaming-asr/run-zipformer-transducer.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-gigaspeech-2023-12-12.tar.bz2

  tar xvf sherpa-onnx-zipformer-gigaspeech-2023-12-12.tar.bz2
  rm sherpa-onnx-zipformer-gigaspeech-2023-12-12.tar.bz2
fi

dart run \
  ./bin/zipformer-transducer.dart \
  --encoder ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/encoder-epoch-30-avg-1.int8.onnx \
  --decoder ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/decoder-epoch-30-avg-1.onnx \
  --joiner ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/joiner-epoch-30-avg-1.int8.onnx \
  --tokens ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/tokens.txt \
  --input-wav ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/test_wavs/1221-135766-0001.wav


================================================
FILE: dart-api-examples/speaker-diarization/.gitignore
================================================
# https://dart.dev/guides/libraries/private-files
# Created by `dart pub`
.dart_tool/


================================================
FILE: dart-api-examples/speaker-diarization/CHANGELOG.md
================================================
## 1.0.0

- Initial version.


================================================
FILE: dart-api-examples/speaker-diarization/README.md
================================================
# Introduction

This example shows how to use the Dart API from sherpa-onnx for speaker diarization.

# Usage

Please see [./run.sh](./run.sh)


================================================
FILE: dart-api-examples/speaker-diarization/analysis_options.yaml
================================================
# This file configures the static analysis results for your project (errors,
# warnings, and lints).
#
# This enables the 'recommended' set of lints from `package:lints`.
# This set helps identify many issues that may lead to problems when running
# or consuming Dart code, and enforces writing Dart using a single, idiomatic
# style and format.
#
# If you want a smaller set of lints you can change this to specify
# 'package:lints/core.yaml'. These are just the most critical lints
# (the recommended set includes the core lints).
# The core lints are also what is used by pub.dev for scoring packages.

include: package:lints/recommended.yaml

# Uncomment the following section to specify additional rules.

# linter:
#   rules:
#     - camel_case_types

# analyzer:
#   exclude:
#     - path/to/excluded/files/**

# For more information about the core and recommended set of lints, see
# https://dart.dev/go/core-lints

# For additional information about configuring this file, see
# https://dart.dev/guides/language/analysis-options


================================================
FILE: dart-api-examples/speaker-diarization/bin/speaker-diarization.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';
import 'dart:ffi';

import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  /* Please use the following commands to download files used in this file
    Step 1: Download a speaker segmentation model

    Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
    for a list of available models. The following is an example

      wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
      tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
      rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2

    Step 2: Download a speaker embedding extractor model

    Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
    for a list of available models. The following is an example

      wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

    Step 3. Download test wave files

    Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
    for a list of available test wave files. The following is an example

      wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav

    Step 4. Run it
        */

  final segmentationModel =
      "./sherpa-onnx-pyannote-segmentation-3-0/model.onnx";

  final embeddingModel =
      "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx";

  final waveFilename = "./0-four-speakers-zh.wav";

  final segmentationConfig = sherpa_onnx.OfflineSpeakerSegmentationModelConfig(
    pyannote: sherpa_onnx.OfflineSpeakerSegmentationPyannoteModelConfig(
        model: segmentationModel),
  );

  final embeddingConfig =
      sherpa_onnx.SpeakerEmbeddingExtractorConfig(model: embeddingModel);

  // since we know there are 4 speakers in ./0-four-speakers-zh.wav, we set
  // numClusters to 4. If you don't know the exact number, please set it to -1.
  // in that case, you have to set threshold. A larger threshold leads to
  // fewer clusters, i.e., fewer speakers.
  final clusteringConfig =
      sherpa_onnx.FastClusteringConfig(numClusters: 4, threshold: 0.5);

  var config = sherpa_onnx.OfflineSpeakerDiarizationConfig(
      segmentation: segmentationConfig,
      embedding: embeddingConfig,
      clustering: clusteringConfig,
      minDurationOn: 0.2,
      minDurationOff: 0.5);

  final sd = sherpa_onnx.OfflineSpeakerDiarization(config);
  if (sd.ptr == nullptr) {
    return;
  }

  final waveData = sherpa_onnx.readWave(waveFilename);
  if (sd.sampleRate != waveData.sampleRate) {
    print(
        'Expected sample rate: ${sd.sampleRate}, given: ${waveData.sampleRate}');
    return;
  }

  print('started');

  // Use the following statement if you don't want to use a callback
  // final segments = sd.process(samples: waveData.samples);

  final segments = sd.processWithCallback(
      samples: waveData.samples,
      callback: (int numProcessedChunk, int numTotalChunks) {
        final progress = 100.0 * numProcessedChunk / numTotalChunks;

        print('Progress ${progress.toStringAsFixed(2)}%');

        return 0;
      });

  for (int i = 0; i < segments.length; ++i) {
    print(
        '${segments[i].start.toStringAsFixed(3)} -- ${segments[i].end.toStringAsFixed(3)}  speaker_${segments[i].speaker}');
  }
}


================================================
FILE: dart-api-examples/speaker-diarization/pubspec.yaml
================================================
name: speaker_diarization
description: >
  This example demonstrates how to use the Dart API for speaker diarization.

version: 1.0.0

environment:
  sdk: ">=3.0.0 <4.0.0"

dependencies:
  sherpa_onnx: ^1.12.31
  # sherpa_onnx:
  #   path: ../../flutter/sherpa_onnx
  path: ^1.9.0

dev_dependencies:
  lints: ^3.0.0


================================================
FILE: dart-api-examples/speaker-diarization/run.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
fi

if [ ! -f ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
fi

if [ ! -f ./0-four-speakers-zh.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
fi

dart run ./bin/speaker-diarization.dart


================================================
FILE: dart-api-examples/speaker-identification/.gitignore
================================================
# https://dart.dev/guides/libraries/private-files
# Created by `dart pub`
.dart_tool/


================================================
FILE: dart-api-examples/speaker-identification/README.md
================================================
# Introduction

This example shows how to use the Dart API from sherpa-onnx for speaker identification.

| File | Description|
|------|------------|
|[./bin/speaker_id.dart](./bin/speaker_id.dart)| Use a speaker embedding extractor model for speaker identification and verification. See also [./run-3d-speaker.sh](./run-3d-speaker.sh)|


================================================
FILE: dart-api-examples/speaker-identification/analysis_options.yaml
================================================
# This file configures the static analysis results for your project (errors,
# warnings, and lints).
#
# This enables the 'recommended' set of lints from `package:lints`.
# This set helps identify many issues that may lead to problems when running
# or consuming Dart code, and enforces writing Dart using a single, idiomatic
# style and format.
#
# If you want a smaller set of lints you can change this to specify
# 'package:lints/core.yaml'. These are just the most critical lints
# (the recommended set includes the core lints).
# The core lints are also what is used by pub.dev for scoring packages.

include: package:lints/recommended.yaml

# Uncomment the following section to specify additional rules.

# linter:
#   rules:
#     - camel_case_types

# analyzer:
#   exclude:
#     - path/to/excluded/files/**

# For more information about the core and recommended set of lints, see
# https://dart.dev/go/core-lints

# For additional information about configuring this file, see
# https://dart.dev/guides/language/analysis-options


================================================
FILE: dart-api-examples/speaker-identification/bin/speaker_id.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
import './init.dart';

Float32List computeEmbedding(
    {required sherpa_onnx.SpeakerEmbeddingExtractor extractor,
    required String filename}) {
  final waveData = sherpa_onnx.readWave(filename);
  final stream = extractor.createStream();

  stream.acceptWaveform(
    samples: waveData.samples,
    sampleRate: waveData.sampleRate,
  );

  stream.inputFinished();

  final embedding = extractor.compute(stream);

  stream.free();

  return embedding;
}

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()..addOption('model', help: 'Path to model.onnx');

  final res = parser.parse(arguments);
  if (res['model'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  /*
     Please download test data by yourself

  curl -SL -o sr-data.tar.gz https://github.com/csukuangfj/sr-data/archive/refs/tags/v1.0.0.tar.gz
  tar xvf sr-data.tar.gz
  mv sr-data-1.0.0 sr-data
  */

  final config = sherpa_onnx.SpeakerEmbeddingExtractorConfig(
    model: model,
    numThreads: 1,
    debug: true,
    provider: 'cpu',
  );
  final extractor = sherpa_onnx.SpeakerEmbeddingExtractor(config: config);

  final manager = sherpa_onnx.SpeakerEmbeddingManager(extractor.dim);

  final spk1Files = [
    "./sr-data/enroll/fangjun-sr-1.wav",
    "./sr-data/enroll/fangjun-sr-2.wav",
    "./sr-data/enroll/fangjun-sr-3.wav",
  ];

  final spk1Vec = <Float32List>[];
  for (final f in spk1Files) {
    final embedding = computeEmbedding(extractor: extractor, filename: f);
    spk1Vec.add(embedding);
  }

  final spk2Files = [
    "./sr-data/enroll/leijun-sr-1.wav",
    "./sr-data/enroll/leijun-sr-2.wav",
  ];

  final spk2Vec = <Float32List>[];
  for (final f in spk2Files) {
    final embedding = computeEmbedding(extractor: extractor, filename: f);
    spk2Vec.add(embedding);
  }

  if (!manager.addMulti(name: "fangjun", embeddingList: spk1Vec)) {
    // Note you should free extractor and manager in your app to avoid memory leak
    print("Failed to register fangjun");
    return;
  }

  if (!manager.addMulti(name: "leijun", embeddingList: spk2Vec)) {
    print("Failed to register leijun");
    return;
  }

  if (manager.numSpeakers != 2) {
    print("There should be two speakers");
    return;
  }

  if (!manager.contains("fangjun")) {
    print("It should contain the speaker fangjun");
    return;
  }

  if (!manager.contains("leijun")) {
    print("It should contain the speaker leijun");
    return;
  }

  print("---All speakers---");
  final allSpeakers = manager.allSpeakerNames;
  for (final s in allSpeakers) {
    print(s);
  }
  print("------------");

  final testFiles = [
    "./sr-data/test/fangjun-test-sr-1.wav",
    "./sr-data/test/leijun-test-sr-1.wav",
    "./sr-data/test/liudehua-test-sr-1.wav",
  ];

  final threshold = 0.6;
  for (final file in testFiles) {
    final embedding = computeEmbedding(extractor: extractor, filename: file);

    var name = manager.search(embedding: embedding, threshold: threshold);
    if (name == '') {
      name = "<Unknown>";
    }
    print("$file: $name");
  }

  if (!manager.verify(
      name: "fangjun",
      embedding: computeEmbedding(extractor: extractor, filename: testFiles[0]),
      threshold: threshold)) {
    print("{$testFiles[0]} should match fangjun!");
    return;
  }

  if (!manager.remove("fangjun")) {
    print("Failed to remove fangjun");
    return;
  }

  if (manager.verify(
      name: "fangjun",
      embedding: computeEmbedding(extractor: extractor, filename: testFiles[0]),
      threshold: threshold)) {
    print("${testFiles[0]} should match no one!");
    return;
  }

  if (manager.numSpeakers != 1) {
    print("There should only 1 speaker left.");
    return;
  }

  extractor.free();
  manager.free();
}


================================================
FILE: dart-api-examples/speaker-identification/pubspec.yaml
================================================
name: speaker_identification

description: >
  This example demonstrates how to use the Dart API for speaker identification.

version: 1.0.0

environment:
  sdk: ">=3.0.0 <4.0.0"

dependencies:
  sherpa_onnx: ^1.12.31
  path: ^1.9.0
  args: ^2.5.0

dev_dependencies:
  lints: ^3.0.0


================================================
FILE: dart-api-examples/speaker-identification/run-3d-speaker.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
fi

if [ ! -f ./sr-data/enroll/leijun-sr-1.wav ]; then
  curl -SL -o sr-data.tar.gz https://github.com/csukuangfj/sr-data/archive/refs/tags/v1.0.0.tar.gz
  tar xvf sr-data.tar.gz
  mv sr-data-1.0.0 sr-data
fi

dart run \
  ./bin/speaker_id.dart \
  --model ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx


================================================
FILE: dart-api-examples/speech-enhancement-dpdfnet/.gitignore
================================================
.dart_tool/
.packages
build/


================================================
FILE: dart-api-examples/speech-enhancement-dpdfnet/CHANGELOG.md
================================================
## 1.0.0

- Initial version.


================================================
FILE: dart-api-examples/speech-enhancement-dpdfnet/README.md
================================================
# Speech Enhancement Example

This example shows how to use the Dart offline speech denoiser API with
DPDFNet models.

Use 16 kHz DPDFNet models such as `dpdfnet_baseline.onnx`, `dpdfnet2.onnx`,
`dpdfnet4.onnx`, or `dpdfnet8.onnx` for downstream ASR or speech recognition.
Use `dpdfnet2_48khz_hr.onnx` for 48 kHz enhancement output.

DPDFNet models are available from either:

- https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
- https://huggingface.co/Ceva-IP/DPDFNet

Then run:

```bash
dart pub get
dart run ./bin/speech_enhancement_dpdfnet.dart --model ./dpdfnet_baseline.onnx --input-wav ./inp_16k.wav --output-wav ./enhanced-16k.wav
```


================================================
FILE: dart-api-examples/speech-enhancement-dpdfnet/analysis_options.yaml
================================================
# This file configures the static analysis results for your project (errors,
# warnings, and lints).
#
# This enables the 'recommended' set of lints from `package:lints`.
# This set helps identify many issues that may lead to problems when running
# or consuming Dart code, and enforces writing Dart using a single, idiomatic
# style and format.
#
# If you want a smaller set of lints you can change this to specify
# 'package:lints/core.yaml'. These are just the most critical lints
# (the recommended set includes the core lints).
# The core lints are also what is used by pub.dev for scoring packages.

include: package:lints/recommended.yaml


================================================
FILE: dart-api-examples/speech-enhancement-dpdfnet/bin/speech_enhancement_dpdfnet.dart
================================================
// Copyright (c)  2025  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to a DPDFNet onnx model')
    ..addOption('input-wav', help: 'Path to input.wav')
    ..addOption('output-wav', help: 'Path to output.wav');

  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['input-wav'] == null ||
      res['output-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final inputWav = res['input-wav'] as String;
  final outputWav = res['output-wav'] as String;

  final config = sherpa_onnx.OfflineSpeechDenoiserConfig(
      model: sherpa_onnx.OfflineSpeechDenoiserModelConfig(
    gtcrn: const sherpa_onnx.OfflineSpeechDenoiserGtcrnModelConfig(),
    dpdfnet: sherpa_onnx.OfflineSpeechDenoiserDpdfNetModelConfig(model: model),
    numThreads: 1,
    debug: true,
    provider: 'cpu',
  ));

  final sd = sherpa_onnx.OfflineSpeechDenoiser(config);

  final waveData = sherpa_onnx.readWave(inputWav);

  final denoised =
      sd.run(samples: waveData.samples, sampleRate: waveData.sampleRate);

  sd.free();

  sherpa_onnx.writeWave(
      filename: outputWav,
      samples: denoised.samples,
      sampleRate: denoised.sampleRate);

  print('Saved to $outputWav');
}


================================================
FILE: dart-api-examples/speech-enhancement-dpdfnet/pubspec.yaml
================================================
name: speech_enhancement_dpdfnet

description: >
  This example demonstrates how to use the Dart API for DPDFNet speech enhancement/denoising.

version: 1.0.0

environment:
  sdk: ">=3.0.0 <4.0.0"

dependencies:
  sherpa_onnx: ^1.12.31
  # sherpa_onnx:
  #   path: ../../flutter/sherpa_onnx
  path: ^1.9.0
  args: ^2.5.0

dev_dependencies:
  lints: ^3.0.0


================================================
FILE: dart-api-examples/speech-enhancement-dpdfnet/run.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./dpdfnet_baseline.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet_baseline.onnx
fi

if [ ! -f ./inp_16k.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
fi

dart run \
  ./bin/speech_enhancement_dpdfnet.dart \
  --model ./dpdfnet_baseline.onnx \
  --input-wav ./inp_16k.wav \
  --output-wav ./enhanced-16k.wav

ls -lh *.wav


================================================
FILE: dart-api-examples/speech-enhancement-gtcrn/.gitignore
================================================
# https://dart.dev/guides/libraries/private-files
# Created by `dart pub`
.dart_tool/


================================================
FILE: dart-api-examples/speech-enhancement-gtcrn/CHANGELOG.md
================================================
## 1.0.0

- Initial version.


================================================
FILE: dart-api-examples/speech-enhancement-gtcrn/README.md
================================================
# Speech Enhancement Example

This example shows how to use the Dart offline speech denoiser API with GTCRN
models.

Download GTCRN models and test wave files from:

- https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models

Then run:

```bash
dart pub get
dart run ./bin/speech_enhancement_gtcrn.dart --model ./gtcrn_simple.onnx --input-wav ./inp_16k.wav --output-wav ./enhanced-16k.wav
```


================================================
FILE: dart-api-examples/speech-enhancement-gtcrn/analysis_options.yaml
================================================
# This file configures the static analysis results for your project (errors,
# warnings, and lints).
#
# This enables the 'recommended' set of lints from `package:lints`.
# This set helps identify many issues that may lead to problems when running
# or consuming Dart code, and enforces writing Dart using a single, idiomatic
# style and format.
#
# If you want a smaller set of lints you can change this to specify
# 'package:lints/core.yaml'. These are just the most critical lints
# (the recommended set includes the core lints).
# The core lints are also what is used by pub.dev for scoring packages.

include: package:lints/recommended.yaml

# Uncomment the following section to specify additional rules.

# linter:
#   rules:
#     - camel_case_types

# analyzer:
#   exclude:
#     - path/to/excluded/files/**

# For more information about the core and recommended set of lints, see
# https://dart.dev/go/core-lints

# For additional information about configuring this file, see
# https://dart.dev/guides/language/analysis-options


================================================
FILE: dart-api-examples/speech-enhancement-gtcrn/bin/speech_enhancement_gtcrn.dart
================================================
// Copyright (c)  2025  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to a GTCRN onnx model')
    ..addOption('input-wav', help: 'Path to input.wav')
    ..addOption('output-wav', help: 'Path to output.wav');

  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['input-wav'] == null ||
      res['output-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final inputWav = res['input-wav'] as String;
  final outputWav = res['output-wav'] as String;

  final config = sherpa_onnx.OfflineSpeechDenoiserConfig(
      model: sherpa_onnx.OfflineSpeechDenoiserModelConfig(
    gtcrn: sherpa_onnx.OfflineSpeechDenoiserGtcrnModelConfig(model: model),
    dpdfnet: const sherpa_onnx.OfflineSpeechDenoiserDpdfNetModelConfig(),
    numThreads: 1,
    debug: true,
    provider: 'cpu',
  ));

  final sd = sherpa_onnx.OfflineSpeechDenoiser(config);

  final waveData = sherpa_onnx.readWave(inputWav);

  final denoised =
      sd.run(samples: waveData.samples, sampleRate: waveData.sampleRate);

  sd.free();

  sherpa_onnx.writeWave(
      filename: outputWav,
      samples: denoised.samples,
      sampleRate: denoised.sampleRate);

  print('Saved to $outputWav');
}


================================================
FILE: dart-api-examples/speech-enhancement-gtcrn/pubspec.yaml
================================================
name: speech_enhancement_gtcrn

description: >
  This example demonstrates how to use the Dart API for GTCRN speech enhancement/denoising.

version: 1.0.0

environment:
  sdk: ">=3.0.0 <4.0.0"

# Add regular dependencies here.
dependencies:
  sherpa_onnx: ^1.12.31
  # sherpa_onnx:
  #   path: ../../flutter/sherpa_onnx
  path: ^1.9.0
  args: ^2.5.0

dev_dependencies:
  lints: ^3.0.0


================================================
FILE: dart-api-examples/speech-enhancement-gtcrn/run.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./gtcrn_simple.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
fi

if [ ! -f ./inp_16k.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
fi


dart run \
  ./bin/speech_enhancement_gtcrn.dart \
  --model ./gtcrn_simple.onnx \
  --input-wav ./inp_16k.wav \
  --output-wav ./enhanced-16k.wav

ls -lh *.wav


================================================
FILE: dart-api-examples/spoken-language-identification/README.md
================================================
# Introduction

This example shows how to use the Dart API from sherpa-onnx for spoken language identification.

| File | Description|
|------|------------|
|[./bin/spoken_language_identification.dart](./bin/spoken_language_identification.dart)| Use a whisper model for spoken language identification. See also [./run-whisper.sh](./run-whisper.sh)|


================================================
FILE: dart-api-examples/spoken-language-identification/analysis_options.yaml
================================================
include: package:lints/recommended.yaml

analyzer:
  language:
    strict-casts: true
    strict-inference: true
    strict-raw-types: true

linter:
  rules:
    - always_use_package_imports
    - avoid_dynamic_calls
    - cancel_subscriptions
    - close_sinks
    - unawaited_futures
    - use_super_parameters


================================================
FILE: dart-api-examples/spoken-language-identification/bin/spoken_language_identification.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('encoder', help: 'Path to the whisper encoder model')
    ..addOption('decoder', help: 'Path to the whisper decoder model')
    ..addOption('tail-paddings', help: 'Tail paddings for the whisper model', defaultsTo: '0')
    ..addOption('wav', help: 'Path to test.wav for language identification')
    ..addFlag('help', abbr: 'h', help: 'Show this help message', negatable: false);

  final res = parser.parse(arguments);
  if (res['help'] as bool) {
    print(parser.usage);
    exit(0);
  }

  if (res['encoder'] == null || res['decoder'] == null || res['wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final encoder = res['encoder'] as String;
  final decoder = res['decoder'] as String;
  final tailPaddings = int.tryParse(res['tail-paddings'] as String) ?? 0;
  final wav = res['wav'] as String;

  final whisperConfig = sherpa_onnx.SpokenLanguageIdentificationWhisperConfig(
    encoder: encoder,
    decoder: decoder,
    tailPaddings: tailPaddings,
  );

  final config = sherpa_onnx.SpokenLanguageIdentificationConfig(
    whisper: whisperConfig,
    numThreads: 1,
    debug: true,
    provider: 'cpu',
  );

  final slid = sherpa_onnx.SpokenLanguageIdentification(config);

  final waveData = sherpa_onnx.readWave(wav);

  final stream = slid.createStream();
  stream.acceptWaveform(samples: waveData.samples, sampleRate: waveData.sampleRate);

  final result = slid.compute(stream);

  print('File: $wav');
  print('Detected language: ${result.lang}');

  stream.free();
  slid.free();
}


================================================
FILE: dart-api-examples/spoken-language-identification/pubspec.yaml
================================================
name: spoken_language_identification

description: >
  This example demonstrates how to use the Dart API for spoken language identification.

version: 1.0.0

environment:
  sdk: ">=3.0.0 <4.0.0"

# Add regular dependencies here.
dependencies:
  sherpa_onnx: ^1.12.31
  # sherpa_onnx:
  #   path: ../../flutter/sherpa_onnx
  path: ^1.9.0
  args: ^2.5.0

dev_dependencies:
  lints: ^3.0.0


================================================
FILE: dart-api-examples/spoken-language-identification/run-whisper.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
  tar xvf sherpa-onnx-whisper-tiny.tar.bz2
  rm sherpa-onnx-whisper-tiny.tar.bz2
fi

# Download test WAV files
waves=(
# ar-arabic.wav
# bg-bulgarian.wav
# cs-czech.wav
# da-danish.wav
# de-german.wav
# el-greek.wav
en-english.wav
es-spanish.wav
# fa-persian.wav
# fi-finnish.wav
# fr-french.wav
# hi-hindi.wav
# hr-croatian.wav
# id-indonesian.wav
# it-italian.wav
# ja-japanese.wav
# ko-korean.wav
# nl-dutch.wav
# no-norwegian.wav
# pl-polish.wav
# pt-portuguese.wav
# ro-romanian.wav
ru-russian.wav
# sk-slovak.wav
# sv-swedish.wav
# ta-tamil.wav
# tl-tagalog.wav
# tr-turkish.wav
# uk-ukrainian.wav
zh-chinese.wav
)

for wav in ${waves[@]}; do
  if [ ! -f ./$wav ]; then
    echo "Downloading $wav"
    curl -SL -O https://hf-mirror.com/spaces/k2-fsa/spoken-language-identification/resolve/main/test_wavs/$wav
  fi
  
  echo "Testing $wav"
  dart run \
    ./bin/spoken_language_identification.dart \
    --encoder ./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx \
    --decoder ./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx \
    --wav ./$wav
  
  echo "----------------------------------------"
done


================================================
FILE: dart-api-examples/streaming-asr/.gitignore
================================================
# https://dart.dev/guides/libraries/private-files
# Created by `dart pub`
.dart_tool/


================================================
FILE: dart-api-examples/streaming-asr/CHANGELOG.md
================================================
## 1.0.0

- Initial version.


================================================
FILE: dart-api-examples/streaming-asr/README.md
================================================
# Introduction

This folder contains examples for streaming ASR with Dart API.

| File | Description|
|------|------------|
|[./bin/nemo-transducer.dart](./bin/nemo-transducer.dart)| Use a NeMo transducer model for speech recognition. See [./run-nemo-transducer.sh](./run-nemo-transducer.sh)|
|[./bin/paraformer.dart](./bin/paraformer.dart)| Use a Paraformer model for speech recognition. See [./run-paraformer.sh](./run-paraformer.sh)|
|[./bin/zipformer-ctc-hlg.dart](./bin/zipformer-ctc-hlg.dart)| Use a Zipformer CTC model with HLG graph for speech recognition. See [./run-zipformer-ctc-hlg.sh](./run-zipformer-ctc-hlg.sh)|
|[./bin/zipformer-ctc.dart](./bin/zipformer-ctc.dart)| Use a Zipformer CTC model for speech recognition. See [./run-zipformer-ctc.sh](./run-zipformer-ctc.sh)|
|[./bin/zipformer-transducer.dart](./bin/zipformer-transducer.dart)| Use a Zipformer transducer model for speech recognition. See [./run-zipformer-transducer.sh](./run-zipformer-transducer.sh)|


================================================
FILE: dart-api-examples/streaming-asr/analysis_options.yaml
================================================
# This file configures the static analysis results for your project (errors,
# warnings, and lints).
#
# This enables the 'recommended' set of lints from `package:lints`.
# This set helps identify many issues that may lead to problems when running
# or consuming Dart code, and enforces writing Dart using a single, idiomatic
# style and format.
#
# If you want a smaller set of lints you can change this to specify
# 'package:lints/core.yaml'. These are just the most critical lints
# (the recommended set includes the core lints).
# The core lints are also what is used by pub.dev for scoring packages.

include: package:lints/recommended.yaml

# Uncomment the following section to specify additional rules.

# linter:
#   rules:
#     - camel_case_types

# analyzer:
#   exclude:
#     - path/to/excluded/files/**

# For more information about the core and recommended set of lints, see
# https://dart.dev/go/core-lints

# For additional information about configuring this file, see
# https://dart.dev/guides/language/analysis-options


================================================
FILE: dart-api-examples/streaming-asr/bin/paraformer.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('encoder', help: 'Path to the encoder model')
    ..addOption('decoder', help: 'Path to decoder model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['encoder'] == null ||
      res['decoder'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final encoder = res['encoder'] as String;
  final decoder = res['decoder'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final paraformer = sherpa_onnx.OnlineParaformerModelConfig(
    encoder: encoder,
    decoder: decoder,
  );

  final modelConfig = sherpa_onnx.OnlineModelConfig(
    paraformer: paraformer,
    tokens: tokens,
    debug: true,
    numThreads: 1,
  );
  final config = sherpa_onnx.OnlineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OnlineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  final stream = recognizer.createStream();

  // simulate streaming. You can choose an arbitrary chunk size.
  // chunkSize of a single sample is also ok, i.e, chunkSize = 1
  final chunkSize = 1600; // 0.1 second for 16kHz
  final numChunks = waveData.samples.length ~/ chunkSize;

  var last = '';
  for (int i = 0; i != numChunks; ++i) {
    int start = i * chunkSize;
    stream.acceptWaveform(
      samples:
          Float32List.sublistView(waveData.samples, start, start + chunkSize),
      sampleRate: waveData.sampleRate,
    );
    while (recognizer.isReady(stream)) {
      recognizer.decode(stream);
    }
    final result = recognizer.getResult(stream);
    if (result.text != last && result.text != '') {
      last = result.text;
      print(last);
    }
  }

  // 0.5 seconds, assume sampleRate is 16kHz
  final tailPaddings = Float32List(8000);
  stream.acceptWaveform(
    samples: tailPaddings,
    sampleRate: waveData.sampleRate,
  );

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  final result = recognizer.getResult(stream);

  if (result.text != '') {
    print(result.text);
  }

  stream.free();
  recognizer.free();
}


================================================
FILE: dart-api-examples/streaming-asr/bin/t-one-ctc.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final ctc = sherpa_onnx.OnlineToneCtcModelConfig(
    model: model,
  );

  final modelConfig = sherpa_onnx.OnlineModelConfig(
    toneCtc: ctc,
    tokens: tokens,
    debug: true,
    numThreads: 1,
  );
  final config = sherpa_onnx.OnlineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OnlineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  final stream = recognizer.createStream();

  // 0.3 seconds, assume sampleRate is 8kHz
  final leftPaddings = Float32List(2400);
  stream.acceptWaveform(
    samples: leftPaddings,
    sampleRate: waveData.sampleRate,
  );

  // simulate streaming. You can choose an arbitrary chunk size.
  // chunkSize of a single sample is also ok, i.e, chunkSize = 1
  final chunkSize = 1600; // 0.1 second for 16kHz
  final numChunks = waveData.samples.length ~/ chunkSize;

  var last = '';
  for (int i = 0; i != numChunks; ++i) {
    int start = i * chunkSize;
    stream.acceptWaveform(
      samples:
          Float32List.sublistView(waveData.samples, start, start + chunkSize),
      sampleRate: waveData.sampleRate,
    );
    while (recognizer.isReady(stream)) {
      recognizer.decode(stream);
    }
    final result = recognizer.getResult(stream);
    if (result.text != last && result.text != '') {
      last = result.text;
      print(last);
    }
  }

  // 0.6 seconds, assume sampleRate is 8kHz
  final tailPaddings = Float32List(4800);
  stream.acceptWaveform(
    samples: tailPaddings,
    sampleRate: waveData.sampleRate,
  );

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  final result = recognizer.getResult(stream);

  if (result.text != '') {
    print(result.text);
  }

  stream.free();
  recognizer.free();
}


================================================
FILE: dart-api-examples/streaming-asr/bin/zipformer-ctc-hlg.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the model')
    ..addOption('hlg', help: 'Path to HLG.fst')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['hlg'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final hlg = res['hlg'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final ctc = sherpa_onnx.OnlineZipformer2CtcModelConfig(
    model: model,
  );

  final modelConfig = sherpa_onnx.OnlineModelConfig(
    zipformer2Ctc: ctc,
    tokens: tokens,
    debug: true,
    numThreads: 1,
  );
  final config = sherpa_onnx.OnlineRecognizerConfig(
    model: modelConfig,
    ctcFstDecoderConfig: sherpa_onnx.OnlineCtcFstDecoderConfig(graph: hlg),
  );
  final recognizer = sherpa_onnx.OnlineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  final stream = recognizer.createStream();

  // simulate streaming. You can choose an arbitrary chunk size.
  // chunkSize of a single sample is also ok, i.e, chunkSize = 1
  final chunkSize = 1600; // 0.1 second for 16kHz
  final numChunks = waveData.samples.length ~/ chunkSize;

  var last = '';
  for (int i = 0; i != numChunks; ++i) {
    int start = i * chunkSize;
    stream.acceptWaveform(
      samples:
          Float32List.sublistView(waveData.samples, start, start + chunkSize),
      sampleRate: waveData.sampleRate,
    );
    while (recognizer.isReady(stream)) {
      recognizer.decode(stream);
    }
    final result = recognizer.getResult(stream);
    if (result.text != last && result.text != '') {
      last = result.text;
      print(last);
    }
  }

  // 0.5 seconds, assume sampleRate is 16kHz
  final tailPaddings = Float32List(8000);
  stream.acceptWaveform(
    samples: tailPaddings,
    sampleRate: waveData.sampleRate,
  );

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  final result = recognizer.getResult(stream);

  if (result.text != '') {
    print(result.text);
  }

  stream.free();
  recognizer.free();
}


================================================
FILE: dart-api-examples/streaming-asr/bin/zipformer-ctc.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final ctc = sherpa_onnx.OnlineZipformer2CtcModelConfig(
    model: model,
  );

  final modelConfig = sherpa_onnx.OnlineModelConfig(
    zipformer2Ctc: ctc,
    tokens: tokens,
    debug: true,
    numThreads: 1,
  );
  final config = sherpa_onnx.OnlineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OnlineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  final stream = recognizer.createStream();

  // simulate streaming. You can choose an arbitrary chunk size.
  // chunkSize of a single sample is also ok, i.e, chunkSize = 1
  final chunkSize = 1600; // 0.1 second for 16kHz
  final numChunks = waveData.samples.length ~/ chunkSize;

  var last = '';
  for (int i = 0; i != numChunks; ++i) {
    int start = i * chunkSize;
    stream.acceptWaveform(
      samples:
          Float32List.sublistView(waveData.samples, start, start + chunkSize),
      sampleRate: waveData.sampleRate,
    );
    while (recognizer.isReady(stream)) {
      recognizer.decode(stream);
    }
    final result = recognizer.getResult(stream);
    if (result.text != last && result.text != '') {
      last = result.text;
      print(last);
    }
  }

  // 0.5 seconds, assume sampleRate is 16kHz
  final tailPaddings = Float32List(8000);
  stream.acceptWaveform(
    samples: tailPaddings,
    sampleRate: waveData.sampleRate,
  );

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  final result = recognizer.getResult(stream);

  if (result.text != '') {
    print(result.text);
  }

  stream.free();
  recognizer.free();
}


================================================
FILE: dart-api-examples/streaming-asr/bin/zipformer-transducer.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('encoder', help: 'Path to the encoder model')
    ..addOption('decoder', help: 'Path to decoder model')
    ..addOption('joiner', help: 'Path to joiner model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['encoder'] == null ||
      res['decoder'] == null ||
      res['joiner'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final encoder = res['encoder'] as String;
  final decoder = res['decoder'] as String;
  final joiner = res['joiner'] as String;
  final tokens = res['tokens'] as String;
  final ruleFsts = res['rule-fsts'] as String;
  final inputWav = res['input-wav'] as String;

  final transducer = sherpa_onnx.OnlineTransducerModelConfig(
    encoder: encoder,
    decoder: decoder,
    joiner: joiner,
  );

  final modelConfig = sherpa_onnx.OnlineModelConfig(
    transducer: transducer,
    tokens: tokens,
    debug: true,
    numThreads: 1,
  );
  final config = sherpa_onnx.OnlineRecognizerConfig(
    model: modelConfig,
    ruleFsts: ruleFsts,
  );
  final recognizer = sherpa_onnx.OnlineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  final stream = recognizer.createStream();

  // simulate streaming. You can choose an arbitrary chunk size.
  // chunkSize of a single sample is also ok, i.e, chunkSize = 1
  final chunkSize = 1600; // 0.1 second for 16kHz
  final numChunks = waveData.samples.length ~/ chunkSize;

  var last = '';
  for (int i = 0; i != numChunks; ++i) {
    int start = i * chunkSize;
    stream.acceptWaveform(
      samples:
          Float32List.sublistView(waveData.samples, start, start + chunkSize),
      sampleRate: waveData.sampleRate,
    );
    while (recognizer.isReady(stream)) {
      recognizer.decode(stream);
    }
    final result = recognizer.getResult(stream);
    if (result.text != last && result.text != '') {
      last = result.text;
      print(last);
    }
  }

  // 0.5 seconds, assume sampleRate is 16kHz
  final tailPaddings = Float32List(8000);
  stream.acceptWaveform(
    samples: tailPaddings,
    sampleRate: waveData.sampleRate,
  );

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  final result = recognizer.getResult(stream);

  if (result.text != '') {
    print(result.text);
  }

  stream.free();
  recognizer.free();
}


================================================
FILE: dart-api-examples/streaming-asr/pubspec.yaml
================================================
name: streaming_asr

description: >
  This example demonstrates how to use the Dart API for streaming speech recognition.

version: 1.0.0
# repository: https://github.com/my_org/my_repo

environment:
  sdk: ">=3.0.0 <4.0.0"

# Add regular dependencies here.
dependencies:
  sherpa_onnx: ^1.12.31
  path: ^1.9.0
  args: ^2.5.0

dev_dependencies:
  lints: ^3.0.0
  test: ^1.24.0


================================================
FILE: dart-api-examples/streaming-asr/run-nemo-transducer.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms.tar.bz2
  tar xvf sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms.tar.bz2
  rm sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms.tar.bz2
fi

dart run \
  ./bin/zipformer-transducer.dart \
  --encoder ./sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms/encoder.onnx \
  --decoder ./sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms/decoder.onnx \
  --joiner ./sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms/joiner.onnx \
  --tokens ./sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms/tokens.txt \
  --input-wav ./sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms/test_wavs/0.wav


================================================
FILE: dart-api-examples/streaming-asr/run-paraformer.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
  tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
  rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
fi

dart run \
  ./bin/paraformer.dart \
  --encoder ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx \
  --decoder ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx \
  --tokens ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt \
  --input-wav ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav


================================================
FILE: dart-api-examples/streaming-asr/run-t-one-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
fi

dart run \
  ./bin/t-one-ctc.dart \
  --model ./sherpa-onnx-streaming-t-one-russian-2025-09-08/model.onnx \
  --tokens ./sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt \
  --input-wav ./sherpa-onnx-streaming-t-one-russian-2025-09-08/0.wav


================================================
FILE: dart-api-examples/streaming-asr/run-zipformer-ctc-hlg.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
fi

dart run \
  ./bin/zipformer-ctc-hlg.dart \
  --model ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx \
  --hlg ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst \
  --tokens ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt \
  --input-wav ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/1.wav


================================================
FILE: dart-api-examples/streaming-asr/run-zipformer-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
fi

dart run \
  ./bin/zipformer-ctc.dart \
  --model ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx \
  --tokens ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt \
  --input-wav ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/1.wav


================================================
FILE: dart-api-examples/streaming-asr/run-zipformer-transducer-itn.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
fi

if [ ! -f ./itn-zh-number.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
fi

if [ ! -f ./itn_zh_number.fst ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi

dart run \
  ./bin/zipformer-transducer.dart \
  --encoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx \
  --decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \
  --joiner ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx \
  --tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \
  --rule-fsts ./itn_zh_number.fst \
  --input-wav ./itn-zh-number.wav


================================================
FILE: dart-api-examples/streaming-asr/run-zipformer-transducer.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
fi

dart run \
  ./bin/zipformer-transducer.dart \
  --encoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx \
  --decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \
  --joiner ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx \
  --tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \
  --input-wav ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav


================================================
FILE: dart-api-examples/streaming-speech-enhancement-dpdfnet/README.md
================================================
# Streaming Speech Enhancement Example

This example shows how to use the Dart streaming speech denoiser API with
DPDFNet models.

Use 16 kHz DPDFNet models such as `dpdfnet_baseline.onnx`, `dpdfnet2.onnx`,
`dpdfnet4.onnx`, or `dpdfnet8.onnx` for downstream ASR or speech recognition.

DPDFNet models are available from either:

- https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
- https://huggingface.co/Ceva-IP/DPDFNet

Then run:

```bash
dart pub get
dart run ./bin/streaming_speech_enhancement_dpdfnet.dart --model ./dpdfnet_baseline.onnx --input-wav ./inp_16k.wav --output-wav ./enhanced-online-dpdfnet.wav
```


================================================
FILE: dart-api-examples/streaming-speech-enhancement-dpdfnet/bin/streaming_speech_enhancement_dpdfnet.dart
================================================
// Copyright (c)  2026  Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to a DPDFNet onnx model')
    ..addOption('input-wav', help: 'Path to input.wav')
    ..addOption('output-wav', help: 'Path to output.wav');

  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['input-wav'] == null ||
      res['output-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final inputWav = res['input-wav'] as String;
  final outputWav = res['output-wav'] as String;

  final config = sherpa_onnx.OnlineSpeechDenoiserConfig(
    model: sherpa_onnx.OfflineSpeechDenoiserModelConfig(
      gtcrn: const sherpa_onnx.OfflineSpeechDenoiserGtcrnModelConfig(),
      dpdfnet:
          sherpa_onnx.OfflineSpeechDenoiserDpdfNetModelConfig(model: model),
      numThreads: 1,
      debug: true,
      provider: 'cpu',
    ),
  );

  final sd = sherpa_onnx.OnlineSpeechDenoiser(config);
  final waveData = sherpa_onnx.readWave(inputWav);
  final frameShift = sd.frameShiftInSamples;
  final output = <double>[];

  var start = 0;
  while (start < waveData.samples.length) {
    final end = start + frameShift < waveData.samples.length
        ? start + frameShift
        : waveData.samples.length;
    final chunk = waveData.samples.sublist(start, end);
    final denoised = sd.run(samples: chunk, sampleRate: waveData.sampleRate);
    output.addAll(denoised.samples);
    start = end;
  }

  output.addAll(sd.flush().samples);
  sd.free();

  sherpa_onnx.writeWave(
    filename: outputWav,
    samples: Float32List.fromList(output),
    sampleRate: waveData.sampleRate,
  );

  print('Saved to $outputWav');
}


================================================
FILE: dart-api-examples/streaming-speech-enhancement-dpdfnet/run.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./dpdfnet_baseline.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet_baseline.onnx
fi

if [ ! -f ./inp_16k.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
fi

dart run \
  ./bin/streaming_speech_enhancement_dpdfnet.dart \
  --model ./dpdfnet_baseline.onnx \
  --input-wav ./inp_16k.wav \
  --output-wav ./enhanced-online-dpdfnet.wav

ls -lh *.wav


================================================
FILE: dart-api-examples/streaming-speech-enhancement-gtcrn/README.md
================================================
# Streaming Speech Enhancement Example

This example shows how to use the Dart streaming speech denoiser API with GTCRN
models.

Download GTCRN models and test wave files from:

- https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models

Then run:

```bash
dart pub get
dart run ./bin/streaming_speech_enhancement_gtcrn.dart --model ./gtcrn_simple.onnx --input-wav ./inp_16k.wav --output-wav ./enhanced-online-gtcrn.wav
```


================================================
FILE: dart-api-examples/streaming-speech-enhancement-gtcrn/bin/streaming_speech_enhancement_gtcrn.dart
================================================
// Copyright (c)  2026  Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to a GTCRN onnx model')
    ..addOption('input-wav', help: 'Path to input.wav')
    ..addOption('output-wav', help: 'Path to output.wav');

  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['input-wav'] == null ||
      res['output-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final inputWav = res['input-wav'] as String;
  final outputWav = res['output-wav'] as String;

  final config = sherpa_onnx.OnlineSpeechDenoiserConfig(
    model: sherpa_onnx.OfflineSpeechDenoiserModelConfig(
      gtcrn: sherpa_onnx.OfflineSpeechDenoiserGtcrnModelConfig(model: model),
      dpdfnet: const sherpa_onnx.OfflineSpeechDenoiserDpdfNetModelConfig(),
      numThreads: 1,
      debug: true,
      provider: 'cpu',
    ),
  );

  final sd = sherpa_onnx.OnlineSpeechDenoiser(config);
  final waveData = sherpa_onnx.readWave(inputWav);
  final frameShift = sd.frameShiftInSamples;
  final output = <double>[];

  var start = 0;
  while (start < waveData.samples.length) {
    final end = start + frameShift < waveData.samples.length
        ? start + frameShift
        : waveData.samples.length;
    final chunk = waveData.samples.sublist(start, end);
    final denoised = sd.run(samples: chunk, sampleRate: waveData.sampleRate);
    output.addAll(denoised.samples);
    start = end;
  }

  output.addAll(sd.flush().samples);
  sd.free();

  sherpa_onnx.writeWave(
    filename: outputWav,
    samples: Float32List.fromList(output),
    sampleRate: waveData.sampleRate,
  );

  print('Saved to $outputWav');
}


================================================
FILE: dart-api-examples/streaming-speech-enhancement-gtcrn/run.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./gtcrn_simple.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
fi

if [ ! -f ./inp_16k.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
fi

dart run \
  ./bin/streaming_speech_enhancement_gtcrn.dart \
  --model ./gtcrn_simple.onnx \
  --input-wav ./inp_16k.wav \
  --output-wav ./enhanced-online-gtcrn.wav

ls -lh *.wav


================================================
FILE: dart-api-examples/tts/.gitignore
================================================
# https://dart.dev/guides/libraries/private-files
# Created by `dart pub`
.dart_tool/


================================================
FILE: dart-api-examples/tts/CHANGELOG.md
================================================
## 1.0.0

- Initial version.


================================================
FILE: dart-api-examples/tts/README.md
================================================
# Introduction

This folder contains examples for text to speech with Dart API.

| File | Description|
|------|------------|
|[./bin/piper.dart](./bin/piper.dart)| Use a Piper tts model for text to speech. See [./run-piper.sh](./run-piper.sh)|
|[./bin/coqui.dart](./bin/coqui.dart)| Use a Coqui tts model for text to speech. See [./run-coqui.sh](./run-coqui.sh)|
|[./bin/zh.dart](./bin/zh.dart)| Use a Chinese VITS tts model for text to speech. See [./run-zh.sh](./run-zh.sh)|
|[./bin/zipvoice-zh-en.dart](./bin/zipvoice-zh-en.dart)| Use a ZipVoice Chinese/English zero-shot TTS model. See [./run-zipvoice-zh-en.sh](./run-zipvoice-zh-en.sh)|


================================================
FILE: dart-api-examples/tts/analysis_options.yaml
================================================
# This file configures the static analysis results for your project (errors,
# warnings, and lints).
#
# This enables the 'recommended' set of lints from `package:lints`.
# This set helps identify many issues that may lead to problems when running
# or consuming Dart code, and enforces writing Dart using a single, idiomatic
# style and format.
#
# If you want a smaller set of lints you can change this to specify
# 'package:lints/core.yaml'. These are just the most critical lints
# (the recommended set includes the core lints).
# The core lints are also what is used by pub.dev for scoring packages.

include: package:lints/recommended.yaml

# Uncomment the following section to specify additional rules.

# linter:
#   rules:
#     - camel_case_types

# analyzer:
#   exclude:
#     - path/to/excluded/files/**

# For more information about the core and recommended set of lints, see
# https://dart.dev/go/core-lints

# For additional information about configuring this file, see
# https://dart.dev/guides/language/analysis-options


================================================
FILE: dart-api-examples/tts/bin/coqui.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the ONNX model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('text', help: 'Text to generate TTS for')
    ..addOption('output-wav', help: 'Filename to save the generated audio')
    ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
    ..addOption(
      'sid',
      help: 'Speaker ID to select. Used only for multi-speaker TTS',
      defaultsTo: '0',
    );
  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['tokens'] == null ||
      res['output-wav'] == null ||
      res['text'] == null) {
    print(parser.usage);
    exit(1);
  }
  final model = res['model'] as String;
  final tokens = res['tokens'] as String;
  final text = res['text'] as String;
  final outputWav = res['output-wav'] as String;
  var speed = double.tryParse(res['speed'] as String) ?? 1.0;
  final sid = int.tryParse(res['sid'] as String) ?? 0;

  if (speed == 0) {
    speed = 1.0;
  }

  final vits = sherpa_onnx.OfflineTtsVitsModelConfig(
    model: model,
    tokens: tokens,
  );

  final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
    vits: vits,
    numThreads: 1,
    debug: true,
  );
  final config = sherpa_onnx.OfflineTtsConfig(
    model: modelConfig,
    maxNumSenetences: 1,
  );

  final tts = sherpa_onnx.OfflineTts(config);
  final genConfig = sherpa_onnx.OfflineTtsGenerationConfig(
    sid: sid,
    speed: speed,
    silenceScale: 0.2,
  );
  final audio = tts.generateWithConfig(text: text, config: genConfig);
  tts.free();

  sherpa_onnx.writeWave(
    filename: outputWav,
    samples: audio.samples,
    sampleRate: audio.sampleRate,
  );
  print('Saved to $outputWav');
}


================================================
FILE: dart-api-examples/tts/bin/kitten-en.dart
================================================
// Copyright (c)  2025  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the onnx model')
    ..addOption('voices', help: 'Path to the voices.bin')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption(
      'data-dir',
      help: 'Path to espeak-ng-data directory',
      defaultsTo: '',
    )
    ..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '')
    ..addOption('rule-fars', help: 'Path to rule fars', defaultsTo: '')
    ..addOption('text', help: 'Text to generate TTS for')
    ..addOption('output-wav', help: 'Filename to save the generated audio')
    ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
    ..addOption(
      'sid',
      help: 'Speaker ID to select. Used only for multi-speaker TTS',
      defaultsTo: '0',
    );
  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['voices'] == null ||
      res['tokens'] == null ||
      res['data-dir'] == null ||
      res['output-wav'] == null ||
      res['text'] == null) {
    print(parser.usage);
    exit(1);
  }
  final model = res['model'] as String;
  final voices = res['voices'] as String;
  final tokens = res['tokens'] as String;
  final dataDir = res['data-dir'] as String;
  final ruleFsts = res['rule-fsts'] as String;
  final ruleFars = res['rule-fars'] as String;
  final text = res['text'] as String;
  final outputWav = res['output-wav'] as String;
  var speed = double.tryParse(res['speed'] as String) ?? 1.0;
  final sid = int.tryParse(res['sid'] as String) ?? 0;

  if (speed == 0) {
    speed = 1.0;
  }

  final kitten = sherpa_onnx.OfflineTtsKittenModelConfig(
    model: model,
    voices: voices,
    tokens: tokens,
    dataDir: dataDir,
  );

  final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
    kitten: kitten,
    numThreads: 1,
    debug: true,
  );
  final config = sherpa_onnx.OfflineTtsConfig(
    model: modelConfig,
    maxNumSenetences: 1,
    ruleFsts: ruleFsts,
    ruleFars: ruleFars,
  );

  final tts = sherpa_onnx.OfflineTts(config);
  final genConfig = sherpa_onnx.OfflineTtsGenerationConfig(
    sid: sid,
    speed: speed,
    silenceScale: config.silenceScale,
  );
  final audio = tts.generateWithConfig(text: text, config: genConfig);
  tts.free();

  sherpa_onnx.writeWave(
    filename: outputWav,
    samples: audio.samples,
    sampleRate: audio.sampleRate,
  );
  print('Saved to $outputWav');
}


================================================
FILE: dart-api-examples/tts/bin/kokoro-en.dart
================================================
// Copyright (c)  2025  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the onnx model')
    ..addOption('voices', help: 'Path to the voices.bin')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption(
      'data-dir',
      help: 'Path to espeak-ng-data directory',
      defaultsTo: '',
    )
    ..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '')
    ..addOption('rule-fars', help: 'Path to rule fars', defaultsTo: '')
    ..addOption('text', help: 'Text to generate TTS for')
    ..addOption('output-wav', help: 'Filename to save the generated audio')
    ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
    ..addOption(
      'sid',
      help: 'Speaker ID to select. Used only for multi-speaker TTS',
      defaultsTo: '0',
    );
  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['voices'] == null ||
      res['tokens'] == null ||
      res['data-dir'] == null ||
      res['output-wav'] == null ||
      res['text'] == null) {
    print(parser.usage);
    exit(1);
  }
  final model = res['model'] as String;
  final voices = res['voices'] as String;
  final tokens = res['tokens'] as String;
  final dataDir = res['data-dir'] as String;
  final ruleFsts = res['rule-fsts'] as String;
  final ruleFars = res['rule-fars'] as String;
  final text = res['text'] as String;
  final outputWav = res['output-wav'] as String;
  var speed = double.tryParse(res['speed'] as String) ?? 1.0;
  final sid = int.tryParse(res['sid'] as String) ?? 0;

  if (speed == 0) {
    speed = 1.0;
  }

  final kokoro = sherpa_onnx.OfflineTtsKokoroModelConfig(
    model: model,
    voices: voices,
    tokens: tokens,
    dataDir: dataDir,
  );

  final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
    kokoro: kokoro,
    numThreads: 1,
    debug: true,
  );
  final config = sherpa_onnx.OfflineTtsConfig(
    model: modelConfig,
    maxNumSenetences: 1,
    ruleFsts: ruleFsts,
    ruleFars: ruleFars,
  );

  final tts = sherpa_onnx.OfflineTts(config);
  final genConfig = sherpa_onnx.OfflineTtsGenerationConfig(
    sid: sid,
    speed: speed,
    silenceScale: config.silenceScale,
  );
  final audio = tts.generateWithConfig(text: text, config: genConfig);
  tts.free();

  sherpa_onnx.writeWave(
    filename: outputWav,
    samples: audio.samples,
    sampleRate: audio.sampleRate,
  );
  print('Saved to $outputWav');
}


================================================
FILE: dart-api-examples/tts/bin/kokoro-zh-en.dart
================================================
// Copyright (c)  2025  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the onnx model')
    ..addOption('voices', help: 'Path to the voices.bin')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption(
      'data-dir',
      help: 'Path to espeak-ng-data directory',
      defaultsTo: '',
    )
    ..addOption(
      'lexicon',
      help: 'Path to lexicon files',
      defaultsTo: '',
    )
    ..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '')
    ..addOption('rule-fars', help: 'Path to rule fars', defaultsTo: '')
    ..addOption('text', help: 'Text to generate TTS for')
    ..addOption('output-wav', help: 'Filename to save the generated audio')
    ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
    ..addOption(
      'sid',
      help: 'Speaker ID to select. Used only for multi-speaker TTS',
      defaultsTo: '0',
    );
  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['voices'] == null ||
      res['tokens'] == null ||
      res['data-dir'] == null ||
      res['lexicon'] == null ||
      res['output-wav'] == null ||
      res['text'] == null) {
    print(parser.usage);
    exit(1);
  }
  final model = res['model'] as String;
  final voices = res['voices'] as String;
  final tokens = res['tokens'] as String;
  final dataDir = res['data-dir'] as String;
  final lexicon = res['lexicon'] as String;
  final ruleFsts = res['rule-fsts'] as String;
  final ruleFars = res['rule-fars'] as String;
  final text = res['text'] as String;
  final outputWav = res['output-wav'] as String;
  var speed = double.tryParse(res['speed'] as String) ?? 1.0;
  final sid = int.tryParse(res['sid'] as String) ?? 0;

  if (speed == 0) {
    speed = 1.0;
  }

  final kokoro = sherpa_onnx.OfflineTtsKokoroModelConfig(
    model: model,
    voices: voices,
    tokens: tokens,
    dataDir: dataDir,
    lexicon: lexicon,
  );

  final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
    kokoro: kokoro,
    numThreads: 1,
    debug: true,
  );
  final config = sherpa_onnx.OfflineTtsConfig(
    model: modelConfig,
    maxNumSenetences: 1,
    ruleFsts: ruleFsts,
    ruleFars: ruleFars,
  );

  final tts = sherpa_onnx.OfflineTts(config);
  final genConfig = sherpa_onnx.OfflineTtsGenerationConfig(
    sid: sid,
    speed: speed,
    silenceScale: config.silenceScale,
  );
  final audio = tts.generateWithConfig(text: text, config: genConfig);
  tts.free();

  sherpa_onnx.writeWave(
    filename: outputWav,
    samples: audio.samples,
    sampleRate: audio.sampleRate,
  );
  print('Saved to $outputWav');
}


================================================
FILE: dart-api-examples/tts/bin/matcha-en.dart
================================================
// Copyright (c)  2025  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('acoustic-model', help: 'Path to the acoustic model')
    ..addOption('vocoder', help: 'Path to the vocoder model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption(
      'data-dir',
      help: 'Path to espeak-ng-data directory',
      defaultsTo: '',
    )
    ..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '')
    ..addOption('rule-fars', help: 'Path to rule fars', defaultsTo: '')
    ..addOption('text', help: 'Text to generate TTS for')
    ..addOption('output-wav', help: 'Filename to save the generated audio')
    ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
    ..addOption(
      'sid',
      help: 'Speaker ID to select. Used only for multi-speaker TTS',
      defaultsTo: '0',
    );
  final res = parser.parse(arguments);
  if (res['acoustic-model'] == null ||
      res['vocoder'] == null ||
      res['tokens'] == null ||
      res['data-dir'] == null ||
      res['output-wav'] == null ||
      res['text'] == null) {
    print(parser.usage);
    exit(1);
  }
  final acousticModel = res['acoustic-model'] as String;
  final vocoder = res['vocoder'] as String;
  final tokens = res['tokens'] as String;
  final dataDir = res['data-dir'] as String;
  final ruleFsts = res['rule-fsts'] as String;
  final ruleFars = res['rule-fars'] as String;
  final text = res['text'] as String;
  final outputWav = res['output-wav'] as String;
  var speed = double.tryParse(res['speed'] as String) ?? 1.0;
  final sid = int.tryParse(res['sid'] as String) ?? 0;

  if (speed == 0) {
    speed = 1.0;
  }

  final matcha = sherpa_onnx.OfflineTtsMatchaModelConfig(
    acousticModel: acousticModel,
    vocoder: vocoder,
    tokens: tokens,
    dataDir: dataDir,
  );

  final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
    matcha: matcha,
    numThreads: 1,
    debug: true,
  );
  final config = sherpa_onnx.OfflineTtsConfig(
    model: modelConfig,
    maxNumSenetences: 1,
    ruleFsts: ruleFsts,
    ruleFars: ruleFars,
  );

  final tts = sherpa_onnx.OfflineTts(config);
  final genConfig = sherpa_onnx.OfflineTtsGenerationConfig(
    sid: sid,
    speed: speed,
    silenceScale: config.silenceScale,
  );
  final audio = tts.generateWithConfig(text: text, config: genConfig);
  tts.free();

  sherpa_onnx.writeWave(
    filename: outputWav,
    samples: audio.samples,
    sampleRate: audio.sampleRate,
  );
  print('Saved to $outputWav');
}


================================================
FILE: dart-api-examples/tts/bin/matcha-zh.dart
================================================
// Copyright (c)  2025  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('acoustic-model', help: 'Path to the acoustic model')
    ..addOption('vocoder', help: 'Path to the vocoder model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('lexicon', help: 'Path to lexicon.txt')
    ..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '')
    ..addOption('rule-fars', help: 'Path to rule fars', defaultsTo: '')
    ..addOption('text', help: 'Text to generate TTS for')
    ..addOption('output-wav', help: 'Filename to save the generated audio')
    ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
    ..addOption(
      'sid',
      help: 'Speaker ID to select. Used only for multi-speaker TTS',
      defaultsTo: '0',
    );
  final res = parser.parse(arguments);
  if (res['acoustic-model'] == null ||
      res['vocoder'] == null ||
      res['lexicon'] == null ||
      res['tokens'] == null ||
      res['output-wav'] == null ||
      res['text'] == null) {
    print(parser.usage);
    exit(1);
  }
  final acousticModel = res['acoustic-model'] as String;
  final vocoder = res['vocoder'] as String;
  final lexicon = res['lexicon'] as String;
  final tokens = res['tokens'] as String;
  final ruleFsts = res['rule-fsts'] as String;
  final ruleFars = res['rule-fars'] as String;
  final text = res['text'] as String;
  final outputWav = res['output-wav'] as String;
  var speed = double.tryParse(res['speed'] as String) ?? 1.0;
  final sid = int.tryParse(res['sid'] as String) ?? 0;

  if (speed == 0) {
    speed = 1.0;
  }

  final matcha = sherpa_onnx.OfflineTtsMatchaModelConfig(
    acousticModel: acousticModel,
    vocoder: vocoder,
    lexicon: lexicon,
    tokens: tokens,
  );

  final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
    matcha: matcha,
    numThreads: 1,
    debug: true,
  );
  final config = sherpa_onnx.OfflineTtsConfig(
    model: modelConfig,
    maxNumSenetences: 1,
    ruleFsts: ruleFsts,
    ruleFars: ruleFars,
  );

  final tts = sherpa_onnx.OfflineTts(config);
  final genConfig = sherpa_onnx.OfflineTtsGenerationConfig(
    sid: sid,
    speed: speed,
    silenceScale: config.silenceScale,
  );
  final audio = tts.generateWithConfig(text: text, config: genConfig);
  tts.free();

  sherpa_onnx.writeWave(
    filename: outputWav,
    samples: audio.samples,
    sampleRate: audio.sampleRate,
  );
  print('Saved to $outputWav');
}


================================================
FILE: dart-api-examples/tts/bin/piper.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the ONNX model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('data-dir', help: 'Path to espeak-ng-data directory')
    ..addOption('text', help: 'Text to generate TTS for')
    ..addOption('output-wav', help: 'Filename to save the generated audio')
    ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
    ..addOption(
      'sid',
      help: 'Speaker ID to select. Used only for multi-speaker TTS',
      defaultsTo: '0',
    );
  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['tokens'] == null ||
      res['data-dir'] == null ||
      res['output-wav'] == null ||
      res['text'] == null) {
    print(parser.usage);
    exit(1);
  }
  final model = res['model'] as String;
  final tokens = res['tokens'] as String;
  final dataDir = res['data-dir'] as String;
  final text = res['text'] as String;
  final outputWav = res['output-wav'] as String;
  var speed = double.tryParse(res['speed'] as String) ?? 1.0;
  final sid = int.tryParse(res['sid'] as String) ?? 0;

  if (speed == 0) {
    speed = 1.0;
  }

  final vits = sherpa_onnx.OfflineTtsVitsModelConfig(
    model: model,
    tokens: tokens,
    dataDir: dataDir,
  );

  final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
    vits: vits,
    numThreads: 1,
    debug: true,
  );
  final config = sherpa_onnx.OfflineTtsConfig(
    model: modelConfig,
    maxNumSenetences: 1,
  );

  final tts = sherpa_onnx.OfflineTts(config);
  final genConfig = sherpa_onnx.OfflineTtsGenerationConfig(
    sid: sid,
    speed: speed,
    silenceScale: 0.2,
  );
  final audio = tts.generateWithConfig(
      text: text,
      config: genConfig,
      callback: (Float32List samples) {
        print('${samples.length} samples received');
        // You can play samples in a separate thread/isolate

        // 1 means to continue
        // 0 means to stop
        return 1;
      });
  tts.free();

  sherpa_onnx.writeWave(
    filename: outputWav,
    samples: audio.samples,
    sampleRate: audio.sampleRate,
  );
  print('Saved to $outputWav');
}


================================================
FILE: dart-api-examples/tts/bin/pocket-en.dart
================================================
// Copyright (c)  2026  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('lm-flow', help: 'Path to the lm flow model')
    ..addOption('lm-main', help: 'Path to the lm main model')
    ..addOption('encoder', help: 'Path to the encoder model')
    ..addOption('decoder', help: 'Path to the decoder model')
    ..addOption('text-conditioner', help: 'Path to the text conditioner model')
    ..addOption('vocab-json', help: 'Path to the vocab.json file')
    ..addOption('token-scores-json', help: 'Path to the token_scores.json file')
    ..addOption('reference-audio', help: 'Path to reference audio (wav)')
    ..addOption('text', help: 'Text to generate TTS for')
    ..addOption('output-wav', help: 'Filename to save the generated audio')
    ..addOption(
      'voice-embedding-cache-capacity',
      help: 'Voice embedding cache capacity (default: 50)',
      defaultsTo: '50',
    )
    ..addOption(
      'seed',
      help: 'Random seed for reproducibility (default: -1, random)',
      defaultsTo: '-1',
    );

  final res = parser.parse(arguments);

  if (res['lm-flow'] == null ||
      res['lm-main'] == null ||
      res['encoder'] == null ||
      res['decoder'] == null ||
      res['text-conditioner'] == null ||
      res['vocab-json'] == null ||
      res['token-scores-json'] == null ||
      res['reference-audio'] == null ||
      res['output-wav'] == null ||
      res['text'] == null) {
    print(parser.usage);
    exit(1);
  }

  final lmFlow = res['lm-flow'] as String;
  final lmMain = res['lm-main'] as String;
  final encoder = res['encoder'] as String;
  final decoder = res['decoder'] as String;
  final textConditioner = res['text-conditioner'] as String;
  final vocabJson = res['vocab-json'] as String;
  final tokenScoresJson = res['token-scores-json'] as String;
  final referenceAudioPath = res['reference-audio'] as String;
  final text = res['text'] as String;
  final outputWav = res['output-wav'] as String;
  final voiceEmbeddingCacheCapacity = int.parse(
    res['voice-embedding-cache-capacity'] as String,
  );
  final seed = int.parse(res['seed'] as String);

  // ---------------- Pocket model config ----------------
  final pocket = sherpa_onnx.OfflineTtsPocketModelConfig(
    lmFlow: lmFlow,
    lmMain: lmMain,
    encoder: encoder,
    decoder: decoder,
    textConditioner: textConditioner,
    vocabJson: vocabJson,
    tokenScoresJson: tokenScoresJson,
    voiceEmbeddingCacheCapacity: voiceEmbeddingCacheCapacity,
  );

  final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
    pocket: pocket,
    numThreads: 1,
    debug: true,
  );

  final config = sherpa_onnx.OfflineTtsConfig(model: modelConfig);

  final tts = sherpa_onnx.OfflineTts(config);

  // ---------------- Reference audio (REQUIRED) ----------------
  final wave = sherpa_onnx.readWave(referenceAudioPath);
  if (wave.samples.isEmpty || wave.sampleRate == 0) {
    throw Exception('Failed to read reference audio: $referenceAudioPath');
  }

  final genConfig = sherpa_onnx.OfflineTtsGenerationConfig(
    sid: 0,
    speed: 1.0,
    referenceAudio: wave.samples,
    referenceSampleRate: wave.sampleRate,
    extra: {"max_reference_audio_len": 12, if (seed >= 0) "seed": seed},
  );

  // If you don't want to use a callback
  // final audio = tts.generateWithConfig(text: text, config: genConfig);

  final audio = tts.generateWithConfig(
    text: text,
    config: genConfig,
    onProgress: (samples, progress) {
      // Print progress as percentage
      print("Progress: ${(progress * 100).toStringAsFixed(2)}%");

      // Print the length of the received samples chunk
      print("Received samples length: ${samples.length}");

      // Return 1 to continue, 0 to stop generation
      return 1;
    },
  );

  tts.free();

  sherpa_onnx.writeWave(
    filename: outputWav,
    samples: audio.samples,
    sampleRate: audio.sampleRate,
  );

  print('Saved to $outputWav');
}


================================================
FILE: dart-api-examples/tts/bin/supertonic-en.dart
================================================
// Copyright (c)  2026  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('duration-predictor',
        help: 'Path to the duration predictor model')
    ..addOption('text-encoder', help: 'Path to the text encoder model')
    ..addOption('vector-estimator',
        help: 'Path to the vector estimator model')
    ..addOption('vocoder', help: 'Path to the vocoder model')
    ..addOption('tts-json', help: 'Path to tts.json')
    ..addOption('unicode-indexer', help: 'Path to unicode_indexer.bin')
    ..addOption('voice-style', help: 'Path to voice.bin')
    ..addOption('sid', help: 'Speaker ID (default: 6)', defaultsTo: '6')
    ..addOption('speed', help: 'Speed (default: 1.25)', defaultsTo: '1.25')
    ..addOption('num-steps',
        help: 'Number of steps (default: 5)', defaultsTo: '5')
    ..addOption('text', help: 'Text to generate TTS for')
    ..addOption('output-wav', help: 'Filename to save the generated audio');

  final res = parser.parse(arguments);

  if (res['duration-predictor'] == null ||
      res['text-encoder'] == null ||
      res['vector-estimator'] == null ||
      res['vocoder'] == null ||
      res['tts-json'] == null ||
      res['unicode-indexer'] == null ||
      res['voice-style'] == null ||
      res['output-wav'] == null ||
      res['text'] == null) {
    print(parser.usage);
    exit(1);
  }

  final durationPredictor = res['duration-predictor'] as String;
  final textEncoder = res['text-encoder'] as String;
  final vectorEstimator = res['vector-estimator'] as String;
  final vocoder = res['vocoder'] as String;
  final ttsJson = res['tts-json'] as String;
  final unicodeIndexer = res['unicode-indexer'] as String;
  final voiceStyle = res['voice-style'] as String;
  final sid = int.parse(res['sid'] as String);
  final speed = double.parse(res['speed'] as String);
  final numSteps = int.parse(res['num-steps'] as String);
  final text = res['text'] as String;
  final outputWav = res['output-wav'] as String;

  final supertonic = sherpa_onnx.OfflineTtsSupertonicModelConfig(
    durationPredictor: durationPredictor,
    textEncoder: textEncoder,
    vectorEstimator: vectorEstimator,
    vocoder: vocoder,
    ttsJson: ttsJson,
    unicodeIndexer: unicodeIndexer,
    voiceStyle: voiceStyle,
  );

  final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
    supertonic: supertonic,
    numThreads: 2,
    debug: true,
  );

  final config = sherpa_onnx.OfflineTtsConfig(model: modelConfig);

  final tts = sherpa_onnx.OfflineTts(config);

  final genConfig = sherpa_onnx.OfflineTtsGenerationConfig(
    sid: sid,
    speed: speed,
    extra: {'lang': 'en', 'num_steps': numSteps},
  );

  final audio = tts.generateWithConfig(
    text: text,
    config: genConfig,
    onProgress: (samples, progress) {
      print('Progress: ${(progress * 100).toStringAsFixed(2)}%');
      return 1;
    },
  );

  tts.free();

  sherpa_onnx.writeWave(
    filename: outputWav,
    samples: audio.samples,
    sampleRate: audio.sampleRate,
  );

  print('Saved to $outputWav');
}


================================================
FILE: dart-api-examples/tts/bin/vits-zh.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the ONNX model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('lexicon', help: 'Path to lexicon.txt')
    ..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '')
    ..addOption('rule-fars', help: 'Path to rule fars', defaultsTo: '')
    ..addOption('text', help: 'Text to generate TTS for')
    ..addOption('output-wav', help: 'Filename to save the generated audio')
    ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
    ..addOption(
      'sid',
      help: 'Speaker ID to select. Used only for multi-speaker TTS',
      defaultsTo: '0',
    );
  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['lexicon'] == null ||
      res['tokens'] == null ||
      res['output-wav'] == null ||
      res['text'] == null) {
    print(parser.usage);
    exit(1);
  }
  final model = res['model'] as String;
  final lexicon = res['lexicon'] as String;
  final tokens = res['tokens'] as String;
  final ruleFsts = res['rule-fsts'] as String;
  final ruleFars = res['rule-fars'] as String;
  final text = res['text'] as String;
  final outputWav = res['output-wav'] as String;
  var speed = double.tryParse(res['speed'] as String) ?? 1.0;
  final sid = int.tryParse(res['sid'] as String) ?? 0;

  if (speed == 0) {
    speed = 1.0;
  }

  final vits = sherpa_onnx.OfflineTtsVitsModelConfig(
    model: model,
    lexicon: lexicon,
    tokens: tokens,
  );

  final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
    vits: vits,
    numThreads: 1,
    debug: true,
  );
  final config = sherpa_onnx.OfflineTtsConfig(
    model: modelConfig,
    maxNumSenetences: 1,
    ruleFsts: ruleFsts,
    ruleFars: ruleFars,
  );

  final tts = sherpa_onnx.OfflineTts(config);
  final genConfig = sherpa_onnx.OfflineTtsGenerationConfig(
    sid: sid,
    speed: speed,
    silenceScale: 0.2,
  );
  final audio = tts.generateWithConfig(text: text, config: genConfig);
  tts.free();

  sherpa_onnx.writeWave(
    filename: outputWav,
    samples: audio.samples,
    sampleRate: audio.sampleRate,
  );
  print('Saved to $outputWav');
}


================================================
FILE: dart-api-examples/tts/bin/zipvoice-zh-en.dart
================================================
// Copyright (c)  2026  Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('encoder', help: 'Path to the encoder model')
    ..addOption('decoder', help: 'Path to the decoder model')
    ..addOption('vocoder', help: 'Path to the vocoder model')
    ..addOption('data-dir', help: 'Path to espeak-ng-data directory')
    ..addOption('lexicon', help: 'Path to lexicon.txt')
    ..addOption('reference-audio', help: 'Path to reference audio (wav)')
    ..addOption('reference-text', help: 'Reference text for zero-shot TTS')
    ..addOption('text', help: 'Text to generate TTS for')
    ..addOption('output-wav', help: 'Filename to save the generated audio')
    ..addOption(
      'num-steps',
      help: 'Number of inference steps (default: 4)',
      defaultsTo: '4',
    );

  final res = parser.parse(arguments);

  if (res['tokens'] == null ||
      res['encoder'] == null ||
      res['decoder'] == null ||
      res['vocoder'] == null ||
      res['data-dir'] == null ||
      res['lexicon'] == null ||
      res['reference-audio'] == null ||
      res['reference-text'] == null ||
      res['output-wav'] == null ||
      res['text'] == null) {
    print(parser.usage);
    exit(1);
  }

  final tokens = res['tokens'] as String;
  final encoder = res['encoder'] as String;
  final decoder = res['decoder'] as String;
  final vocoder = res['vocoder'] as String;
  final dataDir = res['data-dir'] as String;
  final lexicon = res['lexicon'] as String;
  final referenceAudioPath = res['reference-audio'] as String;
  final referenceText = res['reference-text'] as String;
  final text = res['text'] as String;
  final outputWav = res['output-wav'] as String;
  final numSteps = int.parse(res['num-steps'] as String);

  final zipvoice = sherpa_onnx.OfflineTtsZipVoiceModelConfig(
    tokens: tokens,
    encoder: encoder,
    decoder: decoder,
    vocoder: vocoder,
    dataDir: dataDir,
    lexicon: lexicon,
  );

  final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
    zipvoice: zipvoice,
    numThreads: 2,
    debug: true,
  );

  final config = sherpa_onnx.OfflineTtsConfig(model: modelConfig);

  final tts = sherpa_onnx.OfflineTts(config);

  final wave = sherpa_onnx.readWave(referenceAudioPath);
  if (wave.samples.isEmpty || wave.sampleRate == 0) {
    throw Exception('Failed to read reference audio: $referenceAudioPath');
  }

  final genConfig = sherpa_onnx.OfflineTtsGenerationConfig(
    speed: 1.0,
    referenceAudio: wave.samples,
    referenceSampleRate: wave.sampleRate,
    referenceText: referenceText,
    numSteps: numSteps,
    extra: {'min_char_in_sentence': 10},
  );

  final audio = tts.generateWithConfig(
    text: text,
    config: genConfig,
    onProgress: (samples, progress) {
      print('Progress: ${(progress * 100).toStringAsFixed(2)}%');
      print('Received samples length: ${samples.length}');
      return 1;
    },
  );

  tts.free();

  sherpa_onnx.writeWave(
    filename: outputWav,
    samples: audio.samples,
    sampleRate: audio.sampleRate,
  );

  print('Saved to $outputWav');
}


================================================
FILE: dart-api-examples/tts/pubspec.yaml
================================================
name: tts
description: A sample command-line application.
version: 1.0.0
# repository: https://github.com/my_org/my_repo

environment:
  sdk: ">=3.0.0 <4.0.0"

# Add regular dependencies here.
dependencies:
  sherpa_onnx: ^1.12.31
  path: ^1.9.0
  args: ^2.5.0

dev_dependencies:
  lints: ^3.0.0


================================================
FILE: dart-api-examples/tts/run-coqui.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get


# Please visit
# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
# to download more models

if [[ ! -f ./vits-coqui-de-css10/tokens.txt ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2
  tar xvf vits-coqui-de-css10.tar.bz2
  rm vits-coqui-de-css10.tar.bz2
fi

# It is a character-based TTS model, so there is no need to use a lexicon
dart run \
  ./bin/coqui.dart \
  --model ./vits-coqui-de-css10/model.onnx \
  --tokens ./vits-coqui-de-css10/tokens.txt \
  --sid 0 \
  --speed 0.7 \
  --text 'Alles hat ein Ende, nur die Wurst hat zwei.' \
  --output-wav coqui-0.wav

ls -lh *.wav


================================================
FILE: dart-api-examples/tts/run-kitten-en.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kitten.html
# to download more models
if [ ! -f ./kitten-nano-en-v0_1-fp16/model.fp16.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2
  tar xf kitten-nano-en-v0_1-fp16.tar.bz2
  rm kitten-nano-en-v0_1-fp16.tar.bz2
fi

dart run \
  ./bin/kitten-en.dart \
  --model ./kitten-nano-en-v0_1-fp16/model.fp16.onnx \
  --voices ./kitten-nano-en-v0_1-fp16/voices.bin \
  --tokens ./kitten-nano-en-v0_1-fp16/tokens.txt \
  --data-dir ./kitten-nano-en-v0_1-fp16/espeak-ng-data \
  --sid 0 \
  --speed 1.0 \
  --output-wav kitten-en-0.wav \
  --text "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."

ls -lh *.wav


================================================
FILE: dart-api-examples/tts/run-kokoro-en.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
# to download more models
if [ ! -f ./kokoro-en-v0_19/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
  tar xf kokoro-en-v0_19.tar.bz2
  rm kokoro-en-v0_19.tar.bz2
fi

dart run \
  ./bin/kokoro-en.dart \
  --model ./kokoro-en-v0_19/model.onnx \
  --voices ./kokoro-en-v0_19/voices.bin \
  --tokens ./kokoro-en-v0_19/tokens.txt \
  --data-dir ./kokoro-en-v0_19/espeak-ng-data \
  --sid 9 \
  --speed 1.0 \
  --output-wav kokoro-en-9.wav \
  --text "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."

ls -lh *.wav


================================================
FILE: dart-api-examples/tts/run-kokoro-zh-en.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
# to download more models
if [ ! -f ./kokoro-multi-lang-v1_0/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
  tar xf kokoro-multi-lang-v1_0.tar.bz2
  rm kokoro-multi-lang-v1_0.tar.bz2
fi

dart run \
  ./bin/kokoro-zh-en.dart \
  --model ./kokoro-multi-lang-v1_0/model.onnx \
  --voices ./kokoro-multi-lang-v1_0/voices.bin \
  --tokens ./kokoro-multi-lang-v1_0/tokens.txt \
  --data-dir ./kokoro-multi-lang-v1_0/espeak-ng-data \
  --lexicon ./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt \
  --sid 45 \
  --speed 1.0 \
  --output-wav kokoro-zh-en-45.wav \
  --text "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢？"

ls -lh *.wav


================================================
FILE: dart-api-examples/tts/run-matcha-en.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
# matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
  tar xf matcha-icefall-en_US-ljspeech.tar.bz2
  rm matcha-icefall-en_US-ljspeech.tar.bz2
fi

if [ ! -f ./vocos-22khz-univ.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
fi

dart run \
  ./bin/matcha-en.dart \
  --acoustic-model ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
  --vocoder ./vocos-22khz-univ.onnx \
  --tokens ./matcha-icefall-en_US-ljspeech/tokens.txt \
  --data-dir ./matcha-icefall-en_US-ljspeech/espeak-ng-data \
  --sid 0 \
  --speed 1.0 \
  --output-wav matcha-en-1.wav \
  --text "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone." \

ls -lh *.wav


================================================
FILE: dart-api-examples/tts/run-matcha-zh.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
  tar xvf matcha-icefall-zh-baker.tar.bz2
  rm matcha-icefall-zh-baker.tar.bz2
fi

if [ ! -f ./vocos-22khz-univ.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
fi

dart run \
  ./bin/matcha-zh.dart \
  --acoustic-model ./matcha-icefall-zh-baker/model-steps-3.onnx \
  --vocoder ./vocos-22khz-univ.onnx \
  --lexicon ./matcha-icefall-zh-baker/lexicon.txt \
  --tokens ./matcha-icefall-zh-baker/tokens.txt \
  --rule-fsts ./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
  --sid 0 \
  --speed 1.0 \
  --output-wav matcha-zh-1.wav \
  --text "某某银行的副行长和一些行政领导表示，他们去过长江和长白山; 经济不断增长。2024年12月31号，拨打110或者18920240511。123456块钱。" \

dart run \
  ./bin/matcha-zh.dart \
  --acoustic-model ./matcha-icefall-zh-baker/model-steps-3.onnx \
  --vocoder ./vocos-22khz-univ.onnx \
  --lexicon ./matcha-icefall-zh-baker/lexicon.txt \
  --tokens ./matcha-icefall-zh-baker/tokens.txt \
  --sid 0 \
  --speed 1.0 \
  --output-wav matcha-zh-2.wav \
  --text "当夜幕降临，星光点点，伴随着微风拂面，我在静谧中感受着时光的流转，思念如涟漪荡漾，梦境如画卷展开，我与自然融为一体，沉静在这片宁静的美丽之中，感受着生命的奇迹与温柔." \

ls -lh *.wav


================================================
FILE: dart-api-examples/tts/run-piper.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get


# Please visit
# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
# to download more models

if [[ ! -f ./vits-piper-en_US-libritts_r-medium/tokens.txt ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-libritts_r-medium.tar.bz2
  tar xf vits-piper-en_US-libritts_r-medium.tar.bz2
  rm vits-piper-en_US-libritts_r-medium.tar.bz2
fi

dart run \
  ./bin/piper.dart \
  --model ./vits-piper-en_US-libritts_r-medium/en_US-libritts_r-medium.onnx \
  --tokens ./vits-piper-en_US-libritts_r-medium/tokens.txt \
  --data-dir ./vits-piper-en_US-libritts_r-medium/espeak-ng-data \
  --sid 351 \
  --speed 1.0 \
  --text 'How are you doing? This is a speech to text example, using next generation kaldi with piper.' \
  --output-wav piper-351.wav

ls -lh *.wav


================================================
FILE: dart-api-examples/tts/run-pocket-en.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pocket.html
# to download more models
if [ ! -f ./sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
  tar xvf sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
  rm sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
fi

dart run \
  ./bin/pocket-en.dart \
  --lm-flow ./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_flow.int8.onnx \
  --lm-main ./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_main.int8.onnx \
  --encoder ./sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx \
  --decoder ./sherpa-onnx-pocket-tts-int8-2026-01-26/decoder.int8.onnx \
  --text-conditioner ./sherpa-onnx-pocket-tts-int8-2026-01-26/text_conditioner.onnx \
  --vocab-json ./sherpa-onnx-pocket-tts-int8-2026-01-26/vocab.json \
  --token-scores-json ./sherpa-onnx-pocket-tts-int8-2026-01-26/token_scores.json \
  --reference-audio ./sherpa-onnx-pocket-tts-int8-2026-01-26/test_wavs/bria.wav \
  --output-wav pocket-en-0.wav \
  --text "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."

ls -lh *.wav


================================================
FILE: dart-api-examples/tts/run-supertonic-en.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/supertonic.html
# to download more models
if [ ! -f ./sherpa-onnx-supertonic-tts-int8-2026-03-06/duration_predictor.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
  tar xf sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
  rm sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
fi

dart run \
  ./bin/supertonic-en.dart \
  --duration-predictor ./sherpa-onnx-supertonic-tts-int8-2026-03-06/duration_predictor.int8.onnx \
  --text-encoder ./sherpa-onnx-supertonic-tts-int8-2026-03-06/text_encoder.int8.onnx \
  --vector-estimator ./sherpa-onnx-supertonic-tts-int8-2026-03-06/vector_estimator.int8.onnx \
  --vocoder ./sherpa-onnx-supertonic-tts-int8-2026-03-06/vocoder.int8.onnx \
  --tts-json ./sherpa-onnx-supertonic-tts-int8-2026-03-06/tts.json \
  --unicode-indexer ./sherpa-onnx-supertonic-tts-int8-2026-03-06/unicode_indexer.bin \
  --voice-style ./sherpa-onnx-supertonic-tts-int8-2026-03-06/voice.bin \
  --sid 6 \
  --speed 1.25 \
  --num-steps 5 \
  --output-wav supertonic-en-0.wav \
  --text "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."

ls -lh *.wav


================================================
FILE: dart-api-examples/tts/run-vits-zh.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get


# Please visit
# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
# to download more models

if [[ ! -f ./sherpa-onnx-vits-zh-ll/tokens.txt ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-vits-zh-ll.tar.bz2
  tar xvf sherpa-onnx-vits-zh-ll.tar.bz2
  rm sherpa-onnx-vits-zh-ll.tar.bz2
fi

dart run \
  ./bin/vits-zh.dart \
  --model ./sherpa-onnx-vits-zh-ll/model.onnx \
  --lexicon ./sherpa-onnx-vits-zh-ll/lexicon.txt \
  --tokens ./sherpa-onnx-vits-zh-ll/tokens.txt \
  --sid 2 \
  --speed 1.0 \
  --text '当夜幕降临，星光点点，伴随着微风拂面，我在静谧中感受着时光的流转，思念如涟漪荡漾，梦境如画卷展开，我与自然融为一体，沉静在这片宁静的美丽之中，感受着生命的奇迹与温柔。' \
  --output-wav vits-zh-jieba-2.wav

dart run \
  ./bin/vits-zh.dart \
  --model ./sherpa-onnx-vits-zh-ll/model.onnx \
  --lexicon ./sherpa-onnx-vits-zh-ll/lexicon.txt \
  --tokens ./sherpa-onnx-vits-zh-ll/tokens.txt \
  --rule-fsts "./sherpa-onnx-vits-zh-ll/phone.fst,./sherpa-onnx-vits-zh-ll/date.fst,./sherpa-onnx-vits-zh-ll/number.fst" \
  --sid 3 \
  --speed 1.0 \
  --text '今天是2024年6月15号，13点23分。如果有困难，请拨打110或者18920240511。123456块钱。' \
  --output-wav vits-zh-jieba-3.wav

ls -lh *.wav


================================================
FILE: dart-api-examples/tts/run-zipvoice-zh-en.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/zipvoice.html
# to download more models
if [ ! -f ./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
  tar xvf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
  rm sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
fi

if [ ! -f ./vocos_24khz.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos_24khz.onnx
fi

dart run \
  ./bin/zipvoice-zh-en.dart \
  --tokens ./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/tokens.txt \
  --encoder ./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/encoder.int8.onnx \
  --decoder ./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/decoder.int8.onnx \
  --vocoder ./vocos_24khz.onnx \
  --data-dir ./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/espeak-ng-data \
  --lexicon ./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/lexicon.txt \
  --reference-audio ./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/test_wavs/leijun-1.wav \
  --reference-text "那还是三十六年前, 一九八七年. 我呢考上了武汉大学的计算机系." \
  --num-steps 4 \
  --output-wav zipvoice-zh-en-0.wav \
  --text "小米的价值观是真诚, 热爱. 真诚，就是不欺人也不自欺. 热爱, 就是全心投入并享受其中."

ls -lh *.wav


================================================
FILE: dart-api-examples/vad/.gitignore
================================================
# https://dart.dev/guides/libraries/private-files
# Created by `dart pub`
.dart_tool/


================================================
FILE: dart-api-examples/vad/CHANGELOG.md
================================================
## 1.0.0

- Initial version.


================================================
FILE: dart-api-examples/vad/README.md
================================================
# Introduction

This example shows how to use the Dart API from sherpa-onnx for voice activity detection (VAD).
Specifically, we use VAD to remove silences from a wave file.

# Usage

```bash
dart pub get

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav

dart run \
  ./bin/vad.dart \
  --silero-vad ./silero_vad.onnx \
  --input-wav ./lei-jun-test.wav \
  --output-wav ./lei-jun-test-no-silence.wav
```

It should generate a file `lei-jun-test-no-silence.wav`, where silences are removed.


================================================
FILE: dart-api-examples/vad/analysis_options.yaml
================================================
# This file configures the static analysis results for your project (errors,
# warnings, and lints).
#
# This enables the 'recommended' set of lints from `package:lints`.
# This set helps identify many issues that may lead to problems when running
# or consuming Dart code, and enforces writing Dart using a single, idiomatic
# style and format.
#
# If you want a smaller set of lints you can change this to specify
# 'package:lints/core.yaml'. These are just the most critical lints
# (the recommended set includes the core lints).
# The core lints are also what is used by pub.dev for scoring packages.

include: package:lints/recommended.yaml

# Uncomment the following section to specify additional rules.

# linter:
#   rules:
#     - camel_case_types

# analyzer:
#   exclude:
#     - path/to/excluded/files/**

# For more information about the core and recommended set of lints, see
# https://dart.dev/go/core-lints

# For additional information about configuring this file, see
# https://dart.dev/guides/language/analysis-options


================================================
FILE: dart-api-examples/vad/bin/init.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';
import 'dart:isolate';
import 'package:path/path.dart' as p;
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

Future<void> initSherpaOnnx() async {
  String platform = '';

  if (Platform.isMacOS) {
    platform = 'macos';
  } else if (Platform.isLinux) {
    platform = 'linux';
  } else if (Platform.isWindows) {
    platform = 'windows';
  } else {
    throw UnsupportedError('Unknown platform: ${Platform.operatingSystem}');
  }

  var uri = await Isolate.resolvePackageUri(
      Uri.parse('package:sherpa_onnx_$platform/any_path_is_ok_here.dart'));

  if (uri == null) {
    print('File not found');
    exit(1);
  }

  var libPath = p.join(p.dirname(p.fromUri(uri)), '..', platform);
  if (platform == 'linux') {
    final arch = Platform.version.contains('arm64') ||
            Platform.version.contains('aarch64')
        ? 'aarch64'
        : 'x64';
    libPath = p.join(p.dirname(p.fromUri(uri)), '..', platform, arch);
  }

  sherpa_onnx.initBindings(libPath);
}


================================================
FILE: dart-api-examples/vad/bin/ten-vad.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('ten-vad', help: 'Path to ten-vad.onnx')
    ..addOption('input-wav', help: 'Path to input.wav')
    ..addOption('output-wav', help: 'Path to output.wav');

  final res = parser.parse(arguments);
  if (res['ten-vad'] == null ||
      res['input-wav'] == null ||
      res['output-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final tenVad = res['ten-vad'] as String;
  final inputWav = res['input-wav'] as String;
  final outputWav = res['output-wav'] as String;

  final tenVadConfig = sherpa_onnx.TenVadModelConfig(
    model: tenVad,
    threshold: 0.25,
    minSilenceDuration: 0.25,
    minSpeechDuration: 0.5,
    windowSize: 256,
  );

  final config = sherpa_onnx.VadModelConfig(
    tenVad: tenVadConfig,
    numThreads: 1,
    debug: true,
  );

  final vad = sherpa_onnx.VoiceActivityDetector(
      config: config, bufferSizeInSeconds: 10);

  final waveData = sherpa_onnx.readWave(inputWav);
  if (waveData.sampleRate != 16000) {
    print('Only 16000 Hz is supported. Given: ${waveData.sampleRate}');
    exit(1);
  }

  int numSamples = waveData.samples.length;
  int numIter = numSamples ~/ config.tenVad.windowSize;

  List<List<double>> allSamples = [];

  for (int i = 0; i != numIter; ++i) {
    int start = i * config.tenVad.windowSize;
    vad.acceptWaveform(Float32List.sublistView(
        waveData.samples, start, start + config.tenVad.windowSize));

    if (vad.isDetected()) {
      while (!vad.isEmpty()) {
        allSamples.add(vad.front().samples);
        vad.pop();
      }
    }
  }

  vad.flush();
  while (!vad.isEmpty()) {
    allSamples.add(vad.front().samples);
    vad.pop();
  }

  vad.free();

  final s = Float32List.fromList(allSamples.expand((x) => x).toList());
  sherpa_onnx.writeWave(
      filename: outputWav, samples: s, sampleRate: waveData.sampleRate);

  print('Saved to $outputWav');
}


================================================
FILE: dart-api-examples/vad/bin/vad.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('silero-vad', help: 'Path to silero_vad.onnx')
    ..addOption('input-wav', help: 'Path to input.wav')
    ..addOption('output-wav', help: 'Path to output.wav');

  final res = parser.parse(arguments);
  if (res['silero-vad'] == null ||
      res['input-wav'] == null ||
      res['output-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final sileroVad = res['silero-vad'] as String;
  final inputWav = res['input-wav'] as String;
  final outputWav = res['output-wav'] as String;

  final sileroVadConfig = sherpa_onnx.SileroVadModelConfig(
    model: sileroVad,
    minSilenceDuration: 0.25,
    minSpeechDuration: 0.5,
  );

  final config = sherpa_onnx.VadModelConfig(
    sileroVad: sileroVadConfig,
    numThreads: 1,
    debug: true,
  );

  final vad = sherpa_onnx.VoiceActivityDetector(
      config: config, bufferSizeInSeconds: 10);

  final waveData = sherpa_onnx.readWave(inputWav);
  if (waveData.sampleRate != 16000) {
    print('Only 16000 Hz is supported. Given: ${waveData.sampleRate}');
    exit(1);
  }

  int numSamples = waveData.samples.length;
  int numIter = numSamples ~/ config.sileroVad.windowSize;

  List<List<double>> allSamples = [];

  for (int i = 0; i != numIter; ++i) {
    int start = i * config.sileroVad.windowSize;
    vad.acceptWaveform(Float32List.sublistView(
        waveData.samples, start, start + config.sileroVad.windowSize));

    if (vad.isDetected()) {
      while (!vad.isEmpty()) {
        allSamples.add(vad.front().samples);
        vad.pop();
      }
    }
  }

  vad.flush();
  while (!vad.isEmpty()) {
    allSamples.add(vad.front().samples);
    vad.pop();
  }

  vad.free();

  final s = Float32List.fromList(allSamples.expand((x) => x).toList());
  sherpa_onnx.writeWave(
      filename: outputWav, samples: s, sampleRate: waveData.sampleRate);

  print('Saved to $outputWav');
}


================================================
FILE: dart-api-examples/vad/pubspec.yaml
================================================
name: vad

description: >
  This example demonstrates how to use the Dart API for VAD (voice activity detection).

version: 1.0.0

environment:
  sdk: ">=3.0.0 <4.0.0"

dependencies:
  sherpa_onnx: ^1.12.31
  path: ^1.9.0
  args: ^2.5.0

dev_dependencies:
  lints: ^3.0.0


================================================
FILE: dart-api-examples/vad/run-ten-vad.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get


if [[ ! -f ./ten-vad.onnx ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
fi

if [[ ! -f ./lei-jun-test.wav ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

dart run \
  ./bin/ten-vad.dart \
  --ten-vad ./ten-vad.onnx \
  --input-wav ./lei-jun-test.wav \
  --output-wav ./lei-jun-test-no-silence.wav

ls -lh *.wav


================================================
FILE: dart-api-examples/vad/run.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get


if [[ ! -f ./silero_vad.onnx ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [[ ! -f ./lei-jun-test.wav ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

dart run \
  ./bin/vad.dart \
  --silero-vad ./silero_vad.onnx \
  --input-wav ./lei-jun-test.wav \
  --output-wav ./lei-jun-test-no-silence.wav

ls -lh *.wav


================================================
FILE: dart-api-examples/vad-with-non-streaming-asr/.gitignore
================================================
# https://dart.dev/guides/libraries/private-files
# Created by `dart pub`
.dart_tool/


================================================
FILE: dart-api-examples/vad-with-non-streaming-asr/README.md
================================================
# Introduction

This folder contains examples for non-streaming ASR + voice activity detection
with Dart API.

| File | Description|
|------|------------|
|[./bin/paraformer.dart](./bin/paraformer.dart)| Use a Paraformer model for speech recognition. See [./run-paraformer.sh](./run-paraformer.sh)|
|[./bin/sense-voice.dart](./bin/sense-voice.dart)| Use a SenseVoice Ctc model for speech recognition. See [./run-sense-voice-zh.sh](./run-sense-voice-zh.sh) and [./run-sense-voice-en.sh](./run-sense-voice-en.sh)|
|[./bin/telespeech-ctc.dart](./bin/telespeech-ctc.dart)| Use a TeleSpeech CTC model for speech recognition. See [./run-telespeech-ctc.sh](./run-telespeech-ctc.sh)|
|[./bin/whisper.dart](./bin/whisper.dart)| Use a Whisper model for speech recognition. See [./run-whisper.sh](./run-whisper.sh)|
|[./bin/zipformer-transducer.dart](./bin/zipformer-transducer.dart)| Use a Zipformer transducer model for speech recognition. See [./run-zipformer-transducer.sh](./run-zipformer-transducer.sh)|


================================================
FILE: dart-api-examples/vad-with-non-streaming-asr/analysis_options.yaml
================================================
# This file configures the static analysis results for your project (errors,
# warnings, and lints).
#
# This enables the 'recommended' set of lints from `package:lints`.
# This set helps identify many issues that may lead to problems when running
# or consuming Dart code, and enforces writing Dart using a single, idiomatic
# style and format.
#
# If you want a smaller set of lints you can change this to specify
# 'package:lints/core.yaml'. These are just the most critical lints
# (the recommended set includes the core lints).
# The core lints are also what is used by pub.dev for scoring packages.

include: package:lints/recommended.yaml

# Uncomment the following section to specify additional rules.

# linter:
#   rules:
#     - camel_case_types

# analyzer:
#   exclude:
#     - path/to/excluded/files/**

# For more information about the core and recommended set of lints, see
# https://dart.dev/go/core-lints

# For additional information about configuring this file, see
# https://dart.dev/guides/language/analysis-options


================================================
FILE: dart-api-examples/vad-with-non-streaming-asr/bin/dolphin-ctc.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('silero-vad', help: 'Path to silero_vad.onnx')
    ..addOption('model', help: 'Path to the Dolphin CTC model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['silero-vad'] == null ||
      res['model'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  // create VAD
  final sileroVad = res['silero-vad'] as String;

  final sileroVadConfig = sherpa_onnx.SileroVadModelConfig(
    model: sileroVad,
    minSilenceDuration: 0.25,
    minSpeechDuration: 0.5,
    maxSpeechDuration: 5.0,
  );

  final vadConfig = sherpa_onnx.VadModelConfig(
    sileroVad: sileroVadConfig,
    numThreads: 1,
    debug: true,
  );

  final vad = sherpa_onnx.VoiceActivityDetector(
      config: vadConfig, bufferSizeInSeconds: 10);

  // create offline recognizer
  final model = res['model'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final dolphin = sherpa_onnx.OfflineDolphinModelConfig(model: model);

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    dolphin: dolphin,
    tokens: tokens,
    debug: true,
    numThreads: 1,
  );
  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  if (waveData.sampleRate != 16000) {
    print('Only 16000 Hz is supported. Given: ${waveData.sampleRate}');
    exit(1);
  }

  int numSamples = waveData.samples.length;
  int numIter = numSamples ~/ vadConfig.sileroVad.windowSize;

  for (int i = 0; i != numIter; ++i) {
    int start = i * vadConfig.sileroVad.windowSize;
    vad.acceptWaveform(Float32List.sublistView(
        waveData.samples, start, start + vadConfig.sileroVad.windowSize));

    while (!vad.isEmpty()) {
      final samples = vad.front().samples;
      final startTime = vad.front().start.toDouble() / waveData.sampleRate;
      final endTime =
          startTime + samples.length.toDouble() / waveData.sampleRate;

      final stream = recognizer.createStream();
      stream.acceptWaveform(samples: samples, sampleRate: waveData.sampleRate);
      recognizer.decode(stream);

      final result = recognizer.getResult(stream);
      stream.free();
      print(
          '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');

      vad.pop();
    }
  }

  vad.flush();

  while (!vad.isEmpty()) {
    final samples = vad.front().samples;
    final startTime = vad.front().start.toDouble() / waveData.sampleRate;
    final endTime = startTime + samples.length.toDouble() / waveData.sampleRate;

    final stream = recognizer.createStream();
    stream.acceptWaveform(samples: samples, sampleRate: waveData.sampleRate);
    recognizer.decode(stream);

    final result = recognizer.getResult(stream);
    stream.free();
    print(
        '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');

    vad.pop();
  }

  vad.free();

  recognizer.free();
}


================================================
FILE: dart-api-examples/vad-with-non-streaming-asr/bin/moonshine.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('silero-vad', help: 'Path to silero_vad.onnx')
    ..addOption('preprocessor',
        help: 'Path to the moonshine preprocessor model')
    ..addOption('encoder', help: 'Path to the moonshine encoder model')
    ..addOption('uncached-decoder',
        help: 'Path to moonshine uncached decoder model')
    ..addOption('cached-decoder',
        help: 'Path to moonshine cached decoder model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['silero-vad'] == null ||
      res['preprocessor'] == null ||
      res['encoder'] == null ||
      res['uncached-decoder'] == null ||
      res['cached-decoder'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  // create VAD
  final sileroVad = res['silero-vad'] as String;

  final sileroVadConfig = sherpa_onnx.SileroVadModelConfig(
    model: sileroVad,
    minSilenceDuration: 0.25,
    minSpeechDuration: 0.5,
    maxSpeechDuration: 5.0,
  );

  final vadConfig = sherpa_onnx.VadModelConfig(
    sileroVad: sileroVadConfig,
    numThreads: 1,
    debug: true,
  );

  final vad = sherpa_onnx.VoiceActivityDetector(
      config: vadConfig, bufferSizeInSeconds: 10);

  // create whisper recognizer
  final preprocessor = res['preprocessor'] as String;
  final encoder = res['encoder'] as String;
  final uncachedDecoder = res['uncached-decoder'] as String;
  final cachedDecoder = res['cached-decoder'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final moonshine = sherpa_onnx.OfflineMoonshineModelConfig(
    preprocessor: preprocessor,
    encoder: encoder,
    uncachedDecoder: uncachedDecoder,
    cachedDecoder: cachedDecoder,
  );
  final modelConfig = sherpa_onnx.OfflineModelConfig(
    moonshine: moonshine,
    tokens: tokens,
    debug: false,
    numThreads: 1,
  );
  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  if (waveData.sampleRate != 16000) {
    print('Only 16000 Hz is supported. Given: ${waveData.sampleRate}');
    exit(1);
  }

  int numSamples = waveData.samples.length;
  int numIter = numSamples ~/ vadConfig.sileroVad.windowSize;

  for (int i = 0; i != numIter; ++i) {
    int start = i * vadConfig.sileroVad.windowSize;
    vad.acceptWaveform(Float32List.sublistView(
        waveData.samples, start, start + vadConfig.sileroVad.windowSize));

    while (!vad.isEmpty()) {
      final samples = vad.front().samples;
      final startTime = vad.front().start.toDouble() / waveData.sampleRate;
      final endTime =
          startTime + samples.length.toDouble() / waveData.sampleRate;

      final stream = recognizer.createStream();
      stream.acceptWaveform(samples: samples, sampleRate: waveData.sampleRate);
      recognizer.decode(stream);

      final result = recognizer.getResult(stream);
      stream.free();
      print(
          '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');

      vad.pop();
    }
  }

  vad.flush();

  while (!vad.isEmpty()) {
    final samples = vad.front().samples;
    final startTime = vad.front().start.toDouble() / waveData.sampleRate;
    final endTime = startTime + samples.length.toDouble() / waveData.sampleRate;

    final stream = recognizer.createStream();
    stream.acceptWaveform(samples: samples, sampleRate: waveData.sampleRate);
    recognizer.decode(stream);

    final result = recognizer.getResult(stream);
    stream.free();
    print(
        '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');

    vad.pop();
  }

  vad.free();

  recognizer.free();
}


================================================
FILE: dart-api-examples/vad-with-non-streaming-asr/bin/paraformer.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('silero-vad', help: 'Path to silero_vad.onnx')
    ..addOption('model', help: 'Path to the paraformer model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['silero-vad'] == null ||
      res['model'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  // create VAD
  final sileroVad = res['silero-vad'] as String;

  final sileroVadConfig = sherpa_onnx.SileroVadModelConfig(
    model: sileroVad,
    minSilenceDuration: 0.25,
    minSpeechDuration: 0.5,
    maxSpeechDuration: 5.0,
  );

  final vadConfig = sherpa_onnx.VadModelConfig(
    sileroVad: sileroVadConfig,
    numThreads: 1,
    debug: true,
  );

  final vad = sherpa_onnx.VoiceActivityDetector(
      config: vadConfig, bufferSizeInSeconds: 10);

  // create paraformer recognizer
  final model = res['model'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final paraformer = sherpa_onnx.OfflineParaformerModelConfig(
    model: model,
  );

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    paraformer: paraformer,
    tokens: tokens,
    debug: true,
    numThreads: 1,
    modelType: 'paraformer',
  );
  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  if (waveData.sampleRate != 16000) {
    print('Only 16000 Hz is supported. Given: ${waveData.sampleRate}');
    exit(1);
  }

  int numSamples = waveData.samples.length;
  int numIter = numSamples ~/ vadConfig.sileroVad.windowSize;

  for (int i = 0; i != numIter; ++i) {
    int start = i * vadConfig.sileroVad.windowSize;
    vad.acceptWaveform(Float32List.sublistView(
        waveData.samples, start, start + vadConfig.sileroVad.windowSize));

    while (!vad.isEmpty()) {
      final samples = vad.front().samples;
      final startTime = vad.front().start.toDouble() / waveData.sampleRate;
      final endTime =
          startTime + samples.length.toDouble() / waveData.sampleRate;

      final stream = recognizer.createStream();
      stream.acceptWaveform(samples: samples, sampleRate: waveData.sampleRate);
      recognizer.decode(stream);

      final result = recognizer.getResult(stream);
      stream.free();
      print(
          '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');

      vad.pop();
    }
  }

  vad.flush();

  while (!vad.isEmpty()) {
    final samples = vad.front().samples;
    final startTime = vad.front().start.toDouble() / waveData.sampleRate;
    final endTime = startTime + samples.length.toDouble() / waveData.sampleRate;

    final stream = recognizer.createStream();
    stream.acceptWaveform(samples: samples, sampleRate: waveData.sampleRate);
    recognizer.decode(stream);

    final result = recognizer.getResult(stream);
    stream.free();
    print(
        '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');

    vad.pop();
  }

  vad.free();

  recognizer.free();
}


================================================
FILE: dart-api-examples/vad-with-non-streaming-asr/bin/sense-voice-2.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
//
// Different from ./sense-voice.dart, this file uses a CircularBuffer
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('silero-vad', help: 'Path to silero_vad.onnx')
    ..addOption('model', help: 'Path to the SenseVoice model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('language',
        help: 'auto, zh, en, ja, ko, yue, or leave it empty to use auto',
        defaultsTo: '')
    ..addOption('use-itn',
        help: 'true to use inverse text normalization', defaultsTo: 'false')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['silero-vad'] == null ||
      res['model'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  // create VAD
  final sileroVad = res['silero-vad'] as String;

  final sileroVadConfig = sherpa_onnx.SileroVadModelConfig(
    model: sileroVad,
    minSilenceDuration: 0.25,
    minSpeechDuration: 0.5,
    maxSpeechDuration: 5.0,
  );

  final vadConfig = sherpa_onnx.VadModelConfig(
    sileroVad: sileroVadConfig,
    numThreads: 1,
    debug: true,
  );

  final vad = sherpa_onnx.VoiceActivityDetector(
      config: vadConfig, bufferSizeInSeconds: 10);

  // create SenseVoice
  final model = res['model'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;
  final language = res['language'] as String;
  final useItn = (res['use-itn'] as String).toLowerCase() == 'true';

  final senseVoice = sherpa_onnx.OfflineSenseVoiceModelConfig(
      model: model, language: language, useInverseTextNormalization: useItn);

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    senseVoice: senseVoice,
    tokens: tokens,
    debug: true,
    numThreads: 1,
  );
  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  if (waveData.sampleRate != 16000) {
    print('Only 16000 Hz is supported. Given: ${waveData.sampleRate}');
    exit(1);
  }

  final buffer = sherpa_onnx.CircularBuffer(capacity: 30 * 16000);
  buffer.push(waveData.samples);

  while (buffer.size > vadConfig.sileroVad.windowSize) {
    final samples =
        buffer.get(startIndex: buffer.head, n: vadConfig.sileroVad.windowSize);
    buffer.pop(vadConfig.sileroVad.windowSize);

    vad.acceptWaveform(samples);

    while (!vad.isEmpty()) {
      final samples = vad.front().samples;
      final startTime = vad.front().start.toDouble() / waveData.sampleRate;
      final endTime =
          startTime + samples.length.toDouble() / waveData.sampleRate;

      final stream = recognizer.createStream();
      stream.acceptWaveform(samples: samples, sampleRate: waveData.sampleRate);
      recognizer.decode(stream);

      final result = recognizer.getResult(stream);
      stream.free();
      print(
          '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');

      vad.pop();
    }
  }

  vad.flush();

  while (!vad.isEmpty()) {
    final samples = vad.front().samples;
    final startTime = vad.front().start.toDouble() / waveData.sampleRate;
    final endTime = startTime + samples.length.toDouble() / waveData.sampleRate;

    final stream = recognizer.createStream();
    stream.acceptWaveform(samples: samples, sampleRate: waveData.sampleRate);
    recognizer.decode(stream);

    final result = recognizer.getResult(stream);
    stream.free();
    print(
        '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');

    vad.pop();
  }

  buffer.free();
  vad.free();

  recognizer.free();
}


================================================
FILE: dart-api-examples/vad-with-non-streaming-asr/bin/sense-voice.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('silero-vad', help: 'Path to silero_vad.onnx')
    ..addOption('model', help: 'Path to the SenseVoice model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('language',
        help: 'auto, zh, en, ja, ko, yue, or leave it empty to use auto',
        defaultsTo: '')
    ..addOption('use-itn',
        help: 'true to use inverse text normalization', defaultsTo: 'false')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['silero-vad'] == null ||
      res['model'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  // create VAD
  final sileroVad = res['silero-vad'] as String;

  final sileroVadConfig = sherpa_onnx.SileroVadModelConfig(
    model: sileroVad,
    minSilenceDuration: 0.25,
    minSpeechDuration: 0.5,
    maxSpeechDuration: 5.0,
  );

  final vadConfig = sherpa_onnx.VadModelConfig(
    sileroVad: sileroVadConfig,
    numThreads: 1,
    debug: true,
  );

  final vad = sherpa_onnx.VoiceActivityDetector(
      config: vadConfig, bufferSizeInSeconds: 10);

  // create SenseVoice
  final model = res['model'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;
  final language = res['language'] as String;
  final useItn = (res['use-itn'] as String).toLowerCase() == 'true';

  final senseVoice = sherpa_onnx.OfflineSenseVoiceModelConfig(
      model: model, language: language, useInverseTextNormalization: useItn);

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    senseVoice: senseVoice,
    tokens: tokens,
    debug: true,
    numThreads: 1,
  );
  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  if (waveData.sampleRate != 16000) {
    print('Only 16000 Hz is supported. Given: ${waveData.sampleRate}');
    exit(1);
  }

  int numSamples = waveData.samples.length;
  int numIter = numSamples ~/ vadConfig.sileroVad.windowSize;

  for (int i = 0; i != numIter; ++i) {
    int start = i * vadConfig.sileroVad.windowSize;
    vad.acceptWaveform(Float32List.sublistView(
        waveData.samples, start, start + vadConfig.sileroVad.windowSize));

    while (!vad.isEmpty()) {
      final samples = vad.front().samples;
      final startTime = vad.front().start.toDouble() / waveData.sampleRate;
      final endTime =
          startTime + samples.length.toDouble() / waveData.sampleRate;

      final stream = recognizer.createStream();
      stream.acceptWaveform(samples: samples, sampleRate: waveData.sampleRate);
      recognizer.decode(stream);

      final result = recognizer.getResult(stream);
      stream.free();
      print(
          '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');

      vad.pop();
    }
  }

  vad.flush();

  while (!vad.isEmpty()) {
    final samples = vad.front().samples;
    final startTime = vad.front().start.toDouble() / waveData.sampleRate;
    final endTime = startTime + samples.length.toDouble() / waveData.sampleRate;

    final stream = recognizer.createStream();
    stream.acceptWaveform(samples: samples, sampleRate: waveData.sampleRate);
    recognizer.decode(stream);

    final result = recognizer.getResult(stream);
    stream.free();
    print(
        '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');

    vad.pop();
  }

  vad.free();

  recognizer.free();
}


================================================
FILE: dart-api-examples/vad-with-non-streaming-asr/bin/telespeech-ctc.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('silero-vad', help: 'Path to silero_vad.onnx')
    ..addOption('model', help: 'Path to the telespeech CTC model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);

  if (res['silero-vad'] == null ||
      res['model'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  // create VAD
  final sileroVad = res['silero-vad'] as String;

  final sileroVadConfig = sherpa_onnx.SileroVadModelConfig(
    model: sileroVad,
    minSilenceDuration: 0.25,
    minSpeechDuration: 0.5,
    maxSpeechDuration: 5.0,
  );

  final vadConfig = sherpa_onnx.VadModelConfig(
    sileroVad: sileroVadConfig,
    numThreads: 1,
    debug: true,
  );

  final vad = sherpa_onnx.VoiceActivityDetector(
      config: vadConfig, bufferSizeInSeconds: 10);

  // create telespeech CTC recognizer
  final model = res['model'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    telespeechCtc: model,
    tokens: tokens,
    debug: true,
    numThreads: 1,
    modelType: 'telespeech_ctc',
  );
  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  if (waveData.sampleRate != 16000) {
    print('Only 16000 Hz is supported. Given: ${waveData.sampleRate}');
    exit(1);
  }

  int numSamples = waveData.samples.length;
  int numIter = numSamples ~/ vadConfig.sileroVad.windowSize;

  for (int i = 0; i != numIter; ++i) {
    int start = i * vadConfig.sileroVad.windowSize;
    vad.acceptWaveform(Float32List.sublistView(
        waveData.samples, start, start + vadConfig.sileroVad.windowSize));

    while (!vad.isEmpty()) {
      final samples = vad.front().samples;
      final startTime = vad.front().start.toDouble() / waveData.sampleRate;
      final endTime =
          startTime + samples.length.toDouble() / waveData.sampleRate;

      final stream = recognizer.createStream();
      stream.acceptWaveform(samples: samples, sampleRate: waveData.sampleRate);
      recognizer.decode(stream);

      final result = recognizer.getResult(stream);
      stream.free();
      print(
          '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');

      vad.pop();
    }
  }

  vad.flush();

  while (!vad.isEmpty()) {
    final samples = vad.front().samples;
    final startTime = vad.front().start.toDouble() / waveData.sampleRate;
    final endTime = startTime + samples.length.toDouble() / waveData.sampleRate;

    final stream = recognizer.createStream();
    stream.acceptWaveform(samples: samples, sampleRate: waveData.sampleRate);
    recognizer.decode(stream);

    final result = recognizer.getResult(stream);
    stream.free();
    print(
        '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');

    vad.pop();
  }

  vad.free();

  recognizer.free();
}


================================================
FILE: dart-api-examples/vad-with-non-streaming-asr/bin/whisper.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('silero-vad', help: 'Path to silero_vad.onnx')
    ..addOption('encoder', help: 'Path to the whisper encoder model')
    ..addOption('decoder', help: 'Path to whisper decoder model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['silero-vad'] == null ||
      res['encoder'] == null ||
      res['decoder'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  // create VAD
  final sileroVad = res['silero-vad'] as String;

  final sileroVadConfig = sherpa_onnx.SileroVadModelConfig(
    model: sileroVad,
    minSilenceDuration: 0.25,
    minSpeechDuration: 0.5,
    maxSpeechDuration: 5.0,
  );

  final vadConfig = sherpa_onnx.VadModelConfig(
    sileroVad: sileroVadConfig,
    numThreads: 1,
    debug: true,
  );

  final vad = sherpa_onnx.VoiceActivityDetector(
      config: vadConfig, bufferSizeInSeconds: 10);

  // create whisper recognizer
  final encoder = res['encoder'] as String;
  final decoder = res['decoder'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final whisper = sherpa_onnx.OfflineWhisperModelConfig(
    encoder: encoder,
    decoder: decoder,
  );

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    whisper: whisper,
    tokens: tokens,
    modelType: 'whisper',
    debug: false,
    numThreads: 1,
  );
  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  if (waveData.sampleRate != 16000) {
    print('Only 16000 Hz is supported. Given: ${waveData.sampleRate}');
    exit(1);
  }

  int numSamples = waveData.samples.length;
  int numIter = numSamples ~/ vadConfig.sileroVad.windowSize;

  for (int i = 0; i != numIter; ++i) {
    int start = i * vadConfig.sileroVad.windowSize;
    vad.acceptWaveform(Float32List.sublistView(
        waveData.samples, start, start + vadConfig.sileroVad.windowSize));

    while (!vad.isEmpty()) {
      final samples = vad.front().samples;
      final startTime = vad.front().start.toDouble() / waveData.sampleRate;
      final endTime =
          startTime + samples.length.toDouble() / waveData.sampleRate;

      final stream = recognizer.createStream();
      stream.acceptWaveform(samples: samples, sampleRate: waveData.sampleRate);
      recognizer.decode(stream);

      final result = recognizer.getResult(stream);
      stream.free();
      print(
          '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');

      vad.pop();
    }
  }

  vad.flush();

  while (!vad.isEmpty()) {
    final samples = vad.front().samples;
    final startTime = vad.front().start.toDouble() / waveData.sampleRate;
    final endTime = startTime + samples.length.toDouble() / waveData.sampleRate;

    final stream = recognizer.createStream();
    stream.acceptWaveform(samples: samples, sampleRate: waveData.sampleRate);
    recognizer.decode(stream);

    final result = recognizer.getResult(stream);
    stream.free();
    print(
        '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');

    vad.pop();
  }

  vad.free();

  recognizer.free();
}


================================================
FILE: dart-api-examples/vad-with-non-streaming-asr/bin/zipformer-ctc.dart
================================================
// Copyright (c)  2025  Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('silero-vad', help: 'Path to silero_vad.onnx')
    ..addOption('model', help: 'Path to the Zipformer CTC model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['silero-vad'] == null ||
      res['model'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  // create VAD
  final sileroVad = res['silero-vad'] as String;

  final sileroVadConfig = sherpa_onnx.SileroVadModelConfig(
    model: sileroVad,
    minSilenceDuration: 0.25,
    minSpeechDuration: 0.5,
    maxSpeechDuration: 5.0,
  );

  final vadConfig = sherpa_onnx.VadModelConfig(
    sileroVad: sileroVadConfig,
    numThreads: 1,
    debug: true,
  );

  final vad = sherpa_onnx.VoiceActivityDetector(
      config: vadConfig, bufferSizeInSeconds: 10);

  // create offline recognizer
  final model = res['model'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final zipformerCtc = sherpa_onnx.OfflineZipformerCtcModelConfig(model: model);

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    zipformerCtc: zipformerCtc,
    tokens: tokens,
    debug: true,
    numThreads: 1,
  );
  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  if (waveData.sampleRate != 16000) {
    print('Only 16000 Hz is supported. Given: ${waveData.sampleRate}');
    exit(1);
  }

  int numSamples = waveData.samples.length;
  int numIter = numSamples ~/ vadConfig.sileroVad.windowSize;

  for (int i = 0; i != numIter; ++i) {
    int start = i * vadConfig.sileroVad.windowSize;
    vad.acceptWaveform(Float32List.sublistView(
        waveData.samples, start, start + vadConfig.sileroVad.windowSize));

    while (!vad.isEmpty()) {
      final samples = vad.front().samples;
      final startTime = vad.front().start.toDouble() / waveData.sampleRate;
      final endTime =
          startTime + samples.length.toDouble() / waveData.sampleRate;

      final stream = recognizer.createStream();
      stream.acceptWaveform(samples: samples, sampleRate: waveData.sampleRate);
      recognizer.decode(stream);

      final result = recognizer.getResult(stream);
      stream.free();
      print(
          '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');

      vad.pop();
    }
  }

  vad.flush();

  while (!vad.isEmpty()) {
    final samples = vad.front().samples;
    final startTime = vad.front().start.toDouble() / waveData.sampleRate;
    final endTime = startTime + samples.length.toDouble() / waveData.sampleRate;

    final stream = recognizer.createStream();
    stream.acceptWaveform(samples: samples, sampleRate: waveData.sampleRate);
    recognizer.decode(stream);

    final result = recognizer.getResult(stream);
    stream.free();
    print(
        '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');

    vad.pop();
  }

  vad.free();

  recognizer.free();
}


================================================
FILE: dart-api-examples/vad-with-non-streaming-asr/bin/zipformer-transducer.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('silero-vad', help: 'Path to silero_vad.onnx')
    ..addOption('encoder', help: 'Path to the encoder model')
    ..addOption('decoder', help: 'Path to decoder model')
    ..addOption('joiner', help: 'Path to joiner model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);

  if (res['silero-vad'] == null ||
      res['encoder'] == null ||
      res['decoder'] == null ||
      res['joiner'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  // create VAD
  final sileroVad = res['silero-vad'] as String;

  final sileroVadConfig = sherpa_onnx.SileroVadModelConfig(
    model: sileroVad,
    minSilenceDuration: 0.25,
    minSpeechDuration: 0.5,
    maxSpeechDuration: 5.0,
  );

  final vadConfig = sherpa_onnx.VadModelConfig(
    sileroVad: sileroVadConfig,
    numThreads: 1,
    debug: true,
  );

  final vad = sherpa_onnx.VoiceActivityDetector(
      config: vadConfig, bufferSizeInSeconds: 10);

  // create zipformer transducer recognizer
  final encoder = res['encoder'] as String;
  final decoder = res['decoder'] as String;
  final joiner = res['joiner'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final transducer = sherpa_onnx.OfflineTransducerModelConfig(
    encoder: encoder,
    decoder: decoder,
    joiner: joiner,
  );

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    transducer: transducer,
    tokens: tokens,
    debug: true,
    numThreads: 1,
  );
  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  if (waveData.sampleRate != 16000) {
    print('Only 16000 Hz is supported. Given: ${waveData.sampleRate}');
    exit(1);
  }

  int numSamples = waveData.samples.length;
  int numIter = numSamples ~/ vadConfig.sileroVad.windowSize;

  for (int i = 0; i != numIter; ++i) {
    int start = i * vadConfig.sileroVad.windowSize;
    vad.acceptWaveform(Float32List.sublistView(
        waveData.samples, start, start + vadConfig.sileroVad.windowSize));

    while (!vad.isEmpty()) {
      final samples = vad.front().samples;
      final startTime = vad.front().start.toDouble() / waveData.sampleRate;
      final endTime =
          startTime + samples.length.toDouble() / waveData.sampleRate;

      final stream = recognizer.createStream();
      stream.acceptWaveform(samples: samples, sampleRate: waveData.sampleRate);
      recognizer.decode(stream);

      final result = recognizer.getResult(stream);
      stream.free();
      print(
          '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');

      vad.pop();
    }
  }

  vad.flush();

  while (!vad.isEmpty()) {
    final samples = vad.front().samples;
    final startTime = vad.front().start.toDouble() / waveData.sampleRate;
    final endTime = startTime + samples.length.toDouble() / waveData.sampleRate;

    final stream = recognizer.createStream();
    stream.acceptWaveform(samples: samples, sampleRate: waveData.sampleRate);
    recognizer.decode(stream);

    final result = recognizer.getResult(stream);
    stream.free();
    print(
        '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');

    vad.pop();
  }

  vad.free();

  recognizer.free();
}


================================================
FILE: dart-api-examples/vad-with-non-streaming-asr/pubspec.yaml
================================================
name: vad_with_non_streaming_asr

description: >
  This example demonstrates how to use the Dart API for VAD (voice activity detection)
  with non-streaming speech recognition.

version: 1.0.0

environment:
  sdk: ">=3.0.0 <4.0.0"

dependencies:
  sherpa_onnx: ^1.12.31
  path: ^1.9.0
  args: ^2.5.0

dev_dependencies:
  lints: ^3.0.0


================================================
FILE: dart-api-examples/vad-with-non-streaming-asr/run-dolphin-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  ls -lh sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02
fi

if [ ! -f ./lei-jun-test.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

if [[ ! -f ./silero_vad.onnx ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

dart run \
  ./bin/dolphin-ctc.dart \
  --silero-vad ./silero_vad.onnx \
  --model ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx \
  --tokens ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt \
  --input-wav ./lei-jun-test.wav


================================================
FILE: dart-api-examples/vad-with-non-streaming-asr/run-moonshine.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
fi

if [ ! -f ./Obama.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
fi

if [[ ! -f ./silero_vad.onnx ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

dart run \
  ./bin/moonshine.dart \
  --silero-vad ./silero_vad.onnx \
  --preprocessor ./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx \
  --encoder ./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx \
  --uncached-decoder ./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx \
  --cached-decoder ./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx \
  --tokens ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt \
  --input-wav ./Obama.wav


================================================
FILE: dart-api-examples/vad-with-non-streaming-asr/run-paraformer.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2

  tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
  rm sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
fi

if [ ! -f ./lei-jun-test.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

if [[ ! -f ./silero_vad.onnx ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

dart run \
  ./bin/paraformer.dart \
  --silero-vad ./silero_vad.onnx \
  --model ./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx \
  --tokens ./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt \
  --input-wav ./lei-jun-test.wav


================================================
FILE: dart-api-examples/vad-with-non-streaming-asr/run-sense-voice-en.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
fi

if [ ! -f ./Obama.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
fi

if [[ ! -f ./silero_vad.onnx ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

dart run \
  ./bin/sense-voice.dart \
  --silero-vad ./silero_vad.onnx \
  --model ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.onnx \
  --tokens ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt \
  --use-itn true \
  --input-wav ./Obama.wav


================================================
FILE: dart-api-examples/vad-with-non-streaming-asr/run-sense-voice-zh-2.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
fi

if [ ! -f ./lei-jun-test.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

if [[ ! -f ./silero_vad.onnx ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

dart run \
  ./bin/sense-voice-2.dart \
  --silero-vad ./silero_vad.onnx \
  --model ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.onnx \
  --tokens ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt \
  --use-itn true \
  --input-wav ./lei-jun-test.wav


================================================
FILE: dart-api-examples/vad-with-non-streaming-asr/run-sense-voice-zh.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
fi

if [ ! -f ./lei-jun-test.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

if [[ ! -f ./silero_vad.onnx ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

dart run \
  ./bin/sense-voice.dart \
  --silero-vad ./silero_vad.onnx \
  --model ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.onnx \
  --tokens ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt \
  --use-itn true \
  --input-wav ./lei-jun-test.wav


================================================
FILE: dart-api-examples/vad-with-non-streaming-asr/run-telespeech-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2

  tar xvf sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
  rm sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
fi

if [ ! -f ./lei-jun-test.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

if [[ ! -f ./silero_vad.onnx ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

dart run \
  ./bin/telespeech-ctc.dart \
  --silero-vad ./silero_vad.onnx \
  --model ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/model.int8.onnx \
  --tokens ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt \
  --input-wav ./lei-jun-test.wav


================================================
FILE: dart-api-examples/vad-with-non-streaming-asr/run-whisper.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2

  tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
  rm sherpa-onnx-whisper-tiny.en.tar.bz2
fi


if [ ! -f ./Obama.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
fi

if [[ ! -f ./silero_vad.onnx ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

dart run \
  ./bin/whisper.dart \
  --silero-vad ./silero_vad.onnx \
  --encoder ./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx \
  --decoder ./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx \
  --tokens ./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt \
  --input-wav ./Obama.wav


================================================
FILE: dart-api-examples/vad-with-non-streaming-asr/run-zipformer-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2

  tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
  rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
fi

if [ ! -f ./lei-jun-test.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

if [[ ! -f ./silero_vad.onnx ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

dart run \
  ./bin/zipformer-ctc.dart \
  --silero-vad ./silero_vad.onnx \
  --model ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx \
  --tokens ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt \
  --input-wav ./lei-jun-test.wav


================================================
FILE: dart-api-examples/vad-with-non-streaming-asr/run-zipformer-transducer.sh
================================================
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-gigaspeech-2023-12-12.tar.bz2

  tar xvf sherpa-onnx-zipformer-gigaspeech-2023-12-12.tar.bz2
  rm sherpa-onnx-zipformer-gigaspeech-2023-12-12.tar.bz2
fi

if [ ! -f ./Obama.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
fi

if [[ ! -f ./silero_vad.onnx ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

dart run \
  ./bin/zipformer-transducer.dart \
  --silero-vad ./silero_vad.onnx \
  --encoder ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/encoder-epoch-30-avg-1.int8.onnx \
  --decoder ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/decoder-epoch-30-avg-1.onnx \
  --joiner ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/joiner-epoch-30-avg-1.int8.onnx \
  --tokens ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/tokens.txt \
  --input-wav ./Obama.wav


================================================
FILE: dotnet-examples/.editorconfig
================================================
# top-most EditorConfig file
root = true

# Don't use tabs for indentation.
[*]
indent_style = space

# Code files
[*.{cs,csx,vb,vbx}]
indent_size = 2
insert_final_newline = true
charset = utf-8-bom
end_of_line = crlf


================================================
FILE: dotnet-examples/.gitignore
================================================
bin
obj
v17
.vs
!*.sh
*.vsidx


================================================
FILE: dotnet-examples/.notes
================================================
# How to create a new project in this folder

```bash
mkdir offline-tts
cd offline-tts
dotnet new console
cd ..
dotnet sln ./sherpa-onnx.sln add ./offline-tts
```


================================================
FILE: dotnet-examples/Common/Common.csproj
================================================
﻿<Project Sdk="Microsoft.NET.Sdk">

    <PropertyGroup>
        <TargetFramework>net8.0</TargetFramework>
        <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
    </PropertyGroup>
    <ItemGroup>
        <PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
    </ItemGroup>

</Project>


================================================
FILE: dotnet-examples/Common/WaveHeader.cs
================================================
﻿// Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
using System;
using System.IO;

using System.Runtime.InteropServices;

namespace SherpaOnnx;

[StructLayout(LayoutKind.Sequential)]
public struct WaveHeader
{
  public int ChunkID;
  public int ChunkSize;
  public int Format;
  public int SubChunk1ID;
  public int SubChunk1Size;
  public short AudioFormat;
  public short NumChannels;
  public int SampleRate;
  public int ByteRate;
  public short BlockAlign;
  public short BitsPerSample;
  public int SubChunk2ID;
  public int SubChunk2Size;

  public bool Validate()
  {
    if (ChunkID != 0x46464952)
    {
      Console.WriteLine($"Invalid chunk ID: 0x{ChunkID:X}. Expect 0x46464952");
      return false;
    }

    //               E V A W
    if (Format != 0x45564157)
    {
      Console.WriteLine($"Invalid format: 0x{Format:X}. Expect 0x45564157");
      return false;
    }

    //                      t m f
    if (SubChunk1ID != 0x20746d66)
    {
      Console.WriteLine($"Invalid SubChunk1ID: 0x{SubChunk1ID:X}. Expect 0x20746d66");
      return false;
    }

    if (SubChunk1Size != 16)
    {
      Console.WriteLine($"Invalid SubChunk1Size: {SubChunk1Size}. Expect 16");
      return false;
    }

    if (AudioFormat != 1)
    {
      Console.WriteLine($"Invalid AudioFormat: {AudioFormat}. Expect 1");
      return false;
    }

    if (NumChannels != 1)
    {
      Console.WriteLine($"Invalid NumChannels: {NumChannels}. Expect 1");
      return false;
    }

    if (ByteRate != (SampleRate * NumChannels * BitsPerSample / 8))
    {
      Console.WriteLine($"Invalid byte rate: {ByteRate}.");
      return false;
    }

    if (BlockAlign != (NumChannels * BitsPerSample / 8))
    {
      Console.WriteLine($"Invalid block align: {ByteRate}.");
      return false;
    }

    if (BitsPerSample != 16)
    {  // we support only 16 bits per sample
      Console.WriteLine($"Invalid bits per sample: {BitsPerSample}. Expect 16");
      return false;
    }

    return true;
  }
}

// It supports only 16-bit, single channel WAVE format.
// The sample rate can be any value.
public class WaveReader
{
  public WaveReader(string fileName)
  {
    if (!File.Exists(fileName))
    {
      throw new ApplicationException($"{fileName} does not exist!");
    }

    using var stream = File.Open(fileName, FileMode.Open);
    using var reader = new BinaryReader(stream);

    _header = ReadHeader(reader);

    if (!_header.Validate())
    {
      throw new ApplicationException($"Invalid wave file ${fileName}");
    }

    SkipMetaData(reader);

    // now read samples
    // _header.SubChunk2Size contains number of bytes in total.
    // we assume each sample is of type int16
    var buffer = reader.ReadBytes(_header.SubChunk2Size);
    var samples_int16 = new short[_header.SubChunk2Size / 2];
    Buffer.BlockCopy(buffer, 0, samples_int16, 0, buffer.Length);

    _samples = new float[samples_int16.Length];

    for (var i = 0; i < samples_int16.Length; ++i)
    {
      _samples[i] = samples_int16[i] / 32768.0F;
    }
  }

  private static WaveHeader ReadHeader(BinaryReader reader)
  {
    var bytes = reader.ReadBytes(Marshal.SizeOf(typeof(WaveHeader)));

    GCHandle handle = GCHandle.Alloc(bytes, GCHandleType.Pinned);
    WaveHeader header = (WaveHeader)Marshal.PtrToStructure(handle.AddrOfPinnedObject(), typeof(WaveHeader))!;
    handle.Free();

    return header;
  }

  private void SkipMetaData(BinaryReader reader)
  {
    var bs = reader.BaseStream;

    var subChunk2ID = _header.SubChunk2ID;
    var subChunk2Size = _header.SubChunk2Size;

    while (bs.Position != bs.Length && subChunk2ID != 0x61746164)
    {
      bs.Seek(subChunk2Size, SeekOrigin.Current);
      subChunk2ID = reader.ReadInt32();
      subChunk2Size = reader.ReadInt32();
    }
    _header.SubChunk2ID = subChunk2ID;
    _header.SubChunk2Size = subChunk2Size;
  }

  private WaveHeader _header;

  // Samples are normalized to the range [-1, 1]
  private float[] _samples;

  public int SampleRate => _header.SampleRate;

  public float[] Samples => _samples;

  public static void Test(string fileName)
  {
    WaveReader reader = new WaveReader(fileName);
    Console.WriteLine($"samples length: {reader.Samples.Length}");
    Console.WriteLine($"samples rate: {reader.SampleRate}");
  }
}


================================================
FILE: dotnet-examples/README.md
================================================
# Introduction

This folder contains C# API examples for [sherpa-onnx][sherpa-onnx].

Please refer to the documentation
https://k2-fsa.github.io/sherpa/onnx/csharp-api/index.html
for details.

- [./speech-enhancement-gtcrn](./speech-enhancement-gtcrn) It shows how to use
  the offline speech denoiser API with GTCRN models.
- [./speech-enhancement-dpdfnet](./speech-enhancement-dpdfnet) It shows how to
  use the offline speech denoiser API with DPDFNet models. Use 16 kHz DPDFNet
  models such as `dpdfnet_baseline.onnx`, `dpdfnet2.onnx`, `dpdfnet4.onnx`, or
  `dpdfnet8.onnx` for downstream ASR and `dpdfnet2_48khz_hr.onnx` for 48 kHz
  enhancement output.
- [./streaming-speech-enhancement-gtcrn](./streaming-speech-enhancement-gtcrn)
  It shows how to use the online speech denoiser API with GTCRN models.
- [./streaming-speech-enhancement-dpdfnet](./streaming-speech-enhancement-dpdfnet)
  It shows how to use the online speech denoiser API with DPDFNet models.
- [./zipvoice-tts](./zipvoice-tts) It shows how to use ZipVoice for
  Chinese/English zero-shot text-to-speech.
- [./zipvoice-tts-play](./zipvoice-tts-play) It shows how to use ZipVoice for
  Chinese/English zero-shot text-to-speech with playback.

```bash
dotnet new console -n offline-tts-play
dotnet sln ./sherpa-onnx.sln add ./offline-tts-play
```

```bash
dotnet nuget locals all --list
dotnet nuget locals all --clear
```

[sherpa-onnx]: https://github.com/k2-fsa/sherpa-onnx


================================================
FILE: dotnet-examples/keyword-spotting-from-files/Program.cs
================================================
﻿// Copyright (c)  2024  Xiaomi Corporation
//
// This file shows how to do keyword spotting with sherpa-onnx.
//
// 1. Download a model from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
// tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
//
// 2. Now run it
//
// dotnet run

using SherpaOnnx;

class KeywordSpotterDemo
{
  static void Main(string[] args)
  {
    var config = new KeywordSpotterConfig();
    config.FeatConfig.SampleRate = 16000;
    config.FeatConfig.FeatureDim = 80;

    config.ModelConfig.Transducer.Encoder = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx";
    config.ModelConfig.Transducer.Decoder = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx";
    config.ModelConfig.Transducer.Joiner = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx";

    config.ModelConfig.Tokens = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt";
    config.ModelConfig.Provider = "cpu";
    config.ModelConfig.NumThreads = 1;
    config.ModelConfig.Debug = 1;
    config.KeywordsFile = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/test_keywords.txt";

    var kws = new KeywordSpotter(config);

    var filename = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav";

    var waveReader = new WaveReader(filename);

    Console.WriteLine("----------Use pre-defined keywords----------");

    var s = kws.CreateStream();
    s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);

    float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)];
    s.AcceptWaveform(waveReader.SampleRate, tailPadding);
    s.InputFinished();

    while (kws.IsReady(s))
    {
      kws.Decode(s);
      var result = kws.GetResult(s);
      if (result.Keyword != string.Empty)
      {
        // Remember to call Reset() right after detecting a keyword
        kws.Reset(s);
        Console.WriteLine("Detected: {0}", result.Keyword);
      }
    }

    Console.WriteLine("----------Use pre-defined keywords + add a new keyword----------");
    s = kws.CreateStream("y ǎn y uán @演员");
    s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);

    s.AcceptWaveform(waveReader.SampleRate, tailPadding);
    s.InputFinished();

    while (kws.IsReady(s))
    {
      kws.Decode(s);
      var result = kws.GetResult(s);
      if (result.Keyword != string.Empty)
      {
        // Remember to call Reset() right after detecting a keyword
        kws.Reset(s);
        Console.WriteLine("Detected: {0}", result.Keyword);
      }
    }

    Console.WriteLine("----------Use pre-defined keywords + add 2 new keywords----------");

    // Note keywords are separated by /
    s = kws.CreateStream("y ǎn y uán @演员/zh ī m íng @知名");
    s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);

    s.AcceptWaveform(waveReader.SampleRate, tailPadding);
    s.InputFinished();

    while (kws.IsReady(s))
    {
      kws.Decode(s);
      var result = kws.GetResult(s);
      if (result.Keyword != string.Empty)
      {
        // Remember to call Reset() right after detecting a keyword
        kws.Reset(s);
        Console.WriteLine("Detected: {0}", result.Keyword);
      }
    }
  }
}


================================================
FILE: dotnet-examples/keyword-spotting-from-files/keyword-spotting-from-files.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>keyword_spotting_from_files</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: dotnet-examples/keyword-spotting-from-files/run.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -f ./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
fi

dotnet run -c Release


================================================
FILE: dotnet-examples/keyword-spotting-from-microphone/Program.cs
================================================
﻿// Copyright (c)  2024  Xiaomi Corporation
//
// This file shows how to do keyword spotting with sherpa-onnx.
//
// 1. Download a model from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
// tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
//
// 2. Now run it
//
// dotnet run

using PortAudioSharp;
using SherpaOnnx;
using System.Runtime.InteropServices;

class KeywordSpotterDemo
{
  static void Main(string[] args)
  {
    var config = new KeywordSpotterConfig();
    config.FeatConfig.SampleRate = 16000;
    config.FeatConfig.FeatureDim = 80;

    config.ModelConfig.Transducer.Encoder = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx";
    config.ModelConfig.Transducer.Decoder = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx";
    config.ModelConfig.Transducer.Joiner = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx";

    config.ModelConfig.Tokens = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt";
    config.ModelConfig.Provider = "cpu";
    config.ModelConfig.NumThreads = 1;
    config.ModelConfig.Debug = 1;
    config.KeywordsFile = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/test_keywords.txt";

    var kws = new KeywordSpotter(config);

    var filename = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav";

    var waveReader = new WaveReader(filename);

    Console.WriteLine("----------Use pre-defined keywords----------");

    var s = kws.CreateStream();

    Console.WriteLine(PortAudio.VersionInfo.versionText);
    PortAudio.Initialize();

    Console.WriteLine($"Number of devices: {PortAudio.DeviceCount}");
    for (int i = 0; i != PortAudio.DeviceCount; ++i)
    {
      Console.WriteLine($" Device {i}");
      var deviceInfo = PortAudio.GetDeviceInfo(i);
      Console.WriteLine($"   Name: {deviceInfo.name}");
      Console.WriteLine($"   Max input channels: {deviceInfo.maxInputChannels}");
      Console.WriteLine($"   Default sample rate: {deviceInfo.defaultSampleRate}");
    }
    int deviceIndex = PortAudio.DefaultInputDevice;
    if (deviceIndex == PortAudio.NoDevice)
    {
      Console.WriteLine("No default input device found");
      Environment.Exit(1);
    }

    var info = PortAudio.GetDeviceInfo(deviceIndex);

    Console.WriteLine();
    Console.WriteLine($"Use default device {deviceIndex} ({info.name})");

    var param = new StreamParameters();
    param.device = deviceIndex;
    param.channelCount = 1;
    param.sampleFormat = SampleFormat.Float32;
    param.suggestedLatency = info.defaultLowInputLatency;
    param.hostApiSpecificStreamInfo = IntPtr.Zero;

    PortAudioSharp.Stream.Callback callback = (IntPtr input, IntPtr output,
        uint frameCount,
        ref StreamCallbackTimeInfo timeInfo,
        StreamCallbackFlags statusFlags,
        IntPtr userData
        ) =>
    {
      var samples = new float[frameCount];
      Marshal.Copy(input, samples, 0, (int)frameCount);

      s.AcceptWaveform(config.FeatConfig.SampleRate, samples);

      return StreamCallbackResult.Continue;
    };

    var stream = new PortAudioSharp.Stream(inParams: param, outParams: null, sampleRate: config.FeatConfig.SampleRate,
        framesPerBuffer: 0,
        streamFlags: StreamFlags.ClipOff,
        callback: callback,
        userData: IntPtr.Zero
        );

    Console.WriteLine(param);
    Console.WriteLine("Started! Please speak");

    stream.Start();

    while (true)
    {
      while (kws.IsReady(s))
      {
        kws.Decode(s);

        var result = kws.GetResult(s);
        if (result.Keyword != string.Empty)
        {
          // Remember to call Reset() right after detecting a keyword
          kws.Reset(s);

          Console.WriteLine("Detected: {0}", result.Keyword);
        }
      }

      Thread.Sleep(200); // ms
    }
  }
}


================================================
FILE: dotnet-examples/keyword-spotting-from-microphone/keyword-spotting-from-microphone.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>keyword_spotting_from_microphone</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="PortAudioSharp2" Version="*" />
  </ItemGroup>

  <ItemGroup>
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: dotnet-examples/keyword-spotting-from-microphone/run.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -f ./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
fi

dotnet run -c Release


================================================
FILE: dotnet-examples/kitten-tts/Program.cs
================================================
﻿// Copyright (c)  2025  Xiaomi Corporation
//
// This file shows how to use a non-streaming KittenTTS model
// for text-to-speech
// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/index.html
// and
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
// to download pre-trained models
using SherpaOnnx;
using System.Runtime.InteropServices;

class KittenTtsDemo
{
  static void Main(string[] args)
  {

    TestEn();
  }

  static void TestEn()
  {
    var config = new OfflineTtsConfig();
    config.Model.Kitten.Model = "./kitten-nano-en-v0_1-fp16/model.fp16.onnx";
    config.Model.Kitten.Voices = "./kitten-nano-en-v0_1-fp16/voices.bin";
    config.Model.Kitten.Tokens = "./kitten-nano-en-v0_1-fp16/tokens.txt";
    config.Model.Kitten.DataDir = "./kitten-nano-en-v0_1-fp16/espeak-ng-data";

    config.Model.NumThreads = 2;
    config.Model.Debug = 1;
    config.Model.Provider = "cpu";

    var tts = new OfflineTts(config);
    var speed = 1.0f;
    var text = "Today as always, men fall into two groups: slaves and free men. Whoever " +
      "does not have two-thirds of his day for himself, is a slave, whatever " +
      "he may be: a statesman, a businessman, an official, or a scholar. " +
      "Friends fell out often because life was changing so fast. The easiest " +
      "thing in the world was to lose touch with someone.";

    // mapping of sid to voice name
    // 0->expr-voice-2-m, 1->expr-voice-2-f, 2->expr-voice-3-m
    // 3->expr-voice-3-f, 4->expr-voice-4-m, 5->expr-voice-4-f
    // 6->expr-voice-5-m, 7->expr-voice-5-f
    var sid = 0;

    OfflineTtsGenerationConfig genConfig = new OfflineTtsGenerationConfig();
    genConfig.Sid = sid;
    genConfig.Speed = speed;

    var MyCallback = (IntPtr samples, int n, float progress, IntPtr arg) =>
    {
      float[] data = new float[n];
      Marshal.Copy(samples, data, 0, n);
      // You can process samples here, e.g., play them.
      // See ../kitten-tts-play for how to play them
      Console.WriteLine($"Progress {progress*100}%");

      // 1 means to keep generating
      // 0 means to stop generating
      return 1;
    };

    var callback = new OfflineTtsCallbackProgressWithArg(MyCallback);

    var audio = tts.GenerateWithConfig(text, genConfig, callback);

    var outputFilename = "./generated-kitten-en.wav";
    var ok = audio.SaveToWaveFile(outputFilename);

    if (ok)
    {
      Console.WriteLine($"Wrote to {outputFilename} succeeded!");
    }
    else
    {
      Console.WriteLine($"Failed to write {outputFilename}");
    }
  }
}


================================================
FILE: dotnet-examples/kitten-tts/kitten-tts.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>kitten_tts</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: dotnet-examples/kitten-tts/run-kitten.sh
================================================
#!/usr/bin/env bash
set -ex

if [ ! -f ./kitten-nano-en-v0_1-fp16/model.fp16.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2
  tar xf kitten-nano-en-v0_1-fp16.tar.bz2
  rm kitten-nano-en-v0_1-fp16.tar.bz2
fi


dotnet run


================================================
FILE: dotnet-examples/kitten-tts-play/Program.cs
================================================
﻿// Copyright (c)  2025  Xiaomi Corporation
//
// This file shows how to use a non-streaming Kitten TTS model
// for text-to-speech
// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/index.html
// and
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
// to download pre-trained models
using PortAudioSharp;
using SherpaOnnx;
using System.Collections.Concurrent;
using System.Runtime.InteropServices;

class KittenTtsPlayDemo
{
  static void Main(string[] args)
  {
    var config = new OfflineTtsConfig();
    config.Model.Kitten.Model = "./kitten-nano-en-v0_1-fp16/model.fp16.onnx";
    config.Model.Kitten.Voices = "./kitten-nano-en-v0_1-fp16/voices.bin";
    config.Model.Kitten.Tokens = "./kitten-nano-en-v0_1-fp16/tokens.txt";
    config.Model.Kitten.DataDir = "./kitten-nano-en-v0_1-fp16/espeak-ng-data";

    config.Model.NumThreads = 2;
    config.Model.Debug = 1;
    config.Model.Provider = "cpu";

    var tts = new OfflineTts(config);
    var speed = 1.0f;
    var text = "Today as always, men fall into two groups: slaves and free men. Whoever " +
      "does not have two-thirds of his day for himself, is a slave, whatever " +
      "he may be: a statesman, a businessman, an official, or a scholar. " +
      "Friends fell out often because life was changing so fast. The easiest " +
      "thing in the world was to lose touch with someone.";

    // mapping of sid to voice name
    // 0->expr-voice-2-m, 1->expr-voice-2-f, 2->expr-voice-3-m
    // 3->expr-voice-3-f, 4->expr-voice-4-m, 5->expr-voice-4-f
    // 6->expr-voice-5-m, 7->expr-voice-5-f
    var sid = 0;
    OfflineTtsGenerationConfig genConfig = new OfflineTtsGenerationConfig();
    genConfig.Sid = sid;
    genConfig.Speed = speed;


    Console.WriteLine(PortAudio.VersionInfo.versionText);
    PortAudio.Initialize();
    Console.WriteLine($"Number of devices: {PortAudio.DeviceCount}");

    for (int i = 0; i != PortAudio.DeviceCount; ++i)
    {
      Console.WriteLine($" Device {i}");
      DeviceInfo deviceInfo = PortAudio.GetDeviceInfo(i);
      Console.WriteLine($"   Name: {deviceInfo.name}");
      Console.WriteLine($"   Max output channels: {deviceInfo.maxOutputChannels}");
      Console.WriteLine($"   Default sample rate: {deviceInfo.defaultSampleRate}");
    }
    int deviceIndex = PortAudio.DefaultOutputDevice;
    if (deviceIndex == PortAudio.NoDevice)
    {
      Console.WriteLine("No default output device found. Please use ../offline-tts instead");
      Environment.Exit(1);
    }

    var info = PortAudio.GetDeviceInfo(deviceIndex);
    Console.WriteLine();
    Console.WriteLine($"Use output default device {deviceIndex} ({info.name})");

    var param = new StreamParameters();
    param.device = deviceIndex;
    param.channelCount = 1;
    param.sampleFormat = SampleFormat.Float32;
    param.suggestedLatency = info.defaultLowOutputLatency;
    param.hostApiSpecificStreamInfo = IntPtr.Zero;

    // https://learn.microsoft.com/en-us/dotnet/standard/collections/thread-safe/blockingcollection-overview
    var dataItems = new BlockingCollection<float[]>();

    var myCallback = (IntPtr samples, int n, float progress, IntPtr arg) =>
    {
      Console.WriteLine($"Progress {progress*100}%");

      float[] data = new float[n];

      Marshal.Copy(samples, data, 0, n);

      dataItems.Add(data);

      // 1 means to keep generating
      // 0 means to stop generating
      return 1;
    };

    var playFinished = false;

    float[]? lastSampleArray = null;
    int lastIndex = 0; // not played

    PortAudioSharp.Stream.Callback playCallback = (IntPtr input, IntPtr output,
        UInt32 frameCount,
        ref StreamCallbackTimeInfo timeInfo,
        StreamCallbackFlags statusFlags,
        IntPtr userData
        ) =>
    {
      if (dataItems.IsCompleted && lastSampleArray == null && lastIndex == 0)
      {
        Console.WriteLine($"Finished playing");
        playFinished = true;
        return StreamCallbackResult.Complete;
      }

      int expected = Convert.ToInt32(frameCount);
      int i = 0;

      while ((lastSampleArray != null || dataItems.Count != 0) && (i < expected))
      {
        int needed = expected - i;

        if (lastSampleArray != null)
        {
          int remaining = lastSampleArray.Length - lastIndex;
          if (remaining >= needed)
          {
            float[] this_block = lastSampleArray.Skip(lastIndex).Take(needed).ToArray();
            lastIndex += needed;
            if (lastIndex == lastSampleArray.Length)
            {
              lastSampleArray = null;
              lastIndex = 0;
            }

            Marshal.Copy(this_block, 0, IntPtr.Add(output, i * sizeof(float)), needed);
            return StreamCallbackResult.Continue;
          }

          float[] this_block2 = lastSampleArray.Skip(lastIndex).Take(remaining).ToArray();
          lastIndex = 0;
          lastSampleArray = null;

          Marshal.Copy(this_block2, 0, IntPtr.Add(output, i * sizeof(float)), remaining);
          i += remaining;
          continue;
        }

        if (dataItems.Count != 0)
        {
          lastSampleArray = dataItems.Take();
          lastIndex = 0;
        }
      }

      if (i < expected)
      {
        int sizeInBytes = (expected - i) * 4;
        Marshal.Copy(new byte[sizeInBytes], 0, IntPtr.Add(output, i * sizeof(float)), sizeInBytes);
      }

      return StreamCallbackResult.Continue;
    };

    PortAudioSharp.Stream stream = new PortAudioSharp.Stream(inParams: null, outParams: param, sampleRate: tts.SampleRate,
        framesPerBuffer: 0,
        streamFlags: StreamFlags.ClipOff,
        callback: playCallback,
        userData: IntPtr.Zero
        );

    stream.Start();

    var callback = new OfflineTtsCallbackProgressWithArg(myCallback);

    var audio = tts.GenerateWithConfig(text, genConfig, callback);
    var outputFilename = "./generated-kitten-0.wav";
    var ok = audio.SaveToWaveFile(outputFilename);

    if (ok)
    {
      Console.WriteLine($"Wrote to {outputFilename} succeeded!");
    }
    else
    {
      Console.WriteLine($"Failed to write {outputFilename}");
    }
    dataItems.CompleteAdding();

    while (!playFinished)
    {
      Thread.Sleep(100); // 100ms
    }
  }
}


================================================
FILE: dotnet-examples/kitten-tts-play/kitten-tts-play.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>kitten_tts_play</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="PortAudioSharp2" Version="*" />
  </ItemGroup>

  <ItemGroup>
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: dotnet-examples/kitten-tts-play/run-kitten.sh
================================================
#!/usr/bin/env bash
set -ex

if [ ! -f ./kitten-nano-en-v0_1-fp16/model.fp16.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2
  tar xf kitten-nano-en-v0_1-fp16.tar.bz2
  rm kitten-nano-en-v0_1-fp16.tar.bz2
fi


dotnet run


================================================
FILE: dotnet-examples/kokoro-tts/Program.cs
================================================
﻿// Copyright (c)  2025  Xiaomi Corporation
//
// This file shows how to use a non-streaming Kokoro TTS model
// for text-to-speech
// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/index.html
// and
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
// to download pre-trained models
using SherpaOnnx;
using System.Runtime.InteropServices;

class KokoroTtsDemo
{
  static void Main(string[] args)
  {

    TestZhEn();
    TestEn();
  }

  static void TestZhEn()
  {
    var config = new OfflineTtsConfig();
    config.Model.Kokoro.Model = "./kokoro-multi-lang-v1_0/model.onnx";
    config.Model.Kokoro.Voices = "./kokoro-multi-lang-v1_0/voices.bin";
    config.Model.Kokoro.Tokens = "./kokoro-multi-lang-v1_0/tokens.txt";
    config.Model.Kokoro.DataDir = "./kokoro-multi-lang-v1_0/espeak-ng-data";
    config.Model.Kokoro.Lexicon = "./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt";

    config.Model.NumThreads = 2;
    config.Model.Debug = 1;
    config.Model.Provider = "cpu";

    var tts = new OfflineTts(config);
    var speed = 1.0f;
    var text = "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢？";

    var sid = 50;

    OfflineTtsGenerationConfig genConfig = new OfflineTtsGenerationConfig();
    genConfig.Sid = sid;
    genConfig.Speed = speed;
    genConfig.SilenceScale = 0.2f;

    var MyCallback = (IntPtr samples, int n, float progress, IntPtr arg) =>
    {
      float[] data = new float[n];
      Marshal.Copy(samples, data, 0, n);
      // You can process samples here, e.g., play them.
      // See ../kokoro-tts-playback for how to play them
      Console.WriteLine($"Progress {progress*100}%");

      // 1 means to keep generating
      // 0 means to stop generating
      return 1;
    };

    var callback = new OfflineTtsCallbackProgressWithArg(MyCallback);

    var audio = tts.GenerateWithConfig(text, genConfig, callback);

    var outputFilename = "./generated-kokoro-zh-en.wav";
    var ok = audio.SaveToWaveFile(outputFilename);

    if (ok)
    {
      Console.WriteLine($"Wrote to {outputFilename} succeeded!");
    }
    else
    {
      Console.WriteLine($"Failed to write {outputFilename}");
    }
  }

  static void TestEn()
  {
    var config = new OfflineTtsConfig();
    config.Model.Kokoro.Model = "./kokoro-en-v0_19/model.onnx";
    config.Model.Kokoro.Voices = "./kokoro-en-v0_19/voices.bin";
    config.Model.Kokoro.Tokens = "./kokoro-en-v0_19/tokens.txt";
    config.Model.Kokoro.DataDir = "./kokoro-en-v0_19/espeak-ng-data";

    config.Model.NumThreads = 2;
    config.Model.Debug = 1;
    config.Model.Provider = "cpu";

    var tts = new OfflineTts(config);
    var speed = 1.0f;
    var text = "Today as always, men fall into two groups: slaves and free men. Whoever " +
      "does not have two-thirds of his day for himself, is a slave, whatever " +
      "he may be: a statesman, a businessman, an official, or a scholar. " +
      "Friends fell out often because life was changing so fast. The easiest " +
      "thing in the world was to lose touch with someone.";

    // mapping of sid to voice name
    // 0->af, 1->af_bella, 2->af_nicole, 3->af_sarah, 4->af_sky, 5->am_adam
    // 6->am_michael, 7->bf_emma, 8->bf_isabella, 9->bm_george, 10->bm_lewis
    var sid = 0;

    OfflineTtsGenerationConfig genConfig = new OfflineTtsGenerationConfig();
    genConfig.Sid = sid;
    genConfig.Speed = speed;
    genConfig.SilenceScale = 0.2f;

    var MyCallback = (IntPtr samples, int n, float progress, IntPtr arg) =>
    {
      float[] data = new float[n];
      Marshal.Copy(samples, data, 0, n);
      // You can process samples here, e.g., play them.
      // See ../kokoro-tts-playback for how to play them
      Console.WriteLine($"Progress {progress*100}%");

      // 1 means to keep generating
      // 0 means to stop generating
      return 1;
    };

    var callback = new OfflineTtsCallbackProgressWithArg(MyCallback);

    var audio = tts.GenerateWithConfig(text, genConfig, callback);

    var outputFilename = "./generated-kokoro-en.wav";
    var ok = audio.SaveToWaveFile(outputFilename);

    if (ok)
    {
      Console.WriteLine($"Wrote to {outputFilename} succeeded!");
    }
    else
    {
      Console.WriteLine($"Failed to write {outputFilename}");
    }
  }
}


================================================
FILE: dotnet-examples/kokoro-tts/kokoro-tts.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>kokoro_tts</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: dotnet-examples/kokoro-tts/run-kokoro.sh
================================================
#!/usr/bin/env bash
set -ex

if [ ! -f ./kokoro-multi-lang-v1_0/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
  tar xf kokoro-multi-lang-v1_0.tar.bz2
  rm kokoro-multi-lang-v1_0.tar.bz2
fi

if [ ! -f ./kokoro-en-v0_19/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
  tar xf kokoro-en-v0_19.tar.bz2
  rm kokoro-en-v0_19.tar.bz2
fi

dotnet run


================================================
FILE: dotnet-examples/kokoro-tts-play/Program.cs
================================================
﻿// Copyright (c)  2025  Xiaomi Corporation
//
// This file shows how to use a non-streaming Kokoro TTS model
// for text-to-speech
// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/index.html
// and
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
// to download pre-trained models
using PortAudioSharp;
using SherpaOnnx;
using System.Collections.Concurrent;
using System.Runtime.InteropServices;

class KokoroTtsPlayDemo
{
  static void Main(string[] args)
  {
    var config = new OfflineTtsConfig();
    config.Model.Kokoro.Model = "./kokoro-en-v0_19/model.onnx";
    config.Model.Kokoro.Voices = "./kokoro-en-v0_19/voices.bin";
    config.Model.Kokoro.Tokens = "./kokoro-en-v0_19/tokens.txt";
    config.Model.Kokoro.DataDir = "./kokoro-en-v0_19/espeak-ng-data";

    config.Model.NumThreads = 2;
    config.Model.Debug = 1;
    config.Model.Provider = "cpu";

    var tts = new OfflineTts(config);
    var speed = 1.0f;
    var text = "Today as always, men fall into two groups: slaves and free men. Whoever " +
      "does not have two-thirds of his day for himself, is a slave, whatever " +
      "he may be: a statesman, a businessman, an official, or a scholar. " +
      "Friends fell out often because life was changing so fast. The easiest " +
      "thing in the world was to lose touch with someone.";

    // mapping of sid to voice name
    // 0->af, 1->af_bella, 2->af_nicole, 3->af_sarah, 4->af_sky, 5->am_adam
    // 6->am_michael, 7->bf_emma, 8->bf_isabella, 9->bm_george, 10->bm_lewis
    var sid = 0;
    OfflineTtsGenerationConfig genConfig = new OfflineTtsGenerationConfig();
    genConfig.Sid = sid;
    genConfig.Speed = speed;
    genConfig.SilenceScale = 0.2f;


    Console.WriteLine(PortAudio.VersionInfo.versionText);
    PortAudio.Initialize();
    Console.WriteLine($"Number of devices: {PortAudio.DeviceCount}");

    for (int i = 0; i != PortAudio.DeviceCount; ++i)
    {
      Console.WriteLine($" Device {i}");
      DeviceInfo deviceInfo = PortAudio.GetDeviceInfo(i);
      Console.WriteLine($"   Name: {deviceInfo.name}");
      Console.WriteLine($"   Max output channels: {deviceInfo.maxOutputChannels}");
      Console.WriteLine($"   Default sample rate: {deviceInfo.defaultSampleRate}");
    }
    int deviceIndex = PortAudio.DefaultOutputDevice;
    if (deviceIndex == PortAudio.NoDevice)
    {
      Console.WriteLine("No default output device found. Please use ../offline-tts instead");
      Environment.Exit(1);
    }

    var info = PortAudio.GetDeviceInfo(deviceIndex);
    Console.WriteLine();
    Console.WriteLine($"Use output default device {deviceIndex} ({info.name})");

    var param = new StreamParameters();
    param.device = deviceIndex;
    param.channelCount = 1;
    param.sampleFormat = SampleFormat.Float32;
    param.suggestedLatency = info.defaultLowOutputLatency;
    param.hostApiSpecificStreamInfo = IntPtr.Zero;

    // https://learn.microsoft.com/en-us/dotnet/standard/collections/thread-safe/blockingcollection-overview
    var dataItems = new BlockingCollection<float[]>();

    var MyCallback = (IntPtr samples, int n, float progress, IntPtr arg) =>
    {
      Console.WriteLine($"Progress {progress*100}%");

      float[] data = new float[n];

      Marshal.Copy(samples, data, 0, n);

      dataItems.Add(data);

      // 1 means to keep generating
      // 0 means to stop generating
      return 1;
    };

    var playFinished = false;

    float[]? lastSampleArray = null;
    int lastIndex = 0; // not played

    PortAudioSharp.Stream.Callback playCallback = (IntPtr input, IntPtr output,
        UInt32 frameCount,
        ref StreamCallbackTimeInfo timeInfo,
        StreamCallbackFlags statusFlags,
        IntPtr userData
        ) =>
    {
      if (dataItems.IsCompleted && lastSampleArray == null && lastIndex == 0)
      {
        Console.WriteLine($"Finished playing");
        playFinished = true;
        return StreamCallbackResult.Complete;
      }

      int expected = Convert.ToInt32(frameCount);
      int i = 0;

      while ((lastSampleArray != null || dataItems.Count != 0) && (i < expected))
      {
        int needed = expected - i;

        if (lastSampleArray != null)
        {
          int remaining = lastSampleArray.Length - lastIndex;
          if (remaining >= needed)
          {
            float[] this_block = lastSampleArray.Skip(lastIndex).Take(needed).ToArray();
            lastIndex += needed;
            if (lastIndex == lastSampleArray.Length)
            {
              lastSampleArray = null;
              lastIndex = 0;
            }

            Marshal.Copy(this_block, 0, IntPtr.Add(output, i * sizeof(float)), needed);
            return StreamCallbackResult.Continue;
          }

          float[] this_block2 = lastSampleArray.Skip(lastIndex).Take(remaining).ToArray();
          lastIndex = 0;
          lastSampleArray = null;

          Marshal.Copy(this_block2, 0, IntPtr.Add(output, i * sizeof(float)), remaining);
          i += remaining;
          continue;
        }

        if (dataItems.Count != 0)
        {
          lastSampleArray = dataItems.Take();
          lastIndex = 0;
        }
      }

      if (i < expected)
      {
        int sizeInBytes = (expected - i) * 4;
        Marshal.Copy(new byte[sizeInBytes], 0, IntPtr.Add(output, i * sizeof(float)), sizeInBytes);
      }

      return StreamCallbackResult.Continue;
    };

    PortAudioSharp.Stream stream = new PortAudioSharp.Stream(inParams: null, outParams: param, sampleRate: tts.SampleRate,
        framesPerBuffer: 0,
        streamFlags: StreamFlags.ClipOff,
        callback: playCallback,
        userData: IntPtr.Zero
        );

    stream.Start();

    var callback = new OfflineTtsCallbackProgressWithArg(MyCallback);

    var audio = tts.GenerateWithConfig(text, genConfig, callback);
    var outputFilename = "./generated-kokoro-0.wav";
    var ok = audio.SaveToWaveFile(outputFilename);

    if (ok)
    {
      Console.WriteLine($"Wrote to {outputFilename} succeeded!");
    }
    else
    {
      Console.WriteLine($"Failed to write {outputFilename}");
    }
    dataItems.CompleteAdding();

    while (!playFinished)
    {
      Thread.Sleep(100); // 100ms
    }
  }
}


================================================
FILE: dotnet-examples/kokoro-tts-play/kokoro-tts-play.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>kokoro_tts_play</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="PortAudioSharp2" Version="*" />
  </ItemGroup>

  <ItemGroup>
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: dotnet-examples/kokoro-tts-play/run-kokoro-en.sh
================================================
#!/usr/bin/env bash
set -ex

if [ ! -f ./kokoro-en-v0_19/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
  tar xf kokoro-en-v0_19.tar.bz2
  rm kokoro-en-v0_19.tar.bz2
fi

dotnet run


================================================
FILE: dotnet-examples/non-streaming-canary-decode-files/Program.cs
================================================
﻿// Copyright (c)  2025  Xiaomi Corporation
//
// This file shows how to use a NeMo Canary model for speech recognition.
//
// You can find the model doc at
// https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html
using SherpaOnnx;

class NonStreamingAsrCanary
{
  static void Main(string[] args)
  {
    // please download model files from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    var config = new OfflineRecognizerConfig();
    config.ModelConfig.Canary.Encoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx";
    config.ModelConfig.Canary.Decoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx";
    config.ModelConfig.Canary.SrcLang = "en";
    config.ModelConfig.Canary.TgtLang = "en";
    config.ModelConfig.Tokens = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt";
    config.ModelConfig.Debug = 0;
    var recognizer = new OfflineRecognizer(config);

    var testWaveFilename = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav";
    var reader = new WaveReader(testWaveFilename);
    var stream = recognizer.CreateStream();
    stream.AcceptWaveform(reader.SampleRate, reader.Samples);
    recognizer.Decode(stream);
    var text = stream.Result.Text;
    Console.WriteLine("Text (English): {0}", text);

    // Now output text in German
    config.ModelConfig.Canary.TgtLang = "de";
    recognizer.SetConfig(config);

    stream = recognizer.CreateStream();
    stream.AcceptWaveform(reader.SampleRate, reader.Samples);
    recognizer.Decode(stream);
    text = stream.Result.Text;
    Console.WriteLine("Text (German): {0}", text);
  }
}


================================================
FILE: dotnet-examples/non-streaming-canary-decode-files/non-streaming-canary-decode-files.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>non_streaming_canary_decode_files</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: dotnet-examples/non-streaming-canary-decode-files/run.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
fi

dotnet run


================================================
FILE: dotnet-examples/non-streaming-funasr-nano-decode-files/Program.cs
================================================
﻿// Copyright (c)  2026  Xiaomi Corporation
//
// This file shows how to use a FunASR Nano model for speech recognition.
//
// You can find the model doc at
// https://k2-fsa.github.io/sherpa/onnx/funasr-nano.html
using SherpaOnnx;

class NonStreamingFunAsrNano
{
  static void Main(string[] args)
  {
    // please download model files from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    var config = new OfflineRecognizerConfig();
    config.ModelConfig.FunAsrNano.EncoderAdaptor = "./sherpa-onnx-funasr-nano-int8-2025-12-30/encoder_adaptor.int8.onnx";
    config.ModelConfig.FunAsrNano.LLM = "./sherpa-onnx-funasr-nano-int8-2025-12-30/llm.int8.onnx";
    config.ModelConfig.FunAsrNano.Embedding = "./sherpa-onnx-funasr-nano-int8-2025-12-30/embedding.int8.onnx";
    config.ModelConfig.FunAsrNano.Tokenizer = "./sherpa-onnx-funasr-nano-int8-2025-12-30/Qwen3-0.6B";
    config.ModelConfig.Tokens = "";
    config.ModelConfig.Debug = 1;
    var recognizer = new OfflineRecognizer(config);

    var testWaveFilename = "./sherpa-onnx-funasr-nano-int8-2025-12-30/test_wavs/lyrics.wav";
    var reader = new WaveReader(testWaveFilename);
    var stream = recognizer.CreateStream();
    stream.AcceptWaveform(reader.SampleRate, reader.Samples);
    recognizer.Decode(stream);
    var text = stream.Result.Text;
    Console.WriteLine("Text: {0}", text);
  }
}


================================================
FILE: dotnet-examples/non-streaming-funasr-nano-decode-files/non-streaming-funasr-nano-decode-files.csproj
================================================
﻿<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>non_streaming_funasr_nano_decode_files</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: dotnet-examples/non-streaming-funasr-nano-decode-files/run.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -f ./sherpa-onnx-funasr-nano-int8-2025-12-30/embedding.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2 
  tar xvf sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
  rm sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
fi

dotnet run


================================================
FILE: dotnet-examples/non-streaming-moonshine-v2-decode-files/Program.cs
================================================
﻿// Copyright (c)  2026  Xiaomi Corporation
//
// This file shows how to use a Moonshine v2 model for speech recognition.
//
// You can find the model doc at
// https://k2-fsa.github.io/sherpa/onnx/moonshine/
using SherpaOnnx;

class NonStreamingAsrMoonshineV2
{
  static void Main(string[] args)
  {
    // please download model files from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    var config = new OfflineRecognizerConfig();
    config.ModelConfig.Moonshine.Encoder = "./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/encoder_model.ort";
    config.ModelConfig.Moonshine.MergedDecoder = "./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/decoder_model_merged.ort";
    config.ModelConfig.Tokens = "./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/tokens.txt";
    config.ModelConfig.Debug = 0;
    var recognizer = new OfflineRecognizer(config);

    var testWaveFilename = "./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/test_wavs/0.wav";
    var reader = new WaveReader(testWaveFilename);
    var stream = recognizer.CreateStream();
    stream.AcceptWaveform(reader.SampleRate, reader.Samples);
    recognizer.Decode(stream);
    var text = stream.Result.Text;
    Console.WriteLine("Text: {0}", text);
  }
}


================================================
FILE: dotnet-examples/non-streaming-moonshine-v2-decode-files/non-streaming-moonshine-v2-decode-files.csproj
================================================
﻿<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>non_streaming_moonshine_v2_decode_files</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: dotnet-examples/non-streaming-moonshine-v2-decode-files/run.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -f ./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/encoder_model.ort ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
  tar xvf sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
  rm sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
fi

dotnet run


================================================
FILE: dotnet-examples/offline-audio-tagging/Program.cs
================================================
﻿// Copyright (c)  2025  Xiaomi Corporation
//
// This file shows how to use a non-streaming Zipformer or CED model
// for audio tagging
// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/audio-tagging/index.html
// and
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models
// to download pre-trained models

using SherpaOnnx;
using System.Runtime.InteropServices;

class AudioTaggingDemo
{
  static void Main(string[] args)
  {
    TestZipformer();
    TestCED();
  }

  static void TestZipformer()
  {
    var config = new AudioTaggingConfig();

    config.Model.Zipformer.Model = "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/model.onnx";

    config.Model.NumThreads = 1;
    config.Model.Debug = 1;
    config.Labels = "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/class_labels_indices.csv";

    config.TopK = 5;

    var tagger = new AudioTagging(config);

    var s = tagger.CreateStream();

    var waveFilename = "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/1.wav";
    WaveReader waveReader = new WaveReader(waveFilename);
    s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);

    var events = tagger.Compute(s);
    foreach (var e in events)
    {
      Console.WriteLine($"Name {e.Name}, index: {e.Index}, prob: {e.Prob}");
    }
  }

  static void TestCED()
  {
    var config = new AudioTaggingConfig();

    config.Model.CED ="./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/model.int8.onnx";

    config.Model.NumThreads = 1;
    config.Model.Debug = 1;
    config.Labels = "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/class_labels_indices.csv";

    config.TopK = 5;

    var tagger = new AudioTagging(config);

    var s = tagger.CreateStream();

    var waveFilename = "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/1.wav";
    WaveReader waveReader = new WaveReader(waveFilename);
    s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);

    var events = tagger.Compute(s);
    foreach (var e in events)
    {
      Console.WriteLine($"Name {e.Name}, index: {e.Index}, prob: {e.Prob}");
    }
  }
}


================================================
FILE: dotnet-examples/offline-audio-tagging/offline-audio-tagging.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>offline_audio_tagging</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: dotnet-examples/offline-audio-tagging/run.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -f ./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
  tar xvf sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
  rm sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2

  ls -lh sherpa-onnx-zipformer-small-audio-tagging-2024-04-15
fi

if [ ! -f ./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2
  tar xvf sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2
  rm sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2

  ls -lh sherpa-onnx-ced-mini-audio-tagging-2024-04-19
fi

dotnet run


================================================
FILE: dotnet-examples/offline-decode-files/Program.cs
================================================
﻿// Copyright (c)  2023  Xiaomi Corporation
// Copyright (c)  2023 by manyeyes
//
// This file shows how to use a non-streaming model to decode files
// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
// to download non-streaming models
using CommandLine;
using CommandLine.Text;
using SherpaOnnx;

class OfflineDecodeFiles
{
  class Options
  {
    [Option("sample-rate", Required = false, Default = 16000, HelpText = "Sample rate of the data used to train the model")]
    public int SampleRate { get; set; } = 16000;

    [Option("feat-dim", Required = false, Default = 80, HelpText = "Dimension of the features used to train the model")]
    public int FeatureDim { get; set; } = 80;

    [Option(Required = false, HelpText = "Path to tokens.txt")]
    public string Tokens { get; set; } = string.Empty;

    [Option(Required = false, Default = "", HelpText = "Path to transducer encoder.onnx. Used only for transducer models")]
    public string Encoder { get; set; } = string.Empty;

    [Option(Required = false, Default = "", HelpText = "Path to transducer decoder.onnx. Used only for transducer models")]
    public string Decoder { get; set; } = string.Empty;

    [Option(Required = false, Default = "", HelpText = "Path to transducer joiner.onnx. Used only for transducer models")]
    public string Joiner { get; set; } = string.Empty;

    [Option("model-type", Required = false, Default = "", HelpText = "model type")]
    public string ModelType { get; set; } = string.Empty;

    [Option("fire-red-asr-encoder", Required = false, Default = "", HelpText = "Path to FireRedAsr encoder.int8.onnx. Used only for FireRedAsr models")]
    public string FireRedAsrEncoder { get; set; } = string.Empty;


    [Option("fire-red-asr-decoder", Required = false, Default = "", HelpText = "Path to FireRedAsr decoder.int8.onnx. Used only for FireRedAsr models")]
    public string FireRedAsrDecoder { get; set; } = string.Empty;


    [Option("whisper-encoder", Required = false, Default = "", HelpText = "Path to whisper encoder.onnx. Used only for whisper models")]
    public string WhisperEncoder { get; set; } = string.Empty;

    [Option("whisper-decoder", Required = false, Default = "", HelpText = "Path to whisper decoder.onnx. Used only for whisper models")]
    public string WhisperDecoder { get; set; } = string.Empty;

    [Option("whisper-language", Required = false, Default = "", HelpText = "Language of the input file. Can be empty")]
    public string WhisperLanguage { get; set; } = string.Empty;

    [Option("whisper-task", Required = false, Default = "transcribe", HelpText = "transcribe or translate")]
    public string WhisperTask { get; set; } = "transcribe";

    [Option("moonshine-preprocessor", Required = false, Default = "", HelpText = "Path to preprocess.onnx. Used only for Moonshine models")]
    public string MoonshinePreprocessor { get; set; } = string.Empty;

    [Option("moonshine-encoder", Required = false, Default = "", HelpText = "Path to encode.onnx. Used only for Moonshine models")]
    public string MoonshineEncoder { get; set; } = string.Empty;

    [Option("moonshine-uncached-decoder", Required = false, Default = "", HelpText = "Path to uncached_decode.onnx. Used only for Moonshine models")]
    public string MoonshineUncachedDecoder { get; set; } = string.Empty;

    [Option("moonshine-cached-decoder", Required = false, Default = "", HelpText = "Path to cached_decode.onnx. Used only for Moonshine models")]
    public string MoonshineCachedDecoder { get; set; } = string.Empty;

    [Option("tdnn-model", Required = false, Default = "", HelpText = "Path to tdnn yesno model")]
    public string TdnnModel { get; set; } = string.Empty;

    [Option(Required = false, HelpText = "Path to model.onnx. Used only for paraformer models")]
    public string Paraformer { get; set; } = string.Empty;

    [Option("nemo-ctc", Required = false, HelpText = "Path to model.onnx. Used only for NeMo CTC models")]
    public string NeMoCtc { get; set; } = string.Empty;

    [Option("zipformer-ctc", Required = false, HelpText = "Path to model.onnx. Used only for Zipformer CTC models")]
    public string ZipformerCtc { get; set; } = string.Empty;

    [Option("dolphin-model", Required = false, Default = "", HelpText = "Path to dolphin ctc model")]
    public string DolphinModel { get; set; } = string.Empty;

    [Option("telespeech-ctc", Required = false, HelpText = "Path to model.onnx. Used only for TeleSpeech CTC models")]
    public string TeleSpeechCtc { get; set; } = string.Empty;

    [Option("wenet-ctc", Required = false, HelpText = "Path to model.onnx. Used only for Wenet CTC models")]
    public string WenetCtc { get; set; } = string.Empty;

    [Option("omnilingual-asr-ctc", Required = false, HelpText = "Path to model.onnx. Used only for Omnilingual ASR CTC models")]
    public string Omnilingual { get; set; } = string.Empty;

    [Option("medasr", Required = false, HelpText = "Path to model.onnx. Used only for Google MedASR CTC models")]
    public string MedAsr { get; set; } = string.Empty;

    [Option("fire-red-asr-ctc", Required = false, HelpText = "Path to model.onnx. Used only for FireRedASR CTC models")]
    public string FireRedAsrCtc { get; set; } = string.Empty;

    [Option("sense-voice-model", Required = false, HelpText = "Path to model.onnx. Used only for SenseVoice CTC models")]
    public string SenseVoiceModel { get; set; } = string.Empty;

    [Option("sense-voice-use-itn", Required = false, HelpText = "1 to use inverse text normalization for sense voice.")]
    public int SenseVoiceUseItn { get; set; } = 1;

    [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")]
    public int NumThreads { get; set; } = 1;

    [Option("decoding-method", Required = false, Default = "greedy_search",
            HelpText = "Valid decoding methods are: greedy_search, modified_beam_search")]
    public string DecodingMethod { get; set; } = "greedy_search";

    [Option("rule-fsts", Required = false, Default = "",
            HelpText = "If not empty, path to rule fst for inverse text normalization")]
    public string RuleFsts { get; set; } = string.Empty;

    [Option("max-active-paths", Required = false, Default = 4,
        HelpText = @"Used only when --decoding--method is modified_beam_search.
It specifies number of active paths to keep during the search")]
    public int MaxActivePaths { get; set; } = 4;

    [Option("hotwords-file", Required = false, Default = "", HelpText = "Path to hotwords.txt")]
    public string HotwordsFile { get; set; } = string.Empty;

    [Option("hotwords-score", Required = false, Default = 1.5F, HelpText = "hotwords score")]
    public float HotwordsScore { get; set; } = 1.5F;

    [Option("files", Required = true, HelpText = "Audio files for decoding")]
    public IEnumerable<string> Files { get; set; } = new string[] { };
  }

  static void Main(string[] args)
  {
    var parser = new CommandLine.Parser(with => with.HelpWriter = null);
    var parserResult = parser.ParseArguments<Options>(args);

    parserResult
      .WithParsed<Options>(options => Run(options))
      .WithNotParsed(errs => DisplayHelp(parserResult, errs));
  }

  private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs)
  {
    var usage = @"
# Zipformer

dotnet run \
  --tokens=./sherpa-onnx-zipformer-en-2023-04-01/tokens.txt \
  --encoder=./sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.onnx \
  --decoder=./sherpa-onnx-zipformer-en-2023-04-01/decoder-epoch-99-avg-1.onnx \
  --joiner=./sherpa-onnx-zipformer-en-2023-04-01/joiner-epoch-99-avg-1.onnx \
  --files ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/0.wav \
  ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/1.wav \
  ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/8k.wav

Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/index.html
to download pre-trained non-streaming zipformer models.

# Paraformer

dotnet run \
  --tokens=./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt \
  --paraformer=./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx \
  --files ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/0.wav \
  ./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/0.wav \
  ./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/1.wav \
  ./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/2.wav \
  ./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/8k.wav

Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html
to download pre-trained paraformer models

# NeMo CTC

dotnet run \
  --tokens=./sherpa-onnx-nemo-ctc-en-conformer-medium/tokens.txt \
  --nemo-ctc=./sherpa-onnx-nemo-ctc-en-conformer-medium/model.onnx \
  --num-threads=1 \
  --files ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/0.wav \
  ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/1.wav \
  ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/8k.wav

Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/index.html
to download pre-trained paraformer models

# Whisper

dotnet run \
  --whisper-encoder=./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.onnx \
  --whisper-decoder=./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.onnx \
  --tokens=./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt \
  --files ./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav \
  ./sherpa-onnx-whisper-tiny.en/test_wavs/1.wav \
  ./sherpa-onnx-whisper-tiny.en/test_wavs/8k.wav

Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html
to download pre-trained whisper models.

# Tdnn yesno

dotnet run \
  --sample-rate=8000 \
  --feat-dim=23 \
  --tokens=./sherpa-onnx-tdnn-yesno/tokens.txt \
  --tdnn-model=./sherpa-onnx-tdnn-yesno/model-epoch-14-avg-2.onnx \
  --files ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_0_1_0_0_0_1.wav \
  ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_0_0_1_0.wav \
  ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_0_1_1_1.wav \
  ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_1_0_0_1.wav \
  ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_1_0_0_0_1.wav \
  ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_1_0_1_1_0.wav

Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/yesno/index.html
to download pre-trained Tdnn models.
";

    var helpText = HelpText.AutoBuild(result, h =>
    {
      h.AdditionalNewLineAfterOption = false;
      h.Heading = usage;
      h.Copyright = "Copyright (c) 2023 Xiaomi Corporation";
      return HelpText.DefaultParsingErrorsHandler(result, h);
    }, e => e);
    Console.WriteLine(helpText);
  }

  private static void Run(Options options)
  {
    OfflineRecognizerConfig config = new OfflineRecognizerConfig();
    config.FeatConfig.SampleRate = options.SampleRate;
    config.FeatConfig.FeatureDim = options.FeatureDim;

    config.ModelConfig.Tokens = options.Tokens;

    if (!string.IsNullOrEmpty(options.Encoder))
    {
      // this is a transducer model
      config.ModelConfig.Transducer.Encoder = options.Encoder;
      config.ModelConfig.Transducer.Decoder = options.Decoder;
      config.ModelConfig.Transducer.Joiner = options.Joiner;
    }
    else if (!string.IsNullOrEmpty(options.Paraformer))
    {
      config.ModelConfig.Paraformer.Model = options.Paraformer;
    }
    else if (!string.IsNullOrEmpty(options.NeMoCtc))
    {
      config.ModelConfig.NeMoCtc.Model = options.NeMoCtc;
    }
    else if (!string.IsNullOrEmpty(options.DolphinModel))
    {
      config.ModelConfig.Dolphin.Model = options.DolphinModel;
    }
    else if (!string.IsNullOrEmpty(options.ZipformerCtc))
    {
      config.ModelConfig.ZipformerCtc.Model = options.ZipformerCtc;
    }
    else if (!string.IsNullOrEmpty(options.TeleSpeechCtc))
    {
      config.ModelConfig.TeleSpeechCtc = options.TeleSpeechCtc;
    }
    else if (!string.IsNullOrEmpty(options.WenetCtc))
    {
      config.ModelConfig.WenetCtc.Model = options.WenetCtc;
    }
    else if (!string.IsNullOrEmpty(options.Omnilingual))
    {
      config.ModelConfig.Omnilingual.Model = options.Omnilingual;
    }
    else if (!string.IsNullOrEmpty(options.MedAsr))
    {
      config.ModelConfig.MedAsr.Model = options.MedAsr;
    }
    else if (!string.IsNullOrEmpty(options.WhisperEncoder))
    {
      config.ModelConfig.Whisper.Encoder = options.WhisperEncoder;
      config.ModelConfig.Whisper.Decoder = options.WhisperDecoder;
      config.ModelConfig.Whisper.Language = options.WhisperLanguage;
      config.ModelConfig.Whisper.Task = options.WhisperTask;
    }
    else if (!string.IsNullOrEmpty(options.TdnnModel))
    {
      config.ModelConfig.Tdnn.Model = options.TdnnModel;
    }
    else if (!string.IsNullOrEmpty(options.SenseVoiceModel))
    {
      config.ModelConfig.SenseVoice.Model = options.SenseVoiceModel;
      config.ModelConfig.SenseVoice.UseInverseTextNormalization = options.SenseVoiceUseItn;
    }
    else if (!string.IsNullOrEmpty(options.MoonshinePreprocessor))
    {
      config.ModelConfig.Moonshine.Preprocessor = options.MoonshinePreprocessor;
      config.ModelConfig.Moonshine.Encoder = options.MoonshineEncoder;
      config.ModelConfig.Moonshine.UncachedDecoder = options.MoonshineUncachedDecoder;
      config.ModelConfig.Moonshine.CachedDecoder = options.MoonshineCachedDecoder;
    }
    else if (!string.IsNullOrEmpty(options.FireRedAsrEncoder))
    {
      config.ModelConfig.FireRedAsr.Encoder = options.FireRedAsrEncoder;
      config.ModelConfig.FireRedAsr.Decoder = options.FireRedAsrDecoder;
    }
    else if (!string.IsNullOrEmpty(options.FireRedAsrCtc))
    {
      config.ModelConfig.FireRedAsrCtc.Model = options.FireRedAsrCtc;
    }
    else
    {
      Console.WriteLine("Please provide a model");
      return;
    }

    config.ModelConfig.ModelType = options.ModelType;
    config.DecodingMethod = options.DecodingMethod;
    config.MaxActivePaths = options.MaxActivePaths;
    config.HotwordsFile = options.HotwordsFile;
    config.HotwordsScore = options.HotwordsScore;
    config.RuleFsts = options.RuleFsts;

    config.ModelConfig.Debug = 0;

    var recognizer = new OfflineRecognizer(config);

    var files = options.Files.ToArray();

    // We create a separate stream for each file
    var streams = new List<OfflineStream>();
    streams.EnsureCapacity(files.Length);

    for (int i = 0; i != files.Length; ++i)
    {
      var s = recognizer.CreateStream();

      WaveReader waveReader = new WaveReader(files[i]);
      s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
      streams.Add(s);
    }

    recognizer.Decode(streams);

    // display results
    for (int i = 0; i != files.Length; ++i)
    {
      var r = streams[i].Result;
      Console.WriteLine("--------------------");
      Console.WriteLine(files[i]);
      Console.WriteLine("Text: {0}", r.Text);
      Console.WriteLine("Tokens: [{0}]", string.Join(", ", r.Tokens));
      if (r.Timestamps != null && r.Timestamps.Length > 0) {
        Console.Write("Timestamps: [");
        var sep = string.Empty;
        for (int k = 0; k != r.Timestamps.Length; ++k)
        {
          Console.Write("{0}{1}", sep, r.Timestamps[k].ToString("0.00"));
          sep = ", ";
        }
        Console.WriteLine("]");
      }
    }
    Console.WriteLine("--------------------");
  }
}


================================================
FILE: dotnet-examples/offline-decode-files/offline-decode-files.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>offline_decode_files</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="CommandLineParser" Version="2.9.1" />
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: dotnet-examples/offline-decode-files/run-dolphin-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -f ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  ls -lh sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02
fi

dotnet run \
  --tokens=./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt \
  --dolphin-model=./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx \
  --num-threads=1 \
  --files ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav


================================================
FILE: dotnet-examples/offline-decode-files/run-fire-red-asr-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -f ./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
  tar xvf sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
  rm sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2

  ls -lh sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25
fi

dotnet run \
  --num-threads=2 \
  --fire-red-asr-ctc=./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/model.int8.onnx \
  --tokens=./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/tokens.txt \
  --files ./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/test_wavs/1.wav


================================================
FILE: dotnet-examples/offline-decode-files/run-fire-red-asr.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -f ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  ls -lh sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16
fi

dotnet run \
  --num-threads=2 \
  --fire-red-asr-encoder=./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx \
  --fire-red-asr-decoder=./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/decoder.int8.onnx \
  --tokens=./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/tokens.txt \
  --files ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/0.wav


================================================
FILE: dotnet-examples/offline-decode-files/run-hotwords.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ./sherpa-onnx-zipformer-en-2023-04-01 ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
  tar xvf sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
  rm sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
fi

if [ ! -f ./sherpa-onnx-zipformer-en-2023-04-01/hotwords_en.txt ]; then
cat >./sherpa-onnx-zipformer-en-2023-04-01/hotwords_en.txt <<EOF
▁ QUA R TER S
▁FOR E VER
EOF
fi

dotnet run \
  --tokens=./sherpa-onnx-zipformer-en-2023-04-01/tokens.txt \
  --encoder=./sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.onnx \
  --decoder=./sherpa-onnx-zipformer-en-2023-04-01/decoder-epoch-99-avg-1.onnx \
  --joiner=./sherpa-onnx-zipformer-en-2023-04-01/joiner-epoch-99-avg-1.onnx \
  --num-threads=2 \
  --decoding-method=modified_beam_search \
  --files ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/0.wav \
  ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/1.wav

dotnet run \
  --hotwords-file=./sherpa-onnx-zipformer-en-2023-04-01/hotwords_en.txt \
  --hotwords-score=2.0 \
  --tokens=./sherpa-onnx-zipformer-en-2023-04-01/tokens.txt \
  --encoder=./sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.onnx \
  --decoder=./sherpa-onnx-zipformer-en-2023-04-01/decoder-epoch-99-avg-1.onnx \
  --joiner=./sherpa-onnx-zipformer-en-2023-04-01/joiner-epoch-99-avg-1.onnx \
  --num-threads=2 \
  --decoding-method=modified_beam_search \
  --files ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/0.wav \
  ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/1.wav

# 0.wav: QUARTER -> QUARTERS
# 1.wav: FOR EVER -> FOREVER


================================================
FILE: dotnet-examples/offline-decode-files/run-medasr-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -f ./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
  tar xvf sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
  rm sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
fi

dotnet run \
  --medasr=./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/model.int8.onnx \
  --tokens=./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/tokens.txt \
  --files ./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/test_wavs/0.wav


================================================
FILE: dotnet-examples/offline-decode-files/run-moonshine.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
fi

dotnet run \
  --num-threads=2 \
  --moonshine-preprocessor=./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx \
  --moonshine-encoder=./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx \
  --moonshine-uncached-decoder=./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx \
  --moonshine-cached-decoder=./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx \
  --tokens=./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt \
  --files ./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav


================================================
FILE: dotnet-examples/offline-decode-files/run-nemo-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ./sherpa-onnx-nemo-ctc-en-conformer-medium ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-ctc-en-conformer-medium.tar.bz2
  tar xvf sherpa-onnx-nemo-ctc-en-conformer-medium.tar.bz2
  rm sherpa-onnx-nemo-ctc-en-conformer-medium.tar.bz2
fi

dotnet run \
  --tokens=./sherpa-onnx-nemo-ctc-en-conformer-medium/tokens.txt \
  --nemo-ctc=./sherpa-onnx-nemo-ctc-en-conformer-medium/model.onnx \
  --num-threads=1 \
  --files ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/0.wav \
  ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/1.wav \
  ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/8k.wav


================================================
FILE: dotnet-examples/offline-decode-files/run-omnilingual-asr-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -f sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
  tar xvf sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
  rm sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
fi

dotnet run \
  --omnilingual-asr-ctc=./sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/model.int8.onnx \
  --tokens=./sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/tokens.txt \
  --files ./sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/test_wavs/en.wav


================================================
FILE: dotnet-examples/offline-decode-files/run-paraformer-itn.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ./sherpa-onnx-paraformer-zh-2023-09-14 ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
  tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
  rm sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
fi

if [ ! -f ./itn-zh-number.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
fi

if [ ! -f ./itn_zh_number.fst ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi

dotnet run \
  --tokens=./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt \
  --paraformer=./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx \
  --rule-fsts=./itn_zh_number.fst \
  --num-threads=2 \
  --files ./itn-zh-number.wav


================================================
FILE: dotnet-examples/offline-decode-files/run-paraformer.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ./sherpa-onnx-paraformer-zh-2023-09-14 ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
  tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
  rm sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
fi

dotnet run \
  --tokens=./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt \
  --paraformer=./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx \
  --num-threads=2 \
  --files ./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/0.wav \
  ./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/1.wav \
  ./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/2.wav \
  ./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/8k.wav


================================================
FILE: dotnet-examples/offline-decode-files/run-sense-voice-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17 ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
fi

dotnet run \
  --sense-voice-model=./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx \
  --tokens=./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt \
  --files ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav


================================================
FILE: dotnet-examples/offline-decode-files/run-tdnn-yesno.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ./sherpa-onnx-tdnn-yesno ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-tdnn-yesno.tar.bz2
  tar xvf sherpa-onnx-tdnn-yesno.tar.bz2
  rm sherpa-onnx-tdnn-yesno.tar.bz2
fi

dotnet run \
  --sample-rate=8000 \
  --feat-dim=23 \
  --tokens=./sherpa-onnx-tdnn-yesno/tokens.txt \
  --tdnn-model=./sherpa-onnx-tdnn-yesno/model-epoch-14-avg-2.onnx \
  --files ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_0_1_0_0_0_1.wav \
  ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_0_0_1_0.wav \
  ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_0_1_1_1.wav \
  ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_1_0_0_1.wav \
  ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_1_0_0_0_1.wav \
  ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_1_0_1_1_0.wav


================================================
FILE: dotnet-examples/offline-decode-files/run-telespeech-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04 ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
  tar xvf sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
  rm sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
fi

dotnet run \
  --telespeech-ctc=./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/model.int8.onnx \
  --tokens=./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt \
  --model-type=telespeech_ctc \
  --files ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/test_wavs/3-sichuan.wav


================================================
FILE: dotnet-examples/offline-decode-files/run-wenet-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -f sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
fi

dotnet run \
  --wenet-ctc=./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx \
  --tokens=./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt \
  --files ./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav


================================================
FILE: dotnet-examples/offline-decode-files/run-whisper-large-v3.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -f ./large-v3-encoder.int8.onnx ]; then
  git lfs install

  git clone https://huggingface.co/csukuangfj/sherpa-onnx-whisper-large-v3

  ls -lh sherpa-onnx-whisper-large-v3
  cp -v sherpa-onnx-whisper-large-v3/*.onnx .
  cp -v sherpa-onnx-whisper-large-v3/*.weights .
  ls -lh
fi

dotnet run \
  --num-threads=2 \
  --whisper-encoder=./large-v3-encoder.int8.onnx \
  --whisper-decoder=./large-v3-decoder.int8.onnx \
  --tokens=./sherpa-onnx-whisper-large-v3/large-v3-tokens.txt \
  --files ./sherpa-onnx-whisper-large-v3/test_wavs/0.wav \
  ./sherpa-onnx-whisper-large-v3/test_wavs/1.wav \
  ./sherpa-onnx-whisper-large-v3/test_wavs/8k.wav

dotnet run \
  --num-threads=2 \
  --whisper-encoder=./large-v3-encoder.onnx \
  --whisper-decoder=./large-v3-decoder.onnx \
  --tokens=./sherpa-onnx-whisper-large-v3/large-v3-tokens.txt \
  --files ./sherpa-onnx-whisper-large-v3/test_wavs/0.wav \
  ./sherpa-onnx-whisper-large-v3/test_wavs/1.wav \
  ./sherpa-onnx-whisper-large-v3/test_wavs/8k.wav


================================================
FILE: dotnet-examples/offline-decode-files/run-whisper.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ./sherpa-onnx-whisper-tiny.en ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
  tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
  rm sherpa-onnx-whisper-tiny.en.tar.bz2
fi

dotnet run \
  --num-threads=2 \
  --whisper-encoder=./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.onnx \
  --whisper-decoder=./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.onnx \
  --tokens=./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt \
  --files ./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav \
  ./sherpa-onnx-whisper-tiny.en/test_wavs/1.wav \
  ./sherpa-onnx-whisper-tiny.en/test_wavs/8k.wav


================================================
FILE: dotnet-examples/offline-decode-files/run-zipformer-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -f ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2

  tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
  rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
fi

dotnet run \
  --tokens=./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt \
  --zipformer-ctc=./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx \
  --num-threads=1 \
  --files ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/test_wavs/0.wav


================================================
FILE: dotnet-examples/offline-decode-files/run-zipformer.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ./sherpa-onnx-zipformer-en-2023-04-01 ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
  tar xvf sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
  rm sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
fi

dotnet run \
  --tokens=./sherpa-onnx-zipformer-en-2023-04-01/tokens.txt \
  --encoder=./sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.onnx \
  --decoder=./sherpa-onnx-zipformer-en-2023-04-01/decoder-epoch-99-avg-1.onnx \
  --joiner=./sherpa-onnx-zipformer-en-2023-04-01/joiner-epoch-99-avg-1.onnx \
  --num-threads=2 \
  --decoding-method=modified_beam_search \
  --files ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/0.wav \
  ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/1.wav \
  ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/8k.wav


================================================
FILE: dotnet-examples/offline-punctuation/Program.cs
================================================
﻿// Copyright (c)  2024  Xiaomi Corporation
//
// This file shows how to add punctuations to text.
//
// 1. Download a model from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
//
// 3. Now run it
//
// dotnet run

using SherpaOnnx;

class OfflinePunctuationDemo
{
  static void Main(string[] args)
  {
    var config = new OfflinePunctuationConfig();
    config.Model.CtTransformer = "./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx";
    config.Model.Debug = 1;
    config.Model.NumThreads = 1;
    var punct = new OfflinePunctuation(config);

    var textList = new string[] {
        "这是一个测试你好吗How are you我很好thank you are you ok谢谢你",
        "我们都是木头人不会说话不会动",
        "The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry",
    };

    Console.WriteLine("---------");
    foreach (var text in textList)
    {
      string textWithPunct = punct.AddPunct(text);
      Console.WriteLine("Input text: {0}", text);
      Console.WriteLine("Output text: {0}", textWithPunct);
      Console.WriteLine("---------");
    }
  }
}


================================================
FILE: dotnet-examples/offline-punctuation/offline-punctuation.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>offline_punctuation</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: dotnet-examples/offline-punctuation/run.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -e ./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
  tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
  rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
fi

dotnet run


================================================
FILE: dotnet-examples/offline-speaker-diarization/Program.cs
================================================
﻿// Copyright (c)  2024  Xiaomi Corporation
//

// This file shows how to use sherpa-onnx C# API for speaker diarization
/*
Usage:

Step 1: Download a speaker segmentation model

Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
for a list of available models. The following is an example

  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2

Step 2: Download a speaker embedding extractor model

Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
for a list of available models. The following is an example

  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

Step 3. Download test wave files

Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
for a list of available test wave files. The following is an example

  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav

Step 4. Run it

  dotnet run
*/

using SherpaOnnx;

class OfflineSpeakerDiarizationDemo
{
  static void Main(string[] args)
  {
    var config = new OfflineSpeakerDiarizationConfig();
    config.Segmentation.Pyannote.Model = "./sherpa-onnx-pyannote-segmentation-3-0/model.onnx";
    config.Embedding.Model = "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx";

    // the test wave ./0-four-speakers-zh.wav has 4 speakers, so
    // we set num_clusters to 4
    //
    config.Clustering.NumClusters = 4;
    // If you don't know the number of speakers in the test wave file, please
    // use
    // config.Clustering.Threshold = 0.5; // You need to tune this threshold
    var sd = new OfflineSpeakerDiarization(config);

    var testWaveFile = "./0-four-speakers-zh.wav";
    var waveReader = new WaveReader(testWaveFile);
    if (sd.SampleRate != waveReader.SampleRate)
    {
      Console.WriteLine($"Expected sample rate: {sd.SampleRate}. Given: {waveReader.SampleRate}");
      return;
    }

    Console.WriteLine("Started");

     // var segments = sd.Process(waveReader.Samples); // this one is also ok

    var progressCallback = (int numProcessedChunks, int numTotalChunks, IntPtr arg) =>
    {
      var progress = 100.0F * numProcessedChunks / numTotalChunks;
      Console.WriteLine("Progress {0}%", string.Format("{0:0.00}", progress));
      return 0;
    };

    var callback = new OfflineSpeakerDiarizationProgressCallback(progressCallback);
    var segments = sd.ProcessWithCallback(waveReader.Samples, callback, IntPtr.Zero);

    foreach (var s in segments)
    {
      Console.WriteLine("{0} -- {1} speaker_{2}", string.Format("{0:0.00}", s.Start), string.Format("{0:0.00}", s.End), s.Speaker);
    }
  }
}


================================================
FILE: dotnet-examples/offline-speaker-diarization/offline-speaker-diarization.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>offline_speaker_diarization</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: dotnet-examples/offline-speaker-diarization/run.sh
================================================
#!/usr/bin/env bash


if [ ! -f ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
fi

if [ ! -f ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
fi

if [ ! -f ./0-four-speakers-zh.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
fi

dotnet run


================================================
FILE: dotnet-examples/offline-tts/Program.cs
================================================
﻿// Copyright (c)  2024  Xiaomi Corporation
//
// This file shows how to use a non-streaming TTS model for text-to-speech
// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
// and
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
// to download pre-trained models
using CommandLine;
using CommandLine.Text;
using SherpaOnnx;

class OfflineTtsDemo
{
  class Options
  {
    [Option("tts-rule-fsts", Required = false, Default = "", HelpText = "path to rule.fst")]
    public string RuleFsts { get; set; } = string.Empty;

    [Option("tts-rule-fars", Required = false, Default = "", HelpText = "path to rule.far")]
    public string RuleFars { get; set; } = string.Empty;

    [Option("data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")]
    public string DataDir { get; set; } = string.Empty;

    [Option("length-scale", Required = false, Default = 1, HelpText = "speech speed. Larger->Slower; Smaller->faster")]
    public float LengthScale { get; set; } = 1;

    [Option("noise-scale", Required = false, Default = 0.667f, HelpText = "noise_scale for VITS or Matcha models")]
    public float NoiseScale { get; set; } = 0.667F;

    [Option("vits-noise-scale-w", Required = false, Default = 0.8F, HelpText = "noise_scale_w for VITS models")]
    public float NoiseScaleW { get; set; } = 0.8F;

    [Option("lexicon", Required = false, Default = "", HelpText = "Path to lexicon.txt")]
    public string Lexicon { get; set; } = string.Empty;

    [Option("tokens", Required = true, Default = "", HelpText = "Path to tokens.txt")]
    public string Tokens { get; set; } = string.Empty;

    [Option("tts-max-num-sentences", Required = false, Default = 1, HelpText = "Maximum number of sentences that we process at a time.")]
    public int MaxNumSentences { get; set; } = 1;

    [Option(Required = false, Default = 0, HelpText = "1 to show debug messages.")]
    public int Debug { get; set; } = 0;

    [Option("vits-model", Required = false, HelpText = "Path to VITS model")]
    public string Model { get; set; } = string.Empty;

    [Option("matcha-acoustic-model", Required = false, HelpText = "Path to the acoustic model of Matcha")]
    public string AcousticModel { get; set; } = "";

    [Option("matcha-vocoder", Required = false, HelpText = "Path to the vocoder model of Matcha")]
    public string Vocoder { get; set; } = "";

    [Option("sid", Required = false, Default = 0, HelpText = "Speaker ID")]
    public int SpeakerId { get; set; } = 0;

    [Option("text", Required = true, HelpText = "Text to synthesize")]
    public string Text { get; set; } = string.Empty;

    [Option("output-filename", Required = true, Default = "./generated.wav", HelpText = "Path to save the generated audio")]
    public string OutputFilename { get; set; } = "./generated.wav";
  }

  static void Main(string[] args)
  {
    var parser = new Parser(with => with.HelpWriter = null);
    var parserResult = parser.ParseArguments<Options>(args);

    parserResult
      .WithParsed<Options>(options => Run(options))
      .WithNotParsed(errs => DisplayHelp(parserResult, errs));
  }

  private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs)
  {
    var usage = @"
# matcha-icefall-zh-baker

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
tar xvf matcha-icefall-zh-baker.tar.bz2
rm matcha-icefall-zh-baker.tar.bz2

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx

dotnet run \
  --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
  --matcha-vocoder=./vocos-22khz-univ.onnx \
  --lexicon=./matcha-icefall-zh-baker/lexicon.txt \
  --tokens=./matcha-icefall-zh-baker/tokens.txt \
  --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
  --debug=1 \
  --output-filename=./matcha-zh.wav \
  --text='某某银行的副行长和一些行政领导表示，他们去过长江和长白山; 经济不断增长。2024年12月31号，拨打110或者18920240511。123456块钱。'

# matcha-icefall-en_US-ljspeech

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
rm matcha-icefall-en_US-ljspeech.tar.bz2

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx

dotnet run \
  --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
  --matcha-vocoder=./vocos-22khz-univ.onnx \
  --tokens=./matcha-icefall-zh-baker/tokens.txt \
  --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
  --debug=1 \
  --output-filename=./matcha-zh.wav \
  --text='Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.'

# vits-aishell3

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
tar xvf vits-icefall-zh-aishell3.tar.bz2

dotnet run \
  --vits-model=./vits-icefall-zh-aishell3/model.onnx \
  --tokens=./vits-icefall-zh-aishell3/tokens.txt \
  --lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
  --tts-rule-fsts=./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst \
  --tts-rule-fars=./vits-icefall-zh-aishell3/rule.far \
  --sid=66 \
  --debug=1 \
  --output-filename=./aishell3-66.wav \
  --text=这是一个语音合成测试

# Piper models

wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
tar xf vits-piper-en_US-amy-low.tar.bz2

dotnet run \
  --vits-model=./vits-piper-en_US-amy-low/en_US-amy-low.onnx \
  --tokens=./vits-piper-en_US-amy-low/tokens.txt \
  --data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
  --debug=1 \
  --output-filename=./amy.wav \
  --text='This is a text to speech application in dotnet with Next Generation Kaldi'

Please refer to
https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/index.html
to download more models.
";

    var helpText = HelpText.AutoBuild(result, h =>
    {
      h.AdditionalNewLineAfterOption = false;
      h.Heading = usage;
      h.Copyright = "Copyright (c) 2024 Xiaomi Corporation";
      return HelpText.DefaultParsingErrorsHandler(result, h);
    }, e => e);
    Console.WriteLine(helpText);
  }

  private static void Run(Options options)
  {
    var config = new OfflineTtsConfig();
    config.Model.Vits.Model = options.Model;
    config.Model.Vits.Lexicon = options.Lexicon;
    config.Model.Vits.Tokens = options.Tokens;
    config.Model.Vits.DataDir = options.DataDir;
    config.Model.Vits.NoiseScale = options.NoiseScale;
    config.Model.Vits.NoiseScaleW = options.NoiseScaleW;
    config.Model.Vits.LengthScale = options.LengthScale;

    config.Model.Matcha.AcousticModel = options.AcousticModel;
    config.Model.Matcha.Vocoder = options.Vocoder;
    config.Model.Matcha.Lexicon = options.Lexicon;
    config.Model.Matcha.Tokens = options.Tokens;
    config.Model.Matcha.DataDir = options.DataDir;
    config.Model.Matcha.NoiseScale = options.NoiseScale;
    config.Model.Matcha.LengthScale = options.LengthScale;

    config.Model.NumThreads = 1;
    config.Model.Debug = options.Debug;
    config.Model.Provider = "cpu";
    config.RuleFsts = options.RuleFsts;
    config.RuleFars = options.RuleFars;
    config.MaxNumSentences = options.MaxNumSentences;

    var tts = new OfflineTts(config);
    var speed = 1.0f / options.LengthScale;
    var sid = options.SpeakerId;
    OfflineTtsGenerationConfig genConfig = new OfflineTtsGenerationConfig();
    genConfig.Sid = sid;
    genConfig.Speed = speed;
    genConfig.SilenceScale = 0.2f;
    var audio = tts.GenerateWithConfig(options.Text, genConfig, null);
    var ok = audio.SaveToWaveFile(options.OutputFilename);

    if (ok)
    {
      Console.WriteLine($"Wrote to {options.OutputFilename} succeeded!");
    }
    else
    {
      Console.WriteLine($"Failed to write {options.OutputFilename}");
    }
  }
}


================================================
FILE: dotnet-examples/offline-tts/offline-tts.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>offline_tts</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="CommandLineParser" Version="2.9.1" />
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: dotnet-examples/offline-tts/run-aishell3.sh
================================================
#!/usr/bin/env bash
set -ex
if [ ! -f ./vits-zh-aishell3/vits-aishell3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
  tar xvf vits-icefall-zh-aishell3.tar.bz2
  rm vits-icefall-zh-aishell3.tar.bz2
fi

dotnet run \
  --vits-model=./vits-icefall-zh-aishell3/model.onnx \
  --tokens=./vits-icefall-zh-aishell3/tokens.txt \
  --lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
  --tts-rule-fsts=./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst \
  --tts-rule-fars=./vits-icefall-zh-aishell3/rule.far \
  --sid=66 \
  --debug=1 \
  --output-filename=./aishell3-66.wav \
  --text="这是一个语音合成测试, 写于公元 2024 年 1 月 28 号, 23点27分，星期天。长沙长大，去过长白山和长安街。行行出状元。行行，银行行长，行业。"


================================================
FILE: dotnet-examples/offline-tts/run-hf-fanchen.sh
================================================
#!/usr/bin/env bash
set -ex
if [ ! -f ./vits-zh-hf-fanchen-C/vits-zh-hf-fanchen-C.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-hf-fanchen-C.tar.bz2
  tar xf vits-zh-hf-fanchen-C.tar.bz2
  rm vits-zh-hf-fanchen-C.tar.bz2
fi

dotnet run \
  --vits-model=./vits-zh-hf-fanchen-C/vits-zh-hf-fanchen-C.onnx \
  --tokens=./vits-zh-hf-fanchen-C/tokens.txt \
  --lexicon=./vits-zh-hf-fanchen-C/lexicon.txt \
  --tts-rule-fsts=./vits-zh-hf-fanchen-C/phone.fst,./vits-zh-hf-fanchen-C/date.fst,./vits-zh-hf-fanchen-C/number.fst \
  --sid=100 \
  --debug=1 \
  --output-filename=./fanchen-100.wav \
  --text="这是一个语音合成测试, 写于公元2024年4月26号, 11点05分，星期5。小米的使命是，始终坚持做'感动人心、价格厚道'的好产品，让全球每个人都能享受科技带来的美好生活。"


================================================
FILE: dotnet-examples/offline-tts/run-matcha-en.sh
================================================
#!/usr/bin/env bash
set -ex


# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
# matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
  tar xf matcha-icefall-en_US-ljspeech.tar.bz2
  rm matcha-icefall-en_US-ljspeech.tar.bz2
fi

if [ ! -f ./vocos-22khz-univ.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
fi

dotnet run \
  --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
  --matcha-vocoder=./vocos-22khz-univ.onnx \
  --tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
  --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
  --debug=1 \
  --output-filename=./matcha-en.wav \
  --text='Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.'


================================================
FILE: dotnet-examples/offline-tts/run-matcha-zh.sh
================================================
#!/usr/bin/env bash
set -ex

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
  tar xvf matcha-icefall-zh-baker.tar.bz2
  rm matcha-icefall-zh-baker.tar.bz2
fi

if [ ! -f ./vocos-22khz-univ.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
fi


dotnet run \
  --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
  --matcha-vocoder=./vocos-22khz-univ.onnx \
  --lexicon=./matcha-icefall-zh-baker/lexicon.txt \
  --tokens=./matcha-icefall-zh-baker/tokens.txt \
  --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
  --debug=1 \
  --output-filename=./matcha-zh.wav \
  --text="某某银行的副行长和一些行政领导表示，他们去过长江和长白山; 经济不断增长。2024年12月31号，拨打110或者18920240511。123456块钱。"


================================================
FILE: dotnet-examples/offline-tts/run-piper.sh
================================================
#!/usr/bin/env bash

set -ex
if [ ! -f ./vits-piper-en_US-amy-low/en_US-amy-low.onnx ]; then
  # wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
  curl -OL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
  tar xf vits-piper-en_US-amy-low.tar.bz2
  rm vits-piper-en_US-amy-low.tar.bz2
fi

dotnet run \
  --vits-model=./vits-piper-en_US-amy-low/en_US-amy-low.onnx \
  --tokens=./vits-piper-en_US-amy-low/tokens.txt \
  --data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
  --debug=1 \
  --output-filename=./amy.wav \
  --text="This is a text to speech application in dotnet with Next Generation Kaldi"


================================================
FILE: dotnet-examples/offline-tts-play/.gitignore
================================================
run-piper.sh


================================================
FILE: dotnet-examples/offline-tts-play/Program.cs
================================================
﻿// Copyright (c)  2024  Xiaomi Corporation
//
// This file shows how to use a non-streaming TTS model for text-to-speech
// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
// and
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
// to download pre-trained models
//
// Note that you need a speaker to run this file since it will play
// the generated audio back as it is being generated.

using CommandLine;
using CommandLine.Text;
using PortAudioSharp;
using SherpaOnnx;
using System.Collections.Concurrent;
using System.Runtime.InteropServices;

class OfflineTtsPlayDemo
{
  class Options
  {
    [Option("tts-rule-fsts", Required = false, Default = "", HelpText = "path to rule.fst")]
    public string RuleFsts { get; set; } = string.Empty;

    [Option("tts-rule-fars", Required = false, Default = "", HelpText = "path to rule.far")]
    public string RuleFars { get; set; } = string.Empty;

    [Option("data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")]
    public string DataDir { get; set; } = string.Empty;

    [Option("length-scale", Required = false, Default = 1, HelpText = "speech speed. Larger->Slower; Smaller->faster")]
    public float LengthScale { get; set; } = 1;

    [Option("noise-scale", Required = false, Default = 0.667f, HelpText = "noise_scale for VITS or Matcha models")]
    public float NoiseScale { get; set; } = 0.667F;

    [Option("vits-noise-scale-w", Required = false, Default = 0.8F, HelpText = "noise_scale_w for VITS models")]
    public float NoiseScaleW { get; set; } = 0.8F;

    [Option("lexicon", Required = false, Default = "", HelpText = "Path to lexicon.txt")]
    public string Lexicon { get; set; } = string.Empty;

    [Option("tokens", Required = true, Default = "", HelpText = "Path to tokens.txt")]
    public string Tokens { get; set; } = string.Empty;

    [Option("tts-max-num-sentences", Required = false, Default = 1, HelpText = "Maximum number of sentences that we process at a time.")]
    public int MaxNumSentences { get; set; } = 1;

    [Option(Required = false, Default = 0, HelpText = "1 to show debug messages.")]
    public int Debug { get; set; } = 0;

    [Option("vits-model", Required = false, HelpText = "Path to VITS model")]
    public string Model { get; set; } = string.Empty;

    [Option("matcha-acoustic-model", Required = false, HelpText = "Path to the acoustic model of Matcha")]
    public string AcousticModel { get; set; } = "";

    [Option("matcha-vocoder", Required = false, HelpText = "Path to the vocoder model of Matcha")]
    public string Vocoder { get; set; } = "";

    [Option("sid", Required = false, Default = 0, HelpText = "Speaker ID")]
    public int SpeakerId { get; set; } = 0;

    [Option("text", Required = true, HelpText = "Text to synthesize")]
    public string Text { get; set; } = string.Empty;

    [Option("output-filename", Required = true, Default = "./generated.wav", HelpText = "Path to save the generated audio")]
    public string OutputFilename { get; set; } = "./generated.wav";
  }

  static void Main(string[] args)
  {
    var parser = new CommandLine.Parser(with => with.HelpWriter = null);
    var parserResult = parser.ParseArguments<Options>(args);

    parserResult
      .WithParsed<Options>(options => Run(options))
      .WithNotParsed(errs => DisplayHelp(parserResult, errs));
  }

  private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs)
  {
    string usage = @"
# matcha-icefall-zh-baker

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
tar xvf matcha-icefall-zh-baker.tar.bz2
rm matcha-icefall-zh-baker.tar.bz2

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx

dotnet run \
  --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
  --matcha-vocoder=./vocos-22khz-univ.onnx \
  --lexicon=./matcha-icefall-zh-baker/lexicon.txt \
  --tokens=./matcha-icefall-zh-baker/tokens.txt \
  --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
  --debug=1 \
  --output-filename=./matcha-zh.wav \
  --text='某某银行的副行长和一些行政领导表示，他们去过长江和长白山; 经济不断增长。2024年12月31号，拨打110或者18920240511。123456块钱。'

# matcha-icefall-en_US-ljspeech

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
rm matcha-icefall-en_US-ljspeech.tar.bz2

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx

dotnet run \
  --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
  --matcha-vocoder=./vocos-22khz-univ.onnx \
  --tokens=./matcha-icefall-zh-baker/tokens.txt \
  --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
  --debug=1 \
  --output-filename=./matcha-zh.wav \
  --text='Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.'

# vits-aishell3

wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2
tar xf vits-zh-aishell3.tar.bz2

dotnet run \
  --vits-model=./vits-zh-aishell3/vits-aishell3.onnx \
  --tokens=./vits-zh-aishell3/tokens.txt \
  --lexicon=./vits-zh-aishell3/lexicon.txt \
  --tts-rule-fsts=./vits-zh-aishell3/rule.fst \
  --sid=66 \
  --debug=1 \
  --output-filename=./aishell3-66.wav \
  --text=这是一个语音合成测试

# Piper models

wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
tar xf vits-piper-en_US-amy-low.tar.bz2

dotnet run \
  --vits-model=./vits-piper-en_US-amy-low/en_US-amy-low.onnx \
  ---tokens=./vits-piper-en_US-amy-low/tokens.txt \
  --data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
  --debug=1 \
  --output-filename=./amy.wav \
  --text='This is a text to speech application in dotnet with Next Generation Kaldi'

Please refer to
https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/index.html
to download more models.
";

    var helpText = HelpText.AutoBuild(result, h =>
    {
      h.AdditionalNewLineAfterOption = false;
      h.Heading = usage;
      h.Copyright = "Copyright (c) 2024 Xiaomi Corporation";
      return HelpText.DefaultParsingErrorsHandler(result, h);
    }, e => e);
    Console.WriteLine(helpText);
  }

  private static void Run(Options options)
  {
    var config = new OfflineTtsConfig();

    config.Model.Vits.Model = options.Model;
    config.Model.Vits.Lexicon = options.Lexicon;
    config.Model.Vits.Tokens = options.Tokens;
    config.Model.Vits.DataDir = options.DataDir;
    config.Model.Vits.NoiseScale = options.NoiseScale;
    config.Model.Vits.NoiseScaleW = options.NoiseScaleW;
    config.Model.Vits.LengthScale = options.LengthScale;

    config.Model.Matcha.AcousticModel = options.AcousticModel;
    config.Model.Matcha.Vocoder = options.Vocoder;
    config.Model.Matcha.Lexicon = options.Lexicon;
    config.Model.Matcha.Tokens = options.Tokens;
    config.Model.Matcha.DataDir = options.DataDir;
    config.Model.Matcha.NoiseScale = options.NoiseScale;
    config.Model.Matcha.LengthScale = options.LengthScale;

    config.Model.NumThreads = 1;
    config.Model.Debug = options.Debug;
    config.Model.Provider = "cpu";
    config.RuleFsts = options.RuleFsts;
    config.MaxNumSentences = options.MaxNumSentences;

    var tts = new OfflineTts(config);
    var speed = 1.0f / options.LengthScale;
    var sid = options.SpeakerId;
    OfflineTtsGenerationConfig genConfig = new OfflineTtsGenerationConfig();
    genConfig.Sid = sid;
    genConfig.Speed = speed;
    genConfig.SilenceScale = 0.2f;

    Console.WriteLine(PortAudio.VersionInfo.versionText);
    PortAudio.Initialize();
    Console.WriteLine($"Number of devices: {PortAudio.DeviceCount}");

    for (int i = 0; i != PortAudio.DeviceCount; ++i)
    {
      Console.WriteLine($" Device {i}");
      DeviceInfo deviceInfo = PortAudio.GetDeviceInfo(i);
      Console.WriteLine($"   Name: {deviceInfo.name}");
      Console.WriteLine($"   Max output channels: {deviceInfo.maxOutputChannels}");
      Console.WriteLine($"   Default sample rate: {deviceInfo.defaultSampleRate}");
    }
    int deviceIndex = PortAudio.DefaultOutputDevice;
    if (deviceIndex == PortAudio.NoDevice)
    {
      Console.WriteLine("No default output device found. Please use ../offline-tts instead");
      Environment.Exit(1);
    }

    var info = PortAudio.GetDeviceInfo(deviceIndex);
    Console.WriteLine();
    Console.WriteLine($"Use output default device {deviceIndex} ({info.name})");

    var param = new StreamParameters();
    param.device = deviceIndex;
    param.channelCount = 1;
    param.sampleFormat = SampleFormat.Float32;
    param.suggestedLatency = info.defaultLowOutputLatency;
    param.hostApiSpecificStreamInfo = IntPtr.Zero;

    // https://learn.microsoft.com/en-us/dotnet/standard/collections/thread-safe/blockingcollection-overview
    var dataItems = new BlockingCollection<float[]>();

    var myCallback = (IntPtr samples, int n, float progress, IntPtr arg) =>
    {
      float[] data = new float[n];

      Marshal.Copy(samples, data, 0, n);

      dataItems.Add(data);

      // 1 means to keep generating
      // 0 means to stop generating
      return 1;
    };

    var playFinished = false;

    float[]? lastSampleArray = null;
    int lastIndex = 0; // not played

    PortAudioSharp.Stream.Callback playCallback = (IntPtr input, IntPtr output,
        UInt32 frameCount,
        ref StreamCallbackTimeInfo timeInfo,
        StreamCallbackFlags statusFlags,
        IntPtr userData
        ) =>
    {
      if (dataItems.IsCompleted && lastSampleArray == null && lastIndex == 0)
      {
        Console.WriteLine($"Finished playing");
        playFinished = true;
        return StreamCallbackResult.Complete;
      }

      int expected = Convert.ToInt32(frameCount);
      int i = 0;

      while ((lastSampleArray != null || dataItems.Count != 0) && (i < expected))
      {
        int needed = expected - i;

        if (lastSampleArray != null)
        {
          int remaining = lastSampleArray.Length - lastIndex;
          if (remaining >= needed)
          {
            float[] this_block = lastSampleArray.Skip(lastIndex).Take(needed).ToArray();
            lastIndex += needed;
            if (lastIndex == lastSampleArray.Length)
            {
              lastSampleArray = null;
              lastIndex = 0;
            }

            Marshal.Copy(this_block, 0, IntPtr.Add(output, i * sizeof(float)), needed);
            return StreamCallbackResult.Continue;
          }

          float[] this_block2 = lastSampleArray.Skip(lastIndex).Take(remaining).ToArray();
          lastIndex = 0;
          lastSampleArray = null;

          Marshal.Copy(this_block2, 0, IntPtr.Add(output, i * sizeof(float)), remaining);
          i += remaining;
          continue;
        }

        if (dataItems.Count != 0)
        {
          lastSampleArray = dataItems.Take();
          lastIndex = 0;
        }
      }

      if (i < expected)
      {
        int sizeInBytes = (expected - i) * 4;
        Marshal.Copy(new byte[sizeInBytes], 0, IntPtr.Add(output, i * sizeof(float)), sizeInBytes);
      }

      return StreamCallbackResult.Continue;
    };

    PortAudioSharp.Stream stream = new PortAudioSharp.Stream(inParams: null, outParams: param, sampleRate: tts.SampleRate,
        framesPerBuffer: 0,
        streamFlags: StreamFlags.ClipOff,
        callback: playCallback,
        userData: IntPtr.Zero
        );

    stream.Start();

    var callback = new OfflineTtsCallbackProgressWithArg(myCallback);

    var audio = tts.GenerateWithConfig(options.Text, genConfig, callback);
    var ok = audio.SaveToWaveFile(options.OutputFilename);

    if (ok)
    {
      Console.WriteLine($"Wrote to {options.OutputFilename} succeeded!");
    }
    else
    {
      Console.WriteLine($"Failed to write {options.OutputFilename}");
    }
    dataItems.CompleteAdding();

    while (!playFinished)
    {
      Thread.Sleep(100); // 100ms
    }
  }
}


================================================
FILE: dotnet-examples/offline-tts-play/offline-tts-play.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>offline_tts_play</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="CommandLineParser" Version="2.9.1" />
    <PackageReference Include="PortAudioSharp2" Version="*" />
  </ItemGroup>

  <ItemGroup>
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: dotnet-examples/offline-tts-play/run-hf-fanchen.sh
================================================
#!/usr/bin/env bash
set -ex
if [ ! -f ./vits-zh-hf-fanchen-C/vits-zh-hf-fanchen-C.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-hf-fanchen-C.tar.bz2
  tar xf vits-zh-hf-fanchen-C.tar.bz2
  rm vits-zh-hf-fanchen-C.tar.bz2
fi

dotnet run \
  --vits-model=./vits-zh-hf-fanchen-C/vits-zh-hf-fanchen-C.onnx \
  --tokens=./vits-zh-hf-fanchen-C/tokens.txt \
  --lexicon=./vits-zh-hf-fanchen-C/lexicon.txt \
  --tts-rule-fsts=./vits-zh-hf-fanchen-C/phone.fst,./vits-zh-hf-fanchen-C/date.fst,./vits-zh-hf-fanchen-C/number.fst \
  --sid=100 \
  --debug=1 \
  --output-filename=./fanchen-100.wav \
  --text="这是一个语音合成测试, 写于公元2024年4月26号, 11点05分，星期5。小米的使命是，始终坚持做'感动人心、价格厚道'的好产品，让全球每个人都能享受科技带来的美好生活。"


================================================
FILE: dotnet-examples/offline-tts-play/run-matcha-en.sh
================================================
#!/usr/bin/env bash
set -ex


# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
# matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
  tar xf matcha-icefall-en_US-ljspeech.tar.bz2
  rm matcha-icefall-en_US-ljspeech.tar.bz2
fi

if [ ! -f ./vocos-22khz-univ.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
fi

dotnet run \
  --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
  --matcha-vocoder=./vocos-22khz-univ.onnx \
  --tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
  --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
  --debug=1 \
  --output-filename=./matcha-en.wav \
  --text='Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.'


================================================
FILE: dotnet-examples/offline-tts-play/run-matcha-zh.sh
================================================
#!/usr/bin/env bash
set -ex

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
  tar xvf matcha-icefall-zh-baker.tar.bz2
  rm matcha-icefall-zh-baker.tar.bz2
fi

if [ ! -f ./vocos-22khz-univ.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
fi


dotnet run \
  --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
  --matcha-vocoder=./vocos-22khz-univ.onnx \
  --lexicon=./matcha-icefall-zh-baker/lexicon.txt \
  --tokens=./matcha-icefall-zh-baker/tokens.txt \
  --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
  --debug=1 \
  --output-filename=./matcha-zh.wav \
  --text="某某银行的副行长和一些行政领导表示，他们去过长江和长白山; 经济不断增长。2024年12月31号，拨打110或者18920240511。123456块钱。"


================================================
FILE: dotnet-examples/online-decode-files/Program.cs
================================================
﻿// Copyright (c)  2023  Xiaomi Corporation
// Copyright (c)  2023 by manyeyes
//
// This file shows how to use a streaming model to decode files
// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html
// to download streaming models

using CommandLine;
using CommandLine.Text;
using SherpaOnnx;

class OnlineDecodeFiles
{
  class Options
  {
    [Option(Required = true, HelpText = "Path to tokens.txt")]
    public string Tokens { get; set; } = string.Empty;

    [Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")]
    public string Provider { get; set; } = string.Empty;

    [Option(Required = false, HelpText = "Path to transducer encoder.onnx")]
    public string Encoder { get; set; } = string.Empty;

    [Option(Required = false, HelpText = "Path to transducer decoder.onnx")]
    public string Decoder { get; set; } = string.Empty;

    [Option(Required = false, HelpText = "Path to transducer joiner.onnx")]
    public string Joiner { get; set; } = string.Empty;

    [Option("paraformer-encoder", Required = false, HelpText = "Path to paraformer encoder.onnx")]
    public string ParaformerEncoder { get; set; } = string.Empty;

    [Option("paraformer-decoder", Required = false, HelpText = "Path to paraformer decoder.onnx")]
    public string ParaformerDecoder { get; set; } = string.Empty;

    [Option("zipformer2-ctc", Required = false, HelpText = "Path to zipformer2 CTC onnx model")]
    public string Zipformer2Ctc { get; set; } = string.Empty;

    [Option("t-one-ctc", Required = false, HelpText = "Path to T-one CTC onnx model")]
    public string ToneCtc { get; set; } = string.Empty;

    [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")]
    public int NumThreads { get; set; } = 1;

    [Option("decoding-method", Required = false, Default = "greedy_search",
            HelpText = "Valid decoding methods are: greedy_search, modified_beam_search")]
    public string DecodingMethod { get; set; } = "greedy_search";

    [Option(Required = false, Default = false, HelpText = "True to show model info during loading")]
    public bool Debug { get; set; } = false;

    [Option("sample-rate", Required = false, Default = 16000, HelpText = "Sample rate of the data used to train the model")]
    public int SampleRate { get; set; } = 16000;

    [Option("max-active-paths", Required = false, Default = 4,
        HelpText = @"Used only when --decoding--method is modified_beam_search.
It specifies number of active paths to keep during the search")]
    public int MaxActivePaths { get; set; } = 4;

    [Option("enable-endpoint", Required = false, Default = false,
        HelpText = "True to enable endpoint detection.")]
    public bool EnableEndpoint { get; set; } = false;

    [Option("rule1-min-trailing-silence", Required = false, Default = 2.4F,
        HelpText = @"An endpoint is detected if trailing silence in seconds is
larger than this value even if nothing has been decoded. Used only when --enable-endpoint is true.")]
    public float Rule1MinTrailingSilence { get; set; } = 2.4F;

    [Option("rule2-min-trailing-silence", Required = false, Default = 1.2F,
        HelpText = @"An endpoint is detected if trailing silence in seconds is
larger than this value after something that is not blank has been decoded. Used
only when --enable-endpoint is true.")]
    public float Rule2MinTrailingSilence { get; set; }  = 1.2F;

    [Option("rule3-min-utterance-length", Required = false, Default = 20.0F,
        HelpText = @"An endpoint is detected if the utterance in seconds is
larger than this value. Used only when --enable-endpoint is true.")]
    public float Rule3MinUtteranceLength { get; set; } = 20.0F;

    [Option("hotwords-file", Required = false, Default = "", HelpText = "Path to hotwords.txt")]
    public string HotwordsFile { get; set; } = string.Empty;

    [Option("hotwords-score", Required = false, Default = 1.5F, HelpText = "hotwords score")]
    public float HotwordsScore { get; set; } = 1.5F;

    [Option("rule-fsts", Required = false, Default = "",
            HelpText = "If not empty, path to rule fst for inverse text normalization")]
    public string RuleFsts { get; set; } = string.Empty;

    [Option("files", Required = true, HelpText = "Audio files for decoding")]
    public IEnumerable<string> Files { get; set; } = new string[] {};
  }

  static void Main(string[] args)
  {
    var parser = new CommandLine.Parser(with => with.HelpWriter = null);
    var parserResult = parser.ParseArguments<Options>(args);

    parserResult
      .WithParsed<Options>(options => Run(options))
      .WithNotParsed(errs => DisplayHelp(parserResult, errs));
  }

  private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs)
  {
    string usage = @"
(1) Streaming transducer models

dotnet run \
  --tokens=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \
  --encoder=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx \
  --decoder=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \
  --joiner=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx \
  --num-threads=2 \
  --decoding-method=modified_beam_search \
  --debug=false \
  --files ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav \
  ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav

(2) Streaming Zipformer2 Ctc models

dotnet run -c Release \
  --tokens ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt \
  --zipformer2-ctc ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx \
  --files ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000000.wav \
  ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000001.wav \
  ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000002.wav \
  ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/TEST_MEETING_T0000000113.wav \
  ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/TEST_MEETING_T0000000219.wav \
  ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/TEST_MEETING_T0000000351.wav

(3) Streaming Paraformer models
dotnet run \
  --tokens=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt \
  --paraformer-encoder=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx \
  --paraformer-decoder=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx \
  --num-threads=2 \
  --decoding-method=greedy_search \
  --debug=false \
  --files ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav \
  ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/1.wav

Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-ctc/index.html
to download pre-trained streaming models.
";

    var helpText = HelpText.AutoBuild(result, h =>
    {
      h.AdditionalNewLineAfterOption = false;
      h.Heading = usage;
      h.Copyright = "Copyright (c) 2023 Xiaomi Corporation";
      return HelpText.DefaultParsingErrorsHandler(result, h);
    }, e => e);
    Console.WriteLine(helpText);
  }

  private static void Run(Options options)
  {
    var config = new OnlineRecognizerConfig();
    config.FeatConfig.SampleRate = options.SampleRate;

    // All models from icefall using feature dim 80.
    // You can change it if your model has a different feature dim.
    config.FeatConfig.FeatureDim = 80;

    config.ModelConfig.Transducer.Encoder = options.Encoder;
    config.ModelConfig.Transducer.Decoder = options.Decoder;
    config.ModelConfig.Transducer.Joiner = options.Joiner;

    config.ModelConfig.Paraformer.Encoder = options.ParaformerEncoder;
    config.ModelConfig.Paraformer.Decoder = options.ParaformerDecoder;

    config.ModelConfig.Zipformer2Ctc.Model = options.Zipformer2Ctc;
    config.ModelConfig.ToneCtc.Model = options.ToneCtc;

    config.ModelConfig.Tokens = options.Tokens;
    config.ModelConfig.Provider = options.Provider;
    config.ModelConfig.NumThreads = options.NumThreads;
    config.ModelConfig.Debug = options.Debug ? 1 : 0;

    config.DecodingMethod = options.DecodingMethod;
    config.MaxActivePaths = options.MaxActivePaths;
    config.EnableEndpoint = options.EnableEndpoint ? 1 : 0;

    config.Rule1MinTrailingSilence = options.Rule1MinTrailingSilence;
    config.Rule2MinTrailingSilence = options.Rule2MinTrailingSilence;
    config.Rule3MinUtteranceLength = options.Rule3MinUtteranceLength;
    config.HotwordsFile = options.HotwordsFile;
    config.HotwordsScore = options.HotwordsScore;
    config.RuleFsts = options.RuleFsts;

    var recognizer = new OnlineRecognizer(config);

    var files = options.Files.ToArray();

    // We create a separate stream for each file
    var streams = new List<OnlineStream>();
    streams.EnsureCapacity(files.Length);

    for (int i = 0; i != files.Length; ++i)
    {
      var s = recognizer.CreateStream();

      var waveReader = new WaveReader(files[i]);

      var leftPadding = new float[(int)(waveReader.SampleRate * 0.3)];
      s.AcceptWaveform(waveReader.SampleRate, leftPadding);

      s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);

      var tailPadding = new float[(int)(waveReader.SampleRate * 0.6)];
      s.AcceptWaveform(waveReader.SampleRate, tailPadding);

      s.InputFinished();

      streams.Add(s);
    }

    while (true)
    {
      var readyStreams = streams.Where(s => recognizer.IsReady(s));
      if (!readyStreams.Any())
      {
        break;
      }

      recognizer.Decode(readyStreams);
    }

    // display results
    for (int i = 0; i != files.Length; ++i)
    {
      var r = recognizer.GetResult(streams[i]);
      var text = r.Text;
      var tokens = r.Tokens;
      Console.WriteLine("--------------------");
      Console.WriteLine(files[i]);
      Console.WriteLine("text: {0}", text);
      Console.WriteLine("tokens: [{0}]", string.Join(", ", tokens));
      Console.Write("timestamps: [");
      r.Timestamps.ToList().ForEach(i => Console.Write(string.Format("{0:0.00}", i) + ", "));
      Console.WriteLine("]");
    }
    Console.WriteLine("--------------------");
  }
}


================================================
FILE: dotnet-examples/online-decode-files/online-decode-files.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>online_decode_files</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="CommandLineParser" Version="2.9.1" />
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: dotnet-examples/online-decode-files/run-paraformer.sh
================================================
#!/usr/bin/env bash

# Please refer to
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-streaming-paraformer-bilingual-zh-en-chinese-english
# to download the model files

set -ex
if [ ! -d ./sherpa-onnx-streaming-paraformer-bilingual-zh-en ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
  tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
  rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
fi

dotnet run -c Release \
  --tokens ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt \
  --paraformer-encoder ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx \
  --paraformer-decoder ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx \
  --decoding-method greedy_search \
  --files ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/1.wav \
  ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav


================================================
FILE: dotnet-examples/online-decode-files/run-t-one-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -f ./sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
fi

dotnet run -c Release \
  --tokens ./sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt \
  --t-one-ctc ./sherpa-onnx-streaming-t-one-russian-2025-09-08/model.onnx \
  --files ./sherpa-onnx-streaming-t-one-russian-2025-09-08/0.wav


================================================
FILE: dotnet-examples/online-decode-files/run-transducer-itn.sh
================================================
#!/usr/bin/env bash

# Please refer to
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
# to download the model files

set -ex
if [ ! -d ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
fi

if [ ! -f ./itn-zh-number.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
fi

if [ ! -f ./itn_zh_number.fst ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi

dotnet run -c Release \
  --tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \
  --encoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx \
  --decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \
  --joiner ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx \
  --rule-fsts ./itn_zh_number.fst \
  --decoding-method greedy_search \
  --files ./itn-zh-number.wav


================================================
FILE: dotnet-examples/online-decode-files/run-transducer.sh
================================================
#!/usr/bin/env bash

# Please refer to
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
# to download the model files

set -ex
if [ ! -d ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
fi

dotnet run -c Release \
  --tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \
  --encoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx \
  --decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \
  --joiner ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx \
  --decoding-method greedy_search \
  --files ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav \
  ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav \


================================================
FILE: dotnet-examples/online-decode-files/run-zipformer2-ctc.sh
================================================
#!/usr/bin/env bash

# Please refer to
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-ctc/zipformer-ctc-models.html#sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13-chinese
# to download the model files

set -ex
if [ ! -d ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13 ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
  rm sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
fi

dotnet run -c Release \
  --tokens ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt \
  --zipformer2-ctc ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx \
  --files ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000000.wav \
  ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000001.wav \
  ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000002.wav \
  ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/TEST_MEETING_T0000000113.wav \
  ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/TEST_MEETING_T0000000219.wav \
  ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/TEST_MEETING_T0000000351.wav


================================================
FILE: dotnet-examples/pocket-tts-zero-shot/Program.cs
================================================
﻿// Copyright (c)  2026  Xiaomi Corporation
//
// This file shows how to use a non-streaming PocketTTS model
// for text-to-speech
// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/tts/pocket.html
// and
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
// to download pre-trained models
using SherpaOnnx;
using System.Runtime.InteropServices;

class PocketTtsDemo
{
  static void Main(string[] args)
  {

    TestEn();
  }

  static void TestEn()
  {
    var config = new OfflineTtsConfig();
    config.Model.Pocket.LmFlow = "./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_flow.int8.onnx";
    config.Model.Pocket.LmMain = "./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_main.int8.onnx";
    config.Model.Pocket.Encoder = "./sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx";
    config.Model.Pocket.Decoder = "./sherpa-onnx-pocket-tts-int8-2026-01-26/decoder.int8.onnx";
    config.Model.Pocket.TextConditioner = "./sherpa-onnx-pocket-tts-int8-2026-01-26/text_conditioner.onnx";
    config.Model.Pocket.VocabJson = "./sherpa-onnx-pocket-tts-int8-2026-01-26/vocab.json";
    config.Model.Pocket.TokenScoresJson = "./sherpa-onnx-pocket-tts-int8-2026-01-26/token_scores.json";

    config.Model.NumThreads = 2;
    config.Model.Debug = 1;
    config.Model.Provider = "cpu";

    OfflineTtsGenerationConfig genConfig = new OfflineTtsGenerationConfig();

    var referenceWaveFilename = "./sherpa-onnx-pocket-tts-int8-2026-01-26/test_wavs/bria.wav";
    var reader = new WaveReader(referenceWaveFilename);

    genConfig.ReferenceAudio = reader.Samples;
    genConfig.ReferenceSampleRate = reader.SampleRate;
    genConfig.Extra["max_reference_audio_len"] = 12;

    var tts = new OfflineTts(config);
    var text = "Today as always, men fall into two groups: slaves and free men. Whoever " +
      "does not have two-thirds of his day for himself, is a slave, whatever " +
      "he may be: a statesman, a businessman, an official, or a scholar. " +
      "Friends fell out often because life was changing so fast. The easiest " +
      "thing in the world was to lose touch with someone.";

    var MyCallback = (IntPtr samples, int n, float progress, IntPtr arg) =>
    {
      float[] data = new float[n];
      Marshal.Copy(samples, data, 0, n);
      // You can process samples here, e.g., play them.
      // See ../pocket-tts-zero-shot-play for how to play them
      Console.WriteLine($"Progress {progress*100}%");

      // 1 means to keep generating
      // 0 means to stop generating
      return 1;
    };

    var callback = new OfflineTtsCallbackProgressWithArg(MyCallback);

    var audio = tts.GenerateWithConfig(text, genConfig, callback);

    var outputFilename = "./generated-pocket-en.wav";
    var ok = audio.SaveToWaveFile(outputFilename);

    if (ok)
    {
      Console.WriteLine($"Wrote to {outputFilename} succeeded!");
    }
    else
    {
      Console.WriteLine($"Failed to write {outputFilename}");
    }
  }
}


================================================
FILE: dotnet-examples/pocket-tts-zero-shot/pocket-tts-zero-shot.csproj
================================================
﻿<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>pocket_tts_zero_shot</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: dotnet-examples/pocket-tts-zero-shot/run.sh
================================================
#!/usr/bin/env bash
set -ex

if [ ! -f ./sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
  tar xvf sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
  rm sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
fi

dotnet run


================================================
FILE: dotnet-examples/pocket-tts-zero-shot-play/Program.cs
================================================
﻿// Copyright (c)  2026  Xiaomi Corporation
//
// This file shows how to use a non-streaming PocketTTS model
// for text-to-speech
// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/tts/pocket.html
// and
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
// to download pre-trained models
using PortAudioSharp;
using SherpaOnnx;
using System.Collections.Concurrent;
using System.Runtime.InteropServices;

class PocketTtsDemo
{
  static void Main(string[] args)
  {

    TestEn();
  }

  static void TestEn()
  {
    var config = new OfflineTtsConfig();
    config.Model.Pocket.LmFlow = "./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_flow.int8.onnx";
    config.Model.Pocket.LmMain = "./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_main.int8.onnx";
    config.Model.Pocket.Encoder = "./sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx";
    config.Model.Pocket.Decoder = "./sherpa-onnx-pocket-tts-int8-2026-01-26/decoder.int8.onnx";
    config.Model.Pocket.TextConditioner = "./sherpa-onnx-pocket-tts-int8-2026-01-26/text_conditioner.onnx";
    config.Model.Pocket.VocabJson = "./sherpa-onnx-pocket-tts-int8-2026-01-26/vocab.json";
    config.Model.Pocket.TokenScoresJson = "./sherpa-onnx-pocket-tts-int8-2026-01-26/token_scores.json";

    config.Model.NumThreads = 2;
    config.Model.Debug = 1;
    config.Model.Provider = "cpu";

    OfflineTtsGenerationConfig genConfig = new OfflineTtsGenerationConfig();

    var referenceWaveFilename = "./sherpa-onnx-pocket-tts-int8-2026-01-26/test_wavs/bria.wav";
    var reader = new WaveReader(referenceWaveFilename);

    genConfig.ReferenceAudio = reader.Samples;
    genConfig.ReferenceSampleRate= reader.SampleRate;
    genConfig.Extra["max_reference_audio_len"] = 12;

    var tts = new OfflineTts(config);
    var text = "Today as always, men fall into two groups: slaves and free men. Whoever " +
      "does not have two-thirds of his day for himself, is a slave, whatever " +
      "he may be: a statesman, a businessman, an official, or a scholar. " +
      "Friends fell out often because life was changing so fast. The easiest " +
      "thing in the world was to lose touch with someone.";

    Console.WriteLine(PortAudio.VersionInfo.versionText);
    PortAudio.Initialize();
    Console.WriteLine($"Number of devices: {PortAudio.DeviceCount}");

    for (int i = 0; i != PortAudio.DeviceCount; ++i)
    {
      Console.WriteLine($" Device {i}");
      DeviceInfo deviceInfo = PortAudio.GetDeviceInfo(i);
      Console.WriteLine($"   Name: {deviceInfo.name}");
      Console.WriteLine($"   Max output channels: {deviceInfo.maxOutputChannels}");
      Console.WriteLine($"   Default sample rate: {deviceInfo.defaultSampleRate}");
    }
    int deviceIndex = PortAudio.DefaultOutputDevice;
    if (deviceIndex == PortAudio.NoDevice)
    {
      Console.WriteLine("No default output device found. Please use ../offline-tts instead");
      Environment.Exit(1);
    }

    var info = PortAudio.GetDeviceInfo(deviceIndex);
    Console.WriteLine();
    Console.WriteLine($"Use output default device {deviceIndex} ({info.name})");

    var param = new StreamParameters();
    param.device = deviceIndex;
    param.channelCount = 1;
    param.sampleFormat = SampleFormat.Float32;
    param.suggestedLatency = info.defaultLowOutputLatency;
    param.hostApiSpecificStreamInfo = IntPtr.Zero;

    // https://learn.microsoft.com/en-us/dotnet/standard/collections/thread-safe/blockingcollection-overview
    var dataItems = new BlockingCollection<float[]>();

    var myCallback = (IntPtr samples, int n, float progress, IntPtr arg) =>
    {
      Console.WriteLine($"Progress {progress*100}%");

      float[] data = new float[n];

      Marshal.Copy(samples, data, 0, n);

      dataItems.Add(data);

      // 1 means to keep generating
      // 0 means to stop generating
      return 1;

    };


    var playFinished = false;

    float[]? lastSampleArray = null;
    int lastIndex = 0; // not played

    PortAudioSharp.Stream.Callback playCallback = (IntPtr input, IntPtr output,
        UInt32 frameCount,
        ref StreamCallbackTimeInfo timeInfo,
        StreamCallbackFlags statusFlags,
        IntPtr userData
        ) =>
    {
      if (dataItems.IsCompleted && lastSampleArray == null && lastIndex == 0)
      {
        Console.WriteLine($"Finished playing");
        playFinished = true;
        return StreamCallbackResult.Complete;
      }

      int expected = Convert.ToInt32(frameCount);
      int i = 0;

      while ((lastSampleArray != null || dataItems.Count != 0) && (i < expected))
      {
        int needed = expected - i;

        if (lastSampleArray != null)
        {
          int remaining = lastSampleArray.Length - lastIndex;
          if (remaining >= needed)
          {
            float[] this_block = lastSampleArray.Skip(lastIndex).Take(needed).ToArray();
            lastIndex += needed;
            if (lastIndex == lastSampleArray.Length)
            {
              lastSampleArray = null;
              lastIndex = 0;
            }

            Marshal.Copy(this_block, 0, IntPtr.Add(output, i * sizeof(float)), needed);
            return StreamCallbackResult.Continue;
          }

          float[] this_block2 = lastSampleArray.Skip(lastIndex).Take(remaining).ToArray();
          lastIndex = 0;
          lastSampleArray = null;

          Marshal.Copy(this_block2, 0, IntPtr.Add(output, i * sizeof(float)), remaining);
          i += remaining;
          continue;
        }

        if (dataItems.Count != 0)
        {
          lastSampleArray = dataItems.Take();
          lastIndex = 0;
        }
      }

      if (i < expected)
      {
        int sizeInBytes = (expected - i) * 4;
        Marshal.Copy(new byte[sizeInBytes], 0, IntPtr.Add(output, i * sizeof(float)), sizeInBytes);
      }

      return StreamCallbackResult.Continue;
    };

    PortAudioSharp.Stream stream = new PortAudioSharp.Stream(inParams: null, outParams: param, sampleRate: tts.SampleRate,
        framesPerBuffer: 0,
        streamFlags: StreamFlags.ClipOff,
        callback: playCallback,
        userData: IntPtr.Zero
        );

    stream.Start();

    var callback = new OfflineTtsCallbackProgressWithArg(myCallback);

    var audio = tts.GenerateWithConfig(text, genConfig, callback);

    var outputFilename = "./generated-pocket-en-play.wav";
    var ok = audio.SaveToWaveFile(outputFilename);

    if (ok)
    {
      Console.WriteLine($"Wrote to {outputFilename} succeeded!");
    }
    else
    {
      Console.WriteLine($"Failed to write {outputFilename}");
    }

    dataItems.CompleteAdding();

    while (!playFinished)
    {
      Thread.Sleep(100); // 100ms
    }
  }
}


================================================
FILE: dotnet-examples/pocket-tts-zero-shot-play/pocket-tts-zero-shot-play.csproj
================================================
﻿<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>pocket_tts_zero_shot_play</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="PortAudioSharp2" Version="*" />
  </ItemGroup>

  <ItemGroup>
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: dotnet-examples/pocket-tts-zero-shot-play/run.sh
================================================
#!/usr/bin/env bash
set -ex

if [ ! -f ./sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
  tar xvf sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
  rm sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
fi

dotnet run


================================================
FILE: dotnet-examples/sherpa-onnx.sln
================================================
﻿
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.0.31903.59
MinimumVisualStudioVersion = 10.0.40219.1
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "online-decode-files", "online-decode-files\online-decode-files.csproj", "{45307474-BECB-4ABE-9388-D01D55A1A9BE}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "offline-decode-files", "offline-decode-files\offline-decode-files.csproj", "{2DAB152C-9E24-47A0-9DB0-781297ECE458}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "speech-recognition-from-microphone", "speech-recognition-from-microphone\speech-recognition-from-microphone.csproj", "{FE4EA1FF-062A-46B3-B78D-C828FED7B82E}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "offline-tts", "offline-tts\offline-tts.csproj", "{72196886-7143-4043-96E2-BCACEC6C79EB}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "offline-tts-play", "offline-tts-play\offline-tts-play.csproj", "{40781464-5948-462B-BA4B-98932711513F}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "spoken-language-identification", "spoken-language-identification\spoken-language-identification.csproj", "{3D7CF3D6-AC45-4D50-9619-5687B1443E94}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "streaming-hlg-decoding", "streaming-hlg-decoding\streaming-hlg-decoding.csproj", "{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "speaker-identification", "speaker-identification\speaker-identification.csproj", "{2B1B140E-A92F-426B-B0DF-5D916B67304F}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "offline-punctuation", "offline-punctuation\offline-punctuation.csproj", "{42D85582-BB63-4259-A4EA-837D66AC078B}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "vad-non-streaming-asr-paraformer", "vad-non-streaming-asr-paraformer\vad-non-streaming-asr-paraformer.csproj", "{8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Common", "Common\Common.csproj", "{401E963F-E25A-43CE-987D-8DB2D4715756}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "keyword-spotting-from-files", "keyword-spotting-from-files\keyword-spotting-from-files.csproj", "{A87EDD31-D654-4C9F-AED7-F6F2825659BD}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "keyword-spotting-from-microphone", "keyword-spotting-from-microphone\keyword-spotting-from-microphone.csproj", "{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "offline-speaker-diarization", "offline-speaker-diarization\offline-speaker-diarization.csproj", "{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "kokoro-tts", "kokoro-tts\kokoro-tts.csproj", "{9C0ABE6C-1F54-42B5-804E-C3FED6668F52}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "kokoro-tts-play", "kokoro-tts-play\kokoro-tts-play.csproj", "{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speech-enhancement-gtcrn", "speech-enhancement-gtcrn\speech-enhancement-gtcrn.csproj", "{DF2569C6-6011-4716-9538-F9E9069E00EB}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speech-enhancement-dpdfnet", "speech-enhancement-dpdfnet\speech-enhancement-dpdfnet.csproj", "{016E5D0E-6D79-4AF6-B2C6-F0E091D78C00}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "version-test", "version-test\version-test.csproj", "{E57711E5-6546-4BA0-B627-79C94F415BC5}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "non-streaming-canary-decode-files", "non-streaming-canary-decode-files\non-streaming-canary-decode-files.csproj", "{925779DB-4429-4366-87C3-B14DD44AE1D4}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "kitten-tts", "kitten-tts\kitten-tts.csproj", "{E5AB574B-9E31-45D4-9B75-1C1892241E41}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "kitten-tts-play", "kitten-tts-play\kitten-tts-play.csproj", "{D60A8A84-D6D3-4B79-A18A-1817BEBD35B9}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-audio-tagging", "offline-audio-tagging\offline-audio-tagging.csproj", "{0EBE2CE5-8940-4472-8A38-6A0E976E678F}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "non-streaming-funasr-nano-decode-files", "non-streaming-funasr-nano-decode-files\non-streaming-funasr-nano-decode-files.csproj", "{32F7534B-117E-4D1D-BAED-A1D1A6C6A62C}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "vad-non-streaming-funasr-nano", "vad-non-streaming-funasr-nano\vad-non-streaming-funasr-nano.csproj", "{32C8C12B-D7DB-455E-B35C-945A745520CC}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "pocket-tts-zero-shot", "pocket-tts-zero-shot\pocket-tts-zero-shot.csproj", "{9164FA6A-F8D3-4F52-8173-A2FA78E74BB2}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "pocket-tts-zero-shot-play", "pocket-tts-zero-shot-play\pocket-tts-zero-shot-play.csproj", "{0E73BD08-EA6F-416D-8DBF-E92893A8C3B1}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "non-streaming-moonshine-v2-decode-files", "non-streaming-moonshine-v2-decode-files\non-streaming-moonshine-v2-decode-files.csproj", "{C9E5A6D3-02F4-46DE-808B-5163348F45B3}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "supertonic-tts", "supertonic-tts\supertonic-tts.csproj", "{A3B7C4D1-E5F6-4A8B-9C0D-1E2F3A4B5C6D}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "streaming-speech-enhancement-gtcrn", "streaming-speech-enhancement-gtcrn\streaming-speech-enhancement-gtcrn.csproj", "{5B87496C-EF81-4232-A448-6308F8E5A18C}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "streaming-speech-enhancement-dpdfnet", "streaming-speech-enhancement-dpdfnet\streaming-speech-enhancement-dpdfnet.csproj", "{8CD66C3E-3AE3-43AA-8FDA-DD5BA456F2EC}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "zipvoice-tts", "zipvoice-tts\zipvoice-tts.csproj", "{BBC69A08-01A7-4F89-938F-F0D551AD3F6C}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "zipvoice-tts-play", "zipvoice-tts-play\zipvoice-tts-play.csproj", "{84A37E18-095E-42A6-93CC-C27CD90B8478}"
EndProject
Global
	GlobalSection(SolutionConfigurationPlatforms) = preSolution
		Debug|Any CPU = Debug|Any CPU
		Release|Any CPU = Release|Any CPU
	EndGlobalSection
	GlobalSection(ProjectConfigurationPlatforms) = postSolution
		{45307474-BECB-4ABE-9388-D01D55A1A9BE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{45307474-BECB-4ABE-9388-D01D55A1A9BE}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{45307474-BECB-4ABE-9388-D01D55A1A9BE}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{45307474-BECB-4ABE-9388-D01D55A1A9BE}.Release|Any CPU.Build.0 = Release|Any CPU
		{2DAB152C-9E24-47A0-9DB0-781297ECE458}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{2DAB152C-9E24-47A0-9DB0-781297ECE458}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{2DAB152C-9E24-47A0-9DB0-781297ECE458}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{2DAB152C-9E24-47A0-9DB0-781297ECE458}.Release|Any CPU.Build.0 = Release|Any CPU
		{FE4EA1FF-062A-46B3-B78D-C828FED7B82E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{FE4EA1FF-062A-46B3-B78D-C828FED7B82E}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{FE4EA1FF-062A-46B3-B78D-C828FED7B82E}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{FE4EA1FF-062A-46B3-B78D-C828FED7B82E}.Release|Any CPU.Build.0 = Release|Any CPU
		{72196886-7143-4043-96E2-BCACEC6C79EB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{72196886-7143-4043-96E2-BCACEC6C79EB}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{72196886-7143-4043-96E2-BCACEC6C79EB}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{72196886-7143-4043-96E2-BCACEC6C79EB}.Release|Any CPU.Build.0 = Release|Any CPU
		{40781464-5948-462B-BA4B-98932711513F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{40781464-5948-462B-BA4B-98932711513F}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{40781464-5948-462B-BA4B-98932711513F}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{40781464-5948-462B-BA4B-98932711513F}.Release|Any CPU.Build.0 = Release|Any CPU
		{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Release|Any CPU.Build.0 = Release|Any CPU
		{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Release|Any CPU.Build.0 = Release|Any CPU
		{2B1B140E-A92F-426B-B0DF-5D916B67304F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{2B1B140E-A92F-426B-B0DF-5D916B67304F}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{2B1B140E-A92F-426B-B0DF-5D916B67304F}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{2B1B140E-A92F-426B-B0DF-5D916B67304F}.Release|Any CPU.Build.0 = Release|Any CPU
		{42D85582-BB63-4259-A4EA-837D66AC078B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{42D85582-BB63-4259-A4EA-837D66AC078B}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{42D85582-BB63-4259-A4EA-837D66AC078B}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{42D85582-BB63-4259-A4EA-837D66AC078B}.Release|Any CPU.Build.0 = Release|Any CPU
		{8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}.Release|Any CPU.Build.0 = Release|Any CPU
		{401E963F-E25A-43CE-987D-8DB2D4715756}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{401E963F-E25A-43CE-987D-8DB2D4715756}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{401E963F-E25A-43CE-987D-8DB2D4715756}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{401E963F-E25A-43CE-987D-8DB2D4715756}.Release|Any CPU.Build.0 = Release|Any CPU
		{A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Release|Any CPU.Build.0 = Release|Any CPU
		{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Release|Any CPU.Build.0 = Release|Any CPU
		{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Release|Any CPU.Build.0 = Release|Any CPU
		{9C0ABE6C-1F54-42B5-804E-C3FED6668F52}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{9C0ABE6C-1F54-42B5-804E-C3FED6668F52}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{9C0ABE6C-1F54-42B5-804E-C3FED6668F52}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{9C0ABE6C-1F54-42B5-804E-C3FED6668F52}.Release|Any CPU.Build.0 = Release|Any CPU
		{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Release|Any CPU.Build.0 = Release|Any CPU
		{DF2569C6-6011-4716-9538-F9E9069E00EB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{DF2569C6-6011-4716-9538-F9E9069E00EB}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{DF2569C6-6011-4716-9538-F9E9069E00EB}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{DF2569C6-6011-4716-9538-F9E9069E00EB}.Release|Any CPU.Build.0 = Release|Any CPU
		{016E5D0E-6D79-4AF6-B2C6-F0E091D78C00}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{016E5D0E-6D79-4AF6-B2C6-F0E091D78C00}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{016E5D0E-6D79-4AF6-B2C6-F0E091D78C00}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{016E5D0E-6D79-4AF6-B2C6-F0E091D78C00}.Release|Any CPU.Build.0 = Release|Any CPU
		{E57711E5-6546-4BA0-B627-79C94F415BC5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{E57711E5-6546-4BA0-B627-79C94F415BC5}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{E57711E5-6546-4BA0-B627-79C94F415BC5}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{E57711E5-6546-4BA0-B627-79C94F415BC5}.Release|Any CPU.Build.0 = Release|Any CPU
		{925779DB-4429-4366-87C3-B14DD44AE1D4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{925779DB-4429-4366-87C3-B14DD44AE1D4}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{925779DB-4429-4366-87C3-B14DD44AE1D4}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{925779DB-4429-4366-87C3-B14DD44AE1D4}.Release|Any CPU.Build.0 = Release|Any CPU
		{E5AB574B-9E31-45D4-9B75-1C1892241E41}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{E5AB574B-9E31-45D4-9B75-1C1892241E41}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{E5AB574B-9E31-45D4-9B75-1C1892241E41}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{E5AB574B-9E31-45D4-9B75-1C1892241E41}.Release|Any CPU.Build.0 = Release|Any CPU
		{D60A8A84-D6D3-4B79-A18A-1817BEBD35B9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{D60A8A84-D6D3-4B79-A18A-1817BEBD35B9}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{D60A8A84-D6D3-4B79-A18A-1817BEBD35B9}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{D60A8A84-D6D3-4B79-A18A-1817BEBD35B9}.Release|Any CPU.Build.0 = Release|Any CPU
		{0EBE2CE5-8940-4472-8A38-6A0E976E678F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{0EBE2CE5-8940-4472-8A38-6A0E976E678F}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{0EBE2CE5-8940-4472-8A38-6A0E976E678F}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{0EBE2CE5-8940-4472-8A38-6A0E976E678F}.Release|Any CPU.Build.0 = Release|Any CPU
		{32F7534B-117E-4D1D-BAED-A1D1A6C6A62C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{32F7534B-117E-4D1D-BAED-A1D1A6C6A62C}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{32F7534B-117E-4D1D-BAED-A1D1A6C6A62C}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{32F7534B-117E-4D1D-BAED-A1D1A6C6A62C}.Release|Any CPU.Build.0 = Release|Any CPU
		{32C8C12B-D7DB-455E-B35C-945A745520CC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{32C8C12B-D7DB-455E-B35C-945A745520CC}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{32C8C12B-D7DB-455E-B35C-945A745520CC}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{32C8C12B-D7DB-455E-B35C-945A745520CC}.Release|Any CPU.Build.0 = Release|Any CPU
		{9164FA6A-F8D3-4F52-8173-A2FA78E74BB2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{9164FA6A-F8D3-4F52-8173-A2FA78E74BB2}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{9164FA6A-F8D3-4F52-8173-A2FA78E74BB2}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{9164FA6A-F8D3-4F52-8173-A2FA78E74BB2}.Release|Any CPU.Build.0 = Release|Any CPU
		{0E73BD08-EA6F-416D-8DBF-E92893A8C3B1}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{0E73BD08-EA6F-416D-8DBF-E92893A8C3B1}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{0E73BD08-EA6F-416D-8DBF-E92893A8C3B1}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{0E73BD08-EA6F-416D-8DBF-E92893A8C3B1}.Release|Any CPU.Build.0 = Release|Any CPU
		{C9E5A6D3-02F4-46DE-808B-5163348F45B3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{C9E5A6D3-02F4-46DE-808B-5163348F45B3}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{C9E5A6D3-02F4-46DE-808B-5163348F45B3}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{C9E5A6D3-02F4-46DE-808B-5163348F45B3}.Release|Any CPU.Build.0 = Release|Any CPU
		{A3B7C4D1-E5F6-4A8B-9C0D-1E2F3A4B5C6D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{A3B7C4D1-E5F6-4A8B-9C0D-1E2F3A4B5C6D}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{A3B7C4D1-E5F6-4A8B-9C0D-1E2F3A4B5C6D}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{A3B7C4D1-E5F6-4A8B-9C0D-1E2F3A4B5C6D}.Release|Any CPU.Build.0 = Release|Any CPU
		{5B87496C-EF81-4232-A448-6308F8E5A18C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{5B87496C-EF81-4232-A448-6308F8E5A18C}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{5B87496C-EF81-4232-A448-6308F8E5A18C}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{5B87496C-EF81-4232-A448-6308F8E5A18C}.Release|Any CPU.Build.0 = Release|Any CPU
		{8CD66C3E-3AE3-43AA-8FDA-DD5BA456F2EC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{8CD66C3E-3AE3-43AA-8FDA-DD5BA456F2EC}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{8CD66C3E-3AE3-43AA-8FDA-DD5BA456F2EC}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{8CD66C3E-3AE3-43AA-8FDA-DD5BA456F2EC}.Release|Any CPU.Build.0 = Release|Any CPU
		{BBC69A08-01A7-4F89-938F-F0D551AD3F6C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{BBC69A08-01A7-4F89-938F-F0D551AD3F6C}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{BBC69A08-01A7-4F89-938F-F0D551AD3F6C}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{BBC69A08-01A7-4F89-938F-F0D551AD3F6C}.Release|Any CPU.Build.0 = Release|Any CPU
		{84A37E18-095E-42A6-93CC-C27CD90B8478}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{84A37E18-095E-42A6-93CC-C27CD90B8478}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{84A37E18-095E-42A6-93CC-C27CD90B8478}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{84A37E18-095E-42A6-93CC-C27CD90B8478}.Release|Any CPU.Build.0 = Release|Any CPU
	EndGlobalSection
	GlobalSection(SolutionProperties) = preSolution
		HideSolutionNode = FALSE
	EndGlobalSection
	GlobalSection(ExtensibilityGlobals) = postSolution
		SolutionGuid = {07A6023C-0A37-4F82-A29F-896A3A338EAC}
	EndGlobalSection
EndGlobal


================================================
FILE: dotnet-examples/speaker-identification/Program.cs
================================================
﻿// Copyright (c)  2024  Xiaomi Corporation
//
// This file shows how to do speaker identification with sherpa-onnx.
//
// 1. Download a model from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
//
// 2. Download test data from
//
// git clone https://github.com/csukuangfj/sr-data
//
// 3. Now run it
//
// dotnet run

using SherpaOnnx;

class SpeakerIdentificationDemo
{
  public static float[] ComputeEmbedding(SpeakerEmbeddingExtractor extractor, string filename)
  {
    var reader = new WaveReader(filename);

    var stream = extractor.CreateStream();
    stream.AcceptWaveform(reader.SampleRate, reader.Samples);
    stream.InputFinished();

    var embedding = extractor.Compute(stream);

    return embedding;
  }

  static void Main(string[] args)
  {
    var config = new SpeakerEmbeddingExtractorConfig();
    config.Model = "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx";
    config.Debug = 1;
    var extractor = new SpeakerEmbeddingExtractor(config);

    var manager = new SpeakerEmbeddingManager(extractor.Dim);

    var spk1Files =
        new string[] {
          "./sr-data/enroll/fangjun-sr-1.wav",
          "./sr-data/enroll/fangjun-sr-2.wav",
          "./sr-data/enroll/fangjun-sr-3.wav",
        };
    var spk1Vec = new float[spk1Files.Length][];

    for (int i = 0; i < spk1Files.Length; ++i)
    {
      spk1Vec[i] = ComputeEmbedding(extractor, spk1Files[i]);
    }

    var spk2Files =
        new string[] {
          "./sr-data/enroll/leijun-sr-1.wav", "./sr-data/enroll/leijun-sr-2.wav",
        };

    var spk2Vec = new float[spk2Files.Length][];

    for (int i = 0; i < spk2Files.Length; ++i)
    {
      spk2Vec[i] = ComputeEmbedding(extractor, spk2Files[i]);
    }

    if (!manager.Add("fangjun", spk1Vec))
    {
      Console.WriteLine("Failed to register fangjun");
      return;
    }

    if (!manager.Add("leijun", spk2Vec))
    {
      Console.WriteLine("Failed to register leijun");
      return;
    }

    if (manager.NumSpeakers != 2)
    {
      Console.WriteLine("There should be two speakers");
      return;
    }

    if (!manager.Contains("fangjun"))
    {
      Console.WriteLine("It should contain the speaker fangjun");
      return;
    }

    if (!manager.Contains("leijun"))
    {
      Console.WriteLine("It should contain the speaker leijun");
      return;
    }

    Console.WriteLine("---All speakers---");

    var allSpeakers = manager.GetAllSpeakers();
    foreach (var s in allSpeakers)
    {
      Console.WriteLine(s);
    }
    Console.WriteLine("------------");

    var testFiles =
        new string[] {
          "./sr-data/test/fangjun-test-sr-1.wav",
          "./sr-data/test/leijun-test-sr-1.wav",
          "./sr-data/test/liudehua-test-sr-1.wav"
        };

    float threshold = 0.6f;
    foreach (var file in testFiles)
    {
      var embedding = ComputeEmbedding(extractor, file);

      var name = manager.Search(embedding, threshold);
      if (name == "")
      {
        name = "<Unknown>";
      }
      Console.WriteLine("{0}: {1}", file, name);
    }

    // test verify
    if (!manager.Verify("fangjun", ComputeEmbedding(extractor, testFiles[0]), threshold))
    {
      Console.WriteLine("testFiles[0] should match fangjun!");
      return;
    }

    if (!manager.Remove("fangjun"))
    {
      Console.WriteLine("Failed to remove fangjun");
      return;
    }

    if (manager.Verify("fangjun", ComputeEmbedding(extractor, testFiles[0]), threshold))
    {
      Console.WriteLine("{0} should match no one!", testFiles[0]);
      return;
    }

    if (manager.NumSpeakers != 1)
    {
      Console.WriteLine("There should only 1 speaker left.");
      return;
    }
  }
}


================================================
FILE: dotnet-examples/speaker-identification/run.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -e ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
fi

if [ ! -d ./sr-data ]; then
  git clone https://github.com/csukuangfj/sr-data
fi

dotnet run


================================================
FILE: dotnet-examples/speaker-identification/speaker-identification.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>speaker_identification</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: dotnet-examples/speech-enhancement-dpdfnet/Program.cs
================================================
﻿// Copyright (c)  2025  Xiaomi Corporation
//
// This file shows how to use speech enhancement API with DPDFNet models.
// Use dpdfnet_baseline.onnx, dpdfnet2.onnx, dpdfnet4.onnx, or dpdfnet8.onnx
// for 16 kHz downstream ASR or speech recognition.
// Use dpdfnet2_48khz_hr.onnx for 48 kHz enhancement output.
//
// 1. Download a model from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet_baseline.onnx
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet2.onnx
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet4.onnx
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet8.onnx
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet2_48khz_hr.onnx
//
// 2. Download a test file
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
//
// 3. Now run it
//
// dotnet run

using SherpaOnnx;

class OfflineSpeechEnhancementDemo
{
  static void Main(string[] args)
  {
    var model = "./dpdfnet_baseline.onnx";
    var config = new OfflineSpeechDenoiserConfig();
    config.Model.Dpdfnet.Model = model;
    config.Model.Debug = 1;
    config.Model.NumThreads = 1;
    var sd = new OfflineSpeechDenoiser(config);

    WaveReader waveReader = new WaveReader("./inp_16k.wav");
    var denoisedAudio = sd.Run(waveReader.Samples, waveReader.SampleRate);

    var outputFilename = "./enhanced.wav";
    var ok = denoisedAudio.SaveToWaveFile(outputFilename);

    if (ok)
    {
      Console.WriteLine($"Wrote to {outputFilename} succeeded!");
    }
    else
    {
      Console.WriteLine($"Failed to write {outputFilename}");
    }
  }
}


================================================
FILE: dotnet-examples/speech-enhancement-dpdfnet/run.sh
================================================
#!/usr/bin/env bash
set -ex

if [ ! -f ./dpdfnet_baseline.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet_baseline.onnx
fi

if [ ! -f ./inp_16k.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
fi

dotnet run


================================================
FILE: dotnet-examples/speech-enhancement-dpdfnet/speech-enhancement-dpdfnet.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>speech_enhancement_dpdfnet</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: dotnet-examples/speech-enhancement-gtcrn/Program.cs
================================================
﻿// Copyright (c)  2025  Xiaomi Corporation
//
// This file shows how to use speech enhancement API with GTCRN models.
//
// 1. Download a model from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
//
// 2. Download a test file
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
//
// 3. Now run it
//
// dotnet run

using SherpaOnnx;

class OfflineSpeechEnhancementDemo
{
  static void Main(string[] args)
  {
    var model = "./gtcrn_simple.onnx";
    var config = new OfflineSpeechDenoiserConfig();
    config.Model.Gtcrn.Model = model;
    config.Model.Debug = 1;
    config.Model.NumThreads = 1;
    var sd = new OfflineSpeechDenoiser(config);

    WaveReader waveReader = new WaveReader("./inp_16k.wav");
    var denoisedAudio =  sd.Run(waveReader.Samples, waveReader.SampleRate);

    var outputFilename = "./enhanced.wav";
    var ok = denoisedAudio.SaveToWaveFile(outputFilename);

    if (ok)
    {
      Console.WriteLine($"Wrote to {outputFilename} succeeded!");
    }
    else
    {
      Console.WriteLine($"Failed to write {outputFilename}");
    }
  }
}


================================================
FILE: dotnet-examples/speech-enhancement-gtcrn/run.sh
================================================
#!/usr/bin/env bash
set -ex

if [ ! -f ./gtcrn_simple.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
fi

if [ ! -f ./inp_16k.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
fi

dotnet run


================================================
FILE: dotnet-examples/speech-enhancement-gtcrn/speech-enhancement-gtcrn.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>speech_enhancement_gtcrn</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: dotnet-examples/speech-recognition-from-microphone/Program.cs
================================================
﻿// Copyright (c)  2023  Xiaomi Corporation
//
// This file shows how to use a streaming model for real-time speech
// recognition from a microphone.
// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html
// to download streaming models

using CommandLine;
using CommandLine.Text;
using PortAudioSharp;
using SherpaOnnx;
using System.Runtime.InteropServices;

class SpeechRecognitionFromMicrophone
{
  class Options
  {
    [Option(Required = true, HelpText = "Path to tokens.txt")]
    public string? Tokens { get; set; }

    [Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")]
    public string? Provider { get; set; }

    [Option(Required = false, HelpText = "Path to transducer encoder.onnx")]
    public string? Encoder { get; set; }

    [Option(Required = false, HelpText = "Path to transducer decoder.onnx")]
    public string? Decoder { get; set; }

    [Option(Required = false, HelpText = "Path to transducer joiner.onnx")]
    public string? Joiner { get; set; }

    [Option("paraformer-encoder", Required = false, HelpText = "Path to paraformer encoder.onnx")]
    public string? ParaformerEncoder { get; set; }

    [Option("paraformer-decoder", Required = false, HelpText = "Path to paraformer decoder.onnx")]
    public string? ParaformerDecoder { get; set; }

    [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")]
    public int NumThreads { get; set; }

    [Option("decoding-method", Required = false, Default = "greedy_search",
            HelpText = "Valid decoding methods are: greedy_search, modified_beam_search")]
    public string? DecodingMethod { get; set; }

    [Option(Required = false, Default = false, HelpText = "True to show model info during loading")]
    public bool Debug { get; set; }

    [Option("sample-rate", Required = false, Default = 16000, HelpText = "Sample rate of the data used to train the model")]
    public int SampleRate { get; set; }

    [Option("max-active-paths", Required = false, Default = 4,
        HelpText = @"Used only when --decoding--method is modified_beam_search.
It specifies number of active paths to keep during the search")]
    public int MaxActivePaths { get; set; }

    [Option("enable-endpoint", Required = false, Default = true,
        HelpText = "True to enable endpoint detection.")]
    public bool EnableEndpoint { get; set; }

    [Option("rule1-min-trailing-silence", Required = false, Default = 2.4F,
        HelpText = @"An endpoint is detected if trailing silence in seconds is
larger than this value even if nothing has been decoded. Used only when --enable-endpoint is true.")]
    public float Rule1MinTrailingSilence { get; set; }

    [Option("rule2-min-trailing-silence", Required = false, Default = 0.8F,
        HelpText = @"An endpoint is detected if trailing silence in seconds is
larger than this value after something that is not blank has been decoded. Used
only when --enable-endpoint is true.")]
    public float Rule2MinTrailingSilence { get; set; }

    [Option("rule3-min-utterance-length", Required = false, Default = 20.0F,
        HelpText = @"An endpoint is detected if the utterance in seconds is
larger than this value. Used only when --enable-endpoint is true.")]
    public float Rule3MinUtteranceLength { get; set; }
  }

  static void Main(string[] args)
  {
    var parser = new CommandLine.Parser(with => with.HelpWriter = null);
    var parserResult = parser.ParseArguments<Options>(args);

    parserResult
      .WithParsed<Options>(options => Run(options))
      .WithNotParsed(errs => DisplayHelp(parserResult, errs));
  }

  private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs)
  {
    string usage = @"
(1) Streaming transducer models

dotnet run -c Release \
  --tokens ./icefall-asr-zipformer-streaming-wenetspeech-20230615/data/lang_char/tokens.txt \
  --encoder ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx \
  --decoder ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/decoder-epoch-12-avg-4-chunk-16-left-128.onnx \
  --joiner ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/joiner-epoch-12-avg-4-chunk-16-left-128.onnx

(2) Streaming Paraformer models

dotnet run \
  --tokens=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt \
  --paraformer-encoder=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx \
  --paraformer-decoder=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx

Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html
to download pre-trained streaming models.
";

    var helpText = HelpText.AutoBuild(result, h =>
    {
      h.AdditionalNewLineAfterOption = false;
      h.Heading = usage;
      h.Copyright = "Copyright (c) 2023 Xiaomi Corporation";
      return HelpText.DefaultParsingErrorsHandler(result, h);
    }, e => e);
    Console.WriteLine(helpText);
  }

  private static void Run(Options options)
  {
    var config = new OnlineRecognizerConfig();
    config.FeatConfig.SampleRate = options.SampleRate;

    // All models from icefall using feature dim 80.
    // You can change it if your model has a different feature dim.
    config.FeatConfig.FeatureDim = 80;

    config.ModelConfig.Transducer.Encoder = options.Encoder;
    config.ModelConfig.Transducer.Decoder = options.Decoder;
    config.ModelConfig.Transducer.Joiner = options.Joiner;

    config.ModelConfig.Paraformer.Encoder = options.ParaformerEncoder;
    config.ModelConfig.Paraformer.Decoder = options.ParaformerDecoder;

    config.ModelConfig.Tokens = options.Tokens;
    config.ModelConfig.Provider = options.Provider;
    config.ModelConfig.NumThreads = options.NumThreads;
    config.ModelConfig.Debug = options.Debug ? 1 : 0;

    config.DecodingMethod = options.DecodingMethod;
    config.MaxActivePaths = options.MaxActivePaths;
    config.EnableEndpoint = options.EnableEndpoint ? 1 : 0;

    config.Rule1MinTrailingSilence = options.Rule1MinTrailingSilence;
    config.Rule2MinTrailingSilence = options.Rule2MinTrailingSilence;
    config.Rule3MinUtteranceLength = options.Rule3MinUtteranceLength;

    var recognizer = new OnlineRecognizer(config);

    var s = recognizer.CreateStream();

    Console.WriteLine(PortAudio.VersionInfo.versionText);
    PortAudio.Initialize();

    Console.WriteLine($"Number of devices: {PortAudio.DeviceCount}");
    for (int i = 0; i != PortAudio.DeviceCount; ++i)
    {
      Console.WriteLine($" Device {i}");
      DeviceInfo deviceInfo = PortAudio.GetDeviceInfo(i);
      Console.WriteLine($"   Name: {deviceInfo.name}");
      Console.WriteLine($"   Max input channels: {deviceInfo.maxInputChannels}");
      Console.WriteLine($"   Default sample rate: {deviceInfo.defaultSampleRate}");
    }
    int deviceIndex = PortAudio.DefaultInputDevice;
    if (deviceIndex == PortAudio.NoDevice)
    {
      Console.WriteLine("No default input device found");
      Environment.Exit(1);
    }

    var info = PortAudio.GetDeviceInfo(deviceIndex);

    Console.WriteLine();
    Console.WriteLine($"Use default device {deviceIndex} ({info.name})");

    var param = new StreamParameters();
    param.device = deviceIndex;
    param.channelCount = 1;
    param.sampleFormat = SampleFormat.Float32;
    param.suggestedLatency = info.defaultLowInputLatency;
    param.hostApiSpecificStreamInfo = IntPtr.Zero;

    PortAudioSharp.Stream.Callback callback = (IntPtr input, IntPtr output,
        uint frameCount,
        ref StreamCallbackTimeInfo timeInfo,
        StreamCallbackFlags statusFlags,
        IntPtr userData
        ) =>
    {
      var samples = new float[frameCount];
      Marshal.Copy(input, samples, 0, (int)frameCount);

      s.AcceptWaveform(options.SampleRate, samples);

      return StreamCallbackResult.Continue;
    };

    PortAudioSharp.Stream stream = new PortAudioSharp.Stream(inParams: param, outParams: null, sampleRate: options.SampleRate,
        framesPerBuffer: 0,
        streamFlags: StreamFlags.ClipOff,
        callback: callback,
        userData: IntPtr.Zero
        );

    Console.WriteLine(param);
    Console.WriteLine("Started! Please speak");

    stream.Start();

    var lastText = string.Empty;
    int segmentIndex = 0;

    while (true)
    {
      while (recognizer.IsReady(s))
      {
        recognizer.Decode(s);
      }

      var text = recognizer.GetResult(s).Text;
      bool isEndpoint = recognizer.IsEndpoint(s);
      if (!string.IsNullOrWhiteSpace(text) && lastText != text)
      {
        lastText = text;
        Console.Write($"\r{segmentIndex}: {lastText}");
      }

      if (isEndpoint)
      {
        if (!string.IsNullOrWhiteSpace(text))
        {
          ++segmentIndex;
          Console.WriteLine();
        }
        recognizer.Reset(s);
      }

      Thread.Sleep(200); // ms
    }
  }
}


================================================
FILE: dotnet-examples/speech-recognition-from-microphone/run-paraformer.sh
================================================
#!/usr/bin/env bash

# Please refer to
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-streaming-paraformer-bilingual-zh-en-chinese-english
# to download the model files

set -ex
if [ ! -d ./sherpa-onnx-streaming-paraformer-bilingual-zh-en ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
  tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
  rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
fi

dotnet run -c Release \
  --tokens ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt \
  --paraformer-encoder ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx \
  --paraformer-decoder ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx \


================================================
FILE: dotnet-examples/speech-recognition-from-microphone/run-transducer.sh
================================================
#!/usr/bin/env bash

# Please refer to
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
# to download the model files
#
set -ex

export LD_LIBRARY_PATH=$PWD:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$PWD:$DYLD_LIBRARY_PATH

if [ ! -d ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
fi

dotnet run -c Release \
  --tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \
  --encoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx \
  --decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \
  --joiner ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx


================================================
FILE: dotnet-examples/speech-recognition-from-microphone/speech-recognition-from-microphone.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>speech_recognition_from_microphone</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="CommandLineParser" Version="2.9.1" />
    <PackageReference Include="PortAudioSharp2" Version="*" />
  </ItemGroup>

  <ItemGroup>
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: dotnet-examples/spoken-language-identification/Program.cs
================================================
﻿// Copyright (c)  2024  Xiaomi Corporation
//
// This file shows how to do spoken language identification with whisper.
//
// 1. Download a whisper multilingual model. We use a tiny model below.
// Please refer to https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
// to download more models.
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
// tar xvf sherpa-onnx-whisper-tiny.tar.bz2
// rm sherpa-onnx-whisper-tiny.tar.bz2
//
// 2. Now run it
//
// dotnet run

using SherpaOnnx;

class SpokenLanguageIdentificationDemo
{
  static void Main(string[] args)
  {
    var config = new SpokenLanguageIdentificationConfig();
    config.Whisper.Encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx";
    config.Whisper.Decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx";

    var slid = new SpokenLanguageIdentification(config);
    var filename = "./sherpa-onnx-whisper-tiny/test_wavs/0.wav";

    var waveReader = new WaveReader(filename);

    var s = slid.CreateStream();
    s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
    var result = slid.Compute(s);
    Console.WriteLine($"Filename: {filename}");
    Console.WriteLine($"Detected language: {result.Lang}");
  }
}


================================================
FILE: dotnet-examples/spoken-language-identification/run.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ./sherpa-onnx-whisper-tiny ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
  tar xvf sherpa-onnx-whisper-tiny.tar.bz2
  rm sherpa-onnx-whisper-tiny.tar.bz2
fi

dotnet run


================================================
FILE: dotnet-examples/spoken-language-identification/spoken-language-identification.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>spoken_language_identification</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: dotnet-examples/streaming-hlg-decoding/Program.cs
================================================
﻿// Copyright (c)  2024  Xiaomi Corporation
//
// This file shows how to do streaming HLG decoding.
//
// 1. Download the model for testing
//
//  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
//  tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
//  rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
//
// 2. Now run it
//
// dotnet run

using SherpaOnnx;

class StreamingHlgDecodingDemo
{
  static void Main(string[] args)
  {
    var config = new OnlineRecognizerConfig();
    config.FeatConfig.SampleRate = 16000;
    config.FeatConfig.FeatureDim = 80;
    config.ModelConfig.Zipformer2Ctc.Model = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx";

    config.ModelConfig.Tokens = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt";
    config.ModelConfig.Provider = "cpu";
    config.ModelConfig.NumThreads = 1;
    config.ModelConfig.Debug = 0;
    config.CtcFstDecoderConfig.Graph = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst";

    var recognizer = new OnlineRecognizer(config);

    var filename = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav";

    var waveReader = new WaveReader(filename);
    var s = recognizer.CreateStream();
    s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);

    var tailPadding = new float[(int)(waveReader.SampleRate * 0.3)];
    s.AcceptWaveform(waveReader.SampleRate, tailPadding);
    s.InputFinished();

    while (recognizer.IsReady(s))
    {
      recognizer.Decode(s);
    }

    var r = recognizer.GetResult(s);
    var text = r.Text;
    var tokens = r.Tokens;
    Console.WriteLine("--------------------");
    Console.WriteLine(filename);
    Console.WriteLine("text: {0}", text);
    Console.WriteLine("tokens: [{0}]", string.Join(", ", tokens));
    Console.Write("timestamps: [");
    r.Timestamps.ToList().ForEach(i => Console.Write(string.Format("{0:0.00}", i) + ", "));
    Console.WriteLine("]");
    Console.WriteLine("--------------------");
  }
}


================================================
FILE: dotnet-examples/streaming-hlg-decoding/run.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
fi

dotnet run -c Release


================================================
FILE: dotnet-examples/streaming-hlg-decoding/streaming-hlg-decoding.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>streaming_hlg_decoding</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: dotnet-examples/streaming-speech-enhancement-dpdfnet/Program.cs
================================================
// Copyright (c)  2026  Xiaomi Corporation
//
// This file shows how to use the online speech enhancement API with DPDFNet
// models.

using SherpaOnnx;
using System.Runtime.InteropServices;
using System.Text;

class StreamingSpeechEnhancementDpdfnet
{
  static void Main(string[] args)
  {
    var config = new OnlineSpeechDenoiserConfig();
    config.Model.Dpdfnet.Model = "./dpdfnet_baseline.onnx";
    config.Model.Debug = 1;
    config.Model.NumThreads = 1;

    var sd = new OnlineSpeechDenoiser(config);
    WaveReader waveReader = new WaveReader("./inp_16k.wav");

    var samples = waveReader.Samples;
    var output = new List<float>(samples.Length);
    int frameShift = sd.FrameShiftInSamples;

    for (int start = 0; start < samples.Length; start += frameShift)
    {
      int count = Math.Min(frameShift, samples.Length - start);
      float[] chunk = new float[count];
      Array.Copy(samples, start, chunk, 0, count);
      output.AddRange(sd.Run(chunk, waveReader.SampleRate).Samples);
    }

    output.AddRange(sd.Flush().Samples);

    var outFilename = "./enhanced-online-dpdfnet.wav";
    var outAudio = new GeneratedDenoisedAudio(output.ToArray(), sd.SampleRate);
    if (outAudio.SaveToWaveFile(outFilename))
    {
      Console.WriteLine($"Wrote to {outFilename} succeeded!");
    }
    else
    {
      Console.WriteLine($"Failed to write {outFilename}");
    }
  }

  private sealed class GeneratedDenoisedAudio
  {
    private readonly float[] _samples;
    private readonly int _sampleRate;

    public GeneratedDenoisedAudio(float[] samples, int sampleRate)
    {
      _samples = samples;
      _sampleRate = sampleRate;
    }

    public bool SaveToWaveFile(string filename)
    {
      byte[] utf8Filename = Encoding.UTF8.GetBytes(filename);
      byte[] utf8FilenameWithNull = new byte[utf8Filename.Length + 1];
      Array.Copy(utf8Filename, utf8FilenameWithNull, utf8Filename.Length);
      utf8FilenameWithNull[utf8Filename.Length] = 0;
      return SherpaOnnxWriteWave(_samples, _samples.Length, _sampleRate, utf8FilenameWithNull) == 1;
    }

    [DllImport(Dll.Filename)]
    private static extern int SherpaOnnxWriteWave(
        float[] samples,
        int n,
        int sampleRate,
        [MarshalAs(UnmanagedType.LPArray, ArraySubType = UnmanagedType.I1)] byte[] utf8Filename);
  }
}


================================================
FILE: dotnet-examples/streaming-speech-enhancement-dpdfnet/run.sh
================================================
#!/usr/bin/env bash
set -ex

if [ ! -f ./dpdfnet_baseline.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet_baseline.onnx
fi

if [ ! -f ./inp_16k.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
fi

dotnet run


================================================
FILE: dotnet-examples/streaming-speech-enhancement-dpdfnet/streaming-speech-enhancement-dpdfnet.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>streaming_speech_enhancement_dpdfnet</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: dotnet-examples/streaming-speech-enhancement-gtcrn/Program.cs
================================================
// Copyright (c)  2026  Xiaomi Corporation
//
// This file shows how to use the online speech enhancement API with GTCRN
// models.

using SherpaOnnx;
using System.Runtime.InteropServices;
using System.Text;

class StreamingSpeechEnhancementGtcrn
{
  static void Main(string[] args)
  {
    var config = new OnlineSpeechDenoiserConfig();
    config.Model.Gtcrn.Model = "./gtcrn_simple.onnx";
    config.Model.Debug = 1;
    config.Model.NumThreads = 1;

    var sd = new OnlineSpeechDenoiser(config);
    WaveReader waveReader = new WaveReader("./inp_16k.wav");

    var samples = waveReader.Samples;
    var output = new List<float>(samples.Length);
    int frameShift = sd.FrameShiftInSamples;

    for (int start = 0; start < samples.Length; start += frameShift)
    {
      int count = Math.Min(frameShift, samples.Length - start);
      float[] chunk = new float[count];
      Array.Copy(samples, start, chunk, 0, count);
      output.AddRange(sd.Run(chunk, waveReader.SampleRate).Samples);
    }

    output.AddRange(sd.Flush().Samples);

    var outFilename = "./enhanced-online-gtcrn.wav";
    var outAudio = new GeneratedDenoisedAudio(output.ToArray(), sd.SampleRate);
    if (outAudio.SaveToWaveFile(outFilename))
    {
      Console.WriteLine($"Wrote to {outFilename} succeeded!");
    }
    else
    {
      Console.WriteLine($"Failed to write {outFilename}");
    }
  }

  private sealed class GeneratedDenoisedAudio
  {
    private readonly float[] _samples;
    private readonly int _sampleRate;

    public GeneratedDenoisedAudio(float[] samples, int sampleRate)
    {
      _samples = samples;
      _sampleRate = sampleRate;
    }

    public bool SaveToWaveFile(string filename)
    {
      byte[] utf8Filename = Encoding.UTF8.GetBytes(filename);
      byte[] utf8FilenameWithNull = new byte[utf8Filename.Length + 1];
      Array.Copy(utf8Filename, utf8FilenameWithNull, utf8Filename.Length);
      utf8FilenameWithNull[utf8Filename.Length] = 0;
      return SherpaOnnxWriteWave(_samples, _samples.Length, _sampleRate, utf8FilenameWithNull) == 1;
    }

    [DllImport(Dll.Filename)]
    private static extern int SherpaOnnxWriteWave(
        float[] samples,
        int n,
        int sampleRate,
        [MarshalAs(UnmanagedType.LPArray, ArraySubType = UnmanagedType.I1)] byte[] utf8Filename);
  }
}


================================================
FILE: dotnet-examples/streaming-speech-enhancement-gtcrn/run.sh
================================================
#!/usr/bin/env bash
set -ex

if [ ! -f ./gtcrn_simple.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
fi

if [ ! -f ./inp_16k.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
fi

dotnet run


================================================
FILE: dotnet-examples/streaming-speech-enhancement-gtcrn/streaming-speech-enhancement-gtcrn.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>streaming_speech_enhancement_gtcrn</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: dotnet-examples/supertonic-tts/Program.cs
================================================
// Copyright (c)  2026  Xiaomi Corporation
//
// This file shows how to use a non-streaming Supertonic TTS model
// for text-to-speech
// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/tts/supertonic.html
// and
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
// to download pre-trained models
using SherpaOnnx;
using System.Runtime.InteropServices;

class SupertonicTtsDemo
{
  static void Main(string[] args)
  {
    TestEn();
  }

  static void TestEn()
  {
    var config = new OfflineTtsConfig();
    config.Model.Supertonic.DurationPredictor = "./sherpa-onnx-supertonic-tts-int8-2026-03-06/duration_predictor.int8.onnx";
    config.Model.Supertonic.TextEncoder = "./sherpa-onnx-supertonic-tts-int8-2026-03-06/text_encoder.int8.onnx";
    config.Model.Supertonic.VectorEstimator = "./sherpa-onnx-supertonic-tts-int8-2026-03-06/vector_estimator.int8.onnx";
    config.Model.Supertonic.Vocoder = "./sherpa-onnx-supertonic-tts-int8-2026-03-06/vocoder.int8.onnx";
    config.Model.Supertonic.TtsJson = "./sherpa-onnx-supertonic-tts-int8-2026-03-06/tts.json";
    config.Model.Supertonic.UnicodeIndexer = "./sherpa-onnx-supertonic-tts-int8-2026-03-06/unicode_indexer.bin";
    config.Model.Supertonic.VoiceStyle = "./sherpa-onnx-supertonic-tts-int8-2026-03-06/voice.bin";

    config.Model.NumThreads = 2;
    config.Model.Debug = 1;
    config.Model.Provider = "cpu";

    OfflineTtsGenerationConfig genConfig = new OfflineTtsGenerationConfig();
    genConfig.Sid = 6;
    genConfig.NumSteps = 5;
    genConfig.Speed = 1.25f;  // larger -> faster
    genConfig.Extra["lang"] = "en";

    var tts = new OfflineTts(config);
    var text = "Today as always, men fall into two groups: slaves and free men. Whoever " +
      "does not have two-thirds of his day for himself, is a slave, whatever " +
      "he may be: a statesman, a businessman, an official, or a scholar.";

    var MyCallback = (IntPtr samples, int n, float progress, IntPtr arg) =>
    {
      float[] data = new float[n];
      Marshal.Copy(samples, data, 0, n);
      // You can process samples here, e.g., play them.
      Console.WriteLine($"Progress {progress*100}%");

      // 1 means to keep generating
      // 0 means to stop generating
      return 1;
    };

    var callback = new OfflineTtsCallbackProgressWithArg(MyCallback);

    var audio = tts.GenerateWithConfig(text, genConfig, callback);

    var outputFilename = "./generated-supertonic-en.wav";
    var ok = audio.SaveToWaveFile(outputFilename);

    if (ok)
    {
      Console.WriteLine($"Wrote to {outputFilename} succeeded!");
    }
    else
    {
      Console.WriteLine($"Failed to write {outputFilename}");
    }
  }
}


================================================
FILE: dotnet-examples/supertonic-tts/run.sh
================================================
#!/usr/bin/env bash
set -ex

if [ ! -f ./sherpa-onnx-supertonic-tts-int8-2026-03-06/duration_predictor.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
  tar xvf sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
  rm sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
fi

dotnet run


================================================
FILE: dotnet-examples/supertonic-tts/supertonic-tts.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>supertonic_tts</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: dotnet-examples/vad-non-streaming-asr-paraformer/Program.cs
================================================
﻿// Copyright (c)  2024  Xiaomi Corporation
//
// This file shows how to use a silero_vad model or ten-vad model
// with a non-streaming Paraformer for speech recognition.
using SherpaOnnx;
using System.IO;


class VadNonStreamingAsrParaformer
{
  static void Main(string[] args)
  {
    // please download model files from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    var config = new OfflineRecognizerConfig();
    config.ModelConfig.Paraformer.Model = "./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx";
    config.ModelConfig.Tokens = "./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt";
    config.ModelConfig.Debug = 0;
    var recognizer = new OfflineRecognizer(config);

    var vadModelConfig = new VadModelConfig();
    if (File.Exists("./silero_vad.onnx"))
    {
      Console.WriteLine("Use silero-vad");
      vadModelConfig.SileroVad.Model = "./silero_vad.onnx";
      vadModelConfig.SileroVad.Threshold = 0.3F;
      vadModelConfig.SileroVad.MinSilenceDuration = 0.5F;
      vadModelConfig.SileroVad.MinSpeechDuration = 0.25F;
      vadModelConfig.SileroVad.MaxSpeechDuration = 5.0F;
      vadModelConfig.SileroVad.WindowSize = 512;
    }
    else if (File.Exists("./ten-vad.onnx"))
    {
      Console.WriteLine("Use ten-vad");
      vadModelConfig.TenVad.Model = "./ten-vad.onnx";
      vadModelConfig.TenVad.Threshold = 0.3F;
      vadModelConfig.TenVad.MinSilenceDuration = 0.5F;
      vadModelConfig.TenVad.MinSpeechDuration = 0.25F;
      vadModelConfig.TenVad.MaxSpeechDuration = 5.0F;
      vadModelConfig.TenVad.WindowSize = 256;
    }
    else
    {
      Console.WriteLine("Please download ./silero_vad.onnx or ./ten-vad.onnx");
      return;
    }
    vadModelConfig.Debug = 0;

    var vad = new VoiceActivityDetector(vadModelConfig, 60);

    var testWaveFilename = "./lei-jun-test.wav";
    var reader = new WaveReader(testWaveFilename);

    int numSamples = reader.Samples.Length;
    int windowSize = vadModelConfig.SileroVad.WindowSize;

    if (vadModelConfig.TenVad.Model != "")
    {
      windowSize = vadModelConfig.TenVad.WindowSize;
    }

    int sampleRate = vadModelConfig.SampleRate;
    int numIter = numSamples / windowSize;

    for (int i = 0; i != numIter; ++i)
    {
      int start = i * windowSize;
      var samples = new float[windowSize];
      Array.Copy(reader.Samples, start, samples, 0, windowSize);
      vad.AcceptWaveform(samples);
      if (vad.IsSpeechDetected())
      {
        while (!vad.IsEmpty())
        {
          SpeechSegment segment = vad.Front();
          var startTime = segment.Start / (float)sampleRate;
          var duration = segment.Samples.Length / (float)sampleRate;

          OfflineStream stream = recognizer.CreateStream();
          stream.AcceptWaveform(sampleRate, segment.Samples);
          recognizer.Decode(stream);
          var text = stream.Result.Text;

          if (!string.IsNullOrEmpty(text))
          {
            Console.WriteLine("{0}--{1}: {2}", string.Format("{0:0.00}", startTime),
                string.Format("{0:0.00}", startTime + duration), text);
          }

          vad.Pop();
        }
      }
    }

    vad.Flush();

    while (!vad.IsEmpty())
    {
      var segment = vad.Front();
      float startTime = segment.Start / (float)sampleRate;
      float duration = segment.Samples.Length / (float)sampleRate;

      var stream = recognizer.CreateStream();
      stream.AcceptWaveform(sampleRate, segment.Samples);
      recognizer.Decode(stream);
      var text = stream.Result.Text;

      if (!string.IsNullOrEmpty(text))
      {
        Console.WriteLine("{0}--{1}: {2}", string.Format("{0:0.00}", startTime),
            string.Format("{0:0.00}", startTime + duration), text);
      }

      vad.Pop();
    }
  }
}


================================================
FILE: dotnet-examples/vad-non-streaming-asr-paraformer/run-ten-vad.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -f ./ten-vad.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
fi

if [ ! -f ./lei-jun-test.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

if [ ! -f ./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2

  tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
  rm sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
fi

dotnet run


================================================
FILE: dotnet-examples/vad-non-streaming-asr-paraformer/run.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -f ./silero_vad.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [ ! -f ./lei-jun-test.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

if [ ! -f ./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2

  tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
  rm sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
fi

dotnet run


================================================
FILE: dotnet-examples/vad-non-streaming-asr-paraformer/vad-non-streaming-asr-paraformer.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>vad_non_streaming_asr_paraformer</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: dotnet-examples/vad-non-streaming-funasr-nano/Program.cs
================================================
﻿// Copyright (c)  2026  Xiaomi Corporation
//
// This file shows how to use a silero_vad model or ten-vad model
// with a non-streaming FunASR Nano for speech recognition.
using SherpaOnnx;
using System.IO;


class VadNonStreamingFunAsrNano
{
  static void Main(string[] args)
  {
    // please download model files from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    var config = new OfflineRecognizerConfig();
    config.ModelConfig.FunAsrNano.EncoderAdaptor = "./sherpa-onnx-funasr-nano-int8-2025-12-30/encoder_adaptor.int8.onnx";
    config.ModelConfig.FunAsrNano.LLM = "./sherpa-onnx-funasr-nano-int8-2025-12-30/llm.int8.onnx";
    config.ModelConfig.FunAsrNano.Embedding = "./sherpa-onnx-funasr-nano-int8-2025-12-30/embedding.int8.onnx";
    config.ModelConfig.FunAsrNano.Tokenizer = "./sherpa-onnx-funasr-nano-int8-2025-12-30/Qwen3-0.6B";
    config.ModelConfig.Tokens = "";
    config.ModelConfig.Debug = 0;
    var recognizer = new OfflineRecognizer(config);

    var vadModelConfig = new VadModelConfig();
    if (File.Exists("./silero_vad.onnx"))
    {
      Console.WriteLine("Use silero-vad");
      vadModelConfig.SileroVad.Model = "./silero_vad.onnx";
      vadModelConfig.SileroVad.Threshold = 0.3F;
      vadModelConfig.SileroVad.MinSilenceDuration = 0.5F;
      vadModelConfig.SileroVad.MinSpeechDuration = 0.25F;
      vadModelConfig.SileroVad.MaxSpeechDuration = 5.0F;
      vadModelConfig.SileroVad.WindowSize = 512;
    }
    else if (File.Exists("./ten-vad.onnx"))
    {
      Console.WriteLine("Use ten-vad");
      vadModelConfig.TenVad.Model = "./ten-vad.onnx";
      vadModelConfig.TenVad.Threshold = 0.3F;
      vadModelConfig.TenVad.MinSilenceDuration = 0.5F;
      vadModelConfig.TenVad.MinSpeechDuration = 0.25F;
      vadModelConfig.TenVad.MaxSpeechDuration = 5.0F;
      vadModelConfig.TenVad.WindowSize = 256;
    }
    else
    {
      Console.WriteLine("Please download ./silero_vad.onnx or ./ten-vad.onnx");
      return;
    }
    vadModelConfig.Debug = 0;

    var vad = new VoiceActivityDetector(vadModelConfig, 60);

    var testWaveFilename = "./lei-jun-test.wav";
    var reader = new WaveReader(testWaveFilename);

    int numSamples = reader.Samples.Length;
    int windowSize = vadModelConfig.SileroVad.WindowSize;

    if (vadModelConfig.TenVad.Model != "")
    {
      windowSize = vadModelConfig.TenVad.WindowSize;
    }

    int sampleRate = vadModelConfig.SampleRate;
    int numIter = numSamples / windowSize;

    for (int i = 0; i != numIter; ++i)
    {
      int start = i * windowSize;
      var samples = new float[windowSize];
      Array.Copy(reader.Samples, start, samples, 0, windowSize);
      vad.AcceptWaveform(samples);
      if (vad.IsSpeechDetected())
      {
        while (!vad.IsEmpty())
        {
          SpeechSegment segment = vad.Front();
          var startTime = segment.Start / (float)sampleRate;
          var duration = segment.Samples.Length / (float)sampleRate;

          OfflineStream stream = recognizer.CreateStream();
          stream.AcceptWaveform(sampleRate, segment.Samples);
          recognizer.Decode(stream);
          var text = stream.Result.Text;

          if (!string.IsNullOrEmpty(text))
          {
            Console.WriteLine("{0}--{1}: {2}", string.Format("{0:0.00}", startTime),
                string.Format("{0:0.00}", startTime + duration), text);
          }

          vad.Pop();
        }
      }
    }

    vad.Flush();

    while (!vad.IsEmpty())
    {
      var segment = vad.Front();
      float startTime = segment.Start / (float)sampleRate;
      float duration = segment.Samples.Length / (float)sampleRate;

      var stream = recognizer.CreateStream();
      stream.AcceptWaveform(sampleRate, segment.Samples);
      recognizer.Decode(stream);
      var text = stream.Result.Text;

      if (!string.IsNullOrEmpty(text))
      {
        Console.WriteLine("{0}--{1}: {2}", string.Format("{0:0.00}", startTime),
            string.Format("{0:0.00}", startTime + duration), text);
      }

      vad.Pop();
    }
  }
}


================================================
FILE: dotnet-examples/vad-non-streaming-funasr-nano/run-ten-vad.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -f ./ten-vad.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
fi

if [ ! -f ./lei-jun-test.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

if [ ! -f ./sherpa-onnx-funasr-nano-int8-2025-12-30/embedding.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
  tar xvf sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
  rm sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
fi

dotnet run


================================================
FILE: dotnet-examples/vad-non-streaming-funasr-nano/run.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -f ./silero_vad.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [ ! -f ./lei-jun-test.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

if [ ! -f ./sherpa-onnx-funasr-nano-int8-2025-12-30/embedding.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
  tar xvf sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
  rm sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
fi

dotnet run


================================================
FILE: dotnet-examples/vad-non-streaming-funasr-nano/vad-non-streaming-funasr-nano.csproj
================================================
﻿<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>vad_non_streaming_funasr_nano</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: dotnet-examples/version-test/Program.cs
================================================
﻿// Copyright (c)  2025  Xiaomi Corporation
using SherpaOnnx;

class VersionTestDemo
{
  static void Main(string[] args)
  {
    var version = VersionInfo.Version;
    var gitSha1 = VersionInfo.GitSha1;
    var gitDate = VersionInfo.GitDate;

    Console.WriteLine("sherpa-onnx version: {0}", version);
    Console.WriteLine("sherpa-onnx gitSha1: {0}", gitSha1);
    Console.WriteLine("sherpa-onnx gitDate: {0}", gitDate);
  }
}


================================================
FILE: dotnet-examples/version-test/run.sh
================================================
#!/usr/bin/env bash

set -ex

dotnet run


================================================
FILE: dotnet-examples/version-test/version-test.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>version_test</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: dotnet-examples/zipvoice-tts/Program.cs
================================================
// Copyright (c)  2026  Xiaomi Corporation
//
// This file shows how to use a non-streaming ZipVoice model
// for zero-shot text-to-speech.
// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/tts/zipvoice.html
// and
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
// to download pre-trained models
using SherpaOnnx;
using System.Runtime.InteropServices;

class ZipVoiceTtsDemo
{
  static void Main(string[] args)
  {
    TestZhEn();
  }

  static void TestZhEn()
  {
    var config = new OfflineTtsConfig();
    config.Model.ZipVoice.Tokens = "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/tokens.txt";
    config.Model.ZipVoice.Encoder = "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/encoder.int8.onnx";
    config.Model.ZipVoice.Decoder = "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/decoder.int8.onnx";
    config.Model.ZipVoice.Vocoder = "./vocos_24khz.onnx";
    config.Model.ZipVoice.DataDir = "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/espeak-ng-data";
    config.Model.ZipVoice.Lexicon = "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/lexicon.txt";

    config.Model.NumThreads = 2;
    config.Model.Debug = 1;
    config.Model.Provider = "cpu";

    var referenceWaveFilename = "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/test_wavs/leijun-1.wav";
    var reader = new WaveReader(referenceWaveFilename);

    OfflineTtsGenerationConfig genConfig = new OfflineTtsGenerationConfig();
    genConfig.ReferenceAudio = reader.Samples;
    genConfig.ReferenceSampleRate = reader.SampleRate;
    genConfig.ReferenceText = "那还是三十六年前, 一九八七年. 我呢考上了武汉大学的计算机系.";
    genConfig.NumSteps = 4;
    genConfig.Extra["min_char_in_sentence"] = "10";

    var tts = new OfflineTts(config);
    var text = "小米的价值观是真诚, 热爱. 真诚，就是不欺人也不自欺. 热爱, 就是全心投入并享受其中.";

    var myCallback = (IntPtr samples, int n, float progress, IntPtr arg) =>
    {
      float[] data = new float[n];
      Marshal.Copy(samples, data, 0, n);
      Console.WriteLine($"Progress {progress * 100}%");

      // 1 means to keep generating
      // 0 means to stop generating
      return 1;
    };

    var callback = new OfflineTtsCallbackProgressWithArg(myCallback);

    var audio = tts.GenerateWithConfig(text, genConfig, callback);

    var outputFilename = "./generated-zipvoice-zh-en.wav";
    var ok = audio.SaveToWaveFile(outputFilename);

    if (ok)
    {
      Console.WriteLine($"Wrote to {outputFilename} succeeded!");
    }
    else
    {
      Console.WriteLine($"Failed to write {outputFilename}");
    }
  }
}


================================================
FILE: dotnet-examples/zipvoice-tts/run.sh
================================================
#!/usr/bin/env bash
set -ex

if [ ! -f ./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
  tar xvf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
  rm sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
fi

if [ ! -f ./vocos_24khz.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos_24khz.onnx
fi

dotnet run


================================================
FILE: dotnet-examples/zipvoice-tts/zipvoice-tts.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>zipvoice_tts</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: dotnet-examples/zipvoice-tts-play/Program.cs
================================================
// Copyright (c)  2026  Xiaomi Corporation
//
// This file shows how to use a non-streaming ZipVoice model
// for zero-shot text-to-speech with playback.
// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/tts/zipvoice.html
// and
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
// to download pre-trained models
using PortAudioSharp;
using SherpaOnnx;
using System.Collections.Concurrent;
using System.Runtime.InteropServices;

class ZipVoiceTtsDemo
{
  static void Main(string[] args)
  {
    TestZhEn();
  }

  static void TestZhEn()
  {
    var config = new OfflineTtsConfig();
    config.Model.ZipVoice.Tokens = "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/tokens.txt";
    config.Model.ZipVoice.Encoder = "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/encoder.int8.onnx";
    config.Model.ZipVoice.Decoder = "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/decoder.int8.onnx";
    config.Model.ZipVoice.Vocoder = "./vocos_24khz.onnx";
    config.Model.ZipVoice.DataDir = "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/espeak-ng-data";
    config.Model.ZipVoice.Lexicon = "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/lexicon.txt";

    config.Model.NumThreads = 2;
    config.Model.Debug = 1;
    config.Model.Provider = "cpu";

    var referenceWaveFilename = "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/test_wavs/leijun-1.wav";
    var reader = new WaveReader(referenceWaveFilename);

    OfflineTtsGenerationConfig genConfig = new OfflineTtsGenerationConfig();
    genConfig.ReferenceAudio = reader.Samples;
    genConfig.ReferenceSampleRate = reader.SampleRate;
    genConfig.ReferenceText = "那还是三十六年前, 一九八七年. 我呢考上了武汉大学的计算机系.";
    genConfig.NumSteps = 4;
    genConfig.Extra["min_char_in_sentence"] = "10";

    var tts = new OfflineTts(config);
    var text = "小米的价值观是真诚, 热爱. 真诚，就是不欺人也不自欺. 热爱, 就是全心投入并享受其中.";

    Console.WriteLine(PortAudio.VersionInfo.versionText);
    PortAudio.Initialize();
    Console.WriteLine($"Number of devices: {PortAudio.DeviceCount}");

    for (int i = 0; i != PortAudio.DeviceCount; ++i)
    {
      Console.WriteLine($" Device {i}");
      DeviceInfo deviceInfo = PortAudio.GetDeviceInfo(i);
      Console.WriteLine($"   Name: {deviceInfo.name}");
      Console.WriteLine($"   Max output channels: {deviceInfo.maxOutputChannels}");
      Console.WriteLine($"   Default sample rate: {deviceInfo.defaultSampleRate}");
    }
    int deviceIndex = PortAudio.DefaultOutputDevice;
    if (deviceIndex == PortAudio.NoDevice)
    {
      Console.WriteLine("No default output device found. Please use ../zipvoice-tts instead");
      Environment.Exit(1);
    }

    var info = PortAudio.GetDeviceInfo(deviceIndex);
    Console.WriteLine();
    Console.WriteLine($"Use output default device {deviceIndex} ({info.name})");

    var param = new StreamParameters();
    param.device = deviceIndex;
    param.channelCount = 1;
    param.sampleFormat = SampleFormat.Float32;
    param.suggestedLatency = info.defaultLowOutputLatency;
    param.hostApiSpecificStreamInfo = IntPtr.Zero;

    var dataItems = new BlockingCollection<float[]>();

    var myCallback = (IntPtr samples, int n, float progress, IntPtr arg) =>
    {
      Console.WriteLine($"Progress {progress * 100}%");

      float[] data = new float[n];
      Marshal.Copy(samples, data, 0, n);
      dataItems.Add(data);

      // 1 means to keep generating
      // 0 means to stop generating
      return 1;
    };

    var playFinished = false;

    float[]? lastSampleArray = null;
    int lastIndex = 0;

    PortAudioSharp.Stream.Callback playCallback = (IntPtr input, IntPtr output,
        UInt32 frameCount,
        ref StreamCallbackTimeInfo timeInfo,
        StreamCallbackFlags statusFlags,
        IntPtr userData
        ) =>
    {
      if (dataItems.IsCompleted && lastSampleArray == null && lastIndex == 0)
      {
        Console.WriteLine("Finished playing");
        playFinished = true;
        return StreamCallbackResult.Complete;
      }

      int expected = Convert.ToInt32(frameCount);
      int i = 0;

      while ((lastSampleArray != null || dataItems.Count != 0) && (i < expected))
      {
        int needed = expected - i;

        if (lastSampleArray != null)
        {
          int remaining = lastSampleArray.Length - lastIndex;
          if (remaining >= needed)
          {
            float[] thisBlock = lastSampleArray.Skip(lastIndex).Take(needed).ToArray();
            lastIndex += needed;
            if (lastIndex == lastSampleArray.Length)
            {
              lastSampleArray = null;
              lastIndex = 0;
            }

            Marshal.Copy(thisBlock, 0, IntPtr.Add(output, i * sizeof(float)), needed);
            return StreamCallbackResult.Continue;
          }

          float[] thisBlock2 = lastSampleArray.Skip(lastIndex).Take(remaining).ToArray();
          lastIndex = 0;
          lastSampleArray = null;

          Marshal.Copy(thisBlock2, 0, IntPtr.Add(output, i * sizeof(float)), remaining);
          i += remaining;
          continue;
        }

        if (dataItems.Count != 0)
        {
          lastSampleArray = dataItems.Take();
          lastIndex = 0;
        }
      }

      if (i < expected)
      {
        int sizeInBytes = (expected - i) * 4;
        Marshal.Copy(new byte[sizeInBytes], 0, IntPtr.Add(output, i * sizeof(float)), sizeInBytes);
      }

      return StreamCallbackResult.Continue;
    };

    PortAudioSharp.Stream stream = new PortAudioSharp.Stream(inParams: null, outParams: param, sampleRate: tts.SampleRate,
        framesPerBuffer: 0,
        streamFlags: StreamFlags.ClipOff,
        callback: playCallback,
        userData: IntPtr.Zero
        );

    stream.Start();

    var callback = new OfflineTtsCallbackProgressWithArg(myCallback);
    var audio = tts.GenerateWithConfig(text, genConfig, callback);

    var outputFilename = "./generated-zipvoice-zh-en-play.wav";
    var ok = audio.SaveToWaveFile(outputFilename);

    if (ok)
    {
      Console.WriteLine($"Wrote to {outputFilename} succeeded!");
    }
    else
    {
      Console.WriteLine($"Failed to write {outputFilename}");
    }

    dataItems.CompleteAdding();

    while (!playFinished)
    {
      Thread.Sleep(100);
    }
  }
}


================================================
FILE: dotnet-examples/zipvoice-tts-play/run.sh
================================================
#!/usr/bin/env bash
set -ex

if [ ! -f ./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
  tar xvf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
  rm sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
fi

if [ ! -f ./vocos_24khz.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos_24khz.onnx
fi

dotnet run


================================================
FILE: dotnet-examples/zipvoice-tts-play/zipvoice-tts-play.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>zipvoice_tts_play</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="PortAudioSharp2" Version="*" />
  </ItemGroup>

  <ItemGroup>
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>


================================================
FILE: ffmpeg-examples/Makefile
================================================
CC=g++
GDB ?= FALSE

# use pkg-config for getting CFLAGS and LDLIBS
SHARED_LIBS=libavdevice                          \
            libavformat                          \
            libavfilter                          \
            libavcodec                           \
            libswresample                        \
            libswscale                           \
            libavutil

ifeq ($(GDB), TRUE)
	OPTFLAG += -g
endif

# CFLAGS := $(shell pkg-config --cflags $(SHARED_LIBS)) -I.. -Wall -std=c++17 -fopenmp ${OPTFLAG}
CFLAGS := $(shell pkg-config --cflags $(SHARED_LIBS)) -I.. -Wall -std=c++17  ${OPTFLAG}
LDLIBS := $(shell pkg-config --libs $(SHARED_LIBS))

CUR_DIR :=$(shell pwd)

LDLIBS += -L ../build/lib
LDLIBS += -L ../build/_deps/onnxruntime-src/lib
LDLIBS += -lsherpa-onnx-c-api -lonnxruntime
LDLIBS += -Wl,-rpath,${CUR_DIR}/../build/lib
LDLIBS += -Wl,-rpath,${CUR_DIR}/../build/_deps/onnxruntime-src/lib

#Get libavutil version and extract major, minor and micro
LIBAVUTIL_VERSION := $(shell pkg-config --modversion libavutil)
LIBAVUTIL_MAJOR := $(shell echo "$(LIBAVUTIL_VERSION)" | awk -F. '{print $$1}')
LIBAVUTIL_MINOR := $(shell echo "$(LIBAVUTIL_VERSION)" | awk -F. '{print $$2}')
LIBAVUTIL_MICRO := $(shell echo "$(LIBAVUTIL_VERSION)" | awk -F. '{print $$3}')
#Check if libavutil version is 57.28.100 or above
FFMPEG_51_AND_ABOVE = $(shell echo "$(LIBAVUTIL_MAJOR) $(LIBAVUTIL_MINOR) $(LIBAVUTIL_MICRO)" | awk '{if ($$1 > 57 || ($$1 == 57 && $$2 > 28) || ($$1 == 57 && $$2 == 28 && $$3 >= 100)) print "TRUE"; else print "FALSE"}')
ifeq ($(FFMPEG_51_AND_ABOVE), FALSE)
$(error FFmpeg version should be n5.1 or above!)
endif

EXAMPLES=sherpa-onnx-ffmpeg

OBJS=$(addsuffix .o,$(EXAMPLES))

.phony: all clean

all: $(EXAMPLES)
	@echo $(EXAMPLES)
	$(RM) $(OBJS)

$(EXAMPLES): $(OBJS)
	$(CC) $(addsuffix .o,$@) $(CFLAGS) $(LDLIBS) -o $@

%.o : %.c
	${CC} ${CFLAGS} -c -o $@ $<

clean:
	$(RM) $(EXAMPLES) $(OBJS)

build_info:
	@echo "libavutil version: $(LIBAVUTIL_VERSION)"
	@echo "Supported examples: $(EXAMPLES)"


================================================
FILE: ffmpeg-examples/README.md
================================================
# Introduction

You can use `sherpa-onnx-ffmpeg` to decode a wav, mp3, or even a URL.

See <https://github.com/ossrs/srs>
for more supported formats and protocols, e.g.,
RTMP/WebRTC/HLS/HTTP-FLV/SRT/MPEG-DASH/GB28181.


## How to use

Please have a look at

```
./run.sh
```


================================================
FILE: ffmpeg-examples/how-to-fix-errors.md
================================================
# Fixes for errors

To fix the following error:
```
Package libavdevice was not found in the pkg-config search path.
```
please run

```
sudo apt-get install libavdevice-dev
```

To fix the following error
```
Makefile:28: *** FFmpeg version should be n5.1 or above!.  Stop.
```
please run
```
sudo apt-get install software-properties-common
sudo add-apt-repository ppa:savoury1/ffmpeg4
sudo add-apt-repository ppa:savoury1/ffmpeg5
sudo apt-get update
sudo apt-get install ffmpeg --reinstall
sudo apt-get install libavutil-dev --reinstall
```

To fix the following error:
```
ModuleNotFoundError: No module named 'apt_pkg'
```
please run:
```
sudo apt-get install python-apt
```


================================================
FILE: ffmpeg-examples/sherpa-onnx-ffmpeg.c
================================================
// ffmpeg-examples/sherpa-onnx-ffmpeg.c
//
// Copyright (c)  2023  Xiaomi Corporation
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

/*
 * Copyright (c) 2010 Nicolas George
 * Copyright (c) 2011 Stefano Sabatini
 * Copyright (c) 2012 Clément Bœsch
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/**
 * @file audio decoding and filtering usage example
 * @example sherpa-onnx-ffmpeg.c
 *
 * Demux, decode and filter audio input file, generate a raw audio
 * file to be played with ffplay.
 */

#include <unistd.h>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavfilter/buffersink.h>
#include <libavfilter/buffersrc.h>
#include <libavformat/avformat.h>
#include <libavutil/channel_layout.h>
#include <libavutil/opt.h>
}

static const char *filter_descr =
    "aresample=16000,aformat=sample_fmts=s16:channel_layouts=mono";

static AVFormatContext *fmt_ctx;
static AVCodecContext *dec_ctx;
AVFilterContext *buffersink_ctx;
AVFilterContext *buffersrc_ctx;
AVFilterGraph *filter_graph;
static int audio_stream_index = -1;

static int open_input_file(const char *filename) {
  const AVCodec *dec;
  int ret;

  if ((ret = avformat_open_input(&fmt_ctx, filename, NULL, NULL)) < 0) {
    av_log(NULL, AV_LOG_ERROR, "Cannot open input file %s\n", filename);
    return ret;
  }

  if ((ret = avformat_find_stream_info(fmt_ctx, NULL)) < 0) {
    av_log(NULL, AV_LOG_ERROR, "Cannot find stream information\n");
    return ret;
  }

  /* select the audio stream */
  ret = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, &dec, 0);
  if (ret < 0) {
    av_log(NULL, AV_LOG_ERROR,
           "Cannot find an audio stream in the input file\n");
    return ret;
  }
  audio_stream_index = ret;

  /* create decoding context */
  dec_ctx = avcodec_alloc_context3(dec);
  if (!dec_ctx) return AVERROR(ENOMEM);
  avcodec_parameters_to_context(dec_ctx,
                                fmt_ctx->streams[audio_stream_index]->codecpar);

  /* init the audio decoder */
  if ((ret = avcodec_open2(dec_ctx, dec, NULL)) < 0) {
    av_log(NULL, AV_LOG_ERROR, "Cannot open audio decoder\n");
    return ret;
  }

  return 0;
}

static int init_filters(const char *filters_descr) {
  char args[512];
  int ret = 0;
  const AVFilter *abuffersrc = avfilter_get_by_name("abuffer");
  const AVFilter *abuffersink = avfilter_get_by_name("abuffersink");
  AVFilterInOut *outputs = avfilter_inout_alloc();
  AVFilterInOut *inputs = avfilter_inout_alloc();
  static const enum AVSampleFormat out_sample_fmts[] = {AV_SAMPLE_FMT_S16,
                                                        AV_SAMPLE_FMT_NONE};
  static const int out_sample_rates[] = {16000, -1};
  const AVFilterLink *outlink;
  AVRational time_base = fmt_ctx->streams[audio_stream_index]->time_base;

  filter_graph = avfilter_graph_alloc();
  if (!outputs || !inputs || !filter_graph) {
    ret = AVERROR(ENOMEM);
    goto end;
  }

  /* buffer audio source: the decoded frames from the decoder will be inserted
   * here. */
  if (dec_ctx->ch_layout.order == AV_CHANNEL_ORDER_UNSPEC)
    av_channel_layout_default(&dec_ctx->ch_layout,
                              dec_ctx->ch_layout.nb_channels);
  ret = snprintf(args, sizeof(args),
                 "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=",
                 time_base.num, time_base.den, dec_ctx->sample_rate,
                 av_get_sample_fmt_name(dec_ctx->sample_fmt));
  av_channel_layout_describe(&dec_ctx->ch_layout, args + ret,
                             sizeof(args) - ret);
  ret = avfilter_graph_create_filter(&buffersrc_ctx, abuffersrc, "in", args,
                                     NULL, filter_graph);
  if (ret < 0) {
    av_log(NULL, AV_LOG_ERROR, "Cannot create audio buffer source\n");
    goto end;
  }

  /* buffer audio sink: to terminate the filter chain. */
  ret = avfilter_graph_create_filter(&buffersink_ctx, abuffersink, "out", NULL,
                                     NULL, filter_graph);
  if (ret < 0) {
    av_log(NULL, AV_LOG_ERROR, "Cannot create audio buffer sink\n");
    goto end;
  }

  ret = av_opt_set_int_list(buffersink_ctx, "sample_fmts", out_sample_fmts, -1,
                            AV_OPT_SEARCH_CHILDREN);
  if (ret < 0) {
    av_log(NULL, AV_LOG_ERROR, "Cannot set output sample format\n");
    goto end;
  }

  ret =
      av_opt_set(buffersink_ctx, "ch_layouts", "mono", AV_OPT_SEARCH_CHILDREN);
  if (ret < 0) {
    av_log(NULL, AV_LOG_ERROR, "Cannot set output channel layout\n");
    goto end;
  }

  ret = av_opt_set_int_list(buffersink_ctx, "sample_rates", out_sample_rates,
                            -1, AV_OPT_SEARCH_CHILDREN);
  if (ret < 0) {
    av_log(NULL, AV_LOG_ERROR, "Cannot set output sample rate\n");
    goto end;
  }

  /*
   * Set the endpoints for the filter graph. The filter_graph will
   * be linked to the graph described by filters_descr.
   */

  /*
   * The buffer source output must be connected to the input pad of
   * the first filter described by filters_descr; since the first
   * filter input label is not specified, it is set to "in" by
   * default.
   */
  outputs->name = av_strdup("in");
  outputs->filter_ctx = buffersrc_ctx;
  outputs->pad_idx = 0;
  outputs->next = NULL;

  /*
   * The buffer sink input must be connected to the output pad of
   * the last filter described by filters_descr; since the last
   * filter output label is not specified, it is set to "out" by
   * default.
   */
  inputs->name = av_strdup("out");
  inputs->filter_ctx = buffersink_ctx;
  inputs->pad_idx = 0;
  inputs->next = NULL;

  if ((ret = avfilter_graph_parse_ptr(filter_graph, filters_descr, &inputs,
                                      &outputs, NULL)) < 0)
    goto end;

  if ((ret = avfilter_graph_config(filter_graph, NULL)) < 0) goto end;

  /* Print summary of the sink buffer
   * Note: args buffer is reused to store channel layout string */
  outlink = buffersink_ctx->inputs[0];
  av_channel_layout_describe(&outlink->ch_layout, args, sizeof(args));
  av_log(NULL, AV_LOG_INFO, "Output: srate:%dHz fmt:%s chlayout:%s\n",
         (int)outlink->sample_rate,
         (char *)av_x_if_null(
             av_get_sample_fmt_name((AVSampleFormat)outlink->format), "?"),
         args);

end:
  avfilter_inout_free(&inputs);
  avfilter_inout_free(&outputs);

  return ret;
}

static void sherpa_decode_frame(const AVFrame *frame,
                                const SherpaOnnxOnlineRecognizer *recognizer,
                                const SherpaOnnxOnlineStream *stream,
                                const SherpaOnnxDisplay *display,
                                int32_t *segment_id) {
#define N 3200  // 100s. Sample rate is fixed to 16 kHz
  static float samples[N];
  static int nb_samples = 0;
  const int16_t *p = (int16_t *)frame->data[0];

  if (frame->nb_samples + nb_samples > N) {
    SherpaOnnxOnlineStreamAcceptWaveform(stream, 16000, samples, nb_samples);
    while (SherpaOnnxIsOnlineStreamReady(recognizer, stream)) {
      SherpaOnnxDecodeOnlineStream(recognizer, stream);
    }

    const SherpaOnnxOnlineRecognizerResult *r =
        SherpaOnnxGetOnlineStreamResult(recognizer, stream);
    if (strlen(r->text)) {
      SherpaOnnxPrint(display, *segment_id, r->text);
    }

    if (SherpaOnnxOnlineStreamIsEndpoint(recognizer, stream)) {
      if (strlen(r->text)) {
        ++*segment_id;
      }
      SherpaOnnxOnlineStreamReset(recognizer, stream);
    }

    SherpaOnnxDestroyOnlineRecognizerResult(r);
    nb_samples = 0;
  }

  for (int i = 0; i < frame->nb_samples; i++) {
    samples[nb_samples++] = p[i] / 32768.;
  }
}

static inline char *__av_err2str(int errnum) {
  static char str[AV_ERROR_MAX_STRING_SIZE];
  memset(str, 0, sizeof(str));
  return av_make_error_string(str, AV_ERROR_MAX_STRING_SIZE, errnum);
}

int main(int argc, char **argv) {
  int ret;
  int num_threads = 1;
  AVPacket *packet = av_packet_alloc();
  AVFrame *frame = av_frame_alloc();
  AVFrame *filt_frame = av_frame_alloc();
  const char *kUsage =
      "\n"
      "Usage:\n"
      "  ./sherpa-onnx-ffmpeg \\\n"
      "    /path/to/tokens.txt \\\n"
      "    /path/to/encoder.onnx\\\n"
      "    /path/to/decoder.onnx\\\n"
      "    /path/to/joiner.onnx\\\n"
      "    /path/to/foo.wav [num_threads [decoding_method]]"
      "\n\n"
      "Default num_threads is 1.\n"
      "Valid decoding_method: greedy_search (default), modified_beam_search\n\n"
      "Please refer to \n"
      "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html\n"
      "for a list of pre-trained models to download.\n";

  if (!packet || !frame || !filt_frame) {
    fprintf(stderr, "Could not allocate frame or packet\n");
    exit(1);
  }

  if (argc < 6 || argc > 8) {
    fprintf(stderr, "%s\n", kUsage);
    return -1;
  }

  SherpaOnnxOnlineRecognizerConfig config;
  memset(&config, 0, sizeof(config));
  config.model_config.tokens = argv[1];
  config.model_config.transducer.encoder = argv[2];
  config.model_config.transducer.decoder = argv[3];
  config.model_config.transducer.joiner = argv[4];

  if (argc == 7 && atoi(argv[6]) > 0) {
    num_threads = atoi(argv[6]);
  }

  config.model_config.num_threads = num_threads;
  config.model_config.debug = 0;

  config.feat_config.sample_rate = 16000;
  config.feat_config.feature_dim = 80;

  config.decoding_method = "greedy_search";
  if (argc == 8) {
    config.decoding_method = argv[7];
  }

  config.max_active_paths = 4;

  config.enable_endpoint = 1;
  config.rule1_min_trailing_silence = 2.4;
  config.rule2_min_trailing_silence = 1.2;
  config.rule3_min_utterance_length = 300;

  const SherpaOnnxOnlineRecognizer *recognizer =
      SherpaOnnxCreateOnlineRecognizer(&config);
  const SherpaOnnxOnlineStream *stream =
      SherpaOnnxCreateOnlineStream(recognizer);
  const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50);
  int32_t segment_id = 0;

  if ((ret = open_input_file(argv[5])) < 0) exit(1);

  if ((ret = init_filters(filter_descr)) < 0) exit(1);

  /* read all packets */
  while (1) {
    if ((ret = av_read_frame(fmt_ctx, packet)) < 0) break;

    if (packet->stream_index == audio_stream_index) {
      ret = avcodec_send_packet(dec_ctx, packet);
      if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR,
               "Error while sending a packet to the decoder\n");
        break;
      }

      while (ret >= 0) {
        ret = avcodec_receive_frame(dec_ctx, frame);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
          break;
        } else if (ret < 0) {
          av_log(NULL, AV_LOG_ERROR,
                 "Error while receiving a frame from the decoder\n");
          exit(1);
        }

        if (ret >= 0) {
          /* push the audio data from decoded frame into the filtergraph */
          if (av_buffersrc_add_frame_flags(buffersrc_ctx, frame,
                                           AV_BUFFERSRC_FLAG_KEEP_REF) < 0) {
            av_log(NULL, AV_LOG_ERROR,
                   "Error while feeding the audio filtergraph\n");
            break;
          }

          /* pull filtered audio from the filtergraph */
          while (1) {
            ret = av_buffersink_get_frame(buffersink_ctx, filt_frame);
            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) break;
            if (ret < 0) exit(1);
            sherpa_decode_frame(filt_frame, recognizer, stream, display,
                                &segment_id);
            av_frame_unref(filt_frame);
          }
          av_frame_unref(frame);
        }
      }
    }
    av_packet_unref(packet);
  }

  // add some tail padding
  float tail_paddings[4800] = {0};  // 0.3 seconds at 16 kHz sample rate
  SherpaOnnxOnlineStreamAcceptWaveform(stream, 16000, tail_paddings, 4800);
  SherpaOnnxOnlineStreamInputFinished(stream);

  while (SherpaOnnxIsOnlineStreamReady(recognizer, stream)) {
    SherpaOnnxDecodeOnlineStream(recognizer, stream);
  }

  const SherpaOnnxOnlineRecognizerResult *r =
      SherpaOnnxGetOnlineStreamResult(recognizer, stream);
  if (strlen(r->text)) {
    SherpaOnnxPrint(display, segment_id, r->text);
  }

  SherpaOnnxDestroyOnlineRecognizerResult(r);

  SherpaOnnxDestroyDisplay(display);
  SherpaOnnxDestroyOnlineStream(stream);
  SherpaOnnxDestroyOnlineRecognizer(recognizer);

  avfilter_graph_free(&filter_graph);
  avcodec_free_context(&dec_ctx);
  avformat_close_input(&fmt_ctx);
  av_packet_free(&packet);
  av_frame_free(&frame);
  av_frame_free(&filt_frame);

  if (ret < 0 && ret != AVERROR_EOF) {
    fprintf(stderr, "Error occurred: %s\n", __av_err2str(ret));
    exit(1);
  }
  fprintf(stderr, "\n");

  return 0;
}


================================================
FILE: flutter/.gitignore
================================================
# Do not remove or rename entries in this file, only add new ones
# See https://github.com/flutter/flutter/issues/128635 for more context.

# Miscellaneous
*.class
*.lock
*.log
*.pyc
*.swp
.DS_Store
.atom/
.buildlog/
.history
.svn/

# IntelliJ related
*.iml
*.ipr
*.iws
.idea/

# Visual Studio Code related
.classpath
.project
.settings/
.vscode/*

# Flutter repo-specific
/bin/cache/
/bin/internal/bootstrap.bat
/bin/internal/bootstrap.sh
/bin/mingit/
/dev/benchmarks/mega_gallery/
/dev/bots/.recipe_deps
/dev/bots/android_tools/
/dev/devicelab/ABresults*.json
/dev/docs/doc/
/dev/docs/api_docs.zip
/dev/docs/flutter.docs.zip
/dev/docs/lib/
/dev/docs/pubspec.yaml
/dev/integration_tests/**/xcuserdata
/dev/integration_tests/**/Pods
/packages/flutter/coverage/
version
analysis_benchmark.json

# packages file containing multi-root paths
.packages.generated

# Flutter/Dart/Pub related
**/doc/api/
.dart_tool/
.flutter-plugins
.flutter-plugins-dependencies
**/generated_plugin_registrant.dart
.packages
.pub-preload-cache/
.pub-cache/
.pub/
build/
flutter_*.png
linked_*.ds
unlinked.ds
unlinked_spec.ds

# Android related
**/android/**/gradle-wrapper.jar
.gradle/
**/android/captures/
**/android/gradlew
**/android/gradlew.bat
**/android/local.properties
**/android/**/GeneratedPluginRegistrant.java
**/android/key.properties
*.jks

# iOS/XCode related
**/ios/**/*.mode1v3
**/ios/**/*.mode2v3
**/ios/**/*.moved-aside
**/ios/**/*.pbxuser
**/ios/**/*.perspectivev3
**/ios/**/*sync/
**/ios/**/.sconsign.dblite
**/ios/**/.tags*
**/ios/**/.vagrant/
**/ios/**/DerivedData/
**/ios/**/Icon?
**/ios/**/Pods/
**/ios/**/.symlinks/
**/ios/**/profile
**/ios/**/xcuserdata
**/ios/.generated/
**/ios/Flutter/.last_build_id
**/ios/Flutter/App.framework
**/ios/Flutter/Flutter.framework
**/ios/Flutter/Flutter.podspec
**/ios/Flutter/Generated.xcconfig
**/ios/Flutter/ephemeral
**/ios/Flutter/app.flx
**/ios/Flutter/app.zip
**/ios/Flutter/flutter_assets/
**/ios/Flutter/flutter_export_environment.sh
**/ios/ServiceDefinitions.json
**/ios/Runner/GeneratedPluginRegistrant.*

# macOS
**/Flutter/ephemeral/
**/Pods/
**/macos/Flutter/GeneratedPluginRegistrant.swift
**/macos/Flutter/ephemeral
**/xcuserdata/

# Windows
**/windows/flutter/generated_plugin_registrant.cc
**/windows/flutter/generated_plugin_registrant.h
**/windows/flutter/generated_plugins.cmake

# Linux
**/linux/flutter/generated_plugin_registrant.cc
**/linux/flutter/generated_plugin_registrant.h
**/linux/flutter/generated_plugins.cmake

# Coverage
coverage/

# Symbols
app.*.symbols

# Exceptions to above rules.
!**/ios/**/default.mode1v3
!**/ios/**/default.mode2v3
!**/ios/**/default.pbxuser
!**/ios/**/default.perspectivev3
!/packages/flutter_tools/test/data/dart_dependencies_test/**/.packages
!/dev/ci/**/Gemfile.lock
!.vscode/settings.json


================================================
FILE: flutter/README.md
================================================
# Introduction

This directory contains the source code of the flutter
package [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx)

Caution: You are not expected to use this directory directly.

This directory is for developers only.

For common users, please use our package at <https://pub.dev/packages/sherpa_onnx>


================================================
FILE: flutter/notes.md
================================================
# Introduction

This file keeps some notes about how packages in this directory
are created.

1. Create `sherpa_onnx`.

```bash
flutter create --template plugin sherpa_onnx
```

2. Create `sherpa_onnx_macos`

```bash
flutter create --template plugin_ffi --platforms macos sherpa_onnx_macos
```

3. Create `sherpa_onnx_linux`

```bash
flutter create --template plugin_ffi --platforms linux sherpa_onnx_linux
```

4. Create `sherpa_onnx_windows`

```bash
flutter create --template plugin_ffi --platforms linux sherpa_onnx_windows
```

5. Create `sherpa_onnx_android`

```bash
flutter create --template plugin_ffi --platforms android --org com.k2fsa.sherpa.onnx sherpa_onnx_android
```

6. Create `sherpa_onnx_ios`

```bash
flutter create --template plugin_ffi --platforms ios sherpa_onnx_ios
```


================================================
FILE: flutter/notes2.md
================================================
# Some use commands while learning flutter/dart

## macOS

1. Build required libraries

```bash
git clone https://github.com/k2-fsa/sherpa-onnx
cd sherpa-onnx
mkdir build
cd build

cmake -DCMAKE_INSTALL_PREFIX=./install -DBUILD_SHARED_LIBS=ON -DCMAKE_OSX_ARCHITECTURES="x86_64;arm64" ..
make install
cd ../sherpa-onnx/flutter/
cp -v  ../../build/install/lib/lib* ./macos/
```

2. Test for speaker identification

```bash
cd sherpa-onnx/sherpa-onnx/flutter/example
mkdir assets
```


## Useful commands
```
flutter pub publish --dry-run
flutter run -d macos
flutter run -d linux
flutter run -d windows

flutter build macos

flutter run --release -d macos

# add platform to an existing project
flutter create --platforms=windows,macos,linux .

dart analyze

FLUTTER_XCODE_ARCHS=arm64
FLUTTER_XCODE_ARCHS=x86_64
```

## Examples

  - https://dart.dev/tools/pub/automated-publishing

     Use GitHub actions to publish

  - https://dart.dev/tools/pub/pubspec

     It describes the format of ./pubspec.yaml

  - https://github.com/folksable/blurhash_ffi/

      It supports ios, android, linux, macos, and windows.

 - https://github.com/alexmercerind/dart_vlc
 - https://github.com/dart-lang/native/tree/main/pkgs/jni


================================================
FILE: flutter/publish.md
================================================
# Note

Before publishing a new version, please first run
```
flutter analyze
```
to check if there are any issues.


================================================
FILE: flutter/sherpa_onnx/.gitignore
================================================
# Miscellaneous
*.class
*.log
*.pyc
*.swp
.DS_Store
.atom/
.buildlog/
.history
.svn/
migrate_working_dir/

# IntelliJ related
*.iml
*.ipr
*.iws
.idea/

# The .vscode folder contains launch configuration and tasks you configure in
# VS Code which you may wish to be included in version control, so this line
# is commented out by default.
#.vscode/

# Flutter/Dart/Pub related
# Libraries should not include pubspec.lock, per https://dart.dev/guides/libraries/private-files#pubspeclock.
/pubspec.lock
**/doc/api/
.dart_tool/
build/


================================================
FILE: flutter/sherpa_onnx/.metadata
================================================
# This file tracks properties of this Flutter project.
# Used by Flutter tool to assess capabilities and perform upgrades etc.
#
# This file should be version controlled and should not be manually edited.

version:
  revision: "5dcb86f68f239346676ceb1ed1ea385bd215fba1"
  channel: "stable"

project_type: plugin

# Tracks metadata for the flutter migrate command
migration:
  platforms:
    - platform: root
      create_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1
      base_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1

  # User provided section

  # List of Local paths (relative to this file) that should be
  # ignored by the migrate tool.
  #
  # Files that are not part of the templates will be ignored by default.
  unmanaged_files:
    - 'lib/main.dart'
    - 'ios/Runner.xcodeproj/project.pbxproj'


================================================
FILE: flutter/sherpa_onnx/analysis_options.yaml
================================================
include: package:flutter_lints/flutter.yaml

# Additional information about this file can be found at
# https://dart.dev/guides/language/analysis-options


================================================
FILE: flutter/sherpa_onnx/example/.gitignore
================================================
# Miscellaneous
*.class
*.log
*.pyc
*.swp
.DS_Store
.atom/
.buildlog/
.history
.svn/
migrate_working_dir/

# IntelliJ related
*.iml
*.ipr
*.iws
.idea/

# The .vscode folder contains launch configuration and tasks you configure in
# VS Code which you may wish to be included in version control, so this line
# is commented out by default.
#.vscode/

# Flutter/Dart/Pub related
**/doc/api/
**/ios/Flutter/.last_build_id
.dart_tool/
.flutter-plugins
.flutter-plugins-dependencies
.pub-cache/
.pub/
/build/

# Symbolication related
app.*.symbols

# Obfuscation related
app.*.map.json

# Android Studio will place build artifacts here
/android/app/debug
/android/app/profile
/android/app/release


================================================
FILE: flutter/sherpa_onnx/example/README.md
================================================
# Introduction

Please find examples at

https://github.com/k2-fsa/sherpa-onnx/tree/master/flutter-examples

and

https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples


================================================
FILE: flutter/sherpa_onnx/example/example.md
================================================
# sherpa-onnx app example

## Flutter examples

| Functions | URL | Supported Platforms|
|---|---|---|
|Streaming speech recognition| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/flutter-examples/streaming_asr)| Android, iOS, Linux, macOS, Windows|
|Speech synthesis| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/flutter-examples/tts)| Android, iOS, Linux, macOS, Windows|

## Pure dart-examples

Hint: All of the following functions can be used in Flutter, even if some of them are only provided in pure dart api examples.

| Functions | URL | Supported Platforms|
|---|---|---|
|Speaker diarization| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/speaker-diarization)| macOS, Windows, Linux|
|Streaming speech recognition| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/streaming-asr)| macOS, Windows, Linux|
|Non-Streaming speech recognition| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/non-streaming-asr)| macOS, Windows, Linux|
|Text to speech| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/tts)| macOS, Windows, Linux|
|Voice activity detection (VAD)| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/vad)| macOS, Windows, Linux|
|Voice activity detection (VAD) with non-streaming speech recognition| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/vad-with-non-streaming-asr)| macOS, Windows, Linux|
|Speaker identification and verification| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/speaker-identification)| macOS, Windows, Linux|
|Audio tagging| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/audio-tagging)| macOS, Windows, Linux|
|Keyword spotter| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/keyword-spotter)| macOS, Windows, Linux|
|Add punctuations| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/add-punctuations)| macOS, Windows, Linux|
|Speech enhancement/denoising| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/speech-enhancement-gtcrn) GTCRN and DPDFNet (`baseline`, `dpdfnet2`, `dpdfnet4`, `dpdfnet8` for 16 kHz ASR, `dpdfnet2_48khz_hr` for 48 kHz output)| macOS, Windows, Linux|


================================================
FILE: flutter/sherpa_onnx/lib/sherpa_onnx.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';
import 'dart:ffi';

/// Dart bindings for the public sherpa-onnx inference APIs.
///
/// Import this library to access offline and streaming ASR, text-to-speech,
/// VAD, speaker identification, speaker diarization, punctuation restoration,
/// audio tagging, spoken language identification, speech denoising, and WAV
/// I/O helpers from a single entry point.
///
/// Before creating any runtime object, call [initBindings] once so the package
/// can load the underlying native `sherpa-onnx-c-api` library for the current
/// platform.
///
/// For concrete end-to-end usage, see `dart-api-examples/` in the repository,
/// especially:
///
/// - `non-streaming-asr/bin/sense-voice.dart`
/// - `non-streaming-asr/bin/whisper.dart`
/// - `non-streaming-asr/bin/nemo-transducer.dart`
/// - `streaming-asr/bin/zipformer-transducer.dart`
/// - `tts/bin/pocket-en.dart`
/// - `vad/bin/vad.dart`
/// - `speaker-diarization/`

export 'src/audio_tagging.dart';
export 'src/feature_config.dart';
export 'src/homophone_replacer_config.dart';
export 'src/keyword_spotter.dart';
export 'src/offline_punctuation.dart';
export 'src/offline_recognizer.dart';
export 'src/offline_speaker_diarization.dart';
export 'src/offline_speech_denoiser.dart';
export 'src/offline_stream.dart';
export 'src/online_speech_denoiser.dart';
export 'src/online_punctuation.dart';
export 'src/online_recognizer.dart';
export 'src/online_stream.dart';
export 'src/speaker_identification.dart';
export 'src/spoken_language_identification.dart';
export 'src/tts.dart';
export 'src/vad.dart';
export 'src/version.dart';
export 'src/wave_reader.dart';
export 'src/wave_writer.dart';

import 'src/sherpa_onnx_bindings.dart';

String? _path;

// see also
// https://github.com/flutter/codelabs/blob/main/ffigen_codelab/step_05/lib/ffigen_app.dart
// https://api.flutter.dev/flutter/dart-io/Platform-class.html
final DynamicLibrary _dylib = () {
  if (Platform.isMacOS) {
    if (_path == null) {
      return DynamicLibrary.open('libsherpa-onnx-c-api.dylib');
    } else {
      return DynamicLibrary.open('$_path/libsherpa-onnx-c-api.dylib');
    }
  }

  if (Platform.isIOS) {
    if (_path == null) {
      return DynamicLibrary.open('sherpa_onnx.framework/sherpa_onnx');
    } else {
      return DynamicLibrary.open('$_path/sherpa_onnx.framework/sherpa_onnx');
    }
  }

  if (Platform.isAndroid || Platform.isLinux) {
    if (_path == null) {
      return DynamicLibrary.open('libsherpa-onnx-c-api.so');
    } else {
      return DynamicLibrary.open('$_path/libsherpa-onnx-c-api.so');
    }
  }

  if (Platform.isWindows) {
    if (_path == null) {
      return DynamicLibrary.open('sherpa-onnx-c-api.dll');
    } else {
      return DynamicLibrary.open('$_path\\sherpa-onnx-c-api.dll');
    }
  }

  throw UnsupportedError('Unknown platform: ${Platform.operatingSystem}');
}();

/// Initialize the native sherpa-onnx bindings.
///
/// Call this exactly once before using any other API from this package.
///
/// If [p] is provided, it is treated as the directory containing the native
/// dynamic library for desktop platforms, or the framework root on Apple
/// platforms. If omitted, the package tries to load the library from the
/// default platform-specific filename.
void initBindings([String? p]) {
  _path ??= p;
  SherpaOnnxBindings.init(_dylib);
}


================================================
FILE: flutter/sherpa_onnx/lib/src/audio_tagging.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:ffi';
import 'package:ffi/ffi.dart';

import './offline_stream.dart';
import './sherpa_onnx_bindings.dart';

/// Offline audio tagging.
///
/// This module classifies complete audio clips and returns the most likely
/// events. See `dart-api-examples/audio-tagging/` for working examples.
///
/// Example:
///
/// ```dart
/// final modelConfig = AudioTaggingModelConfig(
///   zipformer: const OfflineZipformerAudioTaggingModelConfig(
///     model: './sherpa-onnx-zipformer-audio-tagging/model.int8.onnx',
///   ),
///   numThreads: 1,
///   debug: true,
/// );
///
/// final config = AudioTaggingConfig(
///   model: modelConfig,
///   labels: './sherpa-onnx-zipformer-audio-tagging/class_labels_indices.csv',
/// );
///
/// final tagger = AudioTagging(config: config);
/// final wave = readWave('./test.wav');
/// final stream = tagger.createStream();
/// stream.acceptWaveform(samples: wave.samples, sampleRate: wave.sampleRate);
/// final events = tagger.compute(stream: stream, topK: 5);
/// print(events);
/// stream.free();
/// tagger.free();
/// ```
class OfflineZipformerAudioTaggingModelConfig {
  const OfflineZipformerAudioTaggingModelConfig({this.model = ''});

  factory OfflineZipformerAudioTaggingModelConfig.fromJson(
      Map<String, dynamic> map) {
    return OfflineZipformerAudioTaggingModelConfig(
      model: map['model'] ?? '',
    );
  }

  @override
  String toString() {
    return 'OfflineZipformerAudioTaggingModelConfig(model: $model)';
  }

  Map<String, dynamic> toJson() {
    return {
      'model': model,
    };
  }

  final String model;
}

/// Aggregate model configuration for audio tagging.
///
/// Configure either [zipformer] or [ced] for typical use.
class AudioTaggingModelConfig {
  AudioTaggingModelConfig(
      {this.zipformer = const OfflineZipformerAudioTaggingModelConfig(),
      this.ced = '',
      this.numThreads = 1,
      this.provider = 'cpu',
      this.debug = true});

  factory AudioTaggingModelConfig.fromJson(Map<String, dynamic> map) {
    return AudioTaggingModelConfig(
      zipformer:
          OfflineZipformerAudioTaggingModelConfig.fromJson(map['zipformer']),
      ced: map['ced'] ?? '',
      numThreads: map['numThreads'] ?? 1,
      provider: map['provider'] ?? 'cpu',
      debug: map['debug'] ?? true,
    );
  }

  @override
  String toString() {
    return 'AudioTaggingModelConfig(zipformer: $zipformer, ced: $ced, numThreads: $numThreads, provider: $provider, debug: $debug)';
  }

  Map<String, dynamic> toJson() {
    return {
      'zipformer': zipformer.toJson(),
      'ced': ced,
      'numThreads': numThreads,
      'provider': provider,
      'debug': debug,
    };
  }

  final OfflineZipformerAudioTaggingModelConfig zipformer;
  final String ced;
  final int numThreads;
  final String provider;
  final bool debug;
}

/// Top-level configuration for [AudioTagging].
class AudioTaggingConfig {
  AudioTaggingConfig({required this.model, this.labels = ''});

  factory AudioTaggingConfig.fromJson(Map<String, dynamic> map) {
    return AudioTaggingConfig(
      model: AudioTaggingModelConfig.fromJson(map['model']),
      labels: map['labels'] ?? '',
    );
  }

  @override
  String toString() {
    return 'AudioTaggingConfig(model: $model, labels: $labels)';
  }

  Map<String, dynamic> toJson() {
    return {
      'model': model.toJson(),
      'labels': labels,
    };
  }

  final AudioTaggingModelConfig model;
  final String labels;
}

/// One predicted audio event.
class AudioEvent {
  AudioEvent({required this.name, required this.index, required this.prob});

  factory AudioEvent.fromJson(Map<String, dynamic> map) {
    return AudioEvent(
      name: map['name'],
      index: map['index'],
      prob: map['prob'],
    );
  }

  @override
  String toString() {
    return 'AudioEvent(name: $name, index: $index, prob: $prob)';
  }

  Map<String, dynamic> toJson() {
    return {
      'name': name,
      'index': index,
      'prob': prob,
    };
  }

  final String name;
  final int index;
  final double prob;
}

/// Offline audio tagger.
class AudioTagging {
  AudioTagging.fromPtr({required this.ptr, required this.config});

  AudioTagging._({required this.ptr, required this.config});

  /// Create an audio tagger from [config].
  factory AudioTagging({required AudioTaggingConfig config}) {
    final c = calloc<SherpaOnnxAudioTaggingConfig>();

    final zipformerPtr = config.model.zipformer.model.toNativeUtf8();
    c.ref.model.zipformer.model = zipformerPtr;

    final cedPtr = config.model.ced.toNativeUtf8();
    c.ref.model.ced = cedPtr;

    c.ref.model.numThreads = config.model.numThreads;

    final providerPtr = config.model.provider.toNativeUtf8();
    c.ref.model.provider = providerPtr;

    c.ref.model.debug = config.model.debug ? 1 : 0;

    final labelsPtr = config.labels.toNativeUtf8();
    c.ref.labels = labelsPtr;

    if (SherpaOnnxBindings.sherpaOnnxCreateAudioTagging == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    final ptr =
        SherpaOnnxBindings.sherpaOnnxCreateAudioTagging?.call(c) ?? nullptr;

    calloc.free(labelsPtr);
    calloc.free(providerPtr);
    calloc.free(cedPtr);
    calloc.free(zipformerPtr);
    calloc.free(c);

    if (ptr == nullptr) {
      throw Exception(
          "Failed to create audio tagging. Please check your config");
    }

    return AudioTagging._(ptr: ptr, config: config);
  }

  /// Release the native tagger.
  void free() {
    if (SherpaOnnxBindings.sherpaOnnxDestroyAudioTagging == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return;
    }
    SherpaOnnxBindings.sherpaOnnxDestroyAudioTagging?.call(ptr);
    ptr = nullptr;
  }

  /// Create an offline stream for one audio clip.
  OfflineStream createStream() {
    if (SherpaOnnxBindings.sherpaOnnxAudioTaggingCreateOfflineStream == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      throw Exception("Failed to create offline stream");
    }

    final p = SherpaOnnxBindings.sherpaOnnxAudioTaggingCreateOfflineStream
            ?.call(ptr) ??
        nullptr;

    if (p == nullptr) {
      throw Exception("Failed to create offline stream");
    }

    return OfflineStream(ptr: p);
  }

  /// Compute the top [topK] events for [stream].
  List<AudioEvent> compute({required OfflineStream stream, required int topK}) {
    if (SherpaOnnxBindings.sherpaOnnxAudioTaggingCompute == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr || stream.ptr == nullptr) {
      return <AudioEvent>[];
    }

    final pp = SherpaOnnxBindings.sherpaOnnxAudioTaggingCompute
            ?.call(ptr, stream.ptr, topK) ??
        nullptr;

    final ans = <AudioEvent>[];

    if (pp == nullptr) {
      return ans;
    }

    var i = 0;
    while (pp[i] != nullptr) {
      final p = pp[i];

      final name = p.ref.name.toDartString();
      final index = p.ref.index;
      final prob = p.ref.prob;
      final e = AudioEvent(name: name, index: index, prob: prob);
      ans.add(e);

      i += 1;
    }

    SherpaOnnxBindings.sherpaOnnxAudioTaggingFreeResults?.call(pp);

    return ans;
  }

  Pointer<SherpaOnnxAudioTagging> ptr;
  final AudioTaggingConfig config;
}


================================================
FILE: flutter/sherpa_onnx/lib/src/feature_config.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation

/// Feature extraction settings shared by recognizers and keyword spotting.
///
/// In most cases the defaults of 16 kHz audio and 80-dimensional filterbank
/// features should match the model packages provided in the repository.
class FeatureConfig {
  const FeatureConfig({this.sampleRate = 16000, this.featureDim = 80});

  factory FeatureConfig.fromJson(Map<String, dynamic> json) {
    return FeatureConfig(
      sampleRate: json['sampleRate'] as int? ?? 16000,
      featureDim: json['featureDim'] as int? ?? 80,
    );
  }

  @override
  String toString() {
    return 'FeatureConfig(sampleRate: $sampleRate, featureDim: $featureDim)';
  }

  Map<String, dynamic> toJson() => {
        'sampleRate': sampleRate,
        'featureDim': featureDim,
      };

  final int sampleRate;
  final int featureDim;
}


================================================
FILE: flutter/sherpa_onnx/lib/src/homophone_replacer_config.dart
================================================
// Copyright (c)  2025  Xiaomi Corporation

/// Optional resources for homophone replacement during decoding.
///
/// Set [lexicon] and [ruleFsts] when using models or grammars that support
/// homophone-aware post-processing.
class HomophoneReplacerConfig {
  const HomophoneReplacerConfig(
      {this.dictDir = '', this.lexicon = '', this.ruleFsts = ''});

  factory HomophoneReplacerConfig.fromJson(Map<String, dynamic> json) {
    return HomophoneReplacerConfig(
      lexicon: json['lexicon'] as String? ?? '',
      ruleFsts: json['ruleFsts'] as String? ?? '',
    );
  }

  @override
  String toString() {
    return 'HomophoneReplacerConfig(lexicon: $lexicon, ruleFsts: $ruleFsts)';
  }

  Map<String, dynamic> toJson() => {
        'lexicon': lexicon,
        'ruleFsts': ruleFsts,
      };

  final String dictDir; // unused
  final String lexicon;
  final String ruleFsts;
}


================================================
FILE: flutter/sherpa_onnx/lib/src/keyword_spotter.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:convert';
import 'dart:ffi';

import 'package:ffi/ffi.dart';

import './feature_config.dart';
import './online_stream.dart';
import './online_recognizer.dart';
import './sherpa_onnx_bindings.dart';
import './utils.dart';

/// Streaming keyword spotting.
///
/// See `dart-api-examples/keyword-spotter/` for end-to-end usage.
///
/// Example:
///
/// ```dart
/// final spotter = KeywordSpotter(
///   KeywordSpotterConfig(
///     model: onlineModelConfig,
///     keywordsFile: './keywords.txt',
///   ),
/// );
///
/// final stream = spotter.createStream();
/// stream.acceptWaveform(samples: chunk, sampleRate: 16000);
/// while (spotter.isReady(stream)) {
///   spotter.decode(stream);
/// }
/// print(spotter.getResult(stream).keyword);
/// ```
class KeywordSpotterConfig {
  const KeywordSpotterConfig({
    this.feat = const FeatureConfig(),
    required this.model,
    this.maxActivePaths = 4,
    this.numTrailingBlanks = 1,
    this.keywordsScore = 1.0,
    this.keywordsThreshold = 0.25,
    this.keywordsFile = '',
    this.keywordsBuf = '',
    this.keywordsBufSize = 0,
  });

  factory KeywordSpotterConfig.fromJson(Map<String, dynamic> json) {
    return KeywordSpotterConfig(
      feat: json['feat'] != null
          ? FeatureConfig.fromJson(json['feat'] as Map<String, dynamic>)
          : const FeatureConfig(),
      model: OnlineModelConfig.fromJson(json['model'] as Map<String, dynamic>),
      maxActivePaths: json['maxActivePaths'] as int? ?? 4,
      numTrailingBlanks: json['numTrailingBlanks'] as int? ?? 1,
      keywordsScore: (json['keywordsScore'] as num?)?.toDouble() ?? 1.0,
      keywordsThreshold:
          (json['keywordsThreshold'] as num?)?.toDouble() ?? 0.25,
      keywordsFile: json['keywordsFile'] as String? ?? '',
      keywordsBuf: json['keywordsBuf'] as String? ?? '',
      keywordsBufSize: json['keywordsBufSize'] as int? ?? 0,
    );
  }

  @override
  String toString() {
    return 'KeywordSpotterConfig(feat: $feat, model: $model, maxActivePaths: $maxActivePaths, numTrailingBlanks: $numTrailingBlanks, keywordsScore: $keywordsScore, keywordsThreshold: $keywordsThreshold, keywordsFile: $keywordsFile, keywordsBuf: $keywordsBuf, keywordsBufSize: $keywordsBufSize)';
  }

  Map<String, dynamic> toJson() => {
        'feat': feat.toJson(),
        'model': model.toJson(),
        'maxActivePaths': maxActivePaths,
        'numTrailingBlanks': numTrailingBlanks,
        'keywordsScore': keywordsScore,
        'keywordsThreshold': keywordsThreshold,
        'keywordsFile': keywordsFile,
        'keywordsBuf': keywordsBuf,
        'keywordsBufSize': keywordsBufSize,
      };

  final FeatureConfig feat;
  final OnlineModelConfig model;

  final int maxActivePaths;
  final int numTrailingBlanks;

  final double keywordsScore;
  final double keywordsThreshold;
  final String keywordsFile;
  final String keywordsBuf;
  final int keywordsBufSize;
}

/// Result returned by [KeywordSpotter.getResult].
class KeywordResult {
  KeywordResult({required this.keyword});

  factory KeywordResult.fromJson(Map<String, dynamic> json) {
    return KeywordResult(
      keyword: json['keyword'] as String? ?? '',
    );
  }

  @override
  String toString() {
    return 'KeywordResult(keyword: $keyword)';
  }

  Map<String, dynamic> toJson() => {
        'keyword': keyword,
      };

  final String keyword;
}

/// Streaming keyword spotter.
class KeywordSpotter {
  KeywordSpotter.fromPtr({required this.ptr, required this.config});

  KeywordSpotter._({required this.ptr, required this.config});

  /// Create a keyword spotter from [config].
  factory KeywordSpotter(KeywordSpotterConfig config) {
    final c = calloc<SherpaOnnxKeywordSpotterConfig>();
    c.ref.feat.sampleRate = config.feat.sampleRate;
    c.ref.feat.featureDim = config.feat.featureDim;

    // transducer
    c.ref.model.transducer.encoder =
        config.model.transducer.encoder.toNativeUtf8();
    c.ref.model.transducer.decoder =
        config.model.transducer.decoder.toNativeUtf8();
    c.ref.model.transducer.joiner =
        config.model.transducer.joiner.toNativeUtf8();

    // paraformer
    c.ref.model.paraformer.encoder =
        config.model.paraformer.encoder.toNativeUtf8();
    c.ref.model.paraformer.decoder =
        config.model.paraformer.decoder.toNativeUtf8();

    // zipformer2Ctc
    c.ref.model.zipformer2Ctc.model =
        config.model.zipformer2Ctc.model.toNativeUtf8();

    // nemoCtc
    c.ref.model.nemoCtc.model = config.model.nemoCtc.model.toNativeUtf8();

    c.ref.model.tokens = config.model.tokens.toNativeUtf8();
    c.ref.model.numThreads = config.model.numThreads;
    c.ref.model.provider = config.model.provider.toNativeUtf8();
    c.ref.model.debug = config.model.debug ? 1 : 0;
    c.ref.model.modelType = config.model.modelType.toNativeUtf8();
    c.ref.model.modelingUnit = config.model.modelingUnit.toNativeUtf8();
    c.ref.model.bpeVocab = config.model.bpeVocab.toNativeUtf8();

    c.ref.maxActivePaths = config.maxActivePaths;
    c.ref.numTrailingBlanks = config.numTrailingBlanks;
    c.ref.keywordsScore = config.keywordsScore;
    c.ref.keywordsThreshold = config.keywordsThreshold;
    c.ref.keywordsFile = config.keywordsFile.toNativeUtf8();
    c.ref.keywordsBuf = config.keywordsBuf.toNativeUtf8();
    c.ref.keywordsBufSize = config.keywordsBufSize;

    if (SherpaOnnxBindings.createKeywordSpotter == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    final ptr = SherpaOnnxBindings.createKeywordSpotter?.call(c) ?? nullptr;

    calloc.free(c.ref.keywordsBuf);
    calloc.free(c.ref.keywordsFile);
    calloc.free(c.ref.model.bpeVocab);
    calloc.free(c.ref.model.modelingUnit);
    calloc.free(c.ref.model.modelType);
    calloc.free(c.ref.model.provider);
    calloc.free(c.ref.model.tokens);
    calloc.free(c.ref.model.nemoCtc.model);
    calloc.free(c.ref.model.zipformer2Ctc.model);
    calloc.free(c.ref.model.paraformer.encoder);
    calloc.free(c.ref.model.paraformer.decoder);

    calloc.free(c.ref.model.transducer.encoder);
    calloc.free(c.ref.model.transducer.decoder);
    calloc.free(c.ref.model.transducer.joiner);
    calloc.free(c);

    if (ptr == nullptr) {
      throw Exception("Failed to create kws. Please check your config");
    }

    return KeywordSpotter._(ptr: ptr, config: config);
  }

  /// Release the native keyword spotter.
  void free() {
    if (SherpaOnnxBindings.destroyKeywordSpotter == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return;
    }
    SherpaOnnxBindings.destroyKeywordSpotter?.call(ptr);
    ptr = nullptr;
  }

  /// Create a streaming input stream.
  ///
  /// If [keywords] is provided, it overrides the configured keywords for that
  /// stream.
  OnlineStream createStream({String keywords = ''}) {
    if (keywords == '') {
      if (SherpaOnnxBindings.createKeywordStream == null) {
        throw Exception("Please initialize sherpa-onnx first");
      }
    } else {
      if (SherpaOnnxBindings.createKeywordStreamWithKeywords == null) {
        throw Exception("Please initialize sherpa-onnx first");
      }
    }

    if (ptr == nullptr) {
      throw Exception("Failed to create online stream");
    }

    if (keywords == '') {
      final p = SherpaOnnxBindings.createKeywordStream?.call(ptr) ?? nullptr;
      if (p == nullptr) {
        throw Exception("Failed to create online stream");
      }
      return OnlineStream(ptr: p);
    }

    final utf8 = keywords.toNativeUtf8();
    final p =
        SherpaOnnxBindings.createKeywordStreamWithKeywords?.call(ptr, utf8) ??
            nullptr;
    calloc.free(utf8);

    if (p == nullptr) {
      throw Exception("Failed to create online stream");
    }

    return OnlineStream(ptr: p);
  }

  /// Return `true` if [stream] has enough audio for another decode step.
  bool isReady(OnlineStream stream) {
    if (SherpaOnnxBindings.isKeywordStreamReady == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr || stream.ptr == nullptr) {
      return false;
    }

    int ready =
        SherpaOnnxBindings.isKeywordStreamReady?.call(ptr, stream.ptr) ?? 0;

    return ready == 1;
  }

  /// Fetch the current keyword spotting result for [stream].
  KeywordResult getResult(OnlineStream stream) {
    if (SherpaOnnxBindings.getKeywordResultAsJson == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr || stream.ptr == nullptr) {
      return KeywordResult(keyword: '');
    }

    final json =
        SherpaOnnxBindings.getKeywordResultAsJson?.call(ptr, stream.ptr) ??
            nullptr;
    if (json == nullptr) {
      return KeywordResult(keyword: '');
    }

    final parsedJson = jsonDecode(toDartString(json));

    SherpaOnnxBindings.freeKeywordResultJson?.call(json);

    return KeywordResult(
      keyword: parsedJson['keyword'],
    );
  }

  /// Decode one incremental step for [stream].
  void decode(OnlineStream stream) {
    if (SherpaOnnxBindings.decodeKeywordStream == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr || stream.ptr == nullptr) {
      return;
    }
    SherpaOnnxBindings.decodeKeywordStream?.call(ptr, stream.ptr);
  }

  /// Reset the internal state for [stream].
  void reset(OnlineStream stream) {
    if (SherpaOnnxBindings.resetKeywordStream == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr || stream.ptr == nullptr) {
      return;
    }
    SherpaOnnxBindings.resetKeywordStream?.call(ptr, stream.ptr);
  }

  Pointer<SherpaOnnxKeywordSpotter> ptr;
  KeywordSpotterConfig config;
}


================================================
FILE: flutter/sherpa_onnx/lib/src/offline_punctuation.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:ffi';
import 'package:ffi/ffi.dart';

import './sherpa_onnx_bindings.dart';

/// Offline punctuation restoration.
///
/// This is intended for complete text strings when you want one-shot
/// punctuation insertion. See `dart-api-examples/add-punctuations/`.
class OfflinePunctuationModelConfig {
  OfflinePunctuationModelConfig(
      {required this.ctTransformer,
      this.numThreads = 1,
      this.provider = 'cpu',
      this.debug = true});

  factory OfflinePunctuationModelConfig.fromJson(Map<String, dynamic> json) {
    return OfflinePunctuationModelConfig(
      ctTransformer: json['ctTransformer'] as String,
      numThreads: json['numThreads'] as int? ?? 1,
      provider: json['provider'] as String? ?? 'cpu',
      debug: json['debug'] as bool? ?? true,
    );
  }

  @override
  String toString() {
    return 'OfflinePunctuationModelConfig(ctTransformer: $ctTransformer, numThreads: $numThreads, provider: $provider, debug: $debug)';
  }

  Map<String, dynamic> toJson() => {
        'ctTransformer': ctTransformer,
        'numThreads': numThreads,
        'provider': provider,
        'debug': debug,
      };

  final String ctTransformer;
  final int numThreads;
  final String provider;
  final bool debug;
}

/// Top-level configuration for [OfflinePunctuation].
class OfflinePunctuationConfig {
  OfflinePunctuationConfig({
    required this.model,
  });

  factory OfflinePunctuationConfig.fromJson(Map<String, dynamic> json) {
    return OfflinePunctuationConfig(
      model: OfflinePunctuationModelConfig.fromJson(
          json['model'] as Map<String, dynamic>),
    );
  }

  @override
  String toString() {
    return 'OfflinePunctuationConfig(model: $model)';
  }

  Map<String, dynamic> toJson() => {
        'model': model.toJson(),
      };

  final OfflinePunctuationModelConfig model;
}

/// Offline punctuation restorer.
class OfflinePunctuation {
  OfflinePunctuation.fromPtr({required this.ptr, required this.config});

  OfflinePunctuation._({required this.ptr, required this.config});

  /// Create an offline punctuator from [config].
  factory OfflinePunctuation({required OfflinePunctuationConfig config}) {
    if (SherpaOnnxBindings.sherpaOnnxCreateOfflinePunctuation == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    final c = calloc<SherpaOnnxOfflinePunctuationConfig>();

    final ctTransformerPtr = config.model.ctTransformer.toNativeUtf8();
    c.ref.model.ctTransformer = ctTransformerPtr;
    c.ref.model.numThreads = config.model.numThreads;
    c.ref.model.debug = config.model.debug ? 1 : 0;

    final providerPtr = config.model.provider.toNativeUtf8();
    c.ref.model.provider = providerPtr;

    final ptr =
        SherpaOnnxBindings.sherpaOnnxCreateOfflinePunctuation?.call(c) ??
            nullptr;

    calloc.free(providerPtr);
    calloc.free(ctTransformerPtr);
    calloc.free(c);

    if (ptr == nullptr) {
      throw Exception(
          "Failed to create offline punctuation. Please check your config");
    }

    return OfflinePunctuation._(ptr: ptr, config: config);
  }

  /// Release the native punctuator.
  void free() {
    if (SherpaOnnxBindings.sherpaOnnxDestroyOfflinePunctuation == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return;
    }
    SherpaOnnxBindings.sherpaOnnxDestroyOfflinePunctuation?.call(ptr);
    ptr = nullptr;
  }

  /// Add punctuation to [text].
  String addPunct(String text) {
    if (SherpaOnnxBindings.sherpaOfflinePunctuationAddPunct == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return '';
    }

    final textPtr = text.toNativeUtf8();

    final p = SherpaOnnxBindings.sherpaOfflinePunctuationAddPunct
            ?.call(ptr, textPtr) ??
        nullptr;

    calloc.free(textPtr);

    if (p == nullptr) {
      return '';
    }

    final ans = p.toDartString();

    SherpaOnnxBindings.sherpaOfflinePunctuationFreeText?.call(p);

    return ans;
  }

  Pointer<SherpaOnnxOfflinePunctuation> ptr;
  final OfflinePunctuationConfig config;
}


================================================
FILE: flutter/sherpa_onnx/lib/src/offline_recognizer.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:convert';
import 'dart:ffi';

import 'package:ffi/ffi.dart';

import './feature_config.dart';
import './homophone_replacer_config.dart';
import './offline_stream.dart';
import './sherpa_onnx_bindings.dart';
import './utils.dart';

/// Offline speech recognition.
///
/// This module covers non-streaming ASR model families such as transducer,
/// Paraformer, Whisper, SenseVoice, Moonshine, Canary, Fire-Red-ASR, WeNet,
/// Omnilingual-ASR, TeleSpeech-CTC, FunASR-Nano, and several CTC variants.
///
/// See `dart-api-examples/non-streaming-asr/bin/` for concrete usage,
/// including `sense-voice.dart`, `whisper.dart`, `nemo-transducer.dart`,
/// `moonshine_v2.dart`, and `fire-red-asr-ctc.dart`.
///
/// Example:
///
/// ```dart
/// final whisper = OfflineWhisperModelConfig(
///   encoder: './sherpa-onnx-whisper-tiny/encoder.int8.onnx',
///   decoder: './sherpa-onnx-whisper-tiny/decoder.int8.onnx',
/// );
///
/// final model = OfflineModelConfig(
///   whisper: whisper,
///   tokens: './sherpa-onnx-whisper-tiny/tokens.txt',
///   modelType: 'whisper',
///   numThreads: 1,
/// );
///
/// final recognizer = OfflineRecognizer(OfflineRecognizerConfig(model: model));
/// final wave = readWave('./test.wav');
/// final stream = recognizer.createStream();
/// stream.acceptWaveform(samples: wave.samples, sampleRate: wave.sampleRate);
/// recognizer.decode(stream);
/// print(recognizer.getResult(stream).text);
/// stream.free();
/// recognizer.free();
/// ```

/// Model files for an offline transducer recognizer.
///
/// This family is also used by NeMo Parakeet TDT-style examples.
class OfflineTransducerModelConfig {
  const OfflineTransducerModelConfig({
    this.encoder = '',
    this.decoder = '',
    this.joiner = '',
  });

  factory OfflineTransducerModelConfig.fromJson(Map<String, dynamic> json) {
    return OfflineTransducerModelConfig(
      encoder: json['encoder'] as String? ?? '',
      decoder: json['decoder'] as String? ?? '',
      joiner: json['joiner'] as String? ?? '',
    );
  }

  @override
  String toString() {
    return 'OfflineTransducerModelConfig(encoder: $encoder, decoder: $decoder, joiner: $joiner)';
  }

  Map<String, dynamic> toJson() => {
    'encoder': encoder,
    'decoder': decoder,
    'joiner': joiner,
  };

  final String encoder;
  final String decoder;
  final String joiner;
}

/// Model files for an offline Paraformer recognizer.
class OfflineParaformerModelConfig {
  const OfflineParaformerModelConfig({this.model = ''});

  factory OfflineParaformerModelConfig.fromJson(Map<String, dynamic> json) {
    return OfflineParaformerModelConfig(model: json['model'] as String? ?? '');
  }

  @override
  String toString() {
    return 'OfflineParaformerModelConfig(model: $model)';
  }

  Map<String, dynamic> toJson() => {'model': model};

  final String model;
}

/// Model files for an offline NeMo CTC recognizer.
class OfflineNemoEncDecCtcModelConfig {
  const OfflineNemoEncDecCtcModelConfig({this.model = ''});

  factory OfflineNemoEncDecCtcModelConfig.fromJson(Map<String, dynamic> json) {
    return OfflineNemoEncDecCtcModelConfig(
      model: json['model'] as String? ?? '',
    );
  }

  @override
  String toString() {
    return 'OfflineNemoEncDecCtcModelConfig(model: $model)';
  }

  Map<String, dynamic> toJson() => {'model': model};

  final String model;
}

/// Model files for an offline Dolphin recognizer.
class OfflineDolphinModelConfig {
  const OfflineDolphinModelConfig({this.model = ''});

  factory OfflineDolphinModelConfig.fromJson(Map<String, dynamic> json) {
    return OfflineDolphinModelConfig(model: json['model'] as String? ?? '');
  }

  @override
  String toString() {
    return 'OfflineDolphinModelConfig(model: $model)';
  }

  Map<String, dynamic> toJson() => {'model': model};

  final String model;
}

/// Model files for an offline Zipformer CTC recognizer.
class OfflineZipformerCtcModelConfig {
  const OfflineZipformerCtcModelConfig({this.model = ''});

  factory OfflineZipformerCtcModelConfig.fromJson(Map<String, dynamic> json) {
    return OfflineZipformerCtcModelConfig(
      model: json['model'] as String? ?? '',
    );
  }

  @override
  String toString() {
    return 'OfflineZipformerCtcModelConfig(model: $model)';
  }

  Map<String, dynamic> toJson() => {'model': model};

  final String model;
}

/// Model files for an offline WeNet CTC recognizer.
class OfflineWenetCtcModelConfig {
  const OfflineWenetCtcModelConfig({this.model = ''});

  factory OfflineWenetCtcModelConfig.fromJson(Map<String, dynamic> json) {
    return OfflineWenetCtcModelConfig(model: json['model'] as String? ?? '');
  }

  @override
  String toString() {
    return 'OfflineWenetCtcModelConfig(model: $model)';
  }

  Map<String, dynamic> toJson() => {'model': model};

  final String model;
}

/// Model files for the omnilingual ASR CTC recognizer.
class OfflineOmnilingualAsrCtcModelConfig {
  const OfflineOmnilingualAsrCtcModelConfig({this.model = ''});

  factory OfflineOmnilingualAsrCtcModelConfig.fromJson(
    Map<String, dynamic> json,
  ) {
    return OfflineOmnilingualAsrCtcModelConfig(
      model: json['model'] as String? ?? '',
    );
  }

  @override
  String toString() {
    return 'OfflineOmnilingualAsrCtcModelConfig(model: $model)';
  }

  Map<String, dynamic> toJson() => {'model': model};

  final String model;
}

/// Model files for the MedASR CTC recognizer.
class OfflineMedAsrCtcModelConfig {
  const OfflineMedAsrCtcModelConfig({this.model = ''});

  factory OfflineMedAsrCtcModelConfig.fromJson(Map<String, dynamic> json) {
    return OfflineMedAsrCtcModelConfig(model: json['model'] as String? ?? '');
  }

  @override
  String toString() {
    return 'OfflineMedAsrCtcModelConfig(model: $model)';
  }

  Map<String, dynamic> toJson() => {'model': model};

  final String model;
}

/// Model files for the Fire-Red-ASR CTC recognizer.
class OfflineFireRedAsrCtcModelConfig {
  const OfflineFireRedAsrCtcModelConfig({this.model = ''});

  factory OfflineFireRedAsrCtcModelConfig.fromJson(Map<String, dynamic> json) {
    return OfflineFireRedAsrCtcModelConfig(
      model: json['model'] as String? ?? '',
    );
  }

  @override
  String toString() {
    return 'OfflineFireRedAsrCtcModelConfig(model: $model)';
  }

  Map<String, dynamic> toJson() => {'model': model};

  final String model;
}

/// Model files and prompt settings for FunASR-Nano.
class OfflineFunAsrNanoModelConfig {
  const OfflineFunAsrNanoModelConfig({
    this.encoderAdaptor = '',
    this.llm = '',
    this.embedding = '',
    this.tokenizer = '',
    this.systemPrompt = 'You are a helpful assistant.',
    this.userPrompt = '语音转写：',
    this.maxNewTokens = 512,
    this.temperature = 1e-6,
    this.topP = 0.8,
    this.seed = 42,
    this.language = '',
    this.itn = 1,
    this.hotwords = '',
  });

  factory OfflineFunAsrNanoModelConfig.fromJson(Map<String, dynamic> json) {
    return OfflineFunAsrNanoModelConfig(
      encoderAdaptor: json['encoderAdaptor'] as String? ?? '',
      llm: json['llm'] as String? ?? '',
      embedding: json['embedding'] as String? ?? '',
      tokenizer: json['tokenizer'] as String? ?? '',
      systemPrompt: json['systemPrompt'] as String? ?? '',
      userPrompt: json['userPrompt'] as String? ?? '',
      maxNewTokens: json['maxNewTokens'] as int? ?? 512,
      temperature: (json['temperature'] as num?)?.toDouble() ?? 1e-6,
      topP: (json['topP'] as num?)?.toDouble() ?? 0.8,
      seed: json['seed'] as int? ?? 42,
      language: json['language'] as String? ?? '',
      itn: json['itn'] as int? ?? 1,
      hotwords: json['hotwords'] as String? ?? '',
    );
  }

  @override
  String toString() {
    return 'OfflineFunAsrNanoModelConfig(encoderAdaptor: $encoderAdaptor, llm: $llm, embedding: $embedding, tokenizer: $tokenizer, systemPrompt: $systemPrompt, userPrompt: $userPrompt, maxNewTokens: $maxNewTokens, temperature: $temperature, topP: $topP, seed: $seed, language: $language, itn: $itn, hotwords: $hotwords)';
  }

  Map<String, dynamic> toJson() => {
    'encoderAdaptor': encoderAdaptor,
    'llm': llm,
    'embedding': embedding,
    'tokenizer': tokenizer,
    'systemPrompt': systemPrompt,
    'userPrompt': userPrompt,
    'maxNewTokens': maxNewTokens,
    'temperature': temperature,
    'topP': topP,
    'seed': seed,
    'language': language,
    'itn': itn,
    'hotwords': hotwords,
  };

  final String encoderAdaptor;
  final String llm;
  final String embedding;
  final String tokenizer;
  final String systemPrompt;
  final String userPrompt;
  final int maxNewTokens;
  final double temperature;
  final double topP;
  final int seed;
  final String language;
  final int itn;
  final String hotwords;
}

/// Model files and options for an offline Whisper recognizer.
class OfflineWhisperModelConfig {
  const OfflineWhisperModelConfig({
    this.encoder = '',
    this.decoder = '',
    this.language = '',
    this.task = '',
    this.tailPaddings = -1,
    this.enableTokenTimestamps = false,
    this.enableSegmentTimestamps = false,
  });

  factory OfflineWhisperModelConfig.fromJson(Map<String, dynamic> json) {
    return OfflineWhisperModelConfig(
      encoder: json['encoder'] as String? ?? '',
      decoder: json['decoder'] as String? ?? '',
      language: json['language'] as String? ?? '',
      task: json['task'] as String? ?? '',
      tailPaddings: json['tailPaddings'] as int? ?? -1,
      enableTokenTimestamps: json['enableTokenTimestamps'] as bool? ?? false,
      enableSegmentTimestamps:
          json['enableSegmentTimestamps'] as bool? ?? false,
    );
  }

  @override
  String toString() {
    return 'OfflineWhisperModelConfig(encoder: $encoder, decoder: $decoder, language: $language, task: $task, tailPaddings: $tailPaddings, enableTokenTimestamps: $enableTokenTimestamps, enableSegmentTimestamps: $enableSegmentTimestamps)';
  }

  Map<String, dynamic> toJson() => {
    'encoder': encoder,
    'decoder': decoder,
    'language': language,
    'task': task,
    'tailPaddings': tailPaddings,
    'enableTokenTimestamps': enableTokenTimestamps,
    'enableSegmentTimestamps': enableSegmentTimestamps,
  };

  final String encoder;
  final String decoder;
  final String language;
  final String task;
  final int tailPaddings;
  final bool enableTokenTimestamps;
  final bool enableSegmentTimestamps;
}

/// Model files and translation options for NeMo Canary.
class OfflineCanaryModelConfig {
  const OfflineCanaryModelConfig({
    this.encoder = '',
    this.decoder = '',
    this.srcLang = 'en',
    this.tgtLang = 'en',
    this.usePnc = true,
  });

  factory OfflineCanaryModelConfig.fromJson(Map<String, dynamic> json) {
    return OfflineCanaryModelConfig(
      encoder: json['encoder'] as String? ?? '',
      decoder: json['decoder'] as String? ?? '',
      srcLang: json['srcLang'] as String? ?? 'en',
      tgtLang: json['tgtLang'] as String? ?? 'en',
      usePnc: json['usePnc'] as bool? ?? true,
    );
  }

  @override
  String toString() {
    return 'OfflineCanaryModelConfig(encoder: $encoder, decoder: $decoder, srcLang: $srcLang, tgtLang: $tgtLang, usePnc: $usePnc)';
  }

  Map<String, dynamic> toJson() => {
    'encoder': encoder,
    'decoder': decoder,
    'srcLang': srcLang,
    'tgtLang': tgtLang,
    'usePnc': usePnc,
  };

  final String encoder;
  final String decoder;
  final String srcLang;
  final String tgtLang;
  final bool usePnc;
}

/// Model files for the Fire-Red-ASR transducer recognizer.
class OfflineFireRedAsrModelConfig {
  const OfflineFireRedAsrModelConfig({this.encoder = '', this.decoder = ''});

  factory OfflineFireRedAsrModelConfig.fromJson(Map<String, dynamic> json) {
    return OfflineFireRedAsrModelConfig(
      encoder: json['encoder'] as String? ?? '',
      decoder: json['decoder'] as String? ?? '',
    );
  }

  @override
  String toString() {
    return 'OfflineFireRedAsrModelConfig(encoder: $encoder, decoder: $decoder)';
  }

  Map<String, dynamic> toJson() => {'encoder': encoder, 'decoder': decoder};

  final String encoder;
  final String decoder;
}

// For Moonshine v1, you need 4 models:
//  - preprocessor, encoder, uncachedDecoder, cachedDecoder
//
// For Moonshine v2, you need 2 models:
//  - encoder, mergedDecoder
/// Model files for Moonshine v1 or v2.
class OfflineMoonshineModelConfig {
  const OfflineMoonshineModelConfig({
    this.preprocessor = '',
    this.encoder = '',
    this.uncachedDecoder = '',
    this.cachedDecoder = '',
    this.mergedDecoder = '',
  });

  factory OfflineMoonshineModelConfig.fromJson(Map<String, dynamic> json) {
    return OfflineMoonshineModelConfig(
      preprocessor: json['preprocessor'] as String? ?? '',
      encoder: json['encoder'] as String? ?? '',
      uncachedDecoder: json['uncachedDecoder'] as String? ?? '',
      cachedDecoder: json['cachedDecoder'] as String? ?? '',
      mergedDecoder: json['mergedDecoder'] as String? ?? '',
    );
  }

  @override
  String toString() {
    return 'OfflineMoonshineModelConfig(preprocessor: $preprocessor, encoder: $encoder, uncachedDecoder: $uncachedDecoder, cachedDecoder: $cachedDecoder, mergedDecoder: $mergedDecoder)';
  }

  Map<String, dynamic> toJson() => {
    'preprocessor': preprocessor,
    'encoder': encoder,
    'uncachedDecoder': uncachedDecoder,
    'cachedDecoder': cachedDecoder,
    'mergedDecoder': mergedDecoder,
  };

  final String preprocessor;
  final String encoder;
  final String uncachedDecoder;
  final String cachedDecoder;
  final String mergedDecoder;
}

/// Model files for an offline TDNN recognizer.
class OfflineTdnnModelConfig {
  const OfflineTdnnModelConfig({this.model = ''});

  factory OfflineTdnnModelConfig.fromJson(Map<String, dynamic> json) {
    return OfflineTdnnModelConfig(model: json['model'] as String? ?? '');
  }

  @override
  String toString() {
    return 'OfflineTdnnModelConfig(model: $model)';
  }

  Map<String, dynamic> toJson() => {'model': model};

  final String model;
}

/// Model files and options for SenseVoice.
///
/// In the examples, this is typically paired with the
/// `sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8` package.
class OfflineSenseVoiceModelConfig {
  const OfflineSenseVoiceModelConfig({
    this.model = '',
    this.language = '',
    this.useInverseTextNormalization = false,
  });

  factory OfflineSenseVoiceModelConfig.fromJson(Map<String, dynamic> json) {
    return OfflineSenseVoiceModelConfig(
      model: json['model'] as String? ?? '',
      language: json['language'] as String? ?? '',
      useInverseTextNormalization:
          json['useInverseTextNormalization'] as bool? ?? false,
    );
  }

  @override
  String toString() {
    return 'OfflineSenseVoiceModelConfig(model: $model, language: $language, useInverseTextNormalization: $useInverseTextNormalization)';
  }

  Map<String, dynamic> toJson() => {
    'model': model,
    'language': language,
    'useInverseTextNormalization': useInverseTextNormalization,
  };

  final String model;
  final String language;
  final bool useInverseTextNormalization;
}

/// Optional external language model settings for offline ASR.
class OfflineLMConfig {
  const OfflineLMConfig({this.model = '', this.scale = 1.0});

  factory OfflineLMConfig.fromJson(Map<String, dynamic> json) {
    return OfflineLMConfig(
      model: json['model'] as String? ?? '',
      scale: (json['scale'] as num?)?.toDouble() ?? 1.0,
    );
  }

  @override
  String toString() {
    return 'OfflineLMConfig(model: $model, scale: $scale)';
  }

  Map<String, dynamic> toJson() => {'model': model, 'scale': scale};

  final String model;
  final double scale;
}

/// Aggregate model configuration for offline recognition.
///
/// In typical use, configure exactly one model family and set the shared
/// options such as [tokens], [provider], and [numThreads].
///
/// For NeMo Parakeet-style transducer models, set [modelType] to
/// `nemo_transducer`, matching the repository examples.
class OfflineModelConfig {
  const OfflineModelConfig({
    this.transducer = const OfflineTransducerModelConfig(),
    this.paraformer = const OfflineParaformerModelConfig(),
    this.nemoCtc = const OfflineNemoEncDecCtcModelConfig(),
    this.whisper = const OfflineWhisperModelConfig(),
    this.tdnn = const OfflineTdnnModelConfig(),
    this.senseVoice = const OfflineSenseVoiceModelConfig(),
    this.moonshine = const OfflineMoonshineModelConfig(),
    this.fireRedAsr = const OfflineFireRedAsrModelConfig(),
    this.dolphin = const OfflineDolphinModelConfig(),
    this.zipformerCtc = const OfflineZipformerCtcModelConfig(),
    this.canary = const OfflineCanaryModelConfig(),
    this.wenetCtc = const OfflineWenetCtcModelConfig(),
    this.omnilingual = const OfflineOmnilingualAsrCtcModelConfig(),
    this.medasr = const OfflineMedAsrCtcModelConfig(),
    this.funasrNano = const OfflineFunAsrNanoModelConfig(),
    this.fireRedAsrCtc = const OfflineFireRedAsrCtcModelConfig(),
    required this.tokens,
    this.numThreads = 1,
    this.debug = true,
    this.provider = 'cpu',
    this.modelType = '',
    this.modelingUnit = '',
    this.bpeVocab = '',
    this.telespeechCtc = '',
  });

  factory OfflineModelConfig.fromJson(Map<String, dynamic> json) {
    return OfflineModelConfig(
      transducer: json['transducer'] != null
          ? OfflineTransducerModelConfig.fromJson(
              json['transducer'] as Map<String, dynamic>,
            )
          : const OfflineTransducerModelConfig(),
      paraformer: json['paraformer'] != null
          ? OfflineParaformerModelConfig.fromJson(
              json['paraformer'] as Map<String, dynamic>,
            )
          : const OfflineParaformerModelConfig(),
      nemoCtc: json['nemoCtc'] != null
          ? OfflineNemoEncDecCtcModelConfig.fromJson(
              json['nemoCtc'] as Map<String, dynamic>,
            )
          : const OfflineNemoEncDecCtcModelConfig(),
      whisper: json['whisper'] != null
          ? OfflineWhisperModelConfig.fromJson(
              json['whisper'] as Map<String, dynamic>,
            )
          : const OfflineWhisperModelConfig(),
      tdnn: json['tdnn'] != null
          ? OfflineTdnnModelConfig.fromJson(
              json['tdnn'] as Map<String, dynamic>,
            )
          : const OfflineTdnnModelConfig(),
      senseVoice: json['senseVoice'] != null
          ? OfflineSenseVoiceModelConfig.fromJson(
              json['senseVoice'] as Map<String, dynamic>,
            )
          : const OfflineSenseVoiceModelConfig(),
      moonshine: json['moonshine'] != null
          ? OfflineMoonshineModelConfig.fromJson(
              json['moonshine'] as Map<String, dynamic>,
            )
          : const OfflineMoonshineModelConfig(),
      fireRedAsr: json['fireRedAsr'] != null
          ? OfflineFireRedAsrModelConfig.fromJson(
              json['fireRedAsr'] as Map<String, dynamic>,
            )
          : const OfflineFireRedAsrModelConfig(),
      dolphin: json['dolphin'] != null
          ? OfflineDolphinModelConfig.fromJson(
              json['dolphin'] as Map<String, dynamic>,
            )
          : const OfflineDolphinModelConfig(),
      zipformerCtc: json['zipformerCtc'] != null
          ? OfflineZipformerCtcModelConfig.fromJson(
              json['zipformerCtc'] as Map<String, dynamic>,
            )
          : const OfflineZipformerCtcModelConfig(),
      canary: json['canary'] != null
          ? OfflineCanaryModelConfig.fromJson(
              json['canary'] as Map<String, dynamic>,
            )
          : const OfflineCanaryModelConfig(),
      wenetCtc: json['wenetCtc'] != null
          ? OfflineWenetCtcModelConfig.fromJson(
              json['wenetCtc'] as Map<String, dynamic>,
            )
          : const OfflineWenetCtcModelConfig(),
      omnilingual: json['omnilingual'] != null
          ? OfflineOmnilingualAsrCtcModelConfig.fromJson(
              json['omnilingual'] as Map<String, dynamic>,
            )
          : const OfflineOmnilingualAsrCtcModelConfig(),
      medasr: json['medasr'] != null
          ? OfflineMedAsrCtcModelConfig.fromJson(
              json['medasr'] as Map<String, dynamic>,
            )
          : const OfflineMedAsrCtcModelConfig(),
      funasrNano: json['funasrNano'] != null
          ? OfflineFunAsrNanoModelConfig.fromJson(
              json['funasrNano'] as Map<String, dynamic>,
            )
          : const OfflineFunAsrNanoModelConfig(),
      fireRedAsrCtc: json['fireRedAsrCtc'] != null
          ? OfflineFireRedAsrCtcModelConfig.fromJson(
              json['fireRedAsrCtc'] as Map<String, dynamic>,
            )
          : const OfflineFireRedAsrCtcModelConfig(),
      tokens: json['tokens'] as String,
      numThreads: json['numThreads'] as int? ?? 1,
      debug: json['debug'] as bool? ?? true,
      provider: json['provider'] as String? ?? 'cpu',
      modelType: json['modelType'] as String? ?? '',
      modelingUnit: json['modelingUnit'] as String? ?? '',
      bpeVocab: json['bpeVocab'] as String? ?? '',
      telespeechCtc: json['telespeechCtc'] as String? ?? '',
    );
  }

  @override
  String toString() {
    return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, senseVoice: $senseVoice, moonshine: $moonshine, fireRedAsr: $fireRedAsr, dolphin: $dolphin, zipformerCtc: $zipformerCtc, canary: $canary, wenetCtc: $wenetCtc, omnilingual: $omnilingual, medasr: $medasr, funasrNano: $funasrNano, fireRedAsrCtc: $fireRedAsrCtc, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)';
  }

  Map<String, dynamic> toJson() => {
    'transducer': transducer.toJson(),
    'paraformer': paraformer.toJson(),
    'nemoCtc': nemoCtc.toJson(),
    'whisper': whisper.toJson(),
    'tdnn': tdnn.toJson(),
    'senseVoice': senseVoice.toJson(),
    'moonshine': moonshine.toJson(),
    'fireRedAsr': fireRedAsr.toJson(),
    'dolphin': dolphin.toJson(),
    'zipformerCtc': zipformerCtc.toJson(),
    'canary': canary.toJson(),
    'wenetCtc': wenetCtc.toJson(),
    'omnilingual': omnilingual.toJson(),
    'medasr': medasr.toJson(),
    'funasrNano': funasrNano.toJson(),
    'fireRedAsrCtc': fireRedAsrCtc.toJson(),
    'tokens': tokens,
    'numThreads': numThreads,
    'debug': debug,
    'provider': provider,
    'modelType': modelType,
    'modelingUnit': modelingUnit,
    'bpeVocab': bpeVocab,
    'telespeechCtc': telespeechCtc,
  };

  final OfflineTransducerModelConfig transducer;
  final OfflineParaformerModelConfig paraformer;
  final OfflineNemoEncDecCtcModelConfig nemoCtc;
  final OfflineWhisperModelConfig whisper;
  final OfflineTdnnModelConfig tdnn;
  final OfflineSenseVoiceModelConfig senseVoice;
  final OfflineMoonshineModelConfig moonshine;
  final OfflineFireRedAsrModelConfig fireRedAsr;
  final OfflineDolphinModelConfig dolphin;
  final OfflineZipformerCtcModelConfig zipformerCtc;
  final OfflineCanaryModelConfig canary;
  final OfflineWenetCtcModelConfig wenetCtc;
  final OfflineOmnilingualAsrCtcModelConfig omnilingual;
  final OfflineMedAsrCtcModelConfig medasr;
  final OfflineFunAsrNanoModelConfig funasrNano;
  final OfflineFireRedAsrCtcModelConfig fireRedAsrCtc;

  final String tokens;
  final int numThreads;
  final bool debug;
  final String provider;
  final String modelType;
  final String modelingUnit;
  final String bpeVocab;
  final String telespeechCtc;
}

/// Top-level configuration for [OfflineRecognizer].
///
/// This combines feature extraction, the selected model family, optional
/// language model settings, hotwords, grammar resources, and optional
/// homophone replacement resources.
class OfflineRecognizerConfig {
  const OfflineRecognizerConfig({
    this.feat = const FeatureConfig(),
    required this.model,
    this.lm = const OfflineLMConfig(),
    this.decodingMethod = 'greedy_search',
    this.maxActivePaths = 4,
    this.hotwordsFile = '',
    this.hotwordsScore = 1.5,
    this.ruleFsts = '',
    this.ruleFars = '',
    this.blankPenalty = 0.0,
    this.hr = const HomophoneReplacerConfig(),
  });

  factory OfflineRecognizerConfig.fromJson(Map<String, dynamic> json) {
    return OfflineRecognizerConfig(
      feat: json['feat'] != null
          ? FeatureConfig.fromJson(json['feat'] as Map<String, dynamic>)
          : const FeatureConfig(),
      model: OfflineModelConfig.fromJson(json['model'] as Map<String, dynamic>),
      lm: json['lm'] != null
          ? OfflineLMConfig.fromJson(json['lm'] as Map<String, dynamic>)
          : const OfflineLMConfig(),
      decodingMethod: json['decodingMethod'] as String? ?? 'greedy_search',
      maxActivePaths: json['maxActivePaths'] as int? ?? 4,
      hotwordsFile: json['hotwordsFile'] as String? ?? '',
      hotwordsScore: (json['hotwordsScore'] as num?)?.toDouble() ?? 1.5,
      ruleFsts: json['ruleFsts'] as String? ?? '',
      ruleFars: json['ruleFars'] as String? ?? '',
      blankPenalty: (json['blankPenalty'] as num?)?.toDouble() ?? 0.0,
      hr: HomophoneReplacerConfig.fromJson(json['hr'] as Map<String, dynamic>),
    );
  }

  @override
  String toString() {
    return 'OfflineRecognizerConfig(feat: $feat, model: $model, lm: $lm, decodingMethod: $decodingMethod, maxActivePaths: $maxActivePaths, hotwordsFile: $hotwordsFile, hotwordsScore: $hotwordsScore, ruleFsts: $ruleFsts, ruleFars: $ruleFars, blankPenalty: $blankPenalty, hr: $hr)';
  }

  Map<String, dynamic> toJson() => {
    'feat': feat.toJson(),
    'model': model.toJson(),
    'lm': lm.toJson(),
    'decodingMethod': decodingMethod,
    'maxActivePaths': maxActivePaths,
    'hotwordsFile': hotwordsFile,
    'hotwordsScore': hotwordsScore,
    'ruleFsts': ruleFsts,
    'ruleFars': ruleFars,
    'blankPenalty': blankPenalty,
    'hr': hr.toJson(),
  };

  final FeatureConfig feat;
  final OfflineModelConfig model;
  final OfflineLMConfig lm;
  final String decodingMethod;

  final int maxActivePaths;

  final String hotwordsFile;

  final double hotwordsScore;

  final String ruleFsts;
  final String ruleFars;

  final double blankPenalty;
  final HomophoneReplacerConfig hr;
}

/// Recognition result returned by [OfflineRecognizer.getResult].
///
/// Some model families populate [lang], [emotion], or [event] in addition to
/// the decoded text and token timestamps.
class OfflineRecognizerResult {
  OfflineRecognizerResult({
    required this.text,
    required this.tokens,
    required this.timestamps,
    required this.lang,
    required this.emotion,
    required this.event,
  });

  factory OfflineRecognizerResult.fromJson(Map<String, dynamic> json) {
    return OfflineRecognizerResult(
      text: json['text'] as String? ?? '',
      tokens: (json['tokens'] as List?)?.map((e) => e as String).toList() ?? [],
      timestamps:
          (json['timestamps'] as List?)
              ?.map((e) => (e as num).toDouble())
              .toList() ??
          [],
      lang: json['lang'] as String? ?? '',
      emotion: json['emotion'] as String? ?? '',
      event: json['event'] as String? ?? '',
    );
  }

  @override
  String toString() {
    return 'OfflineRecognizerResult(text: $text, tokens: $tokens, timestamps: $timestamps, lang: $lang, emotion: $emotion, event: $event)';
  }

  Map<String, dynamic> toJson() => {
    'text': text,
    'tokens': tokens,
    'timestamps': timestamps,
    'lang': lang,
    'emotion': emotion,
    'event': event,
  };

  final String text;
  final List<String> tokens;
  final List<double> timestamps;
  final String lang;
  final String emotion;
  final String event;
}

/// Offline speech recognizer.
///
/// Create one from an [OfflineRecognizerConfig], then create an
/// [OfflineStream], feed waveform samples, call [decode], and fetch the final
/// hypothesis with [getResult].
class OfflineRecognizer {
  OfflineRecognizer.fromPtr({required this.ptr, required this.config});

  OfflineRecognizer._({required this.ptr, required this.config});

  /// Release the native recognizer.
  void free() {
    if (SherpaOnnxBindings.destroyOfflineRecognizer == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return;
    }
    SherpaOnnxBindings.destroyOfflineRecognizer?.call(ptr);
    ptr = nullptr;
  }

  /// The user is responsible to call the OfflineRecognizer.free()
  /// method of the returned instance to avoid memory leak.

  /// Create a recognizer from [config].
  factory OfflineRecognizer(OfflineRecognizerConfig config) {
    final c = convertConfig(config);

    if (SherpaOnnxBindings.createOfflineRecognizer == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    final ptr = SherpaOnnxBindings.createOfflineRecognizer?.call(c) ?? nullptr;

    if (ptr == nullptr) {
      throw Exception(
        "Failed to create offline recognizer. Please check your config",
      );
    }

    freeConfig(c);

    return OfflineRecognizer._(ptr: ptr, config: config);
  }

  /// Replace the runtime configuration.
  void setConfig(OfflineRecognizerConfig config) {
    if (SherpaOnnxBindings.offlineRecognizerSetConfig == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return;
    }

    final c = convertConfig(config);

    SherpaOnnxBindings.offlineRecognizerSetConfig?.call(ptr, c);

    freeConfig(c);
    // we don't update this.config
  }

  static Pointer<SherpaOnnxOfflineRecognizerConfig> convertConfig(
    OfflineRecognizerConfig config,
  ) {
    final c = calloc<SherpaOnnxOfflineRecognizerConfig>();

    c.ref.feat.sampleRate = config.feat.sampleRate;
    c.ref.feat.featureDim = config.feat.featureDim;

    // transducer
    c.ref.model.transducer.encoder = config.model.transducer.encoder
        .toNativeUtf8();
    c.ref.model.transducer.decoder = config.model.transducer.decoder
        .toNativeUtf8();
    c.ref.model.transducer.joiner = config.model.transducer.joiner
        .toNativeUtf8();

    // paraformer
    c.ref.model.paraformer.model = config.model.paraformer.model.toNativeUtf8();

    // nemoCtc
    c.ref.model.nemoCtc.model = config.model.nemoCtc.model.toNativeUtf8();

    // whisper
    c.ref.model.whisper.encoder = config.model.whisper.encoder.toNativeUtf8();

    c.ref.model.whisper.decoder = config.model.whisper.decoder.toNativeUtf8();

    c.ref.model.whisper.language = config.model.whisper.language.toNativeUtf8();

    c.ref.model.whisper.task = config.model.whisper.task.toNativeUtf8();

    c.ref.model.whisper.tailPaddings = config.model.whisper.tailPaddings;
    c.ref.model.whisper.enableTokenTimestamps =
        config.model.whisper.enableTokenTimestamps ? 1 : 0;
    c.ref.model.whisper.enableSegmentTimestamps =
        config.model.whisper.enableSegmentTimestamps ? 1 : 0;

    c.ref.model.tdnn.model = config.model.tdnn.model.toNativeUtf8();

    c.ref.model.senseVoice.model = config.model.senseVoice.model.toNativeUtf8();

    c.ref.model.senseVoice.language = config.model.senseVoice.language
        .toNativeUtf8();

    c.ref.model.senseVoice.useInverseTextNormalization =
        config.model.senseVoice.useInverseTextNormalization ? 1 : 0;

    c.ref.model.moonshine.preprocessor = config.model.moonshine.preprocessor
        .toNativeUtf8();
    c.ref.model.moonshine.encoder = config.model.moonshine.encoder
        .toNativeUtf8();
    c.ref.model.moonshine.uncachedDecoder = config
        .model
        .moonshine
        .uncachedDecoder
        .toNativeUtf8();
    c.ref.model.moonshine.cachedDecoder = config.model.moonshine.cachedDecoder
        .toNativeUtf8();
    c.ref.model.moonshine.mergedDecoder = config.model.moonshine.mergedDecoder
        .toNativeUtf8();

    // FireRedAsr
    c.ref.model.fireRedAsr.encoder = config.model.fireRedAsr.encoder
        .toNativeUtf8();
    c.ref.model.fireRedAsr.decoder = config.model.fireRedAsr.decoder
        .toNativeUtf8();

    c.ref.model.dolphin.model = config.model.dolphin.model.toNativeUtf8();
    c.ref.model.zipformerCtc.model = config.model.zipformerCtc.model
        .toNativeUtf8();

    c.ref.model.canary.encoder = config.model.canary.encoder.toNativeUtf8();
    c.ref.model.canary.decoder = config.model.canary.decoder.toNativeUtf8();
    c.ref.model.canary.srcLang = config.model.canary.srcLang.toNativeUtf8();
    c.ref.model.canary.tgtLang = config.model.canary.tgtLang.toNativeUtf8();
    c.ref.model.canary.usePnc = config.model.canary.usePnc ? 1 : 0;

    c.ref.model.wenetCtc.model = config.model.wenetCtc.model.toNativeUtf8();
    c.ref.model.omnilingual.model = config.model.omnilingual.model
        .toNativeUtf8();
    c.ref.model.medasr.model = config.model.medasr.model.toNativeUtf8();

    c.ref.model.funasrNano.encoderAdaptor = config
        .model
        .funasrNano
        .encoderAdaptor
        .toNativeUtf8();
    c.ref.model.funasrNano.llm = config.model.funasrNano.llm.toNativeUtf8();
    c.ref.model.funasrNano.embedding = config.model.funasrNano.embedding
        .toNativeUtf8();
    c.ref.model.funasrNano.tokenizer = config.model.funasrNano.tokenizer
        .toNativeUtf8();
    c.ref.model.funasrNano.systemPrompt = config.model.funasrNano.systemPrompt
        .toNativeUtf8();
    c.ref.model.funasrNano.userPrompt = config.model.funasrNano.userPrompt
        .toNativeUtf8();
    c.ref.model.funasrNano.maxNewTokens = config.model.funasrNano.maxNewTokens;
    c.ref.model.funasrNano.temperature = config.model.funasrNano.temperature;
    c.ref.model.funasrNano.topP = config.model.funasrNano.topP;
    c.ref.model.funasrNano.seed = config.model.funasrNano.seed;
    c.ref.model.funasrNano.language = config.model.funasrNano.language
        .toNativeUtf8();
    c.ref.model.funasrNano.itn = config.model.funasrNano.itn;
    c.ref.model.funasrNano.hotwords = config.model.funasrNano.hotwords
        .toNativeUtf8();

    c.ref.model.fireRedAsrCtc.model = config.model.fireRedAsrCtc.model
        .toNativeUtf8();

    c.ref.model.tokens = config.model.tokens.toNativeUtf8();

    c.ref.model.numThreads = config.model.numThreads;
    c.ref.model.debug = config.model.debug ? 1 : 0;
    c.ref.model.provider = config.model.provider.toNativeUtf8();
    c.ref.model.modelType = config.model.modelType.toNativeUtf8();
    c.ref.model.modelingUnit = config.model.modelingUnit.toNativeUtf8();
    c.ref.model.bpeVocab = config.model.bpeVocab.toNativeUtf8();
    c.ref.model.telespeechCtc = config.model.telespeechCtc.toNativeUtf8();

    c.ref.lm.model = config.lm.model.toNativeUtf8();
    c.ref.lm.scale = config.lm.scale;

    c.ref.decodingMethod = config.decodingMethod.toNativeUtf8();
    c.ref.maxActivePaths = config.maxActivePaths;

    c.ref.hotwordsFile = config.hotwordsFile.toNativeUtf8();
    c.ref.hotwordsScore = config.hotwordsScore;

    c.ref.ruleFsts = config.ruleFsts.toNativeUtf8();
    c.ref.ruleFars = config.ruleFars.toNativeUtf8();

    c.ref.blankPenalty = config.blankPenalty;

    c.ref.hr.lexicon = config.hr.lexicon.toNativeUtf8();
    c.ref.hr.ruleFsts = config.hr.ruleFsts.toNativeUtf8();

    return c;
  }

  static void freeConfig(Pointer<SherpaOnnxOfflineRecognizerConfig> c) {
    calloc.free(c.ref.hr.lexicon);
    calloc.free(c.ref.hr.ruleFsts);
    calloc.free(c.ref.ruleFars);
    calloc.free(c.ref.ruleFsts);
    calloc.free(c.ref.hotwordsFile);
    calloc.free(c.ref.decodingMethod);
    calloc.free(c.ref.lm.model);
    calloc.free(c.ref.model.telespeechCtc);
    calloc.free(c.ref.model.bpeVocab);
    calloc.free(c.ref.model.modelingUnit);
    calloc.free(c.ref.model.modelType);
    calloc.free(c.ref.model.provider);
    calloc.free(c.ref.model.tokens);
    calloc.free(c.ref.model.fireRedAsrCtc.model);
    calloc.free(c.ref.model.funasrNano.hotwords);
    calloc.free(c.ref.model.funasrNano.language);
    calloc.free(c.ref.model.funasrNano.userPrompt);
    calloc.free(c.ref.model.funasrNano.systemPrompt);
    calloc.free(c.ref.model.funasrNano.tokenizer);
    calloc.free(c.ref.model.funasrNano.embedding);
    calloc.free(c.ref.model.funasrNano.llm);
    calloc.free(c.ref.model.funasrNano.encoderAdaptor);
    calloc.free(c.ref.model.medasr.model);
    calloc.free(c.ref.model.omnilingual.model);
    calloc.free(c.ref.model.wenetCtc.model);
    calloc.free(c.ref.model.canary.tgtLang);
    calloc.free(c.ref.model.canary.srcLang);
    calloc.free(c.ref.model.canary.decoder);
    calloc.free(c.ref.model.canary.encoder);
    calloc.free(c.ref.model.zipformerCtc.model);
    calloc.free(c.ref.model.dolphin.model);
    calloc.free(c.ref.model.fireRedAsr.decoder);
    calloc.free(c.ref.model.fireRedAsr.encoder);
    calloc.free(c.ref.model.moonshine.mergedDecoder);
    calloc.free(c.ref.model.moonshine.cachedDecoder);
    calloc.free(c.ref.model.moonshine.uncachedDecoder);
    calloc.free(c.ref.model.moonshine.encoder);
    calloc.free(c.ref.model.moonshine.preprocessor);
    calloc.free(c.ref.model.senseVoice.language);
    calloc.free(c.ref.model.senseVoice.model);
    calloc.free(c.ref.model.tdnn.model);
    calloc.free(c.ref.model.whisper.task);
    calloc.free(c.ref.model.whisper.language);
    calloc.free(c.ref.model.whisper.decoder);
    calloc.free(c.ref.model.whisper.encoder);
    calloc.free(c.ref.model.nemoCtc.model);
    calloc.free(c.ref.model.paraformer.model);
    calloc.free(c.ref.model.transducer.encoder);
    calloc.free(c.ref.model.transducer.decoder);
    calloc.free(c.ref.model.transducer.joiner);
    calloc.free(c);
  }

  /// The user has to invoke stream.free() on the returned instance
  /// to avoid memory leak
  /// Create an offline stream.
  OfflineStream createStream() {
    if (SherpaOnnxBindings.createOfflineStream == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      throw Exception("Failed to create offline stream");
    }

    final p = SherpaOnnxBindings.createOfflineStream?.call(ptr) ?? nullptr;

    if (p == nullptr) {
      throw Exception("Failed to create offline stream");
    }

    return OfflineStream(ptr: p);
  }

  /// Decode one stream.
  void decode(OfflineStream stream) {
    if (SherpaOnnxBindings.decodeOfflineStream == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr || stream.ptr == nullptr) {
      return;
    }

    SherpaOnnxBindings.decodeOfflineStream?.call(ptr, stream.ptr);
  }

  /// Fetch the current recognition result for [stream].
  OfflineRecognizerResult getResult(OfflineStream stream) {
    if (SherpaOnnxBindings.getOfflineStreamResultAsJson == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr || stream.ptr == nullptr) {
      return OfflineRecognizerResult(
        text: '',
        tokens: [],
        timestamps: [],
        lang: '',
        emotion: '',
        event: '',
      );
    }

    final json =
        SherpaOnnxBindings.getOfflineStreamResultAsJson?.call(stream.ptr) ??
        nullptr;
    if (json == nullptr) {
      return OfflineRecognizerResult(
        text: '',
        tokens: [],
        timestamps: [],
        lang: '',
        emotion: '',
        event: '',
      );
    }

    final parsedJson = jsonDecode(toDartString(json));

    SherpaOnnxBindings.destroyOfflineStreamResultJson?.call(json);

    return OfflineRecognizerResult(
      text: parsedJson['text'],
      tokens: List<String>.from(parsedJson['tokens']),
      timestamps: List<double>.from(parsedJson['timestamps']),
      lang: parsedJson['lang'],
      emotion: parsedJson['emotion'],
      event: parsedJson['event'],
    );
  }

  Pointer<SherpaOnnxOfflineRecognizer> ptr;
  OfflineRecognizerConfig config;
}


================================================
FILE: flutter/sherpa_onnx/lib/src/offline_speaker_diarization.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:ffi';
import 'dart:typed_data';

import 'package:ffi/ffi.dart';

import './sherpa_onnx_bindings.dart';
import './speaker_identification.dart';

/// Offline speaker diarization.
///
/// This module combines segmentation, speaker embedding extraction, and
/// clustering to assign speaker labels to time spans. See
/// `dart-api-examples/speaker-diarization/` for a complete example.
class OfflineSpeakerDiarizationSegment {
  const OfflineSpeakerDiarizationSegment({
    required this.start,
    required this.end,
    required this.speaker,
  });

  factory OfflineSpeakerDiarizationSegment.fromJson(Map<String, dynamic> json) {
    return OfflineSpeakerDiarizationSegment(
      start: (json['start'] as num).toDouble(),
      end: (json['end'] as num).toDouble(),
      speaker: json['speaker'] as int,
    );
  }

  @override
  String toString() {
    return 'OfflineSpeakerDiarizationSegment(start: $start, end: $end, speaker: $speaker)';
  }

  Map<String, dynamic> toJson() => {
        'start': start,
        'end': end,
        'speaker': speaker,
      };

  final double start;
  final double end;
  final int speaker;
}

/// Pyannote segmentation model path.
class OfflineSpeakerSegmentationPyannoteModelConfig {
  const OfflineSpeakerSegmentationPyannoteModelConfig({
    this.model = '',
  });

  factory OfflineSpeakerSegmentationPyannoteModelConfig.fromJson(
      Map<String, dynamic> json) {
    return OfflineSpeakerSegmentationPyannoteModelConfig(
      model: json['model'] as String? ?? '',
    );
  }

  @override
  String toString() {
    return 'OfflineSpeakerSegmentationPyannoteModelConfig(model: $model)';
  }

  Map<String, dynamic> toJson() => {
        'model': model,
      };

  final String model;
}

/// Segmentation model configuration for speaker diarization.
class OfflineSpeakerSegmentationModelConfig {
  const OfflineSpeakerSegmentationModelConfig({
    this.pyannote = const OfflineSpeakerSegmentationPyannoteModelConfig(),
    this.numThreads = 1,
    this.debug = true,
    this.provider = 'cpu',
  });

  factory OfflineSpeakerSegmentationModelConfig.fromJson(
      Map<String, dynamic> json) {
    return OfflineSpeakerSegmentationModelConfig(
      pyannote: json['pyannote'] != null
          ? OfflineSpeakerSegmentationPyannoteModelConfig.fromJson(
              json['pyannote'] as Map<String, dynamic>)
          : const OfflineSpeakerSegmentationPyannoteModelConfig(),
      numThreads: json['numThreads'] as int? ?? 1,
      debug: json['debug'] as bool? ?? true,
      provider: json['provider'] as String? ?? 'cpu',
    );
  }

  @override
  String toString() {
    return 'OfflineSpeakerSegmentationModelConfig(pyannote: $pyannote, numThreads: $numThreads, debug: $debug, provider: $provider)';
  }

  Map<String, dynamic> toJson() => {
        'pyannote': pyannote.toJson(),
        'numThreads': numThreads,
        'debug': debug,
        'provider': provider,
      };

  final OfflineSpeakerSegmentationPyannoteModelConfig pyannote;

  final int numThreads;
  final bool debug;
  final String provider;
}

/// Clustering options used after segmentation and embedding extraction.
class FastClusteringConfig {
  const FastClusteringConfig({
    this.numClusters = -1,
    this.threshold = 0.5,
  });

  factory FastClusteringConfig.fromJson(Map<String, dynamic> json) {
    return FastClusteringConfig(
      numClusters: json['numClusters'] as int? ?? -1,
      threshold: (json['threshold'] as num?)?.toDouble() ?? 0.5,
    );
  }

  @override
  String toString() {
    return 'FastClusteringConfig(numClusters: $numClusters, threshold: $threshold)';
  }

  Map<String, dynamic> toJson() => {
        'numClusters': numClusters,
        'threshold': threshold,
      };

  final int numClusters;
  final double threshold;
}

/// Top-level configuration for [OfflineSpeakerDiarization].
class OfflineSpeakerDiarizationConfig {
  const OfflineSpeakerDiarizationConfig({
    this.segmentation = const OfflineSpeakerSegmentationModelConfig(),
    this.embedding = const SpeakerEmbeddingExtractorConfig(model: ''),
    this.clustering = const FastClusteringConfig(),
    this.minDurationOn = 0.2,
    this.minDurationOff = 0.5,
  });

  factory OfflineSpeakerDiarizationConfig.fromJson(Map<String, dynamic> json) {
    return OfflineSpeakerDiarizationConfig(
      segmentation: json['segmentation'] != null
          ? OfflineSpeakerSegmentationModelConfig.fromJson(
              json['segmentation'] as Map<String, dynamic>)
          : const OfflineSpeakerSegmentationModelConfig(),
      embedding: json['embedding'] != null
          ? SpeakerEmbeddingExtractorConfig.fromJson(
              json['embedding'] as Map<String, dynamic>)
          : const SpeakerEmbeddingExtractorConfig(model: ''),
      clustering: json['clustering'] != null
          ? FastClusteringConfig.fromJson(
              json['clustering'] as Map<String, dynamic>)
          : const FastClusteringConfig(),
      minDurationOn: (json['minDurationOn'] as num?)?.toDouble() ?? 0.2,
      minDurationOff: (json['minDurationOff'] as num?)?.toDouble() ?? 0.5,
    );
  }

  @override
  String toString() {
    return 'OfflineSpeakerDiarizationConfig(segmentation: $segmentation, embedding: $embedding, clustering: $clustering, minDurationOn: $minDurationOn, minDurationOff: $minDurationOff)';
  }

  Map<String, dynamic> toJson() => {
        'segmentation': segmentation.toJson(),
        'embedding': embedding.toJson(),
        'clustering': clustering.toJson(),
        'minDurationOn': minDurationOn,
        'minDurationOff': minDurationOff,
      };

  final OfflineSpeakerSegmentationModelConfig segmentation;
  final SpeakerEmbeddingExtractorConfig embedding;
  final FastClusteringConfig clustering;
  final double minDurationOff; // in seconds
  final double minDurationOn; // in seconds
}

/// Offline speaker diarizer.
class OfflineSpeakerDiarization {
  OfflineSpeakerDiarization.fromPtr(
      {required this.ptr, required this.config, required this.sampleRate});

  OfflineSpeakerDiarization._(
      {required this.ptr, required this.config, required this.sampleRate});

  /// Release the native diarizer.
  void free() {
    if (SherpaOnnxBindings.sherpaOnnxDestroyOfflineSpeakerDiarization == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return;
    }
    SherpaOnnxBindings.sherpaOnnxDestroyOfflineSpeakerDiarization?.call(ptr);
    ptr = nullptr;
  }

  /// Create a diarizer from [config].
  factory OfflineSpeakerDiarization(OfflineSpeakerDiarizationConfig config) {
    if (SherpaOnnxBindings.sherpaOnnxCreateOfflineSpeakerDiarization == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    final c = calloc<SherpaOnnxOfflineSpeakerDiarizationConfig>();

    c.ref.segmentation.pyannote.model =
        config.segmentation.pyannote.model.toNativeUtf8();
    c.ref.segmentation.numThreads = config.segmentation.numThreads;
    c.ref.segmentation.debug = config.segmentation.debug ? 1 : 0;
    c.ref.segmentation.provider = config.segmentation.provider.toNativeUtf8();

    c.ref.embedding.model = config.embedding.model.toNativeUtf8();
    c.ref.embedding.numThreads = config.embedding.numThreads;
    c.ref.embedding.debug = config.embedding.debug ? 1 : 0;
    c.ref.embedding.provider = config.embedding.provider.toNativeUtf8();

    c.ref.clustering.numClusters = config.clustering.numClusters;
    c.ref.clustering.threshold = config.clustering.threshold;

    c.ref.minDurationOn = config.minDurationOn;
    c.ref.minDurationOff = config.minDurationOff;

    final ptr =
        SherpaOnnxBindings.sherpaOnnxCreateOfflineSpeakerDiarization?.call(c) ??
            nullptr;

    calloc.free(c.ref.embedding.provider);
    calloc.free(c.ref.embedding.model);
    calloc.free(c.ref.segmentation.provider);
    calloc.free(c.ref.segmentation.pyannote.model);
    calloc.free(c);

    if (ptr == nullptr) {
      throw Exception(
          "Failed to create offline speaker diarization. Please check your config");
    }

    int sampleRate = SherpaOnnxBindings
              .sherpaOnnxOfflineSpeakerDiarizationGetSampleRate
              ?.call(ptr) ?? 0;

    return OfflineSpeakerDiarization._(
        ptr: ptr, config: config, sampleRate: sampleRate);
  }

  /// Process a complete waveform and return speaker-labeled segments.
  List<OfflineSpeakerDiarizationSegment> process(
      {required Float32List samples}) {
    if (SherpaOnnxBindings.sherpaOnnxOfflineSpeakerDiarizationProcess == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return <OfflineSpeakerDiarizationSegment>[];
    }

    final n = samples.length;
    final Pointer<Float> p = calloc<Float>(n);

    final pList = p.asTypedList(n);
    pList.setAll(0, samples);

    final r = SherpaOnnxBindings.sherpaOnnxOfflineSpeakerDiarizationProcess
            ?.call(ptr, p, n) ??
        nullptr;

    final ans = _processImpl(r);

    SherpaOnnxBindings.sherpaOnnxOfflineSpeakerDiarizationDestroyResult
        ?.call(r);

    return ans;
  }

  List<OfflineSpeakerDiarizationSegment> processWithCallback({
    required Float32List samples,
    required int Function(int numProcessedChunks, int numTotalChunks) callback,
  }) {
    if (SherpaOnnxBindings
            .sherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg ==
        null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return <OfflineSpeakerDiarizationSegment>[];
    }

    final n = samples.length;
    final Pointer<Float> p = calloc<Float>(n);

    final pList = p.asTypedList(n);
    pList.setAll(0, samples);

    final wrapper = NativeCallable<
            SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArgNative>.isolateLocal(
        (int numProcessedChunks, int numTotalChunks) {
      return callback(numProcessedChunks, numTotalChunks);
    }, exceptionalReturn: 0);

    final r = SherpaOnnxBindings
            .sherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg
            ?.call(ptr, p, n, wrapper.nativeFunction) ??
        nullptr;

    wrapper.close();

    final ans = _processImpl(r);

    SherpaOnnxBindings.sherpaOnnxOfflineSpeakerDiarizationDestroyResult
        ?.call(r);

    return ans;
  }

  List<OfflineSpeakerDiarizationSegment> _processImpl(
      Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> r) {
    if (r == nullptr) {
      return <OfflineSpeakerDiarizationSegment>[];
    }

    final numSegments = SherpaOnnxBindings
            .sherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments
            ?.call(r) ??
        0;
    final segments = SherpaOnnxBindings
            .sherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime
            ?.call(r) ??
        nullptr;

    if (segments == nullptr) {
      return <OfflineSpeakerDiarizationSegment>[];
    }

    final ans = <OfflineSpeakerDiarizationSegment>[];
    for (int i = 0; i != numSegments; ++i) {
      final s = segments + i;

      final tmp = OfflineSpeakerDiarizationSegment(
          start: s.ref.start, end: s.ref.end, speaker: s.ref.speaker);
      ans.add(tmp);
    }

    SherpaOnnxBindings.sherpaOnnxOfflineSpeakerDiarizationDestroySegment
        ?.call(segments);

    return ans;
  }

  Pointer<SherpaOnnxOfflineSpeakerDiarization> ptr;
  OfflineSpeakerDiarizationConfig config;
  final int sampleRate;
}


================================================
FILE: flutter/sherpa_onnx/lib/src/offline_speech_denoiser.dart
================================================
// Copyright (c)  2025  Xiaomi Corporation
import 'dart:ffi';
import 'dart:typed_data';

import 'package:ffi/ffi.dart';
import './sherpa_onnx_bindings.dart';

/// Offline speech denoising.
///
/// Supported model families include GTCRN and DPDFNet. See the examples under
/// `dart-api-examples/speech-enhancement-gtcrn/` and
/// `dart-api-examples/speech-enhancement-dpdfnet/`.
class OfflineSpeechDenoiserGtcrnModelConfig {
  const OfflineSpeechDenoiserGtcrnModelConfig({
    this.model = '',
  });

  factory OfflineSpeechDenoiserGtcrnModelConfig.fromJson(
      Map<String, dynamic> json) {
    return OfflineSpeechDenoiserGtcrnModelConfig(
      model: json['model'] as String? ?? '',
    );
  }

  @override
  String toString() {
    return 'OfflineSpeechDenoiserGtcrnModelConfig(model: $model)';
  }

  Map<String, dynamic> toJson() => {
        'model': model,
      };

  final String model;
}

/// DPDFNet model path for offline speech denoising.
class OfflineSpeechDenoiserDpdfNetModelConfig {
  const OfflineSpeechDenoiserDpdfNetModelConfig({
    this.model = '',
  });

  factory OfflineSpeechDenoiserDpdfNetModelConfig.fromJson(
      Map<String, dynamic> json) {
    return OfflineSpeechDenoiserDpdfNetModelConfig(
      model: json['model'] as String? ?? '',
    );
  }

  @override
  String toString() {
    return 'OfflineSpeechDenoiserDpdfNetModelConfig(model: $model)';
  }

  Map<String, dynamic> toJson() => {
        'model': model,
      };

  final String model;
}

/// Aggregate model configuration for [OfflineSpeechDenoiser].
///
/// Configure either [gtcrn] or [dpdfnet] for typical use.
class OfflineSpeechDenoiserModelConfig {
  const OfflineSpeechDenoiserModelConfig({
    this.gtcrn = const OfflineSpeechDenoiserGtcrnModelConfig(),
    this.dpdfnet = const OfflineSpeechDenoiserDpdfNetModelConfig(),
    this.numThreads = 1,
    this.debug = true,
    this.provider = 'cpu',
  });

  factory OfflineSpeechDenoiserModelConfig.fromJson(Map<String, dynamic> json) {
    return OfflineSpeechDenoiserModelConfig(
      gtcrn: json['gtcrn'] != null
          ? OfflineSpeechDenoiserGtcrnModelConfig.fromJson(
              json['gtcrn'] as Map<String, dynamic>)
          : const OfflineSpeechDenoiserGtcrnModelConfig(),
      dpdfnet: json['dpdfnet'] != null
          ? OfflineSpeechDenoiserDpdfNetModelConfig.fromJson(
              json['dpdfnet'] as Map<String, dynamic>)
          : const OfflineSpeechDenoiserDpdfNetModelConfig(),
      numThreads: json['numThreads'] as int? ?? 1,
      debug: json['debug'] as bool? ?? true,
      provider: json['provider'] as String? ?? 'cpu',
    );
  }

  @override
  String toString() {
    return 'OfflineSpeechDenoiserModelConfig(gtcrn: $gtcrn, dpdfnet: $dpdfnet, numThreads: $numThreads, debug: $debug, provider: $provider)';
  }

  Map<String, dynamic> toJson() => {
        'gtcrn': gtcrn.toJson(),
        'dpdfnet': dpdfnet.toJson(),
        'numThreads': numThreads,
        'debug': debug,
        'provider': provider,
      };

  final OfflineSpeechDenoiserGtcrnModelConfig gtcrn;
  final OfflineSpeechDenoiserDpdfNetModelConfig dpdfnet;
  final int numThreads;
  final bool debug;
  final String provider;
}

/// Top-level configuration for [OfflineSpeechDenoiser].
class OfflineSpeechDenoiserConfig {
  const OfflineSpeechDenoiserConfig({
    this.model = const OfflineSpeechDenoiserModelConfig(),
  });

  factory OfflineSpeechDenoiserConfig.fromJson(Map<String, dynamic> json) {
    return OfflineSpeechDenoiserConfig(
      model: json['model'] != null
          ? OfflineSpeechDenoiserModelConfig.fromJson(
              json['model'] as Map<String, dynamic>)
          : const OfflineSpeechDenoiserModelConfig(),
    );
  }

  @override
  String toString() {
    return 'OfflineSpeechDenoiserConfig(model: $model)';
  }

  Map<String, dynamic> toJson() => {
        'model': model.toJson(),
      };

  final OfflineSpeechDenoiserModelConfig model;
}

/// Audio returned by offline or online speech denoisers.
class DenoisedAudio {
  DenoisedAudio({
    required this.samples,
    required this.sampleRate,
  });

  final Float32List samples;
  final int sampleRate;
}

/// Offline speech denoiser.
class OfflineSpeechDenoiser {
  OfflineSpeechDenoiser.fromPtr({required this.ptr, required this.config});

  OfflineSpeechDenoiser._({required this.ptr, required this.config});

  /// Create an offline denoiser from [config].
  factory OfflineSpeechDenoiser(OfflineSpeechDenoiserConfig config) {
    if (SherpaOnnxBindings.sherpaOnnxCreateOfflineSpeechDenoiser == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    final c = calloc<SherpaOnnxOfflineSpeechDenoiserConfig>();
    c.ref.model.gtcrn.model = config.model.gtcrn.model.toNativeUtf8();
    c.ref.model.dpdfnet.model = config.model.dpdfnet.model.toNativeUtf8();

    c.ref.model.numThreads = config.model.numThreads;
    c.ref.model.debug = config.model.debug ? 1 : 0;
    c.ref.model.provider = config.model.provider.toNativeUtf8();

    final ptr =
        SherpaOnnxBindings.sherpaOnnxCreateOfflineSpeechDenoiser?.call(c) ??
            nullptr;

    calloc.free(c.ref.model.provider);
    calloc.free(c.ref.model.gtcrn.model);
    calloc.free(c.ref.model.dpdfnet.model);
    calloc.free(c);

    if (ptr == nullptr) {
      throw Exception(
          "Failed to create offline speech denoiser. Please check your config");
    }

    return OfflineSpeechDenoiser._(ptr: ptr, config: config);
  }

  /// Release the native denoiser.
  void free() {
    if (SherpaOnnxBindings.sherpaOnnxDestroyOfflineSpeechDenoiser == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return;
    }

    SherpaOnnxBindings.sherpaOnnxDestroyOfflineSpeechDenoiser?.call(ptr);
    ptr = nullptr;
  }

  /// Denoise one chunk or a complete waveform.
  DenoisedAudio run({required Float32List samples, required int sampleRate}) {
    if (SherpaOnnxBindings.sherpaOnnxOfflineSpeechDenoiserRun == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return DenoisedAudio(samples: Float32List(0), sampleRate: 0);
    }

    final n = samples.length;
    final Pointer<Float> psamples = calloc<Float>(n);

    final pList = psamples.asTypedList(n);
    pList.setAll(0, samples);

    final p = SherpaOnnxBindings.sherpaOnnxOfflineSpeechDenoiserRun
            ?.call(ptr, psamples, n, sampleRate) ??
        nullptr;

    calloc.free(psamples);

    if (p == nullptr) {
      return DenoisedAudio(samples: Float32List(0), sampleRate: 0);
    }

    final sampleRateOut = p.ref.sampleRate;
    final nOut = p.ref.n;
    Float32List newSamples = Float32List(0);
    if (nOut > 0 && p.ref.samples != nullptr) {
      newSamples = Float32List.fromList(p.ref.samples.asTypedList(nOut));
    }

    SherpaOnnxBindings.sherpaOnnxDestroyDenoisedAudio?.call(p);

    return DenoisedAudio(samples: newSamples, sampleRate: sampleRateOut);
  }

  /// Return the expected sample rate for this denoiser.
  int get sampleRate {
    if (SherpaOnnxBindings.sherpaOnnxOfflineSpeechDenoiserGetSampleRate ==
        null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return 0;
    }

    return SherpaOnnxBindings.sherpaOnnxOfflineSpeechDenoiserGetSampleRate
            ?.call(ptr) ??
        0;
  }

  Pointer<SherpaOnnxOfflineSpeechDenoiser> ptr;
  OfflineSpeechDenoiserConfig config;
}


================================================
FILE: flutter/sherpa_onnx/lib/src/offline_stream.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:ffi';
import 'dart:typed_data';
import 'package:ffi/ffi.dart';

import './sherpa_onnx_bindings.dart';

/// Input stream for offline APIs such as offline ASR, audio tagging, and
/// spoken language identification.
class OfflineStream {
  /// The user has to call OfflineStream.free() to avoid memory leak.
  OfflineStream({required this.ptr});

  /// Release the native stream.
  void free() {
    if (SherpaOnnxBindings.destroyOfflineStream == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return;
    }
    SherpaOnnxBindings.destroyOfflineStream?.call(ptr);
    ptr = nullptr;
  }

  /// If you have List<double> data, then you can use
  /// Float32List.fromList(data) to convert data to Float32List
  ///
  /// See
  ///  https://api.flutter.dev/flutter/dart-core/List-class.html
  /// and
  ///  https://api.flutter.dev/flutter/dart-typed_data/Float32List-class.html
  /// Append waveform samples to the stream.
  ///
  /// [samples] must contain mono floating-point PCM data normalized to
  /// `[-1, 1]`. [sampleRate] should match the model expectation, typically
  /// 16000 for the provided examples.
  void acceptWaveform({required Float32List samples, required int sampleRate}) {
    if (SherpaOnnxBindings.acceptWaveformOffline == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return;
    }

    final n = samples.length;
    final Pointer<Float> p = calloc<Float>(n);

    final pList = p.asTypedList(n);
    pList.setAll(0, samples);

    SherpaOnnxBindings.acceptWaveformOffline?.call(ptr, sampleRate, p, n);

    calloc.free(p);
  }

  Pointer<SherpaOnnxOfflineStream> ptr;
}


================================================
FILE: flutter/sherpa_onnx/lib/src/online_punctuation.dart
================================================
import 'dart:ffi';
import 'package:ffi/ffi.dart';

import './sherpa_onnx_bindings.dart';

/// Online punctuation restoration.
///
/// This wrapper is intended for shorter or incremental text fragments. See
/// `dart-api-examples/add-punctuations/` for working examples.
class OnlinePunctuationModelConfig {
  OnlinePunctuationModelConfig(
      {required this.cnnBiLstm,
      required this.bpeVocab,
      this.numThreads = 1,
      this.provider = 'cpu',
      this.debug = true});

  factory OnlinePunctuationModelConfig.fromJson(Map<String, dynamic> json) {
    return OnlinePunctuationModelConfig(
      cnnBiLstm: json['cnnBiLstm'],
      bpeVocab: json['bpeVocab'],
      numThreads: json['numThreads'],
      provider: json['provider'],
      debug: json['debug'],
    );
  }

  @override
  String toString() {
    return 'OnlinePunctuationModelConfig(cnnBiLstm: $cnnBiLstm, '
        'bpeVocab: $bpeVocab, numThreads: $numThreads, '
        'provider: $provider, debug: $debug)';
  }

  Map<String, dynamic> toJson() {
    return {
      'cnnBiLstm': cnnBiLstm,
      'bpeVocab': bpeVocab,
      'numThreads': numThreads,
      'provider': provider,
      'debug': debug,
    };
  }

  final String cnnBiLstm;
  final String bpeVocab;
  final int numThreads;
  final String provider;
  final bool debug;
}

/// Top-level configuration for [OnlinePunctuation].
class OnlinePunctuationConfig {
  OnlinePunctuationConfig({
    required this.model,
  });

  factory OnlinePunctuationConfig.fromJson(Map<String, dynamic> json) {
    return OnlinePunctuationConfig(
      model: OnlinePunctuationModelConfig.fromJson(json['model']),
    );
  }

  @override
  String toString() {
    return 'OnlinePunctuationConfig(model: $model)';
  }

  Map<String, dynamic> toJson() {
    return {
      'model': model.toJson(),
    };
  }

  final OnlinePunctuationModelConfig model;
}

/// Online punctuation restorer.
class OnlinePunctuation {
  OnlinePunctuation.fromPtr({required this.ptr, required this.config});

  OnlinePunctuation._({required this.ptr, required this.config});

  /// Create an online punctuator from [config].
  factory OnlinePunctuation({required OnlinePunctuationConfig config}) {
    if (SherpaOnnxBindings.sherpaOnnxCreateOnlinePunctuation == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    final c = calloc<SherpaOnnxOnlinePunctuationConfig>();

    final cnnBiLstmPtr = config.model.cnnBiLstm.toNativeUtf8();
    final bpeVocabPtr = config.model.bpeVocab.toNativeUtf8();
    c.ref.model.cnnBiLstm = cnnBiLstmPtr;
    c.ref.model.bpeVocab = bpeVocabPtr;
    c.ref.model.numThreads = config.model.numThreads;
    c.ref.model.debug = config.model.debug ? 1 : 0;

    final providerPtr = config.model.provider.toNativeUtf8();
    c.ref.model.provider = providerPtr;

    final ptr = SherpaOnnxBindings.sherpaOnnxCreateOnlinePunctuation?.call(c) ??
        nullptr;

    calloc.free(providerPtr);
    calloc.free(cnnBiLstmPtr);
    calloc.free(bpeVocabPtr);
    calloc.free(c);

    if (ptr == nullptr) {
      throw Exception(
          "Failed to create online punctuation. Please check your config");
    }

    return OnlinePunctuation._(ptr: ptr, config: config);
  }

  /// Release the native punctuator.
  void free() {
    if (SherpaOnnxBindings.sherpaOnnxDestroyOnlinePunctuation == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return;
    }
    SherpaOnnxBindings.sherpaOnnxDestroyOnlinePunctuation?.call(ptr);
    ptr = nullptr;
  }

  /// Add punctuation to [text].
  String addPunct(String text) {
    if (SherpaOnnxBindings.sherpaOnnxOnlinePunctuationAddPunct == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return '';
    }

    final textPtr = text.toNativeUtf8();

    final p = SherpaOnnxBindings.sherpaOnnxOnlinePunctuationAddPunct
            ?.call(ptr, textPtr) ??
        nullptr;

    calloc.free(textPtr);

    if (p == nullptr) {
      return '';
    }

    final ans = p.toDartString();

    SherpaOnnxBindings.sherpaOnnxOnlinePunctuationFreeText?.call(p);

    return ans;
  }

  Pointer<SherpaOnnxOnlinePunctuation> ptr;
  final OnlinePunctuationConfig config;
}


================================================
FILE: flutter/sherpa_onnx/lib/src/online_recognizer.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:convert';
import 'dart:ffi';

import 'package:ffi/ffi.dart';

import './feature_config.dart';
import './homophone_replacer_config.dart';
import './online_stream.dart';
import './sherpa_onnx_bindings.dart';
import './utils.dart';

/// Streaming speech recognition.
///
/// This module wraps the online ASR APIs used by the examples in
/// `dart-api-examples/streaming-asr/bin/`, including Zipformer transducer,
/// Zipformer CTC, Paraformer, T-One-CTC, and NeMo-CTC style models.
///
/// Example:
///
/// ```dart
/// final model = OnlineModelConfig(
///   transducer: const OnlineTransducerModelConfig(
///     encoder: './streaming-zipformer/encoder-epoch-99-avg-1.int8.onnx',
///     decoder: './streaming-zipformer/decoder-epoch-99-avg-1.onnx',
///     joiner: './streaming-zipformer/joiner-epoch-99-avg-1.int8.onnx',
///   ),
///   tokens: './streaming-zipformer/tokens.txt',
///   modelType: 'zipformer2',
/// );
///
/// final recognizer = OnlineRecognizer(OnlineRecognizerConfig(model: model));
/// final stream = recognizer.createStream();
/// stream.acceptWaveform(samples: chunk, sampleRate: 16000);
/// while (recognizer.isReady(stream)) {
///   recognizer.decode(stream);
/// }
/// print(recognizer.getResult(stream).text);
/// ```

/// Model files for a streaming transducer recognizer.
class OnlineTransducerModelConfig {
  const OnlineTransducerModelConfig({
    this.encoder = '',
    this.decoder = '',
    this.joiner = '',
  });

  factory OnlineTransducerModelConfig.fromJson(Map<String, dynamic> json) {
    return OnlineTransducerModelConfig(
      encoder: json['encoder'] as String? ?? '',
      decoder: json['decoder'] as String? ?? '',
      joiner: json['joiner'] as String? ?? '',
    );
  }

  @override
  String toString() {
    return 'OnlineTransducerModelConfig(encoder: $encoder, decoder: $decoder, joiner: $joiner)';
  }

  Map<String, dynamic> toJson() => {
        'encoder': encoder,
        'decoder': decoder,
        'joiner': joiner,
      };

  final String encoder;
  final String decoder;
  final String joiner;
}

/// Model files for a streaming Paraformer recognizer.
class OnlineParaformerModelConfig {
  const OnlineParaformerModelConfig({this.encoder = '', this.decoder = ''});

  factory OnlineParaformerModelConfig.fromJson(Map<String, dynamic> json) {
    return OnlineParaformerModelConfig(
      encoder: json['encoder'] as String? ?? '',
      decoder: json['decoder'] as String? ?? '',
    );
  }

  @override
  String toString() {
    return 'OnlineParaformerModelConfig(encoder: $encoder, decoder: $decoder)';
  }

  Map<String, dynamic> toJson() => {
        'encoder': encoder,
        'decoder': decoder,
      };

  final String encoder;
  final String decoder;
}

/// Model file for a streaming Zipformer2 CTC recognizer.
class OnlineZipformer2CtcModelConfig {
  const OnlineZipformer2CtcModelConfig({this.model = ''});

  factory OnlineZipformer2CtcModelConfig.fromJson(Map<String, dynamic> json) {
    return OnlineZipformer2CtcModelConfig(
      model: json['model'] as String? ?? '',
    );
  }

  @override
  String toString() {
    return 'OnlineZipformer2CtcModelConfig(model: $model)';
  }

  Map<String, dynamic> toJson() => {
        'model': model,
      };

  final String model;
}

/// Model file for a streaming NeMo CTC recognizer.
class OnlineNemoCtcModelConfig {
  const OnlineNemoCtcModelConfig({this.model = ''});

  factory OnlineNemoCtcModelConfig.fromJson(Map<String, dynamic> json) {
    return OnlineNemoCtcModelConfig(
      model: json['model'] as String? ?? '',
    );
  }

  @override
  String toString() {
    return 'OnlineNemoCtcModelConfig(model: $model)';
  }

  Map<String, dynamic> toJson() => {
        'model': model,
      };

  final String model;
}

/// Model file for a streaming tone-aware CTC recognizer.
class OnlineToneCtcModelConfig {
  const OnlineToneCtcModelConfig({this.model = ''});

  factory OnlineToneCtcModelConfig.fromJson(Map<String, dynamic> json) {
    return OnlineToneCtcModelConfig(
      model: json['model'] as String? ?? '',
    );
  }

  @override
  String toString() {
    return 'OnlineToneCtcModelConfig(model: $model)';
  }

  Map<String, dynamic> toJson() => {
        'model': model,
      };

  final String model;
}

/// Aggregate model configuration for streaming recognition.
///
/// Configure exactly one model family for a typical deployment and supply the
/// shared tokenizer and runtime settings here.
class OnlineModelConfig {
  const OnlineModelConfig({
    this.transducer = const OnlineTransducerModelConfig(),
    this.paraformer = const OnlineParaformerModelConfig(),
    this.zipformer2Ctc = const OnlineZipformer2CtcModelConfig(),
    this.nemoCtc = const OnlineNemoCtcModelConfig(),
    this.toneCtc = const OnlineToneCtcModelConfig(),
    required this.tokens,
    this.numThreads = 1,
    this.provider = 'cpu',
    this.debug = true,
    this.modelType = '',
    this.modelingUnit = '',
    this.bpeVocab = '',
  });

  factory OnlineModelConfig.fromJson(Map<String, dynamic> json) {
    return OnlineModelConfig(
      transducer: OnlineTransducerModelConfig.fromJson(
          json['transducer'] as Map<String, dynamic>? ?? const {}),
      paraformer: OnlineParaformerModelConfig.fromJson(
          json['paraformer'] as Map<String, dynamic>? ?? const {}),
      zipformer2Ctc: OnlineZipformer2CtcModelConfig.fromJson(
          json['zipformer2Ctc'] as Map<String, dynamic>? ?? const {}),
      nemoCtc: OnlineNemoCtcModelConfig.fromJson(
          json['nemoCtc'] as Map<String, dynamic>? ?? const {}),
      toneCtc: OnlineToneCtcModelConfig.fromJson(
          json['toneCtc'] as Map<String, dynamic>? ?? const {}),
      tokens: json['tokens'] as String,
      numThreads: json['numThreads'] as int? ?? 1,
      provider: json['provider'] as String? ?? 'cpu',
      debug: json['debug'] as bool? ?? true,
      modelType: json['modelType'] as String? ?? '',
      modelingUnit: json['modelingUnit'] as String? ?? '',
      bpeVocab: json['bpeVocab'] as String? ?? '',
    );
  }

  @override
  String toString() {
    return 'OnlineModelConfig(transducer: $transducer, paraformer: $paraformer, zipformer2Ctc: $zipformer2Ctc, nemoCtc: $nemoCtc, toneCtc: $toneCtc, tokens: $tokens, numThreads: $numThreads, provider: $provider, debug: $debug, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab)';
  }

  Map<String, dynamic> toJson() => {
        'transducer': transducer.toJson(),
        'paraformer': paraformer.toJson(),
        'zipformer2Ctc': zipformer2Ctc.toJson(),
        'nemoCtc': nemoCtc.toJson(),
        'toneCtc': toneCtc.toJson(),
        'tokens': tokens,
        'numThreads': numThreads,
        'provider': provider,
        'debug': debug,
        'modelType': modelType,
        'modelingUnit': modelingUnit,
        'bpeVocab': bpeVocab,
      };

  final OnlineTransducerModelConfig transducer;
  final OnlineParaformerModelConfig paraformer;
  final OnlineZipformer2CtcModelConfig zipformer2Ctc;
  final OnlineNemoCtcModelConfig nemoCtc;
  final OnlineToneCtcModelConfig toneCtc;

  final String tokens;

  final int numThreads;

  final String provider;

  final bool debug;

  final String modelType;

  final String modelingUnit;

  final String bpeVocab;
}

/// FST decoder settings for CTC-based streaming recognition.
class OnlineCtcFstDecoderConfig {
  const OnlineCtcFstDecoderConfig({this.graph = '', this.maxActive = 3000});

  factory OnlineCtcFstDecoderConfig.fromJson(Map<String, dynamic> json) {
    return OnlineCtcFstDecoderConfig(
      graph: json['graph'] as String? ?? '',
      maxActive: json['maxActive'] as int? ?? 3000,
    );
  }

  @override
  String toString() {
    return 'OnlineCtcFstDecoderConfig(graph: $graph, maxActive: $maxActive)';
  }

  Map<String, dynamic> toJson() => {
        'graph': graph,
        'maxActive': maxActive,
      };

  final String graph;
  final int maxActive;
}

/// Top-level configuration for [OnlineRecognizer].
///
/// This combines feature extraction, the selected online model family,
/// endpointing rules, hotwords, grammar resources, and optional homophone
/// replacement resources.
class OnlineRecognizerConfig {
  const OnlineRecognizerConfig({
    this.feat = const FeatureConfig(),
    required this.model,
    this.decodingMethod = 'greedy_search',
    this.maxActivePaths = 4,
    this.enableEndpoint = true,
    this.rule1MinTrailingSilence = 2.4,
    this.rule2MinTrailingSilence = 1.2,
    this.rule3MinUtteranceLength = 20,
    this.hotwordsFile = '',
    this.hotwordsScore = 1.5,
    this.ctcFstDecoderConfig = const OnlineCtcFstDecoderConfig(),
    this.ruleFsts = '',
    this.ruleFars = '',
    this.blankPenalty = 0.0,
    this.hr = const HomophoneReplacerConfig(),
  });

  factory OnlineRecognizerConfig.fromJson(Map<String, dynamic> json) {
    return OnlineRecognizerConfig(
      feat: FeatureConfig.fromJson(
          json['feat'] as Map<String, dynamic>? ?? const {}),
      model: OnlineModelConfig.fromJson(json['model'] as Map<String, dynamic>),
      decodingMethod: json['decodingMethod'] as String? ?? 'greedy_search',
      maxActivePaths: json['maxActivePaths'] as int? ?? 4,
      enableEndpoint: json['enableEndpoint'] as bool? ?? true,
      rule1MinTrailingSilence:
          (json['rule1MinTrailingSilence'] as num?)?.toDouble() ?? 2.4,
      rule2MinTrailingSilence:
          (json['rule2MinTrailingSilence'] as num?)?.toDouble() ?? 1.2,
      rule3MinUtteranceLength:
          (json['rule3MinUtteranceLength'] as num?)?.toDouble() ?? 20.0,
      hotwordsFile: json['hotwordsFile'] as String? ?? '',
      hotwordsScore: (json['hotwordsScore'] as num?)?.toDouble() ?? 1.5,
      ctcFstDecoderConfig: OnlineCtcFstDecoderConfig.fromJson(
          json['ctcFstDecoderConfig'] as Map<String, dynamic>? ?? const {}),
      ruleFsts: json['ruleFsts'] as String? ?? '',
      ruleFars: json['ruleFars'] as String? ?? '',
      blankPenalty: (json['blankPenalty'] as num?)?.toDouble() ?? 0.0,
      hr: HomophoneReplacerConfig.fromJson(
          json['hr'] as Map<String, dynamic>? ?? const {}),
    );
  }

  @override
  String toString() {
    return 'OnlineRecognizerConfig(feat: $feat, model: $model, decodingMethod: $decodingMethod, maxActivePaths: $maxActivePaths, enableEndpoint: $enableEndpoint, rule1MinTrailingSilence: $rule1MinTrailingSilence, rule2MinTrailingSilence: $rule2MinTrailingSilence, rule3MinUtteranceLength: $rule3MinUtteranceLength, hotwordsFile: $hotwordsFile, hotwordsScore: $hotwordsScore, ctcFstDecoderConfig: $ctcFstDecoderConfig, ruleFsts: $ruleFsts, ruleFars: $ruleFars, blankPenalty: $blankPenalty, hr: $hr)';
  }

  Map<String, dynamic> toJson() => {
        'feat': feat.toJson(),
        'model': model.toJson(),
        'decodingMethod': decodingMethod,
        'maxActivePaths': maxActivePaths,
        'enableEndpoint': enableEndpoint,
        'rule1MinTrailingSilence': rule1MinTrailingSilence,
        'rule2MinTrailingSilence': rule2MinTrailingSilence,
        'rule3MinUtteranceLength': rule3MinUtteranceLength,
        'hotwordsFile': hotwordsFile,
        'hotwordsScore': hotwordsScore,
        'ctcFstDecoderConfig': ctcFstDecoderConfig.toJson(),
        'ruleFsts': ruleFsts,
        'ruleFars': ruleFars,
        'blankPenalty': blankPenalty,
        'hr': hr.toJson(),
      };

  final FeatureConfig feat;
  final OnlineModelConfig model;
  final String decodingMethod;

  final int maxActivePaths;

  final bool enableEndpoint;

  final double rule1MinTrailingSilence;

  final double rule2MinTrailingSilence;

  final double rule3MinUtteranceLength;

  final String hotwordsFile;

  final double hotwordsScore;

  final OnlineCtcFstDecoderConfig ctcFstDecoderConfig;
  final String ruleFsts;
  final String ruleFars;

  final double blankPenalty;
  final HomophoneReplacerConfig hr;
}

/// Streaming recognition result returned by [OnlineRecognizer.getResult].
class OnlineRecognizerResult {
  OnlineRecognizerResult(
      {required this.text, required this.tokens, required this.timestamps});

  factory OnlineRecognizerResult.fromJson(Map<String, dynamic> json) {
    return OnlineRecognizerResult(
      text: json['text'] as String,
      tokens: List<String>.from(json['tokens'] as List),
      timestamps: (json['timestamps'] as List)
          .map<double>((e) => (e as num).toDouble())
          .toList(),
    );
  }

  @override
  String toString() {
    return 'OnlineRecognizerResult(text: $text, tokens: $tokens, timestamps: $timestamps)';
  }

  Map<String, dynamic> toJson() => {
        'text': text,
        'tokens': tokens,
        'timestamps': timestamps,
      };

  final String text;
  final List<String> tokens;
  final List<double> timestamps;
}

/// Streaming speech recognizer.
///
/// Create one from an [OnlineRecognizerConfig], then feed chunks to an
/// [OnlineStream] and call [decode] while [isReady] is true.
class OnlineRecognizer {
  OnlineRecognizer.fromPtr({required this.ptr, required this.config});

  OnlineRecognizer._({required this.ptr, required this.config});

  /// The user is responsible to call the OnlineRecognizer.free()
  /// method of the returned instance to avoid memory leak.
  /// Create a recognizer from [config].
  factory OnlineRecognizer(OnlineRecognizerConfig config) {
    if (SherpaOnnxBindings.createOnlineRecognizer == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    final c = calloc<SherpaOnnxOnlineRecognizerConfig>();
    c.ref.feat.sampleRate = config.feat.sampleRate;
    c.ref.feat.featureDim = config.feat.featureDim;

    // transducer
    c.ref.model.transducer.encoder =
        config.model.transducer.encoder.toNativeUtf8();
    c.ref.model.transducer.decoder =
        config.model.transducer.decoder.toNativeUtf8();
    c.ref.model.transducer.joiner =
        config.model.transducer.joiner.toNativeUtf8();

    // paraformer
    c.ref.model.paraformer.encoder =
        config.model.paraformer.encoder.toNativeUtf8();
    c.ref.model.paraformer.decoder =
        config.model.paraformer.decoder.toNativeUtf8();

    // zipformer2Ctc
    c.ref.model.zipformer2Ctc.model =
        config.model.zipformer2Ctc.model.toNativeUtf8();

    // nemoCtc
    c.ref.model.nemoCtc.model = config.model.nemoCtc.model.toNativeUtf8();

    // toneCtc
    c.ref.model.toneCtc.model = config.model.toneCtc.model.toNativeUtf8();

    c.ref.model.tokens = config.model.tokens.toNativeUtf8();
    c.ref.model.numThreads = config.model.numThreads;
    c.ref.model.provider = config.model.provider.toNativeUtf8();
    c.ref.model.debug = config.model.debug ? 1 : 0;
    c.ref.model.modelType = config.model.modelType.toNativeUtf8();
    c.ref.model.modelingUnit = config.model.modelingUnit.toNativeUtf8();
    c.ref.model.bpeVocab = config.model.bpeVocab.toNativeUtf8();

    c.ref.decodingMethod = config.decodingMethod.toNativeUtf8();
    c.ref.maxActivePaths = config.maxActivePaths;
    c.ref.enableEndpoint = config.enableEndpoint ? 1 : 0;
    c.ref.rule1MinTrailingSilence = config.rule1MinTrailingSilence;
    c.ref.rule2MinTrailingSilence = config.rule2MinTrailingSilence;
    c.ref.rule3MinUtteranceLength = config.rule3MinUtteranceLength;
    c.ref.hotwordsFile = config.hotwordsFile.toNativeUtf8();
    c.ref.hotwordsScore = config.hotwordsScore;

    c.ref.ctcFstDecoderConfig.graph =
        config.ctcFstDecoderConfig.graph.toNativeUtf8();
    c.ref.ctcFstDecoderConfig.maxActive = config.ctcFstDecoderConfig.maxActive;
    c.ref.ruleFsts = config.ruleFsts.toNativeUtf8();
    c.ref.ruleFars = config.ruleFars.toNativeUtf8();

    c.ref.blankPenalty = config.blankPenalty;

    c.ref.hr.lexicon = config.hr.lexicon.toNativeUtf8();
    c.ref.hr.ruleFsts = config.hr.ruleFsts.toNativeUtf8();

    final ptr = SherpaOnnxBindings.createOnlineRecognizer?.call(c) ?? nullptr;

    calloc.free(c.ref.hr.lexicon);
    calloc.free(c.ref.hr.ruleFsts);
    calloc.free(c.ref.ruleFars);
    calloc.free(c.ref.ruleFsts);
    calloc.free(c.ref.ctcFstDecoderConfig.graph);
    calloc.free(c.ref.hotwordsFile);
    calloc.free(c.ref.decodingMethod);
    calloc.free(c.ref.model.bpeVocab);
    calloc.free(c.ref.model.modelingUnit);
    calloc.free(c.ref.model.modelType);
    calloc.free(c.ref.model.provider);
    calloc.free(c.ref.model.tokens);
    calloc.free(c.ref.model.toneCtc.model);
    calloc.free(c.ref.model.nemoCtc.model);
    calloc.free(c.ref.model.zipformer2Ctc.model);
    calloc.free(c.ref.model.paraformer.encoder);
    calloc.free(c.ref.model.paraformer.decoder);

    calloc.free(c.ref.model.transducer.encoder);
    calloc.free(c.ref.model.transducer.decoder);
    calloc.free(c.ref.model.transducer.joiner);
    calloc.free(c);

    if (ptr == nullptr) {
      throw Exception(
          "Failed to create online recognizer. Please check your config");
    }

    return OnlineRecognizer._(ptr: ptr, config: config);
  }

  /// Release the native recognizer.
  void free() {
    if (SherpaOnnxBindings.destroyOnlineRecognizer == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return;
    }
    SherpaOnnxBindings.destroyOnlineRecognizer?.call(ptr);
    ptr = nullptr;
  }

  /// The user has to invoke stream.free() on the returned instance
  /// to avoid memory leak
  /// Create a streaming input stream.
  ///
  /// If [hotwords] is provided, the stream uses those per-stream hotwords in
  /// addition to any recognizer-wide settings.
  OnlineStream createStream({String hotwords = ''}) {
    if (hotwords == '') {
      if (SherpaOnnxBindings.createOnlineStream == null) {
        throw Exception("Please initialize sherpa-onnx first");
      }
    } else {
      if (SherpaOnnxBindings.createOnlineStreamWithHotwords == null) {
        throw Exception("Please initialize sherpa-onnx first");
      }
    }

    if (ptr == nullptr) {
      throw Exception("Failed to create online stream");
    }

    if (hotwords == '') {
      final p = SherpaOnnxBindings.createOnlineStream?.call(ptr) ?? nullptr;
      if (p == nullptr) {
        throw Exception("Failed to create online stream");
      }
      return OnlineStream(ptr: p);
    }

    final utf8 = hotwords.toNativeUtf8();
    final p =
        SherpaOnnxBindings.createOnlineStreamWithHotwords?.call(ptr, utf8) ??
            nullptr;
    calloc.free(utf8);

    if (p == nullptr) {
      throw Exception("Failed to create online stream");
    }

    return OnlineStream(ptr: p);
  }

  /// Return `true` if the recognizer has enough audio to run another step.
  bool isReady(OnlineStream stream) {
    if (SherpaOnnxBindings.isOnlineStreamReady == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr || stream.ptr == nullptr) {
      return false;
    }

    int ready =
        SherpaOnnxBindings.isOnlineStreamReady?.call(ptr, stream.ptr) ?? 0;

    return ready == 1;
  }

  /// Fetch the current recognition hypothesis.
  OnlineRecognizerResult getResult(OnlineStream stream) {
    if (SherpaOnnxBindings.getOnlineStreamResultAsJson == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr || stream.ptr == nullptr) {
      return OnlineRecognizerResult(text: '', tokens: [], timestamps: []);
    }

    final json =
        SherpaOnnxBindings.getOnlineStreamResultAsJson?.call(ptr, stream.ptr) ??
            nullptr;
    if (json == nullptr) {
      return OnlineRecognizerResult(text: '', tokens: [], timestamps: []);
    }

    final parsedJson = jsonDecode(toDartString(json));

    SherpaOnnxBindings.destroyOnlineStreamResultJson?.call(json);

    return OnlineRecognizerResult(
        text: parsedJson['text'],
        tokens: List<String>.from(parsedJson['tokens']),
        timestamps: List<double>.from(parsedJson['timestamps']));
  }

  /// Reset stream state after an endpoint or utterance boundary.
  void reset(OnlineStream stream) {
    if (SherpaOnnxBindings.reset == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr || stream.ptr == nullptr) {
      return;
    }

    SherpaOnnxBindings.reset?.call(ptr, stream.ptr);
  }

  /// Decode one incremental step for [stream].
  void decode(OnlineStream stream) {
    if (SherpaOnnxBindings.decodeOnlineStream == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr || stream.ptr == nullptr) {
      return;
    }

    SherpaOnnxBindings.decodeOnlineStream?.call(ptr, stream.ptr);
  }

  /// Return `true` if endpointing rules say the current utterance has ended.
  bool isEndpoint(OnlineStream stream) {
    if (SherpaOnnxBindings.isEndpoint == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr || stream.ptr == nullptr) {
      return false;
    }

    int yes = SherpaOnnxBindings.isEndpoint?.call(ptr, stream.ptr) ?? 0;

    return yes == 1;
  }

  Pointer<SherpaOnnxOnlineRecognizer> ptr;
  OnlineRecognizerConfig config;
}


================================================
FILE: flutter/sherpa_onnx/lib/src/online_speech_denoiser.dart
================================================
// Copyright (c)  2026  Xiaomi Corporation
import 'dart:ffi';
import 'dart:typed_data';

import 'package:ffi/ffi.dart';

import './offline_speech_denoiser.dart';
import './sherpa_onnx_bindings.dart';

/// Streaming speech denoising.
///
/// Call [run] on consecutive chunks, then [flush] after the final chunk to
/// drain any buffered state.
class OnlineSpeechDenoiserConfig {
  const OnlineSpeechDenoiserConfig({
    this.model = const OfflineSpeechDenoiserModelConfig(),
  });

  factory OnlineSpeechDenoiserConfig.fromJson(Map<String, dynamic> json) {
    return OnlineSpeechDenoiserConfig(
      model: json['model'] != null
          ? OfflineSpeechDenoiserModelConfig.fromJson(
              json['model'] as Map<String, dynamic>,
            )
          : const OfflineSpeechDenoiserModelConfig(),
    );
  }

  @override
  String toString() {
    return 'OnlineSpeechDenoiserConfig(model: $model)';
  }

  Map<String, dynamic> toJson() => {
        'model': model.toJson(),
      };

  final OfflineSpeechDenoiserModelConfig model;
}

/// Streaming speech denoiser.
class OnlineSpeechDenoiser {
  OnlineSpeechDenoiser.fromPtr({required this.ptr, required this.config});

  OnlineSpeechDenoiser._({required this.ptr, required this.config});

  /// Create a streaming denoiser from [config].
  factory OnlineSpeechDenoiser(OnlineSpeechDenoiserConfig config) {
    if (SherpaOnnxBindings.sherpaOnnxCreateOnlineSpeechDenoiser == null) {
      throw Exception('Please initialize sherpa-onnx first');
    }

    final c = calloc<SherpaOnnxOnlineSpeechDenoiserConfig>();
    c.ref.model.gtcrn.model = config.model.gtcrn.model.toNativeUtf8();
    c.ref.model.dpdfnet.model = config.model.dpdfnet.model.toNativeUtf8();
    c.ref.model.numThreads = config.model.numThreads;
    c.ref.model.debug = config.model.debug ? 1 : 0;
    c.ref.model.provider = config.model.provider.toNativeUtf8();

    final ptr =
        SherpaOnnxBindings.sherpaOnnxCreateOnlineSpeechDenoiser?.call(c) ??
            nullptr;

    calloc.free(c.ref.model.provider);
    calloc.free(c.ref.model.gtcrn.model);
    calloc.free(c.ref.model.dpdfnet.model);
    calloc.free(c);

    if (ptr == nullptr) {
      throw Exception(
        'Failed to create online speech denoiser. Please check your config',
      );
    }

    return OnlineSpeechDenoiser._(ptr: ptr, config: config);
  }

  /// Release the native denoiser.
  void free() {
    if (SherpaOnnxBindings.sherpaOnnxDestroyOnlineSpeechDenoiser == null) {
      throw Exception('Please initialize sherpa-onnx first');
    }

    if (ptr == nullptr) {
      return;
    }

    SherpaOnnxBindings.sherpaOnnxDestroyOnlineSpeechDenoiser?.call(ptr);
    ptr = nullptr;
  }

  /// Denoise one input chunk.
  DenoisedAudio run({required Float32List samples, required int sampleRate}) {
    if (SherpaOnnxBindings.sherpaOnnxOnlineSpeechDenoiserRun == null) {
      throw Exception('Please initialize sherpa-onnx first');
    }

    if (ptr == nullptr) {
      return DenoisedAudio(samples: Float32List(0), sampleRate: 0);
    }

    final n = samples.length;
    final Pointer<Float> psamples = calloc<Float>(n);
    final pList = psamples.asTypedList(n);
    pList.setAll(0, samples);

    final p =
        SherpaOnnxBindings.sherpaOnnxOnlineSpeechDenoiserRun?.call(
              ptr,
              psamples,
              n,
              sampleRate,
            ) ??
            nullptr;

    calloc.free(psamples);

    if (p == nullptr) {
      return DenoisedAudio(samples: Float32List(0), sampleRate: 0);
    }

    final sampleRateOut = p.ref.sampleRate;
    final nOut = p.ref.n;
    Float32List newSamples = Float32List(0);
    if (nOut > 0 && p.ref.samples != nullptr) {
      newSamples = Float32List.fromList(p.ref.samples.asTypedList(nOut));
    }

    SherpaOnnxBindings.sherpaOnnxDestroyDenoisedAudio?.call(p);

    return DenoisedAudio(samples: newSamples, sampleRate: sampleRateOut);
  }

  /// Flush buffered output after the final chunk.
  DenoisedAudio flush() {
    if (SherpaOnnxBindings.sherpaOnnxOnlineSpeechDenoiserFlush == null) {
      throw Exception('Please initialize sherpa-onnx first');
    }

    if (ptr == nullptr) {
      return DenoisedAudio(samples: Float32List(0), sampleRate: 0);
    }

    final p =
        SherpaOnnxBindings.sherpaOnnxOnlineSpeechDenoiserFlush?.call(ptr) ??
            nullptr;

    if (p == nullptr) {
      return DenoisedAudio(samples: Float32List(0), sampleRate: 0);
    }

    final sampleRateOut = p.ref.sampleRate;
    final nOut = p.ref.n;
    Float32List newSamples = Float32List(0);
    if (nOut > 0 && p.ref.samples != nullptr) {
      newSamples = Float32List.fromList(p.ref.samples.asTypedList(nOut));
    }

    SherpaOnnxBindings.sherpaOnnxDestroyDenoisedAudio?.call(p);

    return DenoisedAudio(samples: newSamples, sampleRate: sampleRateOut);
  }

  /// Reset the streaming state.
  void reset() {
    if (SherpaOnnxBindings.sherpaOnnxOnlineSpeechDenoiserReset == null) {
      throw Exception('Please initialize sherpa-onnx first');
    }

    if (ptr == nullptr) {
      return;
    }

    SherpaOnnxBindings.sherpaOnnxOnlineSpeechDenoiserReset?.call(ptr);
  }

  /// Return the expected sample rate for this denoiser.
  int get sampleRate {
    if (SherpaOnnxBindings.sherpaOnnxOnlineSpeechDenoiserGetSampleRate ==
        null) {
      throw Exception('Please initialize sherpa-onnx first');
    }

    if (ptr == nullptr) {
      return 0;
    }

    return SherpaOnnxBindings.sherpaOnnxOnlineSpeechDenoiserGetSampleRate?.call(
          ptr,
        ) ??
        0;
  }

  /// Return the preferred frame shift in samples.
  int get frameShiftInSamples {
    if (SherpaOnnxBindings.sherpaOnnxOnlineSpeechDenoiserGetFrameShiftInSamples ==
        null) {
      throw Exception('Please initialize sherpa-onnx first');
    }

    if (ptr == nullptr) {
      return 0;
    }

    return SherpaOnnxBindings.sherpaOnnxOnlineSpeechDenoiserGetFrameShiftInSamples
            ?.call(ptr) ??
        0;
  }

  Pointer<SherpaOnnxOnlineSpeechDenoiser> ptr;
  OnlineSpeechDenoiserConfig config;
}


================================================
FILE: flutter/sherpa_onnx/lib/src/online_stream.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:ffi';
import 'dart:typed_data';
import 'package:ffi/ffi.dart';

import './sherpa_onnx_bindings.dart';

/// Input stream for streaming APIs such as online ASR and keyword spotting.
class OnlineStream {
  /// The user has to call OnlineStream.free() to avoid memory leak.
  OnlineStream({required this.ptr});

  /// Release the native stream.
  void free() {
    if (SherpaOnnxBindings.destroyOnlineStream == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return;
    }
    SherpaOnnxBindings.destroyOnlineStream?.call(ptr);
    ptr = nullptr;
  }

  /// If you have List<double> data, then you can use
  /// Float32List.fromList(data) to convert data to Float32List
  ///
  /// See
  ///  https://api.flutter.dev/flutter/dart-core/List-class.html
  /// and
  ///  https://api.flutter.dev/flutter/dart-typed_data/Float32List-class.html
  /// Append waveform samples to the stream.
  ///
  /// [samples] must contain mono floating-point PCM data normalized to
  /// `[-1, 1]`. Feed your audio in chunks, then call [inputFinished] after the
  /// last chunk if you want the recognizer to flush trailing context.
  void acceptWaveform({required Float32List samples, required int sampleRate}) {
    if (SherpaOnnxBindings.onlineStreamAcceptWaveform == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return;
    }

    final n = samples.length;
    final Pointer<Float> p = calloc<Float>(n);

    final pList = p.asTypedList(n);
    pList.setAll(0, samples);

    SherpaOnnxBindings.onlineStreamAcceptWaveform?.call(ptr, sampleRate, p, n);

    calloc.free(p);
  }

  /// Mark the end of input.
  void inputFinished() {
    if (SherpaOnnxBindings.onlineStreamInputFinished == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return;
    }
    SherpaOnnxBindings.onlineStreamInputFinished?.call(ptr);
  }

  Pointer<SherpaOnnxOnlineStream> ptr;
}


================================================
FILE: flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:ffi';
import 'package:ffi/ffi.dart';

final class SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig extends Struct {
  external Pointer<Utf8> model;
}

final class SherpaOnnxOfflineSpeechDenoiserDpdfNetModelConfig extends Struct {
  external Pointer<Utf8> model;
}

final class SherpaOnnxOfflineSpeechDenoiserModelConfig extends Struct {
  external SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig gtcrn;

  @Int32()
  external int numThreads;

  @Int32()
  external int debug;

  external Pointer<Utf8> provider;

  external SherpaOnnxOfflineSpeechDenoiserDpdfNetModelConfig dpdfnet;
}

final class SherpaOnnxOfflineSpeechDenoiserConfig extends Struct {
  external SherpaOnnxOfflineSpeechDenoiserModelConfig model;
}

final class SherpaOnnxOnlineSpeechDenoiserConfig extends Struct {
  external SherpaOnnxOfflineSpeechDenoiserModelConfig model;
}

final class SherpaOnnxDenoisedAudio extends Struct {
  external Pointer<Float> samples;

  @Int32()
  external int n;

  @Int32()
  external int sampleRate;
}

final class SherpaOnnxSpeakerEmbeddingExtractorConfig extends Struct {
  external Pointer<Utf8> model;

  @Int32()
  external int numThreads;

  @Int32()
  external int debug;

  external Pointer<Utf8> provider;
}

final class SherpaOnnxOfflineSpeakerDiarizationSegment extends Struct {
  @Float()
  external double start;

  @Float()
  external double end;

  @Int32()
  external int speaker;
}

final class SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig
    extends Struct {
  external Pointer<Utf8> model;
}

final class SherpaOnnxOfflineSpeakerSegmentationModelConfig extends Struct {
  external SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig pyannote;

  @Int32()
  external int numThreads;

  @Int32()
  external int debug;

  external Pointer<Utf8> provider;
}

final class SherpaOnnxFastClusteringConfig extends Struct {
  @Int32()
  external int numClusters;

  @Float()
  external double threshold;
}

final class SherpaOnnxOfflineSpeakerDiarizationConfig extends Struct {
  external SherpaOnnxOfflineSpeakerSegmentationModelConfig segmentation;
  external SherpaOnnxSpeakerEmbeddingExtractorConfig embedding;
  external SherpaOnnxFastClusteringConfig clustering;

  @Float()
  external double minDurationOn;

  @Float()
  external double minDurationOff;
}

final class SherpaOnnxOfflinePunctuationModelConfig extends Struct {
  external Pointer<Utf8> ctTransformer;

  @Int32()
  external int numThreads;

  @Int32()
  external int debug;

  external Pointer<Utf8> provider;
}

final class SherpaOnnxOfflinePunctuationConfig extends Struct {
  external SherpaOnnxOfflinePunctuationModelConfig model;
}

final class SherpaOnnxOnlinePunctuationModelConfig extends Struct {
  external Pointer<Utf8> cnnBiLstm;
  external Pointer<Utf8> bpeVocab;
  @Int32()
  external int numThreads;
  @Int32()
  external int debug;
  external Pointer<Utf8> provider;
}

final class SherpaOnnxOnlinePunctuationConfig extends Struct {
  external SherpaOnnxOnlinePunctuationModelConfig model;
}

final class SherpaOnnxOfflineZipformerAudioTaggingModelConfig extends Struct {
  external Pointer<Utf8> model;
}

final class SherpaOnnxAudioTaggingModelConfig extends Struct {
  external SherpaOnnxOfflineZipformerAudioTaggingModelConfig zipformer;
  external Pointer<Utf8> ced;

  @Int32()
  external int numThreads;

  @Int32()
  external int debug;

  external Pointer<Utf8> provider;
}

final class SherpaOnnxAudioTaggingConfig extends Struct {
  external SherpaOnnxAudioTaggingModelConfig model;
  external Pointer<Utf8> labels;

  @Int32()
  external int topK;
}

final class SherpaOnnxAudioEvent extends Struct {
  external Pointer<Utf8> name;

  @Int32()
  external int index;

  @Float()
  external double prob;
}

final class SherpaOnnxOfflineTtsVitsModelConfig extends Struct {
  external Pointer<Utf8> model;
  external Pointer<Utf8> lexicon;
  external Pointer<Utf8> tokens;
  external Pointer<Utf8> dataDir;

  @Float()
  external double noiseScale;

  @Float()
  external double noiseScaleW;

  @Float()
  external double lengthScale;

  external Pointer<Utf8> dictDir;
}

final class SherpaOnnxOfflineTtsMatchaModelConfig extends Struct {
  external Pointer<Utf8> acousticModel;
  external Pointer<Utf8> vocoder;
  external Pointer<Utf8> lexicon;
  external Pointer<Utf8> tokens;
  external Pointer<Utf8> dataDir;

  @Float()
  external double noiseScale;

  @Float()
  external double lengthScale;

  external Pointer<Utf8> dictDir;
}

final class SherpaOnnxOfflineTtsKokoroModelConfig extends Struct {
  external Pointer<Utf8> model;
  external Pointer<Utf8> voices;
  external Pointer<Utf8> tokens;
  external Pointer<Utf8> dataDir;

  @Float()
  external double lengthScale;
  external Pointer<Utf8> dictDir;
  external Pointer<Utf8> lexicon;
  external Pointer<Utf8> lang;
}

final class SherpaOnnxOfflineTtsKittenModelConfig extends Struct {
  external Pointer<Utf8> model;
  external Pointer<Utf8> voices;
  external Pointer<Utf8> tokens;
  external Pointer<Utf8> dataDir;

  @Float()
  external double lengthScale;
}

final class SherpaOnnxOfflineTtsZipVoiceModelConfig extends Struct {
  external Pointer<Utf8> tokens;
  external Pointer<Utf8> encoder;
  external Pointer<Utf8> decoder;
  external Pointer<Utf8> vocoder;
  external Pointer<Utf8> dataDir;
  external Pointer<Utf8> lexicon;

  @Float()
  external double featScale;

  @Float()
  external double tShift;

  @Float()
  external double targetRms;

  @Float()
  external double guidanceScale;
}

final class SherpaOnnxOfflineTtsPocketModelConfig extends Struct {
  external Pointer<Utf8> lmFlow;
  external Pointer<Utf8> lmMain;
  external Pointer<Utf8> encoder;
  external Pointer<Utf8> decoder;
  external Pointer<Utf8> textConditioner;
  external Pointer<Utf8> vocabJson;
  external Pointer<Utf8> tokenScoresJson;

  @Int32()
  external int voiceEmbeddingCacheCapacity;
}

final class SherpaOnnxOfflineTtsSupertonicModelConfig extends Struct {
  external Pointer<Utf8> durationPredictor;
  external Pointer<Utf8> textEncoder;
  external Pointer<Utf8> vectorEstimator;
  external Pointer<Utf8> vocoder;
  external Pointer<Utf8> ttsJson;
  external Pointer<Utf8> unicodeIndexer;
  external Pointer<Utf8> voiceStyle;
}

final class SherpaOnnxOfflineTtsModelConfig extends Struct {
  external SherpaOnnxOfflineTtsVitsModelConfig vits;
  @Int32()
  external int numThreads;

  @Int32()
  external int debug;

  external Pointer<Utf8> provider;
  external SherpaOnnxOfflineTtsMatchaModelConfig matcha;
  external SherpaOnnxOfflineTtsKokoroModelConfig kokoro;
  external SherpaOnnxOfflineTtsKittenModelConfig kitten;
  external SherpaOnnxOfflineTtsZipVoiceModelConfig zipvoice;
  external SherpaOnnxOfflineTtsPocketModelConfig pocket;
  external SherpaOnnxOfflineTtsSupertonicModelConfig supertonic;
}

final class SherpaOnnxOfflineTtsConfig extends Struct {
  external SherpaOnnxOfflineTtsModelConfig model;
  external Pointer<Utf8> ruleFsts;

  @Int32()
  external int maxNumSenetences;

  external Pointer<Utf8> ruleFars;

  @Float()
  external double silenceScale;
}

final class SherpaOnnxGenerationConfig extends Struct {
  @Float()
  external double silenceScale;

  @Float()
  external double speed;

  @Int32()
  external int sid;

  external Pointer<Float> referenceAudio;

  @Int32()
  external int referenceAudioLength;

  @Int32()
  external int referenceSampleRate;

  external Pointer<Utf8> referenceText;

  @Int32()
  external int numSteps;

  external Pointer<Utf8> extra;
}

final class SherpaOnnxGeneratedAudio extends Struct {
  external Pointer<Float> samples;

  @Int32()
  external int n;

  @Int32()
  external int sampleRate;
}

final class SherpaOnnxFeatureConfig extends Struct {
  @Int32()
  external int sampleRate;

  @Int32()
  external int featureDim;
}

final class SherpaOnnxOfflineTransducerModelConfig extends Struct {
  external Pointer<Utf8> encoder;
  external Pointer<Utf8> decoder;
  external Pointer<Utf8> joiner;
}

final class SherpaOnnxOfflineParaformerModelConfig extends Struct {
  external Pointer<Utf8> model;
}

final class SherpaOnnxOfflineNemoEncDecCtcModelConfig extends Struct {
  external Pointer<Utf8> model;
}

final class SherpaOnnxOfflineDolphinModelConfig extends Struct {
  external Pointer<Utf8> model;
}

final class SherpaOnnxOfflineZipformerCtcModelConfig extends Struct {
  external Pointer<Utf8> model;
}

final class SherpaOnnxOfflineWenetCtcModelConfig extends Struct {
  external Pointer<Utf8> model;
}

final class SherpaOnnxOfflineOmnilingualAsrCtcModelConfig extends Struct {
  external Pointer<Utf8> model;
}

final class SherpaOnnxOfflineMedAsrCtcModelConfig extends Struct {
  external Pointer<Utf8> model;
}

final class SherpaOnnxOfflineFireRedAsrCtcModelConfig extends Struct {
  external Pointer<Utf8> model;
}

final class SherpaOnnxOfflineFunAsrNanoModelConfig extends Struct {
  external Pointer<Utf8> encoderAdaptor;
  external Pointer<Utf8> llm;
  external Pointer<Utf8> embedding;
  external Pointer<Utf8> tokenizer;
  external Pointer<Utf8> systemPrompt;
  external Pointer<Utf8> userPrompt;

  @Int32()
  external int maxNewTokens;

  @Float()
  external double temperature;

  @Float()
  external double topP;

  @Int32()
  external int seed;

  external Pointer<Utf8> language;

  @Int32()
  external int itn;

  external Pointer<Utf8> hotwords;
}

final class SherpaOnnxOfflineWhisperModelConfig extends Struct {
  external Pointer<Utf8> encoder;
  external Pointer<Utf8> decoder;
  external Pointer<Utf8> language;
  external Pointer<Utf8> task;

  @Int32()
  external int tailPaddings;

  @Int32()
  external int enableTokenTimestamps;

  @Int32()
  external int enableSegmentTimestamps;
}

final class SherpaOnnxOfflineCanaryModelConfig extends Struct {
  external Pointer<Utf8> encoder;
  external Pointer<Utf8> decoder;
  external Pointer<Utf8> srcLang;
  external Pointer<Utf8> tgtLang;

  @Int32()
  external int usePnc;
}

final class SherpaOnnxOfflineMoonshineModelConfig extends Struct {
  external Pointer<Utf8> preprocessor;
  external Pointer<Utf8> encoder;
  external Pointer<Utf8> uncachedDecoder;
  external Pointer<Utf8> cachedDecoder;
  external Pointer<Utf8> mergedDecoder;
}

final class SherpaOnnxOfflineFireRedAsrModelConfig extends Struct {
  external Pointer<Utf8> encoder;
  external Pointer<Utf8> decoder;
}

final class SherpaOnnxOfflineTdnnModelConfig extends Struct {
  external Pointer<Utf8> model;
}

final class SherpaOnnxOfflineSenseVoiceModelConfig extends Struct {
  external Pointer<Utf8> model;
  external Pointer<Utf8> language;

  @Int32()
  external int useInverseTextNormalization;
}

final class SherpaOnnxOfflineLMConfig extends Struct {
  external Pointer<Utf8> model;

  @Float()
  external double scale;
}

final class SherpaOnnxOfflineModelConfig extends Struct {
  external SherpaOnnxOfflineTransducerModelConfig transducer;
  external SherpaOnnxOfflineParaformerModelConfig paraformer;
  external SherpaOnnxOfflineNemoEncDecCtcModelConfig nemoCtc;
  external SherpaOnnxOfflineWhisperModelConfig whisper;
  external SherpaOnnxOfflineTdnnModelConfig tdnn;

  external Pointer<Utf8> tokens;

  @Int32()
  external int numThreads;

  @Int32()
  external int debug;

  external Pointer<Utf8> provider;

  external Pointer<Utf8> modelType;
  external Pointer<Utf8> modelingUnit;
  external Pointer<Utf8> bpeVocab;
  external Pointer<Utf8> telespeechCtc;

  external SherpaOnnxOfflineSenseVoiceModelConfig senseVoice;
  external SherpaOnnxOfflineMoonshineModelConfig moonshine;
  external SherpaOnnxOfflineFireRedAsrModelConfig fireRedAsr;
  external SherpaOnnxOfflineDolphinModelConfig dolphin;
  external SherpaOnnxOfflineZipformerCtcModelConfig zipformerCtc;
  external SherpaOnnxOfflineCanaryModelConfig canary;
  external SherpaOnnxOfflineWenetCtcModelConfig wenetCtc;
  external SherpaOnnxOfflineOmnilingualAsrCtcModelConfig omnilingual;
  external SherpaOnnxOfflineMedAsrCtcModelConfig medasr;
  external SherpaOnnxOfflineFunAsrNanoModelConfig funasrNano;
  external SherpaOnnxOfflineFireRedAsrCtcModelConfig fireRedAsrCtc;
}

final class SherpaOnnxOfflineRecognizerConfig extends Struct {
  external SherpaOnnxFeatureConfig feat;
  external SherpaOnnxOfflineModelConfig model;
  external SherpaOnnxOfflineLMConfig lm;
  external Pointer<Utf8> decodingMethod;

  @Int32()
  external int maxActivePaths;

  external Pointer<Utf8> hotwordsFile;

  @Float()
  external double hotwordsScore;

  external Pointer<Utf8> ruleFsts;
  external Pointer<Utf8> ruleFars;

  @Float()
  external double blankPenalty;
  external SherpaOnnxHomophoneReplacerConfig hr;
}

final class SherpaOnnxOnlineTransducerModelConfig extends Struct {
  external Pointer<Utf8> encoder;
  external Pointer<Utf8> decoder;
  external Pointer<Utf8> joiner;
}

final class SherpaOnnxOnlineParaformerModelConfig extends Struct {
  external Pointer<Utf8> encoder;
  external Pointer<Utf8> decoder;
}

final class SherpaOnnxOnlineZipformer2CtcModelConfig extends Struct {
  external Pointer<Utf8> model;
}

final class SherpaOnnxOnlineNemoCtcModelConfig extends Struct {
  external Pointer<Utf8> model;
}

final class SherpaOnnxOnlineToneCtcModelConfig extends Struct {
  external Pointer<Utf8> model;
}

final class SherpaOnnxOnlineModelConfig extends Struct {
  external SherpaOnnxOnlineTransducerModelConfig transducer;
  external SherpaOnnxOnlineParaformerModelConfig paraformer;
  external SherpaOnnxOnlineZipformer2CtcModelConfig zipformer2Ctc;

  external Pointer<Utf8> tokens;

  @Int32()
  external int numThreads;

  external Pointer<Utf8> provider;

  @Int32()
  external int debug;

  external Pointer<Utf8> modelType;

  external Pointer<Utf8> modelingUnit;

  external Pointer<Utf8> bpeVocab;

  external Pointer<Utf8> tokensBuf;

  @Int32()
  external int tokensBufSize;

  external SherpaOnnxOnlineNemoCtcModelConfig nemoCtc;

  external SherpaOnnxOnlineToneCtcModelConfig toneCtc;
}

final class SherpaOnnxOnlineCtcFstDecoderConfig extends Struct {
  external Pointer<Utf8> graph;

  @Int32()
  external int maxActive;
}

final class SherpaOnnxHomophoneReplacerConfig extends Struct {
  external Pointer<Utf8> dictDir;
  external Pointer<Utf8> lexicon;
  external Pointer<Utf8> ruleFsts;
}

final class SherpaOnnxOnlineRecognizerConfig extends Struct {
  external SherpaOnnxFeatureConfig feat;
  external SherpaOnnxOnlineModelConfig model;
  external Pointer<Utf8> decodingMethod;

  @Int32()
  external int maxActivePaths;

  @Int32()
  external int enableEndpoint;

  @Float()
  external double rule1MinTrailingSilence;

  @Float()
  external double rule2MinTrailingSilence;

  @Float()
  external double rule3MinUtteranceLength;

  external Pointer<Utf8> hotwordsFile;

  @Float()
  external double hotwordsScore;

  external SherpaOnnxOnlineCtcFstDecoderConfig ctcFstDecoderConfig;

  external Pointer<Utf8> ruleFsts;
  external Pointer<Utf8> ruleFars;

  @Float()
  external double blankPenalty;

  external Pointer<Utf8> hotwordsBuf;

  @Int32()
  external int hotwordsBufSize;
  external SherpaOnnxHomophoneReplacerConfig hr;
}

final class SherpaOnnxSileroVadModelConfig extends Struct {
  external Pointer<Utf8> model;

  @Float()
  external double threshold;

  @Float()
  external double minSilenceDuration;

  @Float()
  external double minSpeechDuration;

  @Int32()
  external int windowSize;

  @Float()
  external double maxSpeechDuration;
}

final class SherpaOnnxTenVadModelConfig extends Struct {
  external Pointer<Utf8> model;

  @Float()
  external double threshold;

  @Float()
  external double minSilenceDuration;

  @Float()
  external double minSpeechDuration;

  @Int32()
  external int windowSize;

  @Float()
  external double maxSpeechDuration;
}

final class SherpaOnnxVadModelConfig extends Struct {
  external SherpaOnnxSileroVadModelConfig sileroVad;

  @Int32()
  external int sampleRate;

  @Int32()
  external int numThreads;

  external Pointer<Utf8> provider;

  @Int32()
  external int debug;

  external SherpaOnnxTenVadModelConfig tenVad;
}

final class SherpaOnnxSpeechSegment extends Struct {
  @Int32()
  external int start;

  external Pointer<Float> samples;

  @Int32()
  external int n;
}

final class SherpaOnnxWave extends Struct {
  external Pointer<Float> samples;

  @Int32()
  external int sampleRate;

  @Int32()
  external int numSamples;
}

final class SherpaOnnxKeywordSpotterConfig extends Struct {
  external SherpaOnnxFeatureConfig feat;

  external SherpaOnnxOnlineModelConfig model;

  @Int32()
  external int maxActivePaths;

  @Int32()
  external int numTrailingBlanks;

  @Float()
  external double keywordsScore;

  @Float()
  external double keywordsThreshold;

  external Pointer<Utf8> keywordsFile;

  external Pointer<Utf8> keywordsBuf;

  @Int32()
  external int keywordsBufSize;
}

final class SherpaOnnxOfflinePunctuation extends Opaque {}

final class SherpaOnnxOnlinePunctuation extends Opaque {}

final class SherpaOnnxAudioTagging extends Opaque {}

final class SherpaOnnxKeywordSpotter extends Opaque {}

final class SherpaOnnxOfflineTts extends Opaque {}

final class SherpaOnnxCircularBuffer extends Opaque {}

final class SherpaOnnxVoiceActivityDetector extends Opaque {}

final class SherpaOnnxOnlineStream extends Opaque {}

final class SherpaOnnxOnlineRecognizer extends Opaque {}

final class SherpaOnnxOfflineRecognizer extends Opaque {}

final class SherpaOnnxOfflineStream extends Opaque {}

final class SherpaOnnxSpeakerEmbeddingExtractor extends Opaque {}

final class SherpaOnnxSpeakerEmbeddingManager extends Opaque {}

final class SherpaOnnxOfflineSpeakerDiarization extends Opaque {}

final class SherpaOnnxOfflineSpeakerDiarizationResult extends Opaque {}

final class SherpaOnnxSpokenLanguageIdentificationWhisperConfig extends Struct {
  external Pointer<Utf8> encoder;
  external Pointer<Utf8> decoder;

  @Int32()
  external int tailPaddings;
}

final class SherpaOnnxSpokenLanguageIdentificationConfig extends Struct {
  external SherpaOnnxSpokenLanguageIdentificationWhisperConfig whisper;

  @Int32()
  external int numThreads;

  @Int32()
  external int debug;

  external Pointer<Utf8> provider;
}

final class SherpaOnnxSpokenLanguageIdentificationResult extends Struct {
  external Pointer<Utf8> lang;
}

final class SherpaOnnxSpokenLanguageIdentification extends Opaque {}

final class SherpaOnnxOfflineSpeechDenoiser extends Opaque {}

final class SherpaOnnxOnlineSpeechDenoiser extends Opaque {}

typedef SherpaOnnxCreateOfflineSpeechDenoiserNative =
    Pointer<SherpaOnnxOfflineSpeechDenoiser> Function(
      Pointer<SherpaOnnxOfflineSpeechDenoiserConfig>,
    );

typedef SherpaOnnxCreateOfflineSpeechDenoiser =
    SherpaOnnxCreateOfflineSpeechDenoiserNative;

typedef SherpaOnnxDestroyOfflineSpeechDenoiserNative =
    Void Function(Pointer<SherpaOnnxOfflineSpeechDenoiser>);

typedef SherpaOnnxDestroyOfflineSpeechDenoiser =
    void Function(Pointer<SherpaOnnxOfflineSpeechDenoiser>);

typedef SherpaOnnxOfflineSpeechDenoiserGetSampleRateNative =
    Int32 Function(Pointer<SherpaOnnxOfflineSpeechDenoiser>);

typedef SherpaOnnxOfflineSpeechDenoiserGetSampleRate =
    int Function(Pointer<SherpaOnnxOfflineSpeechDenoiser>);

typedef SherpaOnnxOfflineSpeechDenoiserRunNative =
    Pointer<SherpaOnnxDenoisedAudio> Function(
      Pointer<SherpaOnnxOfflineSpeechDenoiser>,
      Pointer<Float>,
      Int32,
      Int32,
    );

typedef SherpaOnnxOfflineSpeechDenoiserRun =
    Pointer<SherpaOnnxDenoisedAudio> Function(
      Pointer<SherpaOnnxOfflineSpeechDenoiser>,
      Pointer<Float>,
      int,
      int,
    );

typedef SherpaOnnxDestroyDenoisedAudioNative =
    Void Function(Pointer<SherpaOnnxDenoisedAudio>);

typedef SherpaOnnxDestroyDenoisedAudio =
    void Function(Pointer<SherpaOnnxDenoisedAudio>);

typedef SherpaOnnxCreateOnlineSpeechDenoiserNative =
    Pointer<SherpaOnnxOnlineSpeechDenoiser> Function(
      Pointer<SherpaOnnxOnlineSpeechDenoiserConfig>,
    );

typedef SherpaOnnxCreateOnlineSpeechDenoiser =
    SherpaOnnxCreateOnlineSpeechDenoiserNative;

typedef SherpaOnnxDestroyOnlineSpeechDenoiserNative =
    Void Function(Pointer<SherpaOnnxOnlineSpeechDenoiser>);

typedef SherpaOnnxDestroyOnlineSpeechDenoiser =
    void Function(Pointer<SherpaOnnxOnlineSpeechDenoiser>);

typedef SherpaOnnxOnlineSpeechDenoiserGetSampleRateNative =
    Int32 Function(Pointer<SherpaOnnxOnlineSpeechDenoiser>);

typedef SherpaOnnxOnlineSpeechDenoiserGetSampleRate =
    int Function(Pointer<SherpaOnnxOnlineSpeechDenoiser>);

typedef SherpaOnnxOnlineSpeechDenoiserGetFrameShiftInSamplesNative =
    Int32 Function(Pointer<SherpaOnnxOnlineSpeechDenoiser>);

typedef SherpaOnnxOnlineSpeechDenoiserGetFrameShiftInSamples =
    int Function(Pointer<SherpaOnnxOnlineSpeechDenoiser>);

typedef SherpaOnnxOnlineSpeechDenoiserRunNative =
    Pointer<SherpaOnnxDenoisedAudio> Function(
      Pointer<SherpaOnnxOnlineSpeechDenoiser>,
      Pointer<Float>,
      Int32,
      Int32,
    );

typedef SherpaOnnxOnlineSpeechDenoiserRun =
    Pointer<SherpaOnnxDenoisedAudio> Function(
      Pointer<SherpaOnnxOnlineSpeechDenoiser>,
      Pointer<Float>,
      int,
      int,
    );

typedef SherpaOnnxOnlineSpeechDenoiserFlushNative =
    Pointer<SherpaOnnxDenoisedAudio> Function(
      Pointer<SherpaOnnxOnlineSpeechDenoiser>,
    );

typedef SherpaOnnxOnlineSpeechDenoiserFlush =
    Pointer<SherpaOnnxDenoisedAudio> Function(
      Pointer<SherpaOnnxOnlineSpeechDenoiser>,
    );

typedef SherpaOnnxOnlineSpeechDenoiserResetNative =
    Void Function(Pointer<SherpaOnnxOnlineSpeechDenoiser>);

typedef SherpaOnnxOnlineSpeechDenoiserReset =
    void Function(Pointer<SherpaOnnxOnlineSpeechDenoiser>);

typedef SherpaOnnxCreateSpokenLanguageIdentificationNative =
    Pointer<SherpaOnnxSpokenLanguageIdentification> Function(
      Pointer<SherpaOnnxSpokenLanguageIdentificationConfig>,
    );

typedef SherpaOnnxCreateSpokenLanguageIdentification =
    SherpaOnnxCreateSpokenLanguageIdentificationNative;

typedef SherpaOnnxDestroySpokenLanguageIdentificationNative =
    Void Function(Pointer<SherpaOnnxSpokenLanguageIdentification>);

typedef SherpaOnnxDestroySpokenLanguageIdentification =
    void Function(Pointer<SherpaOnnxSpokenLanguageIdentification>);

typedef SherpaOnnxSpokenLanguageIdentificationCreateOfflineStreamNative =
    Pointer<SherpaOnnxOfflineStream> Function(
      Pointer<SherpaOnnxSpokenLanguageIdentification>,
    );

typedef SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream =
    SherpaOnnxSpokenLanguageIdentificationCreateOfflineStreamNative;

typedef SherpaOnnxSpokenLanguageIdentificationComputeNative =
    Pointer<SherpaOnnxSpokenLanguageIdentificationResult> Function(
      Pointer<SherpaOnnxSpokenLanguageIdentification>,
      Pointer<SherpaOnnxOfflineStream>,
    );

typedef SherpaOnnxSpokenLanguageIdentificationCompute =
    SherpaOnnxSpokenLanguageIdentificationComputeNative;

typedef SherpaOnnxDestroySpokenLanguageIdentificationResultNative =
    Void Function(Pointer<SherpaOnnxSpokenLanguageIdentificationResult>);

typedef SherpaOnnxDestroySpokenLanguageIdentificationResult =
    void Function(Pointer<SherpaOnnxSpokenLanguageIdentificationResult>);

typedef SherpaOnnxCreateOfflineSpeakerDiarizationNative =
    Pointer<SherpaOnnxOfflineSpeakerDiarization> Function(
      Pointer<SherpaOnnxOfflineSpeakerDiarizationConfig>,
    );

typedef SherpaOnnxCreateOfflineSpeakerDiarization =
    SherpaOnnxCreateOfflineSpeakerDiarizationNative;

typedef SherpaOnnxDestroyOfflineSpeakerDiarizationNative =
    Void Function(Pointer<SherpaOnnxOfflineSpeakerDiarization>);

typedef SherpaOnnxDestroyOfflineSpeakerDiarization =
    void Function(Pointer<SherpaOnnxOfflineSpeakerDiarization>);

typedef SherpaOnnxCreateOfflinePunctuationNative =
    Pointer<SherpaOnnxOfflinePunctuation> Function(
      Pointer<SherpaOnnxOfflinePunctuationConfig>,
    );

typedef SherpaOnnxCreateOnlinePunctuationNative =
    Pointer<SherpaOnnxOnlinePunctuation> Function(
      Pointer<SherpaOnnxOnlinePunctuationConfig>,
    );

typedef SherpaOnnxOfflineSpeakerDiarizationGetSampleRateNative =
    Int32 Function(Pointer<SherpaOnnxOfflineSpeakerDiarization>);

typedef SherpaOnnxOfflineSpeakerDiarizationGetSampleRate =
    int Function(Pointer<SherpaOnnxOfflineSpeakerDiarization>);

typedef SherpaOnnxOfflineSpeakerDiarizationSetConfigNative =
    Void Function(
      Pointer<SherpaOnnxOfflineSpeakerDiarization>,
      Pointer<SherpaOnnxOfflineSpeakerDiarizationConfig>,
    );

typedef SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakersNative =
    Int32 Function(Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);

typedef SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers =
    int Function(Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);

typedef SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegmentsNative =
    Int32 Function(Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);

typedef SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments =
    int Function(Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);

typedef SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTimeNative =
    Pointer<SherpaOnnxOfflineSpeakerDiarizationSegment> Function(
      Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>,
    );

typedef SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime =
    SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTimeNative;

typedef SherpaOnnxOfflineSpeakerDiarizationDestroySegmentNative =
    Void Function(Pointer<SherpaOnnxOfflineSpeakerDiarizationSegment>);

typedef SherpaOnnxOfflineSpeakerDiarizationDestroySegment =
    void Function(Pointer<SherpaOnnxOfflineSpeakerDiarizationSegment>);

typedef SherpaOnnxOfflineSpeakerDiarizationProcessNative =
    Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> Function(
      Pointer<SherpaOnnxOfflineSpeakerDiarization>,
      Pointer<Float>,
      Int32,
    );

typedef SherpaOnnxOfflineSpeakerDiarizationProcess =
    Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> Function(
      Pointer<SherpaOnnxOfflineSpeakerDiarization>,
      Pointer<Float>,
      int,
    );

typedef SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArgNative =
    Int32 Function(Int32, Int32);

typedef SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArgNative =
    Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> Function(
      Pointer<SherpaOnnxOfflineSpeakerDiarization>,
      Pointer<Float>,
      Int32,
      Pointer<
        NativeFunction<
          SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArgNative
        >
      >,
    );

typedef SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg =
    Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> Function(
      Pointer<SherpaOnnxOfflineSpeakerDiarization>,
      Pointer<Float>,
      int,
      Pointer<
        NativeFunction<
          SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArgNative
        >
      >,
    );

typedef SherpaOnnxOfflineSpeakerDiarizationDestroyResultNative =
    Void Function(Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);

typedef SherpaOnnxOfflineSpeakerDiarizationDestroyResult =
    void Function(Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);

typedef SherpaOnnxOfflineSpeakerDiarizationSetConfig =
    void Function(
      Pointer<SherpaOnnxOfflineSpeakerDiarization>,
      Pointer<SherpaOnnxOfflineSpeakerDiarizationConfig>,
    );

typedef SherpaOnnxCreateOfflinePunctuation =
    SherpaOnnxCreateOfflinePunctuationNative;

typedef SherpaOnnxDestroyOfflinePunctuationNative =
    Void Function(Pointer<SherpaOnnxOfflinePunctuation>);

typedef SherpaOnnxDestroyOfflinePunctuation =
    void Function(Pointer<SherpaOnnxOfflinePunctuation>);

typedef SherpaOfflinePunctuationAddPunctNative =
    Pointer<Utf8> Function(
      Pointer<SherpaOnnxOfflinePunctuation>,
      Pointer<Utf8>,
    );

typedef SherpaOfflinePunctuationAddPunct =
    SherpaOfflinePunctuationAddPunctNative;

typedef SherpaOfflinePunctuationFreeTextNative = Void Function(Pointer<Utf8>);

typedef SherpaOfflinePunctuationFreeText = void Function(Pointer<Utf8>);

typedef SherpaOnnxCreateOnlinePunctuation =
    SherpaOnnxCreateOnlinePunctuationNative;

typedef SherpaOnnxDestroyOnlinePunctuationNative =
    Void Function(Pointer<SherpaOnnxOnlinePunctuation>);

typedef SherpaOnnxDestroyOnlinePunctuation =
    void Function(Pointer<SherpaOnnxOnlinePunctuation>);

typedef SherpaOnnxOnlinePunctuationAddPunctNative =
    Pointer<Utf8> Function(Pointer<SherpaOnnxOnlinePunctuation>, Pointer<Utf8>);

typedef SherpaOnnxOnlinePunctuationAddPunct =
    SherpaOnnxOnlinePunctuationAddPunctNative;

typedef SherpaOnnxOnlinePunctuationFreeTextNative =
    Void Function(Pointer<Utf8>);

typedef SherpaOnnxOnlinePunctuationFreeText = void Function(Pointer<Utf8>);

typedef SherpaOnnxCreateAudioTaggingNative =
    Pointer<SherpaOnnxAudioTagging> Function(
      Pointer<SherpaOnnxAudioTaggingConfig>,
    );

typedef SherpaOnnxCreateAudioTagging = SherpaOnnxCreateAudioTaggingNative;

typedef SherpaOnnxDestroyAudioTaggingNative =
    Void Function(Pointer<SherpaOnnxAudioTagging>);

typedef SherpaOnnxDestroyAudioTagging =
    void Function(Pointer<SherpaOnnxAudioTagging>);

typedef SherpaOnnxAudioTaggingCreateOfflineStreamNative =
    Pointer<SherpaOnnxOfflineStream> Function(Pointer<SherpaOnnxAudioTagging>);

typedef SherpaOnnxAudioTaggingCreateOfflineStream =
    SherpaOnnxAudioTaggingCreateOfflineStreamNative;

typedef SherpaOnnxAudioTaggingComputeNative =
    Pointer<Pointer<SherpaOnnxAudioEvent>> Function(
      Pointer<SherpaOnnxAudioTagging>,
      Pointer<SherpaOnnxOfflineStream>,
      Int32,
    );

typedef SherpaOnnxAudioTaggingCompute =
    Pointer<Pointer<SherpaOnnxAudioEvent>> Function(
      Pointer<SherpaOnnxAudioTagging>,
      Pointer<SherpaOnnxOfflineStream>,
      int,
    );

typedef SherpaOnnxAudioTaggingFreeResultsNative =
    Void Function(Pointer<Pointer<SherpaOnnxAudioEvent>>);

typedef SherpaOnnxAudioTaggingFreeResults =
    void Function(Pointer<Pointer<SherpaOnnxAudioEvent>>);

typedef CreateKeywordSpotterNative =
    Pointer<SherpaOnnxKeywordSpotter> Function(
      Pointer<SherpaOnnxKeywordSpotterConfig>,
    );

typedef CreateKeywordSpotter = CreateKeywordSpotterNative;

typedef DestroyKeywordSpotterNative =
    Void Function(Pointer<SherpaOnnxKeywordSpotter>);

typedef DestroyKeywordSpotter =
    void Function(Pointer<SherpaOnnxKeywordSpotter>);

typedef CreateKeywordStreamNative =
    Pointer<SherpaOnnxOnlineStream> Function(Pointer<SherpaOnnxKeywordSpotter>);

typedef CreateKeywordStream = CreateKeywordStreamNative;

typedef CreateKeywordStreamWithKeywordsNative =
    Pointer<SherpaOnnxOnlineStream> Function(
      Pointer<SherpaOnnxKeywordSpotter>,
      Pointer<Utf8>,
    );

typedef CreateKeywordStreamWithKeywords = CreateKeywordStreamWithKeywordsNative;

typedef IsKeywordStreamReadyNative =
    Int32 Function(
      Pointer<SherpaOnnxKeywordSpotter>,
      Pointer<SherpaOnnxOnlineStream>,
    );

typedef IsKeywordStreamReady =
    int Function(
      Pointer<SherpaOnnxKeywordSpotter>,
      Pointer<SherpaOnnxOnlineStream>,
    );

typedef DecodeKeywordStreamNative =
    Void Function(
      Pointer<SherpaOnnxKeywordSpotter>,
      Pointer<SherpaOnnxOnlineStream>,
    );

typedef DecodeKeywordStream =
    void Function(
      Pointer<SherpaOnnxKeywordSpotter>,
      Pointer<SherpaOnnxOnlineStream>,
    );

typedef ResetKeywordStreamNative =
    Void Function(
      Pointer<SherpaOnnxKeywordSpotter>,
      Pointer<SherpaOnnxOnlineStream>,
    );

typedef ResetKeywordStream =
    void Function(
      Pointer<SherpaOnnxKeywordSpotter>,
      Pointer<SherpaOnnxOnlineStream>,
    );

typedef GetKeywordResultAsJsonNative =
    Pointer<Utf8> Function(
      Pointer<SherpaOnnxKeywordSpotter>,
      Pointer<SherpaOnnxOnlineStream>,
    );

typedef GetKeywordResultAsJson = GetKeywordResultAsJsonNative;

typedef FreeKeywordResultJsonNative = Void Function(Pointer<Utf8>);

typedef FreeKeywordResultJson = void Function(Pointer<Utf8>);

typedef SherpaOnnxCreateOfflineTtsNative =
    Pointer<SherpaOnnxOfflineTts> Function(Pointer<SherpaOnnxOfflineTtsConfig>);

typedef SherpaOnnxCreateOfflineTts = SherpaOnnxCreateOfflineTtsNative;

typedef SherpaOnnxDestroyOfflineTtsNative =
    Void Function(Pointer<SherpaOnnxOfflineTts>);

typedef SherpaOnnxDestroyOfflineTts =
    void Function(Pointer<SherpaOnnxOfflineTts>);

typedef SherpaOnnxOfflineTtsSampleRateNative =
    Int32 Function(Pointer<SherpaOnnxOfflineTts>);

typedef SherpaOnnxOfflineTtsSampleRate =
    int Function(Pointer<SherpaOnnxOfflineTts>);

typedef SherpaOnnxOfflineTtsNumSpeakersNative =
    Int32 Function(Pointer<SherpaOnnxOfflineTts>);

typedef SherpaOnnxOfflineTtsNumSpeakers =
    int Function(Pointer<SherpaOnnxOfflineTts>);

typedef SherpaOnnxOfflineTtsGenerateNative =
    Pointer<SherpaOnnxGeneratedAudio> Function(
      Pointer<SherpaOnnxOfflineTts>,
      Pointer<Utf8>,
      Int32,
      Float,
    );

typedef SherpaOnnxOfflineTtsGenerate =
    Pointer<SherpaOnnxGeneratedAudio> Function(
      Pointer<SherpaOnnxOfflineTts>,
      Pointer<Utf8>,
      int,
      double,
    );

typedef SherpaOnnxDestroyOfflineTtsGeneratedAudioNative =
    Void Function(Pointer<SherpaOnnxGeneratedAudio>);

typedef SherpaOnnxDestroyOfflineTtsGeneratedAudio =
    void Function(Pointer<SherpaOnnxGeneratedAudio>);

typedef SherpaOnnxGeneratedAudioCallbackNative =
    Int32 Function(Pointer<Float>, Int32);

typedef SherpaOnnxGeneratedAudioProgressCallbackWithArgNative =
    Int32 Function(Pointer<Float> samples, Int32 n, Float p, Pointer<Void> arg);

typedef SherpaOnnxGeneratedAudioProgressCallbackWithArg =
    int Function(Pointer<Float> samples, int n, double p, Pointer<Void> arg);

typedef SherpaOnnxOfflineTtsGenerateWithCallbackNative =
    Pointer<SherpaOnnxGeneratedAudio> Function(
      Pointer<SherpaOnnxOfflineTts>,
      Pointer<Utf8>,
      Int32,
      Float,
      Pointer<NativeFunction<SherpaOnnxGeneratedAudioCallbackNative>>,
    );

typedef SherpaOnnxOfflineTtsGenerateWithCallback =
    Pointer<SherpaOnnxGeneratedAudio> Function(
      Pointer<SherpaOnnxOfflineTts>,
      Pointer<Utf8>,
      int,
      double,
      Pointer<NativeFunction<SherpaOnnxGeneratedAudioCallbackNative>>,
    );

typedef SherpaOnnxOfflineTtsGenerateWithConfigNative =
    Pointer<SherpaOnnxGeneratedAudio> Function(
      Pointer<SherpaOnnxOfflineTts>,
      Pointer<Utf8>,
      Pointer<SherpaOnnxGenerationConfig>,
      Pointer<
        NativeFunction<SherpaOnnxGeneratedAudioProgressCallbackWithArgNative>
      >,
      Pointer<Void>,
    );

typedef SherpaOnnxOfflineTtsGenerateWithConfig =
    Pointer<SherpaOnnxGeneratedAudio> Function(
      Pointer<SherpaOnnxOfflineTts>,
      Pointer<Utf8>,
      Pointer<SherpaOnnxGenerationConfig>,
      Pointer<
        NativeFunction<SherpaOnnxGeneratedAudioProgressCallbackWithArgNative>
      >,
      Pointer<Void>,
    );

typedef CreateOfflineRecognizerNative =
    Pointer<SherpaOnnxOfflineRecognizer> Function(
      Pointer<SherpaOnnxOfflineRecognizerConfig>,
    );

typedef CreateOfflineRecognizer = CreateOfflineRecognizerNative;

typedef OfflineRecognizerSetConfigNative =
    Void Function(
      Pointer<SherpaOnnxOfflineRecognizer>,
      Pointer<SherpaOnnxOfflineRecognizerConfig>,
    );

typedef OfflineRecognizerSetConfig =
    void Function(
      Pointer<SherpaOnnxOfflineRecognizer>,
      Pointer<SherpaOnnxOfflineRecognizerConfig>,
    );

typedef DestroyOfflineRecognizerNative =
    Void Function(Pointer<SherpaOnnxOfflineRecognizer>);

typedef DestroyOfflineRecognizer =
    void Function(Pointer<SherpaOnnxOfflineRecognizer>);

typedef CreateOfflineStreamNative =
    Pointer<SherpaOnnxOfflineStream> Function(
      Pointer<SherpaOnnxOfflineRecognizer>,
    );

typedef CreateOfflineStream = CreateOfflineStreamNative;

typedef DestroyOfflineStreamNative =
    Void Function(Pointer<SherpaOnnxOfflineStream>);

typedef DestroyOfflineStream = void Function(Pointer<SherpaOnnxOfflineStream>);

typedef AcceptWaveformOfflineNative =
    Void Function(
      Pointer<SherpaOnnxOfflineStream>,
      Int32,
      Pointer<Float>,
      Int32,
    );

typedef AcceptWaveformOffline =
    void Function(Pointer<SherpaOnnxOfflineStream>, int, Pointer<Float>, int);

typedef DecodeOfflineStreamNative =
    Void Function(
      Pointer<SherpaOnnxOfflineRecognizer>,
      Pointer<SherpaOnnxOfflineStream>,
    );

typedef DecodeOfflineStream =
    void Function(
      Pointer<SherpaOnnxOfflineRecognizer>,
      Pointer<SherpaOnnxOfflineStream>,
    );

typedef GetOfflineStreamResultAsJsonNative =
    Pointer<Utf8> Function(Pointer<SherpaOnnxOfflineStream>);

typedef GetOfflineStreamResultAsJson = GetOfflineStreamResultAsJsonNative;

typedef DestroyOfflineStreamResultJsonNative = Void Function(Pointer<Utf8>);

typedef DestroyOfflineStreamResultJson = void Function(Pointer<Utf8>);

typedef SherpaOnnxCreateOnlineRecognizerNative =
    Pointer<SherpaOnnxOnlineRecognizer> Function(
      Pointer<SherpaOnnxOnlineRecognizerConfig>,
    );

typedef SherpaOnnxCreateOnlineRecognizer =
    SherpaOnnxCreateOnlineRecognizerNative;

typedef SherpaOnnxDestroyOnlineRecognizerNative =
    Void Function(Pointer<SherpaOnnxOnlineRecognizer>);

typedef SherpaOnnxDestroyOnlineRecognizer =
    void Function(Pointer<SherpaOnnxOnlineRecognizer>);

typedef SherpaOnnxCreateOnlineStreamNative =
    Pointer<SherpaOnnxOnlineStream> Function(
      Pointer<SherpaOnnxOnlineRecognizer>,
    );

typedef SherpaOnnxCreateOnlineStream = SherpaOnnxCreateOnlineStreamNative;

typedef SherpaOnnxCreateOnlineStreamWithHotwordsNative =
    Pointer<SherpaOnnxOnlineStream> Function(
      Pointer<SherpaOnnxOnlineRecognizer>,
      Pointer<Utf8>,
    );

typedef SherpaOnnxCreateOnlineStreamWithHotwords =
    SherpaOnnxCreateOnlineStreamWithHotwordsNative;

typedef IsOnlineStreamReadyNative =
    Int32 Function(
      Pointer<SherpaOnnxOnlineRecognizer>,
      Pointer<SherpaOnnxOnlineStream>,
    );

typedef IsOnlineStreamReady =
    int Function(
      Pointer<SherpaOnnxOnlineRecognizer>,
      Pointer<SherpaOnnxOnlineStream>,
    );

typedef SherpaOnnxDecodeOnlineStreamNative =
    Void Function(
      Pointer<SherpaOnnxOnlineRecognizer>,
      Pointer<SherpaOnnxOnlineStream>,
    );

typedef SherpaOnnxDecodeOnlineStream =
    void Function(
      Pointer<SherpaOnnxOnlineRecognizer>,
      Pointer<SherpaOnnxOnlineStream>,
    );

typedef GetOnlineStreamResultAsJsonNative =
    Pointer<Utf8> Function(
      Pointer<SherpaOnnxOnlineRecognizer>,
      Pointer<SherpaOnnxOnlineStream>,
    );

typedef GetOnlineStreamResultAsJson = GetOnlineStreamResultAsJsonNative;

typedef ResetNative =
    Void Function(
      Pointer<SherpaOnnxOnlineRecognizer>,
      Pointer<SherpaOnnxOnlineStream>,
    );

typedef Reset =
    void Function(
      Pointer<SherpaOnnxOnlineRecognizer>,
      Pointer<SherpaOnnxOnlineStream>,
    );

typedef IsEndpointNative =
    Int32 Function(
      Pointer<SherpaOnnxOnlineRecognizer>,
      Pointer<SherpaOnnxOnlineStream>,
    );

typedef IsEndpoint =
    int Function(
      Pointer<SherpaOnnxOnlineRecognizer>,
      Pointer<SherpaOnnxOnlineStream>,
    );

typedef DestroyOnlineStreamResultJsonNative = Void Function(Pointer<Utf8>);

typedef DestroyOnlineStreamResultJson = void Function(Pointer<Utf8>);

typedef SherpaOnnxCreateVoiceActivityDetectorNative =
    Pointer<SherpaOnnxVoiceActivityDetector> Function(
      Pointer<SherpaOnnxVadModelConfig>,
      Float,
    );

typedef SherpaOnnxCreateVoiceActivityDetector =
    Pointer<SherpaOnnxVoiceActivityDetector> Function(
      Pointer<SherpaOnnxVadModelConfig>,
      double,
    );

typedef SherpaOnnxDestroyVoiceActivityDetectorNative =
    Void Function(Pointer<SherpaOnnxVoiceActivityDetector>);

typedef SherpaOnnxDestroyVoiceActivityDetector =
    void Function(Pointer<SherpaOnnxVoiceActivityDetector>);

typedef SherpaOnnxVoiceActivityDetectorAcceptWaveformNative =
    Void Function(
      Pointer<SherpaOnnxVoiceActivityDetector>,
      Pointer<Float>,
      Int32,
    );

typedef SherpaOnnxVoiceActivityDetectorAcceptWaveform =
    void Function(
      Pointer<SherpaOnnxVoiceActivityDetector>,
      Pointer<Float>,
      int,
    );

typedef SherpaOnnxVoiceActivityDetectorEmptyNative =
    Int32 Function(Pointer<SherpaOnnxVoiceActivityDetector>);

typedef SherpaOnnxVoiceActivityDetectorEmpty =
    int Function(Pointer<SherpaOnnxVoiceActivityDetector>);

typedef SherpaOnnxVoiceActivityDetectorDetectedNative =
    Int32 Function(Pointer<SherpaOnnxVoiceActivityDetector>);

typedef SherpaOnnxVoiceActivityDetectorDetected =
    int Function(Pointer<SherpaOnnxVoiceActivityDetector>);

typedef SherpaOnnxVoiceActivityDetectorPopNative =
    Void Function(Pointer<SherpaOnnxVoiceActivityDetector>);

typedef SherpaOnnxVoiceActivityDetectorPop =
    void Function(Pointer<SherpaOnnxVoiceActivityDetector>);

typedef SherpaOnnxVoiceActivityDetectorClearNative =
    Void Function(Pointer<SherpaOnnxVoiceActivityDetector>);

typedef SherpaOnnxVoiceActivityDetectorClear =
    void Function(Pointer<SherpaOnnxVoiceActivityDetector>);

typedef SherpaOnnxVoiceActivityDetectorResetNative =
    Void Function(Pointer<SherpaOnnxVoiceActivityDetector>);

typedef SherpaOnnxVoiceActivityDetectorReset =
    void Function(Pointer<SherpaOnnxVoiceActivityDetector>);

typedef SherpaOnnxVoiceActivityDetectorFlushNative =
    Void Function(Pointer<SherpaOnnxVoiceActivityDetector>);

typedef SherpaOnnxVoiceActivityDetectorFlush =
    void Function(Pointer<SherpaOnnxVoiceActivityDetector>);

typedef SherpaOnnxVoiceActivityDetectorFrontNative =
    Pointer<SherpaOnnxSpeechSegment> Function(
      Pointer<SherpaOnnxVoiceActivityDetector>,
    );

typedef SherpaOnnxVoiceActivityDetectorFront =
    SherpaOnnxVoiceActivityDetectorFrontNative;

typedef SherpaOnnxDestroySpeechSegmentNative =
    Void Function(Pointer<SherpaOnnxSpeechSegment>);

typedef SherpaOnnxDestroySpeechSegment =
    void Function(Pointer<SherpaOnnxSpeechSegment>);

typedef SherpaOnnxCreateCircularBufferNative =
    Pointer<SherpaOnnxCircularBuffer> Function(Int32);

typedef SherpaOnnxCreateCircularBuffer =
    Pointer<SherpaOnnxCircularBuffer> Function(int);

typedef SherpaOnnxDestroyCircularBufferNative =
    Void Function(Pointer<SherpaOnnxCircularBuffer>);

typedef SherpaOnnxDestroyCircularBuffer =
    void Function(Pointer<SherpaOnnxCircularBuffer>);

typedef SherpaOnnxCircularBufferPushNative =
    Void Function(Pointer<SherpaOnnxCircularBuffer>, Pointer<Float>, Int32);

typedef SherpaOnnxCircularBufferPush =
    void Function(Pointer<SherpaOnnxCircularBuffer>, Pointer<Float>, int);

typedef SherpaOnnxCircularBufferGetNative =
    Pointer<Float> Function(Pointer<SherpaOnnxCircularBuffer>, Int32, Int32);

typedef SherpaOnnxCircularBufferGet =
    Pointer<Float> Function(Pointer<SherpaOnnxCircularBuffer>, int, int);

typedef SherpaOnnxCircularBufferFreeNative = Void Function(Pointer<Float>);

typedef SherpaOnnxCircularBufferFree = void Function(Pointer<Float>);

typedef SherpaOnnxCircularBufferPopNative =
    Void Function(Pointer<SherpaOnnxCircularBuffer>, Int32);

typedef SherpaOnnxCircularBufferPop =
    void Function(Pointer<SherpaOnnxCircularBuffer>, int);

typedef SherpaOnnxCircularBufferSizeNative =
    Int32 Function(Pointer<SherpaOnnxCircularBuffer>);

typedef SherpaOnnxCircularBufferSize =
    int Function(Pointer<SherpaOnnxCircularBuffer>);

typedef SherpaOnnxCircularBufferHeadNative =
    Int32 Function(Pointer<SherpaOnnxCircularBuffer>);

typedef SherpaOnnxCircularBufferHead =
    int Function(Pointer<SherpaOnnxCircularBuffer>);

typedef SherpaOnnxCircularBufferResetNative =
    Void Function(Pointer<SherpaOnnxCircularBuffer>);

typedef SherpaOnnxCircularBufferReset =
    void Function(Pointer<SherpaOnnxCircularBuffer>);

typedef SherpaOnnxCreateSpeakerEmbeddingManagerNative =
    Pointer<SherpaOnnxSpeakerEmbeddingManager> Function(Int32);

typedef SherpaOnnxCreateSpeakerEmbeddingManager =
    Pointer<SherpaOnnxSpeakerEmbeddingManager> Function(int);

typedef SherpaOnnxDestroySpeakerEmbeddingManagerNative =
    Void Function(Pointer<SherpaOnnxSpeakerEmbeddingManager>);

typedef SherpaOnnxDestroySpeakerEmbeddingManager =
    void Function(Pointer<SherpaOnnxSpeakerEmbeddingManager>);

typedef SherpaOnnxSpeakerEmbeddingManagerAddNative =
    Int32 Function(
      Pointer<SherpaOnnxSpeakerEmbeddingManager>,
      Pointer<Utf8>,
      Pointer<Float>,
    );

typedef SherpaOnnxSpeakerEmbeddingManagerAdd =
    int Function(
      Pointer<SherpaOnnxSpeakerEmbeddingManager>,
      Pointer<Utf8>,
      Pointer<Float>,
    );

typedef SherpaOnnxSpeakerEmbeddingManagerAddListFlattenedNative =
    Int32 Function(
      Pointer<SherpaOnnxSpeakerEmbeddingManager>,
      Pointer<Utf8>,
      Pointer<Float>,
      Int32,
    );

typedef SherpaOnnxSpeakerEmbeddingManagerAddListFlattened =
    int Function(
      Pointer<SherpaOnnxSpeakerEmbeddingManager>,
      Pointer<Utf8>,
      Pointer<Float>,
      int,
    );

typedef SherpaOnnxSpeakerEmbeddingManagerRemoveNative =
    Int32 Function(Pointer<SherpaOnnxSpeakerEmbeddingManager>, Pointer<Utf8>);

typedef SherpaOnnxSpeakerEmbeddingManagerRemove =
    int Function(Pointer<SherpaOnnxSpeakerEmbeddingManager>, Pointer<Utf8>);

typedef SherpaOnnxSpeakerEmbeddingManagerContainsNative =
    Int32 Function(Pointer<SherpaOnnxSpeakerEmbeddingManager>, Pointer<Utf8>);

typedef SherpaOnnxSpeakerEmbeddingManagerContains =
    int Function(Pointer<SherpaOnnxSpeakerEmbeddingManager>, Pointer<Utf8>);

typedef SherpaOnnxSpeakerEmbeddingManagerSearchNative =
    Pointer<Utf8> Function(
      Pointer<SherpaOnnxSpeakerEmbeddingManager>,
      Pointer<Float>,
      Float,
    );

typedef SherpaOnnxSpeakerEmbeddingManagerSearch =
    Pointer<Utf8> Function(
      Pointer<SherpaOnnxSpeakerEmbeddingManager>,
      Pointer<Float>,
      double,
    );

typedef SherpaOnnxSpeakerEmbeddingManagerFreeSearchNative =
    Void Function(Pointer<Utf8>);

typedef SherpaOnnxSpeakerEmbeddingManagerFreeSearch =
    void Function(Pointer<Utf8>);

typedef SherpaOnnxSpeakerEmbeddingManagerNumSpeakersNative =
    Int32 Function(Pointer<SherpaOnnxSpeakerEmbeddingManager>);

typedef SherpaOnnxSpeakerEmbeddingManagerNumSpeakers =
    int Function(Pointer<SherpaOnnxSpeakerEmbeddingManager>);

typedef SherpaOnnxSpeakerEmbeddingManagerVerifyNative =
    Int32 Function(
      Pointer<SherpaOnnxSpeakerEmbeddingManager>,
      Pointer<Utf8>,
      Pointer<Float>,
      Float,
    );

typedef SherpaOnnxSpeakerEmbeddingManagerVerify =
    int Function(
      Pointer<SherpaOnnxSpeakerEmbeddingManager>,
      Pointer<Utf8>,
      Pointer<Float>,
      double,
    );

typedef SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakersNative =
    Pointer<Pointer<Utf8>> Function(Pointer<SherpaOnnxSpeakerEmbeddingManager>);

typedef SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers =
    SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakersNative;

typedef SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakersNative =
    Void Function(Pointer<Pointer<Utf8>>);

typedef SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers =
    void Function(Pointer<Pointer<Utf8>>);

typedef SherpaOnnxCreateSpeakerEmbeddingExtractorNative =
    Pointer<SherpaOnnxSpeakerEmbeddingExtractor> Function(
      Pointer<SherpaOnnxSpeakerEmbeddingExtractorConfig>,
    );

typedef SherpaOnnxCreateSpeakerEmbeddingExtractor =
    SherpaOnnxCreateSpeakerEmbeddingExtractorNative;

typedef SherpaOnnxDestroySpeakerEmbeddingExtractorNative =
    Void Function(Pointer<SherpaOnnxSpeakerEmbeddingExtractor>);

typedef SherpaOnnxDestroySpeakerEmbeddingExtractor =
    void Function(Pointer<SherpaOnnxSpeakerEmbeddingExtractor>);

typedef SherpaOnnxSpeakerEmbeddingExtractorDimNative =
    Int32 Function(Pointer<SherpaOnnxSpeakerEmbeddingExtractor>);

typedef SherpaOnnxSpeakerEmbeddingExtractorDim =
    int Function(Pointer<SherpaOnnxSpeakerEmbeddingExtractor>);

typedef SherpaOnnxSpeakerEmbeddingExtractorCreateStreamNative =
    Pointer<SherpaOnnxOnlineStream> Function(
      Pointer<SherpaOnnxSpeakerEmbeddingExtractor>,
    );

typedef SherpaOnnxSpeakerEmbeddingExtractorCreateStream =
    SherpaOnnxSpeakerEmbeddingExtractorCreateStreamNative;

typedef SherpaOnnxDestroyOnlineStreamNative =
    Void Function(Pointer<SherpaOnnxOnlineStream>);

typedef SherpaOnnxDestroyOnlineStream =
    void Function(Pointer<SherpaOnnxOnlineStream>);

typedef OnlineStreamAcceptWaveformNative =
    Void Function(
      Pointer<SherpaOnnxOnlineStream>,
      Int32,
      Pointer<Float>,
      Int32,
    );

typedef OnlineStreamAcceptWaveform =
    void Function(Pointer<SherpaOnnxOnlineStream>, int, Pointer<Float>, int);

typedef OnlineStreamInputFinishedNative =
    Void Function(Pointer<SherpaOnnxOnlineStream>);

typedef OnlineStreamInputFinished =
    void Function(Pointer<SherpaOnnxOnlineStream>);

typedef SherpaOnnxSpeakerEmbeddingExtractorIsReadyNative =
    Int32 Function(
      Pointer<SherpaOnnxSpeakerEmbeddingExtractor>,
      Pointer<SherpaOnnxOnlineStream>,
    );

typedef SherpaOnnxSpeakerEmbeddingExtractorIsReady =
    int Function(
      Pointer<SherpaOnnxSpeakerEmbeddingExtractor>,
      Pointer<SherpaOnnxOnlineStream>,
    );

typedef SherpaOnnxSpeakerEmbeddingExtractorComputeEmbeddingNative =
    Pointer<Float> Function(
      Pointer<SherpaOnnxSpeakerEmbeddingExtractor>,
      Pointer<SherpaOnnxOnlineStream>,
    );

typedef SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding =
    SherpaOnnxSpeakerEmbeddingExtractorComputeEmbeddingNative;

typedef SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbeddingNative =
    Void Function(Pointer<Float>);

typedef SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding =
    void Function(Pointer<Float>);

typedef SherpaOnnxReadWaveNative =
    Pointer<SherpaOnnxWave> Function(Pointer<Utf8>);

typedef SherpaOnnxReadWave = SherpaOnnxReadWaveNative;

typedef SherpaOnnxWriteWaveNative =
    Int32 Function(Pointer<Float>, Int32, Int32, Pointer<Utf8>);

typedef SherpaOnnxWriteWave =
    int Function(Pointer<Float>, int, int, Pointer<Utf8>);

typedef SherpaOnnxFreeWaveNative = Void Function(Pointer<SherpaOnnxWave>);

typedef SherpaOnnxFreeWave = void Function(Pointer<SherpaOnnxWave>);

typedef SherpaOnnxGetVersionStr = Pointer<Utf8> Function();
typedef SherpaOnnxGetVersionStrNative = SherpaOnnxGetVersionStr;

typedef SherpaOnnxGetGitSha1Native = Pointer<Utf8> Function();
typedef SherpaOnnxGetGitSha1 = SherpaOnnxGetGitSha1Native;

typedef SherpaOnnxGetGitDateNative = Pointer<Utf8> Function();
typedef SherpaOnnxGetGitDate = SherpaOnnxGetGitDateNative;

class SherpaOnnxBindings {
  static SherpaOnnxCreateOfflineSpeechDenoiser?
  sherpaOnnxCreateOfflineSpeechDenoiser;

  static SherpaOnnxDestroyOfflineSpeechDenoiser?
  sherpaOnnxDestroyOfflineSpeechDenoiser;

  static SherpaOnnxOfflineSpeechDenoiserGetSampleRate?
  sherpaOnnxOfflineSpeechDenoiserGetSampleRate;
  static SherpaOnnxOfflineSpeechDenoiserRun? sherpaOnnxOfflineSpeechDenoiserRun;
  static SherpaOnnxDestroyDenoisedAudio? sherpaOnnxDestroyDenoisedAudio;
  static SherpaOnnxCreateOnlineSpeechDenoiser?
  sherpaOnnxCreateOnlineSpeechDenoiser;
  static SherpaOnnxDestroyOnlineSpeechDenoiser?
  sherpaOnnxDestroyOnlineSpeechDenoiser;
  static SherpaOnnxOnlineSpeechDenoiserGetSampleRate?
  sherpaOnnxOnlineSpeechDenoiserGetSampleRate;
  static SherpaOnnxOnlineSpeechDenoiserGetFrameShiftInSamples?
  sherpaOnnxOnlineSpeechDenoiserGetFrameShiftInSamples;
  static SherpaOnnxOnlineSpeechDenoiserRun? sherpaOnnxOnlineSpeechDenoiserRun;
  static SherpaOnnxOnlineSpeechDenoiserFlush?
  sherpaOnnxOnlineSpeechDenoiserFlush;
  static SherpaOnnxOnlineSpeechDenoiserReset?
  sherpaOnnxOnlineSpeechDenoiserReset;

  static SherpaOnnxCreateSpokenLanguageIdentification?
  sherpaOnnxCreateSpokenLanguageIdentification;
  static SherpaOnnxDestroySpokenLanguageIdentification?
  sherpaOnnxDestroySpokenLanguageIdentification;
  static SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream?
  sherpaOnnxSpokenLanguageIdentificationCreateOfflineStream;
  static SherpaOnnxSpokenLanguageIdentificationCompute?
  sherpaOnnxSpokenLanguageIdentificationCompute;
  static SherpaOnnxDestroySpokenLanguageIdentificationResult?
  sherpaOnnxDestroySpokenLanguageIdentificationResult;

  static SherpaOnnxCreateOfflineSpeakerDiarization?
  sherpaOnnxCreateOfflineSpeakerDiarization;
  static SherpaOnnxDestroyOfflineSpeakerDiarization?
  sherpaOnnxDestroyOfflineSpeakerDiarization;
  static SherpaOnnxOfflineSpeakerDiarizationGetSampleRate?
  sherpaOnnxOfflineSpeakerDiarizationGetSampleRate;
  static SherpaOnnxOfflineSpeakerDiarizationSetConfig?
  sherpaOnnxOfflineSpeakerDiarizationSetConfig;
  static SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers?
  sherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers;
  static SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments?
  sherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments;
  static SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime?
  sherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime;
  static SherpaOnnxOfflineSpeakerDiarizationDestroySegment?
  sherpaOnnxOfflineSpeakerDiarizationDestroySegment;
  static SherpaOnnxOfflineSpeakerDiarizationProcess?
  sherpaOnnxOfflineSpeakerDiarizationProcess;
  static SherpaOnnxOfflineSpeakerDiarizationDestroyResult?
  sherpaOnnxOfflineSpeakerDiarizationDestroyResult;
  static SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg?
  sherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg;

  static SherpaOnnxCreateOfflinePunctuation? sherpaOnnxCreateOfflinePunctuation;
  static SherpaOnnxDestroyOfflinePunctuation?
  sherpaOnnxDestroyOfflinePunctuation;
  static SherpaOfflinePunctuationAddPunct? sherpaOfflinePunctuationAddPunct;
  static SherpaOfflinePunctuationFreeText? sherpaOfflinePunctuationFreeText;

  static SherpaOnnxCreateOnlinePunctuation? sherpaOnnxCreateOnlinePunctuation;
  static SherpaOnnxDestroyOnlinePunctuation? sherpaOnnxDestroyOnlinePunctuation;
  static SherpaOnnxOnlinePunctuationAddPunct?
  sherpaOnnxOnlinePunctuationAddPunct;
  static SherpaOnnxOnlinePunctuationFreeText?
  sherpaOnnxOnlinePunctuationFreeText;

  static SherpaOnnxCreateAudioTagging? sherpaOnnxCreateAudioTagging;
  static SherpaOnnxDestroyAudioTagging? sherpaOnnxDestroyAudioTagging;
  static SherpaOnnxAudioTaggingCreateOfflineStream?
  sherpaOnnxAudioTaggingCreateOfflineStream;
  static SherpaOnnxAudioTaggingCompute? sherpaOnnxAudioTaggingCompute;
  static SherpaOnnxAudioTaggingFreeResults? sherpaOnnxAudioTaggingFreeResults;

  static CreateKeywordSpotter? createKeywordSpotter;
  static DestroyKeywordSpotter? destroyKeywordSpotter;
  static CreateKeywordStream? createKeywordStream;
  static CreateKeywordStreamWithKeywords? createKeywordStreamWithKeywords;
  static IsKeywordStreamReady? isKeywordStreamReady;
  static DecodeKeywordStream? decodeKeywordStream;
  static ResetKeywordStream? resetKeywordStream;
  static GetKeywordResultAsJson? getKeywordResultAsJson;
  static FreeKeywordResultJson? freeKeywordResultJson;

  static SherpaOnnxCreateOfflineTts? createOfflineTts;
  static SherpaOnnxDestroyOfflineTts? destroyOfflineTts;
  static SherpaOnnxOfflineTtsSampleRate? offlineTtsSampleRate;
  static SherpaOnnxOfflineTtsNumSpeakers? offlineTtsNumSpeakers;
  static SherpaOnnxOfflineTtsGenerate? offlineTtsGenerate;
  static SherpaOnnxDestroyOfflineTtsGeneratedAudio?
  destroyOfflineTtsGeneratedAudio;
  static SherpaOnnxOfflineTtsGenerateWithCallback?
  offlineTtsGenerateWithCallback;

  static SherpaOnnxOfflineTtsGenerateWithConfig? offlineTtsGenerateWithConfig;

  static CreateOfflineRecognizer? createOfflineRecognizer;
  static DestroyOfflineRecognizer? destroyOfflineRecognizer;
  static OfflineRecognizerSetConfig? offlineRecognizerSetConfig;
  static CreateOfflineStream? createOfflineStream;
  static DestroyOfflineStream? destroyOfflineStream;
  static AcceptWaveformOffline? acceptWaveformOffline;
  static DecodeOfflineStream? decodeOfflineStream;
  static GetOfflineStreamResultAsJson? getOfflineStreamResultAsJson;
  static DestroyOfflineStreamResultJson? destroyOfflineStreamResultJson;

  static SherpaOnnxCreateOnlineRecognizer? createOnlineRecognizer;

  static SherpaOnnxDestroyOnlineRecognizer? destroyOnlineRecognizer;

  static SherpaOnnxCreateOnlineStream? createOnlineStream;

  static SherpaOnnxCreateOnlineStreamWithHotwords?
  createOnlineStreamWithHotwords;

  static IsOnlineStreamReady? isOnlineStreamReady;

  static SherpaOnnxDecodeOnlineStream? decodeOnlineStream;

  static GetOnlineStreamResultAsJson? getOnlineStreamResultAsJson;

  static Reset? reset;

  static IsEndpoint? isEndpoint;

  static DestroyOnlineStreamResultJson? destroyOnlineStreamResultJson;

  static SherpaOnnxCreateVoiceActivityDetector? createVoiceActivityDetector;

  static SherpaOnnxDestroyVoiceActivityDetector? destroyVoiceActivityDetector;

  static SherpaOnnxVoiceActivityDetectorAcceptWaveform?
  voiceActivityDetectorAcceptWaveform;

  static SherpaOnnxVoiceActivityDetectorEmpty? voiceActivityDetectorEmpty;

  static SherpaOnnxVoiceActivityDetectorDetected? voiceActivityDetectorDetected;

  static SherpaOnnxVoiceActivityDetectorPop? voiceActivityDetectorPop;

  static SherpaOnnxVoiceActivityDetectorClear? voiceActivityDetectorClear;

  static SherpaOnnxVoiceActivityDetectorFront? voiceActivityDetectorFront;

  static SherpaOnnxDestroySpeechSegment? destroySpeechSegment;

  static SherpaOnnxVoiceActivityDetectorReset? voiceActivityDetectorReset;

  static SherpaOnnxVoiceActivityDetectorFlush? voiceActivityDetectorFlush;

  static SherpaOnnxCreateCircularBuffer? createCircularBuffer;

  static SherpaOnnxDestroyCircularBuffer? destroyCircularBuffer;

  static SherpaOnnxCircularBufferPush? circularBufferPush;

  static SherpaOnnxCircularBufferGet? circularBufferGet;

  static SherpaOnnxCircularBufferFree? circularBufferFree;

  static SherpaOnnxCircularBufferPop? circularBufferPop;

  static SherpaOnnxCircularBufferSize? circularBufferSize;

  static SherpaOnnxCircularBufferHead? circularBufferHead;

  static SherpaOnnxCircularBufferReset? circularBufferReset;

  static SherpaOnnxCreateSpeakerEmbeddingExtractor?
  createSpeakerEmbeddingExtractor;

  static SherpaOnnxDestroySpeakerEmbeddingExtractor?
  destroySpeakerEmbeddingExtractor;

  static SherpaOnnxSpeakerEmbeddingExtractorDim? speakerEmbeddingExtractorDim;

  static SherpaOnnxSpeakerEmbeddingExtractorCreateStream?
  speakerEmbeddingExtractorCreateStream;

  static SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding?
  speakerEmbeddingExtractorComputeEmbedding;

  static SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding?
  speakerEmbeddingExtractorDestroyEmbedding;

  static SherpaOnnxDestroyOnlineStream? destroyOnlineStream;

  static OnlineStreamAcceptWaveform? onlineStreamAcceptWaveform;

  static OnlineStreamInputFinished? onlineStreamInputFinished;

  static SherpaOnnxSpeakerEmbeddingExtractorIsReady?
  speakerEmbeddingExtractorIsReady;

  static SherpaOnnxCreateSpeakerEmbeddingManager? createSpeakerEmbeddingManager;

  static SherpaOnnxDestroySpeakerEmbeddingManager?
  destroySpeakerEmbeddingManager;

  static SherpaOnnxSpeakerEmbeddingManagerAdd? speakerEmbeddingManagerAdd;

  static SherpaOnnxSpeakerEmbeddingManagerAddListFlattened?
  speakerEmbeddingManagerAddListFlattened;

  static SherpaOnnxSpeakerEmbeddingManagerRemove? speakerEmbeddingManagerRemove;

  static SherpaOnnxSpeakerEmbeddingManagerContains?
  speakerEmbeddingManagerContains;

  static SherpaOnnxSpeakerEmbeddingManagerSearch? speakerEmbeddingManagerSearch;

  static SherpaOnnxSpeakerEmbeddingManagerFreeSearch?
  speakerEmbeddingManagerFreeSearch;

  static SherpaOnnxSpeakerEmbeddingManagerNumSpeakers?
  speakerEmbeddingManagerNumSpeakers;

  static SherpaOnnxSpeakerEmbeddingManagerVerify? speakerEmbeddingManagerVerify;

  static SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers?
  speakerEmbeddingManagerGetAllSpeakers;

  static SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers?
  speakerEmbeddingManagerFreeAllSpeakers;

  static SherpaOnnxReadWave? readWave;

  static SherpaOnnxWriteWave? writeWave;

  static SherpaOnnxFreeWave? freeWave;

  static SherpaOnnxGetVersionStr? getVersionStr;
  static SherpaOnnxGetGitSha1? getGitSha1;
  static SherpaOnnxGetGitDate? getGitDate;

  static void init(DynamicLibrary dynamicLibrary) {
    sherpaOnnxCreateOfflineSpeechDenoiser ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxCreateOfflineSpeechDenoiserNative>>(
          'SherpaOnnxCreateOfflineSpeechDenoiser',
        )
        .asFunction();

    sherpaOnnxDestroyOfflineSpeechDenoiser ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxDestroyOfflineSpeechDenoiserNative>>(
          'SherpaOnnxDestroyOfflineSpeechDenoiser',
        )
        .asFunction();

    sherpaOnnxOfflineSpeechDenoiserGetSampleRate ??= dynamicLibrary
        .lookup<
          NativeFunction<SherpaOnnxOfflineSpeechDenoiserGetSampleRateNative>
        >('SherpaOnnxOfflineSpeechDenoiserGetSampleRate')
        .asFunction();

    sherpaOnnxOfflineSpeechDenoiserRun ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxOfflineSpeechDenoiserRunNative>>(
          'SherpaOnnxOfflineSpeechDenoiserRun',
        )
        .asFunction();

    sherpaOnnxDestroyDenoisedAudio ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxDestroyDenoisedAudioNative>>(
          'SherpaOnnxDestroyDenoisedAudio',
        )
        .asFunction();

    sherpaOnnxCreateOnlineSpeechDenoiser ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxCreateOnlineSpeechDenoiserNative>>(
          'SherpaOnnxCreateOnlineSpeechDenoiser',
        )
        .asFunction();

    sherpaOnnxDestroyOnlineSpeechDenoiser ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxDestroyOnlineSpeechDenoiserNative>>(
          'SherpaOnnxDestroyOnlineSpeechDenoiser',
        )
        .asFunction();

    sherpaOnnxOnlineSpeechDenoiserGetSampleRate ??= dynamicLibrary
        .lookup<
          NativeFunction<SherpaOnnxOnlineSpeechDenoiserGetSampleRateNative>
        >('SherpaOnnxOnlineSpeechDenoiserGetSampleRate')
        .asFunction();

    sherpaOnnxOnlineSpeechDenoiserGetFrameShiftInSamples ??= dynamicLibrary
        .lookup<
          NativeFunction<
            SherpaOnnxOnlineSpeechDenoiserGetFrameShiftInSamplesNative
          >
        >('SherpaOnnxOnlineSpeechDenoiserGetFrameShiftInSamples')
        .asFunction();

    sherpaOnnxOnlineSpeechDenoiserRun ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxOnlineSpeechDenoiserRunNative>>(
          'SherpaOnnxOnlineSpeechDenoiserRun',
        )
        .asFunction();

    sherpaOnnxOnlineSpeechDenoiserFlush ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxOnlineSpeechDenoiserFlushNative>>(
          'SherpaOnnxOnlineSpeechDenoiserFlush',
        )
        .asFunction();

    sherpaOnnxOnlineSpeechDenoiserReset ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxOnlineSpeechDenoiserResetNative>>(
          'SherpaOnnxOnlineSpeechDenoiserReset',
        )
        .asFunction();

    sherpaOnnxCreateSpokenLanguageIdentification ??= dynamicLibrary
        .lookup<
          NativeFunction<SherpaOnnxCreateSpokenLanguageIdentificationNative>
        >('SherpaOnnxCreateSpokenLanguageIdentification')
        .asFunction();

    sherpaOnnxDestroySpokenLanguageIdentification ??= dynamicLibrary
        .lookup<
          NativeFunction<SherpaOnnxDestroySpokenLanguageIdentificationNative>
        >('SherpaOnnxDestroySpokenLanguageIdentification')
        .asFunction();

    sherpaOnnxSpokenLanguageIdentificationCreateOfflineStream ??= dynamicLibrary
        .lookup<
          NativeFunction<
            SherpaOnnxSpokenLanguageIdentificationCreateOfflineStreamNative
          >
        >('SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream')
        .asFunction();

    sherpaOnnxSpokenLanguageIdentificationCompute ??= dynamicLibrary
        .lookup<
          NativeFunction<SherpaOnnxSpokenLanguageIdentificationComputeNative>
        >('SherpaOnnxSpokenLanguageIdentificationCompute')
        .asFunction();

    sherpaOnnxDestroySpokenLanguageIdentificationResult ??= dynamicLibrary
        .lookup<
          NativeFunction<
            SherpaOnnxDestroySpokenLanguageIdentificationResultNative
          >
        >('SherpaOnnxDestroySpokenLanguageIdentificationResult')
        .asFunction();

    sherpaOnnxCreateOfflineSpeakerDiarization ??= dynamicLibrary
        .lookup<
          NativeFunction<SherpaOnnxCreateOfflineSpeakerDiarizationNative>
        >('SherpaOnnxCreateOfflineSpeakerDiarization')
        .asFunction();

    sherpaOnnxDestroyOfflineSpeakerDiarization ??= dynamicLibrary
        .lookup<
          NativeFunction<SherpaOnnxDestroyOfflineSpeakerDiarizationNative>
        >('SherpaOnnxDestroyOfflineSpeakerDiarization')
        .asFunction();

    sherpaOnnxOfflineSpeakerDiarizationGetSampleRate ??= dynamicLibrary
        .lookup<
          NativeFunction<SherpaOnnxOfflineSpeakerDiarizationGetSampleRateNative>
        >('SherpaOnnxOfflineSpeakerDiarizationGetSampleRate')
        .asFunction();

    sherpaOnnxOfflineSpeakerDiarizationSetConfig ??= dynamicLibrary
        .lookup<
          NativeFunction<SherpaOnnxOfflineSpeakerDiarizationSetConfigNative>
        >('SherpaOnnxOfflineSpeakerDiarizationSetConfig')
        .asFunction();

    sherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers ??= dynamicLibrary
        .lookup<
          NativeFunction<
            SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakersNative
          >
        >('SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers')
        .asFunction();

    sherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments ??= dynamicLibrary
        .lookup<
          NativeFunction<
            SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegmentsNative
          >
        >('SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments')
        .asFunction();

    sherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime ??= dynamicLibrary
        .lookup<
          NativeFunction<
            SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTimeNative
          >
        >('SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime')
        .asFunction();

    sherpaOnnxOfflineSpeakerDiarizationDestroySegment ??= dynamicLibrary
        .lookup<
          NativeFunction<
            SherpaOnnxOfflineSpeakerDiarizationDestroySegmentNative
          >
        >('SherpaOnnxOfflineSpeakerDiarizationDestroySegment')
        .asFunction();

    sherpaOnnxOfflineSpeakerDiarizationProcess ??= dynamicLibrary
        .lookup<
          NativeFunction<SherpaOnnxOfflineSpeakerDiarizationProcessNative>
        >('SherpaOnnxOfflineSpeakerDiarizationProcess')
        .asFunction();

    sherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg ??=
        dynamicLibrary
            .lookup<
              NativeFunction<
                SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArgNative
              >
            >('SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg')
            .asFunction();

    sherpaOnnxOfflineSpeakerDiarizationDestroyResult ??= dynamicLibrary
        .lookup<
          NativeFunction<SherpaOnnxOfflineSpeakerDiarizationDestroyResultNative>
        >('SherpaOnnxOfflineSpeakerDiarizationDestroyResult')
        .asFunction();

    sherpaOnnxCreateOfflinePunctuation ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxCreateOfflinePunctuationNative>>(
          'SherpaOnnxCreateOfflinePunctuation',
        )
        .asFunction();

    sherpaOnnxDestroyOfflinePunctuation ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxDestroyOfflinePunctuationNative>>(
          'SherpaOnnxDestroyOfflinePunctuation',
        )
        .asFunction();

    sherpaOfflinePunctuationAddPunct ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOfflinePunctuationAddPunctNative>>(
          'SherpaOfflinePunctuationAddPunct',
        )
        .asFunction();

    sherpaOfflinePunctuationFreeText ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOfflinePunctuationFreeTextNative>>(
          'SherpaOfflinePunctuationFreeText',
        )
        .asFunction();

    sherpaOnnxCreateOnlinePunctuation ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxCreateOnlinePunctuationNative>>(
          'SherpaOnnxCreateOnlinePunctuation',
        )
        .asFunction();

    sherpaOnnxDestroyOnlinePunctuation ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxDestroyOnlinePunctuationNative>>(
          'SherpaOnnxDestroyOnlinePunctuation',
        )
        .asFunction();

    sherpaOnnxOnlinePunctuationAddPunct ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxOnlinePunctuationAddPunctNative>>(
          'SherpaOnnxOnlinePunctuationAddPunct',
        )
        .asFunction();

    sherpaOnnxOnlinePunctuationFreeText ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxOnlinePunctuationFreeTextNative>>(
          'SherpaOnnxOnlinePunctuationFreeText',
        )
        .asFunction();

    sherpaOnnxCreateAudioTagging ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxCreateAudioTaggingNative>>(
          'SherpaOnnxCreateAudioTagging',
        )
        .asFunction();

    sherpaOnnxDestroyAudioTagging ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxDestroyAudioTaggingNative>>(
          'SherpaOnnxDestroyAudioTagging',
        )
        .asFunction();

    sherpaOnnxAudioTaggingCreateOfflineStream ??= dynamicLibrary
        .lookup<
          NativeFunction<SherpaOnnxAudioTaggingCreateOfflineStreamNative>
        >('SherpaOnnxAudioTaggingCreateOfflineStream')
        .asFunction();

    sherpaOnnxAudioTaggingCompute ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxAudioTaggingComputeNative>>(
          'SherpaOnnxAudioTaggingCompute',
        )
        .asFunction();

    sherpaOnnxAudioTaggingFreeResults ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxAudioTaggingFreeResultsNative>>(
          'SherpaOnnxAudioTaggingFreeResults',
        )
        .asFunction();

    createKeywordSpotter ??= dynamicLibrary
        .lookup<NativeFunction<CreateKeywordSpotterNative>>(
          'SherpaOnnxCreateKeywordSpotter',
        )
        .asFunction();

    destroyKeywordSpotter ??= dynamicLibrary
        .lookup<NativeFunction<DestroyKeywordSpotterNative>>(
          'SherpaOnnxDestroyKeywordSpotter',
        )
        .asFunction();

    createKeywordStream ??= dynamicLibrary
        .lookup<NativeFunction<CreateKeywordStreamNative>>(
          'SherpaOnnxCreateKeywordStream',
        )
        .asFunction();

    createKeywordStreamWithKeywords ??= dynamicLibrary
        .lookup<NativeFunction<CreateKeywordStreamWithKeywordsNative>>(
          'SherpaOnnxCreateKeywordStreamWithKeywords',
        )
        .asFunction();

    isKeywordStreamReady ??= dynamicLibrary
        .lookup<NativeFunction<IsKeywordStreamReadyNative>>(
          'SherpaOnnxIsKeywordStreamReady',
        )
        .asFunction();

    decodeKeywordStream ??= dynamicLibrary
        .lookup<NativeFunction<DecodeKeywordStreamNative>>(
          'SherpaOnnxDecodeKeywordStream',
        )
        .asFunction();

    resetKeywordStream ??= dynamicLibrary
        .lookup<NativeFunction<ResetKeywordStreamNative>>(
          'SherpaOnnxResetKeywordStream',
        )
        .asFunction();

    getKeywordResultAsJson ??= dynamicLibrary
        .lookup<NativeFunction<GetKeywordResultAsJsonNative>>(
          'SherpaOnnxGetKeywordResultAsJson',
        )
        .asFunction();

    freeKeywordResultJson ??= dynamicLibrary
        .lookup<NativeFunction<FreeKeywordResultJsonNative>>(
          'SherpaOnnxFreeKeywordResultJson',
        )
        .asFunction();

    createOfflineTts ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxCreateOfflineTtsNative>>(
          'SherpaOnnxCreateOfflineTts',
        )
        .asFunction();

    destroyOfflineTts ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxDestroyOfflineTtsNative>>(
          'SherpaOnnxDestroyOfflineTts',
        )
        .asFunction();

    offlineTtsSampleRate ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxOfflineTtsSampleRateNative>>(
          'SherpaOnnxOfflineTtsSampleRate',
        )
        .asFunction();

    offlineTtsNumSpeakers ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxOfflineTtsNumSpeakersNative>>(
          'SherpaOnnxOfflineTtsNumSpeakers',
        )
        .asFunction();

    offlineTtsGenerate ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxOfflineTtsGenerateNative>>(
          'SherpaOnnxOfflineTtsGenerate',
        )
        .asFunction();

    destroyOfflineTtsGeneratedAudio ??= dynamicLibrary
        .lookup<
          NativeFunction<SherpaOnnxDestroyOfflineTtsGeneratedAudioNative>
        >('SherpaOnnxDestroyOfflineTtsGeneratedAudio')
        .asFunction();

    offlineTtsGenerateWithCallback ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxOfflineTtsGenerateWithCallbackNative>>(
          'SherpaOnnxOfflineTtsGenerateWithCallback',
        )
        .asFunction();

    offlineTtsGenerateWithConfig ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxOfflineTtsGenerateWithConfigNative>>(
          'SherpaOnnxOfflineTtsGenerateWithConfig',
        )
        .asFunction();

    createOfflineRecognizer ??= dynamicLibrary
        .lookup<NativeFunction<CreateOfflineRecognizerNative>>(
          'SherpaOnnxCreateOfflineRecognizer',
        )
        .asFunction();

    destroyOfflineRecognizer ??= dynamicLibrary
        .lookup<NativeFunction<DestroyOfflineRecognizerNative>>(
          'SherpaOnnxDestroyOfflineRecognizer',
        )
        .asFunction();

    offlineRecognizerSetConfig ??= dynamicLibrary
        .lookup<NativeFunction<OfflineRecognizerSetConfigNative>>(
          'SherpaOnnxOfflineRecognizerSetConfig',
        )
        .asFunction();

    createOfflineStream ??= dynamicLibrary
        .lookup<NativeFunction<CreateOfflineStreamNative>>(
          'SherpaOnnxCreateOfflineStream',
        )
        .asFunction();

    destroyOfflineStream ??= dynamicLibrary
        .lookup<NativeFunction<DestroyOfflineStreamNative>>(
          'SherpaOnnxDestroyOfflineStream',
        )
        .asFunction();

    acceptWaveformOffline ??= dynamicLibrary
        .lookup<NativeFunction<AcceptWaveformOfflineNative>>(
          'SherpaOnnxAcceptWaveformOffline',
        )
        .asFunction();

    decodeOfflineStream ??= dynamicLibrary
        .lookup<NativeFunction<DecodeOfflineStreamNative>>(
          'SherpaOnnxDecodeOfflineStream',
        )
        .asFunction();

    getOfflineStreamResultAsJson ??= dynamicLibrary
        .lookup<NativeFunction<GetOfflineStreamResultAsJsonNative>>(
          'SherpaOnnxGetOfflineStreamResultAsJson',
        )
        .asFunction();

    destroyOfflineStreamResultJson ??= dynamicLibrary
        .lookup<NativeFunction<DestroyOfflineStreamResultJsonNative>>(
          'SherpaOnnxDestroyOfflineStreamResultJson',
        )
        .asFunction();

    createOnlineRecognizer ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxCreateOnlineRecognizerNative>>(
          'SherpaOnnxCreateOnlineRecognizer',
        )
        .asFunction();

    destroyOnlineRecognizer ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxDestroyOnlineRecognizerNative>>(
          'SherpaOnnxDestroyOnlineRecognizer',
        )
        .asFunction();

    createOnlineStream ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxCreateOnlineStreamNative>>(
          'SherpaOnnxCreateOnlineStream',
        )
        .asFunction();

    createOnlineStreamWithHotwords ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxCreateOnlineStreamWithHotwordsNative>>(
          'SherpaOnnxCreateOnlineStreamWithHotwords',
        )
        .asFunction();

    isOnlineStreamReady ??= dynamicLibrary
        .lookup<NativeFunction<IsOnlineStreamReadyNative>>(
          'SherpaOnnxIsOnlineStreamReady',
        )
        .asFunction();

    decodeOnlineStream ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxDecodeOnlineStreamNative>>(
          'SherpaOnnxDecodeOnlineStream',
        )
        .asFunction();

    getOnlineStreamResultAsJson ??= dynamicLibrary
        .lookup<NativeFunction<GetOnlineStreamResultAsJsonNative>>(
          'SherpaOnnxGetOnlineStreamResultAsJson',
        )
        .asFunction();

    reset ??= dynamicLibrary
        .lookup<NativeFunction<ResetNative>>('SherpaOnnxOnlineStreamReset')
        .asFunction();

    isEndpoint ??= dynamicLibrary
        .lookup<NativeFunction<IsEndpointNative>>(
          'SherpaOnnxOnlineStreamIsEndpoint',
        )
        .asFunction();

    destroyOnlineStreamResultJson ??= dynamicLibrary
        .lookup<NativeFunction<DestroyOnlineStreamResultJsonNative>>(
          'SherpaOnnxDestroyOnlineStreamResultJson',
        )
        .asFunction();

    createVoiceActivityDetector ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxCreateVoiceActivityDetectorNative>>(
          'SherpaOnnxCreateVoiceActivityDetector',
        )
        .asFunction();

    destroyVoiceActivityDetector ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxDestroyVoiceActivityDetectorNative>>(
          'SherpaOnnxDestroyVoiceActivityDetector',
        )
        .asFunction();

    voiceActivityDetectorAcceptWaveform ??= dynamicLibrary
        .lookup<
          NativeFunction<SherpaOnnxVoiceActivityDetectorAcceptWaveformNative>
        >('SherpaOnnxVoiceActivityDetectorAcceptWaveform')
        .asFunction();

    voiceActivityDetectorEmpty ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorEmptyNative>>(
          'SherpaOnnxVoiceActivityDetectorEmpty',
        )
        .asFunction();

    voiceActivityDetectorDetected ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorDetectedNative>>(
          'SherpaOnnxVoiceActivityDetectorDetected',
        )
        .asFunction();

    voiceActivityDetectorPop ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorPopNative>>(
          'SherpaOnnxVoiceActivityDetectorPop',
        )
        .asFunction();

    voiceActivityDetectorClear ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorClearNative>>(
          'SherpaOnnxVoiceActivityDetectorClear',
        )
        .asFunction();

    voiceActivityDetectorFront ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorFrontNative>>(
          'SherpaOnnxVoiceActivityDetectorFront',
        )
        .asFunction();

    destroySpeechSegment ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxDestroySpeechSegmentNative>>(
          'SherpaOnnxDestroySpeechSegment',
        )
        .asFunction();

    voiceActivityDetectorReset ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorResetNative>>(
          'SherpaOnnxVoiceActivityDetectorReset',
        )
        .asFunction();

    voiceActivityDetectorFlush ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorFlushNative>>(
          'SherpaOnnxVoiceActivityDetectorFlush',
        )
        .asFunction();

    createCircularBuffer ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxCreateCircularBufferNative>>(
          'SherpaOnnxCreateCircularBuffer',
        )
        .asFunction();

    destroyCircularBuffer ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxDestroyCircularBufferNative>>(
          'SherpaOnnxDestroyCircularBuffer',
        )
        .asFunction();

    circularBufferPush ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxCircularBufferPushNative>>(
          'SherpaOnnxCircularBufferPush',
        )
        .asFunction();

    circularBufferGet ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxCircularBufferGetNative>>(
          'SherpaOnnxCircularBufferGet',
        )
        .asFunction();

    circularBufferFree ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxCircularBufferFreeNative>>(
          'SherpaOnnxCircularBufferFree',
        )
        .asFunction();

    circularBufferPop ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxCircularBufferPopNative>>(
          'SherpaOnnxCircularBufferPop',
        )
        .asFunction();

    circularBufferSize ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxCircularBufferSizeNative>>(
          'SherpaOnnxCircularBufferSize',
        )
        .asFunction();

    circularBufferHead ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxCircularBufferHeadNative>>(
          'SherpaOnnxCircularBufferHead',
        )
        .asFunction();

    circularBufferReset ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxCircularBufferResetNative>>(
          'SherpaOnnxCircularBufferReset',
        )
        .asFunction();

    createSpeakerEmbeddingExtractor ??= dynamicLibrary
        .lookup<
          NativeFunction<SherpaOnnxCreateSpeakerEmbeddingExtractorNative>
        >('SherpaOnnxCreateSpeakerEmbeddingExtractor')
        .asFunction();

    destroySpeakerEmbeddingExtractor ??= dynamicLibrary
        .lookup<
          NativeFunction<SherpaOnnxDestroySpeakerEmbeddingExtractorNative>
        >('SherpaOnnxDestroySpeakerEmbeddingExtractor')
        .asFunction();

    speakerEmbeddingExtractorDim ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxSpeakerEmbeddingExtractorDimNative>>(
          'SherpaOnnxSpeakerEmbeddingExtractorDim',
        )
        .asFunction();

    speakerEmbeddingExtractorCreateStream ??= dynamicLibrary
        .lookup<
          NativeFunction<SherpaOnnxSpeakerEmbeddingExtractorCreateStreamNative>
        >('SherpaOnnxSpeakerEmbeddingExtractorCreateStream')
        .asFunction();

    speakerEmbeddingExtractorComputeEmbedding ??= dynamicLibrary
        .lookup<
          NativeFunction<
            SherpaOnnxSpeakerEmbeddingExtractorComputeEmbeddingNative
          >
        >('SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding')
        .asFunction();

    speakerEmbeddingExtractorDestroyEmbedding ??= dynamicLibrary
        .lookup<
          NativeFunction<
            SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbeddingNative
          >
        >('SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding')
        .asFunction();

    destroyOnlineStream ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxDestroyOnlineStreamNative>>(
          'SherpaOnnxDestroyOnlineStream',
        )
        .asFunction();

    onlineStreamAcceptWaveform ??= dynamicLibrary
        .lookup<NativeFunction<OnlineStreamAcceptWaveformNative>>(
          'SherpaOnnxOnlineStreamAcceptWaveform',
        )
        .asFunction();

    onlineStreamInputFinished ??= dynamicLibrary
        .lookup<NativeFunction<OnlineStreamInputFinishedNative>>(
          'SherpaOnnxOnlineStreamInputFinished',
        )
        .asFunction();

    speakerEmbeddingExtractorIsReady ??= dynamicLibrary
        .lookup<
          NativeFunction<SherpaOnnxSpeakerEmbeddingExtractorIsReadyNative>
        >('SherpaOnnxSpeakerEmbeddingExtractorIsReady')
        .asFunction();

    createSpeakerEmbeddingManager ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxCreateSpeakerEmbeddingManagerNative>>(
          'SherpaOnnxCreateSpeakerEmbeddingManager',
        )
        .asFunction();

    destroySpeakerEmbeddingManager ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxDestroySpeakerEmbeddingManagerNative>>(
          'SherpaOnnxDestroySpeakerEmbeddingManager',
        )
        .asFunction();

    speakerEmbeddingManagerAdd ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxSpeakerEmbeddingManagerAddNative>>(
          'SherpaOnnxSpeakerEmbeddingManagerAdd',
        )
        .asFunction();

    speakerEmbeddingManagerAddListFlattened ??= dynamicLibrary
        .lookup<
          NativeFunction<
            SherpaOnnxSpeakerEmbeddingManagerAddListFlattenedNative
          >
        >('SherpaOnnxSpeakerEmbeddingManagerAddListFlattened')
        .asFunction();

    speakerEmbeddingManagerRemove ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxSpeakerEmbeddingManagerRemoveNative>>(
          'SherpaOnnxSpeakerEmbeddingManagerRemove',
        )
        .asFunction();

    speakerEmbeddingManagerContains ??= dynamicLibrary
        .lookup<
          NativeFunction<SherpaOnnxSpeakerEmbeddingManagerContainsNative>
        >('SherpaOnnxSpeakerEmbeddingManagerContains')
        .asFunction();

    speakerEmbeddingManagerSearch ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxSpeakerEmbeddingManagerSearchNative>>(
          'SherpaOnnxSpeakerEmbeddingManagerSearch',
        )
        .asFunction();

    speakerEmbeddingManagerFreeSearch ??= dynamicLibrary
        .lookup<
          NativeFunction<SherpaOnnxSpeakerEmbeddingManagerFreeSearchNative>
        >('SherpaOnnxSpeakerEmbeddingManagerFreeSearch')
        .asFunction();

    speakerEmbeddingManagerNumSpeakers ??= dynamicLibrary
        .lookup<
          NativeFunction<SherpaOnnxSpeakerEmbeddingManagerNumSpeakersNative>
        >('SherpaOnnxSpeakerEmbeddingManagerNumSpeakers')
        .asFunction();

    speakerEmbeddingManagerVerify ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxSpeakerEmbeddingManagerVerifyNative>>(
          'SherpaOnnxSpeakerEmbeddingManagerVerify',
        )
        .asFunction();

    speakerEmbeddingManagerGetAllSpeakers ??= dynamicLibrary
        .lookup<
          NativeFunction<SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakersNative>
        >('SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers')
        .asFunction();

    speakerEmbeddingManagerFreeAllSpeakers ??= dynamicLibrary
        .lookup<
          NativeFunction<SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakersNative>
        >('SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers')
        .asFunction();

    readWave ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxReadWaveNative>>('SherpaOnnxReadWave')
        .asFunction();

    writeWave ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxWriteWaveNative>>(
          'SherpaOnnxWriteWave',
        )
        .asFunction();

    freeWave ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxFreeWaveNative>>('SherpaOnnxFreeWave')
        .asFunction();

    getVersionStr ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxGetVersionStrNative>>(
          'SherpaOnnxGetVersionStr',
        )
        .asFunction();

    getGitSha1 ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxGetGitSha1Native>>(
          'SherpaOnnxGetGitSha1',
        )
        .asFunction();

    getGitDate ??= dynamicLibrary
        .lookup<NativeFunction<SherpaOnnxGetGitDateNative>>(
          'SherpaOnnxGetGitDate',
        )
        .asFunction();
  }
}


================================================
FILE: flutter/sherpa_onnx/lib/src/speaker_identification.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:ffi';
import 'dart:typed_data';
import 'package:ffi/ffi.dart';

import './online_stream.dart';
import './sherpa_onnx_bindings.dart';

/// Speaker embedding extraction and speaker identification utilities.
///
/// See `dart-api-examples/speaker-identification/` for end-to-end examples.
///
/// Example:
///
/// ```dart
/// final extractor = SpeakerEmbeddingExtractor(
///   config: const SpeakerEmbeddingExtractorConfig(
///     model: './3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx',
///   ),
/// );
///
/// final stream = extractor.createStream();
/// stream.acceptWaveform(samples: wave.samples, sampleRate: wave.sampleRate);
/// while (extractor.isReady(stream)) {}
/// final embedding = extractor.compute(stream);
///
/// final manager = SpeakerEmbeddingManager(extractor.dim);
/// manager.add(name: 'alice', embedding: embedding);
/// print(manager.search(embedding: embedding, threshold: 0.6));
/// ```
class SpeakerEmbeddingExtractorConfig {
  const SpeakerEmbeddingExtractorConfig(
      {required this.model,
      this.numThreads = 1,
      this.debug = true,
      this.provider = 'cpu'});

  factory SpeakerEmbeddingExtractorConfig.fromJson(Map<String, dynamic> json) {
    return SpeakerEmbeddingExtractorConfig(
      model: json['model'] as String,
      numThreads: json['numThreads'] as int? ?? 1,
      debug: json['debug'] as bool? ?? true,
      provider: json['provider'] as String? ?? 'cpu',
    );
  }

  @override
  String toString() {
    return 'SpeakerEmbeddingExtractorConfig(model: $model, numThreads: $numThreads, debug: $debug, provider: $provider)';
  }

  Map<String, dynamic> toJson() => {
        'model': model,
        'numThreads': numThreads,
        'debug': debug,
        'provider': provider,
      };

  final String model;
  final int numThreads;
  final bool debug;
  final String provider;
}

/// Speaker embedding extractor.
///
/// Feed audio through an [OnlineStream], then call [compute] to obtain a fixed
/// dimensional embedding suitable for search or verification.
class SpeakerEmbeddingExtractor {
  SpeakerEmbeddingExtractor.fromPtr({required this.ptr, required this.dim});

  SpeakerEmbeddingExtractor._({required this.ptr, required this.dim});

  /// Create an extractor from [config].
  factory SpeakerEmbeddingExtractor(
      {required SpeakerEmbeddingExtractorConfig config}) {
    final c = calloc<SherpaOnnxSpeakerEmbeddingExtractorConfig>();

    final modelPtr = config.model.toNativeUtf8();
    c.ref.model = modelPtr;

    c.ref.numThreads = config.numThreads;
    c.ref.debug = config.debug ? 1 : 0;

    final providerPtr = config.provider.toNativeUtf8();
    c.ref.provider = providerPtr;

    if (SherpaOnnxBindings.createSpeakerEmbeddingExtractor == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    final ptr =
        SherpaOnnxBindings.createSpeakerEmbeddingExtractor?.call(c) ?? nullptr;

    calloc.free(providerPtr);
    calloc.free(modelPtr);
    calloc.free(c);

    if (ptr == nullptr) {
      throw Exception(
          "Failed to create speaker embedding extractor. Please check your config");
    }

    final dim = SherpaOnnxBindings.speakerEmbeddingExtractorDim?.call(ptr) ?? 0;

    return SpeakerEmbeddingExtractor._(ptr: ptr, dim: dim);
  }

  /// Release the native extractor.
  void free() {
    if (SherpaOnnxBindings.destroySpeakerEmbeddingExtractor == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return;
    }
    SherpaOnnxBindings.destroySpeakerEmbeddingExtractor?.call(ptr);
    ptr = nullptr;
  }

  /// Create an input stream for embedding extraction.
  OnlineStream createStream() {
    if (SherpaOnnxBindings.speakerEmbeddingExtractorCreateStream == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      throw Exception("Failed to create online stream");
    }

    final p =
        SherpaOnnxBindings.speakerEmbeddingExtractorCreateStream?.call(ptr) ??
            nullptr;

    if (p == nullptr) {
      throw Exception("Failed to create online stream");
    }

    return OnlineStream(ptr: p);
  }

  /// Return `true` if [stream] has enough audio for embedding extraction.
  bool isReady(OnlineStream stream) {
    if (SherpaOnnxBindings.speakerEmbeddingExtractorIsReady == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr || stream.ptr == nullptr) {
      return false;
    }

    final int ready = SherpaOnnxBindings.speakerEmbeddingExtractorIsReady
            ?.call(ptr, stream.ptr) ??
        0;
    return ready == 1;
  }

  /// Compute an embedding for [stream].
  Float32List compute(OnlineStream stream) {
    if (SherpaOnnxBindings.speakerEmbeddingExtractorComputeEmbedding == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr || stream.ptr == nullptr) {
      return Float32List(0);
    }

    final Pointer<Float> embedding = SherpaOnnxBindings
            .speakerEmbeddingExtractorComputeEmbedding
            ?.call(ptr, stream.ptr) ??
        nullptr;

    if (embedding == nullptr) {
      return Float32List(0);
    }

    final embeddingList = embedding.asTypedList(dim);
    final ans = Float32List(dim);
    ans.setAll(0, embeddingList);

    SherpaOnnxBindings.speakerEmbeddingExtractorDestroyEmbedding
        ?.call(embedding);

    return ans;
  }

  Pointer<SherpaOnnxSpeakerEmbeddingExtractor> ptr;
  final int dim;
}

/// In-memory store of named speaker embeddings.
///
/// Use this class to add reference embeddings, search for the best matching
/// speaker, and verify whether a candidate embedding belongs to a known
/// identity.
class SpeakerEmbeddingManager {
  SpeakerEmbeddingManager.fromPtr({required this.ptr, required this.dim});

  SpeakerEmbeddingManager._({required this.ptr, required this.dim});

  /// Create a manager for embeddings whose dimension is [dim].
  factory SpeakerEmbeddingManager(int dim) {
    if (SherpaOnnxBindings.createSpeakerEmbeddingManager == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    final p =
        SherpaOnnxBindings.createSpeakerEmbeddingManager?.call(dim) ?? nullptr;

    if (p == nullptr) {
      throw Exception("Failed to create speaker embedding manager");
    }

    return SpeakerEmbeddingManager._(ptr: p, dim: dim);
  }

  /// Release the native manager.
  void free() {
    if (SherpaOnnxBindings.destroySpeakerEmbeddingManager == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return;
    }
    SherpaOnnxBindings.destroySpeakerEmbeddingManager?.call(ptr);
    ptr = nullptr;
  }

  /// Add one reference embedding for [name].
  bool add({required String name, required Float32List embedding}) {
    assert(embedding.length == dim, '${embedding.length} vs $dim');

    if (SherpaOnnxBindings.speakerEmbeddingManagerAdd == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return false;
    }

    final Pointer<Utf8> namePtr = name.toNativeUtf8();
    final int n = embedding.length;

    final Pointer<Float> p = calloc<Float>(n);
    final pList = p.asTypedList(n);
    pList.setAll(0, embedding);

    final int ok =
        SherpaOnnxBindings.speakerEmbeddingManagerAdd?.call(ptr, namePtr, p) ??
            0;

    calloc.free(p);
    calloc.free(namePtr);

    return ok == 1;
  }

  /// Add multiple reference embeddings for [name].
  bool addMulti(
      {required String name, required List<Float32List> embeddingList}) {
    if (SherpaOnnxBindings.speakerEmbeddingManagerAddListFlattened == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return false;
    }

    final Pointer<Utf8> namePtr = name.toNativeUtf8();
    final int n = embeddingList.length;

    final Pointer<Float> p = calloc<Float>(n * dim);
    final pList = p.asTypedList(n * dim);

    int offset = 0;
    for (final e in embeddingList) {
      assert(e.length == dim, '${e.length} vs $dim');

      pList.setAll(offset, e);
      offset += dim;
    }

    final int ok = SherpaOnnxBindings.speakerEmbeddingManagerAddListFlattened
            ?.call(ptr, namePtr, p, n) ??
        0;

    calloc.free(p);
    calloc.free(namePtr);

    return ok == 1;
  }

  /// Return `true` if [name] exists in the manager.
  bool contains(String name) {
    if (SherpaOnnxBindings.speakerEmbeddingManagerContains == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return false;
    }

    final Pointer<Utf8> namePtr = name.toNativeUtf8();

    final int found = SherpaOnnxBindings.speakerEmbeddingManagerContains
            ?.call(ptr, namePtr) ??
        0;

    calloc.free(namePtr);

    return found == 1;
  }

  /// Remove all embeddings associated with [name].
  bool remove(String name) {
    if (SherpaOnnxBindings.speakerEmbeddingManagerRemove == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return false;
    }

    final Pointer<Utf8> namePtr = name.toNativeUtf8();

    final int ok =
        SherpaOnnxBindings.speakerEmbeddingManagerRemove?.call(ptr, namePtr) ??
            0;

    calloc.free(namePtr);

    return ok == 1;
  }

  /// Search for the best matching speaker above [threshold].
  ///
  /// Returns an empty string if no speaker is found.
  String search({required Float32List embedding, required double threshold}) {
    assert(embedding.length == dim);

    if (SherpaOnnxBindings.speakerEmbeddingManagerSearch == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return '';
    }

    final Pointer<Float> p = calloc<Float>(dim);
    final pList = p.asTypedList(dim);
    pList.setAll(0, embedding);

    final Pointer<Utf8> name = SherpaOnnxBindings.speakerEmbeddingManagerSearch
            ?.call(ptr, p, threshold) ??
        nullptr;

    calloc.free(p);

    if (name == nullptr) {
      return '';
    }

    final String ans = name.toDartString();

    SherpaOnnxBindings.speakerEmbeddingManagerFreeSearch?.call(name);

    return ans;
  }

  /// Verify whether [embedding] matches [name] above [threshold].
  bool verify(
      {required String name,
       required Float32List embedding,
       required double threshold}) {
    assert(embedding.length == dim);

    if (SherpaOnnxBindings.speakerEmbeddingManagerVerify == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return false;
    }

    final Pointer<Utf8> namePtr = name.toNativeUtf8();

    final Pointer<Float> p = calloc<Float>(dim);
    final pList = p.asTypedList(dim);
    pList.setAll(0, embedding);

    final int ok = SherpaOnnxBindings.speakerEmbeddingManagerVerify
            ?.call(ptr, namePtr, p, threshold) ??
        0;

    calloc.free(p);
    calloc.free(namePtr);

    return ok == 1;
  }

  int get numSpeakers {
    if (SherpaOnnxBindings.speakerEmbeddingManagerNumSpeakers == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return 0;
    }

    return SherpaOnnxBindings.speakerEmbeddingManagerNumSpeakers?.call(ptr) ??
        0;
  }

  List<String> get allSpeakerNames {
    if (SherpaOnnxBindings.speakerEmbeddingManagerGetAllSpeakers == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    int n = numSpeakers;
    if (n == 0) {
      return <String>[];
    }

    final Pointer<Pointer<Utf8>> names =
        SherpaOnnxBindings.speakerEmbeddingManagerGetAllSpeakers?.call(ptr) ??
            nullptr;

    if (names == nullptr) {
      return <String>[];
    }

    final ans = <String>[];

    // see https://api.flutter.dev/flutter/dart-ffi/PointerPointer.html
    for (int i = 0; i != n; ++i) {
      String name = names[i].toDartString();
      ans.add(name);
    }

    SherpaOnnxBindings.speakerEmbeddingManagerFreeAllSpeakers?.call(names);

    return ans;
  }

  Pointer<SherpaOnnxSpeakerEmbeddingManager> ptr;
  final int dim;
}


================================================
FILE: flutter/sherpa_onnx/lib/src/spoken_language_identification.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:ffi';

import 'package:ffi/ffi.dart';

import './offline_stream.dart';
import './sherpa_onnx_bindings.dart';
import './utils.dart';

/// Spoken language identification.
///
/// This module identifies the language spoken in an audio clip, using the
/// Whisper-based language ID model family exposed by the native library.
///
/// Example:
///
/// ```dart
/// final sli = SpokenLanguageIdentification(
///   SpokenLanguageIdentificationConfig(
///     whisper: const SpokenLanguageIdentificationWhisperConfig(
///       encoder: './sherpa-onnx-whisper-tiny/encoder.int8.onnx',
///       decoder: './sherpa-onnx-whisper-tiny/decoder.int8.onnx',
///     ),
///   ),
/// );
///
/// final stream = sli.createStream();
/// stream.acceptWaveform(samples: wave.samples, sampleRate: wave.sampleRate);
/// print(sli.compute(stream).lang);
/// ```
class SpokenLanguageIdentificationWhisperConfig {
  const SpokenLanguageIdentificationWhisperConfig({
    this.encoder = '',
    this.decoder = '',
    this.tailPaddings = 0,
  });

  factory SpokenLanguageIdentificationWhisperConfig.fromJson(
      Map<String, dynamic> json) {
    return SpokenLanguageIdentificationWhisperConfig(
      encoder: json['encoder'] as String? ?? '',
      decoder: json['decoder'] as String? ?? '',
      tailPaddings: json['tailPaddings'] as int? ?? 0,
    );
  }

  @override
  String toString() {
    return 'SpokenLanguageIdentificationWhisperConfig(encoder: $encoder, decoder: $decoder, tailPaddings: $tailPaddings)';
  }

  Map<String, dynamic> toJson() => {
        'encoder': encoder,
        'decoder': decoder,
        'tailPaddings': tailPaddings,
      };

  final String encoder;
  final String decoder;
  final int tailPaddings;
}

/// Top-level configuration for [SpokenLanguageIdentification].
class SpokenLanguageIdentificationConfig {
  const SpokenLanguageIdentificationConfig({
    this.whisper = const SpokenLanguageIdentificationWhisperConfig(),
    this.numThreads = 1,
    this.debug = false,
    this.provider = 'cpu',
  });

  factory SpokenLanguageIdentificationConfig.fromJson(
      Map<String, dynamic> json) {
    return SpokenLanguageIdentificationConfig(
      whisper: json['whisper'] != null
          ? SpokenLanguageIdentificationWhisperConfig.fromJson(
              json['whisper'] as Map<String, dynamic>)
          : const SpokenLanguageIdentificationWhisperConfig(),
      numThreads: json['numThreads'] as int? ?? 1,
      debug: json['debug'] as bool? ?? false,
      provider: json['provider'] as String? ?? 'cpu',
    );
  }

  @override
  String toString() {
    return 'SpokenLanguageIdentificationConfig(whisper: $whisper, numThreads: $numThreads, debug: $debug, provider: $provider)';
  }

  Map<String, dynamic> toJson() => {
        'whisper': whisper.toJson(),
        'numThreads': numThreads,
        'debug': debug,
        'provider': provider,
      };

  final SpokenLanguageIdentificationWhisperConfig whisper;
  final int numThreads;
  final bool debug;
  final String provider;
}

/// Result returned by [SpokenLanguageIdentification.compute].
class SpokenLanguageIdentificationResult {
  const SpokenLanguageIdentificationResult({
    required this.lang,
  });

  factory SpokenLanguageIdentificationResult.fromJson(
      Map<String, dynamic> json) {
    return SpokenLanguageIdentificationResult(
      lang: json['lang'] as String? ?? '',
    );
  }

  @override
  String toString() {
    return 'SpokenLanguageIdentificationResult(lang: $lang)';
  }

  Map<String, dynamic> toJson() => {
        'lang': lang,
      };

  final String lang;
}

/// Spoken language identifier.
class SpokenLanguageIdentification {
  SpokenLanguageIdentification.fromPtr(
      {required this.ptr, required this.config});

  SpokenLanguageIdentification._({required this.ptr, required this.config});

  /// Release the native language identifier.
  void free() {
    if (SherpaOnnxBindings.sherpaOnnxDestroySpokenLanguageIdentification ==
        null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return;
    }
    SherpaOnnxBindings.sherpaOnnxDestroySpokenLanguageIdentification?.call(ptr);
    ptr = nullptr;
  }

  /// Create a language identifier from [config].
  factory SpokenLanguageIdentification(
      SpokenLanguageIdentificationConfig config) {
    final c = convertConfig(config);

    if (SherpaOnnxBindings.sherpaOnnxCreateSpokenLanguageIdentification ==
        null) {
      freeConfig(c);
      throw Exception("Please initialize sherpa-onnx first");
    }

    final ptr = SherpaOnnxBindings.sherpaOnnxCreateSpokenLanguageIdentification
            ?.call(c) ??
        nullptr;

    if (ptr == nullptr) {
      freeConfig(c);
      throw Exception(
          "Failed to create spoken language identification. Please check your config");
    }

    freeConfig(c);

    return SpokenLanguageIdentification._(ptr: ptr, config: config);
  }

  static Pointer<SherpaOnnxSpokenLanguageIdentificationConfig> convertConfig(
      SpokenLanguageIdentificationConfig config) {
    final c = calloc<SherpaOnnxSpokenLanguageIdentificationConfig>();

    c.ref.whisper.encoder = config.whisper.encoder.toNativeUtf8();
    c.ref.whisper.decoder = config.whisper.decoder.toNativeUtf8();
    c.ref.whisper.tailPaddings = config.whisper.tailPaddings;

    c.ref.numThreads = config.numThreads;
    c.ref.debug = config.debug ? 1 : 0;
    c.ref.provider = config.provider.toNativeUtf8();

    return c;
  }

  static void freeConfig(
      Pointer<SherpaOnnxSpokenLanguageIdentificationConfig> c) {
    malloc.free(c.ref.whisper.encoder);
    malloc.free(c.ref.whisper.decoder);
    malloc.free(c.ref.provider);
    malloc.free(c);
  }

  /// Create an offline stream for one audio clip.
  OfflineStream createStream() {
    if (SherpaOnnxBindings
            .sherpaOnnxSpokenLanguageIdentificationCreateOfflineStream ==
        null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      throw Exception("Failed to create offline stream");
    }

    final p = SherpaOnnxBindings
            .sherpaOnnxSpokenLanguageIdentificationCreateOfflineStream
            ?.call(ptr) ??
        nullptr;

    if (p == nullptr) {
      throw Exception("Failed to create offline stream");
    }

    return OfflineStream(ptr: p);
  }

  /// Compute the spoken language for [stream].
  SpokenLanguageIdentificationResult compute(OfflineStream stream) {
    if (SherpaOnnxBindings.sherpaOnnxSpokenLanguageIdentificationCompute ==
        null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr || stream.ptr == nullptr) {
      return const SpokenLanguageIdentificationResult(lang: '');
    }

    final result = SherpaOnnxBindings
            .sherpaOnnxSpokenLanguageIdentificationCompute
            ?.call(ptr, stream.ptr) ??
        nullptr;

    if (result == nullptr) {
      return const SpokenLanguageIdentificationResult(lang: '');
    }

    final lang = toDartString(result.ref.lang);

    SherpaOnnxBindings.sherpaOnnxDestroySpokenLanguageIdentificationResult
        ?.call(result);

    return SpokenLanguageIdentificationResult(lang: lang);
  }

  Pointer<SherpaOnnxSpokenLanguageIdentification> ptr;
  SpokenLanguageIdentificationConfig config;
}


================================================
FILE: flutter/sherpa_onnx/lib/src/tts.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:convert';
import 'dart:ffi';
import 'dart:typed_data';

import 'package:ffi/ffi.dart';

import './sherpa_onnx_bindings.dart';

/// Offline text-to-speech.
///
/// This module supports VITS, Matcha, Kokoro, Kitten, ZipVoice, Pocket TTS,
/// and Supertonic model families. See `dart-api-examples/tts/bin/` for working
/// examples such as `pocket-en.dart`, `kokoro-en.dart`, `kokoro-zh-en.dart`,
/// `matcha-en.dart`, and `zipvoice-zh-en.dart`.
///
/// Example:
///
/// ```dart
/// final model = OfflineTtsModelConfig(
///   pocketTts: const OfflineTtsPocketSphinxModelConfig(
///     model: './sherpa-onnx-pocket-tts/model.int8.onnx',
///     tokens: './sherpa-onnx-pocket-tts/tokens.txt',
///     dataDir: './sherpa-onnx-pocket-tts/espeak-ng-data',
///   ),
///   numThreads: 1,
/// );
///
/// final tts = OfflineTts(OfflineTtsConfig(model: model));
/// final audio = tts.generate(
///   text: 'Hello from sherpa-onnx',
///   sid: 0,
///   speed: 1.0,
/// );
/// writeWave(
///   filename: './out.wav',
///   samples: audio.samples,
///   sampleRate: audio.sampleRate,
/// );
/// tts.free();
/// ```

/// Per-request generation options for [OfflineTts.generateWithConfig].
///
/// Use this when you need advanced generation controls such as zero-shot voice
/// cloning reference audio, explicit reference sample rate, or model-specific
/// values in [extra].
class OfflineTtsGenerationConfig {
  const OfflineTtsGenerationConfig({
    this.silenceScale = 0.2,
    this.speed = 1.0,
    this.sid = 0,
    this.referenceAudio,
    this.referenceSampleRate = 0,
    this.referenceText = '',
    this.numSteps = 5,
    this.extra = const {},
  });

  /// Convert Extra to JSON string.
  /// Returns nullptr if empty.
  /// The user should use calloc.free(p); to free the returned value
  Pointer<Utf8> extraToNativeUtf8() {
    if (extra.isEmpty) {
      return nullptr;
    }

    // Validate values
    for (final v in extra.values) {
      if (v is! String && v is! int && v is! double) {
        throw ArgumentError(
          'extra values must be String, int, or double. Got: ${v.runtimeType}',
        );
      }
    }

    final jsonString = jsonEncode(extra);
    return jsonString.toNativeUtf8();
  }

  Pointer<SherpaOnnxGenerationConfig> toNative() {
    final p = calloc<SherpaOnnxGenerationConfig>();

    p.ref.silenceScale = silenceScale;
    p.ref.speed = speed;
    p.ref.sid = sid;
    p.ref.numSteps = numSteps;

    if (referenceAudio != null && referenceAudio!.isNotEmpty) {
      final audioPtr = calloc<Float>(referenceAudio!.length);
      audioPtr.asTypedList(referenceAudio!.length).setAll(0, referenceAudio!);
      p.ref.referenceAudio = audioPtr;
      p.ref.referenceAudioLength = referenceAudio!.length;
      p.ref.referenceSampleRate = referenceSampleRate;
    } else {
      p.ref.referenceAudio = nullptr;
      p.ref.referenceAudioLength = 0;
      p.ref.referenceSampleRate = 0;
    }

    p.ref.referenceText = referenceText.isEmpty
        ? nullptr
        : referenceText.toNativeUtf8();

    p.ref.extra = extraToNativeUtf8();

    return p;
  }

  void freeNative(Pointer<SherpaOnnxGenerationConfig> p) {
    if (p.ref.referenceAudio != nullptr) {
      calloc.free(p.ref.referenceAudio);
    }
    if (p.ref.referenceText != nullptr) {
      calloc.free(p.ref.referenceText);
    }
    if (p.ref.extra != nullptr) {
      calloc.free(p.ref.extra);
    }
    calloc.free(p);
  }

  final double silenceScale;
  final double speed;
  final int sid;

  /// mono audio in [-1, 1]
  final Float32List? referenceAudio;
  final int referenceSampleRate;
  final String referenceText;
  final int numSteps;

  /// Extra model-specific attributes
  /// key: string
  /// value: string | int | double
  final Map<String, Object> extra;
}

/// VITS model configuration.
class OfflineTtsVitsModelConfig {
  const OfflineTtsVitsModelConfig({
    this.model = '',
    this.lexicon = '',
    this.tokens = '',
    this.dataDir = '',
    this.noiseScale = 0.667,
    this.noiseScaleW = 0.8,
    this.lengthScale = 1.0,
    this.dictDir = '',
  });

  factory OfflineTtsVitsModelConfig.fromJson(Map<String, dynamic> json) {
    return OfflineTtsVitsModelConfig(
      model: json['model'] as String? ?? '',
      lexicon: json['lexicon'] as String? ?? '',
      tokens: json['tokens'] as String? ?? '',
      dataDir: json['dataDir'] as String? ?? '',
      noiseScale: (json['noiseScale'] as num?)?.toDouble() ?? 0.667,
      noiseScaleW: (json['noiseScaleW'] as num?)?.toDouble() ?? 0.8,
      lengthScale: (json['lengthScale'] as num?)?.toDouble() ?? 1.0,
    );
  }

  @override
  String toString() {
    return 'OfflineTtsVitsModelConfig(model: $model, lexicon: $lexicon, tokens: $tokens, dataDir: $dataDir, noiseScale: $noiseScale, noiseScaleW: $noiseScaleW, lengthScale: $lengthScale)';
  }

  Map<String, dynamic> toJson() => {
    'model': model,
    'lexicon': lexicon,
    'tokens': tokens,
    'dataDir': dataDir,
    'noiseScale': noiseScale,
    'noiseScaleW': noiseScaleW,
    'lengthScale': lengthScale,
  };

  final String model;
  final String lexicon;
  final String tokens;
  final String dataDir;
  final double noiseScale;
  final double noiseScaleW;
  final double lengthScale;
  final String dictDir; // unused
}

/// Matcha model configuration.
class OfflineTtsMatchaModelConfig {
  const OfflineTtsMatchaModelConfig({
    this.acousticModel = '',
    this.vocoder = '',
    this.lexicon = '',
    this.tokens = '',
    this.dataDir = '',
    this.noiseScale = 0.667,
    this.lengthScale = 1.0,
    this.dictDir = '',
  });

  factory OfflineTtsMatchaModelConfig.fromJson(Map<String, dynamic> json) {
    return OfflineTtsMatchaModelConfig(
      acousticModel: json['acousticModel'] as String? ?? '',
      vocoder: json['vocoder'] as String? ?? '',
      lexicon: json['lexicon'] as String? ?? '',
      tokens: json['tokens'] as String? ?? '',
      dataDir: json['dataDir'] as String? ?? '',
      noiseScale: (json['noiseScale'] as num?)?.toDouble() ?? 0.667,
      lengthScale: (json['lengthScale'] as num?)?.toDouble() ?? 1.0,
    );
  }

  @override
  String toString() {
    return 'OfflineTtsMatchaModelConfig(acousticModel: $acousticModel, vocoder: $vocoder, lexicon: $lexicon, tokens: $tokens, dataDir: $dataDir, noiseScale: $noiseScale, lengthScale: $lengthScale)';
  }

  Map<String, dynamic> toJson() => {
    'acousticModel': acousticModel,
    'vocoder': vocoder,
    'lexicon': lexicon,
    'tokens': tokens,
    'dataDir': dataDir,
    'noiseScale': noiseScale,
    'lengthScale': lengthScale,
  };

  final String acousticModel;
  final String vocoder;
  final String lexicon;
  final String tokens;
  final String dataDir;
  final double noiseScale;
  final double lengthScale;
  final String dictDir; // unused
}

/// Kokoro model configuration.
class OfflineTtsKokoroModelConfig {
  const OfflineTtsKokoroModelConfig({
    this.model = '',
    this.voices = '',
    this.tokens = '',
    this.dataDir = '',
    this.lengthScale = 1.0,
    this.dictDir = '',
    this.lexicon = '',
    this.lang = '',
  });

  factory OfflineTtsKokoroModelConfig.fromJson(Map<String, dynamic> json) {
    return OfflineTtsKokoroModelConfig(
      model: json['model'] as String? ?? '',
      voices: json['voices'] as String? ?? '',
      tokens: json['tokens'] as String? ?? '',
      dataDir: json['dataDir'] as String? ?? '',
      lengthScale: (json['lengthScale'] as num?)?.toDouble() ?? 1.0,
      lexicon: json['lexicon'] as String? ?? '',
      lang: json['lang'] as String? ?? '',
    );
  }

  @override
  String toString() {
    return 'OfflineTtsKokoroModelConfig(model: $model, voices: $voices, tokens: $tokens, dataDir: $dataDir, lengthScale: $lengthScale, lexicon: $lexicon, lang: $lang)';
  }

  Map<String, dynamic> toJson() => {
    'model': model,
    'voices': voices,
    'tokens': tokens,
    'dataDir': dataDir,
    'lengthScale': lengthScale,
    'lexicon': lexicon,
    'lang': lang,
  };

  final String model;
  final String voices;
  final String tokens;
  final String dataDir;
  final double lengthScale;
  final String dictDir; // unused
  final String lexicon;
  final String lang;
}

/// Kitten model configuration.
class OfflineTtsKittenModelConfig {
  const OfflineTtsKittenModelConfig({
    this.model = '',
    this.voices = '',
    this.tokens = '',
    this.dataDir = '',
    this.lengthScale = 1.0,
  });

  factory OfflineTtsKittenModelConfig.fromJson(Map<String, dynamic> json) {
    return OfflineTtsKittenModelConfig(
      model: json['model'] as String? ?? '',
      voices: json['voices'] as String? ?? '',
      tokens: json['tokens'] as String? ?? '',
      dataDir: json['dataDir'] as String? ?? '',
      lengthScale: (json['lengthScale'] as num?)?.toDouble() ?? 1.0,
    );
  }

  @override
  String toString() {
    return 'OfflineTtsKittenModelConfig(model: $model, voices: $voices, tokens: $tokens, dataDir: $dataDir, lengthScale: $lengthScale)';
  }

  Map<String, dynamic> toJson() => {
    'model': model,
    'voices': voices,
    'tokens': tokens,
    'dataDir': dataDir,
    'lengthScale': lengthScale,
  };

  final String model;
  final String voices;
  final String tokens;
  final String dataDir;
  final double lengthScale;
}

/// ZipVoice model configuration.
class OfflineTtsZipVoiceModelConfig {
  const OfflineTtsZipVoiceModelConfig({
    this.tokens = '',
    this.encoder = '',
    this.decoder = '',
    this.vocoder = '',
    this.dataDir = '',
    this.lexicon = '',
    this.featScale = 0.1,
    this.tShift = 0.5,
    this.targetRms = 0.1,
    this.guidanceScale = 1.0,
  });

  factory OfflineTtsZipVoiceModelConfig.fromJson(Map<String, dynamic> json) {
    return OfflineTtsZipVoiceModelConfig(
      tokens: json['tokens'] as String? ?? '',
      encoder: json['encoder'] as String? ?? '',
      decoder: json['decoder'] as String? ?? '',
      vocoder: json['vocoder'] as String? ?? '',
      dataDir: json['dataDir'] as String? ?? '',
      lexicon: json['lexicon'] as String? ?? '',
      featScale: (json['featScale'] as num?)?.toDouble() ?? 0.1,
      tShift: (json['tShift'] as num?)?.toDouble() ?? 0.5,
      targetRms: (json['targetRms'] as num?)?.toDouble() ?? 0.1,
      guidanceScale: (json['guidanceScale'] as num?)?.toDouble() ?? 1.0,
    );
  }

  @override
  String toString() {
    return 'OfflineTtsZipVoiceModelConfig(tokens: $tokens, encoder: $encoder, decoder: $decoder, vocoder: $vocoder, dataDir: $dataDir, lexicon: $lexicon, featScale: $featScale, tShift: $tShift, targetRms: $targetRms, guidanceScale: $guidanceScale)';
  }

  Map<String, dynamic> toJson() => {
    'tokens': tokens,
    'encoder': encoder,
    'decoder': decoder,
    'vocoder': vocoder,
    'dataDir': dataDir,
    'lexicon': lexicon,
    'featScale': featScale,
    'tShift': tShift,
    'targetRms': targetRms,
    'guidanceScale': guidanceScale,
  };

  final String tokens;
  final String encoder;
  final String decoder;
  final String vocoder;
  final String dataDir;
  final String lexicon;
  final double featScale;
  final double tShift;
  final double targetRms;
  final double guidanceScale;
}

/// Pocket TTS model configuration.
///
/// This family supports zero-shot voice cloning with a reference waveform.
class OfflineTtsPocketModelConfig {
  const OfflineTtsPocketModelConfig({
    this.lmFlow = '',
    this.lmMain = '',
    this.encoder = '',
    this.decoder = '',
    this.textConditioner = '',
    this.vocabJson = '',
    this.tokenScoresJson = '',
    this.voiceEmbeddingCacheCapacity = 50,
  });

  factory OfflineTtsPocketModelConfig.fromJson(Map<String, dynamic> json) {
    return OfflineTtsPocketModelConfig(
      lmFlow: json['lmFlow'] as String? ?? '',
      lmMain: json['lmMain'] as String? ?? '',
      encoder: json['encoder'] as String? ?? '',
      decoder: json['decoder'] as String? ?? '',
      textConditioner: json['textConditioner'] as String? ?? '',
      vocabJson: json['vocabJson'] as String? ?? '',
      tokenScoresJson: json['tokenScoresJson'] as String? ?? '',
      voiceEmbeddingCacheCapacity:
          json['voiceEmbeddingCacheCapacity'] as int? ?? 50,
    );
  }

  Map<String, dynamic> toJson() => {
    'lmFlow': lmFlow,
    'lmMain': lmMain,
    'encoder': encoder,
    'decoder': decoder,
    'textConditioner': textConditioner,
    'vocabJson': vocabJson,
    'tokenScoresJson': tokenScoresJson,
    'voiceEmbeddingCacheCapacity': voiceEmbeddingCacheCapacity,
  };

  @override
  String toString() {
    return 'OfflineTtsPocketModelConfig(lmFlow: $lmFlow, lmMain: $lmMain, encoder: $encoder, decoder: $decoder, textConditioner: $textConditioner, vocabJson: $vocabJson, tokenScoresJson: $tokenScoresJson, voiceEmbeddingCacheCapacity: $voiceEmbeddingCacheCapacity)';
  }

  final String lmFlow;
  final String lmMain;
  final String encoder;
  final String decoder;
  final String textConditioner;
  final String vocabJson;
  final String tokenScoresJson;
  final int voiceEmbeddingCacheCapacity;
}

/// Supertonic model configuration.
class OfflineTtsSupertonicModelConfig {
  const OfflineTtsSupertonicModelConfig({
    this.durationPredictor = '',
    this.textEncoder = '',
    this.vectorEstimator = '',
    this.vocoder = '',
    this.ttsJson = '',
    this.unicodeIndexer = '',
    this.voiceStyle = '',
  });

  factory OfflineTtsSupertonicModelConfig.fromJson(Map<String, dynamic> json) {
    return OfflineTtsSupertonicModelConfig(
      durationPredictor: json['durationPredictor'] as String? ?? '',
      textEncoder: json['textEncoder'] as String? ?? '',
      vectorEstimator: json['vectorEstimator'] as String? ?? '',
      vocoder: json['vocoder'] as String? ?? '',
      ttsJson: json['ttsJson'] as String? ?? '',
      unicodeIndexer: json['unicodeIndexer'] as String? ?? '',
      voiceStyle: json['voiceStyle'] as String? ?? '',
    );
  }

  Map<String, dynamic> toJson() => {
    'durationPredictor': durationPredictor,
    'textEncoder': textEncoder,
    'vectorEstimator': vectorEstimator,
    'vocoder': vocoder,
    'ttsJson': ttsJson,
    'unicodeIndexer': unicodeIndexer,
    'voiceStyle': voiceStyle,
  };

  @override
  String toString() {
    return 'OfflineTtsSupertonicModelConfig(durationPredictor: $durationPredictor, textEncoder: $textEncoder, vectorEstimator: $vectorEstimator, vocoder: $vocoder, ttsJson: $ttsJson, unicodeIndexer: $unicodeIndexer, voiceStyle: $voiceStyle)';
  }

  final String durationPredictor;
  final String textEncoder;
  final String vectorEstimator;
  final String vocoder;
  final String ttsJson;
  final String unicodeIndexer;
  final String voiceStyle;
}

/// Aggregate model configuration for offline TTS.
///
/// Configure exactly one model family for a typical setup and set the shared
/// runtime options such as [numThreads] and [provider].
class OfflineTtsModelConfig {
  const OfflineTtsModelConfig({
    this.vits = const OfflineTtsVitsModelConfig(),
    this.matcha = const OfflineTtsMatchaModelConfig(),
    this.kokoro = const OfflineTtsKokoroModelConfig(),
    this.kitten = const OfflineTtsKittenModelConfig(),
    this.zipvoice = const OfflineTtsZipVoiceModelConfig(),
    this.pocket = const OfflineTtsPocketModelConfig(),
    this.supertonic = const OfflineTtsSupertonicModelConfig(),
    this.numThreads = 1,
    this.debug = true,
    this.provider = 'cpu',
  });

  factory OfflineTtsModelConfig.fromJson(Map<String, dynamic> json) {
    return OfflineTtsModelConfig(
      vits: OfflineTtsVitsModelConfig.fromJson(
        json['vits'] as Map<String, dynamic>? ?? const {},
      ),
      matcha: OfflineTtsMatchaModelConfig.fromJson(
        json['matcha'] as Map<String, dynamic>? ?? const {},
      ),
      kokoro: OfflineTtsKokoroModelConfig.fromJson(
        json['kokoro'] as Map<String, dynamic>? ?? const {},
      ),
      kitten: OfflineTtsKittenModelConfig.fromJson(
        json['kitten'] as Map<String, dynamic>? ?? const {},
      ),
      zipvoice: OfflineTtsZipVoiceModelConfig.fromJson(
        json['zipvoice'] as Map<String, dynamic>? ?? const {},
      ),
      pocket: OfflineTtsPocketModelConfig.fromJson(
        json['pocket'] as Map<String, dynamic>? ?? const {},
      ),
      supertonic: OfflineTtsSupertonicModelConfig.fromJson(
        json['supertonic'] as Map<String, dynamic>? ?? const {},
      ),
      numThreads: json['numThreads'] as int? ?? 1,
      debug: json['debug'] as bool? ?? true,
      provider: json['provider'] as String? ?? 'cpu',
    );
  }

  @override
  String toString() {
    return 'OfflineTtsModelConfig(vits: $vits, matcha: $matcha, kokoro: $kokoro, kitten: $kitten, zipvoice: $zipvoice, pocket: $pocket, supertonic: $supertonic, numThreads: $numThreads, debug: $debug, provider: $provider)';
  }

  Map<String, dynamic> toJson() => {
    'vits': vits.toJson(),
    'matcha': matcha.toJson(),
    'kokoro': kokoro.toJson(),
    'kitten': kitten.toJson(),
    'zipvoice': zipvoice.toJson(),
    'pocket': pocket.toJson(),
    'supertonic': supertonic.toJson(),
    'numThreads': numThreads,
    'debug': debug,
    'provider': provider,
  };

  final OfflineTtsVitsModelConfig vits;
  final OfflineTtsMatchaModelConfig matcha;
  final OfflineTtsKokoroModelConfig kokoro;
  final OfflineTtsKittenModelConfig kitten;
  final OfflineTtsZipVoiceModelConfig zipvoice;
  final OfflineTtsPocketModelConfig pocket;
  final OfflineTtsSupertonicModelConfig supertonic;
  final int numThreads;
  final bool debug;
  final String provider;
}

/// Top-level configuration for [OfflineTts].
class OfflineTtsConfig {
  const OfflineTtsConfig({
    required this.model,
    this.ruleFsts = '',
    this.maxNumSenetences = 1,
    this.ruleFars = '',
    this.silenceScale = 0.2,
  });

  factory OfflineTtsConfig.fromJson(Map<String, dynamic> json) {
    return OfflineTtsConfig(
      model: OfflineTtsModelConfig.fromJson(
        json['model'] as Map<String, dynamic>,
      ),
      ruleFsts: json['ruleFsts'] as String? ?? '',
      maxNumSenetences: json['maxNumSenetences'] as int? ?? 1,
      ruleFars: json['ruleFars'] as String? ?? '',
      silenceScale: (json['silenceScale'] as num?)?.toDouble() ?? 0.2,
    );
  }

  @override
  String toString() {
    return 'OfflineTtsConfig(model: $model, ruleFsts: $ruleFsts, maxNumSenetences: $maxNumSenetences, ruleFars: $ruleFars, silenceScale: $silenceScale)';
  }

  Map<String, dynamic> toJson() => {
    'model': model.toJson(),
    'ruleFsts': ruleFsts,
    'maxNumSenetences': maxNumSenetences,
    'ruleFars': ruleFars,
    'silenceScale': silenceScale,
  };

  final OfflineTtsModelConfig model;
  final String ruleFsts;
  final int maxNumSenetences;
  final String ruleFars;
  final double silenceScale;
}

/// Audio generated by [OfflineTts].
class GeneratedAudio {
  GeneratedAudio({required this.samples, required this.sampleRate});

  final Float32List samples;
  final int sampleRate;
}

/// Offline text-to-speech engine.
///
/// Create one from an [OfflineTtsConfig], then call [generate],
/// [generateWithCallback], or [generateWithConfig] depending on how much
/// control you need over the generation process.
class OfflineTts {
  OfflineTts.fromPtr({required this.ptr, required this.config});

  OfflineTts._({required this.ptr, required this.config});

  /// The user is responsible to call the OfflineTts.free()
  /// method of the returned instance to avoid memory leak.
  factory OfflineTts(OfflineTtsConfig config) {
    if (SherpaOnnxBindings.createOfflineTts == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    final c = calloc<SherpaOnnxOfflineTtsConfig>();
    c.ref.model.vits.model = config.model.vits.model.toNativeUtf8();
    c.ref.model.vits.lexicon = config.model.vits.lexicon.toNativeUtf8();
    c.ref.model.vits.tokens = config.model.vits.tokens.toNativeUtf8();
    c.ref.model.vits.dataDir = config.model.vits.dataDir.toNativeUtf8();
    c.ref.model.vits.noiseScale = config.model.vits.noiseScale;
    c.ref.model.vits.noiseScaleW = config.model.vits.noiseScaleW;
    c.ref.model.vits.lengthScale = config.model.vits.lengthScale;

    c.ref.model.matcha.acousticModel = config.model.matcha.acousticModel
        .toNativeUtf8();
    c.ref.model.matcha.vocoder = config.model.matcha.vocoder.toNativeUtf8();
    c.ref.model.matcha.lexicon = config.model.matcha.lexicon.toNativeUtf8();
    c.ref.model.matcha.tokens = config.model.matcha.tokens.toNativeUtf8();
    c.ref.model.matcha.dataDir = config.model.matcha.dataDir.toNativeUtf8();
    c.ref.model.matcha.noiseScale = config.model.matcha.noiseScale;
    c.ref.model.matcha.lengthScale = config.model.matcha.lengthScale;

    c.ref.model.kokoro.model = config.model.kokoro.model.toNativeUtf8();
    c.ref.model.kokoro.voices = config.model.kokoro.voices.toNativeUtf8();
    c.ref.model.kokoro.tokens = config.model.kokoro.tokens.toNativeUtf8();
    c.ref.model.kokoro.dataDir = config.model.kokoro.dataDir.toNativeUtf8();
    c.ref.model.kokoro.lengthScale = config.model.kokoro.lengthScale;
    c.ref.model.kokoro.lexicon = config.model.kokoro.lexicon.toNativeUtf8();
    c.ref.model.kokoro.lang = config.model.kokoro.lang.toNativeUtf8();

    c.ref.model.kitten.model = config.model.kitten.model.toNativeUtf8();
    c.ref.model.kitten.voices = config.model.kitten.voices.toNativeUtf8();
    c.ref.model.kitten.tokens = config.model.kitten.tokens.toNativeUtf8();
    c.ref.model.kitten.dataDir = config.model.kitten.dataDir.toNativeUtf8();
    c.ref.model.kitten.lengthScale = config.model.kitten.lengthScale;

    c.ref.model.zipvoice.tokens = config.model.zipvoice.tokens.toNativeUtf8();
    c.ref.model.zipvoice.encoder = config.model.zipvoice.encoder.toNativeUtf8();
    c.ref.model.zipvoice.decoder = config.model.zipvoice.decoder.toNativeUtf8();
    c.ref.model.zipvoice.vocoder = config.model.zipvoice.vocoder.toNativeUtf8();
    c.ref.model.zipvoice.dataDir = config.model.zipvoice.dataDir.toNativeUtf8();
    c.ref.model.zipvoice.lexicon = config.model.zipvoice.lexicon.toNativeUtf8();
    c.ref.model.zipvoice.featScale = config.model.zipvoice.featScale;
    c.ref.model.zipvoice.tShift = config.model.zipvoice.tShift;
    c.ref.model.zipvoice.targetRms = config.model.zipvoice.targetRms;
    c.ref.model.zipvoice.guidanceScale = config.model.zipvoice.guidanceScale;

    c.ref.model.pocket.lmFlow = config.model.pocket.lmFlow.toNativeUtf8();
    c.ref.model.pocket.lmMain = config.model.pocket.lmMain.toNativeUtf8();
    c.ref.model.pocket.encoder = config.model.pocket.encoder.toNativeUtf8();
    c.ref.model.pocket.decoder = config.model.pocket.decoder.toNativeUtf8();
    c.ref.model.pocket.textConditioner = config.model.pocket.textConditioner
        .toNativeUtf8();
    c.ref.model.pocket.vocabJson = config.model.pocket.vocabJson.toNativeUtf8();
    c.ref.model.pocket.tokenScoresJson = config.model.pocket.tokenScoresJson
        .toNativeUtf8();
    c.ref.model.pocket.voiceEmbeddingCacheCapacity =
        config.model.pocket.voiceEmbeddingCacheCapacity;

    c.ref.model.supertonic.durationPredictor = config.model.supertonic
        .durationPredictor.toNativeUtf8();
    c.ref.model.supertonic.textEncoder = config.model.supertonic.textEncoder
        .toNativeUtf8();
    c.ref.model.supertonic.vectorEstimator = config.model.supertonic
        .vectorEstimator.toNativeUtf8();
    c.ref.model.supertonic.vocoder = config.model.supertonic.vocoder
        .toNativeUtf8();
    c.ref.model.supertonic.ttsJson = config.model.supertonic.ttsJson
        .toNativeUtf8();
    c.ref.model.supertonic.unicodeIndexer = config.model.supertonic
        .unicodeIndexer.toNativeUtf8();
    c.ref.model.supertonic.voiceStyle = config.model.supertonic.voiceStyle
        .toNativeUtf8();

    c.ref.model.numThreads = config.model.numThreads;
    c.ref.model.debug = config.model.debug ? 1 : 0;
    c.ref.model.provider = config.model.provider.toNativeUtf8();

    c.ref.ruleFsts = config.ruleFsts.toNativeUtf8();
    c.ref.maxNumSenetences = config.maxNumSenetences;
    c.ref.ruleFars = config.ruleFars.toNativeUtf8();
    c.ref.silenceScale = config.silenceScale;

    final ptr = SherpaOnnxBindings.createOfflineTts?.call(c) ?? nullptr;

    calloc.free(c.ref.ruleFars);
    calloc.free(c.ref.ruleFsts);
    calloc.free(c.ref.model.provider);

    calloc.free(c.ref.model.supertonic.voiceStyle);
    calloc.free(c.ref.model.supertonic.unicodeIndexer);
    calloc.free(c.ref.model.supertonic.ttsJson);
    calloc.free(c.ref.model.supertonic.vocoder);
    calloc.free(c.ref.model.supertonic.vectorEstimator);
    calloc.free(c.ref.model.supertonic.textEncoder);
    calloc.free(c.ref.model.supertonic.durationPredictor);

    calloc.free(c.ref.model.pocket.tokenScoresJson);
    calloc.free(c.ref.model.pocket.vocabJson);
    calloc.free(c.ref.model.pocket.textConditioner);
    calloc.free(c.ref.model.pocket.decoder);
    calloc.free(c.ref.model.pocket.encoder);
    calloc.free(c.ref.model.pocket.lmMain);
    calloc.free(c.ref.model.pocket.lmFlow);

    calloc.free(c.ref.model.zipvoice.lexicon);
    calloc.free(c.ref.model.zipvoice.dataDir);
    calloc.free(c.ref.model.zipvoice.vocoder);
    calloc.free(c.ref.model.zipvoice.decoder);
    calloc.free(c.ref.model.zipvoice.encoder);
    calloc.free(c.ref.model.zipvoice.tokens);

    calloc.free(c.ref.model.kitten.dataDir);
    calloc.free(c.ref.model.kitten.tokens);
    calloc.free(c.ref.model.kitten.voices);
    calloc.free(c.ref.model.kitten.model);

    calloc.free(c.ref.model.kokoro.lang);
    calloc.free(c.ref.model.kokoro.lexicon);
    calloc.free(c.ref.model.kokoro.dataDir);
    calloc.free(c.ref.model.kokoro.tokens);
    calloc.free(c.ref.model.kokoro.voices);
    calloc.free(c.ref.model.kokoro.model);

    calloc.free(c.ref.model.matcha.dataDir);
    calloc.free(c.ref.model.matcha.tokens);
    calloc.free(c.ref.model.matcha.lexicon);
    calloc.free(c.ref.model.matcha.vocoder);
    calloc.free(c.ref.model.matcha.acousticModel);

    calloc.free(c.ref.model.vits.dataDir);
    calloc.free(c.ref.model.vits.tokens);
    calloc.free(c.ref.model.vits.lexicon);
    calloc.free(c.ref.model.vits.model);
    calloc.free(c);

    if (ptr == nullptr) {
      throw Exception("Failed to create offline tts. Please check your config");
    }

    return OfflineTts._(ptr: ptr, config: config);
  }

  /// Release the native TTS engine.
  void free() {
    if (SherpaOnnxBindings.destroyOfflineTts == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return;
    }
    SherpaOnnxBindings.destroyOfflineTts?.call(ptr);
    ptr = nullptr;
  }

  /// Generate audio using the simple `(text, sid, speed)` API.
  GeneratedAudio generate({
    required String text,
    int sid = 0,
    double speed = 1.0,
  }) {
    if (SherpaOnnxBindings.offlineTtsGenerate == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return GeneratedAudio(samples: Float32List(0), sampleRate: 0);
    }

    final Pointer<Utf8> textPtr = text.toNativeUtf8();
    final p =
        SherpaOnnxBindings.offlineTtsGenerate?.call(ptr, textPtr, sid, speed) ??
        nullptr;
    calloc.free(textPtr);

    if (p == nullptr) {
      return GeneratedAudio(samples: Float32List(0), sampleRate: 0);
    }

    final samples = p.ref.samples.asTypedList(p.ref.n);
    final sampleRate = p.ref.sampleRate;
    final newSamples = Float32List.fromList(samples);

    SherpaOnnxBindings.destroyOfflineTtsGeneratedAudio?.call(p);

    return GeneratedAudio(samples: newSamples, sampleRate: sampleRate);
  }

  /// Generate audio while receiving partial sample chunks through [callback].
  GeneratedAudio generateWithCallback({
    required String text,
    int sid = 0,
    double speed = 1.0,
    required int Function(Float32List samples) callback,
  }) {
    if (SherpaOnnxBindings.offlineTtsGenerateWithCallback == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return GeneratedAudio(samples: Float32List(0), sampleRate: 0);
    }

    // see
    // https://github.com/dart-lang/sdk/issues/54276#issuecomment-1846109285
    // https://stackoverflow.com/questions/69537440/callbacks-in-dart-dartffi-only-supports-calling-static-dart-functions-from-nat
    // https://github.com/dart-lang/sdk/blob/main/tests/ffi/isolate_local_function_callbacks_test.dart#L46
    final wrapper =
        NativeCallable<SherpaOnnxGeneratedAudioCallbackNative>.isolateLocal((
          Pointer<Float> samples,
          int n,
        ) {
          final s = samples.asTypedList(n);
          final newSamples = Float32List.fromList(s);
          return callback(newSamples);
        }, exceptionalReturn: 0);

    final Pointer<Utf8> textPtr = text.toNativeUtf8();
    final p =
        SherpaOnnxBindings.offlineTtsGenerateWithCallback?.call(
          ptr,
          textPtr,
          sid,
          speed,
          wrapper.nativeFunction,
        ) ??
        nullptr;

    calloc.free(textPtr);
    wrapper.close();

    if (p == nullptr) {
      return GeneratedAudio(samples: Float32List(0), sampleRate: 0);
    }

    final samples = p.ref.samples.asTypedList(p.ref.n);
    final sampleRate = p.ref.sampleRate;
    final newSamples = Float32List.fromList(samples);

    SherpaOnnxBindings.destroyOfflineTtsGeneratedAudio?.call(p);

    return GeneratedAudio(samples: newSamples, sampleRate: sampleRate);
  }

  /// Generate audio using [OfflineTtsGenerationConfig].
  ///
  /// This is the most flexible generation API and is the recommended entry
  /// point for features such as Pocket TTS reference-audio cloning and
  /// model-specific options supplied through [OfflineTtsGenerationConfig.extra].
  GeneratedAudio generateWithConfig({
    required String text,
    required OfflineTtsGenerationConfig config,
    int Function(Float32List samples, double progress)? onProgress,
  }) {
    if (SherpaOnnxBindings.offlineTtsGenerateWithConfig == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return GeneratedAudio(samples: Float32List(0), sampleRate: 0);
    }

    final textPtr = text.toNativeUtf8();
    final cfgPtr = config.toNative();

    NativeCallable<SherpaOnnxGeneratedAudioProgressCallbackWithArgNative>?
    wrapper;

    if (onProgress != null) {
      wrapper =
          NativeCallable<
            SherpaOnnxGeneratedAudioProgressCallbackWithArgNative
          >.isolateLocal((
            Pointer<Float> samples,
            int n,
            double p,
            Pointer<Void> arg,
          ) {
            final list = Float32List.fromList(samples.asTypedList(n));
            return onProgress(list, p);
          }, exceptionalReturn: 0);
    }

    final p =
        SherpaOnnxBindings.offlineTtsGenerateWithConfig?.call(
          ptr,
          textPtr,
          cfgPtr,
          wrapper?.nativeFunction ?? nullptr,
          nullptr,
        ) ??
        nullptr;

    calloc.free(textPtr);
    config.freeNative(cfgPtr);
    wrapper?.close();

    if (p == nullptr) {
      return GeneratedAudio(samples: Float32List(0), sampleRate: 0);
    }

    final samples = Float32List.fromList(p.ref.samples.asTypedList(p.ref.n));
    final sampleRate = p.ref.sampleRate;

    SherpaOnnxBindings.destroyOfflineTtsGeneratedAudio?.call(p);

    return GeneratedAudio(samples: samples, sampleRate: sampleRate);
  }

  /// Return the output sample rate reported by the model.
  int get sampleRate {
    if (SherpaOnnxBindings.offlineTtsSampleRate == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return 0;
    }

    return SherpaOnnxBindings.offlineTtsSampleRate?.call(ptr) ?? 0;
  }

  /// Return the number of built-in speakers reported by the model.
  int get numSpeakers {
    if (SherpaOnnxBindings.offlineTtsNumSpeakers == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return 0;
    }

    return SherpaOnnxBindings.offlineTtsNumSpeakers?.call(ptr) ?? 0;
  }

  Pointer<SherpaOnnxOfflineTts> ptr;
  OfflineTtsConfig config;
}


================================================
FILE: flutter/sherpa_onnx/lib/src/utils.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:convert';
import 'dart:ffi';

import 'package:ffi/ffi.dart';

int _strLen(Pointer<Uint8> codeUnits) {
  // this function is copied from
  // https://github.com/dart-archive/ffi/blob/main/lib/src/utf8.dart#L52
  var length = 0;
  while (codeUnits[length] != 0) {
    length++;
  }
  return length;
}

// This function is modified from
// https://github.com/dart-archive/ffi/blob/main/lib/src/utf8.dart#L41
// It ignores invalid utf8 sequence
String toDartString(Pointer<Utf8> s) {
  final codeUnits = s.cast<Uint8>();
  final length = _strLen(codeUnits);
  return utf8.decode(codeUnits.asTypedList(length), allowMalformed: true);
}


================================================
FILE: flutter/sherpa_onnx/lib/src/vad.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:ffi';
import 'dart:typed_data';
import 'package:ffi/ffi.dart';

import './sherpa_onnx_bindings.dart';

/// Voice activity detection and buffering helpers.
///
/// See `dart-api-examples/vad/bin/vad.dart` and
/// `dart-api-examples/vad/bin/ten-vad.dart` for complete examples.
///
/// Example:
///
/// ```dart
/// final config = VadModelConfig(
///   sileroVad: const SileroVadModelConfig(
///     model: './silero_vad.onnx',
///     minSilenceDuration: 0.25,
///     minSpeechDuration: 0.5,
///   ),
///   numThreads: 1,
/// );
///
/// final vad = VoiceActivityDetector(config: config, bufferSizeInSeconds: 10);
/// final wave = readWave('./test.wav');
/// vad.acceptWaveform(wave.samples);
/// vad.flush();
/// while (!vad.isEmpty()) {
///   print(vad.front());
///   vad.pop();
/// }
/// vad.free();
/// ```

/// Silero VAD model configuration.
class SileroVadModelConfig {
  const SileroVadModelConfig(
      {this.model = '',
      this.threshold = 0.5,
      this.minSilenceDuration = 0.5,
      this.minSpeechDuration = 0.25,
      this.windowSize = 512,
      this.maxSpeechDuration = 5.0});

  factory SileroVadModelConfig.fromJson(Map<String, dynamic> json) {
    return SileroVadModelConfig(
      model: json['model'] as String? ?? '',
      threshold: (json['threshold'] as num?)?.toDouble() ?? 0.5,
      minSilenceDuration:
          (json['minSilenceDuration'] as num?)?.toDouble() ?? 0.5,
      minSpeechDuration:
          (json['minSpeechDuration'] as num?)?.toDouble() ?? 0.25,
      windowSize: json['windowSize'] as int? ?? 512,
      maxSpeechDuration: (json['maxSpeechDuration'] as num?)?.toDouble() ?? 5.0,
    );
  }

  @override
  String toString() {
    return 'SileroVadModelConfig(model: $model, threshold: $threshold, minSilenceDuration: $minSilenceDuration, minSpeechDuration: $minSpeechDuration, windowSize: $windowSize, maxSpeechDuration: $maxSpeechDuration)';
  }

  Map<String, dynamic> toJson() => {
        'model': model,
        'threshold': threshold,
        'minSilenceDuration': minSilenceDuration,
        'minSpeechDuration': minSpeechDuration,
        'windowSize': windowSize,
        'maxSpeechDuration': maxSpeechDuration,
      };

  final String model;
  final double threshold;
  final double minSilenceDuration;
  final double minSpeechDuration;
  final int windowSize;
  final double maxSpeechDuration;
}

/// Ten VAD model configuration.
class TenVadModelConfig {
  const TenVadModelConfig(
      {this.model = '',
      this.threshold = 0.5,
      this.minSilenceDuration = 0.5,
      this.minSpeechDuration = 0.25,
      this.windowSize = 256,
      this.maxSpeechDuration = 5.0});

  factory TenVadModelConfig.fromJson(Map<String, dynamic> json) {
    return TenVadModelConfig(
      model: json['model'] as String? ?? '',
      threshold: (json['threshold'] as num?)?.toDouble() ?? 0.5,
      minSilenceDuration:
          (json['minSilenceDuration'] as num?)?.toDouble() ?? 0.5,
      minSpeechDuration:
          (json['minSpeechDuration'] as num?)?.toDouble() ?? 0.25,
      windowSize: json['windowSize'] as int? ?? 256,
      maxSpeechDuration: (json['maxSpeechDuration'] as num?)?.toDouble() ?? 5.0,
    );
  }

  @override
  String toString() {
    return 'TenVadModelConfig(model: $model, threshold: $threshold, minSilenceDuration: $minSilenceDuration, minSpeechDuration: $minSpeechDuration, windowSize: $windowSize, maxSpeechDuration: $maxSpeechDuration)';
  }

  Map<String, dynamic> toJson() => {
        'model': model,
        'threshold': threshold,
        'minSilenceDuration': minSilenceDuration,
        'minSpeechDuration': minSpeechDuration,
        'windowSize': windowSize,
        'maxSpeechDuration': maxSpeechDuration,
      };

  final String model;
  final double threshold;
  final double minSilenceDuration;
  final double minSpeechDuration;
  final int windowSize;
  final double maxSpeechDuration;
}

/// Top-level VAD model configuration.
///
/// Configure either [sileroVad] or [tenVad] for typical use and set the shared
/// sample rate and runtime settings here.
class VadModelConfig {
  VadModelConfig({
    this.sileroVad = const SileroVadModelConfig(),
    this.sampleRate = 16000,
    this.numThreads = 1,
    this.provider = 'cpu',
    this.debug = true,
    this.tenVad = const TenVadModelConfig(),
  });

  final SileroVadModelConfig sileroVad;
  final TenVadModelConfig tenVad;
  final int sampleRate;
  final int numThreads;
  final String provider;
  final bool debug;

  factory VadModelConfig.fromJson(Map<String, dynamic> json) {
    return VadModelConfig(
      sileroVad: SileroVadModelConfig.fromJson(
          json['sileroVad'] as Map<String, dynamic>? ?? const {}),
      tenVad: TenVadModelConfig.fromJson(
          json['tenVad'] as Map<String, dynamic>? ?? const {}),
      sampleRate: json['sampleRate'] as int? ?? 16000,
      numThreads: json['numThreads'] as int? ?? 1,
      provider: json['provider'] as String? ?? 'cpu',
      debug: json['debug'] as bool? ?? true,
    );
  }

  Map<String, dynamic> toJson() => {
        'sileroVad': sileroVad.toJson(),
        'tenVad': tenVad.toJson(),
        'sampleRate': sampleRate,
        'numThreads': numThreads,
        'provider': provider,
        'debug': debug,
      };

  @override
  String toString() {
    return 'VadModelConfig(sileroVad: $sileroVad, tenVad: $tenVad, sampleRate: $sampleRate, numThreads: $numThreads, provider: $provider, debug: $debug)';
  }
}

/// One detected speech segment emitted by [VoiceActivityDetector].
class SpeechSegment {
  SpeechSegment({required this.samples, required this.start});
  final Float32List samples;
  final int start;
}

/// Circular sample buffer used by VAD-related pipelines.
class CircularBuffer {
  CircularBuffer.fromPtr({required this.ptr});

  CircularBuffer._({required this.ptr});

  /// The user has to invoke CircularBuffer.free() on the returned instance
  /// to avoid memory leak.
  factory CircularBuffer({required int capacity}) {
    assert(capacity > 0, 'capacity is $capacity');

    if (SherpaOnnxBindings.createCircularBuffer == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    final p =
        SherpaOnnxBindings.createCircularBuffer?.call(capacity) ?? nullptr;

    if (p == nullptr) {
      throw Exception(
          "Failed to create circular buffer. Please check your config");
    }

    return CircularBuffer._(ptr: p);
  }

  /// Release the native buffer.
  /// Release the native detector.
  void free() {
    if (SherpaOnnxBindings.destroyCircularBuffer == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return;
    }
    SherpaOnnxBindings.destroyCircularBuffer?.call(ptr);
    ptr = nullptr;
  }

  /// Append samples to the tail of the buffer.
  void push(Float32List data) {
    if (SherpaOnnxBindings.circularBufferPush == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return;
    }

    final n = data.length;
    final Pointer<Float> p = calloc<Float>(n);

    final pList = p.asTypedList(n);
    pList.setAll(0, data);

    SherpaOnnxBindings.circularBufferPush?.call(ptr, p, n);

    calloc.free(p);
  }

  /// Copy [n] samples starting at [startIndex].
  Float32List get({required int startIndex, required int n}) {
    if (SherpaOnnxBindings.circularBufferGet == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return Float32List(0);
    }

    final Pointer<Float> p =
        SherpaOnnxBindings.circularBufferGet?.call(ptr, startIndex, n) ??
            nullptr;

    if (p == nullptr) {
      return Float32List(0);
    }

    final pList = p.asTypedList(n);
    final Float32List ans = Float32List.fromList(pList);

    SherpaOnnxBindings.circularBufferFree?.call(p);

    return ans;
  }

  /// Drop [n] samples from the head of the buffer.
  void pop(int n) {
    if (SherpaOnnxBindings.circularBufferPop == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return;
    }
    SherpaOnnxBindings.circularBufferPop?.call(ptr, n);
  }

  /// Clear the buffer contents.
  /// Reset the detector state.
  void reset() {
    if (SherpaOnnxBindings.circularBufferReset == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return;
    }
    SherpaOnnxBindings.circularBufferReset?.call(ptr);
  }

  int get size {
    if (SherpaOnnxBindings.circularBufferSize == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return 0;
    }

    return SherpaOnnxBindings.circularBufferSize?.call(ptr) ?? 0;
  }

  int get head {
    if (SherpaOnnxBindings.circularBufferHead == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return 0;
    }

    return SherpaOnnxBindings.circularBufferHead?.call(ptr) ?? 0;
  }

  Pointer<SherpaOnnxCircularBuffer> ptr;
}

/// Voice activity detector that emits [SpeechSegment] objects.
///
/// Create one with a [VadModelConfig], feed audio with [acceptWaveform], then
/// inspect queued segments with [isEmpty], [front], [pop], and [flush].
class VoiceActivityDetector {
  VoiceActivityDetector.fromPtr({required this.ptr, required this.config});

  VoiceActivityDetector._({required this.ptr, required this.config});

  // The user has to invoke VoiceActivityDetector.free() to avoid memory leak.
  /// Create a detector with an internal result buffer sized in seconds.
  factory VoiceActivityDetector(
      {required VadModelConfig config, required double bufferSizeInSeconds}) {
    if (SherpaOnnxBindings.createVoiceActivityDetector == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    final c = calloc<SherpaOnnxVadModelConfig>();

    final sileroVadModelPtr = config.sileroVad.model.toNativeUtf8();
    c.ref.sileroVad.model = sileroVadModelPtr;

    c.ref.sileroVad.threshold = config.sileroVad.threshold;
    c.ref.sileroVad.minSilenceDuration = config.sileroVad.minSilenceDuration;
    c.ref.sileroVad.minSpeechDuration = config.sileroVad.minSpeechDuration;
    c.ref.sileroVad.windowSize = config.sileroVad.windowSize;
    c.ref.sileroVad.maxSpeechDuration = config.sileroVad.maxSpeechDuration;

    final tenVadModelPtr = config.tenVad.model.toNativeUtf8();
    c.ref.tenVad.model = tenVadModelPtr;

    c.ref.tenVad.threshold = config.tenVad.threshold;
    c.ref.tenVad.minSilenceDuration = config.tenVad.minSilenceDuration;
    c.ref.tenVad.minSpeechDuration = config.tenVad.minSpeechDuration;
    c.ref.tenVad.windowSize = config.tenVad.windowSize;
    c.ref.tenVad.maxSpeechDuration = config.tenVad.maxSpeechDuration;

    c.ref.sampleRate = config.sampleRate;
    c.ref.numThreads = config.numThreads;

    final providerPtr = config.provider.toNativeUtf8();
    c.ref.provider = providerPtr;

    c.ref.debug = config.debug ? 1 : 0;

    final ptr = SherpaOnnxBindings.createVoiceActivityDetector
            ?.call(c, bufferSizeInSeconds) ??
        nullptr;

    calloc.free(providerPtr);
    calloc.free(tenVadModelPtr);
    calloc.free(sileroVadModelPtr);
    calloc.free(c);

    if (ptr == nullptr) {
      throw Exception("Failed to create vad. Please check your config");
    }

    return VoiceActivityDetector._(ptr: ptr, config: config);
  }

  void free() {
    if (SherpaOnnxBindings.destroyVoiceActivityDetector == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return;
    }
    SherpaOnnxBindings.destroyVoiceActivityDetector?.call(ptr);
    ptr = nullptr;
  }

  /// Feed normalized waveform samples into the detector.
  void acceptWaveform(Float32List samples) {
    if (SherpaOnnxBindings.voiceActivityDetectorAcceptWaveform == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return;
    }

    final n = samples.length;
    final Pointer<Float> p = calloc<Float>(n);

    final pList = p.asTypedList(n);
    pList.setAll(0, samples);

    SherpaOnnxBindings.voiceActivityDetectorAcceptWaveform?.call(ptr, p, n);

    calloc.free(p);
  }

  /// Return `true` if there are no queued speech segments.
  bool isEmpty() {
    if (SherpaOnnxBindings.voiceActivityDetectorEmpty == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return true;
    }

    final int empty =
        SherpaOnnxBindings.voiceActivityDetectorEmpty?.call(ptr) ?? 0;

    return empty == 1;
  }

  /// Return `true` if speech is currently being detected.
  bool isDetected() {
    if (SherpaOnnxBindings.voiceActivityDetectorDetected == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return false;
    }

    final int detected =
        SherpaOnnxBindings.voiceActivityDetectorDetected?.call(ptr) ?? 0;

    return detected == 1;
  }

  /// Drop the front queued speech segment.
  void pop() {
    if (SherpaOnnxBindings.voiceActivityDetectorPop == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return;
    }
    SherpaOnnxBindings.voiceActivityDetectorPop?.call(ptr);
  }

  /// Remove all queued speech segments.
  void clear() {
    if (SherpaOnnxBindings.voiceActivityDetectorClear == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return;
    }
    SherpaOnnxBindings.voiceActivityDetectorClear?.call(ptr);
  }

  /// Return the front queued speech segment.
  SpeechSegment front() {
    if (SherpaOnnxBindings.voiceActivityDetectorFront == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return SpeechSegment(samples: Float32List(0), start: 0);
    }

    final Pointer<SherpaOnnxSpeechSegment> segment =
        SherpaOnnxBindings.voiceActivityDetectorFront?.call(ptr) ?? nullptr;
    if (segment == nullptr) {
      return SpeechSegment(samples: Float32List(0), start: 0);
    }

    final sampleList = segment.ref.samples.asTypedList(segment.ref.n);
    final start = segment.ref.start;

    final samples = Float32List.fromList(sampleList);

    SherpaOnnxBindings.destroySpeechSegment?.call(segment);

    return SpeechSegment(samples: samples, start: start);
  }

  void reset() {
    if (SherpaOnnxBindings.voiceActivityDetectorReset == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return;
    }
    SherpaOnnxBindings.voiceActivityDetectorReset?.call(ptr);
  }

  /// Flush trailing buffered speech into the output queue.
  void flush() {
    if (SherpaOnnxBindings.voiceActivityDetectorFlush == null) {
      throw Exception("Please initialize sherpa-onnx first");
    }

    if (ptr == nullptr) {
      return;
    }
    SherpaOnnxBindings.voiceActivityDetectorFlush?.call(ptr);
  }

  Pointer<SherpaOnnxVoiceActivityDetector> ptr;
  final VadModelConfig config;
}


================================================
FILE: flutter/sherpa_onnx/lib/src/version.dart
================================================
// Copyright (c)  2025  Xiaomi Corporation
import 'dart:ffi';
import 'package:ffi/ffi.dart';
import './sherpa_onnx_bindings.dart';

/// Return the sherpa-onnx version string compiled into the native library.
String getVersion() {
  Pointer<Utf8> version = SherpaOnnxBindings.getVersionStr?.call() ?? nullptr;
  if (version == nullptr) {
    return '';
  }

  return version.toDartString();
}

/// Return the Git SHA1 of the native library build.
String getGitSha1() {
  Pointer<Utf8> gitSha1 = SherpaOnnxBindings.getGitSha1?.call() ?? nullptr;
  if (gitSha1 == nullptr) {
    return '';
  }

  return gitSha1.toDartString();
}

/// Return the Git date of the native library build.
String getGitDate() {
  Pointer<Utf8> gitDate = SherpaOnnxBindings.getGitDate?.call() ?? nullptr;
  if (gitDate == nullptr) {
    return '';
  }

  return gitDate.toDartString();
}


================================================
FILE: flutter/sherpa_onnx/lib/src/wave_reader.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:ffi';
import 'dart:typed_data';
import 'package:ffi/ffi.dart';

import './sherpa_onnx_bindings.dart';

/// Audio samples loaded from a WAV file.
///
/// Samples are normalized to the range `[-1, 1]` and are stored as mono
/// `Float32List` PCM data.
class WaveData {
  WaveData({required this.samples, required this.sampleRate});

  /// normalized to [-1, 1]
  Float32List samples;
  int sampleRate;
}

/// Read a WAV file from disk.
///
/// Returns an empty [WaveData] object if the file cannot be read or decoded.
WaveData readWave(String filename) {
  final Pointer<Utf8> str = filename.toNativeUtf8();

  if (SherpaOnnxBindings.readWave == null) {
    throw Exception("Please initialize sherpa-onnx first");
  }

  Pointer<SherpaOnnxWave> wave =
      SherpaOnnxBindings.readWave?.call(str) ?? nullptr;
  calloc.free(str);

  if (wave == nullptr) {
    return WaveData(samples: Float32List(0), sampleRate: 0);
  }

  final samples = wave.ref.samples.asTypedList(wave.ref.numSamples);

  final newSamples = Float32List.fromList(samples);
  int sampleRate = wave.ref.sampleRate;
  SherpaOnnxBindings.freeWave?.call(wave);

  return WaveData(samples: newSamples, sampleRate: sampleRate);
}


================================================
FILE: flutter/sherpa_onnx/lib/src/wave_writer.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:ffi';
import 'dart:typed_data';
import 'package:ffi/ffi.dart';

import './sherpa_onnx_bindings.dart';

/// Write normalized mono PCM samples to a WAV file.
///
/// Returns `true` on success and `false` otherwise. This is commonly used with
/// samples returned from TTS, VAD pipelines, or speech denoisers.
bool writeWave(
    {required String filename,
    required Float32List samples,
    required int sampleRate}) {
  final Pointer<Utf8> filenamePtr = filename.toNativeUtf8();

  final n = samples.length;
  final Pointer<Float> p = calloc<Float>(n);

  final pList = p.asTypedList(n);
  pList.setAll(0, samples);

  if (SherpaOnnxBindings.writeWave == null) {
    throw Exception("Please initialize sherpa-onnx first");
  }

  int ok =
      SherpaOnnxBindings.writeWave?.call(p, n, sampleRate, filenamePtr) ?? 0;

  calloc.free(p);
  calloc.free(filenamePtr);

  return ok == 1;
}


================================================
FILE: flutter/sherpa_onnx/pubspec.yaml
================================================
name: sherpa_onnx

description: >
  Speech recognition, speech synthesis, speaker diarization, and speaker recognition
  using next-gen Kaldi with onnxruntime without Internet connection.

repository: https://github.com/k2-fsa/sherpa-onnx/tree/master/flutter

issue_tracker: https://github.com/k2-fsa/sherpa-onnx/issues
documentation: https://k2-fsa.github.io/sherpa/onnx/

topics:
  - speech-recognition
  - speech-synthesis
  - speaker-diarization
  - audio-tagging
  - voice-activity-detection

# remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
version: 1.12.31

homepage: https://github.com/k2-fsa/sherpa-onnx

environment:
  sdk: ">=3.1.0 <4.0.0"
  flutter: ">=2.8.1"

dependencies:
  ffi: ^2.1.0
  flutter:
    sdk: flutter

  sherpa_onnx_android: ^1.12.31
  # sherpa_onnx_android:
  #   path: ../sherpa_onnx_android

  sherpa_onnx_macos: ^1.12.31
  # sherpa_onnx_macos:
  #   path: ../sherpa_onnx_macos

  sherpa_onnx_linux: ^1.12.31
  # sherpa_onnx_linux:
  #   path: ../sherpa_onnx_linux

  sherpa_onnx_windows: ^1.12.31
  # sherpa_onnx_windows:
  #   path: ../sherpa_onnx_windows

  sherpa_onnx_ios: ^1.12.31
  # sherpa_onnx_ios:
  #   path: ../sherpa_onnx_ios

dev_dependencies:
  flutter_lints: ^3.0.0

flutter:
  plugin:
    platforms:
      android:
        default_package: sherpa_onnx_android

      ios:
        default_package: sherpa_onnx_ios

      macos:
        default_package: sherpa_onnx_macos

      linux:
        default_package: sherpa_onnx_linux

      windows:
        default_package: sherpa_onnx_windows


================================================
FILE: flutter/sherpa_onnx_android/.gitignore
================================================
# Miscellaneous
*.class
*.log
*.pyc
*.swp
.DS_Store
.atom/
.buildlog/
.history
.svn/
migrate_working_dir/

# IntelliJ related
*.iml
*.ipr
*.iws
.idea/

# The .vscode folder contains launch configuration and tasks you configure in
# VS Code which you may wish to be included in version control, so this line
# is commented out by default.
#.vscode/

# Flutter/Dart/Pub related
# Libraries should not include pubspec.lock, per https://dart.dev/guides/libraries/private-files#pubspeclock.
/pubspec.lock
**/doc/api/
.dart_tool/
build/


================================================
FILE: flutter/sherpa_onnx_android/.metadata
================================================
# This file tracks properties of this Flutter project.
# Used by Flutter tool to assess capabilities and perform upgrades etc.
#
# This file should be version controlled and should not be manually edited.

version:
  revision: "5dcb86f68f239346676ceb1ed1ea385bd215fba1"
  channel: "stable"

project_type: plugin_ffi

# Tracks metadata for the flutter migrate command
migration:
  platforms:
    - platform: root
      create_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1
      base_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1
    - platform: android
      create_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1
      base_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1

  # User provided section

  # List of Local paths (relative to this file) that should be
  # ignored by the migrate tool.
  #
  # Files that are not part of the templates will be ignored by default.
  unmanaged_files:
    - 'lib/main.dart'
    - 'ios/Runner.xcodeproj/project.pbxproj'


================================================
FILE: flutter/sherpa_onnx_android/README.md
================================================
# sherpa_onnx_android

This is a sub project of [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx).

You are not expected to use this package directly.

Please see the entry point at <https://pub.dev/packages/sherpa_onnx>.


================================================
FILE: flutter/sherpa_onnx_android/analysis_options.yaml
================================================
include: package:flutter_lints/flutter.yaml

# Additional information about this file can be found at
# https://dart.dev/guides/language/analysis-options


================================================
FILE: flutter/sherpa_onnx_android/android/.gitignore
================================================
*.iml
.gradle
/local.properties
/.idea/workspace.xml
/.idea/libraries
.DS_Store
/build
/captures
.cxx


================================================
FILE: flutter/sherpa_onnx_android/android/build.gradle
================================================
// The Android Gradle Plugin builds the native code with the Android NDK.

group = "com.k2fsa.sherpa.onnx.sherpa_onnx_android"
version = "1.0"

buildscript {
    repositories {
        google()
        mavenCentral()
    }

    dependencies {
        // The Android Gradle Plugin knows how to build native code with the NDK.
        classpath("com.android.tools.build:gradle:7.3.0")
    }
}

rootProject.allprojects {
    repositories {
        google()
        mavenCentral()
    }
}

apply plugin: "com.android.library"

android {
    namespace 'com.k2fsa.sherpa.onnx'

    // Bumping the plugin compileSdk version requires all clients of this plugin
    // to bump the version in their app.
    compileSdk = 34

    // Use the NDK version
    // declared in /android/app/build.gradle file of the Flutter project.
    // Replace it with a version number if this plugin requires a specific NDK version.
    // (e.g. ndkVersion "23.1.7779620")
    ndkVersion = android.ndkVersion

    compileOptions {
        sourceCompatibility = JavaVersion.VERSION_1_8
        targetCompatibility = JavaVersion.VERSION_1_8
    }

    defaultConfig {
        minSdk = 21
    }
}


================================================
FILE: flutter/sherpa_onnx_android/android/settings.gradle
================================================
rootProject.name = 'sherpa_onnx_android'


================================================
FILE: flutter/sherpa_onnx_android/android/src/main/AndroidManifest.xml
================================================
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
  package="com.k2fsa.sherpa.onnx">
</manifest>


================================================
FILE: flutter/sherpa_onnx_android/android/src/main/jniLibs/README.md
================================================
# Introduction

Pre-built libs are not checked-in.

Please use

 - https://github.com/k2-fsa/sherpa-onnx/blob/master/build-android-arm64-v8a.sh
 - https://github.com/k2-fsa/sherpa-onnx/blob/master/build-android-armv7-eabi.sh
 - https://github.com/k2-fsa/sherpa-onnx/blob/master/build-android-x86-64.sh
 - https://github.com/k2-fsa/sherpa-onnx/blob/master/build-android-x86.sh

The following is an example for `arm64-v8a`:

```bash
git clone https://github.com/k2-fsa/sherpa-onnx
cd sherpa-onnx

export SHERPA_ONNX_ENABLE_JNI=OFF
export SHERPA_ONNX_ENABLE_C_API=ON
./build-android-arm64-v8a.sh

cp -v build-android-arm64-v8a/install/lib/*.so flutter/sherpa_onnx_android/android/src/main/jniLibs/arm64-v8a/
```


================================================
FILE: flutter/sherpa_onnx_android/android/src/main/jniLibs/arm64-v8a/.gitkeep
================================================


================================================
FILE: flutter/sherpa_onnx_android/android/src/main/jniLibs/armeabi-v7a/.gitkeep
================================================


================================================
FILE: flutter/sherpa_onnx_android/android/src/main/jniLibs/x86/.gitkeep
================================================


================================================
FILE: flutter/sherpa_onnx_android/android/src/main/jniLibs/x86_64/.gitkeep
================================================


================================================
FILE: flutter/sherpa_onnx_android/lib/.gitkeep
================================================


================================================
FILE: flutter/sherpa_onnx_android/lib/README.md
================================================
# Introduction

This directory is left empty intentionally.


================================================
FILE: flutter/sherpa_onnx_android/pubspec.yaml
================================================
name: sherpa_onnx_android

description: >
  Speech recognition, speech synthesis, and speaker recognition using next-gen Kaldi
  with onnxruntime without Internet connection.

version: 0.0.1

repository: https://github.com/k2-fsa/sherpa-onnx/tree/master/flutter

issue_tracker: https://github.com/k2-fsa/sherpa-onnx/issues
documentation: https://k2-fsa.github.io/sherpa/onnx/

homepage: https://github.com/k2-fsa/sherpa-onnx

topics:
  - speech-recognition
  - speech-synthesis
  - speaker-identification
  - audio-tagging
  - voice-activity-detection

environment:
  sdk: ">=3.0.0 <4.0.0"
  flutter: ">=2.8.1"

dependencies:
  flutter:
    sdk: flutter

flutter:
  plugin:
    platforms:
      android:
        ffiPlugin: true


================================================
FILE: flutter/sherpa_onnx_ios/.gitignore
================================================
# Miscellaneous
*.class
*.log
*.pyc
*.swp
.DS_Store
.atom/
.buildlog/
.history
.svn/
migrate_working_dir/

# IntelliJ related
*.iml
*.ipr
*.iws
.idea/

# The .vscode folder contains launch configuration and tasks you configure in
# VS Code which you may wish to be included in version control, so this line
# is commented out by default.
#.vscode/

# Flutter/Dart/Pub related
# Libraries should not include pubspec.lock, per https://dart.dev/guides/libraries/private-files#pubspeclock.
/pubspec.lock
**/doc/api/
.dart_tool/
build/


================================================
FILE: flutter/sherpa_onnx_ios/.metadata
================================================
# This file tracks properties of this Flutter project.
# Used by Flutter tool to assess capabilities and perform upgrades etc.
#
# This file should be version controlled and should not be manually edited.

version:
  revision: "5dcb86f68f239346676ceb1ed1ea385bd215fba1"
  channel: "stable"

project_type: plugin_ffi

# Tracks metadata for the flutter migrate command
migration:
  platforms:
    - platform: root
      create_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1
      base_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1
    - platform: ios
      create_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1
      base_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1

  # User provided section

  # List of Local paths (relative to this file) that should be
  # ignored by the migrate tool.
  #
  # Files that are not part of the templates will be ignored by default.
  unmanaged_files:
    - 'lib/main.dart'
    - 'ios/Runner.xcodeproj/project.pbxproj'


================================================
FILE: flutter/sherpa_onnx_ios/README.md
================================================
# sherpa_onnx_ios

This is a sub project of [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx).

You are not expected to use this package directly.

Please see the entry point at <https://pub.dev/packages/sherpa_onnx>.


================================================
FILE: flutter/sherpa_onnx_ios/analysis_options.yaml
================================================
include: package:flutter_lints/flutter.yaml

# Additional information about this file can be found at
# https://dart.dev/guides/language/analysis-options


================================================
FILE: flutter/sherpa_onnx_ios/ios/sherpa_onnx_ios.podspec
================================================
#
# To learn more about a Podspec see http://guides.cocoapods.org/syntax/podspec.html.
# Run `pod lib lint sherpa_onnx_ios.podspec` to validate before publishing.
#
# See also
# https://github.com/google/webcrypto.dart/blob/2010361a106d7a872d90e3dfebfed250e2ede609/ios/webcrypto.podspec#L23-L28
# https://groups.google.com/g/dart-ffi/c/nUATMBy7r0c
Pod::Spec.new do |s|
  s.name             = 'sherpa_onnx_ios'
  s.version          = '1.12.31'
  s.summary          = 'A new Flutter FFI plugin project.'
  s.description      = <<-DESC
A new Flutter FFI plugin project.
                       DESC
  s.homepage         = 'https://github.com/k2-fsa/sherpa-onnx'
  s.license          = { :file => '../LICENSE' }
  s.author           = { 'Fangjun Kuang' => 'csukuangfj@gmail.com' }

  # This will ensure the source files in Classes/ are included in the native
  # builds of apps using this FFI plugin. Podspec does not support relative
  # paths, so Classes contains a forwarder C file that relatively imports
  # `../src/*` so that the C sources can be shared among all target platforms.
  s.source           = { :path => '.' }
  s.dependency 'Flutter'
  s.platform = :ios, '13.0'
  s.preserve_paths = 'sherpa_onnx.xcframework/**/*'
  s.vendored_frameworks = 'sherpa_onnx.xcframework'

  # Flutter.framework does not contain a i386 slice.
  s.pod_target_xcconfig = {
    'DEFINES_MODULE' => 'YES', 'EXCLUDED_ARCHS[sdk=iphonesimulator*]' => 'i386'
    }
  s.swift_version = '5.0'
end


================================================
FILE: flutter/sherpa_onnx_ios/lib/README.md
================================================
# Introduction

This directory is left empty intentionally.


================================================
FILE: flutter/sherpa_onnx_ios/pubspec.yaml
================================================
name: sherpa_onnx_ios

description: >
  Speech recognition, speech synthesis, and speaker recognition using next-gen Kaldi
  with onnxruntime without Internet connection.

version: 0.0.1

repository: https://github.com/k2-fsa/sherpa-onnx/tree/master/flutter

issue_tracker: https://github.com/k2-fsa/sherpa-onnx/issues
documentation: https://k2-fsa.github.io/sherpa/onnx/

homepage: https://github.com/k2-fsa/sherpa-onnx

topics:
  - speech-recognition
  - speech-synthesis
  - speaker-identification
  - audio-tagging
  - voice-activity-detection

environment:
  sdk: ">=3.0.0 <4.0.0"
  flutter: ">=2.8.1"

dependencies:
  flutter:
    sdk: flutter

flutter:
  plugin:
    platforms:
      ios:
        ffiPlugin: true


================================================
FILE: flutter/sherpa_onnx_linux/.gitignore
================================================
# Miscellaneous
*.class
*.log
*.pyc
*.swp
.DS_Store
.atom/
.buildlog/
.history
.svn/
migrate_working_dir/

# IntelliJ related
*.iml
*.ipr
*.iws
.idea/

# The .vscode folder contains launch configuration and tasks you configure in
# VS Code which you may wish to be included in version control, so this line
# is commented out by default.
#.vscode/

# Flutter/Dart/Pub related
# Libraries should not include pubspec.lock, per https://dart.dev/guides/libraries/private-files#pubspeclock.
/pubspec.lock
**/doc/api/
.dart_tool/
build/


================================================
FILE: flutter/sherpa_onnx_linux/.metadata
================================================
# This file tracks properties of this Flutter project.
# Used by Flutter tool to assess capabilities and perform upgrades etc.
#
# This file should be version controlled and should not be manually edited.

version:
  revision: "5dcb86f68f239346676ceb1ed1ea385bd215fba1"
  channel: "stable"

project_type: plugin_ffi

# Tracks metadata for the flutter migrate command
migration:
  platforms:
    - platform: root
      create_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1
      base_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1
    - platform: linux
      create_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1
      base_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1

  # User provided section

  # List of Local paths (relative to this file) that should be
  # ignored by the migrate tool.
  #
  # Files that are not part of the templates will be ignored by default.
  unmanaged_files:
    - 'lib/main.dart'
    - 'ios/Runner.xcodeproj/project.pbxproj'


================================================
FILE: flutter/sherpa_onnx_linux/README.md
================================================
# sherpa_onnx_linux

This is a sub project of [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx).

You are not expected to use this package directly.

Please see the entry point at <https://pub.dev/packages/sherpa_onnx>.


================================================
FILE: flutter/sherpa_onnx_linux/analysis_options.yaml
================================================
include: package:flutter_lints/flutter.yaml

# Additional information about this file can be found at
# https://dart.dev/guides/language/analysis-options


================================================
FILE: flutter/sherpa_onnx_linux/lib/.gitkeep
================================================


================================================
FILE: flutter/sherpa_onnx_linux/lib/README.md
================================================
# Introduction

This directory is left empty intentionally.


================================================
FILE: flutter/sherpa_onnx_linux/linux/CMakeLists.txt
================================================
# The Flutter tooling requires that developers have CMake 3.10 or later
# installed. You should not increase this version, as doing so will cause
# the plugin to fail to compile for some customers of the plugin.
cmake_minimum_required(VERSION 3.10)

# Project-level configuration.
set(PROJECT_NAME "sherpa_onnx_linux")
project(${PROJECT_NAME} LANGUAGES CXX)

if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
  set(LIB_ARCH_DIR "x64")
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
  set(LIB_ARCH_DIR "aarch64")
else()
  message(FATAL_ERROR "Unsupported arch: ${CMAKE_SYSTEM_PROCESSOR}")
endif()

# List of absolute paths to libraries that should be bundled with the plugin.
# This list could contain prebuilt libraries, or libraries created by an
# external build triggered from this build file.
set(sherpa_onnx_linux_bundled_libraries
  "${CMAKE_CURRENT_SOURCE_DIR}/${LIB_ARCH_DIR}/libsherpa-onnx-c-api.so"
  "${CMAKE_CURRENT_SOURCE_DIR}/${LIB_ARCH_DIR}/libonnxruntime.so"
  PARENT_SCOPE
)


================================================
FILE: flutter/sherpa_onnx_linux/linux/README.md
================================================
# Introduction

`*.so` files are generated dynamically using GitHub actions during a new release.

We don't check-in pre-built library files into git.


================================================
FILE: flutter/sherpa_onnx_linux/linux/aarch64/.gitikeep
================================================


================================================
FILE: flutter/sherpa_onnx_linux/linux/x64/.gitikeep
================================================


================================================
FILE: flutter/sherpa_onnx_linux/pubspec.yaml
================================================
name: sherpa_onnx_linux
description: >
  Speech recognition, speech synthesis, and speaker recognition using next-gen Kaldi
  with onnxruntime without Internet connection.

version: 0.0.1

repository: https://github.com/k2-fsa/sherpa-onnx/tree/master/flutter

issue_tracker: https://github.com/k2-fsa/sherpa-onnx/issues
documentation: https://k2-fsa.github.io/sherpa/onnx/

homepage: https://github.com/k2-fsa/sherpa-onnx

topics:
  - speech-recognition
  - speech-synthesis
  - speaker-identification
  - audio-tagging
  - voice-activity-detection

environment:
  sdk: ">=3.0.0 <4.0.0"
  flutter: ">=2.8.1"

dependencies:
  flutter:
    sdk: flutter

flutter:
  plugin:
    platforms:
      linux:
        ffiPlugin: true


================================================
FILE: flutter/sherpa_onnx_macos/.gitignore
================================================
# Miscellaneous
*.class
*.log
*.pyc
*.swp
.DS_Store
.atom/
.buildlog/
.history
.svn/
migrate_working_dir/

# IntelliJ related
*.iml
*.ipr
*.iws
.idea/

# The .vscode folder contains launch configuration and tasks you configure in
# VS Code which you may wish to be included in version control, so this line
# is commented out by default.
#.vscode/

# Flutter/Dart/Pub related
# Libraries should not include pubspec.lock, per https://dart.dev/guides/libraries/private-files#pubspeclock.
/pubspec.lock
**/doc/api/
.dart_tool/
build/


================================================
FILE: flutter/sherpa_onnx_macos/.metadata
================================================
# This file tracks properties of this Flutter project.
# Used by Flutter tool to assess capabilities and perform upgrades etc.
#
# This file should be version controlled and should not be manually edited.

version:
  revision: "5dcb86f68f239346676ceb1ed1ea385bd215fba1"
  channel: "stable"

project_type: plugin_ffi

# Tracks metadata for the flutter migrate command
migration:
  platforms:
    - platform: root
      create_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1
      base_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1
    - platform: macos
      create_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1
      base_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1

  # User provided section

  # List of Local paths (relative to this file) that should be
  # ignored by the migrate tool.
  #
  # Files that are not part of the templates will be ignored by default.
  unmanaged_files:
    - 'lib/main.dart'
    - 'ios/Runner.xcodeproj/project.pbxproj'


================================================
FILE: flutter/sherpa_onnx_macos/README.md
================================================
# sherpa_onnx_macos

This is a sub project of [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx).

You are not expected to use this package directly.

Please see the entry point at <https://pub.dev/packages/sherpa_onnx>.


================================================
FILE: flutter/sherpa_onnx_macos/analysis_options.yaml
================================================
include: package:flutter_lints/flutter.yaml

# Additional information about this file can be found at
# https://dart.dev/guides/language/analysis-options


================================================
FILE: flutter/sherpa_onnx_macos/lib/.gitkeep
================================================


================================================
FILE: flutter/sherpa_onnx_macos/lib/README.md
================================================
# Introduction

This directory is left empty intentionally.


================================================
FILE: flutter/sherpa_onnx_macos/macos/README.md
================================================
# Introduction

`*.dylib` files are generated dynamically using GitHub actions during a new release.

We don't check-in pre-built library files into git.


================================================
FILE: flutter/sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
================================================
#
# To learn more about a Podspec see http://guides.cocoapods.org/syntax/podspec.html.
# Run `pod lib lint sherpa_onnx_macos.podspec` to validate before publishing.
#
Pod::Spec.new do |s|
  s.name             = 'sherpa_onnx_macos'
  s.version          = '1.12.31'
  s.summary          = 'sherpa-onnx Flutter FFI plugin project.'
  s.description      = <<-DESC
sherpa-onnx Flutter FFI plugin project.
                       DESC
  s.homepage         = 'https://github.com/k2-fsa/sherpa-onnx'
  s.license          = { :file => '../LICENSE' }
  s.author           = { 'Fangjun Kuang' => 'csukuangfj@gmail.com' }

  # This will ensure the source files in Classes/ are included in the native
  # builds of apps using this FFI plugin. Podspec does not support relative
  # paths, so Classes contains a forwarder C file that relatively imports
  # `../src/*` so that the C sources can be shared among all target platforms.
  s.source           = { :path => '.' }
  s.dependency 'FlutterMacOS'
  s.vendored_libraries = '*.dylib'

  s.platform = :osx, '10.11'
  s.pod_target_xcconfig = { 'DEFINES_MODULE' => 'YES' }
  s.swift_version = '5.0'
end


================================================
FILE: flutter/sherpa_onnx_macos/pubspec.yaml
================================================
name: sherpa_onnx_macos

description: >
  Speech recognition, speech synthesis, and speaker recognition using next-gen Kaldi
  with onnxruntime without Internet connection.

version: 0.0.1

repository: https://github.com/k2-fsa/sherpa-onnx/tree/master/flutter

issue_tracker: https://github.com/k2-fsa/sherpa-onnx/issues
documentation: https://k2-fsa.github.io/sherpa/onnx/

homepage: https://github.com/k2-fsa/sherpa-onnx

topics:
  - speech-recognition
  - speech-synthesis
  - speaker-identification
  - audio-tagging
  - voice-activity-detection

environment:
  sdk: ">=3.0.0 <4.0.0"
  flutter: ">=2.8.1"

dependencies:
  flutter:
    sdk: flutter

flutter:
  plugin:
    platforms:
      macos:
        ffiPlugin: true


================================================
FILE: flutter/sherpa_onnx_windows/.gitignore
================================================
# Miscellaneous
*.class
*.log
*.pyc
*.swp
.DS_Store
.atom/
.buildlog/
.history
.svn/
migrate_working_dir/

# IntelliJ related
*.iml
*.ipr
*.iws
.idea/

# The .vscode folder contains launch configuration and tasks you configure in
# VS Code which you may wish to be included in version control, so this line
# is commented out by default.
#.vscode/

# Flutter/Dart/Pub related
# Libraries should not include pubspec.lock, per https://dart.dev/guides/libraries/private-files#pubspeclock.
/pubspec.lock
**/doc/api/
.dart_tool/
build/


================================================
FILE: flutter/sherpa_onnx_windows/.metadata
================================================
# This file tracks properties of this Flutter project.
# Used by Flutter tool to assess capabilities and perform upgrades etc.
#
# This file should be version controlled and should not be manually edited.

version:
  revision: "5dcb86f68f239346676ceb1ed1ea385bd215fba1"
  channel: "stable"

project_type: plugin_ffi

# Tracks metadata for the flutter migrate command
migration:
  platforms:
    - platform: root
      create_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1
      base_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1
    - platform: windows
      create_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1
      base_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1

  # User provided section

  # List of Local paths (relative to this file) that should be
  # ignored by the migrate tool.
  #
  # Files that are not part of the templates will be ignored by default.
  unmanaged_files:
    - 'lib/main.dart'
    - 'ios/Runner.xcodeproj/project.pbxproj'


================================================
FILE: flutter/sherpa_onnx_windows/README.md
================================================
# sherpa_onnx_windows

This is a sub project of [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx).

You are not expected to use this package directly.

Please see the entry point at <https://pub.dev/packages/sherpa_onnx>.


================================================
FILE: flutter/sherpa_onnx_windows/analysis_options.yaml
================================================
include: package:flutter_lints/flutter.yaml

# Additional information about this file can be found at
# https://dart.dev/guides/language/analysis-options


================================================
FILE: flutter/sherpa_onnx_windows/lib/.gitkeep
================================================


================================================
FILE: flutter/sherpa_onnx_windows/lib/README.md
================================================
# Introduction

This directory is left empty intentionally.


================================================
FILE: flutter/sherpa_onnx_windows/pubspec.yaml
================================================
name: sherpa_onnx_windows

description: >
  Speech recognition, speech synthesis, and speaker recognition using next-gen Kaldi
  with onnxruntime without Internet connection.

version: 0.0.1

repository: https://github.com/k2-fsa/sherpa-onnx/tree/master/flutter

issue_tracker: https://github.com/k2-fsa/sherpa-onnx/issues
documentation: https://k2-fsa.github.io/sherpa/onnx/

homepage: https://github.com/k2-fsa/sherpa-onnx

topics:
  - speech-recognition
  - speech-synthesis
  - speaker-identification
  - audio-tagging
  - voice-activity-detection

environment:
  sdk: ">=3.0.0 <4.0.0"
  flutter: ">=2.8.1"

dependencies:
  flutter:
    sdk: flutter

flutter:
  plugin:
    platforms:
      windows:
        ffiPlugin: true


================================================
FILE: flutter-examples/.gitignore
================================================
# Do not remove or rename entries in this file, only add new ones
# See https://github.com/flutter/flutter/issues/128635 for more context.

# Miscellaneous
*.class
*.lock
*.log
*.pyc
*.swp
.DS_Store
.atom/
.buildlog/
.history
.svn/

# IntelliJ related
*.iml
*.ipr
*.iws
.idea/

# Visual Studio Code related
.classpath
.project
.settings/
.vscode/*

# Flutter repo-specific
/bin/cache/
/bin/internal/bootstrap.bat
/bin/internal/bootstrap.sh
/bin/mingit/
/dev/benchmarks/mega_gallery/
/dev/bots/.recipe_deps
/dev/bots/android_tools/
/dev/devicelab/ABresults*.json
/dev/docs/doc/
/dev/docs/api_docs.zip
/dev/docs/flutter.docs.zip
/dev/docs/lib/
/dev/docs/pubspec.yaml
/dev/integration_tests/**/xcuserdata
/dev/integration_tests/**/Pods
/packages/flutter/coverage/
version
analysis_benchmark.json

# packages file containing multi-root paths
.packages.generated

# Flutter/Dart/Pub related
**/doc/api/
.dart_tool/
.flutter-plugins
.flutter-plugins-dependencies
**/generated_plugin_registrant.dart
.packages
.pub-preload-cache/
.pub-cache/
.pub/
build/
flutter_*.png
linked_*.ds
unlinked.ds
unlinked_spec.ds

# Android related
**/android/**/gradle-wrapper.jar
.gradle/
**/android/captures/
**/android/gradlew
**/android/gradlew.bat
**/android/local.properties
**/android/**/GeneratedPluginRegistrant.java
**/android/key.properties
*.jks

# iOS/XCode related
**/ios/**/*.mode1v3
**/ios/**/*.mode2v3
**/ios/**/*.moved-aside
**/ios/**/*.pbxuser
**/ios/**/*.perspectivev3
**/ios/**/*sync/
**/ios/**/.sconsign.dblite
**/ios/**/.tags*
**/ios/**/.vagrant/
**/ios/**/DerivedData/
**/ios/**/Icon?
**/ios/**/Pods/
**/ios/**/.symlinks/
**/ios/**/profile
**/ios/**/xcuserdata
**/ios/.generated/
**/ios/Flutter/.last_build_id
**/ios/Flutter/App.framework
**/ios/Flutter/Flutter.framework
**/ios/Flutter/Flutter.podspec
**/ios/Flutter/Generated.xcconfig
**/ios/Flutter/ephemeral
**/ios/Flutter/app.flx
**/ios/Flutter/app.zip
**/ios/Flutter/flutter_assets/
**/ios/Flutter/flutter_export_environment.sh
**/ios/ServiceDefinitions.json
**/ios/Runner/GeneratedPluginRegistrant.*

# macOS
**/Flutter/ephemeral/
**/Pods/
**/macos/Flutter/GeneratedPluginRegistrant.swift
**/macos/Flutter/ephemeral
**/xcuserdata/

# Windows
**/windows/flutter/generated_plugin_registrant.cc
**/windows/flutter/generated_plugin_registrant.h
**/windows/flutter/generated_plugins.cmake

# Linux
**/linux/flutter/generated_plugin_registrant.cc
**/linux/flutter/generated_plugin_registrant.h
**/linux/flutter/generated_plugins.cmake

# Coverage
coverage/

# Symbols
app.*.symbols

# Exceptions to above rules.
!**/ios/**/default.mode1v3
!**/ios/**/default.mode2v3
!**/ios/**/default.pbxuser
!**/ios/**/default.perspectivev3
!/packages/flutter_tools/test/data/dart_dependencies_test/**/.packages
!/dev/ci/**/Gemfile.lock
!.vscode/settings.json
Podfile


================================================
FILE: flutter-examples/README.md
================================================
# Introduction

This directory contains flutter examples of `sherpa-onnx`.

| Directory | Pre-built App |
|-----------|---------------|
|[./tts](./tts)|[URL](https://k2-fsa.github.io/sherpa/onnx/flutter/pre-built-app.html#text-to-speech-tts-speech-synthesis)|
|[./streaming_asr](./streaming_asr)|[URL](https://k2-fsa.github.io/sherpa/onnx/flutter/pre-built-app.html#streaming-speech-recognition-stt-asr)|

# Ways to create an example
```bash
flutter create --platforms windows,macos streaming_asr
cd streaming_asr
flutter pub get

# to support a new platform, e.g., android, use

cd streaming_asr
flutter create --platforms --org com.k2fsa.sherpa.onnx android ./

# To add linux
flutter config --enable-linux-desktop
flutter create --platforms=linux .
```

To run with android, first use
```
(py38) fangjuns-MacBook-Pro:streaming_asr fangjun$ flutter run devices
No devices found yet. Checking for wireless devices...

No supported devices found with name or id matching 'android-arm64'.

The following devices were found:
Mi 10 (mobile)  • 61106679 • android-arm64  • Android 12 (API 31)
macOS (desktop) • macos    • darwin-x64     • macOS 13.1 22C65 darwin-x64
Chrome (web)    • chrome   • web-javascript • Google Chrome 126.0.6478.127
```
to find available devices. I have attached my Android phone (Xiaomi 10) to my computer
and it shows the device ID of my Android phone is `61106679`, so I use

```bash
(py38) fangjuns-MacBook-Pro:streaming_asr fangjun$ flutter run -d 61106679
```

to run it.

If you get the following errors and hint:

```
BUILD FAILED in 2m 43s
Running Gradle task 'assembleDebug'...                            165.3s

┌─ Flutter Fix ───────────────────────────────────────────────────────────────────────────────────────────────────┐
│ The plugin record_android requires a higher Android SDK version.                                                │
│ Fix this issue by adding the following to the file                                                              │
│ /Users/fangjun/open-source/sherpa-onnx/flutter-examples/streaming_asr/android/app/build.gradle:                 │
│ android {                                                                                                       │
│   defaultConfig {                                                                                               │
│     minSdkVersion 23                                                                                            │
│   }                                                                                                             │
│ }                                                                                                               │
│                                                                                                                 │
│                                                                                                                 │
│ Following this change, your app will not be available to users running Android SDKs below 23.                   │
│ Consider searching for a version of this plugin that supports these lower versions of the Android SDK instead.  │
│ For more information, see: https://docs.flutter.dev/deployment/android#reviewing-the-gradle-build-configuration │
└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
Error: Gradle task assembleDebug failed with exit code 1
```

Please use the following changes:

```diff
--- a/flutter-examples/streaming_asr/android/app/build.gradle
+++ b/flutter-examples/streaming_asr/android/app/build.gradle
@@ -38,7 +38,7 @@ android {
         applicationId = "com.k2fsa.sherpa.onnx.streaming_asr"
         // You can update the following values to match your application needs.
         // For more information, see: https://docs.flutter.dev/deployment/android#reviewing-the-gradle-build-configuration.
-        minSdk = flutter.minSdkVersion
+        minSdk = 23
         targetSdk = flutter.targetSdkVersion
         versionCode = flutterVersionCode.toInteger()
         versionName = flutterVersionName
```

If you get the following errors:

```
Launching lib/main.dart on Mi 10 in debug mode...
ERROR:/Users/fangjun/open-source/sherpa-onnx/flutter-examples/streaming_asr/build/record_android/intermediates/runtime_library_classes_jar/debug/clas
ses.jar: D8: com.android.tools.r8.internal.Hc: Sealed classes are not supported as program classes

FAILURE: Build failed with an exception.

* What went wrong:
Execution failed for task ':app:mergeLibDexDebug'.
> Could not resolve all files for configuration ':app:debugRuntimeClasspath'.
   > Failed to transform classes.jar (project :record_android) to match attributes {artifactType=android-dex, asm-transformed-variant=NONE, com.andro
id.build.api.attributes.AgpVersionAttr=7.3.0, com.android.build.api.attributes.BuildTypeAttr=debug, com.android.build.gradle.internal.attributes.Vari
antAttr=debug, dexing-enable-desugaring=true, dexing-enable-jacoco-instrumentation=false, dexing-is-debuggable=true, dexing-min-sdk=23, org.gradle.ca
tegory=library, org.gradle.jvm.environment=android, org.gradle.libraryelements=jar, org.gradle.usage=java-runtime, org.jetbrains.kotlin.platform.type
=androidJvm}.
      > Execution failed for DexingWithClasspathTransform: /Users/fangjun/open-source/sherpa-onnx/flutter-examples/streaming_asr/build/record_android
/intermediates/runtime_library_classes_jar/debug/classes.jar.
         > Error while dexing.

* Try:
> Run with --stacktrace option to get the stack trace.
> Run with --info or --debug option to get more log output.
> Run with --scan to get full insights.

* Get more help at https://help.gradle.org

BUILD FAILED in 2m 10s
```

Please refer to <https://github.com/llfbandit/record/blob/master/record_android/README.md>
to make the following changes

```diff
diff --git a/flutter-examples/streaming_asr/android/settings.gradle b/flutter-examples/streaming_asr/android/settings.gradle
index 536165d3..9b1a1012 100644
--- a/flutter-examples/streaming_asr/android/settings.gradle
+++ b/flutter-examples/streaming_asr/android/settings.gradle
@@ -18,7 +18,7 @@ pluginManagement {

 plugins {
     id "dev.flutter.flutter-plugin-loader" version "1.0.0"
-    id "com.android.application" version "7.3.0" apply false
+    id "com.android.application" version "7.4.2" apply false
     id "org.jetbrains.kotlin.android" version "1.7.10" apply false
 }
```

# ios

To support ios, run

```bash
cd streaming_asr
flutter create --platforms ios ./
```

Connect your iPhone to the computer, and run `flutter devices`, which will print:

```bash
Found 4 connected devices:
  iPhone 14 (mobile) • 634110C4-168D-408F-A938-D7FC62222579 • ios            • com.apple.CoreSimulator.SimRuntime.iOS-16-2 (simulator)
  iPhone (mobile)    • 00008030-001064212E85802E            • ios            • iOS 16.3 20D47
  macOS (desktop)    • macos                                • darwin-x64     • macOS 13.1 22C65 darwin-x64
  Chrome (web)       • chrome                               • web-javascript • Google Chrome 126.0.6478.127

No wireless devices were found.

Run "flutter emulators" to list and start any available device emulators.
(E.g., flutter emulators --launch ios)

If you expected another device to be detected, please run "flutter doctor" to diagnose potential issues. You may also try increasing the time to wait
for connected devices with the "--device-timeout" flag. Visit https://flutter.dev/setup/ for troubleshooting tips.
```

Then run

```bash
flutter run -d 00008030-001064212E85802E
```

It will show:
```
Launching lib/main.dart on iPhone in debug mode...
════════════════════════════════════════════════════════════════════════════════
No valid code signing certificates were found
You can connect to your Apple Developer account by signing in with your Apple ID
in Xcode and create an iOS Development Certificate as well as a Provisioning
Profile for your project by:
  1- Open the Flutter project's Xcode target with
       open ios/Runner.xcworkspace
  2- Select the 'Runner' project in the navigator then the 'Runner' target
     in the project settings
  3- Make sure a 'Development Team' is selected under Signing & Capabilities > Team.
     You may need to:
         - Log in with your Apple ID in Xcode first
         - Ensure you have a valid unique Bundle ID
         - Register your device with your Apple Developer Account
         - Let Xcode automatically provision a profile for your app
  4- Build or run your project again
  5- Trust your newly created Development Certificate on your iOS device
     via Settings > General > Device Management > [your new certificate] > Trust

For more information, please visit:
  https://developer.apple.com/library/content/documentation/IDEs/Conceptual/
  AppDistributionGuide/MaintainingCertificates/MaintainingCertificates.html

Or run on an iOS simulator without code signing
════════════════════════════════════════════════════════════════════════════════
Error: No development certificates available to code sign app for device deployment
```

Follow the above instructions.

The following is a screenshot.

![](./ios-demo-1.jpg)

Then close `xcode` and run again

```bash
flutter run -d 00008030-001064212E85802E
```

You would get the following errors:
```
Error (Xcode): Undefined symbol: ___cxa_pure_virtual


Error (Xcode): Undefined symbol: ___cxa_throw


Error (Xcode): Undefined symbol: ___gxx_personality_v0


Error launching application on iPhone.
```

Make the following changes:

```diff
diff --git a/flutter-examples/streaming_asr/ios/Runner.xcodeproj/project.pbxproj b/flutter-examples/streaming_asr/ios/Runner.xcodeproj/project.pbxproj
index b208c7e9..466b0afb 100644
--- a/flutter-examples/streaming_asr/ios/Runner.xcodeproj/project.pbxproj
+++ b/flutter-examples/streaming_asr/ios/Runner.xcodeproj/project.pbxproj
@@ -482,6 +482,7 @@
 				PRODUCT_NAME = "$(TARGET_NAME)";
 				SWIFT_OBJC_BRIDGING_HEADER = "Runner/Runner-Bridging-Header.h";
 				SWIFT_VERSION = 5.0;
+				OTHER_LDFLAGS = "-lc++";
 				VERSIONING_SYSTEM = "apple-generic";
 			};
 			name = Profile;
@@ -500,6 +501,7 @@
 				SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
 				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
 				SWIFT_VERSION = 5.0;
+				OTHER_LDFLAGS = "-lc++";
 				TEST_HOST = "$(BUILT_PRODUCTS_DIR)/Runner.app/$(BUNDLE_EXECUTABLE_FOLDER_PATH)/Runner";
 			};
 			name = Debug;
@@ -516,6 +518,7 @@
 				PRODUCT_BUNDLE_IDENTIFIER = com.k2fsa.sherpa.onnx.streamingAsr.RunnerTests;
 				PRODUCT_NAME = "$(TARGET_NAME)";
 				SWIFT_VERSION = 5.0;
+				OTHER_LDFLAGS = "-lc++";
 				TEST_HOST = "$(BUILT_PRODUCTS_DIR)/Runner.app/$(BUNDLE_EXECUTABLE_FOLDER_PATH)/Runner";
 			};
 			name = Release;
@@ -532,6 +535,7 @@
 				PRODUCT_BUNDLE_IDENTIFIER = com.k2fsa.sherpa.onnx.streamingAsr.RunnerTests;
 				PRODUCT_NAME = "$(TARGET_NAME)";
 				SWIFT_VERSION = 5.0;
+				OTHER_LDFLAGS = "-lc++";
 				TEST_HOST = "$(BUILT_PRODUCTS_DIR)/Runner.app/$(BUNDLE_EXECUTABLE_FOLDER_PATH)/Runner";
 			};
 			name = Profile;
@@ -666,6 +670,7 @@
 				SWIFT_OBJC_BRIDGING_HEADER = "Runner/Runner-Bridging-Header.h";
 				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
 				SWIFT_VERSION = 5.0;
+				OTHER_LDFLAGS = "-lc++";
 				VERSIONING_SYSTEM = "apple-generic";
 			};
 			name = Debug;
@@ -688,6 +693,7 @@
 				PRODUCT_NAME = "$(TARGET_NAME)";
 				SWIFT_OBJC_BRIDGING_HEADER = "Runner/Runner-Bridging-Header.h";
 				SWIFT_VERSION = 5.0;
+				OTHER_LDFLAGS = "-lc++";
 				VERSIONING_SYSTEM = "apple-generic";
 			};
 			name = Release;
```

Then re-run

```bash
flutter run -d 00008030-001064212E85802E
```

Finally, it shows the following:

```
Launching lib/main.dart on iPhone in debug mode...
Automatically signing iOS for device deployment using specified development team in Xcode project: N5ZH3Z63A6
Running Xcode build...
 └─Compiling, linking and signing...                         9.0s
Xcode build done.                                           25.6s
(lldb) 2024-07-06 17:43:54.970077+0800 Runner[4851:965716] [SceneConfiguration] Info.plist contained no UIScene configuration dictionary (looking for configuration named "(no name)")
Warning: Unable to create restoration in progress marker file
fopen failed for data file: errno = 2 (No such file or directory)
Errors found! Invalidating cache...
fopen failed for data file: errno = 2 (No such file or directory)
Errors found! Invalidating cache...
Installing and launching...                                        31.8s
Syncing files to device iPhone...                                1,080ms

Flutter run key commands.
r Hot reload. 🔥🔥🔥
R Hot restart.
h List all available interactive commands.
d Detach (terminate "flutter run" but leave application running).
c Clear the screen
q Quit (terminate the application on the device).

A Dart VM Service on iPhone is available at: http://127.0.0.1:51556/QDn_7CJ2gzk=/
The Flutter DevTools debugger and profiler on iPhone is available at: http://127.0.0.1:9100?uri=http://127.0.0.1:51556/QDn_7CJ2gzk=/
```

If it shows the following log after pressing `start` within the sherpa-onnx APP on your iPhone:

```
[access] This app has crashed because it attempted to access privacy-sensitive data without a usage description.  The app's Info.plist must contain an NSMicrophoneUsageDescription key with a string value explaining to the user how the app uses this data.
```

Please make the following changes
```diff
--- a/flutter-examples/streaming_asr/ios/Runner/Info.plist
+++ b/flutter-examples/streaming_asr/ios/Runner/Info.plist
@@ -2,6 +2,8 @@
 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
 <plist version="1.0">
 <dict>
+       <key>NSMicrophoneUsageDescription</key>
+       <string>Need microphone access for recording speech</string>
        <key>CFBundleDevelopmentRegion</key>
        <string>$(DEVELOPMENT_LANGUAGE)</string>
        <key>CFBundleDisplayName</key>
```

And re-run

```bash
flutter run -d 00008030-001064212E85802E
```

The following are some screenshots of the iOS APP:

|1|2|3|
|---|---|---|
|![](./ios-demo-2.jpg)|![](./ios-demo-3.jpg)|![](./ios-demo-4.jpg)|


**Hint**: If you find that you cannot start the APP on your iPhone after
disconnecting from the computer, please use

```bash
flutter run --release -d 00008030-001064212E85802E
```


================================================
FILE: flutter-examples/andriod-notes.md
================================================
# Note about android

Useful commands

```bash
flutter build apk --split-per-abi --release
```

The above commands print the following:

```
✓ Built build/app/outputs/flutter-apk/app-armeabi-v7a-release.apk (94.8MB)
✓ Built build/app/outputs/flutter-apk/app-arm64-v8a-release.apk (96.1MB)
✓ Built build/app/outputs/flutter-apk/app-x86_64-release.apk (96.9MB)
```

Note that it does not generate APK for `x86`.

```
adb install build/app/outputs/flutter-apk/app-arm64-v8a-release.apk
```


================================================
FILE: flutter-examples/how-tts-is-created.md
================================================
# Introduction

This document describes how the [tts](./tts) folder is created.


```bash
flutter create --platforms windows,macos,linux,android,ios tts
```

It prints the following:

```
Developer identity "Apple Development: xxx@zzz.com (xxxxxxx)" selected for iOS code signing
Creating project tts...
Resolving dependencies in `tts`... (1.3s)
Downloading packages...
Got dependencies in `tts`.
Wrote 122 files.

All done!
You can find general documentation for Flutter at: https://docs.flutter.dev/
Detailed API documentation is available at: https://api.flutter.dev/
If you prefer video documentation, consider: https://www.youtube.com/c/flutterdev

In order to run your application, type:

  $ cd tts
  $ flutter run

Your application code is in tts/lib/main.dart.
```

```
cd tts
flutter pub get
flutter build macos
flutter run -d macos
```


================================================
FILE: flutter-examples/non_streaming_vad_asr/.gitignore
================================================
# Miscellaneous
*.class
*.log
*.pyc
*.swp
.DS_Store
.atom/
.build/
.buildlog/
.history
.svn/
.swiftpm/
migrate_working_dir/

# IntelliJ related
*.iml
*.ipr
*.iws
.idea/

# The .vscode folder contains launch configuration and tasks you configure in
# VS Code which you may wish to be included in version control, so this line
# is commented out by default.
#.vscode/

# Flutter/Dart/Pub related
**/doc/api/
**/ios/Flutter/.last_build_id
.dart_tool/
.flutter-plugins
.flutter-plugins-dependencies
.pub-cache/
.pub/
/build/

# Symbolication related
app.*.symbols

# Obfuscation related
app.*.map.json

# Android Studio will place build artifacts here
/android/app/debug
/android/app/profile
/android/app/release


================================================
FILE: flutter-examples/non_streaming_vad_asr/README.md
================================================
# Real-time speech recognition by non streaming and VAD

This APP supports the following platforms:

- macOS (tested)

## Getting Started

Follow these steps to download and set up the required models to run the demo successfully.

### 1. Select a non-streaming model

Choose one of the following non-streaming ASR models:

#### Code Available Models:
- **whisper**: Whisper base model
- **senseVoice**: SenseVoice multilingual model (supports Chinese, English, Japanese, Korean, Cantonese)
- **parakeet-tdt**: NeMo transducer-based parakeet-tdt model

#### Model Download Links:
- **whisper**: https://huggingface.co/csukuangfj/sherpa-onnx-whisper-base
- **senseVoice**: https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09  
- **parakeet-tdt**: https://huggingface.co/csukuangfj/sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8

### 2. Download VAD Model

Download the VAD (Voice Activity Detection) model from:
https://huggingface.co/csukuangfj/vad

Place the VAD model file (e.g., `silero_vad.onnx`) in the `assets` directory.

### 3. Configure the Model in Code

#### Step 3.1: Update Model Selection
Edit `lib/non_streaming_vad_asr.dart` and set the model type:

```dart
Future<sherpa_onnx.OfflineRecognizer> createOfflineRecognizer() async {
  final type = 2; // 0: whisper, 1: senseVoice, 2: parakeet-tdt
  final modelConfig = await getOfflineModelConfig(type: type);
  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  return sherpa_onnx.OfflineRecognizer(config);
}
```

#### Step 3.2: Update Asset Configuration
Edit `pubspec.yaml` and add the appropriate asset directory for your chosen model:

```yaml
flutter:
  assets:
    - assets/
    - assets/whisper/        # For whisper model
    # - assets/senseVoice/    # For senseVoice model (uncomment when using)
    # - assets/nemo_transducer/ # For parakeet-tdt model (uncomment when using)
```

### 4. Directory Structure Setup

#### For whisper model:
```
./assets/
├── whisper/
│   ├── base-decoder.onnx
│   ├── base-encoder.onnx
│   └── base-tokens.txt
└── silero_vad.onnx
```

#### For senseVoice model:
```
./assets/
├── senseVoice/
│   ├── model.int8.onnx
│   └── tokens.txt
└── silero_vad.onnx
```

#### For parakeet-tdt model:
```
./assets/
├── nemo_transducer/
│   ├── encoder.int8.onnx
│   ├── decoder.int8.onnx
│   ├── joiner.int8.onnx
│   └── tokens.txt
└── silero_vad.onnx
```

### 5. Advanced Configuration (Optional)

#### Modify Model Configuration:
You can edit `lib/offline_model.dart` to customize the model configuration, such as model size and quantization settings.

#### Adjust Audio Recording Settings:
In `lib/non_streaming_vad_asr.dart`, you can modify the VAD configuration:

```dart
_vad = sherpa_onnx.VoiceActivityDetector(
  config: _vadConfig, 
  bufferSizeInSeconds: 30  // Adjust based on your needs
);
_buffer = sherpa_onnx.CircularBuffer(capacity: 30 * 16000);
```

### 6. Run the Application

Use the following command to run the app:

```bash
flutter run -d macos
```

## Troubleshooting

- Ensure all model files are placed in the correct directories
- Check that `pubspec.yaml` includes the correct asset paths
- Verify the model type in `non_streaming_vad_asr.dart` matches your chosen model
- Make sure to delete unnecessary files to reduce app size

## Notes

- The VAD model is required for all non-streaming ASR models
- Model performance may vary depending on hardware capabilities
- Adjust buffer sizes and VAD parameters based on your specific use case


================================================
FILE: flutter-examples/non_streaming_vad_asr/analysis_options.yaml
================================================
# This file configures the analyzer, which statically analyzes Dart code to
# check for errors, warnings, and lints.
#
# The issues identified by the analyzer are surfaced in the UI of Dart-enabled
# IDEs (https://dart.dev/tools#ides-and-editors). The analyzer can also be
# invoked from the command line by running `flutter analyze`.

# The following line activates a set of recommended lints for Flutter apps,
# packages, and plugins designed to encourage good coding practices.
include: package:flutter_lints/flutter.yaml

linter:
  # The lint rules applied to this project can be customized in the
  # section below to disable rules from the `package:flutter_lints/flutter.yaml`
  # included above or to enable additional rules. A list of all available lints
  # and their documentation is published at https://dart.dev/lints.
  #
  # Instead of disabling a lint rule for the entire project in the
  # section below, it can also be suppressed for a single line of code
  # or a specific dart file by using the `// ignore: name_of_lint` and
  # `// ignore_for_file: name_of_lint` syntax on the line or in the file
  # producing the lint.
  rules:
    # avoid_print: false  # Uncomment to disable the `avoid_print` rule
    # prefer_single_quotes: true  # Uncomment to enable the `prefer_single_quotes` rule

# Additional information about this file can be found at
# https://dart.dev/guides/language/analysis-options


================================================
FILE: flutter-examples/non_streaming_vad_asr/lib/info.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'package:flutter/material.dart';
import 'package:url_launcher/url_launcher.dart';

class InfoScreen extends StatelessWidget {
  @override
  Widget build(BuildContext context) {
    const double height = 20;
    return Container(
      child: Padding(
        padding: const EdgeInsets.all(8.0),
        child: Column(
          crossAxisAlignment: CrossAxisAlignment.start,
          children: <Widget>[
            Text('Everything is open-sourced.'),
            SizedBox(height: height),
            InkWell(
              child: Text('Code: https://github.com/k2-fsa/sherpa-onnx'),
              onTap: () => launch('https://k2-fsa.github.io/sherpa/onnx/'),
            ),
            SizedBox(height: height),
            InkWell(
              child: Text('Doc: https://k2-fsa.github.io/sherpa/onnx/'),
              onTap: () => launch('https://k2-fsa.github.io/sherpa/onnx/'),
            ),
            SizedBox(height: height),
            Text('QQ 群: 744602236'),
            SizedBox(height: height),
            InkWell(
              child: Text(
                  '微信群: https://k2-fsa.github.io/sherpa/social-groups.html'),
              onTap: () =>
                  launch('https://k2-fsa.github.io/sherpa/social-groups.html'),
            ),
          ],
        ),
      ),
    );
  }
}


================================================
FILE: flutter-examples/non_streaming_vad_asr/lib/main.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'package:flutter/material.dart';

import './non_streaming_vad_asr.dart';
import './info.dart';

void main() {
  runApp(const MyApp());
}

class MyApp extends StatelessWidget {
  const MyApp({super.key});

  @override
  Widget build(BuildContext context) {
    return MaterialApp(
      title: 'Next-gen Kaldi flutter demo',
      theme: ThemeData(
        colorScheme: ColorScheme.fromSeed(seedColor: Colors.deepPurple),
        useMaterial3: true,
      ),
      home: const MyHomePage(title: 'Next-gen Kaldi with Flutter'),
    );
  }
}

class MyHomePage extends StatefulWidget {
  const MyHomePage({super.key, required this.title});

  final String title;

  @override
  State<MyHomePage> createState() => _MyHomePageState();
}

class _MyHomePageState extends State<MyHomePage> {
  int _currentIndex = 0;
  final List<Widget> _tabs = [
    NoStreamingAsrVAdScreen(),
    InfoScreen(),
  ];
  @override
  Widget build(BuildContext context) {
    return Scaffold(
      appBar: AppBar(
        title: Text(widget.title),
      ),
      body: _tabs[_currentIndex],
      bottomNavigationBar: BottomNavigationBar(
        currentIndex: _currentIndex,
        onTap: (int index) {
          setState(() {
            _currentIndex = index;
          });
        },
        items: [
          BottomNavigationBarItem(
            icon: Icon(Icons.home),
            label: 'Home',
          ),
          BottomNavigationBarItem(
            icon: Icon(Icons.info),
            label: 'Info',
          ),
        ],
      ),
    );
  }
}


================================================
FILE: flutter-examples/non_streaming_vad_asr/lib/non_streaming_vad_asr.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:async';
import 'dart:typed_data';

import 'package:flutter/foundation.dart';
import 'package:flutter/material.dart';
import 'package:path/path.dart' as p;
import 'package:path_provider/path_provider.dart';
import 'package:record/record.dart';

import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './utils.dart';
import './offline_model.dart';

final modelDir = 'assets';
Future<sherpa_onnx.OfflineRecognizer> createOfflineRecognizer() async {
  final type = 2;
  final modelConfig = await getOfflineModelConfig(type: type);
  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  return sherpa_onnx.OfflineRecognizer(config);
}

class NoStreamingAsrVAdScreen extends StatefulWidget {
  const NoStreamingAsrVAdScreen({super.key});

  @override
  State<NoStreamingAsrVAdScreen> createState() => _NoStreamingAsrVAdScreenState();
}

class _NoStreamingAsrVAdScreenState extends State<NoStreamingAsrVAdScreen> {

  late final TextEditingController _controller;
  late final AudioRecorder _audioRecorder;

  String _title = 'Real-time speech recognition(offline recognizer with vad)';
  String _last = '';
  int _index = 0;
  bool _isInitialized = false;

  // offline recognizer related vars
  sherpa_onnx.OfflineRecognizer? _recognizer;
  static const int _sampleRate = 16000;

  // VAD related vars
  sherpa_onnx.VoiceActivityDetector? _vad;
  sherpa_onnx.CircularBuffer? _buffer;
  
  // VAD config
  late sherpa_onnx.VadModelConfig _vadConfig;

  StreamSubscription<RecordState>? _recordSub;
  RecordState _recordState = RecordState.stop;

  @override
  void initState() {
    _audioRecorder = AudioRecorder();
    _controller = TextEditingController();

    _recordSub = _audioRecorder.onStateChanged().listen((recordState) {
      _updateRecordState(recordState);
    });

    super.initState();
  }

  Future<void> _start() async {
    if (!_isInitialized) {
      sherpa_onnx.initBindings();

      // 初始化 VAD
      final sileroVadConfig = sherpa_onnx.SileroVadModelConfig(
        model: await copyAssetFile('$modelDir/silero_vad.onnx'),
        minSilenceDuration: 0.25,
        minSpeechDuration: 0.5,
        maxSpeechDuration: 5.0,
      );

      _vadConfig = sherpa_onnx.VadModelConfig(
        sileroVad: sileroVadConfig,
        numThreads: 1,
        debug: false,
      );

      // create VAD, use buffer model
      _vad = sherpa_onnx.VoiceActivityDetector(
        config: _vadConfig, 
        bufferSizeInSeconds: 30
      );
      _buffer = sherpa_onnx.CircularBuffer(capacity: 30 * _sampleRate);

      _recognizer = await createOfflineRecognizer();
      _isInitialized = true;
    }

    try {
      if (await _audioRecorder.hasPermission()) {
        const encoder = AudioEncoder.pcm16bits;

        if (!await _isEncoderSupported(encoder)) {
          return;
        }

        final devs = await _audioRecorder.listInputDevices();
        debugPrint(devs.toString());

        const config = RecordConfig(
          encoder: encoder,
          sampleRate: _sampleRate,
          numChannels: 1,
        );

        final stream = await _audioRecorder.startStream(config);

        stream.listen(
          (data) {
            final samplesFloat32 = convertBytesToFloat32(Uint8List.fromList(data));
            
            // use _buffer and _vad for offline stream data making
            _buffer!.push(samplesFloat32);
            
            final windowSize = _vadConfig.sileroVad.windowSize;
            while (_buffer!.size > windowSize) {
              final samples = _buffer!.get(
                startIndex: _buffer!.head, 
                n: windowSize
              );
              _buffer!.pop(windowSize);
              _vad!.acceptWaveform(samples);  

              while (!_vad!.isEmpty()) {
                final segment = _vad!.front();
                final samples = segment.samples;  
                
                // offline _recognizer stream handle logic
                final stream = _recognizer!.createStream();
                stream.acceptWaveform(samples: samples, sampleRate: _sampleRate);
                _recognizer!.decode(stream);
                final text = _recognizer!.getResult(stream).text;
                debugPrint("recognize:"+text);
                stream.free();
                _vad!.pop();
                
                // update text to display
                String textToDisplay = _last;
                if (text != '') {
                  _index += 1;
                  if (_last == '') {
                    textToDisplay = '$_index: $text';
                  } else {
                    textToDisplay = '$_index: $text\n$_last';
                  }
                  _last = textToDisplay;
                }
                debugPrint("final:"+textToDisplay);
                _controller.value = TextEditingValue(
                      text: textToDisplay,
                      selection: TextSelection.collapsed(offset: textToDisplay.length),
                );
              }
            }
          },
          onDone: () {
            print('stream stopped.');
          },
        );
      }
    } catch (e) {
      print(e);
    }
  }

  Future<void> _stop() async {
    await _audioRecorder.stop();
    // handle rest of vad data
     _vad!.flush();
    while (!_vad!.isEmpty()) {
      final segment = _vad!.front();
      final samples = segment.samples;

      final stream = _recognizer!.createStream();
      stream.acceptWaveform(samples: samples, sampleRate: _sampleRate);
      _recognizer!.decode(stream);
      final text = _recognizer!.getResult(stream).text;
              
      String textToDisplay = _last;
      if (text != '') {
          _index += 1;
          if (_last == '') {
              textToDisplay = '$_index: $text';
          } else {
              textToDisplay = '$_index: $text\n$_last';
            }
          }
          _last = "";
          _index = 0;  
          debugPrint("final:"+textToDisplay);
          _controller.value = TextEditingValue(
            text: textToDisplay,
            selection: TextSelection.collapsed(offset: textToDisplay.length),
          );              
          stream.free();
          _vad!.pop();
    }
  }

  Future<void> _pause() => _audioRecorder.pause();

  Future<void> _resume() => _audioRecorder.resume();

  void _updateRecordState(RecordState recordState) {
    setState(() => _recordState = recordState);
  }

  Future<bool> _isEncoderSupported(AudioEncoder encoder) async {
    final isSupported = await _audioRecorder.isEncoderSupported(
      encoder,
    );

    if (!isSupported) {
      debugPrint('${encoder.name} is not supported on this platform.');
      debugPrint('Supported encoders are:');

      for (final e in AudioEncoder.values) {
        if (await _audioRecorder.isEncoderSupported(e)) {
          debugPrint('- ${e.name}');
        }
      }
    }

    return isSupported;
  }

  @override
  Widget build(BuildContext context) {
    return MaterialApp(
      home: Scaffold(
        appBar: AppBar(
          title: Text(_title),
        ),
        body: Column(
          mainAxisAlignment: MainAxisAlignment.center,
          children: [
            const SizedBox(height: 50),
            TextField(
              maxLines: 5,
              controller: _controller,
              readOnly: true,
            ),
            const SizedBox(height: 50),
            Row(
              mainAxisAlignment: MainAxisAlignment.center,
              children: <Widget>[
                _buildRecordStopControl(),
                const SizedBox(width: 20),
                _buildText(),
              ],
            ),
          ],
        ),
      ),
    );
  }

  @override
  void dispose() {
    _recordSub?.cancel();
    _audioRecorder.dispose();
    _recognizer?.free();
    _vad?.free(); // release vad
    _buffer?.free(); // release buffer
    super.dispose();
  }

  Widget _buildRecordStopControl() {
    late Icon icon;
    late Color color;

    if (_recordState != RecordState.stop) {
      icon = const Icon(Icons.stop, color: Colors.red, size: 30);
      color = Colors.red.withOpacity(0.1);
    } else {
      final theme = Theme.of(context);
      icon = Icon(Icons.mic, color: theme.primaryColor, size: 30);
      color = theme.primaryColor.withOpacity(0.1);
    }

    return ClipOval(
      child: Material(
        color: color,
        child: InkWell(
          child: SizedBox(width: 56, height: 56, child: icon),
          onTap: () {
            (_recordState != RecordState.stop) ? _stop() : _start();
          },
        ),
      ),
    );
  }

  Widget _buildText() {
    if (_recordState == RecordState.stop) {
      return const Text("Start");
    } else {
      return const Text("Stop");
    }
  }
}

================================================
FILE: flutter-examples/non_streaming_vad_asr/lib/offline_model.dart
================================================
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
import './utils.dart';

final modelDir = 'assets';
// Remember to change `assets` in ../pubspec.yaml
// and download files to ../assets
Future<sherpa_onnx.OfflineModelConfig> getOfflineModelConfig(
    {required int type}) async {
  switch (type) {
    // whisper
    case 0: 
      return sherpa_onnx.OfflineModelConfig(
        whisper:sherpa_onnx.OfflineWhisperModelConfig(
          encoder: await copyAssetFile('$modelDir/whisper/base-encoder.onnx'),
          decoder: await copyAssetFile('$modelDir/whisper/base-decoder.onnx'),
        ),
        tokens: await copyAssetFile('$modelDir/whisper/base-tokens.txt'),
        modelType: 'whisper',
      );
    // senseVoice  
    case 1:
      return sherpa_onnx.OfflineModelConfig(
        senseVoice: sherpa_onnx.OfflineSenseVoiceModelConfig(
          model: await copyAssetFile('$modelDir/senseVoice/model.int8.onnx'), 
        ),
        tokens: await copyAssetFile('$modelDir/senseVoice/tokens.txt'),
      );
    // nemo_transducer-parakeet-tdt
    case 2:
      return sherpa_onnx.OfflineModelConfig(
        transducer: sherpa_onnx.OfflineTransducerModelConfig(
          encoder: await copyAssetFile(
              '$modelDir/nemo_transducer/encoder.int8.onnx'),
          decoder: await copyAssetFile(
              '$modelDir/nemo_transducer/decoder.int8.onnx'),
          joiner: await copyAssetFile(
              '$modelDir/nemo_transducer/joiner.int8.onnx'),
        ),
        tokens: await copyAssetFile('$modelDir/nemo_transducer/tokens.txt'),
        modelType: 'nemo_transducer',
      );
    default:
      throw ArgumentError('Unsupported type: $type');
  }
}


================================================
FILE: flutter-examples/non_streaming_vad_asr/lib/utils.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'package:path/path.dart';
import 'package:path_provider/path_provider.dart';
import 'package:flutter/services.dart' show rootBundle;
import 'dart:typed_data';
import "dart:io";

// Copy the asset file from src to dst
Future<String> copyAssetFile(String src, [String? dst]) async {
  final Directory directory = await getApplicationSupportDirectory();
  if (dst == null) {
    dst = basename(src);
  }
  final target = join(directory.path, dst);
  bool exists = await new File(target).exists();

  final data = await rootBundle.load(src);

  if (!exists || File(target).lengthSync() != data.lengthInBytes) {
    final List<int> bytes =
        data.buffer.asUint8List(data.offsetInBytes, data.lengthInBytes);
    await File(target).writeAsBytes(bytes);
  }

  return target;
}

Float32List convertBytesToFloat32(Uint8List bytes, [endian = Endian.little]) {
  final values = Float32List(bytes.length ~/ 2);

  final data = ByteData.view(bytes.buffer);

  for (var i = 0; i < bytes.length; i += 2) {
    int short = data.getInt16(i, endian);
    values[i ~/ 2] = short / 32768.0;
  }

  return values;
}


================================================
FILE: flutter-examples/non_streaming_vad_asr/macos/.gitignore
================================================
# Flutter-related
**/Flutter/ephemeral/
**/Pods/

# Xcode-related
**/dgph
**/xcuserdata/


================================================
FILE: flutter-examples/non_streaming_vad_asr/macos/Flutter/Flutter-Debug.xcconfig
================================================
#include? "Pods/Target Support Files/Pods-Runner/Pods-Runner.debug.xcconfig"
#include "ephemeral/Flutter-Generated.xcconfig"


================================================
FILE: flutter-examples/non_streaming_vad_asr/macos/Flutter/Flutter-Release.xcconfig
================================================
#include? "Pods/Target Support Files/Pods-Runner/Pods-Runner.release.xcconfig"
#include "ephemeral/Flutter-Generated.xcconfig"


================================================
FILE: flutter-examples/non_streaming_vad_asr/macos/Runner/AppDelegate.swift
================================================
import Cocoa
import FlutterMacOS

@main
class AppDelegate: FlutterAppDelegate {
  override func applicationShouldTerminateAfterLastWindowClosed(_ sender: NSApplication) -> Bool {
    return true
  }

  override func applicationSupportsSecureRestorableState(_ app: NSApplication) -> Bool {
    return true
  }
}


================================================
FILE: flutter-examples/non_streaming_vad_asr/macos/Runner/Assets.xcassets/AppIcon.appiconset/Contents.json
================================================
{
  "images" : [
    {
      "size" : "16x16",
      "idiom" : "mac",
      "filename" : "app_icon_16.png",
      "scale" : "1x"
    },
    {
      "size" : "16x16",
      "idiom" : "mac",
      "filename" : "app_icon_32.png",
      "scale" : "2x"
    },
    {
      "size" : "32x32",
      "idiom" : "mac",
      "filename" : "app_icon_32.png",
      "scale" : "1x"
    },
    {
      "size" : "32x32",
      "idiom" : "mac",
      "filename" : "app_icon_64.png",
      "scale" : "2x"
    },
    {
      "size" : "128x128",
      "idiom" : "mac",
      "filename" : "app_icon_128.png",
      "scale" : "1x"
    },
    {
      "size" : "128x128",
      "idiom" : "mac",
      "filename" : "app_icon_256.png",
      "scale" : "2x"
    },
    {
      "size" : "256x256",
      "idiom" : "mac",
      "filename" : "app_icon_256.png",
      "scale" : "1x"
    },
    {
      "size" : "256x256",
      "idiom" : "mac",
      "filename" : "app_icon_512.png",
      "scale" : "2x"
    },
    {
      "size" : "512x512",
      "idiom" : "mac",
      "filename" : "app_icon_512.png",
      "scale" : "1x"
    },
    {
      "size" : "512x512",
      "idiom" : "mac",
      "filename" : "app_icon_1024.png",
      "scale" : "2x"
    }
  ],
  "info" : {
    "version" : 1,
    "author" : "xcode"
  }
}


================================================
FILE: flutter-examples/non_streaming_vad_asr/macos/Runner/Base.lproj/MainMenu.xib
================================================
<?xml version="1.0" encoding="UTF-8"?>
<document type="com.apple.InterfaceBuilder3.Cocoa.XIB" version="3.0" toolsVersion="14490.70" targetRuntime="MacOSX.Cocoa" propertyAccessControl="none" useAutolayout="YES" customObjectInstantitationMethod="direct">
    <dependencies>
        <deployment identifier="macosx"/>
        <plugIn identifier="com.apple.InterfaceBuilder.CocoaPlugin" version="14490.70"/>
        <capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
    </dependencies>
    <objects>
        <customObject id="-2" userLabel="File's Owner" customClass="NSApplication">
            <connections>
                <outlet property="delegate" destination="Voe-Tx-rLC" id="GzC-gU-4Uq"/>
            </connections>
        </customObject>
        <customObject id="-1" userLabel="First Responder" customClass="FirstResponder"/>
        <customObject id="-3" userLabel="Application" customClass="NSObject"/>
        <customObject id="Voe-Tx-rLC" customClass="AppDelegate" customModule="Runner" customModuleProvider="target">
            <connections>
                <outlet property="applicationMenu" destination="uQy-DD-JDr" id="XBo-yE-nKs"/>
                <outlet property="mainFlutterWindow" destination="QvC-M9-y7g" id="gIp-Ho-8D9"/>
            </connections>
        </customObject>
        <customObject id="YLy-65-1bz" customClass="NSFontManager"/>
        <menu title="Main Menu" systemMenu="main" id="AYu-sK-qS6">
            <items>
                <menuItem title="APP_NAME" id="1Xt-HY-uBw">
                    <modifierMask key="keyEquivalentModifierMask"/>
                    <menu key="submenu" title="APP_NAME" systemMenu="apple" id="uQy-DD-JDr">
                        <items>
                            <menuItem title="About APP_NAME" id="5kV-Vb-QxS">
                                <modifierMask key="keyEquivalentModifierMask"/>
                                <connections>
                                    <action selector="orderFrontStandardAboutPanel:" target="-1" id="Exp-CZ-Vem"/>
                                </connections>
                            </menuItem>
                            <menuItem isSeparatorItem="YES" id="VOq-y0-SEH"/>
                            <menuItem title="Preferences…" keyEquivalent="," id="BOF-NM-1cW"/>
                            <menuItem isSeparatorItem="YES" id="wFC-TO-SCJ"/>
                            <menuItem title="Services" id="NMo-om-nkz">
                                <modifierMask key="keyEquivalentModifierMask"/>
                                <menu key="submenu" title="Services" systemMenu="services" id="hz9-B4-Xy5"/>
                            </menuItem>
                            <menuItem isSeparatorItem="YES" id="4je-JR-u6R"/>
                            <menuItem title="Hide APP_NAME" keyEquivalent="h" id="Olw-nP-bQN">
                                <connections>
                                    <action selector="hide:" target="-1" id="PnN-Uc-m68"/>
                                </connections>
                            </menuItem>
                            <menuItem title="Hide Others" keyEquivalent="h" id="Vdr-fp-XzO">
                                <modifierMask key="keyEquivalentModifierMask" option="YES" command="YES"/>
                                <connections>
                                    <action selector="hideOtherApplications:" target="-1" id="VT4-aY-XCT"/>
                                </connections>
                            </menuItem>
                            <menuItem title="Show All" id="Kd2-mp-pUS">
                                <modifierMask key="keyEquivalentModifierMask"/>
                                <connections>
                                    <action selector="unhideAllApplications:" target="-1" id="Dhg-Le-xox"/>
                                </connections>
                            </menuItem>
                            <menuItem isSeparatorItem="YES" id="kCx-OE-vgT"/>
                            <menuItem title="Quit APP_NAME" keyEquivalent="q" id="4sb-4s-VLi">
                                <connections>
                                    <action selector="terminate:" target="-1" id="Te7-pn-YzF"/>
                                </connections>
                            </menuItem>
                        </items>
                    </menu>
                </menuItem>
                <menuItem title="Edit" id="5QF-Oa-p0T">
                    <modifierMask key="keyEquivalentModifierMask"/>
                    <menu key="submenu" title="Edit" id="W48-6f-4Dl">
                        <items>
                            <menuItem title="Undo" keyEquivalent="z" id="dRJ-4n-Yzg">
                                <connections>
                                    <action selector="undo:" target="-1" id="M6e-cu-g7V"/>
                                </connections>
                            </menuItem>
                            <menuItem title="Redo" keyEquivalent="Z" id="6dh-zS-Vam">
                                <connections>
                                    <action selector="redo:" target="-1" id="oIA-Rs-6OD"/>
                                </connections>
                            </menuItem>
                            <menuItem isSeparatorItem="YES" id="WRV-NI-Exz"/>
                            <menuItem title="Cut" keyEquivalent="x" id="uRl-iY-unG">
                                <connections>
                                    <action selector="cut:" target="-1" id="YJe-68-I9s"/>
                                </connections>
                            </menuItem>
                            <menuItem title="Copy" keyEquivalent="c" id="x3v-GG-iWU">
                                <connections>
                                    <action selector="copy:" target="-1" id="G1f-GL-Joy"/>
                                </connections>
                            </menuItem>
                            <menuItem title="Paste" keyEquivalent="v" id="gVA-U4-sdL">
                                <connections>
                                    <action selector="paste:" target="-1" id="UvS-8e-Qdg"/>
                                </connections>
                            </menuItem>
                            <menuItem title="Paste and Match Style" keyEquivalent="V" id="WeT-3V-zwk">
                                <modifierMask key="keyEquivalentModifierMask" option="YES" command="YES"/>
                                <connections>
                                    <action selector="pasteAsPlainText:" target="-1" id="cEh-KX-wJQ"/>
                                </connections>
                            </menuItem>
                            <menuItem title="Delete" id="pa3-QI-u2k">
                                <modifierMask key="keyEquivalentModifierMask"/>
                                <connections>
                                    <action selector="delete:" target="-1" id="0Mk-Ml-PaM"/>
                                </connections>
                            </menuItem>
                            <menuItem title="Select All" keyEquivalent="a" id="Ruw-6m-B2m">
                                <connections>
                                    <action selector="selectAll:" target="-1" id="VNm-Mi-diN"/>
                                </connections>
                            </menuItem>
                            <menuItem isSeparatorItem="YES" id="uyl-h8-XO2"/>
                            <menuItem title="Find" id="4EN-yA-p0u">
                                <modifierMask key="keyEquivalentModifierMask"/>
                                <menu key="submenu" title="Find" id="1b7-l0-nxx">
                                    <items>
                                        <menuItem title="Find…" tag="1" keyEquivalent="f" id="Xz5-n4-O0W">
                                            <connections>
                                                <action selector="performFindPanelAction:" target="-1" id="cD7-Qs-BN4"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Find and Replace…" tag="12" keyEquivalent="f" id="YEy-JH-Tfz">
                                            <modifierMask key="keyEquivalentModifierMask" option="YES" command="YES"/>
                                            <connections>
                                                <action selector="performFindPanelAction:" target="-1" id="WD3-Gg-5AJ"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Find Next" tag="2" keyEquivalent="g" id="q09-fT-Sye">
                                            <connections>
                                                <action selector="performFindPanelAction:" target="-1" id="NDo-RZ-v9R"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Find Previous" tag="3" keyEquivalent="G" id="OwM-mh-QMV">
                                            <connections>
                                                <action selector="performFindPanelAction:" target="-1" id="HOh-sY-3ay"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Use Selection for Find" tag="7" keyEquivalent="e" id="buJ-ug-pKt">
                                            <connections>
                                                <action selector="performFindPanelAction:" target="-1" id="U76-nv-p5D"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Jump to Selection" keyEquivalent="j" id="S0p-oC-mLd">
                                            <connections>
                                                <action selector="centerSelectionInVisibleArea:" target="-1" id="IOG-6D-g5B"/>
                                            </connections>
                                        </menuItem>
                                    </items>
                                </menu>
                            </menuItem>
                            <menuItem title="Spelling and Grammar" id="Dv1-io-Yv7">
                                <modifierMask key="keyEquivalentModifierMask"/>
                                <menu key="submenu" title="Spelling" id="3IN-sU-3Bg">
                                    <items>
                                        <menuItem title="Show Spelling and Grammar" keyEquivalent=":" id="HFo-cy-zxI">
                                            <connections>
                                                <action selector="showGuessPanel:" target="-1" id="vFj-Ks-hy3"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Check Document Now" keyEquivalent=";" id="hz2-CU-CR7">
                                            <connections>
                                                <action selector="checkSpelling:" target="-1" id="fz7-VC-reM"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem isSeparatorItem="YES" id="bNw-od-mp5"/>
                                        <menuItem title="Check Spelling While Typing" id="rbD-Rh-wIN">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="toggleContinuousSpellChecking:" target="-1" id="7w6-Qz-0kB"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Check Grammar With Spelling" id="mK6-2p-4JG">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="toggleGrammarChecking:" target="-1" id="muD-Qn-j4w"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Correct Spelling Automatically" id="78Y-hA-62v">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="toggleAutomaticSpellingCorrection:" target="-1" id="2lM-Qi-WAP"/>
                                            </connections>
                                        </menuItem>
                                    </items>
                                </menu>
                            </menuItem>
                            <menuItem title="Substitutions" id="9ic-FL-obx">
                                <modifierMask key="keyEquivalentModifierMask"/>
                                <menu key="submenu" title="Substitutions" id="FeM-D8-WVr">
                                    <items>
                                        <menuItem title="Show Substitutions" id="z6F-FW-3nz">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="orderFrontSubstitutionsPanel:" target="-1" id="oku-mr-iSq"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem isSeparatorItem="YES" id="gPx-C9-uUO"/>
                                        <menuItem title="Smart Copy/Paste" id="9yt-4B-nSM">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="toggleSmartInsertDelete:" target="-1" id="3IJ-Se-DZD"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Smart Quotes" id="hQb-2v-fYv">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="toggleAutomaticQuoteSubstitution:" target="-1" id="ptq-xd-QOA"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Smart Dashes" id="rgM-f4-ycn">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="toggleAutomaticDashSubstitution:" target="-1" id="oCt-pO-9gS"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Smart Links" id="cwL-P1-jid">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="toggleAutomaticLinkDetection:" target="-1" id="Gip-E3-Fov"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Data Detectors" id="tRr-pd-1PS">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="toggleAutomaticDataDetection:" target="-1" id="R1I-Nq-Kbl"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Text Replacement" id="HFQ-gK-NFA">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="toggleAutomaticTextReplacement:" target="-1" id="DvP-Fe-Py6"/>
                                            </connections>
                                        </menuItem>
                                    </items>
                                </menu>
                            </menuItem>
                            <menuItem title="Transformations" id="2oI-Rn-ZJC">
                                <modifierMask key="keyEquivalentModifierMask"/>
                                <menu key="submenu" title="Transformations" id="c8a-y6-VQd">
                                    <items>
                                        <menuItem title="Make Upper Case" id="vmV-6d-7jI">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="uppercaseWord:" target="-1" id="sPh-Tk-edu"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Make Lower Case" id="d9M-CD-aMd">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="lowercaseWord:" target="-1" id="iUZ-b5-hil"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Capitalize" id="UEZ-Bs-lqG">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="capitalizeWord:" target="-1" id="26H-TL-nsh"/>
                                            </connections>
                                        </menuItem>
                                    </items>
                                </menu>
                            </menuItem>
                            <menuItem title="Speech" id="xrE-MZ-jX0">
                                <modifierMask key="keyEquivalentModifierMask"/>
                                <menu key="submenu" title="Speech" id="3rS-ZA-NoH">
                                    <items>
                                        <menuItem title="Start Speaking" id="Ynk-f8-cLZ">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="startSpeaking:" target="-1" id="654-Ng-kyl"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Stop Speaking" id="Oyz-dy-DGm">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="stopSpeaking:" target="-1" id="dX8-6p-jy9"/>
                                            </connections>
                                        </menuItem>
                                    </items>
                                </menu>
                            </menuItem>
                        </items>
                    </menu>
                </menuItem>
                <menuItem title="View" id="H8h-7b-M4v">
                    <modifierMask key="keyEquivalentModifierMask"/>
                    <menu key="submenu" title="View" id="HyV-fh-RgO">
                        <items>
                            <menuItem title="Enter Full Screen" keyEquivalent="f" id="4J7-dP-txa">
                                <modifierMask key="keyEquivalentModifierMask" control="YES" command="YES"/>
                                <connections>
                                    <action selector="toggleFullScreen:" target="-1" id="dU3-MA-1Rq"/>
                                </connections>
                            </menuItem>
                        </items>
                    </menu>
                </menuItem>
                <menuItem title="Window" id="aUF-d1-5bR">
                    <modifierMask key="keyEquivalentModifierMask"/>
                    <menu key="submenu" title="Window" systemMenu="window" id="Td7-aD-5lo">
                        <items>
                            <menuItem title="Minimize" keyEquivalent="m" id="OY7-WF-poV">
                                <connections>
                                    <action selector="performMiniaturize:" target="-1" id="VwT-WD-YPe"/>
                                </connections>
                            </menuItem>
                            <menuItem title="Zoom" id="R4o-n2-Eq4">
                                <modifierMask key="keyEquivalentModifierMask"/>
                                <connections>
                                    <action selector="performZoom:" target="-1" id="DIl-cC-cCs"/>
                                </connections>
                            </menuItem>
                            <menuItem isSeparatorItem="YES" id="eu3-7i-yIM"/>
                            <menuItem title="Bring All to Front" id="LE2-aR-0XJ">
                                <modifierMask key="keyEquivalentModifierMask"/>
                                <connections>
                                    <action selector="arrangeInFront:" target="-1" id="DRN-fu-gQh"/>
                                </connections>
                            </menuItem>
                        </items>
                    </menu>
                </menuItem>
                <menuItem title="Help" id="EPT-qC-fAb">
                    <modifierMask key="keyEquivalentModifierMask"/>
                    <menu key="submenu" title="Help" systemMenu="help" id="rJ0-wn-3NY"/>
                </menuItem>
            </items>
            <point key="canvasLocation" x="142" y="-258"/>
        </menu>
        <window title="APP_NAME" allowsToolTipsWhenApplicationIsInactive="NO" autorecalculatesKeyViewLoop="NO" releasedWhenClosed="NO" animationBehavior="default" id="QvC-M9-y7g" customClass="MainFlutterWindow" customModule="Runner" customModuleProvider="target">
            <windowStyleMask key="styleMask" titled="YES" closable="YES" miniaturizable="YES" resizable="YES"/>
            <rect key="contentRect" x="335" y="390" width="800" height="600"/>
            <rect key="screenRect" x="0.0" y="0.0" width="2560" height="1577"/>
            <view key="contentView" wantsLayer="YES" id="EiT-Mj-1SZ">
                <rect key="frame" x="0.0" y="0.0" width="800" height="600"/>
                <autoresizingMask key="autoresizingMask"/>
            </view>
        </window>
    </objects>
</document>


================================================
FILE: flutter-examples/non_streaming_vad_asr/macos/Runner/Configs/AppInfo.xcconfig
================================================
// Application-level settings for the Runner target.
//
// This may be replaced with something auto-generated from metadata (e.g., pubspec.yaml) in the
// future. If not, the values below would default to using the project name when this becomes a
// 'flutter create' template.

// The application's name. By default this is also the title of the Flutter window.
PRODUCT_NAME = non_streaming_vad_asr

// The application's bundle identifier
PRODUCT_BUNDLE_IDENTIFIER = com.example.nonStreamingVadAsr

// The copyright displayed in application information
PRODUCT_COPYRIGHT = Copyright © 2024 com.example. All rights reserved.


================================================
FILE: flutter-examples/non_streaming_vad_asr/macos/Runner/Configs/Debug.xcconfig
================================================
#include "../../Flutter/Flutter-Debug.xcconfig"
#include "Warnings.xcconfig"


================================================
FILE: flutter-examples/non_streaming_vad_asr/macos/Runner/Configs/Release.xcconfig
================================================
#include "../../Flutter/Flutter-Release.xcconfig"
#include "Warnings.xcconfig"


================================================
FILE: flutter-examples/non_streaming_vad_asr/macos/Runner/Configs/Warnings.xcconfig
================================================
WARNING_CFLAGS = -Wall -Wconditional-uninitialized -Wnullable-to-nonnull-conversion -Wmissing-method-return-type -Woverlength-strings
GCC_WARN_UNDECLARED_SELECTOR = YES
CLANG_UNDEFINED_BEHAVIOR_SANITIZER_NULLABILITY = YES
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES
CLANG_WARN_PRAGMA_PACK = YES
CLANG_WARN_STRICT_PROTOTYPES = YES
CLANG_WARN_COMMA = YES
GCC_WARN_STRICT_SELECTOR_MATCH = YES
CLANG_WARN_OBJC_REPEATED_USE_OF_WEAK = YES
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES
GCC_WARN_SHADOW = YES
CLANG_WARN_UNREACHABLE_CODE = YES


================================================
FILE: flutter-examples/non_streaming_vad_asr/macos/Runner/DebugProfile.entitlements
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>com.apple.security.app-sandbox</key>
	<true/>
	<key>com.apple.security.cs.allow-jit</key>
	<true/>
	<key>com.apple.security.device.audio-input</key>
	<true/>
	<key>com.apple.security.network.server</key>
	<true/>
</dict>
</plist>


================================================
FILE: flutter-examples/non_streaming_vad_asr/macos/Runner/Info.plist
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>NSMicrophoneUsageDescription</key>
	<string>Need microphone access for Next-gen kaldi to work</string>
	<key>CFBundleDevelopmentRegion</key>
	<string>$(DEVELOPMENT_LANGUAGE)</string>
	<key>CFBundleExecutable</key>
	<string>$(EXECUTABLE_NAME)</string>
	<key>CFBundleIconFile</key>
	<string></string>
	<key>CFBundleIdentifier</key>
	<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
	<key>CFBundleInfoDictionaryVersion</key>
	<string>6.0</string>
	<key>CFBundleName</key>
	<string>$(PRODUCT_NAME)</string>
	<key>CFBundlePackageType</key>
	<string>APPL</string>
	<key>CFBundleShortVersionString</key>
	<string>$(FLUTTER_BUILD_NAME)</string>
	<key>CFBundleVersion</key>
	<string>$(FLUTTER_BUILD_NUMBER)</string>
	<key>LSMinimumSystemVersion</key>
	<string>$(MACOSX_DEPLOYMENT_TARGET)</string>
	<key>NSHumanReadableCopyright</key>
	<string>$(PRODUCT_COPYRIGHT)</string>
	<key>NSMainNibFile</key>
	<string>MainMenu</string>
	<key>NSPrincipalClass</key>
	<string>NSApplication</string>
</dict>
</plist>


================================================
FILE: flutter-examples/non_streaming_vad_asr/macos/Runner/MainFlutterWindow.swift
================================================
import Cocoa
import FlutterMacOS

class MainFlutterWindow: NSWindow {
  override func awakeFromNib() {
    let flutterViewController = FlutterViewController()
    let windowFrame = self.frame
    self.contentViewController = flutterViewController
    self.setFrame(windowFrame, display: true)

    RegisterGeneratedPlugins(registry: flutterViewController)

    super.awakeFromNib()
  }
}


================================================
FILE: flutter-examples/non_streaming_vad_asr/macos/Runner/Release.entitlements
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>com.apple.security.app-sandbox</key>
	<true/>
	<key>com.apple.security.device.audio-input</key>
	<true/>
</dict>
</plist>


================================================
FILE: flutter-examples/non_streaming_vad_asr/macos/Runner.xcodeproj/project.pbxproj
================================================
// !$*UTF8*$!
{
	archiveVersion = 1;
	classes = {
	};
	objectVersion = 54;
	objects = {

/* Begin PBXAggregateTarget section */
		33CC111A2044C6BA0003C045 /* Flutter Assemble */ = {
			isa = PBXAggregateTarget;
			buildConfigurationList = 33CC111B2044C6BA0003C045 /* Build configuration list for PBXAggregateTarget "Flutter Assemble" */;
			buildPhases = (
				33CC111E2044C6BF0003C045 /* ShellScript */,
			);
			dependencies = (
			);
			name = "Flutter Assemble";
			productName = FLX;
		};
/* End PBXAggregateTarget section */

/* Begin PBXBuildFile section */
		331C80D8294CF71000263BE5 /* RunnerTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 331C80D7294CF71000263BE5 /* RunnerTests.swift */; };
		335BBD1B22A9A15E00E9071D /* GeneratedPluginRegistrant.swift in Sources */ = {isa = PBXBuildFile; fileRef = 335BBD1A22A9A15E00E9071D /* GeneratedPluginRegistrant.swift */; };
		33CC10F12044A3C60003C045 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 33CC10F02044A3C60003C045 /* AppDelegate.swift */; };
		33CC10F32044A3C60003C045 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 33CC10F22044A3C60003C045 /* Assets.xcassets */; };
		33CC10F62044A3C60003C045 /* MainMenu.xib in Resources */ = {isa = PBXBuildFile; fileRef = 33CC10F42044A3C60003C045 /* MainMenu.xib */; };
		33CC11132044BFA00003C045 /* MainFlutterWindow.swift in Sources */ = {isa = PBXBuildFile; fileRef = 33CC11122044BFA00003C045 /* MainFlutterWindow.swift */; };
		3FE622CE7FAD50CAB6A50227 /* Pods_RunnerTests.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 6FDD8A902F607871AAC21564 /* Pods_RunnerTests.framework */; };
		B6BF18E4D30EDE6C4C5FD00D /* Pods_Runner.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 2DE4C5BCFCD3E0DF5FD13E12 /* Pods_Runner.framework */; };
/* End PBXBuildFile section */

/* Begin PBXContainerItemProxy section */
		331C80D9294CF71000263BE5 /* PBXContainerItemProxy */ = {
			isa = PBXContainerItemProxy;
			containerPortal = 33CC10E52044A3C60003C045 /* Project object */;
			proxyType = 1;
			remoteGlobalIDString = 33CC10EC2044A3C60003C045;
			remoteInfo = Runner;
		};
		33CC111F2044C79F0003C045 /* PBXContainerItemProxy */ = {
			isa = PBXContainerItemProxy;
			containerPortal = 33CC10E52044A3C60003C045 /* Project object */;
			proxyType = 1;
			remoteGlobalIDString = 33CC111A2044C6BA0003C045;
			remoteInfo = FLX;
		};
/* End PBXContainerItemProxy section */

/* Begin PBXCopyFilesBuildPhase section */
		33CC110E2044A8840003C045 /* Bundle Framework */ = {
			isa = PBXCopyFilesBuildPhase;
			buildActionMask = 2147483647;
			dstPath = "";
			dstSubfolderSpec = 10;
			files = (
			);
			name = "Bundle Framework";
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXCopyFilesBuildPhase section */

/* Begin PBXFileReference section */
		2DE4C5BCFCD3E0DF5FD13E12 /* Pods_Runner.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = Pods_Runner.framework; sourceTree = BUILT_PRODUCTS_DIR; };
		331C80D5294CF71000263BE5 /* RunnerTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = RunnerTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
		331C80D7294CF71000263BE5 /* RunnerTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RunnerTests.swift; sourceTree = "<group>"; };
		333000ED22D3DE5D00554162 /* Warnings.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = Warnings.xcconfig; sourceTree = "<group>"; };
		335BBD1A22A9A15E00E9071D /* GeneratedPluginRegistrant.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = GeneratedPluginRegistrant.swift; sourceTree = "<group>"; };
		33CC10ED2044A3C60003C045 /* streaming_asr.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = streaming_asr.app; sourceTree = BUILT_PRODUCTS_DIR; };
		33CC10F02044A3C60003C045 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; };
		33CC10F22044A3C60003C045 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; name = Assets.xcassets; path = Runner/Assets.xcassets; sourceTree = "<group>"; };
		33CC10F52044A3C60003C045 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.xib; name = Base; path = Base.lproj/MainMenu.xib; sourceTree = "<group>"; };
		33CC10F72044A3C60003C045 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; name = Info.plist; path = Runner/Info.plist; sourceTree = "<group>"; };
		33CC11122044BFA00003C045 /* MainFlutterWindow.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MainFlutterWindow.swift; sourceTree = "<group>"; };
		33CEB47222A05771004F2AC0 /* Flutter-Debug.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = "Flutter-Debug.xcconfig"; sourceTree = "<group>"; };
		33CEB47422A05771004F2AC0 /* Flutter-Release.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = "Flutter-Release.xcconfig"; sourceTree = "<group>"; };
		33CEB47722A0578A004F2AC0 /* Flutter-Generated.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; name = "Flutter-Generated.xcconfig"; path = "ephemeral/Flutter-Generated.xcconfig"; sourceTree = "<group>"; };
		33E51913231747F40026EE4D /* DebugProfile.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = DebugProfile.entitlements; sourceTree = "<group>"; };
		33E51914231749380026EE4D /* Release.entitlements */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.entitlements; path = Release.entitlements; sourceTree = "<group>"; };
		33E5194F232828860026EE4D /* AppInfo.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = AppInfo.xcconfig; sourceTree = "<group>"; };
		6FDD8A902F607871AAC21564 /* Pods_RunnerTests.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = Pods_RunnerTests.framework; sourceTree = BUILT_PRODUCTS_DIR; };
		7AFA3C8E1D35360C0083082E /* Release.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = Release.xcconfig; sourceTree = "<group>"; };
		8C6D1508C07A543BCF20E922 /* Pods-Runner.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-Runner.debug.xcconfig"; path = "Target Support Files/Pods-Runner/Pods-Runner.debug.xcconfig"; sourceTree = "<group>"; };
		9740EEB21CF90195004384FC /* Debug.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; path = Debug.xcconfig; sourceTree = "<group>"; };
		A82F4113E9B77A3EFBDC76CB /* Pods-Runner.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-Runner.release.xcconfig"; path = "Target Support Files/Pods-Runner/Pods-Runner.release.xcconfig"; sourceTree = "<group>"; };
		C2292CF6D0521881EB8F30D6 /* Pods-Runner.profile.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-Runner.profile.xcconfig"; path = "Target Support Files/Pods-Runner/Pods-Runner.profile.xcconfig"; sourceTree = "<group>"; };
		D0400B19E48718CF50379B60 /* Pods-RunnerTests.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-RunnerTests.debug.xcconfig"; path = "Target Support Files/Pods-RunnerTests/Pods-RunnerTests.debug.xcconfig"; sourceTree = "<group>"; };
		E0928E31BD7FB7421B509154 /* Pods-RunnerTests.profile.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-RunnerTests.profile.xcconfig"; path = "Target Support Files/Pods-RunnerTests/Pods-RunnerTests.profile.xcconfig"; sourceTree = "<group>"; };
		FE1C524AA6E87A1F323D2F64 /* Pods-RunnerTests.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-RunnerTests.release.xcconfig"; path = "Target Support Files/Pods-RunnerTests/Pods-RunnerTests.release.xcconfig"; sourceTree = "<group>"; };
/* End PBXFileReference section */

/* Begin PBXFrameworksBuildPhase section */
		331C80D2294CF70F00263BE5 /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
				3FE622CE7FAD50CAB6A50227 /* Pods_RunnerTests.framework in Frameworks */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		33CC10EA2044A3C60003C045 /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
				B6BF18E4D30EDE6C4C5FD00D /* Pods_Runner.framework in Frameworks */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXFrameworksBuildPhase section */

/* Begin PBXGroup section */
		331C80D6294CF71000263BE5 /* RunnerTests */ = {
			isa = PBXGroup;
			children = (
				331C80D7294CF71000263BE5 /* RunnerTests.swift */,
			);
			path = RunnerTests;
			sourceTree = "<group>";
		};
		33BA886A226E78AF003329D5 /* Configs */ = {
			isa = PBXGroup;
			children = (
				33E5194F232828860026EE4D /* AppInfo.xcconfig */,
				9740EEB21CF90195004384FC /* Debug.xcconfig */,
				7AFA3C8E1D35360C0083082E /* Release.xcconfig */,
				333000ED22D3DE5D00554162 /* Warnings.xcconfig */,
			);
			path = Configs;
			sourceTree = "<group>";
		};
		33CC10E42044A3C60003C045 = {
			isa = PBXGroup;
			children = (
				33FAB671232836740065AC1E /* Runner */,
				33CEB47122A05771004F2AC0 /* Flutter */,
				331C80D6294CF71000263BE5 /* RunnerTests */,
				33CC10EE2044A3C60003C045 /* Products */,
				D73912EC22F37F3D000D13A0 /* Frameworks */,
				EFAE9269CAE479A42FBED805 /* Pods */,
			);
			sourceTree = "<group>";
		};
		33CC10EE2044A3C60003C045 /* Products */ = {
			isa = PBXGroup;
			children = (
				33CC10ED2044A3C60003C045 /* streaming_asr.app */,
				331C80D5294CF71000263BE5 /* RunnerTests.xctest */,
			);
			name = Products;
			sourceTree = "<group>";
		};
		33CC11242044D66E0003C045 /* Resources */ = {
			isa = PBXGroup;
			children = (
				33CC10F22044A3C60003C045 /* Assets.xcassets */,
				33CC10F42044A3C60003C045 /* MainMenu.xib */,
				33CC10F72044A3C60003C045 /* Info.plist */,
			);
			name = Resources;
			path = ..;
			sourceTree = "<group>";
		};
		33CEB47122A05771004F2AC0 /* Flutter */ = {
			isa = PBXGroup;
			children = (
				335BBD1A22A9A15E00E9071D /* GeneratedPluginRegistrant.swift */,
				33CEB47222A05771004F2AC0 /* Flutter-Debug.xcconfig */,
				33CEB47422A05771004F2AC0 /* Flutter-Release.xcconfig */,
				33CEB47722A0578A004F2AC0 /* Flutter-Generated.xcconfig */,
			);
			path = Flutter;
			sourceTree = "<group>";
		};
		33FAB671232836740065AC1E /* Runner */ = {
			isa = PBXGroup;
			children = (
				33CC10F02044A3C60003C045 /* AppDelegate.swift */,
				33CC11122044BFA00003C045 /* MainFlutterWindow.swift */,
				33E51913231747F40026EE4D /* DebugProfile.entitlements */,
				33E51914231749380026EE4D /* Release.entitlements */,
				33CC11242044D66E0003C045 /* Resources */,
				33BA886A226E78AF003329D5 /* Configs */,
			);
			path = Runner;
			sourceTree = "<group>";
		};
		D73912EC22F37F3D000D13A0 /* Frameworks */ = {
			isa = PBXGroup;
			children = (
				2DE4C5BCFCD3E0DF5FD13E12 /* Pods_Runner.framework */,
				6FDD8A902F607871AAC21564 /* Pods_RunnerTests.framework */,
			);
			name = Frameworks;
			sourceTree = "<group>";
		};
		EFAE9269CAE479A42FBED805 /* Pods */ = {
			isa = PBXGroup;
			children = (
				8C6D1508C07A543BCF20E922 /* Pods-Runner.debug.xcconfig */,
				A82F4113E9B77A3EFBDC76CB /* Pods-Runner.release.xcconfig */,
				C2292CF6D0521881EB8F30D6 /* Pods-Runner.profile.xcconfig */,
				D0400B19E48718CF50379B60 /* Pods-RunnerTests.debug.xcconfig */,
				FE1C524AA6E87A1F323D2F64 /* Pods-RunnerTests.release.xcconfig */,
				E0928E31BD7FB7421B509154 /* Pods-RunnerTests.profile.xcconfig */,
			);
			name = Pods;
			path = Pods;
			sourceTree = "<group>";
		};
/* End PBXGroup section */

/* Begin PBXNativeTarget section */
		331C80D4294CF70F00263BE5 /* RunnerTests */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = 331C80DE294CF71000263BE5 /* Build configuration list for PBXNativeTarget "RunnerTests" */;
			buildPhases = (
				82A3EDCE842FB1EFBCBBAD9F /* [CP] Check Pods Manifest.lock */,
				331C80D1294CF70F00263BE5 /* Sources */,
				331C80D2294CF70F00263BE5 /* Frameworks */,
				331C80D3294CF70F00263BE5 /* Resources */,
			);
			buildRules = (
			);
			dependencies = (
				331C80DA294CF71000263BE5 /* PBXTargetDependency */,
			);
			name = RunnerTests;
			productName = RunnerTests;
			productReference = 331C80D5294CF71000263BE5 /* RunnerTests.xctest */;
			productType = "com.apple.product-type.bundle.unit-test";
		};
		33CC10EC2044A3C60003C045 /* Runner */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = 33CC10FB2044A3C60003C045 /* Build configuration list for PBXNativeTarget "Runner" */;
			buildPhases = (
				9382924FF32AC828037E7DB5 /* [CP] Check Pods Manifest.lock */,
				33CC10E92044A3C60003C045 /* Sources */,
				33CC10EA2044A3C60003C045 /* Frameworks */,
				33CC10EB2044A3C60003C045 /* Resources */,
				33CC110E2044A8840003C045 /* Bundle Framework */,
				3399D490228B24CF009A79C7 /* ShellScript */,
				736059A98E6FCBCF66678C71 /* [CP] Embed Pods Frameworks */,
			);
			buildRules = (
			);
			dependencies = (
				33CC11202044C79F0003C045 /* PBXTargetDependency */,
			);
			name = Runner;
			productName = Runner;
			productReference = 33CC10ED2044A3C60003C045 /* streaming_asr.app */;
			productType = "com.apple.product-type.application";
		};
/* End PBXNativeTarget section */

/* Begin PBXProject section */
		33CC10E52044A3C60003C045 /* Project object */ = {
			isa = PBXProject;
			attributes = {
				BuildIndependentTargetsInParallel = YES;
				LastSwiftUpdateCheck = 0920;
				LastUpgradeCheck = 1510;
				ORGANIZATIONNAME = "";
				TargetAttributes = {
					331C80D4294CF70F00263BE5 = {
						CreatedOnToolsVersion = 14.0;
						TestTargetID = 33CC10EC2044A3C60003C045;
					};
					33CC10EC2044A3C60003C045 = {
						CreatedOnToolsVersion = 9.2;
						LastSwiftMigration = 1100;
						ProvisioningStyle = Automatic;
						SystemCapabilities = {
							com.apple.Sandbox = {
								enabled = 1;
							};
						};
					};
					33CC111A2044C6BA0003C045 = {
						CreatedOnToolsVersion = 9.2;
						ProvisioningStyle = Manual;
					};
				};
			};
			buildConfigurationList = 33CC10E82044A3C60003C045 /* Build configuration list for PBXProject "Runner" */;
			compatibilityVersion = "Xcode 9.3";
			developmentRegion = en;
			hasScannedForEncodings = 0;
			knownRegions = (
				en,
				Base,
			);
			mainGroup = 33CC10E42044A3C60003C045;
			productRefGroup = 33CC10EE2044A3C60003C045 /* Products */;
			projectDirPath = "";
			projectRoot = "";
			targets = (
				33CC10EC2044A3C60003C045 /* Runner */,
				331C80D4294CF70F00263BE5 /* RunnerTests */,
				33CC111A2044C6BA0003C045 /* Flutter Assemble */,
			);
		};
/* End PBXProject section */

/* Begin PBXResourcesBuildPhase section */
		331C80D3294CF70F00263BE5 /* Resources */ = {
			isa = PBXResourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		33CC10EB2044A3C60003C045 /* Resources */ = {
			isa = PBXResourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				33CC10F32044A3C60003C045 /* Assets.xcassets in Resources */,
				33CC10F62044A3C60003C045 /* MainMenu.xib in Resources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXResourcesBuildPhase section */

/* Begin PBXShellScriptBuildPhase section */
		3399D490228B24CF009A79C7 /* ShellScript */ = {
			isa = PBXShellScriptBuildPhase;
			alwaysOutOfDate = 1;
			buildActionMask = 2147483647;
			files = (
			);
			inputFileListPaths = (
			);
			inputPaths = (
			);
			outputFileListPaths = (
			);
			outputPaths = (
			);
			runOnlyForDeploymentPostprocessing = 0;
			shellPath = /bin/sh;
			shellScript = "echo \"$PRODUCT_NAME.app\" > \"$PROJECT_DIR\"/Flutter/ephemeral/.app_filename && \"$FLUTTER_ROOT\"/packages/flutter_tools/bin/macos_assemble.sh embed\n";
		};
		33CC111E2044C6BF0003C045 /* ShellScript */ = {
			isa = PBXShellScriptBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			inputFileListPaths = (
				Flutter/ephemeral/FlutterInputs.xcfilelist,
			);
			inputPaths = (
				Flutter/ephemeral/tripwire,
			);
			outputFileListPaths = (
				Flutter/ephemeral/FlutterOutputs.xcfilelist,
			);
			outputPaths = (
			);
			runOnlyForDeploymentPostprocessing = 0;
			shellPath = /bin/sh;
			shellScript = "\"$FLUTTER_ROOT\"/packages/flutter_tools/bin/macos_assemble.sh && touch Flutter/ephemeral/tripwire";
		};
		736059A98E6FCBCF66678C71 /* [CP] Embed Pods Frameworks */ = {
			isa = PBXShellScriptBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			inputFileListPaths = (
				"${PODS_ROOT}/Target Support Files/Pods-Runner/Pods-Runner-frameworks-${CONFIGURATION}-input-files.xcfilelist",
			);
			name = "[CP] Embed Pods Frameworks";
			outputFileListPaths = (
				"${PODS_ROOT}/Target Support Files/Pods-Runner/Pods-Runner-frameworks-${CONFIGURATION}-output-files.xcfilelist",
			);
			runOnlyForDeploymentPostprocessing = 0;
			shellPath = /bin/sh;
			shellScript = "\"${PODS_ROOT}/Target Support Files/Pods-Runner/Pods-Runner-frameworks.sh\"\n";
			showEnvVarsInLog = 0;
		};
		82A3EDCE842FB1EFBCBBAD9F /* [CP] Check Pods Manifest.lock */ = {
			isa = PBXShellScriptBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			inputFileListPaths = (
			);
			inputPaths = (
				"${PODS_PODFILE_DIR_PATH}/Podfile.lock",
				"${PODS_ROOT}/Manifest.lock",
			);
			name = "[CP] Check Pods Manifest.lock";
			outputFileListPaths = (
			);
			outputPaths = (
				"$(DERIVED_FILE_DIR)/Pods-RunnerTests-checkManifestLockResult.txt",
			);
			runOnlyForDeploymentPostprocessing = 0;
			shellPath = /bin/sh;
			shellScript = "diff \"${PODS_PODFILE_DIR_PATH}/Podfile.lock\" \"${PODS_ROOT}/Manifest.lock\" > /dev/null\nif [ $? != 0 ] ; then\n    # print error to STDERR\n    echo \"error: The sandbox is not in sync with the Podfile.lock. Run 'pod install' or update your CocoaPods installation.\" >&2\n    exit 1\nfi\n# This output is used by Xcode 'outputs' to avoid re-running this script phase.\necho \"SUCCESS\" > \"${SCRIPT_OUTPUT_FILE_0}\"\n";
			showEnvVarsInLog = 0;
		};
		9382924FF32AC828037E7DB5 /* [CP] Check Pods Manifest.lock */ = {
			isa = PBXShellScriptBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			inputFileListPaths = (
			);
			inputPaths = (
				"${PODS_PODFILE_DIR_PATH}/Podfile.lock",
				"${PODS_ROOT}/Manifest.lock",
			);
			name = "[CP] Check Pods Manifest.lock";
			outputFileListPaths = (
			);
			outputPaths = (
				"$(DERIVED_FILE_DIR)/Pods-Runner-checkManifestLockResult.txt",
			);
			runOnlyForDeploymentPostprocessing = 0;
			shellPath = /bin/sh;
			shellScript = "diff \"${PODS_PODFILE_DIR_PATH}/Podfile.lock\" \"${PODS_ROOT}/Manifest.lock\" > /dev/null\nif [ $? != 0 ] ; then\n    # print error to STDERR\n    echo \"error: The sandbox is not in sync with the Podfile.lock. Run 'pod install' or update your CocoaPods installation.\" >&2\n    exit 1\nfi\n# This output is used by Xcode 'outputs' to avoid re-running this script phase.\necho \"SUCCESS\" > \"${SCRIPT_OUTPUT_FILE_0}\"\n";
			showEnvVarsInLog = 0;
		};
/* End PBXShellScriptBuildPhase section */

/* Begin PBXSourcesBuildPhase section */
		331C80D1294CF70F00263BE5 /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				331C80D8294CF71000263BE5 /* RunnerTests.swift in Sources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		33CC10E92044A3C60003C045 /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				33CC11132044BFA00003C045 /* MainFlutterWindow.swift in Sources */,
				33CC10F12044A3C60003C045 /* AppDelegate.swift in Sources */,
				335BBD1B22A9A15E00E9071D /* GeneratedPluginRegistrant.swift in Sources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXSourcesBuildPhase section */

/* Begin PBXTargetDependency section */
		331C80DA294CF71000263BE5 /* PBXTargetDependency */ = {
			isa = PBXTargetDependency;
			target = 33CC10EC2044A3C60003C045 /* Runner */;
			targetProxy = 331C80D9294CF71000263BE5 /* PBXContainerItemProxy */;
		};
		33CC11202044C79F0003C045 /* PBXTargetDependency */ = {
			isa = PBXTargetDependency;
			target = 33CC111A2044C6BA0003C045 /* Flutter Assemble */;
			targetProxy = 33CC111F2044C79F0003C045 /* PBXContainerItemProxy */;
		};
/* End PBXTargetDependency section */

/* Begin PBXVariantGroup section */
		33CC10F42044A3C60003C045 /* MainMenu.xib */ = {
			isa = PBXVariantGroup;
			children = (
				33CC10F52044A3C60003C045 /* Base */,
			);
			name = MainMenu.xib;
			path = Runner;
			sourceTree = "<group>";
		};
/* End PBXVariantGroup section */

/* Begin XCBuildConfiguration section */
		331C80DB294CF71000263BE5 /* Debug */ = {
			isa = XCBuildConfiguration;
			baseConfigurationReference = D0400B19E48718CF50379B60 /* Pods-RunnerTests.debug.xcconfig */;
			buildSettings = {
				BUNDLE_LOADER = "$(TEST_HOST)";
				CURRENT_PROJECT_VERSION = 1;
				GENERATE_INFOPLIST_FILE = YES;
				MARKETING_VERSION = 1.0;
				PRODUCT_BUNDLE_IDENTIFIER = com.example.streamingAsr.RunnerTests;
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_VERSION = 5.0;
				TEST_HOST = "$(BUILT_PRODUCTS_DIR)/streaming_asr.app/$(BUNDLE_EXECUTABLE_FOLDER_PATH)/streaming_asr";
			};
			name = Debug;
		};
		331C80DC294CF71000263BE5 /* Release */ = {
			isa = XCBuildConfiguration;
			baseConfigurationReference = FE1C524AA6E87A1F323D2F64 /* Pods-RunnerTests.release.xcconfig */;
			buildSettings = {
				BUNDLE_LOADER = "$(TEST_HOST)";
				CURRENT_PROJECT_VERSION = 1;
				GENERATE_INFOPLIST_FILE = YES;
				MARKETING_VERSION = 1.0;
				PRODUCT_BUNDLE_IDENTIFIER = com.example.streamingAsr.RunnerTests;
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_VERSION = 5.0;
				TEST_HOST = "$(BUILT_PRODUCTS_DIR)/streaming_asr.app/$(BUNDLE_EXECUTABLE_FOLDER_PATH)/streaming_asr";
			};
			name = Release;
		};
		331C80DD294CF71000263BE5 /* Profile */ = {
			isa = XCBuildConfiguration;
			baseConfigurationReference = E0928E31BD7FB7421B509154 /* Pods-RunnerTests.profile.xcconfig */;
			buildSettings = {
				BUNDLE_LOADER = "$(TEST_HOST)";
				CURRENT_PROJECT_VERSION = 1;
				GENERATE_INFOPLIST_FILE = YES;
				MARKETING_VERSION = 1.0;
				PRODUCT_BUNDLE_IDENTIFIER = com.example.streamingAsr.RunnerTests;
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_VERSION = 5.0;
				TEST_HOST = "$(BUILT_PRODUCTS_DIR)/streaming_asr.app/$(BUNDLE_EXECUTABLE_FOLDER_PATH)/streaming_asr";
			};
			name = Profile;
		};
		338D0CE9231458BD00FA5F75 /* Profile */ = {
			isa = XCBuildConfiguration;
			baseConfigurationReference = 7AFA3C8E1D35360C0083082E /* Release.xcconfig */;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
				CLANG_ANALYZER_NONNULL = YES;
				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
				CLANG_CXX_LIBRARY = "libc++";
				CLANG_ENABLE_MODULES = YES;
				CLANG_ENABLE_OBJC_ARC = YES;
				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
				CLANG_WARN_BOOL_CONVERSION = YES;
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
				CLANG_WARN_SUSPICIOUS_MOVE = YES;
				CODE_SIGN_IDENTITY = "-";
				COPY_PHASE_STRIP = NO;
				DEAD_CODE_STRIPPING = YES;
				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
				ENABLE_NS_ASSERTIONS = NO;
				ENABLE_STRICT_OBJC_MSGSEND = YES;
				ENABLE_USER_SCRIPT_SANDBOXING = NO;
				GCC_C_LANGUAGE_STANDARD = gnu11;
				GCC_NO_COMMON_BLOCKS = YES;
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
				GCC_WARN_UNUSED_FUNCTION = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				MACOSX_DEPLOYMENT_TARGET = 10.15;
				MTL_ENABLE_DEBUG_INFO = NO;
				SDKROOT = macosx;
				SWIFT_COMPILATION_MODE = wholemodule;
				SWIFT_OPTIMIZATION_LEVEL = "-O";
			};
			name = Profile;
		};
		338D0CEA231458BD00FA5F75 /* Profile */ = {
			isa = XCBuildConfiguration;
			baseConfigurationReference = 33E5194F232828860026EE4D /* AppInfo.xcconfig */;
			buildSettings = {
				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
				CLANG_ENABLE_MODULES = YES;
				CODE_SIGN_ENTITLEMENTS = Runner/DebugProfile.entitlements;
				CODE_SIGN_STYLE = Automatic;
				COMBINE_HIDPI_IMAGES = YES;
				INFOPLIST_FILE = Runner/Info.plist;
				LD_RUNPATH_SEARCH_PATHS = (
					"$(inherited)",
					"@executable_path/../Frameworks",
				);
				PROVISIONING_PROFILE_SPECIFIER = "";
				SWIFT_VERSION = 5.0;
			};
			name = Profile;
		};
		338D0CEB231458BD00FA5F75 /* Profile */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				CODE_SIGN_STYLE = Manual;
				PRODUCT_NAME = "$(TARGET_NAME)";
			};
			name = Profile;
		};
		33CC10F92044A3C60003C045 /* Debug */ = {
			isa = XCBuildConfiguration;
			baseConfigurationReference = 9740EEB21CF90195004384FC /* Debug.xcconfig */;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
				CLANG_ANALYZER_NONNULL = YES;
				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
				CLANG_CXX_LIBRARY = "libc++";
				CLANG_ENABLE_MODULES = YES;
				CLANG_ENABLE_OBJC_ARC = YES;
				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
				CLANG_WARN_BOOL_CONVERSION = YES;
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
				CLANG_WARN_SUSPICIOUS_MOVE = YES;
				CODE_SIGN_IDENTITY = "-";
				COPY_PHASE_STRIP = NO;
				DEAD_CODE_STRIPPING = YES;
				DEBUG_INFORMATION_FORMAT = dwarf;
				ENABLE_STRICT_OBJC_MSGSEND = YES;
				ENABLE_TESTABILITY = YES;
				ENABLE_USER_SCRIPT_SANDBOXING = NO;
				GCC_C_LANGUAGE_STANDARD = gnu11;
				GCC_DYNAMIC_NO_PIC = NO;
				GCC_NO_COMMON_BLOCKS = YES;
				GCC_OPTIMIZATION_LEVEL = 0;
				GCC_PREPROCESSOR_DEFINITIONS = (
					"DEBUG=1",
					"$(inherited)",
				);
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
				GCC_WARN_UNUSED_FUNCTION = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				MACOSX_DEPLOYMENT_TARGET = 10.15;
				MTL_ENABLE_DEBUG_INFO = YES;
				ONLY_ACTIVE_ARCH = YES;
				SDKROOT = macosx;
				SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
			};
			name = Debug;
		};
		33CC10FA2044A3C60003C045 /* Release */ = {
			isa = XCBuildConfiguration;
			baseConfigurationReference = 7AFA3C8E1D35360C0083082E /* Release.xcconfig */;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
				CLANG_ANALYZER_NONNULL = YES;
				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
				CLANG_CXX_LIBRARY = "libc++";
				CLANG_ENABLE_MODULES = YES;
				CLANG_ENABLE_OBJC_ARC = YES;
				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
				CLANG_WARN_BOOL_CONVERSION = YES;
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
				CLANG_WARN_SUSPICIOUS_MOVE = YES;
				CODE_SIGN_IDENTITY = "-";
				COPY_PHASE_STRIP = NO;
				DEAD_CODE_STRIPPING = YES;
				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
				ENABLE_NS_ASSERTIONS = NO;
				ENABLE_STRICT_OBJC_MSGSEND = YES;
				ENABLE_USER_SCRIPT_SANDBOXING = NO;
				GCC_C_LANGUAGE_STANDARD = gnu11;
				GCC_NO_COMMON_BLOCKS = YES;
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
				GCC_WARN_UNUSED_FUNCTION = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				MACOSX_DEPLOYMENT_TARGET = 10.15;
				MTL_ENABLE_DEBUG_INFO = NO;
				SDKROOT = macosx;
				SWIFT_COMPILATION_MODE = wholemodule;
				SWIFT_OPTIMIZATION_LEVEL = "-O";
			};
			name = Release;
		};
		33CC10FC2044A3C60003C045 /* Debug */ = {
			isa = XCBuildConfiguration;
			baseConfigurationReference = 33E5194F232828860026EE4D /* AppInfo.xcconfig */;
			buildSettings = {
				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
				CLANG_ENABLE_MODULES = YES;
				CODE_SIGN_ENTITLEMENTS = Runner/DebugProfile.entitlements;
				CODE_SIGN_STYLE = Automatic;
				COMBINE_HIDPI_IMAGES = YES;
				INFOPLIST_FILE = Runner/Info.plist;
				LD_RUNPATH_SEARCH_PATHS = (
					"$(inherited)",
					"@executable_path/../Frameworks",
				);
				PROVISIONING_PROFILE_SPECIFIER = "";
				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
				SWIFT_VERSION = 5.0;
			};
			name = Debug;
		};
		33CC10FD2044A3C60003C045 /* Release */ = {
			isa = XCBuildConfiguration;
			baseConfigurationReference = 33E5194F232828860026EE4D /* AppInfo.xcconfig */;
			buildSettings = {
				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
				CLANG_ENABLE_MODULES = YES;
				CODE_SIGN_ENTITLEMENTS = Runner/Release.entitlements;
				CODE_SIGN_STYLE = Automatic;
				COMBINE_HIDPI_IMAGES = YES;
				INFOPLIST_FILE = Runner/Info.plist;
				LD_RUNPATH_SEARCH_PATHS = (
					"$(inherited)",
					"@executable_path/../Frameworks",
				);
				PROVISIONING_PROFILE_SPECIFIER = "";
				SWIFT_VERSION = 5.0;
			};
			name = Release;
		};
		33CC111C2044C6BA0003C045 /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				CODE_SIGN_STYLE = Manual;
				PRODUCT_NAME = "$(TARGET_NAME)";
			};
			name = Debug;
		};
		33CC111D2044C6BA0003C045 /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				CODE_SIGN_STYLE = Automatic;
				PRODUCT_NAME = "$(TARGET_NAME)";
			};
			name = Release;
		};
/* End XCBuildConfiguration section */

/* Begin XCConfigurationList section */
		331C80DE294CF71000263BE5 /* Build configuration list for PBXNativeTarget "RunnerTests" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				331C80DB294CF71000263BE5 /* Debug */,
				331C80DC294CF71000263BE5 /* Release */,
				331C80DD294CF71000263BE5 /* Profile */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		33CC10E82044A3C60003C045 /* Build configuration list for PBXProject "Runner" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				33CC10F92044A3C60003C045 /* Debug */,
				33CC10FA2044A3C60003C045 /* Release */,
				338D0CE9231458BD00FA5F75 /* Profile */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		33CC10FB2044A3C60003C045 /* Build configuration list for PBXNativeTarget "Runner" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				33CC10FC2044A3C60003C045 /* Debug */,
				33CC10FD2044A3C60003C045 /* Release */,
				338D0CEA231458BD00FA5F75 /* Profile */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		33CC111B2044C6BA0003C045 /* Build configuration list for PBXAggregateTarget "Flutter Assemble" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				33CC111C2044C6BA0003C045 /* Debug */,
				33CC111D2044C6BA0003C045 /* Release */,
				338D0CEB231458BD00FA5F75 /* Profile */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
/* End XCConfigurationList section */
	};
	rootObject = 33CC10E52044A3C60003C045 /* Project object */;
}


================================================
FILE: flutter-examples/non_streaming_vad_asr/macos/Runner.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>IDEDidComputeMac32BitWarning</key>
	<true/>
</dict>
</plist>


================================================
FILE: flutter-examples/non_streaming_vad_asr/macos/Runner.xcodeproj/xcshareddata/xcschemes/Runner.xcscheme
================================================
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
   LastUpgradeVersion = "1510"
   version = "1.3">
   <BuildAction
      parallelizeBuildables = "YES"
      buildImplicitDependencies = "YES">
      <BuildActionEntries>
         <BuildActionEntry
            buildForTesting = "YES"
            buildForRunning = "YES"
            buildForProfiling = "YES"
            buildForArchiving = "YES"
            buildForAnalyzing = "YES">
            <BuildableReference
               BuildableIdentifier = "primary"
               BlueprintIdentifier = "33CC10EC2044A3C60003C045"
               BuildableName = "non_streaming_vad_asr.app"
               BlueprintName = "Runner"
               ReferencedContainer = "container:Runner.xcodeproj">
            </BuildableReference>
         </BuildActionEntry>
      </BuildActionEntries>
   </BuildAction>
   <TestAction
      buildConfiguration = "Debug"
      selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
      selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
      shouldUseLaunchSchemeArgsEnv = "YES">
      <MacroExpansion>
         <BuildableReference
            BuildableIdentifier = "primary"
            BlueprintIdentifier = "33CC10EC2044A3C60003C045"
            BuildableName = "streaming_asr.app"
            BlueprintName = "Runner"
            ReferencedContainer = "container:Runner.xcodeproj">
         </BuildableReference>
      </MacroExpansion>
      <Testables>
         <TestableReference
            skipped = "NO"
            parallelizable = "YES">
            <BuildableReference
               BuildableIdentifier = "primary"
               BlueprintIdentifier = "331C80D4294CF70F00263BE5"
               BuildableName = "RunnerTests.xctest"
               BlueprintName = "RunnerTests"
               ReferencedContainer = "container:Runner.xcodeproj">
            </BuildableReference>
         </TestableReference>
      </Testables>
   </TestAction>
   <LaunchAction
      buildConfiguration = "Debug"
      selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
      selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
      launchStyle = "0"
      useCustomWorkingDirectory = "NO"
      ignoresPersistentStateOnLaunch = "NO"
      debugDocumentVersioning = "YES"
      debugServiceExtension = "internal"
      enableGPUValidationMode = "1"
      allowLocationSimulation = "YES">
      <BuildableProductRunnable
         runnableDebuggingMode = "0">
         <BuildableReference
            BuildableIdentifier = "primary"
            BlueprintIdentifier = "33CC10EC2044A3C60003C045"
            BuildableName = "streaming_asr.app"
            BlueprintName = "Runner"
            ReferencedContainer = "container:Runner.xcodeproj">
         </BuildableReference>
      </BuildableProductRunnable>
   </LaunchAction>
   <ProfileAction
      buildConfiguration = "Profile"
      shouldUseLaunchSchemeArgsEnv = "YES"
      savedToolIdentifier = ""
      useCustomWorkingDirectory = "NO"
      debugDocumentVersioning = "YES">
      <BuildableProductRunnable
         runnableDebuggingMode = "0">
         <BuildableReference
            BuildableIdentifier = "primary"
            BlueprintIdentifier = "33CC10EC2044A3C60003C045"
            BuildableName = "streaming_asr.app"
            BlueprintName = "Runner"
            ReferencedContainer = "container:Runner.xcodeproj">
         </BuildableReference>
      </BuildableProductRunnable>
   </ProfileAction>
   <AnalyzeAction
      buildConfiguration = "Debug">
   </AnalyzeAction>
   <ArchiveAction
      buildConfiguration = "Release"
      revealArchiveInOrganizer = "YES">
   </ArchiveAction>
</Scheme>


================================================
FILE: flutter-examples/non_streaming_vad_asr/macos/Runner.xcworkspace/contents.xcworkspacedata
================================================
<?xml version="1.0" encoding="UTF-8"?>
<Workspace
   version = "1.0">
   <FileRef
      location = "group:Runner.xcodeproj">
   </FileRef>
   <FileRef
      location = "group:Pods/Pods.xcodeproj">
   </FileRef>
</Workspace>


================================================
FILE: flutter-examples/non_streaming_vad_asr/macos/Runner.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>IDEDidComputeMac32BitWarning</key>
	<true/>
</dict>
</plist>


================================================
FILE: flutter-examples/non_streaming_vad_asr/macos/RunnerTests/RunnerTests.swift
================================================
import Cocoa
import FlutterMacOS
import XCTest

class RunnerTests: XCTestCase {

  func testExample() {
    // If you add code to the Runner application, consider adding tests here.
    // See https://developer.apple.com/documentation/xctest for more information about using XCTest.
  }

}


================================================
FILE: flutter-examples/non_streaming_vad_asr/pubspec.yaml
================================================
name: non_streaming_vad_asr

description: >
  This example shows how to implement "real-time" speech recognition using sherpa-onnx via non_streaming and vad.

publish_to: 'none'

version: 1.12.31

topics:
  - speech-recognition

issue_tracker: https://github.com/k2-fsa/sherpa-onnx/issues

repository: https://github.com/k2-fsa/sherpa-onnx/tree/master/sherpa-onnx/flutter

environment:
  sdk: ">=2.17.0 <4.0.0"
  flutter: ">=2.8.1"

dependencies:
  flutter:
    sdk: flutter

  cupertino_icons: ^1.0.6

  path_provider: ^2.1.3
  path: ^1.9.0

  # Note: record does not support Linux for streaming ASR
  record: 6.0.0
  url_launcher: ^6.2.6

  sherpa_onnx: ^1.12.31
  # sherpa_onnx:
  #   path: ../../flutter/sherpa_onnx

dev_dependencies:
  flutter_test:
    sdk: flutter

  flutter_lints: ^3.0.0

flutter:
  uses-material-design: true

  assets:
    - assets/
    #- assets/whisper/
    #- assets/senseVoice/
    - assets/nemo_transducer/
    # - assets/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/


================================================
FILE: flutter-examples/streaming_asr/.gitignore
================================================
# Miscellaneous
*.class
*.log
*.pyc
*.swp
.DS_Store
.atom/
.buildlog/
.history
.svn/
migrate_working_dir/

# IntelliJ related
*.iml
*.ipr
*.iws
.idea/

# The .vscode folder contains launch configuration and tasks you configure in
# VS Code which you may wish to be included in version control, so this line
# is commented out by default.
#.vscode/

# Flutter/Dart/Pub related
**/doc/api/
**/ios/Flutter/.last_build_id
.dart_tool/
.flutter-plugins
.flutter-plugins-dependencies
.pub-cache/
.pub/
/build/

# Symbolication related
app.*.symbols

# Obfuscation related
app.*.map.json

# Android Studio will place build artifacts here
/android/app/debug
/android/app/profile
/android/app/release


================================================
FILE: flutter-examples/streaming_asr/.metadata
================================================
# This file tracks properties of this Flutter project.
# Used by Flutter tool to assess capabilities and perform upgrades etc.
#
# This file should be version controlled and should not be manually edited.

version:
  revision: "9f455d2486bcb28cad87b062475f42edc959f636"
  channel: "stable"

project_type: app

# Tracks metadata for the flutter migrate command
migration:
  platforms:
    - platform: root
      create_revision: 9f455d2486bcb28cad87b062475f42edc959f636
      base_revision: 9f455d2486bcb28cad87b062475f42edc959f636
    - platform: linux
      create_revision: 9f455d2486bcb28cad87b062475f42edc959f636
      base_revision: 9f455d2486bcb28cad87b062475f42edc959f636

  # User provided section

  # List of Local paths (relative to this file) that should be
  # ignored by the migrate tool.
  #
  # Files that are not part of the templates will be ignored by default.
  unmanaged_files:
    - 'lib/main.dart'
    - 'ios/Runner.xcodeproj/project.pbxproj'


================================================
FILE: flutter-examples/streaming_asr/README.md
================================================
# Real-time speech recognition

This APP supports the following platforms:

  - Windows
  - macOS
  - Linux
  - Android
  - iOS

Pre-built APPs for this folder can be found at <https://k2-fsa.github.io/sherpa/onnx/flutter/pre-built-app.html#streaming-speech-recognition-stt-asr>

See also <https://github.com/Jason-chen-coder/Flutter-EasySpeechRecognition>

## Getting Started

Remember to use the following steps to download a model. Otherwise, you would
get errors after you start and run the app.

###  1. Select a streaming model

Please visit <https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models>
to download a streaming ASR model.

You can find introductions about each streaming model at
<https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html>


Note: `Streaming` is the same as `Online` in this context.

### 2. Let the code know which model you are using

We have pre-configured some streaming models in the following file

<https://github.com/k2-fsa/sherpa-onnx/blob/master/flutter-examples/streaming_asr/lib/online_model.dart>

If you select a model that is not in the above file, please add it to the above file
by yourself by following how existing models are added.

Then you need to update

<https://github.com/k2-fsa/sherpa-onnx/blob/master/flutter-examples/streaming_asr/lib/streaming_asr.dart#L16>

```
final type = 0;
```

Please change ``type`` accordingly.

You also need to change [./pubspec.yaml](./pubspec.yaml) so that your APP knows where to find it.
Please see the example below for how to do that.

### 3. Place your downloaded model inside the directory assets

The downloaded model has to be placed in the [assets](./assets) directory.

**HINT**: Please delete files that are not needed by the code. Otherwise, you put
unnecessary files in your APP and it will significantly increase the size of your APP.

## Example

Suppose you have selected the following model

<https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2>

Please use the following steps to make it available in your APP.

 - 1. Change [online_model.dart](./lib/online_model.dart)

    This model is already in the file and its type is `0`, so there is no need to change this file.

 - 2. Change [streaming_asr.dart](./lib/streaming_asr.dart)

    The default value for `type` is 0 and our model has also a type of `0`, so there is no need to change this file.

 - 3. Change [pubspec.yaml](./pubspec.yaml)

   At the end of [pubspec.yaml](./pubspec.yaml), please change it exactly like below:

```
  assets:
    - assets/
    - assets/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/
```

  - 4. Download the model to the [./assets](./assets) directory.

```
cd assets
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2

# Remeber to remove unused files.
rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs
rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/README.md
rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/bpe*
rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx
rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.int8.onnx
rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx
```

Your [assets](./assets) directory should look like below at the end.

```
assets/
└── sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
    ├── decoder-epoch-99-avg-1.onnx
    ├── encoder-epoch-99-avg-1.int8.onnx
    ├── joiner-epoch-99-avg-1.onnx
    └── tokens.txt

1 directory, 4 files
```

  - 5. Run it!

    For instance

      - `flutter run -d macos` for macOS.

      - `flutter run -d windows` for windows.


================================================
FILE: flutter-examples/streaming_asr/analysis_options.yaml
================================================
# This file configures the analyzer, which statically analyzes Dart code to
# check for errors, warnings, and lints.
#
# The issues identified by the analyzer are surfaced in the UI of Dart-enabled
# IDEs (https://dart.dev/tools#ides-and-editors). The analyzer can also be
# invoked from the command line by running `flutter analyze`.

# The following line activates a set of recommended lints for Flutter apps,
# packages, and plugins designed to encourage good coding practices.
include: package:flutter_lints/flutter.yaml

linter:
  # The lint rules applied to this project can be customized in the
  # section below to disable rules from the `package:flutter_lints/flutter.yaml`
  # included above or to enable additional rules. A list of all available lints
  # and their documentation is published at https://dart.dev/lints.
  #
  # Instead of disabling a lint rule for the entire project in the
  # section below, it can also be suppressed for a single line of code
  # or a specific dart file by using the `// ignore: name_of_lint` and
  # `// ignore_for_file: name_of_lint` syntax on the line or in the file
  # producing the lint.
  rules:
    # avoid_print: false  # Uncomment to disable the `avoid_print` rule
    # prefer_single_quotes: true  # Uncomment to enable the `prefer_single_quotes` rule

# Additional information about this file can be found at
# https://dart.dev/guides/language/analysis-options


================================================
FILE: flutter-examples/streaming_asr/android/.gitignore
================================================
gradle-wrapper.jar
/.gradle
/captures/
/gradlew
/gradlew.bat
/local.properties
GeneratedPluginRegistrant.java

# Remember to never publicly share your keystore.
# See https://flutter.dev/docs/deployment/android#reference-the-keystore-from-the-app
key.properties
**/*.keystore
**/*.jks


================================================
FILE: flutter-examples/streaming_asr/android/app/build.gradle
================================================
plugins {
    id "com.android.application"
    id "kotlin-android"
    // The Flutter Gradle Plugin must be applied after the Android and Kotlin Gradle plugins.
    id "dev.flutter.flutter-gradle-plugin"
}

def localProperties = new Properties()
def localPropertiesFile = rootProject.file("local.properties")
if (localPropertiesFile.exists()) {
    localPropertiesFile.withReader("UTF-8") { reader ->
        localProperties.load(reader)
    }
}

def flutterVersionCode = localProperties.getProperty("flutter.versionCode")
if (flutterVersionCode == null) {
    flutterVersionCode = "1"
}

def flutterVersionName = localProperties.getProperty("flutter.versionName")
if (flutterVersionName == null) {
    flutterVersionName = "1.0"
}

android {
    namespace = "com.k2fsa.sherpa.onnx.streaming_asr"
    compileSdk = 36
    ndkVersion = "27.0.12077973"

    compileOptions {
        sourceCompatibility = JavaVersion.toVersion(17)
        targetCompatibility = JavaVersion.toVersion(17)
    }

    kotlinOptions {
        jvmTarget = '17'
    }

    java {
        toolchain {
            languageVersion = JavaLanguageVersion.of(17)
        }
    }

    defaultConfig {
        // TODO: Specify your own unique Application ID (https://developer.android.com/studio/build/application-id.html).
        applicationId = "com.k2fsa.sherpa.onnx.streaming_asr"
        // You can update the following values to match your application needs.
        // For more information, see: https://docs.flutter.dev/deployment/android#reviewing-the-gradle-build-configuration.
        minSdk = 23
        targetSdk = 36
        versionCode = flutterVersionCode.toInteger()
        versionName = flutterVersionName
    }

    buildTypes {
        release {
            // TODO: Add your own signing config for the release build.
            // Signing with the debug keys for now, so `flutter run --release` works.
            signingConfig = signingConfigs.debug
        }
    }
}

flutter {
    source = "../.."
}


================================================
FILE: flutter-examples/streaming_asr/android/app/src/debug/AndroidManifest.xml
================================================
<manifest xmlns:android="http://schemas.android.com/apk/res/android">
    <!-- The INTERNET permission is required for development. Specifically,
         the Flutter tool needs it to communicate with the running application
         to allow setting breakpoints, to provide hot reload, etc.
    -->
    <uses-permission android:name="android.permission.INTERNET"/>
</manifest>


================================================
FILE: flutter-examples/streaming_asr/android/app/src/main/AndroidManifest.xml
================================================
<manifest xmlns:android="http://schemas.android.com/apk/res/android">
    <uses-permission android:name="android.permission.RECORD_AUDIO" />
    <!-- Optional: Add this permission if you want to use bluetooth telephony device like headset/earbuds -->
    <uses-permission android:name="android.permission.MODIFY_AUDIO_SETTINGS" />
    <!-- Optional: Add this permission if you want to save your recordings in public folders -->
    <uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE" />

    <application
        android:label="streaming_asr"
        android:name="${applicationName}"
        android:icon="@mipmap/ic_launcher">
        <activity
            android:name=".MainActivity"
            android:exported="true"
            android:launchMode="singleTop"
            android:taskAffinity=""
            android:theme="@style/LaunchTheme"
            android:configChanges="orientation|keyboardHidden|keyboard|screenSize|smallestScreenSize|locale|layoutDirection|fontScale|screenLayout|density|uiMode"
            android:hardwareAccelerated="true"
            android:windowSoftInputMode="adjustResize">
            <!-- Specifies an Android theme to apply to this Activity as soon as
                 the Android process has started. This theme is visible to the user
                 while the Flutter UI initializes. After that, this theme continues
                 to determine the Window background behind the Flutter UI. -->
            <meta-data
              android:name="io.flutter.embedding.android.NormalTheme"
              android:resource="@style/NormalTheme"
              />
            <intent-filter>
                <action android:name="android.intent.action.MAIN"/>
                <category android:name="android.intent.category.LAUNCHER"/>
            </intent-filter>
        </activity>
        <!-- Don't delete the meta-data below.
             This is used by the Flutter tool to generate GeneratedPluginRegistrant.java -->
        <meta-data
            android:name="flutterEmbedding"
            android:value="2" />
    </application>
    <!-- Required to query activities that can process text, see:
         https://developer.android.com/training/package-visibility and
         https://developer.android.com/reference/android/content/Intent#ACTION_PROCESS_TEXT.

         In particular, this is used by the Flutter engine in io.flutter.plugin.text.ProcessTextPlugin. -->
    <queries>
        <intent>
            <action android:name="android.intent.action.PROCESS_TEXT"/>
            <data android:mimeType="text/plain"/>
        </intent>
    </queries>
</manifest>


================================================
FILE: flutter-examples/streaming_asr/android/app/src/main/kotlin/com/k2fsa/sherpa/onnx/streaming_asr/MainActivity.kt
================================================
package com.k2fsa.sherpa.onnx.streaming_asr

import io.flutter.embedding.android.FlutterActivity

class MainActivity: FlutterActivity()


================================================
FILE: flutter-examples/streaming_asr/android/app/src/main/res/drawable/launch_background.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<!-- Modify this file to customize your launch splash screen -->
<layer-list xmlns:android="http://schemas.android.com/apk/res/android">
    <item android:drawable="@android:color/white" />

    <!-- You can insert your own image assets here -->
    <!-- <item>
        <bitmap
            android:gravity="center"
            android:src="@mipmap/launch_image" />
    </item> -->
</layer-list>


================================================
FILE: flutter-examples/streaming_asr/android/app/src/main/res/drawable-v21/launch_background.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<!-- Modify this file to customize your launch splash screen -->
<layer-list xmlns:android="http://schemas.android.com/apk/res/android">
    <item android:drawable="?android:colorBackground" />

    <!-- You can insert your own image assets here -->
    <!-- <item>
        <bitmap
            android:gravity="center"
            android:src="@mipmap/launch_image" />
    </item> -->
</layer-list>


================================================
FILE: flutter-examples/streaming_asr/android/app/src/main/res/values/styles.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<resources>
    <!-- Theme applied to the Android Window while the process is starting when the OS's Dark Mode setting is off -->
    <style name="LaunchTheme" parent="@android:style/Theme.Light.NoTitleBar">
        <!-- Show a splash screen on the activity. Automatically removed when
             the Flutter engine draws its first frame -->
        <item name="android:windowBackground">@drawable/launch_background</item>
    </style>
    <!-- Theme applied to the Android Window as soon as the process has started.
         This theme determines the color of the Android Window while your
         Flutter UI initializes, as well as behind your Flutter UI while its
         running.

         This Theme is only used starting with V2 of Flutter's Android embedding. -->
    <style name="NormalTheme" parent="@android:style/Theme.Light.NoTitleBar">
        <item name="android:windowBackground">?android:colorBackground</item>
    </style>
</resources>


================================================
FILE: flutter-examples/streaming_asr/android/app/src/main/res/values-night/styles.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<resources>
    <!-- Theme applied to the Android Window while the process is starting when the OS's Dark Mode setting is on -->
    <style name="LaunchTheme" parent="@android:style/Theme.Black.NoTitleBar">
        <!-- Show a splash screen on the activity. Automatically removed when
             the Flutter engine draws its first frame -->
        <item name="android:windowBackground">@drawable/launch_background</item>
    </style>
    <!-- Theme applied to the Android Window as soon as the process has started.
         This theme determines the color of the Android Window while your
         Flutter UI initializes, as well as behind your Flutter UI while its
         running.

         This Theme is only used starting with V2 of Flutter's Android embedding. -->
    <style name="NormalTheme" parent="@android:style/Theme.Black.NoTitleBar">
        <item name="android:windowBackground">?android:colorBackground</item>
    </style>
</resources>


================================================
FILE: flutter-examples/streaming_asr/android/app/src/profile/AndroidManifest.xml
================================================
<manifest xmlns:android="http://schemas.android.com/apk/res/android">
    <!-- The INTERNET permission is required for development. Specifically,
         the Flutter tool needs it to communicate with the running application
         to allow setting breakpoints, to provide hot reload, etc.
    -->
    <uses-permission android:name="android.permission.INTERNET"/>
</manifest>


================================================
FILE: flutter-examples/streaming_asr/android/build.gradle
================================================
allprojects {
    repositories {
        google()
        mavenCentral()
    }
}

rootProject.buildDir = "../build"
subprojects {
    project.buildDir = "${rootProject.buildDir}/${project.name}"
}
subprojects {
    project.evaluationDependsOn(":app")
}

tasks.register("clean", Delete) {
    delete rootProject.buildDir
}


================================================
FILE: flutter-examples/streaming_asr/android/gradle/wrapper/gradle-wrapper.properties
================================================
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.11.1-all.zip


================================================
FILE: flutter-examples/streaming_asr/android/gradle.properties
================================================
org.gradle.jvmargs=-Xmx4G -XX:+HeapDumpOnOutOfMemoryError
android.useAndroidX=true
android.enableJetifier=true


================================================
FILE: flutter-examples/streaming_asr/android/settings.gradle
================================================
pluginManagement {
    def flutterSdkPath = {
        def properties = new Properties()
        file("local.properties").withInputStream { properties.load(it) }
        def flutterSdkPath = properties.getProperty("flutter.sdk")
        assert flutterSdkPath != null, "flutter.sdk not set in local.properties"
        return flutterSdkPath
    }()

    includeBuild("$flutterSdkPath/packages/flutter_tools/gradle")

    repositories {
        google()
        mavenCentral()
        gradlePluginPortal()
    }
}

plugins {
    id "dev.flutter.flutter-plugin-loader" version "1.0.0"
    id "com.android.application" version "8.9.1" apply false
    id "org.jetbrains.kotlin.android" version "1.9.24" apply false
}

include ":app"


================================================
FILE: flutter-examples/streaming_asr/assets/.gitignore
================================================


================================================
FILE: flutter-examples/streaming_asr/ios/.gitignore
================================================
**/dgph
*.mode1v3
*.mode2v3
*.moved-aside
*.pbxuser
*.perspectivev3
**/*sync/
.sconsign.dblite
.tags*
**/.vagrant/
**/DerivedData/
Icon?
**/Pods/
**/.symlinks/
profile
xcuserdata
**/.generated/
Flutter/App.framework
Flutter/Flutter.framework
Flutter/Flutter.podspec
Flutter/Generated.xcconfig
Flutter/ephemeral/
Flutter/app.flx
Flutter/app.zip
Flutter/flutter_assets/
Flutter/flutter_export_environment.sh
ServiceDefinitions.json
Runner/GeneratedPluginRegistrant.*

# Exceptions to above rules.
!default.mode1v3
!default.mode2v3
!default.pbxuser
!default.perspectivev3


================================================
FILE: flutter-examples/streaming_asr/ios/Flutter/AppFrameworkInfo.plist
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
  <key>CFBundleDevelopmentRegion</key>
  <string>en</string>
  <key>CFBundleExecutable</key>
  <string>App</string>
  <key>CFBundleIdentifier</key>
  <string>io.flutter.flutter.app</string>
  <key>CFBundleInfoDictionaryVersion</key>
  <string>6.0</string>
  <key>CFBundleName</key>
  <string>App</string>
  <key>CFBundlePackageType</key>
  <string>FMWK</string>
  <key>CFBundleShortVersionString</key>
  <string>1.0</string>
  <key>CFBundleSignature</key>
  <string>????</string>
  <key>CFBundleVersion</key>
  <string>1.0</string>
  <key>MinimumOSVersion</key>
  <string>12.0</string>
</dict>
</plist>


================================================
FILE: flutter-examples/streaming_asr/ios/Flutter/Debug.xcconfig
================================================
#include? "Pods/Target Support Files/Pods-Runner/Pods-Runner.debug.xcconfig"
#include "Generated.xcconfig"


================================================
FILE: flutter-examples/streaming_asr/ios/Flutter/Release.xcconfig
================================================
#include? "Pods/Target Support Files/Pods-Runner/Pods-Runner.release.xcconfig"
#include "Generated.xcconfig"


================================================
FILE: flutter-examples/streaming_asr/ios/Runner/AppDelegate.swift
================================================
import Flutter
import UIKit

@UIApplicationMain
@objc class AppDelegate: FlutterAppDelegate {
  override func application(
    _ application: UIApplication,
    didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]?
  ) -> Bool {
    GeneratedPluginRegistrant.register(with: self)
    return super.application(application, didFinishLaunchingWithOptions: launchOptions)
  }
}


================================================
FILE: flutter-examples/streaming_asr/ios/Runner/Assets.xcassets/AppIcon.appiconset/Contents.json
================================================
{
  "images" : [
    {
      "size" : "20x20",
      "idiom" : "iphone",
      "filename" : "Icon-App-20x20@2x.png",
      "scale" : "2x"
    },
    {
      "size" : "20x20",
      "idiom" : "iphone",
      "filename" : "Icon-App-20x20@3x.png",
      "scale" : "3x"
    },
    {
      "size" : "29x29",
      "idiom" : "iphone",
      "filename" : "Icon-App-29x29@1x.png",
      "scale" : "1x"
    },
    {
      "size" : "29x29",
      "idiom" : "iphone",
      "filename" : "Icon-App-29x29@2x.png",
      "scale" : "2x"
    },
    {
      "size" : "29x29",
      "idiom" : "iphone",
      "filename" : "Icon-App-29x29@3x.png",
      "scale" : "3x"
    },
    {
      "size" : "40x40",
      "idiom" : "iphone",
      "filename" : "Icon-App-40x40@2x.png",
      "scale" : "2x"
    },
    {
      "size" : "40x40",
      "idiom" : "iphone",
      "filename" : "Icon-App-40x40@3x.png",
      "scale" : "3x"
    },
    {
      "size" : "60x60",
      "idiom" : "iphone",
      "filename" : "Icon-App-60x60@2x.png",
      "scale" : "2x"
    },
    {
      "size" : "60x60",
      "idiom" : "iphone",
      "filename" : "Icon-App-60x60@3x.png",
      "scale" : "3x"
    },
    {
      "size" : "20x20",
      "idiom" : "ipad",
      "filename" : "Icon-App-20x20@1x.png",
      "scale" : "1x"
    },
    {
      "size" : "20x20",
      "idiom" : "ipad",
      "filename" : "Icon-App-20x20@2x.png",
      "scale" : "2x"
    },
    {
      "size" : "29x29",
      "idiom" : "ipad",
      "filename" : "Icon-App-29x29@1x.png",
      "scale" : "1x"
    },
    {
      "size" : "29x29",
      "idiom" : "ipad",
      "filename" : "Icon-App-29x29@2x.png",
      "scale" : "2x"
    },
    {
      "size" : "40x40",
      "idiom" : "ipad",
      "filename" : "Icon-App-40x40@1x.png",
      "scale" : "1x"
    },
    {
      "size" : "40x40",
      "idiom" : "ipad",
      "filename" : "Icon-App-40x40@2x.png",
      "scale" : "2x"
    },
    {
      "size" : "76x76",
      "idiom" : "ipad",
      "filename" : "Icon-App-76x76@1x.png",
      "scale" : "1x"
    },
    {
      "size" : "76x76",
      "idiom" : "ipad",
      "filename" : "Icon-App-76x76@2x.png",
      "scale" : "2x"
    },
    {
      "size" : "83.5x83.5",
      "idiom" : "ipad",
      "filename" : "Icon-App-83.5x83.5@2x.png",
      "scale" : "2x"
    },
    {
      "size" : "1024x1024",
      "idiom" : "ios-marketing",
      "filename" : "Icon-App-1024x1024@1x.png",
      "scale" : "1x"
    }
  ],
  "info" : {
    "version" : 1,
    "author" : "xcode"
  }
}


================================================
FILE: flutter-examples/streaming_asr/ios/Runner/Assets.xcassets/LaunchImage.imageset/Contents.json
================================================
{
  "images" : [
    {
      "idiom" : "universal",
      "filename" : "LaunchImage.png",
      "scale" : "1x"
    },
    {
      "idiom" : "universal",
      "filename" : "LaunchImage@2x.png",
      "scale" : "2x"
    },
    {
      "idiom" : "universal",
      "filename" : "LaunchImage@3x.png",
      "scale" : "3x"
    }
  ],
  "info" : {
    "version" : 1,
    "author" : "xcode"
  }
}


================================================
FILE: flutter-examples/streaming_asr/ios/Runner/Assets.xcassets/LaunchImage.imageset/README.md
================================================
# Launch Screen Assets

You can customize the launch screen with your own desired assets by replacing the image files in this directory.

You can also do it by opening your Flutter project's Xcode project with `open ios/Runner.xcworkspace`, selecting `Runner/Assets.xcassets` in the Project Navigator and dropping in the desired images.

================================================
FILE: flutter-examples/streaming_asr/ios/Runner/Base.lproj/LaunchScreen.storyboard
================================================
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="12121" systemVersion="16G29" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" launchScreen="YES" colorMatched="YES" initialViewController="01J-lp-oVM">
    <dependencies>
        <deployment identifier="iOS"/>
        <plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="12089"/>
    </dependencies>
    <scenes>
        <!--View Controller-->
        <scene sceneID="EHf-IW-A2E">
            <objects>
                <viewController id="01J-lp-oVM" sceneMemberID="viewController">
                    <layoutGuides>
                        <viewControllerLayoutGuide type="top" id="Ydg-fD-yQy"/>
                        <viewControllerLayoutGuide type="bottom" id="xbc-2k-c8Z"/>
                    </layoutGuides>
                    <view key="view" contentMode="scaleToFill" id="Ze5-6b-2t3">
                        <autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
                        <subviews>
                            <imageView opaque="NO" clipsSubviews="YES" multipleTouchEnabled="YES" contentMode="center" image="LaunchImage" translatesAutoresizingMaskIntoConstraints="NO" id="YRO-k0-Ey4">
                            </imageView>
                        </subviews>
                        <color key="backgroundColor" red="1" green="1" blue="1" alpha="1" colorSpace="custom" customColorSpace="sRGB"/>
                        <constraints>
                            <constraint firstItem="YRO-k0-Ey4" firstAttribute="centerX" secondItem="Ze5-6b-2t3" secondAttribute="centerX" id="1a2-6s-vTC"/>
                            <constraint firstItem="YRO-k0-Ey4" firstAttribute="centerY" secondItem="Ze5-6b-2t3" secondAttribute="centerY" id="4X2-HB-R7a"/>
                        </constraints>
                    </view>
                </viewController>
                <placeholder placeholderIdentifier="IBFirstResponder" id="iYj-Kq-Ea1" userLabel="First Responder" sceneMemberID="firstResponder"/>
            </objects>
            <point key="canvasLocation" x="53" y="375"/>
        </scene>
    </scenes>
    <resources>
        <image name="LaunchImage" width="168" height="185"/>
    </resources>
</document>


================================================
FILE: flutter-examples/streaming_asr/ios/Runner/Base.lproj/Main.storyboard
================================================
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="10117" systemVersion="15F34" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" initialViewController="BYZ-38-t0r">
    <dependencies>
        <deployment identifier="iOS"/>
        <plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="10085"/>
    </dependencies>
    <scenes>
        <!--Flutter View Controller-->
        <scene sceneID="tne-QT-ifu">
            <objects>
                <viewController id="BYZ-38-t0r" customClass="FlutterViewController" sceneMemberID="viewController">
                    <layoutGuides>
                        <viewControllerLayoutGuide type="top" id="y3c-jy-aDJ"/>
                        <viewControllerLayoutGuide type="bottom" id="wfy-db-euE"/>
                    </layoutGuides>
                    <view key="view" contentMode="scaleToFill" id="8bC-Xf-vdC">
                        <rect key="frame" x="0.0" y="0.0" width="600" height="600"/>
                        <autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
                        <color key="backgroundColor" white="1" alpha="1" colorSpace="custom" customColorSpace="calibratedWhite"/>
                    </view>
                </viewController>
                <placeholder placeholderIdentifier="IBFirstResponder" id="dkx-z0-nzr" sceneMemberID="firstResponder"/>
            </objects>
        </scene>
    </scenes>
</document>


================================================
FILE: flutter-examples/streaming_asr/ios/Runner/Info.plist
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>NSMicrophoneUsageDescription</key>
	<string>Need microphone access for recording speech</string>
	<key>CFBundleDevelopmentRegion</key>
	<string>$(DEVELOPMENT_LANGUAGE)</string>
	<key>CFBundleDisplayName</key>
	<string>Streaming Asr</string>
	<key>CFBundleExecutable</key>
	<string>$(EXECUTABLE_NAME)</string>
	<key>CFBundleIdentifier</key>
	<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
	<key>CFBundleInfoDictionaryVersion</key>
	<string>6.0</string>
	<key>CFBundleName</key>
	<string>streaming_asr</string>
	<key>CFBundlePackageType</key>
	<string>APPL</string>
	<key>CFBundleShortVersionString</key>
	<string>$(FLUTTER_BUILD_NAME)</string>
	<key>CFBundleSignature</key>
	<string>????</string>
	<key>CFBundleVersion</key>
	<string>$(FLUTTER_BUILD_NUMBER)</string>
	<key>LSRequiresIPhoneOS</key>
	<true/>
	<key>UILaunchStoryboardName</key>
	<string>LaunchScreen</string>
	<key>UIMainStoryboardFile</key>
	<string>Main</string>
	<key>UISupportedInterfaceOrientations</key>
	<array>
		<string>UIInterfaceOrientationPortrait</string>
		<string>UIInterfaceOrientationLandscapeLeft</string>
		<string>UIInterfaceOrientationLandscapeRight</string>
	</array>
	<key>UISupportedInterfaceOrientations~ipad</key>
	<array>
		<string>UIInterfaceOrientationPortrait</string>
		<string>UIInterfaceOrientationPortraitUpsideDown</string>
		<string>UIInterfaceOrientationLandscapeLeft</string>
		<string>UIInterfaceOrientationLandscapeRight</string>
	</array>
	<key>CADisableMinimumFrameDurationOnPhone</key>
	<true/>
	<key>UIApplicationSupportsIndirectInputEvents</key>
	<true/>
</dict>
</plist>


================================================
FILE: flutter-examples/streaming_asr/ios/Runner/Runner-Bridging-Header.h
================================================
#import "GeneratedPluginRegistrant.h"


================================================
FILE: flutter-examples/streaming_asr/ios/Runner.xcodeproj/project.pbxproj
================================================
// !$*UTF8*$!
{
	archiveVersion = 1;
	classes = {
	};
	objectVersion = 54;
	objects = {

/* Begin PBXBuildFile section */
		05D5EF72926AFE8B0BB8E849 /* Pods_Runner.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = B422E1CC20F2C7BF721B8DEA /* Pods_Runner.framework */; };
		1498D2341E8E89220040F4C2 /* GeneratedPluginRegistrant.m in Sources */ = {isa = PBXBuildFile; fileRef = 1498D2331E8E89220040F4C2 /* GeneratedPluginRegistrant.m */; };
		331C808B294A63AB00263BE5 /* RunnerTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 331C807B294A618700263BE5 /* RunnerTests.swift */; };
		3B3967161E833CAA004F5970 /* AppFrameworkInfo.plist in Resources */ = {isa = PBXBuildFile; fileRef = 3B3967151E833CAA004F5970 /* AppFrameworkInfo.plist */; };
		5A4BF2984B010F625045AEF9 /* Pods_RunnerTests.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = CD3E5A0B481F8C71365F9259 /* Pods_RunnerTests.framework */; };
		74858FAF1ED2DC5600515810 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 74858FAE1ED2DC5600515810 /* AppDelegate.swift */; };
		97C146FC1CF9000F007C117D /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 97C146FA1CF9000F007C117D /* Main.storyboard */; };
		97C146FE1CF9000F007C117D /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 97C146FD1CF9000F007C117D /* Assets.xcassets */; };
		97C147011CF9000F007C117D /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 97C146FF1CF9000F007C117D /* LaunchScreen.storyboard */; };
/* End PBXBuildFile section */

/* Begin PBXContainerItemProxy section */
		331C8085294A63A400263BE5 /* PBXContainerItemProxy */ = {
			isa = PBXContainerItemProxy;
			containerPortal = 97C146E61CF9000F007C117D /* Project object */;
			proxyType = 1;
			remoteGlobalIDString = 97C146ED1CF9000F007C117D;
			remoteInfo = Runner;
		};
/* End PBXContainerItemProxy section */

/* Begin PBXCopyFilesBuildPhase section */
		9705A1C41CF9048500538489 /* Embed Frameworks */ = {
			isa = PBXCopyFilesBuildPhase;
			buildActionMask = 2147483647;
			dstPath = "";
			dstSubfolderSpec = 10;
			files = (
			);
			name = "Embed Frameworks";
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXCopyFilesBuildPhase section */

/* Begin PBXFileReference section */
		0AE88D6BF022DF2B961162B1 /* Pods-RunnerTests.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-RunnerTests.debug.xcconfig"; path = "Target Support Files/Pods-RunnerTests/Pods-RunnerTests.debug.xcconfig"; sourceTree = "<group>"; };
		1498D2321E8E86230040F4C2 /* GeneratedPluginRegistrant.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = GeneratedPluginRegistrant.h; sourceTree = "<group>"; };
		1498D2331E8E89220040F4C2 /* GeneratedPluginRegistrant.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = GeneratedPluginRegistrant.m; sourceTree = "<group>"; };
		18DE41FC48D4E4A22BB8396E /* Pods-Runner.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-Runner.release.xcconfig"; path = "Target Support Files/Pods-Runner/Pods-Runner.release.xcconfig"; sourceTree = "<group>"; };
		1FA6A3CB2526375DC4E7577F /* Pods-RunnerTests.profile.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-RunnerTests.profile.xcconfig"; path = "Target Support Files/Pods-RunnerTests/Pods-RunnerTests.profile.xcconfig"; sourceTree = "<group>"; };
		331C807B294A618700263BE5 /* RunnerTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RunnerTests.swift; sourceTree = "<group>"; };
		331C8081294A63A400263BE5 /* RunnerTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = RunnerTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
		3B3967151E833CAA004F5970 /* AppFrameworkInfo.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; name = AppFrameworkInfo.plist; path = Flutter/AppFrameworkInfo.plist; sourceTree = "<group>"; };
		74858FAD1ED2DC5600515810 /* Runner-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "Runner-Bridging-Header.h"; sourceTree = "<group>"; };
		74858FAE1ED2DC5600515810 /* AppDelegate.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; };
		7AFA3C8E1D35360C0083082E /* Release.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; name = Release.xcconfig; path = Flutter/Release.xcconfig; sourceTree = "<group>"; };
		9740EEB21CF90195004384FC /* Debug.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; name = Debug.xcconfig; path = Flutter/Debug.xcconfig; sourceTree = "<group>"; };
		9740EEB31CF90195004384FC /* Generated.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; name = Generated.xcconfig; path = Flutter/Generated.xcconfig; sourceTree = "<group>"; };
		97C146EE1CF9000F007C117D /* Runner.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = Runner.app; sourceTree = BUILT_PRODUCTS_DIR; };
		97C146FB1CF9000F007C117D /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/Main.storyboard; sourceTree = "<group>"; };
		97C146FD1CF9000F007C117D /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
		97C147001CF9000F007C117D /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = "<group>"; };
		97C147021CF9000F007C117D /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
		B422E1CC20F2C7BF721B8DEA /* Pods_Runner.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = Pods_Runner.framework; sourceTree = BUILT_PRODUCTS_DIR; };
		CD3E5A0B481F8C71365F9259 /* Pods_RunnerTests.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = Pods_RunnerTests.framework; sourceTree = BUILT_PRODUCTS_DIR; };
		D39135D1BCA9F8B2E889A4A7 /* Pods-Runner.profile.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-Runner.profile.xcconfig"; path = "Target Support Files/Pods-Runner/Pods-Runner.profile.xcconfig"; sourceTree = "<group>"; };
		ECE8263C82D7A5EDCDD523B1 /* Pods-RunnerTests.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-RunnerTests.release.xcconfig"; path = "Target Support Files/Pods-RunnerTests/Pods-RunnerTests.release.xcconfig"; sourceTree = "<group>"; };
		F2428E84328DFA24DFEF0A8B /* Pods-Runner.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-Runner.debug.xcconfig"; path = "Target Support Files/Pods-Runner/Pods-Runner.debug.xcconfig"; sourceTree = "<group>"; };
/* End PBXFileReference section */

/* Begin PBXFrameworksBuildPhase section */
		370CDD7E022C5FF755B5EF47 /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
				5A4BF2984B010F625045AEF9 /* Pods_RunnerTests.framework in Frameworks */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		97C146EB1CF9000F007C117D /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
				05D5EF72926AFE8B0BB8E849 /* Pods_Runner.framework in Frameworks */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXFrameworksBuildPhase section */

/* Begin PBXGroup section */
		331C8082294A63A400263BE5 /* RunnerTests */ = {
			isa = PBXGroup;
			children = (
				331C807B294A618700263BE5 /* RunnerTests.swift */,
			);
			path = RunnerTests;
			sourceTree = "<group>";
		};
		50F577A9B451352B5312D8B8 /* Pods */ = {
			isa = PBXGroup;
			children = (
				F2428E84328DFA24DFEF0A8B /* Pods-Runner.debug.xcconfig */,
				18DE41FC48D4E4A22BB8396E /* Pods-Runner.release.xcconfig */,
				D39135D1BCA9F8B2E889A4A7 /* Pods-Runner.profile.xcconfig */,
				0AE88D6BF022DF2B961162B1 /* Pods-RunnerTests.debug.xcconfig */,
				ECE8263C82D7A5EDCDD523B1 /* Pods-RunnerTests.release.xcconfig */,
				1FA6A3CB2526375DC4E7577F /* Pods-RunnerTests.profile.xcconfig */,
			);
			name = Pods;
			path = Pods;
			sourceTree = "<group>";
		};
		9740EEB11CF90186004384FC /* Flutter */ = {
			isa = PBXGroup;
			children = (
				3B3967151E833CAA004F5970 /* AppFrameworkInfo.plist */,
				9740EEB21CF90195004384FC /* Debug.xcconfig */,
				7AFA3C8E1D35360C0083082E /* Release.xcconfig */,
				9740EEB31CF90195004384FC /* Generated.xcconfig */,
			);
			name = Flutter;
			sourceTree = "<group>";
		};
		97C146E51CF9000F007C117D = {
			isa = PBXGroup;
			children = (
				9740EEB11CF90186004384FC /* Flutter */,
				97C146F01CF9000F007C117D /* Runner */,
				97C146EF1CF9000F007C117D /* Products */,
				331C8082294A63A400263BE5 /* RunnerTests */,
				50F577A9B451352B5312D8B8 /* Pods */,
				D7A66A32065C41441BF0E0D3 /* Frameworks */,
			);
			sourceTree = "<group>";
		};
		97C146EF1CF9000F007C117D /* Products */ = {
			isa = PBXGroup;
			children = (
				97C146EE1CF9000F007C117D /* Runner.app */,
				331C8081294A63A400263BE5 /* RunnerTests.xctest */,
			);
			name = Products;
			sourceTree = "<group>";
		};
		97C146F01CF9000F007C117D /* Runner */ = {
			isa = PBXGroup;
			children = (
				97C146FA1CF9000F007C117D /* Main.storyboard */,
				97C146FD1CF9000F007C117D /* Assets.xcassets */,
				97C146FF1CF9000F007C117D /* LaunchScreen.storyboard */,
				97C147021CF9000F007C117D /* Info.plist */,
				1498D2321E8E86230040F4C2 /* GeneratedPluginRegistrant.h */,
				1498D2331E8E89220040F4C2 /* GeneratedPluginRegistrant.m */,
				74858FAE1ED2DC5600515810 /* AppDelegate.swift */,
				74858FAD1ED2DC5600515810 /* Runner-Bridging-Header.h */,
			);
			path = Runner;
			sourceTree = "<group>";
		};
		D7A66A32065C41441BF0E0D3 /* Frameworks */ = {
			isa = PBXGroup;
			children = (
				B422E1CC20F2C7BF721B8DEA /* Pods_Runner.framework */,
				CD3E5A0B481F8C71365F9259 /* Pods_RunnerTests.framework */,
			);
			name = Frameworks;
			sourceTree = "<group>";
		};
/* End PBXGroup section */

/* Begin PBXNativeTarget section */
		331C8080294A63A400263BE5 /* RunnerTests */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = 331C8087294A63A400263BE5 /* Build configuration list for PBXNativeTarget "RunnerTests" */;
			buildPhases = (
				05C536716C891AD06C35ACE8 /* [CP] Check Pods Manifest.lock */,
				331C807D294A63A400263BE5 /* Sources */,
				331C807F294A63A400263BE5 /* Resources */,
				370CDD7E022C5FF755B5EF47 /* Frameworks */,
			);
			buildRules = (
			);
			dependencies = (
				331C8086294A63A400263BE5 /* PBXTargetDependency */,
			);
			name = RunnerTests;
			productName = RunnerTests;
			productReference = 331C8081294A63A400263BE5 /* RunnerTests.xctest */;
			productType = "com.apple.product-type.bundle.unit-test";
		};
		97C146ED1CF9000F007C117D /* Runner */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = 97C147051CF9000F007C117D /* Build configuration list for PBXNativeTarget "Runner" */;
			buildPhases = (
				7BF04CD64B1097AB8C6E66EA /* [CP] Check Pods Manifest.lock */,
				9740EEB61CF901F6004384FC /* Run Script */,
				97C146EA1CF9000F007C117D /* Sources */,
				97C146EB1CF9000F007C117D /* Frameworks */,
				97C146EC1CF9000F007C117D /* Resources */,
				9705A1C41CF9048500538489 /* Embed Frameworks */,
				3B06AD1E1E4923F5004D2608 /* Thin Binary */,
				E862F7828A330E975EF6E1F9 /* [CP] Embed Pods Frameworks */,
			);
			buildRules = (
			);
			dependencies = (
			);
			name = Runner;
			productName = Runner;
			productReference = 97C146EE1CF9000F007C117D /* Runner.app */;
			productType = "com.apple.product-type.application";
		};
/* End PBXNativeTarget section */

/* Begin PBXProject section */
		97C146E61CF9000F007C117D /* Project object */ = {
			isa = PBXProject;
			attributes = {
				BuildIndependentTargetsInParallel = YES;
				LastUpgradeCheck = 1510;
				ORGANIZATIONNAME = "";
				TargetAttributes = {
					331C8080294A63A400263BE5 = {
						CreatedOnToolsVersion = 14.0;
						TestTargetID = 97C146ED1CF9000F007C117D;
					};
					97C146ED1CF9000F007C117D = {
						CreatedOnToolsVersion = 7.3.1;
						LastSwiftMigration = 1100;
					};
				};
			};
			buildConfigurationList = 97C146E91CF9000F007C117D /* Build configuration list for PBXProject "Runner" */;
			compatibilityVersion = "Xcode 9.3";
			developmentRegion = en;
			hasScannedForEncodings = 0;
			knownRegions = (
				en,
				Base,
			);
			mainGroup = 97C146E51CF9000F007C117D;
			productRefGroup = 97C146EF1CF9000F007C117D /* Products */;
			projectDirPath = "";
			projectRoot = "";
			targets = (
				97C146ED1CF9000F007C117D /* Runner */,
				331C8080294A63A400263BE5 /* RunnerTests */,
			);
		};
/* End PBXProject section */

/* Begin PBXResourcesBuildPhase section */
		331C807F294A63A400263BE5 /* Resources */ = {
			isa = PBXResourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		97C146EC1CF9000F007C117D /* Resources */ = {
			isa = PBXResourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				97C147011CF9000F007C117D /* LaunchScreen.storyboard in Resources */,
				3B3967161E833CAA004F5970 /* AppFrameworkInfo.plist in Resources */,
				97C146FE1CF9000F007C117D /* Assets.xcassets in Resources */,
				97C146FC1CF9000F007C117D /* Main.storyboard in Resources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXResourcesBuildPhase section */

/* Begin PBXShellScriptBuildPhase section */
		05C536716C891AD06C35ACE8 /* [CP] Check Pods Manifest.lock */ = {
			isa = PBXShellScriptBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			inputFileListPaths = (
			);
			inputPaths = (
				"${PODS_PODFILE_DIR_PATH}/Podfile.lock",
				"${PODS_ROOT}/Manifest.lock",
			);
			name = "[CP] Check Pods Manifest.lock";
			outputFileListPaths = (
			);
			outputPaths = (
				"$(DERIVED_FILE_DIR)/Pods-RunnerTests-checkManifestLockResult.txt",
			);
			runOnlyForDeploymentPostprocessing = 0;
			shellPath = /bin/sh;
			shellScript = "diff \"${PODS_PODFILE_DIR_PATH}/Podfile.lock\" \"${PODS_ROOT}/Manifest.lock\" > /dev/null\nif [ $? != 0 ] ; then\n    # print error to STDERR\n    echo \"error: The sandbox is not in sync with the Podfile.lock. Run 'pod install' or update your CocoaPods installation.\" >&2\n    exit 1\nfi\n# This output is used by Xcode 'outputs' to avoid re-running this script phase.\necho \"SUCCESS\" > \"${SCRIPT_OUTPUT_FILE_0}\"\n";
			showEnvVarsInLog = 0;
		};
		3B06AD1E1E4923F5004D2608 /* Thin Binary */ = {
			isa = PBXShellScriptBuildPhase;
			alwaysOutOfDate = 1;
			buildActionMask = 2147483647;
			files = (
			);
			inputPaths = (
				"${TARGET_BUILD_DIR}/${INFOPLIST_PATH}",
			);
			name = "Thin Binary";
			outputPaths = (
			);
			runOnlyForDeploymentPostprocessing = 0;
			shellPath = /bin/sh;
			shellScript = "/bin/sh \"$FLUTTER_ROOT/packages/flutter_tools/bin/xcode_backend.sh\" embed_and_thin";
		};
		7BF04CD64B1097AB8C6E66EA /* [CP] Check Pods Manifest.lock */ = {
			isa = PBXShellScriptBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			inputFileListPaths = (
			);
			inputPaths = (
				"${PODS_PODFILE_DIR_PATH}/Podfile.lock",
				"${PODS_ROOT}/Manifest.lock",
			);
			name = "[CP] Check Pods Manifest.lock";
			outputFileListPaths = (
			);
			outputPaths = (
				"$(DERIVED_FILE_DIR)/Pods-Runner-checkManifestLockResult.txt",
			);
			runOnlyForDeploymentPostprocessing = 0;
			shellPath = /bin/sh;
			shellScript = "diff \"${PODS_PODFILE_DIR_PATH}/Podfile.lock\" \"${PODS_ROOT}/Manifest.lock\" > /dev/null\nif [ $? != 0 ] ; then\n    # print error to STDERR\n    echo \"error: The sandbox is not in sync with the Podfile.lock. Run 'pod install' or update your CocoaPods installation.\" >&2\n    exit 1\nfi\n# This output is used by Xcode 'outputs' to avoid re-running this script phase.\necho \"SUCCESS\" > \"${SCRIPT_OUTPUT_FILE_0}\"\n";
			showEnvVarsInLog = 0;
		};
		9740EEB61CF901F6004384FC /* Run Script */ = {
			isa = PBXShellScriptBuildPhase;
			alwaysOutOfDate = 1;
			buildActionMask = 2147483647;
			files = (
			);
			inputPaths = (
			);
			name = "Run Script";
			outputPaths = (
			);
			runOnlyForDeploymentPostprocessing = 0;
			shellPath = /bin/sh;
			shellScript = "/bin/sh \"$FLUTTER_ROOT/packages/flutter_tools/bin/xcode_backend.sh\" build";
		};
		E862F7828A330E975EF6E1F9 /* [CP] Embed Pods Frameworks */ = {
			isa = PBXShellScriptBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			inputFileListPaths = (
				"${PODS_ROOT}/Target Support Files/Pods-Runner/Pods-Runner-frameworks-${CONFIGURATION}-input-files.xcfilelist",
			);
			name = "[CP] Embed Pods Frameworks";
			outputFileListPaths = (
				"${PODS_ROOT}/Target Support Files/Pods-Runner/Pods-Runner-frameworks-${CONFIGURATION}-output-files.xcfilelist",
			);
			runOnlyForDeploymentPostprocessing = 0;
			shellPath = /bin/sh;
			shellScript = "\"${PODS_ROOT}/Target Support Files/Pods-Runner/Pods-Runner-frameworks.sh\"\n";
			showEnvVarsInLog = 0;
		};
/* End PBXShellScriptBuildPhase section */

/* Begin PBXSourcesBuildPhase section */
		331C807D294A63A400263BE5 /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				331C808B294A63AB00263BE5 /* RunnerTests.swift in Sources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		97C146EA1CF9000F007C117D /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				74858FAF1ED2DC5600515810 /* AppDelegate.swift in Sources */,
				1498D2341E8E89220040F4C2 /* GeneratedPluginRegistrant.m in Sources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXSourcesBuildPhase section */

/* Begin PBXTargetDependency section */
		331C8086294A63A400263BE5 /* PBXTargetDependency */ = {
			isa = PBXTargetDependency;
			target = 97C146ED1CF9000F007C117D /* Runner */;
			targetProxy = 331C8085294A63A400263BE5 /* PBXContainerItemProxy */;
		};
/* End PBXTargetDependency section */

/* Begin PBXVariantGroup section */
		97C146FA1CF9000F007C117D /* Main.storyboard */ = {
			isa = PBXVariantGroup;
			children = (
				97C146FB1CF9000F007C117D /* Base */,
			);
			name = Main.storyboard;
			sourceTree = "<group>";
		};
		97C146FF1CF9000F007C117D /* LaunchScreen.storyboard */ = {
			isa = PBXVariantGroup;
			children = (
				97C147001CF9000F007C117D /* Base */,
			);
			name = LaunchScreen.storyboard;
			sourceTree = "<group>";
		};
/* End PBXVariantGroup section */

/* Begin XCBuildConfiguration section */
		249021D3217E4FDB00AE95B9 /* Profile */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
				CLANG_ANALYZER_NONNULL = YES;
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
				CLANG_CXX_LIBRARY = "libc++";
				CLANG_ENABLE_MODULES = YES;
				CLANG_ENABLE_OBJC_ARC = YES;
				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
				CLANG_WARN_BOOL_CONVERSION = YES;
				CLANG_WARN_COMMA = YES;
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
				CLANG_WARN_STRICT_PROTOTYPES = YES;
				CLANG_WARN_SUSPICIOUS_MOVE = YES;
				CLANG_WARN_UNREACHABLE_CODE = YES;
				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
				"CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer";
				COPY_PHASE_STRIP = NO;
				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
				ENABLE_NS_ASSERTIONS = NO;
				ENABLE_STRICT_OBJC_MSGSEND = YES;
				ENABLE_USER_SCRIPT_SANDBOXING = NO;
				GCC_C_LANGUAGE_STANDARD = gnu99;
				GCC_NO_COMMON_BLOCKS = YES;
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
				GCC_WARN_UNDECLARED_SELECTOR = YES;
				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
				GCC_WARN_UNUSED_FUNCTION = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				IPHONEOS_DEPLOYMENT_TARGET = 12.0;
				MTL_ENABLE_DEBUG_INFO = NO;
				SDKROOT = iphoneos;
				SUPPORTED_PLATFORMS = iphoneos;
				TARGETED_DEVICE_FAMILY = "1,2";
				VALIDATE_PRODUCT = YES;
			};
			name = Profile;
		};
		249021D4217E4FDB00AE95B9 /* Profile */ = {
			isa = XCBuildConfiguration;
			baseConfigurationReference = 7AFA3C8E1D35360C0083082E /* Release.xcconfig */;
			buildSettings = {
				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
				CLANG_ENABLE_MODULES = YES;
				CURRENT_PROJECT_VERSION = "$(FLUTTER_BUILD_NUMBER)";
				DEVELOPMENT_TEAM = N5ZH3Z63A6;
				ENABLE_BITCODE = NO;
				INFOPLIST_FILE = Runner/Info.plist;
				LD_RUNPATH_SEARCH_PATHS = (
					"$(inherited)",
					"@executable_path/Frameworks",
				);
				PRODUCT_BUNDLE_IDENTIFIER = com.k2fsa.sherpa.onnx.streamingAsr;
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_OBJC_BRIDGING_HEADER = "Runner/Runner-Bridging-Header.h";
				SWIFT_VERSION = 5.0;
				OTHER_LDFLAGS = "-lc++";
				VERSIONING_SYSTEM = "apple-generic";
			};
			name = Profile;
		};
		331C8088294A63A400263BE5 /* Debug */ = {
			isa = XCBuildConfiguration;
			baseConfigurationReference = 0AE88D6BF022DF2B961162B1 /* Pods-RunnerTests.debug.xcconfig */;
			buildSettings = {
				BUNDLE_LOADER = "$(TEST_HOST)";
				CODE_SIGN_STYLE = Automatic;
				CURRENT_PROJECT_VERSION = 1;
				GENERATE_INFOPLIST_FILE = YES;
				MARKETING_VERSION = 1.0;
				PRODUCT_BUNDLE_IDENTIFIER = com.k2fsa.sherpa.onnx.streamingAsr.RunnerTests;
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
				SWIFT_VERSION = 5.0;
				OTHER_LDFLAGS = "-lc++";
				TEST_HOST = "$(BUILT_PRODUCTS_DIR)/Runner.app/$(BUNDLE_EXECUTABLE_FOLDER_PATH)/Runner";
			};
			name = Debug;
		};
		331C8089294A63A400263BE5 /* Release */ = {
			isa = XCBuildConfiguration;
			baseConfigurationReference = ECE8263C82D7A5EDCDD523B1 /* Pods-RunnerTests.release.xcconfig */;
			buildSettings = {
				BUNDLE_LOADER = "$(TEST_HOST)";
				CODE_SIGN_STYLE = Automatic;
				CURRENT_PROJECT_VERSION = 1;
				GENERATE_INFOPLIST_FILE = YES;
				MARKETING_VERSION = 1.0;
				PRODUCT_BUNDLE_IDENTIFIER = com.k2fsa.sherpa.onnx.streamingAsr.RunnerTests;
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_VERSION = 5.0;
				OTHER_LDFLAGS = "-lc++";
				TEST_HOST = "$(BUILT_PRODUCTS_DIR)/Runner.app/$(BUNDLE_EXECUTABLE_FOLDER_PATH)/Runner";
			};
			name = Release;
		};
		331C808A294A63A400263BE5 /* Profile */ = {
			isa = XCBuildConfiguration;
			baseConfigurationReference = 1FA6A3CB2526375DC4E7577F /* Pods-RunnerTests.profile.xcconfig */;
			buildSettings = {
				BUNDLE_LOADER = "$(TEST_HOST)";
				CODE_SIGN_STYLE = Automatic;
				CURRENT_PROJECT_VERSION = 1;
				GENERATE_INFOPLIST_FILE = YES;
				MARKETING_VERSION = 1.0;
				PRODUCT_BUNDLE_IDENTIFIER = com.k2fsa.sherpa.onnx.streamingAsr.RunnerTests;
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_VERSION = 5.0;
				OTHER_LDFLAGS = "-lc++";
				TEST_HOST = "$(BUILT_PRODUCTS_DIR)/Runner.app/$(BUNDLE_EXECUTABLE_FOLDER_PATH)/Runner";
			};
			name = Profile;
		};
		97C147031CF9000F007C117D /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
				CLANG_ANALYZER_NONNULL = YES;
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
				CLANG_CXX_LIBRARY = "libc++";
				CLANG_ENABLE_MODULES = YES;
				CLANG_ENABLE_OBJC_ARC = YES;
				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
				CLANG_WARN_BOOL_CONVERSION = YES;
				CLANG_WARN_COMMA = YES;
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
				CLANG_WARN_STRICT_PROTOTYPES = YES;
				CLANG_WARN_SUSPICIOUS_MOVE = YES;
				CLANG_WARN_UNREACHABLE_CODE = YES;
				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
				"CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer";
				COPY_PHASE_STRIP = NO;
				DEBUG_INFORMATION_FORMAT = dwarf;
				ENABLE_STRICT_OBJC_MSGSEND = YES;
				ENABLE_TESTABILITY = YES;
				ENABLE_USER_SCRIPT_SANDBOXING = NO;
				GCC_C_LANGUAGE_STANDARD = gnu99;
				GCC_DYNAMIC_NO_PIC = NO;
				GCC_NO_COMMON_BLOCKS = YES;
				GCC_OPTIMIZATION_LEVEL = 0;
				GCC_PREPROCESSOR_DEFINITIONS = (
					"DEBUG=1",
					"$(inherited)",
				);
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
				GCC_WARN_UNDECLARED_SELECTOR = YES;
				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
				GCC_WARN_UNUSED_FUNCTION = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				IPHONEOS_DEPLOYMENT_TARGET = 12.0;
				MTL_ENABLE_DEBUG_INFO = YES;
				ONLY_ACTIVE_ARCH = YES;
				SDKROOT = iphoneos;
				TARGETED_DEVICE_FAMILY = "1,2";
			};
			name = Debug;
		};
		97C147041CF9000F007C117D /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
				CLANG_ANALYZER_NONNULL = YES;
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
				CLANG_CXX_LIBRARY = "libc++";
				CLANG_ENABLE_MODULES = YES;
				CLANG_ENABLE_OBJC_ARC = YES;
				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
				CLANG_WARN_BOOL_CONVERSION = YES;
				CLANG_WARN_COMMA = YES;
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
				CLANG_WARN_STRICT_PROTOTYPES = YES;
				CLANG_WARN_SUSPICIOUS_MOVE = YES;
				CLANG_WARN_UNREACHABLE_CODE = YES;
				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
				"CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer";
				COPY_PHASE_STRIP = NO;
				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
				ENABLE_NS_ASSERTIONS = NO;
				ENABLE_STRICT_OBJC_MSGSEND = YES;
				ENABLE_USER_SCRIPT_SANDBOXING = NO;
				GCC_C_LANGUAGE_STANDARD = gnu99;
				GCC_NO_COMMON_BLOCKS = YES;
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
				GCC_WARN_UNDECLARED_SELECTOR = YES;
				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
				GCC_WARN_UNUSED_FUNCTION = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				IPHONEOS_DEPLOYMENT_TARGET = 12.0;
				MTL_ENABLE_DEBUG_INFO = NO;
				SDKROOT = iphoneos;
				SUPPORTED_PLATFORMS = iphoneos;
				SWIFT_COMPILATION_MODE = wholemodule;
				SWIFT_OPTIMIZATION_LEVEL = "-O";
				TARGETED_DEVICE_FAMILY = "1,2";
				VALIDATE_PRODUCT = YES;
			};
			name = Release;
		};
		97C147061CF9000F007C117D /* Debug */ = {
			isa = XCBuildConfiguration;
			baseConfigurationReference = 9740EEB21CF90195004384FC /* Debug.xcconfig */;
			buildSettings = {
				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
				CLANG_ENABLE_MODULES = YES;
				CURRENT_PROJECT_VERSION = "$(FLUTTER_BUILD_NUMBER)";
				DEVELOPMENT_TEAM = N5ZH3Z63A6;
				ENABLE_BITCODE = NO;
				INFOPLIST_FILE = Runner/Info.plist;
				LD_RUNPATH_SEARCH_PATHS = (
					"$(inherited)",
					"@executable_path/Frameworks",
				);
				PRODUCT_BUNDLE_IDENTIFIER = com.k2fsa.sherpa.onnx.streamingAsr;
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_OBJC_BRIDGING_HEADER = "Runner/Runner-Bridging-Header.h";
				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
				SWIFT_VERSION = 5.0;
				OTHER_LDFLAGS = "-lc++";
				VERSIONING_SYSTEM = "apple-generic";
			};
			name = Debug;
		};
		97C147071CF9000F007C117D /* Release */ = {
			isa = XCBuildConfiguration;
			baseConfigurationReference = 7AFA3C8E1D35360C0083082E /* Release.xcconfig */;
			buildSettings = {
				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
				CLANG_ENABLE_MODULES = YES;
				CURRENT_PROJECT_VERSION = "$(FLUTTER_BUILD_NUMBER)";
				DEVELOPMENT_TEAM = N5ZH3Z63A6;
				ENABLE_BITCODE = NO;
				INFOPLIST_FILE = Runner/Info.plist;
				LD_RUNPATH_SEARCH_PATHS = (
					"$(inherited)",
					"@executable_path/Frameworks",
				);
				PRODUCT_BUNDLE_IDENTIFIER = com.k2fsa.sherpa.onnx.streamingAsr;
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_OBJC_BRIDGING_HEADER = "Runner/Runner-Bridging-Header.h";
				SWIFT_VERSION = 5.0;
				OTHER_LDFLAGS = "-lc++";
				VERSIONING_SYSTEM = "apple-generic";
			};
			name = Release;
		};
/* End XCBuildConfiguration section */

/* Begin XCConfigurationList section */
		331C8087294A63A400263BE5 /* Build configuration list for PBXNativeTarget "RunnerTests" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				331C8088294A63A400263BE5 /* Debug */,
				331C8089294A63A400263BE5 /* Release */,
				331C808A294A63A400263BE5 /* Profile */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		97C146E91CF9000F007C117D /* Build configuration list for PBXProject "Runner" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				97C147031CF9000F007C117D /* Debug */,
				97C147041CF9000F007C117D /* Release */,
				249021D3217E4FDB00AE95B9 /* Profile */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		97C147051CF9000F007C117D /* Build configuration list for PBXNativeTarget "Runner" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				97C147061CF9000F007C117D /* Debug */,
				97C147071CF9000F007C117D /* Release */,
				249021D4217E4FDB00AE95B9 /* Profile */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
/* End XCConfigurationList section */
	};
	rootObject = 97C146E61CF9000F007C117D /* Project object */;
}


================================================
FILE: flutter-examples/streaming_asr/ios/Runner.xcodeproj/project.xcworkspace/contents.xcworkspacedata
================================================
<?xml version="1.0" encoding="UTF-8"?>
<Workspace
   version = "1.0">
   <FileRef
      location = "self:">
   </FileRef>
</Workspace>


================================================
FILE: flutter-examples/streaming_asr/ios/Runner.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>IDEDidComputeMac32BitWarning</key>
	<true/>
</dict>
</plist>


================================================
FILE: flutter-examples/streaming_asr/ios/Runner.xcodeproj/project.xcworkspace/xcshareddata/WorkspaceSettings.xcsettings
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>PreviewsEnabled</key>
	<false/>
</dict>
</plist>


================================================
FILE: flutter-examples/streaming_asr/ios/Runner.xcodeproj/xcshareddata/xcschemes/Runner.xcscheme
================================================
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
   LastUpgradeVersion = "1510"
   version = "1.3">
   <BuildAction
      parallelizeBuildables = "YES"
      buildImplicitDependencies = "YES">
      <BuildActionEntries>
         <BuildActionEntry
            buildForTesting = "YES"
            buildForRunning = "YES"
            buildForProfiling = "YES"
            buildForArchiving = "YES"
            buildForAnalyzing = "YES">
            <BuildableReference
               BuildableIdentifier = "primary"
               BlueprintIdentifier = "97C146ED1CF9000F007C117D"
               BuildableName = "Runner.app"
               BlueprintName = "Runner"
               ReferencedContainer = "container:Runner.xcodeproj">
            </BuildableReference>
         </BuildActionEntry>
      </BuildActionEntries>
   </BuildAction>
   <TestAction
      buildConfiguration = "Debug"
      selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
      selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
      shouldUseLaunchSchemeArgsEnv = "YES">
      <MacroExpansion>
         <BuildableReference
            BuildableIdentifier = "primary"
            BlueprintIdentifier = "97C146ED1CF9000F007C117D"
            BuildableName = "Runner.app"
            BlueprintName = "Runner"
            ReferencedContainer = "container:Runner.xcodeproj">
         </BuildableReference>
      </MacroExpansion>
      <Testables>
         <TestableReference
            skipped = "NO"
            parallelizable = "YES">
            <BuildableReference
               BuildableIdentifier = "primary"
               BlueprintIdentifier = "331C8080294A63A400263BE5"
               BuildableName = "RunnerTests.xctest"
               BlueprintName = "RunnerTests"
               ReferencedContainer = "container:Runner.xcodeproj">
            </BuildableReference>
         </TestableReference>
      </Testables>
   </TestAction>
   <LaunchAction
      buildConfiguration = "Debug"
      selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
      selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
      launchStyle = "0"
      useCustomWorkingDirectory = "NO"
      ignoresPersistentStateOnLaunch = "NO"
      debugDocumentVersioning = "YES"
      debugServiceExtension = "internal"
      allowLocationSimulation = "YES">
      <BuildableProductRunnable
         runnableDebuggingMode = "0">
         <BuildableReference
            BuildableIdentifier = "primary"
            BlueprintIdentifier = "97C146ED1CF9000F007C117D"
            BuildableName = "Runner.app"
            BlueprintName = "Runner"
            ReferencedContainer = "container:Runner.xcodeproj">
         </BuildableReference>
      </BuildableProductRunnable>
   </LaunchAction>
   <ProfileAction
      buildConfiguration = "Profile"
      shouldUseLaunchSchemeArgsEnv = "YES"
      savedToolIdentifier = ""
      useCustomWorkingDirectory = "NO"
      debugDocumentVersioning = "YES">
      <BuildableProductRunnable
         runnableDebuggingMode = "0">
         <BuildableReference
            BuildableIdentifier = "primary"
            BlueprintIdentifier = "97C146ED1CF9000F007C117D"
            BuildableName = "Runner.app"
            BlueprintName = "Runner"
            ReferencedContainer = "container:Runner.xcodeproj">
         </BuildableReference>
      </BuildableProductRunnable>
   </ProfileAction>
   <AnalyzeAction
      buildConfiguration = "Debug">
   </AnalyzeAction>
   <ArchiveAction
      buildConfiguration = "Release"
      revealArchiveInOrganizer = "YES">
   </ArchiveAction>
</Scheme>


================================================
FILE: flutter-examples/streaming_asr/ios/Runner.xcworkspace/contents.xcworkspacedata
================================================
<?xml version="1.0" encoding="UTF-8"?>
<Workspace
   version = "1.0">
   <FileRef
      location = "group:Runner.xcodeproj">
   </FileRef>
   <FileRef
      location = "group:Pods/Pods.xcodeproj">
   </FileRef>
</Workspace>


================================================
FILE: flutter-examples/streaming_asr/ios/Runner.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>IDEDidComputeMac32BitWarning</key>
	<true/>
</dict>
</plist>


================================================
FILE: flutter-examples/streaming_asr/ios/Runner.xcworkspace/xcshareddata/WorkspaceSettings.xcsettings
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>PreviewsEnabled</key>
	<false/>
</dict>
</plist>


================================================
FILE: flutter-examples/streaming_asr/ios/RunnerTests/RunnerTests.swift
================================================
import Flutter
import UIKit
import XCTest

class RunnerTests: XCTestCase {

  func testExample() {
    // If you add code to the Runner application, consider adding tests here.
    // See https://developer.apple.com/documentation/xctest for more information about using XCTest.
  }

}


================================================
FILE: flutter-examples/streaming_asr/lib/info.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'package:flutter/material.dart';
import 'package:url_launcher/url_launcher.dart';

class InfoScreen extends StatelessWidget {
  @override
  Widget build(BuildContext context) {
    const double height = 20;
    return Container(
      child: Padding(
        padding: const EdgeInsets.all(8.0),
        child: Column(
          crossAxisAlignment: CrossAxisAlignment.start,
          children: <Widget>[
            Text('Everything is open-sourced.'),
            SizedBox(height: height),
            InkWell(
              child: Text('Code: https://github.com/k2-fsa/sherpa-onnx'),
              onTap: () => launch('https://k2-fsa.github.io/sherpa/onnx/'),
            ),
            SizedBox(height: height),
            InkWell(
              child: Text('Doc: https://k2-fsa.github.io/sherpa/onnx/'),
              onTap: () => launch('https://k2-fsa.github.io/sherpa/onnx/'),
            ),
            SizedBox(height: height),
            Text('QQ 群: 744602236'),
            SizedBox(height: height),
            InkWell(
              child: Text(
                  '微信群: https://k2-fsa.github.io/sherpa/social-groups.html'),
              onTap: () =>
                  launch('https://k2-fsa.github.io/sherpa/social-groups.html'),
            ),
          ],
        ),
      ),
    );
  }
}


================================================
FILE: flutter-examples/streaming_asr/lib/main.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'package:flutter/material.dart';

import './streaming_asr.dart';
import './info.dart';

void main() {
  runApp(const MyApp());
}

class MyApp extends StatelessWidget {
  const MyApp({super.key});

  @override
  Widget build(BuildContext context) {
    return MaterialApp(
      title: 'Next-gen Kaldi flutter demo',
      theme: ThemeData(
        colorScheme: ColorScheme.fromSeed(seedColor: Colors.deepPurple),
        useMaterial3: true,
      ),
      home: const MyHomePage(title: 'Next-gen Kaldi with Flutter'),
    );
  }
}

class MyHomePage extends StatefulWidget {
  const MyHomePage({super.key, required this.title});

  final String title;

  @override
  State<MyHomePage> createState() => _MyHomePageState();
}

class _MyHomePageState extends State<MyHomePage> {
  int _currentIndex = 0;
  final List<Widget> _tabs = [
    StreamingAsrScreen(),
    InfoScreen(),
  ];
  @override
  Widget build(BuildContext context) {
    return Scaffold(
      appBar: AppBar(
        title: Text(widget.title),
      ),
      body: _tabs[_currentIndex],
      bottomNavigationBar: BottomNavigationBar(
        currentIndex: _currentIndex,
        onTap: (int index) {
          setState(() {
            _currentIndex = index;
          });
        },
        items: [
          BottomNavigationBarItem(
            icon: Icon(Icons.home),
            label: 'Home',
          ),
          BottomNavigationBarItem(
            icon: Icon(Icons.info),
            label: 'Info',
          ),
        ],
      ),
    );
  }
}


================================================
FILE: flutter-examples/streaming_asr/lib/online_model.dart
================================================
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
import './utils.dart';

// Remember to change `assets` in ../pubspec.yaml
// and download files to ../assets
Future<sherpa_onnx.OnlineModelConfig> getOnlineModelConfig(
    {required int type}) async {
  switch (type) {
    case 0:
      final modelDir =
          'assets/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20';
      return sherpa_onnx.OnlineModelConfig(
        transducer: sherpa_onnx.OnlineTransducerModelConfig(
          encoder:
              await copyAssetFile('$modelDir/encoder-epoch-99-avg-1.int8.onnx'),
          decoder: await copyAssetFile('$modelDir/decoder-epoch-99-avg-1.onnx'),
          joiner: await copyAssetFile('$modelDir/joiner-epoch-99-avg-1.onnx'),
        ),
        tokens: await copyAssetFile('$modelDir/tokens.txt'),
        modelType: 'zipformer',
      );
    case 1:
      final modelDir = 'assets/sherpa-onnx-streaming-zipformer-en-2023-06-26';
      return sherpa_onnx.OnlineModelConfig(
        transducer: sherpa_onnx.OnlineTransducerModelConfig(
          encoder: await copyAssetFile(
              '$modelDir/encoder-epoch-99-avg-1-chunk-16-left-128.int8.onnx'),
          decoder: await copyAssetFile(
              '$modelDir/decoder-epoch-99-avg-1-chunk-16-left-128.onnx'),
          joiner: await copyAssetFile(
              '$modelDir/joiner-epoch-99-avg-1-chunk-16-left-128.onnx'),
        ),
        tokens: await copyAssetFile('$modelDir/tokens.txt'),
        modelType: 'zipformer2',
      );
    case 2:
      final modelDir =
          'assets/icefall-asr-zipformer-streaming-wenetspeech-20230615';
      return sherpa_onnx.OnlineModelConfig(
        transducer: sherpa_onnx.OnlineTransducerModelConfig(
          encoder: await copyAssetFile(
              '$modelDir/exp/encoder-epoch-12-avg-4-chunk-16-left-128.int8.onnx'),
          decoder: await copyAssetFile(
              '$modelDir/exp/decoder-epoch-12-avg-4-chunk-16-left-128.onnx'),
          joiner: await copyAssetFile(
              '$modelDir/exp/joiner-epoch-12-avg-4-chunk-16-left-128.onnx'),
        ),
        tokens: await copyAssetFile('$modelDir/data/lang_char/tokens.txt'),
        modelType: 'zipformer2',
      );
    case 3:
      final modelDir = 'assets/sherpa-onnx-streaming-zipformer-fr-2023-04-14';
      return sherpa_onnx.OnlineModelConfig(
        transducer: sherpa_onnx.OnlineTransducerModelConfig(
          encoder: await copyAssetFile(
              '$modelDir/encoder-epoch-29-avg-9-with-averaged-model.int8.onnx'),
          decoder: await copyAssetFile(
              '$modelDir/decoder-epoch-29-avg-9-with-averaged-model.onnx'),
          joiner: await copyAssetFile(
              '$modelDir/joiner-epoch-29-avg-9-with-averaged-model.onnx'),
        ),
        tokens: await copyAssetFile('$modelDir/tokens.txt'),
        modelType: 'zipformer',
      );
    default:
      throw ArgumentError('Unsupported type: $type');
  }
}


================================================
FILE: flutter-examples/streaming_asr/lib/streaming_asr.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:async';

import 'package:flutter/foundation.dart';
import 'package:flutter/material.dart';
import 'package:path/path.dart' as p;
import 'package:path_provider/path_provider.dart';
import 'package:record/record.dart';

import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './utils.dart';
import './online_model.dart';

Future<sherpa_onnx.OnlineRecognizer> createOnlineRecognizer() async {
  final type = 0;

  final modelConfig = await getOnlineModelConfig(type: type);
  final config = sherpa_onnx.OnlineRecognizerConfig(
    model: modelConfig,
    ruleFsts: '',
  );

  return sherpa_onnx.OnlineRecognizer(config);
}

class StreamingAsrScreen extends StatefulWidget {
  const StreamingAsrScreen({super.key});

  @override
  State<StreamingAsrScreen> createState() => _StreamingAsrScreenState();
}

class _StreamingAsrScreenState extends State<StreamingAsrScreen> {
  late final TextEditingController _controller;
  late final AudioRecorder _audioRecorder;

  String _title = 'Real-time speech recognition';
  String _last = '';
  int _index = 0;
  bool _isInitialized = false;

  sherpa_onnx.OnlineRecognizer? _recognizer;
  sherpa_onnx.OnlineStream? _stream;
  int _sampleRate = 16000;

  StreamSubscription<RecordState>? _recordSub;
  RecordState _recordState = RecordState.stop;

  @override
  void initState() {
    _audioRecorder = AudioRecorder();
    _controller = TextEditingController();

    _recordSub = _audioRecorder.onStateChanged().listen((recordState) {
      _updateRecordState(recordState);
    });

    super.initState();
  }

  Future<void> _start() async {
    if (!_isInitialized) {
      sherpa_onnx.initBindings();
      _recognizer = await createOnlineRecognizer();
      _stream = _recognizer?.createStream();

      _isInitialized = true;
    }

    try {
      if (await _audioRecorder.hasPermission()) {
        const encoder = AudioEncoder.pcm16bits;

        if (!await _isEncoderSupported(encoder)) {
          return;
        }

        final devs = await _audioRecorder.listInputDevices();
        debugPrint(devs.toString());

        const config = RecordConfig(
          encoder: encoder,
          sampleRate: 16000,
          numChannels: 1,
        );

        final stream = await _audioRecorder.startStream(config);

        stream.listen(
          (data) {
            final samplesFloat32 =
                convertBytesToFloat32(Uint8List.fromList(data));

            _stream!.acceptWaveform(
                samples: samplesFloat32, sampleRate: _sampleRate);
            while (_recognizer!.isReady(_stream!)) {
              _recognizer!.decode(_stream!);
            }
            final text = _recognizer!.getResult(_stream!).text;
            String textToDisplay = _last;
            if (text != '') {
              if (_last == '') {
                textToDisplay = '$_index: $text';
              } else {
                textToDisplay = '$_index: $text\n$_last';
              }
            }

            if (_recognizer!.isEndpoint(_stream!)) {
              _recognizer!.reset(_stream!);
              if (text != '') {
                _last = textToDisplay;
                _index += 1;
              }
            }
            // print('text: $textToDisplay');

            _controller.value = TextEditingValue(
              text: textToDisplay,
              selection: TextSelection.collapsed(offset: textToDisplay.length),
            );
          },
          onDone: () {
            print('stream stopped.');
          },
        );
      }
    } catch (e) {
      print(e);
    }
  }

  Future<void> _stop() async {
    _stream!.free();
    _stream = _recognizer!.createStream();

    await _audioRecorder.stop();
  }

  Future<void> _pause() => _audioRecorder.pause();

  Future<void> _resume() => _audioRecorder.resume();

  void _updateRecordState(RecordState recordState) {
    setState(() => _recordState = recordState);
  }

  Future<bool> _isEncoderSupported(AudioEncoder encoder) async {
    final isSupported = await _audioRecorder.isEncoderSupported(
      encoder,
    );

    if (!isSupported) {
      debugPrint('${encoder.name} is not supported on this platform.');
      debugPrint('Supported encoders are:');

      for (final e in AudioEncoder.values) {
        if (await _audioRecorder.isEncoderSupported(e)) {
          debugPrint('- ${encoder.name}');
        }
      }
    }

    return isSupported;
  }

  @override
  Widget build(BuildContext context) {
    return MaterialApp(
      home: Scaffold(
        appBar: AppBar(
          title: Text(_title),
        ),
        body: Column(
          mainAxisAlignment: MainAxisAlignment.center,
          children: [
            const SizedBox(height: 50),
            TextField(
              maxLines: 5,
              controller: _controller,
              readOnly: true,
            ),
            const SizedBox(height: 50),
            Row(
              mainAxisAlignment: MainAxisAlignment.center,
              children: <Widget>[
                _buildRecordStopControl(),
                const SizedBox(width: 20),
                _buildText(),
              ],
            ),
          ],
        ),
      ),
    );
  }

  @override
  void dispose() {
    _recordSub?.cancel();
    _audioRecorder.dispose();
    _stream?.free();
    _recognizer?.free();
    super.dispose();
  }

  Widget _buildRecordStopControl() {
    late Icon icon;
    late Color color;

    if (_recordState != RecordState.stop) {
      icon = const Icon(Icons.stop, color: Colors.red, size: 30);
      color = Colors.red.withOpacity(0.1);
    } else {
      final theme = Theme.of(context);
      icon = Icon(Icons.mic, color: theme.primaryColor, size: 30);
      color = theme.primaryColor.withOpacity(0.1);
    }

    return ClipOval(
      child: Material(
        color: color,
        child: InkWell(
          child: SizedBox(width: 56, height: 56, child: icon),
          onTap: () {
            (_recordState != RecordState.stop) ? _stop() : _start();
          },
        ),
      ),
    );
  }

  Widget _buildText() {
    if (_recordState == RecordState.stop) {
      return const Text("Start");
    } else {
      return const Text("Stop");
    }
  }
}


================================================
FILE: flutter-examples/streaming_asr/lib/utils.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'package:path/path.dart';
import 'package:path_provider/path_provider.dart';
import 'package:flutter/services.dart' show rootBundle;
import 'dart:typed_data';
import "dart:io";

// Copy the asset file from src to dst
Future<String> copyAssetFile(String src, [String? dst]) async {
  final Directory directory = await getApplicationSupportDirectory();
  if (dst == null) {
    dst = basename(src);
  }
  final target = join(directory.path, dst);
  bool exists = await new File(target).exists();

  final data = await rootBundle.load(src);

  if (!exists || File(target).lengthSync() != data.lengthInBytes) {
    final List<int> bytes =
        data.buffer.asUint8List(data.offsetInBytes, data.lengthInBytes);
    await File(target).writeAsBytes(bytes);
  }

  return target;
}

Float32List convertBytesToFloat32(Uint8List bytes, [endian = Endian.little]) {
  final values = Float32List(bytes.length ~/ 2);

  final data = ByteData.view(bytes.buffer);

  for (var i = 0; i < bytes.length; i += 2) {
    int short = data.getInt16(i, endian);
    values[i ~/ 2] = short / 32768.0;
  }

  return values;
}


================================================
FILE: flutter-examples/streaming_asr/linux/.gitignore
================================================
flutter/ephemeral


================================================
FILE: flutter-examples/streaming_asr/linux/CMakeLists.txt
================================================
# Project-level configuration.
cmake_minimum_required(VERSION 3.10)
project(runner LANGUAGES CXX)

# The name of the executable created for the application. Change this to change
# the on-disk name of your application.
set(BINARY_NAME "streaming_asr")
# The unique GTK application identifier for this application. See:
# https://wiki.gnome.org/HowDoI/ChooseApplicationID
set(APPLICATION_ID "com.k2fsa.sherpa.onnx.streaming_asr")

# Explicitly opt in to modern CMake behaviors to avoid warnings with recent
# versions of CMake.
cmake_policy(SET CMP0063 NEW)

# Load bundled libraries from the lib/ directory relative to the binary.
set(CMAKE_INSTALL_RPATH "$ORIGIN/lib")

# Root filesystem for cross-building.
if(FLUTTER_TARGET_PLATFORM_SYSROOT)
  set(CMAKE_SYSROOT ${FLUTTER_TARGET_PLATFORM_SYSROOT})
  set(CMAKE_FIND_ROOT_PATH ${CMAKE_SYSROOT})
  set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
  set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
  set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
  set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
endif()

# Define build configuration options.
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
  set(CMAKE_BUILD_TYPE "Debug" CACHE
    STRING "Flutter build mode" FORCE)
  set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS
    "Debug" "Profile" "Release")
endif()

# Compilation settings that should be applied to most targets.
#
# Be cautious about adding new options here, as plugins use this function by
# default. In most cases, you should add new options to specific targets instead
# of modifying this function.
function(APPLY_STANDARD_SETTINGS TARGET)
  target_compile_features(${TARGET} PUBLIC cxx_std_14)
  target_compile_options(${TARGET} PRIVATE -Wall -Werror)
  target_compile_options(${TARGET} PRIVATE "$<$<NOT:$<CONFIG:Debug>>:-O3>")
  target_compile_definitions(${TARGET} PRIVATE "$<$<NOT:$<CONFIG:Debug>>:NDEBUG>")
endfunction()

# Flutter library and tool build rules.
set(FLUTTER_MANAGED_DIR "${CMAKE_CURRENT_SOURCE_DIR}/flutter")
add_subdirectory(${FLUTTER_MANAGED_DIR})

# System-level dependencies.
find_package(PkgConfig REQUIRED)
pkg_check_modules(GTK REQUIRED IMPORTED_TARGET gtk+-3.0)

add_definitions(-DAPPLICATION_ID="${APPLICATION_ID}")

# Define the application target. To change its name, change BINARY_NAME above,
# not the value here, or `flutter run` will no longer work.
#
# Any new source files that you add to the application should be added here.
add_executable(${BINARY_NAME}
  "main.cc"
  "my_application.cc"
  "${FLUTTER_MANAGED_DIR}/generated_plugin_registrant.cc"
)

# Apply the standard set of build settings. This can be removed for applications
# that need different build settings.
apply_standard_settings(${BINARY_NAME})

# Add dependency libraries. Add any application-specific dependencies here.
target_link_libraries(${BINARY_NAME} PRIVATE flutter)
target_link_libraries(${BINARY_NAME} PRIVATE PkgConfig::GTK)

# Run the Flutter tool portions of the build. This must not be removed.
add_dependencies(${BINARY_NAME} flutter_assemble)

# Only the install-generated bundle's copy of the executable will launch
# correctly, since the resources must in the right relative locations. To avoid
# people trying to run the unbundled copy, put it in a subdirectory instead of
# the default top-level location.
set_target_properties(${BINARY_NAME}
  PROPERTIES
  RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/intermediates_do_not_run"
)


# Generated plugin build rules, which manage building the plugins and adding
# them to the application.
include(flutter/generated_plugins.cmake)


# === Installation ===
# By default, "installing" just makes a relocatable bundle in the build
# directory.
set(BUILD_BUNDLE_DIR "${PROJECT_BINARY_DIR}/bundle")
if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
  set(CMAKE_INSTALL_PREFIX "${BUILD_BUNDLE_DIR}" CACHE PATH "..." FORCE)
endif()

# Start with a clean build bundle directory every time.
install(CODE "
  file(REMOVE_RECURSE \"${BUILD_BUNDLE_DIR}/\")
  " COMPONENT Runtime)

set(INSTALL_BUNDLE_DATA_DIR "${CMAKE_INSTALL_PREFIX}/data")
set(INSTALL_BUNDLE_LIB_DIR "${CMAKE_INSTALL_PREFIX}/lib")

install(TARGETS ${BINARY_NAME} RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}"
  COMPONENT Runtime)

install(FILES "${FLUTTER_ICU_DATA_FILE}" DESTINATION "${INSTALL_BUNDLE_DATA_DIR}"
  COMPONENT Runtime)

install(FILES "${FLUTTER_LIBRARY}" DESTINATION "${INSTALL_BUNDLE_LIB_DIR}"
  COMPONENT Runtime)

foreach(bundled_library ${PLUGIN_BUNDLED_LIBRARIES})
  install(FILES "${bundled_library}"
    DESTINATION "${INSTALL_BUNDLE_LIB_DIR}"
    COMPONENT Runtime)
endforeach(bundled_library)

# Copy the native assets provided by the build.dart from all packages.
set(NATIVE_ASSETS_DIR "${PROJECT_BUILD_DIR}native_assets/linux/")
install(DIRECTORY "${NATIVE_ASSETS_DIR}"
   DESTINATION "${INSTALL_BUNDLE_LIB_DIR}"
   COMPONENT Runtime)

# Fully re-copy the assets directory on each build to avoid having stale files
# from a previous install.
set(FLUTTER_ASSET_DIR_NAME "flutter_assets")
install(CODE "
  file(REMOVE_RECURSE \"${INSTALL_BUNDLE_DATA_DIR}/${FLUTTER_ASSET_DIR_NAME}\")
  " COMPONENT Runtime)
install(DIRECTORY "${PROJECT_BUILD_DIR}/${FLUTTER_ASSET_DIR_NAME}"
  DESTINATION "${INSTALL_BUNDLE_DATA_DIR}" COMPONENT Runtime)

# Install the AOT library on non-Debug builds only.
if(NOT CMAKE_BUILD_TYPE MATCHES "Debug")
  install(FILES "${AOT_LIBRARY}" DESTINATION "${INSTALL_BUNDLE_LIB_DIR}"
    COMPONENT Runtime)
endif()


================================================
FILE: flutter-examples/streaming_asr/linux/flutter/CMakeLists.txt
================================================
# This file controls Flutter-level build steps. It should not be edited.
cmake_minimum_required(VERSION 3.10)

set(EPHEMERAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/ephemeral")

# Configuration provided via flutter tool.
include(${EPHEMERAL_DIR}/generated_config.cmake)

# TODO: Move the rest of this into files in ephemeral. See
# https://github.com/flutter/flutter/issues/57146.

# Serves the same purpose as list(TRANSFORM ... PREPEND ...),
# which isn't available in 3.10.
function(list_prepend LIST_NAME PREFIX)
    set(NEW_LIST "")
    foreach(element ${${LIST_NAME}})
        list(APPEND NEW_LIST "${PREFIX}${element}")
    endforeach(element)
    set(${LIST_NAME} "${NEW_LIST}" PARENT_SCOPE)
endfunction()

# === Flutter Library ===
# System-level dependencies.
find_package(PkgConfig REQUIRED)
pkg_check_modules(GTK REQUIRED IMPORTED_TARGET gtk+-3.0)
pkg_check_modules(GLIB REQUIRED IMPORTED_TARGET glib-2.0)
pkg_check_modules(GIO REQUIRED IMPORTED_TARGET gio-2.0)

set(FLUTTER_LIBRARY "${EPHEMERAL_DIR}/libflutter_linux_gtk.so")

# Published to parent scope for install step.
set(FLUTTER_LIBRARY ${FLUTTER_LIBRARY} PARENT_SCOPE)
set(FLUTTER_ICU_DATA_FILE "${EPHEMERAL_DIR}/icudtl.dat" PARENT_SCOPE)
set(PROJECT_BUILD_DIR "${PROJECT_DIR}/build/" PARENT_SCOPE)
set(AOT_LIBRARY "${PROJECT_DIR}/build/lib/libapp.so" PARENT_SCOPE)

list(APPEND FLUTTER_LIBRARY_HEADERS
  "fl_basic_message_channel.h"
  "fl_binary_codec.h"
  "fl_binary_messenger.h"
  "fl_dart_project.h"
  "fl_engine.h"
  "fl_json_message_codec.h"
  "fl_json_method_codec.h"
  "fl_message_codec.h"
  "fl_method_call.h"
  "fl_method_channel.h"
  "fl_method_codec.h"
  "fl_method_response.h"
  "fl_plugin_registrar.h"
  "fl_plugin_registry.h"
  "fl_standard_message_codec.h"
  "fl_standard_method_codec.h"
  "fl_string_codec.h"
  "fl_value.h"
  "fl_view.h"
  "flutter_linux.h"
)
list_prepend(FLUTTER_LIBRARY_HEADERS "${EPHEMERAL_DIR}/flutter_linux/")
add_library(flutter INTERFACE)
target_include_directories(flutter INTERFACE
  "${EPHEMERAL_DIR}"
)
target_link_libraries(flutter INTERFACE "${FLUTTER_LIBRARY}")
target_link_libraries(flutter INTERFACE
  PkgConfig::GTK
  PkgConfig::GLIB
  PkgConfig::GIO
)
add_dependencies(flutter flutter_assemble)

# === Flutter tool backend ===
# _phony_ is a non-existent file to force this command to run every time,
# since currently there's no way to get a full input/output list from the
# flutter tool.
add_custom_command(
  OUTPUT ${FLUTTER_LIBRARY} ${FLUTTER_LIBRARY_HEADERS}
    ${CMAKE_CURRENT_BINARY_DIR}/_phony_
  COMMAND ${CMAKE_COMMAND} -E env
    ${FLUTTER_TOOL_ENVIRONMENT}
    "${FLUTTER_ROOT}/packages/flutter_tools/bin/tool_backend.sh"
      ${FLUTTER_TARGET_PLATFORM} ${CMAKE_BUILD_TYPE}
  VERBATIM
)
add_custom_target(flutter_assemble DEPENDS
  "${FLUTTER_LIBRARY}"
  ${FLUTTER_LIBRARY_HEADERS}
)


================================================
FILE: flutter-examples/streaming_asr/linux/main.cc
================================================
#include "my_application.h"

int main(int argc, char** argv) {
  g_autoptr(MyApplication) app = my_application_new();
  return g_application_run(G_APPLICATION(app), argc, argv);
}


================================================
FILE: flutter-examples/streaming_asr/linux/my_application.cc
================================================
#include "my_application.h"

#include <flutter_linux/flutter_linux.h>
#ifdef GDK_WINDOWING_X11
#include <gdk/gdkx.h>
#endif

#include "flutter/generated_plugin_registrant.h"

struct _MyApplication {
  GtkApplication parent_instance;
  char** dart_entrypoint_arguments;
};

G_DEFINE_TYPE(MyApplication, my_application, GTK_TYPE_APPLICATION)

// Implements GApplication::activate.
static void my_application_activate(GApplication* application) {
  MyApplication* self = MY_APPLICATION(application);
  GtkWindow* window =
      GTK_WINDOW(gtk_application_window_new(GTK_APPLICATION(application)));

  // Use a header bar when running in GNOME as this is the common style used
  // by applications and is the setup most users will be using (e.g. Ubuntu
  // desktop).
  // If running on X and not using GNOME then just use a traditional title bar
  // in case the window manager does more exotic layout, e.g. tiling.
  // If running on Wayland assume the header bar will work (may need changing
  // if future cases occur).
  gboolean use_header_bar = TRUE;
#ifdef GDK_WINDOWING_X11
  GdkScreen* screen = gtk_window_get_screen(window);
  if (GDK_IS_X11_SCREEN(screen)) {
    const gchar* wm_name = gdk_x11_screen_get_window_manager_name(screen);
    if (g_strcmp0(wm_name, "GNOME Shell") != 0) {
      use_header_bar = FALSE;
    }
  }
#endif
  if (use_header_bar) {
    GtkHeaderBar* header_bar = GTK_HEADER_BAR(gtk_header_bar_new());
    gtk_widget_show(GTK_WIDGET(header_bar));
    gtk_header_bar_set_title(header_bar, "streaming_asr");
    gtk_header_bar_set_show_close_button(header_bar, TRUE);
    gtk_window_set_titlebar(window, GTK_WIDGET(header_bar));
  } else {
    gtk_window_set_title(window, "streaming_asr");
  }

  gtk_window_set_default_size(window, 1280, 720);
  gtk_widget_show(GTK_WIDGET(window));

  g_autoptr(FlDartProject) project = fl_dart_project_new();
  fl_dart_project_set_dart_entrypoint_arguments(project, self->dart_entrypoint_arguments);

  FlView* view = fl_view_new(project);
  gtk_widget_show(GTK_WIDGET(view));
  gtk_container_add(GTK_CONTAINER(window), GTK_WIDGET(view));

  fl_register_plugins(FL_PLUGIN_REGISTRY(view));

  gtk_widget_grab_focus(GTK_WIDGET(view));
}

// Implements GApplication::local_command_line.
static gboolean my_application_local_command_line(GApplication* application, gchar*** arguments, int* exit_status) {
  MyApplication* self = MY_APPLICATION(application);
  // Strip out the first argument as it is the binary name.
  self->dart_entrypoint_arguments = g_strdupv(*arguments + 1);

  g_autoptr(GError) error = nullptr;
  if (!g_application_register(application, nullptr, &error)) {
     g_warning("Failed to register: %s", error->message);
     *exit_status = 1;
     return TRUE;
  }

  g_application_activate(application);
  *exit_status = 0;

  return TRUE;
}

// Implements GApplication::startup.
static void my_application_startup(GApplication* application) {
  //MyApplication* self = MY_APPLICATION(object);

  // Perform any actions required at application startup.

  G_APPLICATION_CLASS(my_application_parent_class)->startup(application);
}

// Implements GApplication::shutdown.
static void my_application_shutdown(GApplication* application) {
  //MyApplication* self = MY_APPLICATION(object);

  // Perform any actions required at application shutdown.

  G_APPLICATION_CLASS(my_application_parent_class)->shutdown(application);
}

// Implements GObject::dispose.
static void my_application_dispose(GObject* object) {
  MyApplication* self = MY_APPLICATION(object);
  g_clear_pointer(&self->dart_entrypoint_arguments, g_strfreev);
  G_OBJECT_CLASS(my_application_parent_class)->dispose(object);
}

static void my_application_class_init(MyApplicationClass* klass) {
  G_APPLICATION_CLASS(klass)->activate = my_application_activate;
  G_APPLICATION_CLASS(klass)->local_command_line = my_application_local_command_line;
  G_APPLICATION_CLASS(klass)->startup = my_application_startup;
  G_APPLICATION_CLASS(klass)->shutdown = my_application_shutdown;
  G_OBJECT_CLASS(klass)->dispose = my_application_dispose;
}

static void my_application_init(MyApplication* self) {}

MyApplication* my_application_new() {
  return MY_APPLICATION(g_object_new(my_application_get_type(),
                                     "application-id", APPLICATION_ID,
                                     "flags", G_APPLICATION_NON_UNIQUE,
                                     nullptr));
}


================================================
FILE: flutter-examples/streaming_asr/linux/my_application.h
================================================
#ifndef FLUTTER_MY_APPLICATION_H_
#define FLUTTER_MY_APPLICATION_H_

#include <gtk/gtk.h>

G_DECLARE_FINAL_TYPE(MyApplication, my_application, MY, APPLICATION,
                     GtkApplication)

/**
 * my_application_new:
 *
 * Creates a new Flutter-based application.
 *
 * Returns: a new #MyApplication.
 */
MyApplication* my_application_new();

#endif  // FLUTTER_MY_APPLICATION_H_


================================================
FILE: flutter-examples/streaming_asr/macos/.gitignore
================================================
# Flutter-related
**/Flutter/ephemeral/
**/Pods/

# Xcode-related
**/dgph
**/xcuserdata/


================================================
FILE: flutter-examples/streaming_asr/macos/Flutter/Flutter-Debug.xcconfig
================================================
#include? "Pods/Target Support Files/Pods-Runner/Pods-Runner.debug.xcconfig"
#include "ephemeral/Flutter-Generated.xcconfig"


================================================
FILE: flutter-examples/streaming_asr/macos/Flutter/Flutter-Release.xcconfig
================================================
#include? "Pods/Target Support Files/Pods-Runner/Pods-Runner.release.xcconfig"
#include "ephemeral/Flutter-Generated.xcconfig"


================================================
FILE: flutter-examples/streaming_asr/macos/Runner/AppDelegate.swift
================================================
import Cocoa
import FlutterMacOS

@NSApplicationMain
class AppDelegate: FlutterAppDelegate {
  override func applicationShouldTerminateAfterLastWindowClosed(_ sender: NSApplication) -> Bool {
    return true
  }
}


================================================
FILE: flutter-examples/streaming_asr/macos/Runner/Assets.xcassets/AppIcon.appiconset/Contents.json
================================================
{
  "images" : [
    {
      "size" : "16x16",
      "idiom" : "mac",
      "filename" : "app_icon_16.png",
      "scale" : "1x"
    },
    {
      "size" : "16x16",
      "idiom" : "mac",
      "filename" : "app_icon_32.png",
      "scale" : "2x"
    },
    {
      "size" : "32x32",
      "idiom" : "mac",
      "filename" : "app_icon_32.png",
      "scale" : "1x"
    },
    {
      "size" : "32x32",
      "idiom" : "mac",
      "filename" : "app_icon_64.png",
      "scale" : "2x"
    },
    {
      "size" : "128x128",
      "idiom" : "mac",
      "filename" : "app_icon_128.png",
      "scale" : "1x"
    },
    {
      "size" : "128x128",
      "idiom" : "mac",
      "filename" : "app_icon_256.png",
      "scale" : "2x"
    },
    {
      "size" : "256x256",
      "idiom" : "mac",
      "filename" : "app_icon_256.png",
      "scale" : "1x"
    },
    {
      "size" : "256x256",
      "idiom" : "mac",
      "filename" : "app_icon_512.png",
      "scale" : "2x"
    },
    {
      "size" : "512x512",
      "idiom" : "mac",
      "filename" : "app_icon_512.png",
      "scale" : "1x"
    },
    {
      "size" : "512x512",
      "idiom" : "mac",
      "filename" : "app_icon_1024.png",
      "scale" : "2x"
    }
  ],
  "info" : {
    "version" : 1,
    "author" : "xcode"
  }
}


================================================
FILE: flutter-examples/streaming_asr/macos/Runner/Base.lproj/MainMenu.xib
================================================
<?xml version="1.0" encoding="UTF-8"?>
<document type="com.apple.InterfaceBuilder3.Cocoa.XIB" version="3.0" toolsVersion="14490.70" targetRuntime="MacOSX.Cocoa" propertyAccessControl="none" useAutolayout="YES" customObjectInstantitationMethod="direct">
    <dependencies>
        <deployment identifier="macosx"/>
        <plugIn identifier="com.apple.InterfaceBuilder.CocoaPlugin" version="14490.70"/>
        <capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
    </dependencies>
    <objects>
        <customObject id="-2" userLabel="File's Owner" customClass="NSApplication">
            <connections>
                <outlet property="delegate" destination="Voe-Tx-rLC" id="GzC-gU-4Uq"/>
            </connections>
        </customObject>
        <customObject id="-1" userLabel="First Responder" customClass="FirstResponder"/>
        <customObject id="-3" userLabel="Application" customClass="NSObject"/>
        <customObject id="Voe-Tx-rLC" customClass="AppDelegate" customModule="Runner" customModuleProvider="target">
            <connections>
                <outlet property="applicationMenu" destination="uQy-DD-JDr" id="XBo-yE-nKs"/>
                <outlet property="mainFlutterWindow" destination="QvC-M9-y7g" id="gIp-Ho-8D9"/>
            </connections>
        </customObject>
        <customObject id="YLy-65-1bz" customClass="NSFontManager"/>
        <menu title="Main Menu" systemMenu="main" id="AYu-sK-qS6">
            <items>
                <menuItem title="APP_NAME" id="1Xt-HY-uBw">
                    <modifierMask key="keyEquivalentModifierMask"/>
                    <menu key="submenu" title="APP_NAME" systemMenu="apple" id="uQy-DD-JDr">
                        <items>
                            <menuItem title="About APP_NAME" id="5kV-Vb-QxS">
                                <modifierMask key="keyEquivalentModifierMask"/>
                                <connections>
                                    <action selector="orderFrontStandardAboutPanel:" target="-1" id="Exp-CZ-Vem"/>
                                </connections>
                            </menuItem>
                            <menuItem isSeparatorItem="YES" id="VOq-y0-SEH"/>
                            <menuItem title="Preferences…" keyEquivalent="," id="BOF-NM-1cW"/>
                            <menuItem isSeparatorItem="YES" id="wFC-TO-SCJ"/>
                            <menuItem title="Services" id="NMo-om-nkz">
                                <modifierMask key="keyEquivalentModifierMask"/>
                                <menu key="submenu" title="Services" systemMenu="services" id="hz9-B4-Xy5"/>
                            </menuItem>
                            <menuItem isSeparatorItem="YES" id="4je-JR-u6R"/>
                            <menuItem title="Hide APP_NAME" keyEquivalent="h" id="Olw-nP-bQN">
                                <connections>
                                    <action selector="hide:" target="-1" id="PnN-Uc-m68"/>
                                </connections>
                            </menuItem>
                            <menuItem title="Hide Others" keyEquivalent="h" id="Vdr-fp-XzO">
                                <modifierMask key="keyEquivalentModifierMask" option="YES" command="YES"/>
                                <connections>
                                    <action selector="hideOtherApplications:" target="-1" id="VT4-aY-XCT"/>
                                </connections>
                            </menuItem>
                            <menuItem title="Show All" id="Kd2-mp-pUS">
                                <modifierMask key="keyEquivalentModifierMask"/>
                                <connections>
                                    <action selector="unhideAllApplications:" target="-1" id="Dhg-Le-xox"/>
                                </connections>
                            </menuItem>
                            <menuItem isSeparatorItem="YES" id="kCx-OE-vgT"/>
                            <menuItem title="Quit APP_NAME" keyEquivalent="q" id="4sb-4s-VLi">
                                <connections>
                                    <action selector="terminate:" target="-1" id="Te7-pn-YzF"/>
                                </connections>
                            </menuItem>
                        </items>
                    </menu>
                </menuItem>
                <menuItem title="Edit" id="5QF-Oa-p0T">
                    <modifierMask key="keyEquivalentModifierMask"/>
                    <menu key="submenu" title="Edit" id="W48-6f-4Dl">
                        <items>
                            <menuItem title="Undo" keyEquivalent="z" id="dRJ-4n-Yzg">
                                <connections>
                                    <action selector="undo:" target="-1" id="M6e-cu-g7V"/>
                                </connections>
                            </menuItem>
                            <menuItem title="Redo" keyEquivalent="Z" id="6dh-zS-Vam">
                                <connections>
                                    <action selector="redo:" target="-1" id="oIA-Rs-6OD"/>
                                </connections>
                            </menuItem>
                            <menuItem isSeparatorItem="YES" id="WRV-NI-Exz"/>
                            <menuItem title="Cut" keyEquivalent="x" id="uRl-iY-unG">
                                <connections>
                                    <action selector="cut:" target="-1" id="YJe-68-I9s"/>
                                </connections>
                            </menuItem>
                            <menuItem title="Copy" keyEquivalent="c" id="x3v-GG-iWU">
                                <connections>
                                    <action selector="copy:" target="-1" id="G1f-GL-Joy"/>
                                </connections>
                            </menuItem>
                            <menuItem title="Paste" keyEquivalent="v" id="gVA-U4-sdL">
                                <connections>
                                    <action selector="paste:" target="-1" id="UvS-8e-Qdg"/>
                                </connections>
                            </menuItem>
                            <menuItem title="Paste and Match Style" keyEquivalent="V" id="WeT-3V-zwk">
                                <modifierMask key="keyEquivalentModifierMask" option="YES" command="YES"/>
                                <connections>
                                    <action selector="pasteAsPlainText:" target="-1" id="cEh-KX-wJQ"/>
                                </connections>
                            </menuItem>
                            <menuItem title="Delete" id="pa3-QI-u2k">
                                <modifierMask key="keyEquivalentModifierMask"/>
                                <connections>
                                    <action selector="delete:" target="-1" id="0Mk-Ml-PaM"/>
                                </connections>
                            </menuItem>
                            <menuItem title="Select All" keyEquivalent="a" id="Ruw-6m-B2m">
                                <connections>
                                    <action selector="selectAll:" target="-1" id="VNm-Mi-diN"/>
                                </connections>
                            </menuItem>
                            <menuItem isSeparatorItem="YES" id="uyl-h8-XO2"/>
                            <menuItem title="Find" id="4EN-yA-p0u">
                                <modifierMask key="keyEquivalentModifierMask"/>
                                <menu key="submenu" title="Find" id="1b7-l0-nxx">
                                    <items>
                                        <menuItem title="Find…" tag="1" keyEquivalent="f" id="Xz5-n4-O0W">
                                            <connections>
                                                <action selector="performFindPanelAction:" target="-1" id="cD7-Qs-BN4"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Find and Replace…" tag="12" keyEquivalent="f" id="YEy-JH-Tfz">
                                            <modifierMask key="keyEquivalentModifierMask" option="YES" command="YES"/>
                                            <connections>
                                                <action selector="performFindPanelAction:" target="-1" id="WD3-Gg-5AJ"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Find Next" tag="2" keyEquivalent="g" id="q09-fT-Sye">
                                            <connections>
                                                <action selector="performFindPanelAction:" target="-1" id="NDo-RZ-v9R"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Find Previous" tag="3" keyEquivalent="G" id="OwM-mh-QMV">
                                            <connections>
                                                <action selector="performFindPanelAction:" target="-1" id="HOh-sY-3ay"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Use Selection for Find" tag="7" keyEquivalent="e" id="buJ-ug-pKt">
                                            <connections>
                                                <action selector="performFindPanelAction:" target="-1" id="U76-nv-p5D"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Jump to Selection" keyEquivalent="j" id="S0p-oC-mLd">
                                            <connections>
                                                <action selector="centerSelectionInVisibleArea:" target="-1" id="IOG-6D-g5B"/>
                                            </connections>
                                        </menuItem>
                                    </items>
                                </menu>
                            </menuItem>
                            <menuItem title="Spelling and Grammar" id="Dv1-io-Yv7">
                                <modifierMask key="keyEquivalentModifierMask"/>
                                <menu key="submenu" title="Spelling" id="3IN-sU-3Bg">
                                    <items>
                                        <menuItem title="Show Spelling and Grammar" keyEquivalent=":" id="HFo-cy-zxI">
                                            <connections>
                                                <action selector="showGuessPanel:" target="-1" id="vFj-Ks-hy3"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Check Document Now" keyEquivalent=";" id="hz2-CU-CR7">
                                            <connections>
                                                <action selector="checkSpelling:" target="-1" id="fz7-VC-reM"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem isSeparatorItem="YES" id="bNw-od-mp5"/>
                                        <menuItem title="Check Spelling While Typing" id="rbD-Rh-wIN">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="toggleContinuousSpellChecking:" target="-1" id="7w6-Qz-0kB"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Check Grammar With Spelling" id="mK6-2p-4JG">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="toggleGrammarChecking:" target="-1" id="muD-Qn-j4w"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Correct Spelling Automatically" id="78Y-hA-62v">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="toggleAutomaticSpellingCorrection:" target="-1" id="2lM-Qi-WAP"/>
                                            </connections>
                                        </menuItem>
                                    </items>
                                </menu>
                            </menuItem>
                            <menuItem title="Substitutions" id="9ic-FL-obx">
                                <modifierMask key="keyEquivalentModifierMask"/>
                                <menu key="submenu" title="Substitutions" id="FeM-D8-WVr">
                                    <items>
                                        <menuItem title="Show Substitutions" id="z6F-FW-3nz">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="orderFrontSubstitutionsPanel:" target="-1" id="oku-mr-iSq"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem isSeparatorItem="YES" id="gPx-C9-uUO"/>
                                        <menuItem title="Smart Copy/Paste" id="9yt-4B-nSM">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="toggleSmartInsertDelete:" target="-1" id="3IJ-Se-DZD"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Smart Quotes" id="hQb-2v-fYv">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="toggleAutomaticQuoteSubstitution:" target="-1" id="ptq-xd-QOA"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Smart Dashes" id="rgM-f4-ycn">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="toggleAutomaticDashSubstitution:" target="-1" id="oCt-pO-9gS"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Smart Links" id="cwL-P1-jid">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="toggleAutomaticLinkDetection:" target="-1" id="Gip-E3-Fov"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Data Detectors" id="tRr-pd-1PS">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="toggleAutomaticDataDetection:" target="-1" id="R1I-Nq-Kbl"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Text Replacement" id="HFQ-gK-NFA">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="toggleAutomaticTextReplacement:" target="-1" id="DvP-Fe-Py6"/>
                                            </connections>
                                        </menuItem>
                                    </items>
                                </menu>
                            </menuItem>
                            <menuItem title="Transformations" id="2oI-Rn-ZJC">
                                <modifierMask key="keyEquivalentModifierMask"/>
                                <menu key="submenu" title="Transformations" id="c8a-y6-VQd">
                                    <items>
                                        <menuItem title="Make Upper Case" id="vmV-6d-7jI">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="uppercaseWord:" target="-1" id="sPh-Tk-edu"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Make Lower Case" id="d9M-CD-aMd">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="lowercaseWord:" target="-1" id="iUZ-b5-hil"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Capitalize" id="UEZ-Bs-lqG">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="capitalizeWord:" target="-1" id="26H-TL-nsh"/>
                                            </connections>
                                        </menuItem>
                                    </items>
                                </menu>
                            </menuItem>
                            <menuItem title="Speech" id="xrE-MZ-jX0">
                                <modifierMask key="keyEquivalentModifierMask"/>
                                <menu key="submenu" title="Speech" id="3rS-ZA-NoH">
                                    <items>
                                        <menuItem title="Start Speaking" id="Ynk-f8-cLZ">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="startSpeaking:" target="-1" id="654-Ng-kyl"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Stop Speaking" id="Oyz-dy-DGm">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="stopSpeaking:" target="-1" id="dX8-6p-jy9"/>
                                            </connections>
                                        </menuItem>
                                    </items>
                                </menu>
                            </menuItem>
                        </items>
                    </menu>
                </menuItem>
                <menuItem title="View" id="H8h-7b-M4v">
                    <modifierMask key="keyEquivalentModifierMask"/>
                    <menu key="submenu" title="View" id="HyV-fh-RgO">
                        <items>
                            <menuItem title="Enter Full Screen" keyEquivalent="f" id="4J7-dP-txa">
                                <modifierMask key="keyEquivalentModifierMask" control="YES" command="YES"/>
                                <connections>
                                    <action selector="toggleFullScreen:" target="-1" id="dU3-MA-1Rq"/>
                                </connections>
                            </menuItem>
                        </items>
                    </menu>
                </menuItem>
                <menuItem title="Window" id="aUF-d1-5bR">
                    <modifierMask key="keyEquivalentModifierMask"/>
                    <menu key="submenu" title="Window" systemMenu="window" id="Td7-aD-5lo">
                        <items>
                            <menuItem title="Minimize" keyEquivalent="m" id="OY7-WF-poV">
                                <connections>
                                    <action selector="performMiniaturize:" target="-1" id="VwT-WD-YPe"/>
                                </connections>
                            </menuItem>
                            <menuItem title="Zoom" id="R4o-n2-Eq4">
                                <modifierMask key="keyEquivalentModifierMask"/>
                                <connections>
                                    <action selector="performZoom:" target="-1" id="DIl-cC-cCs"/>
                                </connections>
                            </menuItem>
                            <menuItem isSeparatorItem="YES" id="eu3-7i-yIM"/>
                            <menuItem title="Bring All to Front" id="LE2-aR-0XJ">
                                <modifierMask key="keyEquivalentModifierMask"/>
                                <connections>
                                    <action selector="arrangeInFront:" target="-1" id="DRN-fu-gQh"/>
                                </connections>
                            </menuItem>
                        </items>
                    </menu>
                </menuItem>
                <menuItem title="Help" id="EPT-qC-fAb">
                    <modifierMask key="keyEquivalentModifierMask"/>
                    <menu key="submenu" title="Help" systemMenu="help" id="rJ0-wn-3NY"/>
                </menuItem>
            </items>
            <point key="canvasLocation" x="142" y="-258"/>
        </menu>
        <window title="APP_NAME" allowsToolTipsWhenApplicationIsInactive="NO" autorecalculatesKeyViewLoop="NO" releasedWhenClosed="NO" animationBehavior="default" id="QvC-M9-y7g" customClass="MainFlutterWindow" customModule="Runner" customModuleProvider="target">
            <windowStyleMask key="styleMask" titled="YES" closable="YES" miniaturizable="YES" resizable="YES"/>
            <rect key="contentRect" x="335" y="390" width="800" height="600"/>
            <rect key="screenRect" x="0.0" y="0.0" width="2560" height="1577"/>
            <view key="contentView" wantsLayer="YES" id="EiT-Mj-1SZ">
                <rect key="frame" x="0.0" y="0.0" width="800" height="600"/>
                <autoresizingMask key="autoresizingMask"/>
            </view>
        </window>
    </objects>
</document>


================================================
FILE: flutter-examples/streaming_asr/macos/Runner/Configs/AppInfo.xcconfig
================================================
// Application-level settings for the Runner target.
//
// This may be replaced with something auto-generated from metadata (e.g., pubspec.yaml) in the
// future. If not, the values below would default to using the project name when this becomes a
// 'flutter create' template.

// The application's name. By default this is also the title of the Flutter window.
PRODUCT_NAME = streaming_asr

// The application's bundle identifier
PRODUCT_BUNDLE_IDENTIFIER = com.example.streamingAsr

// The copyright displayed in application information
PRODUCT_COPYRIGHT = Copyright © 2024 com.example. All rights reserved.


================================================
FILE: flutter-examples/streaming_asr/macos/Runner/Configs/Debug.xcconfig
================================================
#include "../../Flutter/Flutter-Debug.xcconfig"
#include "Warnings.xcconfig"


================================================
FILE: flutter-examples/streaming_asr/macos/Runner/Configs/Release.xcconfig
================================================
#include "../../Flutter/Flutter-Release.xcconfig"
#include "Warnings.xcconfig"


================================================
FILE: flutter-examples/streaming_asr/macos/Runner/Configs/Warnings.xcconfig
================================================
WARNING_CFLAGS = -Wall -Wconditional-uninitialized -Wnullable-to-nonnull-conversion -Wmissing-method-return-type -Woverlength-strings
GCC_WARN_UNDECLARED_SELECTOR = YES
CLANG_UNDEFINED_BEHAVIOR_SANITIZER_NULLABILITY = YES
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES
CLANG_WARN_PRAGMA_PACK = YES
CLANG_WARN_STRICT_PROTOTYPES = YES
CLANG_WARN_COMMA = YES
GCC_WARN_STRICT_SELECTOR_MATCH = YES
CLANG_WARN_OBJC_REPEATED_USE_OF_WEAK = YES
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES
GCC_WARN_SHADOW = YES
CLANG_WARN_UNREACHABLE_CODE = YES


================================================
FILE: flutter-examples/streaming_asr/macos/Runner/DebugProfile.entitlements
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>com.apple.security.app-sandbox</key>
	<true/>
	<key>com.apple.security.cs.allow-jit</key>
	<true/>
	<key>com.apple.security.device.audio-input</key>
	<true/>
	<key>com.apple.security.network.server</key>
	<true/>
</dict>
</plist>


================================================
FILE: flutter-examples/streaming_asr/macos/Runner/Info.plist
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>NSMicrophoneUsageDescription</key>
	<string>Need microphone access for Next-gen kaldi to work</string>
	<key>CFBundleDevelopmentRegion</key>
	<string>$(DEVELOPMENT_LANGUAGE)</string>
	<key>CFBundleExecutable</key>
	<string>$(EXECUTABLE_NAME)</string>
	<key>CFBundleIconFile</key>
	<string></string>
	<key>CFBundleIdentifier</key>
	<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
	<key>CFBundleInfoDictionaryVersion</key>
	<string>6.0</string>
	<key>CFBundleName</key>
	<string>$(PRODUCT_NAME)</string>
	<key>CFBundlePackageType</key>
	<string>APPL</string>
	<key>CFBundleShortVersionString</key>
	<string>$(FLUTTER_BUILD_NAME)</string>
	<key>CFBundleVersion</key>
	<string>$(FLUTTER_BUILD_NUMBER)</string>
	<key>LSMinimumSystemVersion</key>
	<string>$(MACOSX_DEPLOYMENT_TARGET)</string>
	<key>NSHumanReadableCopyright</key>
	<string>$(PRODUCT_COPYRIGHT)</string>
	<key>NSMainNibFile</key>
	<string>MainMenu</string>
	<key>NSPrincipalClass</key>
	<string>NSApplication</string>
</dict>
</plist>


================================================
FILE: flutter-examples/streaming_asr/macos/Runner/MainFlutterWindow.swift
================================================
import Cocoa
import FlutterMacOS

class MainFlutterWindow: NSWindow {
  override func awakeFromNib() {
    let flutterViewController = FlutterViewController()
    let windowFrame = self.frame
    self.contentViewController = flutterViewController
    self.setFrame(windowFrame, display: true)

    RegisterGeneratedPlugins(registry: flutterViewController)

    super.awakeFromNib()
  }
}


================================================
FILE: flutter-examples/streaming_asr/macos/Runner/Release.entitlements
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>com.apple.security.app-sandbox</key>
	<true/>
	<key>com.apple.security.device.audio-input</key>
	<true/>
</dict>
</plist>


================================================
FILE: flutter-examples/streaming_asr/macos/Runner.xcodeproj/project.pbxproj
================================================
// !$*UTF8*$!
{
	archiveVersion = 1;
	classes = {
	};
	objectVersion = 54;
	objects = {

/* Begin PBXAggregateTarget section */
		33CC111A2044C6BA0003C045 /* Flutter Assemble */ = {
			isa = PBXAggregateTarget;
			buildConfigurationList = 33CC111B2044C6BA0003C045 /* Build configuration list for PBXAggregateTarget "Flutter Assemble" */;
			buildPhases = (
				33CC111E2044C6BF0003C045 /* ShellScript */,
			);
			dependencies = (
			);
			name = "Flutter Assemble";
			productName = FLX;
		};
/* End PBXAggregateTarget section */

/* Begin PBXBuildFile section */
		331C80D8294CF71000263BE5 /* RunnerTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 331C80D7294CF71000263BE5 /* RunnerTests.swift */; };
		335BBD1B22A9A15E00E9071D /* GeneratedPluginRegistrant.swift in Sources */ = {isa = PBXBuildFile; fileRef = 335BBD1A22A9A15E00E9071D /* GeneratedPluginRegistrant.swift */; };
		33CC10F12044A3C60003C045 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 33CC10F02044A3C60003C045 /* AppDelegate.swift */; };
		33CC10F32044A3C60003C045 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 33CC10F22044A3C60003C045 /* Assets.xcassets */; };
		33CC10F62044A3C60003C045 /* MainMenu.xib in Resources */ = {isa = PBXBuildFile; fileRef = 33CC10F42044A3C60003C045 /* MainMenu.xib */; };
		33CC11132044BFA00003C045 /* MainFlutterWindow.swift in Sources */ = {isa = PBXBuildFile; fileRef = 33CC11122044BFA00003C045 /* MainFlutterWindow.swift */; };
/* End PBXBuildFile section */

/* Begin PBXContainerItemProxy section */
		331C80D9294CF71000263BE5 /* PBXContainerItemProxy */ = {
			isa = PBXContainerItemProxy;
			containerPortal = 33CC10E52044A3C60003C045 /* Project object */;
			proxyType = 1;
			remoteGlobalIDString = 33CC10EC2044A3C60003C045;
			remoteInfo = Runner;
		};
		33CC111F2044C79F0003C045 /* PBXContainerItemProxy */ = {
			isa = PBXContainerItemProxy;
			containerPortal = 33CC10E52044A3C60003C045 /* Project object */;
			proxyType = 1;
			remoteGlobalIDString = 33CC111A2044C6BA0003C045;
			remoteInfo = FLX;
		};
/* End PBXContainerItemProxy section */

/* Begin PBXCopyFilesBuildPhase section */
		33CC110E2044A8840003C045 /* Bundle Framework */ = {
			isa = PBXCopyFilesBuildPhase;
			buildActionMask = 2147483647;
			dstPath = "";
			dstSubfolderSpec = 10;
			files = (
			);
			name = "Bundle Framework";
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXCopyFilesBuildPhase section */

/* Begin PBXFileReference section */
		331C80D5294CF71000263BE5 /* RunnerTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = RunnerTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
		331C80D7294CF71000263BE5 /* RunnerTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RunnerTests.swift; sourceTree = "<group>"; };
		333000ED22D3DE5D00554162 /* Warnings.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = Warnings.xcconfig; sourceTree = "<group>"; };
		335BBD1A22A9A15E00E9071D /* GeneratedPluginRegistrant.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = GeneratedPluginRegistrant.swift; sourceTree = "<group>"; };
		33CC10ED2044A3C60003C045 /* streaming_asr.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "streaming_asr.app"; sourceTree = BUILT_PRODUCTS_DIR; };
		33CC10F02044A3C60003C045 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; };
		33CC10F22044A3C60003C045 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; name = Assets.xcassets; path = Runner/Assets.xcassets; sourceTree = "<group>"; };
		33CC10F52044A3C60003C045 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.xib; name = Base; path = Base.lproj/MainMenu.xib; sourceTree = "<group>"; };
		33CC10F72044A3C60003C045 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; name = Info.plist; path = Runner/Info.plist; sourceTree = "<group>"; };
		33CC11122044BFA00003C045 /* MainFlutterWindow.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MainFlutterWindow.swift; sourceTree = "<group>"; };
		33CEB47222A05771004F2AC0 /* Flutter-Debug.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = "Flutter-Debug.xcconfig"; sourceTree = "<group>"; };
		33CEB47422A05771004F2AC0 /* Flutter-Release.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = "Flutter-Release.xcconfig"; sourceTree = "<group>"; };
		33CEB47722A0578A004F2AC0 /* Flutter-Generated.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; name = "Flutter-Generated.xcconfig"; path = "ephemeral/Flutter-Generated.xcconfig"; sourceTree = "<group>"; };
		33E51913231747F40026EE4D /* DebugProfile.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = DebugProfile.entitlements; sourceTree = "<group>"; };
		33E51914231749380026EE4D /* Release.entitlements */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.entitlements; path = Release.entitlements; sourceTree = "<group>"; };
		33E5194F232828860026EE4D /* AppInfo.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = AppInfo.xcconfig; sourceTree = "<group>"; };
		7AFA3C8E1D35360C0083082E /* Release.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = Release.xcconfig; sourceTree = "<group>"; };
		9740EEB21CF90195004384FC /* Debug.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; path = Debug.xcconfig; sourceTree = "<group>"; };
/* End PBXFileReference section */

/* Begin PBXFrameworksBuildPhase section */
		331C80D2294CF70F00263BE5 /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		33CC10EA2044A3C60003C045 /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXFrameworksBuildPhase section */

/* Begin PBXGroup section */
		331C80D6294CF71000263BE5 /* RunnerTests */ = {
			isa = PBXGroup;
			children = (
				331C80D7294CF71000263BE5 /* RunnerTests.swift */,
			);
			path = RunnerTests;
			sourceTree = "<group>";
		};
		33BA886A226E78AF003329D5 /* Configs */ = {
			isa = PBXGroup;
			children = (
				33E5194F232828860026EE4D /* AppInfo.xcconfig */,
				9740EEB21CF90195004384FC /* Debug.xcconfig */,
				7AFA3C8E1D35360C0083082E /* Release.xcconfig */,
				333000ED22D3DE5D00554162 /* Warnings.xcconfig */,
			);
			path = Configs;
			sourceTree = "<group>";
		};
		33CC10E42044A3C60003C045 = {
			isa = PBXGroup;
			children = (
				33FAB671232836740065AC1E /* Runner */,
				33CEB47122A05771004F2AC0 /* Flutter */,
				331C80D6294CF71000263BE5 /* RunnerTests */,
				33CC10EE2044A3C60003C045 /* Products */,
				D73912EC22F37F3D000D13A0 /* Frameworks */,
			);
			sourceTree = "<group>";
		};
		33CC10EE2044A3C60003C045 /* Products */ = {
			isa = PBXGroup;
			children = (
				33CC10ED2044A3C60003C045 /* streaming_asr.app */,
				331C80D5294CF71000263BE5 /* RunnerTests.xctest */,
			);
			name = Products;
			sourceTree = "<group>";
		};
		33CC11242044D66E0003C045 /* Resources */ = {
			isa = PBXGroup;
			children = (
				33CC10F22044A3C60003C045 /* Assets.xcassets */,
				33CC10F42044A3C60003C045 /* MainMenu.xib */,
				33CC10F72044A3C60003C045 /* Info.plist */,
			);
			name = Resources;
			path = ..;
			sourceTree = "<group>";
		};
		33CEB47122A05771004F2AC0 /* Flutter */ = {
			isa = PBXGroup;
			children = (
				335BBD1A22A9A15E00E9071D /* GeneratedPluginRegistrant.swift */,
				33CEB47222A05771004F2AC0 /* Flutter-Debug.xcconfig */,
				33CEB47422A05771004F2AC0 /* Flutter-Release.xcconfig */,
				33CEB47722A0578A004F2AC0 /* Flutter-Generated.xcconfig */,
			);
			path = Flutter;
			sourceTree = "<group>";
		};
		33FAB671232836740065AC1E /* Runner */ = {
			isa = PBXGroup;
			children = (
				33CC10F02044A3C60003C045 /* AppDelegate.swift */,
				33CC11122044BFA00003C045 /* MainFlutterWindow.swift */,
				33E51913231747F40026EE4D /* DebugProfile.entitlements */,
				33E51914231749380026EE4D /* Release.entitlements */,
				33CC11242044D66E0003C045 /* Resources */,
				33BA886A226E78AF003329D5 /* Configs */,
			);
			path = Runner;
			sourceTree = "<group>";
		};
		D73912EC22F37F3D000D13A0 /* Frameworks */ = {
			isa = PBXGroup;
			children = (
			);
			name = Frameworks;
			sourceTree = "<group>";
		};
/* End PBXGroup section */

/* Begin PBXNativeTarget section */
		331C80D4294CF70F00263BE5 /* RunnerTests */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = 331C80DE294CF71000263BE5 /* Build configuration list for PBXNativeTarget "RunnerTests" */;
			buildPhases = (
				331C80D1294CF70F00263BE5 /* Sources */,
				331C80D2294CF70F00263BE5 /* Frameworks */,
				331C80D3294CF70F00263BE5 /* Resources */,
			);
			buildRules = (
			);
			dependencies = (
				331C80DA294CF71000263BE5 /* PBXTargetDependency */,
			);
			name = RunnerTests;
			productName = RunnerTests;
			productReference = 331C80D5294CF71000263BE5 /* RunnerTests.xctest */;
			productType = "com.apple.product-type.bundle.unit-test";
		};
		33CC10EC2044A3C60003C045 /* Runner */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = 33CC10FB2044A3C60003C045 /* Build configuration list for PBXNativeTarget "Runner" */;
			buildPhases = (
				33CC10E92044A3C60003C045 /* Sources */,
				33CC10EA2044A3C60003C045 /* Frameworks */,
				33CC10EB2044A3C60003C045 /* Resources */,
				33CC110E2044A8840003C045 /* Bundle Framework */,
				3399D490228B24CF009A79C7 /* ShellScript */,
			);
			buildRules = (
			);
			dependencies = (
				33CC11202044C79F0003C045 /* PBXTargetDependency */,
			);
			name = Runner;
			productName = Runner;
			productReference = 33CC10ED2044A3C60003C045 /* streaming_asr.app */;
			productType = "com.apple.product-type.application";
		};
/* End PBXNativeTarget section */

/* Begin PBXProject section */
		33CC10E52044A3C60003C045 /* Project object */ = {
			isa = PBXProject;
			attributes = {
				BuildIndependentTargetsInParallel = YES;
				LastSwiftUpdateCheck = 0920;
				LastUpgradeCheck = 1510;
				ORGANIZATIONNAME = "";
				TargetAttributes = {
					331C80D4294CF70F00263BE5 = {
						CreatedOnToolsVersion = 14.0;
						TestTargetID = 33CC10EC2044A3C60003C045;
					};
					33CC10EC2044A3C60003C045 = {
						CreatedOnToolsVersion = 9.2;
						LastSwiftMigration = 1100;
						ProvisioningStyle = Automatic;
						SystemCapabilities = {
							com.apple.Sandbox = {
								enabled = 1;
							};
						};
					};
					33CC111A2044C6BA0003C045 = {
						CreatedOnToolsVersion = 9.2;
						ProvisioningStyle = Manual;
					};
				};
			};
			buildConfigurationList = 33CC10E82044A3C60003C045 /* Build configuration list for PBXProject "Runner" */;
			compatibilityVersion = "Xcode 9.3";
			developmentRegion = en;
			hasScannedForEncodings = 0;
			knownRegions = (
				en,
				Base,
			);
			mainGroup = 33CC10E42044A3C60003C045;
			productRefGroup = 33CC10EE2044A3C60003C045 /* Products */;
			projectDirPath = "";
			projectRoot = "";
			targets = (
				33CC10EC2044A3C60003C045 /* Runner */,
				331C80D4294CF70F00263BE5 /* RunnerTests */,
				33CC111A2044C6BA0003C045 /* Flutter Assemble */,
			);
		};
/* End PBXProject section */

/* Begin PBXResourcesBuildPhase section */
		331C80D3294CF70F00263BE5 /* Resources */ = {
			isa = PBXResourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		33CC10EB2044A3C60003C045 /* Resources */ = {
			isa = PBXResourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				33CC10F32044A3C60003C045 /* Assets.xcassets in Resources */,
				33CC10F62044A3C60003C045 /* MainMenu.xib in Resources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXResourcesBuildPhase section */

/* Begin PBXShellScriptBuildPhase section */
		3399D490228B24CF009A79C7 /* ShellScript */ = {
			isa = PBXShellScriptBuildPhase;
			alwaysOutOfDate = 1;
			buildActionMask = 2147483647;
			files = (
			);
			inputFileListPaths = (
			);
			inputPaths = (
			);
			outputFileListPaths = (
			);
			outputPaths = (
			);
			runOnlyForDeploymentPostprocessing = 0;
			shellPath = /bin/sh;
			shellScript = "echo \"$PRODUCT_NAME.app\" > \"$PROJECT_DIR\"/Flutter/ephemeral/.app_filename && \"$FLUTTER_ROOT\"/packages/flutter_tools/bin/macos_assemble.sh embed\n";
		};
		33CC111E2044C6BF0003C045 /* ShellScript */ = {
			isa = PBXShellScriptBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			inputFileListPaths = (
				Flutter/ephemeral/FlutterInputs.xcfilelist,
			);
			inputPaths = (
				Flutter/ephemeral/tripwire,
			);
			outputFileListPaths = (
				Flutter/ephemeral/FlutterOutputs.xcfilelist,
			);
			outputPaths = (
			);
			runOnlyForDeploymentPostprocessing = 0;
			shellPath = /bin/sh;
			shellScript = "\"$FLUTTER_ROOT\"/packages/flutter_tools/bin/macos_assemble.sh && touch Flutter/ephemeral/tripwire";
		};
/* End PBXShellScriptBuildPhase section */

/* Begin PBXSourcesBuildPhase section */
		331C80D1294CF70F00263BE5 /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				331C80D8294CF71000263BE5 /* RunnerTests.swift in Sources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		33CC10E92044A3C60003C045 /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				33CC11132044BFA00003C045 /* MainFlutterWindow.swift in Sources */,
				33CC10F12044A3C60003C045 /* AppDelegate.swift in Sources */,
				335BBD1B22A9A15E00E9071D /* GeneratedPluginRegistrant.swift in Sources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXSourcesBuildPhase section */

/* Begin PBXTargetDependency section */
		331C80DA294CF71000263BE5 /* PBXTargetDependency */ = {
			isa = PBXTargetDependency;
			target = 33CC10EC2044A3C60003C045 /* Runner */;
			targetProxy = 331C80D9294CF71000263BE5 /* PBXContainerItemProxy */;
		};
		33CC11202044C79F0003C045 /* PBXTargetDependency */ = {
			isa = PBXTargetDependency;
			target = 33CC111A2044C6BA0003C045 /* Flutter Assemble */;
			targetProxy = 33CC111F2044C79F0003C045 /* PBXContainerItemProxy */;
		};
/* End PBXTargetDependency section */

/* Begin PBXVariantGroup section */
		33CC10F42044A3C60003C045 /* MainMenu.xib */ = {
			isa = PBXVariantGroup;
			children = (
				33CC10F52044A3C60003C045 /* Base */,
			);
			name = MainMenu.xib;
			path = Runner;
			sourceTree = "<group>";
		};
/* End PBXVariantGroup section */

/* Begin XCBuildConfiguration section */
		331C80DB294CF71000263BE5 /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				BUNDLE_LOADER = "$(TEST_HOST)";
				CURRENT_PROJECT_VERSION = 1;
				GENERATE_INFOPLIST_FILE = YES;
				MARKETING_VERSION = 1.0;
				PRODUCT_BUNDLE_IDENTIFIER = com.example.streamingAsr.RunnerTests;
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_VERSION = 5.0;
				TEST_HOST = "$(BUILT_PRODUCTS_DIR)/streaming_asr.app/$(BUNDLE_EXECUTABLE_FOLDER_PATH)/streaming_asr";
			};
			name = Debug;
		};
		331C80DC294CF71000263BE5 /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				BUNDLE_LOADER = "$(TEST_HOST)";
				CURRENT_PROJECT_VERSION = 1;
				GENERATE_INFOPLIST_FILE = YES;
				MARKETING_VERSION = 1.0;
				PRODUCT_BUNDLE_IDENTIFIER = com.example.streamingAsr.RunnerTests;
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_VERSION = 5.0;
				TEST_HOST = "$(BUILT_PRODUCTS_DIR)/streaming_asr.app/$(BUNDLE_EXECUTABLE_FOLDER_PATH)/streaming_asr";
			};
			name = Release;
		};
		331C80DD294CF71000263BE5 /* Profile */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				BUNDLE_LOADER = "$(TEST_HOST)";
				CURRENT_PROJECT_VERSION = 1;
				GENERATE_INFOPLIST_FILE = YES;
				MARKETING_VERSION = 1.0;
				PRODUCT_BUNDLE_IDENTIFIER = com.example.streamingAsr.RunnerTests;
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_VERSION = 5.0;
				TEST_HOST = "$(BUILT_PRODUCTS_DIR)/streaming_asr.app/$(BUNDLE_EXECUTABLE_FOLDER_PATH)/streaming_asr";
			};
			name = Profile;
		};
		338D0CE9231458BD00FA5F75 /* Profile */ = {
			isa = XCBuildConfiguration;
			baseConfigurationReference = 7AFA3C8E1D35360C0083082E /* Release.xcconfig */;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
				CLANG_ANALYZER_NONNULL = YES;
				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
				CLANG_CXX_LIBRARY = "libc++";
				CLANG_ENABLE_MODULES = YES;
				CLANG_ENABLE_OBJC_ARC = YES;
				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
				CLANG_WARN_BOOL_CONVERSION = YES;
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
				CLANG_WARN_SUSPICIOUS_MOVE = YES;
				CODE_SIGN_IDENTITY = "-";
				COPY_PHASE_STRIP = NO;
				DEAD_CODE_STRIPPING = YES;
				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
				ENABLE_NS_ASSERTIONS = NO;
				ENABLE_STRICT_OBJC_MSGSEND = YES;
				ENABLE_USER_SCRIPT_SANDBOXING = NO;
				GCC_C_LANGUAGE_STANDARD = gnu11;
				GCC_NO_COMMON_BLOCKS = YES;
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
				GCC_WARN_UNUSED_FUNCTION = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				MACOSX_DEPLOYMENT_TARGET = 10.15;
				MTL_ENABLE_DEBUG_INFO = NO;
				SDKROOT = macosx;
				SWIFT_COMPILATION_MODE = wholemodule;
				SWIFT_OPTIMIZATION_LEVEL = "-O";
			};
			name = Profile;
		};
		338D0CEA231458BD00FA5F75 /* Profile */ = {
			isa = XCBuildConfiguration;
			baseConfigurationReference = 33E5194F232828860026EE4D /* AppInfo.xcconfig */;
			buildSettings = {
				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
				CLANG_ENABLE_MODULES = YES;
				CODE_SIGN_ENTITLEMENTS = Runner/DebugProfile.entitlements;
				CODE_SIGN_STYLE = Automatic;
				COMBINE_HIDPI_IMAGES = YES;
				INFOPLIST_FILE = Runner/Info.plist;
				LD_RUNPATH_SEARCH_PATHS = (
					"$(inherited)",
					"@executable_path/../Frameworks",
				);
				PROVISIONING_PROFILE_SPECIFIER = "";
				SWIFT_VERSION = 5.0;
			};
			name = Profile;
		};
		338D0CEB231458BD00FA5F75 /* Profile */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				CODE_SIGN_STYLE = Manual;
				PRODUCT_NAME = "$(TARGET_NAME)";
			};
			name = Profile;
		};
		33CC10F92044A3C60003C045 /* Debug */ = {
			isa = XCBuildConfiguration;
			baseConfigurationReference = 9740EEB21CF90195004384FC /* Debug.xcconfig */;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
				CLANG_ANALYZER_NONNULL = YES;
				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
				CLANG_CXX_LIBRARY = "libc++";
				CLANG_ENABLE_MODULES = YES;
				CLANG_ENABLE_OBJC_ARC = YES;
				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
				CLANG_WARN_BOOL_CONVERSION = YES;
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
				CLANG_WARN_SUSPICIOUS_MOVE = YES;
				CODE_SIGN_IDENTITY = "-";
				COPY_PHASE_STRIP = NO;
				DEAD_CODE_STRIPPING = YES;
				DEBUG_INFORMATION_FORMAT = dwarf;
				ENABLE_STRICT_OBJC_MSGSEND = YES;
				ENABLE_TESTABILITY = YES;
				ENABLE_USER_SCRIPT_SANDBOXING = NO;
				GCC_C_LANGUAGE_STANDARD = gnu11;
				GCC_DYNAMIC_NO_PIC = NO;
				GCC_NO_COMMON_BLOCKS = YES;
				GCC_OPTIMIZATION_LEVEL = 0;
				GCC_PREPROCESSOR_DEFINITIONS = (
					"DEBUG=1",
					"$(inherited)",
				);
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
				GCC_WARN_UNUSED_FUNCTION = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				MACOSX_DEPLOYMENT_TARGET = 10.15;
				MTL_ENABLE_DEBUG_INFO = YES;
				ONLY_ACTIVE_ARCH = YES;
				SDKROOT = macosx;
				SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
			};
			name = Debug;
		};
		33CC10FA2044A3C60003C045 /* Release */ = {
			isa = XCBuildConfiguration;
			baseConfigurationReference = 7AFA3C8E1D35360C0083082E /* Release.xcconfig */;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
				CLANG_ANALYZER_NONNULL = YES;
				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
				CLANG_CXX_LIBRARY = "libc++";
				CLANG_ENABLE_MODULES = YES;
				CLANG_ENABLE_OBJC_ARC = YES;
				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
				CLANG_WARN_BOOL_CONVERSION = YES;
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
				CLANG_WARN_SUSPICIOUS_MOVE = YES;
				CODE_SIGN_IDENTITY = "-";
				COPY_PHASE_STRIP = NO;
				DEAD_CODE_STRIPPING = YES;
				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
				ENABLE_NS_ASSERTIONS = NO;
				ENABLE_STRICT_OBJC_MSGSEND = YES;
				ENABLE_USER_SCRIPT_SANDBOXING = NO;
				GCC_C_LANGUAGE_STANDARD = gnu11;
				GCC_NO_COMMON_BLOCKS = YES;
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
				GCC_WARN_UNUSED_FUNCTION = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				MACOSX_DEPLOYMENT_TARGET = 10.15;
				MTL_ENABLE_DEBUG_INFO = NO;
				SDKROOT = macosx;
				SWIFT_COMPILATION_MODE = wholemodule;
				SWIFT_OPTIMIZATION_LEVEL = "-O";
			};
			name = Release;
		};
		33CC10FC2044A3C60003C045 /* Debug */ = {
			isa = XCBuildConfiguration;
			baseConfigurationReference = 33E5194F232828860026EE4D /* AppInfo.xcconfig */;
			buildSettings = {
				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
				CLANG_ENABLE_MODULES = YES;
				CODE_SIGN_ENTITLEMENTS = Runner/DebugProfile.entitlements;
				CODE_SIGN_STYLE = Automatic;
				COMBINE_HIDPI_IMAGES = YES;
				INFOPLIST_FILE = Runner/Info.plist;
				LD_RUNPATH_SEARCH_PATHS = (
					"$(inherited)",
					"@executable_path/../Frameworks",
				);
				PROVISIONING_PROFILE_SPECIFIER = "";
				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
				SWIFT_VERSION = 5.0;
			};
			name = Debug;
		};
		33CC10FD2044A3C60003C045 /* Release */ = {
			isa = XCBuildConfiguration;
			baseConfigurationReference = 33E5194F232828860026EE4D /* AppInfo.xcconfig */;
			buildSettings = {
				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
				CLANG_ENABLE_MODULES = YES;
				CODE_SIGN_ENTITLEMENTS = Runner/Release.entitlements;
				CODE_SIGN_STYLE = Automatic;
				COMBINE_HIDPI_IMAGES = YES;
				INFOPLIST_FILE = Runner/Info.plist;
				LD_RUNPATH_SEARCH_PATHS = (
					"$(inherited)",
					"@executable_path/../Frameworks",
				);
				PROVISIONING_PROFILE_SPECIFIER = "";
				SWIFT_VERSION = 5.0;
			};
			name = Release;
		};
		33CC111C2044C6BA0003C045 /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				CODE_SIGN_STYLE = Manual;
				PRODUCT_NAME = "$(TARGET_NAME)";
			};
			name = Debug;
		};
		33CC111D2044C6BA0003C045 /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				CODE_SIGN_STYLE = Automatic;
				PRODUCT_NAME = "$(TARGET_NAME)";
			};
			name = Release;
		};
/* End XCBuildConfiguration section */

/* Begin XCConfigurationList section */
		331C80DE294CF71000263BE5 /* Build configuration list for PBXNativeTarget "RunnerTests" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				331C80DB294CF71000263BE5 /* Debug */,
				331C80DC294CF71000263BE5 /* Release */,
				331C80DD294CF71000263BE5 /* Profile */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		33CC10E82044A3C60003C045 /* Build configuration list for PBXProject "Runner" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				33CC10F92044A3C60003C045 /* Debug */,
				33CC10FA2044A3C60003C045 /* Release */,
				338D0CE9231458BD00FA5F75 /* Profile */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		33CC10FB2044A3C60003C045 /* Build configuration list for PBXNativeTarget "Runner" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				33CC10FC2044A3C60003C045 /* Debug */,
				33CC10FD2044A3C60003C045 /* Release */,
				338D0CEA231458BD00FA5F75 /* Profile */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		33CC111B2044C6BA0003C045 /* Build configuration list for PBXAggregateTarget "Flutter Assemble" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				33CC111C2044C6BA0003C045 /* Debug */,
				33CC111D2044C6BA0003C045 /* Release */,
				338D0CEB231458BD00FA5F75 /* Profile */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
/* End XCConfigurationList section */
	};
	rootObject = 33CC10E52044A3C60003C045 /* Project object */;
}


================================================
FILE: flutter-examples/streaming_asr/macos/Runner.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>IDEDidComputeMac32BitWarning</key>
	<true/>
</dict>
</plist>


================================================
FILE: flutter-examples/streaming_asr/macos/Runner.xcodeproj/xcshareddata/xcschemes/Runner.xcscheme
================================================
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
   LastUpgradeVersion = "1510"
   version = "1.3">
   <BuildAction
      parallelizeBuildables = "YES"
      buildImplicitDependencies = "YES">
      <BuildActionEntries>
         <BuildActionEntry
            buildForTesting = "YES"
            buildForRunning = "YES"
            buildForProfiling = "YES"
            buildForArchiving = "YES"
            buildForAnalyzing = "YES">
            <BuildableReference
               BuildableIdentifier = "primary"
               BlueprintIdentifier = "33CC10EC2044A3C60003C045"
               BuildableName = "streaming_asr.app"
               BlueprintName = "Runner"
               ReferencedContainer = "container:Runner.xcodeproj">
            </BuildableReference>
         </BuildActionEntry>
      </BuildActionEntries>
   </BuildAction>
   <TestAction
      buildConfiguration = "Debug"
      selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
      selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
      shouldUseLaunchSchemeArgsEnv = "YES">
      <MacroExpansion>
         <BuildableReference
            BuildableIdentifier = "primary"
            BlueprintIdentifier = "33CC10EC2044A3C60003C045"
            BuildableName = "streaming_asr.app"
            BlueprintName = "Runner"
            ReferencedContainer = "container:Runner.xcodeproj">
         </BuildableReference>
      </MacroExpansion>
      <Testables>
         <TestableReference
            skipped = "NO"
            parallelizable = "YES">
            <BuildableReference
               BuildableIdentifier = "primary"
               BlueprintIdentifier = "331C80D4294CF70F00263BE5"
               BuildableName = "RunnerTests.xctest"
               BlueprintName = "RunnerTests"
               ReferencedContainer = "container:Runner.xcodeproj">
            </BuildableReference>
         </TestableReference>
      </Testables>
   </TestAction>
   <LaunchAction
      buildConfiguration = "Debug"
      selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
      selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
      launchStyle = "0"
      useCustomWorkingDirectory = "NO"
      ignoresPersistentStateOnLaunch = "NO"
      debugDocumentVersioning = "YES"
      debugServiceExtension = "internal"
      allowLocationSimulation = "YES">
      <BuildableProductRunnable
         runnableDebuggingMode = "0">
         <BuildableReference
            BuildableIdentifier = "primary"
            BlueprintIdentifier = "33CC10EC2044A3C60003C045"
            BuildableName = "streaming_asr.app"
            BlueprintName = "Runner"
            ReferencedContainer = "container:Runner.xcodeproj">
         </BuildableReference>
      </BuildableProductRunnable>
   </LaunchAction>
   <ProfileAction
      buildConfiguration = "Profile"
      shouldUseLaunchSchemeArgsEnv = "YES"
      savedToolIdentifier = ""
      useCustomWorkingDirectory = "NO"
      debugDocumentVersioning = "YES">
      <BuildableProductRunnable
         runnableDebuggingMode = "0">
         <BuildableReference
            BuildableIdentifier = "primary"
            BlueprintIdentifier = "33CC10EC2044A3C60003C045"
            BuildableName = "streaming_asr.app"
            BlueprintName = "Runner"
            ReferencedContainer = "container:Runner.xcodeproj">
         </BuildableReference>
      </BuildableProductRunnable>
   </ProfileAction>
   <AnalyzeAction
      buildConfiguration = "Debug">
   </AnalyzeAction>
   <ArchiveAction
      buildConfiguration = "Release"
      revealArchiveInOrganizer = "YES">
   </ArchiveAction>
</Scheme>


================================================
FILE: flutter-examples/streaming_asr/macos/Runner.xcworkspace/contents.xcworkspacedata
================================================
<?xml version="1.0" encoding="UTF-8"?>
<Workspace
   version = "1.0">
   <FileRef
      location = "group:Runner.xcodeproj">
   </FileRef>
</Workspace>


================================================
FILE: flutter-examples/streaming_asr/macos/Runner.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>IDEDidComputeMac32BitWarning</key>
	<true/>
</dict>
</plist>


================================================
FILE: flutter-examples/streaming_asr/macos/RunnerTests/RunnerTests.swift
================================================
import Cocoa
import FlutterMacOS
import XCTest

class RunnerTests: XCTestCase {

  func testExample() {
    // If you add code to the Runner application, consider adding tests here.
    // See https://developer.apple.com/documentation/xctest for more information about using XCTest.
  }

}


================================================
FILE: flutter-examples/streaming_asr/pubspec.yaml
================================================
name: streaming_asr

description: >
  This example shows how to implement real-time speech recognition using sherpa-onnx.

publish_to: 'none'

version: 1.12.31

topics:
  - speech-recognition

issue_tracker: https://github.com/k2-fsa/sherpa-onnx/issues

repository: https://github.com/k2-fsa/sherpa-onnx/tree/master/sherpa-onnx/flutter

environment:
  sdk: ">=2.17.0 <4.0.0"
  flutter: ">=2.8.1"

dependencies:
  flutter:
    sdk: flutter

  cupertino_icons: ^1.0.6

  path_provider: ^2.1.3
  path: ^1.9.0

  record: ^6.1.2
  url_launcher: ^6.2.6

  sherpa_onnx: ^1.12.31
  # sherpa_onnx:
  #   path: ../../flutter/sherpa_onnx

dev_dependencies:
  flutter_test:
    sdk: flutter

  flutter_lints: ^3.0.0

flutter:
  uses-material-design: true

  assets:
    - assets/
    # - assets/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/


================================================
FILE: flutter-examples/streaming_asr/test/widget_test.dart
================================================
// This is a basic Flutter widget test.
//
// To perform an interaction with a widget in your test, use the WidgetTester
// utility in the flutter_test package. For example, you can send tap and scroll
// gestures. You can also use WidgetTester to find child widgets in the widget
// tree, read text, and verify that the values of widget properties are correct.

import 'package:flutter/material.dart';
import 'package:flutter_test/flutter_test.dart';

import 'package:streaming_asr/main.dart';

void main() {
  testWidgets('Counter increments smoke test', (WidgetTester tester) async {
    // Build our app and trigger a frame.
    await tester.pumpWidget(const MyApp());

    // Verify that our counter starts at 0.
    expect(find.text('0'), findsOneWidget);
    expect(find.text('1'), findsNothing);

    // Tap the '+' icon and trigger a frame.
    await tester.tap(find.byIcon(Icons.add));
    await tester.pump();

    // Verify that our counter has incremented.
    expect(find.text('0'), findsNothing);
    expect(find.text('1'), findsOneWidget);
  });
}


================================================
FILE: flutter-examples/streaming_asr/windows/.gitignore
================================================
flutter/ephemeral/

# Visual Studio user-specific files.
*.suo
*.user
*.userosscache
*.sln.docstates

# Visual Studio build-related files.
x64/
x86/

# Visual Studio cache files
# files ending in .cache can be ignored
*.[Cc]ache
# but keep track of directories ending in .cache
!*.[Cc]ache/


================================================
FILE: flutter-examples/streaming_asr/windows/CMakeLists.txt
================================================
# Project-level configuration.
cmake_minimum_required(VERSION 3.14)
project(streaming_asr LANGUAGES CXX)

# The name of the executable created for the application. Change this to change
# the on-disk name of your application.
set(BINARY_NAME "streaming_asr")

# Explicitly opt in to modern CMake behaviors to avoid warnings with recent
# versions of CMake.
cmake_policy(VERSION 3.14...3.25)

# Define build configuration option.
get_property(IS_MULTICONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(IS_MULTICONFIG)
  set(CMAKE_CONFIGURATION_TYPES "Debug;Profile;Release"
    CACHE STRING "" FORCE)
else()
  if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
    set(CMAKE_BUILD_TYPE "Debug" CACHE
      STRING "Flutter build mode" FORCE)
    set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS
      "Debug" "Profile" "Release")
  endif()
endif()
# Define settings for the Profile build mode.
set(CMAKE_EXE_LINKER_FLAGS_PROFILE "${CMAKE_EXE_LINKER_FLAGS_RELEASE}")
set(CMAKE_SHARED_LINKER_FLAGS_PROFILE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE}")
set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE}")
set(CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_RELEASE}")

# Use Unicode for all projects.
add_definitions(-DUNICODE -D_UNICODE)

# Compilation settings that should be applied to most targets.
#
# Be cautious about adding new options here, as plugins use this function by
# default. In most cases, you should add new options to specific targets instead
# of modifying this function.
function(APPLY_STANDARD_SETTINGS TARGET)
  target_compile_features(${TARGET} PUBLIC cxx_std_17)
  target_compile_options(${TARGET} PRIVATE /W4 /WX /wd"4100")
  target_compile_options(${TARGET} PRIVATE /EHsc)
  target_compile_definitions(${TARGET} PRIVATE "_HAS_EXCEPTIONS=0")
  target_compile_definitions(${TARGET} PRIVATE "$<$<CONFIG:Debug>:_DEBUG>")
endfunction()

# Flutter library and tool build rules.
set(FLUTTER_MANAGED_DIR "${CMAKE_CURRENT_SOURCE_DIR}/flutter")
add_subdirectory(${FLUTTER_MANAGED_DIR})

# Application build; see runner/CMakeLists.txt.
add_subdirectory("runner")


# Generated plugin build rules, which manage building the plugins and adding
# them to the application.
include(flutter/generated_plugins.cmake)


# === Installation ===
# Support files are copied into place next to the executable, so that it can
# run in place. This is done instead of making a separate bundle (as on Linux)
# so that building and running from within Visual Studio will work.
set(BUILD_BUNDLE_DIR "$<TARGET_FILE_DIR:${BINARY_NAME}>")
# Make the "install" step default, as it's required to run.
set(CMAKE_VS_INCLUDE_INSTALL_TO_DEFAULT_BUILD 1)
if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
  set(CMAKE_INSTALL_PREFIX "${BUILD_BUNDLE_DIR}" CACHE PATH "..." FORCE)
endif()

set(INSTALL_BUNDLE_DATA_DIR "${CMAKE_INSTALL_PREFIX}/data")
set(INSTALL_BUNDLE_LIB_DIR "${CMAKE_INSTALL_PREFIX}")

install(TARGETS ${BINARY_NAME} RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}"
  COMPONENT Runtime)

install(FILES "${FLUTTER_ICU_DATA_FILE}" DESTINATION "${INSTALL_BUNDLE_DATA_DIR}"
  COMPONENT Runtime)

install(FILES "${FLUTTER_LIBRARY}" DESTINATION "${INSTALL_BUNDLE_LIB_DIR}"
  COMPONENT Runtime)

if(PLUGIN_BUNDLED_LIBRARIES)
  install(FILES "${PLUGIN_BUNDLED_LIBRARIES}"
    DESTINATION "${INSTALL_BUNDLE_LIB_DIR}"
    COMPONENT Runtime)
endif()

# Copy the native assets provided by the build.dart from all packages.
set(NATIVE_ASSETS_DIR "${PROJECT_BUILD_DIR}native_assets/windows/")
install(DIRECTORY "${NATIVE_ASSETS_DIR}"
   DESTINATION "${INSTALL_BUNDLE_LIB_DIR}"
   COMPONENT Runtime)

# Fully re-copy the assets directory on each build to avoid having stale files
# from a previous install.
set(FLUTTER_ASSET_DIR_NAME "flutter_assets")
install(CODE "
  file(REMOVE_RECURSE \"${INSTALL_BUNDLE_DATA_DIR}/${FLUTTER_ASSET_DIR_NAME}\")
  " COMPONENT Runtime)
install(DIRECTORY "${PROJECT_BUILD_DIR}/${FLUTTER_ASSET_DIR_NAME}"
  DESTINATION "${INSTALL_BUNDLE_DATA_DIR}" COMPONENT Runtime)

# Install the AOT library on non-Debug builds only.
install(FILES "${AOT_LIBRARY}" DESTINATION "${INSTALL_BUNDLE_DATA_DIR}"
  CONFIGURATIONS Profile;Release
  COMPONENT Runtime)


================================================
FILE: flutter-examples/streaming_asr/windows/flutter/CMakeLists.txt
================================================
# This file controls Flutter-level build steps. It should not be edited.
cmake_minimum_required(VERSION 3.14)

set(EPHEMERAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/ephemeral")

# Configuration provided via flutter tool.
include(${EPHEMERAL_DIR}/generated_config.cmake)

# TODO: Move the rest of this into files in ephemeral. See
# https://github.com/flutter/flutter/issues/57146.
set(WRAPPER_ROOT "${EPHEMERAL_DIR}/cpp_client_wrapper")

# Set fallback configurations for older versions of the flutter tool.
if (NOT DEFINED FLUTTER_TARGET_PLATFORM)
  set(FLUTTER_TARGET_PLATFORM "windows-x64")
endif()

# === Flutter Library ===
set(FLUTTER_LIBRARY "${EPHEMERAL_DIR}/flutter_windows.dll")

# Published to parent scope for install step.
set(FLUTTER_LIBRARY ${FLUTTER_LIBRARY} PARENT_SCOPE)
set(FLUTTER_ICU_DATA_FILE "${EPHEMERAL_DIR}/icudtl.dat" PARENT_SCOPE)
set(PROJECT_BUILD_DIR "${PROJECT_DIR}/build/" PARENT_SCOPE)
set(AOT_LIBRARY "${PROJECT_DIR}/build/windows/app.so" PARENT_SCOPE)

list(APPEND FLUTTER_LIBRARY_HEADERS
  "flutter_export.h"
  "flutter_windows.h"
  "flutter_messenger.h"
  "flutter_plugin_registrar.h"
  "flutter_texture_registrar.h"
)
list(TRANSFORM FLUTTER_LIBRARY_HEADERS PREPEND "${EPHEMERAL_DIR}/")
add_library(flutter INTERFACE)
target_include_directories(flutter INTERFACE
  "${EPHEMERAL_DIR}"
)
target_link_libraries(flutter INTERFACE "${FLUTTER_LIBRARY}.lib")
add_dependencies(flutter flutter_assemble)

# === Wrapper ===
list(APPEND CPP_WRAPPER_SOURCES_CORE
  "core_implementations.cc"
  "standard_codec.cc"
)
list(TRANSFORM CPP_WRAPPER_SOURCES_CORE PREPEND "${WRAPPER_ROOT}/")
list(APPEND CPP_WRAPPER_SOURCES_PLUGIN
  "plugin_registrar.cc"
)
list(TRANSFORM CPP_WRAPPER_SOURCES_PLUGIN PREPEND "${WRAPPER_ROOT}/")
list(APPEND CPP_WRAPPER_SOURCES_APP
  "flutter_engine.cc"
  "flutter_view_controller.cc"
)
list(TRANSFORM CPP_WRAPPER_SOURCES_APP PREPEND "${WRAPPER_ROOT}/")

# Wrapper sources needed for a plugin.
add_library(flutter_wrapper_plugin STATIC
  ${CPP_WRAPPER_SOURCES_CORE}
  ${CPP_WRAPPER_SOURCES_PLUGIN}
)
apply_standard_settings(flutter_wrapper_plugin)
set_target_properties(flutter_wrapper_plugin PROPERTIES
  POSITION_INDEPENDENT_CODE ON)
set_target_properties(flutter_wrapper_plugin PROPERTIES
  CXX_VISIBILITY_PRESET hidden)
target_link_libraries(flutter_wrapper_plugin PUBLIC flutter)
target_include_directories(flutter_wrapper_plugin PUBLIC
  "${WRAPPER_ROOT}/include"
)
add_dependencies(flutter_wrapper_plugin flutter_assemble)

# Wrapper sources needed for the runner.
add_library(flutter_wrapper_app STATIC
  ${CPP_WRAPPER_SOURCES_CORE}
  ${CPP_WRAPPER_SOURCES_APP}
)
apply_standard_settings(flutter_wrapper_app)
target_link_libraries(flutter_wrapper_app PUBLIC flutter)
target_include_directories(flutter_wrapper_app PUBLIC
  "${WRAPPER_ROOT}/include"
)
add_dependencies(flutter_wrapper_app flutter_assemble)

# === Flutter tool backend ===
# _phony_ is a non-existent file to force this command to run every time,
# since currently there's no way to get a full input/output list from the
# flutter tool.
set(PHONY_OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/_phony_")
set_source_files_properties("${PHONY_OUTPUT}" PROPERTIES SYMBOLIC TRUE)
add_custom_command(
  OUTPUT ${FLUTTER_LIBRARY} ${FLUTTER_LIBRARY_HEADERS}
    ${CPP_WRAPPER_SOURCES_CORE} ${CPP_WRAPPER_SOURCES_PLUGIN}
    ${CPP_WRAPPER_SOURCES_APP}
    ${PHONY_OUTPUT}
  COMMAND ${CMAKE_COMMAND} -E env
    ${FLUTTER_TOOL_ENVIRONMENT}
    "${FLUTTER_ROOT}/packages/flutter_tools/bin/tool_backend.bat"
      ${FLUTTER_TARGET_PLATFORM} $<CONFIG>
  VERBATIM
)
add_custom_target(flutter_assemble DEPENDS
  "${FLUTTER_LIBRARY}"
  ${FLUTTER_LIBRARY_HEADERS}
  ${CPP_WRAPPER_SOURCES_CORE}
  ${CPP_WRAPPER_SOURCES_PLUGIN}
  ${CPP_WRAPPER_SOURCES_APP}
)


================================================
FILE: flutter-examples/streaming_asr/windows/runner/CMakeLists.txt
================================================
cmake_minimum_required(VERSION 3.14)
project(runner LANGUAGES CXX)

# Define the application target. To change its name, change BINARY_NAME in the
# top-level CMakeLists.txt, not the value here, or `flutter run` will no longer
# work.
#
# Any new source files that you add to the application should be added here.
add_executable(${BINARY_NAME} WIN32
  "flutter_window.cpp"
  "main.cpp"
  "utils.cpp"
  "win32_window.cpp"
  "${FLUTTER_MANAGED_DIR}/generated_plugin_registrant.cc"
  "Runner.rc"
  "runner.exe.manifest"
)

# Apply the standard set of build settings. This can be removed for applications
# that need different build settings.
apply_standard_settings(${BINARY_NAME})

# Add preprocessor definitions for the build version.
target_compile_definitions(${BINARY_NAME} PRIVATE "FLUTTER_VERSION=\"${FLUTTER_VERSION}\"")
target_compile_definitions(${BINARY_NAME} PRIVATE "FLUTTER_VERSION_MAJOR=${FLUTTER_VERSION_MAJOR}")
target_compile_definitions(${BINARY_NAME} PRIVATE "FLUTTER_VERSION_MINOR=${FLUTTER_VERSION_MINOR}")
target_compile_definitions(${BINARY_NAME} PRIVATE "FLUTTER_VERSION_PATCH=${FLUTTER_VERSION_PATCH}")
target_compile_definitions(${BINARY_NAME} PRIVATE "FLUTTER_VERSION_BUILD=${FLUTTER_VERSION_BUILD}")

# Disable Windows macros that collide with C++ standard library functions.
target_compile_definitions(${BINARY_NAME} PRIVATE "NOMINMAX")

# Add dependency libraries and include directories. Add any application-specific
# dependencies here.
target_link_libraries(${BINARY_NAME} PRIVATE flutter flutter_wrapper_app)
target_link_libraries(${BINARY_NAME} PRIVATE "dwmapi.lib")
target_include_directories(${BINARY_NAME} PRIVATE "${CMAKE_SOURCE_DIR}")

# Run the Flutter tool portions of the build. This must not be removed.
add_dependencies(${BINARY_NAME} flutter_assemble)


================================================
FILE: flutter-examples/streaming_asr/windows/runner/Runner.rc
================================================
// Microsoft Visual C++ generated resource script.
//
#pragma code_page(65001)
#include "resource.h"

#define APSTUDIO_READONLY_SYMBOLS
/////////////////////////////////////////////////////////////////////////////
//
// Generated from the TEXTINCLUDE 2 resource.
//
#include "winres.h"

/////////////////////////////////////////////////////////////////////////////
#undef APSTUDIO_READONLY_SYMBOLS

/////////////////////////////////////////////////////////////////////////////
// English (United States) resources

#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU)
LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US

#ifdef APSTUDIO_INVOKED
/////////////////////////////////////////////////////////////////////////////
//
// TEXTINCLUDE
//

1 TEXTINCLUDE
BEGIN
    "resource.h\0"
END

2 TEXTINCLUDE
BEGIN
    "#include ""winres.h""\r\n"
    "\0"
END

3 TEXTINCLUDE
BEGIN
    "\r\n"
    "\0"
END

#endif    // APSTUDIO_INVOKED


/////////////////////////////////////////////////////////////////////////////
//
// Icon
//

// Icon with lowest ID value placed first to ensure application icon
// remains consistent on all systems.
IDI_APP_ICON            ICON                    "resources\\app_icon.ico"


/////////////////////////////////////////////////////////////////////////////
//
// Version
//

#if defined(FLUTTER_VERSION_MAJOR) && defined(FLUTTER_VERSION_MINOR) && defined(FLUTTER_VERSION_PATCH) && defined(FLUTTER_VERSION_BUILD)
#define VERSION_AS_NUMBER FLUTTER_VERSION_MAJOR,FLUTTER_VERSION_MINOR,FLUTTER_VERSION_PATCH,FLUTTER_VERSION_BUILD
#else
#define VERSION_AS_NUMBER 1,0,0,0
#endif

#if defined(FLUTTER_VERSION)
#define VERSION_AS_STRING FLUTTER_VERSION
#else
#define VERSION_AS_STRING "1.0.0"
#endif

VS_VERSION_INFO VERSIONINFO
 FILEVERSION VERSION_AS_NUMBER
 PRODUCTVERSION VERSION_AS_NUMBER
 FILEFLAGSMASK VS_FFI_FILEFLAGSMASK
#ifdef _DEBUG
 FILEFLAGS VS_FF_DEBUG
#else
 FILEFLAGS 0x0L
#endif
 FILEOS VOS__WINDOWS32
 FILETYPE VFT_APP
 FILESUBTYPE 0x0L
BEGIN
    BLOCK "StringFileInfo"
    BEGIN
        BLOCK "040904e4"
        BEGIN
            VALUE "CompanyName", "com.example" "\0"
            VALUE "FileDescription", "streaming_asr" "\0"
            VALUE "FileVersion", VERSION_AS_STRING "\0"
            VALUE "InternalName", "streaming_asr" "\0"
            VALUE "LegalCopyright", "Copyright (C) 2024 com.example. All rights reserved." "\0"
            VALUE "OriginalFilename", "streaming_asr.exe" "\0"
            VALUE "ProductName", "streaming_asr" "\0"
            VALUE "ProductVersion", VERSION_AS_STRING "\0"
        END
    END
    BLOCK "VarFileInfo"
    BEGIN
        VALUE "Translation", 0x409, 1252
    END
END

#endif    // English (United States) resources
/////////////////////////////////////////////////////////////////////////////


#ifndef APSTUDIO_INVOKED
/////////////////////////////////////////////////////////////////////////////
//
// Generated from the TEXTINCLUDE 3 resource.
//


/////////////////////////////////////////////////////////////////////////////
#endif    // not APSTUDIO_INVOKED


================================================
FILE: flutter-examples/streaming_asr/windows/runner/flutter_window.cpp
================================================
#include "flutter_window.h"

#include <optional>

#include "flutter/generated_plugin_registrant.h"

FlutterWindow::FlutterWindow(const flutter::DartProject& project)
    : project_(project) {}

FlutterWindow::~FlutterWindow() {}

bool FlutterWindow::OnCreate() {
  if (!Win32Window::OnCreate()) {
    return false;
  }

  RECT frame = GetClientArea();

  // The size here must match the window dimensions to avoid unnecessary surface
  // creation / destruction in the startup path.
  flutter_controller_ = std::make_unique<flutter::FlutterViewController>(
      frame.right - frame.left, frame.bottom - frame.top, project_);
  // Ensure that basic setup of the controller was successful.
  if (!flutter_controller_->engine() || !flutter_controller_->view()) {
    return false;
  }
  RegisterPlugins(flutter_controller_->engine());
  SetChildContent(flutter_controller_->view()->GetNativeWindow());

  flutter_controller_->engine()->SetNextFrameCallback([&]() {
    this->Show();
  });

  // Flutter can complete the first frame before the "show window" callback is
  // registered. The following call ensures a frame is pending to ensure the
  // window is shown. It is a no-op if the first frame hasn't completed yet.
  flutter_controller_->ForceRedraw();

  return true;
}

void FlutterWindow::OnDestroy() {
  if (flutter_controller_) {
    flutter_controller_ = nullptr;
  }

  Win32Window::OnDestroy();
}

LRESULT
FlutterWindow::MessageHandler(HWND hwnd, UINT const message,
                              WPARAM const wparam,
                              LPARAM const lparam) noexcept {
  // Give Flutter, including plugins, an opportunity to handle window messages.
  if (flutter_controller_) {
    std::optional<LRESULT> result =
        flutter_controller_->HandleTopLevelWindowProc(hwnd, message, wparam,
                                                      lparam);
    if (result) {
      return *result;
    }
  }

  switch (message) {
    case WM_FONTCHANGE:
      flutter_controller_->engine()->ReloadSystemFonts();
      break;
  }

  return Win32Window::MessageHandler(hwnd, message, wparam, lparam);
}


================================================
FILE: flutter-examples/streaming_asr/windows/runner/flutter_window.h
================================================
#ifndef RUNNER_FLUTTER_WINDOW_H_
#define RUNNER_FLUTTER_WINDOW_H_

#include <flutter/dart_project.h>
#include <flutter/flutter_view_controller.h>

#include <memory>

#include "win32_window.h"

// A window that does nothing but host a Flutter view.
class FlutterWindow : public Win32Window {
 public:
  // Creates a new FlutterWindow hosting a Flutter view running |project|.
  explicit FlutterWindow(const flutter::DartProject& project);
  virtual ~FlutterWindow();

 protected:
  // Win32Window:
  bool OnCreate() override;
  void OnDestroy() override;
  LRESULT MessageHandler(HWND window, UINT const message, WPARAM const wparam,
                         LPARAM const lparam) noexcept override;

 private:
  // The project to run.
  flutter::DartProject project_;

  // The Flutter instance hosted by this window.
  std::unique_ptr<flutter::FlutterViewController> flutter_controller_;
};

#endif  // RUNNER_FLUTTER_WINDOW_H_


================================================
FILE: flutter-examples/streaming_asr/windows/runner/main.cpp
================================================
#include <flutter/dart_project.h>
#include <flutter/flutter_view_controller.h>
#include <windows.h>

#include "flutter_window.h"
#include "utils.h"

int APIENTRY wWinMain(_In_ HINSTANCE instance, _In_opt_ HINSTANCE prev,
                      _In_ wchar_t *command_line, _In_ int show_command) {
  // Attach to console when present (e.g., 'flutter run') or create a
  // new console when running with a debugger.
  if (!::AttachConsole(ATTACH_PARENT_PROCESS) && ::IsDebuggerPresent()) {
    CreateAndAttachConsole();
  }

  // Initialize COM, so that it is available for use in the library and/or
  // plugins.
  ::CoInitializeEx(nullptr, COINIT_APARTMENTTHREADED);

  flutter::DartProject project(L"data");

  std::vector<std::string> command_line_arguments =
      GetCommandLineArguments();

  project.set_dart_entrypoint_arguments(std::move(command_line_arguments));

  FlutterWindow window(project);
  Win32Window::Point origin(10, 10);
  Win32Window::Size size(1280, 720);
  if (!window.Create(L"streaming_asr", origin, size)) {
    return EXIT_FAILURE;
  }
  window.SetQuitOnClose(true);

  ::MSG msg;
  while (::GetMessage(&msg, nullptr, 0, 0)) {
    ::TranslateMessage(&msg);
    ::DispatchMessage(&msg);
  }

  ::CoUninitialize();
  return EXIT_SUCCESS;
}


================================================
FILE: flutter-examples/streaming_asr/windows/runner/resource.h
================================================
//{{NO_DEPENDENCIES}}
// Microsoft Visual C++ generated include file.
// Used by Runner.rc
//
#define IDI_APP_ICON                    101

// Next default values for new objects
//
#ifdef APSTUDIO_INVOKED
#ifndef APSTUDIO_READONLY_SYMBOLS
#define _APS_NEXT_RESOURCE_VALUE        102
#define _APS_NEXT_COMMAND_VALUE         40001
#define _APS_NEXT_CONTROL_VALUE         1001
#define _APS_NEXT_SYMED_VALUE           101
#endif
#endif


================================================
FILE: flutter-examples/streaming_asr/windows/runner/runner.exe.manifest
================================================
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
  <application xmlns="urn:schemas-microsoft-com:asm.v3">
    <windowsSettings>
      <dpiAwareness xmlns="http://schemas.microsoft.com/SMI/2016/WindowsSettings">PerMonitorV2</dpiAwareness>
    </windowsSettings>
  </application>
  <compatibility xmlns="urn:schemas-microsoft-com:compatibility.v1">
    <application>
      <!-- Windows 10 and Windows 11 -->
      <supportedOS Id="{8e0f7a12-bfb3-4fe8-b9a5-48fd50a15a9a}"/>
      <!-- Windows 8.1 -->
      <supportedOS Id="{1f676c76-80e1-4239-95bb-83d0f6d0da78}"/>
      <!-- Windows 8 -->
      <supportedOS Id="{4a2f28e3-53b9-4441-ba9c-d69d4a4a6e38}"/>
      <!-- Windows 7 -->
      <supportedOS Id="{35138b9a-5d96-4fbd-8e2d-a2440225f93a}"/>
    </application>
  </compatibility>
</assembly>


================================================
FILE: flutter-examples/streaming_asr/windows/runner/utils.cpp
================================================
#include "utils.h"

#include <flutter_windows.h>
#include <io.h>
#include <stdio.h>
#include <windows.h>

#include <iostream>

void CreateAndAttachConsole() {
  if (::AllocConsole()) {
    FILE *unused;
    if (freopen_s(&unused, "CONOUT$", "w", stdout)) {
      _dup2(_fileno(stdout), 1);
    }
    if (freopen_s(&unused, "CONOUT$", "w", stderr)) {
      _dup2(_fileno(stdout), 2);
    }
    std::ios::sync_with_stdio();
    FlutterDesktopResyncOutputStreams();
  }
}

std::vector<std::string> GetCommandLineArguments() {
  // Convert the UTF-16 command line arguments to UTF-8 for the Engine to use.
  int argc;
  wchar_t** argv = ::CommandLineToArgvW(::GetCommandLineW(), &argc);
  if (argv == nullptr) {
    return std::vector<std::string>();
  }

  std::vector<std::string> command_line_arguments;

  // Skip the first argument as it's the binary name.
  for (int i = 1; i < argc; i++) {
    command_line_arguments.push_back(Utf8FromUtf16(argv[i]));
  }

  ::LocalFree(argv);

  return command_line_arguments;
}

std::string Utf8FromUtf16(const wchar_t* utf16_string) {
  if (utf16_string == nullptr) {
    return std::string();
  }
  unsigned int target_length = ::WideCharToMultiByte(
      CP_UTF8, WC_ERR_INVALID_CHARS, utf16_string,
      -1, nullptr, 0, nullptr, nullptr)
    -1; // remove the trailing null character
  int input_length = (int)wcslen(utf16_string);
  std::string utf8_string;
  if (target_length == 0 || target_length > utf8_string.max_size()) {
    return utf8_string;
  }
  utf8_string.resize(target_length);
  int converted_length = ::WideCharToMultiByte(
      CP_UTF8, WC_ERR_INVALID_CHARS, utf16_string,
      input_length, utf8_string.data(), target_length, nullptr, nullptr);
  if (converted_length == 0) {
    return std::string();
  }
  return utf8_string;
}


================================================
FILE: flutter-examples/streaming_asr/windows/runner/utils.h
================================================
#ifndef RUNNER_UTILS_H_
#define RUNNER_UTILS_H_

#include <string>
#include <vector>

// Creates a console for the process, and redirects stdout and stderr to
// it for both the runner and the Flutter library.
void CreateAndAttachConsole();

// Takes a null-terminated wchar_t* encoded in UTF-16 and returns a std::string
// encoded in UTF-8. Returns an empty std::string on failure.
std::string Utf8FromUtf16(const wchar_t* utf16_string);

// Gets the command line arguments passed in as a std::vector<std::string>,
// encoded in UTF-8. Returns an empty std::vector<std::string> on failure.
std::vector<std::string> GetCommandLineArguments();

#endif  // RUNNER_UTILS_H_


================================================
FILE: flutter-examples/streaming_asr/windows/runner/win32_window.cpp
================================================
#include "win32_window.h"

#include <dwmapi.h>
#include <flutter_windows.h>

#include "resource.h"

namespace {

/// Window attribute that enables dark mode window decorations.
///
/// Redefined in case the developer's machine has a Windows SDK older than
/// version 10.0.22000.0.
/// See: https://docs.microsoft.com/windows/win32/api/dwmapi/ne-dwmapi-dwmwindowattribute
#ifndef DWMWA_USE_IMMERSIVE_DARK_MODE
#define DWMWA_USE_IMMERSIVE_DARK_MODE 20
#endif

constexpr const wchar_t kWindowClassName[] = L"FLUTTER_RUNNER_WIN32_WINDOW";

/// Registry key for app theme preference.
///
/// A value of 0 indicates apps should use dark mode. A non-zero or missing
/// value indicates apps should use light mode.
constexpr const wchar_t kGetPreferredBrightnessRegKey[] =
  L"Software\\Microsoft\\Windows\\CurrentVersion\\Themes\\Personalize";
constexpr const wchar_t kGetPreferredBrightnessRegValue[] = L"AppsUseLightTheme";

// The number of Win32Window objects that currently exist.
static int g_active_window_count = 0;

using EnableNonClientDpiScaling = BOOL __stdcall(HWND hwnd);

// Scale helper to convert logical scaler values to physical using passed in
// scale factor
int Scale(int source, double scale_factor) {
  return static_cast<int>(source * scale_factor);
}

// Dynamically loads the |EnableNonClientDpiScaling| from the User32 module.
// This API is only needed for PerMonitor V1 awareness mode.
void EnableFullDpiSupportIfAvailable(HWND hwnd) {
  HMODULE user32_module = LoadLibraryA("User32.dll");
  if (!user32_module) {
    return;
  }
  auto enable_non_client_dpi_scaling =
      reinterpret_cast<EnableNonClientDpiScaling*>(
          GetProcAddress(user32_module, "EnableNonClientDpiScaling"));
  if (enable_non_client_dpi_scaling != nullptr) {
    enable_non_client_dpi_scaling(hwnd);
  }
  FreeLibrary(user32_module);
}

}  // namespace

// Manages the Win32Window's window class registration.
class WindowClassRegistrar {
 public:
  ~WindowClassRegistrar() = default;

  // Returns the singleton registrar instance.
  static WindowClassRegistrar* GetInstance() {
    if (!instance_) {
      instance_ = new WindowClassRegistrar();
    }
    return instance_;
  }

  // Returns the name of the window class, registering the class if it hasn't
  // previously been registered.
  const wchar_t* GetWindowClass();

  // Unregisters the window class. Should only be called if there are no
  // instances of the window.
  void UnregisterWindowClass();

 private:
  WindowClassRegistrar() = default;

  static WindowClassRegistrar* instance_;

  bool class_registered_ = false;
};

WindowClassRegistrar* WindowClassRegistrar::instance_ = nullptr;

const wchar_t* WindowClassRegistrar::GetWindowClass() {
  if (!class_registered_) {
    WNDCLASS window_class{};
    window_class.hCursor = LoadCursor(nullptr, IDC_ARROW);
    window_class.lpszClassName = kWindowClassName;
    window_class.style = CS_HREDRAW | CS_VREDRAW;
    window_class.cbClsExtra = 0;
    window_class.cbWndExtra = 0;
    window_class.hInstance = GetModuleHandle(nullptr);
    window_class.hIcon =
        LoadIcon(window_class.hInstance, MAKEINTRESOURCE(IDI_APP_ICON));
    window_class.hbrBackground = 0;
    window_class.lpszMenuName = nullptr;
    window_class.lpfnWndProc = Win32Window::WndProc;
    RegisterClass(&window_class);
    class_registered_ = true;
  }
  return kWindowClassName;
}

void WindowClassRegistrar::UnregisterWindowClass() {
  UnregisterClass(kWindowClassName, nullptr);
  class_registered_ = false;
}

Win32Window::Win32Window() {
  ++g_active_window_count;
}

Win32Window::~Win32Window() {
  --g_active_window_count;
  Destroy();
}

bool Win32Window::Create(const std::wstring& title,
                         const Point& origin,
                         const Size& size) {
  Destroy();

  const wchar_t* window_class =
      WindowClassRegistrar::GetInstance()->GetWindowClass();

  const POINT target_point = {static_cast<LONG>(origin.x),
                              static_cast<LONG>(origin.y)};
  HMONITOR monitor = MonitorFromPoint(target_point, MONITOR_DEFAULTTONEAREST);
  UINT dpi = FlutterDesktopGetDpiForMonitor(monitor);
  double scale_factor = dpi / 96.0;

  HWND window = CreateWindow(
      window_class, title.c_str(), WS_OVERLAPPEDWINDOW,
      Scale(origin.x, scale_factor), Scale(origin.y, scale_factor),
      Scale(size.width, scale_factor), Scale(size.height, scale_factor),
      nullptr, nullptr, GetModuleHandle(nullptr), this);

  if (!window) {
    return false;
  }

  UpdateTheme(window);

  return OnCreate();
}

bool Win32Window::Show() {
  return ShowWindow(window_handle_, SW_SHOWNORMAL);
}

// static
LRESULT CALLBACK Win32Window::WndProc(HWND const window,
                                      UINT const message,
                                      WPARAM const wparam,
                                      LPARAM const lparam) noexcept {
  if (message == WM_NCCREATE) {
    auto window_struct = reinterpret_cast<CREATESTRUCT*>(lparam);
    SetWindowLongPtr(window, GWLP_USERDATA,
                     reinterpret_cast<LONG_PTR>(window_struct->lpCreateParams));

    auto that = static_cast<Win32Window*>(window_struct->lpCreateParams);
    EnableFullDpiSupportIfAvailable(window);
    that->window_handle_ = window;
  } else if (Win32Window* that = GetThisFromHandle(window)) {
    return that->MessageHandler(window, message, wparam, lparam);
  }

  return DefWindowProc(window, message, wparam, lparam);
}

LRESULT
Win32Window::MessageHandler(HWND hwnd,
                            UINT const message,
                            WPARAM const wparam,
                            LPARAM const lparam) noexcept {
  switch (message) {
    case WM_DESTROY:
      window_handle_ = nullptr;
      Destroy();
      if (quit_on_close_) {
        PostQuitMessage(0);
      }
      return 0;

    case WM_DPICHANGED: {
      auto newRectSize = reinterpret_cast<RECT*>(lparam);
      LONG newWidth = newRectSize->right - newRectSize->left;
      LONG newHeight = newRectSize->bottom - newRectSize->top;

      SetWindowPos(hwnd, nullptr, newRectSize->left, newRectSize->top, newWidth,
                   newHeight, SWP_NOZORDER | SWP_NOACTIVATE);

      return 0;
    }
    case WM_SIZE: {
      RECT rect = GetClientArea();
      if (child_content_ != nullptr) {
        // Size and position the child window.
        MoveWindow(child_content_, rect.left, rect.top, rect.right - rect.left,
                   rect.bottom - rect.top, TRUE);
      }
      return 0;
    }

    case WM_ACTIVATE:
      if (child_content_ != nullptr) {
        SetFocus(child_content_);
      }
      return 0;

    case WM_DWMCOLORIZATIONCOLORCHANGED:
      UpdateTheme(hwnd);
      return 0;
  }

  return DefWindowProc(window_handle_, message, wparam, lparam);
}

void Win32Window::Destroy() {
  OnDestroy();

  if (window_handle_) {
    DestroyWindow(window_handle_);
    window_handle_ = nullptr;
  }
  if (g_active_window_count == 0) {
    WindowClassRegistrar::GetInstance()->UnregisterWindowClass();
  }
}

Win32Window* Win32Window::GetThisFromHandle(HWND const window) noexcept {
  return reinterpret_cast<Win32Window*>(
      GetWindowLongPtr(window, GWLP_USERDATA));
}

void Win32Window::SetChildContent(HWND content) {
  child_content_ = content;
  SetParent(content, window_handle_);
  RECT frame = GetClientArea();

  MoveWindow(content, frame.left, frame.top, frame.right - frame.left,
             frame.bottom - frame.top, true);

  SetFocus(child_content_);
}

RECT Win32Window::GetClientArea() {
  RECT frame;
  GetClientRect(window_handle_, &frame);
  return frame;
}

HWND Win32Window::GetHandle() {
  return window_handle_;
}

void Win32Window::SetQuitOnClose(bool quit_on_close) {
  quit_on_close_ = quit_on_close;
}

bool Win32Window::OnCreate() {
  // No-op; provided for subclasses.
  return true;
}

void Win32Window::OnDestroy() {
  // No-op; provided for subclasses.
}

void Win32Window::UpdateTheme(HWND const window) {
  DWORD light_mode;
  DWORD light_mode_size = sizeof(light_mode);
  LSTATUS result = RegGetValue(HKEY_CURRENT_USER, kGetPreferredBrightnessRegKey,
                               kGetPreferredBrightnessRegValue,
                               RRF_RT_REG_DWORD, nullptr, &light_mode,
                               &light_mode_size);

  if (result == ERROR_SUCCESS) {
    BOOL enable_dark_mode = light_mode == 0;
    DwmSetWindowAttribute(window, DWMWA_USE_IMMERSIVE_DARK_MODE,
                          &enable_dark_mode, sizeof(enable_dark_mode));
  }
}


================================================
FILE: flutter-examples/streaming_asr/windows/runner/win32_window.h
================================================
#ifndef RUNNER_WIN32_WINDOW_H_
#define RUNNER_WIN32_WINDOW_H_

#include <windows.h>

#include <functional>
#include <memory>
#include <string>

// A class abstraction for a high DPI-aware Win32 Window. Intended to be
// inherited from by classes that wish to specialize with custom
// rendering and input handling
class Win32Window {
 public:
  struct Point {
    unsigned int x;
    unsigned int y;
    Point(unsigned int x, unsigned int y) : x(x), y(y) {}
  };

  struct Size {
    unsigned int width;
    unsigned int height;
    Size(unsigned int width, unsigned int height)
        : width(width), height(height) {}
  };

  Win32Window();
  virtual ~Win32Window();

  // Creates a win32 window with |title| that is positioned and sized using
  // |origin| and |size|. New windows are created on the default monitor. Window
  // sizes are specified to the OS in physical pixels, hence to ensure a
  // consistent size this function will scale the inputted width and height as
  // as appropriate for the default monitor. The window is invisible until
  // |Show| is called. Returns true if the window was created successfully.
  bool Create(const std::wstring& title, const Point& origin, const Size& size);

  // Show the current window. Returns true if the window was successfully shown.
  bool Show();

  // Release OS resources associated with window.
  void Destroy();

  // Inserts |content| into the window tree.
  void SetChildContent(HWND content);

  // Returns the backing Window handle to enable clients to set icon and other
  // window properties. Returns nullptr if the window has been destroyed.
  HWND GetHandle();

  // If true, closing this window will quit the application.
  void SetQuitOnClose(bool quit_on_close);

  // Return a RECT representing the bounds of the current client area.
  RECT GetClientArea();

 protected:
  // Processes and route salient window messages for mouse handling,
  // size change and DPI. Delegates handling of these to member overloads that
  // inheriting classes can handle.
  virtual LRESULT MessageHandler(HWND window,
                                 UINT const message,
                                 WPARAM const wparam,
                                 LPARAM const lparam) noexcept;

  // Called when CreateAndShow is called, allowing subclass window-related
  // setup. Subclasses should return false if setup fails.
  virtual bool OnCreate();

  // Called when Destroy is called.
  virtual void OnDestroy();

 private:
  friend class WindowClassRegistrar;

  // OS callback called by message pump. Handles the WM_NCCREATE message which
  // is passed when the non-client area is being created and enables automatic
  // non-client DPI scaling so that the non-client area automatically
  // responds to changes in DPI. All other messages are handled by
  // MessageHandler.
  static LRESULT CALLBACK WndProc(HWND const window,
                                  UINT const message,
                                  WPARAM const wparam,
                                  LPARAM const lparam) noexcept;

  // Retrieves a class instance pointer for |window|
  static Win32Window* GetThisFromHandle(HWND const window) noexcept;

  // Update the window frame's theme to match the system theme.
  static void UpdateTheme(HWND const window);

  bool quit_on_close_ = false;

  // window handle for top level window.
  HWND window_handle_ = nullptr;

  // window handle for hosted content.
  HWND child_content_ = nullptr;
};

#endif  // RUNNER_WIN32_WINDOW_H_


================================================
FILE: flutter-examples/tts/.gitignore
================================================
# Miscellaneous
*.class
*.log
*.pyc
*.swp
.DS_Store
.atom/
.buildlog/
.history
.svn/
migrate_working_dir/

# IntelliJ related
*.iml
*.ipr
*.iws
.idea/

# The .vscode folder contains launch configuration and tasks you configure in
# VS Code which you may wish to be included in version control, so this line
# is commented out by default.
#.vscode/

# Flutter/Dart/Pub related
**/doc/api/
**/ios/Flutter/.last_build_id
.dart_tool/
.flutter-plugins
.flutter-plugins-dependencies
.pub-cache/
.pub/
/build/

# Symbolication related
app.*.symbols

# Obfuscation related
app.*.map.json

# Android Studio will place build artifacts here
/android/app/debug
/android/app/profile
/android/app/release


================================================
FILE: flutter-examples/tts/.metadata
================================================
# This file tracks properties of this Flutter project.
# Used by Flutter tool to assess capabilities and perform upgrades etc.
#
# This file should be version controlled and should not be manually edited.

version:
  revision: "5dcb86f68f239346676ceb1ed1ea385bd215fba1"
  channel: "stable"

project_type: app

# Tracks metadata for the flutter migrate command
migration:
  platforms:
    - platform: root
      create_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1
      base_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1
    - platform: android
      create_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1
      base_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1
    - platform: ios
      create_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1
      base_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1
    - platform: linux
      create_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1
      base_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1
    - platform: macos
      create_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1
      base_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1
    - platform: windows
      create_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1
      base_revision: 5dcb86f68f239346676ceb1ed1ea385bd215fba1

  # User provided section

  # List of Local paths (relative to this file) that should be
  # ignored by the migrate tool.
  #
  # Files that are not part of the templates will be ignored by default.
  unmanaged_files:
    - 'lib/main.dart'
    - 'ios/Runner.xcodeproj/project.pbxproj'


================================================
FILE: flutter-examples/tts/README.md
================================================
# tts

This example demonstrates how to use text to speech (TTS) in Flutter with sherpa-onnx.

It works on the following platforms:

  - Android
  - iOS
  - Linux
  - macOS (both arm64 and x86_64 are supported)
  - Windows

Pre-built APPs for this folder can be found at <https://k2-fsa.github.io/sherpa/onnx/flutter/pre-built-app.html#text-to-speech-tts-speech-synthesis>

Screenshots are given below:

|Android|iOS|Linux|macOS|Windows|
|-------|---|-----|-----|-------|
|![](./android.jpg)|![](./ios.jpg)|![](./ubuntu.jpg)|![](./macos.jpg)|![](./windows.jpg)|

## How to build

Before you run `flutter build`, you have to select a TTS model and change
the code to use your selected model.

### 1. Select a TTS model

We have a list of TTS models at

<https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models>

You can select any of them. If you feel that there are so many that you don't know
which one is the best, please visit <http://huggingface.co/spaces/k2-fsa/text-to-speech>
and try each one by yourself and select the one you consider the best.

Suppose you select

  <https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-libritts_r-medium.tar.bz2>

Then please do the following:

  - 1. Download and unzip the model

```bash
cd flutter-examples/tts/assets
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-libritts_r-medium.tar.bz2
tar xf vits-piper-en_US-libritts_r-medium.tar.bz2
rm vits-piper-en_US-libritts_r-medium.tar.bz2
cd ..

./generate-asset-list.py
```

  Note that you have to run [./generate-asset-list.py](./generate-asset-list.py) so that Flutter knows where
  to find the model.

  - 2. Change the code to use the downloaded model.

    We have given several examples for different models in [./lib/model.dart](./lib/model.dart).
    For our selected model, we need to change [./lib/model.dart](./lib/model.dart) so that it looks like below:

```
// Example 6
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-libritts_r-medium.tar.bz2
modelDir = 'vits-piper-en_US-libritts_r-medium';
modelName = 'en_US-libritts_r-medium.onnx';
dataDir = 'vits-piper-en_US-libritts_r-medium/espeak-ng-data';
```

  - 3. That's it.

### Build the APP

  - 1. For Linux

```bash
flutter build linux

# See below if you get any errors
```

  - 2. For macOS

To build a universal2 APP, use

```bash
flutter build macos
```

To build for `x86_64`, use

```bash
export FLUTTER_XCODE_ARCHS=x86_64
flutter build macos
```

To build for `arm64`, use

```bash
export FLUTTER_XCODE_ARCHS=arm64
flutter build macos
```

  - 3. For Windows

```bash
flutter build windows
```

  - 4. For Android

```bash
flutter build apk --split-per-abi
```

  - 5. For iOS

First, connect your iPhone to your computer and use `flutter devices` to show
available devices. You will see something like below:

```
Found 3 connected devices:
  iPhone (mobile) • 00008030-001064212E85802E • ios            • iOS 16.3 20D47
  macOS (desktop) • macos                     • darwin-x64     • macOS 13.1 22C65 darwin-x64
  Chrome (web)    • chrome                    • web-javascript • Google Chrome 126.0.6478.127

No wireless devices were found.

Run "flutter emulators" to list and start any available device emulators.

If you expected another device to be detected, please run "flutter doctor" to diagnose potential issues. You may also try increasing the time to wait for connected devices with the "--device-timeout" flag. Visit https://flutter.dev/setup/ for troubleshooting tips.
```

Then you can use
```
flutter run -d 00008030-001064212E85802E --release
```

You would see something like below:
```
Launching lib/main.dart on iPhone in release mode...
Automatically signing iOS for device deployment using specified development team in Xcode project: N5ZH3Z63A6
Running pod install...                                           1,773ms
Running Xcode build...
Xcode build done.                                            7.9s
Failed to build iOS app
Could not build the precompiled application for the device.
Error (Xcode): No profiles for 'com.k2fsa.sherpa.onnx.tts' were found: Xcode couldn't find any iOS App Development provisioning profiles matching
'com.k2fsa.sherpa.onnx.tts'. Automatic signing is disabled and unable to generate a profile. To enable automatic signing, pass
-allowProvisioningUpdates to xcodebuild.
/Users/fangjun/open-source/sherpa-onnx/flutter-examples/tts/ios/Runner.xcodeproj


It appears that there was a problem signing your application prior to installation on the device.

Verify that the Bundle Identifier in your project is your signing id in Xcode
  open ios/Runner.xcworkspace

Also try selecting 'Product > Build' to fix the problem.

Error running application on iPhone.
```

After you have followed the instructions in the above log, run gain

> Note: I have run `open ios/Runner.xcworkspace` and click `Product -> Build`.

```
flutter run -d 00008030-001064212E85802E --release
```

Finally, it will show something like below:

```
Launching lib/main.dart on iPhone in release mode...
Automatically signing iOS for device deployment using specified development team in Xcode project: N5ZH3Z63A6
Running Xcode build...
 └─Compiling, linking and signing...                         6.5s
Xcode build done.                                           18.3s
Installing and launching...                                        22.9s

Flutter run key commands.
h List all available interactive commands.
c Clear the screen
q Quit (terminate the application on the device).
```

## Fix for Linux

If you get the following errors on Linux,

```
Building Linux application...
CMake Error at /usr/local/share/cmake-3.29/Modules/FindPkgConfig.cmake:634 (message):
  The following required packages were not found:

   - gstreamer-1.0

Call Stack (most recent call first):
  /usr/local/share/cmake-3.29/Modules/FindPkgConfig.cmake:862 (_pkg_check_modules_internal)
  flutter/ephemeral/.plugin_symlinks/audioplayers_linux/linux/CMakeLists.txt:24 (pkg_check_modules)
```

please run:

```bash
sudo apt-get install -y libgstreamer1.0-dev libgstreamer-plugins-base1.0-dev libunwind-dev
```

See also <https://github.com/bluefireteam/audioplayers/tree/main/packages/audioplayers_linux#setup-for-linux>
for the above error.


================================================
FILE: flutter-examples/tts/analysis_options.yaml
================================================
# This file configures the analyzer, which statically analyzes Dart code to
# check for errors, warnings, and lints.
#
# The issues identified by the analyzer are surfaced in the UI of Dart-enabled
# IDEs (https://dart.dev/tools#ides-and-editors). The analyzer can also be
# invoked from the command line by running `flutter analyze`.

# The following line activates a set of recommended lints for Flutter apps,
# packages, and plugins designed to encourage good coding practices.
include: package:flutter_lints/flutter.yaml

linter:
  # The lint rules applied to this project can be customized in the
  # section below to disable rules from the `package:flutter_lints/flutter.yaml`
  # included above or to enable additional rules. A list of all available lints
  # and their documentation is published at https://dart.dev/lints.
  #
  # Instead of disabling a lint rule for the entire project in the
  # section below, it can also be suppressed for a single line of code
  # or a specific dart file by using the `// ignore: name_of_lint` and
  # `// ignore_for_file: name_of_lint` syntax on the line or in the file
  # producing the lint.
  rules:
    # avoid_print: false  # Uncomment to disable the `avoid_print` rule
    # prefer_single_quotes: true  # Uncomment to enable the `prefer_single_quotes` rule

# Additional information about this file can be found at
# https://dart.dev/guides/language/analysis-options


================================================
FILE: flutter-examples/tts/android/.gitignore
================================================
gradle-wrapper.jar
/.gradle
/captures/
/gradlew
/gradlew.bat
/local.properties
GeneratedPluginRegistrant.java

# Remember to never publicly share your keystore.
# See https://flutter.dev/docs/deployment/android#reference-the-keystore-from-the-app
key.properties
**/*.keystore
**/*.jks


================================================
FILE: flutter-examples/tts/android/app/build.gradle
================================================
plugins {
    id "com.android.application"
    id "kotlin-android"
    // The Flutter Gradle Plugin must be applied after the Android and Kotlin Gradle plugins.
    id "dev.flutter.flutter-gradle-plugin"
}

def localProperties = new Properties()
def localPropertiesFile = rootProject.file("local.properties")
if (localPropertiesFile.exists()) {
    localPropertiesFile.withReader("UTF-8") { reader ->
        localProperties.load(reader)
    }
}

def flutterVersionCode = localProperties.getProperty("flutter.versionCode")
if (flutterVersionCode == null) {
    flutterVersionCode = "1"
}

def flutterVersionName = localProperties.getProperty("flutter.versionName")
if (flutterVersionName == null) {
    flutterVersionName = "1.0"
}

android {
    namespace = "com.k2fsa.sherpa.onnx.tts"
    compileSdk = 35
    ndkVersion = flutter.ndkVersion

    compileOptions {
        sourceCompatibility = JavaVersion.toVersion(17)
        targetCompatibility = JavaVersion.toVersion(17)
    }

    kotlinOptions {
        jvmTarget = "17"
    }

    java {
        toolchain {
            languageVersion = JavaLanguageVersion.of(17)
        }
    }

    defaultConfig {
        // TODO: Specify your own unique Application ID (https://developer.android.com/studio/build/application-id.html).
        applicationId = "com.k2fsa.sherpa.onnx.tts"
        // You can update the following values to match your application needs.
        // For more information, see: https://docs.flutter.dev/deployment/android#reviewing-the-gradle-build-configuration.
        minSdk = flutter.minSdkVersion
        targetSdk = 35
        versionCode = flutterVersionCode.toInteger()
        versionName = flutterVersionName
    }

    buildTypes {
        release {
            // TODO: Add your own signing config for the release build.
            // Signing with the debug keys for now, so `flutter run --release` works.
            signingConfig = signingConfigs.debug
        }
    }
}

flutter {
    source = "../.."
}


================================================
FILE: flutter-examples/tts/android/app/src/debug/AndroidManifest.xml
================================================
<manifest xmlns:android="http://schemas.android.com/apk/res/android">
    <!-- The INTERNET permission is required for development. Specifically,
         the Flutter tool needs it to communicate with the running application
         to allow setting breakpoints, to provide hot reload, etc.
    -->
    <uses-permission android:name="android.permission.INTERNET"/>
</manifest>


================================================
FILE: flutter-examples/tts/android/app/src/main/AndroidManifest.xml
================================================
<manifest xmlns:android="http://schemas.android.com/apk/res/android">
    <application
        android:label="tts"
        android:name="${applicationName}"
        android:icon="@mipmap/ic_launcher">
        <activity
            android:name=".MainActivity"
            android:exported="true"
            android:launchMode="singleTop"
            android:taskAffinity=""
            android:theme="@style/LaunchTheme"
            android:configChanges="orientation|keyboardHidden|keyboard|screenSize|smallestScreenSize|locale|layoutDirection|fontScale|screenLayout|density|uiMode"
            android:hardwareAccelerated="true"
            android:windowSoftInputMode="adjustResize">
            <!-- Specifies an Android theme to apply to this Activity as soon as
                 the Android process has started. This theme is visible to the user
                 while the Flutter UI initializes. After that, this theme continues
                 to determine the Window background behind the Flutter UI. -->
            <meta-data
              android:name="io.flutter.embedding.android.NormalTheme"
              android:resource="@style/NormalTheme"
              />
            <intent-filter>
                <action android:name="android.intent.action.MAIN"/>
                <category android:name="android.intent.category.LAUNCHER"/>
            </intent-filter>
        </activity>
        <!-- Don't delete the meta-data below.
             This is used by the Flutter tool to generate GeneratedPluginRegistrant.java -->
        <meta-data
            android:name="flutterEmbedding"
            android:value="2" />
    </application>
    <!-- Required to query activities that can process text, see:
         https://developer.android.com/training/package-visibility and
         https://developer.android.com/reference/android/content/Intent#ACTION_PROCESS_TEXT.

         In particular, this is used by the Flutter engine in io.flutter.plugin.text.ProcessTextPlugin. -->
    <queries>
        <intent>
            <action android:name="android.intent.action.PROCESS_TEXT"/>
            <data android:mimeType="text/plain"/>
        </intent>
    </queries>
</manifest>


================================================
FILE: flutter-examples/tts/android/app/src/main/kotlin/com/example/tts/MainActivity.kt
================================================
package com.k2fsa.sherpa.onnx.tts

import io.flutter.embedding.android.FlutterActivity

class MainActivity: FlutterActivity()


================================================
FILE: flutter-examples/tts/android/app/src/main/res/drawable/launch_background.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<!-- Modify this file to customize your launch splash screen -->
<layer-list xmlns:android="http://schemas.android.com/apk/res/android">
    <item android:drawable="@android:color/white" />

    <!-- You can insert your own image assets here -->
    <!-- <item>
        <bitmap
            android:gravity="center"
            android:src="@mipmap/launch_image" />
    </item> -->
</layer-list>


================================================
FILE: flutter-examples/tts/android/app/src/main/res/drawable-v21/launch_background.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<!-- Modify this file to customize your launch splash screen -->
<layer-list xmlns:android="http://schemas.android.com/apk/res/android">
    <item android:drawable="?android:colorBackground" />

    <!-- You can insert your own image assets here -->
    <!-- <item>
        <bitmap
            android:gravity="center"
            android:src="@mipmap/launch_image" />
    </item> -->
</layer-list>


================================================
FILE: flutter-examples/tts/android/app/src/main/res/values/styles.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<resources>
    <!-- Theme applied to the Android Window while the process is starting when the OS's Dark Mode setting is off -->
    <style name="LaunchTheme" parent="@android:style/Theme.Light.NoTitleBar">
        <!-- Show a splash screen on the activity. Automatically removed when
             the Flutter engine draws its first frame -->
        <item name="android:windowBackground">@drawable/launch_background</item>
    </style>
    <!-- Theme applied to the Android Window as soon as the process has started.
         This theme determines the color of the Android Window while your
         Flutter UI initializes, as well as behind your Flutter UI while its
         running.

         This Theme is only used starting with V2 of Flutter's Android embedding. -->
    <style name="NormalTheme" parent="@android:style/Theme.Light.NoTitleBar">
        <item name="android:windowBackground">?android:colorBackground</item>
    </style>
</resources>


================================================
FILE: flutter-examples/tts/android/app/src/main/res/values-night/styles.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<resources>
    <!-- Theme applied to the Android Window while the process is starting when the OS's Dark Mode setting is on -->
    <style name="LaunchTheme" parent="@android:style/Theme.Black.NoTitleBar">
        <!-- Show a splash screen on the activity. Automatically removed when
             the Flutter engine draws its first frame -->
        <item name="android:windowBackground">@drawable/launch_background</item>
    </style>
    <!-- Theme applied to the Android Window as soon as the process has started.
         This theme determines the color of the Android Window while your
         Flutter UI initializes, as well as behind your Flutter UI while its
         running.

         This Theme is only used starting with V2 of Flutter's Android embedding. -->
    <style name="NormalTheme" parent="@android:style/Theme.Black.NoTitleBar">
        <item name="android:windowBackground">?android:colorBackground</item>
    </style>
</resources>


================================================
FILE: flutter-examples/tts/android/app/src/profile/AndroidManifest.xml
================================================
<manifest xmlns:android="http://schemas.android.com/apk/res/android">
    <!-- The INTERNET permission is required for development. Specifically,
         the Flutter tool needs it to communicate with the running application
         to allow setting breakpoints, to provide hot reload, etc.
    -->
    <uses-permission android:name="android.permission.INTERNET"/>
</manifest>


================================================
FILE: flutter-examples/tts/android/build.gradle
================================================
allprojects {
    repositories {
        google()
        mavenCentral()
    }
}

rootProject.buildDir = "../build"
subprojects {
    project.buildDir = "${rootProject.buildDir}/${project.name}"
}
subprojects {
    project.evaluationDependsOn(":app")
}

tasks.register("clean", Delete) {
    delete rootProject.buildDir
}


================================================
FILE: flutter-examples/tts/android/gradle/wrapper/gradle-wrapper.properties
================================================
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.9-bin.zip


================================================
FILE: flutter-examples/tts/android/gradle.properties
================================================
org.gradle.jvmargs=-Xmx4G -XX:+HeapDumpOnOutOfMemoryError
android.useAndroidX=true
android.enableJetifier=true
FLUTTER_COMPILE_SDK_VERSION=35
org.gradle.daemon=false


================================================
FILE: flutter-examples/tts/android/settings.gradle
================================================
pluginManagement {
    def flutterSdkPath = {
        def properties = new Properties()
        file("local.properties").withInputStream { properties.load(it) }
        def flutterSdkPath = properties.getProperty("flutter.sdk")
        assert flutterSdkPath != null, "flutter.sdk not set in local.properties"
        return flutterSdkPath
    }()

    includeBuild("$flutterSdkPath/packages/flutter_tools/gradle")

    repositories {
        google()
        mavenCentral()
        gradlePluginPortal()
    }
}

plugins {
    id "dev.flutter.flutter-plugin-loader" version "1.0.0"
    id "com.android.application" version "8.7.0" apply false
    id "org.jetbrains.kotlin.android" version "1.9.24" apply false
}

include ":app"


================================================
FILE: flutter-examples/tts/assets/.gitkeep
================================================


================================================
FILE: flutter-examples/tts/generate-asset-list.py
================================================
#!/usr/bin/env python3

"""
This file assumes that
  assets:
is the last line in ./pubspec.yaml

It reads the file names of all files from the ./assets folder
and turns them as assets and writes them into ./pubspec.yaml
"""

import os

def main():
    target = "./assets/"
    space = "    "
    subfolders = []
    patterns_to_skip = ["1.5x", "2.x", "3.x", "4.x"]
    for root, dirs, files in os.walk(target):
        for d in dirs:
            path = os.path.join(root, d).replace("\\", "/")
            if os.listdir(path):
                path = path.lstrip('./')
                if any(path.endswith(pattern) for pattern in patterns_to_skip):
                    continue
                subfolders.append("{space}- {path}/".format(space=space, path=path))

    assert subfolders, "The subfolders list is empty."

    subfolders = sorted(subfolders)

    loc_of_flutter = -1
    loc_of_flutter_asset = -1
    loc_of_end_flutter_asset = -1
    loc_of_end_flutter = -1

    with open("./pubspec.yaml", encoding="utf-8") as f:
        lines = f.readlines()
        for index, line in enumerate(lines):
            if line == "flutter:\n":
                loc_of_flutter = index + 1
                if index == len(lines) - 1:
                    loc_of_end_flutter = index + 2
                continue
            if loc_of_flutter >= 0 and loc_of_flutter_asset < 0 and line == "  assets:\n":
                loc_of_flutter_asset = index + 1
                continue

    with open("./pubspec.yaml", encoding="utf-8") as f:
        lines = f.readlines()
        for index, line in enumerate(lines):
            if index < loc_of_flutter:
                continue
            if loc_of_flutter_asset >= 0:
                if line.startswith("    - assets/"):
                    loc_of_end_flutter_asset = index + 1
                    continue
                else:
                    loc_of_end_flutter = index + 1
                    continue
            else:
                if line.startswith("  ") is False:
                    loc_of_end_flutter = index + 1
                    continue
                else:
                    loc_of_end_flutter = index + 2
                    break

    assert loc_of_flutter >= 0, "The 'flutter:' section is missing in the pubspec.yaml file."

    with open("./pubspec.yaml", "w", encoding="utf-8") as f:
        for index, line in enumerate(lines):
            if loc_of_end_flutter_asset >= 0:
                if index + 1 < loc_of_flutter_asset or index + 1 > loc_of_end_flutter_asset:
                    f.write(line)
                if index + 1 == loc_of_flutter_asset:
                    f.write("  assets:\n")
                    for folder in subfolders:
                        f.write("{folder}\n".format(folder=folder))
            else:
                if index + 1 < loc_of_end_flutter or index + 1 > loc_of_end_flutter:
                    f.write(line)
                if index + 1 == loc_of_end_flutter:
                    f.write("  assets:\n")
                    for indexOfFolder, folder in enumerate(subfolders):
                        f.write("{folder}\n".format(folder=folder))
                        if indexOfFolder == len(subfolders) - 1:
                            f.write("\n")
                            break

        if loc_of_end_flutter == len(lines) + 1:
            f.write("\n")
            f.write("  assets:\n")
            for folder in subfolders:
                f.write("{folder}\n".format(folder=folder))

if __name__ == "__main__":
    main()


================================================
FILE: flutter-examples/tts/ios/.gitignore
================================================
**/dgph
*.mode1v3
*.mode2v3
*.moved-aside
*.pbxuser
*.perspectivev3
**/*sync/
.sconsign.dblite
.tags*
**/.vagrant/
**/DerivedData/
Icon?
**/Pods/
**/.symlinks/
profile
xcuserdata
**/.generated/
Flutter/App.framework
Flutter/Flutter.framework
Flutter/Flutter.podspec
Flutter/Generated.xcconfig
Flutter/ephemeral/
Flutter/app.flx
Flutter/app.zip
Flutter/flutter_assets/
Flutter/flutter_export_environment.sh
ServiceDefinitions.json
Runner/GeneratedPluginRegistrant.*

# Exceptions to above rules.
!default.mode1v3
!default.mode2v3
!default.pbxuser
!default.perspectivev3


================================================
FILE: flutter-examples/tts/ios/Flutter/AppFrameworkInfo.plist
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
  <key>CFBundleDevelopmentRegion</key>
  <string>en</string>
  <key>CFBundleExecutable</key>
  <string>App</string>
  <key>CFBundleIdentifier</key>
  <string>io.flutter.flutter.app</string>
  <key>CFBundleInfoDictionaryVersion</key>
  <string>6.0</string>
  <key>CFBundleName</key>
  <string>App</string>
  <key>CFBundlePackageType</key>
  <string>FMWK</string>
  <key>CFBundleShortVersionString</key>
  <string>1.0</string>
  <key>CFBundleSignature</key>
  <string>????</string>
  <key>CFBundleVersion</key>
  <string>1.0</string>
  <key>MinimumOSVersion</key>
  <string>12.0</string>
</dict>
</plist>


================================================
FILE: flutter-examples/tts/ios/Flutter/Debug.xcconfig
================================================
#include "Generated.xcconfig"


================================================
FILE: flutter-examples/tts/ios/Flutter/Release.xcconfig
================================================
#include "Generated.xcconfig"


================================================
FILE: flutter-examples/tts/ios/Runner/AppDelegate.swift
================================================
import Flutter
import UIKit

@UIApplicationMain
@objc class AppDelegate: FlutterAppDelegate {
  override func application(
    _ application: UIApplication,
    didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]?
  ) -> Bool {
    GeneratedPluginRegistrant.register(with: self)
    return super.application(application, didFinishLaunchingWithOptions: launchOptions)
  }
}


================================================
FILE: flutter-examples/tts/ios/Runner/Assets.xcassets/AppIcon.appiconset/Contents.json
================================================
{
  "images" : [
    {
      "size" : "20x20",
      "idiom" : "iphone",
      "filename" : "Icon-App-20x20@2x.png",
      "scale" : "2x"
    },
    {
      "size" : "20x20",
      "idiom" : "iphone",
      "filename" : "Icon-App-20x20@3x.png",
      "scale" : "3x"
    },
    {
      "size" : "29x29",
      "idiom" : "iphone",
      "filename" : "Icon-App-29x29@1x.png",
      "scale" : "1x"
    },
    {
      "size" : "29x29",
      "idiom" : "iphone",
      "filename" : "Icon-App-29x29@2x.png",
      "scale" : "2x"
    },
    {
      "size" : "29x29",
      "idiom" : "iphone",
      "filename" : "Icon-App-29x29@3x.png",
      "scale" : "3x"
    },
    {
      "size" : "40x40",
      "idiom" : "iphone",
      "filename" : "Icon-App-40x40@2x.png",
      "scale" : "2x"
    },
    {
      "size" : "40x40",
      "idiom" : "iphone",
      "filename" : "Icon-App-40x40@3x.png",
      "scale" : "3x"
    },
    {
      "size" : "60x60",
      "idiom" : "iphone",
      "filename" : "Icon-App-60x60@2x.png",
      "scale" : "2x"
    },
    {
      "size" : "60x60",
      "idiom" : "iphone",
      "filename" : "Icon-App-60x60@3x.png",
      "scale" : "3x"
    },
    {
      "size" : "20x20",
      "idiom" : "ipad",
      "filename" : "Icon-App-20x20@1x.png",
      "scale" : "1x"
    },
    {
      "size" : "20x20",
      "idiom" : "ipad",
      "filename" : "Icon-App-20x20@2x.png",
      "scale" : "2x"
    },
    {
      "size" : "29x29",
      "idiom" : "ipad",
      "filename" : "Icon-App-29x29@1x.png",
      "scale" : "1x"
    },
    {
      "size" : "29x29",
      "idiom" : "ipad",
      "filename" : "Icon-App-29x29@2x.png",
      "scale" : "2x"
    },
    {
      "size" : "40x40",
      "idiom" : "ipad",
      "filename" : "Icon-App-40x40@1x.png",
      "scale" : "1x"
    },
    {
      "size" : "40x40",
      "idiom" : "ipad",
      "filename" : "Icon-App-40x40@2x.png",
      "scale" : "2x"
    },
    {
      "size" : "76x76",
      "idiom" : "ipad",
      "filename" : "Icon-App-76x76@1x.png",
      "scale" : "1x"
    },
    {
      "size" : "76x76",
      "idiom" : "ipad",
      "filename" : "Icon-App-76x76@2x.png",
      "scale" : "2x"
    },
    {
      "size" : "83.5x83.5",
      "idiom" : "ipad",
      "filename" : "Icon-App-83.5x83.5@2x.png",
      "scale" : "2x"
    },
    {
      "size" : "1024x1024",
      "idiom" : "ios-marketing",
      "filename" : "Icon-App-1024x1024@1x.png",
      "scale" : "1x"
    }
  ],
  "info" : {
    "version" : 1,
    "author" : "xcode"
  }
}


================================================
FILE: flutter-examples/tts/ios/Runner/Assets.xcassets/LaunchImage.imageset/Contents.json
================================================
{
  "images" : [
    {
      "idiom" : "universal",
      "filename" : "LaunchImage.png",
      "scale" : "1x"
    },
    {
      "idiom" : "universal",
      "filename" : "LaunchImage@2x.png",
      "scale" : "2x"
    },
    {
      "idiom" : "universal",
      "filename" : "LaunchImage@3x.png",
      "scale" : "3x"
    }
  ],
  "info" : {
    "version" : 1,
    "author" : "xcode"
  }
}


================================================
FILE: flutter-examples/tts/ios/Runner/Assets.xcassets/LaunchImage.imageset/README.md
================================================
# Launch Screen Assets

You can customize the launch screen with your own desired assets by replacing the image files in this directory.

You can also do it by opening your Flutter project's Xcode project with `open ios/Runner.xcworkspace`, selecting `Runner/Assets.xcassets` in the Project Navigator and dropping in the desired images.

================================================
FILE: flutter-examples/tts/ios/Runner/Base.lproj/LaunchScreen.storyboard
================================================
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="12121" systemVersion="16G29" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" launchScreen="YES" colorMatched="YES" initialViewController="01J-lp-oVM">
    <dependencies>
        <deployment identifier="iOS"/>
        <plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="12089"/>
    </dependencies>
    <scenes>
        <!--View Controller-->
        <scene sceneID="EHf-IW-A2E">
            <objects>
                <viewController id="01J-lp-oVM" sceneMemberID="viewController">
                    <layoutGuides>
                        <viewControllerLayoutGuide type="top" id="Ydg-fD-yQy"/>
                        <viewControllerLayoutGuide type="bottom" id="xbc-2k-c8Z"/>
                    </layoutGuides>
                    <view key="view" contentMode="scaleToFill" id="Ze5-6b-2t3">
                        <autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
                        <subviews>
                            <imageView opaque="NO" clipsSubviews="YES" multipleTouchEnabled="YES" contentMode="center" image="LaunchImage" translatesAutoresizingMaskIntoConstraints="NO" id="YRO-k0-Ey4">
                            </imageView>
                        </subviews>
                        <color key="backgroundColor" red="1" green="1" blue="1" alpha="1" colorSpace="custom" customColorSpace="sRGB"/>
                        <constraints>
                            <constraint firstItem="YRO-k0-Ey4" firstAttribute="centerX" secondItem="Ze5-6b-2t3" secondAttribute="centerX" id="1a2-6s-vTC"/>
                            <constraint firstItem="YRO-k0-Ey4" firstAttribute="centerY" secondItem="Ze5-6b-2t3" secondAttribute="centerY" id="4X2-HB-R7a"/>
                        </constraints>
                    </view>
                </viewController>
                <placeholder placeholderIdentifier="IBFirstResponder" id="iYj-Kq-Ea1" userLabel="First Responder" sceneMemberID="firstResponder"/>
            </objects>
            <point key="canvasLocation" x="53" y="375"/>
        </scene>
    </scenes>
    <resources>
        <image name="LaunchImage" width="168" height="185"/>
    </resources>
</document>


================================================
FILE: flutter-examples/tts/ios/Runner/Base.lproj/Main.storyboard
================================================
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="10117" systemVersion="15F34" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" initialViewController="BYZ-38-t0r">
    <dependencies>
        <deployment identifier="iOS"/>
        <plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="10085"/>
    </dependencies>
    <scenes>
        <!--Flutter View Controller-->
        <scene sceneID="tne-QT-ifu">
            <objects>
                <viewController id="BYZ-38-t0r" customClass="FlutterViewController" sceneMemberID="viewController">
                    <layoutGuides>
                        <viewControllerLayoutGuide type="top" id="y3c-jy-aDJ"/>
                        <viewControllerLayoutGuide type="bottom" id="wfy-db-euE"/>
                    </layoutGuides>
                    <view key="view" contentMode="scaleToFill" id="8bC-Xf-vdC">
                        <rect key="frame" x="0.0" y="0.0" width="600" height="600"/>
                        <autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
                        <color key="backgroundColor" white="1" alpha="1" colorSpace="custom" customColorSpace="calibratedWhite"/>
                    </view>
                </viewController>
                <placeholder placeholderIdentifier="IBFirstResponder" id="dkx-z0-nzr" sceneMemberID="firstResponder"/>
            </objects>
        </scene>
    </scenes>
</document>


================================================
FILE: flutter-examples/tts/ios/Runner/Info.plist
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>CFBundleDevelopmentRegion</key>
	<string>$(DEVELOPMENT_LANGUAGE)</string>
	<key>CFBundleDisplayName</key>
	<string>Tts</string>
	<key>CFBundleExecutable</key>
	<string>$(EXECUTABLE_NAME)</string>
	<key>CFBundleIdentifier</key>
	<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
	<key>CFBundleInfoDictionaryVersion</key>
	<string>6.0</string>
	<key>CFBundleName</key>
	<string>tts</string>
	<key>CFBundlePackageType</key>
	<string>APPL</string>
	<key>CFBundleShortVersionString</key>
	<string>$(FLUTTER_BUILD_NAME)</string>
	<key>CFBundleSignature</key>
	<string>????</string>
	<key>CFBundleVersion</key>
	<string>$(FLUTTER_BUILD_NUMBER)</string>
	<key>LSRequiresIPhoneOS</key>
	<true/>
	<key>UILaunchStoryboardName</key>
	<string>LaunchScreen</string>
	<key>UIMainStoryboardFile</key>
	<string>Main</string>
	<key>UISupportedInterfaceOrientations</key>
	<array>
		<string>UIInterfaceOrientationPortrait</string>
		<string>UIInterfaceOrientationLandscapeLeft</string>
		<string>UIInterfaceOrientationLandscapeRight</string>
	</array>
	<key>UISupportedInterfaceOrientations~ipad</key>
	<array>
		<string>UIInterfaceOrientationPortrait</string>
		<string>UIInterfaceOrientationPortraitUpsideDown</string>
		<string>UIInterfaceOrientationLandscapeLeft</string>
		<string>UIInterfaceOrientationLandscapeRight</string>
	</array>
	<key>CADisableMinimumFrameDurationOnPhone</key>
	<true/>
	<key>UIApplicationSupportsIndirectInputEvents</key>
	<true/>
</dict>
</plist>


================================================
FILE: flutter-examples/tts/ios/Runner/Runner-Bridging-Header.h
================================================
#import "GeneratedPluginRegistrant.h"


================================================
FILE: flutter-examples/tts/ios/Runner.xcodeproj/project.pbxproj
================================================
// !$*UTF8*$!
{
	archiveVersion = 1;
	classes = {
	};
	objectVersion = 54;
	objects = {

/* Begin PBXBuildFile section */
		1498D2341E8E89220040F4C2 /* GeneratedPluginRegistrant.m in Sources */ = {isa = PBXBuildFile; fileRef = 1498D2331E8E89220040F4C2 /* GeneratedPluginRegistrant.m */; };
		331C808B294A63AB00263BE5 /* RunnerTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 331C807B294A618700263BE5 /* RunnerTests.swift */; };
		3B3967161E833CAA004F5970 /* AppFrameworkInfo.plist in Resources */ = {isa = PBXBuildFile; fileRef = 3B3967151E833CAA004F5970 /* AppFrameworkInfo.plist */; };
		74858FAF1ED2DC5600515810 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 74858FAE1ED2DC5600515810 /* AppDelegate.swift */; };
		97C146FC1CF9000F007C117D /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 97C146FA1CF9000F007C117D /* Main.storyboard */; };
		97C146FE1CF9000F007C117D /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 97C146FD1CF9000F007C117D /* Assets.xcassets */; };
		97C147011CF9000F007C117D /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 97C146FF1CF9000F007C117D /* LaunchScreen.storyboard */; };
/* End PBXBuildFile section */

/* Begin PBXContainerItemProxy section */
		331C8085294A63A400263BE5 /* PBXContainerItemProxy */ = {
			isa = PBXContainerItemProxy;
			containerPortal = 97C146E61CF9000F007C117D /* Project object */;
			proxyType = 1;
			remoteGlobalIDString = 97C146ED1CF9000F007C117D;
			remoteInfo = Runner;
		};
/* End PBXContainerItemProxy section */

/* Begin PBXCopyFilesBuildPhase section */
		9705A1C41CF9048500538489 /* Embed Frameworks */ = {
			isa = PBXCopyFilesBuildPhase;
			buildActionMask = 2147483647;
			dstPath = "";
			dstSubfolderSpec = 10;
			files = (
			);
			name = "Embed Frameworks";
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXCopyFilesBuildPhase section */

/* Begin PBXFileReference section */
		1498D2321E8E86230040F4C2 /* GeneratedPluginRegistrant.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = GeneratedPluginRegistrant.h; sourceTree = "<group>"; };
		1498D2331E8E89220040F4C2 /* GeneratedPluginRegistrant.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = GeneratedPluginRegistrant.m; sourceTree = "<group>"; };
		331C807B294A618700263BE5 /* RunnerTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RunnerTests.swift; sourceTree = "<group>"; };
		331C8081294A63A400263BE5 /* RunnerTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = RunnerTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
		3B3967151E833CAA004F5970 /* AppFrameworkInfo.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; name = AppFrameworkInfo.plist; path = Flutter/AppFrameworkInfo.plist; sourceTree = "<group>"; };
		74858FAD1ED2DC5600515810 /* Runner-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "Runner-Bridging-Header.h"; sourceTree = "<group>"; };
		74858FAE1ED2DC5600515810 /* AppDelegate.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; };
		7AFA3C8E1D35360C0083082E /* Release.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; name = Release.xcconfig; path = Flutter/Release.xcconfig; sourceTree = "<group>"; };
		9740EEB21CF90195004384FC /* Debug.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; name = Debug.xcconfig; path = Flutter/Debug.xcconfig; sourceTree = "<group>"; };
		9740EEB31CF90195004384FC /* Generated.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; name = Generated.xcconfig; path = Flutter/Generated.xcconfig; sourceTree = "<group>"; };
		97C146EE1CF9000F007C117D /* Runner.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = Runner.app; sourceTree = BUILT_PRODUCTS_DIR; };
		97C146FB1CF9000F007C117D /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/Main.storyboard; sourceTree = "<group>"; };
		97C146FD1CF9000F007C117D /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
		97C147001CF9000F007C117D /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = "<group>"; };
		97C147021CF9000F007C117D /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
/* End PBXFileReference section */

/* Begin PBXFrameworksBuildPhase section */
		97C146EB1CF9000F007C117D /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXFrameworksBuildPhase section */

/* Begin PBXGroup section */
		331C8082294A63A400263BE5 /* RunnerTests */ = {
			isa = PBXGroup;
			children = (
				331C807B294A618700263BE5 /* RunnerTests.swift */,
			);
			path = RunnerTests;
			sourceTree = "<group>";
		};
		9740EEB11CF90186004384FC /* Flutter */ = {
			isa = PBXGroup;
			children = (
				3B3967151E833CAA004F5970 /* AppFrameworkInfo.plist */,
				9740EEB21CF90195004384FC /* Debug.xcconfig */,
				7AFA3C8E1D35360C0083082E /* Release.xcconfig */,
				9740EEB31CF90195004384FC /* Generated.xcconfig */,
			);
			name = Flutter;
			sourceTree = "<group>";
		};
		97C146E51CF9000F007C117D = {
			isa = PBXGroup;
			children = (
				9740EEB11CF90186004384FC /* Flutter */,
				97C146F01CF9000F007C117D /* Runner */,
				97C146EF1CF9000F007C117D /* Products */,
				331C8082294A63A400263BE5 /* RunnerTests */,
			);
			sourceTree = "<group>";
		};
		97C146EF1CF9000F007C117D /* Products */ = {
			isa = PBXGroup;
			children = (
				97C146EE1CF9000F007C117D /* Runner.app */,
				331C8081294A63A400263BE5 /* RunnerTests.xctest */,
			);
			name = Products;
			sourceTree = "<group>";
		};
		97C146F01CF9000F007C117D /* Runner */ = {
			isa = PBXGroup;
			children = (
				97C146FA1CF9000F007C117D /* Main.storyboard */,
				97C146FD1CF9000F007C117D /* Assets.xcassets */,
				97C146FF1CF9000F007C117D /* LaunchScreen.storyboard */,
				97C147021CF9000F007C117D /* Info.plist */,
				1498D2321E8E86230040F4C2 /* GeneratedPluginRegistrant.h */,
				1498D2331E8E89220040F4C2 /* GeneratedPluginRegistrant.m */,
				74858FAE1ED2DC5600515810 /* AppDelegate.swift */,
				74858FAD1ED2DC5600515810 /* Runner-Bridging-Header.h */,
			);
			path = Runner;
			sourceTree = "<group>";
		};
/* End PBXGroup section */

/* Begin PBXNativeTarget section */
		331C8080294A63A400263BE5 /* RunnerTests */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = 331C8087294A63A400263BE5 /* Build configuration list for PBXNativeTarget "RunnerTests" */;
			buildPhases = (
				331C807D294A63A400263BE5 /* Sources */,
				331C807F294A63A400263BE5 /* Resources */,
			);
			buildRules = (
			);
			dependencies = (
				331C8086294A63A400263BE5 /* PBXTargetDependency */,
			);
			name = RunnerTests;
			productName = RunnerTests;
			productReference = 331C8081294A63A400263BE5 /* RunnerTests.xctest */;
			productType = "com.apple.product-type.bundle.unit-test";
		};
		97C146ED1CF9000F007C117D /* Runner */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = 97C147051CF9000F007C117D /* Build configuration list for PBXNativeTarget "Runner" */;
			buildPhases = (
				9740EEB61CF901F6004384FC /* Run Script */,
				97C146EA1CF9000F007C117D /* Sources */,
				97C146EB1CF9000F007C117D /* Frameworks */,
				97C146EC1CF9000F007C117D /* Resources */,
				9705A1C41CF9048500538489 /* Embed Frameworks */,
				3B06AD1E1E4923F5004D2608 /* Thin Binary */,
			);
			buildRules = (
			);
			dependencies = (
			);
			name = Runner;
			productName = Runner;
			productReference = 97C146EE1CF9000F007C117D /* Runner.app */;
			productType = "com.apple.product-type.application";
		};
/* End PBXNativeTarget section */

/* Begin PBXProject section */
		97C146E61CF9000F007C117D /* Project object */ = {
			isa = PBXProject;
			attributes = {
				BuildIndependentTargetsInParallel = YES;
				LastUpgradeCheck = 1510;
				ORGANIZATIONNAME = "";
				TargetAttributes = {
					331C8080294A63A400263BE5 = {
						CreatedOnToolsVersion = 14.0;
						TestTargetID = 97C146ED1CF9000F007C117D;
					};
					97C146ED1CF9000F007C117D = {
						CreatedOnToolsVersion = 7.3.1;
						LastSwiftMigration = 1100;
					};
				};
			};
			buildConfigurationList = 97C146E91CF9000F007C117D /* Build configuration list for PBXProject "Runner" */;
			compatibilityVersion = "Xcode 9.3";
			developmentRegion = en;
			hasScannedForEncodings = 0;
			knownRegions = (
				en,
				Base,
			);
			mainGroup = 97C146E51CF9000F007C117D;
			productRefGroup = 97C146EF1CF9000F007C117D /* Products */;
			projectDirPath = "";
			projectRoot = "";
			targets = (
				97C146ED1CF9000F007C117D /* Runner */,
				331C8080294A63A400263BE5 /* RunnerTests */,
			);
		};
/* End PBXProject section */

/* Begin PBXResourcesBuildPhase section */
		331C807F294A63A400263BE5 /* Resources */ = {
			isa = PBXResourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		97C146EC1CF9000F007C117D /* Resources */ = {
			isa = PBXResourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				97C147011CF9000F007C117D /* LaunchScreen.storyboard in Resources */,
				3B3967161E833CAA004F5970 /* AppFrameworkInfo.plist in Resources */,
				97C146FE1CF9000F007C117D /* Assets.xcassets in Resources */,
				97C146FC1CF9000F007C117D /* Main.storyboard in Resources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXResourcesBuildPhase section */

/* Begin PBXShellScriptBuildPhase section */
		3B06AD1E1E4923F5004D2608 /* Thin Binary */ = {
			isa = PBXShellScriptBuildPhase;
			alwaysOutOfDate = 1;
			buildActionMask = 2147483647;
			files = (
			);
			inputPaths = (
				"${TARGET_BUILD_DIR}/${INFOPLIST_PATH}",
			);
			name = "Thin Binary";
			outputPaths = (
			);
			runOnlyForDeploymentPostprocessing = 0;
			shellPath = /bin/sh;
			shellScript = "/bin/sh \"$FLUTTER_ROOT/packages/flutter_tools/bin/xcode_backend.sh\" embed_and_thin";
		};
		9740EEB61CF901F6004384FC /* Run Script */ = {
			isa = PBXShellScriptBuildPhase;
			alwaysOutOfDate = 1;
			buildActionMask = 2147483647;
			files = (
			);
			inputPaths = (
			);
			name = "Run Script";
			outputPaths = (
			);
			runOnlyForDeploymentPostprocessing = 0;
			shellPath = /bin/sh;
			shellScript = "/bin/sh \"$FLUTTER_ROOT/packages/flutter_tools/bin/xcode_backend.sh\" build";
		};
/* End PBXShellScriptBuildPhase section */

/* Begin PBXSourcesBuildPhase section */
		331C807D294A63A400263BE5 /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				331C808B294A63AB00263BE5 /* RunnerTests.swift in Sources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		97C146EA1CF9000F007C117D /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				74858FAF1ED2DC5600515810 /* AppDelegate.swift in Sources */,
				1498D2341E8E89220040F4C2 /* GeneratedPluginRegistrant.m in Sources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXSourcesBuildPhase section */

/* Begin PBXTargetDependency section */
		331C8086294A63A400263BE5 /* PBXTargetDependency */ = {
			isa = PBXTargetDependency;
			target = 97C146ED1CF9000F007C117D /* Runner */;
			targetProxy = 331C8085294A63A400263BE5 /* PBXContainerItemProxy */;
		};
/* End PBXTargetDependency section */

/* Begin PBXVariantGroup section */
		97C146FA1CF9000F007C117D /* Main.storyboard */ = {
			isa = PBXVariantGroup;
			children = (
				97C146FB1CF9000F007C117D /* Base */,
			);
			name = Main.storyboard;
			sourceTree = "<group>";
		};
		97C146FF1CF9000F007C117D /* LaunchScreen.storyboard */ = {
			isa = PBXVariantGroup;
			children = (
				97C147001CF9000F007C117D /* Base */,
			);
			name = LaunchScreen.storyboard;
			sourceTree = "<group>";
		};
/* End PBXVariantGroup section */

/* Begin XCBuildConfiguration section */
		249021D3217E4FDB00AE95B9 /* Profile */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
				CLANG_ANALYZER_NONNULL = YES;
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
				CLANG_CXX_LIBRARY = "libc++";
				CLANG_ENABLE_MODULES = YES;
				CLANG_ENABLE_OBJC_ARC = YES;
				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
				CLANG_WARN_BOOL_CONVERSION = YES;
				CLANG_WARN_COMMA = YES;
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
				CLANG_WARN_STRICT_PROTOTYPES = YES;
				CLANG_WARN_SUSPICIOUS_MOVE = YES;
				CLANG_WARN_UNREACHABLE_CODE = YES;
				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
				"CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer";
				COPY_PHASE_STRIP = NO;
				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
				ENABLE_NS_ASSERTIONS = NO;
				ENABLE_STRICT_OBJC_MSGSEND = YES;
				ENABLE_USER_SCRIPT_SANDBOXING = NO;
				GCC_C_LANGUAGE_STANDARD = gnu99;
				GCC_NO_COMMON_BLOCKS = YES;
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
				GCC_WARN_UNDECLARED_SELECTOR = YES;
				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
				GCC_WARN_UNUSED_FUNCTION = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				IPHONEOS_DEPLOYMENT_TARGET = 12.0;
				MTL_ENABLE_DEBUG_INFO = NO;
				SDKROOT = iphoneos;
				SUPPORTED_PLATFORMS = iphoneos;
				TARGETED_DEVICE_FAMILY = "1,2";
				VALIDATE_PRODUCT = YES;
			};
			name = Profile;
		};
		249021D4217E4FDB00AE95B9 /* Profile */ = {
			isa = XCBuildConfiguration;
			baseConfigurationReference = 7AFA3C8E1D35360C0083082E /* Release.xcconfig */;
			buildSettings = {
				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
				CLANG_ENABLE_MODULES = YES;
				CURRENT_PROJECT_VERSION = "$(FLUTTER_BUILD_NUMBER)";
				DEVELOPMENT_TEAM = N5ZH3Z63A6;
				ENABLE_BITCODE = NO;
				INFOPLIST_FILE = Runner/Info.plist;
				LD_RUNPATH_SEARCH_PATHS = (
					"$(inherited)",
					"@executable_path/Frameworks",
				);
				PRODUCT_BUNDLE_IDENTIFIER = com.k2fsa.sherpa.onnx.tts;
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_OBJC_BRIDGING_HEADER = "Runner/Runner-Bridging-Header.h";
				SWIFT_VERSION = 5.0;
				OTHER_LDFLAGS = "-lc++";
				VERSIONING_SYSTEM = "apple-generic";
			};
			name = Profile;
		};
		331C8088294A63A400263BE5 /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				BUNDLE_LOADER = "$(TEST_HOST)";
				CODE_SIGN_STYLE = Automatic;
				CURRENT_PROJECT_VERSION = 1;
				GENERATE_INFOPLIST_FILE = YES;
				MARKETING_VERSION = 1.0;
				PRODUCT_BUNDLE_IDENTIFIER = com.k2fsa.sherpa.onnx.tts.RunnerTests;
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
				SWIFT_VERSION = 5.0;
				OTHER_LDFLAGS = "-lc++";
				TEST_HOST = "$(BUILT_PRODUCTS_DIR)/Runner.app/$(BUNDLE_EXECUTABLE_FOLDER_PATH)/Runner";
			};
			name = Debug;
		};
		331C8089294A63A400263BE5 /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				BUNDLE_LOADER = "$(TEST_HOST)";
				CODE_SIGN_STYLE = Automatic;
				CURRENT_PROJECT_VERSION = 1;
				GENERATE_INFOPLIST_FILE = YES;
				MARKETING_VERSION = 1.0;
				PRODUCT_BUNDLE_IDENTIFIER = com.k2fsa.sherpa.onnx.tts.RunnerTests;
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_VERSION = 5.0;
				OTHER_LDFLAGS = "-lc++";
				TEST_HOST = "$(BUILT_PRODUCTS_DIR)/Runner.app/$(BUNDLE_EXECUTABLE_FOLDER_PATH)/Runner";
			};
			name = Release;
		};
		331C808A294A63A400263BE5 /* Profile */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				BUNDLE_LOADER = "$(TEST_HOST)";
				CODE_SIGN_STYLE = Automatic;
				CURRENT_PROJECT_VERSION = 1;
				GENERATE_INFOPLIST_FILE = YES;
				MARKETING_VERSION = 1.0;
				PRODUCT_BUNDLE_IDENTIFIER = com.k2fsa.sherpa.onnx.tts.RunnerTests;
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_VERSION = 5.0;
				OTHER_LDFLAGS = "-lc++";
				TEST_HOST = "$(BUILT_PRODUCTS_DIR)/Runner.app/$(BUNDLE_EXECUTABLE_FOLDER_PATH)/Runner";
			};
			name = Profile;
		};
		97C147031CF9000F007C117D /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
				CLANG_ANALYZER_NONNULL = YES;
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
				CLANG_CXX_LIBRARY = "libc++";
				CLANG_ENABLE_MODULES = YES;
				CLANG_ENABLE_OBJC_ARC = YES;
				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
				CLANG_WARN_BOOL_CONVERSION = YES;
				CLANG_WARN_COMMA = YES;
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
				CLANG_WARN_STRICT_PROTOTYPES = YES;
				CLANG_WARN_SUSPICIOUS_MOVE = YES;
				CLANG_WARN_UNREACHABLE_CODE = YES;
				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
				"CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer";
				COPY_PHASE_STRIP = NO;
				DEBUG_INFORMATION_FORMAT = dwarf;
				ENABLE_STRICT_OBJC_MSGSEND = YES;
				ENABLE_TESTABILITY = YES;
				ENABLE_USER_SCRIPT_SANDBOXING = NO;
				GCC_C_LANGUAGE_STANDARD = gnu99;
				GCC_DYNAMIC_NO_PIC = NO;
				GCC_NO_COMMON_BLOCKS = YES;
				GCC_OPTIMIZATION_LEVEL = 0;
				GCC_PREPROCESSOR_DEFINITIONS = (
					"DEBUG=1",
					"$(inherited)",
				);
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
				GCC_WARN_UNDECLARED_SELECTOR = YES;
				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
				GCC_WARN_UNUSED_FUNCTION = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				IPHONEOS_DEPLOYMENT_TARGET = 12.0;
				MTL_ENABLE_DEBUG_INFO = YES;
				ONLY_ACTIVE_ARCH = YES;
				SDKROOT = iphoneos;
				TARGETED_DEVICE_FAMILY = "1,2";
			};
			name = Debug;
		};
		97C147041CF9000F007C117D /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
				CLANG_ANALYZER_NONNULL = YES;
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
				CLANG_CXX_LIBRARY = "libc++";
				CLANG_ENABLE_MODULES = YES;
				CLANG_ENABLE_OBJC_ARC = YES;
				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
				CLANG_WARN_BOOL_CONVERSION = YES;
				CLANG_WARN_COMMA = YES;
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
				CLANG_WARN_STRICT_PROTOTYPES = YES;
				CLANG_WARN_SUSPICIOUS_MOVE = YES;
				CLANG_WARN_UNREACHABLE_CODE = YES;
				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
				"CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer";
				COPY_PHASE_STRIP = NO;
				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
				ENABLE_NS_ASSERTIONS = NO;
				ENABLE_STRICT_OBJC_MSGSEND = YES;
				ENABLE_USER_SCRIPT_SANDBOXING = NO;
				GCC_C_LANGUAGE_STANDARD = gnu99;
				GCC_NO_COMMON_BLOCKS = YES;
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
				GCC_WARN_UNDECLARED_SELECTOR = YES;
				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
				GCC_WARN_UNUSED_FUNCTION = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				IPHONEOS_DEPLOYMENT_TARGET = 12.0;
				MTL_ENABLE_DEBUG_INFO = NO;
				SDKROOT = iphoneos;
				SUPPORTED_PLATFORMS = iphoneos;
				SWIFT_COMPILATION_MODE = wholemodule;
				SWIFT_OPTIMIZATION_LEVEL = "-O";
				TARGETED_DEVICE_FAMILY = "1,2";
				VALIDATE_PRODUCT = YES;
			};
			name = Release;
		};
		97C147061CF9000F007C117D /* Debug */ = {
			isa = XCBuildConfiguration;
			baseConfigurationReference = 9740EEB21CF90195004384FC /* Debug.xcconfig */;
			buildSettings = {
				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
				CLANG_ENABLE_MODULES = YES;
				CURRENT_PROJECT_VERSION = "$(FLUTTER_BUILD_NUMBER)";
				DEVELOPMENT_TEAM = N5ZH3Z63A6;
				ENABLE_BITCODE = NO;
				INFOPLIST_FILE = Runner/Info.plist;
				LD_RUNPATH_SEARCH_PATHS = (
					"$(inherited)",
					"@executable_path/Frameworks",
				);
				PRODUCT_BUNDLE_IDENTIFIER = com.k2fsa.sherpa.onnx.tts;
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_OBJC_BRIDGING_HEADER = "Runner/Runner-Bridging-Header.h";
				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
				SWIFT_VERSION = 5.0;
				OTHER_LDFLAGS = "-lc++";
				VERSIONING_SYSTEM = "apple-generic";
			};
			name = Debug;
		};
		97C147071CF9000F007C117D /* Release */ = {
			isa = XCBuildConfiguration;
			baseConfigurationReference = 7AFA3C8E1D35360C0083082E /* Release.xcconfig */;
			buildSettings = {
				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
				CLANG_ENABLE_MODULES = YES;
				CURRENT_PROJECT_VERSION = "$(FLUTTER_BUILD_NUMBER)";
				DEVELOPMENT_TEAM = N5ZH3Z63A6;
				ENABLE_BITCODE = NO;
				INFOPLIST_FILE = Runner/Info.plist;
				LD_RUNPATH_SEARCH_PATHS = (
					"$(inherited)",
					"@executable_path/Frameworks",
				);
				PRODUCT_BUNDLE_IDENTIFIER = com.k2fsa.sherpa.onnx.tts;
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_OBJC_BRIDGING_HEADER = "Runner/Runner-Bridging-Header.h";
				SWIFT_VERSION = 5.0;
				OTHER_LDFLAGS = "-lc++";
				VERSIONING_SYSTEM = "apple-generic";
			};
			name = Release;
		};
/* End XCBuildConfiguration section */

/* Begin XCConfigurationList section */
		331C8087294A63A400263BE5 /* Build configuration list for PBXNativeTarget "RunnerTests" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				331C8088294A63A400263BE5 /* Debug */,
				331C8089294A63A400263BE5 /* Release */,
				331C808A294A63A400263BE5 /* Profile */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		97C146E91CF9000F007C117D /* Build configuration list for PBXProject "Runner" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				97C147031CF9000F007C117D /* Debug */,
				97C147041CF9000F007C117D /* Release */,
				249021D3217E4FDB00AE95B9 /* Profile */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		97C147051CF9000F007C117D /* Build configuration list for PBXNativeTarget "Runner" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				97C147061CF9000F007C117D /* Debug */,
				97C147071CF9000F007C117D /* Release */,
				249021D4217E4FDB00AE95B9 /* Profile */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
/* End XCConfigurationList section */
	};
	rootObject = 97C146E61CF9000F007C117D /* Project object */;
}


================================================
FILE: flutter-examples/tts/ios/Runner.xcodeproj/project.xcworkspace/contents.xcworkspacedata
================================================
<?xml version="1.0" encoding="UTF-8"?>
<Workspace
   version = "1.0">
   <FileRef
      location = "self:">
   </FileRef>
</Workspace>


================================================
FILE: flutter-examples/tts/ios/Runner.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>IDEDidComputeMac32BitWarning</key>
	<true/>
</dict>
</plist>


================================================
FILE: flutter-examples/tts/ios/Runner.xcodeproj/project.xcworkspace/xcshareddata/WorkspaceSettings.xcsettings
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>PreviewsEnabled</key>
	<false/>
</dict>
</plist>


================================================
FILE: flutter-examples/tts/ios/Runner.xcodeproj/xcshareddata/xcschemes/Runner.xcscheme
================================================
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
   LastUpgradeVersion = "1510"
   version = "1.3">
   <BuildAction
      parallelizeBuildables = "YES"
      buildImplicitDependencies = "YES">
      <BuildActionEntries>
         <BuildActionEntry
            buildForTesting = "YES"
            buildForRunning = "YES"
            buildForProfiling = "YES"
            buildForArchiving = "YES"
            buildForAnalyzing = "YES">
            <BuildableReference
               BuildableIdentifier = "primary"
               BlueprintIdentifier = "97C146ED1CF9000F007C117D"
               BuildableName = "Runner.app"
               BlueprintName = "Runner"
               ReferencedContainer = "container:Runner.xcodeproj">
            </BuildableReference>
         </BuildActionEntry>
      </BuildActionEntries>
   </BuildAction>
   <TestAction
      buildConfiguration = "Debug"
      selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
      selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
      shouldUseLaunchSchemeArgsEnv = "YES">
      <MacroExpansion>
         <BuildableReference
            BuildableIdentifier = "primary"
            BlueprintIdentifier = "97C146ED1CF9000F007C117D"
            BuildableName = "Runner.app"
            BlueprintName = "Runner"
            ReferencedContainer = "container:Runner.xcodeproj">
         </BuildableReference>
      </MacroExpansion>
      <Testables>
         <TestableReference
            skipped = "NO"
            parallelizable = "YES">
            <BuildableReference
               BuildableIdentifier = "primary"
               BlueprintIdentifier = "331C8080294A63A400263BE5"
               BuildableName = "RunnerTests.xctest"
               BlueprintName = "RunnerTests"
               ReferencedContainer = "container:Runner.xcodeproj">
            </BuildableReference>
         </TestableReference>
      </Testables>
   </TestAction>
   <LaunchAction
      buildConfiguration = "Debug"
      selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
      selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
      launchStyle = "0"
      useCustomWorkingDirectory = "NO"
      ignoresPersistentStateOnLaunch = "NO"
      debugDocumentVersioning = "YES"
      debugServiceExtension = "internal"
      allowLocationSimulation = "YES">
      <BuildableProductRunnable
         runnableDebuggingMode = "0">
         <BuildableReference
            BuildableIdentifier = "primary"
            BlueprintIdentifier = "97C146ED1CF9000F007C117D"
            BuildableName = "Runner.app"
            BlueprintName = "Runner"
            ReferencedContainer = "container:Runner.xcodeproj">
         </BuildableReference>
      </BuildableProductRunnable>
   </LaunchAction>
   <ProfileAction
      buildConfiguration = "Profile"
      shouldUseLaunchSchemeArgsEnv = "YES"
      savedToolIdentifier = ""
      useCustomWorkingDirectory = "NO"
      debugDocumentVersioning = "YES">
      <BuildableProductRunnable
         runnableDebuggingMode = "0">
         <BuildableReference
            BuildableIdentifier = "primary"
            BlueprintIdentifier = "97C146ED1CF9000F007C117D"
            BuildableName = "Runner.app"
            BlueprintName = "Runner"
            ReferencedContainer = "container:Runner.xcodeproj">
         </BuildableReference>
      </BuildableProductRunnable>
   </ProfileAction>
   <AnalyzeAction
      buildConfiguration = "Debug">
   </AnalyzeAction>
   <ArchiveAction
      buildConfiguration = "Release"
      revealArchiveInOrganizer = "YES">
   </ArchiveAction>
</Scheme>


================================================
FILE: flutter-examples/tts/ios/Runner.xcworkspace/contents.xcworkspacedata
================================================
<?xml version="1.0" encoding="UTF-8"?>
<Workspace
   version = "1.0">
   <FileRef
      location = "group:Runner.xcodeproj">
   </FileRef>
</Workspace>


================================================
FILE: flutter-examples/tts/ios/Runner.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>IDEDidComputeMac32BitWarning</key>
	<true/>
</dict>
</plist>


================================================
FILE: flutter-examples/tts/ios/Runner.xcworkspace/xcshareddata/WorkspaceSettings.xcsettings
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>PreviewsEnabled</key>
	<false/>
</dict>
</plist>


================================================
FILE: flutter-examples/tts/ios/RunnerTests/RunnerTests.swift
================================================
import Flutter
import UIKit
import XCTest

class RunnerTests: XCTestCase {

  func testExample() {
    // If you add code to the Runner application, consider adding tests here.
    // See https://developer.apple.com/documentation/xctest for more information about using XCTest.
  }

}


================================================
FILE: flutter-examples/tts/lib/info.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'package:flutter/material.dart';
import 'package:url_launcher/url_launcher.dart';

class InfoScreen extends StatelessWidget {
  @override
  Widget build(BuildContext context) {
    const double height = 20;
    return Container(
      child: Padding(
        padding: const EdgeInsets.all(8.0),
        child: Column(
          crossAxisAlignment: CrossAxisAlignment.start,
          children: <Widget>[
            Text('Everything is open-sourced.'),
            SizedBox(height: height),
            InkWell(
              child: Text('Code: https://github.com/k2-fsa/sherpa-onnx'),
              onTap: () => launch('https://k2-fsa.github.io/sherpa/onnx/'),
            ),
            SizedBox(height: height),
            InkWell(
              child: Text('Doc: https://k2-fsa.github.io/sherpa/onnx/'),
              onTap: () => launch('https://k2-fsa.github.io/sherpa/onnx/'),
            ),
            SizedBox(height: height),
            Text('QQ 群: 744602236'),
            SizedBox(height: height),
            InkWell(
              child: Text(
                  '微信群: https://k2-fsa.github.io/sherpa/social-groups.html'),
              onTap: () =>
                  launch('https://k2-fsa.github.io/sherpa/social-groups.html'),
            ),
          ],
        ),
      ),
    );
  }
}


================================================
FILE: flutter-examples/tts/lib/isolate_tts.dart
================================================
import 'dart:io';
import 'dart:isolate';

import 'package:flutter/material.dart';
import 'package:flutter/services.dart';
import 'package:media_kit/media_kit.dart';
import 'package:path/path.dart' as p;
import 'package:path_provider/path_provider.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import 'utils.dart';

class _IsolateTask<T> {
  final SendPort sendPort;

  RootIsolateToken? rootIsolateToken;

  _IsolateTask(this.sendPort, this.rootIsolateToken);
}

class _PortModel {
  final String method;

  final SendPort? sendPort;
  dynamic data;

  _PortModel({
    required this.method,
    this.sendPort,
    this.data,
  });
}

class _TtsManager {
  /// 主进程通信端口
  final ReceivePort receivePort;

  final Isolate isolate;

  final SendPort isolatePort;

  _TtsManager({
    required this.receivePort,
    required this.isolate,
    required this.isolatePort,
  });
}

class IsolateTts {
  static late final _TtsManager _ttsManager;

  /// 获取线程里的通信端口
  static SendPort get _sendPort => _ttsManager.isolatePort;

  static late sherpa_onnx.OfflineTts _tts;

  static late Player _player;

  static Future<void> init() async {
    ReceivePort port = ReceivePort();
    RootIsolateToken? rootIsolateToken = RootIsolateToken.instance;

    Isolate isolate = await Isolate.spawn(
      _isolateEntry,
      _IsolateTask(port.sendPort, rootIsolateToken),
      errorsAreFatal: false,
    );
    port.listen((msg) async {
      if (msg is SendPort) {
        print(11);
        _ttsManager =
            _TtsManager(receivePort: port, isolate: isolate, isolatePort: msg);
        return;
      }
    });
  }

  static Future<void> _isolateEntry(_IsolateTask task) async {
    if (task.rootIsolateToken != null) {
      BackgroundIsolateBinaryMessenger.ensureInitialized(
          task.rootIsolateToken!);
    }
    MediaKit.ensureInitialized();
    _player = Player();
    sherpa_onnx.initBindings();
    final receivePort = ReceivePort();
    task.sendPort.send(receivePort.sendPort);

    String modelDir = '';
    String modelName = '';
    String ruleFsts = '';
    String ruleFars = '';
    String lexicon = '';
    String dataDir = '';

    // Example 7
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
    // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-melo-tts-zh_en.tar.bz2
    modelDir = 'vits-melo-tts-zh_en';
    modelName = 'model.onnx';
    lexicon = 'lexicon.txt';

    if (modelName == '') {
      throw Exception(
          'You are supposed to select a model by changing the code before you run the app');
    }

    final Directory directory = await getApplicationSupportDirectory();
    modelName = p.join(directory.path, modelDir, modelName);

    if (ruleFsts != '') {
      final all = ruleFsts.split(',');
      var tmp = <String>[];
      for (final f in all) {
        tmp.add(p.join(directory.path, f));
      }
      ruleFsts = tmp.join(',');
    }

    if (ruleFars != '') {
      final all = ruleFars.split(',');
      var tmp = <String>[];
      for (final f in all) {
        tmp.add(p.join(directory.path, f));
      }
      ruleFars = tmp.join(',');
    }

    if (lexicon != '') {
      lexicon = p.join(directory.path, modelDir, lexicon);
    }

    if (dataDir != '') {
      dataDir = p.join(directory.path, dataDir);
    }

    final tokens = p.join(directory.path, modelDir, 'tokens.txt');

    final vits = sherpa_onnx.OfflineTtsVitsModelConfig(
      model: modelName,
      lexicon: lexicon,
      tokens: tokens,
      dataDir: dataDir,
    );

    final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
      vits: vits,
      numThreads: 2,
      debug: true,
      provider: 'cpu',
    );

    final config = sherpa_onnx.OfflineTtsConfig(
      model: modelConfig,
      ruleFsts: ruleFsts,
      ruleFars: ruleFars,
      maxNumSenetences: 1,
    );
    // print(config);
    receivePort.listen((msg) async {
      print(msg);
      if (msg is _PortModel) {
        switch (msg.method) {
          case 'generate':
            {
              _PortModel _v = msg;
              final stopwatch = Stopwatch();
              stopwatch.start();
              final genConfig = sherpa_onnx.OfflineTtsGenerationConfig(
                sid: _v.data['sid'],
                speed: _v.data['speed'],
                silenceScale: 0.2,
              );
              final audio =
                  _tts.generateWithConfig(text: _v.data['text'], config: genConfig);
              final suffix =
                  '-sid-${_v.data['sid']}-speed-${_v.data['speed'].toStringAsPrecision(2)}';
              final filename = await generateWaveFilename(suffix);

              final ok = sherpa_onnx.writeWave(
                filename: filename,
                samples: audio.samples,
                sampleRate: audio.sampleRate,
              );

              if (ok) {
                stopwatch.stop();
                double elapsed = stopwatch.elapsed.inMilliseconds.toDouble();

                double waveDuration = audio.samples.length.toDouble() /
                    audio.sampleRate.toDouble();

                print('Saved to\n$filename\n'
                    'Elapsed: ${(elapsed / 1000).toStringAsPrecision(4)} s\n'
                    'Wave duration: ${waveDuration.toStringAsPrecision(4)} s\n'
                    'RTF: ${(elapsed / 1000).toStringAsPrecision(4)}/${waveDuration.toStringAsPrecision(4)} '
                    '= ${(elapsed / 1000 / waveDuration).toStringAsPrecision(3)} ');

                await _player.open(Media('file:///$filename'));
                await _player.play();
              }
            }
            break;
        }
      }
    });
    _tts = sherpa_onnx.OfflineTts(config);
  }

  static Future<void> generate(
      {required String text, int sid = 0, double speed = 1.0}) async {
    ReceivePort receivePort = ReceivePort();
    _sendPort.send(_PortModel(
      method: 'generate',
      data: {'text': text, 'sid': sid, 'speed': speed},
      sendPort: receivePort.sendPort,
    ));
    await receivePort.first;
    receivePort.close();
  }
}

/// 这里是页面
class IsolateTtsView extends StatefulWidget {
  const IsolateTtsView({super.key});

  @override
  State<IsolateTtsView> createState() => _IsolateTtsViewState();
}

class _IsolateTtsViewState extends State<IsolateTtsView> {
  @override
  void initState() {
    super.initState();
    IsolateTts.init();
  }

  @override
  Widget build(BuildContext context) {
    return Scaffold(
      body: Center(
        child: ElevatedButton(
          onPressed: () {
            IsolateTts.generate(text: '这是已退出的 isolate TTS');
          },
          child: Text('Isolate TTS'),
        ),
      ),
    );
  }
}


================================================
FILE: flutter-examples/tts/lib/main.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'package:flutter/material.dart';

import './info.dart';
import './tts.dart';
import 'isolate_tts.dart';

void main() {
  runApp(const MyApp());
}

class MyApp extends StatelessWidget {
  const MyApp({super.key});

  @override
  Widget build(BuildContext context) {
    return MaterialApp(
      title: 'Next-gen Kaldi flutter demo',
      theme: ThemeData(
        colorScheme: ColorScheme.fromSeed(seedColor: Colors.deepPurple),
        useMaterial3: true,
      ),
      home: const MyHomePage(title: 'Next-gen Kaldi with Flutter'),
    );
  }
}

class MyHomePage extends StatefulWidget {
  const MyHomePage({super.key, required this.title});

  final String title;

  @override
  State<MyHomePage> createState() => _MyHomePageState();
}

class _MyHomePageState extends State<MyHomePage> {
  int _currentIndex = 0;
  final List<Widget> _tabs = [
    TtsScreen(),
    InfoScreen(),
    IsolateTtsView(),
  ];
  @override
  Widget build(BuildContext context) {
    return Scaffold(
      appBar: AppBar(
        title: Text(widget.title),
      ),
      body: _tabs[_currentIndex],
      bottomNavigationBar: BottomNavigationBar(
        currentIndex: _currentIndex,
        onTap: (int index) {
          setState(() {
            _currentIndex = index;
          });
        },
        items: [
          BottomNavigationBarItem(
            icon: Icon(Icons.home),
            label: 'Home',
          ),
          BottomNavigationBarItem(
            icon: Icon(Icons.info),
            label: 'Info',
          ),
          BottomNavigationBarItem(
            icon: Icon(Icons.multiline_chart),
            label: 'isolate',
          ),
        ],
      ),
    );
  }
}


================================================
FILE: flutter-examples/tts/lib/model.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation

import "dart:io";

import 'package:flutter/services.dart';
import 'package:path_provider/path_provider.dart';
import 'package:path/path.dart' as p;
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './utils.dart';

Future<sherpa_onnx.OfflineTts> createOfflineTts() async {
  // sherpa_onnx requires that model files are in the local disk, so we
  // need to copy all asset files to disk.
  await copyAllAssetFiles();

  sherpa_onnx.initBindings();

  // Such a design is to make it easier to build flutter APPs with
  // github actions for a variety of tts models
  //
  // See https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/flutter/generate-tts.py
  // for details

  String modelDir = '';
  String modelName = '';
  String voices = ''; // for Kokoro only
  String ruleFsts = '';
  String ruleFars = '';
  String lexicon = '';
  String dataDir = '';

  // You can select an example below and change it accordingly to match your
  // selected tts model

  // ============================================================
  // Your change starts here
  // ============================================================

  // Example 1:
  // modelDir = 'vits-vctk';
  // modelName = 'vits-vctk.onnx';
  // lexicon = 'lexicon.txt';

  // Example 2:
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
  // modelDir = 'vits-piper-en_US-amy-low';
  // modelName = 'en_US-amy-low.onnx';
  // dataDir = 'vits-piper-en_US-amy-low/espeak-ng-data';

  // Example 3:
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
  // modelDir = 'vits-icefall-zh-aishell3';
  // modelName = 'model.onnx';
  // ruleFsts = 'vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/number.fst,vits-icefall-zh-aishell3/new_heteronym.fst';
  // ruleFars = 'vits-icefall-zh-aishell3/rule.far';
  // lexicon = 'lexicon.txt';

  // Example 4:
  // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#csukuangfj-vits-zh-hf-fanchen-c-chinese-187-speakers
  // modelDir = 'vits-zh-hf-fanchen-C';
  // modelName = 'vits-zh-hf-fanchen-C.onnx';
  // lexicon = 'lexicon.txt';

  // Example 5:
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2
  // modelDir = 'vits-coqui-de-css10';
  // modelName = 'model.onnx';

  // Example 6
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-libritts_r-medium.tar.bz2
  // modelDir = 'vits-piper-en_US-libritts_r-medium';
  // modelName = 'en_US-libritts_r-medium.onnx';
  // dataDir = 'vits-piper-en_US-libritts_r-medium/espeak-ng-data';

  // Example 7
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-melo-tts-zh_en.tar.bz2
  // modelDir = 'vits-melo-tts-zh_en';
  // modelName = 'model.onnx';
  // lexicon = 'lexicon.txt';

  // Example 8
  // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html#kokoro-en-v0-19-english-11-speakers
  // modelDir = 'kokoro-en-v0_19';
  // modelName = 'model.onnx';
  // voices = 'voices.bin';
  // dataDir = 'kokoro-en-v0_19/espeak-ng-data';

  // Example 9
  // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
  // modelDir = 'kokoro-multi-lang-v1_0';
  // modelName = 'model.onnx';
  // voices = 'voices.bin';
  // dataDir = 'kokoro-multi-lang-v1_0/espeak-ng-data';
  // lexicon = 'kokoro-multi-lang-v1_0/lexicon-us-en.txt,kokoro-multi-lang-v1_0/lexicon-zh.txt';

  // ============================================================
  // Please don't change the remaining part of this function
  // ============================================================
  if (modelName == '') {
    throw Exception(
        'You are supposed to select a model by changing the code before you run the app');
  }

  final Directory directory = await getApplicationSupportDirectory();
  modelName = p.join(directory.path, modelDir, modelName);

  if (ruleFsts != '') {
    final all = ruleFsts.split(',');
    var tmp = <String>[];
    for (final f in all) {
      tmp.add(p.join(directory.path, f));
    }
    ruleFsts = tmp.join(',');
  }

  if (ruleFars != '') {
    final all = ruleFars.split(',');
    var tmp = <String>[];
    for (final f in all) {
      tmp.add(p.join(directory.path, f));
    }
    ruleFars = tmp.join(',');
  }

  if (lexicon.contains(',')) {
    final all = lexicon.split(',');
    var tmp = <String>[];
    for (final f in all) {
      tmp.add(p.join(directory.path, f));
    }
    lexicon = tmp.join(',');
  } else if (lexicon != '') {
    lexicon = p.join(directory.path, modelDir, lexicon);
  }

  if (dataDir != '') {
    dataDir = p.join(directory.path, dataDir);
  }

  final tokens = p.join(directory.path, modelDir, 'tokens.txt');
  if (voices != '') {
    voices = p.join(directory.path, modelDir, voices);
  }

  late final sherpa_onnx.OfflineTtsVitsModelConfig vits;
  late final sherpa_onnx.OfflineTtsKokoroModelConfig kokoro;

  if (voices != '') {
    vits = sherpa_onnx.OfflineTtsVitsModelConfig();
    kokoro = sherpa_onnx.OfflineTtsKokoroModelConfig(
      model: modelName,
      voices: voices,
      tokens: tokens,
      dataDir: dataDir,
      lexicon: lexicon,
    );
  } else {
    vits = sherpa_onnx.OfflineTtsVitsModelConfig(
      model: modelName,
      lexicon: lexicon,
      tokens: tokens,
      dataDir: dataDir,
    );

    kokoro = sherpa_onnx.OfflineTtsKokoroModelConfig();
  }

  final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
    vits: vits,
    kokoro: kokoro,
    numThreads: 2,
    debug: true,
    provider: 'cpu',
  );

  final config = sherpa_onnx.OfflineTtsConfig(
    model: modelConfig,
    ruleFsts: ruleFsts,
    ruleFars: ruleFars,
    maxNumSenetences: 1,
  );
  // print(config);

  final tts = sherpa_onnx.OfflineTts(config);
  print('tts created successfully');

  return tts;
}


================================================
FILE: flutter-examples/tts/lib/tts.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:async';

import 'package:flutter/foundation.dart';
import 'package:flutter/services.dart';

import 'package:flutter/material.dart';

import 'package:audioplayers/audioplayers.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './model.dart';
import './utils.dart';

class TtsScreen extends StatefulWidget {
  const TtsScreen({super.key});

  @override
  State<TtsScreen> createState() => _TtsScreenState();
}

class _TtsScreenState extends State<TtsScreen> {
  late final TextEditingController _controller_text_input;
  late final TextEditingController _controller_sid;
  late final TextEditingController _controller_hint;
  late final AudioPlayer _player;
  String _title = 'Text to speech';
  String _lastFilename = '';
  bool _isInitialized = false;
  int _maxSpeakerID = 0;
  double _speed = 1.0;

  sherpa_onnx.OfflineTts? _tts;

  @override
  void initState() {
    _controller_text_input = TextEditingController();
    _controller_hint = TextEditingController();
    _controller_sid = TextEditingController(text: '0');

    super.initState();
  }

  Future<void> _init() async {
    if (!_isInitialized) {
      sherpa_onnx.initBindings();

      _tts?.free();
      _tts = await createOfflineTts();

      _player = AudioPlayer();

      _isInitialized = true;
    }
  }

  @override
  Widget build(BuildContext context) {
    return MaterialApp(
      home: Scaffold(
        appBar: AppBar(
          title: Text(_title),
        ),
        body: Padding(
          padding: EdgeInsets.all(10),
          child: Column(
            // mainAxisAlignment: MainAxisAlignment.center,
            children: <Widget>[
              TextField(
                  decoration: InputDecoration(
                    labelText: "Speaker ID (0-$_maxSpeakerID)",
                    hintText: 'Please input your speaker ID',
                  ),
                  keyboardType: TextInputType.number,
                  maxLines: 1,
                  controller: _controller_sid,
                  onTapOutside: (PointerDownEvent event) {
                    FocusManager.instance.primaryFocus?.unfocus();
                  },
                  inputFormatters: <TextInputFormatter>[FilteringTextInputFormatter.digitsOnly]),
              Slider(
                // decoration: InputDecoration(
                //   labelText: "speech speed",
                // ),
                label: "Speech speed ${_speed.toStringAsPrecision(2)}",
                min: 0.5,
                max: 3.0,
                divisions: 25,
                value: _speed,
                onChanged: (value) {
                  setState(() {
                    _speed = value;
                  });
                },
              ),
              const SizedBox(height: 5),
              TextField(
                decoration: InputDecoration(
                  border: OutlineInputBorder(),
                  hintText: 'Please enter your text here',
                ),
                maxLines: 5,
                controller: _controller_text_input,
                onTapOutside: (PointerDownEvent event) {
                  FocusManager.instance.primaryFocus?.unfocus();
                },
              ),
              const SizedBox(height: 5),
              Row(mainAxisAlignment: MainAxisAlignment.center, children: <Widget>[
                OutlinedButton(
                  child: Text("Generate"),
                  onPressed: () async {
                    await _init();
                    await _player?.stop();

                    setState(() {
                      _maxSpeakerID = _tts?.numSpeakers ?? 0;
                      if (_maxSpeakerID > 0) {
                        _maxSpeakerID -= 1;
                      }
                    });

                    if (_tts == null) {
                      _controller_hint.value = TextEditingValue(
                        text: 'Failed to initialize tts',
                      );
                      return;
                    }

                    _controller_hint.value = TextEditingValue(
                      text: '',
                    );

                    final text = _controller_text_input.text.trim();
                    if (text == '') {
                      _controller_hint.value = TextEditingValue(
                        text: 'Please first input your text to generate',
                      );
                      return;
                    }

                    final sid = int.tryParse(_controller_sid.text.trim()) ?? 0;

                    final stopwatch = Stopwatch();
                    stopwatch.start();
                    final genConfig = sherpa_onnx.OfflineTtsGenerationConfig(
                      sid: sid,
                      speed: _speed,
                      silenceScale: 0.2,
                    );
                    final audio =
                        _tts!.generateWithConfig(text: text, config: genConfig);
                    final suffix = '-sid-$sid-speed-${_speed.toStringAsPrecision(2)}';
                    final filename = await generateWaveFilename(suffix);

                    final ok = sherpa_onnx.writeWave(
                      filename: filename,
                      samples: audio.samples,
                      sampleRate: audio.sampleRate,
                    );

                    if (ok) {
                      stopwatch.stop();
                      double elapsed = stopwatch.elapsed.inMilliseconds.toDouble();

                      double waveDuration = audio.samples.length.toDouble() / audio.sampleRate.toDouble();

                      _controller_hint.value = TextEditingValue(
                        text: 'Saved to\n$filename\n'
                            'Elapsed: ${(elapsed / 1000).toStringAsPrecision(4)} s\n'
                            'Wave duration: ${waveDuration.toStringAsPrecision(4)} s\n'
                            'RTF: ${(elapsed / 1000).toStringAsPrecision(4)}/${waveDuration.toStringAsPrecision(4)} '
                            '= ${(elapsed / 1000 / waveDuration).toStringAsPrecision(3)} ',
                      );
                      _lastFilename = filename;

                      await _player?.play(DeviceFileSource(_lastFilename));
                    } else {
                      _controller_hint.value = TextEditingValue(
                        text: 'Failed to save generated audio',
                      );
                    }
                  },
                ),
                const SizedBox(width: 5),
                OutlinedButton(
                  child: Text("Clear"),
                  onPressed: () {
                    _controller_text_input.value = TextEditingValue(
                      text: '',
                    );

                    _controller_hint.value = TextEditingValue(
                      text: '',
                    );
                  },
                ),
                const SizedBox(width: 5),
                OutlinedButton(
                  child: Text("Play"),
                  onPressed: () async {
                    if (_lastFilename == '') {
                      _controller_hint.value = TextEditingValue(
                        text: 'No generated wave file found',
                      );
                      return;
                    }
                    await _player?.stop();
                    await _player?.play(DeviceFileSource(_lastFilename));
                    _controller_hint.value = TextEditingValue(
                      text: 'Playing\n$_lastFilename',
                    );
                  },
                ),
                const SizedBox(width: 5),
                OutlinedButton(
                  child: Text("Stop"),
                  onPressed: () async {
                    await _player?.stop();
                    _controller_hint.value = TextEditingValue(
                      text: '',
                    );
                  },
                ),
              ]),
              const SizedBox(height: 5),
              TextField(
                decoration: InputDecoration(
                  border: OutlineInputBorder(),
                  hintText: 'Logs will be shown here.\n'
                      'The first run is slower due to model initialization.',
                ),
                maxLines: 6,
                controller: _controller_hint,
                readOnly: true,
              ),
            ],
          ),
        ),
      ),
    );
  }

  @override
  void dispose() {
    _tts?.free();
    super.dispose();
  }
}


================================================
FILE: flutter-examples/tts/lib/utils.dart
================================================
// Copyright (c)  2024  Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

import 'package:flutter/services.dart';
import 'package:path/path.dart' as p;
import 'package:path_provider/path_provider.dart';

Future<String> generateWaveFilename([String suffix = '']) async {
  final Directory directory = await getApplicationSupportDirectory();
  DateTime now = DateTime.now();
  final filename =
      '${now.year.toString()}-${now.month.toString().padLeft(2, '0')}-${now.day.toString().padLeft(2, '0')}-${now.hour.toString().padLeft(2, '0')}-${now.minute.toString().padLeft(2, '0')}-${now.second.toString().padLeft(2, '0')}$suffix.wav';

  return p.join(directory.path, filename);
}

// https://stackoverflow.com/questions/68862225/flutter-how-to-get-all-files-from-assets-folder-in-one-list
Future<List<String>> getAllAssetFiles() async {
  final AssetManifest assetManifest =
      await AssetManifest.loadFromAssetBundle(rootBundle);
  final List<String> assets = assetManifest.listAssets();
  return assets;
}

String stripLeadingDirectory(String src, {int n = 1}) {
  return p.joinAll(p.split(src).sublist(n));
}

Future<void> copyAllAssetFiles() async {
  final allFiles = await getAllAssetFiles();
  for (final src in allFiles) {
    final dst = stripLeadingDirectory(src);
    await copyAssetFile(src, dst);
  }
}

// Copy the asset file from src to dst.
// If dst already exists, then just skip the copy
Future<String> copyAssetFile(String src, [String? dst]) async {
  final Directory directory = await getApplicationSupportDirectory();
  if (dst == null) {
    dst = p.basename(src);
  }
  final target = p.join(directory.path, dst);
  bool exists = await new File(target).exists();

  final data = await rootBundle.load(src);
  if (!exists || File(target).lengthSync() != data.lengthInBytes) {
    final List<int> bytes =
        data.buffer.asUint8List(data.offsetInBytes, data.lengthInBytes);
    await (await File(target).create(recursive: true)).writeAsBytes(bytes);
  }

  return target;
}


================================================
FILE: flutter-examples/tts/linux/.gitignore
================================================
flutter/ephemeral


================================================
FILE: flutter-examples/tts/linux/CMakeLists.txt
================================================
# Project-level configuration.
cmake_minimum_required(VERSION 3.10)
project(runner LANGUAGES CXX)

# The name of the executable created for the application. Change this to change
# the on-disk name of your application.
set(BINARY_NAME "tts")
# The unique GTK application identifier for this application. See:
# https://wiki.gnome.org/HowDoI/ChooseApplicationID
set(APPLICATION_ID "com.k2fsa.sherpa.onnx.tts")

# Explicitly opt in to modern CMake behaviors to avoid warnings with recent
# versions of CMake.
cmake_policy(SET CMP0063 NEW)

# Load bundled libraries from the lib/ directory relative to the binary.
set(CMAKE_INSTALL_RPATH "$ORIGIN/lib")

# Root filesystem for cross-building.
if(FLUTTER_TARGET_PLATFORM_SYSROOT)
  set(CMAKE_SYSROOT ${FLUTTER_TARGET_PLATFORM_SYSROOT})
  set(CMAKE_FIND_ROOT_PATH ${CMAKE_SYSROOT})
  set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
  set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
  set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
  set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
endif()

# Define build configuration options.
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
  set(CMAKE_BUILD_TYPE "Debug" CACHE
    STRING "Flutter build mode" FORCE)
  set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS
    "Debug" "Profile" "Release")
endif()

# Compilation settings that should be applied to most targets.
#
# Be cautious about adding new options here, as plugins use this function by
# default. In most cases, you should add new options to specific targets instead
# of modifying this function.
function(APPLY_STANDARD_SETTINGS TARGET)
  target_compile_features(${TARGET} PUBLIC cxx_std_14)
  target_compile_options(${TARGET} PRIVATE -Wall -Werror)
  target_compile_options(${TARGET} PRIVATE "$<$<NOT:$<CONFIG:Debug>>:-O3>")
  target_compile_definitions(${TARGET} PRIVATE "$<$<NOT:$<CONFIG:Debug>>:NDEBUG>")
endfunction()

# Flutter library and tool build rules.
set(FLUTTER_MANAGED_DIR "${CMAKE_CURRENT_SOURCE_DIR}/flutter")
add_subdirectory(${FLUTTER_MANAGED_DIR})

# System-level dependencies.
find_package(PkgConfig REQUIRED)
pkg_check_modules(GTK REQUIRED IMPORTED_TARGET gtk+-3.0)

add_definitions(-DAPPLICATION_ID="${APPLICATION_ID}")

# Define the application target. To change its name, change BINARY_NAME above,
# not the value here, or `flutter run` will no longer work.
#
# Any new source files that you add to the application should be added here.
add_executable(${BINARY_NAME}
  "main.cc"
  "my_application.cc"
  "${FLUTTER_MANAGED_DIR}/generated_plugin_registrant.cc"
)

# Apply the standard set of build settings. This can be removed for applications
# that need different build settings.
apply_standard_settings(${BINARY_NAME})

# Add dependency libraries. Add any application-specific dependencies here.
target_link_libraries(${BINARY_NAME} PRIVATE flutter)
target_link_libraries(${BINARY_NAME} PRIVATE PkgConfig::GTK)

# Run the Flutter tool portions of the build. This must not be removed.
add_dependencies(${BINARY_NAME} flutter_assemble)

# Only the install-generated bundle's copy of the executable will launch
# correctly, since the resources must in the right relative locations. To avoid
# people trying to run the unbundled copy, put it in a subdirectory instead of
# the default top-level location.
set_target_properties(${BINARY_NAME}
  PROPERTIES
  RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/intermediates_do_not_run"
)


# Generated plugin build rules, which manage building the plugins and adding
# them to the application.
include(flutter/generated_plugins.cmake)


# === Installation ===
# By default, "installing" just makes a relocatable bundle in the build
# directory.
set(BUILD_BUNDLE_DIR "${PROJECT_BINARY_DIR}/bundle")
if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
  set(CMAKE_INSTALL_PREFIX "${BUILD_BUNDLE_DIR}" CACHE PATH "..." FORCE)
endif()

# Start with a clean build bundle directory every time.
install(CODE "
  file(REMOVE_RECURSE \"${BUILD_BUNDLE_DIR}/\")
  " COMPONENT Runtime)

set(INSTALL_BUNDLE_DATA_DIR "${CMAKE_INSTALL_PREFIX}/data")
set(INSTALL_BUNDLE_LIB_DIR "${CMAKE_INSTALL_PREFIX}/lib")

install(TARGETS ${BINARY_NAME} RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}"
  COMPONENT Runtime)

install(FILES "${FLUTTER_ICU_DATA_FILE}" DESTINATION "${INSTALL_BUNDLE_DATA_DIR}"
  COMPONENT Runtime)

install(FILES "${FLUTTER_LIBRARY}" DESTINATION "${INSTALL_BUNDLE_LIB_DIR}"
  COMPONENT Runtime)

foreach(bundled_library ${PLUGIN_BUNDLED_LIBRARIES})
  install(FILES "${bundled_library}"
    DESTINATION "${INSTALL_BUNDLE_LIB_DIR}"
    COMPONENT Runtime)
endforeach(bundled_library)

# Copy the native assets provided by the build.dart from all packages.
set(NATIVE_ASSETS_DIR "${PROJECT_BUILD_DIR}native_assets/linux/")
install(DIRECTORY "${NATIVE_ASSETS_DIR}"
   DESTINATION "${INSTALL_BUNDLE_LIB_DIR}"
   COMPONENT Runtime)

# Fully re-copy the assets directory on each build to avoid having stale files
# from a previous install.
set(FLUTTER_ASSET_DIR_NAME "flutter_assets")
install(CODE "
  file(REMOVE_RECURSE \"${INSTALL_BUNDLE_DATA_DIR}/${FLUTTER_ASSET_DIR_NAME}\")
  " COMPONENT Runtime)
install(DIRECTORY "${PROJECT_BUILD_DIR}/${FLUTTER_ASSET_DIR_NAME}"
  DESTINATION "${INSTALL_BUNDLE_DATA_DIR}" COMPONENT Runtime)

# Install the AOT library on non-Debug builds only.
if(NOT CMAKE_BUILD_TYPE MATCHES "Debug")
  install(FILES "${AOT_LIBRARY}" DESTINATION "${INSTALL_BUNDLE_LIB_DIR}"
    COMPONENT Runtime)
endif()


================================================
FILE: flutter-examples/tts/linux/flutter/CMakeLists.txt
================================================
# This file controls Flutter-level build steps. It should not be edited.
cmake_minimum_required(VERSION 3.10)

set(EPHEMERAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/ephemeral")

# Configuration provided via flutter tool.
include(${EPHEMERAL_DIR}/generated_config.cmake)

# TODO: Move the rest of this into files in ephemeral. See
# https://github.com/flutter/flutter/issues/57146.

# Serves the same purpose as list(TRANSFORM ... PREPEND ...),
# which isn't available in 3.10.
function(list_prepend LIST_NAME PREFIX)
    set(NEW_LIST "")
    foreach(element ${${LIST_NAME}})
        list(APPEND NEW_LIST "${PREFIX}${element}")
    endforeach(element)
    set(${LIST_NAME} "${NEW_LIST}" PARENT_SCOPE)
endfunction()

# === Flutter Library ===
# System-level dependencies.
find_package(PkgConfig REQUIRED)
pkg_check_modules(GTK REQUIRED IMPORTED_TARGET gtk+-3.0)
pkg_check_modules(GLIB REQUIRED IMPORTED_TARGET glib-2.0)
pkg_check_modules(GIO REQUIRED IMPORTED_TARGET gio-2.0)

set(FLUTTER_LIBRARY "${EPHEMERAL_DIR}/libflutter_linux_gtk.so")

# Published to parent scope for install step.
set(FLUTTER_LIBRARY ${FLUTTER_LIBRARY} PARENT_SCOPE)
set(FLUTTER_ICU_DATA_FILE "${EPHEMERAL_DIR}/icudtl.dat" PARENT_SCOPE)
set(PROJECT_BUILD_DIR "${PROJECT_DIR}/build/" PARENT_SCOPE)
set(AOT_LIBRARY "${PROJECT_DIR}/build/lib/libapp.so" PARENT_SCOPE)

list(APPEND FLUTTER_LIBRARY_HEADERS
  "fl_basic_message_channel.h"
  "fl_binary_codec.h"
  "fl_binary_messenger.h"
  "fl_dart_project.h"
  "fl_engine.h"
  "fl_json_message_codec.h"
  "fl_json_method_codec.h"
  "fl_message_codec.h"
  "fl_method_call.h"
  "fl_method_channel.h"
  "fl_method_codec.h"
  "fl_method_response.h"
  "fl_plugin_registrar.h"
  "fl_plugin_registry.h"
  "fl_standard_message_codec.h"
  "fl_standard_method_codec.h"
  "fl_string_codec.h"
  "fl_value.h"
  "fl_view.h"
  "flutter_linux.h"
)
list_prepend(FLUTTER_LIBRARY_HEADERS "${EPHEMERAL_DIR}/flutter_linux/")
add_library(flutter INTERFACE)
target_include_directories(flutter INTERFACE
  "${EPHEMERAL_DIR}"
)
target_link_libraries(flutter INTERFACE "${FLUTTER_LIBRARY}")
target_link_libraries(flutter INTERFACE
  PkgConfig::GTK
  PkgConfig::GLIB
  PkgConfig::GIO
)
add_dependencies(flutter flutter_assemble)

# === Flutter tool backend ===
# _phony_ is a non-existent file to force this command to run every time,
# since currently there's no way to get a full input/output list from the
# flutter tool.
add_custom_command(
  OUTPUT ${FLUTTER_LIBRARY} ${FLUTTER_LIBRARY_HEADERS}
    ${CMAKE_CURRENT_BINARY_DIR}/_phony_
  COMMAND ${CMAKE_COMMAND} -E env
    ${FLUTTER_TOOL_ENVIRONMENT}
    "${FLUTTER_ROOT}/packages/flutter_tools/bin/tool_backend.sh"
      ${FLUTTER_TARGET_PLATFORM} ${CMAKE_BUILD_TYPE}
  VERBATIM
)
add_custom_target(flutter_assemble DEPENDS
  "${FLUTTER_LIBRARY}"
  ${FLUTTER_LIBRARY_HEADERS}
)


================================================
FILE: flutter-examples/tts/linux/main.cc
================================================
#include "my_application.h"

int main(int argc, char** argv) {
  g_autoptr(MyApplication) app = my_application_new();
  return g_application_run(G_APPLICATION(app), argc, argv);
}


================================================
FILE: flutter-examples/tts/linux/my_application.cc
================================================
#include "my_application.h"

#include <flutter_linux/flutter_linux.h>
#ifdef GDK_WINDOWING_X11
#include <gdk/gdkx.h>
#endif

#include "flutter/generated_plugin_registrant.h"

struct _MyApplication {
  GtkApplication parent_instance;
  char** dart_entrypoint_arguments;
};

G_DEFINE_TYPE(MyApplication, my_application, GTK_TYPE_APPLICATION)

// Implements GApplication::activate.
static void my_application_activate(GApplication* application) {
  MyApplication* self = MY_APPLICATION(application);
  GtkWindow* window =
      GTK_WINDOW(gtk_application_window_new(GTK_APPLICATION(application)));

  // Use a header bar when running in GNOME as this is the common style used
  // by applications and is the setup most users will be using (e.g. Ubuntu
  // desktop).
  // If running on X and not using GNOME then just use a traditional title bar
  // in case the window manager does more exotic layout, e.g. tiling.
  // If running on Wayland assume the header bar will work (may need changing
  // if future cases occur).
  gboolean use_header_bar = TRUE;
#ifdef GDK_WINDOWING_X11
  GdkScreen* screen = gtk_window_get_screen(window);
  if (GDK_IS_X11_SCREEN(screen)) {
    const gchar* wm_name = gdk_x11_screen_get_window_manager_name(screen);
    if (g_strcmp0(wm_name, "GNOME Shell") != 0) {
      use_header_bar = FALSE;
    }
  }
#endif
  if (use_header_bar) {
    GtkHeaderBar* header_bar = GTK_HEADER_BAR(gtk_header_bar_new());
    gtk_widget_show(GTK_WIDGET(header_bar));
    gtk_header_bar_set_title(header_bar, "tts");
    gtk_header_bar_set_show_close_button(header_bar, TRUE);
    gtk_window_set_titlebar(window, GTK_WIDGET(header_bar));
  } else {
    gtk_window_set_title(window, "tts");
  }

  gtk_window_set_default_size(window, 1280, 720);
  gtk_widget_show(GTK_WIDGET(window));

  g_autoptr(FlDartProject) project = fl_dart_project_new();
  fl_dart_project_set_dart_entrypoint_arguments(project, self->dart_entrypoint_arguments);

  FlView* view = fl_view_new(project);
  gtk_widget_show(GTK_WIDGET(view));
  gtk_container_add(GTK_CONTAINER(window), GTK_WIDGET(view));

  fl_register_plugins(FL_PLUGIN_REGISTRY(view));

  gtk_widget_grab_focus(GTK_WIDGET(view));
}

// Implements GApplication::local_command_line.
static gboolean my_application_local_command_line(GApplication* application, gchar*** arguments, int* exit_status) {
  MyApplication* self = MY_APPLICATION(application);
  // Strip out the first argument as it is the binary name.
  self->dart_entrypoint_arguments = g_strdupv(*arguments + 1);

  g_autoptr(GError) error = nullptr;
  if (!g_application_register(application, nullptr, &error)) {
     g_warning("Failed to register: %s", error->message);
     *exit_status = 1;
     return TRUE;
  }

  g_application_activate(application);
  *exit_status = 0;

  return TRUE;
}

// Implements GApplication::startup.
static void my_application_startup(GApplication* application) {
  //MyApplication* self = MY_APPLICATION(object);

  // Perform any actions required at application startup.

  G_APPLICATION_CLASS(my_application_parent_class)->startup(application);
}

// Implements GApplication::shutdown.
static void my_application_shutdown(GApplication* application) {
  //MyApplication* self = MY_APPLICATION(object);

  // Perform any actions required at application shutdown.

  G_APPLICATION_CLASS(my_application_parent_class)->shutdown(application);
}

// Implements GObject::dispose.
static void my_application_dispose(GObject* object) {
  MyApplication* self = MY_APPLICATION(object);
  g_clear_pointer(&self->dart_entrypoint_arguments, g_strfreev);
  G_OBJECT_CLASS(my_application_parent_class)->dispose(object);
}

static void my_application_class_init(MyApplicationClass* klass) {
  G_APPLICATION_CLASS(klass)->activate = my_application_activate;
  G_APPLICATION_CLASS(klass)->local_command_line = my_application_local_command_line;
  G_APPLICATION_CLASS(klass)->startup = my_application_startup;
  G_APPLICATION_CLASS(klass)->shutdown = my_application_shutdown;
  G_OBJECT_CLASS(klass)->dispose = my_application_dispose;
}

static void my_application_init(MyApplication* self) {}

MyApplication* my_application_new() {
  return MY_APPLICATION(g_object_new(my_application_get_type(),
                                     "application-id", APPLICATION_ID,
                                     "flags", G_APPLICATION_NON_UNIQUE,
                                     nullptr));
}


================================================
FILE: flutter-examples/tts/linux/my_application.h
================================================
#ifndef FLUTTER_MY_APPLICATION_H_
#define FLUTTER_MY_APPLICATION_H_

#include <gtk/gtk.h>

G_DECLARE_FINAL_TYPE(MyApplication, my_application, MY, APPLICATION,
                     GtkApplication)

/**
 * my_application_new:
 *
 * Creates a new Flutter-based application.
 *
 * Returns: a new #MyApplication.
 */
MyApplication* my_application_new();

#endif  // FLUTTER_MY_APPLICATION_H_


================================================
FILE: flutter-examples/tts/macos/.gitignore
================================================
# Flutter-related
**/Flutter/ephemeral/
**/Pods/

# Xcode-related
**/dgph
**/xcuserdata/


================================================
FILE: flutter-examples/tts/macos/Flutter/Flutter-Debug.xcconfig
================================================
#include "ephemeral/Flutter-Generated.xcconfig"


================================================
FILE: flutter-examples/tts/macos/Flutter/Flutter-Release.xcconfig
================================================
#include "ephemeral/Flutter-Generated.xcconfig"


================================================
FILE: flutter-examples/tts/macos/Runner/AppDelegate.swift
================================================
import Cocoa
import FlutterMacOS

@NSApplicationMain
class AppDelegate: FlutterAppDelegate {
  override func applicationShouldTerminateAfterLastWindowClosed(_ sender: NSApplication) -> Bool {
    return true
  }
}


================================================
FILE: flutter-examples/tts/macos/Runner/Assets.xcassets/AppIcon.appiconset/Contents.json
================================================
{
  "images" : [
    {
      "size" : "16x16",
      "idiom" : "mac",
      "filename" : "app_icon_16.png",
      "scale" : "1x"
    },
    {
      "size" : "16x16",
      "idiom" : "mac",
      "filename" : "app_icon_32.png",
      "scale" : "2x"
    },
    {
      "size" : "32x32",
      "idiom" : "mac",
      "filename" : "app_icon_32.png",
      "scale" : "1x"
    },
    {
      "size" : "32x32",
      "idiom" : "mac",
      "filename" : "app_icon_64.png",
      "scale" : "2x"
    },
    {
      "size" : "128x128",
      "idiom" : "mac",
      "filename" : "app_icon_128.png",
      "scale" : "1x"
    },
    {
      "size" : "128x128",
      "idiom" : "mac",
      "filename" : "app_icon_256.png",
      "scale" : "2x"
    },
    {
      "size" : "256x256",
      "idiom" : "mac",
      "filename" : "app_icon_256.png",
      "scale" : "1x"
    },
    {
      "size" : "256x256",
      "idiom" : "mac",
      "filename" : "app_icon_512.png",
      "scale" : "2x"
    },
    {
      "size" : "512x512",
      "idiom" : "mac",
      "filename" : "app_icon_512.png",
      "scale" : "1x"
    },
    {
      "size" : "512x512",
      "idiom" : "mac",
      "filename" : "app_icon_1024.png",
      "scale" : "2x"
    }
  ],
  "info" : {
    "version" : 1,
    "author" : "xcode"
  }
}


================================================
FILE: flutter-examples/tts/macos/Runner/Base.lproj/MainMenu.xib
================================================
<?xml version="1.0" encoding="UTF-8"?>
<document type="com.apple.InterfaceBuilder3.Cocoa.XIB" version="3.0" toolsVersion="14490.70" targetRuntime="MacOSX.Cocoa" propertyAccessControl="none" useAutolayout="YES" customObjectInstantitationMethod="direct">
    <dependencies>
        <deployment identifier="macosx"/>
        <plugIn identifier="com.apple.InterfaceBuilder.CocoaPlugin" version="14490.70"/>
        <capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
    </dependencies>
    <objects>
        <customObject id="-2" userLabel="File's Owner" customClass="NSApplication">
            <connections>
                <outlet property="delegate" destination="Voe-Tx-rLC" id="GzC-gU-4Uq"/>
            </connections>
        </customObject>
        <customObject id="-1" userLabel="First Responder" customClass="FirstResponder"/>
        <customObject id="-3" userLabel="Application" customClass="NSObject"/>
        <customObject id="Voe-Tx-rLC" customClass="AppDelegate" customModule="Runner" customModuleProvider="target">
            <connections>
                <outlet property="applicationMenu" destination="uQy-DD-JDr" id="XBo-yE-nKs"/>
                <outlet property="mainFlutterWindow" destination="QvC-M9-y7g" id="gIp-Ho-8D9"/>
            </connections>
        </customObject>
        <customObject id="YLy-65-1bz" customClass="NSFontManager"/>
        <menu title="Main Menu" systemMenu="main" id="AYu-sK-qS6">
            <items>
                <menuItem title="APP_NAME" id="1Xt-HY-uBw">
                    <modifierMask key="keyEquivalentModifierMask"/>
                    <menu key="submenu" title="APP_NAME" systemMenu="apple" id="uQy-DD-JDr">
                        <items>
                            <menuItem title="About APP_NAME" id="5kV-Vb-QxS">
                                <modifierMask key="keyEquivalentModifierMask"/>
                                <connections>
                                    <action selector="orderFrontStandardAboutPanel:" target="-1" id="Exp-CZ-Vem"/>
                                </connections>
                            </menuItem>
                            <menuItem isSeparatorItem="YES" id="VOq-y0-SEH"/>
                            <menuItem title="Preferences…" keyEquivalent="," id="BOF-NM-1cW"/>
                            <menuItem isSeparatorItem="YES" id="wFC-TO-SCJ"/>
                            <menuItem title="Services" id="NMo-om-nkz">
                                <modifierMask key="keyEquivalentModifierMask"/>
                                <menu key="submenu" title="Services" systemMenu="services" id="hz9-B4-Xy5"/>
                            </menuItem>
                            <menuItem isSeparatorItem="YES" id="4je-JR-u6R"/>
                            <menuItem title="Hide APP_NAME" keyEquivalent="h" id="Olw-nP-bQN">
                                <connections>
                                    <action selector="hide:" target="-1" id="PnN-Uc-m68"/>
                                </connections>
                            </menuItem>
                            <menuItem title="Hide Others" keyEquivalent="h" id="Vdr-fp-XzO">
                                <modifierMask key="keyEquivalentModifierMask" option="YES" command="YES"/>
                                <connections>
                                    <action selector="hideOtherApplications:" target="-1" id="VT4-aY-XCT"/>
                                </connections>
                            </menuItem>
                            <menuItem title="Show All" id="Kd2-mp-pUS">
                                <modifierMask key="keyEquivalentModifierMask"/>
                                <connections>
                                    <action selector="unhideAllApplications:" target="-1" id="Dhg-Le-xox"/>
                                </connections>
                            </menuItem>
                            <menuItem isSeparatorItem="YES" id="kCx-OE-vgT"/>
                            <menuItem title="Quit APP_NAME" keyEquivalent="q" id="4sb-4s-VLi">
                                <connections>
                                    <action selector="terminate:" target="-1" id="Te7-pn-YzF"/>
                                </connections>
                            </menuItem>
                        </items>
                    </menu>
                </menuItem>
                <menuItem title="Edit" id="5QF-Oa-p0T">
                    <modifierMask key="keyEquivalentModifierMask"/>
                    <menu key="submenu" title="Edit" id="W48-6f-4Dl">
                        <items>
                            <menuItem title="Undo" keyEquivalent="z" id="dRJ-4n-Yzg">
                                <connections>
                                    <action selector="undo:" target="-1" id="M6e-cu-g7V"/>
                                </connections>
                            </menuItem>
                            <menuItem title="Redo" keyEquivalent="Z" id="6dh-zS-Vam">
                                <connections>
                                    <action selector="redo:" target="-1" id="oIA-Rs-6OD"/>
                                </connections>
                            </menuItem>
                            <menuItem isSeparatorItem="YES" id="WRV-NI-Exz"/>
                            <menuItem title="Cut" keyEquivalent="x" id="uRl-iY-unG">
                                <connections>
                                    <action selector="cut:" target="-1" id="YJe-68-I9s"/>
                                </connections>
                            </menuItem>
                            <menuItem title="Copy" keyEquivalent="c" id="x3v-GG-iWU">
                                <connections>
                                    <action selector="copy:" target="-1" id="G1f-GL-Joy"/>
                                </connections>
                            </menuItem>
                            <menuItem title="Paste" keyEquivalent="v" id="gVA-U4-sdL">
                                <connections>
                                    <action selector="paste:" target="-1" id="UvS-8e-Qdg"/>
                                </connections>
                            </menuItem>
                            <menuItem title="Paste and Match Style" keyEquivalent="V" id="WeT-3V-zwk">
                                <modifierMask key="keyEquivalentModifierMask" option="YES" command="YES"/>
                                <connections>
                                    <action selector="pasteAsPlainText:" target="-1" id="cEh-KX-wJQ"/>
                                </connections>
                            </menuItem>
                            <menuItem title="Delete" id="pa3-QI-u2k">
                                <modifierMask key="keyEquivalentModifierMask"/>
                                <connections>
                                    <action selector="delete:" target="-1" id="0Mk-Ml-PaM"/>
                                </connections>
                            </menuItem>
                            <menuItem title="Select All" keyEquivalent="a" id="Ruw-6m-B2m">
                                <connections>
                                    <action selector="selectAll:" target="-1" id="VNm-Mi-diN"/>
                                </connections>
                            </menuItem>
                            <menuItem isSeparatorItem="YES" id="uyl-h8-XO2"/>
                            <menuItem title="Find" id="4EN-yA-p0u">
                                <modifierMask key="keyEquivalentModifierMask"/>
                                <menu key="submenu" title="Find" id="1b7-l0-nxx">
                                    <items>
                                        <menuItem title="Find…" tag="1" keyEquivalent="f" id="Xz5-n4-O0W">
                                            <connections>
                                                <action selector="performFindPanelAction:" target="-1" id="cD7-Qs-BN4"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Find and Replace…" tag="12" keyEquivalent="f" id="YEy-JH-Tfz">
                                            <modifierMask key="keyEquivalentModifierMask" option="YES" command="YES"/>
                                            <connections>
                                                <action selector="performFindPanelAction:" target="-1" id="WD3-Gg-5AJ"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Find Next" tag="2" keyEquivalent="g" id="q09-fT-Sye">
                                            <connections>
                                                <action selector="performFindPanelAction:" target="-1" id="NDo-RZ-v9R"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Find Previous" tag="3" keyEquivalent="G" id="OwM-mh-QMV">
                                            <connections>
                                                <action selector="performFindPanelAction:" target="-1" id="HOh-sY-3ay"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Use Selection for Find" tag="7" keyEquivalent="e" id="buJ-ug-pKt">
                                            <connections>
                                                <action selector="performFindPanelAction:" target="-1" id="U76-nv-p5D"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Jump to Selection" keyEquivalent="j" id="S0p-oC-mLd">
                                            <connections>
                                                <action selector="centerSelectionInVisibleArea:" target="-1" id="IOG-6D-g5B"/>
                                            </connections>
                                        </menuItem>
                                    </items>
                                </menu>
                            </menuItem>
                            <menuItem title="Spelling and Grammar" id="Dv1-io-Yv7">
                                <modifierMask key="keyEquivalentModifierMask"/>
                                <menu key="submenu" title="Spelling" id="3IN-sU-3Bg">
                                    <items>
                                        <menuItem title="Show Spelling and Grammar" keyEquivalent=":" id="HFo-cy-zxI">
                                            <connections>
                                                <action selector="showGuessPanel:" target="-1" id="vFj-Ks-hy3"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Check Document Now" keyEquivalent=";" id="hz2-CU-CR7">
                                            <connections>
                                                <action selector="checkSpelling:" target="-1" id="fz7-VC-reM"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem isSeparatorItem="YES" id="bNw-od-mp5"/>
                                        <menuItem title="Check Spelling While Typing" id="rbD-Rh-wIN">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="toggleContinuousSpellChecking:" target="-1" id="7w6-Qz-0kB"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Check Grammar With Spelling" id="mK6-2p-4JG">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="toggleGrammarChecking:" target="-1" id="muD-Qn-j4w"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Correct Spelling Automatically" id="78Y-hA-62v">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="toggleAutomaticSpellingCorrection:" target="-1" id="2lM-Qi-WAP"/>
                                            </connections>
                                        </menuItem>
                                    </items>
                                </menu>
                            </menuItem>
                            <menuItem title="Substitutions" id="9ic-FL-obx">
                                <modifierMask key="keyEquivalentModifierMask"/>
                                <menu key="submenu" title="Substitutions" id="FeM-D8-WVr">
                                    <items>
                                        <menuItem title="Show Substitutions" id="z6F-FW-3nz">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="orderFrontSubstitutionsPanel:" target="-1" id="oku-mr-iSq"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem isSeparatorItem="YES" id="gPx-C9-uUO"/>
                                        <menuItem title="Smart Copy/Paste" id="9yt-4B-nSM">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="toggleSmartInsertDelete:" target="-1" id="3IJ-Se-DZD"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Smart Quotes" id="hQb-2v-fYv">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="toggleAutomaticQuoteSubstitution:" target="-1" id="ptq-xd-QOA"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Smart Dashes" id="rgM-f4-ycn">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="toggleAutomaticDashSubstitution:" target="-1" id="oCt-pO-9gS"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Smart Links" id="cwL-P1-jid">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="toggleAutomaticLinkDetection:" target="-1" id="Gip-E3-Fov"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Data Detectors" id="tRr-pd-1PS">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="toggleAutomaticDataDetection:" target="-1" id="R1I-Nq-Kbl"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Text Replacement" id="HFQ-gK-NFA">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="toggleAutomaticTextReplacement:" target="-1" id="DvP-Fe-Py6"/>
                                            </connections>
                                        </menuItem>
                                    </items>
                                </menu>
                            </menuItem>
                            <menuItem title="Transformations" id="2oI-Rn-ZJC">
                                <modifierMask key="keyEquivalentModifierMask"/>
                                <menu key="submenu" title="Transformations" id="c8a-y6-VQd">
                                    <items>
                                        <menuItem title="Make Upper Case" id="vmV-6d-7jI">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="uppercaseWord:" target="-1" id="sPh-Tk-edu"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Make Lower Case" id="d9M-CD-aMd">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="lowercaseWord:" target="-1" id="iUZ-b5-hil"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Capitalize" id="UEZ-Bs-lqG">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="capitalizeWord:" target="-1" id="26H-TL-nsh"/>
                                            </connections>
                                        </menuItem>
                                    </items>
                                </menu>
                            </menuItem>
                            <menuItem title="Speech" id="xrE-MZ-jX0">
                                <modifierMask key="keyEquivalentModifierMask"/>
                                <menu key="submenu" title="Speech" id="3rS-ZA-NoH">
                                    <items>
                                        <menuItem title="Start Speaking" id="Ynk-f8-cLZ">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="startSpeaking:" target="-1" id="654-Ng-kyl"/>
                                            </connections>
                                        </menuItem>
                                        <menuItem title="Stop Speaking" id="Oyz-dy-DGm">
                                            <modifierMask key="keyEquivalentModifierMask"/>
                                            <connections>
                                                <action selector="stopSpeaking:" target="-1" id="dX8-6p-jy9"/>
                                            </connections>
                                        </menuItem>
                                    </items>
                                </menu>
                            </menuItem>
                        </items>
                    </menu>
                </menuItem>
                <menuItem title="View" id="H8h-7b-M4v">
                    <modifierMask key="keyEquivalentModifierMask"/>
                    <menu key="submenu" title="View" id="HyV-fh-RgO">
                        <items>
                            <menuItem title="Enter Full Screen" keyEquivalent="f" id="4J7-dP-txa">
                                <modifierMask key="keyEquivalentModifierMask" control="YES" command="YES"/>
                                <connections>
                                    <action selector="toggleFullScreen:" target="-1" id="dU3-MA-1Rq"/>
                                </connections>
                            </menuItem>
                        </items>
                    </menu>
                </menuItem>
                <menuItem title="Window" id="aUF-d1-5bR">
                    <modifierMask key="keyEquivalentModifierMask"/>
                    <menu key="submenu" title="Window" systemMenu="window" id="Td7-aD-5lo">
                        <items>
                            <menuItem title="Minimize" keyEquivalent="m" id="OY7-WF-poV">
                                <connections>
                                    <action selector="performMiniaturize:" target="-1" id="VwT-WD-YPe"/>
                                </connections>
                            </menuItem>
                            <menuItem title="Zoom" id="R4o-n2-Eq4">
                                <modifierMask key="keyEquivalentModifierMask"/>
                                <connections>
                                    <action selector="performZoom:" target="-1" id="DIl-cC-cCs"/>
                                </connections>
                            </menuItem>
                            <menuItem isSeparatorItem="YES" id="eu3-7i-yIM"/>
                            <menuItem title="Bring All to Front" id="LE2-aR-0XJ">
                                <modifierMask key="keyEquivalentModifierMask"/>
                                <connections>
                                    <action selector="arrangeInFront:" target="-1" id="DRN-fu-gQh"/>
                                </connections>
                            </menuItem>
                        </items>
                    </menu>
                </menuItem>
                <menuItem title="Help" id="EPT-qC-fAb">
                    <modifierMask key="keyEquivalentModifierMask"/>
                    <menu key="submenu" title="Help" systemMenu="help" id="rJ0-wn-3NY"/>
                </menuItem>
            </items>
            <point key="canvasLocation" x="142" y="-258"/>
        </menu>
        <window title="APP_NAME" allowsToolTipsWhenApplicationIsInactive="NO" autorecalculatesKeyViewLoop="NO" releasedWhenClosed="NO" animationBehavior="default" id="QvC-M9-y7g" customClass="MainFlutterWindow" customModule="Runner" customModuleProvider="target">
            <windowStyleMask key="styleMask" titled="YES" closable="YES" miniaturizable="YES" resizable="YES"/>
            <rect key="contentRect" x="335" y="390" width="800" height="600"/>
            <rect key="screenRect" x="0.0" y="0.0" width="2560" height="1577"/>
            <view key="contentView" wantsLayer="YES" id="EiT-Mj-1SZ">
                <rect key="frame" x="0.0" y="0.0" width="800" height="600"/>
                <autoresizingMask key="autoresizingMask"/>
            </view>
        </window>
    </objects>
</document>


================================================
FILE: flutter-examples/tts/macos/Runner/Configs/AppInfo.xcconfig
================================================
// Application-level settings for the Runner target.
//
// This may be replaced with something auto-generated from metadata (e.g., pubspec.yaml) in the
// future. If not, the values below would default to using the project name when this becomes a
// 'flutter create' template.

// The application's name. By default this is also the title of the Flutter window.
PRODUCT_NAME = tts

// The application's bundle identifier
PRODUCT_BUNDLE_IDENTIFIER = com.k2fsa.sherpa.onnx.tts

// The copyright displayed in application information
PRODUCT_COPYRIGHT = Copyright © 2024 Next-gen Kaldi. All rights reserved.


================================================
FILE: flutter-examples/tts/macos/Runner/Configs/Debug.xcconfig
================================================
#include "../../Flutter/Flutter-Debug.xcconfig"
#include "Warnings.xcconfig"


================================================
FILE: flutter-examples/tts/macos/Runner/Configs/Release.xcconfig
================================================
#include "../../Flutter/Flutter-Release.xcconfig"
#include "Warnings.xcconfig"


================================================
FILE: flutter-examples/tts/macos/Runner/Configs/Warnings.xcconfig
================================================
WARNING_CFLAGS = -Wall -Wconditional-uninitialized -Wnullable-to-nonnull-conversion -Wmissing-method-return-type -Woverlength-strings
GCC_WARN_UNDECLARED_SELECTOR = YES
CLANG_UNDEFINED_BEHAVIOR_SANITIZER_NULLABILITY = YES
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES
CLANG_WARN_PRAGMA_PACK = YES
CLANG_WARN_STRICT_PROTOTYPES = YES
CLANG_WARN_COMMA = YES
GCC_WARN_STRICT_SELECTOR_MATCH = YES
CLANG_WARN_OBJC_REPEATED_USE_OF_WEAK = YES
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES
GCC_WARN_SHADOW = YES
CLANG_WARN_UNREACHABLE_CODE = YES


================================================
FILE: flutter-examples/tts/macos/Runner/DebugProfile.entitlements
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>com.apple.security.app-sandbox</key>
	<true/>
	<key>com.apple.security.cs.allow-jit</key>
	<true/>
	<key>com.apple.security.network.server</key>
	<true/>
</dict>
</plist>


================================================
FILE: flutter-examples/tts/macos/Runner/Info.plist
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>CFBundleDevelopmentRegion</key>
	<string>$(DEVELOPMENT_LANGUAGE)</string>
	<key>CFBundleExecutable</key>
	<string>$(EXECUTABLE_NAME)</string>
	<key>CFBundleIconFile</key>
	<string></string>
	<key>CFBundleIdentifier</key>
	<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
	<key>CFBundleInfoDictionaryVersion</key>
	<string>6.0</string>
	<key>CFBundleName</key>
	<string>$(PRODUCT_NAME)</string>
	<key>CFBundlePackageType</key>
	<string>APPL</string>
	<key>CFBundleShortVersionString</key>
	<string>$(FLUTTER_BUILD_NAME)</string>
	<key>CFBundleVersion</key>
	<string>$(FLUTTER_BUILD_NUMBER)</string>
	<key>LSMinimumSystemVersion</key>
	<string>$(MACOSX_DEPLOYMENT_TARGET)</string>
	<key>NSHumanReadableCopyright</key>
	<string>$(PRODUCT_COPYRIGHT)</string>
	<key>NSMainNibFile</key>
	<string>MainMenu</string>
	<key>NSPrincipalClass</key>
	<string>NSApplication</string>
</dict>
</plist>


================================================
FILE: flutter-examples/tts/macos/Runner/MainFlutterWindow.swift
================================================
import Cocoa
import FlutterMacOS

class MainFlutterWindow: NSWindow {
  override func awakeFromNib() {
    let flutterViewController = FlutterViewController()
    let windowFrame = self.frame
    self.contentViewController = flutterViewController
    self.setFrame(windowFrame, display: true)

    RegisterGeneratedPlugins(registry: flutterViewController)

    super.awakeFromNib()
  }
}


================================================
FILE: flutter-examples/tts/macos/Runner/Release.entitlements
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>com.apple.security.app-sandbox</key>
	<true/>
</dict>
</plist>


================================================
FILE: flutter-examples/tts/macos/Runner.xcodeproj/project.pbxproj
================================================
// !$*UTF8*$!
{
	archiveVersion = 1;
	classes = {
	};
	objectVersion = 54;
	objects = {

/* Begin PBXAggregateTarget section */
		33CC111A2044C6BA0003C045 /* Flutter Assemble */ = {
			isa = PBXAggregateTarget;
			buildConfigurationList = 33CC111B2044C6BA0003C045 /* Build configuration list for PBXAggregateTarget "Flutter Assemble" */;
			buildPhases = (
				33CC111E2044C6BF0003C045 /* ShellScript */,
			);
			dependencies = (
			);
			name = "Flutter Assemble";
			productName = FLX;
		};
/* End PBXAggregateTarget section */

/* Begin PBXBuildFile section */
		331C80D8294CF71000263BE5 /* RunnerTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 331C80D7294CF71000263BE5 /* RunnerTests.swift */; };
		335BBD1B22A9A15E00E9071D /* GeneratedPluginRegistrant.swift in Sources */ = {isa = PBXBuildFile; fileRef = 335BBD1A22A9A15E00E9071D /* GeneratedPluginRegistrant.swift */; };
		33CC10F12044A3C60003C045 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 33CC10F02044A3C60003C045 /* AppDelegate.swift */; };
		33CC10F32044A3C60003C045 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 33CC10F22044A3C60003C045 /* Assets.xcassets */; };
		33CC10F62044A3C60003C045 /* MainMenu.xib in Resources */ = {isa = PBXBuildFile; fileRef = 33CC10F42044A3C60003C045 /* MainMenu.xib */; };
		33CC11132044BFA00003C045 /* MainFlutterWindow.swift in Sources */ = {isa = PBXBuildFile; fileRef = 33CC11122044BFA00003C045 /* MainFlutterWindow.swift */; };
/* End PBXBuildFile section */

/* Begin PBXContainerItemProxy section */
		331C80D9294CF71000263BE5 /* PBXContainerItemProxy */ = {
			isa = PBXContainerItemProxy;
			containerPortal = 33CC10E52044A3C60003C045 /* Project object */;
			proxyType = 1;
			remoteGlobalIDString = 33CC10EC2044A3C60003C045;
			remoteInfo = Runner;
		};
		33CC111F2044C79F0003C045 /* PBXContainerItemProxy */ = {
			isa = PBXContainerItemProxy;
			containerPortal = 33CC10E52044A3C60003C045 /* Project object */;
			proxyType = 1;
			remoteGlobalIDString = 33CC111A2044C6BA0003C045;
			remoteInfo = FLX;
		};
/* End PBXContainerItemProxy section */

/* Begin PBXCopyFilesBuildPhase section */
		33CC110E2044A8840003C045 /* Bundle Framework */ = {
			isa = PBXCopyFilesBuildPhase;
			buildActionMask = 2147483647;
			dstPath = "";
			dstSubfolderSpec = 10;
			files = (
			);
			name = "Bundle Framework";
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXCopyFilesBuildPhase section */

/* Begin PBXFileReference section */
		331C80D5294CF71000263BE5 /* RunnerTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = RunnerTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
		331C80D7294CF71000263BE5 /* RunnerTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RunnerTests.swift; sourceTree = "<group>"; };
		333000ED22D3DE5D00554162 /* Warnings.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = Warnings.xcconfig; sourceTree = "<group>"; };
		335BBD1A22A9A15E00E9071D /* GeneratedPluginRegistrant.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = GeneratedPluginRegistrant.swift; sourceTree = "<group>"; };
		33CC10ED2044A3C60003C045 /* tts.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "tts.app"; sourceTree = BUILT_PRODUCTS_DIR; };
		33CC10F02044A3C60003C045 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; };
		33CC10F22044A3C60003C045 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; name = Assets.xcassets; path = Runner/Assets.xcassets; sourceTree = "<group>"; };
		33CC10F52044A3C60003C045 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.xib; name = Base; path = Base.lproj/MainMenu.xib; sourceTree = "<group>"; };
		33CC10F72044A3C60003C045 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; name = Info.plist; path = Runner/Info.plist; sourceTree = "<group>"; };
		33CC11122044BFA00003C045 /* MainFlutterWindow.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MainFlutterWindow.swift; sourceTree = "<group>"; };
		33CEB47222A05771004F2AC0 /* Flutter-Debug.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = "Flutter-Debug.xcconfig"; sourceTree = "<group>"; };
		33CEB47422A05771004F2AC0 /* Flutter-Release.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = "Flutter-Release.xcconfig"; sourceTree = "<group>"; };
		33CEB47722A0578A004F2AC0 /* Flutter-Generated.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; name = "Flutter-Generated.xcconfig"; path = "ephemeral/Flutter-Generated.xcconfig"; sourceTree = "<group>"; };
		33E51913231747F40026EE4D /* DebugProfile.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = DebugProfile.entitlements; sourceTree = "<group>"; };
		33E51914231749380026EE4D /* Release.entitlements */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.entitlements; path = Release.entitlements; sourceTree = "<group>"; };
		33E5194F232828860026EE4D /* AppInfo.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = AppInfo.xcconfig; sourceTree = "<group>"; };
		7AFA3C8E1D35360C0083082E /* Release.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = Release.xcconfig; sourceTree = "<group>"; };
		9740EEB21CF90195004384FC /* Debug.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; path = Debug.xcconfig; sourceTree = "<group>"; };
/* End PBXFileReference section */

/* Begin PBXFrameworksBuildPhase section */
		331C80D2294CF70F00263BE5 /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		33CC10EA2044A3C60003C045 /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXFrameworksBuildPhase section */

/* Begin PBXGroup section */
		331C80D6294CF71000263BE5 /* RunnerTests */ = {
			isa = PBXGroup;
			children = (
				331C80D7294CF71000263BE5 /* RunnerTests.swift */,
			);
			path = RunnerTests;
			sourceTree = "<group>";
		};
		33BA886A226E78AF003329D5 /* Configs */ = {
			isa = PBXGroup;
			children = (
				33E5194F232828860026EE4D /* AppInfo.xcconfig */,
				9740EEB21CF90195004384FC /* Debug.xcconfig */,
				7AFA3C8E1D35360C0083082E /* Release.xcconfig */,
				333000ED22D3DE5D00554162 /* Warnings.xcconfig */,
			);
			path = Configs;
			sourceTree = "<group>";
		};
		33CC10E42044A3C60003C045 = {
			isa = PBXGroup;
			children = (
				33FAB671232836740065AC1E /* Runner */,
				33CEB47122A05771004F2AC0 /* Flutter */,
				331C80D6294CF71000263BE5 /* RunnerTests */,
				33CC10EE2044A3C60003C045 /* Products */,
				D73912EC22F37F3D000D13A0 /* Frameworks */,
			);
			sourceTree = "<group>";
		};
		33CC10EE2044A3C60003C045 /* Products */ = {
			isa = PBXGroup;
			children = (
				33CC10ED2044A3C60003C045 /* tts.app */,
				331C80D5294CF71000263BE5 /* RunnerTests.xctest */,
			);
			name = Products;
			sourceTree = "<group>";
		};
		33CC11242044D66E0003C045 /* Resources */ = {
			isa = PBXGroup;
			children = (
				33CC10F22044A3C60003C045 /* Assets.xcassets */,
				33CC10F42044A3C60003C045 /* MainMenu.xib */,
				33CC10F72044A3C60003C045 /* Info.plist */,
			);
			name = Resources;
			path = ..;
			sourceTree = "<group>";
		};
		33CEB47122A05771004F2AC0 /* Flutter */ = {
			isa = PBXGroup;
			children = (
				335BBD1A22A9A15E00E9071D /* GeneratedPluginRegistrant.swift */,
				33CEB47222A05771004F2AC0 /* Flutter-Debug.xcconfig */,
				33CEB47422A05771004F2AC0 /* Flutter-Release.xcconfig */,
				33CEB47722A0578A004F2AC0 /* Flutter-Generated.xcconfig */,
			);
			path = Flutter;
			sourceTree = "<group>";
		};
		33FAB671232836740065AC1E /* Runner */ = {
			isa = PBXGroup;
			children = (
				33CC10F02044A3C60003C045 /* AppDelegate.swift */,
				33CC11122044BFA00003C045 /* MainFlutterWindow.swift */,
				33E51913231747F40026EE4D /* DebugProfile.entitlements */,
				33E51914231749380026EE4D /* Release.entitlements */,
				33CC11242044D66E0003C045 /* Resources */,
				33BA886A226E78AF003329D5 /* Configs */,
			);
			path = Runner;
			sourceTree = "<group>";
		};
		D73912EC22F37F3D000D13A0 /* Frameworks */ = {
			isa = PBXGroup;
			children = (
			);
			name = Frameworks;
			sourceTree = "<group>";
		};
/* End PBXGroup section */

/* Begin PBXNativeTarget section */
		331C80D4294CF70F00263BE5 /* RunnerTests */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = 331C80DE294CF71000263BE5 /* Build configuration list for PBXNativeTarget "RunnerTests" */;
			buildPhases = (
				331C80D1294CF70F00263BE5 /* Sources */,
				331C80D2294CF70F00263BE5 /* Frameworks */,
				331C80D3294CF70F00263BE5 /* Resources */,
			);
			buildRules = (
			);
			dependencies = (
				331C80DA294CF71000263BE5 /* PBXTargetDependency */,
			);
			name = RunnerTests;
			productName = RunnerTests;
			productReference = 331C80D5294CF71000263BE5 /* RunnerTests.xctest */;
			productType = "com.apple.product-type.bundle.unit-test";
		};
		33CC10EC2044A3C60003C045 /* Runner */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = 33CC10FB2044A3C60003C045 /* Build configuration list for PBXNativeTarget "Runner" */;
			buildPhases = (
				33CC10E92044A3C60003C045 /* Sources */,
				33CC10EA2044A3C60003C045 /* Frameworks */,
				33CC10EB2044A3C60003C045 /* Resources */,
				33CC110E2044A8840003C045 /* Bundle Framework */,
				3399D490228B24CF009A79C7 /* ShellScript */,
			);
			buildRules = (
			);
			dependencies = (
				33CC11202044C79F0003C045 /* PBXTargetDependency */,
			);
			name = Runner;
			productName = Runner;
			productReference = 33CC10ED2044A3C60003C045 /* tts.app */;
			productType = "com.apple.product-type.application";
		};
/* End PBXNativeTarget section */

/* Begin PBXProject section */
		33CC10E52044A3C60003C045 /* Project object */ = {
			isa = PBXProject;
			attributes = {
				BuildIndependentTargetsInParallel = YES;
				LastSwiftUpdateCheck = 0920;
				LastUpgradeCheck = 1510;
				ORGANIZATIONNAME = "";
				TargetAttributes = {
					331C80D4294CF70F00263BE5 = {
						CreatedOnToolsVersion = 14.0;
						TestTargetID = 33CC10EC2044A3C60003C045;
					};
					33CC10EC2044A3C60003C045 = {
						CreatedOnToolsVersion = 9.2;
						LastSwiftMigration = 1100;
						ProvisioningStyle = Automatic;
						SystemCapabilities = {
							com.apple.Sandbox = {
								enabled = 1;
							};
						};
					};
					33CC111A2044C6BA0003C045 = {
						CreatedOnToolsVersion = 9.2;
						ProvisioningStyle = Manual;
					};
				};
			};
			buildConfigurationList = 33CC10E82044A3C60003C045 /* Build configuration list for PBXProject "Runner" */;
			compatibilityVersion = "Xcode 9.3";
			developmentRegion = en;
			hasScannedForEncodings = 0;
			knownRegions = (
				en,
				Base,
			);
			mainGroup = 33CC10E42044A3C60003C045;
			productRefGroup = 33CC10EE2044A3C60003C045 /* Products */;
			projectDirPath = "";
			projectRoot = "";
			targets = (
				33CC10EC2044A3C60003C045 /* Runner */,
				331C80D4294CF70F00263BE5 /* RunnerTests */,
				33CC111A2044C6BA0003C045 /* Flutter Assemble */,
			);
		};
/* End PBXProject section */

/* Begin PBXResourcesBuildPhase section */
		331C80D3294CF70F00263BE5 /* Resources */ = {
			isa = PBXResourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		33CC10EB2044A3C60003C045 /* Resources */ = {
			isa = PBXResourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				33CC10F32044A3C60003C045 /* Assets.xcassets in Resources */,
				33CC10F62044A3C60003C045 /* MainMenu.xib in Resources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXResourcesBuildPhase section */

/* Begin PBXShellScriptBuildPhase section */
		3399D490228B24CF009A79C7 /* ShellScript */ = {
			isa = PBXShellScriptBuildPhase;
			alwaysOutOfDate = 1;
			buildActionMask = 2147483647;
			files = (
			);
			inputFileListPaths = (
			);
			inputPaths = (
			);
			outputFileListPaths = (
			);
			outputPaths = (
			);
			runOnlyForDeploymentPostprocessing = 0;
			shellPath = /bin/sh;
			shellScript = "echo \"$PRODUCT_NAME.app\" > \"$PROJECT_DIR\"/Flutter/ephemeral/.app_filename && \"$FLUTTER_ROOT\"/packages/flutter_tools/bin/macos_assemble.sh embed\n";
		};
		33CC111E2044C6BF0003C045 /* ShellScript */ = {
			isa = PBXShellScriptBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			inputFileListPaths = (
				Flutter/ephemeral/FlutterInputs.xcfilelist,
			);
			inputPaths = (
				Flutter/ephemeral/tripwire,
			);
			outputFileListPaths = (
				Flutter/ephemeral/FlutterOutputs.xcfilelist,
			);
			outputPaths = (
			);
			runOnlyForDeploymentPostprocessing = 0;
			shellPath = /bin/sh;
			shellScript = "\"$FLUTTER_ROOT\"/packages/flutter_tools/bin/macos_assemble.sh && touch Flutter/ephemeral/tripwire";
		};
/* End PBXShellScriptBuildPhase section */

/* Begin PBXSourcesBuildPhase section */
		331C80D1294CF70F00263BE5 /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				331C80D8294CF71000263BE5 /* RunnerTests.swift in Sources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		33CC10E92044A3C60003C045 /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				33CC11132044BFA00003C045 /* MainFlutterWindow.swift in Sources */,
				33CC10F12044A3C60003C045 /* AppDelegate.swift in Sources */,
				335BBD1B22A9A15E00E9071D /* GeneratedPluginRegistrant.swift in Sources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXSourcesBuildPhase section */

/* Begin PBXTargetDependency section */
		331C80DA294CF71000263BE5 /* PBXTargetDependency */ = {
			isa = PBXTargetDependency;
			target = 33CC10EC2044A3C60003C045 /* Runner */;
			targetProxy = 331C80D9294CF71000263BE5 /* PBXContainerItemProxy */;
		};
		33CC11202044C79F0003C045 /* PBXTargetDependency */ = {
			isa = PBXTargetDependency;
			target = 33CC111A2044C6BA0003C045 /* Flutter Assemble */;
			targetProxy = 33CC111F2044C79F0003C045 /* PBXContainerItemProxy */;
		};
/* End PBXTargetDependency section */

/* Begin PBXVariantGroup section */
		33CC10F42044A3C60003C045 /* MainMenu.xib */ = {
			isa = PBXVariantGroup;
			children = (
				33CC10F52044A3C60003C045 /* Base */,
			);
			name = MainMenu.xib;
			path = Runner;
			sourceTree = "<group>";
		};
/* End PBXVariantGroup section */

/* Begin XCBuildConfiguration section */
		331C80DB294CF71000263BE5 /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				BUNDLE_LOADER = "$(TEST_HOST)";
				CURRENT_PROJECT_VERSION = 1;
				GENERATE_INFOPLIST_FILE = YES;
				MARKETING_VERSION = 1.0;
				PRODUCT_BUNDLE_IDENTIFIER = com.k2fsa.sherpa.onnx.tts.RunnerTests;
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_VERSION = 5.0;
				TEST_HOST = "$(BUILT_PRODUCTS_DIR)/tts.app/$(BUNDLE_EXECUTABLE_FOLDER_PATH)/tts";
			};
			name = Debug;
		};
		331C80DC294CF71000263BE5 /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				BUNDLE_LOADER = "$(TEST_HOST)";
				CURRENT_PROJECT_VERSION = 1;
				GENERATE_INFOPLIST_FILE = YES;
				MARKETING_VERSION = 1.0;
				PRODUCT_BUNDLE_IDENTIFIER = com.k2fsa.sherpa.onnx.tts.RunnerTests;
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_VERSION = 5.0;
				TEST_HOST = "$(BUILT_PRODUCTS_DIR)/tts.app/$(BUNDLE_EXECUTABLE_FOLDER_PATH)/tts";
			};
			name = Release;
		};
		331C80DD294CF71000263BE5 /* Profile */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				BUNDLE_LOADER = "$(TEST_HOST)";
				CURRENT_PROJECT_VERSION = 1;
				GENERATE_INFOPLIST_FILE = YES;
				MARKETING_VERSION = 1.0;
				PRODUCT_BUNDLE_IDENTIFIER = com.k2fsa.sherpa.onnx.tts.RunnerTests;
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_VERSION = 5.0;
				TEST_HOST = "$(BUILT_PRODUCTS_DIR)/tts.app/$(BUNDLE_EXECUTABLE_FOLDER_PATH)/tts";
			};
			name = Profile;
		};
		338D0CE9231458BD00FA5F75 /* Profile */ = {
			isa = XCBuildConfiguration;
			baseConfigurationReference = 7AFA3C8E1D35360C0083082E /* Release.xcconfig */;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
				CLANG_ANALYZER_NONNULL = YES;
				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
				CLANG_CXX_LIBRARY = "libc++";
				CLANG_ENABLE_MODULES = YES;
				CLANG_ENABLE_OBJC_ARC = YES;
				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
				CLANG_WARN_BOOL_CONVERSION = YES;
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
				CLANG_WARN_SUSPICIOUS_MOVE = YES;
				CODE_SIGN_IDENTITY = "-";
				COPY_PHASE_STRIP = NO;
				DEAD_CODE_STRIPPING = YES;
				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
				ENABLE_NS_ASSERTIONS = NO;
				ENABLE_STRICT_OBJC_MSGSEND = YES;
				ENABLE_USER_SCRIPT_SANDBOXING = NO;
				GCC_C_LANGUAGE_STANDARD = gnu11;
				GCC_NO_COMMON_BLOCKS = YES;
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
				GCC_WARN_UNUSED_FUNCTION = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				MACOSX_DEPLOYMENT_TARGET = 10.14;
				MTL_ENABLE_DEBUG_INFO = NO;
				SDKROOT = macosx;
				SWIFT_COMPILATION_MODE = wholemodule;
				SWIFT_OPTIMIZATION_LEVEL = "-O";
			};
			name = Profile;
		};
		338D0CEA231458BD00FA5F75 /* Profile */ = {
			isa = XCBuildConfiguration;
			baseConfigurationReference = 33E5194F232828860026EE4D /* AppInfo.xcconfig */;
			buildSettings = {
				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
				CLANG_ENABLE_MODULES = YES;
				CODE_SIGN_ENTITLEMENTS = Runner/DebugProfile.entitlements;
				CODE_SIGN_STYLE = Automatic;
				COMBINE_HIDPI_IMAGES = YES;
				INFOPLIST_FILE = Runner/Info.plist;
				LD_RUNPATH_SEARCH_PATHS = (
					"$(inherited)",
					"@executable_path/../Frameworks",
				);
				PROVISIONING_PROFILE_SPECIFIER = "";
				SWIFT_VERSION = 5.0;
			};
			name = Profile;
		};
		338D0CEB231458BD00FA5F75 /* Profile */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				CODE_SIGN_STYLE = Manual;
				PRODUCT_NAME = "$(TARGET_NAME)";
			};
			name = Profile;
		};
		33CC10F92044A3C60003C045 /* Debug */ = {
			isa = XCBuildConfiguration;
			baseConfigurationReference = 9740EEB21CF90195004384FC /* Debug.xcconfig */;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
				CLANG_ANALYZER_NONNULL = YES;
				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
				CLANG_CXX_LIBRARY = "libc++";
				CLANG_ENABLE_MODULES = YES;
				CLANG_ENABLE_OBJC_ARC = YES;
				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
				CLANG_WARN_BOOL_CONVERSION = YES;
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
				CLANG_WARN_SUSPICIOUS_MOVE = YES;
				CODE_SIGN_IDENTITY = "-";
				COPY_PHASE_STRIP = NO;
				DEAD_CODE_STRIPPING = YES;
				DEBUG_INFORMATION_FORMAT = dwarf;
				ENABLE_STRICT_OBJC_MSGSEND = YES;
				ENABLE_TESTABILITY = YES;
				ENABLE_USER_SCRIPT_SANDBOXING = NO;
				GCC_C_LANGUAGE_STANDARD = gnu11;
				GCC_DYNAMIC_NO_PIC = NO;
				GCC_NO_COMMON_BLOCKS = YES;
				GCC_OPTIMIZATION_LEVEL = 0;
				GCC_PREPROCESSOR_DEFINITIONS = (
					"DEBUG=1",
					"$(inherited)",
				);
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
				GCC_WARN_UNUSED_FUNCTION = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				MACOSX_DEPLOYMENT_TARGET = 10.14;
				MTL_ENABLE_DEBUG_INFO = YES;
				ONLY_ACTIVE_ARCH = YES;
				SDKROOT = macosx;
				SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
			};
			name = Debug;
		};
		33CC10FA2044A3C60003C045 /* Release */ = {
			isa = XCBuildConfiguration;
			baseConfigurationReference = 7AFA3C8E1D35360C0083082E /* Release.xcconfig */;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
				CLANG_ANALYZER_NONNULL = YES;
				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
				CLANG_CXX_LIBRARY = "libc++";
				CLANG_ENABLE_MODULES = YES;
				CLANG_ENABLE_OBJC_ARC = YES;
				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
				CLANG_WARN_BOOL_CONVERSION = YES;
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
				CLANG_WARN_SUSPICIOUS_MOVE = YES;
				CODE_SIGN_IDENTITY = "-";
				COPY_PHASE_STRIP = NO;
				DEAD_CODE_STRIPPING = YES;
				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
				ENABLE_NS_ASSERTIONS = NO;
				ENABLE_STRICT_OBJC_MSGSEND = YES;
				ENABLE_USER_SCRIPT_SANDBOXING = NO;
				GCC_C_LANGUAGE_STANDARD = gnu11;
				GCC_NO_COMMON_BLOCKS = YES;
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
				GCC_WARN_UNUSED_FUNCTION = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				MACOSX_DEPLOYMENT_TARGET = 10.14;
				MTL_ENABLE_DEBUG_INFO = NO;
				SDKROOT = macosx;
				SWIFT_COMPILATION_MODE = wholemodule;
				SWIFT_OPTIMIZATION_LEVEL = "-O";
			};
			name = Release;
		};
		33CC10FC2044A3C60003C045 /* Debug */ = {
			isa = XCBuildConfiguration;
			baseConfigurationReference = 33E5194F232828860026EE4D /* AppInfo.xcconfig */;
			buildSettings = {
				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
				CLANG_ENABLE_MODULES = YES;
				CODE_SIGN_ENTITLEMENTS = Runner/DebugProfile.entitlements;
				CODE_SIGN_STYLE = Automatic;
				COMBINE_HIDPI_IMAGES = YES;
				INFOPLIST_FILE = Runner/Info.plist;
				LD_RUNPATH_SEARCH_PATHS = (
					"$(inherited)",
					"@executable_path/../Frameworks",
				);
				PROVISIONING_PROFILE_SPECIFIER = "";
				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
				SWIFT_VERSION = 5.0;
			};
			name = Debug;
		};
		33CC10FD2044A3C60003C045 /* Release */ = {
			isa = XCBuildConfiguration;
			baseConfigurationReference = 33E5194F232828860026EE4D /* AppInfo.xcconfig */;
			buildSettings = {
				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
				CLANG_ENABLE_MODULES = YES;
				CODE_SIGN_ENTITLEMENTS = Runner/Release.entitlements;
				CODE_SIGN_STYLE = Automatic;
				COMBINE_HIDPI_IMAGES = YES;
				INFOPLIST_FILE = Runner/Info.plist;
				LD_RUNPATH_SEARCH_PATHS = (
					"$(inherited)",
					"@executable_path/../Frameworks",
				);
				PROVISIONING_PROFILE_SPECIFIER = "";
				SWIFT_VERSION = 5.0;
			};
			name = Release;
		};
		33CC111C2044C6BA0003C045 /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				CODE_SIGN_STYLE = Manual;
				PRODUCT_NAME = "$(TARGET_NAME)";
			};
			name = Debug;
		};
		33CC111D2044C6BA0003C045 /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				CODE_SIGN_STYLE = Automatic;
				PRODUCT_NAME = "$(TARGET_NAME)";
			};
			name = Release;
		};
/* End XCBuildConfiguration section */

/* Begin XCConfigurationList section */
		331C80DE294CF71000263BE5 /* Build configuration list for PBXNativeTarget "RunnerTests" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				331C80DB294CF71000263BE5 /* Debug */,
				331C80DC294CF71000263BE5 /* Release */,
				331C80DD294CF71000263BE5 /* Profile */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		33CC10E82044A3C60003C045 /* Build configuration list for PBXProject "Runner" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				33CC10F92044A3C60003C045 /* Debug */,
				33CC10FA2044A3C60003C045 /* Release */,
				338D0CE9231458BD00FA5F75 /* Profile */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		33CC10FB2044A3C60003C045 /* Build configuration list for PBXNativeTarget "Runner" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				33CC10FC2044A3C60003C045 /* Debug */,
				33CC10FD2044A3C60003C045 /* Release */,
				338D0CEA231458BD00FA5F75 /* Profile */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		33CC111B2044C6BA0003C045 /* Build configuration list for PBXAggregateTarget "Flutter Assemble" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				33CC111C2044C6BA0003C045 /* Debug */,
				33CC111D2044C6BA0003C045 /* Release */,
				338D0CEB231458BD00FA5F75 /* Profile */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
/* End XCConfigurationList section */
	};
	rootObject = 33CC10E52044A3C60003C045 /* Project object */;
}


================================================
FILE: flutter-examples/tts/macos/Runner.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>IDEDidComputeMac32BitWarning</key>
	<true/>
</dict>
</plist>


================================================
FILE: flutter-examples/tts/macos/Runner.xcodeproj/xcshareddata/xcschemes/Runner.xcscheme
================================================
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
   LastUpgradeVersion = "1510"
   version = "1.3">
   <BuildAction
      parallelizeBuildables = "YES"
      buildImplicitDependencies = "YES">
      <BuildActionEntries>
         <BuildActionEntry
            buildForTesting = "YES"
            buildForRunning = "YES"
            buildForProfiling = "YES"
            buildForArchiving = "YES"
            buildForAnalyzing = "YES">
            <BuildableReference
               BuildableIdentifier = "primary"
               BlueprintIdentifier = "33CC10EC2044A3C60003C045"
               BuildableName = "tts.app"
               BlueprintName = "Runner"
               ReferencedContainer = "container:Runner.xcodeproj">
            </BuildableReference>
         </BuildActionEntry>
      </BuildActionEntries>
   </BuildAction>
   <TestAction
      buildConfiguration = "Debug"
      selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
      selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
      shouldUseLaunchSchemeArgsEnv = "YES">
      <MacroExpansion>
         <BuildableReference
            BuildableIdentifier = "primary"
            BlueprintIdentifier = "33CC10EC2044A3C60003C045"
            BuildableName = "tts.app"
            BlueprintName = "Runner"
            ReferencedContainer = "container:Runner.xcodeproj">
         </BuildableReference>
      </MacroExpansion>
      <Testables>
         <TestableReference
            skipped = "NO"
            parallelizable = "YES">
            <BuildableReference
               BuildableIdentifier = "primary"
               BlueprintIdentifier = "331C80D4294CF70F00263BE5"
               BuildableName = "RunnerTests.xctest"
               BlueprintName = "RunnerTests"
               ReferencedContainer = "container:Runner.xcodeproj">
            </BuildableReference>
         </TestableReference>
      </Testables>
   </TestAction>
   <LaunchAction
      buildConfiguration = "Debug"
      selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
      selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
      launchStyle = "0"
      useCustomWorkingDirectory = "NO"
      ignoresPersistentStateOnLaunch = "NO"
      debugDocumentVersioning = "YES"
      debugServiceExtension = "internal"
      allowLocationSimulation = "YES">
      <BuildableProductRunnable
         runnableDebuggingMode = "0">
         <BuildableReference
            BuildableIdentifier = "primary"
            BlueprintIdentifier = "33CC10EC2044A3C60003C045"
            BuildableName = "tts.app"
            BlueprintName = "Runner"
            ReferencedContainer = "container:Runner.xcodeproj">
         </BuildableReference>
      </BuildableProductRunnable>
   </LaunchAction>
   <ProfileAction
      buildConfiguration = "Profile"
      shouldUseLaunchSchemeArgsEnv = "YES"
      savedToolIdentifier = ""
      useCustomWorkingDirectory = "NO"
      debugDocumentVersioning = "YES">
      <BuildableProductRunnable
         runnableDebuggingMode = "0">
         <BuildableReference
            BuildableIdentifier = "primary"
            BlueprintIdentifier = "33CC10EC2044A3C60003C045"
            BuildableName = "tts.app"
            BlueprintName = "Runner"
            ReferencedContainer = "container:Runner.xcodeproj">
         </BuildableReference>
      </BuildableProductRunnable>
   </ProfileAction>
   <AnalyzeAction
      buildConfiguration = "Debug">
   </AnalyzeAction>
   <ArchiveAction
      buildConfiguration = "Release"
      revealArchiveInOrganizer = "YES">
   </ArchiveAction>
</Scheme>


================================================
FILE: flutter-examples/tts/macos/Runner.xcworkspace/contents.xcworkspacedata
================================================
<?xml version="1.0" encoding="UTF-8"?>
<Workspace
   version = "1.0">
   <FileRef
      location = "group:Runner.xcodeproj">
   </FileRef>
</Workspace>


================================================
FILE: flutter-examples/tts/macos/Runner.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>IDEDidComputeMac32BitWarning</key>
	<true/>
</dict>
</plist>


================================================
FILE: flutter-examples/tts/macos/RunnerTests/RunnerTests.swift
================================================
import Cocoa
import FlutterMacOS
import XCTest

class RunnerTests: XCTestCase {

  func testExample() {
    // If you add code to the Runner application, consider adding tests here.
    // See https://developer.apple.com/documentation/xctest for more information about using XCTest.
  }

}


================================================
FILE: flutter-examples/tts/pubspec.yaml
================================================
name: tts
description: >
  This example shows how to implement text to speech, i.e., speech synthesis,
  using sherpa-onnx.

publish_to: 'none' # Remove this line if you wish to publish to pub.dev

version: 1.12.31

environment:
  sdk: ">=2.17.0 <4.0.0"
  flutter: ">=2.8.1"

dependencies:
  flutter:
    sdk: flutter

  cupertino_icons: ^1.0.6
  path_provider: ^2.1.3
  path: ^1.9.0
  sherpa_onnx: ^1.12.31
  # sherpa_onnx:
  #   path: ../../flutter/sherpa_onnx
  url_launcher: 6.2.6
  url_launcher_linux: 3.1.0
  audioplayers: ^5.0.0
  media_kit: 
  media_kit_libs_video: 

flutter:
  uses-material-design: true

  assets:
    - assets/vits-melo-tts-zh_en/
    - assets/vits-melo-tts-zh_en/dict/

================================================
FILE: flutter-examples/tts/test/widget_test.dart
================================================
// This is a basic Flutter widget test.
//
// To perform an interaction with a widget in your test, use the WidgetTester
// utility in the flutter_test package. For example, you can send tap and scroll
// gestures. You can also use WidgetTester to find child widgets in the widget
// tree, read text, and verify that the values of widget properties are correct.

import 'package:flutter/material.dart';
import 'package:flutter_test/flutter_test.dart';

import 'package:tts/main.dart';

void main() {
  testWidgets('Counter increments smoke test', (WidgetTester tester) async {
    // Build our app and trigger a frame.
    await tester.pumpWidget(const MyApp());

    // Verify that our counter starts at 0.
    expect(find.text('0'), findsOneWidget);
    expect(find.text('1'), findsNothing);

    // Tap the '+' icon and trigger a frame.
    await tester.tap(find.byIcon(Icons.add));
    await tester.pump();

    // Verify that our counter has incremented.
    expect(find.text('0'), findsNothing);
    expect(find.text('1'), findsOneWidget);
  });
}


================================================
FILE: flutter-examples/tts/windows/.gitignore
================================================
flutter/ephemeral/

# Visual Studio user-specific files.
*.suo
*.user
*.userosscache
*.sln.docstates

# Visual Studio build-related files.
x64/
x86/

# Visual Studio cache files
# files ending in .cache can be ignored
*.[Cc]ache
# but keep track of directories ending in .cache
!*.[Cc]ache/


================================================
FILE: flutter-examples/tts/windows/CMakeLists.txt
================================================
# Project-level configuration.
cmake_minimum_required(VERSION 3.14)
project(tts LANGUAGES CXX)

# The name of the executable created for the application. Change this to change
# the on-disk name of your application.
set(BINARY_NAME "tts")

# Explicitly opt in to modern CMake behaviors to avoid warnings with recent
# versions of CMake.
cmake_policy(VERSION 3.14...3.25)

# Define build configuration option.
get_property(IS_MULTICONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(IS_MULTICONFIG)
  set(CMAKE_CONFIGURATION_TYPES "Debug;Profile;Release"
    CACHE STRING "" FORCE)
else()
  if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
    set(CMAKE_BUILD_TYPE "Debug" CACHE
      STRING "Flutter build mode" FORCE)
    set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS
      "Debug" "Profile" "Release")
  endif()
endif()
# Define settings for the Profile build mode.
set(CMAKE_EXE_LINKER_FLAGS_PROFILE "${CMAKE_EXE_LINKER_FLAGS_RELEASE}")
set(CMAKE_SHARED_LINKER_FLAGS_PROFILE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE}")
set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE}")
set(CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_RELEASE}")

# Use Unicode for all projects.
add_definitions(-DUNICODE -D_UNICODE)

# Compilation settings that should be applied to most targets.
#
# Be cautious about adding new options here, as plugins use this function by
# default. In most cases, you should add new options to specific targets instead
# of modifying this function.
function(APPLY_STANDARD_SETTINGS TARGET)
  target_compile_features(${TARGET} PUBLIC cxx_std_17)
  target_compile_options(${TARGET} PRIVATE /W4 /WX /wd"4100")
  target_compile_options(${TARGET} PRIVATE /EHsc)
  target_compile_definitions(${TARGET} PRIVATE "_HAS_EXCEPTIONS=0")
  target_compile_definitions(${TARGET} PRIVATE "$<$<CONFIG:Debug>:_DEBUG>")
endfunction()

# Flutter library and tool build rules.
set(FLUTTER_MANAGED_DIR "${CMAKE_CURRENT_SOURCE_DIR}/flutter")
add_subdirectory(${FLUTTER_MANAGED_DIR})

# Application build; see runner/CMakeLists.txt.
add_subdirectory("runner")


# Generated plugin build rules, which manage building the plugins and adding
# them to the application.
include(flutter/generated_plugins.cmake)


# === Installation ===
# Support files are copied into place next to the executable, so that it can
# run in place. This is done instead of making a separate bundle (as on Linux)
# so that building and running from within Visual Studio will work.
set(BUILD_BUNDLE_DIR "$<TARGET_FILE_DIR:${BINARY_NAME}>")
# Make the "install" step default, as it's required to run.
set(CMAKE_VS_INCLUDE_INSTALL_TO_DEFAULT_BUILD 1)
if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
  set(CMAKE_INSTALL_PREFIX "${BUILD_BUNDLE_DIR}" CACHE PATH "..." FORCE)
endif()

set(INSTALL_BUNDLE_DATA_DIR "${CMAKE_INSTALL_PREFIX}/data")
set(INSTALL_BUNDLE_LIB_DIR "${CMAKE_INSTALL_PREFIX}")

install(TARGETS ${BINARY_NAME} RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}"
  COMPONENT Runtime)

install(FILES "${FLUTTER_ICU_DATA_FILE}" DESTINATION "${INSTALL_BUNDLE_DATA_DIR}"
  COMPONENT Runtime)

install(FILES "${FLUTTER_LIBRARY}" DESTINATION "${INSTALL_BUNDLE_LIB_DIR}"
  COMPONENT Runtime)

if(PLUGIN_BUNDLED_LIBRARIES)
  install(FILES "${PLUGIN_BUNDLED_LIBRARIES}"
    DESTINATION "${INSTALL_BUNDLE_LIB_DIR}"
    COMPONENT Runtime)
endif()

# Copy the native assets provided by the build.dart from all packages.
set(NATIVE_ASSETS_DIR "${PROJECT_BUILD_DIR}native_assets/windows/")
install(DIRECTORY "${NATIVE_ASSETS_DIR}"
   DESTINATION "${INSTALL_BUNDLE_LIB_DIR}"
   COMPONENT Runtime)

# Fully re-copy the assets directory on each build to avoid having stale files
# from a previous install.
set(FLUTTER_ASSET_DIR_NAME "flutter_assets")
install(CODE "
  file(REMOVE_RECURSE \"${INSTALL_BUNDLE_DATA_DIR}/${FLUTTER_ASSET_DIR_NAME}\")
  " COMPONENT Runtime)
install(DIRECTORY "${PROJECT_BUILD_DIR}/${FLUTTER_ASSET_DIR_NAME}"
  DESTINATION "${INSTALL_BUNDLE_DATA_DIR}" COMPONENT Runtime)

# Install the AOT library on non-Debug builds only.
install(FILES "${AOT_LIBRARY}" DESTINATION "${INSTALL_BUNDLE_DATA_DIR}"
  CONFIGURATIONS Profile;Release
  COMPONENT Runtime)


================================================
FILE: flutter-examples/tts/windows/flutter/CMakeLists.txt
================================================
# This file controls Flutter-level build steps. It should not be edited.
cmake_minimum_required(VERSION 3.14)

set(EPHEMERAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/ephemeral")

# Configuration provided via flutter tool.
include(${EPHEMERAL_DIR}/generated_config.cmake)

# TODO: Move the rest of this into files in ephemeral. See
# https://github.com/flutter/flutter/issues/57146.
set(WRAPPER_ROOT "${EPHEMERAL_DIR}/cpp_client_wrapper")

# Set fallback configurations for older versions of the flutter tool.
if (NOT DEFINED FLUTTER_TARGET_PLATFORM)
  set(FLUTTER_TARGET_PLATFORM "windows-x64")
endif()

# === Flutter Library ===
set(FLUTTER_LIBRARY "${EPHEMERAL_DIR}/flutter_windows.dll")

# Published to parent scope for install step.
set(FLUTTER_LIBRARY ${FLUTTER_LIBRARY} PARENT_SCOPE)
set(FLUTTER_ICU_DATA_FILE "${EPHEMERAL_DIR}/icudtl.dat" PARENT_SCOPE)
set(PROJECT_BUILD_DIR "${PROJECT_DIR}/build/" PARENT_SCOPE)
set(AOT_LIBRARY "${PROJECT_DIR}/build/windows/app.so" PARENT_SCOPE)

list(APPEND FLUTTER_LIBRARY_HEADERS
  "flutter_export.h"
  "flutter_windows.h"
  "flutter_messenger.h"
  "flutter_plugin_registrar.h"
  "flutter_texture_registrar.h"
)
list(TRANSFORM FLUTTER_LIBRARY_HEADERS PREPEND "${EPHEMERAL_DIR}/")
add_library(flutter INTERFACE)
target_include_directories(flutter INTERFACE
  "${EPHEMERAL_DIR}"
)
target_link_libraries(flutter INTERFACE "${FLUTTER_LIBRARY}.lib")
add_dependencies(flutter flutter_assemble)

# === Wrapper ===
list(APPEND CPP_WRAPPER_SOURCES_CORE
  "core_implementations.cc"
  "standard_codec.cc"
)
list(TRANSFORM CPP_WRAPPER_SOURCES_CORE PREPEND "${WRAPPER_ROOT}/")
list(APPEND CPP_WRAPPER_SOURCES_PLUGIN
  "plugin_registrar.cc"
)
list(TRANSFORM CPP_WRAPPER_SOURCES_PLUGIN PREPEND "${WRAPPER_ROOT}/")
list(APPEND CPP_WRAPPER_SOURCES_APP
  "flutter_engine.cc"
  "flutter_view_controller.cc"
)
list(TRANSFORM CPP_WRAPPER_SOURCES_APP PREPEND "${WRAPPER_ROOT}/")

# Wrapper sources needed for a plugin.
add_library(flutter_wrapper_plugin STATIC
  ${CPP_WRAPPER_SOURCES_CORE}
  ${CPP_WRAPPER_SOURCES_PLUGIN}
)
apply_standard_settings(flutter_wrapper_plugin)
set_target_properties(flutter_wrapper_plugin PROPERTIES
  POSITION_INDEPENDENT_CODE ON)
set_target_properties(flutter_wrapper_plugin PROPERTIES
  CXX_VISIBILITY_PRESET hidden)
target_link_libraries(flutter_wrapper_plugin PUBLIC flutter)
target_include_directories(flutter_wrapper_plugin PUBLIC
  "${WRAPPER_ROOT}/include"
)
add_dependencies(flutter_wrapper_plugin flutter_assemble)

# Wrapper sources needed for the runner.
add_library(flutter_wrapper_app STATIC
  ${CPP_WRAPPER_SOURCES_CORE}
  ${CPP_WRAPPER_SOURCES_APP}
)
apply_standard_settings(flutter_wrapper_app)
target_link_libraries(flutter_wrapper_app PUBLIC flutter)
target_include_directories(flutter_wrapper_app PUBLIC
  "${WRAPPER_ROOT}/include"
)
add_dependencies(flutter_wrapper_app flutter_assemble)

# === Flutter tool backend ===
# _phony_ is a non-existent file to force this command to run every time,
# since currently there's no way to get a full input/output list from the
# flutter tool.
set(PHONY_OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/_phony_")
set_source_files_properties("${PHONY_OUTPUT}" PROPERTIES SYMBOLIC TRUE)
add_custom_command(
  OUTPUT ${FLUTTER_LIBRARY} ${FLUTTER_LIBRARY_HEADERS}
    ${CPP_WRAPPER_SOURCES_CORE} ${CPP_WRAPPER_SOURCES_PLUGIN}
    ${CPP_WRAPPER_SOURCES_APP}
    ${PHONY_OUTPUT}
  COMMAND ${CMAKE_COMMAND} -E env
    ${FLUTTER_TOOL_ENVIRONMENT}
    "${FLUTTER_ROOT}/packages/flutter_tools/bin/tool_backend.bat"
      ${FLUTTER_TARGET_PLATFORM} $<CONFIG>
  VERBATIM
)
add_custom_target(flutter_assemble DEPENDS
  "${FLUTTER_LIBRARY}"
  ${FLUTTER_LIBRARY_HEADERS}
  ${CPP_WRAPPER_SOURCES_CORE}
  ${CPP_WRAPPER_SOURCES_PLUGIN}
  ${CPP_WRAPPER_SOURCES_APP}
)


================================================
FILE: flutter-examples/tts/windows/runner/CMakeLists.txt
================================================
cmake_minimum_required(VERSION 3.14)
project(runner LANGUAGES CXX)

# Define the application target. To change its name, change BINARY_NAME in the
# top-level CMakeLists.txt, not the value here, or `flutter run` will no longer
# work.
#
# Any new source files that you add to the application should be added here.
add_executable(${BINARY_NAME} WIN32
  "flutter_window.cpp"
  "main.cpp"
  "utils.cpp"
  "win32_window.cpp"
  "${FLUTTER_MANAGED_DIR}/generated_plugin_registrant.cc"
  "Runner.rc"
  "runner.exe.manifest"
)

# Apply the standard set of build settings. This can be removed for applications
# that need different build settings.
apply_standard_settings(${BINARY_NAME})

# Add preprocessor definitions for the build version.
target_compile_definitions(${BINARY_NAME} PRIVATE "FLUTTER_VERSION=\"${FLUTTER_VERSION}\"")
target_compile_definitions(${BINARY_NAME} PRIVATE "FLUTTER_VERSION_MAJOR=${FLUTTER_VERSION_MAJOR}")
target_compile_definitions(${BINARY_NAME} PRIVATE "FLUTTER_VERSION_MINOR=${FLUTTER_VERSION_MINOR}")
target_compile_definitions(${BINARY_NAME} PRIVATE "FLUTTER_VERSION_PATCH=${FLUTTER_VERSION_PATCH}")
target_compile_definitions(${BINARY_NAME} PRIVATE "FLUTTER_VERSION_BUILD=${FLUTTER_VERSION_BUILD}")

# Disable Windows macros that collide with C++ standard library functions.
target_compile_definitions(${BINARY_NAME} PRIVATE "NOMINMAX")

# Add dependency libraries and include directories. Add any application-specific
# dependencies here.
target_link_libraries(${BINARY_NAME} PRIVATE flutter flutter_wrapper_app)
target_link_libraries(${BINARY_NAME} PRIVATE "dwmapi.lib")
target_include_directories(${BINARY_NAME} PRIVATE "${CMAKE_SOURCE_DIR}")

# Run the Flutter tool portions of the build. This must not be removed.
add_dependencies(${BINARY_NAME} flutter_assemble)


================================================
FILE: flutter-examples/tts/windows/runner/Runner.rc
================================================
// Microsoft Visual C++ generated resource script.
//
#pragma code_page(65001)
#include "resource.h"

#define APSTUDIO_READONLY_SYMBOLS
/////////////////////////////////////////////////////////////////////////////
//
// Generated from the TEXTINCLUDE 2 resource.
//
#include "winres.h"

/////////////////////////////////////////////////////////////////////////////
#undef APSTUDIO_READONLY_SYMBOLS

/////////////////////////////////////////////////////////////////////////////
// English (United States) resources

#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU)
LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US

#ifdef APSTUDIO_INVOKED
/////////////////////////////////////////////////////////////////////////////
//
// TEXTINCLUDE
//

1 TEXTINCLUDE
BEGIN
    "resource.h\0"
END

2 TEXTINCLUDE
BEGIN
    "#include ""winres.h""\r\n"
    "\0"
END

3 TEXTINCLUDE
BEGIN
    "\r\n"
    "\0"
END

#endif    // APSTUDIO_INVOKED


/////////////////////////////////////////////////////////////////////////////
//
// Icon
//

// Icon with lowest ID value placed first to ensure application icon
// remains consistent on all systems.
IDI_APP_ICON            ICON                    "resources\\app_icon.ico"


/////////////////////////////////////////////////////////////////////////////
//
// Version
//

#if defined(FLUTTER_VERSION_MAJOR) && defined(FLUTTER_VERSION_MINOR) && defined(FLUTTER_VERSION_PATCH) && defined(FLUTTER_VERSION_BUILD)
#define VERSION_AS_NUMBER FLUTTER_VERSION_MAJOR,FLUTTER_VERSION_MINOR,FLUTTER_VERSION_PATCH,FLUTTER_VERSION_BUILD
#else
#define VERSION_AS_NUMBER 1,0,0,0
#endif

#if defined(FLUTTER_VERSION)
#define VERSION_AS_STRING FLUTTER_VERSION
#else
#define VERSION_AS_STRING "1.0.0"
#endif

VS_VERSION_INFO VERSIONINFO
 FILEVERSION VERSION_AS_NUMBER
 PRODUCTVERSION VERSION_AS_NUMBER
 FILEFLAGSMASK VS_FFI_FILEFLAGSMASK
#ifdef _DEBUG
 FILEFLAGS VS_FF_DEBUG
#else
 FILEFLAGS 0x0L
#endif
 FILEOS VOS__WINDOWS32
 FILETYPE VFT_APP
 FILESUBTYPE 0x0L
BEGIN
    BLOCK "StringFileInfo"
    BEGIN
        BLOCK "040904e4"
        BEGIN
            VALUE "CompanyName", "com.example" "\0"
            VALUE "FileDescription", "tts" "\0"
            VALUE "FileVersion", VERSION_AS_STRING "\0"
            VALUE "InternalName", "tts" "\0"
            VALUE "LegalCopyright", "Copyright (C) 2024 com.example. All rights reserved." "\0"
            VALUE "OriginalFilename", "tts.exe" "\0"
            VALUE "ProductName", "tts" "\0"
            VALUE "ProductVersion", VERSION_AS_STRING "\0"
        END
    END
    BLOCK "VarFileInfo"
    BEGIN
        VALUE "Translation", 0x409, 1252
    END
END

#endif    // English (United States) resources
/////////////////////////////////////////////////////////////////////////////


#ifndef APSTUDIO_INVOKED
/////////////////////////////////////////////////////////////////////////////
//
// Generated from the TEXTINCLUDE 3 resource.
//


/////////////////////////////////////////////////////////////////////////////
#endif    // not APSTUDIO_INVOKED


================================================
FILE: flutter-examples/tts/windows/runner/flutter_window.cpp
================================================
#include "flutter_window.h"

#include <optional>

#include "flutter/generated_plugin_registrant.h"

FlutterWindow::FlutterWindow(const flutter::DartProject& project)
    : project_(project) {}

FlutterWindow::~FlutterWindow() {}

bool FlutterWindow::OnCreate() {
  if (!Win32Window::OnCreate()) {
    return false;
  }

  RECT frame = GetClientArea();

  // The size here must match the window dimensions to avoid unnecessary surface
  // creation / destruction in the startup path.
  flutter_controller_ = std::make_unique<flutter::FlutterViewController>(
      frame.right - frame.left, frame.bottom - frame.top, project_);
  // Ensure that basic setup of the controller was successful.
  if (!flutter_controller_->engine() || !flutter_controller_->view()) {
    return false;
  }
  RegisterPlugins(flutter_controller_->engine());
  SetChildContent(flutter_controller_->view()->GetNativeWindow());

  flutter_controller_->engine()->SetNextFrameCallback([&]() {
    this->Show();
  });

  // Flutter can complete the first frame before the "show window" callback is
  // registered. The following call ensures a frame is pending to ensure the
  // window is shown. It is a no-op if the first frame hasn't completed yet.
  flutter_controller_->ForceRedraw();

  return true;
}

void FlutterWindow::OnDestroy() {
  if (flutter_controller_) {
    flutter_controller_ = nullptr;
  }

  Win32Window::OnDestroy();
}

LRESULT
FlutterWindow::MessageHandler(HWND hwnd, UINT const message,
                              WPARAM const wparam,
                              LPARAM const lparam) noexcept {
  // Give Flutter, including plugins, an opportunity to handle window messages.
  if (flutter_controller_) {
    std::optional<LRESULT> result =
        flutter_controller_->HandleTopLevelWindowProc(hwnd, message, wparam,
                                                      lparam);
    if (result) {
      return *result;
    }
  }

  switch (message) {
    case WM_FONTCHANGE:
      flutter_controller_->engine()->ReloadSystemFonts();
      break;
  }

  return Win32Window::MessageHandler(hwnd, message, wparam, lparam);
}


================================================
FILE: flutter-examples/tts/windows/runner/flutter_window.h
================================================
#ifndef RUNNER_FLUTTER_WINDOW_H_
#define RUNNER_FLUTTER_WINDOW_H_

#include <flutter/dart_project.h>
#include <flutter/flutter_view_controller.h>

#include <memory>

#include "win32_window.h"

// A window that does nothing but host a Flutter view.
class FlutterWindow : public Win32Window {
 public:
  // Creates a new FlutterWindow hosting a Flutter view running |project|.
  explicit FlutterWindow(const flutter::DartProject& project);
  virtual ~FlutterWindow();

 protected:
  // Win32Window:
  bool OnCreate() override;
  void OnDestroy() override;
  LRESULT MessageHandler(HWND window, UINT const message, WPARAM const wparam,
                         LPARAM const lparam) noexcept override;

 private:
  // The project to run.
  flutter::DartProject project_;

  // The Flutter instance hosted by this window.
  std::unique_ptr<flutter::FlutterViewController> flutter_controller_;
};

#endif  // RUNNER_FLUTTER_WINDOW_H_


================================================
FILE: flutter-examples/tts/windows/runner/main.cpp
================================================
#include <flutter/dart_project.h>
#include <flutter/flutter_view_controller.h>
#include <windows.h>

#include "flutter_window.h"
#include "utils.h"

int APIENTRY wWinMain(_In_ HINSTANCE instance, _In_opt_ HINSTANCE prev,
                      _In_ wchar_t *command_line, _In_ int show_command) {
  // Attach to console when present (e.g., 'flutter run') or create a
  // new console when running with a debugger.
  if (!::AttachConsole(ATTACH_PARENT_PROCESS) && ::IsDebuggerPresent()) {
    CreateAndAttachConsole();
  }

  // Initialize COM, so that it is available for use in the library and/or
  // plugins.
  ::CoInitializeEx(nullptr, COINIT_APARTMENTTHREADED);

  flutter::DartProject project(L"data");

  std::vector<std::string> command_line_arguments =
      GetCommandLineArguments();

  project.set_dart_entrypoint_arguments(std::move(command_line_arguments));

  FlutterWindow window(project);
  Win32Window::Point origin(10, 10);
  Win32Window::Size size(1280, 720);
  if (!window.Create(L"tts", origin, size)) {
    return EXIT_FAILURE;
  }
  window.SetQuitOnClose(true);

  ::MSG msg;
  while (::GetMessage(&msg, nullptr, 0, 0)) {
    ::TranslateMessage(&msg);
    ::DispatchMessage(&msg);
  }

  ::CoUninitialize();
  return EXIT_SUCCESS;
}


================================================
FILE: flutter-examples/tts/windows/runner/resource.h
================================================
//{{NO_DEPENDENCIES}}
// Microsoft Visual C++ generated include file.
// Used by Runner.rc
//
#define IDI_APP_ICON                    101

// Next default values for new objects
//
#ifdef APSTUDIO_INVOKED
#ifndef APSTUDIO_READONLY_SYMBOLS
#define _APS_NEXT_RESOURCE_VALUE        102
#define _APS_NEXT_COMMAND_VALUE         40001
#define _APS_NEXT_CONTROL_VALUE         1001
#define _APS_NEXT_SYMED_VALUE           101
#endif
#endif


================================================
FILE: flutter-examples/tts/windows/runner/runner.exe.manifest
================================================
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
  <application xmlns="urn:schemas-microsoft-com:asm.v3">
    <windowsSettings>
      <dpiAwareness xmlns="http://schemas.microsoft.com/SMI/2016/WindowsSettings">PerMonitorV2</dpiAwareness>
    </windowsSettings>
  </application>
  <compatibility xmlns="urn:schemas-microsoft-com:compatibility.v1">
    <application>
      <!-- Windows 10 and Windows 11 -->
      <supportedOS Id="{8e0f7a12-bfb3-4fe8-b9a5-48fd50a15a9a}"/>
      <!-- Windows 8.1 -->
      <supportedOS Id="{1f676c76-80e1-4239-95bb-83d0f6d0da78}"/>
      <!-- Windows 8 -->
      <supportedOS Id="{4a2f28e3-53b9-4441-ba9c-d69d4a4a6e38}"/>
      <!-- Windows 7 -->
      <supportedOS Id="{35138b9a-5d96-4fbd-8e2d-a2440225f93a}"/>
    </application>
  </compatibility>
</assembly>


================================================
FILE: flutter-examples/tts/windows/runner/utils.cpp
================================================
#include "utils.h"

#include <flutter_windows.h>
#include <io.h>
#include <stdio.h>
#include <windows.h>

#include <iostream>

void CreateAndAttachConsole() {
  if (::AllocConsole()) {
    FILE *unused;
    if (freopen_s(&unused, "CONOUT$", "w", stdout)) {
      _dup2(_fileno(stdout), 1);
    }
    if (freopen_s(&unused, "CONOUT$", "w", stderr)) {
      _dup2(_fileno(stdout), 2);
    }
    std::ios::sync_with_stdio();
    FlutterDesktopResyncOutputStreams();
  }
}

std::vector<std::string> GetCommandLineArguments() {
  // Convert the UTF-16 command line arguments to UTF-8 for the Engine to use.
  int argc;
  wchar_t** argv = ::CommandLineToArgvW(::GetCommandLineW(), &argc);
  if (argv == nullptr) {
    return std::vector<std::string>();
  }

  std::vector<std::string> command_line_arguments;

  // Skip the first argument as it's the binary name.
  for (int i = 1; i < argc; i++) {
    command_line_arguments.push_back(Utf8FromUtf16(argv[i]));
  }

  ::LocalFree(argv);

  return command_line_arguments;
}

std::string Utf8FromUtf16(const wchar_t* utf16_string) {
  if (utf16_string == nullptr) {
    return std::string();
  }
  unsigned int target_length = ::WideCharToMultiByte(
      CP_UTF8, WC_ERR_INVALID_CHARS, utf16_string,
      -1, nullptr, 0, nullptr, nullptr)
    -1; // remove the trailing null character
  int input_length = (int)wcslen(utf16_string);
  std::string utf8_string;
  if (target_length == 0 || target_length > utf8_string.max_size()) {
    return utf8_string;
  }
  utf8_string.resize(target_length);
  int converted_length = ::WideCharToMultiByte(
      CP_UTF8, WC_ERR_INVALID_CHARS, utf16_string,
      input_length, utf8_string.data(), target_length, nullptr, nullptr);
  if (converted_length == 0) {
    return std::string();
  }
  return utf8_string;
}


================================================
FILE: flutter-examples/tts/windows/runner/utils.h
================================================
#ifndef RUNNER_UTILS_H_
#define RUNNER_UTILS_H_

#include <string>
#include <vector>

// Creates a console for the process, and redirects stdout and stderr to
// it for both the runner and the Flutter library.
void CreateAndAttachConsole();

// Takes a null-terminated wchar_t* encoded in UTF-16 and returns a std::string
// encoded in UTF-8. Returns an empty std::string on failure.
std::string Utf8FromUtf16(const wchar_t* utf16_string);

// Gets the command line arguments passed in as a std::vector<std::string>,
// encoded in UTF-8. Returns an empty std::vector<std::string> on failure.
std::vector<std::string> GetCommandLineArguments();

#endif  // RUNNER_UTILS_H_


================================================
FILE: flutter-examples/tts/windows/runner/win32_window.cpp
================================================
#include "win32_window.h"

#include <dwmapi.h>
#include <flutter_windows.h>

#include "resource.h"

namespace {

/// Window attribute that enables dark mode window decorations.
///
/// Redefined in case the developer's machine has a Windows SDK older than
/// version 10.0.22000.0.
/// See: https://docs.microsoft.com/windows/win32/api/dwmapi/ne-dwmapi-dwmwindowattribute
#ifndef DWMWA_USE_IMMERSIVE_DARK_MODE
#define DWMWA_USE_IMMERSIVE_DARK_MODE 20
#endif

constexpr const wchar_t kWindowClassName[] = L"FLUTTER_RUNNER_WIN32_WINDOW";

/// Registry key for app theme preference.
///
/// A value of 0 indicates apps should use dark mode. A non-zero or missing
/// value indicates apps should use light mode.
constexpr const wchar_t kGetPreferredBrightnessRegKey[] =
  L"Software\\Microsoft\\Windows\\CurrentVersion\\Themes\\Personalize";
constexpr const wchar_t kGetPreferredBrightnessRegValue[] = L"AppsUseLightTheme";

// The number of Win32Window objects that currently exist.
static int g_active_window_count = 0;

using EnableNonClientDpiScaling = BOOL __stdcall(HWND hwnd);

// Scale helper to convert logical scaler values to physical using passed in
// scale factor
int Scale(int source, double scale_factor) {
  return static_cast<int>(source * scale_factor);
}

// Dynamically loads the |EnableNonClientDpiScaling| from the User32 module.
// This API is only needed for PerMonitor V1 awareness mode.
void EnableFullDpiSupportIfAvailable(HWND hwnd) {
  HMODULE user32_module = LoadLibraryA("User32.dll");
  if (!user32_module) {
    return;
  }
  auto enable_non_client_dpi_scaling =
      reinterpret_cast<EnableNonClientDpiScaling*>(
          GetProcAddress(user32_module, "EnableNonClientDpiScaling"));
  if (enable_non_client_dpi_scaling != nullptr) {
    enable_non_client_dpi_scaling(hwnd);
  }
  FreeLibrary(user32_module);
}

}  // namespace

// Manages the Win32Window's window class registration.
class WindowClassRegistrar {
 public:
  ~WindowClassRegistrar() = default;

  // Returns the singleton registrar instance.
  static WindowClassRegistrar* GetInstance() {
    if (!instance_) {
      instance_ = new WindowClassRegistrar();
    }
    return instance_;
  }

  // Returns the name of the window class, registering the class if it hasn't
  // previously been registered.
  const wchar_t* GetWindowClass();

  // Unregisters the window class. Should only be called if there are no
  // instances of the window.
  void UnregisterWindowClass();

 private:
  WindowClassRegistrar() = default;

  static WindowClassRegistrar* instance_;

  bool class_registered_ = false;
};

WindowClassRegistrar* WindowClassRegistrar::instance_ = nullptr;

const wchar_t* WindowClassRegistrar::GetWindowClass() {
  if (!class_registered_) {
    WNDCLASS window_class{};
    window_class.hCursor = LoadCursor(nullptr, IDC_ARROW);
    window_class.lpszClassName = kWindowClassName;
    window_class.style = CS_HREDRAW | CS_VREDRAW;
    window_class.cbClsExtra = 0;
    window_class.cbWndExtra = 0;
    window_class.hInstance = GetModuleHandle(nullptr);
    window_class.hIcon =
        LoadIcon(window_class.hInstance, MAKEINTRESOURCE(IDI_APP_ICON));
    window_class.hbrBackground = 0;
    window_class.lpszMenuName = nullptr;
    window_class.lpfnWndProc = Win32Window::WndProc;
    RegisterClass(&window_class);
    class_registered_ = true;
  }
  return kWindowClassName;
}

void WindowClassRegistrar::UnregisterWindowClass() {
  UnregisterClass(kWindowClassName, nullptr);
  class_registered_ = false;
}

Win32Window::Win32Window() {
  ++g_active_window_count;
}

Win32Window::~Win32Window() {
  --g_active_window_count;
  Destroy();
}

bool Win32Window::Create(const std::wstring& title,
                         const Point& origin,
                         const Size& size) {
  Destroy();

  const wchar_t* window_class =
      WindowClassRegistrar::GetInstance()->GetWindowClass();

  const POINT target_point = {static_cast<LONG>(origin.x),
                              static_cast<LONG>(origin.y)};
  HMONITOR monitor = MonitorFromPoint(target_point, MONITOR_DEFAULTTONEAREST);
  UINT dpi = FlutterDesktopGetDpiForMonitor(monitor);
  double scale_factor = dpi / 96.0;

  HWND window = CreateWindow(
      window_class, title.c_str(), WS_OVERLAPPEDWINDOW,
      Scale(origin.x, scale_factor), Scale(origin.y, scale_factor),
      Scale(size.width, scale_factor), Scale(size.height, scale_factor),
      nullptr, nullptr, GetModuleHandle(nullptr), this);

  if (!window) {
    return false;
  }

  UpdateTheme(window);

  return OnCreate();
}

bool Win32Window::Show() {
  return ShowWindow(window_handle_, SW_SHOWNORMAL);
}

// static
LRESULT CALLBACK Win32Window::WndProc(HWND const window,
                                      UINT const message,
                                      WPARAM const wparam,
                                      LPARAM const lparam) noexcept {
  if (message == WM_NCCREATE) {
    auto window_struct = reinterpret_cast<CREATESTRUCT*>(lparam);
    SetWindowLongPtr(window, GWLP_USERDATA,
                     reinterpret_cast<LONG_PTR>(window_struct->lpCreateParams));

    auto that = static_cast<Win32Window*>(window_struct->lpCreateParams);
    EnableFullDpiSupportIfAvailable(window);
    that->window_handle_ = window;
  } else if (Win32Window* that = GetThisFromHandle(window)) {
    return that->MessageHandler(window, message, wparam, lparam);
  }

  return DefWindowProc(window, message, wparam, lparam);
}

LRESULT
Win32Window::MessageHandler(HWND hwnd,
                            UINT const message,
                            WPARAM const wparam,
                            LPARAM const lparam) noexcept {
  switch (message) {
    case WM_DESTROY:
      window_handle_ = nullptr;
      Destroy();
      if (quit_on_close_) {
        PostQuitMessage(0);
      }
      return 0;

    case WM_DPICHANGED: {
      auto newRectSize = reinterpret_cast<RECT*>(lparam);
      LONG newWidth = newRectSize->right - newRectSize->left;
      LONG newHeight = newRectSize->bottom - newRectSize->top;

      SetWindowPos(hwnd, nullptr, newRectSize->left, newRectSize->top, newWidth,
                   newHeight, SWP_NOZORDER | SWP_NOACTIVATE);

      return 0;
    }
    case WM_SIZE: {
      RECT rect = GetClientArea();
      if (child_content_ != nullptr) {
        // Size and position the child window.
        MoveWindow(child_content_, rect.left, rect.top, rect.right - rect.left,
                   rect.bottom - rect.top, TRUE);
      }
      return 0;
    }

    case WM_ACTIVATE:
      if (child_content_ != nullptr) {
        SetFocus(child_content_);
      }
      return 0;

    case WM_DWMCOLORIZATIONCOLORCHANGED:
      UpdateTheme(hwnd);
      return 0;
  }

  return DefWindowProc(window_handle_, message, wparam, lparam);
}

void Win32Window::Destroy() {
  OnDestroy();

  if (window_handle_) {
    DestroyWindow(window_handle_);
    window_handle_ = nullptr;
  }
  if (g_active_window_count == 0) {
    WindowClassRegistrar::GetInstance()->UnregisterWindowClass();
  }
}

Win32Window* Win32Window::GetThisFromHandle(HWND const window) noexcept {
  return reinterpret_cast<Win32Window*>(
      GetWindowLongPtr(window, GWLP_USERDATA));
}

void Win32Window::SetChildContent(HWND content) {
  child_content_ = content;
  SetParent(content, window_handle_);
  RECT frame = GetClientArea();

  MoveWindow(content, frame.left, frame.top, frame.right - frame.left,
             frame.bottom - frame.top, true);

  SetFocus(child_content_);
}

RECT Win32Window::GetClientArea() {
  RECT frame;
  GetClientRect(window_handle_, &frame);
  return frame;
}

HWND Win32Window::GetHandle() {
  return window_handle_;
}

void Win32Window::SetQuitOnClose(bool quit_on_close) {
  quit_on_close_ = quit_on_close;
}

bool Win32Window::OnCreate() {
  // No-op; provided for subclasses.
  return true;
}

void Win32Window::OnDestroy() {
  // No-op; provided for subclasses.
}

void Win32Window::UpdateTheme(HWND const window) {
  DWORD light_mode;
  DWORD light_mode_size = sizeof(light_mode);
  LSTATUS result = RegGetValue(HKEY_CURRENT_USER, kGetPreferredBrightnessRegKey,
                               kGetPreferredBrightnessRegValue,
                               RRF_RT_REG_DWORD, nullptr, &light_mode,
                               &light_mode_size);

  if (result == ERROR_SUCCESS) {
    BOOL enable_dark_mode = light_mode == 0;
    DwmSetWindowAttribute(window, DWMWA_USE_IMMERSIVE_DARK_MODE,
                          &enable_dark_mode, sizeof(enable_dark_mode));
  }
}


================================================
FILE: flutter-examples/tts/windows/runner/win32_window.h
================================================
#ifndef RUNNER_WIN32_WINDOW_H_
#define RUNNER_WIN32_WINDOW_H_

#include <windows.h>

#include <functional>
#include <memory>
#include <string>

// A class abstraction for a high DPI-aware Win32 Window. Intended to be
// inherited from by classes that wish to specialize with custom
// rendering and input handling
class Win32Window {
 public:
  struct Point {
    unsigned int x;
    unsigned int y;
    Point(unsigned int x, unsigned int y) : x(x), y(y) {}
  };

  struct Size {
    unsigned int width;
    unsigned int height;
    Size(unsigned int width, unsigned int height)
        : width(width), height(height) {}
  };

  Win32Window();
  virtual ~Win32Window();

  // Creates a win32 window with |title| that is positioned and sized using
  // |origin| and |size|. New windows are created on the default monitor. Window
  // sizes are specified to the OS in physical pixels, hence to ensure a
  // consistent size this function will scale the inputted width and height as
  // as appropriate for the default monitor. The window is invisible until
  // |Show| is called. Returns true if the window was created successfully.
  bool Create(const std::wstring& title, const Point& origin, const Size& size);

  // Show the current window. Returns true if the window was successfully shown.
  bool Show();

  // Release OS resources associated with window.
  void Destroy();

  // Inserts |content| into the window tree.
  void SetChildContent(HWND content);

  // Returns the backing Window handle to enable clients to set icon and other
  // window properties. Returns nullptr if the window has been destroyed.
  HWND GetHandle();

  // If true, closing this window will quit the application.
  void SetQuitOnClose(bool quit_on_close);

  // Return a RECT representing the bounds of the current client area.
  RECT GetClientArea();

 protected:
  // Processes and route salient window messages for mouse handling,
  // size change and DPI. Delegates handling of these to member overloads that
  // inheriting classes can handle.
  virtual LRESULT MessageHandler(HWND window,
                                 UINT const message,
                                 WPARAM const wparam,
                                 LPARAM const lparam) noexcept;

  // Called when CreateAndShow is called, allowing subclass window-related
  // setup. Subclasses should return false if setup fails.
  virtual bool OnCreate();

  // Called when Destroy is called.
  virtual void OnDestroy();

 private:
  friend class WindowClassRegistrar;

  // OS callback called by message pump. Handles the WM_NCCREATE message which
  // is passed when the non-client area is being created and enables automatic
  // non-client DPI scaling so that the non-client area automatically
  // responds to changes in DPI. All other messages are handled by
  // MessageHandler.
  static LRESULT CALLBACK WndProc(HWND const window,
                                  UINT const message,
                                  WPARAM const wparam,
                                  LPARAM const lparam) noexcept;

  // Retrieves a class instance pointer for |window|
  static Win32Window* GetThisFromHandle(HWND const window) noexcept;

  // Update the window frame's theme to match the system theme.
  static void UpdateTheme(HWND const window);

  bool quit_on_close_ = false;

  // window handle for top level window.
  HWND window_handle_ = nullptr;

  // window handle for hosted content.
  HWND child_content_ = nullptr;
};

#endif  // RUNNER_WIN32_WINDOW_H_


================================================
FILE: go-api-examples/.gitignore
================================================
!*.sh


================================================
FILE: go-api-examples/README.md
================================================
# Introduction

This folder contains Go API examples for [sherpa-onnx][sherpa-onnx].

Please refer to the documentation
https://k2-fsa.github.io/sherpa/onnx/go-api/index.html
for details.

- [./add-punctuation](./add-punctuation) It shows how to use
  a punctuation model to add punctuations to text

- [./add-punctuation-online](./add-punctuation-online) It shows how to use
  an online punctuation model to add punctuations and casing to text

- [./non-streaming-decode-files](./non-streaming-decode-files) It shows how to use
  a non-streaming ASR model to decode files

- [./non-streaming-speaker-diarization](./non-streaming-speaker-diarization) It shows how to use
  a speaker segmentation model and a speaker embedding model for speaker diarization.

- [./speech-enhancement-gtcrn](./speech-enhancement-gtcrn) It shows how to use
  the offline speech denoiser API with GTCRN models.

- [./speech-enhancement-dpdfnet](./speech-enhancement-dpdfnet) It shows how to use
  the offline speech denoiser API with DPDFNet models.

- [./streaming-speech-enhancement-gtcrn](./streaming-speech-enhancement-gtcrn) It shows how to use
  the online speech denoiser API with GTCRN models.

- [./streaming-speech-enhancement-dpdfnet](./streaming-speech-enhancement-dpdfnet) It shows how to use
  the online speech denoiser API with DPDFNet models.

- [./non-streaming-tts](./non-streaming-tts) It shows how to use a non-streaming TTS
  model to convert text to speech

- [./offline-tts-play](./offline-tts-play) It shows how to use a non-streaming TTS
  model to convert text to speech. It plays the audio back as it is being generated.

- [./zero-shot-pocket-tts](./zero-shot-pocket-tts) It shows how to use a PocketTTS
  model for zero-shot TTS.

- [./zero-shot-pocket-tts-play](./zero-shot-pocket-tts-play) It shows how to use a PocketTTS
  model for zero-shot TTS. It plays the audio back as it is being generated.

- [./zero-shot-zipvoice-tts](./zero-shot-zipvoice-tts) It shows how to use a ZipVoice
  model for zero-shot TTS with the GenerationConfig API.

- [./zero-shot-zipvoice-tts-play](./zero-shot-zipvoice-tts-play) It shows how to use a
  ZipVoice model for zero-shot TTS. It plays the audio back as it is being generated.

- [./real-time-speech-recognition-from-microphone](./real-time-speech-recognition-from-microphone)
  It shows how to use a streaming ASR model to recognize speech from a microphone in real-time

- [./speaker-identification](./speaker-identification) It shows how to use a speaker
  embedding model for speaker identification.

- [./streaming-decode-files](./streaming-decode-files) It shows how to use a streaming
  model for streaming speech recognition

- [./streaming-hlg-decoding](./streaming-hlg-decoding) It shows how to use a streaming
  model for streaming speech recognition with HLG decoding

- [./vad](./vad) It shows how to use silero VAD with Golang.

- [./vad-asr-paraformer](./vad-asr-paraformer) It shows how to use silero VAD + Paraformer
  for speech recognition.

- [./vad-asr-whisper](./vad-asr-whisper) It shows how to use silero VAD + Whisper

- [./vad-speaker-identification](./vad-speaker-identification) It shows how to use Go API for VAD + speaker identification.
  for speech recognition.

- [./vad-spoken-language-identification](./vad-spoken-language-identification) It shows how to use silero VAD + Whisper
  for spoken language identification.

[sherpa-onnx]: https://github.com/k2-fsa/sherpa-onnx


================================================
FILE: go-api-examples/add-punctuation/go.mod
================================================
module add-punctuation

go 1.17


================================================
FILE: go-api-examples/add-punctuation/main.go
================================================
package main

import (
	sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
	"log"
)

func main() {
	log.SetFlags(log.LstdFlags | log.Lmicroseconds)

	config := sherpa.OfflinePunctuationConfig{}
	config.Model.CtTransformer = "./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx"
	config.Model.NumThreads = 1
	config.Model.Provider = "cpu"

	punct := sherpa.NewOfflinePunctuation(&config)
	defer sherpa.DeleteOfflinePunc(punct)

	textArray := []string{
		"这是一个测试你好吗How are you我很好thank you are you ok谢谢你",
		"我们都是木头人不会说话不会动",
		"The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry",
	}
	log.Println("----------")
	for _, text := range textArray {
		newText := punct.AddPunct(text)
		log.Printf("Input text: %v", text)
		log.Printf("Output text: %v", newText)
		log.Println("----------")
	}
}


================================================
FILE: go-api-examples/add-punctuation/run.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -d ./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12 ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
  tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
  rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
fi

go mod tidy
go build

./add-punctuation


================================================
FILE: go-api-examples/add-punctuation-online/go.mod
================================================
module add-punctuation-online

go 1.17


================================================
FILE: go-api-examples/add-punctuation-online/main.go
================================================
package main

import (
	"log"

	sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
)

func main() {
	log.SetFlags(log.LstdFlags | log.Lmicroseconds)

	config := sherpa.OnlinePunctuationConfig{}
	config.Model.CnnBilstm = "./sherpa-onnx-online-punct-en-2024-08-06/model.onnx"
	config.Model.BpeVocab = "./sherpa-onnx-online-punct-en-2024-08-06/bpe.vocab"
	config.Model.NumThreads = 1
	config.Model.Provider = "cpu"

	punct := sherpa.NewOnlinePunctuation(&config)
	if punct == nil {
		log.Fatal("Failed to create OnlinePunctuation")
	}
	defer sherpa.DeleteOnlinePunctuation(punct)

	textArray := []string{
		"how are you i am fine thank you",
		"The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry",
	}

	log.Println("----------")
	for _, text := range textArray {
		newText := punct.AddPunct(text)
		log.Printf("Input text: %v", text)
		log.Printf("Output text: %v", newText)
		log.Println("----------")
	}
}


================================================
FILE: go-api-examples/add-punctuation-online/run.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -d ./sherpa-onnx-online-punct-en-2024-08-06 ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
  tar xvf sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
  rm sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
fi

go mod tidy
go build

./add-punctuation-online


================================================
FILE: go-api-examples/audio-tagging/go.mod
================================================
module audio-tagging

go 1.17


================================================
FILE: go-api-examples/audio-tagging/main.go
================================================
package main

import (
	"fmt"
	sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
	"log"
)

func main() {
	config := sherpa.AudioTaggingConfig{}
	config.Model.Zipformer.Model = "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/model.int8.onnx"
	config.Model.NumThreads = 1
	config.Model.Debug = 1
	config.Model.Provider = "cpu"
	config.Labels = "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/class_labels_indices.csv"
	config.TopK = 5

	tagging := sherpa.NewAudioTagging(&config)
	defer sherpa.DeleteAudioTagging(tagging)

	wave_filename := "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/3.wav"

	wave := sherpa.ReadWave(wave_filename)
	if wave == nil {
		log.Printf("Failed to read %v\n", wave_filename)
		return
	}

	stream := sherpa.NewAudioTaggingStream(tagging)
	defer sherpa.DeleteOfflineStream(stream)

	stream.AcceptWaveform(wave.SampleRate, wave.Samples)

	result := tagging.Compute(stream, 10)
	fmt.Printf("the tagging result: %v\n", result)
}


================================================
FILE: go-api-examples/audio-tagging/run.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -f ./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2

  tar xvf sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
  rm sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
fi

go mod tidy
go build

./audio-tagging


================================================
FILE: go-api-examples/keyword-spotting-from-file/go.mod
================================================
module keyword-spotting-from-file

go 1.17


================================================
FILE: go-api-examples/keyword-spotting-from-file/main.go
================================================
package main

import (
	sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
	"log"
)

func main() {
	log.SetFlags(log.LstdFlags | log.Lmicroseconds)

	config := sherpa.KeywordSpotterConfig{}

	// Please download the models from
	// https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models

	config.ModelConfig.Transducer.Encoder = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx"
	config.ModelConfig.Transducer.Decoder = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx"
	config.ModelConfig.Transducer.Joiner = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx"
	config.ModelConfig.Tokens = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt"
	config.KeywordsFile = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/test_keywords.txt"
	config.ModelConfig.NumThreads = 1
	config.ModelConfig.Debug = 1

	spotter := sherpa.NewKeywordSpotter(&config)
	defer sherpa.DeleteKeywordSpotter(spotter)

	wave_filename := "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav"

	wave := sherpa.ReadWave(wave_filename)
	if wave == nil {
		log.Printf("Failed to read %v\n", wave_filename)
		return
	}

	log.Println("----------Use pre-defined keywords----------")

	stream := sherpa.NewKeywordStream(spotter)
	defer sherpa.DeleteOnlineStream(stream)

	stream.AcceptWaveform(wave.SampleRate, wave.Samples)

	for spotter.IsReady(stream) {
		spotter.Decode(stream)
		result := spotter.GetResult(stream)
		if result.Keyword != "" {
			// You have to reset the stream right after detecting a keyword
			spotter.Reset(stream)
			log.Printf("Detected %v\n", result.Keyword)
		}
	}

	log.Println("----------Use pre-defined keywords + add a new keyword----------")

	stream2 := sherpa.NewKeywordStreamWithKeywords(spotter, "y ǎn y uán @演员")
	defer sherpa.DeleteOnlineStream(stream2)

	stream2.AcceptWaveform(wave.SampleRate, wave.Samples)

	for spotter.IsReady(stream2) {
		spotter.Decode(stream2)
		result := spotter.GetResult(stream2)
		if result.Keyword != "" {
			log.Printf("Detected %v\n", result.Keyword)
		}
	}

	log.Println("----------Use pre-defined keywords + add 2 new keywords----------")

	stream3 := sherpa.NewKeywordStreamWithKeywords(spotter, "y ǎn y uán @演员/zh ī m íng @知名")
	defer sherpa.DeleteOnlineStream(stream3)

	stream3.AcceptWaveform(wave.SampleRate, wave.Samples)

	for spotter.IsReady(stream3) {
		spotter.Decode(stream3)
		result := spotter.GetResult(stream3)
		if result.Keyword != "" {
			log.Printf("Detected %v\n", result.Keyword)
		}
	}
}


================================================
FILE: go-api-examples/keyword-spotting-from-file/run.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -f ./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
fi

go mod tidy
go build
./keyword-spotting-from-file


================================================
FILE: go-api-examples/non-streaming-canary-decode-files/go.mod
================================================
module non-streaming-canary-decode-files

go 1.17


================================================
FILE: go-api-examples/non-streaming-canary-decode-files/main.go
================================================
package main

import (
	"bytes"
	"encoding/binary"
	"log"
	"os"
	"strings"

	sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
	"github.com/youpy/go-wav"
)

func main() {
	log.SetFlags(log.LstdFlags | log.Lmicroseconds)

	config := sherpa.OfflineRecognizerConfig{}

	config.ModelConfig.Canary.Encoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx"
	config.ModelConfig.Canary.Decoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx"
	config.ModelConfig.Canary.SrcLang = "en"
	config.ModelConfig.Canary.TgtLang = "en"
	config.ModelConfig.Canary.UsePnc = 1
	config.ModelConfig.Tokens = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt"

	waveFilename := "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav"

	samples, sampleRate := readWave(waveFilename)

	log.Println("Initializing recognizer (may take several seconds)")
	recognizer := sherpa.NewOfflineRecognizer(&config)
	log.Println("Recognizer created!")
	defer sherpa.DeleteOfflineRecognizer(recognizer)

	log.Println("Start decoding!")
	stream := sherpa.NewOfflineStream(recognizer)
	defer sherpa.DeleteOfflineStream(stream)

	stream.AcceptWaveform(sampleRate, samples)

	recognizer.Decode(stream)
	log.Println("Decoding done!")
	result := stream.GetResult()

	log.Println("Text in English: " + strings.ToLower(result.Text))

	s := sherpa.NewOfflineStream(recognizer)
	defer sherpa.DeleteOfflineStream(s)

	s.AcceptWaveform(sampleRate, samples)

	config.ModelConfig.Canary.TgtLang = "de"
	recognizer.SetConfig(&config)
	recognizer.Decode(s)
	result = s.GetResult()

	log.Println("Text in German: " + strings.ToLower(result.Text))
}

func readWave(filename string) (samples []float32, sampleRate int) {
	file, _ := os.Open(filename)
	defer file.Close()

	reader := wav.NewReader(file)
	format, err := reader.Format()
	if err != nil {
		log.Fatalf("Failed to read wave format")
	}

	if format.AudioFormat != 1 {
		log.Fatalf("Support only PCM format. Given: %v\n", format.AudioFormat)
	}

	if format.NumChannels != 1 {
		log.Fatalf("Support only 1 channel wave file. Given: %v\n", format.NumChannels)
	}

	if format.BitsPerSample != 16 {
		log.Fatalf("Support only 16-bit per sample. Given: %v\n", format.BitsPerSample)
	}

	reader.Duration() // so that it initializes reader.Size

	buf := make([]byte, reader.Size)
	n, err := reader.Read(buf)
	if n != int(reader.Size) {
		log.Fatalf("Failed to read %v bytes. Returned %v bytes\n", reader.Size, n)
	}

	samples = samplesInt16ToFloat(buf)
	sampleRate = int(format.SampleRate)

	return
}

func samplesInt16ToFloat(inSamples []byte) []float32 {
	numSamples := len(inSamples) / 2
	outSamples := make([]float32, numSamples)

	for i := 0; i != numSamples; i++ {
		s := inSamples[i*2 : (i+1)*2]

		var s16 int16
		buf := bytes.NewReader(s)
		err := binary.Read(buf, binary.LittleEndian, &s16)
		if err != nil {
			log.Fatal("Failed to parse 16-bit sample")
		}
		outSamples[i] = float32(s16) / 32768
	}

	return outSamples
}


================================================
FILE: go-api-examples/non-streaming-canary-decode-files/run.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
fi

go mod tidy
go build
./non-streaming-canary-decode-files


================================================
FILE: go-api-examples/non-streaming-funasr-nano-decode-files/go.mod
================================================
module non-streaming-funasr-nano-decode-files

go 1.17


================================================
FILE: go-api-examples/non-streaming-funasr-nano-decode-files/main.go
================================================
package main

import (
	"bytes"
	"encoding/binary"
	"log"
	"os"
	"strings"

	sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
	"github.com/youpy/go-wav"
)

func main() {
	log.SetFlags(log.LstdFlags | log.Lmicroseconds)

	config := sherpa.OfflineRecognizerConfig{}

	config.ModelConfig.FunAsrNano.EncoderAdaptor = "./sherpa-onnx-funasr-nano-int8-2025-12-30/encoder_adaptor.int8.onnx"
	config.ModelConfig.FunAsrNano.LLM = "./sherpa-onnx-funasr-nano-int8-2025-12-30/llm.int8.onnx"
	config.ModelConfig.FunAsrNano.Embedding = "./sherpa-onnx-funasr-nano-int8-2025-12-30/embedding.int8.onnx"
	config.ModelConfig.FunAsrNano.Tokenizer = "./sherpa-onnx-funasr-nano-int8-2025-12-30/Qwen3-0.6B"
	// Seed for reproducibility (default: 42)
	config.ModelConfig.FunAsrNano.Seed = 42

	config.ModelConfig.Tokens = ""

	waveFilename := "./sherpa-onnx-funasr-nano-int8-2025-12-30/test_wavs/lyrics.wav"

	samples, sampleRate := readWave(waveFilename)

	log.Println("Initializing recognizer (may take several seconds)")
	recognizer := sherpa.NewOfflineRecognizer(&config)
	log.Println("Recognizer created!")
	defer sherpa.DeleteOfflineRecognizer(recognizer)

	log.Println("Start decoding!")
	stream := sherpa.NewOfflineStream(recognizer)
	defer sherpa.DeleteOfflineStream(stream)

	stream.AcceptWaveform(sampleRate, samples)

	recognizer.Decode(stream)
	log.Println("Decoding done!")
	result := stream.GetResult()

	log.Println("Text: " + strings.ToLower(result.Text))
}

func readWave(filename string) (samples []float32, sampleRate int) {
	file, _ := os.Open(filename)
	defer file.Close()

	reader := wav.NewReader(file)
	format, err := reader.Format()
	if err != nil {
		log.Fatalf("Failed to read wave format")
	}

	if format.AudioFormat != 1 {
		log.Fatalf("Support only PCM format. Given: %v\n", format.AudioFormat)
	}

	if format.NumChannels != 1 {
		log.Fatalf("Support only 1 channel wave file. Given: %v\n", format.NumChannels)
	}

	if format.BitsPerSample != 16 {
		log.Fatalf("Support only 16-bit per sample. Given: %v\n", format.BitsPerSample)
	}

	reader.Duration() // so that it initializes reader.Size

	buf := make([]byte, reader.Size)
	n, err := reader.Read(buf)
	if n != int(reader.Size) {
		log.Fatalf("Failed to read %v bytes. Returned %v bytes\n", reader.Size, n)
	}

	samples = samplesInt16ToFloat(buf)
	sampleRate = int(format.SampleRate)

	return
}

func samplesInt16ToFloat(inSamples []byte) []float32 {
	numSamples := len(inSamples) / 2
	outSamples := make([]float32, numSamples)

	for i := 0; i != numSamples; i++ {
		s := inSamples[i*2 : (i+1)*2]

		var s16 int16
		buf := bytes.NewReader(s)
		err := binary.Read(buf, binary.LittleEndian, &s16)
		if err != nil {
			log.Fatal("Failed to parse 16-bit sample")
		}
		outSamples[i] = float32(s16) / 32768
	}

	return outSamples
}


================================================
FILE: go-api-examples/non-streaming-funasr-nano-decode-files/run.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -f ./sherpa-onnx-funasr-nano-int8-2025-12-30/embedding.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
  tar xvf sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
  rm sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
fi

go mod tidy
go build

./non-streaming-funasr-nano-decode-files


================================================
FILE: go-api-examples/non-streaming-medasr-ctc-decode-files/go.mod
================================================
module non-streaming-medasr-ctc-decode-files

go 1.17


================================================
FILE: go-api-examples/non-streaming-medasr-ctc-decode-files/main.go
================================================
package main

import (
	"bytes"
	"encoding/binary"
	"log"
	"os"
	"strings"

	sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
	"github.com/youpy/go-wav"
)

func main() {
	log.SetFlags(log.LstdFlags | log.Lmicroseconds)

	config := sherpa.OfflineRecognizerConfig{}

	config.ModelConfig.MedAsr.Model = "./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/model.int8.onnx"
	config.ModelConfig.Tokens = "./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/tokens.txt"

	waveFilename := "./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/test_wavs/0.wav"

	samples, sampleRate := readWave(waveFilename)

	log.Println("Initializing recognizer (may take several seconds)")
	recognizer := sherpa.NewOfflineRecognizer(&config)
	log.Println("Recognizer created!")
	defer sherpa.DeleteOfflineRecognizer(recognizer)

	log.Println("Start decoding!")
	stream := sherpa.NewOfflineStream(recognizer)
	defer sherpa.DeleteOfflineStream(stream)

	stream.AcceptWaveform(sampleRate, samples)

	recognizer.Decode(stream)
	log.Println("Decoding done!")
	result := stream.GetResult()

	log.Println("Text: " + strings.ToLower(result.Text))
}

func readWave(filename string) (samples []float32, sampleRate int) {
	file, _ := os.Open(filename)
	defer file.Close()

	reader := wav.NewReader(file)
	format, err := reader.Format()
	if err != nil {
		log.Fatalf("Failed to read wave format")
	}

	if format.AudioFormat != 1 {
		log.Fatalf("Support only PCM format. Given: %v\n", format.AudioFormat)
	}

	if format.NumChannels != 1 {
		log.Fatalf("Support only 1 channel wave file. Given: %v\n", format.NumChannels)
	}

	if format.BitsPerSample != 16 {
		log.Fatalf("Support only 16-bit per sample. Given: %v\n", format.BitsPerSample)
	}

	reader.Duration() // so that it initializes reader.Size

	buf := make([]byte, reader.Size)
	n, err := reader.Read(buf)
	if n != int(reader.Size) {
		log.Fatalf("Failed to read %v bytes. Returned %v bytes\n", reader.Size, n)
	}

	samples = samplesInt16ToFloat(buf)
	sampleRate = int(format.SampleRate)

	return
}

func samplesInt16ToFloat(inSamples []byte) []float32 {
	numSamples := len(inSamples) / 2
	outSamples := make([]float32, numSamples)

	for i := 0; i != numSamples; i++ {
		s := inSamples[i*2 : (i+1)*2]

		var s16 int16
		buf := bytes.NewReader(s)
		err := binary.Read(buf, binary.LittleEndian, &s16)
		if err != nil {
			log.Fatal("Failed to parse 16-bit sample")
		}
		outSamples[i] = float32(s16) / 32768
	}

	return outSamples
}


================================================
FILE: go-api-examples/non-streaming-medasr-ctc-decode-files/run.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -f ./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
  tar xvf sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
  rm sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
fi

go mod tidy
go build
./non-streaming-medasr-ctc-decode-files


================================================
FILE: go-api-examples/non-streaming-moonshine-v2-decode-files/go.mod
================================================
module non-streaming-moonshine-v2-decode-files

go 1.17


================================================
FILE: go-api-examples/non-streaming-moonshine-v2-decode-files/main.go
================================================
package main

import (
	"bytes"
	"encoding/binary"
	"log"
	"os"
	"strings"

	sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
	"github.com/youpy/go-wav"
)

func main() {
	log.SetFlags(log.LstdFlags | log.Lmicroseconds)

	config := sherpa.OfflineRecognizerConfig{}

	config.ModelConfig.Moonshine.Encoder = "./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/encoder_model.ort"
	config.ModelConfig.Moonshine.MergedDecoder = "./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/decoder_model_merged.ort"
	config.ModelConfig.Tokens = "./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/tokens.txt"

	waveFilename := "./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/test_wavs/0.wav"

	samples, sampleRate := readWave(waveFilename)

	log.Println("Initializing recognizer (may take several seconds)")
	recognizer := sherpa.NewOfflineRecognizer(&config)
	log.Println("Recognizer created!")
	defer sherpa.DeleteOfflineRecognizer(recognizer)

	log.Println("Start decoding!")
	stream := sherpa.NewOfflineStream(recognizer)
	defer sherpa.DeleteOfflineStream(stream)

	stream.AcceptWaveform(sampleRate, samples)

	recognizer.Decode(stream)
	log.Println("Decoding done!")
	result := stream.GetResult()

	log.Println("Text: " + strings.ToLower(result.Text))
}

func readWave(filename string) (samples []float32, sampleRate int) {
	file, _ := os.Open(filename)
	defer file.Close()

	reader := wav.NewReader(file)
	format, err := reader.Format()
	if err != nil {
		log.Fatalf("Failed to read wave format")
	}

	if format.AudioFormat != 1 {
		log.Fatalf("Support only PCM format. Given: %v\n", format.AudioFormat)
	}

	if format.NumChannels != 1 {
		log.Fatalf("Support only 1 channel wave file. Given: %v\n", format.NumChannels)
	}

	if format.BitsPerSample != 16 {
		log.Fatalf("Support only 16-bit per sample. Given: %v\n", format.BitsPerSample)
	}

	reader.Duration() // so that it initializes reader.Size

	buf := make([]byte, reader.Size)
	n, err := reader.Read(buf)
	if n != int(reader.Size) {
		log.Fatalf("Failed to read %v bytes. Returned %v bytes\n", reader.Size, n)
	}

	samples = samplesInt16ToFloat(buf)
	sampleRate = int(format.SampleRate)

	return
}

func samplesInt16ToFloat(inSamples []byte) []float32 {
	numSamples := len(inSamples) / 2
	outSamples := make([]float32, numSamples)

	for i := 0; i != numSamples; i++ {
		s := inSamples[i*2 : (i+1)*2]

		var s16 int16
		buf := bytes.NewReader(s)
		err := binary.Read(buf, binary.LittleEndian, &s16)
		if err != nil {
			log.Fatal("Failed to parse 16-bit sample")
		}
		outSamples[i] = float32(s16) / 32768
	}

	return outSamples
}


================================================
FILE: go-api-examples/non-streaming-moonshine-v2-decode-files/run.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -f ./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/encoder_model.ort ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
  tar xvf sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
  rm sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
fi

go mod tidy
go build
./non-streaming-moonshine-v2-decode-files


================================================
FILE: go-api-examples/non-streaming-omnilingual-asr-ctc-decode-files/go.mod
================================================
module non-streaming-omnilingual-asr-ctc-decode-files

go 1.17


================================================
FILE: go-api-examples/non-streaming-omnilingual-asr-ctc-decode-files/main.go
================================================
package main

import (
	"bytes"
	"encoding/binary"
	"log"
	"os"
	"strings"

	sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
	"github.com/youpy/go-wav"
)

func main() {
	log.SetFlags(log.LstdFlags | log.Lmicroseconds)

	config := sherpa.OfflineRecognizerConfig{}

	config.ModelConfig.Omnilingual.Model = "./sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/model.int8.onnx"
	config.ModelConfig.Tokens = "./sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/tokens.txt"

	waveFilename := "./sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/test_wavs/en.wav"

	samples, sampleRate := readWave(waveFilename)

	log.Println("Initializing recognizer (may take several seconds)")
	recognizer := sherpa.NewOfflineRecognizer(&config)
	log.Println("Recognizer created!")
	defer sherpa.DeleteOfflineRecognizer(recognizer)

	log.Println("Start decoding!")
	stream := sherpa.NewOfflineStream(recognizer)
	defer sherpa.DeleteOfflineStream(stream)

	stream.AcceptWaveform(sampleRate, samples)

	recognizer.Decode(stream)
	log.Println("Decoding done!")
	result := stream.GetResult()

	log.Println("Text: " + strings.ToLower(result.Text))
}

func readWave(filename string) (samples []float32, sampleRate int) {
	file, _ := os.Open(filename)
	defer file.Close()

	reader := wav.NewReader(file)
	format, err := reader.Format()
	if err != nil {
		log.Fatalf("Failed to read wave format")
	}

	if format.AudioFormat != 1 {
		log.Fatalf("Support only PCM format. Given: %v\n", format.AudioFormat)
	}

	if format.NumChannels != 1 {
		log.Fatalf("Support only 1 channel wave file. Given: %v\n", format.NumChannels)
	}

	if format.BitsPerSample != 16 {
		log.Fatalf("Support only 16-bit per sample. Given: %v\n", format.BitsPerSample)
	}

	reader.Duration() // so that it initializes reader.Size

	buf := make([]byte, reader.Size)
	n, err := reader.Read(buf)
	if n != int(reader.Size) {
		log.Fatalf("Failed to read %v bytes. Returned %v bytes\n", reader.Size, n)
	}

	samples = samplesInt16ToFloat(buf)
	sampleRate = int(format.SampleRate)

	return
}

func samplesInt16ToFloat(inSamples []byte) []float32 {
	numSamples := len(inSamples) / 2
	outSamples := make([]float32, numSamples)

	for i := 0; i != numSamples; i++ {
		s := inSamples[i*2 : (i+1)*2]

		var s16 int16
		buf := bytes.NewReader(s)
		err := binary.Read(buf, binary.LittleEndian, &s16)
		if err != nil {
			log.Fatal("Failed to parse 16-bit sample")
		}
		outSamples[i] = float32(s16) / 32768
	}

	return outSamples
}


================================================
FILE: go-api-examples/non-streaming-omnilingual-asr-ctc-decode-files/run.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -f sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
  tar xvf sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
  rm sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
fi

go mod tidy
go build
./non-streaming-omnilingual-asr-ctc-decode-files


================================================
FILE: go-api-examples/non-streaming-speaker-diarization/go.mod
================================================
module non-streaming-speaker-diarization

go 1.17


================================================
FILE: go-api-examples/non-streaming-speaker-diarization/main.go
================================================
package main

import (
	sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
	"log"
)

/*
Usage:

Step 1: Download a speaker segmentation model

Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
for a list of available models. The following is an example

  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2

Step 2: Download a speaker embedding extractor model

Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
for a list of available models. The following is an example

  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

Step 3. Download test wave files

Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
for a list of available test wave files. The following is an example

  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav

Step 4. Run it
*/

func initSpeakerDiarization() *sherpa.OfflineSpeakerDiarization {
	config := sherpa.OfflineSpeakerDiarizationConfig{}

	config.Segmentation.Pyannote.Model = "./sherpa-onnx-pyannote-segmentation-3-0/model.onnx"
	config.Embedding.Model = "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx"

	// The test wave file contains 4 speakers, so we use 4 here
	config.Clustering.NumClusters = 4

	// if you don't know the actual numbers in the wave file,
	// then please don't set NumClusters; you need to use
	//
	// config.Clustering.Threshold = 0.5
	//

	// A larger Threshold leads to fewer clusters
	// A smaller Threshold leads to more clusters

	sd := sherpa.NewOfflineSpeakerDiarization(&config)
	return sd
}

func main() {
	wave_filename := "./0-four-speakers-zh.wav"
	wave := sherpa.ReadWave(wave_filename)
	if wave == nil {
		log.Printf("Failed to read %v", wave_filename)
		return
	}

	sd := initSpeakerDiarization()
	if sd == nil {
		log.Printf("Please check your config")
		return
	}

	defer sherpa.DeleteOfflineSpeakerDiarization(sd)

	if wave.SampleRate != sd.SampleRate() {
		log.Printf("Expected sample rate: %v, given: %d\n", sd.SampleRate(), wave.SampleRate)
		return
	}

	log.Println("Started")
	segments := sd.Process(wave.Samples)
	n := len(segments)

	for i := 0; i < n; i++ {
		log.Printf("%.3f -- %.3f speaker_%02d\n", segments[i].Start, segments[i].End, segments[i].Speaker)
	}
}


================================================
FILE: go-api-examples/non-streaming-speaker-diarization/run.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -f ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
fi

if [ ! -f ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
fi

if [ ! -f ./0-four-speakers-zh.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
fi

go mod tidy
go build
./non-streaming-speaker-diarization


================================================
FILE: go-api-examples/non-streaming-tts/go.mod
================================================
module non-streaming-tts

go 1.17


================================================
FILE: go-api-examples/non-streaming-tts/main.go
================================================
package main

import (
	"log"
	"math"

	sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
	flag "github.com/spf13/pflag"
)

func main() {
	log.SetFlags(log.LstdFlags | log.Lmicroseconds)

	config := sherpa.OfflineTtsConfig{}
	sid := 0
	filename := "./generated.wav"

	var speed float32

	flag.StringVar(&config.Model.Vits.Model, "vits-model", "", "Path to the vits ONNX model")
	flag.StringVar(&config.Model.Vits.Lexicon, "vits-lexicon", "", "Path to lexicon.txt")
	flag.StringVar(&config.Model.Vits.Tokens, "vits-tokens", "", "Path to tokens.txt")
	flag.StringVar(&config.Model.Vits.DataDir, "vits-data-dir", "", "Path to espeak-ng-data")
	flag.Float32Var(&config.Model.Vits.NoiseScale, "vits-noise-scale", 0.667, "noise_scale for VITS")
	flag.Float32Var(&config.Model.Vits.NoiseScaleW, "vits-noise-scale-w", 0.8, "noise_scale_w for VITS")
	flag.Float32Var(&config.Model.Vits.LengthScale, "vits-length-scale", 1.0, "length_scale for VITS. small -> faster; large -> slower")

	flag.StringVar(&config.Model.Matcha.AcousticModel, "matcha-acoustic-model", "", "Path to the matcha acoustic model")
	flag.StringVar(&config.Model.Matcha.Vocoder, "matcha-vocoder", "", "Path to the matcha vocoder model")
	flag.StringVar(&config.Model.Matcha.Lexicon, "matcha-lexicon", "", "Path to lexicon.txt")
	flag.StringVar(&config.Model.Matcha.Tokens, "matcha-tokens", "", "Path to tokens.txt")
	flag.StringVar(&config.Model.Matcha.DataDir, "matcha-data-dir", "", "Path to espeak-ng-data")
	flag.Float32Var(&config.Model.Matcha.NoiseScale, "matcha-noise-scale", 0.667, "noise_scale for Matcha")
	flag.Float32Var(&config.Model.Matcha.LengthScale, "matcha-length-scale", 1.0, "length_scale for Matcha. small -> faster; large -> slower")

	flag.StringVar(&config.Model.Kokoro.Model, "kokoro-model", "", "Path to the Kokoro ONNX model")
	flag.StringVar(&config.Model.Kokoro.Voices, "kokoro-voices", "", "Path to voices.bin for Kokoro")
	flag.StringVar(&config.Model.Kokoro.Tokens, "kokoro-tokens", "", "Path to tokens.txt for Kokoro")
	flag.StringVar(&config.Model.Kokoro.DataDir, "kokoro-data-dir", "", "Path to espeak-ng-data for Kokoro")
	flag.StringVar(&config.Model.Kokoro.Lexicon, "kokoro-lexicon", "", "Path to lexicon files for Kokoro")
	flag.Float32Var(&config.Model.Kokoro.LengthScale, "kokoro-length-scale", 1.0, "length_scale for Kokoro. small -> faster; large -> slower")

	flag.StringVar(&config.Model.Kitten.Model, "kitten-model", "", "Path to the kitten ONNX model")
	flag.StringVar(&config.Model.Kitten.Voices, "kitten-voices", "", "Path to voices.bin for kitten")
	flag.StringVar(&config.Model.Kitten.Tokens, "kitten-tokens", "", "Path to tokens.txt for kitten")
	flag.StringVar(&config.Model.Kitten.DataDir, "kitten-data-dir", "", "Path to espeak-ng-data for kitten")
	flag.Float32Var(&config.Model.Kitten.LengthScale, "kitten-length-scale", 1.0, "length_scale for kitten. small -> faster; large -> slower")

	flag.Float32Var(&speed, "speed", 1.0, "Speech speed. larger->faster; smaller->slower")

	flag.IntVar(&config.Model.NumThreads, "num-threads", 1, "Number of threads for computing")
	flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message")
	flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use: cpu/cuda/coreml")
	flag.StringVar(&config.RuleFsts, "tts-rule-fsts", "", "Path to rule.fst")
	flag.StringVar(&config.RuleFars, "tts-rule-fars", "", "Path to rule.far")
	flag.IntVar(&config.MaxNumSentences, "tts-max-num-sentences", 1, "Batch size (split long text to avoid OOM)")

	flag.IntVar(&sid, "sid", sid, "Speaker ID (multi-speaker models only)")
	flag.StringVar(&filename, "output-filename", filename, "Output wav filename")

	flag.Parse()

	if len(flag.Args()) != 1 {
		log.Fatalf("Please provide the text to generate audios")
	}
	text := flag.Arg(0)

	log.Println("Input text:", text)
	log.Println("Speaker ID:", sid)
	log.Println("Output filename:", filename)

	log.Println("Initializing model (may take several seconds)")
	tts := sherpa.NewOfflineTts(&config)
	defer sherpa.DeleteOfflineTts(tts)
	log.Println("Model created!")

	log.Println("Start generating!")
	cfg := sherpa.GenerationConfig{
		SilenceScale: 0.2,
		Speed:        float32(math.Max(float64(speed), 1e-6)),
		Sid:          sid,
	}
	audio := tts.GenerateWithConfig(text, &cfg, nil)

	log.Println("Done!")
	if ok := audio.Save(filename); !ok {
		log.Fatalf("Failed to write %s", filename)
	}
	log.Println("Saved to", filename)
}


================================================
FILE: go-api-examples/non-streaming-tts/run-kitten-en.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -f ./kitten-nano-en-v0_1-fp16/model.fp16.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2
  tar xf kitten-nano-en-v0_1-fp16.tar.bz2
  rm kitten-nano-en-v0_1-fp16.tar.bz2
fi

go mod tidy
go build

./non-streaming-tts \
  --kitten-model=./kitten-nano-en-v0_1-fp16/model.fp16.onnx \
  --kitten-voices=./kitten-nano-en-v0_1-fp16/voices.bin \
  --kitten-tokens=./kitten-nano-en-v0_1-fp16/tokens.txt \
  --kitten-data-dir=./kitten-nano-en-v0_1-fp16/espeak-ng-data \
  --debug=1 \
  --output-filename=./test-kitten-en.wav \
  "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."


================================================
FILE: go-api-examples/non-streaming-tts/run-kokoro-en.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -f ./kokoro-en-v0_19/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
  tar xf kokoro-en-v0_19.tar.bz2
  rm kokoro-en-v0_19.tar.bz2
fi

go mod tidy
go build

./non-streaming-tts \
  --kokoro-model=./kokoro-en-v0_19/model.onnx \
  --kokoro-voices=./kokoro-en-v0_19/voices.bin \
  --kokoro-tokens=./kokoro-en-v0_19/tokens.txt \
  --kokoro-data-dir=./kokoro-en-v0_19/espeak-ng-data \
  --debug=1 \
  --output-filename=./test-kokoro-en.wav \
  "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."


================================================
FILE: go-api-examples/non-streaming-tts/run-kokoro-zh-en.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -f ./kokoro-multi-lang-v1_0/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
  tar xf kokoro-multi-lang-v1_0.tar.bz2
  rm kokoro-multi-lang-v1_0.tar.bz2
fi

go mod tidy
go build

./non-streaming-tts \
  --kokoro-model=./kokoro-multi-lang-v1_0/model.onnx \
  --kokoro-voices=./kokoro-multi-lang-v1_0/voices.bin \
  --kokoro-tokens=./kokoro-multi-lang-v1_0/tokens.txt \
  --kokoro-data-dir=./kokoro-multi-lang-v1_0/espeak-ng-data \
  --kokoro-lexicon=./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt \
  --debug=1 \
  --output-filename=./test-kokoro-zh-en.wav \
  "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢？"


================================================
FILE: go-api-examples/non-streaming-tts/run-matcha-en.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
# matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
  tar xf matcha-icefall-en_US-ljspeech.tar.bz2
  rm matcha-icefall-en_US-ljspeech.tar.bz2
fi

if [ ! -f ./vocos-22khz-univ.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
fi

go mod tidy
go build

./non-streaming-tts \
  --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
  --matcha-vocoder=./vocos-22khz-univ.onnx \
  --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
  --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
  --debug=1 \
  --output-filename=./test-matcha-en.wav \
  "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."


================================================
FILE: go-api-examples/non-streaming-tts/run-matcha-zh.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
  tar xvf matcha-icefall-zh-baker.tar.bz2
  rm matcha-icefall-zh-baker.tar.bz2
fi

if [ ! -f ./vocos-22khz-univ.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
fi

go mod tidy
go build

./non-streaming-tts \
  --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
  --matcha-vocoder=./vocos-22khz-univ.onnx \
  --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \
  --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \
  --debug=1 \
  --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
  --output-filename=./test-matcha-zh.wav \
  "某某银行的副行长和一些行政领导表示，他们去过长江和长白山; 经济不断增长。2024年12月31号，拨打110或者18920240511。123456块钱。"


================================================
FILE: go-api-examples/non-streaming-tts/run-vits-ljs.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -d vits-ljs ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-ljs.tar.bz2
  tar xvf vits-ljs.tar.bz2
  rm vits-ljs.tar.bz2
fi

go mod tidy
go build

./non-streaming-tts \
  --vits-model=./vits-ljs/vits-ljs.onnx \
  --vits-lexicon=./vits-ljs/lexicon.txt \
  --vits-tokens=./vits-ljs/tokens.txt \
  --sid=0 \
  --debug=1 \
  --output-filename=./vits-ljs.wav \
  "Liliana, the most beautiful and lovely assistant of our team!"


================================================
FILE: go-api-examples/non-streaming-tts/run-vits-piper-en_US-lessac-medium.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -d vits-piper-en_US-lessac-medium ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-lessac-medium.tar.bz2
  tar xf vits-piper-en_US-lessac-medium.tar.bz2
  rm vits-piper-en_US-lessac-medium.tar.bz2
fi

go mod tidy
go build

./non-streaming-tts \
  --vits-model=./vits-piper-en_US-lessac-medium/en_US-lessac-medium.onnx \
  --vits-data-dir=./vits-piper-en_US-lessac-medium/espeak-ng-data \
  --vits-tokens=./vits-piper-en_US-lessac-medium/tokens.txt \
  --output-filename=./liliana-piper-en_US-lessac-medium.wav \
  'liliana, the most beautiful and lovely assistant of our team!'


================================================
FILE: go-api-examples/non-streaming-tts/run-vits-vctk.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -d vits-vctk ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-vctk.tar.bz2
  tar xvf vits-vctk.tar.bz2
  rm vits-vctk.tar.bz2
fi

go mod tidy
go build

for sid in 0 10 108; do
./non-streaming-tts \
  --vits-model=./vits-vctk/vits-vctk.onnx \
  --vits-lexicon=./vits-vctk/lexicon.txt \
  --vits-tokens=./vits-vctk/tokens.txt \
  --sid=0 \
  --debug=1 \
  --output-filename=./kennedy-$sid.wav \
  'Ask not what your country can do for you; ask what you can do for your country.'
done


================================================
FILE: go-api-examples/non-streaming-tts/run-vits-zh-aishell3.sh
================================================
#!/usr/bin/env bash
set -ex

export CGO_ENABLED=1

if [ ! -d vits-icefall-zh-aishell3 ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
  tar xvf vits-icefall-zh-aishell3.tar.bz2
  rm vits-icefall-zh-aishell3.tar.bz2
fi

go mod tidy
go build

for sid in 10 33 99; do
./non-streaming-tts \
  --vits-model=./vits-icefall-zh-aishell3/model.onnx \
  --vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
  --vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \
  --sid=$sid \
  --debug=1 \
  --output-filename=./liliana-$sid.wav \
  "林美丽最美丽、最漂亮、最可爱！"

./non-streaming-tts \
  --vits-model=./vits-icefall-zh-aishell3/model.onnx \
  --vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
  --vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \
  --tts-rule-fsts=./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst \
  --sid=$sid \
  --debug=1 \
  --output-filename=./numbers-$sid.wav \
  "数字12345.6789怎么念"

./non-streaming-tts \
  --vits-model=./vits-icefall-zh-aishell3/model.onnx \
  --vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
  --vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \
  --tts-rule-fsts=./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst \
  --tts-rule-fars=./vits-icefall-zh-aishell3/rule.far \
  --sid=$sid \
  --debug=1 \
  --output-filename=./heteronym-$sid.wav \
  "万古长存长沙长大长白山长孙长安街"
done


================================================
FILE: go-api-examples/offline-tts-play/go.mod
================================================
module offline-tts-play

go 1.24.0


================================================
FILE: go-api-examples/offline-tts-play/main.go
================================================
package main

import (
	"encoding/binary"
	"io"
	"log"
	"math"
	"os"
	"os/signal"
	"sync"
	"syscall"
	"time"

	oto "github.com/ebitengine/oto/v3"
	sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
	flag "github.com/spf13/pflag"
)

type pcmBuffer struct {
	mu       sync.Mutex
	queue    [][]byte
	finished bool
	started  chan struct{} // closed on first callback
	once     sync.Once
}

func newPCMBuffer() *pcmBuffer {
	return &pcmBuffer{
		started: make(chan struct{}),
	}
}

func (b *pcmBuffer) Push(p []byte) {
	b.once.Do(func() {
		close(b.started)
	})

	b.mu.Lock()
	b.queue = append(b.queue, p)
	b.mu.Unlock()
}

func (b *pcmBuffer) Finish() {
	b.once.Do(func() {
		close(b.started)
	})

	b.mu.Lock()
	b.finished = true
	b.mu.Unlock()
}

type pcmReader struct {
	buf  *pcmBuffer
	done chan struct{}
	once sync.Once
}

func (r *pcmReader) Read(p []byte) (int, error) {
	<-r.buf.started

	r.buf.mu.Lock()
	defer r.buf.mu.Unlock()

	// 2) Have audio
	if len(r.buf.queue) > 0 {
		chunk := r.buf.queue[0]
		n := copy(p, chunk)

		if n == len(chunk) {
			r.buf.queue = r.buf.queue[1:]
		} else {
			r.buf.queue[0] = chunk[n:]
		}
		return n, nil
	}

	// 3) Finished → EOF
	if r.buf.finished {
		r.once.Do(func() { close(r.done) })
		return 0, io.EOF
	}

	// 4) Gap → silence
	for i := range p {
		p[i] = 0
	}
	return len(p), nil
}

func main() {
	log.SetFlags(log.LstdFlags | log.Lmicroseconds)

	config := sherpa.OfflineTtsConfig{}
	sid := 0
	filename := "./generated.wav"

	flag.StringVar(&config.Model.Vits.Model, "vits-model", "", "Path to the vits ONNX model")
	flag.StringVar(&config.Model.Vits.Lexicon, "vits-lexicon", "", "Path to lexicon.txt")
	flag.StringVar(&config.Model.Vits.Tokens, "vits-tokens", "", "Path to tokens.txt")
	flag.StringVar(&config.Model.Vits.DataDir, "vits-data-dir", "", "Path to espeak-ng-data")

	flag.Float32Var(&config.Model.Vits.NoiseScale, "vits-noise-scale", 0.667, "noise_scale for VITS")
	flag.Float32Var(&config.Model.Vits.NoiseScaleW, "vits-noise-scale-w", 0.8, "noise_scale_w for VITS")
	flag.Float32Var(&config.Model.Vits.LengthScale, "vits-length-scale", 1.0, "length_scale for VITS. small -> faster in speech speed; large -> slower")

	flag.StringVar(&config.Model.Matcha.AcousticModel, "matcha-acoustic-model", "", "Path to the matcha acoustic model")
	flag.StringVar(&config.Model.Matcha.Vocoder, "matcha-vocoder", "", "Path to the matcha vocoder model")
	flag.StringVar(&config.Model.Matcha.Lexicon, "matcha-lexicon", "", "Path to lexicon.txt")
	flag.StringVar(&config.Model.Matcha.Tokens, "matcha-tokens", "", "Path to tokens.txt")
	flag.StringVar(&config.Model.Matcha.DataDir, "matcha-data-dir", "", "Path to espeak-ng-data")

	flag.Float32Var(&config.Model.Matcha.NoiseScale, "matcha-noise-scale", 0.667, "noise_scale for Matcha")
	flag.Float32Var(&config.Model.Matcha.LengthScale, "matcha-length-scale", 1.0, "length_scale for Matcha. small -> faster in speech speed; large -> slower")

	flag.StringVar(&config.Model.Kokoro.Model, "kokoro-model", "", "Path to the Kokoro ONNX model")
	flag.StringVar(&config.Model.Kokoro.Voices, "kokoro-voices", "", "Path to voices.bin for Kokoro")
	flag.StringVar(&config.Model.Kokoro.Tokens, "kokoro-tokens", "", "Path to tokens.txt for Kokoro")
	flag.StringVar(&config.Model.Kokoro.DataDir, "kokoro-data-dir", "", "Path to espeak-ng-data for Kokoro")
	flag.StringVar(&config.Model.Kokoro.Lexicon, "kokoro-lexicon", "", "Path to lexicon files for Kokoro")
	flag.Float32Var(&config.Model.Kokoro.LengthScale, "kokoro-length-scale", 1.0, "length_scale for Kokoro. small -> faster in speech speed; large -> slower")

	flag.StringVar(&config.Model.Kitten.Model, "kitten-model", "", "Path to the kitten ONNX model")
	flag.StringVar(&config.Model.Kitten.Voices, "kitten-voices", "", "Path to voices.bin for kitten")
	flag.StringVar(&config.Model.Kitten.Tokens, "kitten-tokens", "", "Path to tokens.txt for kitten")
	flag.StringVar(&config.Model.Kitten.DataDir, "kitten-data-dir", "", "Path to espeak-ng-data for kitten")
	flag.Float32Var(&config.Model.Kitten.LengthScale, "kitten-length-scale", 1.0, "length_scale for kitten. small -> faster in speech speed; large -> slower")

	flag.IntVar(&config.Model.NumThreads, "num-threads", 1, "Number of threads for computing")
	flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message")
	flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use")
	flag.StringVar(&config.RuleFsts, "tts-rule-fsts", "", "Path to rule.fst")
	flag.StringVar(&config.RuleFars, "tts-rule-fars", "", "Path to rule.far")
	flag.IntVar(&config.MaxNumSentences, "tts-max-num-sentences", 1, "Batch size")

	flag.IntVar(&sid, "sid", sid, "Speaker ID. Used only for multi-speaker models")
	flag.StringVar(&filename, "output-filename", filename, "Output wav filename")

	flag.Parse()

	if len(flag.Args()) != 1 {
		log.Fatalf("Please provide the text to generate audio")
	}

	text := flag.Arg(0)

	log.Println("Input text:", text)
	log.Println("Speaker ID:", sid)

	log.Println("Initializing model (may take several seconds)")
	tts := sherpa.NewOfflineTts(&config)
	defer sherpa.DeleteOfflineTts(tts)
	log.Println("Model created!")

	ctx, ready, err := oto.NewContext(&oto.NewContextOptions{
		SampleRate:   tts.SampleRate(),
		ChannelCount: 1,
		Format:       oto.FormatSignedInt16LE,
	})
	if err != nil {
		log.Fatal(err)
	}
	<-ready

	pcmBuf := newPCMBuffer()

	reader := &pcmReader{
		buf:  pcmBuf,
		done: make(chan struct{}),
	}

	player := ctx.NewPlayer(reader)
	player.Play()
	defer player.Close()

	stop := make(chan os.Signal, 1)
	signal.Notify(stop, syscall.SIGINT, syscall.SIGTERM)

	var generated *sherpa.GeneratedAudio

	start := time.Now()
	cfg := sherpa.GenerationConfig{
		SilenceScale: 0.2,
		Speed:        1.0,
		Sid:          sid,
	}

	go func() {
		defer pcmBuf.Finish()

		generated = tts.GenerateWithConfig(
			text,
			&cfg,
			func(samples []float32, progress float32) bool {
				log.Printf("Progress: %.1f%%", progress*100)

				buf := make([]byte, len(samples)*2)
				for i, s := range samples {
					if s > 1 {
						s = 1
					} else if s < -1 {
						s = -1
					}
					v := int16(math.Round(float64(s * 32767)))
					binary.LittleEndian.PutUint16(buf[i*2:], uint16(v))
				}

				pcmBuf.Push(buf)
				return true
			},
		)

		log.Println("TTS generation finished in", time.Since(start))
	}()

	select {
	case <-stop:
		log.Println("Interrupted")
	case <-reader.done:
		log.Println("Playback finished")
	}

	if generated != nil {
		if ok := generated.Save(filename); !ok {
			log.Println("Failed to save audio")
		} else {
			log.Println("Saved generated audio to", filename)
		}
	}

	// let remaining audio drain
	time.Sleep(800 * time.Millisecond)

	log.Println("Done")
}


================================================
FILE: go-api-examples/offline-tts-play/run-kitten-en.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -f ./kitten-nano-en-v0_1-fp16/model.fp16.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2
  tar xf kitten-nano-en-v0_1-fp16.tar.bz2
  rm kitten-nano-en-v0_1-fp16.tar.bz2
fi

go mod tidy
go build

./offline-tts-play \
  --kitten-model=./kitten-nano-en-v0_1-fp16/model.fp16.onnx \
  --kitten-voices=./kitten-nano-en-v0_1-fp16/voices.bin \
  --kitten-tokens=./kitten-nano-en-v0_1-fp16/tokens.txt \
  --kitten-data-dir=./kitten-nano-en-v0_1-fp16/espeak-ng-data \
  --debug=1 \
  "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."


================================================
FILE: go-api-examples/offline-tts-play/run-kokoro-en.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -f ./kokoro-en-v0_19/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
  tar xf kokoro-en-v0_19.tar.bz2
  rm kokoro-en-v0_19.tar.bz2
fi

go mod tidy
go build

./offline-tts-play \
  --kokoro-model=./kokoro-en-v0_19/model.onnx \
  --kokoro-voices=./kokoro-en-v0_19/voices.bin \
  --kokoro-tokens=./kokoro-en-v0_19/tokens.txt \
  --kokoro-data-dir=./kokoro-en-v0_19/espeak-ng-data \
  --debug=1 \
  "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."


================================================
FILE: go-api-examples/offline-tts-play/run-kokoro-zh-en.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -f ./kokoro-multi-lang-v1_0/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
  tar xf kokoro-multi-lang-v1_0.tar.bz2
  rm kokoro-multi-lang-v1_0.tar.bz2
fi

go mod tidy
go build

./offline-tts-play \
  --kokoro-model=./kokoro-multi-lang-v1_0/model.onnx \
  --kokoro-voices=./kokoro-multi-lang-v1_0/voices.bin \
  --kokoro-tokens=./kokoro-multi-lang-v1_0/tokens.txt \
  --kokoro-data-dir=./kokoro-multi-lang-v1_0/espeak-ng-data \
  --kokoro-lexicon=./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt \
  --debug=1 \
  "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢？"


================================================
FILE: go-api-examples/offline-tts-play/run-matcha-en.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
# matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
  tar xf matcha-icefall-en_US-ljspeech.tar.bz2
  rm matcha-icefall-en_US-ljspeech.tar.bz2
fi

if [ ! -f ./vocos-22khz-univ.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
fi

go mod tidy
go build

./offline-tts-play \
  --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
  --matcha-vocoder=./vocos-22khz-univ.onnx \
  --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
  --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
  --debug=1 \
  "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."


================================================
FILE: go-api-examples/offline-tts-play/run-matcha-zh.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
  tar xvf matcha-icefall-zh-baker.tar.bz2
  rm matcha-icefall-zh-baker.tar.bz2
fi

if [ ! -f ./vocos-22khz-univ.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
fi

go mod tidy
go build

./offline-tts-play \
  --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
  --matcha-vocoder=./vocos-22khz-univ.onnx \
  --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \
  --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \
  --debug=0 \
  --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
  "某某银行的副行长和一些行政领导表示，他们去过长江和长白山; 经济不断增长。2024年12月31号，拨打110或者18920240511。123456块钱。"


================================================
FILE: go-api-examples/offline-tts-play/run-vits-ljs.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -d vits-ljs ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-ljs.tar.bz2
  tar xvf vits-ljs.tar.bz2
  rm vits-ljs.tar.bz2
fi

go mod tidy
go build

./offline-tts-play \
  --vits-model=./vits-ljs/vits-ljs.onnx \
  --vits-lexicon=./vits-ljs/lexicon.txt \
  --vits-tokens=./vits-ljs/tokens.txt \
  --sid=0 \
  --debug=1 \
  "Liliana, the most beautiful and lovely assistant of our team!"


================================================
FILE: go-api-examples/offline-tts-play/run-vits-piper-en_US-lessac-medium.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -d vits-piper-en_US-lessac-medium ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-lessac-medium.tar.bz2
  tar xf vits-piper-en_US-lessac-medium.tar.bz2
  rm vits-piper-en_US-lessac-medium.tar.bz2
fi

go mod tidy
go build

./offline-tts-play \
  --vits-model=./vits-piper-en_US-lessac-medium/en_US-lessac-medium.onnx \
  --vits-data-dir=./vits-piper-en_US-lessac-medium/espeak-ng-data \
  --vits-tokens=./vits-piper-en_US-lessac-medium/tokens.txt \
  'liliana, the most beautiful and lovely assistant of our team!'


================================================
FILE: go-api-examples/offline-tts-play/run-vits-vctk.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -d vits-vctk ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-vctk.tar.bz2
  tar xvf vits-vctk.tar.bz2
  rm vits-vctk.tar.bz2
fi

go mod tidy
go build

for sid in 0 10 108; do
./offline-tts-play \
  --vits-model=./vits-vctk/vits-vctk.onnx \
  --vits-lexicon=./vits-vctk/lexicon.txt \
  --vits-tokens=./vits-vctk/tokens.txt \
  --sid=0 \
  --debug=1 \
  'Ask not what your country can do for you; ask what you can do for your country.'
done


================================================
FILE: go-api-examples/offline-tts-play/run-vits-zh-aishell3.sh
================================================
#!/usr/bin/env bash
set -ex

export CGO_ENABLED=1

if [ ! -d vits-icefall-zh-aishell3 ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
  tar xvf vits-icefall-zh-aishell3.tar.bz2
  rm vits-icefall-zh-aishell3.tar.bz2
fi

go mod tidy
go build

for sid in 10 33 99; do
./offline-tts-play \
  --vits-model=./vits-icefall-zh-aishell3/model.onnx \
  --vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
  --vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \
  --sid=$sid \
  --debug=1 \
  "林美丽最美丽、最漂亮、最可爱！"

./offline-tts-play \
  --vits-model=./vits-icefall-zh-aishell3/model.onnx \
  --vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
  --vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \
  --tts-rule-fsts=./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst \
  --sid=$sid \
  --debug=1 \
  "数字12345.6789怎么念"

./offline-tts-play \
  --vits-model=./vits-icefall-zh-aishell3/model.onnx \
  --vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
  --vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \
  --tts-rule-fsts=./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst \
  --tts-rule-fars=./vits-icefall-zh-aishell3/rule.far \
  --sid=$sid \
  --debug=1 \
  "万古长存长沙长大长白山长孙长安街"
done


================================================
FILE: go-api-examples/speaker-identification/go.mod
================================================
module speaker-identification

go 1.17


================================================
FILE: go-api-examples/speaker-identification/main.go
================================================
package main

import (
	sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
	"log"
)

func createSpeakerEmbeddingExtractor() *sherpa.SpeakerEmbeddingExtractor {
	config := sherpa.SpeakerEmbeddingExtractorConfig{}

	// Please download the model from
	// https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx
	//
	// You can find more models at
	// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models

	config.Model = "./3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx"
	config.NumThreads = 1
	config.Debug = 1
	config.Provider = "cpu"

	ex := sherpa.NewSpeakerEmbeddingExtractor(&config)
	return ex
}

func computeEmbeddings(ex *sherpa.SpeakerEmbeddingExtractor, files []string) [][]float32 {
	embeddings := make([][]float32, len(files))

	for i, f := range files {
		wave := sherpa.ReadWave(f)

		stream := ex.CreateStream()
		defer sherpa.DeleteOnlineStream(stream)
		stream.AcceptWaveform(wave.SampleRate, wave.Samples)
		stream.InputFinished()
		embeddings[i] = ex.Compute(stream)
	}

	return embeddings

}

func registerSpeakers(ex *sherpa.SpeakerEmbeddingExtractor, manager *sherpa.SpeakerEmbeddingManager) {
	// Please download the test data from
	// https://github.com/csukuangfj/sr-data
	spk1_files := []string{
		"./sr-data/enroll/fangjun-sr-1.wav",
		"./sr-data/enroll/fangjun-sr-2.wav",
		"./sr-data/enroll/fangjun-sr-3.wav",
	}

	spk2_files := []string{
		"./sr-data/enroll/leijun-sr-1.wav",
		"./sr-data/enroll/leijun-sr-2.wav",
	}

	spk1_embeddings := computeEmbeddings(ex, spk1_files)
	spk2_embeddings := computeEmbeddings(ex, spk2_files)

	ok := manager.RegisterV("fangjun", spk1_embeddings)
	if !ok {
		panic("Failed to register fangjun")
	}

	ok = manager.RegisterV("leijun", spk2_embeddings)
	if !ok {
		panic("Failed to register leijun")
	}

	if !manager.Contains("fangjun") {
		panic("Failed to find fangjun")
	}

	if !manager.Contains("leijun") {
		panic("Failed to find leijun")
	}

	if manager.NumSpeakers() != 2 {
		panic("There should be only 2 speakers")
	}

	all_speakers := manager.AllSpeakers()
	log.Printf("All speakers: %v\n", all_speakers)
}

func main() {
	log.SetFlags(log.LstdFlags | log.Lmicroseconds)

	ex := createSpeakerEmbeddingExtractor()
	defer sherpa.DeleteSpeakerEmbeddingExtractor(ex)

	manager := sherpa.NewSpeakerEmbeddingManager(ex.Dim())
	defer sherpa.DeleteSpeakerEmbeddingManager(manager)
	registerSpeakers(ex, manager)

	// Please download the test data from
	// https://github.com/csukuangfj/sr-data
	test1 := "./sr-data/test/fangjun-test-sr-1.wav"
	embeddings := computeEmbeddings(ex, []string{test1})[0]
	threshold := float32(0.6)
	name := manager.Search(embeddings, threshold)
	if len(name) > 0 {
		log.Printf("%v matches %v", test1, name)
	} else {
		log.Printf("No matches found for %v", test1)
	}

	test2 := "./sr-data/test/leijun-test-sr-1.wav"
	embeddings = computeEmbeddings(ex, []string{test2})[0]
	name = manager.Search(embeddings, threshold)
	if len(name) > 0 {
		log.Printf("%v matches %v", test2, name)
	} else {
		log.Printf("No matches found for %v", test2)
	}

	test3 := "./sr-data/test/liudehua-test-sr-1.wav"
	embeddings = computeEmbeddings(ex, []string{test3})[0]
	name = manager.Search(embeddings, threshold)
	if len(name) > 0 {
		log.Printf("%v matches %v", test3, name)
	} else {
		log.Printf("No matches found for %v", test3)
	}

	if !manager.Remove("fangjun") {
		panic("Failed to deregister fangjun")
	} else {
		log.Print("fangjun deregistered\n")
	}

	test1 = "./sr-data/test/fangjun-test-sr-1.wav"
	embeddings = computeEmbeddings(ex, []string{test1})[0]
	name = manager.Search(embeddings, threshold)
	if len(name) > 0 {
		log.Printf("%v matches %v", test1, name)
	} else {
		log.Printf("No matches found for %v", test1)
	}
}

func chk(err error) {
	if err != nil {
		panic(err)
	}
}


================================================
FILE: go-api-examples/speaker-identification/run.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -f ./3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx
fi

if [ ! -f ./sr-data/enroll/fangjun-sr-1.wav ]; then
  git clone https://github.com/csukuangfj/sr-data
fi

go mod tidy
go build
./speaker-identification


================================================
FILE: go-api-examples/speech-enhancement-dpdfnet/go.mod
================================================
module speech-enhancement-dpdfnet

go 1.17


================================================
FILE: go-api-examples/speech-enhancement-dpdfnet/main.go
================================================
package main

import (
	sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
	"log"
)

func main() {
	log.SetFlags(log.LstdFlags | log.Lmicroseconds)

	config := sherpa.OfflineSpeechDenoiserConfig{}
	config.Model.DpdfNet.Model = "./dpdfnet_baseline.onnx"
	config.Model.NumThreads = 1
	config.Model.Debug = 1

	sd := sherpa.NewOfflineSpeechDenoiser(&config)
	defer sherpa.DeleteOfflineSpeechDenoiser(sd)

	waveFilename := "./inp_16k.wav"
	wave := sherpa.ReadWave(waveFilename)
	if wave == nil {
		log.Printf("Failed to read %v\n", waveFilename)
		return
	}

	audio := sd.Run(wave.Samples, wave.SampleRate)
	filename := "./enhanced-dpdfnet-16k.wav"
	if !audio.Save(filename) {
		log.Fatalf("Failed to write %v\n", filename)
	}

	log.Printf("Saved to %v\n", filename)
}


================================================
FILE: go-api-examples/speech-enhancement-dpdfnet/run.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -f ./dpdfnet_baseline.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet_baseline.onnx
fi

if [ ! -f ./inp_16k.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
fi

go mod tidy
go build

./speech-enhancement-dpdfnet


================================================
FILE: go-api-examples/speech-enhancement-gtcrn/go.mod
================================================
module speech-enhancement-gtcrn

go 1.17


================================================
FILE: go-api-examples/speech-enhancement-gtcrn/main.go
================================================
package main

import (
	sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
	"log"
)

func main() {
	log.SetFlags(log.LstdFlags | log.Lmicroseconds)

	config := sherpa.OfflineSpeechDenoiserConfig{}

	// Please download the models from
	// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models

	config.Model.Gtcrn.Model = "./gtcrn_simple.onnx"
	config.Model.NumThreads = 1
	config.Model.Debug = 1

	sd := sherpa.NewOfflineSpeechDenoiser(&config)
	defer sherpa.DeleteOfflineSpeechDenoiser(sd)

	wave_filename := "./inp_16k.wav"

	wave := sherpa.ReadWave(wave_filename)
	if wave == nil {
		log.Printf("Failed to read %v\n", wave_filename)
		return
	}

	log.Println("Started")
	audio := sd.Run(wave.Samples, wave.SampleRate)
	log.Println("Done!")

	filename := "./enhanced-16k.wav"
	ok := audio.Save(filename)
	if !ok {
		log.Fatalf("Failed to write", filename)
	} else {
		log.Println("Saved to ", filename)
	}

}


================================================
FILE: go-api-examples/speech-enhancement-gtcrn/run.sh
================================================
#!/usr/bin/env bash
set -ex

export CGO_ENABLED=1

if [ ! -f ./gtcrn_simple.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
fi

if [ ! -f ./inp_16k.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
fi

go mod tidy
go build

./speech-enhancement-gtcrn


================================================
FILE: go-api-examples/streaming-hlg-decoding/go.mod
================================================
module streaming-hlg-decoding

go 1.17


================================================
FILE: go-api-examples/streaming-hlg-decoding/main.go
================================================
package main

import (
	"bytes"
	"encoding/binary"
	sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
	"github.com/youpy/go-wav"
	"log"
	"os"
	"strings"
)

func main() {
	log.SetFlags(log.LstdFlags | log.Lmicroseconds)

	config := sherpa.OnlineRecognizerConfig{}
	config.FeatConfig = sherpa.FeatureConfig{SampleRate: 16000, FeatureDim: 80}

	// please download model files from
	// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
	config.ModelConfig.Zipformer2Ctc.Model = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx"
	config.ModelConfig.Tokens = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt"

	config.ModelConfig.NumThreads = 1
	config.ModelConfig.Debug = 0
	config.ModelConfig.Provider = "cpu"
	config.CtcFstDecoderConfig.Graph = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst"

	wav_filename := "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav"

	samples, sampleRate := readWave(wav_filename)

	log.Println("Initializing recognizer (may take several seconds)")
	recognizer := sherpa.NewOnlineRecognizer(&config)
	log.Println("Recognizer created!")
	defer sherpa.DeleteOnlineRecognizer(recognizer)

	log.Println("Start decoding!")
	stream := sherpa.NewOnlineStream(recognizer)
	defer sherpa.DeleteOnlineStream(stream)

	stream.AcceptWaveform(sampleRate, samples)

	tailPadding := make([]float32, int(float32(sampleRate)*0.3))
	stream.AcceptWaveform(sampleRate, tailPadding)

	for recognizer.IsReady(stream) {
		recognizer.Decode(stream)
	}
	log.Println("Decoding done!")
	result := recognizer.GetResult(stream)
	log.Println(strings.ToLower(result.Text))
	log.Printf("Wave duration: %v seconds", float32(len(samples))/float32(sampleRate))
}

func readWave(filename string) (samples []float32, sampleRate int) {
	file, _ := os.Open(filename)
	defer file.Close()

	reader := wav.NewReader(file)
	format, err := reader.Format()
	if err != nil {
		log.Fatalf("Failed to read wave format")
	}

	if format.AudioFormat != 1 {
		log.Fatalf("Support only PCM format. Given: %v\n", format.AudioFormat)
	}

	if format.NumChannels != 1 {
		log.Fatalf("Support only 1 channel wave file. Given: %v\n", format.NumChannels)
	}

	if format.BitsPerSample != 16 {
		log.Fatalf("Support only 16-bit per sample. Given: %v\n", format.BitsPerSample)
	}

	reader.Duration() // so that it initializes reader.Size

	buf := make([]byte, reader.Size)
	n, err := reader.Read(buf)
	if n != int(reader.Size) {
		log.Fatalf("Failed to read %v bytes. Returned %v bytes\n", reader.Size, n)
	}

	samples = samplesInt16ToFloat(buf)
	sampleRate = int(format.SampleRate)

	return
}

func samplesInt16ToFloat(inSamples []byte) []float32 {
	numSamples := len(inSamples) / 2
	outSamples := make([]float32, numSamples)

	for i := 0; i != numSamples; i++ {
		s := inSamples[i*2 : (i+1)*2]

		var s16 int16
		buf := bytes.NewReader(s)
		err := binary.Read(buf, binary.LittleEndian, &s16)
		if err != nil {
			log.Fatal("Failed to parse 16-bit sample")
		}
		outSamples[i] = float32(s16) / 32768
	}

	return outSamples
}


================================================
FILE: go-api-examples/streaming-hlg-decoding/run.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
fi

go mod tidy
go build
ls -lh
./streaming-hlg-decoding


================================================
FILE: go-api-examples/streaming-speech-enhancement-dpdfnet/go.mod
================================================
module streaming-speech-enhancement-dpdfnet

go 1.17


================================================
FILE: go-api-examples/streaming-speech-enhancement-dpdfnet/main.go
================================================
package main

import (
	sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
	"log"
)

func appendSamples(dst []float32, src []float32) []float32 {
	return append(dst, src...)
}

func main() {
	log.SetFlags(log.LstdFlags | log.Lmicroseconds)

	config := sherpa.OnlineSpeechDenoiserConfig{}
	config.Model.DpdfNet.Model = "./dpdfnet_baseline.onnx"
	config.Model.NumThreads = 1
	config.Model.Debug = 1

	sd := sherpa.NewOnlineSpeechDenoiser(&config)
	defer sherpa.DeleteOnlineSpeechDenoiser(sd)

	waveFilename := "./inp_16k.wav"
	wave := sherpa.ReadWave(waveFilename)
	if wave == nil {
		log.Printf("Failed to read %v\n", waveFilename)
		return
	}

	output := make([]float32, 0, len(wave.Samples))
	frameShift := sd.FrameShiftInSamples()
	for start := 0; start < len(wave.Samples); start += frameShift {
		end := start + frameShift
		if end > len(wave.Samples) {
			end = len(wave.Samples)
		}
		audio := sd.Run(wave.Samples[start:end], wave.SampleRate)
		output = appendSamples(output, audio.Samples)
	}

	output = appendSamples(output, sd.Flush().Samples)
	filename := "./enhanced-online-dpdfnet.wav"
	if !(&sherpa.DenoisedAudio{Samples: output, SampleRate: sd.SampleRate()}).Save(filename) {
		log.Fatalf("Failed to write %v\n", filename)
	}

	log.Printf("Saved to %v\n", filename)
}


================================================
FILE: go-api-examples/streaming-speech-enhancement-dpdfnet/run.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -f ./dpdfnet_baseline.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet_baseline.onnx
fi

if [ ! -f ./inp_16k.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
fi

go mod tidy
go build

./streaming-speech-enhancement-dpdfnet


================================================
FILE: go-api-examples/streaming-speech-enhancement-gtcrn/go.mod
================================================
module streaming-speech-enhancement-gtcrn

go 1.17


================================================
FILE: go-api-examples/streaming-speech-enhancement-gtcrn/main.go
================================================
package main

import (
	sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
	"log"
)

func appendSamples(dst []float32, src []float32) []float32 {
	return append(dst, src...)
}

func main() {
	log.SetFlags(log.LstdFlags | log.Lmicroseconds)

	config := sherpa.OnlineSpeechDenoiserConfig{}
	config.Model.Gtcrn.Model = "./gtcrn_simple.onnx"
	config.Model.NumThreads = 1
	config.Model.Debug = 1

	sd := sherpa.NewOnlineSpeechDenoiser(&config)
	defer sherpa.DeleteOnlineSpeechDenoiser(sd)

	waveFilename := "./inp_16k.wav"
	wave := sherpa.ReadWave(waveFilename)
	if wave == nil {
		log.Printf("Failed to read %v\n", waveFilename)
		return
	}

	output := make([]float32, 0, len(wave.Samples))
	frameShift := sd.FrameShiftInSamples()
	for start := 0; start < len(wave.Samples); start += frameShift {
		end := start + frameShift
		if end > len(wave.Samples) {
			end = len(wave.Samples)
		}
		audio := sd.Run(wave.Samples[start:end], wave.SampleRate)
		output = appendSamples(output, audio.Samples)
	}

	output = appendSamples(output, sd.Flush().Samples)
	filename := "./enhanced-online-gtcrn.wav"
	if !(&sherpa.DenoisedAudio{Samples: output, SampleRate: sd.SampleRate()}).Save(filename) {
		log.Fatalf("Failed to write %v\n", filename)
	}

	log.Printf("Saved to %v\n", filename)
}


================================================
FILE: go-api-examples/streaming-speech-enhancement-gtcrn/run.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -f ./gtcrn_simple.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
fi

if [ ! -f ./inp_16k.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
fi

go mod tidy
go build

./streaming-speech-enhancement-gtcrn


================================================
FILE: go-api-examples/supertonic-tts/go.mod
================================================
module supertonic-tts

go 1.17


================================================
FILE: go-api-examples/supertonic-tts/main.go
================================================
package main

import (
	"encoding/json"
	"log"

	sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
)

func main() {
	log.SetFlags(log.LstdFlags | log.Lmicroseconds)

	// ---------------- config ----------------
	var config sherpa.OfflineTtsConfig

	config.Model.Supertonic.DurationPredictor =
		"./sherpa-onnx-supertonic-tts-int8-2026-03-06/duration_predictor.int8.onnx"
	config.Model.Supertonic.TextEncoder =
		"./sherpa-onnx-supertonic-tts-int8-2026-03-06/text_encoder.int8.onnx"
	config.Model.Supertonic.VectorEstimator =
		"./sherpa-onnx-supertonic-tts-int8-2026-03-06/vector_estimator.int8.onnx"
	config.Model.Supertonic.Vocoder =
		"./sherpa-onnx-supertonic-tts-int8-2026-03-06/vocoder.int8.onnx"
	config.Model.Supertonic.TtsJson =
		"./sherpa-onnx-supertonic-tts-int8-2026-03-06/tts.json"
	config.Model.Supertonic.UnicodeIndexer =
		"./sherpa-onnx-supertonic-tts-int8-2026-03-06/unicode_indexer.bin"
	config.Model.Supertonic.VoiceStyle =
		"./sherpa-onnx-supertonic-tts-int8-2026-03-06/voice.bin"

	config.Model.NumThreads = 2
	config.Model.Debug = 1

	log.Println("Creating Offline TTS")
	tts := sherpa.NewOfflineTts(&config)
	if tts == nil {
		log.Fatal("Failed to create OfflineTts")
	}
	defer sherpa.DeleteOfflineTts(tts)

	text := "Today as always, men fall into two groups: slaves and free men. Whoever " +
		"does not have two-thirds of his day for himself, is a slave, whatever " +
		"he may be: a statesman, a businessman, an official, or a scholar."

	var cfg sherpa.GenerationConfig
	cfg.Sid = 6
	cfg.NumSteps = 5
	cfg.Speed = 1.25 // larger -> faster

	extraMap := map[string]interface{}{
		"lang": "en",
	}
	extraBytes, _ := json.Marshal(extraMap)
	cfg.Extra = json.RawMessage(extraBytes)

	log.Println("Start generating")

	audio := tts.GenerateWithConfig(
		text,
		&cfg,
		func(samples []float32, progress float32) bool {
			log.Printf("Progress: %.3f%%, Number of samples: %d", progress*100, len(samples))
			return true
		},
	)

	if audio == nil {
		log.Fatal("Generation failed")
	}

	outputFilename := "./generated-supertonic-en.wav"
	if !audio.Save(outputFilename) {
		log.Fatal("Failed to save wav")
	}

	log.Println("Saved to:", outputFilename)
}


================================================
FILE: go-api-examples/supertonic-tts/run.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -f ./sherpa-onnx-supertonic-tts-int8-2026-03-06/duration_predictor.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
  tar xvf sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
  rm sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
fi

go mod tidy
go build

./supertonic-tts


================================================
FILE: go-api-examples/vad/go.mod
================================================
module vad

go 1.17


================================================
FILE: go-api-examples/vad/main.go
================================================
package main

import (
	"fmt"
	"github.com/gen2brain/malgo"
	sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
	"log"
	"os"
)

func main() {
	log.SetFlags(log.LstdFlags | log.Lmicroseconds)

	config := sherpa.VadModelConfig{}

	// Please download silero_vad.onnx from
	// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
	// or ten-vad.onnx from
	// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx

	if FileExists("./silero_vad.onnx") {
		fmt.Println("Use silero-vad")
		config.SileroVad.Model = "./silero_vad.onnx"
		config.SileroVad.Threshold = 0.5
		config.SileroVad.MinSilenceDuration = 0.5
		config.SileroVad.MinSpeechDuration = 0.25
		config.SileroVad.MaxSpeechDuration = 10
		config.SileroVad.WindowSize = 512
	} else if FileExists("./ten-vad.onnx") {
		fmt.Println("Use ten-vad")
		config.TenVad.Model = "./ten-vad.onnx"
		config.TenVad.Threshold = 0.5
		config.TenVad.MinSilenceDuration = 0.5
		config.TenVad.MinSpeechDuration = 0.25
		config.TenVad.MaxSpeechDuration = 10
		config.TenVad.WindowSize = 256
	} else {
		fmt.Println("Please download either ./silero_vad.onnx or ./ten-vad.onnx")
		return
	}

	config.SampleRate = 16000
	config.NumThreads = 1
	config.Provider = "cpu"
	config.Debug = 1

	windowSize := config.SileroVad.WindowSize
	if config.TenVad.Model != "" {
		windowSize = config.TenVad.WindowSize
	}

	var bufferSizeInSeconds float32 = 5

	vad := sherpa.NewVoiceActivityDetector(&config, bufferSizeInSeconds)
	defer sherpa.DeleteVoiceActivityDetector(vad)

	buffer := sherpa.NewCircularBuffer(10 * config.SampleRate)
	defer sherpa.DeleteCircularBuffer(buffer)

	ctx, err := malgo.InitContext(nil, malgo.ContextConfig{}, func(message string) {
		fmt.Printf("LOG <%v>", message)
	})
	chk(err)

	defer func() {
		_ = ctx.Uninit()
		ctx.Free()
	}()

	deviceConfig := malgo.DefaultDeviceConfig(malgo.Duplex)
	deviceConfig.Capture.Format = malgo.FormatS16
	deviceConfig.Capture.Channels = 1
	deviceConfig.Playback.Format = malgo.FormatS16
	deviceConfig.Playback.Channels = 1
	deviceConfig.SampleRate = 16000
	deviceConfig.Alsa.NoMMap = 1

	printed := false
	k := 0

	onRecvFrames := func(_, pSample []byte, framecount uint32) {
		samples := samplesInt16ToFloat(pSample)
		buffer.Push(samples)
		for buffer.Size() >= windowSize {
			head := buffer.Head()
			s := buffer.Get(head, windowSize)
			buffer.Pop(windowSize)

			vad.AcceptWaveform(s)

			if vad.IsSpeech() && !printed {
				printed = true
				log.Print("Detected speech\n")
			}

			if !vad.IsSpeech() {
				printed = false
			}

			for !vad.IsEmpty() {
				speechSegment := vad.Front()
				vad.Pop()

				duration := float32(len(speechSegment.Samples)) / float32(config.SampleRate)

				audio := sherpa.GeneratedAudio{}
				audio.Samples = speechSegment.Samples
				audio.SampleRate = config.SampleRate

				filename := fmt.Sprintf("seg-%d-%.2f-seconds.wav", k, duration)
				ok := audio.Save(filename)
				if ok {
					log.Printf("Saved to %s", filename)
				}

				k += 1

				log.Printf("Duration: %.2f seconds\n", duration)
				log.Print("----------\n")
			}
		}
	}

	captureCallbacks := malgo.DeviceCallbacks{
		Data: onRecvFrames,
	}

	device, err := malgo.InitDevice(ctx.Context, deviceConfig, captureCallbacks)
	chk(err)

	err = device.Start()
	chk(err)

	fmt.Println("Started. Please speak. Press ctrl + C  to exit")
	fmt.Scanln()
	device.Uninit()

}

func chk(err error) {
	if err != nil {
		panic(err)
	}
}

func samplesInt16ToFloat(inSamples []byte) []float32 {
	numSamples := len(inSamples) / 2
	outSamples := make([]float32, numSamples)

	for i := 0; i != numSamples; i++ {
		// Decode two bytes into an int16 using bit manipulation
		s16 := int16(inSamples[2*i]) | int16(inSamples[2*i+1])<<8
		outSamples[i] = float32(s16) / 32768
	}

	return outSamples
}

func FileExists(path string) bool {
	_, err := os.Stat(path)
	if err == nil {
		return true
	}

	return false
}


================================================
FILE: go-api-examples/vad/run.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -f ./silero_vad.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [ ! -f ./ten-vad.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
fi

go mod tidy
go build
./vad


================================================
FILE: go-api-examples/vad-asr-whisper/go.mod
================================================
module vad-asr-whisper

go 1.17


================================================
FILE: go-api-examples/vad-asr-whisper/main.go
================================================
package main

import (
	"fmt"
	portaudio "github.com/csukuangfj/portaudio-go"
	sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
	"log"
	"strings"
)

func main() {
	log.SetFlags(log.LstdFlags | log.Lmicroseconds)

	// 1. Create VAD
	config := sherpa.VadModelConfig{}

	// Please download silero_vad.onnx from
	// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

	config.SileroVad.Model = "./silero_vad.onnx"
	config.SileroVad.Threshold = 0.5
	config.SileroVad.MinSilenceDuration = 0.5
	config.SileroVad.MinSpeechDuration = 0.25
	config.SileroVad.WindowSize = 512
	config.SileroVad.MaxSpeechDuration = 5.0
	config.SampleRate = 16000
	config.NumThreads = 1
	config.Provider = "cpu"
	config.Debug = 1

	var bufferSizeInSeconds float32 = 20

	vad := sherpa.NewVoiceActivityDetector(&config, bufferSizeInSeconds)
	defer sherpa.DeleteVoiceActivityDetector(vad)

	// 2. Create ASR recognizer

	c := sherpa.OfflineRecognizerConfig{}
	c.FeatConfig.SampleRate = 16000
	c.FeatConfig.FeatureDim = 80
	c.ModelConfig.Whisper.Encoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx"
	c.ModelConfig.Whisper.Decoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx"
	c.ModelConfig.Tokens = "./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt"
	c.ModelConfig.NumThreads = 2
	c.ModelConfig.Debug = 1
	c.ModelConfig.Provider = "cpu"

	recognizer := sherpa.NewOfflineRecognizer(&c)
	defer sherpa.DeleteOfflineRecognizer(recognizer)

	err := portaudio.Initialize()
	if err != nil {
		log.Fatalf("Unable to initialize portaudio: %v\n", err)
	}
	defer portaudio.Terminate()

	default_device, err := portaudio.DefaultInputDevice()
	if err != nil {
		log.Fatal("Failed to get default input device: %v\n", err)
	}
	log.Printf("Selected default input device: %s\n", default_device.Name)
	param := portaudio.StreamParameters{}
	param.Input.Device = default_device
	param.Input.Channels = 1
	param.Input.Latency = default_device.DefaultHighInputLatency

	param.SampleRate = float64(config.SampleRate)
	param.FramesPerBuffer = 0
	param.Flags = portaudio.ClipOff

	// you can choose another value for 0.1 if you want
	samplesPerCall := int32(param.SampleRate * 0.1) // 0.1 second
	samples := make([]float32, samplesPerCall)

	s, err := portaudio.OpenStream(param, samples)
	if err != nil {
		log.Fatalf("Failed to open the stream")
	}

	defer s.Close()
	chk(s.Start())

	log.Print("Started! Please speak")
	printed := false

	k := 0
	for {
		chk(s.Read())
		vad.AcceptWaveform(samples)

		if vad.IsSpeech() && !printed {
			printed = true
			log.Print("Detected speech\n")
		}

		if !vad.IsSpeech() {
			printed = false
		}

		for !vad.IsEmpty() {
			speechSegment := vad.Front()
			vad.Pop()

			duration := float32(len(speechSegment.Samples)) / float32(config.SampleRate)

			audio := &sherpa.Wave{}
			audio.Samples = speechSegment.Samples
			audio.SampleRate = config.SampleRate

			// Now decode it
			go decode(recognizer, audio, k)

			k += 1

			log.Printf("Duration: %.2f seconds\n", duration)
		}
	}

	chk(s.Stop())
}

func decode(recognizer *sherpa.OfflineRecognizer, audio *sherpa.Wave, id int) {
	stream := sherpa.NewOfflineStream(recognizer)
	defer sherpa.DeleteOfflineStream(stream)
	stream.AcceptWaveform(audio.SampleRate, audio.Samples)
	recognizer.Decode(stream)
	result := stream.GetResult()
	text := strings.ToLower(result.Text)
	text = strings.Trim(text, " ")
	log.Println(text)

	duration := float32(len(audio.Samples)) / float32(audio.SampleRate)

	filename := fmt.Sprintf("seg-%d-%.2f-seconds-%s.wav", id, duration, text)
	ok := audio.Save(filename)
	if ok {
		log.Printf("Saved to %s", filename)
	}
	log.Print("----------\n")
}

func chk(err error) {
	if err != nil {
		panic(err)
	}
}


================================================
FILE: go-api-examples/vad-asr-whisper/run.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -f ./silero_vad.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [ ! -f ./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
  tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
  rm sherpa-onnx-whisper-tiny.en.tar.bz2
fi

go mod tidy
go build
./vad-asr-whisper


================================================
FILE: go-api-examples/vad-speaker-identification/go.mod
================================================
module vad-speaker-identification

go 1.17


================================================
FILE: go-api-examples/vad-speaker-identification/main.go
================================================
package main

import (
	"fmt"
	portaudio "github.com/csukuangfj/portaudio-go"
	sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
	"log"
)

func createSpeakerEmbeddingExtractor() *sherpa.SpeakerEmbeddingExtractor {
	config := sherpa.SpeakerEmbeddingExtractorConfig{}

	// Please download the model from
	// https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx
	//
	// You can find more models at
	// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models

	config.Model = "./3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx"
	config.NumThreads = 2
	config.Debug = 1
	config.Provider = "cpu"

	ex := sherpa.NewSpeakerEmbeddingExtractor(&config)
	return ex
}

func computeEmbeddings(ex *sherpa.SpeakerEmbeddingExtractor, files []string) [][]float32 {
	embeddings := make([][]float32, len(files))

	for i, f := range files {
		wave := sherpa.ReadWave(f)

		stream := ex.CreateStream()
		defer sherpa.DeleteOnlineStream(stream)
		stream.AcceptWaveform(wave.SampleRate, wave.Samples)
		stream.InputFinished()
		embeddings[i] = ex.Compute(stream)
	}

	return embeddings

}

func registerSpeakers(ex *sherpa.SpeakerEmbeddingExtractor, manager *sherpa.SpeakerEmbeddingManager) {
	// Please download the test data from
	// https://github.com/csukuangfj/sr-data
	spk1_files := []string{
		"./sr-data/enroll/fangjun-sr-1.wav",
		"./sr-data/enroll/fangjun-sr-2.wav",
		"./sr-data/enroll/fangjun-sr-3.wav",
	}

	spk2_files := []string{
		"./sr-data/enroll/leijun-sr-1.wav",
		"./sr-data/enroll/leijun-sr-2.wav",
	}

	spk1_embeddings := computeEmbeddings(ex, spk1_files)
	spk2_embeddings := computeEmbeddings(ex, spk2_files)

	ok := manager.RegisterV("fangjun", spk1_embeddings)
	if !ok {
		panic("Failed to register fangjun")
	}

	ok = manager.RegisterV("leijun", spk2_embeddings)
	if !ok {
		panic("Failed to register leijun")
	}

	if !manager.Contains("fangjun") {
		panic("Failed to find fangjun")
	}

	if !manager.Contains("leijun") {
		panic("Failed to find leijun")
	}

	if manager.NumSpeakers() != 2 {
		panic("There should be only 2 speakers")
	}

	all_speakers := manager.AllSpeakers()
	log.Printf("All speakers: %v\n", all_speakers)
}

func createVad() *sherpa.VoiceActivityDetector {
	config := sherpa.VadModelConfig{}

	// Please download silero_vad.onnx from
	// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

	config.SileroVad.Model = "./silero_vad.onnx"
	config.SileroVad.Threshold = 0.5
	config.SileroVad.MinSilenceDuration = 0.5
	config.SileroVad.MinSpeechDuration = 0.5
	config.SileroVad.WindowSize = 512
	config.SampleRate = 16000
	config.NumThreads = 1
	config.Provider = "cpu"
	config.Debug = 1

	var bufferSizeInSeconds float32 = 20

	vad := sherpa.NewVoiceActivityDetector(&config, bufferSizeInSeconds)
	return vad
}

func main() {
	log.SetFlags(log.LstdFlags | log.Lmicroseconds)

	vad := createVad()
	defer sherpa.DeleteVoiceActivityDetector(vad)

	ex := createSpeakerEmbeddingExtractor()
	defer sherpa.DeleteSpeakerEmbeddingExtractor(ex)

	manager := sherpa.NewSpeakerEmbeddingManager(ex.Dim())
	defer sherpa.DeleteSpeakerEmbeddingManager(manager)
	registerSpeakers(ex, manager)

	err := portaudio.Initialize()
	if err != nil {
		log.Fatalf("Unable to initialize portaudio: %v\n", err)
	}
	defer portaudio.Terminate()

	default_device, err := portaudio.DefaultInputDevice()
	if err != nil {
		log.Fatal("Failed to get default input device: %v\n", err)
	}
	log.Printf("Selected default input device: %s\n", default_device.Name)
	param := portaudio.StreamParameters{}
	param.Input.Device = default_device
	param.Input.Channels = 1
	param.Input.Latency = default_device.DefaultHighInputLatency

	param.SampleRate = 16000
	param.FramesPerBuffer = 0
	param.Flags = portaudio.ClipOff

	// you can choose another value for 0.1 if you want
	samplesPerCall := int32(param.SampleRate * 0.1) // 0.1 second
	samples := make([]float32, samplesPerCall)

	s, err := portaudio.OpenStream(param, samples)
	if err != nil {
		log.Fatalf("Failed to open the stream")
	}

	defer s.Close()
	chk(s.Start())

	log.Print("Started! Please speak")
	printed := false

	k := 0
	for {
		chk(s.Read())
		vad.AcceptWaveform(samples)

		if vad.IsSpeech() && !printed {
			printed = true
			log.Print("Detected speech\n")
		}

		if !vad.IsSpeech() {
			printed = false
		}

		for !vad.IsEmpty() {
			speechSegment := vad.Front()
			vad.Pop()

			audio := &sherpa.Wave{}
			audio.Samples = speechSegment.Samples
			audio.SampleRate = 16000

			// Now decode it
			go decode(ex, manager, audio, k)

			k += 1
		}
	}

	chk(s.Stop())

}

func chk(err error) {
	if err != nil {
		panic(err)
	}
}

func decode(ex *sherpa.SpeakerEmbeddingExtractor, manager *sherpa.SpeakerEmbeddingManager, audio *sherpa.GeneratedAudio, id int) {
	stream := ex.CreateStream()
	defer sherpa.DeleteOnlineStream(stream)

	stream.AcceptWaveform(audio.SampleRate, audio.Samples)
	stream.InputFinished()
	embeddings := ex.Compute(stream)
	threshold := float32(0.5)
	name := manager.Search(embeddings, threshold)
	if len(name) > 0 {
		log.Printf("Found speaker: %v\n", name)
	} else {
		log.Print("Unknown speaker\n")
		name = "Unknown"
	}

	duration := float32(len(audio.Samples)) / float32(audio.SampleRate)

	filename := fmt.Sprintf("seg-%d-%.2f-seconds-%s.wav", id, duration, name)
	ok := audio.Save(filename)
	if ok {
		log.Printf("Saved to %s", filename)
	}
	log.Print("----------\n")
}


================================================
FILE: go-api-examples/vad-speaker-identification/run.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -f ./3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx
fi

if [ ! -f ./sr-data/enroll/fangjun-sr-1.wav ]; then
  git clone https://github.com/csukuangfj/sr-data
fi

if [ ! -f ./silero_vad.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

go mod tidy
go build
./vad-speaker-identification


================================================
FILE: go-api-examples/vad-spoken-language-identification/go.mod
================================================
module vad-spoken-language-identification

go 1.17


================================================
FILE: go-api-examples/vad-spoken-language-identification/main.go
================================================
package main

import (
	"fmt"
	iso639 "github.com/barbashov/iso639-3"
	portaudio "github.com/csukuangfj/portaudio-go"
	sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
	"log"
)

func main() {
	log.SetFlags(log.LstdFlags | log.Lmicroseconds)

	// 1. Create VAD
	config := sherpa.VadModelConfig{}

	// Please download silero_vad.onnx from
	// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

	config.SileroVad.Model = "./silero_vad.onnx"
	config.SileroVad.Threshold = 0.5
	config.SileroVad.MinSilenceDuration = 0.5
	config.SileroVad.MinSpeechDuration = 0.25
	config.SileroVad.WindowSize = 512
	config.SampleRate = 16000
	config.NumThreads = 1
	config.Provider = "cpu"
	config.Debug = 1

	var bufferSizeInSeconds float32 = 20

	vad := sherpa.NewVoiceActivityDetector(&config, bufferSizeInSeconds)
	defer sherpa.DeleteVoiceActivityDetector(vad)

	// 2. Create spoken language identifier

	c := sherpa.SpokenLanguageIdentificationConfig{}
	c.Whisper.Encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx"
	c.Whisper.Decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx"
	c.NumThreads = 2
	c.Debug = 1
	c.Provider = "cpu"

	slid := sherpa.NewSpokenLanguageIdentification(&c)
	defer sherpa.DeleteSpokenLanguageIdentification(slid)

	err := portaudio.Initialize()
	if err != nil {
		log.Fatalf("Unable to initialize portaudio: %v\n", err)
	}
	defer portaudio.Terminate()

	default_device, err := portaudio.DefaultInputDevice()
	if err != nil {
		log.Fatal("Failed to get default input device: %v\n", err)
	}
	log.Printf("Selected default input device: %s\n", default_device.Name)
	param := portaudio.StreamParameters{}
	param.Input.Device = default_device
	param.Input.Channels = 1
	param.Input.Latency = default_device.DefaultHighInputLatency

	param.SampleRate = float64(config.SampleRate)
	param.FramesPerBuffer = 0
	param.Flags = portaudio.ClipOff

	// you can choose another value for 0.1 if you want
	samplesPerCall := int32(param.SampleRate * 0.1) // 0.1 second
	samples := make([]float32, samplesPerCall)

	s, err := portaudio.OpenStream(param, samples)
	if err != nil {
		log.Fatalf("Failed to open the stream")
	}

	defer s.Close()
	chk(s.Start())

	log.Print("Started! Please speak")
	printed := false

	k := 0
	for {
		chk(s.Read())
		vad.AcceptWaveform(samples)

		if vad.IsSpeech() && !printed {
			printed = true
			log.Print("Detected speech\n")
		}

		if !vad.IsSpeech() {
			printed = false
		}

		for !vad.IsEmpty() {
			speechSegment := vad.Front()
			vad.Pop()

			duration := float32(len(speechSegment.Samples)) / float32(config.SampleRate)

			audio := &sherpa.Wave{}
			audio.Samples = speechSegment.Samples
			audio.SampleRate = config.SampleRate

			// Now decode it
			go decode(slid, audio, k)

			k += 1

			log.Printf("Duration: %.2f seconds\n", duration)
		}
	}

	chk(s.Stop())
}

func decode(slid *sherpa.SpokenLanguageIdentification, audio *sherpa.Wave, id int) {
	stream := slid.CreateStream()
	defer sherpa.DeleteOfflineStream(stream)

	stream.AcceptWaveform(audio.SampleRate, audio.Samples)
	result := slid.Compute(stream)
	lang := iso639.FromPart1Code(result.Lang).Name
	log.Printf("Detected language: %v", lang)

	duration := float32(len(audio.Samples)) / float32(audio.SampleRate)

	filename := fmt.Sprintf("seg-%d-%.2f-seconds-%s.wav", id, duration, lang)
	ok := audio.Save(filename)
	if ok {
		log.Printf("Saved to %s", filename)
	}
	log.Print("----------\n")
}

func chk(err error) {
	if err != nil {
		panic(err)
	}
}


================================================
FILE: go-api-examples/vad-spoken-language-identification/run.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -f ./silero_vad.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [ ! -f ./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
  tar xvf sherpa-onnx-whisper-tiny.tar.bz2
  rm sherpa-onnx-whisper-tiny.tar.bz2
fi

go mod tidy
go build
./vad-spoken-language-identification


================================================
FILE: go-api-examples/zero-shot-pocket-tts/go.mod
================================================
module zero-shot-pocket-tts

go 1.17


================================================
FILE: go-api-examples/zero-shot-pocket-tts/main.go
================================================
package main

import (
	"log"

	"encoding/json"
	sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
	flag "github.com/spf13/pflag"
)

func main() {
	log.SetFlags(log.LstdFlags | log.Lmicroseconds)

	var referenceAudio string = "./sherpa-onnx-pocket-tts-int8-2026-01-26/test_wavs/bria.wav"
	var outputFilename string = "./generated.wav"
	var voiceEmbeddingCacheCapacity int = 50
	var seed int = -1

	text := `Today as always, men fall into two groups: slaves and free men.
Whoever does not have two-thirds of his day for himself, is a slave,
whatever he may be: a statesman, a businessman, an official, or a scholar.`

	flag.StringVar(&referenceAudio, "reference-audio", referenceAudio, "Path to the reference audio")
	flag.StringVar(&text, "text", text, "Text to be synthesized")
	flag.StringVar(&outputFilename, "output-filename", outputFilename, "File to save the generated audio")
	flag.IntVar(&voiceEmbeddingCacheCapacity, "voice-embedding-cache-capacity", voiceEmbeddingCacheCapacity, "Voice embedding cache capacity (default: 50)")
	flag.IntVar(&seed, "seed", seed, "Random seed for reproducibility (default: -1, random)")
	flag.Parse()

	// ---------------- config ----------------
	var config sherpa.OfflineTtsConfig

	config.Model.Pocket.LmFlow =
		"./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_flow.int8.onnx"
	config.Model.Pocket.LmMain =
		"./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_main.int8.onnx"
	config.Model.Pocket.Encoder =
		"./sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx"
	config.Model.Pocket.Decoder =
		"./sherpa-onnx-pocket-tts-int8-2026-01-26/decoder.int8.onnx"
	config.Model.Pocket.TextConditioner =
		"./sherpa-onnx-pocket-tts-int8-2026-01-26/text_conditioner.onnx"
	config.Model.Pocket.VocabJson =
		"./sherpa-onnx-pocket-tts-int8-2026-01-26/vocab.json"
	config.Model.Pocket.TokenScoresJson =
		"./sherpa-onnx-pocket-tts-int8-2026-01-26/token_scores.json"
	config.Model.Pocket.VoiceEmbeddingCacheCapacity = voiceEmbeddingCacheCapacity

	config.Model.NumThreads = 2
	config.Model.Debug = 0
	config.Model.Provider = "cpu"

	log.Println("Creating Offline TTS")
	tts := sherpa.NewOfflineTts(&config)
	if tts == nil {
		log.Fatal("Failed to create OfflineTts")
	}
	defer sherpa.DeleteOfflineTts(tts)

	wave := sherpa.ReadWave(referenceAudio)
	if wave == nil {
		log.Fatal("Failed to read reference wav:", referenceAudio)
	}

	var cfg sherpa.GenerationConfig
	cfg.ReferenceAudio = wave.Samples
	cfg.ReferenceSampleRate = wave.SampleRate

	// Build extra config with optional seed
	extraMap := map[string]interface{}{
		"max_reference_audio_len": 10,
		"temperature":             0.7,
	}
	if seed >= 0 {
		extraMap["seed"] = seed
	}
	extraBytes, _ := json.Marshal(extraMap)
	cfg.Extra = json.RawMessage(extraBytes)

	log.Println("Start generating")

	audio := tts.GenerateWithConfig(
		text,
		&cfg,
		func(samples []float32, progress float32) bool {
			log.Printf("Progress: %.3f%%, Number of samples: %d", progress*100, len(samples))
			// return false here if you want to cancel
			return true
		},
	)

	if audio == nil {
		log.Fatal("Generation failed")
	}

	if !audio.Save(outputFilename) {
		log.Fatal("Failed to save wav")
	}

	log.Println("Saved to:", outputFilename)
}


================================================
FILE: go-api-examples/zero-shot-pocket-tts/run.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -f ./sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
  tar xvf sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
  rm sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
fi

go mod tidy
go build

./zero-shot-pocket-tts \
  --reference-audio ./sherpa-onnx-pocket-tts-int8-2026-01-26/test_wavs/bria.wav \
  --output-filename ./generated-bria.wav \
  --text "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar."


================================================
FILE: go-api-examples/zero-shot-pocket-tts-play/go.mod
================================================
module zero-shot-pocket-tts-play

go 1.17


================================================
FILE: go-api-examples/zero-shot-pocket-tts-play/main.go
================================================
package main

import (
	"encoding/binary"
	"encoding/json"
	"io"
	"log"
	"math"
	"os"
	"os/signal"
	"sync"
	"syscall"
	"time"

	oto "github.com/ebitengine/oto/v3"
	sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
	flag "github.com/spf13/pflag"
)

type pcmBuffer struct {
	mu       sync.Mutex
	queue    [][]byte
	finished bool
	started  chan struct{} // closed on first callback
	once     sync.Once
}

func newPCMBuffer() *pcmBuffer {
	return &pcmBuffer{
		started: make(chan struct{}),
	}
}

func (b *pcmBuffer) Push(p []byte) {
	b.once.Do(func() {
		close(b.started)
	})

	b.mu.Lock()
	b.queue = append(b.queue, p)
	b.mu.Unlock()
}

func (b *pcmBuffer) Finish() {
	b.once.Do(func() {
		close(b.started)
	})

	b.mu.Lock()
	b.finished = true
	b.mu.Unlock()
}

type pcmReader struct {
	buf  *pcmBuffer
	done chan struct{}
	once sync.Once
}

func (r *pcmReader) Read(p []byte) (int, error) {
	<-r.buf.started

	r.buf.mu.Lock()
	defer r.buf.mu.Unlock()

	// 2) Have audio
	if len(r.buf.queue) > 0 {
		chunk := r.buf.queue[0]
		n := copy(p, chunk)

		if n == len(chunk) {
			r.buf.queue = r.buf.queue[1:]
		} else {
			r.buf.queue[0] = chunk[n:]
		}
		return n, nil
	}

	// 3) Finished → EOF
	if r.buf.finished {
		r.once.Do(func() { close(r.done) })
		return 0, io.EOF
	}

	// 4) Gap → silence
	for i := range p {
		p[i] = 0
	}
	return len(p), nil
}

func main() {
	log.SetFlags(log.LstdFlags | log.Lmicroseconds)

	var referenceAudio string = "./sherpa-onnx-pocket-tts-int8-2026-01-26/test_wavs/bria.wav"
	var outputFilename string = "./generated.wav"
	var voiceEmbeddingCacheCapacity int = 50
	var seed int = -1

	text := `Today as always, men fall into two groups: slaves and free men.
Whoever does not have two-thirds of his day for himself, is a slave,
whatever he may be: a statesman, a businessman, an official, or a scholar.`

	flag.StringVar(&referenceAudio, "reference-audio", referenceAudio, "Path to the reference audio")
	flag.StringVar(&text, "text", text, "Text to be synthesized")
	flag.StringVar(&outputFilename, "output-filename", outputFilename, "File to save the generated audio")
	flag.IntVar(&voiceEmbeddingCacheCapacity, "voice-embedding-cache-capacity", voiceEmbeddingCacheCapacity, "Voice embedding cache capacity (default: 50)")
	flag.IntVar(&seed, "seed", seed, "Random seed for reproducibility (default: -1, random)")
	flag.Parse()

	// ---------------- config ----------------
	var config sherpa.OfflineTtsConfig

	config.Model.Pocket.LmFlow =
		"./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_flow.int8.onnx"
	config.Model.Pocket.LmMain =
		"./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_main.int8.onnx"
	config.Model.Pocket.Encoder =
		"./sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx"
	config.Model.Pocket.Decoder =
		"./sherpa-onnx-pocket-tts-int8-2026-01-26/decoder.int8.onnx"
	config.Model.Pocket.TextConditioner =
		"./sherpa-onnx-pocket-tts-int8-2026-01-26/text_conditioner.onnx"
	config.Model.Pocket.VocabJson =
		"./sherpa-onnx-pocket-tts-int8-2026-01-26/vocab.json"
	config.Model.Pocket.TokenScoresJson =
		"./sherpa-onnx-pocket-tts-int8-2026-01-26/token_scores.json"
	config.Model.Pocket.VoiceEmbeddingCacheCapacity = voiceEmbeddingCacheCapacity

	config.Model.NumThreads = 2
	config.Model.Debug = 0
	config.Model.Provider = "cpu"

	log.Println("Creating Offline TTS")
	tts := sherpa.NewOfflineTts(&config)
	if tts == nil {
		log.Fatal("Failed to create OfflineTts")
	}
	defer sherpa.DeleteOfflineTts(tts)

	wave := sherpa.ReadWave(referenceAudio)
	if wave == nil {
		log.Fatal("Failed to read reference wav:", referenceAudio)
	}

	var cfg sherpa.GenerationConfig
	cfg.ReferenceAudio = wave.Samples
	cfg.ReferenceSampleRate = wave.SampleRate

	// Build extra config with optional seed
	extraMap := map[string]interface{}{
		"max_reference_audio_len": 10,
		"temperature":             0.7,
	}
	if seed >= 0 {
		extraMap["seed"] = seed
	}
	extraBytes, _ := json.Marshal(extraMap)
	cfg.Extra = json.RawMessage(extraBytes)

	log.Println("Start generating")

	ctx, ready, err := oto.NewContext(&oto.NewContextOptions{
		SampleRate:   tts.SampleRate(),
		ChannelCount: 1,
		Format:       oto.FormatSignedInt16LE,
	})
	if err != nil {
		log.Fatal(err)
	}
	<-ready

	pcmBuf := newPCMBuffer()

	reader := &pcmReader{
		buf:  pcmBuf,
		done: make(chan struct{}),
	}

	player := ctx.NewPlayer(reader)
	player.Play()
	defer player.Close()

	stop := make(chan os.Signal, 1)
	signal.Notify(stop, syscall.SIGINT, syscall.SIGTERM)

	var generated *sherpa.GeneratedAudio

	start := time.Now()

	go func() {
		defer pcmBuf.Finish()

		generated = tts.GenerateWithConfig(
			text,
			&cfg,
			func(samples []float32, progress float32) bool {
				log.Printf("Progress: %.1f%%", progress*100)

				buf := make([]byte, len(samples)*2)
				for i, s := range samples {
					if s > 1 {
						s = 1
					} else if s < -1 {
						s = -1
					}
					v := int16(math.Round(float64(s * 32767)))
					binary.LittleEndian.PutUint16(buf[i*2:], uint16(v))
				}

				pcmBuf.Push(buf)
				return true
			},
		)

		log.Println("TTS generation finished in", time.Since(start))
	}()

	select {
	case <-stop:
		log.Println("Interrupted")
	case <-reader.done:
		log.Println("Playback finished")
	}

	if generated != nil {
		if ok := generated.Save(outputFilename); !ok {
			log.Println("Failed to save audio")
		} else {
			log.Println("Saved generated audio to", outputFilename)
		}
	}

	// let remaining audio drain
	time.Sleep(800 * time.Millisecond)

	log.Println("Done")
}


================================================
FILE: go-api-examples/zero-shot-pocket-tts-play/run.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -f ./sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
  tar xvf sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
  rm sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
fi

go mod tidy
go build

./zero-shot-pocket-tts-play \
  --reference-audio ./sherpa-onnx-pocket-tts-int8-2026-01-26/test_wavs/bria.wav \
  --output-filename ./generated-bria.wav \
  --text "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar."


================================================
FILE: go-api-examples/zero-shot-zipvoice-tts/go.mod
================================================
module zero-shot-zipvoice-tts

go 1.17


================================================
FILE: go-api-examples/zero-shot-zipvoice-tts/main.go
================================================
package main

import (
	"encoding/json"
	"log"

	sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
	flag "github.com/spf13/pflag"
)

func main() {
	log.SetFlags(log.LstdFlags | log.Lmicroseconds)

	var referenceAudio string = "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/test_wavs/leijun-1.wav"
	var referenceText string = "那还是三十六年前, 一九八七年. 我呢考上了武汉大学的计算机系."
	var outputFilename string = "./generated.wav"
	var numSteps int = 4
	var minCharInSentence int = 10

	text := "小米的价值观是真诚, 热爱. 真诚，就是不欺人也不自欺. 热爱, 就是全心投入并享受其中."

	flag.StringVar(&referenceAudio, "reference-audio", referenceAudio, "Path to the reference audio")
	flag.StringVar(&referenceText, "reference-text", referenceText, "Reference text for the reference audio")
	flag.StringVar(&text, "text", text, "Text to be synthesized")
	flag.StringVar(&outputFilename, "output-filename", outputFilename, "File to save the generated audio")
	flag.IntVar(&numSteps, "num-steps", numSteps, "Number of ZipVoice flow-matching steps")
	flag.IntVar(&minCharInSentence, "min-char-in-sentence", minCharInSentence, "Minimum characters in a sentence chunk")
	flag.Parse()

	var config sherpa.OfflineTtsConfig
	config.Model.Zipvoice.Encoder =
		"./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/encoder.int8.onnx"
	config.Model.Zipvoice.Decoder =
		"./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/decoder.int8.onnx"
	config.Model.Zipvoice.DataDir =
		"./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/espeak-ng-data"
	config.Model.Zipvoice.Lexicon =
		"./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/lexicon.txt"
	config.Model.Zipvoice.Tokens =
		"./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/tokens.txt"
	config.Model.Zipvoice.Vocoder = "./vocos_24khz.onnx"

	config.Model.NumThreads = 2
	config.Model.Debug = 0
	config.Model.Provider = "cpu"

	log.Println("Creating Offline TTS")
	tts := sherpa.NewOfflineTts(&config)
	if tts == nil {
		log.Fatal("Failed to create OfflineTts")
	}
	defer sherpa.DeleteOfflineTts(tts)

	wave := sherpa.ReadWave(referenceAudio)
	if wave == nil {
		log.Fatal("Failed to read reference wav:", referenceAudio)
	}

	var cfg sherpa.GenerationConfig
	cfg.ReferenceAudio = wave.Samples
	cfg.ReferenceSampleRate = wave.SampleRate
	cfg.ReferenceText = referenceText
	cfg.NumSteps = numSteps

	extraMap := map[string]interface{}{
		"min_char_in_sentence": minCharInSentence,
	}
	extraBytes, err := json.Marshal(extraMap)
	if err != nil {
		log.Fatalf("Failed to marshal generation config extra: %v", err)
	}
	cfg.Extra = json.RawMessage(extraBytes)

	log.Println("Start generating")

	audio := tts.GenerateWithConfig(
		text,
		&cfg,
		func(samples []float32, progress float32) bool {
			log.Printf("Progress: %.3f%%, Number of samples: %d", progress*100, len(samples))
			// return false here if you want to cancel
			return true
		},
	)

	if audio == nil {
		log.Fatal("Generation failed")
	}

	if !audio.Save(outputFilename) {
		log.Fatal("Failed to save wav")
	}

	log.Println("Saved to:", outputFilename)
}


================================================
FILE: go-api-examples/zero-shot-zipvoice-tts/run.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -f ./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
  tar xvf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
  rm sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
fi

if [ ! -f vocos_24khz.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos_24khz.onnx
fi

go mod tidy
go build

./zero-shot-zipvoice-tts \
  --reference-audio ./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/test_wavs/leijun-1.wav \
  --reference-text "那还是三十六年前, 一九八七年. 我呢考上了武汉大学的计算机系." \
  --num-steps 4 \
  --min-char-in-sentence 10 \
  --output-filename ./test-zipvoice.wav \
  --text "小米的价值观是真诚, 热爱. 真诚，就是不欺人也不自欺. 热爱, 就是全心投入并享受其中."


================================================
FILE: go-api-examples/zero-shot-zipvoice-tts-play/go.mod
================================================
module zero-shot-zipvoice-tts-play

go 1.17


================================================
FILE: go-api-examples/zero-shot-zipvoice-tts-play/main.go
================================================
package main

import (
	"encoding/binary"
	"encoding/json"
	"io"
	"log"
	"math"
	"os"
	"os/signal"
	"sync"
	"syscall"
	"time"

	oto "github.com/ebitengine/oto/v3"
	sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
	flag "github.com/spf13/pflag"
)

type pcmBuffer struct {
	mu       sync.Mutex
	queue    [][]byte
	finished bool
	started  chan struct{}
	once     sync.Once
}

func newPCMBuffer() *pcmBuffer {
	return &pcmBuffer{
		started: make(chan struct{}),
	}
}

func (b *pcmBuffer) Push(p []byte) {
	b.once.Do(func() {
		close(b.started)
	})

	b.mu.Lock()
	b.queue = append(b.queue, p)
	b.mu.Unlock()
}

func (b *pcmBuffer) Finish() {
	b.once.Do(func() {
		close(b.started)
	})

	b.mu.Lock()
	b.finished = true
	b.mu.Unlock()
}

type pcmReader struct {
	buf  *pcmBuffer
	done chan struct{}
	once sync.Once
}

func (r *pcmReader) Read(p []byte) (int, error) {
	<-r.buf.started

	r.buf.mu.Lock()
	defer r.buf.mu.Unlock()

	if len(r.buf.queue) > 0 {
		chunk := r.buf.queue[0]
		n := copy(p, chunk)

		if n == len(chunk) {
			r.buf.queue = r.buf.queue[1:]
		} else {
			r.buf.queue[0] = chunk[n:]
		}
		return n, nil
	}

	if r.buf.finished {
		r.once.Do(func() { close(r.done) })
		return 0, io.EOF
	}

	for i := range p {
		p[i] = 0
	}
	return len(p), nil
}

func main() {
	log.SetFlags(log.LstdFlags | log.Lmicroseconds)

	var referenceAudio string = "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/test_wavs/leijun-1.wav"
	var referenceText string = "那还是三十六年前, 一九八七年. 我呢考上了武汉大学的计算机系."
	var outputFilename string = "./generated.wav"
	var numSteps int = 4
	var minCharInSentence int = 30

	text := "小米的价值观是真诚, 热爱. 真诚，就是不欺人也不自欺. 热爱, 就是全心投入并享受其中."

	flag.StringVar(&referenceAudio, "reference-audio", referenceAudio, "Path to the reference audio")
	flag.StringVar(&referenceText, "reference-text", referenceText, "Reference text for the reference audio")
	flag.StringVar(&text, "text", text, "Text to be synthesized")
	flag.StringVar(&outputFilename, "output-filename", outputFilename, "File to save the generated audio")
	flag.IntVar(&numSteps, "num-steps", numSteps, "Number of ZipVoice flow-matching steps")
	flag.IntVar(&minCharInSentence, "min-char-in-sentence", minCharInSentence, "Minimum characters in a sentence chunk")
	flag.Parse()

	var config sherpa.OfflineTtsConfig
	config.Model.Zipvoice.Encoder =
		"./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/encoder.int8.onnx"
	config.Model.Zipvoice.Decoder =
		"./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/decoder.int8.onnx"
	config.Model.Zipvoice.DataDir =
		"./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/espeak-ng-data"
	config.Model.Zipvoice.Lexicon =
		"./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/lexicon.txt"
	config.Model.Zipvoice.Tokens =
		"./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/tokens.txt"
	config.Model.Zipvoice.Vocoder = "./vocos_24khz.onnx"

	config.Model.NumThreads = 2
	config.Model.Debug = 0
	config.Model.Provider = "cpu"

	log.Println("Creating Offline TTS")
	tts := sherpa.NewOfflineTts(&config)
	if tts == nil {
		log.Fatal("Failed to create OfflineTts")
	}
	defer sherpa.DeleteOfflineTts(tts)

	wave := sherpa.ReadWave(referenceAudio)
	if wave == nil {
		log.Fatal("Failed to read reference wav:", referenceAudio)
	}

	var cfg sherpa.GenerationConfig
	cfg.ReferenceAudio = wave.Samples
	cfg.ReferenceSampleRate = wave.SampleRate
	cfg.ReferenceText = referenceText
	cfg.NumSteps = numSteps

	extraMap := map[string]interface{}{
		"min_char_in_sentence": minCharInSentence,
	}
	extraBytes, err := json.Marshal(extraMap)
	if err != nil {
		log.Fatalf("Failed to marshal generation config extra: %v", err)
	}
	cfg.Extra = json.RawMessage(extraBytes)

	log.Println("Start generating")

	ctx, ready, err := oto.NewContext(&oto.NewContextOptions{
		SampleRate:   tts.SampleRate(),
		ChannelCount: 1,
		Format:       oto.FormatSignedInt16LE,
	})
	if err != nil {
		log.Fatal(err)
	}
	<-ready

	pcmBuf := newPCMBuffer()
	reader := &pcmReader{
		buf:  pcmBuf,
		done: make(chan struct{}),
	}

	player := ctx.NewPlayer(reader)
	player.Play()
	defer player.Close()

	stop := make(chan os.Signal, 1)
	signal.Notify(stop, syscall.SIGINT, syscall.SIGTERM)

	var generated *sherpa.GeneratedAudio
	start := time.Now()

	go func() {
		defer pcmBuf.Finish()

		generated = tts.GenerateWithConfig(
			text,
			&cfg,
			func(samples []float32, progress float32) bool {
				log.Printf("Progress: %.1f%%", progress*100)

				buf := make([]byte, len(samples)*2)
				for i, s := range samples {
					if s > 1 {
						s = 1
					} else if s < -1 {
						s = -1
					}
					v := int16(math.Round(float64(s * 32767)))
					binary.LittleEndian.PutUint16(buf[i*2:], uint16(v))
				}

				pcmBuf.Push(buf)
				return true
			},
		)

		log.Println("TTS generation finished in", time.Since(start))
	}()

	select {
	case <-stop:
		log.Println("Interrupted")
	case <-reader.done:
		log.Println("Playback finished")
	}

	if generated != nil {
		if ok := generated.Save(outputFilename); !ok {
			log.Println("Failed to save audio")
		} else {
			log.Println("Saved generated audio to", outputFilename)
		}
	}

	time.Sleep(800 * time.Millisecond)

	log.Println("Done")
}


================================================
FILE: go-api-examples/zero-shot-zipvoice-tts-play/run.sh
================================================
#!/usr/bin/env bash

set -ex

export CGO_ENABLED=1

if [ ! -f ./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
  tar xvf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
  rm sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
fi

if [ ! -f vocos_24khz.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos_24khz.onnx
fi

go mod tidy
go build

./zero-shot-zipvoice-tts-play \
  --reference-audio ./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/test_wavs/leijun-1.wav \
  --reference-text "那还是三十六年前, 一九八七年. 我呢考上了武汉大学的计算机系." \
  --num-steps 4 \
  --min-char-in-sentence 10 \
  --output-filename ./generated-leijun.wav \
  --text "小米的价值观是真诚, 热爱. 真诚，就是不欺人也不自欺. 热爱, 就是全心投入并享受其中."


================================================
FILE: harmony-os/.gitignore
================================================
!build-profile.json5
*.har


================================================
FILE: harmony-os/README.md
================================================
# Introduction

- [./SherpaOnnxHar](./SherpaOnnxHar) It is for building `sherpa_onnx.har`.
  If you don't need to change the C++ or Typescript code of sherpa-onnx, then
  you can download pre-built `sherpa_onnx.har` from us. Just run `ohpm install sherpa_onnx`.
  Please refer to our [doc](https://k2-fsa.github.io/sherpa/onnx/harmony-os/how-to-build-har.html)
  if you want to build `sherpa-onnx` from source.

- [./SherpaOnnxSpeakerDiarization](./SherpaOnnxSpeakerDiarization) It shows how
  to run on-device speaker diarization.

- [./SherpaOnnxSpeakerIdentification](./SherpaOnnxSpeakerIdentification) It shows how to use
  speaker embedding models for on-device speaker identification.

- [./SherpaOnnxStreamingAsr](./SherpaOnnxStreamingAsr) It shows how to use
  streaming ASR models for real-time on-device speech recognition.

- [./SherpaOnnxTts](./SherpaOnnxTts) It shows how to run on-device text-to-speech.
  Please see the doc at <https://k2-fsa.github.io/sherpa/onnx/harmony-os/tts.html>

- [./SherpaOnnxVadAsr](./SherpaOnnxVadAsr) It shows how to use
  VAD + Non-streaming ASR for speech recognition.
  Please see the doc at <https://k2-fsa.github.io/sherpa/onnx/harmony-os/vad-asr.html>


================================================
FILE: harmony-os/SherpaOnnxHar/.gitignore
================================================
/node_modules
/oh_modules
/local.properties
/.idea
**/build
/.hvigor
.cxx
/.clangd
/.clang-format
/.clang-tidy
**/.test
/.appanalyzer

================================================
FILE: harmony-os/SherpaOnnxHar/AppScope/app.json5
================================================
{
  "app": {
    "bundleName": "com.k2fsa.sherpa.onnx",
    "vendor": "example",
    "versionCode": 1000000,
    "versionName": "1.0.0",
    "icon": "$media:app_icon",
    "label": "$string:app_name"
  }
}


================================================
FILE: harmony-os/SherpaOnnxHar/AppScope/resources/base/element/string.json
================================================
{
  "string": [
    {
      "name": "app_name",
      "value": "SherpaOnnxHar"
    }
  ]
}


================================================
FILE: harmony-os/SherpaOnnxHar/README.md
================================================
# Introduction

How to build `sherpa_onnx.har` from the command line
----------------------------------------------------

Please see https://k2-fsa.github.io/sherpa/onnx/harmony-os/how-to-build-har.html


================================================
FILE: harmony-os/SherpaOnnxHar/build-profile.json5
================================================
{
  "app": {
    "signingConfigs": [],
    "products": [
      {
        "name": "default",
        "signingConfig": "default",
        "compatibleSdkVersion": "4.0.0(10)",
        "runtimeOS": "HarmonyOS",
        "buildOption": {
          "strictMode": {
            "caseSensitiveCheck": true,
          }
        }
      }
    ],
    "buildModeSet": [
      {
        "name": "debug",
      },
      {
        "name": "release"
      }
    ]
  },
  "modules": [
    {
      "name": "entry",
      "srcPath": "./entry",
      "targets": [
        {
          "name": "default",
          "applyToProducts": [
            "default"
          ]
        }
      ]
    },
    {
      "name": "sherpa_onnx",
      "srcPath": "./sherpa_onnx",
    }
  ]
}

================================================
FILE: harmony-os/SherpaOnnxHar/code-linter.json5
================================================
{
  "files": [
    "**/*.ets"
  ],
  "ignore": [
    "**/src/ohosTest/**/*",
    "**/src/test/**/*",
    "**/src/mock/**/*",
    "**/node_modules/**/*",
    "**/oh_modules/**/*",
    "**/build/**/*",
    "**/.preview/**/*"
  ],
  "ruleSet": [
    "plugin:@performance/recommended",
    "plugin:@typescript-eslint/recommended"
  ],
  "rules": {
  }
}

================================================
FILE: harmony-os/SherpaOnnxHar/entry/.gitignore
================================================
/node_modules
/oh_modules
/.preview
/build
/.cxx
/.test

================================================
FILE: harmony-os/SherpaOnnxHar/entry/build-profile.json5
================================================
{
  "apiType": "stageMode",
  "buildOption": {
  },
  "buildOptionSet": [
    {
      "name": "release",
      "arkOptions": {
        "obfuscation": {
          "ruleOptions": {
            "enable": false,
            "files": [
              "./obfuscation-rules.txt"
            ]
          }
        }
      }
    },
  ],
  "targets": [
    {
      "name": "default"
    },
    {
      "name": "ohosTest",
    }
  ]
}

================================================
FILE: harmony-os/SherpaOnnxHar/entry/hvigorfile.ts
================================================
import { hapTasks } from '@ohos/hvigor-ohos-plugin';

export default {
    system: hapTasks,  /* Built-in plugin of Hvigor. It cannot be modified. */
    plugins:[]         /* Custom plugin to extend the functionality of Hvigor. */
}


================================================
FILE: harmony-os/SherpaOnnxHar/entry/obfuscation-rules.txt
================================================
# Define project specific obfuscation rules here.
# You can include the obfuscation configuration files in the current module's build-profile.json5.
#
# For more details, see
#   https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/source-obfuscation-V5

# Obfuscation options:
# -disable-obfuscation: disable all obfuscations
# -enable-property-obfuscation: obfuscate the property names
# -enable-toplevel-obfuscation: obfuscate the names in the global scope
# -compact: remove unnecessary blank spaces and all line feeds
# -remove-log: remove all console.* statements
# -print-namecache: print the name cache that contains the mapping from the old names to new names
# -apply-namecache: reuse the given cache file

# Keep options:
# -keep-property-name: specifies property names that you want to keep
# -keep-global-name: specifies names that you want to keep in the global scope

-enable-property-obfuscation
-enable-toplevel-obfuscation
-enable-filename-obfuscation
-enable-export-obfuscation

================================================
FILE: harmony-os/SherpaOnnxHar/entry/oh-package.json5
================================================
{
  "name": "entry",
  "version": "1.0.0",
  "description": "Please describe the basic information.",
  "main": "",
  "author": "",
  "license": "",
  "dependencies": {}
}


================================================
FILE: harmony-os/SherpaOnnxHar/entry/src/main/ets/entryability/EntryAbility.ets
================================================
import AbilityConstant from '@ohos.app.ability.AbilityConstant';
import hilog from '@ohos.hilog';
import UIAbility from '@ohos.app.ability.UIAbility';
import Want from '@ohos.app.ability.Want';
import window from '@ohos.window';

export default class EntryAbility extends UIAbility {
  onCreate(want: Want, launchParam: AbilityConstant.LaunchParam): void {
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onCreate');
  }

  onDestroy(): void {
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onDestroy');
  }

  onWindowStageCreate(windowStage: window.WindowStage): void {
    // Main window is created, set main page for this ability
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onWindowStageCreate');

    windowStage.loadContent('pages/Index', (err) => {
      if (err.code) {
        hilog.error(0x0000, 'testTag', 'Failed to load the content. Cause: %{public}s', JSON.stringify(err) ?? '');
        return;
      }
      hilog.info(0x0000, 'testTag', 'Succeeded in loading the content.');
    });
  }

  onWindowStageDestroy(): void {
    // Main window is destroyed, release UI related resources
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onWindowStageDestroy');
  }

  onForeground(): void {
    // Ability has brought to foreground
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onForeground');
  }

  onBackground(): void {
    // Ability has back to background
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onBackground');
  }
}


================================================
FILE: harmony-os/SherpaOnnxHar/entry/src/main/ets/entrybackupability/EntryBackupAbility.ets
================================================
import hilog from '@ohos.hilog';
import BackupExtensionAbility, { BundleVersion } from '@ohos.application.BackupExtensionAbility';

export default class EntryBackupAbility extends BackupExtensionAbility {
  async onBackup() {
    hilog.info(0x0000, 'testTag', 'onBackup ok');
  }

  async onRestore(bundleVersion: BundleVersion) {
    hilog.info(0x0000, 'testTag', 'onRestore ok %{public}s', JSON.stringify(bundleVersion));
  }
}

================================================
FILE: harmony-os/SherpaOnnxHar/entry/src/main/ets/pages/Index.ets
================================================
@Entry
@Component
struct Index {
  @State message: string = 'Hello World';

  build() {
    Row() {
      Column() {
        Text(this.message)
          .fontSize(50)
          .fontWeight(FontWeight.Bold)
      }
      .width('100%')
    }
    .height('100%')
  }
}

================================================
FILE: harmony-os/SherpaOnnxHar/entry/src/main/module.json5
================================================
{
  "module": {
    "name": "entry",
    "type": "entry",
    "description": "$string:module_desc",
    "mainElement": "EntryAbility",
    "deviceTypes": [
      "phone",
      "tablet",
      "2in1"
    ],
    "deliveryWithInstall": true,
    "installationFree": false,
    "pages": "$profile:main_pages",
    "abilities": [
      {
        "name": "EntryAbility",
        "srcEntry": "./ets/entryability/EntryAbility.ets",
        "description": "$string:EntryAbility_desc",
        "icon": "$media:layered_image",
        "label": "$string:EntryAbility_label",
        "startWindowIcon": "$media:startIcon",
        "startWindowBackground": "$color:start_window_background",
        "exported": true,
        "skills": [
          {
            "entities": [
              "entity.system.home"
            ],
            "actions": [
              "action.system.home"
            ]
          }
        ]
      }
    ],
    "extensionAbilities": [
      {
        "name": "EntryBackupAbility",
        "srcEntry": "./ets/entrybackupability/EntryBackupAbility.ets",
        "type": "backup",
        "exported": false,
        "metadata": [
          {
            "name": "ohos.extension.backup",
            "resource": "$profile:backup_config"
          }
        ],
      }
    ]
  }
}

================================================
FILE: harmony-os/SherpaOnnxHar/entry/src/main/resources/base/element/color.json
================================================
{
  "color": [
    {
      "name": "start_window_background",
      "value": "#FFFFFF"
    }
  ]
}

================================================
FILE: harmony-os/SherpaOnnxHar/entry/src/main/resources/base/element/string.json
================================================
{
  "string": [
    {
      "name": "module_desc",
      "value": "module description"
    },
    {
      "name": "EntryAbility_desc",
      "value": "description"
    },
    {
      "name": "EntryAbility_label",
      "value": "label"
    }
  ]
}

================================================
FILE: harmony-os/SherpaOnnxHar/entry/src/main/resources/base/media/layered_image.json
================================================
{
  "layered-image":
  {
    "background" : "$media:background",
    "foreground" : "$media:foreground"
  }
}

================================================
FILE: harmony-os/SherpaOnnxHar/entry/src/main/resources/base/profile/backup_config.json
================================================
{
  "allowToBackupRestore": true
}

================================================
FILE: harmony-os/SherpaOnnxHar/entry/src/main/resources/base/profile/main_pages.json
================================================
{
  "src": [
    "pages/Index"
  ]
}


================================================
FILE: harmony-os/SherpaOnnxHar/entry/src/main/resources/en_US/element/string.json
================================================
{
  "string": [
    {
      "name": "module_desc",
      "value": "module description"
    },
    {
      "name": "EntryAbility_desc",
      "value": "description"
    },
    {
      "name": "EntryAbility_label",
      "value": "label"
    }
  ]
}

================================================
FILE: harmony-os/SherpaOnnxHar/entry/src/main/resources/zh_CN/element/string.json
================================================
{
  "string": [
    {
      "name": "module_desc",
      "value": "模块描述"
    },
    {
      "name": "EntryAbility_desc",
      "value": "description"
    },
    {
      "name": "EntryAbility_label",
      "value": "label"
    }
  ]
}

================================================
FILE: harmony-os/SherpaOnnxHar/entry/src/ohosTest/ets/test/Ability.test.ets
================================================
import hilog from '@ohos.hilog';
import { describe, beforeAll, beforeEach, afterEach, afterAll, it, expect } from '@ohos/hypium';

export default function abilityTest() {
  describe('ActsAbilityTest', () => {
    // Defines a test suite. Two parameters are supported: test suite name and test suite function.
    beforeAll(() => {
      // Presets an action, which is performed only once before all test cases of the test suite start.
      // This API supports only one parameter: preset action function.
    })
    beforeEach(() => {
      // Presets an action, which is performed before each unit test case starts.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: preset action function.
    })
    afterEach(() => {
      // Presets a clear action, which is performed after each unit test case ends.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: clear action function.
    })
    afterAll(() => {
      // Presets a clear action, which is performed after all test cases of the test suite end.
      // This API supports only one parameter: clear action function.
    })
    it('assertContain', 0, () => {
      // Defines a test case. This API supports three parameters: test case name, filter parameter, and test case function.
      hilog.info(0x0000, 'testTag', '%{public}s', 'it begin');
      let a = 'abc';
      let b = 'b';
      // Defines a variety of assertion methods, which are used to declare expected boolean conditions.
      expect(a).assertContain(b);
      expect(a).assertEqual(a);
    })
  })
}

================================================
FILE: harmony-os/SherpaOnnxHar/entry/src/ohosTest/ets/test/List.test.ets
================================================
import abilityTest from './Ability.test';

export default function testsuite() {
  abilityTest();
}

================================================
FILE: harmony-os/SherpaOnnxHar/entry/src/ohosTest/module.json5
================================================
{
  "module": {
    "name": "entry_test",
    "type": "feature",
    "deviceTypes": [
      "phone",
      "tablet",
      "2in1"
    ],
    "deliveryWithInstall": true,
    "installationFree": false
  }
}


================================================
FILE: harmony-os/SherpaOnnxHar/entry/src/test/List.test.ets
================================================
import localUnitTest from './LocalUnit.test';

export default function testsuite() {
  localUnitTest();
}

================================================
FILE: harmony-os/SherpaOnnxHar/entry/src/test/LocalUnit.test.ets
================================================
import { describe, beforeAll, beforeEach, afterEach, afterAll, it, expect } from '@ohos/hypium';

export default function localUnitTest() {
  describe('localUnitTest', () => {
    // Defines a test suite. Two parameters are supported: test suite name and test suite function.
    beforeAll(() => {
      // Presets an action, which is performed only once before all test cases of the test suite start.
      // This API supports only one parameter: preset action function.
    });
    beforeEach(() => {
      // Presets an action, which is performed before each unit test case starts.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: preset action function.
    });
    afterEach(() => {
      // Presets a clear action, which is performed after each unit test case ends.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: clear action function.
    });
    afterAll(() => {
      // Presets a clear action, which is performed after all test cases of the test suite end.
      // This API supports only one parameter: clear action function.
    });
    it('assertContain', 0, () => {
      // Defines a test case. This API supports three parameters: test case name, filter parameter, and test case function.
      let a = 'abc';
      let b = 'b';
      // Defines a variety of assertion methods, which are used to declare expected boolean conditions.
      expect(a).assertContain(b);
      expect(a).assertEqual(a);
    });
  });
}

================================================
FILE: harmony-os/SherpaOnnxHar/hvigor/hvigor-config.json5
================================================
{
  "modelVersion": "5.0.0",
  "dependencies": {
  },
  "execution": {
    // "analyze": "normal",                     /* Define the build analyze mode. Value: [ "normal" | "advanced" | false ]. Default: "normal" */
    // "daemon": true,                          /* Enable daemon compilation. Value: [ true | false ]. Default: true */
    // "incremental": true,                     /* Enable incremental compilation. Value: [ true | false ]. Default: true */
    // "parallel": true,                        /* Enable parallel compilation. Value: [ true | false ]. Default: true */
    // "typeCheck": false,                      /* Enable typeCheck. Value: [ true | false ]. Default: false */
  },
  "logging": {
    // "level": "info"                          /* Define the log level. Value: [ "debug" | "info" | "warn" | "error" ]. Default: "info" */
  },
  "debugging": {
    // "stacktrace": false                      /* Disable stacktrace compilation. Value: [ true | false ]. Default: false */
  },
  "nodeOptions": {
    // "maxOldSpaceSize": 8192                  /* Enable nodeOptions maxOldSpaceSize compilation. Unit M. Used for the daemon process. Default: 8192*/
    // "exposeGC": true                         /* Enable to trigger garbage collection explicitly. Default: true*/
  }
}


================================================
FILE: harmony-os/SherpaOnnxHar/hvigorfile.ts
================================================
import { appTasks } from '@ohos/hvigor-ohos-plugin';

export default {
    system: appTasks,  /* Built-in plugin of Hvigor. It cannot be modified. */
    plugins:[]         /* Custom plugin to extend the functionality of Hvigor. */
}


================================================
FILE: harmony-os/SherpaOnnxHar/notes.md
================================================
# Notes

## How to publish a package

Please see
 - <https://ohpm.openharmony.cn/#/cn/help/publishrequirefile>
 - <https://ohpm.openharmony.cn/#/cn/help/createandpublish>
 - <https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/ide-har-publish-V5>

## How to sign the HAP file from commandline

Please see
<https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/ide-command-line-building-app-V5>


================================================
FILE: harmony-os/SherpaOnnxHar/oh-package-lock.json5
================================================
{
  "meta": {
    "stableOrder": true
  },
  "lockfileVersion": 3,
  "ATTENTION": "THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.",
  "specifiers": {
    "@ohos/hypium@1.0.19": "@ohos/hypium@1.0.19"
  },
  "packages": {
    "@ohos/hypium@1.0.19": {
      "name": "@ohos/hypium",
      "version": "1.0.19",
      "integrity": "sha512-cEjDgLFCm3cWZDeRXk7agBUkPqjWxUo6AQeiu0gEkb3J8ESqlduQLSIXeo3cCsm8U/asL7iKjF85ZyOuufAGSQ==",
      "resolved": "https://ohpm.openharmony.cn/ohpm/@ohos/hypium/-/hypium-1.0.19.har",
      "registryType": "ohpm"
    }
  }
}

================================================
FILE: harmony-os/SherpaOnnxHar/oh-package.json5
================================================
{
  "modelVersion": "5.0.0",
  "description": "Please describe the basic information.",
  "dependencies": {
  },
  "devDependencies": {
    "@ohos/hypium": "1.0.19"
  }
}


================================================
FILE: harmony-os/SherpaOnnxHar/release.sh
================================================
#!/usr/bin/env bash
set -ex

export PATH=/Users/fangjun/software/command-line-tools/bin:$PATH

cp -v ../../CHANGELOG.md ./sherpa_onnx

hvigorw clean --no-daemon
hvigorw --mode module -p product=default -p module=sherpa_onnx@default assembleHar --analyze=normal --parallel --incremental --no-daemon

ohpm publish ./sherpa_onnx/build/default/outputs/default/sherpa_onnx.har


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/.gitignore
================================================
/node_modules
/oh_modules
/.preview
/build
/.cxx
/.test

================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/BuildProfile.ets
================================================
/**
 * Use these variables when you tailor your ArkTS code. They must be of the const type.
 */
export const HAR_VERSION = '1.12.31';
export const BUILD_MODE_NAME = 'debug';
export const DEBUG = true;
export const TARGET_NAME = 'default';

/**
 * BuildProfile Class is used only for compatibility purposes.
 */
export default class BuildProfile { 
	static readonly HAR_VERSION = HAR_VERSION;
	static readonly BUILD_MODE_NAME = BUILD_MODE_NAME;
	static readonly DEBUG = DEBUG;
	static readonly TARGET_NAME = TARGET_NAME;
}

================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/Index.ets
================================================
export { listRawfileDir, readWave, readWaveFromBinary, } from "libsherpa_onnx.so";

export { CircularBuffer, SileroVadConfig, TenVadConfig, SpeechSegment, Vad, VadConfig, } from './src/main/ets/components/Vad';


export { Samples,
  OfflineStream,
  FeatureConfig,
  HomophoneReplacerConfig,
  OfflineCanaryModelConfig,
  OfflineDolphinModelConfig,
  OfflineFireRedAsrCtcModelConfig,
  OfflineFireRedAsrModelConfig,
  OfflineFunASRNanoModelConfig,
  OfflineMedAsrCtcModelConfig,
  OfflineOmnilingualAsrCtcModelConfig,
  OfflineTransducerModelConfig,
  OfflineParaformerModelConfig,
  OfflineNemoEncDecCtcModelConfig,
  OfflineWhisperModelConfig,
  OfflineTdnnModelConfig,
  OfflineMoonshineModelConfig,
  OfflineSenseVoiceModelConfig,
  OfflineWenetCtcModelConfig,
  OfflineZipformerCtcModelConfig,
  OfflineModelConfig,
  OfflineLMConfig,
  OfflineRecognizerConfig,
  OfflineRecognizerResult,
  OfflineRecognizer,
} from './src/main/ets/components/NonStreamingAsr';

export { OnlineStream,
  OnlineNemoCtcModelConfig,
  OnlineParaformerModelConfig,
  OnlineToneCtcModelConfig,
  OnlineTransducerModelConfig,
  OnlineZipformer2CtcModelConfig,
  OnlineModelConfig,
  OnlineCtcFstDecoderConfig,
  OnlineRecognizerConfig,
  OnlineRecognizerResult,
  OnlineRecognizer,
} from './src/main/ets/components/StreamingAsr';

export { OfflineTtsKittenModelConfig,
  OfflineTtsKokoroModelConfig,
  OfflineTtsMatchaModelConfig,
  OfflineTtsPocketModelConfig,
  OfflineTtsSupertonicModelConfig,
  OfflineTtsVitsModelConfig,
  OfflineTtsZipvoiceModelConfig,
  OfflineTtsModelConfig,
  OfflineTtsConfig,
  OfflineTts,
  TtsOutput,
  TtsGenerationConfig,
  TtsInput,
  TtsInputWithConfig,
} from './src/main/ets/components/NonStreamingTts';

export { OfflinePunctuationModelConfig,
  OfflinePunctuationConfig,
  OfflinePunctuation,
} from './src/main/ets/components/OfflinePunctuation';

export { OnlinePunctuationModelConfig,
  OnlinePunctuationConfig,
  OnlinePunctuation,
} from './src/main/ets/components/OnlinePunctuation';

export { SpeakerEmbeddingExtractorConfig,
  SpeakerEmbeddingExtractor,
  SpeakerEmbeddingManager,
} from './src/main/ets/components/SpeakerIdentification';

export { OfflineSpeakerSegmentationPyannoteModelConfig,
  OfflineSpeakerSegmentationModelConfig,
  OfflineSpeakerDiarizationConfig,
  OfflineSpeakerDiarizationSegment,
  OfflineSpeakerDiarization,
  FastClusteringConfig,
} from './src/main/ets/components/NonStreamingSpeakerDiarization';

export { KeywordSpotterConfig,
  KeywordSpotterResult,
  KeywordSpotter,
} from './src/main/ets/components/KeywordSpotting';


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/README.md
================================================
# Introduction

[sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx) is one of the deployment
frameworks of [Next-gen Kaldi](https://github.com/k2-fsa).

It supports speech-to-text, text-to-speech, speaker diarization, and VAD using
onnxruntime without Internet connection.

It also supports embedded systems, Android, iOS, HarmonyOS,
Raspberry Pi, RISC-V, x86_64 servers, websocket server/client,
C/C++, Python, Kotlin, C#, Go, NodeJS, Java, Swift, Dart, JavaScript,
Flutter, Object Pascal, Lazarus, Rust, etc.


# Installation

To use `sherpa-onnx` in your project, please either use

```
ohpm install sherpa_onnx
```
or update your `oh-package.json5` to include the following:

```
  "dependencies": {
    "sherpa_onnx": "1.12.31",
  },
```

Note that we recommend always using the latest version.

# Examples

| Demo | URL | Description|
|------|-----|------------|
|SherpaOnnxStreamingAsr|[Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/harmony-os/SherpaOnnxStreamingAsr)|On-device real-time/streaming speech recognition with Next-gen Kaldi|
|SherpaOnnxVadAsr|[Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/harmony-os/SherpaOnnxVadAsr)|It shows how to use VAD with a non-streaming ASR model for on-device speech recognition without accessing the network |
|SherpaOnnxTts|[Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/harmony-os/SherpaOnnxTts)|It shows how to use Next-gen Kaldi for on-device text-to-speech (TTS, i.e., speech synthesis)|
|SherpaOnnxSpeakerDiarization|[Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/harmony-os/SherpaOnnxSpeakerDiarization)|On-device speaker diarization with Next-gen Kaldi|
|SherpaOnnxSpeakerIdentification|[Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/harmony-os/SherpaOnnxSpeakerIdentification)|On-device speaker identification with Next-gen Kaldi|

# Documentation

If you have any issues, please either look at our doc at
<https://k2-fsa.github.io/sherpa/onnx/> or create an issue at
<https://github.com/k2-fsa/sherpa-onnx/issues>


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/build-profile.json5
================================================
{
  "apiType": "stageMode",
  "buildOption": {
    "externalNativeOptions": {
      "path": "./src/main/cpp/CMakeLists.txt",
      "arguments": "",
      "cppFlags": "-std=c++17",
      "abiFilters": [
        "arm64-v8a",
        "x86_64",
      ],
    },
  },
  "buildOptionSet": [
    {
      "name": "release",
      "arkOptions": {
        "obfuscation": {
          "ruleOptions": {
            "enable": false,
            "files": [
              "./obfuscation-rules.txt"
            ]
          },
          "consumerFiles": [
            "./consumer-rules.txt"
          ]
        }
      },
      "nativeLib": {
        "debugSymbol": {
          "strip": true,
          "exclude": []
        }
      }
    },
  ],
  "targets": [
    {
      "name": "default"
    },
    {
      "name": "ohosTest"
    }
  ]
}


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/consumer-rules.txt
================================================


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/hvigorfile.ts
================================================
import { harTasks } from '@ohos/hvigor-ohos-plugin';

export default {
    system: harTasks,  /* Built-in plugin of Hvigor. It cannot be modified. */
    plugins:[]         /* Custom plugin to extend the functionality of Hvigor. */
}


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/obfuscation-rules.txt
================================================
# Define project specific obfuscation rules here.
# You can include the obfuscation configuration files in the current module's build-profile.json5.
#
# For more details, see
#   https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/source-obfuscation-V5

# Obfuscation options:
# -disable-obfuscation: disable all obfuscations
# -enable-property-obfuscation: obfuscate the property names
# -enable-toplevel-obfuscation: obfuscate the names in the global scope
# -compact: remove unnecessary blank spaces and all line feeds
# -remove-log: remove all console.* statements
# -print-namecache: print the name cache that contains the mapping from the old names to new names
# -apply-namecache: reuse the given cache file

# Keep options:
# -keep-property-name: specifies property names that you want to keep
# -keep-global-name: specifies names that you want to keep in the global scope

-enable-property-obfuscation
-enable-toplevel-obfuscation
-enable-filename-obfuscation
-enable-export-obfuscation

================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/oh-package-lock.json5
================================================
{
  "meta": {
    "stableOrder": true
  },
  "lockfileVersion": 3,
  "ATTENTION": "THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.",
  "specifiers": {
    "libsherpa_onnx.so@src/main/cpp/types/libsherpa_onnx": "libsherpa_onnx.so@src/main/cpp/types/libsherpa_onnx"
  },
  "packages": {
    "libsherpa_onnx.so@src/main/cpp/types/libsherpa_onnx": {
      "name": "libsherpa_onnx.so",
      "version": "1.0.0",
      "resolved": "src/main/cpp/types/libsherpa_onnx",
      "registryType": "local"
    }
  }
}

================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/oh-package.json5
================================================
{
  "name": "sherpa_onnx",
  "version": "1.12.31",
  "description": "On-device speech-to-text, text-to-speech, and speaker diarization using Next-gen Kaldi without Internet connection",
  "main": "Index.ets",
  "author": "The next-gen Kaldi team",
  "license": "Apache-2.0",
  "homepage": "https://github.com/k2-fsa/sherpa-onnx",
  "repository": "https://github.com/k2-fsa/sherpa-onnx/tree/master/harmony-os/SherpaOnnxHar",
  "dependencies": {
    "libsherpa_onnx.so": "file:./src/main/cpp/types/libsherpa_onnx"
  },
  "keywords": [
    "语音识别",
    "语音合成",
    "说话人日志",
    "新一代Kaldi",
    "不联网",
    "本地",
    "tts",
    "asr",
    "privacy",
    "open-source",
  ],
  "bugs": {
    "url": "https://github.com/k2-fsa/sherpa-onnx/issues"
  },
}


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/CMakeLists.txt
================================================
# the minimum version of CMake.
cmake_minimum_required(VERSION 3.13.0)
project(myNpmLib)

if (NOT CMAKE_CXX_STANDARD)
  set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ version to use")
endif()

# Disable warning about
#
# "The DOWNLOAD_EXTRACT_TIMESTAMP option was not given and policy CMP0135 is
#  not set.
if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0")
  cmake_policy(SET CMP0135 NEW)
endif()

set(NATIVERENDER_ROOT_PATH ${CMAKE_CURRENT_SOURCE_DIR})

if(DEFINED PACKAGE_FIND_FILE)
    include(${PACKAGE_FIND_FILE})
endif()

include_directories(${NATIVERENDER_ROOT_PATH}
                    ${NATIVERENDER_ROOT_PATH}/include)

include(FetchContent)
FetchContent_Declare(node_addon_api
    GIT_REPOSITORY "https://github.com/nodejs/node-addon-api.git"
    GIT_TAG c679f6f4c9dc6bf9fc0d99cbe5982bd24a5e2c7b
    PATCH_COMMAND git checkout . && git apply --ignore-whitespace "${CMAKE_CURRENT_LIST_DIR}/my-patch.diff"
)
FetchContent_MakeAvailable(node_addon_api)
FetchContent_GetProperties(node_addon_api)
if(NOT node_addon_api_POPULATED)
    message(STATUS "Downloading node-addon-api from")
    FetchContent_Populate(node_addon_api)
endif()

message(STATUS "node-addon-api is downloaded to ${node_addon_api_SOURCE_DIR}")
include_directories(${node_addon_api_SOURCE_DIR})

add_library(sherpa_onnx SHARED
  audio-tagging.cc
  keyword-spotting.cc
  non-streaming-asr.cc
  non-streaming-speaker-diarization.cc
  non-streaming-speech-denoiser.cc
  non-streaming-tts.cc
  offline-punctuation.cc
  online-punctuation.cc
  streaming-speech-denoiser.cc
  sherpa-onnx-node-addon-api.cc
  speaker-identification.cc
  spoken-language-identification.cc
  streaming-asr.cc
  utils.cc
  vad.cc
  version.cc
  wave-reader.cc
  wave-writer.cc
)

add_library(sherpa_onnx_c_api SHARED IMPORTED)
set_target_properties(sherpa_onnx_c_api
    PROPERTIES
    IMPORTED_LOCATION ${CMAKE_CURRENT_SOURCE_DIR}/libs/${OHOS_ARCH}/libsherpa-onnx-c-api.so)

add_library(onnxruntime SHARED IMPORTED)
set_target_properties(onnxruntime
    PROPERTIES
    IMPORTED_LOCATION ${CMAKE_CURRENT_SOURCE_DIR}/libs/${OHOS_ARCH}/libonnxruntime.so)


target_link_libraries(sherpa_onnx PUBLIC libace_napi.z.so
 libhilog_ndk.z.so # for hilog
 librawfile.z.so
 sherpa_onnx_c_api onnxruntime
)


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/audio-tagging.cc
================================================
// scripts/node-addon-api/src/audio-tagging.cc
//
// Copyright (c)  2024  Xiaomi Corporation
#include <sstream>

#include "macros.h"  // NOLINT
#include "napi.h"    // NOLINT
#include "sherpa-onnx/c-api/c-api.h"

static SherpaOnnxOfflineZipformerAudioTaggingModelConfig
GetAudioTaggingZipformerModelConfig(Napi::Object obj) {
  SherpaOnnxOfflineZipformerAudioTaggingModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("zipformer") || !obj.Get("zipformer").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("zipformer").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);

  return c;
}

static SherpaOnnxAudioTaggingModelConfig GetAudioTaggingModelConfig(
    Napi::Object obj) {
  SherpaOnnxAudioTaggingModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("model") || !obj.Get("model").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("model").As<Napi::Object>();
  c.zipformer = GetAudioTaggingZipformerModelConfig(o);

  SHERPA_ONNX_ASSIGN_ATTR_STR(ced, ced);

  SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);

  if (o.Has("debug") &&
      (o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) {
    if (o.Get("debug").IsBoolean()) {
      c.debug = o.Get("debug").As<Napi::Boolean>().Value();
    } else {
      c.debug = o.Get("debug").As<Napi::Number>().Int32Value();
    }
  }
  SHERPA_ONNX_ASSIGN_ATTR_STR(provider, provider);

  return c;
}

static Napi::External<SherpaOnnxAudioTagging> CreateAudioTaggingWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsObject()) {
    Napi::TypeError::New(env, "You should pass an object as the only argument.")
        .ThrowAsJavaScriptException();

    return {};
  }

  Napi::Object o = info[0].As<Napi::Object>();

  SherpaOnnxAudioTaggingConfig c;
  memset(&c, 0, sizeof(c));
  c.model = GetAudioTaggingModelConfig(o);

  SHERPA_ONNX_ASSIGN_ATTR_STR(labels, labels);
  SHERPA_ONNX_ASSIGN_ATTR_INT32(top_k, topK);

  const SherpaOnnxAudioTagging *at = SherpaOnnxCreateAudioTagging(&c);

  SHERPA_ONNX_DELETE_C_STR(c.model.zipformer.model);
  SHERPA_ONNX_DELETE_C_STR(c.model.ced);
  SHERPA_ONNX_DELETE_C_STR(c.model.provider);
  SHERPA_ONNX_DELETE_C_STR(c.labels);

  if (!at) {
    Napi::TypeError::New(env, "Please check your config!")
        .ThrowAsJavaScriptException();

    return {};
  }

  return Napi::External<SherpaOnnxAudioTagging>::New(
      env, const_cast<SherpaOnnxAudioTagging *>(at),
      [](Napi::Env env, SherpaOnnxAudioTagging *at) {
        SherpaOnnxDestroyAudioTagging(at);
      });
}

static Napi::External<SherpaOnnxOfflineStream>
AudioTaggingCreateOfflineStreamWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(
        env, "You should pass an audio tagging pointer as the only argument")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxAudioTagging *at =
      info[0].As<Napi::External<SherpaOnnxAudioTagging>>().Data();

  const SherpaOnnxOfflineStream *stream =
      SherpaOnnxAudioTaggingCreateOfflineStream(at);

  return Napi::External<SherpaOnnxOfflineStream>::New(
      env, const_cast<SherpaOnnxOfflineStream *>(stream),
      [](Napi::Env env, SherpaOnnxOfflineStream *stream) {
        SherpaOnnxDestroyOfflineStream(stream);
      });
}

static Napi::Object AudioTaggingComputeWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 3) {
    std::ostringstream os;
    os << "Expect only 3 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(
        env, "You should pass an audio tagging pointer as the first argument")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[1].IsExternal()) {
    Napi::TypeError::New(
        env, "You should pass an offline stream pointer as the second argument")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[2].IsNumber()) {
    Napi::TypeError::New(env,
                         "You should pass an integer as the third argument")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxAudioTagging *at =
      info[0].As<Napi::External<SherpaOnnxAudioTagging>>().Data();

  const SherpaOnnxOfflineStream *stream =
      info[1].As<Napi::External<SherpaOnnxOfflineStream>>().Data();

  int32_t top_k = info[2].As<Napi::Number>().Int32Value();

  const SherpaOnnxAudioEvent *const *events =
      SherpaOnnxAudioTaggingCompute(at, stream, top_k);

  auto p = events;
  int32_t k = 0;
  while (p && *p) {
    ++k;
    ++p;
  }

  Napi::Array ans = Napi::Array::New(env, k);
  for (uint32_t i = 0; i != k; ++i) {
    Napi::Object obj = Napi::Object::New(env);
    obj.Set(Napi::String::New(env, "name"),
            Napi::String::New(env, events[i]->name));
    obj.Set(Napi::String::New(env, "index"),
            Napi::Number::New(env, events[i]->index));
    obj.Set(Napi::String::New(env, "prob"),
            Napi::Number::New(env, events[i]->prob));
    // ans[i] = obj; // see #2120
    ans.Set(i, obj);
  }

  SherpaOnnxAudioTaggingFreeResults(events);

  return ans;
}

void InitAudioTagging(Napi::Env env, Napi::Object exports) {
  exports.Set(Napi::String::New(env, "createAudioTagging"),
              Napi::Function::New(env, CreateAudioTaggingWrapper));

  exports.Set(Napi::String::New(env, "audioTaggingCreateOfflineStream"),
              Napi::Function::New(env, AudioTaggingCreateOfflineStreamWrapper));

  exports.Set(Napi::String::New(env, "audioTaggingCompute"),
              Napi::Function::New(env, AudioTaggingComputeWrapper));
}


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/include/sherpa-onnx/c-api/README.md
================================================
# Node

[./c-api.h](./c-api.h) is a symbolic link to
https://github.com/k2-fsa/sherpa-onnx/blob/master/sherpa-onnx/c-api/c-api.h

If you are using Windows, then you need to manually replace this file with
https://github.com/k2-fsa/sherpa-onnx/blob/master/sherpa-onnx/c-api/c-api.h
since Windows does not support symbolic links.


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/keyword-spotting.cc
================================================
// scripts/node-addon-api/src/keyword-spotting.cc
//
// Copyright (c)  2024  Xiaomi Corporation
#include <memory>
#include <sstream>
#include <string>

#include "macros.h"  // NOLINT
#include "napi.h"    // NOLINT
#include "sherpa-onnx/c-api/c-api.h"

// defined ./streaming-asr.cc
SherpaOnnxFeatureConfig GetFeatureConfig(Napi::Object obj);

// defined ./streaming-asr.cc
SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj);

static Napi::External<SherpaOnnxKeywordSpotter> CreateKeywordSpotterWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
#if __OHOS__
  if (info.Length() != 1 && info.Length() != 2) {
    std::ostringstream os;
    os << "Expect 1 or 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }
#else
  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }
#endif
  if (!info[0].IsObject()) {
    Napi::TypeError::New(env, "Expect an object as the argument")
        .ThrowAsJavaScriptException();

    return {};
  }

#if __OHOS__
  bool use_resource_manager =
      info.Length() == 2 && !info[1].IsUndefined() && !info[1].IsNull();
  if (use_resource_manager && !info[1].IsObject()) {
    Napi::TypeError::New(
        env, "You should pass a resource manager as the second argument.")
        .ThrowAsJavaScriptException();

    return {};
  }
#endif

  Napi::Object o = info[0].As<Napi::Object>();
  SherpaOnnxKeywordSpotterConfig c;
  memset(&c, 0, sizeof(c));
  c.feat_config = GetFeatureConfig(o);
  c.model_config = GetOnlineModelConfig(o);

  SHERPA_ONNX_ASSIGN_ATTR_INT32(max_active_paths, maxActivePaths);
  SHERPA_ONNX_ASSIGN_ATTR_INT32(num_trailing_blanks, numTrailingBlanks);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(keywords_score, keywordsScore);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(keywords_threshold, keywordsThreshold);
  SHERPA_ONNX_ASSIGN_ATTR_STR(keywords_file, keywordsFile);
  SHERPA_ONNX_ASSIGN_ATTR_STR(keywords_buf, keywordsBuf);
  SHERPA_ONNX_ASSIGN_ATTR_INT32(keywords_buf_size, keywordsBufSize);

#if __OHOS__
  const SherpaOnnxKeywordSpotter *kws = nullptr;

  if (use_resource_manager) {
    std::unique_ptr<NativeResourceManager,
                    decltype(&OH_ResourceManager_ReleaseNativeResourceManager)>
        mgr(OH_ResourceManager_InitNativeResourceManager(env, info[1]),
            &OH_ResourceManager_ReleaseNativeResourceManager);

    kws = SherpaOnnxCreateKeywordSpotterOHOS(&c, mgr.get());
  } else {
    kws = SherpaOnnxCreateKeywordSpotter(&c);
  }
#else
  const SherpaOnnxKeywordSpotter *kws = SherpaOnnxCreateKeywordSpotter(&c);
#endif

  SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.encoder);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.decoder);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.joiner);

  SHERPA_ONNX_DELETE_C_STR(c.model_config.paraformer.encoder);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.paraformer.decoder);

  SHERPA_ONNX_DELETE_C_STR(c.model_config.zipformer2_ctc.model);

  SHERPA_ONNX_DELETE_C_STR(c.model_config.tokens);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.provider);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.model_type);
  SHERPA_ONNX_DELETE_C_STR(c.keywords_file);
  SHERPA_ONNX_DELETE_C_STR(c.keywords_buf);

  if (!kws) {
    Napi::TypeError::New(env, "Please check your config!")
        .ThrowAsJavaScriptException();

    return {};
  }

  return Napi::External<SherpaOnnxKeywordSpotter>::New(
      env, const_cast<SherpaOnnxKeywordSpotter *>(kws),
      [](Napi::Env env, SherpaOnnxKeywordSpotter *kws) {
        SherpaOnnxDestroyKeywordSpotter(kws);
      });
}

static Napi::External<SherpaOnnxOnlineStream> CreateKeywordStreamWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 1 && info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 1 or 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(
        env, "You should pass a keyword spotter pointer as the only argument")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (info.Length() == 2 && !info[1].IsString()) {
    std::ostringstream os;
    os << "Argument 2 should be a string.";
    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
    return {};
  }

  const SherpaOnnxKeywordSpotter *kws =
      info[0].As<Napi::External<SherpaOnnxKeywordSpotter>>().Data();

  const SherpaOnnxOnlineStream *stream;
  if (info.Length() == 1) {
    stream = SherpaOnnxCreateKeywordStream(kws);
  } else {
    Napi::String js_keywords = info[1].As<Napi::String>();
    std::string keywords = js_keywords.Utf8Value();
    stream = SherpaOnnxCreateKeywordStreamWithKeywords(kws, keywords.c_str());
  }

  return Napi::External<SherpaOnnxOnlineStream>::New(
      env, const_cast<SherpaOnnxOnlineStream *>(stream),
      [](Napi::Env env, SherpaOnnxOnlineStream *stream) {
        SherpaOnnxDestroyOnlineStream(stream);
      });
}

static Napi::Boolean IsKeywordStreamReadyWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be a keyword spotter pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[1].IsExternal()) {
    Napi::TypeError::New(env, "Argument 1 should be an online stream pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxKeywordSpotter *kws =
      info[0].As<Napi::External<SherpaOnnxKeywordSpotter>>().Data();

  const SherpaOnnxOnlineStream *stream =
      info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data();

  int32_t is_ready = SherpaOnnxIsKeywordStreamReady(kws, stream);

  return Napi::Boolean::New(env, is_ready);
}

static void DecodeKeywordStreamWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return;
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be a keyword spotter pointer.")
        .ThrowAsJavaScriptException();

    return;
  }

  if (!info[1].IsExternal()) {
    Napi::TypeError::New(env, "Argument 1 should be an online stream pointer.")
        .ThrowAsJavaScriptException();

    return;
  }

  const SherpaOnnxKeywordSpotter *kws =
      info[0].As<Napi::External<SherpaOnnxKeywordSpotter>>().Data();

  const SherpaOnnxOnlineStream *stream =
      info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data();

  SherpaOnnxDecodeKeywordStream(kws, stream);
}

static void ResetKeywordStreamWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return;
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be a keyword spotter pointer.")
        .ThrowAsJavaScriptException();

    return;
  }

  if (!info[1].IsExternal()) {
    Napi::TypeError::New(env, "Argument 1 should be an online stream pointer.")
        .ThrowAsJavaScriptException();

    return;
  }

  const SherpaOnnxKeywordSpotter *kws =
      info[0].As<Napi::External<SherpaOnnxKeywordSpotter>>().Data();

  const SherpaOnnxOnlineStream *stream =
      info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data();

  SherpaOnnxResetKeywordStream(kws, stream);
}

static Napi::String GetKeywordResultAsJsonWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be a keyword spotter pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[1].IsExternal()) {
    Napi::TypeError::New(env, "Argument 1 should be an online stream pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxKeywordSpotter *kws =
      info[0].As<Napi::External<SherpaOnnxKeywordSpotter>>().Data();

  const SherpaOnnxOnlineStream *stream =
      info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data();

  const char *json = SherpaOnnxGetKeywordResultAsJson(kws, stream);

  Napi::String s = Napi::String::New(env, json);

  SherpaOnnxFreeKeywordResultJson(json);

  return s;
}

void InitKeywordSpotting(Napi::Env env, Napi::Object exports) {
  exports.Set(Napi::String::New(env, "createKeywordSpotter"),
              Napi::Function::New(env, CreateKeywordSpotterWrapper));

  exports.Set(Napi::String::New(env, "createKeywordStream"),
              Napi::Function::New(env, CreateKeywordStreamWrapper));

  exports.Set(Napi::String::New(env, "isKeywordStreamReady"),
              Napi::Function::New(env, IsKeywordStreamReadyWrapper));

  exports.Set(Napi::String::New(env, "decodeKeywordStream"),
              Napi::Function::New(env, DecodeKeywordStreamWrapper));

  exports.Set(Napi::String::New(env, "resetKeywordStream"),
              Napi::Function::New(env, ResetKeywordStreamWrapper));

  exports.Set(Napi::String::New(env, "getKeywordResultAsJson"),
              Napi::Function::New(env, GetKeywordResultAsJsonWrapper));
}


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/libs/.gitignore
================================================
*.so


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/libs/README.md
================================================
# Introduction

You need to get the following four `.so` files using

  - [build-ohos-arm64-v8a.sh](https://github.com/k2-fsa/sherpa-onnx/blob/master/build-ohos-arm64-v8a.sh)
  - [build-ohos-x86-64.sh](https://github.com/k2-fsa/sherpa-onnx/blob/master/build-ohos-x86-64.sh)

```
.
├── README.md
├── arm64-v8a
│   ├── libonnxruntime.so
│   └── libsherpa-onnx-c-api.so
└── x86_64
    ├── libonnxruntime.so
    └── libsherpa-onnx-c-api.so
```


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/libs/arm64-v8a/.gitkeep
================================================


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/libs/armeabi-v7a/.gitkeep
================================================


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/libs/x86_64/.gitkeep
================================================


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/macros.h
================================================
// scripts/node-addon-api/src/macros.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SCRIPTS_NODE_ADDON_API_SRC_MACROS_H_
#define SCRIPTS_NODE_ADDON_API_SRC_MACROS_H_

#include <algorithm>
#include <string>

#if __OHOS__
#include "hilog/log.h"
#include "rawfile/raw_file_manager.h"

#undef LOG_DOMAIN
#undef LOG_TAG

// https://gitee.com/openharmony/docs/blob/145a084f0b742e4325915e32f8184817927d1251/en/contribute/OpenHarmony-Log-guide.md#hilog-api-usage-specifications
#define LOG_DOMAIN 0x6666
#define LOG_TAG "sherpa_onnx"
#endif

#define SHERPA_ONNX_ASSIGN_ATTR_STR(c_name, js_name)                       \
  do {                                                                     \
    if (o.Has(#js_name) && o.Get(#js_name).IsString()) {                   \
      Napi::String _str = o.Get(#js_name).As<Napi::String>();              \
      std::string s = _str.Utf8Value();                                    \
      char *p = new char[s.size() + 1];                                    \
      std::copy(s.begin(), s.end(), p);                                    \
      p[s.size()] = 0;                                                     \
                                                                           \
      c.c_name = p;                                                        \
    } else if (o.Has(#js_name) && o.Get(#js_name).IsTypedArray()) {        \
      Napi::Uint8Array _array = o.Get(#js_name).As<Napi::Uint8Array>();    \
      char *p = new char[_array.ElementLength() + 1];                      \
      std::copy(_array.Data(), _array.Data() + _array.ElementLength(), p); \
      p[_array.ElementLength()] = '\0';                                    \
                                                                           \
      c.c_name = p;                                                        \
    }                                                                      \
  } while (0)

#define SHERPA_ONNX_ASSIGN_ATTR_INT32(c_name, js_name)            \
  do {                                                            \
    if (o.Has(#js_name) && o.Get(#js_name).IsNumber()) {          \
      c.c_name = o.Get(#js_name).As<Napi::Number>().Int32Value(); \
    }                                                             \
  } while (0)

#define SHERPA_ONNX_ASSIGN_ATTR_FLOAT(c_name, js_name)            \
  do {                                                            \
    if (o.Has(#js_name) && o.Get(#js_name).IsNumber()) {          \
      c.c_name = o.Get(#js_name).As<Napi::Number>().FloatValue(); \
    }                                                             \
  } while (0)

#define SHERPA_ONNX_DELETE_C_STR(p) \
  do {                              \
    if (p) {                        \
      delete[] p;                   \
    }                               \
  } while (0)

#endif  // SCRIPTS_NODE_ADDON_API_SRC_MACROS_H_


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/my-patch.diff
================================================
diff --git a/napi-inl.h b/napi-inl.h
index e7141c0..0fd90d8 100644
--- a/napi-inl.h
+++ b/napi-inl.h
@@ -2156,7 +2156,8 @@ inline ArrayBuffer::ArrayBuffer(napi_env env, napi_value value)
 
 inline void* ArrayBuffer::Data() {
   void* data;
-  napi_status status = napi_get_arraybuffer_info(_env, _value, &data, nullptr);
+  size_t byte_length;
+  napi_status status = napi_get_arraybuffer_info(_env, _value, &data, &byte_length);
   NAPI_THROW_IF_FAILED(_env, status, nullptr);
   return data;
 }


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-asr.cc
================================================
// scripts/node-addon-api/src/non-streaming-asr.cc
//
// Copyright (c)  2024  Xiaomi Corporation
#include <memory>
#include <sstream>

#include "macros.h"  // NOLINT
#include "napi.h"    // NOLINT
#include "sherpa-onnx/c-api/c-api.h"

// defined in ./streaming-asr.cc
SherpaOnnxFeatureConfig GetFeatureConfig(Napi::Object obj);
SherpaOnnxHomophoneReplacerConfig GetHomophoneReplacerConfig(Napi::Object obj);

static SherpaOnnxOfflineTransducerModelConfig GetOfflineTransducerModelConfig(
    Napi::Object obj) {
  SherpaOnnxOfflineTransducerModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("transducer") || !obj.Get("transducer").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("transducer").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_STR(encoder, encoder);
  SHERPA_ONNX_ASSIGN_ATTR_STR(decoder, decoder);
  SHERPA_ONNX_ASSIGN_ATTR_STR(joiner, joiner);

  return c;
}

static SherpaOnnxOfflineParaformerModelConfig GetOfflineParaformerModelConfig(
    Napi::Object obj) {
  SherpaOnnxOfflineParaformerModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("paraformer") || !obj.Get("paraformer").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("paraformer").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);

  return c;
}

static SherpaOnnxOfflineZipformerCtcModelConfig
GetOfflineZipformerCtcModelConfig(Napi::Object obj) {
  SherpaOnnxOfflineZipformerCtcModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("zipformerCtc") || !obj.Get("zipformerCtc").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("zipformerCtc").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);

  return c;
}

static SherpaOnnxOfflineWenetCtcModelConfig GetOfflineWenetCtcModelConfig(
    Napi::Object obj) {
  SherpaOnnxOfflineWenetCtcModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("wenetCtc") || !obj.Get("wenetCtc").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("wenetCtc").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);

  return c;
}

static SherpaOnnxOfflineOmnilingualAsrCtcModelConfig
GetOfflineOmnilingualAsrCtcModelConfig(Napi::Object obj) {
  SherpaOnnxOfflineOmnilingualAsrCtcModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("omnilingual") || !obj.Get("omnilingual").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("omnilingual").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);

  return c;
}

static SherpaOnnxOfflineMedAsrCtcModelConfig GetOfflineMedAsrCtcModelConfig(
    Napi::Object obj) {
  SherpaOnnxOfflineMedAsrCtcModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("medasr") || !obj.Get("medasr").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("medasr").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);

  return c;
}

static SherpaOnnxOfflineFireRedAsrCtcModelConfig
GetOfflineFireRedAsrCtcModelConfig(Napi::Object obj) {
  SherpaOnnxOfflineFireRedAsrCtcModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("fireRedAsrCtc") || !obj.Get("fireRedAsrCtc").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("fireRedAsrCtc").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);

  return c;
}

static SherpaOnnxOfflineFunASRNanoModelConfig GetOfflineFunAsrNanoModelConfig(
    Napi::Object obj) {
  SherpaOnnxOfflineFunASRNanoModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("funasrNano") || !obj.Get("funasrNano").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("funasrNano").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_STR(encoder_adaptor, encoderAdaptor);
  SHERPA_ONNX_ASSIGN_ATTR_STR(llm, llm);
  SHERPA_ONNX_ASSIGN_ATTR_STR(embedding, embedding);
  SHERPA_ONNX_ASSIGN_ATTR_STR(tokenizer, tokenizer);
  SHERPA_ONNX_ASSIGN_ATTR_STR(system_prompt, systemPrompt);
  SHERPA_ONNX_ASSIGN_ATTR_STR(user_prompt, userPrompt);
  SHERPA_ONNX_ASSIGN_ATTR_INT32(max_new_tokens, maxNewTokens);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(temperature, temperature);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(top_p, topP);
  SHERPA_ONNX_ASSIGN_ATTR_INT32(seed, seed);
  SHERPA_ONNX_ASSIGN_ATTR_STR(language, language);
  SHERPA_ONNX_ASSIGN_ATTR_INT32(itn, itn);
  SHERPA_ONNX_ASSIGN_ATTR_STR(hotwords, hotwords);

  return c;
}

static SherpaOnnxOfflineDolphinModelConfig GetOfflineDolphinModelConfig(
    Napi::Object obj) {
  SherpaOnnxOfflineDolphinModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("dolphin") || !obj.Get("dolphin").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("dolphin").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);

  return c;
}

static SherpaOnnxOfflineNemoEncDecCtcModelConfig GetOfflineNeMoCtcModelConfig(
    Napi::Object obj) {
  SherpaOnnxOfflineNemoEncDecCtcModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("nemoCtc") || !obj.Get("nemoCtc").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("nemoCtc").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);

  return c;
}

static SherpaOnnxOfflineCanaryModelConfig GetOfflineCanaryModelConfig(
    Napi::Object obj) {
  SherpaOnnxOfflineCanaryModelConfig c;
  memset(&c, 0, sizeof(c));
  c.use_pnc = 1;  // Align default with JS default

  if (!obj.Has("canary") || !obj.Get("canary").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("canary").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_STR(encoder, encoder);
  SHERPA_ONNX_ASSIGN_ATTR_STR(decoder, decoder);
  SHERPA_ONNX_ASSIGN_ATTR_STR(src_lang, srcLang);
  SHERPA_ONNX_ASSIGN_ATTR_STR(tgt_lang, tgtLang);
  SHERPA_ONNX_ASSIGN_ATTR_INT32(use_pnc, usePnc);

  return c;
}

static SherpaOnnxOfflineWhisperModelConfig GetOfflineWhisperModelConfig(
    Napi::Object obj) {
  SherpaOnnxOfflineWhisperModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("whisper") || !obj.Get("whisper").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("whisper").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_STR(encoder, encoder);
  SHERPA_ONNX_ASSIGN_ATTR_STR(decoder, decoder);
  SHERPA_ONNX_ASSIGN_ATTR_STR(language, language);
  SHERPA_ONNX_ASSIGN_ATTR_STR(task, task);
  SHERPA_ONNX_ASSIGN_ATTR_INT32(tail_paddings, tailPaddings);
  SHERPA_ONNX_ASSIGN_ATTR_INT32(enable_token_timestamps, enableTokenTimestamps);
  SHERPA_ONNX_ASSIGN_ATTR_INT32(enable_segment_timestamps,
                                enableSegmentTimestamps);

  return c;
}

static SherpaOnnxOfflineFireRedAsrModelConfig GetOfflineFireRedAsrModelConfig(
    Napi::Object obj) {
  SherpaOnnxOfflineFireRedAsrModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("fireRedAsr") || !obj.Get("fireRedAsr").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("fireRedAsr").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_STR(encoder, encoder);
  SHERPA_ONNX_ASSIGN_ATTR_STR(decoder, decoder);

  return c;
}

static SherpaOnnxOfflineMoonshineModelConfig GetOfflineMoonshineModelConfig(
    Napi::Object obj) {
  SherpaOnnxOfflineMoonshineModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("moonshine") || !obj.Get("moonshine").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("moonshine").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_STR(preprocessor, preprocessor);
  SHERPA_ONNX_ASSIGN_ATTR_STR(encoder, encoder);
  SHERPA_ONNX_ASSIGN_ATTR_STR(uncached_decoder, uncachedDecoder);
  SHERPA_ONNX_ASSIGN_ATTR_STR(cached_decoder, cachedDecoder);
  SHERPA_ONNX_ASSIGN_ATTR_STR(merged_decoder, mergedDecoder);

  return c;
}

static SherpaOnnxOfflineTdnnModelConfig GetOfflineTdnnModelConfig(
    Napi::Object obj) {
  SherpaOnnxOfflineTdnnModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("tdnn") || !obj.Get("tdnn").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("tdnn").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);

  return c;
}

static SherpaOnnxOfflineSenseVoiceModelConfig GetOfflineSenseVoiceModelConfig(
    Napi::Object obj) {
  SherpaOnnxOfflineSenseVoiceModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("senseVoice") || !obj.Get("senseVoice").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("senseVoice").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);
  SHERPA_ONNX_ASSIGN_ATTR_STR(language, language);
  SHERPA_ONNX_ASSIGN_ATTR_INT32(use_itn, useInverseTextNormalization);

  return c;
}

static SherpaOnnxOfflineModelConfig GetOfflineModelConfig(Napi::Object obj) {
  SherpaOnnxOfflineModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("modelConfig") || !obj.Get("modelConfig").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("modelConfig").As<Napi::Object>();

  c.transducer = GetOfflineTransducerModelConfig(o);
  c.paraformer = GetOfflineParaformerModelConfig(o);
  c.nemo_ctc = GetOfflineNeMoCtcModelConfig(o);
  c.whisper = GetOfflineWhisperModelConfig(o);
  c.tdnn = GetOfflineTdnnModelConfig(o);
  c.sense_voice = GetOfflineSenseVoiceModelConfig(o);
  c.moonshine = GetOfflineMoonshineModelConfig(o);
  c.fire_red_asr = GetOfflineFireRedAsrModelConfig(o);
  c.dolphin = GetOfflineDolphinModelConfig(o);
  c.zipformer_ctc = GetOfflineZipformerCtcModelConfig(o);
  c.canary = GetOfflineCanaryModelConfig(o);
  c.wenet_ctc = GetOfflineWenetCtcModelConfig(o);
  c.omnilingual = GetOfflineOmnilingualAsrCtcModelConfig(o);
  c.medasr = GetOfflineMedAsrCtcModelConfig(o);
  c.funasr_nano = GetOfflineFunAsrNanoModelConfig(o);
  c.fire_red_asr_ctc = GetOfflineFireRedAsrCtcModelConfig(o);

  SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens);
  SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);

  if (o.Has("debug") &&
      (o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) {
    if (o.Get("debug").IsBoolean()) {
      c.debug = o.Get("debug").As<Napi::Boolean>().Value();
    } else {
      c.debug = o.Get("debug").As<Napi::Number>().Int32Value();
    }
  }

  SHERPA_ONNX_ASSIGN_ATTR_STR(provider, provider);
  SHERPA_ONNX_ASSIGN_ATTR_STR(model_type, modelType);
  SHERPA_ONNX_ASSIGN_ATTR_STR(modeling_unit, modelingUnit);
  SHERPA_ONNX_ASSIGN_ATTR_STR(bpe_vocab, bpeVocab);
  SHERPA_ONNX_ASSIGN_ATTR_STR(telespeech_ctc, teleSpeechCtc);

  return c;
}

static SherpaOnnxOfflineLMConfig GetOfflineLMConfig(Napi::Object obj) {
  SherpaOnnxOfflineLMConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("lmConfig") || !obj.Get("lmConfig").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("lmConfig").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(scale, scale);

  return c;
}

static SherpaOnnxOfflineRecognizerConfig ParseConfig(Napi::Object o) {
  SherpaOnnxOfflineRecognizerConfig c;
  memset(&c, 0, sizeof(c));
  c.feat_config = GetFeatureConfig(o);
  c.model_config = GetOfflineModelConfig(o);
  c.lm_config = GetOfflineLMConfig(o);
  c.hr = GetHomophoneReplacerConfig(o);

  SHERPA_ONNX_ASSIGN_ATTR_STR(decoding_method, decodingMethod);
  SHERPA_ONNX_ASSIGN_ATTR_INT32(max_active_paths, maxActivePaths);
  SHERPA_ONNX_ASSIGN_ATTR_STR(hotwords_file, hotwordsFile);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(hotwords_score, hotwordsScore);
  SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fsts, ruleFsts);
  SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fars, ruleFars);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(blank_penalty, blankPenalty);

  return c;
}

static void FreeConfig(const SherpaOnnxOfflineRecognizerConfig &c) {
  SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.encoder);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.decoder);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.joiner);

  SHERPA_ONNX_DELETE_C_STR(c.model_config.paraformer.model);

  SHERPA_ONNX_DELETE_C_STR(c.model_config.nemo_ctc.model);

  SHERPA_ONNX_DELETE_C_STR(c.model_config.whisper.encoder);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.whisper.decoder);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.whisper.language);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.whisper.task);

  SHERPA_ONNX_DELETE_C_STR(c.model_config.tdnn.model);

  SHERPA_ONNX_DELETE_C_STR(c.model_config.sense_voice.model);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.sense_voice.language);

  SHERPA_ONNX_DELETE_C_STR(c.model_config.moonshine.preprocessor);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.moonshine.encoder);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.moonshine.uncached_decoder);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.moonshine.cached_decoder);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.moonshine.merged_decoder);

  SHERPA_ONNX_DELETE_C_STR(c.model_config.fire_red_asr.encoder);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.fire_red_asr.decoder);

  SHERPA_ONNX_DELETE_C_STR(c.model_config.dolphin.model);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.zipformer_ctc.model);

  SHERPA_ONNX_DELETE_C_STR(c.model_config.canary.encoder);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.canary.decoder);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.canary.src_lang);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.canary.tgt_lang);

  SHERPA_ONNX_DELETE_C_STR(c.model_config.wenet_ctc.model);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.omnilingual.model);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.medasr.model);

  SHERPA_ONNX_DELETE_C_STR(c.model_config.funasr_nano.hotwords);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.funasr_nano.language);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.funasr_nano.user_prompt);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.funasr_nano.system_prompt);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.funasr_nano.tokenizer);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.funasr_nano.embedding);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.funasr_nano.llm);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.funasr_nano.encoder_adaptor);

  SHERPA_ONNX_DELETE_C_STR(c.model_config.fire_red_asr_ctc.model);

  SHERPA_ONNX_DELETE_C_STR(c.model_config.tokens);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.provider);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.model_type);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.modeling_unit);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.bpe_vocab);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.telespeech_ctc);

  SHERPA_ONNX_DELETE_C_STR(c.lm_config.model);

  SHERPA_ONNX_DELETE_C_STR(c.decoding_method);
  SHERPA_ONNX_DELETE_C_STR(c.hotwords_file);
  SHERPA_ONNX_DELETE_C_STR(c.rule_fsts);
  SHERPA_ONNX_DELETE_C_STR(c.rule_fars);
  SHERPA_ONNX_DELETE_C_STR(c.hr.lexicon);
  SHERPA_ONNX_DELETE_C_STR(c.hr.rule_fsts);
}

static Napi::External<SherpaOnnxOfflineRecognizer>
CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
#if __OHOS__
  // the last argument is the NativeResourceManager
  if (info.Length() != 1 && info.Length() != 2) {
    std::ostringstream os;
    os << "Expect 1 or 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }
#else
  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }
#endif

  if (!info[0].IsObject()) {
    Napi::TypeError::New(env, "Expect an object as the argument")
        .ThrowAsJavaScriptException();

    return {};
  }

#if __OHOS__
  bool use_resource_manager =
      info.Length() == 2 && !info[1].IsUndefined() && !info[1].IsNull();
  if (use_resource_manager && !info[1].IsObject()) {
    Napi::TypeError::New(
        env, "You should pass a resource manager as the second argument.")
        .ThrowAsJavaScriptException();

    return {};
  }
#endif

  Napi::Object o = info[0].As<Napi::Object>();

  SherpaOnnxOfflineRecognizerConfig c = ParseConfig(o);

#if __OHOS__
  const SherpaOnnxOfflineRecognizer *recognizer = nullptr;

  if (use_resource_manager) {
    std::unique_ptr<NativeResourceManager,
                    decltype(&OH_ResourceManager_ReleaseNativeResourceManager)>
        mgr(OH_ResourceManager_InitNativeResourceManager(env, info[1]),
            &OH_ResourceManager_ReleaseNativeResourceManager);

    recognizer = SherpaOnnxCreateOfflineRecognizerOHOS(&c, mgr.get());
  } else {
    recognizer = SherpaOnnxCreateOfflineRecognizer(&c);
  }
#else
  const SherpaOnnxOfflineRecognizer *recognizer =
      SherpaOnnxCreateOfflineRecognizer(&c);
#endif

  FreeConfig(c);

  if (!recognizer) {
    Napi::TypeError::New(env, "Please check your config!")
        .ThrowAsJavaScriptException();

    return {};
  }

  return Napi::External<SherpaOnnxOfflineRecognizer>::New(
      env, const_cast<SherpaOnnxOfflineRecognizer *>(recognizer),
      [](Napi::Env env, SherpaOnnxOfflineRecognizer *recognizer) {
        SherpaOnnxDestroyOfflineRecognizer(recognizer);
      });
}

class CreateRecognizerAsyncWorker : public Napi::AsyncWorker {
 public:
  CreateRecognizerAsyncWorker(const Napi::Env &env,
                              const SherpaOnnxOfflineRecognizerConfig &cfg,
                              const Napi::Promise::Deferred &deferred)
      : Napi::AsyncWorker(env), cfg_(cfg), deferred_(deferred) {}

  void Execute() override {
    recognizer_ = SherpaOnnxCreateOfflineRecognizer(&cfg_);
    FreeConfig(cfg_);

    if (!recognizer_) {
      SetError("Failed to create offline recognizer");
    }
  }

  void OnOK() override {
    Napi::Env env = Env();

    deferred_.Resolve(Napi::External<SherpaOnnxOfflineRecognizer>::New(
        env, const_cast<SherpaOnnxOfflineRecognizer *>(recognizer_),
        [](Napi::Env /*env*/, SherpaOnnxOfflineRecognizer *r) {
          SherpaOnnxDestroyOfflineRecognizer(r);
        }));
  }

  void OnError(const Napi::Error &e) override { deferred_.Reject(e.Value()); }

 private:
  SherpaOnnxOfflineRecognizerConfig cfg_;
  const SherpaOnnxOfflineRecognizer *recognizer_ = nullptr;
  Napi::Promise::Deferred deferred_;
};

Napi::Value CreateOfflineRecognizerAsyncWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 1 || !info[0].IsObject()) {
    Napi::TypeError::New(env, "Expected config object")
        .ThrowAsJavaScriptException();
    return env.Null();
  }

  SherpaOnnxOfflineRecognizerConfig cfg =
      ParseConfig(info[0].As<Napi::Object>());

  Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(env);

  auto *worker = new CreateRecognizerAsyncWorker(env, cfg, deferred);
  worker->Queue();

  return deferred.Promise();
}

static Napi::External<SherpaOnnxOfflineStream> CreateOfflineStreamWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(
        env,
        "You should pass an offline recognizer pointer as the only argument")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxOfflineRecognizer *recognizer =
      info[0].As<Napi::External<SherpaOnnxOfflineRecognizer>>().Data();

  const SherpaOnnxOfflineStream *stream =
      SherpaOnnxCreateOfflineStream(recognizer);

  return Napi::External<SherpaOnnxOfflineStream>::New(
      env, const_cast<SherpaOnnxOfflineStream *>(stream),
      [](Napi::Env env, SherpaOnnxOfflineStream *stream) {
        SherpaOnnxDestroyOfflineStream(stream);
      });
}

static void AcceptWaveformOfflineWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return;
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be an offline stream pointer.")
        .ThrowAsJavaScriptException();

    return;
  }

  const SherpaOnnxOfflineStream *stream =
      info[0].As<Napi::External<SherpaOnnxOfflineStream>>().Data();

  if (!info[1].IsObject()) {
    Napi::TypeError::New(env, "Argument 1 should be an object")
        .ThrowAsJavaScriptException();

    return;
  }

  Napi::Object obj = info[1].As<Napi::Object>();

  if (!obj.Has("samples")) {
    Napi::TypeError::New(env, "The argument object should have a field samples")
        .ThrowAsJavaScriptException();

    return;
  }

  if (!obj.Get("samples").IsTypedArray()) {
    Napi::TypeError::New(env, "The object['samples'] should be a typed array")
        .ThrowAsJavaScriptException();

    return;
  }

  if (!obj.Has("sampleRate")) {
    Napi::TypeError::New(env,
                         "The argument object should have a field sampleRate")
        .ThrowAsJavaScriptException();

    return;
  }

  if (!obj.Get("sampleRate").IsNumber()) {
    Napi::TypeError::New(env, "The object['samples'] should be a number")
        .ThrowAsJavaScriptException();

    return;
  }

  Napi::Float32Array samples = obj.Get("samples").As<Napi::Float32Array>();
  int32_t sample_rate = obj.Get("sampleRate").As<Napi::Number>().Int32Value();

#if __OHOS__
  // Note(fangjun): For unknown reasons on HarmonyOS, we need to divide it by
  // sizeof(float) here
  SherpaOnnxAcceptWaveformOffline(stream, sample_rate, samples.Data(),
                                  samples.ElementLength() / sizeof(float));
#else
  SherpaOnnxAcceptWaveformOffline(stream, sample_rate, samples.Data(),
                                  samples.ElementLength());
#endif
}

static void OfflineRecognizerSetConfigWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return;
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env,
                         "Argument 0 should be an offline recognizer pointer.")
        .ThrowAsJavaScriptException();

    return;
  }

  if (!info[1].IsObject()) {
    Napi::TypeError::New(env, "Expect an object as the second argument")
        .ThrowAsJavaScriptException();

    return;
  }

  Napi::Object o = info[1].As<Napi::Object>();
  SherpaOnnxOfflineRecognizerConfig c = ParseConfig(o);

  const SherpaOnnxOfflineRecognizer *recognizer =
      info[0].As<Napi::External<SherpaOnnxOfflineRecognizer>>().Data();

  SherpaOnnxOfflineRecognizerSetConfig(recognizer, &c);

  FreeConfig(c);
}

class DecodeOfflineStreamAsyncWorker : public Napi::AsyncWorker {
 public:
  DecodeOfflineStreamAsyncWorker(Napi::Env env,
                                 const SherpaOnnxOfflineRecognizer *recognizer,
                                 const SherpaOnnxOfflineStream *stream,
                                 Napi::Promise::Deferred deferred)
      : Napi::AsyncWorker(env),
        recognizer_(recognizer),
        stream_(stream),
        deferred_(deferred) {}

  void Execute() override {
    try {
      SherpaOnnxDecodeOfflineStream(recognizer_, stream_);
    } catch (const std::exception &e) {
      SetError(e.what());
    }
  }

  void OnOK() override {
    const char *json = SherpaOnnxGetOfflineStreamResultAsJson(stream_);
    Napi::String s = Napi::String::New(Env(), json);
    SherpaOnnxDestroyOfflineStreamResultJson(json);
    deferred_.Resolve(s);
  }

  void OnError(const Napi::Error &e) override { deferred_.Reject(e.Value()); }

 private:
  const SherpaOnnxOfflineRecognizer *recognizer_;
  const SherpaOnnxOfflineStream *stream_;
  Napi::Promise::Deferred deferred_;
};

static Napi::Value DecodeOfflineStreamAsyncWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect 2 arguments. Given: " << info.Length();
    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
    return env.Null();
  }

  if (!info[0].IsExternal() || !info[1].IsExternal()) {
    Napi::TypeError::New(env,
                         "Expected recognizer and stream as external pointers")
        .ThrowAsJavaScriptException();
    return env.Null();
  }

  const SherpaOnnxOfflineRecognizer *recognizer =
      info[0].As<Napi::External<SherpaOnnxOfflineRecognizer>>().Data();

  const SherpaOnnxOfflineStream *stream =
      info[1].As<Napi::External<SherpaOnnxOfflineStream>>().Data();

  Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(env);

  // no need to free worker by ourselves
  auto worker =
      new DecodeOfflineStreamAsyncWorker(env, recognizer, stream, deferred);

  worker->Queue();

  return deferred.Promise();
}

static void DecodeOfflineStreamWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return;
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env,
                         "Argument 0 should be an offline recognizer pointer.")
        .ThrowAsJavaScriptException();

    return;
  }

  if (!info[1].IsExternal()) {
    Napi::TypeError::New(env, "Argument 1 should be an offline stream pointer.")
        .ThrowAsJavaScriptException();

    return;
  }

  const SherpaOnnxOfflineRecognizer *recognizer =
      info[0].As<Napi::External<SherpaOnnxOfflineRecognizer>>().Data();

  const SherpaOnnxOfflineStream *stream =
      info[1].As<Napi::External<SherpaOnnxOfflineStream>>().Data();

  SherpaOnnxDecodeOfflineStream(recognizer, stream);
}

static Napi::String GetOfflineStreamResultAsJsonWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be an online stream pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxOfflineStream *stream =
      info[0].As<Napi::External<SherpaOnnxOfflineStream>>().Data();

  const char *json = SherpaOnnxGetOfflineStreamResultAsJson(stream);
  Napi::String s = Napi::String::New(env, json);

  SherpaOnnxDestroyOfflineStreamResultJson(json);

  return s;
}

void InitNonStreamingAsr(Napi::Env env, Napi::Object exports) {
  exports.Set(Napi::String::New(env, "createOfflineRecognizer"),
              Napi::Function::New(env, CreateOfflineRecognizerWrapper));

  exports.Set(Napi::String::New(env, "createOfflineRecognizerAsync"),
              Napi::Function::New(env, CreateOfflineRecognizerAsyncWrapper));

  exports.Set(Napi::String::New(env, "createOfflineStream"),
              Napi::Function::New(env, CreateOfflineStreamWrapper));

  exports.Set(Napi::String::New(env, "acceptWaveformOffline"),
              Napi::Function::New(env, AcceptWaveformOfflineWrapper));

  exports.Set(Napi::String::New(env, "decodeOfflineStream"),
              Napi::Function::New(env, DecodeOfflineStreamWrapper));

  exports.Set(Napi::String::New(env, "decodeOfflineStreamAsync"),
              Napi::Function::New(env, DecodeOfflineStreamAsyncWrapper));

  exports.Set(Napi::String::New(env, "offlineRecognizerSetConfig"),
              Napi::Function::New(env, OfflineRecognizerSetConfigWrapper));

  exports.Set(Napi::String::New(env, "getOfflineStreamResultAsJson"),
              Napi::Function::New(env, GetOfflineStreamResultAsJsonWrapper));
}


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-speaker-diarization.cc
================================================
// scripts/node-addon-api/src/non-streaming-speaker-diarization.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include <algorithm>
#include <memory>
#include <sstream>
#include <utility>
#include <vector>

#include "macros.h"  // NOLINT
#include "napi.h"    // NOLINT
#include "sherpa-onnx/c-api/c-api.h"

static SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig
GetOfflineSpeakerSegmentationPyannoteModelConfig(Napi::Object obj) {
  SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("pyannote") || !obj.Get("pyannote").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("pyannote").As<Napi::Object>();
  SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);

  return c;
}

static SherpaOnnxOfflineSpeakerSegmentationModelConfig
GetOfflineSpeakerSegmentationModelConfig(Napi::Object obj) {
  SherpaOnnxOfflineSpeakerSegmentationModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("segmentation") || !obj.Get("segmentation").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("segmentation").As<Napi::Object>();

  c.pyannote = GetOfflineSpeakerSegmentationPyannoteModelConfig(o);

  SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);

  if (o.Has("debug") &&
      (o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) {
    if (o.Get("debug").IsBoolean()) {
      c.debug = o.Get("debug").As<Napi::Boolean>().Value();
    } else {
      c.debug = o.Get("debug").As<Napi::Number>().Int32Value();
    }
  }

  SHERPA_ONNX_ASSIGN_ATTR_STR(provider, provider);

  return c;
}

static SherpaOnnxSpeakerEmbeddingExtractorConfig
GetSpeakerEmbeddingExtractorConfig(Napi::Object obj) {
  SherpaOnnxSpeakerEmbeddingExtractorConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("embedding") || !obj.Get("embedding").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("embedding").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);
  SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);

  if (o.Has("debug") &&
      (o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) {
    if (o.Get("debug").IsBoolean()) {
      c.debug = o.Get("debug").As<Napi::Boolean>().Value();
    } else {
      c.debug = o.Get("debug").As<Napi::Number>().Int32Value();
    }
  }

  SHERPA_ONNX_ASSIGN_ATTR_STR(provider, provider);

  return c;
}

static SherpaOnnxFastClusteringConfig GetFastClusteringConfig(
    Napi::Object obj) {
  SherpaOnnxFastClusteringConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("clustering") || !obj.Get("clustering").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("clustering").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_INT32(num_clusters, numClusters);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(threshold, threshold);

  return c;
}

static Napi::External<SherpaOnnxOfflineSpeakerDiarization>
CreateOfflineSpeakerDiarizationWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

#if __OHOS__
  if (info.Length() != 1 && info.Length() != 2) {
    std::ostringstream os;
    os << "Expect 1 or 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }
#else
  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }
#endif

  if (!info[0].IsObject()) {
    Napi::TypeError::New(env, "Expect an object as the argument")
        .ThrowAsJavaScriptException();

    return {};
  }

#if __OHOS__
  bool use_resource_manager =
      info.Length() == 2 && !info[1].IsUndefined() && !info[1].IsNull();
  if (use_resource_manager && !info[1].IsObject()) {
    Napi::TypeError::New(
        env, "You should pass a resource manager as the second argument.")
        .ThrowAsJavaScriptException();

    return {};
  }
#endif

  Napi::Object o = info[0].As<Napi::Object>();

  SherpaOnnxOfflineSpeakerDiarizationConfig c;
  memset(&c, 0, sizeof(c));

  c.segmentation = GetOfflineSpeakerSegmentationModelConfig(o);
  c.embedding = GetSpeakerEmbeddingExtractorConfig(o);
  c.clustering = GetFastClusteringConfig(o);

  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(min_duration_on, minDurationOn);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(min_duration_off, minDurationOff);

#if __OHOS__
  const SherpaOnnxOfflineSpeakerDiarization *sd = nullptr;

  if (use_resource_manager) {
    std::unique_ptr<NativeResourceManager,
                    decltype(&OH_ResourceManager_ReleaseNativeResourceManager)>
        mgr(OH_ResourceManager_InitNativeResourceManager(env, info[1]),
            &OH_ResourceManager_ReleaseNativeResourceManager);

    sd = SherpaOnnxCreateOfflineSpeakerDiarizationOHOS(&c, mgr.get());
  } else {
    sd = SherpaOnnxCreateOfflineSpeakerDiarization(&c);
  }
#else
  const SherpaOnnxOfflineSpeakerDiarization *sd =
      SherpaOnnxCreateOfflineSpeakerDiarization(&c);
#endif

  SHERPA_ONNX_DELETE_C_STR(c.segmentation.pyannote.model);
  SHERPA_ONNX_DELETE_C_STR(c.segmentation.provider);
  SHERPA_ONNX_DELETE_C_STR(c.embedding.model);
  SHERPA_ONNX_DELETE_C_STR(c.embedding.provider);

  if (!sd) {
    Napi::TypeError::New(env, "Please check your config!")
        .ThrowAsJavaScriptException();

    return {};
  }

  return Napi::External<SherpaOnnxOfflineSpeakerDiarization>::New(
      env, const_cast<SherpaOnnxOfflineSpeakerDiarization *>(sd),
      [](Napi::Env env, SherpaOnnxOfflineSpeakerDiarization *sd) {
        SherpaOnnxDestroyOfflineSpeakerDiarization(sd);
      });
}

static Napi::Number OfflineSpeakerDiarizationGetSampleRateWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(
        env, "Argument 0 should be an offline speaker diarization pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxOfflineSpeakerDiarization *sd =
      info[0].As<Napi::External<SherpaOnnxOfflineSpeakerDiarization>>().Data();

  int32_t sample_rate = SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(sd);

  return Napi::Number::New(env, sample_rate);
}

static Napi::Array OfflineSpeakerDiarizationProcessWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(
        env, "Argument 0 should be an offline speaker diarization pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxOfflineSpeakerDiarization *sd =
      info[0].As<Napi::External<SherpaOnnxOfflineSpeakerDiarization>>().Data();

  if (!info[1].IsTypedArray()) {
    Napi::TypeError::New(env, "Argument 1 should be a typed array")
        .ThrowAsJavaScriptException();

    return {};
  }

  Napi::Float32Array samples = info[1].As<Napi::Float32Array>();

#if __OHOS__
  // Note(fangjun): For unknown reasons on HarmonyOS, we need to divide it by
  // sizeof(float) here
  const SherpaOnnxOfflineSpeakerDiarizationResult *r =
      SherpaOnnxOfflineSpeakerDiarizationProcess(
          sd, samples.Data(), samples.ElementLength() / sizeof(float));
#else
  const SherpaOnnxOfflineSpeakerDiarizationResult *r =
      SherpaOnnxOfflineSpeakerDiarizationProcess(sd, samples.Data(),
                                                 samples.ElementLength());
#endif

  int32_t num_segments =
      SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(r);

  const SherpaOnnxOfflineSpeakerDiarizationSegment *segments =
      SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(r);

  Napi::Array ans = Napi::Array::New(env, num_segments);

  for (int32_t i = 0; i != num_segments; ++i) {
    Napi::Object obj = Napi::Object::New(env);

    obj.Set(Napi::String::New(env, "start"), segments[i].start);
    obj.Set(Napi::String::New(env, "end"), segments[i].end);
    obj.Set(Napi::String::New(env, "speaker"), segments[i].speaker);

    ans.Set(i, obj);
  }

  SherpaOnnxOfflineSpeakerDiarizationDestroySegment(segments);
  SherpaOnnxOfflineSpeakerDiarizationDestroyResult(r);

  return ans;
}

struct SpeakerDiarizationCallbackData {
  int32_t num_processed_chunks;
  int32_t num_total_chunks;
};

// see
// https://github.com/nodejs/node-addon-examples/blob/main/src/6-threadsafe-function/typed_threadsafe_function/node-addon-api/clock.cc
static void InvokeJsCallback(Napi::Env env, Napi::Function callback,
                             Napi::Reference<Napi::Value> *context,
                             SpeakerDiarizationCallbackData *data) {
  if (env != nullptr) {
    if (callback != nullptr) {
      Napi::Number num_processed_chunks =
          Napi::Number::New(env, data->num_processed_chunks);
      Napi::Number num_total_chunks =
          Napi::Number::New(env, data->num_total_chunks);

      callback.Call(context->Value(), {num_processed_chunks, num_total_chunks});
    }
  }
  delete data;
}

using TSFN = Napi::TypedThreadSafeFunction<Napi::Reference<Napi::Value>,
                                           SpeakerDiarizationCallbackData,
                                           InvokeJsCallback>;

class SpeakerDiarizationProcessWorker : public Napi::AsyncWorker {
 public:
  SpeakerDiarizationProcessWorker(const Napi::Env &env, TSFN tsfn,
                                  const SherpaOnnxOfflineSpeakerDiarization *sd,
                                  std::vector<float> samples)
      : tsfn_(tsfn),
        Napi::AsyncWorker{env, "SpeakerDiarizationProcessAsyncWorker"},
        deferred_(env),
        sd_(sd),
        samples_(std::move(samples)) {}

  Napi::Promise Promise() { return deferred_.Promise(); }

 protected:
  void Execute() override {
    auto callback = [](int32_t num_processed_chunks, int32_t num_total_chunks,
                       void *arg) -> int32_t {
      auto _this = reinterpret_cast<SpeakerDiarizationProcessWorker *>(arg);

      auto data = new SpeakerDiarizationCallbackData;
      data->num_processed_chunks = num_processed_chunks;
      data->num_total_chunks = num_total_chunks;

      _this->tsfn_.NonBlockingCall(data);

      return 0;
    };

    r_ = SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback(
        sd_, samples_.data(), samples_.size(), callback, this);

    tsfn_.Release();
  }

  void OnOK() override {
    Napi::Env env = deferred_.Env();

    int32_t num_segments =
        SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(r_);

    const SherpaOnnxOfflineSpeakerDiarizationSegment *segments =
        SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(r_);

    Napi::Array ans = Napi::Array::New(env, num_segments);

    for (int32_t i = 0; i != num_segments; ++i) {
      Napi::Object obj = Napi::Object::New(env);

      obj.Set(Napi::String::New(env, "start"), segments[i].start);
      obj.Set(Napi::String::New(env, "end"), segments[i].end);
      obj.Set(Napi::String::New(env, "speaker"), segments[i].speaker);

      ans.Set(i, obj);
    }

    SherpaOnnxOfflineSpeakerDiarizationDestroySegment(segments);
    SherpaOnnxOfflineSpeakerDiarizationDestroyResult(r_);

    deferred_.Resolve(ans);
  }

 private:
  TSFN tsfn_;
  Napi::Promise::Deferred deferred_;
  const SherpaOnnxOfflineSpeakerDiarization *sd_;
  std::vector<float> samples_;
  const SherpaOnnxOfflineSpeakerDiarizationResult *r_;
};

static Napi::Object OfflineSpeakerDiarizationProcessAsyncWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 3) {
    std::ostringstream os;
    os << "Expect only 3 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(
        env, "Argument 0 should be an offline speaker diarization pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxOfflineSpeakerDiarization *sd =
      info[0].As<Napi::External<SherpaOnnxOfflineSpeakerDiarization>>().Data();

  if (!info[1].IsTypedArray()) {
    Napi::TypeError::New(env, "Argument 1 should be a typed array")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[2].IsFunction()) {
    Napi::TypeError::New(env, "Argument 2 should be a function")
        .ThrowAsJavaScriptException();

    return {};
  }

  Napi::Function cb = info[2].As<Napi::Function>();

  auto context =
      new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));

  TSFN tsfn = TSFN::New(
      env,
      cb,  // JavaScript function called asynchronously
      "SpeakerDiarizationProcessAsyncFunc",  // Name
      0,                                     // Unlimited queue
      1,  // Only one thread will use this initially
      context,
      [](Napi::Env, void *, Napi::Reference<Napi::Value> *ctx) { delete ctx; });

  Napi::Float32Array samples = info[1].As<Napi::Float32Array>();

#if __OHOS__
  int32_t num_samples = samples.ElementLength() / sizeof(float);
#else
  int32_t num_samples = samples.ElementLength();
#endif
  std::vector<float> v(num_samples);
  std::copy(samples.Data(), samples.Data() + num_samples, v.begin());

  SpeakerDiarizationProcessWorker *worker =
      new SpeakerDiarizationProcessWorker(env, tsfn, sd, v);
  worker->Queue();
  return worker->Promise();
}

static void OfflineSpeakerDiarizationSetConfigWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return;
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(
        env, "Argument 0 should be an offline speaker diarization pointer.")
        .ThrowAsJavaScriptException();

    return;
  }

  const SherpaOnnxOfflineSpeakerDiarization *sd =
      info[0].As<Napi::External<SherpaOnnxOfflineSpeakerDiarization>>().Data();

  if (!info[1].IsObject()) {
    Napi::TypeError::New(env, "Expect an object as the argument")
        .ThrowAsJavaScriptException();

    return;
  }

  Napi::Object o = info[1].As<Napi::Object>();

  SherpaOnnxOfflineSpeakerDiarizationConfig c;
  memset(&c, 0, sizeof(c));

  c.clustering = GetFastClusteringConfig(o);
  SherpaOnnxOfflineSpeakerDiarizationSetConfig(sd, &c);
}

void InitNonStreamingSpeakerDiarization(Napi::Env env, Napi::Object exports) {
  exports.Set(Napi::String::New(env, "createOfflineSpeakerDiarization"),
              Napi::Function::New(env, CreateOfflineSpeakerDiarizationWrapper));

  exports.Set(
      Napi::String::New(env, "getOfflineSpeakerDiarizationSampleRate"),
      Napi::Function::New(env, OfflineSpeakerDiarizationGetSampleRateWrapper));

  exports.Set(
      Napi::String::New(env, "offlineSpeakerDiarizationProcess"),
      Napi::Function::New(env, OfflineSpeakerDiarizationProcessWrapper));

  exports.Set(
      Napi::String::New(env, "offlineSpeakerDiarizationProcessAsync"),
      Napi::Function::New(env, OfflineSpeakerDiarizationProcessAsyncWrapper));

  exports.Set(
      Napi::String::New(env, "offlineSpeakerDiarizationSetConfig"),
      Napi::Function::New(env, OfflineSpeakerDiarizationSetConfigWrapper));
}


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-speech-denoiser.cc
================================================
// scripts/node-addon-api/src/non-streaming-speech-denoiser.cc
//
// Copyright (c)  2025  Xiaomi Corporation
#include <memory>
#include <sstream>

#include "napi.h"  // NOLINT
#include "sherpa-onnx/c-api/c-api.h"
#include "speech-denoiser.h"  // NOLINT

static Napi::External<SherpaOnnxOfflineSpeechDenoiser>
CreateOfflineSpeechDenoiserWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
#if __OHOS__
  // the last argument is the NativeResourceManager
  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }
#else
  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }
#endif

  if (!info[0].IsObject()) {
    Napi::TypeError::New(env, "Expect an object as the argument")
        .ThrowAsJavaScriptException();

    return {};
  }

  Napi::Object o = info[0].As<Napi::Object>();

  SherpaOnnxOfflineSpeechDenoiserConfig c;
  memset(&c, 0, sizeof(c));
  c.model = GetSpeechDenoiserModelConfig(o);

#if __OHOS__
  std::unique_ptr<NativeResourceManager,
                  decltype(&OH_ResourceManager_ReleaseNativeResourceManager)>
      mgr(OH_ResourceManager_InitNativeResourceManager(env, info[1]),
          &OH_ResourceManager_ReleaseNativeResourceManager);

  const SherpaOnnxOfflineSpeechDenoiser *sd =
      SherpaOnnxCreateOfflineSpeechDenoiserOHOS(&c, mgr.get());
#else
  const SherpaOnnxOfflineSpeechDenoiser *sd =
      SherpaOnnxCreateOfflineSpeechDenoiser(&c);
#endif

  DeleteSpeechDenoiserModelConfig(c.model);

  if (!sd) {
    Napi::TypeError::New(env, "Please check your config!")
        .ThrowAsJavaScriptException();

    return {};
  }

  return Napi::External<SherpaOnnxOfflineSpeechDenoiser>::New(
      env, const_cast<SherpaOnnxOfflineSpeechDenoiser *>(sd),
      [](Napi::Env env, SherpaOnnxOfflineSpeechDenoiser *sd) {
        SherpaOnnxDestroyOfflineSpeechDenoiser(sd);
      });
}

static Napi::Object OfflineSpeechDenoiserRunWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(
        env, "Argument 0 should be an offline speech denoiser pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxOfflineSpeechDenoiser *sd =
      info[0].As<Napi::External<SherpaOnnxOfflineSpeechDenoiser>>().Data();

  if (!info[1].IsObject()) {
    Napi::TypeError::New(env, "Argument 1 should be an object")
        .ThrowAsJavaScriptException();

    return {};
  }

  Napi::Object obj = info[1].As<Napi::Object>();

  if (!obj.Has("samples")) {
    Napi::TypeError::New(env, "The argument object should have a field samples")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("samples").IsTypedArray()) {
    Napi::TypeError::New(env, "The object['samples'] should be a typed array")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Has("sampleRate")) {
    Napi::TypeError::New(env,
                         "The argument object should have a field sampleRate")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("sampleRate").IsNumber()) {
    Napi::TypeError::New(env, "The object['samples'] should be a number")
        .ThrowAsJavaScriptException();

    return {};
  }

  Napi::Float32Array samples = obj.Get("samples").As<Napi::Float32Array>();
  int32_t sample_rate = obj.Get("sampleRate").As<Napi::Number>().Int32Value();

  const SherpaOnnxDenoisedAudio *audio;

  audio = SherpaOnnxOfflineSpeechDenoiserRun(
      sd, samples.Data(), GetFloat32ArrayElementLength(samples), sample_rate);

  return CreateDenoisedAudioObject(env, audio, GetEnableExternalBuffer(obj));
}

static Napi::Number OfflineSpeechDenoiserGetSampleRateWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(
        env, "Argument 0 should be an offline speech denoiser pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxOfflineSpeechDenoiser *sd =
      info[0].As<Napi::External<SherpaOnnxOfflineSpeechDenoiser>>().Data();

  int32_t sample_rate = SherpaOnnxOfflineSpeechDenoiserGetSampleRate(sd);

  return Napi::Number::New(env, sample_rate);
}

void InitNonStreamingSpeechDenoiser(Napi::Env env, Napi::Object exports) {
  exports.Set(Napi::String::New(env, "createOfflineSpeechDenoiser"),
              Napi::Function::New(env, CreateOfflineSpeechDenoiserWrapper));

  exports.Set(Napi::String::New(env, "offlineSpeechDenoiserRunWrapper"),
              Napi::Function::New(env, OfflineSpeechDenoiserRunWrapper));

  exports.Set(
      Napi::String::New(env, "offlineSpeechDenoiserGetSampleRateWrapper"),
      Napi::Function::New(env, OfflineSpeechDenoiserGetSampleRateWrapper));
}


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-tts.cc
================================================
// scripts/node-addon-api/src/non-streaming-tts.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include <algorithm>
#include <atomic>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

#include "macros.h"  // NOLINT
#include "napi.h"    // NOLINT
#include "sherpa-onnx/c-api/c-api.h"

#define SHERPA_ONNX_ASSIGN_TTS_ATTR()                                  \
  do {                                                                 \
    SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fsts, ruleFsts);                  \
    SHERPA_ONNX_ASSIGN_ATTR_INT32(max_num_sentences, maxNumSentences); \
    SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fars, ruleFars);                  \
    SHERPA_ONNX_ASSIGN_ATTR_FLOAT(silence_scale, silenceScale);        \
  } while (0)

#define SHERPA_ONNX_DELETE_TTS_C_STR()                          \
  do {                                                          \
    SHERPA_ONNX_DELETE_C_STR(c.model.vits.model);               \
    SHERPA_ONNX_DELETE_C_STR(c.model.vits.lexicon);             \
    SHERPA_ONNX_DELETE_C_STR(c.model.vits.tokens);              \
    SHERPA_ONNX_DELETE_C_STR(c.model.vits.data_dir);            \
                                                                \
    SHERPA_ONNX_DELETE_C_STR(c.model.matcha.acoustic_model);    \
    SHERPA_ONNX_DELETE_C_STR(c.model.matcha.vocoder);           \
    SHERPA_ONNX_DELETE_C_STR(c.model.matcha.lexicon);           \
    SHERPA_ONNX_DELETE_C_STR(c.model.matcha.tokens);            \
    SHERPA_ONNX_DELETE_C_STR(c.model.matcha.data_dir);          \
                                                                \
    SHERPA_ONNX_DELETE_C_STR(c.model.kitten.model);             \
    SHERPA_ONNX_DELETE_C_STR(c.model.kitten.voices);            \
    SHERPA_ONNX_DELETE_C_STR(c.model.kitten.tokens);            \
    SHERPA_ONNX_DELETE_C_STR(c.model.kitten.data_dir);          \
                                                                 \
    SHERPA_ONNX_DELETE_C_STR(c.model.zipvoice.tokens);          \
    SHERPA_ONNX_DELETE_C_STR(c.model.zipvoice.encoder);         \
    SHERPA_ONNX_DELETE_C_STR(c.model.zipvoice.decoder);         \
    SHERPA_ONNX_DELETE_C_STR(c.model.zipvoice.vocoder);         \
    SHERPA_ONNX_DELETE_C_STR(c.model.zipvoice.data_dir);        \
    SHERPA_ONNX_DELETE_C_STR(c.model.zipvoice.lexicon);         \
                                                                 \
    SHERPA_ONNX_DELETE_C_STR(c.model.kokoro.model);             \
    SHERPA_ONNX_DELETE_C_STR(c.model.kokoro.voices);            \
    SHERPA_ONNX_DELETE_C_STR(c.model.kokoro.tokens);            \
    SHERPA_ONNX_DELETE_C_STR(c.model.kokoro.data_dir);          \
    SHERPA_ONNX_DELETE_C_STR(c.model.kokoro.lexicon);           \
    SHERPA_ONNX_DELETE_C_STR(c.model.kokoro.lang);              \
                                                                \
    SHERPA_ONNX_DELETE_C_STR(c.model.pocket.lm_flow);           \
    SHERPA_ONNX_DELETE_C_STR(c.model.pocket.lm_main);           \
    SHERPA_ONNX_DELETE_C_STR(c.model.pocket.encoder);           \
    SHERPA_ONNX_DELETE_C_STR(c.model.pocket.decoder);           \
    SHERPA_ONNX_DELETE_C_STR(c.model.pocket.text_conditioner);  \
    SHERPA_ONNX_DELETE_C_STR(c.model.pocket.vocab_json);        \
    SHERPA_ONNX_DELETE_C_STR(c.model.pocket.token_scores_json); \
                                                                \
    SHERPA_ONNX_DELETE_C_STR(c.model.supertonic.duration_predictor);  \
    SHERPA_ONNX_DELETE_C_STR(c.model.supertonic.text_encoder);        \
    SHERPA_ONNX_DELETE_C_STR(c.model.supertonic.vector_estimator);    \
    SHERPA_ONNX_DELETE_C_STR(c.model.supertonic.vocoder);             \
    SHERPA_ONNX_DELETE_C_STR(c.model.supertonic.tts_json);            \
    SHERPA_ONNX_DELETE_C_STR(c.model.supertonic.unicode_indexer);     \
    SHERPA_ONNX_DELETE_C_STR(c.model.supertonic.voice_style);         \
                                                                \
    SHERPA_ONNX_DELETE_C_STR(c.model.provider);                 \
                                                                \
    SHERPA_ONNX_DELETE_C_STR(c.rule_fsts);                      \
    SHERPA_ONNX_DELETE_C_STR(c.rule_fars);                      \
  } while (0)

#define SHERPA_ONNX_DELETE_GENERATION_C_STR(c)  \
  do {                                          \
    SHERPA_ONNX_DELETE_C_STR(c.reference_text); \
    SHERPA_ONNX_DELETE_C_STR(c.extra);          \
    if (c.reference_audio) {                    \
      delete[] c.reference_audio;               \
    }                                           \
  } while (0)

static std::string JsObjectToJson(Napi::Env env, const Napi::Object &obj) {
  Napi::Object json = env.Global().Get("JSON").As<Napi::Object>();
  Napi::Function stringify = json.Get("stringify").As<Napi::Function>();
  return stringify.Call(json, {obj}).As<Napi::String>().Utf8Value();
}

static SherpaOnnxGenerationConfig GetGenerationConfig(Napi::Object o) {
  SherpaOnnxGenerationConfig c;
  memset(&c, 0, sizeof(c));

  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(silence_scale, silenceScale);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(speed, speed);
  SHERPA_ONNX_ASSIGN_ATTR_INT32(sid, sid);
  SHERPA_ONNX_ASSIGN_ATTR_INT32(num_steps, numSteps);
  SHERPA_ONNX_ASSIGN_ATTR_STR(reference_text, referenceText);
  SHERPA_ONNX_ASSIGN_ATTR_INT32(reference_sample_rate, referenceSampleRate);

  if (o.Has("referenceAudio") && o.Get("referenceAudio").IsTypedArray()) {
    auto arr = o.Get("referenceAudio").As<Napi::Float32Array>();
    int32_t n = arr.ElementLength();

    if (n > 0) {
      float *buf = new float[n];
      std::copy(arr.Data(), arr.Data() + n, buf);

      c.reference_audio = buf;
      c.reference_audio_len = n;
    }
  }

  if (o.Has("extra") && o.Get("extra").IsObject()) {
    std::string s = JsObjectToJson(o.Env(), o.Get("extra").As<Napi::Object>());

    char *p = new char[s.size() + 1];
    std::copy(s.begin(), s.end(), p);
    p[s.size()] = '\0';

    c.extra = p;
  }

  return c;
}

static SherpaOnnxOfflineTtsVitsModelConfig GetOfflineTtsVitsModelConfig(
    Napi::Object obj) {
  SherpaOnnxOfflineTtsVitsModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("vits") || !obj.Get("vits").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("vits").As<Napi::Object>();
  SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);
  SHERPA_ONNX_ASSIGN_ATTR_STR(lexicon, lexicon);
  SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens);
  SHERPA_ONNX_ASSIGN_ATTR_STR(data_dir, dataDir);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(noise_scale, noiseScale);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(noise_scale_w, noiseScaleW);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(length_scale, lengthScale);

  return c;
}

static SherpaOnnxOfflineTtsMatchaModelConfig GetOfflineTtsMatchaModelConfig(
    Napi::Object obj) {
  SherpaOnnxOfflineTtsMatchaModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("matcha") || !obj.Get("matcha").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("matcha").As<Napi::Object>();
  SHERPA_ONNX_ASSIGN_ATTR_STR(acoustic_model, acousticModel);
  SHERPA_ONNX_ASSIGN_ATTR_STR(vocoder, vocoder);
  SHERPA_ONNX_ASSIGN_ATTR_STR(lexicon, lexicon);
  SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens);
  SHERPA_ONNX_ASSIGN_ATTR_STR(data_dir, dataDir);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(noise_scale, noiseScale);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(length_scale, lengthScale);

  return c;
}

static SherpaOnnxOfflineTtsKokoroModelConfig GetOfflineTtsKokoroModelConfig(
    Napi::Object obj) {
  SherpaOnnxOfflineTtsKokoroModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("kokoro") || !obj.Get("kokoro").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("kokoro").As<Napi::Object>();
  SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);
  SHERPA_ONNX_ASSIGN_ATTR_STR(voices, voices);
  SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens);
  SHERPA_ONNX_ASSIGN_ATTR_STR(data_dir, dataDir);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(length_scale, lengthScale);
  SHERPA_ONNX_ASSIGN_ATTR_STR(lexicon, lexicon);
  SHERPA_ONNX_ASSIGN_ATTR_STR(lang, lang);

  return c;
}

static SherpaOnnxOfflineTtsKittenModelConfig GetOfflineTtsKittenModelConfig(
    Napi::Object obj) {
  SherpaOnnxOfflineTtsKittenModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("kitten") || !obj.Get("kitten").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("kitten").As<Napi::Object>();
  SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);
  SHERPA_ONNX_ASSIGN_ATTR_STR(voices, voices);
  SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens);
  SHERPA_ONNX_ASSIGN_ATTR_STR(data_dir, dataDir);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(length_scale, lengthScale);

  return c;
}

static SherpaOnnxOfflineTtsZipvoiceModelConfig
GetOfflineTtsZipvoiceModelConfig(Napi::Object obj) {
  SherpaOnnxOfflineTtsZipvoiceModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("zipvoice") || !obj.Get("zipvoice").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("zipvoice").As<Napi::Object>();
  SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens);
  SHERPA_ONNX_ASSIGN_ATTR_STR(encoder, encoder);
  SHERPA_ONNX_ASSIGN_ATTR_STR(decoder, decoder);
  SHERPA_ONNX_ASSIGN_ATTR_STR(vocoder, vocoder);
  SHERPA_ONNX_ASSIGN_ATTR_STR(data_dir, dataDir);
  SHERPA_ONNX_ASSIGN_ATTR_STR(lexicon, lexicon);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(feat_scale, featScale);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(t_shift, tShift);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(target_rms, targetRms);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(guidance_scale, guidanceScale);

  return c;
}

static SherpaOnnxOfflineTtsPocketModelConfig GetOfflineTtsPocketModelConfig(
    Napi::Object obj) {
  SherpaOnnxOfflineTtsPocketModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("pocket") || !obj.Get("pocket").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("pocket").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_STR(lm_flow, lmFlow);
  SHERPA_ONNX_ASSIGN_ATTR_STR(lm_main, lmMain);
  SHERPA_ONNX_ASSIGN_ATTR_STR(encoder, encoder);
  SHERPA_ONNX_ASSIGN_ATTR_STR(decoder, decoder);
  SHERPA_ONNX_ASSIGN_ATTR_STR(text_conditioner, textConditioner);
  SHERPA_ONNX_ASSIGN_ATTR_STR(vocab_json, vocabJson);
  SHERPA_ONNX_ASSIGN_ATTR_STR(token_scores_json, tokenScoresJson);

  if (o.Has("voiceEmbeddingCacheCapacity")) {
    c.voice_embedding_cache_capacity =
        o.Get("voiceEmbeddingCacheCapacity").As<Napi::Number>().Int32Value();
  } else {
    c.voice_embedding_cache_capacity = 50;
  }

  return c;
}

static SherpaOnnxOfflineTtsSupertonicModelConfig
GetOfflineTtsSupertonicModelConfig(Napi::Object obj) {
  SherpaOnnxOfflineTtsSupertonicModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("supertonic") || !obj.Get("supertonic").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("supertonic").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_STR(duration_predictor, durationPredictor);
  SHERPA_ONNX_ASSIGN_ATTR_STR(text_encoder, textEncoder);
  SHERPA_ONNX_ASSIGN_ATTR_STR(vector_estimator, vectorEstimator);
  SHERPA_ONNX_ASSIGN_ATTR_STR(vocoder, vocoder);
  SHERPA_ONNX_ASSIGN_ATTR_STR(tts_json, ttsJson);
  SHERPA_ONNX_ASSIGN_ATTR_STR(unicode_indexer, unicodeIndexer);
  SHERPA_ONNX_ASSIGN_ATTR_STR(voice_style, voiceStyle);

  return c;
}

static SherpaOnnxOfflineTtsModelConfig GetOfflineTtsModelConfig(
    Napi::Object obj) {
  SherpaOnnxOfflineTtsModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("model") || !obj.Get("model").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("model").As<Napi::Object>();

  c.vits = GetOfflineTtsVitsModelConfig(o);
  c.matcha = GetOfflineTtsMatchaModelConfig(o);
  c.kokoro = GetOfflineTtsKokoroModelConfig(o);
  c.kitten = GetOfflineTtsKittenModelConfig(o);
  c.zipvoice = GetOfflineTtsZipvoiceModelConfig(o);
  c.pocket = GetOfflineTtsPocketModelConfig(o);
  c.supertonic = GetOfflineTtsSupertonicModelConfig(o);

  SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);

  if (o.Has("debug") &&
      (o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) {
    if (o.Get("debug").IsBoolean()) {
      c.debug = o.Get("debug").As<Napi::Boolean>().Value();
    } else {
      c.debug = o.Get("debug").As<Napi::Number>().Int32Value();
    }
  }

  SHERPA_ONNX_ASSIGN_ATTR_STR(provider, provider);

  return c;
}

// Async worker for creating OfflineTts
class CreateOfflineTtsAsyncWorker : public Napi::AsyncWorker {
 public:
  CreateOfflineTtsAsyncWorker(Napi::Env env,
                              const SherpaOnnxOfflineTtsConfig &config)
      : Napi::AsyncWorker(env),
        deferred_(Napi::Promise::Deferred::New(env)),
        config_(config) {}

  Napi::Promise Promise() { return deferred_.Promise(); }

 protected:
  void Execute() override {
    // Create OfflineTts
    tts_ = SherpaOnnxCreateOfflineTts(&config_);
    if (!tts_) {
      SetError("Failed to create OfflineTts. Check your config!");
    }
  }

  void OnOK() override {
    Napi::Env env = Env();
    deferred_.Resolve(Napi::External<SherpaOnnxOfflineTts>::New(
        env, const_cast<SherpaOnnxOfflineTts *>(tts_),
        [](Napi::Env, SherpaOnnxOfflineTts *ptr) {
          SherpaOnnxDestroyOfflineTts(ptr);
        }));
  }

  void OnError(const Napi::Error &e) override { deferred_.Reject(e.Value()); }

  ~CreateOfflineTtsAsyncWorker() override {
    SherpaOnnxOfflineTtsConfig &c = config_;

    SHERPA_ONNX_DELETE_TTS_C_STR();
  }

 private:
  SherpaOnnxOfflineTtsConfig config_;
  const SherpaOnnxOfflineTts *tts_ = nullptr;
  Napi::Promise::Deferred deferred_;
};

// JS wrapper
static Napi::Value CreateOfflineTtsAsyncWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 1 || !info[0].IsObject()) {
    Napi::TypeError::New(env, "Expect 1 object argument for config")
        .ThrowAsJavaScriptException();
    return env.Null();
  }

  Napi::Object o = info[0].As<Napi::Object>();

  SherpaOnnxOfflineTtsConfig c;
  memset(&c, 0, sizeof(c));

  c.model = GetOfflineTtsModelConfig(o);
  SHERPA_ONNX_ASSIGN_TTS_ATTR();

  auto *worker = new CreateOfflineTtsAsyncWorker(env, c);
  worker->Queue();
  return worker->Promise();
}

static Napi::External<SherpaOnnxOfflineTts> CreateOfflineTtsWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
#if __OHOS__
  // the last argument is the NativeResourceManager
  if (info.Length() != 1 && info.Length() != 2) {
    std::ostringstream os;
    os << "Expect 1 or 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }
#else
  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }
#endif

  if (!info[0].IsObject()) {
    Napi::TypeError::New(env, "Expect an object as the argument")
        .ThrowAsJavaScriptException();

    return {};
  }

#if __OHOS__
  bool use_resource_manager =
      info.Length() == 2 && !info[1].IsUndefined() && !info[1].IsNull();
  if (use_resource_manager && !info[1].IsObject()) {
    Napi::TypeError::New(
        env, "You should pass a resource manager as the second argument.")
        .ThrowAsJavaScriptException();

    return {};
  }
#endif

  Napi::Object o = info[0].As<Napi::Object>();

  SherpaOnnxOfflineTtsConfig c;
  memset(&c, 0, sizeof(c));

  c.model = GetOfflineTtsModelConfig(o);

  SHERPA_ONNX_ASSIGN_TTS_ATTR();

#if __OHOS__
  const SherpaOnnxOfflineTts *tts = nullptr;

  if (use_resource_manager) {
    std::unique_ptr<NativeResourceManager,
                    decltype(&OH_ResourceManager_ReleaseNativeResourceManager)>
        mgr(OH_ResourceManager_InitNativeResourceManager(env, info[1]),
            &OH_ResourceManager_ReleaseNativeResourceManager);
    tts = SherpaOnnxCreateOfflineTtsOHOS(&c, mgr.get());
  } else {
    tts = SherpaOnnxCreateOfflineTts(&c);
  }
#else
  const SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&c);
#endif

  SHERPA_ONNX_DELETE_TTS_C_STR();

  if (!tts) {
    Napi::TypeError::New(env, "Please check your config!")
        .ThrowAsJavaScriptException();

    return {};
  }

  return Napi::External<SherpaOnnxOfflineTts>::New(
      env, const_cast<SherpaOnnxOfflineTts *>(tts),
      [](Napi::Env env, SherpaOnnxOfflineTts *tts) {
        SherpaOnnxDestroyOfflineTts(tts);
      });
}

static Napi::Number OfflineTtsSampleRateWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be an offline tts pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxOfflineTts *tts =
      info[0].As<Napi::External<SherpaOnnxOfflineTts>>().Data();

  int32_t sample_rate = SherpaOnnxOfflineTtsSampleRate(tts);

  return Napi::Number::New(env, sample_rate);
}

static Napi::Number OfflineTtsNumSpeakersWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be an offline tts pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxOfflineTts *tts =
      info[0].As<Napi::External<SherpaOnnxOfflineTts>>().Data();

  int32_t num_speakers = SherpaOnnxOfflineTtsNumSpeakers(tts);

  return Napi::Number::New(env, num_speakers);
}

// synchronous version
static Napi::Object OfflineTtsGenerateWithConfigWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 2) {
    Napi::TypeError::New(env, "Expect 2 arguments")
        .ThrowAsJavaScriptException();
    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 must be OfflineTts handle")
        .ThrowAsJavaScriptException();
    return {};
  }

  if (!info[1].IsObject()) {
    Napi::TypeError::New(env, "Argument 1 must be an object")
        .ThrowAsJavaScriptException();
    return {};
  }

  const SherpaOnnxOfflineTts *tts =
      info[0].As<Napi::External<SherpaOnnxOfflineTts>>().Data();

  Napi::Object obj = info[1].As<Napi::Object>();

  if (!obj.Has("text") || !obj.Get("text").IsString()) {
    Napi::TypeError::New(env, "obj.text must be a string")
        .ThrowAsJavaScriptException();
    return {};
  }

  std::string text = obj.Get("text").As<Napi::String>().Utf8Value();

  bool enable_external_buffer = true;
  if (obj.Has("enableExternalBuffer") &&
      obj.Get("enableExternalBuffer").IsBoolean()) {
    enable_external_buffer =
        obj.Get("enableExternalBuffer").As<Napi::Boolean>().Value();
  }

  Napi::Object genObj =
      obj.Has("generationConfig") && obj.Get("generationConfig").IsObject()
          ? obj.Get("generationConfig").As<Napi::Object>()
          : Napi::Object::New(env);

  SherpaOnnxGenerationConfig gen_config = GetGenerationConfig(genObj);

  const SherpaOnnxGeneratedAudio *audio =
      SherpaOnnxOfflineTtsGenerateWithConfig(tts, text.c_str(), &gen_config,
                                             nullptr, nullptr);

  SHERPA_ONNX_DELETE_GENERATION_C_STR(gen_config);

  if (!audio) {
    Napi::Error::New(env, "TTS generation failed").ThrowAsJavaScriptException();
    return {};
  }

  Napi::Object result = Napi::Object::New(env);

  if (enable_external_buffer) {
    Napi::ArrayBuffer buffer = Napi::ArrayBuffer::New(
        env, const_cast<float *>(audio->samples), sizeof(float) * audio->n,
        [](Napi::Env, void *, const SherpaOnnxGeneratedAudio *hint) {
          SherpaOnnxDestroyOfflineTtsGeneratedAudio(hint);
        },
        audio);

    result.Set("samples", Napi::Float32Array::New(env, audio->n, buffer, 0));
  } else {
    Napi::ArrayBuffer buffer =
        Napi::ArrayBuffer::New(env, sizeof(float) * audio->n);

    auto arr = Napi::Float32Array::New(env, audio->n, buffer, 0);

    std::copy(audio->samples, audio->samples + audio->n, arr.Data());

    SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);

    result.Set("samples", arr);
  }

  result.Set("sampleRate", audio->sample_rate);
  return result;
}

static Napi::Object OfflineTtsGenerateWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be an offline tts pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxOfflineTts *tts =
      info[0].As<Napi::External<SherpaOnnxOfflineTts>>().Data();

  if (!info[1].IsObject()) {
    Napi::TypeError::New(env, "Argument 1 should be an object")
        .ThrowAsJavaScriptException();

    return {};
  }

  Napi::Object obj = info[1].As<Napi::Object>();

  if (!obj.Has("text")) {
    Napi::TypeError::New(env, "The argument object should have a field text")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("text").IsString()) {
    Napi::TypeError::New(env, "The object['text'] should be a string")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Has("sid")) {
    Napi::TypeError::New(env, "The argument object should have a field sid")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("sid").IsNumber()) {
    Napi::TypeError::New(env, "The object['sid'] should be a number")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Has("speed")) {
    Napi::TypeError::New(env, "The argument object should have a field speed")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("speed").IsNumber()) {
    Napi::TypeError::New(env, "The object['speed'] should be a number")
        .ThrowAsJavaScriptException();

    return {};
  }

  bool enable_external_buffer = true;
  if (obj.Has("enableExternalBuffer") &&
      obj.Get("enableExternalBuffer").IsBoolean()) {
    enable_external_buffer =
        obj.Get("enableExternalBuffer").As<Napi::Boolean>().Value();
  }

  Napi::String _text = obj.Get("text").As<Napi::String>();
  std::string text = _text.Utf8Value();
  int32_t sid = obj.Get("sid").As<Napi::Number>().Int32Value();
  float speed = obj.Get("speed").As<Napi::Number>().FloatValue();

  const SherpaOnnxGeneratedAudio *audio;
  audio = SherpaOnnxOfflineTtsGenerate(tts, text.c_str(), sid, speed);

  if (enable_external_buffer) {
    Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New(
        env, const_cast<float *>(audio->samples), sizeof(float) * audio->n,
        [](Napi::Env /*env*/, void * /*data*/,
           const SherpaOnnxGeneratedAudio *hint) {
          SherpaOnnxDestroyOfflineTtsGeneratedAudio(hint);
        },
        audio);
    Napi::Float32Array float32Array =
        Napi::Float32Array::New(env, audio->n, arrayBuffer, 0);

    Napi::Object ans = Napi::Object::New(env);
    ans.Set(Napi::String::New(env, "samples"), float32Array);
    ans.Set(Napi::String::New(env, "sampleRate"), audio->sample_rate);
    return ans;
  } else {
    // don't use external buffer
    Napi::ArrayBuffer arrayBuffer =
        Napi::ArrayBuffer::New(env, sizeof(float) * audio->n);

    Napi::Float32Array float32Array =
        Napi::Float32Array::New(env, audio->n, arrayBuffer, 0);

    std::copy(audio->samples, audio->samples + audio->n, float32Array.Data());

    Napi::Object ans = Napi::Object::New(env);
    ans.Set(Napi::String::New(env, "samples"), float32Array);
    ans.Set(Napi::String::New(env, "sampleRate"), audio->sample_rate);
    SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
    return ans;
  }
}

struct TtsCallbackData {
  std::vector<float> samples;
  float progress;
  std::atomic<bool> processed = {false};
  std::atomic<bool> cancelled = {false};
};

// see
// https://github.com/nodejs/node-addon-examples/blob/main/src/6-threadsafe-function/typed_threadsafe_function/node-addon-api/clock.cc
static void InvokeJsCallback(Napi::Env env, Napi::Function callback,
                             Napi::Reference<Napi::Value> *context,
                             TtsCallbackData *data) {
  if (env != nullptr) {
    if (callback != nullptr) {
      Napi::ArrayBuffer arrayBuffer =
          Napi::ArrayBuffer::New(env, sizeof(float) * data->samples.size());

      Napi::Float32Array float32Array =
          Napi::Float32Array::New(env, data->samples.size(), arrayBuffer, 0);

      std::copy(data->samples.begin(), data->samples.end(),
                float32Array.Data());

      Napi::Object arg = Napi::Object::New(env);
      arg.Set(Napi::String::New(env, "samples"), float32Array);
      arg.Set(Napi::String::New(env, "progress"), data->progress);

      auto v = callback.Call(context->Value(), {arg});

      if ((v.IsBoolean() && !v.As<Napi::Boolean>().Value()) ||
          (v.IsNumber() && v.As<Napi::Number>().Int32Value() == 0)) {
        data->cancelled = true;
      } else {
        data->cancelled = false;
      }

      data->processed = true;
    }
  }
}

using TSFN = Napi::TypedThreadSafeFunction<Napi::Reference<Napi::Value>,
                                           TtsCallbackData, InvokeJsCallback>;

class TtsGenerateWorker : public Napi::AsyncWorker {
 public:
  TtsGenerateWorker(const Napi::Env &env, TSFN tsfn,
                    const SherpaOnnxOfflineTts *tts, const std::string &text,
                    float speed, int32_t sid, bool use_external_buffer)
      : tsfn_(tsfn),
        Napi::AsyncWorker{env, "TtsGenerateWorker"},
        deferred_(env),
        tts_(tts),
        text_(text),
        speed_(speed),
        sid_(sid),
        use_external_buffer_(use_external_buffer) {}

  Napi::Promise Promise() { return deferred_.Promise(); }

  ~TtsGenerateWorker() {
    for (auto d : data_list_) {
      delete d;
    }
  }

 protected:
  void Execute() override {
    auto callback = [](const float *samples, int32_t n, float progress,
                       void *arg) -> int32_t {
      TtsGenerateWorker *_this = reinterpret_cast<TtsGenerateWorker *>(arg);

      for (auto it = _this->data_list_.begin();
           it != _this->data_list_.end();) {
        if ((*it)->processed) {
          delete *it;
          it = _this->data_list_.erase(it);
        } else {
          ++it;
        }
      }

      for (auto d : _this->data_list_) {
        if (d->cancelled) {
#if __OHOS__
          OH_LOG_INFO(LOG_APP, "TtsGenerate is cancelled");
#endif
          return 0;
        }
      }

      auto data = new TtsCallbackData;
      data->samples = std::vector<float>{samples, samples + n};
      data->progress = progress;
      _this->data_list_.push_back(data);

      _this->tsfn_.NonBlockingCall(data);

      return 1;
    };
    audio_ = SherpaOnnxOfflineTtsGenerateWithProgressCallbackWithArg(
        tts_, text_.c_str(), sid_, speed_, callback, this);

    tsfn_.Release();
  }

  void OnOK() override {
    Napi::Env env = deferred_.Env();
    Napi::Object ans = Napi::Object::New(env);
    if (use_external_buffer_) {
      Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New(
          env, const_cast<float *>(audio_->samples), sizeof(float) * audio_->n,
          [](Napi::Env /*env*/, void * /*data*/,
             const SherpaOnnxGeneratedAudio *hint) {
            SherpaOnnxDestroyOfflineTtsGeneratedAudio(hint);
          },
          audio_);
      Napi::Float32Array float32Array =
          Napi::Float32Array::New(env, audio_->n, arrayBuffer, 0);

      ans.Set(Napi::String::New(env, "samples"), float32Array);
      ans.Set(Napi::String::New(env, "sampleRate"), audio_->sample_rate);
    } else {
      // don't use external buffer
      Napi::ArrayBuffer arrayBuffer =
          Napi::ArrayBuffer::New(env, sizeof(float) * audio_->n);

      Napi::Float32Array float32Array =
          Napi::Float32Array::New(env, audio_->n, arrayBuffer, 0);

      std::copy(audio_->samples, audio_->samples + audio_->n,
                float32Array.Data());

      ans.Set(Napi::String::New(env, "samples"), float32Array);
      ans.Set(Napi::String::New(env, "sampleRate"), audio_->sample_rate);
      SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio_);
    }

    deferred_.Resolve(ans);
  }

 private:
  TSFN tsfn_;
  Napi::Promise::Deferred deferred_;
  const SherpaOnnxOfflineTts *tts_;
  std::string text_;
  float speed_;
  int32_t sid_;
  bool use_external_buffer_;

  const SherpaOnnxGeneratedAudio *audio_;

  std::vector<TtsCallbackData *> data_list_;
};

static Napi::Object OfflineTtsGenerateAsyncWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be an offline tts pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxOfflineTts *tts =
      info[0].As<Napi::External<SherpaOnnxOfflineTts>>().Data();

  if (!info[1].IsObject()) {
    Napi::TypeError::New(env, "Argument 1 should be an object")
        .ThrowAsJavaScriptException();

    return {};
  }

  Napi::Object obj = info[1].As<Napi::Object>();

  if (!obj.Has("text")) {
    Napi::TypeError::New(env, "The argument object should have a field text")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("text").IsString()) {
    Napi::TypeError::New(env, "The object['text'] should be a string")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Has("sid")) {
    Napi::TypeError::New(env, "The argument object should have a field sid")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("sid").IsNumber()) {
    Napi::TypeError::New(env, "The object['sid'] should be a number")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Has("speed")) {
    Napi::TypeError::New(env, "The argument object should have a field speed")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("speed").IsNumber()) {
    Napi::TypeError::New(env, "The object['speed'] should be a number")
        .ThrowAsJavaScriptException();

    return {};
  }

  bool enable_external_buffer = true;
  if (obj.Has("enableExternalBuffer") &&
      obj.Get("enableExternalBuffer").IsBoolean()) {
    enable_external_buffer =
        obj.Get("enableExternalBuffer").As<Napi::Boolean>().Value();
  }

  Napi::String _text = obj.Get("text").As<Napi::String>();
  std::string text = _text.Utf8Value();
  int32_t sid = obj.Get("sid").As<Napi::Number>().Int32Value();
  float speed = obj.Get("speed").As<Napi::Number>().FloatValue();

  Napi::Function cb;
  if (obj.Has("callback") && obj.Get("callback").IsFunction()) {
    cb = obj.Get("callback").As<Napi::Function>();
  }

  auto context =
      new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));

  TSFN tsfn = TSFN::New(
      env,
      cb,                 // JavaScript function called asynchronously
      "TtsGenerateFunc",  // Name
      0,                  // Unlimited queue
      1,                  // Only one thread will use this initially
      context,
      [](Napi::Env, void *, Napi::Reference<Napi::Value> *ctx) { delete ctx; });

  TtsGenerateWorker *worker = new TtsGenerateWorker(
      env, tsfn, tts, text, speed, sid, enable_external_buffer);
  worker->Queue();
  return worker->Promise();
}

// Async worker for TTS generation with generationConfig
class TtsGenerateWithConfigWorker : public Napi::AsyncWorker {
 public:
  TtsGenerateWithConfigWorker(const Napi::Env &env, TSFN tsfn,
                              const SherpaOnnxOfflineTts *tts,
                              const std::string &text,
                              const SherpaOnnxGenerationConfig &gen_config,
                              bool use_external_buffer)
      : tsfn_(tsfn),
        Napi::AsyncWorker(env, "TtsGenerateWithConfigWorker"),
        deferred_(env),
        tts_(tts),
        text_(text),
        gen_config_(gen_config),
        use_external_buffer_(use_external_buffer) {}

  Napi::Promise Promise() { return deferred_.Promise(); }

  ~TtsGenerateWithConfigWorker() {
    SHERPA_ONNX_DELETE_GENERATION_C_STR(gen_config_);
    for (auto d : data_list_) delete d;
  }

 protected:
  void Execute() override {
    auto callback = [](const float *samples, int32_t n, float progress,
                       void *arg) -> int32_t {
      TtsGenerateWithConfigWorker *_this =
          reinterpret_cast<TtsGenerateWithConfigWorker *>(arg);

      // Clean up processed chunks
      for (auto it = _this->data_list_.begin();
           it != _this->data_list_.end();) {
        if ((*it)->processed) {
          delete *it;
          it = _this->data_list_.erase(it);
        } else {
          ++it;
        }
      }

      // Cancel check
      for (auto d : _this->data_list_) {
        if (d->cancelled) return 0;
      }

      auto data = new TtsCallbackData;
      data->samples = std::vector<float>{samples, samples + n};
      data->progress = progress;
      _this->data_list_.push_back(data);

      _this->tsfn_.NonBlockingCall(data);

      return 1;
    };

    audio_ = SherpaOnnxOfflineTtsGenerateWithConfig(
        tts_, text_.c_str(), &gen_config_, callback, this);

    tsfn_.Release();
  }

  void OnOK() override {
    Napi::Env env = deferred_.Env();
    Napi::Object ans = Napi::Object::New(env);
    if (use_external_buffer_) {
      Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New(
          env, const_cast<float *>(audio_->samples), sizeof(float) * audio_->n,
          [](Napi::Env, void *, const SherpaOnnxGeneratedAudio *hint) {
            SherpaOnnxDestroyOfflineTtsGeneratedAudio(hint);
          },
          audio_);
      Napi::Float32Array float32Array =
          Napi::Float32Array::New(env, audio_->n, arrayBuffer, 0);
      ans.Set("samples", float32Array);
      ans.Set("sampleRate", audio_->sample_rate);
    } else {
      Napi::ArrayBuffer arrayBuffer =
          Napi::ArrayBuffer::New(env, sizeof(float) * audio_->n);
      Napi::Float32Array float32Array =
          Napi::Float32Array::New(env, audio_->n, arrayBuffer, 0);
      std::copy(audio_->samples, audio_->samples + audio_->n,
                float32Array.Data());
      ans.Set("samples", float32Array);
      ans.Set("sampleRate", audio_->sample_rate);
      SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio_);
    }
    deferred_.Resolve(ans);
  }

 private:
  TSFN tsfn_;
  Napi::Promise::Deferred deferred_;
  const SherpaOnnxOfflineTts *tts_;
  std::string text_;
  SherpaOnnxGenerationConfig gen_config_;
  bool use_external_buffer_;
  const SherpaOnnxGeneratedAudio *audio_;
  std::vector<TtsCallbackData *> data_list_;
};

static Napi::Object OfflineTtsGenerateAsyncWithConfigWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 2 || !info[0].IsExternal() || !info[1].IsObject()) {
    Napi::TypeError::New(env, "Expect (External<OfflineTts>, Object)")
        .ThrowAsJavaScriptException();
    return {};
  }

  const SherpaOnnxOfflineTts *tts =
      info[0].As<Napi::External<SherpaOnnxOfflineTts>>().Data();
  Napi::Object obj = info[1].As<Napi::Object>();

  if (!obj.Has("text") || !obj.Get("text").IsString()) {
    Napi::TypeError::New(env, "obj.text must be a string")
        .ThrowAsJavaScriptException();
    return {};
  }

  std::string text = obj.Get("text").As<Napi::String>().Utf8Value();

  bool enable_external_buffer = true;
  if (obj.Has("enableExternalBuffer") &&
      obj.Get("enableExternalBuffer").IsBoolean()) {
    enable_external_buffer =
        obj.Get("enableExternalBuffer").As<Napi::Boolean>().Value();
  }

  Napi::Function cb;
  if (obj.Has("callback") && obj.Get("callback").IsFunction()) {
    cb = obj.Get("callback").As<Napi::Function>();
  }

  auto context =
      new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));
  TSFN tsfn = TSFN::New(
      env, cb, "TtsGenerateWithConfig", 0, 1, context,
      [](Napi::Env, void *, Napi::Reference<Napi::Value> *ctx) { delete ctx; });

  SherpaOnnxGenerationConfig gen_config;
  memset(&gen_config, 0, sizeof(gen_config));
  if (obj.Has("generationConfig") && obj.Get("generationConfig").IsObject()) {
    gen_config =
        GetGenerationConfig(obj.Get("generationConfig").As<Napi::Object>());
  }

  TtsGenerateWithConfigWorker *worker = new TtsGenerateWithConfigWorker(
      env, tsfn, tts, text, gen_config, enable_external_buffer);
  worker->Queue();
  return worker->Promise();
}

void InitNonStreamingTts(Napi::Env env, Napi::Object exports) {
  exports.Set(Napi::String::New(env, "createOfflineTts"),
              Napi::Function::New(env, CreateOfflineTtsWrapper));

  exports.Set(Napi::String::New(env, "createOfflineTtsAsync"),
              Napi::Function::New(env, CreateOfflineTtsAsyncWrapper));

  exports.Set(Napi::String::New(env, "getOfflineTtsSampleRate"),
              Napi::Function::New(env, OfflineTtsSampleRateWrapper));

  exports.Set(Napi::String::New(env, "getOfflineTtsNumSpeakers"),
              Napi::Function::New(env, OfflineTtsNumSpeakersWrapper));

  exports.Set(Napi::String::New(env, "offlineTtsGenerate"),
              Napi::Function::New(env, OfflineTtsGenerateWrapper));

  exports.Set(Napi::String::New(env, "offlineTtsGenerateWithConfig"),
              Napi::Function::New(env, OfflineTtsGenerateWithConfigWrapper));

  exports.Set(Napi::String::New(env, "offlineTtsGenerateAsync"),
              Napi::Function::New(env, OfflineTtsGenerateAsyncWrapper));

  exports.Set(
      Napi::String::New(env, "offlineTtsGenerateAsyncWithConfig"),
      Napi::Function::New(env, OfflineTtsGenerateAsyncWithConfigWrapper));
}


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/offline-punctuation.cc
================================================
// scripts/node-addon-api/src/offline-punctuation.cc
//
// Copyright (c)  2024  Xiaomi Corporation
#include <sstream>
#include <string>

#include "macros.h"  // NOLINT
#include "napi.h"    // NOLINT
#include "sherpa-onnx/c-api/c-api.h"

static SherpaOnnxOfflinePunctuationModelConfig GetOfflinePunctuationModelConfig(
    Napi::Object obj) {
  SherpaOnnxOfflinePunctuationModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("model") || !obj.Get("model").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("model").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_STR(ct_transformer, ctTransformer);

  SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);

  if (o.Has("debug") &&
      (o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) {
    if (o.Get("debug").IsBoolean()) {
      c.debug = o.Get("debug").As<Napi::Boolean>().Value();
    } else {
      c.debug = o.Get("debug").As<Napi::Number>().Int32Value();
    }
  }
  SHERPA_ONNX_ASSIGN_ATTR_STR(provider, provider);

  return c;
}

static Napi::External<SherpaOnnxOfflinePunctuation>
CreateOfflinePunctuationWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
#if __OHOS__
  if (info.Length() != 1 && info.Length() != 2) {
    std::ostringstream os;
    os << "Expect 1 or 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }
#else
  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }
#endif

  if (!info[0].IsObject()) {
    Napi::TypeError::New(env, "You should pass an object as the first argument.")
        .ThrowAsJavaScriptException();

    return {};
  }

  Napi::Object o = info[0].As<Napi::Object>();

  SherpaOnnxOfflinePunctuationConfig c;
  memset(&c, 0, sizeof(c));
  c.model = GetOfflinePunctuationModelConfig(o);

#if __OHOS__
  const SherpaOnnxOfflinePunctuation *punct = nullptr;

  if (info.Length() == 1 || info[1].IsUndefined() || info[1].IsNull()) {
    punct = SherpaOnnxCreateOfflinePunctuation(&c);
  } else {
    if (!info[1].IsObject()) {
      Napi::TypeError::New(
          env, "You should pass a resource manager as the second argument.")
          .ThrowAsJavaScriptException();

      SHERPA_ONNX_DELETE_C_STR(c.model.ct_transformer);
      SHERPA_ONNX_DELETE_C_STR(c.model.provider);
      return {};
    }

    std::unique_ptr<NativeResourceManager,
                    decltype(&OH_ResourceManager_ReleaseNativeResourceManager)>
        mgr(OH_ResourceManager_InitNativeResourceManager(env, info[1]),
            &OH_ResourceManager_ReleaseNativeResourceManager);

    punct = SherpaOnnxCreateOfflinePunctuationOHOS(&c, mgr.get());
  }
#else
  const SherpaOnnxOfflinePunctuation *punct =
      SherpaOnnxCreateOfflinePunctuation(&c);
#endif

  SHERPA_ONNX_DELETE_C_STR(c.model.ct_transformer);
  SHERPA_ONNX_DELETE_C_STR(c.model.provider);

  if (!punct) {
    Napi::TypeError::New(env, "Please check your config!")
        .ThrowAsJavaScriptException();

    return {};
  }

  return Napi::External<SherpaOnnxOfflinePunctuation>::New(
      env, const_cast<SherpaOnnxOfflinePunctuation *>(punct),
      [](Napi::Env env, SherpaOnnxOfflinePunctuation *punct) {
        SherpaOnnxDestroyOfflinePunctuation(punct);
      });
}

static Napi::String OfflinePunctuationAddPunctWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(
        env,
        "You should pass an offline punctuation pointer as the first argument")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[1].IsString()) {
    Napi::TypeError::New(env, "You should pass a string as the second argument")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxOfflinePunctuation *punct =
      info[0].As<Napi::External<SherpaOnnxOfflinePunctuation>>().Data();
  Napi::String js_text = info[1].As<Napi::String>();
  std::string text = js_text.Utf8Value();

  const char *punct_text =
      SherpaOfflinePunctuationAddPunct(punct, text.c_str());

  Napi::String ans = Napi::String::New(env, punct_text);
  SherpaOfflinePunctuationFreeText(punct_text);
  return ans;
}

void InitOfflinePunctuation(Napi::Env env, Napi::Object exports) {
  exports.Set(Napi::String::New(env, "createOfflinePunctuation"),
              Napi::Function::New(env, CreateOfflinePunctuationWrapper));

  exports.Set(Napi::String::New(env, "offlinePunctuationAddPunct"),
              Napi::Function::New(env, OfflinePunctuationAddPunctWrapper));
}


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/online-punctuation.cc
================================================
// scripts/node-addon-api/src/online-punctuation.cc
//
// Copyright (c)  2024  Xiaomi Corporation
#include <sstream>
#include <string>

#include "macros.h"  // NOLINT
#include "napi.h"    // NOLINT
#include "sherpa-onnx/c-api/c-api.h"

static SherpaOnnxOnlinePunctuationModelConfig GetOnlinePunctuationModelConfig(
    Napi::Object obj) {
  SherpaOnnxOnlinePunctuationModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("model") || !obj.Get("model").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("model").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_STR(cnn_bilstm, cnnBilstm);

  SHERPA_ONNX_ASSIGN_ATTR_STR(bpe_vocab, bpeVocab);

  SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);

  if (o.Has("debug") &&
      (o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) {
    if (o.Get("debug").IsBoolean()) {
      c.debug = o.Get("debug").As<Napi::Boolean>().Value();
    } else {
      c.debug = o.Get("debug").As<Napi::Number>().Int32Value();
    }
  }
  SHERPA_ONNX_ASSIGN_ATTR_STR(provider, provider);

  return c;
}

static Napi::External<SherpaOnnxOnlinePunctuation>
CreateOnlinePunctuationWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
#if __OHOS__
  if (info.Length() != 1 && info.Length() != 2) {
    std::ostringstream os;
    os << "Expect 1 or 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }
#else
  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }
#endif

  if (!info[0].IsObject()) {
    Napi::TypeError::New(env, "You should pass an object as the first argument.")
        .ThrowAsJavaScriptException();

    return {};
  }

#if __OHOS__
  bool use_resource_manager =
      info.Length() == 2 && !info[1].IsUndefined() && !info[1].IsNull();
  if (use_resource_manager && !info[1].IsObject()) {
    Napi::TypeError::New(
        env, "You should pass a resource manager as the second argument.")
        .ThrowAsJavaScriptException();

    return {};
  }
#endif

  Napi::Object o = info[0].As<Napi::Object>();

  SherpaOnnxOnlinePunctuationConfig c;
  memset(&c, 0, sizeof(c));
  c.model = GetOnlinePunctuationModelConfig(o);

#if __OHOS__
  const SherpaOnnxOnlinePunctuation *punct = nullptr;

  if (use_resource_manager) {
    std::unique_ptr<NativeResourceManager,
                    decltype(&OH_ResourceManager_ReleaseNativeResourceManager)>
        mgr(OH_ResourceManager_InitNativeResourceManager(env, info[1]),
            &OH_ResourceManager_ReleaseNativeResourceManager);

    punct = SherpaOnnxCreateOnlinePunctuationOHOS(&c, mgr.get());
  } else {
    punct = SherpaOnnxCreateOnlinePunctuation(&c);
  }
#else
  const SherpaOnnxOnlinePunctuation *punct =
      SherpaOnnxCreateOnlinePunctuation(&c);
#endif

  SHERPA_ONNX_DELETE_C_STR(c.model.cnn_bilstm);
  SHERPA_ONNX_DELETE_C_STR(c.model.bpe_vocab);
  SHERPA_ONNX_DELETE_C_STR(c.model.provider);

  if (!punct) {
    Napi::TypeError::New(env, "Please check your config!")
        .ThrowAsJavaScriptException();

    return {};
  }

  return Napi::External<SherpaOnnxOnlinePunctuation>::New(
      env, const_cast<SherpaOnnxOnlinePunctuation *>(punct),
      [](Napi::Env env, SherpaOnnxOnlinePunctuation *punct) {
        SherpaOnnxDestroyOnlinePunctuation(punct);
      });
}

static Napi::String OnlinePunctuationAddPunctWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(
        env,
        "You should pass an online punctuation pointer as the first argument")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[1].IsString()) {
    Napi::TypeError::New(env, "You should pass a string as the second argument")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxOnlinePunctuation *punct =
      info[0].As<Napi::External<SherpaOnnxOnlinePunctuation>>().Data();
  Napi::String js_text = info[1].As<Napi::String>();
  std::string text = js_text.Utf8Value();

  const char *punct_text =
      SherpaOnnxOnlinePunctuationAddPunct(punct, text.c_str());

  Napi::String ans = Napi::String::New(env, punct_text);
  SherpaOnnxOnlinePunctuationFreeText(punct_text);
  return ans;
}

void InitOnlinePunctuation(Napi::Env env, Napi::Object exports) {
  exports.Set(Napi::String::New(env, "createOnlinePunctuation"),
              Napi::Function::New(env, CreateOnlinePunctuationWrapper));

  exports.Set(Napi::String::New(env, "onlinePunctuationAddPunct"),
              Napi::Function::New(env, OnlinePunctuationAddPunctWrapper));
}


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/sherpa-onnx-node-addon-api.cc
================================================
// scripts/node-addon-api/src/sherpa-onnx-node-addon-api.cc
//
// Copyright (c)  2024  Xiaomi Corporation
#include "napi.h"  // NOLINT

void InitStreamingAsr(Napi::Env env, Napi::Object exports);

void InitNonStreamingAsr(Napi::Env env, Napi::Object exports);

void InitNonStreamingTts(Napi::Env env, Napi::Object exports);

void InitVad(Napi::Env env, Napi::Object exports);

void InitWaveReader(Napi::Env env, Napi::Object exports);

void InitWaveWriter(Napi::Env env, Napi::Object exports);

void InitSpokenLanguageID(Napi::Env env, Napi::Object exports);

void InitSpeakerID(Napi::Env env, Napi::Object exports);

void InitAudioTagging(Napi::Env env, Napi::Object exports);

void InitOfflinePunctuation(Napi::Env env, Napi::Object exports);

void InitOnlinePunctuation(Napi::Env env, Napi::Object exports);

void InitKeywordSpotting(Napi::Env env, Napi::Object exports);

void InitNonStreamingSpeakerDiarization(Napi::Env env, Napi::Object exports);

void InitNonStreamingSpeechDenoiser(Napi::Env env, Napi::Object exports);

void InitOnlineSpeechDenoiser(Napi::Env env, Napi::Object exports);

void InitVersion(Napi::Env env, Napi::Object exports);

#if __OHOS__
void InitUtils(Napi::Env env, Napi::Object exports);
#endif

Napi::Object Init(Napi::Env env, Napi::Object exports) {
  InitStreamingAsr(env, exports);
  InitNonStreamingAsr(env, exports);
  InitNonStreamingTts(env, exports);
  InitVad(env, exports);
  InitWaveReader(env, exports);
  InitWaveWriter(env, exports);
  InitSpokenLanguageID(env, exports);
  InitSpeakerID(env, exports);
  InitAudioTagging(env, exports);
  InitOfflinePunctuation(env, exports);
  InitOnlinePunctuation(env, exports);
  InitKeywordSpotting(env, exports);
  InitNonStreamingSpeakerDiarization(env, exports);
  InitNonStreamingSpeechDenoiser(env, exports);
  InitOnlineSpeechDenoiser(env, exports);
  InitVersion(env, exports);

#if __OHOS__
  InitUtils(env, exports);
#endif

  return exports;
}

#if __OHOS__
NODE_API_MODULE(sherpa_onnx, Init)
#else
NODE_API_MODULE(addon, Init)
#endif


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/speaker-identification.cc
================================================
// scripts/node-addon-api/src/speaker-identification.cc
//
// Copyright (c)  2024  Xiaomi Corporation
#include <algorithm>
#include <memory>
#include <sstream>
#include <string>

#include "macros.h"  // NOLINT
#include "napi.h"    // NOLINT
#include "sherpa-onnx/c-api/c-api.h"

static Napi::External<SherpaOnnxSpeakerEmbeddingExtractor>
CreateSpeakerEmbeddingExtractorWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

#if __OHOS__
  if (info.Length() != 1 && info.Length() != 2) {
    std::ostringstream os;
    os << "Expect 1 or 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }
#else
  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }
#endif

  if (!info[0].IsObject()) {
    Napi::TypeError::New(env, "You should pass an object as the first argument.")
        .ThrowAsJavaScriptException();

    return {};
  }

#if __OHOS__
  bool use_resource_manager =
      info.Length() == 2 && !info[1].IsUndefined() && !info[1].IsNull();
  if (use_resource_manager && !info[1].IsObject()) {
    Napi::TypeError::New(
        env, "You should pass a resource manager as the second argument.")
        .ThrowAsJavaScriptException();

    return {};
  }
#endif

  Napi::Object o = info[0].As<Napi::Object>();

  SherpaOnnxSpeakerEmbeddingExtractorConfig c;
  memset(&c, 0, sizeof(c));

  SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);
  SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);

  if (o.Has("debug") &&
      (o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) {
    if (o.Get("debug").IsBoolean()) {
      c.debug = o.Get("debug").As<Napi::Boolean>().Value();
    } else {
      c.debug = o.Get("debug").As<Napi::Number>().Int32Value();
    }
  }

  SHERPA_ONNX_ASSIGN_ATTR_STR(provider, provider);

#if __OHOS__
  const SherpaOnnxSpeakerEmbeddingExtractor *extractor = nullptr;

  if (use_resource_manager) {
    std::unique_ptr<NativeResourceManager,
                    decltype(&OH_ResourceManager_ReleaseNativeResourceManager)>
        mgr(OH_ResourceManager_InitNativeResourceManager(env, info[1]),
            &OH_ResourceManager_ReleaseNativeResourceManager);

    extractor = SherpaOnnxCreateSpeakerEmbeddingExtractorOHOS(&c, mgr.get());
  } else {
    extractor = SherpaOnnxCreateSpeakerEmbeddingExtractor(&c);
  }
#else
  const SherpaOnnxSpeakerEmbeddingExtractor *extractor =
      SherpaOnnxCreateSpeakerEmbeddingExtractor(&c);
#endif
  SHERPA_ONNX_DELETE_C_STR(c.model);
  SHERPA_ONNX_DELETE_C_STR(c.provider);

  if (!extractor) {
    Napi::TypeError::New(env, "Please check your config!")
        .ThrowAsJavaScriptException();

    return {};
  }

  return Napi::External<SherpaOnnxSpeakerEmbeddingExtractor>::New(
      env, const_cast<SherpaOnnxSpeakerEmbeddingExtractor *>(extractor),
      [](Napi::Env env, SherpaOnnxSpeakerEmbeddingExtractor *extractor) {
        SherpaOnnxDestroySpeakerEmbeddingExtractor(extractor);
      });
}

static Napi::Number SpeakerEmbeddingExtractorDimWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(
        env, "Argument 0 should be a speaker embedding extractor pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxSpeakerEmbeddingExtractor *extractor =
      info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingExtractor>>().Data();

  int32_t dim = SherpaOnnxSpeakerEmbeddingExtractorDim(extractor);

  return Napi::Number::New(env, dim);
}

static Napi::External<SherpaOnnxOnlineStream>
SpeakerEmbeddingExtractorCreateStreamWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env,
                         "You should pass a speaker embedding extractor "
                         "pointer as the only argument")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxSpeakerEmbeddingExtractor *extractor =
      info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingExtractor>>().Data();

  const SherpaOnnxOnlineStream *stream =
      SherpaOnnxSpeakerEmbeddingExtractorCreateStream(extractor);

  return Napi::External<SherpaOnnxOnlineStream>::New(
      env, const_cast<SherpaOnnxOnlineStream *>(stream),
      [](Napi::Env env, SherpaOnnxOnlineStream *stream) {
        SherpaOnnxDestroyOnlineStream(stream);
      });
}

static Napi::Boolean SpeakerEmbeddingExtractorIsReadyWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(
        env, "Argument 0 should be a speaker embedding extractor pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[1].IsExternal()) {
    Napi::TypeError::New(env, "Argument 1 should be an online stream pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxSpeakerEmbeddingExtractor *extractor =
      info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingExtractor>>().Data();

  const SherpaOnnxOnlineStream *stream =
      info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data();

  int32_t is_ready =
      SherpaOnnxSpeakerEmbeddingExtractorIsReady(extractor, stream);

  return Napi::Boolean::New(env, is_ready);
}

static Napi::Float32Array SpeakerEmbeddingExtractorComputeEmbeddingWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 2 && info.Length() != 3) {
    std::ostringstream os;
    os << "Expect only 2 or 3 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(
        env, "Argument 0 should be a speaker embedding extractor pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[1].IsExternal()) {
    Napi::TypeError::New(env, "Argument 1 should be an online stream pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  bool enable_external_buffer = true;
  if (info.Length() == 3) {
    if (info[2].IsBoolean()) {
      enable_external_buffer = info[2].As<Napi::Boolean>().Value();
    } else {
      Napi::TypeError::New(env, "Argument 2 should be a boolean.")
          .ThrowAsJavaScriptException();
    }
  }

  const SherpaOnnxSpeakerEmbeddingExtractor *extractor =
      info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingExtractor>>().Data();

  const SherpaOnnxOnlineStream *stream =
      info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data();

  const float *v =
      SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding(extractor, stream);

  int32_t dim = SherpaOnnxSpeakerEmbeddingExtractorDim(extractor);

  if (enable_external_buffer) {
    Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New(
        env, const_cast<float *>(v), sizeof(float) * dim,
        [](Napi::Env /*env*/, void *data) {
          SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(
              reinterpret_cast<float *>(data));
        });

    return Napi::Float32Array::New(env, dim, arrayBuffer, 0);
  } else {
    // don't use external buffer
    Napi::ArrayBuffer arrayBuffer =
        Napi::ArrayBuffer::New(env, sizeof(float) * dim);

    Napi::Float32Array float32Array =
        Napi::Float32Array::New(env, dim, arrayBuffer, 0);

    std::copy(v, v + dim, float32Array.Data());

    SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(v);

    return float32Array;
  }
}

static Napi::External<SherpaOnnxSpeakerEmbeddingManager>
CreateSpeakerEmbeddingManagerWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsNumber()) {
    Napi::TypeError::New(env,
                         "You should pass an integer as the only argument.")
        .ThrowAsJavaScriptException();

    return {};
  }

  int32_t dim = info[0].As<Napi::Number>().Int32Value();

  const SherpaOnnxSpeakerEmbeddingManager *manager =
      SherpaOnnxCreateSpeakerEmbeddingManager(dim);

  if (!manager) {
    Napi::TypeError::New(env, "Please check your input dim!")
        .ThrowAsJavaScriptException();

    return {};
  }

  return Napi::External<SherpaOnnxSpeakerEmbeddingManager>::New(
      env, const_cast<SherpaOnnxSpeakerEmbeddingManager *>(manager),
      [](Napi::Env env, SherpaOnnxSpeakerEmbeddingManager *manager) {
        SherpaOnnxDestroySpeakerEmbeddingManager(manager);
      });
}

static Napi::Boolean SpeakerEmbeddingManagerAddWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env,
                         "You should pass a speaker embedding manager pointer "
                         "as the first argument.")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[1].IsObject()) {
    Napi::TypeError::New(env, "Argument 1 should be an object")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxSpeakerEmbeddingManager *manager =
      info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data();

  Napi::Object obj = info[1].As<Napi::Object>();

  if (!obj.Has("v")) {
    Napi::TypeError::New(env, "The argument object should have a field v")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("v").IsTypedArray()) {
    Napi::TypeError::New(env, "The object['v'] should be a typed array")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Has("name")) {
    Napi::TypeError::New(env, "The argument object should have a field name")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("name").IsString()) {
    Napi::TypeError::New(env, "The object['name'] should be a string")
        .ThrowAsJavaScriptException();

    return {};
  }

  Napi::Float32Array v = obj.Get("v").As<Napi::Float32Array>();
  Napi::String js_name = obj.Get("name").As<Napi::String>();
  std::string name = js_name.Utf8Value();

  int32_t ok =
      SherpaOnnxSpeakerEmbeddingManagerAdd(manager, name.c_str(), v.Data());
  return Napi::Boolean::New(env, ok);
}

static Napi::Boolean SpeakerEmbeddingManagerAddListFlattenedWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env,
                         "You should pass a speaker embedding manager pointer "
                         "as the first argument.")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[1].IsObject()) {
    Napi::TypeError::New(env, "Argument 1 should be an object")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxSpeakerEmbeddingManager *manager =
      info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data();

  Napi::Object obj = info[1].As<Napi::Object>();

  if (!obj.Has("vv")) {
    Napi::TypeError::New(env, "The argument object should have a field vv")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("vv").IsTypedArray()) {
    Napi::TypeError::New(env, "The object['vv'] should be a typed array")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Has("name")) {
    Napi::TypeError::New(env, "The argument object should have a field name")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("name").IsString()) {
    Napi::TypeError::New(env, "The object['name'] should be a string")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Has("n")) {
    Napi::TypeError::New(env, "The argument object should have a field n")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("n").IsNumber()) {
    Napi::TypeError::New(env, "The object['n'] should be an integer")
        .ThrowAsJavaScriptException();

    return {};
  }

  Napi::Float32Array v = obj.Get("vv").As<Napi::Float32Array>();
  Napi::String js_name = obj.Get("name").As<Napi::String>();
  int32_t n = obj.Get("n").As<Napi::Number>().Int32Value();

  std::string name = js_name.Utf8Value();

  int32_t ok = SherpaOnnxSpeakerEmbeddingManagerAddListFlattened(
      manager, name.c_str(), v.Data(), n);

  return Napi::Boolean::New(env, ok);
}

static Napi::Boolean SpeakerEmbeddingManagerRemoveWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env,
                         "You should pass a speaker embedding manager pointer "
                         "as the first argument.")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[1].IsString()) {
    Napi::TypeError::New(env, "Argument 1 should be string")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxSpeakerEmbeddingManager *manager =
      info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data();

  Napi::String js_name = info[1].As<Napi::String>();
  std::string name = js_name.Utf8Value();

  int32_t ok = SherpaOnnxSpeakerEmbeddingManagerRemove(manager, name.c_str());

  return Napi::Boolean::New(env, ok);
}

static Napi::String SpeakerEmbeddingManagerSearchWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env,
                         "You should pass a speaker embedding manager pointer "
                         "as the first argument.")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[1].IsObject()) {
    Napi::TypeError::New(env, "Argument 1 should be an object")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxSpeakerEmbeddingManager *manager =
      info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data();

  Napi::Object obj = info[1].As<Napi::Object>();

  if (!obj.Has("v")) {
    Napi::TypeError::New(env, "The argument object should have a field v")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("v").IsTypedArray()) {
    Napi::TypeError::New(env, "The object['v'] should be a typed array")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Has("threshold")) {
    Napi::TypeError::New(env,
                         "The argument object should have a field threshold")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("threshold").IsNumber()) {
    Napi::TypeError::New(env, "The object['threshold'] should be a float")
        .ThrowAsJavaScriptException();

    return {};
  }

  Napi::Float32Array v = obj.Get("v").As<Napi::Float32Array>();
  float threshold = obj.Get("threshold").As<Napi::Number>().FloatValue();

  const char *name =
      SherpaOnnxSpeakerEmbeddingManagerSearch(manager, v.Data(), threshold);
  const char *p = name;
  if (!p) {
    p = "";
  }

  Napi::String js_name = Napi::String::New(env, p);
  SherpaOnnxSpeakerEmbeddingManagerFreeSearch(name);

  return js_name;
}

static Napi::Boolean SpeakerEmbeddingManagerVerifyWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env,
                         "You should pass a speaker embedding manager pointer "
                         "as the first argument.")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[1].IsObject()) {
    Napi::TypeError::New(env, "Argument 1 should be an object")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxSpeakerEmbeddingManager *manager =
      info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data();

  Napi::Object obj = info[1].As<Napi::Object>();

  if (!obj.Has("v")) {
    Napi::TypeError::New(env, "The argument object should have a field v")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("v").IsTypedArray()) {
    Napi::TypeError::New(env, "The object['v'] should be a typed array")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Has("threshold")) {
    Napi::TypeError::New(env,
                         "The argument object should have a field threshold")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("threshold").IsNumber()) {
    Napi::TypeError::New(env, "The object['threshold'] should be a float")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Has("name")) {
    Napi::TypeError::New(env, "The argument object should have a field name")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("name").IsString()) {
    Napi::TypeError::New(env, "The object['name'] should be a string")
        .ThrowAsJavaScriptException();

    return {};
  }

  Napi::Float32Array v = obj.Get("v").As<Napi::Float32Array>();
  float threshold = obj.Get("threshold").As<Napi::Number>().FloatValue();

  Napi::String js_name = obj.Get("name").As<Napi::String>();
  std::string name = js_name.Utf8Value();

  int32_t found = SherpaOnnxSpeakerEmbeddingManagerVerify(manager, name.c_str(),
                                                          v.Data(), threshold);

  return Napi::Boolean::New(env, found);
}

static Napi::Boolean SpeakerEmbeddingManagerContainsWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env,
                         "You should pass a speaker embedding manager pointer "
                         "as the first argument.")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[1].IsString()) {
    Napi::TypeError::New(env, "Argument 1 should be a string")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxSpeakerEmbeddingManager *manager =
      info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data();

  Napi::String js_name = info[1].As<Napi::String>();
  std::string name = js_name.Utf8Value();

  int32_t exists =
      SherpaOnnxSpeakerEmbeddingManagerContains(manager, name.c_str());

  return Napi::Boolean::New(env, exists);
}

static Napi::Number SpeakerEmbeddingManagerNumSpeakersWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env,
                         "You should pass a speaker embedding manager pointer "
                         "as the first argument.")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxSpeakerEmbeddingManager *manager =
      info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data();

  int32_t num_speakers = SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(manager);

  return Napi::Number::New(env, num_speakers);
}

static Napi::Array SpeakerEmbeddingManagerGetAllSpeakersWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env,
                         "You should pass a speaker embedding manager pointer "
                         "as the first argument.")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxSpeakerEmbeddingManager *manager =
      info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data();

  int32_t num_speakers = SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(manager);
  if (num_speakers == 0) {
    return Napi::Array::New(env, num_speakers);
  }

  const char *const *all_speaker_names =
      SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(manager);

  Napi::Array ans = Napi::Array::New(env, num_speakers);
  for (uint32_t i = 0; i != num_speakers; ++i) {
    // ans[i] = Napi::String::New(env, all_speaker_names[i]); // see #2120
    ans.Set(i, Napi::String::New(env, all_speaker_names[i]));
  }
  SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(all_speaker_names);
  return ans;
}

void InitSpeakerID(Napi::Env env, Napi::Object exports) {
  exports.Set(Napi::String::New(env, "createSpeakerEmbeddingExtractor"),
              Napi::Function::New(env, CreateSpeakerEmbeddingExtractorWrapper));

  exports.Set(Napi::String::New(env, "speakerEmbeddingExtractorDim"),
              Napi::Function::New(env, SpeakerEmbeddingExtractorDimWrapper));

  exports.Set(
      Napi::String::New(env, "speakerEmbeddingExtractorCreateStream"),
      Napi::Function::New(env, SpeakerEmbeddingExtractorCreateStreamWrapper));

  exports.Set(
      Napi::String::New(env, "speakerEmbeddingExtractorIsReady"),
      Napi::Function::New(env, SpeakerEmbeddingExtractorIsReadyWrapper));

  exports.Set(
      Napi::String::New(env, "speakerEmbeddingExtractorComputeEmbedding"),
      Napi::Function::New(env,
                          SpeakerEmbeddingExtractorComputeEmbeddingWrapper));

  exports.Set(Napi::String::New(env, "createSpeakerEmbeddingManager"),
              Napi::Function::New(env, CreateSpeakerEmbeddingManagerWrapper));

  exports.Set(Napi::String::New(env, "speakerEmbeddingManagerAdd"),
              Napi::Function::New(env, SpeakerEmbeddingManagerAddWrapper));

  exports.Set(
      Napi::String::New(env, "speakerEmbeddingManagerAddListFlattened"),
      Napi::Function::New(env, SpeakerEmbeddingManagerAddListFlattenedWrapper));

  exports.Set(Napi::String::New(env, "speakerEmbeddingManagerRemove"),
              Napi::Function::New(env, SpeakerEmbeddingManagerRemoveWrapper));

  exports.Set(Napi::String::New(env, "speakerEmbeddingManagerSearch"),
              Napi::Function::New(env, SpeakerEmbeddingManagerSearchWrapper));

  exports.Set(Napi::String::New(env, "speakerEmbeddingManagerVerify"),
              Napi::Function::New(env, SpeakerEmbeddingManagerVerifyWrapper));

  exports.Set(Napi::String::New(env, "speakerEmbeddingManagerContains"),
              Napi::Function::New(env, SpeakerEmbeddingManagerContainsWrapper));

  exports.Set(
      Napi::String::New(env, "speakerEmbeddingManagerNumSpeakers"),
      Napi::Function::New(env, SpeakerEmbeddingManagerNumSpeakersWrapper));

  exports.Set(
      Napi::String::New(env, "speakerEmbeddingManagerGetAllSpeakers"),
      Napi::Function::New(env, SpeakerEmbeddingManagerGetAllSpeakersWrapper));
}


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/speech-denoiser.h
================================================
// scripts/node-addon-api/src/speech-denoiser.h
//
// Copyright (c)  2026  Xiaomi Corporation

#ifndef SHERPA_ONNX_HARMONY_OS_SHERPAONNXHAR_SHERPA_ONNX_SRC_MAIN_CPP_SPEECH_DENOISER_H_
#define SHERPA_ONNX_HARMONY_OS_SHERPAONNXHAR_SHERPA_ONNX_SRC_MAIN_CPP_SPEECH_DENOISER_H_

#include <algorithm>

#include "macros.h"  // NOLINT
#include "napi.h"    // NOLINT
#include "sherpa-onnx/c-api/c-api.h"

static inline SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig
GetSpeechDenoiserGtcrnModelConfig(Napi::Object obj) {
  SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("gtcrn") || !obj.Get("gtcrn").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("gtcrn").As<Napi::Object>();
  SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);
  return c;
}

static inline SherpaOnnxOfflineSpeechDenoiserDpdfNetModelConfig
GetSpeechDenoiserDpdfNetModelConfig(Napi::Object obj) {
  SherpaOnnxOfflineSpeechDenoiserDpdfNetModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("dpdfnet") || !obj.Get("dpdfnet").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("dpdfnet").As<Napi::Object>();
  SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);
  return c;
}

static inline SherpaOnnxOfflineSpeechDenoiserModelConfig
GetSpeechDenoiserModelConfig(Napi::Object obj) {
  SherpaOnnxOfflineSpeechDenoiserModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("model") || !obj.Get("model").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("model").As<Napi::Object>();
  c.gtcrn = GetSpeechDenoiserGtcrnModelConfig(o);
  c.dpdfnet = GetSpeechDenoiserDpdfNetModelConfig(o);

  SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);

  if (o.Has("debug") &&
      (o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) {
    if (o.Get("debug").IsBoolean()) {
      c.debug = o.Get("debug").As<Napi::Boolean>().Value();
    } else {
      c.debug = o.Get("debug").As<Napi::Number>().Int32Value();
    }
  }

  SHERPA_ONNX_ASSIGN_ATTR_STR(provider, provider);

  return c;
}

static inline void DeleteSpeechDenoiserModelConfig(
    const SherpaOnnxOfflineSpeechDenoiserModelConfig &c) {
  SHERPA_ONNX_DELETE_C_STR(c.gtcrn.model);
  SHERPA_ONNX_DELETE_C_STR(c.provider);
  SHERPA_ONNX_DELETE_C_STR(c.dpdfnet.model);
}

static inline bool GetEnableExternalBuffer(Napi::Object obj) {
  if (obj.Has("enableExternalBuffer") &&
      obj.Get("enableExternalBuffer").IsBoolean()) {
    return obj.Get("enableExternalBuffer").As<Napi::Boolean>().Value();
  }

  return true;
}

static inline int32_t GetFloat32ArrayElementLength(Napi::Float32Array samples) {
#if __OHOS__
  return samples.ElementLength() / sizeof(float);
#else
  return samples.ElementLength();
#endif
}

static inline Napi::Object CreateDenoisedAudioObject(
    Napi::Env env, const SherpaOnnxDenoisedAudio *audio,
    bool enable_external_buffer) {
  Napi::Object ans = Napi::Object::New(env);

  if (!audio) {
    ans.Set(Napi::String::New(env, "samples"), Napi::Float32Array::New(env, 0));
    ans.Set(Napi::String::New(env, "sampleRate"), 0);
    return ans;
  }

  if (enable_external_buffer) {
    Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New(
        env, const_cast<float *>(audio->samples), sizeof(float) * audio->n,
        [](Napi::Env /*env*/, void * /*data*/,
           const SherpaOnnxDenoisedAudio *hint) {
          SherpaOnnxDestroyDenoisedAudio(hint);
        },
        audio);
    Napi::Float32Array float32Array =
        Napi::Float32Array::New(env, audio->n, arrayBuffer, 0);
    ans.Set(Napi::String::New(env, "samples"), float32Array);
    ans.Set(Napi::String::New(env, "sampleRate"), audio->sample_rate);
    return ans;
  }

  Napi::ArrayBuffer arrayBuffer =
      Napi::ArrayBuffer::New(env, sizeof(float) * audio->n);
  Napi::Float32Array float32Array =
      Napi::Float32Array::New(env, audio->n, arrayBuffer, 0);

  if (audio->n > 0 && audio->samples) {
    std::copy(audio->samples, audio->samples + audio->n, float32Array.Data());
  }

  ans.Set(Napi::String::New(env, "samples"), float32Array);
  ans.Set(Napi::String::New(env, "sampleRate"), audio->sample_rate);
  SherpaOnnxDestroyDenoisedAudio(audio);
  return ans;
}

#endif  // SHERPA_ONNX_HARMONY_OS_SHERPAONNXHAR_SHERPA_ONNX_SRC_MAIN_CPP_SPEECH_DENOISER_H_


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/spoken-language-identification.cc
================================================
// scripts/node-addon-api/src/spoken-language-identification.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include <sstream>
#include <string>

#include "macros.h"  // NOLINT
#include "napi.h"    // NOLINT
#include "sherpa-onnx/c-api/c-api.h"

static SherpaOnnxSpokenLanguageIdentificationWhisperConfig
GetSpokenLanguageIdentificationWhisperConfig(Napi::Object obj) {
  SherpaOnnxSpokenLanguageIdentificationWhisperConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("whisper") || !obj.Get("whisper").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("whisper").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_STR(encoder, encoder);
  SHERPA_ONNX_ASSIGN_ATTR_STR(decoder, decoder);
  SHERPA_ONNX_ASSIGN_ATTR_INT32(tail_paddings, tailPaddings);

  return c;
}

static Napi::External<SherpaOnnxSpokenLanguageIdentification>
CreateSpokenLanguageIdentificationWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsObject()) {
    Napi::TypeError::New(env, "You should pass an object as the only argument.")
        .ThrowAsJavaScriptException();

    return {};
  }

  Napi::Object o = info[0].As<Napi::Object>();

  SherpaOnnxSpokenLanguageIdentificationConfig c;
  memset(&c, 0, sizeof(c));
  c.whisper = GetSpokenLanguageIdentificationWhisperConfig(o);

  SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);

  if (o.Has("debug") &&
      (o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) {
    if (o.Get("debug").IsBoolean()) {
      c.debug = o.Get("debug").As<Napi::Boolean>().Value();
    } else {
      c.debug = o.Get("debug").As<Napi::Number>().Int32Value();
    }
  }
  SHERPA_ONNX_ASSIGN_ATTR_STR(provider, provider);

  const SherpaOnnxSpokenLanguageIdentification *slid =
      SherpaOnnxCreateSpokenLanguageIdentification(&c);

  SHERPA_ONNX_DELETE_C_STR(c.whisper.encoder);
  SHERPA_ONNX_DELETE_C_STR(c.whisper.decoder);
  SHERPA_ONNX_DELETE_C_STR(c.provider);

  if (!slid) {
    Napi::TypeError::New(env, "Please check your config!")
        .ThrowAsJavaScriptException();

    return {};
  }

  return Napi::External<SherpaOnnxSpokenLanguageIdentification>::New(
      env, const_cast<SherpaOnnxSpokenLanguageIdentification *>(slid),
      [](Napi::Env env, SherpaOnnxSpokenLanguageIdentification *slid) {
        SherpaOnnxDestroySpokenLanguageIdentification(slid);
      });
}

static Napi::External<SherpaOnnxOfflineStream>
SpokenLanguageIdentificationCreateOfflineStreamWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(
        env,
        "You should pass an offline language ID pointer as the only argument")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxSpokenLanguageIdentification *slid =
      info[0]
          .As<Napi::External<SherpaOnnxSpokenLanguageIdentification>>()
          .Data();

  const SherpaOnnxOfflineStream *stream =
      SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(slid);

  return Napi::External<SherpaOnnxOfflineStream>::New(
      env, const_cast<SherpaOnnxOfflineStream *>(stream),
      [](Napi::Env env, SherpaOnnxOfflineStream *stream) {
        SherpaOnnxDestroyOfflineStream(stream);
      });
}

static Napi::String SpokenLanguageIdentificationComputeWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(
        env, "Argument 0 should be an offline spoken language ID pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[1].IsExternal()) {
    Napi::TypeError::New(env, "Argument 1 should be an offline stream pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxSpokenLanguageIdentification *slid =
      info[0]
          .As<Napi::External<SherpaOnnxSpokenLanguageIdentification>>()
          .Data();

  const SherpaOnnxOfflineStream *stream =
      info[1].As<Napi::External<SherpaOnnxOfflineStream>>().Data();

  const SherpaOnnxSpokenLanguageIdentificationResult *r =
      SherpaOnnxSpokenLanguageIdentificationCompute(slid, stream);

  std::string lang = r->lang;
  SherpaOnnxDestroySpokenLanguageIdentificationResult(r);

  return Napi::String::New(env, lang);
}

void InitSpokenLanguageID(Napi::Env env, Napi::Object exports) {
  exports.Set(
      Napi::String::New(env, "createSpokenLanguageIdentification"),
      Napi::Function::New(env, CreateSpokenLanguageIdentificationWrapper));

  exports.Set(
      Napi::String::New(env, "createSpokenLanguageIdentificationOfflineStream"),
      Napi::Function::New(
          env, SpokenLanguageIdentificationCreateOfflineStreamWrapper));

  exports.Set(
      Napi::String::New(env, "spokenLanguageIdentificationCompute"),
      Napi::Function::New(env, SpokenLanguageIdentificationComputeWrapper));
}


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/streaming-asr.cc
================================================
// scripts/node-addon-api/src/streaming-asr.cc
//
// Copyright (c)  2024  Xiaomi Corporation
#include <memory>
#include <sstream>
#include <string>

#include "macros.h"  // NOLINT
#include "napi.h"    // NOLINT
#include "sherpa-onnx/c-api/c-api.h"
/*
{
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  }
};
 */
SherpaOnnxFeatureConfig GetFeatureConfig(Napi::Object obj) {
  SherpaOnnxFeatureConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("featConfig") || !obj.Get("featConfig").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("featConfig").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_INT32(sample_rate, sampleRate);
  SHERPA_ONNX_ASSIGN_ATTR_INT32(feature_dim, featureDim);

  return c;
}
/*
{
  'transducer': {
    'encoder': './encoder.onnx',
    'decoder': './decoder.onnx',
    'joiner': './joiner.onnx',
  }
}
 */

static SherpaOnnxOnlineTransducerModelConfig GetOnlineTransducerModelConfig(
    Napi::Object obj) {
  SherpaOnnxOnlineTransducerModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("transducer") || !obj.Get("transducer").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("transducer").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_STR(encoder, encoder);
  SHERPA_ONNX_ASSIGN_ATTR_STR(decoder, decoder);
  SHERPA_ONNX_ASSIGN_ATTR_STR(joiner, joiner);

  return c;
}

static SherpaOnnxOnlineZipformer2CtcModelConfig
GetOnlineZipformer2CtcModelConfig(Napi::Object obj) {
  SherpaOnnxOnlineZipformer2CtcModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("zipformer2Ctc") || !obj.Get("zipformer2Ctc").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("zipformer2Ctc").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);

  return c;
}

static SherpaOnnxOnlineNemoCtcModelConfig GetOnlineNemoCtcModelConfig(
    Napi::Object obj) {
  SherpaOnnxOnlineNemoCtcModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("nemoCtc") || !obj.Get("nemoCtc").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("nemoCtc").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);

  return c;
}

static SherpaOnnxOnlineToneCtcModelConfig GetOnlineToneCtcModelConfig(
    Napi::Object obj) {
  SherpaOnnxOnlineToneCtcModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("toneCtc") || !obj.Get("toneCtc").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("toneCtc").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);

  return c;
}

static SherpaOnnxOnlineParaformerModelConfig GetOnlineParaformerModelConfig(
    Napi::Object obj) {
  SherpaOnnxOnlineParaformerModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("paraformer") || !obj.Get("paraformer").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("paraformer").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_STR(encoder, encoder);
  SHERPA_ONNX_ASSIGN_ATTR_STR(decoder, decoder);

  return c;
}

SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj) {
  SherpaOnnxOnlineModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("modelConfig") || !obj.Get("modelConfig").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("modelConfig").As<Napi::Object>();

  c.transducer = GetOnlineTransducerModelConfig(o);
  c.paraformer = GetOnlineParaformerModelConfig(o);
  c.zipformer2_ctc = GetOnlineZipformer2CtcModelConfig(o);
  c.nemo_ctc = GetOnlineNemoCtcModelConfig(o);
  c.t_one_ctc = GetOnlineToneCtcModelConfig(o);

  SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens);
  SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);
  SHERPA_ONNX_ASSIGN_ATTR_STR(provider, provider);

  if (o.Has("debug") &&
      (o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) {
    if (o.Get("debug").IsBoolean()) {
      c.debug = o.Get("debug").As<Napi::Boolean>().Value();
    } else {
      c.debug = o.Get("debug").As<Napi::Number>().Int32Value();
    }
  }

  SHERPA_ONNX_ASSIGN_ATTR_STR(model_type, modelType);
  SHERPA_ONNX_ASSIGN_ATTR_STR(modeling_unit, modelingUnit);
  SHERPA_ONNX_ASSIGN_ATTR_STR(bpe_vocab, bpeVocab);
  SHERPA_ONNX_ASSIGN_ATTR_STR(tokens_buf, tokensBuf);
  SHERPA_ONNX_ASSIGN_ATTR_INT32(tokens_buf_size, tokensBufSize);

  return c;
}

static SherpaOnnxOnlineCtcFstDecoderConfig GetCtcFstDecoderConfig(
    Napi::Object obj) {
  SherpaOnnxOnlineCtcFstDecoderConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("ctcFstDecoderConfig") ||
      !obj.Get("ctcFstDecoderConfig").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("ctcFstDecoderConfig").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_STR(graph, graph);
  SHERPA_ONNX_ASSIGN_ATTR_INT32(max_active, maxActive);

  return c;
}

// Also used in ./non-streaming-asr.cc
SherpaOnnxHomophoneReplacerConfig GetHomophoneReplacerConfig(Napi::Object obj) {
  SherpaOnnxHomophoneReplacerConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("hr") || !obj.Get("hr").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("hr").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_STR(lexicon, lexicon);
  SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fsts, ruleFsts);

  return c;
}

static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
#if __OHOS__
  if (info.Length() != 1 && info.Length() != 2) {
    std::ostringstream os;
    os << "Expect 1 or 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }
#else
  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }
#endif

  if (!info[0].IsObject()) {
    Napi::TypeError::New(env, "Expect an object as the argument")
        .ThrowAsJavaScriptException();

    return {};
  }

#if __OHOS__
  bool use_resource_manager =
      info.Length() == 2 && !info[1].IsUndefined() && !info[1].IsNull();
  if (use_resource_manager && !info[1].IsObject()) {
    Napi::TypeError::New(
        env, "You should pass a resource manager as the second argument.")
        .ThrowAsJavaScriptException();

    return {};
  }
#endif

  Napi::Object o = info[0].As<Napi::Object>();
  SherpaOnnxOnlineRecognizerConfig c;
  memset(&c, 0, sizeof(c));
  c.feat_config = GetFeatureConfig(o);
  c.model_config = GetOnlineModelConfig(o);
  c.hr = GetHomophoneReplacerConfig(o);

  SHERPA_ONNX_ASSIGN_ATTR_STR(decoding_method, decodingMethod);
  SHERPA_ONNX_ASSIGN_ATTR_INT32(max_active_paths, maxActivePaths);

  // enableEndpoint can be either a boolean or an integer
  if (o.Has("enableEndpoint") && (o.Get("enableEndpoint").IsNumber() ||
                                  o.Get("enableEndpoint").IsBoolean())) {
    if (o.Get("enableEndpoint").IsNumber()) {
      c.enable_endpoint =
          o.Get("enableEndpoint").As<Napi::Number>().Int32Value();
    } else {
      c.enable_endpoint = o.Get("enableEndpoint").As<Napi::Boolean>().Value();
    }
  }

  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(rule1_min_trailing_silence,
                                rule1MinTrailingSilence);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(rule2_min_trailing_silence,
                                rule2MinTrailingSilence);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(rule3_min_utterance_length,
                                rule3MinUtteranceLength);
  SHERPA_ONNX_ASSIGN_ATTR_STR(hotwords_file, hotwordsFile);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(hotwords_score, hotwordsScore);
  SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fsts, ruleFsts);
  SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fars, ruleFars);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(blank_penalty, blankPenalty);
  SHERPA_ONNX_ASSIGN_ATTR_STR(hotwords_buf, hotwordsBuf);
  SHERPA_ONNX_ASSIGN_ATTR_INT32(hotwords_buf_size, hotwordsBufSize);

  c.ctc_fst_decoder_config = GetCtcFstDecoderConfig(o);

#if __OHOS__
  const SherpaOnnxOnlineRecognizer *recognizer = nullptr;

  if (use_resource_manager) {
    std::unique_ptr<NativeResourceManager,
                    decltype(&OH_ResourceManager_ReleaseNativeResourceManager)>
        mgr(OH_ResourceManager_InitNativeResourceManager(env, info[1]),
            &OH_ResourceManager_ReleaseNativeResourceManager);

    recognizer = SherpaOnnxCreateOnlineRecognizerOHOS(&c, mgr.get());
  } else {
    recognizer = SherpaOnnxCreateOnlineRecognizer(&c);
  }
#else
  const SherpaOnnxOnlineRecognizer *recognizer =
      SherpaOnnxCreateOnlineRecognizer(&c);
#endif
  SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.encoder);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.decoder);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.joiner);

  SHERPA_ONNX_DELETE_C_STR(c.model_config.paraformer.encoder);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.paraformer.decoder);

  SHERPA_ONNX_DELETE_C_STR(c.model_config.t_one_ctc.model);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.nemo_ctc.model);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.zipformer2_ctc.model);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.tokens);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.provider);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.model_type);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.modeling_unit);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.bpe_vocab);
  SHERPA_ONNX_DELETE_C_STR(c.model_config.tokens_buf);
  SHERPA_ONNX_DELETE_C_STR(c.decoding_method);
  SHERPA_ONNX_DELETE_C_STR(c.hotwords_file);
  SHERPA_ONNX_DELETE_C_STR(c.rule_fsts);
  SHERPA_ONNX_DELETE_C_STR(c.rule_fars);
  SHERPA_ONNX_DELETE_C_STR(c.hotwords_buf);
  SHERPA_ONNX_DELETE_C_STR(c.ctc_fst_decoder_config.graph);

  SHERPA_ONNX_DELETE_C_STR(c.hr.lexicon);
  SHERPA_ONNX_DELETE_C_STR(c.hr.rule_fsts);

  if (!recognizer) {
    Napi::TypeError::New(env, "Please check your config!")
        .ThrowAsJavaScriptException();

    return {};
  }

  return Napi::External<SherpaOnnxOnlineRecognizer>::New(
      env, const_cast<SherpaOnnxOnlineRecognizer *>(recognizer),
      [](Napi::Env env, SherpaOnnxOnlineRecognizer *recognizer) {
        SherpaOnnxDestroyOnlineRecognizer(recognizer);
      });
}

static Napi::External<SherpaOnnxOnlineStream> CreateOnlineStreamWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(
        env,
        "You should pass an online recognizer pointer as the only argument")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxOnlineRecognizer *recognizer =
      info[0].As<Napi::External<SherpaOnnxOnlineRecognizer>>().Data();

  const SherpaOnnxOnlineStream *stream =
      SherpaOnnxCreateOnlineStream(recognizer);

  return Napi::External<SherpaOnnxOnlineStream>::New(
      env, const_cast<SherpaOnnxOnlineStream *>(stream),
      [](Napi::Env env, SherpaOnnxOnlineStream *stream) {
        SherpaOnnxDestroyOnlineStream(stream);
      });
}

static void AcceptWaveformWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return;
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be an online stream pointer.")
        .ThrowAsJavaScriptException();

    return;
  }

  const SherpaOnnxOnlineStream *stream =
      info[0].As<Napi::External<SherpaOnnxOnlineStream>>().Data();

  if (!info[1].IsObject()) {
    Napi::TypeError::New(env, "Argument 1 should be an object")
        .ThrowAsJavaScriptException();

    return;
  }

  Napi::Object obj = info[1].As<Napi::Object>();

  if (!obj.Has("samples")) {
    Napi::TypeError::New(env, "The argument object should have a field samples")
        .ThrowAsJavaScriptException();

    return;
  }

  if (!obj.Get("samples").IsTypedArray()) {
    Napi::TypeError::New(env, "The object['samples'] should be a typed array")
        .ThrowAsJavaScriptException();

    return;
  }

  if (!obj.Has("sampleRate")) {
    Napi::TypeError::New(env,
                         "The argument object should have a field sampleRate")
        .ThrowAsJavaScriptException();

    return;
  }

  if (!obj.Get("sampleRate").IsNumber()) {
    Napi::TypeError::New(env, "The object['samples'] should be a number")
        .ThrowAsJavaScriptException();

    return;
  }

  Napi::Float32Array samples = obj.Get("samples").As<Napi::Float32Array>();
  int32_t sample_rate = obj.Get("sampleRate").As<Napi::Number>().Int32Value();

#if __OHOS__
  SherpaOnnxOnlineStreamAcceptWaveform(stream, sample_rate, samples.Data(),
                                       samples.ElementLength() / sizeof(float));
#else
  SherpaOnnxOnlineStreamAcceptWaveform(stream, sample_rate, samples.Data(),
                                       samples.ElementLength());
#endif
}

static Napi::Boolean IsOnlineStreamReadyWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env,
                         "Argument 0 should be an online recognizer pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[1].IsExternal()) {
    Napi::TypeError::New(env, "Argument 1 should be an online stream pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxOnlineRecognizer *recognizer =
      info[0].As<Napi::External<SherpaOnnxOnlineRecognizer>>().Data();

  const SherpaOnnxOnlineStream *stream =
      info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data();

  int32_t is_ready = SherpaOnnxIsOnlineStreamReady(recognizer, stream);

  return Napi::Boolean::New(env, is_ready);
}

static void DecodeOnlineStreamWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return;
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env,
                         "Argument 0 should be an online recognizer pointer.")
        .ThrowAsJavaScriptException();

    return;
  }

  if (!info[1].IsExternal()) {
    Napi::TypeError::New(env, "Argument 1 should be an online stream pointer.")
        .ThrowAsJavaScriptException();

    return;
  }

  const SherpaOnnxOnlineRecognizer *recognizer =
      info[0].As<Napi::External<SherpaOnnxOnlineRecognizer>>().Data();

  const SherpaOnnxOnlineStream *stream =
      info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data();

  SherpaOnnxDecodeOnlineStream(recognizer, stream);
}

static Napi::String GetOnlineStreamResultAsJsonWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env,
                         "Argument 0 should be an online recognizer pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[1].IsExternal()) {
    Napi::TypeError::New(env, "Argument 1 should be an online stream pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxOnlineRecognizer *recognizer =
      info[0].As<Napi::External<SherpaOnnxOnlineRecognizer>>().Data();

  const SherpaOnnxOnlineStream *stream =
      info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data();

  const char *json = SherpaOnnxGetOnlineStreamResultAsJson(recognizer, stream);
  Napi::String s = Napi::String::New(env, json);

  SherpaOnnxDestroyOnlineStreamResultJson(json);

  return s;
}

static void InputFinishedWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return;
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be an online stream pointer.")
        .ThrowAsJavaScriptException();

    return;
  }

  const SherpaOnnxOnlineStream *stream =
      info[0].As<Napi::External<SherpaOnnxOnlineStream>>().Data();

  SherpaOnnxOnlineStreamInputFinished(stream);
}

static void ResetOnlineStreamWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return;
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env,
                         "Argument 0 should be an online recognizer pointer.")
        .ThrowAsJavaScriptException();

    return;
  }

  if (!info[1].IsExternal()) {
    Napi::TypeError::New(env, "Argument 1 should be an online stream pointer.")
        .ThrowAsJavaScriptException();

    return;
  }

  const SherpaOnnxOnlineRecognizer *recognizer =
      info[0].As<Napi::External<SherpaOnnxOnlineRecognizer>>().Data();

  const SherpaOnnxOnlineStream *stream =
      info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data();

  SherpaOnnxOnlineStreamReset(recognizer, stream);
}

static Napi::Boolean IsEndpointWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env,
                         "Argument 0 should be an online recognizer pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[1].IsExternal()) {
    Napi::TypeError::New(env, "Argument 1 should be an online stream pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxOnlineRecognizer *recognizer =
      info[0].As<Napi::External<SherpaOnnxOnlineRecognizer>>().Data();

  const SherpaOnnxOnlineStream *stream =
      info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data();

  int32_t is_endpoint = SherpaOnnxOnlineStreamIsEndpoint(recognizer, stream);

  return Napi::Boolean::New(env, is_endpoint);
}

static Napi::External<SherpaOnnxDisplay> CreateDisplayWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsNumber()) {
    Napi::TypeError::New(env, "Expect a number as the argument")
        .ThrowAsJavaScriptException();

    return {};
  }
  int32_t max_word_per_line = info[0].As<Napi::Number>().Int32Value();

  const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(max_word_per_line);

  return Napi::External<SherpaOnnxDisplay>::New(
      env, const_cast<SherpaOnnxDisplay *>(display),
      [](Napi::Env env, SherpaOnnxDisplay *display) {
        SherpaOnnxDestroyDisplay(display);
      });
}

static void PrintWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 3) {
    std::ostringstream os;
    os << "Expect only 3 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return;
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be an online stream pointer.")
        .ThrowAsJavaScriptException();

    return;
  }

  if (!info[1].IsNumber()) {
    Napi::TypeError::New(env, "Argument 1 should be a number.")
        .ThrowAsJavaScriptException();

    return;
  }

  if (!info[2].IsString()) {
    Napi::TypeError::New(env, "Argument 2 should be a string.")
        .ThrowAsJavaScriptException();

    return;
  }

  const SherpaOnnxDisplay *display =
      info[0].As<Napi::External<SherpaOnnxDisplay>>().Data();

  int32_t idx = info[1].As<Napi::Number>().Int32Value();

  Napi::String text = info[2].As<Napi::String>();
  std::string s = text.Utf8Value();
  SherpaOnnxPrint(display, idx, s.c_str());
}

void InitStreamingAsr(Napi::Env env, Napi::Object exports) {
  exports.Set(Napi::String::New(env, "createOnlineRecognizer"),
              Napi::Function::New(env, CreateOnlineRecognizerWrapper));

  exports.Set(Napi::String::New(env, "createOnlineStream"),
              Napi::Function::New(env, CreateOnlineStreamWrapper));

  exports.Set(Napi::String::New(env, "acceptWaveformOnline"),
              Napi::Function::New(env, AcceptWaveformWrapper));

  exports.Set(Napi::String::New(env, "isOnlineStreamReady"),
              Napi::Function::New(env, IsOnlineStreamReadyWrapper));

  exports.Set(Napi::String::New(env, "decodeOnlineStream"),
              Napi::Function::New(env, DecodeOnlineStreamWrapper));

  exports.Set(Napi::String::New(env, "getOnlineStreamResultAsJson"),
              Napi::Function::New(env, GetOnlineStreamResultAsJsonWrapper));

  exports.Set(Napi::String::New(env, "inputFinished"),
              Napi::Function::New(env, InputFinishedWrapper));

  exports.Set(Napi::String::New(env, "reset"),
              Napi::Function::New(env, ResetOnlineStreamWrapper));

  exports.Set(Napi::String::New(env, "isEndpoint"),
              Napi::Function::New(env, IsEndpointWrapper));

  exports.Set(Napi::String::New(env, "createDisplay"),
              Napi::Function::New(env, CreateDisplayWrapper));

  exports.Set(Napi::String::New(env, "print"),
              Napi::Function::New(env, PrintWrapper));
}


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/streaming-speech-denoiser.cc
================================================
// scripts/node-addon-api/src/streaming-speech-denoiser.cc
//
// Copyright (c)  2026  Xiaomi Corporation
#include <memory>
#include <sstream>

#include "napi.h"  // NOLINT
#include "sherpa-onnx/c-api/c-api.h"
#include "speech-denoiser.h"  // NOLINT

static Napi::External<SherpaOnnxOnlineSpeechDenoiser>
CreateOnlineSpeechDenoiserWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
#if __OHOS__
  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
    return {};
  }
#else
  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
    return {};
  }
#endif

  if (!info[0].IsObject()) {
    Napi::TypeError::New(env, "Expect an object as the argument")
        .ThrowAsJavaScriptException();
    return {};
  }

  SherpaOnnxOnlineSpeechDenoiserConfig c;
  memset(&c, 0, sizeof(c));
  c.model = GetSpeechDenoiserModelConfig(info[0].As<Napi::Object>());

#if __OHOS__
  std::unique_ptr<NativeResourceManager,
                  decltype(&OH_ResourceManager_ReleaseNativeResourceManager)>
      mgr(OH_ResourceManager_InitNativeResourceManager(env, info[1]),
          &OH_ResourceManager_ReleaseNativeResourceManager);

  const SherpaOnnxOnlineSpeechDenoiser *sd =
      SherpaOnnxCreateOnlineSpeechDenoiserOHOS(&c, mgr.get());
#else
  const SherpaOnnxOnlineSpeechDenoiser *sd =
      SherpaOnnxCreateOnlineSpeechDenoiser(&c);
#endif

  DeleteSpeechDenoiserModelConfig(c.model);

  if (!sd) {
    Napi::TypeError::New(env, "Please check your config!")
        .ThrowAsJavaScriptException();
    return {};
  }

  return Napi::External<SherpaOnnxOnlineSpeechDenoiser>::New(
      env, const_cast<SherpaOnnxOnlineSpeechDenoiser *>(sd),
      [](Napi::Env /*env*/, SherpaOnnxOnlineSpeechDenoiser *sd) {
        SherpaOnnxDestroyOnlineSpeechDenoiser(sd);
      });
}

static Napi::Object OnlineSpeechDenoiserRunWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 2 || !info[0].IsExternal() || !info[1].IsObject()) {
    Napi::TypeError::New(env, "Expect a denoiser handle and an audio object")
        .ThrowAsJavaScriptException();
    return {};
  }

  const SherpaOnnxOnlineSpeechDenoiser *sd =
      info[0].As<Napi::External<SherpaOnnxOnlineSpeechDenoiser>>().Data();
  Napi::Object obj = info[1].As<Napi::Object>();

  if (!obj.Has("samples") || !obj.Get("samples").IsTypedArray()) {
    Napi::TypeError::New(
        env, "The argument object should have a typed array field samples")
        .ThrowAsJavaScriptException();
    return {};
  }

  if (!obj.Has("sampleRate") || !obj.Get("sampleRate").IsNumber()) {
    Napi::TypeError::New(
        env, "The argument object should have a number field sampleRate")
        .ThrowAsJavaScriptException();
    return {};
  }

  Napi::Float32Array samples = obj.Get("samples").As<Napi::Float32Array>();
  int32_t sample_rate = obj.Get("sampleRate").As<Napi::Number>().Int32Value();
  const SherpaOnnxDenoisedAudio *audio = SherpaOnnxOnlineSpeechDenoiserRun(
      sd, samples.Data(), GetFloat32ArrayElementLength(samples), sample_rate);
  return CreateDenoisedAudioObject(env, audio, GetEnableExternalBuffer(obj));
}

static Napi::Object OnlineSpeechDenoiserFlushWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() < 1 || !info[0].IsExternal()) {
    Napi::TypeError::New(env, "Expect an online speech denoiser pointer.")
        .ThrowAsJavaScriptException();
    return {};
  }

  bool enable_external_buffer = true;
  if (info.Length() > 1 && info[1].IsBoolean()) {
    enable_external_buffer = info[1].As<Napi::Boolean>().Value();
  }

  const SherpaOnnxOnlineSpeechDenoiser *sd =
      info[0].As<Napi::External<SherpaOnnxOnlineSpeechDenoiser>>().Data();
  const SherpaOnnxDenoisedAudio *audio =
      SherpaOnnxOnlineSpeechDenoiserFlush(sd);
  return CreateDenoisedAudioObject(env, audio, enable_external_buffer);
}

static void OnlineSpeechDenoiserResetWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 1 || !info[0].IsExternal()) {
    Napi::TypeError::New(env, "Expect an online speech denoiser pointer.")
        .ThrowAsJavaScriptException();
    return;
  }

  const SherpaOnnxOnlineSpeechDenoiser *sd =
      info[0].As<Napi::External<SherpaOnnxOnlineSpeechDenoiser>>().Data();
  SherpaOnnxOnlineSpeechDenoiserReset(sd);
}

static Napi::Number OnlineSpeechDenoiserGetSampleRateWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 1 || !info[0].IsExternal()) {
    Napi::TypeError::New(env, "Expect an online speech denoiser pointer.")
        .ThrowAsJavaScriptException();
    return {};
  }

  const SherpaOnnxOnlineSpeechDenoiser *sd =
      info[0].As<Napi::External<SherpaOnnxOnlineSpeechDenoiser>>().Data();
  return Napi::Number::New(env,
                           SherpaOnnxOnlineSpeechDenoiserGetSampleRate(sd));
}

static Napi::Number OnlineSpeechDenoiserGetFrameShiftInSamplesWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 1 || !info[0].IsExternal()) {
    Napi::TypeError::New(env, "Expect an online speech denoiser pointer.")
        .ThrowAsJavaScriptException();
    return {};
  }

  const SherpaOnnxOnlineSpeechDenoiser *sd =
      info[0].As<Napi::External<SherpaOnnxOnlineSpeechDenoiser>>().Data();
  return Napi::Number::New(
      env, SherpaOnnxOnlineSpeechDenoiserGetFrameShiftInSamples(sd));
}

void InitOnlineSpeechDenoiser(Napi::Env env, Napi::Object exports) {
  exports.Set(Napi::String::New(env, "createOnlineSpeechDenoiser"),
              Napi::Function::New(env, CreateOnlineSpeechDenoiserWrapper));
  exports.Set(Napi::String::New(env, "onlineSpeechDenoiserRunWrapper"),
              Napi::Function::New(env, OnlineSpeechDenoiserRunWrapper));
  exports.Set(Napi::String::New(env, "onlineSpeechDenoiserFlushWrapper"),
              Napi::Function::New(env, OnlineSpeechDenoiserFlushWrapper));
  exports.Set(Napi::String::New(env, "onlineSpeechDenoiserResetWrapper"),
              Napi::Function::New(env, OnlineSpeechDenoiserResetWrapper));
  exports.Set(
      Napi::String::New(env, "onlineSpeechDenoiserGetSampleRateWrapper"),
      Napi::Function::New(env, OnlineSpeechDenoiserGetSampleRateWrapper));
  exports.Set(Napi::String::New(
                  env, "onlineSpeechDenoiserGetFrameShiftInSamplesWrapper"),
              Napi::Function::New(
                  env, OnlineSpeechDenoiserGetFrameShiftInSamplesWrapper));
}


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/types/libsherpa_onnx/Index.d.ts
================================================
export const listRawfileDir: (mgr: object, dir: string) => Array<string>;

export const readWave: (filename: string, enableExternalBuffer: boolean = true) => {samples: Float32Array, sampleRate: number};
export const readWaveFromBinary: (data: Uint8Array, enableExternalBuffer: boolean = true) => {samples: Float32Array, sampleRate: number};
export const createCircularBuffer: (capacity: number) => object;
export const circularBufferPush: (handle: object, samples: Float32Array) => void;
export const circularBufferGet: (handle: object, index: number, n: number, enableExternalBuffer: boolean = true) => Float32Array;
export const circularBufferPop: (handle: object, n: number) => void;
export const circularBufferSize: (handle: object) => number;
export const circularBufferHead: (handle: object) => number;
export const circularBufferReset: (handle: object) => void;

export const createVoiceActivityDetector: (config: object, bufferSizeInSeconds?: number, mgr?: object) => object;
export const voiceActivityDetectorAcceptWaveform: (handle: object, samples: Float32Array) => void;
export const voiceActivityDetectorIsEmpty: (handle: object) => boolean;
export const voiceActivityDetectorIsDetected: (handle: object) => boolean;
export const voiceActivityDetectorPop: (handle: object) => void;
export const voiceActivityDetectorClear: (handle: object) => void;
export const voiceActivityDetectorFront: (handle: object, enableExternalBuffer: boolean = true) => {samples: Float32Array, start: number};
export const voiceActivityDetectorReset: (handle: object) => void;
export const voiceActivityDetectorFlush: (handle: object) => void;

export const createOfflineRecognizer: (config: object, mgr?: object) => object;
export const createOfflineStream: (handle: object) => object;
export const offlineRecognizerSetConfig: (handle: object, config: object) => void;
export const acceptWaveformOffline: (handle: object, audio: object) => void;
export const decodeOfflineStream: (handle: object, streamHandle: object) => void;
export const getOfflineStreamResultAsJson: (streamHandle: object) => string;

export const createOnlineRecognizer: (config: object, mgr?: object) => object;
export const createOnlineStream: (handle: object) => object;
export const acceptWaveformOnline: (handle: object, audio: object) => void;
export const inputFinished: (streamHandle: object) => void;
export const isOnlineStreamReady: (handle: object, streamHandle: object) => boolean;
export const decodeOnlineStream: (handle: object, streamHandle: object) => void;
export const isEndpoint: (handle: object, streamHandle: object) => boolean;
export const reset: (handle: object, streamHandle: object) => void;
export const getOnlineStreamResultAsJson: (handle: object, streamHandle: object) => string;

export const createOfflineTts: (config: object, mgr?: object) => object;
export const getOfflineTtsNumSpeakers: (handle: object) => number;
export const getOfflineTtsSampleRate: (handle: object) => number;

export type TtsOutput = {
  samples: Float32Array;
  sampleRate: number;
};

export const offlineTtsGenerate: (handle: object, input: object) => TtsOutput;
export const offlineTtsGenerateWithConfig: (handle: object, input: object) => TtsOutput;
export const offlineTtsGenerateAsync: (handle: object, input: object) => Promise<TtsOutput>;
export const offlineTtsGenerateAsyncWithConfig: (handle: object, input: object) => Promise<TtsOutput>;

export const createOfflinePunctuation: (config: object, mgr?: object) => object;
export const offlinePunctuationAddPunct: (handle: object, text: string) => string;
export const createOnlinePunctuation: (config: object, mgr?: object) => object;
export const onlinePunctuationAddPunct: (handle: object, text: string) => string;

export const createSpeakerEmbeddingExtractor: (config: object, mgr?: object) => object;
export const speakerEmbeddingExtractorDim: (handle: object) => number;
export const speakerEmbeddingExtractorCreateStream: (handle: object) => object;
export const speakerEmbeddingExtractorIsReady: (handle: object, stream: object) => boolean;
export const speakerEmbeddingExtractorComputeEmbedding: (handle: object, stream: object, enableExternalBuffer: boolean) => Float32Array;
export const createSpeakerEmbeddingManager: (dim: number) => object;
export const speakerEmbeddingManagerAdd: (handle: object, speaker: {name: string, v: Float32Array}) => boolean;
export const speakerEmbeddingManagerAddListFlattened: (handle: object, speaker: {name: string, vv: Float32Array, n: number}) => boolean;
export const speakerEmbeddingManagerRemove: (handle: object, name: string) => boolean;
export const speakerEmbeddingManagerSearch: (handle: object, obj: {v: Float32Array, threshold: number}) => string;
export const speakerEmbeddingManagerVerify: (handle: object, obj: {name: string, v: Float32Array, threshold: number}) => boolean;
export const speakerEmbeddingManagerContains: (handle: object, name: string) => boolean;
export const speakerEmbeddingManagerNumSpeakers: (handle: object) => number;
export const speakerEmbeddingManagerGetAllSpeakers: (handle: object) => Array<string>;

export const createOfflineSpeakerDiarization: (config: object, mgr?: object) => object;
export const getOfflineSpeakerDiarizationSampleRate: (handle: object) => number;
export const offlineSpeakerDiarizationProcess: (handle: object, input: object) => object;
export const offlineSpeakerDiarizationProcessAsync: (handle: object, input: object, callback: object) => object;
export const offlineSpeakerDiarizationSetConfig: (handle: object, config: object) => void;

export const createKeywordSpotter: (config: object, mgr?: object) => object;
export const createKeywordStream: (handle: object, keywords?: string) => object;
export const isKeywordStreamReady: (handle: object, stream: object) => boolean;
export const decodeKeywordStream: (handle: object, stream: object) => void;
export const resetKeywordStream: (handle: object, stream: object) => void;
export const getKeywordResultAsJson: (handle: object, stream: object) => string;


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/types/libsherpa_onnx/oh-package.json5
================================================
{
  "name": "libsherpa_onnx.so",
  "types": "./Index.d.ts",
  "version": "1.0.0",
  "description": "Please describe the basic information."
}

================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/utils.cc
================================================
// Copyright (c)  2024  Xiaomi Corporation

#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "macros.h"  // NOLINT
#include "napi.h"  // NOLINT

static std::vector<std::string> GetFilenames(NativeResourceManager *mgr,
                                             const std::string &d) {
  std::unique_ptr<RawDir, decltype(&OH_ResourceManager_CloseRawDir)> raw_dir(
      OH_ResourceManager_OpenRawDir(mgr, d.c_str()),
      &OH_ResourceManager_CloseRawDir);
  int count = OH_ResourceManager_GetRawFileCount(raw_dir.get());
  std::vector<std::string> ans;
  ans.reserve(count);
  for (int32_t i = 0; i < count; ++i) {
    std::string filename = OH_ResourceManager_GetRawFileName(raw_dir.get(), i);
    bool is_dir = OH_ResourceManager_IsRawDir(
        mgr, d.empty() ? filename.c_str() : (d + "/" + filename).c_str());
    if (is_dir) {
      auto files = GetFilenames(mgr, d.empty() ? filename : d + "/" + filename);
      for (auto &f : files) {
        ans.push_back(std::move(f));
      }
    } else {
      if (d.empty()) {
        ans.push_back(std::move(filename));
      } else {
        ans.push_back(d + "/" + filename);
      }
    }
  }

  return ans;
}

static Napi::Array ListRawFileDir(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  std::unique_ptr<NativeResourceManager,
                  decltype(&OH_ResourceManager_ReleaseNativeResourceManager)>
      mgr(OH_ResourceManager_InitNativeResourceManager(env, info[0]),
          &OH_ResourceManager_ReleaseNativeResourceManager);

  if (!info[1].IsString()) {
    Napi::TypeError::New(env, "Argument 1 should be a string")
        .ThrowAsJavaScriptException();

    return {};
  }

  std::string dir = info[1].As<Napi::String>().Utf8Value();

  auto files = GetFilenames(mgr.get(), dir);
  Napi::Array ans = Napi::Array::New(env, files.size());
  for (int32_t i = 0; i != files.size(); ++i) {
    // Fix #2120
    // ans[i] = Napi::String::New(env, files[i]);
    ans.Set(i, Napi::String::New(env, files[i]));
  }
  return ans;
}
void InitUtils(Napi::Env env, Napi::Object exports) {
  exports.Set(Napi::String::New(env, "listRawfileDir"),
              Napi::Function::New(env, ListRawFileDir));
}


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/vad.cc
================================================
// scripts/node-addon-api/src/vad.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include <algorithm>
#include <memory>
#include <sstream>

#include "macros.h"  // NOLINT
#include "napi.h"    // NOLINT
#include "sherpa-onnx/c-api/c-api.h"

static Napi::External<SherpaOnnxCircularBuffer> CreateCircularBufferWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsNumber()) {
    Napi::TypeError::New(env, "You should pass an integer as the argument.")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxCircularBuffer *buf =
      SherpaOnnxCreateCircularBuffer(info[0].As<Napi::Number>().Int32Value());

  return Napi::External<SherpaOnnxCircularBuffer>::New(
      env, const_cast<SherpaOnnxCircularBuffer *>(buf),
      [](Napi::Env env, SherpaOnnxCircularBuffer *p) {
        SherpaOnnxDestroyCircularBuffer(p);
      });
}

static void CircularBufferPushWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return;
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be an CircularBuffer pointer.")
        .ThrowAsJavaScriptException();

    return;
  }

  const SherpaOnnxCircularBuffer *buf =
      info[0].As<Napi::External<SherpaOnnxCircularBuffer>>().Data();

  if (!info[1].IsTypedArray()) {
    Napi::TypeError::New(env, "Argument 1 should be a Float32Array.")
        .ThrowAsJavaScriptException();

    return;
  }

  Napi::Float32Array data = info[1].As<Napi::Float32Array>();

#if __OHOS__
  // Note(fangjun): Normally, we don't need to divied it by sizeof(float).
  // However, data.ElementLength() here returns number of bytes, not number of
  // elements.
  SherpaOnnxCircularBufferPush(buf, data.Data(),
                               data.ElementLength() / sizeof(float));
#else
  SherpaOnnxCircularBufferPush(buf, data.Data(), data.ElementLength());
#endif
}

// see https://github.com/nodejs/node-addon-api/blob/main/doc/typed_array.md
// https://github.com/nodejs/node-addon-examples/blob/main/src/2-js-to-native-conversion/typed_array_to_native/node-addon-api/typed_array_to_native.cc
static Napi::Float32Array CircularBufferGetWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 3 && info.Length() != 4) {
    std::ostringstream os;
    os << "Expect only 3 or 4 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be an CircularBuffer pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxCircularBuffer *buf =
      info[0].As<Napi::External<SherpaOnnxCircularBuffer>>().Data();

  if (!info[1].IsNumber()) {
    Napi::TypeError::New(env, "Argument 1 should be an integer (startIndex).")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[2].IsNumber()) {
    Napi::TypeError::New(env, "Argument 2 should be an integer (n).")
        .ThrowAsJavaScriptException();

    return {};
  }

  bool enable_external_buffer = true;
  if (info.Length() == 4) {
    if (info[3].IsBoolean()) {
      enable_external_buffer = info[3].As<Napi::Boolean>().Value();
    } else {
      Napi::TypeError::New(env, "Argument 3 should be a boolean.")
          .ThrowAsJavaScriptException();
    }
  }

  int32_t start_index = info[1].As<Napi::Number>().Int32Value();
  int32_t n = info[2].As<Napi::Number>().Int32Value();

  const float *data = SherpaOnnxCircularBufferGet(buf, start_index, n);

  if (enable_external_buffer) {
    Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New(
        env, const_cast<float *>(data), sizeof(float) * n,
        [](Napi::Env /*env*/, void *p) {
          SherpaOnnxCircularBufferFree(reinterpret_cast<const float *>(p));
        });

    Napi::Float32Array float32Array =
        Napi::Float32Array::New(env, n, arrayBuffer, 0);

    return float32Array;
  } else {
    // don't use external buffer
    Napi::ArrayBuffer arrayBuffer =
        Napi::ArrayBuffer::New(env, sizeof(float) * n);

    Napi::Float32Array float32Array =
        Napi::Float32Array::New(env, n, arrayBuffer, 0);

    std::copy(data, data + n, float32Array.Data());

    SherpaOnnxCircularBufferFree(data);

    return float32Array;
  }
}

static void CircularBufferPopWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return;
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be an CircularBuffer pointer.")
        .ThrowAsJavaScriptException();

    return;
  }

  const SherpaOnnxCircularBuffer *buf =
      info[0].As<Napi::External<SherpaOnnxCircularBuffer>>().Data();

  if (!info[1].IsNumber()) {
    Napi::TypeError::New(env, "Argument 1 should be an integer (n).")
        .ThrowAsJavaScriptException();

    return;
  }

  int32_t n = info[1].As<Napi::Number>().Int32Value();

  SherpaOnnxCircularBufferPop(buf, n);
}

static Napi::Number CircularBufferSizeWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be an CircularBuffer pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxCircularBuffer *buf =
      info[0].As<Napi::External<SherpaOnnxCircularBuffer>>().Data();

  int32_t size = SherpaOnnxCircularBufferSize(buf);

  return Napi::Number::New(env, size);
}

static Napi::Number CircularBufferHeadWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be an CircularBuffer pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxCircularBuffer *buf =
      info[0].As<Napi::External<SherpaOnnxCircularBuffer>>().Data();

  int32_t size = SherpaOnnxCircularBufferHead(buf);

  return Napi::Number::New(env, size);
}

static void CircularBufferResetWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return;
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be an CircularBuffer pointer.")
        .ThrowAsJavaScriptException();

    return;
  }

  const SherpaOnnxCircularBuffer *buf =
      info[0].As<Napi::External<SherpaOnnxCircularBuffer>>().Data();

  SherpaOnnxCircularBufferReset(buf);
}

static SherpaOnnxSileroVadModelConfig GetSileroVadConfig(
    const Napi::Object &obj) {
  SherpaOnnxSileroVadModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("sileroVad") || !obj.Get("sileroVad").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("sileroVad").As<Napi::Object>();
  SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(threshold, threshold);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(min_silence_duration, minSilenceDuration);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(min_speech_duration, minSpeechDuration);
  SHERPA_ONNX_ASSIGN_ATTR_INT32(window_size, windowSize);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(max_speech_duration, maxSpeechDuration);

  return c;
}

static SherpaOnnxTenVadModelConfig GetTenVadConfig(const Napi::Object &obj) {
  SherpaOnnxTenVadModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("tenVad") || !obj.Get("tenVad").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("tenVad").As<Napi::Object>();
  SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(threshold, threshold);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(min_silence_duration, minSilenceDuration);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(min_speech_duration, minSpeechDuration);
  SHERPA_ONNX_ASSIGN_ATTR_INT32(window_size, windowSize);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(max_speech_duration, maxSpeechDuration);

  return c;
}

static Napi::External<SherpaOnnxVoiceActivityDetector>
CreateVoiceActivityDetectorWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
#if __OHOS__
  // the last argument is a NativeResourceManager
  if (info.Length() != 1 && info.Length() != 2 && info.Length() != 3) {
    std::ostringstream os;
    os << "Expect 1, 2, or 3 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }
#else
  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }
#endif

  if (!info[0].IsObject()) {
    Napi::TypeError::New(env,
                         "You should pass an object as the first argument.")
        .ThrowAsJavaScriptException();

    return {};
  }

  float buffer_size_in_seconds = 60;
  if (info.Length() >= 2 && !info[1].IsUndefined() && !info[1].IsNull()) {
    if (!info[1].IsNumber()) {
      Napi::TypeError::New(env,
                           "You should pass a number as the second argument.")
          .ThrowAsJavaScriptException();

      return {};
    }

    buffer_size_in_seconds = info[1].As<Napi::Number>().FloatValue();
  }

#if __OHOS__
  bool use_resource_manager =
      info.Length() == 3 && !info[2].IsUndefined() && !info[2].IsNull();
  if (use_resource_manager && !info[2].IsObject()) {
    Napi::TypeError::New(
        env, "You should pass a resource manager as the third argument.")
        .ThrowAsJavaScriptException();

    return {};
  }
#endif

  Napi::Object o = info[0].As<Napi::Object>();

  SherpaOnnxVadModelConfig c;
  memset(&c, 0, sizeof(c));
  c.silero_vad = GetSileroVadConfig(o);
  c.ten_vad = GetTenVadConfig(o);

  SHERPA_ONNX_ASSIGN_ATTR_INT32(sample_rate, sampleRate);
  SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);
  SHERPA_ONNX_ASSIGN_ATTR_STR(provider, provider);

  if (o.Has("debug") &&
      (o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) {
    if (o.Get("debug").IsBoolean()) {
      c.debug = o.Get("debug").As<Napi::Boolean>().Value();
    } else {
      c.debug = o.Get("debug").As<Napi::Number>().Int32Value();
    }
  }

#if __OHOS__
  const SherpaOnnxVoiceActivityDetector *vad = nullptr;

  if (use_resource_manager) {
    std::unique_ptr<NativeResourceManager,
                    decltype(&OH_ResourceManager_ReleaseNativeResourceManager)>
        mgr(OH_ResourceManager_InitNativeResourceManager(env, info[2]),
            &OH_ResourceManager_ReleaseNativeResourceManager);

    vad = SherpaOnnxCreateVoiceActivityDetectorOHOS(&c, buffer_size_in_seconds,
                                                    mgr.get());
  } else {
    vad = SherpaOnnxCreateVoiceActivityDetector(&c, buffer_size_in_seconds);
  }
#else
  const SherpaOnnxVoiceActivityDetector *vad =
      SherpaOnnxCreateVoiceActivityDetector(&c, buffer_size_in_seconds);
#endif
  SHERPA_ONNX_DELETE_C_STR(c.silero_vad.model);
  SHERPA_ONNX_DELETE_C_STR(c.ten_vad.model);
  SHERPA_ONNX_DELETE_C_STR(c.provider);

  return Napi::External<SherpaOnnxVoiceActivityDetector>::New(
      env, const_cast<SherpaOnnxVoiceActivityDetector *>(vad),
      [](Napi::Env env, SherpaOnnxVoiceActivityDetector *p) {
        SherpaOnnxDestroyVoiceActivityDetector(p);
      });
}

static void VoiceActivityDetectorAcceptWaveformWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return;
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be a VAD pointer.")
        .ThrowAsJavaScriptException();

    return;
  }

  const SherpaOnnxVoiceActivityDetector *vad =
      info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data();

  if (!info[1].IsTypedArray()) {
    Napi::TypeError::New(
        env, "Argument 1 should be a Float32Array containing samples")
        .ThrowAsJavaScriptException();

    return;
  }

  Napi::Float32Array samples = info[1].As<Napi::Float32Array>();

#if __OHOS__
  // Note(fangjun): For unknown reasons, we need to use `/sizeof(float)` here
  // for Huawei
  SherpaOnnxVoiceActivityDetectorAcceptWaveform(
      vad, samples.Data(), samples.ElementLength() / sizeof(float));
#else
  SherpaOnnxVoiceActivityDetectorAcceptWaveform(vad, samples.Data(),
                                                samples.ElementLength());
#endif
}

static Napi::Boolean VoiceActivityDetectorEmptyWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be a VAD pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxVoiceActivityDetector *vad =
      info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data();

  int32_t is_empty = SherpaOnnxVoiceActivityDetectorEmpty(vad);

  return Napi::Boolean::New(env, is_empty);
}

static Napi::Boolean VoiceActivityDetectorDetectedWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be a VAD pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxVoiceActivityDetector *vad =
      info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data();

  int32_t is_detected = SherpaOnnxVoiceActivityDetectorDetected(vad);

  return Napi::Boolean::New(env, is_detected);
}

static void VoiceActivityDetectorPopWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return;
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be a VAD pointer.")
        .ThrowAsJavaScriptException();

    return;
  }

  const SherpaOnnxVoiceActivityDetector *vad =
      info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data();

  SherpaOnnxVoiceActivityDetectorPop(vad);
}

static void VoiceActivityDetectorClearWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return;
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be a VAD pointer.")
        .ThrowAsJavaScriptException();

    return;
  }

  const SherpaOnnxVoiceActivityDetector *vad =
      info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data();

  SherpaOnnxVoiceActivityDetectorClear(vad);
}

static Napi::Object VoiceActivityDetectorFrontWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 1 && info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 1 or 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be a VAD pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  bool enable_external_buffer = true;
  if (info.Length() == 2) {
    if (info[1].IsBoolean()) {
      enable_external_buffer = info[1].As<Napi::Boolean>().Value();
    } else {
      Napi::TypeError::New(env, "Argument 1 should be a boolean.")
          .ThrowAsJavaScriptException();
    }
  }

  const SherpaOnnxVoiceActivityDetector *vad =
      info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data();

  const SherpaOnnxSpeechSegment *segment =
      SherpaOnnxVoiceActivityDetectorFront(vad);

  if (enable_external_buffer) {
    Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New(
        env, const_cast<float *>(segment->samples), sizeof(float) * segment->n,
        [](Napi::Env /*env*/, void * /*data*/,
           const SherpaOnnxSpeechSegment *hint) {
          SherpaOnnxDestroySpeechSegment(hint);
        },
        segment);

    Napi::Float32Array float32Array =
        Napi::Float32Array::New(env, segment->n, arrayBuffer, 0);

    Napi::Object obj = Napi::Object::New(env);
    obj.Set(Napi::String::New(env, "start"), segment->start);
    obj.Set(Napi::String::New(env, "samples"), float32Array);

    return obj;
  } else {
    Napi::ArrayBuffer arrayBuffer =
        Napi::ArrayBuffer::New(env, sizeof(float) * segment->n);

    Napi::Float32Array float32Array =
        Napi::Float32Array::New(env, segment->n, arrayBuffer, 0);

    std::copy(segment->samples, segment->samples + segment->n,
              float32Array.Data());

    Napi::Object obj = Napi::Object::New(env);
    obj.Set(Napi::String::New(env, "start"), segment->start);
    obj.Set(Napi::String::New(env, "samples"), float32Array);

    SherpaOnnxDestroySpeechSegment(segment);

    return obj;
  }
}

static void VoiceActivityDetectorResetWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return;
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be a VAD pointer.")
        .ThrowAsJavaScriptException();

    return;
  }

  const SherpaOnnxVoiceActivityDetector *vad =
      info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data();

  SherpaOnnxVoiceActivityDetectorReset(vad);
}

static void VoiceActivityDetectorFlushWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return;
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be a VAD pointer.")
        .ThrowAsJavaScriptException();

    return;
  }

  const SherpaOnnxVoiceActivityDetector *vad =
      info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data();

  SherpaOnnxVoiceActivityDetectorFlush(vad);
}

void InitVad(Napi::Env env, Napi::Object exports) {
  exports.Set(Napi::String::New(env, "createCircularBuffer"),
              Napi::Function::New(env, CreateCircularBufferWrapper));

  exports.Set(Napi::String::New(env, "circularBufferPush"),
              Napi::Function::New(env, CircularBufferPushWrapper));

  exports.Set(Napi::String::New(env, "circularBufferGet"),
              Napi::Function::New(env, CircularBufferGetWrapper));

  exports.Set(Napi::String::New(env, "circularBufferPop"),
              Napi::Function::New(env, CircularBufferPopWrapper));

  exports.Set(Napi::String::New(env, "circularBufferSize"),
              Napi::Function::New(env, CircularBufferSizeWrapper));

  exports.Set(Napi::String::New(env, "circularBufferHead"),
              Napi::Function::New(env, CircularBufferHeadWrapper));

  exports.Set(Napi::String::New(env, "circularBufferReset"),
              Napi::Function::New(env, CircularBufferResetWrapper));

  exports.Set(Napi::String::New(env, "createVoiceActivityDetector"),
              Napi::Function::New(env, CreateVoiceActivityDetectorWrapper));

  exports.Set(
      Napi::String::New(env, "voiceActivityDetectorAcceptWaveform"),
      Napi::Function::New(env, VoiceActivityDetectorAcceptWaveformWrapper));

  exports.Set(Napi::String::New(env, "voiceActivityDetectorIsEmpty"),
              Napi::Function::New(env, VoiceActivityDetectorEmptyWrapper));

  exports.Set(Napi::String::New(env, "voiceActivityDetectorIsDetected"),
              Napi::Function::New(env, VoiceActivityDetectorDetectedWrapper));

  exports.Set(Napi::String::New(env, "voiceActivityDetectorPop"),
              Napi::Function::New(env, VoiceActivityDetectorPopWrapper));

  exports.Set(Napi::String::New(env, "voiceActivityDetectorClear"),
              Napi::Function::New(env, VoiceActivityDetectorClearWrapper));

  exports.Set(Napi::String::New(env, "voiceActivityDetectorFront"),
              Napi::Function::New(env, VoiceActivityDetectorFrontWrapper));

  exports.Set(Napi::String::New(env, "voiceActivityDetectorReset"),
              Napi::Function::New(env, VoiceActivityDetectorResetWrapper));

  exports.Set(Napi::String::New(env, "voiceActivityDetectorFlush"),
              Napi::Function::New(env, VoiceActivityDetectorFlushWrapper));
}


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/version.cc
================================================
// scripts/node-addon-api/src/version.cc
//
// Copyright (c)  2025  Xiaomi Corporation
#include <sstream>

#include "napi.h"  // NOLINT
#include "sherpa-onnx/c-api/c-api.h"

void InitVersion(Napi::Env env, Napi::Object exports) {
  Napi::String version = Napi::String::New(env, SherpaOnnxGetVersionStr());
  Napi::String git_sha1 = Napi::String::New(env, SherpaOnnxGetGitSha1());
  Napi::String git_date = Napi::String::New(env, SherpaOnnxGetGitDate());

  exports.Set(Napi::String::New(env, "version"), version);
  exports.Set(Napi::String::New(env, "gitSha1"), git_sha1);
  exports.Set(Napi::String::New(env, "gitDate"), git_date);
}


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/wave-reader.cc
================================================
// scripts/node-addon-api/src/wave-reader.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include <algorithm>
#include <sstream>
#include <string>

#include "napi.h"  // NOLINT
#include "sherpa-onnx/c-api/c-api.h"

static Napi::Object ReadWaveWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() > 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsString()) {
    Napi::TypeError::New(env, "Argument 0 should be a string")
        .ThrowAsJavaScriptException();

    return {};
  }

  std::string filename = info[0].As<Napi::String>().Utf8Value();

  bool enable_external_buffer = true;
  if (info.Length() == 2) {
    if (info[1].IsBoolean()) {
      enable_external_buffer = info[1].As<Napi::Boolean>().Value();
    } else {
      Napi::TypeError::New(env, "Argument 1 should be a boolean")
          .ThrowAsJavaScriptException();

      return {};
    }
  }

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(filename.c_str());
  if (!wave) {
    std::ostringstream os;
    os << "Failed to read '" << filename << "'";
    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (enable_external_buffer) {
    Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New(
        env, const_cast<float *>(wave->samples),
        sizeof(float) * wave->num_samples,
        [](Napi::Env /*env*/, void * /*data*/, const SherpaOnnxWave *hint) {
          SherpaOnnxFreeWave(hint);
        },
        wave);
    Napi::Float32Array float32Array =
        Napi::Float32Array::New(env, wave->num_samples, arrayBuffer, 0);

    Napi::Object obj = Napi::Object::New(env);
    obj.Set(Napi::String::New(env, "samples"), float32Array);
    obj.Set(Napi::String::New(env, "sampleRate"), wave->sample_rate);
    return obj;
  } else {
    // don't use external buffer
    Napi::ArrayBuffer arrayBuffer =
        Napi::ArrayBuffer::New(env, sizeof(float) * wave->num_samples);

    Napi::Float32Array float32Array =
        Napi::Float32Array::New(env, wave->num_samples, arrayBuffer, 0);

    std::copy(wave->samples, wave->samples + wave->num_samples,
              float32Array.Data());

    Napi::Object obj = Napi::Object::New(env);
    obj.Set(Napi::String::New(env, "samples"), float32Array);
    obj.Set(Napi::String::New(env, "sampleRate"), wave->sample_rate);

    SherpaOnnxFreeWave(wave);

    return obj;
  }
}

static Napi::Object ReadWaveFromBinaryWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() > 2) {
    std::ostringstream os;
    os << "Expect only 1 or 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsTypedArray()) {
    Napi::TypeError::New(env, "Argument 0 should be a float32 array")
        .ThrowAsJavaScriptException();

    return {};
  }

  Napi::Uint8Array data = info[0].As<Napi::Uint8Array>();
  int32_t n = data.ElementLength();
  const SherpaOnnxWave *wave = SherpaOnnxReadWaveFromBinaryData(
      reinterpret_cast<const char *>(data.Data()), n);
  if (!wave) {
    std::ostringstream os;
    os << "Failed to read wave";
    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  bool enable_external_buffer = true;
  if (info.Length() == 2) {
    if (info[1].IsBoolean()) {
      enable_external_buffer = info[1].As<Napi::Boolean>().Value();
    } else {
      Napi::TypeError::New(env, "Argument 1 should be a boolean")
          .ThrowAsJavaScriptException();

      return {};
    }
  }

  if (enable_external_buffer) {
    Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New(
        env, const_cast<float *>(wave->samples),
        sizeof(float) * wave->num_samples,
        [](Napi::Env /*env*/, void * /*data*/, const SherpaOnnxWave *hint) {
          SherpaOnnxFreeWave(hint);
        },
        wave);
    Napi::Float32Array float32Array =
        Napi::Float32Array::New(env, wave->num_samples, arrayBuffer, 0);

    Napi::Object obj = Napi::Object::New(env);
    obj.Set(Napi::String::New(env, "samples"), float32Array);
    obj.Set(Napi::String::New(env, "sampleRate"), wave->sample_rate);
    return obj;
  } else {
    // don't use external buffer
    Napi::ArrayBuffer arrayBuffer =
        Napi::ArrayBuffer::New(env, sizeof(float) * wave->num_samples);

    Napi::Float32Array float32Array =
        Napi::Float32Array::New(env, wave->num_samples, arrayBuffer, 0);

    std::copy(wave->samples, wave->samples + wave->num_samples,
              float32Array.Data());

    Napi::Object obj = Napi::Object::New(env);
    obj.Set(Napi::String::New(env, "samples"), float32Array);
    obj.Set(Napi::String::New(env, "sampleRate"), wave->sample_rate);

    SherpaOnnxFreeWave(wave);

    return obj;
  }
}

void InitWaveReader(Napi::Env env, Napi::Object exports) {
  exports.Set(Napi::String::New(env, "readWave"),
              Napi::Function::New(env, ReadWaveWrapper));

  exports.Set(Napi::String::New(env, "readWaveFromBinary"),
              Napi::Function::New(env, ReadWaveFromBinaryWrapper));
}


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/wave-writer.cc
================================================
// scripts/node-addon-api/src/wave-writer.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include <sstream>

#include "napi.h"  // NOLINT
#include "sherpa-onnx/c-api/c-api.h"

// (filename, {samples: samples, sampleRate: sampleRate}
static Napi::Boolean WriteWaveWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsString()) {
    Napi::TypeError::New(env, "Argument 0 should be a string")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[1].IsObject()) {
    Napi::TypeError::New(env, "Argument 1 should be an object")
        .ThrowAsJavaScriptException();

    return {};
  }

  Napi::Object obj = info[1].As<Napi::Object>();

  if (!obj.Has("samples")) {
    Napi::TypeError::New(env, "The argument object should have a field samples")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("samples").IsTypedArray()) {
    Napi::TypeError::New(env, "The object['samples'] should be a typed array")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Has("sampleRate")) {
    Napi::TypeError::New(env,
                         "The argument object should have a field sampleRate")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("sampleRate").IsNumber()) {
    Napi::TypeError::New(env, "The object['samples'] should be a number")
        .ThrowAsJavaScriptException();

    return {};
  }

  Napi::Float32Array samples = obj.Get("samples").As<Napi::Float32Array>();
  int32_t sample_rate = obj.Get("sampleRate").As<Napi::Number>().Int32Value();
#if __OHOS__
  int32_t ok = SherpaOnnxWriteWave(
      samples.Data(), samples.ElementLength() / sizeof(float), sample_rate,
      info[0].As<Napi::String>().Utf8Value().c_str());
#else
  int32_t ok =
      SherpaOnnxWriteWave(samples.Data(), samples.ElementLength(), sample_rate,
                          info[0].As<Napi::String>().Utf8Value().c_str());
#endif

  return Napi::Boolean::New(env, ok);
}

void InitWaveWriter(Napi::Env env, Napi::Object exports) {
  exports.Set(Napi::String::New(env, "writeWave"),
              Napi::Function::New(env, WriteWaveWrapper));
}


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/KeywordSpotting.ets
================================================
import {
  createKeywordSpotter,
  createKeywordStream,
  isKeywordStreamReady,
  decodeKeywordStream,
  resetKeywordStream,
  getKeywordResultAsJson,
} from 'libsherpa_onnx.so';

import { FeatureConfig } from './NonStreamingAsr';
import { OnlineModelConfig, OnlineStream } from './StreamingAsr';

export class KeywordSpotterConfig {
  public featConfig: FeatureConfig = new FeatureConfig();
  public modelConfig: OnlineModelConfig = new OnlineModelConfig();
  public maxActivePaths: number = 4;
  public numTrailingBlanks: number = 1;
  public keywordsScore: number = 1;
  public keywordsThreshold: number = 0.25;
  public keywordsFile: string = '';
}

interface KeywordSpotterResultJson {
  keyword: string;
  timestamps: number[];
  tokens: string[];
}

export class KeywordSpotterResult {
  public keyword: string = '';
  public tokens: string[] = [];
  public timestamps: number[] = [];
  public json: string = '';
}

export class KeywordSpotter {
  public handle: object;
  public config: KeywordSpotterConfig;

  constructor(config: KeywordSpotterConfig, mgr?: object) {
    this.handle = createKeywordSpotter(config, mgr);
    this.config = config
  }

  createStream(keywords?: string): OnlineStream {
    if (typeof keywords !== "undefined") {
      return new OnlineStream(createKeywordStream(this.handle, keywords));
    } else {
      return new OnlineStream(createKeywordStream(this.handle));
    }
  }

  isReady(stream: OnlineStream): boolean {
    return isKeywordStreamReady(this.handle, stream.handle);
  }

  decode(stream: OnlineStream) {
    decodeKeywordStream(this.handle, stream.handle);
  }

  reset(stream: OnlineStream) {
    resetKeywordStream(this.handle, stream.handle);
  }

  getResult(stream: OnlineStream): KeywordSpotterResult {
    const jsonStr: string = getKeywordResultAsJson(this.handle, stream.handle);

    let o = JSON.parse(jsonStr) as KeywordSpotterResultJson;

    const r = new KeywordSpotterResult()
    r.keyword = o.keyword
    r.timestamps = o.timestamps;
    r.tokens = o.tokens;
    r.json = jsonStr;

    return r;
  }
}


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/MainPage.ets
================================================
import hilog from '@ohos.hilog';
import testNapi from 'libsherpa_onnx.so';

@Component
export struct MainPage {
  @State message: string = 'Hello World';

  build() {
    Row() {
      Column() {
        Text(this.message)
          .fontSize(50)
          .fontWeight(FontWeight.Bold)
          .onClick(() => {
          })
      }
      .width('100%')
    }
    .height('100%')
  }
}

================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/NonStreamingAsr.ets
================================================
import {
  acceptWaveformOffline,
  createOfflineRecognizer,
  createOfflineStream,
  decodeOfflineStream,
  getOfflineStreamResultAsJson,
  offlineRecognizerSetConfig,
} from 'libsherpa_onnx.so';

export interface Samples {
  samples: Float32Array;
  sampleRate: number;
}

export class OfflineStream {
  public handle: object;

  constructor(handle: object) {
    this.handle = handle;
  }

  // obj is {samples: samples, sampleRate: sampleRate}
  // samples is a float32 array containing samples in the range [-1, 1]
  // sampleRate is a number
  acceptWaveform(obj: Samples) {
    acceptWaveformOffline(this.handle, obj)
  }
}

export class HomophoneReplacerConfig {
  public dictDir: string = '';  // unused
  public lexicon: string = '';
  public ruleFsts: string = '';
}

export class FeatureConfig {
  public sampleRate: number = 16000;
  public featureDim: number = 80;
}

export class OfflineTransducerModelConfig {
  public encoder: string = '';
  public decoder: string = '';
  public joiner: string = '';
}

export class OfflineParaformerModelConfig {
  public model: string = '';
}

export class OfflineNemoEncDecCtcModelConfig {
  public model: string = '';
}

export class OfflineDolphinModelConfig {
  public model: string = '';
}

export class OfflineOmnilingualAsrCtcModelConfig {
  public model: string = '';
}

export class OfflineMedAsrCtcModelConfig {
  public model: string = '';
}

export class OfflineFunASRNanoModelConfig {
  public encoderAdaptor: string = '';
  public llm: string = '';
  public embedding: string = '';
  public tokenizer: string = '';
  public systemPrompt: string = '';
  public userPrompt: string = '';
  public maxNewTokens: number = 0;
  public temperature: number = 1e-6;
  public topP: number = 0.8;
  public seed: number = 0;
  public language: string = '';
  public itn: number = 0;
  public hotwords: string = '';
}

export class OfflineZipformerCtcModelConfig {
  public model: string = '';
}

export class OfflineWenetCtcModelConfig {
  public model: string = '';
}

export class OfflineFireRedAsrModelConfig {
  public encoder: string = '';
  public decoder: string = '';
}

export class OfflineFireRedAsrCtcModelConfig {
  public model: string = '';
}

export class OfflineWhisperModelConfig {
  public encoder: string = '';
  public decoder: string = '';
  public language: string = '';
  public task: string = 'transcribe';
  public tailPaddings: number = -1;
  public enableTokenTimestamps: boolean = false;
  public enableSegmentTimestamps: boolean = false;
}

export class OfflineCanaryModelConfig {
  public encoder: string = '';
  public decoder: string = '';
  public srcLang: string = '';
  public tgtLang: string = '';
  public usePnc: number = 1;
}

export class OfflineTdnnModelConfig {
  public model: string = '';
}

export class OfflineSenseVoiceModelConfig {
  public model: string = '';
  public language: string = '';
  public useItn: boolean = false;
}

export class OfflineMoonshineModelConfig {
  public preprocessor: string = '';
  public encoder: string = '';
  public uncachedDecoder: string = '';
  public cachedDecoder: string = '';
  public mergedDecoder: string = '';
}

export class OfflineModelConfig {
  public transducer: OfflineTransducerModelConfig = new OfflineTransducerModelConfig();
  public paraformer: OfflineParaformerModelConfig = new OfflineParaformerModelConfig();
  public nemoCtc: OfflineNemoEncDecCtcModelConfig = new OfflineNemoEncDecCtcModelConfig();
  public whisper: OfflineWhisperModelConfig = new OfflineWhisperModelConfig();
  public tdnn: OfflineTdnnModelConfig = new OfflineTdnnModelConfig();
  public tokens: string = '';
  public numThreads: number = 1;
  public debug: boolean = false;
  public provider: string = 'cpu';
  public modelType: string = '';
  public modelingUnit: string = "cjkchar";
  public bpeVocab: string = '';
  public telespeechCtc: string = '';
  public senseVoice: OfflineSenseVoiceModelConfig = new OfflineSenseVoiceModelConfig();
  public moonshine: OfflineMoonshineModelConfig = new OfflineMoonshineModelConfig();
  public fireRedAsr: OfflineFireRedAsrModelConfig = new OfflineFireRedAsrModelConfig();
  public dolphin: OfflineDolphinModelConfig = new OfflineDolphinModelConfig();
  public zipformerCtc: OfflineZipformerCtcModelConfig = new OfflineZipformerCtcModelConfig();
  public canary: OfflineCanaryModelConfig = new OfflineCanaryModelConfig();
  public wenetCtc: OfflineWenetCtcModelConfig = new OfflineWenetCtcModelConfig();
  public omnilingual: OfflineOmnilingualAsrCtcModelConfig = new OfflineOmnilingualAsrCtcModelConfig();
  public medasr: OfflineMedAsrCtcModelConfig = new OfflineMedAsrCtcModelConfig();
  public funasrNano: OfflineFunASRNanoModelConfig = new OfflineFunASRNanoModelConfig();
  public fireRedAsrCtc: OfflineFireRedAsrCtcModelConfig = new OfflineFireRedAsrCtcModelConfig();
}

export class OfflineLMConfig {
  public model: string = '';
  public scale: number = 1.0;
}

export class OfflineRecognizerConfig {
  public featConfig: FeatureConfig = new FeatureConfig();
  public modelConfig: OfflineModelConfig = new OfflineModelConfig();
  public lmConfig: OfflineLMConfig = new OfflineLMConfig();
  public decodingMethod: string = "greedy_search";
  public maxActivePaths: number = 4;
  public hotwordsFfile: string = '';
  public hotwordsScore: number = 1.5;
  public ruleFsts: string = '';
  public ruleFars: string = '';
  public blankPenalty: number = 0;
  public hr: HomophoneReplacerConfig = new HomophoneReplacerConfig();
}

export class OfflineRecognizerResult {
  public text: string = '';
  public timestamps: number[] = [];
  public tokens: string[] = [];
  public json = '';
  public lang: string = '';
  public emotion: string = '';
  public event: string = '';
}

interface OfflineRecognizerResultJson {
  text: string;
  timestamps: number[];
  tokens: string[];
  lang: string;
  emotion: string;
  event: string;
}

export class OfflineRecognizer {
  public handle: object;
  public config: OfflineRecognizerConfig;

  constructor(config: OfflineRecognizerConfig, mgr?: object) {
    this.handle = createOfflineRecognizer(config, mgr);
    this.config = config
  }

  setConfig(config: OfflineRecognizerConfig) {
    offlineRecognizerSetConfig(this.handle, config);
  }

  createStream(): OfflineStream {
    const handle: object = createOfflineStream(this.handle);
    return new OfflineStream(handle);
  }

  decode(stream: OfflineStream) {
    decodeOfflineStream(this.handle, stream.handle);
  }

  getResult(stream: OfflineStream): OfflineRecognizerResult {
    const jsonStr: string = getOfflineStreamResultAsJson(stream.handle);

    let o = JSON.parse(jsonStr) as OfflineRecognizerResultJson;

    const r = new OfflineRecognizerResult()
    r.text = o.text
    r.timestamps = o.timestamps;
    r.tokens = o.tokens;
    r.json = jsonStr;
    r.lang = o.lang;
    r.emotion = o.emotion;
    r.event = o.event;

    return r;
  }
}


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/NonStreamingSpeakerDiarization.ets
================================================
import {
  createOfflineSpeakerDiarization,
  getOfflineSpeakerDiarizationSampleRate,
  offlineSpeakerDiarizationProcess,
  offlineSpeakerDiarizationProcessAsync,
  offlineSpeakerDiarizationSetConfig,
} from 'libsherpa_onnx.so';

import { SpeakerEmbeddingExtractorConfig } from './SpeakerIdentification';

export class OfflineSpeakerSegmentationPyannoteModelConfig {
  public model: string = '';
}

export class OfflineSpeakerSegmentationModelConfig {
  public pyannote: OfflineSpeakerSegmentationPyannoteModelConfig = new OfflineSpeakerSegmentationPyannoteModelConfig();
  public numThreads: number = 1;
  public debug: boolean = false;
  public provider: string = 'cpu';
}

export class FastClusteringConfig {
  public numClusters: number = -1;
  public threshold: number = 0.5;
}

export class OfflineSpeakerDiarizationConfig {
  public segmentation: OfflineSpeakerSegmentationModelConfig = new OfflineSpeakerSegmentationModelConfig();
  public embedding: SpeakerEmbeddingExtractorConfig = new SpeakerEmbeddingExtractorConfig();
  public clustering: FastClusteringConfig = new FastClusteringConfig();
  public minDurationOn: number = 0.2;
  public minDurationOff: number = 0.5;
}

export class OfflineSpeakerDiarizationSegment {
  // in seconds
  public start: number = 0;
  // in seconds
  public end: number = 0;
  // ID of the speaker; count from 0
  public speaker: number = 0;
}

export class OfflineSpeakerDiarization {
  public config: OfflineSpeakerDiarizationConfig;
  public sampleRate: number;
  private handle: object;

  constructor(config: OfflineSpeakerDiarizationConfig, mgr?: object) {
    this.handle = createOfflineSpeakerDiarization(config, mgr);
    this.config = config;

    this.sampleRate = getOfflineSpeakerDiarizationSampleRate(this.handle);
  }

  /**
   * samples is a 1-d float32 array. Each element of the array should be
   * in the range [-1, 1].
   *
   * We assume its sample rate equals to this.sampleRate.
   *
   * Returns an array of object, where an object is
   *
   *  {
   *    "start": start_time_in_seconds,
   *    "end": end_time_in_seconds,
   *    "speaker": an_integer,
   *  }
   */
  process(samples: Float32Array): OfflineSpeakerDiarizationSegment[] {
    return offlineSpeakerDiarizationProcess(this.handle, samples) as OfflineSpeakerDiarizationSegment[];
  }

  processAsync(samples: Float32Array, callback: (numProcessedChunks: number,
    numTotalChunks: number) => void): Promise<OfflineSpeakerDiarizationSegment[]> {
    return offlineSpeakerDiarizationProcessAsync(this.handle, samples,
      callback) as Promise<OfflineSpeakerDiarizationSegment[]>;
  }

  setConfig(config: OfflineSpeakerDiarizationConfig) {
    offlineSpeakerDiarizationSetConfig(this.handle, config);
    this.config.clustering = config.clustering;
  }
}


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/NonStreamingTts.ets
================================================
import {
  createOfflineTts,
  getOfflineTtsNumSpeakers,
  getOfflineTtsSampleRate,
  offlineTtsGenerate,
  offlineTtsGenerateAsyncWithConfig,
  offlineTtsGenerateAsync,
  offlineTtsGenerateWithConfig,
} from 'libsherpa_onnx.so';

export class OfflineTtsVitsModelConfig {
  public model: string = '';
  public lexicon: string = '';
  public tokens: string = '';
  public dataDir: string = '';
  public dictDir: String = '';  // unused
  public noiseScale: number = 0.667;
  public noiseScaleW: number = 0.8;
  public lengthScale: number = 1.0;
}

export class OfflineTtsMatchaModelConfig {
  public acousticModel: string = '';
  public vocoder: string = '';
  public lexicon: string = '';
  public tokens: string = '';
  public dataDir: string = '';
  public dictDir: String = '';  // unused
  public noiseScale: number = 0.667;
  public lengthScale: number = 1.0;
}

export class OfflineTtsKokoroModelConfig {
  public model: string = '';
  public voices: string = '';
  public tokens: string = '';
  public dataDir: string = '';
  public lengthScale: number = 1.0;
  public dictDir: string = '';  // unused
  public lexicon: string = '';
  public lang: string = '';
}

export class OfflineTtsKittenModelConfig {
  public model: string = '';
  public voices: string = '';
  public tokens: string = '';
  public dataDir: string = '';
  public lengthScale: number = 1.0;
}

export class OfflineTtsZipvoiceModelConfig {
  public tokens: string = '';
  public encoder: string = '';
  public decoder: string = '';
  public vocoder: string = '';
  public dataDir: string = '';
  public lexicon: string = '';
  public featScale: number = 0.1;
  public tShift: number = 0.5;
  public targetRms: number = 0.1;
  public guidanceScale: number = 1.0;
}

export class OfflineTtsPocketModelConfig {
  public lmFlow: string = '';
  public lmMain: string = '';
  public encoder: string = '';
  public decoder: string = '';
  public textConditioner: string = '';
  public vocabJson: string = '';
  public tokenScoresJson: string = '';
  public voiceEmbeddingCacheCapacity: number = 50;
}

export class OfflineTtsSupertonicModelConfig {
  public durationPredictor: string = '';
  public textEncoder: string = '';
  public vectorEstimator: string = '';
  public vocoder: string = '';
  public ttsJson: string = '';
  public unicodeIndexer: string = '';
  public voiceStyle: string = '';
}

export class OfflineTtsModelConfig {
  public vits: OfflineTtsVitsModelConfig = new OfflineTtsVitsModelConfig();
  public matcha: OfflineTtsMatchaModelConfig = new OfflineTtsMatchaModelConfig();
  public kokoro: OfflineTtsKokoroModelConfig = new OfflineTtsKokoroModelConfig();
  public kitten: OfflineTtsKittenModelConfig = new OfflineTtsKittenModelConfig();
  public zipvoice: OfflineTtsZipvoiceModelConfig = new OfflineTtsZipvoiceModelConfig();
  public pocket: OfflineTtsPocketModelConfig = new OfflineTtsPocketModelConfig();
  public supertonic: OfflineTtsSupertonicModelConfig = new OfflineTtsSupertonicModelConfig();
  public numThreads: number = 1;
  public debug: boolean = false;
  public provider: string = 'cpu';
}

export class OfflineTtsConfig {
  public model: OfflineTtsModelConfig = new OfflineTtsModelConfig();
  public ruleFsts: string = '';
  public ruleFars: string = '';
  public maxNumSentences: number = 1;
  public silenceScale: number = 0.2;
}

export class TtsOutput {
  public samples: Float32Array = new Float32Array(0);
  public sampleRate: number = 0;
}

interface TtsCallbackData {
  samples: Float32Array;
  progress: number;
}

export class TtsGenerationConfig {
  public silenceScale: number = 0.2;
  public speed: number = 1.0;
  public sid: number = 0;
  public referenceAudio?: Float32Array;
  public referenceSampleRate: number = 0;
  public referenceText: string = '';
  public numSteps: number = 5;
  public extra: object = new Object();
}

export class TtsInput {
  public text: string = '';
  public sid: number = 0;
  public speed: number = 1.0;
  public enableExternalBuffer: boolean = true;
  public callback?: (data: TtsCallbackData) => number;
}

export class TtsInputWithConfig {
  public text: string = '';
  public generationConfig: TtsGenerationConfig = new TtsGenerationConfig();
  public enableExternalBuffer: boolean = true;
  public callback?: (data: TtsCallbackData) => number;
}

export class OfflineTts {
  public config: OfflineTtsConfig;
  public numSpeakers: number;
  public sampleRate: number;
  private handle: object;

  constructor(config: OfflineTtsConfig, mgr?: object) {
    this.handle = createOfflineTts(config, mgr);
    this.config = config;

    this.numSpeakers = getOfflineTtsNumSpeakers(this.handle);
    this.sampleRate = getOfflineTtsSampleRate(this.handle);
  }

  /*
   input obj: {text: "xxxx", sid: 0, speed: 1.0}
   where text is a string, sid is a int32, speed is a float

   return an object {samples: Float32Array, sampleRate: <a number>}
   */
  generate(input: TtsInput): TtsOutput {
    return offlineTtsGenerate(this.handle, input);
  }

  generateAsync(input: TtsInput): Promise<TtsOutput> {
    return offlineTtsGenerateAsync(this.handle, input);
  }

  generateWithConfig(input: TtsInputWithConfig): TtsOutput {
    return offlineTtsGenerateWithConfig(this.handle, input);
  }

  generateAsyncWithConfig(input: TtsInputWithConfig): Promise<TtsOutput> {
    return offlineTtsGenerateAsyncWithConfig(this.handle, input);
  }
}


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/OfflinePunctuation.ets
================================================
import {
  createOfflinePunctuation,
  offlinePunctuationAddPunct,
} from 'libsherpa_onnx.so';

export class OfflinePunctuationModelConfig {
  public ctTransformer: string = '';
  public numThreads: number = 1;
  public debug: boolean = false;
  public provider: string = 'cpu';
}

export class OfflinePunctuationConfig {
  public model: OfflinePunctuationModelConfig = new OfflinePunctuationModelConfig();
}

export class OfflinePunctuation {
  public config: OfflinePunctuationConfig;
  private handle: object;

  constructor(config: OfflinePunctuationConfig, mgr?: object) {
    this.handle = createOfflinePunctuation(config, mgr);
    this.config = config;
  }

  addPunct(text: string): string {
    return offlinePunctuationAddPunct(this.handle, text);
  }
}


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/OnlinePunctuation.ets
================================================
import {
  createOnlinePunctuation,
  onlinePunctuationAddPunct,
} from 'libsherpa_onnx.so';

export class OnlinePunctuationModelConfig {
  public cnnBilstm: string = '';
  public bpeVocab: string = '';
  public numThreads: number = 1;
  public debug: boolean = false;
  public provider: string = 'cpu';
}

export class OnlinePunctuationConfig {
  public model: OnlinePunctuationModelConfig = new OnlinePunctuationModelConfig();
}

export class OnlinePunctuation {
  public config: OnlinePunctuationConfig;
  private handle: object;

  constructor(config: OnlinePunctuationConfig, mgr?: object) {
    this.handle = createOnlinePunctuation(config, mgr);
    this.config = config;
  }

  addPunct(text: string): string {
    return onlinePunctuationAddPunct(this.handle, text);
  }
}


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/SpeakerIdentification.ets
================================================
import {
  createSpeakerEmbeddingExtractor,
  createSpeakerEmbeddingManager,
  speakerEmbeddingExtractorComputeEmbedding,
  speakerEmbeddingExtractorCreateStream,
  speakerEmbeddingExtractorDim,
  speakerEmbeddingExtractorIsReady,
  speakerEmbeddingManagerAdd,
  speakerEmbeddingManagerAddListFlattened,
  speakerEmbeddingManagerContains,
  speakerEmbeddingManagerGetAllSpeakers,
  speakerEmbeddingManagerNumSpeakers,
  speakerEmbeddingManagerRemove,
  speakerEmbeddingManagerSearch,
  speakerEmbeddingManagerVerify
} from 'libsherpa_onnx.so';
import { OnlineStream } from './StreamingAsr';

export class SpeakerEmbeddingExtractorConfig {
  public model: string = '';
  public numThreads: number = 1;
  public debug: boolean = false;
  public provider: string = 'cpu';
}

export class SpeakerEmbeddingExtractor {
  public config: SpeakerEmbeddingExtractorConfig = new SpeakerEmbeddingExtractorConfig();
  public dim: number;
  private handle: object;

  constructor(config: SpeakerEmbeddingExtractorConfig, mgr?: object) {
    this.handle = createSpeakerEmbeddingExtractor(config, mgr);
    this.config = config;
    this.dim = speakerEmbeddingExtractorDim(this.handle);
  }

  createStream(): OnlineStream {
    return new OnlineStream(speakerEmbeddingExtractorCreateStream(this.handle));
  }

  isReady(stream: OnlineStream): boolean {
    return speakerEmbeddingExtractorIsReady(this.handle, stream.handle);
  }

  compute(stream: OnlineStream, enableExternalBuffer: boolean = true): Float32Array {
    return speakerEmbeddingExtractorComputeEmbedding(this.handle, stream.handle, enableExternalBuffer);
  }
}

function flatten(arrayList: Float32Array[]): Float32Array {
  let n = 0;
  for (let i = 0; i < arrayList.length; ++i) {
    n += arrayList[i].length;
  }
  let ans = new Float32Array(n);

  let offset = 0;
  for (let i = 0; i < arrayList.length; ++i) {
    ans.set(arrayList[i], offset);
    offset += arrayList[i].length;
  }
  return ans;
}

interface SpeakerNameWithEmbedding {
  name: string;
  v: Float32Array;
}

interface SpeakerNameWithEmbeddingList {
  name: string;
  v: Float32Array[];
}

interface SpeakerNameWithEmbeddingN {
  name: string;
  vv: Float32Array;
  n: number;
}

interface EmbeddingWithThreshold {
  v: Float32Array;
  threshold: number;
}

interface SpeakerNameEmbeddingThreshold {
  name: string;
  v: Float32Array;
  threshold: number;
}

export class SpeakerEmbeddingManager {
  public dim: number;
  private handle: object;

  constructor(dim: number) {
    this.handle = createSpeakerEmbeddingManager(dim);
    this.dim = dim;
  }

  add(speaker: SpeakerNameWithEmbedding): boolean {
    return speakerEmbeddingManagerAdd(this.handle, speaker);
  }

  addMulti(speaker: SpeakerNameWithEmbeddingList): boolean {
    const c: SpeakerNameWithEmbeddingN = {
      name: speaker.name, vv: flatten(speaker.v), n: speaker.v.length,
    };
    return speakerEmbeddingManagerAddListFlattened(this.handle, c);
  }

  remove(name: string): boolean {
    return speakerEmbeddingManagerRemove(this.handle, name);
  }

  search(obj: EmbeddingWithThreshold): string {
    return speakerEmbeddingManagerSearch(this.handle, obj);
  }

  verify(obj: SpeakerNameEmbeddingThreshold): boolean {
    return speakerEmbeddingManagerVerify(this.handle, obj);
  }

  contains(name: string): boolean {
    return speakerEmbeddingManagerContains(this.handle, name);
  }

  getNumSpeakers(): number {
    return speakerEmbeddingManagerNumSpeakers(this.handle);
  }

  getAllSpeakerNames(): string[] {
    return speakerEmbeddingManagerGetAllSpeakers(this.handle);
  }
}

================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/StreamingAsr.ets
================================================
import {
  acceptWaveformOnline,
  createOnlineRecognizer,
  createOnlineStream,
  decodeOnlineStream,
  getOnlineStreamResultAsJson,
  inputFinished,
  isEndpoint,
  isOnlineStreamReady,
  reset,
} from 'libsherpa_onnx.so';

import { FeatureConfig, HomophoneReplacerConfig, Samples } from './NonStreamingAsr';

export class OnlineStream {
  public handle: object;

  constructor(handle: object) {
    this.handle = handle;
  }

  // obj is {samples: samples, sampleRate: sampleRate}
  // samples is a float32 array containing samples in the range [-1, 1]
  // sampleRate is a number
  acceptWaveform(obj: Samples) {
    acceptWaveformOnline(this.handle, obj)
  }

  inputFinished() {
    inputFinished(this.handle)
  }
}

export class OnlineTransducerModelConfig {
  public encoder: string = '';
  public decoder: string = '';
  public joiner: string = '';
}

export class OnlineParaformerModelConfig {
  public encoder: string = '';
  public decoder: string = '';
}

export class OnlineZipformer2CtcModelConfig {
  public model: string = '';
}

export class OnlineNemoCtcModelConfig {
  public model: string = '';
}

export class OnlineToneCtcModelConfig {
  public model: string = '';
}

export class OnlineModelConfig {
  public transducer: OnlineTransducerModelConfig = new OnlineTransducerModelConfig();
  public paraformer: OnlineParaformerModelConfig = new OnlineParaformerModelConfig();
  public zipformer2Ctc: OnlineZipformer2CtcModelConfig = new OnlineZipformer2CtcModelConfig();
  public nemoCtc: OnlineNemoCtcModelConfig = new OnlineNemoCtcModelConfig();
  public toneCtc: OnlineToneCtcModelConfig = new OnlineToneCtcModelConfig();
  public tokens: string = '';
  public numThreads: number = 1;
  public provider: string = 'cpu';
  public debug: boolean = false;
  public modelType: string = '';
  public modelingUnit: string = "cjkchar";
  public bpeVocab: string = '';
  // Raw string data mirrored from the native OHOS binding; size is the string length.
  public tokensBuf: string = '';
  public tokensBufSize: number = 0;
}

export class OnlineCtcFstDecoderConfig {
  public graph: string = '';
  public maxActive: number = 3000;
}

export class OnlineRecognizerConfig {
  public featConfig: FeatureConfig = new FeatureConfig();
  public modelConfig: OnlineModelConfig = new OnlineModelConfig();
  public decodingMethod: string = 'greedy_search';
  public maxActivePaths: number = 4;
  public enableEndpoint: boolean = false;
  public rule1MinTrailingSilence: number = 2.4;
  public rule2MinTrailingSilence: number = 1.2;
  public rule3MinUtteranceLength: number = 20;
  public hotwordsFile: string = '';
  public hotwordsScore: number = 1.5;
  public ctcFstDecoderConfig: OnlineCtcFstDecoderConfig = new OnlineCtcFstDecoderConfig();
  public ruleFsts: string = '';
  public ruleFars: string = '';
  public blankPenalty: number = 0;
  // Raw string data mirrored from the native OHOS binding; size is the string length.
  public hotwordsBuf: string = '';
  public hotwordsBufSize: number = 0;
  public hr: HomophoneReplacerConfig = new HomophoneReplacerConfig();
}

interface OnlineRecognizerResultJson {
  text: string;
  timestamps: number[];
  tokens: string[];
}

export class OnlineRecognizerResult {
  public text: string = '';
  public tokens: string[] = [];
  public timestamps: number[] = [];
  public json: string = '';
}

export class OnlineRecognizer {
  public handle: object;
  public config: OnlineRecognizerConfig

  constructor(config: OnlineRecognizerConfig, mgr?: object) {
    this.handle = createOnlineRecognizer(config, mgr);
    this.config = config
  }

  createStream(): OnlineStream {
    const handle: object = createOnlineStream(this.handle);
    return new OnlineStream(handle);
  }

  isReady(stream: OnlineStream): boolean {
    return isOnlineStreamReady(this.handle, stream.handle);
  }

  decode(stream: OnlineStream) {
    decodeOnlineStream(this.handle, stream.handle);
  }

  isEndpoint(stream: OnlineStream): boolean {
    return isEndpoint(this.handle, stream.handle);
  }

  reset(stream: OnlineStream) {
    reset(this.handle, stream.handle);
  }

  getResult(stream: OnlineStream): OnlineRecognizerResult {
    const jsonStr: string = getOnlineStreamResultAsJson(this.handle, stream.handle);

    let o = JSON.parse(jsonStr) as OnlineRecognizerResultJson;

    const r = new OnlineRecognizerResult()
    r.text = o.text
    r.timestamps = o.timestamps;
    r.tokens = o.tokens;
    r.json = jsonStr;

    return r;
  }
}


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/Vad.ets
================================================
import {
  circularBufferGet,
  circularBufferHead,
  circularBufferPop,
  circularBufferPush,
  circularBufferReset,
  circularBufferSize,
  createCircularBuffer,
  createVoiceActivityDetector,
  voiceActivityDetectorAcceptWaveform,
  voiceActivityDetectorClear,
  voiceActivityDetectorFlush,
  voiceActivityDetectorFront,
  voiceActivityDetectorIsDetected,
  voiceActivityDetectorIsEmpty,
  voiceActivityDetectorPop,
  voiceActivityDetectorReset,
} from 'libsherpa_onnx.so';

export class SileroVadConfig {
  public model: string;
  public threshold: number;
  public minSpeechDuration: number;
  public minSilenceDuration: number;
  public windowSize: number;
  public maxSpeechDuration: number;

  public constructor(model: string, threshold: number, minSpeechDuration: number, minSilenceDuration: number,
    windowSize: number, maxSpeechDuration: number = 20) {
    this.model = model;
    this.threshold = threshold;
    this.minSpeechDuration = minSpeechDuration;
    this.minSilenceDuration = minSilenceDuration;
    this.windowSize = windowSize;
    this.maxSpeechDuration = maxSpeechDuration
  }
}

export class TenVadConfig {
  public model: string;
  public threshold: number;
  public minSpeechDuration: number;
  public minSilenceDuration: number;
  public windowSize: number;
  public maxSpeechDuration: number;

  public constructor(model: string, threshold: number, minSpeechDuration: number, minSilenceDuration: number,
    windowSize: number, maxSpeechDuration: number = 20) {
    this.model = model;
    this.threshold = threshold;
    this.minSpeechDuration = minSpeechDuration;
    this.minSilenceDuration = minSilenceDuration;
    this.windowSize = windowSize;
    this.maxSpeechDuration = maxSpeechDuration
  }
}

export class VadConfig {
  public sileroVad: SileroVadConfig;
  public tenVad: TenVadConfig;
  public sampleRate: number;
  public debug: boolean;
  public numThreads: number;
  public provider: string = 'cpu';

  public constructor(sileroVad: SileroVadConfig, tenVad: TenVadConfig, sampleRate: number, debug: boolean,
    numThreads: number, provider: string = 'cpu') {
    this.sileroVad = sileroVad;
    this.tenVad = tenVad;
    this.sampleRate = sampleRate;
    this.debug = debug;
    this.numThreads = numThreads;
    this.provider = provider;
  }
}

export class CircularBuffer {
  private handle: object;

  constructor(capacity: number) {
    this.handle = createCircularBuffer(capacity);
  }

  // samples is a float32 array
  push(samples: Float32Array) {
    circularBufferPush(this.handle, samples);
  }

  // return a float32 array
  get(startIndex: number, n: number, enableExternalBuffer: boolean = true): Float32Array {
    return circularBufferGet(this.handle, startIndex, n, enableExternalBuffer);
  }

  pop(n: number) {
    circularBufferPop(this.handle, n);
  }

  size(): number {
    return circularBufferSize(this.handle);
  }

  head(): number {
    return circularBufferHead(this.handle);
  }

  reset() {
    circularBufferReset(this.handle);
  }
}

export interface SpeechSegment {
  samples: Float32Array;
  start: number;
}

export class Vad {
  public config: VadConfig;
  private handle: object;

  constructor(config: VadConfig, bufferSizeInSeconds: number = 60, mgr?: object) {
    this.handle = createVoiceActivityDetector(config, bufferSizeInSeconds, mgr);
    this.config = config;
  }

  acceptWaveform(samples: Float32Array): void {
    voiceActivityDetectorAcceptWaveform(this.handle, samples);
  }

  isEmpty(): boolean {
    return voiceActivityDetectorIsEmpty(this.handle);
  }

  isDetected(): boolean {
    return voiceActivityDetectorIsDetected(this.handle);
  }

  pop(): void {
    voiceActivityDetectorPop(this.handle);
  }

  clear(): void {
    voiceActivityDetectorClear(this.handle);
  }

  front(enableExternalBuffer = true): SpeechSegment {
    return voiceActivityDetectorFront(this.handle, enableExternalBuffer);
  }

  reset(): void {
    voiceActivityDetectorReset(this.handle);
  }

  flush(): void {
    voiceActivityDetectorFlush(this.handle);
  }
}


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/module.json5
================================================
{
  "module": {
    "name": "sherpa_onnx",
    "type": "har",
    "deviceTypes": [
      "default",
      "tablet",
      "2in1"
    ]
  }
}


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/resources/base/element/string.json
================================================
{
  "string": [
    {
      "name": "page_show",
      "value": "page from package"
    }
  ]
}


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/resources/en_US/element/string.json
================================================
{
  "string": [
    {
      "name": "page_show",
      "value": "page from package"
    }
  ]
}


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/resources/zh_CN/element/string.json
================================================
{
  "string": [
    {
      "name": "page_show",
      "value": "page from package"
    }
  ]
}


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/ohosTest/ets/test/Ability.test.ets
================================================
import hilog from '@ohos.hilog';
import { describe, beforeAll, beforeEach, afterEach, afterAll, it, expect } from '@ohos/hypium';

export default function abilityTest() {
  describe('ActsAbilityTest', () => {
    // Defines a test suite. Two parameters are supported: test suite name and test suite function.
    beforeAll(() => {
      // Presets an action, which is performed only once before all test cases of the test suite start.
      // This API supports only one parameter: preset action function.
    })
    beforeEach(() => {
      // Presets an action, which is performed before each unit test case starts.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: preset action function.
    })
    afterEach(() => {
      // Presets a clear action, which is performed after each unit test case ends.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: clear action function.
    })
    afterAll(() => {
      // Presets a clear action, which is performed after all test cases of the test suite end.
      // This API supports only one parameter: clear action function.
    })
    it('assertContain', 0, () => {
      // Defines a test case. This API supports three parameters: test case name, filter parameter, and test case function.
      hilog.info(0x0000, 'testTag', '%{public}s', 'it begin');
      let a = 'abc';
      let b = 'b';
      // Defines a variety of assertion methods, which are used to declare expected boolean conditions.
      expect(a).assertContain(b);
      expect(a).assertEqual(a);
    })
  })
}

================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/ohosTest/ets/test/List.test.ets
================================================
import abilityTest from './Ability.test';

export default function testsuite() {
  abilityTest();
}

================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/ohosTest/module.json5
================================================
{
  "module": {
    "name": "sherpa_onnx_test",
    "type": "feature",
    "deviceTypes": [
      "default",
      "tablet",
      "2in1"
    ],
    "deliveryWithInstall": true,
    "installationFree": false
  }
}


================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/test/List.test.ets
================================================
import localUnitTest from './LocalUnit.test';

export default function testsuite() {
  localUnitTest();
}

================================================
FILE: harmony-os/SherpaOnnxHar/sherpa_onnx/src/test/LocalUnit.test.ets
================================================
import { describe, beforeAll, beforeEach, afterEach, afterAll, it, expect } from '@ohos/hypium';

export default function localUnitTest() {
  describe('localUnitTest', () => {
    // Defines a test suite. Two parameters are supported: test suite name and test suite function.
    beforeAll(() => {
      // Presets an action, which is performed only once before all test cases of the test suite start.
      // This API supports only one parameter: preset action function.
    });
    beforeEach(() => {
      // Presets an action, which is performed before each unit test case starts.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: preset action function.
    });
    afterEach(() => {
      // Presets a clear action, which is performed after each unit test case ends.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: clear action function.
    });
    afterAll(() => {
      // Presets a clear action, which is performed after all test cases of the test suite end.
      // This API supports only one parameter: clear action function.
    });
    it('assertContain', 0, () => {
      // Defines a test case. This API supports three parameters: test case name, filter parameter, and test case function.
      let a = 'abc';
      let b = 'b';
      // Defines a variety of assertion methods, which are used to declare expected boolean conditions.
      expect(a).assertContain(b);
      expect(a).assertEqual(a);
    });
  });
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerDiarization/.gitignore
================================================
/node_modules
/oh_modules
/local.properties
/.idea
**/build
/.hvigor
.cxx
/.clangd
/.clang-format
/.clang-tidy
**/.test
/.appanalyzer

================================================
FILE: harmony-os/SherpaOnnxSpeakerDiarization/AppScope/app.json5
================================================
{
  "app": {
    "bundleName": "com.k2fsa.sherpa.onnx.speaker.diarization",
    "vendor": "example",
    "versionCode": 1000000,
    "versionName": "1.0.0",
    "icon": "$media:app_icon",
    "label": "$string:app_name"
  }
}


================================================
FILE: harmony-os/SherpaOnnxSpeakerDiarization/AppScope/resources/base/element/string.json
================================================
{
  "string": [
    {
      "name": "app_name",
      "value": "SherpaOnnxSpeakerDiarization"
    }
  ]
}


================================================
FILE: harmony-os/SherpaOnnxSpeakerDiarization/build-profile.json5
================================================
{
  "app": {
    "signingConfigs": [],
    "products": [
      {
        "name": "default",
        "signingConfig": "default",
        "compatibleSdkVersion": "4.0.0(10)",
        "runtimeOS": "HarmonyOS",
        "buildOption": {
          "strictMode": {
            "caseSensitiveCheck": true,
          }
        }
      }
    ],
    "buildModeSet": [
      {
        "name": "debug",
      },
      {
        "name": "release"
      }
    ]
  },
  "modules": [
    {
      "name": "entry",
      "srcPath": "./entry",
      "targets": [
        {
          "name": "default",
          "applyToProducts": [
            "default"
          ]
        }
      ]
    }
  ]
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerDiarization/code-linter.json5
================================================
{
  "files": [
    "**/*.ets"
  ],
  "ignore": [
    "**/src/ohosTest/**/*",
    "**/src/test/**/*",
    "**/src/mock/**/*",
    "**/node_modules/**/*",
    "**/oh_modules/**/*",
    "**/build/**/*",
    "**/.preview/**/*"
  ],
  "ruleSet": [
    "plugin:@performance/recommended",
    "plugin:@typescript-eslint/recommended"
  ],
  "rules": {
  }
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerDiarization/entry/.gitignore
================================================
/node_modules
/oh_modules
/.preview
/build
/.cxx
/.test

================================================
FILE: harmony-os/SherpaOnnxSpeakerDiarization/entry/build-profile.json5
================================================
{
  "apiType": "stageMode",
  "buildOption": {
    "sourceOption": {
      "workers": [
        './src/main/ets/workers/SpeakerDiarizationWorker.ets'
      ]
    }
  },
  "buildOptionSet": [
    {
      "name": "release",
      "arkOptions": {
        "obfuscation": {
          "ruleOptions": {
            "enable": false,
            "files": [
              "./obfuscation-rules.txt"
            ]
          }
        }
      }
    },
  ],
  "targets": [
    {
      "name": "default"
    },
    {
      "name": "ohosTest",
    }
  ]
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerDiarization/entry/hvigorfile.ts
================================================
import { hapTasks } from '@ohos/hvigor-ohos-plugin';

export default {
    system: hapTasks,  /* Built-in plugin of Hvigor. It cannot be modified. */
    plugins:[]         /* Custom plugin to extend the functionality of Hvigor. */
}


================================================
FILE: harmony-os/SherpaOnnxSpeakerDiarization/entry/obfuscation-rules.txt
================================================
# Define project specific obfuscation rules here.
# You can include the obfuscation configuration files in the current module's build-profile.json5.
#
# For more details, see
#   https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/source-obfuscation-V5

# Obfuscation options:
# -disable-obfuscation: disable all obfuscations
# -enable-property-obfuscation: obfuscate the property names
# -enable-toplevel-obfuscation: obfuscate the names in the global scope
# -compact: remove unnecessary blank spaces and all line feeds
# -remove-log: remove all console.* statements
# -print-namecache: print the name cache that contains the mapping from the old names to new names
# -apply-namecache: reuse the given cache file

# Keep options:
# -keep-property-name: specifies property names that you want to keep
# -keep-global-name: specifies names that you want to keep in the global scope

-enable-property-obfuscation
-enable-toplevel-obfuscation
-enable-filename-obfuscation
-enable-export-obfuscation

================================================
FILE: harmony-os/SherpaOnnxSpeakerDiarization/entry/oh-package.json5
================================================
{
  "name": "entry",
  "version": "1.0.0",
  "description": "Please describe the basic information.",
  "main": "",
  "author": "",
  "license": "",
  "dependencies": {
    "sherpa_onnx": "1.12.31"
  }
}


================================================
FILE: harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/ets/entryability/EntryAbility.ets
================================================
import AbilityConstant from '@ohos.app.ability.AbilityConstant';
import hilog from '@ohos.hilog';
import UIAbility from '@ohos.app.ability.UIAbility';
import Want from '@ohos.app.ability.Want';
import window from '@ohos.window';

export default class EntryAbility extends UIAbility {
  onCreate(want: Want, launchParam: AbilityConstant.LaunchParam): void {
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onCreate');
  }

  onDestroy(): void {
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onDestroy');
  }

  onWindowStageCreate(windowStage: window.WindowStage): void {
    // Main window is created, set main page for this ability
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onWindowStageCreate');

    windowStage.loadContent('pages/Index', (err) => {
      if (err.code) {
        hilog.error(0x0000, 'testTag', 'Failed to load the content. Cause: %{public}s', JSON.stringify(err) ?? '');
        return;
      }
      hilog.info(0x0000, 'testTag', 'Succeeded in loading the content.');
    });
  }

  onWindowStageDestroy(): void {
    // Main window is destroyed, release UI related resources
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onWindowStageDestroy');
  }

  onForeground(): void {
    // Ability has brought to foreground
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onForeground');
  }

  onBackground(): void {
    // Ability has back to background
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onBackground');
  }
}


================================================
FILE: harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/ets/entrybackupability/EntryBackupAbility.ets
================================================
import hilog from '@ohos.hilog';
import BackupExtensionAbility, { BundleVersion } from '@ohos.application.BackupExtensionAbility';

export default class EntryBackupAbility extends BackupExtensionAbility {
  async onBackup() {
    hilog.info(0x0000, 'testTag', 'onBackup ok');
  }

  async onRestore(bundleVersion: BundleVersion) {
    hilog.info(0x0000, 'testTag', 'onRestore ok %{public}s', JSON.stringify(bundleVersion));
  }
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/ets/pages/Index.ets
================================================
import { LengthUnit, promptAction } from '@kit.ArkUI';
import worker, { MessageEvents } from '@ohos.worker';
import { BusinessError, pasteboard } from '@kit.BasicServicesKit';
import { picker } from '@kit.CoreFileKit';


@Entry
@Component
struct Index {
  @State title: string = 'Next-gen Kaldi: Speaker Diarization';
  @State titleFontSize: number = 15;
  @State currentIndex: number = 0;
  @State resultForFile: string = '';
  @State resultForMic: string = '';
  @State progressForFile: number = 0;
  @State selectFileBtnEnabled: boolean = false;
  @State copyBtnForFileEnabled: boolean = false;
  private controller: TabsController = new TabsController();
  private workerInstance?: worker.ThreadWorker
  private readonly scriptURL: string = 'entry/ets/workers/SpeakerDiarizationWorker.ets'
  private numSpeakers: string = '-1';

  @Builder
  TabBuilder(title: string, targetIndex: number, selectedImg: Resource, normalImg: Resource) {
    Column() {
      Image(this.currentIndex == targetIndex ? selectedImg : normalImg).size({ width: 25, height: 25 })
      Text(title).fontColor(this.currentIndex == targetIndex ? '#28bff1' : '#8a8a8a')
    }.width('100%').height(50).justifyContent(FlexAlign.Center).onClick(() => {
      this.currentIndex = targetIndex;
      this.controller.changeIndex(this.currentIndex);
    })
  }

  aboutToAppear(): void {
    this.workerInstance = new worker.ThreadWorker(this.scriptURL, {
      name: 'Streaming ASR worker'
    });

    this.workerInstance.onmessage = (e: MessageEvents) => {
      const msgType = e.data['msgType'] as string;

      if (msgType != 'speaker-diarization-file-progress') {
        console.log(`received msg from worker: ${msgType}`);
      }

      if (msgType == 'init-speaker-diarization-done') {
        console.log('Speaker diarization initialized successfully');

        this.resultForFile = 'Initialization finished.\nPlease select a .wav file.';
        this.resultForMic = 'Initialization finished.\nPlease click the button Start recording.';

        this.selectFileBtnEnabled = true;
      }

      if (msgType == 'speaker-diarization-file-progress') {
        this.progressForFile = e.data['progress'] as number;
      }

      if (msgType == 'speaker-diarization-file-done') {
        const result = e.data['result'] as string;
        this.resultForFile = result;

        this.selectFileBtnEnabled = true;
        this.copyBtnForFileEnabled = true;
      }
    };

    const context = getContext();
    this.workerInstance.postMessage({ msgType: 'init-speaker-diarization', context });
    console.log('initializing');
    this.resultForFile = 'Initializing models. Please wait';
    this.resultForMic = this.resultForFile;
  }

  build() {
    Column() {
      Tabs({ barPosition: BarPosition.End, controller: this.controller }) {
        TabContent() {
          Column({ space: 10 }) {
            Text(this.title).fontSize(this.titleFontSize).fontWeight(FontWeight.Bold);
            Row({ space: 10 }) {
              Text(`Number of speakers`).width('60%')

              TextInput({ text: this.numSpeakers }).onChange((text) => {
                this.numSpeakers = text.trim();
              }).width('20%')
            }.justifyContent(FlexAlign.Center)

            Row({ space: 10 }) {
              Button('Select .wav file (16kHz) ').enabled(this.selectFileBtnEnabled).onClick(() => {
                this.resultForFile = '';
                this.progressForFile = 0;
                this.copyBtnForFileEnabled = false;

                let numSpeakers = parseInt(this.numSpeakers);
                if (numSpeakers.toString() != this.numSpeakers) {
                  this.resultForFile =
                    'Please input a valid value for the number of speakers in the .wav file you are going to select';
                  return;
                }

                if (numSpeakers < 1) {
                  this.resultForFile =
                    'Please input a positive value for the number of speakers in the .wav file you are going to select';
                  return;
                }

                this.selectFileBtnEnabled = false;

                const documentSelectOptions = new picker.DocumentSelectOptions();
                documentSelectOptions.maxSelectNumber = 1;
                documentSelectOptions.fileSuffixFilters = ['.wav'];
                const documentViewPicker = new picker.DocumentViewPicker();

                documentViewPicker.select(documentSelectOptions).then((result: Array<string>) => {
                  console.log(`select file result: ${result}`);

                  if (!result[0]) {
                    this.resultForFile = 'Please select a file to decode';
                    this.selectFileBtnEnabled = true;
                    return;
                  }

                  if (this.workerInstance) {
                    this.workerInstance.postMessage({
                      msgType: 'speaker-diarization-file', filename: result[0], numSpeakers,
                    });
                    this.resultForFile = `Decoding ${result[0]} ... ...`;
                  } else {
                    console.log(`this worker instance is undefined ${this.workerInstance}`);
                  }
                }).catch((err: BusinessError) => {
                  console.error(`Failed to select file, code is ${err.code}, message is ${err.message}`);
                  this.selectFileBtnEnabled = true;
                })
              })
              Button('Copy results')
                .enabled(this.copyBtnForFileEnabled)
                .onClick(() => { // See https://developer.huawei.com/consumer/cn/doc/harmonyos-faqs/faqs-arkui-308-V5
                  const pasteboardData = pasteboard.createData(pasteboard.MIMETYPE_TEXT_PLAIN, this.resultForFile);
                  const systemPasteboard = pasteboard.getSystemPasteboard();
                  systemPasteboard.setData(pasteboardData);
                  systemPasteboard.getData().then((data) => {
                    if (data) {
                      promptAction.showToast({ message: 'Result copied.' });
                    } else {
                      promptAction.showToast({ message: 'Failed to copy' });
                    }
                  })
                })
            }

            if (this.progressForFile > 0) {
              Row() {
                Progress({ value: 0, total: 100, type: ProgressType.Capsule })
                  .width('80%')
                  .height(20)
                  .value(this.progressForFile);

                Text(`${this.progressForFile.toFixed(2)}%`).width('15%')
              }.width('100%').justifyContent(FlexAlign.Center)
            }

            TextArea({ text: this.resultForFile })
              .lineSpacing({ value: 10, unit: LengthUnit.VP })
              .width('100%')
              .height('100%')
          }
        }.tabBar(this.TabBuilder('From file', 0, $r('app.media.icon_doc'), $r('app.media.icon_doc')))

        TabContent() {
          Column({ space: 10 }) {
            Text(this.title).fontSize(this.titleFontSize).fontWeight(FontWeight.Bold);
            TextArea({
              text: `
Everyting is open-sourced.

It runs locally, without accessing the network

See also https://github.com/k2-fsa/sherpa-onnx

新一代 Kaldi QQ 和微信交流群: 请看

https://k2-fsa.github.io/sherpa/social-groups.html

微信公众号: 新一代 Kaldi
            `
            }).width('100%').height('100%').focusable(false)
          }.justifyContent(FlexAlign.Start)
        }.tabBar(this.TabBuilder('Help', 1, $r('app.media.info'), $r('app.media.info')))
      }.scrollable(false)
    }
  }
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/ets/workers/SpeakerDiarizationWorker.ets
================================================
import worker, { ErrorEvent, MessageEvents, ThreadWorkerGlobalScope } from '@ohos.worker';
import {
  OfflineSpeakerDiarization,
  OfflineSpeakerDiarizationConfig,
  OfflineSpeakerDiarizationSegment,
  readWaveFromBinary,
  Samples
} from 'sherpa_onnx';
import { fileIo } from '@kit.CoreFileKit';

const workerPort: ThreadWorkerGlobalScope = worker.workerPort;

let sd: OfflineSpeakerDiarization;
let useAsync: boolean = true;

function readWave(filename: string): Samples {
  const fp = fileIo.openSync(filename);
  const stat = fileIo.statSync(fp.fd);
  const arrayBuffer = new ArrayBuffer(stat.size);
  fileIo.readSync(fp.fd, arrayBuffer);
  const data: Uint8Array = new Uint8Array(arrayBuffer);
  return readWaveFromBinary(data) as Samples;
}

function initOfflineSpeakerDiarization(context: Context): OfflineSpeakerDiarization {
  const config: OfflineSpeakerDiarizationConfig = new OfflineSpeakerDiarizationConfig();

  // Please refer to https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
  // to download models.
  // Make sure you have placed it inside the directory
  // harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/resources/rawfile
  //
  // Also, please delete unused files to reduce the size of the app
  config.segmentation.pyannote.model = 'sherpa-onnx-pyannote-segmentation-3-0/model.int8.onnx';
  config.segmentation.numThreads = 2;
  config.segmentation.debug = true;

  // Please refer to https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
  // to download models.
  // Make sure you have placed it inside the directory
  // harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/resources/rawfile
  config.embedding.model = '3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx';
  config.embedding.numThreads = 2;
  config.embedding.debug = true;

  config.minDurationOn = 0.2;
  config.minDurationOff = 0.5;
  return new OfflineSpeakerDiarization(config, context.resourceManager);

  // For the above two models files, you should have the following directory structure
  /*
  (py38) fangjuns-MacBook-Pro:rawfile fangjun$ pwd
  /Users/fangjun/open-source/sherpa-onnx/harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/resources/rawfile
  (py38) fangjuns-MacBook-Pro:rawfile fangjun$ ls -lh
  total 77336
  -rw-r--r--  1 fangjun  staff    38M Dec 10 16:28 3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
  drwxr-xr-x  3 fangjun  staff    96B Dec 10 19:36 sherpa-onnx-pyannote-segmentation-3-0
  (py38) fangjuns-MacBook-Pro:rawfile fangjun$ tree .
  .
  ├── 3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
  └── sherpa-onnx-pyannote-segmentation-3-0
      └── model.int8.onnx

  1 directory, 2 files

  (Note that we have kept only model.int8.onnx and removed all other files
  from sherpa-onnx-pyannote-segmentation-3-0
  )
   */
}

/**
 * Defines the event handler to be called when the worker thread receives a message sent by the host thread.
 * The event handler is executed in the worker thread.
 *
 * @param e message data
 */
workerPort.onmessage = (e: MessageEvents) => {
  const msgType = e.data['msgType'] as string;

  console.log(`from the main thread, msg-type: ${msgType}`);
  if (msgType == 'init-speaker-diarization' && !sd) {
    const context: Context = e.data['context'] as Context;
    sd = initOfflineSpeakerDiarization(context);
    workerPort.postMessage({ msgType: 'init-speaker-diarization-done' });
    console.log('Init sd done');
  }

  if (msgType == 'speaker-diarization-file') {
    const filename = e.data['filename'] as string;
    const numSpeakers = e.data['numSpeakers'] as number;
    const wave = readWave(filename);
    let result = '';
    if (wave == undefined || wave == null) {
      result = `Failed to read ${filename}`;

      workerPort.postMessage({
        msgType: 'speaker-diarization-file-done', result
      });
      return;
    }

    if (wave.sampleRate != sd.sampleRate) {
      result = `Expected sample rate: ${sd.sampleRate}`;
      result += '\n';
      result += `Sample rate in file ${filename} is ${wave.sampleRate}`;

      workerPort.postMessage({
        msgType: 'speaker-diarization-file-done', result
      });

      return;
    }

    const duration = wave.samples.length / wave.sampleRate;
    console.log(`Processing ${filename} of ${duration} seconds`);

    // You can remove this if statement if you want
    if (duration < 0.3) {
      result = `${filename} has only ${duration} seconds. Please use a longer file`;

      workerPort.postMessage({
        msgType: 'speaker-diarization-file-done', result
      });
      return;
    }
    sd.config.clustering.numClusters = numSpeakers;
    sd.setConfig(sd.config);

    if (useAsync) {
      sd.processAsync(wave.samples, (numProcessedChunks: number, numTotalChunks: number) => {
        const progress = numProcessedChunks / numTotalChunks * 100;
        workerPort.postMessage({
          msgType: 'speaker-diarization-file-progress', progress
        });
      }).then((r: OfflineSpeakerDiarizationSegment[]) => {
        console.log(`r is ${r.length}, ${r}`);

        for (const s of r) {
          const start: string = s.start.toFixed(3);
          const end: string = s.end.toFixed(3);
          result += `${start}\t--\t${end}\tspeaker_${s.speaker}\n`;
          console.log(`result: ${result}`);
        }

        if (r.length == 0) {
          result = 'The result is empty';
        }

        workerPort.postMessage({
          msgType: 'speaker-diarization-file-done', result
        });
      });
    } else {
      const r: OfflineSpeakerDiarizationSegment[] = sd.process(wave.samples)
      console.log(`r is ${r.length}, ${r}`);
      for (const s of r) {
        const start: string = s.start.toFixed(3);
        const end: string = s.end.toFixed(3);
        result += `${start}\t--\t${end}\tspeaker_${s.speaker}\n`;
        console.log(`result: ${result}`);
      }

      if (r.length == 0) {
        result = 'The result is empty';
      }

      workerPort.postMessage({
        msgType: 'speaker-diarization-file-done', result
      });
    }
  }
} /**
 * Defines the event handler to be called when the worker receives a message that cannot be deserialized.
 * The event handler is executed in the worker thread.
 *
 * @param e message data
 */
workerPort.onmessageerror = (e: MessageEvents) => {
}

/**
 * Defines the event handler to be called when an exception occurs during worker execution.
 * The event handler is executed in the worker thread.
 *
 * @param e error message
 */
workerPort.onerror = (e: ErrorEvent) => {
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/module.json5
================================================
{
  "module": {
    "name": "entry",
    "type": "entry",
    "description": "$string:module_desc",
    "mainElement": "EntryAbility",
    "deviceTypes": [
      "phone",
      "tablet",
      "2in1"
    ],
    "deliveryWithInstall": true,
    "installationFree": false,
    "pages": "$profile:main_pages",
    "abilities": [
      {
        "name": "EntryAbility",
        "srcEntry": "./ets/entryability/EntryAbility.ets",
        "description": "$string:EntryAbility_desc",
        "icon": "$media:layered_image",
        "label": "$string:EntryAbility_label",
        "startWindowIcon": "$media:startIcon",
        "startWindowBackground": "$color:start_window_background",
        "exported": true,
        "skills": [
          {
            "entities": [
              "entity.system.home"
            ],
            "actions": [
              "action.system.home"
            ]
          }
        ]
      }
    ],
    "extensionAbilities": [
      {
        "name": "EntryBackupAbility",
        "srcEntry": "./ets/entrybackupability/EntryBackupAbility.ets",
        "type": "backup",
        "exported": false,
        "metadata": [
          {
            "name": "ohos.extension.backup",
            "resource": "$profile:backup_config"
          }
        ],
      }
    ]
  }
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/resources/base/element/color.json
================================================
{
  "color": [
    {
      "name": "start_window_background",
      "value": "#FFFFFF"
    }
  ]
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/resources/base/element/string.json
================================================
{
  "string": [
    {
      "name": "module_desc",
      "value": "On-device speaker diarization with Next-gen Kaldi"
    },
    {
      "name": "EntryAbility_desc",
      "value": "On-device speaker diarization with Next-gen Kaldi"
    },
    {
      "name": "EntryAbility_label",
      "value": "Speaker diarization"
    }
  ]
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/resources/base/media/layered_image.json
================================================
{
  "layered-image":
  {
    "background" : "$media:background",
    "foreground" : "$media:foreground"
  }
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/resources/base/profile/backup_config.json
================================================
{
  "allowToBackupRestore": true
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/resources/base/profile/main_pages.json
================================================
{
  "src": [
    "pages/Index"
  ]
}


================================================
FILE: harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/resources/en_US/element/string.json
================================================
{
  "string": [
    {
      "name": "module_desc",
      "value": "On-device speaker diarization with Next-gen Kaldi"
    },
    {
      "name": "EntryAbility_desc",
      "value": "On-device speaker diarization with Next-gen Kaldi"
    },
    {
      "name": "EntryAbility_label",
      "value": "Speaker diarization"
    }
  ]
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/resources/rawfile/.gitkeep
================================================


================================================
FILE: harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/resources/zh_CN/element/string.json
================================================
{
  "string": [
    {
      "name": "module_desc",
      "value": "新一代Kaldi: 本地说话人日志"
    },
    {
      "name": "EntryAbility_desc",
      "value": "新一代Kaldi: 本地说话人日志"
    },
    {
      "name": "EntryAbility_label",
      "value": "说话人日志"
    }
  ]
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerDiarization/entry/src/ohosTest/ets/test/Ability.test.ets
================================================
import hilog from '@ohos.hilog';
import { describe, beforeAll, beforeEach, afterEach, afterAll, it, expect } from '@ohos/hypium';

export default function abilityTest() {
  describe('ActsAbilityTest', () => {
    // Defines a test suite. Two parameters are supported: test suite name and test suite function.
    beforeAll(() => {
      // Presets an action, which is performed only once before all test cases of the test suite start.
      // This API supports only one parameter: preset action function.
    })
    beforeEach(() => {
      // Presets an action, which is performed before each unit test case starts.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: preset action function.
    })
    afterEach(() => {
      // Presets a clear action, which is performed after each unit test case ends.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: clear action function.
    })
    afterAll(() => {
      // Presets a clear action, which is performed after all test cases of the test suite end.
      // This API supports only one parameter: clear action function.
    })
    it('assertContain', 0, () => {
      // Defines a test case. This API supports three parameters: test case name, filter parameter, and test case function.
      hilog.info(0x0000, 'testTag', '%{public}s', 'it begin');
      let a = 'abc';
      let b = 'b';
      // Defines a variety of assertion methods, which are used to declare expected boolean conditions.
      expect(a).assertContain(b);
      expect(a).assertEqual(a);
    })
  })
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerDiarization/entry/src/ohosTest/ets/test/List.test.ets
================================================
import abilityTest from './Ability.test';

export default function testsuite() {
  abilityTest();
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerDiarization/entry/src/ohosTest/module.json5
================================================
{
  "module": {
    "name": "entry_test",
    "type": "feature",
    "deviceTypes": [
      "phone",
      "tablet",
      "2in1"
    ],
    "deliveryWithInstall": true,
    "installationFree": false
  }
}


================================================
FILE: harmony-os/SherpaOnnxSpeakerDiarization/entry/src/test/List.test.ets
================================================
import localUnitTest from './LocalUnit.test';

export default function testsuite() {
  localUnitTest();
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerDiarization/entry/src/test/LocalUnit.test.ets
================================================
import { describe, beforeAll, beforeEach, afterEach, afterAll, it, expect } from '@ohos/hypium';

export default function localUnitTest() {
  describe('localUnitTest', () => {
    // Defines a test suite. Two parameters are supported: test suite name and test suite function.
    beforeAll(() => {
      // Presets an action, which is performed only once before all test cases of the test suite start.
      // This API supports only one parameter: preset action function.
    });
    beforeEach(() => {
      // Presets an action, which is performed before each unit test case starts.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: preset action function.
    });
    afterEach(() => {
      // Presets a clear action, which is performed after each unit test case ends.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: clear action function.
    });
    afterAll(() => {
      // Presets a clear action, which is performed after all test cases of the test suite end.
      // This API supports only one parameter: clear action function.
    });
    it('assertContain', 0, () => {
      // Defines a test case. This API supports three parameters: test case name, filter parameter, and test case function.
      let a = 'abc';
      let b = 'b';
      // Defines a variety of assertion methods, which are used to declare expected boolean conditions.
      expect(a).assertContain(b);
      expect(a).assertEqual(a);
    });
  });
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerDiarization/hvigor/hvigor-config.json5
================================================
{
  "modelVersion": "5.0.0",
  "dependencies": {
  },
  "execution": {
    // "analyze": "normal",                     /* Define the build analyze mode. Value: [ "normal" | "advanced" | false ]. Default: "normal" */
    // "daemon": true,                          /* Enable daemon compilation. Value: [ true | false ]. Default: true */
    // "incremental": true,                     /* Enable incremental compilation. Value: [ true | false ]. Default: true */
    // "parallel": true,                        /* Enable parallel compilation. Value: [ true | false ]. Default: true */
    // "typeCheck": false,                      /* Enable typeCheck. Value: [ true | false ]. Default: false */
  },
  "logging": {
    // "level": "info"                          /* Define the log level. Value: [ "debug" | "info" | "warn" | "error" ]. Default: "info" */
  },
  "debugging": {
    // "stacktrace": false                      /* Disable stacktrace compilation. Value: [ true | false ]. Default: false */
  },
  "nodeOptions": {
    // "maxOldSpaceSize": 8192                  /* Enable nodeOptions maxOldSpaceSize compilation. Unit M. Used for the daemon process. Default: 8192*/
    // "exposeGC": true                         /* Enable to trigger garbage collection explicitly. Default: true*/
  }
}


================================================
FILE: harmony-os/SherpaOnnxSpeakerDiarization/hvigorfile.ts
================================================
import { appTasks } from '@ohos/hvigor-ohos-plugin';

export default {
    system: appTasks,  /* Built-in plugin of Hvigor. It cannot be modified. */
    plugins:[]         /* Custom plugin to extend the functionality of Hvigor. */
}


================================================
FILE: harmony-os/SherpaOnnxSpeakerDiarization/oh-package-lock.json5
================================================
{
  "meta": {
    "stableOrder": true
  },
  "lockfileVersion": 3,
  "ATTENTION": "THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.",
  "specifiers": {
    "@ohos/hypium@1.0.19": "@ohos/hypium@1.0.19"
  },
  "packages": {
    "@ohos/hypium@1.0.19": {
      "name": "@ohos/hypium",
      "version": "1.0.19",
      "integrity": "sha512-cEjDgLFCm3cWZDeRXk7agBUkPqjWxUo6AQeiu0gEkb3J8ESqlduQLSIXeo3cCsm8U/asL7iKjF85ZyOuufAGSQ==",
      "resolved": "https://ohpm.openharmony.cn/ohpm/@ohos/hypium/-/hypium-1.0.19.har",
      "registryType": "ohpm"
    }
  }
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerDiarization/oh-package.json5
================================================
{
  "modelVersion": "5.0.0",
  "description": "Please describe the basic information.",
  "dependencies": {
  },
  "devDependencies": {
    "@ohos/hypium": "1.0.19"
  }
}


================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/.gitignore
================================================
/node_modules
/oh_modules
/local.properties
/.idea
**/build
/.hvigor
.cxx
/.clangd
/.clang-format
/.clang-tidy
**/.test
/.appanalyzer

================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/AppScope/app.json5
================================================
{
  "app": {
    "bundleName": "com.k2fsa.sherpa.onnx.speaker.identification",
    "vendor": "example",
    "versionCode": 1000000,
    "versionName": "1.0.0",
    "icon": "$media:app_icon",
    "label": "$string:app_name"
  }
}


================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/AppScope/resources/base/element/string.json
================================================
{
  "string": [
    {
      "name": "app_name",
      "value": "SherpaOnnxSpeakerIdentification"
    }
  ]
}


================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/build-profile.json5
================================================
{
  "app": {
    "signingConfigs": [],
    "products": [
      {
        "name": "default",
        "signingConfig": "default",
        "compatibleSdkVersion": "4.0.0(10)",
        "runtimeOS": "HarmonyOS",
        "buildOption": {
          "strictMode": {
            "caseSensitiveCheck": true,
          }
        }
      }
    ],
    "buildModeSet": [
      {
        "name": "debug",
      },
      {
        "name": "release"
      }
    ]
  },
  "modules": [
    {
      "name": "entry",
      "srcPath": "./entry",
      "targets": [
        {
          "name": "default",
          "applyToProducts": [
            "default"
          ]
        }
      ]
    }
  ]
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/code-linter.json5
================================================
{
  "files": [
    "**/*.ets"
  ],
  "ignore": [
    "**/src/ohosTest/**/*",
    "**/src/test/**/*",
    "**/src/mock/**/*",
    "**/node_modules/**/*",
    "**/oh_modules/**/*",
    "**/build/**/*",
    "**/.preview/**/*"
  ],
  "ruleSet": [
    "plugin:@performance/recommended",
    "plugin:@typescript-eslint/recommended"
  ],
  "rules": {
  }
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/entry/.gitignore
================================================
/node_modules
/oh_modules
/.preview
/build
/.cxx
/.test

================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/entry/build-profile.json5
================================================
{
  "apiType": "stageMode",
  "buildOption": {
    "sourceOption": {
      "workers": [
        './src/main/ets/workers/SpeakerIdentificationWorker.ets'
      ]
    }
  },
  "buildOptionSet": [
    {
      "name": "release",
      "arkOptions": {
        "obfuscation": {
          "ruleOptions": {
            "enable": false,
            "files": [
              "./obfuscation-rules.txt"
            ]
          }
        }
      }
    },
  ],
  "targets": [
    {
      "name": "default"
    },
    {
      "name": "ohosTest",
    }
  ]
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/entry/hvigorfile.ts
================================================
import { hapTasks } from '@ohos/hvigor-ohos-plugin';

export default {
    system: hapTasks,  /* Built-in plugin of Hvigor. It cannot be modified. */
    plugins:[]         /* Custom plugin to extend the functionality of Hvigor. */
}


================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/entry/obfuscation-rules.txt
================================================
# Define project specific obfuscation rules here.
# You can include the obfuscation configuration files in the current module's build-profile.json5.
#
# For more details, see
#   https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/source-obfuscation-V5

# Obfuscation options:
# -disable-obfuscation: disable all obfuscations
# -enable-property-obfuscation: obfuscate the property names
# -enable-toplevel-obfuscation: obfuscate the names in the global scope
# -compact: remove unnecessary blank spaces and all line feeds
# -remove-log: remove all console.* statements
# -print-namecache: print the name cache that contains the mapping from the old names to new names
# -apply-namecache: reuse the given cache file

# Keep options:
# -keep-property-name: specifies property names that you want to keep
# -keep-global-name: specifies names that you want to keep in the global scope

-enable-property-obfuscation
-enable-toplevel-obfuscation
-enable-filename-obfuscation
-enable-export-obfuscation

================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/entry/oh-package-lock.json5
================================================
{
  "meta": {
    "stableOrder": true
  },
  "lockfileVersion": 3,
  "ATTENTION": "THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.",
  "specifiers": {
    "libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1y+qvabrznvcerrtte4uydjhwfdt7hfnlsk0jsnicmy=/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx": "libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1y+qvabrznvcerrtte4uydjhwfdt7hfnlsk0jsnicmy=/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx",
    "sherpa_onnx@sherpa_onnx_2.har": "sherpa_onnx@sherpa_onnx_2.har"
  },
  "packages": {
    "libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1y+qvabrznvcerrtte4uydjhwfdt7hfnlsk0jsnicmy=/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx": {
      "name": "libsherpa_onnx.so",
      "version": "1.0.0",
      "resolved": "../oh_modules/.ohpm/sherpa_onnx@1y+qvabrznvcerrtte4uydjhwfdt7hfnlsk0jsnicmy=/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx",
      "registryType": "local"
    },
    "sherpa_onnx@sherpa_onnx_2.har": {
      "name": "sherpa_onnx",
      "version": "1.10.33",
      "resolved": "sherpa_onnx_2.har",
      "registryType": "local",
      "dependencies": {
        "libsherpa_onnx.so": "file:./src/main/cpp/types/libsherpa_onnx"
      }
    }
  }
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/entry/oh-package.json5
================================================
{
  "name": "entry",
  "version": "1.0.0",
  "description": "Please describe the basic information.",
  "main": "",
  "author": "",
  "license": "",
  "dependencies": {
    "sherpa_onnx": "1.12.31",
  }
}


================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/ets/entryability/EntryAbility.ets
================================================
import AbilityConstant from '@ohos.app.ability.AbilityConstant';
import hilog from '@ohos.hilog';
import UIAbility from '@ohos.app.ability.UIAbility';
import Want from '@ohos.app.ability.Want';
import window from '@ohos.window';

export default class EntryAbility extends UIAbility {
  onCreate(want: Want, launchParam: AbilityConstant.LaunchParam): void {
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onCreate');
  }

  onDestroy(): void {
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onDestroy');
  }

  onWindowStageCreate(windowStage: window.WindowStage): void {
    // Main window is created, set main page for this ability
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onWindowStageCreate');

    windowStage.loadContent('pages/Index', (err) => {
      if (err.code) {
        hilog.error(0x0000, 'testTag', 'Failed to load the content. Cause: %{public}s', JSON.stringify(err) ?? '');
        return;
      }
      hilog.info(0x0000, 'testTag', 'Succeeded in loading the content.');
    });
  }

  onWindowStageDestroy(): void {
    // Main window is destroyed, release UI related resources
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onWindowStageDestroy');
  }

  onForeground(): void {
    // Ability has brought to foreground
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onForeground');
  }

  onBackground(): void {
    // Ability has back to background
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onBackground');
  }
}


================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/ets/entrybackupability/EntryBackupAbility.ets
================================================
import hilog from '@ohos.hilog';
import BackupExtensionAbility, { BundleVersion } from '@ohos.application.BackupExtensionAbility';

export default class EntryBackupAbility extends BackupExtensionAbility {
  async onBackup() {
    hilog.info(0x0000, 'testTag', 'onBackup ok');
  }

  async onRestore(bundleVersion: BundleVersion) {
    hilog.info(0x0000, 'testTag', 'onRestore ok %{public}s', JSON.stringify(bundleVersion));
  }
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/ets/pages/Index.ets
================================================
import worker, { MessageEvents } from '@ohos.worker';
import { audio } from '@kit.AudioKit';
import { allAllowed, requestPermissions } from './Permission';
import { Permissions } from '@kit.AbilityKit';
import { picker } from '@kit.CoreFileKit';
import fs from '@ohos.file.fs';


function flatten(samples: Float32Array[]): Float32Array {
  let n = 0;
  for (let i = 0; i < samples.length; ++i) {
    n += samples[i].length;
  }

  const ans: Float32Array = new Float32Array(n);
  let offset: number = 0;
  for (let i = 0; i < samples.length; ++i) {
    ans.set(samples[i], offset);
    offset += samples[i].length;
  }

  return ans;
}

function savePcmToWav(filename: string, samples: Int16Array, sampleRate: number) {
  const fp = fs.openSync(filename, fs.OpenMode.READ_WRITE | fs.OpenMode.CREATE);

  const header = new ArrayBuffer(44);
  const view = new DataView(header);

  // http://soundfile.sapp.org/doc/WaveFormat/
  //                   F F I R
  view.setUint32(0, 0x46464952, true); // chunkID
  view.setUint32(4, 36 + samples.length * 2, true); // chunkSize //                   E V A W
  view.setUint32(8, 0x45564157, true); // format // //                      t m f
  view.setUint32(12, 0x20746d66, true); // subchunk1ID
  view.setUint32(16, 16, true); // subchunk1Size, 16 for PCM
  view.setUint32(20, 1, true); // audioFormat, 1 for PCM
  view.setUint16(22, 1, true); // numChannels: 1 channel
  view.setUint32(24, sampleRate, true); // sampleRate
  view.setUint32(28, sampleRate * 2, true); // byteRate
  view.setUint16(32, 2, true); // blockAlign
  view.setUint16(34, 16, true); // bitsPerSample
  view.setUint32(36, 0x61746164, true); // Subchunk2ID
  view.setUint32(40, samples.length * 2, true); // subchunk2Size

  fs.writeSync(fp.fd, new Uint8Array(header).buffer, { length: header.byteLength });
  fs.writeSync(fp.fd, samples.buffer, { length: samples.buffer.byteLength });

  fs.closeSync(fp.fd);
}

function toInt16Samples(samples: Float32Array): Int16Array {
  const int16Samples = new Int16Array(samples.length);
  for (let i = 0; i < samples.length; ++i) {
    let s = samples[i] * 32767;
    s = s > 32767 ? 32767 : s;
    s = s < -32768 ? -32768 : s;
    int16Samples[i] = s;
  }

  return int16Samples;
}

@Entry
@Component
struct Index {
  @State title: string = 'Next-gen Kaldi: Speaker Identification';
  @State titleFontSize: number = 18;
  private controller: TabsController = new TabsController();

  @State currentIndex: number = 0;

  private threshold: string = '0.5';

  private workerInstance?: worker.ThreadWorker
  private readonly scriptURL: string = 'entry/ets/workers/SpeakerIdentificationWorker.ets'

  @State allSpeakerNames: string[] = [];
  private inputSpeakerName: string = '';

  @State btnSaveAudioEnabled: boolean = false;
  @State btnAddEnabled: boolean = false;

  private sampleRate: number = 48000;
  private sampleListForAdding: Float32Array[] = []
  private sampleListForTesting: Float32Array[] = []
  private mic?: audio.AudioCapturer;

  @State infoHome: string = '';
  @State infoAdd: string = '';

  @State micBtnCaptionForAdding: string = 'Start recording';
  @State micStartedForAdding: boolean = false;
  @State micBtnEnabledForAdding: boolean = true;

  @State micBtnCaptionForTesting: string = 'Start recording';
  @State micStartedForTesting: boolean = false;
  @State micBtnEnabledForTesting: boolean = true;

  async initMic() {
    const permissions: Permissions[] = ["ohos.permission.MICROPHONE"];
    let allowed: boolean = await allAllowed(permissions);
    if (!allowed) {
      console.log("request to access the microphone");
      const status: boolean = await requestPermissions(permissions);

      if (!status) {
        console.error('access to microphone is denied')
        this.infoHome = "Failed to get microphone permission. Please retry";
        this.infoAdd = this.infoHome;
        return;
      }

      allowed = await allAllowed(permissions);
      if (!allowed) {
        console.error('failed to get microphone permission');
        this.infoHome = "Failed to get microphone permission. Please retry";
        this.infoAdd = this.infoHome;
        return;
      }
    } else {
      console.log("allowed to access microphone");
    }

    const audioStreamInfo: audio.AudioStreamInfo = {
      samplingRate: this.sampleRate,
      channels: audio.AudioChannel.CHANNEL_1,
      sampleFormat: audio.AudioSampleFormat.SAMPLE_FORMAT_S16LE,
      encodingType: audio.AudioEncodingType.ENCODING_TYPE_RAW,
    };

    const audioCapturerInfo: audio.AudioCapturerInfo = {
      source: audio.SourceType.SOURCE_TYPE_MIC, capturerFlags: 0
    };

    const audioCapturerOptions: audio.AudioCapturerOptions = {
      streamInfo: audioStreamInfo, capturerInfo: audioCapturerInfo

    };
    audio.createAudioCapturer(audioCapturerOptions, (err, data) => {
      if (err) {
        console.error(`error code is ${err.code}, error message is ${err.message}`);
        this.infoHome = 'Failed to init microphone';
        this.infoAdd = this.infoHome;
      } else {
        console.info(`init mic successfully`);
        this.mic = data;
        this.mic.on('readData', this.micCallback);
      }
    });
  }

  async aboutToAppear() {
    this.workerInstance = new worker.ThreadWorker(this.scriptURL, {
      name: 'Speaker identification worker'
    });

    this.workerInstance.onmessage = (e: MessageEvents) => {
      const msgType = e.data['msgType'] as string;
      console.log(`received msg from worker: ${msgType}`);

      if (msgType == 'manager-all-speaker-names') {
        this.allSpeakerNames = e.data['allSpeakers'] as string[];
      }

      if (msgType == 'manager-add-speaker-done') {
        const ok: boolean = e.data['ok'] as boolean;
        const status: string = e.data['status'] as string;
        this.infoAdd += '\n' + status;

        if (ok) {
          this.sampleListForAdding = [];
          this.btnSaveAudioEnabled = false;
          this.btnAddEnabled = false;
        }
      }

      if (msgType == 'manager-search-speaker-done') {
        const name = e.data['name'] as string;
        this.infoHome = name;
      }
    };

    this.workerInstance.postMessage({ msgType: 'init-extractor', context: getContext()});

    await this.initMic();
  }

  @Builder
  TabBuilder(title: string, targetIndex: number, selectedImg: Resource, normalImg: Resource) {
    Column() {
      Image(this.currentIndex == targetIndex ? selectedImg : normalImg).size({ width: 25, height: 25 })
      Text(title).fontColor(this.currentIndex == targetIndex ? '#28bff1' : '#8a8a8a')
    }.width('100%').height(50).justifyContent(FlexAlign.Center).onClick(() => {
      this.currentIndex = targetIndex;
      this.controller.changeIndex(this.currentIndex);
    })
  }

  build() {
    Column() {
      Tabs({ barPosition: BarPosition.End, controller: this.controller }) {
        TabContent() {
          Column({ space: 10 }) {
            Text(this.title).fontSize(this.titleFontSize).fontWeight(FontWeight.Bold);
            Row() {
              Text('Similary threshold').width('60%');

              TextInput({ text: this.threshold }).onChange((text) => {
                this.threshold = text.trim();
              }).width('20%')
            }
            Row() {
              Button(this.micBtnCaptionForTesting)
                .enabled(this.micBtnEnabledForTesting)
                .onClick(()=>{
                  if (this.allSpeakerNames.length == 0) {
                    this.infoHome = 'There are no speakers registered. Please add them first';
                    return;
                  }

                  let threshold = parseFloat(this.threshold);
                  if (isNaN(threshold)) {
                    this.infoHome = 'Please enter a valid threshold';
                    return;
                  }

                  if (threshold <= 0) {
                    this.infoHome = 'Please enter a positive threshold';
                    return;
                  }
                  console.log(`threshold: ${threshold}`);

                  if (this.micStartedForTesting) {
                    this.micStartedForTesting = false;
                    this.micBtnCaptionForTesting = 'Start';
                    this.micBtnEnabledForAdding = true;
                    this.mic?.stop();

                    const samples = flatten(this.sampleListForTesting);
                    const duration = samples.length / this.sampleRate;
                    if (duration < 0.5) {
                      this.infoHome = `Please speak for a longer time! Current duration: ${duration}`;
                      return;
                    }
                    if (this.workerInstance) {
                      this.workerInstance.postMessage({
                        msgType: 'manager-search-speaker',
                        samples: samples,
                        sampleRate: this.sampleRate,
                        threshold,
                      });
                    }
                  } else {
                    this.sampleListForTesting = [];
                    this.micStartedForTesting = true;
                    this.micBtnCaptionForTesting = 'Stop';
                    this.micBtnEnabledForAdding = false;
                    this.mic?.start();
                    this.infoHome = `Use threshold: ${threshold}`;
                    this.infoHome += '\nPlease speak and then click Stop';
                  }
                })

              Button('Save audio')
                .enabled(!this.micStartedForTesting)
                .onClick(()=>{
                  if (this.sampleListForTesting.length == 0) {
                    this.infoHome = 'No audio samples recorded';
                    return;
                  }
                  const samples = flatten(this.sampleListForTesting);

                  if (samples.length == 0) {
                    this.infoHome = 'Empty samples';
                    return;
                  }

                  let uri: string = '';

                  const audioOptions = new picker.AudioSaveOptions(); // audioOptions.newFileNames = ['o.wav'];

                  const audioViewPicker = new picker.AudioViewPicker();

                  audioViewPicker.save(audioOptions).then((audioSelectResult: Array<string>) => {
                    uri = audioSelectResult[0];
                    savePcmToWav(uri, toInt16Samples(samples), this.sampleRate);
                    console.log(`Saved to ${uri}`);
                    this.infoHome+= `\nSaved to ${uri}`;
                  });
                })
            }
            TextArea({text: this.infoHome})
              .height('100%')
              .focusable(false)
          }
        }.tabBar(this.TabBuilder('Home', 0, $r('app.media.icon_home'), $r('app.media.icon_home')))

        TabContent() {
          Column({ space: 10 }) {
            Text(this.title).fontSize(this.titleFontSize).fontWeight(FontWeight.Bold);

            if (this.allSpeakerNames.length == 0) {
              Text('Please add speakers first')
            } else {
              List({ space: 10, initialIndex: 0 }) {
                ForEach(this.allSpeakerNames, (item: string, index: number) => {
                  ListItem() {
                    Flex({ direction: FlexDirection.Row, alignItems: ItemAlign.Center }) {
                      Text(item)
                        .width('100%')
                        .height(80)
                        .fontSize(20)
                        .textAlign(TextAlign.Center)
                        .borderRadius(10)
                        .flexShrink(1)

                      Button('Delete')
                      .width('30%')
                        .height(40)
                      .onClick(() => {
                        if (index != undefined) {
                          const name = this.allSpeakerNames[index];
                          console.log(`Deleting speaker ${name}`);
                          if (this.workerInstance) {
                            this.workerInstance.postMessage({
                              msgType: 'manager-delete-speaker',
                              name: name
                            });
                          }
                        }
                      }).stateEffect(true)

                      Text('')
                        .width('15%')
                        .height(80)
                    }
                  }
                }, (item: string) => item)
              }
            }
          }
        }.tabBar(this.TabBuilder('View', 1, $r('app.media.icon_view'), $r('app.media.icon_view')))

        TabContent() {
          Column({ space: 10 }) {
            Text(this.title).fontSize(this.titleFontSize).fontWeight(FontWeight.Bold);

            Row({space: 10}) {
              Text('Speaker name')
              TextInput({placeholder: 'Input speaker name'})
                .onChange((value: string)=>{
                  this.inputSpeakerName = value.trim();
                });
            }.width('100%')

            Row({space: 10}) {
              Button(this.micBtnCaptionForAdding)
                .enabled(this.micBtnEnabledForAdding)
                .onClick(()=> {
                  if (this.mic) {
                    if (this.micStartedForAdding) {
                      this.micStartedForAdding = false;
                      this.micBtnEnabledForTesting = true;
                      this.micBtnCaptionForAdding = 'Start recording';
                      this.mic.stop();
                      this.infoAdd = '';
                      if (this.sampleListForAdding.length > 0) {
                        this.btnAddEnabled = true;
                        this.btnSaveAudioEnabled = true;
                      }
                    } else {
                      this.micStartedForAdding = true;
                      this.micBtnEnabledForTesting = false;
                      this.micBtnCaptionForAdding = 'Stop recording';
                      this.sampleListForAdding = [];
                      this.mic.start();
                      this.infoAdd = '';

                      this.btnAddEnabled = false;
                      this.btnSaveAudioEnabled = false;
                    }
                  }
                })

              Button('Add')
                .enabled(this.btnAddEnabled)
                .onClick(()=>{
                  if (this.inputSpeakerName.trim() == '') {
                    this.infoAdd += '\nPlease input a speaker name first';
                    return;
                  }

                  const samples = flatten(this.sampleListForAdding);
                  const duration = samples.length / this.sampleRate;
                  if (duration < 0.5) {
                    this.infoAdd = `Please speak for a longer time. Current duration: ${duration}`;
                    return;
                  }
                  if (this.workerInstance) {
                    this.workerInstance.postMessage({
                      msgType: 'manager-add-speaker',
                      name: this.inputSpeakerName,
                      samples: samples,
                      sampleRate: this.sampleRate,
                    })
                  }
                })

              Button('Save audio')
                .enabled(this.btnSaveAudioEnabled)
                .onClick(()=>{
                  if (this.sampleListForAdding.length == 0) {
                    this.btnSaveAudioEnabled = false;
                    return;
                  }

                  const samples = flatten(this.sampleListForAdding);

                  if (samples.length == 0) {
                    this.btnSaveAudioEnabled = false;
                    return;
                  }

                  let uri: string = '';


                  const audioOptions = new picker.AudioSaveOptions(); // audioOptions.newFileNames = ['o.wav'];

                  const audioViewPicker = new picker.AudioViewPicker();

                  audioViewPicker.save(audioOptions).then((audioSelectResult: Array<string>) => {
                    uri = audioSelectResult[0];
                    savePcmToWav(uri, toInt16Samples(samples), this.sampleRate);
                    console.log(`Saved to ${uri}`);
                    this.infoAdd += `\nSaved to ${uri}`;
                  });
                })
            }
            TextArea({text: this.infoAdd})
              .height('100%')
              .width('100%')
              .focusable(false)
          }
        }.tabBar(this.TabBuilder('Add', 2, $r('app.media.icon_add'), $r('app.media.icon_add')))

        TabContent() {
          Column({ space: 10 }) {
            Text(this.title).fontSize(this.titleFontSize).fontWeight(FontWeight.Bold);
            TextArea({
              text: `
Everyting is open-sourced.

It runs locally, without accessing the network

See also https://github.com/k2-fsa/sherpa-onnx

新一代 Kaldi QQ 和微信交流群: 请看

https://k2-fsa.github.io/sherpa/social-groups.html

微信公众号: 新一代 Kaldi
            `
            }).width('100%').height('100%').focusable(false)
          }
        }.tabBar(this.TabBuilder('Help', 3, $r('app.media.icon_info'), $r('app.media.icon_info')))

      }.scrollable(false)
    }.width('100%')
  }

  private micCallback = (buffer: ArrayBuffer) => {
    const view: Int16Array = new Int16Array(buffer);

    const samplesFloat: Float32Array = new Float32Array(view.length);
    for (let i = 0; i < view.length; ++i) {
      samplesFloat[i] = view[i] / 32768.0;
    }

    if (this.micStartedForAdding) {
      this.sampleListForAdding.push(samplesFloat);
    }

    if (this.micStartedForTesting) {
      this.sampleListForTesting.push(samplesFloat);
    }
  }
}


================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/ets/pages/Permission.ets
================================================
// This file is modified from
// https://gitee.com/ukSir/hmchat2/blob/master/entry/src/main/ets/utils/permissionMananger.ets
import { abilityAccessCtrl, bundleManager, common, Permissions } from '@kit.AbilityKit';

export function allAllowed(permissions: Permissions[]): boolean {
  if (permissions.length == 0) {
    return false;
  }

  const mgr: abilityAccessCtrl.AtManager = abilityAccessCtrl.createAtManager();

  const bundleInfo = bundleManager.getBundleInfoForSelfSync(bundleManager.BundleFlag.GET_BUNDLE_INFO_WITH_APPLICATION);

  let tokenID: number = bundleInfo.appInfo.accessTokenId;

  return permissions.every(permission => abilityAccessCtrl.GrantStatus.PERMISSION_GRANTED ==
  mgr.checkAccessTokenSync(tokenID, permission));
}

export async function requestPermissions(permissions: Permissions[]): Promise<boolean> {
  const mgr: abilityAccessCtrl.AtManager = abilityAccessCtrl.createAtManager();
  const context: Context = getContext() as common.UIAbilityContext;

  const result = await mgr.requestPermissionsFromUser(context, permissions);
  return result.authResults.length > 0 && result.authResults.every(authResults => authResults == 0);
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/ets/workers/SpeakerIdentificationWorker.ets
================================================
import worker, { ErrorEvent, MessageEvents, ThreadWorkerGlobalScope } from '@ohos.worker';
import {
  OnlineStream,
  readWaveFromBinary,
  Samples,
  SpeakerEmbeddingExtractor,
  SpeakerEmbeddingExtractorConfig,
  SpeakerEmbeddingManager
} from 'sherpa_onnx';

const workerPort: ThreadWorkerGlobalScope = worker.workerPort;

let extractor: SpeakerEmbeddingExtractor;
let manager: SpeakerEmbeddingManager;

function readWaveFromRawfile(filename: string, context: Context): Samples {
  const data: Uint8Array = context.resourceManager.getRawFileContentSync(filename);
  return readWaveFromBinary(data) as Samples;
}

function initExtractor(context: Context): SpeakerEmbeddingExtractor {
  const config: SpeakerEmbeddingExtractorConfig = new SpeakerEmbeddingExtractorConfig();

  // Please put the model file inside the directory
  // harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/resources/rawfile
/*
(py38) fangjuns-MacBook-Pro:rawfile fangjun$ pwd
/Users/fangjun/open-source/sherpa-onnx/harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/resources/rawfile
(py38) fangjuns-MacBook-Pro:rawfile fangjun$ ls -lh
total 77336
-rw-r--r--  1 fangjun  staff    38M Dec  9 19:34 3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
 */
  // You can find more models at
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
  config.model = '3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx';
  config.numThreads = 2;
  config.debug = true;

  return new SpeakerEmbeddingExtractor(config, context.resourceManager);
}

function extractEmbedding(samples: Samples): Float32Array {
  const stream: OnlineStream = extractor.createStream();
  stream.acceptWaveform(samples);
  return extractor.compute(stream);
}

/**
 * Defines the event handler to be called when the worker thread receives a message sent by the host thread.
 * The event handler is executed in the worker thread.
 *
 * @param e message data
 */
workerPort.onmessage = (e: MessageEvents) => {
  const msgType = e.data['msgType'] as string;

  console.log(`from the main thread, msg-type: ${msgType}`);

  if (msgType == 'init-extractor' && !extractor) {
    const context: Context = e.data['context'] as Context;
    extractor = initExtractor(context);
    manager = new SpeakerEmbeddingManager(extractor.dim);

    workerPort.postMessage({
      msgType: 'manager-all-speaker-names', allSpeakers: manager.getAllSpeakerNames(),
    });
  }

  if (msgType == 'manager-delete-speaker') {
    const name = e.data['name'] as string;
    const ok: boolean = manager.remove(name);
    if (ok) {
      console.log(`Removed ${name}.`);

      console.log(`Number of speakers: ${manager.getNumSpeakers()}`);
      console.log(`Number of speakers2: ${manager.getAllSpeakerNames().length}`);
      console.log(JSON.stringify(manager.getAllSpeakerNames()));
      workerPort.postMessage({
        msgType: 'manager-all-speaker-names', allSpeakers: manager.getAllSpeakerNames(),
      });
    }
  }

  if (msgType == 'manager-add-speaker') {
    const name = e.data['name'] as string;
    const samples = e.data['samples'] as Float32Array;
    const sampleRate = e.data['sampleRate'] as number;

    const v = extractEmbedding({ samples, sampleRate });
    const ok: boolean = manager.add({ name, v });
    if (ok) {
      workerPort.postMessage({
        msgType: 'manager-add-speaker-done',
        status: `Added ${name}`,
        ok,
      });
      workerPort.postMessage({
        msgType: 'manager-all-speaker-names', allSpeakers: manager.getAllSpeakerNames(),
      }
      );
    } else {
      workerPort.postMessage({
        msgType: 'manager-add-speaker-done',
        status: `Failed to add ${name}. Possibly due to exsiting speaker name. Please recheck`,
        ok,
      });
    }
  }

  if (msgType == 'manager-search-speaker') {
    const threshold = e.data['threshold'] as number;
    const samples = e.data['samples'] as Float32Array;
    const sampleRate = e.data['sampleRate'] as number;

    const v = extractEmbedding({ samples, sampleRate });
    let name: string = manager.search({ threshold, v });
    if (name == '' || name == undefined) {
      name = "===<Unknown>===";
    }
    workerPort.postMessage({
      msgType: 'manager-search-speaker-done',
      name
    });
  }
}

/**
 * Defines the event handler to be called when the worker receives a message that cannot be deserialized.
 * The event handler is executed in the worker thread.
 *
 * @param e message data
 */
workerPort.onmessageerror = (e: MessageEvents) => {
}

/**
 * Defines the event handler to be called when an exception occurs during worker execution.
 * The event handler is executed in the worker thread.
 *
 * @param e error message
 */
workerPort.onerror = (e: ErrorEvent) => {
}


================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/module.json5
================================================
{
  "module": {
    "name": "entry",
    "type": "entry",
    "description": "$string:module_desc",
    "mainElement": "EntryAbility",
    "deviceTypes": [
      "phone",
      "tablet",
      "2in1"
    ],
    "deliveryWithInstall": true,
    "installationFree": false,
    "pages": "$profile:main_pages",
    "abilities": [
      {
        "name": "EntryAbility",
        "srcEntry": "./ets/entryability/EntryAbility.ets",
        "description": "$string:EntryAbility_desc",
        "icon": "$media:layered_image",
        "label": "$string:EntryAbility_label",
        "startWindowIcon": "$media:startIcon",
        "startWindowBackground": "$color:start_window_background",
        "exported": true,
        "skills": [
          {
            "entities": [
              "entity.system.home"
            ],
            "actions": [
              "action.system.home"
            ]
          }
        ]
      }
    ],
    "extensionAbilities": [
      {
        "name": "EntryBackupAbility",
        "srcEntry": "./ets/entrybackupability/EntryBackupAbility.ets",
        "type": "backup",
        "exported": false,
        "metadata": [
          {
            "name": "ohos.extension.backup",
            "resource": "$profile:backup_config"
          }
        ],
      }
    ],
    "requestPermissions": [
      {
        "name": "ohos.permission.MICROPHONE",
        "reason": "$string:mic_reason",
        "usedScene": {
          "abilities": [
            "EntryAbility",
          ],
          "when": "inuse",
        }
      }
    ]
  }
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/resources/base/element/color.json
================================================
{
  "color": [
    {
      "name": "start_window_background",
      "value": "#FFFFFF"
    }
  ]
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/resources/base/element/string.json
================================================
{
  "string": [
    {
      "name": "module_desc",
      "value": "On-device speaker identification with Next-gen Kaldi"
    },
    {
      "name": "EntryAbility_desc",
      "value": "On-device speaker identification with Next-gen Kaldi"
    },
    {
      "name": "EntryAbility_label",
      "value": "Speaker identification"
    },
    {
      "name": "mic_reason",
      "value": "access the microphone for on-device speaker identification with Next-gen Kaldi"
    }
  ]
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/resources/base/media/layered_image.json
================================================
{
  "layered-image":
  {
    "background" : "$media:background",
    "foreground" : "$media:foreground"
  }
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/resources/base/profile/backup_config.json
================================================
{
  "allowToBackupRestore": true
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/resources/base/profile/main_pages.json
================================================
{
  "src": [
    "pages/Index"
  ]
}


================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/resources/en_US/element/string.json
================================================
{
  "string": [
    {
      "name": "module_desc",
      "value": "On-device speaker identification with Next-gen Kaldi"
    },
    {
      "name": "EntryAbility_desc",
      "value": "On-device speaker identification with Next-gen Kaldi"
    },
    {
      "name": "EntryAbility_label",
      "value": "Speaker identification"
    },
    {
      "name": "mic_reason",
      "value": "access the microphone for on-device speaker identification with Next-gen Kaldi"
    }
  ]
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/resources/rawfile/.gitkeep
================================================


================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/resources/zh_CN/element/string.json
================================================
{
  "string": [
    {
      "name": "module_desc",
      "value": "新一代Kaldi: 本地说话人识别"
    },
    {
      "name": "EntryAbility_desc",
      "value": "新一代Kaldi: 本地说话人识别"
    },
    {
      "name": "EntryAbility_label",
      "value": "说话人识别"
    },
    {
      "name": "mic_reason",
      "value": "使用新一代Kaldi, 访问麦克风进行本地说话人识别 (不需要联网)"
    }
  ]
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/entry/src/ohosTest/ets/test/Ability.test.ets
================================================
import hilog from '@ohos.hilog';
import { describe, beforeAll, beforeEach, afterEach, afterAll, it, expect } from '@ohos/hypium';

export default function abilityTest() {
  describe('ActsAbilityTest', () => {
    // Defines a test suite. Two parameters are supported: test suite name and test suite function.
    beforeAll(() => {
      // Presets an action, which is performed only once before all test cases of the test suite start.
      // This API supports only one parameter: preset action function.
    })
    beforeEach(() => {
      // Presets an action, which is performed before each unit test case starts.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: preset action function.
    })
    afterEach(() => {
      // Presets a clear action, which is performed after each unit test case ends.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: clear action function.
    })
    afterAll(() => {
      // Presets a clear action, which is performed after all test cases of the test suite end.
      // This API supports only one parameter: clear action function.
    })
    it('assertContain', 0, () => {
      // Defines a test case. This API supports three parameters: test case name, filter parameter, and test case function.
      hilog.info(0x0000, 'testTag', '%{public}s', 'it begin');
      let a = 'abc';
      let b = 'b';
      // Defines a variety of assertion methods, which are used to declare expected boolean conditions.
      expect(a).assertContain(b);
      expect(a).assertEqual(a);
    })
  })
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/entry/src/ohosTest/ets/test/List.test.ets
================================================
import abilityTest from './Ability.test';

export default function testsuite() {
  abilityTest();
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/entry/src/ohosTest/module.json5
================================================
{
  "module": {
    "name": "entry_test",
    "type": "feature",
    "deviceTypes": [
      "phone",
      "tablet",
      "2in1"
    ],
    "deliveryWithInstall": true,
    "installationFree": false
  }
}


================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/entry/src/test/List.test.ets
================================================
import localUnitTest from './LocalUnit.test';

export default function testsuite() {
  localUnitTest();
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/entry/src/test/LocalUnit.test.ets
================================================
import { describe, beforeAll, beforeEach, afterEach, afterAll, it, expect } from '@ohos/hypium';

export default function localUnitTest() {
  describe('localUnitTest', () => {
    // Defines a test suite. Two parameters are supported: test suite name and test suite function.
    beforeAll(() => {
      // Presets an action, which is performed only once before all test cases of the test suite start.
      // This API supports only one parameter: preset action function.
    });
    beforeEach(() => {
      // Presets an action, which is performed before each unit test case starts.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: preset action function.
    });
    afterEach(() => {
      // Presets a clear action, which is performed after each unit test case ends.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: clear action function.
    });
    afterAll(() => {
      // Presets a clear action, which is performed after all test cases of the test suite end.
      // This API supports only one parameter: clear action function.
    });
    it('assertContain', 0, () => {
      // Defines a test case. This API supports three parameters: test case name, filter parameter, and test case function.
      let a = 'abc';
      let b = 'b';
      // Defines a variety of assertion methods, which are used to declare expected boolean conditions.
      expect(a).assertContain(b);
      expect(a).assertEqual(a);
    });
  });
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/hvigor/hvigor-config.json5
================================================
{
  "modelVersion": "5.0.0",
  "dependencies": {
  },
  "execution": {
    // "analyze": "normal",                     /* Define the build analyze mode. Value: [ "normal" | "advanced" | false ]. Default: "normal" */
    // "daemon": true,                          /* Enable daemon compilation. Value: [ true | false ]. Default: true */
    // "incremental": true,                     /* Enable incremental compilation. Value: [ true | false ]. Default: true */
    // "parallel": true,                        /* Enable parallel compilation. Value: [ true | false ]. Default: true */
    // "typeCheck": false,                      /* Enable typeCheck. Value: [ true | false ]. Default: false */
  },
  "logging": {
    // "level": "info"                          /* Define the log level. Value: [ "debug" | "info" | "warn" | "error" ]. Default: "info" */
  },
  "debugging": {
    // "stacktrace": false                      /* Disable stacktrace compilation. Value: [ true | false ]. Default: false */
  },
  "nodeOptions": {
    // "maxOldSpaceSize": 8192                  /* Enable nodeOptions maxOldSpaceSize compilation. Unit M. Used for the daemon process. Default: 8192*/
    // "exposeGC": true                         /* Enable to trigger garbage collection explicitly. Default: true*/
  }
}


================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/hvigorfile.ts
================================================
import { appTasks } from '@ohos/hvigor-ohos-plugin';

export default {
    system: appTasks,  /* Built-in plugin of Hvigor. It cannot be modified. */
    plugins:[]         /* Custom plugin to extend the functionality of Hvigor. */
}


================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/oh-package-lock.json5
================================================
{
  "meta": {
    "stableOrder": true
  },
  "lockfileVersion": 3,
  "ATTENTION": "THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.",
  "specifiers": {
    "@ohos/hypium@1.0.19": "@ohos/hypium@1.0.19"
  },
  "packages": {
    "@ohos/hypium@1.0.19": {
      "name": "@ohos/hypium",
      "version": "1.0.19",
      "integrity": "sha512-cEjDgLFCm3cWZDeRXk7agBUkPqjWxUo6AQeiu0gEkb3J8ESqlduQLSIXeo3cCsm8U/asL7iKjF85ZyOuufAGSQ==",
      "resolved": "https://ohpm.openharmony.cn/ohpm/@ohos/hypium/-/hypium-1.0.19.har",
      "registryType": "ohpm"
    }
  }
}

================================================
FILE: harmony-os/SherpaOnnxSpeakerIdentification/oh-package.json5
================================================
{
  "modelVersion": "5.0.0",
  "description": "Please describe the basic information.",
  "dependencies": {
  },
  "devDependencies": {
    "@ohos/hypium": "1.0.19"
  }
}


================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/.gitignore
================================================
/node_modules
/oh_modules
/local.properties
/.idea
**/build
/.hvigor
.cxx
/.clangd
/.clang-format
/.clang-tidy
**/.test
/.appanalyzer

================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/AppScope/app.json5
================================================
{
  "app": {
    "bundleName": "com.k2fsa.sherpa.onnx.streaming.asr",
    "vendor": "example",
    "versionCode": 1000000,
    "versionName": "1.0.0",
    "icon": "$media:app_icon",
    "label": "$string:app_name"
  }
}


================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/AppScope/resources/base/element/string.json
================================================
{
  "string": [
    {
      "name": "app_name",
      "value": "SherpaOnnxStreamingAsr"
    }
  ]
}


================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/build-profile.json5
================================================
{
  "app": {
    "signingConfigs": [],
    "products": [
      {
        "name": "default",
        "signingConfig": "default",
        "compatibleSdkVersion": "4.0.0(10)",
        "runtimeOS": "HarmonyOS",
        "buildOption": {
          "strictMode": {
            "caseSensitiveCheck": true,
          }
        }
      }
    ],
    "buildModeSet": [
      {
        "name": "debug",
      },
      {
        "name": "release"
      }
    ]
  },
  "modules": [
    {
      "name": "entry",
      "srcPath": "./entry",
      "targets": [
        {
          "name": "default",
          "applyToProducts": [
            "default"
          ]
        }
      ]
    }
  ]
}

================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/code-linter.json5
================================================
{
  "files": [
    "**/*.ets"
  ],
  "ignore": [
    "**/src/ohosTest/**/*",
    "**/src/test/**/*",
    "**/src/mock/**/*",
    "**/node_modules/**/*",
    "**/oh_modules/**/*",
    "**/build/**/*",
    "**/.preview/**/*"
  ],
  "ruleSet": [
    "plugin:@performance/recommended",
    "plugin:@typescript-eslint/recommended"
  ],
  "rules": {
  }
}

================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/entry/.gitignore
================================================
/node_modules
/oh_modules
/.preview
/build
/.cxx
/.test

================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/entry/build-profile.json5
================================================
{
  "apiType": "stageMode",
  "buildOption": {
    "sourceOption": {
      "workers": [
        './src/main/ets/workers/StreamingAsrWorker.ets'
      ]
    }
  },
  "buildOptionSet": [
    {
      "name": "release",
      "arkOptions": {
        "obfuscation": {
          "ruleOptions": {
            "enable": false,
            "files": [
              "./obfuscation-rules.txt"
            ]
          }
        }
      }
    },
  ],
  "targets": [
    {
      "name": "default"
    },
    {
      "name": "ohosTest",
    }
  ]
}

================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/entry/hvigorfile.ts
================================================
import { hapTasks } from '@ohos/hvigor-ohos-plugin';

export default {
    system: hapTasks,  /* Built-in plugin of Hvigor. It cannot be modified. */
    plugins:[]         /* Custom plugin to extend the functionality of Hvigor. */
}


================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/entry/obfuscation-rules.txt
================================================
# Define project specific obfuscation rules here.
# You can include the obfuscation configuration files in the current module's build-profile.json5.
#
# For more details, see
#   https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/source-obfuscation-V5

# Obfuscation options:
# -disable-obfuscation: disable all obfuscations
# -enable-property-obfuscation: obfuscate the property names
# -enable-toplevel-obfuscation: obfuscate the names in the global scope
# -compact: remove unnecessary blank spaces and all line feeds
# -remove-log: remove all console.* statements
# -print-namecache: print the name cache that contains the mapping from the old names to new names
# -apply-namecache: reuse the given cache file

# Keep options:
# -keep-property-name: specifies property names that you want to keep
# -keep-global-name: specifies names that you want to keep in the global scope

-enable-property-obfuscation
-enable-toplevel-obfuscation
-enable-filename-obfuscation
-enable-export-obfuscation

================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/entry/oh-package-lock.json5
================================================
{
  "meta": {
    "stableOrder": true
  },
  "lockfileVersion": 3,
  "ATTENTION": "THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.",
  "specifiers": {
    "libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1.10.33/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx": "libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1.10.33/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx",
    "sherpa_onnx@1.10.33": "sherpa_onnx@1.10.33"
  },
  "packages": {
    "libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1.10.33/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx": {
      "name": "libsherpa_onnx.so",
      "version": "1.0.0",
      "resolved": "../oh_modules/.ohpm/sherpa_onnx@1.10.33/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx",
      "registryType": "local"
    },
    "sherpa_onnx@1.10.33": {
      "name": "sherpa_onnx",
      "version": "1.10.33",
      "integrity": "sha512-cmZ8zwOMx4qmDvOjF1/PL6/suBgReanSf5XdQTuMWWZ6qN74rynODHrt4C+Qz754MTXg0q/phAKeVjGA4rHHSA==",
      "resolved": "https://ohpm.openharmony.cn/ohpm/sherpa_onnx/-/sherpa_onnx-1.10.33.har",
      "registryType": "ohpm",
      "dependencies": {
        "libsherpa_onnx.so": "file:./src/main/cpp/types/libsherpa_onnx"
      }
    }
  }
}

================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/entry/oh-package.json5
================================================
{
  "name": "entry",
  "version": "1.0.0",
  "description": "Please describe the basic information.",
  "main": "",
  "author": "",
  "license": "",
  "dependencies": {
    "sherpa_onnx": "1.12.31",
  }
}


================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/entry/src/main/ets/entryability/EntryAbility.ets
================================================
import AbilityConstant from '@ohos.app.ability.AbilityConstant';
import hilog from '@ohos.hilog';
import UIAbility from '@ohos.app.ability.UIAbility';
import Want from '@ohos.app.ability.Want';
import window from '@ohos.window';

export default class EntryAbility extends UIAbility {
  onCreate(want: Want, launchParam: AbilityConstant.LaunchParam): void {
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onCreate');
  }

  onDestroy(): void {
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onDestroy');
  }

  onWindowStageCreate(windowStage: window.WindowStage): void {
    // Main window is created, set main page for this ability
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onWindowStageCreate');

    windowStage.loadContent('pages/Index', (err) => {
      if (err.code) {
        hilog.error(0x0000, 'testTag', 'Failed to load the content. Cause: %{public}s', JSON.stringify(err) ?? '');
        return;
      }
      hilog.info(0x0000, 'testTag', 'Succeeded in loading the content.');
    });
  }

  onWindowStageDestroy(): void {
    // Main window is destroyed, release UI related resources
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onWindowStageDestroy');
  }

  onForeground(): void {
    // Ability has brought to foreground
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onForeground');
  }

  onBackground(): void {
    // Ability has back to background
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onBackground');
  }
}


================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/entry/src/main/ets/entrybackupability/EntryBackupAbility.ets
================================================
import hilog from '@ohos.hilog';
import BackupExtensionAbility, { BundleVersion } from '@ohos.application.BackupExtensionAbility';

export default class EntryBackupAbility extends BackupExtensionAbility {
  async onBackup() {
    hilog.info(0x0000, 'testTag', 'onBackup ok');
  }

  async onRestore(bundleVersion: BundleVersion) {
    hilog.info(0x0000, 'testTag', 'onRestore ok %{public}s', JSON.stringify(bundleVersion));
  }
}

================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/entry/src/main/ets/pages/Index.ets
================================================
import { LengthUnit } from '@kit.ArkUI';
import worker, { MessageEvents } from '@ohos.worker';
import { BusinessError } from '@kit.BasicServicesKit';
import { picker } from '@kit.CoreFileKit';
import systemTime from '@ohos.systemTime';
import { Permissions } from '@kit.AbilityKit';
import { allAllowed, requestPermissions } from './Permission';
import { audio } from '@kit.AudioKit';
import fs from '@ohos.file.fs';


function savePcmToWav(filename: string, samples: Int16Array, sampleRate: number) {
  const fp = fs.openSync(filename, fs.OpenMode.READ_WRITE | fs.OpenMode.CREATE);

  const header = new ArrayBuffer(44);
  const view = new DataView(header);

  // http://soundfile.sapp.org/doc/WaveFormat/
  //                   F F I R
  view.setUint32(0, 0x46464952, true); // chunkID
  view.setUint32(4, 36 + samples.length * 2, true); // chunkSize //                   E V A W
  view.setUint32(8, 0x45564157, true); // format // //                      t m f
  view.setUint32(12, 0x20746d66, true); // subchunk1ID
  view.setUint32(16, 16, true); // subchunk1Size, 16 for PCM
  view.setUint32(20, 1, true); // audioFormat, 1 for PCM
  view.setUint16(22, 1, true); // numChannels: 1 channel
  view.setUint32(24, sampleRate, true); // sampleRate
  view.setUint32(28, sampleRate * 2, true); // byteRate
  view.setUint16(32, 2, true); // blockAlign
  view.setUint16(34, 16, true); // bitsPerSample
  view.setUint32(36, 0x61746164, true); // Subchunk2ID
  view.setUint32(40, samples.length * 2, true); // subchunk2Size

  fs.writeSync(fp.fd, new Uint8Array(header).buffer, { length: header.byteLength });
  fs.writeSync(fp.fd, samples.buffer, { length: samples.buffer.byteLength });

  fs.closeSync(fp.fd);
}

function toInt16Samples(samples: Float32Array): Int16Array {
  const int16Samples = new Int16Array(samples.length);
  for (let i = 0; i < samples.length; ++i) {
    let s = samples[i] * 32767;
    s = s > 32767 ? 32767 : s;
    s = s < -32768 ? -32768 : s;
    int16Samples[i] = s;
  }

  return int16Samples;
}


@Entry
@Component
struct Index {
  @State title: string = 'Next-gen Kaldi: Real-time speech recognition';
  @State titleFontSize: number = 15;
  @State currentIndex: number = 0;
  @State lang: string = 'English';
  @State resultForFile: string = ''
  @State resultForMic: string = ''
  @State selectFileBtnEnabled: boolean = false;
  @State micBtnCaption: string = 'Start';
  @State micStarted: boolean = false;
  @State micAllowed: boolean = false;
  @State micBtnEnabled: boolean = false;
  @State micSaveBtnCaption: string = 'Save recorded audio';
  @State micSaveBtnEnabled: boolean = false;
  @State info: string = '';
  @State micInfo: string = '';
  @State micInitDone: boolean = false;
  private resultListForMic: string[] = [];
  private controller: TabsController = new TabsController();
  private workerInstance?: worker.ThreadWorker
  private readonly scriptURL: string = 'entry/ets/workers/StreamingAsrWorker.ets'
  private startTime: number = 0;
  private stopTime: number = 0;
  private sampleRate: number = 48000;
  private sampleList: Float32Array[] = []
  private mic?: audio.AudioCapturer;

  flatten(samples: Float32Array[]): Float32Array {
    let n = 0;
    for (let i = 0; i < samples.length; ++i) {
      n += samples[i].length;
    }

    const ans: Float32Array = new Float32Array(n);
    let offset: number = 0;
    for (let i = 0; i < samples.length; ++i) {
      ans.set(samples[i], offset);
      offset += samples[i].length;
    }

    return ans;
  }

  async initMic() {
    const permissions: Permissions[] = ["ohos.permission.MICROPHONE"];
    let allowed: boolean = await allAllowed(permissions);
    if (!allowed) {
      console.log("request to access the microphone");
      const status: boolean = await requestPermissions(permissions);

      if (!status) {
        console.error('access to microphone is denied')
        this.resultForMic = "Failed to get microphone permission. Please retry";
        return;
      }

      allowed = await allAllowed(permissions);
      if (!allowed) {
        console.error('failed to get microphone permission');
        this.resultForMic = "Failed to get microphone permission. Please retry";
        return;
      }
      this.micAllowed = true;
    } else {
      console.log("allowed to access microphone");
      this.micAllowed = true;
    }

    const audioStreamInfo: audio.AudioStreamInfo = {
      samplingRate: this.sampleRate,
      channels: audio.AudioChannel.CHANNEL_1,
      sampleFormat: audio.AudioSampleFormat.SAMPLE_FORMAT_S16LE,
      encodingType: audio.AudioEncodingType.ENCODING_TYPE_RAW,
    };

    const audioCapturerInfo: audio.AudioCapturerInfo = {
      source: audio.SourceType.SOURCE_TYPE_MIC, capturerFlags: 0
    };

    const audioCapturerOptions: audio.AudioCapturerOptions = {
      streamInfo: audioStreamInfo, capturerInfo: audioCapturerInfo

    };
    audio.createAudioCapturer(audioCapturerOptions, (err, data) => {
      if (err) {
        console.error(`error code is ${err.code}, error message is ${err.message}`);
        this.resultForMic = 'Failed to init microphone';
      } else {
        console.info(`init mic successfully`);
        this.mic = data;
        this.mic.on('readData', this.micCallback);
      }
    });
  }

  async aboutToAppear() {
    this.workerInstance = new worker.ThreadWorker(this.scriptURL, {
      name: 'Streaming ASR worker'
    });

    this.workerInstance.onmessage = (e: MessageEvents) => {
      const msgType = e.data['msgType'] as string;
      console.log(`received msg from worker: ${msgType}`);

      if (msgType == 'init-streaming-asr-done') {
        this.selectFileBtnEnabled = true;
        this.micBtnEnabled = true;
        this.info = `Initializing done.\n\nPlease select a wave file of 16kHz in language ${this.lang}`;
        this.micInfo = `Initializing done.\n\nPlease click Start and speak`;
      }

      if (msgType == 'streaming-asr-decode-file-done') {
        const text = e.data['text'] as string;
        this.resultForFile = text;
        this.selectFileBtnEnabled = true;

        systemTime.getRealTime((err, data) => {
          if (err) {
            console.log('Failed to get stop time');
          } else {
            this.stopTime = data;

            const audioDuration = e.data['duration'] as number;
            const elapsedSeconds = (this.stopTime - this.startTime) / 1000;
            const RTF = elapsedSeconds / audioDuration;
            this.info = `Audio duration: ${audioDuration.toFixed(2)} s
Elapsed: ${elapsedSeconds.toFixed(2)} s
RTF = ${elapsedSeconds.toFixed(2)}/${audioDuration.toFixed(2)} = ${RTF.toFixed(3)}
`;
          }
        });
      }

      if (msgType == 'streaming-asr-decode-mic-result') {
        const text = e.data['text'] as string;
        if (text.trim() == '') {
          return;
        }

        const isEndpoint = e.data['isEndpoint'] as boolean;

        let s = '';
        let i = 0;
        for (; i < this.resultListForMic.length; ++i) {
          s += `${i}: ${this.resultListForMic[i]}\n`
        }

        s += `${i}: ${text}`;
        this.resultForMic = s;

        if (isEndpoint) {
          this.resultListForMic.push(text);
        }
      }
    };

    const context = getContext();
    this.workerInstance.postMessage({ msgType: 'init-streaming-asr', context });
    this.info = 'Initializing ASR model.\nPlease wait';
    this.micInfo = 'Initializing ASR model.\nPlease wait';

    await this.initMic();
  }

  @Builder
  TabBuilder(title: string, targetIndex: number, selectedImg: Resource, normalImg: Resource) {
    Column() {
      Image(this.currentIndex == targetIndex ? selectedImg : normalImg).size({ width: 25, height: 25 })
      Text(title).fontColor(this.currentIndex == targetIndex ? '#28bff1' : '#8a8a8a')
    }.width('100%').height(50).justifyContent(FlexAlign.Center).onClick(() => {
      this.currentIndex = targetIndex;
      this.controller.changeIndex(this.currentIndex);
    })
  }

  build() {
    Column() {
      Tabs({ barPosition: BarPosition.End, controller: this.controller }) {
        TabContent() {
          Column({ space: 10 }) {
            Text(this.title).fontSize(this.titleFontSize).fontWeight(FontWeight.Bold);
            Button('Select .wav file (16kHz) ')
              .enabled(this.selectFileBtnEnabled)
              .fontSize(13)
              .width(296)
              .height(60)
              .onClick(() => {
                this.resultForFile = '';
                this.info = '';
                this.selectFileBtnEnabled = false;

                const documentSelectOptions = new picker.DocumentSelectOptions();
                documentSelectOptions.maxSelectNumber = 1;
                documentSelectOptions.fileSuffixFilters = ['.wav'];
                const documentViewPicker = new picker.DocumentViewPicker();

                documentViewPicker.select(documentSelectOptions).then((result: Array<string>) => {
                  console.log(`select file result: ${result}`);

                  if (!result[0]) {
                    this.resultForFile = 'Please select a file to decode';
                    this.selectFileBtnEnabled = true;
                    return;
                  }

                  if (this.workerInstance) {
                    systemTime.getRealTime((err, data) => {
                      if (err) {
                        console.log('Failed to get start time');
                      } else {
                        this.startTime = data;
                      }
                    });

                    this.workerInstance.postMessage({
                      msgType: 'streaming-asr-decode-file', filename: result[0],
                    });
                    this.info = `Decoding ${result[0]} ... ...`;
                  } else {
                    console.log(`this worker instance is undefined ${this.workerInstance}`);
                  }

                }).catch((err: BusinessError) => {
                  console.error(`Failed to select file, code is ${err.code}, message is ${err.message}`);
                  this.selectFileBtnEnabled = true;
                })
              })

            Text(`Supported languages: ${this.lang}`);
            if (this.info != '') {
              TextArea({ text: this.info }).focusable(false);
            }
            TextArea({ text: this.resultForFile })
              .width('100%')
              .lineSpacing({ value: 10, unit: LengthUnit.VP })
              .height('100%');
          }
        }.tabBar(this.TabBuilder('From file', 0, $r('app.media.icon_doc'), $r('app.media.icon_doc')))

        TabContent() {
          Column({ space: 10 }) {
            Text(this.title).fontSize(this.titleFontSize).fontWeight(FontWeight.Bold);
            Button(this.micBtnCaption)
              .enabled(this.micBtnEnabled)
              .fontSize(13)
              .width(296)
              .height(60)
              .onClick(() => {
                this.micInfo = '';
                if (this.mic) {
                  if (this.micStarted) {
                    this.micStarted = false;
                    this.micBtnCaption = 'Start';
                    this.mic.stop();
                    this.micSaveBtnEnabled = true;

                    if (this.workerInstance) {
                      this.workerInstance.postMessage({
                        msgType: 'streaming-asr-decode-mic-stop'
                      });
                    }
                  } else {
                    this.micStarted = true;
                    this.micSaveBtnEnabled = false;
                    this.micBtnCaption = 'Stop';
                    this.resultForMic = '';
                    this.resultListForMic = [];

                    if (this.workerInstance) {
                      this.workerInstance.postMessage({
                        msgType: 'streaming-asr-decode-mic-start'
                      });
                    }

                    this.sampleList = [];
                    this.mic.start();
                  }
                }
              });
            Button(this.micSaveBtnCaption)
              .enabled(this.micSaveBtnEnabled)
              .fontSize(13)
              .width(296)
              .height(60)
              .onClick(() => {
                if (this.sampleList.length == 0) {
                  this.micSaveBtnEnabled = false;
                  return;
                }

                const samples = this.flatten(this.sampleList);

                if (samples.length == 0) {
                  this.micSaveBtnEnabled = false;
                  return;
                }


                let uri: string = '';


                const audioOptions = new picker.AudioSaveOptions(); // audioOptions.newFileNames = ['o.wav'];

                const audioViewPicker = new picker.AudioViewPicker();

                audioViewPicker.save(audioOptions).then((audioSelectResult: Array<string>) => {
                  uri = audioSelectResult[0];
                  savePcmToWav(uri, toInt16Samples(samples), this.sampleRate);
                  console.log(`Saved to ${uri}`);
                  this.micInfo += `\nSaved to ${uri}`;
                });

              })


            Text(`Supported languages: ${this.lang}`)

            if (this.micInfo != '') {
              TextArea({ text: this.micInfo })
                .focusable(false);
            }

            TextArea({ text: this.resultForMic })
              .width('100%')
              .lineSpacing({ value: 10, unit: LengthUnit.VP })
              .width('100%')
              .height('100%');
          }
        }.tabBar(this.TabBuilder('From mic', 1, $r('app.media.icon_mic'), $r('app.media.icon_mic')))


        TabContent() {
          Column({ space: 10 }) {
            Text(this.title).fontSize(this.titleFontSize).fontWeight(FontWeight.Bold);
            TextArea({
              text: `
Everyting is open-sourced.

It runs locally, without accessing the network

See also https://github.com/k2-fsa/sherpa-onnx

新一代 Kaldi QQ 和微信交流群: 请看

https://k2-fsa.github.io/sherpa/social-groups.html

微信公众号: 新一代 Kaldi
            `
            }).width('100%').height('100%').focusable(false)
          }.justifyContent(FlexAlign.Start)
        }.tabBar(this.TabBuilder('Help', 2, $r('app.media.info'), $r('app.media.info')))
      }.scrollable(false)
    }.width('100%')
  }

  private micCallback = (buffer: ArrayBuffer) => {
    const view: Int16Array = new Int16Array(buffer);

    const samplesFloat: Float32Array = new Float32Array(view.length);
    for (let i = 0; i < view.length; ++i) {
      samplesFloat[i] = view[i] / 32768.0;
    }

    this.sampleList.push(samplesFloat);

    if (this.workerInstance) {
      this.workerInstance.postMessage({
        msgType: 'streaming-asr-decode-mic-samples',
        samples: samplesFloat,
        sampleRate: this.sampleRate,
      })
    }
  }
}

================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/entry/src/main/ets/pages/Permission.ets
================================================
// This file is modified from
// https://gitee.com/ukSir/hmchat2/blob/master/entry/src/main/ets/utils/permissionMananger.ets
import { abilityAccessCtrl, bundleManager, common, Permissions } from '@kit.AbilityKit';

export function allAllowed(permissions: Permissions[]): boolean {
  if (permissions.length == 0) {
    return false;
  }

  const mgr: abilityAccessCtrl.AtManager = abilityAccessCtrl.createAtManager();

  const bundleInfo = bundleManager.getBundleInfoForSelfSync(bundleManager.BundleFlag.GET_BUNDLE_INFO_WITH_APPLICATION);

  let tokenID: number = bundleInfo.appInfo.accessTokenId;

  return permissions.every(permission => abilityAccessCtrl.GrantStatus.PERMISSION_GRANTED ==
  mgr.checkAccessTokenSync(tokenID, permission));
}

export async function requestPermissions(permissions: Permissions[]): Promise<boolean> {
  const mgr: abilityAccessCtrl.AtManager = abilityAccessCtrl.createAtManager();
  const context: Context = getContext() as common.UIAbilityContext;

  const result = await mgr.requestPermissionsFromUser(context, permissions);
  return result.authResults.length > 0 && result.authResults.every(authResults => authResults == 0);
}

================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/entry/src/main/ets/workers/StreamingAsrWorker.ets
================================================
import worker, { ErrorEvent, MessageEvents, ThreadWorkerGlobalScope } from '@ohos.worker';
import {
  OnlineModelConfig,
  OnlineRecognizer,
  OnlineRecognizerConfig,
  OnlineStream,
  readWaveFromBinary,
  Samples
} from 'sherpa_onnx';
import { fileIo } from '@kit.CoreFileKit';

const workerPort: ThreadWorkerGlobalScope = worker.workerPort;


let recognizer: OnlineRecognizer;
let micStream: OnlineStream;

function getModelConfig(type: number): OnlineModelConfig {
  const modelConfig = new OnlineModelConfig();
  switch (type) {
    case 0: {
      const modelDir = 'sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20';
      modelConfig.transducer.encoder = `${modelDir}/encoder-epoch-99-avg-1.onnx`;
      modelConfig.transducer.decoder = `${modelDir}/decoder-epoch-99-avg-1.onnx`;
      modelConfig.transducer.joiner = `${modelDir}/joiner-epoch-99-avg-1.onnx`;
      modelConfig.tokens = `${modelDir}/tokens.txt`;
      modelConfig.modelType = 'zipformer';
      break;
    }

    case 1: {
      const modelDir = 'sherpa-onnx-lstm-zh-2023-02-20';
      modelConfig.transducer.encoder = `${modelDir}/encoder-epoch-11-avg-1.onnx`;
      modelConfig.transducer.decoder = `${modelDir}/decoder-epoch-11-avg-1.onnx`;
      modelConfig.transducer.joiner = `${modelDir}/joiner-epoch-11-avg-1.onnx`;
      modelConfig.tokens = `${modelDir}/tokens.txt`;
      modelConfig.modelType = 'lstm';
      break;
    }

    case 2: {
      const modelDir = 'sherpa-onnx-lstm-en-2023-02-17';
      modelConfig.transducer.encoder = `${modelDir}/encoder-epoch-99-avg-1.onnx`;
      modelConfig.transducer.decoder = `${modelDir}/decoder-epoch-99-avg-1.onnx`;
      modelConfig.transducer.joiner = `${modelDir}/joiner-epoch-99-avg-1.onnx`;
      modelConfig.tokens = `${modelDir}/tokens.txt`;
      modelConfig.modelType = 'lstm';
      break;
    }

    case 3: {
      const modelDir = 'icefall-asr-zipformer-streaming-wenetspeech-20230615';
      modelConfig.transducer.encoder = `${modelDir}/exp/encoder-epoch-12-avg-4-chunk-16-left-128.int8.onnx`;
      modelConfig.transducer.decoder = `${modelDir}/exp/decoder-epoch-12-avg-4-chunk-16-left-128.onnx`;
      modelConfig.transducer.joiner = `${modelDir}/exp/joiner-epoch-12-avg-4-chunk-16-left-128.onnx`;
      modelConfig.tokens = `${modelDir}/data/lang_char/tokens.txt`;
      modelConfig.modelType = 'zipformer2';
      break;
    }

    case 4: {
      const modelDir = 'icefall-asr-zipformer-streaming-wenetspeech-20230615';
      modelConfig.transducer.encoder = `${modelDir}/exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx`;
      modelConfig.transducer.decoder = `${modelDir}/exp/decoder-epoch-12-avg-4-chunk-16-left-128.onnx`;
      modelConfig.transducer.joiner = `${modelDir}/exp/joiner-epoch-12-avg-4-chunk-16-left-128.onnx`;
      modelConfig.tokens = `${modelDir}/data/lang_char/tokens.txt`;
      modelConfig.modelType = 'zipformer2';
      break;
    }

    case 5: {
      const modelDir = 'sherpa-onnx-streaming-paraformer-bilingual-zh-en';
      modelConfig.paraformer.encoder = `${modelDir}/encoder.int8.onnx`;
      modelConfig.paraformer.decoder = `${modelDir}/decoder.int8.onnx`;
      modelConfig.tokens = `${modelDir}/tokens.txt`;
      modelConfig.modelType = 'paraformer';
      break;
    }

    case 6: {
      const modelDir = 'sherpa-onnx-streaming-zipformer-en-2023-06-26';
      modelConfig.transducer.encoder = `${modelDir}/encoder-epoch-99-avg-1-chunk-16-left-128.int8.onnx`;
      modelConfig.transducer.decoder = `${modelDir}/decoder-epoch-99-avg-1-chunk-16-left-128.onnx`;
      modelConfig.transducer.joiner = `${modelDir}/joiner-epoch-99-avg-1-chunk-16-left-128.onnx`;
      modelConfig.tokens = `${modelDir}/tokens.txt`;
      modelConfig.modelType = 'zipformer2';
      break;
    }

    case 7: {
      const modelDir = 'sherpa-onnx-streaming-zipformer-fr-2023-04-14';
      modelConfig.transducer.encoder = `${modelDir}/encoder-epoch-29-avg-9-with-averaged-model.int8.onnx`;
      modelConfig.transducer.decoder = `${modelDir}/decoder-epoch-29-avg-9-with-averaged-model.onnx`;
      modelConfig.transducer.joiner = `${modelDir}/joiner-epoch-29-avg-9-with-averaged-model.onnx`;
      modelConfig.tokens = `${modelDir}/tokens.txt`;
      modelConfig.modelType = 'zipformer';
      break;
    }

    case 8: {
      const modelDir = 'sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20';
      modelConfig.transducer.encoder = `${modelDir}/encoder-epoch-99-avg-1.int8.onnx`;
      modelConfig.transducer.decoder = `${modelDir}/decoder-epoch-99-avg-1.onnx`;
      modelConfig.transducer.joiner = `${modelDir}/joiner-epoch-99-avg-1.int8.onnx`;
      modelConfig.tokens = `${modelDir}/tokens.txt`;
      modelConfig.modelType = 'zipformer';
      break;
    }

    case 9: {
      const modelDir = 'sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23'
      modelConfig.transducer.encoder = `${modelDir}/encoder-epoch-99-avg-1.int8.onnx`;
      modelConfig.transducer.decoder = `${modelDir}/decoder-epoch-99-avg-1.onnx`;
      modelConfig.transducer.joiner = `${modelDir}/joiner-epoch-99-avg-1.int8.onnx`;
      modelConfig.tokens = `${modelDir}/tokens.txt`;
      modelConfig.modelType = 'zipformer';
      break;
    }

    case 10: {
      const modelDir = 'sherpa-onnx-streaming-zipformer-en-20M-2023-02-17';
      modelConfig.transducer.encoder = `${modelDir}/encoder-epoch-99-avg-1.int8.onnx`;
      modelConfig.transducer.decoder = `${modelDir}/decoder-epoch-99-avg-1.onnx`;
      modelConfig.transducer.joiner = `${modelDir}/joiner-epoch-99-avg-1.int8.onnx`;
      modelConfig.tokens = `${modelDir}/tokens.txt`;
      modelConfig.modelType = 'zipformer';
      break;
    }

    case 14: {
      const modelDir = 'sherpa-onnx-streaming-zipformer-korean-2024-06-16';
      modelConfig.transducer.encoder = `${modelDir}/encoder-epoch-99-avg-1.int8.onnx`;
      modelConfig.transducer.decoder = `${modelDir}/decoder-epoch-99-avg-1.onnx`;
      modelConfig.transducer.joiner = `${modelDir}/joiner-epoch-99-avg-1.int8.onnx`;
      modelConfig.tokens = `${modelDir}/tokens.txt`;
      modelConfig.modelType = 'zipformer';
      break;
    }
    default: {
      console.log(`Please specify a supported type. Given type ${type}`);
    }
  }
  return modelConfig;
}

function initStreamingAsr(context: Context): OnlineRecognizer {
  let type: number;

  /*

If you use type = 8, then you should have the following directory structure in the rawfile directory

(py38) fangjuns-MacBook-Pro:rawfile fangjun$ pwd
/Users/fangjun/open-source/sherpa-onnx/harmony-os/SherpaOnnxStreamingAsr/entry/src/main/resources/rawfile
(py38) fangjuns-MacBook-Pro:rawfile fangjun$ ls
sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
(py38) fangjuns-MacBook-Pro:rawfile fangjun$ tree .
.
└── sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
    ├── decoder-epoch-99-avg-1.onnx
    ├── encoder-epoch-99-avg-1.int8.onnx
    ├── joiner-epoch-99-avg-1.int8.onnx
    └── tokens.txt

1 directory, 4 files

You can download model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models

Note that please delete files that are not used. Otherwise, you APP will be very large
due to containing unused large files.

   */
  type = 8;

  const config: OnlineRecognizerConfig = new OnlineRecognizerConfig();
  config.modelConfig = getModelConfig(type);
  config.modelConfig.debug = true;
  config.modelConfig.numThreads = 2;
  config.enableEndpoint = true;

  return new OnlineRecognizer(config, context.resourceManager);
}

interface DecodeFileResult {
  text: string;
  duration: number;
}

function decodeFile(filename: string): DecodeFileResult {
  const fp = fileIo.openSync(filename);
  const stat = fileIo.statSync(fp.fd);
  const arrayBuffer = new ArrayBuffer(stat.size);
  fileIo.readSync(fp.fd, arrayBuffer);
  const data: Uint8Array = new Uint8Array(arrayBuffer);
  const wave: Samples = readWaveFromBinary(data) as Samples;
  console.log(`Sample rate: ${wave.sampleRate}`);

  const stream = recognizer.createStream();
  stream.acceptWaveform(wave);
  const tailPadding = new Float32Array(0.5 * wave.sampleRate);
  tailPadding.fill(0);

  stream.acceptWaveform({ samples: tailPadding, sampleRate: wave.sampleRate });

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  const audioDuration = wave.samples.length / wave.sampleRate;

  return { text: recognizer.getResult(stream).text, duration: audioDuration };
}

/**
 * Defines the event handler to be called when the worker thread receives a message sent by the host thread.
 * The event handler is executed in the worker thread.
 *
 * @param e message data
 */
workerPort.onmessage = (e: MessageEvents) => {
  const msgType = e.data['msgType'] as string;

  if (msgType != 'streaming-asr-decode-mic-samples') {
    console.log(`from the main thread, msg-type: ${msgType}`);
  }

  if (msgType == 'init-streaming-asr' && !recognizer) {
    console.log('initializing streaming ASR...');
    const context = e.data['context'] as Context;
    recognizer = initStreamingAsr(context);
    console.log('streaming ASR is initialized. ');
    workerPort.postMessage({ 'msgType': 'init-streaming-asr-done' });
  }

  if (msgType == 'streaming-asr-decode-file') {
    const filename = e.data['filename'] as string;
    console.log(`decoding ${filename}`);
    const result = decodeFile(filename);
    workerPort.postMessage({
      'msgType': 'streaming-asr-decode-file-done', text: result.text, duration: result.duration
    });
  }

  if (msgType == 'streaming-asr-decode-mic-start') {
    micStream = recognizer.createStream();
  }

  if (msgType == 'streaming-asr-decode-mic-stop') { // nothing to do
  }

  if (msgType == 'streaming-asr-decode-mic-samples') {
    const samples = e.data['samples'] as Float32Array;
    const sampleRate = e.data['sampleRate'] as number;

    micStream.acceptWaveform({ samples, sampleRate });
    while (recognizer.isReady(micStream)) {
      recognizer.decode(micStream);

      let isEndpoint = false;
      let text = recognizer.getResult(micStream).text;

      if (recognizer.isEndpoint(micStream)) {
        isEndpoint = true;
        recognizer.reset(micStream);
      }

      if (text.trim() != '') {
        workerPort.postMessage({
          'msgType': 'streaming-asr-decode-mic-result', text: text, isEndpoint: isEndpoint,
        });
      }
    }
  }

}

/**
 * Defines the event handler to be called when the worker receives a message that cannot be deserialized.
 * The event handler is executed in the worker thread.
 *
 * @param e message data
 */
workerPort.onmessageerror = (e: MessageEvents) => {
}

/**
 * Defines the event handler to be called when an exception occurs during worker execution.
 * The event handler is executed in the worker thread.
 *
 * @param e error message
 */
workerPort.onerror = (e: ErrorEvent) => {
}

================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/entry/src/main/module.json5
================================================
{
  "module": {
    "name": "entry",
    "type": "entry",
    "description": "$string:module_desc",
    "mainElement": "EntryAbility",
    "deviceTypes": [
      "phone",
      "tablet",
      "2in1"
    ],
    "deliveryWithInstall": true,
    "installationFree": false,
    "pages": "$profile:main_pages",
    "abilities": [
      {
        "name": "EntryAbility",
        "srcEntry": "./ets/entryability/EntryAbility.ets",
        "description": "$string:EntryAbility_desc",
        "icon": "$media:layered_image",
        "label": "$string:EntryAbility_label",
        "startWindowIcon": "$media:startIcon",
        "startWindowBackground": "$color:start_window_background",
        "exported": true,
        "skills": [
          {
            "entities": [
              "entity.system.home"
            ],
            "actions": [
              "action.system.home"
            ]
          }
        ]
      }
    ],
    "extensionAbilities": [
      {
        "name": "EntryBackupAbility",
        "srcEntry": "./ets/entrybackupability/EntryBackupAbility.ets",
        "type": "backup",
        "exported": false,
        "metadata": [
          {
            "name": "ohos.extension.backup",
            "resource": "$profile:backup_config"
          }
        ],
      }
    ],
    "requestPermissions": [
      {
        "name": "ohos.permission.MICROPHONE",
        "reason": "$string:mic_reason",
        "usedScene": {
          "abilities": [
            "EntryAbility",
          ],
          "when": "inuse",
        }
      }
    ]
  }
}

================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/entry/src/main/resources/base/element/color.json
================================================
{
  "color": [
    {
      "name": "start_window_background",
      "value": "#FFFFFF"
    }
  ]
}

================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/entry/src/main/resources/base/element/string.json
================================================
{
  "string": [
    {
      "name": "module_desc",
      "value": "On-device real-time speech recognition with Next-gen Kaldi"
    },
    {
      "name": "EntryAbility_desc",
      "value": "On-device real-time speech recognition with Next-gen Kaldi"
    },
    {
      "name": "EntryAbility_label",
      "value": "Real-time ASR"
    },
    {
      "name": "mic_reason",
      "value": "access the microphone for on-device real-time speech recognition with Next-gen Kaldi"
    }
  ]
}


================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/entry/src/main/resources/base/media/layered_image.json
================================================
{
  "layered-image":
  {
    "background" : "$media:background",
    "foreground" : "$media:foreground"
  }
}

================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/entry/src/main/resources/base/profile/backup_config.json
================================================
{
  "allowToBackupRestore": true
}

================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/entry/src/main/resources/base/profile/main_pages.json
================================================
{
  "src": [
    "pages/Index"
  ]
}


================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/entry/src/main/resources/en_US/element/string.json
================================================
{
  "string": [
    {
      "name": "module_desc",
      "value": "On-device real-time speech recognition with Next-gen Kaldi"
    },
    {
      "name": "EntryAbility_desc",
      "value": "On-device real-time speech recognition with Next-gen Kaldi"
    },
    {
      "name": "EntryAbility_label",
      "value": "Real-time ASR"
    },
    {
      "name": "mic_reason",
      "value": "access the microphone for on-device real-time speech recognition with Next-gen Kaldi"
    }
  ]
}


================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/entry/src/main/resources/rawfile/.gitkeep
================================================


================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/entry/src/main/resources/zh_CN/element/string.json
================================================
{
  "string": [
    {
      "name": "module_desc",
      "value": "新一代Kaldi: 本地实时语音识别"
    },
    {
      "name": "EntryAbility_desc",
      "value": "新一代Kaldi: 本地实时语音识别"
    },
    {
      "name": "EntryAbility_label",
      "value": "实时语音识别"
    },
    {
      "name": "mic_reason",
      "value": "使用新一代Kaldi, 访问麦克风进行本地实时语音识别 (不需要联网)"
    }
  ]
}

================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/entry/src/ohosTest/ets/test/Ability.test.ets
================================================
import hilog from '@ohos.hilog';
import { describe, beforeAll, beforeEach, afterEach, afterAll, it, expect } from '@ohos/hypium';

export default function abilityTest() {
  describe('ActsAbilityTest', () => {
    // Defines a test suite. Two parameters are supported: test suite name and test suite function.
    beforeAll(() => {
      // Presets an action, which is performed only once before all test cases of the test suite start.
      // This API supports only one parameter: preset action function.
    })
    beforeEach(() => {
      // Presets an action, which is performed before each unit test case starts.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: preset action function.
    })
    afterEach(() => {
      // Presets a clear action, which is performed after each unit test case ends.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: clear action function.
    })
    afterAll(() => {
      // Presets a clear action, which is performed after all test cases of the test suite end.
      // This API supports only one parameter: clear action function.
    })
    it('assertContain', 0, () => {
      // Defines a test case. This API supports three parameters: test case name, filter parameter, and test case function.
      hilog.info(0x0000, 'testTag', '%{public}s', 'it begin');
      let a = 'abc';
      let b = 'b';
      // Defines a variety of assertion methods, which are used to declare expected boolean conditions.
      expect(a).assertContain(b);
      expect(a).assertEqual(a);
    })
  })
}

================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/entry/src/ohosTest/ets/test/List.test.ets
================================================
import abilityTest from './Ability.test';

export default function testsuite() {
  abilityTest();
}

================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/entry/src/ohosTest/module.json5
================================================
{
  "module": {
    "name": "entry_test",
    "type": "feature",
    "deviceTypes": [
      "phone",
      "tablet",
      "2in1"
    ],
    "deliveryWithInstall": true,
    "installationFree": false
  }
}


================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/entry/src/test/List.test.ets
================================================
import localUnitTest from './LocalUnit.test';

export default function testsuite() {
  localUnitTest();
}

================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/entry/src/test/LocalUnit.test.ets
================================================
import { describe, beforeAll, beforeEach, afterEach, afterAll, it, expect } from '@ohos/hypium';

export default function localUnitTest() {
  describe('localUnitTest', () => {
    // Defines a test suite. Two parameters are supported: test suite name and test suite function.
    beforeAll(() => {
      // Presets an action, which is performed only once before all test cases of the test suite start.
      // This API supports only one parameter: preset action function.
    });
    beforeEach(() => {
      // Presets an action, which is performed before each unit test case starts.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: preset action function.
    });
    afterEach(() => {
      // Presets a clear action, which is performed after each unit test case ends.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: clear action function.
    });
    afterAll(() => {
      // Presets a clear action, which is performed after all test cases of the test suite end.
      // This API supports only one parameter: clear action function.
    });
    it('assertContain', 0, () => {
      // Defines a test case. This API supports three parameters: test case name, filter parameter, and test case function.
      let a = 'abc';
      let b = 'b';
      // Defines a variety of assertion methods, which are used to declare expected boolean conditions.
      expect(a).assertContain(b);
      expect(a).assertEqual(a);
    });
  });
}

================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/hvigor/hvigor-config.json5
================================================
{
  "modelVersion": "5.0.0",
  "dependencies": {
  },
  "execution": {
    // "analyze": "normal",                     /* Define the build analyze mode. Value: [ "normal" | "advanced" | false ]. Default: "normal" */
    // "daemon": true,                          /* Enable daemon compilation. Value: [ true | false ]. Default: true */
    // "incremental": true,                     /* Enable incremental compilation. Value: [ true | false ]. Default: true */
    // "parallel": true,                        /* Enable parallel compilation. Value: [ true | false ]. Default: true */
    // "typeCheck": false,                      /* Enable typeCheck. Value: [ true | false ]. Default: false */
  },
  "logging": {
    // "level": "info"                          /* Define the log level. Value: [ "debug" | "info" | "warn" | "error" ]. Default: "info" */
  },
  "debugging": {
    // "stacktrace": false                      /* Disable stacktrace compilation. Value: [ true | false ]. Default: false */
  },
  "nodeOptions": {
    // "maxOldSpaceSize": 8192                  /* Enable nodeOptions maxOldSpaceSize compilation. Unit M. Used for the daemon process. Default: 8192*/
    // "exposeGC": true                         /* Enable to trigger garbage collection explicitly. Default: true*/
  }
}


================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/hvigorfile.ts
================================================
import { appTasks } from '@ohos/hvigor-ohos-plugin';

export default {
    system: appTasks,  /* Built-in plugin of Hvigor. It cannot be modified. */
    plugins:[]         /* Custom plugin to extend the functionality of Hvigor. */
}


================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/oh-package-lock.json5
================================================
{
  "meta": {
    "stableOrder": true
  },
  "lockfileVersion": 3,
  "ATTENTION": "THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.",
  "specifiers": {
    "@ohos/hypium@1.0.19": "@ohos/hypium@1.0.19"
  },
  "packages": {
    "@ohos/hypium@1.0.19": {
      "name": "@ohos/hypium",
      "version": "1.0.19",
      "integrity": "sha512-cEjDgLFCm3cWZDeRXk7agBUkPqjWxUo6AQeiu0gEkb3J8ESqlduQLSIXeo3cCsm8U/asL7iKjF85ZyOuufAGSQ==",
      "resolved": "https://ohpm.openharmony.cn/ohpm/@ohos/hypium/-/hypium-1.0.19.har",
      "registryType": "ohpm"
    }
  }
}

================================================
FILE: harmony-os/SherpaOnnxStreamingAsr/oh-package.json5
================================================
{
  "modelVersion": "5.0.0",
  "description": "Please describe the basic information.",
  "dependencies": {
  },
  "devDependencies": {
    "@ohos/hypium": "1.0.19"
  }
}


================================================
FILE: harmony-os/SherpaOnnxTts/.gitignore
================================================
/node_modules
/oh_modules
/local.properties
/.idea
**/build
/.hvigor
.cxx
/.clangd
/.clang-format
/.clang-tidy
**/.test
/.appanalyzer

================================================
FILE: harmony-os/SherpaOnnxTts/AppScope/app.json5
================================================
{
  "app": {
    "bundleName": "com.k2fsa.sherpa.onnx.tts",
    "vendor": "next-gen Kaldi",
    "versionCode": 1000000,
    "versionName": "1.0.0",
    "icon": "$media:app_icon",
    "label": "$string:app_name"
  }
}


================================================
FILE: harmony-os/SherpaOnnxTts/AppScope/resources/base/element/string.json
================================================
{
  "string": [
    {
      "name": "app_name",
      "value": "SherpaOnnxTts"
    }
  ]
}


================================================
FILE: harmony-os/SherpaOnnxTts/README.md
================================================
# Introduction

Please see
https://k2-fsa.github.io/sherpa/onnx/harmony-os/tts.html
for how to run code in this folder.


================================================
FILE: harmony-os/SherpaOnnxTts/build-profile.json5
================================================
{
  "app": {
    "signingConfigs": [],
    "products": [
      {
        "name": "default",
        "signingConfig": "default",
        "compatibleSdkVersion": "4.0.0(10)",
        "runtimeOS": "HarmonyOS",
        "buildOption": {
          "strictMode": {
            "caseSensitiveCheck": true,
          }
        }
      }
    ],
    "buildModeSet": [
      {
        "name": "debug",
      },
      {
        "name": "release"
      }
    ]
  },
  "modules": [
    {
      "name": "entry",
      "srcPath": "./entry",
      "targets": [
        {
          "name": "default",
          "applyToProducts": [
            "default"
          ]
        }
      ]
    }
  ]
}

================================================
FILE: harmony-os/SherpaOnnxTts/code-linter.json5
================================================
{
  "files": [
    "**/*.ets"
  ],
  "ignore": [
    "**/src/ohosTest/**/*",
    "**/src/test/**/*",
    "**/src/mock/**/*",
    "**/node_modules/**/*",
    "**/oh_modules/**/*",
    "**/build/**/*",
    "**/.preview/**/*"
  ],
  "ruleSet": [
    "plugin:@performance/recommended",
    "plugin:@typescript-eslint/recommended"
  ],
  "rules": {
  }
}

================================================
FILE: harmony-os/SherpaOnnxTts/entry/.gitignore
================================================
/node_modules
/oh_modules
/.preview
/build
/.cxx
/.test

================================================
FILE: harmony-os/SherpaOnnxTts/entry/build-profile.json5
================================================
{
  "apiType": "stageMode",
  "buildOption": {
    "sourceOption": {
      "workers": [
        "./src/main/ets/workers/NonStreamingTtsWorker.ets"
      ]
    }
  },
  "buildOptionSet": [
    {
      "name": "release",
      "arkOptions": {
        "obfuscation": {
          "ruleOptions": {
            "enable": false,
            "files": [
              "./obfuscation-rules.txt"
            ]
          }
        }
      }
    },
  ],
  "targets": [
    {
      "name": "default"
    },
    {
      "name": "ohosTest",
    }
  ]
}

================================================
FILE: harmony-os/SherpaOnnxTts/entry/hvigorfile.ts
================================================
import { hapTasks } from '@ohos/hvigor-ohos-plugin';

export default {
    system: hapTasks,  /* Built-in plugin of Hvigor. It cannot be modified. */
    plugins:[]         /* Custom plugin to extend the functionality of Hvigor. */
}


================================================
FILE: harmony-os/SherpaOnnxTts/entry/obfuscation-rules.txt
================================================
# Define project specific obfuscation rules here.
# You can include the obfuscation configuration files in the current module's build-profile.json5.
#
# For more details, see
#   https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/source-obfuscation-V5

# Obfuscation options:
# -disable-obfuscation: disable all obfuscations
# -enable-property-obfuscation: obfuscate the property names
# -enable-toplevel-obfuscation: obfuscate the names in the global scope
# -compact: remove unnecessary blank spaces and all line feeds
# -remove-log: remove all console.* statements
# -print-namecache: print the name cache that contains the mapping from the old names to new names
# -apply-namecache: reuse the given cache file

# Keep options:
# -keep-property-name: specifies property names that you want to keep
# -keep-global-name: specifies names that you want to keep in the global scope

-enable-property-obfuscation
-enable-toplevel-obfuscation
-enable-filename-obfuscation
-enable-export-obfuscation

================================================
FILE: harmony-os/SherpaOnnxTts/entry/oh-package-lock.json5
================================================
{
  "meta": {
    "stableOrder": true
  },
  "lockfileVersion": 3,
  "ATTENTION": "THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.",
  "specifiers": {
    "libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1.10.32/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx": "libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1.10.32/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx",
    "sherpa_onnx@1.10.32": "sherpa_onnx@1.10.32"
  },
  "packages": {
    "libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1.10.32/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx": {
      "name": "libsherpa_onnx.so",
      "version": "1.0.0",
      "resolved": "../oh_modules/.ohpm/sherpa_onnx@1.10.32/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx",
      "registryType": "local"
    },
    "sherpa_onnx@1.10.32": {
      "name": "sherpa_onnx",
      "version": "1.10.32",
      "integrity": "sha512-yHYmWoeqhrunOqGr9gxPJJH/8+rdwcKFOW6onYByVObQVpbqypslg301IjGm9xpnc5bJEkO3S9sra2zQTpPA/w==",
      "resolved": "https://ohpm.openharmony.cn/ohpm/sherpa_onnx/-/sherpa_onnx-1.10.32.har",
      "registryType": "ohpm",
      "dependencies": {
        "libsherpa_onnx.so": "file:./src/main/cpp/types/libsherpa_onnx"
      }
    }
  }
}

================================================
FILE: harmony-os/SherpaOnnxTts/entry/oh-package.json5
================================================
{
  "name": "entry",
  "version": "1.0.0",
  "description": "Please describe the basic information.",
  "main": "",
  "author": "",
  "license": "",
  "dependencies": {
    "sherpa_onnx": "1.12.31",
  }
}


================================================
FILE: harmony-os/SherpaOnnxTts/entry/src/main/ets/entryability/EntryAbility.ets
================================================
import AbilityConstant from '@ohos.app.ability.AbilityConstant';
import hilog from '@ohos.hilog';
import UIAbility from '@ohos.app.ability.UIAbility';
import Want from '@ohos.app.ability.Want';
import window from '@ohos.window';

export default class EntryAbility extends UIAbility {
  onCreate(want: Want, launchParam: AbilityConstant.LaunchParam): void {
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onCreate');
  }

  onDestroy(): void {
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onDestroy');
  }

  onWindowStageCreate(windowStage: window.WindowStage): void {
    // Main window is created, set main page for this ability
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onWindowStageCreate');

    windowStage.loadContent('pages/Index', (err) => {
      if (err.code) {
        hilog.error(0x0000, 'testTag', 'Failed to load the content. Cause: %{public}s', JSON.stringify(err) ?? '');
        return;
      }
      hilog.info(0x0000, 'testTag', 'Succeeded in loading the content.');
    });
  }

  onWindowStageDestroy(): void {
    // Main window is destroyed, release UI related resources
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onWindowStageDestroy');
  }

  onForeground(): void {
    // Ability has brought to foreground
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onForeground');
  }

  onBackground(): void {
    // Ability has back to background
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onBackground');
  }
}


================================================
FILE: harmony-os/SherpaOnnxTts/entry/src/main/ets/entrybackupability/EntryBackupAbility.ets
================================================
import hilog from '@ohos.hilog';
import BackupExtensionAbility, { BundleVersion } from '@ohos.application.BackupExtensionAbility';

export default class EntryBackupAbility extends BackupExtensionAbility {
  async onBackup() {
    hilog.info(0x0000, 'testTag', 'onBackup ok');
  }

  async onRestore(bundleVersion: BundleVersion) {
    hilog.info(0x0000, 'testTag', 'onRestore ok %{public}s', JSON.stringify(bundleVersion));
  }
}

================================================
FILE: harmony-os/SherpaOnnxTts/entry/src/main/ets/pages/Index.ets
================================================
import { CircularBuffer } from 'sherpa_onnx';
import worker, { MessageEvents } from '@ohos.worker';
import { audio } from '@kit.AudioKit';
import picker from '@ohos.file.picker';
import fs from '@ohos.file.fs';
import systemTime from '@ohos.systemTime';


function savePcmToWav(filename: string, samples: Int16Array, sampleRate: number) {
  const fp = fs.openSync(filename, fs.OpenMode.READ_WRITE | fs.OpenMode.CREATE);

  const header = new ArrayBuffer(44);
  const view = new DataView(header);

  // http://soundfile.sapp.org/doc/WaveFormat/
  //                   F F I R
  view.setUint32(0, 0x46464952, true); // chunkID
  view.setUint32(4, 36 + samples.length * 2, true); // chunkSize //                   E V A W
  view.setUint32(8, 0x45564157, true); // format // //                      t m f
  view.setUint32(12, 0x20746d66, true); // subchunk1ID
  view.setUint32(16, 16, true); // subchunk1Size, 16 for PCM
  view.setUint32(20, 1, true); // audioFormat, 1 for PCM
  view.setUint16(22, 1, true); // numChannels: 1 channel
  view.setUint32(24, sampleRate, true); // sampleRate
  view.setUint32(28, sampleRate * 2, true); // byteRate
  view.setUint16(32, 2, true); // blockAlign
  view.setUint16(34, 16, true); // bitsPerSample
  view.setUint32(36, 0x61746164, true); // Subchunk2ID
  view.setUint32(40, samples.length * 2, true); // subchunk2Size

  fs.writeSync(fp.fd, new Uint8Array(header).buffer, { length: header.byteLength });
  fs.writeSync(fp.fd, samples.buffer, { length: samples.buffer.byteLength });

  fs.closeSync(fp.fd);
}

function toInt16Samples(samples: Float32Array): Int16Array {
  const int16Samples = new Int16Array(samples.length);
  for (let i = 0; i < samples.length; ++i) {
    let s = samples[i] * 32767;
    s = s > 32767 ? 32767 : s;
    s = s < -32768 ? -32768 : s;
    int16Samples[i] = s;
  }

  return int16Samples;
}


@Entry
@Component
struct Index {
  @State currentIndex: number = 0;
  @State title: string = 'Next-gen Kaldi: Text-to-speech';
  @State info: string = '';
  @State btnStartCaption: string = 'Start';
  @State btnStartEnabled: boolean = false;
  @State btnStopCaption: string = 'Stop';
  @State btnStopEnabled: boolean = false;
  @State btnSaveCaption: string = 'Save';
  @State btnSaveEnabled: boolean = false;
  @State progress: number = 0;
  @State sid: string = '0';
  @State speechSpeed: string = '1.0';
  @State isGenerating: boolean = false;
  @State initTtsDone: boolean = false;
  @State ttsGeneratedDone: boolean = true;
  @State numSpeakers: number = 1;
  @State numThreads: number = 1;
  @State initAudioDone: boolean = false;
  private controller: TabsController = new TabsController();
  private cancelled: boolean = false;
  private sampleRate: number = 0;
  private startTime: number = 0;
  private stopTime: number = 0;
  private inputText: string = '';
  // it specifies only the initial capacity.
  private workerInstance?: worker.ThreadWorker
  private readonly scriptURL: string = 'entry/ets/workers/NonStreamingTtsWorker.ets'
  // note that circular buffer can automatically resize.
  private sampleBuffer: CircularBuffer = new CircularBuffer(16000 * 5);
  private finalSamples: Float32Array | null = null;
  private audioRenderer: audio.AudioRenderer | null = null;

  initAudioRenderer() {
    if (this.audioRenderer) {
      console.log(`Audio renderer has already been created. Skip creating`);
      return;
    } // see // https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/using-audiorenderer-for-playback-V5
    console.log('Initializing audio renderer');
    const audioStreamInfo: audio.AudioStreamInfo = {
      samplingRate: this.sampleRate,
      channels: audio.AudioChannel.CHANNEL_1, // 通道
      sampleFormat: audio.AudioSampleFormat.SAMPLE_FORMAT_S16LE,
      encodingType: audio.AudioEncodingType.ENCODING_TYPE_RAW
    };

    const audioRendererInfo: audio.AudioRendererInfo = {
      usage: audio.StreamUsage.STREAM_USAGE_MUSIC, rendererFlags: 0
    };

    const audioRendererOptions: audio.AudioRendererOptions = {
      streamInfo: audioStreamInfo, rendererInfo: audioRendererInfo
    };

    audio.createAudioRenderer(audioRendererOptions, (err, renderer) => {
      if (!err) {
        console.log('audio renderer initialized successfully');
        this.initAudioDone = true;
        if (renderer) {
          this.audioRenderer = renderer;
          this.audioRenderer.on("writeData", this.audioPlayCallback);
          if (this.sampleBuffer.size()) {
            this.audioRenderer.start();
          }
        } else {
          console.log(`returned audio renderer is ${renderer}`);
        }
      } else {
        console.log(`Failed to initialize audio renderer. error message: ${err.message}, error code: ${err.code}`);
      }
    });
  }

  async aboutToAppear() {
    this.initAudioRenderer();

    this.workerInstance = new worker.ThreadWorker(this.scriptURL, {
      name: 'NonStreaming TTS worker'
    });
    this.workerInstance.onmessage = (e: MessageEvents) => {
      const msgType = e.data['msgType'] as string;
      console.log(`received msg from worker: ${msgType}`);

      if (msgType == 'init-tts-done') {
        this.info = 'Model initialized!\nPlease enter text and press start.';
        this.sampleRate = e.data['sampleRate'] as number;
        this.numSpeakers = e.data['numSpeakers'] as number;
        this.numThreads = e.data['numThreads'] as number;

        this.initTtsDone = true;
      }

      if (msgType == 'tts-generate-partial') {
        if (this.cancelled) {
          return;
        }

        const samples: Float32Array = e.data['samples'] as Float32Array;
        const progress: number = e.data['progress'] as number;
        this.progress = progress;

        this.sampleBuffer.push(samples);

        if (!this.initAudioDone) {
          this.initAudioRenderer();
        }

        if (this.audioRenderer && this.audioRenderer?.state != audio.AudioState.STATE_RUNNING) {
          this.audioRenderer.start();
        }
      }

      if (msgType == 'tts-generate-done') {
        this.isGenerating = false;
        const samples: Float32Array = e.data['samples'] as Float32Array;

        systemTime.getRealTime((err, data) => {

          if (err) {
            console.log(`Failed to get stop time`)
          } else {
            this.stopTime = data;

            const audioDuration = samples.length / this.sampleRate;
            const elapsedSeconds = (this.stopTime - this.startTime) / 1000;
            const RTF = elapsedSeconds / audioDuration;

            this.info = `Audio duration: ${audioDuration} s
Elapsed: ${elapsedSeconds} s
RTF = ${elapsedSeconds.toFixed(2)}/${audioDuration.toFixed(2)} = ${RTF.toFixed(3)}
Number of threads: ${this.numThreads}
`;
            if (this.cancelled) {
              this.info += '\nCancelled.';
            }
          }
        });

        this.finalSamples = samples;
        this.ttsGeneratedDone = true;
        this.btnSaveEnabled = true;

        this.ttsGeneratedDone = true;

        if (this.audioRenderer && this.audioRenderer?.state != audio.AudioState.STATE_RUNNING &&
          this.sampleBuffer.size() == 0) {
          this.sampleBuffer.push(samples);
          this.progress = 1;
          this.audioRenderer.start();
        }

        if (!this.initAudioDone) {
          this.btnStartEnabled = true;
          this.btnStopEnabled = false;
          this.info += '\nAudio renderer is not initialized. Disable playing audio.';
        }
      }
    }

    this.info = 'Initializing TTS model ...';
    this.workerInstance.postMessage({ msgType: 'init-tts', context: getContext() });
  }

  @Builder
  TabBuilder(title: string, targetIndex: number, selectedImg: Resource, normalImg: Resource) {
    Column() {
      Image(this.currentIndex == targetIndex ? selectedImg : normalImg).size({ width: 25, height: 25 })
      Text(title).fontColor(this.currentIndex == targetIndex ? '#28bff1' : '#8a8a8a')
    }.width('100%').height(50).justifyContent(FlexAlign.Center).onClick(() => {
      this.currentIndex = targetIndex;
      this.controller.changeIndex(this.currentIndex);
    })
  }

  build() {
    Column() {
      Tabs({ barPosition: BarPosition.End, controller: this.controller }) {
        TabContent() {
          Column({ space: 10 }) {
            Text(this.title).fontSize(20).fontWeight(FontWeight.Bold);
            if (this.numSpeakers > 1) {
              Row({ space: 10 }) {
                Text(`Speaker ID (0-${this.numSpeakers - 1})`).width('60%')

                TextInput({ text: this.sid }).onChange((text) => {
                  this.sid = text.trim();
                }).width('20%')
              }.justifyContent(FlexAlign.Center)
            }

            Row() {
              Text('Speech speed').width('60%');

              TextInput({ text: this.speechSpeed }).onChange((text) => {
                this.speechSpeed = text.trim();
              }).width('20%')
            }

            Row({ space: 10 }) {
              Button(this.btnStartCaption).enabled(this.btnStartEnabled).onClick(async () => {
                let sid = parseInt(this.sid);
                if (sid.toString() != this.sid) {
                  this.info = 'Please input a valid speaker ID';
                  return;
                }

                let speed = parseFloat(this.speechSpeed);
                if (isNaN(speed)) {
                  this.info = 'Please enter a valid speech speed';
                  return;
                }

                if (speed <= 0) {
                  this.info = 'Please enter a positive speech speed';
                  return;
                }

                if (this.workerInstance && this.initTtsDone) {
                  this.info = 'Generating...';
                  this.cancelled = false;
                  this.finalSamples = null;
                  this.sampleBuffer.reset();
                  this.ttsGeneratedDone = false;
                  this.progress = 0;

                  this.btnStartEnabled = false;
                  this.btnStopEnabled = true;
                  this.btnSaveEnabled = false;
                  console.log(`sending ${this.inputText}`)
                  this.ttsGeneratedDone = false;
                  this.startTime = await systemTime.getRealTime();
                  this.workerInstance?.postMessage({
                    msgType: 'tts-generate',
                    text: this.inputText,
                    sid: sid,
                    speed: speed,
                  });
                  this.isGenerating = true;
                  this.info = '';
                } else {
                  this.info = 'Failed to initialize tts model';
                  this.btnStartEnabled = false;
                }
              });

              Button(this.btnStopCaption).enabled(this.btnStopEnabled).onClick(() => {
                this.ttsGeneratedDone = true;
                this.btnStartEnabled = true;
                this.btnStopEnabled = false;
                this.sampleBuffer.reset();
                this.cancelled = true;
                this.isGenerating = false;

                if (this.workerInstance && this.initTtsDone) {
                  this.workerInstance.postMessage({ msgType: 'tts-generate-cancel' });
                }
                this.audioRenderer?.stop();
              })

              Button(this.btnSaveCaption).enabled(this.btnSaveEnabled).onClick(() => {
                if (!this.finalSamples || this.finalSamples.length == 0) {

                  this.btnSaveEnabled = false;
                  return;
                }

                let uri: string = '';

                const audioOptions = new picker.AudioSaveOptions(); // audioOptions.newFileNames = ['o.wav'];

                const audioViewPicker = new picker.AudioViewPicker();

                audioViewPicker.save(audioOptions).then((audioSelectResult: Array<string>) => {
                  uri = audioSelectResult[0];
                  if (this.finalSamples) {
                    savePcmToWav(uri, toInt16Samples(this.finalSamples), this.sampleRate);
                    console.log(`Saved to ${uri}`);
                    this.info += `\nSaved to ${uri}`;
                  }
                });
              });
            }

            if (this.info != '') {
              TextArea({ text: this.info }).focusable(false);
            }
            if (this.progress > 0) {
              Row() {
                Progress({ value: 0, total: 100, type: ProgressType.Capsule })
                  .width('80%')
                  .height(20)
                  .value(this.progress * 100);

                Text(`${(this.progress * 100).toFixed(2)}%`).width('15%')
              }.width('100%').justifyContent(FlexAlign.Center)
            }

            TextArea({ placeholder: 'Input text for TTS and click the start button' })
              .width('100%')
              .height('100%')
              .focusable(this.isGenerating == false && this.initTtsDone)
              .onChange((text) => {
                this.inputText = text;
                if (text.trim() == '') {
                  this.btnStartEnabled = false;
                  return;
                }
                this.btnStartEnabled = true;
              })
          }.width('100%')

          // see https://composeicons.com/
        }.tabBar(this.TabBuilder('TTS', 0, $r('app.media.home'), $r('app.media.home')))

        TabContent() {
          Column({space: 10}) {
            Text(this.title).fontSize(20).fontWeight(FontWeight.Bold);
            TextArea({text: `
Everyting is open-sourced.

It runs locally, without accessing the network

See also https://github.com/k2-fsa/sherpa-onnx

新一代 Kaldi QQ 和微信交流群: 请看

https://k2-fsa.github.io/sherpa/social-groups.html

微信公众号: 新一代 Kaldi
            `}).width('100%')
              .height('100%')
              .focusable(false)
          }.justifyContent(FlexAlign.Start)
        }.tabBar(this.TabBuilder('Help', 1, $r('app.media.info'), $r('app.media.info')))
      }.scrollable(false)
    }
  }

  private audioPlayCallback = (buffer: ArrayBuffer) => {
    const numSamples = buffer.byteLength / 2;
    if (this.sampleBuffer.size() >= numSamples) {
      const samples: Float32Array = this.sampleBuffer.get(this.sampleBuffer.head(), numSamples);

      const int16Samples = new Int16Array(buffer);
      for (let i = 0; i < numSamples; ++i) {
        let s = samples[i] * 32767;
        s = s > 32767 ? 32767 : s;
        s = s < -32768 ? -32768 : s;
        int16Samples[i] = s;
      }
      this.sampleBuffer.pop(numSamples);
    } else {
      (new Int16Array(buffer)).fill(0);
      if (this.ttsGeneratedDone) {
        this.audioRenderer?.stop();
        this.btnStartEnabled = true;
        this.btnStopEnabled = false;
      }
    }
  };
}

================================================
FILE: harmony-os/SherpaOnnxTts/entry/src/main/ets/workers/NonStreamingTtsWorker.ets
================================================
import worker, { ThreadWorkerGlobalScope, MessageEvents, ErrorEvent } from '@ohos.worker';

import { fileIo as fs } from '@kit.CoreFileKit';

import { OfflineTtsConfig, OfflineTts, listRawfileDir, TtsInput, TtsOutput } from 'sherpa_onnx';
import { buffer } from '@kit.ArkTS';

const workerPort: ThreadWorkerGlobalScope = worker.workerPort;

let tts: OfflineTts;
let cancelled = false;

function mkdir(context: Context, parts: string[]) {
  const path = parts.join('/');
  if (fs.accessSync(path)) {
    return;
  }

  const sandboxPath: string = context.getApplicationContext().filesDir;
  let d = sandboxPath
  for (const p of parts) {
    d = d + '/' + p;

    if (fs.accessSync(d)) {
      continue;
    }

    fs.mkdirSync(d);
  }
}

function copyRawFileDirToSandbox(context: Context, srcDir: string) {
  let mgr = context.resourceManager;
  const allFiles: string[] = listRawfileDir(mgr, srcDir);
  for (const src of allFiles) {
    const parts: string[] = src.split('/');
    if (parts.length != 1) {
      mkdir(context, parts.slice(0, -1));
    }

    copyRawFileToSandbox(context, src, src);
  }
}

function copyRawFileToSandbox(context: Context, src: string,
  dst: string) {
  /* see
   https://blog.csdn.net/weixin_44640245/article/details/142634846
   https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/rawfile-guidelines-V5
   */
  let uint8Array: Uint8Array = context.resourceManager.getRawFileContentSync(src);

  // https://developer.huawei.com/consumer/cn/doc/harmonyos-references-V5/js-apis-file-fs-V5#fsmkdir
  let sandboxPath: string = context.getApplicationContext().filesDir;
  let filepath = sandboxPath + '/' + dst;

  if (fs.accessSync(filepath)) {
    /* if the destination exists and has the expected file size
       then we skip copying it
     */
    let stat = fs.statSync(filepath);
    if (stat.size == uint8Array.length) {
      return;
    }
  }

  const fp = fs.openSync(filepath, fs.OpenMode.WRITE_ONLY | fs.OpenMode.CREATE | fs.OpenMode.TRUNC);
  fs.writeSync(fp.fd, buffer.from(uint8Array).buffer)
  fs.close(fp.fd);
}

function initTts(context: Context): OfflineTts {
  /* Such a design is to make it easier to build flutter APPs with
     github actions for a variety of tts models

     See https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/flutter/generate-tts.py
     for details
   */

  let modelDir = '';

  // for VITS begin
  let modelName = '';
  // for VITS end

  // for Matcha begin
  let acousticModelName = '';
  let vocoder = '';
  // for Matcha end

  // for Kokoro begin
  let voices = '';
  // for Kokoro end

  let ruleFsts = '';
  let ruleFars = '';
  let lexicon = '';
  let dataDir = '';
  /*
    You can select an example below and change it according to match your
    selected tts model
   */

  // ============================================================
  // Your change starts here
  // ============================================================

  // Example 1:
  // modelDir = 'vits-vctk';
  // modelName = 'vits-vctk.onnx';
  // lexicon = 'lexicon.txt';

  // Example 2:
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
  // modelDir = 'vits-piper-en_US-amy-low';
  // modelName = 'en_US-amy-low.onnx';
  // dataDir = 'espeak-ng-data';

  // Example 3:
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
  // modelDir = 'vits-icefall-zh-aishell3';
  // modelName = 'model.onnx';
  // ruleFsts = 'phone.fst,date.fst,number.fst,new_heteronym.fst';
  // ruleFars = 'rule.far';
  // lexicon = 'lexicon.txt';

  // Example 4:
  // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#csukuangfj-vits-zh-hf-fanchen-c-chinese-187-speakers
  // modelDir = 'vits-zh-hf-fanchen-C';
  // modelName = 'vits-zh-hf-fanchen-C.onnx';
  // lexicon = 'lexicon.txt';

  // Example 5:
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2
  // modelDir = 'vits-coqui-de-css10';
  // modelName = 'model.onnx';

  // Example 6
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-libritts_r-medium.tar.bz2
  // modelDir = 'vits-piper-en_US-libritts_r-medium';
  // modelName = 'en_US-libritts_r-medium.onnx';
  // dataDir = 'espeak-ng-data';

  // Example 7
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-melo-tts-zh_en.tar.bz2
  // modelDir = 'vits-melo-tts-zh_en';
  // modelName = 'model.onnx';
  // lexicon = 'lexicon.txt';
  // ruleFsts = `date.fst,phone.fst,number.fst`;

  // Example 8
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
  // modelDir = 'matcha-icefall-zh-baker';
  // acousticModelName = 'model-steps-3.onnx';
  // vocoder = 'vocos-22khz-univ.onnx';
  // lexicon = 'lexicon.txt';
  // ruleFsts = `date.fst,phone.fst,number.fst`;

  // Example 9
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
  // modelDir = 'matcha-icefall-en_US-ljspeech';
  // acousticModelName = 'model-steps-3.onnx';
  // vocoder = 'vocos-22khz-univ.onnx';
  // dataDir = 'espeak-ng-data';

  // Example 10
  // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html#kokoro-en-v0-19-english-11-speakers
  // modelDir = 'kokoro-en-v0_19';
  // modelName = 'model.onnx';
  // voices = 'voices.bin'
  // dataDir = 'espeak-ng-data';

  // Example 11
  // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
  // modelDir = 'kokoro-multi-lang-v1_0';
  // modelName = 'model.onnx';
  // voices = 'voices.bin'
  // dataDir = 'espeak-ng-data';
  // lexicon = 'lexicon-us-en.txt,lexicon-zh.txt';
  // ruleFsts = `date-zh.fst,phone-zh.fst,number-zh.fst`;

  // ============================================================
  // Please don't change the remaining part of this function
  // ============================================================

  if (modelName == '' && acousticModelName == '' && vocoder == '') {
    throw new Error('You are supposed to select a model by changing the code before you run the app');
  }

  if (modelName != '' && acousticModelName != '') {
    throw new Error('Please select either VITS or Matcha, not both');
  }

  if (acousticModelName != '' && vocoder == '') {
    throw new Error('Please provider vocoder for matcha tts models');
  }

  if (modelName != '') {
    modelName = modelDir + '/' + modelName;
  }

  if (acousticModelName != '') {
    acousticModelName = modelDir + '/' + acousticModelName;
  }

  if (voices != '') {
    voices = modelDir + '/' + voices;
  }

  if (ruleFsts != '') {
    let fsts = ruleFsts.split(',')
    let tmp: string[] = [];
    for (const f of fsts) {
      tmp.push(modelDir + '/' + f);
    }
    ruleFsts = tmp.join(',');
  }

  if (ruleFars != '') {
    let fars = ruleFars.split(',')
    let tmp: string[] = [];
    for (const f of fars) {
      tmp.push(modelDir + '/' + f);
    }
    ruleFars = tmp.join(',');
  }

  if (lexicon.includes(",")) {
    let v = lexicon.split(',')
    let tmp: string[] = [];
    for (const f of v) {
      tmp.push(modelDir + '/' + f);
    }
    lexicon = tmp.join(',');
  } else if (lexicon != '') {
    lexicon = modelDir + '/' + lexicon;
  }

  if (dataDir != '') {
    copyRawFileDirToSandbox(context, modelDir + '/' + dataDir)
    let sandboxPath: string = context.getApplicationContext().filesDir;
    dataDir = sandboxPath + '/' + modelDir + '/' + dataDir;
  }

  const tokens = modelDir + '/tokens.txt';

  const config: OfflineTtsConfig = new OfflineTtsConfig();
  if (voices != '') {
    config.model.vits.model = '';
  } else {
    config.model.vits.model = modelName;
  }

  if (voices == '') {
    config.model.vits.lexicon = lexicon;
    config.model.vits.tokens = tokens;
    config.model.vits.dataDir = dataDir;

    config.model.matcha.acousticModel = acousticModelName;
    config.model.matcha.vocoder = vocoder;
    config.model.matcha.lexicon = lexicon;
    config.model.matcha.tokens = tokens;
    config.model.matcha.dataDir = dataDir;
  }

  if (voices != '') {
    config.model.kokoro.model = modelName;
  } else {
    config.model.kokoro.model = '';
  }

  if (voices != '') {
    config.model.kokoro.voices = voices;
    config.model.kokoro.tokens = tokens;
    config.model.kokoro.dataDir = dataDir;
    config.model.kokoro.lexicon = lexicon;
  }

  config.model.numThreads = 2;
  config.model.debug = true;
  config.ruleFsts = ruleFsts;
  config.ruleFars = ruleFars;

  return new OfflineTts(config, context.resourceManager);
}

interface TtsCallbackData {
  samples: Float32Array;
  progress: number;
}

function callback(data: TtsCallbackData): number {
  workerPort.postMessage({
    'msgType': 'tts-generate-partial', samples: Float32Array.from(data.samples), progress: data.progress,
  });

  // 0 means to stop generating in C++
  // 1 means to continue generating in C++
  return cancelled ? 0 : 1;
}

/**
 * Defines the event handler to be called when the worker thread receives a message sent by the host thread.
 * The event handler is executed in the worker thread.
 *
 * @param e message data
 */
workerPort.onmessage = (e: MessageEvents) => {
  const msgType = e.data['msgType'] as string;
  console.log(`msg-type: ${msgType}`);
  if (msgType == 'init-tts' && !tts) {
    const context = e.data['context'] as Context;
    tts = initTts(context);
    workerPort.postMessage({
      'msgType': 'init-tts-done',
      sampleRate: tts.sampleRate,
      numSpeakers: tts.numSpeakers,
      numThreads: tts.config.model.numThreads,
    });
  }

  if (msgType == 'tts-generate-cancel') {
    cancelled = true;
  }

  if (msgType == 'tts-generate') {
    const text = e.data['text'] as string;
    console.log(`recevied text ${text}`);
    const input: TtsInput = new TtsInput();
    input.text = text;
    input.sid = e.data['sid'] as number;
    input.speed = e.data['speed'] as number;
    input.callback = callback;

    cancelled = false;
    if (true) {
      tts.generateAsync(input).then((ttsOutput: TtsOutput) => {
        console.log(`sampleRate: ${ttsOutput.sampleRate}`);

        workerPort.postMessage({
          'msgType': 'tts-generate-done', samples: Float32Array.from(ttsOutput.samples),
        });

      });
    } else {
      const ttsOutput: TtsOutput = tts.generate(input);
      workerPort.postMessage({
        'msgType': 'tts-generate-done', samples: Float32Array.from(ttsOutput.samples),
      });
    }


  }
}

/**
 * Defines the event handler to be called when the worker receives a message that cannot be deserialized.
 * The event handler is executed in the worker thread.
 *
 * @param e message data
 */
workerPort.onmessageerror = (e: MessageEvents) => {
}

/**
 * Defines the event handler to be called when an exception occurs during worker execution.
 * The event handler is executed in the worker thread.
 *
 * @param e error message
 */
workerPort.onerror = (e: ErrorEvent) => {
}


================================================
FILE: harmony-os/SherpaOnnxTts/entry/src/main/module.json5
================================================
{
  "module": {
    "name": "entry",
    "type": "entry",
    "description": "$string:module_desc",
    "mainElement": "EntryAbility",
    "deviceTypes": [
      "phone",
      "tablet",
      "2in1"
    ],
    "deliveryWithInstall": true,
    "installationFree": false,
    "pages": "$profile:main_pages",
    "abilities": [
      {
        "name": "EntryAbility",
        "srcEntry": "./ets/entryability/EntryAbility.ets",
        "description": "$string:EntryAbility_desc",
        "icon": "$media:layered_image",
        "label": "$string:EntryAbility_label",
        "startWindowIcon": "$media:startIcon",
        "startWindowBackground": "$color:start_window_background",
        "exported": true,
        "skills": [
          {
            "entities": [
              "entity.system.home"
            ],
            "actions": [
              "action.system.home"
            ]
          }
        ]
      }
    ],
    "extensionAbilities": [
      {
        "name": "EntryBackupAbility",
        "srcEntry": "./ets/entrybackupability/EntryBackupAbility.ets",
        "type": "backup",
        "exported": false,
        "metadata": [
          {
            "name": "ohos.extension.backup",
            "resource": "$profile:backup_config"
          }
        ],
      }
    ]
  }
}

================================================
FILE: harmony-os/SherpaOnnxTts/entry/src/main/resources/base/element/color.json
================================================
{
  "color": [
    {
      "name": "start_window_background",
      "value": "#FFFFFF"
    }
  ]
}

================================================
FILE: harmony-os/SherpaOnnxTts/entry/src/main/resources/base/element/string.json
================================================
{
  "string": [
    {
      "name": "module_desc",
      "value": "On-device text-to-speech with Next-gen Kaldi"
    },
    {
      "name": "EntryAbility_desc",
      "value": "On-device text-to-speech with Next-gen Kaldi"
    },
    {
      "name": "EntryAbility_label",
      "value": "TTS"
    }
  ]
}

================================================
FILE: harmony-os/SherpaOnnxTts/entry/src/main/resources/base/media/layered_image.json
================================================
{
  "layered-image":
  {
    "background" : "$media:background",
    "foreground" : "$media:foreground"
  }
}

================================================
FILE: harmony-os/SherpaOnnxTts/entry/src/main/resources/base/profile/backup_config.json
================================================
{
  "allowToBackupRestore": true
}

================================================
FILE: harmony-os/SherpaOnnxTts/entry/src/main/resources/base/profile/main_pages.json
================================================
{
  "src": [
    "pages/Index"
  ]
}


================================================
FILE: harmony-os/SherpaOnnxTts/entry/src/main/resources/en_US/element/string.json
================================================
{
  "string": [
    {
      "name": "module_desc",
      "value": "On-device text-to-speech with Next-gen Kaldi"
    },
    {
      "name": "EntryAbility_desc",
      "value": "On-device text-to-speech with Next-gen Kaldi"
    },
    {
      "name": "EntryAbility_label",
      "value": "TTS"
    }
  ]
}

================================================
FILE: harmony-os/SherpaOnnxTts/entry/src/main/resources/rawfile/.gitkeep
================================================


================================================
FILE: harmony-os/SherpaOnnxTts/entry/src/main/resources/zh_CN/element/string.json
================================================
{
  "string": [
    {
      "name": "module_desc",
      "value": "使用新一代Kaldi进行本地离线语音合成"
    },
    {
      "name": "EntryAbility_desc",
      "value": "使用新一代Kaldi进行本地离线语音合成"
    },
    {
      "name": "EntryAbility_label",
      "value": "本地语音合成"
    }
  ]
}

================================================
FILE: harmony-os/SherpaOnnxTts/entry/src/ohosTest/ets/test/Ability.test.ets
================================================
import hilog from '@ohos.hilog';
import { describe, beforeAll, beforeEach, afterEach, afterAll, it, expect } from '@ohos/hypium';

export default function abilityTest() {
  describe('ActsAbilityTest', () => {
    // Defines a test suite. Two parameters are supported: test suite name and test suite function.
    beforeAll(() => {
      // Presets an action, which is performed only once before all test cases of the test suite start.
      // This API supports only one parameter: preset action function.
    })
    beforeEach(() => {
      // Presets an action, which is performed before each unit test case starts.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: preset action function.
    })
    afterEach(() => {
      // Presets a clear action, which is performed after each unit test case ends.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: clear action function.
    })
    afterAll(() => {
      // Presets a clear action, which is performed after all test cases of the test suite end.
      // This API supports only one parameter: clear action function.
    })
    it('assertContain', 0, () => {
      // Defines a test case. This API supports three parameters: test case name, filter parameter, and test case function.
      hilog.info(0x0000, 'testTag', '%{public}s', 'it begin');
      let a = 'abc';
      let b = 'b';
      // Defines a variety of assertion methods, which are used to declare expected boolean conditions.
      expect(a).assertContain(b);
      expect(a).assertEqual(a);
    })
  })
}

================================================
FILE: harmony-os/SherpaOnnxTts/entry/src/ohosTest/ets/test/List.test.ets
================================================
import abilityTest from './Ability.test';

export default function testsuite() {
  abilityTest();
}

================================================
FILE: harmony-os/SherpaOnnxTts/entry/src/ohosTest/module.json5
================================================
{
  "module": {
    "name": "entry_test",
    "type": "feature",
    "deviceTypes": [
      "phone",
      "tablet",
      "2in1"
    ],
    "deliveryWithInstall": true,
    "installationFree": false
  }
}


================================================
FILE: harmony-os/SherpaOnnxTts/entry/src/test/List.test.ets
================================================
import localUnitTest from './LocalUnit.test';

export default function testsuite() {
  localUnitTest();
}

================================================
FILE: harmony-os/SherpaOnnxTts/entry/src/test/LocalUnit.test.ets
================================================
import { describe, beforeAll, beforeEach, afterEach, afterAll, it, expect } from '@ohos/hypium';

export default function localUnitTest() {
  describe('localUnitTest', () => {
    // Defines a test suite. Two parameters are supported: test suite name and test suite function.
    beforeAll(() => {
      // Presets an action, which is performed only once before all test cases of the test suite start.
      // This API supports only one parameter: preset action function.
    });
    beforeEach(() => {
      // Presets an action, which is performed before each unit test case starts.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: preset action function.
    });
    afterEach(() => {
      // Presets a clear action, which is performed after each unit test case ends.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: clear action function.
    });
    afterAll(() => {
      // Presets a clear action, which is performed after all test cases of the test suite end.
      // This API supports only one parameter: clear action function.
    });
    it('assertContain', 0, () => {
      // Defines a test case. This API supports three parameters: test case name, filter parameter, and test case function.
      let a = 'abc';
      let b = 'b';
      // Defines a variety of assertion methods, which are used to declare expected boolean conditions.
      expect(a).assertContain(b);
      expect(a).assertEqual(a);
    });
  });
}

================================================
FILE: harmony-os/SherpaOnnxTts/hvigor/hvigor-config.json5
================================================
{
  "modelVersion": "5.0.0",
  "dependencies": {
  },
  "execution": {
    // "analyze": "normal",                     /* Define the build analyze mode. Value: [ "normal" | "advanced" | false ]. Default: "normal" */
    // "daemon": true,                          /* Enable daemon compilation. Value: [ true | false ]. Default: true */
    // "incremental": true,                     /* Enable incremental compilation. Value: [ true | false ]. Default: true */
    // "parallel": true,                        /* Enable parallel compilation. Value: [ true | false ]. Default: true */
    // "typeCheck": false,                      /* Enable typeCheck. Value: [ true | false ]. Default: false */
  },
  "logging": {
    // "level": "info"                          /* Define the log level. Value: [ "debug" | "info" | "warn" | "error" ]. Default: "info" */
  },
  "debugging": {
    // "stacktrace": false                      /* Disable stacktrace compilation. Value: [ true | false ]. Default: false */
  },
  "nodeOptions": {
    // "maxOldSpaceSize": 8192                  /* Enable nodeOptions maxOldSpaceSize compilation. Unit M. Used for the daemon process. Default: 8192*/
    // "exposeGC": true                         /* Enable to trigger garbage collection explicitly. Default: true*/
  }
}


================================================
FILE: harmony-os/SherpaOnnxTts/hvigorfile.ts
================================================
import { appTasks } from '@ohos/hvigor-ohos-plugin';

export default {
    system: appTasks,  /* Built-in plugin of Hvigor. It cannot be modified. */
    plugins:[]         /* Custom plugin to extend the functionality of Hvigor. */
}


================================================
FILE: harmony-os/SherpaOnnxTts/oh-package-lock.json5
================================================
{
  "meta": {
    "stableOrder": true
  },
  "lockfileVersion": 3,
  "ATTENTION": "THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.",
  "specifiers": {
    "@ohos/hypium@1.0.19": "@ohos/hypium@1.0.19"
  },
  "packages": {
    "@ohos/hypium@1.0.19": {
      "name": "@ohos/hypium",
      "version": "1.0.19",
      "integrity": "sha512-cEjDgLFCm3cWZDeRXk7agBUkPqjWxUo6AQeiu0gEkb3J8ESqlduQLSIXeo3cCsm8U/asL7iKjF85ZyOuufAGSQ==",
      "resolved": "https://ohpm.openharmony.cn/ohpm/@ohos/hypium/-/hypium-1.0.19.har",
      "registryType": "ohpm"
    }
  }
}

================================================
FILE: harmony-os/SherpaOnnxTts/oh-package.json5
================================================
{
  "modelVersion": "5.0.0",
  "description": "Please describe the basic information.",
  "dependencies": {
  },
  "devDependencies": {
    "@ohos/hypium": "1.0.19"
  }
}


================================================
FILE: harmony-os/SherpaOnnxVadAsr/.gitignore
================================================
/node_modules
/oh_modules
/local.properties
/.idea
**/build
/.hvigor
.cxx
/.clangd
/.clang-format
/.clang-tidy
**/.test
/.appanalyzer

================================================
FILE: harmony-os/SherpaOnnxVadAsr/AppScope/app.json5
================================================
{
  "app": {
    "bundleName": "com.k2fsa.sherpa.onnx.vad.asr",
    "vendor": "example",
    "versionCode": 1000000,
    "versionName": "1.0.0",
    "icon": "$media:app_icon",
    "label": "$string:app_name"
  }
}


================================================
FILE: harmony-os/SherpaOnnxVadAsr/AppScope/resources/base/element/string.json
================================================
{
  "string": [
    {
      "name": "app_name",
      "value": "SherpaOnnxVadAsr"
    }
  ]
}


================================================
FILE: harmony-os/SherpaOnnxVadAsr/README.md
================================================
# Introduction

Please see
https://k2-fsa.github.io/sherpa/onnx/harmony-os/vad-asr.html
for how to run code in this folder.


================================================
FILE: harmony-os/SherpaOnnxVadAsr/build-profile.json5
================================================
{
  "app": {
    "signingConfigs": [],
    "products": [
      {
        "name": "default",
        "signingConfig": "default",
        "compatibleSdkVersion": "4.0.0(10)",
        "runtimeOS": "HarmonyOS",
        "buildOption": {
          "strictMode": {
            "caseSensitiveCheck": true,
          }
        }
      }
    ],
    "buildModeSet": [
      {
        "name": "debug",
      },
      {
        "name": "release"
      }
    ]
  },
  "modules": [
    {
      "name": "entry",
      "srcPath": "./entry",
      "targets": [
        {
          "name": "default",
          "applyToProducts": [
            "default"
          ]
        }
      ]
    }
  ]
}

================================================
FILE: harmony-os/SherpaOnnxVadAsr/code-linter.json5
================================================
{
  "files": [
    "**/*.ets"
  ],
  "ignore": [
    "**/src/ohosTest/**/*",
    "**/src/test/**/*",
    "**/src/mock/**/*",
    "**/node_modules/**/*",
    "**/oh_modules/**/*",
    "**/build/**/*",
    "**/.preview/**/*"
  ],
  "ruleSet": [
    "plugin:@performance/recommended",
    "plugin:@typescript-eslint/recommended"
  ],
  "rules": {
  }
}

================================================
FILE: harmony-os/SherpaOnnxVadAsr/entry/.gitignore
================================================
/node_modules
/oh_modules
/.preview
/build
/.cxx
/.test
*.har


================================================
FILE: harmony-os/SherpaOnnxVadAsr/entry/README.md
================================================
# Introduction

Please download ./sherpa_onnx-v1.12.31.har
from <https://huggingface.co/csukuangfj/sherpa-onnx-harmony-os/tree/main/har>

Hint: For users who have no access to huggingface, please use
<https://hf-mirror.com/csukuangfj/sherpa-onnx-harmony-os/tree/main/har>


================================================
FILE: harmony-os/SherpaOnnxVadAsr/entry/build-profile.json5
================================================
{
  "apiType": "stageMode",
  "buildOption": {
    "sourceOption": {
      "workers": [
        './src/main/ets/workers/NonStreamingAsrWithVadWorker.ets'
      ]
    }
  },
  "buildOptionSet": [
    {
      "name": "release",
      "arkOptions": {
        "obfuscation": {
          "ruleOptions": {
            "enable": false,
            "files": [
              "./obfuscation-rules.txt"
            ]
          }
        }
      }
    },
  ],
  "targets": [
    {
      "name": "default"
    },
    {
      "name": "ohosTest",
    }
  ]
}

================================================
FILE: harmony-os/SherpaOnnxVadAsr/entry/hvigorfile.ts
================================================
import { hapTasks } from '@ohos/hvigor-ohos-plugin';

export default {
    system: hapTasks,  /* Built-in plugin of Hvigor. It cannot be modified. */
    plugins:[]         /* Custom plugin to extend the functionality of Hvigor. */
}


================================================
FILE: harmony-os/SherpaOnnxVadAsr/entry/obfuscation-rules.txt
================================================
# Define project specific obfuscation rules here.
# You can include the obfuscation configuration files in the current module's build-profile.json5.
#
# For more details, see
#   https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/source-obfuscation-V5

# Obfuscation options:
# -disable-obfuscation: disable all obfuscations
# -enable-property-obfuscation: obfuscate the property names
# -enable-toplevel-obfuscation: obfuscate the names in the global scope
# -compact: remove unnecessary blank spaces and all line feeds
# -remove-log: remove all console.* statements
# -print-namecache: print the name cache that contains the mapping from the old names to new names
# -apply-namecache: reuse the given cache file

# Keep options:
# -keep-property-name: specifies property names that you want to keep
# -keep-global-name: specifies names that you want to keep in the global scope

-enable-property-obfuscation
-enable-toplevel-obfuscation
-enable-filename-obfuscation
-enable-export-obfuscation

================================================
FILE: harmony-os/SherpaOnnxVadAsr/entry/oh-package-lock.json5
================================================
{
  "meta": {
    "stableOrder": true
  },
  "lockfileVersion": 3,
  "ATTENTION": "THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.",
  "specifiers": {
    "libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1.10.32/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx": "libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1.10.32/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx",
    "sherpa_onnx@1.10.32": "sherpa_onnx@1.10.32"
  },
  "packages": {
    "libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1.10.32/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx": {
      "name": "libsherpa_onnx.so",
      "version": "1.0.0",
      "resolved": "../oh_modules/.ohpm/sherpa_onnx@1.10.32/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx",
      "registryType": "local"
    },
    "sherpa_onnx@1.10.32": {
      "name": "sherpa_onnx",
      "version": "1.10.32",
      "integrity": "sha512-yHYmWoeqhrunOqGr9gxPJJH/8+rdwcKFOW6onYByVObQVpbqypslg301IjGm9xpnc5bJEkO3S9sra2zQTpPA/w==",
      "resolved": "https://ohpm.openharmony.cn/ohpm/sherpa_onnx/-/sherpa_onnx-1.10.32.har",
      "registryType": "ohpm",
      "dependencies": {
        "libsherpa_onnx.so": "file:./src/main/cpp/types/libsherpa_onnx"
      }
    }
  }
}

================================================
FILE: harmony-os/SherpaOnnxVadAsr/entry/oh-package.json5
================================================
{
  "name": "entry",
  "version": "1.0.0",
  "description": "Please describe the basic information.",
  "main": "",
  "author": "",
  "license": "",
  "dependencies": {
    // please see https://ohpm.openharmony.cn/#/cn/detail/sherpa_onnx
    "sherpa_onnx": "1.12.31",
  }
}


================================================
FILE: harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/entryability/EntryAbility.ets
================================================
import AbilityConstant from '@ohos.app.ability.AbilityConstant';
import hilog from '@ohos.hilog';
import UIAbility from '@ohos.app.ability.UIAbility';
import Want from '@ohos.app.ability.Want';
import window from '@ohos.window';

export default class EntryAbility extends UIAbility {
  onCreate(want: Want, launchParam: AbilityConstant.LaunchParam): void {
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onCreate');
  }

  onDestroy(): void {
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onDestroy');
  }

  onWindowStageCreate(windowStage: window.WindowStage): void {
    // Main window is created, set main page for this ability
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onWindowStageCreate');

    windowStage.loadContent('pages/Index', (err) => {
      if (err.code) {
        hilog.error(0x0000, 'testTag', 'Failed to load the content. Cause: %{public}s', JSON.stringify(err) ?? '');
        return;
      }
      hilog.info(0x0000, 'testTag', 'Succeeded in loading the content.');
    });
  }

  onWindowStageDestroy(): void {
    // Main window is destroyed, release UI related resources
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onWindowStageDestroy');
  }

  onForeground(): void {
    // Ability has brought to foreground
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onForeground');
  }

  onBackground(): void {
    // Ability has back to background
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onBackground');
  }
}


================================================
FILE: harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/entrybackupability/EntryBackupAbility.ets
================================================
import hilog from '@ohos.hilog';
import BackupExtensionAbility, { BundleVersion } from '@ohos.application.BackupExtensionAbility';

export default class EntryBackupAbility extends BackupExtensionAbility {
  async onBackup() {
    hilog.info(0x0000, 'testTag', 'onBackup ok');
  }

  async onRestore(bundleVersion: BundleVersion) {
    hilog.info(0x0000, 'testTag', 'onRestore ok %{public}s', JSON.stringify(bundleVersion));
  }
}

================================================
FILE: harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/pages/Index.ets
================================================
import { LengthUnit } from '@kit.ArkUI';
import worker, { MessageEvents } from '@ohos.worker';
import { BusinessError } from '@kit.BasicServicesKit';
import { picker } from '@kit.CoreFileKit';

import { Permissions } from '@kit.AbilityKit';
import { allAllowed, requestPermissions } from './Permission';
import { audio } from '@kit.AudioKit';


@Entry
@Component
struct Index {
  @State title: string = 'Next-gen Kaldi: VAD + ASR';
  @State currentIndex: number = 0;
  @State resultForFile: string = '';
  @State progressForFile: number = 0;
  @State selectFileBtnEnabled: boolean = false;
  @State lang: string = 'English';
  @State resultForMic: string = '';
  @State micStarted: boolean = false;
  @State message: string = 'Start recording';
  @State micInitDone: boolean = false;
  private controller: TabsController = new TabsController();
  private workerInstance?: worker.ThreadWorker
  private readonly scriptURL: string = 'entry/ets/workers/NonStreamingAsrWithVadWorker.ets'
  private mic?: audio.AudioCapturer;
  private sampleList: Float32Array[] = []

  flatten(samples: Float32Array[]): Float32Array {
    let n = 0;
    for (let i = 0; i < samples.length; ++i) {
      n += samples[i].length;
    }

    const ans: Float32Array = new Float32Array(n);
    let offset: number = 0;
    for (let i = 0; i < samples.length; ++i) {
      ans.set(samples[i], offset);
      offset += samples[i].length;
    }

    return ans;
  }

  async initMic() {
    const permissions: Permissions[] = ["ohos.permission.MICROPHONE"];
    let allowed: boolean = await allAllowed(permissions);
    if (!allowed) {
      console.log("request to access the microphone");
      const status: boolean = await requestPermissions(permissions);

      if (!status) {
        console.error('access to microphone is denied')
        this.resultForMic = "Failed to get microphone permission. Please retry";
        return;
      }

      allowed = await allAllowed(permissions);
      if (!allowed) {
        console.error('failed to get microphone permission');
        this.resultForMic = "Failed to get microphone permission. Please retry";
        return;
      }
    } else {
      console.log("allowed to access microphone");
    }

    const audioStreamInfo: audio.AudioStreamInfo = {
      samplingRate: audio.AudioSamplingRate.SAMPLE_RATE_16000,
      channels: audio.AudioChannel.CHANNEL_1,
      sampleFormat: audio.AudioSampleFormat.SAMPLE_FORMAT_S16LE,
      encodingType: audio.AudioEncodingType.ENCODING_TYPE_RAW,
    };

    const audioCapturerInfo: audio.AudioCapturerInfo = {
      source: audio.SourceType.SOURCE_TYPE_MIC, capturerFlags: 0
    };

    const audioCapturerOptions: audio.AudioCapturerOptions = {
      streamInfo: audioStreamInfo, capturerInfo: audioCapturerInfo

    };
    audio.createAudioCapturer(audioCapturerOptions, (err, data) => {
      if (err) {
        console.error(`error code is ${err.code}, error message is ${err.message}`);
        this.resultForMic = 'Failed to init microphone';
      } else {
        console.info(`init mic successfully`);
        this.mic = data;
        this.mic.on('readData', this.micCallback);

        if (this.workerInstance) {
          this.workerInstance.postMessage({ msgType: 'init-vad-mic', context: getContext() });
        }
      }
    });
  }

  async aboutToAppear() {
    this.workerInstance = new worker.ThreadWorker(this.scriptURL, {
      name: 'NonStreaming ASR worker'
    });

    this.workerInstance.onmessage = (e: MessageEvents) => {
      const msgType = e.data['msgType'] as string;
      console.log(`received msg from worker: ${msgType}`);

      if (msgType == 'init-vad-mic-done') {
        this.micInitDone = true;
      }

      if (msgType == 'init-non-streaming-asr-done') {
        this.selectFileBtnEnabled = true;
        this.resultForFile = `Initializing done.\n\nPlease select a wave file of 16kHz in language ${this.lang}`;
      }

      if (msgType == 'non-streaming-asr-vad-decode-done') {
        this.resultForFile = e.data['text'] as string + '\n';
      }

      if (msgType == 'non-streaming-asr-vad-decode-partial') {
        if (this.resultForFile == '') {
          this.resultForFile = e.data['text'] as string;
        } else {
          this.resultForFile += '\n\n' + e.data['text'] as string;
        }
      }

      if (msgType == 'non-streaming-asr-vad-decode-error') {
        this.resultForFile = e.data['text'] as string;
      }

      if (msgType == 'non-streaming-asr-vad-decode-progress') {
        this.progressForFile = e.data['progress'] as number;

        this.selectFileBtnEnabled = this.progressForFile >= 100;
      }

      if (msgType == 'non-streaming-asr-vad-mic-partial') {
        if (this.resultForMic == '') {
          this.resultForMic = e.data['text'] as string;
        } else {
          this.resultForMic += '\n\n' + e.data['text'] as string;
        }
      }

      if (msgType == 'non-streaming-asr-vad-mic-error') {
        this.resultForMic = e.data['text'] as string;
      }
    }

    const context = getContext();
    this.resultForFile = 'Initializing models';
    this.workerInstance.postMessage({ msgType: 'init-vad', context });
    this.workerInstance.postMessage({ msgType: 'init-non-streaming-asr', context });

    await this.initMic();
  }

  @Builder
  TabBuilder(title: string, targetIndex: number, selectedImg: Resource, normalImg: Resource) {
    Column() {
      Image(this.currentIndex == targetIndex ? selectedImg : normalImg).size({ width: 25, height: 25 })
      Text(title).fontColor(this.currentIndex == targetIndex ? '#28bff1' : '#8a8a8a')
    }.width('100%').height(50).justifyContent(FlexAlign.Center).onClick(() => {
      this.currentIndex = targetIndex;
      this.controller.changeIndex(this.currentIndex);
    })
  }

  build() {
    Column() {
      Tabs({ barPosition: BarPosition.End, controller: this.controller }) {
        TabContent() {
          Column({ space: 10 }) {
            Text(this.title).fontSize(20).fontWeight(FontWeight.Bold);

            Button('Select .wav file (16kHz) ')
              .enabled(this.selectFileBtnEnabled)
              .fontSize(13)
              .width(296)
              .height(60)
              .onClick(() => {
                this.resultForFile = '';
                this.progressForFile = 0;

                const documentSelectOptions = new picker.DocumentSelectOptions();
                documentSelectOptions.maxSelectNumber = 1;
                documentSelectOptions.fileSuffixFilters = ['.wav'];
                const documentViewPicker = new picker.DocumentViewPicker();
                documentViewPicker.select(documentSelectOptions).then((result: Array<string>) => {
                  console.log(`Result: ${result}`);

                  if (!result[0]) {
                    this.resultForFile = 'Please select a file to decode';
                    this.selectFileBtnEnabled = true;
                    return;
                  }

                  if (this.workerInstance) {
                    this.workerInstance.postMessage({
                      msgType: 'non-streaming-asr-vad-decode', filename: result[0],
                    });
                  } else {
                    console.log(`this worker instance is undefined ${this.workerInstance}`);
                  }
                }).catch((err: BusinessError) => {
                  console.error(`Failed to select file, code is ${err.code}, message is ${err.message}`);
                })

              })

            Text(`Supported languages: ${this.lang}`)

            if (this.progressForFile > 0) {
              Row() {
                Progress({ value: 0, total: 100, type: ProgressType.Capsule })
                  .width('80%')
                  .height(20)
                  .value(this.progressForFile);

                Text(`${this.progressForFile.toFixed(2)}%`).width('15%')
              }.width('100%').justifyContent(FlexAlign.Center)
            }

            TextArea({ text: this.resultForFile })
              .width('100%')
              .lineSpacing({ value: 10, unit: LengthUnit.VP })
              .height('100%');
          }.alignItems(HorizontalAlign.Center).justifyContent(FlexAlign.Start)
        }.tabBar(this.TabBuilder('From file', 0, $r('app.media.icon_doc'), $r('app.media.icon_doc')))

        TabContent() {
          Column({ space: 10 }) {
            Text(this.title).fontSize(20).fontWeight(FontWeight.Bold);
            Button(this.message).enabled(this.micInitDone).onClick(() => {
              console.log('clicked mic button');
              this.resultForMic = '';
              if (this.mic) {
                if (this.micStarted) {
                  this.mic.stop();
                  this.message = "Start recording";
                  this.micStarted = false;
                  console.log('mic stopped');

                  const samples = this.flatten(this.sampleList);
                  let s = 0;
                  for (let i = 0; i < samples.length; ++i) {
                    s += samples[i];
                  }
                  console.log(`samples ${samples.length}, sum: ${s}`);

                  if (this.workerInstance) {
                    console.log('decode mic');
                    this.workerInstance.postMessage({
                      msgType: 'non-streaming-asr-vad-mic', samples,
                    });
                  } else {
                    console.log(`this worker instance is undefined ${this.workerInstance}`);
                  }
                } else {
                  this.sampleList = [];
                  this.mic.start();
                  this.message = "Stop recording";
                  this.micStarted = true;
                  console.log('mic started');
                }
              }
            });

            Text(`Supported languages: ${this.lang}`)

            TextArea({ text: this.resultForMic })
              .width('100%')
              .lineSpacing({ value: 10, unit: LengthUnit.VP })
              .width('100%')
              .height('100%');
          }.alignItems(HorizontalAlign.Center).justifyContent(FlexAlign.Start)
        }
        .tabBar(this.TabBuilder('From mic', 1, $r('app.media.icon_mic'),
          $r('app.media.icon_mic')))

        TabContent() {
          Column({ space: 10 }) {
            Text(this.title).fontSize(20).fontWeight(FontWeight.Bold);
            TextArea({
              text: `
Everyting is open-sourced.

It runs locally, without accessing the network

See also https://github.com/k2-fsa/sherpa-onnx

新一代 Kaldi QQ 和微信交流群: 请看

https://k2-fsa.github.io/sherpa/social-groups.html

微信公众号: 新一代 Kaldi
            `
            }).width('100%').height('100%').focusable(false)
          }.justifyContent(FlexAlign.Start)
        }.tabBar(this.TabBuilder('Help', 2, $r('app.media.info'), $r('app.media.info')))

      }.scrollable(false)
    }.width('100%').justifyContent(FlexAlign.Start)
  }

  private micCallback = (buffer: ArrayBuffer) => {
    const view: Int16Array = new Int16Array(buffer);

    const samplesFloat: Float32Array = new Float32Array(view.length);
    for (let i = 0; i < view.length; ++i) {
      samplesFloat[i] = view[i] / 32768.0;
    }
    this.sampleList.push(samplesFloat);
  }
}

================================================
FILE: harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/pages/NonStreamingAsrModels.ets
================================================
// Please keep in sync with
// https://github.com/k2-fsa/sherpa-onnx/blob/master/sherpa-onnx/kotlin-api/OfflineRecognizer.kt#L184

import { OfflineModelConfig } from 'sherpa_onnx';

export function getOfflineModelConfig(type: number): OfflineModelConfig {
  const c: OfflineModelConfig = new OfflineModelConfig();
  switch (type) {
    case 0: {
      const modelDir = 'sherpa-onnx-paraformer-zh-2023-09-14'
      c.paraformer.model = `${modelDir}/model.int8.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      c.modelType = "paraformer";

      break;
    }

    case 1: {
      const modelDir = 'icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04'
      c.transducer.encoder = `$modelDir}/encoder-epoch-30-avg-4.int8.onnx`;
      c.transducer.decoder = `${modelDir}/decoder-epoch-30-avg-4.onnx`;
      c.transducer.encoder = `${modelDir}/joiner-epoch-30-avg-4.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      c.modelType = "transducer";

      break;
    }

    case 2: {
      const modelDir = 'sherpa-onnx-whisper-tiny.en';
      c.whisper.encoder = `${modelDir}/tiny.en-encoder.int8.onnx`;
      c.whisper.decoder = `${modelDir}/tiny.en-decoder.int8.onnx`;
      c.tokens = `${modelDir}/tiny.en-tokens.txt`;
      c.modelType = "whisper";

      break;
    }

    case 3: {
      const modelDir = 'sherpa-onnx-whisper-base.en';
      c.whisper.encoder = `${modelDir}/base.en-encoder.int8.onnx`;
      c.whisper.decoder = `${modelDir}/base.en-decoder.int8.onnx`;
      c.tokens = `${modelDir}/base.en-tokens.txt`;
      c.modelType = "whisper";

      break;
    }

    case 4: {
      const modelDir = "icefall-asr-zipformer-wenetspeech-20230615";
      c.transducer.encoder = `${modelDir}/encoder-epoch-12-avg-4.int8.onnx`;
      c.transducer.decoder = `${modelDir}/decoder-epoch-12-avg-4.onnx`;
      c.transducer.joiner = `${modelDir}/joiner-epoch-12-avg-4.int8.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      c.modelType = "transducer";

      break;
    }

    case 5: {
      const modelDir = "sherpa-onnx-zipformer-multi-zh-hans-2023-9-2";
      c.transducer.encoder = `${modelDir}/encoder-epoch-20-avg-1.int8.onnx`;
      c.transducer.decoder = `${modelDir}/decoder-epoch-20-avg-1.onnx`;
      c.transducer.joiner = `${modelDir}/joiner-epoch-20-avg-1.int8.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      c.modelType = "transducer";

      break;
    }

    case 6: {
      const modelDir = "sherpa-onnx-nemo-ctc-en-citrinet-512";
      c.nemoCtc.model = `${modelDir}/model.int8.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;

      break;
    }

    case 7: {
      const modelDir = "sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k"
      c.nemoCtc.model = `${modelDir}/model.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;

      break;
    }

    case 8: {
      const modelDir = "sherpa-onnx-nemo-fast-conformer-ctc-en-24500"
      c.nemoCtc.model = `${modelDir}/model.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;

      break;
    }

    case 9: {
      const modelDir = "sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288"
      c.nemoCtc.model = `${modelDir}/model.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;

      break;
    }

    case 10: {
      const modelDir = "sherpa-onnx-nemo-fast-conformer-ctc-es-1424"
      c.nemoCtc.model = `${modelDir}/model.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;

      break;
    }

    case 11: {
      const modelDir = "sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04"
      c.telespeechCtc = `${modelDir}/model.int8.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      c.modelType = "telespeech_ctc";

      break;
    }

    case 12: {
      const modelDir = "sherpa-onnx-zipformer-thai-2024-06-20"
      c.transducer.encoder = `${modelDir}/encoder-epoch-12-avg-5.int8.onnx`;
      c.transducer.decoder = `${modelDir}/decoder-epoch-12-avg-5.onnx`;
      c.transducer.joiner = `${modelDir}/joiner-epoch-12-avg-5.int8.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      c.modelType = "transducer";

      break;
    }

    case 13: {
      const modelDir = "sherpa-onnx-zipformer-korean-2024-06-24";
      c.transducer.encoder = `${modelDir}/encoder-epoch-99-avg-1.int8.onnx`;
      c.transducer.decoder = `${modelDir}/decoder-epoch-99-avg-1.onnx`;
      c.transducer.joiner = `${modelDir}/joiner-epoch-99-avg-1.int8.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      c.modelType = "transducer";

      break;
    }

    case 14: {
      const modelDir = "sherpa-onnx-paraformer-zh-small-2024-03-09";
      c.paraformer.model = `${modelDir}/model.int8.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      c.modelType = "paraformer";

      break;
    }

    case 15: {
      const modelDir = "sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17";
      c.senseVoice.model = `${modelDir}/model.int8.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;

      break;
    }

    case 16: {
      const modelDir = "sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01";
      c.transducer.encoder = `${modelDir}/encoder-epoch-99-avg-1.int8.onnx`;
      c.transducer.decoder = `${modelDir}/decoder-epoch-99-avg-1.onnx`;
      c.transducer.joiner = `${modelDir}/joiner-epoch-99-avg-1.int8.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      c.modelType = "transducer";

      break;
    }

    case 17: {
      const modelDir = "sherpa-onnx-zipformer-ru-2024-09-18";
      c.transducer.encoder = `${modelDir}/encoder.int8.onnx`;
      c.transducer.decoder = `${modelDir}/decoder.onnx`;
      c.transducer.joiner = `${modelDir}/joiner.int8.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      c.modelType = "transducer";

      break;
    }

    case 18: {
      const modelDir = "sherpa-onnx-small-zipformer-ru-2024-09-18";
      c.transducer.encoder = `${modelDir}/encoder.int8.onnx`;
      c.transducer.decoder = `${modelDir}/decoder.onnx`;
      c.transducer.joiner = `${modelDir}/joiner.int8.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      c.modelType = "transducer";

      break;
    }

    case 19: {
      const modelDir = "sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24";
      c.nemoCtc.model = `${modelDir}/model.int8.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;

      break;
    }

    case 20: {
      const modelDir = "sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24";
      c.transducer.encoder = `${modelDir}/encoder.int8.onnx`;
      c.transducer.decoder = `${modelDir}/decoder.onnx`;
      c.transducer.joiner = `${modelDir}/joiner.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      c.modelType = "nemo_transducer";

      break;
    }

    case 21: {
      const modelDir = "sherpa-onnx-moonshine-tiny-en-int8";
      c.moonshine.preprocessor = `${modelDir}/preprocess.onnx`;
      c.moonshine.encoder = `${modelDir}/encode.int8.onnx`;
      c.moonshine.uncachedDecoder = `${modelDir}/uncached_decode.int8.onnx`;
      c.moonshine.cachedDecoder = `${modelDir}/cached_decode.int8.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;

      break;
    }

    case 22: {
      const modelDir = "sherpa-onnx-moonshine-base-en-int8";
      c.moonshine.preprocessor = `${modelDir}/preprocess.onnx`;
      c.moonshine.encoder = `${modelDir}/encode.int8.onnx`;
      c.moonshine.uncachedDecoder = `${modelDir}/uncached_decode.int8.onnx`;
      c.moonshine.cachedDecoder = `${modelDir}/cached_decode.int8.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;

      break;
    }

    case 23: {
      const modelDir = "sherpa-onnx-zipformer-zh-en-2023-11-22";
      c.transducer.encoder = `${modelDir}/encoder-epoch-34-avg-19.int8.onnx`;
      c.transducer.decoder = `${modelDir}/decoder-epoch-34-avg-19.onnx`;
      c.transducer.joiner = `${modelDir}/joiner-epoch-34-avg-19.int8.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      c.modelType = "transducer";

      break;
    }

    default: {
      console.log(`Please specify a supported type. Given type ${type}`);
    }
  }

  return c;
}


================================================
FILE: harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/pages/Permission.ets
================================================
// This file is modified from
// https://gitee.com/ukSir/hmchat2/blob/master/entry/src/main/ets/utils/permissionMananger.ets
import { abilityAccessCtrl, bundleManager, common, Permissions } from '@kit.AbilityKit';

export function allAllowed(permissions: Permissions[]): boolean {
  if (permissions.length == 0) {
    return false;
  }

  const mgr: abilityAccessCtrl.AtManager = abilityAccessCtrl.createAtManager();

  const bundleInfo = bundleManager.getBundleInfoForSelfSync(bundleManager.BundleFlag.GET_BUNDLE_INFO_WITH_APPLICATION);

  let tokenID: number = bundleInfo.appInfo.accessTokenId;

  return permissions.every(permission => abilityAccessCtrl.GrantStatus.PERMISSION_GRANTED ==
  mgr.checkAccessTokenSync(tokenID, permission));
}

export async function requestPermissions(permissions: Permissions[]): Promise<boolean> {
  const mgr: abilityAccessCtrl.AtManager = abilityAccessCtrl.createAtManager();
  const context: Context = getContext() as common.UIAbilityContext;

  const result = await mgr.requestPermissionsFromUser(context, permissions);
  return result.authResults.length > 0 && result.authResults.every(authResults => authResults == 0);
}

================================================
FILE: harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/workers/NonStreamingAsrWithVadWorker.ets
================================================
import { ErrorEvent, MessageEvents, ThreadWorkerGlobalScope, worker } from '@kit.ArkTS';
import {
  OfflineRecognizer,
  OfflineRecognizerConfig,
  OfflineStream,
  OnlineRecognizerResult,
  readWaveFromBinary,
  SileroVadConfig,
  TenVadConfig,
  SpeechSegment,
  Vad,
  VadConfig,
} from 'sherpa_onnx';
import { Context } from '@kit.AbilityKit';
import { fileIo } from '@kit.CoreFileKit';
import { getOfflineModelConfig } from '../pages/NonStreamingAsrModels';
import { BusinessError } from '@kit.BasicServicesKit';

const workerPort: ThreadWorkerGlobalScope = worker.workerPort;

let recognizer: OfflineRecognizer;
let vad: Vad; // vad for decoding files
let vadMic: Vad; // vad for mic

function initVad(context: Context): Vad {
  let mgr = context.resourceManager;
  const config: VadConfig = new VadConfig(
    new SileroVadConfig(
      'silero_vad.onnx',
      0.5,
      0.25,
      0.5,
      512,
    ),
    new TenVadConfig(
      '', // set it to ten-vad.onnx to use ten-vad
      0.5,
      0.25,
      0.5,
      256,
    ),
    16000,
    true,
    1,
  );

  const bufferSizeInSeconds = 60;
  return new Vad(config, bufferSizeInSeconds, mgr);
}

function initNonStreamingAsr(context: Context): OfflineRecognizer {
  let mgr = context.resourceManager;
  const config: OfflineRecognizerConfig = new OfflineRecognizerConfig();

  // Note that you can switch to a new model by changing type
  //
  // If you use type = 2, which means you will use
  // sherpa-onnx-whisper-tiny.en
  // we assume you have the following folder structure in you resources/rawfile
  /*
  (py38) fangjuns-MacBook-Pro:main fangjun$ pwd
  /Users/fangjun/open-source/sherpa-onnx/harmony-os/SherpaOnnxVadAsr/entry/src/main
  (py38) fangjuns-MacBook-Pro:main fangjun$ tree resources/rawfile/
  resources/rawfile/
  ├── sherpa-onnx-whisper-tiny.en
  │   ├── README.md
  │   ├── tiny.en-decoder.int8.onnx
  │   ├── tiny.en-encoder.int8.onnx
  │   └── tiny.en-tokens.txt
  └── silero_vad.onnx

  1 directory, 5 files
   */
  const type = 2;
  config.modelConfig = getOfflineModelConfig(type);
  config.modelConfig.debug = true;
  config.ruleFsts = '';
  return new OfflineRecognizer(config, mgr);
}

interface Wave {
  samples: Float32Array;
  sampleRate: number;
}

function decodeFile(filename: string): string {
  vad.reset();

  const fp = fileIo.openSync(filename);
  const stat = fileIo.statSync(fp.fd);
  const arrayBuffer = new ArrayBuffer(stat.size);
  fileIo.readSync(fp.fd, arrayBuffer);
  const data: Uint8Array = new Uint8Array(arrayBuffer);

  const wave: Wave = readWaveFromBinary(data);
  if (wave.sampleRate != 16000) {
    return `the sample rate in ${filename} is not 16000Hz. Given: ${wave.sampleRate}Hz.\nPlease select a wav file of 16kHz.`;
  }

  console.log(`sample rate ${wave.sampleRate}`);
  console.log(`samples length ${wave.samples.length}`);
  const resultList: string[] = [];

  let windowSize: number = vad.config.sileroVad.windowSize;

  if (vad.config.tenVad.model != '') {
    windowSize = vad.config.tenVad.windowSize;
  }

  for (let i = 0; i < wave.samples.length; i += windowSize) {
    const thisWindow: Float32Array = wave.samples.subarray(i, i + windowSize)
    vad.acceptWaveform(thisWindow);
    if (i + windowSize >= wave.samples.length) {
      vad.flush();
    }
    while (!vad.isEmpty()) {
      const segment: SpeechSegment = vad.front();
      const _startTime: number = (segment.start / wave.sampleRate);
      const _endTime: number = _startTime + segment.samples.length / wave.sampleRate;

      if (_endTime - _startTime < 0.2) {
        vad.pop();
        continue;
      }

      const startTime: string = _startTime.toFixed(2);
      const endTime: string = _endTime.toFixed(2);

      const progress: number = (segment.start + segment.samples.length) / wave.samples.length * 100;

      workerPort.postMessage({ 'msgType': 'non-streaming-asr-vad-decode-progress', progress });

      const stream: OfflineStream = recognizer.createStream();
      stream.acceptWaveform({ samples: segment.samples, sampleRate: wave.sampleRate });
      recognizer.decode(stream);
      const result: OnlineRecognizerResult = recognizer.getResult(stream);

      const text: string = `${startTime} -- ${endTime} ${result.text}`
      resultList.push(text);
      console.log(`partial result ${text}`);

      workerPort.postMessage({ 'msgType': 'non-streaming-asr-vad-decode-partial', text });

      vad.pop();
    }
  }

  return resultList.join('\n\n');
}

function decodeMic(samples: Float32Array) {
  const resultList: string[] = [];

  let windowSize: number = vad.config.sileroVad.windowSize;

  if (vad.config.tenVad.model != '') {
    windowSize = vad.config.tenVad.windowSize;
  }

  for (let i = 0; i < samples.length; i += windowSize) {
    const thisWindow: Float32Array = samples.subarray(i, i + windowSize)
    vad.acceptWaveform(thisWindow);
    if (i + windowSize >= samples.length) {
      vad.flush();
    }
    while (!vad.isEmpty()) {
      const segment: SpeechSegment = vad.front();
      const _startTime: number = (segment.start / 16000);
      const _endTime: number = _startTime + segment.samples.length / 16000;

      if (_endTime - _startTime < 0.2) {
        vad.pop();
        continue;
      }

      const startTime: string = _startTime.toFixed(2);
      const endTime: string = _endTime.toFixed(2);

      const stream: OfflineStream = recognizer.createStream();
      stream.acceptWaveform({ samples: segment.samples, sampleRate: 16000 });
      recognizer.decode(stream);
      const result: OnlineRecognizerResult = recognizer.getResult(stream);

      const text: string = `${startTime} -- ${endTime} ${result.text}`
      resultList.push(text);
      console.log(`partial result ${text}`);

      workerPort.postMessage({ 'msgType': 'non-streaming-asr-vad-mic-partial', text });

      vad.pop();
    }
  }

  return resultList.join('\n\n');
}

/**
 * Defines the event handler to be called when the worker thread receives a message sent by the host thread.
 * The event handler is executed in the worker thread.
 *
 * @param e message data
 */
workerPort.onmessage = (e: MessageEvents) => {
  const msgType = e.data['msgType'] as string;
  console.log(`msg-type: ${msgType}`)
  if (msgType == 'init-vad' && !vad) {
    const context = e.data['context'] as Context;
    vad = initVad(context);
    console.log('init vad done');
    workerPort.postMessage({ 'msgType': 'init-vad-done' });
  }

  if (msgType == 'init-vad-mic' && !vadMic) {
    const context = e.data['context'] as Context;
    vadMic = initVad(context);
    console.log('init vad mic done');
    workerPort.postMessage({ 'msgType': 'init-vad-mic-done' });
  }

  if (msgType == 'init-non-streaming-asr' && !recognizer) {
    const context = e.data['context'] as Context;
    recognizer = initNonStreamingAsr(context);
    console.log('init non streaming ASR done');
    workerPort.postMessage({ 'msgType': 'init-non-streaming-asr-done' });
  }

  if (msgType == 'non-streaming-asr-vad-decode') {
    const filename = e.data['filename'] as string;
    console.log(`decoding ${filename}`);
    try {
      const text = decodeFile(filename);
      workerPort.postMessage({ msgType: 'non-streaming-asr-vad-decode-done', text });
    } catch (e) {
      workerPort.postMessage({ msgType: 'non-streaming-asr-vad-decode-error', text: `Failed to decode ${filename}` });
    }

    workerPort.postMessage({ 'msgType': 'non-streaming-asr-vad-decode-progress', progress: 100 });
  }

  if (msgType == 'non-streaming-asr-vad-mic') {
    const samples: Float32Array = e.data['samples'] as Float32Array;
    vadMic.reset();
    try {
      const text = decodeMic(samples);
      workerPort.postMessage({ msgType: 'non-streaming-asr-vad-mic-done', text });
    } catch (e) {
      workerPort.postMessage({ msgType: 'non-streaming-asr-vad-mic-error', text: `Failed to decode` });
    }
  }
}

/**
 * Defines the event handler to be called when the worker receives a message that cannot be deserialized.
 * The event handler is executed in the worker thread.
 *
 * @param e message data
 */
workerPort.onmessageerror = (e: MessageEvents) => {
}

/**
 * Defines the event handler to be called when an exception occurs during worker execution.
 * The event handler is executed in the worker thread.
 *
 * @param e error message
 */
workerPort.onerror = (e: ErrorEvent) => {
}


================================================
FILE: harmony-os/SherpaOnnxVadAsr/entry/src/main/module.json5
================================================
{
  "module": {
    "name": "entry",
    "type": "entry",
    "description": "$string:module_desc",
    "mainElement": "EntryAbility",
    "deviceTypes": [
      "phone",
      "tablet",
      "2in1"
    ],
    "deliveryWithInstall": true,
    "installationFree": false,
    "pages": "$profile:main_pages",
    "abilities": [
      {
        "name": "EntryAbility",
        "srcEntry": "./ets/entryability/EntryAbility.ets",
        "description": "$string:EntryAbility_desc",
        "icon": "$media:layered_image",
        "label": "$string:EntryAbility_label",
        "startWindowIcon": "$media:startIcon",
        "startWindowBackground": "$color:start_window_background",
        "exported": true,
        "skills": [
          {
            "entities": [
              "entity.system.home"
            ],
            "actions": [
              "action.system.home"
            ]
          }
        ]
      }
    ],
    "extensionAbilities": [
      {
        "name": "EntryBackupAbility",
        "srcEntry": "./ets/entrybackupability/EntryBackupAbility.ets",
        "type": "backup",
        "exported": false,
        "metadata": [
          {
            "name": "ohos.extension.backup",
            "resource": "$profile:backup_config"
          }
        ],
      }
    ],
    "requestPermissions": [
      {
        "name": "ohos.permission.MICROPHONE",
        "reason": "$string:mic_reason",
        "usedScene": {
          "abilities": [
            "EntryAbility",
          ],
          "when": "inuse",
        }
      }
    ]
  }
}


================================================
FILE: harmony-os/SherpaOnnxVadAsr/entry/src/main/resources/base/element/color.json
================================================
{
  "color": [
    {
      "name": "start_window_background",
      "value": "#FFFFFF"
    }
  ]
}

================================================
FILE: harmony-os/SherpaOnnxVadAsr/entry/src/main/resources/base/element/string.json
================================================
{
  "string": [
    {
      "name": "module_desc",
      "value": "On-device VAD+ASR with Next-gen Kaldi"
    },
    {
      "name": "EntryAbility_desc",
      "value": "On-device VAD+ASR with Next-gen Kaldi"
    },
    {
      "name": "EntryAbility_label",
      "value": "On-device speech recognition"
    },
    {
      "name": "mic_reason",
      "value": "access the microphone for on-device speech recognition with Next-gen Kaldi"
    }
  ]
}


================================================
FILE: harmony-os/SherpaOnnxVadAsr/entry/src/main/resources/base/media/layered_image.json
================================================
{
  "layered-image":
  {
    "background" : "$media:background",
    "foreground" : "$media:foreground"
  }
}

================================================
FILE: harmony-os/SherpaOnnxVadAsr/entry/src/main/resources/base/profile/backup_config.json
================================================
{
  "allowToBackupRestore": true
}

================================================
FILE: harmony-os/SherpaOnnxVadAsr/entry/src/main/resources/base/profile/main_pages.json
================================================
{
  "src": [
    "pages/Index"
  ]
}


================================================
FILE: harmony-os/SherpaOnnxVadAsr/entry/src/main/resources/en_US/element/string.json
================================================
{
  "string": [
    {
      "name": "module_desc",
      "value": "On-device VAD+ASR with Next-gen Kaldi"
    },
    {
      "name": "EntryAbility_desc",
      "value": "On-device VAD+ASR with Next-gen Kaldi"
    },
    {
      "name": "EntryAbility_label",
      "value": "On-device speech recognition"
    },
    {
      "name": "mic_reason",
      "value": "access the microphone for on-device speech recognition with Next-gen Kaldi"
    }
  ]
}


================================================
FILE: harmony-os/SherpaOnnxVadAsr/entry/src/main/resources/rawfile/.gitkeep
================================================


================================================
FILE: harmony-os/SherpaOnnxVadAsr/entry/src/main/resources/zh_CN/element/string.json
================================================
{
  "string": [
    {
      "name": "module_desc",
      "value": "基于新一代Kaldi的本地语音识别"
    },
    {
      "name": "EntryAbility_desc",
      "value": "基于新一代Kaldi的本地语音识别"
    },
    {
      "name": "EntryAbility_label",
      "value": "本地语音识别"
    },
    {
      "name": "mic_reason",
      "value": "使用新一代Kaldi, 访问麦克风进行本地语音识别 (不需要联网)"
    }
  ]
}

================================================
FILE: harmony-os/SherpaOnnxVadAsr/entry/src/ohosTest/ets/test/Ability.test.ets
================================================
import hilog from '@ohos.hilog';
import { describe, beforeAll, beforeEach, afterEach, afterAll, it, expect } from '@ohos/hypium';

export default function abilityTest() {
  describe('ActsAbilityTest', () => {
    // Defines a test suite. Two parameters are supported: test suite name and test suite function.
    beforeAll(() => {
      // Presets an action, which is performed only once before all test cases of the test suite start.
      // This API supports only one parameter: preset action function.
    })
    beforeEach(() => {
      // Presets an action, which is performed before each unit test case starts.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: preset action function.
    })
    afterEach(() => {
      // Presets a clear action, which is performed after each unit test case ends.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: clear action function.
    })
    afterAll(() => {
      // Presets a clear action, which is performed after all test cases of the test suite end.
      // This API supports only one parameter: clear action function.
    })
    it('assertContain', 0, () => {
      // Defines a test case. This API supports three parameters: test case name, filter parameter, and test case function.
      hilog.info(0x0000, 'testTag', '%{public}s', 'it begin');
      let a = 'abc';
      let b = 'b';
      // Defines a variety of assertion methods, which are used to declare expected boolean conditions.
      expect(a).assertContain(b);
      expect(a).assertEqual(a);
    })
  })
}

================================================
FILE: harmony-os/SherpaOnnxVadAsr/entry/src/ohosTest/ets/test/List.test.ets
================================================
import abilityTest from './Ability.test';

export default function testsuite() {
  abilityTest();
}

================================================
FILE: harmony-os/SherpaOnnxVadAsr/entry/src/ohosTest/module.json5
================================================
{
  "module": {
    "name": "entry_test",
    "type": "feature",
    "deviceTypes": [
      "phone",
      "tablet",
      "2in1"
    ],
    "deliveryWithInstall": true,
    "installationFree": false
  }
}


================================================
FILE: harmony-os/SherpaOnnxVadAsr/entry/src/test/List.test.ets
================================================
import localUnitTest from './LocalUnit.test';

export default function testsuite() {
  localUnitTest();
}

================================================
FILE: harmony-os/SherpaOnnxVadAsr/entry/src/test/LocalUnit.test.ets
================================================
import { describe, beforeAll, beforeEach, afterEach, afterAll, it, expect } from '@ohos/hypium';

export default function localUnitTest() {
  describe('localUnitTest', () => {
    // Defines a test suite. Two parameters are supported: test suite name and test suite function.
    beforeAll(() => {
      // Presets an action, which is performed only once before all test cases of the test suite start.
      // This API supports only one parameter: preset action function.
    });
    beforeEach(() => {
      // Presets an action, which is performed before each unit test case starts.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: preset action function.
    });
    afterEach(() => {
      // Presets a clear action, which is performed after each unit test case ends.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: clear action function.
    });
    afterAll(() => {
      // Presets a clear action, which is performed after all test cases of the test suite end.
      // This API supports only one parameter: clear action function.
    });
    it('assertContain', 0, () => {
      // Defines a test case. This API supports three parameters: test case name, filter parameter, and test case function.
      let a = 'abc';
      let b = 'b';
      // Defines a variety of assertion methods, which are used to declare expected boolean conditions.
      expect(a).assertContain(b);
      expect(a).assertEqual(a);
    });
  });
}

================================================
FILE: harmony-os/SherpaOnnxVadAsr/hvigor/hvigor-config.json5
================================================
{
  "modelVersion": "5.0.0",
  "dependencies": {
  },
  "execution": {
    // "analyze": "normal",                     /* Define the build analyze mode. Value: [ "normal" | "advanced" | false ]. Default: "normal" */
    // "daemon": true,                          /* Enable daemon compilation. Value: [ true | false ]. Default: true */
    // "incremental": true,                     /* Enable incremental compilation. Value: [ true | false ]. Default: true */
    // "parallel": true,                        /* Enable parallel compilation. Value: [ true | false ]. Default: true */
    // "typeCheck": false,                      /* Enable typeCheck. Value: [ true | false ]. Default: false */
  },
  "logging": {
    // "level": "info"                          /* Define the log level. Value: [ "debug" | "info" | "warn" | "error" ]. Default: "info" */
  },
  "debugging": {
    // "stacktrace": false                      /* Disable stacktrace compilation. Value: [ true | false ]. Default: false */
  },
  "nodeOptions": {
    // "maxOldSpaceSize": 8192                  /* Enable nodeOptions maxOldSpaceSize compilation. Unit M. Used for the daemon process. Default: 8192*/
    // "exposeGC": true                         /* Enable to trigger garbage collection explicitly. Default: true*/
  }
}


================================================
FILE: harmony-os/SherpaOnnxVadAsr/hvigorfile.ts
================================================
import { appTasks } from '@ohos/hvigor-ohos-plugin';

export default {
    system: appTasks,  /* Built-in plugin of Hvigor. It cannot be modified. */
    plugins:[]         /* Custom plugin to extend the functionality of Hvigor. */
}


================================================
FILE: harmony-os/SherpaOnnxVadAsr/oh-package-lock.json5
================================================
{
  "meta": {
    "stableOrder": true
  },
  "lockfileVersion": 3,
  "ATTENTION": "THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.",
  "specifiers": {
    "@ohos/hypium@1.0.19": "@ohos/hypium@1.0.19"
  },
  "packages": {
    "@ohos/hypium@1.0.19": {
      "name": "@ohos/hypium",
      "version": "1.0.19",
      "integrity": "sha512-cEjDgLFCm3cWZDeRXk7agBUkPqjWxUo6AQeiu0gEkb3J8ESqlduQLSIXeo3cCsm8U/asL7iKjF85ZyOuufAGSQ==",
      "resolved": "https://ohpm.openharmony.cn/ohpm/@ohos/hypium/-/hypium-1.0.19.har",
      "registryType": "ohpm"
    }
  }
}

================================================
FILE: harmony-os/SherpaOnnxVadAsr/oh-package.json5
================================================
{
  "modelVersion": "5.0.0",
  "description": "Please describe the basic information.",
  "dependencies": {
  },
  "devDependencies": {
    "@ohos/hypium": "1.0.19"
  }
}


================================================
FILE: ios-swift/.gitignore
================================================
# See https://github.com/github/gitignore/blob/main/Swift.gitignore
# Xcode
#
# gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore

## User settings
xcuserdata/

## compatibility with Xcode 8 and earlier (ignoring not required starting Xcode 9)
*.xcscmblueprint
*.xccheckout

## compatibility with Xcode 3 and earlier (ignoring not required starting Xcode 4)
build/
DerivedData/
*.moved-aside
*.pbxuser
!default.pbxuser
*.mode1v3
!default.mode1v3
*.mode2v3
!default.mode2v3
*.perspectivev3
!default.perspectivev3

## Obj-C/Swift specific
*.hmap

## App packaging
*.ipa
*.dSYM.zip
*.dSYM

## Playgrounds
timeline.xctimeline
playground.xcworkspace

# Swift Package Manager
#
# Add this line if you want to avoid checking in source code from Swift Package Manager dependencies.
# Packages/
# Package.pins
# Package.resolved
# *.xcodeproj
#
# Xcode automatically generates this directory with a .xcworkspacedata file and xcuserdata
# hence it is not needed unless you have added a package configuration file to your project
# .swiftpm

.build/

# CocoaPods
#
# We recommend against adding the Pods directory to your .gitignore. However
# you should judge for yourself, the pros and cons are mentioned at:
# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
#
# Pods/
#
# Add this line if you want to avoid checking in source code from the Xcode workspace
# *.xcworkspace

# Carthage
#
# Add this line if you want to avoid checking in source code from Carthage dependencies.
# Carthage/Checkouts

Carthage/Build/

# Accio dependency management
Dependencies/
.accio/

# fastlane
#
# It is recommended to not store the screenshots in the git repo.
# Instead, use fastlane to re-generate the screenshots whenever they are needed.
# For more information about the recommended setup visit:
# https://docs.fastlane.tools/best-practices/source-control/#source-control

fastlane/report.xml
fastlane/Preview.html
fastlane/screenshots/**/*.png
fastlane/test_output

# Code Injection
#
# After new code Injection tools there's a generated folder /iOSInjectionProject
# https://github.com/johnno1962/injectionforxcode

iOSInjectionProject/


================================================
FILE: ios-swift/SherpaOnnx/SherpaOnnx/AppDelegate.swift
================================================
//
//  AppDelegate.swift
//  SherpaOnnx
//
//  Created by fangjun on 2023/2/25.
//

import UIKit

@main
class AppDelegate: UIResponder, UIApplicationDelegate {


    func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]?) -> Bool {
        // Override point for customization after application launch.
        return true
    }

    // MARK: UISceneSession Lifecycle

    func application(_ application: UIApplication, configurationForConnecting connectingSceneSession: UISceneSession, options: UIScene.ConnectionOptions) -> UISceneConfiguration {
        // Called when a new scene session is being created.
        // Use this method to select a configuration to create the new scene with.
        return UISceneConfiguration(name: "Default Configuration", sessionRole: connectingSceneSession.role)
    }

    func application(_ application: UIApplication, didDiscardSceneSessions sceneSessions: Set<UISceneSession>) {
        // Called when the user discards a scene session.
        // If any sessions were discarded while the application was not running, this will be called shortly after application:didFinishLaunchingWithOptions.
        // Use this method to release any resources that were specific to the discarded scenes, as they will not return.
    }


}


================================================
FILE: ios-swift/SherpaOnnx/SherpaOnnx/Assets.xcassets/AccentColor.colorset/Contents.json
================================================
{
  "colors" : [
    {
      "idiom" : "universal"
    }
  ],
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}


================================================
FILE: ios-swift/SherpaOnnx/SherpaOnnx/Assets.xcassets/AppIcon.appiconset/Contents.json
================================================
{
  "images" : [
    {
      "filename" : "k2-1024x1024.png",
      "idiom" : "universal",
      "platform" : "ios",
      "size" : "1024x1024"
    }
  ],
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}


================================================
FILE: ios-swift/SherpaOnnx/SherpaOnnx/Assets.xcassets/Contents.json
================================================
{
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}


================================================
FILE: ios-swift/SherpaOnnx/SherpaOnnx/Base.lproj/LaunchScreen.storyboard
================================================
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="13122.16" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" launchScreen="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="01J-lp-oVM">
    <dependencies>
        <plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="13104.12"/>
        <capability name="Safe area layout guides" minToolsVersion="9.0"/>
        <capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
    </dependencies>
    <scenes>
        <!--View Controller-->
        <scene sceneID="EHf-IW-A2E">
            <objects>
                <viewController id="01J-lp-oVM" sceneMemberID="viewController">
                    <view key="view" contentMode="scaleToFill" id="Ze5-6b-2t3">
                        <rect key="frame" x="0.0" y="0.0" width="375" height="667"/>
                        <autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
                        <color key="backgroundColor" xcode11CocoaTouchSystemColor="systemBackgroundColor" cocoaTouchSystemColor="whiteColor"/>
                        <viewLayoutGuide key="safeArea" id="6Tk-OE-BBY"/>
                    </view>
                </viewController>
                <placeholder placeholderIdentifier="IBFirstResponder" id="iYj-Kq-Ea1" userLabel="First Responder" sceneMemberID="firstResponder"/>
            </objects>
            <point key="canvasLocation" x="53" y="375"/>
        </scene>
    </scenes>
</document>


================================================
FILE: ios-swift/SherpaOnnx/SherpaOnnx/Base.lproj/Main.storyboard
================================================
<?xml version="1.0" encoding="UTF-8"?>
<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="21507" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="BYZ-38-t0r">
    <device id="retina6_12" orientation="portrait" appearance="light"/>
    <dependencies>
        <deployment identifier="iOS"/>
        <plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="21505"/>
        <capability name="Safe area layout guides" minToolsVersion="9.0"/>
        <capability name="System colors in document resources" minToolsVersion="11.0"/>
        <capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
    </dependencies>
    <scenes>
        <!--View Controller-->
        <scene sceneID="tne-QT-ifu">
            <objects>
                <viewController id="BYZ-38-t0r" customClass="ViewController" customModule="SherpaNcnn" customModuleProvider="target" sceneMemberID="viewController">
                    <view key="view" contentMode="scaleToFill" id="8bC-Xf-vdC">
                        <rect key="frame" x="0.0" y="0.0" width="393" height="852"/>
                        <autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
                        <subviews>
                            <button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="system" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="7q8-Y3-WbJ">
                                <rect key="frame" x="166" y="773" width="61.333333333333343" height="35"/>
                                <state key="normal" title="Button"/>
                                <buttonConfiguration key="configuration" style="plain" title="Start"/>
                                <connections>
                                    <action selector="onRecordBtnClick:" destination="BYZ-38-t0r" eventType="touchUpInside" id="rS6-DT-XWm"/>
                                </connections>
                            </button>
                            <label opaque="NO" userInteractionEnabled="NO" contentMode="left" horizontalHuggingPriority="251" verticalHuggingPriority="251" text="Label" lineBreakMode="tailTruncation" numberOfLines="0" baselineAdjustment="alignBaselines" adjustsFontSizeToFit="NO" translatesAutoresizingMaskIntoConstraints="NO" id="jfS-7J-m9C">
                                <rect key="frame" x="8" y="67" width="377" height="20.333333333333329"/>
                                <fontDescription key="fontDescription" type="system" pointSize="17"/>
                                <nil key="textColor"/>
                                <nil key="highlightedColor"/>
                            </label>
                        </subviews>
                        <viewLayoutGuide key="safeArea" id="6Tk-OE-BBY"/>
                        <color key="backgroundColor" systemColor="systemBackgroundColor"/>
                        <constraints>
                            <constraint firstItem="jfS-7J-m9C" firstAttribute="leading" secondItem="6Tk-OE-BBY" secondAttribute="leading" constant="8" id="HX3-rI-U9E"/>
                            <constraint firstItem="jfS-7J-m9C" firstAttribute="top" secondItem="6Tk-OE-BBY" secondAttribute="top" constant="8" id="NEv-PD-DHj"/>
                            <constraint firstItem="7q8-Y3-WbJ" firstAttribute="centerX" secondItem="8bC-Xf-vdC" secondAttribute="centerX" id="Nha-gf-R2b"/>
                            <constraint firstItem="6Tk-OE-BBY" firstAttribute="trailing" secondItem="jfS-7J-m9C" secondAttribute="trailing" constant="8" id="P2f-hG-O2e"/>
                            <constraint firstAttribute="bottomMargin" secondItem="7q8-Y3-WbJ" secondAttribute="bottom" constant="10" id="Pgb-4G-ySa"/>
                        </constraints>
                    </view>
                    <connections>
                        <outlet property="recordBtn" destination="7q8-Y3-WbJ" id="mFd-cu-zjn"/>
                        <outlet property="resultLabel" destination="jfS-7J-m9C" id="xQU-ID-m5Q"/>
                    </connections>
                </viewController>
                <placeholder placeholderIdentifier="IBFirstResponder" id="dkx-z0-nzr" sceneMemberID="firstResponder"/>
            </objects>
            <point key="canvasLocation" x="32.824427480916029" y="3.5211267605633805"/>
        </scene>
    </scenes>
    <resources>
        <systemColor name="systemBackgroundColor">
            <color white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
        </systemColor>
    </resources>
</document>


================================================
FILE: ios-swift/SherpaOnnx/SherpaOnnx/Info.plist
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>NSMicrophoneUsageDescription</key>
	<string>Need microphone access for Next-gen Kaldi to work</string>
	<key>UIApplicationSceneManifest</key>
	<dict>
		<key>UIApplicationSupportsMultipleScenes</key>
		<false/>
		<key>UISceneConfigurations</key>
		<dict>
			<key>UIWindowSceneSessionRoleApplication</key>
			<array>
				<dict>
					<key>UISceneConfigurationName</key>
					<string>Default Configuration</string>
					<key>UISceneDelegateClassName</key>
					<string>$(PRODUCT_MODULE_NAME).SceneDelegate</string>
					<key>UISceneStoryboardFile</key>
					<string>Main</string>
				</dict>
			</array>
		</dict>
	</dict>
</dict>
</plist>


================================================
FILE: ios-swift/SherpaOnnx/SherpaOnnx/Model.swift
================================================
import Foundation

func getResource(_ forResource: String, _ ofType: String) -> String {
  let path = Bundle.main.path(forResource: forResource, ofType: ofType)
  precondition(
    path != nil,
    "\(forResource).\(ofType) does not exist!\n" + "Remember to change \n"
      + "  Build Phases -> Copy Bundle Resources\n" + "to add it!"
  )
  return path!
}
/// Please refer to
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
/// to download pre-trained models

/// sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 (Bilingual, Chinese + English)
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html
func getBilingualStreamZhEnZipformer20230220() -> SherpaOnnxOnlineModelConfig {
  let encoder = getResource("encoder-epoch-99-avg-1", "onnx")
  let decoder = getResource("decoder-epoch-99-avg-1", "onnx")
  let joiner = getResource("joiner-epoch-99-avg-1", "onnx")
  let tokens = getResource("tokens", "txt")

  return sherpaOnnxOnlineModelConfig(
    tokens: tokens,
    transducer: sherpaOnnxOnlineTransducerModelConfig(
      encoder: encoder,
      decoder: decoder,
      joiner: joiner
    ),
    numThreads: 1,
    modelType: "zipformer"
  )
}

func getZhZipformer20230615() -> SherpaOnnxOnlineModelConfig {
  let encoder = getResource("encoder-epoch-12-avg-4-chunk-16-left-128", "onnx")
  let decoder = getResource("decoder-epoch-12-avg-4-chunk-16-left-128", "onnx")
  let joiner = getResource("joiner-epoch-12-avg-4-chunk-16-left-128", "onnx")
  let tokens = getResource("tokens", "txt")

  return sherpaOnnxOnlineModelConfig(
    tokens: tokens,
    transducer: sherpaOnnxOnlineTransducerModelConfig(
      encoder: encoder,
      decoder: decoder,
      joiner: joiner
    ),
    numThreads: 1,
    modelType: "zipformer2"
  )
}

func getZhZipformer20230615Int8() -> SherpaOnnxOnlineModelConfig {
  let encoder = getResource("encoder-epoch-12-avg-4-chunk-16-left-128.int8", "onnx")
  let decoder = getResource("decoder-epoch-12-avg-4-chunk-16-left-128", "onnx")
  let joiner = getResource("joiner-epoch-12-avg-4-chunk-16-left-128", "onnx")
  let tokens = getResource("tokens", "txt")

  return sherpaOnnxOnlineModelConfig(
    tokens: tokens,
    transducer: sherpaOnnxOnlineTransducerModelConfig(
      encoder: encoder,
      decoder: decoder,
      joiner: joiner),
    numThreads: 1,
    modelType: "zipformer2"
  )
}

func getEnZipformer20230626() -> SherpaOnnxOnlineModelConfig {
  let encoder = getResource("encoder-epoch-99-avg-1-chunk-16-left-128", "onnx")
  let decoder = getResource("decoder-epoch-99-avg-1-chunk-16-left-128", "onnx")
  let joiner = getResource("joiner-epoch-99-avg-1-chunk-16-left-128", "onnx")
  let tokens = getResource("tokens", "txt")

  return sherpaOnnxOnlineModelConfig(
    tokens: tokens,
    transducer: sherpaOnnxOnlineTransducerModelConfig(
      encoder: encoder,
      decoder: decoder,
      joiner: joiner),
    numThreads: 1,
    modelType: "zipformer2"
  )
}

func getBilingualStreamingZhEnParaformer() -> SherpaOnnxOnlineModelConfig {
  let encoder = getResource("encoder.int8", "onnx")
  let decoder = getResource("decoder.int8", "onnx")
  let tokens = getResource("tokens", "txt")

  return sherpaOnnxOnlineModelConfig(
    tokens: tokens,
    paraformer: sherpaOnnxOnlineParaformerModelConfig(
      encoder: encoder,
      decoder: decoder),
    numThreads: 1,
    modelType: "paraformer"
  )
}

/// Please refer to
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
/// to add more models if you need


================================================
FILE: ios-swift/SherpaOnnx/SherpaOnnx/SceneDelegate.swift
================================================
//
//  SceneDelegate.swift
//  SherpaOnnx
//
//  Created by fangjun on 2023/2/25.
//

import UIKit

class SceneDelegate: UIResponder, UIWindowSceneDelegate {

    var window: UIWindow?


    func scene(_ scene: UIScene, willConnectTo session: UISceneSession, options connectionOptions: UIScene.ConnectionOptions) {
        // Use this method to optionally configure and attach the UIWindow `window` to the provided UIWindowScene `scene`.
        // If using a storyboard, the `window` property will automatically be initialized and attached to the scene.
        // This delegate does not imply the connecting scene or session are new (see `application:configurationForConnectingSceneSession` instead).
        guard let _ = (scene as? UIWindowScene) else { return }
    }

    func sceneDidDisconnect(_ scene: UIScene) {
        // Called as the scene is being released by the system.
        // This occurs shortly after the scene enters the background, or when its session is discarded.
        // Release any resources associated with this scene that can be re-created the next time the scene connects.
        // The scene may re-connect later, as its session was not necessarily discarded (see `application:didDiscardSceneSessions` instead).
    }

    func sceneDidBecomeActive(_ scene: UIScene) {
        // Called when the scene has moved from an inactive state to an active state.
        // Use this method to restart any tasks that were paused (or not yet started) when the scene was inactive.
    }

    func sceneWillResignActive(_ scene: UIScene) {
        // Called when the scene will move from an active state to an inactive state.
        // This may occur due to temporary interruptions (ex. an incoming phone call).
    }

    func sceneWillEnterForeground(_ scene: UIScene) {
        // Called as the scene transitions from the background to the foreground.
        // Use this method to undo the changes made on entering the background.
    }

    func sceneDidEnterBackground(_ scene: UIScene) {
        // Called as the scene transitions from the foreground to the background.
        // Use this method to save data, release shared resources, and store enough scene-specific state information
        // to restore the scene back to its current state.
    }


}


================================================
FILE: ios-swift/SherpaOnnx/SherpaOnnx/ViewController.swift
================================================
//
//  ViewController.swift
//  SherpaOnnx
//
//  Created by fangjun on 2023/1/28.
//

import AVFoundation
import UIKit

extension AudioBuffer {
    func array() -> [Float] {
        return Array(UnsafeBufferPointer(self))
    }
}

extension AVAudioPCMBuffer {
    func array() -> [Float] {
        return self.audioBufferList.pointee.mBuffers.array()
    }
}

class ViewController: UIViewController {
    @IBOutlet weak var resultLabel: UILabel!
    @IBOutlet weak var recordBtn: UIButton!

    var audioEngine: AVAudioEngine? = nil
    var recognizer: SherpaOnnxRecognizer! = nil

    /// It saves the decoded results so far
    var sentences: [String] = [] {
        didSet {
            updateLabel()
        }
    }
    var lastSentence: String = ""
    let maxSentence: Int = 20
    var results: String {
        if sentences.isEmpty && lastSentence.isEmpty {
            return ""
        }
        if sentences.isEmpty {
            return "0: \(lastSentence.lowercased())"
        }

        let start = max(sentences.count - maxSentence, 0)
        if lastSentence.isEmpty {
            return sentences.enumerated().map { (index, s) in "\(index): \(s.lowercased())" }[start...]
                .joined(separator: "\n")
        } else {
            return sentences.enumerated().map { (index, s) in "\(index): \(s.lowercased())" }[start...]
                .joined(separator: "\n") + "\n\(sentences.count): \(lastSentence.lowercased())"
        }
    }

    func updateLabel() {
        DispatchQueue.main.async {
            self.resultLabel.text = self.results
        }
    }

    override func viewDidLoad() {
        super.viewDidLoad()
        // Do any additional setup after loading the view.

        resultLabel.text = "ASR with Next-gen Kaldi\n\nSee https://github.com/k2-fsa/sherpa-onnx\n\nPress the Start button to run!"
        recordBtn.setTitle("Start", for: .normal)
        initRecognizer()
        initRecorder()
    }

    @IBAction func onRecordBtnClick(_ sender: UIButton) {
        if recordBtn.currentTitle == "Start" {
            startRecorder()
            recordBtn.setTitle("Stop", for: .normal)
        } else {
            stopRecorder()
            recordBtn.setTitle("Start", for: .normal)
        }
    }

    func initRecognizer() {
        // Please select one model that is best suitable for you.
        //
        // You can also modify Model.swift to add new pre-trained models from
        // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html

        let modelConfig = getBilingualStreamZhEnZipformer20230220()
        // let modelConfig = getZhZipformer20230615()
        // let modelConfig = getEnZipformer20230626()
        // let modelConfig = getBilingualStreamingZhEnParaformer()

        let featConfig = sherpaOnnxFeatureConfig(
            sampleRate: 16000,
            featureDim: 80)

        var config = sherpaOnnxOnlineRecognizerConfig(
            featConfig: featConfig,
            modelConfig: modelConfig,
            enableEndpoint: true,
            rule1MinTrailingSilence: 2.4,
            rule2MinTrailingSilence: 0.8,
            rule3MinUtteranceLength: 30,
            decodingMethod: "greedy_search",
            maxActivePaths: 4
        )
        recognizer = SherpaOnnxRecognizer(config: &config)
    }

    func initRecorder() {
        print("init recorder")
        audioEngine = AVAudioEngine()
        let inputNode = self.audioEngine?.inputNode
        let bus = 0
        let inputFormat = inputNode?.outputFormat(forBus: bus)
        let outputFormat = AVAudioFormat(
            commonFormat: .pcmFormatFloat32,
            sampleRate: 16000, channels: 1,
            interleaved: false)!

        let converter = AVAudioConverter(from: inputFormat!, to: outputFormat)!

        inputNode!.installTap(
            onBus: bus,
            bufferSize: 1024,
            format: inputFormat
        ) {
            (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
            var newBufferAvailable = true

            let inputCallback: AVAudioConverterInputBlock = {
                inNumPackets, outStatus in
                if newBufferAvailable {
                    outStatus.pointee = .haveData
                    newBufferAvailable = false

                    return buffer
                } else {
                    outStatus.pointee = .noDataNow
                    return nil
                }
            }

            let convertedBuffer = AVAudioPCMBuffer(
                pcmFormat: outputFormat,
                frameCapacity:
                    AVAudioFrameCount(outputFormat.sampleRate)
                * buffer.frameLength
                / AVAudioFrameCount(buffer.format.sampleRate))!

            var error: NSError?
            let _ = converter.convert(
                to: convertedBuffer,
                error: &error, withInputFrom: inputCallback)

            // TODO(fangjun): Handle status != haveData

            let array = convertedBuffer.array()
            if !array.isEmpty {
                self.recognizer.acceptWaveform(samples: array)
                while (self.recognizer.isReady()){
                    self.recognizer.decode()
                }
                let isEndpoint = self.recognizer.isEndpoint()
                let text = self.recognizer.getResult().text

                if !text.isEmpty && self.lastSentence != text {
                    self.lastSentence = text
                    self.updateLabel()
                    print(text)
                }

                if isEndpoint {
                    if !text.isEmpty {
                        let tmp = self.lastSentence
                        self.lastSentence = ""
                        self.sentences.append(tmp)
                    }
                    self.recognizer.reset()
                }
            }
        }

    }

    func startRecorder() {
        lastSentence = ""
        sentences = []

        do {
            try self.audioEngine?.start()
        } catch let error as NSError {
            print("Got an error starting audioEngine: \(error.domain), \(error)")
        }
        print("started")
    }

    func stopRecorder() {
        audioEngine?.stop()
        print("stopped")
    }
}


================================================
FILE: ios-swift/SherpaOnnx/SherpaOnnx.xcodeproj/project.pbxproj
================================================
// !$*UTF8*$!
{
	archiveVersion = 1;
	classes = {
	};
	objectVersion = 56;
	objects = {

/* Begin PBXBuildFile section */
		C93989AE2A89FE13009AB859 /* sherpa-onnx.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = C984A81B29AA11C500D74C52 /* sherpa-onnx.xcframework */; };
		C984A7E829A9EEB700D74C52 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = C984A7E729A9EEB700D74C52 /* AppDelegate.swift */; };
		C984A7EA29A9EEB700D74C52 /* SceneDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = C984A7E929A9EEB700D74C52 /* SceneDelegate.swift */; };
		C984A7F129A9EEB900D74C52 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = C984A7F029A9EEB900D74C52 /* Assets.xcassets */; };
		C984A7F429A9EEB900D74C52 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = C984A7F229A9EEB900D74C52 /* LaunchScreen.storyboard */; };
		C984A7FF29A9EEBA00D74C52 /* SherpaOnnxTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = C984A7FE29A9EEBA00D74C52 /* SherpaOnnxTests.swift */; };
		C984A80929A9EEBA00D74C52 /* SherpaOnnxUITests.swift in Sources */ = {isa = PBXBuildFile; fileRef = C984A80829A9EEBA00D74C52 /* SherpaOnnxUITests.swift */; };
		C984A80B29A9EEBA00D74C52 /* SherpaOnnxUITestsLaunchTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = C984A80A29A9EEBA00D74C52 /* SherpaOnnxUITestsLaunchTests.swift */; };
		C984A81929AA119400D74C52 /* SherpaOnnx.swift in Sources */ = {isa = PBXBuildFile; fileRef = C984A81829AA119400D74C52 /* SherpaOnnx.swift */; };
		C984A82829AA196100D74C52 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = C984A82629AA196100D74C52 /* Main.storyboard */; };
		C984A82A29AA19AC00D74C52 /* Model.swift in Sources */ = {isa = PBXBuildFile; fileRef = C984A82929AA19AC00D74C52 /* Model.swift */; };
		C984A83C29AA430B00D74C52 /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = C984A83B29AA430B00D74C52 /* ViewController.swift */; };
		C9AC22172BB50165008B65E2 /* onnxruntime.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = C9AC22162BB50165008B65E2 /* onnxruntime.xcframework */; };
/* End PBXBuildFile section */

/* Begin PBXContainerItemProxy section */
		C984A7FB29A9EEBA00D74C52 /* PBXContainerItemProxy */ = {
			isa = PBXContainerItemProxy;
			containerPortal = C984A7DC29A9EEB700D74C52 /* Project object */;
			proxyType = 1;
			remoteGlobalIDString = C984A7E329A9EEB700D74C52;
			remoteInfo = SherpaOnnx;
		};
		C984A80529A9EEBA00D74C52 /* PBXContainerItemProxy */ = {
			isa = PBXContainerItemProxy;
			containerPortal = C984A7DC29A9EEB700D74C52 /* Project object */;
			proxyType = 1;
			remoteGlobalIDString = C984A7E329A9EEB700D74C52;
			remoteInfo = SherpaOnnx;
		};
/* End PBXContainerItemProxy section */

/* Begin PBXFileReference section */
		C984A7E429A9EEB700D74C52 /* SherpaOnnx.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = SherpaOnnx.app; sourceTree = BUILT_PRODUCTS_DIR; };
		C984A7E729A9EEB700D74C52 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; };
		C984A7E929A9EEB700D74C52 /* SceneDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SceneDelegate.swift; sourceTree = "<group>"; };
		C984A7F029A9EEB900D74C52 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
		C984A7F329A9EEB900D74C52 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = "<group>"; };
		C984A7F529A9EEB900D74C52 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
		C984A7FA29A9EEBA00D74C52 /* SherpaOnnxTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = SherpaOnnxTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
		C984A7FE29A9EEBA00D74C52 /* SherpaOnnxTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SherpaOnnxTests.swift; sourceTree = "<group>"; };
		C984A80429A9EEBA00D74C52 /* SherpaOnnxUITests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = SherpaOnnxUITests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
		C984A80829A9EEBA00D74C52 /* SherpaOnnxUITests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SherpaOnnxUITests.swift; sourceTree = "<group>"; };
		C984A80A29A9EEBA00D74C52 /* SherpaOnnxUITestsLaunchTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SherpaOnnxUITestsLaunchTests.swift; sourceTree = "<group>"; };
		C984A81729A9F51B00D74C52 /* SherpaOnnx-Bridging-Header.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "SherpaOnnx-Bridging-Header.h"; path = "../../../swift-api-examples/SherpaOnnx-Bridging-Header.h"; sourceTree = "<group>"; };
		C984A81829AA119400D74C52 /* SherpaOnnx.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = SherpaOnnx.swift; path = "../../../swift-api-examples/SherpaOnnx.swift"; sourceTree = "<group>"; };
		C984A81B29AA11C500D74C52 /* sherpa-onnx.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = "sherpa-onnx.xcframework"; path = "../../build-ios/sherpa-onnx.xcframework"; sourceTree = "<group>"; };
		C984A82729AA196100D74C52 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/Main.storyboard; sourceTree = "<group>"; };
		C984A82929AA19AC00D74C52 /* Model.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Model.swift; sourceTree = "<group>"; };
		C984A83B29AA430B00D74C52 /* ViewController.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ViewController.swift; sourceTree = "<group>"; };
		C9AC22162BB50165008B65E2 /* onnxruntime.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = onnxruntime.xcframework; path = "../../build-ios/ios-onnxruntime/1.17.1/onnxruntime.xcframework"; sourceTree = "<group>"; };
/* End PBXFileReference section */

/* Begin PBXFrameworksBuildPhase section */
		C984A7E129A9EEB700D74C52 /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
				C9AC22172BB50165008B65E2 /* onnxruntime.xcframework in Frameworks */,
				C93989AE2A89FE13009AB859 /* sherpa-onnx.xcframework in Frameworks */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		C984A7F729A9EEBA00D74C52 /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		C984A80129A9EEBA00D74C52 /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXFrameworksBuildPhase section */

/* Begin PBXGroup section */
		C984A7DB29A9EEB700D74C52 = {
			isa = PBXGroup;
			children = (
				C984A7E629A9EEB700D74C52 /* SherpaOnnx */,
				C984A7FD29A9EEBA00D74C52 /* SherpaOnnxTests */,
				C984A80729A9EEBA00D74C52 /* SherpaOnnxUITests */,
				C984A7E529A9EEB700D74C52 /* Products */,
				C984A81A29AA11C500D74C52 /* Frameworks */,
			);
			sourceTree = "<group>";
		};
		C984A7E529A9EEB700D74C52 /* Products */ = {
			isa = PBXGroup;
			children = (
				C984A7E429A9EEB700D74C52 /* SherpaOnnx.app */,
				C984A7FA29A9EEBA00D74C52 /* SherpaOnnxTests.xctest */,
				C984A80429A9EEBA00D74C52 /* SherpaOnnxUITests.xctest */,
			);
			name = Products;
			sourceTree = "<group>";
		};
		C984A7E629A9EEB700D74C52 /* SherpaOnnx */ = {
			isa = PBXGroup;
			children = (
				C984A83B29AA430B00D74C52 /* ViewController.swift */,
				C984A82929AA19AC00D74C52 /* Model.swift */,
				C984A81829AA119400D74C52 /* SherpaOnnx.swift */,
				C984A81729A9F51B00D74C52 /* SherpaOnnx-Bridging-Header.h */,
				C984A7E729A9EEB700D74C52 /* AppDelegate.swift */,
				C984A7E929A9EEB700D74C52 /* SceneDelegate.swift */,
				C984A82629AA196100D74C52 /* Main.storyboard */,
				C984A7F029A9EEB900D74C52 /* Assets.xcassets */,
				C984A7F229A9EEB900D74C52 /* LaunchScreen.storyboard */,
				C984A7F529A9EEB900D74C52 /* Info.plist */,
			);
			path = SherpaOnnx;
			sourceTree = "<group>";
		};
		C984A7FD29A9EEBA00D74C52 /* SherpaOnnxTests */ = {
			isa = PBXGroup;
			children = (
				C984A7FE29A9EEBA00D74C52 /* SherpaOnnxTests.swift */,
			);
			path = SherpaOnnxTests;
			sourceTree = "<group>";
		};
		C984A80729A9EEBA00D74C52 /* SherpaOnnxUITests */ = {
			isa = PBXGroup;
			children = (
				C984A80829A9EEBA00D74C52 /* SherpaOnnxUITests.swift */,
				C984A80A29A9EEBA00D74C52 /* SherpaOnnxUITestsLaunchTests.swift */,
			);
			path = SherpaOnnxUITests;
			sourceTree = "<group>";
		};
		C984A81A29AA11C500D74C52 /* Frameworks */ = {
			isa = PBXGroup;
			children = (
				C9AC22162BB50165008B65E2 /* onnxruntime.xcframework */,
				C984A81B29AA11C500D74C52 /* sherpa-onnx.xcframework */,
			);
			name = Frameworks;
			sourceTree = "<group>";
		};
/* End PBXGroup section */

/* Begin PBXNativeTarget section */
		C984A7E329A9EEB700D74C52 /* SherpaOnnx */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = C984A80E29A9EEBA00D74C52 /* Build configuration list for PBXNativeTarget "SherpaOnnx" */;
			buildPhases = (
				C984A7E029A9EEB700D74C52 /* Sources */,
				C984A7E129A9EEB700D74C52 /* Frameworks */,
				C984A7E229A9EEB700D74C52 /* Resources */,
			);
			buildRules = (
			);
			dependencies = (
			);
			name = SherpaOnnx;
			productName = SherpaOnnx;
			productReference = C984A7E429A9EEB700D74C52 /* SherpaOnnx.app */;
			productType = "com.apple.product-type.application";
		};
		C984A7F929A9EEBA00D74C52 /* SherpaOnnxTests */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = C984A81129A9EEBA00D74C52 /* Build configuration list for PBXNativeTarget "SherpaOnnxTests" */;
			buildPhases = (
				C984A7F629A9EEBA00D74C52 /* Sources */,
				C984A7F729A9EEBA00D74C52 /* Frameworks */,
				C984A7F829A9EEBA00D74C52 /* Resources */,
			);
			buildRules = (
			);
			dependencies = (
				C984A7FC29A9EEBA00D74C52 /* PBXTargetDependency */,
			);
			name = SherpaOnnxTests;
			productName = SherpaOnnxTests;
			productReference = C984A7FA29A9EEBA00D74C52 /* SherpaOnnxTests.xctest */;
			productType = "com.apple.product-type.bundle.unit-test";
		};
		C984A80329A9EEBA00D74C52 /* SherpaOnnxUITests */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = C984A81429A9EEBA00D74C52 /* Build configuration list for PBXNativeTarget "SherpaOnnxUITests" */;
			buildPhases = (
				C984A80029A9EEBA00D74C52 /* Sources */,
				C984A80129A9EEBA00D74C52 /* Frameworks */,
				C984A80229A9EEBA00D74C52 /* Resources */,
			);
			buildRules = (
			);
			dependencies = (
				C984A80629A9EEBA00D74C52 /* PBXTargetDependency */,
			);
			name = SherpaOnnxUITests;
			productName = SherpaOnnxUITests;
			productReference = C984A80429A9EEBA00D74C52 /* SherpaOnnxUITests.xctest */;
			productType = "com.apple.product-type.bundle.ui-testing";
		};
/* End PBXNativeTarget section */

/* Begin PBXProject section */
		C984A7DC29A9EEB700D74C52 /* Project object */ = {
			isa = PBXProject;
			attributes = {
				BuildIndependentTargetsInParallel = 1;
				LastSwiftUpdateCheck = 1420;
				LastUpgradeCheck = 1420;
				TargetAttributes = {
					C984A7E329A9EEB700D74C52 = {
						CreatedOnToolsVersion = 14.2;
					};
					C984A7F929A9EEBA00D74C52 = {
						CreatedOnToolsVersion = 14.2;
						TestTargetID = C984A7E329A9EEB700D74C52;
					};
					C984A80329A9EEBA00D74C52 = {
						CreatedOnToolsVersion = 14.2;
						TestTargetID = C984A7E329A9EEB700D74C52;
					};
				};
			};
			buildConfigurationList = C984A7DF29A9EEB700D74C52 /* Build configuration list for PBXProject "SherpaOnnx" */;
			compatibilityVersion = "Xcode 14.0";
			developmentRegion = en;
			hasScannedForEncodings = 0;
			knownRegions = (
				en,
				Base,
			);
			mainGroup = C984A7DB29A9EEB700D74C52;
			productRefGroup = C984A7E529A9EEB700D74C52 /* Products */;
			projectDirPath = "";
			projectRoot = "";
			targets = (
				C984A7E329A9EEB700D74C52 /* SherpaOnnx */,
				C984A7F929A9EEBA00D74C52 /* SherpaOnnxTests */,
				C984A80329A9EEBA00D74C52 /* SherpaOnnxUITests */,
			);
		};
/* End PBXProject section */

/* Begin PBXResourcesBuildPhase section */
		C984A7E229A9EEB700D74C52 /* Resources */ = {
			isa = PBXResourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				C984A82829AA196100D74C52 /* Main.storyboard in Resources */,
				C984A7F429A9EEB900D74C52 /* LaunchScreen.storyboard in Resources */,
				C984A7F129A9EEB900D74C52 /* Assets.xcassets in Resources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		C984A7F829A9EEBA00D74C52 /* Resources */ = {
			isa = PBXResourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		C984A80229A9EEBA00D74C52 /* Resources */ = {
			isa = PBXResourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXResourcesBuildPhase section */

/* Begin PBXSourcesBuildPhase section */
		C984A7E029A9EEB700D74C52 /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				C984A83C29AA430B00D74C52 /* ViewController.swift in Sources */,
				C984A82A29AA19AC00D74C52 /* Model.swift in Sources */,
				C984A81929AA119400D74C52 /* SherpaOnnx.swift in Sources */,
				C984A7E829A9EEB700D74C52 /* AppDelegate.swift in Sources */,
				C984A7EA29A9EEB700D74C52 /* SceneDelegate.swift in Sources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		C984A7F629A9EEBA00D74C52 /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				C984A7FF29A9EEBA00D74C52 /* SherpaOnnxTests.swift in Sources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		C984A80029A9EEBA00D74C52 /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				C984A80B29A9EEBA00D74C52 /* SherpaOnnxUITestsLaunchTests.swift in Sources */,
				C984A80929A9EEBA00D74C52 /* SherpaOnnxUITests.swift in Sources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXSourcesBuildPhase section */

/* Begin PBXTargetDependency section */
		C984A7FC29A9EEBA00D74C52 /* PBXTargetDependency */ = {
			isa = PBXTargetDependency;
			target = C984A7E329A9EEB700D74C52 /* SherpaOnnx */;
			targetProxy = C984A7FB29A9EEBA00D74C52 /* PBXContainerItemProxy */;
		};
		C984A80629A9EEBA00D74C52 /* PBXTargetDependency */ = {
			isa = PBXTargetDependency;
			target = C984A7E329A9EEB700D74C52 /* SherpaOnnx */;
			targetProxy = C984A80529A9EEBA00D74C52 /* PBXContainerItemProxy */;
		};
/* End PBXTargetDependency section */

/* Begin PBXVariantGroup section */
		C984A7F229A9EEB900D74C52 /* LaunchScreen.storyboard */ = {
			isa = PBXVariantGroup;
			children = (
				C984A7F329A9EEB900D74C52 /* Base */,
			);
			name = LaunchScreen.storyboard;
			sourceTree = "<group>";
		};
		C984A82629AA196100D74C52 /* Main.storyboard */ = {
			isa = PBXVariantGroup;
			children = (
				C984A82729AA196100D74C52 /* Base */,
			);
			name = Main.storyboard;
			sourceTree = "<group>";
		};
/* End PBXVariantGroup section */

/* Begin XCBuildConfiguration section */
		C984A80C29A9EEBA00D74C52 /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				CLANG_ANALYZER_NONNULL = YES;
				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
				CLANG_ENABLE_MODULES = YES;
				CLANG_ENABLE_OBJC_ARC = YES;
				CLANG_ENABLE_OBJC_WEAK = YES;
				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
				CLANG_WARN_BOOL_CONVERSION = YES;
				CLANG_WARN_COMMA = YES;
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
				CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
				CLANG_WARN_STRICT_PROTOTYPES = YES;
				CLANG_WARN_SUSPICIOUS_MOVE = YES;
				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
				CLANG_WARN_UNREACHABLE_CODE = YES;
				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
				COPY_PHASE_STRIP = NO;
				DEBUG_INFORMATION_FORMAT = dwarf;
				ENABLE_STRICT_OBJC_MSGSEND = YES;
				ENABLE_TESTABILITY = YES;
				GCC_C_LANGUAGE_STANDARD = gnu11;
				GCC_DYNAMIC_NO_PIC = NO;
				GCC_NO_COMMON_BLOCKS = YES;
				GCC_OPTIMIZATION_LEVEL = 0;
				GCC_PREPROCESSOR_DEFINITIONS = (
					"DEBUG=1",
					"$(inherited)",
				);
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
				GCC_WARN_UNDECLARED_SELECTOR = YES;
				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
				GCC_WARN_UNUSED_FUNCTION = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				IPHONEOS_DEPLOYMENT_TARGET = 16.2;
				MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
				MTL_FAST_MATH = YES;
				ONLY_ACTIVE_ARCH = YES;
				SDKROOT = iphoneos;
				SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
			};
			name = Debug;
		};
		C984A80D29A9EEBA00D74C52 /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				CLANG_ANALYZER_NONNULL = YES;
				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
				CLANG_ENABLE_MODULES = YES;
				CLANG_ENABLE_OBJC_ARC = YES;
				CLANG_ENABLE_OBJC_WEAK = YES;
				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
				CLANG_WARN_BOOL_CONVERSION = YES;
				CLANG_WARN_COMMA = YES;
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
				CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
				CLANG_WARN_STRICT_PROTOTYPES = YES;
				CLANG_WARN_SUSPICIOUS_MOVE = YES;
				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
				CLANG_WARN_UNREACHABLE_CODE = YES;
				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
				COPY_PHASE_STRIP = NO;
				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
				ENABLE_NS_ASSERTIONS = NO;
				ENABLE_STRICT_OBJC_MSGSEND = YES;
				GCC_C_LANGUAGE_STANDARD = gnu11;
				GCC_NO_COMMON_BLOCKS = YES;
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
				GCC_WARN_UNDECLARED_SELECTOR = YES;
				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
				GCC_WARN_UNUSED_FUNCTION = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				IPHONEOS_DEPLOYMENT_TARGET = 16.2;
				MTL_ENABLE_DEBUG_INFO = NO;
				MTL_FAST_MATH = YES;
				SDKROOT = iphoneos;
				SWIFT_COMPILATION_MODE = wholemodule;
				SWIFT_OPTIMIZATION_LEVEL = "-O";
				VALIDATE_PRODUCT = YES;
			};
			name = Release;
		};
		C984A80F29A9EEBA00D74C52 /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
				ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
				CODE_SIGN_STYLE = Automatic;
				CURRENT_PROJECT_VERSION = 1;
				GENERATE_INFOPLIST_FILE = YES;
				INFOPLIST_FILE = SherpaOnnx/Info.plist;
				INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
				INFOPLIST_KEY_UILaunchStoryboardName = LaunchScreen;
				INFOPLIST_KEY_UIMainStoryboardFile = Main;
				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
				LD_RUNPATH_SEARCH_PATHS = (
					"$(inherited)",
					"@executable_path/Frameworks",
				);
				MARKETING_VERSION = 1.0;
				OTHER_LDFLAGS = "-lc++";
				PRODUCT_BUNDLE_IDENTIFIER = "com.k2-fsa.org.SherpaOnnx";
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_EMIT_LOC_STRINGS = YES;
				SWIFT_OBJC_BRIDGING_HEADER = "${PROJECT_DIR}/../../swift-api-examples/SherpaOnnx-Bridging-Header.h";
				SWIFT_VERSION = 5.0;
				TARGETED_DEVICE_FAMILY = "1,2";
			};
			name = Debug;
		};
		C984A81029A9EEBA00D74C52 /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
				ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
				CODE_SIGN_STYLE = Automatic;
				CURRENT_PROJECT_VERSION = 1;
				GENERATE_INFOPLIST_FILE = YES;
				INFOPLIST_FILE = SherpaOnnx/Info.plist;
				INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
				INFOPLIST_KEY_UILaunchStoryboardName = LaunchScreen;
				INFOPLIST_KEY_UIMainStoryboardFile = Main;
				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
				LD_RUNPATH_SEARCH_PATHS = (
					"$(inherited)",
					"@executable_path/Frameworks",
				);
				MARKETING_VERSION = 1.0;
				OTHER_LDFLAGS = "-lc++";
				PRODUCT_BUNDLE_IDENTIFIER = "com.k2-fsa.org.SherpaOnnx";
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_EMIT_LOC_STRINGS = YES;
				SWIFT_OBJC_BRIDGING_HEADER = "${PROJECT_DIR}/../../swift-api-examples/SherpaOnnx-Bridging-Header.h";
				SWIFT_VERSION = 5.0;
				TARGETED_DEVICE_FAMILY = "1,2";
			};
			name = Release;
		};
		C984A81229A9EEBA00D74C52 /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES;
				BUNDLE_LOADER = "$(TEST_HOST)";
				CODE_SIGN_STYLE = Automatic;
				CURRENT_PROJECT_VERSION = 1;
				GENERATE_INFOPLIST_FILE = YES;
				IPHONEOS_DEPLOYMENT_TARGET = 16.2;
				MARKETING_VERSION = 1.0;
				PRODUCT_BUNDLE_IDENTIFIER = "com.k2-fsa.org.SherpaOnnxTests";
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_EMIT_LOC_STRINGS = NO;
				SWIFT_VERSION = 5.0;
				TARGETED_DEVICE_FAMILY = "1,2";
				TEST_HOST = "$(BUILT_PRODUCTS_DIR)/SherpaOnnx.app/$(BUNDLE_EXECUTABLE_FOLDER_PATH)/SherpaOnnx";
			};
			name = Debug;
		};
		C984A81329A9EEBA00D74C52 /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES;
				BUNDLE_LOADER = "$(TEST_HOST)";
				CODE_SIGN_STYLE = Automatic;
				CURRENT_PROJECT_VERSION = 1;
				GENERATE_INFOPLIST_FILE = YES;
				IPHONEOS_DEPLOYMENT_TARGET = 16.2;
				MARKETING_VERSION = 1.0;
				PRODUCT_BUNDLE_IDENTIFIER = "com.k2-fsa.org.SherpaOnnxTests";
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_EMIT_LOC_STRINGS = NO;
				SWIFT_VERSION = 5.0;
				TARGETED_DEVICE_FAMILY = "1,2";
				TEST_HOST = "$(BUILT_PRODUCTS_DIR)/SherpaOnnx.app/$(BUNDLE_EXECUTABLE_FOLDER_PATH)/SherpaOnnx";
			};
			name = Release;
		};
		C984A81529A9EEBA00D74C52 /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES;
				CODE_SIGN_STYLE = Automatic;
				CURRENT_PROJECT_VERSION = 1;
				GENERATE_INFOPLIST_FILE = YES;
				MARKETING_VERSION = 1.0;
				PRODUCT_BUNDLE_IDENTIFIER = "com.k2-fsa.org.SherpaOnnxUITests";
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_EMIT_LOC_STRINGS = NO;
				SWIFT_VERSION = 5.0;
				TARGETED_DEVICE_FAMILY = "1,2";
				TEST_TARGET_NAME = SherpaOnnx;
			};
			name = Debug;
		};
		C984A81629A9EEBA00D74C52 /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES;
				CODE_SIGN_STYLE = Automatic;
				CURRENT_PROJECT_VERSION = 1;
				GENERATE_INFOPLIST_FILE = YES;
				MARKETING_VERSION = 1.0;
				PRODUCT_BUNDLE_IDENTIFIER = "com.k2-fsa.org.SherpaOnnxUITests";
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_EMIT_LOC_STRINGS = NO;
				SWIFT_VERSION = 5.0;
				TARGETED_DEVICE_FAMILY = "1,2";
				TEST_TARGET_NAME = SherpaOnnx;
			};
			name = Release;
		};
/* End XCBuildConfiguration section */

/* Begin XCConfigurationList section */
		C984A7DF29A9EEB700D74C52 /* Build configuration list for PBXProject "SherpaOnnx" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				C984A80C29A9EEBA00D74C52 /* Debug */,
				C984A80D29A9EEBA00D74C52 /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		C984A80E29A9EEBA00D74C52 /* Build configuration list for PBXNativeTarget "SherpaOnnx" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				C984A80F29A9EEBA00D74C52 /* Debug */,
				C984A81029A9EEBA00D74C52 /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		C984A81129A9EEBA00D74C52 /* Build configuration list for PBXNativeTarget "SherpaOnnxTests" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				C984A81229A9EEBA00D74C52 /* Debug */,
				C984A81329A9EEBA00D74C52 /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		C984A81429A9EEBA00D74C52 /* Build configuration list for PBXNativeTarget "SherpaOnnxUITests" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				C984A81529A9EEBA00D74C52 /* Debug */,
				C984A81629A9EEBA00D74C52 /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
/* End XCConfigurationList section */
	};
	rootObject = C984A7DC29A9EEB700D74C52 /* Project object */;
}


================================================
FILE: ios-swift/SherpaOnnx/SherpaOnnx.xcodeproj/project.xcworkspace/contents.xcworkspacedata
================================================
<?xml version="1.0" encoding="UTF-8"?>
<Workspace
   version = "1.0">
   <FileRef
      location = "self:">
   </FileRef>
</Workspace>


================================================
FILE: ios-swift/SherpaOnnx/SherpaOnnx.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>IDEDidComputeMac32BitWarning</key>
	<true/>
</dict>
</plist>


================================================
FILE: ios-swift/SherpaOnnx/SherpaOnnxTests/SherpaOnnxTests.swift
================================================
//
//  SherpaOnnxTests.swift
//  SherpaOnnxTests
//
//  Created by fangjun on 2023/2/25.
//

import XCTest
@testable import SherpaOnnx

final class SherpaOnnxTests: XCTestCase {

    override func setUpWithError() throws {
        // Put setup code here. This method is called before the invocation of each test method in the class.
    }

    override func tearDownWithError() throws {
        // Put teardown code here. This method is called after the invocation of each test method in the class.
    }

    func testExample() throws {
        // This is an example of a functional test case.
        // Use XCTAssert and related functions to verify your tests produce the correct results.
        // Any test you write for XCTest can be annotated as throws and async.
        // Mark your test throws to produce an unexpected failure when your test encounters an uncaught error.
        // Mark your test async to allow awaiting for asynchronous code to complete. Check the results with assertions afterwards.
    }

    func testPerformanceExample() throws {
        // This is an example of a performance test case.
        self.measure {
            // Put the code you want to measure the time of here.
        }
    }

}


================================================
FILE: ios-swift/SherpaOnnx/SherpaOnnxUITests/SherpaOnnxUITests.swift
================================================
//
//  SherpaOnnxUITests.swift
//  SherpaOnnxUITests
//
//  Created by fangjun on 2023/2/25.
//

import XCTest

final class SherpaOnnxUITests: XCTestCase {

    override func setUpWithError() throws {
        // Put setup code here. This method is called before the invocation of each test method in the class.

        // In UI tests it is usually best to stop immediately when a failure occurs.
        continueAfterFailure = false

        // In UI tests it’s important to set the initial state - such as interface orientation - required for your tests before they run. The setUp method is a good place to do this.
    }

    override func tearDownWithError() throws {
        // Put teardown code here. This method is called after the invocation of each test method in the class.
    }

    func testExample() throws {
        // UI tests must launch the application that they test.
        let app = XCUIApplication()
        app.launch()

        // Use XCTAssert and related functions to verify your tests produce the correct results.
    }

    func testLaunchPerformance() throws {
        if #available(macOS 10.15, iOS 13.0, tvOS 13.0, watchOS 7.0, *) {
            // This measures how long it takes to launch your application.
            measure(metrics: [XCTApplicationLaunchMetric()]) {
                XCUIApplication().launch()
            }
        }
    }
}


================================================
FILE: ios-swift/SherpaOnnx/SherpaOnnxUITests/SherpaOnnxUITestsLaunchTests.swift
================================================
//
//  SherpaOnnxUITestsLaunchTests.swift
//  SherpaOnnxUITests
//
//  Created by fangjun on 2023/2/25.
//

import XCTest

final class SherpaOnnxUITestsLaunchTests: XCTestCase {

    override class var runsForEachTargetApplicationUIConfiguration: Bool {
        true
    }

    override func setUpWithError() throws {
        continueAfterFailure = false
    }

    func testLaunch() throws {
        let app = XCUIApplication()
        app.launch()

        // Insert steps here to perform after app launch but before taking a screenshot,
        // such as logging into a test account or navigating somewhere in the app

        let attachment = XCTAttachment(screenshot: app.screenshot())
        attachment.name = "Launch Screen"
        attachment.lifetime = .keepAlways
        add(attachment)
    }
}


================================================
FILE: ios-swiftui/.gitignore
================================================
# See https://github.com/github/gitignore/blob/main/Swift.gitignore
# Xcode
#
# gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore

## User settings
xcuserdata/

## compatibility with Xcode 8 and earlier (ignoring not required starting Xcode 9)
*.xcscmblueprint
*.xccheckout

## compatibility with Xcode 3 and earlier (ignoring not required starting Xcode 4)
build/
DerivedData/
*.moved-aside
*.pbxuser
!default.pbxuser
*.mode1v3
!default.mode1v3
*.mode2v3
!default.mode2v3
*.perspectivev3
!default.perspectivev3

## Obj-C/Swift specific
*.hmap

## App packaging
*.ipa
*.dSYM.zip
*.dSYM

## Playgrounds
timeline.xctimeline
playground.xcworkspace

# Swift Package Manager
#
# Add this line if you want to avoid checking in source code from Swift Package Manager dependencies.
# Packages/
# Package.pins
# Package.resolved
# *.xcodeproj
#
# Xcode automatically generates this directory with a .xcworkspacedata file and xcuserdata
# hence it is not needed unless you have added a package configuration file to your project
# .swiftpm

.build/

# CocoaPods
#
# We recommend against adding the Pods directory to your .gitignore. However
# you should judge for yourself, the pros and cons are mentioned at:
# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
#
# Pods/
#
# Add this line if you want to avoid checking in source code from the Xcode workspace
# *.xcworkspace

# Carthage
#
# Add this line if you want to avoid checking in source code from Carthage dependencies.
# Carthage/Checkouts

Carthage/Build/

# Accio dependency management
Dependencies/
.accio/

# fastlane
#
# It is recommended to not store the screenshots in the git repo.
# Instead, use fastlane to re-generate the screenshots whenever they are needed.
# For more information about the recommended setup visit:
# https://docs.fastlane.tools/best-practices/source-control/#source-control

fastlane/report.xml
fastlane/Preview.html
fastlane/screenshots/**/*.png
fastlane/test_output

# Code Injection
#
# After new code Injection tools there's a generated folder /iOSInjectionProject
# https://github.com/johnno1962/injectionforxcode

iOSInjectionProject/


================================================
FILE: ios-swiftui/SherpaOnnx/SherpaOnnx/Assets.xcassets/AccentColor.colorset/Contents.json
================================================
{
  "colors" : [
    {
      "idiom" : "universal"
    }
  ],
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}


================================================
FILE: ios-swiftui/SherpaOnnx/SherpaOnnx/Assets.xcassets/AppIcon.appiconset/Contents.json
================================================
{
  "images" : [
    {
      "filename" : "k2-1024x1024.png",
      "idiom" : "universal",
      "platform" : "ios",
      "size" : "1024x1024"
    }
  ],
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}


================================================
FILE: ios-swiftui/SherpaOnnx/SherpaOnnx/Assets.xcassets/Contents.json
================================================
{
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}


================================================
FILE: ios-swiftui/SherpaOnnx/SherpaOnnx/ContentView.swift
================================================
//
//  ContentView.swift
//  SherpaOnnx
//
//  Created by fangjun on 2023/4/5.
//

import SwiftUI

struct ContentView: View {
    @StateObject var sherpaOnnxVM = SherpaOnnxViewModel()

    var body: some View {
        VStack {
            Text("ASR with Next-gen Kaldi")
                .font(.title)
            if sherpaOnnxVM.status == .stop {
                Text("See https://github.com/k2-fsa/sherpa-onnx")
                Text("Press the Start button to run!")
            }
            ScrollView(.vertical, showsIndicators: true) {
                HStack {
                    Text(sherpaOnnxVM.subtitles)
                    Spacer()
                }
            }
            Spacer()
            Button {
                toggleRecorder()
            } label: {
                Text(sherpaOnnxVM.status == .stop ? "Start" : "Stop")
            }
        }
        .padding()
    }

    private func toggleRecorder() {
        sherpaOnnxVM.toggleRecorder()
    }
}

struct ContentView_Previews: PreviewProvider {
    static var previews: some View {
        ContentView()
    }
}


================================================
FILE: ios-swiftui/SherpaOnnx/SherpaOnnx/Extension.swift
================================================
//
//  Extension.swift
//  SherpaOnnx
//
//  Created by knight on 2023/4/5.
//

import AVFoundation

extension AudioBuffer {
    func array() -> [Float] {
        return Array(UnsafeBufferPointer(self))
    }
}

extension AVAudioPCMBuffer {
    func array() -> [Float] {
        return self.audioBufferList.pointee.mBuffers.array()
    }
}


================================================
FILE: ios-swiftui/SherpaOnnx/SherpaOnnx/Info.plist
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>NSMicrophoneUsageDescription</key>
	<string>Need microphone access for Next-gen Kaldi to work</string>
</dict>
</plist>


================================================
FILE: ios-swiftui/SherpaOnnx/SherpaOnnx/Model.swift
================================================
import Foundation

func getResource(_ forResource: String, _ ofType: String) -> String {
  let path = Bundle.main.path(forResource: forResource, ofType: ofType)
  precondition(
    path != nil,
    "\(forResource).\(ofType) does not exist!\n" + "Remember to change \n"
      + "  Build Phases -> Copy Bundle Resources\n" + "to add it!"
  )
  return path!
}
/// Please refer to
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
/// to download pre-trained models

/// sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 (Bilingual, Chinese + English)
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html
func getBilingualStreamZhEnZipformer20230220() -> SherpaOnnxOnlineModelConfig {
  let encoder = getResource("encoder-epoch-99-avg-1", "onnx")
  let decoder = getResource("decoder-epoch-99-avg-1", "onnx")
  let joiner = getResource("joiner-epoch-99-avg-1", "onnx")
  let tokens = getResource("tokens", "txt")

  return sherpaOnnxOnlineModelConfig(
    tokens: tokens,
    transducer: sherpaOnnxOnlineTransducerModelConfig(
      encoder: encoder,
      decoder: decoder,
      joiner: joiner),
    numThreads: 2,
    modelType: "zipformer"
  )
}

// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html
func getBilingualStreamingZhEnParaformer() -> SherpaOnnxOnlineModelConfig {
  let encoder = getResource("encoder.int8", "onnx")
  let decoder = getResource("decoder.int8", "onnx")
  let tokens = getResource("tokens", "txt")

  return sherpaOnnxOnlineModelConfig(
    tokens: tokens,
    paraformer: sherpaOnnxOnlineParaformerModelConfig(
      encoder: encoder,
      decoder: decoder),
    numThreads: 1,
    modelType: "paraformer"
  )
}

// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html#tiny-en
//
func getLanguageIdentificationTiny() -> SherpaOnnxSpokenLanguageIdentificationConfig
 {
  let encoder = getResource("tiny-encoder.int8", "onnx")
  let decoder = getResource("tiny-decoder.int8", "onnx")
    
    let whisperConfig = sherpaOnnxSpokenLanguageIdentificationWhisperConfig(
      encoder: encoder,
      decoder: decoder
    )

    let config = sherpaOnnxSpokenLanguageIdentificationConfig(
      whisper: whisperConfig,
      numThreads: 1,
      debug: 1,
      provider: "cpu"
    )
    return config
}


/// Please refer to
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
/// to add more models if you need


================================================
FILE: ios-swiftui/SherpaOnnx/SherpaOnnx/Preview Content/Preview Assets.xcassets/Contents.json
================================================
{
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}


================================================
FILE: ios-swiftui/SherpaOnnx/SherpaOnnx/SherpaOnnxApp.swift
================================================
//
//  SherpaOnnxApp.swift
//  SherpaOnnx
//
//  Created by fangjun on 2023/4/5.
//

import SwiftUI

@main
struct SherpaOnnxApp: App {
    var body: some Scene {
        WindowGroup {
            ContentView()
        }
    }
}


================================================
FILE: ios-swiftui/SherpaOnnx/SherpaOnnx/SherpaOnnxViewModel.swift
================================================
//
//  SherpaOnnxViewModel.swift
//  SherpaOnnx
//
//  Created by knight on 2023/4/5.
//

import AVFoundation
import Foundation

enum Status {
    case stop
    case recording
}

@MainActor
class SherpaOnnxViewModel: ObservableObject {
    @Published var status: Status = .stop
    @Published var subtitles: String = ""

    var sentences: [String] = []

    var audioEngine: AVAudioEngine? = nil
    var recognizer: SherpaOnnxRecognizer! = nil
    private var audioSession: AVAudioSession!

    var lastSentence: String = ""
    let maxSentence: Int = 20

    var results: String {
        if sentences.isEmpty && lastSentence.isEmpty {
            return ""
        }
        if sentences.isEmpty {
            return "0: \(lastSentence.lowercased())"
        }

        let start = max(sentences.count - maxSentence, 0)
        if lastSentence.isEmpty {
            return sentences.enumerated().map { (index, s) in
                "\(index): \(s.lowercased())"
            }[start...]
            .joined(separator: "\n")
        } else {
            return sentences.enumerated().map { (index, s) in
                "\(index): \(s.lowercased())"
            }[start...]
            .joined(separator: "\n")
                + "\n\(sentences.count): \(lastSentence.lowercased())"
        }
    }

    func updateLabel() {
        self.subtitles = self.results
    }

    func setupAudioSession() {
        audioSession = AVAudioSession.sharedInstance()
        do {
            try audioSession.setCategory(
                .playAndRecord, mode: .default, options: [.defaultToSpeaker])
            try audioSession.setActive(true)
        } catch {
            print("Failed to set up audio session: \(error)")
        }
    }

    init() {
        initRecognizer()
        setupAudioSession()
        initRecorder()
    }

    private func initRecognizer() {
        // Please select one model that is best suitable for you.
        //
        // You can also modify Model.swift to add new pre-trained models from
        // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
        // let modelConfig = getBilingualStreamZhEnZipformer20230220()
        let modelConfig = getBilingualStreamingZhEnParaformer()

        let featConfig = sherpaOnnxFeatureConfig(
            sampleRate: 16000,
            featureDim: 80)

        var config = sherpaOnnxOnlineRecognizerConfig(
            featConfig: featConfig,
            modelConfig: modelConfig,
            enableEndpoint: true,
            rule1MinTrailingSilence: 2.4,
            rule2MinTrailingSilence: 0.8,
            rule3MinUtteranceLength: 30,
            decodingMethod: "greedy_search",
            maxActivePaths: 4
        )
        recognizer = SherpaOnnxRecognizer(config: &config)
    }

    private func initRecorder() {
        print("init recorder")
        audioEngine = AVAudioEngine()
        let inputNode = self.audioEngine?.inputNode
        let bus = 0
        let inputFormat = inputNode?.outputFormat(forBus: bus)
        let outputFormat = AVAudioFormat(
            commonFormat: .pcmFormatFloat32,
            sampleRate: 16000, channels: 1,
            interleaved: false)!

        let converter = AVAudioConverter(from: inputFormat!, to: outputFormat)!

        inputNode!.installTap(
            onBus: bus,
            bufferSize: 1024,
            format: inputFormat
        ) {
            (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
            var newBufferAvailable = true

            let inputCallback: AVAudioConverterInputBlock = {
                inNumPackets, outStatus in
                if newBufferAvailable {
                    outStatus.pointee = .haveData
                    newBufferAvailable = false

                    return buffer
                } else {
                    outStatus.pointee = .noDataNow
                    return nil
                }
            }

            let convertedBuffer = AVAudioPCMBuffer(
                pcmFormat: outputFormat,
                frameCapacity:
                    AVAudioFrameCount(outputFormat.sampleRate)
                    * buffer.frameLength
                    / AVAudioFrameCount(buffer.format.sampleRate))!

            var error: NSError?
            let _ = converter.convert(
                to: convertedBuffer,
                error: &error, withInputFrom: inputCallback)

            // TODO(fangjun): Handle status != haveData

            let array = convertedBuffer.array()
            if !array.isEmpty {
                self.recognizer.acceptWaveform(samples: array)
                while self.recognizer.isReady() {
                    self.recognizer.decode()
                }
                let isEndpoint = self.recognizer.isEndpoint()
                let text = self.recognizer.getResult().text

                if !text.isEmpty && self.lastSentence != text {
                    self.lastSentence = text
                    self.updateLabel()
                    print(text)
                }

                if isEndpoint {
                    if !text.isEmpty {
                        let tmp = self.lastSentence
                        self.lastSentence = ""
                        self.sentences.append(tmp)
                    }
                    self.recognizer.reset()
                }
            }
        }
    }

    public func toggleRecorder() {
        if status == .stop {
            startRecorder()
            status = .recording
        } else {
            stopRecorder()
            status = .stop
        }
    }

    private func startRecorder() {
        lastSentence = ""
        sentences = []

        do {
            try self.audioEngine?.start()
        } catch let error as NSError {
            print(
                "Got an error starting audioEngine: \(error.domain), \(error)")
        }
        print("started")
    }

    private func stopRecorder() {
        audioEngine?.stop()
        print("stopped")
    }
}


================================================
FILE: ios-swiftui/SherpaOnnx/SherpaOnnx.xcodeproj/project.pbxproj
================================================
// !$*UTF8*$!
{
	archiveVersion = 1;
	classes = {
	};
	objectVersion = 56;
	objects = {

/* Begin PBXBuildFile section */
		C924F32E29DDAC0B00A440A5 /* SherpaOnnxApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = C924F32D29DDAC0B00A440A5 /* SherpaOnnxApp.swift */; };
		C924F33029DDAC0B00A440A5 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = C924F32F29DDAC0B00A440A5 /* ContentView.swift */; };
		C924F33229DDAC0D00A440A5 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = C924F33129DDAC0D00A440A5 /* Assets.xcassets */; };
		C924F33529DDAC0D00A440A5 /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = C924F33429DDAC0D00A440A5 /* Preview Assets.xcassets */; };
		C924F33F29DDAC0D00A440A5 /* SherpaOnnxTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = C924F33E29DDAC0D00A440A5 /* SherpaOnnxTests.swift */; };
		C924F34929DDAC0D00A440A5 /* SherpaOnnxUITests.swift in Sources */ = {isa = PBXBuildFile; fileRef = C924F34829DDAC0D00A440A5 /* SherpaOnnxUITests.swift */; };
		C924F34B29DDAC0D00A440A5 /* SherpaOnnxUITestsLaunchTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = C924F34A29DDAC0D00A440A5 /* SherpaOnnxUITestsLaunchTests.swift */; };
		C924F35929DDACED00A440A5 /* SherpaOnnx.swift in Sources */ = {isa = PBXBuildFile; fileRef = C924F35829DDACED00A440A5 /* SherpaOnnx.swift */; };
		C924F35C29DDAE4000A440A5 /* sherpa-onnx.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = C924F35B29DDAE4000A440A5 /* sherpa-onnx.xcframework */; };
		C924F35E29DDAE8200A440A5 /* Model.swift in Sources */ = {isa = PBXBuildFile; fileRef = C924F35D29DDAE8200A440A5 /* Model.swift */; };
		C924F36029DDB05D00A440A5 /* onnxruntime.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = C924F35F29DDB05D00A440A5 /* onnxruntime.xcframework */; };
		C924F36229DDB15D00A440A5 /* Extension.swift in Sources */ = {isa = PBXBuildFile; fileRef = C924F36129DDB15D00A440A5 /* Extension.swift */; };
		C924F36429DDB1D500A440A5 /* SherpaOnnxViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = C924F36329DDB1D500A440A5 /* SherpaOnnxViewModel.swift */; };
/* End PBXBuildFile section */

/* Begin PBXContainerItemProxy section */
		C924F33B29DDAC0D00A440A5 /* PBXContainerItemProxy */ = {
			isa = PBXContainerItemProxy;
			containerPortal = C924F32229DDAC0B00A440A5 /* Project object */;
			proxyType = 1;
			remoteGlobalIDString = C924F32929DDAC0B00A440A5;
			remoteInfo = SherpaOnnx;
		};
		C924F34529DDAC0D00A440A5 /* PBXContainerItemProxy */ = {
			isa = PBXContainerItemProxy;
			containerPortal = C924F32229DDAC0B00A440A5 /* Project object */;
			proxyType = 1;
			remoteGlobalIDString = C924F32929DDAC0B00A440A5;
			remoteInfo = SherpaOnnx;
		};
/* End PBXContainerItemProxy section */

/* Begin PBXFileReference section */
		C924F32A29DDAC0B00A440A5 /* SherpaOnnx.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = SherpaOnnx.app; sourceTree = BUILT_PRODUCTS_DIR; };
		C924F32D29DDAC0B00A440A5 /* SherpaOnnxApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SherpaOnnxApp.swift; sourceTree = "<group>"; };
		C924F32F29DDAC0B00A440A5 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
		C924F33129DDAC0D00A440A5 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
		C924F33429DDAC0D00A440A5 /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = "<group>"; };
		C924F33A29DDAC0D00A440A5 /* SherpaOnnxTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = SherpaOnnxTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
		C924F33E29DDAC0D00A440A5 /* SherpaOnnxTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SherpaOnnxTests.swift; sourceTree = "<group>"; };
		C924F34429DDAC0D00A440A5 /* SherpaOnnxUITests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = SherpaOnnxUITests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
		C924F34829DDAC0D00A440A5 /* SherpaOnnxUITests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SherpaOnnxUITests.swift; sourceTree = "<group>"; };
		C924F34A29DDAC0D00A440A5 /* SherpaOnnxUITestsLaunchTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SherpaOnnxUITestsLaunchTests.swift; sourceTree = "<group>"; };
		C924F35729DDACED00A440A5 /* SherpaOnnx-Bridging-Header.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "SherpaOnnx-Bridging-Header.h"; path = "../../../swift-api-examples/SherpaOnnx-Bridging-Header.h"; sourceTree = "<group>"; };
		C924F35829DDACED00A440A5 /* SherpaOnnx.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = SherpaOnnx.swift; path = "../../../swift-api-examples/SherpaOnnx.swift"; sourceTree = "<group>"; };
		C924F35B29DDAE4000A440A5 /* sherpa-onnx.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = "sherpa-onnx.xcframework"; path = "../../build-ios/sherpa-onnx.xcframework"; sourceTree = "<group>"; };
		C924F35D29DDAE8200A440A5 /* Model.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Model.swift; sourceTree = "<group>"; };
		C924F35F29DDB05D00A440A5 /* onnxruntime.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = onnxruntime.xcframework; path = "../../build-ios/ios-onnxruntime/onnxruntime.xcframework"; sourceTree = "<group>"; };
		C924F36129DDB15D00A440A5 /* Extension.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Extension.swift; sourceTree = "<group>"; };
		C924F36329DDB1D500A440A5 /* SherpaOnnxViewModel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = SherpaOnnxViewModel.swift; sourceTree = "<group>"; };
		DEFC34EE2BBA8AD100E174E9 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist; path = Info.plist; sourceTree = "<group>"; };
/* End PBXFileReference section */

/* Begin PBXFrameworksBuildPhase section */
		C924F32729DDAC0B00A440A5 /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
				C924F36029DDB05D00A440A5 /* onnxruntime.xcframework in Frameworks */,
				C924F35C29DDAE4000A440A5 /* sherpa-onnx.xcframework in Frameworks */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		C924F33729DDAC0D00A440A5 /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		C924F34129DDAC0D00A440A5 /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXFrameworksBuildPhase section */

/* Begin PBXGroup section */
		C924F32129DDAC0B00A440A5 = {
			isa = PBXGroup;
			children = (
				C924F32C29DDAC0B00A440A5 /* SherpaOnnx */,
				C924F33D29DDAC0D00A440A5 /* SherpaOnnxTests */,
				C924F34729DDAC0D00A440A5 /* SherpaOnnxUITests */,
				C924F32B29DDAC0B00A440A5 /* Products */,
				C924F35A29DDAE3F00A440A5 /* Frameworks */,
			);
			sourceTree = "<group>";
		};
		C924F32B29DDAC0B00A440A5 /* Products */ = {
			isa = PBXGroup;
			children = (
				C924F32A29DDAC0B00A440A5 /* SherpaOnnx.app */,
				C924F33A29DDAC0D00A440A5 /* SherpaOnnxTests.xctest */,
				C924F34429DDAC0D00A440A5 /* SherpaOnnxUITests.xctest */,
			);
			name = Products;
			sourceTree = "<group>";
		};
		C924F32C29DDAC0B00A440A5 /* SherpaOnnx */ = {
			isa = PBXGroup;
			children = (
				DEFC34EE2BBA8AD100E174E9 /* Info.plist */,
				C924F36329DDB1D500A440A5 /* SherpaOnnxViewModel.swift */,
				C924F36129DDB15D00A440A5 /* Extension.swift */,
				C924F35D29DDAE8200A440A5 /* Model.swift */,
				C924F35729DDACED00A440A5 /* SherpaOnnx-Bridging-Header.h */,
				C924F35829DDACED00A440A5 /* SherpaOnnx.swift */,
				C924F32D29DDAC0B00A440A5 /* SherpaOnnxApp.swift */,
				C924F32F29DDAC0B00A440A5 /* ContentView.swift */,
				C924F33129DDAC0D00A440A5 /* Assets.xcassets */,
				C924F33329DDAC0D00A440A5 /* Preview Content */,
			);
			path = SherpaOnnx;
			sourceTree = "<group>";
		};
		C924F33329DDAC0D00A440A5 /* Preview Content */ = {
			isa = PBXGroup;
			children = (
				C924F33429DDAC0D00A440A5 /* Preview Assets.xcassets */,
			);
			path = "Preview Content";
			sourceTree = "<group>";
		};
		C924F33D29DDAC0D00A440A5 /* SherpaOnnxTests */ = {
			isa = PBXGroup;
			children = (
				C924F33E29DDAC0D00A440A5 /* SherpaOnnxTests.swift */,
			);
			path = SherpaOnnxTests;
			sourceTree = "<group>";
		};
		C924F34729DDAC0D00A440A5 /* SherpaOnnxUITests */ = {
			isa = PBXGroup;
			children = (
				C924F34829DDAC0D00A440A5 /* SherpaOnnxUITests.swift */,
				C924F34A29DDAC0D00A440A5 /* SherpaOnnxUITestsLaunchTests.swift */,
			);
			path = SherpaOnnxUITests;
			sourceTree = "<group>";
		};
		C924F35A29DDAE3F00A440A5 /* Frameworks */ = {
			isa = PBXGroup;
			children = (
				C924F35F29DDB05D00A440A5 /* onnxruntime.xcframework */,
				C924F35B29DDAE4000A440A5 /* sherpa-onnx.xcframework */,
			);
			name = Frameworks;
			sourceTree = "<group>";
		};
/* End PBXGroup section */

/* Begin PBXNativeTarget section */
		C924F32929DDAC0B00A440A5 /* SherpaOnnx */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = C924F34E29DDAC0D00A440A5 /* Build configuration list for PBXNativeTarget "SherpaOnnx" */;
			buildPhases = (
				C924F32629DDAC0B00A440A5 /* Sources */,
				C924F32729DDAC0B00A440A5 /* Frameworks */,
				C924F32829DDAC0B00A440A5 /* Resources */,
			);
			buildRules = (
			);
			dependencies = (
			);
			name = SherpaOnnx;
			productName = SherpaOnnx;
			productReference = C924F32A29DDAC0B00A440A5 /* SherpaOnnx.app */;
			productType = "com.apple.product-type.application";
		};
		C924F33929DDAC0D00A440A5 /* SherpaOnnxTests */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = C924F35129DDAC0D00A440A5 /* Build configuration list for PBXNativeTarget "SherpaOnnxTests" */;
			buildPhases = (
				C924F33629DDAC0D00A440A5 /* Sources */,
				C924F33729DDAC0D00A440A5 /* Frameworks */,
				C924F33829DDAC0D00A440A5 /* Resources */,
			);
			buildRules = (
			);
			dependencies = (
				C924F33C29DDAC0D00A440A5 /* PBXTargetDependency */,
			);
			name = SherpaOnnxTests;
			productName = SherpaOnnxTests;
			productReference = C924F33A29DDAC0D00A440A5 /* SherpaOnnxTests.xctest */;
			productType = "com.apple.product-type.bundle.unit-test";
		};
		C924F34329DDAC0D00A440A5 /* SherpaOnnxUITests */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = C924F35429DDAC0D00A440A5 /* Build configuration list for PBXNativeTarget "SherpaOnnxUITests" */;
			buildPhases = (
				C924F34029DDAC0D00A440A5 /* Sources */,
				C924F34129DDAC0D00A440A5 /* Frameworks */,
				C924F34229DDAC0D00A440A5 /* Resources */,
			);
			buildRules = (
			);
			dependencies = (
				C924F34629DDAC0D00A440A5 /* PBXTargetDependency */,
			);
			name = SherpaOnnxUITests;
			productName = SherpaOnnxUITests;
			productReference = C924F34429DDAC0D00A440A5 /* SherpaOnnxUITests.xctest */;
			productType = "com.apple.product-type.bundle.ui-testing";
		};
/* End PBXNativeTarget section */

/* Begin PBXProject section */
		C924F32229DDAC0B00A440A5 /* Project object */ = {
			isa = PBXProject;
			attributes = {
				BuildIndependentTargetsInParallel = 1;
				LastSwiftUpdateCheck = 1420;
				LastUpgradeCheck = 1420;
				TargetAttributes = {
					C924F32929DDAC0B00A440A5 = {
						CreatedOnToolsVersion = 14.2;
					};
					C924F33929DDAC0D00A440A5 = {
						CreatedOnToolsVersion = 14.2;
						TestTargetID = C924F32929DDAC0B00A440A5;
					};
					C924F34329DDAC0D00A440A5 = {
						CreatedOnToolsVersion = 14.2;
						TestTargetID = C924F32929DDAC0B00A440A5;
					};
				};
			};
			buildConfigurationList = C924F32529DDAC0B00A440A5 /* Build configuration list for PBXProject "SherpaOnnx" */;
			compatibilityVersion = "Xcode 14.0";
			developmentRegion = en;
			hasScannedForEncodings = 0;
			knownRegions = (
				en,
				Base,
			);
			mainGroup = C924F32129DDAC0B00A440A5;
			productRefGroup = C924F32B29DDAC0B00A440A5 /* Products */;
			projectDirPath = "";
			projectRoot = "";
			targets = (
				C924F32929DDAC0B00A440A5 /* SherpaOnnx */,
				C924F33929DDAC0D00A440A5 /* SherpaOnnxTests */,
				C924F34329DDAC0D00A440A5 /* SherpaOnnxUITests */,
			);
		};
/* End PBXProject section */

/* Begin PBXResourcesBuildPhase section */
		C924F32829DDAC0B00A440A5 /* Resources */ = {
			isa = PBXResourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				C924F33529DDAC0D00A440A5 /* Preview Assets.xcassets in Resources */,
				C924F33229DDAC0D00A440A5 /* Assets.xcassets in Resources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		C924F33829DDAC0D00A440A5 /* Resources */ = {
			isa = PBXResourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		C924F34229DDAC0D00A440A5 /* Resources */ = {
			isa = PBXResourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXResourcesBuildPhase section */

/* Begin PBXSourcesBuildPhase section */
		C924F32629DDAC0B00A440A5 /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				C924F36229DDB15D00A440A5 /* Extension.swift in Sources */,
				C924F33029DDAC0B00A440A5 /* ContentView.swift in Sources */,
				C924F35929DDACED00A440A5 /* SherpaOnnx.swift in Sources */,
				C924F32E29DDAC0B00A440A5 /* SherpaOnnxApp.swift in Sources */,
				C924F36429DDB1D500A440A5 /* SherpaOnnxViewModel.swift in Sources */,
				C924F35E29DDAE8200A440A5 /* Model.swift in Sources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		C924F33629DDAC0D00A440A5 /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				C924F33F29DDAC0D00A440A5 /* SherpaOnnxTests.swift in Sources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		C924F34029DDAC0D00A440A5 /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				C924F34B29DDAC0D00A440A5 /* SherpaOnnxUITestsLaunchTests.swift in Sources */,
				C924F34929DDAC0D00A440A5 /* SherpaOnnxUITests.swift in Sources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXSourcesBuildPhase section */

/* Begin PBXTargetDependency section */
		C924F33C29DDAC0D00A440A5 /* PBXTargetDependency */ = {
			isa = PBXTargetDependency;
			target = C924F32929DDAC0B00A440A5 /* SherpaOnnx */;
			targetProxy = C924F33B29DDAC0D00A440A5 /* PBXContainerItemProxy */;
		};
		C924F34629DDAC0D00A440A5 /* PBXTargetDependency */ = {
			isa = PBXTargetDependency;
			target = C924F32929DDAC0B00A440A5 /* SherpaOnnx */;
			targetProxy = C924F34529DDAC0D00A440A5 /* PBXContainerItemProxy */;
		};
/* End PBXTargetDependency section */

/* Begin XCBuildConfiguration section */
		C924F34C29DDAC0D00A440A5 /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				CLANG_ANALYZER_NONNULL = YES;
				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
				CLANG_ENABLE_MODULES = YES;
				CLANG_ENABLE_OBJC_ARC = YES;
				CLANG_ENABLE_OBJC_WEAK = YES;
				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
				CLANG_WARN_BOOL_CONVERSION = YES;
				CLANG_WARN_COMMA = YES;
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
				CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
				CLANG_WARN_STRICT_PROTOTYPES = YES;
				CLANG_WARN_SUSPICIOUS_MOVE = YES;
				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
				CLANG_WARN_UNREACHABLE_CODE = YES;
				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
				COPY_PHASE_STRIP = NO;
				DEBUG_INFORMATION_FORMAT = dwarf;
				ENABLE_STRICT_OBJC_MSGSEND = YES;
				ENABLE_TESTABILITY = YES;
				GCC_C_LANGUAGE_STANDARD = gnu11;
				GCC_DYNAMIC_NO_PIC = NO;
				GCC_NO_COMMON_BLOCKS = YES;
				GCC_OPTIMIZATION_LEVEL = 0;
				GCC_PREPROCESSOR_DEFINITIONS = (
					"DEBUG=1",
					"$(inherited)",
				);
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
				GCC_WARN_UNDECLARED_SELECTOR = YES;
				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
				GCC_WARN_UNUSED_FUNCTION = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				IPHONEOS_DEPLOYMENT_TARGET = 16.2;
				MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
				MTL_FAST_MATH = YES;
				ONLY_ACTIVE_ARCH = YES;
				SDKROOT = iphoneos;
				SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
			};
			name = Debug;
		};
		C924F34D29DDAC0D00A440A5 /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				CLANG_ANALYZER_NONNULL = YES;
				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
				CLANG_ENABLE_MODULES = YES;
				CLANG_ENABLE_OBJC_ARC = YES;
				CLANG_ENABLE_OBJC_WEAK = YES;
				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
				CLANG_WARN_BOOL_CONVERSION = YES;
				CLANG_WARN_COMMA = YES;
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
				CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
				CLANG_WARN_STRICT_PROTOTYPES = YES;
				CLANG_WARN_SUSPICIOUS_MOVE = YES;
				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
				CLANG_WARN_UNREACHABLE_CODE = YES;
				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
				COPY_PHASE_STRIP = NO;
				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
				ENABLE_NS_ASSERTIONS = NO;
				ENABLE_STRICT_OBJC_MSGSEND = YES;
				GCC_C_LANGUAGE_STANDARD = gnu11;
				GCC_NO_COMMON_BLOCKS = YES;
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
				GCC_WARN_UNDECLARED_SELECTOR = YES;
				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
				GCC_WARN_UNUSED_FUNCTION = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				IPHONEOS_DEPLOYMENT_TARGET = 16.2;
				MTL_ENABLE_DEBUG_INFO = NO;
				MTL_FAST_MATH = YES;
				SDKROOT = iphoneos;
				SWIFT_COMPILATION_MODE = wholemodule;
				SWIFT_OPTIMIZATION_LEVEL = "-O";
				VALIDATE_PRODUCT = YES;
			};
			name = Release;
		};
		C924F34F29DDAC0D00A440A5 /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
				ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
				CODE_SIGN_STYLE = Automatic;
				CURRENT_PROJECT_VERSION = 1;
				DEVELOPMENT_ASSET_PATHS = "\"SherpaOnnx/Preview Content\"";
				DEVELOPMENT_TEAM = "";
				ENABLE_PREVIEWS = YES;
				GENERATE_INFOPLIST_FILE = YES;
				INFOPLIST_FILE = SherpaOnnx/Info.plist;
				INFOPLIST_KEY_NSMicrophoneUsageDescription = "Use microphone to record voice";
				INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES;
				INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
				INFOPLIST_KEY_UILaunchScreen_Generation = YES;
				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
				LD_RUNPATH_SEARCH_PATHS = (
					"$(inherited)",
					"@executable_path/Frameworks",
				);
				MARKETING_VERSION = 1.0;
				OTHER_LDFLAGS = "-lc++";
				PRODUCT_BUNDLE_IDENTIFIER = "com.k2-fsa.org.SherpaOnnx";
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_EMIT_LOC_STRINGS = YES;
				SWIFT_OBJC_BRIDGING_HEADER = "${PROJECT_DIR}/../../swift-api-examples/SherpaOnnx-Bridging-Header.h";
				SWIFT_VERSION = 5.0;
				TARGETED_DEVICE_FAMILY = "1,2";
			};
			name = Debug;
		};
		C924F35029DDAC0D00A440A5 /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
				ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
				CODE_SIGN_STYLE = Automatic;
				CURRENT_PROJECT_VERSION = 1;
				DEVELOPMENT_ASSET_PATHS = "\"SherpaOnnx/Preview Content\"";
				DEVELOPMENT_TEAM = "";
				ENABLE_PREVIEWS = YES;
				GENERATE_INFOPLIST_FILE = YES;
				INFOPLIST_FILE = SherpaOnnx/Info.plist;
				INFOPLIST_KEY_NSMicrophoneUsageDescription = "Use microphone to record voice";
				INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES;
				INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
				INFOPLIST_KEY_UILaunchScreen_Generation = YES;
				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
				LD_RUNPATH_SEARCH_PATHS = (
					"$(inherited)",
					"@executable_path/Frameworks",
				);
				MARKETING_VERSION = 1.0;
				OTHER_LDFLAGS = "-lc++";
				PRODUCT_BUNDLE_IDENTIFIER = "com.k2-fsa.org.SherpaOnnx";
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_EMIT_LOC_STRINGS = YES;
				SWIFT_OBJC_BRIDGING_HEADER = "${PROJECT_DIR}/../../swift-api-examples/SherpaOnnx-Bridging-Header.h";
				SWIFT_VERSION = 5.0;
				TARGETED_DEVICE_FAMILY = "1,2";
			};
			name = Release;
		};
		C924F35229DDAC0D00A440A5 /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES;
				BUNDLE_LOADER = "$(TEST_HOST)";
				CODE_SIGN_STYLE = Automatic;
				CURRENT_PROJECT_VERSION = 1;
				GENERATE_INFOPLIST_FILE = YES;
				IPHONEOS_DEPLOYMENT_TARGET = 16.2;
				MARKETING_VERSION = 1.0;
				PRODUCT_BUNDLE_IDENTIFIER = "com.k2-fsa.org.SherpaOnnxTests";
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_EMIT_LOC_STRINGS = NO;
				SWIFT_VERSION = 5.0;
				TARGETED_DEVICE_FAMILY = "1,2";
				TEST_HOST = "$(BUILT_PRODUCTS_DIR)/SherpaOnnx.app/$(BUNDLE_EXECUTABLE_FOLDER_PATH)/SherpaOnnx";
			};
			name = Debug;
		};
		C924F35329DDAC0D00A440A5 /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES;
				BUNDLE_LOADER = "$(TEST_HOST)";
				CODE_SIGN_STYLE = Automatic;
				CURRENT_PROJECT_VERSION = 1;
				GENERATE_INFOPLIST_FILE = YES;
				IPHONEOS_DEPLOYMENT_TARGET = 16.2;
				MARKETING_VERSION = 1.0;
				PRODUCT_BUNDLE_IDENTIFIER = "com.k2-fsa.org.SherpaOnnxTests";
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_EMIT_LOC_STRINGS = NO;
				SWIFT_VERSION = 5.0;
				TARGETED_DEVICE_FAMILY = "1,2";
				TEST_HOST = "$(BUILT_PRODUCTS_DIR)/SherpaOnnx.app/$(BUNDLE_EXECUTABLE_FOLDER_PATH)/SherpaOnnx";
			};
			name = Release;
		};
		C924F35529DDAC0D00A440A5 /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES;
				CODE_SIGN_STYLE = Automatic;
				CURRENT_PROJECT_VERSION = 1;
				GENERATE_INFOPLIST_FILE = YES;
				MARKETING_VERSION = 1.0;
				PRODUCT_BUNDLE_IDENTIFIER = "com.k2-fsa.org.SherpaOnnxUITests";
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_EMIT_LOC_STRINGS = NO;
				SWIFT_VERSION = 5.0;
				TARGETED_DEVICE_FAMILY = "1,2";
				TEST_TARGET_NAME = SherpaOnnx;
			};
			name = Debug;
		};
		C924F35629DDAC0D00A440A5 /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES;
				CODE_SIGN_STYLE = Automatic;
				CURRENT_PROJECT_VERSION = 1;
				GENERATE_INFOPLIST_FILE = YES;
				MARKETING_VERSION = 1.0;
				PRODUCT_BUNDLE_IDENTIFIER = "com.k2-fsa.org.SherpaOnnxUITests";
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_EMIT_LOC_STRINGS = NO;
				SWIFT_VERSION = 5.0;
				TARGETED_DEVICE_FAMILY = "1,2";
				TEST_TARGET_NAME = SherpaOnnx;
			};
			name = Release;
		};
/* End XCBuildConfiguration section */

/* Begin XCConfigurationList section */
		C924F32529DDAC0B00A440A5 /* Build configuration list for PBXProject "SherpaOnnx" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				C924F34C29DDAC0D00A440A5 /* Debug */,
				C924F34D29DDAC0D00A440A5 /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		C924F34E29DDAC0D00A440A5 /* Build configuration list for PBXNativeTarget "SherpaOnnx" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				C924F34F29DDAC0D00A440A5 /* Debug */,
				C924F35029DDAC0D00A440A5 /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		C924F35129DDAC0D00A440A5 /* Build configuration list for PBXNativeTarget "SherpaOnnxTests" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				C924F35229DDAC0D00A440A5 /* Debug */,
				C924F35329DDAC0D00A440A5 /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		C924F35429DDAC0D00A440A5 /* Build configuration list for PBXNativeTarget "SherpaOnnxUITests" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				C924F35529DDAC0D00A440A5 /* Debug */,
				C924F35629DDAC0D00A440A5 /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
/* End XCConfigurationList section */
	};
	rootObject = C924F32229DDAC0B00A440A5 /* Project object */;
}


================================================
FILE: ios-swiftui/SherpaOnnx/SherpaOnnx.xcodeproj/project.xcworkspace/contents.xcworkspacedata
================================================
<?xml version="1.0" encoding="UTF-8"?>
<Workspace
   version = "1.0">
   <FileRef
      location = "self:">
   </FileRef>
</Workspace>


================================================
FILE: ios-swiftui/SherpaOnnx/SherpaOnnx.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>IDEDidComputeMac32BitWarning</key>
	<true/>
</dict>
</plist>


================================================
FILE: ios-swiftui/SherpaOnnx/SherpaOnnxTests/SherpaOnnxTests.swift
================================================
//
//  SherpaOnnxTests.swift
//  SherpaOnnxTests
//
//  Created by fangjun on 2023/4/5.
//

import XCTest
@testable import SherpaOnnx

final class SherpaOnnxTests: XCTestCase {

    override func setUpWithError() throws {
        // Put setup code here. This method is called before the invocation of each test method in the class.
    }

    override func tearDownWithError() throws {
        // Put teardown code here. This method is called after the invocation of each test method in the class.
    }

    func testExample() throws {
        // This is an example of a functional test case.
        // Use XCTAssert and related functions to verify your tests produce the correct results.
        // Any test you write for XCTest can be annotated as throws and async.
        // Mark your test throws to produce an unexpected failure when your test encounters an uncaught error.
        // Mark your test async to allow awaiting for asynchronous code to complete. Check the results with assertions afterwards.
    }

    func testPerformanceExample() throws {
        // This is an example of a performance test case.
        self.measure {
            // Put the code you want to measure the time of here.
        }
    }

}


================================================
FILE: ios-swiftui/SherpaOnnx/SherpaOnnxUITests/SherpaOnnxUITests.swift
================================================
//
//  SherpaOnnxUITests.swift
//  SherpaOnnxUITests
//
//  Created by fangjun on 2023/4/5.
//

import XCTest

final class SherpaOnnxUITests: XCTestCase {

    override func setUpWithError() throws {
        // Put setup code here. This method is called before the invocation of each test method in the class.

        // In UI tests it is usually best to stop immediately when a failure occurs.
        continueAfterFailure = false

        // In UI tests it’s important to set the initial state - such as interface orientation - required for your tests before they run. The setUp method is a good place to do this.
    }

    override func tearDownWithError() throws {
        // Put teardown code here. This method is called after the invocation of each test method in the class.
    }

    func testExample() throws {
        // UI tests must launch the application that they test.
        let app = XCUIApplication()
        app.launch()

        // Use XCTAssert and related functions to verify your tests produce the correct results.
    }

    func testLaunchPerformance() throws {
        if #available(macOS 10.15, iOS 13.0, tvOS 13.0, watchOS 7.0, *) {
            // This measures how long it takes to launch your application.
            measure(metrics: [XCTApplicationLaunchMetric()]) {
                XCUIApplication().launch()
            }
        }
    }
}


================================================
FILE: ios-swiftui/SherpaOnnx/SherpaOnnxUITests/SherpaOnnxUITestsLaunchTests.swift
================================================
//
//  SherpaOnnxUITestsLaunchTests.swift
//  SherpaOnnxUITests
//
//  Created by fangjun on 2023/4/5.
//

import XCTest

final class SherpaOnnxUITestsLaunchTests: XCTestCase {

    override class var runsForEachTargetApplicationUIConfiguration: Bool {
        true
    }

    override func setUpWithError() throws {
        continueAfterFailure = false
    }

    func testLaunch() throws {
        let app = XCUIApplication()
        app.launch()

        // Insert steps here to perform after app launch but before taking a screenshot,
        // such as logging into a test account or navigating somewhere in the app

        let attachment = XCTAttachment(screenshot: app.screenshot())
        attachment.name = "Launch Screen"
        attachment.lifetime = .keepAlways
        add(attachment)
    }
}


================================================
FILE: ios-swiftui/SherpaOnnx2Pass/SherpaOnnx2Pass/Assets.xcassets/AccentColor.colorset/Contents.json
================================================
{
  "colors" : [
    {
      "idiom" : "universal"
    }
  ],
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}


================================================
FILE: ios-swiftui/SherpaOnnx2Pass/SherpaOnnx2Pass/Assets.xcassets/AppIcon.appiconset/Contents.json
================================================
{
  "images" : [
    {
      "filename" : "k2-1024x1024.png",
      "idiom" : "universal",
      "platform" : "ios",
      "size" : "1024x1024"
    }
  ],
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}


================================================
FILE: ios-swiftui/SherpaOnnx2Pass/SherpaOnnx2Pass/Assets.xcassets/Contents.json
================================================
{
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}


================================================
FILE: ios-swiftui/SherpaOnnx2Pass/SherpaOnnx2Pass/ContentView.swift
================================================
//
//  ContentView.swift
//  SherpaOnnx2Pass
//
//  Created by fangjun on 2023/9/11.
//

import SwiftUI

struct ContentView: View {
    @StateObject var sherpaOnnxVM = SherpaOnnxViewModel()

    var body: some View {
        VStack {
            Text("ASR with Next-gen Kaldi")
                .font(.title)
            if sherpaOnnxVM.status == .stop {
                Text("See https://github.com/k2-fsa/sherpa-onnx")
                Text("Press the Start button to run!")
            }
            ScrollView(.vertical, showsIndicators: true) {
                HStack {
                    Text(sherpaOnnxVM.subtitles)
                    Spacer()
                }
            }
            Spacer()
            Button {
                toggleRecorder()
            } label: {
                Text(sherpaOnnxVM.status == .stop ? "Start" : "Stop")
            }
        }
        .padding()
    }

    private func toggleRecorder() {
        sherpaOnnxVM.toggleRecorder()
    }
}

struct ContentView_Previews: PreviewProvider {
    static var previews: some View {
        ContentView()
    }
}


================================================
FILE: ios-swiftui/SherpaOnnx2Pass/SherpaOnnx2Pass/Extension.swift
================================================
//
//  Extension.swift
//  SherpaOnnx
//
//  Created by knight on 2023/4/5.
//

import AVFoundation

extension AudioBuffer {
    func array() -> [Float] {
        return Array(UnsafeBufferPointer(self))
    }
}

extension AVAudioPCMBuffer {
    func array() -> [Float] {
        return self.audioBufferList.pointee.mBuffers.array()
    }
}

extension TimeInterval {
  var hourMinuteSecondMS: String {
    String(format: "%d:%02d:%02d,%03d", hour, minute, second, millisecond)
  }

  var hour: Int {
    Int((self / 3600).truncatingRemainder(dividingBy: 3600))
  }
  var minute: Int {
    Int((self / 60).truncatingRemainder(dividingBy: 60))
  }
  var second: Int {
    Int(truncatingRemainder(dividingBy: 60))
  }
  var millisecond: Int {
    Int((self * 1000).truncatingRemainder(dividingBy: 1000))
  }
}

extension String {
  var fileURL: URL {
    return URL(fileURLWithPath: self)
  }
  var pathExtension: String {
    return fileURL.pathExtension
  }
  var lastPathComponent: String {
    return fileURL.lastPathComponent
  }
  var stringByDeletingPathExtension: String {
    return fileURL.deletingPathExtension().path
  }
}


================================================
FILE: ios-swiftui/SherpaOnnx2Pass/SherpaOnnx2Pass/Info.plist
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>NSMicrophoneUsageDescription</key>
	<string>Need microphone access for Next-gen Kaldi to work</string>
</dict>
</plist>


================================================
FILE: ios-swiftui/SherpaOnnx2Pass/SherpaOnnx2Pass/Model.swift
================================================
import Foundation

func getResource(_ forResource: String, _ ofType: String) -> String {
  let path = Bundle.main.path(forResource: forResource, ofType: ofType)
  precondition(
    path != nil,
    "\(forResource).\(ofType) does not exist!\n" + "Remember to change \n"
      + "  Build Phases -> Copy Bundle Resources\n" + "to add it!"
  )
  return path!
}
/// Please refer to
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
/// to download pre-trained models

/// sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 (Bilingual, Chinese + English)
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html
func getBilingualStreamingZhEnZipformer20230220() -> SherpaOnnxOnlineModelConfig {
  let encoder = getResource("encoder-epoch-99-avg-1.int8", "onnx")
  let decoder = getResource("decoder-epoch-99-avg-1", "onnx")
  let joiner = getResource("joiner-epoch-99-avg-1.int8", "onnx")
  let tokens = getResource("tokens", "txt")

  return sherpaOnnxOnlineModelConfig(
    tokens: tokens,
    transducer: sherpaOnnxOnlineTransducerModelConfig(
      encoder: encoder,
      decoder: decoder,
      joiner: joiner),
    numThreads: 1,
    modelType: "zipformer"
  )
}

/// csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23 (Chinese)
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-zh-14m-2023-02-23-chinese

func getStreamingZh14MZipformer20230223() -> SherpaOnnxOnlineModelConfig {
  let encoder = getResource("encoder-epoch-99-avg-1.int8", "onnx")
  let decoder = getResource("decoder-epoch-99-avg-1", "onnx")
  let joiner = getResource("joiner-epoch-99-avg-1.int8", "onnx")
  let tokens = getResource("tokens", "txt")

  return sherpaOnnxOnlineModelConfig(
    tokens: tokens,
    transducer: sherpaOnnxOnlineTransducerModelConfig(
      encoder: encoder,
      decoder: decoder,
      joiner: joiner),
    numThreads: 1,
    modelType: "zipformer"
  )
}

/// csukuangfj/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17 (English)
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-en-20m-2023-02-17-english

func getStreamingEn20MZipformer20230217() -> SherpaOnnxOnlineModelConfig {
  let encoder = getResource("encoder-epoch-99-avg-1.int8", "onnx")
  let decoder = getResource("decoder-epoch-99-avg-1", "onnx")
  let joiner = getResource("joiner-epoch-99-avg-1", "onnx")
  let tokens = getResource("tokens", "txt")

  return sherpaOnnxOnlineModelConfig(
    tokens: tokens,
    transducer: sherpaOnnxOnlineTransducerModelConfig(
      encoder: encoder,
      decoder: decoder,
      joiner: joiner),
    numThreads: 1,
    modelType: "zipformer"
  )
}

/// ========================================
///   Non-streaming models
/// ========================================

/// csukuangfj/sherpa-onnx-paraformer-zh-2023-09-14 (Chinese)
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2023-09-14-chinese
func getNonStreamingZhParaformer20230914() -> SherpaOnnxOfflineModelConfig {
  let model = getResource("model.int8", "onnx")
  let tokens = getResource("paraformer-tokens", "txt")

  return sherpaOnnxOfflineModelConfig(
    tokens: tokens,
    paraformer: sherpaOnnxOfflineParaformerModelConfig(
      model: model),
    numThreads: 1,
    modelType: "paraformer"
  )
}

// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html#tiny-en
// English, int8 encoder and decoder
func getNonStreamingWhisperTinyEn() -> SherpaOnnxOfflineModelConfig {
  let encoder = getResource("tiny.en-encoder.int8", "onnx")
  let decoder = getResource("tiny.en-decoder.int8", "onnx")
  let tokens = getResource("tiny.en-tokens", "txt")

  return sherpaOnnxOfflineModelConfig(
    tokens: tokens,
    whisper: sherpaOnnxOfflineWhisperModelConfig(
      encoder: encoder,
      decoder: decoder
    ),
    numThreads: 1,
    modelType: "whisper"
  )
}

// icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04 (English)
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#icefall-asr-multidataset-pruned-transducer-stateless7-2023-05-04-english

func getNonStreamingEnZipformer20230504() -> SherpaOnnxOfflineModelConfig {
  let encoder = getResource("encoder-epoch-30-avg-4.int8", "onnx")
  let decoder = getResource("decoder-epoch-30-avg-4", "onnx")
  let joiner = getResource("joiner-epoch-30-avg-4", "onnx")
  let tokens = getResource("non-streaming-zipformer-tokens", "txt")

  return sherpaOnnxOfflineModelConfig(
    tokens: tokens,
    transducer: sherpaOnnxOfflineTransducerModelConfig(
      encoder: encoder,
      decoder: decoder,
      joiner: joiner),
    numThreads: 1,
    modelType: "zipformer"
  )
}

/// Please refer to
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
/// to add more models if you need


================================================
FILE: ios-swiftui/SherpaOnnx2Pass/SherpaOnnx2Pass/Preview Content/Preview Assets.xcassets/Contents.json
================================================
{
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}


================================================
FILE: ios-swiftui/SherpaOnnx2Pass/SherpaOnnx2Pass/SherpaOnnx2PassApp.swift
================================================
//
//  SherpaOnnx2PassApp.swift
//  SherpaOnnx2Pass
//
//  Created by fangjun on 2023/9/11.
//

import SwiftUI

@main
struct SherpaOnnx2PassApp: App {
    var body: some Scene {
        WindowGroup {
            ContentView()
        }
    }
}


================================================
FILE: ios-swiftui/SherpaOnnx2Pass/SherpaOnnx2Pass/SherpaOnnxViewModel.swift
================================================
//
//  SherpaOnnxViewModel.swift
//  SherpaOnnx
//
//  Created by knight on 2023/4/5.
//

import Foundation
import AVFoundation

enum Status {
    case stop
    case recording
}

class SherpaOnnxViewModel: ObservableObject {
    @Published var status: Status = .stop
    @Published var subtitles: String = ""

    var sentences: [String] = []
    var samplesBuffer = [[Float]] ()

    var audioEngine: AVAudioEngine? = nil
    var recognizer: SherpaOnnxRecognizer! = nil
    var offlineRecognizer: SherpaOnnxOfflineRecognizer! = nil

    var lastSentence: String = ""
    // let maxSentence: Int = 10 // for Chinese
    let maxSentence: Int = 6 // for English

    var results: String {
        if sentences.isEmpty && lastSentence.isEmpty {
            return ""
        }
        if sentences.isEmpty {
            return "0: \(lastSentence.lowercased())"
        }

        let start = max(sentences.count - maxSentence, 0)
        if lastSentence.isEmpty {
            return sentences.enumerated().map { (index, s) in "\(index): \(s.lowercased())" }[start...]
                .joined(separator: "\n")
        } else {
            return sentences.enumerated().map { (index, s) in "\(index): \(s.lowercased())" }[start...]
                .joined(separator: "\n") + "\n\(sentences.count): \(lastSentence.lowercased())"
        }
    }

    func updateLabel() {
        DispatchQueue.main.async {
            self.subtitles = self.results
        }
    }

    init() {
        initRecognizer()
        initOfflineRecognizer()
        initRecorder()
    }

    private func initRecognizer() {
        // Please select one model that is best suitable for you.
        //
        // You can also modify Model.swift to add new pre-trained models from
        // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
        // let modelConfig = getBilingualStreamingZhEnZipformer20230220()
        /* let modelConfig = getStreamingZh14MZipformer20230223() */

        let modelConfig = getStreamingEn20MZipformer20230217()

        let featConfig = sherpaOnnxFeatureConfig(
            sampleRate: 16000,
            featureDim: 80)

        var config = sherpaOnnxOnlineRecognizerConfig(
            featConfig: featConfig,
            modelConfig: modelConfig,
            enableEndpoint: true,
            rule1MinTrailingSilence: 2.4,

            // rule2MinTrailingSilence: 1.2, // for Chinese

            rule2MinTrailingSilence: 0.5, // for English

            rule3MinUtteranceLength: 30,
            decodingMethod: "greedy_search",
            maxActivePaths: 4
        )
        recognizer = SherpaOnnxRecognizer(config: &config)
    }

    private func initOfflineRecognizer() {
        // let modelConfig = getNonStreamingZhParaformer20230914()
        let modelConfig = getNonStreamingWhisperTinyEn()

        // let modelConfig = getNonStreamingEnZipformer20230504()

        let featConfig = sherpaOnnxFeatureConfig(
            sampleRate: 16000,
            featureDim: 80)

        var config = sherpaOnnxOfflineRecognizerConfig(
            featConfig: featConfig,
            modelConfig: modelConfig,
            decodingMethod: "greedy_search",
            maxActivePaths: 4
        )
        offlineRecognizer = SherpaOnnxOfflineRecognizer(config: &config)
    }

    private func initRecorder() {
        print("init recorder")
        audioEngine = AVAudioEngine()
        let inputNode = self.audioEngine?.inputNode
        let bus = 0
        let inputFormat = inputNode?.outputFormat(forBus: bus)
        let outputFormat = AVAudioFormat(
            commonFormat: .pcmFormatFloat32,
            sampleRate: 16000, channels: 1,
            interleaved: false)!

        let converter = AVAudioConverter(from: inputFormat!, to: outputFormat)!

        inputNode!.installTap(
            onBus: bus,
            bufferSize: 1024,
            format: inputFormat
        ) {
            (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
            var newBufferAvailable = true

            let inputCallback: AVAudioConverterInputBlock = {
                inNumPackets, outStatus in
                if newBufferAvailable {
                    outStatus.pointee = .haveData
                    newBufferAvailable = false

                    return buffer
                } else {
                    outStatus.pointee = .noDataNow
                    return nil
                }
            }

            let convertedBuffer = AVAudioPCMBuffer(
                pcmFormat: outputFormat,
                frameCapacity:
                    AVAudioFrameCount(outputFormat.sampleRate)
                * buffer.frameLength
                / AVAudioFrameCount(buffer.format.sampleRate))!

            var error: NSError?
            let _ = converter.convert(
                to: convertedBuffer,
                error: &error, withInputFrom: inputCallback)

            // TODO(fangjun): Handle status != haveData

            let array = convertedBuffer.array()
            if !array.isEmpty {
                self.samplesBuffer.append(array)

                self.recognizer.acceptWaveform(samples: array)
                while (self.recognizer.isReady()){
                    self.recognizer.decode()
                }
                let isEndpoint = self.recognizer.isEndpoint()
                let text = self.recognizer.getResult().text

                if !text.isEmpty && self.lastSentence != text {
                    self.lastSentence = text
                    self.updateLabel()
                    print(text)
                }

                if isEndpoint{
                    if !text.isEmpty {
                        // Invoke offline recognizer
                        var numSamples: Int = 0
                        for a in self.samplesBuffer {
                          numSamples += a.count
                        }

                        var samples: [Float] = Array(repeating: 0, count: numSamples)
                        var i = 0
                        for a in self.samplesBuffer {
                            for s in a {
                                samples[i] = s
                                i += 1
                            }
                        }

                        // let num = 12000 // For Chinese
                        let num = 10000 // For English
                        self.lastSentence = self.offlineRecognizer.decode(samples: Array(samples[0..<samples.count-num])).text

                        let tmp = self.lastSentence
                        self.lastSentence = ""
                        self.sentences.append(tmp)

                        self.updateLabel()

                        i = 0
                        if samples.count > num {
                            i = samples.count - num
                        }
                        var tail: [Float] = Array(repeating: 0, count: samples.count - i)

                        for k in 0  ... samples.count - i - 1 {
                            tail[k] = samples[i+k];
                        }

                        self.samplesBuffer = [[Float]]()
                        self.samplesBuffer.append(tail)
                    } else {
                        self.samplesBuffer = [[Float]]()
                    }
                    self.recognizer.reset()
                }
            }
        }
    }

    public func toggleRecorder() {
        if status == .stop {
            startRecorder()
            status = .recording
        } else {
            stopRecorder()
            status = .stop
        }
    }

    private func startRecorder() {
        lastSentence = ""
        sentences = []
        samplesBuffer = [[Float]] ()
        updateLabel()

        do {
            try self.audioEngine?.start()
        } catch let error as NSError {
            print("Got an error starting audioEngine: \(error.domain), \(error)")
        }
        print("started")
    }

    private func stopRecorder() {
        audioEngine?.stop()
        print("stopped")
    }
}


================================================
FILE: ios-swiftui/SherpaOnnx2Pass/SherpaOnnx2Pass.xcodeproj/project.pbxproj
================================================
// !$*UTF8*$!
{
	archiveVersion = 1;
	classes = {
	};
	objectVersion = 56;
	objects = {

/* Begin PBXBuildFile section */
		C98126502BFEED7D000AD7AA /* Info.plist in Resources */ = {isa = PBXBuildFile; fileRef = C981264F2BFEED7C000AD7AA /* Info.plist */; };
		C9A2587D2AAEFFF100E555CA /* SherpaOnnx2PassApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = C9A2587C2AAEFFF100E555CA /* SherpaOnnx2PassApp.swift */; };
		C9A2587F2AAEFFF100E555CA /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = C9A2587E2AAEFFF100E555CA /* ContentView.swift */; };
		C9A258812AAEFFF200E555CA /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = C9A258802AAEFFF200E555CA /* Assets.xcassets */; };
		C9A258842AAEFFF200E555CA /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = C9A258832AAEFFF200E555CA /* Preview Assets.xcassets */; };
		C9A2588E2AAF039D00E555CA /* Model.swift in Sources */ = {isa = PBXBuildFile; fileRef = C9A2588A2AAF039D00E555CA /* Model.swift */; };
		C9A258902AAF039D00E555CA /* SherpaOnnxViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = C9A2588C2AAF039D00E555CA /* SherpaOnnxViewModel.swift */; };
		C9A258912AAF039D00E555CA /* Extension.swift in Sources */ = {isa = PBXBuildFile; fileRef = C9A2588D2AAF039D00E555CA /* Extension.swift */; };
		C9A258932AAF057E00E555CA /* SherpaOnnx.swift in Sources */ = {isa = PBXBuildFile; fileRef = C9A258922AAF057E00E555CA /* SherpaOnnx.swift */; };
		C9A258962AAF05D100E555CA /* sherpa-onnx.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = C9A258952AAF05D100E555CA /* sherpa-onnx.xcframework */; };
		C9A258982AAF05E400E555CA /* onnxruntime.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = C9A258972AAF05E400E555CA /* onnxruntime.xcframework */; };
/* End PBXBuildFile section */

/* Begin PBXFileReference section */
		C981264F2BFEED7C000AD7AA /* Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
		C9A258792AAEFFF100E555CA /* SherpaOnnx2Pass.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = SherpaOnnx2Pass.app; sourceTree = BUILT_PRODUCTS_DIR; };
		C9A2587C2AAEFFF100E555CA /* SherpaOnnx2PassApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SherpaOnnx2PassApp.swift; sourceTree = "<group>"; };
		C9A2587E2AAEFFF100E555CA /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
		C9A258802AAEFFF200E555CA /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
		C9A258832AAEFFF200E555CA /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = "<group>"; };
		C9A2588A2AAF039D00E555CA /* Model.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Model.swift; sourceTree = "<group>"; };
		C9A2588C2AAF039D00E555CA /* SherpaOnnxViewModel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = SherpaOnnxViewModel.swift; sourceTree = "<group>"; };
		C9A2588D2AAF039D00E555CA /* Extension.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Extension.swift; sourceTree = "<group>"; };
		C9A258922AAF057E00E555CA /* SherpaOnnx.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = SherpaOnnx.swift; path = "../../../swift-api-examples/SherpaOnnx.swift"; sourceTree = "<group>"; };
		C9A258952AAF05D100E555CA /* sherpa-onnx.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = "sherpa-onnx.xcframework"; path = "../../build-ios/sherpa-onnx.xcframework"; sourceTree = "<group>"; };
		C9A258972AAF05E400E555CA /* onnxruntime.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = onnxruntime.xcframework; path = "../../build-ios/ios-onnxruntime/1.17.1/onnxruntime.xcframework"; sourceTree = "<group>"; };
/* End PBXFileReference section */

/* Begin PBXFrameworksBuildPhase section */
		C9A258762AAEFFF100E555CA /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
				C9A258982AAF05E400E555CA /* onnxruntime.xcframework in Frameworks */,
				C9A258962AAF05D100E555CA /* sherpa-onnx.xcframework in Frameworks */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXFrameworksBuildPhase section */

/* Begin PBXGroup section */
		C9A258702AAEFFF100E555CA = {
			isa = PBXGroup;
			children = (
				C9A2587B2AAEFFF100E555CA /* SherpaOnnx2Pass */,
				C9A2587A2AAEFFF100E555CA /* Products */,
				C9A258942AAF05D100E555CA /* Frameworks */,
			);
			sourceTree = "<group>";
		};
		C9A2587A2AAEFFF100E555CA /* Products */ = {
			isa = PBXGroup;
			children = (
				C9A258792AAEFFF100E555CA /* SherpaOnnx2Pass.app */,
			);
			name = Products;
			sourceTree = "<group>";
		};
		C9A2587B2AAEFFF100E555CA /* SherpaOnnx2Pass */ = {
			isa = PBXGroup;
			children = (
				C981264F2BFEED7C000AD7AA /* Info.plist */,
				C9A258922AAF057E00E555CA /* SherpaOnnx.swift */,
				C9A2588D2AAF039D00E555CA /* Extension.swift */,
				C9A2588A2AAF039D00E555CA /* Model.swift */,
				C9A2588C2AAF039D00E555CA /* SherpaOnnxViewModel.swift */,
				C9A2587C2AAEFFF100E555CA /* SherpaOnnx2PassApp.swift */,
				C9A2587E2AAEFFF100E555CA /* ContentView.swift */,
				C9A258802AAEFFF200E555CA /* Assets.xcassets */,
				C9A258822AAEFFF200E555CA /* Preview Content */,
			);
			path = SherpaOnnx2Pass;
			sourceTree = "<group>";
		};
		C9A258822AAEFFF200E555CA /* Preview Content */ = {
			isa = PBXGroup;
			children = (
				C9A258832AAEFFF200E555CA /* Preview Assets.xcassets */,
			);
			path = "Preview Content";
			sourceTree = "<group>";
		};
		C9A258942AAF05D100E555CA /* Frameworks */ = {
			isa = PBXGroup;
			children = (
				C9A258972AAF05E400E555CA /* onnxruntime.xcframework */,
				C9A258952AAF05D100E555CA /* sherpa-onnx.xcframework */,
			);
			name = Frameworks;
			sourceTree = "<group>";
		};
/* End PBXGroup section */

/* Begin PBXNativeTarget section */
		C9A258782AAEFFF100E555CA /* SherpaOnnx2Pass */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = C9A258872AAEFFF200E555CA /* Build configuration list for PBXNativeTarget "SherpaOnnx2Pass" */;
			buildPhases = (
				C9A258752AAEFFF100E555CA /* Sources */,
				C9A258762AAEFFF100E555CA /* Frameworks */,
				C9A258772AAEFFF100E555CA /* Resources */,
			);
			buildRules = (
			);
			dependencies = (
			);
			name = SherpaOnnx2Pass;
			productName = SherpaOnnx2Pass;
			productReference = C9A258792AAEFFF100E555CA /* SherpaOnnx2Pass.app */;
			productType = "com.apple.product-type.application";
		};
/* End PBXNativeTarget section */

/* Begin PBXProject section */
		C9A258712AAEFFF100E555CA /* Project object */ = {
			isa = PBXProject;
			attributes = {
				BuildIndependentTargetsInParallel = 1;
				LastSwiftUpdateCheck = 1420;
				LastUpgradeCheck = 1420;
				TargetAttributes = {
					C9A258782AAEFFF100E555CA = {
						CreatedOnToolsVersion = 14.2;
					};
				};
			};
			buildConfigurationList = C9A258742AAEFFF100E555CA /* Build configuration list for PBXProject "SherpaOnnx2Pass" */;
			compatibilityVersion = "Xcode 14.0";
			developmentRegion = en;
			hasScannedForEncodings = 0;
			knownRegions = (
				en,
				Base,
			);
			mainGroup = C9A258702AAEFFF100E555CA;
			productRefGroup = C9A2587A2AAEFFF100E555CA /* Products */;
			projectDirPath = "";
			projectRoot = "";
			targets = (
				C9A258782AAEFFF100E555CA /* SherpaOnnx2Pass */,
			);
		};
/* End PBXProject section */

/* Begin PBXResourcesBuildPhase section */
		C9A258772AAEFFF100E555CA /* Resources */ = {
			isa = PBXResourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				C9A258842AAEFFF200E555CA /* Preview Assets.xcassets in Resources */,
				C9A258812AAEFFF200E555CA /* Assets.xcassets in Resources */,
				C98126502BFEED7D000AD7AA /* Info.plist in Resources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXResourcesBuildPhase section */

/* Begin PBXSourcesBuildPhase section */
		C9A258752AAEFFF100E555CA /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				C9A2588E2AAF039D00E555CA /* Model.swift in Sources */,
				C9A258902AAF039D00E555CA /* SherpaOnnxViewModel.swift in Sources */,
				C9A258912AAF039D00E555CA /* Extension.swift in Sources */,
				C9A2587F2AAEFFF100E555CA /* ContentView.swift in Sources */,
				C9A258932AAF057E00E555CA /* SherpaOnnx.swift in Sources */,
				C9A2587D2AAEFFF100E555CA /* SherpaOnnx2PassApp.swift in Sources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXSourcesBuildPhase section */

/* Begin XCBuildConfiguration section */
		C9A258852AAEFFF200E555CA /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				CLANG_ANALYZER_NONNULL = YES;
				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
				CLANG_ENABLE_MODULES = YES;
				CLANG_ENABLE_OBJC_ARC = YES;
				CLANG_ENABLE_OBJC_WEAK = YES;
				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
				CLANG_WARN_BOOL_CONVERSION = YES;
				CLANG_WARN_COMMA = YES;
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
				CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
				CLANG_WARN_STRICT_PROTOTYPES = YES;
				CLANG_WARN_SUSPICIOUS_MOVE = YES;
				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
				CLANG_WARN_UNREACHABLE_CODE = YES;
				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
				COPY_PHASE_STRIP = NO;
				DEBUG_INFORMATION_FORMAT = dwarf;
				ENABLE_STRICT_OBJC_MSGSEND = YES;
				ENABLE_TESTABILITY = YES;
				GCC_C_LANGUAGE_STANDARD = gnu11;
				GCC_DYNAMIC_NO_PIC = NO;
				GCC_NO_COMMON_BLOCKS = YES;
				GCC_OPTIMIZATION_LEVEL = 0;
				GCC_PREPROCESSOR_DEFINITIONS = (
					"DEBUG=1",
					"$(inherited)",
				);
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
				GCC_WARN_UNDECLARED_SELECTOR = YES;
				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
				GCC_WARN_UNUSED_FUNCTION = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				IPHONEOS_DEPLOYMENT_TARGET = 16.2;
				MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
				MTL_FAST_MATH = YES;
				ONLY_ACTIVE_ARCH = YES;
				SDKROOT = iphoneos;
				SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
			};
			name = Debug;
		};
		C9A258862AAEFFF200E555CA /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				CLANG_ANALYZER_NONNULL = YES;
				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
				CLANG_ENABLE_MODULES = YES;
				CLANG_ENABLE_OBJC_ARC = YES;
				CLANG_ENABLE_OBJC_WEAK = YES;
				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
				CLANG_WARN_BOOL_CONVERSION = YES;
				CLANG_WARN_COMMA = YES;
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
				CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
				CLANG_WARN_STRICT_PROTOTYPES = YES;
				CLANG_WARN_SUSPICIOUS_MOVE = YES;
				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
				CLANG_WARN_UNREACHABLE_CODE = YES;
				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
				COPY_PHASE_STRIP = NO;
				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
				ENABLE_NS_ASSERTIONS = NO;
				ENABLE_STRICT_OBJC_MSGSEND = YES;
				GCC_C_LANGUAGE_STANDARD = gnu11;
				GCC_NO_COMMON_BLOCKS = YES;
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
				GCC_WARN_UNDECLARED_SELECTOR = YES;
				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
				GCC_WARN_UNUSED_FUNCTION = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				IPHONEOS_DEPLOYMENT_TARGET = 16.2;
				MTL_ENABLE_DEBUG_INFO = NO;
				MTL_FAST_MATH = YES;
				SDKROOT = iphoneos;
				SWIFT_COMPILATION_MODE = wholemodule;
				SWIFT_OPTIMIZATION_LEVEL = "-O";
				VALIDATE_PRODUCT = YES;
			};
			name = Release;
		};
		C9A258882AAEFFF200E555CA /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
				ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
				CODE_SIGN_STYLE = Automatic;
				CURRENT_PROJECT_VERSION = 1;
				DEVELOPMENT_ASSET_PATHS = "\"SherpaOnnx2Pass/Preview Content\"";
				ENABLE_PREVIEWS = YES;
				GENERATE_INFOPLIST_FILE = YES;
				INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES;
				INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
				INFOPLIST_KEY_UILaunchScreen_Generation = YES;
				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
				LD_RUNPATH_SEARCH_PATHS = (
					"$(inherited)",
					"@executable_path/Frameworks",
				);
				MARKETING_VERSION = 1.0;
				OTHER_LDFLAGS = "-lc++";
				PRODUCT_BUNDLE_IDENTIFIER = "com.k2-fsa.org.SherpaOnnx2Pass";
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_EMIT_LOC_STRINGS = YES;
				SWIFT_OBJC_BRIDGING_HEADER = "${PROJECT_DIR}/../../swift-api-examples/SherpaOnnx-Bridging-Header.h";
				SWIFT_VERSION = 5.0;
				TARGETED_DEVICE_FAMILY = "1,2";
			};
			name = Debug;
		};
		C9A258892AAEFFF200E555CA /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
				ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
				CODE_SIGN_STYLE = Automatic;
				CURRENT_PROJECT_VERSION = 1;
				DEVELOPMENT_ASSET_PATHS = "\"SherpaOnnx2Pass/Preview Content\"";
				ENABLE_PREVIEWS = YES;
				GENERATE_INFOPLIST_FILE = YES;
				INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES;
				INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
				INFOPLIST_KEY_UILaunchScreen_Generation = YES;
				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
				LD_RUNPATH_SEARCH_PATHS = (
					"$(inherited)",
					"@executable_path/Frameworks",
				);
				MARKETING_VERSION = 1.0;
				OTHER_LDFLAGS = "-lc++";
				PRODUCT_BUNDLE_IDENTIFIER = "com.k2-fsa.org.SherpaOnnx2Pass";
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_EMIT_LOC_STRINGS = YES;
				SWIFT_OBJC_BRIDGING_HEADER = "${PROJECT_DIR}/../../swift-api-examples/SherpaOnnx-Bridging-Header.h";
				SWIFT_VERSION = 5.0;
				TARGETED_DEVICE_FAMILY = "1,2";
			};
			name = Release;
		};
/* End XCBuildConfiguration section */

/* Begin XCConfigurationList section */
		C9A258742AAEFFF100E555CA /* Build configuration list for PBXProject "SherpaOnnx2Pass" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				C9A258852AAEFFF200E555CA /* Debug */,
				C9A258862AAEFFF200E555CA /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		C9A258872AAEFFF200E555CA /* Build configuration list for PBXNativeTarget "SherpaOnnx2Pass" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				C9A258882AAEFFF200E555CA /* Debug */,
				C9A258892AAEFFF200E555CA /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
/* End XCConfigurationList section */
	};
	rootObject = C9A258712AAEFFF100E555CA /* Project object */;
}


================================================
FILE: ios-swiftui/SherpaOnnx2Pass/SherpaOnnx2Pass.xcodeproj/project.xcworkspace/contents.xcworkspacedata
================================================
<?xml version="1.0" encoding="UTF-8"?>
<Workspace
   version = "1.0">
   <FileRef
      location = "self:">
   </FileRef>
</Workspace>


================================================
FILE: ios-swiftui/SherpaOnnx2Pass/SherpaOnnx2Pass.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>IDEDidComputeMac32BitWarning</key>
	<true/>
</dict>
</plist>


================================================
FILE: ios-swiftui/SherpaOnnxLangID/SherpaOnnxLangID/Assets.xcassets/AccentColor.colorset/Contents.json
================================================
{
  "colors" : [
    {
      "idiom" : "universal"
    }
  ],
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}


================================================
FILE: ios-swiftui/SherpaOnnxLangID/SherpaOnnxLangID/Assets.xcassets/AppIcon 1.appiconset/Contents.json
================================================
{
  "images" : [
    {
      "filename" : "k2-1024x1024.png",
      "idiom" : "universal",
      "platform" : "ios",
      "size" : "1024x1024"
    }
  ],
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}


================================================
FILE: ios-swiftui/SherpaOnnxLangID/SherpaOnnxLangID/Assets.xcassets/AppIcon.appiconset/Contents.json
================================================
{
  "images" : [
    {
      "idiom" : "universal",
      "platform" : "ios",
      "size" : "1024x1024"
    }
  ],
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}


================================================
FILE: ios-swiftui/SherpaOnnxLangID/SherpaOnnxLangID/Assets.xcassets/Contents.json
================================================
{
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}


================================================
FILE: ios-swiftui/SherpaOnnxLangID/SherpaOnnxLangID/ContentView.swift
================================================
//
//  ContentView.swift
//  SherpaOnnxLangID
//
//  Created by knight on 2024/4/1.
//

import SwiftUI

struct ContentView: View {
    @StateObject var viewModel = ViewModel()

    var body: some View {
        VStack {
            Text("ASR with Next-gen Kaldi")
                .font(.title)
            if viewModel.status == .stop {
                Text("See https://github.com/k2-fsa/sherpa-onnx")
                Text("Press the Start button to run!")
            }
            if viewModel.status == .recording {
                Text("Stop will show recording language.")
            }
            Spacer()
            Text("Recording language is: \(viewModel.language)")
                .frame(maxWidth: .infinity)
            Spacer()
            Button {
                toggleRecorder()
            } label: {
                Text(viewModel.status == .stop ? "Start" : "Stop")
            }
        }
        .padding()
    }

    private func toggleRecorder() {
        Task {
            await viewModel.toggleRecorder()
        }
    }
}

#Preview {
    ContentView()
}


================================================
FILE: ios-swiftui/SherpaOnnxLangID/SherpaOnnxLangID/Info.plist
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>NSMicrophoneUsageDescription</key>
	<string>Need microphone access for Next-gen Kaldi to work</string>
</dict>
</plist>


================================================
FILE: ios-swiftui/SherpaOnnxLangID/SherpaOnnxLangID/Preview Content/Preview Assets.xcassets/Contents.json
================================================
{
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}


================================================
FILE: ios-swiftui/SherpaOnnxLangID/SherpaOnnxLangID/SherpaOnnxLangIDApp.swift
================================================
//
//  SherpaOnnxLangIDApp.swift
//  SherpaOnnxLangID
//
//  Created by knight on 2024/4/1.
//

import SwiftUI

@main
struct SherpaOnnxLangIDApp: App {
    var body: some Scene {
        WindowGroup {
            ContentView()
        }
    }
}


================================================
FILE: ios-swiftui/SherpaOnnxLangID/SherpaOnnxLangID/ViewModel.swift
================================================
//
//  ViewModel.swift
//  SherpaOnnxLangID
//
//  Created by knight on 2024/4/1.
//

import SwiftUI
import AVFoundation

enum Status {
    case stop
    case recording
}

@MainActor
class ViewModel:ObservableObject {
    @Published var status: Status = .stop

    @Published var language: String = ""
    
    var languageIdentifier: SherpaOnnxSpokenLanguageIdentificationWrapper? = nil
    var audioEngine: AVAudioEngine? = nil
    
    var voices: [Float] = []

    init() {
        initRecorder()
        initRecognizer()
    }
    
    private func initRecognizer() {
        var config =  getLanguageIdentificationTiny()
        self.languageIdentifier = SherpaOnnxSpokenLanguageIdentificationWrapper(config: &config)
    }
    
    private func initRecorder() {
        print("init recorder")
        audioEngine = AVAudioEngine()
        let inputNode = self.audioEngine?.inputNode
        let bus = 0
        let inputFormat = inputNode?.outputFormat(forBus: bus)
        let outputFormat = AVAudioFormat(
            commonFormat: .pcmFormatFloat32,
            sampleRate: 16000, channels: 1,
            interleaved: false)!

        let converter = AVAudioConverter(from: inputFormat!, to: outputFormat)!

        inputNode!.installTap(
            onBus: bus,
            bufferSize: 1024,
            format: inputFormat
        ) {
            (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
            var newBufferAvailable = true

            let inputCallback: AVAudioConverterInputBlock = {
                inNumPackets, outStatus in
                if newBufferAvailable {
                    outStatus.pointee = .haveData
                    newBufferAvailable = false

                    return buffer
                } else {
                    outStatus.pointee = .noDataNow
                    return nil
                }
            }

            let convertedBuffer = AVAudioPCMBuffer(
                pcmFormat: outputFormat,
                frameCapacity:
                    AVAudioFrameCount(outputFormat.sampleRate)
                * buffer.frameLength
                / AVAudioFrameCount(buffer.format.sampleRate))!

            var error: NSError?
            let _ = converter.convert(
                to: convertedBuffer,
                error: &error, withInputFrom: inputCallback)

            // TODO(fangjun): Handle status != haveData

            let array = convertedBuffer.array()
            if !array.isEmpty {
                self.voices.append(contentsOf: array)
            }
        }
    }
    
    public func toggleRecorder() async{
        if status == .stop {
            await startRecorder()
        } else {
            await stopRecorder()
        }
    }

    private func startRecorder() async {
        await MainActor.run {
            self.language = ""
        }
        if !self.voices.isEmpty {
            self.voices = []
        }
        do {
            try self.audioEngine?.start()
            status = .recording
            print("started")
        } catch let error as NSError {
            print("Got an error starting audioEngine: \(error.domain), \(error)")
        }
    }

    private func stopRecorder() async {
        audioEngine?.stop()
        print("stopped, and begin identify language")
        await self.identify()
        status = .stop
    }
    
    private func identify() async {
        let result = self.languageIdentifier?    .decode(samples: self.voices)
        if let language = result?.lang {
            await MainActor.run {
                self.language = language
            }
        }
    }
}


================================================
FILE: ios-swiftui/SherpaOnnxLangID/SherpaOnnxLangID.xcodeproj/project.pbxproj
================================================
// !$*UTF8*$!
{
	archiveVersion = 1;
	classes = {
	};
	objectVersion = 56;
	objects = {

/* Begin PBXBuildFile section */
		C98126522BFEEDB7000AD7AA /* Info.plist in Resources */ = {isa = PBXBuildFile; fileRef = C98126512BFEEDB7000AD7AA /* Info.plist */; };
		DEBB2D762BBAAA3500864EF5 /* SherpaOnnxLangIDApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = DEBB2D752BBAAA3500864EF5 /* SherpaOnnxLangIDApp.swift */; };
		DEBB2D782BBAAA3500864EF5 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = DEBB2D772BBAAA3500864EF5 /* ContentView.swift */; };
		DEBB2D7A2BBAAA3600864EF5 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = DEBB2D792BBAAA3600864EF5 /* Assets.xcassets */; };
		DEBB2D7D2BBAAA3600864EF5 /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = DEBB2D7C2BBAAA3600864EF5 /* Preview Assets.xcassets */; };
		DEBB2D872BBAAA3600864EF5 /* SherpaOnnxLangIDTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = DEBB2D862BBAAA3600864EF5 /* SherpaOnnxLangIDTests.swift */; };
		DEBB2D912BBAAA3600864EF5 /* SherpaOnnxLangIDUITests.swift in Sources */ = {isa = PBXBuildFile; fileRef = DEBB2D902BBAAA3600864EF5 /* SherpaOnnxLangIDUITests.swift */; };
		DEBB2D932BBAAA3600864EF5 /* SherpaOnnxLangIDUITestsLaunchTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = DEBB2D922BBAAA3600864EF5 /* SherpaOnnxLangIDUITestsLaunchTests.swift */; };
		DEBB2DA12BBAAAD800864EF5 /* Extension.swift in Sources */ = {isa = PBXBuildFile; fileRef = DEBB2DA02BBAAAD800864EF5 /* Extension.swift */; };
		DEBB2DA32BBAAAE700864EF5 /* Model.swift in Sources */ = {isa = PBXBuildFile; fileRef = DEBB2DA22BBAAAE700864EF5 /* Model.swift */; };
		DEBB2DA52BBAAAFD00864EF5 /* ViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = DEBB2DA42BBAAAFD00864EF5 /* ViewModel.swift */; };
		DEBB2DAC2BBAAC6200864EF5 /* onnxruntime.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = DEBB2DAB2BBAAC6200864EF5 /* onnxruntime.xcframework */; };
		DEBB2DAD2BBAAC6200864EF5 /* onnxruntime.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = DEBB2DAB2BBAAC6200864EF5 /* onnxruntime.xcframework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
		DEBB2DAF2BBAAC6400864EF5 /* sherpa-onnx.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = DEBB2DA72BBAAC4D00864EF5 /* sherpa-onnx.xcframework */; };
		DEBB2DB02BBAAC6400864EF5 /* sherpa-onnx.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = DEBB2DA72BBAAC4D00864EF5 /* sherpa-onnx.xcframework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
		DEBB2DB22BBAAD0000864EF5 /* SherpaOnnx.swift in Sources */ = {isa = PBXBuildFile; fileRef = DEBB2DB12BBAAD0000864EF5 /* SherpaOnnx.swift */; };
/* End PBXBuildFile section */

/* Begin PBXContainerItemProxy section */
		DEBB2D832BBAAA3600864EF5 /* PBXContainerItemProxy */ = {
			isa = PBXContainerItemProxy;
			containerPortal = DEBB2D6A2BBAAA3500864EF5 /* Project object */;
			proxyType = 1;
			remoteGlobalIDString = DEBB2D712BBAAA3500864EF5;
			remoteInfo = SherpaOnnxLangID;
		};
		DEBB2D8D2BBAAA3600864EF5 /* PBXContainerItemProxy */ = {
			isa = PBXContainerItemProxy;
			containerPortal = DEBB2D6A2BBAAA3500864EF5 /* Project object */;
			proxyType = 1;
			remoteGlobalIDString = DEBB2D712BBAAA3500864EF5;
			remoteInfo = SherpaOnnxLangID;
		};
/* End PBXContainerItemProxy section */

/* Begin PBXCopyFilesBuildPhase section */
		DEBB2DAE2BBAAC6200864EF5 /* Embed Frameworks */ = {
			isa = PBXCopyFilesBuildPhase;
			buildActionMask = 2147483647;
			dstPath = "";
			dstSubfolderSpec = 10;
			files = (
				DEBB2DAD2BBAAC6200864EF5 /* onnxruntime.xcframework in Embed Frameworks */,
				DEBB2DB02BBAAC6400864EF5 /* sherpa-onnx.xcframework in Embed Frameworks */,
			);
			name = "Embed Frameworks";
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXCopyFilesBuildPhase section */

/* Begin PBXFileReference section */
		C98126512BFEEDB7000AD7AA /* Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
		DEBB2D722BBAAA3500864EF5 /* SherpaOnnxLangID.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = SherpaOnnxLangID.app; sourceTree = BUILT_PRODUCTS_DIR; };
		DEBB2D752BBAAA3500864EF5 /* SherpaOnnxLangIDApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SherpaOnnxLangIDApp.swift; sourceTree = "<group>"; };
		DEBB2D772BBAAA3500864EF5 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
		DEBB2D792BBAAA3600864EF5 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
		DEBB2D7C2BBAAA3600864EF5 /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = "<group>"; };
		DEBB2D822BBAAA3600864EF5 /* SherpaOnnxLangIDTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = SherpaOnnxLangIDTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
		DEBB2D862BBAAA3600864EF5 /* SherpaOnnxLangIDTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SherpaOnnxLangIDTests.swift; sourceTree = "<group>"; };
		DEBB2D8C2BBAAA3600864EF5 /* SherpaOnnxLangIDUITests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = SherpaOnnxLangIDUITests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
		DEBB2D902BBAAA3600864EF5 /* SherpaOnnxLangIDUITests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SherpaOnnxLangIDUITests.swift; sourceTree = "<group>"; };
		DEBB2D922BBAAA3600864EF5 /* SherpaOnnxLangIDUITestsLaunchTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SherpaOnnxLangIDUITestsLaunchTests.swift; sourceTree = "<group>"; };
		DEBB2D9F2BBAAACD00864EF5 /* SherpaOnnx-Bridging-Header.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "SherpaOnnx-Bridging-Header.h"; path = "../../../swift-api-examples/SherpaOnnx-Bridging-Header.h"; sourceTree = "<group>"; };
		DEBB2DA02BBAAAD800864EF5 /* Extension.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = Extension.swift; path = ../../SherpaOnnx/SherpaOnnx/Extension.swift; sourceTree = "<group>"; };
		DEBB2DA22BBAAAE700864EF5 /* Model.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = Model.swift; path = ../../SherpaOnnx/SherpaOnnx/Model.swift; sourceTree = "<group>"; };
		DEBB2DA42BBAAAFD00864EF5 /* ViewModel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ViewModel.swift; sourceTree = "<group>"; };
		DEBB2DA72BBAAC4D00864EF5 /* sherpa-onnx.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = "sherpa-onnx.xcframework"; path = "../../build-ios/sherpa-onnx.xcframework"; sourceTree = "<group>"; };
		DEBB2DAB2BBAAC6200864EF5 /* onnxruntime.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = onnxruntime.xcframework; path = "../../build-ios/ios-onnxruntime/1.17.1/onnxruntime.xcframework"; sourceTree = "<group>"; };
		DEBB2DB12BBAAD0000864EF5 /* SherpaOnnx.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = SherpaOnnx.swift; path = "../../../swift-api-examples/SherpaOnnx.swift"; sourceTree = "<group>"; };
/* End PBXFileReference section */

/* Begin PBXFrameworksBuildPhase section */
		DEBB2D6F2BBAAA3500864EF5 /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
				DEBB2DAC2BBAAC6200864EF5 /* onnxruntime.xcframework in Frameworks */,
				DEBB2DAF2BBAAC6400864EF5 /* sherpa-onnx.xcframework in Frameworks */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		DEBB2D7F2BBAAA3600864EF5 /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		DEBB2D892BBAAA3600864EF5 /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXFrameworksBuildPhase section */

/* Begin PBXGroup section */
		DEBB2D692BBAAA3500864EF5 = {
			isa = PBXGroup;
			children = (
				DEBB2D742BBAAA3500864EF5 /* SherpaOnnxLangID */,
				DEBB2D852BBAAA3600864EF5 /* SherpaOnnxLangIDTests */,
				DEBB2D8F2BBAAA3600864EF5 /* SherpaOnnxLangIDUITests */,
				DEBB2D732BBAAA3500864EF5 /* Products */,
				DEBB2DA62BBAAC4D00864EF5 /* Frameworks */,
			);
			sourceTree = "<group>";
		};
		DEBB2D732BBAAA3500864EF5 /* Products */ = {
			isa = PBXGroup;
			children = (
				DEBB2D722BBAAA3500864EF5 /* SherpaOnnxLangID.app */,
				DEBB2D822BBAAA3600864EF5 /* SherpaOnnxLangIDTests.xctest */,
				DEBB2D8C2BBAAA3600864EF5 /* SherpaOnnxLangIDUITests.xctest */,
			);
			name = Products;
			sourceTree = "<group>";
		};
		DEBB2D742BBAAA3500864EF5 /* SherpaOnnxLangID */ = {
			isa = PBXGroup;
			children = (
				C98126512BFEEDB7000AD7AA /* Info.plist */,
				DEBB2D752BBAAA3500864EF5 /* SherpaOnnxLangIDApp.swift */,
				DEBB2D772BBAAA3500864EF5 /* ContentView.swift */,
				DEBB2DA42BBAAAFD00864EF5 /* ViewModel.swift */,
				DEBB2D9F2BBAAACD00864EF5 /* SherpaOnnx-Bridging-Header.h */,
				DEBB2DB12BBAAD0000864EF5 /* SherpaOnnx.swift */,
				DEBB2DA02BBAAAD800864EF5 /* Extension.swift */,
				DEBB2DA22BBAAAE700864EF5 /* Model.swift */,
				DEBB2D792BBAAA3600864EF5 /* Assets.xcassets */,
				DEBB2D7B2BBAAA3600864EF5 /* Preview Content */,
			);
			path = SherpaOnnxLangID;
			sourceTree = "<group>";
		};
		DEBB2D7B2BBAAA3600864EF5 /* Preview Content */ = {
			isa = PBXGroup;
			children = (
				DEBB2D7C2BBAAA3600864EF5 /* Preview Assets.xcassets */,
			);
			path = "Preview Content";
			sourceTree = "<group>";
		};
		DEBB2D852BBAAA3600864EF5 /* SherpaOnnxLangIDTests */ = {
			isa = PBXGroup;
			children = (
				DEBB2D862BBAAA3600864EF5 /* SherpaOnnxLangIDTests.swift */,
			);
			path = SherpaOnnxLangIDTests;
			sourceTree = "<group>";
		};
		DEBB2D8F2BBAAA3600864EF5 /* SherpaOnnxLangIDUITests */ = {
			isa = PBXGroup;
			children = (
				DEBB2D902BBAAA3600864EF5 /* SherpaOnnxLangIDUITests.swift */,
				DEBB2D922BBAAA3600864EF5 /* SherpaOnnxLangIDUITestsLaunchTests.swift */,
			);
			path = SherpaOnnxLangIDUITests;
			sourceTree = "<group>";
		};
		DEBB2DA62BBAAC4D00864EF5 /* Frameworks */ = {
			isa = PBXGroup;
			children = (
				DEBB2DAB2BBAAC6200864EF5 /* onnxruntime.xcframework */,
				DEBB2DA72BBAAC4D00864EF5 /* sherpa-onnx.xcframework */,
			);
			name = Frameworks;
			sourceTree = "<group>";
		};
/* End PBXGroup section */

/* Begin PBXNativeTarget section */
		DEBB2D712BBAAA3500864EF5 /* SherpaOnnxLangID */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = DEBB2D962BBAAA3600864EF5 /* Build configuration list for PBXNativeTarget "SherpaOnnxLangID" */;
			buildPhases = (
				DEBB2D6E2BBAAA3500864EF5 /* Sources */,
				DEBB2D6F2BBAAA3500864EF5 /* Frameworks */,
				DEBB2D702BBAAA3500864EF5 /* Resources */,
				DEBB2DAE2BBAAC6200864EF5 /* Embed Frameworks */,
			);
			buildRules = (
			);
			dependencies = (
			);
			name = SherpaOnnxLangID;
			productName = SherpaOnnxLangID;
			productReference = DEBB2D722BBAAA3500864EF5 /* SherpaOnnxLangID.app */;
			productType = "com.apple.product-type.application";
		};
		DEBB2D812BBAAA3600864EF5 /* SherpaOnnxLangIDTests */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = DEBB2D992BBAAA3600864EF5 /* Build configuration list for PBXNativeTarget "SherpaOnnxLangIDTests" */;
			buildPhases = (
				DEBB2D7E2BBAAA3600864EF5 /* Sources */,
				DEBB2D7F2BBAAA3600864EF5 /* Frameworks */,
				DEBB2D802BBAAA3600864EF5 /* Resources */,
			);
			buildRules = (
			);
			dependencies = (
				DEBB2D842BBAAA3600864EF5 /* PBXTargetDependency */,
			);
			name = SherpaOnnxLangIDTests;
			productName = SherpaOnnxLangIDTests;
			productReference = DEBB2D822BBAAA3600864EF5 /* SherpaOnnxLangIDTests.xctest */;
			productType = "com.apple.product-type.bundle.unit-test";
		};
		DEBB2D8B2BBAAA3600864EF5 /* SherpaOnnxLangIDUITests */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = DEBB2D9C2BBAAA3600864EF5 /* Build configuration list for PBXNativeTarget "SherpaOnnxLangIDUITests" */;
			buildPhases = (
				DEBB2D882BBAAA3600864EF5 /* Sources */,
				DEBB2D892BBAAA3600864EF5 /* Frameworks */,
				DEBB2D8A2BBAAA3600864EF5 /* Resources */,
			);
			buildRules = (
			);
			dependencies = (
				DEBB2D8E2BBAAA3600864EF5 /* PBXTargetDependency */,
			);
			name = SherpaOnnxLangIDUITests;
			productName = SherpaOnnxLangIDUITests;
			productReference = DEBB2D8C2BBAAA3600864EF5 /* SherpaOnnxLangIDUITests.xctest */;
			productType = "com.apple.product-type.bundle.ui-testing";
		};
/* End PBXNativeTarget section */

/* Begin PBXProject section */
		DEBB2D6A2BBAAA3500864EF5 /* Project object */ = {
			isa = PBXProject;
			attributes = {
				BuildIndependentTargetsInParallel = 1;
				LastSwiftUpdateCheck = 1530;
				LastUpgradeCheck = 1530;
				TargetAttributes = {
					DEBB2D712BBAAA3500864EF5 = {
						CreatedOnToolsVersion = 15.3;
					};
					DEBB2D812BBAAA3600864EF5 = {
						CreatedOnToolsVersion = 15.3;
						TestTargetID = DEBB2D712BBAAA3500864EF5;
					};
					DEBB2D8B2BBAAA3600864EF5 = {
						CreatedOnToolsVersion = 15.3;
						TestTargetID = DEBB2D712BBAAA3500864EF5;
					};
				};
			};
			buildConfigurationList = DEBB2D6D2BBAAA3500864EF5 /* Build configuration list for PBXProject "SherpaOnnxLangID" */;
			compatibilityVersion = "Xcode 14.0";
			developmentRegion = en;
			hasScannedForEncodings = 0;
			knownRegions = (
				en,
				Base,
			);
			mainGroup = DEBB2D692BBAAA3500864EF5;
			productRefGroup = DEBB2D732BBAAA3500864EF5 /* Products */;
			projectDirPath = "";
			projectRoot = "";
			targets = (
				DEBB2D712BBAAA3500864EF5 /* SherpaOnnxLangID */,
				DEBB2D812BBAAA3600864EF5 /* SherpaOnnxLangIDTests */,
				DEBB2D8B2BBAAA3600864EF5 /* SherpaOnnxLangIDUITests */,
			);
		};
/* End PBXProject section */

/* Begin PBXResourcesBuildPhase section */
		DEBB2D702BBAAA3500864EF5 /* Resources */ = {
			isa = PBXResourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				DEBB2D7D2BBAAA3600864EF5 /* Preview Assets.xcassets in Resources */,
				DEBB2D7A2BBAAA3600864EF5 /* Assets.xcassets in Resources */,
				C98126522BFEEDB7000AD7AA /* Info.plist in Resources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		DEBB2D802BBAAA3600864EF5 /* Resources */ = {
			isa = PBXResourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		DEBB2D8A2BBAAA3600864EF5 /* Resources */ = {
			isa = PBXResourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXResourcesBuildPhase section */

/* Begin PBXSourcesBuildPhase section */
		DEBB2D6E2BBAAA3500864EF5 /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				DEBB2DA52BBAAAFD00864EF5 /* ViewModel.swift in Sources */,
				DEBB2DB22BBAAD0000864EF5 /* SherpaOnnx.swift in Sources */,
				DEBB2DA12BBAAAD800864EF5 /* Extension.swift in Sources */,
				DEBB2D782BBAAA3500864EF5 /* ContentView.swift in Sources */,
				DEBB2D762BBAAA3500864EF5 /* SherpaOnnxLangIDApp.swift in Sources */,
				DEBB2DA32BBAAAE700864EF5 /* Model.swift in Sources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		DEBB2D7E2BBAAA3600864EF5 /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				DEBB2D872BBAAA3600864EF5 /* SherpaOnnxLangIDTests.swift in Sources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		DEBB2D882BBAAA3600864EF5 /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				DEBB2D912BBAAA3600864EF5 /* SherpaOnnxLangIDUITests.swift in Sources */,
				DEBB2D932BBAAA3600864EF5 /* SherpaOnnxLangIDUITestsLaunchTests.swift in Sources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXSourcesBuildPhase section */

/* Begin PBXTargetDependency section */
		DEBB2D842BBAAA3600864EF5 /* PBXTargetDependency */ = {
			isa = PBXTargetDependency;
			target = DEBB2D712BBAAA3500864EF5 /* SherpaOnnxLangID */;
			targetProxy = DEBB2D832BBAAA3600864EF5 /* PBXContainerItemProxy */;
		};
		DEBB2D8E2BBAAA3600864EF5 /* PBXTargetDependency */ = {
			isa = PBXTargetDependency;
			target = DEBB2D712BBAAA3500864EF5 /* SherpaOnnxLangID */;
			targetProxy = DEBB2D8D2BBAAA3600864EF5 /* PBXContainerItemProxy */;
		};
/* End PBXTargetDependency section */

/* Begin XCBuildConfiguration section */
		DEBB2D942BBAAA3600864EF5 /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
				CLANG_ANALYZER_NONNULL = YES;
				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
				CLANG_ENABLE_MODULES = YES;
				CLANG_ENABLE_OBJC_ARC = YES;
				CLANG_ENABLE_OBJC_WEAK = YES;
				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
				CLANG_WARN_BOOL_CONVERSION = YES;
				CLANG_WARN_COMMA = YES;
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
				CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
				CLANG_WARN_STRICT_PROTOTYPES = YES;
				CLANG_WARN_SUSPICIOUS_MOVE = YES;
				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
				CLANG_WARN_UNREACHABLE_CODE = YES;
				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
				COPY_PHASE_STRIP = NO;
				DEBUG_INFORMATION_FORMAT = dwarf;
				ENABLE_STRICT_OBJC_MSGSEND = YES;
				ENABLE_TESTABILITY = YES;
				ENABLE_USER_SCRIPT_SANDBOXING = YES;
				GCC_C_LANGUAGE_STANDARD = gnu17;
				GCC_DYNAMIC_NO_PIC = NO;
				GCC_NO_COMMON_BLOCKS = YES;
				GCC_OPTIMIZATION_LEVEL = 0;
				GCC_PREPROCESSOR_DEFINITIONS = (
					"DEBUG=1",
					"$(inherited)",
				);
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
				GCC_WARN_UNDECLARED_SELECTOR = YES;
				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
				GCC_WARN_UNUSED_FUNCTION = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				IPHONEOS_DEPLOYMENT_TARGET = 16.0;
				LOCALIZATION_PREFERS_STRING_CATALOGS = YES;
				MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
				MTL_FAST_MATH = YES;
				ONLY_ACTIVE_ARCH = YES;
				SDKROOT = iphoneos;
				SWIFT_ACTIVE_COMPILATION_CONDITIONS = "DEBUG $(inherited)";
				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
			};
			name = Debug;
		};
		DEBB2D952BBAAA3600864EF5 /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
				CLANG_ANALYZER_NONNULL = YES;
				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
				CLANG_ENABLE_MODULES = YES;
				CLANG_ENABLE_OBJC_ARC = YES;
				CLANG_ENABLE_OBJC_WEAK = YES;
				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
				CLANG_WARN_BOOL_CONVERSION = YES;
				CLANG_WARN_COMMA = YES;
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
				CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
				CLANG_WARN_STRICT_PROTOTYPES = YES;
				CLANG_WARN_SUSPICIOUS_MOVE = YES;
				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
				CLANG_WARN_UNREACHABLE_CODE = YES;
				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
				COPY_PHASE_STRIP = NO;
				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
				ENABLE_NS_ASSERTIONS = NO;
				ENABLE_STRICT_OBJC_MSGSEND = YES;
				ENABLE_USER_SCRIPT_SANDBOXING = YES;
				GCC_C_LANGUAGE_STANDARD = gnu17;
				GCC_NO_COMMON_BLOCKS = YES;
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
				GCC_WARN_UNDECLARED_SELECTOR = YES;
				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
				GCC_WARN_UNUSED_FUNCTION = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				IPHONEOS_DEPLOYMENT_TARGET = 16.0;
				LOCALIZATION_PREFERS_STRING_CATALOGS = YES;
				MTL_ENABLE_DEBUG_INFO = NO;
				MTL_FAST_MATH = YES;
				SDKROOT = iphoneos;
				SWIFT_COMPILATION_MODE = wholemodule;
				VALIDATE_PRODUCT = YES;
			};
			name = Release;
		};
		DEBB2D972BBAAA3600864EF5 /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
				ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
				CODE_SIGN_IDENTITY = "Apple Development";
				CODE_SIGN_STYLE = Automatic;
				CURRENT_PROJECT_VERSION = 1;
				DEVELOPMENT_ASSET_PATHS = "\"SherpaOnnxLangID/Preview Content\"";
				DEVELOPMENT_TEAM = "";
				ENABLE_PREVIEWS = YES;
				GENERATE_INFOPLIST_FILE = YES;
				INFOPLIST_KEY_NSMicrophoneUsageDescription = "Use microphone to record voice";
				INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES;
				INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
				INFOPLIST_KEY_UILaunchScreen_Generation = YES;
				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
				IPHONEOS_DEPLOYMENT_TARGET = 16.0;
				LD_RUNPATH_SEARCH_PATHS = (
					"$(inherited)",
					"@executable_path/Frameworks",
				);
				MARKETING_VERSION = 1.0;
				OTHER_LDFLAGS = "-lc++";
				PRODUCT_BUNDLE_IDENTIFIER = "com.k2-fsa.org.SherpaOnnxLangID";
				PRODUCT_NAME = "$(TARGET_NAME)";
				PROVISIONING_PROFILE_SPECIFIER = "";
				SWIFT_EMIT_LOC_STRINGS = YES;
				SWIFT_OBJC_BRIDGING_HEADER = "${PROJECT_DIR}/../../swift-api-examples/SherpaOnnx-Bridging-Header.h";
				SWIFT_VERSION = 5.0;
				TARGETED_DEVICE_FAMILY = "1,2";
			};
			name = Debug;
		};
		DEBB2D982BBAAA3600864EF5 /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
				ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
				CODE_SIGN_IDENTITY = "Apple Development";
				CODE_SIGN_STYLE = Automatic;
				CURRENT_PROJECT_VERSION = 1;
				DEVELOPMENT_ASSET_PATHS = "\"SherpaOnnxLangID/Preview Content\"";
				DEVELOPMENT_TEAM = "";
				ENABLE_PREVIEWS = YES;
				GENERATE_INFOPLIST_FILE = YES;
				INFOPLIST_KEY_NSMicrophoneUsageDescription = "Use microphone to record voice";
				INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES;
				INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
				INFOPLIST_KEY_UILaunchScreen_Generation = YES;
				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
				IPHONEOS_DEPLOYMENT_TARGET = 16.0;
				LD_RUNPATH_SEARCH_PATHS = (
					"$(inherited)",
					"@executable_path/Frameworks",
				);
				MARKETING_VERSION = 1.0;
				OTHER_LDFLAGS = "-lc++";
				PRODUCT_BUNDLE_IDENTIFIER = "com.k2-fsa.org.SherpaOnnxLangID";
				PRODUCT_NAME = "$(TARGET_NAME)";
				PROVISIONING_PROFILE_SPECIFIER = "";
				SWIFT_EMIT_LOC_STRINGS = YES;
				SWIFT_OBJC_BRIDGING_HEADER = "${PROJECT_DIR}/../../swift-api-examples/SherpaOnnx-Bridging-Header.h";
				SWIFT_VERSION = 5.0;
				TARGETED_DEVICE_FAMILY = "1,2";
			};
			name = Release;
		};
		DEBB2D9A2BBAAA3600864EF5 /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES;
				BUNDLE_LOADER = "$(TEST_HOST)";
				CODE_SIGN_STYLE = Automatic;
				CURRENT_PROJECT_VERSION = 1;
				DEVELOPMENT_TEAM = 896WS4KUPV;
				GENERATE_INFOPLIST_FILE = YES;
				IPHONEOS_DEPLOYMENT_TARGET = 17.4;
				MARKETING_VERSION = 1.0;
				PRODUCT_BUNDLE_IDENTIFIER = "com.k2-fsa.org.SherpaOnnxLangIDTests";
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_EMIT_LOC_STRINGS = NO;
				SWIFT_VERSION = 5.0;
				TARGETED_DEVICE_FAMILY = "1,2";
				TEST_HOST = "$(BUILT_PRODUCTS_DIR)/SherpaOnnxLangID.app/$(BUNDLE_EXECUTABLE_FOLDER_PATH)/SherpaOnnxLangID";
			};
			name = Debug;
		};
		DEBB2D9B2BBAAA3600864EF5 /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES;
				BUNDLE_LOADER = "$(TEST_HOST)";
				CODE_SIGN_STYLE = Automatic;
				CURRENT_PROJECT_VERSION = 1;
				DEVELOPMENT_TEAM = 896WS4KUPV;
				GENERATE_INFOPLIST_FILE = YES;
				IPHONEOS_DEPLOYMENT_TARGET = 17.4;
				MARKETING_VERSION = 1.0;
				PRODUCT_BUNDLE_IDENTIFIER = "com.k2-fsa.org.SherpaOnnxLangIDTests";
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_EMIT_LOC_STRINGS = NO;
				SWIFT_VERSION = 5.0;
				TARGETED_DEVICE_FAMILY = "1,2";
				TEST_HOST = "$(BUILT_PRODUCTS_DIR)/SherpaOnnxLangID.app/$(BUNDLE_EXECUTABLE_FOLDER_PATH)/SherpaOnnxLangID";
			};
			name = Release;
		};
		DEBB2D9D2BBAAA3600864EF5 /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES;
				CODE_SIGN_STYLE = Automatic;
				CURRENT_PROJECT_VERSION = 1;
				DEVELOPMENT_TEAM = 896WS4KUPV;
				GENERATE_INFOPLIST_FILE = YES;
				MARKETING_VERSION = 1.0;
				PRODUCT_BUNDLE_IDENTIFIER = "com.k2-fsa.org.SherpaOnnxLangIDUITests";
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_EMIT_LOC_STRINGS = NO;
				SWIFT_VERSION = 5.0;
				TARGETED_DEVICE_FAMILY = "1,2";
				TEST_TARGET_NAME = SherpaOnnxLangID;
			};
			name = Debug;
		};
		DEBB2D9E2BBAAA3600864EF5 /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES;
				CODE_SIGN_STYLE = Automatic;
				CURRENT_PROJECT_VERSION = 1;
				DEVELOPMENT_TEAM = 896WS4KUPV;
				GENERATE_INFOPLIST_FILE = YES;
				MARKETING_VERSION = 1.0;
				PRODUCT_BUNDLE_IDENTIFIER = "com.k2-fsa.org.SherpaOnnxLangIDUITests";
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_EMIT_LOC_STRINGS = NO;
				SWIFT_VERSION = 5.0;
				TARGETED_DEVICE_FAMILY = "1,2";
				TEST_TARGET_NAME = SherpaOnnxLangID;
			};
			name = Release;
		};
/* End XCBuildConfiguration section */

/* Begin XCConfigurationList section */
		DEBB2D6D2BBAAA3500864EF5 /* Build configuration list for PBXProject "SherpaOnnxLangID" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				DEBB2D942BBAAA3600864EF5 /* Debug */,
				DEBB2D952BBAAA3600864EF5 /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		DEBB2D962BBAAA3600864EF5 /* Build configuration list for PBXNativeTarget "SherpaOnnxLangID" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				DEBB2D972BBAAA3600864EF5 /* Debug */,
				DEBB2D982BBAAA3600864EF5 /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		DEBB2D992BBAAA3600864EF5 /* Build configuration list for PBXNativeTarget "SherpaOnnxLangIDTests" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				DEBB2D9A2BBAAA3600864EF5 /* Debug */,
				DEBB2D9B2BBAAA3600864EF5 /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		DEBB2D9C2BBAAA3600864EF5 /* Build configuration list for PBXNativeTarget "SherpaOnnxLangIDUITests" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				DEBB2D9D2BBAAA3600864EF5 /* Debug */,
				DEBB2D9E2BBAAA3600864EF5 /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
/* End XCConfigurationList section */
	};
	rootObject = DEBB2D6A2BBAAA3500864EF5 /* Project object */;
}


================================================
FILE: ios-swiftui/SherpaOnnxLangID/SherpaOnnxLangID.xcodeproj/project.xcworkspace/contents.xcworkspacedata
================================================
<?xml version="1.0" encoding="UTF-8"?>
<Workspace
   version = "1.0">
   <FileRef
      location = "self:">
   </FileRef>
</Workspace>


================================================
FILE: ios-swiftui/SherpaOnnxLangID/SherpaOnnxLangID.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>IDEDidComputeMac32BitWarning</key>
	<true/>
</dict>
</plist>


================================================
FILE: ios-swiftui/SherpaOnnxLangID/SherpaOnnxLangIDTests/SherpaOnnxLangIDTests.swift
================================================
//
//  SherpaOnnxLangIDTests.swift
//  SherpaOnnxLangIDTests
//
//  Created by knight on 2024/4/1.
//

import XCTest
@testable import SherpaOnnxLangID

final class SherpaOnnxLangIDTests: XCTestCase {

    override func setUpWithError() throws {
        // Put setup code here. This method is called before the invocation of each test method in the class.
    }

    override func tearDownWithError() throws {
        // Put teardown code here. This method is called after the invocation of each test method in the class.
    }

    func testExample() throws {
        // This is an example of a functional test case.
        // Use XCTAssert and related functions to verify your tests produce the correct results.
        // Any test you write for XCTest can be annotated as throws and async.
        // Mark your test throws to produce an unexpected failure when your test encounters an uncaught error.
        // Mark your test async to allow awaiting for asynchronous code to complete. Check the results with assertions afterwards.
    }

    func testPerformanceExample() throws {
        // This is an example of a performance test case.
        self.measure {
            // Put the code you want to measure the time of here.
        }
    }

}


================================================
FILE: ios-swiftui/SherpaOnnxLangID/SherpaOnnxLangIDUITests/SherpaOnnxLangIDUITests.swift
================================================
//
//  SherpaOnnxLangIDUITests.swift
//  SherpaOnnxLangIDUITests
//
//  Created by knight on 2024/4/1.
//

import XCTest

final class SherpaOnnxLangIDUITests: XCTestCase {

    override func setUpWithError() throws {
        // Put setup code here. This method is called before the invocation of each test method in the class.

        // In UI tests it is usually best to stop immediately when a failure occurs.
        continueAfterFailure = false

        // In UI tests it’s important to set the initial state - such as interface orientation - required for your tests before they run. The setUp method is a good place to do this.
    }

    override func tearDownWithError() throws {
        // Put teardown code here. This method is called after the invocation of each test method in the class.
    }

    func testExample() throws {
        // UI tests must launch the application that they test.
        let app = XCUIApplication()
        app.launch()

        // Use XCTAssert and related functions to verify your tests produce the correct results.
    }

    func testLaunchPerformance() throws {
        if #available(macOS 10.15, iOS 13.0, tvOS 13.0, watchOS 7.0, *) {
            // This measures how long it takes to launch your application.
            measure(metrics: [XCTApplicationLaunchMetric()]) {
                XCUIApplication().launch()
            }
        }
    }
}


================================================
FILE: ios-swiftui/SherpaOnnxLangID/SherpaOnnxLangIDUITests/SherpaOnnxLangIDUITestsLaunchTests.swift
================================================
//
//  SherpaOnnxLangIDUITestsLaunchTests.swift
//  SherpaOnnxLangIDUITests
//
//  Created by knight on 2024/4/1.
//

import XCTest

final class SherpaOnnxLangIDUITestsLaunchTests: XCTestCase {

    override class var runsForEachTargetApplicationUIConfiguration: Bool {
        true
    }

    override func setUpWithError() throws {
        continueAfterFailure = false
    }

    func testLaunch() throws {
        let app = XCUIApplication()
        app.launch()

        // Insert steps here to perform after app launch but before taking a screenshot,
        // such as logging into a test account or navigating somewhere in the app

        let attachment = XCTAttachment(screenshot: app.screenshot())
        attachment.name = "Launch Screen"
        attachment.lifetime = .keepAlways
        add(attachment)
    }
}


================================================
FILE: ios-swiftui/SherpaOnnxSubtitle/.gitignore
================================================
tiny.en-tokens.txt
*.onnx
*.ort


================================================
FILE: ios-swiftui/SherpaOnnxSubtitle/SherpaOnnxSubtitle/Assets.xcassets/AccentColor.colorset/Contents.json
================================================
{
  "colors" : [
    {
      "idiom" : "universal"
    }
  ],
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}


================================================
FILE: ios-swiftui/SherpaOnnxSubtitle/SherpaOnnxSubtitle/Assets.xcassets/AppIcon.appiconset/Contents.json
================================================
{
  "images" : [
    {
      "filename" : "k2-1024x1024.png",
      "idiom" : "universal",
      "platform" : "ios",
      "size" : "1024x1024"
    }
  ],
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}


================================================
FILE: ios-swiftui/SherpaOnnxSubtitle/SherpaOnnxSubtitle/Assets.xcassets/Contents.json
================================================
{
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}


================================================
FILE: ios-swiftui/SherpaOnnxSubtitle/SherpaOnnxSubtitle/ContentView.swift
================================================
//
//  ContentView.swift
//  SherpaOnnxSubtitle
//
//  Created by knight on 2023/9/23.
//

import AVKit
import MediaPlayer
import PhotosUI
import SwiftUI

struct ContentView: View {
    @StateObject var subtitleViewModel = SubtitleViewModel()

    var body: some View {
        VStack {
            VStack {
                Text("SherpaOnnxSubtitle")
                    .font(.title)
                VStack(alignment: .leading) {
                    Text("Audio format should be **mono** channel and **16khz** sample rate")

                    Text("You can convert file with the help of ffmpeg")
                    Text("```ffmpeg -i ./foo.mov -acodec pcm_s16le -ac 1 -ar 16000 foo.wav```")
                }
            }
            .padding(.vertical)
            PhotosPicker(
                selection: $subtitleViewModel.selectedItem,
                matching: .videos
            ) {
                Label("Open Audio from Photo Library", systemImage: "photo")
                    .frame(minWidth: 0, maxWidth: .infinity)
                    .padding()
                    .background(.blue, in: .rect(cornerRadius: 8.0))
                    .foregroundColor(.white)
            }

            Button(action: {
                subtitleViewModel.importNow = true
            }, label: {
                Text("Open Audio from Files")
                    .frame(minWidth: 0, maxWidth: .infinity)
                    .padding()
                    .background(.blue, in: .rect(cornerRadius: 8.0))
            })
            .foregroundColor(.white)
            switch subtitleViewModel.loadState {
            case .initial, .loaded(_), .done:
                EmptyView()
            case .loading:
                ProgressView()
            case .failed:
                Text("Gen SRT failed")
            }
        }
        .fileImporter(isPresented: $subtitleViewModel.importNow, allowedContentTypes: [.movie, .audio], onCompletion: handleImportCompletion)
        .onChange(of: subtitleViewModel.importNow) { importNow in
            if !importNow {
                subtitleViewModel.restoreState()
            }
        }
        .fileExporter(isPresented: $subtitleViewModel.exportNow,
                      document: subtitleViewModel.srtDocument, contentType: .srt,
                      defaultFilename: subtitleViewModel.srtName,
                      onCompletion: handleExportCompletion)
        .task(id: subtitleViewModel.selectedItem) {
            do {
                if !subtitleViewModel.hasAudio {
                    return
                }
                subtitleViewModel.loadState = .loading

                if let movie = try await subtitleViewModel.selectedItem?.loadTransferable(type: Audio.self) {
                    subtitleViewModel.loadState = .loaded(movie)
                    subtitleViewModel.generateSRT(from: movie.url)
                } else {
                    subtitleViewModel.loadState = .failed
                }
            } catch {
                subtitleViewModel.loadState = .failed
            }
        }
        .padding()
    }

    private func handleImportCompletion(result: Result<URL, Error>) {
        print("file import...")
        switch result {
        case let .success(file):
            let accessing = file.startAccessingSecurityScopedResource()
            defer {
                if accessing {
                    file.stopAccessingSecurityScopedResource()
                }
            }
            subtitleViewModel.generateSRT(from: file)
        case let .failure(error):
            print(error.localizedDescription)
            subtitleViewModel.loadState = .failed
        }
    }

    private func handleExportCompletion(result: Result<URL, any Error>) {
        switch result {
        case let .success(url):
            print("audio export to: \(url)")
            subtitleViewModel.loadState = .done
        case let .failure(error):
            print("export audio error: \(error.localizedDescription)")
            subtitleViewModel.loadState = .failed
        }
    }
}

struct ContentView_Previews: PreviewProvider {
    static var previews: some View {
        ContentView()
    }
}


================================================
FILE: ios-swiftui/SherpaOnnxSubtitle/SherpaOnnxSubtitle/Extensions/UTType.swift
================================================
//
//  UTType.swift
//  YPlayer
//
//  Created by knight on 2023/7/7.
//

import UniformTypeIdentifiers

extension UTType {
    static var srt: UTType {
        UTType(exportedAs: "com.k2.srt")
    }
}


================================================
FILE: ios-swiftui/SherpaOnnxSubtitle/SherpaOnnxSubtitle/Info.plist
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>NSMicrophoneUsageDescription</key>
	<string>Need microphone access for Next-gen Kaldi to work</string>
	<key>UTExportedTypeDeclarations</key>
	<array>
		<dict>
			<key>UTTypeConformsTo</key>
			<array>
				<string>public.plain-text</string>
			</array>
			<key>UTTypeDescription</key>
			<string>SubRip Subtitle File</string>
			<key>UTTypeIconFiles</key>
			<array/>
			<key>UTTypeIdentifier</key>
			<string>com.k2.srt</string>
			<key>UTTypeTagSpecification</key>
			<dict>
				<key>public.filename-extension</key>
				<array>
					<string>srt</string>
				</array>
			</dict>
		</dict>
	</array>
</dict>
</plist>


================================================
FILE: ios-swiftui/SherpaOnnxSubtitle/SherpaOnnxSubtitle/Models/Audio.swift
================================================
//
//  Audio.swift
//  SherpaOnnxSubtitle
//
//  Created by knight on 2023/9/23.
//

import SwiftUI

struct Audio: Transferable {
    let url: URL

    static var transferRepresentation: some TransferRepresentation {
        FileRepresentation(contentType: .movie) { movie in
            SentTransferredFile(movie.url)
        } importing: { received in
            let copy = URL.documentsDirectory.appending(path: "audio.wav")

            if FileManager.default.fileExists(atPath: copy.path()) {
                try FileManager.default.removeItem(at: copy)
            }

            try FileManager.default.copyItem(at: received.file, to: copy)
            return Self(url: copy)
        }
    }
}


================================================
FILE: ios-swiftui/SherpaOnnxSubtitle/SherpaOnnxSubtitle/Models/Document.swift
================================================
//
//  Document.swift
//  YPlayer
//
//  Created by knight on 2023/6/5.
//

import SwiftUI
import UniformTypeIdentifiers

struct Document: FileDocument {
    static var readableContentTypes = [UTType.srt]
    static var writableContentTypes = [UTType.srt]
    var data: Data?

    init(data: Data?) {
        self.data = data
    }

    init(configuration: ReadConfiguration) throws {
        if let data = configuration.file.regularFileContents {
            self.data = data
        }
    }

    func fileWrapper(configuration _: WriteConfiguration) throws -> FileWrapper {
        guard let data = data else {
            throw ExportError.fileNotFound
        }
        return FileWrapper(regularFileWithContents: data)
    }
}


================================================
FILE: ios-swiftui/SherpaOnnxSubtitle/SherpaOnnxSubtitle/Models/Errors.swift
================================================
//
//  Errors.swift
//  YPlayer
//
//  Created by knight on 2023/8/26.
//

import Foundation

enum ExportError: String, Error {
    case fileNotFound = "export file not found"
}


================================================
FILE: ios-swiftui/SherpaOnnxSubtitle/SherpaOnnxSubtitle/Models/SpeechSegment.swift
================================================
//
//  SpeechSegment.swift
//  SherpaOnnxSubtitle
//
//  Created by knight on 2023/9/23.
//

import Foundation

class SpeechSegment: CustomStringConvertible {
    let start: Float
    let end: Float
    let text: String

    init(start: Float, duration: Float, text: String) {
        self.start = start
        end = start + duration
        self.text = text
    }

    public var description: String {
        var s: String
        s = TimeInterval(start).hourMinuteSecondMS
        s += " --> "
        s += TimeInterval(end).hourMinuteSecondMS
        s += "\n"
        s += text

        return s
    }
}


================================================
FILE: ios-swiftui/SherpaOnnxSubtitle/SherpaOnnxSubtitle/Preview Content/Preview Assets.xcassets/Contents.json
================================================
{
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}


================================================
FILE: ios-swiftui/SherpaOnnxSubtitle/SherpaOnnxSubtitle/SherpaOnnxSubtitleApp.swift
================================================
//
//  SherpaOnnxSubtitleApp.swift
//  SherpaOnnxSubtitle
//
//  Created by knight on 2023/9/23.
//

import SwiftUI

@main
struct SherpaOnnxSubtitleApp: App {
    var body: some Scene {
        WindowGroup {
            ContentView()
        }
    }
}


================================================
FILE: ios-swiftui/SherpaOnnxSubtitle/SherpaOnnxSubtitle/SubtitleViewModel.swift
================================================
//
//  SubtitleViewModel.swift
//  SherpaOnnxSubtitle
//
//  Created by knight on 2023/9/23.
//

import AVFoundation
import PhotosUI
import SwiftUI

enum LoadState {
    case initial
    case loading
    case loaded(Audio)
    case done
    case failed
}

class SubtitleViewModel: ObservableObject {
    var modelType = "whisper"
    let sampleRate = 16000

    var modelConfig: SherpaOnnxOfflineModelConfig?
    // modelType = "paraformer"

    var recognizer: SherpaOnnxOfflineRecognizer?

    var vadModelConfig: SherpaOnnxVadModelConfig?
    var vad: SherpaOnnxVoiceActivityDetectorWrapper?

    @Published var loadState: LoadState = .initial

    @Published var selectedItem: PhotosPickerItem? = nil

    @Published var importNow: Bool = false {
        didSet {
            loadState = .loading
        }
    }

    @Published var exportNow: Bool = false

    var srtName: String = "unknown.srt"
    var content: String = ""

    var srtDocument: Document {
        let content = content.data(using: .utf8)
        return Document(data: content)
    }

    var hasAudio: Bool {
        return selectedItem != nil
    }

    init() {
        if modelType == "whisper" {
            // for English
            self.modelConfig = getNonStreamingWhisperTinyEn()
        } else if modelType == "paraformer" {
            // for Chinese
            self.modelConfig = getNonStreamingZhParaformer20230914()
        } else {
            print("Please specify a supported modelType \(modelType)")
            return
        }

        let featConfig = sherpaOnnxFeatureConfig(
            sampleRate: sampleRate,
            featureDim: 80
        )

        guard let modelConfig else {
            return
        }

        var config = sherpaOnnxOfflineRecognizerConfig(
            featConfig: featConfig,
            modelConfig: modelConfig
        )

        recognizer = SherpaOnnxOfflineRecognizer(config: &config)

        let sileroVadConfig = sherpaOnnxSileroVadModelConfig(
            model: getResource("silero_vad", "onnx")
        )

        self.vadModelConfig = sherpaOnnxVadModelConfig(sileroVad: sileroVadConfig)
        guard var vadModelConfig else {
            return
        }
        vad = SherpaOnnxVoiceActivityDetectorWrapper(
            config: &vadModelConfig, buffer_size_in_seconds: 120
        )
    }

    func restoreState() {
        loadState = .initial
    }

    func generateSRT(from file: URL) {
        print("gen srt from: \(file)")
        content = ""

        // restore state
        defer {
            loadState = .done
        }
        guard let recognizer else {
            return
        }
        guard let vadModelConfig else {
            return
        }

        guard let vad else {
            return
        }

        do {
            let audioFile = try AVAudioFile(forReading: file)
            let audioFormat = audioFile.processingFormat
            assert(audioFormat.sampleRate == Double(sampleRate))
            assert(audioFormat.channelCount == 1)
            assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)

            let audioFrameCount = UInt32(audioFile.length)
            let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)

            try audioFile.read(into: audioFileBuffer!)
            var array: [Float]! = audioFileBuffer?.array()

            let windowSize = Int(vadModelConfig.silero_vad.window_size)

            var segments: [SpeechSegment] = []

            while array.count > windowSize {
                // todo(fangjun): avoid extra copies here
                vad.acceptWaveform(samples: [Float](array[0 ..< windowSize]))
                array = [Float](array[windowSize ..< array.count])

                while !vad.isEmpty() {
                    let s = vad.front()
                    vad.pop()
                    let result = recognizer.decode(samples: s.samples)

                    segments.append(
                        SpeechSegment(
                            start: Float(s.start) / Float(sampleRate),
                            duration: Float(s.samples.count) / Float(sampleRate),
                            text: result.text
                        ))

                    print(segments.last!)
                }
            }
            content = zip(segments.indices, segments).map { index, element in
                "\(index + 1)\n\(element)"
            }.joined(separator: "\n\n")
        } catch {
            print("error: \(error.localizedDescription)")
        }
        exportNow = true

        let last = file.lastPathComponent
        srtName = "\(last).srt"
    }
}


================================================
FILE: ios-swiftui/SherpaOnnxSubtitle/SherpaOnnxSubtitle.xcodeproj/project.pbxproj
================================================
// !$*UTF8*$!
{
	archiveVersion = 1;
	classes = {
	};
	objectVersion = 56;
	objects = {

/* Begin PBXBuildFile section */
		DE081A8F2ABF287C00E8CD63 /* SherpaOnnx.swift in Sources */ = {isa = PBXBuildFile; fileRef = DE081A8E2ABF287C00E8CD63 /* SherpaOnnx.swift */; };
		DE081A922ABF28D400E8CD63 /* SubtitleViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = DE081A912ABF28D400E8CD63 /* SubtitleViewModel.swift */; };
		DE081A952ABFC60E00E8CD63 /* Model.swift in Sources */ = {isa = PBXBuildFile; fileRef = DE081A942ABFC60E00E8CD63 /* Model.swift */; };
		DE081AAF2ABFF35400E8CD63 /* UTType.swift in Sources */ = {isa = PBXBuildFile; fileRef = DE081AAE2ABFF35400E8CD63 /* UTType.swift */; };
		DE081AB12ABFFEEE00E8CD63 /* Document.swift in Sources */ = {isa = PBXBuildFile; fileRef = DE081AB02ABFFEEE00E8CD63 /* Document.swift */; };
		DE081AB32ABFFF2600E8CD63 /* Errors.swift in Sources */ = {isa = PBXBuildFile; fileRef = DE081AB22ABFFF2600E8CD63 /* Errors.swift */; };
		DE8C85A62ABF23E100F667E3 /* onnxruntime.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = DE8C85A52ABF23E100F667E3 /* onnxruntime.xcframework */; };
		DE8C85AA2ABF23FA00F667E3 /* sherpa-onnx.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = DE8C85A92ABF23FA00F667E3 /* sherpa-onnx.xcframework */; };
		DE8C85B22ABF257200F667E3 /* SpeechSegment.swift in Sources */ = {isa = PBXBuildFile; fileRef = DE8C85B12ABF257200F667E3 /* SpeechSegment.swift */; };
		DEA657152ABF19730066A81D /* SherpaOnnxSubtitleApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = DEA657142ABF19730066A81D /* SherpaOnnxSubtitleApp.swift */; };
		DEA657172ABF19730066A81D /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = DEA657162ABF19730066A81D /* ContentView.swift */; };
		DEA657192ABF19740066A81D /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = DEA657182ABF19740066A81D /* Assets.xcassets */; };
		DEA6571C2ABF19740066A81D /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = DEA6571B2ABF19740066A81D /* Preview Assets.xcassets */; };
		DEA657232ABF20130066A81D /* Audio.swift in Sources */ = {isa = PBXBuildFile; fileRef = DEA657222ABF20130066A81D /* Audio.swift */; };
		DED059702AC136FF00122A60 /* Extension.swift in Sources */ = {isa = PBXBuildFile; fileRef = DED0596F2AC136FF00122A60 /* Extension.swift */; };
/* End PBXBuildFile section */

/* Begin PBXFileReference section */
		DE081A8E2ABF287C00E8CD63 /* SherpaOnnx.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = SherpaOnnx.swift; path = "../../../swift-api-examples/SherpaOnnx.swift"; sourceTree = "<group>"; };
		DE081A912ABF28D400E8CD63 /* SubtitleViewModel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SubtitleViewModel.swift; sourceTree = "<group>"; };
		DE081A942ABFC60E00E8CD63 /* Model.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = Model.swift; path = ../../SherpaOnnx2Pass/SherpaOnnx2Pass/Model.swift; sourceTree = "<group>"; };
		DE081AAC2ABFF30A00E8CD63 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist; path = Info.plist; sourceTree = "<group>"; };
		DE081AAE2ABFF35400E8CD63 /* UTType.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = UTType.swift; sourceTree = "<group>"; };
		DE081AB02ABFFEEE00E8CD63 /* Document.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Document.swift; sourceTree = "<group>"; };
		DE081AB22ABFFF2600E8CD63 /* Errors.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Errors.swift; sourceTree = "<group>"; };
		DE8C85A52ABF23E100F667E3 /* onnxruntime.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = onnxruntime.xcframework; path = "../../build-ios/ios-onnxruntime/1.17.1/onnxruntime.xcframework"; sourceTree = "<group>"; };
		DE8C85A92ABF23FA00F667E3 /* sherpa-onnx.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = "sherpa-onnx.xcframework"; path = "../../build-ios/sherpa-onnx.xcframework"; sourceTree = "<group>"; };
		DE8C85B12ABF257200F667E3 /* SpeechSegment.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SpeechSegment.swift; sourceTree = "<group>"; };
		DEA657112ABF19730066A81D /* SherpaOnnxSubtitle.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = SherpaOnnxSubtitle.app; sourceTree = BUILT_PRODUCTS_DIR; };
		DEA657142ABF19730066A81D /* SherpaOnnxSubtitleApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SherpaOnnxSubtitleApp.swift; sourceTree = "<group>"; };
		DEA657162ABF19730066A81D /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
		DEA657182ABF19740066A81D /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
		DEA6571B2ABF19740066A81D /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = "<group>"; };
		DEA657222ABF20130066A81D /* Audio.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Audio.swift; sourceTree = "<group>"; };
		DED0596F2AC136FF00122A60 /* Extension.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = Extension.swift; path = ../../../SherpaOnnx2Pass/SherpaOnnx2Pass/Extension.swift; sourceTree = "<group>"; };
/* End PBXFileReference section */

/* Begin PBXFrameworksBuildPhase section */
		DEA6570E2ABF19730066A81D /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
				DE8C85A62ABF23E100F667E3 /* onnxruntime.xcframework in Frameworks */,
				DE8C85AA2ABF23FA00F667E3 /* sherpa-onnx.xcframework in Frameworks */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXFrameworksBuildPhase section */

/* Begin PBXGroup section */
		DE081A902ABF28BE00E8CD63 /* Models */ = {
			isa = PBXGroup;
			children = (
				DEA657222ABF20130066A81D /* Audio.swift */,
				DE8C85B12ABF257200F667E3 /* SpeechSegment.swift */,
				DE081AB02ABFFEEE00E8CD63 /* Document.swift */,
				DE081AB22ABFFF2600E8CD63 /* Errors.swift */,
			);
			path = Models;
			sourceTree = "<group>";
		};
		DE081AAD2ABFF34900E8CD63 /* Extensions */ = {
			isa = PBXGroup;
			children = (
				DED0596F2AC136FF00122A60 /* Extension.swift */,
				DE081AAE2ABFF35400E8CD63 /* UTType.swift */,
			);
			path = Extensions;
			sourceTree = "<group>";
		};
		DE8C85A42ABF23E100F667E3 /* Frameworks */ = {
			isa = PBXGroup;
			children = (
				DE8C85A92ABF23FA00F667E3 /* sherpa-onnx.xcframework */,
				DE8C85A52ABF23E100F667E3 /* onnxruntime.xcframework */,
			);
			name = Frameworks;
			sourceTree = "<group>";
		};
		DEA657082ABF19730066A81D = {
			isa = PBXGroup;
			children = (
				DEA657132ABF19730066A81D /* SherpaOnnxSubtitle */,
				DEA657122ABF19730066A81D /* Products */,
				DE8C85A42ABF23E100F667E3 /* Frameworks */,
			);
			sourceTree = "<group>";
		};
		DEA657122ABF19730066A81D /* Products */ = {
			isa = PBXGroup;
			children = (
				DEA657112ABF19730066A81D /* SherpaOnnxSubtitle.app */,
			);
			name = Products;
			sourceTree = "<group>";
		};
		DEA657132ABF19730066A81D /* SherpaOnnxSubtitle */ = {
			isa = PBXGroup;
			children = (
				DE081AAC2ABFF30A00E8CD63 /* Info.plist */,
				DE081A8E2ABF287C00E8CD63 /* SherpaOnnx.swift */,
				DEA657142ABF19730066A81D /* SherpaOnnxSubtitleApp.swift */,
				DEA657162ABF19730066A81D /* ContentView.swift */,
				DE081A912ABF28D400E8CD63 /* SubtitleViewModel.swift */,
				DE081AAD2ABFF34900E8CD63 /* Extensions */,
				DE081A942ABFC60E00E8CD63 /* Model.swift */,
				DE081A902ABF28BE00E8CD63 /* Models */,
				DEA657182ABF19740066A81D /* Assets.xcassets */,
				DEA6571A2ABF19740066A81D /* Preview Content */,
			);
			path = SherpaOnnxSubtitle;
			sourceTree = "<group>";
		};
		DEA6571A2ABF19740066A81D /* Preview Content */ = {
			isa = PBXGroup;
			children = (
				DEA6571B2ABF19740066A81D /* Preview Assets.xcassets */,
			);
			path = "Preview Content";
			sourceTree = "<group>";
		};
/* End PBXGroup section */

/* Begin PBXNativeTarget section */
		DEA657102ABF19730066A81D /* SherpaOnnxSubtitle */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = DEA6571F2ABF19740066A81D /* Build configuration list for PBXNativeTarget "SherpaOnnxSubtitle" */;
			buildPhases = (
				DEA6570D2ABF19730066A81D /* Sources */,
				DEA6570E2ABF19730066A81D /* Frameworks */,
				DEA6570F2ABF19730066A81D /* Resources */,
			);
			buildRules = (
			);
			dependencies = (
			);
			name = SherpaOnnxSubtitle;
			productName = SherpaOnnxSubtitle;
			productReference = DEA657112ABF19730066A81D /* SherpaOnnxSubtitle.app */;
			productType = "com.apple.product-type.application";
		};
/* End PBXNativeTarget section */

/* Begin PBXProject section */
		DEA657092ABF19730066A81D /* Project object */ = {
			isa = PBXProject;
			attributes = {
				BuildIndependentTargetsInParallel = 1;
				LastSwiftUpdateCheck = 1500;
				LastUpgradeCheck = 1500;
				TargetAttributes = {
					DEA657102ABF19730066A81D = {
						CreatedOnToolsVersion = 15.0;
					};
				};
			};
			buildConfigurationList = DEA6570C2ABF19730066A81D /* Build configuration list for PBXProject "SherpaOnnxSubtitle" */;
			compatibilityVersion = "Xcode 14.0";
			developmentRegion = en;
			hasScannedForEncodings = 0;
			knownRegions = (
				en,
				Base,
			);
			mainGroup = DEA657082ABF19730066A81D;
			productRefGroup = DEA657122ABF19730066A81D /* Products */;
			projectDirPath = "";
			projectRoot = "";
			targets = (
				DEA657102ABF19730066A81D /* SherpaOnnxSubtitle */,
			);
		};
/* End PBXProject section */

/* Begin PBXResourcesBuildPhase section */
		DEA6570F2ABF19730066A81D /* Resources */ = {
			isa = PBXResourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				DEA6571C2ABF19740066A81D /* Preview Assets.xcassets in Resources */,
				DEA657192ABF19740066A81D /* Assets.xcassets in Resources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXResourcesBuildPhase section */

/* Begin PBXSourcesBuildPhase section */
		DEA6570D2ABF19730066A81D /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				DE081AAF2ABFF35400E8CD63 /* UTType.swift in Sources */,
				DE8C85B22ABF257200F667E3 /* SpeechSegment.swift in Sources */,
				DE081A922ABF28D400E8CD63 /* SubtitleViewModel.swift in Sources */,
				DE081AB12ABFFEEE00E8CD63 /* Document.swift in Sources */,
				DED059702AC136FF00122A60 /* Extension.swift in Sources */,
				DEA657172ABF19730066A81D /* ContentView.swift in Sources */,
				DEA657152ABF19730066A81D /* SherpaOnnxSubtitleApp.swift in Sources */,
				DE081AB32ABFFF2600E8CD63 /* Errors.swift in Sources */,
				DEA657232ABF20130066A81D /* Audio.swift in Sources */,
				DE081A8F2ABF287C00E8CD63 /* SherpaOnnx.swift in Sources */,
				DE081A952ABFC60E00E8CD63 /* Model.swift in Sources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXSourcesBuildPhase section */

/* Begin XCBuildConfiguration section */
		DEA6571D2ABF19740066A81D /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
				CLANG_ANALYZER_NONNULL = YES;
				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
				CLANG_ENABLE_MODULES = YES;
				CLANG_ENABLE_OBJC_ARC = YES;
				CLANG_ENABLE_OBJC_WEAK = YES;
				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
				CLANG_WARN_BOOL_CONVERSION = YES;
				CLANG_WARN_COMMA = YES;
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
				CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
				CLANG_WARN_STRICT_PROTOTYPES = YES;
				CLANG_WARN_SUSPICIOUS_MOVE = YES;
				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
				CLANG_WARN_UNREACHABLE_CODE = YES;
				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
				COPY_PHASE_STRIP = NO;
				DEBUG_INFORMATION_FORMAT = dwarf;
				ENABLE_STRICT_OBJC_MSGSEND = YES;
				ENABLE_TESTABILITY = YES;
				ENABLE_USER_SCRIPT_SANDBOXING = YES;
				GCC_C_LANGUAGE_STANDARD = gnu17;
				GCC_DYNAMIC_NO_PIC = NO;
				GCC_NO_COMMON_BLOCKS = YES;
				GCC_OPTIMIZATION_LEVEL = 0;
				GCC_PREPROCESSOR_DEFINITIONS = (
					"DEBUG=1",
					"$(inherited)",
				);
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
				GCC_WARN_UNDECLARED_SELECTOR = YES;
				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
				GCC_WARN_UNUSED_FUNCTION = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				IPHONEOS_DEPLOYMENT_TARGET = 16.0;
				LOCALIZATION_PREFERS_STRING_CATALOGS = YES;
				MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
				MTL_FAST_MATH = YES;
				ONLY_ACTIVE_ARCH = YES;
				SDKROOT = iphoneos;
				SWIFT_ACTIVE_COMPILATION_CONDITIONS = "DEBUG $(inherited)";
				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
			};
			name = Debug;
		};
		DEA6571E2ABF19740066A81D /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
				CLANG_ANALYZER_NONNULL = YES;
				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
				CLANG_ENABLE_MODULES = YES;
				CLANG_ENABLE_OBJC_ARC = YES;
				CLANG_ENABLE_OBJC_WEAK = YES;
				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
				CLANG_WARN_BOOL_CONVERSION = YES;
				CLANG_WARN_COMMA = YES;
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
				CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
				CLANG_WARN_STRICT_PROTOTYPES = YES;
				CLANG_WARN_SUSPICIOUS_MOVE = YES;
				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
				CLANG_WARN_UNREACHABLE_CODE = YES;
				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
				COPY_PHASE_STRIP = NO;
				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
				ENABLE_NS_ASSERTIONS = NO;
				ENABLE_STRICT_OBJC_MSGSEND = YES;
				ENABLE_USER_SCRIPT_SANDBOXING = YES;
				GCC_C_LANGUAGE_STANDARD = gnu17;
				GCC_NO_COMMON_BLOCKS = YES;
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
				GCC_WARN_UNDECLARED_SELECTOR = YES;
				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
				GCC_WARN_UNUSED_FUNCTION = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				IPHONEOS_DEPLOYMENT_TARGET = 16.0;
				LOCALIZATION_PREFERS_STRING_CATALOGS = YES;
				MTL_ENABLE_DEBUG_INFO = NO;
				MTL_FAST_MATH = YES;
				SDKROOT = iphoneos;
				SWIFT_COMPILATION_MODE = wholemodule;
				VALIDATE_PRODUCT = YES;
			};
			name = Release;
		};
		DEA657202ABF19740066A81D /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
				ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
				CODE_SIGN_STYLE = Automatic;
				CURRENT_PROJECT_VERSION = 1;
				DEVELOPMENT_ASSET_PATHS = "\"SherpaOnnxSubtitle/Preview Content\"";
				DEVELOPMENT_TEAM = 896WS4KUPV;
				ENABLE_PREVIEWS = YES;
				GENERATE_INFOPLIST_FILE = YES;
				INFOPLIST_FILE = SherpaOnnxSubtitle/Info.plist;
				INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES;
				INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
				INFOPLIST_KEY_UILaunchScreen_Generation = YES;
				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
				IPHONEOS_DEPLOYMENT_TARGET = 16.0;
				LD_RUNPATH_SEARCH_PATHS = (
					"$(inherited)",
					"@executable_path/Frameworks",
				);
				MARKETING_VERSION = 1.0;
				OTHER_LDFLAGS = "-lc++";
				PRODUCT_BUNDLE_IDENTIFIER = net.duoziwei.SherpaOnnxSubtitle;
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_EMIT_LOC_STRINGS = YES;
				SWIFT_OBJC_BRIDGING_HEADER = "${PROJECT_DIR}/../../swift-api-examples/SherpaOnnx-Bridging-Header.h";
				SWIFT_VERSION = 5.0;
				TARGETED_DEVICE_FAMILY = "1,2";
			};
			name = Debug;
		};
		DEA657212ABF19740066A81D /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
				ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
				CODE_SIGN_STYLE = Automatic;
				CURRENT_PROJECT_VERSION = 1;
				DEVELOPMENT_ASSET_PATHS = "\"SherpaOnnxSubtitle/Preview Content\"";
				DEVELOPMENT_TEAM = 896WS4KUPV;
				ENABLE_PREVIEWS = YES;
				GENERATE_INFOPLIST_FILE = YES;
				INFOPLIST_FILE = SherpaOnnxSubtitle/Info.plist;
				INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES;
				INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
				INFOPLIST_KEY_UILaunchScreen_Generation = YES;
				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
				IPHONEOS_DEPLOYMENT_TARGET = 16.0;
				LD_RUNPATH_SEARCH_PATHS = (
					"$(inherited)",
					"@executable_path/Frameworks",
				);
				MARKETING_VERSION = 1.0;
				OTHER_LDFLAGS = "-lc++";
				PRODUCT_BUNDLE_IDENTIFIER = net.duoziwei.SherpaOnnxSubtitle;
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_EMIT_LOC_STRINGS = YES;
				SWIFT_OBJC_BRIDGING_HEADER = "${PROJECT_DIR}/../../swift-api-examples/SherpaOnnx-Bridging-Header.h";
				SWIFT_VERSION = 5.0;
				TARGETED_DEVICE_FAMILY = "1,2";
			};
			name = Release;
		};
/* End XCBuildConfiguration section */

/* Begin XCConfigurationList section */
		DEA6570C2ABF19730066A81D /* Build configuration list for PBXProject "SherpaOnnxSubtitle" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				DEA6571D2ABF19740066A81D /* Debug */,
				DEA6571E2ABF19740066A81D /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		DEA6571F2ABF19740066A81D /* Build configuration list for PBXNativeTarget "SherpaOnnxSubtitle" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				DEA657202ABF19740066A81D /* Debug */,
				DEA657212ABF19740066A81D /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
/* End XCConfigurationList section */
	};
	rootObject = DEA657092ABF19730066A81D /* Project object */;
}


================================================
FILE: ios-swiftui/SherpaOnnxSubtitle/SherpaOnnxSubtitle.xcodeproj/project.xcworkspace/contents.xcworkspacedata
================================================
<?xml version="1.0" encoding="UTF-8"?>
<Workspace
   version = "1.0">
   <FileRef
      location = "self:">
   </FileRef>
</Workspace>


================================================
FILE: ios-swiftui/SherpaOnnxSubtitle/SherpaOnnxSubtitle.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>IDEDidComputeMac32BitWarning</key>
	<true/>
</dict>
</plist>


================================================
FILE: ios-swiftui/SherpaOnnxTts/SherpaOnnxTts/Assets.xcassets/AccentColor.colorset/Contents.json
================================================
{
  "colors" : [
    {
      "idiom" : "universal"
    }
  ],
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}


================================================
FILE: ios-swiftui/SherpaOnnxTts/SherpaOnnxTts/Assets.xcassets/AppIcon.appiconset/Contents.json
================================================
{
  "images" : [
    {
      "idiom" : "universal",
      "platform" : "ios",
      "size" : "1024x1024"
    }
  ],
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}


================================================
FILE: ios-swiftui/SherpaOnnxTts/SherpaOnnxTts/Assets.xcassets/Contents.json
================================================
{
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}


================================================
FILE: ios-swiftui/SherpaOnnxTts/SherpaOnnxTts/ContentView.swift
================================================
//
//  ContentView.swift
//  SherpaOnnxTts
//
//  Created by fangjun on 2023/11/23.
//
// Text-to-speech with Next-gen Kaldi on iOS without Internet connection

import SwiftUI
import AVFoundation

struct ContentView: View {
    @State private var sid = "0"
    @State private var speed = 1.0
    @State private var text = ""
    @State private var showAlert = false
    @State var filename: URL = NSURL() as URL
    @State var audioPlayer: AVAudioPlayer!

    private var tts = createOfflineTts()

    var body: some View {

        VStack(alignment: .leading) {
            HStack {
                Spacer()
                Text("Next-gen Kaldi: TTS").font(.title)
                Spacer()
            }
            HStack{
                Text("Speaker ID")
                TextField("Please input a speaker ID", text: $sid).textFieldStyle(.roundedBorder)
                    .keyboardType(.numberPad)
            }
            HStack{
                Text("Speed \(String(format: "%.1f", speed))")
                    .padding(.trailing)
                Slider(value: $speed, in: 0.5...2.0, step: 0.1) {
                    Text("Speech speed")
                }
            }

            Text("Please input your text below").padding([.trailing, .top, .bottom])

            TextEditor(text: $text)
                .font(.body)
                .opacity(self.text.isEmpty ? 0.25 : 1)
                .disableAutocorrection(true)
                .border(Color.black)

            Spacer()
            HStack {
                Spacer()
                Button(action: {
                    let speakerId = Int(self.sid) ?? 0
                    let t = self.text.trimmingCharacters(in: .whitespacesAndNewlines)
                    if t.isEmpty {
                        self.showAlert = true
                        return
                    }

                    let audio = tts.generate(text: t, sid: speakerId, speed: Float(self.speed))
                    if self.filename.absoluteString.isEmpty {
                        let tempDirectoryURL = NSURL.fileURL(withPath: NSTemporaryDirectory(), isDirectory: true)
                        self.filename = tempDirectoryURL.appendingPathComponent("test.wav")
                    }

                    let _ = audio.save(filename: filename.path)

                    self.audioPlayer = try! AVAudioPlayer(contentsOf: filename)
                    self.audioPlayer.play()
                }) {
                    Text("Generate")
                }.alert(isPresented: $showAlert) {
                    Alert(title: Text("Empty text"), message: Text("Please input your text before clicking the Generate button"))
                }
                Spacer()
                Button (action: {
                    self.audioPlayer.play()
                }) {
                    Text("Play")
                }.disabled(filename.absoluteString.isEmpty)
                Spacer()
            }
            Spacer()
        }
        .padding()
    }
}

struct ContentView_Previews: PreviewProvider {
    static var previews: some View {
        ContentView()
    }
}


================================================
FILE: ios-swiftui/SherpaOnnxTts/SherpaOnnxTts/Info.plist
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>NSMicrophoneUsageDescription</key>
	<string>Need microphone access for Next-gen Kaldi to work</string>
</dict>
</plist>


================================================
FILE: ios-swiftui/SherpaOnnxTts/SherpaOnnxTts/Preview Content/Preview Assets.xcassets/Contents.json
================================================
{
  "info" : {
    "author" : "xcode",
    "version" : 1
  }
}


================================================
FILE: ios-swiftui/SherpaOnnxTts/SherpaOnnxTts/SherpaOnnxTtsApp.swift
================================================
//
//  SherpaOnnxTtsApp.swift
//  SherpaOnnxTts
//
//  Created by fangjun on 2023/11/23.
//

import SwiftUI

@main
struct SherpaOnnxTtsApp: App {
    var body: some Scene {
        WindowGroup {
            ContentView()
        }
    }
}


================================================
FILE: ios-swiftui/SherpaOnnxTts/SherpaOnnxTts/ViewModel.swift
================================================
//
//  ViewModel.swift
//  SherpaOnnxTts
//
//  Created by fangjun on 2023/11/23.
//

import Foundation

// used to get the path to espeak-ng-data
func resourceURL(to path: String) -> String {
  return URL(string: path, relativeTo: Bundle.main.resourceURL)!.path
}

func getResource(_ forResource: String, _ ofType: String) -> String {
  let path = Bundle.main.path(forResource: forResource, ofType: ofType)
  precondition(
    path != nil,
    "\(forResource).\(ofType) does not exist!\n" + "Remember to change \n"
      + "  Build Phases -> Copy Bundle Resources\n" + "to add it!"
  )
  return path!
}

/// Please refer to
/// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/index.html
/// to download pre-trained models

func getTtsForVCTK() -> SherpaOnnxOfflineTtsWrapper {
  // See the following link
  // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vctk-english-multi-speaker-109-speakers

  // vits-vctk.onnx
  let model = getResource("vits-vctk", "onnx")

  // lexicon.txt
  let lexicon = getResource("lexicon", "txt")

  // tokens.txt
  let tokens = getResource("tokens", "txt")

  let vits = sherpaOnnxOfflineTtsVitsModelConfig(model: model, lexicon: lexicon, tokens: tokens)
  let modelConfig = sherpaOnnxOfflineTtsModelConfig(vits: vits)
  var config = sherpaOnnxOfflineTtsConfig(model: modelConfig)
  return SherpaOnnxOfflineTtsWrapper(config: &config)
}

func getTtsForAishell3() -> SherpaOnnxOfflineTtsWrapper {
  // See the following link
  // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-model-aishell3

  let model = getResource("model", "onnx")

  // lexicon.txt
  let lexicon = getResource("lexicon", "txt")

  // tokens.txt
  let tokens = getResource("tokens", "txt")

  // rule.fst
  let ruleFsts = getResource("rule", "fst")

  // rule.far
  let ruleFars = getResource("rule", "far")

  let vits = sherpaOnnxOfflineTtsVitsModelConfig(model: model, lexicon: lexicon, tokens: tokens)
  let modelConfig = sherpaOnnxOfflineTtsModelConfig(vits: vits)
  var config = sherpaOnnxOfflineTtsConfig(
    model: modelConfig,
    ruleFsts: ruleFsts,
    ruleFars: ruleFars
  )
  return SherpaOnnxOfflineTtsWrapper(config: &config)
}

// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
func getTtsFor_en_US_amy_low() -> SherpaOnnxOfflineTtsWrapper {
  // please see  https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2

  let model = getResource("en_US-amy-low", "onnx")

  // tokens.txt
  let tokens = getResource("tokens", "txt")

  // in this case, we don't need lexicon.txt
  let dataDir = resourceURL(to: "espeak-ng-data")

  let vits = sherpaOnnxOfflineTtsVitsModelConfig(
    model: model, lexicon: "", tokens: tokens, dataDir: dataDir)
  let modelConfig = sherpaOnnxOfflineTtsModelConfig(vits: vits)
  var config = sherpaOnnxOfflineTtsConfig(model: modelConfig)

  return SherpaOnnxOfflineTtsWrapper(config: &config)
}

// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-melo-tts-zh-en-chinese-english-1-speaker
func getTtsFor_zh_en_melo_tts() -> SherpaOnnxOfflineTtsWrapper {
  // please see https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-melo-tts-zh_en.tar.bz2

  let model = getResource("model", "onnx")

  let tokens = getResource("tokens", "txt")
  let lexicon = getResource("lexicon", "txt")

  let numFst = getResource("number", "fst")
  let dateFst = getResource("date", "fst")
  let phoneFst = getResource("phone", "fst")
  let ruleFsts = "\(dateFst),\(phoneFst),\(numFst)"

  let vits = sherpaOnnxOfflineTtsVitsModelConfig(
    model: model, lexicon: lexicon, tokens: tokens,
    dataDir: "",
    noiseScale: 0.667,
    noiseScaleW: 0.8,
    lengthScale: 1.0
  )

  let modelConfig = sherpaOnnxOfflineTtsModelConfig(vits: vits)
  var config = sherpaOnnxOfflineTtsConfig(
    model: modelConfig,
    ruleFsts: ruleFsts
  )

  return SherpaOnnxOfflineTtsWrapper(config: &config)
}

func getTtsFor_matcha_icefall_zh_baker() -> SherpaOnnxOfflineTtsWrapper {
  // please see https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker

  let acousticModel = getResource("model-steps-3", "onnx")
  let vocoder = getResource("vocos-22khz-univ", "onnx")

  let tokens = getResource("tokens", "txt")
  let lexicon = getResource("lexicon", "txt")

  let numFst = getResource("number", "fst")
  let dateFst = getResource("date", "fst")
  let phoneFst = getResource("phone", "fst")
  let ruleFsts = "\(dateFst),\(phoneFst),\(numFst)"

  let matcha = sherpaOnnxOfflineTtsMatchaModelConfig(
    acousticModel: acousticModel,
    vocoder: vocoder,
    lexicon: lexicon,
    tokens: tokens
  )

  let modelConfig = sherpaOnnxOfflineTtsModelConfig(matcha: matcha)
  var config = sherpaOnnxOfflineTtsConfig(
    model: modelConfig,
    ruleFsts: ruleFsts
  )

  return SherpaOnnxOfflineTtsWrapper(config: &config)
}

func getTtsFor_kokoro_en_v0_19() -> SherpaOnnxOfflineTtsWrapper {
  // please see https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html#kokoro-en-v0-19-english-11-speakers

  let model = getResource("model", "onnx")
  let voices = getResource("voices", "bin")

  // tokens.txt
  let tokens = getResource("tokens", "txt")

  // in this case, we don't need lexicon.txt
  let dataDir = resourceURL(to: "espeak-ng-data")

  let kokoro = sherpaOnnxOfflineTtsKokoroModelConfig(
    model: model, voices: voices, tokens: tokens, dataDir: dataDir)
  let modelConfig = sherpaOnnxOfflineTtsModelConfig(kokoro: kokoro)
  var config = sherpaOnnxOfflineTtsConfig(model: modelConfig)

  return SherpaOnnxOfflineTtsWrapper(config: &config)
}

func getTtsFor_kokoro_multi_lang_v1_0() -> SherpaOnnxOfflineTtsWrapper {
  // please see https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html

  let model = getResource("model", "onnx")
  let voices = getResource("voices", "bin")

  // tokens.txt
  let tokens = getResource("tokens", "txt")

  let lexicon_en = getResource("lexicon-us-en", "txt")
  let lexicon_zh = getResource("lexicon-zh", "txt")
  let lexicon = "\(lexicon_en),\(lexicon_zh)"

  // in this case, we don't need lexicon.txt
  let dataDir = resourceURL(to: "espeak-ng-data")

  let numFst = getResource("number-zh", "fst")
  let dateFst = getResource("date-zh", "fst")
  let phoneFst = getResource("phone-zh", "fst")
  let ruleFsts = "\(dateFst),\(phoneFst),\(numFst)"

  let kokoro = sherpaOnnxOfflineTtsKokoroModelConfig(
    model: model, voices: voices, tokens: tokens, dataDir: dataDir,
    lexicon: lexicon)
  let modelConfig = sherpaOnnxOfflineTtsModelConfig(kokoro: kokoro)
  var config = sherpaOnnxOfflineTtsConfig(model: modelConfig)

  return SherpaOnnxOfflineTtsWrapper(config: &config)
}

func createOfflineTts() -> SherpaOnnxOfflineTtsWrapper {
  // Please enable only one of them

  return getTtsFor_kokoro_multi_lang_v1_0()

  // return getTtsFor_kokoro_en_v0_19()

  // return getTtsFor_matcha_icefall_zh_baker()

  // return getTtsFor_en_US_amy_low()

  // return getTtsForVCTK()

  // return getTtsForAishell3()

  // return getTtsFor_zh_en_melo_tts()

  // please add more models on need by following the above two examples
}


================================================
FILE: ios-swiftui/SherpaOnnxTts/SherpaOnnxTts.xcodeproj/project.pbxproj
================================================
// !$*UTF8*$!
{
	archiveVersion = 1;
	classes = {
	};
	objectVersion = 56;
	objects = {

/* Begin PBXBuildFile section */
		C917B4E52B0EEF3B005245AC /* SherpaOnnxTtsApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = C917B4E42B0EEF3B005245AC /* SherpaOnnxTtsApp.swift */; };
		C917B4E72B0EEF3B005245AC /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = C917B4E62B0EEF3B005245AC /* ContentView.swift */; };
		C917B4E92B0EEF3C005245AC /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = C917B4E82B0EEF3C005245AC /* Assets.xcassets */; };
		C917B4EC2B0EEF3C005245AC /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = C917B4EB2B0EEF3C005245AC /* Preview Assets.xcassets */; };
		C9FE9FE52B0F33CD009F1003 /* ViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = C9FE9FE42B0F33CD009F1003 /* ViewModel.swift */; };
		C9FE9FE72B0F3620009F1003 /* SherpaOnnx.swift in Sources */ = {isa = PBXBuildFile; fileRef = C9FE9FE62B0F3620009F1003 /* SherpaOnnx.swift */; };
		C9FE9FEA2B0F3754009F1003 /* sherpa-onnx.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = C9FE9FE92B0F3754009F1003 /* sherpa-onnx.xcframework */; };
		C9FE9FEF2B0F3EFB009F1003 /* onnxruntime.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = C9FE9FEB2B0F3785009F1003 /* onnxruntime.xcframework */; };
/* End PBXBuildFile section */

/* Begin PBXFileReference section */
		C917B4E12B0EEF3B005245AC /* SherpaOnnxTts.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = SherpaOnnxTts.app; sourceTree = BUILT_PRODUCTS_DIR; };
		C917B4E42B0EEF3B005245AC /* SherpaOnnxTtsApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SherpaOnnxTtsApp.swift; sourceTree = "<group>"; };
		C917B4E62B0EEF3B005245AC /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
		C917B4E82B0EEF3C005245AC /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
		C917B4EB2B0EEF3C005245AC /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = "<group>"; };
		C9FE9FE42B0F33CD009F1003 /* ViewModel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ViewModel.swift; sourceTree = "<group>"; };
		C9FE9FE62B0F3620009F1003 /* SherpaOnnx.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = SherpaOnnx.swift; path = "../../../swift-api-examples/SherpaOnnx.swift"; sourceTree = "<group>"; };
		C9FE9FE92B0F3754009F1003 /* sherpa-onnx.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = "sherpa-onnx.xcframework"; path = "../../build-ios/sherpa-onnx.xcframework"; sourceTree = "<group>"; };
		C9FE9FEB2B0F3785009F1003 /* onnxruntime.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = onnxruntime.xcframework; path = "../../build-ios/ios-onnxruntime/1.17.1/onnxruntime.xcframework"; sourceTree = "<group>"; };
/* End PBXFileReference section */

/* Begin PBXFrameworksBuildPhase section */
		C917B4DE2B0EEF3B005245AC /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
				C9FE9FEF2B0F3EFB009F1003 /* onnxruntime.xcframework in Frameworks */,
				C9FE9FEA2B0F3754009F1003 /* sherpa-onnx.xcframework in Frameworks */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXFrameworksBuildPhase section */

/* Begin PBXGroup section */
		C917B4D82B0EEF3B005245AC = {
			isa = PBXGroup;
			children = (
				C917B4E32B0EEF3B005245AC /* SherpaOnnxTts */,
				C917B4E22B0EEF3B005245AC /* Products */,
				C9FE9FE82B0F3754009F1003 /* Frameworks */,
			);
			sourceTree = "<group>";
		};
		C917B4E22B0EEF3B005245AC /* Products */ = {
			isa = PBXGroup;
			children = (
				C917B4E12B0EEF3B005245AC /* SherpaOnnxTts.app */,
			);
			name = Products;
			sourceTree = "<group>";
		};
		C917B4E32B0EEF3B005245AC /* SherpaOnnxTts */ = {
			isa = PBXGroup;
			children = (
				C9FE9FE62B0F3620009F1003 /* SherpaOnnx.swift */,
				C9FE9FE42B0F33CD009F1003 /* ViewModel.swift */,
				C917B4E42B0EEF3B005245AC /* SherpaOnnxTtsApp.swift */,
				C917B4E62B0EEF3B005245AC /* ContentView.swift */,
				C917B4E82B0EEF3C005245AC /* Assets.xcassets */,
				C917B4EA2B0EEF3C005245AC /* Preview Content */,
			);
			path = SherpaOnnxTts;
			sourceTree = "<group>";
		};
		C917B4EA2B0EEF3C005245AC /* Preview Content */ = {
			isa = PBXGroup;
			children = (
				C917B4EB2B0EEF3C005245AC /* Preview Assets.xcassets */,
			);
			path = "Preview Content";
			sourceTree = "<group>";
		};
		C9FE9FE82B0F3754009F1003 /* Frameworks */ = {
			isa = PBXGroup;
			children = (
				C9FE9FEB2B0F3785009F1003 /* onnxruntime.xcframework */,
				C9FE9FE92B0F3754009F1003 /* sherpa-onnx.xcframework */,
			);
			name = Frameworks;
			sourceTree = "<group>";
		};
/* End PBXGroup section */

/* Begin PBXNativeTarget section */
		C917B4E02B0EEF3B005245AC /* SherpaOnnxTts */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = C917B4EF2B0EEF3C005245AC /* Build configuration list for PBXNativeTarget "SherpaOnnxTts" */;
			buildPhases = (
				C917B4DD2B0EEF3B005245AC /* Sources */,
				C917B4DE2B0EEF3B005245AC /* Frameworks */,
				C917B4DF2B0EEF3B005245AC /* Resources */,
			);
			buildRules = (
			);
			dependencies = (
			);
			name = SherpaOnnxTts;
			productName = SherpaOnnxTts;
			productReference = C917B4E12B0EEF3B005245AC /* SherpaOnnxTts.app */;
			productType = "com.apple.product-type.application";
		};
/* End PBXNativeTarget section */

/* Begin PBXProject section */
		C917B4D92B0EEF3B005245AC /* Project object */ = {
			isa = PBXProject;
			attributes = {
				BuildIndependentTargetsInParallel = 1;
				LastSwiftUpdateCheck = 1420;
				LastUpgradeCheck = 1420;
				TargetAttributes = {
					C917B4E02B0EEF3B005245AC = {
						CreatedOnToolsVersion = 14.2;
					};
				};
			};
			buildConfigurationList = C917B4DC2B0EEF3B005245AC /* Build configuration list for PBXProject "SherpaOnnxTts" */;
			compatibilityVersion = "Xcode 14.0";
			developmentRegion = en;
			hasScannedForEncodings = 0;
			knownRegions = (
				en,
				Base,
			);
			mainGroup = C917B4D82B0EEF3B005245AC;
			productRefGroup = C917B4E22B0EEF3B005245AC /* Products */;
			projectDirPath = "";
			projectRoot = "";
			targets = (
				C917B4E02B0EEF3B005245AC /* SherpaOnnxTts */,
			);
		};
/* End PBXProject section */

/* Begin PBXResourcesBuildPhase section */
		C917B4DF2B0EEF3B005245AC /* Resources */ = {
			isa = PBXResourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				C917B4EC2B0EEF3C005245AC /* Preview Assets.xcassets in Resources */,
				C917B4E92B0EEF3C005245AC /* Assets.xcassets in Resources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXResourcesBuildPhase section */

/* Begin PBXSourcesBuildPhase section */
		C917B4DD2B0EEF3B005245AC /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				C917B4E72B0EEF3B005245AC /* ContentView.swift in Sources */,
				C9FE9FE72B0F3620009F1003 /* SherpaOnnx.swift in Sources */,
				C9FE9FE52B0F33CD009F1003 /* ViewModel.swift in Sources */,
				C917B4E52B0EEF3B005245AC /* SherpaOnnxTtsApp.swift in Sources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXSourcesBuildPhase section */

/* Begin XCBuildConfiguration section */
		C917B4ED2B0EEF3C005245AC /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				CLANG_ANALYZER_NONNULL = YES;
				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
				CLANG_ENABLE_MODULES = YES;
				CLANG_ENABLE_OBJC_ARC = YES;
				CLANG_ENABLE_OBJC_WEAK = YES;
				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
				CLANG_WARN_BOOL_CONVERSION = YES;
				CLANG_WARN_COMMA = YES;
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
				CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
				CLANG_WARN_STRICT_PROTOTYPES = YES;
				CLANG_WARN_SUSPICIOUS_MOVE = YES;
				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
				CLANG_WARN_UNREACHABLE_CODE = YES;
				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
				COPY_PHASE_STRIP = NO;
				DEBUG_INFORMATION_FORMAT = dwarf;
				ENABLE_STRICT_OBJC_MSGSEND = YES;
				ENABLE_TESTABILITY = YES;
				GCC_C_LANGUAGE_STANDARD = gnu11;
				GCC_DYNAMIC_NO_PIC = NO;
				GCC_NO_COMMON_BLOCKS = YES;
				GCC_OPTIMIZATION_LEVEL = 0;
				GCC_PREPROCESSOR_DEFINITIONS = (
					"DEBUG=1",
					"$(inherited)",
				);
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
				GCC_WARN_UNDECLARED_SELECTOR = YES;
				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
				GCC_WARN_UNUSED_FUNCTION = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				IPHONEOS_DEPLOYMENT_TARGET = 16.2;
				MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
				MTL_FAST_MATH = YES;
				ONLY_ACTIVE_ARCH = YES;
				SDKROOT = iphoneos;
				SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
			};
			name = Debug;
		};
		C917B4EE2B0EEF3C005245AC /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				CLANG_ANALYZER_NONNULL = YES;
				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
				CLANG_ENABLE_MODULES = YES;
				CLANG_ENABLE_OBJC_ARC = YES;
				CLANG_ENABLE_OBJC_WEAK = YES;
				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
				CLANG_WARN_BOOL_CONVERSION = YES;
				CLANG_WARN_COMMA = YES;
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INFINITE_RECURSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
				CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
				CLANG_WARN_STRICT_PROTOTYPES = YES;
				CLANG_WARN_SUSPICIOUS_MOVE = YES;
				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
				CLANG_WARN_UNREACHABLE_CODE = YES;
				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
				COPY_PHASE_STRIP = NO;
				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
				ENABLE_NS_ASSERTIONS = NO;
				ENABLE_STRICT_OBJC_MSGSEND = YES;
				GCC_C_LANGUAGE_STANDARD = gnu11;
				GCC_NO_COMMON_BLOCKS = YES;
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
				GCC_WARN_UNDECLARED_SELECTOR = YES;
				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
				GCC_WARN_UNUSED_FUNCTION = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				IPHONEOS_DEPLOYMENT_TARGET = 16.2;
				MTL_ENABLE_DEBUG_INFO = NO;
				MTL_FAST_MATH = YES;
				SDKROOT = iphoneos;
				SWIFT_COMPILATION_MODE = wholemodule;
				SWIFT_OPTIMIZATION_LEVEL = "-O";
				VALIDATE_PRODUCT = YES;
			};
			name = Release;
		};
		C917B4F02B0EEF3C005245AC /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
				ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
				CODE_SIGN_STYLE = Automatic;
				CURRENT_PROJECT_VERSION = 1;
				DEVELOPMENT_ASSET_PATHS = "\"SherpaOnnxTts/Preview Content\"";
				ENABLE_PREVIEWS = YES;
				FRAMEWORK_SEARCH_PATHS = "${PROJECT_DIR}/../../build-ios";
				GENERATE_INFOPLIST_FILE = YES;
				HEADER_SEARCH_PATHS = "${PROJECT_DIR}/../../build-ios/sherpa-onnx.xcframework/Headers";
				INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES;
				INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
				INFOPLIST_KEY_UILaunchScreen_Generation = YES;
				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
				LD_RUNPATH_SEARCH_PATHS = (
					"$(inherited)",
					"@executable_path/Frameworks",
				);
				MARKETING_VERSION = 1.0;
				OTHER_LDFLAGS = "-lc++";
				PRODUCT_BUNDLE_IDENTIFIER = "com.k2-fsa.org.SherpaOnnxTts";
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_EMIT_LOC_STRINGS = YES;
				SWIFT_OBJC_BRIDGING_HEADER = "${PROJECT_DIR}/../../swift-api-examples/SherpaOnnx-Bridging-Header.h";
				SWIFT_VERSION = 5.0;
				TARGETED_DEVICE_FAMILY = "1,2";
			};
			name = Debug;
		};
		C917B4F12B0EEF3C005245AC /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
				ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
				CODE_SIGN_STYLE = Automatic;
				CURRENT_PROJECT_VERSION = 1;
				DEVELOPMENT_ASSET_PATHS = "\"SherpaOnnxTts/Preview Content\"";
				ENABLE_PREVIEWS = YES;
				FRAMEWORK_SEARCH_PATHS = "${PROJECT_DIR}/../../build-ios";
				GENERATE_INFOPLIST_FILE = YES;
				HEADER_SEARCH_PATHS = "${PROJECT_DIR}/../../build-ios/sherpa-onnx.xcframework/Headers";
				INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES;
				INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
				INFOPLIST_KEY_UILaunchScreen_Generation = YES;
				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
				INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
				LD_RUNPATH_SEARCH_PATHS = (
					"$(inherited)",
					"@executable_path/Frameworks",
				);
				MARKETING_VERSION = 1.0;
				OTHER_LDFLAGS = "-lc++";
				PRODUCT_BUNDLE_IDENTIFIER = "com.k2-fsa.org.SherpaOnnxTts";
				PRODUCT_NAME = "$(TARGET_NAME)";
				SWIFT_EMIT_LOC_STRINGS = YES;
				SWIFT_OBJC_BRIDGING_HEADER = "${PROJECT_DIR}/../../swift-api-examples/SherpaOnnx-Bridging-Header.h";
				SWIFT_VERSION = 5.0;
				TARGETED_DEVICE_FAMILY = "1,2";
			};
			name = Release;
		};
/* End XCBuildConfiguration section */

/* Begin XCConfigurationList section */
		C917B4DC2B0EEF3B005245AC /* Build configuration list for PBXProject "SherpaOnnxTts" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				C917B4ED2B0EEF3C005245AC /* Debug */,
				C917B4EE2B0EEF3C005245AC /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		C917B4EF2B0EEF3C005245AC /* Build configuration list for PBXNativeTarget "SherpaOnnxTts" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				C917B4F02B0EEF3C005245AC /* Debug */,
				C917B4F12B0EEF3C005245AC /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
/* End XCConfigurationList section */
	};
	rootObject = C917B4D92B0EEF3B005245AC /* Project object */;
}


================================================
FILE: ios-swiftui/SherpaOnnxTts/SherpaOnnxTts.xcodeproj/project.xcworkspace/contents.xcworkspacedata
================================================
<?xml version="1.0" encoding="UTF-8"?>
<Workspace
   version = "1.0">
   <FileRef
      location = "self:">
   </FileRef>
</Workspace>


================================================
FILE: ios-swiftui/SherpaOnnxTts/SherpaOnnxTts.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>IDEDidComputeMac32BitWarning</key>
	<true/>
</dict>
</plist>


================================================
FILE: java-api-examples/.gitignore
================================================
lib
hs_err*
!run-*.sh
./hotwords_cn.txt
*.class


================================================
FILE: java-api-examples/AudioTaggingCEDFromFile.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use a CED audio tagging model to tag
// input audio files.
import com.k2fsa.sherpa.onnx.*;

public class AudioTaggingCEDFromFile {
  public static void main(String[] args) {
    // please download the model from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models
    String model = "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/model.int8.onnx";
    String labels = "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/class_labels_indices.csv";
    int topK = 5;

    String[] testWaves =
        new String[] {
          "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/1.wav",
          "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/2.wav",
          "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/3.wav",
          "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/4.wav",
          "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/5.wav",
          "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/6.wav",
          "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/7.wav",
          "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/8.wav",
          "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/9.wav",
          "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/10.wav",
          "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/11.wav",
          "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/12.wav",
          "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/13.wav",
        };

    AudioTaggingModelConfig modelConfig =
        AudioTaggingModelConfig.builder().setCED(model).setNumThreads(1).setDebug(true).build();

    AudioTaggingConfig config =
        AudioTaggingConfig.builder().setModel(modelConfig).setLabels(labels).setTopK(topK).build();

    AudioTagging tagger = new AudioTagging(config);
    System.out.println("------");
    for (String filename : testWaves) {
      WaveReader reader = new WaveReader(filename);

      OfflineStream stream = tagger.createStream();
      stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());

      AudioEvent[] events = tagger.compute(stream);

      stream.release();

      System.out.printf("input file: %s\n", filename);
      System.out.printf("Probability\t\tName\n");
      for (AudioEvent e : events) {
        System.out.printf("%.3f\t\t\t%s\n", e.getProb(), e.getName());
      }
      System.out.println("------");
    }

    tagger.release();
  }
}


================================================
FILE: java-api-examples/AudioTaggingZipformerFromFile.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use a zipformer audio tagging model to tag
// input audio files.
import com.k2fsa.sherpa.onnx.*;

public class AudioTaggingZipformerFromFile {
  public static void main(String[] args) {
    // please download the model from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models
    String model = "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/model.int8.onnx";
    String labels =
        "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/class_labels_indices.csv";
    int topK = 5;

    String[] testWaves =
        new String[] {
          "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/1.wav",
          "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/2.wav",
          "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/3.wav",
          "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/4.wav",
          "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/5.wav",
          "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/6.wav",
          "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/7.wav",
          "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/8.wav",
          "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/9.wav",
          "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/10.wav",
          "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/11.wav",
          "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/12.wav",
          "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/13.wav",
        };

    OfflineZipformerAudioTaggingModelConfig zipformer =
        OfflineZipformerAudioTaggingModelConfig.builder().setModel(model).build();

    AudioTaggingModelConfig modelConfig =
        AudioTaggingModelConfig.builder()
            .setZipformer(zipformer)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    AudioTaggingConfig config =
        AudioTaggingConfig.builder().setModel(modelConfig).setLabels(labels).setTopK(topK).build();

    AudioTagging tagger = new AudioTagging(config);
    System.out.println("------");
    for (String filename : testWaves) {
      WaveReader reader = new WaveReader(filename);

      OfflineStream stream = tagger.createStream();
      stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());

      AudioEvent[] events = tagger.compute(stream);

      stream.release();

      System.out.printf("input file: %s\n", filename);
      System.out.printf("Probability\t\tName\n");
      for (AudioEvent e : events) {
        System.out.printf("%.3f\t\t\t%s\n", e.getProb(), e.getName());
      }
      System.out.println("------");
    }

    tagger.release();
  }
}


================================================
FILE: java-api-examples/InverseTextNormalizationNonStreamingParaformer.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use an offline paraformer, i.e., non-streaming paraformer,
// to decode files with inverse text normalization.
import com.k2fsa.sherpa.onnx.*;

public class InverseTextNormalizationNonStreamingParaformer {
  public static void main(String[] args) {
    // please refer to
    // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2023-09-14-chinese-english
    // to download model files
    String model = "./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx";
    String tokens = "./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt";

    // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
    String waveFilename = "./itn-zh-number.wav";

    // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
    String ruleFsts = "./itn_zh_number.fst";

    WaveReader reader = new WaveReader(waveFilename);

    OfflineParaformerModelConfig paraformer =
        OfflineParaformerModelConfig.builder().setModel(model).build();

    OfflineModelConfig modelConfig =
        OfflineModelConfig.builder()
            .setParaformer(paraformer)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineRecognizerConfig config =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .setRuleFsts(ruleFsts)
            .build();

    OfflineRecognizer recognizer = new OfflineRecognizer(config);
    OfflineStream stream = recognizer.createStream();
    stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());

    recognizer.decode(stream);

    String text = recognizer.getResult(stream).getText();

    System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);

    stream.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/InverseTextNormalizationStreamingTransducer.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use a streaming transducer
// to decode files with inverse text normalization.
import com.k2fsa.sherpa.onnx.*;

public class InverseTextNormalizationStreamingTransducer {
  public static void main(String[] args) {
    // please refer to
    // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
    // to download model files
    String encoder =
        "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx";
    String decoder =
        "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx";
    String joiner =
        "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx";
    String tokens = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt";

    // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
    String waveFilename = "./itn-zh-number.wav";

    // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
    String ruleFsts = "./itn_zh_number.fst";

    WaveReader reader = new WaveReader(waveFilename);

    OnlineTransducerModelConfig transducer =
        OnlineTransducerModelConfig.builder()
            .setEncoder(encoder)
            .setDecoder(decoder)
            .setJoiner(joiner)
            .build();

    OnlineModelConfig modelConfig =
        OnlineModelConfig.builder()
            .setTransducer(transducer)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OnlineRecognizerConfig config =
        OnlineRecognizerConfig.builder()
            .setOnlineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .setRuleFsts(ruleFsts)
            .build();

    OnlineRecognizer recognizer = new OnlineRecognizer(config);
    OnlineStream stream = recognizer.createStream();
    stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());

    float[] tailPaddings = new float[(int) (0.8 * reader.getSampleRate())];
    stream.acceptWaveform(tailPaddings, reader.getSampleRate());

    while (recognizer.isReady(stream)) {
      recognizer.decode(stream);
    }

    String text = recognizer.getResult(stream).getText();

    System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);

    stream.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/KeywordSpotterFromFile.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use a keyword spotter model to spot keywords from
// a file.

import com.k2fsa.sherpa.onnx.*;

public class KyewordSpotterFromFile {
  public static void main(String[] args) {
    // please download test files from https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models
    String encoder =
        "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx";
    String decoder =
        "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx";
    String joiner =
        "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx";
    String tokens = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt";

    String keywordsFile =
        "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/test_keywords.txt";

    String waveFilename = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav";

    OnlineTransducerModelConfig transducer =
        OnlineTransducerModelConfig.builder()
            .setEncoder(encoder)
            .setDecoder(decoder)
            .setJoiner(joiner)
            .build();

    OnlineModelConfig modelConfig =
        OnlineModelConfig.builder()
            .setTransducer(transducer)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    KeywordSpotterConfig config =
        KeywordSpotterConfig.builder()
            .setOnlineModelConfig(modelConfig)
            .setKeywordsFile(keywordsFile)
            .build();

    KeywordSpotter kws = new KeywordSpotter(config);
    OnlineStream stream = kws.createStream();

    WaveReader reader = new WaveReader(waveFilename);

    stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());

    float[] tailPaddings = new float[(int) (0.8 * reader.getSampleRate())];
    stream.acceptWaveform(tailPaddings, reader.getSampleRate());
    while (kws.isReady(stream)) {
      kws.decode(stream);

      String keyword = kws.getResult(stream).getKeyword();
      if (!keyword.isEmpty()) {
        // Remember to reset the stream right after detecting a keyword
        kws.reset(stream);
        System.out.printf("Detected keyword: %s\n", keyword);
      }
    }

    kws.release();
  }
}


================================================
FILE: java-api-examples/NonStreamingDecodeFileDolphinCtc.java
================================================
// Copyright 2025 Xiaomi Corporation

// This file shows how to use an offline Dolphin CTC model, i.e.,
// non-streaming Dolphin CTC model, to decode files.
import com.k2fsa.sherpa.onnx.*;

public class NonStreamingDecodeFileDolphinCtc {
  public static void main(String[] args) {
    // please refer to
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    // to download model files
    String model = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx";
    String tokens = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt";

    String waveFilename =
        "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav";

    WaveReader reader = new WaveReader(waveFilename);

    OfflineDolphinModelConfig dolphin = OfflineDolphinModelConfig.builder().setModel(model).build();

    OfflineModelConfig modelConfig =
        OfflineModelConfig.builder()
            .setDolphin(dolphin)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineRecognizerConfig config =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .build();

    OfflineRecognizer recognizer = new OfflineRecognizer(config);
    OfflineStream stream = recognizer.createStream();
    stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());

    recognizer.decode(stream);

    String text = recognizer.getResult(stream).getText();

    System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);

    stream.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/NonStreamingDecodeFileFireRedAsr.java
================================================
// Copyright 2025 Xiaomi Corporation

// This file shows how to use an offline FireRedAsr AED model
// to decode files.
import com.k2fsa.sherpa.onnx.*;

public class NonStreamingDecodeFileFireRedAsr {
  public static void main(String[] args) {
    // please refer to
    // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/FireRedAsr/index.html
    // to download model files
    String encoder = "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx";
    String decoder = "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/decoder.int8.onnx";
    String tokens = "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/tokens.txt";

    String waveFilename = "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/0.wav";

    WaveReader reader = new WaveReader(waveFilename);

    OfflineFireRedAsrModelConfig fireRedAsr =
        OfflineFireRedAsrModelConfig.builder().setEncoder(encoder).setDecoder(decoder).build();

    OfflineModelConfig modelConfig =
        OfflineModelConfig.builder()
            .setFireRedAsr(fireRedAsr)
            .setTokens(tokens)
            .setNumThreads(2)
            .setDebug(true)
            .build();

    OfflineRecognizerConfig config =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .build();

    OfflineRecognizer recognizer = new OfflineRecognizer(config);
    OfflineStream stream = recognizer.createStream();
    stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());

    recognizer.decode(stream);

    String text = recognizer.getResult(stream).getText();

    System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);

    stream.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/NonStreamingDecodeFileFireRedAsrCtc.java
================================================
// Copyright 2025 Xiaomi Corporation

// This file shows how to use an offline FireRedASR CTC model,
// i.e., non-streaming FireRedASR CTC model,
// to decode files.
import com.k2fsa.sherpa.onnx.*;

public class NonStreamingDecodeFileFireRedAsrCtc {
  public static void main(String[] args) {
    // please refer to
    // https://k2-fsa.github.io/sherpa/onnx/FireRedAsr/index.html
    // to download model files
    String model = "./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/model.int8.onnx";

    String tokens = "./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/tokens.txt";

    String waveFilename = "./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/test_wavs/1.wav";

    WaveReader reader = new WaveReader(waveFilename);

    OfflineFireRedAsrCtcModelConfig medasr =
        OfflineFireRedAsrCtcModelConfig.builder().setModel(model).build();

    OfflineModelConfig modelConfig =
        OfflineModelConfig.builder()
            .setFireRedAsrCtc(medasr)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineRecognizerConfig config =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .build();

    OfflineRecognizer recognizer = new OfflineRecognizer(config);
    OfflineStream stream = recognizer.createStream();
    stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());

    recognizer.decode(stream);

    String text = recognizer.getResult(stream).getText();

    System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);

    stream.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/NonStreamingDecodeFileFunAsrNano.java
================================================
// Copyright 2026 Xiaomi Corporation

// This file shows how to use an offline FunASR Nano model,
// i.e., non-streaming FunASR Nano model,
// to decode files.
import com.k2fsa.sherpa.onnx.*;

public class NonStreamingDecodeFileFunAsrNano {
  public static void main(String[] args) {
    // please refer to
    // https://k2-fsa.github.io/sherpa/onnx/funasr-nano/index.html
    // to download model files
    String encoderAdaptor = "./sherpa-onnx-funasr-nano-int8-2025-12-30/encoder_adaptor.int8.onnx";
    String llm = "./sherpa-onnx-funasr-nano-int8-2025-12-30/llm.int8.onnx";
    String embedding = "./sherpa-onnx-funasr-nano-int8-2025-12-30/embedding.int8.onnx";
    String tokenizer = "./sherpa-onnx-funasr-nano-int8-2025-12-30/Qwen3-0.6B";

    String tokens = "";

    String waveFilename = "./sherpa-onnx-funasr-nano-int8-2025-12-30/test_wavs/lyrics.wav";

    WaveReader reader = new WaveReader(waveFilename);

    OfflineFunAsrNanoModelConfig funasrNano =
        OfflineFunAsrNanoModelConfig.builder()
            .setEncoderAdaptor(encoderAdaptor)
            .setLLM(llm)
            .setEmbedding(embedding)
            .setTokenizer(tokenizer)
            .build();

    OfflineModelConfig modelConfig =
        OfflineModelConfig.builder()
            .setFunAsrNano(funasrNano)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineRecognizerConfig config =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .build();

    OfflineRecognizer recognizer = new OfflineRecognizer(config);
    OfflineStream stream = recognizer.createStream();
    stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());

    recognizer.decode(stream);

    String text = recognizer.getResult(stream).getText();

    System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);

    stream.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/NonStreamingDecodeFileMedAsrCtc.java
================================================
// Copyright 2025 Xiaomi Corporation

// This file shows how to use an offline Google MedASR CTC model,
// i.e., non-streaming MedASR CTC model,
// to decode files.
import com.k2fsa.sherpa.onnx.*;

public class NonStreamingDecodeFileMedAsrCtc {
  public static void main(String[] args) {
    // please refer to
    // https://k2-fsa.github.io/sherpa/onnx/medasr/index.html
    // to download model files
    String model = "./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/model.int8.onnx";

    String tokens = "./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/tokens.txt";

    String waveFilename = "./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/test_wavs/0.wav";

    WaveReader reader = new WaveReader(waveFilename);

    OfflineMedAsrCtcModelConfig medasr =
        OfflineMedAsrCtcModelConfig.builder().setModel(model).build();

    OfflineModelConfig modelConfig =
        OfflineModelConfig.builder()
            .setMedAsr(medasr)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineRecognizerConfig config =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .build();

    OfflineRecognizer recognizer = new OfflineRecognizer(config);
    OfflineStream stream = recognizer.createStream();
    stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());

    recognizer.decode(stream);

    String text = recognizer.getResult(stream).getText();

    System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);

    stream.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/NonStreamingDecodeFileMoonshine.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use an offline Moonshine,
// i.e., non-streaming Moonshine model,
// to decode files.
import com.k2fsa.sherpa.onnx.*;

public class NonStreamingDecodeFileMoonshine {
  public static void main(String[] args) {
    // please refer to
    // https://k2-fsa.github.io/sherpa/onnx/moonshine/index.html
    // to download model files

    String preprocessor = "./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx";
    String encoder = "./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx";
    String uncachedDecoder = "./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx";
    String cachedDecoder = "./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx";

    String tokens = "./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt";

    String waveFilename = "./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav";

    WaveReader reader = new WaveReader(waveFilename);

    OfflineMoonshineModelConfig moonshine =
        OfflineMoonshineModelConfig.builder()
            .setPreprocessor(preprocessor)
            .setEncoder(encoder)
            .setUncachedDecoder(uncachedDecoder)
            .setCachedDecoder(cachedDecoder)
            .build();

    OfflineModelConfig modelConfig =
        OfflineModelConfig.builder()
            .setMoonshine(moonshine)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineRecognizerConfig config =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .build();

    OfflineRecognizer recognizer = new OfflineRecognizer(config);
    OfflineStream stream = recognizer.createStream();
    stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());

    recognizer.decode(stream);

    String text = recognizer.getResult(stream).getText();

    System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);

    stream.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/NonStreamingDecodeFileMoonshineV2.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use an offline Moonshine,
// i.e., non-streaming Moonshine v2 model,
// to decode files.
import com.k2fsa.sherpa.onnx.*;

public class NonStreamingDecodeFileMoonshineV2 {
  public static void main(String[] args) {
    // please refer to
    // https://k2-fsa.github.io/sherpa/onnx/moonshine/index.html
    // to download model files

    String encoder = "./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/encoder_model.ort";
    String decoder =
        "./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/decoder_model_merged.ort";
    String tokens = "./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/tokens.txt";

    String waveFilename = "./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/test_wavs/0.wav";

    WaveReader reader = new WaveReader(waveFilename);

    OfflineMoonshineModelConfig moonshine =
        OfflineMoonshineModelConfig.builder().setEncoder(encoder).setMergedDecoder(decoder).build();

    OfflineModelConfig modelConfig =
        OfflineModelConfig.builder()
            .setMoonshine(moonshine)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineRecognizerConfig config =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .build();

    OfflineRecognizer recognizer = new OfflineRecognizer(config);
    OfflineStream stream = recognizer.createStream();
    stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());

    recognizer.decode(stream);

    String text = recognizer.getResult(stream).getText();

    System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);

    stream.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/NonStreamingDecodeFileNemo.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use an offline NeMo CTC model, i.e., non-streaming NeMo CTC model,,
// to decode files.
import com.k2fsa.sherpa.onnx.*;

public class NonStreamingDecodeFileNemo {
  public static void main(String[] args) {
    // please refer to
    // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-ctc-en-citrinet-512.tar.bz2
    // to download model files
    String model = "./sherpa-onnx-nemo-ctc-en-citrinet-512/model.int8.onnx";
    String tokens = "./sherpa-onnx-nemo-ctc-en-citrinet-512/tokens.txt";

    String waveFilename = "./sherpa-onnx-nemo-ctc-en-citrinet-512/test_wavs/1.wav";

    WaveReader reader = new WaveReader(waveFilename);

    OfflineNemoEncDecCtcModelConfig nemo =
        OfflineNemoEncDecCtcModelConfig.builder().setModel(model).build();

    OfflineModelConfig modelConfig =
        OfflineModelConfig.builder()
            .setNemo(nemo)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineRecognizerConfig config =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .build();

    OfflineRecognizer recognizer = new OfflineRecognizer(config);
    OfflineStream stream = recognizer.createStream();
    stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());

    recognizer.decode(stream);

    String text = recognizer.getResult(stream).getText();

    System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);

    stream.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/NonStreamingDecodeFileNemoCanary.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use an offline NeMo Canary model, i.e.,
// non-streaming NeMo Canary model, to decode files.
import com.k2fsa.sherpa.onnx.*;

public class NonStreamingDecodeFileNemoCanary {
  public static void main(String[] args) {
    // please refer to
    // https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html
    // to download model files
    String encoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx";
    String decoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx";
    String tokens = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt";

    String waveFilename = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav";

    WaveReader reader = new WaveReader(waveFilename);

    OfflineCanaryModelConfig canary =
        OfflineCanaryModelConfig.builder()
            .setEncoder(encoder)
            .setDecoder(decoder)
            .setSrcLang("en")
            .setTgtLang("en")
            .setUsePnc(true)
            .build();

    OfflineModelConfig modelConfig =
        OfflineModelConfig.builder()
            .setCanary(canary)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineRecognizerConfig config =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .build();

    OfflineRecognizer recognizer = new OfflineRecognizer(config);
    OfflineStream stream = recognizer.createStream();
    stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());

    recognizer.decode(stream);

    String text = recognizer.getResult(stream).getText();

    System.out.printf("filename:%s\nresult(English):%s\n", waveFilename, text);

    stream.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/NonStreamingDecodeFileOmnilingualAsrCtc.java
================================================
// Copyright 2025 Xiaomi Corporation

// This file shows how to use an offline Omnilingual ASR CTC model,
// i.e., non-streaming Omnilingual ASR CTC model,
// to decode files.
import com.k2fsa.sherpa.onnx.*;

public class NonStreamingDecodeFileOmnilingualAsrCtc {
  public static void main(String[] args) {
    // please refer to
    // https://k2-fsa.github.io/sherpa/onnx/omnilingual-asr/index.html
    // to download model files
    String model =
        "sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/model.int8.onnx";

    String tokens =
        "sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/tokens.txt";

    String waveFilename =
        "./sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/test_wavs/en.wav";

    WaveReader reader = new WaveReader(waveFilename);

    OfflineOmnilingualAsrCtcModelConfig omnilingual =
        OfflineOmnilingualAsrCtcModelConfig.builder().setModel(model).build();

    OfflineModelConfig modelConfig =
        OfflineModelConfig.builder()
            .setOmnilingual(omnilingual)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineRecognizerConfig config =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .build();

    OfflineRecognizer recognizer = new OfflineRecognizer(config);
    OfflineStream stream = recognizer.createStream();
    stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());

    recognizer.decode(stream);

    String text = recognizer.getResult(stream).getText();

    System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);

    stream.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/NonStreamingDecodeFileParaformer.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use an offline paraformer, i.e., non-streaming paraformer,
// to decode files.
import com.k2fsa.sherpa.onnx.*;

public class NonStreamingDecodeFileParaformer {
  public static void main(String[] args) {
    // please refer to
    // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2023-09-14-chinese-english
    // to download model files
    String model = "./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx";
    String tokens = "./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt";

    String waveFilename = "./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/3-sichuan.wav";

    WaveReader reader = new WaveReader(waveFilename);

    OfflineParaformerModelConfig paraformer =
        OfflineParaformerModelConfig.builder().setModel(model).build();

    OfflineModelConfig modelConfig =
        OfflineModelConfig.builder()
            .setParaformer(paraformer)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineRecognizerConfig config =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .build();

    OfflineRecognizer recognizer = new OfflineRecognizer(config);
    OfflineStream stream = recognizer.createStream();
    stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());

    recognizer.decode(stream);

    String text = recognizer.getResult(stream).getText();

    System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);

    stream.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/NonStreamingDecodeFileSenseVoice.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use an offline SenseVoice model,
// i.e., non-streaming SenseVoice model,
// to decode files.
import com.k2fsa.sherpa.onnx.*;

public class NonStreamingDecodeFileSenseVoice {
  public static void main(String[] args) {
    // please refer to
    // https://k2-fsa.github.io/sherpa/onnx/sense-voice/index.html
    // to download model files
    String model = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx";
    String tokens = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt";

    String waveFilename = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav";

    WaveReader reader = new WaveReader(waveFilename);

    OfflineSenseVoiceModelConfig senseVoice =
        OfflineSenseVoiceModelConfig.builder().setModel(model).build();

    OfflineModelConfig modelConfig =
        OfflineModelConfig.builder()
            .setSenseVoice(senseVoice)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineRecognizerConfig config =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .build();

    OfflineRecognizer recognizer = new OfflineRecognizer(config);
    OfflineStream stream = recognizer.createStream();
    stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());

    recognizer.decode(stream);

    String text = recognizer.getResult(stream).getText();

    System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);

    stream.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/NonStreamingDecodeFileSenseVoiceWithHr.java
================================================
// Copyright 2025 Xiaomi Corporation

// This file shows how to use an offline SenseVoice model,
// i.e., non-streaming SenseVoice model
// to decode files with homophone replacer.
import com.k2fsa.sherpa.onnx.*;

public class NonStreamingDecodeFileSenseVoiceWithHr {
  public static void main(String[] args) {
    // please refer to
    // https://k2-fsa.github.io/sherpa/onnx/sense-voice/index.html
    // to download model files
    String model = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx";
    String tokens = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt";

    String waveFilename = "./test-hr.wav";

    WaveReader reader = new WaveReader(waveFilename);

    OfflineSenseVoiceModelConfig senseVoice =
        OfflineSenseVoiceModelConfig.builder().setModel(model).build();

    OfflineModelConfig modelConfig =
        OfflineModelConfig.builder()
            .setSenseVoice(senseVoice)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    HomophoneReplacerConfig hr =
        HomophoneReplacerConfig.builder()
            .setDictDir("./dict")
            .setLexicon("./lexicon.txt")
            .setRuleFsts("./replace.fst")
            .build();

    OfflineRecognizerConfig config =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .setHr(hr)
            .build();

    OfflineRecognizer recognizer = new OfflineRecognizer(config);
    OfflineStream stream = recognizer.createStream();
    stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());

    recognizer.decode(stream);

    String text = recognizer.getResult(stream).getText();

    System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);

    stream.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/NonStreamingDecodeFileTeleSpeechCtc.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use an offline TeleSpeech CTC model
// to decode files.
import com.k2fsa.sherpa.onnx.*;

public class NonStreamingDecodeFileTeleSpeechCtc {
  public static void main(String[] args) {
    // please refer to
    // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/telespeech/models.html#sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04
    // to download model files
    String model = "./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/model.int8.onnx";
    String tokens = "./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt";

    String waveFilename = "./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/test_wavs/3-sichuan.wav";

    WaveReader reader = new WaveReader(waveFilename);

    OfflineModelConfig modelConfig =
        OfflineModelConfig.builder()
            .setTeleSpeech(model)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .setModelType("telespeech_ctc")
            .build();

    OfflineRecognizerConfig config =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .build();

    OfflineRecognizer recognizer = new OfflineRecognizer(config);
    OfflineStream stream = recognizer.createStream();
    stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());

    recognizer.decode(stream);

    String text = recognizer.getResult(stream).getText();

    System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);

    stream.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/NonStreamingDecodeFileTransducer.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use an offline transducer, i.e., non-streaming transducer,
// to decode files.
import com.k2fsa.sherpa.onnx.*;

public class NonStreamingDecodeFileTransducer {
  public static void main(String[] args) {
    // please refer to
    // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#sherpa-onnx-zipformer-gigaspeech-2023-12-12-english
    // to download model files
    String encoder =
        "./sherpa-onnx-zipformer-gigaspeech-2023-12-12/encoder-epoch-30-avg-1.int8.onnx";
    String decoder = "./sherpa-onnx-zipformer-gigaspeech-2023-12-12/decoder-epoch-30-avg-1.onnx";
    String joiner = "./sherpa-onnx-zipformer-gigaspeech-2023-12-12/joiner-epoch-30-avg-1.onnx";
    String tokens = "./sherpa-onnx-zipformer-gigaspeech-2023-12-12/tokens.txt";

    String waveFilename =
        "./sherpa-onnx-zipformer-gigaspeech-2023-12-12/test_wavs/1089-134686-0001.wav";

    WaveReader reader = new WaveReader(waveFilename);

    OfflineTransducerModelConfig transducer =
        OfflineTransducerModelConfig.builder()
            .setEncoder(encoder)
            .setDecoder(decoder)
            .setJoiner(joiner)
            .build();

    OfflineModelConfig modelConfig =
        OfflineModelConfig.builder()
            .setTransducer(transducer)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineRecognizerConfig config =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .build();

    OfflineRecognizer recognizer = new OfflineRecognizer(config);
    OfflineStream stream = recognizer.createStream();
    stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());

    recognizer.decode(stream);

    String text = recognizer.getResult(stream).getText();

    System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);

    stream.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/NonStreamingDecodeFileTransducerHotwords.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use an offline transducer, i.e., non-streaming transducer,
// to decode files with hotwords support.
//
// See also
// https://k2-fsa.github.io/sherpa/onnx/hotwords/index.html#modeling-unit-is-cjkchar
import com.k2fsa.sherpa.onnx.*;

public class NonStreamingDecodeFileTransducerHotwords {
  public static void main(String[] args) {
    // please refer to
    // https://k2-fsa.github.io/sherpa/onnx/hotwords/index.html#modeling-unit-is-cjkchar
    // to download model files
    String encoder =
        "./sherpa-onnx-conformer-zh-stateless2-2023-05-23/encoder-epoch-99-avg-1.int8.onnx";
    String decoder = "./sherpa-onnx-conformer-zh-stateless2-2023-05-23/decoder-epoch-99-avg-1.onnx";
    String joiner = "./sherpa-onnx-conformer-zh-stateless2-2023-05-23/joiner-epoch-99-avg-1.onnx";
    String tokens = "./sherpa-onnx-conformer-zh-stateless2-2023-05-23/tokens.txt";

    String waveFilename = "./sherpa-onnx-conformer-zh-stateless2-2023-05-23/test_wavs/6.wav";

    WaveReader reader = new WaveReader(waveFilename);

    OfflineTransducerModelConfig transducer =
        OfflineTransducerModelConfig.builder()
            .setEncoder(encoder)
            .setDecoder(decoder)
            .setJoiner(joiner)
            .build();

    OfflineModelConfig modelConfig =
        OfflineModelConfig.builder()
            .setTransducer(transducer)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .setModelingUnit("cjkchar")
            .build();

    OfflineRecognizerConfig config =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(modelConfig)
            .setDecodingMethod("modified_beam_search")
            .setHotwordsFile("./hotwords_cn.txt")
            .setHotwordsScore(2.0f)
            .build();

    OfflineRecognizer recognizer = new OfflineRecognizer(config);
    OfflineStream stream = recognizer.createStream();
    stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());

    recognizer.decode(stream);

    String text = recognizer.getResult(stream).getText();

    System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);

    stream.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/NonStreamingDecodeFileWenetCtc.java
================================================
// Copyright 2025 Xiaomi Corporation

// This file shows how to use an offline Wenet CTC model,
// i.e., non-streaming Wenet CTC model,
// to decode files.
import com.k2fsa.sherpa.onnx.*;

public class NonStreamingDecodeFileWenetCtc {
  public static void main(String[] args) {
    // please refer to
    // https://k2-fsa.github.io/sherpa/onnx/sense-voice/index.html
    // to download model files
    String model =
        "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx";

    String tokens =
        "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt";

    String waveFilename =
        "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav";

    WaveReader reader = new WaveReader(waveFilename);

    OfflineWenetCtcModelConfig wenetCtc =
        OfflineWenetCtcModelConfig.builder().setModel(model).build();

    OfflineModelConfig modelConfig =
        OfflineModelConfig.builder()
            .setWenetCtc(wenetCtc)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineRecognizerConfig config =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .build();

    OfflineRecognizer recognizer = new OfflineRecognizer(config);
    OfflineStream stream = recognizer.createStream();
    stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());

    recognizer.decode(stream);

    String text = recognizer.getResult(stream).getText();

    System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);

    stream.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/NonStreamingDecodeFileWhisper.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use an offline whisper, i.e., non-streaming whisper,
// to decode files.
import com.k2fsa.sherpa.onnx.*;

public class NonStreamingDecodeFileWhisper {
  public static void main(String[] args) {
    // please refer to
    // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html
    // to download model files
    String encoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx";
    String decoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx";
    String tokens = "./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt";

    String waveFilename = "./sherpa-onnx-whisper-tiny.en/test_wavs/1.wav";

    WaveReader reader = new WaveReader(waveFilename);

    OfflineWhisperModelConfig whisper =
        OfflineWhisperModelConfig.builder().setEncoder(encoder).setDecoder(decoder).build();

    OfflineModelConfig modelConfig =
        OfflineModelConfig.builder()
            .setWhisper(whisper)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineRecognizerConfig config =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .build();

    OfflineRecognizer recognizer = new OfflineRecognizer(config);
    OfflineStream stream = recognizer.createStream();
    stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());

    recognizer.decode(stream);

    String text = recognizer.getResult(stream).getText();

    System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);

    stream.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/NonStreamingDecodeFileWhisperMultiple.java
================================================
// Copyright 2025 Xiaomi Corporation

// This file shows how to use an offline whisper, i.e., non-streaming whisper,
// to decode files.
import com.k2fsa.sherpa.onnx.*;

public class NonStreamingDecodeFileWhisperMultiple {
  public static void main(String[] args) {
    // please refer to
    // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html
    // to download model files
    String encoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx";
    String decoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx";
    String tokens = "./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt";

    String waveFilename0 = "./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav";
    String waveFilename1 = "./sherpa-onnx-whisper-tiny.en/test_wavs/1.wav";

    WaveReader reader0 = new WaveReader(waveFilename0);
    WaveReader reader1 = new WaveReader(waveFilename1);

    OfflineWhisperModelConfig whisper =
        OfflineWhisperModelConfig.builder().setEncoder(encoder).setDecoder(decoder).build();

    OfflineModelConfig modelConfig =
        OfflineModelConfig.builder()
            .setWhisper(whisper)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineRecognizerConfig config =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .build();

    OfflineRecognizer recognizer = new OfflineRecognizer(config);
    OfflineStream stream0 = recognizer.createStream();
    stream0.acceptWaveform(reader0.getSamples(), reader0.getSampleRate());

    OfflineStream stream1 = recognizer.createStream();
    stream1.acceptWaveform(reader1.getSamples(), reader1.getSampleRate());

    OfflineStream[] ss = new OfflineStream[] {stream0, stream1};
    recognizer.decode(ss);

    String text0 = recognizer.getResult(stream0).getText();
    String text1 = recognizer.getResult(stream1).getText();

    System.out.printf("filename0:%s\nresult0:%s\n\n", waveFilename0, text0);
    System.out.printf("filename1:%s\nresult1:%s\n\n", waveFilename1, text1);

    stream0.release();
    stream1.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/NonStreamingDecodeFileZipformerCtc.java
================================================
// Copyright 2025 Xiaomi Corporation

// This file shows how to use an offline Zipformer CTC model,
// i.e., non-streaming Zipformer CTC model,
// to decode files.
import com.k2fsa.sherpa.onnx.*;

public class NonStreamingDecodeFileZipformerCtc {
  public static void main(String[] args) {
    // please refer to
    // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
    // to download model files
    String model = "./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx";
    String tokens = "./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt";

    String waveFilename = "./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/test_wavs/0.wav";

    WaveReader reader = new WaveReader(waveFilename);

    OfflineZipformerCtcModelConfig zipformerCtc =
        OfflineZipformerCtcModelConfig.builder().setModel(model).build();

    OfflineModelConfig modelConfig =
        OfflineModelConfig.builder()
            .setZipformerCtc(zipformerCtc)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineRecognizerConfig config =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .build();

    OfflineRecognizer recognizer = new OfflineRecognizer(config);
    OfflineStream stream = recognizer.createStream();
    stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());

    recognizer.decode(stream);

    String text = recognizer.getResult(stream).getText();

    System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);

    stream.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/NonStreamingSpeechEnhancementDpdfNet.java
================================================
// Copyright 2025 Xiaomi Corporation

// This file shows how to use DPDFNet speech enhancement models in sherpa-onnx
//
// Download DPDFNet models from either:
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
// https://huggingface.co/Ceva-IP/DPDFNet
//
// Use dpdfnet_baseline.onnx, dpdfnet2.onnx, dpdfnet4.onnx, or dpdfnet8.onnx
// for 16 kHz downstream ASR or speech recognition.
// Use dpdfnet2_48khz_hr.onnx for 48 kHz enhancement output.

import com.k2fsa.sherpa.onnx.*;

public class NonStreamingSpeechEnhancementDpdfNet {
  public static void main(String[] args) {
    String model = "./dpdfnet_baseline.onnx";
    OfflineSpeechDenoiserModelConfig.Builder builder =
        OfflineSpeechDenoiserModelConfig.builder()
            .setNumThreads(1)
            .setDebug(true)
            .setProvider("cpu")
            .setDpdfnet(
                OfflineSpeechDenoiserDpdfNetModelConfig.builder().setModel(model).build());

    OfflineSpeechDenoiserModelConfig modelConfig = builder.build();
    OfflineSpeechDenoiserConfig config =
        OfflineSpeechDenoiserConfig.builder().setModel(modelConfig).build();

    OfflineSpeechDenoiser speech_denoiser = new OfflineSpeechDenoiser(config);

    String testWaveFilename = "./inp_16k.wav";
    WaveReader reader = new WaveReader(testWaveFilename);

    DenoisedAudio denoised = speech_denoiser.run(reader.getSamples(), reader.getSampleRate());
    String outFilename = "enhanced.wav";
    WaveWriter.write(outFilename, denoised.getSamples(), denoised.getSampleRate());
    System.out.printf("Saved to %s\n", outFilename);

    speech_denoiser.release();
  }
}


================================================
FILE: java-api-examples/NonStreamingSpeechEnhancementGtcrn.java
================================================
// Copyright 2025 Xiaomi Corporation

// This file shows how to use speech enhancement models in sherpa-onnx
//
// Download GTCRN models and sample test waves from:
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models

import com.k2fsa.sherpa.onnx.*;

public class NonStreamingSpeechEnhancementGtcrn {
  public static void main(String[] args) {
    String model = "./gtcrn_simple.onnx";
    OfflineSpeechDenoiserModelConfig.Builder builder =
        OfflineSpeechDenoiserModelConfig.builder()
            .setNumThreads(1)
            .setDebug(true)
            .setProvider("cpu");

    builder.setGtcrn(OfflineSpeechDenoiserGtcrnModelConfig.builder().setModel(model).build());

    OfflineSpeechDenoiserModelConfig modelConfig = builder.build();
    OfflineSpeechDenoiserConfig config =
        OfflineSpeechDenoiserConfig.builder().setModel(modelConfig).build();

    OfflineSpeechDenoiser speechDenoiser = new OfflineSpeechDenoiser(config);

    String testWaveFilename = "./inp_16k.wav";
    WaveReader reader = new WaveReader(testWaveFilename);

    DenoisedAudio denoised = speechDenoiser.run(reader.getSamples(), reader.getSampleRate());
    String outFilename = "enhanced.wav";
    WaveWriter.write(outFilename, denoised.getSamples(), denoised.getSampleRate());
    System.out.printf("Saved to %s\n", outFilename);

    speechDenoiser.release();
  }
}


================================================
FILE: java-api-examples/NonStreamingTtsCoquiDe.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use a Coqui-ai VITS German TTS model
// to convert text to speech
import com.k2fsa.sherpa.onnx.*;

public class NonStreamingTtsCoquiDe {
  public static void main(String[] args) {
    // please visit
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
    // to download model files
    String model = "./vits-coqui-de-css10/model.onnx";
    String tokens = "./vits-coqui-de-css10/tokens.txt";
    String text = "Alles hat ein Ende, nur die Wurst hat zwei.";

    OfflineTtsVitsModelConfig vitsModelConfig =
        OfflineTtsVitsModelConfig.builder().setModel(model).setTokens(tokens).build();

    OfflineTtsModelConfig modelConfig =
        OfflineTtsModelConfig.builder()
            .setVits(vitsModelConfig)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineTtsConfig config = OfflineTtsConfig.builder().setModel(modelConfig).build();
    OfflineTts tts = new OfflineTts(config);

    int sid = 0;
    float speed = 1.0f;
    GenerationConfig genConfig = new GenerationConfig();
    genConfig.setSid(sid);
    genConfig.setSpeed(speed);
    genConfig.setSilenceScale(config.getSilenceScale());
    long start = System.currentTimeMillis();
    GeneratedAudio audio =
        tts.generateWithConfigAndCallback(text, genConfig, (float[] samples) -> 1);
    long stop = System.currentTimeMillis();

    float timeElapsedSeconds = (stop - start) / 1000.0f;

    float audioDuration = audio.getSamples().length / (float) audio.getSampleRate();
    float realTimeFactor = timeElapsedSeconds / audioDuration;

    String waveFilename = "tts-coqui-de.wav";
    audio.save(waveFilename);
    System.out.printf("-- elapsed : %.3f seconds\n", timeElapsedSeconds);
    System.out.printf("-- audio duration: %.3f seconds\n", audioDuration);
    System.out.printf("-- real-time factor (RTF): %.3f\n", realTimeFactor);
    System.out.printf("-- text: %s\n", text);
    System.out.printf("-- Saved to %s\n", waveFilename);

    tts.release();
  }
}


================================================
FILE: java-api-examples/NonStreamingTtsKittenEn.java
================================================
// Copyright 2025 Xiaomi Corporation

// This file shows how to use a KittenTTS English model
// to convert text to speech
import com.k2fsa.sherpa.onnx.*;

public class NonStreamingTtsKittenEn {
  public static void main(String[] args) {
    LibraryUtils.enableDebug();
    // please visit
    // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kitten.html
    // to download model files
    String model = "./kitten-nano-en-v0_1-fp16/model.fp16.onnx";
    String voices = "./kitten-nano-en-v0_1-fp16/voices.bin";
    String tokens = "./kitten-nano-en-v0_1-fp16/tokens.txt";
    String dataDir = "./kitten-nano-en-v0_1-fp16/espeak-ng-data";
    String text =
        "Today as always, men fall into two groups: slaves and free men. Whoever does not have"
            + " two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a"
            + " businessman, an official, or a scholar.";

    OfflineTtsKittenModelConfig kittenModelConfig =
        OfflineTtsKittenModelConfig.builder()
            .setModel(model)
            .setVoices(voices)
            .setTokens(tokens)
            .setDataDir(dataDir)
            .build();

    OfflineTtsModelConfig modelConfig =
        OfflineTtsModelConfig.builder()
            .setKitten(kittenModelConfig)
            .setNumThreads(2)
            .setDebug(true)
            .build();

    OfflineTtsConfig config = OfflineTtsConfig.builder().setModel(modelConfig).build();
    OfflineTts tts = new OfflineTts(config);

    int sid = 7;
    float speed = 1.0f;
    GenerationConfig genConfig = new GenerationConfig();
    genConfig.setSid(sid);
    genConfig.setSpeed(speed);
    genConfig.setSilenceScale(0.2f);
    long start = System.currentTimeMillis();
    GeneratedAudio audio = tts.generateWithConfigAndCallback(text, genConfig, samples -> {});
    long stop = System.currentTimeMillis();

    float timeElapsedSeconds = (stop - start) / 1000.0f;

    float audioDuration = audio.getSamples().length / (float) audio.getSampleRate();
    float realTimeFactor = timeElapsedSeconds / audioDuration;

    String waveFilename = "tts-kitten-en.wav";
    audio.save(waveFilename);
    System.out.printf("-- elapsed : %.3f seconds\n", timeElapsedSeconds);
    System.out.printf("-- audio duration: %.3f seconds\n", audioDuration);
    System.out.printf("-- real-time factor (RTF): %.3f\n", realTimeFactor);
    System.out.printf("-- text: %s\n", text);
    System.out.printf("-- Saved to %s\n", waveFilename);

    tts.release();
  }
}


================================================
FILE: java-api-examples/NonStreamingTtsKokoroEn.java
================================================
// Copyright 2025 Xiaomi Corporation

// This file shows how to use a Kokoro English model
// to convert text to speech
import com.k2fsa.sherpa.onnx.*;

public class NonStreamingTtsKokoroEn {
  public static void main(String[] args) {
    // please visit
    // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
    // to download model files
    String model = "./kokoro-en-v0_19/model.onnx";
    String voices = "./kokoro-en-v0_19/voices.bin";
    String tokens = "./kokoro-en-v0_19/tokens.txt";
    String dataDir = "./kokoro-en-v0_19/espeak-ng-data";
    String text =
        "Today as always, men fall into two groups: slaves and free men. Whoever does not have"
            + " two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a"
            + " businessman, an official, or a scholar.";

    OfflineTtsKokoroModelConfig kokoroModelConfig =
        OfflineTtsKokoroModelConfig.builder()
            .setModel(model)
            .setVoices(voices)
            .setTokens(tokens)
            .setDataDir(dataDir)
            .build();

    OfflineTtsModelConfig modelConfig =
        OfflineTtsModelConfig.builder()
            .setKokoro(kokoroModelConfig)
            .setNumThreads(2)
            .setDebug(true)
            .build();

    OfflineTtsConfig config = OfflineTtsConfig.builder().setModel(modelConfig).build();
    OfflineTts tts = new OfflineTts(config);

    int sid = 0;
    float speed = 1.0f;
    GenerationConfig genConfig = new GenerationConfig();
    genConfig.setSid(sid);
    genConfig.setSpeed(speed);
    genConfig.setSilenceScale(0.2f);
    long start = System.currentTimeMillis();
    GeneratedAudio audio = tts.generateWithConfigAndCallback(text, genConfig, samples -> {});
    long stop = System.currentTimeMillis();

    float timeElapsedSeconds = (stop - start) / 1000.0f;

    float audioDuration = audio.getSamples().length / (float) audio.getSampleRate();
    float realTimeFactor = timeElapsedSeconds / audioDuration;

    String waveFilename = "tts-kokoro-en.wav";
    audio.save(waveFilename);
    System.out.printf("-- elapsed : %.3f seconds\n", timeElapsedSeconds);
    System.out.printf("-- audio duration: %.3f seconds\n", audioDuration);
    System.out.printf("-- real-time factor (RTF): %.3f\n", realTimeFactor);
    System.out.printf("-- text: %s\n", text);
    System.out.printf("-- Saved to %s\n", waveFilename);

    tts.release();
  }
}


================================================
FILE: java-api-examples/NonStreamingTtsKokoroZhEn.java
================================================
// Copyright 2025 Xiaomi Corporation

// This file shows how to use a Kokoro multi-lingual model
// to convert Chinese and English text to speech
import com.k2fsa.sherpa.onnx.*;

public class NonStreamingTtsKokoroZhEn {
  public static void main(String[] args) {
    // please visit
    // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
    // to download model files
    String model = "./kokoro-multi-lang-v1_0/model.onnx";
    String voices = "./kokoro-multi-lang-v1_0/voices.bin";
    String tokens = "./kokoro-multi-lang-v1_0/tokens.txt";
    String dataDir = "./kokoro-multi-lang-v1_0/espeak-ng-data";
    String lexicon =
        "./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt";
    String text =
        "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki."
            + " 你觉得中英文说的如何呢？";

    OfflineTtsKokoroModelConfig kokoroModelConfig =
        OfflineTtsKokoroModelConfig.builder()
            .setModel(model)
            .setVoices(voices)
            .setTokens(tokens)
            .setDataDir(dataDir)
            .setLexicon(lexicon)
            .build();

    OfflineTtsModelConfig modelConfig =
        OfflineTtsModelConfig.builder()
            .setKokoro(kokoroModelConfig)
            .setNumThreads(2)
            .setDebug(true)
            .build();

    OfflineTtsConfig config = OfflineTtsConfig.builder().setModel(modelConfig).build();
    OfflineTts tts = new OfflineTts(config);

    int sid = 0; // this model has 53 speakers. You can use sid in the range 0-52
    float speed = 1.0f;
    GenerationConfig genConfig = new GenerationConfig();
    genConfig.setSid(sid);
    genConfig.setSpeed(speed);
    genConfig.setSilenceScale(0.2f);
    long start = System.currentTimeMillis();
    GeneratedAudio audio = tts.generateWithConfigAndCallback(text, genConfig, samples -> {});
    long stop = System.currentTimeMillis();

    float timeElapsedSeconds = (stop - start) / 1000.0f;

    float audioDuration = audio.getSamples().length / (float) audio.getSampleRate();
    float realTimeFactor = timeElapsedSeconds / audioDuration;

    String waveFilename = "tts-kokoro-zh-en.wav";
    audio.save(waveFilename);
    System.out.printf("-- elapsed : %.3f seconds\n", timeElapsedSeconds);
    System.out.printf("-- audio duration: %.3f seconds\n", audioDuration);
    System.out.printf("-- real-time factor (RTF): %.3f\n", realTimeFactor);
    System.out.printf("-- text: %s\n", text);
    System.out.printf("-- Saved to %s\n", waveFilename);

    tts.release();
  }
}


================================================
FILE: java-api-examples/NonStreamingTtsMatchaEn.java
================================================
// Copyright 2025 Xiaomi Corporation

// This file shows how to use a matcha English model
// to convert text to speech
import com.k2fsa.sherpa.onnx.*;

public class NonStreamingTtsMatchaEn {
  public static void main(String[] args) {
    // please visit
    // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
    // to download model files
    String acousticModel = "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx";
    String vocoder = "./vocos-22khz-univ.onnx";
    String tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt";
    String dataDir = "./matcha-icefall-en_US-ljspeech/espeak-ng-data";
    String text =
        "Today as always, men fall into two groups: slaves and free men. Whoever does not have"
            + " two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a"
            + " businessman, an official, or a scholar.";

    OfflineTtsMatchaModelConfig matchaModelConfig =
        OfflineTtsMatchaModelConfig.builder()
            .setAcousticModel(acousticModel)
            .setVocoder(vocoder)
            .setTokens(tokens)
            .setDataDir(dataDir)
            .build();

    OfflineTtsModelConfig modelConfig =
        OfflineTtsModelConfig.builder()
            .setMatcha(matchaModelConfig)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineTtsConfig config = OfflineTtsConfig.builder().setModel(modelConfig).build();
    OfflineTts tts = new OfflineTts(config);

    GenerationConfig genConfig = new GenerationConfig();
    genConfig.setSid(0);
    genConfig.setSpeed(1.0f);
    genConfig.setSilenceScale(config.getSilenceScale());

    long start = System.currentTimeMillis();
    GeneratedAudio audio = tts.generateWithConfigAndCallback(text, genConfig, (float[] samples) -> 1);
    long stop = System.currentTimeMillis();

    float timeElapsedSeconds = (stop - start) / 1000.0f;

    float audioDuration = audio.getSamples().length / (float) audio.getSampleRate();
    float realTimeFactor = timeElapsedSeconds / audioDuration;

    String waveFilename = "tts-matcha-en.wav";
    audio.save(waveFilename);
    System.out.printf("-- elapsed : %.3f seconds\n", timeElapsedSeconds);
    System.out.printf("-- audio duration: %.3f seconds\n", audioDuration);
    System.out.printf("-- real-time factor (RTF): %.3f\n", realTimeFactor);
    System.out.printf("-- text: %s\n", text);
    System.out.printf("-- speaker ID: %d\n", genConfig.getSid());
    System.out.printf("-- Saved to %s\n", waveFilename);

    tts.release();
  }
}


================================================
FILE: java-api-examples/NonStreamingTtsMatchaZh.java
================================================
// Copyright 2025 Xiaomi Corporation

// This file shows how to use a matcha Chinese TTS model
// to convert text to speech
import com.k2fsa.sherpa.onnx.*;

public class NonStreamingTtsMatchaZh {
  public static void main(String[] args) {
    // please visit
    // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
    // to download model files
    String acousticModel = "./matcha-icefall-zh-baker/model-steps-3.onnx";
    String vocoder = "./vocos-22khz-univ.onnx";
    String tokens = "./matcha-icefall-zh-baker/tokens.txt";
    String lexicon = "./matcha-icefall-zh-baker/lexicon.txt";
    String ruleFsts =
        "./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst";
    String text =
        "某某银行的副行长和一些行政领导表示，他们去过长江"
            + "和长白山; 经济不断增长。"
            + "2024年12月31号，拨打110或者18920240511。"
            + "123456块钱。";

    OfflineTtsMatchaModelConfig matchaModelConfig =
        OfflineTtsMatchaModelConfig.builder()
            .setAcousticModel(acousticModel)
            .setVocoder(vocoder)
            .setTokens(tokens)
            .setLexicon(lexicon)
            .build();

    OfflineTtsModelConfig modelConfig =
        OfflineTtsModelConfig.builder()
            .setMatcha(matchaModelConfig)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineTtsConfig config =
        OfflineTtsConfig.builder().setModel(modelConfig).setRuleFsts(ruleFsts).build();
    OfflineTts tts = new OfflineTts(config);

    GenerationConfig genConfig = new GenerationConfig();
    genConfig.setSid(0);
    genConfig.setSpeed(1.0f);
    genConfig.setSilenceScale(config.getSilenceScale());

    long start = System.currentTimeMillis();
    GeneratedAudio audio = tts.generateWithConfigAndCallback(text, genConfig, (float[] samples) -> 1);
    long stop = System.currentTimeMillis();

    float timeElapsedSeconds = (stop - start) / 1000.0f;

    float audioDuration = audio.getSamples().length / (float) audio.getSampleRate();
    float realTimeFactor = timeElapsedSeconds / audioDuration;

    String waveFilename = "tts-matcha-zh.wav";
    audio.save(waveFilename);
    System.out.printf("-- elapsed : %.3f seconds\n", timeElapsedSeconds);
    System.out.printf("-- audio duration: %.3f seconds\n", audioDuration);
    System.out.printf("-- real-time factor (RTF): %.3f\n", realTimeFactor);
    System.out.printf("-- text: %s\n", text);
    System.out.printf("-- speaker ID: %d\n", genConfig.getSid());
    System.out.printf("-- Saved to %s\n", waveFilename);

    tts.release();
  }
}


================================================
FILE: java-api-examples/NonStreamingTtsPiperEn.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use a piper VITS English TTS model
// to convert text to speech
import com.k2fsa.sherpa.onnx.*;

public class NonStreamingTtsPiperEn {
  public static void main(String[] args) {
    // please visit
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
    // to download model files
    String model = "./vits-piper-en_GB-cori-medium/en_GB-cori-medium.onnx";
    String tokens = "./vits-piper-en_GB-cori-medium/tokens.txt";
    String dataDir = "./vits-piper-en_GB-cori-medium/espeak-ng-data";
    String text =
        "Today as always, men fall into two groups: slaves and free men. Whoever does not have"
            + " two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a"
            + " businessman, an official, or a scholar.";

    OfflineTtsVitsModelConfig vitsModelConfig =
        OfflineTtsVitsModelConfig.builder()
            .setModel(model)
            .setTokens(tokens)
            .setDataDir(dataDir)
            .build();

    OfflineTtsModelConfig modelConfig =
        OfflineTtsModelConfig.builder()
            .setVits(vitsModelConfig)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineTtsConfig config = OfflineTtsConfig.builder().setModel(modelConfig).build();
    OfflineTts tts = new OfflineTts(config);

    int sid = 0;
    float speed = 1.0f;
    GenerationConfig genConfig = new GenerationConfig();
    genConfig.setSid(sid);
    genConfig.setSpeed(speed);
    genConfig.setSilenceScale(config.getSilenceScale());
    long start = System.currentTimeMillis();
    GeneratedAudio audio =
        tts.generateWithConfigAndCallback(text, genConfig, (float[] samples) -> 1);
    long stop = System.currentTimeMillis();

    float timeElapsedSeconds = (stop - start) / 1000.0f;

    float audioDuration = audio.getSamples().length / (float) audio.getSampleRate();
    float realTimeFactor = timeElapsedSeconds / audioDuration;

    String waveFilename = "tts-piper-en.wav";
    audio.save(waveFilename);
    System.out.printf("-- elapsed : %.3f seconds\n", timeElapsedSeconds);
    System.out.printf("-- audio duration: %.3f seconds\n", audioDuration);
    System.out.printf("-- real-time factor (RTF): %.3f\n", realTimeFactor);
    System.out.printf("-- text: %s\n", text);
    System.out.printf("-- Saved to %s\n", waveFilename);

    tts.release();
  }
}


================================================
FILE: java-api-examples/NonStreamingTtsPiperEnWithCallback.java
================================================
// Copyright 2024 Xiaomi Corporation
//
// References
// https://www.baeldung.com/java-passing-method-parameter
// https://www.geeksforgeeks.org/how-to-create-a-thread-safe-queue-in-java/
// https://stackoverflow.com/questions/74077394/java-audio-how-to-continuously-write-bytes-to-an-audio-file-as-they-are-being-g

// This file shows how to use a piper VITS English TTS model
// to convert text to speech. You can pass a callback to the generation call,
// which is invoked whenever max_num_sentences sentences have been
// finished generation.
//
// The callback saves the generated samples into a queue, which are played
// by a separate thread.

import com.k2fsa.sherpa.onnx.*;
import java.util.Queue;
import java.util.concurrent.*;
import java.util.concurrent.ConcurrentLinkedQueue;
import javax.sound.sampled.*;

public class NonStreamingTtsPiperEn {
  public static void main(String[] args) {
    // please visit
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
    // to download model files
    String model = "./vits-piper-en_GB-cori-medium/en_GB-cori-medium.onnx";
    String tokens = "./vits-piper-en_GB-cori-medium/tokens.txt";
    String dataDir = "./vits-piper-en_GB-cori-medium/espeak-ng-data";
    String text =
        "Today as always, men fall into two groups: slaves and free men. Whoever does not have"
            + " two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a"
            + " businessman, an official, or a scholar.";

    OfflineTtsVitsModelConfig vitsModelConfig =
        OfflineTtsVitsModelConfig.builder()
            .setModel(model)
            .setTokens(tokens)
            .setDataDir(dataDir)
            .build();

    OfflineTtsModelConfig modelConfig =
        OfflineTtsModelConfig.builder()
            .setVits(vitsModelConfig)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineTtsConfig config = OfflineTtsConfig.builder().setModel(modelConfig).build();
    OfflineTts tts = new OfflineTts(config);

    Queue<byte[]> samplesQueue = new ConcurrentLinkedQueue<>();

    Semaphore canPlaySem = new Semaphore(1);
    try {
      canPlaySem.acquire();
    } catch (InterruptedException ex) {
      System.out.println("Failed to acquire the play semaphore in the main thread");
      return;
    }

    Runnable playRuannable =
        () -> {
          try {
            canPlaySem.acquire();
          } catch (InterruptedException e) {
            System.out.println("Failed to get canPlay semaphore in the play thread");
            return;
          }

          // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/AudioFormat.html
          AudioFormat format =
              new AudioFormat(
                  tts.getSampleRate(), // sampleRate
                  16, // sampleSizeInBits
                  1, // channels
                  true, // signed
                  false // bigEndian
                  );
          DataLine.Info info = new DataLine.Info(SourceDataLine.class, format);
          SourceDataLine line;
          try {
            line = (SourceDataLine) AudioSystem.getLine(info);

            int bufferSizeInBytes = tts.getSampleRate(); // 0.5 seconds
            line.open(format, bufferSizeInBytes);
          } catch (LineUnavailableException ex) {
            System.out.println("Failed to open a device for playing");
            return;
          }
          line.start();

          while (true) {
            if (samplesQueue.isEmpty()) {
              // Do nothing.
              //
              // If the generating speed is very slow, we can sleep
              // for some time here to save some CPU.
            } else {
              byte[] samples = samplesQueue.poll();
              if (samples.length == 1) {
                // end of the generating
                break;
              }
              line.write(samples, 0, samples.length);
            }
          }

          line.drain();
          line.close();
        };

    Thread playThread = new Thread(playRuannable);
    playThread.start();

    int sid = 0;
    float speed = 1.0f;
    GenerationConfig genConfig = new GenerationConfig();
    genConfig.setSid(sid);
    genConfig.setSpeed(speed);
    genConfig.setSilenceScale(config.getSilenceScale());
    long start = System.currentTimeMillis();
    GeneratedAudio audio =
        tts.generateWithConfigAndCallback(
            text,
            genConfig,
            (float[] samples) -> {

              // we use a byte array to save int16 samples
              byte[] samplesInt16 = new byte[samples.length * 2];
              for (int i = 0; i < samples.length; ++i) {
                float s = samples[i];
                if (s > 1) {
                  s = 1;
                }

                if (s < -1) {
                  s = -1;
                }

                short t = (short) (s * 32767);

                // we use little endian
                samplesInt16[2 * i] = (byte) (t & 0xff);
                samplesInt16[2 * i + 1] = (byte) ((t & 0xff00) >> 8);
              }

              samplesQueue.add(samplesInt16);

              canPlaySem.release();

              // Note: You can play the samples.
              // warning: You need to save a copy of samples since it is freed
              // when this function returns

              // return 1 to continue generation
              // return 0 to stop generation
              return 1;
            });

    // Since a sample always has two bytes. We put a single byte
    // into the queue to indicate that we have finished processing.
    samplesQueue.add(new byte[1]);

    long stop = System.currentTimeMillis();

    float timeElapsedSeconds = (stop - start) / 1000.0f;

    float audioDuration = audio.getSamples().length / (float) audio.getSampleRate();
    float realTimeFactor = timeElapsedSeconds / audioDuration;

    try {
      playThread.join();
    } catch (InterruptedException ex) {
      System.out.println("Failed to join the play thread");
      return;
    }

    String waveFilename = "tts-piper-en.wav";
    audio.save(waveFilename);
    System.out.printf("-- elapsed : %.3f seconds\n", timeElapsedSeconds);
    System.out.printf("-- audio duration: %.3f seconds\n", audioDuration);
    System.out.printf("-- real-time factor (RTF): %.3f\n", realTimeFactor);
    System.out.printf("-- text: %s\n", text);
    System.out.printf("-- Saved to %s\n", waveFilename);

    tts.release();
  }
}


================================================
FILE: java-api-examples/NonStreamingTtsVitsZh.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use a VITS Chinese TTS model
// to convert text to speech.
//
// You can use https://github.com/Plachtaa/VITS-fast-fine-tuning
// to train your model
import com.k2fsa.sherpa.onnx.*;

public class NonStreamingTtsPiperEn {
  public static void main(String[] args) {
    // please visit
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
    // to download model files
    String model = "./vits-zh-hf-fanchen-C/vits-zh-hf-fanchen-C.onnx";
    String tokens = "./vits-zh-hf-fanchen-C/tokens.txt";
    String lexicon = "./vits-zh-hf-fanchen-C/lexicon.txt";
    String ruleFsts =
        "./vits-zh-hf-fanchen-C/phone.fst,./vits-zh-hf-fanchen-C/date.fst,./vits-zh-hf-fanchen-C/number.fst";
    String text = "有问题，请拨打110或者手机18601239876。我们的价值观是真诚热爱！";

    OfflineTtsVitsModelConfig vitsModelConfig =
        OfflineTtsVitsModelConfig.builder()
            .setModel(model)
            .setTokens(tokens)
            .setLexicon(lexicon)
            .build();

    OfflineTtsModelConfig modelConfig =
        OfflineTtsModelConfig.builder()
            .setVits(vitsModelConfig)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineTtsConfig config =
        OfflineTtsConfig.builder().setModel(modelConfig).setRuleFsts(ruleFsts).build();

    OfflineTts tts = new OfflineTts(config);

    int sid = 100;
    float speed = 1.0f;
    GenerationConfig genConfig = new GenerationConfig();
    genConfig.setSid(sid);
    genConfig.setSpeed(speed);
    genConfig.setSilenceScale(config.getSilenceScale());
    long start = System.currentTimeMillis();
    GeneratedAudio audio =
        tts.generateWithConfigAndCallback(text, genConfig, (float[] samples) -> 1);
    long stop = System.currentTimeMillis();

    float timeElapsedSeconds = (stop - start) / 1000.0f;

    float audioDuration = audio.getSamples().length / (float) audio.getSampleRate();
    float realTimeFactor = timeElapsedSeconds / audioDuration;

    String waveFilename = "tts-vits-zh.wav";
    audio.save(waveFilename);
    System.out.printf("-- elapsed : %.3f seconds\n", timeElapsedSeconds);
    System.out.printf("-- audio duration: %.3f seconds\n", audioDuration);
    System.out.printf("-- real-time factor (RTF): %.3f\n", realTimeFactor);
    System.out.printf("-- text: %s\n", text);
    System.out.printf("-- Saved to %s\n", waveFilename);

    tts.release();
  }
}


================================================
FILE: java-api-examples/NonStreamingWebsocketClient.java
================================================
// Refer to
// https://stackoverflow.com/questions/55380813/require-assistance-with-simple-pure-java-11-websocket-client-example
//
//
// This is a WebSocketClient client for ../python-api-examples/non_streaming_server.py
//
// Please see ./run-non-streaming-websocket-client.sh
import com.k2fsa.sherpa.onnx.*;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.WebSocket;
import java.nio.*;
import java.util.concurrent.CompletionStage;
import java.util.concurrent.CountDownLatch;

public class NonStreamingWebsocketClient {
  public static void main(String[] args) throws Exception {
    CountDownLatch latch = new CountDownLatch(1);

    WebSocket ws =
        HttpClient.newHttpClient()
            .newWebSocketBuilder()
            .buildAsync(URI.create("ws://localhost:6006"), new WebSocketClient(latch))
            .join();

    // Please use a 16-bit, single channel wav for testing.
    // the sample rate does not need to be 16kHz
    String waveFilename = "./zh.wav";
    WaveReader reader = new WaveReader(waveFilename);
    int sampleRate = reader.getSampleRate();
    int numSamples = reader.getSamples().length;

    // Here is the format of the message
    // byte 0-3 in little endian: sampleRate
    // byte 4-7 in little endian: number of bytes for samples
    // remaining bytes: samples. Each sample is a float32
    ByteBuffer buffer = ByteBuffer.allocate(8 + 4 * numSamples).order(ByteOrder.LITTLE_ENDIAN);
    buffer.putInt(sampleRate);
    buffer.putInt(numSamples * 4); // each sample has 4 bytes

    for (float s : reader.getSamples()) {
      buffer.putFloat(s);
    }

    buffer.rewind();
    buffer.flip();
    buffer.order(ByteOrder.LITTLE_ENDIAN);

    ws.sendBinary(ByteBuffer.wrap(buffer.array()), true).join();

    // Send Done to the server to indicate that we don't have new wave files to decode
    ws.sendText("Done", true).join();

    latch.await();
  }

  private static class WebSocketClient implements WebSocket.Listener {
    private final CountDownLatch latch;

    public WebSocketClient(CountDownLatch latch) {
      this.latch = latch;
    }

    @Override
    public CompletionStage<?> onText(WebSocket webSocket, CharSequence data, boolean last) {
      System.out.println("Result is " + data);
      latch.countDown();
      return WebSocket.Listener.super.onText(webSocket, data, last);
    }
  }
}


================================================
FILE: java-api-examples/OfflineAddPunctuation.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use a punctuation model to add punctuations to text.
//
// The model supports both English and Chinese.
import com.k2fsa.sherpa.onnx.*;

public class OfflineAddPunctuation {
  public static void main(String[] args) {
    // please download the model from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models
    String model = "./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx";
    OfflinePunctuationModelConfig modelConfig =
        OfflinePunctuationModelConfig.builder()
            .setCtTransformer(model)
            .setNumThreads(1)
            .setDebug(true)
            .build();
    OfflinePunctuationConfig config =
        OfflinePunctuationConfig.builder().setModel(modelConfig).build();

    OfflinePunctuation punct = new OfflinePunctuation(config);

    String[] sentences =
        new String[] {
          "这是一个测试你好吗How are you我很好thank you are you ok谢谢你",
          "我们都是木头人不会说话不会动",
          "The African blogosphere is rapidly expanding bringing more voices online in the form of"
              + " commentaries opinions analyses rants and poetry",
        };

    System.out.println("---");
    for (String text : sentences) {
      String out = punct.addPunctuation(text);
      System.out.printf("Input: %s\n", text);
      System.out.printf("Output: %s\n", out);
      System.out.println("---");
    }
  }
}


================================================
FILE: java-api-examples/OfflineSpeakerDiarizationDemo.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use sherpa-onnx Java API for speaker diarization,
import com.k2fsa.sherpa.onnx.*;

public class OfflineSpeakerDiarizationDemo {
  public static void main(String[] args) {
    /* Please use the following commands to download files used in this file
    Step 1: Download a speaker segmentation model

    Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
    for a list of available models. The following is an example

      wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
      tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
      rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2

    Step 2: Download a speaker embedding extractor model

    Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
    for a list of available models. The following is an example

      wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

    Step 3. Download test wave files

    Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
    for a list of available test wave files. The following is an example

      wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav

    Step 4. Run it
        */

    String segmentationModel = "./sherpa-onnx-pyannote-segmentation-3-0/model.onnx";
    String embeddingModel = "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx";
    String waveFilename = "./0-four-speakers-zh.wav";

    WaveReader reader = new WaveReader(waveFilename);

    OfflineSpeakerSegmentationPyannoteModelConfig pyannote =
        OfflineSpeakerSegmentationPyannoteModelConfig.builder().setModel(segmentationModel).build();

    OfflineSpeakerSegmentationModelConfig segmentation =
        OfflineSpeakerSegmentationModelConfig.builder()
            .setPyannote(pyannote)
            .setDebug(true)
            .build();

    SpeakerEmbeddingExtractorConfig embedding =
        SpeakerEmbeddingExtractorConfig.builder().setModel(embeddingModel).setDebug(true).build();

    // The test wave file ./0-four-speakers-zh.wav contains four speakers, so
    // we use numClusters=4 here. If you don't know the number of speakers
    // in the test wave file, please set the numClusters to -1 and provide
    // threshold for clustering
    FastClusteringConfig clustering =
        FastClusteringConfig.builder()
            .setNumClusters(4) // set it to -1 if you don't know the actual number
            .setThreshold(0.5f)
            .build();

    OfflineSpeakerDiarizationConfig config =
        OfflineSpeakerDiarizationConfig.builder()
            .setSegmentation(segmentation)
            .setEmbedding(embedding)
            .setClustering(clustering)
            .setMinDurationOn(0.2f)
            .setMinDurationOff(0.5f)
            .build();

    OfflineSpeakerDiarization sd = new OfflineSpeakerDiarization(config);
    if (sd.getSampleRate() != reader.getSampleRate()) {
      System.out.printf(
          "Expected sample rate: %d, given: %d\n", sd.getSampleRate(), reader.getSampleRate());
      sd.release();
      return;
    }

    // OfflineSpeakerDiarizationSegment[] segments = sd.process(reader.getSamples());
    // without callback is also ok

    // or you can use a callback to show the progress
    OfflineSpeakerDiarizationSegment[] segments =
        sd.processWithCallback(
            reader.getSamples(),
            (int numProcessedChunks, int numTotalChunks, long arg) -> {
              float progress = 100.0f * numProcessedChunks / numTotalChunks;
              System.out.printf("Progress: %.2f%%\n", progress);

              return 0;
            });

    for (OfflineSpeakerDiarizationSegment s : segments) {
      System.out.printf("%.3f -- %.3f speaker_%02d\n", s.getStart(), s.getEnd(), s.getSpeaker());
    }

    sd.release();
  }
}


================================================
FILE: java-api-examples/OnlineAddPunctuation.java
================================================
// Copyright 2025 Xiaomi Corporation

// This file shows how to use a punctuation model to add punctuations to text.
//
// The model supports ONLY English.
import com.k2fsa.sherpa.onnx.*;

public class OnlineAddPunctuation {
  public static void main(String[] args) {
    // please download the model from
    // https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
    String model = "./sherpa-onnx-online-punct-en-2024-08-06/model.int8.onnx";
    String bpeVocab = "./sherpa-onnx-online-punct-en-2024-08-06/bpe.vocab";
    OnlinePunctuationModelConfig modelConfig =
        OnlinePunctuationModelConfig.builder()
            .setCnnBilstm(model)
            .setBpeVocab(bpeVocab)
            .setNumThreads(1)
            .setDebug(true)
            .build();
    OnlinePunctuationConfig config =
        OnlinePunctuationConfig.builder().setModel(modelConfig).build();

    OnlinePunctuation punct = new OnlinePunctuation(config);

    String[] sentences =
        new String[] {
          "how are you doing fantastic thank you how about you",
          "The African blogosphere is rapidly expanding bringing more voices online in the form of"
              + " commentaries opinions analyses rants and poetry",
        };

    System.out.println("---");
    for (String text : sentences) {
      String out = punct.addPunctuation(text);
      System.out.printf("Input: %s\n", text);
      System.out.printf("Output: %s\n", out);
      System.out.println("---");
    }
  }
}


================================================
FILE: java-api-examples/PocketTts.java
================================================
// Copyright 2026 Xiaomi Corporation

// This file shows how to use a PocketTTS English model
// for voice cloning.
import com.k2fsa.sherpa.onnx.*;
import java.util.HashMap;
import java.util.Map;

public class PocketTts {
  public static void main(String[] args) {
    LibraryUtils.enableDebug();
    // please visit
    // https://k2-fsa.github.io/sherpa/onnx/tts/pocket.html
    // to download model files
    String lmFlow = "./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_flow.int8.onnx";
    String lmMain = "./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_main.int8.onnx";
    String encoder = "./sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx";
    String decoder = "./sherpa-onnx-pocket-tts-int8-2026-01-26/decoder.int8.onnx";
    String textConditioner = "./sherpa-onnx-pocket-tts-int8-2026-01-26/text_conditioner.onnx";
    String vocabJson = "./sherpa-onnx-pocket-tts-int8-2026-01-26/vocab.json";
    String tokenScoresJson = "./sherpa-onnx-pocket-tts-int8-2026-01-26/token_scores.json";
    String text =
        "Today as always, men fall into two groups: slaves and free men. Whoever does not have"
            + " two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a"
            + " businessman, an official, or a scholar.";

    OfflineTtsPocketModelConfig pocketModelConfig =
        OfflineTtsPocketModelConfig.builder()
            .setLmMain(lmMain)
            .setLmFlow(lmFlow)
            .setEncoder(encoder)
            .setDecoder(decoder)
            .setTextConditioner(textConditioner)
            .setVocabJson(vocabJson)
            .setTokenScoresJson(tokenScoresJson)
            .build();

    OfflineTtsModelConfig modelConfig =
        OfflineTtsModelConfig.builder()
            .setPocket(pocketModelConfig)
            .setNumThreads(2)
            .setDebug(true)
            .build();

    OfflineTtsConfig config = OfflineTtsConfig.builder().setModel(modelConfig).build();
    OfflineTts tts = new OfflineTts(config);

    String referenceAudioFilename = "./sherpa-onnx-pocket-tts-int8-2026-01-26/test_wavs/bria.wav";
    WaveReader reader = new WaveReader(referenceAudioFilename);

    GenerationConfig genConfig = new GenerationConfig();
    genConfig.setReferenceAudio(reader.getSamples());
    genConfig.setReferenceSampleRate(reader.getSampleRate());
    genConfig.setNumSteps(5);

    Map<String, String> extra = new HashMap<>();
    extra.put("temperature", "0.7");
    extra.put("chunk_size", "15");

    genConfig.setExtra(extra);

    long start = System.currentTimeMillis();
    GeneratedAudio audio = null;

    // You can choose one of the following callback style
    // ---------------------------------------------------
    // 1. Anonymous class implementing OfflineTtsCallback
    // ---------------------------------------------------
    if (true) {
      audio =
          tts.generateWithConfigAndCallback(
              text,
              genConfig,
              new OfflineTtsCallback() {
                @Override
                public Integer invoke(float[] samples) {
                  // you can play the generated samples in a separate thread
                  System.out.println("callback got called with " + samples.length + " samples");
                  // 1 = continue, 0 = stop
                  return 1;
                }
              });
    }

    // -------------------------------
    // 2. Lambda implementing OfflineTtsCallback
    // -------------------------------
    if (false) {
      audio =
          tts.generateWithConfigAndCallback(
              text,
              genConfig,
              samples -> {
                System.out.println("Lambda Integer callback: " + samples.length);
                return 1; // continue
              });
    }

    if (false) {
      audio =
          tts.generateWithConfigAndCallback(
              text,
              genConfig,
              samples -> {
                System.out.println("Consumer: " + samples.length);
                // implicitly, it returns 1 internally
              });
    }

    if (audio == null) {
      System.err.println("No audio was generated. Please enable at least one callback branch.");
      return;
    }

    long stop = System.currentTimeMillis();

    float timeElapsedSeconds = (stop - start) / 1000.0f;

    float audioDuration = audio.getSamples().length / (float) audio.getSampleRate();
    float realTimeFactor = timeElapsedSeconds / audioDuration;

    String waveFilename = "pocket-tts-bria.wav";
    audio.save(waveFilename);
    System.out.printf("-- elapsed : %.3f seconds\n", timeElapsedSeconds);
    System.out.printf("-- audio duration: %.3f seconds\n", audioDuration);
    System.out.printf("-- real-time factor (RTF): %.3f\n", realTimeFactor);
    System.out.printf("-- text: %s\n", text);
    System.out.printf("-- Saved to %s\n", waveFilename);

    tts.release();
  }
}


================================================
FILE: java-api-examples/README.md
================================================
# Introduction

This directory contains examples for the JAVA API of sherpa-onnx.

# Usage

## Non-streaming speech enhancement

```bash
./run-non-streaming-speech-enhancement-gtcrn.sh
./run-non-streaming-speech-enhancement-dpdfnet.sh
```

Use 16 kHz DPDFNet models such as
`dpdfnet_baseline.onnx`, `dpdfnet2.onnx`, `dpdfnet4.onnx`, or `dpdfnet8.onnx` for
downstream ASR and `dpdfnet2_48khz_hr.onnx` for 48 kHz enhancement output.

## Non-streaming speaker diarization

```bash
./run-offline-speaker-diarization.sh
```

## Streaming Speech recognition

```bash
./run-streaming-asr-from-mic-transducer.sh
./run-streaming-decode-file-ctc-hlg.sh
./run-streaming-decode-file-ctc.sh
./run-streaming-decode-file-paraformer.sh
./run-streaming-decode-file-tone-ctc.sh
./run-streaming-decode-file-transducer.sh
```

## Non-Streaming Speech recognition

```bash
./run-non-streaming-decode-file-dolphin-ctc.sh
./run-non-streaming-decode-file-fire-red-asr-ctc.sh
./run-non-streaming-decode-file-fire-red-asr.sh
./run-non-streaming-decode-file-funasr-nano.sh
./run-non-streaming-decode-file-medasr-ctc.sh
./run-non-streaming-decode-file-moonshine.sh
./run-non-streaming-decode-file-moonshine-v2.sh
./run-non-streaming-decode-file-nemo-canary.sh
./run-non-streaming-decode-file-nemo.sh
./run-non-streaming-decode-file-omnilingual-asr-ctc.sh
./run-non-streaming-decode-file-paraformer.sh
./run-non-streaming-decode-file-sense-voice-with-hr.sh
./run-non-streaming-decode-file-sense-voice.sh
./run-non-streaming-decode-file-tele-speech-ctc.sh
./run-non-streaming-decode-file-transducer-hotwords.sh
./run-non-streaming-decode-file-transducer.sh
./run-non-streaming-decode-file-wenet-ctc.sh
./run-non-streaming-decode-file-whisper-multiple.sh
./run-non-streaming-decode-file-whisper.sh
./run-non-streaming-decode-file-zipformer-ctc.sh
```

## Non-Streaming Speech recognition with homophone replacer

```bash
./run-non-streaming-decode-file-sense-voice-with-hr.sh
```

## Non-Streaming text-to-speech

```bash
./run-non-streaming-tts-coqui-de.sh
./run-non-streaming-tts-kitten-en.sh
./run-non-streaming-tts-kokoro-en.sh
./run-non-streaming-tts-kokoro-zh-en.sh
./run-non-streaming-tts-matcha-en.sh
./run-non-streaming-tts-matcha-zh.sh
./run-non-streaming-tts-piper-en-with-callback.sh
./run-non-streaming-tts-piper-en.sh
./run-non-streaming-tts-vits-zh.sh
./run-pocket-tts.sh
./run-zipvoice-tts.sh
```

## Non-Streaming text-to-speech (Playback the audio as it is being generated)

```bash
./run-non-streaming-tts-piper-en-with-callback.sh
```

## Spoken language identification

```bash
./run-spoken-language-identification-whisper.sh
```

## Add punctuations to text

The punctuation model supports both English and Chinese.

```bash
./run-offline-add-punctuation-zh-en.sh
./run-online-add-punctuation-zh-en.sh
```

## Audio tagging

```bash
./run-audio-tagging-zipformer-from-file.sh
./run-audio-tagging-ced-from-file.sh
```

## Speaker identification

```bash
./run-speaker-identification.sh
```

## VAD with a microphone

```bash
./run-vad-from-mic.sh
```

## VAD with a microphone + Non-streaming SenseVoice for speech recognition

```bash
./run-vad-from-mic-non-streaming-sense-voice.sh
```

## VAD with a microphone + Non-streaming Paraformer for speech recognition

```bash
./run-vad-from-mic-non-streaming-paraformer.sh
```

## VAD with a microphone + Non-streaming Whisper tiny.en for speech recognition

```bash
./run-vad-from-mic-non-streaming-whisper.sh
```

## VAD (Remove silence)

```bash
./run-vad-remove-slience.sh
./run-ten-vad-remove-slience.sh
```

## VAD + Non-streaming Dolphin CTC for speech recognition

```bash
./run-vad-non-streaming-dolphin-ctc.sh
```

## VAD + Non-streaming SenseVoice for speech recognition

```bash
./run-vad-non-streaming-sense-voice.sh
```

## VAD + Non-streaming Paraformer for speech recognition

```bash
./run-vad-non-streaming-paraformer.sh
```

## Keyword spotter

```bash
./run-kws-from-file.sh
```


================================================
FILE: java-api-examples/SpeakerIdentification.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use a speaker embedding extractor model for speaker
// identification.
import com.k2fsa.sherpa.onnx.*;

public class SpeakerIdentification {
  public static float[] computeEmbedding(SpeakerEmbeddingExtractor extractor, String filename) {
    WaveReader reader = new WaveReader(filename);

    OnlineStream stream = extractor.createStream();
    stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());
    stream.inputFinished();

    float[] embedding = extractor.compute(stream);
    stream.release();

    return embedding;
  }

  public static void main(String[] args) {
    // Please download the model from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
    String model = "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx";
    SpeakerEmbeddingExtractorConfig config =
        SpeakerEmbeddingExtractorConfig.builder()
            .setModel(model)
            .setNumThreads(1)
            .setDebug(true)
            .build();
    SpeakerEmbeddingExtractor extractor = new SpeakerEmbeddingExtractor(config);
    SpeakerEmbeddingManager manager = new SpeakerEmbeddingManager(extractor.getDim());

    try {
      String[] spk1Files =
          new String[] {
            "./sr-data/enroll/fangjun-sr-1.wav",
            "./sr-data/enroll/fangjun-sr-2.wav",
            "./sr-data/enroll/fangjun-sr-3.wav",
          };

      float[][] spk1Vec = new float[spk1Files.length][];

      for (int i = 0; i < spk1Files.length; ++i) {
        spk1Vec[i] = computeEmbedding(extractor, spk1Files[i]);
      }

      String[] spk2Files =
          new String[] {
            "./sr-data/enroll/leijun-sr-1.wav", "./sr-data/enroll/leijun-sr-2.wav",
          };

      float[][] spk2Vec = new float[spk2Files.length][];

      for (int i = 0; i < spk2Files.length; ++i) {
        spk2Vec[i] = computeEmbedding(extractor, spk2Files[i]);
      }

      if (!manager.add("fangjun", spk1Vec)) {
        System.out.println("Failed to register fangjun");
        return;
      }

      if (!manager.add("leijun", spk2Vec)) {
        System.out.println("Failed to register leijun");
        return;
      }

      if (manager.getNumSpeakers() != 2) {
        System.out.println("There should be two speakers");
        return;
      }

      if (!manager.contains("fangjun")) {
        System.out.println("It should contain the speaker fangjun");
        return;
      }

      if (!manager.contains("leijun")) {
        System.out.println("It should contain the speaker leijun");
        return;
      }

      System.out.println("---All speakers---");
      String[] allSpeakers = manager.getAllSpeakerNames();
      for (String s : allSpeakers) {
        System.out.println(s);
      }
      System.out.println("------------");

      String[] testFiles =
          new String[] {
            "./sr-data/test/fangjun-test-sr-1.wav",
            "./sr-data/test/leijun-test-sr-1.wav",
            "./sr-data/test/liudehua-test-sr-1.wav"
          };

      float threshold = 0.6f;
      for (String file : testFiles) {
        float[] embedding = computeEmbedding(extractor, file);

        String name = manager.search(embedding, threshold);
        if (name.isEmpty()) {
          name = "<Unknown>";
        }
        System.out.printf("%s: %s\n", file, name);
      }

      // test verify
      if (!manager.verify("fangjun", computeEmbedding(extractor, testFiles[0]), threshold)) {
        System.out.printf("%s should match fangjun!\n", testFiles[0]);
        return;
      }

      if (!manager.remove("fangjun")) {
        System.out.println("Failed to remove fangjun");
        return;
      }

      if (manager.verify("fangjun", computeEmbedding(extractor, testFiles[0]), threshold)) {
        System.out.printf("%s should match no one!\n", testFiles[0]);
        return;
      }

      if (manager.getNumSpeakers() != 1) {
        System.out.println("There should only 1 speaker left.");
        return;
      }
    } finally {
      extractor.release();
      manager.release();
    }
  }
}


================================================
FILE: java-api-examples/SpokenLanguageIdentificationWhisper.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use a multilingual whisper model for
// spoken language identification.
//
// Note that it needs a multilingual whisper model. For instance,
// tiny works, but tiny.en doesn't.
import com.k2fsa.sherpa.onnx.*;

public class SpokenLanguageIdentificationWhisper {
  public static void main(String[] args) {
    // please download model and test files from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    String encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx";
    String decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx";

    String[] testFiles =
        new String[] {
          "./spoken-language-identification-test-wavs/en-english.wav",
          "./spoken-language-identification-test-wavs/de-german.wav",
          "./spoken-language-identification-test-wavs/zh-chinese.wav",
          "./spoken-language-identification-test-wavs/es-spanish.wav",
          "./spoken-language-identification-test-wavs/fa-persian.wav",
          "./spoken-language-identification-test-wavs/ko-korean.wav",
          "./spoken-language-identification-test-wavs/ja-japanese.wav",
          "./spoken-language-identification-test-wavs/ru-russian.wav",
          "./spoken-language-identification-test-wavs/uk-ukrainian.wav",
        };

    SpokenLanguageIdentificationWhisperConfig whisper =
        SpokenLanguageIdentificationWhisperConfig.builder()
            .setEncoder(encoder)
            .setDecoder(decoder)
            .build();

    SpokenLanguageIdentificationConfig config =
        SpokenLanguageIdentificationConfig.builder()
            .setWhisper(whisper)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    SpokenLanguageIdentification slid = new SpokenLanguageIdentification(config);
    for (String filename : testFiles) {
      WaveReader reader = new WaveReader(filename);

      OfflineStream stream = slid.createStream();
      stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());

      String lang = slid.compute(stream);
      System.out.println("---");
      System.out.printf("filename: %s\n", filename);
      System.out.printf("lang: %s\n", lang);

      stream.release();
    }
    System.out.println("---");

    slid.release();
  }
}


================================================
FILE: java-api-examples/StreamingAsrFromMicTransducer.java
================================================
// Copyright 2022-2023 by zhaoming
// Copyright 2024 Xiaomi Corporation

// This file shows how to use an online transducer, i.e., streaming transducer,
// for real-time speech recognition with a microphone.
import com.k2fsa.sherpa.onnx.*;
import javax.sound.sampled.*;

public class StreamingAsrFromMicTransducer {
  public static void main(String[] args) {
    // please refer to
    // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
    // to download model files
    String encoder =
        "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx";
    String decoder =
        "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx";
    String joiner =
        "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx";
    String tokens = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt";

    // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
    String ruleFsts = "./itn_zh_number.fst";

    int sampleRate = 16000;

    OnlineTransducerModelConfig transducer =
        OnlineTransducerModelConfig.builder()
            .setEncoder(encoder)
            .setDecoder(decoder)
            .setJoiner(joiner)
            .build();

    OnlineModelConfig modelConfig =
        OnlineModelConfig.builder()
            .setTransducer(transducer)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OnlineRecognizerConfig config =
        OnlineRecognizerConfig.builder()
            .setOnlineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .setRuleFsts(ruleFsts)
            .build();

    OnlineRecognizer recognizer = new OnlineRecognizer(config);
    OnlineStream stream = recognizer.createStream();

    // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/AudioFormat.html
    // Linear PCM, 16000Hz, 16-bit, 1 channel, signed, little endian
    AudioFormat format = new AudioFormat(sampleRate, 16, 1, true, false);

    // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/DataLine.Info.html#Info-java.lang.Class-javax.sound.sampled.AudioFormat-int-
    DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
    TargetDataLine targetDataLine;
    try {
      targetDataLine = (TargetDataLine) AudioSystem.getLine(info);
      targetDataLine.open(format);
      targetDataLine.start();
    } catch (LineUnavailableException e) {
      System.out.println("Failed to open target data line: " + e.getMessage());
      recognizer.release();
      stream.release();
      return;
    }

    String lastText = "";
    int segmentIndex = 0;

    // You can choose an arbitrary number
    int bufferSize = 1600; // 0.1 seconds for 16000Hz
    byte[] buffer = new byte[bufferSize * 2]; // a short has 2 bytes
    float[] samples = new float[bufferSize];

    System.out.println("Started! Please speak");
    while (targetDataLine.isOpen()) {
      int n = targetDataLine.read(buffer, 0, buffer.length);
      if (n <= 0) {
        System.out.printf("Got %d bytes. Expected %d bytes.\n", n, buffer.length);
        continue;
      }
      for (int i = 0; i != bufferSize; ++i) {
        short low = buffer[2 * i];
        short high = buffer[2 * i + 1];
        int s = (high << 8) + low;
        samples[i] = (float) s / 32768;
      }
      stream.acceptWaveform(samples, sampleRate);

      while (recognizer.isReady(stream)) {
        recognizer.decode(stream);
      }

      String text = recognizer.getResult(stream).getText();
      boolean isEndpoint = recognizer.isEndpoint(stream);
      if (!text.isEmpty() && text != " " && lastText != text) {
        lastText = text;
        System.out.printf("%d: %s\r", segmentIndex, text);
      }

      if (isEndpoint) {
        if (!text.isEmpty()) {
          System.out.println();
          segmentIndex += 1;
        }

        recognizer.reset(stream);
      }
    } // while (targetDataLine.isOpen())

    stream.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/StreamingDecodeFileCtc.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use an online CTC model, i.e., streaming CTC model,
// to decode files.
import com.k2fsa.sherpa.onnx.*;

public class StreamingDecodeFileCtc {
  public static void main(String[] args) {
    // please refer to
    // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
    // to download model files
    String model =
        "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx";
    String tokens = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt";
    String waveFilename = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav";

    WaveReader reader = new WaveReader(waveFilename);

    OnlineZipformer2CtcModelConfig ctc =
        OnlineZipformer2CtcModelConfig.builder().setModel(model).build();

    OnlineModelConfig modelConfig =
        OnlineModelConfig.builder()
            .setZipformer2Ctc(ctc)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OnlineRecognizerConfig config =
        OnlineRecognizerConfig.builder()
            .setOnlineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .build();

    OnlineRecognizer recognizer = new OnlineRecognizer(config);
    OnlineStream stream = recognizer.createStream();
    stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());

    float[] tailPaddings = new float[(int) (0.3 * reader.getSampleRate())];
    stream.acceptWaveform(tailPaddings, reader.getSampleRate());

    while (recognizer.isReady(stream)) {
      recognizer.decode(stream);
    }

    String text = recognizer.getResult(stream).getText();

    System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);

    stream.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/StreamingDecodeFileCtcHLG.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use an online CTC model, i.e., streaming CTC model,
// to decode files.
import com.k2fsa.sherpa.onnx.*;

public class StreamingDecodeFileCtcHLG {
  public static void main(String[] args) {
    // please refer to
    // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
    // to download model files
    String model =
        "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx";
    String tokens = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt";
    String hlg = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst";
    String waveFilename = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav";

    WaveReader reader = new WaveReader(waveFilename);

    OnlineZipformer2CtcModelConfig ctc =
        OnlineZipformer2CtcModelConfig.builder().setModel(model).build();

    OnlineModelConfig modelConfig =
        OnlineModelConfig.builder()
            .setZipformer2Ctc(ctc)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OnlineCtcFstDecoderConfig ctcFstDecoderConfig =
        OnlineCtcFstDecoderConfig.builder().setGraph(hlg).build();

    OnlineRecognizerConfig config =
        OnlineRecognizerConfig.builder()
            .setOnlineModelConfig(modelConfig)
            .setCtcFstDecoderConfig(ctcFstDecoderConfig)
            .build();

    OnlineRecognizer recognizer = new OnlineRecognizer(config);
    OnlineStream stream = recognizer.createStream();
    stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());

    float[] tailPaddings = new float[(int) (0.3 * reader.getSampleRate())];
    stream.acceptWaveform(tailPaddings, reader.getSampleRate());

    while (recognizer.isReady(stream)) {
      recognizer.decode(stream);
    }

    String text = recognizer.getResult(stream).getText();

    System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);

    stream.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/StreamingDecodeFileParaformer.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use an online paraformer, i.e., streaming paraformer,
// to decode files.
import com.k2fsa.sherpa.onnx.*;

public class StreamingDecodeFileParaformer {
  public static void main(String[] args) {
    // please refer to
    // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-streaming-paraformer-bilingual-zh-en-chinese-english
    // to download model files
    String encoder = "./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx";
    String decoder = "./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx";
    String tokens = "./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt";
    String waveFilename = "./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/2.wav";

    WaveReader reader = new WaveReader(waveFilename);

    OnlineParaformerModelConfig paraformer =
        OnlineParaformerModelConfig.builder().setEncoder(encoder).setDecoder(decoder).build();

    OnlineModelConfig modelConfig =
        OnlineModelConfig.builder()
            .setParaformer(paraformer)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OnlineRecognizerConfig config =
        OnlineRecognizerConfig.builder()
            .setOnlineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .build();

    OnlineRecognizer recognizer = new OnlineRecognizer(config);
    OnlineStream stream = recognizer.createStream();
    stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());

    float[] tailPaddings = new float[(int) (0.8 * reader.getSampleRate())];
    stream.acceptWaveform(tailPaddings, reader.getSampleRate());

    while (recognizer.isReady(stream)) {
      recognizer.decode(stream);
    }

    String text = recognizer.getResult(stream).getText();

    System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);

    stream.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/StreamingDecodeFileToneCtc.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use an online T-one CTC model, i.e.,
// streaming T-one CTC model, to decode files.
import com.k2fsa.sherpa.onnx.*;

public class StreamingDecodeFileToneCtc {
  public static void main(String[] args) {
    String model = "./sherpa-onnx-streaming-t-one-russian-2025-09-08/model.onnx";
    String tokens = "./sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt";
    String waveFilename = "./sherpa-onnx-streaming-t-one-russian-2025-09-08/0.wav";

    WaveReader reader = new WaveReader(waveFilename);

    OnlineToneCtcModelConfig ctc = OnlineToneCtcModelConfig.builder().setModel(model).build();

    OnlineModelConfig modelConfig =
        OnlineModelConfig.builder()
            .setToneCtc(ctc)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OnlineRecognizerConfig config =
        OnlineRecognizerConfig.builder()
            .setOnlineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .build();

    OnlineRecognizer recognizer = new OnlineRecognizer(config);
    OnlineStream stream = recognizer.createStream();

    float[] leftPaddings = new float[(int) (0.3 * reader.getSampleRate())];
    stream.acceptWaveform(leftPaddings, reader.getSampleRate());

    stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());

    float[] tailPaddings = new float[(int) (0.6 * reader.getSampleRate())];
    stream.acceptWaveform(tailPaddings, reader.getSampleRate());

    while (recognizer.isReady(stream)) {
      recognizer.decode(stream);
    }

    String text = recognizer.getResult(stream).getText();

    System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);

    stream.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/StreamingDecodeFileTransducer.java
================================================
// Copyright 2022-2023 by zhaoming
// Copyright 2024 Xiaomi Corporation

// This file shows how to use an online transducer, i.e., streaming transducer,
// to decode files.
import com.k2fsa.sherpa.onnx.*;

public class StreamingDecodeFileTransducer {
  public static void main(String[] args) {
    // please refer to
    // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
    // to download model files
    String encoder =
        "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx";
    String decoder =
        "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx";
    String joiner =
        "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx";
    String tokens = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt";

    String waveFilename =
        "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav";

    WaveReader reader = new WaveReader(waveFilename);

    OnlineTransducerModelConfig transducer =
        OnlineTransducerModelConfig.builder()
            .setEncoder(encoder)
            .setDecoder(decoder)
            .setJoiner(joiner)
            .build();

    OnlineModelConfig modelConfig =
        OnlineModelConfig.builder()
            .setTransducer(transducer)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OnlineRecognizerConfig config =
        OnlineRecognizerConfig.builder()
            .setOnlineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .build();

    OnlineRecognizer recognizer = new OnlineRecognizer(config);
    OnlineStream stream = recognizer.createStream();
    stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());

    float[] tailPaddings = new float[(int) (0.8 * reader.getSampleRate())];
    stream.acceptWaveform(tailPaddings, reader.getSampleRate());

    while (recognizer.isReady(stream)) {
      recognizer.decode(stream);
    }

    String text = recognizer.getResult(stream).getText();

    System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);

    stream.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/StreamingSpeechEnhancementDpdfNet.java
================================================
// Copyright 2026 Xiaomi Corporation
//
// This file shows how to use streaming DPDFNet speech enhancement models in
// sherpa-onnx.
//
// Download DPDFNet models from either:
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
// https://huggingface.co/Ceva-IP/DPDFNet

import com.k2fsa.sherpa.onnx.*;

public class StreamingSpeechEnhancementDpdfNet {
  private static void appendSamples(java.util.ArrayList<Float> dst, float[] src) {
    for (float v : src) {
      dst.add(v);
    }
  }

  private static float[] toFloatArray(java.util.ArrayList<Float> src) {
    float[] ans = new float[src.size()];
    for (int i = 0; i != src.size(); ++i) {
      ans[i] = src.get(i);
    }
    return ans;
  }

  public static void main(String[] args) {
    String model = "./dpdfnet_baseline.onnx";

    OfflineSpeechDenoiserModelConfig modelConfig =
        OfflineSpeechDenoiserModelConfig.builder()
            .setNumThreads(1)
            .setDebug(true)
            .setProvider("cpu")
            .setDpdfnet(
                OfflineSpeechDenoiserDpdfNetModelConfig.builder().setModel(model).build())
            .build();

    OnlineSpeechDenoiserConfig config =
        OnlineSpeechDenoiserConfig.builder().setModel(modelConfig).build();

    OnlineSpeechDenoiser speechDenoiser = new OnlineSpeechDenoiser(config);

    WaveReader reader = new WaveReader("./inp_16k.wav");
    int frameShift = speechDenoiser.getFrameShiftInSamples();
    java.util.ArrayList<Float> output = new java.util.ArrayList<>();

    float[] samples = reader.getSamples();
    for (int start = 0; start < samples.length; start += frameShift) {
      int end = Math.min(start + frameShift, samples.length);
      float[] chunk = java.util.Arrays.copyOfRange(samples, start, end);
      DenoisedAudio denoised = speechDenoiser.run(chunk, reader.getSampleRate());
      appendSamples(output, denoised.getSamples());
    }

    DenoisedAudio denoised = speechDenoiser.flush();
    appendSamples(output, denoised.getSamples());
    String outFilename = "enhanced-online-dpdfnet.wav";
    WaveWriter.write(outFilename, toFloatArray(output), speechDenoiser.getSampleRate());
    System.out.printf("Saved to %s\n", outFilename);

    speechDenoiser.release();
  }
}


================================================
FILE: java-api-examples/StreamingSpeechEnhancementGtcrn.java
================================================
// Copyright 2026 Xiaomi Corporation
//
// This file shows how to use streaming GTCRN speech enhancement models in
// sherpa-onnx.
//
// Download GTCRN models and sample test waves from:
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models

import com.k2fsa.sherpa.onnx.*;

public class StreamingSpeechEnhancementGtcrn {
  private static void appendSamples(java.util.ArrayList<Float> dst, float[] src) {
    for (float v : src) {
      dst.add(v);
    }
  }

  private static float[] toFloatArray(java.util.ArrayList<Float> src) {
    float[] ans = new float[src.size()];
    for (int i = 0; i != src.size(); ++i) {
      ans[i] = src.get(i);
    }
    return ans;
  }

  public static void main(String[] args) {
    String model = "./gtcrn_simple.onnx";

    OfflineSpeechDenoiserModelConfig modelConfig =
        OfflineSpeechDenoiserModelConfig.builder()
            .setNumThreads(1)
            .setDebug(true)
            .setProvider("cpu")
            .setGtcrn(
                OfflineSpeechDenoiserGtcrnModelConfig.builder().setModel(model).build())
            .build();

    OnlineSpeechDenoiserConfig config =
        OnlineSpeechDenoiserConfig.builder().setModel(modelConfig).build();

    OnlineSpeechDenoiser speechDenoiser = new OnlineSpeechDenoiser(config);

    WaveReader reader = new WaveReader("./inp_16k.wav");
    int frameShift = speechDenoiser.getFrameShiftInSamples();
    java.util.ArrayList<Float> output = new java.util.ArrayList<>();

    float[] samples = reader.getSamples();
    for (int start = 0; start < samples.length; start += frameShift) {
      int end = Math.min(start + frameShift, samples.length);
      float[] chunk = java.util.Arrays.copyOfRange(samples, start, end);
      DenoisedAudio denoised = speechDenoiser.run(chunk, reader.getSampleRate());
      appendSamples(output, denoised.getSamples());
    }

    DenoisedAudio denoised = speechDenoiser.flush();
    appendSamples(output, denoised.getSamples());
    String outFilename = "enhanced-online-gtcrn.wav";
    WaveWriter.write(outFilename, toFloatArray(output), speechDenoiser.getSampleRate());
    System.out.printf("Saved to %s\n", outFilename);

    speechDenoiser.release();
  }
}


================================================
FILE: java-api-examples/SupertonicTts.java
================================================
// Copyright 2026 Xiaomi Corporation

// This file shows how to use a Supertonic TTS English model.
import com.k2fsa.sherpa.onnx.*;
import java.util.HashMap;
import java.util.Map;

public class SupertonicTts {
  public static void main(String[] args) {
    LibraryUtils.enableDebug();
    // please visit
    // https://k2-fsa.github.io/sherpa/onnx/tts/supertonic.html
    // to download model files
    String modelDir = "./sherpa-onnx-supertonic-tts-int8-2026-03-06";
    String durationPredictor = modelDir + "/duration_predictor.int8.onnx";
    String textEncoder = modelDir + "/text_encoder.int8.onnx";
    String vectorEstimator = modelDir + "/vector_estimator.int8.onnx";
    String vocoder = modelDir + "/vocoder.int8.onnx";
    String ttsJson = modelDir + "/tts.json";
    String unicodeIndexer = modelDir + "/unicode_indexer.bin";
    String voiceStyle = modelDir + "/voice.bin";

    String text =
        "Today as always, men fall into two groups: slaves and free men. Whoever does not have"
            + " two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a"
            + " businessman, an official, or a scholar.";

    OfflineTtsSupertonicModelConfig supertonicModelConfig =
        OfflineTtsSupertonicModelConfig.builder()
            .setDurationPredictor(durationPredictor)
            .setTextEncoder(textEncoder)
            .setVectorEstimator(vectorEstimator)
            .setVocoder(vocoder)
            .setTtsJson(ttsJson)
            .setUnicodeIndexer(unicodeIndexer)
            .setVoiceStyle(voiceStyle)
            .build();

    OfflineTtsModelConfig modelConfig =
        OfflineTtsModelConfig.builder()
            .setSupertonic(supertonicModelConfig)
            .setNumThreads(2)
            .setDebug(true)
            .build();

    OfflineTtsConfig config = OfflineTtsConfig.builder().setModel(modelConfig).build();
    OfflineTts tts = new OfflineTts(config);

    GenerationConfig genConfig = new GenerationConfig();
    genConfig.setSid(6);
    genConfig.setSpeed(1.25f);
    genConfig.setNumSteps(5);

    Map<String, String> extra = new HashMap<>();
    extra.put("lang", "en");

    genConfig.setExtra(extra);

    long start = System.currentTimeMillis();
    GeneratedAudio audio =
        tts.generateWithConfigAndCallback(
            text,
            genConfig,
            new OfflineTtsCallback() {
              @Override
              public Integer invoke(float[] samples) {
                System.out.println("callback got called with " + samples.length + " samples");
                return 1;
              }
            });

    long stop = System.currentTimeMillis();

    float timeElapsedSeconds = (stop - start) / 1000.0f;

    float audioDuration = audio.getSamples().length / (float) audio.getSampleRate();
    float realTimeFactor = timeElapsedSeconds / audioDuration;

    String waveFilename = "supertonic-tts-en.wav";
    audio.save(waveFilename);
    System.out.printf("-- elapsed : %.3f seconds\n", timeElapsedSeconds);
    System.out.printf("-- audio duration: %.3f seconds\n", audioDuration);
    System.out.printf("-- real-time factor (RTF): %.3f\n", realTimeFactor);
    System.out.printf("-- text: %s\n", text);
    System.out.printf("-- Saved to %s\n", waveFilename);

    tts.release();
  }
}


================================================
FILE: java-api-examples/TenVadRemoveSilence.java
================================================
// Copyright 2025 Xiaomi Corporation

// This file shows how to use a ten-vad model to remove silences from
// a wave file.

import com.k2fsa.sherpa.onnx.*;
import java.util.ArrayList;
import java.util.Arrays;

public class TenVadRemoveSilence {
  public static void main(String[] args) {
    // please download ./ten-vad.onnx from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    String model = "./ten-vad.onnx";
    TenVadModelConfig tenVad =
        TenVadModelConfig.builder()
            .setModel(model)
            .setThreshold(0.5f)
            .setMinSilenceDuration(0.25f)
            .setMinSpeechDuration(0.5f)
            .setWindowSize(256)
            .setMaxSpeechDuration(5.0f)
            .build();

    VadModelConfig config =
        VadModelConfig.builder()
            .setTenVadModelConfig(tenVad)
            .setSampleRate(16000)
            .setNumThreads(1)
            .setDebug(true)
            .setProvider("cpu")
            .build();

    Vad vad = new Vad(config);

    // You can download the test file from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    String testWaveFilename = "./lei-jun-test.wav";
    WaveReader reader = new WaveReader(testWaveFilename);

    int numSamples = reader.getSamples().length;
    int windowSize = tenVad.getWindowSize();
    int numIter = numSamples / windowSize;

    ArrayList<float[]> segments = new ArrayList<float[]>();

    for (int i = 0; i != numIter; ++i) {
      int start = i * windowSize;
      int end = start + windowSize;
      float[] samples = Arrays.copyOfRange(reader.getSamples(), start, end);
      vad.acceptWaveform(samples);
      if (vad.isSpeechDetected()) {
        while (!vad.empty()) {

          // if you want to get the starting time of this segment, you can use
          /* float startTime = vad.front().getStart() / 16000.0f; */

          segments.add(vad.front().getSamples());
          vad.pop();
        }
      }
    }

    vad.flush();
    while (!vad.empty()) {

      // if you want to get the starting time of this segment, you can use
      /* float startTime = vad.front().getStart() / 16000.0f; */

      segments.add(vad.front().getSamples());
      vad.pop();
    }

    // get total number of samples
    int n = 0;
    for (float[] s : segments) {
      n += s.length;
    }

    float[] allSamples = new float[n];
    int i = 0;
    for (float[] s : segments) {
      System.arraycopy(s, 0, allSamples, i, s.length);
      i += s.length;
    }

    String outFilename = "lei-jun-test-no-silence.wav";
    WaveWriter.write(outFilename, allSamples, 16000);
    System.out.printf("Saved to %s\n", outFilename);

    vad.release();
  }
}


================================================
FILE: java-api-examples/VadFromMic.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use a silero_vad model to detect speech
// and save detected speech into a wave file.

import com.k2fsa.sherpa.onnx.*;
import javax.sound.sampled.*;

public class VadFromMic {
  public static void main(String[] args) {
    int sampleRate = 16000;
    int windowSize = 512;
    // please download ./silero_vad.onnx from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    String model = "./silero_vad.onnx";
    SileroVadModelConfig sileroVad =
        SileroVadModelConfig.builder()
            .setModel(model)
            .setThreshold(0.5f)
            .setMinSilenceDuration(0.25f)
            .setMinSpeechDuration(0.5f)
            .setWindowSize(windowSize)
            .build();

    VadModelConfig config =
        VadModelConfig.builder()
            .setSileroVadModelConfig(sileroVad)
            .setSampleRate(sampleRate)
            .setNumThreads(1)
            .setDebug(true)
            .setProvider("cpu")
            .build();

    Vad vad = new Vad(config);

    // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/AudioFormat.html
    // Linear PCM, 16000Hz, 16-bit, 1 channel, signed, little endian
    AudioFormat format = new AudioFormat(sampleRate, 16, 1, true, false);

    // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/DataLine.Info.html#Info-java.lang.Class-javax.sound.sampled.AudioFormat-int-
    DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
    TargetDataLine targetDataLine;
    try {
      targetDataLine = (TargetDataLine) AudioSystem.getLine(info);
      targetDataLine.open(format);
      targetDataLine.start();
    } catch (LineUnavailableException e) {
      System.out.println("Failed to open target data line: " + e.getMessage());
      vad.release();
      return;
    }

    boolean printed = false;
    int index = 0;

    byte[] buffer = new byte[windowSize * 2];
    float[] samples = new float[windowSize];

    while (targetDataLine.isOpen()) {
      int n = targetDataLine.read(buffer, 0, buffer.length);
      if (n <= 0) {
        System.out.printf("Got %d bytes. Expected %d bytes.\n", n, buffer.length);
        continue;
      }
      for (int i = 0; i != windowSize; ++i) {
        short low = buffer[2 * i];
        short high = buffer[2 * i + 1];
        int s = (high << 8) + low;
        samples[i] = (float) s / 32768;
      }

      vad.acceptWaveform(samples);
      if (vad.isSpeechDetected() && !printed) {
        System.out.println("Detected speech");
        printed = true;
      }

      if (!vad.isSpeechDetected()) {
        printed = false;
      }

      while (!vad.empty()) {
        float[] segment = vad.front().getSamples();
        float duration = segment.length / (float) sampleRate;
        System.out.printf("Duration: %.3f seconds\n", duration);

        String filename = String.format("seg-%d-%.3fs.wav", index, duration);
        index += 1;
        WaveWriter.write(filename, segment, sampleRate);
        System.out.printf("Saved to %s\n", filename);
        System.out.println("----------");
        vad.pop();
      }
    }

    vad.release();
  }
}


================================================
FILE: java-api-examples/VadFromMicWithNonStreamingMoonshine.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use a silero_vad model with a non-streaming
// Moonshine tiny for speech recognition.

import com.k2fsa.sherpa.onnx.*;
import javax.sound.sampled.*;

public class VadFromMicNonStreamingMoonshine {
  private static final int sampleRate = 16000;
  private static final int windowSize = 512;

  public static Vad createVad() {
    // please download ./silero_vad.onnx from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    String model = "./silero_vad.onnx";
    SileroVadModelConfig sileroVad =
        SileroVadModelConfig.builder()
            .setModel(model)
            .setThreshold(0.5f)
            .setMinSilenceDuration(0.25f)
            .setMinSpeechDuration(0.5f)
            .setWindowSize(windowSize)
            .build();

    VadModelConfig config =
        VadModelConfig.builder()
            .setSileroVadModelConfig(sileroVad)
            .setSampleRate(sampleRate)
            .setNumThreads(1)
            .setDebug(true)
            .setProvider("cpu")
            .build();

    return new Vad(config);
  }

  public static OfflineRecognizer createOfflineRecognizer() {
    // please refer to
    // https://k2-fsa.github.io/sherpa/onnx/moonshine/index.html
    // to download model files

    String preprocessor = "./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx";
    String encoder = "./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx";
    String uncachedDecoder = "./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx";
    String cachedDecoder = "./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx";

    String tokens = "./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt";

    OfflineMoonshineModelConfig moonshine =
        OfflineMoonshineModelConfig.builder()
            .setPreprocessor(preprocessor)
            .setEncoder(encoder)
            .setUncachedDecoder(uncachedDecoder)
            .setCachedDecoder(cachedDecoder)
            .build();

    OfflineModelConfig modelConfig =
        OfflineModelConfig.builder()
            .setMoonshine(moonshine)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineRecognizerConfig config =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .build();

    return new OfflineRecognizer(config);
  }

  public static void main(String[] args) {
    Vad vad = createVad();
    OfflineRecognizer recognizer = createOfflineRecognizer();

    // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/AudioFormat.html
    // Linear PCM, 16000Hz, 16-bit, 1 channel, signed, little endian
    AudioFormat format = new AudioFormat(sampleRate, 16, 1, true, false);

    // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/DataLine.Info.html#Info-java.lang.Class-javax.sound.sampled.AudioFormat-int-
    DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
    TargetDataLine targetDataLine;
    try {
      targetDataLine = (TargetDataLine) AudioSystem.getLine(info);
      targetDataLine.open(format);
      targetDataLine.start();
    } catch (LineUnavailableException e) {
      System.out.println("Failed to open target data line: " + e.getMessage());
      vad.release();
      recognizer.release();
      return;
    }

    boolean printed = false;
    byte[] buffer = new byte[windowSize * 2];
    float[] samples = new float[windowSize];

    System.out.println("Started. Please speak");
    boolean running = true;
    while (targetDataLine.isOpen() && running) {
      int n = targetDataLine.read(buffer, 0, buffer.length);
      if (n <= 0) {
        System.out.printf("Got %d bytes. Expected %d bytes.\n", n, buffer.length);
        continue;
      }
      for (int i = 0; i != windowSize; ++i) {
        short low = buffer[2 * i];
        short high = buffer[2 * i + 1];
        int s = (high << 8) + low;
        samples[i] = (float) s / 32768;
      }

      vad.acceptWaveform(samples);
      if (vad.isSpeechDetected() && !printed) {
        System.out.println("Detected speech");
        printed = true;
      }

      if (!vad.isSpeechDetected()) {
        printed = false;
      }

      while (!vad.empty()) {
        SpeechSegment segment = vad.front();
        float startTime = segment.getStart() / (float) sampleRate;
        float duration = segment.getSamples().length / (float) sampleRate;

        OfflineStream stream = recognizer.createStream();
        stream.acceptWaveform(segment.getSamples(), sampleRate);
        recognizer.decode(stream);
        String text = recognizer.getResult(stream).getText();
        stream.release();

        if (!text.isEmpty()) {
          System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text);
        }

        if (text.contains("exit the program")) {
          running = false;
        }

        vad.pop();
      }
    }

    vad.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/VadFromMicWithNonStreamingParaformer.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use a silero_vad model with a non-streaming Paraformer
// for speech recognition.

import com.k2fsa.sherpa.onnx.*;
import javax.sound.sampled.*;

public class VadFromMicWithNonStreamingParaformer {
  private static final int sampleRate = 16000;
  private static final int windowSize = 512;

  public static Vad createVad() {
    // please download ./silero_vad.onnx from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    String model = "./silero_vad.onnx";
    SileroVadModelConfig sileroVad =
        SileroVadModelConfig.builder()
            .setModel(model)
            .setThreshold(0.5f)
            .setMinSilenceDuration(0.25f)
            .setMinSpeechDuration(0.5f)
            .setWindowSize(windowSize)
            .build();

    VadModelConfig config =
        VadModelConfig.builder()
            .setSileroVadModelConfig(sileroVad)
            .setSampleRate(sampleRate)
            .setNumThreads(1)
            .setDebug(true)
            .setProvider("cpu")
            .build();

    return new Vad(config);
  }

  public static OfflineRecognizer createOfflineRecognizer() {
    // please refer to
    // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2023-09-14-chinese-english
    // to download model files
    String model = "./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx";
    String tokens = "./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt";

    // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
    String ruleFsts = "./itn_zh_number.fst";

    OfflineParaformerModelConfig paraformer =
        OfflineParaformerModelConfig.builder().setModel(model).build();

    OfflineModelConfig modelConfig =
        OfflineModelConfig.builder()
            .setParaformer(paraformer)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineRecognizerConfig config =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .setRuleFsts(ruleFsts)
            .build();

    return new OfflineRecognizer(config);
  }

  public static void main(String[] args) {
    Vad vad = createVad();
    OfflineRecognizer recognizer = createOfflineRecognizer();

    // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/AudioFormat.html
    // Linear PCM, 16000Hz, 16-bit, 1 channel, signed, little endian
    AudioFormat format = new AudioFormat(sampleRate, 16, 1, true, false);

    // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/DataLine.Info.html#Info-java.lang.Class-javax.sound.sampled.AudioFormat-int-
    DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
    TargetDataLine targetDataLine;
    try {
      targetDataLine = (TargetDataLine) AudioSystem.getLine(info);
      targetDataLine.open(format);
      targetDataLine.start();
    } catch (LineUnavailableException e) {
      System.out.println("Failed to open target data line: " + e.getMessage());
      vad.release();
      recognizer.release();
      return;
    }

    boolean printed = false;
    byte[] buffer = new byte[windowSize * 2];
    float[] samples = new float[windowSize];

    System.out.println("Started. Please speak");
    boolean running = true;
    while (targetDataLine.isOpen() && running) {
      int n = targetDataLine.read(buffer, 0, buffer.length);
      if (n <= 0) {
        System.out.printf("Got %d bytes. Expected %d bytes.\n", n, buffer.length);
        continue;
      }
      for (int i = 0; i != windowSize; ++i) {
        short low = buffer[2 * i];
        short high = buffer[2 * i + 1];
        int s = (high << 8) + low;
        samples[i] = (float) s / 32768;
      }

      vad.acceptWaveform(samples);
      if (vad.isSpeechDetected() && !printed) {
        System.out.println("Detected speech");
        printed = true;
      }

      if (!vad.isSpeechDetected()) {
        printed = false;
      }

      while (!vad.empty()) {
        SpeechSegment segment = vad.front();
        float startTime = segment.getStart() / (float) sampleRate;
        float duration = segment.getSamples().length / (float) sampleRate;

        OfflineStream stream = recognizer.createStream();
        stream.acceptWaveform(segment.getSamples(), sampleRate);
        recognizer.decode(stream);
        String text = recognizer.getResult(stream).getText();
        stream.release();

        if (!text.isEmpty()) {
          System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text);
        }

        if (text.contains("退出程序")) {
          running = false;
        }

        vad.pop();
      }
    }

    vad.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/VadFromMicWithNonStreamingSenseVoice.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use a silero_vad model with a non-streaming
// SenseVoice model for speech recognition.

import com.k2fsa.sherpa.onnx.*;
import javax.sound.sampled.*;

public class VadFromMicWithNonStreamingSenseVoice {
  private static final int sampleRate = 16000;
  private static final int windowSize = 512;

  public static Vad createVad() {
    // please download ./silero_vad.onnx from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    String model = "./silero_vad.onnx";
    SileroVadModelConfig sileroVad =
        SileroVadModelConfig.builder()
            .setModel(model)
            .setThreshold(0.5f)
            .setMinSilenceDuration(0.25f)
            .setMinSpeechDuration(0.5f)
            .setWindowSize(windowSize)
            .build();

    VadModelConfig config =
        VadModelConfig.builder()
            .setSileroVadModelConfig(sileroVad)
            .setSampleRate(sampleRate)
            .setNumThreads(1)
            .setDebug(true)
            .setProvider("cpu")
            .build();

    return new Vad(config);
  }

  public static OfflineRecognizer createOfflineRecognizer() {
    // please refer to
    // https://k2-fsa.github.io/sherpa/onnx/sense-voice/index.html
    // to download model files
    String model = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx";
    String tokens = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt";

    OfflineSenseVoiceModelConfig senseVoice =
        OfflineSenseVoiceModelConfig.builder().setModel(model).build();

    OfflineModelConfig modelConfig =
        OfflineModelConfig.builder()
            .setSenseVoice(senseVoice)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineRecognizerConfig config =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .build();

    return new OfflineRecognizer(config);
  }

  public static void main(String[] args) {
    Vad vad = createVad();
    OfflineRecognizer recognizer = createOfflineRecognizer();

    // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/AudioFormat.html
    // Linear PCM, 16000Hz, 16-bit, 1 channel, signed, little endian
    AudioFormat format = new AudioFormat(sampleRate, 16, 1, true, false);

    // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/DataLine.Info.html#Info-java.lang.Class-javax.sound.sampled.AudioFormat-int-
    DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
    TargetDataLine targetDataLine;
    try {
      targetDataLine = (TargetDataLine) AudioSystem.getLine(info);
      targetDataLine.open(format);
      targetDataLine.start();
    } catch (LineUnavailableException e) {
      System.out.println("Failed to open target data line: " + e.getMessage());
      vad.release();
      recognizer.release();
      return;
    }

    boolean printed = false;
    byte[] buffer = new byte[windowSize * 2];
    float[] samples = new float[windowSize];

    System.out.println("Started. Please speak");
    boolean running = true;
    while (targetDataLine.isOpen() && running) {
      int n = targetDataLine.read(buffer, 0, buffer.length);
      if (n <= 0) {
        System.out.printf("Got %d bytes. Expected %d bytes.\n", n, buffer.length);
        continue;
      }
      for (int i = 0; i != windowSize; ++i) {
        short low = buffer[2 * i];
        short high = buffer[2 * i + 1];
        int s = (high << 8) + low;
        samples[i] = (float) s / 32768;
      }

      vad.acceptWaveform(samples);
      if (vad.isSpeechDetected() && !printed) {
        System.out.println("Detected speech");
        printed = true;
      }

      if (!vad.isSpeechDetected()) {
        printed = false;
      }

      while (!vad.empty()) {
        SpeechSegment segment = vad.front();
        float startTime = segment.getStart() / (float) sampleRate;
        float duration = segment.getSamples().length / (float) sampleRate;

        OfflineStream stream = recognizer.createStream();
        stream.acceptWaveform(segment.getSamples(), sampleRate);
        recognizer.decode(stream);
        String text = recognizer.getResult(stream).getText();
        stream.release();

        if (!text.isEmpty()) {
          System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text);
        }

        if (text.contains("退出程序")) {
          running = false;
        }

        vad.pop();
      }
    }

    vad.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/VadFromMicWithNonStreamingWhisper.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use a silero_vad model with a non-streaming Whisper tiny.en
// for speech recognition.

import com.k2fsa.sherpa.onnx.*;
import javax.sound.sampled.*;

public class VadFromMicNonStreamingWhisper {
  private static final int sampleRate = 16000;
  private static final int windowSize = 512;

  public static Vad createVad() {
    // please download ./silero_vad.onnx from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    String model = "./silero_vad.onnx";
    SileroVadModelConfig sileroVad =
        SileroVadModelConfig.builder()
            .setModel(model)
            .setThreshold(0.5f)
            .setMinSilenceDuration(0.25f)
            .setMinSpeechDuration(0.5f)
            .setWindowSize(windowSize)
            .build();

    VadModelConfig config =
        VadModelConfig.builder()
            .setSileroVadModelConfig(sileroVad)
            .setSampleRate(sampleRate)
            .setNumThreads(1)
            .setDebug(true)
            .setProvider("cpu")
            .build();

    return new Vad(config);
  }

  public static OfflineRecognizer createOfflineRecognizer() {
    // please refer to
    // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html
    // to download model files
    String encoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx";
    String decoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx";
    String tokens = "./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt";

    OfflineWhisperModelConfig whisper =
        OfflineWhisperModelConfig.builder().setEncoder(encoder).setDecoder(decoder).build();

    OfflineModelConfig modelConfig =
        OfflineModelConfig.builder()
            .setWhisper(whisper)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineRecognizerConfig config =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .build();

    return new OfflineRecognizer(config);
  }

  public static void main(String[] args) {
    Vad vad = createVad();
    OfflineRecognizer recognizer = createOfflineRecognizer();

    // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/AudioFormat.html
    // Linear PCM, 16000Hz, 16-bit, 1 channel, signed, little endian
    AudioFormat format = new AudioFormat(sampleRate, 16, 1, true, false);

    // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/DataLine.Info.html#Info-java.lang.Class-javax.sound.sampled.AudioFormat-int-
    DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
    TargetDataLine targetDataLine;
    try {
      targetDataLine = (TargetDataLine) AudioSystem.getLine(info);
      targetDataLine.open(format);
      targetDataLine.start();
    } catch (LineUnavailableException e) {
      System.out.println("Failed to open target data line: " + e.getMessage());
      vad.release();
      recognizer.release();
      return;
    }

    boolean printed = false;
    byte[] buffer = new byte[windowSize * 2];
    float[] samples = new float[windowSize];

    System.out.println("Started. Please speak");
    boolean running = true;
    while (targetDataLine.isOpen() && running) {
      int n = targetDataLine.read(buffer, 0, buffer.length);
      if (n <= 0) {
        System.out.printf("Got %d bytes. Expected %d bytes.\n", n, buffer.length);
        continue;
      }
      for (int i = 0; i != windowSize; ++i) {
        short low = buffer[2 * i];
        short high = buffer[2 * i + 1];
        int s = (high << 8) + low;
        samples[i] = (float) s / 32768;
      }

      vad.acceptWaveform(samples);
      if (vad.isSpeechDetected() && !printed) {
        System.out.println("Detected speech");
        printed = true;
      }

      if (!vad.isSpeechDetected()) {
        printed = false;
      }

      while (!vad.empty()) {
        SpeechSegment segment = vad.front();
        float startTime = segment.getStart() / (float) sampleRate;
        float duration = segment.getSamples().length / (float) sampleRate;

        OfflineStream stream = recognizer.createStream();
        stream.acceptWaveform(segment.getSamples(), sampleRate);
        recognizer.decode(stream);
        String text = recognizer.getResult(stream).getText();
        stream.release();

        if (!text.isEmpty()) {
          System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text);
        }

        if (text.contains("exit the program")) {
          running = false;
        }

        vad.pop();
      }
    }

    vad.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/VadNonStreamingDolphinCtc.java
================================================
// Copyright 2025 Xiaomi Corporation

// This file shows how to use a silero_vad model with a non-streaming Dolphin
// CTC model for speech recognition.

import com.k2fsa.sherpa.onnx.*;
import java.util.Arrays;

public class VadNonStreamingSenseVoice {
  public static Vad createVad() {
    // please download ./silero_vad.onnx from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    String model = "./silero_vad.onnx";
    SileroVadModelConfig sileroVad =
        SileroVadModelConfig.builder()
            .setModel(model)
            .setThreshold(0.5f)
            .setMinSilenceDuration(0.25f)
            .setMinSpeechDuration(0.5f)
            .setWindowSize(512)
            .setMaxSpeechDuration(5.0f)
            .build();

    VadModelConfig config =
        VadModelConfig.builder()
            .setSileroVadModelConfig(sileroVad)
            .setSampleRate(16000)
            .setNumThreads(1)
            .setDebug(true)
            .setProvider("cpu")
            .build();

    return new Vad(config);
  }

  public static OfflineRecognizer createOfflineRecognizer() {
    // please refer to
    // https://k2-fsa.github.io/sherpa/onnx/dolphin/index.html
    // to download model files
    String model = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx";
    String tokens = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt";

    OfflineDolphinModelConfig dolphin = OfflineDolphinModelConfig.builder().setModel(model).build();

    OfflineModelConfig modelConfig =
        OfflineModelConfig.builder()
            .setDolphin(dolphin)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineRecognizerConfig config =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .build();

    return new OfflineRecognizer(config);
  }

  public static void main(String[] args) {

    Vad vad = createVad();
    OfflineRecognizer recognizer = createOfflineRecognizer();

    // You can download the test file from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    String testWaveFilename = "./lei-jun-test.wav";
    WaveReader reader = new WaveReader(testWaveFilename);

    int numSamples = reader.getSamples().length;
    int numIter = numSamples / 512;

    for (int i = 0; i != numIter; ++i) {
      int start = i * 512;
      int end = start + 512;
      float[] samples = Arrays.copyOfRange(reader.getSamples(), start, end);
      vad.acceptWaveform(samples);
      if (vad.isSpeechDetected()) {
        while (!vad.empty()) {
          SpeechSegment segment = vad.front();
          float startTime = segment.getStart() / 16000.0f;
          float duration = segment.getSamples().length / 16000.0f;

          OfflineStream stream = recognizer.createStream();
          stream.acceptWaveform(segment.getSamples(), 16000);
          recognizer.decode(stream);
          String text = recognizer.getResult(stream).getText();
          stream.release();

          if (!text.isEmpty()) {
            System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text);
          }

          vad.pop();
        }
      }
    }

    vad.flush();
    while (!vad.empty()) {
      SpeechSegment segment = vad.front();
      float startTime = segment.getStart() / 16000.0f;
      float duration = segment.getSamples().length / 16000.0f;

      OfflineStream stream = recognizer.createStream();
      stream.acceptWaveform(segment.getSamples(), 16000);
      recognizer.decode(stream);
      String text = recognizer.getResult(stream).getText();
      stream.release();

      if (!text.isEmpty()) {
        System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text);
      }

      vad.pop();
    }

    vad.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/VadNonStreamingParaformer.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use a silero_vad model with a non-streaming Paraformer
// for speech recognition.

import com.k2fsa.sherpa.onnx.*;
import java.util.Arrays;

public class VadNonStreamingParaformer {
  public static Vad createVad() {
    // please download ./silero_vad.onnx from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    String model = "./silero_vad.onnx";
    SileroVadModelConfig sileroVad =
        SileroVadModelConfig.builder()
            .setModel(model)
            .setThreshold(0.5f)
            .setMinSilenceDuration(0.25f)
            .setMinSpeechDuration(0.5f)
            .setWindowSize(512)
            .setMaxSpeechDuration(5.0f)
            .build();

    VadModelConfig config =
        VadModelConfig.builder()
            .setSileroVadModelConfig(sileroVad)
            .setSampleRate(16000)
            .setNumThreads(1)
            .setDebug(true)
            .setProvider("cpu")
            .build();

    return new Vad(config);
  }

  public static OfflineRecognizer createOfflineRecognizer() {
    // please refer to
    // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2023-09-14-chinese-english
    // to download model files
    String model = "./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx";
    String tokens = "./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt";

    OfflineParaformerModelConfig paraformer =
        OfflineParaformerModelConfig.builder().setModel(model).build();

    OfflineModelConfig modelConfig =
        OfflineModelConfig.builder()
            .setParaformer(paraformer)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineRecognizerConfig config =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .build();

    return new OfflineRecognizer(config);
  }

  public static void main(String[] args) {

    Vad vad = createVad();
    OfflineRecognizer recognizer = createOfflineRecognizer();

    // You can download the test file from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    String testWaveFilename = "./lei-jun-test.wav";
    WaveReader reader = new WaveReader(testWaveFilename);

    int numSamples = reader.getSamples().length;
    int numIter = numSamples / 512;

    for (int i = 0; i != numIter; ++i) {
      int start = i * 512;
      int end = start + 512;
      float[] samples = Arrays.copyOfRange(reader.getSamples(), start, end);
      vad.acceptWaveform(samples);
      if (vad.isSpeechDetected()) {
        while (!vad.empty()) {
          SpeechSegment segment = vad.front();
          float startTime = segment.getStart() / 16000.0f;
          float duration = segment.getSamples().length / 16000.0f;

          OfflineStream stream = recognizer.createStream();
          stream.acceptWaveform(segment.getSamples(), 16000);
          recognizer.decode(stream);
          String text = recognizer.getResult(stream).getText();
          stream.release();

          if (!text.isEmpty()) {
            System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text);
          }

          vad.pop();
        }
      }
    }

    vad.flush();
    while (!vad.empty()) {
      SpeechSegment segment = vad.front();
      float startTime = segment.getStart() / 16000.0f;
      float duration = segment.getSamples().length / 16000.0f;

      OfflineStream stream = recognizer.createStream();
      stream.acceptWaveform(segment.getSamples(), 16000);
      recognizer.decode(stream);
      String text = recognizer.getResult(stream).getText();
      stream.release();

      if (!text.isEmpty()) {
        System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text);
      }

      vad.pop();
    }

    vad.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/VadNonStreamingSenseVoice.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use a silero_vad model with a non-streaming SenseVoiceModel
// for speech recognition.

import com.k2fsa.sherpa.onnx.*;
import java.util.Arrays;

public class VadNonStreamingSenseVoice {
  public static Vad createVad() {
    // please download ./silero_vad.onnx from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    String model = "./silero_vad.onnx";
    SileroVadModelConfig sileroVad =
        SileroVadModelConfig.builder()
            .setModel(model)
            .setThreshold(0.5f)
            .setMinSilenceDuration(0.25f)
            .setMinSpeechDuration(0.5f)
            .setWindowSize(512)
            .setMaxSpeechDuration(5.0f)
            .build();

    VadModelConfig config =
        VadModelConfig.builder()
            .setSileroVadModelConfig(sileroVad)
            .setSampleRate(16000)
            .setNumThreads(1)
            .setDebug(true)
            .setProvider("cpu")
            .build();

    return new Vad(config);
  }

  public static OfflineRecognizer createOfflineRecognizer() {
    // please refer to
    // https://k2-fsa.github.io/sherpa/onnx/sense-voice/index.html
    // to download model files
    String model = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx";
    String tokens = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt";

    OfflineSenseVoiceModelConfig senseVoice =
        OfflineSenseVoiceModelConfig.builder().setModel(model).build();

    OfflineModelConfig modelConfig =
        OfflineModelConfig.builder()
            .setSenseVoice(senseVoice)
            .setTokens(tokens)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineRecognizerConfig config =
        OfflineRecognizerConfig.builder()
            .setOfflineModelConfig(modelConfig)
            .setDecodingMethod("greedy_search")
            .build();

    return new OfflineRecognizer(config);
  }

  public static void main(String[] args) {

    Vad vad = createVad();
    OfflineRecognizer recognizer = createOfflineRecognizer();

    // You can download the test file from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    String testWaveFilename = "./lei-jun-test.wav";
    WaveReader reader = new WaveReader(testWaveFilename);

    int numSamples = reader.getSamples().length;
    int numIter = numSamples / 512;

    for (int i = 0; i != numIter; ++i) {
      int start = i * 512;
      int end = start + 512;
      float[] samples = Arrays.copyOfRange(reader.getSamples(), start, end);
      vad.acceptWaveform(samples);
      if (vad.isSpeechDetected()) {
        while (!vad.empty()) {
          SpeechSegment segment = vad.front();
          float startTime = segment.getStart() / 16000.0f;
          float duration = segment.getSamples().length / 16000.0f;

          OfflineStream stream = recognizer.createStream();
          stream.acceptWaveform(segment.getSamples(), 16000);
          recognizer.decode(stream);
          String text = recognizer.getResult(stream).getText();
          stream.release();

          if (!text.isEmpty()) {
            System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text);
          }

          vad.pop();
        }
      }
    }

    vad.flush();
    while (!vad.empty()) {
      SpeechSegment segment = vad.front();
      float startTime = segment.getStart() / 16000.0f;
      float duration = segment.getSamples().length / 16000.0f;

      OfflineStream stream = recognizer.createStream();
      stream.acceptWaveform(segment.getSamples(), 16000);
      recognizer.decode(stream);
      String text = recognizer.getResult(stream).getText();
      stream.release();

      if (!text.isEmpty()) {
        System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text);
      }

      vad.pop();
    }

    vad.release();
    recognizer.release();
  }
}


================================================
FILE: java-api-examples/VadRemoveSilence.java
================================================
// Copyright 2024 Xiaomi Corporation

// This file shows how to use a silero_vad model to remove silences from
// a wave file.

import com.k2fsa.sherpa.onnx.*;
import java.util.ArrayList;
import java.util.Arrays;

public class VadRemoveSilence {
  public static void main(String[] args) {
    // please download ./silero_vad.onnx from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    String model = "./silero_vad.onnx";
    SileroVadModelConfig sileroVad =
        SileroVadModelConfig.builder()
            .setModel(model)
            .setThreshold(0.5f)
            .setMinSilenceDuration(0.25f)
            .setMinSpeechDuration(0.5f)
            .setWindowSize(512)
            .setMaxSpeechDuration(5.0f)
            .build();

    VadModelConfig config =
        VadModelConfig.builder()
            .setSileroVadModelConfig(sileroVad)
            .setSampleRate(16000)
            .setNumThreads(1)
            .setDebug(true)
            .setProvider("cpu")
            .build();

    Vad vad = new Vad(config);

    // You can download the test file from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
    String testWaveFilename = "./lei-jun-test.wav";
    WaveReader reader = new WaveReader(testWaveFilename);

    int numSamples = reader.getSamples().length;
    int numIter = numSamples / 512;

    ArrayList<float[]> segments = new ArrayList<float[]>();

    for (int i = 0; i != numIter; ++i) {
      int start = i * 512;
      int end = start + 512;
      float[] samples = Arrays.copyOfRange(reader.getSamples(), start, end);
      vad.acceptWaveform(samples);
      if (vad.isSpeechDetected()) {
        while (!vad.empty()) {

          // if you want to get the starting time of this segment, you can use
          /* float startTime = vad.front().getStart() / 16000.0f; */

          segments.add(vad.front().getSamples());
          vad.pop();
        }
      }
    }

    vad.flush();
    while (!vad.empty()) {

      // if you want to get the starting time of this segment, you can use
      /* float startTime = vad.front().getStart() / 16000.0f; */

      segments.add(vad.front().getSamples());
      vad.pop();
    }

    // get total number of samples
    int n = 0;
    for (float[] s : segments) {
      n += s.length;
    }

    float[] allSamples = new float[n];
    int i = 0;
    for (float[] s : segments) {
      System.arraycopy(s, 0, allSamples, i, s.length);
      i += s.length;
    }

    String outFilename = "lei-jun-test-no-silence.wav";
    WaveWriter.write(outFilename, allSamples, 16000);
    System.out.printf("Saved to %s\n", outFilename);

    vad.release();
  }
}


================================================
FILE: java-api-examples/VersionTest.java
================================================
// Copyright 2025 Xiaomi Corporation

import com.k2fsa.sherpa.onnx.*;

public class VersionTest {
  public static void main(String[] args) {
    System.out.printf("sherpa-onnx version: %s\n", VersionInfo.getVersion());
    System.out.printf("sherpa-onnx gitSha1: %s\n", VersionInfo.getGitSha1());
    System.out.printf("sherpa-onnx gitDate: %s\n", VersionInfo.getGitDate());
  }
}


================================================
FILE: java-api-examples/ZipVoiceTts.java
================================================
// Copyright 2026 Xiaomi Corporation

// This file shows how to use a ZipVoice Chinese/English model
// for zero-shot text to speech.
import com.k2fsa.sherpa.onnx.*;
import java.util.HashMap;
import java.util.Map;

public class ZipVoiceTts {
  public static void main(String[] args) {
    LibraryUtils.enableDebug();
    // please visit
    // https://k2-fsa.github.io/sherpa/onnx/tts/zipvoice.html
    // to download model files
    String modelDir = "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia";
    String referenceAudioFilename = modelDir + "/test_wavs/leijun-1.wav";
    String text = "小米的价值观是真诚, 热爱. 真诚，就是不欺人也不自欺. 热爱, 就是全心投入并享受其中.";
    String referenceText = "那还是三十六年前, 一九八七年. 我呢考上了武汉大学的计算机系.";

    OfflineTtsZipVoiceModelConfig zipvoiceModelConfig =
        OfflineTtsZipVoiceModelConfig.builder()
            .setTokens(modelDir + "/tokens.txt")
            .setEncoder(modelDir + "/encoder.int8.onnx")
            .setDecoder(modelDir + "/decoder.int8.onnx")
            .setVocoder("./vocos_24khz.onnx")
            .setDataDir(modelDir + "/espeak-ng-data")
            .setLexicon(modelDir + "/lexicon.txt")
            .build();

    OfflineTtsModelConfig modelConfig =
        OfflineTtsModelConfig.builder()
            .setZipvoice(zipvoiceModelConfig)
            .setNumThreads(2)
            .setDebug(false)
            .build();

    OfflineTtsConfig config = OfflineTtsConfig.builder().setModel(modelConfig).build();
    OfflineTts tts = new OfflineTts(config);

    WaveReader reader = new WaveReader(referenceAudioFilename);

    GenerationConfig genConfig = new GenerationConfig();
    genConfig.setReferenceAudio(reader.getSamples());
    genConfig.setReferenceSampleRate(reader.getSampleRate());
    genConfig.setReferenceText(referenceText);
    genConfig.setNumSteps(4);

    Map<String, String> extra = new HashMap<>();
    extra.put("min_char_in_sentence", "10");
    genConfig.setExtra(extra);

    long start = System.currentTimeMillis();
    GeneratedAudio audio =
        tts.generateWithConfigAndCallback(
            text,
            genConfig,
            samples -> {
              System.out.println("callback got called with " + samples.length + " samples");
              return 1;
            });
    long stop = System.currentTimeMillis();

    float timeElapsedSeconds = (stop - start) / 1000.0f;
    float audioDuration = audio.getSamples().length / (float) audio.getSampleRate();
    float realTimeFactor = timeElapsedSeconds / audioDuration;

    String waveFilename = "generated-zipvoice-zh-en-java.wav";
    audio.save(waveFilename);
    System.out.printf("-- elapsed : %.3f seconds\n", timeElapsedSeconds);
    System.out.printf("-- audio duration: %.3f seconds\n", audioDuration);
    System.out.printf("-- real-time factor (RTF): %.3f\n", realTimeFactor);
    System.out.printf("-- text: %s\n", text);
    System.out.printf("-- Saved to %s\n", waveFilename);

    tts.release();
  }
}


================================================
FILE: java-api-examples/run-audio-tagging-ced-from-file.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2
  tar xvf sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2
  rm sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./AudioTaggingCEDFromFile.java


================================================
FILE: java-api-examples/run-audio-tagging-zipformer-from-file.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
  tar xvf sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
  rm sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./AudioTaggingZipformerFromFile.java


================================================
FILE: java-api-examples/run-inverse-text-normalization-paraformer.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2

  tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
  rm sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
fi

if [ ! -f ./itn-zh-number.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
fi

if [ ! -f ./itn_zh_number.fst ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  InverseTextNormalizationNonStreamingParaformer.java


================================================
FILE: java-api-examples/run-inverse-text-normalization-transducer.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
fi

if [ ! -f ./itn-zh-number.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
fi

if [ ! -f ./itn_zh_number.fst ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  InverseTextNormalizationStreamingTransducer.java


================================================
FILE: java-api-examples/run-kws-from-file.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./KeywordSpotterFromFile.java


================================================
FILE: java-api-examples/run-non-streaming-decode-file-dolphin-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  ls -lh sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingDecodeFileDolphinCtc.java


================================================
FILE: java-api-examples/run-non-streaming-decode-file-fire-red-asr-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
  tar xvf sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
  rm sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
fi


java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingDecodeFileFireRedAsrCtc.java


================================================
FILE: java-api-examples/run-non-streaming-decode-file-fire-red-asr.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  ls -lh sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingDecodeFileFireRedAsr.java


================================================
FILE: java-api-examples/run-non-streaming-decode-file-funasr-nano.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./sherpa-onnx-funasr-nano-int8-2025-12-30/embedding.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
  tar xvf sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
  rm sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingDecodeFileFunAsrNano.java


================================================
FILE: java-api-examples/run-non-streaming-decode-file-medasr-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
  tar xvf sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
  rm sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
fi


java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingDecodeFileMedAsrCtc.java


================================================
FILE: java-api-examples/run-non-streaming-decode-file-moonshine-v2.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
  tar xvf sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
  rm sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingDecodeFileMoonshineV2.java


================================================
FILE: java-api-examples/run-non-streaming-decode-file-moonshine.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingDecodeFileMoonshine.java


================================================
FILE: java-api-examples/run-non-streaming-decode-file-nemo-canary.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingDecodeFileNemoCanary.java


================================================
FILE: java-api-examples/run-non-streaming-decode-file-nemo.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./sherpa-onnx-nemo-ctc-en-citrinet-512/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-ctc-en-citrinet-512.tar.bz2
  tar xvf sherpa-onnx-nemo-ctc-en-citrinet-512.tar.bz2
  rm sherpa-onnx-nemo-ctc-en-citrinet-512.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingDecodeFileNemo.java


================================================
FILE: java-api-examples/run-non-streaming-decode-file-omnilingual-asr-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
  tar xvf sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
  rm sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
fi


java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingDecodeFileOmnilingualAsrCtc.java


================================================
FILE: java-api-examples/run-non-streaming-decode-file-paraformer.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2

  tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
  rm sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingDecodeFileParaformer.java


================================================
FILE: java-api-examples/run-non-streaming-decode-file-sense-voice-with-hr.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
fi

if [ ! -d dict ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
  tar xf dict.tar.bz2
  rm dict.tar.bz2

  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingDecodeFileSenseVoiceWithHr.java


================================================
FILE: java-api-examples/run-non-streaming-decode-file-sense-voice.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingDecodeFileSenseVoice.java


================================================
FILE: java-api-examples/run-non-streaming-decode-file-tele-speech-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
  tar xvf sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
  rm sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./NonStreamingDecodeFileTeleSpeechCtc.java


================================================
FILE: java-api-examples/run-non-streaming-decode-file-transducer-hotwords.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./sherpa-onnx-conformer-zh-stateless2-2023-05-23/tokens.txt ]; then
  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-conformer-zh-stateless2-2023-05-23.tar.bz2
  tar xvf sherpa-onnx-conformer-zh-stateless2-2023-05-23.tar.bz2
  rm sherpa-onnx-conformer-zh-stateless2-2023-05-23.tar.bz2
fi

if [ ! -f hotwords_cn.txt ]; then
  cat > hotwords_cn.txt <<EOF
朱丽楠
EOF
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingDecodeFileTransducerHotwords.java


================================================
FILE: java-api-examples/run-non-streaming-decode-file-transducer.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-gigaspeech-2023-12-12.tar.bz2

  tar xvf sherpa-onnx-zipformer-gigaspeech-2023-12-12.tar.bz2
  rm sherpa-onnx-zipformer-gigaspeech-2023-12-12.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingDecodeFileTransducer.java


================================================
FILE: java-api-examples/run-non-streaming-decode-file-wenet-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingDecodeFileWenetCtc.java


================================================
FILE: java-api-examples/run-non-streaming-decode-file-whisper-multiple.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2

  tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
  rm sherpa-onnx-whisper-tiny.en.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingDecodeFileWhisperMultiple.java


================================================
FILE: java-api-examples/run-non-streaming-decode-file-whisper.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2

  tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
  rm sherpa-onnx-whisper-tiny.en.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingDecodeFileWhisper.java


================================================
FILE: java-api-examples/run-non-streaming-decode-file-zipformer-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2

  tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
  rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingDecodeFileZipformerCtc.java


================================================
FILE: java-api-examples/run-non-streaming-speech-enhancement-dpdfnet.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./dpdfnet_baseline.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet_baseline.onnx
fi

if [ ! -f ./inp_16k.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingSpeechEnhancementDpdfNet.java


================================================
FILE: java-api-examples/run-non-streaming-speech-enhancement-gtcrn.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./gtcrn_simple.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
fi

if [ ! -f ./inp_16k.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingSpeechEnhancementGtcrn.java


================================================
FILE: java-api-examples/run-non-streaming-tts-coqui-de.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# please visit
# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
# to download more models
if [ ! -f ./vits-coqui-de-css10/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2
  tar xf vits-coqui-de-css10.tar.bz2
  rm vits-coqui-de-css10.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingTtsCoquiDe.java


================================================
FILE: java-api-examples/run-non-streaming-tts-kitten-en.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kitten.html
# to download more models

if [ ! -f ./kitten-nano-en-v0_1-fp16/model.fp16.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2
  tar xf kitten-nano-en-v0_1-fp16.tar.bz2
  rm kitten-nano-en-v0_1-fp16.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingTtsKittenEn.java


================================================
FILE: java-api-examples/run-non-streaming-tts-kokoro-en.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
# to download more models
if [ ! -f ./kokoro-en-v0_19/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
  tar xf kokoro-en-v0_19.tar.bz2
  rm kokoro-en-v0_19.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingTtsKokoroEn.java


================================================
FILE: java-api-examples/run-non-streaming-tts-kokoro-zh-en.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
# to download more models
if [ ! -f ./kokoro-multi-lang-v1_0/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
  tar xf kokoro-multi-lang-v1_0.tar.bz2
  rm kokoro-multi-lang-v1_0.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingTtsKokoroZhEn.java


================================================
FILE: java-api-examples/run-non-streaming-tts-matcha-en.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
# matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
  tar xf matcha-icefall-en_US-ljspeech.tar.bz2
  rm matcha-icefall-en_US-ljspeech.tar.bz2
fi

if [ ! -f ./vocos-22khz-univ.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingTtsMatchaEn.java


================================================
FILE: java-api-examples/run-non-streaming-tts-matcha-zh.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
  tar xvf matcha-icefall-zh-baker.tar.bz2
  rm matcha-icefall-zh-baker.tar.bz2
fi

if [ ! -f ./vocos-22khz-univ.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingTtsMatchaZh.java


================================================
FILE: java-api-examples/run-non-streaming-tts-piper-en-with-callback.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# please visit
# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
# to download more models
if [ ! -f ./vits-piper-en_GB-cori-medium/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_GB-cori-medium.tar.bz2
  tar xf vits-piper-en_GB-cori-medium.tar.bz2
  rm vits-piper-en_GB-cori-medium.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingTtsPiperEnWithCallback.java


================================================
FILE: java-api-examples/run-non-streaming-tts-piper-en.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# please visit
# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
# to download more models
if [ ! -f ./vits-piper-en_GB-cori-medium/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_GB-cori-medium.tar.bz2
  tar xf vits-piper-en_GB-cori-medium.tar.bz2
  rm vits-piper-en_GB-cori-medium.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingTtsPiperEn.java


================================================
FILE: java-api-examples/run-non-streaming-tts-vits-zh.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# please visit
# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
# to download more models
if [ ! -f ./vits-zh-hf-fanchen-C/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-hf-fanchen-C.tar.bz2
  tar xf vits-zh-hf-fanchen-C.tar.bz2
  rm vits-zh-hf-fanchen-C.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingTtsVitsZh.java


================================================
FILE: java-api-examples/run-non-streaming-websocket-client.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f zh.wav ]; then
  # wget https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/zh.wav
  wget https://hf-mirror.com/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/zh.wav
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingWebsocketClient.java


================================================
FILE: java-api-examples/run-offline-add-punctuation-zh-en.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
  tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
  rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./OfflineAddPunctuation.java


================================================
FILE: java-api-examples/run-offline-speaker-diarization.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
fi

if [ ! -f ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
fi

if [ ! -f ./0-four-speakers-zh.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./OfflineSpeakerDiarizationDemo.java


================================================
FILE: java-api-examples/run-online-add-punctuation-zh-en.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./sherpa-onnx-online-punct-en-2024-08-06/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
  tar xvf sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
  rm sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./OnlineAddPunctuation.java


================================================
FILE: java-api-examples/run-pocket-tts.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# please visit
# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
# to download more models

if [ ! -f ./sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
  tar xvf sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
  rm sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
fi

if false; then
  javac \
    -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
    PocketTts.java
  javap -p -s PocketTts.class
  javap -p -s PocketTts$1.class
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  PocketTts.java


================================================
FILE: java-api-examples/run-speaker-identification.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
fi

if [ ! -f ./sr-data/enroll/leijun-sr-1.wav ]; then
  curl -SL -o sr-data.tar.gz https://github.com/csukuangfj/sr-data/archive/refs/tags/v1.0.0.tar.gz
  tar xvf sr-data.tar.gz
  mv sr-data-1.0.0 sr-data
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./SpeakerIdentification.java


================================================
FILE: java-api-examples/run-spoken-language-identification-whisper.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# Note that it needs a multilingual whisper model. so, for example, tiny works while tiny.en does not work
# https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
if [ ! -f ./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
  tar xvf sherpa-onnx-whisper-tiny.tar.bz2
  rm sherpa-onnx-whisper-tiny.tar.bz2
fi

if [ ! -f ./spoken-language-identification-test-wavs/en-english.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/spoken-language-identification-test-wavs.tar.bz2
  tar xvf spoken-language-identification-test-wavs.tar.bz2
  rm spoken-language-identification-test-wavs.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./SpokenLanguageIdentificationWhisper.java


================================================
FILE: java-api-examples/run-streaming-asr-from-mic-transducer.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
fi

if [ ! -f ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
fi

if [ ! -f ./itn_zh_number.fst ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./StreamingAsrFromMicTransducer.java


================================================
FILE: java-api-examples/run-streaming-decode-file-ctc-hlg.sh
================================================
#!/usr/bin/env bash
set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  StreamingDecodeFileCtcHLG.java


================================================
FILE: java-api-examples/run-streaming-decode-file-ctc.sh
================================================
#!/usr/bin/env bash
set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  StreamingDecodeFileCtc.java


================================================
FILE: java-api-examples/run-streaming-decode-file-paraformer.sh
================================================
#!/usr/bin/env bash
set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
  tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
  rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  StreamingDecodeFileParaformer.java


================================================
FILE: java-api-examples/run-streaming-decode-file-tone-ctc.sh
================================================
#!/usr/bin/env bash
set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  StreamingDecodeFileToneCtc.java


================================================
FILE: java-api-examples/run-streaming-decode-file-transducer.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
fi

if [ ! -f ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  StreamingDecodeFileTransducer.java


================================================
FILE: java-api-examples/run-streaming-speech-enhancement-dpdfnet.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./dpdfnet_baseline.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet_baseline.onnx
fi

if [ ! -f ./inp_16k.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  StreamingSpeechEnhancementDpdfNet.java


================================================
FILE: java-api-examples/run-streaming-speech-enhancement-gtcrn.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./gtcrn_simple.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
fi

if [ ! -f ./inp_16k.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  StreamingSpeechEnhancementGtcrn.java


================================================
FILE: java-api-examples/run-supertonic-tts.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# please visit
# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
# to download more models

if [ ! -f ./sherpa-onnx-supertonic-tts-int8-2026-03-06/duration_predictor.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
  tar xvf sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
  rm sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  SupertonicTts.java


================================================
FILE: java-api-examples/run-ten-vad-remove-silence.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./ten-vad.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
fi

if [ ! -f ./lei-jun-test.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./TenVadRemoveSilence.java


================================================
FILE: java-api-examples/run-vad-from-mic-non-streaming-moonshine.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./silero_vad.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./VadFromMicWithNonStreamingMoonshine.java


================================================
FILE: java-api-examples/run-vad-from-mic-non-streaming-paraformer.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./silero_vad.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [ ! -f ./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2

  tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
  rm sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
fi

if [ ! -f ./itn_zh_number.fst ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./VadFromMicWithNonStreamingParaformer.java


================================================
FILE: java-api-examples/run-vad-from-mic-non-streaming-sense-voice.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./silero_vad.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [ ! -f ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./VadFromMicWithNonStreamingSenseVoice.java


================================================
FILE: java-api-examples/run-vad-from-mic-non-streaming-whisper.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./silero_vad.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [ ! -f ./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2

  tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
  rm sherpa-onnx-whisper-tiny.en.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./VadFromMicWithNonStreamingWhisper.java


================================================
FILE: java-api-examples/run-vad-from-mic.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./silero_vad.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./VadFromMic.java


================================================
FILE: java-api-examples/run-vad-non-streaming-dolphin-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./silero_vad.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [ ! -f ./lei-jun-test.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

if [ ! -f ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  ls -lh sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./VadNonStreamingDolphinCtc.java


================================================
FILE: java-api-examples/run-vad-non-streaming-paraformer.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./silero_vad.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [ ! -f ./lei-jun-test.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

if [ ! -f ./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2

  tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
  rm sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./VadNonStreamingParaformer.java


================================================
FILE: java-api-examples/run-vad-non-streaming-sense-voice.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./silero_vad.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [ ! -f ./lei-jun-test.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

if [ ! -f ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./VadNonStreamingSenseVoice.java


================================================
FILE: java-api-examples/run-vad-remove-silence.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

if [ ! -f ./silero_vad.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [ ! -f ./lei-jun-test.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./VadRemoveSilence.java


================================================
FILE: java-api-examples/run-version-test.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./VersionTest.java


================================================
FILE: java-api-examples/run-zipvoice-tts.sh
================================================
#!/usr/bin/env bash

set -ex

if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/zipvoice.html
# to download more models
if [ ! -f ./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
  tar xf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
  rm sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
fi

if [ ! -f ./vocos_24khz.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos_24khz.onnx
fi

java \
  -Djava.library.path=$PWD/../build/lib \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ZipVoiceTts.java


================================================
FILE: java-api-examples/src/websocketsrv/AsrWebsocketClient.java
================================================
/*
 * // Copyright 2022-2023 by zhaomingwork
 */
// java AsrWebsocketClient
// usage: AsrWebsocketClient soPath srvIp srvPort wavPath numThreads
package websocketsrv;

import com.k2fsa.sherpa.onnx.OnlineRecognizer;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.*;
import java.util.Map;
import org.java_websocket.client.WebSocketClient;
import org.java_websocket.drafts.Draft;
import org.java_websocket.handshake.ServerHandshake;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/** This example demonstrates how to connect to websocket server. */
public class AsrWebsocketClient extends WebSocketClient {
  private static final Logger logger = LoggerFactory.getLogger(AsrWebsocketClient.class);

  public AsrWebsocketClient(URI serverUri, Draft draft) {
    super(serverUri, draft);
  }

  public AsrWebsocketClient(URI serverURI) {
    super(serverURI);
  }

  public AsrWebsocketClient(URI serverUri, Map<String, String> httpHeaders) {
    super(serverUri, httpHeaders);
  }

  @Override
  public void onOpen(ServerHandshake handshakedata) {

    float[] floats = OnlineRecognizer.readWavFile(AsrWebsocketClient.wavPath);
    ByteBuffer buffer =
        ByteBuffer.allocate(4 * floats.length)
            .order(ByteOrder.LITTLE_ENDIAN); // float is sizeof 4. allocate enough buffer

    for (float f : floats) {
      buffer.putFloat(f);
    }
    buffer.rewind();
    buffer.flip();
    buffer.order(ByteOrder.LITTLE_ENDIAN);

    send(buffer.array()); // send buf to server
    send("Done"); // send 'Done' means finished
  }

  @Override
  public void onMessage(String message) {

    logger.info("received: " + message);
  }

  @Override
  public void onClose(int code, String reason, boolean remote) {

    logger.info(
        "Connection closed by "
            + (remote ? "remote peer" : "us")
            + " Code: "
            + code
            + " Reason: "
            + reason);
  }

  @Override
  public void onError(Exception ex) {
    ex.printStackTrace();
    // if the error is fatal then onClose will be called additionally
  }

  public static OnlineRecognizer rcgobj;
  public static String wavPath;

  public static void main(String[] args) throws URISyntaxException {

    if (args.length != 5) {
      System.out.println("usage: AsrWebsocketClient soPath srvIp srvPort wavPath numThreads");
      return;
    }

    String soPath = args[0];
    String srvIp = args[1];
    String srvPort = args[2];
    String wavPath = args[3];
    int numThreads = Integer.parseInt(args[4]);
    System.out.println("serIp=" + srvIp + ",srvPort=" + srvPort + ",wavPath=" + wavPath);

    class ClientThread implements Runnable {

      String soPath;
      String srvIp;
      String srvPort;
      String wavPath;

      ClientThread(String soPath, String srvIp, String srvPort, String wavPath) {
        this.soPath = soPath;
        this.srvIp = srvIp;
        this.srvPort = srvPort;
        this.wavPath = wavPath;
      }

      public void run() {
        try {

          OnlineRecognizer.setSoPath(soPath);

          AsrWebsocketClient.wavPath = wavPath;

          String wsAddress = "ws://" + srvIp + ":" + srvPort;
          AsrWebsocketClient c = new AsrWebsocketClient(new URI(wsAddress));

          c.connect();
        } catch (Exception e) {
          e.printStackTrace();
        }
      }
    }
    for (int i = 0; i < numThreads; i++) {
      System.out.println("Thread1 is running...");
      Thread t = new Thread(new ClientThread(soPath, srvIp, srvPort, wavPath));
      t.start();
    }
  }
}


================================================
FILE: java-api-examples/src/websocketsrv/AsrWebsocketServer.java
================================================
/*
 * // Copyright 2022-2023 by zhaoming
 */
// java websocketServer
// usage: AsrWebsocketServer soPath modelCfgPath
package websocketsrv;

import com.k2fsa.sherpa.onnx.OnlineRecognizer;
import com.k2fsa.sherpa.onnx.OnlineStream;
import java.io.*;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.UnknownHostException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.FloatBuffer;
import java.util.*;
import java.util.Collections;
import java.util.concurrent.*;
import java.util.concurrent.LinkedBlockingQueue;
import org.java_websocket.WebSocket;
import org.java_websocket.drafts.Draft;
import org.java_websocket.drafts.Draft_6455;
import org.java_websocket.handshake.ClientHandshake;
import org.java_websocket.server.WebSocketServer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * AsrWebSocketServer has three threads pools, one pool for network io, one pool for asr stream and
 * one pool for asr decoder.
 */
public class AsrWebsocketServer extends WebSocketServer {
  private static final Logger logger = LoggerFactory.getLogger(AsrWebsocketServer.class);
  //  Queue between io network io thread pool and stream thread pool, use websocket as the key
  private LinkedBlockingQueue<WebSocket> streamQueue = new LinkedBlockingQueue<WebSocket>();
  //  Queue waiting for deocdeing, use websocket as the key
  private LinkedBlockingQueue<WebSocket> decoderQueue = new LinkedBlockingQueue<WebSocket>();

  // recognizer object
  private OnlineRecognizer rcgOjb = null;

  // mapping between websocket connection and connection data
  private ConcurrentHashMap<WebSocket, ConnectionData> connectionMap =
      new ConcurrentHashMap<WebSocket, ConnectionData>();

  public AsrWebsocketServer(int port, int numThread) throws UnknownHostException {
    // server port and num of threads for  network io
    super(new InetSocketAddress(port), numThread);
  }

  public AsrWebsocketServer(InetSocketAddress address) {
    super(address);
  }

  public AsrWebsocketServer(int port, Draft_6455 draft) {
    super(new InetSocketAddress(port), Collections.<Draft>singletonList(draft));
  }

  @Override
  public void onOpen(WebSocket conn, ClientHandshake handshake) {}

  @Override
  public void onClose(WebSocket conn, int code, String reason, boolean remote) {
    connectionMap.remove(conn);
    logger.info(
        conn
            + " remove one connection!, now connection number="
            + String.valueOf(connectionMap.size()));
  }

  @Override
  public void onMessage(WebSocket conn, String message) {
    // this is text message
    try {
      // if rec "Done" msg from client
      if (message.equals("Done")) {
        ConnectionData connData = creatOrGetConnectionData(conn);
        connData.setEof(true);
        if (!streamQueueFind(conn)) {
          streamQueue.put(conn);
        }
      }

    } catch (Exception e) {
      e.printStackTrace();
    }
  }

  private ConnectionData creatOrGetConnectionData(WebSocket conn) {
    // create a new connection data if not in connection map or return the existed one

    ConnectionData connData = null;
    try {
      if (!connectionMap.containsKey(conn)) {
        OnlineStream stream = rcgOjb.createStream();
        connData = new ConnectionData(conn, stream);
        connectionMap.put(conn, connData);
      } else {
        connData = connectionMap.get(conn);
      }

      logger.info(
          conn.getRemoteSocketAddress().getAddress().getHostAddress()
              + " open one connection,, now connection number="
              + String.valueOf(connectionMap.size()));

    } catch (Exception e) {
      System.err.println(e);
      e.printStackTrace();
    }
    return connData;
  }

  @Override
  public void onMessage(WebSocket conn, ByteBuffer blob) {
    try {

      // for handle binary data
      blob.order(ByteOrder.LITTLE_ENDIAN); // set little endian

      // set to float
      FloatBuffer floatbuf = blob.asFloatBuffer();

      if (floatbuf.capacity() > 0) {
        // allocate memory for float data
        float[] arr = new float[floatbuf.capacity()];

        floatbuf.get(arr);
        ConnectionData connData = creatOrGetConnectionData(conn);
        // put websocket  to stream queue with binary type==1
        connData.addSamplesToData(arr);

        if (!streamQueueFind(conn)) {
          streamQueue.put(conn);
        }
      }
    } catch (Exception e) {
      e.printStackTrace();
    }
  }

  public boolean streamQueueFind(WebSocket conn) {
    return streamQueue.contains(conn);
  }

  public void initModelWithCfg(Map<String, String> cfgMap, String cfgPath) {
    try {

      rcgOjb = new OnlineRecognizer(cfgPath);
      // size of stream thread pool
      int streamThreadNum = Integer.valueOf(cfgMap.getOrDefault("stream_thread_num", "16"));
      // size of decoder thread pool
      int decoderThreadNum = Integer.valueOf(cfgMap.getOrDefault("decoder_thread_num", "16"));

      // time(ms) idle for decoder thread when no job
      int decoderTimeIdle = Integer.valueOf(cfgMap.getOrDefault("decoder_time_idle", "200"));
      // size of streams for parallel decoding
      int parallelDecoderNum = Integer.valueOf(cfgMap.getOrDefault("parallel_decoder_num", "16"));
      // time(ms) out for connection data
      int deocderTimeOut = Integer.valueOf(cfgMap.getOrDefault("deocder_time_out", "30000"));

      // create stream threads
      for (int i = 0; i < streamThreadNum; i++) {
        new StreamThreadHandler(streamQueue, decoderQueue, connectionMap).start();
      }
      // create decoder threads
      for (int i = 0; i < decoderThreadNum; i++) {
        new DecoderThreadHandler(
                decoderQueue,
                connectionMap,
                rcgOjb,
                decoderTimeIdle,
                parallelDecoderNum,
                deocderTimeOut)
            .start();
      }
    } catch (Exception e) {
      System.err.println(e);
      e.printStackTrace();
    }
  }

  public static Map<String, String> readProperties(String CfgPath) {
    // read and parse config file
    Properties props = new Properties();
    Map<String, String> proMap = new HashMap<String, String>();
    try {

      File file = new File(CfgPath);
      if (!file.exists()) {
        logger.info(String.valueOf(CfgPath) + " cfg file not exists!");
        System.exit(0);
      }
      InputStream in = new BufferedInputStream(new FileInputStream(CfgPath));
      props.load(in);
      Enumeration en = props.propertyNames();
      while (en.hasMoreElements()) {
        String key = (String) en.nextElement();
        String Property = props.getProperty(key);
        proMap.put(key, Property);
      }

    } catch (Exception e) {
      e.printStackTrace();
    }
    return proMap;
  }

  public static void main(String[] args) throws InterruptedException, IOException {
    if (args.length != 2) {
      logger.info("usage: AsrWebsocketServer soPath modelCfgPath");

      return;
    }

    String soPath = args[0];
    String cfgPath = args[1];

    OnlineRecognizer.setSoPath(soPath);
    logger.info("readProperties");
    Map<String, String> cfgMap = AsrWebsocketServer.readProperties(cfgPath);
    int port = Integer.valueOf(cfgMap.getOrDefault("port", "8890"));

    int connectionThreadNum = Integer.valueOf(cfgMap.getOrDefault("connection_thread_num", "16"));
    AsrWebsocketServer s = new AsrWebsocketServer(port, connectionThreadNum);
    logger.info("initModelWithCfg");
    s.initModelWithCfg(cfgMap, cfgPath);
    logger.info("Server started on port: " + s.getPort());
    s.start();
  }

  @Override
  public void onError(WebSocket conn, Exception ex) {
    ex.printStackTrace();
    if (conn != null) {
      // some errors like port binding failed may not be assignable to a specific websocket
    }
  }

  @Override
  public void onStart() {
    logger.info("Server started!");
    setConnectionLostTimeout(0);
    setConnectionLostTimeout(100);
  }
}


================================================
FILE: java-api-examples/src/websocketsrv/ConnectionData.java
================================================
/*
 * // Copyright 2022-2023 by zhaoming
 */
// connection data act as a bridge between different threads pools

package websocketsrv;

import com.k2fsa.sherpa.onnx.OnlineStream;
import java.time.LocalDateTime;
import java.util.LinkedList;
import java.util.Queue;
import java.util.concurrent.*;
import org.java_websocket.WebSocket;

public class ConnectionData {

  private WebSocket webSocket; // the websocket for this connection data

  private OnlineStream stream; // connection stream

  private Queue<float[]> queueSamples =
      new LinkedList<float[]>(); // binary data rec from the client

  private boolean eof = false; // connection data is done

  private LocalDateTime lastHandleTime; // used for time out in ms

  public ConnectionData(WebSocket webSocket, OnlineStream stream) {
    this.webSocket = webSocket;

    this.stream = stream;
  }

  public void addSamplesToData(float[] samples) {
    this.queueSamples.add(samples);
  }

  public LocalDateTime getLastHandleTime() {
    return this.lastHandleTime;
  }

  public void setLastHandleTime(LocalDateTime now) {
    this.lastHandleTime = now;
  }

  public boolean getEof() {
    return this.eof;
  }

  public void setEof(boolean eof) {
    this.eof = eof;
  }

  public WebSocket getWebSocket() {
    return this.webSocket;
  }

  public Queue<float[]> getQueueSamples() {
    return this.queueSamples;
  }

  public OnlineStream getStream() {
    return this.stream;
  }
}


================================================
FILE: java-api-examples/src/websocketsrv/DecoderThreadHandler.java
================================================
/*
 * // Copyright 2022-2023 by zhaoming
 */
// java DecoderThreadHandler
package websocketsrv;

import com.k2fsa.sherpa.onnx.OnlineRecognizer;
import com.k2fsa.sherpa.onnx.OnlineStream;
import java.nio.*;
import java.nio.charset.StandardCharsets;
import java.time.LocalDateTime;
import java.util.*;
import java.util.List;
import java.util.concurrent.*;
import java.util.concurrent.LinkedBlockingQueue;
import org.java_websocket.WebSocket;
import org.java_websocket.drafts.Draft;
import org.java_websocket.framing.Framedata;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class DecoderThreadHandler extends Thread {
  private static final Logger logger = LoggerFactory.getLogger(DecoderThreadHandler.class);
  // Websocket Queue that waiting for decoding
  private LinkedBlockingQueue<WebSocket> decoderQueue;
  // the mapping between websocket and connection data
  private ConcurrentHashMap<WebSocket, ConnectionData> connMap;

  private OnlineRecognizer rcgOjb = null; // recgnizer object

  // connection data list for this thread to decode in parallel
  private List<ConnectionData> connDataList = new ArrayList<ConnectionData>();

  private int parallelDecoderNum = 10; // parallel decoding number
  private int deocderTimeIdle = 10; // idle time(ms) when no job
  private int deocderTimeOut = 3000; // if it is timeout(ms), the connection data will be removed

  public DecoderThreadHandler(
      LinkedBlockingQueue<WebSocket> decoderQueue,
      ConcurrentHashMap<WebSocket, ConnectionData> connMap,
      OnlineRecognizer rcgOjb,
      int deocderTimeIdle,
      int parallelDecoderNum,
      int deocderTimeOut) {
    this.decoderQueue = decoderQueue;
    this.connMap = connMap;
    this.rcgOjb = rcgOjb;
    this.deocderTimeIdle = deocderTimeIdle;
    this.parallelDecoderNum = parallelDecoderNum;
    this.deocderTimeOut = deocderTimeOut;
  }

  public void run() {
    while (true) {
      try {
        // time(ms) idle  if there is no job

        Thread.sleep(deocderTimeIdle);
        // clear data list for this threads
        connDataList.clear();
        if (rcgOjb == null) continue;

        // loop for total decoder Queue
        while (!decoderQueue.isEmpty()) {

          // get websocket
          WebSocket conn = decoderQueue.take();
          // get connection data according to websocket
          ConnectionData connData = connMap.get(conn);

          // if the websocket closed, continue
          if (connData == null) continue;
          // get the stream
          OnlineStream stream = connData.getStream();

          // put to decoder list if 1) stream is ready; 2) and
          // size not > parallelDecoderNum
          if ((rcgOjb.isReady(stream) && connDataList.size() < parallelDecoderNum)) {

            // add to this thread's decoder list
            connDataList.add(connData);
            // change the handled time for this connection data
            connData.setLastHandleTime(LocalDateTime.now());
          }
          // break when decoder list size >= parallelDecoderNum
          if (connDataList.size() >= parallelDecoderNum) {
            break;
          }
        }

        // if decoder data list for this thread >0
        if (connDataList.size() > 0) {

          // create a stream array for parallel decoding
          OnlineStream[] arr = new OnlineStream[connDataList.size()];
          for (int i = 0; i < connDataList.size(); i++) {

            arr[i] = connDataList.get(i).getStream();
          }

          // parallel decoding
          rcgOjb.decodeStreams(arr);
        }

        // get result for each connection
        for (ConnectionData connData : connDataList) {

          OnlineStream stream = connData.getStream();
          WebSocket webSocket = connData.getWebSocket();

          String txtResult = rcgOjb.getResult(stream);

          // decode text in utf-8
          byte[] utf8Data = txtResult.getBytes(StandardCharsets.UTF_8);

          boolean isEof = (connData.getEof() == true && !rcgOjb.isReady(stream));
          // result
          if (utf8Data.length > 0) {

            String jsonResult =
                "{\"text\":\"" + txtResult + "\",\"eof\":" + String.valueOf(isEof) + "\"}";

            if (webSocket.isOpen()) {
              // create a TEXT Frame for send back json result
              Draft draft = webSocket.getDraft();
              List<Framedata> frames = null;
              frames = draft.createFrames(jsonResult, false);
              // send to client
              webSocket.sendFrame(frames);
            }
          }
        }
        // loop for each connection data in this thread
        for (ConnectionData connData : connDataList) {
          OnlineStream stream = connData.getStream();
          WebSocket webSocket = connData.getWebSocket();
          // if the stream is still ready, put it to decoder Queue again for next decoding
          if (rcgOjb.isReady(stream)) {
            decoderQueue.put(webSocket);
          }
          // the duration between last handled time and now
          java.time.Duration duration =
              java.time.Duration.between(connData.getLastHandleTime(), LocalDateTime.now());
          // close the websocket if 1) data is done  and  stream not ready; 2) or data is time out;
          // 3) or
          // connection is closed
          if ((connData.getEof() == true
                  && !rcgOjb.isReady(stream)
                  && connData.getQueueSamples().isEmpty())
              || duration.toMillis() > deocderTimeOut
              || !connData.getWebSocket().isOpen()) {

            logger.info("close websocket!!!");

            // delay close web socket as data may still in processing
            Timer timer = new Timer();
            timer.schedule(
                new TimerTask() {
                  public void run() {

                    webSocket.close();
                  }
                },
                5000); // 5 seconds
          }
        }

      } catch (Exception e) {
        e.printStackTrace();
      }
    }
  }
}


================================================
FILE: java-api-examples/src/websocketsrv/StreamThreadHandler.java
================================================
/*
 * // Copyright 2022-2023 by zhaoming
 */
// java StreamThreadHandler
package websocketsrv;

import com.k2fsa.sherpa.onnx.OnlineStream;
import java.nio.*;
import java.util.*;
import java.util.concurrent.*;
import java.util.concurrent.LinkedBlockingQueue;
import org.java_websocket.WebSocket;
// thread for processing stream

public class StreamThreadHandler extends Thread {
  //  Queue between io network io thread pool and stream thread pool, use websocket as the key
  private LinkedBlockingQueue<WebSocket> streamQueue;
  //  Queue waiting for deocdeing, use websocket as the key
  private LinkedBlockingQueue<WebSocket> decoderQueue;
  // mapping between websocket connection and connection data
  private ConcurrentHashMap<WebSocket, ConnectionData> connMap;

  public StreamThreadHandler(
      LinkedBlockingQueue<WebSocket> streamQueue,
      LinkedBlockingQueue<WebSocket> decoderQueue,
      ConcurrentHashMap<WebSocket, ConnectionData> connMap) {
    this.streamQueue = streamQueue;
    this.decoderQueue = decoderQueue;
    this.connMap = connMap;
  }

  public void run() {
    while (true) {
      try {
        // fetch one websocket from queue
        WebSocket conn = (WebSocket) this.streamQueue.take();
        // get the connection data according to websocket
        ConnectionData connData = connMap.get(conn);
        OnlineStream stream = connData.getStream();

        // handle received binary data
        if (!connData.getQueueSamples().isEmpty()) {
          // loop to put all received binary data to stream
          while (!connData.getQueueSamples().isEmpty()) {

            float[] samples = connData.getQueueSamples().poll();

            stream.acceptWaveform(samples);
          }
          //  if data is finished
          if (connData.getEof() == true) {

            stream.inputFinished();
          }
          // add this websocket to decoder Queue if not in the Queue
          if (!decoderQueue.contains(conn)) {

            decoderQueue.put(conn);
          }
        }

      } catch (Exception e) {
        e.printStackTrace();
      }
    }
  }
}


================================================
FILE: jitpack.yml
================================================
jdk:
  - openjdk17

before_install:
  - wget https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.31/sherpa-onnx-1.12.31.aar

install:
  - FILE="-Dfile=sherpa-onnx-1.12.31.aar"
  - mvn install:install-file $FILE -DgroupId=com.k2fsa.sherpa.onnx -DartifactId=sherpa-onnx -Dversion=1.12.31 -Dpackaging=aar -DgeneratePom=true


================================================
FILE: kotlin-api-examples/.gitignore
================================================
hs_err*
vits-zh-aishell3
*.jar


================================================
FILE: kotlin-api-examples/faked-asset-manager.kt
================================================
package android.content.res

class AssetManager {}


================================================
FILE: kotlin-api-examples/faked-log.kt
================================================
package android.util

class Log {
  companion object {
    fun i(tag: String, msg: String) {
      println("$tag, $msg")
    }
  }
}


================================================
FILE: kotlin-api-examples/test_audio_tagging.kt
================================================
package com.k2fsa.sherpa.onnx

fun main() {
  testAudioTagging()
}

fun testAudioTagging() {
  val config = AudioTaggingConfig(
      model=AudioTaggingModelConfig(
        zipformer=OfflineZipformerAudioTaggingModelConfig(
          model="./sherpa-onnx-zipformer-audio-tagging-2024-04-09/model.int8.onnx",
        ),
        numThreads=1,
        debug=true,
        provider="cpu",
      ),
      labels="./sherpa-onnx-zipformer-audio-tagging-2024-04-09/class_labels_indices.csv",
      topK=5,
   )
  val tagger = AudioTagging(config=config)

  val testFiles = arrayOf(
    "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/1.wav",
    "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/2.wav",
    "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/3.wav",
    "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/4.wav",
  )
  println("----------")
  for (waveFilename in testFiles) {
    val stream = tagger.createStream()

    val waveData = WaveReader.readWaveFromFile(
        filename = waveFilename,
    )

    stream.acceptWaveform(waveData.samples, sampleRate = waveData.sampleRate)
    val events = tagger.compute(stream)
    stream.release()

    println(waveFilename)
    for (event in events) {
      println("Name: ${event.name}, Index: ${event.index}, Probability: ${event.prob}")
    }

    println("----------")
  }

  tagger.release()
}


================================================
FILE: kotlin-api-examples/test_itn_offline_asr.kt
================================================
package com.k2fsa.sherpa.onnx

fun main() {
  test()
}

fun test() {
  val recognizer = createOfflineRecognizer()
  val waveFilename = "./itn-zh-number.wav";

  val waveData = WaveReader.readWaveFromFile(
      filename = waveFilename,
  )

  val stream = recognizer.createStream()
  stream.acceptWaveform(waveData.samples, sampleRate=waveData.sampleRate)
  recognizer.decode(stream)

  val result = recognizer.getResult(stream)
  println(result)

  stream.release()
  recognizer.release()
}

fun createOfflineRecognizer(): OfflineRecognizer {
  val config = OfflineRecognizerConfig(
      featConfig = getFeatureConfig(sampleRate = 16000, featureDim = 80),
      modelConfig = getOfflineModelConfig(0)!!,
      ruleFsts = "./itn_zh_number.fst",
  )

  return OfflineRecognizer(config = config)
}


================================================
FILE: kotlin-api-examples/test_itn_online_asr.kt
================================================
package com.k2fsa.sherpa.onnx

fun main() {
  test()
}

fun test() {
  val recognizer = createOnlineRecognizer()
  val waveFilename = "./itn-zh-number.wav";

  val waveData = WaveReader.readWaveFromFile(
      filename = waveFilename,
  )

  val stream = recognizer.createStream()
  stream.acceptWaveform(waveData.samples, sampleRate=waveData.sampleRate)
  while (recognizer.isReady(stream)) {
    recognizer.decode(stream)
  }

  val result = recognizer.getResult(stream).text
  println(result)

  stream.release()
  recognizer.release()
}

fun createOnlineRecognizer(): OnlineRecognizer {
  val config = OnlineRecognizerConfig(
      featConfig = getFeatureConfig(sampleRate = 16000, featureDim = 80),
      modelConfig = getModelConfig(8)!!,
  )

  config.ruleFsts = "./itn_zh_number.fst"
  println(config)

  return OnlineRecognizer(config = config)
}


================================================
FILE: kotlin-api-examples/test_language_id.kt
================================================
package com.k2fsa.sherpa.onnx

fun main() {
  testSpokenLanguageIdentifcation()
}

fun testSpokenLanguageIdentifcation() {
  val config = SpokenLanguageIdentificationConfig(
    whisper = SpokenLanguageIdentificationWhisperConfig(
      encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx",
      decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx",
      tailPaddings = 33,
    ),
    numThreads=1,
    debug=true,
    provider="cpu",
  )
  val slid = SpokenLanguageIdentification(config=config)

  val testFiles = arrayOf(
    "./spoken-language-identification-test-wavs/ar-arabic.wav",
    "./spoken-language-identification-test-wavs/bg-bulgarian.wav",
    "./spoken-language-identification-test-wavs/de-german.wav",
  )

  for (waveFilename in testFiles) {
    val waveData = WaveReader.readWaveFromFile(
        filename = waveFilename,
    )

    val stream = slid.createStream()
    stream.acceptWaveform(waveData.samples, sampleRate=waveData.sampleRate)
    val lang = slid.compute(stream)
    stream.release()
    println(waveFilename)
    println(lang)
  }

  slid.release()
}


================================================
FILE: kotlin-api-examples/test_offline_asr.kt
================================================
package com.k2fsa.sherpa.onnx

fun main() {
  val types = arrayOf(0, 2, 5, 6, 15, 21, 24, 25, 31)
  for (type in types) {
    test(type)
  }
}

fun test(type: Int) {
  val recognizer = createOfflineRecognizer(type)

  val waveFilename = when (type) {
    0 -> "./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/0.wav"
    2 -> "./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav"
    5 -> "./sherpa-onnx-zipformer-multi-zh-hans-2023-9-2/test_wavs/1.wav"
    6 -> "./sherpa-onnx-nemo-ctc-en-citrinet-512/test_wavs/8k.wav"
    15 -> "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17/test_wavs/zh.wav"
    21 -> "./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav"
    24 -> "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/0.wav"
    25 -> "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav"
    31 -> "./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/test_wavs/0.wav"
    else -> null
  }

  val waveData = WaveReader.readWaveFromFile(
      filename = waveFilename!!,
  )

  val stream = recognizer.createStream()
  stream.acceptWaveform(waveData.samples, sampleRate=waveData.sampleRate)
  recognizer.decode(stream)

  val result = recognizer.getResult(stream)
  println(result)

  stream.release()
  recognizer.release()
}

fun createOfflineRecognizer(type: Int): OfflineRecognizer {
  val config = OfflineRecognizerConfig(
      featConfig = getFeatureConfig(sampleRate = 16000, featureDim = 80),
      modelConfig = getOfflineModelConfig(type = type)!!,
  )

  return OfflineRecognizer(config = config)
}


================================================
FILE: kotlin-api-examples/test_offline_fire_red_asr_ctc.kt
================================================
package com.k2fsa.sherpa.onnx

fun main() {
  val recognizer = createOfflineRecognizer()
  val waveFilename = "./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/test_wavs/1.wav"

  val waveData = WaveReader.readWaveFromFile(
      filename = waveFilename,
  )

  var stream = recognizer.createStream()
  stream.acceptWaveform(waveData.samples, sampleRate=waveData.sampleRate)
  recognizer.decode(stream)

  var result = recognizer.getResult(stream)
  println(result)

  stream.release()
  recognizer.release()
}


fun createOfflineRecognizer(): OfflineRecognizer {
  val config = OfflineRecognizerConfig(
      modelConfig = getOfflineModelConfig(type = 50)!!,
  )

  return OfflineRecognizer(config = config)
}


================================================
FILE: kotlin-api-examples/test_offline_funasr_nano.kt
================================================
package com.k2fsa.sherpa.onnx

fun main() {
  val recognizer = createOfflineRecognizer()
  val waveFilename = "./sherpa-onnx-funasr-nano-int8-2025-12-30/test_wavs/lyrics.wav"

  val waveData = WaveReader.readWaveFromFile(
      filename = waveFilename,
  )

  var stream = recognizer.createStream()
  stream.acceptWaveform(waveData.samples, sampleRate=waveData.sampleRate)
  recognizer.decode(stream)

  var result = recognizer.getResult(stream)
  println(result)

  stream.release()
  recognizer.release()
}


fun createOfflineRecognizer(): OfflineRecognizer {
  val config = OfflineRecognizerConfig(
      modelConfig = getOfflineModelConfig(type = 46)!!,
  )

  return OfflineRecognizer(config = config)
}


================================================
FILE: kotlin-api-examples/test_offline_medasr_ctc.kt
================================================
package com.k2fsa.sherpa.onnx

fun main() {
  val recognizer = createOfflineRecognizer()
  val waveFilename = "./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/test_wavs/0.wav"

  val waveData = WaveReader.readWaveFromFile(
      filename = waveFilename,
  )

  var stream = recognizer.createStream()
  stream.acceptWaveform(waveData.samples, sampleRate=waveData.sampleRate)
  recognizer.decode(stream)

  var result = recognizer.getResult(stream)
  println(result)

  stream.release()
  recognizer.release()
}


fun createOfflineRecognizer(): OfflineRecognizer {
  val config = OfflineRecognizerConfig(
      modelConfig = getOfflineModelConfig(type = 45)!!,
  )

  return OfflineRecognizer(config = config)
}


================================================
FILE: kotlin-api-examples/test_offline_moonshine_asr_v2.kt
================================================
package com.k2fsa.sherpa.onnx

fun main() {
  val recognizer = createOfflineRecognizer()
  val waveFilename = "./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/test_wavs/0.wav"

  val waveData = WaveReader.readWaveFromFile(
      filename = waveFilename,
  )

  var stream = recognizer.createStream()
  stream.acceptWaveform(waveData.samples, sampleRate=waveData.sampleRate)
  recognizer.decode(stream)

  var result = recognizer.getResult(stream)
  println(result)

  stream.release()
  recognizer.release()
}


fun createOfflineRecognizer(): OfflineRecognizer {
  val config = OfflineRecognizerConfig(
      modelConfig = getOfflineModelConfig(type = 53)!!,
  )

  return OfflineRecognizer(config = config)
}


================================================
FILE: kotlin-api-examples/test_offline_nemo_canary.kt
================================================
package com.k2fsa.sherpa.onnx

fun main() {
  val recognizer = createOfflineRecognizer()
  val waveFilename = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav"

  val waveData = WaveReader.readWaveFromFile(
      filename = waveFilename,
  )

  var stream = recognizer.createStream()
  stream.acceptWaveform(waveData.samples, sampleRate=waveData.sampleRate)
  recognizer.decode(stream)

  var result = recognizer.getResult(stream)
  println("English: $result")

  stream.release()

  // now output text in German
  val config = recognizer.config.copy(modelConfig=recognizer.config.modelConfig.copy(
    canary=recognizer.config.modelConfig.canary.copy(
      tgtLang="de"
    )
  ))
  recognizer.setConfig(config)

  stream = recognizer.createStream()
  stream.acceptWaveform(waveData.samples, sampleRate=waveData.sampleRate)
  recognizer.decode(stream)

  result = recognizer.getResult(stream)
  println("German: $result")

  stream.release()
  recognizer.release()
}


fun createOfflineRecognizer(): OfflineRecognizer {
  val config = OfflineRecognizerConfig(
      modelConfig = getOfflineModelConfig(type = 32)!!,
  )

  return OfflineRecognizer(config = config)
}


================================================
FILE: kotlin-api-examples/test_offline_omnilingual_asr_ctc.kt
================================================
package com.k2fsa.sherpa.onnx

fun main() {
  val recognizer = createOfflineRecognizer()
  val waveFilename = "./sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/test_wavs/en.wav"

  val waveData = WaveReader.readWaveFromFile(
      filename = waveFilename,
  )

  var stream = recognizer.createStream()
  stream.acceptWaveform(waveData.samples, sampleRate=waveData.sampleRate)
  recognizer.decode(stream)

  var result = recognizer.getResult(stream)
  println(result)

  stream.release()
  recognizer.release()
}


fun createOfflineRecognizer(): OfflineRecognizer {
  val config = OfflineRecognizerConfig(
      modelConfig = getOfflineModelConfig(type = 44)!!,
  )

  return OfflineRecognizer(config = config)
}


================================================
FILE: kotlin-api-examples/test_offline_punctuation.kt
================================================
package com.k2fsa.sherpa.onnx

fun main() {
  testPunctuation()
}

fun testPunctuation() {
  val config = OfflinePunctuationConfig(
      model=OfflinePunctuationModelConfig(
          ctTransformer="./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx",
          numThreads=1,
          debug=true,
          provider="cpu",
      )
  )
  val punct = OfflinePunctuation(config = config)
  val sentences = arrayOf(
        "这是一个测试你好吗How are you我很好thank you are you ok谢谢你",
        "我们都是木头人不会说话不会动",
        "The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry",
  )
  println("---")
  for (text in sentences) {
    val out = punct.addPunctuation(text)
    println("Input: $text")
    println("Output: $out")
    println("---")
  }
  println(sentences)

}


================================================
FILE: kotlin-api-examples/test_offline_sense_voice_with_hr.kt
================================================
package com.k2fsa.sherpa.onnx

fun main() {
  val recognizer = createOfflineRecognizer()
  val waveFilename = "./test-hr.wav"

  val waveData = WaveReader.readWaveFromFile(
      filename = waveFilename,
  )

  val stream = recognizer.createStream()
  stream.acceptWaveform(waveData.samples, sampleRate=waveData.sampleRate)
  recognizer.decode(stream)

  val result = recognizer.getResult(stream)
  println(result)

  stream.release()
  recognizer.release()
}

fun createOfflineRecognizer(): OfflineRecognizer {
  val config = OfflineRecognizerConfig(
      featConfig = getFeatureConfig(sampleRate = 16000, featureDim = 80),
      modelConfig = getOfflineModelConfig(type = 15)!!,
      hr = HomophoneReplacerConfig(
        lexicon = "./lexicon.txt",
        ruleFsts = "./replace.fst"),
  )

  return OfflineRecognizer(config = config)
}


================================================
FILE: kotlin-api-examples/test_offline_speaker_diarization.kt
================================================
package com.k2fsa.sherpa.onnx

fun main() {
  testOfflineSpeakerDiarization()
}

fun callback(numProcessedChunks: Int, numTotalChunks: Int, arg: Long): Int {
  val progress = numProcessedChunks.toFloat() / numTotalChunks * 100
  val s = "%.2f".format(progress)
  println("Progress: ${s}%");

  return 0
}

fun testOfflineSpeakerDiarization() {
  var config = OfflineSpeakerDiarizationConfig(
    segmentation=OfflineSpeakerSegmentationModelConfig(
      pyannote=OfflineSpeakerSegmentationPyannoteModelConfig("./sherpa-onnx-pyannote-segmentation-3-0/model.onnx"),
    ),
    embedding=SpeakerEmbeddingExtractorConfig(
      model="./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx",
    ),

    // The test wave file ./0-four-speakers-zh.wav contains four speakers, so
    // we use numClusters=4 here. If you don't know the number of speakers
    // in the test wave file, please set the threshold like below.
    //
    // clustering=FastClusteringConfig(threshold=0.5),
    //
    // WARNING: You need to tune threshold by yourself.
    // A larger threshold leads to fewer clusters, i.e., few speakers.
    // A smaller threshold leads to more clusters, i.e., more speakers.
    //
    clustering=FastClusteringConfig(numClusters=4),
  )

  val sd = OfflineSpeakerDiarization(config=config)

  val waveData = WaveReader.readWave(
      filename = "./0-four-speakers-zh.wav",
  )

  if (sd.sampleRate() != waveData.sampleRate) {
    println("Expected sample rate: ${sd.sampleRate()}, given: ${waveData.sampleRate}")
    return
  }

  // val segments = sd.process(waveData.samples) // this one is also ok
  val segments = sd.processWithCallback(waveData.samples, callback=::callback)
  for (segment in segments) {
    println("${segment.start} -- ${segment.end} speaker_${segment.speaker}")
  }
}


================================================
FILE: kotlin-api-examples/test_offline_speech_denoiser.kt
================================================
package com.k2fsa.sherpa.onnx
// Please download test files in this script from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models

fun main() {
  test()
}

fun test() {
  val denoiser  = createOfflineSpeechDenoiser()

  val waveFilename = "./inp_16k.wav";

  val waveData = WaveReader.readWaveFromFile(
      filename = waveFilename,
  )

  val denoised = denoiser.run(waveData.samples, waveData.sampleRate);
  denoised.save(filename="./enhanced-16k.wav")
  println("saved to ./enhanced-16k.wav")
}

fun createOfflineSpeechDenoiser(): OfflineSpeechDenoiser {
  val config = OfflineSpeechDenoiserConfig(
      model = OfflineSpeechDenoiserModelConfig(
        gtcrn = OfflineSpeechDenoiserGtcrnModelConfig(
          model = "./gtcrn_simple.onnx"
        ),
        provider = "cpu",
        numThreads = 1,
      ),
  )

  println(config)

  return OfflineSpeechDenoiser(config = config)
}


================================================
FILE: kotlin-api-examples/test_offline_speech_denoiser_dpdfnet.kt
================================================
package com.k2fsa.sherpa.onnx
// Please download test files in this script from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models

fun main() {
  val denoiser = createOfflineSpeechDenoiserDpdfNet()
  val waveData = WaveReader.readWaveFromFile(filename = "./inp_16k.wav")
  val denoised = denoiser.run(waveData.samples, waveData.sampleRate)
  denoised.save(filename = "./enhanced-dpdfnet-16k.wav")
  println("saved to ./enhanced-dpdfnet-16k.wav")
}

fun createOfflineSpeechDenoiserDpdfNet(): OfflineSpeechDenoiser {
  val config = OfflineSpeechDenoiserConfig(
      model = OfflineSpeechDenoiserModelConfig(
        dpdfnet = OfflineSpeechDenoiserDpdfNetModelConfig(
          model = "./dpdfnet_baseline.onnx"
        ),
        provider = "cpu",
        numThreads = 1,
      ),
  )

  return OfflineSpeechDenoiser(config = config)
}


================================================
FILE: kotlin-api-examples/test_offline_wenet_ctc.kt
================================================
package com.k2fsa.sherpa.onnx

fun main() {
  val recognizer = createOfflineRecognizer()
  val waveFilename = "./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav"

  val waveData = WaveReader.readWaveFromFile(
      filename = waveFilename,
  )

  var stream = recognizer.createStream()
  stream.acceptWaveform(waveData.samples, sampleRate=waveData.sampleRate)
  recognizer.decode(stream)

  var result = recognizer.getResult(stream)
  println(result)

  stream.release()
  recognizer.release()
}


fun createOfflineRecognizer(): OfflineRecognizer {
  val config = OfflineRecognizerConfig(
      modelConfig = getOfflineModelConfig(type = 42)!!,
  )

  return OfflineRecognizer(config = config)
}


================================================
FILE: kotlin-api-examples/test_online_asr.kt
================================================
package com.k2fsa.sherpa.onnx

fun main() {
  testOnlineAsr("transducer")
  testOnlineAsr("zipformer2-ctc")
  testOnlineAsr("ctc-hlg")
  testOnlineAsr("nemo-ctc")
  testOnlineAsr("tone-ctc")
}

fun testOnlineAsr(type: String) {
    val featConfig = FeatureConfig(
        sampleRate = 16000,
        featureDim = 80,
    )

    var ctcFstDecoderConfig  = OnlineCtcFstDecoderConfig()
    val waveFilename: String
    val modelConfig: OnlineModelConfig = when (type) {
      "transducer" -> {
        waveFilename = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/test_wavs/0.wav"
        // please refer to
        // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
        // to download pre-trained models
        OnlineModelConfig(
            transducer = OnlineTransducerModelConfig(
                encoder = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/encoder-epoch-99-avg-1.onnx",
                decoder = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/decoder-epoch-99-avg-1.onnx",
                joiner = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/joiner-epoch-99-avg-1.onnx",
            ),
            tokens = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/tokens.txt",
            numThreads = 1,
            debug = false,
        )
      }
      "zipformer2-ctc" -> {
        waveFilename = "./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000000.wav"
        OnlineModelConfig(
            zipformer2Ctc = OnlineZipformer2CtcModelConfig(
                model = "./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx",
            ),
            tokens = "./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt",
            numThreads = 1,
            debug = false,
        )
      }
      "nemo-ctc" -> {
        waveFilename = "./sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms/test_wavs/0.wav"
        OnlineModelConfig(
            neMoCtc = OnlineNeMoCtcModelConfig(
                model = "./sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms/model.onnx",
            ),
            tokens = "./sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms/tokens.txt",
            numThreads = 1,
            debug = false,
        )
      }
      "tone-ctc" -> {
        waveFilename = "./sherpa-onnx-streaming-t-one-russian-2025-09-08/0.wav"
        OnlineModelConfig(
            toneCtc = OnlineToneCtcModelConfig(
                model = "./sherpa-onnx-streaming-t-one-russian-2025-09-08/model.onnx",
            ),
            tokens = "./sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt",
            numThreads = 1,
            debug = false,
        )
      }
      "ctc-hlg" -> {
        waveFilename = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/1.wav"
        ctcFstDecoderConfig.graph = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst"
        OnlineModelConfig(
            zipformer2Ctc = OnlineZipformer2CtcModelConfig(
                model = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx",
            ),
            tokens = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt",
            numThreads = 1,
            debug = false,
        )
      }
      else -> throw IllegalArgumentException(type)
    }

    val endpointConfig = EndpointConfig()

    val lmConfig = OnlineLMConfig()

    val config = OnlineRecognizerConfig(
        modelConfig = modelConfig,
        lmConfig = lmConfig,
        featConfig = featConfig,
        ctcFstDecoderConfig=ctcFstDecoderConfig,
        endpointConfig = endpointConfig,
        enableEndpoint = true,
        decodingMethod = "greedy_search",
        maxActivePaths = 4,
    )

    val recognizer = OnlineRecognizer(
        config = config,
    )

    val waveData = WaveReader.readWaveFromFile(
        filename = waveFilename,
    )

    val stream = recognizer.createStream()

    val leftPaddings = FloatArray((waveData.sampleRate * 0.3).toInt()) // 0.3 seconds
    stream.acceptWaveform(leftPaddings, sampleRate = waveData.sampleRate)

    stream.acceptWaveform(waveData.samples, sampleRate = waveData.sampleRate)
    while (recognizer.isReady(stream)) {
        recognizer.decode(stream)
    }

    val tailPaddings = FloatArray((waveData.sampleRate * 0.6).toInt()) // 0.6 seconds
    stream.acceptWaveform(tailPaddings, sampleRate = waveData.sampleRate)
    stream.inputFinished()
    while (recognizer.isReady(stream)) {
        recognizer.decode(stream)
    }

    println("results: ${recognizer.getResult(stream).text}")

    stream.release()
    recognizer.release()
}


================================================
FILE: kotlin-api-examples/test_online_punctuation.kt
================================================
package com.k2fsa.sherpa.onnx

fun main() {
  testPunctuation()
}

// https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
fun testPunctuation() {
  val config = OnlinePunctuationConfig(
      model=OnlinePunctuationModelConfig(
          cnnBilstm="./sherpa-onnx-online-punct-en-2024-08-06/model.int8.onnx",
          bpeVocab="./sherpa-onnx-online-punct-en-2024-08-06/bpe.vocab",
          numThreads=1,
          debug=true,
          provider="cpu",
      )
  )
  val punct = OnlinePunctuation(config = config)
  val sentences = arrayOf(
        "how are you doing fantastic thank you what is about you",
        "The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry",
  )
  println("---")
  for (text in sentences) {
    val out = punct.addPunctuation(text)
    println("Input: $text")
    println("Output: $out")
    println("---")
  }
}


================================================
FILE: kotlin-api-examples/test_online_speech_denoiser.kt
================================================
package com.k2fsa.sherpa.onnx

// Please download test files in this script from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models

fun main() {
  testGtcrn()
  testDpdfNet()
}

fun testGtcrn() {
  val denoiser = createOnlineSpeechDenoiserGtcrn()
  val waveData = WaveReader.readWaveFromFile("./inp_16k.wav")
  val output = mutableListOf<Float>()
  val frameShift = denoiser.frameShiftInSamples

  var start = 0
  while (start < waveData.samples.size) {
    val end = minOf(start + frameShift, waveData.samples.size)
    val chunk = waveData.samples.copyOfRange(start, end)
    val denoised = denoiser.run(chunk, waveData.sampleRate)
    output.addAll(denoised.samples.asList())
    start = end
  }

  output.addAll(denoiser.flush().samples.asList())
  DenoisedAudio(output.toFloatArray(), denoiser.sampleRate).save(
    filename = "./enhanced-online-gtcrn.wav"
  )
  println("saved to ./enhanced-online-gtcrn.wav")

  denoiser.release()
}

fun testDpdfNet() {
  val denoiser = createOnlineSpeechDenoiserDpdfNet()
  val waveData = WaveReader.readWaveFromFile("./inp_16k.wav")
  val output = mutableListOf<Float>()

  val frameShift = denoiser.frameShiftInSamples
  var start = 0
  while (start < waveData.samples.size) {
    val end = minOf(start + frameShift, waveData.samples.size)
    val chunk = waveData.samples.copyOfRange(start, end)
    val denoised = denoiser.run(chunk, waveData.sampleRate)
    output.addAll(denoised.samples.asList())
    start = end
  }

  output.addAll(denoiser.flush().samples.asList())
  DenoisedAudio(output.toFloatArray(), denoiser.sampleRate).save(
    filename = "./enhanced-online-dpdfnet.wav"
  )
  println("saved to ./enhanced-online-dpdfnet.wav")

  denoiser.release()
}

fun createOnlineSpeechDenoiserGtcrn(): OnlineSpeechDenoiser {
  val config = OnlineSpeechDenoiserConfig(
      model = OfflineSpeechDenoiserModelConfig(
        gtcrn = OfflineSpeechDenoiserGtcrnModelConfig(
          model = "./gtcrn_simple.onnx"
        ),
        provider = "cpu",
        numThreads = 1,
      ),
  )

  return OnlineSpeechDenoiser(config = config)
}

fun createOnlineSpeechDenoiserDpdfNet(): OnlineSpeechDenoiser {
  val config = OnlineSpeechDenoiserConfig(
      model = OfflineSpeechDenoiserModelConfig(
        dpdfnet = OfflineSpeechDenoiserDpdfNetModelConfig(
          model = "./dpdfnet_baseline.onnx"
        ),
        provider = "cpu",
        numThreads = 1,
      ),
  )

  return OnlineSpeechDenoiser(config = config)
}


================================================
FILE: kotlin-api-examples/test_pocket_tts.kt
================================================
package com.k2fsa.sherpa.onnx

fun main() {
  testPocketTts()
}

fun testPocketTts() {
  // see https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  val config = OfflineTtsConfig(
    model=OfflineTtsModelConfig(
      pocket=OfflineTtsPocketModelConfig(
        lmFlow="./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_flow.int8.onnx",
        lmMain="./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_main.int8.onnx",
        encoder="./sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx",
        decoder="./sherpa-onnx-pocket-tts-int8-2026-01-26/decoder.int8.onnx",
        textConditioner="./sherpa-onnx-pocket-tts-int8-2026-01-26/text_conditioner.onnx",
        vocabJson="./sherpa-onnx-pocket-tts-int8-2026-01-26/vocab.json",
        tokenScoresJson="./sherpa-onnx-pocket-tts-int8-2026-01-26/token_scores.json",
      ),
      numThreads=2,
      debug=true,
    ),
  )
  val tts = OfflineTts(config=config)

  val referenceAudioFilename = "./sherpa-onnx-pocket-tts-int8-2026-01-26/test_wavs/bria.wav"
  val wave = WaveReader.readWave(
      filename = referenceAudioFilename,
  )

  val genConfig = GenerationConfig(
    referenceAudio = wave.samples,
    referenceSampleRate = wave.sampleRate,
    numSteps = 5,
    extra = mapOf(
        "temperature" to "0.7",
        "chunk_size" to "15",
    )
  )

  val text = "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be, a statesman, a businessman, an official, or a scholar."

  val audio = tts.generateWithConfigAndCallback(text=text, config=genConfig, callback=::callback)
  audio.save(filename="out-bria.wav")
  tts.release()
  println("Saved to out-bria.wav")
}

fun callback(samples: FloatArray): Int {
  println("callback got called with ${samples.size} samples")

  // 1 means to continue
  // 0 means to stop
  return 1
}


================================================
FILE: kotlin-api-examples/test_speaker_id.kt
================================================
package com.k2fsa.sherpa.onnx

fun main() {
  testSpeakerRecognition()
}

fun testSpeakerRecognition() {
    val config = SpeakerEmbeddingExtractorConfig(
        model="./3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx",
        )
    val extractor = SpeakerEmbeddingExtractor(config = config)

    val embedding1a = computeEmbedding(extractor, "./speaker1_a_cn_16k.wav")
    val embedding2a = computeEmbedding(extractor, "./speaker2_a_cn_16k.wav")
    val embedding1b = computeEmbedding(extractor, "./speaker1_b_cn_16k.wav")

    var manager = SpeakerEmbeddingManager(extractor.dim())
    var ok = manager.add(name = "speaker1", embedding=embedding1a)
    check(ok)

    manager.add(name = "speaker2", embedding=embedding2a)
    check(ok)

    var name = manager.search(embedding=embedding1b, threshold=0.5f)
    check(name == "speaker1")

    manager.release()

    manager = SpeakerEmbeddingManager(extractor.dim())
    val embeddingList = mutableListOf(embedding1a, embedding1b)
    ok = manager.add(name = "s1", embedding=embeddingList.toTypedArray())
    check(ok)

    name = manager.search(embedding=embedding1b, threshold=0.5f)
    check(name == "s1")

    name = manager.search(embedding=embedding2a, threshold=0.5f)
    check(name.length == 0)

    manager.release()
    extractor.release()
    println("Speaker ID test done!")
}

fun computeEmbedding(extractor: SpeakerEmbeddingExtractor, filename: String): FloatArray {
    val waveData = WaveReader.readWaveFromFile(
        filename = filename,
    )
    val stream = extractor.createStream()
    stream.acceptWaveform(sampleRate = waveData.sampleRate, samples=waveData.samples)
    stream.inputFinished()
    check(extractor.isReady(stream))

    val embedding = extractor.compute(stream)

    stream.release()

    return embedding
}


================================================
FILE: kotlin-api-examples/test_supertonic_tts.kt
================================================
package com.k2fsa.sherpa.onnx

fun main() {
  testSupertonicTts()
}

fun testSupertonicTts() {
  // see https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  val modelDir = "./sherpa-onnx-supertonic-tts-int8-2026-03-06"
  val config = OfflineTtsConfig(
    model=OfflineTtsModelConfig(
      supertonic=OfflineTtsSupertonicModelConfig(
        durationPredictor="$modelDir/duration_predictor.int8.onnx",
        textEncoder="$modelDir/text_encoder.int8.onnx",
        vectorEstimator="$modelDir/vector_estimator.int8.onnx",
        vocoder="$modelDir/vocoder.int8.onnx",
        ttsJson="$modelDir/tts.json",
        unicodeIndexer="$modelDir/unicode_indexer.bin",
        voiceStyle="$modelDir/voice.bin",
      ),
      numThreads=2,
      debug=true,
    ),
  )
  val tts = OfflineTts(config=config)

  val genConfig = GenerationConfig(
    sid = 6,
    speed = 1.25f,
    numSteps = 5,
    extra = mapOf(
        "lang" to "en",
    )
  )

  val text = "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be, a statesman, a businessman, an official, or a scholar."

  val audio = tts.generateWithConfigAndCallback(text=text, config=genConfig, callback=::supertonicCallback)
  audio.save(filename="test-supertonic-en.wav")
  tts.release()
  println("Saved to test-supertonic-en.wav")
}

fun supertonicCallback(samples: FloatArray): Int {
  println("callback got called with ${samples.size} samples")

  // 1 means to continue
  // 0 means to stop
  return 1
}


================================================
FILE: kotlin-api-examples/test_tts.kt
================================================
package com.k2fsa.sherpa.onnx

fun main() {
  testVits()
  testMatcha()
  testKokoroEn()
  testKokoroZhEn()
  testKittenEn()
}

fun testKokoroZhEn() {
  // see https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  var config = OfflineTtsConfig(
    model=OfflineTtsModelConfig(
      kokoro=OfflineTtsKokoroModelConfig(
        model="./kokoro-multi-lang-v1_0/model.onnx",
        voices="./kokoro-multi-lang-v1_0/voices.bin",
        tokens="./kokoro-multi-lang-v1_0/tokens.txt",
        dataDir="./kokoro-multi-lang-v1_0/espeak-ng-data",
        lexicon="./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt",
      ),
      numThreads=2,
      debug=true,
    ),
  )
  val tts = OfflineTts(config=config)
  val genConfig = GenerationConfig(
    silenceScale = 0.2f,
  )
  val audio = tts.generateWithConfigAndCallback(text="中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢？", config=genConfig, callback=::callback)
  audio.save(filename="test-kokoro-zh-en.wav")
  tts.release()
  println("Saved to test-kokoro-zh-en.wav")
}

fun testKokoroEn() {
  // see https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  var config = OfflineTtsConfig(
    model=OfflineTtsModelConfig(
      kokoro=OfflineTtsKokoroModelConfig(
        model="./kokoro-en-v0_19/model.onnx",
        voices="./kokoro-en-v0_19/voices.bin",
        tokens="./kokoro-en-v0_19/tokens.txt",
        dataDir="./kokoro-en-v0_19/espeak-ng-data",
      ),
      numThreads=2,
      debug=true,
    ),
  )
  val tts = OfflineTts(config=config)
  val genConfig = GenerationConfig(
    silenceScale = 0.2f,
  )
  val audio = tts.generateWithConfigAndCallback(text="How are you doing today?", config=genConfig, callback=::callback)
  audio.save(filename="test-kokoro-en.wav")
  tts.release()
  println("Saved to test-kokoro-en.wav")
}

fun testMatcha() {
  // see https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
  var config = OfflineTtsConfig(
    model=OfflineTtsModelConfig(
      matcha=OfflineTtsMatchaModelConfig(
        acousticModel="./matcha-icefall-zh-baker/model-steps-3.onnx",
        vocoder="./vocos-22khz-univ.onnx",
        tokens="./matcha-icefall-zh-baker/tokens.txt",
        lexicon="./matcha-icefall-zh-baker/lexicon.txt",
      ),
      numThreads=1,
      debug=true,
    ),
    ruleFsts="./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst",
  )
  val tts = OfflineTts(config=config)
  val genConfig = GenerationConfig(
    silenceScale = 0.2f,
  )
  val audio = tts.generateWithConfigAndCallback(text="某某银行的副行长和一些行政领导表示，他们去过长江和长白山; 经济不断增长。2024年12月31号，拨打110或者18920240511。123456块钱。", config=genConfig, callback=::callback)
  audio.save(filename="test-matcha-zh.wav")
  tts.release()
  println("Saved to test-matcha-zh.wav")
}

fun testVits() {
  // see https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
  var config = OfflineTtsConfig(
    model=OfflineTtsModelConfig(
      vits=OfflineTtsVitsModelConfig(
        model="./vits-piper-en_US-amy-low/en_US-amy-low.onnx",
        tokens="./vits-piper-en_US-amy-low/tokens.txt",
        dataDir="./vits-piper-en_US-amy-low/espeak-ng-data",
      ),
      numThreads=1,
      debug=true,
    )
  )
  val tts = OfflineTts(config=config)
  val genConfig = GenerationConfig(
    silenceScale = 0.2f,
  )
  val audio = tts.generateWithConfigAndCallback(text="“Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.”", config=genConfig, callback=::callback)
  audio.save(filename="test-en.wav")
  tts.release()
  println("Saved to test-en.wav")
}

fun testKittenEn() {
  // see https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  var config = OfflineTtsConfig(
    model=OfflineTtsModelConfig(
      kitten=OfflineTtsKittenModelConfig(
        model="./kitten-nano-en-v0_1-fp16/model.fp16.onnx",
        voices="./kitten-nano-en-v0_1-fp16/voices.bin",
        tokens="./kitten-nano-en-v0_1-fp16/tokens.txt",
        dataDir="./kitten-nano-en-v0_1-fp16/espeak-ng-data",
      ),
      numThreads=2,
      debug=true,
    ),
  )
  val tts = OfflineTts(config=config)
  val genConfig = GenerationConfig(
    sid = 7,
    silenceScale = 0.2f,
  )
  val audio = tts.generateWithConfigAndCallback(text="How are you doing today?", config=genConfig, callback=::callback)
  audio.save(filename="test-kitten-en.wav")
  tts.release()
  println("Saved to test-kitten-en.wav")
}

/*
1. Unzip test_tts.jar
2.
javap ./com/k2fsa/sherpa/onnx/Test_ttsKt\$testTts\$audio\$1.class

3. It prints:
Compiled from "test_tts.kt"
final class com.k2fsa.sherpa.onnx.Test_ttsKt$testTts$audio$1 extends kotlin.jvm.internal.FunctionReferenceImpl implements kotlin.jvm.functions.Function1<float[], java.lang.Integer> {
  public static final com.k2fsa.sherpa.onnx.Test_ttsKt$testTts$audio$1 INSTANCE;
  com.k2fsa.sherpa.onnx.Test_ttsKt$testTts$audio$1();
  public final java.lang.Integer invoke(float[]);
  public java.lang.Object invoke(java.lang.Object);
  static {};
}

4.
javap -s ./com/k2fsa/sherpa/onnx/Test_ttsKt\$testTts\$audio\$1.class

5. It prints
Compiled from "test_tts.kt"
final class com.k2fsa.sherpa.onnx.Test_ttsKt$testTts$audio$1 extends kotlin.jvm.internal.FunctionReferenceImpl implements kotlin.jvm.functions.Function1<float[], java.lang.Integer> {
  public static final com.k2fsa.sherpa.onnx.Test_ttsKt$testTts$audio$1 INSTANCE;
    descriptor: Lcom/k2fsa/sherpa/onnx/Test_ttsKt$testTts$audio$1;
  com.k2fsa.sherpa.onnx.Test_ttsKt$testTts$audio$1();
    descriptor: ()V

  public final java.lang.Integer invoke(float[]);
    descriptor: ([F)Ljava/lang/Integer;

  public java.lang.Object invoke(java.lang.Object);
    descriptor: (Ljava/lang/Object;)Ljava/lang/Object;

  static {};
    descriptor: ()V
}
*/
fun callback(samples: FloatArray): Int {
  println("callback got called with ${samples.size} samples");

  // 1 means to continue
  // 0 means to stop
  return 1
}


================================================
FILE: kotlin-api-examples/test_version.kt
================================================
package com.k2fsa.sherpa.onnx

fun main() {
  println("sherpa-onnx version: ${VersionInfo.version}");
  println("sherpa-onnx gitSha1: ${VersionInfo.gitSha1}");
  println("sherpa-onnx gitDate: ${VersionInfo.gitDate}");
}


================================================
FILE: kotlin-api-examples/test_zipvoice_tts.kt
================================================
package com.k2fsa.sherpa.onnx

fun main() {
  testZipVoiceTts()
}

fun testZipVoiceTts() {
  val modelDir = "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia"
  val referenceAudioFilename = "$modelDir/test_wavs/leijun-1.wav"
  val wave = WaveReader.readWave(filename = referenceAudioFilename)

  val config = OfflineTtsConfig(
    model = OfflineTtsModelConfig(
      zipvoice = OfflineTtsZipVoiceModelConfig(
        tokens = "$modelDir/tokens.txt",
        encoder = "$modelDir/encoder.int8.onnx",
        decoder = "$modelDir/decoder.int8.onnx",
        vocoder = "./vocos_24khz.onnx",
        dataDir = "$modelDir/espeak-ng-data",
        lexicon = "$modelDir/lexicon.txt",
      ),
      numThreads = 2,
      debug = false,
    ),
  )

  val tts = OfflineTts(config = config)
  val text = "小米的价值观是真诚, 热爱. 真诚，就是不欺人也不自欺. 热爱, 就是全心投入并享受其中."
  val referenceText = "那还是三十六年前, 一九八七年. 我呢考上了武汉大学的计算机系."
  val genConfig = GenerationConfig(
    referenceAudio = wave.samples,
    referenceSampleRate = wave.sampleRate,
    referenceText = referenceText,
    numSteps = 4,
    extra = mapOf("min_char_in_sentence" to "10"),
  )

  val audio = tts.generateWithConfigAndCallback(text = text, config = genConfig, callback = ::callback)
  audio.save(filename = "test-zipvoice-zh-en.wav")
  tts.release()
  println("Saved to test-zipvoice-zh-en.wav")
}

fun callback(samples: FloatArray): Int {
  println("callback got called with ${samples.size} samples")

  // 1 means to continue
  // 0 means to stop
  return 1
}


================================================
FILE: lazarus-examples/.gitignore
================================================
# Lazarus compiler-generated binaries (safe to delete)
*.exe
*.dll
*.so
*.dylib
*.lrs
*.res
*.compiled
*.dbg
*.ppu
*.o
*.or
*.a

# Lazarus autogenerated files (duplicated info)
*.rst
*.rsj
*.lrt

# Lazarus local files (user-specific info)
*.lps

# Lazarus backups and unit output folders.
# These can be changed by user in Lazarus/project options.
backup/
*.bak
lib/

# Application bundle for Mac OS
*.app/


================================================
FILE: lazarus-examples/README.md
================================================
# Introduction

This directory contains examples about using
https://www.lazarus-ide.org/
with Object Pascal API to develop speech related applications.

**Documentation for this directory**:
https://k2-fsa.github.io/sherpa/onnx/lazarus/index.html

|Directory| Pre-built App|
|---------|--------------|
|[./generate_subtitles](./generate_subtitles)|[URL](https://k2-fsa.github.io/sherpa/onnx/lazarus/download-generated-subtitles.html)|


================================================
FILE: mfc-examples/.gitignore
================================================
# See https://github.com/github/gitignore/blob/main/VisualStudio.gitignore
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
##
## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore

# User-specific files
*.rsuser
*.suo
*.user
*.userosscache
*.sln.docstates

# User-specific files (MonoDevelop/Xamarin Studio)
*.userprefs

# Mono auto generated files
mono_crash.*

# Build results
[Dd]ebug/
[Dd]ebugPublic/
[Rr]elease/
[Rr]eleases/
x64/
x86/
[Ww][Ii][Nn]32/
[Aa][Rr][Mm]/
[Aa][Rr][Mm]64/
bld/
[Bb]in/
[Oo]bj/
[Ll]og/
[Ll]ogs/

# Visual Studio 2015/2017 cache/options directory
.vs/
# Uncomment if you have tasks that create the project's static files in wwwroot
#wwwroot/

# Visual Studio 2017 auto generated files
Generated\ Files/

# MSTest test Results
[Tt]est[Rr]esult*/
[Bb]uild[Ll]og.*

# NUnit
*.VisualState.xml
TestResult.xml
nunit-*.xml

# Build Results of an ATL Project
[Dd]ebugPS/
[Rr]eleasePS/
dlldata.c

# Benchmark Results
BenchmarkDotNet.Artifacts/

# .NET Core
project.lock.json
project.fragment.lock.json
artifacts/

# ASP.NET Scaffolding
ScaffoldingReadMe.txt

# StyleCop
StyleCopReport.xml

# Files built by Visual Studio
*_i.c
*_p.c
*_h.h
*.ilk
*.meta
*.obj
*.iobj
*.pch
*.pdb
*.ipdb
*.pgc
*.pgd
*.rsp
*.sbr
*.tlb
*.tli
*.tlh
*.tmp
*.tmp_proj
*_wpftmp.csproj
*.log
*.tlog
*.vspscc
*.vssscc
.builds
*.pidb
*.svclog
*.scc

# Chutzpah Test files
_Chutzpah*

# Visual C++ cache files
ipch/
*.aps
*.ncb
*.opendb
*.opensdf
*.sdf
*.cachefile
*.VC.db
*.VC.VC.opendb

# Visual Studio profiler
*.psess
*.vsp
*.vspx
*.sap

# Visual Studio Trace Files
*.e2e

# TFS 2012 Local Workspace
$tf/

# Guidance Automation Toolkit
*.gpState

# ReSharper is a .NET coding add-in
_ReSharper*/
*.[Rr]e[Ss]harper
*.DotSettings.user

# TeamCity is a build add-in
_TeamCity*

# DotCover is a Code Coverage Tool
*.dotCover

# AxoCover is a Code Coverage Tool
.axoCover/*
!.axoCover/settings.json

# Coverlet is a free, cross platform Code Coverage Tool
coverage*.json
coverage*.xml
coverage*.info

# Visual Studio code coverage results
*.coverage
*.coveragexml

# NCrunch
_NCrunch_*
.*crunch*.local.xml
nCrunchTemp_*

# MightyMoose
*.mm.*
AutoTest.Net/

# Web workbench (sass)
.sass-cache/

# Installshield output folder
[Ee]xpress/

# DocProject is a documentation generator add-in
DocProject/buildhelp/
DocProject/Help/*.HxT
DocProject/Help/*.HxC
DocProject/Help/*.hhc
DocProject/Help/*.hhk
DocProject/Help/*.hhp
DocProject/Help/Html2
DocProject/Help/html

# Click-Once directory
publish/

# Publish Web Output
*.[Pp]ublish.xml
*.azurePubxml
# Note: Comment the next line if you want to checkin your web deploy settings,
# but database connection strings (with potential passwords) will be unencrypted
*.pubxml
*.publishproj

# Microsoft Azure Web App publish settings. Comment the next line if you want to
# checkin your Azure Web App publish settings, but sensitive information contained
# in these scripts will be unencrypted
PublishScripts/

# NuGet Packages
*.nupkg
# NuGet Symbol Packages
*.snupkg
# The packages folder can be ignored because of Package Restore
**/[Pp]ackages/*
# except build/, which is used as an MSBuild target.
!**/[Pp]ackages/build/
# Uncomment if necessary however generally it will be regenerated when needed
#!**/[Pp]ackages/repositories.config
# NuGet v3's project.json files produces more ignorable files
*.nuget.props
*.nuget.targets

# Microsoft Azure Build Output
csx/
*.build.csdef

# Microsoft Azure Emulator
ecf/
rcf/

# Windows Store app package directories and files
AppPackages/
BundleArtifacts/
Package.StoreAssociation.xml
_pkginfo.txt
*.appx
*.appxbundle
*.appxupload

# Visual Studio cache files
# files ending in .cache can be ignored
*.[Cc]ache
# but keep track of directories ending in .cache
!?*.[Cc]ache/

# Others
ClientBin/
~$*
*~
*.dbmdl
*.dbproj.schemaview
*.jfm
*.pfx
*.publishsettings
orleans.codegen.cs

# Including strong name files can present a security risk
# (https://github.com/github/gitignore/pull/2483#issue-259490424)
#*.snk

# Since there are multiple workflows, uncomment next line to ignore bower_components
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
#bower_components/

# RIA/Silverlight projects
Generated_Code/

# Backup & report files from converting an old project file
# to a newer Visual Studio version. Backup files are not needed,
# because we have git ;-)
_UpgradeReport_Files/
Backup*/
UpgradeLog*.XML
UpgradeLog*.htm
ServiceFabricBackup/
*.rptproj.bak

# SQL Server files
*.mdf
*.ldf
*.ndf

# Business Intelligence projects
*.rdl.data
*.bim.layout
*.bim_*.settings
*.rptproj.rsuser
*- [Bb]ackup.rdl
*- [Bb]ackup ([0-9]).rdl
*- [Bb]ackup ([0-9][0-9]).rdl

# Microsoft Fakes
FakesAssemblies/

# GhostDoc plugin setting file
*.GhostDoc.xml

# Node.js Tools for Visual Studio
.ntvs_analysis.dat
node_modules/

# Visual Studio 6 build log
*.plg

# Visual Studio 6 workspace options file
*.opt

# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
*.vbw

# Visual Studio 6 auto-generated project file (contains which files were open etc.)
*.vbp

# Visual Studio 6 workspace and project file (working project files containing files to include in project)
*.dsw
*.dsp

# Visual Studio 6 technical files
*.ncb
*.aps

# Visual Studio LightSwitch build output
**/*.HTMLClient/GeneratedArtifacts
**/*.DesktopClient/GeneratedArtifacts
**/*.DesktopClient/ModelManifest.xml
**/*.Server/GeneratedArtifacts
**/*.Server/ModelManifest.xml
_Pvt_Extensions

# Paket dependency manager
.paket/paket.exe
paket-files/

# FAKE - F# Make
.fake/

# CodeRush personal settings
.cr/personal

# Python Tools for Visual Studio (PTVS)
__pycache__/
*.pyc

# Cake - Uncomment if you are using it
# tools/**
# !tools/packages.config

# Tabs Studio
*.tss

# Telerik's JustMock configuration file
*.jmconfig

# BizTalk build output
*.btp.cs
*.btm.cs
*.odx.cs
*.xsd.cs

# OpenCover UI analysis results
OpenCover/

# Azure Stream Analytics local run output
ASALocalRun/

# MSBuild Binary and Structured Log
*.binlog

# NVidia Nsight GPU debugger configuration file
*.nvuser

# MFractors (Xamarin productivity tool) working folder
.mfractor/

# Local History for Visual Studio
.localhistory/

# Visual Studio History (VSHistory) files
.vshistory/

# BeatPulse healthcheck temp database
healthchecksdb

# Backup folder for Package Reference Convert tool in Visual Studio 2017
MigrationBackup/

# Ionide (cross platform F# VS Code tools) working folder
.ionide/

# Fody - auto-generated XML schema
FodyWeavers.xsd

# VS Code files for those working on multiple tools
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
*.code-workspace

# Local History for Visual Studio Code
.history/

# Windows Installer files from build outputs
*.cab
*.msi
*.msix
*.msm
*.msp

# JetBrains Rider
*.sln.iml


================================================
FILE: mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognition.cpp
================================================

// NonStreamingSpeechRecognition.cpp : Defines the class behaviors for the
// application.
//

// clang-format off
#include "pch.h"
#include "framework.h"
#include "NonStreamingSpeechRecognitionDlg.h"
#include "NonStreamingSpeechRecognition.h"
// clang-format on

#ifdef _DEBUG
#define new DEBUG_NEW
#endif

// CNonStreamingSpeechRecognitionApp

BEGIN_MESSAGE_MAP(CNonStreamingSpeechRecognitionApp, CWinApp)
ON_COMMAND(ID_HELP, &CWinApp::OnHelp)
END_MESSAGE_MAP()

// CNonStreamingSpeechRecognitionApp construction

CNonStreamingSpeechRecognitionApp::CNonStreamingSpeechRecognitionApp() {
  // TODO: add construction code here,
  // Place all significant initialization in InitInstance
}

// The one and only CNonStreamingSpeechRecognitionApp object

CNonStreamingSpeechRecognitionApp theApp;

// CNonStreamingSpeechRecognitionApp initialization

BOOL CNonStreamingSpeechRecognitionApp::InitInstance() {
  CWinApp::InitInstance();

  // Create the shell manager, in case the dialog contains
  // any shell tree view or shell list view controls.
  CShellManager *pShellManager = new CShellManager;

  // Activate "Windows Native" visual manager for enabling themes in MFC
  // controls
  CMFCVisualManager::SetDefaultManager(RUNTIME_CLASS(CMFCVisualManagerWindows));

  // Standard initialization
  // If you are not using these features and wish to reduce the size
  // of your final executable, you should remove from the following
  // the specific initialization routines you do not need
  // Change the registry key under which our settings are stored
  // TODO: You should modify this string to be something appropriate
  // such as the name of your company or organization
  SetRegistryKey(_T("Local AppWizard-Generated Applications"));

  CNonStreamingSpeechRecognitionDlg dlg;
  m_pMainWnd = &dlg;
  INT_PTR nResponse = dlg.DoModal();
  if (nResponse == IDOK) {
    // TODO: Place code here to handle when the dialog is
    //  dismissed with OK
  } else if (nResponse == IDCANCEL) {
    // TODO: Place code here to handle when the dialog is
    //  dismissed with Cancel
  } else if (nResponse == -1) {
    TRACE(traceAppMsg, 0,
          "Warning: dialog creation failed, so application is terminating "
          "unexpectedly.\n");
    TRACE(traceAppMsg, 0,
          "Warning: if you are using MFC controls on the dialog, you cannot "
          "#define _AFX_NO_MFC_CONTROLS_IN_DIALOGS.\n");
  }

  // Delete the shell manager created above.
  if (pShellManager != nullptr) {
    delete pShellManager;
  }

#if !defined(_AFXDLL) && !defined(_AFX_NO_MFC_CONTROLS_IN_DIALOGS)
  ControlBarCleanUp();
#endif

  // Since the dialog has been closed, return FALSE so that we exit the
  //  application, rather than start the application's message pump.
  return FALSE;
}


================================================
FILE: mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognition.h
================================================

// NonStreamingSpeechRecognition.h : main header file for the PROJECT_NAME
// application
//

#pragma once

#ifndef __AFXWIN_H__
#error "include 'pch.h' before including this file for PCH"
#endif

#include "resource.h"  // main symbols

// CNonStreamingSpeechRecognitionApp:
// See NonStreamingSpeechRecognition.cpp for the implementation of this class
//

class CNonStreamingSpeechRecognitionApp : public CWinApp {
 public:
  CNonStreamingSpeechRecognitionApp();

  // Overrides
 public:
  virtual BOOL InitInstance();

  // Implementation

  DECLARE_MESSAGE_MAP()
};

extern CNonStreamingSpeechRecognitionApp theApp;


================================================
FILE: mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognition.vcxproj
================================================
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <ItemGroup Label="ProjectConfigurations">
    <ProjectConfiguration Include="Debug|Win32">
      <Configuration>Debug</Configuration>
      <Platform>Win32</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Release|Win32">
      <Configuration>Release</Configuration>
      <Platform>Win32</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Debug|x64">
      <Configuration>Debug</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Release|x64">
      <Configuration>Release</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
  </ItemGroup>
  <PropertyGroup Label="Globals">
    <VCProjectVersion>17.0</VCProjectVersion>
    <ProjectGuid>{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}</ProjectGuid>
    <Keyword>MFCProj</Keyword>
    <RootNamespace>NonStreamingSpeechRecognition</RootNamespace>
    <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>true</UseDebugLibraries>
    <PlatformToolset>v143</PlatformToolset>
    <CharacterSet>Unicode</CharacterSet>
    <UseOfMfc>Static</UseOfMfc>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
    <PlatformToolset>v143</PlatformToolset>
    <WholeProgramOptimization>true</WholeProgramOptimization>
    <CharacterSet>Unicode</CharacterSet>
    <UseOfMfc>Static</UseOfMfc>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>true</UseDebugLibraries>
    <PlatformToolset>v143</PlatformToolset>
    <CharacterSet>Unicode</CharacterSet>
    <UseOfMfc>Static</UseOfMfc>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
    <PlatformToolset>v143</PlatformToolset>
    <WholeProgramOptimization>true</WholeProgramOptimization>
    <CharacterSet>Unicode</CharacterSet>
    <UseOfMfc>Static</UseOfMfc>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
  <ImportGroup Label="ExtensionSettings">
  </ImportGroup>
  <ImportGroup Label="Shared">
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
    <Import Project="sherpa-onnx-deps.props" />
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
    <Import Project="sherpa-onnx-deps.props" />
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
    <Import Project="sherpa-onnx-deps.props" />
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
    <Import Project="sherpa-onnx-deps.props" />
  </ImportGroup>
  <PropertyGroup Label="UserMacros" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <LinkIncremental>false</LinkIncremental>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <LinkIncremental>true</LinkIncremental>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <LinkIncremental>true</LinkIncremental>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
    <LinkIncremental>false</LinkIncremental>
  </PropertyGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <ClCompile>
      <PrecompiledHeader>Use</PrecompiledHeader>
      <WarningLevel>Level3</WarningLevel>
      <FunctionLevelLinking>true</FunctionLevelLinking>
      <IntrinsicFunctions>true</IntrinsicFunctions>
      <SDLCheck>true</SDLCheck>
      <PreprocessorDefinitions>_WINDOWS;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
    </ClCompile>
    <Link>
      <SubSystem>Windows</SubSystem>
      <EnableCOMDATFolding>true</EnableCOMDATFolding>
      <OptimizeReferences>true</OptimizeReferences>
    </Link>
    <Midl>
      <MkTypLibCompatible>false</MkTypLibCompatible>
      <ValidateAllParameters>true</ValidateAllParameters>
      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    </Midl>
    <ResourceCompile>
      <Culture>0x0409</Culture>
      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
    </ResourceCompile>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <ClCompile>
      <PrecompiledHeader>Use</PrecompiledHeader>
      <WarningLevel>Level3</WarningLevel>
      <SDLCheck>true</SDLCheck>
      <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
    </ClCompile>
    <Link>
      <SubSystem>Windows</SubSystem>
    </Link>
    <Midl>
      <MkTypLibCompatible>false</MkTypLibCompatible>
      <ValidateAllParameters>true</ValidateAllParameters>
      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    </Midl>
    <ResourceCompile>
      <Culture>0x0409</Culture>
      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
    </ResourceCompile>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <ClCompile>
      <PrecompiledHeader>Use</PrecompiledHeader>
      <WarningLevel>Level3</WarningLevel>
      <SDLCheck>true</SDLCheck>
      <PreprocessorDefinitions>_WINDOWS;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
    </ClCompile>
    <Link>
      <SubSystem>Windows</SubSystem>
    </Link>
    <Midl>
      <MkTypLibCompatible>false</MkTypLibCompatible>
      <ValidateAllParameters>true</ValidateAllParameters>
      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    </Midl>
    <ResourceCompile>
      <Culture>0x0409</Culture>
      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
    </ResourceCompile>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
    <ClCompile>
      <PrecompiledHeader>Use</PrecompiledHeader>
      <WarningLevel>Level3</WarningLevel>
      <FunctionLevelLinking>true</FunctionLevelLinking>
      <IntrinsicFunctions>true</IntrinsicFunctions>
      <SDLCheck>true</SDLCheck>
      <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
    </ClCompile>
    <Link>
      <SubSystem>Windows</SubSystem>
      <EnableCOMDATFolding>true</EnableCOMDATFolding>
      <OptimizeReferences>true</OptimizeReferences>
    </Link>
    <Midl>
      <MkTypLibCompatible>false</MkTypLibCompatible>
      <ValidateAllParameters>true</ValidateAllParameters>
      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    </Midl>
    <ResourceCompile>
      <Culture>0x0409</Culture>
      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
    </ResourceCompile>
  </ItemDefinitionGroup>
  <ItemGroup>
    <ClInclude Include="framework.h" />
    <ClInclude Include="NonStreamingSpeechRecognition.h" />
    <ClInclude Include="NonStreamingSpeechRecognitionDlg.h" />
    <ClInclude Include="pch.h" />
    <ClInclude Include="Resource.h" />
    <ClInclude Include="targetver.h" />
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="NonStreamingSpeechRecognition.cpp" />
    <ClCompile Include="NonStreamingSpeechRecognitionDlg.cpp" />
    <ClCompile Include="pch.cpp">
      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader>
      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Create</PrecompiledHeader>
      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Create</PrecompiledHeader>
      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Create</PrecompiledHeader>
    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ResourceCompile Include="NonStreamingSpeechRecognition.rc" />
  </ItemGroup>
  <ItemGroup>
    <None Include="res\NonStreamingSpeechRecognition.rc2" />
  </ItemGroup>
  <ItemGroup>
    <Image Include="res\NonStreamingSpeechRecognition.ico" />
  </ItemGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">
  </ImportGroup>
</Project>

================================================
FILE: mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognition.vcxproj.filters
================================================
﻿<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <ItemGroup>
    <Filter Include="Source Files">
      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
      <Extensions>cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
    </Filter>
    <Filter Include="Header Files">
      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
      <Extensions>h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd</Extensions>
    </Filter>
    <Filter Include="Resource Files">
      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
    </Filter>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="NonStreamingSpeechRecognition.h">
      <Filter>Header Files</Filter>
    </ClInclude>
    <ClInclude Include="NonStreamingSpeechRecognitionDlg.h">
      <Filter>Header Files</Filter>
    </ClInclude>
    <ClInclude Include="framework.h">
      <Filter>Header Files</Filter>
    </ClInclude>
    <ClInclude Include="targetver.h">
      <Filter>Header Files</Filter>
    </ClInclude>
    <ClInclude Include="Resource.h">
      <Filter>Header Files</Filter>
    </ClInclude>
    <ClInclude Include="pch.h">
      <Filter>Header Files</Filter>
    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="NonStreamingSpeechRecognition.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
    <ClCompile Include="NonStreamingSpeechRecognitionDlg.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
    <ClCompile Include="pch.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ResourceCompile Include="NonStreamingSpeechRecognition.rc">
      <Filter>Resource Files</Filter>
    </ResourceCompile>
  </ItemGroup>
  <ItemGroup>
    <None Include="res\NonStreamingSpeechRecognition.rc2">
      <Filter>Resource Files</Filter>
    </None>
  </ItemGroup>
  <ItemGroup>
    <Image Include="res\NonStreamingSpeechRecognition.ico">
      <Filter>Resource Files</Filter>
    </Image>
  </ItemGroup>
</Project>

================================================
FILE: mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognitionDlg.cpp
================================================

// NonStreamingSpeechRecognitionDlg.cpp : implementation file
//

// clang-format off
#include "pch.h"
#include "framework.h"
#include "afxdialogex.h"
#include "NonStreamingSpeechRecognition.h"
#include "NonStreamingSpeechRecognitionDlg.h"
// clang-format on

#include <fstream>
#include <sstream>
#include <string>
#include <vector>

#ifdef _DEBUG
#define new DEBUG_NEW
#endif

Microphone::Microphone() {
  PaError err = Pa_Initialize();
  if (err != paNoError) {
    fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
    exit(-2);
  }
}

Microphone::~Microphone() {
  PaError err = Pa_Terminate();
  if (err != paNoError) {
    fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
    exit(-2);
  }
}

// see
// https://stackoverflow.com/questions/7153935/how-to-convert-utf-8-stdstring-to-utf-16-stdwstring
static std::wstring Utf8ToUtf16(const std::string &utf8) {
  std::vector<unsigned long> unicode;
  size_t i = 0;
  while (i < utf8.size()) {
    unsigned long uni;
    size_t todo;
    bool error = false;
    unsigned char ch = utf8[i++];
    if (ch <= 0x7F) {
      uni = ch;
      todo = 0;
    } else if (ch <= 0xBF) {
      throw std::logic_error("not a UTF-8 string");
    } else if (ch <= 0xDF) {
      uni = ch & 0x1F;
      todo = 1;
    } else if (ch <= 0xEF) {
      uni = ch & 0x0F;
      todo = 2;
    } else if (ch <= 0xF7) {
      uni = ch & 0x07;
      todo = 3;
    } else {
      throw std::logic_error("not a UTF-8 string");
    }
    for (size_t j = 0; j < todo; ++j) {
      if (i == utf8.size()) throw std::logic_error("not a UTF-8 string");
      unsigned char ch = utf8[i++];
      if (ch < 0x80 || ch > 0xBF) throw std::logic_error("not a UTF-8 string");
      uni <<= 6;
      uni += ch & 0x3F;
    }
    if (uni >= 0xD800 && uni <= 0xDFFF)
      throw std::logic_error("not a UTF-8 string");
    if (uni > 0x10FFFF) throw std::logic_error("not a UTF-8 string");
    unicode.push_back(uni);
  }
  std::wstring utf16;
  for (size_t i = 0; i < unicode.size(); ++i) {
    unsigned long uni = unicode[i];
    if (uni <= 0xFFFF) {
      utf16 += (wchar_t)uni;
    } else {
      uni -= 0x10000;
      utf16 += (wchar_t)((uni >> 10) + 0xD800);
      utf16 += (wchar_t)((uni & 0x3FF) + 0xDC00);
    }
  }
  return utf16;
}

static std::string Cat(const std::vector<std::string> &results) {
  std::ostringstream os;
  std::string sep;

  int i = 0;
  for (i = 0; i != results.size(); ++i) {
    os << sep << i << ": " << results[i];
    sep = "\r\n";
  }

  return os.str();
}

// CNonStreamingSpeechRecognitionDlg dialog

CNonStreamingSpeechRecognitionDlg::CNonStreamingSpeechRecognitionDlg(
    CWnd *pParent /*=nullptr*/)
    : CDialogEx(IDD_NONSTREAMINGSPEECHRECOGNITION_DIALOG, pParent) {
  m_hIcon = AfxGetApp()->LoadIcon(IDR_MAINFRAME);
}

CNonStreamingSpeechRecognitionDlg::~CNonStreamingSpeechRecognitionDlg() {
  if (recognizer_) {
    SherpaOnnxDestroyOfflineRecognizer(recognizer_);
    recognizer_ = nullptr;
  }
}

void CNonStreamingSpeechRecognitionDlg::DoDataExchange(CDataExchange *pDX) {
  CDialogEx::DoDataExchange(pDX);
  DDX_Control(pDX, IDC_EDIT1, my_text_);
  DDX_Control(pDX, IDOK, my_btn_);
}

BEGIN_MESSAGE_MAP(CNonStreamingSpeechRecognitionDlg, CDialogEx)
ON_WM_PAINT()
ON_WM_QUERYDRAGICON()
ON_BN_CLICKED(IDOK, &CNonStreamingSpeechRecognitionDlg::OnBnClickedOk)
END_MESSAGE_MAP()

// CNonStreamingSpeechRecognitionDlg message handlers

BOOL CNonStreamingSpeechRecognitionDlg::OnInitDialog() {
  CDialogEx::OnInitDialog();

  // Set the icon for this dialog.  The framework does this automatically
  //  when the application's main window is not a dialog
  SetIcon(m_hIcon, TRUE);   // Set big icon
  SetIcon(m_hIcon, FALSE);  // Set small icon

  // TODO: Add extra initialization here
  InitMicrophone();

  return TRUE;  // return TRUE  unless you set the focus to a control
}

// If you add a minimize button to your dialog, you will need the code below
//  to draw the icon.  For MFC applications using the document/view model,
//  this is automatically done for you by the framework.

void CNonStreamingSpeechRecognitionDlg::OnPaint() {
  if (IsIconic()) {
    CPaintDC dc(this);  // device context for painting

    SendMessage(WM_ICONERASEBKGND, reinterpret_cast<WPARAM>(dc.GetSafeHdc()),
                0);

    // Center icon in client rectangle
    int cxIcon = GetSystemMetrics(SM_CXICON);
    int cyIcon = GetSystemMetrics(SM_CYICON);
    CRect rect;
    GetClientRect(&rect);
    int x = (rect.Width() - cxIcon + 1) / 2;
    int y = (rect.Height() - cyIcon + 1) / 2;

    // Draw the icon
    dc.DrawIcon(x, y, m_hIcon);
  } else {
    CDialogEx::OnPaint();
  }
}

// The system calls this function to obtain the cursor to display while the user
// drags
//  the minimized window.
HCURSOR CNonStreamingSpeechRecognitionDlg::OnQueryDragIcon() {
  return static_cast<HCURSOR>(m_hIcon);
}

static int32_t RecordCallback(const void *input_buffer,
                              void * /*output_buffer*/,
                              unsigned long frames_per_buffer,  // NOLINT
                              const PaStreamCallbackTimeInfo * /*time_info*/,
                              PaStreamCallbackFlags /*status_flags*/,
                              void *user_data) {
  auto dlg = reinterpret_cast<CNonStreamingSpeechRecognitionDlg *>(user_data);
  auto begin = reinterpret_cast<const float *>(input_buffer);
  auto end = begin + frames_per_buffer;
  dlg->samples_.insert(dlg->samples_.end(), begin, end);

  return dlg->started_ ? paContinue : paComplete;
}

void CNonStreamingSpeechRecognitionDlg::OnBnClickedOk() {
  if (!recognizer_) {
    AppendLineToMultilineEditCtrl("Creating recognizer...");
    AppendLineToMultilineEditCtrl("It will take several seconds. Please wait");
    InitRecognizer();
    if (!recognizer_) {
      // failed to create the recognizer
      return;
    }
    AppendLineToMultilineEditCtrl("Recognizer created!");
  }

  if (!started_) {
    samples_.clear();
    started_ = true;

    PaStreamParameters param;
    param.device = Pa_GetDefaultInputDevice();
    const PaDeviceInfo *info = Pa_GetDeviceInfo(param.device);
    param.channelCount = 1;
    param.sampleFormat = paFloat32;
    param.suggestedLatency = info->defaultLowInputLatency;
    param.hostApiSpecificStreamInfo = nullptr;
    float sample_rate = static_cast<float>(config_.feat_config.sample_rate);
    pa_stream_ = nullptr;
    PaError err =
        Pa_OpenStream(&pa_stream_, &param, nullptr, /* &outputParameters, */
                      sample_rate,
                      0,          // frames per buffer
                      paClipOff,  // we won't output out of range samples
                                  // so don't bother clipping them
                      RecordCallback, this);
    if (err != paNoError) {
      AppendLineToMultilineEditCtrl(std::string("PortAudio error: ") +
                                    Pa_GetErrorText(err));
      my_btn_.EnableWindow(FALSE);
      return;
    }

    err = Pa_StartStream(pa_stream_);
    if (err != paNoError) {
      AppendLineToMultilineEditCtrl(std::string("PortAudio error: ") +
                                    Pa_GetErrorText(err));
      my_btn_.EnableWindow(FALSE);
      return;
    }
    AppendLineToMultilineEditCtrl(
        "\r\nStarted! Please speak and click stop.\r\n");
    my_btn_.SetWindowText(_T("Stop"));

  } else {
    started_ = false;

    Pa_Sleep(200);  // sleep for 200ms
    if (pa_stream_) {
      PaError err = Pa_CloseStream(pa_stream_);
      if (err != paNoError) {
        AppendLineToMultilineEditCtrl(std::string("PortAudio error: ") +
                                      Pa_GetErrorText(err));
        my_btn_.EnableWindow(FALSE);
        return;
      }
    }
    pa_stream_ = nullptr;

    const SherpaOnnxOfflineStream *stream = SherpaOnnxCreateOfflineStream(recognizer_);

    SherpaOnnxAcceptWaveformOffline(stream, config_.feat_config.sample_rate,
                          samples_.data(), static_cast<int32_t>(samples_.size()));
    SherpaOnnxDecodeOfflineStream(recognizer_, stream);
    auto r = SherpaOnnxGetOfflineStreamResult(stream);
    results_.emplace_back(r->text);

    auto str = Utf8ToUtf16(Cat(results_).c_str());
    my_text_.SetWindowText(str.c_str());
    my_text_.SetFocus();
    my_text_.SetSel(-1);

    SherpaOnnxDestroyOfflineRecognizerResult(r);

    SherpaOnnxDestroyOfflineStream(stream);
    // AfxMessageBox("Stopped", MB_OK);
    my_btn_.SetWindowText(_T("Start"));
    AppendLineToMultilineEditCtrl("\r\nStopped. Please click start and speak");
  }
}

void CNonStreamingSpeechRecognitionDlg::InitMicrophone() {
  int default_device = Pa_GetDefaultInputDevice();
  int device_count = Pa_GetDeviceCount();
  if (default_device == paNoDevice) {
    // CString str;
    // str.Format(_T("No default input device found!"));
    // AfxMessageBox(str, MB_OK | MB_ICONSTOP);
    // exit(-1);
    AppendLineToMultilineEditCtrl("No default input device found!");
    my_btn_.EnableWindow(FALSE);
    return;
  }
  AppendLineToMultilineEditCtrl(std::string("Selected device ") +
                                Pa_GetDeviceInfo(default_device)->name);
}

bool CNonStreamingSpeechRecognitionDlg::Exists(const std::string &filename) {
  std::ifstream is(filename);
  return is.good();
}

void CNonStreamingSpeechRecognitionDlg::ShowInitRecognizerHelpMessage() {
  my_btn_.EnableWindow(FALSE);
  std::string msg =
      "\r\nPlease go to\r\n"
      "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html "
      "\r\n";
  msg += "to download a non-streaming model, i.e., an offline model.\r\n";
  msg += "You need to rename them after downloading\r\n\r\n";
  msg += "It supports transducer, paraformer, and whisper models.\r\n\r\n";
  msg +=
      "We give three examples below to show you how to download models\r\n\r\n";
  msg += "(1) Transducer\r\n\r\n";
  msg +=
      "We use "
      "https://huggingface.co/pkufool/"
      "icefall-asr-zipformer-wenetspeech-20230615 below\r\n";
  msg +=
      "wget "
      "https://huggingface.co/pkufool/"
      "icefall-asr-zipformer-wenetspeech-20230615/resolve/main/exp/"
      "encoder-epoch-12-avg-4.onnx\r\n";
  msg +=
      "wget "
      "https://huggingface.co/pkufool/"
      "icefall-asr-zipformer-wenetspeech-20230615/resolve/main/exp/"
      "decoder-epoch-12-avg-4.onnx\r\n";
  msg +=
      "wget "
      "https://huggingface.co/pkufool/"
      "icefall-asr-zipformer-wenetspeech-20230615/resolve/main/exp/"
      "joiner-epoch-12-avg-4.onnx\r\n";
  msg += "\r\n Now rename them\r\n";
  msg += "mv encoder-epoch-12-avg-4.onnx encoder.onnx\r\n";
  msg += "mv decoder-epoch-12-avg-4.onnx decoder.onnx\r\n";
  msg += "mv joiner-epoch-12-avg-4.onnx joiner.onnx\r\n\r\n";
  msg += "(2) Paraformer\r\n\r\n";
  msg +=
      "wget "
      "https://huggingface.co/csukuangfj/"
      "sherpa-onnx-paraformer-zh-2023-09-14/resolve/main/model.int8.onnx\r\n";
  msg +=
      "wget "
      "https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-09-14/"
      "resolve/main/tokens.txt\r\n\r\n";
  msg += "\r\n Now rename them\r\n";
  msg += "mv model.onnx paraformer.onnx\r\n\r\n";
  msg += "(3) Whisper\r\n\r\n";
  msg +=
      "wget "
      "https://huggingface.co/csukuangfj/sherpa-onnx-whisper-tiny.en/resolve/"
      "main/tiny.en-encoder.onnx\r\n";
  msg +=
      "wget "
      "https://huggingface.co/csukuangfj/sherpa-onnx-whisper-tiny.en/resolve/"
      "main/tiny.en-decoder.onnx\r\n";
  msg +=
      "wget "
      "https://huggingface.co/csukuangfj/sherpa-onnx-whisper-tiny.en/resolve/"
      "main/tiny.en-tokens.txt\r\n";
  msg += "\r\n Now rename them\r\n";
  msg += "mv tiny.en-encoder.onnx whisper-encoder.onnx\r\n";
  msg += "mv tiny.en-decoder.onnx whisper-decoder.onnx\r\n";
  msg += "\r\n";
  msg += "That's it!\r\n";

  AppendLineToMultilineEditCtrl(msg);
}

void CNonStreamingSpeechRecognitionDlg::InitWhisper() {
  std::string whisper_encoder = "./whisper-encoder.onnx";
  std::string whisper_decoder = "./whisper-decoder.onnx";

  std::string tokens = "./tokens.txt";

  bool is_ok = true;

  if (Exists("./whisper-encoder.int8.onnx")) {
    whisper_encoder = "./whisper-encoder.int8.onnx";
  } else if (!Exists(whisper_encoder)) {
    std::string msg = whisper_encoder + " does not exist!";
    AppendLineToMultilineEditCtrl(msg);
    is_ok = false;
  }

  if (Exists("./whisper-decoder.int8.onnx")) {
    whisper_decoder = "./whisper-decoder.int8.onnx";
  } else if (!Exists(whisper_decoder)) {
    std::string msg = whisper_decoder + " does not exist!";
    AppendLineToMultilineEditCtrl(msg);
    is_ok = false;
  }

  if (!Exists(tokens)) {
    std::string msg = tokens + " does not exist!";
    AppendLineToMultilineEditCtrl(msg);
    is_ok = false;
  }

  if (!is_ok) {
    ShowInitRecognizerHelpMessage();
    return;
  }

  memset(&config_, 0, sizeof(config_));

  config_.feat_config.sample_rate = 16000;
  config_.feat_config.feature_dim = 80;

  config_.model_config.whisper.encoder = whisper_encoder.c_str();
  config_.model_config.whisper.decoder = whisper_decoder.c_str();
  config_.model_config.tokens = tokens.c_str();
  config_.model_config.num_threads = 1;
  config_.model_config.debug = 1;
  config_.model_config.model_type = "whisper";

  config_.decoding_method = "greedy_search";
  config_.max_active_paths = 4;

  recognizer_ = SherpaOnnxCreateOfflineRecognizer(&config_);
}

void CNonStreamingSpeechRecognitionDlg::InitParaformer() {
  std::string paraformer = "./paraformer.onnx";
  std::string tokens = "./tokens.txt";

  bool is_ok = true;

  if (Exists("./paraformer.int8.onnx")) {
    paraformer = "./paraformer.int8.onnx";
  } else if (!Exists(paraformer)) {
    std::string msg = paraformer + " does not exist!";
    AppendLineToMultilineEditCtrl(msg);
    is_ok = false;
  }

  if (!Exists(tokens)) {
    std::string msg = tokens + " does not exist!";
    AppendLineToMultilineEditCtrl(msg);
    is_ok = false;
  }

  if (!is_ok) {
    ShowInitRecognizerHelpMessage();
    return;
  }

  memset(&config_, 0, sizeof(config_));

  config_.feat_config.sample_rate = 16000;
  config_.feat_config.feature_dim = 80;

  config_.model_config.paraformer.model = paraformer.c_str();
  config_.model_config.tokens = tokens.c_str();
  config_.model_config.num_threads = 1;
  config_.model_config.debug = 1;
  config_.model_config.model_type = "paraformer";

  config_.decoding_method = "greedy_search";
  config_.max_active_paths = 4;

  recognizer_ = SherpaOnnxCreateOfflineRecognizer(&config_);
}

void CNonStreamingSpeechRecognitionDlg::InitRecognizer() {
  if (Exists("./paraformer.onnx") || Exists("./paraformer.int8.onnx")) {
    InitParaformer();
    return;
  }

  if (Exists("./whisper-encoder.onnx") || Exists("./whisper-encoder.int8.onnx")) {
    InitWhisper();
    return;
  }

  // assume it is transducer

  std::string encoder = "./encoder.onnx";
  std::string decoder = "./decoder.onnx";
  std::string joiner = "./joiner.onnx";
  std::string tokens = "./tokens.txt";

  bool is_ok = true;
  if (!Exists(encoder)) {
    std::string msg = encoder + " does not exist!";
    AppendLineToMultilineEditCtrl(msg);
    is_ok = false;
  }

  if (!Exists(decoder)) {
    std::string msg = decoder + " does not exist!";
    AppendLineToMultilineEditCtrl(msg);
    is_ok = false;
  }

  if (!Exists(joiner)) {
    std::string msg = joiner + " does not exist!";
    AppendLineToMultilineEditCtrl(msg);
    is_ok = false;
  }

  if (!Exists(tokens)) {
    std::string msg = tokens + " does not exist!";
    AppendLineToMultilineEditCtrl(msg);
    is_ok = false;
  }

  if (!is_ok) {
    ShowInitRecognizerHelpMessage();
    return;
  }
  memset(&config_, 0, sizeof(config_));

  config_.feat_config.sample_rate = 16000;
  config_.feat_config.feature_dim = 80;

  config_.model_config.transducer.encoder = encoder.c_str();
  config_.model_config.transducer.decoder = decoder.c_str();
  config_.model_config.transducer.joiner = joiner.c_str();
  config_.model_config.tokens = tokens.c_str();
  config_.model_config.num_threads = 1;
  config_.model_config.debug = 0;
  config_.model_config.model_type = "transducer";

  config_.decoding_method = "greedy_search";
  config_.max_active_paths = 4;

  recognizer_ = SherpaOnnxCreateOfflineRecognizer(&config_);
}

void CNonStreamingSpeechRecognitionDlg::AppendTextToEditCtrl(
    const std::string &s) {
  // get the initial text length
  int nLength = my_text_.GetWindowTextLength();
  // put the selection at the end of text
  my_text_.SetSel(nLength, nLength);
  // replace the selection

  std::wstring wstr = Utf8ToUtf16(s);

  my_text_.ReplaceSel(wstr.c_str());
}

void CNonStreamingSpeechRecognitionDlg::AppendLineToMultilineEditCtrl(
    const std::string &s) {
  AppendTextToEditCtrl("\r\n" + s);
}


================================================
FILE: mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognitionDlg.h
================================================

// NonStreamingSpeechRecognitionDlg.h : header file
//

#pragma once

#include <string>
#include <vector>

#include "portaudio.h"
#include "sherpa-onnx/c-api/c-api.h"

class Microphone {
 public:
  Microphone();
  ~Microphone();
};

// CNonStreamingSpeechRecognitionDlg dialog
class CNonStreamingSpeechRecognitionDlg : public CDialogEx {
  // Construction
 public:
  CNonStreamingSpeechRecognitionDlg(
      CWnd *pParent = nullptr);  // standard constructor
  ~CNonStreamingSpeechRecognitionDlg();

// Dialog Data
#ifdef AFX_DESIGN_TIME
  enum { IDD = IDD_NONSTREAMINGSPEECHRECOGNITION_DIALOG };
#endif

 protected:
  virtual void DoDataExchange(CDataExchange *pDX);  // DDX/DDV support

  // Implementation
 protected:
  HICON m_hIcon;

  // Generated message map functions
  virtual BOOL OnInitDialog();
  afx_msg void OnPaint();
  afx_msg HCURSOR OnQueryDragIcon();
  DECLARE_MESSAGE_MAP()
 public:
  afx_msg void OnBnClickedOk();
  int RunThread();

 private:
  Microphone mic_;

  const SherpaOnnxOfflineRecognizer *recognizer_ = nullptr;
  SherpaOnnxOfflineRecognizerConfig config_;

  PaStream *pa_stream_ = nullptr;
  CButton my_btn_;
  CEdit my_text_;
  std::vector<std::string> results_;

 public:
  bool started_ = false;
  std::vector<float> samples_;

 private:
  void AppendTextToEditCtrl(const std::string &s);
  void AppendLineToMultilineEditCtrl(const std::string &s);
  void InitMicrophone();

  bool Exists(const std::string &filename);
  void InitRecognizer();

  void InitParaformer();
  void InitWhisper();
  void ShowInitRecognizerHelpMessage();
};


================================================
FILE: mfc-examples/NonStreamingSpeechRecognition/Resource.h
================================================
//{{NO_DEPENDENCIES}}
// Microsoft Visual C++ generated include file.
// Used by NonStreamingSpeechRecognition.rc
//
#define IDD_NONSTREAMINGSPEECHRECOGNITION_DIALOG 102
#define IDR_MAINFRAME 128
#define IDC_EDIT1 1000

// Next default values for new objects
//
#ifdef APSTUDIO_INVOKED
#ifndef APSTUDIO_READONLY_SYMBOLS
#define _APS_NEXT_RESOURCE_VALUE 130
#define _APS_NEXT_COMMAND_VALUE 32771
#define _APS_NEXT_CONTROL_VALUE 1001
#define _APS_NEXT_SYMED_VALUE 101
#endif
#endif


================================================
FILE: mfc-examples/NonStreamingSpeechRecognition/framework.h
================================================
#pragma once

#ifndef VC_EXTRALEAN
#define VC_EXTRALEAN  // Exclude rarely-used stuff from Windows headers
#endif

#include "targetver.h"

#define _ATL_CSTRING_EXPLICIT_CONSTRUCTORS  // some CString constructors will be
                                            // explicit

// turns off MFC's hiding of some common and often safely ignored warning
// messages
#define _AFX_ALL_WARNINGS

#include <afxext.h>  // MFC extensions
#include <afxwin.h>  // MFC core and standard components

#ifndef _AFX_NO_OLE_SUPPORT
#include <afxdtctl.h>  // MFC support for Internet Explorer 4 Common Controls
#endif
#ifndef _AFX_NO_AFXCMN_SUPPORT
#include <afxcmn.h>  // MFC support for Windows Common Controls
#endif               // _AFX_NO_AFXCMN_SUPPORT

#include <afxcontrolbars.h>  // MFC support for ribbons and control bars


================================================
FILE: mfc-examples/NonStreamingSpeechRecognition/pch.cpp
================================================
// pch.cpp: source file corresponding to the pre-compiled header

#include "pch.h"

// When you are using pre-compiled headers, this source file is necessary for
// compilation to succeed.


================================================
FILE: mfc-examples/NonStreamingSpeechRecognition/pch.h
================================================
// pch.h: This is a precompiled header file.
// Files listed below are compiled only once, improving build performance for
// future builds. This also affects IntelliSense performance, including code
// completion and many code browsing features. However, files listed here are
// ALL re-compiled if any one of them is updated between builds. Do not add
// files here that you will be updating frequently as this negates the
// performance advantage.

#ifndef PCH_H
#define PCH_H

// add headers that you want to pre-compile here
#include "framework.h"

#endif  // PCH_H


================================================
FILE: mfc-examples/NonStreamingSpeechRecognition/sherpa-onnx-deps.props
================================================
﻿<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <ImportGroup Label="PropertySheets" />
  <PropertyGroup Label="UserMacros" />
  <PropertyGroup>
    <SherpaOnnxBuildDirectory>..\..\build</SherpaOnnxBuildDirectory>
    <SherpaOnnxInstallDirectory>..\..\build\install</SherpaOnnxInstallDirectory>
    <SherpaOnnxLibraries>
        sherpa-onnx-portaudio_static.lib;
        sherpa-onnx-c-api.lib;
        sherpa-onnx-core.lib;
        kaldi-decoder-core.lib;
        sherpa-onnx-kaldifst-core.lib;
        sherpa-onnx-fstfar.lib;
        sherpa-onnx-fst.lib;
        kaldi-native-fbank-core.lib;
        kissfft-float.lib;
        onnxruntime.lib;
        piper_phonemize.lib;
        espeak-ng.lib;
        ucd.lib;
        ssentencepiece_core.lib;
    </SherpaOnnxLibraries>
  </PropertyGroup>
  <ItemDefinitionGroup>
    <ClCompile>
      <AdditionalIncludeDirectories>
	  $(SherpaOnnxBuildDirectory)\_deps\portaudio-src\include;
    $(SherpaOnnxInstallDirectory)\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
    </ClCompile>
    <Link>
      <AdditionalLibraryDirectories>$(SherpaOnnxInstallDirectory)\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
      <AdditionalDependencies>$(SherpaOnnxLibraries);</AdditionalDependencies>
    </Link>
  </ItemDefinitionGroup>
  <ItemGroup />
</Project>


================================================
FILE: mfc-examples/NonStreamingSpeechRecognition/targetver.h
================================================
#pragma once

// Including SDKDDKVer.h defines the highest available Windows platform.

// If you wish to build your application for a previous Windows platform,
// include WinSDKVer.h and set the _WIN32_WINNT macro to the platform you wish
// to support before including SDKDDKVer.h.

#include <SDKDDKVer.h>


================================================
FILE: mfc-examples/NonStreamingTextToSpeech/NonStreamingTextToSpeech.cpp
================================================

// NonStreamingTextToSpeech.cpp : Defines the class behaviors for the application.
//

#include "pch.h"
#include "framework.h"
#include "NonStreamingTextToSpeech.h"
#include "NonStreamingTextToSpeechDlg.h"

#ifdef _DEBUG
#define new DEBUG_NEW
#endif


// CNonStreamingTextToSpeechApp

BEGIN_MESSAGE_MAP(CNonStreamingTextToSpeechApp, CWinApp)
	ON_COMMAND(ID_HELP, &CWinApp::OnHelp)
END_MESSAGE_MAP()


// CNonStreamingTextToSpeechApp construction

CNonStreamingTextToSpeechApp::CNonStreamingTextToSpeechApp()
{
	// TODO: add construction code here,
	// Place all significant initialization in InitInstance
}


// The one and only CNonStreamingTextToSpeechApp object

CNonStreamingTextToSpeechApp theApp;


// CNonStreamingTextToSpeechApp initialization

BOOL CNonStreamingTextToSpeechApp::InitInstance()
{
	CWinApp::InitInstance();


	// Create the shell manager, in case the dialog contains
	// any shell tree view or shell list view controls.
	CShellManager *pShellManager = new CShellManager;

	// Activate "Windows Native" visual manager for enabling themes in MFC controls
	CMFCVisualManager::SetDefaultManager(RUNTIME_CLASS(CMFCVisualManagerWindows));

	// Standard initialization
	// If you are not using these features and wish to reduce the size
	// of your final executable, you should remove from the following
	// the specific initialization routines you do not need
	// Change the registry key under which our settings are stored
	// TODO: You should modify this string to be something appropriate
	// such as the name of your company or organization
	SetRegistryKey(_T("Local AppWizard-Generated Applications"));

	CNonStreamingTextToSpeechDlg dlg;
	m_pMainWnd = &dlg;
	INT_PTR nResponse = dlg.DoModal();
	if (nResponse == IDOK)
	{
		// TODO: Place code here to handle when the dialog is
		//  dismissed with OK
	}
	else if (nResponse == IDCANCEL)
	{
		// TODO: Place code here to handle when the dialog is
		//  dismissed with Cancel
	}
	else if (nResponse == -1)
	{
		TRACE(traceAppMsg, 0, "Warning: dialog creation failed, so application is terminating unexpectedly.\n");
		TRACE(traceAppMsg, 0, "Warning: if you are using MFC controls on the dialog, you cannot #define _AFX_NO_MFC_CONTROLS_IN_DIALOGS.\n");
	}

	// Delete the shell manager created above.
	if (pShellManager != nullptr)
	{
		delete pShellManager;
	}

#if !defined(_AFXDLL) && !defined(_AFX_NO_MFC_CONTROLS_IN_DIALOGS)
	ControlBarCleanUp();
#endif

	// Since the dialog has been closed, return FALSE so that we exit the
	//  application, rather than start the application's message pump.
	return FALSE;
}


================================================
FILE: mfc-examples/NonStreamingTextToSpeech/NonStreamingTextToSpeech.h
================================================

// NonStreamingTextToSpeech.h : main header file for the PROJECT_NAME application
//

#pragma once

#ifndef __AFXWIN_H__
	#error "include 'pch.h' before including this file for PCH"
#endif

#include "resource.h"		// main symbols


// CNonStreamingTextToSpeechApp:
// See NonStreamingTextToSpeech.cpp for the implementation of this class
//

class CNonStreamingTextToSpeechApp : public CWinApp
{
public:
	CNonStreamingTextToSpeechApp();

// Overrides
public:
	virtual BOOL InitInstance();

// Implementation

	DECLARE_MESSAGE_MAP()
};

extern CNonStreamingTextToSpeechApp theApp;


================================================
FILE: mfc-examples/NonStreamingTextToSpeech/NonStreamingTextToSpeech.vcxproj
================================================
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <ItemGroup Label="ProjectConfigurations">
    <ProjectConfiguration Include="Debug|Win32">
      <Configuration>Debug</Configuration>
      <Platform>Win32</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Release|Win32">
      <Configuration>Release</Configuration>
      <Platform>Win32</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Debug|x64">
      <Configuration>Debug</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Release|x64">
      <Configuration>Release</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
  </ItemGroup>
  <PropertyGroup Label="Globals">
    <VCProjectVersion>17.0</VCProjectVersion>
    <ProjectGuid>{9A5F2CCC-1AAB-4F7F-A608-F0B512023405}</ProjectGuid>
    <Keyword>MFCProj</Keyword>
    <RootNamespace>NonStreamingTextToSpeech</RootNamespace>
    <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>true</UseDebugLibraries>
    <PlatformToolset>v143</PlatformToolset>
    <CharacterSet>Unicode</CharacterSet>
    <UseOfMfc>Dynamic</UseOfMfc>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
    <PlatformToolset>v143</PlatformToolset>
    <WholeProgramOptimization>true</WholeProgramOptimization>
    <CharacterSet>Unicode</CharacterSet>
    <UseOfMfc>Static</UseOfMfc>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>true</UseDebugLibraries>
    <PlatformToolset>v143</PlatformToolset>
    <CharacterSet>Unicode</CharacterSet>
    <UseOfMfc>Dynamic</UseOfMfc>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
    <PlatformToolset>v143</PlatformToolset>
    <WholeProgramOptimization>true</WholeProgramOptimization>
    <CharacterSet>Unicode</CharacterSet>
    <UseOfMfc>Static</UseOfMfc>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
  <ImportGroup Label="ExtensionSettings">
  </ImportGroup>
  <ImportGroup Label="Shared">
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
    <Import Project="sherpa-onnx-deps.props" />
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
    <Import Project="sherpa-onnx-deps.props" />
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
    <Import Project="sherpa-onnx-deps.props" />
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
    <Import Project="sherpa-onnx-deps.props" />
  </ImportGroup>
  <PropertyGroup Label="UserMacros" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <LinkIncremental>false</LinkIncremental>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <LinkIncremental>true</LinkIncremental>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <LinkIncremental>true</LinkIncremental>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
    <LinkIncremental>false</LinkIncremental>
  </PropertyGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <ClCompile>
      <PrecompiledHeader>Use</PrecompiledHeader>
      <WarningLevel>Level3</WarningLevel>
      <FunctionLevelLinking>true</FunctionLevelLinking>
      <IntrinsicFunctions>true</IntrinsicFunctions>
      <SDLCheck>true</SDLCheck>
      <PreprocessorDefinitions>_WINDOWS;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
    </ClCompile>
    <Link>
      <SubSystem>Windows</SubSystem>
      <EnableCOMDATFolding>true</EnableCOMDATFolding>
      <OptimizeReferences>true</OptimizeReferences>
    </Link>
    <Midl>
      <MkTypLibCompatible>false</MkTypLibCompatible>
      <ValidateAllParameters>true</ValidateAllParameters>
      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    </Midl>
    <ResourceCompile>
      <Culture>0x0409</Culture>
      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
    </ResourceCompile>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <ClCompile>
      <PrecompiledHeader>Use</PrecompiledHeader>
      <WarningLevel>Level3</WarningLevel>
      <SDLCheck>true</SDLCheck>
      <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
    </ClCompile>
    <Link>
      <SubSystem>Windows</SubSystem>
    </Link>
    <Midl>
      <MkTypLibCompatible>false</MkTypLibCompatible>
      <ValidateAllParameters>true</ValidateAllParameters>
      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    </Midl>
    <ResourceCompile>
      <Culture>0x0409</Culture>
      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
    </ResourceCompile>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <ClCompile>
      <PrecompiledHeader>Use</PrecompiledHeader>
      <WarningLevel>Level3</WarningLevel>
      <SDLCheck>true</SDLCheck>
      <PreprocessorDefinitions>_WINDOWS;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
    </ClCompile>
    <Link>
      <SubSystem>Windows</SubSystem>
    </Link>
    <Midl>
      <MkTypLibCompatible>false</MkTypLibCompatible>
      <ValidateAllParameters>true</ValidateAllParameters>
      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    </Midl>
    <ResourceCompile>
      <Culture>0x0409</Culture>
      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
    </ResourceCompile>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
    <ClCompile>
      <PrecompiledHeader>Use</PrecompiledHeader>
      <WarningLevel>Level3</WarningLevel>
      <FunctionLevelLinking>true</FunctionLevelLinking>
      <IntrinsicFunctions>true</IntrinsicFunctions>
      <SDLCheck>true</SDLCheck>
      <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
    </ClCompile>
    <Link>
      <SubSystem>Windows</SubSystem>
      <EnableCOMDATFolding>true</EnableCOMDATFolding>
      <OptimizeReferences>true</OptimizeReferences>
    </Link>
    <Midl>
      <MkTypLibCompatible>false</MkTypLibCompatible>
      <ValidateAllParameters>true</ValidateAllParameters>
      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    </Midl>
    <ResourceCompile>
      <Culture>0x0409</Culture>
      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
    </ResourceCompile>
  </ItemDefinitionGroup>
  <ItemGroup>
    <ClInclude Include="framework.h" />
    <ClInclude Include="NonStreamingTextToSpeech.h" />
    <ClInclude Include="NonStreamingTextToSpeechDlg.h" />
    <ClInclude Include="pch.h" />
    <ClInclude Include="Resource.h" />
    <ClInclude Include="targetver.h" />
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="NonStreamingTextToSpeech.cpp" />
    <ClCompile Include="NonStreamingTextToSpeechDlg.cpp" />
    <ClCompile Include="pch.cpp">
      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader>
      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Create</PrecompiledHeader>
      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Create</PrecompiledHeader>
      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Create</PrecompiledHeader>
    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ResourceCompile Include="NonStreamingTextToSpeech.rc" />
  </ItemGroup>
  <ItemGroup>
    <None Include="res\NonStreamingTextToSpeech.rc2" />
  </ItemGroup>
  <ItemGroup>
    <Image Include="res\NonStreamingTextToSpeech.ico" />
  </ItemGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">
  </ImportGroup>
</Project>

================================================
FILE: mfc-examples/NonStreamingTextToSpeech/NonStreamingTextToSpeech.vcxproj.filters
================================================
﻿<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <ItemGroup>
    <Filter Include="Source Files">
      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
      <Extensions>cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
    </Filter>
    <Filter Include="Header Files">
      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
      <Extensions>h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd</Extensions>
    </Filter>
    <Filter Include="Resource Files">
      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
    </Filter>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="NonStreamingTextToSpeech.h">
      <Filter>Header Files</Filter>
    </ClInclude>
    <ClInclude Include="NonStreamingTextToSpeechDlg.h">
      <Filter>Header Files</Filter>
    </ClInclude>
    <ClInclude Include="framework.h">
      <Filter>Header Files</Filter>
    </ClInclude>
    <ClInclude Include="targetver.h">
      <Filter>Header Files</Filter>
    </ClInclude>
    <ClInclude Include="Resource.h">
      <Filter>Header Files</Filter>
    </ClInclude>
    <ClInclude Include="pch.h">
      <Filter>Header Files</Filter>
    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="NonStreamingTextToSpeech.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
    <ClCompile Include="NonStreamingTextToSpeechDlg.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
    <ClCompile Include="pch.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ResourceCompile Include="NonStreamingTextToSpeech.rc">
      <Filter>Resource Files</Filter>
    </ResourceCompile>
  </ItemGroup>
  <ItemGroup>
    <None Include="res\NonStreamingTextToSpeech.rc2">
      <Filter>Resource Files</Filter>
    </None>
  </ItemGroup>
  <ItemGroup>
    <Image Include="res\NonStreamingTextToSpeech.ico">
      <Filter>Resource Files</Filter>
    </Image>
  </ItemGroup>
</Project>

================================================
FILE: mfc-examples/NonStreamingTextToSpeech/NonStreamingTextToSpeechDlg.cpp
================================================

// NonStreamingTextToSpeechDlg.cpp : implementation file
//

#include "pch.h"
#include "framework.h"
#include "NonStreamingTextToSpeech.h"
#include "NonStreamingTextToSpeechDlg.h"
#include "afxdialogex.h"

#include <fstream>
#include <mutex>  // NOLINT
#include <queue>
#include <stdexcept>
#include <string>
#include <thread>  // NOLINT
#include <vector>

#ifdef _DEBUG
#define new DEBUG_NEW
#endif

Microphone::Microphone() {
  PaError err = Pa_Initialize();
  if (err != paNoError) {
    fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
    exit(-2);
  }
}

Microphone::~Microphone() {
  PaError err = Pa_Terminate();
  if (err != paNoError) {
    fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
    exit(-2);
  }
}

// NOTE(fangjun): Code is copied from
// https://github.com/k2-fsa/sherpa-onnx/blob/master/sherpa-onnx/csrc/sherpa-onnx-offline-tts-play.cc#L22
static std::condition_variable g_cv;
static std::mutex g_cv_m;

struct Samples {
  std::vector<float> data;
  int32_t consumed = 0;
};

struct Buffer {
  std::queue<Samples> samples;
  std::mutex mutex;
};

static Buffer g_buffer;

static bool g_started = false;
static bool g_stopped = false;
static bool g_killed = false;

static int32_t AudioGeneratedCallback(const float *s, int32_t n) {
  if (n > 0) {
    Samples samples;
    samples.data = std::vector<float>{s, s + n};

    std::lock_guard<std::mutex> lock(g_buffer.mutex);
    g_buffer.samples.push(std::move(samples));
    g_started = true;
  }
  if (g_killed) {
    return 0;
  }
  return 1;
}

static int PlayCallback(const void * /*in*/, void *out,
                        unsigned long _n,  // NOLINT
                        const PaStreamCallbackTimeInfo * /*time_info*/,
                        PaStreamCallbackFlags /*status_flags*/,
                        void * /*user_data*/) {
  int32_t n = static_cast<int32_t>(_n);
  if (g_killed) {
    return paComplete;
  }

  float *pout = reinterpret_cast<float *>(out);
  std::lock_guard<std::mutex> lock(g_buffer.mutex);

  if (g_buffer.samples.empty()) {
    if (g_stopped) {
      // no more data is available and we have processed all of the samples
      return paComplete;
    }

    // The current sentence is so long, though very unlikely, that
    // the model has not finished processing it yet.
    std::fill_n(pout, n, 0);

    return paContinue;
  }

  int32_t k = 0;
  for (; k < n && !g_buffer.samples.empty();) {
    int32_t this_block = n - k;

    auto &p = g_buffer.samples.front();

    int32_t remaining = static_cast<int32_t>(p.data.size()) - p.consumed;

    if (this_block <= remaining) {
      std::copy(p.data.begin() + p.consumed,
                p.data.begin() + p.consumed + this_block, pout + k);
      p.consumed += this_block;

      k = n;

      if (p.consumed == p.data.size()) {
        g_buffer.samples.pop();
      }
      break;
    }

    std::copy(p.data.begin() + p.consumed, p.data.end(), pout + k);
    k += static_cast<int32_t>(p.data.size()) - p.consumed;
    g_buffer.samples.pop();
  }

  if (k < n) {
    std::fill_n(pout + k, n - k, 0);
  }

  if (g_stopped && g_buffer.samples.empty()) {
    return paComplete;
  }

  return paContinue;
}

static void PlayCallbackFinished(void *userData) { g_cv.notify_all(); }

static void StartPlayback(int32_t sample_rate) {
  int32_t frames_per_buffer = 1024;
  PaStreamParameters outputParameters;
  PaStream *stream;
  PaError err;

  outputParameters.device =
      Pa_GetDefaultOutputDevice(); /* default output device */

  outputParameters.channelCount = 1;         /* stereo output */
  outputParameters.sampleFormat = paFloat32; /* 32 bit floating point output */
  outputParameters.suggestedLatency =
      Pa_GetDeviceInfo(outputParameters.device)->defaultLowOutputLatency;
  outputParameters.hostApiSpecificStreamInfo = nullptr;

  err = Pa_OpenStream(&stream, nullptr, /* no input */
                      &outputParameters, sample_rate, frames_per_buffer,
                      paClipOff,  // we won't output out of range samples so
                                  //   don't bother clipping them
                      PlayCallback, nullptr);
  if (err != paNoError) {
    fprintf(stderr, "%d portaudio error: %s\n", __LINE__, Pa_GetErrorText(err));
    return;
  }

  err = Pa_SetStreamFinishedCallback(stream, &PlayCallbackFinished);
  if (err != paNoError) {
    fprintf(stderr, "%d portaudio error: %s\n", __LINE__, Pa_GetErrorText(err));
    return;
  }

  err = Pa_StartStream(stream);
  if (err != paNoError) {
    fprintf(stderr, "%d portaudio error: %s\n", __LINE__, Pa_GetErrorText(err));
    return;
  }

  std::unique_lock<std::mutex> lock(g_cv_m);
  while (!g_killed && !g_stopped &&
         (!g_started || (g_started && !g_buffer.samples.empty()))) {
    g_cv.wait(lock);
  }

  err = Pa_StopStream(stream);
  if (err != paNoError) {
    return;
  }

  err = Pa_CloseStream(stream);
  if (err != paNoError) {
    return;
  }
}


// CAboutDlg dialog used for App About

class CAboutDlg : public CDialogEx
{
public:
	CAboutDlg();

// Dialog Data
#ifdef AFX_DESIGN_TIME
	enum { IDD = IDD_ABOUTBOX };
#endif

	protected:
	virtual void DoDataExchange(CDataExchange* pDX);    // DDX/DDV support

// Implementation
protected:
	DECLARE_MESSAGE_MAP()
};

CAboutDlg::CAboutDlg() : CDialogEx(IDD_ABOUTBOX)
{
}

void CAboutDlg::DoDataExchange(CDataExchange* pDX)
{
	CDialogEx::DoDataExchange(pDX);
}

BEGIN_MESSAGE_MAP(CAboutDlg, CDialogEx)
END_MESSAGE_MAP()


// CNonStreamingTextToSpeechDlg dialog

// see
// https://stackoverflow.com/questions/7153935/how-to-convert-utf-8-stdstring-to-utf-16-stdwstring
static std::wstring Utf8ToUtf16(const std::string &utf8) {
  std::vector<unsigned long> unicode;
  size_t i = 0;
  while (i < utf8.size()) {
    unsigned long uni;
    size_t todo;
    bool error = false;
    unsigned char ch = utf8[i++];
    if (ch <= 0x7F) {
      uni = ch;
      todo = 0;
    } else if (ch <= 0xBF) {
      throw std::logic_error("not a UTF-8 string");
    } else if (ch <= 0xDF) {
      uni = ch & 0x1F;
      todo = 1;
    } else if (ch <= 0xEF) {
      uni = ch & 0x0F;
      todo = 2;
    } else if (ch <= 0xF7) {
      uni = ch & 0x07;
      todo = 3;
    } else {
      throw std::logic_error("not a UTF-8 string");
    }
    for (size_t j = 0; j < todo; ++j) {
      if (i == utf8.size()) throw std::logic_error("not a UTF-8 string");
      unsigned char ch = utf8[i++];
      if (ch < 0x80 || ch > 0xBF) throw std::logic_error("not a UTF-8 string");
      uni <<= 6;
      uni += ch & 0x3F;
    }
    if (uni >= 0xD800 && uni <= 0xDFFF)
      throw std::logic_error("not a UTF-8 string");
    if (uni > 0x10FFFF) throw std::logic_error("not a UTF-8 string");
    unicode.push_back(uni);
  }
  std::wstring utf16;
  for (size_t i = 0; i < unicode.size(); ++i) {
    unsigned long uni = unicode[i];
    if (uni <= 0xFFFF) {
      utf16 += (wchar_t)uni;
    } else {
      uni -= 0x10000;
      utf16 += (wchar_t)((uni >> 10) + 0xD800);
      utf16 += (wchar_t)((uni & 0x3FF) + 0xDC00);
    }
  }
  return utf16;
}

// The system calls this function to obtain the cursor to display while the user drags
//  the minimized window.
HCURSOR CNonStreamingTextToSpeechDlg::OnQueryDragIcon()
{
	return static_cast<HCURSOR>(m_hIcon);
}


void AppendTextToEditCtrl(CEdit& e, const std::string &s) {
  // get the initial text length
  int nLength = e.GetWindowTextLength();
  // put the selection at the end of text
  e.SetSel(nLength, nLength);
  // replace the selection

  std::wstring wstr = Utf8ToUtf16(s);

  // my_text_.ReplaceSel(wstr.c_str());
  e.ReplaceSel(wstr.c_str());
}

void AppendLineToMultilineEditCtrl(CEdit& e, const std::string &s) {
  AppendTextToEditCtrl(e, "\r\n" + s);
}


CNonStreamingTextToSpeechDlg::CNonStreamingTextToSpeechDlg(CWnd* pParent /*=nullptr*/)
	: CDialogEx(IDD_NONSTREAMINGTEXTTOSPEECH_DIALOG, pParent)
       {
	m_hIcon = AfxGetApp()->LoadIcon(IDR_MAINFRAME);
}

void CNonStreamingTextToSpeechDlg::DoDataExchange(CDataExchange* pDX)
{
        CDialogEx::DoDataExchange(pDX);
        DDX_Control(pDX, IDC_HINT, my_hint_);
        DDX_Control(pDX, IDC_SPEAKER, speaker_id_);
        DDX_Control(pDX, IDC_SPEED, speed_);
        DDX_Control(pDX, IDOK, generate_btn_);
        DDX_Control(pDX, IDC_TEXT, my_text_);
        DDX_Control(pDX, IDC_OUTPUT_FILENAME, output_filename_);
}

BEGIN_MESSAGE_MAP(CNonStreamingTextToSpeechDlg, CDialogEx)
	ON_WM_SYSCOMMAND()
	ON_WM_PAINT()
	ON_WM_QUERYDRAGICON()
        ON_BN_CLICKED(IDOK, &CNonStreamingTextToSpeechDlg::OnBnClickedOk)
        ON_BN_CLICKED(IDC_STOP, &CNonStreamingTextToSpeechDlg::OnBnClickedStop)
        END_MESSAGE_MAP()


// CNonStreamingTextToSpeechDlg message handlers

BOOL CNonStreamingTextToSpeechDlg::OnInitDialog()
{
	CDialogEx::OnInitDialog();

	// Add "About..." menu item to system menu.

	// IDM_ABOUTBOX must be in the system command range.
	ASSERT((IDM_ABOUTBOX & 0xFFF0) == IDM_ABOUTBOX);
	ASSERT(IDM_ABOUTBOX < 0xF000);

	CMenu* pSysMenu = GetSystemMenu(FALSE);
	if (pSysMenu != nullptr)
	{
		BOOL bNameValid;
		CString strAboutMenu;
		bNameValid = strAboutMenu.LoadString(IDS_ABOUTBOX);
		ASSERT(bNameValid);
		if (!strAboutMenu.IsEmpty())
		{
			pSysMenu->AppendMenu(MF_SEPARATOR);
			pSysMenu->AppendMenu(MF_STRING, IDM_ABOUTBOX, strAboutMenu);
		}
	}

	// Set the icon for this dialog.  The framework does this automatically
	//  when the application's main window is not a dialog
	SetIcon(m_hIcon, TRUE);			// Set big icon
	SetIcon(m_hIcon, FALSE);		// Set small icon

	// TODO: Add extra initialization here
    Init();

	return TRUE;  // return TRUE  unless you set the focus to a control
}

void CNonStreamingTextToSpeechDlg::OnSysCommand(UINT nID, LPARAM lParam)
{
	if ((nID & 0xFFF0) == IDM_ABOUTBOX)
	{
		CAboutDlg dlgAbout;
		dlgAbout.DoModal();
	}
	else
	{
		CDialogEx::OnSysCommand(nID, lParam);
	}
}

// If you add a minimize button to your dialog, you will need the code below
//  to draw the icon            .  For MFC applications using the document/view model,
//  this is automatically done for you by the framework.

void CNonStreamingTextToSpeechDlg::OnPaint()
{
	if (IsIconic())
	{
		CPaintDC dc(this); // device context for painting

		SendMessage(WM_ICONERASEBKGND, reinterpret_cast<WPARAM>(dc.GetSafeHdc()), 0);

		// Center icon in client rectangle
		int cxIcon = GetSystemMetrics(SM_CXICON);
		int cyIcon =             GetSystemMetrics(SM_CYICON);
		CRect rect;
		GetClientRect(&rect);
		int x = (rect.Width() - cxIcon + 1) / 2;
		int y = (rect.Height() - cyIcon + 1) / 2;

		// Draw the icon
		dc.DrawIcon(x, y, m_hIcon);
	}
	else
	{
		CDialogEx::OnPaint();
	}
}

bool Exists(const std::string &filename) {
  std::ifstream is(filename);
  return is.good();
}

void CNonStreamingTextToSpeechDlg::InitHint() {
    AppendLineToMultilineEditCtrl(my_hint_, "Speaker ID: Used only for multi-speaker models. Example value: 0");
    AppendLineToMultilineEditCtrl(my_hint_, "Speed: Larger -> Faster in speech speed. Example value: 1.0");
    AppendLineToMultilineEditCtrl(my_hint_, "\r\nPlease input your text and click the button Generate");

}

void CNonStreamingTextToSpeechDlg::Init() {
    InitHint();
    speaker_id_.SetWindowText(Utf8ToUtf16("0").c_str());
    speed_.SetWindowText(Utf8ToUtf16("1.0").c_str());
    output_filename_.SetWindowText(Utf8ToUtf16("./generated.wav").c_str());

	bool ok = true;
  std::string error_message = "--------------------\r\n";
  if (!Exists("./model.onnx")) {
    error_message += "Cannot find ./model.onnx\r\n";
    ok = false;
  }

  if (!Exists("./tokens.txt")) {
    error_message += "Cannot find ./tokens.txt\r\n";
    ok = false;
  }
  // it is OK to leave lexicon.txt and espeak-ng-data empty
  // since models using characters don't need them

  if (!ok) {
    generate_btn_.EnableWindow(FALSE);
    error_message +=
        "\r\nPlease refer to\r\n"
        "https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models"
        "\r\nor\r\n"
        "https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models";

    error_message += "\r\nto download models.\r\n";
    error_message += "\r\nWe give several examples below\r\n";
    error_message += "      1. Use a Kokoro TTS model (multi-lingual, e.g, English + Chinese)\r\n";
    error_message += "      2. Use a Kokoro TTS model (English only)\r\n";
    error_message += "      3. Use a VITS Piper TTS model\r\n";
    error_message += "      4. Use a VITS Chinese TTS model\r\n";
    error_message += "      5. Use a Matcha TTS model\r\n";
    error_message += "\r\n";

    error_message += 
        "----------1. Use a Kokoro TTS model (multi-lingual, eg., English + Chinese)----------\r\n"
        "(a) Download the model from \r\n"
        "     https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2\r\n"
        "(b) Uncompress it and you will get a directory kokoro-multi-lang-v1_0\r\n"
        "(c) Switch to the directory kokoro-multi-lang-v1_0\r\n"
        "(d) Copy the current exe to the directory kokoro-multi-lang-v1_0\r\n"
        "(e).Done! You can now run the exe in the directory kokoro-multi-lang-v1_0\r\n";

    error_message +=  "\r\n";

    error_message += 
        "----------2. Use a Kokoro TTS model (English only)----------\r\n"
        "(a) Download the model from \r\n"
        "     https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2\r\n"
        "(b) Uncompress it and you will get a directory kokoro-en-v0_19\r\n"
        "(c) Switch to the directory kokoro-en-v0_19\r\n"
        "(d) Copy the current exe to the directory kokoro-en-v0_19\r\n"
        "(e).Done! You can now run the exe in the directory kokoro-en-v0_19\r\n";

    error_message +=  "\r\n";

    error_message += 
        "----------3. Use a VITS Piper TTS model----------\r\n"
        "(a) Download the model from \r\n"
        "     https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2\r\n"
        "(b) Uncompress it and you will get a directory vits-piper-en_US-amy-low\r\n"
        "(c) Switch to the directory vits-piper-en_US-amy-low \r\n"
        "(d) Rename en_US-amy-low.onnx to model.onnx\r\n"
        "(e) Copy the current exe to the directory vits-piper-en_US-amy-low\r\n"
        "(f) Done! You can now run the exe in the directory vits-piper-en_US-amy-low\r\n";

    error_message +=  "\r\n";

    error_message += 
        "----------4. Use a VITS Chinese TTS model----------\r\n"
        "(a) Download the model from \r\n"
        "     https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-vits-zh-ll.tar.bz2\r\n"
        "(b) Uncompress it and you will get a directory sherpa-onnx-vits-zh-ll\r\n"
        "(c) Switch to the directory sherpa-onnx-vits-zh-ll\r\n"
        "(d) Copy the current exe to the directory sherpa-onnx-vits-zh-ll\r\n"
        "(e) Done! You can now run the exe in the directory sherpa-onnx-vits-zh-ll\r\n";

    error_message +=  "\r\n";

    error_message += 
        "----------5. Use a Matcha TTS model----------\r\n"
        "(a) Download the model from \r\n"
        "     https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2\r\n"
        "(b) Uncompress it and you will get a directory matcha-icefall-zh-baker\r\n"
        "(c) Switch to the directory matcha-icefall-zh-baker\r\n"
        "(d) Rename model-steps-3.onnx to model.onnx\r\n"
        "(e) Download a vocoder model from \r\n"
        "      https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx\r\n"
        "(f) Rename vocos-22khz-univ.onnx to vocos.onnx\r\n"
        "(g) Remember to put vocos.onnx in the directory matcha-icefall-zh-baker\r\n"
        "(h) Copy the current exe to the directory matcha-icefall-zh-baker\r\n"
        "(i) Done! You can now run the exe in the directory matcha-icefall-zh-baker\r\n";

    AppendLineToMultilineEditCtrl(my_hint_, error_message);
    return;
  }

  // Now init tts
  SherpaOnnxOfflineTtsConfig config;
  memset(&config, 0, sizeof(config));
  config.model.debug = 0;
  config.model.num_threads = 4;
  config.model.provider = "cpu";

  if (Exists("./voices.bin")) {
    // it is a kokoro tts model
    config.model.kokoro.model = "./model.onnx";
    config.model.kokoro.voices = "./voices.bin";
    config.model.kokoro.tokens = "./tokens.txt";
    config.model.kokoro.data_dir = "./espeak-ng-data";
    if (Exists("./dict/jieba.dict.utf8") && Exists("./lexicon-zh.txt")) {
      config.model.kokoro.dict_dir = "./dict";
      config.model.kokoro.lexicon = "./lexicon-us-en.txt,./lexicon-zh.txt";
    }
  } else if (Exists("./hifigan.onnx") || Exists("./vocos.onnx")) {
    // it is a matcha tts model
    config.model.matcha.acoustic_model = "./model.onnx";

    if (Exists("./hifigan.onnx")) {
      config.model.matcha.vocoder = "./hifigan.onnx";
    } else if (Exists("./vocos.onnx")) {
      config.model.matcha.vocoder = "./vocos.onnx";
    }

    config.model.matcha.tokens = "./tokens.txt";

    if (Exists("./espeak-ng-data/phontab")) {
      config.model.matcha.data_dir = "./espeak-ng-data";
    }

    if(Exists("./lexicon.txt")) {
      config.model.matcha.lexicon = "./lexicon.txt";
    }

    if (Exists("./dict/jieba.dict.utf8")) {
      config.model.matcha.dict_dir = "./dict";
    }
  } else {
    // it is a vits tts model
    config.model.vits.model = "./model.onnx";
    config.model.vits.tokens = "./tokens.txt";
    if (Exists("./espeak-ng-data/phontab")) {
      config.model.vits.data_dir = "./espeak-ng-data";
    } 

    if (Exists("./lexicon.txt")) {
      config.model.vits.lexicon = "./lexicon.txt";
    }

    if (Exists("./dict/jieba.dict.utf8")) {
      config.model.vits.dict_dir = "./dict";
    }
  }

  if (Exists("./phone.fst") && Exists("./date.fst") && Exists("./number.fst")) {
    config.rule_fsts = "./phone.fst,./date.fst,number.fst";
  }

  if (Exists("./phone-zh.fst") && Exists("./date-zh.fst") && Exists("./number-zh.fst")) {
    config.rule_fsts = "./phone-zh.fst,./date-zh.fst,number-zh.fst";
  }

  if (Exists("./rule.far")) {
    config.rule_fars = "./rule.far";
  }

  tts_ = SherpaOnnxCreateOfflineTts(&config);
}

 CNonStreamingTextToSpeechDlg::~CNonStreamingTextToSpeechDlg() {
  if (tts_) {
    SherpaOnnxDestroyOfflineTts(tts_);
  }
  if (generate_thread_ && generate_thread_->joinable()) {
    generate_thread_->join();
  }

  if (play_thread_ && play_thread_->joinable()) {
    play_thread_->join();
  }
 }


 static std::string ToString(const CString &s) {
    CT2CA pszConvertedAnsiString(s);
    return std::string(pszConvertedAnsiString);
 }

void CNonStreamingTextToSpeechDlg::OnBnClickedOk() {
  CString s;
  speaker_id_.GetWindowText(s);
  int speaker_id = _ttoi(s);
  if (speaker_id < 0) {
    AfxMessageBox(Utf8ToUtf16("Please input a valid speaker ID").c_str(), MB_OK);
    return;
  }

  speed_.GetWindowText(s);
  float speed = static_cast<float>(_ttof(s));
  if (speed < 0) {
    AfxMessageBox(Utf8ToUtf16("Please input a valid speed").c_str(), MB_OK);
    return;
  }

  my_text_.GetWindowText(s);

  std::string ss = ToString(s);
  if (ss.empty()) {
    AfxMessageBox(Utf8ToUtf16("Please input your text").c_str(), MB_OK);
    return;
  }

  if (play_thread_) {
    g_killed = true;
    g_stopped = true;
    if (play_thread_->joinable()) {
      play_thread_->join();
    }
  }

  g_killed = false;
  g_stopped = false;
  g_started = false;
  g_buffer.samples = {};

  // Caution(fangjun): It is not efficient to re-create the thread. We use this approach
  // for simplicity
  play_thread_ = std::make_unique<std::thread>(StartPlayback, SherpaOnnxOfflineTtsSampleRate(tts_));

  if (generate_thread_ && generate_thread_->joinable()) {
    generate_thread_->join();
  }

  output_filename_.GetWindowText(s);
  std::string filename = ToString(s);

  generate_thread_ = std::make_unique<std::thread>([ss, this,filename, speaker_id, speed]() {
      std::string text = ss;

      // generate_btn_.EnableWindow(FALSE);

	  const SherpaOnnxGeneratedAudio *audio =
		  SherpaOnnxOfflineTtsGenerateWithCallback(tts_, text.c_str(), speaker_id, speed, &AudioGeneratedCallback);
      // generate_btn_.EnableWindow(TRUE);
       g_stopped = true;

	  int ok = SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate,
						filename.c_str());

	  SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);

	  if (ok) {
		// AfxMessageBox(Utf8ToUtf16(std::string("Saved to ") + filename + " successfully").c_str(), MB_OK);

		// AppendLineToMultilineEditCtrl(my_hint_, std::string("Saved to ") + filename + " successfully");
	  } else {
		// AfxMessageBox(Utf8ToUtf16(std::string("Failed to save to ") + filename).c_str(), MB_OK);

		// AppendLineToMultilineEditCtrl(my_hint_, std::string("Failed to saved to ") + filename);
	  }
  });

  //CDialogEx::OnOK();
}

void CNonStreamingTextToSpeechDlg::OnBnClickedStop() { g_killed = true; }


================================================
FILE: mfc-examples/NonStreamingTextToSpeech/NonStreamingTextToSpeechDlg.h
================================================

// NonStreamingTextToSpeechDlg.h : header file
//

#pragma once

#include "sherpa-onnx/c-api/c-api.h"

#include <memory>
#include <thread>

#include "portaudio.h"

class Microphone {
 public:
  Microphone();
  ~Microphone();
};

// CNonStreamingTextToSpeechDlg dialog
class CNonStreamingTextToSpeechDlg : public CDialogEx
{
// Construction
public:
	CNonStreamingTextToSpeechDlg(CWnd* pParent = nullptr);	// standard constructor
 ~CNonStreamingTextToSpeechDlg();

// Dialog Data
#ifdef AFX_DESIGN_TIME
	enum { IDD = IDD_NONSTREAMINGTEXTTOSPEECH_DIALOG };
#endif

	protected:
	virtual void DoDataExchange(CDataExchange* pDX);	// DDX/DDV support


// Implementation
protected:
	HICON m_hIcon;

	// Generated message map functions
	virtual BOOL OnInitDialog();
	afx_msg void OnSysCommand(UINT nID, LPARAM lParam);
	afx_msg void OnPaint();
	afx_msg HCURSOR OnQueryDragIcon();
	DECLARE_MESSAGE_MAP()
public:
	CEdit my_hint_;
	CEdit speaker_id_;
	CEdit speed_;
	void Init();
	void InitHint();
	CButton generate_btn_;
	afx_msg void OnBnClickedOk();

	const SherpaOnnxOfflineTts *tts_ = nullptr;
	CEdit my_text_;
	CEdit output_filename_;

private:
    Microphone mic_;
	std::unique_ptr<std::thread> play_thread_;
	std::unique_ptr<std::thread> generate_thread_;

   public:
    afx_msg void OnBnClickedStop();
};


================================================
FILE: mfc-examples/NonStreamingTextToSpeech/Resource.h
================================================
//{{NO_DEPENDENCIES}}
// Microsoft Visual C++ generated include file.
// Used by NonStreamingTextToSpeech.rc
//
#define IDM_ABOUTBOX                    0x0010
#define IDD_ABOUTBOX                    100
#define IDS_ABOUTBOX                    101
#define IDD_NONSTREAMINGTEXTTOSPEECH_DIALOG 102
#define IDR_MAINFRAME                   128
#define IDC_SPEAKER                     1000
#define IDC_SPEED                       1003
#define IDC_TEXT                        1004
#define IDC_HINT                        1005
#define IDC_EDIT1                       1006
#define IDC_OUTPUT_FILENAME             1006
#define IDC_STOP                        1009

// Next default values for new objects
// 
#ifdef APSTUDIO_INVOKED
#ifndef APSTUDIO_READONLY_SYMBOLS
#define _APS_NEXT_RESOURCE_VALUE        130
#define _APS_NEXT_COMMAND_VALUE         32771
#define _APS_NEXT_CONTROL_VALUE         1010
#define _APS_NEXT_SYMED_VALUE           101
#endif
#endif


================================================
FILE: mfc-examples/NonStreamingTextToSpeech/framework.h
================================================
#pragma once

#ifndef VC_EXTRALEAN
#define VC_EXTRALEAN            // Exclude rarely-used stuff from Windows headers
#endif

#include "targetver.h"

#define _ATL_CSTRING_EXPLICIT_CONSTRUCTORS      // some CString constructors will be explicit

// turns off MFC's hiding of some common and often safely ignored warning messages
#define _AFX_ALL_WARNINGS

#include <afxwin.h>         // MFC core and standard components
#include <afxext.h>         // MFC extensions


#ifndef _AFX_NO_OLE_SUPPORT
#include <afxdtctl.h>           // MFC support for Internet Explorer 4 Common Controls
#endif
#ifndef _AFX_NO_AFXCMN_SUPPORT
#include <afxcmn.h>             // MFC support for Windows Common Controls
#endif // _AFX_NO_AFXCMN_SUPPORT

#include <afxcontrolbars.h>     // MFC support for ribbons and control bars


================================================
FILE: mfc-examples/NonStreamingTextToSpeech/pch.cpp
================================================
// pch.cpp: source file corresponding to the pre-compiled header

#include "pch.h"

// When you are using pre-compiled headers, this source file is necessary for compilation to succeed.


================================================
FILE: mfc-examples/NonStreamingTextToSpeech/pch.h
================================================
// pch.h: This is a precompiled header file.
// Files listed below are compiled only once, improving build performance for future builds.
// This also affects IntelliSense performance, including code completion and many code browsing features.
// However, files listed here are ALL re-compiled if any one of them is updated between builds.
// Do not add files here that you will be updating frequently as this negates the performance advantage.

#ifndef PCH_H
#define PCH_H

// add headers that you want to pre-compile here
#include "framework.h"

#endif //PCH_H


================================================
FILE: mfc-examples/NonStreamingTextToSpeech/sherpa-onnx-deps.props
================================================
﻿<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <ImportGroup Label="PropertySheets" />
  <PropertyGroup Label="UserMacros" />
  <PropertyGroup>
    <SherpaOnnxBuildDirectory>..\..\build</SherpaOnnxBuildDirectory>
    <SherpaOnnxInstallDirectory>..\..\build\install</SherpaOnnxInstallDirectory>
    <SherpaOnnxLibraries>
        sherpa-onnx-portaudio_static.lib;
        sherpa-onnx-c-api.lib;
        sherpa-onnx-core.lib;
        kaldi-decoder-core.lib;
        sherpa-onnx-kaldifst-core.lib;
        sherpa-onnx-fstfar.lib;
        sherpa-onnx-fst.lib;
        kaldi-native-fbank-core.lib;
        kissfft-float.lib;
        onnxruntime.lib;
        piper_phonemize.lib;
        espeak-ng.lib;
        ucd.lib;
        ssentencepiece_core.lib;
    </SherpaOnnxLibraries>
  </PropertyGroup>
  <ItemDefinitionGroup>
    <ClCompile>
      <AdditionalIncludeDirectories>
	  $(SherpaOnnxBuildDirectory)\_deps\portaudio-src\include;
    $(SherpaOnnxInstallDirectory)\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
    </ClCompile>
    <Link>
      <AdditionalLibraryDirectories>$(SherpaOnnxInstallDirectory)\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
      <AdditionalDependencies>$(SherpaOnnxLibraries);</AdditionalDependencies>
    </Link>
  </ItemDefinitionGroup>
  <ItemGroup />
</Project>


================================================
FILE: mfc-examples/NonStreamingTextToSpeech/targetver.h
================================================
#pragma once

// Including SDKDDKVer.h defines the highest available Windows platform.

// If you wish to build your application for a previous Windows platform, include WinSDKVer.h and
// set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h.

#include <SDKDDKVer.h>


================================================
FILE: mfc-examples/README.md
================================================
# Speech recognition with Visual C++ MFC

This directory contains examples showing how to use Next-gen Kaldi in MFC
for speech recognition.

|Directory| Pre-built exe (x64)|Pre-built exe (x86)| Description|
|---------|--------------------|-------------------|------------|
|[./NonStreamingSpeechRecognition](./NonStreamingSpeechRecognition)|[URL](https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.31/sherpa-onnx-non-streaming-asr-x64-v1.12.31.exe)|[URL](https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.31/sherpa-onnx-non-streaming-asr-x86-v1.12.31.exe)| Non-streaming speech recognition|
|[./StreamingSpeechRecognition](./StreamingSpeechRecognition)|[URL](https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.31/sherpa-onnx-streaming-asr-x64-v1.12.31.exe)|[URL](https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.31/sherpa-onnx-streaming-asr-x86-v1.12.31.exe)| Streaming speech recognition|
|[./NonStreamingTextToSpeech](./NonStreamingTextToSpeech)|[URL](https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.31/sherpa-onnx-non-streaming-tts-x64-v1.12.31.exe)|[URL](https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.31/sherpa-onnx-non-streaming-tts-x86-v1.12.31.exe)| Non-streaming text to speech|

Caution: You need to use Windows and install Visual Studio 2022 in order to
compile it.

Hint: If you don't want to install Visual Studio, you can find below
about how to download pre-compiled `exe`.

We use bash script below to demonstrate how to use it. Please change
the commands accordingly for Windows.

## How to compile


First, we need to compile sherpa-onnx:

```bash
mkdir -p $HOME/open-source
cd $HOME/open-source

git clone https://github.com/k2-fsa/sherpa-onnx
cd sherpa-onnx
mkdir build
cd build

cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DCMAKE_INSTALL_PREFIX=./install ..
cmake --build . --config Release --target install
cd ../mfc-examples

msbuild ./mfc-examples.sln /property:Configuration=Release /property:Platform=x64

# now run the program

./x64/Release/StreamingSpeechRecognition.exe
./x64/Release/NonStreamingSpeechRecognition.exe
```

If you don't want to compile the project by yourself, you can download
pre-compiled `exe` from https://github.com/k2-fsa/sherpa-onnx/releases

For instance, you can use the following addresses:

  - https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.5.1/sherpa-onnx-streaming-v1.5.1.exe
  - https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.5.1/sherpa-onnx-non-streaming-v1.5.1.exe


================================================
FILE: mfc-examples/StreamingSpeechRecognition/Resource.h
================================================
//{{NO_DEPENDENCIES}}
// Microsoft Visual C++ generated include file.
// Used by StreamingSpeechRecognition.rc
//
#define IDD_STREAMINGSPEECHRECOGNITION_DIALOG 102
#define IDR_MAINFRAME 128
#define IDC_EDIT1 1000

// Next default values for new objects
//
#ifdef APSTUDIO_INVOKED
#ifndef APSTUDIO_READONLY_SYMBOLS
#define _APS_NEXT_RESOURCE_VALUE 130
#define _APS_NEXT_COMMAND_VALUE 32771
#define _APS_NEXT_CONTROL_VALUE 1001
#define _APS_NEXT_SYMED_VALUE 101
#endif
#endif


================================================
FILE: mfc-examples/StreamingSpeechRecognition/StreamingSpeechRecognition.cpp
================================================

// StreamingSpeechRecognition.cpp : Defines the class behaviors for the
// application.
//

// clang-format off
#include "pch.h"
#include "framework.h"
// clang-format on

#include "StreamingSpeechRecognition.h"

#include "StreamingSpeechRecognitionDlg.h"

#ifdef _DEBUG
#define new DEBUG_NEW
#endif

// CStreamingSpeechRecognitionApp

BEGIN_MESSAGE_MAP(CStreamingSpeechRecognitionApp, CWinApp)
ON_COMMAND(ID_HELP, &CWinApp::OnHelp)
END_MESSAGE_MAP()

// CStreamingSpeechRecognitionApp construction

CStreamingSpeechRecognitionApp::CStreamingSpeechRecognitionApp() {
  // TODO: add construction code here,
  // Place all significant initialization in InitInstance
}

// The one and only CStreamingSpeechRecognitionApp object

CStreamingSpeechRecognitionApp theApp;

// CStreamingSpeechRecognitionApp initialization

BOOL CStreamingSpeechRecognitionApp::InitInstance() {
  CWinApp::InitInstance();

  // Create the shell manager, in case the dialog contains
  // any shell tree view or shell list view controls.
  CShellManager *pShellManager = new CShellManager;

  // Activate "Windows Native" visual manager for enabling themes in MFC
  // controls
  CMFCVisualManager::SetDefaultManager(RUNTIME_CLASS(CMFCVisualManagerWindows));

  // Standard initialization
  // If you are not using these features and wish to reduce the size
  // of your final executable, you should remove from the following
  // the specific initialization routines you do not need
  // Change the registry key under which our settings are stored
  // TODO: You should modify this string to be something appropriate
  // such as the name of your company or organization
  SetRegistryKey(_T("Local AppWizard-Generated Applications"));

  CStreamingSpeechRecognitionDlg dlg;
  m_pMainWnd = &dlg;
  INT_PTR nResponse = dlg.DoModal();
  if (nResponse == IDOK) {
    // TODO: Place code here to handle when the dialog is
    //  dismissed with OK
  } else if (nResponse == IDCANCEL) {
    // TODO: Place code here to handle when the dialog is
    //  dismissed with Cancel
  } else if (nResponse == -1) {
    TRACE(traceAppMsg, 0,
          "Warning: dialog creation failed, so application is terminating "
          "unexpectedly.\n");
    TRACE(traceAppMsg, 0,
          "Warning: if you are using MFC controls on the dialog, you cannot "
          "#define _AFX_NO_MFC_CONTROLS_IN_DIALOGS.\n");
  }

  // Delete the shell manager created above.
  if (pShellManager != nullptr) {
    delete pShellManager;
  }

#if !defined(_AFXDLL) && !defined(_AFX_NO_MFC_CONTROLS_IN_DIALOGS)
  ControlBarCleanUp();
#endif

  // Since the dialog has been closed, return FALSE so that we exit the
  //  application, rather than start the application's message pump.
  return FALSE;
}


================================================
FILE: mfc-examples/StreamingSpeechRecognition/StreamingSpeechRecognition.h
================================================

// StreamingSpeechRecognition.h : main header file for the PROJECT_NAME
// application
//

#pragma once

#ifndef __AFXWIN_H__
#error "include 'pch.h' before including this file for PCH"
#endif

#include "resource.h"  // main symbols

// CStreamingSpeechRecognitionApp:
// See StreamingSpeechRecognition.cpp for the implementation of this class
//

class CStreamingSpeechRecognitionApp : public CWinApp {
 public:
  CStreamingSpeechRecognitionApp();

  // Overrides
 public:
  virtual BOOL InitInstance();

  // Implementation

  DECLARE_MESSAGE_MAP()
};

extern CStreamingSpeechRecognitionApp theApp;


================================================
FILE: mfc-examples/StreamingSpeechRecognition/StreamingSpeechRecognition.vcxproj
================================================
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <ItemGroup Label="ProjectConfigurations">
    <ProjectConfiguration Include="Debug|Win32">
      <Configuration>Debug</Configuration>
      <Platform>Win32</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Release|Win32">
      <Configuration>Release</Configuration>
      <Platform>Win32</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Debug|x64">
      <Configuration>Debug</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Release|x64">
      <Configuration>Release</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
  </ItemGroup>
  <PropertyGroup Label="Globals">
    <VCProjectVersion>16.0</VCProjectVersion>
    <ProjectGuid>{A79C2604-C33D-497C-9770-D34E118B77FE}</ProjectGuid>
    <Keyword>MFCProj</Keyword>
    <RootNamespace>StreamingSpeechRecognition</RootNamespace>
    <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>true</UseDebugLibraries>
    <PlatformToolset>v142</PlatformToolset>
    <CharacterSet>Unicode</CharacterSet>
    <UseOfMfc>Static</UseOfMfc>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
    <PlatformToolset>v143</PlatformToolset>
    <WholeProgramOptimization>true</WholeProgramOptimization>
    <CharacterSet>Unicode</CharacterSet>
    <UseOfMfc>Static</UseOfMfc>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>true</UseDebugLibraries>
    <PlatformToolset>v142</PlatformToolset>
    <CharacterSet>Unicode</CharacterSet>
    <UseOfMfc>Static</UseOfMfc>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
    <PlatformToolset>v143</PlatformToolset>
    <WholeProgramOptimization>true</WholeProgramOptimization>
    <CharacterSet>Unicode</CharacterSet>
    <UseOfMfc>Static</UseOfMfc>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
  <ImportGroup Label="ExtensionSettings">
  </ImportGroup>
  <ImportGroup Label="Shared">
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
    <Import Project="sherpa-onnx-deps.props" />
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
    <Import Project="sherpa-onnx-deps.props" />
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
    <Import Project="sherpa-onnx-deps.props" />
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
    <Import Project="sherpa-onnx-deps.props" />
  </ImportGroup>
  <PropertyGroup Label="UserMacros" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <LinkIncremental>true</LinkIncremental>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <LinkIncremental>true</LinkIncremental>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
    <LinkIncremental>false</LinkIncremental>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <LinkIncremental>false</LinkIncremental>
  </PropertyGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <ClCompile>
      <PrecompiledHeader>Use</PrecompiledHeader>
      <WarningLevel>Level3</WarningLevel>
      <SDLCheck>true</SDLCheck>
      <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
    </ClCompile>
    <Link>
      <SubSystem>Windows</SubSystem>
    </Link>
    <Midl>
      <MkTypLibCompatible>false</MkTypLibCompatible>
      <ValidateAllParameters>true</ValidateAllParameters>
      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    </Midl>
    <ResourceCompile>
      <Culture>0x0409</Culture>
      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
    </ResourceCompile>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <ClCompile>
      <PrecompiledHeader>Use</PrecompiledHeader>
      <WarningLevel>Level3</WarningLevel>
      <SDLCheck>true</SDLCheck>
      <PreprocessorDefinitions>_WINDOWS;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
    </ClCompile>
    <Link>
      <SubSystem>Windows</SubSystem>
    </Link>
    <Midl>
      <MkTypLibCompatible>false</MkTypLibCompatible>
      <ValidateAllParameters>true</ValidateAllParameters>
      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    </Midl>
    <ResourceCompile>
      <Culture>0x0409</Culture>
      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
    </ResourceCompile>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
    <ClCompile>
      <PrecompiledHeader>Use</PrecompiledHeader>
      <WarningLevel>Level3</WarningLevel>
      <FunctionLevelLinking>true</FunctionLevelLinking>
      <IntrinsicFunctions>true</IntrinsicFunctions>
      <SDLCheck>true</SDLCheck>
      <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
    </ClCompile>
    <Link>
      <SubSystem>Windows</SubSystem>
      <EnableCOMDATFolding>true</EnableCOMDATFolding>
      <OptimizeReferences>true</OptimizeReferences>
    </Link>
    <Midl>
      <MkTypLibCompatible>false</MkTypLibCompatible>
      <ValidateAllParameters>true</ValidateAllParameters>
      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    </Midl>
    <ResourceCompile>
      <Culture>0x0409</Culture>
      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
    </ResourceCompile>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <ClCompile>
      <PrecompiledHeader>Use</PrecompiledHeader>
      <WarningLevel>Level3</WarningLevel>
      <FunctionLevelLinking>true</FunctionLevelLinking>
      <IntrinsicFunctions>true</IntrinsicFunctions>
      <SDLCheck>true</SDLCheck>
      <PreprocessorDefinitions>_WINDOWS;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
    </ClCompile>
    <Link>
      <SubSystem>Windows</SubSystem>
      <EnableCOMDATFolding>true</EnableCOMDATFolding>
      <OptimizeReferences>true</OptimizeReferences>
    </Link>
    <Midl>
      <MkTypLibCompatible>false</MkTypLibCompatible>
      <ValidateAllParameters>true</ValidateAllParameters>
      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    </Midl>
    <ResourceCompile>
      <Culture>0x0409</Culture>
      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
    </ResourceCompile>
  </ItemDefinitionGroup>
  <ItemGroup>
    <ClInclude Include="framework.h" />
    <ClInclude Include="pch.h" />
    <ClInclude Include="Resource.h" />
    <ClInclude Include="StreamingSpeechRecognition.h" />
    <ClInclude Include="StreamingSpeechRecognitionDlg.h" />
    <ClInclude Include="targetver.h" />
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="pch.cpp">
      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Create</PrecompiledHeader>
      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Create</PrecompiledHeader>
      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Create</PrecompiledHeader>
      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader>
    </ClCompile>
    <ClCompile Include="StreamingSpeechRecognition.cpp" />
    <ClCompile Include="StreamingSpeechRecognitionDlg.cpp" />
  </ItemGroup>
  <ItemGroup>
    <ResourceCompile Include="StreamingSpeechRecognition.rc" />
  </ItemGroup>
  <ItemGroup>
    <None Include="res\StreamingSpeechRecognition.rc2" />
  </ItemGroup>
  <ItemGroup>
    <Image Include="res\StreamingSpeechRecognition.ico" />
  </ItemGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">
  </ImportGroup>
</Project>

================================================
FILE: mfc-examples/StreamingSpeechRecognition/StreamingSpeechRecognition.vcxproj.filters
================================================
﻿<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <ItemGroup>
    <Filter Include="Source Files">
      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
      <Extensions>cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
    </Filter>
    <Filter Include="Header Files">
      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
      <Extensions>h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd</Extensions>
    </Filter>
    <Filter Include="Resource Files">
      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
    </Filter>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="StreamingSpeechRecognition.h">
      <Filter>Header Files</Filter>
    </ClInclude>
    <ClInclude Include="StreamingSpeechRecognitionDlg.h">
      <Filter>Header Files</Filter>
    </ClInclude>
    <ClInclude Include="framework.h">
      <Filter>Header Files</Filter>
    </ClInclude>
    <ClInclude Include="targetver.h">
      <Filter>Header Files</Filter>
    </ClInclude>
    <ClInclude Include="Resource.h">
      <Filter>Header Files</Filter>
    </ClInclude>
    <ClInclude Include="pch.h">
      <Filter>Header Files</Filter>
    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="StreamingSpeechRecognition.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
    <ClCompile Include="StreamingSpeechRecognitionDlg.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
    <ClCompile Include="pch.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ResourceCompile Include="StreamingSpeechRecognition.rc">
      <Filter>Resource Files</Filter>
    </ResourceCompile>
  </ItemGroup>
  <ItemGroup>
    <None Include="res\StreamingSpeechRecognition.rc2">
      <Filter>Resource Files</Filter>
    </None>
  </ItemGroup>
  <ItemGroup>
    <Image Include="res\StreamingSpeechRecognition.ico">
      <Filter>Resource Files</Filter>
    </Image>
  </ItemGroup>
</Project>

================================================
FILE: mfc-examples/StreamingSpeechRecognition/StreamingSpeechRecognitionDlg.cpp
================================================

// StreamingSpeechRecognitionDlg.cpp : implementation file
//
// clang-format off
#include "pch.h"
#include "framework.h"
#include "afxdialogex.h"
// clang-format on

#include "StreamingSpeechRecognitionDlg.h"

#include <fstream>
#include <sstream>
#include <string>
#include <vector>

#include "StreamingSpeechRecognition.h"

#ifdef _DEBUG
#define new DEBUG_NEW
#endif

Microphone::Microphone() {
  PaError err = Pa_Initialize();
  if (err != paNoError) {
    fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
    exit(-2);
  }
}

Microphone::~Microphone() {
  PaError err = Pa_Terminate();
  if (err != paNoError) {
    fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
    exit(-2);
  }
}

// CStreamingSpeechRecognitionDlg dialog

CStreamingSpeechRecognitionDlg::CStreamingSpeechRecognitionDlg(
    CWnd *pParent /*=nullptr*/)
    : CDialogEx(IDD_STREAMINGSPEECHRECOGNITION_DIALOG, pParent) {
  m_hIcon = AfxGetApp()->LoadIcon(IDR_MAINFRAME);
}

CStreamingSpeechRecognitionDlg::~CStreamingSpeechRecognitionDlg() {
  if (recognizer_) {
    SherpaOnnxDestroyOnlineRecognizer(recognizer_);
    recognizer_ = nullptr;
  }
}

void CStreamingSpeechRecognitionDlg::DoDataExchange(CDataExchange *pDX) {
  CDialogEx::DoDataExchange(pDX);
  DDX_Control(pDX, IDOK, my_btn_);
  DDX_Control(pDX, IDC_EDIT1, my_text_);
}

BEGIN_MESSAGE_MAP(CStreamingSpeechRecognitionDlg, CDialogEx)
ON_WM_PAINT()
ON_WM_QUERYDRAGICON()
ON_BN_CLICKED(IDOK, &CStreamingSpeechRecognitionDlg::OnBnClickedOk)
END_MESSAGE_MAP()

// CStreamingSpeechRecognitionDlg message handlers

BOOL CStreamingSpeechRecognitionDlg::OnInitDialog() {
  CDialogEx::OnInitDialog();

  // Set the icon for this dialog.  The framework does this automatically
  //  when the application's main window is not a dialog
  SetIcon(m_hIcon, TRUE);   // Set big icon
  SetIcon(m_hIcon, FALSE);  // Set small icon

  // TODO: Add extra initialization here
  SetWindowText(_T("Real-time speech recogntion with Next-gen Kaldi"));
  InitMicrophone();

  return TRUE;  // return TRUE  unless you set the focus to a control
}

// If you add a minimize button to your dialog, you will need the code below
//  to draw the icon.  For MFC applications using the document/view model,
//  this is automatically done for you by the framework.

void CStreamingSpeechRecognitionDlg::OnPaint() {
  if (IsIconic()) {
    CPaintDC dc(this);  // device context for painting

    SendMessage(WM_ICONERASEBKGND, reinterpret_cast<WPARAM>(dc.GetSafeHdc()),
                0);

    // Center icon in client rectangle
    int cxIcon = GetSystemMetrics(SM_CXICON);
    int cyIcon = GetSystemMetrics(SM_CYICON);
    CRect rect;
    GetClientRect(&rect);
    int x = (rect.Width() - cxIcon + 1) / 2;
    int y = (rect.Height() - cyIcon + 1) / 2;

    // Draw the icon
    dc.DrawIcon(x, y, m_hIcon);
  } else {
    CDialogEx::OnPaint();
  }
}

// The system calls this function to obtain the cursor to display while the user
// drags
//  the minimized window.
HCURSOR CStreamingSpeechRecognitionDlg::OnQueryDragIcon() {
  return static_cast<HCURSOR>(m_hIcon);
}

static int32_t RecordCallback(const void *input_buffer,
                              void * /*output_buffer*/,
                              unsigned long frames_per_buffer,  // NOLINT
                              const PaStreamCallbackTimeInfo * /*time_info*/,
                              PaStreamCallbackFlags /*status_flags*/,
                              void *user_data) {
  auto dlg = reinterpret_cast<CStreamingSpeechRecognitionDlg *>(user_data);

  auto stream = dlg->stream_;
  if (stream) {
    SherpaOnnxOnlineStreamAcceptWaveform(stream, 16000, reinterpret_cast<const float *>(input_buffer),
                   frames_per_buffer);
  }

  return dlg->started_ ? paContinue : paComplete;
}

void CStreamingSpeechRecognitionDlg::OnBnClickedOk() {
  if (!recognizer_) {
    AppendLineToMultilineEditCtrl("Creating recognizer...");
    AppendLineToMultilineEditCtrl("It will take several seconds. Please wait");
    InitRecognizer();
    if (!recognizer_) {
      // failed to create the recognizer
      return;
    }
    AppendLineToMultilineEditCtrl("Recognizer created!");
  }

  if (!started_) {
    started_ = true;

    if (stream_) {
      SherpaOnnxDestroyOnlineStream(stream_);
      stream_ = nullptr;
    }

    stream_ = SherpaOnnxCreateOnlineStream(recognizer_);

    PaStreamParameters param;
    param.device = Pa_GetDefaultInputDevice();
    const PaDeviceInfo *info = Pa_GetDeviceInfo(param.device);
    param.channelCount = 1;
    param.sampleFormat = paFloat32;
    param.suggestedLatency = info->defaultLowInputLatency;
    param.hostApiSpecificStreamInfo = nullptr;
    float sample_rate = 16000;
    pa_stream_ = nullptr;
    PaError err =
        Pa_OpenStream(&pa_stream_, &param, nullptr, /* &outputParameters, */
                      sample_rate,
                      0,          // frames per buffer
                      paClipOff,  // we won't output out of range samples
                                  // so don't bother clipping them
                      RecordCallback, this);
    if (err != paNoError) {
      AppendLineToMultilineEditCtrl(std::string("PortAudio error: ") +
                                    Pa_GetErrorText(err));
      my_btn_.EnableWindow(FALSE);
      return;
    }

    err = Pa_StartStream(pa_stream_);
    if (err != paNoError) {
      AppendLineToMultilineEditCtrl(std::string("PortAudio error: ") +
                                    Pa_GetErrorText(err));
      my_btn_.EnableWindow(FALSE);
      return;
    }
    AppendLineToMultilineEditCtrl("Started! Please speak");
    my_btn_.SetWindowText(_T("Stop"));

    thread_ = new RecognizerThread(this);
    thread_->CreateThread(CREATE_SUSPENDED);
    thread_->m_bAutoDelete = false;  // Let me delete it.
    thread_->ResumeThread();
  } else {
    started_ = false;
    Pa_Sleep(200);  // sleep for 200ms
    if (pa_stream_) {
      PaError err = Pa_CloseStream(pa_stream_);
      if (err != paNoError) {
        AppendLineToMultilineEditCtrl(std::string("PortAudio error: ") +
                                      Pa_GetErrorText(err));
        my_btn_.EnableWindow(FALSE);
        return;
      }
    }
    pa_stream_ = nullptr;

    WaitForSingleObject(thread_->m_hThread, INFINITE);
    delete thread_;
    thread_ = nullptr;

    // AfxMessageBox("stopped", MB_OK);
    my_btn_.SetWindowText(_T("Start"));
    AppendLineToMultilineEditCtrl("Stopped");
  }
}

void CStreamingSpeechRecognitionDlg::InitMicrophone() {
  int default_device = Pa_GetDefaultInputDevice();
  int device_count = Pa_GetDeviceCount();
  if (default_device == paNoDevice) {
    // CString str;
    // str.Format(_T("No default input device found!"));
    // AfxMessageBox(str, MB_OK | MB_ICONSTOP);
    // exit(-1);
    AppendLineToMultilineEditCtrl("No default input device found!");
    my_btn_.EnableWindow(FALSE);
    return;
  }
  AppendLineToMultilineEditCtrl(std::string("Selected device ") +
                                Pa_GetDeviceInfo(default_device)->name);
}

bool CStreamingSpeechRecognitionDlg::Exists(const std::string &filename) {
  std::ifstream is(filename);
  return is.good();
}

void CStreamingSpeechRecognitionDlg::ShowInitRecognizerHelpMessage() {
    my_btn_.EnableWindow(FALSE);
    std::string msg =
        "\r\nPlease go to\r\n"
        "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html "
        "\r\n";
    msg += "to download a streaming model, i.e., an online model.\r\n";
    msg += "You need to rename them after downloading\r\n\r\n";
    msg += "It supports both transducer and paraformer models.\r\n\r\n";
    msg +=
      "We give two examples below to show you how to download models\r\n\r\n";
    msg += "(1) Transducer\r\n\r\n";
    msg +=
        "https://huggingface.co/pkufool/"
        "icefall-asr-zipformer-streaming-wenetspeech-20230615";
    msg += "\r\n\r\n";
    msg +=
        "wget https:// "
        "huggingface.co/pkufool/"
        "icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/exp/"
        "encoder-epoch-12-avg-4-chunk-16-left-128.onnx\r\n";
    msg +=
        "wget https:// "
        "huggingface.co/pkufool/"
        "icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/exp/"
        "decoder-epoch-12-avg-4-chunk-16-left-128.onnx\r\n";
    msg +=
        "wget https:// "
        "huggingface.co/pkufool/"
        "icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/exp/"
        "joiner-epoch-12-avg-4-chunk-16-left-128.onnx\r\n";
    msg +=
        "wget "
        "https://huggingface.co/pkufool/"
        "icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/"
        "data/lang_char/tokens.txt\r\n";

    msg += "\r\nNow rename them.\r\n";
    msg += "mv encoder-epoch-12-avg-4-chunk-16-left-128.onnx encoder.onnx\r\n";
    msg += "mv decoder-epoch-12-avg-4-chunk-16-left-128.onnx decoder.onnx\r\n";
    msg += "mv joiner-epoch-12-avg-4-chunk-16-left-128.onnx joiner.onnx\r\n";
    msg += "\r\n";
    msg += "(2) Paraformer\r\n\r\n";
    msg +=
        "wget "
        "https://huggingface.co/csukuangfj/"
        "sherpa-onnx-streaming-paraformer-bilingual-zh-en/resolve/main/"
        "encoder.int8.onnx\r\n";
    msg +=
        "wget "
        "https://huggingface.co/csukuangfj/"
        "sherpa-onnx-streaming-paraformer-bilingual-zh-en/resolve/main/"
        "decoder.int8.onnx\r\n";
    msg +=
        "wget "
        "https://huggingface.co/csukuangfj/"
        "sherpa-onnx-streaming-paraformer-bilingual-zh-en/resolve/main/"
        "tokens.txt\r\n";
    msg += "\r\nNow rename them.\r\n";
    msg += "mv encoder.int8.onnx paraformer-encoder.onnx\r\n";
    msg += "mv decoder.int8.onnx paraformer-decoder.onnx\r\n\r\n";
    msg += "That's it!\r\n";

    AppendLineToMultilineEditCtrl(msg);
}

void CStreamingSpeechRecognitionDlg::InitParaformer() {
  std::string paraformer_encoder = "./paraformer-encoder.onnx";
  std::string paraformer_decoder = "./paraformer-decoder.onnx";

  std::string tokens = "./tokens.txt";

  bool is_ok = true;

  if (Exists("./paraformer-encoder.int8.onnx")) {
    paraformer_encoder = "./paraformer-encoder.int8.onnx";
  } else if (!Exists(paraformer_encoder)) {
    std::string msg = paraformer_encoder + " does not exist!";
    AppendLineToMultilineEditCtrl(msg);
    is_ok = false;
  }

  if (Exists("./paraformer-decoder.int8.onnx")) {
    paraformer_decoder = "./paraformer-decoder.int8.onnx";
  } else if (!Exists(paraformer_decoder)) {
    std::string msg = paraformer_decoder + " does not exist!";
    AppendLineToMultilineEditCtrl(msg);
    is_ok = false;
  }

  if (!Exists(tokens)) {
    std::string msg = tokens + " does not exist!";
    AppendLineToMultilineEditCtrl(msg);
    is_ok = false;
  }

  if (!is_ok) {
    ShowInitRecognizerHelpMessage();
    return;
  }

  SherpaOnnxOnlineRecognizerConfig config;
  memset(&config, 0, sizeof(config));
  config.model_config.debug = 0;
  config.model_config.num_threads = 1;
  config.model_config.provider = "cpu";

  config.decoding_method = "greedy_search";
  config.max_active_paths = 4;

  config.feat_config.sample_rate = 16000;
  config.feat_config.feature_dim = 80;

  config.enable_endpoint = 1;
  config.rule1_min_trailing_silence = 1.2f;
  config.rule2_min_trailing_silence = 0.8f;
  config.rule3_min_utterance_length = 300.0f;

  config.model_config.tokens = tokens.c_str();
  config.model_config.paraformer.encoder = paraformer_encoder.c_str();
  config.model_config.paraformer.decoder = paraformer_decoder.c_str();

  recognizer_ = SherpaOnnxCreateOnlineRecognizer(&config);
}

void CStreamingSpeechRecognitionDlg::InitRecognizer() {
  if (Exists("./paraformer-encoder.onnx") || Exists("./paraformer-encoder.int8.onnx")) {
    InitParaformer();
    return;
  }

  std::string encoder = "./encoder.onnx";
  std::string decoder = "./decoder.onnx";
  std::string joiner = "./joiner.onnx";
  std::string tokens = "./tokens.txt";

  bool is_ok = true;
  if (!Exists(encoder)) {
    std::string msg = encoder + " does not exist!";
    AppendLineToMultilineEditCtrl(msg);
    is_ok = false;
  }

  if (!Exists(decoder)) {
    std::string msg = decoder + " does not exist!";
    AppendLineToMultilineEditCtrl(msg);
    is_ok = false;
  }

  if (!Exists(joiner)) {
    std::string msg = joiner + " does not exist!";
    AppendLineToMultilineEditCtrl(msg);
    is_ok = false;
  }

  if (!Exists(tokens)) {
    std::string msg = tokens + " does not exist!";
    AppendLineToMultilineEditCtrl(msg);
    is_ok = false;
  }

  if (!is_ok) {
    ShowInitRecognizerHelpMessage();
    return;
  }

  SherpaOnnxOnlineRecognizerConfig config;
  memset(&config, 0, sizeof(config));
  config.model_config.debug = 0;
  config.model_config.num_threads = 1;
  config.model_config.provider = "cpu";

  config.decoding_method = "greedy_search";
  config.max_active_paths = 4;

  config.feat_config.sample_rate = 16000;
  config.feat_config.feature_dim = 80;

  config.enable_endpoint = 1;
  config.rule1_min_trailing_silence = 1.2f;
  config.rule2_min_trailing_silence = 0.8f;
  config.rule3_min_utterance_length = 300.0f;

  config.model_config.tokens = tokens.c_str();
  config.model_config.transducer.encoder = encoder.c_str();
  config.model_config.transducer.decoder = decoder.c_str();
  config.model_config.transducer.joiner = joiner.c_str();

  recognizer_ = SherpaOnnxCreateOnlineRecognizer(&config);
}

// see
// https://stackoverflow.com/questions/7153935/how-to-convert-utf-8-stdstring-to-utf-16-stdwstring
static std::wstring Utf8ToUtf16(const std::string &utf8) {
  std::vector<unsigned long> unicode;
  size_t i = 0;
  while (i < utf8.size()) {
    unsigned long uni;
    size_t todo;
    bool error = false;
    unsigned char ch = utf8[i++];
    if (ch <= 0x7F) {
      uni = ch;
      todo = 0;
    } else if (ch <= 0xBF) {
      throw std::logic_error("not a UTF-8 string");
    } else if (ch <= 0xDF) {
      uni = ch & 0x1F;
      todo = 1;
    } else if (ch <= 0xEF) {
      uni = ch & 0x0F;
      todo = 2;
    } else if (ch <= 0xF7) {
      uni = ch & 0x07;
      todo = 3;
    } else {
      throw std::logic_error("not a UTF-8 string");
    }
    for (size_t j = 0; j < todo; ++j) {
      if (i == utf8.size()) throw std::logic_error("not a UTF-8 string");
      unsigned char ch = utf8[i++];
      if (ch < 0x80 || ch > 0xBF) throw std::logic_error("not a UTF-8 string");
      uni <<= 6;
      uni += ch & 0x3F;
    }
    if (uni >= 0xD800 && uni <= 0xDFFF)
      throw std::logic_error("not a UTF-8 string");
    if (uni > 0x10FFFF) throw std::logic_error("not a UTF-8 string");
    unicode.push_back(uni);
  }
  std::wstring utf16;
  for (size_t i = 0; i < unicode.size(); ++i) {
    unsigned long uni = unicode[i];
    if (uni <= 0xFFFF) {
      utf16 += (wchar_t)uni;
    } else {
      uni -= 0x10000;
      utf16 += (wchar_t)((uni >> 10) + 0xD800);
      utf16 += (wchar_t)((uni & 0x3FF) + 0xDC00);
    }
  }
  return utf16;
}

void CStreamingSpeechRecognitionDlg::AppendTextToEditCtrl(
    const std::string &s) {
  // get the initial text length
  int nLength = my_text_.GetWindowTextLength();
  // put the selection at the end of text
  my_text_.SetSel(nLength, nLength);
  // replace the selection

  std::wstring wstr = Utf8ToUtf16(s);

  // my_text_.ReplaceSel(wstr.c_str());
  my_text_.ReplaceSel(wstr.c_str());
}

void CStreamingSpeechRecognitionDlg::AppendLineToMultilineEditCtrl(
    const std::string &s) {
  AppendTextToEditCtrl("\r\n" + s);
}

static std::string Cat(const std::vector<std::string> &results,
                       const std::string &s) {
  std::ostringstream os;
  std::string sep;

  int i = 0;
  for (i = 0; i != results.size(); ++i) {
    os << sep << i << ": " << results[i];
    sep = "\r\n";
  }

  if (!s.empty()) {
    os << sep << i << ": " << s;
  }
  return os.str();
}

int CStreamingSpeechRecognitionDlg::RunThread() {
  std::vector<std::string> results;

  std::string last_text;
  while (started_) {
    while (SherpaOnnxIsOnlineStreamReady(recognizer_, stream_)) {
      SherpaOnnxDecodeOnlineStream(recognizer_, stream_);
    }

    auto r = SherpaOnnxGetOnlineStreamResult(recognizer_, stream_);
    std::string text = r->text;
    SherpaOnnxDestroyOnlineRecognizerResult(r);
    if (!text.empty() && last_text != text) {
      // CString str;
      // str.Format(_T("%s"), Cat(results, text).c_str());
      auto str = Utf8ToUtf16(Cat(results, text).c_str());
      my_text_.SetWindowText(str.c_str());
      my_text_.SetFocus();
      my_text_.SetSel(-1);
      last_text = text;
    }
    int is_endpoint = SherpaOnnxOnlineStreamIsEndpoint(recognizer_, stream_);
    if (is_endpoint) {
      SherpaOnnxOnlineStreamReset(recognizer_, stream_);
      if (!text.empty()) {
        results.push_back(std::move(text));
      }
    }

    Pa_Sleep(100);  // sleep for 100ms
  }

  return 0;
}


================================================
FILE: mfc-examples/StreamingSpeechRecognition/StreamingSpeechRecognitionDlg.h
================================================

// StreamingSpeechRecognitionDlg.h : header file
//

#pragma once

#include <string>

#include "portaudio.h"
#include "sherpa-onnx/c-api/c-api.h"

class Microphone {
 public:
  Microphone();
  ~Microphone();
};

class RecognizerThread;

// CStreamingSpeechRecognitionDlg dialog
class CStreamingSpeechRecognitionDlg : public CDialogEx {
  // Construction
 public:
  CStreamingSpeechRecognitionDlg(
      CWnd *pParent = nullptr);  // standard constructor
  ~CStreamingSpeechRecognitionDlg();

// Dialog Data
#ifdef AFX_DESIGN_TIME
  enum { IDD = IDD_STREAMINGSPEECHRECOGNITION_DIALOG };
#endif

 protected:
  virtual void DoDataExchange(CDataExchange *pDX);  // DDX/DDV support

  // Implementation
 protected:
  HICON m_hIcon;

  // Generated message map functions
  virtual BOOL OnInitDialog();
  afx_msg void OnPaint();
  afx_msg HCURSOR OnQueryDragIcon();
  DECLARE_MESSAGE_MAP()
 private:
  Microphone mic_;

  const SherpaOnnxOnlineRecognizer *recognizer_ = nullptr;

  PaStream *pa_stream_ = nullptr;
  RecognizerThread *thread_ = nullptr;
  CButton my_btn_;
  CEdit my_text_;

 public:
  bool started_ = false;
  const SherpaOnnxOnlineStream *stream_ = nullptr;

 public:
  int RunThread();
  afx_msg void OnBnClickedOk();

 private:
  void AppendTextToEditCtrl(const std::string &s);
  void AppendLineToMultilineEditCtrl(const std::string &s);
  void InitMicrophone();

  bool Exists(const std::string &filename);
  void InitRecognizer();
  void InitParaformer();
  void ShowInitRecognizerHelpMessage();
};

class RecognizerThread : public CWinThread {
 public:
  RecognizerThread(CStreamingSpeechRecognitionDlg *dlg) : dlg_(dlg) {}
  virtual BOOL InitInstance() { return TRUE; }
  virtual int Run() { return dlg_->RunThread(); }

 private:
  CStreamingSpeechRecognitionDlg *dlg_;
};


================================================
FILE: mfc-examples/StreamingSpeechRecognition/framework.h
================================================
#pragma once

#ifndef VC_EXTRALEAN
#define VC_EXTRALEAN  // Exclude rarely-used stuff from Windows headers
#endif

#include "targetver.h"

#define _ATL_CSTRING_EXPLICIT_CONSTRUCTORS  // some CString constructors will be
                                            // explicit

// turns off MFC's hiding of some common and often safely ignored warning
// messages
#define _AFX_ALL_WARNINGS

#include <afxext.h>  // MFC extensions
#include <afxwin.h>  // MFC core and standard components

#ifndef _AFX_NO_OLE_SUPPORT
#include <afxdtctl.h>  // MFC support for Internet Explorer 4 Common Controls
#endif
#ifndef _AFX_NO_AFXCMN_SUPPORT
#include <afxcmn.h>  // MFC support for Windows Common Controls
#endif               // _AFX_NO_AFXCMN_SUPPORT

#include <afxcontrolbars.h>  // MFC support for ribbons and control bars


================================================
FILE: mfc-examples/StreamingSpeechRecognition/pch.cpp
================================================
// pch.cpp: source file corresponding to the pre-compiled header

#include "pch.h"

// When you are using pre-compiled headers, this source file is necessary for
// compilation to succeed.


================================================
FILE: mfc-examples/StreamingSpeechRecognition/pch.h
================================================
// pch.h: This is a precompiled header file.
// Files listed below are compiled only once, improving build performance for
// future builds. This also affects IntelliSense performance, including code
// completion and many code browsing features. However, files listed here are
// ALL re-compiled if any one of them is updated between builds. Do not add
// files here that you will be updating frequently as this negates the
// performance advantage.

#ifndef PCH_H
#define PCH_H

// add headers that you want to pre-compile here
#include "framework.h"

#endif  // PCH_H


================================================
FILE: mfc-examples/StreamingSpeechRecognition/sherpa-onnx-deps.props
================================================
﻿<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <ImportGroup Label="PropertySheets" />
  <PropertyGroup Label="UserMacros" />
  <PropertyGroup>
    <SherpaOnnxBuildDirectory>..\..\build</SherpaOnnxBuildDirectory>
    <SherpaOnnxInstallDirectory>..\..\build\install</SherpaOnnxInstallDirectory>
    <SherpaOnnxLibraries>
        sherpa-onnx-portaudio_static.lib;
        sherpa-onnx-c-api.lib;
        sherpa-onnx-core.lib;
        kaldi-decoder-core.lib;
        sherpa-onnx-kaldifst-core.lib;
        sherpa-onnx-fstfar.lib;
        sherpa-onnx-fst.lib;
        kaldi-native-fbank-core.lib;
        kissfft-float.lib;
        onnxruntime.lib;
        piper_phonemize.lib;
        espeak-ng.lib;
        ucd.lib;
        ssentencepiece_core.lib;
    </SherpaOnnxLibraries>
  </PropertyGroup>
  <ItemDefinitionGroup>
    <ClCompile>
      <AdditionalIncludeDirectories>
	  $(SherpaOnnxBuildDirectory)\_deps\portaudio-src\include;
    $(SherpaOnnxInstallDirectory)\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
    </ClCompile>
    <Link>
      <AdditionalLibraryDirectories>$(SherpaOnnxInstallDirectory)\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
      <AdditionalDependencies>$(SherpaOnnxLibraries);</AdditionalDependencies>
    </Link>
  </ItemDefinitionGroup>
  <ItemGroup />
</Project>


================================================
FILE: mfc-examples/StreamingSpeechRecognition/targetver.h
================================================
#pragma once

// Including SDKDDKVer.h defines the highest available Windows platform.

// If you wish to build your application for a previous Windows platform,
// include WinSDKVer.h and set the _WIN32_WINNT macro to the platform you wish
// to support before including SDKDDKVer.h.

#include <SDKDDKVer.h>


================================================
FILE: mfc-examples/mfc-examples.sln
================================================
﻿
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.6.33829.357
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "StreamingSpeechRecognition", "StreamingSpeechRecognition\StreamingSpeechRecognition.vcxproj", "{A79C2604-C33D-497C-9770-D34E118B77FE}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "NonStreamingSpeechRecognition", "NonStreamingSpeechRecognition\NonStreamingSpeechRecognition.vcxproj", "{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "NonStreamingTextToSpeech", "NonStreamingTextToSpeech\NonStreamingTextToSpeech.vcxproj", "{9A5F2CCC-1AAB-4F7F-A608-F0B512023405}"
EndProject
Global
	GlobalSection(SolutionConfigurationPlatforms) = preSolution
		Debug|x64 = Debug|x64
		Debug|x86 = Debug|x86
		Release|x64 = Release|x64
		Release|x86 = Release|x86
	EndGlobalSection
	GlobalSection(ProjectConfigurationPlatforms) = postSolution
		{A79C2604-C33D-497C-9770-D34E118B77FE}.Debug|x64.ActiveCfg = Debug|x64
		{A79C2604-C33D-497C-9770-D34E118B77FE}.Debug|x64.Build.0 = Debug|x64
		{A79C2604-C33D-497C-9770-D34E118B77FE}.Debug|x86.ActiveCfg = Debug|Win32
		{A79C2604-C33D-497C-9770-D34E118B77FE}.Debug|x86.Build.0 = Debug|Win32
		{A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x64.ActiveCfg = Release|x64
		{A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x64.Build.0 = Release|x64
		{A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x86.ActiveCfg = Release|Win32
		{A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x86.Build.0 = Release|Win32
		{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Debug|x64.ActiveCfg = Debug|x64
		{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Debug|x64.Build.0 = Debug|x64
		{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Debug|x86.ActiveCfg = Debug|Win32
		{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Debug|x86.Build.0 = Debug|Win32
		{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x64.ActiveCfg = Release|x64
		{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x64.Build.0 = Release|x64
		{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x86.ActiveCfg = Release|Win32
		{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x86.Build.0 = Release|Win32
		{9A5F2CCC-1AAB-4F7F-A608-F0B512023405}.Debug|x64.ActiveCfg = Debug|x64
		{9A5F2CCC-1AAB-4F7F-A608-F0B512023405}.Debug|x64.Build.0 = Debug|x64
		{9A5F2CCC-1AAB-4F7F-A608-F0B512023405}.Debug|x86.ActiveCfg = Debug|Win32
		{9A5F2CCC-1AAB-4F7F-A608-F0B512023405}.Debug|x86.Build.0 = Debug|Win32
		{9A5F2CCC-1AAB-4F7F-A608-F0B512023405}.Release|x64.ActiveCfg = Release|x64
		{9A5F2CCC-1AAB-4F7F-A608-F0B512023405}.Release|x64.Build.0 = Release|x64
		{9A5F2CCC-1AAB-4F7F-A608-F0B512023405}.Release|x86.ActiveCfg = Release|Win32
		{9A5F2CCC-1AAB-4F7F-A608-F0B512023405}.Release|x86.Build.0 = Release|Win32
	EndGlobalSection
	GlobalSection(SolutionProperties) = preSolution
		HideSolutionNode = FALSE
	EndGlobalSection
	GlobalSection(ExtensibilityGlobals) = postSolution
		SolutionGuid = {C0A85719-CF8C-4BCD-BDF6-7C57EE651CBB}
	EndGlobalSection
EndGlobal


================================================
FILE: new-release.sh
================================================
#!/usr/bin/env bash

set -ex

old_version_code=20260319
new_version_code=20260320

old_version="1\.12\.30"
new_version="1\.12\.31"

replace_str="s/$old_version/$new_version/g"

sed -i.bak "$replace_str" ./CMakeLists.txt

sed -i.bak "$replace_str" ./sherpa-onnx/csrc/version.cc
sha1=$(git describe --match=NeVeRmAtCh --always --abbrev=8)
date=$(git log -1 --format=%ad --date=local)

find android -name "build.gradle" -type f -exec sed -i.bak "s/versionName \"$old_version\"/versionName \"$new_version\"/g" {} \;
find android -name "build.gradle.kts" -type f -exec sed -i.bak "s/versionName = \"$old_version\"/versionName = \"$new_version\"/g" {} \;

find android -name "build.gradle" -type f -exec sed -i.bak "s/versionCode $old_version_code/versionCode $new_version_code/g" {} \;
find android -name "build.gradle.kts" -type f -exec sed -i.bak "s/versionCode = $old_version_code/versionCode = $new_version_code/g" {} \;

sed -i.bak "s/  static const char \*sha1.*/  static const char \*sha1 = \"$sha1\";/g" ./sherpa-onnx/csrc/version.cc
sed -i.bak "s/  static const char \*date.*/  static const char \*date = \"$date\";/g" ./sherpa-onnx/csrc/version.cc


find scripts/wheel -name "setup.py" -type f -exec sed -i.bak "$replace_str" {} \;
sed -i.bak "$replace_str" ./setup.py

sed -i.bak "$replace_str" ./build-ios-shared.sh
sed -i.bak "$replace_str" ./pom.xml
sed -i.bak "$replace_str" ./jitpack.yml
sed -i.bak "$replace_str" ./android/SherpaOnnxAar/README.md

sed -i.bak "$replace_str" ./rust-api-examples/Cargo.toml
sed -i.bak "$replace_str" ./sherpa-onnx/rust/sherpa-onnx-sys/Cargo.toml
sed -i.bak "$replace_str" ./sherpa-onnx/rust/sherpa-onnx/Cargo.toml
sed -i.bak "$replace_str" ./rust-api-examples/README.md

find android -name build.gradle -type f -exec sed -i.bak "s/sherpa-onnx:v$old_version/sherpa-onnx:v$new_version/g" {} \;
find android -name build.gradle.kts -type f -exec sed -i.bak "s/sherpa-onnx:v$old_version/sherpa-onnx:v$new_version/g" {} \;

find flutter -name "*.yaml" -type f -exec sed -i.bak "$replace_str" {} \;
find dart-api-examples -name "*.yaml" -type f -exec sed -i.bak "$replace_str" {} \;
find flutter-examples -name "*.yaml" -type f -exec sed -i.bak "$replace_str" {} \;
find flutter -name "*.podspec" -type f -exec sed -i.bak "$replace_str" {} \;
find nodejs-addon-examples -name package.json -type f -exec sed -i.bak "$replace_str" {} \;
find nodejs-examples -name package.json -type f -exec sed -i.bak "$replace_str" {} \;

find harmony-os -name "README.md" -type f -exec sed -i.bak "$replace_str" {} \;
find harmony-os -name oh-package.json5 -type f -exec sed -i.bak "$replace_str" {} \;
find harmony-os -name BuildProfile.ets -type f -exec sed -i.bak "$replace_str" {} \;

find mfc-examples -name "README.md" -type f -exec sed -i.bak "$replace_str" {} \;

find . -name "*.bak" -exec rm {} \;


================================================
FILE: nodejs-addon-examples/.gitignore
================================================
crash.log


================================================
FILE: nodejs-addon-examples/README.md
================================================
# Introduction

Note: You need `Node >= 16`.

This repo contains examples for NodeJS.
It uses [node-addon-api](https://github.com/nodejs/node-addon-api) to wrap
`sherpa-onnx` for NodeJS and it supports multiple threads.

Note: [../nodejs-examples](../nodejs-examples) uses WebAssembly to wrap
`sherpa-onnx` for NodeJS and it does not support multiple threads.

Before you continue, please first run

```bash
npm install # or pnpm install

# For macOS x64
## With npm
export DYLD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-x64:$DYLD_LIBRARY_PATH
## With pnpm
export DYLD_LIBRARY_PATH=$PWD/node_modules/.pnpm/sherpa-onnx-node@<REPLACE-THIS-WITH-THE-INSTALLED-VERSION>/node_modules/sherpa-onnx-darwin-x64:$DYLD_LIBRARY_PATH

# For macOS arm64
## With npm
export DYLD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-arm64:$DYLD_LIBRARY_PATH
## With pnpm
export DYLD_LIBRARY_PATH=$PWD/node_modules/.pnpm/sherpa-onnx-node@<REPLACE-THIS-WITH-THE-INSTALLED-VERSION>/node_modules/sherpa-onnx-darwin-arm64:$DYLD_LIBRARY_PATH

# For Linux x64
## With npm
export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-x64:$LD_LIBRARY_PATH
## With pnpm
export LD_LIBRARY_PATH=$PWD/node_modules/.pnpm/sherpa-onnx-node@<REPLACE-THIS-WITH-THE-INSTALLED-VERSION>/node_modules/sherpa-onnx-linux-x64:$LD_LIBRARY_PATH

# For Linux arm64, e.g., Raspberry Pi 4
## With npm
export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-arm64:$LD_LIBRARY_PATH
## With pnpm
export LD_LIBRARY_PATH=$PWD/node_modules/.pnpm/sherpa-onnx-node@<REPLACE-THIS-WITH-THE-INSTALLED-VERSION>/node_modules/sherpa-onnx-linux-arm64:$LD_LIBRARY_PATH
```

# Examples

The following tables list the examples in this folder.

## Speech enhancement/denoising

|File| Description|
|---|---|
|[./test_offline_speech_enhancement_gtcrn.js](./test_offline_speech_enhancement_gtcrn.js)| It demonstrates how to use sherpa-onnx JavaScript API for speech enhancement with GTCRN.|
|[./test_offline_speech_enhancement_dpdfnet.js](./test_offline_speech_enhancement_dpdfnet.js)| It demonstrates how to use sherpa-onnx JavaScript API for speech enhancement with DPDFNet.|
|[./test_online_speech_enhancement_gtcrn.js](./test_online_speech_enhancement_gtcrn.js)| It demonstrates how to use sherpa-onnx JavaScript API for online speech enhancement with GTCRN.|
|[./test_online_speech_enhancement_dpdfnet.js](./test_online_speech_enhancement_dpdfnet.js)| It demonstrates how to use sherpa-onnx JavaScript API for online speech enhancement with DPDFNet.|

## Speaker diarization

|File| Description|
|---|---|
|[./test_offline_speaker_diarization.js](./test_offline_speaker_diarization.js)| It demonstrates how to use sherpa-onnx JavaScript API for speaker diarization. It supports speaker segmentation models from [pyannote-audio](https://github.com/pyannote/pyannote-audio)|

## Add punctuations to text

|File| Description|
|---|---|
|[./test_offline_punctuation.js](./test_offline_punctuation.js)| Add punctuations to input text using [CT transformer](https://modelscope.cn/models/iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/summary). It supports both Chinese and English.|
|[./test_online_punctuation.js](./test_online_punctuation.js)| Add punctuations to input text using an online/streaming punctuation model.|

## Voice activity detection (VAD)

|File| Description|
|---|---|
|[./test_vad_microphone.js](./test_vad_microphone.js)| VAD with a microphone. It uses [silero-vad](https://github.com/snakers4/silero-vad)|

## Speaker identification

|File| Description|
|---|---|
|[ ./test_speaker_identification.js]( ./test_speaker_identification.js)| Speaker identification from a file|

## Spoken language identification

|File| Description|
|---|---|
|[./test_spoken_language_identification.js](./test_spoken_language_identification.js)|Spoken language identification from a file using a multi-lingual [Whisper](https://github.com/openai/whisper) model|
|[./test_vad_spoken_language_identification_microphone.js](./test_vad_spoken_language_identification_microphone.js)|Spoken language identification from a microphone using a multi-lingual [Whisper](https://github.com/openai/whisper) model|

## Audio tagging

|File| Description|
|---|---|
|[./test_audio_tagging_zipformer.js](./test_audio_tagging_zipformer.js)| Audio tagging with a Zipformer model|
|[./test_audio_tagging_ced.js](./test_audio_tagging_ced.js)| Audio tagging with a [CED](https://github.com/RicherMans/CED) model|

## Keyword spotting

|File| Description|
|---|---|
|[./test_keyword_spotter_transducer.js](./test_keyword_spotter_transducer.js)| Keyword spotting from a file using a Zipformer model|
|[./test_keyword_spotter_transducer_microphone.js](./test_keyword_spotter_transducer_microphone.js)| Keyword spotting from a microphone using a Zipformer model|

## Streaming speech-to-text from files

|File| Description|
|---|---|
|[./test_asr_streaming_t_one_ctc.js](./test_asr_streaming_t_one_ctc.js)| Streaming speech recognition from a file using a T-one CTC model|
|[./test_asr_streaming_transducer.js](./test_asr_streaming_transducer.js)| Streaming speech recognition from a file using a Zipformer transducer model|
|[./test_asr_streaming_transducer_itn.js](./test_asr_streaming_transducer_itn.js)| Streaming speech recognition from a file using a Zipformer transducer model with ITN|
|[./test_asr_streaming_transducer_with_hr.js](./test_asr_streaming_transducer_with_hr.js)| Streaming speech recognition from a file using a Zipformer transducer model with homophone replacer|
|[./test_asr_streaming_ctc.js](./test_asr_streaming_ctc.js)| Streaming speech recognition from a file using a Zipformer CTC model with greedy search|
|[./test_asr_streaming_ctc_hlg.js](./test_asr_streaming_ctc_hlg.js)| Streaming speech recognition from a file using a Zipformer CTC model with HLG decoding|
|[./test_asr_streaming_paraformer.js](./test_asr_streaming_paraformer.js)|Streaming speech recognition from a file using a [Paraformer](https://github.com/alibaba-damo-academy/FunASR) model|

## Streaming speech-to-text from a microphone

|File| Description|
|---|---|
|[./test_asr_streaming_transducer_microphone.js](./test_asr_streaming_transducer_microphone.js)| Streaming speech recognition from a microphone using a Zipformer transducer model|
|[./test_asr_streaming_transducer_microphone_itn.js](./test_asr_streaming_transducer_microphone_itn.js)| Streaming speech recognition from a microphone using a Zipformer transducer model with ITN|
|[./test_asr_streaming_ctc_microphone.js](./test_asr_streaming_ctc_microphone.js)| Streaming speech recognition from a microphone using a Zipformer CTC model with greedy search|
|[./test_asr_streaming_ctc_hlg_microphone.js](./test_asr_streaming_ctc_hlg_microphone.js)|Streaming speech recognition from a microphone using a Zipformer CTC model with HLG decoding|
|[./test_asr_streaming_paraformer_microphone.js](./test_asr_streaming_paraformer_microphone.js)| Streaming speech recognition from a microphone using a [Paraformer](https://github.com/alibaba-damo-academy/FunASR) model|

## Non-Streaming speech-to-text from files

|File| Description|
|---|---|
|[./test_asr_non_streaming_transducer.js](./test_asr_non_streaming_transducer.js)|Non-streaming speech recognition from a file with a Zipformer transducer model|
|[./test_asr_non_streaming_fire_red_asr.js](./test_asr_non_streaming_fire_red_asr.js)| Non-streaming speech recognition from a file using [FireRedAsr](https://github.com/FireRedTeam/FireRedASR)|
|[./test_asr_non_streaming_fire_red_asr_ctc.js](./test_asr_non_streaming_fire_red_asr_ctc.js)| Non-streaming speech recognition from a file using [FireRedAsr](https://github.com/FireRedTeam/FireRedASR) CTC model|
|[./test_asr_non_streaming_fire_red_asr_ctc_async.js](./test_asr_non_streaming_fire_red_asr_ctc_async.js)| Async non-streaming speech recognition from a file using [FireRedAsr](https://github.com/FireRedTeam/FireRedASR) CTC model|
|[./test_asr_non_streaming_whisper.js](./test_asr_non_streaming_whisper.js)| Non-streaming speech recognition from a file using [Whisper](https://github.com/openai/whisper)|
|[./test_vad_with_non_streaming_asr_whisper.js](./test_vad_with_non_streaming_asr_whisper.js)| Non-streaming speech recognition from a file using [Whisper](https://github.com/openai/whisper) + [Silero VAD](https://github.com/snakers4/silero-vad)|
|[./test_asr_non_streaming_moonshine.js](./test_asr_non_streaming_moonshine.js)|Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine)|
|[./test_asr_non_streaming_moonshine_v2.js](./test_asr_non_streaming_moonshine_v2.js)|Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine) v2|
|[./test_vad_with_non_streaming_asr_moonshine.js](./test_vad_with_non_streaming_asr_moonshine.js)| Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine) + [Silero VAD](https://github.com/snakers4/silero-vad)|
|[./test_asr_non_streaming_nemo_ctc.js](./test_asr_non_streaming_nemo_ctc.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) CTC model with greedy search|
|[./test_asr_non_streaming_wenet_ctc.js](./test_asr_non_streaming_wenet_ctc.js)|Non-streaming speech recognition from a file using a [u2pp_conformer_yue](https://huggingface.co/ASLP-lab/WSYue-ASR/tree/main/u2pp_conformer_yue) CTC model with greedy search|
|[./test_asr_non_streaming_omnilingual_asr_ctc.js](./test_asr_non_streaming_omnilingual_asr_ctc.js)|Non-streaming speech recognition from a file using a [Omnilingual-ASR](https://github.com/facebookresearch/omnilingual-asr) CTC model with greedy search|
|[./test_asr_non_streaming_medasr_ctc.js](./test_asr_non_streaming_medasr_ctc.js)|Non-streaming speech recognition from a file using a [Google MedASR](https://github.com/google-health/medasr) CTC model with greedy search|
|[./test_asr_non_streaming_funasr_nano.js](./test_asr_non_streaming_funasr_nano.js)|Non-streaming speech recognition from a file using a [FunASR Nano](https://modelscope.cn/models/FunAudioLLM/Fun-ASR-Nano-2512) model|
|[./test_asr_non_streaming_funasr_nano_async.js](./test_asr_non_streaming_funasr_nano_async.js)|Async non-streaming speech recognition from multiple files using a [FunASR Nano](https://modelscope.cn/models/FunAudioLLM/Fun-ASR-Nano-2512) model|
|[./test_asr_non_streaming_nemo_canary.js](./test_asr_non_streaming_nemo_canary.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) [Canary](https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html#sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8-english-spanish-german-french) model|
|[./test_asr_non_streaming_zipformer_ctc.js](./test_asr_non_streaming_zipformer_ctc.js)|Non-streaming speech recognition from a file using a Zipformer CTC model with greedy search|
|[./test_asr_non_streaming_nemo_parakeet_tdt_v2.js](./test_asr_non_streaming_nemo_parakeet_tdt_v2.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) [parakeet-tdt-0.6b-v2](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/nemo-transducer-models.html#sherpa-onnx-nemo-parakeet-tdt-0-6b-v2-int8-english) model with greedy search|
|[./test_asr_non_streaming_dolphin_ctc.js](./test_asr_non_streaming_dolphin_ctc.js)|Non-streaming speech recognition from a file using a [Dolphinhttps://github.com/DataoceanAI/Dolphin]) CTC model with greedy search|
|[./test_asr_non_streaming_paraformer.js](./test_asr_non_streaming_paraformer.js)|Non-streaming speech recognition from a file using [Paraformer](https://github.com/alibaba-damo-academy/FunASR)|
|[./test_asr_non_streaming_paraformer_itn.js](./test_asr_non_streaming_paraformer_itn.js)|Non-streaming speech recognition from a file using [Paraformer](https://github.com/alibaba-damo-academy/FunASR) with ITN|
|[./test_asr_non_streaming_sense_voice.js](./test_asr_non_streaming_sense_voice.js)|Non-streaming speech recognition from a file using [SenseVoice](https://github.com/FunAudioLLM/SenseVoice)|
|[./test_asr_non_streaming_sense_voice_with_hr.js](./test_asr_non_streaming_sense_voice_with_hr.js)|Non-streaming speech recognition from a file using [SenseVoice](https://github.com/FunAudioLLM/SenseVoice) with homophone replacer|

## Non-Streaming speech-to-text from a microphone with VAD

|File| Description|
|---|---|
|[./test_vad_asr_non_streaming_transducer_microphone.js](./test_vad_asr_non_streaming_transducer_microphone.js)|VAD + Non-streaming speech recognition from a microphone using a Zipformer transducer model|
|[./test_vad_asr_non_streaming_whisper_microphone.js](./test_vad_asr_non_streaming_whisper_microphone.js)|VAD + Non-streaming speech recognition from a microphone using [Whisper](https://github.com/openai/whisper)|
|[./test_vad_asr_non_streaming_moonshine_microphone.js](./test_vad_asr_non_streaming_moonshine_microphone.js)|VAD + Non-streaming speech recognition from a microphone using [Moonshine](https://github.com/usefulsensors/moonshine)|
|[./test_vad_asr_non_streaming_nemo_ctc_microphone.js](./test_vad_asr_non_streaming_nemo_ctc_microphone.js)|VAD + Non-streaming speech recognition from a microphone using a [NeMo](https://github.com/NVIDIA/NeMo) CTC model with greedy search|
|[./test_vad_asr_non_streaming_zipformer_ctc_microphone.js](./test_vad_asr_non_streaming_zipformer_ctc_microphone.js)|VAD + Non-streaming speech recognition from a microphone using a Zipformer CTC model with greedy search|
|[./test_vad_asr_non_streaming_paraformer_microphone.js](./test_vad_asr_non_streaming_paraformer_microphone.js)|VAD + Non-streaming speech recognition from a microphone using [Paraformer](https://github.com/alibaba-damo-academy/FunASR)|
|[./test_vad_asr_non_streaming_sense_voice_microphone.js](./test_vad_asr_non_streaming_sense_voice_microphone.js)|VAD + Non-streaming speech recognition from a microphone using [SenseVoice](https://github.com/FunAudioLLM/SenseVoice)|

## Text-to-speech

|File| Description|
|---|---|
|[./test_tts_non_streaming_pocket_en.js](./test_tts_non_streaming_pocket_en.js)| Zero-shot text-to-speech with a PocketTTS English Model|
|[./test_tts_non_streaming_pocket_en_async.js](./test_tts_non_streaming_pocket_en_async.js)| Zero-shot text-to-speech with a PocketTTS English Model using async JS API|
|[./test_tts_non_streaming_pocket_en_play_async.js](./test_tts_non_streaming_pocket_en_play_async.js)| Zero-shot text-to-speech with a PocketTTS English Model using async JS API and live audio playback|
|[./test_tts_non_streaming_zipvoice_zh_en.js](./test_tts_non_streaming_zipvoice_zh_en.js)| Zero-shot text-to-speech with a ZipVoice Chinese/English Model|
|[./test_tts_non_streaming_zipvoice_zh_en_async.js](./test_tts_non_streaming_zipvoice_zh_en_async.js)| Zero-shot text-to-speech with a ZipVoice Chinese/English Model using async JS API|
|[./test_tts_non_streaming_zipvoice_zh_en_play_async.js](./test_tts_non_streaming_zipvoice_zh_en_play_async.js)| Zero-shot text-to-speech with a ZipVoice Chinese/English Model using async JS API and live audio playback|
|[./test_tts_non_streaming_kitten_en.js](./test_tts_non_streaming_kitten_en.js)| Text-to-speech with a KittenTTS English Model|
|[./test_tts_non_streaming_kokoro_en.js](./test_tts_non_streaming_kokoro_en.js)| Text-to-speech with a Kokoro English Model|
|[./test_tts_non_streaming_kokoro_zh_en.js](./test_tts_non_streaming_kokoro_zh_en.js)| Text-to-speech with a Kokoro Model supporting Chinese and English|
|[./test_tts_non_streaming_matcha_icefall_en.js](./test_tts_non_streaming_matcha_icefall_en.js)| Text-to-speech with a [MatchaTTS English Model](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker)|
|[./test_tts_non_streaming_matcha_icefall_zh.js](./test_tts_non_streaming_matcha_icefall_zh.js)| Text-to-speech with a [MatchaTTS Chinese Model](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker)|
|[./test_tts_non_streaming_supertonic_en.js](./test_tts_non_streaming_supertonic_en.js)| Text-to-speech with a Supertonic English Model|
|[./test_tts_non_streaming_supertonic_en_async.js](./test_tts_non_streaming_supertonic_en_async.js)| Text-to-speech with a Supertonic English Model using async JS API|
|[./test_tts_non_streaming_supertonic_en_play_async.js](./test_tts_non_streaming_supertonic_en_play_async.js)| Text-to-speech with a Supertonic English Model using async JS API and live audio playback|
|[./test_tts_non_streaming_vits_piper_en.js](./test_tts_non_streaming_vits_piper_en.js)| Text-to-speech with a [piper](https://github.com/rhasspy/piper) English model|
|[./test_tts_non_streaming_vits_coqui_de.js](./test_tts_non_streaming_vits_coqui_de.js)| Text-to-speech with a [coqui](https://github.com/coqui-ai/TTS) German model|
|[./test_tts_non_streaming_vits_zh_ll.js](./test_tts_non_streaming_vits_zh_ll.js)| Text-to-speech with a Chinese model using [cppjieba](https://github.com/yanyiwu/cppjieba)|
|[./test_tts_non_streaming_vits_zh_aishell3.js](./test_tts_non_streaming_vits_zh_aishell3.js)| Text-to-speech with a Chinese TTS model|


### Speaker diarization

```bash

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav

node ./test_offline_speaker_diarization.js
```

### Speech enhancement/denoising

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav

node ./test_offline_speech_enhancement_gtcrn.js
```

### Voice Activity detection (VAD)

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx


# To run the test with a microphone, you need to install the package naudiodon2
npm install naudiodon2

node ./test_vad_microphone.js
```

### Audio tagging with zipformer

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
tar xvf sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
rm sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2

node ./test_audio_tagging_zipformer.js
```

### Audio tagging with CED

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-ced-mini-audio-tagging-2024-09-14.tar.bz2
tar xvf sherpa-onnx-ced-mini-audio-tagging-2024-09-14.tar.bz2
rm sherpa-onnx-ced-mini-audio-tagging-2024-09-14.tar.bz2

node ./test_audio_tagging_ced.js
```

### Streaming speech recognition with Zipformer transducer with homophone replacer
```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
tar xf dict.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt

node ./test_asr_streaming_transducer_with_hr.js
```

### Streaming speech recognition with T-one CTC

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2

node ./test_asr_streaming_t_one_ctc.js
```

### Streaming speech recognition with Zipformer transducer

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2

node ./test_asr_streaming_transducer.js

# To run the test with a microphone, you need to install the package naudiodon2
npm install naudiodon2

node ./test_asr_streaming_transducer_microphone.js
```

### Streaming speech recognition with Zipformer CTC

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2

node ./test_asr_streaming_ctc.js

# To decode with HLG.fst
node ./test_asr_streaming_ctc_hlg.js

# To run the test with a microphone, you need to install the package naudiodon2
npm install naudiodon2

node ./test_asr_streaming_ctc_microphone.js
node ./test_asr_streaming_ctc_hlg_microphone.js
```

### Streaming speech recognition with Paraformer

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2

node ./test_asr_streaming_paraformer.js

# To run the test with a microphone, you need to install the package naudiodon2
npm install naudiodon2

node ./test_asr_streaming_paraformer_microphone.js
```

### Non-streaming speech recognition with Zipformer transducer

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
tar xvf sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
rm sherpa-onnx-zipformer-en-2023-04-01.tar.bz2

node ./test_asr_non_streaming_transducer.js

# To run VAD + non-streaming ASR with transudcer using a microphone
npm install naudiodon2
node ./test_vad_asr_non_streaming_transducer_microphone.js
```

### Non-streaming speech recognition with FireRedAsr
```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2

node ./test_asr_non_streaming_fire_red_asr.js
```

### Non-streaming speech recognition with FireRedAsr CTC
```bash
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
tar xvf sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
rm sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2

node ./test_asr_non_streaming_fire_red_asr_ctc.js
node ./test_asr_non_streaming_fire_red_asr_ctc_async.js
```

### Non-streaming speech recognition with Whisper

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
rm sherpa-onnx-whisper-tiny.en.tar.bz2

node ./test_asr_non_streaming_whisper.js

# To run VAD + non-streaming ASR with Whisper using a microphone
npm install naudiodon2
node ./test_vad_asr_non_streaming_whisper_microphone.js
```

### Non-streaming speech recognition with Moonshine v2

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
tar xvf sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
rm sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2

node ./test_asr_non_streaming_moonshine_v2.js
```

### Non-streaming speech recognition with Moonshine

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2

node ./test_asr_non_streaming_moonshine.js

# To run VAD + non-streaming ASR with Moonshine using a microphone
npm install naudiodon2
node ./test_vad_asr_non_streaming_moonshine_microphone.js
```

### Non-streaming speech recognition with Moonshine + VAD

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

node ./test_vad_with_non_streaming_asr_moonshine.js
```

### Non-streaming speech recognition with Whisper + VAD

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
rm sherpa-onnx-whisper-tiny.en.tar.bz2

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

node ./test_vad_with_non_streaming_asr_whisper.js
```

### Non-streaming speech recognition with Dolphin CTC models

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2

node ./test_asr_non_streaming_dolphin_ctc.js
```

### Non-streaming speech recognition with NeMo parakeet-tdt-0.6b-v2 models

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2
tar xvf sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2
rm sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2

node ./test_asr_non_streaming_nemo_parakeet_tdt_v2.js
```

### Non-streaming speech recognition with Zipformer CTC models

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2

tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2

node ./test_asr_non_streaming_zipformer_ctc.js

# To run VAD + non-streaming ASR with Paraformer using a microphone
npm install naudiodon2
node ./test_vad_asr_non_streaming_zipformer_ctc_microphone.js
```

### Non-streaming speech recognition with NeMo Canary models

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2

node ./test_asr_non_streaming_nemo_canary.js
```

### Non-streaming speech recognition with NeMo CTC models

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
tar xvf sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
rm sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2

node ./test_asr_non_streaming_nemo_ctc.js

# To run VAD + non-streaming ASR with Paraformer using a microphone
npm install naudiodon2
node ./test_vad_asr_non_streaming_nemo_ctc_microphone.js
```

### Asynchronous non-streaming speech recognition with FunASR Nano models

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
tar xvf sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
rm sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2

node ./test_asr_non_streaming_funasr_nano_async.js
```

### Non-streaming speech recognition with FunASR Nano models

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
tar xvf sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
rm sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2

node ./test_asr_non_streaming_funasr_nano.js
```

### Non-streaming speech recognition with Google MedASR CTC models

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
tar xvf sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
rm sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2

node ./test_asr_non_streaming_medasr_ctc.js
```

### Non-streaming speech recognition with Omnilingual ASR CTC models

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
tar xvf sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
rm sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2

node ./test_asr_non_streaming_omnilingual_asr_ctc.js
```

### Non-streaming speech recognition with WeNet CTC models

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2

node ./test_asr_non_streaming_wenet_ctc.js
```

### Non-streaming speech recognition with Paraformer

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
rm sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2

node ./test_asr_non_streaming_paraformer.js

# To run VAD + non-streaming ASR with Paraformer using a microphone
npm install naudiodon2
node ./test_vad_asr_non_streaming_paraformer_microphone.js
```

### Non-streaming speech recognition with SenseVoice with homophone replacer
```bash
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
tar xf dict.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt

node ./test_asr_non_streaming_sense_voice_with_hr.js
```

### Non-streaming speech recognition with SenseVoice

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2

node ./test_asr_non_streaming_sense_voice.js

# To run VAD + non-streaming ASR with Paraformer using a microphone
npm install naudiodon2
node ./test_vad_asr_non_streaming_sense_voice_microphone.js
```

### Zero-shot text-to-speech with PocketTTS models (English TTS, async API)

```bash
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
tar xf sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
rm sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2

node ./test_tts_non_streaming_pocket_en_async.js
```

### Zero-shot text-to-speech with PocketTTS models (English TTS, async API + playback)

```bash
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
tar xf sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
rm sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2

npm install speaker

node ./test_tts_non_streaming_pocket_en_play_async.js
```

### Zero-shot text-to-speech with PocketTTS models (English TTS, sync API)

```bash
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
tar xf sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
rm sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2

node ./test_tts_non_streaming_pocket_en.js
```

### Zero-shot text-to-speech with ZipVoice models (Chinese/English TTS, async API)

```bash
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
tar xf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
rm sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos_24khz.onnx

# The reference text must match the reference audio transcript.
node ./test_tts_non_streaming_zipvoice_zh_en_async.js
```

### Zero-shot text-to-speech with ZipVoice models (Chinese/English TTS, async API + playback)

```bash
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
tar xf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
rm sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos_24khz.onnx

# Install the playback package once.
npm install speaker

# The reference text must match the reference audio transcript.
node ./test_tts_non_streaming_zipvoice_zh_en_play_async.js
```

### Zero-shot text-to-speech with ZipVoice models (Chinese/English TTS, sync API)

```bash
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
tar xf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
rm sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos_24khz.onnx

# The reference text must match the reference audio transcript.
node ./test_tts_non_streaming_zipvoice_zh_en.js
```

### Text-to-speech with KittenTTS models (English TTS)

```bash
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2
tar xf kitten-nano-en-v0_1-fp16.tar.bz2
rm kitten-nano-en-v0_1-fp16.tar.bz2

node ./test_tts_non_streaming_kitten_en.js
```

### Text-to-speech with Supertonic TTS models (English TTS, sync API)

```bash
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
tar xf sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
rm sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2

node ./test_tts_non_streaming_supertonic_en.js
```

### Text-to-speech with Supertonic TTS models (English TTS, async API)

```bash
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
tar xf sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
rm sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2

node ./test_tts_non_streaming_supertonic_en_async.js
```

### Text-to-speech with Supertonic TTS models (English TTS, async API + playback)

```bash
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
tar xf sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
rm sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2

npm install speaker

node ./test_tts_non_streaming_supertonic_en_play_async.js
```

### Text-to-speech with Kokoro TTS models (Chinese + English TTS)

```bash
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
tar xf kokoro-multi-lang-v1_0.tar.bz2
rm kokoro-multi-lang-v1_0.tar.bz2

node ./test_tts_non_streaming_kokoro_zh_en.js
```

### Text-to-speech with Kokoro TTS models (English TTS)

```bash
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
tar xf kokoro-en-v0_19.tar.bz2
rm kokoro-en-v0_19.tar.bz2

node ./test_tts_non_streaming_kokoro_en.js
```

### Text-to-speech with MatchaTTS models (English TTS)
```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
tar xf matcha-icefall-en_US-ljspeech.tar.bz2
rm matcha-icefall-en_US-ljspeech.tar.bz2

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx

node ./test_tts_non_streaming_matcha_icefall_en.js
```

### Text-to-speech with MatchaTTS models (Chinese TTS)
```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
tar xvf matcha-icefall-zh-baker.tar.bz2
rm matcha-icefall-zh-baker.tar.bz2

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx

node ./test_tts_non_streaming_matcha_icefall_zh.js
```

### Text-to-speech with piper VITS models (TTS)

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_GB-cori-medium.tar.bz2
tar xvf vits-piper-en_GB-cori-medium.tar.bz2
rm vits-piper-en_GB-cori-medium.tar.bz2

node ./test_tts_non_streaming_vits_piper_en.js
```

### Text-to-speech with piper Coqui-ai/TTS models (TTS)

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2
tar xvf vits-coqui-de-css10.tar.bz2
rm vits-coqui-de-css10.tar.bz2

node ./test_tts_non_streaming_vits_coqui_de.js
```

### Text-to-speech with vits Chinese models (1/2)

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-vits-zh-ll.tar.bz2
tar xvf sherpa-onnx-vits-zh-ll.tar.bz2
rm sherpa-onnx-vits-zh-ll.tar.bz2

node ./test_tts_non_streaming_vits_zh_ll.js
```

### Text-to-speech with vits Chinese models (2/2)

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
tar xvf vits-icefall-zh-aishell3.tar.bz2
rm vits-icefall-zh-aishell3.tar.bz2

node ./test_tts_non_streaming_vits_zh_aishell3.js
```

### Spoken language identification with Whisper multi-lingual models

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.tar.bz2
rm sherpa-onnx-whisper-tiny.tar.bz2

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/spoken-language-identification-test-wavs.tar.bz2
tar xvf spoken-language-identification-test-wavs.tar.bz2
rm spoken-language-identification-test-wavs.tar.bz2

node ./test_spoken_language_identification.js

# To run VAD + spoken language identification using a microphone
npm install naudiodon2
node ./test_vad_spoken_language_identification_microphone.js
```

### Speaker identification

You can find more models at
<https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models>

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

git clone https://github.com/csukuangfj/sr-data

node ./test_speaker_identification.js
```

### Add punctuations

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2

node ./test_offline_punctuation.js
```

### Online punctuation

```bash
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
tar xvf sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
rm sherpa-onnx-online-punct-en-2024-08-06.tar.bz2

node ./test_online_punctuation.js
```

## Keyword spotting

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2

node ./test_keyword_spotter_transducer.js

# To run keyword spotting using a microphone
npm install naudiodon2
node ./test_keyword_spotter_transducer_microphone.js
```


================================================
FILE: nodejs-addon-examples/package.json
================================================
{
  "dependencies": {
    "sherpa-onnx-node": "^1.12.31"
  }
}


================================================
FILE: nodejs-addon-examples/test_asr_non_streaming_dolphin_ctc.js
================================================
// Copyright (c)  2025  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'dolphin': {
      'model':
          './sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx',
    },
    'tokens':
        './sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  }
};

const waveFilename =
    './sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav';

const recognizer = new sherpa_onnx.OfflineRecognizer(config);
console.log('Started');
let start = Date.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

recognizer.decode(stream);
const result = recognizer.getResult(stream);
let stop = Date.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);


================================================
FILE: nodejs-addon-examples/test_asr_non_streaming_fire_red_asr.js
================================================
// Copyright (c)  2025  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'fireRedAsr': {
      'encoder':
          './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx',
      'decoder':
          './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/decoder.int8.onnx',
    },
    'tokens': './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  }
};

const waveFilename =
    './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/0.wav';

const recognizer = new sherpa_onnx.OfflineRecognizer(config);
console.log('Started');
let start = Date.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

recognizer.decode(stream);
const result = recognizer.getResult(stream);
let stop = Date.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);


================================================
FILE: nodejs-addon-examples/test_asr_non_streaming_fire_red_asr_ctc.js
================================================
// Copyright (c)  2026  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'fireRedAsrCtc': {
      'model':
          './sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/model.int8.onnx',
    },
    'tokens':
        './sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  }
};

const waveFilename =
    './sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/test_wavs/1.wav';

const recognizer = new sherpa_onnx.OfflineRecognizer(config);
console.log('Started');
let start = Date.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

recognizer.decode(stream);
const result = recognizer.getResult(stream);
let stop = Date.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);


================================================
FILE: nodejs-addon-examples/test_asr_non_streaming_fire_red_asr_ctc_async.js
================================================
// Copyright (c)  2026  Xiaomi Corporation
//  This file shows how to use the async API to decode multiple files
const path = require('path');
const sherpa_onnx = require('sherpa-onnx-node');

/**
 * Create an OfflineRecognizer with FireRedASR CTC model asynchronously.
 */
async function createRecognizerAsync(numThreads = 2, debug = 1) {
  const config = {
    featConfig: {
      sampleRate: 16000,
      featureDim: 80,
    },
    modelConfig: {
      fireRedAsrCtc: {
        model:
            './sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/model.int8.onnx',
      },
      tokens:
          './sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/tokens.txt',
      numThreads,
      provider: 'cpu',
      debug,
    },
  };

  // Use the async C++ API to create recognizer without blocking Node.js
  return await sherpa_onnx.OfflineRecognizer.createAsync(config);
}

/**
 * Read a waveform and create a stream for decoding.
 */
function createStreamFromFile(recognizer, file) {
  const wave = sherpa_onnx.readWave(file);
  const stream = recognizer.createStream();
  stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
  return stream;
}

async function main() {
  const modelDir = './sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25';

  // Async recognizer creation
  const recognizer = await createRecognizerAsync(modelDir);

  const testFiles = [
    'test_wavs/0.wav',
    'test_wavs/1.wav',
    'test_wavs/2.wav',
    'test_wavs/3-sichuan.wav',
    'test_wavs/3.wav',
    'test_wavs/4-tianjin.wav',
    'test_wavs/5-henan.wav',
    'test_wavs/8k.wav',
  ].map(f => path.join(modelDir, f));

  // Create streams for each file
  const streams = testFiles.map(file => createStreamFromFile(recognizer, file));

  // Decode all streams concurrently
  const results =
      await Promise.all(streams.map(stream => recognizer.decodeAsync(stream)));

  console.log('Concurrent decode results:');
  testFiles.forEach((file, i) => {
    console.log(`${file}: ${results[i].text}`);
  });
}

main().catch(console.error);


================================================
FILE: nodejs-addon-examples/test_asr_non_streaming_funasr_nano.js
================================================
// Copyright (c)  2026  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'funasrNano': {
      'encoderAdaptor':
          './sherpa-onnx-funasr-nano-int8-2025-12-30/encoder_adaptor.int8.onnx',
      'llm': './sherpa-onnx-funasr-nano-int8-2025-12-30/llm.int8.onnx',
      'embedding':
          './sherpa-onnx-funasr-nano-int8-2025-12-30/embedding.int8.onnx',
      'tokenizer': './sherpa-onnx-funasr-nano-int8-2025-12-30/Qwen3-0.6B',
    },
    'tokens': '',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  }
};

const waveFilename =
    './sherpa-onnx-funasr-nano-int8-2025-12-30/test_wavs/lyrics.wav';

const recognizer = new sherpa_onnx.OfflineRecognizer(config);
console.log('Started');
let start = Date.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

recognizer.decode(stream);
const result = recognizer.getResult(stream);
let stop = Date.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);


================================================
FILE: nodejs-addon-examples/test_asr_non_streaming_funasr_nano_async.js
================================================
// Copyright (c)  2026  Xiaomi Corporation
//  This file shows how to use the async API to decode multiple files
const path = require('path');
const sherpa_onnx = require('sherpa-onnx-node');

/**
 * Create an OfflineRecognizer with FunASR Nano model asynchronously.
 */
async function createRecognizerAsync(modelDir, numThreads = 2, debug = 1) {
  const config = {
    featConfig: {
      sampleRate: 16000,
      featureDim: 80,
    },
    modelConfig: {
      funasrNano: {
        encoderAdaptor: path.join(modelDir, 'encoder_adaptor.int8.onnx'),
        llm: path.join(modelDir, 'llm.int8.onnx'),
        embedding: path.join(modelDir, 'embedding.int8.onnx'),
        tokenizer: path.join(modelDir, 'Qwen3-0.6B'),
      },
      tokens: '',
      numThreads,
      provider: 'cpu',
      debug,
    },
  };

  // Use the async C++ API to create recognizer without blocking Node.js
  return await sherpa_onnx.OfflineRecognizer.createAsync(config);
}

/**
 * Read a waveform and create a stream for decoding.
 */
function createStreamFromFile(recognizer, file) {
  const wave = sherpa_onnx.readWave(file);
  const stream = recognizer.createStream();
  stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
  return stream;
}

async function main() {
  const modelDir = './sherpa-onnx-funasr-nano-int8-2025-12-30';

  // Async recognizer creation
  const recognizer = await createRecognizerAsync(modelDir);

  const testFiles = [
    'test_wavs/lyrics_en_1.wav',
    'test_wavs/lyrics_en_2.wav',
    'test_wavs/lyrics_en_3.wav',
  ].map(f => path.join(modelDir, f));

  // Create streams for each file
  const streams = testFiles.map(file => createStreamFromFile(recognizer, file));

  // Decode all streams concurrently
  const results =
      await Promise.all(streams.map(stream => recognizer.decodeAsync(stream)));

  console.log('Concurrent decode results:');
  testFiles.forEach((file, i) => {
    console.log(`${file}: ${results[i].text}`);
  });
}

main().catch(console.error);


================================================
FILE: nodejs-addon-examples/test_asr_non_streaming_medasr_ctc.js
================================================
// Copyright (c)  2025  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'medasr': {
      'model': './sherpa-onnx-medasr-ctc-en-int8-2025-12-25/model.int8.onnx',
    },
    'tokens': './sherpa-onnx-medasr-ctc-en-int8-2025-12-25/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  }
};

const waveFilename =
    './sherpa-onnx-medasr-ctc-en-int8-2025-12-25/test_wavs/0.wav';

const recognizer = new sherpa_onnx.OfflineRecognizer(config);
console.log('Started');
let start = Date.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

recognizer.decode(stream);
const result = recognizer.getResult(stream);
let stop = Date.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);


================================================
FILE: nodejs-addon-examples/test_asr_non_streaming_moonshine.js
================================================
// Copyright (c)  2024  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'moonshine': {
      'preprocessor': './sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx',
      'encoder': './sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx',
      'uncachedDecoder':
          './sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx',
      'cachedDecoder':
          './sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx',
    },
    'tokens': './sherpa-onnx-moonshine-tiny-en-int8/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  }
};

const waveFilename = './sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav';

const recognizer = new sherpa_onnx.OfflineRecognizer(config);
console.log('Started');
let start = Date.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

recognizer.decode(stream);
const result = recognizer.getResult(stream);
let stop = Date.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);


================================================
FILE: nodejs-addon-examples/test_asr_non_streaming_moonshine_v2.js
================================================
// Copyright (c)  2026  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'moonshine': {
      'encoder':
          './sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/encoder_model.ort',
      'mergedDecoder':
          './sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/decoder_model_merged.ort',
    },
    'tokens': './sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  }
};

const waveFilename =
    './sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/test_wavs/0.wav';

const recognizer = new sherpa_onnx.OfflineRecognizer(config);
console.log('Started');
let start = Date.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

recognizer.decode(stream);
const result = recognizer.getResult(stream);
let stop = Date.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);


================================================
FILE: nodejs-addon-examples/test_asr_non_streaming_nemo_canary.js
================================================
// Copyright (c)  2024  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'canary': {
      'encoder':
          './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx',
      'decoder':
          './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx',
      'srcLang': 'en',
      'tgtLang': 'en',
      'usePnc': 1,
    },
    'tokens':
        './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 0,
  }
};

const waveFilename =
    './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav';

const recognizer = new sherpa_onnx.OfflineRecognizer(config);
console.log('Started');
let start = Date.now();
let stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

recognizer.decode(stream);
let result = recognizer.getResult(stream);
let stop = Date.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result (English)\n', result);

stream = recognizer.createStream();
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
recognizer.config.modelConfig.canary.tgtLang = 'de';
recognizer.setConfig(recognizer.config);

recognizer.decode(stream);
result = recognizer.getResult(stream);
console.log('result (German)\n', result);


================================================
FILE: nodejs-addon-examples/test_asr_non_streaming_nemo_ctc.js
================================================
// Copyright (c)  2024  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'nemoCtc': {
      'model':
          './sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/model.onnx',
    },
    'tokens':
        './sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  }
};

const waveFilename =
    './sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/test_wavs/de-german.wav';

const recognizer = new sherpa_onnx.OfflineRecognizer(config);
console.log('Started');
let start = Date.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

recognizer.decode(stream);
const result = recognizer.getResult(stream);
let stop = Date.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);


================================================
FILE: nodejs-addon-examples/test_asr_non_streaming_nemo_parakeet_tdt_v2.js
================================================
// Copyright (c)  2025  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'transducer': {
      'encoder':
          './sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/encoder.int8.onnx',
      'decoder':
          './sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/decoder.int8.onnx',
      'joiner': './sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/joiner.int8.onnx',
    },
    'tokens': './sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
    'modelType': 'nemo_transducer',
  }
};

const waveFilename =
    './sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/test_wavs/0.wav';

const recognizer = new sherpa_onnx.OfflineRecognizer(config);
console.log('Started');
let start = Date.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

recognizer.decode(stream);
const result = recognizer.getResult(stream);
let stop = Date.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);


================================================
FILE: nodejs-addon-examples/test_asr_non_streaming_omnilingual_asr_ctc.js
================================================
// Copyright (c)  2025  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'omnilingual': {
      'model':
          './sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/model.int8.onnx',
    },
    'tokens':
        './sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  }
};

const waveFilename =
    './sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/test_wavs/en.wav';

const recognizer = new sherpa_onnx.OfflineRecognizer(config);
console.log('Started');
let start = Date.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

recognizer.decode(stream);
const result = recognizer.getResult(stream);
let stop = Date.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);


================================================
FILE: nodejs-addon-examples/test_asr_non_streaming_paraformer.js
================================================
// Copyright (c)  2024  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'paraformer': {
      'model': './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx',
    },
    'tokens': './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  }
};

const waveFilename =
    './sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/5-henan.wav';

const recognizer = new sherpa_onnx.OfflineRecognizer(config);
console.log('Started');
let start = Date.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

recognizer.decode(stream);
const result = recognizer.getResult(stream);
let stop = Date.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);


================================================
FILE: nodejs-addon-examples/test_asr_non_streaming_paraformer_itn.js
================================================
// Copyright (c)  2024  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'paraformer': {
      'model': './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx',
    },
    'tokens': './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  },
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
  ruleFsts: './itn_zh_number.fst',
};

// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
const waveFilename = './itn-zh-number.wav';

const recognizer = new sherpa_onnx.OfflineRecognizer(config);
console.log('Started');
let start = Date.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

recognizer.decode(stream);
const result = recognizer.getResult(stream);
let stop = Date.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);


================================================
FILE: nodejs-addon-examples/test_asr_non_streaming_sense_voice.js
================================================
// Copyright (c)  2024  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models


// If your path contains non-ascii characters, e.g., Chinese, you can use
// the following code
//

// let encoder = new TextEncoder();
// let tokens = encoder.encode(
//     './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/测试.txt');
// let model = encoder.encode(
//     './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/测试.int8.onnx');


const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'senseVoice': {
      'model':
          './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx',
      // 'model': model,
      'useInverseTextNormalization': 1,
    },
    'tokens': './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt',
    // 'tokens': tokens,
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  }
};

const waveFilename =
    './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav';

const recognizer = new sherpa_onnx.OfflineRecognizer(config);
console.log('Started');
let start = Date.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

recognizer.decode(stream);
const result = recognizer.getResult(stream);
let stop = Date.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);


================================================
FILE: nodejs-addon-examples/test_asr_non_streaming_sense_voice_with_hr.js
================================================
// Copyright (c)  2025  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/hr-files


// If your path contains non-ascii characters, e.g., Chinese, you can use
// the following code
//

// let encoder = new TextEncoder();
// let tokens = encoder.encode(
//     './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/测试.txt');
// let model = encoder.encode(
//     './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/测试.int8.onnx');


const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'senseVoice': {
      'model':
          './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx',
      // 'model': model,
      'useInverseTextNormalization': 1,
    },
    'tokens': './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt',
    // 'tokens': tokens,
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  },
  'hr': {
    // Please download files from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/hr-files
    'lexicon': './lexicon.txt',
    'ruleFsts': './replace.fst',
  }
};

const waveFilename = './test-hr.wav';

const recognizer = new sherpa_onnx.OfflineRecognizer(config);
console.log('Started');
let start = Date.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

recognizer.decode(stream);
const result = recognizer.getResult(stream);
let stop = Date.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);


================================================
FILE: nodejs-addon-examples/test_asr_non_streaming_transducer.js
================================================
// Copyright (c)  2024  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'transducer': {
      'encoder':
          './sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.int8.onnx',
      'decoder':
          './sherpa-onnx-zipformer-en-2023-04-01/decoder-epoch-99-avg-1.onnx',
      'joiner':
          './sherpa-onnx-zipformer-en-2023-04-01/joiner-epoch-99-avg-1.int8.onnx',
    },
    'tokens': './sherpa-onnx-zipformer-en-2023-04-01/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  }
};

const waveFilename = './sherpa-onnx-zipformer-en-2023-04-01/test_wavs/1.wav';

const recognizer = new sherpa_onnx.OfflineRecognizer(config);
console.log('Started');
let start = Date.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

recognizer.decode(stream);
const result = recognizer.getResult(stream);
let stop = Date.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);


================================================
FILE: nodejs-addon-examples/test_asr_non_streaming_wenet_ctc.js
================================================
// Copyright (c)  2024  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'wenetCtc': {
      'model':
          './sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx',
    },
    'tokens':
        './sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  }
};

const waveFilename =
    './sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav';

const recognizer = new sherpa_onnx.OfflineRecognizer(config);
console.log('Started');
let start = Date.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

recognizer.decode(stream);
const result = recognizer.getResult(stream);
let stop = Date.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);


================================================
FILE: nodejs-addon-examples/test_asr_non_streaming_whisper.js
================================================
// Copyright (c)  2024  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');
console.log(`version : ${sherpa_onnx.version}`);
console.log(`git sha1: ${sherpa_onnx.gitSha1}`);
console.log(`git date: ${sherpa_onnx.gitDate}`);

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'whisper': {
      'encoder': './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx',
      'decoder': './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx',
    },
    'tokens': './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  }
};

const waveFilename = './sherpa-onnx-whisper-tiny.en/test_wavs/0.wav';

const recognizer = new sherpa_onnx.OfflineRecognizer(config);
console.log('Started');
let start = Date.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

recognizer.decode(stream);
const result = recognizer.getResult(stream);
let stop = Date.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);


================================================
FILE: nodejs-addon-examples/test_asr_non_streaming_zipformer_ctc.js
================================================
// Copyright (c)  2025  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'zipformerCtc': {
      'model': './sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx',
    },
    'tokens': './sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  }
};

const waveFilename =
    './sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/test_wavs/0.wav';

const recognizer = new sherpa_onnx.OfflineRecognizer(config);
console.log('Started');
let start = Date.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

recognizer.decode(stream);
const result = recognizer.getResult(stream);
let stop = Date.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);


================================================
FILE: nodejs-addon-examples/test_asr_streaming_ctc.js
================================================
// Copyright (c)  2024  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'zipformer2Ctc': {
      'model':
          './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx',
    },
    'tokens':
        './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  }
};

const waveFilename =
    './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/0.wav';

const recognizer = new sherpa_onnx.OnlineRecognizer(config);
console.log('Started');
let start = Date.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

const tailPadding = new Float32Array(wave.sampleRate * 0.4);
stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});

while (recognizer.isReady(stream)) {
  recognizer.decode(stream);
}
const result = recognizer.getResult(stream);
let stop = Date.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);


================================================
FILE: nodejs-addon-examples/test_asr_streaming_ctc_hlg.js
================================================
// Copyright (c)  2024  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'zipformer2Ctc': {
      'model':
          './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx',
    },
    'tokens':
        './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  },
  'ctcFstDecoderConfig': {
    'graph': './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst',
  },
};

const waveFilename =
    './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/1.wav';

const recognizer = new sherpa_onnx.OnlineRecognizer(config);
console.log('Started');
let start = Date.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

const tailPadding = new Float32Array(wave.sampleRate * 0.4);
stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});

while (recognizer.isReady(stream)) {
  recognizer.decode(stream);
}
const result = recognizer.getResult(stream);
let stop = Date.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);


================================================
FILE: nodejs-addon-examples/test_asr_streaming_ctc_hlg_microphone.js
================================================
// Copyright (c)  2023-2024  Xiaomi Corporation (authors: Fangjun Kuang)
//
const portAudio = require('naudiodon2');
// console.log(portAudio.getDevices());

const sherpa_onnx = require('sherpa-onnx-node');

function createOnlineRecognizer() {
  const config = {
    'featConfig': {
      'sampleRate': 16000,
      'featureDim': 80,
    },
    'modelConfig': {
      'zipformer2Ctc': {
        'model':
            './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx',
      },
      'tokens':
          './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt',
      'numThreads': 2,
      'provider': 'cpu',
      'debug': 1,
    },
    'ctcFstDecoderConfig': {
      'graph': './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst',
    },
    'enableEndpoint': true,
    'rule1MinTrailingSilence': 2.4,
    'rule2MinTrailingSilence': 1.2,
    'rule3MinUtteranceLength': 20
  };

  return new sherpa_onnx.OnlineRecognizer(config);
}

const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();

let lastText = '';
let segmentIndex = 0;

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: recognizer.config.featConfig.sampleRate
  }
});

const display = new sherpa_onnx.Display(50);

ai.on('data', data => {
  const samples = new Float32Array(data.buffer);

  stream.acceptWaveform(
      {sampleRate: recognizer.config.featConfig.sampleRate, samples: samples});

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  const isEndpoint = recognizer.isEndpoint(stream);
  const text = recognizer.getResult(stream).text.toLowerCase();

  if (text.length > 0 && lastText != text) {
    lastText = text;
    display.print(segmentIndex, lastText);
  }
  if (isEndpoint) {
    if (text.length > 0) {
      lastText = text;
      segmentIndex += 1;
    }
    recognizer.reset(stream);
  }
});

ai.start();
console.log('Started! Please speak');


================================================
FILE: nodejs-addon-examples/test_asr_streaming_ctc_microphone.js
================================================
// Copyright (c)  2023-2024  Xiaomi Corporation (authors: Fangjun Kuang)
//
const portAudio = require('naudiodon2');
// console.log(portAudio.getDevices());

const sherpa_onnx = require('sherpa-onnx-node');

function createOnlineRecognizer() {
  const config = {
    'featConfig': {
      'sampleRate': 16000,
      'featureDim': 80,
    },
    'modelConfig': {
      'zipformer2Ctc': {
        'model':
            './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx',
      },
      'tokens':
          './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt',
      'numThreads': 2,
      'provider': 'cpu',
      'debug': 1,
    },
    'decodingMethod': 'greedy_search',
    'maxActivePaths': 4,
    'enableEndpoint': true,
    'rule1MinTrailingSilence': 2.4,
    'rule2MinTrailingSilence': 1.2,
    'rule3MinUtteranceLength': 20
  };

  return new sherpa_onnx.OnlineRecognizer(config);
}

const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();

let lastText = '';
let segmentIndex = 0;

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: recognizer.config.featConfig.sampleRate
  }
});

const display = new sherpa_onnx.Display(50);

ai.on('data', data => {
  const samples = new Float32Array(data.buffer);

  stream.acceptWaveform(
      {sampleRate: recognizer.config.featConfig.sampleRate, samples: samples});

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  const isEndpoint = recognizer.isEndpoint(stream);
  const text = recognizer.getResult(stream).text.toLowerCase();

  if (text.length > 0 && lastText != text) {
    lastText = text;
    display.print(segmentIndex, lastText);
  }
  if (isEndpoint) {
    if (text.length > 0) {
      lastText = text;
      segmentIndex += 1;
    }
    recognizer.reset(stream);
  }
});


ai.start();
console.log('Started! Please speak');


================================================
FILE: nodejs-addon-examples/test_asr_streaming_paraformer.js
================================================
// Copyright (c)  2024  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'paraformer': {
      'encoder':
          './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx',
      'decoder':
          './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx',
    },
    'tokens': './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  }
};

const waveFilename =
    './sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav';

const recognizer = new sherpa_onnx.OnlineRecognizer(config);
console.log('Started');
let start = Date.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

const tailPadding = new Float32Array(wave.sampleRate * 0.4);
stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});

while (recognizer.isReady(stream)) {
  recognizer.decode(stream);
}
const result = recognizer.getResult(stream);
let stop = Date.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);


================================================
FILE: nodejs-addon-examples/test_asr_streaming_paraformer_microphone.js
================================================
// Copyright (c)  2023-2024  Xiaomi Corporation (authors: Fangjun Kuang)
//
const portAudio = require('naudiodon2');
// console.log(portAudio.getDevices());

const sherpa_onnx = require('sherpa-onnx-node');

function createOnlineRecognizer() {
  const config = {
    'featConfig': {
      'sampleRate': 16000,
      'featureDim': 80,
    },
    'modelConfig': {
      'paraformer': {
        'encoder':
            './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx',
        'decoder':
            './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx',
      },
      'tokens': './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt',
      'numThreads': 2,
      'provider': 'cpu',
      'debug': 1,
    },
    'decodingMethod': 'greedy_search',
    'maxActivePaths': 4,
    'enableEndpoint': true,
    'rule1MinTrailingSilence': 2.4,
    'rule2MinTrailingSilence': 1.2,
    'rule3MinUtteranceLength': 20
  };

  return new sherpa_onnx.OnlineRecognizer(config);
}

const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();

let lastText = '';
let segmentIndex = 0;

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: recognizer.config.featConfig.sampleRate
  }
});

const display = new sherpa_onnx.Display(50);

ai.on('data', data => {
  const samples = new Float32Array(data.buffer);

  stream.acceptWaveform(
      {sampleRate: recognizer.config.featConfig.sampleRate, samples: samples});

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  const isEndpoint = recognizer.isEndpoint(stream);
  let text = recognizer.getResult(stream).text.toLowerCase();

  if (isEndpoint) {
    // for online paraformer models, we have to manually padding on endpoint
    // so that the last word can be recognized
    const tailPadding =
        new Float32Array(recognizer.config.featConfig.sampleRate * 0.4);
    stream.acceptWaveform({
      samples: tailPadding,
      sampleRate: recognizer.config.featConfig.sampleRate
    });
    while (recognizer.isReady(stream)) {
      recognizer.decode(stream);
    }
    text = recognizer.getResult(stream).text.toLowerCase();
  }

  if (text.length > 0 && lastText != text) {
    lastText = text;
    display.print(segmentIndex, lastText);
  }
  if (isEndpoint) {
    if (text.length > 0) {
      lastText = text;
      segmentIndex += 1;
    }
    recognizer.reset(stream);
  }
});

ai.start();
console.log('Started! Please speak');


================================================
FILE: nodejs-addon-examples/test_asr_streaming_t_one_ctc.js
================================================
// Copyright (c)  2025  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
  'modelConfig': {
    'toneCtc': {
      'model': './sherpa-onnx-streaming-t-one-russian-2025-09-08/model.onnx',
    },
    'tokens': './sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  }
};

const waveFilename = './sherpa-onnx-streaming-t-one-russian-2025-09-08/0.wav';

const recognizer = new sherpa_onnx.OnlineRecognizer(config);
console.log('Started');
let start = Date.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);

const leftPadding = new Float32Array(wave.sampleRate * 0.3);
stream.acceptWaveform({samples: leftPadding, sampleRate: wave.sampleRate});

stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

const tailPadding = new Float32Array(wave.sampleRate * 0.6);
stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});

while (recognizer.isReady(stream)) {
  recognizer.decode(stream);
}
const result = recognizer.getResult(stream);
let stop = Date.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);


================================================
FILE: nodejs-addon-examples/test_asr_streaming_transducer.js
================================================
// Copyright (c)  2024  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'transducer': {
      'encoder':
          './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx',
      'decoder':
          './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx',
      'joiner':
          './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx',
    },
    'tokens':
        './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  }
};

const waveFilename =
    './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav';

const recognizer = new sherpa_onnx.OnlineRecognizer(config);
console.log('Started');
let start = Date.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

const tailPadding = new Float32Array(wave.sampleRate * 0.4);
stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});

while (recognizer.isReady(stream)) {
  recognizer.decode(stream);
}
const result = recognizer.getResult(stream);
let stop = Date.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);


================================================
FILE: nodejs-addon-examples/test_asr_streaming_transducer_itn.js
================================================
// Copyright (c)  2024  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'transducer': {
      'encoder':
          './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx',
      'decoder':
          './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx',
      'joiner':
          './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx',
    },
    'tokens':
        './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  },
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
  ruleFsts: './itn_zh_number.fst',
};

// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
const waveFilename = './itn-zh-number.wav';

const recognizer = new sherpa_onnx.OnlineRecognizer(config);
console.log('Started');
let start = Date.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

const tailPadding = new Float32Array(wave.sampleRate * 0.4);
stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});

while (recognizer.isReady(stream)) {
  recognizer.decode(stream);
}
const result = recognizer.getResult(stream);
let stop = Date.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);


================================================
FILE: nodejs-addon-examples/test_asr_streaming_transducer_microphone.js
================================================
// Copyright (c)  2023-2024  Xiaomi Corporation (authors: Fangjun Kuang)
//
const portAudio = require('naudiodon2');
// console.log(portAudio.getDevices());

const sherpa_onnx = require('sherpa-onnx-node');

function createOnlineRecognizer() {
  const config = {
    'featConfig': {
      'sampleRate': 16000,
      'featureDim': 80,
    },
    'modelConfig': {
      'transducer': {
        'encoder':
            './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx',
        'decoder':
            './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx',
        'joiner':
            './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx',
      },
      'tokens':
          './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt',
      'numThreads': 2,
      'provider': 'cpu',
      'debug': 1,
    },
    'decodingMethod': 'greedy_search',
    'maxActivePaths': 4,
    'enableEndpoint': true,
    'rule1MinTrailingSilence': 2.4,
    'rule2MinTrailingSilence': 1.2,
    'rule3MinUtteranceLength': 20
  };

  return new sherpa_onnx.OnlineRecognizer(config);
}

const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();

let lastText = '';
let segmentIndex = 0;

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: recognizer.config.featConfig.sampleRate
  }
});

const display = new sherpa_onnx.Display(50);

ai.on('data', data => {
  const samples = new Float32Array(data.buffer);

  stream.acceptWaveform(
      {sampleRate: recognizer.config.featConfig.sampleRate, samples: samples});

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  const isEndpoint = recognizer.isEndpoint(stream);
  const text = recognizer.getResult(stream).text.toLowerCase();

  if (text.length > 0 && lastText != text) {
    lastText = text;
    display.print(segmentIndex, lastText);
  }
  if (isEndpoint) {
    if (text.length > 0) {
      lastText = text;
      segmentIndex += 1;
    }
    recognizer.reset(stream);
  }
});

ai.start();
console.log('Started! Please speak');


================================================
FILE: nodejs-addon-examples/test_asr_streaming_transducer_microphone_itn.js
================================================
// Copyright (c)  2023-2024  Xiaomi Corporation (authors: Fangjun Kuang)
//
const portAudio = require('naudiodon2');
// console.log(portAudio.getDevices());

const sherpa_onnx = require('sherpa-onnx-node');

function createOnlineRecognizer() {
  const config = {
    'featConfig': {
      'sampleRate': 16000,
      'featureDim': 80,
    },
    'modelConfig': {
      'transducer': {
        'encoder':
            './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx',
        'decoder':
            './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx',
        'joiner':
            './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx',
      },
      'tokens':
          './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt',
      'numThreads': 2,
      'provider': 'cpu',
      'debug': 1,
    },
    'decodingMethod': 'greedy_search',
    'maxActivePaths': 4,
    'enableEndpoint': true,
    'rule1MinTrailingSilence': 2.4,
    'rule2MinTrailingSilence': 1.2,
    'rule3MinUtteranceLength': 20,
    // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
    ruleFsts: './itn_zh_number.fst',
  };

  return new sherpa_onnx.OnlineRecognizer(config);
}

const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();

let lastText = '';
let segmentIndex = 0;

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: recognizer.config.featConfig.sampleRate
  }
});

const display = new sherpa_onnx.Display(50);

ai.on('data', data => {
  const samples = new Float32Array(data.buffer);

  stream.acceptWaveform(
      {sampleRate: recognizer.config.featConfig.sampleRate, samples: samples});

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  const isEndpoint = recognizer.isEndpoint(stream);
  const text = recognizer.getResult(stream).text.toLowerCase();

  if (text.length > 0 && lastText != text) {
    lastText = text;
    display.print(segmentIndex, lastText);
  }
  if (isEndpoint) {
    if (text.length > 0) {
      lastText = text;
      segmentIndex += 1;
    }
    recognizer.reset(stream);
  }
});

ai.start();
console.log('Started! Please speak');


================================================
FILE: nodejs-addon-examples/test_asr_streaming_transducer_with_hr.js
================================================
// Copyright (c)  2025  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'transducer': {
      'encoder':
          './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx',
      'decoder':
          './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx',
      'joiner':
          './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx',
    },
    'tokens':
        './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  },
  'hr': {
    'lexicon': './lexicon.txt',
    'ruleFsts': './replace.fst',
  },
};

const waveFilename = './test-hr.wav';

const recognizer = new sherpa_onnx.OnlineRecognizer(config);
console.log('Started');
let start = Date.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

const tailPadding = new Float32Array(wave.sampleRate * 0.4);
stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});

while (recognizer.isReady(stream)) {
  recognizer.decode(stream);
}
const result = recognizer.getResult(stream);
let stop = Date.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);


================================================
FILE: nodejs-addon-examples/test_audio_tagging_ced.js
================================================
// Copyright (c)  2024  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// Please download models files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models
function createAudioTagging() {
  const config = {
    model: {
      ced: './sherpa-onnx-ced-mini-audio-tagging-2024-04-19/model.int8.onnx',
      numThreads: 1,
      debug: true,
    },
    labels:
        './sherpa-onnx-ced-mini-audio-tagging-2024-04-19/class_labels_indices.csv',
    topK: 5,
  };
  return new sherpa_onnx.AudioTagging(config);
}

const at = createAudioTagging();

const testWaves = [
  './sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/1.wav',
  './sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/2.wav',
  './sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/3.wav',
  './sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/4.wav',
  './sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/5.wav',
  './sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/6.wav',
  './sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/7.wav',
  './sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/8.wav',
  './sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/9.wav',
  './sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/10.wav',
  './sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/11.wav',
  './sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/12.wav',
  './sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/13.wav',
];

console.log('------');

for (let filename of testWaves) {
  const start = Date.now();
  const stream = at.createStream();
  const wave = sherpa_onnx.readWave(filename);
  stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
  const events = at.compute(stream);
  const stop = Date.now();

  const elapsed_seconds = (stop - start) / 1000;
  const duration = wave.samples.length / wave.sampleRate;
  const real_time_factor = elapsed_seconds / duration;

  console.log('input file:', filename);
  console.log('Probability\t\tName');
  for (let e of events) {
    console.log(`${e.prob.toFixed(3)}\t\t\t${e.name}`);
  }
  console.log('Wave duration', duration.toFixed(3), 'seconds');
  console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
  console.log(
      `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
      real_time_factor.toFixed(3));
  console.log('------');
}


================================================
FILE: nodejs-addon-examples/test_audio_tagging_zipformer.js
================================================
// Copyright (c)  2024  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// Please download models files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models
function createAudioTagging() {
  const config = {
    model: {
      zipformer: {
        model:
            './sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/model.int8.onnx'
      },
      numThreads: 1,
      debug: true,
    },
    labels:
        './sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/class_labels_indices.csv',
    topK: 5,
  };
  return new sherpa_onnx.AudioTagging(config);
}

const at = createAudioTagging();

const testWaves = [
  './sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/1.wav',
  './sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/2.wav',
  './sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/3.wav',
  './sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/4.wav',
  './sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/5.wav',
  './sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/6.wav',
  './sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/7.wav',
  './sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/8.wav',
  './sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/9.wav',
  './sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/10.wav',
  './sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/11.wav',
  './sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/12.wav',
  './sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/13.wav',
];

console.log('------');

for (let filename of testWaves) {
  const start = Date.now();
  const stream = at.createStream();
  const wave = sherpa_onnx.readWave(filename);
  stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
  const events = at.compute(stream);
  const stop = Date.now();

  const elapsed_seconds = (stop - start) / 1000;
  const duration = wave.samples.length / wave.sampleRate;
  const real_time_factor = elapsed_seconds / duration;

  console.log('input file:', filename);
  console.log('Probability\t\tName');
  for (let e of events) {
    console.log(`${e.prob.toFixed(3)}\t\t\t${e.name}`);
  }
  console.log('Wave duration', duration.toFixed(3), 'seconds');
  console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
  console.log(
      `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
      real_time_factor.toFixed(3));
  console.log('------');
}


================================================
FILE: nodejs-addon-examples/test_keyword_spotter_transducer.js
================================================
// Copyright (c)  2024  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'transducer': {
      'encoder':
          './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx',
      'decoder':
          './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx',
      'joiner':
          './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx',
    },
    'tokens':
        './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt',
    'numThreads': 1,
    'provider': 'cpu',
    'debug': 1,
  },
  'keywordsFile':
      './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/test_keywords.txt',
};

const waveFilename =
    './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav';

const kws = new sherpa_onnx.KeywordSpotter(config);
console.log('Started');
let start = Date.now();
const stream = kws.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

const tailPadding = new Float32Array(wave.sampleRate * 0.4);
stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});

const detectedKeywords = [];
while (kws.isReady(stream)) {
  kws.decode(stream);
  const keyword = kws.getResult(stream).keyword;
  if (keyword != '') {
    detectedKeywords.push(keyword);
  }
}
let stop = Date.now();

console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', detectedKeywords);


================================================
FILE: nodejs-addon-examples/test_keyword_spotter_transducer_microphone.js
================================================
// Copyright (c)  2023-2024  Xiaomi Corporation (authors: Fangjun Kuang)
//
const portAudio = require('naudiodon2');
// console.log(portAudio.getDevices());

const sherpa_onnx = require('sherpa-onnx-node');

function createKeywordSpotter() {
  const config = {
    'featConfig': {
      'sampleRate': 16000,
      'featureDim': 80,
    },
    'modelConfig': {
      'transducer': {
        'encoder':
            './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx',
        'decoder':
            './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx',
        'joiner':
            './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx',
      },
      'tokens':
          './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt',
      'numThreads': 2,
      'provider': 'cpu',
      'debug': 1,
    },
    'keywordsFile':
        './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/keywords.txt',
  };

  return new sherpa_onnx.KeywordSpotter(config);
}

const kws = createKeywordSpotter();
const stream = kws.createStream();

let lastText = '';
let segmentIndex = 0;

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: kws.config.featConfig.sampleRate
  }
});

const display = new sherpa_onnx.Display(50);

ai.on('data', data => {
  const samples = new Float32Array(data.buffer);

  stream.acceptWaveform(
      {sampleRate: kws.config.featConfig.sampleRate, samples: samples});

  while (kws.isReady(stream)) {
    kws.decode(stream);
  }

  const keyword = kws.getResult(stream).keyword;
  if (keyword != '') {
    display.print(segmentIndex, keyword);
    segmentIndex += 1;
  }
});

ai.start();
console.log('Started! Please speak.');
console.log(`Only words from ${kws.config.keywordsFile} can be recognized`);


================================================
FILE: nodejs-addon-examples/test_offline_punctuation.js
================================================
// Copyright (c)  2023-2024  Xiaomi Corporation (authors: Fangjun Kuang)

const sherpa_onnx = require('sherpa-onnx-node');

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models
function createPunctuation() {
  const config = {
    model: {
      ctTransformer:
          './sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx',
      debug: true,
      numThreads: 1,
      provider: 'cpu',
    },
  };
  return new sherpa_onnx.OfflinePunctuation(config);
}

const punct = createPunctuation();
const sentences = [
  '这是一个测试你好吗How are you我很好thank you are you ok谢谢你',
  '我们都是木头人不会说话不会动',
  'The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry',
];
console.log('---');
for (let sentence of sentences) {
  const punct_text = punct.addPunct(sentence);
  console.log(`Input: ${sentence}`);
  console.log(`Output: ${punct_text}`);
  console.log('---');
}


================================================
FILE: nodejs-addon-examples/test_offline_speaker_diarization.js
================================================
// Copyright (c)  2024  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// clang-format off
/* Please use the following commands to download files
   used in this script

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav

 */
// clang-format on

const config = {
  segmentation: {
    pyannote: {
      model: './sherpa-onnx-pyannote-segmentation-3-0/model.onnx',
    },
  },
  embedding: {
    model: './3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx',
  },
  clustering: {
    // since we know that the test wave file
    // ./0-four-speakers-zh.wav contains 4 speakers, we use 4 for numClusters
    // here. if you don't have such information, please set numClusters to -1
    numClusters: 4,

    // If numClusters is not -1, then threshold is ignored.
    //
    // A larger threshold leads to fewer clusters, i.e., fewer speakers
    // A smaller threshold leads to more clusters, i.e., more speakers
    // You need to tune it by yourself.
    threshold: 0.5,
  },

  // If a segment is shorter than minDurationOn, we discard it
  minDurationOn: 0.2,  // in seconds

  // If the gap between two segments is less than minDurationOff, then we
  // merge these two segments into a single one
  minDurationOff: 0.5,  // in seconds
};

const waveFilename = './0-four-speakers-zh.wav';

const sd = new sherpa_onnx.OfflineSpeakerDiarization(config);
console.log('Started');

const wave = sherpa_onnx.readWave(waveFilename);
if (sd.sampleRate != wave.sampleRate) {
  throw new Error(
      `Expected sample rate: ${sd.sampleRate}, given: ${wave.sampleRate}`);
}

const segments = sd.process(wave.samples);
console.log(segments);


================================================
FILE: nodejs-addon-examples/test_offline_speech_enhancement_dpdfnet.js
================================================
// Copyright (c)  2026  Xiaomi Corporation (authors: Fangjun Kuang)

const sherpa_onnx = require('sherpa-onnx-node');

function createOfflineSpeechDenoiser() {
  // please download models from
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
  const config = {
    model: {
      dpdfnet: {model: './dpdfnet_baseline.onnx'},
      debug: true,
      numThreads: 1,
    },
  };

  return new sherpa_onnx.OfflineSpeechDenoiser(config);
}

const sd = createOfflineSpeechDenoiser();

const waveFilename = './inp_16k.wav';
const wave = sherpa_onnx.readWave(waveFilename);
const denoised = sd.run({
  samples: wave.samples,
  sampleRate: wave.sampleRate,
  enableExternalBuffer: true
});
sherpa_onnx.writeWave(
    './enhanced-dpdfnet-16k.wav',
    {samples: denoised.samples, sampleRate: denoised.sampleRate});

console.log('Saved to ./enhanced-dpdfnet-16k.wav');


================================================
FILE: nodejs-addon-examples/test_offline_speech_enhancement_gtcrn.js
================================================
// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)

const sherpa_onnx = require('sherpa-onnx-node');

function createOfflineSpeechDenoiser() {
  // please download models from
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
  const config = {
    model: {
      gtcrn: {model: './gtcrn_simple.onnx'},
      debug: true,
      numThreads: 1,
    },
  };

  return new sherpa_onnx.OfflineSpeechDenoiser(config);
}

const sd = createOfflineSpeechDenoiser();

const waveFilename = './inp_16k.wav';
const wave = sherpa_onnx.readWave(waveFilename);
const denoised = sd.run({
  samples: wave.samples,
  sampleRate: wave.sampleRate,
  enableExternalBuffer: true
});
sherpa_onnx.writeWave(
    './enhanced-16k.wav',
    {samples: denoised.samples, sampleRate: denoised.sampleRate});

console.log(`Saved to ./enhanced-16k.wav`);


================================================
FILE: nodejs-addon-examples/test_online_punctuation.js
================================================
// Copyright (c)  2023-2024  Xiaomi Corporation (authors: Fangjun Kuang)

const sherpa_onnx = require('sherpa-onnx-node');

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models
function createPunctuation() {
  const config = {
    model: {
      cnnBilstm:
          './sherpa-onnx-online-punct-en-2024-08-06/model.onnx',
      bpeVocab:
          './sherpa-onnx-online-punct-en-2024-08-06/bpe.vocab',
      debug: true,
      numThreads: 1,
      provider: 'cpu',
    },
  };
  return new sherpa_onnx.OnlinePunctuation(config);
}

const punct = createPunctuation();
const sentences = [
  'How are you i am fine thank you',
  'The african blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry',
];
console.log('---');
for (let sentence of sentences) {
  const punct_text = punct.addPunct(sentence);
  console.log(`Input: ${sentence}`);
  console.log(`Output: ${punct_text}`);
  console.log('---');
}


================================================
FILE: nodejs-addon-examples/test_online_speech_enhancement_dpdfnet.js
================================================
// Copyright (c)  2026  Xiaomi Corporation (authors: Fangjun Kuang)

const sherpa_onnx = require('sherpa-onnx-node');

function createOnlineSpeechDenoiser() {
  const config = {
    model: {
      dpdfnet: {model: './dpdfnet_baseline.onnx'},
      debug: true,
      numThreads: 1,
    },
  };

  return new sherpa_onnx.OnlineSpeechDenoiser(config);
}

const sd = createOnlineSpeechDenoiser();
const wave = sherpa_onnx.readWave('./inp_16k.wav');
const output = [];
const frameShift = sd.frameShiftInSamples;

for (let start = 0; start < wave.samples.length; start += frameShift) {
  const end = Math.min(start + frameShift, wave.samples.length);
  const chunk = wave.samples.slice(start, end);
  const denoised = sd.run({
    samples: chunk,
    sampleRate: wave.sampleRate,
    enableExternalBuffer: true
  });
  output.push(...denoised.samples);
}

const tail = sd.flush(true);
output.push(...tail.samples);

sherpa_onnx.writeWave(
    './enhanced-online-dpdfnet.wav',
    {samples: Float32Array.from(output), sampleRate: sd.sampleRate});

console.log('Saved to ./enhanced-online-dpdfnet.wav');


================================================
FILE: nodejs-addon-examples/test_online_speech_enhancement_gtcrn.js
================================================
// Copyright (c)  2026  Xiaomi Corporation (authors: Fangjun Kuang)

const sherpa_onnx = require('sherpa-onnx-node');

function createOnlineSpeechDenoiser() {
  const config = {
    model: {
      gtcrn: {model: './gtcrn_simple.onnx'},
      debug: true,
      numThreads: 1,
    },
  };

  return new sherpa_onnx.OnlineSpeechDenoiser(config);
}

const sd = createOnlineSpeechDenoiser();
const wave = sherpa_onnx.readWave('./inp_16k.wav');
const output = [];
const frameShift = sd.frameShiftInSamples;

for (let start = 0; start < wave.samples.length; start += frameShift) {
  const end = Math.min(start + frameShift, wave.samples.length);
  const chunk = wave.samples.slice(start, end);
  const denoised = sd.run({
    samples: chunk,
    sampleRate: wave.sampleRate,
    enableExternalBuffer: true
  });
  output.push(...denoised.samples);
}

const tail = sd.flush(true);
output.push(...tail.samples);

sherpa_onnx.writeWave(
    './enhanced-online-gtcrn.wav',
    {samples: Float32Array.from(output), sampleRate: sd.sampleRate});

console.log('Saved to ./enhanced-online-gtcrn.wav');


================================================
FILE: nodejs-addon-examples/test_speaker_identification.js
================================================
// Copyright (c)  2024  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');
const assert = require('node:assert');

// Please download models files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
function createSpeakerEmbeddingExtractor() {
  const config = {
    model: './3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx',
    numThreads: 1,
    debug: true,
  };
  return new sherpa_onnx.SpeakerEmbeddingExtractor(config);
}

function computeEmbedding(extractor, filename) {
  const stream = extractor.createStream();
  const wave = sherpa_onnx.readWave(filename);
  stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
  return extractor.compute(stream);
}

const extractor = createSpeakerEmbeddingExtractor();
const manager = new sherpa_onnx.SpeakerEmbeddingManager(extractor.dim);

// Please download test files from
// https://github.com/csukuangfj/sr-data
const spk1Files = [
  './sr-data/enroll/fangjun-sr-1.wav',
  './sr-data/enroll/fangjun-sr-2.wav',
  './sr-data/enroll/fangjun-sr-3.wav',
];

let spk1Vec = [];
for (let f of spk1Files) {
  spk1Vec.push(computeEmbedding(extractor, f));
}

const spk2Files = [
  './sr-data/enroll/leijun-sr-1.wav',
  './sr-data/enroll/leijun-sr-2.wav',
];

let spk2Vec = [];
for (let f of spk2Files) {
  spk2Vec.push(computeEmbedding(extractor, f));
}

let ok = manager.addMulti({name: 'fangjun', v: spk1Vec});
assert.equal(ok, true);

ok = manager.addMulti({name: 'leijun', v: spk2Vec});
assert.equal(ok, true);

assert.equal(manager.getNumSpeakers(), 2);

assert.equal(manager.contains('fangjun'), true);
assert.equal(manager.contains('leijun'), true);

console.log('---All speakers---');

console.log(manager.getAllSpeakerNames());
console.log('------------');

const testFiles = [
  './sr-data/test/fangjun-test-sr-1.wav',
  './sr-data/test/leijun-test-sr-1.wav',
  './sr-data/test/liudehua-test-sr-1.wav',
];

const threshold = 0.6;

for (let f of testFiles) {
  const embedding = computeEmbedding(extractor, f);

  let name = manager.search({v: embedding, threshold: threshold});
  if (name == '') {
    name = '<Unknown>';
  }
  console.log(`${f}: ${name}`);
}


ok = manager.verify({
  name: 'fangjun',
  v: computeEmbedding(extractor, testFiles[0]),
  threshold: threshold
});

assert.equal(ok, true);

ok = manager.remove('fangjun');
assert.equal(ok, true);

ok = manager.verify({
  name: 'fangjun',
  v: computeEmbedding(extractor, testFiles[0]),
  threshold: threshold
});
assert.equal(ok, false);

assert.equal(manager.getNumSpeakers(), 1);


================================================
FILE: nodejs-addon-examples/test_spoken_language_identification.js
================================================
// Copyright (c)  2023-2024  Xiaomi Corporation (authors: Fangjun Kuang)

const sherpa_onnx = require('sherpa-onnx-node');

// Please download whisper multi-lingual models from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
function createSpokenLanguageID() {
  const config = {
    whisper: {
      encoder: './sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx',
      decoder: './sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx',
    },
    debug: true,
    numThreads: 1,
    provider: 'cpu',
  };
  return new sherpa_onnx.SpokenLanguageIdentification(config);
}

const slid = createSpokenLanguageID();

const testWaves = [
  './spoken-language-identification-test-wavs/ar-arabic.wav',
  './spoken-language-identification-test-wavs/de-german.wav',
  './spoken-language-identification-test-wavs/en-english.wav',
  './spoken-language-identification-test-wavs/fr-french.wav',
  './spoken-language-identification-test-wavs/pt-portuguese.wav',
  './spoken-language-identification-test-wavs/es-spanish.wav',
  './spoken-language-identification-test-wavs/zh-chinese.wav',
];

const display = new Intl.DisplayNames(['en'], {type: 'language'});

for (let f of testWaves) {
  const stream = slid.createStream();

  const wave = sherpa_onnx.readWave(f);
  stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

  const lang = slid.compute(stream);
  console.log(f.split('/')[2], lang, display.of(lang));
}


================================================
FILE: nodejs-addon-examples/test_tts_non_streaming_kitten_en.js
================================================
// Copyright (c)  2025  Xiaomi Corporation

const sherpa_onnx = require('sherpa-onnx-node');

/**
 * Create an offline TTS instance asynchronously using the Kitten model.
 *
 * Model files can be downloaded from:
 * https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kitten.html
 */
async function createOfflineTtsAsync() {
  const config = {
    model: {
      kitten: {
        model: './kitten-nano-en-v0_1-fp16/model.fp16.onnx',
        voices: './kitten-nano-en-v0_1-fp16/voices.bin',
        tokens: './kitten-nano-en-v0_1-fp16/tokens.txt',
        dataDir: './kitten-nano-en-v0_1-fp16/espeak-ng-data',
      },
      debug: true,
      numThreads: 1,
      provider: 'cpu',
    },
    maxNumSentences: 1,
  };

  // Use the async factory (non-blocking)
  return await sherpa_onnx.OfflineTts.createAsync(config);
}

async function main() {
  // Asynchronously create the OfflineTts instance
  const tts = await createOfflineTtsAsync();

  const text =
      'Today as always, men fall into two groups: slaves and free men. ' +
      'Whoever does not have two-thirds of his day for himself, is a slave, ' +
      'whatever he may be: a statesman, a businessman, an official, or a scholar.';

  console.log('Number of speakers:', tts.numSpeakers);
  console.log('Sample rate:', tts.sampleRate);

  const start = Date.now();
  const generationConfig = new sherpa_onnx.GenerationConfig({
    sid: 6,
    speed: 1.0,
    silenceScale: 0.2,
  });

  // Asynchronous generation with progress reporting
  const audio = await tts.generateAsync({
    text,
    generationConfig,

    // Progress callback receives audio chunks
    onProgress({samples, progress}) {
      // samples is Float32Array for this chunk
      process.stdout.write(`\rGenerating... ${
          (progress * 100).toFixed(1)}% (chunk length: ${samples.length})`);

      // Return 0 or false to cancel, any other value to continue
      return true;
    },
  });

  console.log('\nGeneration finished.');

  const stop = Date.now();
  const elapsedSeconds = (stop - start) / 1000;
  const durationSeconds = audio.samples.length / audio.sampleRate;
  const realTimeFactor = elapsedSeconds / durationSeconds;

  console.log('Wave duration:', durationSeconds.toFixed(3), 'seconds');
  console.log('Elapsed time:', elapsedSeconds.toFixed(3), 'seconds');
  console.log(
      `RTF = ${elapsedSeconds.toFixed(3)} / ${durationSeconds.toFixed(3)} =`,
      realTimeFactor.toFixed(3));

  const filename = 'test-kitten-en-6.wav';
  sherpa_onnx.writeWave(filename, {
    samples: audio.samples,
    sampleRate: audio.sampleRate,
  });

  console.log(`Saved to ${filename}`);
}

// Run the demo
main().catch((err) => {
  console.error('TTS failed:', err);
  process.exit(1);
});


================================================
FILE: nodejs-addon-examples/test_tts_non_streaming_kokoro_en.js
================================================
// Copyright (c)  2025  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// please refer to
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
// to download model files
function createOfflineTts() {
  const config = {
    model: {
      kokoro: {
        model: './kokoro-en-v0_19/model.onnx',
        voices: './kokoro-en-v0_19/voices.bin',
        tokens: './kokoro-en-v0_19/tokens.txt',
        dataDir: './kokoro-en-v0_19/espeak-ng-data',
      },
      debug: true,
      numThreads: 1,
      provider: 'cpu',
    },
    maxNumSentences: 1,
  };
  return new sherpa_onnx.OfflineTts(config);
}

const tts = createOfflineTts();

const text =
    'Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.';

const generationConfig = new sherpa_onnx.GenerationConfig({
  sid: 6,
  speed: 1.0,
  silenceScale: 0.2,
});


let start = Date.now();
const audio = tts.generate({text, generationConfig});
let stop = Date.now();
const elapsed_seconds = (stop - start) / 1000;
const duration = audio.samples.length / audio.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));

const filename = 'test-kokoro-en-6.wav';
sherpa_onnx.writeWave(
    filename, {samples: audio.samples, sampleRate: audio.sampleRate});

console.log(`Saved to ${filename}`);


================================================
FILE: nodejs-addon-examples/test_tts_non_streaming_kokoro_zh_en.js
================================================
// Copyright (c)  2025  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// please refer to
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
// to download model files
function createOfflineTts() {
  const config = {
    model: {
      kokoro: {
        model: './kokoro-multi-lang-v1_0/model.onnx',
        voices: './kokoro-multi-lang-v1_0/voices.bin',
        tokens: './kokoro-multi-lang-v1_0/tokens.txt',
        dataDir: './kokoro-multi-lang-v1_0/espeak-ng-data',
        lexicon:
            './kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt',
      },
      debug: true,
      numThreads: 1,
      provider: 'cpu',
    },
    maxNumSentences: 1,
  };
  return new sherpa_onnx.OfflineTts(config);
}

const tts = createOfflineTts();

const text =
    '中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢？';

const generationConfig = new sherpa_onnx.GenerationConfig({
  sid: 48,
  speed: 1.0,
  silenceScale: 0.2,
});

let start = Date.now();
const audio = tts.generate({text, generationConfig});
let stop = Date.now();
const elapsed_seconds = (stop - start) / 1000;
const duration = audio.samples.length / audio.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));

const filename = 'test-kokoro-zh-en-48.wav';
sherpa_onnx.writeWave(
    filename, {samples: audio.samples, sampleRate: audio.sampleRate});

console.log(`Saved to ${filename}`);


================================================
FILE: nodejs-addon-examples/test_tts_non_streaming_matcha_icefall_en.js
================================================
// Copyright (c)  2025  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// please refer to
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
// to download model files
function createOfflineTts() {
  const config = {
    model: {
      matcha: {
        acousticModel: './matcha-icefall-en_US-ljspeech/model-steps-3.onnx',
        vocoder: './vocos-22khz-univ.onnx',
        tokens: './matcha-icefall-en_US-ljspeech/tokens.txt',
        dataDir: './matcha-icefall-en_US-ljspeech/espeak-ng-data',
      },
      debug: true,
      numThreads: 1,
      provider: 'cpu',
    },
    maxNumSentences: 1,
  };
  return new sherpa_onnx.OfflineTts(config);
}

const tts = createOfflineTts();

const text =
    'Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.';

const generationConfig = new sherpa_onnx.GenerationConfig({
  sid: 0,
  speed: 1.0,
  silenceScale: 0.2,
});


let start = Date.now();
const audio = tts.generate({text, generationConfig});
let stop = Date.now();
const elapsed_seconds = (stop - start) / 1000;
const duration = audio.samples.length / audio.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));

const filename = 'test-matcha-en.wav';
sherpa_onnx.writeWave(
    filename, {samples: audio.samples, sampleRate: audio.sampleRate});

console.log(`Saved to ${filename}`);


================================================
FILE: nodejs-addon-examples/test_tts_non_streaming_matcha_icefall_zh.js
================================================
// Copyright (c)  2025  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// please refer to
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
// to download model files
function createOfflineTts() {
  const config = {
    model: {
      matcha: {
        acousticModel: './matcha-icefall-zh-baker/model-steps-3.onnx',
        vocoder: './vocos-22khz-univ.onnx',
        lexicon: './matcha-icefall-zh-baker/lexicon.txt',
        tokens: './matcha-icefall-zh-baker/tokens.txt',
      },
      debug: true,
      numThreads: 1,
      provider: 'cpu',
    },
    maxNumSentences: 1,
    ruleFsts:
        './matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst',
  };
  return new sherpa_onnx.OfflineTts(config);
}

const tts = createOfflineTts();

const text =
    '当夜幕降临，星光点点，伴随着微风拂面，我在静谧中感受着时光的流转，思念如涟漪荡漾，梦境如画卷展开，我与自然融为一体，沉静在这片宁静的美丽之中，感受着生命的奇迹与温柔. 某某银行的副行长和一些行政领导表示，他们去过长江和长白山; 经济不断增长。2024年12月31号，拨打110或者18920240511。123456块钱。';

const generationConfig = new sherpa_onnx.GenerationConfig({
  sid: 0,
  speed: 1.0,
  silenceScale: 0.2,
});


let start = Date.now();
const audio = tts.generate({text, generationConfig});
let stop = Date.now();
const elapsed_seconds = (stop - start) / 1000;
const duration = audio.samples.length / audio.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));

const filename = 'test-matcha-zh.wav';
sherpa_onnx.writeWave(
    filename, {samples: audio.samples, sampleRate: audio.sampleRate});

console.log(`Saved to ${filename}`);


================================================
FILE: nodejs-addon-examples/test_tts_non_streaming_pocket_en.js
================================================
// Copyright (c)  2026  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// please refer to
// https://k2-fsa.github.io/sherpa/onnx/tts/pocket.html
// to download model files
function createOfflineTts() {
  const config = {
    model: {
      pocket: {
        lmFlow: './sherpa-onnx-pocket-tts-int8-2026-01-26/lm_flow.int8.onnx',
        lmMain: './sherpa-onnx-pocket-tts-int8-2026-01-26/lm_main.int8.onnx',
        encoder: './sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx',
        decoder: './sherpa-onnx-pocket-tts-int8-2026-01-26/decoder.int8.onnx',
        textConditioner:
            './sherpa-onnx-pocket-tts-int8-2026-01-26/text_conditioner.onnx',
        vocabJson: './sherpa-onnx-pocket-tts-int8-2026-01-26/vocab.json',
        tokenScoresJson:
            './sherpa-onnx-pocket-tts-int8-2026-01-26/token_scores.json',
        voiceEmbeddingCacheCapacity: 50,
      },
      debug: true,
      numThreads: 2,
      provider: 'cpu',
    },
    maxNumSentences: 1,
  };
  return new sherpa_onnx.OfflineTts(config);
}

const tts = createOfflineTts();

const text =
    'Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.';

const referenceAudioFilename =
    './sherpa-onnx-pocket-tts-int8-2026-01-26/test_wavs/bria.wav';
const referenceWave = sherpa_onnx.readWave(referenceAudioFilename);

const generationConfig = new sherpa_onnx.GenerationConfig({
  speed: 1.0,
  referenceAudio: referenceWave.samples,
  referenceSampleRate: referenceWave.sampleRate,
  numSteps: 5,
  extra: {max_reference_audio_len: 12, seed: 42}
});


let start = Date.now();
const audio = tts.generate({text, generationConfig});

let stop = Date.now();
const elapsed_seconds = (stop - start) / 1000;
const duration = audio.samples.length / audio.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));

const filename = 'test-pocket-bria.wav';
sherpa_onnx.writeWave(
    filename, {samples: audio.samples, sampleRate: audio.sampleRate});

console.log(`Saved to ${filename}`);


================================================
FILE: nodejs-addon-examples/test_tts_non_streaming_pocket_en_async.js
================================================
// Copyright (c)  2026  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

async function createOfflineTts() {
  const config = {
    model: {
      pocket: {
        lmFlow: './sherpa-onnx-pocket-tts-int8-2026-01-26/lm_flow.int8.onnx',
        lmMain: './sherpa-onnx-pocket-tts-int8-2026-01-26/lm_main.int8.onnx',
        encoder: './sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx',
        decoder: './sherpa-onnx-pocket-tts-int8-2026-01-26/decoder.int8.onnx',
        textConditioner:
            './sherpa-onnx-pocket-tts-int8-2026-01-26/text_conditioner.onnx',
        vocabJson: './sherpa-onnx-pocket-tts-int8-2026-01-26/vocab.json',
        tokenScoresJson:
            './sherpa-onnx-pocket-tts-int8-2026-01-26/token_scores.json',
        voiceEmbeddingCacheCapacity: 50,
      },
      debug: false,  // set to true to see verbose logs
      numThreads: 2,
      provider: 'cpu',
    },
    maxNumSentences: 1,
  };

  return await sherpa_onnx.OfflineTts.createAsync(config);
}

/**
 * Async function to generate audio with progress callback
 * @param {sherpa_onnx.OfflineTts} tts
 * @param {string} text
 */
async function generateAudioAsync(tts, text) {
  const referenceAudioFilename =
      './sherpa-onnx-pocket-tts-int8-2026-01-26/test_wavs/bria.wav';
  const referenceWave = sherpa_onnx.readWave(referenceAudioFilename);

  const generationConfig = new sherpa_onnx.GenerationConfig({
    speed: 1.0,
    referenceAudio: referenceWave.samples,
    referenceSampleRate: referenceWave.sampleRate,
    numSteps: 5,
    extra: {max_reference_audio_len: 12, seed: 42},
  });

  console.log('Starting generation...');

  const audio = await tts.generateAsync({
    text,
    enableExternalBuffer: true,
    generationConfig,
    onProgress: ({samples, progress}) => {
      // Print progress percentage and number of samples generated
      process.stdout.write(
          `Progress: ${(progress * 100).toFixed(1)}%, ` +
          `Samples: ${samples.length}\r`);

      // Return anything other than 0/false to continue generation
      return 1;
    },
  });

  console.log('\nGeneration complete!');
  return audio;
}

/**
 * Main entry
 */
async function main() {
  console.log('Creating OfflineTts...');
  const tts = await createOfflineTts();
  console.log('OfflineTts created!');

  const text =
      'Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.';


  const start = Date.now();
  const audio = await generateAudioAsync(tts, text);
  const stop = Date.now();

  const elapsed_seconds = (stop - start) / 1000;
  const duration = audio.samples.length / audio.sampleRate;
  const real_time_factor = elapsed_seconds / duration;

  console.log('Wave duration', duration.toFixed(3), 'seconds');
  console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
  console.log(
      `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
      real_time_factor.toFixed(3));

  const filename = 'test-pocket-bria-async.wav';
  sherpa_onnx.writeWave(filename, {
    samples: audio.samples,
    sampleRate: audio.sampleRate,
  });
  console.log(`Saved to ${filename}`);
}

// Run the async main
main().catch((err) => {
  console.error('Error:', err);
});


================================================
FILE: nodejs-addon-examples/test_tts_non_streaming_pocket_en_play_async.js
================================================
// Copyright (c)  2026  Xiaomi Corporation
//
// npm install speaker
//
const Speaker = require('speaker');
const sherpa_onnx = require('sherpa-onnx-node');

async function createOfflineTts() {
  const config = {
    model: {
      pocket: {
        lmFlow: './sherpa-onnx-pocket-tts-int8-2026-01-26/lm_flow.int8.onnx',
        lmMain: './sherpa-onnx-pocket-tts-int8-2026-01-26/lm_main.int8.onnx',
        encoder: './sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx',
        decoder: './sherpa-onnx-pocket-tts-int8-2026-01-26/decoder.int8.onnx',
        textConditioner:
            './sherpa-onnx-pocket-tts-int8-2026-01-26/text_conditioner.onnx',
        vocabJson: './sherpa-onnx-pocket-tts-int8-2026-01-26/vocab.json',
        tokenScoresJson:
            './sherpa-onnx-pocket-tts-int8-2026-01-26/token_scores.json',
        voiceEmbeddingCacheCapacity: 50,
      },
      debug: false,  // set to true to see verbose logs
      numThreads: 2,
      provider: 'cpu',
    },
    maxNumSentences: 1,
  };

  return await sherpa_onnx.OfflineTts.createAsync(config);
}

function createSpeaker(sampleRate) {
  return new Speaker({
    channels: 1,
    bitDepth: 16,
    sampleRate: sampleRate,
    signed: true,
  });
}

function float32ToInt16Buffer(samples) {
  const buffer = Buffer.alloc(samples.length * 2);

  for (let i = 0; i < samples.length; ++i) {
    const s = Math.max(-1, Math.min(1, samples[i]));
    const v = s < 0 ? s * 0x8000 : s * 0x7fff;
    buffer.writeInt16LE(Math.round(v), i * 2);
  }

  return buffer;
}

function waitForEvent(emitter, eventName) {
  return new Promise((resolve, reject) => {
    emitter.once(eventName, resolve);
    emitter.once('error', reject);
  });
}

/**
 * @param {sherpa_onnx.OfflineTts} tts
 * @param {string} text
 */
async function generateAudioAsync(tts, text) {
  const referenceAudioFilename =
      './sherpa-onnx-pocket-tts-int8-2026-01-26/test_wavs/bria.wav';
  const referenceWave = sherpa_onnx.readWave(referenceAudioFilename);

  const generationConfig = new sherpa_onnx.GenerationConfig({
    speed: 1.0,
    referenceAudio: referenceWave.samples,
    referenceSampleRate: referenceWave.sampleRate,
    numSteps: 5,
    extra: {max_reference_audio_len: 12, seed: 42},
  });

  const speaker = createSpeaker(tts.sampleRate);
  const start = Date.now();

  console.log('Starting generation and playback...');

  const audio = await tts.generateAsync({
    text,
    enableExternalBuffer: true,
    generationConfig,
    onProgress: ({samples, progress}) => {
      process.stdout.write(
          `Progress: ${(progress * 100).toFixed(1)}%, ` +
          `Chunk samples: ${samples.length}\r`);
      speaker.write(float32ToInt16Buffer(samples));
      return 1;
    },
  });

  const generationStop = Date.now();
  speaker.end();
  await waitForEvent(speaker, 'close');
  const playbackStop = Date.now();

  console.log('\nGeneration and playback complete!');
  return {
    audio,
    generationElapsedSeconds: (generationStop - start) / 1000,
    playbackElapsedSeconds: (playbackStop - start) / 1000,
  };
}

async function main() {
  console.log('Creating OfflineTts...');
  const tts = await createOfflineTts();
  console.log('OfflineTts created!');

  const text =
      'Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.';

  const {audio, generationElapsedSeconds, playbackElapsedSeconds} =
      await generateAudioAsync(tts, text);
  const duration = audio.samples.length / audio.sampleRate;
  const real_time_factor = generationElapsedSeconds / duration;

  console.log('Wave duration', duration.toFixed(3), 'seconds');
  console.log(
      'Generation elapsed', generationElapsedSeconds.toFixed(3), 'seconds');
  console.log(
      'Playback drained in', playbackElapsedSeconds.toFixed(3), 'seconds');
  console.log(
      `RTF = ${generationElapsedSeconds.toFixed(3)}/${duration.toFixed(3)} =`,
      real_time_factor.toFixed(3));

  const filename = 'test-pocket-bria-play-async.wav';
  sherpa_onnx.writeWave(filename, {
    samples: audio.samples,
    sampleRate: audio.sampleRate,
  });
  console.log(`Saved to ${filename}`);
}

main().catch((err) => {
  console.error('Error:', err);
});


================================================
FILE: nodejs-addon-examples/test_tts_non_streaming_supertonic_en.js
================================================
// Copyright (c)  2026  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// please refer to
// https://k2-fsa.github.io/sherpa/onnx/tts/supertonic.html
// to download model files
function createOfflineTts() {
  const config = {
    model: {
      supertonic: {
        durationPredictor:
            './sherpa-onnx-supertonic-tts-int8-2026-03-06/duration_predictor.int8.onnx',
        textEncoder:
            './sherpa-onnx-supertonic-tts-int8-2026-03-06/text_encoder.int8.onnx',
        vectorEstimator:
            './sherpa-onnx-supertonic-tts-int8-2026-03-06/vector_estimator.int8.onnx',
        vocoder:
            './sherpa-onnx-supertonic-tts-int8-2026-03-06/vocoder.int8.onnx',
        ttsJson: './sherpa-onnx-supertonic-tts-int8-2026-03-06/tts.json',
        unicodeIndexer:
            './sherpa-onnx-supertonic-tts-int8-2026-03-06/unicode_indexer.bin',
        voiceStyle: './sherpa-onnx-supertonic-tts-int8-2026-03-06/voice.bin',
      },
      debug: true,
      numThreads: 2,
      provider: 'cpu',
    },
    maxNumSentences: 1,
  };
  return new sherpa_onnx.OfflineTts(config);
}

const tts = createOfflineTts();

const text =
    'Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.';

const generationConfig = new sherpa_onnx.GenerationConfig({
  sid: 6,
  speed: 1.25,
  numSteps: 5,
  extra: {lang: 'en'},
});

let start = Date.now();
const audio = tts.generate({text, generationConfig});

let stop = Date.now();
const elapsed_seconds = (stop - start) / 1000;
const duration = audio.samples.length / audio.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));

const filename = 'test-supertonic-en.wav';
sherpa_onnx.writeWave(
    filename, {samples: audio.samples, sampleRate: audio.sampleRate});

console.log(`Saved to ${filename}`);


================================================
FILE: nodejs-addon-examples/test_tts_non_streaming_supertonic_en_async.js
================================================
// Copyright (c)  2026  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

async function createOfflineTts() {
  const config = {
    model: {
      supertonic: {
        durationPredictor:
            './sherpa-onnx-supertonic-tts-int8-2026-03-06/duration_predictor.int8.onnx',
        textEncoder:
            './sherpa-onnx-supertonic-tts-int8-2026-03-06/text_encoder.int8.onnx',
        vectorEstimator:
            './sherpa-onnx-supertonic-tts-int8-2026-03-06/vector_estimator.int8.onnx',
        vocoder:
            './sherpa-onnx-supertonic-tts-int8-2026-03-06/vocoder.int8.onnx',
        ttsJson: './sherpa-onnx-supertonic-tts-int8-2026-03-06/tts.json',
        unicodeIndexer:
            './sherpa-onnx-supertonic-tts-int8-2026-03-06/unicode_indexer.bin',
        voiceStyle: './sherpa-onnx-supertonic-tts-int8-2026-03-06/voice.bin',
      },
      debug: false,  // set to true to see verbose logs
      numThreads: 2,
      provider: 'cpu',
    },
    maxNumSentences: 1,
  };

  return await sherpa_onnx.OfflineTts.createAsync(config);
}

/**
 * Async function to generate audio with progress callback
 * @param {sherpa_onnx.OfflineTts} tts
 * @param {string} text
 */
async function generateAudioAsync(tts, text) {
  const generationConfig = new sherpa_onnx.GenerationConfig({
    sid: 6,
    speed: 1.25,
    numSteps: 5,
    extra: {lang: 'en'},
  });

  console.log('Starting generation...');

  const audio = await tts.generateAsync({
    text,
    enableExternalBuffer: true,
    generationConfig,
    onProgress: ({samples, progress}) => {
      // Print progress percentage and number of samples generated
      process.stdout.write(
          `Progress: ${(progress * 100).toFixed(1)}%, ` +
          `Samples: ${samples.length}\r`);

      // Return anything other than 0/false to continue generation
      return 1;
    },
  });

  console.log('\nGeneration complete!');
  return audio;
}

/**
 * Main entry
 */
async function main() {
  console.log('Creating OfflineTts...');
  const tts = await createOfflineTts();
  console.log('OfflineTts created!');

  const text =
      'Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.';


  const start = Date.now();
  const audio = await generateAudioAsync(tts, text);
  const stop = Date.now();

  const elapsed_seconds = (stop - start) / 1000;
  const duration = audio.samples.length / audio.sampleRate;
  const real_time_factor = elapsed_seconds / duration;

  console.log('Wave duration', duration.toFixed(3), 'seconds');
  console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
  console.log(
      `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
      real_time_factor.toFixed(3));

  const filename = 'test-supertonic-en-async.wav';
  sherpa_onnx.writeWave(filename, {
    samples: audio.samples,
    sampleRate: audio.sampleRate,
  });
  console.log(`Saved to ${filename}`);
}

// Run the async main
main().catch((err) => {
  console.error('Error:', err);
});


================================================
FILE: nodejs-addon-examples/test_tts_non_streaming_supertonic_en_play_async.js
================================================
// Copyright (c)  2026  Xiaomi Corporation
//
// npm install speaker
//
const Speaker = require('speaker');
const sherpa_onnx = require('sherpa-onnx-node');

async function createOfflineTts() {
  const config = {
    model: {
      supertonic: {
        durationPredictor:
            './sherpa-onnx-supertonic-tts-int8-2026-03-06/duration_predictor.int8.onnx',
        textEncoder:
            './sherpa-onnx-supertonic-tts-int8-2026-03-06/text_encoder.int8.onnx',
        vectorEstimator:
            './sherpa-onnx-supertonic-tts-int8-2026-03-06/vector_estimator.int8.onnx',
        vocoder:
            './sherpa-onnx-supertonic-tts-int8-2026-03-06/vocoder.int8.onnx',
        ttsJson: './sherpa-onnx-supertonic-tts-int8-2026-03-06/tts.json',
        unicodeIndexer:
            './sherpa-onnx-supertonic-tts-int8-2026-03-06/unicode_indexer.bin',
        voiceStyle: './sherpa-onnx-supertonic-tts-int8-2026-03-06/voice.bin',
      },
      debug: false,  // set to true to see verbose logs
      numThreads: 2,
      provider: 'cpu',
    },
    maxNumSentences: 1,
  };

  return await sherpa_onnx.OfflineTts.createAsync(config);
}

function createSpeaker(sampleRate) {
  return new Speaker({
    channels: 1,
    bitDepth: 16,
    sampleRate: sampleRate,
    signed: true,
  });
}

function float32ToInt16Buffer(samples) {
  const buffer = Buffer.alloc(samples.length * 2);

  for (let i = 0; i < samples.length; ++i) {
    const s = Math.max(-1, Math.min(1, samples[i]));
    const v = s < 0 ? s * 0x8000 : s * 0x7fff;
    buffer.writeInt16LE(Math.round(v), i * 2);
  }

  return buffer;
}

function waitForEvent(emitter, eventName) {
  return new Promise((resolve, reject) => {
    emitter.once(eventName, resolve);
    emitter.once('error', reject);
  });
}

/**
 * @param {sherpa_onnx.OfflineTts} tts
 * @param {string} text
 */
async function generateAudioAsync(tts, text) {
  const generationConfig = new sherpa_onnx.GenerationConfig({
    sid: 6,
    speed: 1.25,
    numSteps: 5,
    extra: {lang: 'en'},
  });

  const speaker = createSpeaker(tts.sampleRate);
  const start = Date.now();

  console.log('Starting generation and playback...');

  const audio = await tts.generateAsync({
    text,
    enableExternalBuffer: true,
    generationConfig,
    onProgress: ({samples, progress}) => {
      process.stdout.write(
          `Progress: ${(progress * 100).toFixed(1)}%, ` +
          `Chunk samples: ${samples.length}\r`);
      speaker.write(float32ToInt16Buffer(samples));
      return 1;
    },
  });

  const generationStop = Date.now();
  speaker.end();
  await waitForEvent(speaker, 'close');
  const playbackStop = Date.now();

  console.log('\nGeneration and playback complete!');
  return {
    audio,
    generationElapsedSeconds: (generationStop - start) / 1000,
    playbackElapsedSeconds: (playbackStop - start) / 1000,
  };
}

async function main() {
  console.log('Creating OfflineTts...');
  const tts = await createOfflineTts();
  console.log('OfflineTts created!');

  const text =
      'Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.';

  const {audio, generationElapsedSeconds, playbackElapsedSeconds} =
      await generateAudioAsync(tts, text);
  const duration = audio.samples.length / audio.sampleRate;
  const real_time_factor = generationElapsedSeconds / duration;

  console.log('Wave duration', duration.toFixed(3), 'seconds');
  console.log(
      'Generation elapsed', generationElapsedSeconds.toFixed(3), 'seconds');
  console.log(
      'Playback drained in', playbackElapsedSeconds.toFixed(3), 'seconds');
  console.log(
      `RTF = ${generationElapsedSeconds.toFixed(3)}/${duration.toFixed(3)} =`,
      real_time_factor.toFixed(3));

  const filename = 'test-supertonic-en-play-async.wav';
  sherpa_onnx.writeWave(filename, {
    samples: audio.samples,
    sampleRate: audio.sampleRate,
  });
  console.log(`Saved to ${filename}`);
}

main().catch((err) => {
  console.error('Error:', err);
});


================================================
FILE: nodejs-addon-examples/test_tts_non_streaming_vits_coqui_de.js
================================================
// Copyright (c)  2024  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// please download model files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
function createOfflineTts() {
  const config = {
    model: {
      vits: {
        model: './vits-coqui-de-css10/model.onnx',
        tokens: './vits-coqui-de-css10/tokens.txt',
      },
      debug: true,
      numThreads: 1,
      provider: 'cpu',
    },
    maxNumSentences: 1,
  };
  return new sherpa_onnx.OfflineTts(config);
}

const tts = createOfflineTts();

const text = 'Alles hat ein Ende, nur die Wurst hat zwei.';

const generationConfig = new sherpa_onnx.GenerationConfig({
  sid: 0,
  speed: 1.0,
  silenceScale: 0.2,
});

let start = Date.now();
const audio = tts.generate({
  text: text,
  generationConfig,
  enableExternalBuffer: true,
});
let stop = Date.now();
const elapsed_seconds = (stop - start) / 1000;
const duration = audio.samples.length / audio.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));

const filename = 'test-coqui-de.wav';
sherpa_onnx.writeWave(
    filename, {samples: audio.samples, sampleRate: audio.sampleRate});

console.log(`Saved to ${filename}`);


================================================
FILE: nodejs-addon-examples/test_tts_non_streaming_vits_piper_en.js
================================================
// Copyright (c)  2024  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// please download model files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
function createOfflineTts() {
  const config = {
    model: {
      vits: {
        model: './vits-piper-en_GB-cori-medium/en_GB-cori-medium.onnx',
        tokens: './vits-piper-en_GB-cori-medium/tokens.txt',
        dataDir: './vits-piper-en_GB-cori-medium/espeak-ng-data',
      },
      debug: true,
      numThreads: 1,
      provider: 'cpu',
    },
    maxNumSentences: 1,
  };
  return new sherpa_onnx.OfflineTts(config);
}

const tts = createOfflineTts();

const text =
    'Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.';

const generationConfig = new sherpa_onnx.GenerationConfig({
  sid: 0,
  speed: 1.0,
  silenceScale: 0.2,
});


let start = Date.now();
const audio = tts.generate({text: text, generationConfig});
let stop = Date.now();
const elapsed_seconds = (stop - start) / 1000;
const duration = audio.samples.length / audio.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));

const filename = 'test-piper-en.wav';
sherpa_onnx.writeWave(
    filename, {samples: audio.samples, sampleRate: audio.sampleRate});

console.log(`Saved to ${filename}`);


================================================
FILE: nodejs-addon-examples/test_tts_non_streaming_vits_zh_aishell3.js
================================================
// Copyright (c)  2024  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// please download model files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
function createOfflineTts() {
  const config = {
    model: {
      vits: {
        model: './vits-icefall-zh-aishell3/model.onnx',
        tokens: './vits-icefall-zh-aishell3/tokens.txt',
        lexicon: './vits-icefall-zh-aishell3/lexicon.txt',
      },
      debug: true,
      numThreads: 1,
      provider: 'cpu',
    },
    maxNumSentences: 1,
    ruleFsts:
        './vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/number.fst,./vits-icefall-zh-aishell3/new_heteronym.fst',
    ruleFars: './vits-icefall-zh-aishell3/rule.far',
  };
  return new sherpa_onnx.OfflineTts(config);
}

const tts = createOfflineTts();

const text =
    '他在长沙出生，长白山长大，去过长江，现在他是一个银行的行长，主管行政工作。有困难，请拨110，或者13020240513。今天是2024年5月13号, 他上个月的工资是12345块钱。';

const generationConfig = new sherpa_onnx.GenerationConfig({
  sid: 88,
  speed: 1.0,
  silenceScale: 0.2,
});

let start = Date.now();
const audio = tts.generate({text: text, generationConfig});
let stop = Date.now();
const elapsed_seconds = (stop - start) / 1000;
const duration = audio.samples.length / audio.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));

const filename = 'test-zh-aishell3.wav';
sherpa_onnx.writeWave(
    filename, {samples: audio.samples, sampleRate: audio.sampleRate});

console.log(`Saved to ${filename}`);


================================================
FILE: nodejs-addon-examples/test_tts_non_streaming_vits_zh_ll.js
================================================
// Copyright (c)  2024  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// please download model files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
function createOfflineTts() {
  const config = {
    model: {
      vits: {
        model: './sherpa-onnx-vits-zh-ll/model.onnx',
        tokens: './sherpa-onnx-vits-zh-ll/tokens.txt',
        lexicon: './sherpa-onnx-vits-zh-ll/lexicon.txt',
      },
      debug: true,
      numThreads: 1,
      provider: 'cpu',
    },
    maxNumSentences: 1,
    ruleFsts:
        './sherpa-onnx-vits-zh-ll/date.fst,./sherpa-onnx-vits-zh-ll/phone.fst,./sherpa-onnx-vits-zh-ll/number.fst',
  };
  return new sherpa_onnx.OfflineTts(config);
}

const tts = createOfflineTts();

const text =
    '当夜幕降临，星光点点，伴随着微风拂面，我在静谧中感受着时光的流转，思念如涟漪荡漾，梦境如画卷展开，我与自然融为一体，沉静在这片宁静的美丽之中，感受着生命的奇迹与温柔。2024年5月13号，拨打110或者18920240513。123456块钱。';

const generationConfig = new sherpa_onnx.GenerationConfig({
  sid: 2,
  speed: 1.0,
  silenceScale: 0.2,
});

let start = Date.now();
const audio = tts.generate({text: text, generationConfig});
let stop = Date.now();
const elapsed_seconds = (stop - start) / 1000;
const duration = audio.samples.length / audio.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));

const filename = 'test-zh-ll.wav';
sherpa_onnx.writeWave(
    filename, {samples: audio.samples, sampleRate: audio.sampleRate});

console.log(`Saved to ${filename}`);


================================================
FILE: nodejs-addon-examples/test_tts_non_streaming_zipvoice_zh_en.js
================================================
// Copyright (c)  2026  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// please refer to
// https://k2-fsa.github.io/sherpa/onnx/tts/zipvoice.html
// to download model files
function createOfflineTts() {
  const config = {
    model: {
      zipvoice: {
        tokens: './sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/tokens.txt',
        encoder:
            './sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/encoder.int8.onnx',
        decoder:
            './sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/decoder.int8.onnx',
        vocoder: './vocos_24khz.onnx',
        dataDir:
            './sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/espeak-ng-data',
        lexicon: './sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/lexicon.txt',
      },
      debug: true,
      numThreads: 2,
      provider: 'cpu',
    },
    maxNumSentences: 1,
  };
  return new sherpa_onnx.OfflineTts(config);
}

const tts = createOfflineTts();
const text =
    '小米的价值观是真诚, 热爱. 真诚，就是不欺人也不自欺. 热爱, 就是全心投入并享受其中.';
const referenceText =
    '那还是三十六年前, 一九八七年. 我呢考上了武汉大学的计算机系.';
const referenceAudioFilename =
    './sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/test_wavs/leijun-1.wav';
const referenceWave = sherpa_onnx.readWave(referenceAudioFilename);

const generationConfig = new sherpa_onnx.GenerationConfig({
  speed: 1.0,
  referenceAudio: referenceWave.samples,
  referenceSampleRate: referenceWave.sampleRate,
  referenceText,
  numSteps: 4,
  extra: {min_char_in_sentence: 10},
});

let start = Date.now();
const audio = tts.generate({text, generationConfig});

let stop = Date.now();
const elapsed_seconds = (stop - start) / 1000;
const duration = audio.samples.length / audio.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));

const filename = 'test-zipvoice-zh-en.wav';
sherpa_onnx.writeWave(
    filename, {samples: audio.samples, sampleRate: audio.sampleRate});

console.log(`Saved to ${filename}`);


================================================
FILE: nodejs-addon-examples/test_tts_non_streaming_zipvoice_zh_en_async.js
================================================
// Copyright (c)  2026  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

async function createOfflineTts() {
  const config = {
    model: {
      zipvoice: {
        tokens: './sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/tokens.txt',
        encoder:
            './sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/encoder.int8.onnx',
        decoder:
            './sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/decoder.int8.onnx',
        vocoder: './vocos_24khz.onnx',
        dataDir:
            './sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/espeak-ng-data',
        lexicon: './sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/lexicon.txt',
      },
      debug: false,  // set to true to see verbose logs
      numThreads: 2,
      provider: 'cpu',
    },
    maxNumSentences: 1,
  };

  return await sherpa_onnx.OfflineTts.createAsync(config);
}

/**
 * @param {sherpa_onnx.OfflineTts} tts
 * @param {string} text
 */
async function generateAudioAsync(tts, text) {
  const referenceText =
      '那还是三十六年前, 一九八七年. 我呢考上了武汉大学的计算机系.';
  const referenceAudioFilename =
      './sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/test_wavs/leijun-1.wav';
  const referenceWave = sherpa_onnx.readWave(referenceAudioFilename);

  const generationConfig = new sherpa_onnx.GenerationConfig({
    speed: 1.0,
    referenceAudio: referenceWave.samples,
    referenceSampleRate: referenceWave.sampleRate,
    referenceText,
    numSteps: 4,
    extra: {min_char_in_sentence: 10},
  });

  console.log('Starting generation...');

  const audio = await tts.generateAsync({
    text,
    enableExternalBuffer: true,
    generationConfig,
    onProgress: ({samples, progress}) => {
      process.stdout.write(
          `Progress: ${(progress * 100).toFixed(1)}%, ` +
          `Samples: ${samples.length}\r`);
      return 1;
    },
  });

  console.log('\nGeneration complete!');
  return audio;
}

async function main() {
  console.log('Creating OfflineTts...');
  const tts = await createOfflineTts();
  console.log('OfflineTts created!');

  const text =
      '小米的价值观是真诚, 热爱. 真诚，就是不欺人也不自欺. 热爱, 就是全心投入并享受其中.';

  const start = Date.now();
  const audio = await generateAudioAsync(tts, text);
  const stop = Date.now();

  const elapsed_seconds = (stop - start) / 1000;
  const duration = audio.samples.length / audio.sampleRate;
  const real_time_factor = elapsed_seconds / duration;

  console.log('Wave duration', duration.toFixed(3), 'seconds');
  console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
  console.log(
      `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
      real_time_factor.toFixed(3));

  const filename = 'test-zipvoice-zh-en-async.wav';
  sherpa_onnx.writeWave(filename, {
    samples: audio.samples,
    sampleRate: audio.sampleRate,
  });
  console.log(`Saved to ${filename}`);
}

main().catch((err) => {
  console.error('Error:', err);
});


================================================
FILE: nodejs-addon-examples/test_tts_non_streaming_zipvoice_zh_en_play_async.js
================================================
// Copyright (c)  2026  Xiaomi Corporation
//
// npm install speaker
//
const Speaker = require('speaker');
const sherpa_onnx = require('sherpa-onnx-node');

async function createOfflineTts() {
  const config = {
    model: {
      zipvoice: {
        tokens: './sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/tokens.txt',
        encoder:
            './sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/encoder.int8.onnx',
        decoder:
            './sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/decoder.int8.onnx',
        vocoder: './vocos_24khz.onnx',
        dataDir:
            './sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/espeak-ng-data',
        lexicon: './sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/lexicon.txt',
      },
      debug: false,  // set to true to see verbose logs
      numThreads: 2,
      provider: 'cpu',
    },
    maxNumSentences: 1,
  };

  return await sherpa_onnx.OfflineTts.createAsync(config);
}

function createSpeaker(sampleRate) {
  return new Speaker({
    channels: 1,
    bitDepth: 16,
    sampleRate: sampleRate,
    signed: true,
  });
}

function float32ToInt16Buffer(samples) {
  const buffer = Buffer.alloc(samples.length * 2);

  for (let i = 0; i < samples.length; ++i) {
    const s = Math.max(-1, Math.min(1, samples[i]));
    const v = s < 0 ? s * 0x8000 : s * 0x7fff;
    buffer.writeInt16LE(Math.round(v), i * 2);
  }

  return buffer;
}

function waitForEvent(emitter, eventName) {
  return new Promise((resolve, reject) => {
    emitter.once(eventName, resolve);
    emitter.once('error', reject);
  });
}

/**
 * @param {sherpa_onnx.OfflineTts} tts
 * @param {string} text
 */
async function generateAudioAsync(tts, text) {
  const referenceText =
      '那还是三十六年前, 一九八七年. 我呢考上了武汉大学的计算机系.';
  const referenceAudioFilename =
      './sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/test_wavs/leijun-1.wav';
  const referenceWave = sherpa_onnx.readWave(referenceAudioFilename);

  const generationConfig = new sherpa_onnx.GenerationConfig({
    speed: 1.0,
    referenceAudio: referenceWave.samples,
    referenceSampleRate: referenceWave.sampleRate,
    referenceText,
    numSteps: 4,
    extra: {min_char_in_sentence: 10},
  });

  const speaker = createSpeaker(tts.sampleRate);
  const start = Date.now();

  console.log('Starting generation and playback...');

  const audio = await tts.generateAsync({
    text,
    enableExternalBuffer: true,
    generationConfig,
    onProgress: ({samples, progress}) => {
      process.stdout.write(
          `Progress: ${(progress * 100).toFixed(1)}%, ` +
          `Chunk samples: ${samples.length}\r`);
      speaker.write(float32ToInt16Buffer(samples));
      return 1;
    },
  });

  const generationStop = Date.now();
  speaker.end();
  await waitForEvent(speaker, 'close');
  const playbackStop = Date.now();

  console.log('\nGeneration and playback complete!');
  return {
    audio,
    generationElapsedSeconds: (generationStop - start) / 1000,
    playbackElapsedSeconds: (playbackStop - start) / 1000,
  };
}

async function main() {
  console.log('Creating OfflineTts...');
  const tts = await createOfflineTts();
  console.log('OfflineTts created!');

  const text =
      '小米的价值观是真诚, 热爱. 真诚，就是不欺人也不自欺. 热爱, 就是全心投入并享受其中.';

  const {audio, generationElapsedSeconds, playbackElapsedSeconds} =
      await generateAudioAsync(tts, text);
  const duration = audio.samples.length / audio.sampleRate;
  const real_time_factor = generationElapsedSeconds / duration;

  console.log('Wave duration', duration.toFixed(3), 'seconds');
  console.log(
      'Generation elapsed', generationElapsedSeconds.toFixed(3), 'seconds');
  console.log(
      'Playback drained in', playbackElapsedSeconds.toFixed(3), 'seconds');
  console.log(
      `RTF = ${generationElapsedSeconds.toFixed(3)}/${duration.toFixed(3)} =`,
      real_time_factor.toFixed(3));

  const filename = 'test-zipvoice-zh-en-play-async.wav';
  sherpa_onnx.writeWave(filename, {
    samples: audio.samples,
    sampleRate: audio.sampleRate,
  });
  console.log(`Saved to ${filename}`);
}

main().catch((err) => {
  console.error('Error:', err);
});


================================================
FILE: nodejs-addon-examples/test_vad_asr_non_streaming_moonshine_microphone.js
================================================
// Copyright (c)  2023-2024  Xiaomi Corporation (authors: Fangjun Kuang)
//
const portAudio = require('naudiodon2');
// console.log(portAudio.getDevices());

const sherpa_onnx = require('sherpa-onnx-node');

function createRecognizer() {
  // Please download test files from
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  const config = {
    'featConfig': {
      'sampleRate': 16000,
      'featureDim': 80,
    },
    'modelConfig': {
      'moonshine': {
        'preprocessor': './sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx',
        'encoder': './sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx',
        'uncachedDecoder':
            './sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx',
        'cachedDecoder':
            './sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx',
      },
      'tokens': './sherpa-onnx-moonshine-tiny-en-int8/tokens.txt',
      'numThreads': 2,
      'provider': 'cpu',
      'debug': 1,
    }
  };

  return new sherpa_onnx.OfflineRecognizer(config);
}

function createVad() {
  // please download silero_vad.onnx from
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
  const config = {
    sileroVad: {
      model: './silero_vad.onnx',
      threshold: 0.5,
      minSpeechDuration: 0.25,
      minSilenceDuration: 0.5,
      windowSize: 512,
    },
    sampleRate: 16000,
    debug: true,
    numThreads: 1,
  };

  const bufferSizeInSeconds = 60;

  return new sherpa_onnx.Vad(config, bufferSizeInSeconds);
}

const recognizer = createRecognizer();
const vad = createVad();

const bufferSizeInSeconds = 30;
const buffer =
    new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: vad.config.sampleRate
  }
});

let printed = false;
let index = 0;
ai.on('data', data => {
  const windowSize = vad.config.sileroVad.windowSize;
  buffer.push(new Float32Array(data.buffer));
  while (buffer.size() > windowSize) {
    const samples = buffer.get(buffer.head(), windowSize);
    buffer.pop(windowSize);
    vad.acceptWaveform(samples);
  }

  while (!vad.isEmpty()) {
    const segment = vad.front();
    vad.pop();
    const stream = recognizer.createStream();
    stream.acceptWaveform({
      samples: segment.samples,
      sampleRate: recognizer.config.featConfig.sampleRate
    });
    recognizer.decode(stream);
    const r = recognizer.getResult(stream);
    if (r.text.length > 0) {
      const text = r.text.toLowerCase().trim();
      console.log(`${index}: ${text}`);

      const filename = `${index}-${text}-${
                           new Date()
                               .toLocaleTimeString('en-US', {hour12: false})
                               .split(' ')[0]}.wav`
                           .replace(/:/g, '-');

      sherpa_onnx.writeWave(
          filename,
          {samples: segment.samples, sampleRate: vad.config.sampleRate});

      index += 1;
    }
  }
});

ai.start();
console.log('Started! Please speak');


================================================
FILE: nodejs-addon-examples/test_vad_asr_non_streaming_nemo_ctc_microphone.js
================================================
// Copyright (c)  2023-2024  Xiaomi Corporation (authors: Fangjun Kuang)
//
const portAudio = require('naudiodon2');
// console.log(portAudio.getDevices());

const sherpa_onnx = require('sherpa-onnx-node');

function createRecognizer() {
  // Please download test files from
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  const config = {
    'featConfig': {
      'sampleRate': 16000,
      'featureDim': 80,
    },
    'modelConfig': {
      'nemoCtc': {
        'model':
            './sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/model.onnx',
      },
      'tokens':
          './sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt',
      'numThreads': 2,
      'provider': 'cpu',
      'debug': 1,
    }
  };

  return new sherpa_onnx.OfflineRecognizer(config);
}

function createVad() {
  // please download silero_vad.onnx from
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
  const config = {
    sileroVad: {
      model: './silero_vad.onnx',
      threshold: 0.5,
      minSpeechDuration: 0.25,
      minSilenceDuration: 0.5,
      windowSize: 512,
    },
    sampleRate: 16000,
    debug: true,
    numThreads: 1,
  };

  const bufferSizeInSeconds = 60;

  return new sherpa_onnx.Vad(config, bufferSizeInSeconds);
}

const recognizer = createRecognizer();
const vad = createVad();

const bufferSizeInSeconds = 30;
const buffer =
    new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: vad.config.sampleRate
  }
});

let printed = false;
let index = 0;
ai.on('data', data => {
  const windowSize = vad.config.sileroVad.windowSize;
  buffer.push(new Float32Array(data.buffer));
  while (buffer.size() > windowSize) {
    const samples = buffer.get(buffer.head(), windowSize);
    buffer.pop(windowSize);
    vad.acceptWaveform(samples);
  }

  while (!vad.isEmpty()) {
    const segment = vad.front();
    vad.pop();
    const stream = recognizer.createStream();
    stream.acceptWaveform({
      samples: segment.samples,
      sampleRate: recognizer.config.featConfig.sampleRate
    });
    recognizer.decode(stream);
    const r = recognizer.getResult(stream);
    if (r.text.length > 0) {
      const text = r.text.toLowerCase().trim();
      console.log(`${index}: ${text}`);

      const filename = `${index}-${text}-${
                           new Date()
                               .toLocaleTimeString('en-US', {hour12: false})
                               .split(' ')[0]}.wav`
                           .replace(/:/g, '-');

      sherpa_onnx.writeWave(
          filename,
          {samples: segment.samples, sampleRate: vad.config.sampleRate});

      index += 1;
    }
  }
});

ai.start();
console.log('Started! Please speak');


================================================
FILE: nodejs-addon-examples/test_vad_asr_non_streaming_paraformer_microphone.js
================================================
// Copyright (c)  2023-2024  Xiaomi Corporation (authors: Fangjun Kuang)
//
const portAudio = require('naudiodon2');
// console.log(portAudio.getDevices());

const sherpa_onnx = require('sherpa-onnx-node');

function createRecognizer() {
  // Please download test files from
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  const config = {
    'featConfig': {
      'sampleRate': 16000,
      'featureDim': 80,
    },
    'modelConfig': {
      'paraformer': {
        'model': './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx',
      },
      'tokens': './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt',
      'numThreads': 2,
      'provider': 'cpu',
      'debug': 1,
    }
  };

  return new sherpa_onnx.OfflineRecognizer(config);
}

function createVad() {
  // please download silero_vad.onnx from
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
  const config = {
    sileroVad: {
      model: './silero_vad.onnx',
      threshold: 0.5,
      minSpeechDuration: 0.25,
      minSilenceDuration: 0.5,
      windowSize: 512,
    },
    sampleRate: 16000,
    debug: true,
    numThreads: 1,
  };

  const bufferSizeInSeconds = 60;

  return new sherpa_onnx.Vad(config, bufferSizeInSeconds);
}

const recognizer = createRecognizer();
const vad = createVad();

const bufferSizeInSeconds = 30;
const buffer =
    new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: vad.config.sampleRate
  }
});

let printed = false;
let index = 0;
ai.on('data', data => {
  const windowSize = vad.config.sileroVad.windowSize;
  buffer.push(new Float32Array(data.buffer));
  while (buffer.size() > windowSize) {
    const samples = buffer.get(buffer.head(), windowSize);
    buffer.pop(windowSize);
    vad.acceptWaveform(samples);
  }

  while (!vad.isEmpty()) {
    const segment = vad.front();
    vad.pop();
    const stream = recognizer.createStream();
    stream.acceptWaveform({
      samples: segment.samples,
      sampleRate: recognizer.config.featConfig.sampleRate
    });
    recognizer.decode(stream);
    const r = recognizer.getResult(stream);
    if (r.text.length > 0) {
      const text = r.text.toLowerCase().trim();
      console.log(`${index}: ${text}`);

      const filename = `${index}-${text}-${
                           new Date()
                               .toLocaleTimeString('en-US', {hour12: false})
                               .split(' ')[0]}.wav`
                           .replace(/:/g, '-');

      sherpa_onnx.writeWave(
          filename,
          {samples: segment.samples, sampleRate: vad.config.sampleRate});

      index += 1;
    }
  }
});

ai.start();
console.log('Started! Please speak');


================================================
FILE: nodejs-addon-examples/test_vad_asr_non_streaming_sense_voice_microphone.js
================================================
// Copyright (c)  2023-2024  Xiaomi Corporation (authors: Fangjun Kuang)
//
const portAudio = require('naudiodon2');
// console.log(portAudio.getDevices());

const sherpa_onnx = require('sherpa-onnx-node');

function createRecognizer() {
  // Please download test files from
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  const config = {
    'featConfig': {
      'sampleRate': 16000,
      'featureDim': 80,
    },
    'modelConfig': {
      'senseVoice': {
        'model':
            './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx',
        'useInverseTextNormalization': 1,
      },
      'tokens':
          './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt',
      'numThreads': 2,
      'provider': 'cpu',
      'debug': 1,
    }
  };

  return new sherpa_onnx.OfflineRecognizer(config);
}

function createVad() {
  // please download silero_vad.onnx from
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
  const config = {
    sileroVad: {
      model: './silero_vad.onnx',
      threshold: 0.5,
      minSpeechDuration: 0.25,
      minSilenceDuration: 0.5,
      windowSize: 512,
    },
    sampleRate: 16000,
    debug: true,
    numThreads: 1,
  };

  const bufferSizeInSeconds = 60;

  return new sherpa_onnx.Vad(config, bufferSizeInSeconds);
}

const recognizer = createRecognizer();
const vad = createVad();

const bufferSizeInSeconds = 30;
const buffer =
    new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: vad.config.sampleRate
  }
});

let printed = false;
let index = 0;
ai.on('data', data => {
  const windowSize = vad.config.sileroVad.windowSize;
  buffer.push(new Float32Array(data.buffer));
  while (buffer.size() > windowSize) {
    const samples = buffer.get(buffer.head(), windowSize);
    buffer.pop(windowSize);
    vad.acceptWaveform(samples);
  }

  while (!vad.isEmpty()) {
    const segment = vad.front();
    vad.pop();
    const stream = recognizer.createStream();
    stream.acceptWaveform({
      samples: segment.samples,
      sampleRate: recognizer.config.featConfig.sampleRate
    });
    recognizer.decode(stream);
    const r = recognizer.getResult(stream);
    if (r.text.length > 0) {
      const text = r.text.toLowerCase().trim();
      console.log(`${index}: ${text}`);

      const filename = `${index}-${text}-${
                           new Date()
                               .toLocaleTimeString('en-US', {hour12: false})
                               .split(' ')[0]}.wav`
                           .replace(/:/g, '-');

      sherpa_onnx.writeWave(
          filename,
          {samples: segment.samples, sampleRate: vad.config.sampleRate});

      index += 1;
    }
  }
});

ai.start();
console.log('Started! Please speak');


================================================
FILE: nodejs-addon-examples/test_vad_asr_non_streaming_transducer_microphone.js
================================================
// Copyright (c)  2023-2024  Xiaomi Corporation (authors: Fangjun Kuang)
//
const portAudio = require('naudiodon2');
// console.log(portAudio.getDevices());

const sherpa_onnx = require('sherpa-onnx-node');

function createRecognizer() {
  // Please download test files from
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  const config = {
    'featConfig': {
      'sampleRate': 16000,
      'featureDim': 80,
    },
    'modelConfig': {
      'transducer': {
        'encoder':
            './sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.int8.onnx',
        'decoder':
            './sherpa-onnx-zipformer-en-2023-04-01/decoder-epoch-99-avg-1.onnx',
        'joiner':
            './sherpa-onnx-zipformer-en-2023-04-01/joiner-epoch-99-avg-1.int8.onnx',
      },
      'tokens': './sherpa-onnx-zipformer-en-2023-04-01/tokens.txt',
      'numThreads': 2,
      'provider': 'cpu',
      'debug': 1,
    }
  };

  return new sherpa_onnx.OfflineRecognizer(config);
}

function createVad() {
  // please download silero_vad.onnx from
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
  const config = {
    sileroVad: {
      model: './silero_vad.onnx',
      threshold: 0.5,
      minSpeechDuration: 0.25,
      minSilenceDuration: 0.5,
      windowSize: 512,
    },
    sampleRate: 16000,
    debug: true,
    numThreads: 1,
  };

  const bufferSizeInSeconds = 60;

  return new sherpa_onnx.Vad(config, bufferSizeInSeconds);
}

const recognizer = createRecognizer();
const vad = createVad();

const bufferSizeInSeconds = 30;
const buffer =
    new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: vad.config.sampleRate
  }
});

let printed = false;
let index = 0;
ai.on('data', data => {
  const windowSize = vad.config.sileroVad.windowSize;
  buffer.push(new Float32Array(data.buffer));
  while (buffer.size() > windowSize) {
    const samples = buffer.get(buffer.head(), windowSize);
    buffer.pop(windowSize);
    vad.acceptWaveform(samples);
  }

  while (!vad.isEmpty()) {
    const segment = vad.front();
    vad.pop();
    const stream = recognizer.createStream();
    stream.acceptWaveform({
      samples: segment.samples,
      sampleRate: recognizer.config.featConfig.sampleRate
    });
    recognizer.decode(stream);
    const r = recognizer.getResult(stream);
    if (r.text.length > 0) {
      const text = r.text.toLowerCase().trim();
      console.log(`${index}: ${text}`);

      const filename = `${index}-${text}-${
                           new Date()
                               .toLocaleTimeString('en-US', {hour12: false})
                               .split(' ')[0]}.wav`
                           .replace(/:/g, '-');

      sherpa_onnx.writeWave(
          filename,
          {samples: segment.samples, sampleRate: vad.config.sampleRate});

      index += 1;
    }
  }
});

ai.start();
console.log('Started! Please speak');


================================================
FILE: nodejs-addon-examples/test_vad_asr_non_streaming_whisper_microphone.js
================================================
// Copyright (c)  2023-2024  Xiaomi Corporation (authors: Fangjun Kuang)
//
const portAudio = require('naudiodon2');
// console.log(portAudio.getDevices());

const sherpa_onnx = require('sherpa-onnx-node');

function createRecognizer() {
  // Please download test files from
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  const config = {
    'featConfig': {
      'sampleRate': 16000,
      'featureDim': 80,
    },
    'modelConfig': {
      'whisper': {
        'encoder': './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx',
        'decoder': './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx',
      },
      'tokens': './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt',
      'numThreads': 2,
      'provider': 'cpu',
      'debug': 1,
    }
  };

  return new sherpa_onnx.OfflineRecognizer(config);
}

function createVad() {
  // please download silero_vad.onnx from
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
  const config = {
    sileroVad: {
      model: './silero_vad.onnx',
      threshold: 0.5,
      minSpeechDuration: 0.25,
      minSilenceDuration: 0.5,
      windowSize: 512,
    },
    sampleRate: 16000,
    debug: true,
    numThreads: 1,
  };

  const bufferSizeInSeconds = 60;

  return new sherpa_onnx.Vad(config, bufferSizeInSeconds);
}

const recognizer = createRecognizer();
const vad = createVad();

const bufferSizeInSeconds = 30;
const buffer =
    new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: vad.config.sampleRate
  }
});

let printed = false;
let index = 0;
ai.on('data', data => {
  const windowSize = vad.config.sileroVad.windowSize;
  buffer.push(new Float32Array(data.buffer));
  while (buffer.size() > windowSize) {
    const samples = buffer.get(buffer.head(), windowSize);
    buffer.pop(windowSize);
    vad.acceptWaveform(samples);
  }

  while (!vad.isEmpty()) {
    const segment = vad.front();
    vad.pop();
    const stream = recognizer.createStream();
    stream.acceptWaveform({
      samples: segment.samples,
      sampleRate: recognizer.config.featConfig.sampleRate
    });
    recognizer.decode(stream);
    const r = recognizer.getResult(stream);
    if (r.text.length > 0) {
      const text = r.text.toLowerCase().trim();
      console.log(`${index}: ${text}`);

      const filename = `${index}-${text}-${
                           new Date()
                               .toLocaleTimeString('en-US', {hour12: false})
                               .split(' ')[0]}.wav`
                           .replace(/:/g, '-');

      sherpa_onnx.writeWave(
          filename,
          {samples: segment.samples, sampleRate: vad.config.sampleRate});

      index += 1;
    }
  }
});

ai.start();
console.log('Started! Please speak');


================================================
FILE: nodejs-addon-examples/test_vad_asr_non_streaming_zipformer_ctc_microphone.js
================================================
// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)
//
const portAudio = require('naudiodon2');
// console.log(portAudio.getDevices());

const sherpa_onnx = require('sherpa-onnx-node');

function createRecognizer() {
  // Please download test files from
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  const config = {
    'featConfig': {
      'sampleRate': 16000,
      'featureDim': 80,
    },
    'modelConfig': {
      'zipformerCtc': {
        'model':
            './sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx',
      },
      'tokens': './sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt',
      'numThreads': 2,
      'provider': 'cpu',
      'debug': 1,
    }
  };

  return new sherpa_onnx.OfflineRecognizer(config);
}

function createVad() {
  // please download silero_vad.onnx from
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
  const config = {
    sileroVad: {
      model: './silero_vad.onnx',
      threshold: 0.5,
      minSpeechDuration: 0.25,
      minSilenceDuration: 0.5,
      windowSize: 512,
    },
    sampleRate: 16000,
    debug: true,
    numThreads: 1,
  };

  const bufferSizeInSeconds = 60;

  return new sherpa_onnx.Vad(config, bufferSizeInSeconds);
}

const recognizer = createRecognizer();
const vad = createVad();

const bufferSizeInSeconds = 30;
const buffer =
    new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: vad.config.sampleRate
  }
});

let printed = false;
let index = 0;
ai.on('data', data => {
  const windowSize = vad.config.sileroVad.windowSize;
  buffer.push(new Float32Array(data.buffer));
  while (buffer.size() > windowSize) {
    const samples = buffer.get(buffer.head(), windowSize);
    buffer.pop(windowSize);
    vad.acceptWaveform(samples);
  }

  while (!vad.isEmpty()) {
    const segment = vad.front();
    vad.pop();
    const stream = recognizer.createStream();
    stream.acceptWaveform({
      samples: segment.samples,
      sampleRate: recognizer.config.featConfig.sampleRate
    });
    recognizer.decode(stream);
    const r = recognizer.getResult(stream);
    if (r.text.length > 0) {
      const text = r.text.toLowerCase().trim();
      console.log(`${index}: ${text}`);

      const filename = `${index}-${text}-${
                           new Date()
                               .toLocaleTimeString('en-US', {hour12: false})
                               .split(' ')[0]}.wav`
                           .replace(/:/g, '-');

      sherpa_onnx.writeWave(
          filename,
          {samples: segment.samples, sampleRate: vad.config.sampleRate});

      index += 1;
    }
  }
});

ai.start();
console.log('Started! Please speak');


================================================
FILE: nodejs-addon-examples/test_vad_microphone.js
================================================
// Copyright (c)  2023-2024  Xiaomi Corporation (authors: Fangjun Kuang)

const portAudio = require('naudiodon2');
// console.log(portAudio.getDevices());

const sherpa_onnx = require('sherpa-onnx-node');

function createVad() {
  // please download silero_vad.onnx from
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
  //
  // OR
  //
  // please download ten-vad.onnx from
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
  const config = {
    sileroVad: {
      // model: '',
      model: './silero_vad.onnx',
      threshold: 0.5,
      minSpeechDuration: 0.25,
      minSilenceDuration: 0.5,
      windowSize: 512,
    },
    tenVad: {
      model: '',
      // model: './ten-vad.onnx',
      threshold: 0.5,
      minSpeechDuration: 0.25,
      minSilenceDuration: 0.5,
      windowSize: 256,
    },
    sampleRate: 16000,
    debug: true,
    numThreads: 1,
  };

  const bufferSizeInSeconds = 60;

  return new sherpa_onnx.Vad(config, bufferSizeInSeconds);
}

const vad = createVad();

const bufferSizeInSeconds = 30;
const buffer =
    new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);


const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: vad.config.sampleRate,
  }
});

let printed = false;
let index = 0;
ai.on('data', data => {
  const windowSize = vad.config.sileroVad.model != '' ?
      vad.config.sileroVad.windowSize :
      vad.config.tenVad.windowSize;

  buffer.push(new Float32Array(data.buffer));
  while (buffer.size() > windowSize) {
    const samples = buffer.get(buffer.head(), windowSize);
    buffer.pop(windowSize);
    vad.acceptWaveform(samples);
    if (vad.isDetected() && !printed) {
      console.log(`${index}: Detected speech`);
      printed = true;
    }

    if (!vad.isDetected()) {
      printed = false;
    }

    while (!vad.isEmpty()) {
      const segment = vad.front();
      vad.pop();
      const filename = `${index}-${
                           new Date()
                               .toLocaleTimeString('en-US', {hour12: false})
                               .split(' ')[0]}.wav`
                           .replace(/:/g, '-');
      sherpa_onnx.writeWave(
          filename,
          {samples: segment.samples, sampleRate: vad.config.sampleRate});
      const duration = segment.samples.length / vad.config.sampleRate;
      console.log(`${index} End of speech. Duration: ${duration} seconds`);
      console.log(`Saved to ${filename}`);
      index += 1;
    }
  }
});

ai.on('close', () => {
  console.log('Free resources');
});

ai.start();
console.log('Started! Please speak');


================================================
FILE: nodejs-addon-examples/test_vad_spoken_language_identification_microphone.js
================================================
// Copyright (c)  2023-2024  Xiaomi Corporation (authors: Fangjun Kuang)

const portAudio = require('naudiodon2');
// console.log(portAudio.getDevices());

const sherpa_onnx = require('sherpa-onnx-node');

function createVad() {
  // please download silero_vad.onnx from
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
  const config = {
    sileroVad: {
      model: './silero_vad.onnx',
      threshold: 0.5,
      minSpeechDuration: 0.25,
      minSilenceDuration: 0.5,
      windowSize: 512,
    },
    sampleRate: 16000,
    debug: true,
    numThreads: 1,
  };

  const bufferSizeInSeconds = 60;

  return new sherpa_onnx.Vad(config, bufferSizeInSeconds);
}

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
function createSpokenLanguageID() {
  const config = {
    whisper: {
      encoder: './sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx',
      decoder: './sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx',
    },
    debug: true,
    numThreads: 1,
    provider: 'cpu',
  };
  return new sherpa_onnx.SpokenLanguageIdentification(config);
}

const slid = createSpokenLanguageID();
const vad = createVad();

const display = new Intl.DisplayNames(['en'], {type: 'language'});

const bufferSizeInSeconds = 30;
const buffer =
    new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);


const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: vad.config.sampleRate,
  }
});

let printed = false;
let index = 0;
ai.on('data', data => {
  const windowSize = vad.config.sileroVad.windowSize;
  buffer.push(new Float32Array(data.buffer));
  while (buffer.size() > windowSize) {
    const samples = buffer.get(buffer.head(), windowSize);
    buffer.pop(windowSize);
    vad.acceptWaveform(samples);
    if (vad.isDetected() && !printed) {
      console.log(`${index}: Detected speech`);
      printed = true;
    }

    if (!vad.isDetected()) {
      printed = false;
    }

    while (!vad.isEmpty()) {
      const segment = vad.front();
      vad.pop();

      const stream = slid.createStream();
      stream.acceptWaveform(
          {samples: segment.samples, sampleRate: vad.config.sampleRate});
      const lang = slid.compute(stream);
      const fullLang = display.of(lang);

      const filename = `${index}-${fullLang}-${
                           new Date()
                               .toLocaleTimeString('en-US', {hour12: false})
                               .split(' ')[0]}.wav`
                           .replace(/:/g, '-');

      sherpa_onnx.writeWave(
          filename,
          {samples: segment.samples, sampleRate: vad.config.sampleRate});
      const duration = segment.samples.length / vad.config.sampleRate;
      console.log(`${index} End of speech. Duration: ${
          duration} seconds.\n Detected language: ${fullLang}`);
      console.log(`Saved to ${filename}`);
      index += 1;
    }
  }
});

ai.on('close', () => {
  console.log('Free resources');
});

ai.start();
console.log('Started! Please speak');


================================================
FILE: nodejs-addon-examples/test_vad_with_non_streaming_asr_moonshine.js
================================================
// Copyright (c)  2023-2024  Xiaomi Corporation (authors: Fangjun Kuang)

const sherpa_onnx = require('sherpa-onnx-node');

function createRecognizer() {
  // Please download test files from
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  const config = {
    'featConfig': {
      'sampleRate': 16000,
      'featureDim': 80,
    },
    'modelConfig': {
      'moonshine': {
        'preprocessor': './sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx',
        'encoder': './sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx',
        'uncachedDecoder':
            './sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx',
        'cachedDecoder':
            './sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx',
      },
      'tokens': './sherpa-onnx-moonshine-tiny-en-int8/tokens.txt',
      'numThreads': 2,
      'provider': 'cpu',
      'debug': 1,
    }
  };

  return new sherpa_onnx.OfflineRecognizer(config);
}

function createVad() {
  // please download silero_vad.onnx from
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
  const config = {
    sileroVad: {
      model: './silero_vad.onnx',
      threshold: 0.5,
      minSpeechDuration: 0.25,
      minSilenceDuration: 0.5,
      maxSpeechDuration: 5,
      windowSize: 512,
    },
    sampleRate: 16000,
    debug: true,
    numThreads: 1,
  };

  const bufferSizeInSeconds = 60;

  return new sherpa_onnx.Vad(config, bufferSizeInSeconds);
}

const recognizer = createRecognizer();
const vad = createVad();

// please download ./Obama.wav from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const waveFilename = './Obama.wav';
const wave = sherpa_onnx.readWave(waveFilename);

if (wave.sampleRate != recognizer.config.featConfig.sampleRate) {
  throw new Error(
      'Expected sample rate: ${recognizer.config.featConfig.sampleRate}. Given: ${wave.sampleRate}');
}

console.log('Started');
let start = Date.now();

const windowSize = vad.config.sileroVad.windowSize;
for (let i = 0; i < wave.samples.length; i += windowSize) {
  const thisWindow = wave.samples.subarray(i, i + windowSize);
  vad.acceptWaveform(thisWindow);

  while (!vad.isEmpty()) {
    const segment = vad.front();
    vad.pop();

    let start_time = segment.start / wave.sampleRate;
    let end_time = start_time + segment.samples.length / wave.sampleRate;

    start_time = start_time.toFixed(2);
    end_time = end_time.toFixed(2);

    const stream = recognizer.createStream();
    stream.acceptWaveform(
        {samples: segment.samples, sampleRate: wave.sampleRate});

    recognizer.decode(stream);
    const r = recognizer.getResult(stream);
    if (r.text.length > 0) {
      const text = r.text.toLowerCase().trim();
      console.log(`${start_time} -- ${end_time}: ${text}`);
    }
  }
}

vad.flush();

while (!vad.isEmpty()) {
  const segment = vad.front();
  vad.pop();

  let start_time = segment.start / wave.sampleRate;
  let end_time = start_time + segment.samples.length / wave.sampleRate;

  start_time = start_time.toFixed(2);
  end_time = end_time.toFixed(2);

  const stream = recognizer.createStream();
  stream.acceptWaveform(
      {samples: segment.samples, sampleRate: wave.sampleRate});

  recognizer.decode(stream);
  const r = recognizer.getResult(stream);
  if (r.text.length > 0) {
    const text = r.text.toLowerCase().trim();
    console.log(`${start_time} -- ${end_time}: ${text}`);
  }
}

let stop = Date.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));


================================================
FILE: nodejs-addon-examples/test_vad_with_non_streaming_asr_whisper.js
================================================
// Copyright (c)  2023-2024  Xiaomi Corporation (authors: Fangjun Kuang)

const sherpa_onnx = require('sherpa-onnx-node');

function createRecognizer() {
  // Please download test files from
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  const config = {
    'featConfig': {
      'sampleRate': 16000,
      'featureDim': 80,
    },
    'modelConfig': {
      'whisper': {
        'encoder': './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx',
        'decoder': './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx',
      },
      'tokens': './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt',
      'numThreads': 2,
      'provider': 'cpu',
      'debug': 1,
    }
  };

  return new sherpa_onnx.OfflineRecognizer(config);
}

function createVad() {
  // please download silero_vad.onnx from
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
  const config = {
    sileroVad: {
      model: './silero_vad.onnx',
      threshold: 0.5,
      minSpeechDuration: 0.25,
      minSilenceDuration: 0.5,
      maxSpeechDuration: 5,
      windowSize: 512,
    },
    sampleRate: 16000,
    debug: true,
    numThreads: 1,
  };

  const bufferSizeInSeconds = 60;

  return new sherpa_onnx.Vad(config, bufferSizeInSeconds);
}

const recognizer = createRecognizer();
const vad = createVad();

// please download ./Obama.wav from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const waveFilename = './Obama.wav';
const wave = sherpa_onnx.readWave(waveFilename);

if (wave.sampleRate != recognizer.config.featConfig.sampleRate) {
  throw new Error(
      'Expected sample rate: ${recognizer.config.featConfig.sampleRate}. Given: ${wave.sampleRate}');
}

console.log('Started');
let start = Date.now();

const windowSize = vad.config.sileroVad.windowSize;
for (let i = 0; i < wave.samples.length; i += windowSize) {
  const thisWindow = wave.samples.subarray(i, i + windowSize);
  vad.acceptWaveform(thisWindow);

  while (!vad.isEmpty()) {
    const segment = vad.front();
    vad.pop();

    let start_time = segment.start / wave.sampleRate;
    let end_time = start_time + segment.samples.length / wave.sampleRate;

    start_time = start_time.toFixed(2);
    end_time = end_time.toFixed(2);

    const stream = recognizer.createStream();
    stream.acceptWaveform(
        {samples: segment.samples, sampleRate: wave.sampleRate});

    recognizer.decode(stream);
    const r = recognizer.getResult(stream);
    if (r.text.length > 0) {
      const text = r.text.toLowerCase().trim();
      console.log(`${start_time} -- ${end_time}: ${text}`);
    }
  }
}

vad.flush();

while (!vad.isEmpty()) {
  const segment = vad.front();
  vad.pop();

  let start_time = segment.start / wave.sampleRate;
  let end_time = start_time + segment.samples.length / wave.sampleRate;

  start_time = start_time.toFixed(2);
  end_time = end_time.toFixed(2);

  const stream = recognizer.createStream();
  stream.acceptWaveform(
      {samples: segment.samples, sampleRate: wave.sampleRate});

  recognizer.decode(stream);
  const r = recognizer.getResult(stream);
  if (r.text.length > 0) {
    const text = r.text.toLowerCase().trim();
    console.log(`${start_time} -- ${end_time}: ${text}`);
  }
}

let stop = Date.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));


================================================
FILE: nodejs-examples/.gitignore
================================================
node_modules
lib
package-lock.json
*.tar.bz2


================================================
FILE: nodejs-examples/README.md
================================================
# Introduction

Note: You need `Node >= 18`. 

Note: For Mac M1 and other silicon chip series, do check the example `test-online-paraformer-microphone-mic.js` 

This directory contains nodejs examples for [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx).

It uses WebAssembly to wrap `sherpa-onnx` for NodeJS and it does not support multiple threads.

Note: [../nodejs-addon-examples](../nodejs-addon-examples) uses
[node-addon-api](https://github.com/nodejs/node-addon-api) to wrap
`sherpa-onnx` for NodeJS and it supports multiple threads.

Before you continue, please first run

```bash
cd ./nodejs-examples

npm i
```

In the following, we describe how to use [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx)
for text-to-speech and speech-to-text.


# Speech enhancement

In the following, we demonstrate how to run speech enhancement.

```bash
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet_baseline.onnx
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
node ./test-offline-speech-enhancement-gtcrn.js
node ./test-online-speech-enhancement-gtcrn.js
```

The GTCRN example files use `gtcrn_simple.onnx`.

DPDFNet has a separate example file. Download DPDFNet models from
`https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models`
or the official Hugging Face hub `https://huggingface.co/Ceva-IP/DPDFNet`

Use 16 kHz DPDFNet models such as `dpdfnet_baseline.onnx`, `dpdfnet2.onnx`,
`dpdfnet4.onnx`, or `dpdfnet8.onnx` if you want enhanced audio for downstream
ASR or speech recognition, and use `dpdfnet2_48khz_hr.onnx` if you want 48 kHz
enhancement output.

```bash
node ./test-offline-speech-enhancement-dpdfnet.js
node ./test-online-speech-enhancement-dpdfnet.js
```

The following four example files are available:

```bash
node ./test-offline-speech-enhancement-gtcrn.js
node ./test-offline-speech-enhancement-dpdfnet.js
node ./test-online-speech-enhancement-gtcrn.js
node ./test-online-speech-enhancement-dpdfnet.js
```

# Speaker diarization

In the following, we demonstrate how to run speaker diarization.

```bash
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav

node ./test-offline-speaker-diarization.js
```

# Text-to-speech

In the following, we demonstrate how to run text-to-speech.

## ./test-offline-tts-zipvoice-zh-en.js

[./test-offline-tts-zipvoice-zh-en.js](./test-offline-tts-zipvoice-zh-en.js)
shows how to use ZipVoice for Zero-shot TTS in Chinese and English.

Please make sure that the reference transcript matches the reference audio.

You can use the following command to run it:
```bash
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
tar xf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
rm sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos_24khz.onnx

node ./test-offline-tts-zipvoice-zh-en.js
```

## ./test-offline-tts-pocket-en.js

[./test-offline-tts-pocket-en.js](./test-offline-tts-pocket-en.js)
shows how to use PocketTTS for Zero-shot TTS.

You can use the following command to run it:
```
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
tar xf sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
rm sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2

node ./test-offline-tts-pocket-en.js
```

## ./test-offline-tts-kitten-en.js

[./test-offline-tts-kitten-en.js](./test-offline-tts-kitten-en.js) shows how to use
[kitten-nano-en-v0_1-fp16](https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2)
for text-to-speech.

You can use the following command to run it:

```bash
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2
tar xf kitten-nano-en-v0_1-fp16.tar.bz2
rm kitten-nano-en-v0_1-fp16.tar.bz2

node ./test-offline-tts-kitten-en.js
```

## ./test-offline-tts-kokoro-en.js

[./test-offline-tts-kokoro-en.js](./test-offline-tts-kokoro-en.js) shows how to use
[kokoro-en-v0_19](https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2)
for text-to-speech.

You can use the following command to run it:

```bash
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
tar xf kokoro-en-v0_19.tar.bz2
rm kokoro-en-v0_19.tar.bz2

node ./test-offline-tts-kokoro-en.js
```

## ./test-offline-tts-matcha-zh.js

[./test-offline-tts-matcha-zh.js](./test-offline-tts-matcha-zh.js) shows how to use
[matcha-icefall-zh-baker](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker)
for text-to-speech.

You can use the following command to run it:

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
tar xvf matcha-icefall-zh-baker.tar.bz2
rm matcha-icefall-zh-baker.tar.bz2

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx

node ./test-offline-tts-matcha-zh.js
```

## ./test-offline-tts-matcha-en.js

[./test-offline-tts-matcha-en.js](./test-offline-tts-matcha-en.js) shows how to use
[matcha-icefall-en_US-ljspeech](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker)
for text-to-speech.

You can use the following command to run it:

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
tar xf matcha-icefall-en_US-ljspeech.tar.bz2
rm matcha-icefall-en_US-ljspeech.tar.bz2

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx

node ./test-offline-tts-matcha-en.js
```

## ./test-offline-tts-vits-en.js

[./test-offline-tts-vits-en.js](./test-offline-tts-vits-en.js) shows how to use
[vits-piper-en_US-amy-low.tar.bz2](https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2)
for text-to-speech.

You can use the following command to run it:

```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
tar xvf vits-piper-en_US-amy-low.tar.bz2
node ./test-offline-tts-vits-en.js
```

## ./test-offline-tts-vits-zh.js

[./test-offline-tts-vits-zh.js](./test-offline-tts-vits-zh.js) shows how to use
a VITS pretrained model
[aishell3](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-model-aishell3)
for text-to-speech.

You can use the following command to run it:

```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
tar xvf vits-icefall-zh-aishell3.tar.bz2
node ./test-offline-tts-vits-zh.js
```

# Speech-to-text

In the following, we demonstrate how to decode files and how to perform
speech recognition with a microphone with `nodejs`.

## ./test-offline-dolphin-ctc.js

[./test-offline-dolphin-ctc.js](./test-offline-dolphin-ctc.js) demonstrates
how to decode a file with a [Dolphin](https://github.com/DataoceanAI/Dolphin) CTC model.

You can use the following command to run it:

```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
node ./test-offline-dolphin-ctc.js
```

## ./test-offline-nemo-canary.js

[./test-offline-nemo-canary.js](./test-offline-nemo-canary.js) demonstrates
how to decode a file with a NeMo Canary model. In the code we use
[sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8](https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html#sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8-english-spanish-german-french).

You can use the following command to run it:

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2

node ./test-offline-nemo-canary.js
```

## ./test-offline-zipformer-ctc.js

[./test-offline-zipformer-ctc.js](./test-offline-zipformer-ctc.js) demonstrates
how to decode a file with a Zipformer CTC model. In the code we use
[sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/icefall/zipformer.html#sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03-chinese).

You can use the following command to run it:

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2

tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2

node ./test-offline-zipformer-ctc.js
```

## ./test-offline-funasr-nano.js

[./test-offline-funasr-nano.js](./test-offline-funasr-nano.js) demonstrates
how to decode a file with a FunASR Nano model. In the code we use
[sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2](https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2).

You can use the following command to run it:

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
tar xvf sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
rm sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2

node ./test-offline-funasr-nano.js
```

## ./test-offline-medasr-ctc.js

[./test-offline-medasr-ctc.js](./test-offline-medasr-ctc.js) demonstrates
how to decode a file with a Google MedASR CTC model. In the code we use
[sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2](https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2).

You can use the following command to run it:

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
tar xvf sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
rm sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2

node ./test-offline-medasr-ctc.js
```

## ./test-offline-fire-red-asr-ctc.js

[./test-offline-fire-red-asr-ctc.js](./test-offline-fire-red-asr-ctc.js) demonstrates
how to decode a file with a FireRedASR CTC model. In the code we use
[sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2](https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2).

You can use the following command to run it:

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
tar xvf sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
rm sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2

node ./test-offline-fire-red-asr-ctc.js
```

## ./test-offline-omnilingual-asr-ctc.js

[./test-offline-omnilingual-asr-ctc.js](./test-offline-omnilingual-asr-ctc.js) demonstrates
how to decode a file with a Omnilingual ASR CTC model. In the code we use
[sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2](https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2).

You can use the following command to run it:

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
tar xvf sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
rm sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2

node ./test-offline-omnilingual-asr-ctc.js
```

## ./test-offline-wenet-ctc.js

[./test-offline-wenet-ctc.js](./test-offline-wenet-ctc.js) demonstrates
how to decode a file with a WeNet CTC model. In the code we use
[sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2](https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2).

You can use the following command to run it:

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2

node ./test-offline-wenet-ctc.js
```

## ./test-offline-nemo-ctc.js

[./test-offline-nemo-ctc.js](./test-offline-nemo-ctc.js) demonstrates
how to decode a file with a NeMo CTC model. In the code we use
[stt_en_conformer_ctc_small](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/nemo/english.html#stt-en-conformer-ctc-small).

You can use the following command to run it:

```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-ctc-en-conformer-small.tar.bz2
tar xvf sherpa-onnx-nemo-ctc-en-conformer-small.tar.bz2
node ./test-offline-nemo-ctc.js
```

## ./test-offline-paraformer.js

[./test-offline-paraformer.js](./test-offline-paraformer.js) demonstrates
how to decode a file with a non-streaming Paraformer model. In the code we use
[sherpa-onnx-paraformer-zh-2023-09-14](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2023-09-14-chinese).

You can use the following command to run it:

```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
node ./test-offline-paraformer.js
```

## ./test-offline-sense-voice-with-hr.js

[./test-offline-sense-voice-with-hr.js](./test-offline-sense-voice-with-hr.js) demonstrates
how to decode a file with a non-streaming SenseVoice model with homophone replacer.

You can use the following command to run it:

```bash
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17.tar.bz2
tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17.tar.bz2
rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
tar xf dict.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt

node ./test-offline-sense-voice-with-hr.js
```

## ./test-offline-sense-voice.js

[./test-offline-sense-voice.js](./test-offline-sense-voice.js) demonstrates
how to decode a file with a non-streaming SenseVoice model.

You can use the following command to run it:

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17.tar.bz2
tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17.tar.bz2
rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17.tar.bz2

node ./test-offline-sense-voice.js
```

## ./test-offline-transducer.js

[./test-offline-transducer.js](./test-offline-transducer.js) demonstrates
how to decode a file with a non-streaming transducer model. In the code we use
[sherpa-onnx-zipformer-en-2023-06-26](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-zipformer-en-2023-06-26-english).

You can use the following command to run it:

```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-06-26.tar.bz2
tar xvf sherpa-onnx-zipformer-en-2023-06-26.tar.bz2
node ./test-offline-transducer.js
```

## ./test-vad-with-non-streaming-asr-whisper.js

[./test-vad-with-non-streaming-asr-whisper.js](./test-vad-with-non-streaming-asr-whisper.js)
shows how to use VAD + whisper to decode a very long file.

You can use the following command to run it:

```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

node ./test-vad-with-non-streaming-asr-whisper.js
```

## ./test-offline-whisper.js

[./test-offline-whisper.js](./test-offline-whisper.js) demonstrates
how to decode a file with a Whisper model. In the code we use
[sherpa-onnx-whisper-tiny.en](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html).

You can use the following command to run it:

```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
node ./test-offline-whisper.js
```

## ./test-offline-fire-red-asr.js

[./test-offline-fire-red-asr.js](./test-offline-fire-red-asr.js) demonstrates
how to decode a file with a FireRedAsr AED model.

You can use the following command to run it:

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2

node ./test-offline-fire-red-asr.js
```

## ./test-offline-moonshine-v2.js

[./test-offline-moonshine-v2.js](./test-offline-moonshine-v2.js) demonstrates
how to decode a file with a Moonshine v2 model. In the code we use
[sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27](https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2).

You can use the following command to run it:

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
tar xvf sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
rm sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2

node ./test-offline-moonshine-v2.js
```

## ./test-offline-moonshine.js

[./test-offline-moonshine.js](./test-offline-moonshine.js) demonstrates
how to decode a file with a Moonshine model. In the code we use
[sherpa-onnx-moonshine-tiny-en-int8](https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2).

You can use the following command to run it:

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2

node ./test-offline-moonshine.js
```

## ./test-vad-with-non-streaming-asr-moonshine.js

[./test-vad-with-non-streaming-asr-moonshine.js](./test-vad-with-non-streaming-asr-moonshine.js)
shows how to use VAD + whisper to decode a very long file.

You can use the following command to run it:

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

node ./test-vad-with-non-streaming-asr-moonshine.js
```

## ./test-online-paraformer-microphone.js

[./test-online-paraformer-microphone.js](./test-online-paraformer-microphone.js)
demonstrates how to do real-time speech recognition from microphone
with a streaming Paraformer model. In the code we use
[sherpa-onnx-streaming-paraformer-bilingual-zh-en](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-streaming-paraformer-bilingual-zh-en-chinese-english).

You can use the following command to run it:

```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
node ./test-online-paraformer-microphone.js
```


## ./test-online-paraformer-microphone-mic.js

[./test-online-paraformer-microphone-mic.js](./test-online-paraformer-microphone-mic.js)
demonstrates how to do real-time speech recognition from microphone
with a streaming Paraformer model. In the code we use
[sherpa-onnx-streaming-paraformer-bilingual-zh-en](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-streaming-paraformer-bilingual-zh-en-chinese-english).

It uses `mic` for better compatibility, do check its [npm](https://www.npmjs.com/package/mic) before running it.

You can use the following command to run it:

```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
node ./test-online-paraformer-microphone-mic.js
```

## ./test-online-t-one-ctc.js
[./test-online-t-one-ctc.js](./test-online-t-one-ctc.js) demonstrates
how to decode a file using a streaming T-one model.

You can use the following command to run it:

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
node ./test-online-t-one-ctc.js
```

## ./test-online-paraformer.js
[./test-online-paraformer.js](./test-online-paraformer.js) demonstrates
how to decode a file using a streaming Paraformer model. In the code we use
[sherpa-onnx-streaming-paraformer-bilingual-zh-en](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-streaming-paraformer-bilingual-zh-en-chinese-english).

You can use the following command to run it:

```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
node ./test-online-paraformer.js
```

## ./test-online-transducer-microphone.js
[./test-online-transducer-microphone.js](./test-online-transducer-microphone.js)
demonstrates how to do real-time speech recognition with microphone using a streaming transducer model. In the code
we use [sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english).


You can use the following command to run it:

```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
node ./test-online-transducer-microphone.js
```

## ./test-online-transducer.js
[./test-online-transducer.js](./test-online-transducer.js) demonstrates
how to decode a file using a streaming transducer model. In the code
we use [sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english).

You can use the following command to run it:

```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
node ./test-online-transducer.js
```

## ./test-online-zipformer2-ctc.js
[./test-online-zipformer2-ctc.js](./test-online-zipformer2-ctc.js) demonstrates
how to decode a file using a streaming zipformer2 CTC model. In the code
we use [sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-ctc/zipformer-ctc-models.html#sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13-chinese).

You can use the following command to run it:

```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
node ./test-online-zipformer2-ctc.js
```

## ./test-online-zipformer2-ctc-hlg.js
[./test-online-zipformer2-ctc-hlg.js](./test-online-zipformer2-ctc-hlg.js) demonstrates
how to decode a file using a streaming zipformer2 CTC model with HLG. In the code
we use [sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18](https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2).

You can use the following command to run it:

```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
node ./test-online-zipformer2-ctc-hlg.js
```


================================================
FILE: nodejs-examples/package.json
================================================
{
  "dependencies": {
    "mic": "^2.1.2",
    "naudiodon2": "^2.4.0",
    "sherpa-onnx": "^1.12.31",
    "wav": "^1.0.2"
  }
}


================================================
FILE: nodejs-examples/test-keyword-spotter-transducer.js
================================================
// Copyright (c)  2024  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx');

function createKeywordSpotter() {
  // Please download test files from
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models
  const config = {
    'modelConfig': {
      'transducer': {
        'encoder':
            './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx',
        'decoder':
            './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx',
        'joiner':
            './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx',
      },
      'tokens':
          './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt',
    },
    keywords: 'w én s ēn t è k ǎ s uǒ  @文森特卡索\n' +
        'f ǎ g uó @法国'
  };

  return sherpa_onnx.createKws(config);
}

const kws = createKeywordSpotter();
const stream = kws.createStream();
const waveFilename =
    './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav';

const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform(wave.sampleRate, wave.samples);

const tailPadding = new Float32Array(wave.sampleRate * 0.4);
stream.acceptWaveform(kws.config.featConfig.sampleRate, tailPadding);

const detectedKeywords = [];
while (kws.isReady(stream)) {
  kws.decode(stream);
  const keyword = kws.getResult(stream).keyword;
  if (keyword != '') {
    detectedKeywords.push(keyword);

    // remember to reset the stream right after detecting a keyword
    kws.reset(stream);
  }
}
console.log(detectedKeywords);

stream.free();
kws.free();


================================================
FILE: nodejs-examples/test-offline-dolphin-ctc.js
================================================
// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)
//
const fs = require('fs');
const {Readable} = require('stream');
const wav = require('wav');

const sherpa_onnx = require('sherpa-onnx');

function createOfflineRecognizer() {
  let config = {
    modelConfig: {
      dolphin: {
        model:
            './sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx',
      },
      tokens:
          './sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt',
    }
  };

  return sherpa_onnx.createOfflineRecognizer(config);
}

const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();

const waveFilename =
    './sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav';
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform(wave.sampleRate, wave.samples);

recognizer.decode(stream);
const text = recognizer.getResult(stream).text;
console.log(text);

stream.free();
recognizer.free();


================================================
FILE: nodejs-examples/test-offline-fire-red-asr-ctc.js
================================================
// Copyright (c)  2026  Xiaomi Corporation (authors: Fangjun Kuang)
//
const fs = require('fs');
const {Readable} = require('stream');

const sherpa_onnx = require('sherpa-onnx');

function createOfflineRecognizer() {
  let config = {
    modelConfig: {
      fireRedAsrCtc: {
        model:
            './sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/model.int8.onnx',
      },
      tokens:
          './sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/tokens.txt',
    }
  };

  return sherpa_onnx.createOfflineRecognizer(config);
}

const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();

const waveFilename =
    './sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/test_wavs/1.wav';
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform(wave.sampleRate, wave.samples);

recognizer.decode(stream);
const text = recognizer.getResult(stream).text;
console.log(text);

stream.free();
recognizer.free();


================================================
FILE: nodejs-examples/test-offline-fire-red-asr.js
================================================
// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)
//
const sherpa_onnx = require('sherpa-onnx');

function createOfflineRecognizer() {
  let modelConfig = {
    fireRedAsr: {
      encoder:
          './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx',
      decoder:
          './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/decoder.int8.onnx',
    },
    tokens: './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/tokens.txt',
    debug: 1,
  };

  let config = {
    modelConfig: modelConfig,
  };

  return sherpa_onnx.createOfflineRecognizer(config);
}

const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();

const waveFilename =
    './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/0.wav';
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform(wave.sampleRate, wave.samples);

recognizer.decode(stream);
const text = recognizer.getResult(stream).text;
console.log(text);

stream.free();
recognizer.free();


================================================
FILE: nodejs-examples/test-offline-funasr-nano.js
================================================
// Copyright (c)  2026  Xiaomi Corporation (authors: Fangjun Kuang)
//
const fs = require('fs');
const {Readable} = require('stream');
const wav = require('wav');

const sherpa_onnx = require('sherpa-onnx');

function createOfflineRecognizer() {
  let config = {
    modelConfig: {
      funasrNano: {
        encoderAdaptor:
            './sherpa-onnx-funasr-nano-int8-2025-12-30/encoder_adaptor.int8.onnx',
        llm: './sherpa-onnx-funasr-nano-int8-2025-12-30/llm.int8.onnx',
        embedding:
            './sherpa-onnx-funasr-nano-int8-2025-12-30/embedding.int8.onnx',
        tokenizer: './sherpa-onnx-funasr-nano-int8-2025-12-30/Qwen3-0.6B',
      },
      tokens: '',
    }
  };

  return sherpa_onnx.createOfflineRecognizer(config);
}

const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();

const waveFilename =
    './sherpa-onnx-funasr-nano-int8-2025-12-30/test_wavs/lyrics.wav';
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform(wave.sampleRate, wave.samples);

recognizer.decode(stream);
const text = recognizer.getResult(stream).text;
console.log(text);

stream.free();
recognizer.free();


================================================
FILE: nodejs-examples/test-offline-medasr-ctc.js
================================================
// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)
//
const fs = require('fs');
const {Readable} = require('stream');

const sherpa_onnx = require('sherpa-onnx');

function createOfflineRecognizer() {
  let config = {
    modelConfig: {
      medasr: {
        model: './sherpa-onnx-medasr-ctc-en-int8-2025-12-25/model.int8.onnx',
      },
      tokens: './sherpa-onnx-medasr-ctc-en-int8-2025-12-25/tokens.txt',
    }
  };

  return sherpa_onnx.createOfflineRecognizer(config);
}

const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();

const waveFilename =
    './sherpa-onnx-medasr-ctc-en-int8-2025-12-25/test_wavs/0.wav';
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform(wave.sampleRate, wave.samples);

recognizer.decode(stream);
const text = recognizer.getResult(stream).text;
console.log(text);

stream.free();
recognizer.free();


================================================
FILE: nodejs-examples/test-offline-moonshine-v2.js
================================================
// Copyright (c)  2023-2026  Xiaomi Corporation (authors: Fangjun Kuang)
//
const sherpa_onnx = require('sherpa-onnx');

function createOfflineRecognizer() {
  let modelConfig = {
    moonshine: {
      encoder:
          './sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/encoder_model.ort',
      mergedDecoder:
          './sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/decoder_model_merged.ort',
    },
    tokens: './sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/tokens.txt',
  };

  let config = {
    modelConfig: modelConfig,
  };

  return sherpa_onnx.createOfflineRecognizer(config);
}

const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();

const waveFilename =
    './sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/test_wavs/0.wav';
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform(wave.sampleRate, wave.samples);

recognizer.decode(stream);
const text = recognizer.getResult(stream).text;
console.log(text);

stream.free();
recognizer.free();


================================================
FILE: nodejs-examples/test-offline-moonshine.js
================================================
// Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
//
const sherpa_onnx = require('sherpa-onnx');

function createOfflineRecognizer() {
  let modelConfig = {
    moonshine: {
      preprocessor: './sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx',
      encoder: './sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx',
      uncachedDecoder:
          './sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx',
      cachedDecoder:
          './sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx',
    },
    tokens: './sherpa-onnx-moonshine-tiny-en-int8/tokens.txt',
  };

  let config = {
    modelConfig: modelConfig,
  };

  return sherpa_onnx.createOfflineRecognizer(config);
}

const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();

const waveFilename = './sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav';
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform(wave.sampleRate, wave.samples);

recognizer.decode(stream);
const text = recognizer.getResult(stream).text;
console.log(text);

stream.free();
recognizer.free();


================================================
FILE: nodejs-examples/test-offline-nemo-canary.js
================================================
// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)
//
const fs = require('fs');
const {Readable} = require('stream');
const wav = require('wav');

const sherpa_onnx = require('sherpa-onnx');

function createOfflineRecognizer() {
  let config = {
    modelConfig: {
      canary: {
        encoder:
            './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx',
        decoder:
            './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx',
        srcLang: 'en',
        tgtLang: 'en',
        usePnc: 1,
      },
      debug: 0,
      tokens:
          './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt',
    }
  };

  return sherpa_onnx.createOfflineRecognizer(config);
}

const recognizer = createOfflineRecognizer();
let stream = recognizer.createStream();

const waveFilename =
    './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav';
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform(wave.sampleRate, wave.samples);

recognizer.decode(stream);
let text = recognizer.getResult(stream).text;
console.log(`text in English: ${text}`);

stream.free();

// now output German text
recognizer.config.modelConfig.canary.tgtLang = 'de';
recognizer.setConfig(recognizer.config);

stream = recognizer.createStream();
stream.acceptWaveform(wave.sampleRate, wave.samples);
recognizer.decode(stream);
text = recognizer.getResult(stream).text;

console.log(`text in German: ${text}`);

stream.free();
recognizer.free();


================================================
FILE: nodejs-examples/test-offline-nemo-ctc.js
================================================
// Copyright (c)  2023-2024  Xiaomi Corporation (authors: Fangjun Kuang)
//
const fs = require('fs');
const {Readable} = require('stream');
const wav = require('wav');

const sherpa_onnx = require('sherpa-onnx');

function createOfflineRecognizer() {
  let config = {
    modelConfig: {
      nemoCtc: {
        model: './sherpa-onnx-nemo-ctc-en-conformer-small/model.int8.onnx',
      },
      tokens: './sherpa-onnx-nemo-ctc-en-conformer-small/tokens.txt',
    }
  };

  return sherpa_onnx.createOfflineRecognizer(config);
}

const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();

const waveFilename =
    './sherpa-onnx-nemo-ctc-en-conformer-small/test_wavs/0.wav';
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform(wave.sampleRate, wave.samples);

recognizer.decode(stream);
const text = recognizer.getResult(stream).text;
console.log(text);

stream.free();
recognizer.free();


================================================
FILE: nodejs-examples/test-offline-omnilingual-asr-ctc.js
================================================
// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)
//
const fs = require('fs');
const {Readable} = require('stream');
const wav = require('wav');

const sherpa_onnx = require('sherpa-onnx');

function createOfflineRecognizer() {
  let config = {
    modelConfig: {
      omnilingual: {
        model:
            './sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/model.int8.onnx',
      },
      tokens:
          './sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/tokens.txt',
    }
  };

  return sherpa_onnx.createOfflineRecognizer(config);
}

const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();

const waveFilename =
    './sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/test_wavs/en.wav';
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform(wave.sampleRate, wave.samples);

recognizer.decode(stream);
const text = recognizer.getResult(stream).text;
console.log(text);

stream.free();
recognizer.free();


================================================
FILE: nodejs-examples/test-offline-paraformer-itn.js
================================================
// Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)

const fs = require('fs');
const {Readable} = require('stream');
const wav = require('wav');

const sherpa_onnx = require('sherpa-onnx');

function createOfflineRecognizer() {
  let modelConfig = {
    paraformer: {
      model: './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx',
    },
    tokens: './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt',
  };

  let config = {
    modelConfig: modelConfig,
    // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
    ruleFsts: './itn_zh_number.fst',
  };

  return sherpa_onnx.createOfflineRecognizer(config);
}


const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();

// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
const waveFilename = './itn-zh-number.wav';
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform(wave.sampleRate, wave.samples);

recognizer.decode(stream);
const text = recognizer.getResult(stream).text;
console.log(text);

stream.free();
recognizer.free();


================================================
FILE: nodejs-examples/test-offline-paraformer.js
================================================
// Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)

const sherpa_onnx = require('sherpa-onnx');

function createOfflineRecognizer() {
  let modelConfig = {
    paraformer: {
      model: './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx',
    },
    tokens: './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt',
  };

  let config = {
    modelConfig: modelConfig,
  };

  return sherpa_onnx.createOfflineRecognizer(config);
}

const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();

const waveFilename = './sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/0.wav';
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform(wave.sampleRate, wave.samples);

recognizer.decode(stream);
const text = recognizer.getResult(stream).text;
console.log(text);

stream.free();
recognizer.free();


================================================
FILE: nodejs-examples/test-offline-sense-voice-with-hr.js
================================================
// Copyright (c)  2024-2025  Xiaomi Corporation (authors: Fangjun Kuang)

const sherpa_onnx = require('sherpa-onnx');

function createOfflineRecognizer() {
  let modelConfig = {
    senseVoice: {
      model:
          './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17/model.int8.onnx',
      language: '',
      useInverseTextNormalization: 1,
    },
    tokens:
        './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17/tokens.txt',
  };

  let config = {
    modelConfig: modelConfig,
    hr: {
      lexicon: './lexicon.txt',
      ruleFsts: './replace.fst',
    },
  };

  return sherpa_onnx.createOfflineRecognizer(config);
}

const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();

const waveFilename = './test-hr.wav';
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform(wave.sampleRate, wave.samples);

recognizer.decode(stream);
const text = recognizer.getResult(stream).text;
console.log(text);

stream.free();
recognizer.free();


================================================
FILE: nodejs-examples/test-offline-sense-voice.js
================================================
// Copyright (c)  2024  Xiaomi Corporation (authors: Fangjun Kuang)

const sherpa_onnx = require('sherpa-onnx');

function createOfflineRecognizer() {
  let modelConfig = {
    senseVoice: {
      model:
          './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17/model.int8.onnx',
      language: '',
      useInverseTextNormalization: 1,
    },
    tokens:
        './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17/tokens.txt',
  };

  let config = {
    modelConfig: modelConfig,
  };

  return sherpa_onnx.createOfflineRecognizer(config);
}

const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();

const waveFilename =
    './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17/test_wavs/zh.wav';
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform(wave.sampleRate, wave.samples);

recognizer.decode(stream);
const text = recognizer.getResult(stream).text;
console.log(text);

stream.free();
recognizer.free();


================================================
FILE: nodejs-examples/test-offline-speaker-diarization.js
================================================
// Copyright (c)  2024  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx');

// clang-format off
/* Please use the following commands to download files
   used in this script

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav

 */
// clang-format on

const config = {
  segmentation: {
    pyannote: {
      model: './sherpa-onnx-pyannote-segmentation-3-0/model.onnx',
      debug: 1,
    },
  },
  embedding: {
    model: './3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx',
    debug: 1,
  },
  clustering: {
    // since we know that the test wave file
    // ./0-four-speakers-zh.wav contains 4 speakers, we use 4 for numClusters
    // here. if you don't have such information, please set numClusters to -1
    numClusters: 4,

    // If numClusters is not -1, then threshold is ignored.
    //
    // A larger threshold leads to fewer clusters, i.e., fewer speakers
    // A smaller threshold leads to more clusters, i.e., more speakers
    // You need to tune it by yourself.
    threshold: 0.5,
  },

  // If a segment is shorter than minDurationOn, we discard it
  minDurationOn: 0.2,  // in seconds

  // If the gap between two segments is less than minDurationOff, then we
  // merge these two segments into a single one
  minDurationOff: 0.5,  // in seconds
};

const waveFilename = './0-four-speakers-zh.wav';

const sd = sherpa_onnx.createOfflineSpeakerDiarization(config);
console.log('Started');

const wave = sherpa_onnx.readWave(waveFilename);
if (sd.sampleRate != wave.sampleRate) {
  throw new Error(
      `Expected sample rate: ${sd.sampleRate}, given: ${wave.sampleRate}`);
}

const segments = sd.process(wave.samples);
console.log(segments);


================================================
FILE: nodejs-examples/test-offline-speech-enhancement-dpdfnet.js
================================================
// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)
//
// Please download a DPDFNet model and ./inp_16k.wav used in this file
// from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
// or https://huggingface.co/Ceva-IP/DPDFNet
//
// This script shows how to use speech enhancement API from sherpa-onnx.
// Use dpdfnet_baseline.onnx, dpdfnet2.onnx, dpdfnet4.onnx, or dpdfnet8.onnx
// for 16 kHz downstream ASR or speech recognition.
// Use dpdfnet2_48khz_hr.onnx for 48 kHz enhancement output.
const sherpa_onnx = require('sherpa-onnx');

function createOfflineSpeechDenoiser() {
  const model = './dpdfnet2.onnx';
  let config = {
    model: {
      dpdfnet: {model},
      debug: 1,
    },
  };

  return sherpa_onnx.createOfflineSpeechDenoiser(config);
}

const speech_denoiser = createOfflineSpeechDenoiser();

const waveFilename = './inp_16k.wav';
const wave = sherpa_onnx.readWave(waveFilename);

const denoised = speech_denoiser.run(wave.samples, wave.sampleRate);
const outputFilename = './enhanced.wav';
sherpa_onnx.writeWave(outputFilename, denoised);
console.log(`Saved to ${outputFilename}`);

speech_denoiser.free();


================================================
FILE: nodejs-examples/test-offline-speech-enhancement-gtcrn.js
================================================
// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)
//
// Please download a speech enhancement model and ./inp_16k.wav used in this file
// from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
//
// This script shows how to use speech enhancement API from sherpa-onnx.
const sherpa_onnx = require('sherpa-onnx');

function createOfflineSpeechDenoiser() {
  const model = './gtcrn_simple.onnx';
  let config = {
    model: {
      gtcrn: {model},
      debug: 1,
    },
  };

  return sherpa_onnx.createOfflineSpeechDenoiser(config);
}

const speech_denoiser = createOfflineSpeechDenoiser();

const waveFilename = './inp_16k.wav';
const wave = sherpa_onnx.readWave(waveFilename);

const denoised = speech_denoiser.run(wave.samples, wave.sampleRate);
const outputFilename = './enhanced.wav';
sherpa_onnx.writeWave(outputFilename, denoised);
console.log(`Saved to ${outputFilename}`);

speech_denoiser.free();


================================================
FILE: nodejs-examples/test-offline-transducer.js
================================================
// Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
//
const sherpa_onnx = require('sherpa-onnx');

function createOfflineRecognizer() {
  let modelConfig = {
    transducer: {
      encoder:
          './sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.int8.onnx',
      decoder:
          './sherpa-onnx-zipformer-en-2023-06-26/decoder-epoch-99-avg-1.onnx',
      joiner:
          './sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.int8.onnx',
    },
    tokens: './sherpa-onnx-zipformer-en-2023-06-26/tokens.txt',
    modelType: 'transducer',
  };

  let config = {
    modelConfig: modelConfig,
  };

  return sherpa_onnx.createOfflineRecognizer(config);
}
const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();

const waveFilename = './sherpa-onnx-zipformer-en-2023-06-26/test_wavs/0.wav';
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform(wave.sampleRate, wave.samples);

recognizer.decode(stream);
const text = recognizer.getResult(stream).text;
console.log(text);

stream.free();
recognizer.free();


================================================
FILE: nodejs-examples/test-offline-tts-kitten-en.js
================================================
// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)

const sherpa_onnx = require('sherpa-onnx');

function createOfflineTts() {
  let offlineTtsKittenModelConfig = {
    model: './kitten-nano-en-v0_1-fp16/model.fp16.onnx',
    voices: './kitten-nano-en-v0_1-fp16/voices.bin',
    tokens: './kitten-nano-en-v0_1-fp16/tokens.txt',
    dataDir: './kitten-nano-en-v0_1-fp16/espeak-ng-data',
    lengthScale: 1.0,
  };
  let offlineTtsModelConfig = {
    offlineTtsKittenModelConfig: offlineTtsKittenModelConfig,
    numThreads: 1,
    debug: 1,
    provider: 'cpu',
  };

  let offlineTtsConfig = {
    offlineTtsModelConfig: offlineTtsModelConfig,
    maxNumSentences: 1,
  };

  return sherpa_onnx.createOfflineTts(offlineTtsConfig);
}

const tts = createOfflineTts();
const speakerId = 0;
const speed = 1.0;
const generationConfig = {
  sid: speakerId,
  speed: speed,
  silenceScale: 0.2,
};
const text =
    'Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.';

const audio = tts.generateWithConfig(text, generationConfig);
tts.save('./test-kitten-en.wav', audio);
console.log('Saved to test-kitten-en.wav successfully.');
tts.free();


================================================
FILE: nodejs-examples/test-offline-tts-kokoro-en.js
================================================
// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)

const sherpa_onnx = require('sherpa-onnx');

function createOfflineTts() {
  let offlineTtsKokoroModelConfig = {
    model: './kokoro-en-v0_19/model.onnx',
    voices: './kokoro-en-v0_19/voices.bin',
    tokens: './kokoro-en-v0_19/tokens.txt',
    dataDir: './kokoro-en-v0_19/espeak-ng-data',
    lengthScale: 1.0,
  };
  let offlineTtsModelConfig = {
    offlineTtsKokoroModelConfig: offlineTtsKokoroModelConfig,
    numThreads: 1,
    debug: 1,
    provider: 'cpu',
  };

  let offlineTtsConfig = {
    offlineTtsModelConfig: offlineTtsModelConfig,
    maxNumSentences: 1,
  };

  return sherpa_onnx.createOfflineTts(offlineTtsConfig);
}

const tts = createOfflineTts();
const speakerId = 0;
const speed = 1.0;
const generationConfig = {
  sid: speakerId,
  speed: speed,
  silenceScale: 0.2,
};
const text =
    'Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.';

const audio = tts.generateWithConfig(text, generationConfig);
tts.save('./test-kokoro-en.wav', audio);
console.log('Saved to test-kokoro-en.wav successfully.');
tts.free();


================================================
FILE: nodejs-examples/test-offline-tts-kokoro-zh-en.js
================================================
// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)

const sherpa_onnx = require('sherpa-onnx');

function createOfflineTts() {
  let offlineTtsKokoroModelConfig = {
    model: './kokoro-multi-lang-v1_0/model.onnx',
    voices: './kokoro-multi-lang-v1_0/voices.bin',
    tokens: './kokoro-multi-lang-v1_0/tokens.txt',
    dataDir: './kokoro-multi-lang-v1_0/espeak-ng-data',
    lexicon:
        './kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt',
    lengthScale: 1.0,
  };
  let offlineTtsModelConfig = {
    offlineTtsKokoroModelConfig: offlineTtsKokoroModelConfig,
    numThreads: 1,
    debug: 1,
    provider: 'cpu',
  };

  let offlineTtsConfig = {
    offlineTtsModelConfig: offlineTtsModelConfig,
    maxNumSentences: 1,
  };

  return sherpa_onnx.createOfflineTts(offlineTtsConfig);
}

const tts = createOfflineTts();
const speakerId = 49;
const speed = 1.0;
const generationConfig = {
  sid: speakerId,
  speed: speed,
  silenceScale: 0.2,
};
const text =
    '中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢？';

const audio = tts.generateWithConfig(text, generationConfig);
tts.save('./test-kokoro-zh-en-49.wav', audio);
console.log('Saved to test-kokoro-zh-en-49.wav successfully.');
tts.free();


================================================
FILE: nodejs-examples/test-offline-tts-matcha-en.js
================================================
// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)

const sherpa_onnx = require('sherpa-onnx');

const silenceScale = 0.2;

function createOfflineTts() {
  let offlineTtsMatchaModelConfig = {
    acousticModel: './matcha-icefall-en_US-ljspeech/model-steps-3.onnx',
    vocoder: './vocos-22khz-univ.onnx',
    tokens: './matcha-icefall-en_US-ljspeech/tokens.txt',
    dataDir: './matcha-icefall-en_US-ljspeech/espeak-ng-data',

    noiseScale: 0.667,
    lengthScale: 1.0,
  };
  let offlineTtsModelConfig = {
    offlineTtsMatchaModelConfig: offlineTtsMatchaModelConfig,
    numThreads: 1,
    debug: 1,
    provider: 'cpu',
  };

  let offlineTtsConfig = {
    offlineTtsModelConfig: offlineTtsModelConfig,
    maxNumSentences: 1,
    silenceScale: silenceScale,
  };

  return sherpa_onnx.createOfflineTts(offlineTtsConfig);
}

const tts = createOfflineTts();
const text =
    'Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.';

const generationConfig = {
  sid: 0,
  speed: 1.0,
  silenceScale: silenceScale,
};

const audio = tts.generateWithConfig(text, generationConfig);
tts.save('./test-matcha-en.wav', audio);
console.log('Saved to test-matcha-en.wav successfully.');
tts.free();


================================================
FILE: nodejs-examples/test-offline-tts-matcha-zh.js
================================================
// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)

const sherpa_onnx = require('sherpa-onnx');

const silenceScale = 0.2;

function createOfflineTts() {
  let offlineTtsMatchaModelConfig = {
    acousticModel: './matcha-icefall-zh-baker/model-steps-3.onnx',
    vocoder: './vocos-22khz-univ.onnx',
    lexicon: './matcha-icefall-zh-baker/lexicon.txt',
    tokens: './matcha-icefall-zh-baker/tokens.txt',
    noiseScale: 0.667,
    lengthScale: 1.0,
  };
  let offlineTtsModelConfig = {
    offlineTtsMatchaModelConfig: offlineTtsMatchaModelConfig,
    numThreads: 1,
    debug: 1,
    provider: 'cpu',
  };

  let offlineTtsConfig = {
    offlineTtsModelConfig: offlineTtsModelConfig,
    maxNumSentences: 1,
    silenceScale: silenceScale,
    ruleFsts:
        './matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst',
  };

  return sherpa_onnx.createOfflineTts(offlineTtsConfig);
}

const tts = createOfflineTts();
const text =
    '当夜幕降临，星光点点，伴随着微风拂面，我在静谧中感受着时光的流转，思念如涟漪荡漾，梦境如画卷展开，我与自然融为一体，沉静在这片宁静的美丽之中，感受着生命的奇迹与温柔. 某某银行的副行长和一些行政领导表示，他们去过长江和长白山; 经济不断增长。2024年12月31号，拨打110或者18920240511。123456块钱。';

const generationConfig = {
  sid: 0,
  speed: 1.0,
  silenceScale: silenceScale,
};

const audio = tts.generateWithConfig(text, generationConfig);
tts.save('./test-matcha-zh.wav', audio);
console.log('Saved to test-matcha-zh.wav successfully.');
tts.free();


================================================
FILE: nodejs-examples/test-offline-tts-pocket-en.js
================================================
// Copyright (c)  2026  Xiaomi Corporation (authors: Fangjun Kuang)

const sherpa_onnx = require('sherpa-onnx');

function createOfflineTts() {
  let pocket = {
    lmFlow: './sherpa-onnx-pocket-tts-int8-2026-01-26/lm_flow.int8.onnx',
    lmMain: './sherpa-onnx-pocket-tts-int8-2026-01-26/lm_main.int8.onnx',
    encoder: './sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx',
    decoder: './sherpa-onnx-pocket-tts-int8-2026-01-26/decoder.int8.onnx',
    textConditioner:
        './sherpa-onnx-pocket-tts-int8-2026-01-26/text_conditioner.onnx',
    vocabJson: './sherpa-onnx-pocket-tts-int8-2026-01-26/vocab.json',
    tokenScoresJson:
        './sherpa-onnx-pocket-tts-int8-2026-01-26/token_scores.json',
    voiceEmbeddingCacheCapacity: 50,
  };
  let offlineTtsModelConfig = {
    offlineTtsPocketModelConfig: pocket,
    numThreads: 1,
    debug: 1,  // set it to 1 to see verbose logs; 0 to disable logs
    provider: 'cpu',
  };

  let offlineTtsConfig = {
    offlineTtsModelConfig: offlineTtsModelConfig,
    maxNumSentences: 1,
  };

  return sherpa_onnx.createOfflineTts(offlineTtsConfig);
}

const referenceWaveFilename =
    './sherpa-onnx-pocket-tts-int8-2026-01-26/test_wavs/bria.wav';
const wave = sherpa_onnx.readWave(referenceWaveFilename);

const generationConfig = {
  silenceScale: 0.2,
  referenceAudio: wave.samples,
  referenceSampleRate: wave.sampleRate,
  numSteps: 5,
  extra: {max_reference_audio_len: 12, seed: 42}
};

const tts = createOfflineTts();
const text =
    'Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.';

const audio = tts.generateWithConfig(text, generationConfig);
tts.save('./test-pocket-en.wav', audio);
console.log('Saved to test-pocket-en.wav successfully.');
tts.free();


================================================
FILE: nodejs-examples/test-offline-tts-vits-en.js
================================================
// Copyright (c)  2023-2024  Xiaomi Corporation (authors: Fangjun Kuang)

const sherpa_onnx = require('sherpa-onnx');

function createOfflineTts() {
  let offlineTtsVitsModelConfig = {
    model: './vits-piper-en_US-amy-low/en_US-amy-low.onnx',
    tokens: './vits-piper-en_US-amy-low/tokens.txt',
    dataDir: './vits-piper-en_US-amy-low/espeak-ng-data',
    noiseScale: 0.667,
    noiseScaleW: 0.8,
    lengthScale: 1.0,
  };
  let offlineTtsModelConfig = {
    offlineTtsVitsModelConfig: offlineTtsVitsModelConfig,
    numThreads: 1,
    debug: 1,
    provider: 'cpu',
  };

  let offlineTtsConfig = {
    offlineTtsModelConfig: offlineTtsModelConfig,
    maxNumSentences: 1,
  };

  return sherpa_onnx.createOfflineTts(offlineTtsConfig);
}


const tts = createOfflineTts();
const speakerId = 0;
const speed = 1.0;
const generationConfig = {
  sid: speakerId,
  speed: speed,
  silenceScale: 0.2,
};
const audio = tts.generateWithConfig(
    '“Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.”',
    generationConfig);

tts.save('./test-vits-en.wav', audio);
console.log('Saved to test-vits-en.wav successfully.');

tts.free();


================================================
FILE: nodejs-examples/test-offline-tts-vits-zh.js
================================================
// Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)

const sherpa_onnx = require('sherpa-onnx');

function createOfflineTts() {
  let offlineTtsVitsModelConfig = {
    model: './vits-icefall-zh-aishell3/model.onnx',
    lexicon: './vits-icefall-zh-aishell3/lexicon.txt',
    tokens: './vits-icefall-zh-aishell3/tokens.txt',
    noiseScale: 0.667,
    noiseScaleW: 0.8,
    lengthScale: 1.0,
  };
  let offlineTtsModelConfig = {
    offlineTtsVitsModelConfig: offlineTtsVitsModelConfig,
    numThreads: 1,
    debug: 1,
    provider: 'cpu',
  };

  let offlineTtsConfig = {
    offlineTtsModelConfig: offlineTtsModelConfig,
    ruleFsts:
        './vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst,./vits-icefall-zh-aishell3/new_heteronym.fst',
    ruleFars: './vits-icefall-zh-aishell3/rule.far',
    maxNumSentences: 1,
  };

  return sherpa_onnx.createOfflineTts(offlineTtsConfig);
}

const tts = createOfflineTts();
const speakerId = 66;
const speed = 1.0;
const generationConfig = {
  sid: speakerId,
  speed: speed,
  silenceScale: 0.2,
};
const audio = tts.generateWithConfig('3年前中国总人口是1411778724人', generationConfig);
tts.save('./test-vits-zh.wav', audio);
console.log('Saved to test-vits-zh.wav successfully.');
tts.free();


================================================
FILE: nodejs-examples/test-offline-tts-zipvoice-zh-en.js
================================================
// Copyright (c)  2026  Xiaomi Corporation (authors: Fangjun Kuang)

const sherpa_onnx = require('sherpa-onnx');

function createOfflineTts() {
  const zipvoice = {
    encoder: './sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/encoder.int8.onnx',
    decoder: './sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/decoder.int8.onnx',
    tokens: './sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/tokens.txt',
    lexicon: './sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/lexicon.txt',
    dataDir: './sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/espeak-ng-data',
    vocoder: './vocos_24khz.onnx',
  };

  const offlineTtsModelConfig = {
    offlineTtsZipVoiceModelConfig: zipvoice,
    numThreads: 1,
    debug: 1,  // set it to 1 to see verbose logs; 0 to disable logs
    provider: 'cpu',
  };

  const offlineTtsConfig = {
    offlineTtsModelConfig: offlineTtsModelConfig,
    maxNumSentences: 1,
  };

  return sherpa_onnx.createOfflineTts(offlineTtsConfig);
}

const referenceWaveFilename =
    './sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/test_wavs/leijun-1.wav';
const wave = sherpa_onnx.readWave(referenceWaveFilename);

const referenceText =
    '那还是三十六年前, 一九八七年. 我呢考上了武汉大学的计算机系.';
const text =
    '小米的价值观是真诚, 热爱. 真诚，就是不欺人也不自欺. 热爱, 就是全心投入并享受其中.';

const generationConfig = {
  referenceAudio: wave.samples,
  referenceSampleRate: wave.sampleRate,
  // It must match the transcript of the reference audio above.
  referenceText: referenceText,
  numSteps: 4,
  extra: {min_char_in_sentence: 10},
};

const tts = createOfflineTts();
const audio = tts.generateWithConfig(text, generationConfig);
tts.save('./test-zipvoice-zh-en.wav', audio);
console.log('Saved to test-zipvoice-zh-en.wav successfully.');
tts.free();


================================================
FILE: nodejs-examples/test-offline-wenet-ctc.js
================================================
// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)
//
const fs = require('fs');
const {Readable} = require('stream');
const wav = require('wav');

const sherpa_onnx = require('sherpa-onnx');

function createOfflineRecognizer() {
  let config = {
    modelConfig: {
      wenetCtc: {
        model:
            './sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx',
      },
      tokens:
          './sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt',
    }
  };

  return sherpa_onnx.createOfflineRecognizer(config);
}

const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();

const waveFilename =
    './sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav';
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform(wave.sampleRate, wave.samples);

recognizer.decode(stream);
const text = recognizer.getResult(stream).text;
console.log(text);

stream.free();
recognizer.free();


================================================
FILE: nodejs-examples/test-offline-whisper.js
================================================
// Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
//
const sherpa_onnx = require('sherpa-onnx');
console.log(`version : ${sherpa_onnx.version}`);
console.log(`git sha1: ${sherpa_onnx.gitSha1}`);
console.log(`git date: ${sherpa_onnx.gitDate}`);

function createOfflineRecognizer() {
  let modelConfig = {
    whisper: {
      encoder: './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx',
      decoder: './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx',
      language: '',
      task: 'transcribe',
      tailPaddings: -1,
    },
    tokens: './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt',
  };

  let config = {
    modelConfig: modelConfig,
  };

  return sherpa_onnx.createOfflineRecognizer(config);
}

const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();

const waveFilename = './sherpa-onnx-whisper-tiny.en/test_wavs/0.wav';
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform(wave.sampleRate, wave.samples);

recognizer.decode(stream);
const text = recognizer.getResult(stream).text;
console.log(text);

stream.free();
recognizer.free();


================================================
FILE: nodejs-examples/test-offline-zipformer-ctc.js
================================================
// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)
//
const fs = require('fs');
const {Readable} = require('stream');
const wav = require('wav');

const sherpa_onnx = require('sherpa-onnx');

function createOfflineRecognizer() {
  let config = {
    modelConfig: {
      zipformerCtc: {
        model: './sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx',
      },
      tokens: './sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt',
    }
  };

  return sherpa_onnx.createOfflineRecognizer(config);
}

const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();

const waveFilename =
    './sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/test_wavs/0.wav';
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform(wave.sampleRate, wave.samples);

recognizer.decode(stream);
const text = recognizer.getResult(stream).text;
console.log(text);

stream.free();
recognizer.free();


================================================
FILE: nodejs-examples/test-online-paraformer-microphone-mic.js
================================================
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
const mic = require(
    'mic');  // It uses `mic` for better compatibility, do check its
             // [npm](https://www.npmjs.com/package/mic) before running it.
const sherpa_onnx = require('sherpa-onnx');

function createOnlineRecognizer() {
  let onlineParaformerModelConfig = {
    encoder:
        './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx',
    decoder:
        './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx',
  };

  let onlineModelConfig = {
    paraformer: onlineParaformerModelConfig,
    tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt',
  };

  let recognizerConfig = {
    modelConfig: onlineModelConfig,
    enableEndpoint: 1,
    rule1MinTrailingSilence: 2.4,
    rule2MinTrailingSilence: 1.2,
    rule3MinUtteranceLength: 20,
  };

  return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
}

/**
 * SpeechSession class, work as a session manager with the formatOutput function
 * Sample output:
=== Automated Speech Recognition ===
Current Session #1
Time: 8:44:46 PM
------------------------
Recognized Sentences:
[8:44:43 PM] 1. it's so great three result is great great 她还支持中文
[8:44:46 PM] 2. 很厉
------------------------
Recognizing: 真的很厉害太厉害

*/
class SpeechSession {
  constructor() {
    this.startTime = Date.now();
    this.sentences = [];
    this.currentText = '';
    this.lastUpdateTime = Date.now();
  }

  addOrUpdateText(text) {
    this.currentText = text;
    this.lastUpdateTime = Date.now();
  }

  finalizeSentence() {
    if (this.currentText.trim()) {
      this.sentences.push({
        text: this.currentText.trim(),
        timestamp: new Date().toLocaleTimeString()
      });
    }
    this.currentText = '';
  }

  shouldStartNewSession() {
    return Date.now() - this.lastUpdateTime > 10000;  // 10 seconds of silence
  }
}

function formatOutput() {
  clearConsole();
  console.log('\n=== Automated Speech Recognition ===');
  console.log(`Current Session #${sessionCount}`);
  console.log('Time:', new Date().toLocaleTimeString());
  console.log('------------------------');

  // 显示历史句子
  if (currentSession.sentences.length > 0) {
    console.log('Recognized Sentences:');
    currentSession.sentences.forEach((sentence, index) => {
      console.log(`[${sentence.timestamp}] ${index + 1}. ${sentence.text}`);
    });
    console.log('------------------------');
  }

  // 显示当前正在识别的内容
  if (currentSession.currentText) {
    console.log('Recognizing:', currentSession.currentText);
  }
}


const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();
let currentSession = new SpeechSession();
let sessionCount = 1;

function clearConsole() {
  process.stdout.write('\x1B[2J\x1B[0f');
}


function exitHandler(options, exitCode) {
  if (options.cleanup) {
    console.log('\nCleaned up resources...');
    micInstance.stop();
    stream.free();
    recognizer.free();
  }
  if (exitCode || exitCode === 0) console.log('Exit code:', exitCode);
  if (options.exit) process.exit();
}

const micInstance = mic({
  rate: recognizer.config.featConfig.sampleRate,
  channels: 1,
  debug: false,  // 关闭调试输出
  device: 'default',
  bitwidth: 16,
  encoding: 'signed-integer',
  exitOnSilence: 6,
  fileType: 'raw'
});

const micInputStream = micInstance.getAudioStream();

function startMic() {
  return new Promise((resolve, reject) => {
    micInputStream.once('startComplete', () => {
      console.log('Mic phone started.');
      resolve();
    });

    micInputStream.once('error', (err) => {
      console.error('Mic phone start error:', err);
      reject(err);
    });

    micInstance.start();
  });
}

micInputStream.on('data', buffer => {
  const int16Array = new Int16Array(buffer.buffer);
  const samples = new Float32Array(int16Array.length);

  for (let i = 0; i < int16Array.length; i++) {
    samples[i] = int16Array[i] / 32768.0;
  }

  stream.acceptWaveform(recognizer.config.featConfig.sampleRate, samples);

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  const isEndpoint = recognizer.isEndpoint(stream);
  const text = recognizer.getResult(stream).text;

  if (text.length > 0) {
    // 检查是否需要开始新会话
    if (currentSession.shouldStartNewSession()) {
      currentSession.finalizeSentence();
      sessionCount++;
      currentSession = new SpeechSession();
    }

    currentSession.addOrUpdateText(text);
    formatOutput();
  }

  if (isEndpoint) {
    if (text.length > 0) {
      currentSession.finalizeSentence();
      formatOutput();
    }
    recognizer.reset(stream);
  }
});

micInputStream.on('error', err => {
  console.error('Audio stream error:', err);
});

micInputStream.on('close', () => {
  console.log('Mic phone closed.');
});

process.on('exit', exitHandler.bind(null, {cleanup: true}));
process.on('SIGINT', exitHandler.bind(null, {exit: true}));
process.on('SIGUSR1', exitHandler.bind(null, {exit: true}));
process.on('SIGUSR2', exitHandler.bind(null, {exit: true}));
process.on('uncaughtException', exitHandler.bind(null, {exit: true}));

async function main() {
  try {
    console.log('Starting ...');
    await startMic();
    console.log('Initialized, waiting for speech ...');
    formatOutput();
  } catch (err) {
    console.error('Failed to initialize:', err);
    process.exit(1);
  }
}

main();


================================================
FILE: nodejs-examples/test-online-paraformer-microphone.js
================================================
// Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
//
const portAudio = require('naudiodon2');
console.log(portAudio.getDevices());

const sherpa_onnx = require('sherpa-onnx');

function createOnlineRecognizer() {
  let onlineParaformerModelConfig = {
    encoder:
        './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx',
    decoder:
        './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx',
  };

  let onlineModelConfig = {
    paraformer: onlineParaformerModelConfig,
    tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt',
  };

  let recognizerConfig = {
    modelConfig: onlineModelConfig,
    enableEndpoint: 1,
    rule1MinTrailingSilence: 2.4,
    rule2MinTrailingSilence: 1.2,
    rule3MinUtteranceLength: 20,
  };

  return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
}

const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();

let lastText = '';
let segmentIndex = 0;

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: recognizer.config.featConfig.sampleRate
  }
});

ai.on('data', data => {
  const samples = new Float32Array(data.buffer);

  stream.acceptWaveform(recognizer.config.featConfig.sampleRate, samples);

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  const isEndpoint = recognizer.isEndpoint(stream);
  const text = recognizer.getResult(stream).text;

  if (text.length > 0 && lastText != text) {
    lastText = text;
    console.log(segmentIndex, lastText);
  }
  if (isEndpoint) {
    if (text.length > 0) {
      lastText = text;
      segmentIndex += 1;
    }
    recognizer.reset(stream);
  }
});

ai.on('close', () => {
  console.log('Free resources');
  stream.free();
  recognizer.free();
});

ai.start();
console.log('Started! Please speak');


================================================
FILE: nodejs-examples/test-online-paraformer.js
================================================
// Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
//
const fs = require('fs');
const {Readable} = require('stream');
const wav = require('wav');

const sherpa_onnx = require('sherpa-onnx');

function createOnlineRecognizer() {
  let onlineParaformerModelConfig = {
    encoder:
        './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx',
    decoder:
        './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx',
  };

  let onlineModelConfig = {
    paraformer: onlineParaformerModelConfig,
    tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt',
  };

  let recognizerConfig = {
    modelConfig: onlineModelConfig,
  };

  return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
}

const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();

const waveFilename =
    './sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav';

const reader = new wav.Reader();
const readable = new Readable().wrap(reader);

function decode(samples) {
  stream.acceptWaveform(recognizer.config.featConfig.sampleRate, samples);

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }
  const text = recognizer.getResult(stream).text;
  console.log(text);
}

reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
  if (sampleRate != recognizer.config.featConfig.sampleRate) {
    throw new Error(`Only support sampleRate ${
        recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
  }

  if (audioFormat != 1) {
    throw new Error(`Only support PCM format. Given ${audioFormat}`);
  }

  if (channels != 1) {
    throw new Error(`Only a single channel. Given ${channels}`);
  }

  if (bitDepth != 16) {
    throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
  }
});

fs.createReadStream(waveFilename, {'highWaterMark': 4096})
    .pipe(reader)
    .on('finish', function(err) {
      // tail padding
      const floatSamples =
          new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
      decode(floatSamples);
      stream.free();
      recognizer.free();
    });

readable.on('readable', function() {
  let chunk;
  while ((chunk = readable.read()) != null) {
    const int16Samples = new Int16Array(
        chunk.buffer, chunk.byteOffset,
        chunk.length / Int16Array.BYTES_PER_ELEMENT);

    const floatSamples = new Float32Array(int16Samples.length);

    for (let i = 0; i < floatSamples.length; i++) {
      floatSamples[i] = int16Samples[i] / 32768.0;
    }

    decode(floatSamples);
  }
});


================================================
FILE: nodejs-examples/test-online-speech-enhancement-dpdfnet.js
================================================
// Copyright (c)  2026  Xiaomi Corporation (authors: Fangjun Kuang)
//
// Please download a DPDFNet model and ./inp_16k.wav used in this file from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
// or https://huggingface.co/Ceva-IP/DPDFNet
//
// This script shows how to use the streaming speech enhancement API from
// sherpa-onnx.
const sherpa_onnx = require('sherpa-onnx');

function createOnlineSpeechDenoiser() {
  const model = './dpdfnet_baseline.onnx';
  const config = {
    model: {
      dpdfnet: {model},
      debug: 1,
    },
  };

  return sherpa_onnx.createOnlineSpeechDenoiser(config);
}

const speech_denoiser = createOnlineSpeechDenoiser();

const waveFilename = './inp_16k.wav';
const wave = sherpa_onnx.readWave(waveFilename);
const frameShift = speech_denoiser.frameShiftInSamples;
const output = [];

let start = 0;
while (start < wave.samples.length) {
  const end = Math.min(start + frameShift, wave.samples.length);
  const chunk = wave.samples.slice(start, end);
  const denoised = speech_denoiser.run(chunk, wave.sampleRate);
  output.push(...denoised.samples);
  start = end;
}

output.push(...speech_denoiser.flush().samples);

const outputFilename = './enhanced-online-dpdfnet.wav';
sherpa_onnx.writeWave(outputFilename, {
  samples: Float32Array.from(output),
  sampleRate: speech_denoiser.sampleRate,
});
console.log(`Saved to ${outputFilename}`);

speech_denoiser.free();


================================================
FILE: nodejs-examples/test-online-speech-enhancement-gtcrn.js
================================================
// Copyright (c)  2026  Xiaomi Corporation (authors: Fangjun Kuang)
//
// Please download a speech enhancement model and ./inp_16k.wav used in this
// file from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
//
// This script shows how to use the streaming speech enhancement API from
// sherpa-onnx.
const sherpa_onnx = require('sherpa-onnx');

function createOnlineSpeechDenoiser() {
  const model = './gtcrn_simple.onnx';
  const config = {
    model: {
      gtcrn: {model},
      debug: 1,
    },
  };

  return sherpa_onnx.createOnlineSpeechDenoiser(config);
}

const speech_denoiser = createOnlineSpeechDenoiser();

const waveFilename = './inp_16k.wav';
const wave = sherpa_onnx.readWave(waveFilename);
const frameShift = speech_denoiser.frameShiftInSamples;
const output = [];

let start = 0;
while (start < wave.samples.length) {
  const end = Math.min(start + frameShift, wave.samples.length);
  const chunk = wave.samples.slice(start, end);
  const denoised = speech_denoiser.run(chunk, wave.sampleRate);
  output.push(...denoised.samples);
  start = end;
}

output.push(...speech_denoiser.flush().samples);

const outputFilename = './enhanced-online-gtcrn.wav';
sherpa_onnx.writeWave(outputFilename, {
  samples: Float32Array.from(output),
  sampleRate: speech_denoiser.sampleRate,
});
console.log(`Saved to ${outputFilename}`);

speech_denoiser.free();


================================================
FILE: nodejs-examples/test-online-t-one-ctc.js
================================================
// Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
//
const fs = require('fs');
const {Readable} = require('stream');
const wav = require('wav');

const sherpa_onnx = require('sherpa-onnx');

function createOnlineRecognizer() {
  let toneCtc = {
    model: './sherpa-onnx-streaming-t-one-russian-2025-09-08/model.onnx',
  };

  let onlineModelConfig = {
    toneCtc: toneCtc,
    tokens: './sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt',
    numThreads: 1,
    provider: 'cpu',
    debug: 1,
  };


  let recognizerConfig = {
    modelConfig: onlineModelConfig,
    decodingMethod: 'greedy_search',
    maxActivePaths: 4,
    enableEndpoint: 1,
    rule1MinTrailingSilence: 2.4,
    rule2MinTrailingSilence: 1.2,
    rule3MinUtteranceLength: 20,
  };

  return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
}

const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();

const waveFilename = './sherpa-onnx-streaming-t-one-russian-2025-09-08/0.wav';
const wave = sherpa_onnx.readWave(waveFilename);

const leftPadding = new Float32Array(wave.sampleRate * 0.3);
const tailPadding = new Float32Array(wave.sampleRate * 0.6);

stream.acceptWaveform(wave.sampleRate, leftPadding);
stream.acceptWaveform(wave.sampleRate, wave.samples);
stream.acceptWaveform(wave.sampleRate, tailPadding);

while (recognizer.isReady(stream)) {
  recognizer.decode(stream);
}
const text = recognizer.getResult(stream).text;
console.log(text);

stream.free();
recognizer.free();


================================================
FILE: nodejs-examples/test-online-transducer-itn.js
================================================
// Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
//
const fs = require('fs');
const {Readable} = require('stream');
const wav = require('wav');

const sherpa_onnx = require('sherpa-onnx');

function createOnlineRecognizer() {
  let onlineTransducerModelConfig = {
    encoder:
        './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx',
    decoder:
        './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx',
    joiner:
        './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx',
  };

  let onlineModelConfig = {
    transducer: onlineTransducerModelConfig,
    tokens:
        './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt',
    numThreads: 1,
    provider: 'cpu',
    debug: 1,
    modelType: 'zipformer',
  };

  let featureConfig = {
    sampleRate: 16000,
    featureDim: 80,
  };

  let recognizerConfig = {
    featConfig: featureConfig,
    modelConfig: onlineModelConfig,
    decodingMethod: 'greedy_search',
    maxActivePaths: 4,
    enableEndpoint: 1,
    rule1MinTrailingSilence: 2.4,
    rule2MinTrailingSilence: 1.2,
    rule3MinUtteranceLength: 20,
    // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
    ruleFsts: './itn_zh_number.fst',
  };

  return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
}

const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();

// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
const waveFilename = './itn-zh-number.wav';

const reader = new wav.Reader();
const readable = new Readable().wrap(reader);

function decode(samples) {
  stream.acceptWaveform(recognizer.config.featConfig.sampleRate, samples);

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }
  const text = recognizer.getResult(stream).text;
  console.log(text);
}

reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
  if (sampleRate != recognizer.config.featConfig.sampleRate) {
    throw new Error(`Only support sampleRate ${
        recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
  }

  if (audioFormat != 1) {
    throw new Error(`Only support PCM format. Given ${audioFormat}`);
  }

  if (channels != 1) {
    throw new Error(`Only a single channel. Given ${channels}`);
  }

  if (bitDepth != 16) {
    throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
  }
});

fs.createReadStream(waveFilename, {'highWaterMark': 4096})
    .pipe(reader)
    .on('finish', function(err) {
      // tail padding
      const floatSamples =
          new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
      decode(floatSamples);
      stream.free();
      recognizer.free();
    });

readable.on('readable', function() {
  let chunk;
  while ((chunk = readable.read()) != null) {
    const int16Samples = new Int16Array(
        chunk.buffer, chunk.byteOffset,
        chunk.length / Int16Array.BYTES_PER_ELEMENT);

    const floatSamples = new Float32Array(int16Samples.length);

    for (let i = 0; i < floatSamples.length; i++) {
      floatSamples[i] = int16Samples[i] / 32768.0;
    }

    decode(floatSamples);
  }
});


================================================
FILE: nodejs-examples/test-online-transducer-microphone.js
================================================
// Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
//
const portAudio = require('naudiodon2');
// console.log(portAudio.getDevices());

const sherpa_onnx = require('sherpa-onnx');

function createOnlineRecognizer() {
  let onlineTransducerModelConfig = {
    encoder:
        './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx',
    decoder:
        './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx',
    joiner:
        './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx',
  };

  let onlineModelConfig = {
    transducer: onlineTransducerModelConfig,
    tokens:
        './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt',
    numThreads: 1,
    provider: 'cpu',
    debug: 1,
    modelType: 'zipformer',
  };

  let featureConfig = {
    sampleRate: 16000,
    featureDim: 80,
  };

  let recognizerConfig = {
    featConfig: featureConfig,
    modelConfig: onlineModelConfig,
    decodingMethod: 'greedy_search',
    maxActivePaths: 4,
    enableEndpoint: 1,
    rule1MinTrailingSilence: 2.4,
    rule2MinTrailingSilence: 1.2,
    rule3MinUtteranceLength: 20,
  };

  return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
}

const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();

let lastText = '';
let segmentIndex = 0;

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: recognizer.config.featConfig.sampleRate
  }
});

ai.on('data', data => {
  const samples = new Float32Array(data.buffer);

  stream.acceptWaveform(recognizer.config.featConfig.sampleRate, samples);

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  const isEndpoint = recognizer.isEndpoint(stream);
  const text = recognizer.getResult(stream).text;

  if (text.length > 0 && lastText != text) {
    lastText = text;
    console.log(segmentIndex, lastText);
  }
  if (isEndpoint) {
    if (text.length > 0) {
      lastText = text;
      segmentIndex += 1;
    }
    recognizer.reset(stream);
  }
});

ai.on('close', () => {
  console.log('Free resources');
  stream.free();
  recognizer.free();
});

ai.start();
console.log('Started! Please speak');


================================================
FILE: nodejs-examples/test-online-transducer.js
================================================
// Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
//
const fs = require('fs');
const {Readable} = require('stream');
const wav = require('wav');

const sherpa_onnx = require('sherpa-onnx');

function createOnlineRecognizer() {
  let onlineTransducerModelConfig = {
    encoder:
        './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx',
    decoder:
        './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx',
    joiner:
        './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx',
  };

  let onlineModelConfig = {
    transducer: onlineTransducerModelConfig,
    tokens:
        './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt',
  };

  let recognizerConfig = {
    modelConfig: onlineModelConfig,
  };

  return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
}

const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();

const waveFilename =
    './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav';

const reader = new wav.Reader();
const readable = new Readable().wrap(reader);

function decode(samples) {
  stream.acceptWaveform(recognizer.config.featConfig.sampleRate, samples);

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }
  const text = recognizer.getResult(stream).text;
  console.log(text);
}

reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
  if (sampleRate != recognizer.config.featConfig.sampleRate) {
    throw new Error(`Only support sampleRate ${
        recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
  }

  if (audioFormat != 1) {
    throw new Error(`Only support PCM format. Given ${audioFormat}`);
  }

  if (channels != 1) {
    throw new Error(`Only a single channel. Given ${channels}`);
  }

  if (bitDepth != 16) {
    throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
  }
});

fs.createReadStream(waveFilename, {'highWaterMark': 4096})
    .pipe(reader)
    .on('finish', function(err) {
      // tail padding
      const floatSamples =
          new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
      decode(floatSamples);
      stream.free();
      recognizer.free();
    });

readable.on('readable', function() {
  let chunk;
  while ((chunk = readable.read()) != null) {
    const int16Samples = new Int16Array(
        chunk.buffer, chunk.byteOffset,
        chunk.length / Int16Array.BYTES_PER_ELEMENT);

    const floatSamples = new Float32Array(int16Samples.length);

    for (let i = 0; i < floatSamples.length; i++) {
      floatSamples[i] = int16Samples[i] / 32768.0;
    }

    decode(floatSamples);
  }
});


================================================
FILE: nodejs-examples/test-online-zipformer2-ctc-hlg.js
================================================
// Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
//
const fs = require('fs');
const {Readable} = require('stream');
const wav = require('wav');

const sherpa_onnx = require('sherpa-onnx');

function createOnlineRecognizer() {
  let onlineZipformer2CtcModelConfig = {
    model:
        './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx',
  };

  let onlineModelConfig = {
    zipformer2Ctc: onlineZipformer2CtcModelConfig,
    tokens: './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt',
    numThreads: 1,
    provider: 'cpu',
    debug: 0,
    modelType: '',
  };

  let featureConfig = {
    sampleRate: 16000,
    featureDim: 80,
  };

  let recognizerConfig = {
    featConfig: featureConfig,
    modelConfig: onlineModelConfig,
    decodingMethod: 'greedy_search',
    maxActivePaths: 4,
    enableEndpoint: 1,
    rule1MinTrailingSilence: 2.4,
    rule2MinTrailingSilence: 1.2,
    rule3MinUtteranceLength: 20,
    ctcFstDecoderConfig: {
      graph: './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst',
      maxActive: 3000,
    }
  };

  return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
}

const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();

const waveFilename =
    './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav';

const reader = new wav.Reader();
const readable = new Readable().wrap(reader);

function decode(samples) {
  stream.acceptWaveform(gSampleRate, samples);

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }
  const text = recognizer.getResult(stream).text;
  console.log(text);
}

let gSampleRate = 16000;

reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
  gSampleRate = sampleRate;

  if (audioFormat != 1) {
    throw new Error(`Only support PCM format. Given ${audioFormat}`);
  }

  if (channels != 1) {
    throw new Error(`Only a single channel. Given ${channels}`);
  }

  if (bitDepth != 16) {
    throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
  }
});

fs.createReadStream(waveFilename, {'highWaterMark': 4096})
    .pipe(reader)
    .on('finish', function(err) {
      // tail padding
      const floatSamples =
          new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
      decode(floatSamples);
      stream.free();
      recognizer.free();
    });

readable.on('readable', function() {
  let chunk;
  while ((chunk = readable.read()) != null) {
    const int16Samples = new Int16Array(
        chunk.buffer, chunk.byteOffset,
        chunk.length / Int16Array.BYTES_PER_ELEMENT);

    const floatSamples = new Float32Array(int16Samples.length);

    for (let i = 0; i < floatSamples.length; i++) {
      floatSamples[i] = int16Samples[i] / 32768.0;
    }

    decode(floatSamples);
  }
});


================================================
FILE: nodejs-examples/test-online-zipformer2-ctc.js
================================================
// Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
//
const fs = require('fs');
const {Readable} = require('stream');
const wav = require('wav');

const sherpa_onnx = require('sherpa-onnx');

function createOnlineRecognizer() {
  let onlineZipformer2CtcModelConfig = {
    model:
        './sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx',
  };

  let onlineModelConfig = {
    zipformer2Ctc: onlineZipformer2CtcModelConfig,
    tokens:
        './sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt',
    numThreads: 1,
    provider: 'cpu',
    debug: 1,
  };

  let featureConfig = {
    sampleRate: 16000,
    featureDim: 80,
  };

  let recognizerConfig = {
    featConfig: featureConfig,
    modelConfig: onlineModelConfig,
    decodingMethod: 'greedy_search',
    maxActivePaths: 4,
    enableEndpoint: 1,
    rule1MinTrailingSilence: 2.4,
    rule2MinTrailingSilence: 1.2,
    rule3MinUtteranceLength: 20,
  };

  return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
}

const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();

const waveFilename =
    './sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000000.wav';

const reader = new wav.Reader();
const readable = new Readable().wrap(reader);

function decode(samples) {
  stream.acceptWaveform(recognizer.config.featConfig.sampleRate, samples);

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }
  const text = recognizer.getResult(stream).text;
  console.log(text);
}

reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
  if (sampleRate != recognizer.config.featConfig.sampleRate) {
    throw new Error(`Only support sampleRate ${
        recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
  }

  if (audioFormat != 1) {
    throw new Error(`Only support PCM format. Given ${audioFormat}`);
  }

  if (channels != 1) {
    throw new Error(`Only a single channel. Given ${channels}`);
  }

  if (bitDepth != 16) {
    throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
  }
});

fs.createReadStream(waveFilename, {'highWaterMark': 4096})
    .pipe(reader)
    .on('finish', function(err) {
      // tail padding
      const floatSamples =
          new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
      decode(floatSamples);
      stream.free();
      recognizer.free();
    });

readable.on('readable', function() {
  let chunk;
  while ((chunk = readable.read()) != null) {
    const int16Samples = new Int16Array(
        chunk.buffer, chunk.byteOffset,
        chunk.length / Int16Array.BYTES_PER_ELEMENT);

    const floatSamples = new Float32Array(int16Samples.length);

    for (let i = 0; i < floatSamples.length; i++) {
      floatSamples[i] = int16Samples[i] / 32768.0;
    }

    decode(floatSamples);
  }
});


================================================
FILE: nodejs-examples/test-vad-with-non-streaming-asr-moonshine.js
================================================
// Copyright (c)  2023-2024  Xiaomi Corporation (authors: Fangjun Kuang)

const sherpa_onnx = require('sherpa-onnx');

function createRecognizer() {
  // Please download test files from
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  const config = {
    'modelConfig': {
      'moonshine': {
        'preprocessor': './sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx',
        'encoder': './sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx',
        'uncachedDecoder':
            './sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx',
        'cachedDecoder':
            './sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx',
      },
      'tokens': './sherpa-onnx-moonshine-tiny-en-int8/tokens.txt',
      'debug': 0,
    }
  };

  return sherpa_onnx.createOfflineRecognizer(config);
}

function createVad() {
  // please download silero_vad.onnx from
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
  //
  // please download ten-vad.onnx from
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
  //
  // You only need one vad
  //
  // To use ten-vad.onnx, please set sileroVad.model to ''
  // and set tenVad.model to 'ten-vad.onnx'
  //
  const config = {
    sileroVad: {
      model: './silero_vad.onnx',
      threshold: 0.5,
      minSpeechDuration: 0.25,
      minSilenceDuration: 0.5,
      maxSpeechDuration: 5,
      windowSize: 512,
    },
    tenVad: {
      // model: './ten-vad.onnx',
      model: '',
      threshold: 0.5,
      minSpeechDuration: 0.25,
      minSilenceDuration: 0.5,
      maxSpeechDuration: 5,
      windowSize: 256,
    },
    sampleRate: 16000,
    debug: true,
    numThreads: 1,
    bufferSizeInSeconds: 60,
  };


  return sherpa_onnx.createVad(config);
}

const recognizer = createRecognizer();
const vad = createVad();

// please download ./Obama.wav from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const waveFilename = './Obama.wav';
const wave = sherpa_onnx.readWave(waveFilename);

if (wave.sampleRate != recognizer.config.featConfig.sampleRate) {
  throw new Error(
      'Expected sample rate: ${recognizer.config.featConfig.sampleRate}. Given: ${wave.sampleRate}');
}

console.log('Started');
let start = Date.now();

let windowSize = vad.config.sileroVad.windowSize;
if (vad.config.tenVad.model != '') {
  windowSize = vad.config.tenVad.windowSize;
}

for (let i = 0; i < wave.samples.length; i += windowSize) {
  const thisWindow = wave.samples.subarray(i, i + windowSize);
  vad.acceptWaveform(thisWindow);

  while (!vad.isEmpty()) {
    const segment = vad.front();
    vad.pop();

    let start_time = segment.start / wave.sampleRate;
    let end_time = start_time + segment.samples.length / wave.sampleRate;

    start_time = start_time.toFixed(2);
    end_time = end_time.toFixed(2);

    const stream = recognizer.createStream();
    stream.acceptWaveform(wave.sampleRate, segment.samples);

    recognizer.decode(stream);
    const r = recognizer.getResult(stream);
    if (r.text.length > 0) {
      const text = r.text.toLowerCase().trim();
      console.log(`${start_time} -- ${end_time}: ${text}`);
    }

    stream.free();
  }
}

vad.flush();

while (!vad.isEmpty()) {
  const segment = vad.front();
  vad.pop();

  let start_time = segment.start / wave.sampleRate;
  let end_time = start_time + segment.samples.length / wave.sampleRate;

  start_time = start_time.toFixed(2);
  end_time = end_time.toFixed(2);

  const stream = recognizer.createStream();
  stream.acceptWaveform(wave.sampleRate, segment.samples);

  recognizer.decode(stream);
  const r = recognizer.getResult(stream);
  if (r.text.length > 0) {
    const text = r.text.toLowerCase().trim();
    console.log(`${start_time} -- ${end_time}: ${text}`);
  }
}

let stop = Date.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));

vad.free();
recognizer.free();


================================================
FILE: nodejs-examples/test-vad-with-non-streaming-asr-whisper.js
================================================
// Copyright (c)  2023-2024  Xiaomi Corporation (authors: Fangjun Kuang)

const sherpa_onnx = require('sherpa-onnx');

function createRecognizer() {
  // Please download test files from
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  const config = {
    'modelConfig': {
      'whisper': {
        'encoder': './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx',
        'decoder': './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx',
        'tailPaddings': 2000,
      },
      'tokens': './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt',
      'debug': 0,
    }
  };

  return sherpa_onnx.createOfflineRecognizer(config);
}

function createVad() {
  // please download silero_vad.onnx from
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
  const config = {
    sileroVad: {
      model: './silero_vad.onnx',
      threshold: 0.5,
      minSpeechDuration: 0.25,
      minSilenceDuration: 0.5,
      maxSpeechDuration: 5,
      windowSize: 512,
    },
    sampleRate: 16000,
    debug: true,
    numThreads: 1,
    bufferSizeInSeconds: 60,
  };

  return sherpa_onnx.createVad(config);
}

const recognizer = createRecognizer();
const vad = createVad();

// please download ./Obama.wav from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const waveFilename = './Obama.wav';
const wave = sherpa_onnx.readWave(waveFilename);

if (wave.sampleRate != recognizer.config.featConfig.sampleRate) {
  throw new Error(
      'Expected sample rate: ${recognizer.config.featConfig.sampleRate}. Given: ${wave.sampleRate}');
}

console.log('Started');
let start = Date.now();

const windowSize = vad.config.sileroVad.windowSize;
for (let i = 0; i < wave.samples.length; i += windowSize) {
  const thisWindow = wave.samples.subarray(i, i + windowSize);
  vad.acceptWaveform(thisWindow);

  while (!vad.isEmpty()) {
    const segment = vad.front();
    vad.pop();

    let start_time = segment.start / wave.sampleRate;
    let end_time = start_time + segment.samples.length / wave.sampleRate;

    start_time = start_time.toFixed(2);
    end_time = end_time.toFixed(2);

    const stream = recognizer.createStream();
    stream.acceptWaveform(wave.sampleRate, segment.samples);

    recognizer.decode(stream);
    const r = recognizer.getResult(stream);
    if (r.text.length > 0) {
      const text = r.text.toLowerCase().trim();
      console.log(`${start_time} -- ${end_time}: ${text}`);
    }

    stream.free();
  }
}

vad.flush();

while (!vad.isEmpty()) {
  const segment = vad.front();
  vad.pop();

  let start_time = segment.start / wave.sampleRate;
  let end_time = start_time + segment.samples.length / wave.sampleRate;

  start_time = start_time.toFixed(2);
  end_time = end_time.toFixed(2);

  const stream = recognizer.createStream();
  stream.acceptWaveform(wave.sampleRate, segment.samples);

  recognizer.decode(stream);
  const r = recognizer.getResult(stream);
  if (r.text.length > 0) {
    const text = r.text.toLowerCase().trim();
    console.log(`${start_time} -- ${end_time}: ${text}`);
  }
}

let stop = Date.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));

vad.free();
recognizer.free();


================================================
FILE: pascal-api-examples/.gitignore
================================================
link*.res


================================================
FILE: pascal-api-examples/README.md
================================================
# Introduction

This directory contains examples for how to use the [Object Pascal](https://en.wikipedia.org/wiki/Object_Pascal)
APIs of [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx).

**Documentation for this directory**:
https://k2-fsa.github.io/sherpa/onnx/pascal-api/index.html

|Directory| Description|
|---------|------------|
|[read-wav](./read-wav)|It shows how to read a wave file.|
|[speaker-diarization](./speaker-diarization)|It shows how to use Pascal API for speaker diarization.|
|[speech-enhancement-gtcrn](./speech-enhancement-gtcrn)| It shows how to use the offline speech denoiser API with GTCRN.|
|[speech-enhancement-dpdfnet](./speech-enhancement-dpdfnet)| It shows how to use the offline speech denoiser API with DPDFNet. Use `dpdfnet_baseline.onnx`, `dpdfnet2.onnx`, `dpdfnet4.onnx`, or `dpdfnet8.onnx` for 16 kHz downstream ASR and `dpdfnet2_48khz_hr.onnx` for 48 kHz enhancement output.|
|[streaming-speech-enhancement-gtcrn](./streaming-speech-enhancement-gtcrn)| It shows how to use the streaming speech denoiser API with GTCRN.|
|[streaming-speech-enhancement-dpdfnet](./streaming-speech-enhancement-dpdfnet)| It shows how to use the streaming speech denoiser API with DPDFNet.|
|[streaming-asr](./streaming-asr)| It shows how to use streaming models for speech recognition.|
|[non-streaming-asr](./non-streaming-asr)| It shows how to use non-streaming models for speech recognition.|
|[vad](./vad)| It shows how to use the voice activity detection API.|
|[vad-with-non-streaming-asr](./vad-with-non-streaming-asr)| It shows how to use the voice activity detection API with non-streaming models for speech recognition.|
|[portaudio-test](./portaudio-test)| It shows how to use PortAudio for recording and playing.|
|[tts](./tts)| It shows how to use the text-to-speech API.|


================================================
FILE: pascal-api-examples/non-streaming-asr/.gitignore
================================================
!run-*.sh
zipformer_transducer
whisper
nemo_transducer
nemo_ctc
paraformer
paraformer_itn
sense_voice
telespeech_ctc
moonshine
moonshine_v2
dolphin_ctc
zipformer_ctc
wenet_ctc
nemo_canary
omnilingual_asr_ctc
medasr_ctc
funasr_nano
fire_red_asr_ctc
fire_red_asr


================================================
FILE: pascal-api-examples/non-streaming-asr/README.md
================================================
# Introduction

This folder contains examples about using sherpa-onnx's object pascal
APIs with non-streaming models for speech recognition.

|File|Description|
|----|-----------|
|[run-dolphin-ctc.sh](./run-dolphin-ctc.sh)|Use a non-streaming [Dolphin](https://github.com/DataoceanAI/Dolphin) CTC model for speech recognition|
|[run-nemo-ctc.sh](./run-nemo-ctc.sh)|Use a non-streaming NeMo CTC model for speech recognition|
|[run-nemo-transducer.sh](./run-nemo-transducer.sh)|Use a non-streaming NeMo transducer model for speech recognition|
|[run-paraformer-itn.sh](./run-paraformer-itn.sh)|Use a non-streaming Paraformer model for speech recognition with inverse text normalization for numbers|
|[run-paraformer.sh](./run-paraformer.sh)|Use a non-streaming Paraformer model for speech recognition|
|[run-sense-voice.sh](./run-sense-voice.sh)|Use a non-streaming SenseVoice model for speech recognition|
|[run-telespeech-ctc.sh](./run-telespeech-ctc.sh)|Use a non-streaming TeleSpeech CTC model for speech recognition|
|[run-whisper.sh](./run-whisper.sh)|Use a Whisper model for speech recognition|
|[run-zipformer-transducer.sh](./run-zipformer-transducer.sh)|Use a non-streaming Zipformer transducer model for speech recognition|


================================================
FILE: pascal-api-examples/non-streaming-asr/dolphin_ctc.pas
================================================
{ Copyright (c)  2025  Xiaomi Corporation }

{
This file shows how to use a non-streaming Dolphin CTC model
to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program dolphin_ctc;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;

  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;
  Duration: Single;
  RealTimeFactor: Single;
begin
  Initialize(Config);

  Config.ModelConfig.Dolphin.Model := './sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx';
  Config.ModelConfig.Tokens := './sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := './sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  Stream := Recognizer.CreateStream();
  Start := Now;

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  {Free resources to avoid memory leak.

  Note: You don't need to invoke them for this simple script.
  However, you have to invoke them in your own large/complex project.
  }
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.


================================================
FILE: pascal-api-examples/non-streaming-asr/fire_red_asr.pas
================================================
{ Copyright (c)  2025  Xiaomi Corporation }

{
This file shows how to use a non-streaming FireRedAsr AED model
to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program fire_red_asr;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;

  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;
  Duration: Single;
  RealTimeFactor: Single;
begin
  Initialize(Config);

  Config.ModelConfig.FireRedAsr.Encoder := './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx';
  Config.ModelConfig.FireRedAsr.Decoder := './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/decoder.int8.onnx';
  Config.ModelConfig.Tokens := './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/0.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  Stream := Recognizer.CreateStream();
  Start := Now;

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  {Free resources to avoid memory leak.

  Note: You don't need to invoke them for this simple script.
  However, you have to invoke them in your own large/complex project.
  }
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.


================================================
FILE: pascal-api-examples/non-streaming-asr/fire_red_asr_ctc.pas
================================================
{ Copyright (c)  2026  Xiaomi Corporation }

{
This file shows how to use a non-streaming FireRedASR CTC model
to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program fire_red_asr_ctc;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;

  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;
  Duration: Single;
  RealTimeFactor: Single;
begin
  Initialize(Config);

  Config.ModelConfig.FireRedAsrCtc.Model := './sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/model.int8.onnx';
  Config.ModelConfig.Tokens := './sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := True;

  WaveFilename := './sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/test_wavs/0.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  Stream := Recognizer.CreateStream();
  Start := Now;

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  {Free resources to avoid memory leak.

  Note: You don't need to invoke them for this simple script.
  However, you have to invoke them in your own large/complex project.
  }
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.


================================================
FILE: pascal-api-examples/non-streaming-asr/funasr_nano.pas
================================================
{ Copyright (c)  2026  Xiaomi Corporation }

{
This file shows how to use a non-streaming FunASR Nano model
to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program funasr_nano;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;

  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;
  Duration: Single;
  RealTimeFactor: Single;
begin
  Initialize(Config);

  Config.ModelConfig.FunAsrNano.EncoderAdaptor := './sherpa-onnx-funasr-nano-int8-2025-12-30/encoder_adaptor.int8.onnx';
  Config.ModelConfig.FunAsrNano.LLM := './sherpa-onnx-funasr-nano-int8-2025-12-30/llm.int8.onnx';
  Config.ModelConfig.FunAsrNano.Embedding := './sherpa-onnx-funasr-nano-int8-2025-12-30/embedding.int8.onnx';
  Config.ModelConfig.FunAsrNano.Tokenizer := './sherpa-onnx-funasr-nano-int8-2025-12-30/Qwen3-0.6B';
  Config.ModelConfig.Tokens := '';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 2;
  Config.ModelConfig.Debug := True;

  WaveFilename := './sherpa-onnx-funasr-nano-int8-2025-12-30/test_wavs/lyrics.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  Stream := Recognizer.CreateStream();
  Start := Now;

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  {Free resources to avoid memory leak.

  Note: You don't need to invoke them for this simple script.
  However, you have to invoke them in your own large/complex project.
  }
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.


================================================
FILE: pascal-api-examples/non-streaming-asr/medasr_ctc.pas
================================================
{ Copyright (c)  2025  Xiaomi Corporation }

{
This file shows how to use a non-streaming Google MedASR CTC model
to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program medasr_ctc;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;

  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;
  Duration: Single;
  RealTimeFactor: Single;
begin
  Initialize(Config);

  Config.ModelConfig.MedAsr.Model := './sherpa-onnx-medasr-ctc-en-int8-2025-12-25/model.int8.onnx';
  Config.ModelConfig.Tokens := './sherpa-onnx-medasr-ctc-en-int8-2025-12-25/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := True;

  WaveFilename := './sherpa-onnx-medasr-ctc-en-int8-2025-12-25/test_wavs/0.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  Stream := Recognizer.CreateStream();
  Start := Now;

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  {Free resources to avoid memory leak.

  Note: You don't need to invoke them for this simple script.
  However, you have to invoke them in your own large/complex project.
  }
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.


================================================
FILE: pascal-api-examples/non-streaming-asr/moonshine.pas
================================================
{ Copyright (c)  2024  Xiaomi Corporation }

{
This file shows how to use a non-streaming Moonshine model
to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program moonshine;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;

  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;
  Duration: Single;
  RealTimeFactor: Single;
begin
  Initialize(Config);

  Config.ModelConfig.Moonshine.Preprocessor := './sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx';
  Config.ModelConfig.Moonshine.Encoder := './sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx';
  Config.ModelConfig.Moonshine.UncachedDecoder := './sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx';
  Config.ModelConfig.Moonshine.CachedDecoder := './sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx';

  Config.ModelConfig.Tokens := './sherpa-onnx-moonshine-tiny-en-int8/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := './sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  Stream := Recognizer.CreateStream();
  Start := Now;

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  {Free resources to avoid memory leak.

  Note: You don't need to invoke them for this simple script.
  However, you have to invoke them in your own large/complex project.
  }
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.


================================================
FILE: pascal-api-examples/non-streaming-asr/moonshine_v2.pas
================================================
{ Copyright (c)  2024-2026  Xiaomi Corporation }

{
This file shows how to use a non-streaming Moonshine v2 model
to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program moonshine_v2;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;

  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;
  Duration: Single;
  RealTimeFactor: Single;
begin
  Initialize(Config);

  Config.ModelConfig.Moonshine.Encoder := './sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/encoder_model.ort';
  Config.ModelConfig.Moonshine.MergedDecoder := './sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/decoder_model_merged.ort';

  Config.ModelConfig.Tokens := './sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := './sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/test_wavs/0.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  Stream := Recognizer.CreateStream();
  Start := Now;

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  {Free resources to avoid memory leak.

  Note: You don't need to invoke them for this simple script.
  However, you have to invoke them in your own large/complex project.
  }
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.


================================================
FILE: pascal-api-examples/non-streaming-asr/nemo_canary.pas
================================================
{ Copyright (c)  2025  Xiaomi Corporation }

{
This file shows how to use a non-streaming NeMo Canary model
to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program nemo_canary;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;

  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;
  Duration: Single;
  RealTimeFactor: Single;
begin
  Initialize(Config);

  Config.ModelConfig.Canary.Encoder := './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx';
  Config.ModelConfig.Canary.Decoder := './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx';
  Config.ModelConfig.Canary.SrcLang := 'en';
  Config.ModelConfig.Canary.TgtLang := 'en';
  Config.ModelConfig.Canary.UsePnc := True;
  Config.ModelConfig.Tokens := './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  Stream := Recognizer.CreateStream();
  Start := Now;

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  FreeAndNil(Stream);

  WriteLn('-----------Output German-----');

  Stream := Recognizer.CreateStream();
  Start := Now;

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);

  Config.ModelConfig.Canary.TgtLang := 'de';
  Recognizer.SetConfig(Config);
  Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  {Free resources to avoid memory leak.

  Note: You don't need to invoke them for this simple script.
  However, you have to invoke them in your own large/complex project.
  }
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.


================================================
FILE: pascal-api-examples/non-streaming-asr/nemo_ctc.pas
================================================
{ Copyright (c)  2024  Xiaomi Corporation }

{
This file shows how to use a non-streaming NeMo CTC model
to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program nemo_ctc;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;

  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;
  Duration: Single;
  RealTimeFactor: Single;
begin
  Initialize(Config);

  Config.ModelConfig.NeMoCtC.Model := './sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/model.onnx';
  Config.ModelConfig.Tokens := './sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := './sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/test_wavs/es-spanish.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  Stream := Recognizer.CreateStream();
  Start := Now;

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  {Free resources to avoid memory leak.

  Note: You don't need to invoke them for this simple script.
  However, you have to invoke them in your own large/complex project.
  }
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.


================================================
FILE: pascal-api-examples/non-streaming-asr/nemo_transducer.pas
================================================
{ Copyright (c)  2024  Xiaomi Corporation }

{
This file shows how to use a non-streaming NeMo transducer
to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program nemo_transducer;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;

  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;
  Duration: Single;
  RealTimeFactor: Single;
begin
  Initialize(Config);

  Config.ModelConfig.Transducer.Encoder := './sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/encoder.onnx';
  Config.ModelConfig.Transducer.Decoder := './sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/decoder.onnx';
  Config.ModelConfig.Transducer.Joiner := './sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/joiner.onnx';
  Config.ModelConfig.ModelType := 'nemo_transducer';
  Config.ModelConfig.Tokens := './sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := './sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/test_wavs/de-german.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  Stream := Recognizer.CreateStream();
  Start := Now;

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  {Free resources to avoid memory leak.

  Note: You don't need to invoke them for this simple script.
  However, you have to invoke them in your own large/complex project.
  }
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.


================================================
FILE: pascal-api-examples/non-streaming-asr/omnilingual_asr_ctc.pas
================================================
{ Copyright (c)  2025  Xiaomi Corporation }

{
This file shows how to use a non-streaming Omnilingual ASR CTC model
to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program omnilingual_asr_ctc;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;

  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;
  Duration: Single;
  RealTimeFactor: Single;
begin
  Initialize(Config);

  Config.ModelConfig.Omnilingual.Model := './sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/model.int8.onnx';
  Config.ModelConfig.Tokens := './sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := './sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/test_wavs/en.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  Stream := Recognizer.CreateStream();
  Start := Now;

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  {Free resources to avoid memory leak.

  Note: You don't need to invoke them for this simple script.
  However, you have to invoke them in your own large/complex project.
  }
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.


================================================
FILE: pascal-api-examples/non-streaming-asr/paraformer.pas
================================================
{ Copyright (c)  2024  Xiaomi Corporation }

{
This file shows how to use a non-streaming Paraformer model
to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program paraformer;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;

  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;
  Duration: Single;
  RealTimeFactor: Single;
begin
  Initialize(Config);

  Config.ModelConfig.Paraformer.Model := './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx';
  Config.ModelConfig.Tokens := './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := './sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/3-sichuan.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  Stream := Recognizer.CreateStream();
  Start := Now;

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  {Free resources to avoid memory leak.

  Note: You don't need to invoke them for this simple script.
  However, you have to invoke them in your own large/complex project.
  }
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.


================================================
FILE: pascal-api-examples/non-streaming-asr/paraformer_itn.pas
================================================
{ Copyright (c)  2024  Xiaomi Corporation }

{
This file shows how to use a non-streaming Paraformer model
to decode files with inverse text normalization for numbers.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program paraformer_itn;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;

  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;
  Duration: Single;
  RealTimeFactor: Single;
begin
  Initialize(Config);

  Config.ModelConfig.Paraformer.Model := './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx';
  Config.ModelConfig.Tokens := './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;
  Config.RuleFsts := './itn_zh_number.fst';

  WaveFilename := './itn-zh-number.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  Stream := Recognizer.CreateStream();
  Start := Now;

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  {Free resources to avoid memory leak.

  Note: You don't need to invoke them for this simple script.
  However, you have to invoke them in your own large/complex project.
  }
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.


================================================
FILE: pascal-api-examples/non-streaming-asr/run-dolphin-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

if [ ! -f ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./dolphin_ctc.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./dolphin_ctc


================================================
FILE: pascal-api-examples/non-streaming-asr/run-fire-red-asr-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

if [ ! -f ./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
  tar xvf sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
  rm sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
  ls -lh sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25
fi


fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./fire_red_asr_ctc.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./fire_red_asr_ctc


================================================
FILE: pascal-api-examples/non-streaming-asr/run-fire-red-asr.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

if [ ! -f ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  ls -lh sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16
fi


fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./fire_red_asr.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./fire_red_asr


================================================
FILE: pascal-api-examples/non-streaming-asr/run-funasr-nano.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

if [ ! -f ./sherpa-onnx-funasr-nano-int8-2025-12-30/embedding.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
  tar xvf sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
  rm sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./funasr_nano.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./funasr_nano


================================================
FILE: pascal-api-examples/non-streaming-asr/run-medasr-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

if [ ! -f ./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
  tar xvf sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
  rm sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./medasr_ctc.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./medasr_ctc


================================================
FILE: pascal-api-examples/non-streaming-asr/run-moonshine-v2.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

if [ ! -f ./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/encoder_model.ort ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
  tar xvf sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
  rm sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./moonshine_v2.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./moonshine_v2


================================================
FILE: pascal-api-examples/non-streaming-asr/run-moonshine.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./moonshine.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./moonshine


================================================
FILE: pascal-api-examples/non-streaming-asr/run-nemo-canary.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

if [ ! -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./nemo_canary.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./nemo_canary


================================================
FILE: pascal-api-examples/non-streaming-asr/run-nemo-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

if [ ! -f ./sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
  tar xvf sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
  rm sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./nemo_ctc.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./nemo_ctc


================================================
FILE: pascal-api-examples/non-streaming-asr/run-nemo-transducer.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

if [ ! -f ./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2

  tar xvf sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
  rm sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./nemo_transducer.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./nemo_transducer


================================================
FILE: pascal-api-examples/non-streaming-asr/run-omnilingual-asr-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

if [ ! -f sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
  tar xvf sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
  rm sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./omnilingual_asr_ctc.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./omnilingual_asr_ctc


================================================
FILE: pascal-api-examples/non-streaming-asr/run-paraformer-itn.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

if [ ! -f ./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2

  tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
  rm sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
fi

if [ ! -f ./itn-zh-number.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
fi

if [ ! -f ./itn_zh_number.fst ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./paraformer_itn.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./paraformer_itn


================================================
FILE: pascal-api-examples/non-streaming-asr/run-paraformer.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

if [ ! -f ./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2

  tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
  rm sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./paraformer.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./paraformer


================================================
FILE: pascal-api-examples/non-streaming-asr/run-sense-voice.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

if [ ! -f ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./sense_voice.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./sense_voice


================================================
FILE: pascal-api-examples/non-streaming-asr/run-telespeech-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

if [ ! -f ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2

  tar xvf sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
  rm sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./telespeech_ctc.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./telespeech_ctc


================================================
FILE: pascal-api-examples/non-streaming-asr/run-wenet-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

if [ ! -f sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./wenet_ctc.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./wenet_ctc


================================================
FILE: pascal-api-examples/non-streaming-asr/run-whisper.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

if [ ! -f ./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2

  tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
  rm sherpa-onnx-whisper-tiny.en.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./whisper.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./whisper


================================================
FILE: pascal-api-examples/non-streaming-asr/run-zipformer-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

if [ ! -f ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2

  tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
  rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./zipformer_ctc.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./zipformer_ctc


================================================
FILE: pascal-api-examples/non-streaming-asr/run-zipformer-transducer.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

if [ ! -f ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-gigaspeech-2023-12-12.tar.bz2

  tar xvf sherpa-onnx-zipformer-gigaspeech-2023-12-12.tar.bz2
  rm sherpa-onnx-zipformer-gigaspeech-2023-12-12.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./zipformer_transducer.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./zipformer_transducer


================================================
FILE: pascal-api-examples/non-streaming-asr/sense_voice.pas
================================================
{ Copyright (c)  2024  Xiaomi Corporation }

{
This file shows how to use a non-streaming SenseVoice model
to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program sense_voice;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;

  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;
  Duration: Single;
  RealTimeFactor: Single;
begin
  Initialize(Config);

  Config.ModelConfig.SenseVoice.Model := './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx';
  Config.ModelConfig.SenseVoice.Language := 'auto';
  Config.ModelConfig.SenseVoice.UseItn := False;
  Config.ModelConfig.Tokens := './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  Stream := Recognizer.CreateStream();
  Start := Now;

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(Format('sherpa-onnx version: %s', [SherpaOnnxGetVersionStr()]));
  WriteLn(Format('sherpa-onnx gitSha1: %s', [SherpaOnnxGetGitSha1()]));
  WriteLn(Format('sherpa-onnx gitDate: %s', [SherpaOnnxGetGitDate()]));
  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  {Free resources to avoid memory leak.

  Note: You don't need to invoke them for this simple script.
  However, you have to invoke them in your own large/complex project.
  }
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.


================================================
FILE: pascal-api-examples/non-streaming-asr/telespeech_ctc.pas
================================================
{ Copyright (c)  2024  Xiaomi Corporation }

{
This file shows how to use a non-streaming TeleSpeech CTC model
to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program telespeech_ctc;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;

  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;
  Duration: Single;
  RealTimeFactor: Single;
begin
  Initialize(Config);

  Config.ModelConfig.TeleSpeechCtc := './sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/model.int8.onnx';
  Config.ModelConfig.Tokens := './sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := './sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/test_wavs/3-sichuan.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  Stream := Recognizer.CreateStream();
  Start := Now;

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  {Free resources to avoid memory leak.

  Note: You don't need to invoke them for this simple script.
  However, you have to invoke them in your own large/complex project.
  }
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.


================================================
FILE: pascal-api-examples/non-streaming-asr/wenet_ctc.pas
================================================
{ Copyright (c)  2025  Xiaomi Corporation }

{
This file shows how to use a non-streaming Wenet CTC model
to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program wenet_ctc;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;

  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;
  Duration: Single;
  RealTimeFactor: Single;
begin
  Initialize(Config);

  Config.ModelConfig.WenetCtc.Model := './sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx';
  Config.ModelConfig.Tokens := './sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := './sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  Stream := Recognizer.CreateStream();
  Start := Now;

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  {Free resources to avoid memory leak.

  Note: You don't need to invoke them for this simple script.
  However, you have to invoke them in your own large/complex project.
  }
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.


================================================
FILE: pascal-api-examples/non-streaming-asr/whisper.pas
================================================
{ Copyright (c)  2024  Xiaomi Corporation }

{
This file shows how to use a non-streaming Whisper model
to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program whisper;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;

  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;
  Duration: Single;
  RealTimeFactor: Single;
begin
  Initialize(Config);

  Config.ModelConfig.Whisper.Encoder := './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx';
  Config.ModelConfig.Whisper.Decoder := './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx';
  Config.ModelConfig.Tokens := './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := './sherpa-onnx-whisper-tiny.en/test_wavs/0.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  Stream := Recognizer.CreateStream();
  Start := Now;

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  {Free resources to avoid memory leak.

  Note: You don't need to invoke them for this simple script.
  However, you have to invoke them in your own large/complex project.
  }
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.


================================================
FILE: pascal-api-examples/non-streaming-asr/zipformer_ctc.pas
================================================
{ Copyright (c)  2025  Xiaomi Corporation }

{
This file shows how to use a non-streaming Zipformer CTC model
to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program zipformer_ctc;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;

  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;
  Duration: Single;
  RealTimeFactor: Single;
begin
  Initialize(Config);

  Config.ModelConfig.ZipformerCtc.Model := './sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx';
  Config.ModelConfig.Tokens := './sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := './sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/test_wavs/0.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  Stream := Recognizer.CreateStream();
  Start := Now;

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  {Free resources to avoid memory leak.

  Note: You don't need to invoke them for this simple script.
  However, you have to invoke them in your own large/complex project.
  }
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.


================================================
FILE: pascal-api-examples/non-streaming-asr/zipformer_transducer.pas
================================================
{ Copyright (c)  2024  Xiaomi Corporation }

{
This file shows how to use a non-streaming Zipformer transducer
to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program zipformer_transducer;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;

  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;
  Duration: Single;
  RealTimeFactor: Single;
begin
  Initialize(Config);

  Config.ModelConfig.Transducer.Encoder := './sherpa-onnx-zipformer-gigaspeech-2023-12-12/encoder-epoch-30-avg-1.int8.onnx';
  Config.ModelConfig.Transducer.Decoder := './sherpa-onnx-zipformer-gigaspeech-2023-12-12/decoder-epoch-30-avg-1.onnx';
  Config.ModelConfig.Transducer.Joiner := './sherpa-onnx-zipformer-gigaspeech-2023-12-12/joiner-epoch-30-avg-1.onnx';
  Config.ModelConfig.Tokens := './sherpa-onnx-zipformer-gigaspeech-2023-12-12/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := './sherpa-onnx-zipformer-gigaspeech-2023-12-12/test_wavs/1089-134686-0001.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  Stream := Recognizer.CreateStream();
  Start := Now;

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  {Free resources to avoid memory leak.

  Note: You don't need to invoke them for this simple script.
  However, you have to invoke them in your own large/complex project.
  }
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.


================================================
FILE: pascal-api-examples/portaudio-test/.gitignore
================================================
test-record
test-play


================================================
FILE: pascal-api-examples/portaudio-test/README.md
================================================
# Introduction

[portaudio.pas](./portaudio.pas)
requires that the portaudio library is installed on your system.


On macOS, you can use

```bash
brew install portaudio
```

and it will install `portaudio` into `/usr/local/Cellar/portaudio/19.7.0`.


================================================
FILE: pascal-api-examples/portaudio-test/test-play.pas
================================================
{ Copyright (c)  2024  Xiaomi Corporation }
{
This file shows how to use portaudio for playing.

}
program main;

{$mode objfpc}{$H+}


uses
  portaudio,
  sherpa_onnx,
  dos,
  ctypes,
  SysUtils;

var
  Version: String;
  EnvStr: String;
  Status: Integer;
  NumDevices: Integer;
  DeviceIndex: Integer;
  DeviceInfo: PPaDeviceInfo;
  I: Integer;
  Param: TPaStreamParameters;
  Stream: PPaStream;
  Wave: TSherpaOnnxWave;

  Buffer: TSherpaOnnxCircularBuffer;

function PlayCallback(
      input: Pointer; output: Pointer;
      frameCount: culong;
      timeInfo: PPaStreamCallbackTimeInfo;
      statusFlags: TPaStreamCallbackFlags;
      userData: Pointer ): cint; cdecl;
var
  Samples: TSherpaOnnxSamplesArray;
  I: Integer;
begin
  if Buffer.Size >= frameCount then
    begin
      Samples := Buffer.Get(Buffer.Head, FrameCount);
      Buffer.Pop(FrameCount);
    end
  else
    begin
      Samples := Buffer.Get(Buffer.Head, Buffer.Size);
      Buffer.Pop(Buffer.Size);
      SetLength(Samples, frameCount);
    end;
  for I := 0 to frameCount - 1 do
    pcfloat(output)[I] := Samples[I];

  if Buffer.Size > 0 then
    Result := paContinue
  else
    Result := paComplete;
end;


begin
  Version := String(Pa_GetVersionText);
  WriteLn('Version is ', Version);
  Status := Pa_Initialize;
  if Status <> paNoError then
    begin
      WriteLn('Failed to initialize portaudio, ', Pa_GetErrorText(Status));
      Exit;
    end;

  NumDevices := Pa_GetDeviceCount;
  WriteLn('Num devices: ', NumDevices);

  DeviceIndex := Pa_GetDefaultOutputDevice;

  if DeviceIndex = paNoDevice then
    begin
      WriteLn('No default output device found');
      Pa_Terminate;
      Exit;
    end;

  EnvStr := GetEnv('SHERPA_ONNX_MIC_DEVICE');
  if EnvStr <> '' then
    begin
      DeviceIndex := StrToIntDef(EnvStr, DeviceIndex);
      WriteLn('Use device index from environment variable SHERPA_ONNX_MIC_DEVICE: ', EnvStr);
    end;

  for I := 0 to (NumDevices - 1) do
    begin
      DeviceInfo := Pa_GetDeviceInfo(I);
      if I = DeviceIndex then
        { WriteLn(Format(' * %d %s', [I, DeviceInfo^.Name])) }
        WriteLn(Format(' * %d %s', [I, AnsiString(DeviceInfo^.Name)]))
      else
        WriteLn(Format('   %d %s', [I, AnsiString(DeviceInfo^.Name)]));
    end;

  WriteLn('Use device ', DeviceIndex);
  WriteLn(' Name ', Pa_GetDeviceInfo(DeviceIndex)^.Name);
  WriteLn(' Max output channels ', Pa_GetDeviceInfo(DeviceIndex)^.MaxOutputChannels);

  Wave := SherpaOnnxReadWave('./record.wav');
  if Wave.Samples = nil then
    begin
      WriteLn('Failed to read ./record.wav');
      Pa_Terminate;
      Exit;
    end;

  Initialize(Param);
  Param.Device := DeviceIndex;
  Param.ChannelCount := 1;
  Param.SampleFormat := paFloat32;
  param.SuggestedLatency := Pa_GetDeviceInfo(DeviceIndex)^.DefaultHighOutputLatency;
  param.HostApiSpecificStreamInfo := nil;

  Buffer := TSherpaOnnxCircularBuffer.Create(Length(Wave.Samples));
  Buffer.Push(Wave.Samples);

  Status := Pa_OpenStream(stream, nil, @Param, Wave.SampleRate, paFramesPerBufferUnspecified, paNoFlag,
    PPaStreamCallback(@PlayCallback), nil);

  if Status <> paNoError then
    begin
      WriteLn('Failed to open stream, ', Pa_GetErrorText(Status));
      Pa_Terminate;
      Exit;
    end;

  Status := Pa_StartStream(stream);
  if Status <> paNoError then
    begin
      WriteLn('Failed to start stream, ', Pa_GetErrorText(Status));
      Pa_Terminate;
      Exit;
    end;

  while Buffer.Size > 0 do
    Pa_Sleep(100);  {sleep for 0.1 second }

  Status := Pa_CloseStream(stream);
  if Status <> paNoError then
    begin
      WriteLn('Failed to close stream, ', Pa_GetErrorText(Status));
      Exit;
    end;

  Status := Pa_Terminate;
  if Status <> paNoError then
    begin
      WriteLn('Failed to deinitialize portaudio, ', Pa_GetErrorText(Status));
      Exit;
    end;
end.


================================================
FILE: pascal-api-examples/portaudio-test/test-record.pas
================================================
{ Copyright (c)  2024  Xiaomi Corporation }
{
This file shows how to use portaudio for recording.

It records for 10 seconds and saves the audio samples to ./record.wav
}
program main;

{$mode objfpc}

uses
  portaudio,
  sherpa_onnx,
  dos,
  ctypes,
  SysUtils;

var
  Version: String;
  EnvStr: String;
  Status: Integer;
  NumDevices: Integer;
  DeviceIndex: Integer;
  DeviceInfo: PPaDeviceInfo;
  I: Integer;
  Param: TPaStreamParameters;
  SampleRate: Double;
  Stream: PPaStream;

  Buffer: TSherpaOnnxCircularBuffer;
  AllSamples: TSherpaOnnxSamplesArray;

function RecordCallback(
      input: Pointer; output: Pointer;
      frameCount: culong;
      timeInfo: PPaStreamCallbackTimeInfo;
      statusFlags: TPaStreamCallbackFlags;
      userData: Pointer ): cint; cdecl;
begin
  Buffer.Push(pcfloat(input), frameCount);
  Result := paContinue;
end;


begin
  Version := String(Pa_GetVersionText);
  WriteLn('Version is ', Version);
  Status := Pa_Initialize;
  if Status <> paNoError then
    begin
      WriteLn('Failed to initialize portaudio, ', Pa_GetErrorText(Status));
      Exit;
    end;

  NumDevices := Pa_GetDeviceCount;
  WriteLn('Num devices: ', NumDevices);

  DeviceIndex := Pa_GetDefaultInputDevice;

  if DeviceIndex = paNoDevice then
    begin
      WriteLn('No default input device found');
      Pa_Terminate;
      Exit;
    end;

  EnvStr := GetEnv('SHERPA_ONNX_MIC_DEVICE');
  if EnvStr <> '' then
    begin
      DeviceIndex := StrToIntDef(EnvStr, DeviceIndex);
      WriteLn('Use device index from environment variable SHERPA_ONNX_MIC_DEVICE: ', EnvStr);
    end;

  for I := 0 to (NumDevices - 1) do
    begin
      DeviceInfo := Pa_GetDeviceInfo(I);
      if I = DeviceIndex then
        { WriteLn(Format(' * %d %s', [I, DeviceInfo^.Name])) }
        WriteLn(Format(' * %d %s', [I, AnsiString(DeviceInfo^.Name)]))
      else
        WriteLn(Format('   %d %s', [I, AnsiString(DeviceInfo^.Name)]));
    end;

  WriteLn('Use device ', DeviceIndex);
  WriteLn(' Name ', Pa_GetDeviceInfo(DeviceIndex)^.Name);
  WriteLn(' Max input channels ', Pa_GetDeviceInfo(DeviceIndex)^.MaxInputChannels);

  Initialize(Param);
  Param.Device := DeviceIndex;
  Param.ChannelCount := 1;
  Param.SampleFormat := paFloat32;
  param.SuggestedLatency := Pa_GetDeviceInfo(DeviceIndex)^.DefaultHighInputLatency;
  param.HostApiSpecificStreamInfo := nil;

  SampleRate := 48000;
  Buffer := TSherpaOnnxCircularBuffer.Create(Round(SampleRate) * 20);

  Status := Pa_OpenStream(stream, @Param, nil, SampleRate, paFramesPerBufferUnspecified, paNoFlag,
    PPaStreamCallback(@RecordCallback), nil);

  if Status <> paNoError then
    begin
      WriteLn('Failed to open stream, ', Pa_GetErrorText(Status));
      Pa_Terminate;
      Exit;
    end;

  Status := Pa_StartStream(stream);
  if Status <> paNoError then
    begin
      WriteLn('Failed to start stream, ', Pa_GetErrorText(Status));
      Pa_Terminate;
      Exit;
    end;

  WriteLn('Please speak! It will exit after 10 seconds.');
  Pa_Sleep(10000);  {sleep for 10 seconds }

  Status := Pa_CloseStream(stream);
  if Status <> paNoError then
    begin
      WriteLn('Failed to close stream, ', Pa_GetErrorText(Status));
      Exit;
    end;

  AllSamples := Buffer.Get(0, Buffer.Size);

  SherpaOnnxWriteWave('record.wav', AllSamples, Round(SampleRate));
  WriteLn('Saved to record.wav');

  Status := Pa_Terminate;
  if Status <> paNoError then
    begin
      WriteLn('Failed to deinitialize portaudio, ', Pa_GetErrorText(Status));
      Exit;
    end;
end.


================================================
FILE: pascal-api-examples/read-wav/.gitignore
================================================
main


================================================
FILE: pascal-api-examples/read-wav/main.pas
================================================
{ Copyright (c)  2024  Xiaomi Corporation }
program main;

{$mode objfpc}

uses
  sherpa_onnx;

var
  Wave: TSherpaOnnxWave;
  S: Single;
  I: Integer;
begin
  Wave := SherpaOnnxReadWave('./lei-jun-test.wav');
  WriteLn('info ', Wave.SampleRate, ' ', Length(Wave.Samples));
  S := 0;
  for i := Low(Wave.Samples) to High(Wave.Samples) do
    S += Wave.Samples[i];

  WriteLn('sum is ', S);
end.


================================================
FILE: pascal-api-examples/speaker-diarization/main.pas
================================================
{ Copyright (c)  2024  Xiaomi Corporation }
{
This file shows how to use the Pascal API from sherpa-onnx
for speaker diarization.

Usage:

Step 1: Download a speaker segmentation model

Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
for a list of available models. The following is an example

  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2

Step 2: Download a speaker embedding extractor model

Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
for a list of available models. The following is an example

  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

Step 3. Download test wave files

Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
for a list of available test wave files. The following is an example

  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav

Step 4. Run it
}

program main;

{$mode delphi}

uses
  sherpa_onnx,
  ctypes,
  SysUtils;

function ProgressCallback(
      NumProcessedChunks: cint32;
      NumTotalChunks: cint32): cint32; cdecl;
var
  Progress: Single;
begin
  Progress := 100.0 * NumProcessedChunks / NumTotalChunks;
  WriteLn(Format('Progress: %.3f%%', [Progress]));

  Result := 0;
end;

var
  Wave: TSherpaOnnxWave;
  Config: TSherpaOnnxOfflineSpeakerDiarizationConfig;
  Sd: TSherpaOnnxOfflineSpeakerDiarization;
  Segments: TSherpaOnnxOfflineSpeakerDiarizationSegmentArray;
  I: Integer;
begin
  Wave := SherpaOnnxReadWave('./0-four-speakers-zh.wav');

  Config.Segmentation.Pyannote.Model := './sherpa-onnx-pyannote-segmentation-3-0/model.onnx';
  Config.Embedding.Model := './3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx';

  {
    Since we know that there are 4 speakers in ./0-four-speakers-zh.wav, we
    set NumClusters to 4 here.
    If you don't have such information, please set NumClusters to -1.
    In that case, you have to set Config.Clustering.Threshold.
    A larger threshold leads to fewer clusters, i.e., fewer speakers.
  }
  Config.Clustering.NumClusters := 4;
  Config.Segmentation.Debug := True;
  Config.Embedding.Debug := True;

  Sd := TSherpaOnnxOfflineSpeakerDiarization.Create(Config);
  if Sd.GetHandle = nil then
    begin
      WriteLn('Please check you config');
      Exit;
    end;

  if Sd.GetSampleRate <> Wave.SampleRate then
    begin
      WriteLn(Format('Expected sample rate: %d, given: %d', [Sd.GetSampleRate, Wave.SampleRate]));
      Exit;
    end;

  {
    // If you don't want to use a callback
    Segments := Sd.Process(Wave.Samples);
  }
  Segments := Sd.Process(Wave.Samples, @ProgressCallback);

  for I := Low(Segments) to High(Segments) do
    begin
      WriteLn(Format('%.3f -- %.3f speaker_%d',
        [Segments[I].Start, Segments[I].Stop, Segments[I].Speaker]));
    end;

  FreeAndNil(Sd);
end.


================================================
FILE: pascal-api-examples/speech-enhancement-dpdfnet/.gitignore
================================================
dpdfnet


================================================
FILE: pascal-api-examples/speech-enhancement-dpdfnet/dpdfnet.pas
================================================
{ Copyright (c)  2026  Xiaomi Corporation }
{
This file shows how to use the offline speech enhancement API from sherpa-onnx
with a DPDFNet model.

Please first download files used in this script before you run it.

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet_baseline.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav

Use dpdfnet_baseline.onnx, dpdfnet2.onnx, dpdfnet4.onnx, or dpdfnet8.onnx
for 16 kHz downstream ASR or speech recognition.
Use dpdfnet2_48khz_hr.onnx for 48 kHz enhancement output.
}
program main;

{$mode delphi}

uses
  sherpa_onnx,
  SysUtils;

var
  Wave: TSherpaOnnxWave;
  Config: TSherpaOnnxOfflineSpeechDenoiserConfig;
  Sd: TSherpaOnnxOfflineSpeechDenoiser;
  Audio: TSherpaOnnxDenoisedAudio;
begin
  Wave := SherpaOnnxReadWave('./inp_16k.wav');

  Initialize(Config);
  Config.Model.DpdfNet.Model := './dpdfnet_baseline.onnx';
  Config.Model.NumThreads:= 1;
  Config.Model.Debug:= True;
  Config.Model.Provider:= 'cpu';

  Sd := TSherpaOnnxOfflineSpeechDenoiser.Create(Config);

  Audio := Sd.Run(Wave.Samples, Wave.SampleRate);

  SherpaOnnxWriteWave('./enhanced-dpdfnet.wav', Audio.Samples, Audio.SampleRate);
  WriteLn('Saved to ./enhanced-dpdfnet.wav');

  FreeAndNil(Sd);
end.


================================================
FILE: pascal-api-examples/speech-enhancement-gtcrn/.gitignore
================================================
gtcrn


================================================
FILE: pascal-api-examples/speech-enhancement-gtcrn/gtcrn.pas
================================================
{ Copyright (c)  2025  Xiaomi Corporation }
{
This file shows how to use the speech enhancement API from sherpa-onnx

Please first download files used in this script before you run it.

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
}
program main;

{$mode delphi}

uses
  sherpa_onnx,
  SysUtils;

var
  Wave: TSherpaOnnxWave;
  Model: AnsiString;

  Config: TSherpaOnnxOfflineSpeechDenoiserConfig;
  Sd: TSherpaOnnxOfflineSpeechDenoiser;
  Audio: TSherpaOnnxDenoisedAudio;
begin
  Wave := SherpaOnnxReadWave('./inp_16k.wav');
  Model := './gtcrn_simple.onnx';

  Initialize(Config);
  Config.Model.Gtcrn.Model := Model;
  Config.Model.NumThreads:= 1;
  Config.Model.Debug:= True;
  Config.Model.Provider:= 'cpu';

  Sd := TSherpaOnnxOfflineSpeechDenoiser.Create(Config);

  Audio := Sd.Run(Wave.Samples, Wave.SampleRate);

  SherpaOnnxWriteWave('./enhanced.wav', Audio.Samples, Audio.SampleRate);
  WriteLn('Saved to ./enhanced.wav');

  FreeAndNil(Sd);
end.


================================================
FILE: pascal-api-examples/streaming-asr/.gitignore
================================================
!run-*.sh
zipformer_transducer
paraformer
zipformer_ctc
zipformer_ctc_hlg
nemo_transducer
t_one_ctc


================================================
FILE: pascal-api-examples/streaming-asr/README.md
================================================
# Introduction

This folder contains examples about using sherpa-onnx's object pascal
APIs with streaming models for speech recognition.

|File|Description|
|----|-----------|
|[run-paraformer.sh](./run-paraformer.sh)|Use a streaming Paraformer model for speech recognition|
|[run-zipformer-ctc-hlg.sh](./run-zipformer-ctc-hlg.sh)|Use a streaming Zipformer CTC model for speech recognition|
|[run-zipformer-ctc.sh](./run-zipformer-ctc.sh)|Use a streaming Zipformer CTC model with HLG for speech recognition|
|[run-zipformer-transducer.sh](./run-zipformer-transducer.sh)|Use a Zipformer transducer model for speech recognition|
|[run-nemo-transducer.sh](./run-nemo-transducer.sh)|Use a NeMo transducer model for speech recognition|


================================================
FILE: pascal-api-examples/streaming-asr/nemo_transducer.pas
================================================
{ Copyright (c)  2024  Xiaomi Corporation }

{
This file shows how to use a streaming NeMo transducer
to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program nemo_transducer;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Config: TSherpaOnnxOnlineRecognizerConfig;
  Recognizer: TSherpaOnnxOnlineRecognizer;
  Stream: TSherpaOnnxOnlineStream;
  RecognitionResult: TSherpaOnnxOnlineRecognizerResult;
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;
  TailPaddings: array of Single;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;
  Duration: Single;
  RealTimeFactor: Single;
begin
  Initialize(Config);

  {Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  to download model files used in this file.}
  Config.ModelConfig.Transducer.Encoder := './sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms/encoder.onnx';
  Config.ModelConfig.Transducer.Decoder := './sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms/decoder.onnx';
  Config.ModelConfig.Transducer.Joiner := './sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms/joiner.onnx';
  Config.ModelConfig.Tokens := './sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := './sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms/test_wavs/0.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  Recognizer := TSherpaOnnxOnlineRecognizer.Create(Config);

  Start := Now;

  Stream := Recognizer.CreateStream();

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);

  SetLength(TailPaddings, Round(Wave.SampleRate * 0.5)); {0.5 seconds of padding}
  Stream.AcceptWaveform(TailPaddings, Wave.SampleRate);

  Stream.InputFinished();

  while Recognizer.IsReady(Stream) do
    Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  {Free resources to avoid memory leak.

  Note: You don't need to invoke them for this simple script.
  However, you have to invoke them in your own large/complex project.
  }
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.


================================================
FILE: pascal-api-examples/streaming-asr/paraformer.pas
================================================
{ Copyright (c)  2024  Xiaomi Corporation }

{
This file shows how to use a streaming Paraformer model to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program paraformer;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Config: TSherpaOnnxOnlineRecognizerConfig;
  Recognizer: TSherpaOnnxOnlineRecognizer;
  Stream: TSherpaOnnxOnlineStream;
  RecognitionResult: TSherpaOnnxOnlineRecognizerResult;
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;
  TailPaddings: array of Single;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;
  Duration: Single;
  RealTimeFactor: Single;
begin
  Initialize(Config);

  {Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  to download model files used in this file.}
  Config.ModelConfig.Paraformer.Encoder := './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx';
  Config.ModelConfig.Paraformer.Decoder := './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx';
  Config.ModelConfig.Tokens := './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt';

  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := './sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/2.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  Recognizer := TSherpaOnnxOnlineRecognizer.Create(Config);

  Start := Now;

  Stream := Recognizer.CreateStream();

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);

  SetLength(TailPaddings, Round(Wave.SampleRate * 0.5)); {0.5 seconds of padding}
  Stream.AcceptWaveform(TailPaddings, Wave.SampleRate);

  Stream.InputFinished();

  while Recognizer.IsReady(Stream) do
    Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  {Free resources to avoid memory leak.

  Note: You don't need to invoke them for this simple script.
  However, you have to invoke them in your own large/complex project.
  }
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.


================================================
FILE: pascal-api-examples/streaming-asr/run-nemo-transducer.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

if [ ! -f ./sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms.tar.bz2
  tar xvf sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms.tar.bz2
  rm sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./nemo_transducer.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./nemo_transducer


================================================
FILE: pascal-api-examples/streaming-asr/run-paraformer.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi


if [ ! -f ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
  tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
  rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./paraformer.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./paraformer


================================================
FILE: pascal-api-examples/streaming-asr/run-t-one-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

if [ ! -f ./sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./t_one_ctc.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./t_one_ctc


================================================
FILE: pascal-api-examples/streaming-asr/run-zipformer-ctc-hlg.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

if [ ! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./zipformer_ctc_hlg.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./zipformer_ctc_hlg


================================================
FILE: pascal-api-examples/streaming-asr/run-zipformer-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

if [ ! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./zipformer_ctc.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./zipformer_ctc


================================================
FILE: pascal-api-examples/streaming-asr/run-zipformer-transducer.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

if [ ! -f ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
fi


fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./zipformer_transducer.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./zipformer_transducer


================================================
FILE: pascal-api-examples/streaming-asr/t_one_ctc.pas
================================================
{ Copyright (c)  2025  Xiaomi Corporation }

{
This file shows how to use a streaming T-one CTC model
to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program t_one_ctc;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Config: TSherpaOnnxOnlineRecognizerConfig;
  Recognizer: TSherpaOnnxOnlineRecognizer;
  Stream: TSherpaOnnxOnlineStream;
  RecognitionResult: TSherpaOnnxOnlineRecognizerResult;
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;
  LeftPaddings: array of Single;
  TailPaddings: array of Single;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;
  Duration: Single;
  RealTimeFactor: Single;
begin
  Initialize(Config);

  {Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  to download model files used in this file.}
  Config.ModelConfig.ToneCtc.Model := './sherpa-onnx-streaming-t-one-russian-2025-09-08/model.onnx';
  Config.ModelConfig.Tokens := './sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := './sherpa-onnx-streaming-t-one-russian-2025-09-08/0.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  Recognizer := TSherpaOnnxOnlineRecognizer.Create(Config);

  Start := Now;

  Stream := Recognizer.CreateStream();

  SetLength(LeftPaddings, Round(Wave.SampleRate * 0.3)); {0.3 seconds of padding}
  Stream.AcceptWaveform(LeftPaddings, Wave.SampleRate);

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);

  SetLength(TailPaddings, Round(Wave.SampleRate * 0.6)); {0.6 seconds of padding}
  Stream.AcceptWaveform(TailPaddings, Wave.SampleRate);

  Stream.InputFinished();

  while Recognizer.IsReady(Stream) do
    Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  {Free resources to avoid memory leak.

  Note: You don't need to invoke them for this simple script.
  However, you have to invoke them in your own large/complex project.
  }
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.


================================================
FILE: pascal-api-examples/streaming-asr/zipformer_ctc.pas
================================================
{ Copyright (c)  2024  Xiaomi Corporation }

{
This file shows how to use a streaming Zipformer CTC model
to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program zipformer_ctc;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Config: TSherpaOnnxOnlineRecognizerConfig;
  Recognizer: TSherpaOnnxOnlineRecognizer;
  Stream: TSherpaOnnxOnlineStream;
  RecognitionResult: TSherpaOnnxOnlineRecognizerResult;
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;
  TailPaddings: array of Single;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;
  Duration: Single;
  RealTimeFactor: Single;
begin
  Initialize(Config);

  {Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  to download model files used in this file.}
  Config.ModelConfig.Zipformer2Ctc.Model := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx';
  Config.ModelConfig.Tokens := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  Recognizer := TSherpaOnnxOnlineRecognizer.Create(Config);

  Start := Now;

  Stream := Recognizer.CreateStream();

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);

  SetLength(TailPaddings, Round(Wave.SampleRate * 0.5)); {0.5 seconds of padding}
  Stream.AcceptWaveform(TailPaddings, Wave.SampleRate);

  Stream.InputFinished();

  while Recognizer.IsReady(Stream) do
    Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  {Free resources to avoid memory leak.

  Note: You don't need to invoke them for this simple script.
  However, you have to invoke them in your own large/complex project.
  }
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.


================================================
FILE: pascal-api-examples/streaming-asr/zipformer_ctc_hlg.pas
================================================
{ Copyright (c)  2024  Xiaomi Corporation }

{
This file shows how to use a streaming Zipformer CTC model
with HLG to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program zipformer_ctc_hlg;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Config: TSherpaOnnxOnlineRecognizerConfig;
  Recognizer: TSherpaOnnxOnlineRecognizer;
  Stream: TSherpaOnnxOnlineStream;
  RecognitionResult: TSherpaOnnxOnlineRecognizerResult;
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;
  TailPaddings: array of Single;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;
  Duration: Single;
  RealTimeFactor: Single;
begin
  Initialize(Config);

  {Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  to download model files used in this file.}
  Config.ModelConfig.Zipformer2Ctc.Model := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx';
  Config.ModelConfig.Tokens := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := True;
  Config.CtcFstDecoderConfig.Graph := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst';

  WaveFilename := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  Recognizer := TSherpaOnnxOnlineRecognizer.Create(Config);

  Start := Now;

  Stream := Recognizer.CreateStream();

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);

  SetLength(TailPaddings, Round(Wave.SampleRate * 0.5)); {0.5 seconds of padding}
  Stream.AcceptWaveform(TailPaddings, Wave.SampleRate);

  Stream.InputFinished();

  while Recognizer.IsReady(Stream) do
    Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  {Free resources to avoid memory leak.

  Note: You don't need to invoke them for this simple script.
  However, you have to invoke them in your own large/complex project.
  }
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.


================================================
FILE: pascal-api-examples/streaming-asr/zipformer_transducer.pas
================================================
{ Copyright (c)  2024  Xiaomi Corporation }

{
This file shows how to use a streaming Zipformer transducer
to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program zipformer_transducer;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Config: TSherpaOnnxOnlineRecognizerConfig;
  Recognizer: TSherpaOnnxOnlineRecognizer;
  Stream: TSherpaOnnxOnlineStream;
  RecognitionResult: TSherpaOnnxOnlineRecognizerResult;
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;
  TailPaddings: array of Single;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;
  Duration: Single;
  RealTimeFactor: Single;
begin
  Initialize(Config);

  {Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  to download model files used in this file.}
  Config.ModelConfig.Transducer.Encoder := './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx';
  Config.ModelConfig.Transducer.Decoder := './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx';
  Config.ModelConfig.Transducer.Joiner := './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx';
  Config.ModelConfig.Tokens := './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  Recognizer := TSherpaOnnxOnlineRecognizer.Create(Config);

  Start := Now;

  Stream := Recognizer.CreateStream();

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);

  SetLength(TailPaddings, Round(Wave.SampleRate * 0.5)); {0.5 seconds of padding}
  Stream.AcceptWaveform(TailPaddings, Wave.SampleRate);

  Stream.InputFinished();

  while Recognizer.IsReady(Stream) do
    Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  {Free resources to avoid memory leak.

  Note: You don't need to invoke them for this simple script.
  However, you have to invoke them in your own large/complex project.
  }
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.


================================================
FILE: pascal-api-examples/streaming-speech-enhancement-dpdfnet/.gitignore
================================================
dpdfnet


================================================
FILE: pascal-api-examples/streaming-speech-enhancement-dpdfnet/dpdfnet.pas
================================================
{ Copyright (c)  2026  Xiaomi Corporation }
{
This file shows how to use the streaming speech enhancement API from sherpa-onnx
with a DPDFNet model.

Please first download files used in this script before you run it.

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet_baseline.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
}
program main;

{$mode delphi}

uses
  sherpa_onnx,
  SysUtils;

var
  Wave: TSherpaOnnxWave;
  Config: TSherpaOnnxOnlineSpeechDenoiserConfig;
  Sd: TSherpaOnnxOnlineSpeechDenoiser;
  Audio: TSherpaOnnxDenoisedAudio;
  Chunk: array of Single;
  Enhanced: array of Single;
  StartIndex: Integer;
  N: Integer;
  NewLength: Integer;
begin
  Wave := SherpaOnnxReadWave('./inp_16k.wav');

  Initialize(Config);
  Config.Model.DpdfNet.Model := './dpdfnet_baseline.onnx';
  Config.Model.NumThreads:= 1;
  Config.Model.Debug:= True;
  Config.Model.Provider:= 'cpu';

  Sd := TSherpaOnnxOnlineSpeechDenoiser.Create(Config);

  SetLength(Enhanced, 0);
  StartIndex := 0;
  while StartIndex < Length(Wave.Samples) do
    begin
      N := Sd.GetFrameShiftInSamples;
      if StartIndex + N > Length(Wave.Samples) then
        N := Length(Wave.Samples) - StartIndex;

      Chunk := Copy(Wave.Samples, StartIndex, N);
      Audio := Sd.Run(Chunk, Wave.SampleRate);
      NewLength := Length(Enhanced) + Length(Audio.Samples);
      SetLength(Enhanced, NewLength);
      if Length(Audio.Samples) > 0 then
        Move(Audio.Samples[0], Enhanced[NewLength - Length(Audio.Samples)],
          Length(Audio.Samples) * SizeOf(Single));
      Inc(StartIndex, N);
    end;

  Audio := Sd.Flush;
  NewLength := Length(Enhanced) + Length(Audio.Samples);
  SetLength(Enhanced, NewLength);
  if Length(Audio.Samples) > 0 then
    Move(Audio.Samples[0], Enhanced[NewLength - Length(Audio.Samples)],
      Length(Audio.Samples) * SizeOf(Single));

  SherpaOnnxWriteWave('./enhanced-online-dpdfnet.wav', Enhanced, Sd.GetSampleRate);
  WriteLn('Saved to ./enhanced-online-dpdfnet.wav');

  FreeAndNil(Sd);
end.


================================================
FILE: pascal-api-examples/streaming-speech-enhancement-gtcrn/.gitignore
================================================
gtcrn


================================================
FILE: pascal-api-examples/streaming-speech-enhancement-gtcrn/gtcrn.pas
================================================
{ Copyright (c)  2026  Xiaomi Corporation }
{
This file shows how to use the streaming speech enhancement API from sherpa-onnx
with a GTCRN model.

Please first download files used in this script before you run it.

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
}
program main;

{$mode delphi}

uses
  sherpa_onnx,
  SysUtils;

var
  Wave: TSherpaOnnxWave;
  Config: TSherpaOnnxOnlineSpeechDenoiserConfig;
  Sd: TSherpaOnnxOnlineSpeechDenoiser;
  Audio: TSherpaOnnxDenoisedAudio;
  Chunk: array of Single;
  Enhanced: array of Single;
  StartIndex: Integer;
  N: Integer;
  NewLength: Integer;
begin
  Wave := SherpaOnnxReadWave('./inp_16k.wav');

  Initialize(Config);
  Config.Model.Gtcrn.Model := './gtcrn_simple.onnx';
  Config.Model.NumThreads:= 1;
  Config.Model.Debug:= True;
  Config.Model.Provider:= 'cpu';

  Sd := TSherpaOnnxOnlineSpeechDenoiser.Create(Config);

  SetLength(Enhanced, 0);
  StartIndex := 0;
  while StartIndex < Length(Wave.Samples) do
    begin
      N := Sd.GetFrameShiftInSamples;
      if StartIndex + N > Length(Wave.Samples) then
        N := Length(Wave.Samples) - StartIndex;

      Chunk := Copy(Wave.Samples, StartIndex, N);
      Audio := Sd.Run(Chunk, Wave.SampleRate);
      NewLength := Length(Enhanced) + Length(Audio.Samples);
      SetLength(Enhanced, NewLength);
      if Length(Audio.Samples) > 0 then
        Move(Audio.Samples[0], Enhanced[NewLength - Length(Audio.Samples)],
          Length(Audio.Samples) * SizeOf(Single));
      Inc(StartIndex, N);
    end;

  Audio := Sd.Flush;
  NewLength := Length(Enhanced) + Length(Audio.Samples);
  SetLength(Enhanced, NewLength);
  if Length(Audio.Samples) > 0 then
    Move(Audio.Samples[0], Enhanced[NewLength - Length(Audio.Samples)],
      Length(Audio.Samples) * SizeOf(Single));

  SherpaOnnxWriteWave('./enhanced-online-gtcrn.wav', Enhanced, Sd.GetSampleRate);
  WriteLn('Saved to ./enhanced-online-gtcrn.wav');

  FreeAndNil(Sd);
end.


================================================
FILE: pascal-api-examples/tts/.gitignore
================================================
!run-*.sh
piper
piper-playback
link*.res
matcha-zh
matcha-en
matcha-zh-playback
matcha-en-playback
kokoro-en
kitten-en
kokoro-en-playback
kitten-en-playback
kokoro-zh-en
kokoro-zh-en-playback
pocket-en
supertonic-en
zipvoice-zh-en


================================================
FILE: pascal-api-examples/tts/README.md
================================================
# Introduction

This directory contains examples for how to use the TTS (text to speech) APIs.

|Directory| Description|
|---------|------------|
|[run-piper.sh](./run-piper.sh)|It shows how to use models from [piper](https://github.com/rhasspy/piper) for text to speech.|
|[run-piper-playback.sh](./run-piper-playback.sh)|It shows how to use models from [piper](https://github.com/rhasspy/piper) for text to speech. It plays the generated audio as it is still generating. |
|[run-zipvoice-zh-en.sh](./run-zipvoice-zh-en.sh)|It shows how to use ZipVoice Chinese/English zero-shot TTS for text to speech.|


================================================
FILE: pascal-api-examples/tts/kitten-en-playback.pas
================================================
{ Copyright (c)  2025  Xiaomi Corporation }
program kitten_en_playback;
{
This file shows how to use the text to speech API of sherpa-onnx
with kitten models.

It generates speech from text and saves it to a wave file.

Note that it plays the audio back as it is still generating.
}

{$mode objfpc}

uses
  {$ifdef unix}
  cthreads,
  {$endif}
  SysUtils,
  dos,
  ctypes,
  portaudio,
  sherpa_onnx;

var
  CriticalSection: TRTLCriticalSection;

  Tts: TSherpaOnnxOfflineTts;
  Audio: TSherpaOnnxGeneratedAudio;
  Resampler: TSherpaOnnxLinearResampler;

  Text: AnsiString;
  Speed: Single = 1.0;  {Use a larger value to speak faster}
  SpeakerId: Integer = 0;
  Buffer: TSherpaOnnxCircularBuffer;
  FinishedGeneration: Boolean = False;
  FinishedPlaying: Boolean = False;

  Version: String;
  EnvStr: String;
  Status: Integer;
  NumDevices: Integer;
  DeviceIndex: Integer;
  DeviceInfo: PPaDeviceInfo;

  { If you get EDivByZero: Division by zero error, please change the sample rate
    to the one supported by your microphone.
  }
  DeviceSampleRate: Integer = 48000;
  I: Integer;
  Param: TPaStreamParameters;
  Stream: PPaStream;
  Wave: TSherpaOnnxWave;
  GenerationConfig: TSherpaOnnxGenerationConfig;

function GenerateCallback(
      Samples: pcfloat; N: cint32;
      Progress: cfloat; Arg: Pointer): cint; cdecl;
begin
  EnterCriticalSection(CriticalSection);
  try
    if Resampler <> nil then
      Buffer.Push(Resampler.Resample(Samples, N, False))
    else
      Buffer.Push(Samples, N);
  finally
    LeaveCriticalSection(CriticalSection);
  end;

  { 1 means to continue generating; 0 means to stop generating. }
  Result := 1;
end;

function PlayCallback(
      input: Pointer; output: Pointer;
      frameCount: culong;
      timeInfo: PPaStreamCallbackTimeInfo;
      statusFlags: TPaStreamCallbackFlags;
      userData: Pointer ): cint; cdecl;
var
  Samples: TSherpaOnnxSamplesArray;
  I: Integer;
begin
  EnterCriticalSection(CriticalSection);
  try
    if Buffer.Size >= frameCount then
      begin
        Samples := Buffer.Get(Buffer.Head, FrameCount);
        Buffer.Pop(FrameCount);
      end
    else if Buffer.Size > 0 then
      begin
        Samples := Buffer.Get(Buffer.Head, Buffer.Size);
        Buffer.Pop(Buffer.Size);
        SetLength(Samples, frameCount);
      end
    else
      SetLength(Samples, frameCount);

    for I := 0 to frameCount - 1 do
      pcfloat(output)[I] := Samples[I];

    if (Buffer.Size > 0) or (not FinishedGeneration) then
      Result := paContinue
    else
      begin
        Result := paComplete;
        FinishedPlaying := True;
      end;
  finally
    LeaveCriticalSection(CriticalSection);
  end;
end;

function GetOfflineTts: TSherpaOnnxOfflineTts;
var
  Config: TSherpaOnnxOfflineTtsConfig;
begin
  Config.Model.Kitten.Model := './kitten-nano-en-v0_1-fp16/model.fp16.onnx';
  Config.Model.Kitten.Voices := './kitten-nano-en-v0_1-fp16/voices.bin';
  Config.Model.Kitten.Tokens := './kitten-nano-en-v0_1-fp16/tokens.txt';
  Config.Model.Kitten.DataDir := './kitten-nano-en-v0_1-fp16/espeak-ng-data';
  Config.Model.NumThreads := 2;
  Config.Model.Debug := False;
  Config.MaxNumSentences := 1;

  Result := TSherpaOnnxOfflineTts.Create(Config);
end;

begin
  Tts := GetOfflineTts;
  if Tts.GetSampleRate <> DeviceSampleRate then
    Resampler := TSherpaOnnxLinearResampler.Create(Tts.GetSampleRate, DeviceSampleRate);

  Version := String(Pa_GetVersionText);
  WriteLn('Version is ', Version);
  Status := Pa_Initialize;
  if Status <> paNoError then
    begin
      WriteLn('Failed to initialize portaudio, ', Pa_GetErrorText(Status));
      Exit;
    end;

  NumDevices := Pa_GetDeviceCount;
  WriteLn('Num devices: ', NumDevices);

  DeviceIndex := Pa_GetDefaultOutputDevice;

  if DeviceIndex = paNoDevice then
    begin
      WriteLn('No default output device found');
      Pa_Terminate;
      Exit;
    end;

  EnvStr := GetEnv('SHERPA_ONNX_MIC_DEVICE');
  if EnvStr <> '' then
    begin
      DeviceIndex := StrToIntDef(EnvStr, DeviceIndex);
      WriteLn('Use device index from environment variable SHERPA_ONNX_MIC_DEVICE: ', EnvStr);
    end;

  for I := 0 to (NumDevices - 1) do
    begin
      DeviceInfo := Pa_GetDeviceInfo(I);
      if I = DeviceIndex then
        { WriteLn(Format(' * %d %s', [I, DeviceInfo^.Name])) }
        WriteLn(Format(' * %d %s', [I, AnsiString(DeviceInfo^.Name)]))
      else
        WriteLn(Format('   %d %s', [I, AnsiString(DeviceInfo^.Name)]));
    end;

  WriteLn('Use device ', DeviceIndex);
  WriteLn(' Name ', Pa_GetDeviceInfo(DeviceIndex)^.Name);
  WriteLn(' Max output channels ', Pa_GetDeviceInfo(DeviceIndex)^.MaxOutputChannels);

  Initialize(Param);
  Param.Device := DeviceIndex;
  Param.ChannelCount := 1;
  Param.SampleFormat := paFloat32;
  param.SuggestedLatency := Pa_GetDeviceInfo(DeviceIndex)^.DefaultHighOutputLatency;
  param.HostApiSpecificStreamInfo := nil;

  Buffer := TSherpaOnnxCircularBuffer.Create(30 * DeviceSampleRate);


  { Note(fangjun): PortAudio invokes PlayCallback in a separate thread. }
  Status := Pa_OpenStream(stream, nil, @Param, DeviceSampleRate, paFramesPerBufferUnspecified, paNoFlag,
    PPaStreamCallback(@PlayCallback), nil);

  if Status <> paNoError then
    begin
      WriteLn('Failed to open stream, ', Pa_GetErrorText(Status));
      Pa_Terminate;
      Exit;
    end;

  InitCriticalSection(CriticalSection);

  Status := Pa_StartStream(stream);
  if Status <> paNoError then
    begin
      WriteLn('Failed to start stream, ', Pa_GetErrorText(Status));
      Pa_Terminate;
      Exit;
    end;

  WriteLn('There are ', Tts.GetNumSpeakers, ' speakers');

  Text := 'Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone.';

  GenerationConfig := Default(TSherpaOnnxGenerationConfig);
  GenerationConfig.SilenceScale := 0.2;
  GenerationConfig.Speed := Speed;
  GenerationConfig.Sid := SpeakerId;

  Audio :=  Tts.Generate(Text, GenerationConfig,
    @GenerateCallback, nil);
  FinishedGeneration := True;
  SherpaOnnxWriteWave('./kitten-en-playback-0.wav', Audio.Samples, Audio.SampleRate);
  WriteLn('Saved to ./kitten-en-playback-0.wav');

  while not FinishedPlaying do
    Pa_Sleep(100);  {sleep for 0.1 second }
    {TODO(fangjun): Use an event to indicate the play is finished}

  DoneCriticalSection(CriticalSection);

  FreeAndNil(Tts);
  FreeAndNil(Resampler);

  Status := Pa_CloseStream(stream);
  if Status <> paNoError then
    begin
      WriteLn('Failed to close stream, ', Pa_GetErrorText(Status));
      Exit;
    end;

  Status := Pa_Terminate;
  if Status <> paNoError then
    begin
      WriteLn('Failed to deinitialize portaudio, ', Pa_GetErrorText(Status));
      Exit;
    end;
end.


================================================
FILE: pascal-api-examples/tts/kitten-en.pas
================================================
{ Copyright (c)  2025  Xiaomi Corporation }
program kitten_en;
{
This file shows how to use the text to speech API of sherpa-onnx
with Kitten TTS models.

It generates speech from text and saves it to a wave file.

If you want to play it while it is generating, please see
./kitten-en-playback.pas
}

{$mode objfpc}

uses
  SysUtils,
  sherpa_onnx;

function GetOfflineTts: TSherpaOnnxOfflineTts;
var
  Config: TSherpaOnnxOfflineTtsConfig;
begin
  Config.Model.Kitten.Model := './kitten-nano-en-v0_1-fp16/model.fp16.onnx';
  Config.Model.Kitten.Voices := './kitten-nano-en-v0_1-fp16/voices.bin';
  Config.Model.Kitten.Tokens := './kitten-nano-en-v0_1-fp16/tokens.txt';
  Config.Model.Kitten.DataDir := './kitten-nano-en-v0_1-fp16/espeak-ng-data';
  Config.Model.NumThreads := 2;
  Config.Model.Debug := False;
  Config.MaxNumSentences := 1;

  Result := TSherpaOnnxOfflineTts.Create(Config);
end;

var
  Tts: TSherpaOnnxOfflineTts;
  Audio: TSherpaOnnxGeneratedAudio;
  GenerationConfig: TSherpaOnnxGenerationConfig;

  Text: AnsiString;
  Speed: Single = 1.0;  {Use a larger value to speak faster}
  SpeakerId: Integer = 0;

begin
  Tts := GetOfflineTts;

  WriteLn('There are ', Tts.GetNumSpeakers, ' speakers');

  Text := 'Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone.';

  GenerationConfig := Default(TSherpaOnnxGenerationConfig);
  GenerationConfig.SilenceScale := 0.2;
  GenerationConfig.Speed := Speed;
  GenerationConfig.Sid := SpeakerId;

  Audio :=  Tts.Generate(Text, GenerationConfig, NIL, NIL);
  SherpaOnnxWriteWave('./kitten-en-0.wav', Audio.Samples, Audio.SampleRate);
  WriteLn('Saved to ./kitten-en-0.wav');

  FreeAndNil(Tts);
end.


================================================
FILE: pascal-api-examples/tts/kokoro-en-playback.pas
================================================
{ Copyright (c)  2025  Xiaomi Corporation }
program kokoro_en_playback;
{
This file shows how to use the text to speech API of sherpa-onnx
with Kokoro models.

It generates speech from text and saves it to a wave file.

Note that it plays the audio back as it is still generating.
}

{$mode objfpc}

uses
  {$ifdef unix}
  cthreads,
  {$endif}
  SysUtils,
  dos,
  ctypes,
  portaudio,
  sherpa_onnx;

var
  CriticalSection: TRTLCriticalSection;

  Tts: TSherpaOnnxOfflineTts;
  Audio: TSherpaOnnxGeneratedAudio;
  Resampler: TSherpaOnnxLinearResampler;

  Text: AnsiString;
  Speed: Single = 1.0;  {Use a larger value to speak faster}
  SpeakerId: Integer = 7;
  Buffer: TSherpaOnnxCircularBuffer;
  FinishedGeneration: Boolean = False;
  FinishedPlaying: Boolean = False;

  Version: String;
  EnvStr: String;
  Status: Integer;
  NumDevices: Integer;
  DeviceIndex: Integer;
  DeviceInfo: PPaDeviceInfo;

  { If you get EDivByZero: Division by zero error, please change the sample rate
    to the one supported by your microphone.
  }
  DeviceSampleRate: Integer = 48000;
  I: Integer;
  Param: TPaStreamParameters;
  Stream: PPaStream;
  Wave: TSherpaOnnxWave;
  GenerationConfig: TSherpaOnnxGenerationConfig;

function GenerateCallback(
      Samples: pcfloat; N: cint32;
      Progress: cfloat; Arg: Pointer): cint; cdecl;
begin
  EnterCriticalSection(CriticalSection);
  try
    if Resampler <> nil then
      Buffer.Push(Resampler.Resample(Samples, N, False))
    else
      Buffer.Push(Samples, N);
  finally
    LeaveCriticalSection(CriticalSection);
  end;

  { 1 means to continue generating; 0 means to stop generating. }
  Result := 1;
end;

function PlayCallback(
      input: Pointer; output: Pointer;
      frameCount: culong;
      timeInfo: PPaStreamCallbackTimeInfo;
      statusFlags: TPaStreamCallbackFlags;
      userData: Pointer ): cint; cdecl;
var
  Samples: TSherpaOnnxSamplesArray;
  I: Integer;
begin
  EnterCriticalSection(CriticalSection);
  try
    if Buffer.Size >= frameCount then
      begin
        Samples := Buffer.Get(Buffer.Head, FrameCount);
        Buffer.Pop(FrameCount);
      end
    else if Buffer.Size > 0 then
      begin
        Samples := Buffer.Get(Buffer.Head, Buffer.Size);
        Buffer.Pop(Buffer.Size);
        SetLength(Samples, frameCount);
      end
    else
      SetLength(Samples, frameCount);

    for I := 0 to frameCount - 1 do
      pcfloat(output)[I] := Samples[I];

    if (Buffer.Size > 0) or (not FinishedGeneration) then
      Result := paContinue
    else
      begin
        Result := paComplete;
        FinishedPlaying := True;
      end;
  finally
    LeaveCriticalSection(CriticalSection);
  end;
end;

function GetOfflineTts: TSherpaOnnxOfflineTts;
var
  Config: TSherpaOnnxOfflineTtsConfig;
begin
  Config.Model.Kokoro.Model := './kokoro-en-v0_19/model.onnx';
  Config.Model.Kokoro.Voices := './kokoro-en-v0_19/voices.bin';
  Config.Model.Kokoro.Tokens := './kokoro-en-v0_19/tokens.txt';
  Config.Model.Kokoro.DataDir := './kokoro-en-v0_19/espeak-ng-data';
  Config.Model.NumThreads := 2;
  Config.Model.Debug := False;
  Config.MaxNumSentences := 1;

  Result := TSherpaOnnxOfflineTts.Create(Config);
end;

begin
  Tts := GetOfflineTts;
  if Tts.GetSampleRate <> DeviceSampleRate then
    Resampler := TSherpaOnnxLinearResampler.Create(Tts.GetSampleRate, DeviceSampleRate);

  Version := String(Pa_GetVersionText);
  WriteLn('Version is ', Version);
  Status := Pa_Initialize;
  if Status <> paNoError then
    begin
      WriteLn('Failed to initialize portaudio, ', Pa_GetErrorText(Status));
      Exit;
    end;

  NumDevices := Pa_GetDeviceCount;
  WriteLn('Num devices: ', NumDevices);

  DeviceIndex := Pa_GetDefaultOutputDevice;

  if DeviceIndex = paNoDevice then
    begin
      WriteLn('No default output device found');
      Pa_Terminate;
      Exit;
    end;

  EnvStr := GetEnv('SHERPA_ONNX_MIC_DEVICE');
  if EnvStr <> '' then
    begin
      DeviceIndex := StrToIntDef(EnvStr, DeviceIndex);
      WriteLn('Use device index from environment variable SHERPA_ONNX_MIC_DEVICE: ', EnvStr);
    end;

  for I := 0 to (NumDevices - 1) do
    begin
      DeviceInfo := Pa_GetDeviceInfo(I);
      if I = DeviceIndex then
        { WriteLn(Format(' * %d %s', [I, DeviceInfo^.Name])) }
        WriteLn(Format(' * %d %s', [I, AnsiString(DeviceInfo^.Name)]))
      else
        WriteLn(Format('   %d %s', [I, AnsiString(DeviceInfo^.Name)]));
    end;

  WriteLn('Use device ', DeviceIndex);
  WriteLn(' Name ', Pa_GetDeviceInfo(DeviceIndex)^.Name);
  WriteLn(' Max output channels ', Pa_GetDeviceInfo(DeviceIndex)^.MaxOutputChannels);

  Initialize(Param);
  Param.Device := DeviceIndex;
  Param.ChannelCount := 1;
  Param.SampleFormat := paFloat32;
  param.SuggestedLatency := Pa_GetDeviceInfo(DeviceIndex)^.DefaultHighOutputLatency;
  param.HostApiSpecificStreamInfo := nil;

  Buffer := TSherpaOnnxCircularBuffer.Create(30 * DeviceSampleRate);


  { Note(fangjun): PortAudio invokes PlayCallback in a separate thread. }
  Status := Pa_OpenStream(stream, nil, @Param, DeviceSampleRate, paFramesPerBufferUnspecified, paNoFlag,
    PPaStreamCallback(@PlayCallback), nil);

  if Status <> paNoError then
    begin
      WriteLn('Failed to open stream, ', Pa_GetErrorText(Status));
      Pa_Terminate;
      Exit;
    end;

  InitCriticalSection(CriticalSection);

  Status := Pa_StartStream(stream);
  if Status <> paNoError then
    begin
      WriteLn('Failed to start stream, ', Pa_GetErrorText(Status));
      Pa_Terminate;
      Exit;
    end;

  WriteLn('There are ', Tts.GetNumSpeakers, ' speakers');

  Text := 'Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone.';

  GenerationConfig := Default(TSherpaOnnxGenerationConfig);
  GenerationConfig.SilenceScale := 0.2;
  GenerationConfig.Speed := Speed;
  GenerationConfig.Sid := SpeakerId;

  Audio :=  Tts.Generate(Text, GenerationConfig,
    @GenerateCallback, nil);
  FinishedGeneration := True;
  SherpaOnnxWriteWave('./kokoro-en-playback-7.wav', Audio.Samples, Audio.SampleRate);
  WriteLn('Saved to ./kokoro-en-playback-7.wav');

  while not FinishedPlaying do
    Pa_Sleep(100);  {sleep for 0.1 second }
    {TODO(fangjun): Use an event to indicate the play is finished}

  DoneCriticalSection(CriticalSection);

  FreeAndNil(Tts);
  FreeAndNil(Resampler);

  Status := Pa_CloseStream(stream);
  if Status <> paNoError then
    begin
      WriteLn('Failed to close stream, ', Pa_GetErrorText(Status));
      Exit;
    end;

  Status := Pa_Terminate;
  if Status <> paNoError then
    begin
      WriteLn('Failed to deinitialize portaudio, ', Pa_GetErrorText(Status));
      Exit;
    end;
end.


================================================
FILE: pascal-api-examples/tts/kokoro-en.pas
================================================
{ Copyright (c)  2025  Xiaomi Corporation }
program kokoro_en;
{
This file shows how to use the text to speech API of sherpa-onnx
with Kokoro TTS models.

It generates speech from text and saves it to a wave file.

If you want to play it while it is generating, please see
./kokoro-en-playback.pas
}

{$mode objfpc}

uses
  SysUtils,
  sherpa_onnx;

function GetOfflineTts: TSherpaOnnxOfflineTts;
var
  Config: TSherpaOnnxOfflineTtsConfig;
begin
  Config.Model.Kokoro.Model := './kokoro-en-v0_19/model.onnx';
  Config.Model.Kokoro.Voices := './kokoro-en-v0_19/voices.bin';
  Config.Model.Kokoro.Tokens := './kokoro-en-v0_19/tokens.txt';
  Config.Model.Kokoro.DataDir := './kokoro-en-v0_19/espeak-ng-data';
  Config.Model.NumThreads := 2;
  Config.Model.Debug := False;
  Config.MaxNumSentences := 1;

  Result := TSherpaOnnxOfflineTts.Create(Config);
end;

var
  Tts: TSherpaOnnxOfflineTts;
  Audio: TSherpaOnnxGeneratedAudio;
  GenerationConfig: TSherpaOnnxGenerationConfig;

  Text: AnsiString;
  Speed: Single = 1.0;  {Use a larger value to speak faster}
  SpeakerId: Integer = 8;

begin
  Tts := GetOfflineTts;

  WriteLn('There are ', Tts.GetNumSpeakers, ' speakers');

  Text := 'Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone.';

  GenerationConfig := Default(TSherpaOnnxGenerationConfig);
  GenerationConfig.SilenceScale := 0.2;
  GenerationConfig.Speed := Speed;
  GenerationConfig.Sid := SpeakerId;

  Audio :=  Tts.Generate(Text, GenerationConfig, NIL, NIL);
  SherpaOnnxWriteWave('./kokoro-en-8.wav', Audio.Samples, Audio.SampleRate);
  WriteLn('Saved to ./kokoro-en-8.wav');

  FreeAndNil(Tts);
end.


================================================
FILE: pascal-api-examples/tts/kokoro-zh-en-playback.pas
================================================
{ Copyright (c)  2025  Xiaomi Corporation }
program kokoro_en_playback;
{
This file shows how to use the text to speech API of sherpa-onnx
with Kokoro models (Chinese + English).

It generates speech from text and saves it to a wave file.

Note that it plays the audio back as it is still generating.
}

{$mode objfpc}

uses
  {$ifdef unix}
  cthreads,
  {$endif}
  SysUtils,
  dos,
  ctypes,
  portaudio,
  sherpa_onnx;

var
  CriticalSection: TRTLCriticalSection;

  Tts: TSherpaOnnxOfflineTts;
  Audio: TSherpaOnnxGeneratedAudio;
  Resampler: TSherpaOnnxLinearResampler;

  Text: AnsiString;
  Speed: Single = 1.0;  {Use a larger value to speak faster}
  SpeakerId: Integer = 47;
  Buffer: TSherpaOnnxCircularBuffer;
  FinishedGeneration: Boolean = False;
  FinishedPlaying: Boolean = False;

  Version: String;
  EnvStr: String;
  Status: Integer;
  NumDevices: Integer;
  DeviceIndex: Integer;
  DeviceInfo: PPaDeviceInfo;

  { If you get EDivByZero: Division by zero error, please change the sample rate
    to the one supported by your microphone.
  }
  DeviceSampleRate: Integer = 48000;
  I: Integer;
  Param: TPaStreamParameters;
  Stream: PPaStream;
  Wave: TSherpaOnnxWave;
  GenerationConfig: TSherpaOnnxGenerationConfig;

function GenerateCallback(
      Samples: pcfloat; N: cint32;
      Progress: cfloat; Arg: Pointer): cint; cdecl;
begin
  EnterCriticalSection(CriticalSection);
  try
    if Resampler <> nil then
      Buffer.Push(Resampler.Resample(Samples, N, False))
    else
      Buffer.Push(Samples, N);
  finally
    LeaveCriticalSection(CriticalSection);
  end;

  { 1 means to continue generating; 0 means to stop generating. }
  Result := 1;
end;

function PlayCallback(
      input: Pointer; output: Pointer;
      frameCount: culong;
      timeInfo: PPaStreamCallbackTimeInfo;
      statusFlags: TPaStreamCallbackFlags;
      userData: Pointer ): cint; cdecl;
var
  Samples: TSherpaOnnxSamplesArray;
  I: Integer;
begin
  EnterCriticalSection(CriticalSection);
  try
    if Buffer.Size >= frameCount then
      begin
        Samples := Buffer.Get(Buffer.Head, FrameCount);
        Buffer.Pop(FrameCount);
      end
    else if Buffer.Size > 0 then
      begin
        Samples := Buffer.Get(Buffer.Head, Buffer.Size);
        Buffer.Pop(Buffer.Size);
        SetLength(Samples, frameCount);
      end
    else
      SetLength(Samples, frameCount);

    for I := 0 to frameCount - 1 do
      pcfloat(output)[I] := Samples[I];

    if (Buffer.Size > 0) or (not FinishedGeneration) then
      Result := paContinue
    else
      begin
        Result := paComplete;
        FinishedPlaying := True;
      end;
  finally
    LeaveCriticalSection(CriticalSection);
  end;
end;

function GetOfflineTts: TSherpaOnnxOfflineTts;
var
  Config: TSherpaOnnxOfflineTtsConfig;
begin
  Config.Model.Kokoro.Model := './kokoro-multi-lang-v1_0/model.onnx';
  Config.Model.Kokoro.Voices := './kokoro-multi-lang-v1_0/voices.bin';
  Config.Model.Kokoro.Tokens := './kokoro-multi-lang-v1_0/tokens.txt';
  Config.Model.Kokoro.DataDir := './kokoro-multi-lang-v1_0/espeak-ng-data';
  Config.Model.Kokoro.DictDir := './kokoro-multi-lang-v1_0/dict';
  Config.Model.Kokoro.Lexicon := './kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt';
  Config.Model.NumThreads := 2;
  Config.Model.Debug := False;
  Config.MaxNumSentences := 1;

  Result := TSherpaOnnxOfflineTts.Create(Config);
end;

begin
  Tts := GetOfflineTts;
  if Tts.GetSampleRate <> DeviceSampleRate then
    Resampler := TSherpaOnnxLinearResampler.Create(Tts.GetSampleRate, DeviceSampleRate);

  Version := String(Pa_GetVersionText);
  WriteLn('Version is ', Version);
  Status := Pa_Initialize;
  if Status <> paNoError then
    begin
      WriteLn('Failed to initialize portaudio, ', Pa_GetErrorText(Status));
      Exit;
    end;

  NumDevices := Pa_GetDeviceCount;
  WriteLn('Num devices: ', NumDevices);

  DeviceIndex := Pa_GetDefaultOutputDevice;

  if DeviceIndex = paNoDevice then
    begin
      WriteLn('No default output device found');
      Pa_Terminate;
      Exit;
    end;

  EnvStr := GetEnv('SHERPA_ONNX_MIC_DEVICE');
  if EnvStr <> '' then
    begin
      DeviceIndex := StrToIntDef(EnvStr, DeviceIndex);
      WriteLn('Use device index from environment variable SHERPA_ONNX_MIC_DEVICE: ', EnvStr);
    end;

  for I := 0 to (NumDevices - 1) do
    begin
      DeviceInfo := Pa_GetDeviceInfo(I);
      if I = DeviceIndex then
        { WriteLn(Format(' * %d %s', [I, DeviceInfo^.Name])) }
        WriteLn(Format(' * %d %s', [I, AnsiString(DeviceInfo^.Name)]))
      else
        WriteLn(Format('   %d %s', [I, AnsiString(DeviceInfo^.Name)]));
    end;

  WriteLn('Use device ', DeviceIndex);
  WriteLn(' Name ', Pa_GetDeviceInfo(DeviceIndex)^.Name);
  WriteLn(' Max output channels ', Pa_GetDeviceInfo(DeviceIndex)^.MaxOutputChannels);

  Initialize(Param);
  Param.Device := DeviceIndex;
  Param.ChannelCount := 1;
  Param.SampleFormat := paFloat32;
  param.SuggestedLatency := Pa_GetDeviceInfo(DeviceIndex)^.DefaultHighOutputLatency;
  param.HostApiSpecificStreamInfo := nil;

  Buffer := TSherpaOnnxCircularBuffer.Create(30 * DeviceSampleRate);


  { Note(fangjun): PortAudio invokes PlayCallback in a separate thread. }
  Status := Pa_OpenStream(stream, nil, @Param, DeviceSampleRate, paFramesPerBufferUnspecified, paNoFlag,
    PPaStreamCallback(@PlayCallback), nil);

  if Status <> paNoError then
    begin
      WriteLn('Failed to open stream, ', Pa_GetErrorText(Status));
      Pa_Terminate;
      Exit;
    end;

  InitCriticalSection(CriticalSection);

  Status := Pa_StartStream(stream);
  if Status <> paNoError then
    begin
      WriteLn('Failed to start stream, ', Pa_GetErrorText(Status));
      Pa_Terminate;
      Exit;
    end;

  WriteLn('There are ', Tts.GetNumSpeakers, ' speakers');

  Text := '中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢？';

  GenerationConfig := Default(TSherpaOnnxGenerationConfig);
  GenerationConfig.SilenceScale := 0.2;
  GenerationConfig.Speed := Speed;
  GenerationConfig.Sid := SpeakerId;

  Audio :=  Tts.Generate(Text, GenerationConfig,
    @GenerateCallback, nil);
  FinishedGeneration := True;
  SherpaOnnxWriteWave('./kokoro-zh-en-playback-47.wav', Audio.Samples, Audio.SampleRate);
  WriteLn('Saved to ./kokoro-zh-en-playback-47.wav');

  while not FinishedPlaying do
    Pa_Sleep(100);  {sleep for 0.1 second }
    {TODO(fangjun): Use an event to indicate the play is finished}

  DoneCriticalSection(CriticalSection);

  FreeAndNil(Tts);
  FreeAndNil(Resampler);

  Status := Pa_CloseStream(stream);
  if Status <> paNoError then
    begin
      WriteLn('Failed to close stream, ', Pa_GetErrorText(Status));
      Exit;
    end;

  Status := Pa_Terminate;
  if Status <> paNoError then
    begin
      WriteLn('Failed to deinitialize portaudio, ', Pa_GetErrorText(Status));
      Exit;
    end;
end.


================================================
FILE: pascal-api-examples/tts/kokoro-zh-en.pas
================================================
{ Copyright (c)  2025  Xiaomi Corporation }
program kokoro_en;
{
This file shows how to use the text to speech API of sherpa-onnx
with Kokoro TTS models (Chinese + English).

It generates speech from text and saves it to a wave file.

If you want to play it while it is generating, please see
./kokoro-en-playback.pas
}

{$mode objfpc}

uses
  SysUtils,
  sherpa_onnx;

function GetOfflineTts: TSherpaOnnxOfflineTts;
var
  Config: TSherpaOnnxOfflineTtsConfig;
begin
  Config.Model.Kokoro.Model := './kokoro-multi-lang-v1_0/model.onnx';
  Config.Model.Kokoro.Voices := './kokoro-multi-lang-v1_0/voices.bin';
  Config.Model.Kokoro.Tokens := './kokoro-multi-lang-v1_0/tokens.txt';
  Config.Model.Kokoro.DataDir := './kokoro-multi-lang-v1_0/espeak-ng-data';
  Config.Model.Kokoro.DictDir := './kokoro-multi-lang-v1_0/dict';
  Config.Model.Kokoro.Lexicon := './kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt';
  Config.Model.NumThreads := 2;
  Config.Model.Debug := False;
  Config.MaxNumSentences := 1;

  Result := TSherpaOnnxOfflineTts.Create(Config);
end;

var
  Tts: TSherpaOnnxOfflineTts;
  Audio: TSherpaOnnxGeneratedAudio;
  GenerationConfig: TSherpaOnnxGenerationConfig;

  Text: AnsiString;
  Speed: Single = 1.0;  {Use a larger value to speak faster}
  SpeakerId: Integer = 46;

begin
  Tts := GetOfflineTts;

  WriteLn('There are ', Tts.GetNumSpeakers, ' speakers');

  Text := '中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢？';

  GenerationConfig := Default(TSherpaOnnxGenerationConfig);
  GenerationConfig.SilenceScale := 0.2;
  GenerationConfig.Speed := Speed;
  GenerationConfig.Sid := SpeakerId;

  Audio :=  Tts.Generate(Text, GenerationConfig, NIL, NIL);
  SherpaOnnxWriteWave('./kokoro-zh-en-46.wav', Audio.Samples, Audio.SampleRate);
  WriteLn('Saved to ./kokoro-zh-en-46.wav');

  FreeAndNil(Tts);
end.


================================================
FILE: pascal-api-examples/tts/matcha-en-playback.pas
================================================
{ Copyright (c)  2025  Xiaomi Corporation }
program matcha_en_playback;
{
This file shows how to use the text to speech API of sherpa-onnx
with MatchaTTS models.

It generates speech from text and saves it to a wave file.

Note that it plays the audio back as it is still generating.
}

{$mode objfpc}

uses
  {$ifdef unix}
  cthreads,
  {$endif}
  SysUtils,
  dos,
  ctypes,
  portaudio,
  sherpa_onnx;

var
  CriticalSection: TRTLCriticalSection;

  Tts: TSherpaOnnxOfflineTts;
  Audio: TSherpaOnnxGeneratedAudio;
  Resampler: TSherpaOnnxLinearResampler;

  Text: AnsiString;
  Speed: Single = 1.0;  {Use a larger value to speak faster}
  SpeakerId: Integer = 0;
  Buffer: TSherpaOnnxCircularBuffer;
  FinishedGeneration: Boolean = False;
  FinishedPlaying: Boolean = False;

  Version: String;
  EnvStr: String;
  Status: Integer;
  NumDevices: Integer;
  DeviceIndex: Integer;
  DeviceInfo: PPaDeviceInfo;

  { If you get EDivByZero: Division by zero error, please change the sample rate
    to the one supported by your microphone.
  }
  DeviceSampleRate: Integer = 48000;
  I: Integer;
  Param: TPaStreamParameters;
  Stream: PPaStream;
  Wave: TSherpaOnnxWave;
  GenerationConfig: TSherpaOnnxGenerationConfig;

function GenerateCallback(
      Samples: pcfloat; N: cint32;
      Progress: cfloat; Arg: Pointer): cint; cdecl;
begin
  EnterCriticalSection(CriticalSection);
  try
    if Resampler <> nil then
      Buffer.Push(Resampler.Resample(Samples, N, False))
    else
      Buffer.Push(Samples, N);
  finally
    LeaveCriticalSection(CriticalSection);
  end;

  { 1 means to continue generating; 0 means to stop generating. }
  Result := 1;
end;

function PlayCallback(
      input: Pointer; output: Pointer;
      frameCount: culong;
      timeInfo: PPaStreamCallbackTimeInfo;
      statusFlags: TPaStreamCallbackFlags;
      userData: Pointer ): cint; cdecl;
var
  Samples: TSherpaOnnxSamplesArray;
  I: Integer;
begin
  EnterCriticalSection(CriticalSection);
  try
    if Buffer.Size >= frameCount then
      begin
        Samples := Buffer.Get(Buffer.Head, FrameCount);
        Buffer.Pop(FrameCount);
      end
    else if Buffer.Size > 0 then
      begin
        Samples := Buffer.Get(Buffer.Head, Buffer.Size);
        Buffer.Pop(Buffer.Size);
        SetLength(Samples, frameCount);
      end
    else
      SetLength(Samples, frameCount);

    for I := 0 to frameCount - 1 do
      pcfloat(output)[I] := Samples[I];

    if (Buffer.Size > 0) or (not FinishedGeneration) then
      Result := paContinue
    else
      begin
        Result := paComplete;
        FinishedPlaying := True;
      end;
  finally
    LeaveCriticalSection(CriticalSection);
  end;
end;

function GetOfflineTts: TSherpaOnnxOfflineTts;
var
  Config: TSherpaOnnxOfflineTtsConfig;
begin
  Config.Model.Matcha.AcousticModel := './matcha-icefall-en_US-ljspeech/model-steps-3.onnx';
  Config.Model.Matcha.Vocoder := './vocos-22khz-univ.onnx';
  Config.Model.Matcha.Tokens := './matcha-icefall-en_US-ljspeech/tokens.txt';
  Config.Model.Matcha.DataDir := './matcha-icefall-en_US-ljspeech/espeak-ng-data';
  Config.Model.NumThreads := 1;
  Config.Model.Debug := False;
  Config.MaxNumSentences := 1;

  Result := TSherpaOnnxOfflineTts.Create(Config);
end;

begin
  Tts := GetOfflineTts;
  if Tts.GetSampleRate <> DeviceSampleRate then
    Resampler := TSherpaOnnxLinearResampler.Create(Tts.GetSampleRate, DeviceSampleRate);

  Version := String(Pa_GetVersionText);
  WriteLn('Version is ', Version);
  Status := Pa_Initialize;
  if Status <> paNoError then
    begin
      WriteLn('Failed to initialize portaudio, ', Pa_GetErrorText(Status));
      Exit;
    end;

  NumDevices := Pa_GetDeviceCount;
  WriteLn('Num devices: ', NumDevices);

  DeviceIndex := Pa_GetDefaultOutputDevice;

  if DeviceIndex = paNoDevice then
    begin
      WriteLn('No default output device found');
      Pa_Terminate;
      Exit;
    end;

  EnvStr := GetEnv('SHERPA_ONNX_MIC_DEVICE');
  if EnvStr <> '' then
    begin
      DeviceIndex := StrToIntDef(EnvStr, DeviceIndex);
      WriteLn('Use device index from environment variable SHERPA_ONNX_MIC_DEVICE: ', EnvStr);
    end;

  for I := 0 to (NumDevices - 1) do
    begin
      DeviceInfo := Pa_GetDeviceInfo(I);
      if I = DeviceIndex then
        { WriteLn(Format(' * %d %s', [I, DeviceInfo^.Name])) }
        WriteLn(Format(' * %d %s', [I, AnsiString(DeviceInfo^.Name)]))
      else
        WriteLn(Format('   %d %s', [I, AnsiString(DeviceInfo^.Name)]));
    end;

  WriteLn('Use device ', DeviceIndex);
  WriteLn(' Name ', Pa_GetDeviceInfo(DeviceIndex)^.Name);
  WriteLn(' Max output channels ', Pa_GetDeviceInfo(DeviceIndex)^.MaxOutputChannels);

  Initialize(Param);
  Param.Device := DeviceIndex;
  Param.ChannelCount := 1;
  Param.SampleFormat := paFloat32;
  param.SuggestedLatency := Pa_GetDeviceInfo(DeviceIndex)^.DefaultHighOutputLatency;
  param.HostApiSpecificStreamInfo := nil;

  Buffer := TSherpaOnnxCircularBuffer.Create(30 * DeviceSampleRate);


  { Note(fangjun): PortAudio invokes PlayCallback in a separate thread. }
  Status := Pa_OpenStream(stream, nil, @Param, DeviceSampleRate, paFramesPerBufferUnspecified, paNoFlag,
    PPaStreamCallback(@PlayCallback), nil);

  if Status <> paNoError then
    begin
      WriteLn('Failed to open stream, ', Pa_GetErrorText(Status));
      Pa_Terminate;
      Exit;
    end;

  InitCriticalSection(CriticalSection);

  Status := Pa_StartStream(stream);
  if Status <> paNoError then
    begin
      WriteLn('Failed to start stream, ', Pa_GetErrorText(Status));
      Pa_Terminate;
      Exit;
    end;

  WriteLn('There are ', Tts.GetNumSpeakers, ' speakers');

  Text := 'Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone.';

  GenerationConfig := Default(TSherpaOnnxGenerationConfig);
  GenerationConfig.SilenceScale := 0.2;
  GenerationConfig.Speed := Speed;
  GenerationConfig.Sid := SpeakerId;

  Audio :=  Tts.Generate(Text, GenerationConfig,
    @GenerateCallback, nil);
  FinishedGeneration := True;
  SherpaOnnxWriteWave('./matcha-en-playback.wav', Audio.Samples, Audio.SampleRate);
  WriteLn('Saved to ./matcha-en-playback.wav');

  while not FinishedPlaying do
    Pa_Sleep(100);  {sleep for 0.1 second }
    {TODO(fangjun): Use an event to indicate the play is finished}

  DoneCriticalSection(CriticalSection);

  FreeAndNil(Tts);
  FreeAndNil(Resampler);

  Status := Pa_CloseStream(stream);
  if Status <> paNoError then
    begin
      WriteLn('Failed to close stream, ', Pa_GetErrorText(Status));
      Exit;
    end;

  Status := Pa_Terminate;
  if Status <> paNoError then
    begin
      WriteLn('Failed to deinitialize portaudio, ', Pa_GetErrorText(Status));
      Exit;
    end;
end.


================================================
FILE: pascal-api-examples/tts/matcha-en.pas
================================================
{ Copyright (c)  2025  Xiaomi Corporation }
program matcha_en;
{
This file shows how to use the text to speech API of sherpa-onnx
with MatchaTTS models.

It generates speech from text and saves it to a wave file.

If you want to play it while it is generating, please see
./matcha-en-playback.pas
}

{$mode objfpc}

uses
  SysUtils,
  sherpa_onnx;

function GetOfflineTts: TSherpaOnnxOfflineTts;
var
  Config: TSherpaOnnxOfflineTtsConfig;
begin
  Config.Model.Matcha.AcousticModel := './matcha-icefall-en_US-ljspeech/model-steps-3.onnx';
  Config.Model.Matcha.Vocoder := './vocos-22khz-univ.onnx';
  Config.Model.Matcha.Tokens := './matcha-icefall-en_US-ljspeech/tokens.txt';
  Config.Model.Matcha.DataDir := './matcha-icefall-en_US-ljspeech/espeak-ng-data';
  Config.Model.NumThreads := 1;
  Config.Model.Debug := False;
  Config.MaxNumSentences := 1;

  Result := TSherpaOnnxOfflineTts.Create(Config);
end;

var
  Tts: TSherpaOnnxOfflineTts;
  Audio: TSherpaOnnxGeneratedAudio;
  GenerationConfig: TSherpaOnnxGenerationConfig;

  Text: AnsiString;
  Speed: Single = 1.0;  {Use a larger value to speak faster}
  SpeakerId: Integer = 0;

begin
  Tts := GetOfflineTts;

  WriteLn('There are ', Tts.GetNumSpeakers, ' speakers');

  Text := 'Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone.';

  GenerationConfig := Default(TSherpaOnnxGenerationConfig);
  GenerationConfig.SilenceScale := 0.2;
  GenerationConfig.Speed := Speed;
  GenerationConfig.Sid := SpeakerId;

  Audio :=  Tts.Generate(Text, GenerationConfig, NIL, NIL);
  SherpaOnnxWriteWave('./matcha-en.wav', Audio.Samples, Audio.SampleRate);
  WriteLn('Saved to ./matcha-en.wav');

  FreeAndNil(Tts);
end.


================================================
FILE: pascal-api-examples/tts/matcha-zh-playback.pas
================================================
{ Copyright (c)  2025  Xiaomi Corporation }
program matcha_zh_playback;
{
This file shows how to use the text to speech API of sherpa-onnx
with MatchaTTS models.

It generates speech from text and saves it to a wave file.

Note that it plays the audio back as it is still generating.
}

{$mode objfpc}

uses
  {$ifdef unix}
  cthreads,
  {$endif}
  SysUtils,
  dos,
  ctypes,
  portaudio,
  sherpa_onnx;

var
  CriticalSection: TRTLCriticalSection;

  Tts: TSherpaOnnxOfflineTts;
  Audio: TSherpaOnnxGeneratedAudio;
  Resampler: TSherpaOnnxLinearResampler;

  Text: AnsiString;
  Speed: Single = 1.0;  {Use a larger value to speak faster}
  SpeakerId: Integer = 0;
  Buffer: TSherpaOnnxCircularBuffer;
  FinishedGeneration: Boolean = False;
  FinishedPlaying: Boolean = False;

  Version: String;
  EnvStr: String;
  Status: Integer;
  NumDevices: Integer;
  DeviceIndex: Integer;
  DeviceInfo: PPaDeviceInfo;

  { If you get EDivByZero: Division by zero error, please change the sample rate
    to the one supported by your microphone.
  }
  DeviceSampleRate: Integer = 48000;
  I: Integer;
  Param: TPaStreamParameters;
  Stream: PPaStream;
  Wave: TSherpaOnnxWave;
  GenerationConfig: TSherpaOnnxGenerationConfig;

function GenerateCallback(
      Samples: pcfloat; N: cint32;
      Progress: cfloat; Arg: Pointer): cint; cdecl;
begin
  EnterCriticalSection(CriticalSection);
  try
    if Resampler <> nil then
      Buffer.Push(Resampler.Resample(Samples, N, False))
    else
      Buffer.Push(Samples, N);
  finally
    LeaveCriticalSection(CriticalSection);
  end;

  { 1 means to continue generating; 0 means to stop generating. }
  Result := 1;
end;

function PlayCallback(
      input: Pointer; output: Pointer;
      frameCount: culong;
      timeInfo: PPaStreamCallbackTimeInfo;
      statusFlags: TPaStreamCallbackFlags;
      userData: Pointer ): cint; cdecl;
var
  Samples: TSherpaOnnxSamplesArray;
  I: Integer;
begin
  EnterCriticalSection(CriticalSection);
  try
    if Buffer.Size >= frameCount then
      begin
        Samples := Buffer.Get(Buffer.Head, FrameCount);
        Buffer.Pop(FrameCount);
      end
    else if Buffer.Size > 0 then
      begin
        Samples := Buffer.Get(Buffer.Head, Buffer.Size);
        Buffer.Pop(Buffer.Size);
        SetLength(Samples, frameCount);
      end
    else
      SetLength(Samples, frameCount);

    for I := 0 to frameCount - 1 do
      pcfloat(output)[I] := Samples[I];

    if (Buffer.Size > 0) or (not FinishedGeneration) then
      Result := paContinue
    else
      begin
        Result := paComplete;
        FinishedPlaying := True;
      end;
  finally
    LeaveCriticalSection(CriticalSection);
  end;
end;

function GetOfflineTts: TSherpaOnnxOfflineTts;
var
  Config: TSherpaOnnxOfflineTtsConfig;
begin
  Config.Model.Matcha.AcousticModel := './matcha-icefall-zh-baker/model-steps-3.onnx';
  Config.Model.Matcha.Vocoder := './vocos-22khz-univ.onnx';
  Config.Model.Matcha.Lexicon := './matcha-icefall-zh-baker/lexicon.txt';
  Config.Model.Matcha.Tokens := './matcha-icefall-zh-baker/tokens.txt';
  Config.Model.Matcha.DictDir := './matcha-icefall-zh-baker/dict';
  Config.Model.NumThreads := 1;
  Config.Model.Debug := False;
  Config.RuleFsts := './matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst';
  Config.MaxNumSentences := 1;

  Result := TSherpaOnnxOfflineTts.Create(Config);
end;

begin
  Tts := GetOfflineTts;
  if Tts.GetSampleRate <> DeviceSampleRate then
    Resampler := TSherpaOnnxLinearResampler.Create(Tts.GetSampleRate, DeviceSampleRate);

  Version := String(Pa_GetVersionText);
  WriteLn('Version is ', Version);
  Status := Pa_Initialize;
  if Status <> paNoError then
    begin
      WriteLn('Failed to initialize portaudio, ', Pa_GetErrorText(Status));
      Exit;
    end;

  NumDevices := Pa_GetDeviceCount;
  WriteLn('Num devices: ', NumDevices);

  DeviceIndex := Pa_GetDefaultOutputDevice;

  if DeviceIndex = paNoDevice then
    begin
      WriteLn('No default output device found');
      Pa_Terminate;
      Exit;
    end;

  EnvStr := GetEnv('SHERPA_ONNX_MIC_DEVICE');
  if EnvStr <> '' then
    begin
      DeviceIndex := StrToIntDef(EnvStr, DeviceIndex);
      WriteLn('Use device index from environment variable SHERPA_ONNX_MIC_DEVICE: ', EnvStr);
    end;

  for I := 0 to (NumDevices - 1) do
    begin
      DeviceInfo := Pa_GetDeviceInfo(I);
      if I = DeviceIndex then
        { WriteLn(Format(' * %d %s', [I, DeviceInfo^.Name])) }
        WriteLn(Format(' * %d %s', [I, AnsiString(DeviceInfo^.Name)]))
      else
        WriteLn(Format('   %d %s', [I, AnsiString(DeviceInfo^.Name)]));
    end;

  WriteLn('Use device ', DeviceIndex);
  WriteLn(' Name ', Pa_GetDeviceInfo(DeviceIndex)^.Name);
  WriteLn(' Max output channels ', Pa_GetDeviceInfo(DeviceIndex)^.MaxOutputChannels);

  Initialize(Param);
  Param.Device := DeviceIndex;
  Param.ChannelCount := 1;
  Param.SampleFormat := paFloat32;
  param.SuggestedLatency := Pa_GetDeviceInfo(DeviceIndex)^.DefaultHighOutputLatency;
  param.HostApiSpecificStreamInfo := nil;

  Buffer := TSherpaOnnxCircularBuffer.Create(30 * DeviceSampleRate);


  { Note(fangjun): PortAudio invokes PlayCallback in a separate thread. }
  Status := Pa_OpenStream(stream, nil, @Param, DeviceSampleRate, paFramesPerBufferUnspecified, paNoFlag,
    PPaStreamCallback(@PlayCallback), nil);

  if Status <> paNoError then
    begin
      WriteLn('Failed to open stream, ', Pa_GetErrorText(Status));
      Pa_Terminate;
      Exit;
    end;

  InitCriticalSection(CriticalSection);

  Status := Pa_StartStream(stream);
  if Status <> paNoError then
    begin
      WriteLn('Failed to start stream, ', Pa_GetErrorText(Status));
      Pa_Terminate;
      Exit;
    end;

  WriteLn('There are ', Tts.GetNumSpeakers, ' speakers');

  Text := '某某银行的副行长和一些行政领导表示，他们去过长江和长白山; 经济不断增长。2024年12月31号，拨打110或者18920240511。123456块钱。';

  GenerationConfig := Default(TSherpaOnnxGenerationConfig);
  GenerationConfig.SilenceScale := 0.2;
  GenerationConfig.Speed := Speed;
  GenerationConfig.Sid := SpeakerId;

  Audio :=  Tts.Generate(Text, GenerationConfig,
    @GenerateCallback, nil);
  FinishedGeneration := True;
  SherpaOnnxWriteWave('./matcha-zh-playback.wav', Audio.Samples, Audio.SampleRate);
  WriteLn('Saved to ./matcha-zh-playback.wav');

  while not FinishedPlaying do
    Pa_Sleep(100);  {sleep for 0.1 second }
    {TODO(fangjun): Use an event to indicate the play is finished}

  DoneCriticalSection(CriticalSection);

  FreeAndNil(Tts);
  FreeAndNil(Resampler);

  Status := Pa_CloseStream(stream);
  if Status <> paNoError then
    begin
      WriteLn('Failed to close stream, ', Pa_GetErrorText(Status));
      Exit;
    end;

  Status := Pa_Terminate;
  if Status <> paNoError then
    begin
      WriteLn('Failed to deinitialize portaudio, ', Pa_GetErrorText(Status));
      Exit;
    end;
end.


================================================
FILE: pascal-api-examples/tts/matcha-zh.pas
================================================
{ Copyright (c)  2025  Xiaomi Corporation }
program matcha_zh;
{
This file shows how to use the text to speech API of sherpa-onnx
with MatchaTTS models.

It generates speech from text and saves it to a wave file.

If you want to play it while it is generating, please see
./matcha-zh-playback.pas
}

{$mode objfpc}

uses
  SysUtils,
  sherpa_onnx;

function GetOfflineTts: TSherpaOnnxOfflineTts;
var
  Config: TSherpaOnnxOfflineTtsConfig;
begin
  Config.Model.Matcha.AcousticModel := './matcha-icefall-zh-baker/model-steps-3.onnx';
  Config.Model.Matcha.Vocoder := './vocos-22khz-univ.onnx';
  Config.Model.Matcha.Lexicon := './matcha-icefall-zh-baker/lexicon.txt';
  Config.Model.Matcha.Tokens := './matcha-icefall-zh-baker/tokens.txt';
  Config.Model.Matcha.DictDir := './matcha-icefall-zh-baker/dict';
  Config.Model.NumThreads := 1;
  Config.Model.Debug := False;
  Config.RuleFsts := './matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst';
  Config.MaxNumSentences := 1;

  Result := TSherpaOnnxOfflineTts.Create(Config);
end;

var
  Tts: TSherpaOnnxOfflineTts;
  Audio: TSherpaOnnxGeneratedAudio;
  GenerationConfig: TSherpaOnnxGenerationConfig;

  Text: AnsiString;
  Speed: Single = 1.0;  {Use a larger value to speak faster}
  SpeakerId: Integer = 0;

begin
  Tts := GetOfflineTts;

  WriteLn('There are ', Tts.GetNumSpeakers, ' speakers');

  Text := '某某银行的副行长和一些行政领导表示，他们去过长江和长白山; 经济不断增长。2024年12月31号，拨打110或者18920240511。123456块钱。';

  GenerationConfig := Default(TSherpaOnnxGenerationConfig);
  GenerationConfig.SilenceScale := 0.2;
  GenerationConfig.Speed := Speed;
  GenerationConfig.Sid := SpeakerId;

  Audio :=  Tts.Generate(Text, GenerationConfig, NIL, NIL);
  SherpaOnnxWriteWave('./matcha-zh.wav', Audio.Samples, Audio.SampleRate);
  WriteLn('Saved to ./matcha-zh.wav');

  FreeAndNil(Tts);
end.


================================================
FILE: pascal-api-examples/tts/piper-playback.pas
================================================
{ Copyright (c)  2024  Xiaomi Corporation }
program piper_playback;
{
This file shows how to use the text to speech API of sherpa-onnx
with Piper models.

It generates speech from text and saves it to a wave file.

Note that it plays the audio back as it is still generating.
}

{$mode objfpc}

uses
  {$ifdef unix}
  cthreads,
  {$endif}
  SysUtils,
  dos,
  ctypes,
  portaudio,
  sherpa_onnx;

var
  CriticalSection: TRTLCriticalSection;

  Tts: TSherpaOnnxOfflineTts;
  Audio: TSherpaOnnxGeneratedAudio;
  Resampler: TSherpaOnnxLinearResampler;

  Text: AnsiString;
  Speed: Single = 1.0;  {Use a larger value to speak faster}
  SpeakerId: Integer = 0;
  Buffer: TSherpaOnnxCircularBuffer;
  FinishedGeneration: Boolean = False;
  FinishedPlaying: Boolean = False;

  Version: String;
  EnvStr: String;
  Status: Integer;
  NumDevices: Integer;
  DeviceIndex: Integer;
  DeviceInfo: PPaDeviceInfo;

  { If you get EDivByZero: Division by zero error, please change the sample rate
    to the one supported by your microphone.
  }
  DeviceSampleRate: Integer = 48000;
  I: Integer;
  Param: TPaStreamParameters;
  Stream: PPaStream;
  Wave: TSherpaOnnxWave;
  GenerationConfig: TSherpaOnnxGenerationConfig;

function GenerateCallback(
      Samples: pcfloat; N: cint32;
      Progress: cfloat; Arg: Pointer): cint; cdecl;
begin
  EnterCriticalSection(CriticalSection);
  try
    if Resampler <> nil then
      Buffer.Push(Resampler.Resample(Samples, N, False))
    else
      Buffer.Push(Samples, N);
  finally
    LeaveCriticalSection(CriticalSection);
  end;

  { 1 means to continue generating; 0 means to stop generating. }
  Result := 1;
end;

function PlayCallback(
      input: Pointer; output: Pointer;
      frameCount: culong;
      timeInfo: PPaStreamCallbackTimeInfo;
      statusFlags: TPaStreamCallbackFlags;
      userData: Pointer ): cint; cdecl;
var
  Samples: TSherpaOnnxSamplesArray;
  I: Integer;
begin
  EnterCriticalSection(CriticalSection);
  try
    if Buffer.Size >= frameCount then
      begin
        Samples := Buffer.Get(Buffer.Head, FrameCount);
        Buffer.Pop(FrameCount);
      end
    else if Buffer.Size > 0 then
      begin
        Samples := Buffer.Get(Buffer.Head, Buffer.Size);
        Buffer.Pop(Buffer.Size);
        SetLength(Samples, frameCount);
      end
    else
      SetLength(Samples, frameCount);

    for I := 0 to frameCount - 1 do
      pcfloat(output)[I] := Samples[I];

    if (Buffer.Size > 0) or (not FinishedGeneration) then
      Result := paContinue
    else
      begin
        Result := paComplete;
        FinishedPlaying := True;
      end;
  finally
    LeaveCriticalSection(CriticalSection);
  end;
end;

function GetOfflineTts: TSherpaOnnxOfflineTts;
var
  Config: TSherpaOnnxOfflineTtsConfig;
begin
  Config.Model.Vits.Model := './vits-piper-en_US-libritts_r-medium/en_US-libritts_r-medium.onnx';
  Config.Model.Vits.Tokens := './vits-piper-en_US-libritts_r-medium/tokens.txt';
  Config.Model.Vits.DataDir := './vits-piper-en_US-libritts_r-medium/espeak-ng-data';
  Config.Model.NumThreads := 1;
  Config.Model.Debug := False;
  Config.MaxNumSentences := 1;

  Result := TSherpaOnnxOfflineTts.Create(Config);
end;

begin
  Tts := GetOfflineTts;
  if Tts.GetSampleRate <> DeviceSampleRate then
    Resampler := TSherpaOnnxLinearResampler.Create(Tts.GetSampleRate, DeviceSampleRate);

  Version := String(Pa_GetVersionText);
  WriteLn('Version is ', Version);
  Status := Pa_Initialize;
  if Status <> paNoError then
    begin
      WriteLn('Failed to initialize portaudio, ', Pa_GetErrorText(Status));
      Exit;
    end;

  NumDevices := Pa_GetDeviceCount;
  WriteLn('Num devices: ', NumDevices);

  DeviceIndex := Pa_GetDefaultOutputDevice;

  if DeviceIndex = paNoDevice then
    begin
      WriteLn('No default output device found');
      Pa_Terminate;
      Exit;
    end;

  EnvStr := GetEnv('SHERPA_ONNX_MIC_DEVICE');
  if EnvStr <> '' then
    begin
      DeviceIndex := StrToIntDef(EnvStr, DeviceIndex);
      WriteLn('Use device index from environment variable SHERPA_ONNX_MIC_DEVICE: ', EnvStr);
    end;

  for I := 0 to (NumDevices - 1) do
    begin
      DeviceInfo := Pa_GetDeviceInfo(I);
      if I = DeviceIndex then
        { WriteLn(Format(' * %d %s', [I, DeviceInfo^.Name])) }
        WriteLn(Format(' * %d %s', [I, AnsiString(DeviceInfo^.Name)]))
      else
        WriteLn(Format('   %d %s', [I, AnsiString(DeviceInfo^.Name)]));
    end;

  WriteLn('Use device ', DeviceIndex);
  WriteLn(' Name ', Pa_GetDeviceInfo(DeviceIndex)^.Name);
  WriteLn(' Max output channels ', Pa_GetDeviceInfo(DeviceIndex)^.MaxOutputChannels);

  Initialize(Param);
  Param.Device := DeviceIndex;
  Param.ChannelCount := 1;
  Param.SampleFormat := paFloat32;
  param.SuggestedLatency := Pa_GetDeviceInfo(DeviceIndex)^.DefaultHighOutputLatency;
  param.HostApiSpecificStreamInfo := nil;

  Buffer := TSherpaOnnxCircularBuffer.Create(30 * DeviceSampleRate);


  { Note(fangjun): PortAudio invokes PlayCallback in a separate thread. }
  Status := Pa_OpenStream(stream, nil, @Param, DeviceSampleRate, paFramesPerBufferUnspecified, paNoFlag,
    PPaStreamCallback(@PlayCallback), nil);

  if Status <> paNoError then
    begin
      WriteLn('Failed to open stream, ', Pa_GetErrorText(Status));
      Pa_Terminate;
      Exit;
    end;

  InitCriticalSection(CriticalSection);

  Status := Pa_StartStream(stream);
  if Status <> paNoError then
    begin
      WriteLn('Failed to start stream, ', Pa_GetErrorText(Status));
      Pa_Terminate;
      Exit;
    end;

  WriteLn('There are ', Tts.GetNumSpeakers, ' speakers');

  Text := 'Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.';

  GenerationConfig := Default(TSherpaOnnxGenerationConfig);
  GenerationConfig.SilenceScale := 0.2;
  GenerationConfig.Speed := Speed;
  GenerationConfig.Sid := SpeakerId;

  Audio :=  Tts.Generate(Text, GenerationConfig,
    @GenerateCallback, nil);
  FinishedGeneration := True;
  SherpaOnnxWriteWave('./libritts_r-generated.wav', Audio.Samples, Audio.SampleRate);
  WriteLn('Saved to ./libritts_r-generated.wav');

  while not FinishedPlaying do
    Pa_Sleep(100);  {sleep for 0.1 second }
    {TODO(fangjun): Use an event to indicate the play is finished}

  DoneCriticalSection(CriticalSection);

  FreeAndNil(Tts);
  FreeAndNil(Resampler);

  Status := Pa_CloseStream(stream);
  if Status <> paNoError then
    begin
      WriteLn('Failed to close stream, ', Pa_GetErrorText(Status));
      Exit;
    end;

  Status := Pa_Terminate;
  if Status <> paNoError then
    begin
      WriteLn('Failed to deinitialize portaudio, ', Pa_GetErrorText(Status));
      Exit;
    end;
end.


================================================
FILE: pascal-api-examples/tts/piper.pas
================================================
{ Copyright (c)  2024  Xiaomi Corporation }
program piper;
{
This file shows how to use the text to speech API of sherpa-onnx
with Piper models.

It generates speech from text and saves it to a wave file.

If you want to play it while it is generating, please see
./piper-playback.pas
}

{$mode objfpc}

uses
  SysUtils,
  sherpa_onnx;

function GetOfflineTts: TSherpaOnnxOfflineTts;
var
  Config: TSherpaOnnxOfflineTtsConfig;
begin
  Config.Model.Vits.Model := './vits-piper-en_US-libritts_r-medium/en_US-libritts_r-medium.onnx';
  Config.Model.Vits.Tokens := './vits-piper-en_US-libritts_r-medium/tokens.txt';
  Config.Model.Vits.DataDir := './vits-piper-en_US-libritts_r-medium/espeak-ng-data';
  Config.Model.NumThreads := 1;
  Config.Model.Debug := False;
  Config.MaxNumSentences := 1;

  Result := TSherpaOnnxOfflineTts.Create(Config);
end;

var
  Tts: TSherpaOnnxOfflineTts;
  Audio: TSherpaOnnxGeneratedAudio;
  GenerationConfig: TSherpaOnnxGenerationConfig;

  Text: AnsiString;
  Speed: Single = 1.0;  {Use a larger value to speak faster}
  SpeakerId: Integer = 0;

begin
  Tts := GetOfflineTts;

  WriteLn('There are ', Tts.GetNumSpeakers, ' speakers');

  Text := 'Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.';

  GenerationConfig := Default(TSherpaOnnxGenerationConfig);
  GenerationConfig.SilenceScale := 0.2;
  GenerationConfig.Speed := Speed;
  GenerationConfig.Sid := SpeakerId;

  Audio :=  Tts.Generate(Text, GenerationConfig, NIL, NIL);
  SherpaOnnxWriteWave('./libritts_r-generated.wav', Audio.Samples, Audio.SampleRate);
  WriteLn('Saved to ./libritts_r-generated.wav');

  FreeAndNil(Tts);
end.


================================================
FILE: pascal-api-examples/tts/pocket-en.pas
================================================
{ Copyright (c)  2026  Xiaomi Corporation }
program pocket_en;
{
This file shows how to use the text to speech API of sherpa-onnx
with Pocket TTS models.

It generates speech from text and saves it to a wave file.
}

{$mode objfpc}

uses
  ctypes,
  SysUtils,
  sherpa_onnx;

function ProgressCallback(Samples: pcfloat; N: cint32; P: cfloat;
  Arg: Pointer): cint32; cdecl;
begin
  WriteLn(Format('Progress: %.2f%%, samples: %d', [P * 100.0, N]));
  Result := 1;
end;

function GetOfflineTts: TSherpaOnnxOfflineTts;
var
  Config: TSherpaOnnxOfflineTtsConfig;
begin
  Config.Model.Pocket.LmFlow := './sherpa-onnx-pocket-tts-int8-2026-01-26/lm_flow.int8.onnx';
  Config.Model.Pocket.LmMain := './sherpa-onnx-pocket-tts-int8-2026-01-26/lm_main.int8.onnx';
  Config.Model.Pocket.Encoder := './sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx';
  Config.Model.Pocket.Decoder := './sherpa-onnx-pocket-tts-int8-2026-01-26/decoder.int8.onnx';
  Config.Model.Pocket.TextConditioner := './sherpa-onnx-pocket-tts-int8-2026-01-26/text_conditioner.onnx';
  Config.Model.Pocket.VocabJson := './sherpa-onnx-pocket-tts-int8-2026-01-26/vocab.json';
  Config.Model.Pocket.TokenScoresJson := './sherpa-onnx-pocket-tts-int8-2026-01-26/token_scores.json';
  Config.Model.NumThreads := 2;
  Config.Model.Debug := True;
  Config.MaxNumSentences := 1;

  Result := TSherpaOnnxOfflineTts.Create(Config);
end;

var
  Tts: TSherpaOnnxOfflineTts;
  GenerationConfig: TSherpaOnnxGenerationConfig;
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;
  Audio: TSherpaOnnxGeneratedAudio;

  Text: AnsiString;

begin
  Tts := GetOfflineTts;

  WriteLn('There are ', Tts.GetNumSpeakers, ' speakers');

  Text := 'Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone.';

  WaveFilename := './sherpa-onnx-pocket-tts-int8-2026-01-26/test_wavs/bria.wav';
  Wave := SherpaOnnxReadWave(WaveFilename);
  GenerationConfig.ReferenceAudio := Wave.Samples;
  GenerationConfig.ReferenceAudioLen := Length(Wave.Samples);
  GenerationConfig.ReferenceSampleRate := Wave.SampleRate;

  Audio := Tts.Generate(Text, GenerationConfig, @ProgressCallback, NIL);
  SherpaOnnxWriteWave('./pocket-tts-en.wav', Audio.Samples, Audio.SampleRate);
  WriteLn('Saved to ./pocket-tts-en.wav');

  FreeAndNil(Tts);
end.


================================================
FILE: pascal-api-examples/tts/run-kitten-en-playback.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kitten.html
if [ ! -f ./kitten-nano-en-v0_1-fp16/model.fp16.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2
  tar xf kitten-nano-en-v0_1-fp16.tar.bz2
  rm kitten-nano-en-v0_1-fp16.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  -Fl/usr/local/Cellar/portaudio/19.7.0/lib \
  ./kitten-en-playback.pas

# Please see ../portaudio-test/README.md
# for how to install portaudio on macOS

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./kitten-en-playback


================================================
FILE: pascal-api-examples/tts/run-kitten-en.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kitten.html
if [ ! -f ./kitten-nano-en-v0_1-fp16/model.fp16.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2
  tar xf kitten-nano-en-v0_1-fp16.tar.bz2
  rm kitten-nano-en-v0_1-fp16.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./kitten-en.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./kitten-en


================================================
FILE: pascal-api-examples/tts/run-kokoro-en-playback.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
if [ ! -f ./kokoro-en-v0_19/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
  tar xf kokoro-en-v0_19.tar.bz2
  rm kokoro-en-v0_19.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  -Fl/usr/local/Cellar/portaudio/19.7.0/lib \
  ./kokoro-en-playback.pas

# Please see ../portaudio-test/README.md
# for how to install portaudio on macOS

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./kokoro-en-playback


================================================
FILE: pascal-api-examples/tts/run-kokoro-en.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
if [ ! -f ./kokoro-en-v0_19/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
  tar xf kokoro-en-v0_19.tar.bz2
  rm kokoro-en-v0_19.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./kokoro-en.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./kokoro-en


================================================
FILE: pascal-api-examples/tts/run-kokoro-zh-en-playback.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
if [ ! -f ./kokoro-multi-lang-v1_0/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
  tar xf kokoro-multi-lang-v1_0.tar.bz2
  rm kokoro-multi-lang-v1_0.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  -Fl/usr/local/Cellar/portaudio/19.7.0/lib \
  ./kokoro-zh-en-playback.pas

# Please see ../portaudio-test/README.md
# for how to install portaudio on macOS

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./kokoro-zh-en-playback


================================================
FILE: pascal-api-examples/tts/run-kokoro-zh-en.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
if [ ! -f ./kokoro-multi-lang-v1_0/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
  tar xf kokoro-multi-lang-v1_0.tar.bz2
  rm kokoro-multi-lang-v1_0.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./kokoro-zh-en.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./kokoro-zh-en


================================================
FILE: pascal-api-examples/tts/run-matcha-en-playback.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
# matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
  tar xf matcha-icefall-en_US-ljspeech.tar.bz2
  rm matcha-icefall-en_US-ljspeech.tar.bz2
fi

if [ ! -f ./vocos-22khz-univ.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  -Fl/usr/local/Cellar/portaudio/19.7.0/lib \
  ./matcha-en-playback.pas

# Please see ../portaudio-test/README.md
# for how to install portaudio on macOS

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./matcha-en-playback


================================================
FILE: pascal-api-examples/tts/run-matcha-en.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
# matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
  tar xf matcha-icefall-en_US-ljspeech.tar.bz2
  rm matcha-icefall-en_US-ljspeech.tar.bz2
fi

if [ ! -f ./vocos-22khz-univ.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./matcha-en.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./matcha-en


================================================
FILE: pascal-api-examples/tts/run-matcha-zh-playback.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
  tar xvf matcha-icefall-zh-baker.tar.bz2
  rm matcha-icefall-zh-baker.tar.bz2
fi

if [ ! -f ./vocos-22khz-univ.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  -Fl/usr/local/Cellar/portaudio/19.7.0/lib \
  ./matcha-zh-playback.pas

# Please see ../portaudio-test/README.md
# for how to install portaudio on macOS

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./matcha-zh-playback


================================================
FILE: pascal-api-examples/tts/run-matcha-zh.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
  tar xvf matcha-icefall-zh-baker.tar.bz2
  rm matcha-icefall-zh-baker.tar.bz2
fi

if [ ! -f ./vocos-22khz-univ.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./matcha-zh.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./matcha-zh


================================================
FILE: pascal-api-examples/tts/run-piper-playback.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

if [[ ! -f ./vits-piper-en_US-libritts_r-medium/tokens.txt ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-libritts_r-medium.tar.bz2
  tar xf vits-piper-en_US-libritts_r-medium.tar.bz2
  rm vits-piper-en_US-libritts_r-medium.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  -Fl/usr/local/Cellar/portaudio/19.7.0/lib \
  ./piper-playback.pas

# Please see ../portaudio-test/README.md
# for how to install portaudio on macOS

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./piper-playback


================================================
FILE: pascal-api-examples/tts/run-piper.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

if [[ ! -f ./vits-piper-en_US-libritts_r-medium/tokens.txt ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-libritts_r-medium.tar.bz2
  tar xf vits-piper-en_US-libritts_r-medium.tar.bz2
  rm vits-piper-en_US-libritts_r-medium.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./piper.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./piper


================================================
FILE: pascal-api-examples/tts/run-pocket-en.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pocket.html
if [ ! -f ./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_main.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
  tar xvf sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
  rm sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./pocket-en.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./pocket-en


================================================
FILE: pascal-api-examples/tts/run-supertonic-en.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/supertonic.html
if [ ! -f ./sherpa-onnx-supertonic-tts-int8-2026-03-06/duration_predictor.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
  tar xvf sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
  rm sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./supertonic-en.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./supertonic-en


================================================
FILE: pascal-api-examples/tts/run-zipvoice-zh-en.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/zipvoice.html
if [ ! -f ./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
  tar xvf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
  rm sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
fi

if [ ! -f ./vocos_24khz.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos_24khz.onnx
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./zipvoice-zh-en.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./zipvoice-zh-en


================================================
FILE: pascal-api-examples/tts/supertonic-en.pas
================================================
{ Copyright (c)  2026  Xiaomi Corporation }
program supertonic_en;
{
This file shows how to use the text to speech API of sherpa-onnx
with Supertonic TTS models.

It generates speech from text and saves it to a wave file.

Please visit
https://k2-fsa.github.io/sherpa/onnx/tts/supertonic.html
to download the model.
}

{$mode objfpc}

uses
  SysUtils,
  sherpa_onnx;

function GetOfflineTts: TSherpaOnnxOfflineTts;
var
  Config: TSherpaOnnxOfflineTtsConfig;
begin
  Config.Model.Supertonic.DurationPredictor := './sherpa-onnx-supertonic-tts-int8-2026-03-06/duration_predictor.int8.onnx';
  Config.Model.Supertonic.TextEncoder := './sherpa-onnx-supertonic-tts-int8-2026-03-06/text_encoder.int8.onnx';
  Config.Model.Supertonic.VectorEstimator := './sherpa-onnx-supertonic-tts-int8-2026-03-06/vector_estimator.int8.onnx';
  Config.Model.Supertonic.Vocoder := './sherpa-onnx-supertonic-tts-int8-2026-03-06/vocoder.int8.onnx';
  Config.Model.Supertonic.TtsJson := './sherpa-onnx-supertonic-tts-int8-2026-03-06/tts.json';
  Config.Model.Supertonic.UnicodeIndexer := './sherpa-onnx-supertonic-tts-int8-2026-03-06/unicode_indexer.bin';
  Config.Model.Supertonic.VoiceStyle := './sherpa-onnx-supertonic-tts-int8-2026-03-06/voice.bin';
  Config.Model.NumThreads := 2;
  Config.Model.Debug := True;
  Config.MaxNumSentences := 1;

  Result := TSherpaOnnxOfflineTts.Create(Config);
end;

var
  Tts: TSherpaOnnxOfflineTts;
  GenerationConfig: TSherpaOnnxGenerationConfig;
  Audio: TSherpaOnnxGeneratedAudio;
  Text: AnsiString;

begin
  Tts := GetOfflineTts;

  WriteLn('There are ', Tts.GetNumSpeakers, ' speakers');

  Text := 'Today as always, men fall into two groups: slaves and free men. Whoever ' +
    'does not have two-thirds of his day for himself, is a slave, whatever ' +
    'he may be: a statesman, a businessman, an official, or a scholar.';

  GenerationConfig.Sid := 6;
  GenerationConfig.NumSteps := 5;
  GenerationConfig.Speed := 1.25;
  GenerationConfig.Extra := '{"lang": "en"}';

  Audio := Tts.Generate(Text, GenerationConfig, NIL, NIL);
  SherpaOnnxWriteWave('./supertonic-tts-en.wav', Audio.Samples, Audio.SampleRate);
  WriteLn('Saved to ./supertonic-tts-en.wav');

  FreeAndNil(Tts);
end.


================================================
FILE: pascal-api-examples/tts/zipvoice-zh-en.pas
================================================
{ Copyright (c)  2026  Xiaomi Corporation }
program zipvoice_zh_en;
{
This file shows how to use the text to speech API of sherpa-onnx
with ZipVoice TTS models.

It generates speech from text and saves it to a wave file.

Please visit
https://k2-fsa.github.io/sherpa/onnx/tts/zipvoice.html
to download the model.
}

{$mode objfpc}

uses
  ctypes,
  SysUtils,
  sherpa_onnx;

function ProgressCallback(Samples: pcfloat; N: cint32; P: cfloat;
  Arg: Pointer): cint32; cdecl;
begin
  WriteLn(Format('Progress: %.2f%%, samples: %d', [P * 100.0, N]));
  Result := 1;
end;

function GetOfflineTts: TSherpaOnnxOfflineTts;
var
  Config: TSherpaOnnxOfflineTtsConfig;
begin
  Config := Default(TSherpaOnnxOfflineTtsConfig);
  Config.Model.ZipVoice.Tokens := './sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/tokens.txt';
  Config.Model.ZipVoice.Encoder := './sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/encoder.int8.onnx';
  Config.Model.ZipVoice.Decoder := './sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/decoder.int8.onnx';
  Config.Model.ZipVoice.Vocoder := './vocos_24khz.onnx';
  Config.Model.ZipVoice.DataDir := './sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/espeak-ng-data';
  Config.Model.ZipVoice.Lexicon := './sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/lexicon.txt';
  Config.Model.ZipVoice.FeatScale := 0.1;
  Config.Model.ZipVoice.Tshift := 0.5;
  Config.Model.ZipVoice.TargetRms := 0.1;
  Config.Model.ZipVoice.GuidanceScale := 1.0;
  Config.Model.NumThreads := 2;
  Config.Model.Debug := True;
  Config.MaxNumSentences := 1;

  Result := TSherpaOnnxOfflineTts.Create(Config);
end;

var
  Tts: TSherpaOnnxOfflineTts;
  GenerationConfig: TSherpaOnnxGenerationConfig;
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;
  Audio: TSherpaOnnxGeneratedAudio;
  Text: AnsiString;
  ReferenceText: AnsiString;

begin
  Tts := GetOfflineTts;

  WriteLn('There are ', Tts.GetNumSpeakers, ' speakers');

  Text := '小米的价值观是真诚, 热爱. 真诚，就是不欺人也不自欺. 热爱, 就是全心投入并享受其中.';
  ReferenceText := '那还是三十六年前, 一九八七年. 我呢考上了武汉大学的计算机系.';

  WaveFilename := './sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/test_wavs/leijun-1.wav';
  Wave := SherpaOnnxReadWave(WaveFilename);
  GenerationConfig := Default(TSherpaOnnxGenerationConfig);
  GenerationConfig.SilenceScale := 0.2;
  GenerationConfig.Speed := 1.0;
  GenerationConfig.Sid := 0;
  GenerationConfig.ReferenceAudio := Wave.Samples;
  GenerationConfig.ReferenceAudioLen := Length(Wave.Samples);
  GenerationConfig.ReferenceSampleRate := Wave.SampleRate;
  GenerationConfig.ReferenceText := ReferenceText;
  GenerationConfig.NumSteps := 4;
  GenerationConfig.Extra := '{"min_char_in_sentence": "10"}';

  Audio := Tts.Generate(Text, GenerationConfig, @ProgressCallback, NIL);
  SherpaOnnxWriteWave('./zipvoice-zh-en.wav', Audio.Samples, Audio.SampleRate);
  WriteLn('Saved to ./zipvoice-zh-en.wav');

  FreeAndNil(Tts);
end.


================================================
FILE: pascal-api-examples/vad/.gitignore
================================================
!run*.sh
circular_buffer
remove_silence
remove_silence_ten_vad


================================================
FILE: pascal-api-examples/vad/README.md
================================================
# Introduction


This directory contains examples for how to use the VAD (voice activity detection)
APIs.

|Directory| Description|
|---------|------------|
|[run-circular-buffer.sh](./run-circular-buffer.sh)|It shows how to use the circular buffer API.|
|[run-remove-silence.sh](./run-remove-silence.sh)|It shows how to use the VAD API to remove silences from a wave file.|


================================================
FILE: pascal-api-examples/vad/circular_buffer.pas
================================================
{ Copyright (c)  2024  Xiaomi Corporation }
program circular_buffer;
{
This file shows how to use the CircularBuffer API of sherpa-onnx
}

{$mode objfpc}
{$ASSERTIONS ON}

uses
  sherpa_onnx;

var
  Buffer: TSherpaOnnxCircularBuffer;
  Samples: TSherpaOnnxSamplesArray;
begin
  {The initial capacity is 5. It will be resized automatically if needed.}
  Buffer := TSherpaOnnxCircularBuffer.Create(5);
  Assert(Buffer.Size = 0);
  Assert(Buffer.Head = 0);
  Buffer.Push([0, 10, 20]);

  {Push() changes Size. Head is not changed.}
  Assert(Buffer.Size = 3);
  Assert(Buffer.Head = 0);

  Samples := Buffer.Get(0, 1);
  Assert(Length(Samples) = 1);
  Assert(Samples[0] = 0);

  { Get() does not change Size or Head}
  Assert(Buffer.Size = 3);
  Assert(Buffer.Head = 0);

  Samples := Buffer.Get(0, 2);
  Assert(Length(Samples) = 2);
  Assert(Samples[0] = 0);
  Assert(Samples[1] = 10);

  { The buffer will be resized since its initial capacity is 5 but we have
    pushed 7 elements into it.

    No data is lost during the resize.
  }
  Buffer.Push([30, 40, 50, 60]);

  Assert(Buffer.Size = 7); {There are now 7 elements}
  Assert(Buffer.Head = 0);

  {Remove the first 4 elements}
  Buffer.Pop(4);

  Assert(Buffer.Size = 3); {There are only 3 elements left}
  Assert(Buffer.Head = 4);

  Samples := Buffer.Get(Buffer.Head, 2);
  Assert(Length(Samples) = 2);
  Assert(Samples[0] = 40);
  Assert(Samples[1] = 50);

  Buffer.Pop(1);

  Assert(Buffer.Size = 2); {There are only 2 elements left}
  Assert(Buffer.Head = 5);

  Samples := Buffer.Get(Buffer.Head, 2);
  Assert(Length(Samples) = 2);
  Assert(Samples[0] = 50);
  Assert(Samples[1] = 60);

  Buffer.Pop(2);
  Assert(Buffer.Size = 0); {There are no elements left}
  Assert(Buffer.Head = 7);

  Buffer.Push([100, 200, 300, 400, 500]);
  Assert(Buffer.Size = 5);
  Assert(Buffer.Head = 7);

  Buffer.Pop(4);
  Assert(Buffer.Size = 1);

  {Head can be larger than the Capacity!
   This is what circular means. It points to Buffer.Head / Capacity.
  }
  Assert(Buffer.Head = 11);
  Buffer.Push([600, 700]);

  Assert(Buffer.Size = 3);
  Assert(Buffer.Head = 11);

  Samples := Buffer.Get(Buffer.Head, 3);
  Assert(Length(Samples) = 3);
  Assert(Samples[0] = 500);
  Assert(Samples[1] = 600);
  Assert(Samples[2] = 700);

  Buffer.Pop(3);
  Assert(Buffer.Size = 0);
  Assert(Buffer.Head = 14);

  Buffer.Reset();

  Assert(Buffer.Size = 0);
  Assert(Buffer.Head = 0);
end.


================================================
FILE: pascal-api-examples/vad/remove_silence.pas
================================================
{ Copyright (c)  2024  Xiaomi Corporation }
{
This file shows how to use the VAD API from sherpa-onnx
to remove silences from a wave file with silero-vad.
}
program main;

{$mode delphi}

uses
  sherpa_onnx,
  SysUtils;

var
  Wave: TSherpaOnnxWave;

  Config: TSherpaOnnxVadModelConfig;
  Vad: TSherpaOnnxVoiceActivityDetector;
  Offset: Integer;
  WindowSize: Integer;
  SpeechSegment: TSherpaOnnxSpeechSegment;

  Start: Single;
  Duration: Single;
  SampleRate: Integer;

  AllSpeechSegment: array of TSherpaOnnxSpeechSegment;
  AllSamples: array of Single;
  N: Integer;
  I: Integer;
begin
  SampleRate := 16000; {Please don't change it unless you know the details}

  Wave := SherpaOnnxReadWave('./lei-jun-test.wav');
  if Wave.SampleRate <> SampleRate then
    begin
      WriteLn(Format('Expected sample rate: %d. Given: %d',
        [SampleRate, Wave.SampleRate]));

      Exit;
    end;

  WindowSize := 512; {Please don't change it unless you know the details}
  Initialize(Config);

  Config.SileroVad.Model := './silero_vad.onnx';
  Config.SileroVad.MinSpeechDuration := 0.25;
  Config.SileroVad.MinSilenceDuration := 0.5;
  Config.SileroVad.Threshold := 0.5;
  Config.SileroVad.WindowSize := WindowSize;
  Config.NumThreads:= 1;
  Config.Debug:= True;
  Config.Provider:= 'cpu';
  Config.SampleRate := SampleRate;

  Vad := TSherpaOnnxVoiceActivityDetector.Create(Config, 20);

  AllSpeechSegment := nil;
  AllSamples := nil;
  Offset := 0;
  while Offset + WindowSize <= Length(Wave.Samples) do
    begin
      Vad.AcceptWaveform(Wave.Samples, Offset, WindowSize);
      Inc(Offset, WindowSize);

      while not Vad.IsEmpty do
        begin
          SetLength(AllSpeechSegment, Length(AllSpeechSegment) + 1);

          SpeechSegment := Vad.Front();
          Vad.Pop();
          AllSpeechSegment[Length(AllSpeechSegment)-1] := SpeechSegment;

          Start := SpeechSegment.Start / SampleRate;
          Duration := Length(SpeechSegment.Samples) / SampleRate;
          WriteLn(Format('%.3f -- %.3f', [Start, Start + Duration]));
        end;
    end;

  Vad.Flush;

  while not Vad.IsEmpty do
    begin
      SetLength(AllSpeechSegment, Length(AllSpeechSegment) + 1);

      SpeechSegment := Vad.Front();
      Vad.Pop();
      AllSpeechSegment[Length(AllSpeechSegment)-1] := SpeechSegment;

      Start := SpeechSegment.Start / SampleRate;
      Duration := Length(SpeechSegment.Samples) / SampleRate;
      WriteLn(Format('%.3f -- %.3f', [Start, Start + Duration]));
    end;

  N := 0;
  for SpeechSegment in AllSpeechSegment do
    Inc(N, Length(SpeechSegment.Samples));

  SetLength(AllSamples, N);

  N := 0;
  for SpeechSegment in AllSpeechSegment do
    begin
      for I := Low(SpeechSegment.Samples) to High(SpeechSegment.Samples) do
        begin
          AllSamples[N] := SpeechSegment.Samples[I];
          Inc(N);
        end;
    end;

  SherpaOnnxWriteWave('./lei-jun-test-no-silence.wav', AllSamples, SampleRate);
  WriteLn('Saved to ./lei-jun-test-no-silence.wav');

  FreeAndNil(Vad);
end.


================================================
FILE: pascal-api-examples/vad/remove_silence_ten_vad.pas
================================================
{ Copyright (c)  2025  Xiaomi Corporation }
{
This file shows how to use the VAD API from sherpa-onnx
to remove silences from a wave file with ten-vad.
}
program main;

{$mode delphi}

uses
  sherpa_onnx,
  SysUtils;

var
  Wave: TSherpaOnnxWave;

  Config: TSherpaOnnxVadModelConfig;
  Vad: TSherpaOnnxVoiceActivityDetector;
  Offset: Integer;
  WindowSize: Integer;
  SpeechSegment: TSherpaOnnxSpeechSegment;

  Start: Single;
  Duration: Single;
  SampleRate: Integer;

  AllSpeechSegment: array of TSherpaOnnxSpeechSegment;
  AllSamples: array of Single;
  N: Integer;
  I: Integer;
begin
  SampleRate := 16000; {Please don't change it unless you know the details}

  Wave := SherpaOnnxReadWave('./lei-jun-test.wav');
  if Wave.SampleRate <> SampleRate then
    begin
      WriteLn(Format('Expected sample rate: %d. Given: %d',
        [SampleRate, Wave.SampleRate]));

      Exit;
    end;

  WindowSize := 256; {Please don't change it unless you know the details}
  Initialize(Config);

  Config.TenVad.Model := './ten-vad.onnx';
  Config.TenVad.MinSpeechDuration := 0.25;
  Config.TenVad.MinSilenceDuration := 0.5;
  Config.TenVad.Threshold := 0.25;
  Config.TenVad.WindowSize := WindowSize;
  Config.NumThreads:= 1;
  Config.Debug:= True;
  Config.Provider:= 'cpu';
  Config.SampleRate := SampleRate;

  Vad := TSherpaOnnxVoiceActivityDetector.Create(Config, 20);

  AllSpeechSegment := nil;
  AllSamples := nil;
  Offset := 0;
  while Offset + WindowSize <= Length(Wave.Samples) do
    begin
      Vad.AcceptWaveform(Wave.Samples, Offset, WindowSize);
      Inc(Offset, WindowSize);

      while not Vad.IsEmpty do
        begin
          SetLength(AllSpeechSegment, Length(AllSpeechSegment) + 1);

          SpeechSegment := Vad.Front();
          Vad.Pop();
          AllSpeechSegment[Length(AllSpeechSegment)-1] := SpeechSegment;

          Start := SpeechSegment.Start / SampleRate;
          Duration := Length(SpeechSegment.Samples) / SampleRate;
          WriteLn(Format('%.3f -- %.3f', [Start, Start + Duration]));
        end;
    end;

  Vad.Flush;

  while not Vad.IsEmpty do
    begin
      SetLength(AllSpeechSegment, Length(AllSpeechSegment) + 1);

      SpeechSegment := Vad.Front();
      Vad.Pop();
      AllSpeechSegment[Length(AllSpeechSegment)-1] := SpeechSegment;

      Start := SpeechSegment.Start / SampleRate;
      Duration := Length(SpeechSegment.Samples) / SampleRate;
      WriteLn(Format('%.3f -- %.3f', [Start, Start + Duration]));
    end;

  N := 0;
  for SpeechSegment in AllSpeechSegment do
    Inc(N, Length(SpeechSegment.Samples));

  SetLength(AllSamples, N);

  N := 0;
  for SpeechSegment in AllSpeechSegment do
    begin
      for I := Low(SpeechSegment.Samples) to High(SpeechSegment.Samples) do
        begin
          AllSamples[N] := SpeechSegment.Samples[I];
          Inc(N);
        end;
    end;

  SherpaOnnxWriteWave('./lei-jun-test-no-silence-ten-vad.wav', AllSamples, SampleRate);
  WriteLn('Saved to ./lei-jun-test-no-silence-ten-vad.wav');

  FreeAndNil(Vad);
end.


================================================
FILE: pascal-api-examples/vad/run-circular-buffer.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./circular_buffer.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./circular_buffer


================================================
FILE: pascal-api-examples/vad/run-remove-silence-ten-vad.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

if [[ ! -f ./ten-vad.onnx ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
fi

if [ ! -f ./lei-jun-test.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./remove_silence_ten_vad.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./remove_silence_ten_vad


================================================
FILE: pascal-api-examples/vad/run-remove-silence.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

if [[ ! -f ./silero_vad.onnx ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [ ! -f ./lei-jun-test.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./remove_silence.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./remove_silence


================================================
FILE: pascal-api-examples/vad-with-non-streaming-asr/.gitignore
================================================
!run-*.sh
vad_with_whisper
vad_with_sense_voice
vad_with_moonshine
vad_with_zipformer_ctc
vad_with_dolphin


================================================
FILE: pascal-api-examples/vad-with-non-streaming-asr/README.md
================================================
# Introduction


This directory contains examples for how to use the VAD (voice activity detection)
with non-streaming speech recognition models.

|Directory| Description|
|---------|------------|
|[run-vad-with-dolphin-ctc.sh](./run-vad-with-dolphin-ctc.sh)|It shows how to use the VAD + [Dolphin](https://github.com/DataoceanAI/Dolphin) for speech recognition.|
|[run-vad-with-whisper.sh](./run-vad-with-whisper.sh)|It shows how to use the VAD + [Whisper](https://github.com/openai/whisper) for speech recognition.|
|[run-vad-with-sense-voice.sh](./run-vad-with-sense-voice.sh)|It shows how to use the VAD + [SenseVoice](https://github.com/FunAudioLLM/SenseVoice) for speech recognition.|
|[run-vad-with-moonshine.sh](./run-vad-with-moonshine.sh)|It shows how to use the VAD + [Moonshine](https://github.com/usefulsensors/moonshine) for speech recognition.|


Please refer to [non-streaming-asr](../non-streaming-asr) for more kinds of non-streaming models.


================================================
FILE: pascal-api-examples/vad-with-non-streaming-asr/run-vad-with-dolphin-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

if [[ ! -f ./silero_vad.onnx ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [ ! -f ./lei-jun-test.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

if [ ! -f ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./vad_with_dolphin.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./vad_with_dolphin


================================================
FILE: pascal-api-examples/vad-with-non-streaming-asr/run-vad-with-moonshine.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

if [[ ! -f ./silero_vad.onnx ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [ ! -f ./Obama.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
fi

if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./vad_with_moonshine.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./vad_with_moonshine


================================================
FILE: pascal-api-examples/vad-with-non-streaming-asr/run-vad-with-sense-voice.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

if [[ ! -f ./silero_vad.onnx ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [ ! -f ./lei-jun-test.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

if [ ! -f ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./vad_with_sense_voice.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./vad_with_sense_voice


================================================
FILE: pascal-api-examples/vad-with-non-streaming-asr/run-vad-with-whisper.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

if [[ ! -f ./silero_vad.onnx ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [ ! -f ./Obama.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
fi

if [ ! -f ./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2

  tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
  rm sherpa-onnx-whisper-tiny.en.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./vad_with_whisper.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./vad_with_whisper


================================================
FILE: pascal-api-examples/vad-with-non-streaming-asr/run-vad-with-zipformer-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

if [[ ! -f ./silero_vad.onnx ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [ ! -f ./lei-jun-test.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

if [ ! -f ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2

  tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
  rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  -Fl$SHERPA_ONNX_DIR/build/install/lib \
  ./vad_with_zipformer_ctc.pas

export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH

./vad_with_zipformer_ctc


================================================
FILE: pascal-api-examples/vad-with-non-streaming-asr/vad_with_dolphin.pas
================================================
{ Copyright (c)  2025  Xiaomi Corporation }

{
This file shows how to use a non-streaming Dolphin model
with silero VAD to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program vad_with_dolphin;

{$mode objfpc}

uses
  sherpa_onnx,
  SysUtils;

function CreateVad(): TSherpaOnnxVoiceActivityDetector;
var
  Config: TSherpaOnnxVadModelConfig;

  SampleRate: Integer;
  WindowSize: Integer;
begin
  Initialize(Config);

  SampleRate := 16000; {Please don't change it unless you know the details}
  WindowSize := 512; {Please don't change it unless you know the details}

  Config.SileroVad.Model := './silero_vad.onnx';
  Config.SileroVad.MinSpeechDuration := 0.5;
  Config.SileroVad.MinSilenceDuration := 0.5;
  Config.SileroVad.Threshold := 0.5;
  Config.SileroVad.WindowSize := WindowSize;
  Config.NumThreads:= 1;
  Config.Debug:= True;
  Config.Provider:= 'cpu';
  Config.SampleRate := SampleRate;

  Result := TSherpaOnnxVoiceActivityDetector.Create(Config, 30);
end;

function CreateOfflineRecognizer(): TSherpaOnnxOfflineRecognizer;
var
  Config: TSherpaOnnxOfflineRecognizerConfig;
begin
  Initialize(Config);

  Config.ModelConfig.Dolphin.Model := './sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx';
  Config.ModelConfig.Tokens := './sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  Result := TSherpaOnnxOfflineRecognizer.Create(Config);
end;

var
  Wave: TSherpaOnnxWave;

  Recognizer: TSherpaOnnxOfflineRecognizer;
  Vad: TSherpaOnnxVoiceActivityDetector;

  Offset: Integer;
  WindowSize: Integer;
  SpeechSegment: TSherpaOnnxSpeechSegment;

  Start: Single;
  Duration: Single;

  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;
begin
  Vad := CreateVad();
  Recognizer := CreateOfflineRecognizer();

  Wave := SherpaOnnxReadWave('./lei-jun-test.wav');
  if Wave.SampleRate <> Vad.Config.SampleRate then
    begin
      WriteLn(Format('Expected sample rate: %d. Given: %d',
        [Vad.Config.SampleRate, Wave.SampleRate]));

      Exit;
    end;

  WindowSize := Vad.Config.SileroVad.WindowSize;
  Offset := 0;
  while Offset + WindowSize <= Length(Wave.Samples) do
    begin
      Vad.AcceptWaveform(Wave.Samples, Offset, WindowSize);
      Offset += WindowSize;

      while not Vad.IsEmpty do
        begin
          SpeechSegment := Vad.Front();
          Vad.Pop();
          Stream := Recognizer.CreateStream();

          Stream.AcceptWaveform(SpeechSegment.Samples, Wave.SampleRate);
          Recognizer.Decode(Stream);
          RecognitionResult := Recognizer.GetResult(Stream);

          Start := SpeechSegment.Start / Wave.SampleRate;
          Duration := Length(SpeechSegment.Samples) / Wave.SampleRate;
          WriteLn(Format('%.3f -- %.3f %s',
            [Start, Start + Duration, RecognitionResult.Text]));

          FreeAndNil(Stream);
        end;
    end;

  Vad.Flush;

  while not Vad.IsEmpty do
    begin
      SpeechSegment := Vad.Front();
      Vad.Pop();
      Stream := Recognizer.CreateStream();

      Stream.AcceptWaveform(SpeechSegment.Samples, Wave.SampleRate);
      Recognizer.Decode(Stream);
      RecognitionResult := Recognizer.GetResult(Stream);

      Start := SpeechSegment.Start / Wave.SampleRate;
      Duration := Length(SpeechSegment.Samples) / Wave.SampleRate;
      WriteLn(Format('%.3f -- %.3f %s',
        [Start, Start + Duration, RecognitionResult.Text]));

      FreeAndNil(Stream);
    end;

  FreeAndNil(Recognizer);
  FreeAndNil(Vad);
end.


================================================
FILE: pascal-api-examples/vad-with-non-streaming-asr/vad_with_moonshine.pas
================================================
{ Copyright (c)  2024  Xiaomi Corporation }

{
This file shows how to use a non-streaming Moonshine model
with silero VAD to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program vad_with_moonshine;

{$mode objfpc}

uses
  sherpa_onnx,
  SysUtils;

function CreateVad(): TSherpaOnnxVoiceActivityDetector;
var
  Config: TSherpaOnnxVadModelConfig;

  SampleRate: Integer;
  WindowSize: Integer;
begin
  Initialize(Config);

  SampleRate := 16000; {Please don't change it unless you know the details}
  WindowSize := 512; {Please don't change it unless you know the details}

  Config.SileroVad.Model := './silero_vad.onnx';
  Config.SileroVad.MinSpeechDuration := 0.5;
  Config.SileroVad.MinSilenceDuration := 0.5;
  Config.SileroVad.Threshold := 0.5;
  Config.SileroVad.WindowSize := WindowSize;
  Config.NumThreads:= 1;
  Config.Debug:= True;
  Config.Provider:= 'cpu';
  Config.SampleRate := SampleRate;

  Result := TSherpaOnnxVoiceActivityDetector.Create(Config, 30);
end;

function CreateOfflineRecognizer(): TSherpaOnnxOfflineRecognizer;
var
  Config: TSherpaOnnxOfflineRecognizerConfig;
begin
  Initialize(Config);

  Config.ModelConfig.Moonshine.Preprocessor := './sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx';
  Config.ModelConfig.Moonshine.Encoder := './sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx';
  Config.ModelConfig.Moonshine.UncachedDecoder := './sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx';
  Config.ModelConfig.Moonshine.CachedDecoder := './sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx';

  Config.ModelConfig.Tokens := './sherpa-onnx-moonshine-tiny-en-int8/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  Result := TSherpaOnnxOfflineRecognizer.Create(Config);
end;

var
  Wave: TSherpaOnnxWave;

  Recognizer: TSherpaOnnxOfflineRecognizer;
  Vad: TSherpaOnnxVoiceActivityDetector;

  Offset: Integer;
  WindowSize: Integer;
  SpeechSegment: TSherpaOnnxSpeechSegment;

  Start: Single;
  Duration: Single;

  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;
begin
  Vad := CreateVad();
  Recognizer := CreateOfflineRecognizer();

  Wave := SherpaOnnxReadWave('./Obama.wav');
  if Wave.SampleRate <> Vad.Config.SampleRate then
    begin
      WriteLn(Format('Expected sample rate: %d. Given: %d',
        [Vad.Config.SampleRate, Wave.SampleRate]));

      Exit;
    end;

  WindowSize := Vad.Config.SileroVad.WindowSize;
  Offset := 0;
  while Offset + WindowSize <= Length(Wave.Samples) do
    begin
      Vad.AcceptWaveform(Wave.Samples, Offset, WindowSize);
      Offset += WindowSize;

      while not Vad.IsEmpty do
        begin
          SpeechSegment := Vad.Front();
          Vad.Pop();
          Stream := Recognizer.CreateStream();

          Stream.AcceptWaveform(SpeechSegment.Samples, Wave.SampleRate);
          Recognizer.Decode(Stream);
          RecognitionResult := Recognizer.GetResult(Stream);

          Start := SpeechSegment.Start / Wave.SampleRate;
          Duration := Length(SpeechSegment.Samples) / Wave.SampleRate;
          WriteLn(Format('%.3f -- %.3f %s',
            [Start, Start + Duration, RecognitionResult.Text]));

          FreeAndNil(Stream);
        end;
    end;

  Vad.Flush;

  while not Vad.IsEmpty do
    begin
      SpeechSegment := Vad.Front();
      Vad.Pop();
      Stream := Recognizer.CreateStream();

      Stream.AcceptWaveform(SpeechSegment.Samples, Wave.SampleRate);
      Recognizer.Decode(Stream);
      RecognitionResult := Recognizer.GetResult(Stream);

      Start := SpeechSegment.Start / Wave.SampleRate;
      Duration := Length(SpeechSegment.Samples) / Wave.SampleRate;
      WriteLn(Format('%.3f -- %.3f %s',
        [Start, Start + Duration, RecognitionResult.Text]));

      FreeAndNil(Stream);
    end;

  FreeAndNil(Recognizer);
  FreeAndNil(Vad);
end.


================================================
FILE: pascal-api-examples/vad-with-non-streaming-asr/vad_with_sense_voice.pas
================================================
{ Copyright (c)  2024  Xiaomi Corporation }

{
This file shows how to use a non-streaming SenseVoice model
with silero VAD to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program vad_with_sense_voice;

{$mode objfpc}

uses
  sherpa_onnx,
  SysUtils;

function CreateVad(): TSherpaOnnxVoiceActivityDetector;
var
  Config: TSherpaOnnxVadModelConfig;

  SampleRate: Integer;
  WindowSize: Integer;
begin
  Initialize(Config);

  SampleRate := 16000; {Please don't change it unless you know the details}
  WindowSize := 512; {Please don't change it unless you know the details}

  Config.SileroVad.Model := './silero_vad.onnx';
  Config.SileroVad.MinSpeechDuration := 0.5;
  Config.SileroVad.MinSilenceDuration := 0.5;
  Config.SileroVad.Threshold := 0.5;
  Config.SileroVad.WindowSize := WindowSize;
  Config.NumThreads:= 1;
  Config.Debug:= True;
  Config.Provider:= 'cpu';
  Config.SampleRate := SampleRate;

  Result := TSherpaOnnxVoiceActivityDetector.Create(Config, 30);
end;

function CreateOfflineRecognizer(): TSherpaOnnxOfflineRecognizer;
var
  Config: TSherpaOnnxOfflineRecognizerConfig;
begin
  Initialize(Config);

  Config.ModelConfig.SenseVoice.Model := './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx';
  Config.ModelConfig.SenseVoice.Language := 'auto';
  Config.ModelConfig.SenseVoice.UseItn := False;
  Config.ModelConfig.Tokens := './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  Result := TSherpaOnnxOfflineRecognizer.Create(Config);
end;

var
  Wave: TSherpaOnnxWave;

  Recognizer: TSherpaOnnxOfflineRecognizer;
  Vad: TSherpaOnnxVoiceActivityDetector;

  Offset: Integer;
  WindowSize: Integer;
  SpeechSegment: TSherpaOnnxSpeechSegment;

  Start: Single;
  Duration: Single;

  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;
begin
  Vad := CreateVad();
  Recognizer := CreateOfflineRecognizer();

  Wave := SherpaOnnxReadWave('./lei-jun-test.wav');
  if Wave.SampleRate <> Vad.Config.SampleRate then
    begin
      WriteLn(Format('Expected sample rate: %d. Given: %d',
        [Vad.Config.SampleRate, Wave.SampleRate]));

      Exit;
    end;

  WindowSize := Vad.Config.SileroVad.WindowSize;
  Offset := 0;
  while Offset + WindowSize <= Length(Wave.Samples) do
    begin
      Vad.AcceptWaveform(Wave.Samples, Offset, WindowSize);
      Offset += WindowSize;

      while not Vad.IsEmpty do
        begin
          SpeechSegment := Vad.Front();
          Vad.Pop();
          Stream := Recognizer.CreateStream();

          Stream.AcceptWaveform(SpeechSegment.Samples, Wave.SampleRate);
          Recognizer.Decode(Stream);
          RecognitionResult := Recognizer.GetResult(Stream);

          Start := SpeechSegment.Start / Wave.SampleRate;
          Duration := Length(SpeechSegment.Samples) / Wave.SampleRate;
          WriteLn(Format('%.3f -- %.3f %s',
            [Start, Start + Duration, RecognitionResult.Text]));

          FreeAndNil(Stream);
        end;
    end;

  Vad.Flush;

  while not Vad.IsEmpty do
    begin
      SpeechSegment := Vad.Front();
      Vad.Pop();
      Stream := Recognizer.CreateStream();

      Stream.AcceptWaveform(SpeechSegment.Samples, Wave.SampleRate);
      Recognizer.Decode(Stream);
      RecognitionResult := Recognizer.GetResult(Stream);

      Start := SpeechSegment.Start / Wave.SampleRate;
      Duration := Length(SpeechSegment.Samples) / Wave.SampleRate;
      WriteLn(Format('%.3f -- %.3f %s',
        [Start, Start + Duration, RecognitionResult.Text]));

      FreeAndNil(Stream);
    end;

  FreeAndNil(Recognizer);
  FreeAndNil(Vad);
end.


================================================
FILE: pascal-api-examples/vad-with-non-streaming-asr/vad_with_whisper.pas
================================================
{ Copyright (c)  2024  Xiaomi Corporation }

{
This file shows how to use a non-streaming Whisper model
with silero VAD to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program vad_with_whisper;

{$mode objfpc}

uses
  sherpa_onnx,
  SysUtils;

function CreateVad(): TSherpaOnnxVoiceActivityDetector;
var
  Config: TSherpaOnnxVadModelConfig;

  SampleRate: Integer;
  WindowSize: Integer;
begin
  Initialize(Config);

  SampleRate := 16000; {Please don't change it unless you know the details}
  WindowSize := 512; {Please don't change it unless you know the details}

  Config.SileroVad.Model := './silero_vad.onnx';
  Config.SileroVad.MinSpeechDuration := 0.5;
  Config.SileroVad.MinSilenceDuration := 0.5;
  Config.SileroVad.Threshold := 0.5;
  Config.SileroVad.WindowSize := WindowSize;
  Config.NumThreads:= 1;
  Config.Debug:= True;
  Config.Provider:= 'cpu';
  Config.SampleRate := SampleRate;

  Result := TSherpaOnnxVoiceActivityDetector.Create(Config, 30);
end;

function CreateOfflineRecognizer(): TSherpaOnnxOfflineRecognizer;
var
  Config: TSherpaOnnxOfflineRecognizerConfig;
begin
  Initialize(Config);

  Config.ModelConfig.Whisper.Encoder := './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx';
  Config.ModelConfig.Whisper.Decoder := './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx';
  Config.ModelConfig.Tokens := './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  Result := TSherpaOnnxOfflineRecognizer.Create(Config);
end;

var
  Wave: TSherpaOnnxWave;

  Recognizer: TSherpaOnnxOfflineRecognizer;
  Vad: TSherpaOnnxVoiceActivityDetector;

  Offset: Integer;
  WindowSize: Integer;
  SpeechSegment: TSherpaOnnxSpeechSegment;

  Start: Single;
  Duration: Single;

  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;
begin
  Vad := CreateVad();
  Recognizer := CreateOfflineRecognizer();

  Wave := SherpaOnnxReadWave('./Obama.wav');
  if Wave.SampleRate <> Vad.Config.SampleRate then
    begin
      WriteLn(Format('Expected sample rate: %d. Given: %d',
        [Vad.Config.SampleRate, Wave.SampleRate]));

      Exit;
    end;

  WindowSize := Vad.Config.SileroVad.WindowSize;
  Offset := 0;
  while Offset + WindowSize <= Length(Wave.Samples) do
    begin
      Vad.AcceptWaveform(Wave.Samples, Offset, WindowSize);
      Offset += WindowSize;

      while not Vad.IsEmpty do
        begin
          SpeechSegment := Vad.Front();
          Vad.Pop();
          Stream := Recognizer.CreateStream();

          Stream.AcceptWaveform(SpeechSegment.Samples, Wave.SampleRate);
          Recognizer.Decode(Stream);
          RecognitionResult := Recognizer.GetResult(Stream);

          Start := SpeechSegment.Start / Wave.SampleRate;
          Duration := Length(SpeechSegment.Samples) / Wave.SampleRate;
          WriteLn(Format('%.3f -- %.3f %s',
            [Start, Start + Duration, RecognitionResult.Text]));

          FreeAndNil(Stream);
        end;
    end;

  Vad.Flush;

  while not Vad.IsEmpty do
    begin
      SpeechSegment := Vad.Front();
      Vad.Pop();
      Stream := Recognizer.CreateStream();

      Stream.AcceptWaveform(SpeechSegment.Samples, Wave.SampleRate);
      Recognizer.Decode(Stream);
      RecognitionResult := Recognizer.GetResult(Stream);

      Start := SpeechSegment.Start / Wave.SampleRate;
      Duration := Length(SpeechSegment.Samples) / Wave.SampleRate;
      WriteLn(Format('%.3f -- %.3f %s',
        [Start, Start + Duration, RecognitionResult.Text]));

      FreeAndNil(Stream);
    end;

  FreeAndNil(Recognizer);
  FreeAndNil(Vad);
end.


================================================
FILE: pascal-api-examples/vad-with-non-streaming-asr/vad_with_zipformer_ctc.pas
================================================
{ Copyright (c)  2025  Xiaomi Corporation }

{
This file shows how to use a non-streaming Zipformer CTC model
with silero VAD to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program vad_with_zipformer_ctc;

{$mode objfpc}

uses
  sherpa_onnx,
  SysUtils;

function CreateVad(): TSherpaOnnxVoiceActivityDetector;
var
  Config: TSherpaOnnxVadModelConfig;

  SampleRate: Integer;
  WindowSize: Integer;
begin
  Initialize(Config);

  SampleRate := 16000; {Please don't change it unless you know the details}
  WindowSize := 512; {Please don't change it unless you know the details}

  Config.SileroVad.Model := './silero_vad.onnx';
  Config.SileroVad.MinSpeechDuration := 0.5;
  Config.SileroVad.MinSilenceDuration := 0.5;
  Config.SileroVad.Threshold := 0.5;
  Config.SileroVad.WindowSize := WindowSize;
  Config.NumThreads:= 1;
  Config.Debug:= True;
  Config.Provider:= 'cpu';
  Config.SampleRate := SampleRate;

  Result := TSherpaOnnxVoiceActivityDetector.Create(Config, 30);
end;

function CreateOfflineRecognizer(): TSherpaOnnxOfflineRecognizer;
var
  Config: TSherpaOnnxOfflineRecognizerConfig;
begin
  Initialize(Config);

  Config.ModelConfig.ZipformerCtc.Model := './sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx';
  Config.ModelConfig.Tokens := './sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  Result := TSherpaOnnxOfflineRecognizer.Create(Config);
end;

var
  Wave: TSherpaOnnxWave;

  Recognizer: TSherpaOnnxOfflineRecognizer;
  Vad: TSherpaOnnxVoiceActivityDetector;

  Offset: Integer;
  WindowSize: Integer;
  SpeechSegment: TSherpaOnnxSpeechSegment;

  Start: Single;
  Duration: Single;

  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;
begin
  Vad := CreateVad();
  Recognizer := CreateOfflineRecognizer();

  Wave := SherpaOnnxReadWave('./lei-jun-test.wav');
  if Wave.SampleRate <> Vad.Config.SampleRate then
    begin
      WriteLn(Format('Expected sample rate: %d. Given: %d',
        [Vad.Config.SampleRate, Wave.SampleRate]));

      Exit;
    end;

  WindowSize := Vad.Config.SileroVad.WindowSize;
  Offset := 0;
  while Offset + WindowSize <= Length(Wave.Samples) do
    begin
      Vad.AcceptWaveform(Wave.Samples, Offset, WindowSize);
      Offset += WindowSize;

      while not Vad.IsEmpty do
        begin
          SpeechSegment := Vad.Front();
          Vad.Pop();
          Stream := Recognizer.CreateStream();

          Stream.AcceptWaveform(SpeechSegment.Samples, Wave.SampleRate);
          Recognizer.Decode(Stream);
          RecognitionResult := Recognizer.GetResult(Stream);

          Start := SpeechSegment.Start / Wave.SampleRate;
          Duration := Length(SpeechSegment.Samples) / Wave.SampleRate;
          WriteLn(Format('%.3f -- %.3f %s',
            [Start, Start + Duration, RecognitionResult.Text]));

          FreeAndNil(Stream);
        end;
    end;

  Vad.Flush;

  while not Vad.IsEmpty do
    begin
      SpeechSegment := Vad.Front();
      Vad.Pop();
      Stream := Recognizer.CreateStream();

      Stream.AcceptWaveform(SpeechSegment.Samples, Wave.SampleRate);
      Recognizer.Decode(Stream);
      RecognitionResult := Recognizer.GetResult(Stream);

      Start := SpeechSegment.Start / Wave.SampleRate;
      Duration := Length(SpeechSegment.Samples) / Wave.SampleRate;
      WriteLn(Format('%.3f -- %.3f %s',
        [Start, Start + Duration, RecognitionResult.Text]));

      FreeAndNil(Stream);
    end;

  FreeAndNil(Recognizer);
  FreeAndNil(Vad);
end.


================================================
FILE: pom.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns="http://maven.apache.org/POM/4.0.0"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.k2fsa.sherpa.onnx</groupId>
    <artifactId>sherpa-onnx-android</artifactId>
    <version>1.12.31</version>
    <url>https://github.com/k2-fsa/sherpa-onnx</url>
    <packaging>pom</packaging>
    <description>First Android Library</description>

    <licenses>
      <license>
        <name>The Apache Software License, Version 2.0</name>
        <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
        <distribution>repo</distribution>
      </license>
    </licenses>
</project>


================================================
FILE: python-api-examples/README.md
================================================
# File description

- [./http_server.py](./http_server.py) It defines which files to server.
  Files are saved in [./web](./web).
- [non_streaming_server.py](./non_streaming_server.py) WebSocket server for
  non-streaming models.
- [vad-remove-non-speech-segments.py](./vad-remove-non-speech-segments.py) It uses
  [silero-vad](https://github.com/snakers4/silero-vad) to remove non-speech
  segments and concatenate all speech segments into a single one.
- [vad-with-non-streaming-asr.py](./vad-with-non-streaming-asr.py) It shows
  how to use VAD with a non-streaming ASR model for speech recognition from
  a microphone
- [offline-speech-enhancement-gtcrn.py](./offline-speech-enhancement-gtcrn.py)
  It shows how to use the offline speech denoiser API with GTCRN.

- [offline-speech-enhancement-dpdfnet.py](./offline-speech-enhancement-dpdfnet.py)
  It shows how to use the offline speech denoiser API with DPDFNet.

- [online-speech-enhancement-gtcrn.py](./online-speech-enhancement-gtcrn.py)
  It shows how to use the online speech denoiser API with GTCRN.

- [online-speech-enhancement-dpdfnet.py](./online-speech-enhancement-dpdfnet.py)
  It shows how to use the online speech denoiser API with DPDFNet.
  models. Use 16 kHz DPDFNet models such as `dpdfnet_baseline.onnx`,
  `dpdfnet2.onnx`, `dpdfnet4.onnx`, or `dpdfnet8.onnx` for downstream ASR and
  `dpdfnet2_48khz_hr.onnx` for 48 kHz enhancement output.

- [pocket-tts.py](./pocket-tts.py) It shows how to use PocketTTS with the
  `GenerationConfig` API.

- [supertonic-tts.py](./supertonic-tts.py) It shows how to use SupertonicTTS
  with the `GenerationConfig` API.

- [zipvoice-tts.py](./zipvoice-tts.py) It shows how to use ZipVoice for
  zero-shot TTS with the `GenerationConfig` API.

- [zipvoice-tts-play.py](./zipvoice-tts-play.py) It shows how to use ZipVoice
  for zero-shot TTS and plays the generated audio while it is being synthesized.


================================================
FILE: python-api-examples/add-punctuation-online.py
================================================
#!/usr/bin/env python3

"""
This script shows how to add punctuations to text using sherpa-onnx Python API.

Please download the model from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models

The following is an example

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
tar xvf sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
rm sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
"""

from pathlib import Path

import sherpa_onnx


def main():
    model = "./sherpa-onnx-online-punct-en-2024-08-06/model.onnx"
    bpe = "./sherpa-onnx-online-punct-en-2024-08-06/bpe.vocab"
    if not Path(model).is_file():
        raise ValueError(f"{model} does not exist")
    if not Path(bpe).is_file():
        raise ValueError(f"{bpe} does not exist")

    model_config = sherpa_onnx.OnlinePunctuationModelConfig(
        cnn_bilstm=model, bpe_vocab=bpe
    )
    config = sherpa_onnx.OnlinePunctuationConfig(model_config=model_config)
    punct = sherpa_onnx.OnlinePunctuation(config)

    texts = [
        "how are you i am fine thank you",
        "The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry",
    ]
    for text in texts:
        text_with_punct = punct.add_punctuation_with_case(text)
        print("----------")
        print(f"input : {text}")
        print(f"output: {text_with_punct}")
    print("----------")


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/add-punctuation.py
================================================
#!/usr/bin/env python3

"""
This script shows how to add punctuations to text using sherpa-onnx Python API.

Please download the model from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models

The following is an example

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
"""

from pathlib import Path

import sherpa_onnx


def main():
    model = "./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx"
    if not Path(model).is_file():
        raise ValueError(f"{model} does not exist")
    config = sherpa_onnx.OfflinePunctuationConfig(
        model=sherpa_onnx.OfflinePunctuationModelConfig(ct_transformer=model),
    )

    punct = sherpa_onnx.OfflinePunctuation(config)

    text_list = [
        "这是一个测试你好吗How are you我很好thank you are you ok谢谢你",
        "我们都是木头人不会说话不会动",
        "The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry",
    ]
    for text in text_list:
        text_with_punct = punct.add_punctuation(text)
        print("----------")
        print(f"input: {text}")
        print(f"output: {text_with_punct}")

    print("----------")


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/audio-tagging-from-a-file-ced.py
================================================
#!/usr/bin/env python3

"""
This script shows how to use audio tagging Python APIs to tag a file.

Please read the code to download the required model files and test wave file.
"""

import logging
import time
from pathlib import Path

import numpy as np
import sherpa_onnx
import soundfile as sf


def read_test_wave():
    # Please download the model files and test wave files from
    # https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models
    test_wave = "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/6.wav"

    if not Path(test_wave).is_file():
        raise ValueError(
            f"Please download {test_wave} from "
            "https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models"
        )

    # See https://python-soundfile.readthedocs.io/en/0.11.0/#soundfile.read
    data, sample_rate = sf.read(
        test_wave,
        always_2d=True,
        dtype="float32",
    )
    data = data[:, 0]  # use only the first channel
    samples = np.ascontiguousarray(data)

    # samples is a 1-d array of dtype float32
    # sample_rate is a scalar
    return samples, sample_rate


def create_audio_tagger():
    # Please download the model files and test wave files from
    # https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models
    model_file = "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/model.int8.onnx"
    label_file = (
        "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/class_labels_indices.csv"
    )

    if not Path(model_file).is_file():
        raise ValueError(
            f"Please download {model_file} from "
            "https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models"
        )

    if not Path(label_file).is_file():
        raise ValueError(
            f"Please download {label_file} from "
            "https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models"
        )

    config = sherpa_onnx.AudioTaggingConfig(
        model=sherpa_onnx.AudioTaggingModelConfig(
            ced=model_file,
            num_threads=1,
            debug=True,
            provider="cpu",
        ),
        labels=label_file,
        top_k=5,
    )
    if not config.validate():
        raise ValueError(f"Please check the config: {config}")

    print(config)

    return sherpa_onnx.AudioTagging(config)


def main():
    logging.info("Create audio tagger")
    audio_tagger = create_audio_tagger()

    logging.info("Read test wave")
    samples, sample_rate = read_test_wave()

    logging.info("Computing")

    start_time = time.time()

    stream = audio_tagger.create_stream()
    stream.accept_waveform(sample_rate=sample_rate, waveform=samples)
    result = audio_tagger.compute(stream)
    end_time = time.time()

    elapsed_seconds = end_time - start_time
    audio_duration = len(samples) / sample_rate

    real_time_factor = elapsed_seconds / audio_duration
    logging.info(f"Elapsed seconds: {elapsed_seconds:.3f}")
    logging.info(f"Audio duration in seconds: {audio_duration:.3f}")
    logging.info(
        f"RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}"
    )

    s = "\n"
    for i, e in enumerate(result):
        s += f"{i}: {e}\n"

    logging.info(s)


if __name__ == "__main__":
    formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"

    logging.basicConfig(format=formatter, level=logging.INFO)

    main()


================================================
FILE: python-api-examples/audio-tagging-from-a-file.py
================================================
#!/usr/bin/env python3

"""
This script shows how to use audio tagging Python APIs to tag a file.

Please read the code to download the required model files and test wave file.
"""

import logging
import time
from pathlib import Path

import numpy as np
import sherpa_onnx
import soundfile as sf


def read_test_wave():
    # Please download the model files and test wave files from
    # https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models
    test_wave = "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/1.wav"

    if not Path(test_wave).is_file():
        raise ValueError(
            f"Please download {test_wave} from "
            "https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models"
        )

    # See https://python-soundfile.readthedocs.io/en/0.11.0/#soundfile.read
    data, sample_rate = sf.read(
        test_wave,
        always_2d=True,
        dtype="float32",
    )
    data = data[:, 0]  # use only the first channel
    samples = np.ascontiguousarray(data)

    # samples is a 1-d array of dtype float32
    # sample_rate is a scalar
    return samples, sample_rate


def create_audio_tagger():
    # Please download the model files and test wave files from
    # https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models
    model_file = "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/model.onnx"
    label_file = (
        "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/class_labels_indices.csv"
    )

    if not Path(model_file).is_file():
        raise ValueError(
            f"Please download {model_file} from "
            "https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models"
        )

    if not Path(label_file).is_file():
        raise ValueError(
            f"Please download {label_file} from "
            "https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models"
        )

    config = sherpa_onnx.AudioTaggingConfig(
        model=sherpa_onnx.AudioTaggingModelConfig(
            zipformer=sherpa_onnx.OfflineZipformerAudioTaggingModelConfig(
                model=model_file,
            ),
            num_threads=1,
            debug=True,
            provider="cpu",
        ),
        labels=label_file,
        top_k=5,
    )
    if not config.validate():
        raise ValueError(f"Please check the config: {config}")

    print(config)

    return sherpa_onnx.AudioTagging(config)


def main():
    logging.info("Create audio tagger")
    audio_tagger = create_audio_tagger()

    logging.info("Read test wave")
    samples, sample_rate = read_test_wave()

    logging.info("Computing")

    start_time = time.time()

    stream = audio_tagger.create_stream()
    stream.accept_waveform(sample_rate=sample_rate, waveform=samples)
    result = audio_tagger.compute(stream)
    end_time = time.time()

    elapsed_seconds = end_time - start_time
    audio_duration = len(samples) / sample_rate

    real_time_factor = elapsed_seconds / audio_duration
    logging.info(f"Elapsed seconds: {elapsed_seconds:.3f}")
    logging.info(f"Audio duration in seconds: {audio_duration:.3f}")
    logging.info(
        f"RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}"
    )

    s = "\n"
    for i, e in enumerate(result):
        s += f"{i}: {e}\n"

    logging.info(s)


if __name__ == "__main__":
    formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"

    logging.basicConfig(format=formatter, level=logging.INFO)

    main()


================================================
FILE: python-api-examples/generate-subtitles.py
================================================
#!/usr/bin/env python3
#
# Copyright (c)  2023  Xiaomi Corporation

"""
This file demonstrates how to use sherpa-onnx Python APIs to generate
subtitles.

Supported file formats are those supported by ffmpeg; for instance,
*.mov, *.mp4, *.wav, etc.

Note that you need a non-streaming model for this script.

Please visit
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
to download silero_vad.onnx

For instance,

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

or download ten-vad.onnx, for instance

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx

Please replace --silero-vad-model with --ten-vad-model below to use ten-vad.

(1) For paraformer

    ./python-api-examples/generate-subtitles.py  \
      --silero-vad-model=/path/to/silero_vad.onnx \
      --tokens=/path/to/tokens.txt \
      --paraformer=/path/to/paraformer.onnx \
      --num-threads=2 \
      --decoding-method=greedy_search \
      --debug=false \
      --sample-rate=16000 \
      --feature-dim=80 \
      /path/to/test.mp4

(2) For transducer models from icefall

    ./python-api-examples/generate-subtitles.py  \
      --silero-vad-model=/path/to/silero_vad.onnx \
      --tokens=/path/to/tokens.txt \
      --encoder=/path/to/encoder.onnx \
      --decoder=/path/to/decoder.onnx \
      --joiner=/path/to/joiner.onnx \
      --num-threads=2 \
      --decoding-method=greedy_search \
      --debug=false \
      --sample-rate=16000 \
      --feature-dim=80 \
      /path/to/test.mp4

(3) For Moonshine models

./python-api-examples/generate-subtitles.py  \
  --silero-vad-model=/path/to/silero_vad.onnx \
  --moonshine-preprocessor=./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx \
  --moonshine-encoder=./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx \
  --moonshine-uncached-decoder=./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx \
  --moonshine-cached-decoder=./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx \
  --tokens=./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt \
  --num-threads=2 \
  /path/to/test.mp4

(4) For Whisper models

./python-api-examples/generate-subtitles.py  \
  --silero-vad-model=/path/to/silero_vad.onnx \
  --whisper-encoder=./sherpa-onnx-whisper-base.en/base.en-encoder.int8.onnx \
  --whisper-decoder=./sherpa-onnx-whisper-base.en/base.en-decoder.int8.onnx \
  --tokens=./sherpa-onnx-whisper-base.en/base.en-tokens.txt \
  --whisper-task=transcribe \
  --num-threads=2 \
  /path/to/test.mp4

(5) For SenseVoice CTC models

./python-api-examples/generate-subtitles.py  \
  --silero-vad-model=/path/to/silero_vad.onnx \
  --sense-voice=./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.onnx \
  --tokens=./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt \
  --num-threads=2 \
  /path/to/test.mp4

(6) For FireRedAsr models

./python-api-examples/generate-subtitles.py  \
  --silero-vad-model=/path/to/silero_vad.onnx \
  --tokens=./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/tokens.txt \
  --fire-red-asr-encoder=./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx \
  --fire-red-asr-decoder=./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/decoder.int8.onnx \
  --num-threads=2 \
  /path/to/test.mp4

(7) For WeNet CTC models

./python-api-examples/generate-subtitles.py  \
  --silero-vad-model=/path/to/silero_vad.onnx \
  --wenet-ctc=./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx \
  --tokens=./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt \
  --num-threads=2 \
  /path/to/test.mp4

(8) For NeMo Parakeet TDT models

./python-api-examples/generate-subtitles.py  \
  --silero-vad-model=./silero_vad.onnx \
  --encoder ./sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/encoder.int8.onnx \
  --decoder ./sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/decoder.int8.onnx \
  --joiner ./sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/joiner.int8.onnx \
  --tokens ./sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/tokens.txt \
  --model-type nemo_transducer \
  /path/to/test.mp4

Please refer to
https://k2-fsa.github.io/sherpa/onnx/index.html
to install sherpa-onnx and to download non-streaming pre-trained models
used in this file.
"""
import argparse
import datetime as dt
import shutil
import subprocess
import sys
from dataclasses import dataclass
from datetime import timedelta
from pathlib import Path

import numpy as np
import sherpa_onnx


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--silero-vad-model",
        type=str,
        help="Path to silero_vad.onnx.",
    )

    parser.add_argument(
        "--ten-vad-model",
        type=str,
        help="Path to ten-vad.onnx",
    )

    parser.add_argument(
        "--tokens",
        type=str,
        help="Path to tokens.txt",
    )

    parser.add_argument(
        "--encoder",
        default="",
        type=str,
        help="Path to the transducer encoder model",
    )

    parser.add_argument(
        "--decoder",
        default="",
        type=str,
        help="Path to the transducer decoder model",
    )

    parser.add_argument(
        "--joiner",
        default="",
        type=str,
        help="Path to the transducer joiner model",
    )

    parser.add_argument(
        "--model-type",
        default="",
        type=str,
        help="If using NeMo transducer models, please set it to nemo_transducer",
    )

    parser.add_argument(
        "--paraformer",
        default="",
        type=str,
        help="Path to the model.onnx from Paraformer",
    )

    parser.add_argument(
        "--sense-voice",
        default="",
        type=str,
        help="Path to the model.onnx from SenseVoice",
    )

    parser.add_argument(
        "--wenet-ctc",
        default="",
        type=str,
        help="Path to the CTC model.onnx from WeNet",
    )

    parser.add_argument(
        "--num-threads",
        type=int,
        default=2,
        help="Number of threads for neural network computation",
    )

    parser.add_argument(
        "--fire-red-asr-encoder",
        default="",
        type=str,
        help="Path to FireRedAsr encoder model",
    )

    parser.add_argument(
        "--fire-red-asr-decoder",
        default="",
        type=str,
        help="Path to FireRedAsr decoder model",
    )

    parser.add_argument(
        "--whisper-encoder",
        default="",
        type=str,
        help="Path to whisper encoder model",
    )

    parser.add_argument(
        "--whisper-decoder",
        default="",
        type=str,
        help="Path to whisper decoder model",
    )

    parser.add_argument(
        "--whisper-language",
        default="",
        type=str,
        help="""It specifies the spoken language in the input file.
        Example values: en, fr, de, zh, jp.
        Available languages for multilingual models can be found at
        https://github.com/openai/whisper/blob/main/whisper/tokenizer.py#L10
        If not specified, we infer the language from the input audio file.
        """,
    )

    parser.add_argument(
        "--whisper-task",
        default="transcribe",
        choices=["transcribe", "translate"],
        type=str,
        help="""For multilingual models, if you specify translate, the output
        will be in English.
        """,
    )

    parser.add_argument(
        "--whisper-tail-paddings",
        default=-1,
        type=int,
        help="""Number of tail padding frames.
        We have removed the 30-second constraint from whisper, so you need to
        choose the amount of tail padding frames by yourself.
        Use -1 to use a default value for tail padding.
        """,
    )

    parser.add_argument(
        "--moonshine-preprocessor",
        default="",
        type=str,
        help="Path to moonshine preprocessor model",
    )

    parser.add_argument(
        "--moonshine-encoder",
        default="",
        type=str,
        help="Path to moonshine encoder model",
    )

    parser.add_argument(
        "--moonshine-uncached-decoder",
        default="",
        type=str,
        help="Path to moonshine uncached decoder model",
    )

    parser.add_argument(
        "--moonshine-cached-decoder",
        default="",
        type=str,
        help="Path to moonshine cached decoder model",
    )

    parser.add_argument(
        "--decoding-method",
        type=str,
        default="greedy_search",
        help="""Valid values are greedy_search and modified_beam_search.
        modified_beam_search is valid only for transducer models.
        """,
    )
    parser.add_argument(
        "--debug",
        type=bool,
        default=False,
        help="True to show debug messages when loading modes.",
    )

    parser.add_argument(
        "--sample-rate",
        type=int,
        default=16000,
        help="""Sample rate of the feature extractor. Must match the one
        expected by the model. Note: The input sound files can have a
        different sample rate from this argument.""",
    )

    parser.add_argument(
        "--feature-dim",
        type=int,
        default=80,
        help="Feature dimension. Must match the one expected by the model",
    )

    parser.add_argument(
        "sound_file",
        type=str,
        help="The input sound file to generate subtitles ",
    )

    return parser.parse_args()


def assert_file_exists(filename: str):
    assert Path(filename).is_file(), (
        f"{filename} does not exist!\n"
        "Please refer to "
        "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html to download it"
    )


def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
    if args.encoder:
        assert len(args.paraformer) == 0, args.paraformer
        assert len(args.sense_voice) == 0, args.sense_voice
        assert len(args.wenet_ctc) == 0, args.wenet_ctc
        assert len(args.whisper_encoder) == 0, args.whisper_encoder
        assert len(args.whisper_decoder) == 0, args.whisper_decoder
        assert len(args.fire_red_asr_encoder) == 0, args.fire_red_asr_encoder
        assert len(args.fire_red_asr_decoder) == 0, args.fire_red_asr_decoder
        assert len(args.moonshine_preprocessor) == 0, args.moonshine_preprocessor
        assert len(args.moonshine_encoder) == 0, args.moonshine_encoder
        assert (
            len(args.moonshine_uncached_decoder) == 0
        ), args.moonshine_uncached_decoder
        assert len(args.moonshine_cached_decoder) == 0, args.moonshine_cached_decoder

        assert_file_exists(args.encoder)
        assert_file_exists(args.decoder)
        assert_file_exists(args.joiner)

        recognizer = sherpa_onnx.OfflineRecognizer.from_transducer(
            encoder=args.encoder,
            decoder=args.decoder,
            joiner=args.joiner,
            tokens=args.tokens,
            model_type=args.model_type,
            num_threads=args.num_threads,
            sample_rate=args.sample_rate,
            feature_dim=args.feature_dim,
            decoding_method=args.decoding_method,
            debug=args.debug,
        )
    elif args.paraformer:
        assert len(args.sense_voice) == 0, args.sense_voice
        assert len(args.wenet_ctc) == 0, args.wenet_ctc
        assert len(args.whisper_encoder) == 0, args.whisper_encoder
        assert len(args.whisper_decoder) == 0, args.whisper_decoder
        assert len(args.fire_red_asr_encoder) == 0, args.fire_red_asr_encoder
        assert len(args.fire_red_asr_decoder) == 0, args.fire_red_asr_decoder
        assert len(args.moonshine_preprocessor) == 0, args.moonshine_preprocessor
        assert len(args.moonshine_encoder) == 0, args.moonshine_encoder
        assert (
            len(args.moonshine_uncached_decoder) == 0
        ), args.moonshine_uncached_decoder
        assert len(args.moonshine_cached_decoder) == 0, args.moonshine_cached_decoder

        assert_file_exists(args.paraformer)

        recognizer = sherpa_onnx.OfflineRecognizer.from_paraformer(
            paraformer=args.paraformer,
            tokens=args.tokens,
            num_threads=args.num_threads,
            sample_rate=args.sample_rate,
            feature_dim=args.feature_dim,
            decoding_method=args.decoding_method,
            debug=args.debug,
        )
    elif args.sense_voice:
        assert len(args.wenet_ctc) == 0, args.wenet_ctc
        assert len(args.whisper_encoder) == 0, args.whisper_encoder
        assert len(args.whisper_decoder) == 0, args.whisper_decoder
        assert len(args.fire_red_asr_encoder) == 0, args.fire_red_asr_encoder
        assert len(args.fire_red_asr_decoder) == 0, args.fire_red_asr_decoder
        assert len(args.moonshine_preprocessor) == 0, args.moonshine_preprocessor
        assert len(args.moonshine_encoder) == 0, args.moonshine_encoder
        assert (
            len(args.moonshine_uncached_decoder) == 0
        ), args.moonshine_uncached_decoder
        assert len(args.moonshine_cached_decoder) == 0, args.moonshine_cached_decoder

        assert_file_exists(args.sense_voice)
        recognizer = sherpa_onnx.OfflineRecognizer.from_sense_voice(
            model=args.sense_voice,
            tokens=args.tokens,
            num_threads=args.num_threads,
            use_itn=True,
            debug=args.debug,
        )
    elif args.wenet_ctc:
        assert len(args.whisper_encoder) == 0, args.whisper_encoder
        assert len(args.whisper_decoder) == 0, args.whisper_decoder
        assert len(args.fire_red_asr_encoder) == 0, args.fire_red_asr_encoder
        assert len(args.fire_red_asr_decoder) == 0, args.fire_red_asr_decoder
        assert len(args.moonshine_preprocessor) == 0, args.moonshine_preprocessor
        assert len(args.moonshine_encoder) == 0, args.moonshine_encoder
        assert (
            len(args.moonshine_uncached_decoder) == 0
        ), args.moonshine_uncached_decoder
        assert len(args.moonshine_cached_decoder) == 0, args.moonshine_cached_decoder

        assert_file_exists(args.wenet_ctc)

        recognizer = sherpa_onnx.OfflineRecognizer.from_wenet_ctc(
            model=args.wenet_ctc,
            tokens=args.tokens,
            num_threads=args.num_threads,
            sample_rate=args.sample_rate,
            feature_dim=args.feature_dim,
            decoding_method=args.decoding_method,
            debug=args.debug,
        )
    elif args.whisper_encoder:
        assert_file_exists(args.whisper_encoder)
        assert_file_exists(args.whisper_decoder)
        assert len(args.fire_red_asr_encoder) == 0, args.fire_red_asr_encoder
        assert len(args.fire_red_asr_decoder) == 0, args.fire_red_asr_decoder
        assert len(args.moonshine_preprocessor) == 0, args.moonshine_preprocessor
        assert len(args.moonshine_encoder) == 0, args.moonshine_encoder
        assert (
            len(args.moonshine_uncached_decoder) == 0
        ), args.moonshine_uncached_decoder
        assert len(args.moonshine_cached_decoder) == 0, args.moonshine_cached_decoder

        recognizer = sherpa_onnx.OfflineRecognizer.from_whisper(
            encoder=args.whisper_encoder,
            decoder=args.whisper_decoder,
            tokens=args.tokens,
            num_threads=args.num_threads,
            decoding_method=args.decoding_method,
            debug=args.debug,
            language=args.whisper_language,
            task=args.whisper_task,
            tail_paddings=args.whisper_tail_paddings,
        )
    elif args.moonshine_preprocessor:
        assert len(args.fire_red_asr_encoder) == 0, args.fire_red_asr_encoder
        assert len(args.fire_red_asr_decoder) == 0, args.fire_red_asr_decoder
        assert_file_exists(args.moonshine_preprocessor)
        assert_file_exists(args.moonshine_encoder)
        assert_file_exists(args.moonshine_uncached_decoder)
        assert_file_exists(args.moonshine_cached_decoder)

        recognizer = sherpa_onnx.OfflineRecognizer.from_moonshine(
            preprocessor=args.moonshine_preprocessor,
            encoder=args.moonshine_encoder,
            uncached_decoder=args.moonshine_uncached_decoder,
            cached_decoder=args.moonshine_cached_decoder,
            tokens=args.tokens,
            num_threads=args.num_threads,
            decoding_method=args.decoding_method,
            debug=args.debug,
        )
    elif args.fire_red_asr_encoder:
        recognizer = sherpa_onnx.OfflineRecognizer.from_fire_red_asr(
            encoder=args.fire_red_asr_encoder,
            decoder=args.fire_red_asr_decoder,
            tokens=args.tokens,
            num_threads=args.num_threads,
            decoding_method=args.decoding_method,
            debug=args.debug,
        )
    else:
        raise ValueError("Please specify at least one model")

    return recognizer


@dataclass
class Segment:
    start: float
    duration: float
    text: str = ""

    @property
    def end(self):
        return self.start + self.duration

    def __str__(self):
        s = f"{timedelta(seconds=self.start)}"[:-3]
        s += " --> "
        s += f"{timedelta(seconds=self.end)}"[:-3]
        s = s.replace(".", ",")
        s += "\n"
        s += self.text
        return s


def main():
    args = get_args()
    assert_file_exists(args.tokens)
    if args.silero_vad_model:
        assert_file_exists(args.silero_vad_model)
    elif args.ten_vad_model:
        assert_file_exists(args.ten_vad_model)
    else:
        raise ValueError("You need to supply one vad model")

    assert args.num_threads > 0, args.num_threads

    if not Path(args.sound_file).is_file():
        raise ValueError(f"{args.sound_file} does not exist")

    assert (
        args.sample_rate == 16000
    ), f"Only sample rate 16000 is supported.Given: {args.sample_rate}"

    recognizer = create_recognizer(args)

    ffmpeg_cmd = [
        "ffmpeg",
        "-i",
        args.sound_file,
        "-f",
        "s16le",
        "-acodec",
        "pcm_s16le",
        "-ac",
        "1",
        "-ar",
        str(args.sample_rate),
        "-",
    ]

    process = subprocess.Popen(
        ffmpeg_cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL
    )

    frames_per_read = int(args.sample_rate * 100)  # 100 second

    stream = recognizer.create_stream()

    config = sherpa_onnx.VadModelConfig()
    if args.silero_vad_model:
        config.silero_vad.model = args.silero_vad_model
        config.silero_vad.threshold = 0.2
        config.silero_vad.min_silence_duration = 0.25  # seconds
        config.silero_vad.min_speech_duration = 0.25  # seconds

        # If the current segment is larger than this value, then it increases
        # the threshold to 0.9 internally. After detecting this segment,
        # it resets the threshold to its original value.
        config.silero_vad.max_speech_duration = 5  # seconds
        config.sample_rate = args.sample_rate

        window_size = config.silero_vad.window_size
        print("use silero-vad")
    else:
        config.ten_vad.model = args.ten_vad_model
        config.ten_vad.threshold = 0.2
        config.ten_vad.min_silence_duration = 0.25  # seconds
        config.ten_vad.min_speech_duration = 0.25  # seconds

        # If the current segment is larger than this value, then it increases
        # the threshold to 0.9 internally. After detecting this segment,
        # it resets the threshold to its original value.
        config.ten_vad.max_speech_duration = 5  # seconds
        config.sample_rate = args.sample_rate

        window_size = config.ten_vad.window_size
        print("use ten-vad")

    buffer = []
    vad = sherpa_onnx.VoiceActivityDetector(config, buffer_size_in_seconds=100)

    segment_list = []

    print("Started!")
    start_t = dt.datetime.now()
    num_processed_samples = 0

    is_eof = False
    # TODO(fangjun): Support multithreads
    while not is_eof:
        # *2 because int16_t has two bytes
        data = process.stdout.read(frames_per_read * 2)
        if not data:
            vad.flush()
            is_eof = True
        else:
            samples = np.frombuffer(data, dtype=np.int16)
            samples = samples.astype(np.float32) / 32768

            num_processed_samples += samples.shape[0]

            buffer = np.concatenate([buffer, samples])
            while len(buffer) > window_size:
                vad.accept_waveform(buffer[:window_size])
                buffer = buffer[window_size:]

                if False:
                    # If you want to process the speech segment as soon as
                    # speech is detected, you can use
                    current_segment = vad.current_segment
                    if len(current_segment.samples) > 0:
                        print(
                            f"speech starts at {current_segment.start/16000} seconds: ",
                            f"duration {len(current_segment.samples)/16000} seconds",
                        )

        streams = []
        segments = []
        while not vad.empty():
            segment = Segment(
                start=vad.front.start / args.sample_rate,
                duration=len(vad.front.samples) / args.sample_rate,
            )
            segments.append(segment)

            stream = recognizer.create_stream()
            stream.accept_waveform(args.sample_rate, vad.front.samples)

            streams.append(stream)

            vad.pop()

        for s in streams:
            recognizer.decode_stream(s)

        for seg, stream in zip(segments, streams):
            seg.text = stream.result.text
            if seg.text in (".", "The."):
                continue
            segment_list.append(seg)

    end_t = dt.datetime.now()
    elapsed_seconds = (end_t - start_t).total_seconds()
    duration = num_processed_samples / 16000
    rtf = elapsed_seconds / duration

    srt_filename = Path(args.sound_file).with_suffix(".srt")
    with open(srt_filename, "w", encoding="utf-8") as f:
        for i, seg in enumerate(segment_list):
            print(i + 1, file=f)
            print(seg, file=f)
            print("", file=f)

    print(f"Saved to {srt_filename}")
    print(f"Audio duration:\t{duration:.3f} s")
    print(f"Elapsed:\t{elapsed_seconds:.3f} s")
    print(f"RTF = {elapsed_seconds:.3f}/{duration:.3f} = {rtf:.3f}")
    print("Done!")


if __name__ == "__main__":
    if shutil.which("ffmpeg") is None:
        sys.exit("Please install ffmpeg first!")
    main()


================================================
FILE: python-api-examples/http_server.py
================================================
# Copyright      2022  Xiaomi Corp.        (authors: Fangjun Kuang)
#
# See ../../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Tuple

# Please sort it alphabetically
_static_files = (
    ("/css/bootstrap.min.css", "text/css"),
    ("/css/bootstrap.min.css.map", "text/css"),
    ("/index.html", "text/html"),
    ("/js/bootstrap.min.js", "application/javascript"),
    ("/js/bootstrap.min.js.map", "application/javascript"),
    ("/js/jquery-3.6.0.min.js", "application/javascript"),
    ("/js/offline_record.js", "application/javascript"),
    ("/js/offline_record.js", "application/javascript"),
    ("/js/popper.min.js", "application/javascript"),
    ("/js/popper.min.js.map", "application/javascript"),
    ("/js/streaming_record.js", "application/javascript"),
    ("/js/upload.js", "application/javascript"),
    ("/k2-logo.png", "image/png"),
    ("/nav-partial.html", "text/html"),
    ("/offline_record.html", "text/html"),
    ("/streaming_record.html", "text/html"),
    ("/upload.html", "text/html"),
)

_404_page = r"""
<!doctype html><html><head>
<title>Speech recognition with next-gen Kaldi</title><body>
<h1>404 ERROR! Please re-check your URL</h1>
</body></head></html>
"""


def read_file(root: str, name: str) -> str:
    try:
        with open(f"{root}/{name}") as f:
            return f.read()
    except:  # noqa
        with open(f"{root}/{name}", "rb") as f:
            return f.read()


class HttpServer:
    """
    A simple HTTP server that hosts only static files
    """

    def __init__(self, doc_root: str):
        content = dict()
        for f, mime_type in _static_files:
            content[f] = (read_file(doc_root, f), mime_type)
        self.content = content

    def process_request(self, f: str) -> Tuple[str, str, str]:
        """
        Args:
          f:
            The filename to read.
        Returns:
          Return a tuple:
            - a bool, True if the given file is found. False otherwise.
            - a str, the content of the file if found. Otherwise, it
              contains the content for the 404 page
            - a str, the MIME type of the returned content
        """
        if f in self.content:
            return True, self.content[f][0], self.content[f][1]
        else:
            return False, _404_page, "text/html"


================================================
FILE: python-api-examples/inverse-text-normalization-offline-asr.py
================================================
#!/usr/bin/env python3
#
# Copyright (c)  2024  Xiaomi Corporation

"""
This script shows how to use inverse text normalization with non-streaming ASR.

Usage:

(1) Download the test model

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
rm sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2

(2) Download rule fst

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst

Please refer to
https://github.com/k2-fsa/colab/blob/master/sherpa-onnx/itn_zh_number.ipynb
for how itn_zh_number.fst is generated.

(3) Download test wave

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav

(4) Run this script

python3 ./python-api-examples/inverse-text-normalization-offline-asr.py
"""
from pathlib import Path

import sherpa_onnx
import soundfile as sf


def create_recognizer():
    model = "./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx"
    tokens = "./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt"
    rule_fsts = "./itn_zh_number.fst"

    if (
        not Path(model).is_file()
        or not Path(tokens).is_file()
        or not Path(rule_fsts).is_file()
    ):
        raise ValueError(
            """Please download model files from
            https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
            """
        )
    return sherpa_onnx.OfflineRecognizer.from_paraformer(
        paraformer=model,
        tokens=tokens,
        debug=True,
        rule_fsts=rule_fsts,
    )


def main():
    recognizer = create_recognizer()
    wave_filename = "./itn-zh-number.wav"
    if not Path(wave_filename).is_file():
        raise ValueError(
            """Please download model files from
            https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
            """
        )
    audio, sample_rate = sf.read(wave_filename, dtype="float32", always_2d=True)
    audio = audio[:, 0]  # only use the first channel

    stream = recognizer.create_stream()
    stream.accept_waveform(sample_rate, audio)
    recognizer.decode_stream(stream)
    print(wave_filename)
    print(stream.result)


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/inverse-text-normalization-online-asr.py
================================================
#!/usr/bin/env python3
#
# Copyright (c)  2024  Xiaomi Corporation

"""
This script shows how to use inverse text normalization with streaming ASR.

Usage:

(1) Download the test model

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2

(2) Download rule fst

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst

Please refer to
https://github.com/k2-fsa/colab/blob/master/sherpa-onnx/itn_zh_number.ipynb
for how itn_zh_number.fst is generated.

(3) Download test wave

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav

(4) Run this script

python3 ./python-api-examples/inverse-text-normalization-online-asr.py
"""
from pathlib import Path

import sherpa_onnx
import soundfile as sf


def create_recognizer():
    encoder = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx"
    decoder = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx"
    joiner = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx"
    tokens = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt"
    rule_fsts = "./itn_zh_number.fst"

    if (
        not Path(encoder).is_file()
        or not Path(decoder).is_file()
        or not Path(joiner).is_file()
        or not Path(tokens).is_file()
        or not Path(rule_fsts).is_file()
    ):
        raise ValueError(
            """Please download model files from
            https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
            """
        )
    return sherpa_onnx.OnlineRecognizer.from_transducer(
        encoder=encoder,
        decoder=decoder,
        joiner=joiner,
        tokens=tokens,
        debug=True,
        rule_fsts=rule_fsts,
    )


def main():
    recognizer = create_recognizer()
    wave_filename = "./itn-zh-number.wav"
    if not Path(wave_filename).is_file():
        raise ValueError(
            """Please download model files from
            https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
            """
        )
    audio, sample_rate = sf.read(wave_filename, dtype="float32", always_2d=True)
    audio = audio[:, 0]  # only use the first channel

    stream = recognizer.create_stream()
    stream.accept_waveform(sample_rate, audio)

    tail_padding = [0] * int(0.3 * sample_rate)
    stream.accept_waveform(sample_rate, tail_padding)

    while recognizer.is_ready(stream):
        recognizer.decode_stream(stream)

    print(wave_filename)
    print(recognizer.get_result_all(stream))


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/keyword-spotter-from-microphone.py
================================================
#!/usr/bin/env python3

# Real-time keyword spotting from a microphone with sherpa-onnx Python API
#
# Please refer to
# https://k2-fsa.github.io/sherpa/onnx/kws/pretrained_models/index.html
# to download pre-trained models

import argparse
import sys
from pathlib import Path

from typing import List

try:
    import sounddevice as sd
except ImportError:
    print("Please install sounddevice first. You can use")
    print()
    print("  pip install sounddevice")
    print()
    print("to install it")
    sys.exit(-1)

import sherpa_onnx


def assert_file_exists(filename: str):
    assert Path(filename).is_file(), (
        f"{filename} does not exist!\n"
        "Please refer to "
        "https://k2-fsa.github.io/sherpa/onnx/kws/pretrained_models/index.html to download it"
    )


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--tokens",
        type=str,
        help="Path to tokens.txt",
    )

    parser.add_argument(
        "--encoder",
        type=str,
        help="Path to the transducer encoder model",
    )

    parser.add_argument(
        "--decoder",
        type=str,
        help="Path to the transducer decoder model",
    )

    parser.add_argument(
        "--joiner",
        type=str,
        help="Path to the transducer joiner model",
    )

    parser.add_argument(
        "--num-threads",
        type=int,
        default=1,
        help="Number of threads for neural network computation",
    )

    parser.add_argument(
        "--provider",
        type=str,
        default="cpu",
        help="Valid values: cpu, cuda, coreml",
    )

    parser.add_argument(
        "--max-active-paths",
        type=int,
        default=4,
        help="""
        It specifies number of active paths to keep during decoding.
        """,
    )

    parser.add_argument(
        "--num-trailing-blanks",
        type=int,
        default=1,
        help="""The number of trailing blanks a keyword should be followed. Setting
        to a larger value (e.g. 8) when your keywords has overlapping tokens
        between each other.
        """,
    )

    parser.add_argument(
        "--keywords-file",
        type=str,
        help="""
        The file containing keywords, one words/phrases per line, and for each
        phrase the bpe/cjkchar/pinyin are separated by a space. For example:

        ▁HE LL O ▁WORLD
        x iǎo ài t óng x ué 
        """,
    )

    parser.add_argument(
        "--keywords-score",
        type=float,
        default=1.0,
        help="""
        The boosting score of each token for keywords. The larger the easier to
        survive beam search.
        """,
    )

    parser.add_argument(
        "--keywords-threshold",
        type=float,
        default=0.25,
        help="""
        The trigger threshold (i.e. probability) of the keyword. The larger the
        harder to trigger.
        """,
    )

    return parser.parse_args()


def main():
    args = get_args()

    devices = sd.query_devices()
    if len(devices) == 0:
        print("No microphone devices found")
        sys.exit(0)

    print(devices)
    default_input_device_idx = sd.default.device[0]
    print(f'Use default device: {devices[default_input_device_idx]["name"]}')

    assert_file_exists(args.tokens)
    assert_file_exists(args.encoder)
    assert_file_exists(args.decoder)
    assert_file_exists(args.joiner)

    assert Path(
        args.keywords_file
    ).is_file(), (
        f"keywords_file : {args.keywords_file} not exist, please provide a valid path."
    )

    keyword_spotter = sherpa_onnx.KeywordSpotter(
        tokens=args.tokens,
        encoder=args.encoder,
        decoder=args.decoder,
        joiner=args.joiner,
        num_threads=args.num_threads,
        max_active_paths=args.max_active_paths,
        keywords_file=args.keywords_file,
        keywords_score=args.keywords_score,
        keywords_threshold=args.keywords_threshold,
        num_trailing_blanks=args.num_trailing_blanks,
        provider=args.provider,
    )

    print("Started! Please speak")

    idx = 0

    sample_rate = 16000
    samples_per_read = int(0.1 * sample_rate)  # 0.1 second = 100 ms
    stream = keyword_spotter.create_stream()
    with sd.InputStream(channels=1, dtype="float32", samplerate=sample_rate) as s:
        while True:
            samples, _ = s.read(samples_per_read)  # a blocking read
            samples = samples.reshape(-1)
            stream.accept_waveform(sample_rate, samples)
            while keyword_spotter.is_ready(stream):
                keyword_spotter.decode_stream(stream)
                result = keyword_spotter.get_result(stream)
                if result:
                    print(f"{idx}: {result }")
                    idx += 1
                    # Remember to reset stream right after detecting a keyword
                    keyword_spotter.reset_stream(stream)


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\nCaught Ctrl + C. Exiting")


================================================
FILE: python-api-examples/keyword-spotter.py
================================================
#!/usr/bin/env python3

"""
This file demonstrates how to use sherpa-onnx Python API to do keyword spotting
from wave file(s).

Please refer to
https://k2-fsa.github.io/sherpa/onnx/kws/pretrained_models/index.html
to download pre-trained models.
"""
import argparse
import time
import wave
from pathlib import Path
from typing import List, Tuple

import numpy as np
import sherpa_onnx


def read_wave(wave_filename: str) -> Tuple[np.ndarray, int]:
    """
    Args:
      wave_filename:
        Path to a wave file. It should be single channel and each sample should
        be 16-bit. Its sample rate does not need to be 16kHz.
    Returns:
      Return a tuple containing:
       - A 1-D array of dtype np.float32 containing the samples, which are
       normalized to the range [-1, 1].
       - sample rate of the wave file
    """

    with wave.open(wave_filename) as f:
        assert f.getnchannels() == 1, f.getnchannels()
        assert f.getsampwidth() == 2, f.getsampwidth()  # it is in bytes
        num_samples = f.getnframes()
        samples = f.readframes(num_samples)
        samples_int16 = np.frombuffer(samples, dtype=np.int16)
        samples_float32 = samples_int16.astype(np.float32)

        samples_float32 = samples_float32 / 32768
        return samples_float32, f.getframerate()


def create_keyword_spotter():
    kws = sherpa_onnx.KeywordSpotter(
        tokens="./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt",
        encoder="./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx",
        decoder="./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx",
        joiner="./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx",
        num_threads=2,
        keywords_file="./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/test_keywords.txt",
        provider="cpu",
    )

    return kws


def main():
    kws = create_keyword_spotter()

    wave_filename = (
        "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav"
    )

    samples, sample_rate = read_wave(wave_filename)

    tail_paddings = np.zeros(int(0.66 * sample_rate), dtype=np.float32)

    print("----------Use pre-defined keywords----------")
    s = kws.create_stream()
    s.accept_waveform(sample_rate, samples)
    s.accept_waveform(sample_rate, tail_paddings)
    s.input_finished()
    while kws.is_ready(s):
        kws.decode_stream(s)
        r = kws.get_result(s)
        if r != "":
            # Remember to call reset right after detected a keyword
            kws.reset_stream(s)

            print(f"Detected {r}")

    print("----------Use pre-defined keywords + add a new keyword----------")

    s = kws.create_stream("y ǎn y uán @演员")
    s.accept_waveform(sample_rate, samples)
    s.accept_waveform(sample_rate, tail_paddings)
    s.input_finished()
    while kws.is_ready(s):
        kws.decode_stream(s)
        r = kws.get_result(s)
        if r != "":
            # Remember to call reset right after detected a keyword
            kws.reset_stream(s)

            print(f"Detected {r}")

    print("----------Use pre-defined keywords + add 2 new keywords----------")

    s = kws.create_stream("y ǎn y uán @演员/zh ī m íng @知名")
    s.accept_waveform(sample_rate, samples)
    s.accept_waveform(sample_rate, tail_paddings)
    s.input_finished()
    while kws.is_ready(s):
        kws.decode_stream(s)
        r = kws.get_result(s)
        if r != "":
            # Remember to call reset right after detected a keyword
            kws.reset_stream(s)

            print(f"Detected {r}")


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/non_streaming_server.py
================================================
#!/usr/bin/env python3
# Copyright      2022-2023  Xiaomi Corp.
"""
A server for non-streaming speech recognition. Non-streaming means you send all
the content of the audio at once for recognition.

It supports multiple clients sending at the same time.

Usage:
    ./non_streaming_server.py --help

Please refer to

https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/index.html
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/index.html
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/index.html

for pre-trained models to download.

Usage examples:

(1) Use a non-streaming transducer model

cd /path/to/sherpa-onnx
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-06-26.tar.bz2
tar xvf sherpa-onnx-zipformer-en-2023-06-26.tar.bz2
rm sherpa-onnx-zipformer-en-2023-06-26.tar.bz2

python3 ./python-api-examples/non_streaming_server.py \
  --encoder ./sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.onnx \
  --decoder ./sherpa-onnx-zipformer-en-2023-06-26/decoder-epoch-99-avg-1.onnx \
  --joiner ./sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.onnx \
  --tokens ./sherpa-onnx-zipformer-en-2023-06-26/tokens.txt \
  --port 6006
  
(2) Use a non-streaming paraformer

cd /path/to/sherpa-onnx
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
rm sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2

python3 ./python-api-examples/non_streaming_server.py \
  --paraformer ./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx \
  --tokens ./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt

(3) Use a non-streaming CTC model from NeMo

cd /path/to/sherpa-onnx
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-ctc-en-conformer-medium.tar.bz2
tar xvf sherpa-onnx-nemo-ctc-en-conformer-medium.tar.bz2
rm sherpa-onnx-nemo-ctc-en-conformer-medium.tar.bz2

python3 ./python-api-examples/non_streaming_server.py \
  --nemo-ctc ./sherpa-onnx-nemo-ctc-en-conformer-medium/model.onnx \
  --tokens ./sherpa-onnx-nemo-ctc-en-conformer-medium/tokens.txt

(4) Use a non-streaming CTC model from WeNet

cd /path/to/sherpa-onnx
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zh-wenet-wenetspeech.tar.bz2
tar xvf sherpa-onnx-zh-wenet-wenetspeech.tar.bz2
rm sherpa-onnx-zh-wenet-wenetspeech.tar.bz2

python3 ./python-api-examples/non_streaming_server.py \
  --wenet-ctc ./sherpa-onnx-zh-wenet-wenetspeech/model.onnx \
  --tokens ./sherpa-onnx-zh-wenet-wenetspeech/tokens.txt

(5) Use a Moonshine model

cd /path/to/sherpa-onnx
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2

python3 ./python-api-examples/non_streaming_server.py \
  --moonshine-preprocessor=./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx \
  --moonshine-encoder=./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx \
  --moonshine-uncached-decoder=./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx \
  --moonshine-cached-decoder=./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx \
  --tokens=./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt

(6) Use a Whisper model

cd /path/to/sherpa-onnx
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
rm sherpa-onnx-whisper-tiny.en.tar.bz2

python3 ./python-api-examples/non_streaming_server.py \
  --whisper-encoder=./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.onnx \
  --whisper-decoder=./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.onnx \
  --tokens=./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt

(7) Use a tdnn model of the yesno recipe from icefall

cd /path/to/sherpa-onnx

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-tdnn-yesno.tar.bz2
tar xvf sherpa-onnx-tdnn-yesno.tar.bz2
rm sherpa-onnx-tdnn-yesno.tar.bz2

python3 ./python-api-examples/non_streaming_server.py \
  --sample-rate=8000 \
  --feat-dim=23 \
  --tdnn-model=./sherpa-onnx-tdnn-yesno/model-epoch-14-avg-2.onnx \
  --tokens=./sherpa-onnx-tdnn-yesno/tokens.txt

(8) Use a Non-streaming SenseVoice model

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2

python3 ./python-api-examples/non_streaming_server.py \
  --sense-voice=./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx \
  --tokens=./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt

(9) Use a Non-streaming telespeech ctc model

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
tar xvf sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
rm sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2

python3 ./python-api-examples/non_streaming_server.py \
  --telespeech-ctc=./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/model.int8.onnx \
  --tokens=./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt

----

To use a certificate so that you can use https, please use

python3 ./python-api-examples/non_streaming_server.py \
  --whisper-encoder=./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.onnx \
  --whisper-decoder=./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.onnx \
  --certificate=/path/to/your/cert.pem

If you don't have a certificate, please run:

    cd ./python-api-examples/web
    ./generate-certificate.py

It will generate 3 files, one of which is the required `cert.pem`.
"""  # noqa

import argparse
import asyncio
import http
import logging
import socket
import ssl
import sys
import warnings
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from pathlib import Path
from typing import Optional, Tuple

import numpy as np
import sherpa_onnx

import websockets

from http_server import HttpServer


def setup_logger(
    log_filename: str,
    log_level: str = "info",
    use_console: bool = True,
) -> None:
    """Setup log level.

    Args:
      log_filename:
        The filename to save the log.
      log_level:
        The log level to use, e.g., "debug", "info", "warning", "error",
        "critical"
      use_console:
        True to also print logs to console.
    """
    now = datetime.now()
    date_time = now.strftime("%Y-%m-%d-%H-%M-%S")
    formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
    log_filename = f"{log_filename}-{date_time}.txt"

    Path(log_filename).parent.mkdir(parents=True, exist_ok=True)

    level = logging.ERROR
    if log_level == "debug":
        level = logging.DEBUG
    elif log_level == "info":
        level = logging.INFO
    elif log_level == "warning":
        level = logging.WARNING
    elif log_level == "critical":
        level = logging.CRITICAL

    logging.basicConfig(
        filename=log_filename,
        format=formatter,
        level=level,
        filemode="w",
    )
    if use_console:
        console = logging.StreamHandler()
        console.setLevel(level)
        console.setFormatter(logging.Formatter(formatter))
        logging.getLogger("").addHandler(console)


def add_transducer_model_args(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--encoder",
        default="",
        type=str,
        help="Path to the transducer encoder model",
    )

    parser.add_argument(
        "--decoder",
        default="",
        type=str,
        help="Path to the transducer decoder model",
    )

    parser.add_argument(
        "--joiner",
        default="",
        type=str,
        help="Path to the transducer joiner model",
    )


def add_paraformer_model_args(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--paraformer",
        default="",
        type=str,
        help="Path to the model.onnx from Paraformer",
    )


def add_sense_voice_model_args(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--sense-voice",
        default="",
        type=str,
        help="Path to the model.onnx from SenseVoice",
    )


def add_nemo_ctc_model_args(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--nemo-ctc",
        default="",
        type=str,
        help="Path to the model.onnx from NeMo CTC",
    )


def add_telespeech_ctc_model_args(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--telespeech-ctc",
        default="",
        type=str,
        help="Path to the model.onnx from TeleSpeech CTC",
    )


def add_wenet_ctc_model_args(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--wenet-ctc",
        default="",
        type=str,
        help="Path to the model.onnx from WeNet CTC",
    )


def add_tdnn_ctc_model_args(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--tdnn-model",
        default="",
        type=str,
        help="Path to the model.onnx for the tdnn model of the yesno recipe",
    )


def add_moonshine_model_args(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--moonshine-preprocessor",
        default="",
        type=str,
        help="Path to moonshine preprocessor model",
    )

    parser.add_argument(
        "--moonshine-encoder",
        default="",
        type=str,
        help="Path to moonshine encoder model",
    )

    parser.add_argument(
        "--moonshine-uncached-decoder",
        default="",
        type=str,
        help="Path to moonshine uncached decoder model",
    )

    parser.add_argument(
        "--moonshine-cached-decoder",
        default="",
        type=str,
        help="Path to moonshine cached decoder model",
    )


def add_whisper_model_args(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--whisper-encoder",
        default="",
        type=str,
        help="Path to whisper encoder model",
    )

    parser.add_argument(
        "--whisper-decoder",
        default="",
        type=str,
        help="Path to whisper decoder model",
    )

    parser.add_argument(
        "--whisper-language",
        default="",
        type=str,
        help="""It specifies the spoken language in the input audio file.
        Example values: en, fr, de, zh, jp.
        Available languages for multilingual models can be found at
        https://github.com/openai/whisper/blob/main/whisper/tokenizer.py#L10
        If not specified, we infer the language from the input audio file.
        """,
    )

    parser.add_argument(
        "--whisper-task",
        default="transcribe",
        choices=["transcribe", "translate"],
        type=str,
        help="""For multilingual models, if you specify translate, the output
        will be in English.
        """,
    )

    parser.add_argument(
        "--whisper-tail-paddings",
        default=-1,
        type=int,
        help="""Number of tail padding frames.
        We have removed the 30-second constraint from whisper, so you need to
        choose the amount of tail padding frames by yourself.
        Use -1 to use a default value for tail padding.
        """,
    )


def add_model_args(parser: argparse.ArgumentParser):
    add_transducer_model_args(parser)
    add_paraformer_model_args(parser)
    add_sense_voice_model_args(parser)
    add_nemo_ctc_model_args(parser)
    add_wenet_ctc_model_args(parser)
    add_telespeech_ctc_model_args(parser)
    add_tdnn_ctc_model_args(parser)
    add_whisper_model_args(parser)
    add_moonshine_model_args(parser)

    parser.add_argument(
        "--tokens",
        type=str,
        help="Path to tokens.txt",
    )

    parser.add_argument(
        "--num-threads",
        type=int,
        default=2,
        help="Number of threads to run the neural network model",
    )

    parser.add_argument(
        "--provider",
        type=str,
        default="cpu",
        help="Valid values: cpu, cuda, coreml",
    )


def add_feature_config_args(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--sample-rate",
        type=int,
        default=16000,
        help="Sample rate of the data used to train the model. ",
    )

    parser.add_argument(
        "--feat-dim",
        type=int,
        default=80,
        help="Feature dimension of the model",
    )


def add_decoding_args(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--decoding-method",
        type=str,
        default="greedy_search",
        help="""Decoding method to use. Current supported methods are:
        - greedy_search
        - modified_beam_search  (for transducer models only)
        """,
    )

    add_modified_beam_search_args(parser)


def add_modified_beam_search_args(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--max-active-paths",
        type=int,
        default=4,
        help="""Used only when --decoding-method is modified_beam_search.
        It specifies number of active paths to keep during decoding.
        """,
    )


def add_hotwords_args(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--hotwords-file",
        type=str,
        default="",
        help="""
        The file containing hotwords, one words/phrases per line, and for each
        phrase the bpe/cjkchar are separated by a space. For example:

        ▁HE LL O ▁WORLD
        你 好 世 界
        """,
    )

    parser.add_argument(
        "--hotwords-score",
        type=float,
        default=1.5,
        help="""
        The hotword score of each token for biasing word/phrase. Used only if
        --hotwords-file is given.
        """,
    )


def add_blank_penalty_args(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--blank-penalty",
        type=float,
        default=0.0,
        help="""
        The penalty applied on blank symbol during decoding.
        Note: It is a positive value that would be applied to logits like
        this `logits[:, 0] -= blank_penalty` (suppose logits.shape is
        [batch_size, vocab] and blank id is 0).
        """,
    )


def check_args(args):
    if not Path(args.tokens).is_file():
        raise ValueError(f"{args.tokens} does not exist")

    if args.decoding_method not in (
        "greedy_search",
        "modified_beam_search",
    ):
        raise ValueError(f"Unsupported decoding method {args.decoding_method}")

    if args.decoding_method == "modified_beam_search":
        assert args.num_active_paths > 0, args.num_active_paths
        assert Path(args.encoder).is_file(), args.encoder
        assert Path(args.decoder).is_file(), args.decoder
        assert Path(args.joiner).is_file(), args.joiner

    if args.hotwords_file != "":
        assert args.decoding_method == "modified_beam_search", args.decoding_method
        assert Path(args.hotwords_file).is_file(), args.hotwords_file


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    add_model_args(parser)
    add_feature_config_args(parser)
    add_decoding_args(parser)
    add_hotwords_args(parser)
    add_blank_penalty_args(parser)

    parser.add_argument(
        "--port",
        type=int,
        default=6006,
        help="The server will listen on this port",
    )

    parser.add_argument(
        "--max-batch-size",
        type=int,
        default=3,
        help="""Max batch size for computation. Note if there are not enough
        requests in the queue, it will wait for max_wait_ms time. After that,
        even if there are not enough requests, it still sends the
        available requests in the queue for computation.
        """,
    )

    parser.add_argument(
        "--max-wait-ms",
        type=float,
        default=5,
        help="""Max time in millisecond to wait to build batches for inference.
        If there are not enough requests in the feature queue to build a batch
        of max_batch_size, it waits up to this time before fetching available
        requests for computation.
        """,
    )

    parser.add_argument(
        "--nn-pool-size",
        type=int,
        default=1,
        help="Number of threads for NN computation and decoding.",
    )

    parser.add_argument(
        "--max-message-size",
        type=int,
        default=(1 << 20),
        help="""Max message size in bytes.
        The max size per message cannot exceed this limit.
        """,
    )

    parser.add_argument(
        "--max-queue-size",
        type=int,
        default=32,
        help="Max number of messages in the queue for each connection.",
    )

    parser.add_argument(
        "--max-active-connections",
        type=int,
        default=200,
        help="""Maximum number of active connections. The server will refuse
        to accept new connections once the current number of active connections
        equals to this limit.
        """,
    )

    parser.add_argument(
        "--certificate",
        type=str,
        help="""Path to the X.509 certificate. You need it only if you want to
        use a secure websocket connection, i.e., use wss:// instead of ws://.
        You can use ./web/generate-certificate.py
        to generate the certificate `cert.pem`.
        Note ./web/generate-certificate.py will generate three files but you
        only need to pass the generated cert.pem to this option.
        """,
    )

    parser.add_argument(
        "--doc-root",
        type=str,
        default="./python-api-examples/web",
        help="Path to the web root",
    )

    return parser.parse_args()


class NonStreamingServer:
    def __init__(
        self,
        recognizer: sherpa_onnx.OfflineRecognizer,
        max_batch_size: int,
        max_wait_ms: float,
        nn_pool_size: int,
        max_message_size: int,
        max_queue_size: int,
        max_active_connections: int,
        doc_root: str,
        certificate: Optional[str] = None,
    ):
        """
        Args:
          recognizer:
            An instance of the sherpa_onnx.OfflineRecognizer.
          max_batch_size:
            Max batch size for inference.
          max_wait_ms:
            Max wait time in milliseconds in order to build a batch of
            `max_batch_size`.
          nn_pool_size:
            Number of threads for the thread pool that is used for NN
            computation and decoding.
          max_message_size:
            Max size in bytes per message.
          max_queue_size:
            Max number of messages in the queue for each connection.
          max_active_connections:
            Max number of active connections. Once number of active client
            equals to this limit, the server refuses to accept new connections.
          doc_root:
            Path to the directory where files like index.html for the HTTP
            server locate.
          certificate:
            Optional. If not None, it will use secure websocket.
            You can use ./web/generate-certificate.py to generate
            it (the default generated filename is `cert.pem`).
        """
        self.recognizer = recognizer

        self.certificate = certificate
        self.http_server = HttpServer(doc_root)

        self.nn_pool_size = nn_pool_size
        self.nn_pool = ThreadPoolExecutor(
            max_workers=nn_pool_size,
            thread_name_prefix="nn",
        )

        self.stream_queue = asyncio.Queue()

        self.max_wait_ms = max_wait_ms
        self.max_batch_size = max_batch_size
        self.max_message_size = max_message_size
        self.max_queue_size = max_queue_size
        self.max_active_connections = max_active_connections

        self.current_active_connections = 0
        self.sample_rate = int(recognizer.config.feat_config.sampling_rate)

    async def process_request(
        self,
        path: str,
        request_headers: websockets.Headers,
    ) -> Optional[Tuple[http.HTTPStatus, websockets.Headers, bytes]]:
        if "sec-websocket-key" not in (
            request_headers.headers  # For new request_headers
            if hasattr(request_headers, "headers")
            else request_headers  # For old request_headers
        ):
            # This is a normal HTTP request
            if path == "/":
                path = "/index.html"
            if path[-1] == "?":
                path = path[:-1]

            if path == "/streaming_record.html":
                response = r"""
<!doctype html><html><head>
<title>Speech recognition with next-gen Kaldi</title><body>
<h2>Only
<a href="/upload.html">/upload.html</a>
and
<a href="/offline_record.html">/offline_record.html</a>
is available for the non-streaming server.<h2>
<br/>
<br/>
Go back to <a href="/upload.html">/upload.html</a>
or <a href="/offline_record.html">/offline_record.html</a>
</body></head></html>
"""
                found = True
                mime_type = "text/html"
            else:
                found, response, mime_type = self.http_server.process_request(path)
            if isinstance(response, str):
                response = response.encode("utf-8")

            if not found:
                status = http.HTTPStatus.NOT_FOUND
            else:
                status = http.HTTPStatus.OK
            header = {"Content-Type": mime_type}
            return status, header, response

        if self.current_active_connections < self.max_active_connections:
            self.current_active_connections += 1
            return None

        # Refuse new connections
        status = http.HTTPStatus.SERVICE_UNAVAILABLE  # 503
        header = {"Hint": "The server is overloaded. Please retry later."}
        response = b"The server is busy. Please retry later."

        return status, header, response

    async def run(self, port: int):
        logging.info("started")

        tasks = []
        for i in range(self.nn_pool_size):
            tasks.append(asyncio.create_task(self.stream_consumer_task()))

        if self.certificate:
            logging.info(f"Using certificate: {self.certificate}")
            ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
            ssl_context.load_cert_chain(self.certificate)
        else:
            ssl_context = None
            logging.info("No certificate provided")

        async with websockets.serve(
            self.handle_connection,
            host="",
            port=port,
            max_size=self.max_message_size,
            max_queue=self.max_queue_size,
            process_request=self.process_request,
            ssl=ssl_context,
        ):
            ip_list = ["localhost"]
            if ssl_context:
                ip_list += ["0.0.0.0", "127.0.0.1"]
                ip_list.append(socket.gethostbyname(socket.gethostname()))

            proto = "http://" if ssl_context is None else "https://"
            s = "Please visit one of the following addresses:\n\n"
            for p in ip_list:
                s += "  " + proto + p + f":{port}" "\n"
            logging.info(s)

            await asyncio.Future()  # run forever

        await asyncio.gather(*tasks)  # not reachable

    async def recv_audio_samples(
        self,
        socket: websockets.WebSocketServerProtocol,
    ) -> Tuple[Optional[np.ndarray], Optional[float]]:
        """Receive a tensor from the client.

        The message from the client is a **bytes** buffer.

        The first message can be either "Done" meaning the client won't send
        anything in the future or it can be a buffer containing 8 bytes.
        The first 4 bytes in little endian specifies the sample
        rate of the audio samples; the second 4 bytes in little endian specifies
        the number of bytes in the audio file, which will be sent by the client
        in the subsequent messages.
        Since there is a limit in the message size posed by the websocket
        protocol, the client may send the audio file in multiple messages if the
        audio file is very large.

        The second and remaining messages contain audio samples.

        Please refer to ./offline-websocket-client-decode-files-paralell.py
        and ./offline-websocket-client-decode-files-sequential.py
        for how the client sends the message.

        Args:
          socket:
            The socket for communicating with the client.
        Returns:
          Return a containing:
            - 1-D np.float32 array containing the audio samples
            - sample rate of the audio samples
          or return (None, None) indicating the end of utterance.
        """
        header = await socket.recv()
        if header == "Done":
            return None, None

        assert len(header) >= 8, (
            "The first message should contain at least 8 bytes."
            + f"Given {len(header)}"
        )

        sample_rate = int.from_bytes(header[:4], "little", signed=True)
        expected_num_bytes = int.from_bytes(header[4:8], "little", signed=True)

        received = []
        num_received_bytes = 0
        if len(header) > 8:
            received.append(header[8:])
            num_received_bytes += len(header) - 8

        if num_received_bytes < expected_num_bytes:
            async for message in socket:
                received.append(message)
                num_received_bytes += len(message)
                if num_received_bytes >= expected_num_bytes:
                    break

        assert num_received_bytes == expected_num_bytes, (
            num_received_bytes,
            expected_num_bytes,
        )

        samples = b"".join(received)
        array = np.frombuffer(samples, dtype=np.float32)
        return array, sample_rate

    async def stream_consumer_task(self):
        """This function extracts streams from the queue, batches them up, sends
        them to the RNN-T model for computation and decoding.
        """
        while True:
            if self.stream_queue.empty():
                await asyncio.sleep(self.max_wait_ms / 1000)
                continue

            batch = []
            try:
                while len(batch) < self.max_batch_size:
                    item = self.stream_queue.get_nowait()

                    batch.append(item)
            except asyncio.QueueEmpty:
                pass

            stream_list = [b[0] for b in batch]
            future_list = [b[1] for b in batch]

            loop = asyncio.get_running_loop()
            await loop.run_in_executor(
                self.nn_pool,
                self.recognizer.decode_streams,
                stream_list,
            )

            for f in future_list:
                self.stream_queue.task_done()
                f.set_result(None)

    async def compute_and_decode(
        self,
        stream: sherpa_onnx.OfflineStream,
    ) -> None:
        """Put the stream into the queue and wait it to be processed by the
        consumer task.

        Args:
          stream:
            The stream to be processed. Note: It is changed in-place.
        """
        loop = asyncio.get_running_loop()
        future = loop.create_future()
        await self.stream_queue.put((stream, future))
        await future

    async def handle_connection(
        self,
        socket: websockets.WebSocketServerProtocol,
    ):
        """Receive audio samples from the client, process it, and sends
        deocoding result back to the client.

        Args:
          socket:
            The socket for communicating with the client.
        """
        try:
            await self.handle_connection_impl(socket)
        except websockets.exceptions.ConnectionClosedError:
            logging.info(f"{socket.remote_address} disconnected")
        finally:
            # Decrement so that it can accept new connections
            self.current_active_connections -= 1

            logging.info(
                f"Disconnected: {socket.remote_address}. "
                f"Number of connections: {self.current_active_connections}/{self.max_active_connections}"  # noqa
            )

    async def handle_connection_impl(
        self,
        socket: websockets.WebSocketServerProtocol,
    ):
        """Receive audio samples from the client, process it, and send
        decoding results back to the client.

        Args:
          socket:
            The socket for communicating with the client.
        """
        logging.info(
            f"Connected: {socket.remote_address}. "
            f"Number of connections: {self.current_active_connections}/{self.max_active_connections}"  # noqa
        )

        while True:
            stream = self.recognizer.create_stream()
            samples, sample_rate = await self.recv_audio_samples(socket)
            if samples is None:
                break
            # stream.accept_samples() runs in the main thread

            stream.accept_waveform(sample_rate, samples)

            await self.compute_and_decode(stream)
            result = stream.result.text
            logging.info(f"result: {result}")

            if result:
                await socket.send(result)
            else:
                # If result is an empty string, send something to the client.
                # Otherwise, socket.send() is a no-op and the client will
                # wait for a reply indefinitely.
                await socket.send("<EMPTY>")


def assert_file_exists(filename: str):
    assert Path(filename).is_file(), (
        f"{filename} does not exist!\n"
        "Please refer to "
        "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html to download it"
    )


def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
    if args.encoder:
        assert len(args.paraformer) == 0, args.paraformer
        assert len(args.sense_voice) == 0, args.sense_voice
        assert len(args.nemo_ctc) == 0, args.nemo_ctc
        assert len(args.wenet_ctc) == 0, args.wenet_ctc
        assert len(args.telespeech_ctc) == 0, args.telespeech_ctc
        assert len(args.whisper_encoder) == 0, args.whisper_encoder
        assert len(args.whisper_decoder) == 0, args.whisper_decoder
        assert len(args.tdnn_model) == 0, args.tdnn_model
        assert len(args.moonshine_preprocessor) == 0, args.moonshine_preprocessor
        assert len(args.moonshine_encoder) == 0, args.moonshine_encoder
        assert (
            len(args.moonshine_uncached_decoder) == 0
        ), args.moonshine_uncached_decoder
        assert len(args.moonshine_cached_decoder) == 0, args.moonshine_cached_decoder

        assert_file_exists(args.encoder)
        assert_file_exists(args.decoder)
        assert_file_exists(args.joiner)

        recognizer = sherpa_onnx.OfflineRecognizer.from_transducer(
            encoder=args.encoder,
            decoder=args.decoder,
            joiner=args.joiner,
            tokens=args.tokens,
            num_threads=args.num_threads,
            sample_rate=args.sample_rate,
            feature_dim=args.feat_dim,
            decoding_method=args.decoding_method,
            max_active_paths=args.max_active_paths,
            hotwords_file=args.hotwords_file,
            hotwords_score=args.hotwords_score,
            blank_penalty=args.blank_penalty,
            provider=args.provider,
        )
    elif args.paraformer:
        assert len(args.sense_voice) == 0, args.sense_voice
        assert len(args.nemo_ctc) == 0, args.nemo_ctc
        assert len(args.wenet_ctc) == 0, args.wenet_ctc
        assert len(args.telespeech_ctc) == 0, args.telespeech_ctc
        assert len(args.whisper_encoder) == 0, args.whisper_encoder
        assert len(args.whisper_decoder) == 0, args.whisper_decoder
        assert len(args.tdnn_model) == 0, args.tdnn_model
        assert len(args.moonshine_preprocessor) == 0, args.moonshine_preprocessor
        assert len(args.moonshine_encoder) == 0, args.moonshine_encoder
        assert (
            len(args.moonshine_uncached_decoder) == 0
        ), args.moonshine_uncached_decoder
        assert len(args.moonshine_cached_decoder) == 0, args.moonshine_cached_decoder

        assert_file_exists(args.paraformer)

        recognizer = sherpa_onnx.OfflineRecognizer.from_paraformer(
            paraformer=args.paraformer,
            tokens=args.tokens,
            num_threads=args.num_threads,
            sample_rate=args.sample_rate,
            feature_dim=args.feat_dim,
            decoding_method=args.decoding_method,
            provider=args.provider,
        )
    elif args.sense_voice:
        assert len(args.nemo_ctc) == 0, args.nemo_ctc
        assert len(args.wenet_ctc) == 0, args.wenet_ctc
        assert len(args.telespeech_ctc) == 0, args.telespeech_ctc
        assert len(args.whisper_encoder) == 0, args.whisper_encoder
        assert len(args.whisper_decoder) == 0, args.whisper_decoder
        assert len(args.tdnn_model) == 0, args.tdnn_model
        assert len(args.moonshine_preprocessor) == 0, args.moonshine_preprocessor
        assert len(args.moonshine_encoder) == 0, args.moonshine_encoder
        assert (
            len(args.moonshine_uncached_decoder) == 0
        ), args.moonshine_uncached_decoder
        assert len(args.moonshine_cached_decoder) == 0, args.moonshine_cached_decoder

        assert_file_exists(args.sense_voice)
        recognizer = sherpa_onnx.OfflineRecognizer.from_sense_voice(
            model=args.sense_voice,
            tokens=args.tokens,
            num_threads=args.num_threads,
            use_itn=True,
        )
    elif args.nemo_ctc:
        assert len(args.wenet_ctc) == 0, args.wenet_ctc
        assert len(args.telespeech_ctc) == 0, args.telespeech_ctc
        assert len(args.whisper_encoder) == 0, args.whisper_encoder
        assert len(args.whisper_decoder) == 0, args.whisper_decoder
        assert len(args.tdnn_model) == 0, args.tdnn_model
        assert len(args.moonshine_preprocessor) == 0, args.moonshine_preprocessor
        assert len(args.moonshine_encoder) == 0, args.moonshine_encoder
        assert (
            len(args.moonshine_uncached_decoder) == 0
        ), args.moonshine_uncached_decoder
        assert len(args.moonshine_cached_decoder) == 0, args.moonshine_cached_decoder

        assert_file_exists(args.nemo_ctc)

        recognizer = sherpa_onnx.OfflineRecognizer.from_nemo_ctc(
            model=args.nemo_ctc,
            tokens=args.tokens,
            num_threads=args.num_threads,
            sample_rate=args.sample_rate,
            feature_dim=args.feat_dim,
            decoding_method=args.decoding_method,
            provider=args.provider,
        )
    elif args.wenet_ctc:
        assert len(args.telespeech_ctc) == 0, args.telespeech_ctc
        assert len(args.whisper_encoder) == 0, args.whisper_encoder
        assert len(args.whisper_decoder) == 0, args.whisper_decoder
        assert len(args.tdnn_model) == 0, args.tdnn_model
        assert len(args.moonshine_preprocessor) == 0, args.moonshine_preprocessor
        assert len(args.moonshine_encoder) == 0, args.moonshine_encoder
        assert (
            len(args.moonshine_uncached_decoder) == 0
        ), args.moonshine_uncached_decoder
        assert len(args.moonshine_cached_decoder) == 0, args.moonshine_cached_decoder

        assert_file_exists(args.wenet_ctc)

        recognizer = sherpa_onnx.OfflineRecognizer.from_wenet_ctc(
            model=args.wenet_ctc,
            tokens=args.tokens,
            num_threads=args.num_threads,
            sample_rate=args.sample_rate,
            feature_dim=args.feat_dim,
            decoding_method=args.decoding_method,
            provider=args.provider,
        )
    elif args.telespeech_ctc:
        assert len(args.whisper_encoder) == 0, args.whisper_encoder
        assert len(args.whisper_decoder) == 0, args.whisper_decoder
        assert len(args.tdnn_model) == 0, args.tdnn_model
        assert len(args.moonshine_preprocessor) == 0, args.moonshine_preprocessor
        assert len(args.moonshine_encoder) == 0, args.moonshine_encoder
        assert (
            len(args.moonshine_uncached_decoder) == 0
        ), args.moonshine_uncached_decoder
        assert len(args.moonshine_cached_decoder) == 0, args.moonshine_cached_decoder

        assert_file_exists(args.telespeech_ctc)

        recognizer = sherpa_onnx.OfflineRecognizer.from_telespeech_ctc(
            model=args.telespeech_ctc,
            tokens=args.tokens,
            num_threads=args.num_threads,
            sample_rate=args.sample_rate,
            feature_dim=args.feat_dim,
            decoding_method=args.decoding_method,
            provider=args.provider,
        )
    elif args.whisper_encoder:
        assert len(args.tdnn_model) == 0, args.tdnn_model
        assert_file_exists(args.whisper_encoder)
        assert_file_exists(args.whisper_decoder)
        assert len(args.moonshine_preprocessor) == 0, args.moonshine_preprocessor
        assert len(args.moonshine_encoder) == 0, args.moonshine_encoder
        assert (
            len(args.moonshine_uncached_decoder) == 0
        ), args.moonshine_uncached_decoder
        assert len(args.moonshine_cached_decoder) == 0, args.moonshine_cached_decoder

        recognizer = sherpa_onnx.OfflineRecognizer.from_whisper(
            encoder=args.whisper_encoder,
            decoder=args.whisper_decoder,
            tokens=args.tokens,
            num_threads=args.num_threads,
            decoding_method=args.decoding_method,
            language=args.whisper_language,
            task=args.whisper_task,
            tail_paddings=args.whisper_tail_paddings,
            provider=args.provider,
        )
    elif args.tdnn_model:
        assert_file_exists(args.tdnn_model)
        assert len(args.moonshine_preprocessor) == 0, args.moonshine_preprocessor
        assert len(args.moonshine_encoder) == 0, args.moonshine_encoder
        assert (
            len(args.moonshine_uncached_decoder) == 0
        ), args.moonshine_uncached_decoder
        assert len(args.moonshine_cached_decoder) == 0, args.moonshine_cached_decoder

        recognizer = sherpa_onnx.OfflineRecognizer.from_tdnn_ctc(
            model=args.tdnn_model,
            tokens=args.tokens,
            sample_rate=args.sample_rate,
            feature_dim=args.feat_dim,
            num_threads=args.num_threads,
            decoding_method=args.decoding_method,
            provider=args.provider,
        )
    elif args.moonshine_preprocessor:
        assert_file_exists(args.moonshine_preprocessor)
        assert_file_exists(args.moonshine_encoder)
        assert_file_exists(args.moonshine_uncached_decoder)
        assert_file_exists(args.moonshine_cached_decoder)

        recognizer = sherpa_onnx.OfflineRecognizer.from_moonshine(
            preprocessor=args.moonshine_preprocessor,
            encoder=args.moonshine_encoder,
            uncached_decoder=args.moonshine_uncached_decoder,
            cached_decoder=args.moonshine_cached_decoder,
            tokens=args.tokens,
            num_threads=args.num_threads,
            decoding_method=args.decoding_method,
        )
    else:
        raise ValueError("Please specify at least one model")

    return recognizer


def main():
    args = get_args()
    logging.info(vars(args))
    check_args(args)

    recognizer = create_recognizer(args)

    port = args.port
    max_wait_ms = args.max_wait_ms
    max_batch_size = args.max_batch_size
    nn_pool_size = args.nn_pool_size
    max_message_size = args.max_message_size
    max_queue_size = args.max_queue_size
    max_active_connections = args.max_active_connections
    certificate = args.certificate
    doc_root = args.doc_root

    if certificate and not Path(certificate).is_file():
        raise ValueError(f"{certificate} does not exist")

    if not Path(doc_root).is_dir():
        raise ValueError(f"Directory {doc_root} does not exist")

    non_streaming_server = NonStreamingServer(
        recognizer=recognizer,
        max_wait_ms=max_wait_ms,
        max_batch_size=max_batch_size,
        nn_pool_size=nn_pool_size,
        max_message_size=max_message_size,
        max_queue_size=max_queue_size,
        max_active_connections=max_active_connections,
        certificate=certificate,
        doc_root=doc_root,
    )
    asyncio.run(non_streaming_server.run(port))


if __name__ == "__main__":
    log_filename = "log/log-non-streaming-server"
    setup_logger(log_filename)
    main()


================================================
FILE: python-api-examples/offline-decode-files.py
================================================
#!/usr/bin/env python3
#
# Copyright (c)  2023 by manyeyes
# Copyright (c)  2023  Xiaomi Corporation

"""
This file demonstrates how to use sherpa-onnx Python API to transcribe
file(s) with a non-streaming model.

(1) For paraformer

    ./python-api-examples/offline-decode-files.py  \
      --tokens=/path/to/tokens.txt \
      --paraformer=/path/to/paraformer.onnx \
      --num-threads=2 \
      --decoding-method=greedy_search \
      --debug=false \
      --sample-rate=16000 \
      --feature-dim=80 \
      /path/to/0.wav \
      /path/to/1.wav

(2) For transducer models from icefall

    ./python-api-examples/offline-decode-files.py  \
      --tokens=/path/to/tokens.txt \
      --encoder=/path/to/encoder.onnx \
      --decoder=/path/to/decoder.onnx \
      --joiner=/path/to/joiner.onnx \
      --num-threads=2 \
      --decoding-method=greedy_search \
      --debug=false \
      --sample-rate=16000 \
      --feature-dim=80 \
      /path/to/0.wav \
      /path/to/1.wav

    also with RNN LM rescoring and LODR (optional):

    ./python-api-examples/offline-decode-files.py  \
      --tokens=/path/to/tokens.txt \
      --encoder=/path/to/encoder.onnx \
      --decoder=/path/to/decoder.onnx \
      --joiner=/path/to/joiner.onnx \
      --num-threads=2 \
      --decoding-method=modified_beam_search \
      --debug=false \
      --sample-rate=16000 \
      --feature-dim=80 \
      --lm=/path/to/lm.onnx \
      --lm-scale=0.1 \
      --lodr-fst=/path/to/lodr.fst \
      --lodr-scale=-0.1 \
      /path/to/0.wav \
      /path/to/1.wav

(3) For CTC models from NeMo

python3 ./python-api-examples/offline-decode-files.py \
  --tokens=./sherpa-onnx-nemo-ctc-en-citrinet-512/tokens.txt \
  --nemo-ctc=./sherpa-onnx-nemo-ctc-en-citrinet-512/model.onnx \
  --num-threads=2 \
  --decoding-method=greedy_search \
  --debug=false \
  ./sherpa-onnx-nemo-ctc-en-citrinet-512/test_wavs/0.wav \
  ./sherpa-onnx-nemo-ctc-en-citrinet-512/test_wavs/1.wav \
  ./sherpa-onnx-nemo-ctc-en-citrinet-512/test_wavs/8k.wav

(4) For Whisper models

python3 ./python-api-examples/offline-decode-files.py \
  --whisper-encoder=./sherpa-onnx-whisper-base.en/base.en-encoder.int8.onnx \
  --whisper-decoder=./sherpa-onnx-whisper-base.en/base.en-decoder.int8.onnx \
  --tokens=./sherpa-onnx-whisper-base.en/base.en-tokens.txt \
  --whisper-task=transcribe \
  --num-threads=1 \
  ./sherpa-onnx-whisper-base.en/test_wavs/0.wav \
  ./sherpa-onnx-whisper-base.en/test_wavs/1.wav \
  ./sherpa-onnx-whisper-base.en/test_wavs/8k.wav

(5) For CTC models from WeNet

python3 ./python-api-examples/offline-decode-files.py \
  --wenet-ctc=./sherpa-onnx-zh-wenet-wenetspeech/model.onnx \
  --tokens=./sherpa-onnx-zh-wenet-wenetspeech/tokens.txt \
  ./sherpa-onnx-zh-wenet-wenetspeech/test_wavs/0.wav \
  ./sherpa-onnx-zh-wenet-wenetspeech/test_wavs/1.wav \
  ./sherpa-onnx-zh-wenet-wenetspeech/test_wavs/8k.wav

(6) For tdnn models of the yesno recipe from icefall

python3 ./python-api-examples/offline-decode-files.py \
  --sample-rate=8000 \
  --feature-dim=23 \
  --tdnn-model=./sherpa-onnx-tdnn-yesno/model-epoch-14-avg-2.onnx \
  --tokens=./sherpa-onnx-tdnn-yesno/tokens.txt \
  ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_0_1_0_0_0_1.wav \
  ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_0_0_1_0.wav \
  ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_0_1_1_1.wav

Please refer to
https://k2-fsa.github.io/sherpa/onnx/index.html
to install sherpa-onnx and to download non-streaming pre-trained models
used in this file.
"""
import argparse
import time
import wave
from pathlib import Path
from typing import List, Tuple

import numpy as np
import sherpa_onnx


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--tokens",
        type=str,
        help="Path to tokens.txt",
    )

    parser.add_argument(
        "--hotwords-file",
        type=str,
        default="",
        help="""
        The file containing hotwords, one words/phrases per line, like
        HELLO WORLD
        你好世界
        """,
    )

    parser.add_argument(
        "--hotwords-score",
        type=float,
        default=1.5,
        help="""
        The hotword score of each token for biasing word/phrase. Used only if
        --hotwords-file is given.
        """,
    )

    parser.add_argument(
        "--modeling-unit",
        type=str,
        default="",
        help="""
        The modeling unit of the model, valid values are cjkchar, bpe, cjkchar+bpe.
        Used only when hotwords-file is given.
        """,
    )

    parser.add_argument(
        "--bpe-vocab",
        type=str,
        default="",
        help="""
        The path to the bpe vocabulary, the bpe vocabulary is generated by
        sentencepiece, you can also export the bpe vocabulary through a bpe model
        by `scripts/export_bpe_vocab.py`. Used only when hotwords-file is given
        and modeling-unit is bpe or cjkchar+bpe.
        """,
    )

    parser.add_argument(
        "--encoder",
        default="",
        type=str,
        help="Path to the encoder model",
    )

    parser.add_argument(
        "--decoder",
        default="",
        type=str,
        help="Path to the decoder model",
    )

    parser.add_argument(
        "--joiner",
        default="",
        type=str,
        help="Path to the joiner model",
    )

    parser.add_argument(
        "--paraformer",
        default="",
        type=str,
        help="Path to the model.onnx from Paraformer",
    )

    parser.add_argument(
        "--nemo-ctc",
        default="",
        type=str,
        help="Path to the model.onnx from NeMo CTC",
    )

    parser.add_argument(
        "--wenet-ctc",
        default="",
        type=str,
        help="Path to the model.onnx from WeNet CTC",
    )

    parser.add_argument(
        "--tdnn-model",
        default="",
        type=str,
        help="Path to the model.onnx for the tdnn model of the yesno recipe",
    )

    parser.add_argument(
        "--num-threads",
        type=int,
        default=1,
        help="Number of threads for neural network computation",
    )

    parser.add_argument(
        "--whisper-encoder",
        default="",
        type=str,
        help="Path to whisper encoder model",
    )

    parser.add_argument(
        "--whisper-decoder",
        default="",
        type=str,
        help="Path to whisper decoder model",
    )

    parser.add_argument(
        "--whisper-language",
        default="",
        type=str,
        help="""It specifies the spoken language in the input audio file.
        Example values: en, fr, de, zh, jp.
        Available languages for multilingual models can be found at
        https://github.com/openai/whisper/blob/main/whisper/tokenizer.py#L10
        If not specified, we infer the language from the input audio file.
        """,
    )

    parser.add_argument(
        "--whisper-task",
        default="transcribe",
        choices=["transcribe", "translate"],
        type=str,
        help="""For multilingual models, if you specify translate, the output
        will be in English.
        """,
    )

    parser.add_argument(
        "--whisper-tail-paddings",
        default=-1,
        type=int,
        help="""Number of tail padding frames.
        We have removed the 30-second constraint from whisper, so you need to
        choose the amount of tail padding frames by yourself.
        Use -1 to use a default value for tail padding.
        """,
    )

    parser.add_argument(
        "--blank-penalty",
        type=float,
        default=0.0,
        help="""
        The penalty applied on blank symbol during decoding.
        Note: It is a positive value that would be applied to logits like
        this `logits[:, 0] -= blank_penalty` (suppose logits.shape is
        [batch_size, vocab] and blank id is 0).
        """,
    )

    parser.add_argument(
        "--decoding-method",
        type=str,
        default="greedy_search",
        help="Valid values are greedy_search and modified_beam_search",
    )

    parser.add_argument(
        "--lm",
        metavar="file",
        type=str,
        default="",
        help="Path to RNN LM model",
    )

    parser.add_argument(
        "--lm-scale",
        metavar="lm_scale",
        type=float,
        default=0.1,
        help="LM model scale for rescoring",
    )

    parser.add_argument(
        "--lodr-fst",
        metavar="file",
        type=str,
        default="",
        help="Path to LODR FST model. Used only when --lm is given.",
    )

    parser.add_argument(
        "--lodr-scale",
        metavar="lodr_scale",
        type=float,
        default=-0.1,
        help="LODR scale for rescoring.Used only when --lodr_fst is given.",
    )

    parser.add_argument(
        "--debug",
        type=bool,
        default=False,
        help="True to show debug messages",
    )

    parser.add_argument(
        "--sample-rate",
        type=int,
        default=16000,
        help="""Sample rate of the feature extractor. Must match the one
        expected  by the model. Note: The input sound files can have a
        different sample rate from this argument.""",
    )

    parser.add_argument(
        "--feature-dim",
        type=int,
        default=80,
        help="Feature dimension. Must match the one expected by the model",
    )

    parser.add_argument(
        "sound_files",
        type=str,
        nargs="+",
        help="The input sound file(s) to decode. Each file must be of WAVE"
        "format with a single channel, and each sample has 16-bit, "
        "i.e., int16_t. "
        "The sample rate of the file can be arbitrary and does not need to "
        "be 16 kHz",
    )

    return parser.parse_args()


def assert_file_exists(filename: str):
    assert Path(filename).is_file(), (
        f"{filename} does not exist!\n"
        "Please refer to "
        "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html to download it"
    )


def read_wave(wave_filename: str) -> Tuple[np.ndarray, int]:
    """
    Args:
      wave_filename:
        Path to a wave file. It should be single channel and each sample should
        be 16-bit. Its sample rate does not need to be 16kHz.
    Returns:
      Return a tuple containing:
       - A 1-D array of dtype np.float32 containing the samples, which are
       normalized to the range [-1, 1].
       - sample rate of the wave file
    """

    with wave.open(wave_filename) as f:
        assert f.getnchannels() == 1, f.getnchannels()
        assert f.getsampwidth() == 2, f.getsampwidth()  # it is in bytes
        num_samples = f.getnframes()
        samples = f.readframes(num_samples)
        samples_int16 = np.frombuffer(samples, dtype=np.int16)
        samples_float32 = samples_int16.astype(np.float32)

        samples_float32 = samples_float32 / 32768
        return samples_float32, f.getframerate()


def main():
    args = get_args()
    assert_file_exists(args.tokens)
    assert args.num_threads > 0, args.num_threads

    if args.encoder:
        assert len(args.paraformer) == 0, args.paraformer
        assert len(args.nemo_ctc) == 0, args.nemo_ctc
        assert len(args.wenet_ctc) == 0, args.wenet_ctc
        assert len(args.whisper_encoder) == 0, args.whisper_encoder
        assert len(args.whisper_decoder) == 0, args.whisper_decoder
        assert len(args.tdnn_model) == 0, args.tdnn_model

        assert_file_exists(args.encoder)
        assert_file_exists(args.decoder)
        assert_file_exists(args.joiner)

        recognizer = sherpa_onnx.OfflineRecognizer.from_transducer(
            encoder=args.encoder,
            decoder=args.decoder,
            joiner=args.joiner,
            tokens=args.tokens,
            num_threads=args.num_threads,
            sample_rate=args.sample_rate,
            feature_dim=args.feature_dim,
            lm=args.lm,
            lm_scale=args.lm_scale,
            lodr_fst=args.lodr_fst,
            lodr_scale=args.lodr_scale,
            decoding_method=args.decoding_method,
            hotwords_file=args.hotwords_file,
            hotwords_score=args.hotwords_score,
            modeling_unit=args.modeling_unit,
            bpe_vocab=args.bpe_vocab,
            blank_penalty=args.blank_penalty,
            debug=args.debug,
        )
    elif args.paraformer:
        assert len(args.nemo_ctc) == 0, args.nemo_ctc
        assert len(args.wenet_ctc) == 0, args.wenet_ctc
        assert len(args.whisper_encoder) == 0, args.whisper_encoder
        assert len(args.whisper_decoder) == 0, args.whisper_decoder
        assert len(args.tdnn_model) == 0, args.tdnn_model

        assert_file_exists(args.paraformer)

        recognizer = sherpa_onnx.OfflineRecognizer.from_paraformer(
            paraformer=args.paraformer,
            tokens=args.tokens,
            num_threads=args.num_threads,
            sample_rate=args.sample_rate,
            feature_dim=args.feature_dim,
            decoding_method=args.decoding_method,
            debug=args.debug,
        )
    elif args.nemo_ctc:
        assert len(args.wenet_ctc) == 0, args.wenet_ctc
        assert len(args.whisper_encoder) == 0, args.whisper_encoder
        assert len(args.whisper_decoder) == 0, args.whisper_decoder
        assert len(args.tdnn_model) == 0, args.tdnn_model

        assert_file_exists(args.nemo_ctc)

        recognizer = sherpa_onnx.OfflineRecognizer.from_nemo_ctc(
            model=args.nemo_ctc,
            tokens=args.tokens,
            num_threads=args.num_threads,
            sample_rate=args.sample_rate,
            feature_dim=args.feature_dim,
            decoding_method=args.decoding_method,
            debug=args.debug,
        )
    elif args.wenet_ctc:
        assert len(args.whisper_encoder) == 0, args.whisper_encoder
        assert len(args.whisper_decoder) == 0, args.whisper_decoder
        assert len(args.tdnn_model) == 0, args.tdnn_model

        assert_file_exists(args.wenet_ctc)

        recognizer = sherpa_onnx.OfflineRecognizer.from_wenet_ctc(
            model=args.wenet_ctc,
            tokens=args.tokens,
            num_threads=args.num_threads,
            sample_rate=args.sample_rate,
            feature_dim=args.feature_dim,
            decoding_method=args.decoding_method,
            debug=args.debug,
        )
    elif args.whisper_encoder:
        assert len(args.tdnn_model) == 0, args.tdnn_model
        assert_file_exists(args.whisper_encoder)
        assert_file_exists(args.whisper_decoder)

        recognizer = sherpa_onnx.OfflineRecognizer.from_whisper(
            encoder=args.whisper_encoder,
            decoder=args.whisper_decoder,
            tokens=args.tokens,
            num_threads=args.num_threads,
            decoding_method=args.decoding_method,
            debug=args.debug,
            language=args.whisper_language,
            task=args.whisper_task,
            tail_paddings=args.whisper_tail_paddings,
        )
    elif args.tdnn_model:
        assert_file_exists(args.tdnn_model)

        recognizer = sherpa_onnx.OfflineRecognizer.from_tdnn_ctc(
            model=args.tdnn_model,
            tokens=args.tokens,
            sample_rate=args.sample_rate,
            feature_dim=args.feature_dim,
            num_threads=args.num_threads,
            decoding_method=args.decoding_method,
            debug=args.debug,
        )
    else:
        print("Please specify at least one model")
        return

    print("Started!")
    start_time = time.time()

    streams = []
    total_duration = 0
    for wave_filename in args.sound_files:
        assert_file_exists(wave_filename)
        samples, sample_rate = read_wave(wave_filename)
        duration = len(samples) / sample_rate
        total_duration += duration
        s = recognizer.create_stream()
        s.accept_waveform(sample_rate, samples)

        streams.append(s)

    recognizer.decode_streams(streams)
    results = [s.result.text for s in streams]
    end_time = time.time()
    print("Done!")

    for wave_filename, result in zip(args.sound_files, results):
        print(f"{wave_filename}\n{result}")
        print("-" * 10)

    elapsed_seconds = end_time - start_time
    rtf = elapsed_seconds / total_duration
    print(f"num_threads: {args.num_threads}")
    print(f"decoding_method: {args.decoding_method}")
    print(f"Wave duration: {total_duration:.3f} s")
    print(f"Elapsed time: {elapsed_seconds:.3f} s")
    print(
        f"Real time factor (RTF): {elapsed_seconds:.3f}/{total_duration:.3f} = {rtf:.3f}"
    )


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/offline-dolphin-ctc-decode-files.py
================================================
#!/usr/bin/env python3

"""
This file shows how to use a non-streaming CTC model from Dolphin
to decode files.

Please download model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
"""

from pathlib import Path
import time

import sherpa_onnx
import soundfile as sf


def create_recognizer():
    model = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx"
    tokens = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt"
    test_wav = (
        "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav"
    )

    if not Path(model).is_file() or not Path(test_wav).is_file():
        raise ValueError(
            """Please download model files from
            https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
            """
        )
    return (
        sherpa_onnx.OfflineRecognizer.from_dolphin_ctc(
            model=model,
            tokens=tokens,
            debug=True,
        ),
        test_wav,
    )


def main():
    recognizer, wave_filename = create_recognizer()

    audio, sample_rate = sf.read(wave_filename, dtype="float32", always_2d=True)
    audio = audio[:, 0]  # only use the first channel

    # audio is a 1-D float32 numpy array normalized to the range [-1, 1]
    # sample_rate does not need to be 16000 Hz

    start = time.time()
    stream = recognizer.create_stream()
    stream.accept_waveform(sample_rate, audio)
    recognizer.decode_stream(stream)
    end = time.time()

    print(wave_filename)
    print(stream.result)

    elapsed_seconds = end - start
    audio_duration = len(audio) / sample_rate
    real_time_factor = elapsed_seconds / audio_duration

    print(f"Elapsed seconds: {elapsed_seconds:.3f}")
    print(f"Audio duration in seconds: {audio_duration:.3f}")
    print(f"RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}")


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/offline-fire-red-asr-ctc-decode-files.py
================================================
#!/usr/bin/env python3

"""
This file shows how to use a non-streaming FireRedASR CTC model from
https://github.com/FireRedTeam/FireRedASR2S
to decode files.

Please download model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models

For instance,

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
tar xvf sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
rm sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
"""

import time
from pathlib import Path

import librosa
import numpy as np
import sherpa_onnx


def create_recognizer():
    model = "./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/model.int8.onnx"
    tokens = "./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/tokens.txt"
    test_wav_0 = "./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/test_wavs/0.wav"
    test_wav_1 = "./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/test_wavs/1.wav"
    test_wav_2 = "./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/test_wavs/2.wav"
    test_wav_3 = (
        "./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/test_wavs/3-sichuan.wav"
    )
    test_wav_4 = (
        "./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/test_wavs/4-tianjin.wav"
    )
    test_wav_5 = (
        "./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/test_wavs/5-henan.wav"
    )

    for f in [
        model,
        tokens,
        test_wav_0,
        test_wav_1,
        test_wav_2,
        test_wav_3,
        test_wav_4,
        test_wav_5,
    ]:
        if not Path(f).is_file():
            print(f"{f} does not exist")

            raise ValueError(
                """Please download model files from
                https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
                """
            )
    return (
        sherpa_onnx.OfflineRecognizer.from_fire_red_asr_ctc(
            model=model,
            tokens=tokens,
            num_threads=2,
        ),
        test_wav_0,
        test_wav_1,
        test_wav_2,
        test_wav_3,
        test_wav_4,
        test_wav_5,
    )


def load_audio(filename):
    audio, sample_rate = librosa.load(filename, sr=16000)
    assert sample_rate == 16000, sample_rate

    return np.ascontiguousarray(audio)


def decode_single_file(recognizer, filename):
    samples = load_audio(filename)

    start_time = time.time()

    stream = recognizer.create_stream()
    stream.accept_waveform(sample_rate=16000, waveform=samples)
    recognizer.decode_stream(stream)

    end_time = time.time()
    elapsed_seconds = end_time - start_time
    audio_duration = len(samples) / 16000
    real_time_factor = elapsed_seconds / audio_duration

    print("---")
    print(filename)
    print(stream.result)
    print(f"Elapsed seconds: {elapsed_seconds:.3f}")
    print(f"Audio duration in seconds: {audio_duration:.3f}")
    print(f"RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}")
    print()


def decode_multiple_files(recognizer, filenames):
    streams = []

    start_time = time.time()

    audio_duration = 0

    for filename in filenames:
        samples = load_audio(filename)
        audio_duration += len(samples) / 16000

        stream = recognizer.create_stream()
        stream.accept_waveform(sample_rate=16000, waveform=samples)
        streams.append(stream)

    recognizer.decode_streams(streams)

    end_time = time.time()
    elapsed_seconds = end_time - start_time
    real_time_factor = elapsed_seconds / audio_duration

    for name, stream in zip(filenames, streams):
        print("---")
        print(name)
        print(stream.result)
        print()

    print(f"Elapsed seconds: {elapsed_seconds:.3f}")
    print(f"Audio duration in seconds: {audio_duration:.3f}")
    print(f"RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}")
    print()
    print()


def main():
    recognizer, *filenames = create_recognizer()

    decode_single_file(recognizer, filenames[0])
    decode_single_file(recognizer, filenames[1])
    decode_multiple_files(recognizer, filenames[2:])


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/offline-fire-red-asr-decode-files.py
================================================
#!/usr/bin/env python3

"""
This file shows how to use a non-streaming FireRedAsr AED model from
https://github.com/FireRedTeam/FireRedASR
to decode files.

Please download model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models

For instance,

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
"""

from pathlib import Path

import sherpa_onnx
import soundfile as sf


def create_recognizer():
    encoder = "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx"
    decoder = "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/decoder.int8.onnx"
    tokens = "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/tokens.txt"
    test_wav = "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/0.wav"
    #  test_wav = "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/1.wav"
    #  test_wav = "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/2.wav"
    #  test_wav = "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/3.wav"
    #  test_wav = "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/8k.wav"
    #  test_wav = "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/3-sichuan.wav"
    #  test_wav = "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/4-tianjin.wav"
    #  test_wav = "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/5-henan.wav"

    if (
        not Path(encoder).is_file()
        or not Path(decoder).is_file()
        or not Path(test_wav).is_file()
    ):
        raise ValueError(
            """Please download model files from
            https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
            """
        )
    return (
        sherpa_onnx.OfflineRecognizer.from_fire_red_asr(
            encoder=encoder,
            decoder=decoder,
            tokens=tokens,
            debug=True,
        ),
        test_wav,
    )


def main():
    recognizer, wave_filename = create_recognizer()

    audio, sample_rate = sf.read(wave_filename, dtype="float32", always_2d=True)
    audio = audio[:, 0]  # only use the first channel

    # audio is a 1-D float32 numpy array normalized to the range [-1, 1]
    # sample_rate does not need to be 16000 Hz

    stream = recognizer.create_stream()
    stream.accept_waveform(sample_rate, audio)
    recognizer.decode_stream(stream)
    print(wave_filename)
    print(stream.result)


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/offline-funasr-nano-decode-files.py
================================================
#!/usr/bin/env python3
#
# Copyright (c)  2025  zengyw
#
"""
Decode audio files using FunASR-nano models with sherpa-onnx Python API.

This script demonstrates how to use FunASR-nano models for offline speech recognition.

Usage:
    python offline-funasr-nano-decode-files.py \
        --encoder-adaptor=/path/to/encoder_adaptor.onnx \
        --llm=/path/to/llm.onnx \
        --tokenizer=/path/to/Qwen3-0.6B \
        --embedding=/path/to/embedding.onnx \
        [--num-threads=4] \
        [--provider=cpu] \
        audio1.wav audio2.wav ...
"""

import argparse
import sys
from pathlib import Path

import soundfile as sf

try:
    import sherpa_onnx
except ImportError:
    print("Please install sherpa-onnx: pip install sherpa-onnx")
    sys.exit(1)


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
    )

    parser.add_argument(
        "--encoder-adaptor",
        type=str,
        required=True,
        help="Path to encoder_adaptor.onnx",
    )

    parser.add_argument(
        "--llm",
        type=str,
        required=True,
        help="Path to llm.onnx (unified KV cache model)",
    )

    parser.add_argument(
        "--tokenizer",
        type=str,
        required=True,
        help="Path to tokenizer directory (e.g., Qwen3-0.6B)",
    )

    parser.add_argument(
        "--embedding",
        type=str,
        required=True,
        help="Path to embedding.onnx",
    )

    parser.add_argument(
        "--system-prompt",
        type=str,
        default="You are a helpful assistant.",
        help="System prompt for FunASR-nano",
    )

    parser.add_argument(
        "--user-prompt",
        type=str,
        default="语音转写:",
        help="User prompt template for FunASR-nano",
    )

    parser.add_argument(
        "--max-new-tokens",
        type=int,
        default=512,
        help="Maximum number of new tokens to generate",
    )

    parser.add_argument(
        "--temperature",
        type=float,
        default=1e-6,
        help="Sampling temperature",
    )

    parser.add_argument(
        "--top-p",
        type=float,
        default=0.8,
        help="Top-p (nucleus) sampling threshold",
    )

    parser.add_argument(
        "--seed",
        type=int,
        default=42,
        help="Random seed",
    )

    parser.add_argument(
        "--language",
        type=str,
        default="",
        help="Language for transcription (empty string means None)",
    )

    parser.add_argument(
        "--itn",
        action="store_true",
        default=True,
        help="Whether to apply inverse text normalization (default: True)",
    )

    parser.add_argument(
        "--no-itn",
        dest="itn",
        action="store_false",
        help="Disable inverse text normalization",
    )

    parser.add_argument(
        "--hotwords",
        type=str,
        default="",
        help="Hotwords (comma-separated, e.g., 'Sherpa,FunASR')",
    )

    parser.add_argument(
        "--num-threads",
        type=int,
        default=2,
        help="Number of threads for neural network computation",
    )

    parser.add_argument(
        "--provider",
        type=str,
        default="cpu",
        choices=["cpu", "cuda"],
        help="Provider: cpu or cuda",
    )

    parser.add_argument(
        "--debug",
        action="store_true",
        help="True to print model information while loading",
    )

    parser.add_argument(
        "sound_files",
        type=str,
        nargs="+",
        help="The input sound file(s) to decode. "
        "Each file must be of single channel, 16-bit PCM encoded wav file. "
        "Its sample rate can be arbitrary and does not need to be 16kHz.",
    )

    return parser.parse_args()


def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
    return sherpa_onnx.OfflineRecognizer.from_funasr_nano(
        encoder_adaptor=args.encoder_adaptor,
        llm=args.llm,
        embedding=args.embedding,
        tokenizer=args.tokenizer,
        num_threads=args.num_threads,
        provider=args.provider,
        debug=args.debug,
        system_prompt=args.system_prompt,
        user_prompt=args.user_prompt,
        max_new_tokens=args.max_new_tokens,
        temperature=args.temperature,
        top_p=args.top_p,
        seed=args.seed,
        language=args.language,
        itn=args.itn,
        hotwords=args.hotwords,
    )


def decode_file(
    recognizer: sherpa_onnx.OfflineRecognizer,
    filename: str,
):
    """Decode a single audio file."""
    audio, sample_rate = sf.read(filename, dtype="float32", always_2d=True)
    audio = audio[:, 0]  # only use the first channel

    stream = recognizer.create_stream()
    stream.accept_waveform(sample_rate, audio)
    recognizer.decode_stream(stream)
    result = stream.result
    return result


def main():
    args = get_args()

    print("Creating recognizer...")
    recognizer = create_recognizer(args)
    print("Recognizer created!")

    print(f"\nDecoding {len(args.sound_files)} file(s)...\n")

    for sound_file in args.sound_files:
        if not Path(sound_file).exists():
            print(f"Error: File not found: {sound_file}", file=sys.stderr)
            continue

        print(f"Processing: {sound_file}")
        result = decode_file(recognizer, sound_file)

        print(f"Text: {result.text}")
        if result.tokens:
            print(f"Tokens: {result.tokens}")
        if result.timestamps:
            print(f"Timestamps: {[f'{t:.2f}' for t in result.timestamps]}")
        print()


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/offline-medasr-ctc-decode-files.py
================================================
#!/usr/bin/env python3

"""
This file shows how to use a non-streaming Google MedASR CTC model from
https://huggingface.co/google/medasr
to decode files.

Please download model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models

For instance,

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
tar xvf sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
rm sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
"""

import time
from pathlib import Path

import librosa
import numpy as np
import sherpa_onnx


def create_recognizer():
    model = "./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/model.int8.onnx"
    tokens = "./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/tokens.txt"
    test_wav_0 = "./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/test_wavs/0.wav"
    test_wav_1 = "./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/test_wavs/1.wav"
    test_wav_2 = "./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/test_wavs/2.wav"
    test_wav_3 = "./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/test_wavs/3.wav"
    test_wav_4 = "./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/test_wavs/4.wav"
    test_wav_5 = "./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/test_wavs/5.wav"

    for f in [
        model,
        tokens,
        test_wav_0,
        test_wav_1,
        test_wav_2,
        test_wav_3,
        test_wav_4,
        test_wav_5,
    ]:
        if not Path(f).is_file():
            print(f"{f} does not exist")

            raise ValueError(
                """Please download model files from
                https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
                """
            )
    return (
        sherpa_onnx.OfflineRecognizer.from_medasr_ctc(
            model=model,
            tokens=tokens,
            num_threads=2,
        ),
        test_wav_0,
        test_wav_1,
        test_wav_2,
        test_wav_3,
        test_wav_4,
        test_wav_5,
    )


def load_audio(filename):
    audio, sample_rate = librosa.load(filename, sr=16000)
    assert sample_rate == 16000, sample_rate

    return np.ascontiguousarray(audio)


def decode_single_file(recognizer, filename):
    samples = load_audio(filename)

    start_time = time.time()

    stream = recognizer.create_stream()
    stream.accept_waveform(sample_rate=16000, waveform=samples)
    recognizer.decode_stream(stream)

    end_time = time.time()
    elapsed_seconds = end_time - start_time
    audio_duration = len(samples) / 16000
    real_time_factor = elapsed_seconds / audio_duration

    print("---")
    print(filename)
    print(stream.result)
    print(f"Elapsed seconds: {elapsed_seconds:.3f}")
    print(f"Audio duration in seconds: {audio_duration:.3f}")
    print(f"RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}")
    print()


def decode_multiple_files(recognizer, filenames):
    streams = []

    start_time = time.time()

    audio_duration = 0

    for filename in filenames:
        samples = load_audio(filename)
        audio_duration += len(samples) / 16000

        stream = recognizer.create_stream()
        stream.accept_waveform(sample_rate=16000, waveform=samples)
        streams.append(stream)

    recognizer.decode_streams(streams)

    end_time = time.time()
    elapsed_seconds = end_time - start_time
    real_time_factor = elapsed_seconds / audio_duration

    for name, stream in zip(filenames, streams):
        print("---")
        print(name)
        print(stream.result)
        print()

    print(f"Elapsed seconds: {elapsed_seconds:.3f}")
    print(f"Audio duration in seconds: {audio_duration:.3f}")
    print(f"RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}")
    print()
    print()


def main():
    recognizer, *filenames = create_recognizer()

    decode_single_file(recognizer, filenames[0])
    decode_single_file(recognizer, filenames[1])
    decode_multiple_files(recognizer, filenames[2:])


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/offline-moonshine-decode-files-v2.py
================================================
#!/usr/bin/env python3

"""
This file shows how to use a non-streaming Moonshine model from
https://github.com/usefulsensors/moonshine
to decode files.

Please download model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models

For instance,

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
tar xvf sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
rm sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
"""

import datetime as dt
from pathlib import Path

import sherpa_onnx
import soundfile as sf


def create_recognizer():
    encoder = "./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/encoder_model.ort"
    decoder = (
        "./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/decoder_model_merged.ort"
    )
    tokens = "./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/tokens.txt"
    test_wav = "./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/test_wavs/0.wav"

    if not Path(encoder).is_file() or not Path(test_wav).is_file():
        raise ValueError(
            """Please download model files from
            https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
            """
        )
    return (
        sherpa_onnx.OfflineRecognizer.from_moonshine_v2(
            encoder=encoder,
            decoder=decoder,
            tokens=tokens,
            debug=False,  # Set to True to see more logs
        ),
        test_wav,
    )


def main():
    recognizer, wave_filename = create_recognizer()

    audio, sample_rate = sf.read(wave_filename, dtype="float32", always_2d=True)
    audio = audio[:, 0]  # only use the first channel

    # audio is a 1-D float32 numpy array normalized to the range [-1, 1]
    # sample_rate does not need to be 16000 Hz

    start_t = dt.datetime.now()

    stream = recognizer.create_stream()
    stream.accept_waveform(sample_rate, audio)
    recognizer.decode_stream(stream)

    end_t = dt.datetime.now()
    elapsed_seconds = (end_t - start_t).total_seconds()
    duration = audio.shape[-1] / sample_rate
    rtf = elapsed_seconds / duration

    print(stream.result)
    print(wave_filename)
    print("Text:", stream.result.text)
    print(f"Audio duration:\t{duration:.3f} s")
    print(f"Elapsed:\t{elapsed_seconds:.3f} s")
    print(f"RTF = {elapsed_seconds:.3f}/{duration:.3f} = {rtf:.3f}")


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/offline-moonshine-decode-files.py
================================================
#!/usr/bin/env python3

"""
This file shows how to use a non-streaming Moonshine model from
https://github.com/usefulsensors/moonshine
to decode files.

Please download model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models

For instance,

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
"""

import datetime as dt
from pathlib import Path

import sherpa_onnx
import soundfile as sf


def create_recognizer():
    preprocessor = "./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx"
    encoder = "./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx"
    uncached_decoder = "./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx"
    cached_decoder = "./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx"

    tokens = "./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt"
    test_wav = "./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav"

    if not Path(preprocessor).is_file() or not Path(test_wav).is_file():
        raise ValueError(
            """Please download model files from
            https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
            """
        )
    return (
        sherpa_onnx.OfflineRecognizer.from_moonshine(
            preprocessor=preprocessor,
            encoder=encoder,
            uncached_decoder=uncached_decoder,
            cached_decoder=cached_decoder,
            tokens=tokens,
            debug=True,
        ),
        test_wav,
    )


def main():
    recognizer, wave_filename = create_recognizer()

    audio, sample_rate = sf.read(wave_filename, dtype="float32", always_2d=True)
    audio = audio[:, 0]  # only use the first channel

    # audio is a 1-D float32 numpy array normalized to the range [-1, 1]
    # sample_rate does not need to be 16000 Hz

    start_t = dt.datetime.now()

    stream = recognizer.create_stream()
    stream.accept_waveform(sample_rate, audio)
    recognizer.decode_stream(stream)

    end_t = dt.datetime.now()
    elapsed_seconds = (end_t - start_t).total_seconds()
    duration = audio.shape[-1] / sample_rate
    rtf = elapsed_seconds / duration

    print(stream.result)
    print(wave_filename)
    print("Text:", stream.result.text)
    print(f"Audio duration:\t{duration:.3f} s")
    print(f"Elapsed:\t{elapsed_seconds:.3f} s")
    print(f"RTF = {elapsed_seconds:.3f}/{duration:.3f} = {rtf:.3f}")


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/offline-nemo-canary-decode-files.py
================================================
#!/usr/bin/env python3

"""
This file shows how to use a non-streaming Canary model from NeMo
to decode files.

Please download model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models


The example model supports 4 languages and it is converted from
https://huggingface.co/nvidia/canary-180m-flash

It supports automatic speech-to-text recognition (ASR) in 4 languages
(English, German, French, Spanish) and translation from English to
German/French/Spanish and from German/French/Spanish to English with or
without punctuation and capitalization (PnC).
"""

from pathlib import Path

import sherpa_onnx
import soundfile as sf


def create_recognizer():
    encoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx"
    decoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx"
    tokens = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt"

    en_wav = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav"
    de_wav = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/de.wav"

    if not Path(encoder).is_file() or not Path(en_wav).is_file():
        raise ValueError(
            """Please download model files from
            https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
            """
        )
    return (
        sherpa_onnx.OfflineRecognizer.from_nemo_canary(
            encoder=encoder,
            decoder=decoder,
            tokens=tokens,
            debug=True,
        ),
        en_wav,
        de_wav,
    )


def decode(recognizer, samples, sample_rate, src_lang, tgt_lang):
    stream = recognizer.create_stream()
    stream.accept_waveform(sample_rate, samples)

    recognizer.recognizer.set_config(
        config=sherpa_onnx.OfflineRecognizerConfig(
            model_config=sherpa_onnx.OfflineModelConfig(
                canary=sherpa_onnx.OfflineCanaryModelConfig(
                    src_lang=src_lang,
                    tgt_lang=tgt_lang,
                )
            )
        )
    )

    recognizer.decode_stream(stream)
    return stream.result.text


def main():
    recognizer, en_wav, de_wav = create_recognizer()

    en_audio, en_sample_rate = sf.read(en_wav, dtype="float32", always_2d=True)
    en_audio = en_audio[:, 0]  # only use the first channel

    de_audio, de_sample_rate = sf.read(de_wav, dtype="float32", always_2d=True)
    de_audio = de_audio[:, 0]  # only use the first channel

    en_wav_en_result = decode(
        recognizer, en_audio, en_sample_rate, src_lang="en", tgt_lang="en"
    )
    en_wav_es_result = decode(
        recognizer, en_audio, en_sample_rate, src_lang="en", tgt_lang="es"
    )
    en_wav_de_result = decode(
        recognizer, en_audio, en_sample_rate, src_lang="en", tgt_lang="de"
    )
    en_wav_fr_result = decode(
        recognizer, en_audio, en_sample_rate, src_lang="en", tgt_lang="fr"
    )

    de_wav_en_result = decode(
        recognizer, de_audio, de_sample_rate, src_lang="de", tgt_lang="en"
    )
    de_wav_de_result = decode(
        recognizer, de_audio, de_sample_rate, src_lang="de", tgt_lang="de"
    )

    print("en_wav_en_result", en_wav_en_result)
    print("en_wav_es_result", en_wav_es_result)
    print("en_wav_de_result", en_wav_de_result)
    print("en_wav_fr_result", en_wav_fr_result)
    print("-" * 10)
    print("de_wav_en_result", de_wav_en_result)
    print("de_wav_de_result", de_wav_de_result)


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/offline-nemo-ctc-decode-files.py
================================================
#!/usr/bin/env python3

"""
This file shows how to use a non-streaming CTC model from NeMo
to decode files.

Please download model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models


The example model supports 10 languages and it is converted from
https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_multilingual_fastconformer_hybrid_large_pc
"""

from pathlib import Path

import sherpa_onnx
import soundfile as sf


def create_recognizer():
    model = "./sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/model.onnx"
    tokens = "./sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt"

    test_wav = "./sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/test_wavs/de-german.wav"
    #  test_wav = "./sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/test_wavs/en-english.wav"
    #  test_wav = "./sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/test_wavs/es-spanish.wav"
    #  test_wav = "./sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/test_wavs/fr-french.wav"
    #  test_wav = "./sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/test_wavs/hr-croatian.wav"
    #  test_wav = "./sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/test_wavs/it-italian.wav"
    #  test_wav = "./sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/test_wavs/po-polish.wav"
    #  test_wav = "./sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/test_wavs/ru-russian.wav"
    #  test_wav = "./sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/test_wavs/uk-ukrainian.wav"

    if not Path(model).is_file() or not Path(test_wav).is_file():
        raise ValueError(
            """Please download model files from
            https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
            """
        )
    return (
        sherpa_onnx.OfflineRecognizer.from_nemo_ctc(
            model=model,
            tokens=tokens,
            debug=True,
        ),
        test_wav,
    )


def main():
    recognizer, wave_filename = create_recognizer()

    audio, sample_rate = sf.read(wave_filename, dtype="float32", always_2d=True)
    audio = audio[:, 0]  # only use the first channel

    # audio is a 1-D float32 numpy array normalized to the range [-1, 1]
    # sample_rate does not need to be 16000 Hz

    stream = recognizer.create_stream()
    stream.accept_waveform(sample_rate, audio)
    recognizer.decode_stream(stream)
    print(wave_filename)
    print(stream.result)


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/offline-nemo-parakeet-decode-file.py
================================================
# Example using the sherpa-onnx Python API and sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8 model
# Prints recognized text, per-token timestamps, and durations

import os
import sys
import sherpa_onnx
import soundfile as sf

wav_filename = "./sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8/test_wavs/en.wav"
encoder = "./sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8/encoder.int8.onnx"
decoder = "./sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8/decoder.int8.onnx"
joiner = "./sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8/joiner.int8.onnx"
tokens = "./sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8/tokens.txt"

if not os.path.exists(wav_filename):
    print(f"File not found: {wav_filename}")
    sys.exit(1)


recognizer = sherpa_onnx.OfflineRecognizer.from_transducer(
    encoder,
    decoder,
    joiner,
    tokens,
    num_threads=1,
    provider="cpu",
    debug=False,
    decoding_method="greedy_search",
    model_type="nemo_transducer"
)

audio, sample_rate = sf.read(wav_filename, dtype="float32", always_2d=True)
audio = audio[:, 0]  # use first channel if multi-channel
stream = recognizer.create_stream()
stream.accept_waveform(sample_rate, audio)
recognizer.decode_stream(stream)
result = stream.result

print(f"Recognized text: {result.text}")

if hasattr(result, "tokens") and hasattr(result, "timestamps") and hasattr(result, "durations"):
    print("Token\tTimestamp\tDuration")
    for token, ts, dur in zip(result.tokens, result.timestamps, result.durations):
        print(f"{token}\t{ts:.2f}\t{dur:.2f}")
else:
    print("Timestamps or durations not available.")


================================================
FILE: python-api-examples/offline-nemo-transducer-decode-files.py
================================================
#!/usr/bin/env python3

"""
This file shows how to use a non-streaming transducer model from NeMo
to decode files.

Please download model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models


The example model supports 10 languages and it is converted from
https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_multilingual_fastconformer_hybrid_large_pc
"""

from pathlib import Path

import sherpa_onnx
import soundfile as sf


def create_recognizer():
    encoder = "./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/encoder.onnx"
    decoder = "./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/decoder.onnx"
    joiner = "./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/joiner.onnx"
    tokens = "./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt"

    test_wav = "./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/test_wavs/de-german.wav"
    #  test_wav = "./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/test_wavs/en-english.wav"
    #  test_wav = "./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/test_wavs/es-spanish.wav"
    #  test_wav = "./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/test_wavs/fr-french.wav"
    #  test_wav = "./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/test_wavs/hr-croatian.wav"
    #  test_wav = "./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/test_wavs/it-italian.wav"
    #  test_wav = "./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/test_wavs/po-polish.wav"
    #  test_wav = "./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/test_wavs/ru-russian.wav"
    #  test_wav = "./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/test_wavs/uk-ukrainian.wav"

    if not Path(encoder).is_file() or not Path(test_wav).is_file():
        raise ValueError(
            """Please download model files from
            https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
            """
        )
    return (
        sherpa_onnx.OfflineRecognizer.from_transducer(
            encoder=encoder,
            decoder=decoder,
            joiner=joiner,
            tokens=tokens,
            model_type="nemo_transducer",
            debug=True,
        ),
        test_wav,
    )


def main():
    recognizer, wave_filename = create_recognizer()

    audio, sample_rate = sf.read(wave_filename, dtype="float32", always_2d=True)
    audio = audio[:, 0]  # only use the first channel

    # audio is a 1-D float32 numpy array normalized to the range [-1, 1]
    # sample_rate does not need to be 16000 Hz

    stream = recognizer.create_stream()
    stream.accept_waveform(sample_rate, audio)
    recognizer.decode_stream(stream)
    print(wave_filename)
    print(stream.result)


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/offline-omnilingual-asr-ctc-decode-files.py
================================================
#!/usr/bin/env python3

"""
This file shows how to use a non-streaming Omnilingual ASR CTC model from
https://github.com/facebookresearch/omnilingual-asr
to decode files.

Please download model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models

For instance,

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
tar xvf sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
rm sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
"""

from pathlib import Path

import numpy as np
import time
import sherpa_onnx
import soundfile as sf


def create_recognizer():
    model = "./sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/model.int8.onnx"
    tokens = "./sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/tokens.txt"
    test_wav_en = "./sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/test_wavs/en.wav"
    test_wav_de = "./sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/test_wavs/de.wav"
    test_wav_fr = "./sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/test_wavs/fr.wav"
    test_wav_es = "./sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/test_wavs/es.wav"

    for f in [model, tokens, test_wav_en, test_wav_de, test_wav_fr, test_wav_es]:
        if not Path(f).is_file():
            print(f"{f} does not exist")

            raise ValueError(
                """Please download model files from
                https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
                """
            )
    return (
        sherpa_onnx.OfflineRecognizer.from_omnilingual_asr_ctc(
            model=model,
            tokens=tokens,
            num_threads=1,
        ),
        test_wav_en,
        test_wav_de,
        test_wav_fr,
        test_wav_es,
    )


def load_audio(filename):
    audio, sample_rate = sf.read(filename, dtype="float32", always_2d=True)
    audio = audio[:, 0]  # only use the first channel
    if sample_rate != 16000:
        import librosa

        audio = librosa.resample(audio, orig_sr=sample_rate, target_sr=16000)

    return np.ascontiguousarray(audio)


def decode_single_file(recognizer, filename):
    samples = load_audio(filename)

    start_time = time.time()

    stream = recognizer.create_stream()
    stream.accept_waveform(sample_rate=16000, waveform=samples)
    recognizer.decode_stream(stream)

    end_time = time.time()
    elapsed_seconds = end_time - start_time
    audio_duration = len(samples) / 16000
    real_time_factor = elapsed_seconds / audio_duration

    print("---")
    print(filename)
    print(stream.result)
    print(f"Elapsed seconds: {elapsed_seconds:.3f}")
    print(f"Audio duration in seconds: {audio_duration:.3f}")
    print(f"RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}")
    print()


def decode_multiple_files(recognizer, filenames):
    streams = []

    start_time = time.time()

    audio_duration = 0

    for filename in filenames:
        samples = load_audio(filename)
        audio_duration += len(samples) / 16000

        stream = recognizer.create_stream()
        stream.accept_waveform(sample_rate=16000, waveform=samples)
        streams.append(stream)

    recognizer.decode_streams(streams)

    end_time = time.time()
    elapsed_seconds = end_time - start_time
    real_time_factor = elapsed_seconds / audio_duration

    for name, stream in zip(filenames, streams):
        print("---")
        print(name)
        print(stream.result)
        print()

    print(f"Elapsed seconds: {elapsed_seconds:.3f}")
    print(f"Audio duration in seconds: {audio_duration:.3f}")
    print(f"RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}")
    print()
    print()


def main():
    recognizer, *filenames = create_recognizer()

    decode_single_file(recognizer, filenames[0])
    decode_single_file(recognizer, filenames[1])
    decode_multiple_files(recognizer, filenames[2:])


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/offline-omnilingual-asr-ctc-v2-decode-files.py
================================================
#!/usr/bin/env python3

"""
This file shows how to use a non-streaming Omnilingual ASR CTC v2 model from
https://github.com/facebookresearch/omnilingual-asr
to decode files.

Please download model files from
https://huggingface.co/Edison2ST/sherpa-onnx-omnilingual-asr-1600-languages-ctc-v2

For instance,

wget https://huggingface.co/Edison2ST/sherpa-onnx-omnilingual-asr-1600-languages-ctc-v2/resolve/main/sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-v2-int8-2026-02-05.tar.bz2 # noqa: E501
tar xvf sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-v2-int8-2026-02-05.tar.bz2
rm sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-v2-int8-2026-02-05.tar.bz2
"""

from pathlib import Path

import numpy as np
import time
import sherpa_onnx
import soundfile as sf


def create_recognizer():
    model = "./sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-v2-int8-2026-02-05/model.int8.onnx"
    tokens = "./sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-v2-int8-2026-02-05/tokens.txt"
    test_wav_en = "./sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-v2-int8-2026-02-05/test_wavs/en.wav"
    test_wav_de = "./sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-v2-int8-2026-02-05/test_wavs/de.wav"
    test_wav_fr = "./sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-v2-int8-2026-02-05/test_wavs/fr.wav"
    test_wav_es = "./sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-v2-int8-2026-02-05/test_wavs/es.wav"

    for f in [model, tokens, test_wav_en, test_wav_de, test_wav_fr, test_wav_es]:
        if not Path(f).is_file():
            print(f"{f} does not exist")

            raise ValueError("""Please download model files from
                https://huggingface.co/Edison2ST/sherpa-onnx-omnilingual-asr-1600-languages-ctc-v2
                """)
    return (
        sherpa_onnx.OfflineRecognizer.from_omnilingual_asr_ctc(
            model=model,
            tokens=tokens,
            num_threads=1,
        ),
        test_wav_en,
        test_wav_de,
        test_wav_fr,
        test_wav_es,
    )


def load_audio(filename):
    audio, sample_rate = sf.read(filename, dtype="float32", always_2d=True)
    audio = audio[:, 0]  # only use the first channel
    if sample_rate != 16000:
        import librosa

        audio = librosa.resample(audio, orig_sr=sample_rate, target_sr=16000)

    return np.ascontiguousarray(audio)


def decode_single_file(recognizer, filename):
    samples = load_audio(filename)

    start_time = time.time()

    stream = recognizer.create_stream()
    stream.accept_waveform(sample_rate=16000, waveform=samples)
    recognizer.decode_stream(stream)

    end_time = time.time()
    elapsed_seconds = end_time - start_time
    audio_duration = len(samples) / 16000
    real_time_factor = elapsed_seconds / audio_duration

    print("---")
    print(filename)
    print(stream.result)
    print(f"Elapsed seconds: {elapsed_seconds:.3f}")
    print(f"Audio duration in seconds: {audio_duration:.3f}")
    print(f"RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}")
    print()


def decode_multiple_files(recognizer, filenames):
    streams = []

    start_time = time.time()

    audio_duration = 0

    for filename in filenames:
        samples = load_audio(filename)
        audio_duration += len(samples) / 16000

        stream = recognizer.create_stream()
        stream.accept_waveform(sample_rate=16000, waveform=samples)
        streams.append(stream)

    recognizer.decode_streams(streams)

    end_time = time.time()
    elapsed_seconds = end_time - start_time
    real_time_factor = elapsed_seconds / audio_duration

    for name, stream in zip(filenames, streams):
        print("---")
        print(name)
        print(stream.result)
        print()

    print(f"Elapsed seconds: {elapsed_seconds:.3f}")
    print(f"Audio duration in seconds: {audio_duration:.3f}")
    print(f"RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}")
    print()
    print()


def main():
    recognizer, *filenames = create_recognizer()

    decode_single_file(recognizer, filenames[0])
    decode_single_file(recognizer, filenames[1])
    decode_multiple_files(recognizer, filenames[2:])


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/offline-sense-voice-ctc-decode-files-with-hr.py
================================================
#!/usr/bin/env python3

"""
This file shows how to use a non-streaming SenseVoice CTC model from
https://github.com/FunAudioLLM/SenseVoice
to decode files.

Please download model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models

For instance,

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
tar xf dict.tar.bz2

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
"""

from pathlib import Path

import sherpa_onnx
import soundfile as sf


def create_recognizer():
    model = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.onnx"
    tokens = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt"
    test_wav = "./test-hr.wav"

    if not Path(model).is_file() or not Path(test_wav).is_file():
        raise ValueError(
            """Please download model files from
            https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
            and
            https://github.com/k2-fsa/sherpa-onnx/releases/tag/hr-files
            """
        )
    return (
        sherpa_onnx.OfflineRecognizer.from_sense_voice(
            model=model,
            tokens=tokens,
            use_itn=True,
            debug=True,
            hr_lexicon="./lexicon.txt",
            hr_rule_fsts="./replace.fst",
        ),
        test_wav,
    )


def main():
    recognizer, wave_filename = create_recognizer()

    audio, sample_rate = sf.read(wave_filename, dtype="float32", always_2d=True)
    audio = audio[:, 0]  # only use the first channel

    # audio is a 1-D float32 numpy array normalized to the range [-1, 1]
    # sample_rate does not need to be 16000 Hz

    stream = recognizer.create_stream()
    stream.accept_waveform(sample_rate, audio)
    recognizer.decode_stream(stream)
    print(wave_filename)
    print(stream.result)


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/offline-sense-voice-ctc-decode-files.py
================================================
#!/usr/bin/env python3

"""
This file shows how to use a non-streaming SenseVoice CTC model from
https://github.com/FunAudioLLM/SenseVoice
to decode files.

Please download model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models

For instance,

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
"""

from pathlib import Path

import sherpa_onnx
import soundfile as sf


def create_recognizer():
    model = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.onnx"
    tokens = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt"
    test_wav = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav"
    #  test_wav = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/en.wav"
    #  test_wav = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/ja.wav"
    #  test_wav = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/ko.wav"
    #  test_wav = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/yue.wav"

    if not Path(model).is_file() or not Path(test_wav).is_file():
        raise ValueError(
            """Please download model files from
            https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
            """
        )
    return (
        sherpa_onnx.OfflineRecognizer.from_sense_voice(
            model=model,
            tokens=tokens,
            use_itn=True,
            debug=True,
        ),
        test_wav,
    )


def main():
    recognizer, wave_filename = create_recognizer()

    audio, sample_rate = sf.read(wave_filename, dtype="float32", always_2d=True)
    audio = audio[:, 0]  # only use the first channel

    # audio is a 1-D float32 numpy array normalized to the range [-1, 1]
    # sample_rate does not need to be 16000 Hz

    stream = recognizer.create_stream()
    stream.accept_waveform(sample_rate, audio)
    recognizer.decode_stream(stream)
    print(wave_filename)
    print(stream.result)


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/offline-source-separation-spleeter.py
================================================
#!/usr/bin/env python3
# Copyright (c)  2025  Xiaomi Corporation

"""
This file shows how to use spleeter for source separation.

Please first download a spleeter model from

https://github.com/k2-fsa/sherpa-onnx/releases/tag/source-separation-models

The following is an example:

    wget https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/sherpa-onnx-spleeter-2stems-fp16.tar.bz2

Please also download a test file

    wget https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/qi-feng-le-zh.wav

The test wav file is 16-bit encoded with 2 channels. If you have other
formats, e.g., .mp4 or .mp3, please first use ffmpeg to convert it.
For instance

    ffmpeg -i your.mp4 -vn -acodec pcm_s16le -ar 44100 -ac 2 out.wav

Then you can use out.wav as input for this example.
"""

import time
from pathlib import Path

import numpy as np
import sherpa_onnx
import soundfile as sf


def create_offline_source_separation():
    # Please read the help message at the beginning of this file
    # to download model files
    vocals = "./sherpa-onnx-spleeter-2stems-fp16/vocals.fp16.onnx"
    accompaniment = "./sherpa-onnx-spleeter-2stems-fp16/accompaniment.fp16.onnx"

    if not Path(vocals).is_file():
        raise ValueError(f"{vocals} does not exist.")

    if not Path(accompaniment).is_file():
        raise ValueError(f"{accompaniment} does not exist.")

    config = sherpa_onnx.OfflineSourceSeparationConfig(
        model=sherpa_onnx.OfflineSourceSeparationModelConfig(
            spleeter=sherpa_onnx.OfflineSourceSeparationSpleeterModelConfig(
                vocals=vocals,
                accompaniment=accompaniment,
            ),
            num_threads=1,
            debug=False,
            provider="cpu",
        )
    )
    if not config.validate():
        raise ValueError("Please check your config.")

    return sherpa_onnx.OfflineSourceSeparation(config)


def load_audio():
    # Please read the help message at the beginning of this file to download
    # the following wav_file
    wav_file = "./qi-feng-le-zh.wav"
    if not Path(wav_file).is_file():
        raise ValueError(f"{wav_file} does not exist")

    samples, sample_rate = sf.read(wav_file, dtype="float32", always_2d=True)
    samples = np.transpose(samples)
    # now samples is of shape (num_channels, num_samples)
    assert (
        samples.shape[1] > samples.shape[0]
    ), f"You should use (num_channels, num_samples). {samples.shape}"

    assert (
        samples.dtype == np.float32
    ), f"Expect np.float32 as dtype. Given: {samples.dtype}"

    return samples, sample_rate


def main():
    sp = create_offline_source_separation()
    samples, sample_rate = load_audio()
    samples = np.ascontiguousarray(samples)

    start = time.time()
    output = sp.process(sample_rate=sample_rate, samples=samples)
    end = time.time()

    print("output.sample_rate", output.sample_rate)

    assert len(output.stems) == 2, len(output.stems)

    vocals = output.stems[0].data
    non_vocals = output.stems[1].data
    # vocals.shape (num_channels, num_samples)

    vocals = np.transpose(vocals)
    non_vocals = np.transpose(non_vocals)

    # vocals.shape (num_samples,num_channels)

    sf.write("./spleeter-vocals.wav", vocals, samplerate=output.sample_rate)
    sf.write("./spleeter-non-vocals.wav", non_vocals, samplerate=output.sample_rate)

    elapsed_seconds = end - start
    audio_duration = samples.shape[1] / sample_rate
    real_time_factor = elapsed_seconds / audio_duration

    print("Saved to ./spleeter-vocals.wav and ./spleeter-non-vocals.wav")
    print(f"Elapsed seconds: {elapsed_seconds:.3f}")
    print(f"Audio duration in seconds: {audio_duration:.3f}")
    print(f"RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}")


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/offline-source-separation-uvr.py
================================================
#!/usr/bin/env python3
# Copyright (c)  2025  Xiaomi Corporation

"""
This file shows how to use UVR for source separation.

Please first download a UVR model from

https://github.com/k2-fsa/sherpa-onnx/releases/tag/source-separation-models

The following is an example:

    wget https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/UVR_MDXNET_9482.onnx

Please also download a test file

    wget https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/qi-feng-le-zh.wav

The test wav file is 16-bit encoded with 2 channels. If you have other
formats, e.g., .mp4 or .mp3, please first use ffmpeg to convert it.
For instance

    ffmpeg -i your.mp4 -vn -acodec pcm_s16le -ar 44100 -ac 2 out.wav

Then you can use out.wav as input for this example.
"""

import time
from pathlib import Path

import numpy as np
import sherpa_onnx
import soundfile as sf


def create_offline_source_separation():
    # Please read the help message at the beginning of this file
    # to download model files
    model = "./UVR_MDXNET_9482.onnx"

    if not Path(model).is_file():
        raise ValueError(f"{model} does not exist.")

    config = sherpa_onnx.OfflineSourceSeparationConfig(
        model=sherpa_onnx.OfflineSourceSeparationModelConfig(
            uvr=sherpa_onnx.OfflineSourceSeparationUvrModelConfig(
                model=model,
            ),
            num_threads=1,
            debug=False,
            provider="cpu",
        )
    )
    if not config.validate():
        raise ValueError("Please check your config.")

    return sherpa_onnx.OfflineSourceSeparation(config)


def load_audio():
    # Please read the help message at the beginning of this file to download
    # the following wav_file
    wav_file = "./qi-feng-le-zh.wav"
    if not Path(wav_file).is_file():
        raise ValueError(f"{wav_file} does not exist")

    samples, sample_rate = sf.read(wav_file, dtype="float32", always_2d=True)
    samples = np.transpose(samples)
    # now samples is of shape (num_channels, num_samples)
    assert (
        samples.shape[1] > samples.shape[0]
    ), f"You should use (num_channels, num_samples). {samples.shape}"

    assert (
        samples.dtype == np.float32
    ), f"Expect np.float32 as dtype. Given: {samples.dtype}"

    return samples, sample_rate


def main():
    sp = create_offline_source_separation()
    samples, sample_rate = load_audio()
    samples = np.ascontiguousarray(samples)

    print("Started. Please wait")
    start = time.time()
    output = sp.process(sample_rate=sample_rate, samples=samples)
    end = time.time()

    print("output.sample_rate", output.sample_rate)

    assert len(output.stems) == 2, len(output.stems)

    vocals = output.stems[0].data
    non_vocals = output.stems[1].data
    # vocals.shape (num_channels, num_samples)

    vocals = np.transpose(vocals)
    non_vocals = np.transpose(non_vocals)

    # vocals.shape (num_samples,num_channels)

    sf.write("./uvr-vocals.wav", vocals, samplerate=output.sample_rate)
    sf.write("./uvr-non-vocals.wav", non_vocals, samplerate=output.sample_rate)

    elapsed_seconds = end - start
    audio_duration = samples.shape[1] / sample_rate
    real_time_factor = elapsed_seconds / audio_duration

    print("Saved to ./uvr-vocals.wav and ./uvr-non-vocals.wav")
    print(f"Elapsed seconds: {elapsed_seconds:.3f}")
    print(f"Audio duration in seconds: {audio_duration:.3f}")
    print(f"RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}")


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/offline-speaker-diarization.py
================================================
#!/usr/bin/env python3
# Copyright (c)  2024  Xiaomi Corporation

"""
This file shows how to use sherpa-onnx Python API for
offline/non-streaming speaker diarization.

Usage:

Step 1: Download a speaker segmentation model

Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
for a list of available models. The following is an example

  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2

Step 2: Download a speaker embedding extractor model

Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
for a list of available models. The following is an example

  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

Step 3. Download test wave files

Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
for a list of available test wave files. The following is an example

  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav

Step 4. Run it

    python3 ./python-api-examples/offline-speaker-diarization.py

"""
from pathlib import Path

import sherpa_onnx
import soundfile as sf
import librosa


def resample_audio(audio, sample_rate, target_sample_rate):
    """
    Resample audio to target sample rate using librosa
    """
    if sample_rate != target_sample_rate:
        print(f"Resampling audio from {sample_rate}Hz to {target_sample_rate}Hz...")
        audio = librosa.resample(audio, orig_sr=sample_rate, target_sr=target_sample_rate)
        print(f"Resampling completed. New audio shape: {audio.shape}")
        return audio, target_sample_rate
    return audio, sample_rate


def init_speaker_diarization(num_speakers: int = -1, cluster_threshold: float = 0.5):
    """
    Args:
      num_speakers:
        If you know the actual number of speakers in the wave file, then please
        specify it. Otherwise, leave it to -1
      cluster_threshold:
        If num_speakers is -1, then this threshold is used for clustering.
        A smaller cluster_threshold leads to more clusters, i.e., more speakers.
        A larger cluster_threshold leads to fewer clusters, i.e., fewer speakers.
    """
    segmentation_model = "./sherpa-onnx-pyannote-segmentation-3-0/model.onnx"
    embedding_extractor_model = (
        "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx"
    )

    config = sherpa_onnx.OfflineSpeakerDiarizationConfig(
        segmentation=sherpa_onnx.OfflineSpeakerSegmentationModelConfig(
            pyannote=sherpa_onnx.OfflineSpeakerSegmentationPyannoteModelConfig(
                model=segmentation_model
            ),
        ),
        embedding=sherpa_onnx.SpeakerEmbeddingExtractorConfig(
            model=embedding_extractor_model
        ),
        clustering=sherpa_onnx.FastClusteringConfig(
            num_clusters=num_speakers, threshold=cluster_threshold
        ),
        min_duration_on=0.3,
        min_duration_off=0.5,
    )
    if not config.validate():
        raise RuntimeError(
            "Please check your config and make sure all required files exist"
        )

    return sherpa_onnx.OfflineSpeakerDiarization(config)


def progress_callback(num_processed_chunk: int, num_total_chunks: int) -> int:
    progress = num_processed_chunk / num_total_chunks * 100
    print(f"Progress: {progress:.3f}%")
    return 0


def main():
    wave_filename = "./0-four-speakers-zh.wav"
    if not Path(wave_filename).is_file():
        raise RuntimeError(f"{wave_filename} does not exist")

    audio, sample_rate = sf.read(wave_filename, dtype="float32", always_2d=True)
    audio = audio[:, 0]  # only use the first channel

    # Since we know there are 4 speakers in the above test wave file, we use
    # num_speakers 4 here
    sd = init_speaker_diarization(num_speakers=4)
    
    # Resample audio to match the expected sample rate
    target_sample_rate = sd.sample_rate
    audio, sample_rate = resample_audio(audio, sample_rate, target_sample_rate)
    
    if sample_rate != sd.sample_rate:
        raise RuntimeError(
            f"Expected samples rate: {sd.sample_rate}, given: {sample_rate}"
        )

    show_progress = True

    if show_progress:
        result = sd.process(audio, callback=progress_callback).sort_by_start_time()
    else:
        result = sd.process(audio).sort_by_start_time()

    for r in result:
        print(f"{r.start:.3f} -- {r.end:.3f} speaker_{r.speaker:02}")
        #  print(r) # this one is simpler


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/offline-speech-enhancement-dpdfnet.py
================================================
#!/usr/bin/env python3

"""
This file shows how to use the speech enhancement API with DPDFNet.

Please download DPDFNet models from the sherpa-onnx GitHub release
or the official Hugging Face hub:
https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
https://huggingface.co/Ceva-IP/DPDFNet

Example:

 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet_baseline.onnx
 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet2.onnx
 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet4.onnx
 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet8.onnx
 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet2_48khz_hr.onnx
 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/speech_with_noise.wav

Use 16 kHz DPDFNet models such as `dpdfnet_baseline.onnx`, `dpdfnet2.onnx`,
`dpdfnet4.onnx`, or `dpdfnet8.onnx` for downstream ASR or speech recognition.
Use `dpdfnet2_48khz_hr.onnx` for 48 kHz enhancement output.
"""

import time
from pathlib import Path
from typing import Tuple

import numpy as np
import sherpa_onnx
import soundfile as sf


def create_speech_denoiser():
    model_filename = "./dpdfnet_baseline.onnx"
    if not Path(model_filename).is_file():
        print(f"{model_filename} does not exist")
        raise ValueError(
            "Please first download a DPDFNet model from "
            "the sherpa-onnx GitHub release or the official Hugging Face hub: "
            "https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models or "
            "https://huggingface.co/Ceva-IP/DPDFNet"
        )

    config = sherpa_onnx.OfflineSpeechDenoiserConfig(
        model=sherpa_onnx.OfflineSpeechDenoiserModelConfig(
            dpdfnet=sherpa_onnx.OfflineSpeechDenoiserDpdfNetModelConfig(
                model=model_filename
            ),
            debug=False,
            num_threads=1,
            provider="cpu",
        )
    )
    if not config.validate():
        print(config)
        raise ValueError("Errors in config. Please check previous error logs")
    return sherpa_onnx.OfflineSpeechDenoiser(config)


def load_audio(filename: str) -> Tuple[np.ndarray, int]:
    data, sample_rate = sf.read(
        filename,
        always_2d=True,
        dtype="float32",
    )
    data = data[:, 0]  # use only the first channel
    samples = np.ascontiguousarray(data)
    return samples, sample_rate


def main():
    sd = create_speech_denoiser()
    test_wave = "./speech_with_noise.wav"
    if not Path(test_wave).is_file():
        raise ValueError(
            f"{test_wave} does not exist. You can download it from "
            "https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models"
        )

    samples, sample_rate = load_audio(test_wave)

    start = time.time()
    denoised = sd(samples, sample_rate)
    end = time.time()

    elapsed_seconds = end - start
    audio_duration = len(samples) / sample_rate
    real_time_factor = elapsed_seconds / audio_duration

    output_filename = f"./enhanced_{denoised.sample_rate}.wav"
    sf.write(output_filename, denoised.samples, denoised.sample_rate)
    print(f"Saved to {output_filename}")
    print(f"Elapsed seconds: {elapsed_seconds:.3f}")
    print(f"Audio duration in seconds: {audio_duration:.3f}")
    print(f"RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}")


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/offline-speech-enhancement-gtcrn.py
================================================
#!/usr/bin/env python3

"""
This file shows how to use the speech enhancement API.

Please download files used this script from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models

Example:

 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/speech_with_noise.wav
"""

import time
from pathlib import Path
from typing import Tuple

import numpy as np
import sherpa_onnx
import soundfile as sf


def create_speech_denoiser():
    model_filename = "./gtcrn_simple.onnx"
    if not Path(model_filename).is_file():
        raise ValueError(
            "Please first download a model from "
            "https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models"
        )

    config = sherpa_onnx.OfflineSpeechDenoiserConfig(
        model=sherpa_onnx.OfflineSpeechDenoiserModelConfig(
            gtcrn=sherpa_onnx.OfflineSpeechDenoiserGtcrnModelConfig(
                model=model_filename
            ),
            debug=False,
            num_threads=1,
            provider="cpu",
        )
    )
    if not config.validate():
        print(config)
        raise ValueError("Errors in config. Please check previous error logs")
    return sherpa_onnx.OfflineSpeechDenoiser(config)


def load_audio(filename: str) -> Tuple[np.ndarray, int]:
    data, sample_rate = sf.read(
        filename,
        always_2d=True,
        dtype="float32",
    )
    data = data[:, 0]  # use only the first channel
    samples = np.ascontiguousarray(data)
    return samples, sample_rate


def main():
    sd = create_speech_denoiser()
    test_wave = "./speech_with_noise.wav"
    if not Path(test_wave).is_file():
        raise ValueError(
            f"{test_wave} does not exist. You can download it from "
            "https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models"
        )

    samples, sample_rate = load_audio(test_wave)

    start = time.time()
    denoised = sd(samples, sample_rate)
    end = time.time()

    elapsed_seconds = end - start
    audio_duration = len(samples) / sample_rate
    real_time_factor = elapsed_seconds / audio_duration

    sf.write("./enhanced_16k.wav", denoised.samples, denoised.sample_rate)
    print("Saved to ./enhanced_16k.wav")
    print(f"Elapsed seconds: {elapsed_seconds:.3f}")
    print(f"Audio duration in seconds: {audio_duration:.3f}")
    print(f"RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}")


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/offline-telespeech-ctc-decode-files.py
================================================
#!/usr/bin/env python3

"""
This file shows how to use a non-streaming CTC model from
https://github.com/Tele-AI/TeleSpeech-ASR
to decode files.

Please download model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models


"""

from pathlib import Path

import sherpa_onnx
import soundfile as sf


def create_recognizer():
    model = "./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/model.int8.onnx"
    tokens = "./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt"
    test_wav = "./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/test_wavs/3-sichuan.wav"
    #  test_wav = "./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/test_wavs/4-tianjin.wav"
    #  test_wav = "./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/test_wavs/5-henan.wav"

    if not Path(model).is_file() or not Path(test_wav).is_file():
        raise ValueError(
            """Please download model files from
            https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
            """
        )
    return (
        sherpa_onnx.OfflineRecognizer.from_telespeech_ctc(
            model=model,
            tokens=tokens,
            debug=True,
        ),
        test_wav,
    )


def main():
    recognizer, wave_filename = create_recognizer()

    audio, sample_rate = sf.read(wave_filename, dtype="float32", always_2d=True)
    audio = audio[:, 0]  # only use the first channel

    # audio is a 1-D float32 numpy array normalized to the range [-1, 1]
    # sample_rate does not need to be 16000 Hz

    stream = recognizer.create_stream()
    stream.accept_waveform(sample_rate, audio)
    recognizer.decode_stream(stream)
    print(wave_filename)
    print(stream.result)


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/offline-tts-play.py
================================================
#!/usr/bin/env python3
#
# Copyright (c)  2023  Xiaomi Corporation

"""
This file demonstrates how to use sherpa-onnx Python API to generate audio
from text, i.e., text-to-speech.

Different from ./offline-tts.py, this file plays back the generated audio
while the model is still generating.

Usage:

Example (1/8)

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
tar xf vits-piper-en_US-amy-low.tar.bz2

python3 ./python-api-examples/offline-tts-play.py \
 --vits-model=./vits-piper-en_US-amy-low/en_US-amy-low.onnx \
 --vits-tokens=./vits-piper-en_US-amy-low/tokens.txt \
 --vits-data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
 --output-filename=./generated.wav \
 "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar."

Example (2/8)

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2
tar xvf vits-zh-aishell3.tar.bz2

python3 ./python-api-examples/offline-tts-play.py \
 --vits-model=./vits-icefall-zh-aishell3/model.onnx \
 --vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
 --vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \
 --tts-rule-fsts='./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst' \
 --sid=21 \
 --output-filename=./liubei-21.wav \
 "勿以恶小而为之，勿以善小而不为。惟贤惟德，能服于人。122334"

Example (3/8)

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-vits-zh-ll.tar.bz2
tar xvf sherpa-onnx-vits-zh-ll.tar.bz2
rm sherpa-onnx-vits-zh-ll.tar.bz2

python3 ./python-api-examples/offline-tts-play.py \
 --vits-model=./sherpa-onnx-vits-zh-ll/model.onnx \
 --vits-lexicon=./sherpa-onnx-vits-zh-ll/lexicon.txt \
 --vits-tokens=./sherpa-onnx-vits-zh-ll/tokens.txt \
 --tts-rule-fsts=./sherpa-onnx-vits-zh-ll/phone.fst,./sherpa-onnx-vits-zh-ll/date.fst,./sherpa-onnx-vits-zh-ll/number.fst \
 --sid=2 \
 --output-filename=./test-2.wav \
 "当夜幕降临，星光点点，伴随着微风拂面，我在静谧中感受着时光的流转，思念如涟漪荡漾，梦境如画卷展开，我与自然融为一体，沉静在这片宁静的美丽之中，感受着生命的奇迹与温柔。2024年5月11号，拨打110或者18920240511。123456块钱。"

Example (4/8)

curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
tar xvf matcha-icefall-zh-baker.tar.bz2
rm matcha-icefall-zh-baker.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx

python3 ./python-api-examples/offline-tts-play.py \
 --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
 --matcha-vocoder=./vocos-22khz-univ.onnx \
 --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \
 --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \
 --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
 --output-filename=./test-matcha.wav \
 "某某银行的副行长和一些行政领导表示，他们去过长江和长白山; 经济不断增长。2024年12月31号，拨打110或者18920240511。123456块钱。"

Example (5/8)

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
rm matcha-icefall-en_US-ljspeech.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx

python3 ./python-api-examples/offline-tts-play.py \
  --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
  --matcha-vocoder=./vocos-22khz-univ.onnx \
  --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
  --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
  --output-filename=./test-matcha-ljspeech-en.wav \
  --num-threads=2 \
 "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar."

Example (6/8)

(This version of kokoro supports only English)

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
tar xf kokoro-en-v0_19.tar.bz2
rm kokoro-en-v0_19.tar.bz2

python3 ./python-api-examples/offline-tts.py \
  --debug=1 \
  --kokoro-model=./kokoro-en-v0_19/model.onnx \
  --kokoro-voices=./kokoro-en-v0_19/voices.bin \
  --kokoro-tokens=./kokoro-en-v0_19/tokens.txt \
  --kokoro-data-dir=./kokoro-en-v0_19/espeak-ng-data \
  --num-threads=2 \
  --sid=10 \
  --output-filename="./kokoro-10.wav" \
  "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar."

Example (7/8)

(This version of kokoro supports English, Chinese, etc.)

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
tar xf kokoro-multi-lang-v1_0.tar.bz2
rm kokoro-multi-lang-v1_0.tar.bz2

python3 ./python-api-examples/offline-tts-play.py \
  --debug=1 \
  --kokoro-model=./kokoro-multi-lang-v1_0/model.onnx \
  --kokoro-voices=./kokoro-multi-lang-v1_0/voices.bin \
  --kokoro-tokens=./kokoro-multi-lang-v1_0/tokens.txt \
  --kokoro-data-dir=./kokoro-multi-lang-v1_0/espeak-ng-data \
  --kokoro-lexicon=./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt \
  --num-threads=2 \
  --sid=18 \
  --output-filename="./kokoro-18-zh-en.wav" \
  "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢？"

Example (8/8)

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2
tar xf kitten-nano-en-v0_1-fp16.tar.bz2
rm kitten-nano-en-v0_1-fp16.tar.bz2

python3 ./python-api-examples/offline-tts-play.py \
  --debug=1 \
  --kitten-model=./kitten-nano-en-v0_1-fp16/model.fp16.onnx \
  --kitten-voices=./kitten-nano-en-v0_1-fp16/voices.bin \
  --kitten-tokens=./kitten-nano-en-v0_1-fp16/tokens.txt \
  --kitten-data-dir=./kitten-nano-en-v0_1-fp16/espeak-ng-data \
  --num-threads=2 \
  --sid=0 \
  --output-filename="./kitten-0.wav" \
  "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar."

You can find more models at
https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models

Please see
https://k2-fsa.github.io/sherpa/onnx/tts/index.html
for details.
"""

import argparse
import logging
import queue
import sys
import threading
import time

import numpy as np
import sherpa_onnx
import soundfile as sf

try:
    import sounddevice as sd
except ImportError:
    print("Please install sounddevice first. You can use")
    print()
    print("  pip install sounddevice")
    print()
    print("to install it")
    sys.exit(-1)


def add_vits_args(parser):
    parser.add_argument(
        "--vits-model",
        type=str,
        default="",
        help="Path to vits model.onnx",
    )

    parser.add_argument(
        "--vits-lexicon",
        type=str,
        default="",
        help="Path to lexicon.txt",
    )

    parser.add_argument(
        "--vits-tokens",
        type=str,
        default="",
        help="Path to tokens.txt",
    )

    parser.add_argument(
        "--vits-data-dir",
        type=str,
        default="",
        help="""Path to the dict directory of espeak-ng. If it is specified,
        --vits-lexicon and --vits-tokens are ignored""",
    )


def add_matcha_args(parser):
    parser.add_argument(
        "--matcha-acoustic-model",
        type=str,
        default="",
        help="Path to model.onnx for matcha",
    )

    parser.add_argument(
        "--matcha-vocoder",
        type=str,
        default="",
        help="Path to vocoder for matcha",
    )

    parser.add_argument(
        "--matcha-lexicon",
        type=str,
        default="",
        help="Path to lexicon.txt for matcha",
    )

    parser.add_argument(
        "--matcha-tokens",
        type=str,
        default="",
        help="Path to tokens.txt for matcha",
    )

    parser.add_argument(
        "--matcha-data-dir",
        type=str,
        default="",
        help="""Path to the dict directory of espeak-ng. If it is specified,
        --matcha-lexicon and --matcha-tokens are ignored""",
    )


def add_kokoro_args(parser):
    parser.add_argument(
        "--kokoro-model",
        type=str,
        default="",
        help="Path to model.onnx for kokoro",
    )

    parser.add_argument(
        "--kokoro-voices",
        type=str,
        default="",
        help="Path to voices.bin for kokoro",
    )

    parser.add_argument(
        "--kokoro-tokens",
        type=str,
        default="",
        help="Path to tokens.txt for kokoro",
    )

    parser.add_argument(
        "--kokoro-data-dir",
        type=str,
        default="",
        help="Path to the dict directory of espeak-ng.",
    )

    parser.add_argument(
        "--kokoro-lexicon",
        type=str,
        default="",
        help="Path to lexicon.txt for kokoro. Needed only by multilingual kokoro",
    )


def add_kitten_args(parser):
    parser.add_argument(
        "--kitten-model",
        type=str,
        default="",
        help="Path to model.onnx for kitten",
    )

    parser.add_argument(
        "--kitten-voices",
        type=str,
        default="",
        help="Path to voices.bin for kitten",
    )

    parser.add_argument(
        "--kitten-tokens",
        type=str,
        default="",
        help="Path to tokens.txt for kitten",
    )

    parser.add_argument(
        "--kitten-data-dir",
        type=str,
        default="",
        help="Path to the dict directory of espeak-ng.",
    )


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    add_vits_args(parser)
    add_matcha_args(parser)
    add_kokoro_args(parser)
    add_kitten_args(parser)

    parser.add_argument(
        "--tts-rule-fsts",
        type=str,
        default="",
        help="Path to rule.fst",
    )

    parser.add_argument(
        "--output-filename",
        type=str,
        default="./generated.wav",
        help="Path to save generated wave",
    )

    parser.add_argument(
        "--sid",
        type=int,
        default=0,
        help="""Speaker ID. Used only for multi-speaker models, e.g.
        models trained using the VCTK dataset. Not used for single-speaker
        models, e.g., models trained using the LJ speech dataset.
        """,
    )

    parser.add_argument(
        "--debug",
        type=bool,
        default=False,
        help="True to show debug messages",
    )

    parser.add_argument(
        "--provider",
        type=str,
        default="cpu",
        help="valid values: cpu, cuda, coreml",
    )

    parser.add_argument(
        "--num-threads",
        type=int,
        default=1,
        help="Number of threads for neural network computation",
    )

    parser.add_argument(
        "--speed",
        type=float,
        default=1.0,
        help="Speech speed. Larger->faster; smaller->slower",
    )

    parser.add_argument(
        "text",
        type=str,
        help="The input text to generate audio for",
    )

    return parser.parse_args()


# buffer saves audio samples to be played
buffer = queue.Queue()

# started is set to True once generated_audio_callback is called.
started = False

# stopped is set to True once all the text has been processed
stopped = False

# killed is set to True once ctrl + C is pressed
killed = False

# Note: When started is True, and stopped is True, and buffer is empty,
# we will exit the program since all audio samples have been played.

sample_rate = None

event = threading.Event()

first_message_time = None


def generated_audio_callback(samples: np.ndarray, progress: float):
    """This function is called whenever max_num_sentences sentences
    have been processed.

    Note that it is passed to C++ and is invoked in C++.

    Args:
      samples:
        A 1-D np.float32 array containing audio samples
    """
    global first_message_time
    if first_message_time is None:
        first_message_time = time.time()

    buffer.put(samples)
    global started

    if started is False:
        logging.info("Start playing ...")
    started = True

    # 1 means to keep generating
    # 0 means to stop generating
    if killed:
        return 0

    return 1


# see https://python-sounddevice.readthedocs.io/en/0.4.6/api/streams.html#sounddevice.OutputStream
def play_audio_callback(
    outdata: np.ndarray, frames: int, time, status: sd.CallbackFlags
):
    if killed or (started and buffer.empty() and stopped):
        event.set()

    # outdata is of shape (frames, num_channels)
    if buffer.empty():
        outdata.fill(0)
        return

    n = 0
    while n < frames and not buffer.empty():
        remaining = frames - n
        k = buffer.queue[0].shape[0]

        if remaining <= k:
            outdata[n:, 0] = buffer.queue[0][:remaining]
            buffer.queue[0] = buffer.queue[0][remaining:]
            n = frames
            if buffer.queue[0].shape[0] == 0:
                buffer.get()

            break

        outdata[n : n + k, 0] = buffer.get()
        n += k

    if n < frames:
        outdata[n:, 0] = 0


# Please see
# https://python-sounddevice.readthedocs.io/en/0.4.6/usage.html#device-selection
# for how to select a device
def play_audio():
    if False:
        # This if branch can be safely removed. It is here to show you how to
        # change the default output device in case you need that.
        devices = sd.query_devices()
        print(devices)

        # sd.default.device[1] is the output device, if you want to
        # select a different device, say, 3, as the output device, please
        # use self.default.device[1] = 3

        default_output_device_idx = sd.default.device[1]
        print(
            f'Use default output device: {devices[default_output_device_idx]["name"]}'
        )

    with sd.OutputStream(
        channels=1,
        callback=play_audio_callback,
        dtype="float32",
        samplerate=sample_rate,
        blocksize=1024,
    ):
        event.wait()

    logging.info("Exiting ...")


def main():
    args = get_args()
    print(args)

    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=sherpa_onnx.OfflineTtsModelConfig(
            vits=sherpa_onnx.OfflineTtsVitsModelConfig(
                model=args.vits_model,
                lexicon=args.vits_lexicon,
                data_dir=args.vits_data_dir,
                tokens=args.vits_tokens,
            ),
            matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(
                acoustic_model=args.matcha_acoustic_model,
                vocoder=args.matcha_vocoder,
                lexicon=args.matcha_lexicon,
                tokens=args.matcha_tokens,
                data_dir=args.matcha_data_dir,
            ),
            kokoro=sherpa_onnx.OfflineTtsKokoroModelConfig(
                model=args.kokoro_model,
                voices=args.kokoro_voices,
                tokens=args.kokoro_tokens,
                data_dir=args.kokoro_data_dir,
                lexicon=args.kokoro_lexicon,
            ),
            kitten=sherpa_onnx.OfflineTtsKittenModelConfig(
                model=args.kitten_model,
                voices=args.kitten_voices,
                tokens=args.kitten_tokens,
                data_dir=args.kitten_data_dir,
            ),
            provider=args.provider,
            debug=args.debug,
            num_threads=args.num_threads,
        ),
        rule_fsts=args.tts_rule_fsts,
        max_num_sentences=1,
    )

    if not tts_config.validate():
        raise ValueError("Please check your config")

    logging.info("Loading model ...")
    tts = sherpa_onnx.OfflineTts(tts_config)
    logging.info("Loading model done.")

    global sample_rate
    sample_rate = tts.sample_rate

    play_back_thread = threading.Thread(target=play_audio)
    play_back_thread.start()

    logging.info("Start generating ...")
    start_time = time.time()
    gen_config = sherpa_onnx.GenerationConfig()
    gen_config.sid = args.sid
    gen_config.speed = args.speed
    gen_config.silence_scale = 0.2
    audio = tts.generate(
        args.text,
        gen_config,
        callback=generated_audio_callback,
    )
    end_time = time.time()
    logging.info("Finished generating!")
    global stopped
    stopped = True

    if len(audio.samples) == 0:
        print("Error in generating audios. Please read previous error messages.")
        global killed
        killed = True
        play_back_thread.join()
        return

    elapsed_seconds = end_time - start_time
    audio_duration = len(audio.samples) / audio.sample_rate
    real_time_factor = elapsed_seconds / audio_duration

    sf.write(
        args.output_filename,
        audio.samples,
        samplerate=audio.sample_rate,
        subtype="PCM_16",
    )
    logging.info(f"The text is '{args.text}'")
    logging.info(
        "Time in seconds to receive the first "
        f"message: {first_message_time-start_time:.3f}"
    )
    logging.info(f"Elapsed seconds: {elapsed_seconds:.3f}")
    logging.info(f"Audio duration in seconds: {audio_duration:.3f}")
    logging.info(
        f"RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}"
    )

    logging.info(f"***  Saved to {args.output_filename} ***")

    print("\n   >>>>>>>>> You can safely press ctrl + C to stop the play <<<<<<<<<<\n")

    play_back_thread.join()


if __name__ == "__main__":
    formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"

    logging.basicConfig(format=formatter, level=logging.INFO)
    try:
        main()
    except KeyboardInterrupt:
        print("\nCaught Ctrl + C. Exiting")
        killed = True
        sys.exit(0)


================================================
FILE: python-api-examples/offline-tts.py
================================================
#!/usr/bin/env python3
#
# Copyright (c)  2023-2025  Xiaomi Corporation

"""
This file demonstrates how to use sherpa-onnx Python API to generate audio
from text, i.e., text-to-speech.


Different from ./offline-tts-play.py, this file does not play back the
generated audio.

Usage:

Example (1/8)

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
tar xf vits-piper-en_US-amy-low.tar.bz2

python3 ./python-api-examples/offline-tts.py \
 --vits-model=./vits-piper-en_US-amy-low/en_US-amy-low.onnx \
 --vits-tokens=./vits-piper-en_US-amy-low/tokens.txt \
 --vits-data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
 --output-filename=./generated.wav \
 "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar."

Example (2/8)

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
tar xvf vits-icefall-zh-aishell3.tar.bz2

python3 ./python-api-examples/offline-tts.py \
 --vits-model=./vits-icefall-zh-aishell3/model.onnx \
 --vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
 --vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \
 --tts-rule-fsts='./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst' \
 --sid=21 \
 --output-filename=./liubei-21.wav \
 "勿以恶小而为之，勿以善小而不为。惟贤惟德，能服于人。122334"

Example (3/8)

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-vits-zh-ll.tar.bz2
tar xvf sherpa-onnx-vits-zh-ll.tar.bz2
rm sherpa-onnx-vits-zh-ll.tar.bz2

python3 ./python-api-examples/offline-tts.py \
 --vits-model=./sherpa-onnx-vits-zh-ll/model.onnx \
 --vits-lexicon=./sherpa-onnx-vits-zh-ll/lexicon.txt \
 --vits-tokens=./sherpa-onnx-vits-zh-ll/tokens.txt \
 --tts-rule-fsts=./sherpa-onnx-vits-zh-ll/phone.fst,./sherpa-onnx-vits-zh-ll/date.fst,./sherpa-onnx-vits-zh-ll/number.fst \
 --sid=2 \
 --output-filename=./test-2.wav \
 "当夜幕降临，星光点点，伴随着微风拂面，我在静谧中感受着时光的流转，思念如涟漪荡漾，梦境如画卷展开，我与自然融为一体，沉静在这片宁静的美丽之中，感受着生命的奇迹与温柔。2024年5月11号，拨打110或者18920240511。123456块钱。"

Example (4/8)

curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
tar xvf matcha-icefall-zh-baker.tar.bz2
rm matcha-icefall-zh-baker.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx

python3 ./python-api-examples/offline-tts.py \
 --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
 --matcha-vocoder=./vocos-22khz-univ.onnx \
 --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \
 --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \
 --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
 --output-filename=./test-matcha.wav \
 "某某银行的副行长和一些行政领导表示，他们去过长江和长白山; 经济不断增长。2024年12月31号，拨打110或者18920240511。123456块钱。"

Example (5/8)

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
rm matcha-icefall-en_US-ljspeech.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx

python3 ./python-api-examples/offline-tts.py \
  --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
  --matcha-vocoder=./vocos-22khz-univ.onnx \
  --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
  --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
  --output-filename=./test-matcha-ljspeech-en.wav \
  --num-threads=2 \
 "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar."

Example (6/8)

(This version of kokoro supports only English)

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
tar xf kokoro-en-v0_19.tar.bz2
rm kokoro-en-v0_19.tar.bz2

python3 ./python-api-examples/offline-tts.py \
  --debug=1 \
  --kokoro-model=./kokoro-en-v0_19/model.onnx \
  --kokoro-voices=./kokoro-en-v0_19/voices.bin \
  --kokoro-tokens=./kokoro-en-v0_19/tokens.txt \
  --kokoro-data-dir=./kokoro-en-v0_19/espeak-ng-data \
  --num-threads=2 \
  --sid=10 \
  --output-filename="./kokoro-10.wav" \
  "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar."

Example (7/8)

(This version of kokoro supports English, Chinese, etc.)

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
tar xf kokoro-multi-lang-v1_0.tar.bz2
rm kokoro-multi-lang-v1_0.tar.bz2

python3 ./python-api-examples/offline-tts.py \
  --debug=1 \
  --kokoro-model=./kokoro-multi-lang-v1_0/model.onnx \
  --kokoro-voices=./kokoro-multi-lang-v1_0/voices.bin \
  --kokoro-tokens=./kokoro-multi-lang-v1_0/tokens.txt \
  --kokoro-data-dir=./kokoro-multi-lang-v1_0/espeak-ng-data \
  --kokoro-lexicon=./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt \
  --num-threads=2 \
  --sid=18 \
  --output-filename="./kokoro-18-zh-en.wav" \
  "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢？"

Example (8/8)

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2
tar xf kitten-nano-en-v0_1-fp16.tar.bz2
rm kitten-nano-en-v0_1-fp16.tar.bz2

python3 ./python-api-examples/offline-tts.py \
  --debug=1 \
  --kitten-model=./kitten-nano-en-v0_1-fp16/model.fp16.onnx \
  --kitten-voices=./kitten-nano-en-v0_1-fp16/voices.bin \
  --kitten-tokens=./kitten-nano-en-v0_1-fp16/tokens.txt \
  --kitten-data-dir=./kitten-nano-en-v0_1-fp16/espeak-ng-data \
  --num-threads=2 \
  --sid=0 \
  --output-filename="./kitten-0.wav" \
  "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar."

You can find more models at
https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models

Please see
https://k2-fsa.github.io/sherpa/onnx/tts/index.html
for details.

"""

import argparse
import time

import sherpa_onnx
import soundfile as sf


def add_vits_args(parser):
    parser.add_argument(
        "--vits-model",
        type=str,
        default="",
        help="Path to vits model.onnx",
    )

    parser.add_argument(
        "--vits-lexicon",
        type=str,
        default="",
        help="Path to lexicon.txt",
    )

    parser.add_argument(
        "--vits-tokens",
        type=str,
        default="",
        help="Path to tokens.txt",
    )

    parser.add_argument(
        "--vits-data-dir",
        type=str,
        default="",
        help="""Path to the dict directory of espeak-ng. If it is specified,
        --vits-lexicon and --vits-tokens are ignored""",
    )


def add_matcha_args(parser):
    parser.add_argument(
        "--matcha-acoustic-model",
        type=str,
        default="",
        help="Path to model.onnx for matcha",
    )

    parser.add_argument(
        "--matcha-vocoder",
        type=str,
        default="",
        help="Path to vocoder for matcha",
    )

    parser.add_argument(
        "--matcha-lexicon",
        type=str,
        default="",
        help="Path to lexicon.txt for matcha",
    )

    parser.add_argument(
        "--matcha-tokens",
        type=str,
        default="",
        help="Path to tokens.txt for matcha",
    )

    parser.add_argument(
        "--matcha-data-dir",
        type=str,
        default="",
        help="""Path to the dict directory of espeak-ng. If it is specified,
        --matcha-lexicon and --matcha-tokens are ignored""",
    )


def add_kokoro_args(parser):
    parser.add_argument(
        "--kokoro-model",
        type=str,
        default="",
        help="Path to model.onnx for kokoro",
    )

    parser.add_argument(
        "--kokoro-voices",
        type=str,
        default="",
        help="Path to voices.bin for kokoro",
    )

    parser.add_argument(
        "--kokoro-tokens",
        type=str,
        default="",
        help="Path to tokens.txt for kokoro",
    )

    parser.add_argument(
        "--kokoro-data-dir",
        type=str,
        default="",
        help="Path to the dict directory of espeak-ng.",
    )

    parser.add_argument(
        "--kokoro-lexicon",
        type=str,
        default="",
        help="Path to lexicon.txt for kokoro. Needed only by multilingual kokoro",
    )


def add_kitten_args(parser):
    parser.add_argument(
        "--kitten-model",
        type=str,
        default="",
        help="Path to model.onnx for kitten",
    )

    parser.add_argument(
        "--kitten-voices",
        type=str,
        default="",
        help="Path to voices.bin for kitten",
    )

    parser.add_argument(
        "--kitten-tokens",
        type=str,
        default="",
        help="Path to tokens.txt for kitten",
    )

    parser.add_argument(
        "--kitten-data-dir",
        type=str,
        default="",
        help="Path to the dict directory of espeak-ng.",
    )


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    add_vits_args(parser)
    add_matcha_args(parser)
    add_kokoro_args(parser)
    add_kitten_args(parser)

    parser.add_argument(
        "--tts-rule-fsts",
        type=str,
        default="",
        help="Path to rule.fst",
    )

    parser.add_argument(
        "--max-num-sentences",
        type=int,
        default=1,
        help="""Max number of sentences in a batch to avoid OOM if the input
        text is very long. Set it to -1 to process all the sentences in a
        single batch. A smaller value does not mean it is slower compared
        to a larger one on CPU.
        """,
    )

    parser.add_argument(
        "--output-filename",
        type=str,
        default="./generated.wav",
        help="Path to save generated wave",
    )

    parser.add_argument(
        "--sid",
        type=int,
        default=0,
        help="""Speaker ID. Used only for multi-speaker models, e.g.
        models trained using the VCTK dataset. Not used for single-speaker
        models, e.g., models trained using the LJ speech dataset.
        """,
    )

    parser.add_argument(
        "--debug",
        type=bool,
        default=False,
        help="True to show debug messages",
    )

    parser.add_argument(
        "--provider",
        type=str,
        default="cpu",
        help="valid values: cpu, cuda, coreml",
    )

    parser.add_argument(
        "--num-threads",
        type=int,
        default=1,
        help="Number of threads for neural network computation",
    )

    parser.add_argument(
        "--speed",
        type=float,
        default=1.0,
        help="Speech speed. Larger->faster; smaller->slower",
    )

    parser.add_argument(
        "text",
        type=str,
        help="The input text to generate audio for",
    )

    return parser.parse_args()


def main():
    args = get_args()
    print(args)

    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=sherpa_onnx.OfflineTtsModelConfig(
            vits=sherpa_onnx.OfflineTtsVitsModelConfig(
                model=args.vits_model,
                lexicon=args.vits_lexicon,
                data_dir=args.vits_data_dir,
                tokens=args.vits_tokens,
            ),
            matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(
                acoustic_model=args.matcha_acoustic_model,
                vocoder=args.matcha_vocoder,
                lexicon=args.matcha_lexicon,
                tokens=args.matcha_tokens,
                data_dir=args.matcha_data_dir,
            ),
            kokoro=sherpa_onnx.OfflineTtsKokoroModelConfig(
                model=args.kokoro_model,
                voices=args.kokoro_voices,
                tokens=args.kokoro_tokens,
                data_dir=args.kokoro_data_dir,
                lexicon=args.kokoro_lexicon,
            ),
            kitten=sherpa_onnx.OfflineTtsKittenModelConfig(
                model=args.kitten_model,
                voices=args.kitten_voices,
                tokens=args.kitten_tokens,
                data_dir=args.kitten_data_dir,
            ),
            provider=args.provider,
            debug=args.debug,
            num_threads=args.num_threads,
        ),
        rule_fsts=args.tts_rule_fsts,
        max_num_sentences=args.max_num_sentences,
    )
    if not tts_config.validate():
        raise ValueError("Please check your config")

    tts = sherpa_onnx.OfflineTts(tts_config)

    start = time.time()
    gen_config = sherpa_onnx.GenerationConfig()
    gen_config.sid = args.sid
    gen_config.speed = args.speed
    gen_config.silence_scale = 0.2
    audio = tts.generate(args.text, gen_config)
    end = time.time()

    if len(audio.samples) == 0:
        print("Error in generating audios. Please read previous error messages.")
        return

    elapsed_seconds = end - start
    audio_duration = len(audio.samples) / audio.sample_rate
    real_time_factor = elapsed_seconds / audio_duration

    sf.write(
        args.output_filename,
        audio.samples,
        samplerate=audio.sample_rate,
        subtype="PCM_16",
    )
    print(f"Saved to {args.output_filename}")
    print(f"The text is '{args.text}'")
    print(f"Elapsed seconds: {elapsed_seconds:.3f}")
    print(f"Audio duration in seconds: {audio_duration:.3f}")
    print(f"RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}")


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/offline-websocket-client-decode-files-paralell.py
================================================
#!/usr/bin/env python3
#
# Copyright (c)  2023  Xiaomi Corporation

"""
A websocket client for sherpa-onnx-offline-websocket-server

This file shows how to transcribe multiple
files in parallel. We create a separate connection for transcribing each file.

Usage:
    ./offline-websocket-client-decode-files-parallel.py \
      --server-addr localhost \
      --server-port 6006 \
      /path/to/foo.wav \
      /path/to/bar.wav \
      /path/to/16kHz.wav \
      /path/to/8kHz.wav

(Note: You have to first start the server before starting the client)

You can find the server at
https://github.com/k2-fsa/sherpa-onnx/blob/master/sherpa-onnx/csrc/offline-websocket-server.cc

Note: The server is implemented in C++.
"""

import argparse
import asyncio
import logging
import wave
from typing import Tuple

try:
    import websockets
except ImportError:
    print("please run:")
    print("")
    print("  pip install websockets")
    print("")
    print("before you run this script")
    print("")

import numpy as np


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--server-addr",
        type=str,
        default="localhost",
        help="Address of the server",
    )

    parser.add_argument(
        "--server-port",
        type=int,
        default=6006,
        help="Port of the server",
    )

    parser.add_argument(
        "sound_files",
        type=str,
        nargs="+",
        help="The input sound file(s) to decode. Each file must be of WAVE"
        "format with a single channel, and each sample has 16-bit, "
        "i.e., int16_t. "
        "The sample rate of the file can be arbitrary and does not need to "
        "be 16 kHz",
    )

    return parser.parse_args()


def read_wave(wave_filename: str) -> Tuple[np.ndarray, int]:
    """
    Args:
      wave_filename:
        Path to a wave file. It should be single channel and each sample should
        be 16-bit. Its sample rate does not need to be 16kHz.
    Returns:
      Return a tuple containing:
       - A 1-D array of dtype np.float32 containing the samples, which are
       normalized to the range [-1, 1].
       - sample rate of the wave file
    """

    with wave.open(wave_filename) as f:
        assert f.getnchannels() == 1, f.getnchannels()
        assert f.getsampwidth() == 2, f.getsampwidth()  # it is in bytes
        num_samples = f.getnframes()
        samples = f.readframes(num_samples)
        samples_int16 = np.frombuffer(samples, dtype=np.int16)
        samples_float32 = samples_int16.astype(np.float32)

        samples_float32 = samples_float32 / 32768
        return samples_float32, f.getframerate()


async def run(
    server_addr: str,
    server_port: int,
    wave_filename: str,
):
    async with websockets.connect(
        f"ws://{server_addr}:{server_port}"
    ) as websocket:  # noqa
        logging.info(f"Sending {wave_filename}")
        samples, sample_rate = read_wave(wave_filename)
        assert isinstance(sample_rate, int)
        assert samples.dtype == np.float32, samples.dtype
        assert samples.ndim == 1, samples.dim
        buf = sample_rate.to_bytes(4, byteorder="little")  # 4 bytes
        buf += (samples.size * 4).to_bytes(4, byteorder="little")
        buf += samples.tobytes()

        payload_len = 10240
        while len(buf) > payload_len:
            await websocket.send(buf[:payload_len])
            buf = buf[payload_len:]

        if buf:
            await websocket.send(buf)

        decoding_results = await websocket.recv()
        logging.info(f"{wave_filename}\n{decoding_results}")

        # to signal that the client has sent all the data
        await websocket.send("Done")


async def main():
    args = get_args()
    logging.info(vars(args))

    server_addr = args.server_addr
    server_port = args.server_port
    sound_files = args.sound_files

    all_tasks = []
    for wave_filename in sound_files:
        task = asyncio.create_task(
            run(
                server_addr=server_addr,
                server_port=server_port,
                wave_filename=wave_filename,
            )
        )
        all_tasks.append(task)

    await asyncio.gather(*all_tasks)


if __name__ == "__main__":
    formatter = (
        "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"  # noqa
    )
    logging.basicConfig(format=formatter, level=logging.INFO)
    asyncio.run(main())


================================================
FILE: python-api-examples/offline-websocket-client-decode-files-sequential.py
================================================
#!/usr/bin/env python3
#
# Copyright (c)  2023  Xiaomi Corporation

"""
A websocket client for sherpa-onnx-offline-websocket-server

This file shows how to use a single connection to transcribe multiple
files sequentially.

Usage:
    ./offline-websocket-client-decode-files-sequential.py \
      --server-addr localhost \
      --server-port 6006 \
      /path/to/foo.wav \
      /path/to/bar.wav \
      /path/to/16kHz.wav \
      /path/to/8kHz.wav

(Note: You have to first start the server before starting the client)

You can find the server at
https://github.com/k2-fsa/sherpa-onnx/blob/master/sherpa-onnx/csrc/offline-websocket-server.cc

Note: The server is implemented in C++.
"""

import argparse
import asyncio
import logging
import wave
from typing import List, Tuple

try:
    import websockets
except ImportError:
    print("please run:")
    print("")
    print("  pip install websockets")
    print("")
    print("before you run this script")
    print("")

import numpy as np


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--server-addr",
        type=str,
        default="localhost",
        help="Address of the server",
    )

    parser.add_argument(
        "--server-port",
        type=int,
        default=6006,
        help="Port of the server",
    )

    parser.add_argument(
        "sound_files",
        type=str,
        nargs="+",
        help="The input sound file(s) to decode. Each file must be of WAVE"
        "format with a single channel, and each sample has 16-bit, "
        "i.e., int16_t. "
        "The sample rate of the file can be arbitrary and does not need to "
        "be 16 kHz",
    )

    return parser.parse_args()


def read_wave(wave_filename: str) -> Tuple[np.ndarray, int]:
    """
    Args:
      wave_filename:
        Path to a wave file. It should be single channel and each sample should
        be 16-bit. Its sample rate does not need to be 16kHz.
    Returns:
      Return a tuple containing:
       - A 1-D array of dtype np.float32 containing the samples, which are
       normalized to the range [-1, 1].
       - sample rate of the wave file
    """

    with wave.open(wave_filename) as f:
        assert f.getnchannels() == 1, f.getnchannels()
        assert f.getsampwidth() == 2, f.getsampwidth()  # it is in bytes
        num_samples = f.getnframes()
        samples = f.readframes(num_samples)
        samples_int16 = np.frombuffer(samples, dtype=np.int16)
        samples_float32 = samples_int16.astype(np.float32)

        samples_float32 = samples_float32 / 32768
        return samples_float32, f.getframerate()


async def run(
    server_addr: str,
    server_port: int,
    sound_files: List[str],
):
    async with websockets.connect(
        f"ws://{server_addr}:{server_port}"
    ) as websocket:  # noqa
        for wave_filename in sound_files:
            logging.info(f"Sending {wave_filename}")
            samples, sample_rate = read_wave(wave_filename)
            assert isinstance(sample_rate, int)
            assert samples.dtype == np.float32, samples.dtype
            assert samples.ndim == 1, samples.dim

            buf = sample_rate.to_bytes(4, byteorder="little")  # 4 bytes
            buf += (samples.size * 4).to_bytes(4, byteorder="little")
            buf += samples.tobytes()

            payload_len = 10240
            while len(buf) > payload_len:
                await websocket.send(buf[:payload_len])
                buf = buf[payload_len:]

            if buf:
                await websocket.send(buf)

            decoding_results = await websocket.recv()
            print(decoding_results)

        # to signal that the client has sent all the data
        await websocket.send("Done")


async def main():
    args = get_args()
    logging.info(vars(args))

    server_addr = args.server_addr
    server_port = args.server_port
    sound_files = args.sound_files

    await run(
        server_addr=server_addr,
        server_port=server_port,
        sound_files=sound_files,
    )


if __name__ == "__main__":
    formatter = (
        "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"  # noqa
    )
    logging.basicConfig(format=formatter, level=logging.INFO)
    asyncio.run(main())


================================================
FILE: python-api-examples/offline-whisper-decode-files.py
================================================
#!/usr/bin/env python3

"""
This file shows how to use a non-streaming whisper model from
https://github.com/openai/whisper
to decode files.

Please download model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models

For instance,

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
rm sherpa-onnx-whisper-tiny.en.tar.bz2
"""

import datetime as dt
from pathlib import Path

import sherpa_onnx
import soundfile as sf


def create_recognizer():
    encoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx"
    decoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx"
    tokens = "./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt"
    test_wav = "./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav"

    if not Path(encoder).is_file() or not Path(test_wav).is_file():
        raise ValueError(
            """Please download model files from
            https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
            """
        )
    return (
        sherpa_onnx.OfflineRecognizer.from_whisper(
            encoder=encoder,
            decoder=decoder,
            tokens=tokens,
            debug=True,
        ),
        test_wav,
    )


def main():
    recognizer, wave_filename = create_recognizer()

    audio, sample_rate = sf.read(wave_filename, dtype="float32", always_2d=True)
    audio = audio[:, 0]  # only use the first channel

    # audio is a 1-D float32 numpy array normalized to the range [-1, 1]
    # sample_rate does not need to be 16000 Hz

    start_t = dt.datetime.now()

    stream = recognizer.create_stream()
    stream.accept_waveform(sample_rate, audio)
    recognizer.decode_stream(stream)

    end_t = dt.datetime.now()
    elapsed_seconds = (end_t - start_t).total_seconds()
    duration = audio.shape[-1] / sample_rate
    rtf = elapsed_seconds / duration

    print(stream.result)
    print(wave_filename)
    print("Text:", stream.result.text)
    print(f"Audio duration:\t{duration:.3f} s")
    print(f"Elapsed:\t{elapsed_seconds:.3f} s")
    print(f"RTF = {elapsed_seconds:.3f}/{duration:.3f} = {rtf:.3f}")


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/offline-zipformer-ctc-decode-files.py
================================================
#!/usr/bin/env python3

"""
This file shows how to use a non-streaming zipformer CTC model from icefall
to decode files.

Please download model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models

"""

from pathlib import Path

import sherpa_onnx
import soundfile as sf


def create_recognizer():
    model = "./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx"
    tokens = "./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt"
    test_wav = "./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/test_wavs/0.wav"

    if not Path(model).is_file() or not Path(test_wav).is_file():
        raise ValueError(
            """Please download model files from
            https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
            """
        )
    return (
        sherpa_onnx.OfflineRecognizer.from_zipformer_ctc(
            model=model,
            tokens=tokens,
            debug=True,
        ),
        test_wav,
    )


def main():
    recognizer, wave_filename = create_recognizer()

    audio, sample_rate = sf.read(wave_filename, dtype="float32", always_2d=True)
    audio = audio[:, 0]  # only use the first channel

    # audio is a 1-D float32 numpy array normalized to the range [-1, 1]
    # sample_rate does not need to be 16000 Hz

    stream = recognizer.create_stream()
    stream.accept_waveform(sample_rate, audio)
    recognizer.decode_stream(stream)
    print(wave_filename)
    print(stream.result)


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/online-decode-files.py
================================================
#!/usr/bin/env python3

"""
This file demonstrates how to use sherpa-onnx Python API to transcribe
file(s) with a streaming model.

Usage:

(1) Streaming transducer

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-en-2023-06-26.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-en-2023-06-26.tar.bz2
rm sherpa-onnx-streaming-zipformer-en-2023-06-26.tar.bz2

./python-api-examples/online-decode-files.py \
  --tokens=./sherpa-onnx-streaming-zipformer-en-2023-06-26/tokens.txt \
  --encoder=./sherpa-onnx-streaming-zipformer-en-2023-06-26/encoder-epoch-99-avg-1-chunk-16-left-64.onnx \
  --decoder=./sherpa-onnx-streaming-zipformer-en-2023-06-26/decoder-epoch-99-avg-1-chunk-16-left-64.onnx \
  --joiner=./sherpa-onnx-streaming-zipformer-en-2023-06-26/joiner-epoch-99-avg-1-chunk-16-left-64.onnx \
  ./sherpa-onnx-streaming-zipformer-en-2023-06-26/test_wavs/0.wav \
  ./sherpa-onnx-streaming-zipformer-en-2023-06-26/test_wavs/1.wav \
  ./sherpa-onnx-streaming-zipformer-en-2023-06-26/test_wavs/8k.wav

or with RNN LM rescoring and LODR:

./python-api-examples/online-decode-files.py \
  --tokens=./sherpa-onnx-streaming-zipformer-en-2023-06-26/tokens.txt \
  --encoder=./sherpa-onnx-streaming-zipformer-en-2023-06-26/encoder-epoch-99-avg-1-chunk-16-left-64.onnx \
  --decoder=./sherpa-onnx-streaming-zipformer-en-2023-06-26/decoder-epoch-99-avg-1-chunk-16-left-64.onnx \
  --joiner=./sherpa-onnx-streaming-zipformer-en-2023-06-26/joiner-epoch-99-avg-1-chunk-16-left-64.onnx \
  --decoding-method=modified_beam_search \
  --lm=/path/to/lm.onnx \
  --lm-scale=0.1 \
  --lodr-fst=/path/to/lodr.fst \
  --lodr-scale=-0.1 \
  ./sherpa-onnx-streaming-zipformer-en-2023-06-26/test_wavs/0.wav \
  ./sherpa-onnx-streaming-zipformer-en-2023-06-26/test_wavs/1.wav \
  ./sherpa-onnx-streaming-zipformer-en-2023-06-26/test_wavs/8k.wav

(2) Streaming paraformer

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2

./python-api-examples/online-decode-files.py \
  --tokens=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt \
  --paraformer-encoder=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx \
  --paraformer-decoder=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx \
  ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav \
  ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/1.wav \
  ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/2.wav \
  ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/3.wav \
  ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/8k.wav

(3) Streaming Zipformer2 CTC

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
rm sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
ls -lh sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13

./python-api-examples/online-decode-files.py \
  --zipformer2-ctc=./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx \
  --tokens=./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt \
  ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000000.wav \
  ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000001.wav

(4) Streaming Conformer CTC from WeNet

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zh-wenet-wenetspeech.tar.bz2
tar xvf sherpa-onnx-zh-wenet-wenetspeech.tar.bz2
rm sherpa-onnx-zh-wenet-wenetspeech.tar.bz2

./python-api-examples/online-decode-files.py \
  --tokens=./sherpa-onnx-zh-wenet-wenetspeech/tokens.txt \
  --wenet-ctc=./sherpa-onnx-zh-wenet-wenetspeech/model-streaming.onnx \
  ./sherpa-onnx-zh-wenet-wenetspeech/test_wavs/0.wav \
  ./sherpa-onnx-zh-wenet-wenetspeech/test_wavs/1.wav \
  ./sherpa-onnx-zh-wenet-wenetspeech/test_wavs/8k.wav


Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
to download streaming pre-trained models.
"""
import argparse
import time
import wave
from pathlib import Path
from typing import List, Tuple

import numpy as np
import sherpa_onnx


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--tokens",
        type=str,
        help="Path to tokens.txt",
    )

    parser.add_argument(
        "--encoder",
        type=str,
        help="Path to the transducer encoder model",
    )

    parser.add_argument(
        "--decoder",
        type=str,
        help="Path to the transducer decoder model",
    )

    parser.add_argument(
        "--joiner",
        type=str,
        help="Path to the transducer joiner model",
    )

    parser.add_argument(
        "--zipformer2-ctc",
        type=str,
        help="Path to the zipformer2 ctc model",
    )

    parser.add_argument(
        "--paraformer-encoder",
        type=str,
        help="Path to the paraformer encoder model",
    )

    parser.add_argument(
        "--paraformer-decoder",
        type=str,
        help="Path to the paraformer decoder model",
    )

    parser.add_argument(
        "--wenet-ctc",
        type=str,
        help="Path to the wenet ctc model",
    )

    parser.add_argument(
        "--wenet-ctc-chunk-size",
        type=int,
        default=16,
        help="The --chunk-size parameter for streaming WeNet models",
    )

    parser.add_argument(
        "--wenet-ctc-num-left-chunks",
        type=int,
        default=4,
        help="The --num-left-chunks parameter for streaming WeNet models",
    )

    parser.add_argument(
        "--num-threads",
        type=int,
        default=1,
        help="Number of threads for neural network computation",
    )

    parser.add_argument(
        "--decoding-method",
        type=str,
        default="greedy_search",
        help="Valid values are greedy_search and modified_beam_search",
    )

    parser.add_argument(
        "--max-active-paths",
        type=int,
        default=4,
        help="""Used only when --decoding-method is modified_beam_search.
        It specifies number of active paths to keep during decoding.
        """,
    )

    parser.add_argument(
        "--lm",
        type=str,
        default="",
        help="""Used only when --decoding-method is modified_beam_search.
        path of language model.
        """,
    )

    parser.add_argument(
        "--lm-scale",
        type=float,
        default=0.1,
        help="""Used only when --decoding-method is modified_beam_search.
        scale of language model.
        """,
    )

    parser.add_argument(
        "--lodr-fst",
        metavar="file",
        type=str,
        default="",
        help="Path to LODR FST model. Used only when --lm is given.",
    )

    parser.add_argument(
        "--lodr-scale",
        metavar="lodr_scale",
        type=float,
        default=-0.1,
        help="LODR scale for rescoring.Used only when --lodr_fst is given.",
    )

    parser.add_argument(
        "--provider",
        type=str,
        default="cpu",
        help="Valid values: cpu, cuda, coreml",
    )

    parser.add_argument(
        "--hotwords-file",
        type=str,
        default="",
        help="""
        The file containing hotwords, one words/phrases per line, like
        HELLO WORLD
        你好世界
        """,
    )

    parser.add_argument(
        "--hotwords-score",
        type=float,
        default=1.5,
        help="""
        The hotword score of each token for biasing word/phrase. Used only if
        --hotwords-file is given.
        """,
    )

    parser.add_argument(
        "--modeling-unit",
        type=str,
        default="",
        help="""
        The modeling unit of the model, valid values are cjkchar, bpe, cjkchar+bpe.
        Used only when hotwords-file is given.
        """,
    )

    parser.add_argument(
        "--bpe-vocab",
        type=str,
        default="",
        help="""
        The path to the bpe vocabulary, the bpe vocabulary is generated by
        sentencepiece, you can also export the bpe vocabulary through a bpe model
        by `scripts/export_bpe_vocab.py`. Used only when hotwords-file is given
        and modeling-unit is bpe or cjkchar+bpe.
        """,
    )

    parser.add_argument(
        "--blank-penalty",
        type=float,
        default=0.0,
        help="""
        The penalty applied on blank symbol during decoding.
        Note: It is a positive value that would be applied to logits like
        this `logits[:, 0] -= blank_penalty` (suppose logits.shape is
        [batch_size, vocab] and blank id is 0).
        """,
    )

    parser.add_argument(
        "sound_files",
        type=str,
        nargs="+",
        help="The input sound file(s) to decode. Each file must be of WAVE"
        "format with a single channel, and each sample has 16-bit, "
        "i.e., int16_t. "
        "The sample rate of the file can be arbitrary and does not need to "
        "be 16 kHz",
    )

    return parser.parse_args()


def assert_file_exists(filename: str):
    assert Path(filename).is_file(), (
        f"{filename} does not exist!\n"
        "Please refer to "
        "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html to download it"
    )


def read_wave(wave_filename: str) -> Tuple[np.ndarray, int]:
    """
    Args:
      wave_filename:
        Path to a wave file. It should be single channel and each sample should
        be 16-bit. Its sample rate does not need to be 16kHz.
    Returns:
      Return a tuple containing:
       - A 1-D array of dtype np.float32 containing the samples, which are
       normalized to the range [-1, 1].
       - sample rate of the wave file
    """

    with wave.open(wave_filename) as f:
        assert f.getnchannels() == 1, f.getnchannels()
        assert f.getsampwidth() == 2, f.getsampwidth()  # it is in bytes
        num_samples = f.getnframes()
        samples = f.readframes(num_samples)
        samples_int16 = np.frombuffer(samples, dtype=np.int16)
        samples_float32 = samples_int16.astype(np.float32)

        samples_float32 = samples_float32 / 32768
        return samples_float32, f.getframerate()


def main():
    args = get_args()
    assert_file_exists(args.tokens)

    if args.encoder:
        assert_file_exists(args.encoder)
        assert_file_exists(args.decoder)
        assert_file_exists(args.joiner)

        assert not args.paraformer_encoder, args.paraformer_encoder
        assert not args.paraformer_decoder, args.paraformer_decoder

        recognizer = sherpa_onnx.OnlineRecognizer.from_transducer(
            tokens=args.tokens,
            encoder=args.encoder,
            decoder=args.decoder,
            joiner=args.joiner,
            num_threads=args.num_threads,
            provider=args.provider,
            sample_rate=16000,
            feature_dim=80,
            decoding_method=args.decoding_method,
            max_active_paths=args.max_active_paths,
            lm=args.lm,
            lm_scale=args.lm_scale,
            lodr_fst=args.lodr_fst,
            lodr_scale=args.lodr_scale,
            hotwords_file=args.hotwords_file,
            hotwords_score=args.hotwords_score,
            modeling_unit=args.modeling_unit,
            bpe_vocab=args.bpe_vocab,
            blank_penalty=args.blank_penalty,
        )
    elif args.zipformer2_ctc:
        recognizer = sherpa_onnx.OnlineRecognizer.from_zipformer2_ctc(
            tokens=args.tokens,
            model=args.zipformer2_ctc,
            num_threads=args.num_threads,
            provider=args.provider,
            sample_rate=16000,
            feature_dim=80,
            decoding_method="greedy_search",
        )
    elif args.paraformer_encoder:
        recognizer = sherpa_onnx.OnlineRecognizer.from_paraformer(
            tokens=args.tokens,
            encoder=args.paraformer_encoder,
            decoder=args.paraformer_decoder,
            num_threads=args.num_threads,
            provider=args.provider,
            sample_rate=16000,
            feature_dim=80,
            decoding_method="greedy_search",
        )
    elif args.wenet_ctc:
        recognizer = sherpa_onnx.OnlineRecognizer.from_wenet_ctc(
            tokens=args.tokens,
            model=args.wenet_ctc,
            chunk_size=args.wenet_ctc_chunk_size,
            num_left_chunks=args.wenet_ctc_num_left_chunks,
            num_threads=args.num_threads,
            provider=args.provider,
            sample_rate=16000,
            feature_dim=80,
            decoding_method="greedy_search",
        )
    else:
        raise ValueError("Please provide a model")

    print("Started!")
    start_time = time.time()

    streams = []
    total_duration = 0
    for wave_filename in args.sound_files:
        assert_file_exists(wave_filename)
        samples, sample_rate = read_wave(wave_filename)
        duration = len(samples) / sample_rate
        total_duration += duration

        s = recognizer.create_stream()

        s.accept_waveform(sample_rate, samples)

        tail_paddings = np.zeros(int(0.66 * sample_rate), dtype=np.float32)
        s.accept_waveform(sample_rate, tail_paddings)

        s.input_finished()

        streams.append(s)

    while True:
        ready_list = []
        for s in streams:
            if recognizer.is_ready(s):
                ready_list.append(s)
        if len(ready_list) == 0:
            break
        recognizer.decode_streams(ready_list)
    results = [recognizer.get_result(s) for s in streams]
    end_time = time.time()
    print("Done!")

    for wave_filename, result in zip(args.sound_files, results):
        print(f"{wave_filename}\n{result}")
        print("-" * 10)

    elapsed_seconds = end_time - start_time
    rtf = elapsed_seconds / total_duration
    print(f"num_threads: {args.num_threads}")
    print(f"decoding_method: {args.decoding_method}")
    print(f"Wave duration: {total_duration:.3f} s")
    print(f"Elapsed time: {elapsed_seconds:.3f} s")
    print(
        f"Real time factor (RTF): {elapsed_seconds:.3f}/{total_duration:.3f} = {rtf:.3f}"
    )


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/online-nemo-ctc-decode-files.py
================================================
#!/usr/bin/env python3

"""
This file shows how to use a streaming CTC model from NeMo
to decode files.

Please download model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models


The example model is converted from
https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_en_fastconformer_hybrid_large_streaming_80ms
"""

from pathlib import Path

import numpy as np
import sherpa_onnx
import soundfile as sf


def create_recognizer():
    model = "./sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms/model.onnx"
    tokens = "./sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms/tokens.txt"

    test_wav = "./sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms/test_wavs/0.wav"

    if not Path(model).is_file() or not Path(test_wav).is_file():
        raise ValueError(
            """Please download model files from
            https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
            """
        )
    return (
        sherpa_onnx.OnlineRecognizer.from_nemo_ctc(
            model=model,
            tokens=tokens,
            debug=True,
        ),
        test_wav,
    )


def main():
    recognizer, wave_filename = create_recognizer()

    audio, sample_rate = sf.read(wave_filename, dtype="float32", always_2d=True)
    audio = audio[:, 0]  # only use the first channel

    # audio is a 1-D float32 numpy array normalized to the range [-1, 1]
    # sample_rate does not need to be 16000 Hz

    stream = recognizer.create_stream()
    stream.accept_waveform(sample_rate, audio)

    tail_paddings = np.zeros(int(0.66 * sample_rate), dtype=np.float32)
    stream.accept_waveform(sample_rate, tail_paddings)
    stream.input_finished()

    while recognizer.is_ready(stream):
        recognizer.decode_stream(stream)
    print(wave_filename)
    print(recognizer.get_result_all(stream))


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/online-speech-enhancement-dpdfnet.py
================================================
#!/usr/bin/env python3

"""
This file shows how to use the online speech enhancement API with DPDFNet.

Please download files used in this script from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
"""

from pathlib import Path

import numpy as np
import sherpa_onnx
import soundfile as sf


def create_speech_denoiser():
    model_filename = "./dpdfnet_baseline.onnx"
    if not Path(model_filename).is_file():
        raise ValueError(
            "Please first download a model from "
            "https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models"
            " or https://huggingface.co/csukuangfj/speech-enhancement-models"
        )

    config = sherpa_onnx.OnlineSpeechDenoiserConfig(
        model=sherpa_onnx.OfflineSpeechDenoiserModelConfig(
            dpdfnet=sherpa_onnx.OfflineSpeechDenoiserDpdfNetModelConfig(
                model=model_filename
            ),
            debug=False,
            num_threads=1,
            provider="cpu",
        )
    )

    if not config.validate():
        print(config)
        raise ValueError("Errors in config. Please check previous error logs")

    return sherpa_onnx.OnlineSpeechDenoiser(config)


def load_audio(filename: str):
    data, sample_rate = sf.read(filename, always_2d=True, dtype="float32")
    samples = np.ascontiguousarray(data[:, 0])
    return samples, sample_rate


def main():
    sd = create_speech_denoiser()
    test_wave = "./speech_with_noise.wav"
    if not Path(test_wave).is_file():
        raise ValueError(
            f"{test_wave} does not exist. You can download it from "
            "https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models"
        )

    samples, sample_rate = load_audio(test_wave)
    frame_shift = sd.frame_shift_in_samples
    output = []

    for start in range(0, len(samples), frame_shift):
        chunk = samples[start : start + frame_shift]
        denoised = sd(chunk, sample_rate)
        output.append(np.asarray(denoised.samples, dtype=np.float32))

    output.append(np.asarray(sd.flush().samples, dtype=np.float32))
    enhanced = np.concatenate(output) if output else np.empty(0, dtype=np.float32)

    sf.write("./enhanced_online_dpdfnet.wav", enhanced, sd.sample_rate)
    print("Saved to ./enhanced_online_dpdfnet.wav")


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/online-speech-enhancement-gtcrn.py
================================================
#!/usr/bin/env python3

"""
This file shows how to use the online speech enhancement API with GTCRN.

Please download files used in this script from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
"""

from pathlib import Path

import numpy as np
import sherpa_onnx
import soundfile as sf


def create_speech_denoiser():
    model_filename = "./gtcrn_simple.onnx"
    if not Path(model_filename).is_file():
        raise ValueError(
            "Please first download a model from "
            "https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models"
        )

    config = sherpa_onnx.OnlineSpeechDenoiserConfig(
        model=sherpa_onnx.OfflineSpeechDenoiserModelConfig(
            gtcrn=sherpa_onnx.OfflineSpeechDenoiserGtcrnModelConfig(
                model=model_filename
            ),
            debug=False,
            num_threads=1,
            provider="cpu",
        )
    )

    if not config.validate():
        print(config)
        raise ValueError("Errors in config. Please check previous error logs")

    return sherpa_onnx.OnlineSpeechDenoiser(config)


def load_audio(filename: str):
    data, sample_rate = sf.read(filename, always_2d=True, dtype="float32")
    samples = np.ascontiguousarray(data[:, 0])
    return samples, sample_rate


def main():
    sd = create_speech_denoiser()
    test_wave = "./speech_with_noise.wav"
    if not Path(test_wave).is_file():
        raise ValueError(
            f"{test_wave} does not exist. You can download it from "
            "https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models"
        )

    samples, sample_rate = load_audio(test_wave)
    frame_shift = sd.frame_shift_in_samples
    output = []

    for start in range(0, len(samples), frame_shift):
        chunk = samples[start : start + frame_shift]
        denoised = sd(chunk, sample_rate)
        output.append(np.asarray(denoised.samples, dtype=np.float32))

    output.append(np.asarray(sd.flush().samples, dtype=np.float32))
    enhanced = np.concatenate(output) if output else np.empty(0, dtype=np.float32)

    sf.write("./enhanced_online_gtcrn.wav", enhanced, sd.sample_rate)
    print("Saved to ./enhanced_online_gtcrn.wav")


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/online-t-one-ctc-decode-files.py
================================================
#!/usr/bin/env python3

"""
This file shows how to use a streaming CTC model from T-one
to decode files.

Please download model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models


The example model is converted from
https://github.com/voicekit-team/T-one
using
https://github.com/k2-fsa/sherpa-onnx/tree/master/scripts/t-one

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
"""

from pathlib import Path

import numpy as np
import sherpa_onnx
import soundfile as sf


def create_recognizer():
    model = "./sherpa-onnx-streaming-t-one-russian-2025-09-08/model.onnx"
    tokens = "./sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt"
    test_wav = "./sherpa-onnx-streaming-t-one-russian-2025-09-08/0.wav"

    if not Path(model).is_file() or not Path(test_wav).is_file():
        raise ValueError(
            """Please download model files from
            https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
            """
        )
    return (
        sherpa_onnx.OnlineRecognizer.from_t_one_ctc(
            model=model,
            tokens=tokens,
            debug=True,
        ),
        test_wav,
    )


def main():
    recognizer, wave_filename = create_recognizer()

    audio, sample_rate = sf.read(wave_filename, dtype="float32", always_2d=True)
    audio = audio[:, 0]  # only use the first channel

    # audio is a 1-D float32 numpy array normalized to the range [-1, 1]
    # sample_rate does not need to be 8000 Hz

    stream = recognizer.create_stream()
    left_paddings = np.zeros(int(0.3 * sample_rate), dtype=np.float32)
    stream.accept_waveform(sample_rate, left_paddings)

    stream.accept_waveform(sample_rate, audio)

    tail_paddings = np.zeros(int(0.66 * sample_rate), dtype=np.float32)
    stream.accept_waveform(sample_rate, tail_paddings)
    stream.input_finished()

    while recognizer.is_ready(stream):
        recognizer.decode_stream(stream)
    print(wave_filename)
    print(recognizer.get_result_all(stream))


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/online-websocket-client-decode-file.py
================================================
#!/usr/bin/env python3
#
# Copyright (c)  2023  Xiaomi Corporation

"""
A websocket client for sherpa-onnx-online-websocket-server

Usage:
    ./online-websocket-client-decode-file.py \
      --server-addr localhost \
      --server-port 6006 \
      --seconds-per-message 0.1 \
      --samples-per-message 8000 \
      /path/to/foo.wav

(Note: You have to first start the server before starting the client)

You can find the c++ server at
https://github.com/k2-fsa/sherpa-onnx/blob/master/sherpa-onnx/csrc/online-websocket-server.cc
or use the python server ./python-api-examples/streaming_server.py

There is also a C++ version of the client. Please see
https://github.com/k2-fsa/sherpa-onnx/blob/master/sherpa-onnx/csrc/online-websocket-client.cc
"""

import argparse
import asyncio
import json
import logging
import wave

try:
    import websockets
except ImportError:
    print("please run:")
    print("")
    print("  pip install websockets")
    print("")
    print("before you run this script")
    print("")

import numpy as np


def read_wave(wave_filename: str) -> np.ndarray:
    """
    Args:
      wave_filename:
        Path to a wave file. Its sampling rate has to be 16000.
        It should be single channel and each sample should be 16-bit.
    Returns:
      Return a 1-D float32 tensor.
    """

    with wave.open(wave_filename) as f:
        assert f.getframerate() == 16000, f.getframerate()
        assert f.getnchannels() == 1, f.getnchannels()
        assert f.getsampwidth() == 2, f.getsampwidth()  # it is in bytes
        num_samples = f.getnframes()
        samples = f.readframes(num_samples)
        samples_int16 = np.frombuffer(samples, dtype=np.int16)
        samples_float32 = samples_int16.astype(np.float32)

        samples_float32 = samples_float32 / 32768
        return samples_float32


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--server-addr",
        type=str,
        default="localhost",
        help="Address of the server",
    )

    parser.add_argument(
        "--server-port",
        type=int,
        default=6006,
        help="Port of the server",
    )

    parser.add_argument(
        "--samples-per-message",
        type=int,
        default=8000,
        help="Number of samples per message",
    )

    parser.add_argument(
        "--seconds-per-message",
        type=float,
        default=0.1,
        help="We will simulate that the duration of two messages is of this value",
    )

    parser.add_argument(
        "sound_file",
        type=str,
        help="The input sound file. Must be wave with a single channel, 16kHz "
        "sampling rate, 16-bit of each sample.",
    )

    return parser.parse_args()


async def receive_results(socket: websockets.WebSocketServerProtocol):
    last_message = ""
    async for message in socket:
        if message != "Done!":
            last_message = message
            logging.info(json.loads(message))
        else:
            break
    return last_message


async def run(
    server_addr: str,
    server_port: int,
    wave_filename: str,
    samples_per_message: int,
    seconds_per_message: float,
):
    data = read_wave(wave_filename)

    async with websockets.connect(
        f"ws://{server_addr}:{server_port}"
    ) as websocket:  # noqa
        logging.info(f"Sending {wave_filename}")

        receive_task = asyncio.create_task(receive_results(websocket))

        start = 0
        while start < data.shape[0]:
            end = start + samples_per_message
            end = min(end, data.shape[0])
            d = data.data[start:end].tobytes()

            await websocket.send(d)

            # Simulate streaming. You can remove the sleep if you want
            await asyncio.sleep(seconds_per_message)  # in seconds

            start += samples_per_message

        # to signal that the client has sent all the data
        await websocket.send("Done")

        decoding_results = await receive_task
        logging.info(f"\nFinal result is:\n{json.loads(decoding_results)}")


async def main():
    args = get_args()
    logging.info(vars(args))

    server_addr = args.server_addr
    server_port = args.server_port
    samples_per_message = args.samples_per_message
    seconds_per_message = args.seconds_per_message

    await run(
        server_addr=server_addr,
        server_port=server_port,
        wave_filename=args.sound_file,
        samples_per_message=samples_per_message,
        seconds_per_message=seconds_per_message,
    )


if __name__ == "__main__":
    formatter = (
        "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"  # noqa
    )
    logging.basicConfig(format=formatter, level=logging.INFO)
    asyncio.run(main())


================================================
FILE: python-api-examples/online-websocket-client-microphone.py
================================================
#!/usr/bin/env python3
#
# Copyright (c)  2023  Xiaomi Corporation

"""
A websocket client for sherpa-onnx-online-websocket-server

Usage:
    ./online-websocket-client-microphone.py \
      --server-addr localhost \
      --server-port 6006

(Note: You have to first start the server before starting the client)

You can find the C++ server at
https://github.com/k2-fsa/sherpa-onnx/blob/master/sherpa-onnx/csrc/online-websocket-server.cc
or use the python server ./python-api-examples/streaming_server.py

There is also a C++ version of the client. Please see
https://github.com/k2-fsa/sherpa-onnx/blob/master/sherpa-onnx/csrc/online-websocket-client.cc
"""

import argparse
import asyncio
import sys

import numpy as np

try:
    import sounddevice as sd
except ImportError:
    print("Please install sounddevice first. You can use")
    print()
    print("  pip install sounddevice")
    print()
    print("to install it")
    sys.exit(-1)

try:
    import websockets
except ImportError:
    print("please run:")
    print("")
    print("  pip install websockets")
    print("")
    print("before you run this script")
    print("")
    sys.exit(-1)


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--server-addr",
        type=str,
        default="localhost",
        help="Address of the server",
    )

    parser.add_argument(
        "--server-port",
        type=int,
        default=6006,
        help="Port of the server",
    )

    return parser.parse_args()


async def inputstream_generator(channels=1):
    """Generator that yields blocks of input data as NumPy arrays.

    See https://python-sounddevice.readthedocs.io/en/0.4.6/examples.html#creating-an-asyncio-generator-for-audio-blocks
    """
    q_in = asyncio.Queue()
    loop = asyncio.get_event_loop()

    def callback(indata, frame_count, time_info, status):
        loop.call_soon_threadsafe(q_in.put_nowait, (indata.copy(), status))

    devices = sd.query_devices()
    print(devices)
    default_input_device_idx = sd.default.device[0]
    print(f'Use default device: {devices[default_input_device_idx]["name"]}')
    print()
    print("Started! Please speak")

    stream = sd.InputStream(
        callback=callback,
        channels=channels,
        dtype="float32",
        samplerate=16000,
        blocksize=int(0.05 * 16000),  # 0.05 seconds
    )
    with stream:
        while True:
            indata, status = await q_in.get()
            yield indata, status


async def receive_results(socket: websockets.WebSocketServerProtocol):
    last_message = ""
    async for message in socket:
        if message != "Done!":
            if last_message != message:
                last_message = message

                if last_message:
                    print(last_message)
        else:
            return last_message


async def run(
    server_addr: str,
    server_port: int,
):
    async with websockets.connect(
        f"ws://{server_addr}:{server_port}"
    ) as websocket:  # noqa
        receive_task = asyncio.create_task(receive_results(websocket))
        print("Started! Please Speak")

        async for indata, status in inputstream_generator():
            if status:
                print(status)
            indata = indata.reshape(-1)
            indata = np.ascontiguousarray(indata)
            await websocket.send(indata.tobytes())

        decoding_results = await receive_task
        print(f"\nFinal result is:\n{decoding_results}")


async def main():
    args = get_args()
    print(vars(args))

    server_addr = args.server_addr
    server_port = args.server_port

    await run(
        server_addr=server_addr,
        server_port=server_port,
    )


if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print("\nCaught Ctrl + C. Exiting")


================================================
FILE: python-api-examples/online-zipformer-ctc-hlg-decode-file.py
================================================
#!/usr/bin/env python3

# This file shows how to use a streaming zipformer CTC model and an HLG
# graph for decoding.
#
# We use the following model as an example
#
"""
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2

python3 ./python-api-examples/online-zipformer-ctc-hlg-decode-file.py \
  --tokens ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt \
  --graph ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst \
  --model ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx \
  ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/0.wav

"""
# (The above model is from https://github.com/k2-fsa/icefall/pull/1557)

import argparse
import time
import wave
from pathlib import Path
from typing import List, Tuple

import numpy as np
import sherpa_onnx


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--tokens",
        type=str,
        required=True,
        help="Path to tokens.txt",
    )

    parser.add_argument(
        "--model",
        type=str,
        required=True,
        help="Path to the ONNX model",
    )

    parser.add_argument(
        "--graph",
        type=str,
        required=True,
        help="Path to H.fst, HL.fst, or HLG.fst",
    )

    parser.add_argument(
        "--num-threads",
        type=int,
        default=1,
        help="Number of threads for neural network computation",
    )

    parser.add_argument(
        "--provider",
        type=str,
        default="cpu",
        help="Valid values: cpu, cuda, coreml",
    )

    parser.add_argument(
        "--debug",
        type=int,
        default=0,
        help="Valid values: 1, 0",
    )

    parser.add_argument(
        "sound_file",
        type=str,
        help="The input sound file to decode. It must be of WAVE"
        "format with a single channel, and each sample has 16-bit, "
        "i.e., int16_t. "
        "The sample rate of the file can be arbitrary and does not need to "
        "be 16 kHz",
    )

    return parser.parse_args()


def assert_file_exists(filename: str):
    assert Path(filename).is_file(), (
        f"{filename} does not exist!\n"
        "Please refer to "
        "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html to download it"
    )


def read_wave(wave_filename: str) -> Tuple[np.ndarray, int]:
    """
    Args:
      wave_filename:
        Path to a wave file. It should be single channel and each sample should
        be 16-bit. Its sample rate does not need to be 16kHz.
    Returns:
      Return a tuple containing:
       - A 1-D array of dtype np.float32 containing the samples, which are
       normalized to the range [-1, 1].
       - sample rate of the wave file
    """

    with wave.open(wave_filename) as f:
        assert f.getnchannels() == 1, f.getnchannels()
        assert f.getsampwidth() == 2, f.getsampwidth()  # it is in bytes
        num_samples = f.getnframes()
        samples = f.readframes(num_samples)
        samples_int16 = np.frombuffer(samples, dtype=np.int16)
        samples_float32 = samples_int16.astype(np.float32)

        samples_float32 = samples_float32 / 32768
        return samples_float32, f.getframerate()


def main():
    args = get_args()
    print(vars(args))

    assert_file_exists(args.tokens)
    assert_file_exists(args.graph)
    assert_file_exists(args.model)

    recognizer = sherpa_onnx.OnlineRecognizer.from_zipformer2_ctc(
        tokens=args.tokens,
        model=args.model,
        num_threads=args.num_threads,
        provider=args.provider,
        sample_rate=16000,
        feature_dim=80,
        ctc_graph=args.graph,
    )

    wave_filename = args.sound_file
    assert_file_exists(wave_filename)
    samples, sample_rate = read_wave(wave_filename)
    duration = len(samples) / sample_rate

    print("Started")

    start_time = time.time()
    s = recognizer.create_stream()
    s.accept_waveform(sample_rate, samples)
    tail_paddings = np.zeros(int(0.66 * sample_rate), dtype=np.float32)
    s.accept_waveform(sample_rate, tail_paddings)
    s.input_finished()
    while recognizer.is_ready(s):
        recognizer.decode_stream(s)

    result = recognizer.get_result(s).lower()
    end_time = time.time()

    elapsed_seconds = end_time - start_time
    rtf = elapsed_seconds / duration
    print(f"num_threads: {args.num_threads}")
    print(f"Wave duration: {duration:.3f} s")
    print(f"Elapsed time: {elapsed_seconds:.3f} s")
    print(f"Real time factor (RTF): {elapsed_seconds:.3f}/{duration:.3f} = {rtf:.3f}")
    print(result)


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/pocket-tts-play.py
================================================
#!/usr/bin/env python3
#
# Copyright (c)  2026  Xiaomi Corporation

"""
This file demonstrates how to use sherpa-onnx Python API
for voice cloning using PocketTTS.

Different from ./pocket-tts.py, this file plays back the generated audio
while the model is still generating.

Usage:

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
tar xvf sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
rm sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2

python3 ./pocket-tts-play.py

You can find more models at
https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models

Please see
https://k2-fsa.github.io/sherpa/onnx/tts/pocket.html
for details.
"""

import logging
import queue
import sys
import threading
import time
from pathlib import Path

import librosa
import numpy as np
import sherpa_onnx
import soundfile as sf

try:
    import sounddevice as sd
except ImportError:
    print("Please install sounddevice first. You can use")
    print()
    print("  pip install sounddevice")
    print()
    print("to install it")
    sys.exit(-1)


def create_tts():
    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=sherpa_onnx.OfflineTtsModelConfig(
            pocket=sherpa_onnx.OfflineTtsPocketModelConfig(
                lm_flow="./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_flow.int8.onnx",
                lm_main="./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_main.int8.onnx",
                encoder="./sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx",
                decoder="./sherpa-onnx-pocket-tts-int8-2026-01-26/decoder.int8.onnx",
                text_conditioner="./sherpa-onnx-pocket-tts-int8-2026-01-26/text_conditioner.onnx",
                vocab_json="./sherpa-onnx-pocket-tts-int8-2026-01-26/vocab.json",
                token_scores_json="./sherpa-onnx-pocket-tts-int8-2026-01-26/token_scores.json",
            ),
            debug=True,  # set it to True to see verbose logs
            num_threads=2,
            provider="cpu",
        )
    )
    if not tts_config.validate():
        raise ValueError(
            "Please read the previous error messages and re-check your config"
        )

    return sherpa_onnx.OfflineTts(tts_config)


# buffer saves audio samples to be played
buffer = queue.Queue()

# started is set to True once generated_audio_callback is called.
started = False

# stopped is set to True once all the text has been processed
stopped = False

# killed is set to True once ctrl + C is pressed
killed = False

# Note: When started is True, and stopped is True, and buffer is empty,
# we will exit the program since all audio samples have been played.

sample_rate = None

event = threading.Event()

first_message_time = None


def generated_audio_callback(samples: np.ndarray, progress: float):
    """This function is called whenever max_num_sentences sentences
    have been processed.

    Note that it is passed to C++ and is invoked in C++.

    Args:
      samples:
        A 1-D np.float32 array containing audio samples
    """
    global first_message_time
    if first_message_time is None:
        first_message_time = time.time()

    buffer.put(samples)
    global started

    if started is False:
        logging.info("Start playing ...")
    started = True

    # 1 means to keep generating
    # 0 means to stop generating
    if killed:
        return 0

    return 1


# see https://python-sounddevice.readthedocs.io/en/0.4.6/api/streams.html#sounddevice.OutputStream
def play_audio_callback(
    outdata: np.ndarray, frames: int, time, status: sd.CallbackFlags
):
    if killed or (started and buffer.empty() and stopped):
        event.set()

    # outdata is of shape (frames, num_channels)
    if buffer.empty():
        outdata.fill(0)
        return

    n = 0
    while n < frames and not buffer.empty():
        remaining = frames - n
        k = buffer.queue[0].shape[0]

        if remaining <= k:
            outdata[n:, 0] = buffer.queue[0][:remaining]
            buffer.queue[0] = buffer.queue[0][remaining:]
            n = frames
            if buffer.queue[0].shape[0] == 0:
                buffer.get()

            break

        outdata[n : n + k, 0] = buffer.get()
        n += k

    if n < frames:
        outdata[n:, 0] = 0


# Please see
# https://python-sounddevice.readthedocs.io/en/0.4.6/usage.html#device-selection
# for how to select a device
def play_audio():
    if False:
        # This if branch can be safely removed. It is here to show you how to
        # change the default output device in case you need that.
        devices = sd.query_devices()
        print(devices)

        # sd.default.device[1] is the output device, if you want to
        # select a different device, say, 3, as the output device, please
        # use self.default.device[1] = 3

        default_output_device_idx = sd.default.device[1]
        print(
            f'Use default output device: {devices[default_output_device_idx]["name"]}'
        )

    with sd.OutputStream(
        channels=1,
        callback=play_audio_callback,
        dtype="float32",
        samplerate=sample_rate,
        blocksize=1024,
    ):
        event.wait()

    logging.info("Exiting ...")


def main():
    reference_audio_file = "./sherpa-onnx-pocket-tts-int8-2026-01-26/test_wavs/bria.wav"
    if not Path(reference_audio_file).is_file():
        raise ValueError(f"Reference audio {reference_audio_file} does not exist")

    logging.info("Loading model ...")
    tts = create_tts()
    logging.info("Loading model done.")

    reference_audio, reference_sample_rate = librosa.load(
        reference_audio_file, sr=tts.sample_rate
    )

    text = """
    I am happy to join with you today in what will go down in history as the greatest demonstration for freedom in the history of our nation.
    Five score years ago, a great American, in whose symbolic shadow we stand today, signed the Emancipation Proclamation. This momentous decree came as a great beacon light of hope to millions of Negro slaves who had been seared in the flames of withering injustice. It came as a joyous daybreak to end the long night of their captivity.
    But one hundred years later, the Negro still is not free. One hundred years later, the life of the Negro is still sadly crippled by the manacles of segregation and the chains of discrimination. One hundred years later, the Negro lives on a lonely island of poverty in the midst of a vast ocean of material prosperity. One hundred years later, the Negro is still languished in the corners of American society and finds himself an exile in his own land. And so we've come here today to dramatize a shameful condition.
    In a sense we've come to our nation's capital to cash a check. When the architects of our republic wrote the magnificent words of the Constitution and the Declaration of Independence, they were signing a promissory note to which every American was to fall heir. This note was a promise that all men, yes, black men as well as white men, would be guaranteed the "unalienable Rights" of "Life, Liberty and the pursuit of Happiness." It is obvious today that America has defaulted on this promissory note, insofar as her citizens of color are concerned. Instead of honoring this sacred obligation, America has given the Negro people a bad check, a check which has come back marked insufficient funds.
    """

    global sample_rate
    sample_rate = tts.sample_rate

    gen_config = sherpa_onnx.GenerationConfig()
    gen_config.reference_audio = reference_audio
    gen_config.reference_sample_rate = reference_sample_rate
    gen_config.num_steps = 5

    play_back_thread = threading.Thread(target=play_audio)
    play_back_thread.start()

    logging.info("Start generating ...")
    start_time = time.time()
    audio = tts.generate(
        text,
        gen_config,
        callback=generated_audio_callback,
    )
    end_time = time.time()
    logging.info("Finished generating!")
    global stopped
    stopped = True

    if len(audio.samples) == 0:
        print("Error in generating audios. Please read previous error messages.")
        global killed
        killed = True
        play_back_thread.join()
        return

    elapsed_seconds = end_time - start_time
    audio_duration = len(audio.samples) / audio.sample_rate
    real_time_factor = elapsed_seconds / audio_duration

    output_filename = "./generated.wav"
    sf.write(
        output_filename,
        audio.samples,
        samplerate=audio.sample_rate,
        subtype="PCM_16",
    )
    logging.info(f"The text is '{text}'")
    logging.info(
        "Time in seconds to receive the first "
        f"message: {first_message_time-start_time:.3f}"
    )
    logging.info(f"Elapsed seconds: {elapsed_seconds:.3f}")
    logging.info(f"Audio duration in seconds: {audio_duration:.3f}")
    logging.info(
        f"RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}"
    )

    logging.info(f"***  Saved to {output_filename} ***")

    print("\n   >>>>>>>>> You can safely press ctrl + C to stop the play <<<<<<<<<<\n")

    play_back_thread.join()


if __name__ == "__main__":
    formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"

    logging.basicConfig(format=formatter, level=logging.INFO)
    try:
        main()
    except KeyboardInterrupt:
        print("\nCaught Ctrl + C. Exiting")
        killed = True
        sys.exit(0)


================================================
FILE: python-api-examples/pocket-tts.py
================================================
#!/usr/bin/env python3
#
# Copyright (c)  2026  Xiaomi Corporation

"""
This file demonstrates how to use sherpa-onnx Python API
for voice cloning using PocketTTS.


Different from ./pocket-tts-play.py, this file does not play back the
generated audio.

Usage:

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
tar xvf sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
rm sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2

python3 ./pocket-tts.py

You can find more models at
https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models

Please see
https://k2-fsa.github.io/sherpa/onnx/tts/pocket.html
for details.

"""

import time
from pathlib import Path

import librosa
import sherpa_onnx
import soundfile as sf


def create_tts():
    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=sherpa_onnx.OfflineTtsModelConfig(
            pocket=sherpa_onnx.OfflineTtsPocketModelConfig(
                lm_flow="./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_flow.int8.onnx",
                lm_main="./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_main.int8.onnx",
                encoder="./sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx",
                decoder="./sherpa-onnx-pocket-tts-int8-2026-01-26/decoder.int8.onnx",
                text_conditioner="./sherpa-onnx-pocket-tts-int8-2026-01-26/text_conditioner.onnx",
                vocab_json="./sherpa-onnx-pocket-tts-int8-2026-01-26/vocab.json",
                token_scores_json="./sherpa-onnx-pocket-tts-int8-2026-01-26/token_scores.json",
            ),
            debug=True,
            num_threads=2,
            provider="cpu",
        )
    )
    if not tts_config.validate():
        raise ValueError(
            "Please read the previous error messages and re-check your config"
        )

    return sherpa_onnx.OfflineTts(tts_config)


def main():
    reference_audio_file = "./sherpa-onnx-pocket-tts-int8-2026-01-26/test_wavs/bria.wav"
    if not Path(reference_audio_file).is_file():
        raise ValueError(f"Reference audio {reference_audio_file} does not exist")

    tts = create_tts()

    reference_audio, sample_rate = librosa.load(
        reference_audio_file, sr=tts.sample_rate
    )

    text = "I am happy to join with you today in what will go down in history as the greatest demonstration for freedom in the history of our nation."

    gen_config = sherpa_onnx.GenerationConfig()
    gen_config.reference_audio = reference_audio
    gen_config.reference_sample_rate = sample_rate
    gen_config.num_steps = 5

    start = time.time()
    audio = tts.generate(text, gen_config)
    end = time.time()

    if len(audio.samples) == 0:
        print("Error in generating audios. Please read previous error messages.")
        return

    elapsed_seconds = end - start
    audio_duration = len(audio.samples) / audio.sample_rate
    real_time_factor = elapsed_seconds / audio_duration

    output_filename = "./generated.wav"
    sf.write(
        output_filename,
        audio.samples,
        samplerate=audio.sample_rate,
        subtype="PCM_16",
    )
    print(f"Saved to {output_filename}")
    print(f"The text is '{text}'")
    print(f"Elapsed seconds: {elapsed_seconds:.3f}")
    print(f"Audio duration in seconds: {audio_duration:.3f}")
    print(f"RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}")


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/simulate-streaming-paraformer-microphone.py
================================================
#!/usr/bin/env python3
#
# Copyright (c)  2025  Xiaomi Corporation

"""
This file demonstrates how to use sherpa-onnx Python APIs
with VAD and non-streaming Paraformer for real-time speech recognition
from a microphone.

Usage:


wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-int8-2025-10-07.tar.bz2
tar xvf sherpa-onnx-paraformer-zh-int8-2025-10-07.tar.bz2

./python-api-examples/simulate-streaming-paraformer-microphone.py  \
  --silero-vad-model=./silero_vad.onnx \
  --paraformer=./sherpa-onnx-paraformer-zh-int8-2025-10-07/model.int8.onnx \
  --tokens=./sherpa-onnx-paraformer-zh-int8-2025-10-07/tokens.txt
"""
import argparse
import queue
import sys
import threading
import time
from pathlib import Path

import numpy as np

try:
    import sounddevice as sd
except ImportError:
    print("Please install sounddevice first. You can use")
    print()
    print("  pip install sounddevice")
    print()
    print("to install it")
    sys.exit(-1)

import sherpa_onnx

killed = False
recording_thread = None
sample_rate = 16000  # Please don't change it

# buffer saves audio samples to be played
samples_queue = queue.Queue()


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--silero-vad-model",
        type=str,
        required=True,
        help="Path to silero_vad.onnx",
    )

    parser.add_argument(
        "--tokens",
        type=str,
        help="Path to tokens.txt",
    )

    parser.add_argument(
        "--paraformer",
        default="",
        type=str,
        help="Path to the model.onnx from Paraformer",
    )

    parser.add_argument(
        "--num-threads",
        type=int,
        default=2,
        help="Number of threads for neural network computation",
    )

    parser.add_argument(
        "--hr-lexicon",
        type=str,
        default="",
        help="If not empty, it is the lexicon.txt for homophone replacer",
    )

    parser.add_argument(
        "--hr-rule-fsts",
        type=str,
        default="",
        help="If not empty, it is the replace.fst for homophone replacer",
    )

    return parser.parse_args()


def assert_file_exists(filename: str):
    assert Path(filename).is_file(), (
        f"{filename} does not exist!\n"
        "Please refer to "
        "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html to download it"
    )


def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
    assert_file_exists(args.paraformer)
    recognizer = sherpa_onnx.OfflineRecognizer.from_paraformer(
        paraformer=args.paraformer,
        tokens=args.tokens,
        num_threads=args.num_threads,
        debug=False,
        hr_rule_fsts=args.hr_rule_fsts,
        hr_lexicon=args.hr_lexicon,
    )

    return recognizer


def start_recording():
    # You can use any value you like for samples_per_read
    samples_per_read = int(0.1 * sample_rate)  # 0.1 second = 100 ms

    with sd.InputStream(channels=1, dtype="float32", samplerate=sample_rate) as s:
        while not killed:
            samples, _ = s.read(samples_per_read)  # a blocking read
            samples = samples.reshape(-1)
            samples = np.copy(samples)
            samples_queue.put(samples)


def main():
    devices = sd.query_devices()
    if len(devices) == 0:
        print("No microphone devices found")
        sys.exit(0)

    print(devices)

    # If you want to select a different input device, please use
    # sd.default.device[0] = xxx
    # where xxx is the device number

    default_input_device_idx = sd.default.device[0]
    print(f'Use default device: {devices[default_input_device_idx]["name"]}')

    args = get_args()
    assert_file_exists(args.tokens)
    assert_file_exists(args.silero_vad_model)

    assert args.num_threads > 0, args.num_threads

    print("Creating recognizer. Please wait...")
    recognizer = create_recognizer(args)

    config = sherpa_onnx.VadModelConfig()
    config.silero_vad.model = args.silero_vad_model
    config.silero_vad.threshold = 0.5
    config.silero_vad.min_silence_duration = 0.1  # seconds
    config.silero_vad.min_speech_duration = 0.25  # seconds
    # If the current segment is larger than this value, then it increases
    # the threshold to 0.9 internally. After detecting this segment,
    # it resets the threshold to its original value.
    config.silero_vad.max_speech_duration = 8  # seconds
    config.sample_rate = sample_rate

    window_size = config.silero_vad.window_size

    vad = sherpa_onnx.VoiceActivityDetector(config, buffer_size_in_seconds=100)

    print("Started! Please speak")

    buffer = []

    global recording_thread
    recording_thread = threading.Thread(target=start_recording)
    recording_thread.start()

    display = sherpa_onnx.Display()

    started = False
    started_time = None

    offset = 0
    while not killed:
        samples = samples_queue.get()  # a blocking read

        buffer = np.concatenate([buffer, samples])
        while offset + window_size < len(buffer):
            vad.accept_waveform(buffer[offset : offset + window_size])
            if not started and vad.is_speech_detected():
                started = True
                started_time = time.time()
            offset += window_size

        if not started:
            if len(buffer) > 10 * window_size:
                offset -= len(buffer) - 10 * window_size
                buffer = buffer[-10 * window_size :]

        if started and time.time() - started_time > 0.2:
            stream = recognizer.create_stream()
            stream.accept_waveform(sample_rate, buffer)
            recognizer.decode_stream(stream)
            text = stream.result.text.strip()
            if text:
                display.update_text(text)
                display.display()

            started_time = time.time()

        while not vad.empty():
            # In general, this while loop is executed only once
            stream = recognizer.create_stream()
            stream.accept_waveform(sample_rate, vad.front.samples)

            vad.pop()
            recognizer.decode_stream(stream)

            text = stream.result.text.strip()

            display.update_text(text)

            buffer = []
            offset = 0
            started = False
            started_time = None

            display.finalize_current_sentence()
            display.display()


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        killed = True
        if recording_thread:
            recording_thread.join()
        print("\nCaught Ctrl + C. Exiting")


================================================
FILE: python-api-examples/simulate-streaming-sense-voice-microphone.py
================================================
#!/usr/bin/env python3
#
# Copyright (c)  2025  Xiaomi Corporation

"""
This file demonstrates how to use sherpa-onnx Python APIs
with VAD and non-streaming SenseVoice for real-time speech recognition
from a microphone.

Usage:


wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

./python-api-examples/simulate-streaming-sense-voice-microphone.py  \
  --silero-vad-model=./silero_vad.onnx \
  --sense-voice=./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.onnx \
  --tokens=./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt
"""
import argparse
import queue
import sys
import threading
import time
from pathlib import Path

import numpy as np

try:
    import sounddevice as sd
except ImportError:
    print("Please install sounddevice first. You can use")
    print()
    print("  pip install sounddevice")
    print()
    print("to install it")
    sys.exit(-1)

import sherpa_onnx

killed = False
recording_thread = None
sample_rate = 16000  # Please don't change it

# buffer saves audio samples to be played
samples_queue = queue.Queue()


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--silero-vad-model",
        type=str,
        required=True,
        help="Path to silero_vad.onnx",
    )

    parser.add_argument(
        "--tokens",
        type=str,
        help="Path to tokens.txt",
    )

    parser.add_argument(
        "--sense-voice",
        default="",
        type=str,
        help="Path to the model.onnx from SenseVoice",
    )

    parser.add_argument(
        "--num-threads",
        type=int,
        default=2,
        help="Number of threads for neural network computation",
    )

    parser.add_argument(
        "--hr-lexicon",
        type=str,
        default="",
        help="If not empty, it is the lexicon.txt for homophone replacer",
    )

    parser.add_argument(
        "--hr-rule-fsts",
        type=str,
        default="",
        help="If not empty, it is the replace.fst for homophone replacer",
    )

    return parser.parse_args()


def assert_file_exists(filename: str):
    assert Path(filename).is_file(), (
        f"{filename} does not exist!\n"
        "Please refer to "
        "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html to download it"
    )


def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
    assert_file_exists(args.sense_voice)
    recognizer = sherpa_onnx.OfflineRecognizer.from_sense_voice(
        model=args.sense_voice,
        tokens=args.tokens,
        num_threads=args.num_threads,
        use_itn=False,
        debug=False,
        hr_rule_fsts=args.hr_rule_fsts,
        hr_lexicon=args.hr_lexicon,
    )

    return recognizer


def start_recording():
    # You can use any value you like for samples_per_read
    samples_per_read = int(0.1 * sample_rate)  # 0.1 second = 100 ms

    with sd.InputStream(channels=1, dtype="float32", samplerate=sample_rate) as s:
        while not killed:
            samples, _ = s.read(samples_per_read)  # a blocking read
            samples = samples.reshape(-1)
            samples = np.copy(samples)
            samples_queue.put(samples)


def main():
    devices = sd.query_devices()
    if len(devices) == 0:
        print("No microphone devices found")
        sys.exit(0)

    print(devices)

    # If you want to select a different input device, please use
    # sd.default.device[0] = xxx
    # where xxx is the device number

    default_input_device_idx = sd.default.device[0]
    print(f'Use default device: {devices[default_input_device_idx]["name"]}')

    args = get_args()
    assert_file_exists(args.tokens)
    assert_file_exists(args.silero_vad_model)

    assert args.num_threads > 0, args.num_threads

    print("Creating recognizer. Please wait...")
    recognizer = create_recognizer(args)

    config = sherpa_onnx.VadModelConfig()
    config.silero_vad.model = args.silero_vad_model
    config.silero_vad.threshold = 0.5
    config.silero_vad.min_silence_duration = 0.1  # seconds
    config.silero_vad.min_speech_duration = 0.25  # seconds
    # If the current segment is larger than this value, then it increases
    # the threshold to 0.9 internally. After detecting this segment,
    # it resets the threshold to its original value.
    config.silero_vad.max_speech_duration = 8  # seconds
    config.sample_rate = sample_rate

    window_size = config.silero_vad.window_size

    vad = sherpa_onnx.VoiceActivityDetector(config, buffer_size_in_seconds=100)

    print("Started! Please speak")

    buffer = []

    global recording_thread
    recording_thread = threading.Thread(target=start_recording)
    recording_thread.start()

    display = sherpa_onnx.Display()

    started = False
    started_time = None

    offset = 0
    while not killed:
        samples = samples_queue.get()  # a blocking read

        buffer = np.concatenate([buffer, samples])
        while offset + window_size < len(buffer):
            vad.accept_waveform(buffer[offset : offset + window_size])
            if not started and vad.is_speech_detected():
                started = True
                started_time = time.time()
            offset += window_size

        if not started:
            if len(buffer) > 10 * window_size:
                offset -= len(buffer) - 10 * window_size
                buffer = buffer[-10 * window_size :]

        if started and time.time() - started_time > 0.2:
            stream = recognizer.create_stream()
            stream.accept_waveform(sample_rate, buffer)
            recognizer.decode_stream(stream)
            text = stream.result.text.strip()
            if text:
                display.update_text(text)
                display.display()

            started_time = time.time()

        while not vad.empty():
            # In general, this while loop is executed only once
            stream = recognizer.create_stream()
            stream.accept_waveform(sample_rate, vad.front.samples)

            vad.pop()
            recognizer.decode_stream(stream)

            text = stream.result.text.strip()

            display.update_text(text)

            buffer = []
            offset = 0
            started = False
            started_time = None

            display.finalize_current_sentence()
            display.display()


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        killed = True
        if recording_thread:
            recording_thread.join()
        print("\nCaught Ctrl + C. Exiting")


================================================
FILE: python-api-examples/speaker-identification-with-vad-dynamic.py
================================================
#!/usr/bin/env python3

"""
This script shows how to use Python APIs for speaker identification with
a microphone and a VAD model

Usage:

(1) Download a model for computing speaker embeddings

Please visit
https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
to download a model. An example is given below:

    wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx

Note that `zh` means Chinese, while `en` means English.

(2) Download the VAD model
Please visit
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
to download silero_vad.onnx

For instance,

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

(3) Run this script

python3 ./python-api-examples/speaker-identification-with-vad-dynamic.py \
  --silero-vad-model=/path/to/silero_vad.onnx \
  --model ./3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx
"""
import argparse
import sys

import numpy as np
import sherpa_onnx

try:
    import sounddevice as sd
except ImportError:
    print("Please install sounddevice first. You can use")
    print()
    print("  pip install sounddevice")
    print()
    print("to install it")
    sys.exit(-1)

g_sample_rate = 16000


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--model",
        type=str,
        required=True,
        help="Path to the speaker embedding model file.",
    )

    parser.add_argument(
        "--silero-vad-model",
        type=str,
        required=True,
        help="Path to silero_vad.onnx",
    )

    parser.add_argument("--threshold", type=float, default=0.4)

    parser.add_argument(
        "--num-threads",
        type=int,
        default=1,
        help="Number of threads for neural network computation",
    )

    parser.add_argument(
        "--debug",
        type=bool,
        default=False,
        help="True to show debug messages",
    )

    parser.add_argument(
        "--provider",
        type=str,
        default="cpu",
        help="Valid values: cpu, cuda, coreml",
    )

    return parser.parse_args()


def load_speaker_embedding_model(args):
    config = sherpa_onnx.SpeakerEmbeddingExtractorConfig(
        model=args.model,
        num_threads=args.num_threads,
        debug=args.debug,
        provider=args.provider,
    )
    if not config.validate():
        raise ValueError(f"Invalid config. {config}")
    extractor = sherpa_onnx.SpeakerEmbeddingExtractor(config)
    return extractor


def compute_speaker_embedding(
    samples: np.ndarray,
    extractor: sherpa_onnx.SpeakerEmbeddingExtractor,
) -> np.ndarray:
    """
    Args:
      samples:
        A 1-D float32 array.
      extractor:
        The return value of function load_speaker_embedding_model().
    Returns:
      Return a 1-D float32 array.
    """
    if len(samples) < g_sample_rate:
        print(f"Your input contains only {len(samples)} samples!")

    stream = extractor.create_stream()
    stream.accept_waveform(sample_rate=g_sample_rate, waveform=samples)
    stream.input_finished()

    assert extractor.is_ready(stream)
    embedding = extractor.compute(stream)
    embedding = np.array(embedding)
    return embedding


def main():
    args = get_args()
    print(args)

    devices = sd.query_devices()
    if len(devices) == 0:
        print("No microphone devices found")
        sys.exit(0)

    print(devices)
    # If you want to select a different device, please change
    # sd.default.device[0]. For instance, if you want to select device 10,
    # please use
    #
    #  sd.default.device[0] = 4
    #  print(devices)
    #

    default_input_device_idx = sd.default.device[0]
    print(f'Use default device: {devices[default_input_device_idx]["name"]}')

    extractor = load_speaker_embedding_model(args)

    manager = sherpa_onnx.SpeakerEmbeddingManager(extractor.dim)

    vad_config = sherpa_onnx.VadModelConfig()
    vad_config.silero_vad.model = args.silero_vad_model
    vad_config.silero_vad.min_silence_duration = 0.25
    vad_config.silero_vad.min_speech_duration = 1.0
    vad_config.sample_rate = g_sample_rate

    window_size = vad_config.silero_vad.window_size
    vad = sherpa_onnx.VoiceActivityDetector(vad_config, buffer_size_in_seconds=100)

    samples_per_read = int(0.1 * g_sample_rate)  # 0.1 second = 100 ms

    print("Started! Please speak")

    line_num = 0
    speaker_id = 0
    buffer = []
    with sd.InputStream(channels=1, dtype="float32", samplerate=g_sample_rate) as s:
        while True:
            samples, _ = s.read(samples_per_read)  # a blocking read
            samples = samples.reshape(-1)
            buffer = np.concatenate([buffer, samples])
            while len(buffer) > window_size:
                vad.accept_waveform(buffer[:window_size])
                buffer = buffer[window_size:]

            while not vad.empty():
                if len(vad.front.samples) < 0.5 * g_sample_rate:
                    # this segment is too short, skip it
                    vad.pop()
                    continue
                stream = extractor.create_stream()
                stream.accept_waveform(
                    sample_rate=g_sample_rate, waveform=vad.front.samples
                )
                vad.pop()
                stream.input_finished()

                embedding = extractor.compute(stream)
                embedding = np.array(embedding)
                name = manager.search(embedding, threshold=args.threshold)
                if not name:
                    # register it
                    new_name = f"speaker_{speaker_id}"
                    status = manager.add(new_name, embedding)
                    if not status:
                        raise RuntimeError(f"Failed to register speaker {new_name}")
                    print(
                        f"{line_num}: Detected new speaker. Register it as {new_name}"
                    )
                    speaker_id += 1
                else:
                    print(f"{line_num}: Detected existing speaker: {name}")
                line_num += 1


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\nCaught Ctrl + C. Exiting")


================================================
FILE: python-api-examples/speaker-identification-with-vad-non-streaming-asr-alsa.py
================================================
#!/usr/bin/env python3

"""
This script works only on Linux. It uses ALSA for recording.

This script shows how to use Python APIs for speaker identification with
a microphone, a VAD model, and a non-streaming ASR model.

Please see also ./generate-subtitles.py

Usage:

(1) Prepare a text file containing speaker related files.

Each line in the text file contains two columns. The first column is the
speaker name, while the second column contains the wave file of the speaker.

If the text file contains multiple wave files for the same speaker, then the
embeddings of these files are averaged.

An example text file is given below:

    foo /path/to/a.wav
    bar /path/to/b.wav
    foo /path/to/c.wav
    foobar /path/to/d.wav

Each wave file should contain only a single channel; the sample format
should be int16_t; the sample rate can be arbitrary.

(2) Download a model for computing speaker embeddings

Please visit
https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
to download a model. An example is given below:

    wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/wespeaker_zh_cnceleb_resnet34.onnx

Note that `zh` means Chinese, while `en` means English.

(3) Download the VAD model
Please visit
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
to download silero_vad.onnx

For instance,

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

(4) Please refer to ./generate-subtitles.py
to download a non-streaming ASR model.

(5) Run this script

Assume the filename of the text file is speaker.txt.

python3 ./python-api-examples/speaker-identification-with-vad-non-streaming-asr.py \
  --silero-vad-model=/path/to/silero_vad.onnx \
  --speaker-file ./speaker.txt \
  --model ./wespeaker_zh_cnceleb_resnet34.onnx
"""
import argparse
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Tuple

import numpy as np
import sherpa_onnx
import soundfile as sf

g_sample_rate = 16000


def register_non_streaming_asr_model_args(parser):
    parser.add_argument(
        "--tokens",
        type=str,
        help="Path to tokens.txt",
    )

    parser.add_argument(
        "--encoder",
        default="",
        type=str,
        help="Path to the transducer encoder model",
    )

    parser.add_argument(
        "--decoder",
        default="",
        type=str,
        help="Path to the transducer decoder model",
    )

    parser.add_argument(
        "--joiner",
        default="",
        type=str,
        help="Path to the transducer joiner model",
    )

    parser.add_argument(
        "--paraformer",
        default="",
        type=str,
        help="Path to the model.onnx from Paraformer",
    )

    parser.add_argument(
        "--wenet-ctc",
        default="",
        type=str,
        help="Path to the CTC model.onnx from WeNet",
    )

    parser.add_argument(
        "--whisper-encoder",
        default="",
        type=str,
        help="Path to whisper encoder model",
    )

    parser.add_argument(
        "--whisper-decoder",
        default="",
        type=str,
        help="Path to whisper decoder model",
    )

    parser.add_argument(
        "--whisper-language",
        default="",
        type=str,
        help="""It specifies the spoken language in the input file.
        Example values: en, fr, de, zh, jp.
        Available languages for multilingual models can be found at
        https://github.com/openai/whisper/blob/main/whisper/tokenizer.py#L10
        If not specified, we infer the language from the input audio file.
        """,
    )

    parser.add_argument(
        "--whisper-task",
        default="transcribe",
        choices=["transcribe", "translate"],
        type=str,
        help="""For multilingual models, if you specify translate, the output
        will be in English.
        """,
    )

    parser.add_argument(
        "--whisper-tail-paddings",
        default=-1,
        type=int,
        help="""Number of tail padding frames.
        We have removed the 30-second constraint from whisper, so you need to
        choose the amount of tail padding frames by yourself.
        Use -1 to use a default value for tail padding.
        """,
    )

    parser.add_argument(
        "--decoding-method",
        type=str,
        default="greedy_search",
        help="""Valid values are greedy_search and modified_beam_search.
        modified_beam_search is valid only for transducer models.
        """,
    )

    parser.add_argument(
        "--feature-dim",
        type=int,
        default=80,
        help="Feature dimension. Must match the one expected by the model",
    )


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    register_non_streaming_asr_model_args(parser)

    parser.add_argument(
        "--speaker-file",
        type=str,
        required=True,
        help="""Path to the speaker file. Read the help doc at the beginning of this
        file for the format.""",
    )

    parser.add_argument(
        "--model",
        type=str,
        required=True,
        help="Path to the speaker embedding model file.",
    )

    parser.add_argument(
        "--silero-vad-model",
        type=str,
        required=True,
        help="Path to silero_vad.onnx",
    )

    parser.add_argument("--threshold", type=float, default=0.6)

    parser.add_argument(
        "--num-threads",
        type=int,
        default=1,
        help="Number of threads for neural network computation",
    )

    parser.add_argument(
        "--debug",
        type=bool,
        default=False,
        help="True to show debug messages",
    )

    parser.add_argument(
        "--provider",
        type=str,
        default="cpu",
        help="Valid values: cpu, cuda, coreml",
    )

    parser.add_argument(
        "--device-name",
        type=str,
        required=True,
        help="""
The device name specifies which microphone to use in case there are several
on your system. You can use

  arecord -l

to find all available microphones on your computer. For instance, if it outputs

**** List of CAPTURE Hardware Devices ****
card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
  Subdevices: 1/1
  Subdevice #0: subdevice #0

and if you want to select card 3 and device 0 on that card, please use:

  plughw:3,0

as the device_name.
        """,
    )

    return parser.parse_args()


def assert_file_exists(filename: str):
    assert Path(filename).is_file(), (
        f"{filename} does not exist!\n"
        "Please refer to "
        "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html to download it"
    )


def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
    if args.encoder:
        assert len(args.paraformer) == 0, args.paraformer
        assert len(args.wenet_ctc) == 0, args.wenet_ctc
        assert len(args.whisper_encoder) == 0, args.whisper_encoder
        assert len(args.whisper_decoder) == 0, args.whisper_decoder

        assert_file_exists(args.encoder)
        assert_file_exists(args.decoder)
        assert_file_exists(args.joiner)

        recognizer = sherpa_onnx.OfflineRecognizer.from_transducer(
            encoder=args.encoder,
            decoder=args.decoder,
            joiner=args.joiner,
            tokens=args.tokens,
            num_threads=args.num_threads,
            sample_rate=args.sample_rate,
            feature_dim=args.feature_dim,
            decoding_method=args.decoding_method,
            debug=args.debug,
        )
    elif args.paraformer:
        assert len(args.wenet_ctc) == 0, args.wenet_ctc
        assert len(args.whisper_encoder) == 0, args.whisper_encoder
        assert len(args.whisper_decoder) == 0, args.whisper_decoder

        assert_file_exists(args.paraformer)

        recognizer = sherpa_onnx.OfflineRecognizer.from_paraformer(
            paraformer=args.paraformer,
            tokens=args.tokens,
            num_threads=args.num_threads,
            sample_rate=g_sample_rate,
            feature_dim=args.feature_dim,
            decoding_method=args.decoding_method,
            debug=args.debug,
        )
    elif args.wenet_ctc:
        assert len(args.whisper_encoder) == 0, args.whisper_encoder
        assert len(args.whisper_decoder) == 0, args.whisper_decoder

        assert_file_exists(args.wenet_ctc)

        recognizer = sherpa_onnx.OfflineRecognizer.from_wenet_ctc(
            model=args.wenet_ctc,
            tokens=args.tokens,
            num_threads=args.num_threads,
            sample_rate=args.sample_rate,
            feature_dim=args.feature_dim,
            decoding_method=args.decoding_method,
            debug=args.debug,
        )
    elif args.whisper_encoder:
        assert_file_exists(args.whisper_encoder)
        assert_file_exists(args.whisper_decoder)

        recognizer = sherpa_onnx.OfflineRecognizer.from_whisper(
            encoder=args.whisper_encoder,
            decoder=args.whisper_decoder,
            tokens=args.tokens,
            num_threads=args.num_threads,
            decoding_method=args.decoding_method,
            debug=args.debug,
            language=args.whisper_language,
            task=args.whisper_task,
            tail_paddings=args.whisper_tail_paddings,
        )
    else:
        raise ValueError("Please specify at least one model")

    return recognizer


def load_speaker_embedding_model(args):
    config = sherpa_onnx.SpeakerEmbeddingExtractorConfig(
        model=args.model,
        num_threads=args.num_threads,
        debug=args.debug,
        provider=args.provider,
    )
    if not config.validate():
        raise ValueError(f"Invalid config. {config}")
    extractor = sherpa_onnx.SpeakerEmbeddingExtractor(config)
    return extractor


def load_speaker_file(args) -> Dict[str, List[str]]:
    if not Path(args.speaker_file).is_file():
        raise ValueError(f"--speaker-file {args.speaker_file} does not exist")

    ans = defaultdict(list)
    with open(args.speaker_file) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue

            fields = line.split()
            if len(fields) != 2:
                raise ValueError(f"Invalid line: {line}. Fields: {fields}")

            speaker_name, filename = fields
            ans[speaker_name].append(filename)
    return ans


def load_audio(filename: str) -> Tuple[np.ndarray, int]:
    data, sample_rate = sf.read(
        filename,
        always_2d=True,
        dtype="float32",
    )
    data = data[:, 0]  # use only the first channel
    samples = np.ascontiguousarray(data)
    return samples, sample_rate


def compute_speaker_embedding(
    filenames: List[str],
    extractor: sherpa_onnx.SpeakerEmbeddingExtractor,
) -> np.ndarray:
    assert len(filenames) > 0, "filenames is empty"

    ans = None
    for filename in filenames:
        print(f"processing {filename}")
        samples, sample_rate = load_audio(filename)
        stream = extractor.create_stream()
        stream.accept_waveform(sample_rate=sample_rate, waveform=samples)
        stream.input_finished()

        assert extractor.is_ready(stream)
        embedding = extractor.compute(stream)
        embedding = np.array(embedding)
        if ans is None:
            ans = embedding
        else:
            ans += embedding

    return ans / len(filenames)


def main():
    args = get_args()
    print(args)

    device_name = args.device_name
    print(f"device_name: {device_name}")
    alsa = sherpa_onnx.Alsa(device_name)

    recognizer = create_recognizer(args)
    extractor = load_speaker_embedding_model(args)
    speaker_file = load_speaker_file(args)

    manager = sherpa_onnx.SpeakerEmbeddingManager(extractor.dim)
    for name, filename_list in speaker_file.items():
        embedding = compute_speaker_embedding(
            filenames=filename_list,
            extractor=extractor,
        )
        status = manager.add(name, embedding)
        if not status:
            raise RuntimeError(f"Failed to register speaker {name}")

    vad_config = sherpa_onnx.VadModelConfig()
    vad_config.silero_vad.model = args.silero_vad_model
    vad_config.silero_vad.min_silence_duration = 0.25
    vad_config.silero_vad.min_speech_duration = 0.25
    vad_config.sample_rate = g_sample_rate
    if not vad_config.validate():
        raise ValueError("Errors in vad config")

    window_size = vad_config.silero_vad.window_size

    vad = sherpa_onnx.VoiceActivityDetector(vad_config, buffer_size_in_seconds=100)

    samples_per_read = int(0.1 * g_sample_rate)  # 0.1 second = 100 ms

    print("Started! Please speak")

    idx = 0
    buffer = []
    while True:
        samples = alsa.read(samples_per_read)  # a blocking read
        samples = np.array(samples)
        buffer = np.concatenate([buffer, samples])
        while len(buffer) > window_size:
            vad.accept_waveform(buffer[:window_size])
            buffer = buffer[window_size:]

        while not vad.empty():
            if len(vad.front.samples) < 0.5 * g_sample_rate:
                # this segment is too short, skip it
                vad.pop()
                continue
            stream = extractor.create_stream()
            stream.accept_waveform(
                sample_rate=g_sample_rate, waveform=vad.front.samples
            )
            stream.input_finished()

            embedding = extractor.compute(stream)
            embedding = np.array(embedding)
            name = manager.search(embedding, threshold=args.threshold)
            if not name:
                name = "unknown"

            # Now for non-streaming ASR
            asr_stream = recognizer.create_stream()
            asr_stream.accept_waveform(
                sample_rate=g_sample_rate, waveform=vad.front.samples
            )
            recognizer.decode_stream(asr_stream)
            text = asr_stream.result.text

            vad.pop()

            print(f"\r{idx}-{name}: {text}")
            idx += 1


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\nCaught Ctrl + C. Exiting")


================================================
FILE: python-api-examples/speaker-identification-with-vad-non-streaming-asr.py
================================================
#!/usr/bin/env python3

"""
This script shows how to use Python APIs for speaker identification with
a microphone, a VAD model, and a non-streaming ASR model.

Please see also ./generate-subtitles.py

Usage:

(1) Prepare a text file containing speaker related files.

Each line in the text file contains two columns. The first column is the
speaker name, while the second column contains the wave file of the speaker.

If the text file contains multiple wave files for the same speaker, then the
embeddings of these files are averaged.

An example text file is given below:

    foo /path/to/a.wav
    bar /path/to/b.wav
    foo /path/to/c.wav
    foobar /path/to/d.wav

Each wave file should contain only a single channel; the sample format
should be int16_t; the sample rate can be arbitrary.

(2) Download a model for computing speaker embeddings

Please visit
https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
to download a model. An example is given below:

    wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/wespeaker_zh_cnceleb_resnet34.onnx

Note that `zh` means Chinese, while `en` means English.

(3) Download the VAD model
Please visit
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
to download silero_vad.onnx

For instance,

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

(4) Please refer to ./generate-subtitles.py
to download a non-streaming ASR model.

(5) Run this script

Assume the filename of the text file is speaker.txt.

python3 ./python-api-examples/speaker-identification-with-vad-non-streaming-asr.py \
  --silero-vad-model=/path/to/silero_vad.onnx \
  --speaker-file ./speaker.txt \
  --model ./wespeaker_zh_cnceleb_resnet34.onnx
"""
import argparse
import sys
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Tuple

import numpy as np
import sherpa_onnx
import soundfile as sf

try:
    import sounddevice as sd
except ImportError:
    print("Please install sounddevice first. You can use")
    print()
    print("  pip install sounddevice")
    print()
    print("to install it")
    sys.exit(-1)

g_sample_rate = 16000


def register_non_streaming_asr_model_args(parser):
    parser.add_argument(
        "--tokens",
        type=str,
        help="Path to tokens.txt",
    )

    parser.add_argument(
        "--encoder",
        default="",
        type=str,
        help="Path to the transducer encoder model",
    )

    parser.add_argument(
        "--decoder",
        default="",
        type=str,
        help="Path to the transducer decoder model",
    )

    parser.add_argument(
        "--joiner",
        default="",
        type=str,
        help="Path to the transducer joiner model",
    )

    parser.add_argument(
        "--paraformer",
        default="",
        type=str,
        help="Path to the model.onnx from Paraformer",
    )

    parser.add_argument(
        "--wenet-ctc",
        default="",
        type=str,
        help="Path to the CTC model.onnx from WeNet",
    )

    parser.add_argument(
        "--whisper-encoder",
        default="",
        type=str,
        help="Path to whisper encoder model",
    )

    parser.add_argument(
        "--whisper-decoder",
        default="",
        type=str,
        help="Path to whisper decoder model",
    )

    parser.add_argument(
        "--whisper-language",
        default="",
        type=str,
        help="""It specifies the spoken language in the input file.
        Example values: en, fr, de, zh, jp.
        Available languages for multilingual models can be found at
        https://github.com/openai/whisper/blob/main/whisper/tokenizer.py#L10
        If not specified, we infer the language from the input audio file.
        """,
    )

    parser.add_argument(
        "--whisper-task",
        default="transcribe",
        choices=["transcribe", "translate"],
        type=str,
        help="""For multilingual models, if you specify translate, the output
        will be in English.
        """,
    )

    parser.add_argument(
        "--whisper-tail-paddings",
        default=-1,
        type=int,
        help="""Number of tail padding frames.
        We have removed the 30-second constraint from whisper, so you need to
        choose the amount of tail padding frames by yourself.
        Use -1 to use a default value for tail padding.
        """,
    )

    parser.add_argument(
        "--decoding-method",
        type=str,
        default="greedy_search",
        help="""Valid values are greedy_search and modified_beam_search.
        modified_beam_search is valid only for transducer models.
        """,
    )

    parser.add_argument(
        "--feature-dim",
        type=int,
        default=80,
        help="Feature dimension. Must match the one expected by the model",
    )

    parser.add_argument(
        "--sense-voice",
        default="",
        type=str,
        help="Path to sense voice model",
    )


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    register_non_streaming_asr_model_args(parser)

    parser.add_argument(
        "--speaker-file",
        type=str,
        required=True,
        help="""Path to the speaker file. Read the help doc at the beginning of this
        file for the format.""",
    )

    parser.add_argument(
        "--model",
        type=str,
        required=True,
        help="Path to the speaker embedding model file.",
    )

    parser.add_argument(
        "--silero-vad-model",
        type=str,
        required=True,
        help="Path to silero_vad.onnx",
    )

    parser.add_argument("--threshold", type=float, default=0.6)

    parser.add_argument(
        "--num-threads",
        type=int,
        default=1,
        help="Number of threads for neural network computation",
    )

    parser.add_argument(
        "--debug",
        type=bool,
        default=False,
        help="True to show debug messages",
    )

    parser.add_argument(
        "--provider",
        type=str,
        default="cpu",
        help="Valid values: cpu, cuda, coreml",
    )

    return parser.parse_args()


def assert_file_exists(filename: str):
    assert Path(filename).is_file(), (
        f"{filename} does not exist!\n"
        "Please refer to "
        "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html to download it"
    )


def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
    if args.encoder:
        assert len(args.paraformer) == 0, args.paraformer
        assert len(args.wenet_ctc) == 0, args.wenet_ctc
        assert len(args.whisper_encoder) == 0, args.whisper_encoder
        assert len(args.whisper_decoder) == 0, args.whisper_decoder

        assert_file_exists(args.encoder)
        assert_file_exists(args.decoder)
        assert_file_exists(args.joiner)

        recognizer = sherpa_onnx.OfflineRecognizer.from_transducer(
            encoder=args.encoder,
            decoder=args.decoder,
            joiner=args.joiner,
            tokens=args.tokens,
            num_threads=args.num_threads,
            sample_rate=args.sample_rate,
            feature_dim=args.feature_dim,
            decoding_method=args.decoding_method,
            debug=args.debug,
        )
    elif args.paraformer:
        assert len(args.wenet_ctc) == 0, args.wenet_ctc
        assert len(args.whisper_encoder) == 0, args.whisper_encoder
        assert len(args.whisper_decoder) == 0, args.whisper_decoder

        assert_file_exists(args.paraformer)

        recognizer = sherpa_onnx.OfflineRecognizer.from_paraformer(
            paraformer=args.paraformer,
            tokens=args.tokens,
            num_threads=args.num_threads,
            sample_rate=g_sample_rate,
            feature_dim=args.feature_dim,
            decoding_method=args.decoding_method,
            debug=args.debug,
        )
    elif args.wenet_ctc:
        assert len(args.whisper_encoder) == 0, args.whisper_encoder
        assert len(args.whisper_decoder) == 0, args.whisper_decoder

        assert_file_exists(args.wenet_ctc)

        recognizer = sherpa_onnx.OfflineRecognizer.from_wenet_ctc(
            model=args.wenet_ctc,
            tokens=args.tokens,
            num_threads=args.num_threads,
            sample_rate=args.sample_rate,
            feature_dim=args.feature_dim,
            decoding_method=args.decoding_method,
            debug=args.debug,
        )
    elif args.whisper_encoder:
        assert_file_exists(args.whisper_encoder)
        assert_file_exists(args.whisper_decoder)

        recognizer = sherpa_onnx.OfflineRecognizer.from_whisper(
            encoder=args.whisper_encoder,
            decoder=args.whisper_decoder,
            tokens=args.tokens,
            num_threads=args.num_threads,
            decoding_method=args.decoding_method,
            debug=args.debug,
            language=args.whisper_language,
            task=args.whisper_task,
            tail_paddings=args.whisper_tail_paddings,
        )
    elif args.sense_voice:
        assert_file_exists(args.sense_voice)
        recognizer = sherpa_onnx.OfflineRecognizer.from_sense_voice(
            model=args.sense_voice,
            tokens=args.tokens,
            num_threads=args.num_threads,
            use_itn=True,
            debug=args.debug,
        )
    else:
        raise ValueError("Please specify at least one model")

    return recognizer


def load_speaker_embedding_model(args):
    config = sherpa_onnx.SpeakerEmbeddingExtractorConfig(
        model=args.model,
        num_threads=args.num_threads,
        debug=args.debug,
        provider=args.provider,
    )
    if not config.validate():
        raise ValueError(f"Invalid config. {config}")
    extractor = sherpa_onnx.SpeakerEmbeddingExtractor(config)
    return extractor


def load_speaker_file(args) -> Dict[str, List[str]]:
    if not Path(args.speaker_file).is_file():
        raise ValueError(f"--speaker-file {args.speaker_file} does not exist")

    ans = defaultdict(list)
    with open(args.speaker_file) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue

            fields = line.split()
            if len(fields) != 2:
                raise ValueError(f"Invalid line: {line}. Fields: {fields}")

            speaker_name, filename = fields
            ans[speaker_name].append(filename)
    return ans


def load_audio(filename: str) -> Tuple[np.ndarray, int]:
    data, sample_rate = sf.read(
        filename,
        always_2d=True,
        dtype="float32",
    )
    data = data[:, 0]  # use only the first channel
    samples = np.ascontiguousarray(data)
    return samples, sample_rate


def compute_speaker_embedding(
    filenames: List[str],
    extractor: sherpa_onnx.SpeakerEmbeddingExtractor,
) -> np.ndarray:
    assert len(filenames) > 0, "filenames is empty"

    ans = None
    for filename in filenames:
        print(f"processing {filename}")
        samples, sample_rate = load_audio(filename)
        stream = extractor.create_stream()
        stream.accept_waveform(sample_rate=sample_rate, waveform=samples)
        stream.input_finished()

        assert extractor.is_ready(stream)
        embedding = extractor.compute(stream)
        embedding = np.array(embedding)
        if ans is None:
            ans = embedding
        else:
            ans += embedding

    return ans / len(filenames)


def main():
    args = get_args()
    print(args)
    recognizer = create_recognizer(args)
    extractor = load_speaker_embedding_model(args)
    speaker_file = load_speaker_file(args)

    manager = sherpa_onnx.SpeakerEmbeddingManager(extractor.dim)
    for name, filename_list in speaker_file.items():
        embedding = compute_speaker_embedding(
            filenames=filename_list,
            extractor=extractor,
        )
        status = manager.add(name, embedding)
        if not status:
            raise RuntimeError(f"Failed to register speaker {name}")

    vad_config = sherpa_onnx.VadModelConfig()
    vad_config.silero_vad.model = args.silero_vad_model
    vad_config.silero_vad.min_silence_duration = 0.25
    vad_config.silero_vad.min_speech_duration = 0.25
    vad_config.sample_rate = g_sample_rate
    if not vad_config.validate():
        raise ValueError("Errors in vad config")

    window_size = vad_config.silero_vad.window_size

    vad = sherpa_onnx.VoiceActivityDetector(vad_config, buffer_size_in_seconds=100)

    samples_per_read = int(0.1 * g_sample_rate)  # 0.1 second = 100 ms

    devices = sd.query_devices()
    if len(devices) == 0:
        print("No microphone devices found")
        sys.exit(0)

    print(devices)
    default_input_device_idx = sd.default.device[0]
    print(f'Use default device: {devices[default_input_device_idx]["name"]}')

    print("Started! Please speak")

    idx = 0
    buffer = []
    with sd.InputStream(channels=1, dtype="float32", samplerate=g_sample_rate) as s:
        while True:
            samples, _ = s.read(samples_per_read)  # a blocking read
            samples = samples.reshape(-1)
            buffer = np.concatenate([buffer, samples])
            while len(buffer) > window_size:
                vad.accept_waveform(buffer[:window_size])
                buffer = buffer[window_size:]

            while not vad.empty():
                if len(vad.front.samples) < 0.5 * g_sample_rate:
                    # this segment is too short, skip it
                    vad.pop()
                    continue
                stream = extractor.create_stream()
                stream.accept_waveform(
                    sample_rate=g_sample_rate, waveform=vad.front.samples
                )
                stream.input_finished()

                embedding = extractor.compute(stream)
                embedding = np.array(embedding)
                name = manager.search(embedding, threshold=args.threshold)
                if not name:
                    name = "unknown"

                # Now for non-streaming ASR
                asr_stream = recognizer.create_stream()
                asr_stream.accept_waveform(
                    sample_rate=g_sample_rate, waveform=vad.front.samples
                )
                recognizer.decode_stream(asr_stream)
                text = asr_stream.result.text

                vad.pop()

                print(f"\r{idx}-{name}: {text}")
                idx += 1


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\nCaught Ctrl + C. Exiting")


================================================
FILE: python-api-examples/speaker-identification-with-vad.py
================================================
#!/usr/bin/env python3

"""
This script shows how to use Python APIs for speaker identification with
a microphone and a VAD model

Usage:

(1) Prepare a text file containing speaker related files.

Each line in the text file contains two columns. The first column is the
speaker name, while the second column contains the wave file of the speaker.

If the text file contains multiple wave files for the same speaker, then the
embeddings of these files are averaged.

An example text file is given below:

    foo /path/to/a.wav
    bar /path/to/b.wav
    foo /path/to/c.wav
    foobar /path/to/d.wav

Each wave file should contain only a single channel; the sample format
should be int16_t; the sample rate can be arbitrary.

(2) Download a model for computing speaker embeddings

Please visit
https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
to download a model. An example is given below:

    wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/wespeaker_zh_cnceleb_resnet34.onnx

Note that `zh` means Chinese, while `en` means English.

(3) Download the VAD model
Please visit
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
to download silero_vad.onnx

For instance,

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

(4) Run this script

Assume the filename of the text file is speaker.txt.

python3 ./python-api-examples/speaker-identification-with-vad.py \
  --silero-vad-model=/path/to/silero_vad.onnx \
  --speaker-file ./speaker.txt \
  --model ./wespeaker_zh_cnceleb_resnet34.onnx
"""
import argparse
import sys
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Tuple

import numpy as np
import sherpa_onnx
import soundfile as sf

try:
    import sounddevice as sd
except ImportError:
    print("Please install sounddevice first. You can use")
    print()
    print("  pip install sounddevice")
    print()
    print("to install it")
    sys.exit(-1)


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--speaker-file",
        type=str,
        required=True,
        help="""Path to the speaker file. Read the help doc at the beginning of this
        file for the format.""",
    )

    parser.add_argument(
        "--model",
        type=str,
        required=True,
        help="Path to the speaker embedding model file.",
    )

    parser.add_argument(
        "--silero-vad-model",
        type=str,
        required=True,
        help="Path to silero_vad.onnx",
    )

    parser.add_argument("--threshold", type=float, default=0.6)

    parser.add_argument(
        "--num-threads",
        type=int,
        default=1,
        help="Number of threads for neural network computation",
    )

    parser.add_argument(
        "--debug",
        type=bool,
        default=False,
        help="True to show debug messages",
    )

    parser.add_argument(
        "--provider",
        type=str,
        default="cpu",
        help="Valid values: cpu, cuda, coreml",
    )

    return parser.parse_args()


def load_speaker_embedding_model(args):
    config = sherpa_onnx.SpeakerEmbeddingExtractorConfig(
        model=args.model,
        num_threads=args.num_threads,
        debug=args.debug,
        provider=args.provider,
    )
    if not config.validate():
        raise ValueError(f"Invalid config. {config}")
    extractor = sherpa_onnx.SpeakerEmbeddingExtractor(config)
    return extractor


def load_speaker_file(args) -> Dict[str, List[str]]:
    if not Path(args.speaker_file).is_file():
        raise ValueError(f"--speaker-file {args.speaker_file} does not exist")

    ans = defaultdict(list)
    with open(args.speaker_file) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue

            fields = line.split()
            if len(fields) != 2:
                raise ValueError(f"Invalid line: {line}. Fields: {fields}")

            speaker_name, filename = fields
            ans[speaker_name].append(filename)
    return ans


def load_audio(filename: str) -> Tuple[np.ndarray, int]:
    data, sample_rate = sf.read(
        filename,
        always_2d=True,
        dtype="float32",
    )
    data = data[:, 0]  # use only the first channel
    samples = np.ascontiguousarray(data)
    return samples, sample_rate


def compute_speaker_embedding(
    filenames: List[str],
    extractor: sherpa_onnx.SpeakerEmbeddingExtractor,
) -> np.ndarray:
    assert len(filenames) > 0, "filenames is empty"

    ans = None
    for filename in filenames:
        print(f"processing {filename}")
        samples, sample_rate = load_audio(filename)
        stream = extractor.create_stream()
        stream.accept_waveform(sample_rate=sample_rate, waveform=samples)
        stream.input_finished()

        assert extractor.is_ready(stream)
        embedding = extractor.compute(stream)
        embedding = np.array(embedding)
        if ans is None:
            ans = embedding
        else:
            ans += embedding

    return ans / len(filenames)


g_sample_rate = 16000


def main():
    args = get_args()
    print(args)
    extractor = load_speaker_embedding_model(args)
    speaker_file = load_speaker_file(args)

    manager = sherpa_onnx.SpeakerEmbeddingManager(extractor.dim)
    for name, filename_list in speaker_file.items():
        embedding = compute_speaker_embedding(
            filenames=filename_list,
            extractor=extractor,
        )
        status = manager.add(name, embedding)
        if not status:
            raise RuntimeError(f"Failed to register speaker {name}")

    vad_config = sherpa_onnx.VadModelConfig()
    vad_config.silero_vad.model = args.silero_vad_model
    vad_config.silero_vad.min_silence_duration = 0.25
    vad_config.silero_vad.min_speech_duration = 0.25
    vad_config.sample_rate = g_sample_rate

    window_size = vad_config.silero_vad.window_size
    vad = sherpa_onnx.VoiceActivityDetector(vad_config, buffer_size_in_seconds=100)

    samples_per_read = int(0.1 * g_sample_rate)  # 0.1 second = 100 ms

    devices = sd.query_devices()
    if len(devices) == 0:
        print("No microphone devices found")
        sys.exit(0)

    print(devices)
    default_input_device_idx = sd.default.device[0]
    print(f'Use default device: {devices[default_input_device_idx]["name"]}')

    print("Started! Please speak")

    idx = 0
    buffer = []
    with sd.InputStream(channels=1, dtype="float32", samplerate=g_sample_rate) as s:
        while True:
            samples, _ = s.read(samples_per_read)  # a blocking read
            samples = samples.reshape(-1)
            buffer = np.concatenate([buffer, samples])
            while len(buffer) > window_size:
                vad.accept_waveform(buffer[:window_size])
                buffer = buffer[window_size:]

            while not vad.empty():
                if len(vad.front.samples) < 0.5 * g_sample_rate:
                    # this segment is too short, skip it
                    vad.pop()
                    continue
                stream = extractor.create_stream()
                stream.accept_waveform(
                    sample_rate=g_sample_rate, waveform=vad.front.samples
                )
                vad.pop()
                stream.input_finished()

                print("Computing", end="")
                embedding = extractor.compute(stream)
                embedding = np.array(embedding)
                name = manager.search(embedding, threshold=args.threshold)
                if not name:
                    name = "unknown"
                print(f"\r{idx}: Predicted name: {name}")
                idx += 1


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\nCaught Ctrl + C. Exiting")


================================================
FILE: python-api-examples/speaker-identification.py
================================================
#!/usr/bin/env python3

"""
This script shows how to use Python APIs for speaker identification with
a microphone.

Usage:

(1) Prepare a text file containing speaker related files.

Each line in the text file contains two columns. The first column is the
speaker name, while the second column contains the wave file of the speaker.

If the text file contains multiple wave files for the same speaker, then the
embeddings of these files are averaged.

An example text file is given below:

    foo /path/to/a.wav
    bar /path/to/b.wav
    foo /path/to/c.wav
    foobar /path/to/d.wav

Each wave file should contain only a single channel; the sample format
should be int16_t; the sample rate can be arbitrary.

(2) Download a model for computing speaker embeddings

Please visit
https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
to download a model. An example is given below:

    wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/wespeaker_zh_cnceleb_resnet34.onnx

Note that `zh` means Chinese, while `en` means English.

(3) Run this script

Assume the filename of the text file is speaker.txt.

python3 ./python-api-examples/speaker-identification.py \
  --speaker-file ./speaker.txt \
  --model ./wespeaker_zh_cnceleb_resnet34.onnx
"""
import argparse
import queue
import sys
import threading
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Tuple

import numpy as np
import sherpa_onnx
import soundfile as sf

try:
    import sounddevice as sd
except ImportError:
    print("Please install sounddevice first. You can use")
    print()
    print("  pip install sounddevice")
    print()
    print("to install it")
    sys.exit(-1)


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--speaker-file",
        type=str,
        required=True,
        help="""Path to the speaker file. Read the help doc at the beginning of this
        file for the format.""",
    )

    parser.add_argument(
        "--model",
        type=str,
        required=True,
        help="Path to the model file.",
    )

    parser.add_argument("--threshold", type=float, default=0.6)

    parser.add_argument(
        "--num-threads",
        type=int,
        default=1,
        help="Number of threads for neural network computation",
    )

    parser.add_argument(
        "--debug",
        type=bool,
        default=False,
        help="True to show debug messages",
    )

    parser.add_argument(
        "--provider",
        type=str,
        default="cpu",
        help="Valid values: cpu, cuda, coreml",
    )

    return parser.parse_args()


def load_speaker_embedding_model(args):
    config = sherpa_onnx.SpeakerEmbeddingExtractorConfig(
        model=args.model,
        num_threads=args.num_threads,
        debug=args.debug,
        provider=args.provider,
    )
    if not config.validate():
        raise ValueError(f"Invalid config. {config}")
    extractor = sherpa_onnx.SpeakerEmbeddingExtractor(config)
    return extractor


def load_speaker_file(args) -> Dict[str, List[str]]:
    if not Path(args.speaker_file).is_file():
        raise ValueError(f"--speaker-file {args.speaker_file} does not exist")

    ans = defaultdict(list)
    with open(args.speaker_file) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue

            fields = line.split()
            if len(fields) != 2:
                raise ValueError(f"Invalid line: {line}. Fields: {fields}")

            speaker_name, filename = fields
            ans[speaker_name].append(filename)
    return ans


def load_audio(filename: str) -> Tuple[np.ndarray, int]:
    data, sample_rate = sf.read(
        filename,
        always_2d=True,
        dtype="float32",
    )
    data = data[:, 0]  # use only the first channel
    samples = np.ascontiguousarray(data)
    return samples, sample_rate


def compute_speaker_embedding(
    filenames: List[str],
    extractor: sherpa_onnx.SpeakerEmbeddingExtractor,
) -> np.ndarray:
    assert len(filenames) > 0, "filenames is empty"

    ans = None
    for filename in filenames:
        print(f"processing {filename}")
        samples, sample_rate = load_audio(filename)
        stream = extractor.create_stream()
        stream.accept_waveform(sample_rate=sample_rate, waveform=samples)
        stream.input_finished()

        assert extractor.is_ready(stream)
        embedding = extractor.compute(stream)
        embedding = np.array(embedding)
        if ans is None:
            ans = embedding
        else:
            ans += embedding

    return ans / len(filenames)


g_buffer = queue.Queue()
g_stop = False
g_sample_rate = 16000
g_read_mic_thread = None


def read_mic():
    print("Please speak!")
    samples_per_read = int(0.1 * g_sample_rate)  # 0.1 second = 100 ms
    with sd.InputStream(channels=1, dtype="float32", samplerate=g_sample_rate) as s:
        while not g_stop:
            samples, _ = s.read(samples_per_read)  # a blocking read
            g_buffer.put(samples)


def main():
    args = get_args()
    print(args)
    extractor = load_speaker_embedding_model(args)
    speaker_file = load_speaker_file(args)

    manager = sherpa_onnx.SpeakerEmbeddingManager(extractor.dim)
    for name, filename_list in speaker_file.items():
        embedding = compute_speaker_embedding(
            filenames=filename_list,
            extractor=extractor,
        )
        status = manager.add(name, embedding)
        if not status:
            raise RuntimeError(f"Failed to register speaker {name}")

    devices = sd.query_devices()
    if len(devices) == 0:
        print("No microphone devices found")
        sys.exit(0)

    print(devices)
    default_input_device_idx = sd.default.device[0]
    print(f'Use default device: {devices[default_input_device_idx]["name"]}')

    global g_stop
    global g_read_mic_thread
    while True:
        key = input("Press Enter to start recording")
        if key.lower() in ("q", "quit"):
            g_stop = True
            break

        g_stop = False
        g_buffer.queue.clear()
        g_read_mic_thread = threading.Thread(target=read_mic)
        g_read_mic_thread.start()
        input("Press Enter to stop recording")
        g_stop = True
        g_read_mic_thread.join()
        print("Compute embedding")
        stream = extractor.create_stream()
        while not g_buffer.empty():
            samples = g_buffer.get()
            stream.accept_waveform(sample_rate=g_sample_rate, waveform=samples)
        stream.input_finished()

        embedding = extractor.compute(stream)
        embedding = np.array(embedding)
        name = manager.search(embedding, threshold=args.threshold)
        if not name:
            name = "unknown"
        print(f"Predicted name: {name}")


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\nCaught Ctrl + C. Exiting")
        g_stop = True
        if g_read_mic_thread.is_alive():
            g_read_mic_thread.join()


================================================
FILE: python-api-examples/speech-recognition-from-microphone-with-endpoint-detection-alsa.py
================================================
#!/usr/bin/env python3

# Real-time speech recognition from a microphone with sherpa-onnx Python API
# with endpoint detection.
#
# Note: This script uses ALSA and works only on Linux systems, especially
# for embedding Linux systems and for running Linux on Windows using WSL.
#
# Please refer to
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
# to download pre-trained models

import argparse
from pathlib import Path

import sherpa_onnx


def assert_file_exists(filename: str):
    assert Path(filename).is_file(), (
        f"{filename} does not exist!\n"
        "Please refer to "
        "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html to download it"
    )


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--tokens",
        type=str,
        required=True,
        help="Path to tokens.txt",
    )

    parser.add_argument(
        "--encoder",
        type=str,
        required=True,
        help="Path to the encoder model",
    )

    parser.add_argument(
        "--decoder",
        type=str,
        required=True,
        help="Path to the decoder model",
    )

    parser.add_argument(
        "--joiner",
        type=str,
        required=True,
        help="Path to the joiner model",
    )

    parser.add_argument(
        "--decoding-method",
        type=str,
        default="greedy_search",
        help="Valid values are greedy_search and modified_beam_search",
    )

    parser.add_argument(
        "--provider",
        type=str,
        default="cpu",
        help="Valid values: cpu, cuda, coreml",
    )

    parser.add_argument(
        "--hotwords-file",
        type=str,
        default="",
        help="""
        The file containing hotwords, one words/phrases per line, and for each
        phrase the bpe/cjkchar are separated by a space. For example:

        ▁HE LL O ▁WORLD
        你 好 世 界
        """,
    )

    parser.add_argument(
        "--hotwords-score",
        type=float,
        default=1.5,
        help="""
        The hotword score of each token for biasing word/phrase. Used only if
        --hotwords-file is given.
        """,
    )

    parser.add_argument(
        "--blank-penalty",
        type=float,
        default=0.0,
        help="""
        The penalty applied on blank symbol during decoding.
        Note: It is a positive value that would be applied to logits like
        this `logits[:, 0] -= blank_penalty` (suppose logits.shape is
        [batch_size, vocab] and blank id is 0).
        """,
    )

    parser.add_argument(
        "--hr-lexicon",
        type=str,
        default="",
        help="If not empty, it is the lexicon.txt for homophone replacer",
    )

    parser.add_argument(
        "--hr-rule-fsts",
        type=str,
        default="",
        help="If not empty, it is the replace.fst for homophone replacer",
    )

    parser.add_argument(
        "--device-name",
        type=str,
        required=True,
        help="""
The device name specifies which microphone to use in case there are several
on your system. You can use

  arecord -l

to find all available microphones on your computer. For instance, if it outputs

**** List of CAPTURE Hardware Devices ****
card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
  Subdevices: 1/1
  Subdevice #0: subdevice #0

and if you want to select card 3 and device 0 on that card, please use:

  plughw:3,0

as the device_name.
        """,
    )

    return parser.parse_args()


def create_recognizer(args):
    assert_file_exists(args.encoder)
    assert_file_exists(args.decoder)
    assert_file_exists(args.joiner)
    assert_file_exists(args.tokens)
    # Please replace the model files if needed.
    # See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
    # for download links.
    recognizer = sherpa_onnx.OnlineRecognizer.from_transducer(
        tokens=args.tokens,
        encoder=args.encoder,
        decoder=args.decoder,
        joiner=args.joiner,
        num_threads=1,
        sample_rate=16000,
        feature_dim=80,
        enable_endpoint_detection=True,
        rule1_min_trailing_silence=2.4,
        rule2_min_trailing_silence=1.2,
        rule3_min_utterance_length=300,  # it essentially disables this rule
        decoding_method=args.decoding_method,
        provider=args.provider,
        hotwords_file=args.hotwords_file,
        hotwords_score=args.hotwords_score,
        blank_penalty=args.blank_penalty,
        hr_rule_fsts=args.hr_rule_fsts,
        hr_lexicon=args.hr_lexicon,
    )
    return recognizer


def main():
    args = get_args()
    device_name = args.device_name
    print(f"device_name: {device_name}")
    alsa = sherpa_onnx.Alsa(device_name)

    print("Creating recognizer")
    recognizer = create_recognizer(args)
    print("Started! Please speak")

    sample_rate = 16000
    samples_per_read = int(0.1 * sample_rate)  # 0.1 second = 100 ms

    stream = recognizer.create_stream()

    display = sherpa_onnx.Display()

    while True:
        samples = alsa.read(samples_per_read)  # a blocking read
        stream.accept_waveform(sample_rate, samples)
        while recognizer.is_ready(stream):
            recognizer.decode_stream(stream)

        is_endpoint = recognizer.is_endpoint(stream)

        result = recognizer.get_result(stream)

        display.update_text(result)
        display.display()

        if is_endpoint:
            if result:
                display.finalize_current_sentence()
                display.display()

            recognizer.reset(stream)


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\nCaught Ctrl + C. Exiting")


================================================
FILE: python-api-examples/speech-recognition-from-microphone-with-endpoint-detection.py
================================================
#!/usr/bin/env python3

# Real-time speech recognition from a microphone with sherpa-onnx Python API
# with endpoint detection.
#
# Please refer to
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
# to download pre-trained models

import argparse
import sys
from pathlib import Path

try:
    import sounddevice as sd
except ImportError:
    print("Please install sounddevice first. You can use")
    print()
    print("  pip install sounddevice")
    print()
    print("to install it")
    sys.exit(-1)

import sherpa_onnx


def assert_file_exists(filename: str):
    assert Path(filename).is_file(), (
        f"{filename} does not exist!\n"
        "Please refer to "
        "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html to download it"
    )


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--tokens",
        type=str,
        required=True,
        help="Path to tokens.txt",
    )

    parser.add_argument(
        "--encoder",
        type=str,
        required=True,
        help="Path to the encoder model",
    )

    parser.add_argument(
        "--decoder",
        type=str,
        required=True,
        help="Path to the decoder model",
    )

    parser.add_argument(
        "--joiner",
        type=str,
        required=True,
        help="Path to the joiner model",
    )

    parser.add_argument(
        "--decoding-method",
        type=str,
        default="greedy_search",
        help="Valid values are greedy_search and modified_beam_search",
    )

    parser.add_argument(
        "--provider",
        type=str,
        default="cpu",
        help="Valid values: cpu, cuda, coreml",
    )

    parser.add_argument(
        "--hotwords-file",
        type=str,
        default="",
        help="""
        The file containing hotwords, one words/phrases per line, and for each
        phrase the bpe/cjkchar are separated by a space. For example:

        ▁HE LL O ▁WORLD
        你 好 世 界
        """,
    )

    parser.add_argument(
        "--hotwords-score",
        type=float,
        default=1.5,
        help="""
        The hotword score of each token for biasing word/phrase. Used only if
        --hotwords-file is given.
        """,
    )

    parser.add_argument(
        "--blank-penalty",
        type=float,
        default=0.0,
        help="""
        The penalty applied on blank symbol during decoding.
        Note: It is a positive value that would be applied to logits like
        this `logits[:, 0] -= blank_penalty` (suppose logits.shape is
        [batch_size, vocab] and blank id is 0).
        """,
    )

    parser.add_argument(
        "--hr-lexicon",
        type=str,
        default="",
        help="If not empty, it is the lexicon.txt for homophone replacer",
    )

    parser.add_argument(
        "--hr-rule-fsts",
        type=str,
        default="",
        help="If not empty, it is the replace.fst for homophone replacer",
    )

    return parser.parse_args()


def create_recognizer(args):
    assert_file_exists(args.encoder)
    assert_file_exists(args.decoder)
    assert_file_exists(args.joiner)
    assert_file_exists(args.tokens)
    # Please replace the model files if needed.
    # See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
    # for download links.
    recognizer = sherpa_onnx.OnlineRecognizer.from_transducer(
        tokens=args.tokens,
        encoder=args.encoder,
        decoder=args.decoder,
        joiner=args.joiner,
        num_threads=1,
        sample_rate=16000,
        feature_dim=80,
        enable_endpoint_detection=True,
        rule1_min_trailing_silence=2.4,
        rule2_min_trailing_silence=1.2,
        rule3_min_utterance_length=300,  # it essentially disables this rule
        decoding_method=args.decoding_method,
        provider=args.provider,
        hotwords_file=args.hotwords_file,
        hotwords_score=args.hotwords_score,
        blank_penalty=args.blank_penalty,
        hr_rule_fsts=args.hr_rule_fsts,
        hr_lexicon=args.hr_lexicon,
    )
    return recognizer


def main():
    args = get_args()

    devices = sd.query_devices()
    if len(devices) == 0:
        print("No microphone devices found")
        sys.exit(0)

    print(devices)
    default_input_device_idx = sd.default.device[0]
    print(f'Use default device: {devices[default_input_device_idx]["name"]}')

    recognizer = create_recognizer(args)
    print("Started! Please speak")

    # The model is using 16 kHz, we use 48 kHz here to demonstrate that
    # sherpa-onnx will do resampling inside.
    sample_rate = 48000
    samples_per_read = int(0.1 * sample_rate)  # 0.1 second = 100 ms

    stream = recognizer.create_stream()

    display = sherpa_onnx.Display()

    with sd.InputStream(channels=1, dtype="float32", samplerate=sample_rate) as s:
        while True:
            samples, _ = s.read(samples_per_read)  # a blocking read
            samples = samples.reshape(-1)
            stream.accept_waveform(sample_rate, samples)
            while recognizer.is_ready(stream):
                recognizer.decode_stream(stream)

            is_endpoint = recognizer.is_endpoint(stream)

            result = recognizer.get_result(stream)

            display.update_text(result)
            display.display()

            if is_endpoint:
                if result:
                    display.finalize_current_sentence()
                    display.display()

                recognizer.reset(stream)


if __name__ == "__main__":

    try:
        main()
    except KeyboardInterrupt:
        print("\nCaught Ctrl + C. Exiting")


================================================
FILE: python-api-examples/speech-recognition-from-microphone.py
================================================
#!/usr/bin/env python3

# Real-time speech recognition from a microphone with sherpa-onnx Python API
#
# Please refer to
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
# to download pre-trained models

import argparse
import sys
from pathlib import Path

from typing import List

try:
    import sounddevice as sd
except ImportError:
    print("Please install sounddevice first. You can use")
    print()
    print("  pip install sounddevice")
    print()
    print("to install it")
    sys.exit(-1)

import sherpa_onnx


def assert_file_exists(filename: str):
    assert Path(filename).is_file(), (
        f"{filename} does not exist!\n"
        "Please refer to "
        "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html to download it"
    )


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--tokens",
        type=str,
        required=True,
        help="Path to tokens.txt",
    )

    parser.add_argument(
        "--encoder",
        type=str,
        required=True,
        help="Path to the encoder model",
    )

    parser.add_argument(
        "--decoder",
        type=str,
        required=True,
        help="Path to the decoder model",
    )

    parser.add_argument(
        "--joiner",
        type=str,
        help="Path to the joiner model",
    )

    parser.add_argument(
        "--decoding-method",
        type=str,
        default="greedy_search",
        help="Valid values are greedy_search and modified_beam_search",
    )

    parser.add_argument(
        "--max-active-paths",
        type=int,
        default=4,
        help="""Used only when --decoding-method is modified_beam_search.
        It specifies number of active paths to keep during decoding.
        """,
    )

    parser.add_argument(
        "--provider",
        type=str,
        default="cpu",
        help="Valid values: cpu, cuda, coreml",
    )

    parser.add_argument(
        "--hotwords-file",
        type=str,
        default="",
        help="""
        The file containing hotwords, one words/phrases per line, and for each
        phrase the bpe/cjkchar are separated by a space. For example:

        ▁HE LL O ▁WORLD
        你 好 世 界
        """,
    )

    parser.add_argument(
        "--hotwords-score",
        type=float,
        default=1.5,
        help="""
        The hotword score of each token for biasing word/phrase. Used only if
        --hotwords-file is given.
        """,
    )

    parser.add_argument(
        "--blank-penalty",
        type=float,
        default=0.0,
        help="""
        The penalty applied on blank symbol during decoding.
        Note: It is a positive value that would be applied to logits like
        this `logits[:, 0] -= blank_penalty` (suppose logits.shape is
        [batch_size, vocab] and blank id is 0).
        """,
    )

    parser.add_argument(
        "--hr-lexicon",
        type=str,
        default="",
        help="If not empty, it is the lexicon.txt for homophone replacer",
    )

    parser.add_argument(
        "--hr-rule-fsts",
        type=str,
        default="",
        help="If not empty, it is the replace.fst for homophone replacer",
    )

    return parser.parse_args()


def create_recognizer(args):
    assert_file_exists(args.encoder)
    assert_file_exists(args.decoder)
    assert_file_exists(args.joiner)
    assert_file_exists(args.tokens)
    # Please replace the model files if needed.
    # See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
    # for download links.
    recognizer = sherpa_onnx.OnlineRecognizer.from_transducer(
        tokens=args.tokens,
        encoder=args.encoder,
        decoder=args.decoder,
        joiner=args.joiner,
        num_threads=1,
        sample_rate=16000,
        feature_dim=80,
        decoding_method=args.decoding_method,
        max_active_paths=args.max_active_paths,
        provider=args.provider,
        hotwords_file=args.hotwords_file,
        hotwords_score=args.hotwords_score,
        blank_penalty=args.blank_penalty,
        hr_rule_fsts=args.hr_rule_fsts,
        hr_lexicon=args.hr_lexicon,
    )
    return recognizer


def main():
    args = get_args()

    devices = sd.query_devices()
    if len(devices) == 0:
        print("No microphone devices found")
        sys.exit(0)

    print(devices)
    default_input_device_idx = sd.default.device[0]
    print(f'Use default device: {devices[default_input_device_idx]["name"]}')

    recognizer = create_recognizer(args)
    print("Started! Please speak")

    # The model is using 16 kHz, we use 48 kHz here to demonstrate that
    # sherpa-onnx will do resampling inside.
    sample_rate = 48000
    samples_per_read = int(0.1 * sample_rate)  # 0.1 second = 100 ms
    last_result = ""
    stream = recognizer.create_stream()
    with sd.InputStream(channels=1, dtype="float32", samplerate=sample_rate) as s:
        while True:
            samples, _ = s.read(samples_per_read)  # a blocking read
            samples = samples.reshape(-1)
            stream.accept_waveform(sample_rate, samples)
            while recognizer.is_ready(stream):
                recognizer.decode_stream(stream)
            result = recognizer.get_result(stream)
            if last_result != result:
                last_result = result
                print("\r{}".format(result), end="", flush=True)


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\nCaught Ctrl + C. Exiting")


================================================
FILE: python-api-examples/speech-recognition-from-url.py
================================================
#!/usr/bin/env python3
#
# Real-time speech recognition from a URL with sherpa-onnx Python API
#
# Supported URLs are those supported by ffmpeg.
#
# For instance:
# (1) RTMP
#     rtmp://localhost/live/livestream
#
# (2) A file
#     https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition/resolve/main/test_wavs/wenetspeech/DEV_T0000000000.opus
#     https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition/resolve/main/test_wavs/aishell2/ID0012W0030.wav
#     file:///Users/fangjun/open-source/sherpa-onnx/a.wav
#
#    Note that it supports all file formats supported by ffmpeg
#
# Please refer to
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
# to download pre-trained models

import argparse
import shutil
import subprocess
import sys
from pathlib import Path

import numpy as np
import sherpa_onnx


def assert_file_exists(filename: str):
    assert Path(filename).is_file(), (
        f"{filename} does not exist!\n"
        "Please refer to "
        "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html to download it"
    )


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--tokens",
        type=str,
        required=True,
        help="Path to tokens.txt",
    )

    parser.add_argument(
        "--encoder",
        type=str,
        required=True,
        help="Path to the encoder model",
    )

    parser.add_argument(
        "--decoder",
        type=str,
        required=True,
        help="Path to the decoder model",
    )

    parser.add_argument(
        "--joiner",
        type=str,
        help="Path to the joiner model",
    )

    parser.add_argument(
        "--decoding-method",
        type=str,
        default="greedy_search",
        help="Valid values are greedy_search and modified_beam_search",
    )

    parser.add_argument(
        "--url",
        type=str,
        required=True,
        help="""Example values:
          rtmp://localhost/live/livestream
          https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition/resolve/main/test_wavs/wenetspeech/DEV_T0000000000.opus
          https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition/resolve/main/test_wavs/aishell2/ID0012W0030.wav
        """,
    )

    parser.add_argument(
        "--hotwords-file",
        type=str,
        default="",
        help="""
        The file containing hotwords, one words/phrases per line, and for each
        phrase the bpe/cjkchar are separated by a space. For example:

        ▁HE LL O ▁WORLD
        你 好 世 界
        """,
    )

    parser.add_argument(
        "--hotwords-score",
        type=float,
        default=1.5,
        help="""
        The hotword score of each token for biasing word/phrase. Used only if
        --hotwords-file is given.
        """,
    )

    parser.add_argument(
        "--hr-lexicon",
        type=str,
        default="",
        help="If not empty, it is the lexicon.txt for homophone replacer",
    )

    parser.add_argument(
        "--hr-rule-fsts",
        type=str,
        default="",
        help="If not empty, it is the replace.fst for homophone replacer",
    )

    return parser.parse_args()


def create_recognizer(args):
    # Please replace the model files if needed.
    # See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
    # for download links.
    recognizer = sherpa_onnx.OnlineRecognizer.from_transducer(
        tokens=args.tokens,
        encoder=args.encoder,
        decoder=args.decoder,
        joiner=args.joiner,
        num_threads=1,
        sample_rate=16000,
        feature_dim=80,
        decoding_method=args.decoding_method,
        enable_endpoint_detection=True,
        rule1_min_trailing_silence=2.4,
        rule2_min_trailing_silence=1.2,
        rule3_min_utterance_length=300,  # it essentially disables this rule
        hotwords_file=args.hotwords_file,
        hotwords_score=args.hotwords_score,
        hr_rule_fsts=args.hr_rule_fsts,
        hr_lexicon=args.hr_lexicon,
    )
    return recognizer


def main():
    args = get_args()
    assert_file_exists(args.encoder)
    assert_file_exists(args.decoder)
    assert_file_exists(args.joiner)
    assert_file_exists(args.tokens)

    recognizer = create_recognizer(args)

    ffmpeg_cmd = [
        "ffmpeg",
        "-i",
        args.url,
        "-f",
        "s16le",
        "-acodec",
        "pcm_s16le",
        "-ac",
        "1",
        "-ar",
        "16000",
        "-",
    ]

    process = subprocess.Popen(
        ffmpeg_cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL
    )

    frames_per_read = 1600  # 0.1 second

    stream = recognizer.create_stream()

    display = sherpa_onnx.Display()

    print("Started!")
    while True:
        # *2 because int16_t has two bytes
        data = process.stdout.read(frames_per_read * 2)
        if not data:
            break

        samples = np.frombuffer(data, dtype=np.int16)
        samples = samples.astype(np.float32) / 32768
        stream.accept_waveform(16000, samples)

        while recognizer.is_ready(stream):
            recognizer.decode_stream(stream)

        is_endpoint = recognizer.is_endpoint(stream)

        result = recognizer.get_result(stream)

        display.update_text(result)
        display.display()

        if is_endpoint:
            if result:
                display.finalize_current_sentence()
                display.display()

            recognizer.reset(stream)


if __name__ == "__main__":
    if shutil.which("ffmpeg") is None:
        sys.exit("Please install ffmpeg first!")
    main()


================================================
FILE: python-api-examples/spoken-language-identification.py
================================================
#!/usr/bin/env python3

"""
This script shows how to use Python APIs for spoken language identification.
It detects the language spoken in the given wave file.

Usage:

1. Download a whisper multilingual model. We use a tiny model below.
Please refer to https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
to download more models.

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.tar.bz2
rm sherpa-onnx-whisper-tiny.tar.bz2

We only use the int8.onnx models below.

2. Download a test wave.

You can find many wave files for different languages at
https://hf-mirror.com/spaces/k2-fsa/spoken-language-identification/tree/main/test_wavs

wget https://hf-mirror.com/spaces/k2-fsa/spoken-language-identification/resolve/main/test_wavs/de-german.wav

python3 ./python-api-examples/spoken-language-identification.py
  --whisper-encoder=sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx \
  --whisper-decoder=sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx \
  --num-threads=1 \
  ./de-german.wav
"""

import argparse
import logging
import time
import wave
from pathlib import Path
from typing import Tuple

import numpy as np
import sherpa_onnx


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--whisper-encoder",
        required=True,
        type=str,
        help="Path to a multilingual whisper encoder model",
    )

    parser.add_argument(
        "--whisper-decoder",
        required=True,
        type=str,
        help="Path to a multilingual whisper decoder model",
    )

    parser.add_argument(
        "--num-threads",
        type=int,
        default=1,
        help="Number of threads for neural network computation",
    )

    parser.add_argument(
        "--debug",
        type=bool,
        default=False,
        help="True to show debug messages",
    )

    parser.add_argument(
        "--provider",
        type=str,
        default="cpu",
        help="Valid values: cpu, cuda, coreml",
    )

    parser.add_argument(
        "sound_file",
        type=str,
        help="The input sound file to identify. It must be of WAVE"
        "format with a single channel, and each sample has 16-bit, "
        "i.e., int16_t. "
        "The sample rate of the file can be arbitrary and does not need to "
        "be 16 kHz",
    )

    return parser.parse_args()


def assert_file_exists(filename: str):
    assert Path(filename).is_file(), (
        f"{filename} does not exist!\n"
        "Please refer to "
        "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/index.html to download it"
    )


def read_wave(wave_filename: str) -> Tuple[np.ndarray, int]:
    """
    Args:
      wave_filename:
        Path to a wave file. It should be single channel and each sample should
        be 16-bit. Its sample rate does not need to be 16kHz.
    Returns:
      Return a tuple containing:
       - A 1-D array of dtype np.float32 containing the samples, which are
       normalized to the range [-1, 1].
       - sample rate of the wave file
    """

    with wave.open(wave_filename) as f:
        assert f.getnchannels() == 1, f.getnchannels()
        assert f.getsampwidth() == 2, f.getsampwidth()  # it is in bytes
        num_samples = f.getnframes()
        samples = f.readframes(num_samples)
        samples_int16 = np.frombuffer(samples, dtype=np.int16)
        samples_float32 = samples_int16.astype(np.float32)

        samples_float32 = samples_float32 / 32768
        return samples_float32, f.getframerate()


def main():
    args = get_args()
    assert_file_exists(args.whisper_encoder)
    assert_file_exists(args.whisper_decoder)
    assert args.num_threads > 0, args.num_threads
    config = sherpa_onnx.SpokenLanguageIdentificationConfig(
        whisper=sherpa_onnx.SpokenLanguageIdentificationWhisperConfig(
            encoder=args.whisper_encoder,
            decoder=args.whisper_decoder,
        ),
        num_threads=args.num_threads,
        debug=args.debug,
        provider=args.provider,
    )
    slid = sherpa_onnx.SpokenLanguageIdentification(config)

    samples, sample_rate = read_wave(args.sound_file)

    start_time = time.time()
    stream = slid.create_stream()
    stream.accept_waveform(sample_rate=sample_rate, waveform=samples)
    lang = slid.compute(stream)
    end_time = time.time()

    elapsed_seconds = end_time - start_time
    audio_duration = len(samples) / sample_rate
    real_time_factor = elapsed_seconds / audio_duration

    logging.info(f"File: {args.sound_file}")
    logging.info(f"Detected language: {lang}")
    logging.info(f"Elapsed seconds: {elapsed_seconds:.3f}")
    logging.info(f"Audio duration in seconds: {audio_duration:.3f}")
    logging.info(
        f"RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}"
    )


if __name__ == "__main__":
    formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"

    logging.basicConfig(format=formatter, level=logging.INFO)

    main()


================================================
FILE: python-api-examples/streaming-paraformer-asr-microphone.py
================================================
#!/usr/bin/env python3

# Real-time speech recognition from a microphone with sherpa-onnx Python API
# with endpoint detection.
# This script uses a streaming paraformer
#
# Please refer to
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html#
# to download pre-trained models

import sys
from pathlib import Path

try:
    import sounddevice as sd
except ImportError:
    print("Please install sounddevice first. You can use")
    print()
    print("  pip install sounddevice")
    print()
    print("to install it")
    sys.exit(-1)

import sherpa_onnx


def assert_file_exists(filename: str):
    assert Path(filename).is_file(), (
        f"{filename} does not exist!\n"
        "Please refer to "
        "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html to download it"
    )


def create_recognizer():
    encoder = "./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx"
    decoder = "./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx"
    tokens = "./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt"
    assert_file_exists(encoder)
    assert_file_exists(decoder)
    assert_file_exists(tokens)
    recognizer = sherpa_onnx.OnlineRecognizer.from_paraformer(
        tokens=tokens,
        encoder=encoder,
        decoder=decoder,
        num_threads=1,
        sample_rate=16000,
        feature_dim=80,
        enable_endpoint_detection=True,
        rule1_min_trailing_silence=2.4,
        rule2_min_trailing_silence=1.2,
        rule3_min_utterance_length=300,  # it essentially disables this rule
    )
    return recognizer


def main():
    devices = sd.query_devices()
    if len(devices) == 0:
        print("No microphone devices found")
        sys.exit(0)

    print(devices)
    default_input_device_idx = sd.default.device[0]
    print(f'Use default device: {devices[default_input_device_idx]["name"]}')

    recognizer = create_recognizer()
    print("Started! Please speak")

    # The model is using 16 kHz, we use 48 kHz here to demonstrate that
    # sherpa-onnx will do resampling inside.
    sample_rate = 48000
    samples_per_read = int(0.1 * sample_rate)  # 0.1 second = 100 ms

    stream = recognizer.create_stream()

    display = sherpa_onnx.Display()

    with sd.InputStream(channels=1, dtype="float32", samplerate=sample_rate) as s:
        while True:
            samples, _ = s.read(samples_per_read)  # a blocking read
            samples = samples.reshape(-1)
            stream.accept_waveform(sample_rate, samples)
            while recognizer.is_ready(stream):
                recognizer.decode_stream(stream)

            is_endpoint = recognizer.is_endpoint(stream)

            result = recognizer.get_result(stream)

            display.update_text(result)
            display.display()

            if is_endpoint:
                if result:
                    display.finalize_current_sentence()
                    display.display()

                recognizer.reset(stream)


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\nCaught Ctrl + C. Exiting")


================================================
FILE: python-api-examples/streaming_server.py
================================================
#!/usr/bin/env python3
# Copyright      2022-2023  Xiaomi Corp.
#
"""
A server for streaming ASR recognition. By streaming it means the audio samples
are coming in real-time. You don't need to wait until all audio samples are
captured before sending them for recognition.

It supports multiple clients sending at the same time.

Usage:
    ./streaming_server.py --help

Example:

(1) Without a certificate

python3 ./python-api-examples/streaming_server.py \
  --encoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx \
  --decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \
  --joiner ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx \
  --tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt

(2) With a certificate

(a) Generate a certificate first:

    cd python-api-examples/web
    ./generate-certificate.py
    cd ../..

(b) Start the server

python3 ./python-api-examples/streaming_server.py \
  --encoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx \
  --decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \
  --joiner ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx \
  --tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \
  --certificate ./python-api-examples/web/cert.pem

Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/wenet/index.html
to download pre-trained models.

The model in the above help messages is from
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english

To use a WeNet streaming Conformer CTC model, please use

python3 ./python-api-examples/streaming_server.py \
  --tokens=./sherpa-onnx-zh-wenet-wenetspeech/tokens.txt \
  --wenet-ctc=./sherpa-onnx-zh-wenet-wenetspeech/model-streaming.onnx
"""

import argparse
import asyncio
import http
import json
import logging
import socket
import ssl
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from pathlib import Path
from typing import List, Optional, Tuple

import numpy as np
import sherpa_onnx
import websockets

from http_server import HttpServer


def setup_logger(
    log_filename: str,
    log_level: str = "info",
    use_console: bool = True,
) -> None:
    """Setup log level.

    Args:
      log_filename:
        The filename to save the log.
      log_level:
        The log level to use, e.g., "debug", "info", "warning", "error",
        "critical"
      use_console:
        True to also print logs to console.
    """
    now = datetime.now()
    date_time = now.strftime("%Y-%m-%d-%H-%M-%S")
    formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
    log_filename = f"{log_filename}-{date_time}.txt"

    Path(log_filename).parent.mkdir(parents=True, exist_ok=True)

    level = logging.ERROR
    if log_level == "debug":
        level = logging.DEBUG
    elif log_level == "info":
        level = logging.INFO
    elif log_level == "warning":
        level = logging.WARNING
    elif log_level == "critical":
        level = logging.CRITICAL

    logging.basicConfig(
        filename=log_filename,
        format=formatter,
        level=level,
        filemode="w",
    )
    if use_console:
        console = logging.StreamHandler()
        console.setLevel(level)
        console.setFormatter(logging.Formatter(formatter))
        logging.getLogger("").addHandler(console)


def add_model_args(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--encoder",
        type=str,
        help="Path to the transducer encoder model",
    )

    parser.add_argument(
        "--decoder",
        type=str,
        help="Path to the transducer decoder model.",
    )

    parser.add_argument(
        "--joiner",
        type=str,
        help="Path to the transducer joiner model.",
    )

    parser.add_argument(
        "--zipformer2-ctc",
        type=str,
        help="Path to the model file from zipformer2 ctc",
    )

    parser.add_argument(
        "--wenet-ctc",
        type=str,
        help="Path to the model.onnx from WeNet",
    )

    parser.add_argument(
        "--paraformer-encoder",
        type=str,
        help="Path to the paraformer encoder model",
    )

    parser.add_argument(
        "--paraformer-decoder",
        type=str,
        help="Path to the paraformer decoder model.",
    )

    parser.add_argument(
        "--tokens",
        type=str,
        required=True,
        help="Path to tokens.txt",
    )

    parser.add_argument(
        "--sample-rate",
        type=int,
        default=16000,
        help="Sample rate of the data used to train the model. "
        "Caution: If your input sound files have a different sampling rate, "
        "we will do resampling inside",
    )

    parser.add_argument(
        "--feat-dim",
        type=int,
        default=80,
        help="Feature dimension of the model",
    )

    parser.add_argument(
        "--provider",
        type=str,
        default="cpu",
        help="Valid values: cpu, cuda, coreml",
    )


def add_decoding_args(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--decoding-method",
        type=str,
        default="greedy_search",
        help="""Decoding method to use. Current supported methods are:
        - greedy_search
        - modified_beam_search
        """,
    )

    add_modified_beam_search_args(parser)


def add_hotwords_args(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--hotwords-file",
        type=str,
        default="",
        help="""
        The file containing hotwords, one words/phrases per line, and for each
        phrase the bpe/cjkchar are separated by a space. For example:

        ▁HE LL O ▁WORLD
        你 好 世 界
        """,
    )

    parser.add_argument(
        "--hotwords-score",
        type=float,
        default=1.5,
        help="""
        The hotword score of each token for biasing word/phrase. Used only if
        --hotwords-file is given.
        """,
    )
    parser.add_argument(
        "--modeling-unit",
        type=str,
        default='cjkchar',
        help="""
        The modeling unit of the used model. Current supported units are:
        - cjkchar(for Chinese)
        - bpe(for English like languages)
        - cjkchar+bpe(for multilingual models)
        """,
    )
    parser.add_argument(
        "--bpe-vocab",
        type=str,
        default='',
        help="""
        The bpe vocabulary generated by sentencepiece toolkit. 
        It is only used when modeling-unit is bpe or cjkchar+bpe.
        if you can’t find bpe.vocab in the model directory, please run:
        python script/export_bpe_vocab.py --bpe-model exp/bpe.model
        """,
    )


def add_modified_beam_search_args(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--num-active-paths",
        type=int,
        default=4,
        help="""Used only when --decoding-method is modified_beam_search.
        It specifies number of active paths to keep during decoding.
        """,
    )

def add_blank_penalty_args(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--blank-penalty",
        type=float,
        default=0.0,
        help="""
        The penalty applied on blank symbol during decoding.
        Note: It is a positive value that would be applied to logits like
        this `logits[:, 0] -= blank_penalty` (suppose logits.shape is
        [batch_size, vocab] and blank id is 0).
        """,
    )

def add_endpointing_args(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--use-endpoint",
        type=int,
        default=1,
        help="1 to enable endpoiting. 0 to disable it",
    )

    parser.add_argument(
        "--rule1-min-trailing-silence",
        type=float,
        default=2.4,
        help="""This endpointing rule1 requires duration of trailing silence
        in seconds) to be >= this value""",
    )

    parser.add_argument(
        "--rule2-min-trailing-silence",
        type=float,
        default=1.2,
        help="""This endpointing rule2 requires duration of trailing silence in
        seconds) to be >= this value.""",
    )

    parser.add_argument(
        "--rule3-min-utterance-length",
        type=float,
        default=20,
        help="""This endpointing rule3 requires utterance-length (in seconds)
        to be >= this value.""",
    )


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    add_model_args(parser)
    add_decoding_args(parser)
    add_endpointing_args(parser)
    add_hotwords_args(parser)
    add_blank_penalty_args(parser)

    parser.add_argument(
        "--port",
        type=int,
        default=6006,
        help="The server will listen on this port",
    )

    parser.add_argument(
        "--nn-pool-size",
        type=int,
        default=1,
        help="Number of threads for NN computation and decoding.",
    )

    parser.add_argument(
        "--max-batch-size",
        type=int,
        default=3,
        help="""Max batch size for computation. Note if there are not enough
        requests in the queue, it will wait for max_wait_ms time. After that,
        even if there are not enough requests, it still sends the
        available requests in the queue for computation.
        """,
    )

    parser.add_argument(
        "--max-wait-ms",
        type=float,
        default=10,
        help="""Max time in millisecond to wait to build batches for inference.
        If there are not enough requests in the stream queue to build a batch
        of max_batch_size, it waits up to this time before fetching available
        requests for computation.
        """,
    )

    parser.add_argument(
        "--max-message-size",
        type=int,
        default=(1 << 20),
        help="""Max message size in bytes.
        The max size per message cannot exceed this limit.
        """,
    )

    parser.add_argument(
        "--max-queue-size",
        type=int,
        default=32,
        help="Max number of messages in the queue for each connection.",
    )

    parser.add_argument(
        "--max-active-connections",
        type=int,
        default=200,
        help="""Maximum number of active connections. The server will refuse
        to accept new connections once the current number of active connections
        equals to this limit.
        """,
    )

    parser.add_argument(
        "--num-threads",
        type=int,
        default=2,
        help="Number of threads to run the neural network model",
    )

    parser.add_argument(
        "--certificate",
        type=str,
        help="""Path to the X.509 certificate. You need it only if you want to
        use a secure websocket connection, i.e., use wss:// instead of ws://.
        You can use ./web/generate-certificate.py
        to generate the certificate `cert.pem`.
        Note ./web/generate-certificate.py will generate three files but you
        only need to pass the generated cert.pem to this option.
        """,
    )

    parser.add_argument(
        "--doc-root",
        type=str,
        default="./python-api-examples/web",
        help="Path to the web root",
    )

    return parser.parse_args()


def create_recognizer(args) -> sherpa_onnx.OnlineRecognizer:
    if args.encoder:
        recognizer = sherpa_onnx.OnlineRecognizer.from_transducer(
            tokens=args.tokens,
            encoder=args.encoder,
            decoder=args.decoder,
            joiner=args.joiner,
            num_threads=args.num_threads,
            sample_rate=args.sample_rate,
            feature_dim=args.feat_dim,
            decoding_method=args.decoding_method,
            max_active_paths=args.num_active_paths,
            hotwords_score=args.hotwords_score,
            hotwords_file=args.hotwords_file,
            blank_penalty=args.blank_penalty,
            enable_endpoint_detection=args.use_endpoint != 0,
            rule1_min_trailing_silence=args.rule1_min_trailing_silence,
            rule2_min_trailing_silence=args.rule2_min_trailing_silence,
            rule3_min_utterance_length=args.rule3_min_utterance_length,
            provider=args.provider,
            modeling_unit=args.modeling_unit,
            bpe_vocab=args.bpe_vocab
        )
    elif args.paraformer_encoder:
        recognizer = sherpa_onnx.OnlineRecognizer.from_paraformer(
            tokens=args.tokens,
            encoder=args.paraformer_encoder,
            decoder=args.paraformer_decoder,
            num_threads=args.num_threads,
            sample_rate=args.sample_rate,
            feature_dim=args.feat_dim,
            decoding_method=args.decoding_method,
            enable_endpoint_detection=args.use_endpoint != 0,
            rule1_min_trailing_silence=args.rule1_min_trailing_silence,
            rule2_min_trailing_silence=args.rule2_min_trailing_silence,
            rule3_min_utterance_length=args.rule3_min_utterance_length,
            provider=args.provider,
        )
    elif args.zipformer2_ctc:
        recognizer = sherpa_onnx.OnlineRecognizer.from_zipformer2_ctc(
            tokens=args.tokens,
            model=args.zipformer2_ctc,
            num_threads=args.num_threads,
            sample_rate=args.sample_rate,
            feature_dim=args.feat_dim,
            decoding_method=args.decoding_method,
            enable_endpoint_detection=args.use_endpoint != 0,
            rule1_min_trailing_silence=args.rule1_min_trailing_silence,
            rule2_min_trailing_silence=args.rule2_min_trailing_silence,
            rule3_min_utterance_length=args.rule3_min_utterance_length,
            provider=args.provider,
        )
    elif args.wenet_ctc:
        recognizer = sherpa_onnx.OnlineRecognizer.from_wenet_ctc(
            tokens=args.tokens,
            model=args.wenet_ctc,
            num_threads=args.num_threads,
            sample_rate=args.sample_rate,
            feature_dim=args.feat_dim,
            decoding_method=args.decoding_method,
            enable_endpoint_detection=args.use_endpoint != 0,
            rule1_min_trailing_silence=args.rule1_min_trailing_silence,
            rule2_min_trailing_silence=args.rule2_min_trailing_silence,
            rule3_min_utterance_length=args.rule3_min_utterance_length,
            provider=args.provider,
        )
    else:
        raise ValueError("Please provide a model")

    return recognizer


def format_timestamps(timestamps: List[float]) -> List[str]:
    return ["{:.3f}".format(t) for t in timestamps]


class StreamingServer(object):
    def __init__(
        self,
        recognizer: sherpa_onnx.OnlineRecognizer,
        nn_pool_size: int,
        max_wait_ms: float,
        max_batch_size: int,
        max_message_size: int,
        max_queue_size: int,
        max_active_connections: int,
        doc_root: str,
        certificate: Optional[str] = None,
    ):
        """
        Args:
          recognizer:
            An instance of online recognizer.
          nn_pool_size:
            Number of threads for the thread pool that is responsible for
            neural network computation and decoding.
          max_wait_ms:
            Max wait time in milliseconds in order to build a batch of
            `batch_size`.
          max_batch_size:
            Max batch size for inference.
          max_message_size:
            Max size in bytes per message.
          max_queue_size:
            Max number of messages in the queue for each connection.
          max_active_connections:
            Max number of active connections. Once number of active client
            equals to this limit, the server refuses to accept new connections.
          beam_search_params:
            Dictionary containing all the parameters for beam search.
          online_endpoint_config:
            Config for endpointing.
          doc_root:
            Path to the directory where files like index.html for the HTTP
            server locate.
          certificate:
            Optional. If not None, it will use secure websocket.
            You can use ./web/generate-certificate.py to generate
            it (the default generated filename is `cert.pem`).
        """
        self.recognizer = recognizer

        self.certificate = certificate
        self.http_server = HttpServer(doc_root)

        self.nn_pool_size = nn_pool_size
        self.nn_pool = ThreadPoolExecutor(
            max_workers=nn_pool_size,
            thread_name_prefix="nn",
        )

        self.stream_queue = asyncio.Queue()

        self.max_wait_ms = max_wait_ms
        self.max_batch_size = max_batch_size
        self.max_message_size = max_message_size
        self.max_queue_size = max_queue_size
        self.max_active_connections = max_active_connections

        self.current_active_connections = 0

        self.sample_rate = int(recognizer.config.feat_config.sampling_rate)

    async def stream_consumer_task(self):
        """This function extracts streams from the queue, batches them up, sends
        them to the neural network model for computation and decoding.
        """
        while True:
            if self.stream_queue.empty():
                await asyncio.sleep(self.max_wait_ms / 1000)
                continue

            batch = []
            try:
                while len(batch) < self.max_batch_size:
                    item = self.stream_queue.get_nowait()

                    assert self.recognizer.is_ready(item[0])

                    batch.append(item)
            except asyncio.QueueEmpty:
                pass
            stream_list = [b[0] for b in batch]
            future_list = [b[1] for b in batch]

            loop = asyncio.get_running_loop()
            await loop.run_in_executor(
                self.nn_pool,
                self.recognizer.decode_streams,
                stream_list,
            )

            for f in future_list:
                self.stream_queue.task_done()
                f.set_result(None)

    async def compute_and_decode(
        self,
        stream: sherpa_onnx.OnlineStream,
    ) -> None:
        """Put the stream into the queue and wait it to be processed by the
        consumer task.

        Args:
          stream:
            The stream to be processed. Note: It is changed in-place.
        """
        loop = asyncio.get_running_loop()
        future = loop.create_future()
        await self.stream_queue.put((stream, future))
        await future

    async def process_request(
        self,
        path: str,
        request_headers: websockets.Headers,
    ) -> Optional[Tuple[http.HTTPStatus, websockets.Headers, bytes]]:
        if "sec-websocket-key" not in (
            request_headers.headers  # For new request_headers
            if hasattr(request_headers, "headers")
            else request_headers  # For old request_headers
        ):
            # This is a normal HTTP request
            if path == "/":
                path = "/index.html"

            if path in ("/upload.html", "/offline_record.html"):
                response = r"""
<!doctype html><html><head>
<title>Speech recognition with next-gen Kaldi</title><body>
<h2>Only /streaming_record.html is available for the streaming server.<h2>
<br/>
<br/>
Go back to <a href="/streaming_record.html">/streaming_record.html</a>
</body></head></html>
"""
                found = True
                mime_type = "text/html"
            else:
                found, response, mime_type = self.http_server.process_request(path)

            if isinstance(response, str):
                response = response.encode("utf-8")

            if not found:
                status = http.HTTPStatus.NOT_FOUND
            else:
                status = http.HTTPStatus.OK
            header = {"Content-Type": mime_type}
            return status, header, response

        if self.current_active_connections < self.max_active_connections:
            self.current_active_connections += 1
            return None

        # Refuse new connections
        status = http.HTTPStatus.SERVICE_UNAVAILABLE  # 503
        header = {"Hint": "The server is overloaded. Please retry later."}
        response = b"The server is busy. Please retry later."

        return status, header, response

    async def run(self, port: int):
        tasks = []
        for i in range(self.nn_pool_size):
            tasks.append(asyncio.create_task(self.stream_consumer_task()))

        if self.certificate:
            logging.info(f"Using certificate: {self.certificate}")
            ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
            ssl_context.load_cert_chain(self.certificate)
        else:
            ssl_context = None
            logging.info("No certificate provided")

        async with websockets.serve(
            self.handle_connection,
            host="",
            port=port,
            max_size=self.max_message_size,
            max_queue=self.max_queue_size,
            process_request=self.process_request,
            ssl=ssl_context,
        ):
            ip_list = ["localhost"]
            if ssl_context:
                ip_list += ["0.0.0.0", "127.0.0.1"]
                ip_list.append(socket.gethostbyname(socket.gethostname()))
            proto = "http://" if ssl_context is None else "https://"
            s = "Please visit one of the following addresses:\n\n"
            for p in ip_list:
                s += "  " + proto + p + f":{port}" "\n"

            if not ssl_context:
                s += "\nSince you are not providing a certificate, you cannot "
                s += "use your microphone from within the browser using "
                s += "public IP addresses. Only localhost can be used."
                s += "You also cannot use 0.0.0.0 or 127.0.0.1"

            logging.info(s)

            await asyncio.Future()  # run forever

        await asyncio.gather(*tasks)  # not reachable

    async def handle_connection(
        self,
        socket: websockets.WebSocketServerProtocol,
    ):
        """Receive audio samples from the client, process it, and send
        decoding result back to the client.

        Args:
          socket:
            The socket for communicating with the client.
        """
        try:
            await self.handle_connection_impl(socket)
        except websockets.exceptions.ConnectionClosedError:
            logging.info(f"{socket.remote_address} disconnected")
        finally:
            # Decrement so that it can accept new connections
            self.current_active_connections -= 1

            logging.info(
                f"Disconnected: {socket.remote_address}. "
                f"Number of connections: {self.current_active_connections}/{self.max_active_connections}"  # noqa
            )

    async def handle_connection_impl(
        self,
        socket: websockets.WebSocketServerProtocol,
    ):
        """Receive audio samples from the client, process it, and send
        decoding result back to the client.

        Args:
          socket:
            The socket for communicating with the client.
        """
        logging.info(
            f"Connected: {socket.remote_address}. "
            f"Number of connections: {self.current_active_connections}/{self.max_active_connections}"  # noqa
        )

        stream = self.recognizer.create_stream()
        segment = 0

        while True:
            samples = await self.recv_audio_samples(socket)
            if samples is None:
                break

            # TODO(fangjun): At present, we assume the sampling rate
            # of the received audio samples equal to --sample-rate
            stream.accept_waveform(sample_rate=self.sample_rate, waveform=samples)

            while self.recognizer.is_ready(stream):
                await self.compute_and_decode(stream)
                result = self.recognizer.get_result(stream)

                message = {
                    "text": result,
                    "segment": segment,
                }
                if self.recognizer.is_endpoint(stream):
                    self.recognizer.reset(stream)
                    segment += 1

                await socket.send(json.dumps(message))

        tail_padding = np.zeros(int(self.sample_rate * 0.3)).astype(np.float32)
        stream.accept_waveform(sample_rate=self.sample_rate, waveform=tail_padding)
        stream.input_finished()
        while self.recognizer.is_ready(stream):
            await self.compute_and_decode(stream)

        result = self.recognizer.get_result(stream)

        message = {
            "text": result,
            "segment": segment,
        }

        await socket.send(json.dumps(message))

    async def recv_audio_samples(
        self,
        socket: websockets.WebSocketServerProtocol,
    ) -> Optional[np.ndarray]:
        """Receive a tensor from the client.

        Each message contains either a bytes buffer containing audio samples
        in 16 kHz or contains "Done" meaning the end of utterance.

        Args:
          socket:
            The socket for communicating with the client.
        Returns:
          Return a 1-D np.float32 tensor containing the audio samples or
          return None.
        """
        message = await socket.recv()
        if message == "Done":
            return None

        return np.frombuffer(message, dtype=np.float32)


def check_args(args):
    if args.encoder:
        assert Path(args.encoder).is_file(), f"{args.encoder} does not exist"

        assert Path(args.decoder).is_file(), f"{args.decoder} does not exist"

        assert Path(args.joiner).is_file(), f"{args.joiner} does not exist"

        assert args.paraformer_encoder is None, args.paraformer_encoder
        assert args.paraformer_decoder is None, args.paraformer_decoder
        assert args.zipformer2_ctc is None, args.zipformer2_ctc
        assert args.wenet_ctc is None, args.wenet_ctc
    elif args.paraformer_encoder:
        assert Path(
            args.paraformer_encoder
        ).is_file(), f"{args.paraformer_encoder} does not exist"

        assert Path(
            args.paraformer_decoder
        ).is_file(), f"{args.paraformer_decoder} does not exist"
    elif args.zipformer2_ctc:
        assert Path(
            args.zipformer2_ctc
        ).is_file(), f"{args.zipformer2_ctc} does not exist"
    elif args.wenet_ctc:
        assert Path(args.wenet_ctc).is_file(), f"{args.wenet_ctc} does not exist"
    else:
        raise ValueError("Please provide a model")

    if not Path(args.tokens).is_file():
        raise ValueError(f"{args.tokens} does not exist")

    if args.decoding_method not in (
        "greedy_search",
        "modified_beam_search",
    ):
        raise ValueError(f"Unsupported decoding method {args.decoding_method}")

    if args.decoding_method == "modified_beam_search":
        assert args.num_active_paths > 0, args.num_active_paths


def main():
    args = get_args()
    logging.info(vars(args))
    check_args(args)

    recognizer = create_recognizer(args)

    port = args.port
    nn_pool_size = args.nn_pool_size
    max_batch_size = args.max_batch_size
    max_wait_ms = args.max_wait_ms
    max_message_size = args.max_message_size
    max_queue_size = args.max_queue_size
    max_active_connections = args.max_active_connections
    certificate = args.certificate
    doc_root = args.doc_root

    if certificate and not Path(certificate).is_file():
        raise ValueError(f"{certificate} does not exist")

    if not Path(doc_root).is_dir():
        raise ValueError(f"Directory {doc_root} does not exist")

    server = StreamingServer(
        recognizer=recognizer,
        nn_pool_size=nn_pool_size,
        max_batch_size=max_batch_size,
        max_wait_ms=max_wait_ms,
        max_message_size=max_message_size,
        max_queue_size=max_queue_size,
        max_active_connections=max_active_connections,
        certificate=certificate,
        doc_root=doc_root,
    )
    asyncio.run(server.run(port))


if __name__ == "__main__":
    log_filename = "log/log-streaming-server"
    setup_logger(log_filename)
    main()


================================================
FILE: python-api-examples/supertonic-tts.py
================================================
#!/usr/bin/env python3
#
# Copyright (c)  2026  Xiaomi Corporation

"""
This file demonstrates how to use sherpa-onnx Python API
for SupertonicTTS.


Usage:

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
tar xvf sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
rm sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2

python3 ./supertonic-tts.py

You can find more models at
https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models

Please see
https://k2-fsa.github.io/sherpa/onnx/tts/supertonic.html
for details.

"""

import time

import sherpa_onnx
import soundfile as sf


def create_tts():
    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=sherpa_onnx.OfflineTtsModelConfig(
            supertonic=sherpa_onnx.OfflineTtsSupertonicModelConfig(
                duration_predictor="./sherpa-onnx-supertonic-tts-int8-2026-03-06/duration_predictor.int8.onnx",
                text_encoder="./sherpa-onnx-supertonic-tts-int8-2026-03-06/text_encoder.int8.onnx",
                vector_estimator="./sherpa-onnx-supertonic-tts-int8-2026-03-06/vector_estimator.int8.onnx",
                vocoder="./sherpa-onnx-supertonic-tts-int8-2026-03-06/vocoder.int8.onnx",
                tts_json="./sherpa-onnx-supertonic-tts-int8-2026-03-06/tts.json",
                unicode_indexer="./sherpa-onnx-supertonic-tts-int8-2026-03-06/unicode_indexer.bin",
                voice_style="./sherpa-onnx-supertonic-tts-int8-2026-03-06/voice.bin",
            ),
            debug=False,
            num_threads=2,
            provider="cpu",
        )
    )
    if not tts_config.validate():
        raise ValueError(
            "Please read the previous error messages and re-check your config"
        )

    return sherpa_onnx.OfflineTts(tts_config)


def main():
    tts = create_tts()

    text = "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be, a statesman, a businessman, an official, or a scholar."

    gen_config = sherpa_onnx.GenerationConfig()

    # This model has 10 speakers. Valid sid: 0-9
    gen_config.sid = 6
    gen_config.num_steps = 5
    gen_config.speed = 1.25  # larger -> faster

    # We use en for English.
    # You can also use es, pt, fr, ko.
    # This single model supports 5 languages.
    gen_config.extra["lang"] = "en"

    start = time.time()
    audio = tts.generate(text, gen_config)
    end = time.time()

    if len(audio.samples) == 0:
        print("Error in generating audios. Please read previous error messages.")
        return

    elapsed_seconds = end - start
    audio_duration = len(audio.samples) / audio.sample_rate
    real_time_factor = elapsed_seconds / audio_duration

    output_filename = "./supertonic-en.wav"
    sf.write(
        output_filename,
        audio.samples,
        samplerate=audio.sample_rate,
        subtype="PCM_16",
    )
    print(f"Saved to {output_filename}")
    print(f"The text is '{text}'")
    print(f"Elapsed seconds: {elapsed_seconds:.3f}")
    print(f"Audio duration in seconds: {audio_duration:.3f}")
    print(f"RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}")


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/test-sentence-piece-tokenizer.py
================================================
#!/usr/bin/env python3
#
# Copyright (c)  2026  Xiaomi Corporation

"""
Please download test files
 - vocab.json
 - token_scores.json
from
https://huggingface.co/csukuangfj/sherpa-onnx-test-data/tree/main

They are generated by ../scripts/pocket-tts/convert_tokenizer.py
using the BPE model from
https://huggingface.co/KevinAHM/pocket-tts-onnx/blob/main/tokenizer.model

See also ../scripts/pocket-tts/test_tokenizer.py
"""

from pathlib import Path

import sherpa_onnx


def main():
    vocab_json = "./vocab.json"
    token_scores_json = "./token_scores.json"

    if not Path(vocab_json).is_file() or not Path(token_scores_json).is_file():
        print("Please download test files first")
        return

    sp = sherpa_onnx.SentencePieceTokenizer(
        vocab_json=vocab_json,
        token_scores_json=token_scores_json,
    )

    text = "Yesterday, I bought 3 apples, 2 bananas, and a dozen oranges. Wow! That's amazing—did you see it too? I can't believe it's already 10:30 p.m."

    ids = sp.encode(text, out_type=int)
    tokens = sp.encode(text, out_type=str)
    print(text)
    print(tokens)
    print(ids)


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/test-whisper-timestamps.py
================================================
#!/usr/bin/env python3
# Copyright      2025  Posit Software, PBC
"""
Test Whisper timestamps functionality.

This script tests token-level timestamps using cross-attention DTW alignment.
Note: Requires models exported with attention outputs.

Usage:
  # Test without timestamps (default)
  python test-whisper-timestamps.py \
    --encoder=/path/to/encoder.onnx \
    --decoder=/path/to/decoder.onnx \
    --tokens=/path/to/tokens.txt \
    --audio=/path/to/test.wav

  # Test with timestamps (requires attention-enabled model)
  python test-whisper-timestamps.py \
    --encoder=/path/to/encoder.onnx \
    --decoder=/path/to/decoder.onnx \
    --tokens=/path/to/tokens.txt \
    --audio=/path/to/test.wav \
    --enable-token-timestamps

  # Test with CUDA GPU acceleration
  python test-whisper-timestamps.py \
    --encoder=/path/to/encoder.onnx \
    --decoder=/path/to/decoder.onnx \
    --tokens=/path/to/tokens.txt \
    --audio=/path/to/test.wav \
    --enable-token-timestamps \
    --provider=cuda
"""

import argparse
import wave
from typing import Tuple

import numpy as np
import sherpa_onnx


def read_wave(wave_filename: str) -> Tuple[np.ndarray, int]:
    """
    Read a wave file and return samples as float32 array.

    Args:
      wave_filename: Path to a wave file. Should be single channel, 16-bit.

    Returns:
      Tuple of (samples as float32 array normalized to [-1, 1], sample_rate)
    """
    with wave.open(wave_filename) as f:
        assert f.getnchannels() == 1, f.getnchannels()
        assert f.getsampwidth() == 2, f.getsampwidth()  # 16-bit
        num_samples = f.getnframes()
        samples = f.readframes(num_samples)
        samples_int16 = np.frombuffer(samples, dtype=np.int16)
        samples_float32 = samples_int16.astype(np.float32)
        samples_float32 = samples_float32 / 32768
        return samples_float32, f.getframerate()


def test_without_timestamps(args, samples, sample_rate):
    """Test recognition without timestamps."""
    print("=" * 60)
    print("Testing Without Timestamps")
    print("=" * 60)

    recognizer = sherpa_onnx.OfflineRecognizer.from_whisper(
        encoder=args.encoder,
        decoder=args.decoder,
        tokens=args.tokens,
        enable_token_timestamps=False,
        provider=args.provider,
    )

    stream = recognizer.create_stream()
    stream.accept_waveform(sample_rate, samples)
    recognizer.decode_stream(stream)
    result = stream.result

    print(f"\nText: {result.text}")
    print(f"Tokens: {result.tokens}")
    print(f"Timestamps: {result.timestamps}")

    assert len(result.timestamps) == 0, "Should have no timestamps"

    print("\nTest without timestamps PASSED!")


def test_with_timestamps(args, samples, sample_rate, audio_duration, enable_segment_timestamps=False):
    """Test token-level timestamps using cross-attention DTW."""
    print("\n" + "=" * 60)
    if enable_segment_timestamps:
        print("Testing With Both Token and Segment Timestamps")
    else:
        print("Testing With Token Timestamps (cross-attention DTW)")
    print("=" * 60)

    recognizer = sherpa_onnx.OfflineRecognizer.from_whisper(
        encoder=args.encoder,
        decoder=args.decoder,
        tokens=args.tokens,
        enable_token_timestamps=True,
        enable_segment_timestamps=enable_segment_timestamps,
        provider=args.provider,
    )

    stream = recognizer.create_stream()
    stream.accept_waveform(sample_rate, samples)
    recognizer.decode_stream(stream)
    result = stream.result

    print(f"\nText: {result.text}")
    print(f"Language: {result.lang}")

    # Check token-level timestamps
    print(f"\nToken timestamps count: {len(result.timestamps)}")
    assert len(result.timestamps) == len(result.tokens), (
        f"Timestamps count ({len(result.timestamps)}) != "
        f"tokens count ({len(result.tokens)})"
    )

    print("\n--- Token-Level Timestamps ---")
    timestamps = result.timestamps
    durations = result.durations
    tokens = result.tokens

    assert len(durations) == len(tokens), (
        f"Durations count ({len(durations)}) != tokens count ({len(tokens)})"
    )

    for token, ts, dur in zip(tokens, timestamps, durations):
        end_ts = ts + dur
        print(f"  [{ts:.2f}s - {end_ts:.2f}s] ({dur:.2f}s): {repr(token)}")

    # Check monotonicity
    for i in range(1, len(result.timestamps)):
        assert result.timestamps[i] >= result.timestamps[i - 1], (
            f"Timestamps not monotonic at index {i}: "
            f"{result.timestamps[i - 1]} > {result.timestamps[i]}"
        )

    # Check range: timestamps bounded by actual audio duration (or 30s if truncated)
    max_timestamp = min(audio_duration, 30.0)
    for ts in result.timestamps:
        assert 0.0 <= ts <= max_timestamp, f"Timestamp out of range: {ts}"

    # Note: Word-level timestamps can be derived from token-level data client-side
    # by grouping tokens that start with a space character into words--or, in the
    # case of non-space-delimited languages like Chinese, Japanese, etc., treat
    # each unicode character as a word.

    # Check segment timestamps if enabled
    if enable_segment_timestamps:
        print("\n--- Segment-Level Timestamps ---")
        seg_timestamps = result.segment_timestamps
        seg_durations = result.segment_durations
        seg_texts = result.segment_texts

        assert len(seg_timestamps) == len(seg_durations) == len(seg_texts), (
            f"Segment vectors have different lengths: "
            f"timestamps={len(seg_timestamps)}, durations={len(seg_durations)}, "
            f"texts={len(seg_texts)}"
        )

        for i, (ts, dur, text) in enumerate(
            zip(seg_timestamps, seg_durations, seg_texts)
        ):
            end_ts = ts + dur
            print(f"  Segment {i}: [{ts:.2f}s - {end_ts:.2f}s] ({dur:.2f}s)")
            print(f"    Text: {repr(text)}")

    print("\nTest with timestamps PASSED!")
    return True


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--encoder", required=True, help="Path to encoder.onnx")
    parser.add_argument("--decoder", required=True, help="Path to decoder.onnx")
    parser.add_argument("--tokens", required=True, help="Path to tokens.txt")
    parser.add_argument("--audio", required=True, help="Path to audio file (wav)")
    parser.add_argument(
        "--enable-token-timestamps",
        action="store_true",
        help="Enable token-level timestamps (requires attention-enabled model)",
    )
    parser.add_argument(
        "--enable-segment-timestamps",
        action="store_true",
        help="Enable segment-level timestamps using timestamp tokens",
    )
    parser.add_argument(
        "--provider",
        default="cpu",
        help="Execution provider: cpu, cuda, coreml, etc. (default: cpu)",
    )
    args = parser.parse_args()

    # Handle --enable-segment-timestamps dependency on --enable-token-timestamps
    if args.enable_segment_timestamps and not args.enable_token_timestamps:
        parser.error(
            "--enable-segment-timestamps requires --enable-token-timestamps to be set"
        )

    # Read audio
    samples, sample_rate = read_wave(args.audio)
    print(f"Loaded audio: {len(samples)} samples at {sample_rate} Hz")
    print(f"Duration: {len(samples) / sample_rate:.2f} seconds\n")

    # Test without timestamps
    test_without_timestamps(args, samples, sample_rate)

    # Test with timestamps if requested
    audio_duration = len(samples) / sample_rate
    if args.enable_token_timestamps:
        test_with_timestamps(
            args,
            samples,
            sample_rate,
            audio_duration,
            enable_segment_timestamps=args.enable_segment_timestamps,
        )

    print("\n" + "=" * 60)
    print("All tests passed!")
    print("=" * 60)


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/two-pass-speech-recognition-from-microphone.py
================================================
#!/usr/bin/env python3

# Two-pass real-time speech recognition from a microphone with sherpa-onnx
# Python API.
#
# The first pass uses a streaming model, which has two purposes:
#
#  (1) Display a temporary result to users
#
#  (2) Endpointing
#
# The second pass uses a non-streaming model. It has a higher recognition
# accuracy than the first pass model and its result is used as the final result.
#
# Please refer to
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
# to download pre-trained models

"""
Usage examples:

(1) Chinese: Streaming zipformer (1st pass) + Non-streaming paraformer (2nd pass)

python3 ./python-api-examples/two-pass-speech-recognition-from-microphone.py \
  --first-encoder ./sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23/encoder-epoch-99-avg-1.onnx \
  --first-decoder ./sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23/decoder-epoch-99-avg-1.onnx \
  --first-joiner ./sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23/joiner-epoch-99-avg-1.onnx \
  --first-tokens ./sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23/tokens.txt \
  \
  --second-paraformer ./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx \
  --second-tokens ./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt

(2) English: Streaming zipformer (1st pass) + Non-streaming whisper (2nd pass)

python3 ./python-api-examples/two-pass-speech-recognition-from-microphone.py \
  --first-encoder ./sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/encoder-epoch-99-avg-1.onnx \
  --first-decoder ./sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/decoder-epoch-99-avg-1.onnx \
  --first-joiner ./sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/joiner-epoch-99-avg-1.onnx \
  --first-tokens ./sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/tokens.txt \
  \
  --second-whisper-encoder ./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx \
  --second-whisper-decoder ./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx \
  --second-tokens ./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt
"""

import argparse
import sys
from pathlib import Path

import numpy as np

try:
    import sounddevice as sd
except ImportError:
    print("Please install sounddevice first. You can use")
    print()
    print("  pip install sounddevice")
    print()
    print("to install it")
    sys.exit(-1)

import sherpa_onnx


def assert_file_exists(filename: str, message: str):
    if not filename:
        raise ValueError(f"Please specify {message}")

    if not Path(filename).is_file():
        raise ValueError(f"{message} {filename} does not exist")


def add_first_pass_streaming_model_args(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--first-tokens",
        type=str,
        required=True,
        help="Path to tokens.txt for the first pass",
    )

    parser.add_argument(
        "--first-encoder",
        type=str,
        required=True,
        help="Path to the encoder model for the first pass",
    )

    parser.add_argument(
        "--first-decoder",
        type=str,
        required=True,
        help="Path to the decoder model for the first pass",
    )

    parser.add_argument(
        "--first-joiner",
        type=str,
        help="Path to the joiner model for the first pass",
    )

    parser.add_argument(
        "--first-decoding-method",
        type=str,
        default="greedy_search",
        help="""Decoding method for the first pass. Valid values are
        greedy_search and modified_beam_search""",
    )

    parser.add_argument(
        "--first-max-active-paths",
        type=int,
        default=4,
        help="""Used only when --first-decoding-method is modified_beam_search.
        It specifies number of active paths to keep during decoding.
        """,
    )


def add_second_pass_transducer_model_args(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--second-encoder",
        default="",
        type=str,
        help="Path to the transducer encoder model for the second pass",
    )

    parser.add_argument(
        "--second-decoder",
        default="",
        type=str,
        help="Path to the transducer decoder model for the second pass",
    )

    parser.add_argument(
        "--second-joiner",
        default="",
        type=str,
        help="Path to the transducer joiner model for the second pass",
    )


def add_second_pass_paraformer_model_args(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--second-paraformer",
        default="",
        type=str,
        help="Path to the model.onnx for Paraformer for the second pass",
    )


def add_second_pass_nemo_ctc_model_args(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--second-nemo-ctc",
        default="",
        type=str,
        help="Path to the model.onnx for NeMo CTC for the second pass",
    )


def add_second_pass_whisper_model_args(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--second-whisper-encoder",
        default="",
        type=str,
        help="Path to whisper encoder model for the second pass",
    )

    parser.add_argument(
        "--second-whisper-decoder",
        default="",
        type=str,
        help="Path to whisper decoder model for the second pass",
    )

    parser.add_argument(
        "--second-whisper-language",
        default="",
        type=str,
        help="""It specifies the spoken language in the input audio file.
        Example values: en, fr, de, zh, jp.
        Available languages for multilingual models can be found at
        https://github.com/openai/whisper/blob/main/whisper/tokenizer.py#L10
        If not specified, we infer the language from the input audio file.
        """,
    )

    parser.add_argument(
        "--second-whisper-task",
        default="transcribe",
        choices=["transcribe", "translate"],
        type=str,
        help="""For multilingual models, if you specify translate, the output
        will be in English.
        """,
    )

    parser.add_argument(
        "--second-whisper-tail-paddings",
        default=-1,
        type=int,
        help="""Number of tail padding frames.
        We have removed the 30-second constraint from whisper, so you need to
        choose the amount of tail padding frames by yourself.
        Use -1 to use a default value for tail padding.
        """,
    )


def add_second_pass_non_streaming_model_args(parser: argparse.ArgumentParser):
    add_second_pass_transducer_model_args(parser)
    add_second_pass_nemo_ctc_model_args(parser)
    add_second_pass_paraformer_model_args(parser)
    add_second_pass_whisper_model_args(parser)

    parser.add_argument(
        "--second-tokens",
        type=str,
        help="Path to tokens.txt for the second pass",
    )


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--provider",
        type=str,
        default="cpu",
        help="Valid values: cpu, cuda, coreml",
    )

    add_first_pass_streaming_model_args(parser)
    add_second_pass_non_streaming_model_args(parser)

    return parser.parse_args()


def check_first_pass_args(args):
    assert_file_exists(args.first_tokens, "--first-tokens")
    assert_file_exists(args.first_encoder, "--first-encoder")
    assert_file_exists(args.first_decoder, "--first-decoder")
    assert_file_exists(args.first_joiner, "--first-joiner")


def check_second_pass_args(args):
    assert_file_exists(args.second_tokens, "--second-tokens")

    if args.second_encoder:
        assert_file_exists(args.second_encoder, "--second-encoder")
        assert_file_exists(args.second_decoder, "--second-decoder")
        assert_file_exists(args.second_joiner, "--second-joiner")
    elif args.second_paraformer:
        assert_file_exists(args.second_paraformer, "--second-paraformer")
    elif args.second_nemo_ctc:
        assert_file_exists(args.second_nemo_ctc, "--second-nemo-ctc")
    elif args.second_whisper_encoder:
        assert_file_exists(args.second_whisper_encoder, "--second-whisper-encoder")
        assert_file_exists(args.second_whisper_decoder, "--second-whisper-decoder")
    else:
        raise ValueError("Please specify the model for the second pass")


def create_first_pass_recognizer(args):
    # Please replace the model files if needed.
    # See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
    # for download links.
    recognizer = sherpa_onnx.OnlineRecognizer.from_transducer(
        tokens=args.first_tokens,
        encoder=args.first_encoder,
        decoder=args.first_decoder,
        joiner=args.first_joiner,
        num_threads=1,
        sample_rate=16000,
        feature_dim=80,
        decoding_method=args.first_decoding_method,
        max_active_paths=args.first_max_active_paths,
        provider=args.provider,
        enable_endpoint_detection=True,
        rule1_min_trailing_silence=2.4,
        rule2_min_trailing_silence=1.2,
        rule3_min_utterance_length=20,
    )
    return recognizer


def create_second_pass_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
    if args.second_encoder:
        recognizer = sherpa_onnx.OfflineRecognizer.from_transducer(
            encoder=args.second_encoder,
            decoder=args.second_decoder,
            joiner=args.second_joiner,
            tokens=args.second_tokens,
            sample_rate=16000,
            feature_dim=80,
            decoding_method="greedy_search",
            max_active_paths=4,
        )
    elif args.second_paraformer:
        recognizer = sherpa_onnx.OfflineRecognizer.from_paraformer(
            paraformer=args.second_paraformer,
            tokens=args.second_tokens,
            num_threads=1,
            sample_rate=16000,
            feature_dim=80,
            decoding_method="greedy_search",
        )
    elif args.second_nemo_ctc:
        recognizer = sherpa_onnx.OfflineRecognizer.from_nemo_ctc(
            model=args.second_nemo_ctc,
            tokens=args.second_tokens,
            num_threads=1,
            sample_rate=16000,
            feature_dim=80,
            decoding_method="greedy_search",
        )
    elif args.second_whisper_encoder:
        recognizer = sherpa_onnx.OfflineRecognizer.from_whisper(
            encoder=args.second_whisper_encoder,
            decoder=args.second_whisper_decoder,
            tokens=args.second_tokens,
            num_threads=1,
            decoding_method="greedy_search",
            language=args.second_whisper_language,
            task=args.second_whisper_task,
            tail_paddings=args.second_whisper_tail_paddings,
        )
    else:
        raise ValueError("Please specify at least one model for the second pass")

    return recognizer


def run_second_pass(
    recognizer: sherpa_onnx.OfflineRecognizer,
    samples: np.ndarray,
    sample_rate: int,
):
    stream = recognizer.create_stream()
    stream.accept_waveform(sample_rate, samples)

    recognizer.decode_stream(stream)

    return stream.result.text


def main():
    args = get_args()
    check_first_pass_args(args)
    check_second_pass_args(args)

    devices = sd.query_devices()
    if len(devices) == 0:
        print("No microphone devices found")
        sys.exit(0)

    print(devices)

    # If you want to select a different input device, please use
    # sd.default.device[0] = xxx
    # where xxx is the device number

    default_input_device_idx = sd.default.device[0]
    print(f'Use default device: {devices[default_input_device_idx]["name"]}')

    print("Creating recognizers. Please wait...")
    first_recognizer = create_first_pass_recognizer(args)
    second_recognizer = create_second_pass_recognizer(args)

    print("Started! Please speak")

    sample_rate = 16000
    samples_per_read = int(0.1 * sample_rate)  # 0.1 second = 100 ms
    stream = first_recognizer.create_stream()

    display = sherpa_onnx.Display()

    sample_buffers = []
    with sd.InputStream(channels=1, dtype="float32", samplerate=sample_rate) as s:
        while True:
            samples, _ = s.read(samples_per_read)  # a blocking read
            samples = samples.reshape(-1)
            stream.accept_waveform(sample_rate, samples)

            sample_buffers.append(samples)

            while first_recognizer.is_ready(stream):
                first_recognizer.decode_stream(stream)

            is_endpoint = first_recognizer.is_endpoint(stream)

            result = first_recognizer.get_result(stream)
            result = result.lower().strip()

            display.update_text(result)
            display.display()

            if is_endpoint:
                if result:
                    samples = np.concatenate(sample_buffers)
                    # There are internal sample buffers inside the streaming
                    # feature extractor, so we cannot send all samples to
                    # the 2nd pass. Here 8000 is just an empirical value
                    # that should work for most streaming models in sherpa-onnx
                    sample_buffers = [samples[-8000:]]
                    samples = samples[:-8000]
                    result = run_second_pass(
                        recognizer=second_recognizer,
                        samples=samples,
                        sample_rate=sample_rate,
                    )
                    result = result.lower().strip()
                    display.update_text(result)
                    display.finalize_current_sentence()
                    display.display()
                else:
                    sample_buffers = []

                first_recognizer.reset(stream)


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\nCaught Ctrl + C. Exiting")


================================================
FILE: python-api-examples/two-pass-wss.py
================================================
#!/usr/bin/env python3
# Copyright (c) 2025 Minghu Wang
"""

A two-pass streaming ASR server with WebSocket support. This server implements
a two-pass recognition strategy where the first pass uses a fast streaming model
for real-time recognition, and the second pass uses a more accurate offline model
to refine the results.

The first pass provides immediate feedback to users, while the second pass
improves accuracy by re-processing the complete utterance with a more powerful
model.

It supports multiple clients sending audio simultaneously and provides
real-time transcription results.

Usage:
    ./two-pass-wss.py --help

Example:

(1) Without a certificate

python3 ./python-api-examples/two-pass-wss.py \
  --paraformer-encoder ./sherpa-onnx-paraformer-zh-2023-09-18/encoder.onnx \
  --paraformer-decoder ./sherpa-onnx-paraformer-zh-2023-09-18/decoder.onnx \
  --tokens ./sherpa-onnx-paraformer-zh-2023-09-18/tokens.txt \
  --second-sense-voice ./sherpa-onnx-sense-voice-zh-2023-09-18/model.onnx \
  --second-tokens ./sherpa-onnx-sense-voice-zh-2023-09-18/tokens.txt

(2) With a certificate

(a) Generate a certificate first:

    cd python-api-examples/web
    ./generate-certificate.py
    cd ../..

(b) Start the server

python3 ./python-api-examples/two-pass-wss.py \
  --paraformer-encoder ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.onnx \
  --paraformer-decoder ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.onnx \
  --tokens ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt \
  --second-sense-voice ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.onnx \
  --second-tokens ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt \
  --certificate ./python-api-examples/web/cert.pem

Please refer to
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
to download pre-trained models.
"""

import argparse
import asyncio
import http
import json
import logging
import socket
import ssl
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from pathlib import Path
from typing import List, Optional, Tuple

import numpy as np
import sherpa_onnx
import websockets

def setup_logger(
    log_filename: str,
    log_level: str = "info",
    use_console: bool = True,
) -> None:
    """Setup log level.

    Args:
      log_filename:
        The filename to save the log.
      log_level:
        The log level to use, e.g., "debug", "info", "warning", "error",
        "critical"
      use_console:
        True to also print logs to console.
    """
    now = datetime.now()
    date_time = now.strftime("%Y-%m-%d-%H-%M-%S")
    formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
    log_filename = f"{log_filename}-{date_time}.txt"

    Path(log_filename).parent.mkdir(parents=True, exist_ok=True)

    level = logging.ERROR
    if log_level == "debug":
        level = logging.DEBUG
    elif log_level == "info":
        level = logging.INFO
    elif log_level == "warning":
        level = logging.WARNING
    elif log_level == "critical":
        level = logging.CRITICAL

    logging.basicConfig(
        filename=log_filename,
        format=formatter,
        level=level,
        filemode="w",
    )
    if use_console:
        console = logging.StreamHandler()
        console.setLevel(level)
        console.setFormatter(logging.Formatter(formatter))
        logging.getLogger("").addHandler(console)


def add_model_args(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--encoder",
        type=str,
        default="",
        help="Path to the transducer encoder model",
    )

    parser.add_argument(
        "--decoder",
        type=str,
        default="",
        help="Path to the transducer decoder model.",
    )


    parser.add_argument(
        "--second-tokens",
        type=str,
        default="",
        help="Path to the second pass tokens.txt",
    )

    parser.add_argument(
        "--second-sense-voice",
        type=str,
        default="",
        help="Path to the second pass sense voice model.",
    )

    parser.add_argument(
        "--paraformer-encoder",
        type=str,
        default="",
        help="Path to the paraformer encoder model",
    )

    parser.add_argument(
        "--paraformer-decoder",
        type=str,
        default="",
        help="Path to the paraformer decoder model.",
    )

    parser.add_argument(
        "--tokens",
        type=str,
        default="",
        help="Path to tokens.txt",
    )

    parser.add_argument(
        "--sample-rate",
        type=int,
        default=16000,
        help="Sample rate of the data used to train the model. "
        "Caution: If your input sound files have a different sampling rate, "
        "we will do resampling inside",
    )

    parser.add_argument(
        "--feat-dim",
        type=int,
        default=80,
        help="Feature dimension of the model",
    )

    parser.add_argument(
        "--provider",
        type=str,
        default="cpu",
        help="Valid values: cpu, cuda, coreml",
    )


def add_decoding_args(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--decoding-method",
        type=str,
        default="greedy_search",
        help="""Decoding method to use. Current supported methods are:
        - greedy_search
        - modified_beam_search
        """,
    )

    add_modified_beam_search_args(parser)


def add_hotwords_args(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--hotwords-file",
        type=str,
        default="",
        help="""
        The file containing hotwords, one words/phrases per line, and for each
        phrase the bpe/cjkchar are separated by a space. For example:

        ▁HE LL O ▁WORLD
        你 好 世 界
        """,
    )

    parser.add_argument(
        "--hotwords-score",
        type=float,
        default=1.5,
        help="""
        The hotword score of each token for biasing word/phrase. Used only if
        --hotwords-file is given.
        """,
    )
    parser.add_argument(
        "--modeling-unit",
        type=str,
        default='cjkchar',
        help="""
        The modeling unit of the used model. Current supported units are:
        - cjkchar(for Chinese)
        - bpe(for English like languages)
        - cjkchar+bpe(for multilingual models)
        """,
    )
    parser.add_argument(
        "--bpe-vocab",
        type=str,
        default='',
        help="""
        The bpe vocabulary generated by sentencepiece toolkit. 
        It is only used when modeling-unit is bpe or cjkchar+bpe.
        if you can't find bpe.vocab in the model directory, please run:
        python script/export_bpe_vocab.py --bpe-model exp/bpe.model
        """,
    )


def add_modified_beam_search_args(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--num-active-paths",
        type=int,
        default=4,
        help="""Used only when --decoding-method is modified_beam_search.
        It specifies number of active paths to keep during decoding.
        """,
    )

def add_blank_penalty_args(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--blank-penalty",
        type=float,
        default=0.0,
        help="""
        The penalty applied on blank symbol during decoding.
        Note: It is a positive value that would be applied to logits like
        this `logits[:, 0] -= blank_penalty` (suppose logits.shape is
        [batch_size, vocab] and blank id is 0).
        """,
    )

def add_endpointing_args(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--rule1-min-trailing-silence",
        type=float,
        default=2.4,
        help="""This endpointing rule1 requires duration of trailing silence
        in seconds) to be >= this value""",
    )

    parser.add_argument(
        "--rule2-min-trailing-silence",
        type=float,
        default=1.2,
        help="""This endpointing rule2 requires duration of trailing silence in
        seconds) to be >= this value.""",
    )

    parser.add_argument(
        "--rule3-min-utterance-length",
        type=float,
        default=20,
        help="""This endpointing rule3 requires utterance-length (in seconds)
        to be >= this value.""",
    )


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    add_model_args(parser)
    add_decoding_args(parser)
    add_endpointing_args(parser)
    add_hotwords_args(parser)
    add_blank_penalty_args(parser)

    parser.add_argument(
        "--port",
        type=int,
        default=6006,
        help="The server will listen on this port",
    )

    parser.add_argument(
        "--nn-pool-size",
        type=int,
        default=1,
        help="Number of threads for NN computation and decoding.",
    )

    parser.add_argument(
        "--max-batch-size",
        type=int,
        default=3,
        help="""Max batch size for computation. Note if there are not enough
        requests in the queue, it will wait for max_wait_ms time. After that,
        even if there are not enough requests, it still sends the
        available requests in the queue for computation.
        """,
    )

    parser.add_argument(
        "--max-wait-ms",
        type=float,
        default=10,
        help="""Max time in millisecond to wait to build batches for inference.
        If there are not enough requests in the stream queue to build a batch
        of max_batch_size, it waits up to this time before fetching available
        requests for computation.
        """,
    )

    parser.add_argument(
        "--max-message-size",
        type=int,
        default=(1 << 20),
        help="""Max message size in bytes.
        The max size per message cannot exceed this limit.
        """,
    )

    parser.add_argument(
        "--max-queue-size",
        type=int,
        default=32,
        help="Max number of messages in the queue for each connection.",
    )

    parser.add_argument(
        "--max-active-connections",
        type=int,
        default=200,
        help="""Maximum number of active connections. The server will refuse
        to accept new connections once the current number of active connections
        equals to this limit.
        """,
    )

    parser.add_argument(
        "--num-threads",
        type=int,
        default=2,
        help="Number of threads to run the neural network model",
    )

    parser.add_argument(
        "--second-pass-threads",
        type=int,
        default=2,
        help="Number of threads for second pass processing",
    )

    parser.add_argument(
        "--certificate",
        type=str,
        help="""Path to the X.509 certificate. You need it only if you want to
        use a secure websocket connection, i.e., use wss:// instead of ws://.
        You can use ./web/generate-certificate.py
        to generate the certificate `cert.pem`.
        Note ./web/generate-certificate.py will generate three files but you
        only need to pass the generated cert.pem to this option.
        """,
    )

    return parser.parse_args()

def run_second_pass(
    recognizer: sherpa_onnx.OfflineRecognizer,
    samples: np.ndarray,
    sample_rate: int,
):
    stream = recognizer.create_stream()
    stream.accept_waveform(sample_rate, samples)

    recognizer.decode_stream(stream)

    return stream.result.text

def create_first_pass_recognizer(args) -> sherpa_onnx.OnlineRecognizer:
    recognizer = sherpa_onnx.OnlineRecognizer.from_paraformer(
            tokens=args.tokens,
            encoder=args.paraformer_encoder,
            decoder=args.paraformer_decoder,
            num_threads=args.num_threads,
            sample_rate=args.sample_rate,
            feature_dim=args.feat_dim,
            decoding_method=args.decoding_method,
            enable_endpoint_detection=True,
            rule1_min_trailing_silence=args.rule1_min_trailing_silence,
            rule2_min_trailing_silence=args.rule2_min_trailing_silence,
            rule3_min_utterance_length=args.rule3_min_utterance_length,
            provider=args.provider,
        )
    return recognizer


def create_second_pass_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
    recognizer = sherpa_onnx.OfflineRecognizer.from_sense_voice(
            model=args.second_sense_voice,
            tokens=args.second_tokens,
            num_threads=1,
            sample_rate=16000,
            feature_dim=80,
            use_itn=True,
            decoding_method="greedy_search",
        )
    return recognizer


def format_timestamps(timestamps: List[float]) -> List[str]:
    return ["{:.3f}".format(t) for t in timestamps]


class StreamingServer(object):
    def __init__(
        self,
        first_pass_recognizer: sherpa_onnx.OnlineRecognizer,
        second_pass_recognizer: sherpa_onnx.OfflineRecognizer,
        nn_pool_size: int,
        max_wait_ms: float,
        max_batch_size: int,
        max_message_size: int,
        max_queue_size: int,
        max_active_connections: int,
        second_pass_threads: int = 2,
        certificate: Optional[str] = None,
    ):
        """
        Args:
          first_pass_recognizer:
            An instance of online recognizer for first pass.
          second_pass_recognizer:
            An instance of offline recognizer for second pass.
          nn_pool_size:
            Number of threads for the thread pool that is responsible for
            neural network computation and decoding.
          max_wait_ms:
            Max wait time in milliseconds in order to build a batch of
            `batch_size`.
          max_batch_size:
            Max batch size for inference.
          max_message_size:
            Max size in bytes per message.
          max_queue_size:
            Max number of messages in the queue for each connection.
          max_active_connections:
            Max number of active connections. Once number of active client
            equals to this limit, the server refuses to accept new connections.
          certificate:
            Optional. If not None, it will use secure websocket.
            You can use ./web/generate-certificate.py to generate
            it (the default generated filename is `cert.pem`).
        """
        self.first_pass_recognizer = first_pass_recognizer
        self.second_pass_recognizer = second_pass_recognizer

        self.certificate = certificate

        self.nn_pool_size = nn_pool_size
        self.nn_pool = ThreadPoolExecutor(
            max_workers=nn_pool_size,
            thread_name_prefix="nn",
        )

        self.second_pass_pool = ThreadPoolExecutor(
            max_workers=second_pass_threads,
            thread_name_prefix="second_pass",
        )

        self.stream_queue = asyncio.Queue()

        self.max_wait_ms = max_wait_ms
        self.max_batch_size = max_batch_size
        self.max_message_size = max_message_size
        self.max_queue_size = max_queue_size
        self.max_active_connections = max_active_connections

        self.current_active_connections = 0

        self.sample_rate = int(self.first_pass_recognizer.config.feat_config.sampling_rate)

    async def stream_consumer_task(self):
        """This function extracts streams from the queue, batches them up, sends
        them to the neural network model for computation and decoding.
        """
        while True:
            if self.stream_queue.empty():
                await asyncio.sleep(self.max_wait_ms / 1000)
                continue

            batch = []
            try:
                while len(batch) < self.max_batch_size:
                    item = self.stream_queue.get_nowait()

                    assert self.first_pass_recognizer.is_ready(item[0])

                    batch.append(item)
            except asyncio.QueueEmpty:
                pass
            stream_list = [b[0] for b in batch]
            future_list = [b[1] for b in batch]

            loop = asyncio.get_running_loop()
            await loop.run_in_executor(
                self.nn_pool,
                self.first_pass_recognizer.decode_streams,
                stream_list,
            )

            for f in future_list:
                self.stream_queue.task_done()
                f.set_result(None)

    async def compute_and_decode(
        self,
        stream: sherpa_onnx.OnlineStream,
    ) -> None:
        """Put the stream into the queue and wait it to be processed by the
        consumer task.

        Args:
          stream:
            The stream to be processed. Note: It is changed in-place.
        """
        loop = asyncio.get_running_loop()
        future = loop.create_future()
        await self.stream_queue.put((stream, future))
        await future

    async def run_second_pass_async(
        self,
        samples: np.ndarray,
        sample_rate: int,
    ) -> str:
        """Run second-pass recognition asynchronously to avoid blocking.

        Args:
          samples: Audio samples.
          sample_rate: Sampling rate.

        Returns:
          Text result from the second-pass recognition.
        """
        import time
        start_time = time.time()
        
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(
            self.second_pass_pool,
            run_second_pass,
            self.second_pass_recognizer,
            samples,
            sample_rate,
        )
        
        end_time = time.time()
        duration = end_time - start_time
        logging.info(f"Second pass processing completed in {duration:.3f}s for {len(samples)/sample_rate:.2f}s audio")
        
        return result.lower().strip()

    async def process_request(
        self,
        path: str,
        request_headers: websockets.Headers,
    ) -> Optional[Tuple[http.HTTPStatus, websockets.Headers, bytes]]:
        if self.current_active_connections < self.max_active_connections:
            self.current_active_connections += 1
            return None

        # Refuse new connections
        status = http.HTTPStatus.SERVICE_UNAVAILABLE  # 503
        header = {"Hint": "The server is overloaded. Please retry later."}
        response = b"The server is busy. Please retry later."

        return status, header, response

    async def run(self, port: int):
        tasks = []
        for i in range(self.nn_pool_size):
            tasks.append(asyncio.create_task(self.stream_consumer_task()))

        if self.certificate:
            logging.info(f"Using certificate: {self.certificate}")
            ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
            ssl_context.load_cert_chain(self.certificate)
        else:
            ssl_context = None
            logging.info("No certificate provided")

        try:
            async with websockets.serve(
                self.handle_connection,
                host="",
                port=port,
                max_size=self.max_message_size,
                max_queue=self.max_queue_size,
                process_request=self.process_request,
                ssl=ssl_context,
            ):
                logging.info(f"Started server on port {port}")
                await asyncio.Future()  # run forever
        finally:
            logging.info("Shutting down thread pools...")
            self.nn_pool.shutdown(wait=True)
            self.second_pass_pool.shutdown(wait=True)
            logging.info("Thread pools shut down successfully")

        await asyncio.gather(*tasks)  # not reachable

    async def handle_connection(
        self,
        socket: websockets.WebSocketServerProtocol,
    ):
        """Receive audio samples from the client, process it, and send
        decoding result back to the client.

        Args:
          socket:
            The socket for communicating with the client.
        """
        try:
            await self.handle_connection_impl(socket)
        except websockets.exceptions.ConnectionClosed:
            logging.info(f"{socket.remote_address} disconnected")
        finally:
            # Decrement so that it can accept new connections
            self.current_active_connections -= 1

            logging.info(
                f"Disconnected: {socket.remote_address}. "
                f"Number of connections: {self.current_active_connections}/{self.max_active_connections}"  # noqa
            )

    async def handle_connection_impl(
        self,
        socket: websockets.WebSocketServerProtocol,
    ):
        """Receive audio samples from the client, process it, and send
        decoding result back to the client.

        Args:
          socket:
            The socket for communicating with the client.
        """
        stream = self.first_pass_recognizer.create_stream()
        segment = 0
        sample_buffers = []
        while True:
            samples = await self.recv_audio_samples(socket)
            if samples is None:
                break
            
            # TODO(fangjun): At present, we assume the sampling rate
            # of the received audio samples equal to --sample-rate
            stream.accept_waveform(sample_rate=self.sample_rate, waveform=samples)
            sample_buffers.append(samples)
            while self.first_pass_recognizer.is_ready(stream):
                await self.compute_and_decode(stream)
                result = self.first_pass_recognizer.get_result(stream)

                message = {
                    "text": result,
                    "segment": segment,
                }
                if self.first_pass_recognizer.is_endpoint(stream):
                    if result:
                        samples_for_2nd_pass = np.concatenate(sample_buffers)
                        sample_buffers = [samples_for_2nd_pass[-8000:]]
                        samples_for_2nd_pass = samples_for_2nd_pass[:-8000]
                        second_pass_result = (
                            await self.run_second_pass_async(
                                samples=samples_for_2nd_pass,
                                sample_rate=self.sample_rate,
                            )
                        )

                        if second_pass_result:
                            message["text"] = second_pass_result
                            message["segment"] = segment
                    else:
                        sample_buffers=[]

                    self.first_pass_recognizer.reset(stream)
                    segment += 1
                await socket.send(json.dumps(message))

        tail_padding = np.zeros(int(self.sample_rate * 0.3)).astype(np.float32)
        stream.accept_waveform(sample_rate=self.sample_rate, waveform=tail_padding)
        stream.input_finished()
        while self.first_pass_recognizer.is_ready(stream):
            await self.compute_and_decode(stream)

        result = self.first_pass_recognizer.get_result(stream)

        message = {
            "text": result,
            "segment": segment,
        }
        await socket.send(json.dumps(message))

    async def recv_audio_samples(
        self,
        socket: websockets.WebSocketServerProtocol,
    ) -> Optional[np.ndarray]:
        """Receive audio samples from WebSocket connection
        
        Args:
          socket: WebSocket connection
        
        Returns:
          Numpy array containing audio samples, or None indicating end of audio
        """
        message = await socket.recv()
        if message == "Done":
            return None
        return np.frombuffer(message, dtype=np.float32)


def check_args(args):
    if args.encoder:
        assert Path(args.encoder).is_file(), f"{args.encoder} does not exist"
        assert Path(args.decoder).is_file(), f"{args.decoder} does not exist"
        assert args.paraformer_encoder is None, args.paraformer_encoder
        assert args.paraformer_decoder is None, args.paraformer_decoder
       
    elif args.paraformer_encoder:
        assert Path(
            args.paraformer_encoder
        ).is_file(), f"{args.paraformer_encoder} does not exist"

        assert Path(
            args.paraformer_decoder
        ).is_file(), f"{args.paraformer_decoder} does not exist"
    else:
        raise ValueError("Please provide a model")

    if not Path(args.tokens).is_file():
        raise ValueError(f"{args.tokens} does not exist")

    if args.decoding_method not in (
        "greedy_search",
        "modified_beam_search",
    ):
        raise ValueError(f"Unsupported decoding method {args.decoding_method}")

    if args.decoding_method == "modified_beam_search":
        assert args.num_active_paths > 0, args.num_active_paths


def main():
    args = get_args()
    logging.info(vars(args))
    check_args(args)

    first_pass_recognizer = create_first_pass_recognizer(args)
    second_pass_recognizer = create_second_pass_recognizer(args)

    port = args.port
    nn_pool_size = args.nn_pool_size
    max_batch_size = args.max_batch_size
    max_wait_ms = args.max_wait_ms
    max_message_size = args.max_message_size
    max_queue_size = args.max_queue_size
    max_active_connections = args.max_active_connections
    second_pass_threads = args.second_pass_threads
    certificate = args.certificate
    # doc_root = args.doc_root

    if certificate and not Path(certificate).is_file():
        raise ValueError(f"{certificate} does not exist")

    server = StreamingServer(
        first_pass_recognizer=first_pass_recognizer,
        second_pass_recognizer=second_pass_recognizer,
        nn_pool_size=nn_pool_size,
        max_batch_size=max_batch_size,
        max_wait_ms=max_wait_ms,
        max_message_size=max_message_size,
        max_queue_size=max_queue_size,
        max_active_connections=max_active_connections,
        second_pass_threads=second_pass_threads,
        certificate=certificate,
        # doc_root=doc_root,
    )
    asyncio.run(server.run(port))


if __name__ == "__main__":
    log_filename = "log/log-streaming-server"
    setup_logger(log_filename)
    main()


================================================
FILE: python-api-examples/vad-alsa.py
================================================
#!/usr/bin/env python3

"""
This script works only on Linux. It uses ALSA for recording.
"""

import argparse
from pathlib import Path

import sherpa_onnx


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--silero-vad-model",
        type=str,
        required=True,
        help="Path to silero_vad.onnx",
    )

    parser.add_argument(
        "--device-name",
        type=str,
        required=True,
        help="""
The device name specifies which microphone to use in case there are several
on your system. You can use

  arecord -l

to find all available microphones on your computer. For instance, if it outputs

**** List of CAPTURE Hardware Devices ****
card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
  Subdevices: 1/1
  Subdevice #0: subdevice #0

and if you want to select card 3 and device 0 on that card, please use:

  plughw:3,0

as the device_name.
        """,
    )

    return parser.parse_args()


def main():
    args = get_args()
    if not Path(args.silero_vad_model).is_file():
        raise RuntimeError(
            f"{args.silero_vad_model} does not exist. Please download it from "
            "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx"
        )

    device_name = args.device_name
    print(f"device_name: {device_name}")
    alsa = sherpa_onnx.Alsa(device_name)

    sample_rate = 16000
    samples_per_read = int(0.1 * sample_rate)  # 0.1 second = 100 ms

    config = sherpa_onnx.VadModelConfig()
    config.silero_vad.model = args.silero_vad_model
    config.sample_rate = sample_rate

    vad = sherpa_onnx.VoiceActivityDetector(config, buffer_size_in_seconds=30)

    print("Started! Please speak. Press Ctrl C to exit")

    printed = False
    k = 0
    try:
        while True:
            samples = alsa.read(samples_per_read)  # a blocking read

            vad.accept_waveform(samples)

            if vad.is_speech_detected() and not printed:
                print("Detected speech")
                printed = True

            if not vad.is_speech_detected():
                printed = False

            while not vad.empty():
                samples = vad.front.samples
                duration = len(samples) / sample_rate
                filename = f"seg-{k}-{duration:.3f}-seconds.wav"
                k += 1
                sherpa_onnx.write_wave(filename, samples, sample_rate)
                print(f"Duration: {duration:.3f} seconds")
                print(f"Saved to {filename}")
                print("----------")

                vad.pop()
    except KeyboardInterrupt:
        print("\nCaught Ctrl + C. Exit")


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/vad-microphone.py
================================================
#!/usr/bin/env python3

import argparse
import os
import sys
from pathlib import Path

try:
    import sounddevice as sd
except ImportError:
    print("Please install sounddevice first. You can use")
    print()
    print("  pip install sounddevice")
    print()
    print("to install it")
    sys.exit(-1)

import sherpa_onnx


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--silero-vad-model",
        type=str,
        required=True,
        help="Path to silero_vad.onnx",
    )

    return parser.parse_args()


def main():
    args = get_args()
    if not Path(args.silero_vad_model).is_file():
        raise RuntimeError(
            f"{args.silero_vad_model} does not exist. Please download it from "
            "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx"
        )

    mic_sample_rate = 16000
    if "SHERPA_ONNX_MIC_SAMPLE_RATE" in os.environ:
        mic_sample_rate = int(os.environ.get("SHERPA_ONNX_MIC_SAMPLE_RATE"))
        print(f"Change microphone sample rate to {mic_sample_rate}")

    sample_rate = 16000
    samples_per_read = int(0.1 * sample_rate)  # 0.1 second = 100 ms

    config = sherpa_onnx.VadModelConfig()
    config.silero_vad.model = args.silero_vad_model
    config.sample_rate = sample_rate

    vad = sherpa_onnx.VoiceActivityDetector(config, buffer_size_in_seconds=30)

    # python3 -m sounddevice
    # can also be used to list all devices

    devices = sd.query_devices()
    if len(devices) == 0:
        print("No microphone devices found")
        print(
            "If you are using Linux and you are sure there is a microphone "
            "on your system, please use "
            "./vad-alsa.py"
        )
        sys.exit(0)

    print(devices)

    if "SHERPA_ONNX_MIC_DEVICE" in os.environ:
        input_device_idx = int(os.environ.get("SHERPA_ONNX_MIC_DEVICE"))
        sd.default.device[0] = input_device_idx
        print(f'Use selected device: {devices[input_device_idx]["name"]}')
    else:
        input_device_idx = sd.default.device[0]
        print(f'Use default device: {devices[input_device_idx]["name"]}')

    print("Started! Please speak. Press Ctrl C to exit")

    printed = False
    k = 0
    try:
        with sd.InputStream(
            channels=1, dtype="float32", samplerate=mic_sample_rate
        ) as s:
            while True:
                samples, _ = s.read(samples_per_read)  # a blocking read
                samples = samples.reshape(-1)

                if mic_sample_rate != sample_rate:
                    import librosa

                    samples = librosa.resample(
                        samples, orig_sr=mic_sample_rate, target_sr=sample_rate
                    )

                vad.accept_waveform(samples)

                if vad.is_speech_detected() and not printed:
                    print("Detected speech")
                    printed = True

                if not vad.is_speech_detected():
                    printed = False

                while not vad.empty():
                    samples = vad.front.samples
                    duration = len(samples) / sample_rate
                    filename = f"seg-{k}-{duration:.3f}-seconds.wav"
                    k += 1
                    sherpa_onnx.write_wave(filename, samples, sample_rate)
                    print(f"Duration: {duration:.3f} seconds")
                    print(f"Saved to {filename}")
                    print("----------")

                    vad.pop()
    except KeyboardInterrupt:
        print("\nCaught Ctrl + C. Exit")


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/vad-remove-non-speech-segments-alsa.py
================================================
#!/usr/bin/env python3

"""
This file shows how to remove non-speech segments
and merge all speech segments into a large segment
and save it to a file.

Different from ./vad-remove-non-speech-segments.py, this file supports only
Linux.

Usage

python3 ./vad-remove-non-speech-segments-alsa.py \
        --silero-vad-model silero_vad.onnx

Please visit
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
to download silero_vad.onnx

For instance,

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
"""

import argparse
import time
from pathlib import Path

import numpy as np
import sherpa_onnx
import soundfile as sf


def assert_file_exists(filename: str):
    assert Path(filename).is_file(), (
        f"{filename} does not exist!\n"
        "Please refer to "
        "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html to download it"
    )


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--silero-vad-model",
        type=str,
        required=True,
        help="Path to silero_vad.onnx",
    )

    parser.add_argument(
        "--device-name",
        type=str,
        required=True,
        help="""
The device name specifies which microphone to use in case there are several
on your system. You can use

  arecord -l

to find all available microphones on your computer. For instance, if it outputs

**** List of CAPTURE Hardware Devices ****
card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
  Subdevices: 1/1
  Subdevice #0: subdevice #0

and if you want to select card 3 and device 0 on that card, please use:

  plughw:3,0

as the device_name.
        """,
    )

    return parser.parse_args()


def main():
    args = get_args()
    assert_file_exists(args.silero_vad_model)

    device_name = args.device_name
    print(f"device_name: {device_name}")
    alsa = sherpa_onnx.Alsa(device_name)

    sample_rate = 16000
    samples_per_read = int(0.1 * sample_rate)  # 0.1 second = 100 ms

    config = sherpa_onnx.VadModelConfig()
    config.silero_vad.model = args.silero_vad_model
    config.sample_rate = sample_rate

    window_size = config.silero_vad.window_size

    buffer = []
    vad = sherpa_onnx.VoiceActivityDetector(config, buffer_size_in_seconds=30)

    all_samples = []

    print("Started! Please speak. Press Ctrl C to exit")

    try:
        while True:
            samples = alsa.read(samples_per_read)  # a blocking read
            samples = np.array(samples)

            buffer = np.concatenate([buffer, samples])

            all_samples = np.concatenate([all_samples, samples])

            while len(buffer) > window_size:
                vad.accept_waveform(buffer[:window_size])
                buffer = buffer[window_size:]
    except KeyboardInterrupt:
        print("\nCaught Ctrl + C. Saving & Exiting")

        speech_samples = []
        while not vad.empty():
            speech_samples.extend(vad.front.samples)
            vad.pop()

        speech_samples = np.array(speech_samples, dtype=np.float32)

        filename_for_speech = time.strftime("%Y%m%d-%H%M%S-speech.wav")
        sf.write(filename_for_speech, speech_samples, samplerate=sample_rate)

        filename_for_all = time.strftime("%Y%m%d-%H%M%S-all.wav")
        sf.write(filename_for_all, all_samples, samplerate=sample_rate)

        print(f"Saved to {filename_for_speech} and {filename_for_all}")


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/vad-remove-non-speech-segments-from-file.py
================================================
#!/usr/bin/env python3

"""
This file shows how to remove non-speech segments
and merge all speech segments into a large segment
and save it to a file.

Usage

python3 ./vad-remove-non-speech-segments-from-file.py \
        --silero-vad-model silero_vad.onnx \
        input.wav \
        output.wav

Please visit
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
to download silero_vad.onnx

For instance,
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
"""

import argparse
from pathlib import Path
from typing import Tuple

import numpy as np
import sherpa_onnx
import soundfile as sf


def assert_file_exists(filename: str):
    assert Path(filename).is_file(), (
        f"{filename} does not exist!\n"
        "Please refer to "
        "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html to download it"
    )


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--silero-vad-model",
        type=str,
        required=True,
        help="Path to silero_vad.onnx",
    )

    parser.add_argument(
        "input",
        type=str,
        help="Path to input.wav",
    )

    parser.add_argument(
        "output",
        type=str,
        help="Path to output.wav",
    )

    return parser.parse_args()


def load_audio(filename: str) -> Tuple[np.ndarray, int]:
    data, sample_rate = sf.read(
        filename,
        always_2d=True,
        dtype="float32",
    )
    data = data[:, 0]  # use only the first channel
    samples = np.ascontiguousarray(data)
    return samples, sample_rate


def main():
    args = get_args()
    assert_file_exists(args.silero_vad_model)
    assert_file_exists(args.input)

    samples, sample_rate = load_audio(args.input)
    if sample_rate != 16000:
        import librosa

        samples = librosa.resample(samples, orig_sr=sample_rate, target_sr=16000)
        sample_rate = 16000

    config = sherpa_onnx.VadModelConfig()
    config.silero_vad.model = args.silero_vad_model
    config.silero_vad.threshold = 0.5
    config.silero_vad.min_silence_duration = 0.25  # seconds
    config.silero_vad.min_speech_duration = 0.25  # seconds

    # If the current segment is larger than this value, then it increases
    # the threshold to 0.9 internally. After detecting this segment,
    # it resets the threshold to its original value.
    config.silero_vad.max_speech_duration = 5  # seconds

    config.sample_rate = sample_rate

    window_size = config.silero_vad.window_size

    vad = sherpa_onnx.VoiceActivityDetector(config, buffer_size_in_seconds=30)

    speech_samples = []
    while len(samples) > window_size:
        vad.accept_waveform(samples[:window_size])
        samples = samples[window_size:]

        while not vad.empty():
            speech_samples.extend(vad.front.samples)
            vad.pop()

    vad.flush()

    while not vad.empty():
        speech_samples.extend(vad.front.samples)
        vad.pop()

    speech_samples = np.array(speech_samples, dtype=np.float32)

    sf.write(args.output, speech_samples, samplerate=sample_rate)

    print(f"Saved to {args.output}")


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/vad-remove-non-speech-segments.py
================================================
#!/usr/bin/env python3

"""
This file shows how to remove non-speech segments
and merge all speech segments into a large segment
and save it to a file.

Usage

python3 ./vad-remove-non-speech-segments.py \
        --silero-vad-model silero_vad.onnx

Please visit
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
to download silero_vad.onnx

For instance,

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
"""

import argparse
import sys
import time
from pathlib import Path

import numpy as np
import sherpa_onnx
import soundfile as sf

try:
    import sounddevice as sd
except ImportError:
    print("Please install sounddevice first. You can use")
    print()
    print("  pip install sounddevice")
    print()
    print("to install it")
    sys.exit(-1)


def assert_file_exists(filename: str):
    assert Path(filename).is_file(), (
        f"{filename} does not exist!\n"
        "Please refer to "
        "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html to download it"
    )


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--silero-vad-model",
        type=str,
        required=True,
        help="Path to silero_vad.onnx",
    )

    return parser.parse_args()


def main():
    devices = sd.query_devices()
    if len(devices) == 0:
        print("No microphone devices found")
        print(
            "If you are using Linux and you are sure there is a microphone "
            "on your system, please use "
            "./vad-remove-non-speech-segments-alsa.py"
        )
        sys.exit(0)

    print(devices)
    default_input_device_idx = sd.default.device[0]
    print(f'Use default device: {devices[default_input_device_idx]["name"]}')

    args = get_args()
    assert_file_exists(args.silero_vad_model)

    sample_rate = 16000
    samples_per_read = int(0.1 * sample_rate)  # 0.1 second = 100 ms

    config = sherpa_onnx.VadModelConfig()
    config.silero_vad.model = args.silero_vad_model
    config.sample_rate = sample_rate

    window_size = config.silero_vad.window_size

    buffer = []
    vad = sherpa_onnx.VoiceActivityDetector(config, buffer_size_in_seconds=30)

    all_samples = []

    print("Started! Please speak. Press Ctrl C to exit")

    try:
        with sd.InputStream(channels=1, dtype="float32", samplerate=sample_rate) as s:
            while True:
                samples, _ = s.read(samples_per_read)  # a blocking read
                samples = samples.reshape(-1)
                buffer = np.concatenate([buffer, samples])

                all_samples = np.concatenate([all_samples, samples])

                while len(buffer) > window_size:
                    vad.accept_waveform(buffer[:window_size])
                    buffer = buffer[window_size:]
    except KeyboardInterrupt:
        print("\nCaught Ctrl + C. Saving & Exiting")

        speech_samples = []
        while not vad.empty():
            speech_samples.extend(vad.front.samples)
            vad.pop()

        speech_samples = np.array(speech_samples, dtype=np.float32)

        filename_for_speech = time.strftime("%Y%m%d-%H%M%S-speech.wav")
        sf.write(filename_for_speech, speech_samples, samplerate=sample_rate)

        filename_for_all = time.strftime("%Y%m%d-%H%M%S-all.wav")
        sf.write(filename_for_all, all_samples, samplerate=sample_rate)

        print(f"Saved to {filename_for_speech} and {filename_for_all}")


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/vad-with-non-streaming-asr.py
================================================
#!/usr/bin/env python3
#
# Copyright (c)  2023  Xiaomi Corporation

"""
This file demonstrates how to use sherpa-onnx Python APIs
with VAD and non-streaming ASR models for speech recognition
from a microphone.

Note that you need a non-streaming model for this script.

(1) For paraformer

    ./python-api-examples/vad-with-non-streaming-asr.py  \
      --silero-vad-model=/path/to/silero_vad.onnx \
      --tokens=/path/to/tokens.txt \
      --paraformer=/path/to/paraformer.onnx \
      --num-threads=2 \
      --decoding-method=greedy_search \
      --debug=false \
      --sample-rate=16000 \
      --feature-dim=80

(2) For transducer models from icefall

    ./python-api-examples/vad-with-non-streaming-asr.py  \
      --silero-vad-model=/path/to/silero_vad.onnx \
      --tokens=/path/to/tokens.txt \
      --encoder=/path/to/encoder.onnx \
      --decoder=/path/to/decoder.onnx \
      --joiner=/path/to/joiner.onnx \
      --num-threads=2 \
      --decoding-method=greedy_search \
      --debug=false \
      --sample-rate=16000 \
      --feature-dim=80

(3) For Moonshine models

./python-api-examples/vad-with-non-streaming-asr.py  \
  --silero-vad-model=/path/to/silero_vad.onnx \
  --moonshine-preprocessor=./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx \
  --moonshine-encoder=./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx \
  --moonshine-uncached-decoder=./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx \
  --moonshine-cached-decoder=./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx \
  --tokens=./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt \
  --num-threads=2

(4) For Whisper models

./python-api-examples/vad-with-non-streaming-asr.py  \
  --silero-vad-model=/path/to/silero_vad.onnx \
  --whisper-encoder=./sherpa-onnx-whisper-base.en/base.en-encoder.int8.onnx \
  --whisper-decoder=./sherpa-onnx-whisper-base.en/base.en-decoder.int8.onnx \
  --tokens=./sherpa-onnx-whisper-base.en/base.en-tokens.txt \
  --whisper-task=transcribe \
  --num-threads=2

(5) For SenseVoice CTC models

./python-api-examples/vad-with-non-streaming-asr.py  \
  --silero-vad-model=/path/to/silero_vad.onnx \
  --sense-voice=./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.onnx \
  --tokens=./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt \
  --num-threads=2

Please refer to
https://k2-fsa.github.io/sherpa/onnx/index.html
to install sherpa-onnx and to download non-streaming pre-trained models
used in this file.

Please visit
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
to download silero_vad.onnx

For instance,

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

"""
import argparse
import sys
from pathlib import Path

import numpy as np

try:
    import sounddevice as sd
except ImportError:
    print("Please install sounddevice first. You can use")
    print()
    print("  pip install sounddevice")
    print()
    print("to install it")
    sys.exit(-1)

import sherpa_onnx


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--silero-vad-model",
        type=str,
        required=True,
        help="Path to silero_vad.onnx",
    )

    parser.add_argument(
        "--tokens",
        type=str,
        help="Path to tokens.txt",
    )

    parser.add_argument(
        "--encoder",
        default="",
        type=str,
        help="Path to the transducer encoder model",
    )

    parser.add_argument(
        "--decoder",
        default="",
        type=str,
        help="Path to the transducer decoder model",
    )

    parser.add_argument(
        "--joiner",
        default="",
        type=str,
        help="Path to the transducer joiner model",
    )

    parser.add_argument(
        "--paraformer",
        default="",
        type=str,
        help="Path to the model.onnx from Paraformer",
    )

    parser.add_argument(
        "--sense-voice",
        default="",
        type=str,
        help="Path to the model.onnx from SenseVoice",
    )

    parser.add_argument(
        "--num-threads",
        type=int,
        default=1,
        help="Number of threads for neural network computation",
    )

    parser.add_argument(
        "--whisper-encoder",
        default="",
        type=str,
        help="Path to whisper encoder model",
    )

    parser.add_argument(
        "--whisper-decoder",
        default="",
        type=str,
        help="Path to whisper decoder model",
    )

    parser.add_argument(
        "--whisper-language",
        default="",
        type=str,
        help="""It specifies the spoken language in the input file.
        Example values: en, fr, de, zh, jp.
        Available languages for multilingual models can be found at
        https://github.com/openai/whisper/blob/main/whisper/tokenizer.py#L10
        If not specified, we infer the language from the input audio file.
        """,
    )

    parser.add_argument(
        "--whisper-task",
        default="transcribe",
        choices=["transcribe", "translate"],
        type=str,
        help="""For multilingual models, if you specify translate, the output
        will be in English.
        """,
    )

    parser.add_argument(
        "--whisper-tail-paddings",
        default=-1,
        type=int,
        help="""Number of tail padding frames.
        We have removed the 30-second constraint from whisper, so you need to
        choose the amount of tail padding frames by yourself.
        Use -1 to use a default value for tail padding.
        """,
    )

    parser.add_argument(
        "--moonshine-preprocessor",
        default="",
        type=str,
        help="Path to moonshine preprocessor model",
    )

    parser.add_argument(
        "--moonshine-encoder",
        default="",
        type=str,
        help="Path to moonshine encoder model",
    )

    parser.add_argument(
        "--moonshine-uncached-decoder",
        default="",
        type=str,
        help="Path to moonshine uncached decoder model",
    )

    parser.add_argument(
        "--moonshine-cached-decoder",
        default="",
        type=str,
        help="Path to moonshine cached decoder model",
    )

    parser.add_argument(
        "--blank-penalty",
        type=float,
        default=0.0,
        help="""
        The penalty applied on blank symbol during decoding.
        Note: It is a positive value that would be applied to logits like
        this `logits[:, 0] -= blank_penalty` (suppose logits.shape is
        [batch_size, vocab] and blank id is 0).
        """,
    )

    parser.add_argument(
        "--decoding-method",
        type=str,
        default="greedy_search",
        help="""Valid values are greedy_search and modified_beam_search.
        modified_beam_search is valid only for transducer models.
        """,
    )
    parser.add_argument(
        "--debug",
        type=bool,
        default=False,
        help="True to show debug messages when loading modes.",
    )

    parser.add_argument(
        "--sample-rate",
        type=int,
        default=16000,
        help="""Sample rate of the feature extractor. Must match the one
        expected by the model.""",
    )

    parser.add_argument(
        "--feature-dim",
        type=int,
        default=80,
        help="Feature dimension. Must match the one expected by the model",
    )

    parser.add_argument(
        "--hr-lexicon",
        type=str,
        default="",
        help="If not empty, it is the lexicon.txt for homophone replacer",
    )

    parser.add_argument(
        "--hr-rule-fsts",
        type=str,
        default="",
        help="If not empty, it is the replace.fst for homophone replacer",
    )

    return parser.parse_args()


def assert_file_exists(filename: str):
    assert Path(filename).is_file(), (
        f"{filename} does not exist!\n"
        "Please refer to "
        "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html to download it"
    )


def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
    if args.encoder:
        assert len(args.paraformer) == 0, args.paraformer
        assert len(args.sense_voice) == 0, args.sense_voice
        assert len(args.whisper_encoder) == 0, args.whisper_encoder
        assert len(args.whisper_decoder) == 0, args.whisper_decoder
        assert len(args.moonshine_preprocessor) == 0, args.moonshine_preprocessor
        assert len(args.moonshine_encoder) == 0, args.moonshine_encoder
        assert (
            len(args.moonshine_uncached_decoder) == 0
        ), args.moonshine_uncached_decoder
        assert len(args.moonshine_cached_decoder) == 0, args.moonshine_cached_decoder

        assert_file_exists(args.encoder)
        assert_file_exists(args.decoder)
        assert_file_exists(args.joiner)

        recognizer = sherpa_onnx.OfflineRecognizer.from_transducer(
            encoder=args.encoder,
            decoder=args.decoder,
            joiner=args.joiner,
            tokens=args.tokens,
            num_threads=args.num_threads,
            sample_rate=args.sample_rate,
            feature_dim=args.feature_dim,
            decoding_method=args.decoding_method,
            blank_penalty=args.blank_penalty,
            debug=args.debug,
            hr_rule_fsts=args.hr_rule_fsts,
            hr_lexicon=args.hr_lexicon,
        )
    elif args.paraformer:
        assert len(args.sense_voice) == 0, args.sense_voice
        assert len(args.whisper_encoder) == 0, args.whisper_encoder
        assert len(args.whisper_decoder) == 0, args.whisper_decoder
        assert len(args.moonshine_preprocessor) == 0, args.moonshine_preprocessor
        assert len(args.moonshine_encoder) == 0, args.moonshine_encoder
        assert (
            len(args.moonshine_uncached_decoder) == 0
        ), args.moonshine_uncached_decoder
        assert len(args.moonshine_cached_decoder) == 0, args.moonshine_cached_decoder

        assert_file_exists(args.paraformer)

        recognizer = sherpa_onnx.OfflineRecognizer.from_paraformer(
            paraformer=args.paraformer,
            tokens=args.tokens,
            num_threads=args.num_threads,
            sample_rate=args.sample_rate,
            feature_dim=args.feature_dim,
            decoding_method=args.decoding_method,
            debug=args.debug,
            hr_rule_fsts=args.hr_rule_fsts,
            hr_lexicon=args.hr_lexicon,
        )
    elif args.sense_voice:
        assert len(args.whisper_encoder) == 0, args.whisper_encoder
        assert len(args.whisper_decoder) == 0, args.whisper_decoder
        assert len(args.moonshine_preprocessor) == 0, args.moonshine_preprocessor
        assert len(args.moonshine_encoder) == 0, args.moonshine_encoder
        assert (
            len(args.moonshine_uncached_decoder) == 0
        ), args.moonshine_uncached_decoder
        assert len(args.moonshine_cached_decoder) == 0, args.moonshine_cached_decoder

        assert_file_exists(args.sense_voice)
        recognizer = sherpa_onnx.OfflineRecognizer.from_sense_voice(
            model=args.sense_voice,
            tokens=args.tokens,
            num_threads=args.num_threads,
            use_itn=True,
            debug=args.debug,
            hr_rule_fsts=args.hr_rule_fsts,
            hr_lexicon=args.hr_lexicon,
        )
    elif args.whisper_encoder:
        assert_file_exists(args.whisper_encoder)
        assert_file_exists(args.whisper_decoder)
        assert len(args.moonshine_preprocessor) == 0, args.moonshine_preprocessor
        assert len(args.moonshine_encoder) == 0, args.moonshine_encoder
        assert (
            len(args.moonshine_uncached_decoder) == 0
        ), args.moonshine_uncached_decoder
        assert len(args.moonshine_cached_decoder) == 0, args.moonshine_cached_decoder

        recognizer = sherpa_onnx.OfflineRecognizer.from_whisper(
            encoder=args.whisper_encoder,
            decoder=args.whisper_decoder,
            tokens=args.tokens,
            num_threads=args.num_threads,
            decoding_method=args.decoding_method,
            debug=args.debug,
            language=args.whisper_language,
            task=args.whisper_task,
            tail_paddings=args.whisper_tail_paddings,
            hr_rule_fsts=args.hr_rule_fsts,
            hr_lexicon=args.hr_lexicon,
        )
    elif args.moonshine_preprocessor:
        assert_file_exists(args.moonshine_preprocessor)
        assert_file_exists(args.moonshine_encoder)
        assert_file_exists(args.moonshine_uncached_decoder)
        assert_file_exists(args.moonshine_cached_decoder)

        recognizer = sherpa_onnx.OfflineRecognizer.from_moonshine(
            preprocessor=args.moonshine_preprocessor,
            encoder=args.moonshine_encoder,
            uncached_decoder=args.moonshine_uncached_decoder,
            cached_decoder=args.moonshine_cached_decoder,
            tokens=args.tokens,
            num_threads=args.num_threads,
            decoding_method=args.decoding_method,
            debug=args.debug,
            hr_rule_fsts=args.hr_rule_fsts,
            hr_lexicon=args.hr_lexicon,
        )
    else:
        raise ValueError("Please specify at least one model")

    return recognizer


def main():
    devices = sd.query_devices()
    if len(devices) == 0:
        print("No microphone devices found")
        sys.exit(0)

    print(devices)

    # If you want to select a different input device, please use
    # sd.default.device[0] = xxx
    # where xxx is the device number

    default_input_device_idx = sd.default.device[0]
    print(f'Use default device: {devices[default_input_device_idx]["name"]}')

    args = get_args()
    assert_file_exists(args.tokens)
    assert_file_exists(args.silero_vad_model)

    assert args.num_threads > 0, args.num_threads

    assert (
        args.sample_rate == 16000
    ), f"Only sample rate 16000 is supported.Given: {args.sample_rate}"

    print("Creating recognizer. Please wait...")
    recognizer = create_recognizer(args)

    config = sherpa_onnx.VadModelConfig()
    config.silero_vad.model = args.silero_vad_model
    config.silero_vad.min_silence_duration = 0.25
    config.sample_rate = args.sample_rate

    window_size = config.silero_vad.window_size

    vad = sherpa_onnx.VoiceActivityDetector(config, buffer_size_in_seconds=100)

    samples_per_read = int(0.1 * args.sample_rate)  # 0.1 second = 100 ms

    print("Started! Please speak")

    buffer = []
    texts = []
    with sd.InputStream(channels=1, dtype="float32", samplerate=args.sample_rate) as s:
        while True:
            samples, _ = s.read(samples_per_read)  # a blocking read
            samples = samples.reshape(-1)

            buffer = np.concatenate([buffer, samples])
            while len(buffer) > window_size:
                vad.accept_waveform(buffer[:window_size])
                buffer = buffer[window_size:]

            while not vad.empty():
                stream = recognizer.create_stream()
                stream.accept_waveform(args.sample_rate, vad.front.samples)

                vad.pop()
                recognizer.decode_stream(stream)

                text = stream.result.text.strip().lower()
                if len(text):
                    idx = len(texts)
                    texts.append(text)
                    print(f"{idx}: {text}")


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\nCaught Ctrl + C. Exiting")


================================================
FILE: python-api-examples/web/.gitignore
================================================
*.pem
*.key
*.crt


================================================
FILE: python-api-examples/web/generate-certificate.py
================================================
#!/usr/bin/env python3

"""
pip install pyopenssl
"""

from OpenSSL import crypto

# The code in this file is modified from
# https://stackoverflow.com/questions/27164354/create-a-self-signed-x509-certificate-in-python

"""
This script generates 3 files:
    - private.key
    - selfsigned.crt
    - cert.pem

You need cert.pem when you start a https server
or a secure websocket server.

Note: You need to change serialNumber if you want to generate
a new certificate as two different certificates cannot share
the same serial number if they are issued by the same organization.

Otherwise, you may get the following error from within you browser:

  An error occurred during a connection to 127.0.0.1:6007. You have received an
  invalid certificate. Please contact the server administrator or email
  correspondent and give them the following information: Your certificate
  contains the same serial number as another certificate issued by the
  certificate authority. Please get a new certificate containing a unique
  serial number. Error code: SEC_ERROR_REUSED_ISSUER_AND_SERIAL

"""


def cert_gen(
    emailAddress="https://github.com/k2-fsa/sherpa-onnx",
    commonName="sherpa-onnx",
    countryName="CN",
    localityName="k2-fsa",
    stateOrProvinceName="k2-fsa",
    organizationName="k2-fsa",
    organizationUnitName="k2-fsa",
    serialNumber=3,
    validityStartInSeconds=0,
    validityEndInSeconds=10 * 365 * 24 * 60 * 60,
    KEY_FILE="private.key",
    CERT_FILE="selfsigned.crt",
    ALL_IN_ONE_FILE="cert.pem",
):
    # can look at generated file using openssl:
    # openssl x509 -inform pem -in selfsigned.crt -noout -text
    # create a key pair
    k = crypto.PKey()
    k.generate_key(crypto.TYPE_RSA, 4096)
    # create a self-signed cert
    cert = crypto.X509()
    cert.get_subject().C = countryName
    cert.get_subject().ST = stateOrProvinceName
    cert.get_subject().L = localityName
    cert.get_subject().O = organizationName  # noqa
    cert.get_subject().OU = organizationUnitName
    cert.get_subject().CN = commonName
    cert.get_subject().emailAddress = emailAddress
    cert.set_serial_number(serialNumber)
    cert.gmtime_adj_notBefore(0)
    cert.gmtime_adj_notAfter(validityEndInSeconds)
    cert.set_issuer(cert.get_subject())
    cert.set_pubkey(k)
    cert.sign(k, "sha512")
    with open(CERT_FILE, "wt") as f:
        f.write(crypto.dump_certificate(crypto.FILETYPE_PEM, cert).decode("utf-8"))
    with open(KEY_FILE, "wt") as f:
        f.write(crypto.dump_privatekey(crypto.FILETYPE_PEM, k).decode("utf-8"))

    with open(ALL_IN_ONE_FILE, "wt") as f:
        f.write(crypto.dump_privatekey(crypto.FILETYPE_PEM, k).decode("utf-8"))
        f.write(crypto.dump_certificate(crypto.FILETYPE_PEM, cert).decode("utf-8"))
    print(f"Generated {CERT_FILE}")
    print(f"Generated {KEY_FILE}")
    print(f"Generated {ALL_IN_ONE_FILE}")


cert_gen()


================================================
FILE: python-api-examples/web/index.html
================================================
<!doctype html>
<html lang="en">
<head>
  <!-- Required meta tags -->
  <meta charset="utf-8"></meta>
  <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"></meta>

  <!-- Bootstrap CSS -->
  <link rel="stylesheet"
        href="./css/bootstrap.min.css"
        integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T"
        crossorigin="anonymous">
  </link>
  <link rel="icon"
      type="image/png"
      href="./k2-logo.png">

  <script src="./js/jquery-3.6.0.min.js" integrity="sha256-/xUj+3OJU5yExlq6GSYGSHk7tPXikynS7ogEvDej/m4=" crossorigin="anonymous"></script>

  <title>Next-gen Kaldi demo</title>
</head>


<body>
  <div id="nav"></div>
  <script>
    $(function(){
      $("#nav").load("nav-partial.html");
    });
  </script>

  <ul class="list-unstyled">
  <li class="media">
    <div class="media-body">
      <h5 class="mt-0 mb-1">Upload</h5>
      <p>Recognition from a selected file</p>
    </div>
  <li>

  <li class="media">
    <div class="media-body">
      <h5 class="mt-0 mb-1">Streaming_Record</h5>
      <p>Recognition from real-time recordings</p>
    </div>
  </li>

  <li class="media">
    <div class="media-body">
      <h5 class="mt-0 mb-1">Offline_Record</h5>
      <p>Recognition from offline recordings</p>
    </div>
  </li>
  </ul>

  Code is available at
  <a href="https://github.com/k2-fsa/sherpa-onnx"> https://github.com/k2-fsa/sherpa-onnx</a>

  <!-- Optional JavaScript -->
  <!-- jQuery first, then Popper.js, then Bootstrap JS -->
  <script src="./js/popper.min.js"
          integrity="sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1"
          crossorigin="anonymous">
  </script>

  <script src="./js/bootstrap.min.js"
          integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM"
          crossorigin="anonymous">
  </script>

</body>
</html>


================================================
FILE: python-api-examples/web/js/offline_record.js
================================================
// This file copies and modifies code
// from https://mdn.github.io/web-dictaphone/scripts/app.js
// and https://gist.github.com/meziantou/edb7217fddfbb70e899e

var socket;

const serverIpInput = document.getElementById('server-ip');
const serverPortInput = document.getElementById('server-port');

const connectBtn = document.getElementById('connect');
const uploadBtn = document.getElementById('file');

function initWebSocket() {
  let protocol = 'ws://';
  if (window.location.protocol == 'https:') {
    protocol = 'wss://'
  }
  let server_ip = serverIpInput.value;
  let server_port = serverPortInput.value;
  console.log('protocol: ', protocol);
  console.log('server_ip: ', server_ip);
  console.log('server_port: ', server_port);

  let uri = protocol + server_ip + ':' + server_port;
  console.log('uri', uri);
  socket = new WebSocket(uri);

  // Connection opened
  socket.addEventListener('open', function(event) {
    console.log('connected');
    recordBtn.disabled = false;
    connectBtn.disabled = true;
    connectBtn.innerHTML = 'Connected!';
  });

  // Connection closed
  socket.addEventListener('close', function(event) {
    console.log('disconnected');
    recordBtn.disabled = true;
    stopBtn.disabled = true;
    connectBtn.disabled = false;
    connectBtn.innerHTML = 'Click me to connect!';
  });

  // Listen for messages
  socket.addEventListener('message', function(event) {
    console.log('Received message: ', event.data);

    document.getElementById('results').value = event.data;
    socket.send('Done');
    console.log('Sent Done');
    socket.close();
  });
}

const recordBtn = document.getElementById('offline_record');
const stopBtn = document.getElementById('offline_stop');
const clearBtn = document.getElementById('clear');
const soundClips = document.getElementById('sound-clips');
const canvas = document.getElementById('canvas');
const mainSection = document.querySelector('.container');

recordBtn.disabled = true;
stopBtn.disabled = true;

window.onload = (event) => {
  console.log('page is fully loaded');
  console.log('protocol', window.location.protocol);
  console.log('port', window.location.port);
  if (window.location.protocol == 'https:') {
    document.getElementById('ws-protocol').textContent = 'wss://';
  }
  serverIpInput.value = window.location.hostname;
  serverPortInput.value = window.location.port;
};

connectBtn.onclick = function() {
  initWebSocket();
};


let audioCtx;
const canvasCtx = canvas.getContext('2d');
let mediaStream;
let analyser;

let expectedSampleRate = 16000;
let recordSampleRate;  // the sampleRate of the microphone
let recorder = null;   // the microphone
let leftchannel = [];  // TODO: Use a single channel

let recordingLength = 0;  // number of samples so far

clearBtn.onclick = function() {
  document.getElementById('results').value = '';
};

function send_header(n) {
  const header = new ArrayBuffer(8);
  new DataView(header).setInt32(0, expectedSampleRate, true /* littleEndian */);
  new DataView(header).setInt32(4, n, true /* littleEndian */);
  socket.send(new Int32Array(header, 0, 2));
}

// copied/modified from https://mdn.github.io/web-dictaphone/
// and
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
if (navigator.mediaDevices.getUserMedia) {
  console.log('getUserMedia supported.');

  // see https://w3c.github.io/mediacapture-main/#dom-mediadevices-getusermedia
  const constraints = {audio: true};

  let onSuccess = function(stream) {
    if (!audioCtx) {
      audioCtx = new AudioContext();
    }
    console.log(audioCtx);
    recordSampleRate = audioCtx.sampleRate;
    console.log('sample rate ' + recordSampleRate);

    // creates an audio node from the microphone incoming stream
    mediaStream = audioCtx.createMediaStreamSource(stream);
    console.log(mediaStream);

    // https://developer.mozilla.org/en-US/docs/Web/API/AudioContext/createScriptProcessor
    // bufferSize: the onaudioprocess event is called when the buffer is full
    var bufferSize = 2048;
    var numberOfInputChannels = 2;
    var numberOfOutputChannels = 2;
    if (audioCtx.createScriptProcessor) {
      recorder = audioCtx.createScriptProcessor(
          bufferSize, numberOfInputChannels, numberOfOutputChannels);
    } else {
      recorder = audioCtx.createJavaScriptNode(
          bufferSize, numberOfInputChannels, numberOfOutputChannels);
    }
    console.log(recorder);

    recorder.onaudioprocess = function(e) {
      let samples = new Float32Array(e.inputBuffer.getChannelData(0))
      samples = downsampleBuffer(samples, expectedSampleRate);
      let buf = new Int16Array(samples.length);
      for (var i = 0; i < samples.length; ++i) {
        let s = samples[i];
        if (s >= 1)
          s = 1;
        else if (s <= -1)
          s = -1;
        buf[i] = s * 32767;
      }
      leftchannel.push(buf);
      recordingLength += bufferSize;
    };

    visualize(stream);
    mediaStream.connect(analyser);

    recordBtn.onclick = function() {
      mediaStream.connect(recorder);
      mediaStream.connect(analyser);
      recorder.connect(audioCtx.destination);

      console.log('recorder started');
      recordBtn.style.background = 'red';

      stopBtn.disabled = false;
      recordBtn.disabled = true;
    };

    stopBtn.onclick = function() {
      console.log('recorder stopped');

      // stopBtn recording
      recorder.disconnect(audioCtx.destination);
      mediaStream.disconnect(recorder);
      mediaStream.disconnect(analyser);

      recordBtn.style.background = '';
      recordBtn.style.color = '';
      // mediaRecorder.requestData();

      stopBtn.disabled = true;
      recordBtn.disabled = false;

      const clipName =
          prompt('Enter a name for your sound clip?', 'My unnamed clip');

      const clipContainer = document.createElement('article');
      const clipLabel = document.createElement('p');
      const audio = document.createElement('audio');
      const deleteButton = document.createElement('button');
      clipContainer.classList.add('clip');
      audio.setAttribute('controls', '');
      deleteButton.textContent = 'Delete';
      deleteButton.className = 'delete';

      if (clipName === null) {
        clipLabel.textContent = 'My unnamed clip';
      } else {
        clipLabel.textContent = clipName;
      }

      clipContainer.appendChild(audio);

      clipContainer.appendChild(clipLabel);
      clipContainer.appendChild(deleteButton);
      soundClips.appendChild(clipContainer);

      audio.controls = true;
      let samples = flatten(leftchannel);
      let buf = new Float32Array(samples.length);
      for (var i = 0; i < samples.length; ++i) {
        let s = samples[i];
        buf[i] = s / 32767.0;
      }
      const blob = toWav(samples);

      leftchannel = [];
      const audioURL = window.URL.createObjectURL(blob);
      audio.src = audioURL;
      console.log('recorder stopped');

      deleteButton.onclick = function(e) {
        let evtTgt = e.target;
        evtTgt.parentNode.parentNode.removeChild(evtTgt.parentNode);
      };

      clipLabel.onclick = function() {
        const existingName = clipLabel.textContent;
        const newClipName = prompt('Enter a new name for your sound clip?');
        if (newClipName === null) {
          clipLabel.textContent = existingName;
        } else {
          clipLabel.textContent = newClipName;
        }
      };

      buf = buf.buffer

      let n = 1024 * 4;  // send this number of bytes per request.
      console.log('buf length, ' + buf.byteLength);
      send_header(buf.byteLength);

      for (let start = 0; start < buf.byteLength; start += n) {
        socket.send(buf.slice(start, start + n));
      }
    };
  };

  let onError = function(err) {
    console.log('The following error occurred: ' + err);
  };

  navigator.mediaDevices.getUserMedia(constraints).then(onSuccess, onError);
} else {
  console.log('getUserMedia not supported on your browser!');
  alert('getUserMedia not supported on your browser!');
}

function visualize(stream) {
  if (!audioCtx) {
    audioCtx = new AudioContext();
  }

  const source = audioCtx.createMediaStreamSource(stream);

  if (!analyser) {
    analyser = audioCtx.createAnalyser();
    analyser.fftSize = 2048;
  }
  const bufferLength = analyser.frequencyBinCount;
  const dataArray = new Uint8Array(bufferLength);

  // source.connect(analyser);
  // analyser.connect(audioCtx.destination);

  draw()

  function draw() {
    const WIDTH = canvas.width
    const HEIGHT = canvas.height;

    requestAnimationFrame(draw);

    analyser.getByteTimeDomainData(dataArray);

    canvasCtx.fillStyle = 'rgb(200, 200, 200)';
    canvasCtx.fillRect(0, 0, WIDTH, HEIGHT);

    canvasCtx.lineWidth = 2;
    canvasCtx.strokeStyle = 'rgb(0, 0, 0)';

    canvasCtx.beginPath();

    let sliceWidth = WIDTH * 1.0 / bufferLength;
    let x = 0;

    for (let i = 0; i < bufferLength; i++) {
      let v = dataArray[i] / 128.0;
      let y = v * HEIGHT / 2;

      if (i === 0) {
        canvasCtx.moveTo(x, y);
      } else {
        canvasCtx.lineTo(x, y);
      }

      x += sliceWidth;
    }

    canvasCtx.lineTo(canvas.width, canvas.height / 2);
    canvasCtx.stroke();
  }
}

window.onresize = function() {
  canvas.width = mainSection.offsetWidth;
};

window.onresize();

// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
function flatten(listOfSamples) {
  let n = 0;
  for (let i = 0; i < listOfSamples.length; ++i) {
    n += listOfSamples[i].length;
  }
  let ans = new Int16Array(n);

  let offset = 0;
  for (let i = 0; i < listOfSamples.length; ++i) {
    ans.set(listOfSamples[i], offset);
    offset += listOfSamples[i].length;
  }
  return ans;
}

// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
function toWav(samples) {
  let buf = new ArrayBuffer(44 + samples.length * 2);
  var view = new DataView(buf);

  // http://soundfile.sapp.org/doc/WaveFormat/
  //                   F F I R
  view.setUint32(0, 0x46464952, true);               // chunkID
  view.setUint32(4, 36 + samples.length * 2, true);  // chunkSize
  //                   E V A W
  view.setUint32(8, 0x45564157, true);  // format
                                        //
  //                      t m f
  view.setUint32(12, 0x20746d66, true);          // subchunk1ID
  view.setUint32(16, 16, true);                  // subchunk1Size, 16 for PCM
  view.setUint32(20, 1, true);                   // audioFormat, 1 for PCM
  view.setUint16(22, 1, true);                   // numChannels: 1 channel
  view.setUint32(24, expectedSampleRate, true);  // sampleRate
  view.setUint32(28, expectedSampleRate * 2, true);  // byteRate
  view.setUint16(32, 2, true);                       // blockAlign
  view.setUint16(34, 16, true);                      // bitsPerSample
  view.setUint32(36, 0x61746164, true);              // Subchunk2ID
  view.setUint32(40, samples.length * 2, true);      // subchunk2Size

  let offset = 44;
  for (let i = 0; i < samples.length; ++i) {
    view.setInt16(offset, samples[i], true);
    offset += 2;
  }

  return new Blob([view], {type: 'audio/wav'});
}

// this function is copied from
// https://github.com/awslabs/aws-lex-browser-audio-capture/blob/master/lib/worker.js#L46
function downsampleBuffer(buffer, exportSampleRate) {
  if (exportSampleRate === recordSampleRate) {
    return buffer;
  }
  var sampleRateRatio = recordSampleRate / exportSampleRate;
  var newLength = Math.round(buffer.length / sampleRateRatio);
  var result = new Float32Array(newLength);
  var offsetResult = 0;
  var offsetBuffer = 0;
  while (offsetResult < result.length) {
    var nextOffsetBuffer = Math.round((offsetResult + 1) * sampleRateRatio);
    var accum = 0, count = 0;
    for (var i = offsetBuffer; i < nextOffsetBuffer && i < buffer.length; i++) {
      accum += buffer[i];
      count++;
    }
    result[offsetResult] = accum / count;
    offsetResult++;
    offsetBuffer = nextOffsetBuffer;
  }
  return result;
};


================================================
FILE: python-api-examples/web/js/streaming_record.js
================================================
// This file copies and modifies code
// from https://mdn.github.io/web-dictaphone/scripts/app.js
// and https://gist.github.com/meziantou/edb7217fddfbb70e899e

var socket;
var recognition_text = [];

function getDisplayResult() {
  let i = 0;
  let ans = '';
  for (let s in recognition_text) {
    if (recognition_text[s] == '') continue;

    ans += '' + i + ': ' + recognition_text[s] + '\n';
    i += 1;
  }
  return ans;
}

function initWebSocket() {
  console.log('Creating websocket')
  let protocol = 'ws://';
  if (window.location.protocol == 'https:') {
    protocol = 'wss://'
  }
  let server_ip = serverIpInput.value;
  let server_port = serverPortInput.value;
  console.log('protocol: ', protocol);
  console.log('server_ip: ', server_ip);
  console.log('server_port: ', server_port);

  let uri = protocol + server_ip + ':' + server_port;
  console.log('uri', uri);
  socket = new WebSocket(uri);
  // socket = new WebSocket('wss://localhost:6006/');

  // Connection opened
  socket.addEventListener('open', function(event) {
    console.log('connected');
    recordBtn.disabled = false;
    connectBtn.disabled = true;
    connectBtn.innerHTML = 'Connected!';
  });

  // Connection closed
  socket.addEventListener('close', function(event) {
    console.log('disconnected');
    recordBtn.disabled = true;
    connectBtn.disabled = false;
    connectBtn.innerHTML = 'Click me to connect!';
  });

  // Listen for messages
  socket.addEventListener('message', function(event) {
    let message = JSON.parse(event.data);
    if (message.segment in recognition_text) {
      recognition_text[message.segment] = message.text;
    } else {
      recognition_text.push(message.text);
    }
    let text_area = document.getElementById('results');
    text_area.value = getDisplayResult();
    text_area.scrollTop = text_area.scrollHeight;  // auto scroll
    console.log('Received message: ', event.data);
  });
}

window.onload = (event) => {
  console.log('page is fully loaded');
  console.log('protocol', window.location.protocol);
  console.log('port', window.location.port);
  if (window.location.protocol == 'https:') {
    document.getElementById('ws-protocol').textContent = 'wss://';
  }
  serverIpInput.value = window.location.hostname;
  serverPortInput.value = window.location.port;
};

const serverIpInput = document.getElementById('server-ip');
const serverPortInput = document.getElementById('server-port');

const connectBtn = document.getElementById('connect');
const recordBtn = document.getElementById('streaming_record');
const stopBtn = document.getElementById('streaming_stop');
const clearBtn = document.getElementById('clear');
const soundClips = document.getElementById('sound-clips');
const canvas = document.getElementById('canvas');
const mainSection = document.querySelector('.container');

stopBtn.disabled = true;
recordBtn.disabled = true;

let audioCtx;
const canvasCtx = canvas.getContext('2d');
let mediaStream;
let analyser;

let expectedSampleRate = 16000;
let recordSampleRate;  // the sampleRate of the microphone
let recorder = null;   // the microphone
let leftchannel = [];  // TODO: Use a single channel

let recordingLength = 0;  // number of samples so far

clearBtn.onclick = function() {
  document.getElementById('results').value = '';
  recognition_text = [];
};

connectBtn.onclick = function() {
  initWebSocket();
};

// copied/modified from https://mdn.github.io/web-dictaphone/
// and
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
if (navigator.mediaDevices.getUserMedia) {
  console.log('getUserMedia supported.');

  // see https://w3c.github.io/mediacapture-main/#dom-mediadevices-getusermedia
  const constraints = {audio: true};

  let onSuccess = function(stream) {
    if (!audioCtx) {
      audioCtx = new AudioContext();
    }
    console.log(audioCtx);
    recordSampleRate = audioCtx.sampleRate;
    console.log('sample rate ' + recordSampleRate);

    // creates an audio node from the microphone incoming stream
    mediaStream = audioCtx.createMediaStreamSource(stream);
    console.log(mediaStream);

    // https://developer.mozilla.org/en-US/docs/Web/API/AudioContext/createScriptProcessor
    // bufferSize: the onaudioprocess event is called when the buffer is full
    var bufferSize = 2048;
    var numberOfInputChannels = 2;
    var numberOfOutputChannels = 2;
    if (audioCtx.createScriptProcessor) {
      recorder = audioCtx.createScriptProcessor(
          bufferSize, numberOfInputChannels, numberOfOutputChannels);
    } else {
      recorder = audioCtx.createJavaScriptNode(
          bufferSize, numberOfInputChannels, numberOfOutputChannels);
    }
    console.log(recorder);

    recorder.onaudioprocess = function(e) {
      let samples = new Float32Array(e.inputBuffer.getChannelData(0))
      samples = downsampleBuffer(samples, expectedSampleRate);

      let buf = new Int16Array(samples.length);
      for (var i = 0; i < samples.length; ++i) {
        let s = samples[i];
        if (s >= 1)
          s = 1;
        else if (s <= -1)
          s = -1;

        samples[i] = s;
        buf[i] = s * 32767;
      }

      socket.send(samples);

      leftchannel.push(buf);
      recordingLength += bufferSize;
    };

    visualize(stream);
    mediaStream.connect(analyser);

    recordBtn.onclick = function() {
      mediaStream.connect(recorder);
      mediaStream.connect(analyser);
      recorder.connect(audioCtx.destination);

      console.log('recorder started');
      recordBtn.style.background = 'red';

      stopBtn.disabled = false;
      recordBtn.disabled = true;
    };

    stopBtn.onclick = function() {
      console.log('recorder stopped');

      socket.send('Done');
      console.log('Sent Done');

      socket.close();

      // stopBtn recording
      recorder.disconnect(audioCtx.destination);
      mediaStream.disconnect(recorder);
      mediaStream.disconnect(analyser);

      recordBtn.style.background = '';
      recordBtn.style.color = '';
      // mediaRecorder.requestData();

      stopBtn.disabled = true;
      recordBtn.disabled = false;

      const clipName =
          prompt('Enter a name for your sound clip?', 'My unnamed clip');

      const clipContainer = document.createElement('article');
      const clipLabel = document.createElement('p');
      const audio = document.createElement('audio');
      const deleteButton = document.createElement('button');
      clipContainer.classList.add('clip');
      audio.setAttribute('controls', '');
      deleteButton.textContent = 'Delete';
      deleteButton.className = 'delete';

      if (clipName === null) {
        clipLabel.textContent = 'My unnamed clip';
      } else {
        clipLabel.textContent = clipName;
      }

      clipContainer.appendChild(audio);

      clipContainer.appendChild(clipLabel);
      clipContainer.appendChild(deleteButton);
      soundClips.appendChild(clipContainer);

      audio.controls = true;
      let samples = flatten(leftchannel);
      const blob = toWav(samples);

      leftchannel = [];
      const audioURL = window.URL.createObjectURL(blob);
      audio.src = audioURL;
      console.log('recorder stopped');

      deleteButton.onclick = function(e) {
        let evtTgt = e.target;
        evtTgt.parentNode.parentNode.removeChild(evtTgt.parentNode);
      };

      clipLabel.onclick = function() {
        const existingName = clipLabel.textContent;
        const newClipName = prompt('Enter a new name for your sound clip?');
        if (newClipName === null) {
          clipLabel.textContent = existingName;
        } else {
          clipLabel.textContent = newClipName;
        }
      };
    };
  };

  let onError = function(err) {
    console.log('The following error occurred: ' + err);
  };

  navigator.mediaDevices.getUserMedia(constraints).then(onSuccess, onError);
} else {
  console.log('getUserMedia not supported on your browser!');
  alert('getUserMedia not supported on your browser!');
}

function visualize(stream) {
  if (!audioCtx) {
    audioCtx = new AudioContext();
  }

  const source = audioCtx.createMediaStreamSource(stream);

  if (!analyser) {
    analyser = audioCtx.createAnalyser();
    analyser.fftSize = 2048;
  }
  const bufferLength = analyser.frequencyBinCount;
  const dataArray = new Uint8Array(bufferLength);

  // source.connect(analyser);
  // analyser.connect(audioCtx.destination);

  draw()

  function draw() {
    const WIDTH = canvas.width
    const HEIGHT = canvas.height;

    requestAnimationFrame(draw);

    analyser.getByteTimeDomainData(dataArray);

    canvasCtx.fillStyle = 'rgb(200, 200, 200)';
    canvasCtx.fillRect(0, 0, WIDTH, HEIGHT);

    canvasCtx.lineWidth = 2;
    canvasCtx.strokeStyle = 'rgb(0, 0, 0)';

    canvasCtx.beginPath();

    let sliceWidth = WIDTH * 1.0 / bufferLength;
    let x = 0;

    for (let i = 0; i < bufferLength; i++) {
      let v = dataArray[i] / 128.0;
      let y = v * HEIGHT / 2;

      if (i === 0) {
        canvasCtx.moveTo(x, y);
      } else {
        canvasCtx.lineTo(x, y);
      }

      x += sliceWidth;
    }

    canvasCtx.lineTo(canvas.width, canvas.height / 2);
    canvasCtx.stroke();
  }
}

window.onresize = function() {
  canvas.width = mainSection.offsetWidth;
};

window.onresize();

// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
function flatten(listOfSamples) {
  let n = 0;
  for (let i = 0; i < listOfSamples.length; ++i) {
    n += listOfSamples[i].length;
  }
  let ans = new Int16Array(n);

  let offset = 0;
  for (let i = 0; i < listOfSamples.length; ++i) {
    ans.set(listOfSamples[i], offset);
    offset += listOfSamples[i].length;
  }
  return ans;
}

// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
function toWav(samples) {
  let buf = new ArrayBuffer(44 + samples.length * 2);
  var view = new DataView(buf);

  // http://soundfile.sapp.org/doc/WaveFormat/
  //                   F F I R
  view.setUint32(0, 0x46464952, true);               // chunkID
  view.setUint32(4, 36 + samples.length * 2, true);  // chunkSize
  //                   E V A W
  view.setUint32(8, 0x45564157, true);  // format
                                        //
  //                      t m f
  view.setUint32(12, 0x20746d66, true);          // subchunk1ID
  view.setUint32(16, 16, true);                  // subchunk1Size, 16 for PCM
  view.setUint32(20, 1, true);                   // audioFormat, 1 for PCM
  view.setUint16(22, 1, true);                   // numChannels: 1 channel
  view.setUint32(24, expectedSampleRate, true);  // sampleRate
  view.setUint32(28, expectedSampleRate * 2, true);  // byteRate
  view.setUint16(32, 2, true);                       // blockAlign
  view.setUint16(34, 16, true);                      // bitsPerSample
  view.setUint32(36, 0x61746164, true);              // Subchunk2ID
  view.setUint32(40, samples.length * 2, true);      // subchunk2Size

  let offset = 44;
  for (let i = 0; i < samples.length; ++i) {
    view.setInt16(offset, samples[i], true);
    offset += 2;
  }

  return new Blob([view], {type: 'audio/wav'});
}

// this function is copied from
// https://github.com/awslabs/aws-lex-browser-audio-capture/blob/master/lib/worker.js#L46
function downsampleBuffer(buffer, exportSampleRate) {
  if (exportSampleRate === recordSampleRate) {
    return buffer;
  }
  var sampleRateRatio = recordSampleRate / exportSampleRate;
  var newLength = Math.round(buffer.length / sampleRateRatio);
  var result = new Float32Array(newLength);
  var offsetResult = 0;
  var offsetBuffer = 0;
  while (offsetResult < result.length) {
    var nextOffsetBuffer = Math.round((offsetResult + 1) * sampleRateRatio);
    var accum = 0, count = 0;
    for (var i = offsetBuffer; i < nextOffsetBuffer && i < buffer.length; i++) {
      accum += buffer[i];
      count++;
    }
    result[offsetResult] = accum / count;
    offsetResult++;
    offsetBuffer = nextOffsetBuffer;
  }
  return result;
};


================================================
FILE: python-api-examples/web/js/upload.js
================================================
/**
References
https://developer.mozilla.org/en-US/docs/Web/API/FileList
https://developer.mozilla.org/en-US/docs/Web/API/FileReader
https://javascript.info/arraybuffer-binary-arrays
https://developer.mozilla.org/zh-CN/docs/Web/API/WebSocket
https://developer.mozilla.org/en-US/docs/Web/API/WebSocket/send
*/

var socket;

const serverIpInput = document.getElementById('server-ip');
const serverPortInput = document.getElementById('server-port');

const connectBtn = document.getElementById('connect');
const uploadBtn = document.getElementById('file');

function initWebSocket() {
  let protocol = 'ws://';
  if (window.location.protocol == 'https:') {
    protocol = 'wss://'
  }
  let server_ip = serverIpInput.value;
  let server_port = serverPortInput.value;
  console.log('protocol: ', protocol);
  console.log('server_ip: ', server_ip);
  console.log('server_port: ', server_port);


  let uri = protocol + server_ip + ':' + server_port;
  console.log('uri', uri);
  socket = new WebSocket(uri);

  // Connection opened
  socket.addEventListener('open', function(event) {
    console.log('connected');
    uploadBtn.disabled = false;
    connectBtn.disabled = true;
    connectBtn.innerHTML = 'Connected!';
  });

  // Connection closed
  socket.addEventListener('close', function(event) {
    console.log('disconnected');
    uploadBtn.disabled = true;
    connectBtn.disabled = false;
    connectBtn.innerHTML = 'Click me to connect!';
  });

  // Listen for messages
  socket.addEventListener('message', function(event) {
    console.log('Received message: ', event.data);

    document.getElementById('results').value = event.data;
    socket.send('Done');
    console.log('Sent Done');
    socket.close();
  });
}

window.onload = (event) => {
  console.log('page is fully loaded');
  console.log('protocol', window.location.protocol);
  console.log('port', window.location.port);
  if (window.location.protocol == 'https:') {
    document.getElementById('ws-protocol').textContent = 'wss://';
  }
  serverIpInput.value = window.location.hostname;
  serverPortInput.value = window.location.port;
};

connectBtn.onclick = function() {
  initWebSocket();
};

function send_header(n) {
  const header = new ArrayBuffer(8);
  // assume the uploaded wave is 16000 Hz
  new DataView(header).setInt32(0, 16000, true /* littleEndian */);
  new DataView(header).setInt32(4, n, true /* littleEndian */);
  socket.send(new Int32Array(header, 0, 2));
}

function onFileChange() {
  var files = document.getElementById('file').files;

  if (files.length == 0) {
    console.log('No file selected');
    return;
  }

  console.log('files: ' + files);

  const file = files[0];
  console.log(file);
  console.log('file.name ' + file.name);
  console.log('file.type ' + file.type);
  console.log('file.size ' + file.size);

  let audioCtx = new AudioContext({sampleRate: 16000});

  let reader = new FileReader();
  reader.onload = function() {
    console.log('reading file!');
    audioCtx.decodeAudioData(reader.result, decodedDone);
  };

  function decodedDone(decoded) {
    let typedArray = new Float32Array(decoded.length);
    let float32_samples = decoded.getChannelData(0);
    let buf = float32_samples.buffer

    // Send 1024 audio samples per request.
    //
    // It has two purposes:
    //  (1) Simulate streaming
    //  (2) There is a limit on the number of bytes in the payload that can be
    //      sent by websocket, which is 1MB, I think. We can send a large
    //      audio file for decoding in this approach.
    let n = 1024 * 4;  // send this number of bytes per request.
    console.log('buf length, ' + buf.byteLength);
    send_header(buf.byteLength);
    for (let start = 0; start < buf.byteLength; start += n) {
      socket.send(buf.slice(start, start + n));
    }
  }

  reader.readAsArrayBuffer(file);
}

const clearBtn = document.getElementById('clear');
clearBtn.onclick = function() {
  console.log('clicked');
  document.getElementById('results').value = '';
};


================================================
FILE: python-api-examples/web/nav-partial.html
================================================
  <nav class="navbar navbar-expand-lg navbar-light bg-light">
    <a class="navbar-brand" href="index.html">Next-gen Kaldi demo</a>
      <button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarSupportedContent" aria-controls="navbarSupportedContent" aria-expanded="false" aria-label="Toggle navigation">
        <span class="navbar-toggler-icon"></span>
      </button>
    <div class="collapse navbar-collapse" id="navbarSupportedContent">
      <ul class="navbar-nav mr-auto">
        <li class="nav-item active">
          <a class="nav-link" href="index.html">Home <span class="sr-only">(current)</span></a>
        </li>

        <li class="nav-item">
          <a class="nav-link" href="upload.html">Upload</a>
        </li>

        <li class="nav-item">
          <a class="nav-link" href="streaming_record.html">Streaming-Record</a>
        </li>

        <li class="nav-item">
          <a class="nav-link" href="offline_record.html">Offline-Record</a>
        </li>

      </ul>
    </div>
  </nav>


================================================
FILE: python-api-examples/web/offline_record.html
================================================
<!doctype html>
<html lang="en">
<head>
  <!-- Required meta tags -->
  <meta charset="utf-8"></meta>
  <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"></meta>

  <!-- Bootstrap CSS -->
  <link rel="stylesheet"
        href="./css/bootstrap.min.css"
        integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T"
        crossorigin="anonymous">
  </link>

  <script src="./js/jquery-3.6.0.min.js" integrity="sha256-/xUj+3OJU5yExlq6GSYGSHk7tPXikynS7ogEvDej/m4=" crossorigin="anonymous"></script>

  <title>Next-gen Kaldi demo (Upload file for recognition)</title>
</head>


<body>
  <div id="nav"></div>
  <script>
    $(function(){
      $("#nav").load("nav-partial.html");
    });
  </script>

  <h3>Recognition from offline recordings</h3>
  <div class="container">
    <div class="input-group mb-1">
      <div class="input-group-prepend">
        <button class="btn btn-block btn-primary" type="button" id="connect">Click me to connect</button>
      </div>
      <span class="input-group-text" id="ws-protocol">ws://</span>
      <input type="text" id="server-ip" class="form-control" placeholder="Sherpa-onnx server IP, e.g., localhost" aria-label="sherpa-onnx server IP">
      <span class="input-group-text">:</span>
      <input type="text" id="server-port" class="form-control" placeholder="Sherpa-onnx server port, e.g., 6006" aria-label="sherpa-onnx server port">
    </div>

    <div class="row">
       <div class="col-12">
        <canvas id="canvas" height="60px" display="block" margin-bottom="0.5rem"></canvas>
      </div>
    </div>
    <div class="row">
       <div class="col">
        <button class="btn btn-primary btn-block" id="offline_record">Offline-Record</button>
       </div>
       <div class="col">
        <button class="btn btn-primary btn-block" id="offline_stop">Offline-Stop</button>
       </div>
    </div>
  </div>

  <div class="mb-3">
    <label for="results" class="form-label">Recognition results</label>
    <textarea class="form-control" id="results" rows="8"></textarea>
  </div>

  <button class="btn btn-primary btn-block" id="clear">Clear results</button>

  <section flex="1" overflow="auto" id="sound-clips">
  </section>


  <!-- Optional JavaScript -->
  <!-- jQuery first, then Popper.js, then Bootstrap JS -->
  <script src="./js/popper.min.js"
          integrity="sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1"
          crossorigin="anonymous">
  </script>

  <script src="./js/bootstrap.min.js"
          integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM"
          crossorigin="anonymous">
  </script>

  <script src="./js/offline_record.js"> </script>
</body>
</html>


================================================
FILE: python-api-examples/web/start-https-server.py
================================================
#!/usr/bin/env python3

# Code in this file is modified from
# https://stackoverflow.com/questions/19705785/python-3-simple-https-server

import argparse
import http.server
import ssl
import sys
from pathlib import Path

"""
Usage:

  ./start-https-server.py \
    --server-address 0.0.0.0 \
    --server-port 6007 \
    --cert ./cert.pem
"""


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--server-address",
        type=str,
        default="0.0.0.0",
        help="""IP address which this server will bind to""",
    )

    parser.add_argument(
        "--server-port",
        type=int,
        default=6007,
        help="""Port number on which this server will listen""",
    )

    parser.add_argument(
        "--certificate",
        type=str,
        default="cert.pem",
        help="""Path to the X.509 certificate. You can use
        ./generate-certificate.py to generate it""",
    )

    return parser.parse_args()


def main():
    args = get_args()
    print(f"{vars(args)}")
    server_address = (args.server_address, args.server_port)
    httpd = http.server.HTTPServer(
        server_address, http.server.SimpleHTTPRequestHandler
    )

    if not Path(args.certificate).is_file():
        print("Please run ./generate-certificate.py to generate a certificate")
        sys.exit(-1)

    httpd.socket = ssl.wrap_socket(
        httpd.socket,
        server_side=True,
        certfile=args.certificate,
        ssl_version=ssl.PROTOCOL_TLS,
    )
    print(
        "The server is listening at the following address:\n"
        f"https://{args.server_address}:{args.server_port}"
    )
    httpd.serve_forever()


if __name__ == "__main__":
    main()


================================================
FILE: python-api-examples/web/streaming_record.html
================================================
<!doctype html>
<html lang="en">
<head>
  <!-- Required meta tags -->
  <meta charset="utf-8"></meta>
  <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"></meta>

  <!-- Bootstrap CSS -->
  <link rel="stylesheet"
        href="./css/bootstrap.min.css"
        integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T"
        crossorigin="anonymous">
  </link>

  <script src="./js/jquery-3.6.0.min.js" integrity="sha256-/xUj+3OJU5yExlq6GSYGSHk7tPXikynS7ogEvDej/m4=" crossorigin="anonymous"></script>

  <title>Next-gen Kaldi demo (Upload file for recognition)</title>
</head>


<body>
  <div id="nav"></div>
  <script>
    $(function(){
      $("#nav").load("nav-partial.html");
    });
  </script>

  <h3>Recognition from real-time recordings</h3>
  <div class="container">
    <div class="input-group mb-1">
      <div class="input-group-prepend">
        <button class="btn btn-block btn-primary" type="button" id="connect">Click me to connect</button>
      </div>
      <span class="input-group-text" id="ws-protocol">ws://</span>
      <input type="text" id="server-ip" class="form-control" placeholder="Sherpa-onnx server IP, e.g., localhost" aria-label="sherpa-onnx server IP">
      <span class="input-group-text">:</span>
      <input type="text" id="server-port" class="form-control" placeholder="Sherpa-onnx server port, e.g., 6006" aria-label="sherpa-onnx server port">
    </div>

    <div class="row">
       <div class="col-12">
        <canvas id="canvas" height="60px" display="block" margin-bottom="0.5rem"></canvas>
      </div>
    </div>
    <div class="row">
       <div class="col">
        <button class="btn btn-primary btn-block" id="streaming_record">Streaming-Record</button>
       </div>
       <div class="col">
        <button class="btn btn-primary btn-block" id="streaming_stop">Streaming-Stop</button>
       </div>
    </div>
  </div>

  <div class="mb-3">
    <label for="results" class="form-label">Recognition results</label>
    <textarea class="form-control" id="results" rows="8"></textarea>
  </div>

  <button class="btn btn-primary btn-block" id="clear">Clear results</button>

  <section flex="1" overflow="auto" id="sound-clips">
  </section>


  <!-- Optional JavaScript -->
  <!-- jQuery first, then Popper.js, then Bootstrap JS -->
  <script src="./js/popper.min.js"
          integrity="sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1"
          crossorigin="anonymous">
  </script>

  <script src="./js/bootstrap.min.js"
          integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM"
          crossorigin="anonymous">
  </script>

  <script src="./js/streaming_record.js"> </script>
</body>
</html>


================================================
FILE: python-api-examples/web/upload.html
================================================
<!doctype html>
<html lang="en">
<head>
  <!-- Required meta tags -->
  <meta charset="utf-8"></meta>
  <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"></meta>

  <!-- Bootstrap CSS -->
  <link rel="stylesheet"
        href="./css/bootstrap.min.css"
        integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T"
        crossorigin="anonymous">
  </link>

  <script src="./js/jquery-3.6.0.min.js" integrity="sha256-/xUj+3OJU5yExlq6GSYGSHk7tPXikynS7ogEvDej/m4=" crossorigin="anonymous"></script>

  <title>Next-gen Kaldi demo (Upload file for recognition)</title>
</head>


<body>
  <div id="nav"></div>
  <script>
    $(function(){
      $("#nav").load("nav-partial.html");
    });
  </script>

  <h3>Recognition from a selected file</h3>
  <div class="input-group mb-1">
    <div class="input-group-prepend">
      <button class="btn btn-block btn-primary" type="button" id="connect">Click me to connect</button>
    </div>
    <span class="input-group-text" id="ws-protocol">ws://</span>
    <input type="text" id="server-ip" class="form-control" placeholder="Sherpa-onnx server IP, e.g., localhost" aria-label="sherpa-onnx server IP">
    <span class="input-group-text">:</span>
    <input type="text" id="server-port" class="form-control" placeholder="Sherpa-onnx server port, e.g., 6006" aria-label="sherpa-onnx server port">
  </div>

  <form>
    <div class="mb-3">
      <label for="file" class="form-label">Select file</label>
      <input class="form-control" type="file" id="file" accept=".wav" onchange="onFileChange()" disabled="true"></input>
    </div>

    <div class="mb-3">
      <label for="results" class="form-label">Recognition results</label>
      <textarea class="form-control" id="results" rows="8"></textarea>
    </div>

    <button class="btn btn-primary btn-block" id="clear">Clear results</button>
  </form>

  <!-- Optional JavaScript -->
  <!-- jQuery first, then Popper.js, then Bootstrap JS -->
  <script src="./js/popper.min.js"
          integrity="sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1"
          crossorigin="anonymous">
  </script>

  <script src="./js/bootstrap.min.js"
          integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM"
          crossorigin="anonymous">
  </script>

  <script src="./js/upload.js"> </script>
</body>
</html>


================================================
FILE: python-api-examples/zipvoice-tts-play.py
================================================
#!/usr/bin/env python3
#
# Copyright (c)  2026  Xiaomi Corporation

"""
This file demonstrates how to use sherpa-onnx Python API
for Chinese/English zero-shot TTS with ZipVoice.

Different from ./zipvoice-tts.py, this file plays back the generated audio
while the model is still generating.

Usage:

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
tar xvf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
rm sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos_24khz.onnx

python3 ./python-api-examples/zipvoice-tts-play.py

You can find more models at
https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models

Please see
https://k2-fsa.github.io/sherpa/onnx/tts/zipvoice.html
for details.
"""

import logging
import queue
import sys
import threading
import time
from pathlib import Path

import librosa
import numpy as np
import sherpa_onnx
import soundfile as sf

try:
    import sounddevice as sd
except ImportError:
    print("Please install sounddevice first. You can use")
    print()
    print("  pip install sounddevice")
    print()
    print("to install it")
    sys.exit(-1)


def create_tts():
    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=sherpa_onnx.OfflineTtsModelConfig(
            zipvoice=sherpa_onnx.OfflineTtsZipvoiceModelConfig(
                tokens="./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/tokens.txt",
                encoder="./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/encoder.int8.onnx",
                decoder="./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/decoder.int8.onnx",
                data_dir="./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/espeak-ng-data",
                lexicon="./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/lexicon.txt",
                vocoder="./vocos_24khz.onnx",
            ),
            debug=False,
            num_threads=2,
            provider="cpu",
        )
    )
    if not tts_config.validate():
        raise ValueError(
            "Please read the previous error messages and re-check your config"
        )

    return sherpa_onnx.OfflineTts(tts_config)


buffer = queue.Queue()
started = False
stopped = False
killed = False
sample_rate = None
event = threading.Event()
first_message_time = None


def generated_audio_callback(samples: np.ndarray, progress: float):
    global first_message_time
    if first_message_time is None:
        first_message_time = time.time()

    buffer.put(samples)

    global started
    if started is False:
        logging.info("Start playing ...")
    started = True

    if killed:
        return 0

    return 1


def play_audio_callback(
    outdata: np.ndarray, frames: int, time, status: sd.CallbackFlags
):
    if killed or (started and buffer.empty() and stopped):
        event.set()

    if buffer.empty():
        outdata.fill(0)
        return

    n = 0
    while n < frames and not buffer.empty():
        remaining = frames - n
        k = buffer.queue[0].shape[0]

        if remaining <= k:
            outdata[n:, 0] = buffer.queue[0][:remaining]
            buffer.queue[0] = buffer.queue[0][remaining:]
            n = frames
            if buffer.queue[0].shape[0] == 0:
                buffer.get()

            break

        outdata[n : n + k, 0] = buffer.get()
        n += k

    if n < frames:
        outdata[n:, 0] = 0


def play_audio():
    if False:
        devices = sd.query_devices()
        print(devices)

        default_output_device_idx = sd.default.device[1]
        print(
            f'Use default output device: {devices[default_output_device_idx]["name"]}'
        )

    with sd.OutputStream(
        channels=1,
        callback=play_audio_callback,
        dtype="float32",
        samplerate=sample_rate,
        blocksize=1024,
    ):
        event.wait()

    logging.info("Exiting ...")


def main():
    reference_audio_file = (
        "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/test_wavs/leijun-1.wav"
    )
    if not Path(reference_audio_file).is_file():
        raise ValueError(f"Reference audio {reference_audio_file} does not exist")

    logging.info("Loading model ...")
    tts = create_tts()
    logging.info("Loading model done.")

    reference_audio, reference_sample_rate = librosa.load(reference_audio_file, sr=None)
    reference_text = "那还是三十六年前, 一九八七年. 我呢考上了武汉大学的计算机系."
    text = """
    小米的价值观是真诚, 热爱.
    真诚，就是不欺人也不自欺.
    热爱, 就是全心投入并享受其中.
    """

    global sample_rate
    sample_rate = tts.sample_rate

    gen_config = sherpa_onnx.GenerationConfig()
    gen_config.reference_audio = reference_audio
    gen_config.reference_sample_rate = reference_sample_rate
    gen_config.reference_text = reference_text
    gen_config.num_steps = 4
    gen_config.extra["min_char_in_sentence"] = "30"

    play_back_thread = threading.Thread(target=play_audio)
    play_back_thread.start()

    logging.info("Start generating ...")
    start_time = time.time()
    audio = tts.generate(
        text,
        gen_config,
        callback=generated_audio_callback,
    )
    end_time = time.time()
    logging.info("Finished generating!")

    global stopped
    stopped = True

    if len(audio.samples) == 0:
        print("Error in generating audios. Please read previous error messages.")
        global killed
        killed = True
        play_back_thread.join()
        return

    elapsed_seconds = end_time - start_time
    audio_duration = len(audio.samples) / audio.sample_rate
    real_time_factor = elapsed_seconds / audio_duration

    output_filename = "./generated-zipvoice-zh-en-play.wav"
    sf.write(
        output_filename,
        audio.samples,
        samplerate=audio.sample_rate,
        subtype="PCM_16",
    )
    logging.info(f"The text is '{text}'")
    logging.info(
        "Time in seconds to receive the first "
        f"message: {first_message_time-start_time:.3f}"
    )
    logging.info(f"Elapsed seconds: {elapsed_seconds:.3f}")
    logging.info(f"Audio duration in seconds: {audio_duration:.3f}")
    logging.info(
        f"RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}"
    )

    logging.info(f"***  Saved to {output_filename} ***")

    print("\n   >>>>>>>>> You can safely press ctrl + C to stop the play <<<<<<<<<<\n")

    play_back_thread.join()


if __name__ == "__main__":
    formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"

    logging.basicConfig(format=formatter, level=logging.INFO)
    try:
        main()
    except KeyboardInterrupt:
        print("\nCaught Ctrl + C. Exiting")
        killed = True
        sys.exit(0)


================================================
FILE: python-api-examples/zipvoice-tts.py
================================================
#!/usr/bin/env python3
#
# Copyright (c)  2026  Xiaomi Corporation

"""
This file demonstrates how to use sherpa-onnx Python API
for Chinese/English zero-shot TTS with ZipVoice.


Usage:

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
tar xvf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
rm sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos_24khz.onnx

python3 ./python-api-examples/zipvoice-tts.py

You can find more models at
https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models

Please see
https://k2-fsa.github.io/sherpa/onnx/tts/zipvoice.html
for details.

"""

import time
from pathlib import Path

import librosa
import sherpa_onnx
import soundfile as sf


def create_tts():
    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=sherpa_onnx.OfflineTtsModelConfig(
            zipvoice=sherpa_onnx.OfflineTtsZipvoiceModelConfig(
                tokens="./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/tokens.txt",
                encoder="./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/encoder.int8.onnx",
                decoder="./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/decoder.int8.onnx",
                data_dir="./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/espeak-ng-data",
                lexicon="./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/lexicon.txt",
                vocoder="./vocos_24khz.onnx",
            ),
            debug=False,
            num_threads=2,
            provider="cpu",
        )
    )
    if not tts_config.validate():
        raise ValueError(
            "Please read the previous error messages and re-check your config"
        )

    return sherpa_onnx.OfflineTts(tts_config)


def main():
    reference_audio_file = (
        "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/test_wavs/leijun-1.wav"
    )
    if not Path(reference_audio_file).is_file():
        raise ValueError(f"Reference audio {reference_audio_file} does not exist")

    tts = create_tts()

    reference_audio, sample_rate = librosa.load(reference_audio_file, sr=None)
    reference_text = "那还是三十六年前, 一九八七年. 我呢考上了武汉大学的计算机系."
    text = "小米的价值观是真诚, 热爱. 真诚，就是不欺人也不自欺. 热爱, 就是全心投入并享受其中."

    gen_config = sherpa_onnx.GenerationConfig()
    gen_config.reference_audio = reference_audio
    gen_config.reference_sample_rate = sample_rate
    gen_config.reference_text = reference_text
    gen_config.num_steps = 4
    gen_config.extra["min_char_in_sentence"] = "30"

    start = time.time()
    audio = tts.generate(text, gen_config)
    end = time.time()

    if len(audio.samples) == 0:
        print("Error in generating audios. Please read previous error messages.")
        return

    elapsed_seconds = end - start
    audio_duration = len(audio.samples) / audio.sample_rate
    real_time_factor = elapsed_seconds / audio_duration

    output_filename = "./generated-zipvoice-zh-en-python.wav"
    sf.write(
        output_filename,
        audio.samples,
        samplerate=audio.sample_rate,
        subtype="PCM_16",
    )
    print(f"Saved to {output_filename}")
    print(f"The text is '{text}'")
    print(f"Elapsed seconds: {elapsed_seconds:.3f}")
    print(f"Audio duration in seconds: {audio_duration:.3f}")
    print(f"RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}")


if __name__ == "__main__":
    main()


================================================
FILE: release.sh
================================================
#!/usr/bin/env bash
#
# Copyright (c)  2023  Xiaomi Corporation
#
# Please see the end of this file for what files it will generate

set -ex
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION: ${SHERPA_ONNX_VERSION}"
dst=v${SHERPA_ONNX_VERSION}

if [ -d $dst ]; then
  echo "$dst exists - skipping"
  exit 0
fi

./build-android-arm64-v8a.sh
./build-android-armv7-eabi.sh
./build-android-x86-64.sh
./build-android-x86.sh
./build-ios.sh

mkdir -p $dst/jniLibs/arm64-v8a
cp -v ./build-android-arm64-v8a/install/lib/*.so $dst/jniLibs/arm64-v8a/

mkdir -p $dst/jniLibs/armeabi-v7a
cp -v ./build-android-armv7-eabi/install/lib/*.so $dst/jniLibs/armeabi-v7a/

mkdir -p $dst/jniLibs/x86_64
cp -v ./build-android-x86-64/install/lib/*.so $dst/jniLibs/x86_64

mkdir -p $dst/jniLibs/x86
cp -v ./build-android-x86/install/lib/*.so $dst/jniLibs/x86

mkdir -p $dst/build-ios/
cp -av ./build-ios/sherpa-onnx.xcframework $dst/build-ios/

mkdir -p $dst/build-ios/ios-onnxruntime
cp -av ./build-ios/ios-onnxruntime/onnxruntime.xcframework $dst/build-ios/ios-onnxruntime/

cd $dst

tar cjvf sherpa-onnx-v${SHERPA_ONNX_VERSION}-android.tar.bz2 ./jniLibs

tar cjvf sherpa-onnx-v${SHERPA_ONNX_VERSION}-ios.tar.bz2 ./build-ios


================================================
FILE: rust-api-examples/.gitignore
================================================
target
!run-*.sh


================================================
FILE: rust-api-examples/Cargo.toml
================================================
[package]
name = "rust-api-examples"
version = "1.12.31"
edition = "2021"

[dependencies]
anyhow = "1.0"
clap = { version = "4.5", features = ["derive"] }
sherpa-onnx = "1.12.31"
# sherpa-onnx = { path = "../sherpa-onnx/rust/sherpa-onnx" }
serde_json = "1.0"

cpal = { version = "0.16", optional = true } # cross-platform audio I/O

[features]
# Default features are empty to avoid building cpal by default
default = []

# Feature for using microphone
mic = ["cpal"]

[[example]]
name = "streaming_zipformer_microphone"
required-features = ["mic"]


================================================
FILE: rust-api-examples/README.md
================================================
# Introduction

This folder uses Rust API maintained by us.

## Setup library path

### Method 1 (Build from source, support only shared libs right now)

```bash
export SHERPA_ONNX_LIB_DIR=/Users/fangjun/open-source/sherpa-onnx/build/install/lib
export RUSTFLAGS="-C link-arg=-Wl,-rpath,$SHERPA_ONNX_LIB_DIR"
```

### Method 2 (Download pre-built libs)

```bash
# You can choose any directory you like
cd $HOME/Downloads

# We use version v1.12.31 below as an example.
# Please always use the latest version from
# https://github.com/k2-fsa/sherpa-onnx/releases

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.12.31/sherpa-onnx-v1.12.31-osx-universal2-shared.tar.bz2
tar xvf sherpa-onnx-v1.12.31-osx-universal2-shared.tar.bz2
rm sherpa-onnx-v1.12.31-osx-universal2-shared.tar.bz2

export SHERPA_ONNX_LIB_DIR=$HOME/Downloads/sherpa-onnx-v1.12.31-osx-universal2-shared/lib
export RUSTFLAGS="-C link-arg=-Wl,-rpath,$SHERPA_ONNX_LIB_DIR"
```

## Examples

| # | Example | Description |
|---|---------|-------------|
| 1 | [version](#example-1-show-sherpa-onnx-version) | Show the sherpa-onnx version |
| 2 | [pocket_tts](#example-2-tts-with-pocket-tts-zero-shot-voice-cloning) | Text-to-speech with zero-shot voice cloning using a reference audio |
| 3 | [supertonic_tts](#example-3-tts-with-supertonic-tts) | Text-to-speech with Supertonic TTS (multi-speaker, multi-language) |
| 4 | [zipvoice_tts](#example-4-tts-with-zipvoice-zero-shot-voice-cloning) | Text-to-speech with ZipVoice zero-shot voice cloning |
| 5 | [vits_tts](#example-5-tts-with-vits-english-piper) | Text-to-speech with a standalone VITS Piper model (English) |
| 6 | [vits_tts](#example-6-tts-with-vits-german-piper) | Text-to-speech with a standalone VITS Piper model (German) |
| 7 | [matcha_tts_en](#example-7-tts-with-matcha-english) | Text-to-speech with Matcha TTS (English) |
| 8 | [matcha_tts_zh](#example-8-tts-with-matcha-chinese) | Text-to-speech with Matcha TTS (Chinese) |
| 9 | [kokoro_tts_en](#example-9-tts-with-kokoro-english) | Text-to-speech with Kokoro TTS (English) |
| 10 | [kokoro_tts_zh_en](#example-10-tts-with-kokoro-chinese--english) | Text-to-speech with Kokoro TTS (Chinese + English) |
| 11 | [kitten_tts_en](#example-11-tts-with-kitten-english) | Text-to-speech with Kitten TTS (English) |
| 12 | [streaming_zipformer_en](#example-12-asr-with-streaming-zipformer-english) | Streaming ASR with zipformer transducer (English) |
| 13 | [streaming_zipformer_zh_en](#example-13-asr-with-streaming-zipformer-chinese--english) | Streaming ASR with zipformer transducer (Chinese + English) |
| 14 | [streaming_zipformer_microphone](#example-14-asr-with-streaming-zipformer-with-a-microphone-real-time-asr) | Real-time streaming ASR from microphone input |
| 15 | [zipformer_en](#example-15-asr-with-non-streaming-zipformer-english) | Non-streaming ASR with zipformer transducer (English) |
| 16 | [zipformer_zh_en](#example-16-asr-with-non-streaming-zipformer-chinese--english) | Non-streaming ASR with zipformer transducer (Chinese + English) |
| 17 | [zipformer_vi](#example-17-asr-with-non-streaming-zipformer-vietnamese) | Non-streaming ASR with zipformer transducer (Vietnamese) |
| 18 | [nemo_parakeet](#example-18-asr-with-non-streaming-nemo-parakeet-english) | Non-streaming ASR with Nemo Parakeet TDT transducer (English) |
| 19 | [fire_red_asr_ctc](#example-19-asr-with-non-streaming-fireredasr-ctc-chinese--english) | Non-streaming ASR with FireRedASR CTC model (Chinese + English) |
| 20 | [moonshine_v2](#example-20-asr-with-non-streaming-moonshine-v2-english) | Non-streaming ASR with Moonshine v2 (English) |
| 21 | [sense_voice](#example-21-asr-with-non-streaming-sensevoice) | Non-streaming ASR with SenseVoice (Chinese, English, Japanese, Korean, Cantonese) |
| 22 | [silero_vad_remove_silence](#example-22-remove-silences-from-a-file-using-silerovad) | Remove silences from an audio file using Silero VAD |
| 23 | [offline_speech_enhancement_gtcrn](#example-23-offline-speech-enhancement-with-gtcrn) | Offline speech enhancement with GTCRN |
| 24 | [offline_speech_enhancement_dpdfnet](#example-24-offline-speech-enhancement-with-dpdfnet) | Offline speech enhancement with DPDFNet |
| 25 | [streaming_speech_enhancement_gtcrn](#example-25-streaming-speech-enhancement-with-gtcrn) | Streaming speech enhancement with GTCRN |
| 26 | [streaming_speech_enhancement_dpdfnet](#example-26-streaming-speech-enhancement-with-dpdfnet) | Streaming speech enhancement with DPDFNet |
| 27 | [online_punctuation](#example-27-online-punctuation) | Add punctuation to text using online punctuation model |
| 28 | [keyword_spotter](#example-28-keyword-spotter) | Detect keywords from audio using a Zipformer KWS model |
| 29 | [spoken_language_identification](#example-29-spoken-language-identification) | Detect the spoken language in a wave file using Whisper |
| 30 | [offline_punctuation](#example-30-offline-punctuation) | Add punctuation to text using an offline punctuation model |
| 31 | [audio_tagging_zipformer](#example-31-audio-tagging-with-a-zipformer-model) | Audio tagging with a Zipformer model |
| 32 | [audio_tagging_ced](#example-32-audio-tagging-with-a-ced-model) | Audio tagging with a CED model |
| 33 | [speaker_embedding_extractor](#example-33-speaker-embedding-extractor) | Compute a speaker embedding from a wave file |
| 34 | [speaker_embedding_manager](#example-34-speaker-embedding-manager) | Register, search, verify, and remove speakers using embeddings |
| 35 | [speaker_embedding_cosine_similarity](#example-35-speaker-embedding-cosine-similarity) | Compute cosine similarity from three speaker embeddings |
| 36 | [offline_speaker_diarization](#example-36-offline-speaker-diarization) | Offline speaker diarization with pyannote segmentation and 3D-Speaker embeddings |

## Run it

Each helper script downloads the required files if needed.

### Example 1: Show sherpa-onnx version

```bash
./run-version.sh
```

For macOS, you can run
```
otool -l target/debug/examples/version | grep -A2 LC_RPATH
```
to check the RPATH.

### Example 2: TTS with Pocket TTS (zero-shot voice cloning)

```bash
./run-pocket-tts.sh
```

### Example 3: TTS with Supertonic TTS

```bash
./run-supertonic-tts.sh
```

### Example 4: TTS with ZipVoice zero-shot voice cloning

```bash
./run-zipvoice-tts.sh
```


### Example 5: TTS with VITS (English Piper)

```bash
./run-vits-en.sh
```

### Example 6: TTS with VITS (German Piper)

```bash
./run-vits-de.sh
```

### Example 7: TTS with Matcha (English)

```bash
./run-matcha-tts-en.sh
```

### Example 8: TTS with Matcha (Chinese)

```bash
./run-matcha-tts-zh.sh
```

### Example 9: TTS with Kokoro (English)

```bash
./run-kokoro-tts-en.sh
```

### Example 10: TTS with Kokoro (Chinese + English)

```bash
./run-kokoro-tts-zh-en.sh
```

### Example 11: TTS with Kitten (English)

```bash
./run-kitten-tts-en.sh
```

### Example 12: ASR with streaming zipformer (English)

```bash
./run-streaming-zipformer-en.sh
```

### Example 13: ASR with streaming zipformer (Chinese + English)

```bash
./run-streaming-zipformer-zh-en.sh
```

### Example 14: ASR with streaming zipformer (with a microphone, real-time ASR)

```bash
./run-streaming-zipformer-microphone-zh-en.sh
```

### Example 15: ASR with non-streaming zipformer (English)

```bash
./run-zipformer-en.sh
```

### Example 16: ASR with non-streaming zipformer (Chinese + English)

```bash
./run-zipformer-zh-en.sh
```

### Example 17: ASR with non-streaming zipformer (Vietnamese)

```bash
./run-zipformer-vi.sh
```

### Example 18: ASR with non-streaming Nemo Parakeet (English)

```bash
./run-nemo-parakeet-en.sh
```

### Example 19: ASR with non-streaming FireRedASR CTC (Chinese + English)

```bash
./run-fire-red-asr-ctc.sh
```

### Example 20: ASR with non-streaming Moonshine v2 (English)

```bash
./run-moonshine-v2.sh
```

### Example 21: ASR with non-streaming SenseVoice

```bash
./run-sense-voice.sh
```

### Example 22: Remove silences from a file using SileroVAD

```bash
./run-silero-vad-remove-silence.sh
```

### Example 23: Offline speech enhancement with GTCRN

```bash
./run-offline-speech-enhancement-gtcrn.sh
```

### Example 24: Offline speech enhancement with DPDFNet

```bash
./run-offline-speech-enhancement-dpdfnet.sh
```

### Example 25: Streaming speech enhancement with GTCRN

```bash
./run-streaming-speech-enhancement-gtcrn.sh
```

### Example 26: Streaming speech enhancement with DPDFNet

```bash
./run-streaming-speech-enhancement-dpdfnet.sh
```

### Example 27: Online punctuation

```bash
./run-online-punctuation.sh
```

### Example 28: Keyword spotter

```bash
./run-keyword-spotter.sh
```

### Example 29: Spoken language identification

```bash
./run-spoken-language-identification.sh
```

### Example 30: Offline punctuation

```bash
./run-offline-punctuation.sh
```

### Example 31: Audio tagging with a Zipformer model

```bash
./run-audio-tagging-zipformer.sh
```

### Example 32: Audio tagging with a CED model

```bash
./run-audio-tagging-ced.sh
```


### Example 33: Speaker embedding extractor

```bash
./run-speaker-embedding-extractor.sh
```

### Example 34: Speaker embedding manager

```bash
./run-speaker-embedding-manager.sh
```


### Example 35: Speaker embedding cosine similarity

```bash
./run-speaker-embedding-cosine-similarity.sh
```


### Example 36: Offline speaker diarization

```bash
./run-offline-speaker-diarization.sh
```


================================================
FILE: rust-api-examples/examples/audio_tagging_ced.rs
================================================
// Copyright (c) 2026 Xiaomi Corporation
//
// This file demonstrates how to use audio tagging with a CED model
// through sherpa-onnx's Rust API.

use sherpa_onnx::{AudioTagging, AudioTaggingConfig, AudioTaggingModelConfig, Wave};
use std::time::Instant;

fn main() {
    let config = AudioTaggingConfig {
        model: AudioTaggingModelConfig {
            ced: Some("./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/model.int8.onnx".into()),
            num_threads: 1,
            debug: true,
            provider: Some("cpu".into()),
            ..Default::default()
        },
        labels: Some(
            "./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/class_labels_indices.csv".into(),
        ),
        top_k: 5,
    };

    let tagger = AudioTagging::create(&config).expect("Failed to create AudioTagging");

    let wav = Wave::read("./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/6.wav")
        .expect("Failed to read test wave");

    let start = Instant::now();
    let stream = tagger.create_stream();
    stream.accept_waveform(wav.sample_rate(), wav.samples());
    let result = tagger.compute(&stream, 5);
    let elapsed_seconds = start.elapsed().as_secs_f32();
    let audio_duration = wav.samples().len() as f32 / wav.sample_rate() as f32;
    let rtf = elapsed_seconds / audio_duration;

    println!("Elapsed seconds: {:.3}", elapsed_seconds);
    println!("Audio duration in seconds: {:.3}", audio_duration);
    println!("RTF: {:.3}/{:.3} = {:.3}", elapsed_seconds, audio_duration, rtf);
    println!();
    for (i, event) in result.iter().enumerate() {
        println!("{}: {{name: {}, index: {}, prob: {:.3}}}", i, event.name, event.index, event.prob);
    }
}


================================================
FILE: rust-api-examples/examples/audio_tagging_zipformer.rs
================================================
// Copyright (c) 2026 Xiaomi Corporation
//
// This file demonstrates how to use audio tagging with a Zipformer model
// through sherpa-onnx's Rust API.

use sherpa_onnx::{
    AudioTagging, AudioTaggingConfig, AudioTaggingModelConfig,
    OfflineZipformerAudioTaggingModelConfig, Wave,
};
use std::time::Instant;

fn main() {
    let config = AudioTaggingConfig {
        model: AudioTaggingModelConfig {
            zipformer: OfflineZipformerAudioTaggingModelConfig {
                model: Some(
                    "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/model.int8.onnx"
                        .into(),
                ),
            },
            num_threads: 1,
            debug: true,
            provider: Some("cpu".into()),
            ..Default::default()
        },
        labels: Some(
            "./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/class_labels_indices.csv"
                .into(),
        ),
        top_k: 5,
    };

    let tagger = AudioTagging::create(&config).expect("Failed to create AudioTagging");

    let wav =
        Wave::read("./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/1.wav")
            .expect("Failed to read test wave");

    let start = Instant::now();
    let stream = tagger.create_stream();
    stream.accept_waveform(wav.sample_rate(), wav.samples());
    let result = tagger.compute(&stream, 5);
    let elapsed_seconds = start.elapsed().as_secs_f32();
    let audio_duration = wav.samples().len() as f32 / wav.sample_rate() as f32;
    let rtf = elapsed_seconds / audio_duration;

    println!("Elapsed seconds: {:.3}", elapsed_seconds);
    println!("Audio duration in seconds: {:.3}", audio_duration);
    println!("RTF: {:.3}/{:.3} = {:.3}", elapsed_seconds, audio_duration, rtf);
    println!();
    for (i, event) in result.iter().enumerate() {
        println!("{}: {{name: {}, index: {}, prob: {:.3}}}", i, event.name, event.index, event.prob);
    }
}


================================================
FILE: rust-api-examples/examples/fire_red_asr_ctc.rs
================================================
// Copyright (c) 2026 Xiaomi Corporation
//
// This file demonstrates how to use FireRedAsr CTC with sherpa-onnx's Rust API
// for offline speech recognition.
//
// See ../README.md for how to run it.

use clap::Parser;
use sherpa_onnx::{
    OfflineFireRedAsrCtcModelConfig, OfflineRecognizer, OfflineRecognizerConfig, Wave,
};
use std::time::Instant;

/// FireRedAsr CTC offline example
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// Path to WAV file
    #[arg(long)]
    wav: String,

    /// Path to FireRedAsr CTC ONNX model
    #[arg(long)]
    model: String,

    /// Path to tokens file
    #[arg(long)]
    tokens: String,

    /// Provider (default: cpu)
    #[arg(long, default_value = "cpu")]
    provider: String,

    /// Enable debug logs
    #[arg(long, default_value_t = false)]
    debug: bool,

    /// Number of threads
    #[arg(long, default_value_t = 2)]
    num_threads: i32,
}

fn main() {
    let args = Args::parse();

    let wave = Wave::read(&args.wav).expect("Failed to read WAV file");
    let audio_duration = wave.samples().len() as f64 / wave.sample_rate() as f64;

    let mut recognizer_config = OfflineRecognizerConfig::default();

    recognizer_config.model_config.fire_red_asr_ctc = OfflineFireRedAsrCtcModelConfig {
        model: Some(args.model.clone()),
    };

    recognizer_config.model_config.tokens = Some(args.tokens.clone());
    recognizer_config.model_config.provider = Some(args.provider.clone());
    recognizer_config.model_config.debug = args.debug;
    recognizer_config.model_config.num_threads = args.num_threads;

    // Measure recognizer creation time
    println!("Creating recognizer ...");
    let start_creation = Instant::now();
    let recognizer =
        OfflineRecognizer::create(&recognizer_config).expect("Failed to create OfflineRecognizer");
    let creation_elapsed = start_creation.elapsed().as_secs_f64();
    println!("Recognizer created in {:.3} seconds.", creation_elapsed);

    let stream = recognizer.create_stream();

    // Measure recognition time
    let start_recognition = Instant::now();
    stream.accept_waveform(wave.sample_rate(), wave.samples());
    recognizer.decode(&stream);
    let recognition_elapsed = start_recognition.elapsed().as_secs_f64();

    // Get recognition result
    if let Some(result) = stream.get_result() {
        println!("Decoded text: {}", result.text);

        let total_time = creation_elapsed + recognition_elapsed;
        let rtf = recognition_elapsed / audio_duration;

        println!("\n=== Performance Summary ===");
        println!("Audio duration          : {:.3} seconds", audio_duration);
        println!("Recognizer creation time: {:.3} seconds", creation_elapsed);
        println!(
            "Recognition time        : {:.3} seconds",
            recognition_elapsed
        );
        println!("Total elapsed time      : {:.3} seconds", total_time);

        // Detailed RTF computation log
        println!(
            "Real-Time Factor (RTF)  : {:.3} (recognition_elapsed / audio_duration = {:.3} / {:.3})",
            rtf, recognition_elapsed, audio_duration
        );

        println!(
            "Number of threads       : {}",
            recognizer_config.model_config.num_threads
        );
    } else {
        eprintln!("Failed to get recognition result");
    }
}


================================================
FILE: rust-api-examples/examples/keyword_spotter.rs
================================================
// Copyright (c) 2026 Xiaomi Corporation
//
// This file demonstrates how to use sherpa-onnx's Rust API for keyword spotting.
//
// See ../README.md for how to run it.

use clap::Parser;
use sherpa_onnx::{KeywordSpotter, KeywordSpotterConfig, Wave};

#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    #[arg(long)]
    wav: String,

    #[arg(long)]
    encoder: String,

    #[arg(long)]
    decoder: String,

    #[arg(long)]
    joiner: String,

    #[arg(long)]
    tokens: String,

    #[arg(long)]
    keywords_file: String,

    #[arg(long, default_value = "cpu")]
    provider: String,

    #[arg(long, default_value_t = 1)]
    num_threads: i32,

    #[arg(long, default_value_t = false)]
    debug: bool,
}

fn detect_keywords(
    kws: &KeywordSpotter,
    wave: &Wave,
    title: &str,
    extra_keywords: Option<&str>,
) {
    println!("{title}");

    let stream = if let Some(extra_keywords) = extra_keywords {
        kws.create_stream_with_keywords(extra_keywords)
    } else {
        kws.create_stream()
    };

    let tail_padding = vec![0.0f32; (wave.sample_rate() / 2) as usize];
    stream.accept_waveform(wave.sample_rate(), wave.samples());
    stream.accept_waveform(wave.sample_rate(), &tail_padding);
    stream.input_finished();

    let mut detected = false;
    while kws.is_ready(&stream) {
        kws.decode(&stream);
        if let Some(result) = kws.get_result(&stream) {
            if !result.keyword.is_empty() {
                detected = true;
                println!("Detected keyword: {}", result.json);
                kws.reset(&stream);
            }
        }
    }

    if !detected {
        println!("No keyword detected.");
    }

    println!();
}

fn main() -> anyhow::Result<()> {
    let args = Args::parse();
    let wave = Wave::read(&args.wav).ok_or_else(|| anyhow::anyhow!("Failed to read WAV file"))?;

    let mut config = KeywordSpotterConfig::default();
    config.model_config.transducer.encoder = Some(args.encoder);
    config.model_config.transducer.decoder = Some(args.decoder);
    config.model_config.transducer.joiner = Some(args.joiner);
    config.model_config.tokens = Some(args.tokens);
    config.model_config.provider = Some(args.provider);
    config.model_config.num_threads = args.num_threads;
    config.model_config.debug = args.debug;
    config.keywords_file = Some(args.keywords_file);

    let kws = KeywordSpotter::create(&config)
        .ok_or_else(|| anyhow::anyhow!("Failed to create KeywordSpotter"))?;

    detect_keywords(
        &kws,
        &wave,
        "--Test pre-defined keywords from the keywords file--",
        None,
    );
    detect_keywords(
        &kws,
        &wave,
        "--Use pre-defined keywords + add a new keyword--",
        Some("y ǎn y uán @演员"),
    );
    detect_keywords(
        &kws,
        &wave,
        "--Use pre-defined keywords + add two new keywords--",
        Some("y ǎn y uán @演员/zh ī m íng @知名"),
    );

    Ok(())
}


================================================
FILE: rust-api-examples/examples/kitten_tts_en.rs
================================================
// Copyright (c) 2026 Xiaomi Corporation
//
// This file demonstrates how to use Kitten TTS with sherpa-onnx's Rust API
// for offline English text-to-speech.

use sherpa_onnx::{
    GenerationConfig, OfflineTts, OfflineTtsConfig, OfflineTtsKittenModelConfig,
};
use std::time::Instant;

fn main() {
    let config = OfflineTtsConfig {
        model: sherpa_onnx::OfflineTtsModelConfig {
            kitten: OfflineTtsKittenModelConfig {
                model: Some("./kitten-nano-en-v0_1-fp16/model.fp16.onnx".into()),
                voices: Some("./kitten-nano-en-v0_1-fp16/voices.bin".into()),
                tokens: Some("./kitten-nano-en-v0_1-fp16/tokens.txt".into()),
                data_dir: Some("./kitten-nano-en-v0_1-fp16/espeak-ng-data".into()),
                length_scale: 1.0,
            },
            num_threads: 2,
            debug: false,
            ..Default::default()
        },
        ..Default::default()
    };

    let tts = OfflineTts::create(&config).expect("Failed to create OfflineTts");

    println!("Sample rate: {}", tts.sample_rate());
    println!("Num speakers: {}", tts.num_speakers());

    let text = "Today as always, men fall into two groups: slaves and free men. Whoever \
        does not have two-thirds of his day for himself, is a slave, whatever \
        he may be: a statesman, a businessman, an official, or a scholar. \
        Friends fell out often because life was changing so fast. The easiest \
        thing in the world was to lose touch with someone.";

    let gen_config = GenerationConfig {
        sid: 0,
        speed: 1.0,
        ..Default::default()
    };

    let start = Instant::now();

    let audio = tts
        .generate_with_config(
            text,
            &gen_config,
            Some(|_samples: &[f32], progress: f32| -> bool {
                println!("Progress: {:.1}%", progress * 100.0);
                true
            }),
        )
        .expect("Generation failed");

    let elapsed_seconds = start.elapsed().as_secs_f32();
    let duration = audio.samples().len() as f32 / audio.sample_rate() as f32;
    let rtf = elapsed_seconds / duration;

    println!("Number of threads: {}", config.model.num_threads);
    println!("Elapsed seconds: {:.3} s", elapsed_seconds);
    println!("Audio duration: {:.3} s", duration);
    println!(
        "Real-time factor (RTF): {:.3}/{:.3} = {:.3}",
        elapsed_seconds, duration, rtf
    );

    let filename = "./generated-kitten-en-rust.wav";
    if audio.save(filename) {
        println!("Saved to: {}", filename);
    } else {
        eprintln!("Failed to save {}", filename);
    }
}


================================================
FILE: rust-api-examples/examples/kokoro_tts_en.rs
================================================
// Copyright (c) 2026 Xiaomi Corporation
//
// This file demonstrates how to use Kokoro TTS with sherpa-onnx's Rust API
// for offline English text-to-speech.

use sherpa_onnx::{
    GenerationConfig, OfflineTts, OfflineTtsConfig, OfflineTtsKokoroModelConfig,
};
use std::time::Instant;

fn main() {
    let config = OfflineTtsConfig {
        model: sherpa_onnx::OfflineTtsModelConfig {
            kokoro: OfflineTtsKokoroModelConfig {
                model: Some("./kokoro-en-v0_19/model.onnx".into()),
                voices: Some("./kokoro-en-v0_19/voices.bin".into()),
                tokens: Some("./kokoro-en-v0_19/tokens.txt".into()),
                data_dir: Some("./kokoro-en-v0_19/espeak-ng-data".into()),
                length_scale: 1.0,
                ..Default::default()
            },
            num_threads: 2,
            debug: false,
            ..Default::default()
        },
        ..Default::default()
    };

    let tts = OfflineTts::create(&config).expect("Failed to create OfflineTts");

    println!("Sample rate: {}", tts.sample_rate());
    println!("Num speakers: {}", tts.num_speakers());

    let text = "Today as always, men fall into two groups: slaves and free men. Whoever \
        does not have two-thirds of his day for himself, is a slave, whatever \
        he may be: a statesman, a businessman, an official, or a scholar. \
        Friends fell out often because life was changing so fast. The easiest \
        thing in the world was to lose touch with someone.";

    let gen_config = GenerationConfig {
        sid: 0,
        speed: 1.0,
        ..Default::default()
    };

    let start = Instant::now();

    let audio = tts
        .generate_with_config(
            text,
            &gen_config,
            Some(|_samples: &[f32], progress: f32| -> bool {
                println!("Progress: {:.1}%", progress * 100.0);
                true
            }),
        )
        .expect("Generation failed");

    let elapsed_seconds = start.elapsed().as_secs_f32();
    let duration = audio.samples().len() as f32 / audio.sample_rate() as f32;
    let rtf = elapsed_seconds / duration;

    println!("Number of threads: {}", config.model.num_threads);
    println!("Elapsed seconds: {:.3} s", elapsed_seconds);
    println!("Audio duration: {:.3} s", duration);
    println!(
        "Real-time factor (RTF): {:.3}/{:.3} = {:.3}",
        elapsed_seconds, duration, rtf
    );

    let filename = "./generated-kokoro-en-rust.wav";
    if audio.save(filename) {
        println!("Saved to: {}", filename);
    } else {
        eprintln!("Failed to save {}", filename);
    }
}


================================================
FILE: rust-api-examples/examples/kokoro_tts_zh_en.rs
================================================
// Copyright (c) 2026 Xiaomi Corporation
//
// This file demonstrates how to use Kokoro TTS with sherpa-onnx's Rust API
// for offline Chinese + English text-to-speech.

use sherpa_onnx::{
    GenerationConfig, OfflineTts, OfflineTtsConfig, OfflineTtsKokoroModelConfig,
};
use std::time::Instant;

fn main() {
    let config = OfflineTtsConfig {
        model: sherpa_onnx::OfflineTtsModelConfig {
            kokoro: OfflineTtsKokoroModelConfig {
                model: Some("./kokoro-multi-lang-v1_0/model.onnx".into()),
                voices: Some("./kokoro-multi-lang-v1_0/voices.bin".into()),
                tokens: Some("./kokoro-multi-lang-v1_0/tokens.txt".into()),
                data_dir: Some("./kokoro-multi-lang-v1_0/espeak-ng-data".into()),
                dict_dir: Some("./kokoro-multi-lang-v1_0/dict".into()),
                lexicon: Some(
                    "./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt".into(),
                ),
                length_scale: 1.0,
                ..Default::default()
            },
            num_threads: 2,
            debug: false,
            ..Default::default()
        },
        ..Default::default()
    };

    let tts = OfflineTts::create(&config).expect("Failed to create OfflineTts");

    println!("Sample rate: {}", tts.sample_rate());
    println!("Num speakers: {}", tts.num_speakers());

    let text = "中英文语音合成测试。This is generated by next generation Kaldi using \
        Kokoro without Misaki. 你觉得中英文说的如何呢？";

    let gen_config = GenerationConfig {
        sid: 0,
        speed: 1.0,
        ..Default::default()
    };

    let start = Instant::now();

    let audio = tts
        .generate_with_config(
            text,
            &gen_config,
            Some(|_samples: &[f32], progress: f32| -> bool {
                println!("Progress: {:.1}%", progress * 100.0);
                true
            }),
        )
        .expect("Generation failed");

    let elapsed_seconds = start.elapsed().as_secs_f32();
    let duration = audio.samples().len() as f32 / audio.sample_rate() as f32;
    let rtf = elapsed_seconds / duration;

    println!("Number of threads: {}", config.model.num_threads);
    println!("Elapsed seconds: {:.3} s", elapsed_seconds);
    println!("Audio duration: {:.3} s", duration);
    println!(
        "Real-time factor (RTF): {:.3}/{:.3} = {:.3}",
        elapsed_seconds, duration, rtf
    );

    let filename = "./generated-kokoro-zh-en-rust.wav";
    if audio.save(filename) {
        println!("Saved to: {}", filename);
    } else {
        eprintln!("Failed to save {}", filename);
    }
}


================================================
FILE: rust-api-examples/examples/matcha_tts_en.rs
================================================
// Copyright (c) 2026 Xiaomi Corporation
//
// This file demonstrates how to use Matcha TTS with sherpa-onnx's Rust API
// for offline English text-to-speech.

use sherpa_onnx::{
    GenerationConfig, OfflineTts, OfflineTtsConfig, OfflineTtsMatchaModelConfig,
};
use std::time::Instant;

fn main() {
    let config = OfflineTtsConfig {
        model: sherpa_onnx::OfflineTtsModelConfig {
            matcha: OfflineTtsMatchaModelConfig {
                acoustic_model: Some("./matcha-icefall-en_US-ljspeech/model-steps-3.onnx".into()),
                vocoder: Some("./vocos-22khz-univ.onnx".into()),
                tokens: Some("./matcha-icefall-en_US-ljspeech/tokens.txt".into()),
                data_dir: Some("./matcha-icefall-en_US-ljspeech/espeak-ng-data".into()),
                noise_scale: 0.667,
                length_scale: 1.0,
                ..Default::default()
            },
            num_threads: 2,
            debug: false,
            ..Default::default()
        },
        ..Default::default()
    };

    let tts = OfflineTts::create(&config).expect("Failed to create OfflineTts");

    println!("Sample rate: {}", tts.sample_rate());
    println!("Num speakers: {}", tts.num_speakers());

    let text = "Today as always, men fall into two groups: slaves and free men. Whoever \
        does not have two-thirds of his day for himself, is a slave, whatever \
        he may be: a statesman, a businessman, an official, or a scholar. \
        Friends fell out often because life was changing so fast. The easiest \
        thing in the world was to lose touch with someone.";

    let gen_config = GenerationConfig {
        sid: 0,
        speed: 1.0,
        ..Default::default()
    };

    let start = Instant::now();

    let audio = tts
        .generate_with_config(
            text,
            &gen_config,
            Some(|_samples: &[f32], progress: f32| -> bool {
                println!("Progress: {:.1}%", progress * 100.0);
                true
            }),
        )
        .expect("Generation failed");

    let elapsed_seconds = start.elapsed().as_secs_f32();
    let duration = audio.samples().len() as f32 / audio.sample_rate() as f32;
    let rtf = elapsed_seconds / duration;

    println!("Number of threads: {}", config.model.num_threads);
    println!("Elapsed seconds: {:.3} s", elapsed_seconds);
    println!("Audio duration: {:.3} s", duration);
    println!(
        "Real-time factor (RTF): {:.3}/{:.3} = {:.3}",
        elapsed_seconds, duration, rtf
    );

    let filename = "./generated-matcha-en-rust.wav";
    if audio.save(filename) {
        println!("Saved to: {}", filename);
    } else {
        eprintln!("Failed to save {}", filename);
    }
}


================================================
FILE: rust-api-examples/examples/matcha_tts_zh.rs
================================================
// Copyright (c) 2026 Xiaomi Corporation
//
// This file demonstrates how to use Matcha TTS with sherpa-onnx's Rust API
// for offline Chinese text-to-speech.

use sherpa_onnx::{
    GenerationConfig, OfflineTts, OfflineTtsConfig, OfflineTtsMatchaModelConfig,
};
use std::time::Instant;

fn main() {
    let config = OfflineTtsConfig {
        model: sherpa_onnx::OfflineTtsModelConfig {
            matcha: OfflineTtsMatchaModelConfig {
                acoustic_model: Some("./matcha-icefall-zh-baker/model-steps-3.onnx".into()),
                vocoder: Some("./vocos-22khz-univ.onnx".into()),
                lexicon: Some("./matcha-icefall-zh-baker/lexicon.txt".into()),
                tokens: Some("./matcha-icefall-zh-baker/tokens.txt".into()),
                dict_dir: Some("./matcha-icefall-zh-baker/dict".into()),
                noise_scale: 0.667,
                length_scale: 1.0,
                ..Default::default()
            },
            num_threads: 2,
            debug: false,
            ..Default::default()
        },
        rule_fsts: Some(
            "./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst".into(),
        ),
        ..Default::default()
    };

    let tts = OfflineTts::create(&config).expect("Failed to create OfflineTts");

    println!("Sample rate: {}", tts.sample_rate());
    println!("Num speakers: {}", tts.num_speakers());

    let text = "当夜幕降临，星光点点，伴随着微风拂面，我在静谧中感受着时光的流转，思念如\
        涟漪荡漾，梦境如画卷展开，我与自然融为一体，沉静在这片宁静的美丽之中，感\
        受着生命的奇迹与温柔.\
        某某银行的副行长和一些行政领导表示，他们去过长江和长白山; \
        经济不断增长。2024年12月31号，拨打110或者18920240511。123456块钱。";

    let gen_config = GenerationConfig {
        sid: 0,
        speed: 1.0,
        ..Default::default()
    };

    let start = Instant::now();

    let audio = tts
        .generate_with_config(
            text,
            &gen_config,
            Some(|_samples: &[f32], progress: f32| -> bool {
                println!("Progress: {:.1}%", progress * 100.0);
                true
            }),
        )
        .expect("Generation failed");

    let elapsed_seconds = start.elapsed().as_secs_f32();
    let duration = audio.samples().len() as f32 / audio.sample_rate() as f32;
    let rtf = elapsed_seconds / duration;

    println!("Number of threads: {}", config.model.num_threads);
    println!("Elapsed seconds: {:.3} s", elapsed_seconds);
    println!("Audio duration: {:.3} s", duration);
    println!(
        "Real-time factor (RTF): {:.3}/{:.3} = {:.3}",
        elapsed_seconds, duration, rtf
    );

    let filename = "./generated-matcha-zh-rust.wav";
    if audio.save(filename) {
        println!("Saved to: {}", filename);
    } else {
        eprintln!("Failed to save {}", filename);
    }
}


================================================
FILE: rust-api-examples/examples/moonshine_v2.rs
================================================
// Copyright (c) 2026 Xiaomi Corporation
//
// This file demonstrates how to use a Moonshine v2 model with sherpa-onnx's Rust API
// for offline speech recognition.
//
// See ../README.md for how to run it.

use clap::Parser;
use sherpa_onnx::{OfflineRecognizer, OfflineRecognizerConfig, Wave};
use std::time::Instant;

/// Moonshine v2 offline example
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// Path to WAV file
    #[arg(long)]
    wav: String,

    /// Path to the encoder model
    #[arg(long)]
    encoder: String,

    /// Path to the decoder model
    #[arg(long)]
    decoder: String,

    /// Path to tokens file
    #[arg(long)]
    tokens: String,

    /// Provider (default: cpu)
    #[arg(long, default_value = "cpu")]
    provider: String,

    /// Enable debug logs
    #[arg(long, default_value_t = false)]
    debug: bool,

    /// Number of threads
    #[arg(long, default_value_t = 2)]
    num_threads: i32,
}

fn main() {
    let args = Args::parse();

    let wave = Wave::read(&args.wav).expect("Failed to read WAV file");
    let audio_duration = wave.samples().len() as f64 / wave.sample_rate() as f64;

    let mut recognizer_config = OfflineRecognizerConfig::default();

    recognizer_config.model_config.moonshine.encoder = Some(args.encoder.clone());
    recognizer_config.model_config.moonshine.merged_decoder = Some(args.decoder.clone());

    recognizer_config.model_config.tokens = Some(args.tokens.clone());
    recognizer_config.model_config.provider = Some(args.provider.clone());
    recognizer_config.model_config.debug = args.debug;
    recognizer_config.model_config.num_threads = args.num_threads;

    // Measure recognizer creation time
    println!("Creating recognizer ...");
    let start_creation = Instant::now();
    let recognizer =
        OfflineRecognizer::create(&recognizer_config).expect("Failed to create OfflineRecognizer");
    let creation_elapsed = start_creation.elapsed().as_secs_f64();
    println!("Recognizer created in {:.3} seconds.", creation_elapsed);

    let stream = recognizer.create_stream();

    // Measure recognition time
    let start_recognition = Instant::now();
    stream.accept_waveform(wave.sample_rate(), wave.samples());
    recognizer.decode(&stream);
    let recognition_elapsed = start_recognition.elapsed().as_secs_f64();

    // Get recognition result
    if let Some(result) = stream.get_result() {
        println!("Decoded text: {}", result.text);

        let total_time = creation_elapsed + recognition_elapsed;
        let rtf = recognition_elapsed / audio_duration;

        println!("\n=== Performance Summary ===");
        println!("Audio duration          : {:.3} seconds", audio_duration);
        println!("Recognizer creation time: {:.3} seconds", creation_elapsed);
        println!(
            "Recognition time        : {:.3} seconds",
            recognition_elapsed
        );
        println!("Total elapsed time      : {:.3} seconds", total_time);

        // Detailed RTF computation log
        println!(
            "Real-Time Factor (RTF)  : {:.3} (recognition_elapsed / audio_duration = {:.3} / {:.3})",
            rtf, recognition_elapsed, audio_duration
        );

        println!(
            "Number of threads       : {}",
            recognizer_config.model_config.num_threads
        );
    } else {
        eprintln!("Failed to get recognition result");
    }
}


================================================
FILE: rust-api-examples/examples/nemo_parakeet.rs
================================================
// Copyright (c) 2026 Xiaomi Corporation
//
// This file demonstrates how to use Nemo Parakeet with sherpa-onnx's Rust API
// for offline speech recognition.
//
// See ../README.md for how to run it.

use clap::Parser;
use sherpa_onnx::{OfflineRecognizer, OfflineRecognizerConfig, OfflineTransducerModelConfig, Wave};
use std::time::Instant;

/// Nemo Parakeet offline example
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// Path to WAV file
    #[arg(long)]
    wav: String,

    /// Path to encoder ONNX model
    #[arg(long)]
    encoder: String,

    /// Path to decoder ONNX model
    #[arg(long)]
    decoder: String,

    /// Path to joiner ONNX model
    #[arg(long)]
    joiner: String,

    /// Path to tokens file
    #[arg(long)]
    tokens: String,

    /// Provider (default: cpu)
    #[arg(long, default_value = "cpu")]
    provider: String,

    /// Enable debug logs
    #[arg(long, default_value_t = false)]
    debug: bool,

    /// Number of threads
    #[arg(long, default_value_t = 2)]
    num_threads: i32,
}

fn main() {
    let args = Args::parse();

    let wave = Wave::read(&args.wav).expect("Failed to read WAV file");
    let audio_duration = wave.samples().len() as f64 / wave.sample_rate() as f64;

    // Create default recognizer config
    let mut recognizer_config = OfflineRecognizerConfig::default();

    // Set the transducer model
    recognizer_config.model_config.transducer = OfflineTransducerModelConfig {
        encoder: Some(args.encoder.clone()),
        decoder: Some(args.decoder.clone()),
        joiner: Some(args.joiner.clone()),
    };

    recognizer_config.model_config.tokens = Some(args.tokens.clone());
    recognizer_config.model_config.provider = Some(args.provider.clone());
    recognizer_config.model_config.debug = args.debug;
    recognizer_config.model_config.num_threads = args.num_threads;

    // Measure recognizer creation time
    println!("Creating recognizer ...");
    let start_creation = Instant::now();
    let recognizer =
        OfflineRecognizer::create(&recognizer_config).expect("Failed to create OfflineRecognizer");
    let creation_elapsed = start_creation.elapsed().as_secs_f64();
    println!("Recognizer created in {:.3} seconds.", creation_elapsed);

    let stream = recognizer.create_stream();

    // Measure recognition time
    let start_recognition = Instant::now();
    stream.accept_waveform(wave.sample_rate(), wave.samples());
    recognizer.decode(&stream);
    let recognition_elapsed = start_recognition.elapsed().as_secs_f64();

    // Get recognition result
    if let Some(result) = stream.get_result() {
        println!("Decoded text: {}", result.text);

        let total_elapsed = creation_elapsed + recognition_elapsed;
        let rtf = recognition_elapsed / audio_duration;

        println!("\n=== Performance Summary ===");
        println!("Audio duration          : {:.3} seconds", audio_duration);
        println!("Recognizer creation time: {:.3} seconds", creation_elapsed);
        println!(
            "Recognition time        : {:.3} seconds",
            recognition_elapsed
        );
        println!("Total elapsed time      : {:.3} seconds", total_elapsed);

        println!(
            "Real-Time Factor (RTF)  : {:.3} (recognition_elapsed / audio_duration = {:.3} / {:.3})",
            rtf, recognition_elapsed, audio_duration
        );

        println!(
            "Number of threads       : {}",
            recognizer_config.model_config.num_threads
        );
    } else {
        eprintln!("Failed to get recognition result");
    }
}


================================================
FILE: rust-api-examples/examples/offline_punctuation.rs
================================================
// Copyright (c) 2026 Xiaomi Corporation
//
// This file demonstrates how to use sherpa-onnx's Rust API for offline
// punctuation.
//
// See ../README.md for how to run it.

use clap::Parser;
use sherpa_onnx::{OfflinePunctuation, OfflinePunctuationConfig, OfflinePunctuationModelConfig};

#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    #[arg(long)]
    ct_transformer: String,

    #[arg(long, default_value_t = 1)]
    num_threads: i32,

    #[arg(long, default_value = "cpu")]
    provider: String,

    #[arg(long, default_value_t = false)]
    debug: bool,
}

fn main() -> anyhow::Result<()> {
    let args = Args::parse();
    let punct = OfflinePunctuation::create(&OfflinePunctuationConfig {
        model: OfflinePunctuationModelConfig {
            ct_transformer: Some(args.ct_transformer),
            num_threads: args.num_threads,
            provider: Some(args.provider),
            debug: args.debug,
        },
    })
    .ok_or_else(|| anyhow::anyhow!("Failed to create OfflinePunctuation"))?;

    let texts = [
        "这是一个测试你好吗How are you我很好thank you are you ok谢谢你",
        "我们都是木头人不会说话不会动",
        "The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry",
    ];

    println!("----------");
    for text in texts {
        let out = punct
            .add_punctuation(text)
            .ok_or_else(|| anyhow::anyhow!("Failed to add punctuation"))?;
        println!("Input text: {text}");
        println!("Output text: {out}");
        println!("----------");
    }

    Ok(())
}


================================================
FILE: rust-api-examples/examples/offline_speaker_diarization.rs
================================================
use sherpa_onnx::{
    FastClusteringConfig, OfflineSpeakerDiarization, OfflineSpeakerDiarizationConfig,
    OfflineSpeakerSegmentationModelConfig, OfflineSpeakerSegmentationPyannoteModelConfig,
    SpeakerEmbeddingExtractorConfig, Wave,
};

fn main() {
    let config = OfflineSpeakerDiarizationConfig {
        segmentation: OfflineSpeakerSegmentationModelConfig {
            pyannote: OfflineSpeakerSegmentationPyannoteModelConfig {
                model: Some("./sherpa-onnx-pyannote-segmentation-3-0/model.onnx".into()),
            },
            ..Default::default()
        },
        embedding: SpeakerEmbeddingExtractorConfig {
            model: Some("./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx".into()),
            ..Default::default()
        },
        clustering: FastClusteringConfig {
            num_clusters: 4,
            ..Default::default()
        },
        ..Default::default()
    };

    let sd = OfflineSpeakerDiarization::create(&config)
        .expect("Failed to initialize offline speaker diarization");

    let wave = Wave::read("./0-four-speakers-zh.wav").expect("Failed to read wave");

    assert_eq!(
        sd.sample_rate(),
        wave.sample_rate(),
        "Unexpected sample rate"
    );

    let result = sd
        .process(wave.samples())
        .expect("Failed to do speaker diarization");
    println!("Number of speakers: {}", result.num_speakers());
    println!("Number of segments: {}", result.num_segments());

    for s in result.sort_by_start_time() {
        println!("{:.3} -- {:.3} speaker_{:02}", s.start, s.end, s.speaker);
    }
}


================================================
FILE: rust-api-examples/examples/offline_speech_enhancement_dpdfnet.rs
================================================
use clap::Parser;
use sherpa_onnx::{
    write, OfflineSpeechDenoiser, OfflineSpeechDenoiserConfig,
    OfflineSpeechDenoiserDpdfNetModelConfig, Wave,
};

#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    #[arg(long)]
    model: String,

    #[arg(long)]
    input: String,

    #[arg(long)]
    output: String,
}

fn main() -> anyhow::Result<()> {
    let args = Args::parse();

    let config = OfflineSpeechDenoiserConfig {
        model: sherpa_onnx::OfflineSpeechDenoiserModelConfig {
            dpdfnet: OfflineSpeechDenoiserDpdfNetModelConfig {
                model: Some(args.model),
            },
            ..Default::default()
        },
    };

    let denoiser = OfflineSpeechDenoiser::create(&config)
        .ok_or_else(|| anyhow::anyhow!("Failed to create offline DPDFNet denoiser"))?;
    let wave =
        Wave::read(&args.input).ok_or_else(|| anyhow::anyhow!("Failed to read {}", args.input))?;

    let audio = denoiser.run(wave.samples(), wave.sample_rate());
    anyhow::ensure!(
        write(&args.output, &audio.samples, audio.sample_rate),
        "Failed to save {}",
        args.output
    );

    println!("Saved to {}", args.output);
    Ok(())
}


================================================
FILE: rust-api-examples/examples/offline_speech_enhancement_gtcrn.rs
================================================
use clap::Parser;
use sherpa_onnx::{
    write, OfflineSpeechDenoiser, OfflineSpeechDenoiserConfig,
    OfflineSpeechDenoiserGtcrnModelConfig, Wave,
};

#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    #[arg(long)]
    model: String,

    #[arg(long)]
    input: String,

    #[arg(long)]
    output: String,
}

fn main() -> anyhow::Result<()> {
    let args = Args::parse();

    let config = OfflineSpeechDenoiserConfig {
        model: sherpa_onnx::OfflineSpeechDenoiserModelConfig {
            gtcrn: OfflineSpeechDenoiserGtcrnModelConfig {
                model: Some(args.model),
            },
            ..Default::default()
        },
    };

    let denoiser = OfflineSpeechDenoiser::create(&config)
        .ok_or_else(|| anyhow::anyhow!("Failed to create offline GTCRN denoiser"))?;
    let wave =
        Wave::read(&args.input).ok_or_else(|| anyhow::anyhow!("Failed to read {}", args.input))?;

    let audio = denoiser.run(wave.samples(), wave.sample_rate());
    anyhow::ensure!(
        write(&args.output, &audio.samples, audio.sample_rate),
        "Failed to save {}",
        args.output
    );

    println!("Saved to {}", args.output);
    Ok(())
}


================================================
FILE: rust-api-examples/examples/online_punctuation.rs
================================================
// Copyright (c) 2026 zengyw
//
// This file demonstrates how to use online punctuation with sherpa-onnx's Rust API.
//
// See ../README.md for how to run it.

use clap::Parser;
use sherpa_onnx::{OnlinePunctuation, OnlinePunctuationConfig, OnlinePunctuationModelConfig};

/// Online punctuation example
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// Path to CNN-BiLSTM ONNX model
    #[arg(long)]
    cnn_bilstm: String,

    /// Path to BPE vocabulary file
    #[arg(long)]
    bpe_vocab: String,

    /// Number of threads
    #[arg(long, default_value_t = 1)]
    num_threads: i32,

    /// Provider (default: cpu)
    #[arg(long, default_value = "cpu")]
    provider: String,

    /// Enable debug logs
    #[arg(long, default_value_t = false)]
    debug: bool,
}

fn main() -> anyhow::Result<()> {
    let args = Args::parse();

    let config = OnlinePunctuationConfig {
        model: OnlinePunctuationModelConfig {
            cnn_bilstm: Some(args.cnn_bilstm),
            bpe_vocab: Some(args.bpe_vocab),
            num_threads: args.num_threads,
            provider: Some(args.provider),
            debug: args.debug,
            ..Default::default()
        },
    };

    let punct = OnlinePunctuation::create(&config)
        .ok_or_else(|| anyhow::anyhow!("Failed to create OnlinePunctuation"))?;

    let texts = [
        "how are you i am fine thank you",
        "The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry",
    ];

    println!("----------");
    for text in texts {
        let out = punct
            .add_punctuation(text)
            .ok_or_else(|| anyhow::anyhow!("Failed to add punctuation"))?;

        println!("Input text: {text}");
        println!("Output text: {out}");
        println!("----------");
    }

    Ok(())
}


================================================
FILE: rust-api-examples/examples/pocket_tts.rs
================================================
// Copyright (c) 2026 Xiaomi Corporation
//
// This file demonstrates how to use Pocket TTS with sherpa-onnx's Rust API
// for offline text-to-speech with zero-shot voice cloning.

use sherpa_onnx::{
    GenerationConfig, OfflineTts, OfflineTtsConfig, OfflineTtsPocketModelConfig, Wave,
};
use std::collections::HashMap;
use std::time::Instant;

fn main() {
    let config = OfflineTtsConfig {
        model: sherpa_onnx::OfflineTtsModelConfig {
            pocket: OfflineTtsPocketModelConfig {
                lm_flow: Some("./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_flow.int8.onnx".into()),
                lm_main: Some("./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_main.int8.onnx".into()),
                encoder: Some("./sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx".into()),
                decoder: Some("./sherpa-onnx-pocket-tts-int8-2026-01-26/decoder.int8.onnx".into()),
                text_conditioner: Some(
                    "./sherpa-onnx-pocket-tts-int8-2026-01-26/text_conditioner.onnx".into(),
                ),
                vocab_json: Some("./sherpa-onnx-pocket-tts-int8-2026-01-26/vocab.json".into()),
                token_scores_json: Some(
                    "./sherpa-onnx-pocket-tts-int8-2026-01-26/token_scores.json".into(),
                ),
                voice_embedding_cache_capacity: 50,
            },
            num_threads: 2,
            debug: false, // set to true to see verbose logs
            ..Default::default()
        },
        ..Default::default()
    };

    let tts = OfflineTts::create(&config).expect("Failed to create OfflineTts");

    println!("Sample rate: {}", tts.sample_rate());
    println!("Num speakers: {}", tts.num_speakers());

    let text = "Today as always, men fall into two groups: slaves and free men. Whoever \
        does not have two-thirds of his day for himself, is a slave, whatever \
        he may be: a statesman, a businessman, an official, or a scholar. \
        Friends fell out often because life was changing so fast. The easiest \
        thing in the world was to lose touch with someone.";

    // Read reference audio for zero-shot voice cloning
    let reference_audio_file = "./sherpa-onnx-pocket-tts-int8-2026-01-26/test_wavs/bria.wav";
    let wave = Wave::read(reference_audio_file).expect("Failed to read reference audio");

    let mut extra = HashMap::new();
    extra.insert(
        "max_reference_audio_len".to_string(),
        serde_json::json!(10.0),
    );
    extra.insert("seed".to_string(), serde_json::json!(42));

    let gen_config = GenerationConfig {
        speed: 1.0,
        reference_audio: Some(wave.samples().to_vec()),
        reference_sample_rate: wave.sample_rate(),
        extra: Some(extra),
        ..Default::default()
    };

    let start = Instant::now();

    let audio = tts
        .generate_with_config(
            text,
            &gen_config,
            Some(|_samples: &[f32], progress: f32| -> bool {
                println!("Progress: {:.1}%", progress * 100.0);
                true
            }),
        )
        .expect("Generation failed");

    let elapsed_seconds = start.elapsed().as_secs_f32();
    let duration = audio.samples().len() as f32 / audio.sample_rate() as f32;
    let rtf = elapsed_seconds / duration;

    println!("Number of threads: {}", config.model.num_threads);
    println!("Elapsed seconds: {:.3} s", elapsed_seconds);
    println!("Audio duration: {:.3} s", duration);
    println!(
        "Real-time factor (RTF): {:.3}/{:.3} = {:.3}",
        elapsed_seconds, duration, rtf
    );

    let filename = "./generated-pocket-en-rust.wav";
    if audio.save(filename) {
        println!("Saved to: {}", filename);
    } else {
        eprintln!("Failed to save {}", filename);
    }
}


================================================
FILE: rust-api-examples/examples/sense_voice.rs
================================================
// Copyright (c) 2026 Xiaomi Corporation
//
// This file demonstrates how to use SenseVoice with sherpa-onnx's Rust API
// for offline speech recognition.
//
// See ../README.md for how to run it.

use clap::Parser;
use sherpa_onnx::{OfflineRecognizer, OfflineRecognizerConfig, OfflineSenseVoiceModelConfig, Wave};
use std::time::Instant;

/// SenseVoice offline example
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// Path to WAV file
    #[arg(long)]
    wav: String,

    /// Path to SenseVoice ONNX model
    #[arg(long)]
    model: String,

    /// Path to tokens file
    #[arg(long)]
    tokens: String,

    /// Language, e.g., "auto", "en", "zh"
    #[arg(long, default_value = "auto")]
    language: String,

    /// Provider (default: cpu)
    #[arg(long, default_value = "cpu")]
    provider: String,

    /// Enable debug logs
    #[arg(long, default_value_t = false)]
    debug: bool,

    /// Enable inverse text normalization
    #[arg(long, default_value_t = true)]
    use_itn: bool,

    /// Number of threads
    #[arg(long, default_value_t = 2)]
    num_threads: i32,
}

fn main() {
    let args = Args::parse();

    let wave = Wave::read(&args.wav).expect("Failed to read WAV file");
    let audio_duration = wave.samples().len() as f64 / wave.sample_rate() as f64;

    let mut recognizer_config = OfflineRecognizerConfig::default();

    recognizer_config.model_config.sense_voice = OfflineSenseVoiceModelConfig {
        model: Some(args.model.clone()),
        language: Some(args.language.clone()),
        use_itn: args.use_itn,
    };

    recognizer_config.model_config.tokens = Some(args.tokens.clone());
    recognizer_config.model_config.provider = Some(args.provider.clone());
    recognizer_config.model_config.debug = args.debug;
    recognizer_config.model_config.num_threads = args.num_threads;

    // Measure recognizer creation time
    println!("Creating recognizer ...");
    let start_creation = Instant::now();
    let recognizer =
        OfflineRecognizer::create(&recognizer_config).expect("Failed to create OfflineRecognizer");
    let creation_elapsed = start_creation.elapsed().as_secs_f64();
    println!("Recognizer created in {:.3} seconds.", creation_elapsed);

    let stream = recognizer.create_stream();

    // Measure recognition time
    let start_recognition = Instant::now();
    stream.accept_waveform(wave.sample_rate(), wave.samples());
    recognizer.decode(&stream);
    let recognition_elapsed = start_recognition.elapsed().as_secs_f64();

    // Get recognition result
    if let Some(result) = stream.get_result() {
        println!("Decoded text: {}", result.text);

        let total_time = creation_elapsed + recognition_elapsed;
        let rtf = recognition_elapsed / audio_duration;

        println!("\n=== Performance Summary ===");
        println!("Audio duration          : {:.3} seconds", audio_duration);
        println!("Recognizer creation time: {:.3} seconds", creation_elapsed);
        println!(
            "Recognition time        : {:.3} seconds",
            recognition_elapsed
        );
        println!("Total elapsed time      : {:.3} seconds", total_time);

        // Detailed RTF computation log
        println!(
            "Real-Time Factor (RTF)  : {:.3} (recognition_elapsed / audio_duration = {:.3} / {:.3})",
            rtf, recognition_elapsed, audio_duration
        );

        println!(
            "Number of threads       : {}",
            recognizer_config.model_config.num_threads
        );
    } else {
        eprintln!("Failed to get recognition result");
    }
}


================================================
FILE: rust-api-examples/examples/silero_vad_remove_silence.rs
================================================
// Copyright (c) 2026 Xiaomi Corporation
//
// This file demonstrates how to use silero VAD with sherpa-onnx's
// Rust API to remove non-speech segments and save speech-only audio.
//
// See ../README.md for how to run it

use clap::Parser;
use sherpa_onnx::{self, SileroVadModelConfig, VadModelConfig, VoiceActivityDetector, Wave};

/// Simple VAD example: remove non-speech segments from a WAV file
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// Path to input WAV file
    #[arg(long)]
    input: String,

    /// Path to output WAV file
    #[arg(long)]
    output: String,

    /// Path to Silero VAD ONNX model
    #[arg(long)]
    silero_vad_model: String,
}

fn main() -> anyhow::Result<()> {
    let args = Args::parse();

    // Read WAV file
    let wave = Wave::read(&args.input)
        .ok_or_else(|| anyhow::anyhow!("Failed to read WAV file: {}", &args.input))?;
    let sample_rate = wave.sample_rate();
    let input_num_samples = wave.num_samples();
    let input_duration = input_num_samples as f32 / sample_rate as f32;

    println!(
        "Input WAV: sample rate: {}, num samples: {}, duration: {:.2}s",
        sample_rate, input_num_samples, input_duration
    );

    // Configure VAD
    let mut silero_config = SileroVadModelConfig::default();
    silero_config.model = Some(args.silero_vad_model);

    // You can tune the values below
    silero_config.threshold = 0.5;
    silero_config.min_silence_duration = 0.25;
    silero_config.min_speech_duration = 0.25;
    silero_config.max_speech_duration = 5.0;

    let vad_config = VadModelConfig {
        silero_vad: silero_config,
        ten_vad: Default::default(),
        sample_rate,
        num_threads: 1,
        provider: Some("cpu".to_string()),
        debug: false,
    };

    let vad = VoiceActivityDetector::create(&vad_config, 30.0)
        .expect("Failed to create VoiceActivityDetector");

    let mut speech_samples = Vec::new();
    const WINDOW_SIZE: usize = 512;

    for chunk in wave.samples().chunks(WINDOW_SIZE) {
        vad.accept_waveform(chunk);

        while let Some(seg) = vad.front() {
            speech_samples.extend_from_slice(seg.samples());
            vad.pop();
        }
    }

    vad.flush();
    while let Some(seg) = vad.front() {
        speech_samples.extend_from_slice(seg.samples());
        vad.pop();
    }

    // Write speech-only samples to output WAV
    let ok = sherpa_onnx::write(&args.output, &speech_samples, sample_rate);
    if ok {
        println!("Saved speech-only audio to {}", args.output);
    } else {
        println!("Failed to save speech-only audio to {}", args.output);
    }

    // Summary
    let output_num_samples = speech_samples.len();
    let output_duration = output_num_samples as f32 / sample_rate as f32;
    println!("\n=== Summary ===");
    println!(
        "Input:  sample rate = {}, samples = {}, duration = {:.2}s",
        sample_rate, input_num_samples, input_duration
    );
    println!(
        "Output: sample rate = {}, samples = {}, duration = {:.2}s",
        sample_rate, output_num_samples, output_duration
    );
    println!(
        "Removed non-speech: {:.2}% of input removed",
        100.0 * (1.0 - output_duration / input_duration)
    );

    Ok(())
}


================================================
FILE: rust-api-examples/examples/speaker_embedding_cosine_similarity.rs
================================================
use sherpa_onnx::{SpeakerEmbeddingExtractor, SpeakerEmbeddingExtractorConfig, Wave};

fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    assert_eq!(a.len(), b.len(), "Vectors must have the same length");

    let mut dot = 0.0_f32;
    let mut sum_a = 0.0_f32;
    let mut sum_b = 0.0_f32;

    for (&x, &y) in a.iter().zip(b.iter()) {
        dot += x * y;
        sum_a += x * x;
        sum_b += y * y;
    }

    let mag_a = sum_a.sqrt();
    let mag_b = sum_b.sqrt();
    if mag_a > 0.0 && mag_b > 0.0 {
        dot / (mag_a * mag_b)
    } else {
        0.0
    }
}

fn compute_embedding(extractor: &SpeakerEmbeddingExtractor, wave_filename: &str) -> Vec<f32> {
    let wave = Wave::read(wave_filename)
        .unwrap_or_else(|| panic!("Failed to read {}", wave_filename));
    let stream = extractor.create_stream().expect("Failed to create stream");
    stream.accept_waveform(wave.sample_rate(), wave.samples());
    stream.input_finished();

    if !extractor.is_ready(&stream) {
        panic!("{} is too short", wave_filename);
    }

    extractor
        .compute(&stream)
        .unwrap_or_else(|| panic!("Failed to compute embedding for {}", wave_filename))
}

fn main() {
    let config = SpeakerEmbeddingExtractorConfig {
        model: Some("./wespeaker_zh_cnceleb_resnet34.onnx".into()),
        num_threads: 1,
        debug: true,
        provider: Some("cpu".into()),
    };

    let extractor = SpeakerEmbeddingExtractor::create(&config)
        .expect("Failed to create SpeakerEmbeddingExtractor");

    let embedding1 = compute_embedding(&extractor, "./fangjun-sr-1.wav");
    let embedding2 = compute_embedding(&extractor, "./fangjun-sr-2.wav");
    let embedding3 = compute_embedding(&extractor, "./leijun-sr-1.wav");

    let score12 = cosine_similarity(&embedding1, &embedding2);
    let score13 = cosine_similarity(&embedding1, &embedding3);
    let score23 = cosine_similarity(&embedding2, &embedding3);

    println!("Score between spk1 and spk2: {}", score12);
    println!("Score between spk1 and spk3: {}", score13);
    println!("Score between spk2 and spk3: {}", score23);
}


================================================
FILE: rust-api-examples/examples/speaker_embedding_extractor.rs
================================================
use sherpa_onnx::{SpeakerEmbeddingExtractor, SpeakerEmbeddingExtractorConfig, Wave};

fn main() {
    let config = SpeakerEmbeddingExtractorConfig {
        model: Some("./3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx".into()),
        num_threads: 1,
        debug: true,
        provider: Some("cpu".into()),
    };

    let extractor = SpeakerEmbeddingExtractor::create(&config)
        .expect("Failed to create SpeakerEmbeddingExtractor");
    println!("Embedding dim: {}", extractor.dim());

    let wave = Wave::read("./sr-data/test/fangjun-test-sr-1.wav").expect("Failed to read wave");
    let stream = extractor.create_stream().expect("Failed to create stream");
    stream.accept_waveform(wave.sample_rate(), wave.samples());
    stream.input_finished();

    if !extractor.is_ready(&stream) {
        panic!("Input wave is too short");
    }

    let embedding = extractor.compute(&stream).expect("Failed to compute embedding");
    println!("Computed embedding with {} values", embedding.len());

    let n = usize::min(10, embedding.len());
    println!("First {} values: {:?}", n, &embedding[..n]);
}


================================================
FILE: rust-api-examples/examples/speaker_embedding_manager.rs
================================================
use sherpa_onnx::{
    SpeakerEmbeddingExtractor, SpeakerEmbeddingExtractorConfig, SpeakerEmbeddingManager, Wave,
};

fn compute_embedding(extractor: &SpeakerEmbeddingExtractor, filename: &str) -> Vec<f32> {
    let wave = Wave::read(filename).unwrap_or_else(|| panic!("Failed to read {}", filename));
    let stream = extractor.create_stream().expect("Failed to create stream");
    stream.accept_waveform(wave.sample_rate(), wave.samples());
    stream.input_finished();

    if !extractor.is_ready(&stream) {
        panic!("The input wave file {} is too short!", filename);
    }

    extractor
        .compute(&stream)
        .unwrap_or_else(|| panic!("Failed to compute embedding for {}", filename))
}

fn main() {
    let config = SpeakerEmbeddingExtractorConfig {
        model: Some("./3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx".into()),
        num_threads: 1,
        debug: true,
        provider: Some("cpu".into()),
    };

    let extractor = SpeakerEmbeddingExtractor::create(&config)
        .expect("Failed to create SpeakerEmbeddingExtractor");
    let manager = SpeakerEmbeddingManager::create(extractor.dim())
        .expect("Failed to create SpeakerEmbeddingManager");

    let spk1 = vec![
        compute_embedding(&extractor, "./sr-data/enroll/fangjun-sr-1.wav"),
        compute_embedding(&extractor, "./sr-data/enroll/fangjun-sr-2.wav"),
        compute_embedding(&extractor, "./sr-data/enroll/fangjun-sr-3.wav"),
    ];
    let spk2 = vec![
        compute_embedding(&extractor, "./sr-data/enroll/leijun-sr-1.wav"),
        compute_embedding(&extractor, "./sr-data/enroll/leijun-sr-2.wav"),
    ];

    assert!(manager.add_list("fangjun", &spk1));
    assert!(manager.contains("fangjun"));

    let flattened_spk2: Vec<f32> = spk2.iter().flat_map(|v| v.iter().copied()).collect();
    assert!(manager.add_list_flattened("leijun", &flattened_spk2));
    assert!(manager.contains("leijun"));
    assert_eq!(manager.num_speakers(), 2);

    println!("Registered speakers: {:?}", manager.get_all_speakers());

    let v1 = compute_embedding(&extractor, "./sr-data/test/fangjun-test-sr-1.wav");
    let v2 = compute_embedding(&extractor, "./sr-data/test/leijun-test-sr-1.wav");
    let v3 = compute_embedding(&extractor, "./sr-data/test/liudehua-test-sr-1.wav");

    let threshold = 0.6;

    println!(
        "fangjun-test-sr-1.wav => {}",
        manager.search(&v1, threshold).unwrap_or_else(|| "unknown".to_string())
    );
    println!(
        "leijun-test-sr-1.wav => {}",
        manager.search(&v2, threshold).unwrap_or_else(|| "unknown".to_string())
    );
    println!(
        "liudehua-test-sr-1.wav => {}",
        manager.search(&v3, threshold).unwrap_or_else(|| "unknown".to_string())
    );

    let best_matches = manager.get_best_matches(&v1, threshold, 2);
    println!("Best matches for fangjun-test-sr-1.wav: {:?}", best_matches);

    println!("fangjun verification for v1: {}", manager.verify("fangjun", &v1, threshold));
    println!("fangjun verification for v2: {}", manager.verify("fangjun", &v2, threshold));

    assert!(manager.remove("fangjun"));
    println!("After removing fangjun: {:?}", manager.get_all_speakers());

    assert!(manager.remove("leijun"));
    println!("After removing leijun: {:?}", manager.get_all_speakers());
}


================================================
FILE: rust-api-examples/examples/spoken_language_identification.rs
================================================
// Copyright (c) 2026 Xiaomi Corporation
//
// This file demonstrates how to use sherpa-onnx's Rust API for spoken language
// identification.
//
// See ../README.md for how to run it.

use clap::Parser;
use sherpa_onnx::{
    SpokenLanguageIdentification, SpokenLanguageIdentificationConfig,
    SpokenLanguageIdentificationWhisperConfig, Wave,
};
use std::time::Instant;

#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    #[arg(long)]
    wav: String,

    #[arg(long)]
    whisper_encoder: String,

    #[arg(long)]
    whisper_decoder: String,

    #[arg(long, default_value_t = 0)]
    tail_paddings: i32,

    #[arg(long, default_value_t = 1)]
    num_threads: i32,

    #[arg(long, default_value = "cpu")]
    provider: String,

    #[arg(long, default_value_t = false)]
    debug: bool,
}

fn main() -> anyhow::Result<()> {
    let args = Args::parse();
    let wave = Wave::read(&args.wav).ok_or_else(|| anyhow::anyhow!("Failed to read WAV file"))?;
    let audio_duration = wave.num_samples() as f64 / wave.sample_rate() as f64;

    let config = SpokenLanguageIdentificationConfig {
        whisper: SpokenLanguageIdentificationWhisperConfig {
            encoder: Some(args.whisper_encoder),
            decoder: Some(args.whisper_decoder),
            tail_paddings: args.tail_paddings,
        },
        num_threads: args.num_threads,
        provider: Some(args.provider),
        debug: args.debug,
    };

    let slid = SpokenLanguageIdentification::create(&config)
        .ok_or_else(|| anyhow::anyhow!("Failed to create SpokenLanguageIdentification"))?;

    let stream = slid.create_stream();
    let start = Instant::now();
    stream.accept_waveform(wave.sample_rate(), wave.samples());
    let result = slid
        .compute(&stream)
        .ok_or_else(|| anyhow::anyhow!("Failed to compute spoken language identification result"))?;
    let elapsed = start.elapsed().as_secs_f64();

    println!("File: {}", args.wav);
    println!("Detected language: {}", result.lang);
    println!("Elapsed seconds: {:.3}", elapsed);
    println!("Audio duration in seconds: {:.3}", audio_duration);
    println!("RTF: {:.3}/{:.3} = {:.3}", elapsed, audio_duration, elapsed / audio_duration);

    Ok(())
}


================================================
FILE: rust-api-examples/examples/streaming_speech_enhancement_dpdfnet.rs
================================================
use clap::Parser;
use sherpa_onnx::{
    write, OfflineSpeechDenoiserDpdfNetModelConfig, OnlineSpeechDenoiser, OnlineSpeechDenoiserConfig,
    Wave,
};

#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    #[arg(long)]
    model: String,

    #[arg(long)]
    input: String,

    #[arg(long)]
    output: String,
}

fn main() -> anyhow::Result<()> {
    let args = Args::parse();

    let config = OnlineSpeechDenoiserConfig {
        model: sherpa_onnx::OfflineSpeechDenoiserModelConfig {
            dpdfnet: OfflineSpeechDenoiserDpdfNetModelConfig {
                model: Some(args.model),
            },
            ..Default::default()
        },
    };

    let denoiser = OnlineSpeechDenoiser::create(&config)
        .ok_or_else(|| anyhow::anyhow!("Failed to create streaming DPDFNet denoiser"))?;
    let wave =
        Wave::read(&args.input).ok_or_else(|| anyhow::anyhow!("Failed to read {}", args.input))?;

    let frame_shift = denoiser.frame_shift_in_samples() as usize;
    let mut enhanced = Vec::new();

    for chunk in wave.samples().chunks(frame_shift.max(1)) {
        let audio = denoiser.run(chunk, wave.sample_rate());
        enhanced.extend_from_slice(&audio.samples);
    }

    enhanced.extend_from_slice(&denoiser.flush().samples);

    anyhow::ensure!(
        write(&args.output, &enhanced, denoiser.sample_rate()),
        "Failed to save {}",
        args.output
    );

    println!("Saved to {}", args.output);
    Ok(())
}


================================================
FILE: rust-api-examples/examples/streaming_speech_enhancement_gtcrn.rs
================================================
use clap::Parser;
use sherpa_onnx::{
    write, OfflineSpeechDenoiserGtcrnModelConfig, OnlineSpeechDenoiser, OnlineSpeechDenoiserConfig,
    Wave,
};

#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    #[arg(long)]
    model: String,

    #[arg(long)]
    input: String,

    #[arg(long)]
    output: String,
}

fn main() -> anyhow::Result<()> {
    let args = Args::parse();

    let config = OnlineSpeechDenoiserConfig {
        model: sherpa_onnx::OfflineSpeechDenoiserModelConfig {
            gtcrn: OfflineSpeechDenoiserGtcrnModelConfig {
                model: Some(args.model),
            },
            ..Default::default()
        },
    };

    let denoiser = OnlineSpeechDenoiser::create(&config)
        .ok_or_else(|| anyhow::anyhow!("Failed to create streaming GTCRN denoiser"))?;
    let wave =
        Wave::read(&args.input).ok_or_else(|| anyhow::anyhow!("Failed to read {}", args.input))?;

    let frame_shift = denoiser.frame_shift_in_samples() as usize;
    let mut enhanced = Vec::new();

    for chunk in wave.samples().chunks(frame_shift.max(1)) {
        let audio = denoiser.run(chunk, wave.sample_rate());
        enhanced.extend_from_slice(&audio.samples);
    }

    enhanced.extend_from_slice(&denoiser.flush().samples);

    anyhow::ensure!(
        write(&args.output, &enhanced, denoiser.sample_rate()),
        "Failed to save {}",
        args.output
    );

    println!("Saved to {}", args.output);
    Ok(())
}


================================================
FILE: rust-api-examples/examples/streaming_zipformer.rs
================================================
// Copyright (c)  2026  Xiaomi Corporation
//
// This file demonstrates how to use streaming Zipformer with sherpa-onnx's
// Rust API for speech recognition.
//
// See ../README.md for how to run it
//
// Note that even if we use a wave file as an example, this model supports
// real-time streaming speech recognition.
// See ./streaming_zipformer_microphone.rs for how to do real-time
// streaming speech recognition from a microphone.

use clap::Parser;
use sherpa_onnx::{OnlineRecognizer, OnlineRecognizerConfig, Wave};

/// Simple streaming Zipformer example
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// Path to WAV file
    #[arg(long)]
    wav: String,

    /// Path to encoder ONNX model
    #[arg(long)]
    encoder: String,

    /// Path to decoder ONNX model
    #[arg(long)]
    decoder: String,

    /// Path to joiner ONNX model
    #[arg(long)]
    joiner: String,

    /// Path to tokens file
    #[arg(long)]
    tokens: String,

    /// Provider (default: cpu)
    #[arg(long, default_value = "cpu")]
    provider: String,

    /// Enable debug logs
    #[arg(long, default_value_t = false)]
    debug: bool,
}

fn main() {
    let args = Args::parse();

    let wave = Wave::read(&args.wav).expect("Failed to read WAV file");

    let mut recognizer_config = OnlineRecognizerConfig::default();
    recognizer_config.model_config.transducer.encoder = Some(args.encoder.clone());
    recognizer_config.model_config.transducer.decoder = Some(args.decoder.clone());
    recognizer_config.model_config.transducer.joiner = Some(args.joiner.clone());
    recognizer_config.model_config.tokens = Some(args.tokens.clone());
    recognizer_config.model_config.provider = Some(args.provider.clone());
    recognizer_config.enable_endpoint = true;
    recognizer_config.model_config.debug = args.debug;
    recognizer_config.decoding_method = Some("greedy_search".to_string());

    let recognizer =
        OnlineRecognizer::create(&recognizer_config).expect("Failed to create OnlineRecognizer");

    let stream = recognizer.create_stream();
    let mut segment_id = 0;

    // use any positive value as you like
    const CHUNK_SIZE: usize = 3200;

    println!(
        "Sample rate: {}, num samples: {}, duration: {:.2}s",
        wave.sample_rate(),
        wave.num_samples(),
        wave.num_samples() as f32 / wave.sample_rate() as f32
    );

    // Process in chunks
    for chunk in wave.samples().chunks(CHUNK_SIZE) {
        stream.accept_waveform(wave.sample_rate(), chunk);

        while recognizer.is_ready(&stream) {
            recognizer.decode(&stream);

            if let Some(result) = recognizer.get_result(&stream) {
                if !result.text.is_empty() {
                    println!("Segment {}: {}", segment_id, result.text);
                }
            }

            if recognizer.is_endpoint(&stream) {
                recognizer.reset(&stream);
                segment_id += 1;
            }
        }
    }

    // Tail padding (~0.3s)
    let tail_padding_len = (wave.sample_rate() as f32 * 0.3).round() as usize;
    let tail_padding = vec![0.0f32; tail_padding_len];
    stream.accept_waveform(wave.sample_rate(), &tail_padding);

    stream.input_finished();

    while recognizer.is_ready(&stream) {
        recognizer.decode(&stream);
        if let Some(result) = recognizer.get_result(&stream) {
            if !result.text.is_empty() {
                println!("Segment {}: {}", segment_id, result.text);
            }
        }
    }

    println!("Transcription finished.");
}


================================================
FILE: rust-api-examples/examples/streaming_zipformer_microphone.rs
================================================
// Copyright (c)  2026  Xiaomi Corporation
//
// This file demonstrates how to use streaming Zipformer with sherpa-onnx's
// Rust API for real-time streaming speech recognition with a microphone.
//
// See ../README.md for how to run it
//
// See ./streaming_zipformer.rs for how to recognize a wave file.

use anyhow::Result;
use clap::Parser;
use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
use cpal::SampleFormat;
use sherpa_onnx::{DisplayManager, OnlineRecognizer, OnlineRecognizerConfig};
use std::sync::mpsc;

/// Command-line arguments
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    #[arg(long)]
    encoder: String,
    #[arg(long)]
    decoder: String,
    #[arg(long)]
    joiner: String,
    #[arg(long)]
    tokens: String,
    #[arg(long, default_value = "cpu")]
    provider: String,
    #[arg(long, default_value_t = false)]
    debug: bool,
    #[arg(long, default_value_t = 3200)]
    chunk_size: usize,
}

/// List input devices and return the default one
fn list_input_devices(host: &cpal::Host) -> Result<cpal::Device> {
    let default_input = host.default_input_device();
    let default_name = default_input.as_ref().map(|d| d.name().unwrap_or_default());

    println!("Available input devices:");
    for device in host.input_devices()? {
        let name = device.name().unwrap_or("<unknown>".to_string());
        let mark = if Some(&name) == default_name.as_ref() {
            "*"
        } else {
            " "
        };
        println!("{} {}", mark, name);
    }

    let device = default_input.ok_or_else(|| anyhow::anyhow!("No default input device"))?;

    println!("\nUsing default device: {}", device.name()?);
    Ok(device)
}

/// Create and configure the OnlineRecognizer
fn setup_recognizer(args: &Args) -> OnlineRecognizer {
    let mut config = OnlineRecognizerConfig::default();
    config.model_config.transducer.encoder = Some(args.encoder.clone());
    config.model_config.transducer.decoder = Some(args.decoder.clone());
    config.model_config.transducer.joiner = Some(args.joiner.clone());
    config.model_config.tokens = Some(args.tokens.clone());
    config.model_config.provider = Some(args.provider.clone());
    config.model_config.debug = args.debug;
    config.enable_endpoint = true;
    config.decoding_method = Some("greedy_search".to_string());

    OnlineRecognizer::create(&config).expect("Failed to create OnlineRecognizer")
}

/// Build the audio input stream (producer)
fn build_input_stream(device: &cpal::Device, tx: mpsc::Sender<Vec<f32>>) -> Result<cpal::Stream> {
    let supported = device.default_input_config()?;
    let config = supported.config();
    let sample_format = supported.sample_format();
    let channels = config.channels as usize;

    let err_fn = |err| eprintln!("Audio stream error: {:?}", err);

    println!(
        "Input format: {:?}, channels: {}, sample_rate: {}",
        sample_format, channels, config.sample_rate.0
    );

    let stream = match sample_format {
        SampleFormat::F32 => device.build_input_stream(
            &config,
            move |data: &[f32], _| {
                if data.is_empty() {
                    return;
                }

                let mono: Vec<f32> = data
                    .chunks(channels)
                    .map(|frame| {
                        let sum: f32 = frame.iter().copied().sum();
                        sum / channels as f32
                    })
                    .collect();
                let _ = tx.send(mono);
            },
            err_fn,
            None,
        )?,

        SampleFormat::I16 => device.build_input_stream(
            &config,
            move |data: &[i16], _| {
                if data.is_empty() {
                    return;
                }

                let mono: Vec<f32> = data
                    .chunks(channels)
                    .map(|frame| {
                        let sum: f32 = frame.iter().map(|&s| s as f32 / i16::MAX as f32).sum();
                        sum / channels as f32
                    })
                    .collect();

                let _ = tx.send(mono);
            },
            err_fn,
            None,
        )?,

        SampleFormat::U16 => device.build_input_stream(
            &config,
            move |data: &[u16], _| {
                if data.is_empty() {
                    return;
                }

                let mono: Vec<f32> = data
                    .chunks(channels)
                    .map(|frame| {
                        let sum: f32 = frame
                            .iter()
                            .map(|&s| {
                                let centered = s as f32 - 32768.0;
                                centered / 32768.0
                            })
                            .sum();
                        sum / channels as f32
                    })
                    .collect();

                let _ = tx.send(mono);
            },
            err_fn,
            None,
        )?,

        other => anyhow::bail!("Unsupported sample format: {:?}", other),
    };

    Ok(stream)
}

/// Main recognition loop (consumer)
fn run_recognition_loop(
    rx: mpsc::Receiver<Vec<f32>>,
    recognizer: &OnlineRecognizer,
    stream: &mut sherpa_onnx::OnlineStream,
    chunk_size: usize,
    sample_rate: i32,
) {
    let mut display = DisplayManager::new();
    let mut buffer = Vec::<f32>::new();

    loop {
        match rx.recv() {
            Ok(samples) => {
                buffer.extend_from_slice(&samples);
            }
            Err(_) => {
                println!("\nAudio stream closed. Exiting.");
                break;
            }
        }

        while buffer.len() >= chunk_size {
            let chunk: Vec<f32> = buffer.drain(..chunk_size).collect();
            stream.accept_waveform(sample_rate, &chunk);

            while recognizer.is_ready(&stream) {
                recognizer.decode(&stream);

                if let Some(result) = recognizer.get_result(&stream) {
                    let text = result.text;
                    if !text.is_empty() {
                        display.update_text(&text);
                    }
                }

                if recognizer.is_endpoint(&stream) {
                    if let Some(result) = recognizer.get_result(&stream) {
                        if !result.text.is_empty() {
                            display.finalize_sentence();
                        }
                    }
                    recognizer.reset(&stream);
                }
            }
        }

        display.render();
    }
}

fn main() -> Result<()> {
    let args = Args::parse();
    let host = cpal::default_host();

    let device = list_input_devices(&host)?;

    let supported = device.default_input_config()?;
    let sample_rate = supported.sample_rate().0 as i32;

    let recognizer = setup_recognizer(&args);
    let mut stream = recognizer.create_stream();

    let (tx, rx) = mpsc::channel::<Vec<f32>>();
    let audio_stream = build_input_stream(&device, tx)?;
    audio_stream.play()?;

    println!("Streaming microphone ASR... Press Ctrl+C to stop.");

    run_recognition_loop(rx, &recognizer, &mut stream, args.chunk_size, sample_rate);

    Ok(())
}


================================================
FILE: rust-api-examples/examples/supertonic_tts.rs
================================================
// Copyright (c) 2026 Xiaomi Corporation
//
// This file demonstrates how to use Supertonic TTS with sherpa-onnx's Rust API
// for offline text-to-speech.

use sherpa_onnx::{
    GenerationConfig, OfflineTts, OfflineTtsConfig, OfflineTtsSupertonicModelConfig,
};
use std::collections::HashMap;
use std::time::Instant;

fn main() {
    let config = OfflineTtsConfig {
        model: sherpa_onnx::OfflineTtsModelConfig {
            supertonic: OfflineTtsSupertonicModelConfig {
                duration_predictor: Some(
                    "./sherpa-onnx-supertonic-tts-int8-2026-03-06/duration_predictor.int8.onnx"
                        .into(),
                ),
                text_encoder: Some(
                    "./sherpa-onnx-supertonic-tts-int8-2026-03-06/text_encoder.int8.onnx".into(),
                ),
                vector_estimator: Some(
                    "./sherpa-onnx-supertonic-tts-int8-2026-03-06/vector_estimator.int8.onnx"
                        .into(),
                ),
                vocoder: Some(
                    "./sherpa-onnx-supertonic-tts-int8-2026-03-06/vocoder.int8.onnx".into(),
                ),
                tts_json: Some("./sherpa-onnx-supertonic-tts-int8-2026-03-06/tts.json".into()),
                unicode_indexer: Some(
                    "./sherpa-onnx-supertonic-tts-int8-2026-03-06/unicode_indexer.bin".into(),
                ),
                voice_style: Some("./sherpa-onnx-supertonic-tts-int8-2026-03-06/voice.bin".into()),
            },
            num_threads: 2,
            debug: false, // set to true to see verbose logs
            ..Default::default()
        },
        ..Default::default()
    };

    let tts = OfflineTts::create(&config).expect("Failed to create OfflineTts");

    println!("Sample rate: {}", tts.sample_rate());
    println!("Num speakers: {}", tts.num_speakers());

    let text = "Today as always, men fall into two groups: slaves and free men. Whoever \
        does not have two-thirds of his day for himself, is a slave, whatever \
        he may be: a statesman, a businessman, an official, or a scholar.";

    let mut extra = HashMap::new();
    extra.insert("lang".to_string(), serde_json::json!("en"));

    let gen_config = GenerationConfig {
        sid: 6,
        num_steps: 5,
        speed: 1.25,
        extra: Some(extra),
        ..Default::default()
    };

    let start = Instant::now();

    let audio = tts
        .generate_with_config(
            text,
            &gen_config,
            Some(|_samples: &[f32], progress: f32| -> bool {
                println!("Progress: {:.1}%", progress * 100.0);
                true
            }),
        )
        .expect("Generation failed");

    let elapsed_seconds = start.elapsed().as_secs_f32();
    let duration = audio.samples().len() as f32 / audio.sample_rate() as f32;
    let rtf = elapsed_seconds / duration;

    println!("Number of threads: {}", config.model.num_threads);
    println!("Elapsed seconds: {:.3} s", elapsed_seconds);
    println!("Audio duration: {:.3} s", duration);
    println!(
        "Real-time factor (RTF): {:.3}/{:.3} = {:.3}",
        elapsed_seconds, duration, rtf
    );

    let filename = "./generated-supertonic-en-rust.wav";
    if audio.save(filename) {
        println!("Saved to: {}", filename);
    } else {
        eprintln!("Failed to save {}", filename);
    }
}


================================================
FILE: rust-api-examples/examples/version.rs
================================================
use sherpa_onnx;

fn main() {
    println!("Version : {}", sherpa_onnx::version());
    println!("Git SHA1: {}", sherpa_onnx::git_sha1());
    println!("Git date: {}", sherpa_onnx::git_date());
}


================================================
FILE: rust-api-examples/examples/vits_tts.rs
================================================
// Copyright (c) 2026 Xiaomi Corporation
//
// This file demonstrates how to use a Piper VITS TTS model with sherpa-onnx's
// Rust API for offline text-to-speech.

use clap::Parser;
use sherpa_onnx::{
    GenerationConfig, OfflineTts, OfflineTtsConfig, OfflineTtsVitsModelConfig,
};
use std::time::Instant;

#[derive(Parser, Debug)]
#[command(author, version, about)]
struct Args {
    /// Path to the VITS/Piper model
    #[arg(long)]
    model: String,

    /// Path to tokens.txt
    #[arg(long)]
    tokens: String,

    /// Path to espeak-ng-data
    #[arg(long)]
    data_dir: String,

    /// Input text to synthesize
    #[arg(long)]
    text: String,

    /// Output wave filename
    #[arg(long, default_value = "./generated-vits-rust.wav")]
    output: String,

    /// Speaker ID for multi-speaker models
    #[arg(long, default_value_t = 0)]
    sid: i32,

    /// Speech speed; larger means faster
    #[arg(long, default_value_t = 1.0)]
    speed: f32,

    /// Number of threads
    #[arg(long, default_value_t = 2)]
    num_threads: i32,

    /// Show debug logs from sherpa-onnx
    #[arg(long, default_value_t = false)]
    debug: bool,
}

fn main() {
    let args = Args::parse();

    let config = OfflineTtsConfig {
        model: sherpa_onnx::OfflineTtsModelConfig {
            vits: OfflineTtsVitsModelConfig {
                model: Some(args.model.clone()),
                tokens: Some(args.tokens.clone()),
                noise_scale: 0.667,
                noise_scale_w: 0.8,
                length_scale: 1.0,
                data_dir: Some(args.data_dir.clone()),
                ..Default::default()
            },
            num_threads: args.num_threads,
            debug: args.debug,
            ..Default::default()
        },
        ..Default::default()
    };

    let tts = OfflineTts::create(&config).expect("Failed to create OfflineTts");

    println!("Sample rate: {}", tts.sample_rate());
    println!("Num speakers: {}", tts.num_speakers());

    let gen_config = GenerationConfig {
        sid: args.sid,
        speed: args.speed,
        ..Default::default()
    };

    let start = Instant::now();

    let audio = tts
        .generate_with_config(
            &args.text,
            &gen_config,
            Some(|_samples: &[f32], progress: f32| -> bool {
                println!("Progress: {:.1}%", progress * 100.0);
                true
            }),
        )
        .expect("Generation failed");

    let elapsed_seconds = start.elapsed().as_secs_f32();
    let duration = audio.samples().len() as f32 / audio.sample_rate() as f32;
    let rtf = elapsed_seconds / duration;

    println!("Number of threads: {}", config.model.num_threads);
    println!("Elapsed seconds: {:.3} s", elapsed_seconds);
    println!("Audio duration: {:.3} s", duration);
    println!(
        "Real-time factor (RTF): {:.3}/{:.3} = {:.3}",
        elapsed_seconds, duration, rtf
    );

    if audio.save(&args.output) {
        println!("Saved to: {}", args.output);
    } else {
        eprintln!("Failed to save {}", args.output);
    }
}


================================================
FILE: rust-api-examples/examples/zipvoice_tts.rs
================================================
// Copyright (c) 2026 Xiaomi Corporation
//
// This file demonstrates how to use ZipVoice TTS with sherpa-onnx's Rust API
// for offline zero-shot text-to-speech.

use sherpa_onnx::{
    GenerationConfig, OfflineTts, OfflineTtsConfig, OfflineTtsZipvoiceModelConfig, Wave,
};
use std::collections::HashMap;
use std::time::Instant;

fn main() {
    let config = OfflineTtsConfig {
        model: sherpa_onnx::OfflineTtsModelConfig {
            zipvoice: OfflineTtsZipvoiceModelConfig {
                tokens: Some("./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/tokens.txt".into()),
                encoder: Some(
                    "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/encoder.int8.onnx".into(),
                ),
                decoder: Some(
                    "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/decoder.int8.onnx".into(),
                ),
                vocoder: Some("./vocos_24khz.onnx".into()),
                data_dir: Some(
                    "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/espeak-ng-data".into(),
                ),
                lexicon: Some(
                    "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/lexicon.txt".into(),
                ),
                feat_scale: 0.1,
                t_shift: 0.5,
                target_rms: 0.1,
                guidance_scale: 1.0,
            },
            num_threads: 2,
            debug: false,
            ..Default::default()
        },
        ..Default::default()
    };

    let tts = OfflineTts::create(&config).expect("Failed to create OfflineTts");

    println!("Sample rate: {}", tts.sample_rate());
    println!("Num speakers: {}", tts.num_speakers());

    let text = "小米的价值观是真诚, 热爱. 真诚，就是不欺人也不自欺. 热爱, 就是全心投入并享受其中.";
    let reference_text = "那还是三十六年前, 一九八七年. 我呢考上了武汉大学的计算机系.";
    let reference_audio_file =
        "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/test_wavs/leijun-1.wav";

    let wave = Wave::read(reference_audio_file).expect("Failed to read reference audio");

    let mut extra = HashMap::new();
    extra.insert("min_char_in_sentence".to_string(), serde_json::json!(10));

    let gen_config = GenerationConfig {
        speed: 1.0,
        reference_audio: Some(wave.samples().to_vec()),
        reference_sample_rate: wave.sample_rate(),
        reference_text: Some(reference_text.to_string()),
        num_steps: 4,
        extra: Some(extra),
        ..Default::default()
    };

    let start = Instant::now();

    let audio = tts
        .generate_with_config(
            text,
            &gen_config,
            Some(|_samples: &[f32], progress: f32| -> bool {
                println!("Progress: {:.1}%", progress * 100.0);
                true
            }),
        )
        .expect("Generation failed");

    let elapsed_seconds = start.elapsed().as_secs_f32();
    let duration = audio.samples().len() as f32 / audio.sample_rate() as f32;
    let rtf = elapsed_seconds / duration;

    println!("Number of threads: {}", config.model.num_threads);
    println!("Elapsed seconds: {:.3} s", elapsed_seconds);
    println!("Audio duration: {:.3} s", duration);
    println!(
        "Real-time factor (RTF): {:.3}/{:.3} = {:.3}",
        elapsed_seconds, duration, rtf
    );

    let filename = "./generated-zipvoice-zh-en-rust.wav";
    if audio.save(filename) {
        println!("Saved to: {}", filename);
    } else {
        eprintln!("Failed to save {}", filename);
    }
}


================================================
FILE: rust-api-examples/run-audio-tagging-ced.sh
================================================
#!/usr/bin/env bash
set -ex

if [ ! -f ./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2
  tar xvf sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2
  rm sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2
fi

cargo run --example audio_tagging_ced


================================================
FILE: rust-api-examples/run-audio-tagging-zipformer.sh
================================================
#!/usr/bin/env bash
set -ex

if [ ! -f ./sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
  tar xvf sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
  rm sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
fi

cargo run --example audio_tagging_zipformer


================================================
FILE: rust-api-examples/run-fire-red-asr-ctc.sh
================================================
#!/usr/bin/env bash
set -ex

# see
# https://k2-fsa.github.io/sherpa/onnx/FireRedAsr/pretrained.html
if [ ! -f ./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/model.int8.onnx ]; then
  curl -SsL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2

  tar xvf sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
  rm sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
  ls -lh sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25
fi

cargo run --example fire_red_asr_ctc -- \
    --wav ./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/test_wavs/1.wav \
    --model ./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/model.int8.onnx \
    --tokens ./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/tokens.txt \
    --num-threads 2 \
    --debug


================================================
FILE: rust-api-examples/run-keyword-spotter.sh
================================================
#!/usr/bin/env bash
set -ex

repo=sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile
if [ ! -f ./$repo/encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/$repo.tar.bz2
  tar xvf $repo.tar.bz2
  rm $repo.tar.bz2
fi

cargo run --example keyword_spotter --   --wav ./$repo/test_wavs/3.wav   --encoder ./$repo/encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx   --decoder ./$repo/decoder-epoch-12-avg-2-chunk-16-left-64.onnx   --joiner ./$repo/joiner-epoch-12-avg-2-chunk-16-left-64.int8.onnx   --tokens ./$repo/tokens.txt   --keywords-file ./$repo/test_wavs/test_keywords.txt   --provider cpu   --num-threads 1


================================================
FILE: rust-api-examples/run-kitten-tts-en.sh
================================================
#!/usr/bin/env bash
set -ex

if [ ! -f ./kitten-nano-en-v0_1-fp16/model.fp16.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2
  tar xf kitten-nano-en-v0_1-fp16.tar.bz2
  rm kitten-nano-en-v0_1-fp16.tar.bz2
fi

cargo run --example kitten_tts_en


================================================
FILE: rust-api-examples/run-kokoro-tts-en.sh
================================================
#!/usr/bin/env bash
set -ex

if [ ! -f ./kokoro-en-v0_19/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
  tar xf kokoro-en-v0_19.tar.bz2
  rm kokoro-en-v0_19.tar.bz2
fi

cargo run --example kokoro_tts_en


================================================
FILE: rust-api-examples/run-kokoro-tts-zh-en.sh
================================================
#!/usr/bin/env bash
set -ex

if [ ! -f ./kokoro-multi-lang-v1_0/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
  tar xf kokoro-multi-lang-v1_0.tar.bz2
  rm kokoro-multi-lang-v1_0.tar.bz2
fi

cargo run --example kokoro_tts_zh_en


================================================
FILE: rust-api-examples/run-matcha-tts-en.sh
================================================
#!/usr/bin/env bash
set -ex

if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
  tar xf matcha-icefall-en_US-ljspeech.tar.bz2
  rm matcha-icefall-en_US-ljspeech.tar.bz2
fi

if [ ! -f ./vocos-22khz-univ.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
fi

cargo run --example matcha_tts_en


================================================
FILE: rust-api-examples/run-matcha-tts-zh.sh
================================================
#!/usr/bin/env bash
set -ex

if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
  tar xvf matcha-icefall-zh-baker.tar.bz2
  rm matcha-icefall-zh-baker.tar.bz2
fi

if [ ! -f ./vocos-22khz-univ.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
fi

cargo run --example matcha_tts_zh


================================================
FILE: rust-api-examples/run-moonshine-v2.sh
================================================
#!/usr/bin/env bash
set -ex

# see
# https://k2-fsa.github.io/sherpa/onnx/moonshine
if [ ! -f ./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/encoder_model.ort ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
  tar xvf sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
  rm sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
fi

cargo run --example moonshine_v2 -- \
    --wav ./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/test_wavs/0.wav \
    --encoder ./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/encoder_model.ort \
    --decoder ./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/decoder_model_merged.ort \
    --tokens ./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/tokens.txt \
    --num-threads 2


================================================
FILE: rust-api-examples/run-nemo-parakeet-en.sh
================================================
#!/usr/bin/env bash
set -ex

# See also
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/nemo-transducer-models.html#sherpa-onnx-nemo-parakeet-tdt-0-6b-v2-int8-english
if [ ! -f "./sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/encoder.int8.onnx" ]; then
    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2
    tar xvf sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2
    rm sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2
    ls -lh sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8
fi

# Run Rust Nemo Parakeet example
cargo run --example nemo_parakeet -- \
    --wav "./sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/test_wavs/0.wav" \
    --encoder "./sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/encoder.int8.onnx" \
    --decoder "./sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/decoder.int8.onnx" \
    --joiner "./sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/joiner.int8.onnx" \
    --tokens "./sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/tokens.txt" \
    --provider cpu \
    --num-threads 2 \
    --debug


================================================
FILE: rust-api-examples/run-offline-punctuation.sh
================================================
#!/usr/bin/env bash
set -ex

repo=sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12-int8
if [ ! -f ./$repo/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/$repo.tar.bz2
  tar xvf $repo.tar.bz2
  rm $repo.tar.bz2
fi

cargo run --example offline_punctuation --   --ct-transformer ./$repo/model.int8.onnx   --provider cpu   --num-threads 1


================================================
FILE: rust-api-examples/run-offline-speaker-diarization.sh
================================================
#!/usr/bin/env bash
set -ex

if [ ! -f ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
fi

if [ ! -f ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
fi

if [ ! -f ./0-four-speakers-zh.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
fi

cargo run --example offline_speaker_diarization


================================================
FILE: rust-api-examples/run-offline-speech-enhancement-dpdfnet.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -f ./dpdfnet_baseline.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet_baseline.onnx
fi

if [ ! -f ./inp_16k.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
fi

cargo run --example offline_speech_enhancement_dpdfnet -- \
  --model ./dpdfnet_baseline.onnx \
  --input ./inp_16k.wav \
  --output ./enhanced-rust-dpdfnet.wav


================================================
FILE: rust-api-examples/run-offline-speech-enhancement-gtcrn.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -f ./gtcrn_simple.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
fi

if [ ! -f ./inp_16k.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
fi

cargo run --example offline_speech_enhancement_gtcrn -- \
  --model ./gtcrn_simple.onnx \
  --input ./inp_16k.wav \
  --output ./enhanced-rust-gtcrn.wav


================================================
FILE: rust-api-examples/run-online-punctuation.sh
================================================
#!/usr/bin/env bash
set -ex

if [ ! -d ./sherpa-onnx-online-punct-en-2024-08-06 ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
  tar xvf sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
  rm sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
fi

cargo run --example online_punctuation -- \
  --cnn-bilstm ./sherpa-onnx-online-punct-en-2024-08-06/model.onnx \
  --bpe-vocab ./sherpa-onnx-online-punct-en-2024-08-06/bpe.vocab


================================================
FILE: rust-api-examples/run-pocket-tts.sh
================================================
#!/usr/bin/env bash
set -ex

if [ ! -f ./sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
  tar xvf sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
  rm sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
fi

cargo run --example pocket_tts


================================================
FILE: rust-api-examples/run-sense-voice.sh
================================================
#!/usr/bin/env bash
set -ex

# see
# https://k2-fsa.github.io/sherpa/onnx/sense-voice/pretrained.html#sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8-chinese-english-japanese-korean-cantonese
if [ ! -f ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17/model.int8.onnx ]; then
  curl -SsL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17.tar.bz2

  tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17.tar.bz2
  rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17.tar.bz2
  ls -lh sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17
fi

cargo run --example sense_voice -- \
    --wav ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17/test_wavs/en.wav \
    --model ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17/model.int8.onnx \
    --tokens ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17/tokens.txt \
    --num-threads 2 \
    --debug


================================================
FILE: rust-api-examples/run-silero-vad-remove-silence.sh
================================================
#!/usr/bin/env bash
set -ex

# https://k2-fsa.github.io/sherpa/onnx/vad/silero-vad.html
if [ ! -f "./silero_vad.onnx" ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [ ! -f ./lei-jun-test.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

cargo run --example silero_vad_remove_silence -- \
    --input ./lei-jun-test.wav \
    --output ./no-silence.wav \
    --silero-vad-model ./silero_vad.onnx


================================================
FILE: rust-api-examples/run-speaker-embedding-cosine-similarity.sh
================================================
#!/usr/bin/env bash
set -ex

if [ ! -f ./wespeaker_zh_cnceleb_resnet34.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/wespeaker_zh_cnceleb_resnet34.onnx
fi

if [ ! -f ./fangjun-sr-1.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/fangjun-sr-1.wav
fi

if [ ! -f ./fangjun-sr-2.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/fangjun-sr-2.wav
fi

if [ ! -f ./leijun-sr-1.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/leijun-sr-1.wav
fi

cargo run --example speaker_embedding_cosine_similarity


================================================
FILE: rust-api-examples/run-speaker-embedding-extractor.sh
================================================
#!/usr/bin/env bash
set -ex

if [ ! -f ./3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx
fi

if [ ! -d ./sr-data ]; then
  git clone https://github.com/csukuangfj/sr-data
fi

cargo run --example speaker_embedding_extractor


================================================
FILE: rust-api-examples/run-speaker-embedding-manager.sh
================================================
#!/usr/bin/env bash
set -ex

if [ ! -f ./3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx
fi

if [ ! -d ./sr-data ]; then
  git clone https://github.com/csukuangfj/sr-data
fi

cargo run --example speaker_embedding_manager


================================================
FILE: rust-api-examples/run-spoken-language-identification.sh
================================================
#!/usr/bin/env bash
set -ex

if [ ! -f ./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
  tar xvf sherpa-onnx-whisper-tiny.tar.bz2
  rm sherpa-onnx-whisper-tiny.tar.bz2
fi

if [ ! -f ./spoken-language-identification-test-wavs/en-english.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/spoken-language-identification-test-wavs.tar.bz2
  tar xvf spoken-language-identification-test-wavs.tar.bz2
  rm spoken-language-identification-test-wavs.tar.bz2
fi

cargo run --example spoken_language_identification --   --wav ./spoken-language-identification-test-wavs/de-german.wav   --whisper-encoder ./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx   --whisper-decoder ./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx   --provider cpu   --num-threads 1


================================================
FILE: rust-api-examples/run-streaming-speech-enhancement-dpdfnet.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -f ./dpdfnet_baseline.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet_baseline.onnx
fi

if [ ! -f ./inp_16k.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
fi

cargo run --example streaming_speech_enhancement_dpdfnet -- \
  --model ./dpdfnet_baseline.onnx \
  --input ./inp_16k.wav \
  --output ./enhanced-rust-streaming-dpdfnet.wav


================================================
FILE: rust-api-examples/run-streaming-speech-enhancement-gtcrn.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -f ./gtcrn_simple.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
fi

if [ ! -f ./inp_16k.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
fi

cargo run --example streaming_speech_enhancement_gtcrn -- \
  --model ./gtcrn_simple.onnx \
  --input ./inp_16k.wav \
  --output ./enhanced-rust-streaming-gtcrn.wav


================================================
FILE: rust-api-examples/run-streaming-zipformer-en.sh
================================================
#!/usr/bin/env bash
set -ex

# see
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-en-2023-06-21-english
if [ ! -f ./sherpa-onnx-streaming-zipformer-en-2023-06-21/encoder-epoch-99-avg-1.int8.onnx ]; then
  curl -SsL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-en-2023-06-21.tar.bz2

  tar xvf sherpa-onnx-streaming-zipformer-en-2023-06-21.tar.bz2
  rm sherpa-onnx-streaming-zipformer-en-2023-06-21.tar.bz2
  ls -lh sherpa-onnx-streaming-zipformer-en-2023-06-21
fi

cargo run --example streaming_zipformer -- \
    --wav sherpa-onnx-streaming-zipformer-en-2023-06-21/test_wavs/1.wav \
    --encoder sherpa-onnx-streaming-zipformer-en-2023-06-21/encoder-epoch-99-avg-1.int8.onnx \
    --decoder sherpa-onnx-streaming-zipformer-en-2023-06-21/decoder-epoch-99-avg-1.onnx \
    --joiner sherpa-onnx-streaming-zipformer-en-2023-06-21/joiner-epoch-99-avg-1.int8.onnx \
    --tokens sherpa-onnx-streaming-zipformer-en-2023-06-21/tokens.txt \
    --provider cpu \
    --debug


================================================
FILE: rust-api-examples/run-streaming-zipformer-microphone-zh-en.sh
================================================
#!/usr/bin/env bash
set -ex

# see
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
if [ ! -f ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx ]; then
  curl -SsL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  ls -lh sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
fi

cargo run --example streaming_zipformer_microphone --features mic -- \
    --encoder sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx \
    --decoder sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \
    --joiner sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx \
    --tokens sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \
    --provider cpu \
    --debug


================================================
FILE: rust-api-examples/run-streaming-zipformer-zh-en.sh
================================================
#!/usr/bin/env bash
set -ex

# see
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
if [ ! -f ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx ]; then
  curl -SsL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  ls -lh sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
fi

cargo run --example streaming_zipformer -- \
    --wav sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/2.wav \
    --encoder sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx \
    --decoder sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \
    --joiner sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx \
    --tokens sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \
    --provider cpu \
    --debug


================================================
FILE: rust-api-examples/run-supertonic-tts.sh
================================================
#!/usr/bin/env bash
set -ex

if [ ! -f ./sherpa-onnx-supertonic-tts-int8-2026-03-06/duration_predictor.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
  tar xvf sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
  rm sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
fi

cargo run --example supertonic_tts


================================================
FILE: rust-api-examples/run-version.sh
================================================
#!/usr/bin/env bash
set -ex
cargo run --example version


================================================
FILE: rust-api-examples/run-vits-de.sh
================================================
#!/usr/bin/env bash
set -ex

if [ ! -d ./vits-piper-de_DE-glados-high ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-de_DE-glados-high.tar.bz2
  tar xf vits-piper-de_DE-glados-high.tar.bz2
  rm vits-piper-de_DE-glados-high.tar.bz2
fi

cargo run --example vits_tts --   --model ./vits-piper-de_DE-glados-high/de_DE-glados-high.onnx   --tokens ./vits-piper-de_DE-glados-high/tokens.txt   --data-dir ./vits-piper-de_DE-glados-high/espeak-ng-data   --output ./generated-vits-de-rust.wav   --text "Alles hat ein Ende, nur die Wurst hat zwei."


================================================
FILE: rust-api-examples/run-vits-en.sh
================================================
#!/usr/bin/env bash
set -ex

if [ ! -d ./vits-piper-en_US-amy-low ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
  tar xf vits-piper-en_US-amy-low.tar.bz2
  rm vits-piper-en_US-amy-low.tar.bz2
fi

cargo run --example vits_tts --   --model ./vits-piper-en_US-amy-low/en_US-amy-low.onnx   --tokens ./vits-piper-en_US-amy-low/tokens.txt   --data-dir ./vits-piper-en_US-amy-low/espeak-ng-data   --output ./generated-vits-en-rust.wav   --text "Liliana, the most beautiful and lovely assistant of our team!"


================================================
FILE: rust-api-examples/run-zipformer-en.sh
================================================
#!/usr/bin/env bash
set -ex

# see also
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#icefall-asr-multidataset-pruned-transducer-stateless7-2023-05-04-english
if [ ! -f "./icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04/data/lang_bpe_500/tokens.txt" ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04.tar.bz2

  tar xvf icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04.tar.bz2
  rm icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04.tar.bz2
  ls -lh icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04
fi

# Run Zipformer transducer
cargo run --example zipformer -- \
    --wav "./icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04/test_wavs/1089-134686-0001.wav" \
    --tokens=./icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04/data/lang_bpe_500/tokens.txt \
    --encoder=./icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04/exp/encoder-epoch-30-avg-4.int8.onnx \
    --decoder=./icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04/exp/decoder-epoch-30-avg-4.onnx \
    --joiner=./icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04/exp/joiner-epoch-30-avg-4.int8.onnx \
    --provider cpu \
    --num-threads 2 \
    --debug


================================================
FILE: rust-api-examples/run-zipformer-vi.sh
================================================
#!/usr/bin/env bash
set -ex

# see also
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#sherpa-onnx-zipformer-vi-30m-int8-2026-02-09-vietnamese
if [ ! -f "./sherpa-onnx-zipformer-vi-30M-int8-2026-02-09/encoder.int8.onnx" ]; then
    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-vi-30M-int8-2026-02-09.tar.bz2
    tar xvf sherpa-onnx-zipformer-vi-30M-int8-2026-02-09.tar.bz2
    rm sherpa-onnx-zipformer-vi-30M-int8-2026-02-09.tar.bz2
    ls -lh sherpa-onnx-zipformer-vi-30M-int8-2026-02-09
fi

# Run Zipformer transducer
cargo run --example zipformer -- \
    --wav "./sherpa-onnx-zipformer-vi-30M-int8-2026-02-09/test_wavs/0.wav" \
    --encoder "./sherpa-onnx-zipformer-vi-30M-int8-2026-02-09/encoder.int8.onnx" \
    --decoder "./sherpa-onnx-zipformer-vi-30M-int8-2026-02-09/decoder.onnx" \
    --joiner "./sherpa-onnx-zipformer-vi-30M-int8-2026-02-09/joiner.int8.onnx" \
    --tokens "./sherpa-onnx-zipformer-vi-30M-int8-2026-02-09/tokens.txt" \
    --provider cpu \
    --num-threads 2 \
    --debug


================================================
FILE: rust-api-examples/run-zipformer-zh-en.sh
================================================
#!/usr/bin/env bash
set -ex

# see also
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#sherpa-onnx-zipformer-zh-en-2023-11-22-chinese-english
if [ ! -f "./sherpa-onnx-zipformer-zh-en-2023-11-22/encoder-epoch-34-avg-19.int8.onnx" ]; then
    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-zh-en-2023-11-22.tar.bz2
    tar xvf sherpa-onnx-zipformer-zh-en-2023-11-22.tar.bz2
    rm sherpa-onnx-zipformer-zh-en-2023-11-22.tar.bz2
    ls -lh sherpa-onnx-zipformer-zh-en-2023-11-22
fi

# Run Zipformer transducer
cargo run --example zipformer -- \
    --wav "./sherpa-onnx-zipformer-zh-en-2023-11-22/test_wavs/0.wav" \
    --encoder "./sherpa-onnx-zipformer-zh-en-2023-11-22/encoder-epoch-34-avg-19.int8.onnx" \
    --decoder "./sherpa-onnx-zipformer-zh-en-2023-11-22/decoder-epoch-34-avg-19.onnx" \
    --joiner "./sherpa-onnx-zipformer-zh-en-2023-11-22/joiner-epoch-34-avg-19.int8.onnx" \
    --tokens "./sherpa-onnx-zipformer-zh-en-2023-11-22/tokens.txt" \
    --provider cpu \
    --num-threads 2 \
    --debug


================================================
FILE: rust-api-examples/run-zipvoice-tts.sh
================================================
#!/usr/bin/env bash
set -ex

if [ ! -f ./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
  tar xvf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
  rm sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
fi

if [ ! -f ./vocos_24khz.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos_24khz.onnx
fi

cargo run --example zipvoice_tts


================================================
FILE: scripts/3dspeaker/README.md
================================================
# Introduction

This directory contains scripts
about exporting models from https://github.com/alibaba-damo-academy/3D-Speaker
to `onnx` so that they can be used in `sherpa-onnx`.


================================================
FILE: scripts/3dspeaker/export-onnx.py
================================================
#!/usr/bin/env python3
# Copyright      2023-2024  Xiaomi Corp.        (authors: Fangjun Kuang)

import argparse
import json
import os
import pathlib
import re
from typing import Dict

import onnx
import torch
from infer_sv import supports
from modelscope.hub.snapshot_download import snapshot_download
from speakerlab.utils.builder import dynamic_import


def add_meta_data(filename: str, meta_data: Dict[str, str]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)
    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    onnx.save(model, filename)


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model",
        type=str,
        required=True,
        choices=[
            "speech_campplus_sv_en_voxceleb_16k",
            "speech_campplus_sv_zh-cn_16k-common",
            "speech_campplus_sv_zh_en_16k-common_advanced",
            "speech_eres2net_sv_en_voxceleb_16k",
            "speech_eres2net_sv_zh-cn_16k-common",
            "speech_eres2net_base_200k_sv_zh-cn_16k-common",
            "speech_eres2net_base_sv_zh-cn_3dspeaker_16k",
            "speech_eres2net_large_sv_zh-cn_3dspeaker_16k",
            "speech_eres2netv2_sv_zh-cn_16k-common",
        ],
    )
    return parser.parse_args()


@torch.no_grad()
def main():
    args = get_args()
    local_model_dir = "pretrained"
    model_id = f"iic/{args.model}"
    conf = supports[model_id]
    cache_dir = snapshot_download(
        model_id,
        revision=conf["revision"],
    )
    cache_dir = pathlib.Path(cache_dir)

    save_dir = os.path.join(local_model_dir, model_id.split("/")[1])
    save_dir = pathlib.Path(save_dir)
    save_dir.mkdir(exist_ok=True, parents=True)

    download_files = ["examples", conf["model_pt"]]
    for src in cache_dir.glob("*"):
        if re.search("|".join(download_files), src.name):
            dst = save_dir / src.name
            try:
                dst.unlink()
            except FileNotFoundError:
                pass
            dst.symlink_to(src)
    pretrained_model = save_dir / conf["model_pt"]
    pretrained_state = torch.load(pretrained_model, map_location="cpu")

    model = conf["model"]
    embedding_model = dynamic_import(model["obj"])(**model["args"])
    embedding_model.load_state_dict(pretrained_state)
    embedding_model.eval()

    with open(f"{cache_dir}/configuration.json") as f:
        json_config = json.loads(f.read())
        print(json_config)

    T = 100
    C = 80
    x = torch.rand(1, T, C)
    filename = f"{args.model}.onnx"
    torch.onnx.export(
        embedding_model,
        x,
        filename,
        opset_version=13,
        input_names=["x"],
        output_names=["embedding"],
        dynamic_axes={
            "x": {0: "N", 1: "T"},
            "embeddings": {0: "N"},
        },
    )

    # all models from 3d-speaker expect input samples in the range
    # [-1, 1]
    normalize_samples = 1

    # all models from 3d-speaker normalize the features by the global mean
    feature_normalize_type = "global-mean"
    sample_rate = json_config["model"]["model_config"]["sample_rate"]

    feat_dim = conf["model"]["args"]["feat_dim"]
    assert feat_dim == 80, feat_dim

    output_dim = conf["model"]["args"]["embedding_size"]

    if "zh-cn" in args.model:
        language = "Chinese"
    elif "zh_en" in args.model:
        language = "Chinese-English"
    elif "en" in args.model:
        language = "English"
    else:
        raise ValueError(f"Unsupported language for model {args.model}")

    comment = f"This model is from iic/{args.model}"
    url = f"https://www.modelscope.cn/models/iic/{args.model}/summary"

    meta_data = {
        "framework": "3d-speaker",
        "language": language,
        "url": url,
        "comment": comment,
        "sample_rate": sample_rate,
        "output_dim": output_dim,
        "normalize_samples": normalize_samples,
        "feature_normalize_type": feature_normalize_type,
    }
    print(meta_data)
    add_meta_data(filename=filename, meta_data=meta_data)


main()


================================================
FILE: scripts/3dspeaker/test-onnx.py
================================================
#!/usr/bin/env python3
# Copyright      2023-2024  Xiaomi Corp.        (authors: Fangjun Kuang)

"""
This script computes speaker similarity score in the range [0-1]
of two wave files using a speaker embedding model.
"""
import argparse
import wave
from pathlib import Path

import kaldi_native_fbank as knf
import numpy as np
import onnxruntime as ort
from numpy.linalg import norm


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model",
        type=str,
        required=True,
        help="Path to the input onnx model. Example value: model.onnx",
    )

    parser.add_argument(
        "--file1",
        type=str,
        required=True,
        help="Input wave 1",
    )

    parser.add_argument(
        "--file2",
        type=str,
        required=True,
        help="Input wave 2",
    )

    return parser.parse_args()


def read_wavefile(filename, expected_sample_rate: int = 16000) -> np.ndarray:
    """
    Args:
      filename:
        Path to a wave file, which must be of 16-bit and 16kHz.
     expected_sample_rate:
       Expected sample rate of the wave file.
    Returns:
      Return a 1-D float32 array containing audio samples. Each sample is in
      the range [-1, 1].
    """
    filename = str(filename)
    with wave.open(filename) as f:
        wave_file_sample_rate = f.getframerate()
        assert wave_file_sample_rate == expected_sample_rate, (
            wave_file_sample_rate,
            expected_sample_rate,
        )

        num_channels = f.getnchannels()
        assert f.getsampwidth() == 2, f.getsampwidth()  # it is in bytes
        num_samples = f.getnframes()
        samples = f.readframes(num_samples)
        samples_int16 = np.frombuffer(samples, dtype=np.int16)
        samples_int16 = samples_int16.reshape(-1, num_channels)[:, 0]
        samples_float32 = samples_int16.astype(np.float32)

        samples_float32 = samples_float32 / 32768

        return samples_float32


def compute_features(samples: np.ndarray, sample_rate: int) -> np.ndarray:
    opts = knf.FbankOptions()
    opts.frame_opts.dither = 0
    opts.frame_opts.samp_freq = sample_rate
    opts.frame_opts.snip_edges = True

    opts.mel_opts.num_bins = 80
    opts.mel_opts.debug_mel = False

    fbank = knf.OnlineFbank(opts)
    fbank.accept_waveform(sample_rate, samples)
    fbank.input_finished()

    features = []
    for i in range(fbank.num_frames_ready):
        f = fbank.get_frame(i)
        features.append(f)
    features = np.stack(features, axis=0)

    return features


class OnnxModel:
    def __init__(
        self,
        filename: str,
    ):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.session_opts = session_opts

        self.model = ort.InferenceSession(
            filename,
            sess_options=self.session_opts,
        )

        meta = self.model.get_modelmeta().custom_metadata_map
        self.normalize_samples = int(meta["normalize_samples"])
        self.sample_rate = int(meta["sample_rate"])
        self.output_dim = int(meta["output_dim"])
        self.feature_normalize_type = meta["feature_normalize_type"]

    def __call__(self, x: np.ndarray) -> np.ndarray:
        """
        Args:
          x:
            A 2-D float32 tensor of shape (T, C).
          y:
            A 1-D float32 tensor containing model output.
        """
        x = np.expand_dims(x, axis=0)

        return self.model.run(
            [
                self.model.get_outputs()[0].name,
            ],
            {
                self.model.get_inputs()[0].name: x,
            },
        )[0][0]


def main():
    args = get_args()
    print(args)
    filename = Path(args.model)
    file1 = Path(args.file1)
    file2 = Path(args.file2)
    assert filename.is_file(), filename
    assert file1.is_file(), file1
    assert file2.is_file(), file2

    model = OnnxModel(filename)
    wave1 = read_wavefile(file1, model.sample_rate)
    wave2 = read_wavefile(file2, model.sample_rate)

    if not model.normalize_samples:
        wave1 = wave1 * 32768
        wave2 = wave2 * 32768

    features1 = compute_features(wave1, model.sample_rate)
    features2 = compute_features(wave2, model.sample_rate)

    if model.feature_normalize_type == "global-mean":
        features1 -= features1.mean(axis=0, keepdims=True)
        features2 -= features2.mean(axis=0, keepdims=True)

    output1 = model(features1)
    output2 = model(features2)

    similarity = np.dot(output1, output2) / (norm(output1) * norm(output2))
    print(f"similarity in the range [0-1]: {similarity}")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/apk/.gitignore
================================================
build-apk-tts.sh
!*.sh.in


================================================
FILE: scripts/apk/README.md
================================================
# Introduction

This folder contains scripts for building Android APKs.


================================================
FILE: scripts/apk/build-apk-asr-2pass.sh.in
================================================
#!/usr/bin/env bash
#
# Auto generated! Please DO NOT EDIT!

# Please set the environment variable ANDROID_NDK
# before running this script

# Inside the $ANDROID_NDK directory, you can find a binary ndk-build
# and some other files like the file "build/cmake/android.toolchain.cmake"

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

log "Building streaming ASR two-pass APK for sherpa-onnx v${SHERPA_ONNX_VERSION}"

export SHERPA_ONNX_ENABLE_TTS=OFF
export SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION=OFF

log "====================arm64-v8a================="
./build-android-arm64-v8a.sh
log "====================armv7-eabi================"
./build-android-armv7-eabi.sh
log "====================x86-64===================="
./build-android-x86-64.sh
log "====================x86===================="
./build-android-x86.sh

mkdir -p apks

{% for first, second in model_list %}
pushd ./android/SherpaOnnx2Pass/app/src/main/assets/

model_name1={{ first.model_name }}
model_name=$model_name1
type1={{ first.idx }}
lang1={{ first.lang }}
short_name1={{ first.short_name }}

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/${model_name1}.tar.bz2
tar xvf ${model_name1}.tar.bz2

{{ first.cmd }}

rm -rf  *.tar.bz2
ls -lh $model_name1

model_name2={{ second.model_name }}
model_name=$model_name2
type2={{ second.idx }}
lang2={{ second.lang }}
short_name2={{ second.short_name }}

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/${model_name2}.tar.bz2
tar xvf ${model_name2}.tar.bz2

{{ second.cmd }}

rm -rf  *.tar.bz2
ls -lh $model_name2

popd
# Now we are at the project root directory

git checkout .
pushd android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx
sed -i.bak s/"firstType = 9/firstType = $type1/" ./MainActivity.kt
sed -i.bak s/"secondType = 0/secondType = $type2/" ./MainActivity.kt

{% if first.rule_fsts %}
  rule_fsts={{ first.rule_fsts }}
  sed -i.bak s%"firstRuleFsts = null"%"firstRuleFsts = \"$rule_fsts\""% ./MainActivity.kt
{% endif %}

{% if second.rule_fsts %}
  rule_fsts={{ second.rule_fsts }}
  sed -i.bak s%"secondRuleFsts = null"%"secondRuleFsts = \"$rule_fsts\""% ./MainActivity.kt
{% endif %}

git diff
popd

for arch in arm64-v8a armeabi-v7a x86_64 x86; do
  log "------------------------------------------------------------"
  log "build ASR apk for $arch"
  log "------------------------------------------------------------"
  src_arch=$arch
  if [ $arch == "armeabi-v7a" ]; then
    src_arch=armv7-eabi
  elif [ $arch == "x86_64" ]; then
    src_arch=x86-64
  fi

  ls -lh ./build-android-$src_arch/install/lib/*.so

  cp -v ./build-android-$src_arch/install/lib/*.so ./android/SherpaOnnx2Pass/app/src/main/jniLibs/$arch/

  pushd ./android/SherpaOnnx2Pass
  sed -i.bak s/2048/9012/g ./gradle.properties
  git diff ./gradle.properties
  ./gradlew assembleRelease
  popd

  mv android/SherpaOnnx2Pass/app/build/outputs/apk/release/app-release-unsigned.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-asr_2pass-$lang1-${short_name1}_${short_name2}.apk
  ls -lh apks
  rm -v ./android/SherpaOnnx2Pass/app/src/main/jniLibs/$arch/*.so
done

rm -rf ./android/SherpaOnnx2Pass/app/src/main/assets/$model_name1
rm -rf ./android/SherpaOnnx2Pass/app/src/main/assets/$model_name2
{% endfor %}

git checkout .

ls -lh apks/


================================================
FILE: scripts/apk/build-apk-asr.sh.in
================================================
#!/usr/bin/env bash
#
# Auto generated! Please DO NOT EDIT!

# Please set the environment variable ANDROID_NDK
# before running this script

# Inside the $ANDROID_NDK directory, you can find a binary ndk-build
# and some other files like the file "build/cmake/android.toolchain.cmake"

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

log "Building streaming ASR APK for sherpa-onnx v${SHERPA_ONNX_VERSION}"

export SHERPA_ONNX_ENABLE_TTS=OFF
export SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION=OFF

log "====================arm64-v8a================="
./build-android-arm64-v8a.sh
log "====================armv7-eabi================"
./build-android-armv7-eabi.sh
log "====================x86-64===================="
./build-android-x86-64.sh
log "====================x86===================="
./build-android-x86.sh

mkdir -p apks

{% for model in model_list %}
pushd ./android/SherpaOnnx/app/src/main/assets/
model_name={{ model.model_name }}
type={{ model.idx }}
lang={{ model.lang }}
short_name={{ model.short_name }}

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/${model_name}.tar.bz2
tar xvf ${model_name}.tar.bz2

{{ model.cmd }}

rm -rf  *.tar.bz2
ls -lh $model_name

popd
# Now we are at the project root directory

git checkout .
pushd android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx
sed -i.bak s/"type = 0/type = $type/" ./MainActivity.kt

{% if model.rule_fsts %}
  rule_fsts={{ model.rule_fsts }}
  sed -i.bak s%"ruleFsts = null"%"ruleFsts = \"$rule_fsts\""% ./MainActivity.kt
{% endif %}

git diff
popd

for arch in arm64-v8a armeabi-v7a x86_64 x86; do
  log "------------------------------------------------------------"
  log "build ASR apk for $arch"
  log "------------------------------------------------------------"
  src_arch=$arch
  if [ $arch == "armeabi-v7a" ]; then
    src_arch=armv7-eabi
  elif [ $arch == "x86_64" ]; then
    src_arch=x86-64
  fi

  ls -lh ./build-android-$src_arch/install/lib/*.so

  cp -v ./build-android-$src_arch/install/lib/*.so ./android/SherpaOnnx/app/src/main/jniLibs/$arch/

  pushd ./android/SherpaOnnx
  sed -i.bak s/2048/9012/g ./gradle.properties
  git diff ./gradle.properties
  ./gradlew assembleRelease
  popd

  mv android/SherpaOnnx/app/build/outputs/apk/release/app-release-unsigned.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-asr-$lang-$short_name.apk
  ls -lh apks
  rm -v ./android/SherpaOnnx/app/src/main/jniLibs/$arch/*.so
done

rm -rf ./android/SherpaOnnx/app/src/main/assets/$model_name
rm -rf ./android/SherpaOnnx/app/src/main/assets/*.fst
{% endfor %}

git checkout .

ls -lh apks/


================================================
FILE: scripts/apk/build-apk-audio-tagging-wearos.sh.in
================================================
#!/usr/bin/env bash
#
# Auto generated! Please DO NOT EDIT!

# Please set the environment variable ANDROID_NDK
# before running this script

# Inside the $ANDROID_NDK directory, you can find a binary ndk-build
# and some other files like the file "build/cmake/android.toolchain.cmake"

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

log "Building audio tagging WearOS APK for sherpa-onnx v${SHERPA_ONNX_VERSION}"

log "====================arm64-v8a================="
./build-android-arm64-v8a.sh
log "====================armv7-eabi================"
./build-android-armv7-eabi.sh
log "====================x86-64===================="
./build-android-x86-64.sh
log "====================x86===================="
./build-android-x86.sh

export SHERPA_ONNX_ENABLE_TTS=OFF
export SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION=OFF

mkdir -p apks

{% for model in model_list %}
pushd ./android/SherpaOnnxAudioTaggingWearOs/app/src/main/assets/
model_name={{ model.model_name }}
short_name={{ model.short_name }}
type={{ model.idx }}

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/${model_name}.tar.bz2
tar xvf ${model_name}.tar.bz2
rm -rfv $model_name/model.onnx
rm -rfv $model_name/test_wavs
rm -rf  *.tar.bz2
ls -lh $model_name

popd
# Now we are at the project root directory

git checkout .
# Tagger.kt is a symlink file, so we use SherpaOnnxAudioTagging here instead of SherpaOnnxAudioTaggingWearOs
pushd android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/
sed -i.bak s/"type = 0/type = $type/" ./Tagger.kt
git diff
popd

for arch in arm64-v8a armeabi-v7a x86_64 x86; do
  log "------------------------------------------------------------"
  log "build audio tagging apk for $arch"
  log "------------------------------------------------------------"
  src_arch=$arch
  if [ $arch == "armeabi-v7a" ]; then
    src_arch=armv7-eabi
  elif [ $arch == "x86_64" ]; then
    src_arch=x86-64
  fi

  ls -lh ./build-android-$src_arch/install/lib/*.so

  cp -v ./build-android-$src_arch/install/lib/*.so ./android/SherpaOnnxAudioTaggingWearOs/app/src/main/jniLibs/$arch/

  pushd ./android/SherpaOnnxAudioTaggingWearOs
  sed -i.bak s/2048/9012/g ./gradle.properties
  git diff ./gradle.properties
  ./gradlew assembleRelease
  popd

  mv android/SherpaOnnxAudioTaggingWearOs/app/build/outputs/apk/release/app-release-unsigned.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-audio-tagging-$short_name-wearos.apk
  ls -lh apks
  rm -v ./android/SherpaOnnxAudioTaggingWearOs/app/src/main/jniLibs/$arch/*.so
done

rm -rf ./android/SherpaOnnxAudioTaggingWearOs/app/src/main/assets/$model_name
{% endfor %}

git checkout .

ls -lh apks/


================================================
FILE: scripts/apk/build-apk-audio-tagging.sh.in
================================================
#!/usr/bin/env bash
#
# Auto generated! Please DO NOT EDIT!

# Please set the environment variable ANDROID_NDK
# before running this script

# Inside the $ANDROID_NDK directory, you can find a binary ndk-build
# and some other files like the file "build/cmake/android.toolchain.cmake"

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

log "Building audio tagging APK for sherpa-onnx v${SHERPA_ONNX_VERSION}"

log "====================arm64-v8a================="
./build-android-arm64-v8a.sh
log "====================armv7-eabi================"
./build-android-armv7-eabi.sh
log "====================x86-64===================="
./build-android-x86-64.sh
log "====================x86===================="
./build-android-x86.sh

export SHERPA_ONNX_ENABLE_TTS=OFF
export SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION=OFF

mkdir -p apks

{% for model in model_list %}
pushd ./android/SherpaOnnxAudioTagging/app/src/main/assets/
model_name={{ model.model_name }}
short_name={{ model.short_name }}
type={{ model.idx }}

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/${model_name}.tar.bz2
tar xvf ${model_name}.tar.bz2
rm -rfv $model_name/model.onnx
rm -rfv $model_name/test_wavs
rm -rf  *.tar.bz2
ls -lh $model_name

popd
# Now we are at the project root directory

git checkout .
pushd android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/
sed -i.bak s/"type = 0/type = $type/" ./Tagger.kt
git diff
popd

for arch in arm64-v8a armeabi-v7a x86_64 x86; do
  log "------------------------------------------------------------"
  log "build audio tagging apk for $arch"
  log "------------------------------------------------------------"
  src_arch=$arch
  if [ $arch == "armeabi-v7a" ]; then
    src_arch=armv7-eabi
  elif [ $arch == "x86_64" ]; then
    src_arch=x86-64
  fi

  ls -lh ./build-android-$src_arch/install/lib/*.so

  cp -v ./build-android-$src_arch/install/lib/*.so ./android/SherpaOnnxAudioTagging/app/src/main/jniLibs/$arch/

  pushd ./android/SherpaOnnxAudioTagging
  sed -i.bak s/2048/9012/g ./gradle.properties
  git diff ./gradle.properties
  ./gradlew assembleRelease
  popd

  mv android/SherpaOnnxAudioTagging/app/build/outputs/apk/release/app-release-unsigned.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-audio-tagging-$short_name.apk
  ls -lh apks
  rm -v ./android/SherpaOnnxAudioTagging/app/src/main/jniLibs/$arch/*.so
done

rm -rf ./android/SherpaOnnxAudioTagging/app/src/main/assets/$model_name
{% endfor %}

git checkout .

ls -lh apks/


================================================
FILE: scripts/apk/build-apk-qnn-vad-asr-simulate-streaming.sh.in
================================================
#!/usr/bin/env bash
#
# Auto generated! Please DO NOT EDIT!

# Please set the environment variable ANDROID_NDK
# before running this script

# Inside the $ANDROID_NDK directory, you can find a binary ndk-build
# and some other files like the file "build/cmake/android.toolchain.cmake"

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

log "Building simulated-streaming VAD + ASR APK + QNN for sherpa-onnx v${SHERPA_ONNX_VERSION}"

export SHERPA_ONNX_ENABLE_TTS=OFF

export SHERPA_ONNX_ENABLE_QNN=ON

log "Download qnn header files"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models-qnn/qnn-include-2.40.0.251030.tar.bz2
tar xf qnn-include-2.40.0.251030.tar.bz2
rm qnn-include-2.40.0.251030.tar.bz2
ls -lh qnn-include-2.40.0.251030

export QNN_SDK_ROOT=$PWD/qnn-include-2.40.0.251030

log "====================arm64-v8a================="
./build-android-arm64-v8a.sh

cp -v ./build-android-arm64-v8a/install/lib/*.so ./android/SherpaOnnxSimulateStreamingAsr/app/src/main/jniLibs/arm64-v8a/

log "=======Download qnn libs============"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models-qnn/qnn-libs-2.40.0.251030.tar.bz2
tar xvf qnn-libs-2.40.0.251030.tar.bz2
rm qnn-libs-2.40.0.251030.tar.bz2
cp -v qnn-libs-2.40.0.251030/*.so ./android/SherpaOnnxSimulateStreamingAsr/app/src/main/jniLibs/arm64-v8a/

rm -rf qnn-libs-2.40.0.251030

ls -lh ./android/SherpaOnnxSimulateStreamingAsr/app/src/main/jniLibs/arm64-v8a/

mkdir -p apks

{% for model in model_list %}
pushd ./android/SherpaOnnxSimulateStreamingAsr/app/src/main/assets/
model_name={{ model.model_name }}-android-aarch64
type={{ model.idx }}
lang={{ model.lang }}
short_name={{ model.short_name }}

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models-qnn/${model_name}.tar.bz2
tar xvf ${model_name}.tar.bz2

{% if model.use_hr %}
  if [ ! -f lexicon.txt ]; then
    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
  fi

  if [ ! -f replace.fst ]; then
    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
  fi
{% endif %}

{{ model.cmd }}

rm -rf  *.tar.bz2
ls -lh $model_name

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

popd
# Now we are at the project root directory

git checkout .

pushd android/SherpaOnnxSimulateStreamingAsr/app/src/main/java/com/k2fsa/sherpa/onnx/simulate/streaming/asr/screens
sed -i.bak s/"asrModelType = 15/asrModelType = $type/" ./Home.kt
popd

pushd android/SherpaOnnxSimulateStreamingAsr/app/src/main/java/com/k2fsa/sherpa/onnx/simulate/streaming/asr

{% if model.use_hr %}
  sed -i.bak s/"useHr = false/useHr = true/" ./SimulateStreamingAsr.kt
{% endif %}

{% if model.rule_fsts %}
  rule_fsts={{ model.rule_fsts }}
  sed -i.bak s%"asrRuleFsts = null"%"asrRuleFsts = \"$rule_fsts\""% ./MainActivity.kt
{% endif %}

git diff
popd

for arch in arm64-v8a; do
  log "------------------------------------------------------------"
  log "build simulated-streaming ASR apk for $arch"
  log "------------------------------------------------------------"
  src_arch=$arch
  if [ $arch == "armeabi-v7a" ]; then
    src_arch=armv7-eabi
  elif [ $arch == "x86_64" ]; then
    src_arch=x86-64
  fi

  pushd ./android/SherpaOnnxSimulateStreamingAsr
  sed -i.bak s/2048/9012/g ./gradle.properties
  git diff ./gradle.properties
  ./gradlew assembleRelease
  popd

  mv android/SherpaOnnxSimulateStreamingAsr/app/build/outputs/apk/release/app-release-unsigned.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-qnn-$arch-simulated_streaming_asr-$lang-$short_name.apk
  ls -lh apks
done

rm -rf ./android/SherpaOnnxSimulateStreamingAsr/app/src/main/assets/$model_name
rm -rf ./android/SherpaOnnxSimulateStreamingAsr/app/src/main/assets/lexicon.txt
rm -rf ./android/SherpaOnnxSimulateStreamingAsr/app/src/main/assets/replace.fst

{% endfor %}

git checkout .

ls -lh apks/


================================================
FILE: scripts/apk/build-apk-slid.sh.in
================================================
#!/usr/bin/env bash
#
# Auto generated! Please DO NOT EDIT!

# Please set the environment variable ANDROID_NDK
# before running this script

# Inside the $ANDROID_NDK directory, you can find a binary ndk-build
# and some other files like the file "build/cmake/android.toolchain.cmake"

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

log "Building spoken language identification APK for sherpa-onnx v${SHERPA_ONNX_VERSION}"

log "====================arm64-v8a================="
./build-android-arm64-v8a.sh
log "====================armv7-eabi================"
./build-android-armv7-eabi.sh
log "====================x86-64===================="
./build-android-x86-64.sh
log "====================x86===================="
./build-android-x86.sh

export SHERPA_ONNX_ENABLE_TTS=OFF
export SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION=OFF

mkdir -p apks

{% for model in model_list %}
pushd ./android/SherpaOnnxSpokenLanguageIdentification/app/src/main/assets/
model_name={{ model.model_name }}
short_name={{ model.short_name }}
type={{ model.idx }}

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/${model_name}.tar.bz2
tar xvf ${model_name}.tar.bz2
rm -rfv $model_name/*-encoder.onnx
rm -rfv $model_name/*-decoder.onnx
rm -rfv $model_name/*.py
rm -rfv $model_name/*.txt
rm -rfv $model_name/*.md
rm -rfv $model_name/test_wavs
rm -rf  *.tar.bz2
ls -lh $model_name

popd
# Now we are at the project root directory

git checkout .
pushd android/SherpaOnnxSpokenLanguageIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/slid/
sed -i.bak s/"type = 0/type = $type/" ./slid.kt
git diff
popd

for arch in arm64-v8a armeabi-v7a x86_64 x86; do
  log "------------------------------------------------------------"
  log "build spoken language identification apk for $arch"
  log "------------------------------------------------------------"
  src_arch=$arch
  if [ $arch == "armeabi-v7a" ]; then
    src_arch=armv7-eabi
  elif [ $arch == "x86_64" ]; then
    src_arch=x86-64
  fi

  ls -lh ./build-android-$src_arch/install/lib/*.so

  cp -v ./build-android-$src_arch/install/lib/*.so ./android/SherpaOnnxSpokenLanguageIdentification/app/src/main/jniLibs/$arch/

  pushd ./android/SherpaOnnxSpokenLanguageIdentification
  sed -i.bak s/2048/9012/g ./gradle.properties
  git diff ./gradle.properties
  ./gradlew assembleRelease
  popd

  mv android/SherpaOnnxSpokenLanguageIdentification/app/build/outputs/apk/release/app-release-unsigned.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-slid-$short_name.apk
  ls -lh apks
  rm -v ./android/SherpaOnnxSpokenLanguageIdentification/app/src/main/jniLibs/$arch/*.so
done

rm -rf ./android/SherpaOnnxSpokenLanguageIdentification/app/src/main/assets/$model_name
{% endfor %}

git checkout .

ls -lh apks/


================================================
FILE: scripts/apk/build-apk-speaker-diarization.sh.in
================================================
#!/usr/bin/env bash
#
# Please set the environment variable ANDROID_NDK
# before running this script

# Inside the $ANDROID_NDK directory, you can find a binary ndk-build
# and some other files like the file "build/cmake/android.toolchain.cmake"

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

log "Building Speaker identification APK for sherpa-onnx v${SHERPA_ONNX_VERSION}"

export SHERPA_ONNX_ENABLE_TTS=OFF

log "====================arm64-v8a================="
./build-android-arm64-v8a.sh
log "====================armv7-eabi================"
./build-android-armv7-eabi.sh
log "====================x86-64===================="
./build-android-x86-64.sh
log "====================x86===================="
./build-android-x86.sh

mkdir -p apks

{% for model in model_list %}

pushd ./android/SherpaOnnxSpeakerDiarization/app/src/main/assets/

ls -lh

segmentation_model_name={{ model.segmentation.model_name }}
segmentation_short_name={{ model.segmentation.short_name }}

embedding_model_name={{ model.embedding.model_name }}
embedding_short_name={{ model.embedding.short_name }}

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/$segmentation_model_name.tar.bz2
tar xvf $segmentation_model_name.tar.bz2
rm $segmentation_model_name.tar.bz2
mv $segmentation_model_name/model.onnx segmentation.onnx
rm -rf $segmentation_model_name

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/$embedding_model_name.onnx
mv $embedding_model_name.onnx embedding.onnx

echo "pwd: $PWD"
ls -lh

popd

for arch in arm64-v8a armeabi-v7a x86_64 x86; do
  log "------------------------------------------------------------"
  log "build speaker diarization apk for $arch"
  log "------------------------------------------------------------"
  src_arch=$arch
  if [ $arch == "armeabi-v7a" ]; then
    src_arch=armv7-eabi
  elif [ $arch == "x86_64" ]; then
    src_arch=x86-64
  fi

  ls -lh ./build-android-$src_arch/install/lib/*.so

  cp -v ./build-android-$src_arch/install/lib/*.so ./android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/$arch/

  pushd ./android/SherpaOnnxSpeakerDiarization
  ./gradlew build
  popd

  mv android/SherpaOnnxSpeakerDiarization/app/build/outputs/apk/debug/app-debug.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-speaker-diarization-$segmentation_short_name-$embedding_short_name.apk
  ls -lh apks
  rm -v ./android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/$arch/*.so
done

rm -rf ./android/SherpaOnnxSpeakerDiarization/app/src/main/assets/*.onnx

{% endfor %}

ls -lh apks


================================================
FILE: scripts/apk/build-apk-speaker-identification.sh.in
================================================
#!/usr/bin/env bash
#
# Auto generated! Please DO NOT EDIT!

# Please set the environment variable ANDROID_NDK
# before running this script

# Inside the $ANDROID_NDK directory, you can find a binary ndk-build
# and some other files like the file "build/cmake/android.toolchain.cmake"

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

log "Building Speaker identification APK for sherpa-onnx v${SHERPA_ONNX_VERSION}"

export SHERPA_ONNX_ENABLE_TTS=OFF

log "====================arm64-v8a================="
./build-android-arm64-v8a.sh
log "====================armv7-eabi================"
./build-android-armv7-eabi.sh
log "====================x86-64===================="
./build-android-x86-64.sh
log "====================x86===================="
./build-android-x86.sh

mkdir -p apks

{% for model in model_list %}
pushd ./android/SherpaOnnxSpeakerIdentification/app/src/main/assets/
model_name={{ model.model_name }}
short_name={{ model.short_name }}
lang={{ model.lang }}
framework={{ model.framework }}

wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/$model_name

popd
# Now we are at the project root directory

git checkout .
pushd android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/
sed -i.bak s/"private val modelName.*/private val modelName = \"$model_name\"/" ./Speaker.kt
git diff
popd

for arch in arm64-v8a armeabi-v7a x86_64 x86; do
  log "------------------------------------------------------------"
  log "build speaker identification apk for $arch"
  log "------------------------------------------------------------"
  src_arch=$arch
  if [ $arch == "armeabi-v7a" ]; then
    src_arch=armv7-eabi
  elif [ $arch == "x86_64" ]; then
    src_arch=x86-64
  fi

  ls -lh ./build-android-$src_arch/install/lib/*.so

  cp -v ./build-android-$src_arch/install/lib/*.so ./android/SherpaOnnxSpeakerIdentification/app/src/main/jniLibs/$arch/

  pushd ./android/SherpaOnnxSpeakerIdentification
  sed -i.bak s/2048/9012/g ./gradle.properties
  git diff ./gradle.properties
  ./gradlew build
  popd

  mv android/SherpaOnnxSpeakerIdentification/app/build/outputs/apk/debug/app-debug.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-$lang-speaker-identification-$framework-$short_name.apk
  ls -lh apks
  rm -v ./android/SherpaOnnxSpeakerIdentification/app/src/main/jniLibs/$arch/*.so
done

rm -rf ./android/SherpaOnnxSpeakerIdentification/app/src/main/assets/$model_name
{% endfor %}

git checkout .

ls -lh apks/


================================================
FILE: scripts/apk/build-apk-tts-engine.sh.in
================================================
#!/usr/bin/env bash
#
# Auto generated! Please DO NOT EDIT!

# Please set the environment variable ANDROID_NDK
# before running this script

# Inside the $ANDROID_NDK directory, you can find a binary ndk-build
# and some other files like the file "build/cmake/android.toolchain.cmake"

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

log "Building TTS engine APK for sherpa-onnx v${SHERPA_ONNX_VERSION}"

log "====================arm64-v8a================="
./build-android-arm64-v8a.sh
log "====================armv7-eabi================"
./build-android-armv7-eabi.sh
log "====================x86-64===================="
./build-android-x86-64.sh
log "====================x86===================="
./build-android-x86.sh

export SHERPA_ONNX_ENABLE_TTS=ON

mkdir -p apks

{% for tts_model in tts_model_list %}
pushd ./android/SherpaOnnxTtsEngine/app/src/main/assets/
model_dir={{ tts_model.model_dir }}
model_name={{ tts_model.model_name }}
acoustic_model_name={{ tts_model.acoustic_model_name }}
vocoder={{ tts_model.vocoder }}
voices={{ tts_model.voices }}
lang={{ tts_model.lang }}
lang_iso_639_3={{ tts_model.lang_iso_639_3 }}
lang_iso_639_3_2={{ tts_model.lang_iso_639_3_2 }}

wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/$model_dir.tar.bz2
tar xf $model_dir.tar.bz2
rm $model_dir.tar.bz2

{% if tts_model.vocoder %}
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/$vocoder
{% endif %}

popd
# Now we are at the project root directory

git checkout .
pushd android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine
sed -i.bak s/"modelDir = null"/"modelDir = \"$model_dir\""/ ./TtsEngine.kt
sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt

{% if tts_model.lang2 %}
  sed -i.bak s/"lang2 = null"/"lang2 = \"$lang_iso_639_3_2\""/ ./TtsEngine.kt
{% endif %}

{% if tts_model.is_kitten %}
  sed -i.bak s/"isKitten = false"/"isKitten = true"/ ./TtsEngine.kt
{% endif %}

{% if tts_model.model_name %}
  sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./TtsEngine.kt
{% endif %}

{% if tts_model.acoustic_model_name %}
  sed -i.bak s/"acousticModelName = null"/"acousticModelName = \"$acoustic_model_name\""/ ./TtsEngine.kt
{% endif %}

{% if tts_model.vocoder %}
  sed -i.bak s/"vocoder = null"/"vocoder = \"$vocoder\""/ ./TtsEngine.kt
{% endif %}

{% if tts_model.voices %}
  sed -i.bak s/"voices = null"/"voices = \"$voices\""/ ./TtsEngine.kt
{% endif %}

{% if tts_model.rule_fsts %}
  rule_fsts={{ tts_model.rule_fsts }}
  sed -i.bak s%"ruleFsts = null"%"ruleFsts = \"$rule_fsts\""% ./TtsEngine.kt
{% endif %}

{% if tts_model.rule_fars %}
  rule_fars={{ tts_model.rule_fars }}
  sed -i.bak s%"ruleFsts = null"%"ruleFars = \"$rule_fars\""% ./TtsEngine.kt
{% endif %}

{% if tts_model.data_dir %}
  data_dir={{ tts_model.data_dir }}
  sed -i.bak s%"dataDir = null"%"dataDir = \"$data_dir\""% ./TtsEngine.kt
{% elif not tts_model.is_char %}
  sed -i.bak s/"lexicon = null"/"lexicon = \"lexicon.txt\""/ ./TtsEngine.kt
{% endif %}

{% if tts_model.lexicon %}
  lexicon={{ tts_model.lexicon }}
  sed -i.bak s%"lexicon = null"%"lexicon = \"$lexicon\""% ./TtsEngine.kt
{% endif %}

git diff
popd

if [[ $model_dir == vits-melo-tts-zh_en ]]; then
  lang=zh_en
fi

if [[ $model_dir == matcha-icefall-zh-en ]]; then
  lang=zh_en
fi

if [[ $model_dir == kokoro-multi-lang-v1_0 || $model_dir == kokoro-multi-lang-v1_1 || $model_dir == kokoro-int8-multi-lang-v1_1 ]]; then
  lang=zh_en
fi

for arch in arm64-v8a armeabi-v7a x86_64 x86; do
  log "------------------------------------------------------------"
  log "build tts apk for $arch"
  log "------------------------------------------------------------"
  src_arch=$arch
  if [ $arch == "armeabi-v7a" ]; then
    src_arch=armv7-eabi
  elif [ $arch == "x86_64" ]; then
    src_arch=x86-64
  fi

  ls -lh ./build-android-$src_arch/install/lib/*.so

  cp -v ./build-android-$src_arch/install/lib/*.so ./android/SherpaOnnxTtsEngine/app/src/main/jniLibs/$arch/

  pushd ./android/SherpaOnnxTtsEngine
  sed -i.bak s/2048/9012/g ./gradle.properties
  git diff ./gradle.properties
  ./gradlew assembleRelease
  popd

  mv android/SherpaOnnxTtsEngine/app/build/outputs/apk/release/app-release-unsigned.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-$lang-tts-engine-$model_dir.apk
  ls -lh apks
  rm -v ./android/SherpaOnnxTtsEngine/app/src/main/jniLibs/$arch/*.so
done

rm -rf ./android/SherpaOnnxTtsEngine/app/src/main/assets/$model_dir
rm -fv ./android/SherpaOnnxTtsEngine/app/src/main/assets/*.onnx
{% endfor %}

git checkout .

ls -lh apks/


================================================
FILE: scripts/apk/build-apk-tts.sh.in
================================================
#!/usr/bin/env bash
#
# Auto generated! Please DO NOT EDIT!

# Please set the environment variable ANDROID_NDK
# before running this script

# Inside the $ANDROID_NDK directory, you can find a binary ndk-build
# and some other files like the file "build/cmake/android.toolchain.cmake"

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

log "Building TTS APK for sherpa-onnx v${SHERPA_ONNX_VERSION}"

log "====================arm64-v8a================="
./build-android-arm64-v8a.sh
log "====================armv7-eabi================"
./build-android-armv7-eabi.sh
log "====================x86-64===================="
./build-android-x86-64.sh
log "====================x86===================="
./build-android-x86.sh

export SHERPA_ONNX_ENABLE_TTS=ON

mkdir -p apks

{% for tts_model in tts_model_list %}
pushd ./android/SherpaOnnxTts/app/src/main/assets/
model_dir={{ tts_model.model_dir }}
model_name={{ tts_model.model_name }}
acoustic_model_name={{ tts_model.acoustic_model_name }}
vocoder={{ tts_model.vocoder }}
voices={{ tts_model.voices }}
lang={{ tts_model.lang }}

wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/$model_dir.tar.bz2
tar xf $model_dir.tar.bz2
rm $model_dir.tar.bz2

{% if tts_model.vocoder %}
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/$vocoder
{% endif %}

popd
# Now we are at the project root directory

git checkout .
pushd android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx
sed -i.bak s/"modelDir = null"/"modelDir = \"$model_dir\""/ ./MainActivity.kt


{% if tts_model.model_name %}
  sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./MainActivity.kt
{% endif %}

{% if tts_model.acoustic_model_name %}
  sed -i.bak s/"acousticModelName = null"/"acousticModelName = \"$acoustic_model_name\""/ ./MainActivity.kt
{% endif %}

{% if tts_model.vocoder %}
  sed -i.bak s/"vocoder = null"/"vocoder = \"$vocoder\""/ ./MainActivity.kt
{% endif %}

{% if tts_model.voices %}
  sed -i.bak s/"voices = null"/"voices = \"$voices\""/ ./MainActivity.kt
{% endif %}

{% if tts_model.rule_fsts %}
  rule_fsts={{ tts_model.rule_fsts }}
  sed -i.bak s%"ruleFsts = null"%"ruleFsts = \"$rule_fsts\""% ./MainActivity.kt
{% endif %}

{% if tts_model.rule_fars %}
  rule_fars={{ tts_model.rule_fars }}
  sed -i.bak s%"ruleFsts = null"%"ruleFars = \"$rule_fars\""% ./MainActivity.kt
{% endif %}

{% if tts_model.data_dir %}
  data_dir={{ tts_model.data_dir }}
  sed -i.bak s%"dataDir = null"%"dataDir = \"$data_dir\""% ./MainActivity.kt
{% elif not tts_model.is_char %}
  sed -i.bak s/"lexicon = null"/"lexicon = \"lexicon.txt\""/ ./MainActivity.kt
{% endif %}

{% if tts_model.lexicon %}
  lexicon={{ tts_model.lexicon }}
  sed -i.bak s%"lexicon = null"%"lexicon = \"$lexicon\""% ./MainActivity.kt
{% endif %}

{% if tts_model.is_kitten %}
  sed -i.bak s/"isKitten = false"/"isKitten = true"/ ./MainActivity.kt
{% endif %}

git diff
popd

if [[ $model_dir == vits-melo-tts-zh_en ]]; then
  lang=zh_en
fi

if [[ $model_dir == matcha-icefall-zh-en ]]; then
  lang=zh_en
fi

if [[ $model_dir == kokoro-multi-lang-v1_0 || $model_dir == kokoro-multi-lang-v1_1 || $model_dir == kokoro-int8-multi-lang-v1_1 ]]; then
  lang=zh_en
fi

for arch in arm64-v8a armeabi-v7a x86_64 x86; do
  log "------------------------------------------------------------"
  log "build tts apk for $arch"
  log "------------------------------------------------------------"
  src_arch=$arch
  if [ $arch == "armeabi-v7a" ]; then
    src_arch=armv7-eabi
  elif [ $arch == "x86_64" ]; then
    src_arch=x86-64
  fi

  ls -lh ./build-android-$src_arch/install/lib/*.so

  cp -v ./build-android-$src_arch/install/lib/*.so ./android/SherpaOnnxTts/app/src/main/jniLibs/$arch/

  pushd ./android/SherpaOnnxTts
  sed -i.bak s/2048/9012/g ./gradle.properties
  git diff ./gradle.properties
  ./gradlew assembleRelease
  popd

  mv android/SherpaOnnxTts/app/build/outputs/apk/release/app-release-unsigned.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-$lang-tts-$model_dir.apk
  ls -lh apks
  rm -v ./android/SherpaOnnxTts/app/src/main/jniLibs/$arch/*.so
done

rm -rf ./android/SherpaOnnxTts/app/src/main/assets/$model_dir
rm -fv ./android/SherpaOnnxTts/app/src/main/assets/*.onnx

{% endfor %}

git checkout .

ls -lh apks/


================================================
FILE: scripts/apk/build-apk-vad-asr-simulate-streaming.sh.in
================================================
#!/usr/bin/env bash
#
# Auto generated! Please DO NOT EDIT!

# Please set the environment variable ANDROID_NDK
# before running this script

# Inside the $ANDROID_NDK directory, you can find a binary ndk-build
# and some other files like the file "build/cmake/android.toolchain.cmake"

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

log "Building simulated-streaming VAD + ASR APK for sherpa-onnx v${SHERPA_ONNX_VERSION}"

export SHERPA_ONNX_ENABLE_TTS=OFF

log "====================arm64-v8a================="
./build-android-arm64-v8a.sh
log "====================armv7-eabi================"
./build-android-armv7-eabi.sh
log "====================x86-64===================="
./build-android-x86-64.sh
log "====================x86===================="
./build-android-x86.sh

mkdir -p apks

{% for model in model_list %}
pushd ./android/SherpaOnnxSimulateStreamingAsr/app/src/main/assets/
model_name={{ model.model_name }}
type={{ model.idx }}
lang={{ model.lang }}
short_name={{ model.short_name }}

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/${model_name}.tar.bz2
tar xvf ${model_name}.tar.bz2

{% if model.use_hr %}
  if [ ! -f lexicon.txt ]; then
    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
  fi

  if [ ! -f replace.fst ]; then
    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
  fi
{% endif %}

{{ model.cmd }}

rm -rf  *.tar.bz2
ls -lh $model_name

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

popd
# Now we are at the project root directory

git checkout .

pushd android/SherpaOnnxSimulateStreamingAsr/app/src/main/java/com/k2fsa/sherpa/onnx/simulate/streaming/asr/screens
sed -i.bak s/"asrModelType = 15/asrModelType = $type/" ./Home.kt
popd

pushd android/SherpaOnnxSimulateStreamingAsr/app/src/main/java/com/k2fsa/sherpa/onnx/simulate/streaming/asr

{% if model.use_hr %}
  sed -i.bak s/"useHr = false/useHr = true/" ./SimulateStreamingAsr.kt
{% endif %}

{% if model.rule_fsts %}
  rule_fsts={{ model.rule_fsts }}
  sed -i.bak s%"asrRuleFsts = null"%"asrRuleFsts = \"$rule_fsts\""% ./MainActivity.kt
{% endif %}

git diff
popd

for arch in arm64-v8a armeabi-v7a x86_64 x86; do
  log "------------------------------------------------------------"
  log "build simulated-streaming ASR apk for $arch"
  log "------------------------------------------------------------"
  src_arch=$arch
  if [ $arch == "armeabi-v7a" ]; then
    src_arch=armv7-eabi
  elif [ $arch == "x86_64" ]; then
    src_arch=x86-64
  fi

  ls -lh ./build-android-$src_arch/install/lib/*.so

  cp -v ./build-android-$src_arch/install/lib/*.so ./android/SherpaOnnxSimulateStreamingAsr/app/src/main/jniLibs/$arch/

  pushd ./android/SherpaOnnxSimulateStreamingAsr
  sed -i.bak s/2048/9012/g ./gradle.properties
  git diff ./gradle.properties
  ./gradlew assembleRelease
  popd

  mv android/SherpaOnnxSimulateStreamingAsr/app/build/outputs/apk/release/app-release-unsigned.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-simulated_streaming_asr-$lang-$short_name.apk
  ls -lh apks
  rm -v ./android/SherpaOnnxSimulateStreamingAsr/app/src/main/jniLibs/$arch/*.so
done

rm -rf ./android/SherpaOnnxSimulateStreamingAsr/app/src/main/assets/$model_name
rm -rf ./android/SherpaOnnxSimulateStreamingAsr/app/src/main/assets/lexicon.txt
rm -rf ./android/SherpaOnnxSimulateStreamingAsr/app/src/main/assets/replace.fst

{% endfor %}

git checkout .

ls -lh apks/


================================================
FILE: scripts/apk/build-apk-vad-asr.sh.in
================================================
#!/usr/bin/env bash
#
# Auto generated! Please DO NOT EDIT!

# Please set the environment variable ANDROID_NDK
# before running this script

# Inside the $ANDROID_NDK directory, you can find a binary ndk-build
# and some other files like the file "build/cmake/android.toolchain.cmake"

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

log "Building streaming VAD + ASR APK for sherpa-onnx v${SHERPA_ONNX_VERSION}"

export SHERPA_ONNX_ENABLE_TTS=OFF

log "====================arm64-v8a================="
./build-android-arm64-v8a.sh
log "====================armv7-eabi================"
./build-android-armv7-eabi.sh
log "====================x86-64===================="
./build-android-x86-64.sh
log "====================x86===================="
./build-android-x86.sh

mkdir -p apks

{% for model in model_list %}
pushd ./android/SherpaOnnxVadAsr/app/src/main/assets/
model_name={{ model.model_name }}
type={{ model.idx }}
lang={{ model.lang }}
short_name={{ model.short_name }}

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/${model_name}.tar.bz2
tar xvf ${model_name}.tar.bz2

{{ model.cmd }}

rm -rf  *.tar.bz2
ls -lh $model_name

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

popd
# Now we are at the project root directory

git checkout .
pushd android/SherpaOnnxVadAsr/app/src/main/java/com/k2fsa/sherpa/onnx
sed -i.bak s/"asrModelType = 0/asrModelType = $type/" ./MainActivity.kt

{% if model.rule_fsts %}
  rule_fsts={{ model.rule_fsts }}
  sed -i.bak s%"asrRuleFsts = null"%"asrRuleFsts = \"$rule_fsts\""% ./MainActivity.kt
{% endif %}

git diff
popd

for arch in arm64-v8a armeabi-v7a x86_64 x86; do
  log "------------------------------------------------------------"
  log "build ASR apk for $arch"
  log "------------------------------------------------------------"
  src_arch=$arch
  if [ $arch == "armeabi-v7a" ]; then
    src_arch=armv7-eabi
  elif [ $arch == "x86_64" ]; then
    src_arch=x86-64
  fi

  ls -lh ./build-android-$src_arch/install/lib/*.so

  cp -v ./build-android-$src_arch/install/lib/*.so ./android/SherpaOnnxVadAsr/app/src/main/jniLibs/$arch/

  pushd ./android/SherpaOnnxVadAsr
  sed -i.bak s/2048/9012/g ./gradle.properties
  git diff ./gradle.properties
  ./gradlew assembleRelease
  popd

  mv android/SherpaOnnxVadAsr/app/build/outputs/apk/release/app-release-unsigned.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-vad_asr-$lang-$short_name.apk
  ls -lh apks
  rm -v ./android/SherpaOnnxVadAsr/app/src/main/jniLibs/$arch/*.so
done

rm -rf ./android/SherpaOnnxVadAsr/app/src/main/assets/$model_name
{% endfor %}

git checkout .

ls -lh apks/


================================================
FILE: scripts/apk/generate-asr-2pass-apk-script.py
================================================
#!/usr/bin/env python3

import argparse
from dataclasses import dataclass

import jinja2


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--total",
        type=int,
        default=1,
        help="Number of runners",
    )
    parser.add_argument(
        "--index",
        type=int,
        default=0,
        help="Index of the current runner",
    )
    return parser.parse_args()


@dataclass
class Model:
    # We will download
    # https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/{model_name}.tar.bz2
    model_name: str

    # The type of the model, e..g, 0, 1, 2. It is hardcoded in the kotlin code
    idx: int

    # e.g., zh, en, zh_en
    lang: str

    # e.g., whisper, paraformer, zipformer
    short_name: str = ""

    # cmd is used to remove extra file from the model directory
    cmd: str = ""
    rule_fsts: str = ""


def get_2nd_models():
    models = [
        Model(
            model_name="sherpa-onnx-whisper-tiny.en",
            idx=2,
            lang="en",
            short_name="whisper_tiny",
            cmd="""
            pushd $model_name
            rm -fv tiny.en-encoder.onnx
            rm -fv tiny.en-decoder.onnx
            rm -rf test_wavs
            rm -fv *.py
            rm -fv requirements.txt
            rm -fv .gitignore
            rm -fv README.md

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-paraformer-zh-2023-09-14",
            idx=0,
            lang="zh",
            short_name="paraformer",
            rule_fsts="itn_zh_number.fst",
            cmd="""
            if [ ! -f itn_zh_number.fst ]; then
              curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
            fi
            pushd $model_name

            rm -fv README.md
            rm -rfv test_wavs
            rm -fv model.onnx

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="icefall-asr-zipformer-wenetspeech-20230615",
            idx=4,
            lang="zh",
            short_name="zipformer",
            rule_fsts="itn_zh_number.fst",
            cmd="""
            if [ ! -f itn_zh_number.fst ]; then
              curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
            fi
            pushd $model_name

            rm -rfv test_wavs
            rm -fv README.md
            mv -v data/lang_char/tokens.txt ./
            rm -rfv data/lang_char

            mv -v exp/encoder-epoch-12-avg-4.int8.onnx ./
            mv -v exp/decoder-epoch-12-avg-4.onnx ./
            mv -v exp/joiner-epoch-12-avg-4.int8.onnx ./
            rm -rfv exp

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17",
            idx=15,
            lang="zh_en_ko_ja_yue",
            short_name="sense_voice_2024_07_17_int8",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs
            rm -fv *.py

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-moonshine-tiny-en-int8",
            idx=21,
            lang="en",
            short_name="moonshine_tiny_int8",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-moonshine-base-en-int8",
            idx=22,
            lang="en",
            short_name="moonshine_base_int8",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02",
            idx=25,
            lang="multi_lang",
            short_name="dolphin_base_ctc",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09",
            idx=41,
            lang="zh_en_ko_ja_yue",
            short_name="sense_voice_2025_09_09_int8",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs
            rm -fv *.py

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10",
            idx=42,
            lang="zh_en_yue",
            short_name="wenetspeech_yue_u2pconformer_ctc_2025_09_10_int8",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
    ]
    return models


def get_1st_models():
    # See as ./generate-asr-apk-script.py
    models = [
        Model(
            model_name="sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20",
            idx=8,
            lang="bilingual_zh_en",
            short_name="zipformer",
            rule_fsts="itn_zh_number.fst",
            cmd="""
            if [ ! -f itn_zh_number.fst ]; then
              curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
            fi
            pushd $model_name
            rm -fv decoder-epoch-99-avg-1.int8.onnx
            rm -fv encoder-epoch-99-avg-1.onnx
            rm -fv joiner-epoch-99-avg-1.onnx

            rm -fv *.sh
            rm -fv bpe.model
            rm -fv README.md
            rm -fv .gitattributes
            rm -fv *state*
            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-en-2023-06-26",
            idx=6,
            lang="en",
            short_name="zipformer2",
            cmd="""
            pushd $model_name
            rm -fv encoder-epoch-99-avg-1-chunk-16-left-128.onnx
            rm -fv decoder-epoch-99-avg-1-chunk-16-left-128.int8.onnx
            rm -fv joiner-epoch-99-avg-1-chunk-16-left-128.int8.onnx

            rm -fv README.md
            rm -fv bpe.model
            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="icefall-asr-zipformer-streaming-wenetspeech-20230615",
            idx=3,
            lang="zh",
            short_name="zipformer2",
            rule_fsts="itn_zh_number.fst",
            cmd="""
            if [ ! -f itn_zh_number.fst ]; then
              curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
            fi
            pushd $model_name
            rm -fv exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx
            rm -fv exp/decoder-epoch-12-avg-4-chunk-16-left-128.int8.onnx
            rm -fv exp/joiner-epoch-12-avg-4-chunk-16-left-128.int8.onnx

            rm -fv data/lang_char/lexicon.txt
            rm -fv data/lang_char/words.txt
            rm -rfv test_wavs
            rm -fv README.md

            ls -lh exp/
            ls -lh data/lang_char

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-fr-2023-04-14",
            idx=7,
            lang="fr",
            short_name="zipformer",
            cmd="""
            pushd $model_name
            rm -fv encoder-epoch-29-avg-9-with-averaged-model.onnx
            rm -fv decoder-epoch-29-avg-9-with-averaged-model.int8.onnx
            rm -fv joiner-epoch-29-avg-9-with-averaged-model.int8.onnx

            rm -fv *.sh
            rm -rf test_wavs
            rm README.md

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23",
            idx=9,
            lang="zh",
            short_name="small_zipformer",
            rule_fsts="itn_zh_number.fst",
            cmd="""
            if [ ! -f itn_zh_number.fst ]; then
              curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
            fi
            pushd $model_name
            rm -fv encoder-epoch-99-avg-1.onnx
            rm -fv decoder-epoch-99-avg-1.int8.onnx
            rm -fv joiner-epoch-99-avg-1.onnx

            rm -fv *.sh
            rm -rf test_wavs
            rm README.md

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-en-20M-2023-02-17",
            idx=10,
            lang="en",
            short_name="small_zipformer",
            cmd="""
            pushd $model_name
            rm -fv encoder-epoch-99-avg-1.onnx
            rm -fv decoder-epoch-99-avg-1.int8.onnx
            rm -fv joiner-epoch-99-avg-1.onnx

            rm -fv *.sh
            rm -rf test_wavs
            rm README.md

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01",
            idx=15,
            lang="zh",
            short_name="int8_small_zipformer",
            rule_fsts="itn_zh_number.fst",
            cmd="""
            if [ ! -f itn_zh_number.fst ]; then
              curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
            fi
            pushd $model_name
            rm -f bpe.model

            rm -rf test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-small-ctc-zh-2025-04-01",
            idx=16,
            lang="zh",
            short_name="small_zipformer",
            rule_fsts="itn_zh_number.fst",
            cmd="""
            if [ ! -f itn_zh_number.fst ]; then
              curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
            fi
            pushd $model_name
            rm -f bpe.model

            rm -rf test_wavs

            ls -lh

            popd
            """,
        ),
    ]

    return models


def get_models():
    first = get_1st_models()
    second = get_2nd_models()

    combinations = []

    first_zh = [
        "sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23",
        "sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01",
        "sherpa-onnx-streaming-zipformer-small-ctc-zh-2025-04-01",
    ]

    second_zh = [
        "sherpa-onnx-paraformer-zh-2023-09-14",
        "icefall-asr-zipformer-wenetspeech-20230615",
        "sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17",
        "sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09",
        "sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02",
        "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10",
    ]
    for first_m in first_zh:
        for second_m in second_zh:
            combinations.append((first_m, second_m))

    combinations += [
        (
            "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17",
            "sherpa-onnx-whisper-tiny.en",
        ),
        (
            "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17",
            "sherpa-onnx-moonshine-tiny-en-int8",
        ),
        (
            "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17",
            "sherpa-onnx-moonshine-base-en-int8",
        ),
        (
            "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17",
            "sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17",
        ),
        (
            "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17",
            "sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09",
        ),
        (
            "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17",
            "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10",
        ),
    ]
    models = []
    for f, s in combinations:
        t = []
        for m in first:
            if m.model_name == f:
                t.append(m)
                break
        assert len(t) == 1, (f, s, first, second)

        for m in second:
            if m.model_name == s:
                t.append(m)
                break
        assert len(t) == 2, (f, s, first, second)

        models.append(t)

    return models


def main():
    args = get_args()
    index = args.index
    total = args.total
    assert 0 <= index < total, (index, total)

    all_model_list = get_models()

    num_models = len(all_model_list)

    num_per_runner = num_models // total
    if num_per_runner <= 0:
        raise ValueError(f"num_models: {num_models}, num_runners: {total}")

    start = index * num_per_runner
    end = start + num_per_runner

    remaining = num_models - args.total * num_per_runner

    print(f"{index}/{total}: {start}-{end}/{num_models}")

    d = dict()
    d["model_list"] = all_model_list[start:end]
    if index < remaining:
        s = args.total * num_per_runner + index
        d["model_list"].append(all_model_list[s])
        print(f"{s}/{num_models}")

    filename_list = [
        "./build-apk-asr-2pass.sh",
    ]
    for filename in filename_list:
        environment = jinja2.Environment()
        with open(f"{filename}.in") as f:
            s = f.read()
        template = environment.from_string(s)

        s = template.render(**d)
        with open(filename, "w") as f:
            print(s, file=f)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/apk/generate-asr-apk-script.py
================================================
#!/usr/bin/env python3

import argparse
from dataclasses import dataclass

import jinja2


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--total",
        type=int,
        default=1,
        help="Number of runners",
    )
    parser.add_argument(
        "--index",
        type=int,
        default=0,
        help="Index of the current runner",
    )
    return parser.parse_args()


@dataclass
class Model:
    # We will download
    # https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/{model_name}.tar.bz2
    model_name: str

    # The type of the model, e..g, 0, 1, 2. It is hardcoded in the kotlin code
    idx: int

    # e.g., zh, en, zh_en
    lang: str

    # e.g., whisper, paraformer, zipformer
    short_name: str = ""

    # cmd is used to remove extra file from the model directory
    cmd: str = ""

    rule_fsts: str = ""


def get_models():
    models = [
        Model(
            model_name="sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20",
            idx=8,
            lang="bilingual_zh_en",
            short_name="zipformer",
            rule_fsts="itn_zh_number.fst",
            cmd="""
            if [ ! -f itn_zh_number.fst ]; then
              curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
            fi
            pushd $model_name
            rm -fv decoder-epoch-99-avg-1.int8.onnx
            rm -fv encoder-epoch-99-avg-1.onnx
            rm -fv joiner-epoch-99-avg-1.onnx

            rm -fv *.sh
            rm -fv bpe.model
            rm -fv README.md
            rm -fv .gitattributes
            rm -fv *state*
            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-en-2023-06-26",
            idx=6,
            lang="en",
            short_name="zipformer2",
            cmd="""
            pushd $model_name
            rm -fv encoder-epoch-99-avg-1-chunk-16-left-128.onnx
            rm -fv decoder-epoch-99-avg-1-chunk-16-left-128.int8.onnx
            rm -fv joiner-epoch-99-avg-1-chunk-16-left-128.int8.onnx

            rm -fv README.md
            rm -fv bpe.model
            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="icefall-asr-zipformer-streaming-wenetspeech-20230615",
            idx=3,
            lang="zh",
            short_name="zipformer2",
            rule_fsts="itn_zh_number.fst",
            cmd="""
            if [ ! -f itn_zh_number.fst ]; then
              curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
            fi
            pushd $model_name
            rm -fv exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx
            rm -fv exp/decoder-epoch-12-avg-4-chunk-16-left-128.int8.onnx
            rm -fv exp/joiner-epoch-12-avg-4-chunk-16-left-128.int8.onnx

            rm -fv data/lang_char/lexicon.txt
            rm -fv data/lang_char/words.txt
            rm -rfv test_wavs
            rm -fv README.md

            ls -lh exp/
            ls -lh data/lang_char

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-fr-2023-04-14",
            idx=7,
            lang="fr",
            short_name="zipformer",
            cmd="""
            pushd $model_name
            rm -fv encoder-epoch-29-avg-9-with-averaged-model.onnx
            rm -fv decoder-epoch-29-avg-9-with-averaged-model.int8.onnx
            rm -fv joiner-epoch-29-avg-9-with-averaged-model.int8.onnx

            rm -fv *.sh
            rm -rfv test_wavs
            rm README.md

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23",
            idx=9,
            lang="zh",
            short_name="small_zipformer_14M_2023_02_23",
            rule_fsts="itn_zh_number.fst",
            cmd="""
            if [ ! -f itn_zh_number.fst ]; then
              curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
            fi
            pushd $model_name
            rm -fv encoder-epoch-99-avg-1.onnx
            rm -fv decoder-epoch-99-avg-1.int8.onnx
            rm -fv joiner-epoch-99-avg-1.onnx

            rm -fv *.sh
            rm -rf test_wavs
            rm README.md

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-en-20M-2023-02-17",
            idx=10,
            lang="en",
            short_name="small_zipformer_20M_2023_02_17",
            cmd="""
            pushd $model_name
            rm -fv encoder-epoch-99-avg-1.onnx
            rm -fv decoder-epoch-99-avg-1.int8.onnx
            rm -fv joiner-epoch-99-avg-1.onnx

            rm -fv *.sh
            rm -rf test_wavs
            rm README.md

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms",
            idx=11,
            lang="en",
            short_name="nemo_ctc_80ms",
            cmd="""
            pushd $model_name
            rm -rf test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms",
            idx=12,
            lang="en",
            short_name="nemo_ctc_480ms",
            cmd="""
            pushd $model_name
            rm -rf test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms",
            idx=13,
            lang="en",
            short_name="nemo_ctc_1040ms",
            cmd="""
            pushd $model_name
            rm -rf test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-korean-2024-06-16",
            idx=14,
            lang="ko",
            short_name="zipformer",
            cmd="""
            pushd $model_name
            rm -fv decoder-epoch-99-avg-1.int8.onnx
            rm -fv encoder-epoch-99-avg-1.onnx
            rm -fv joiner-epoch-99-avg-1.onnx

            rm -fv bpe.model
            rm -fv README.md
            rm -fv .gitattributes
            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-paraformer-bilingual-zh-en",
            idx=5,
            lang="zh_en",
            short_name="paraformer",
            cmd="""
            pushd $model_name
            rm -fv decoder.onnx
            rm -fv encoder.onnx

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01",
            idx=15,
            lang="zh",
            short_name="int8_small_zipformer_2025_04_01",
            rule_fsts="itn_zh_number.fst",
            cmd="""
            if [ ! -f itn_zh_number.fst ]; then
              curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
            fi
            pushd $model_name
            rm -f bpe.model

            rm -rf test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-small-ctc-zh-2025-04-01",
            idx=16,
            lang="zh",
            short_name="small_zipformer_2025_04_01",
            rule_fsts="itn_zh_number.fst",
            cmd="""
            if [ ! -f itn_zh_number.fst ]; then
              curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
            fi
            pushd $model_name
            rm -f bpe.model

            rm -rf test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-ctc-zh-int8-2025-06-30",
            idx=17,
            lang="zh",
            short_name="large_zipformer_int8",
            rule_fsts="itn_zh_number.fst",
            cmd="""
            if [ ! -f itn_zh_number.fst ]; then
              curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
            fi
            pushd $model_name
            rm -fv bpe.model

            rm -rf test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-ctc-zh-2025-06-30",
            idx=18,
            lang="zh",
            short_name="large_zipformer",
            rule_fsts="itn_zh_number.fst",
            cmd="""
            if [ ! -f itn_zh_number.fst ]; then
              curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
            fi
            pushd $model_name
            rm -fv bpe.model

            rm -rf test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-ctc-zh-fp16-2025-06-30",
            idx=19,
            lang="zh",
            short_name="large_zipformer_fp16",
            rule_fsts="itn_zh_number.fst",
            cmd="""
            if [ ! -f itn_zh_number.fst ]; then
              curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
            fi
            pushd $model_name
            rm -fv bpe.model

            rm -rf test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-ctc-zh-int8-2025-06-30",
            idx=20,
            lang="zh",
            short_name="large_zipformer_int8",
            rule_fsts="itn_zh_number.fst",
            cmd="""
            if [ ! -f itn_zh_number.fst ]; then
              curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
            fi
            pushd $model_name
            rm -fv bpe.model

            rm -rf test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-en-kroko-2025-08-06",
            idx=21,
            lang="en",
            short_name="zipformer_kroko_asr",
            cmd="""
            pushd $model_name
            rm -rf test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-es-kroko-2025-08-06",
            idx=22,
            lang="es",
            short_name="zipformer_kroko_asr",
            cmd="""
            pushd $model_name
            rm -rf test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-fr-kroko-2025-08-06",
            idx=23,
            lang="fr",
            short_name="zipformer_kroko_asr",
            cmd="""
            pushd $model_name
            rm -rf test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-de-kroko-2025-08-06",
            idx=24,
            lang="de",
            short_name="zipformer_kroko_asr",
            cmd="""
            pushd $model_name
            rm -rf test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-small-ru-vosk-int8-2025-08-16",
            idx=25,
            lang="ru",
            short_name="small_zipformer_int8_2025_08_16",
            cmd="""
            pushd $model_name
            rm -rf test_wavs
            rm -fv bpe.model

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-small-ru-vosk-2025-08-16",
            idx=26,
            lang="ru",
            short_name="small_zipformer_2025_08_16",
            cmd="""
            pushd $model_name
            rm -rf test_wavs
            rm -fv bpe.model

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-t-one-russian-2025-09-08",
            idx=27,
            lang="ru",
            short_name="t_one_ctc_2025_09_08",
            cmd="""
            pushd $model_name

            rm -v *.wav

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-nemotron-speech-streaming-en-0.6b-int8-2026-01-14",
            idx=28,
            lang="en",
            short_name="nemotron-speech-streaming-en-0.6b-int8-2026-01-14",
            cmd="""
            pushd $model_name

            rm -rf test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-bn-vosk-2026-02-09",
            idx=29,
            lang="bn",
            short_name="bengali_vosk_2026_02_09",
            cmd="""
            pushd $model_name

            rm -rf test_wavs

            ls -lh

            popd
            """,
        ),
    ]

    return models


def main():
    args = get_args()
    index = args.index
    total = args.total
    assert 0 <= index < total, (index, total)

    all_model_list = get_models()

    num_models = len(all_model_list)

    num_per_runner = num_models // total
    if num_per_runner <= 0:
        raise ValueError(f"num_models: {num_models}, num_runners: {total}")

    start = index * num_per_runner
    end = start + num_per_runner

    remaining = num_models - args.total * num_per_runner

    print(f"{index}/{total}: {start}-{end}/{num_models}")

    d = dict()
    d["model_list"] = all_model_list[start:end]
    if index < remaining:
        s = args.total * num_per_runner + index
        d["model_list"].append(all_model_list[s])
        print(f"{s}/{num_models}")

    filename_list = [
        "./build-apk-asr.sh",
    ]
    for filename in filename_list:
        environment = jinja2.Environment()
        with open(f"{filename}.in") as f:
            s = f.read()
        template = environment.from_string(s)

        s = template.render(**d)
        with open(filename, "w") as f:
            print(s, file=f)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/apk/generate-audio-tagging-apk-script.py
================================================
#!/usr/bin/env python3

import argparse
from dataclasses import dataclass

import jinja2


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--total",
        type=int,
        default=1,
        help="Number of runners",
    )
    parser.add_argument(
        "--index",
        type=int,
        default=0,
        help="Index of the current runner",
    )
    return parser.parse_args()


@dataclass
class AudioTaggingModel:
    model_name: str
    idx: int
    short_name: str = ""


def get_models():
    # see https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models
    icefall_models = [
        AudioTaggingModel(
            model_name="sherpa-onnx-zipformer-small-audio-tagging-2024-04-15",
            idx=0,
            short_name="small_zipformer",
        ),
        AudioTaggingModel(
            model_name="sherpa-onnx-zipformer-audio-tagging-2024-04-09",
            idx=1,
            short_name="zipformer",
        ),
    ]

    ced_models = [
        AudioTaggingModel(
            model_name="sherpa-onnx-ced-tiny-audio-tagging-2024-04-19",
            idx=2,
            short_name="ced_tiny",
        ),
        AudioTaggingModel(
            model_name="sherpa-onnx-ced-mini-audio-tagging-2024-04-19",
            idx=3,
            short_name="ced_mini",
        ),
        AudioTaggingModel(
            model_name="sherpa-onnx-ced-small-audio-tagging-2024-04-19",
            idx=4,
            short_name="ced_small",
        ),
        AudioTaggingModel(
            model_name="sherpa-onnx-ced-base-audio-tagging-2024-04-19",
            idx=5,
            short_name="ced_base",
        ),
    ]

    return icefall_models + ced_models


def main():
    args = get_args()
    index = args.index
    total = args.total
    assert 0 <= index < total, (index, total)

    all_model_list = get_models()

    num_models = len(all_model_list)

    num_per_runner = num_models // total
    if num_per_runner <= 0:
        raise ValueError(f"num_models: {num_models}, num_runners: {total}")

    start = index * num_per_runner
    end = start + num_per_runner

    remaining = num_models - args.total * num_per_runner

    print(f"{index}/{total}: {start}-{end}/{num_models}")

    d = dict()
    d["model_list"] = all_model_list[start:end]
    if index < remaining:
        s = args.total * num_per_runner + index
        d["model_list"].append(all_model_list[s])
        print(f"{s}/{num_models}")

    filename_list = [
        "./build-apk-audio-tagging.sh",
        "./build-apk-audio-tagging-wearos.sh",
    ]
    for filename in filename_list:
        environment = jinja2.Environment()
        with open(f"{filename}.in") as f:
            s = f.read()
        template = environment.from_string(s)

        s = template.render(**d)
        with open(filename, "w") as f:
            print(s, file=f)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/apk/generate-qnn-vad-asr-apk-script.py
================================================
#!/usr/bin/env python3

import argparse
from dataclasses import dataclass
from pathlib import Path

import jinja2


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--total",
        type=int,
        default=1,
        help="Number of runners",
    )
    parser.add_argument(
        "--index",
        type=int,
        default=0,
        help="Index of the current runner",
    )
    return parser.parse_args()


@dataclass
class Model:
    # We will download
    # https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/{model_name}.tar.bz2
    model_name: str

    # The type of the model, e..g, 0, 1, 2. It is hardcoded in the kotlin code
    idx: int

    # e.g., zh, en, zh_en
    lang: str

    # e.g., whisper, paraformer, zipformer
    short_name: str = ""

    # cmd is used to remove extra file from the model directory
    cmd: str = ""

    rule_fsts: str = ""

    use_hr: bool = False


# See get_2nd_models() in ./generate-asr-2pass-apk-script.py
def get_models():
    models = [
        Model(
            model_name="sherpa-onnx-qnn-5-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8",
            idx=9000,
            lang="zh_en_ko_ja_yue",
            short_name="5-seconds-sense_voice_2024_07_17_int8",
            use_hr=True,
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-qnn-8-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8",
            idx=9001,
            lang="zh_en_ko_ja_yue",
            short_name="8-seconds-sense_voice_2024_07_17_int8",
            use_hr=True,
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-qnn-10-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8",
            idx=9002,
            lang="zh_en_ko_ja_yue",
            short_name="10-seconds-sense_voice_2024_07_17_int8",
            use_hr=True,
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-qnn-13-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8",
            idx=9003,
            lang="zh_en_ko_ja_yue",
            short_name="13-seconds-sense_voice_2024_07_17_int8",
            use_hr=True,
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-qnn-15-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8",
            idx=9004,
            lang="zh_en_ko_ja_yue",
            short_name="15-seconds-sense_voice_2024_07_17_int8",
            use_hr=True,
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-qnn-18-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8",
            idx=9005,
            lang="zh_en_ko_ja_yue",
            short_name="18-seconds-sense_voice_2024_07_17_int8",
            use_hr=True,
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-qnn-20-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8",
            idx=9006,
            lang="zh_en_ko_ja_yue",
            short_name="20-seconds-sense_voice_2024_07_17_int8",
            use_hr=True,
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-qnn-23-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8",
            idx=9007,
            lang="zh_en_ko_ja_yue",
            short_name="23-seconds-sense_voice_2024_07_17_int8",
            use_hr=True,
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-qnn-25-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8",
            idx=9008,
            lang="zh_en_ko_ja_yue",
            short_name="25-seconds-sense_voice_2024_07_17_int8",
            use_hr=True,
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-qnn-28-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8",
            idx=9009,
            lang="zh_en_ko_ja_yue",
            short_name="28-seconds-sense_voice_2024_07_17_int8",
            use_hr=True,
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-qnn-30-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8",
            idx=9010,
            lang="zh_en_ko_ja_yue",
            short_name="30-seconds-sense_voice_2024_07_17_int8",
            use_hr=True,
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-qnn-5-seconds-zipformer-ctc-zh-2025-07-03-int8",
            idx=9011,
            lang="zh",
            short_name="5-seconds-zipformer_ctc_2025_07_03_int8",
            use_hr=True,
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-qnn-8-seconds-zipformer-ctc-zh-2025-07-03-int8",
            idx=9012,
            lang="zh",
            short_name="8-seconds-zipformer_ctc_2025_07_03_int8",
            use_hr=True,
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-qnn-10-seconds-zipformer-ctc-zh-2025-07-03-int8",
            idx=9013,
            lang="zh",
            short_name="10-seconds-zipformer_ctc_2025_07_03_int8",
            use_hr=True,
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-qnn-13-seconds-zipformer-ctc-zh-2025-07-03-int8",
            idx=9014,
            lang="zh",
            short_name="13-seconds-zipformer_ctc_2025_07_03_int8",
            use_hr=True,
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-qnn-15-seconds-zipformer-ctc-zh-2025-07-03-int8",
            idx=9015,
            lang="zh",
            short_name="15-seconds-zipformer_ctc_2025_07_03_int8",
            use_hr=True,
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-qnn-18-seconds-zipformer-ctc-zh-2025-07-03-int8",
            idx=9016,
            lang="zh",
            short_name="18-seconds-zipformer_ctc_2025_07_03_int8",
            use_hr=True,
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-qnn-20-seconds-zipformer-ctc-zh-2025-07-03-int8",
            idx=9017,
            lang="zh",
            short_name="20-seconds-zipformer_ctc_2025_07_03_int8",
            use_hr=True,
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-qnn-23-seconds-zipformer-ctc-zh-2025-07-03-int8",
            idx=9018,
            lang="zh",
            short_name="23-seconds-zipformer_ctc_2025_07_03_int8",
            use_hr=True,
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-qnn-25-seconds-zipformer-ctc-zh-2025-07-03-int8",
            idx=9019,
            lang="zh",
            short_name="25-seconds-zipformer_ctc_2025_07_03_int8",
            use_hr=True,
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-qnn-28-seconds-zipformer-ctc-zh-2025-07-03-int8",
            idx=9020,
            lang="zh",
            short_name="28-seconds-zipformer_ctc_2025_07_03_int8",
            use_hr=True,
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-qnn-30-seconds-zipformer-ctc-zh-2025-07-03-int8",
            idx=9021,
            lang="zh",
            short_name="30-seconds-zipformer_ctc_2025_07_03_int8",
            use_hr=True,
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-qnn-5-seconds-paraformer-zh-2023-03-28-int8",
            idx=9023,
            lang="zh",
            short_name="5-seconds-paraformer_zh_2023_03_28_int8",
            use_hr=True,
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-qnn-5-seconds-paraformer-zh-2025-10-07-int8",
            idx=9024,
            lang="zh",
            short_name="5-seconds-paraformer_zh_2025_10_07_int8",
            use_hr=True,
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
    ]
    return models


def main():
    args = get_args()
    index = args.index
    total = args.total
    assert 0 <= index < total, (index, total)

    all_model_list = get_models()

    num_models = len(all_model_list)

    num_per_runner = num_models // total
    if num_per_runner <= 0:
        raise ValueError(f"num_models: {num_models}, num_runners: {total}")

    start = index * num_per_runner
    end = start + num_per_runner

    remaining = num_models - args.total * num_per_runner

    print(f"{index}/{total}: {start}-{end}/{num_models}")

    d = dict()
    d["model_list"] = all_model_list[start:end]
    if index < remaining:
        s = args.total * num_per_runner + index
        d["model_list"].append(all_model_list[s])
        print(f"{s}/{num_models}")

    filename_list = [
        "./build-apk-qnn-vad-asr-simulate-streaming.sh",
    ]
    for filename in filename_list:
        environment = jinja2.Environment()
        if not Path(f"{filename}.in").is_file():
            print(f"skip {filename}")
            continue

        with open(f"{filename}.in") as f:
            s = f.read()
        template = environment.from_string(s)

        s = template.render(**d)
        with open(filename, "w") as f:
            print(s, file=f)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/apk/generate-slid-apk-script.py
================================================
#!/usr/bin/env python3

import argparse
from dataclasses import dataclass

import jinja2


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--total",
        type=int,
        default=1,
        help="Number of runners",
    )
    parser.add_argument(
        "--index",
        type=int,
        default=0,
        help="Index of the current runner",
    )
    return parser.parse_args()


@dataclass
class SlidModel:
    model_name: str
    idx: int
    short_name: str = ""


def get_models():
    # see https://k2-fsa.github.io/sherpa/onnx/spolken-language-identification/pretrained_models.html#pre-trained-models
    whisper_models = [
        SlidModel(
            model_name="sherpa-onnx-whisper-tiny",
            idx=0,
            short_name="whisper_tiny",
        ),
    ]

    return whisper_models


def main():
    args = get_args()
    index = args.index
    total = args.total
    assert 0 <= index < total, (index, total)

    all_model_list = get_models()

    num_models = len(all_model_list)

    num_per_runner = num_models // total
    if num_per_runner <= 0:
        raise ValueError(f"num_models: {num_models}, num_runners: {total}")

    start = index * num_per_runner
    end = start + num_per_runner

    remaining = num_models - args.total * num_per_runner

    print(f"{index}/{total}: {start}-{end}/{num_models}")

    d = dict()
    d["model_list"] = all_model_list[start:end]
    if index < remaining:
        s = args.total * num_per_runner + index
        d["model_list"].append(all_model_list[s])
        print(f"{s}/{num_models}")

    filename_list = [
        "./build-apk-slid.sh",
    ]
    for filename in filename_list:
        environment = jinja2.Environment()
        with open(f"{filename}.in") as f:
            s = f.read()
        template = environment.from_string(s)

        s = template.render(**d)
        with open(filename, "w") as f:
            print(s, file=f)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/apk/generate-speaker-diarization-apk-script.py
================================================
#!/usr/bin/env python3

import argparse
from dataclasses import dataclass
from typing import List

import jinja2


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--total",
        type=int,
        default=1,
        help="Number of runners",
    )
    parser.add_argument(
        "--index",
        type=int,
        default=0,
        help="Index of the current runner",
    )
    return parser.parse_args()


@dataclass
class SpeakerSegmentationModel:
    model_name: str
    short_name: str


@dataclass
class SpeakerEmbeddingModel:
    model_name: str
    short_name: str


@dataclass
class Model:
    segmentation: SpeakerSegmentationModel
    embedding: SpeakerEmbeddingModel


def get_segmentation_models() -> List[SpeakerSegmentationModel]:
    models = [
        SpeakerSegmentationModel(
            model_name="sherpa-onnx-pyannote-segmentation-3-0",
            short_name="pyannote_audio",
        ),
        SpeakerSegmentationModel(
            model_name="sherpa-onnx-reverb-diarization-v1",
            short_name="revai_v1",
        ),
    ]

    return models


def get_embedding_models() -> List[SpeakerEmbeddingModel]:
    models = [
        SpeakerSegmentationModel(
            model_name="3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k",
            short_name="3dspeaker",
        ),
        SpeakerSegmentationModel(
            model_name="nemo_en_titanet_small",
            short_name="nemo",
        ),
    ]
    return models


def main():
    args = get_args()
    index = args.index
    total = args.total
    assert 0 <= index < total, (index, total)

    segmentation_models = get_segmentation_models()
    embedding_models = get_embedding_models()

    all_model_list = []
    for s in segmentation_models:
        for e in embedding_models:
            all_model_list.append(Model(segmentation=s, embedding=e))

    num_models = len(all_model_list)

    num_per_runner = num_models // total
    if num_per_runner <= 0:
        raise ValueError(f"num_models: {num_models}, num_runners: {total}")

    start = index * num_per_runner
    end = start + num_per_runner

    remaining = num_models - args.total * num_per_runner

    print(f"{index}/{total}: {start}-{end}/{num_models}")

    d = dict()
    d["model_list"] = all_model_list[start:end]
    if index < remaining:
        s = args.total * num_per_runner + index
        d["model_list"].append(all_model_list[s])
        print(f"{s}/{num_models}")

    filename_list = ["./build-apk-speaker-diarization.sh"]
    for filename in filename_list:
        environment = jinja2.Environment()
        with open(f"{filename}.in") as f:
            s = f.read()
        template = environment.from_string(s)

        s = template.render(**d)
        with open(filename, "w") as f:
            print(s, file=f)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/apk/generate-speaker-identification-apk-script.py
================================================
#!/usr/bin/env python3

import argparse
from dataclasses import dataclass
from typing import List

import jinja2


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--total",
        type=int,
        default=1,
        help="Number of runners",
    )
    parser.add_argument(
        "--index",
        type=int,
        default=0,
        help="Index of the current runner",
    )
    return parser.parse_args()


@dataclass
class SpeakerIdentificationModel:
    model_name: str
    short_name: str = ""
    lang: str = ""
    framework: str = ""


def get_3dspeaker_models() -> List[SpeakerIdentificationModel]:
    models = [
        SpeakerIdentificationModel(
            model_name="3dspeaker_speech_campplus_sv_en_voxceleb_16k.onnx"
        ),
        SpeakerIdentificationModel(
            model_name="3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx"
        ),
        SpeakerIdentificationModel(
            model_name="3dspeaker_speech_eres2net_base_200k_sv_zh-cn_16k-common.onnx"
        ),
        SpeakerIdentificationModel(
            model_name="3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx"
        ),
        SpeakerIdentificationModel(
            model_name="3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx"
        ),
        SpeakerIdentificationModel(
            model_name="3dspeaker_speech_eres2net_sv_en_voxceleb_16k.onnx"
        ),
        SpeakerIdentificationModel(
            model_name="3dspeaker_speech_eres2net_sv_zh-cn_16k-common.onnx"
        ),
    ]

    prefix = "3dspeaker_speech_"
    num = len(prefix)
    for m in models:
        m.framework = "3dspeaker"
        m.short_name = m.model_name[num:-5]
        if "_zh-cn_" in m.model_name:
            m.lang = "zh"
        elif "_en_" in m.model_name:
            m.lang = "en"
        else:
            raise ValueError(m)
    return models


def get_wespeaker_models() -> List[SpeakerIdentificationModel]:
    models = [
        SpeakerIdentificationModel(model_name="wespeaker_en_voxceleb_CAM++.onnx"),
        SpeakerIdentificationModel(model_name="wespeaker_en_voxceleb_CAM++_LM.onnx"),
        SpeakerIdentificationModel(
            model_name="wespeaker_en_voxceleb_resnet152_LM.onnx"
        ),
        SpeakerIdentificationModel(
            model_name="wespeaker_en_voxceleb_resnet221_LM.onnx"
        ),
        SpeakerIdentificationModel(
            model_name="wespeaker_en_voxceleb_resnet293_LM.onnx"
        ),
        SpeakerIdentificationModel(model_name="wespeaker_en_voxceleb_resnet34.onnx"),
        SpeakerIdentificationModel(model_name="wespeaker_en_voxceleb_resnet34_LM.onnx"),
        SpeakerIdentificationModel(model_name="wespeaker_zh_cnceleb_resnet34.onnx"),
        SpeakerIdentificationModel(model_name="wespeaker_zh_cnceleb_resnet34_LM.onnx"),
    ]

    prefix = "wespeaker_xx_"
    num = len(prefix)
    for m in models:
        m.framework = "wespeaker"
        m.short_name = m.model_name[num:-5]
        if "_zh_" in m.model_name:
            m.lang = "zh"
        elif "_en_" in m.model_name:
            m.lang = "en"
        else:
            raise ValueError(m)
    return models


def get_nemo_models() -> List[SpeakerIdentificationModel]:
    models = [
        SpeakerIdentificationModel(
            model_name="nemo_en_speakerverification_speakernet.onnx"
        ),
        SpeakerIdentificationModel(model_name="nemo_en_titanet_large.onnx"),
        SpeakerIdentificationModel(model_name="nemo_en_titanet_small.onnx"),
    ]

    prefix = "nemo_en_"
    num = len(prefix)
    for m in models:
        m.framework = "nemo"
        m.short_name = m.model_name[num:-5]
        if "_zh_" in m.model_name:
            m.lang = "zh"
        elif "_en_" in m.model_name:
            m.lang = "en"
        else:
            raise ValueError(m)
    return models


def main():
    args = get_args()
    index = args.index
    total = args.total
    assert 0 <= index < total, (index, total)

    all_model_list = get_3dspeaker_models()
    all_model_list += get_wespeaker_models()
    all_model_list += get_nemo_models()

    num_models = len(all_model_list)

    num_per_runner = num_models // total
    if num_per_runner <= 0:
        raise ValueError(f"num_models: {num_models}, num_runners: {total}")

    start = index * num_per_runner
    end = start + num_per_runner

    remaining = num_models - args.total * num_per_runner

    print(f"{index}/{total}: {start}-{end}/{num_models}")

    d = dict()
    d["model_list"] = all_model_list[start:end]
    if index < remaining:
        s = args.total * num_per_runner + index
        d["model_list"].append(all_model_list[s])
        print(f"{s}/{num_models}")

    filename_list = ["./build-apk-speaker-identification.sh"]
    for filename in filename_list:
        environment = jinja2.Environment()
        with open(f"{filename}.in") as f:
            s = f.read()
        template = environment.from_string(s)

        s = template.render(**d)
        with open(filename, "w") as f:
            print(s, file=f)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/apk/generate-tts-apk-script.py
================================================
#!/usr/bin/env python3

import argparse
from dataclasses import dataclass
from typing import List, Optional

import jinja2

# pip install iso639-lang
from iso639 import Lang


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--total",
        type=int,
        default=1,
        help="Number of runners",
    )
    parser.add_argument(
        "--index",
        type=int,
        default=0,
        help="Index of the current runner",
    )
    return parser.parse_args()


@dataclass
class TtsModel:
    model_dir: str
    model_name: str = ""  # for vits
    acoustic_model_name: str = ""  # for matcha
    vocoder: str = ""  # for matcha
    voices: str = ""  # for kokoro
    lang: str = ""  # en, zh, fr, de, etc.
    lang2: str = ""  # en, zh, fr, de, etc.
    rule_fsts: Optional[List[str]] = None
    rule_fars: Optional[List[str]] = None
    data_dir: Optional[str] = None
    dict_dir: Optional[str] = None
    is_char: bool = False
    lang_iso_639_3: str = ""
    lang_iso_639_3_2: str = ""
    lexicon: str = ""
    is_kitten: bool = False


def convert_lang_to_iso_639_3(models: List[TtsModel]):
    for m in models:
        if m.lang_iso_639_3 == "":
            m.lang_iso_639_3 = Lang(m.lang).pt3
        if m.lang2 != "":
            m.lang_iso_639_3_2 = Lang(m.lang2).pt3


def get_coqui_models() -> List[TtsModel]:
    # English (coqui-ai/TTS)
    models = [
        TtsModel(model_dir="vits-coqui-en-ljspeech"),
        TtsModel(model_dir="vits-coqui-en-ljspeech-neon"),
        TtsModel(model_dir="vits-coqui-en-vctk"),
        #  TtsModel(model_dir="vits-coqui-en-jenny"),
    ]

    for m in models:
        m.data_dir = m.model_dir + "/" + "espeak-ng-data"
        m.model_name = "model.onnx"
        m.lang = "en"

    character_models = [
        TtsModel(model_dir="vits-coqui-bg-cv", lang="bg"),
        TtsModel(model_dir="vits-coqui-bn-custom_female", lang="bn"),
        TtsModel(model_dir="vits-coqui-cs-cv", lang="cs"),
        TtsModel(model_dir="vits-coqui-da-cv", lang="da"),
        TtsModel(model_dir="vits-coqui-de-css10", lang="de"),
        TtsModel(model_dir="vits-coqui-es-css10", lang="es"),
        TtsModel(model_dir="vits-coqui-et-cv", lang="et"),
        TtsModel(model_dir="vits-coqui-fi-css10", lang="fi"),
        TtsModel(model_dir="vits-coqui-fr-css10", lang="fr"),
        TtsModel(model_dir="vits-coqui-ga-cv", lang="ga"),
        TtsModel(model_dir="vits-coqui-hr-cv", lang="hr"),
        TtsModel(model_dir="vits-coqui-lt-cv", lang="lt"),
        TtsModel(model_dir="vits-coqui-lv-cv", lang="lv"),
        TtsModel(model_dir="vits-coqui-mt-cv", lang="mt"),
        TtsModel(model_dir="vits-coqui-nl-css10", lang="nl"),
        TtsModel(model_dir="vits-coqui-pl-mai_female", lang="pl"),
        TtsModel(model_dir="vits-coqui-pt-cv", lang="pt"),
        TtsModel(model_dir="vits-coqui-ro-cv", lang="ro"),
        TtsModel(model_dir="vits-coqui-sk-cv", lang="sk"),
        TtsModel(model_dir="vits-coqui-sl-cv", lang="sl"),
        TtsModel(model_dir="vits-coqui-sv-cv", lang="sv"),
        TtsModel(model_dir="vits-coqui-uk-mai", lang="uk"),
    ]
    for m in character_models:
        m.is_char = True
        m.model_name = "model.onnx"

    return models + character_models


def get_piper_models() -> List[TtsModel]:
    models = [
        #  TtsModel(model_dir="vits-piper-es_ES-mls_10246-low"),
        #  TtsModel(model_dir="vits-piper-es_ES-mls_9972-low"),
        #  TtsModel(model_dir="vits-piper-pl_PL-mls_6892-low"),
        TtsModel(model_dir="vits-piper-ar_JO-kareem-low"),
        TtsModel(model_dir="vits-piper-ar_JO-kareem-medium"),
        TtsModel(model_dir="vits-piper-ar_JO-SA_dii-high"),
        TtsModel(model_dir="vits-piper-ar_JO-SA_miro-high"),
        TtsModel(model_dir="vits-piper-ar_JO-SA_miro_V2-high"),
        TtsModel(model_dir="vits-piper-ca_ES-upc_ona-medium"),
        TtsModel(model_dir="vits-piper-ca_ES-upc_ona-x_low"),
        TtsModel(model_dir="vits-piper-ca_ES-upc_pau-x_low"),
        TtsModel(model_dir="vits-piper-cs_CZ-jirka-low"),
        TtsModel(model_dir="vits-piper-cs_CZ-jirka-medium"),
        TtsModel(model_dir="vits-piper-cy_GB-bu_tts-medium"),
        TtsModel(model_dir="vits-piper-cy_GB-gwryw_gogleddol-medium"),
        TtsModel(model_dir="vits-piper-da_DK-talesyntese-medium"),
        TtsModel(model_dir="vits-piper-de_DE-eva_k-x_low"),
        TtsModel(model_dir="vits-piper-de_DE-karlsson-low"),
        TtsModel(model_dir="vits-piper-de_DE-kerstin-low"),
        TtsModel(model_dir="vits-piper-de_DE-dii-high"),
        TtsModel(model_dir="vits-piper-de_DE-miro-high"),
        #  TtsModel(model_dir="vits-piper-de_DE-mls-medium"),
        TtsModel(model_dir="vits-piper-de_DE-pavoque-low"),
        TtsModel(model_dir="vits-piper-de_DE-ramona-low"),
        TtsModel(model_dir="vits-piper-de_DE-thorsten-high"),
        TtsModel(model_dir="vits-piper-de_DE-thorsten-low"),
        TtsModel(model_dir="vits-piper-de_DE-thorsten-medium"),
        TtsModel(model_dir="vits-piper-de_DE-thorsten_emotional-medium"),
        TtsModel(model_dir="vits-piper-de_DE-glados-high"),
        TtsModel(model_dir="vits-piper-de_DE-glados-low"),
        TtsModel(model_dir="vits-piper-de_DE-glados-medium"),
        TtsModel(model_dir="vits-piper-de_DE-glados_turret-high"),
        TtsModel(model_dir="vits-piper-de_DE-glados_turret-low"),
        TtsModel(model_dir="vits-piper-de_DE-glados_turret-medium"),
        TtsModel(model_dir="vits-piper-el_GR-rapunzelina-low"),
        TtsModel(model_dir="vits-piper-en_GB-alan-low"),
        TtsModel(model_dir="vits-piper-en_GB-alan-medium"),
        TtsModel(model_dir="vits-piper-en_GB-alba-medium"),
        TtsModel(model_dir="vits-piper-en_GB-aru-medium"),
        TtsModel(model_dir="vits-piper-en_GB-cori-high"),
        TtsModel(model_dir="vits-piper-en_GB-cori-medium"),
        TtsModel(model_dir="vits-piper-en_GB-dii-high"),
        TtsModel(model_dir="vits-piper-en_GB-jenny_dioco-medium"),
        TtsModel(model_dir="vits-piper-en_GB-miro-high"),
        TtsModel(model_dir="vits-piper-en_GB-northern_english_male-medium"),
        TtsModel(model_dir="vits-piper-en_GB-semaine-medium"),
        TtsModel(model_dir="vits-piper-en_GB-southern_english_female-low"),
        TtsModel(model_dir="vits-piper-en_GB-southern_english_female-medium"),
        TtsModel(model_dir="vits-piper-en_GB-southern_english_male-medium"),
        TtsModel(model_dir="vits-piper-en_GB-sweetbbak-amy"),
        TtsModel(model_dir="vits-piper-en_GB-vctk-medium"),
        TtsModel(model_dir="vits-piper-en_US-amy-low"),
        TtsModel(model_dir="vits-piper-en_US-amy-medium"),
        TtsModel(model_dir="vits-piper-en_US-arctic-medium"),
        TtsModel(model_dir="vits-piper-en_US-bryce-medium"),
        TtsModel(model_dir="vits-piper-en_US-danny-low"),
        TtsModel(model_dir="vits-piper-en_US-glados"),
        TtsModel(model_dir="vits-piper-en_US-glados-high"),
        TtsModel(model_dir="vits-piper-en_US-hfc_female-medium"),
        TtsModel(model_dir="vits-piper-en_US-hfc_male-medium"),
        TtsModel(model_dir="vits-piper-en_US-joe-medium"),
        TtsModel(model_dir="vits-piper-en_US-john-medium"),
        TtsModel(model_dir="vits-piper-en_US-kathleen-low"),
        TtsModel(model_dir="vits-piper-en_US-kristin-medium"),
        TtsModel(model_dir="vits-piper-en_US-kusal-medium"),
        TtsModel(model_dir="vits-piper-en_US-l2arctic-medium"),
        TtsModel(model_dir="vits-piper-en_US-lessac-high"),
        TtsModel(model_dir="vits-piper-en_US-lessac-low"),
        TtsModel(model_dir="vits-piper-en_US-lessac-medium"),
        TtsModel(model_dir="vits-piper-en_US-libritts-high"),
        TtsModel(model_dir="vits-piper-en_US-libritts_r-medium"),
        TtsModel(model_dir="vits-piper-en_US-ljspeech-high"),
        TtsModel(model_dir="vits-piper-en_US-ljspeech-medium"),
        TtsModel(model_dir="vits-piper-en_US-miro-high"),
        TtsModel(model_dir="vits-piper-en_US-norman-medium"),
        TtsModel(model_dir="vits-piper-en_US-ryan-high"),
        TtsModel(model_dir="vits-piper-en_US-ryan-low"),
        TtsModel(model_dir="vits-piper-en_US-ryan-medium"),
        TtsModel(model_dir="vits-piper-es_AR-daniela-high"),
        TtsModel(model_dir="vits-piper-es_ES-carlfm-x_low"),
        TtsModel(model_dir="vits-piper-es_ES-davefx-medium"),
        TtsModel(model_dir="vits-piper-es_ES-glados-medium"),
        TtsModel(model_dir="vits-piper-es_ES-miro-high"),
        TtsModel(model_dir="vits-piper-es_ES-sharvard-medium"),
        TtsModel(model_dir="vits-piper-es_MX-ald-medium"),
        TtsModel(model_dir="vits-piper-es_MX-claude-high"),
        TtsModel(model_dir="vits-piper-fa_IR-amir-medium"),
        TtsModel(model_dir="vits-piper-fa_IR-ganji-medium"),
        TtsModel(model_dir="vits-piper-fa_IR-ganji_adabi-medium"),
        TtsModel(model_dir="vits-piper-fa_IR-gyro-medium"),
        TtsModel(model_dir="vits-piper-fa_IR-reza_ibrahim-medium"),
        TtsModel(model_dir="vits-piper-fa_en-rezahedayatfar-ibrahimwalk-medium"),
        TtsModel(model_dir="vits-piper-fi_FI-harri-low"),
        TtsModel(model_dir="vits-piper-fi_FI-harri-medium"),
        #  TtsModel(model_dir="vits-piper-fr_FR-mls-medium"),
        TtsModel(model_dir="vits-piper-fr_FR-gilles-low"),
        TtsModel(model_dir="vits-piper-fr_FR-miro-high"),
        TtsModel(model_dir="vits-piper-fr_FR-siwis-low"),
        TtsModel(model_dir="vits-piper-fr_FR-siwis-medium"),
        TtsModel(model_dir="vits-piper-fr_FR-tjiho-model1"),
        TtsModel(model_dir="vits-piper-fr_FR-tjiho-model2"),
        TtsModel(model_dir="vits-piper-fr_FR-tjiho-model3"),
        TtsModel(model_dir="vits-piper-fr_FR-tom-medium"),
        TtsModel(model_dir="vits-piper-fr_FR-upmc-medium"),
        TtsModel(model_dir="vits-piper-hi_IN-pratham-medium"),
        TtsModel(model_dir="vits-piper-hi_IN-priyamvada-medium"),
        TtsModel(model_dir="vits-piper-hi_IN-rohan-medium"),
        TtsModel(model_dir="vits-piper-hu_HU-anna-medium"),
        TtsModel(model_dir="vits-piper-hu_HU-berta-medium"),
        TtsModel(model_dir="vits-piper-hu_HU-imre-medium"),
        TtsModel(model_dir="vits-piper-id_ID-news_tts-medium"),
        TtsModel(model_dir="vits-piper-is_IS-bui-medium"),
        TtsModel(model_dir="vits-piper-is_IS-salka-medium"),
        TtsModel(model_dir="vits-piper-is_IS-steinn-medium"),
        TtsModel(model_dir="vits-piper-is_IS-ugla-medium"),
        TtsModel(model_dir="vits-piper-it_IT-dii-high"),
        TtsModel(model_dir="vits-piper-it_IT-miro-high"),
        TtsModel(model_dir="vits-piper-it_IT-paola-medium"),
        TtsModel(model_dir="vits-piper-it_IT-riccardo-x_low"),
        TtsModel(model_dir="vits-piper-ka_GE-natia-medium"),
        TtsModel(model_dir="vits-piper-kk_KZ-iseke-x_low"),
        TtsModel(model_dir="vits-piper-kk_KZ-issai-high"),
        TtsModel(model_dir="vits-piper-kk_KZ-raya-x_low"),
        TtsModel(model_dir="vits-piper-lv_LV-aivars-medium"),
        TtsModel(model_dir="vits-piper-lb_LU-marylux-medium"),
        TtsModel(model_dir="vits-piper-ne_NP-chitwan-medium"),
        TtsModel(model_dir="vits-piper-ne_NP-google-medium"),
        TtsModel(model_dir="vits-piper-ne_NP-google-x_low"),
        TtsModel(model_dir="vits-piper-nl_BE-nathalie-medium"),
        TtsModel(model_dir="vits-piper-nl_BE-nathalie-x_low"),
        TtsModel(model_dir="vits-piper-nl_BE-rdh-medium"),
        TtsModel(model_dir="vits-piper-nl_BE-rdh-x_low"),
        TtsModel(model_dir="vits-piper-nl_NL-miro-high"),
        TtsModel(model_dir="vits-piper-nl_NL-dii-high"),
        #  TtsModel(model_dir="vits-piper-nl_NL-mls-medium"),
        #  TtsModel(model_dir="vits-piper-nl_NL-mls_5809-low"),
        #  TtsModel(model_dir="vits-piper-nl_NL-mls_7432-low"),
        TtsModel(model_dir="vits-piper-no_NO-talesyntese-medium"),
        TtsModel(model_dir="vits-piper-pl_PL-darkman-medium"),
        TtsModel(model_dir="vits-piper-pl_PL-gosia-medium"),
        TtsModel(model_dir="vits-piper-pl_PL-jarvis_wg_glos-medium"),
        TtsModel(model_dir="vits-piper-pl_PL-justyna_wg_glos-medium"),
        TtsModel(model_dir="vits-piper-pl_PL-mc_speech-medium"),
        TtsModel(model_dir="vits-piper-pl_PL-meski_wg_glos-medium"),
        TtsModel(model_dir="vits-piper-pl_PL-zenski_wg_glos-medium"),
        TtsModel(model_dir="vits-piper-pt_BR-cadu-medium"),
        TtsModel(model_dir="vits-piper-pt_BR-dii-high"),
        TtsModel(model_dir="vits-piper-pt_BR-edresson-low"),
        TtsModel(model_dir="vits-piper-pt_BR-faber-medium"),
        TtsModel(model_dir="vits-piper-pt_BR-jeff-medium"),
        TtsModel(model_dir="vits-piper-pt_BR-miro-high"),
        TtsModel(model_dir="vits-piper-pt_PT-dii-high"),
        TtsModel(model_dir="vits-piper-pt_PT-miro-high"),
        TtsModel(model_dir="vits-piper-pt_PT-tugao-medium"),
        TtsModel(model_dir="vits-piper-ro_RO-mihai-medium"),
        TtsModel(model_dir="vits-piper-ru_RU-denis-medium"),
        TtsModel(model_dir="vits-piper-ru_RU-dmitri-medium"),
        TtsModel(model_dir="vits-piper-ru_RU-irina-medium"),
        TtsModel(model_dir="vits-piper-ru_RU-ruslan-medium"),
        TtsModel(model_dir="vits-piper-sk_SK-lili-medium"),
        TtsModel(model_dir="vits-piper-sl_SI-artur-medium"),
        TtsModel(model_dir="vits-piper-sr_RS-serbski_institut-medium"),
        TtsModel(model_dir="vits-piper-sv_SE-lisa-medium"),
        TtsModel(model_dir="vits-piper-sv_SE-nst-medium"),
        TtsModel(model_dir="vits-piper-sw_CD-lanfrica-medium"),
        TtsModel(model_dir="vits-piper-tr_TR-dfki-medium"),
        TtsModel(model_dir="vits-piper-tr_TR-fahrettin-medium"),
        TtsModel(model_dir="vits-piper-tr_TR-fettah-medium"),
        TtsModel(model_dir="vits-piper-uk_UA-lada-x_low"),
        TtsModel(model_dir="vits-piper-uk_UA-ukrainian_tts-medium"),
        TtsModel(model_dir="vits-piper-vi_VN-25hours_single-low"),
        TtsModel(model_dir="vits-piper-vi_VN-vais1000-medium"),
        TtsModel(model_dir="vits-piper-vi_VN-vivos-x_low"),
        TtsModel(model_dir="vits-piper-zh_CN-huayan-medium"),
    ]

    for m in models:
        m.data_dir = m.model_dir + "/" + "espeak-ng-data"
        m.model_name = m.model_dir[len("vits-piper-") :] + ".onnx"
        m.lang = m.model_dir.split("-")[2][:2]

    return models


def get_mimic3_models() -> List[TtsModel]:
    models = [
        TtsModel(model_dir="vits-mimic3-af_ZA-google-nwu_low"),
        TtsModel(model_dir="vits-mimic3-bn-multi_low"),
        TtsModel(model_dir="vits-mimic3-es_ES-m-ailabs_low"),
        TtsModel(model_dir="vits-mimic3-fa-haaniye_low"),
        TtsModel(model_dir="vits-mimic3-fi_FI-harri-tapani-ylilammi_low"),
        TtsModel(model_dir="vits-mimic3-gu_IN-cmu-indic_low"),
        TtsModel(model_dir="vits-mimic3-hu_HU-diana-majlinger_low"),
        TtsModel(model_dir="vits-mimic3-ko_KO-kss_low"),
        TtsModel(model_dir="vits-mimic3-ne_NP-ne-google_low"),
        TtsModel(model_dir="vits-mimic3-pl_PL-m-ailabs_low"),
        TtsModel(model_dir="vits-mimic3-tn_ZA-google-nwu_low"),
        TtsModel(model_dir="vits-mimic3-vi_VN-vais1000_low"),
    ]
    for m in models:
        m.data_dir = m.model_dir + "/" + "espeak-ng-data"
        m.model_name = m.model_dir[len("vits-mimic3-") :] + ".onnx"
        m.lang = m.model_dir.split("-")[2][:2]

    return models


def get_vits_models() -> List[TtsModel]:
    chinese_models = [
        # Chinese
        TtsModel(
            model_dir="vits-icefall-zh-aishell3",
            model_name="model.onnx",
            lang="zh",
            rule_fsts="vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/number.fst,vits-icefall-zh-aishell3/new_heteronym.fst",
            rule_fars="vits-icefall-zh-aishell3/rule.far",
        ),
        TtsModel(
            model_dir="vits-zh-aishell3",
            model_name="vits-aishell3.onnx",
            lang="zh",
        ),
        TtsModel(
            model_dir="vits-zh-hf-doom",
            model_name="doom.onnx",
            lang="zh",
        ),
        TtsModel(
            model_dir="vits-zh-hf-echo",
            model_name="echo.onnx",
            lang="zh",
        ),
        TtsModel(
            model_dir="vits-zh-hf-zenyatta",
            model_name="zenyatta.onnx",
            lang="zh",
        ),
        TtsModel(
            model_dir="vits-zh-hf-abyssinvoker",
            model_name="abyssinvoker.onnx",
            lang="zh",
        ),
        TtsModel(
            model_dir="vits-zh-hf-keqing",
            model_name="keqing.onnx",
            lang="zh",
        ),
        TtsModel(
            model_dir="vits-zh-hf-eula",
            model_name="eula.onnx",
            lang="zh",
        ),
        TtsModel(
            model_dir="vits-zh-hf-bronya",
            model_name="bronya.onnx",
            lang="zh",
        ),
        TtsModel(
            model_dir="vits-zh-hf-theresa",
            model_name="theresa.onnx",
            lang="zh",
        ),
        TtsModel(
            model_dir="vits-zh-hf-fanchen-wnj",
            model_name="vits-zh-hf-fanchen-wnj.onnx",
            lang="zh",
        ),
        TtsModel(
            model_dir="vits-melo-tts-zh_en",
            model_name="model.onnx",
            lang="zh",
            lang2="en",
        ),
        TtsModel(
            model_dir="vits-zh-hf-fanchen-C",
            model_name="vits-zh-hf-fanchen-C.onnx",
            lang="zh",
        ),
        TtsModel(
            model_dir="vits-zh-hf-fanchen-ZhiHuiLaoZhe",
            model_name="vits-zh-hf-fanchen-ZhiHuiLaoZhe.onnx",
            lang="zh",
        ),
        TtsModel(
            model_dir="vits-zh-hf-fanchen-ZhiHuiLaoZhe_new",
            model_name="vits-zh-hf-fanchen-ZhiHuiLaoZhe_new.onnx",
            lang="zh",
        ),
        TtsModel(
            model_dir="vits-zh-hf-fanchen-unity",
            model_name="vits-zh-hf-fanchen-unity.onnx",
            lang="zh",
        ),
        TtsModel(
            model_dir="sherpa-onnx-vits-zh-ll",
            model_name="model.onnx",
            lang="zh",
        ),
    ]

    rule_fsts = ["phone.fst", "date.fst", "number.fst"]
    for m in chinese_models:
        s = [f"{m.model_dir}/{r}" for r in rule_fsts]
        if (
            "vits-zh-hf" in m.model_dir
            or "sherpa-onnx-vits-zh-ll" == m.model_dir
            or "melo-tts" in m.model_dir
        ):
            s = s[:-1]
        else:
            m.rule_fars = f"{m.model_dir}/rule.far"

        m.rule_fsts = ",".join(s)

    all_models = chinese_models + [
        TtsModel(
            model_dir="vits-cantonese-hf-xiaomaiiwn",
            model_name="vits-cantonese-hf-xiaomaiiwn.onnx",
            lang="cantonese",
            lang_iso_639_3="yue",
            rule_fsts="vits-cantonese-hf-xiaomaiiwn/rule.fst",
        ),
        # English (US)
        TtsModel(model_dir="vits-vctk", model_name="vits-vctk.onnx", lang="en"),
        #  TtsModel(model_dir="vits-ljs", model_name="vits-ljs.onnx", lang="en"),
        # fmt: on
    ]

    return all_models


def get_matcha_models() -> List[TtsModel]:
    chinese_models = [
        TtsModel(
            model_dir="matcha-icefall-zh-baker",
            acoustic_model_name="model-steps-3.onnx",
            lang="zh",
            lexicon="lexicon.txt",
        )
    ]
    rule_fsts = ["phone.fst", "date.fst", "number.fst"]
    for m in chinese_models:
        s = [f"{m.model_dir}/{r}" for r in rule_fsts]
        m.rule_fsts = ",".join(s)
        m.vocoder = "vocos-22khz-univ.onnx"

    chinese_english_models = [
        TtsModel(
            model_dir="matcha-icefall-zh-en",
            acoustic_model_name="model-steps-3.onnx",
            lang="zh",
            lexicon="lexicon.txt",
        )
    ]
    rule_fsts_zh = ["phone-zh.fst", "date-zh.fst", "number-zh.fst"]
    for m in chinese_english_models:
        s = [f"{m.model_dir}/{r}" for r in rule_fsts_zh]
        m.rule_fsts = ",".join(s)
        m.vocoder = "vocos-16khz-univ.onnx"
        m.data_dir = f"{m.model_dir}/espeak-ng-data"

    english_persian_models = [
        TtsModel(
            model_dir="matcha-icefall-en_US-ljspeech",
            acoustic_model_name="model-steps-3.onnx",
            lang="en",
        ),
        TtsModel(
            model_dir="matcha-tts-fa_en-musa",
            acoustic_model_name="model.onnx",
            lang="fa",
        ),
        TtsModel(
            model_dir="matcha-tts-fa_en-khadijah",
            acoustic_model_name="model.onnx",
            lang="fa",
        ),
    ]
    for m in english_persian_models:
        m.data_dir = f"{m.model_dir}/espeak-ng-data"
        m.vocoder = "vocos-22khz-univ.onnx"

    return chinese_models + english_persian_models + chinese_english_models


def get_kokoro_models() -> List[TtsModel]:
    english_models = [
        TtsModel(
            model_dir="kokoro-en-v0_19",
            model_name="model.onnx",
            lang="en",
        )
    ]
    for m in english_models:
        m.data_dir = f"{m.model_dir}/espeak-ng-data"
        m.voices = "voices.bin"

    multi_lingual_models = [
        TtsModel(
            model_dir="kokoro-multi-lang-v1_0",
            model_name="model.onnx",
            lang="en",
            lang2="zh",
        ),
        TtsModel(
            model_dir="kokoro-multi-lang-v1_1",
            model_name="model.onnx",
            lang="en",
            lang2="zh",
        ),
        TtsModel(
            model_dir="kokoro-int8-multi-lang-v1_1",
            model_name="model.int8.onnx",
            lang="en",
            lang2="zh",
        ),
    ]
    for m in multi_lingual_models:
        m.data_dir = f"{m.model_dir}/espeak-ng-data"
        m.voices = "voices.bin"
        m.lexicon = f"{m.model_dir}/lexicon-us-en.txt,{m.model_dir}/lexicon-zh.txt"
        m.rule_fsts = f"{m.model_dir}/phone-zh.fst,{m.model_dir}/date-zh.fst,{m.model_dir}/number-zh.fst"

    return english_models + multi_lingual_models


def get_kitten_models() -> List[TtsModel]:
    english_models = [
        TtsModel(
            model_dir="kitten-nano-en-v0_1-fp16",
            model_name="model.fp16.onnx",
            lang="en",
        ),
        TtsModel(
            model_dir="kitten-nano-en-v0_2-fp16",
            model_name="model.fp16.onnx",
            lang="en",
        ),
        TtsModel(
            model_dir="kitten-mini-en-v0_1-fp16",
            model_name="model.fp16.onnx",
            lang="en",
        ),
    ]
    for m in english_models:
        m.data_dir = f"{m.model_dir}/espeak-ng-data"
        m.voices = "voices.bin"
        m.is_kitten = True

    return english_models


def main():
    args = get_args()
    index = args.index
    total = args.total
    assert 0 <= index < total, (index, total)
    d = dict()

    all_model_list = get_vits_models()
    all_model_list += get_piper_models()
    all_model_list += get_mimic3_models()
    all_model_list += get_coqui_models()
    all_model_list += get_matcha_models()
    all_model_list += get_kokoro_models()
    all_model_list += get_kitten_models()

    convert_lang_to_iso_639_3(all_model_list)
    print(all_model_list)

    num_models = len(all_model_list)

    num_per_runner = num_models // total
    if num_per_runner <= 0:
        raise ValueError(f"num_models: {num_models}, num_runners: {total}")

    start = index * num_per_runner
    end = start + num_per_runner

    remaining = num_models - args.total * num_per_runner

    print(f"{index}/{total}: {start}-{end}/{num_models}")
    d["tts_model_list"] = all_model_list[start:end]
    if index < remaining:
        s = args.total * num_per_runner + index
        d["tts_model_list"].append(all_model_list[s])
        print(f"{s}/{num_models}")

    filename_list = ["./build-apk-tts.sh", "./build-apk-tts-engine.sh"]
    for filename in filename_list:
        environment = jinja2.Environment()
        with open(f"{filename}.in") as f:
            s = f.read()
        template = environment.from_string(s)

        s = template.render(**d)
        with open(filename, "w") as f:
            print(s, file=f)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/apk/generate-vad-asr-apk-script.py
================================================
#!/usr/bin/env python3

import argparse
from dataclasses import dataclass
from pathlib import Path

import jinja2


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--total",
        type=int,
        default=1,
        help="Number of runners",
    )
    parser.add_argument(
        "--index",
        type=int,
        default=0,
        help="Index of the current runner",
    )
    return parser.parse_args()


@dataclass
class Model:
    # We will download
    # https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/{model_name}.tar.bz2
    model_name: str

    # The type of the model, e..g, 0, 1, 2. It is hardcoded in the kotlin code
    idx: int

    # e.g., zh, en, zh_en
    lang: str
    lang2: str

    # e.g., whisper, paraformer, zipformer
    short_name: str = ""

    # cmd is used to remove extra file from the model directory
    cmd: str = ""

    rule_fsts: str = ""

    use_hr: bool = False


# See get_2nd_models() in ./generate-asr-2pass-apk-script.py
def get_models():
    models = [
        Model(
            model_name="sherpa-onnx-whisper-tiny.en",
            idx=2,
            lang="en",
            lang2="English",
            short_name="whisper_tiny",
            cmd="""
            pushd $model_name
            rm -fv tiny.en-encoder.onnx
            rm -fv tiny.en-decoder.onnx
            rm -rf test_wavs
            rm -fv *.py
            rm -fv requirements.txt
            rm -fv .gitignore
            rm -fv README.md

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-paraformer-zh-2023-09-14",
            idx=0,
            lang="zh_en",
            lang2="Chinese,English",
            short_name="paraformer",
            rule_fsts="itn_zh_number.fst",
            cmd="""
            if [ ! -f itn_zh_number.fst ]; then
              curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
            fi
            pushd $model_name

            rm -fv README.md
            rm -rfv test_wavs
            rm -fv model.onnx

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17",
            idx=15,
            lang="zh_en_ko_ja_yue",
            lang2="中英粤日韩",
            short_name="sense_voice_2024_07_17_int8",
            use_hr=True,
            cmd="""
            pushd $model_name

            rm -rfv test_wavs
            rm -fv *.py

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-paraformer-zh-small-2024-03-09",
            idx=14,
            lang="zh_en",
            lang2="Chinese,English",
            short_name="small_paraformer",
            rule_fsts="itn_zh_number.fst",
            cmd="""
            if [ ! -f itn_zh_number.fst ]; then
              curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
            fi
            pushd $model_name

            rm -fv README.md
            rm -fv *.py
            rm -fv *.yaml
            rm -fv *.mvn
            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="icefall-asr-zipformer-wenetspeech-20230615",
            idx=4,
            lang="zh",
            lang2="Chinese",
            short_name="zipformer",
            rule_fsts="itn_zh_number.fst",
            cmd="""
            if [ ! -f itn_zh_number.fst ]; then
              curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
            fi
            pushd $model_name

            rm -rfv test_wavs
            rm -fv README.md
            mv -v data/lang_char/tokens.txt ./
            rm -rfv data/lang_char

            mv -v exp/encoder-epoch-12-avg-4.int8.onnx ./
            mv -v exp/decoder-epoch-12-avg-4.onnx ./
            mv -v exp/joiner-epoch-12-avg-4.int8.onnx ./
            rm -rfv exp

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k",
            idx=7,
            lang="be_de_en_es_fr_hr_it_pl_ru_uk",
            lang2="be_de_en_es_fr_hr_it_pl_ru_uk",
            short_name="fast_conformer_ctc_20k",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-nemo-fast-conformer-ctc-en-24500",
            idx=8,
            lang="en",
            lang2="English",
            short_name="fast_conformer_ctc_24500",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288",
            idx=9,
            lang="en_de_es_fr",
            lang2="English,German,Spanish,French",
            short_name="fast_conformer_ctc_14288",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-nemo-fast-conformer-ctc-es-1424",
            idx=10,
            lang="es",
            lang2="Spanish",
            short_name="fast_conformer_ctc_1424",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04",
            idx=11,
            lang="zh",
            lang2="Chinese",
            short_name="telespeech",
            rule_fsts="itn_zh_number.fst",
            cmd="""
            if [ ! -f itn_zh_number.fst ]; then
              curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
            fi
            pushd $model_name

            rm -rfv test_wavs
            rm -fv test.py

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-zipformer-thai-2024-06-20",
            idx=12,
            lang="th",
            lang2="Thai",
            short_name="zipformer",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs
            rm -fv README.md
            rm -fv bpe.model

            rm -fv encoder-epoch-12-avg-5.onnx
            rm -fv decoder-epoch-12-avg-5.int8.onnx
            rm joiner-epoch-12-avg-5.onnx

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-zipformer-korean-2024-06-24",
            idx=13,
            lang="ko",
            lang2="Korean",
            short_name="zipformer",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs
            rm -fv README.md
            rm -fv bpe.model

            rm -fv encoder-epoch-99-avg-1.onnx
            rm -fv decoder-epoch-99-avg-1.int8.onnx
            rm -fv joiner-epoch-99-avg-1.onnx

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01",
            idx=16,
            lang="ja",
            lang2="Japanese",
            short_name="zipformer_reazonspeech",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            rm -fv encoder-epoch-99-avg-1.onnx
            rm -fv decoder-epoch-99-avg-1.int8.onnx
            rm -fv joiner-epoch-99-avg-1.onnx

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-zipformer-ru-2024-09-18",
            idx=17,
            lang="ru",
            lang2="Russian",
            short_name="zipformer",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            rm -fv encoder.onnx
            rm -fv decoder.int8.onnx
            rm -fv joiner.onnx
            rm -fv bpe.model

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-small-zipformer-ru-2024-09-18",
            idx=18,
            lang="ru",
            lang2="Russian",
            short_name="small_zipformer",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            rm -fv encoder.onnx
            rm -fv decoder.int8.onnx
            rm -fv joiner.onnx
            rm -fv bpe.model

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24",
            idx=19,
            lang="ru",
            lang2="Russian",
            short_name="nemo_ctc_giga_am",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            rm -fv *.sh
            rm -fv *.py

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24",
            idx=20,
            lang="ru",
            lang2="Russian",
            short_name="nemo_transducer_giga_am",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            rm -fv *.sh
            rm -fv *.py

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-moonshine-tiny-en-int8",
            idx=21,
            lang="en",
            lang2="English",
            short_name="moonshine_tiny_int8",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-moonshine-base-en-int8",
            idx=22,
            lang="en",
            lang2="English",
            short_name="moonshine_base_int8",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-zipformer-zh-en-2023-11-22",
            idx=23,
            lang="zh_en",
            lang2="Chinese,English",
            short_name="zipformer",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            rm -fv encoder-epoch-34-avg-19.onnx
            rm -fv joiner-epoch-34-avg-19.onnx
            rm -fv bbpe.model

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02",
            idx=25,
            lang="multi_lang",
            lang2="multi_lang",
            short_name="dolphin_base_ctc",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-zipformer-vi-int8-2025-04-20",
            idx=26,
            lang="vi",
            lang2="Vietnamese",
            short_name="zipformer",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs
            rm -fv bpe.model

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-nemo-ctc-giga-am-v2-russian-2025-04-19",
            idx=27,
            lang="ru",
            lang2="Russian",
            short_name="nemo_ctc_giga_am_v2",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            rm -fv *.sh
            rm -fv *.py

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-nemo-transducer-giga-am-v2-russian-2025-04-19",
            idx=28,
            lang="ru",
            lang2="Russian",
            short_name="nemo_transducer_giga_am",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            rm -fv *.sh
            rm -fv *.py

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-zipformer-ru-int8-2025-04-20",
            idx=29,
            lang="ru",
            lang2="Russian",
            short_name="v2_zipformer",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            rm -fv bpe.model

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8",
            idx=30,
            lang="en",
            lang2="English",
            short_name="parakeet_tdt_0.6b_v2",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03",
            idx=31,
            lang="zh",
            lang2="Chinese",
            short_name="zipformer_2025_07_03",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs
            rm -rfv bbpe.model

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8",
            idx=33,
            lang="en",
            lang2="English",
            short_name="parakeet_tdt_ctc_110m",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8",
            idx=34,
            lang="ja",
            lang2="Japanese",
            short_name="parakeet-tdt_ctc_0.6b_ja",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-nemo-transducer-stt_pt_fastconformer_hybrid_large_pc-int8",
            idx=35,
            lang="pt",
            lang2="Portuguese",
            short_name="stt_pt_fastconformer_hybrid_large_pc_transducer_int8",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-nemo-stt_pt_fastconformer_hybrid_large_pc-int8",
            idx=36,
            lang="pt",
            lang2="Portuguese",
            short_name="stt_pt_fastconformer_hybrid_large_pc_ctc-int8",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-nemo-transducer-stt_de_fastconformer_hybrid_large_pc-int8",
            idx=37,
            lang="de",
            lang2="German",
            short_name="stt_de_fastconformer_hybrid_large_pc_transducer_int8",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-nemo-stt_de_fastconformer_hybrid_large_pc-int8",
            idx=38,
            lang="de",
            lang2="German",
            short_name="stt_de_fastconformer_hybrid_large_pc_ctc-int8",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-zipformer-ctc-small-zh-int8-2025-07-16",
            idx=39,
            lang="zh",
            lang2="Chinese",
            short_name="zipformer_ctc_small_2025_07_16",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs
            rm -rfv bbpe.model

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8",
            idx=40,
            lang="multi",
            lang2="25_languages",
            short_name="parakeet_tdt_0.6b_v3",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09",
            idx=41,
            lang="zh_en_ko_ja_yue",
            lang2="中英粤日韩",
            short_name="sense_voice_2025_09_09_int8",
            use_hr=True,
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10",
            idx=42,
            lang="zh_en_yue",
            lang2="中英粤",
            short_name="wenetspeech_yue_u2pconformer_ctc_2025_09_10_int8",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-paraformer-zh-int8-2025-10-07",
            idx=43,
            lang="zh",
            lang2="四川话",
            short_name="paraformer_四川话",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12",
            idx=44,
            lang="1600",
            lang2="1600_languages",
            short_name="omnilingual_asr_300M_ctc_int8",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-medasr-ctc-en-int8-2025-12-25",
            idx=45,
            lang="en",
            lang2="英语",
            short_name="google_medasr_ctc_int8",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-funasr-nano-int8-2025-12-30",
            idx=46,
            lang="multi",
            lang2="31_languages",
            short_name="funasr_nano_int8_2025_12_30",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-wenetspeech-wu-u2pp-conformer-ctc-zh-int8-2026-02-03",
            idx=47,
            lang="wu",
            lang2="吴语",
            short_name="wenetspeech_wu_u2pconformer_ctc_2026_02_03_int8",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-wenetspeech-wu-u2pp-conformer-ctc-zh-2026-02-03",
            idx=48,
            lang="wu",
            lang2="吴语",
            short_name="wenetspeech_wu_u2pconformer_ctc_2026_02_03",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-zipformer-vi-30M-int8-2026-02-09",
            idx=49,
            lang="vi",
            lang2="Vietnamese",
            short_name="zipformer_vi_30M_int8_2026_02_09",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25",
            idx=50,
            lang="zh_en",
            lang2="中英",
            short_name="fire_red_asr2_ctc_int8_2026_02_25",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-moonshine-tiny-ko-quantized-2026-02-27",
            idx=51,
            lang="ko",
            lang2="Korean",
            short_name="moonshine_tiny_ko_2026_02_27",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-moonshine-tiny-ja-quantized-2026-02-27",
            idx=52,
            lang="ja",
            lang2="Japanese",
            short_name="moonshine_tiny_ja_2026_02_27",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27",
            idx=53,
            lang="en",
            lang2="English",
            short_name="moonshine_tiny_en_2026_02_27",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-moonshine-base-zh-quantized-2026-02-27",
            idx=54,
            lang="zh",
            lang2="Chinese",
            short_name="moonshine_base_zh_2026_02_27",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-moonshine-base-vi-quantized-2026-02-27",
            idx=55,
            lang="vi",
            lang2="Vietnamese",
            short_name="moonshine_base_vi_2026_02_27",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-moonshine-base-uk-quantized-2026-02-27",
            idx=56,
            lang="uk",
            lang2="Ukrainian",
            short_name="moonshine_base_uk_2026_02_27",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-moonshine-base-ja-quantized-2026-02-27",
            idx=57,
            lang="ja",
            lang2="Japanese",
            short_name="moonshine_base_ja_2026_02_27",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-moonshine-base-es-quantized-2026-02-27",
            idx=58,
            lang="es",
            lang2="Spanish",
            short_name="moonshine_base_es_2026_02_27",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-moonshine-base-en-quantized-2026-02-27",
            idx=59,
            lang="en",
            lang2="English",
            short_name="moonshine_base_en_2026_02_27",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-moonshine-base-ar-quantized-2026-02-27",
            idx=60,
            lang="ar",
            lang2="Arabic",
            short_name="moonshine_base_ar_2026_02_27",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
    ]
    return models


def main():
    args = get_args()
    index = args.index
    total = args.total
    assert 0 <= index < total, (index, total)

    all_model_list = get_models()

    num_models = len(all_model_list)

    num_per_runner = num_models // total
    if num_per_runner <= 0:
        raise ValueError(f"num_models: {num_models}, num_runners: {total}")

    start = index * num_per_runner
    end = start + num_per_runner

    remaining = num_models - args.total * num_per_runner

    print(f"{index}/{total}: {start}-{end}/{num_models}")

    d = dict()
    d["model_list"] = all_model_list[start:end]
    if index < remaining:
        s = args.total * num_per_runner + index
        d["model_list"].append(all_model_list[s])
        print(f"{s}/{num_models}")

    filename_list = [
        "./build-apk-vad-asr.sh",
        "./build-hap-vad-asr.sh",
        "./build-apk-vad-asr-simulate-streaming.sh",
    ]
    for filename in filename_list:
        environment = jinja2.Environment()
        if not Path(f"{filename}.in").is_file():
            print(f"skip {filename}")
            continue

        with open(f"{filename}.in") as f:
            s = f.read()
        template = environment.from_string(s)

        s = template.render(**d)
        with open(filename, "w") as f:
            print(s, file=f)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/bbpe/.gitignore
================================================
bbpe.cc


================================================
FILE: scripts/bbpe/generate_bbpe_table.py
================================================
#!/usr/bin/env python3
# Copyright    2024  Xiaomi Corp.        (authors: Fangjun Kuang)
#
# See https://github.com/facebookresearch/fairseq/blob/main/fairseq/data/encoders/byte_bpe.py#L28
# and
# https://github.com/k2-fsa/icefall/blob/master/icefall/byte_utils.py
#
# Caution: The PRINTABLE_LATIN from fairseq is different from PRINTABLE_BASE_CHARS from icefall

import re

BPE_UNK = chr(8263)
PRINTABLE_BASE_CHARS = (
    list(range(256, 287 + 1))
    + list(range(32, 126 + 1))
    + list(range(288, 305 + 1))
    + list(range(308, 318 + 1))
    + list(range(321, 328 + 1))
    + list(range(330, 382 + 1))
    + list(range(384, 422 + 1))
)


BYTE_TO_BCHAR = {b: chr(PRINTABLE_BASE_CHARS[b]) for b in range(256)}
BCHAR_TO_BYTE = {bc: b for b, bc in BYTE_TO_BCHAR.items()}
BCHAR_TO_BYTE[BPE_UNK] = 32  # map unk to space


def main():
    s = ""
    s += "// sherpa-onnx/csrc/bbpe.cc\n"
    s += "//\n"
    s += "// Copyright (c)  2024 Xiaomi Corporation\n"
    s += "\n"
    s += "// Auto-generated! DO NOT EDIT\n"
    s += "\n"
    s += '#include "sherpa-onnx/csrc/bbpe.h"\n'
    s += "\n"
    s += "#include <cstdint>\n"
    s += "#include <string>\n"
    s += "#include <unordered_map>\n"
    s += "\n"
    s += "const std::unordered_map<std::string, uint8_t> &GetByteBpeTable() {\n"
    s += "  static const std::unordered_map<std::string, uint8_t> table = {\n"

    s += "      "
    for i, (k, v) in enumerate(BCHAR_TO_BYTE.items()):
        s += "{"
        if k == "\\":
            s += f'"\\\\", {v}'
        elif k == '"':
            s += f'"\\"", {v}'
        else:
            s += f'"{k}", {v}'
        s += "}, "
        if i > 0 and i % 7 == 0:
            s += "\n"
            s += "      "
    s += "};\n"
    s += "\n"
    s += "  return table\n;"
    s += "}\n"

    s += "\n"
    s += "const std::unordered_map<uint8_t, std::string> &GetByteBpeTableId2Token() {\n"
    s += "  static const std::unordered_map<uint8_t, std::string> table = {\n"

    s += "      "
    for i, (k, v) in enumerate(BCHAR_TO_BYTE.items()):
        s += "{"
        if k == "\\":
            s += f'{v}, "\\\\"'
        elif k == '"':
            s += f'{v}, "\\""'
        else:
            s += f'{v}, "{k}"'

        s += "}, "
        if i > 0 and i % 7 == 0:
            s += "\n"
            s += "      "
    s += "};\n"
    s += "\n"
    s += "  return table\n;"
    s += "}\n"

    with open("bbpe.cc", "w", encoding="utf-8") as f:
        f.write(s)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/benchmark/README.md
================================================
# Whisper Timestamp Accuracy Benchmark

This directory contains tools for benchmarking sherpa-onnx Whisper word timestamp accuracy against ground truth alignments from the Montreal Forced Aligner (MFA).

## Overview

The benchmark suite evaluates how accurately sherpa-onnx predicts word-level timestamps by comparing against MFA alignments on LibriSpeech data. MFA provides high-quality forced alignments that serve as ground truth for measuring timestamp accuracy.

## Scripts

### `download_librispeech_test_data.py`

Downloads and prepares the benchmark dataset:
- LibriSpeech dev-clean audio (converted to 16kHz mono WAV)
- MFA word alignments with precise word boundaries

**Usage:**
```bash
uv run python scripts/benchmark/download_librispeech_test_data.py [--num-utterances 200]
```

**Options:**
- `--num-utterances` - Number of utterances to include (default: 200)
- `--output-dir` - Output directory (default: `benchmark_data`)
- `--skip-download` - Skip download step and use existing files

**Output:**
- `benchmark_data/audio/*.wav` - Audio files
- `benchmark_data/manifest.json` - Mapping of audio files to ground truth timestamps

**Requirements:**
- `gdown` (for Google Drive downloads)
- `ffmpeg` or `sox` (for audio conversion)

### `run_timestamp_benchmark.py`

Runs the timestamp accuracy benchmark against the downloaded ground truth.

**Usage:**
```bash
PYTHONPATH=build/lib:sherpa-onnx/python uv run python scripts/benchmark/run_timestamp_benchmark.py \
    --encoder ./whisper-tiny-attention/tiny-encoder.onnx \
    --decoder ./whisper-tiny-attention/tiny-decoder.onnx \
    --tokens ./whisper-tiny-attention/tiny-tokens.txt
```

**Options:**
- `--encoder` - Path to Whisper encoder ONNX model (required)
- `--decoder` - Path to Whisper decoder ONNX model (required)
- `--tokens` - Path to tokens file (required)
- `--data-dir` - Directory with manifest and audio (default: `benchmark_data`)
- `--output-dir` - Output directory for results (default: `benchmark_results`)
- `--language` - Language code (default: `en`)
- `--num-workers` - Number of parallel workers (default: 1)

**Parallel Processing:**
```bash
# Run with 4 workers for faster benchmarking
PYTHONPATH=build/lib:sherpa-onnx/python uv run python scripts/benchmark/run_timestamp_benchmark.py \
    --encoder ./whisper-tiny-attention/tiny-encoder.onnx \
    --decoder ./whisper-tiny-attention/tiny-decoder.onnx \
    --tokens ./whisper-tiny-attention/tiny-tokens.txt \
    --num-workers 4
```

Note: Each worker loads its own model copy, so memory usage scales linearly with worker count.

**Requirements:**
- `numpy`
- `jiwer` (for WER calculation)
- Built sherpa-onnx library

**Note on PYTHONPATH:** This script uses `PYTHONPATH=build/lib:sherpa-onnx/python` instead of `pip install sherpa-onnx` to allow rapid iteration when developing C++ code. After running `make` in the build directory, you can immediately test without reinstalling the package.

## Output Format

### `details_YYYYMMDD_HHMMSS.csv`

Per-word timing errors with columns:
- `utterance_id` - Utterance identifier
- `word_index` - Word position in utterance
- `word` - The word text
- `gt_start`, `gt_end` - Ground truth timestamps (seconds)
- `pred_start`, `pred_end` - Predicted timestamps (seconds)
- `matched` - Whether the word was successfully aligned
- `start_error_ms`, `end_error_ms` - Timing errors in milliseconds

### `summary_YYYYMMDD_HHMMSS.csv`

Per-utterance aggregate statistics:
- `utterance_id` - Utterance identifier
- `num_gt_words`, `num_pred_words`, `num_matched` - Word counts
- `match_rate` - Fraction of ground truth words matched
- `wer` - Word Error Rate
- `mean_start_error_ms`, `median_start_error_ms`, `max_start_error_ms` - Start time error statistics
- `mean_end_error_ms`, `median_end_error_ms`, `max_end_error_ms` - End time error statistics
- `pct_within_20ms`, `pct_within_50ms` - Percentage of words within accuracy thresholds

## Metrics Explained

- **Start/End Time Error**: Absolute difference between predicted and ground truth timestamps
- **Match Rate**: How many ground truth words were successfully aligned with predictions
- **WER (Word Error Rate)**: Standard ASR accuracy metric (lower is better)
- **Accuracy Thresholds**: Percentage of words with start time error within 20ms, 50ms, or 100ms

## Example Workflow

```bash
# 1. Build sherpa-onnx
cd build && make -j8 && cd ..

# 2. Export a Whisper model with attention outputs
uv run python scripts/whisper/export-onnx.py --model tiny --with-attention --output-dir ./whisper-tiny-attention

# 3. Download benchmark data
uv run python scripts/benchmark/download_librispeech_test_data.py --num-utterances 200

# 4. Run the benchmark
PYTHONPATH=build/lib:sherpa-onnx/python uv run python scripts/benchmark/run_timestamp_benchmark.py \
    --encoder ./whisper-tiny-attention/tiny-encoder.onnx \
    --decoder ./whisper-tiny-attention/tiny-decoder.onnx \
    --tokens ./whisper-tiny-attention/tiny-tokens.txt \
    --num-workers 4

# 5. Review results in benchmark_results/
```

## Data Sources and Citations

### LibriSpeech Corpus

The audio data comes from the [LibriSpeech](https://www.openslr.org/12/) ASR corpus:

> Panayotov, V., Chen, G., Povey, D., & Khudanpur, S. (2015). LibriSpeech: An ASR corpus based on public domain audio books. In *2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)* (pp. 5206-5210). IEEE. https://doi.org/10.1109/ICASSP.2015.7178964

LibriSpeech is derived from read audiobooks from the [LibriVox](https://librivox.org/) project and is freely available under a CC BY 4.0 license.

### Montreal Forced Aligner (MFA)

The ground truth word alignments were generated using the [Montreal Forced Aligner](https://montreal-forced-aligner.readthedocs.io/):

> McAuliffe, M., Socolof, M., Mihuc, S., Wagner, M., & Sonderegger, M. (2017). Montreal Forced Aligner: Trainable text-speech alignment using Kaldi. In *Proceedings of Interspeech 2017* (pp. 498-502). https://doi.org/10.21437/Interspeech.2017-1386

MFA is an open-source forced alignment tool that uses Kaldi for acoustic modeling.

### Pre-computed LibriSpeech Alignments

The pre-computed MFA alignments for LibriSpeech are provided by the [librispeech-alignments](https://github.com/CorentinJ/librispeech-alignments) project by Corentin Jemine.

## License

The LibriSpeech corpus is released under the [CC BY 4.0](https://creativecommons.org/licenses/by/4.0/) license. Please ensure compliance with all applicable licenses when using this benchmark data.


================================================
FILE: scripts/benchmark/download_librispeech_test_data.py
================================================
#!/usr/bin/env python3
# /// script
# dependencies = ["gdown"]
# ///
from __future__ import annotations

"""
Download and prepare LibriSpeech test data for timestamp benchmarking.

Downloads:
1. LibriSpeech dev-clean audio subset
2. MFA word alignments from librispeech-alignments repo

Outputs:
- benchmark_data/audio/*.wav (16kHz mono WAV files)
- benchmark_data/manifest.json (mapping of audio files to ground truth timestamps)

Usage:
    python scripts/benchmark/download_librispeech_test_data.py [--num-utterances 200]
"""

import argparse
import json
import os
import re
import subprocess
import sys
import tarfile
import tempfile
import urllib.request
import zipfile
from pathlib import Path

# URLs for downloads
LIBRISPEECH_DEV_CLEAN_URL = "https://www.openslr.org/resources/12/dev-clean.tar.gz"
MFA_ALIGNMENTS_URL = "https://drive.google.com/uc?export=download&id=1WYfgr31T-PPwMcxuAq09XZfHQO5Mw8fE"

# Google Drive file ID for the simple TXT format alignments
GDRIVE_FILE_ID = "1WYfgr31T-PPwMcxuAq09XZfHQO5Mw8fE"


def download_file(url: str, dest_path: Path, description: str = "file"):
    """Download a file with progress indication."""
    print(f"Downloading {description}...")
    print(f"  URL: {url}")
    print(f"  Destination: {dest_path}")

    def reporthook(block_num, block_size, total_size):
        if total_size > 0:
            downloaded = block_num * block_size
            percent = min(100, downloaded * 100 / total_size)
            mb_downloaded = downloaded / (1024 * 1024)
            mb_total = total_size / (1024 * 1024)
            sys.stdout.write(f"\r  Progress: {percent:.1f}% ({mb_downloaded:.1f}/{mb_total:.1f} MB)")
            sys.stdout.flush()

    urllib.request.urlretrieve(url, dest_path, reporthook)
    print()  # newline after progress


def download_from_gdrive(file_id: str, dest_path: Path, description: str = "file"):
    """Download a file from Google Drive using gdown."""
    try:
        import gdown
    except ImportError:
        print("ERROR: gdown is required for downloading from Google Drive.")
        print("Install it with: pip install gdown")
        sys.exit(1)

    print(f"Downloading {description} from Google Drive...")
    print(f"  File ID: {file_id}")
    print(f"  Destination: {dest_path}")

    url = f"https://drive.google.com/uc?id={file_id}"
    gdown.download(url, str(dest_path), quiet=False)


def extract_tar_gz(archive_path: Path, dest_dir: Path):
    """Extract a tar.gz archive."""
    print(f"Extracting {archive_path}...")
    with tarfile.open(archive_path, "r:gz") as tar:
        tar.extractall(dest_dir)
    print(f"  Extracted to {dest_dir}")


def extract_zip(archive_path: Path, dest_dir: Path):
    """Extract a zip archive."""
    print(f"Extracting {archive_path}...")
    with zipfile.ZipFile(archive_path, "r") as z:
        z.extractall(dest_dir)
    print(f"  Extracted to {dest_dir}")


def convert_flac_to_wav(flac_path: Path, wav_path: Path):
    """Convert FLAC to 16kHz mono WAV using ffmpeg or sox."""
    wav_path.parent.mkdir(parents=True, exist_ok=True)

    # Try ffmpeg first
    try:
        subprocess.run(
            [
                "ffmpeg", "-y", "-i", str(flac_path),
                "-ar", "16000", "-ac", "1", "-sample_fmt", "s16",
                str(wav_path)
            ],
            check=True,
            capture_output=True
        )
        return
    except (subprocess.CalledProcessError, FileNotFoundError):
        pass

    # Try sox
    try:
        subprocess.run(
            ["sox", str(flac_path), "-r", "16000", "-c", "1", str(wav_path)],
            check=True,
            capture_output=True
        )
        return
    except (subprocess.CalledProcessError, FileNotFoundError):
        pass

    print(f"ERROR: Could not convert {flac_path}")
    print("Please install ffmpeg or sox")
    sys.exit(1)


def parse_alignment_line(line: str) -> dict | None:
    """
    Parse a single line from the MFA alignment file.

    Format: utterance_id "word1,word2,..." "end_time1,end_time2,..."
    Empty words represent silences.
    Times are END times for each word.

    Returns dict with utterance_id, words (list), and word_times (list of {word, start, end})
    """
    # Pattern: utterance_id "words" "times"
    match = re.match(r'^(\S+)\s+"([^"]*)"\s+"([^"]*)"', line.strip())
    if not match:
        return None

    utterance_id = match.group(1)
    words_str = match.group(2)
    times_str = match.group(3)

    # Parse words (comma-separated, may have empty entries for silences)
    words = words_str.split(",")

    # Parse end times
    try:
        end_times = [float(t) for t in times_str.split(",") if t]
    except ValueError:
        return None

    if len(words) != len(end_times):
        return None

    # Convert to word_times with start and end
    word_times = []
    prev_end = 0.0
    for word, end_time in zip(words, end_times):
        if word:  # Skip empty words (silences)
            word_times.append({
                "word": word,
                "start": prev_end,
                "end": end_time
            })
        prev_end = end_time

    return {
        "utterance_id": utterance_id,
        "words": [w["word"] for w in word_times],
        "word_times": word_times
    }


def parse_alignment_file(alignment_path: Path) -> dict:
    """Parse an alignment file and return dict mapping utterance_id to alignment data."""
    alignments = {}
    with open(alignment_path, "r") as f:
        for line in f:
            parsed = parse_alignment_line(line)
            if parsed:
                alignments[parsed["utterance_id"]] = parsed
    return alignments


def main():
    parser = argparse.ArgumentParser(description="Download LibriSpeech benchmark data")
    parser.add_argument(
        "--num-utterances",
        type=int,
        default=200,
        help="Number of utterances to include in test set (default: 200)"
    )
    parser.add_argument(
        "--output-dir",
        type=str,
        default="benchmark_data",
        help="Output directory (default: benchmark_data)"
    )
    parser.add_argument(
        "--skip-download",
        action="store_true",
        help="Skip download step (use existing files)"
    )
    args = parser.parse_args()

    # Paths
    script_dir = Path(__file__).parent.resolve()
    repo_root = script_dir.parent.parent
    output_dir = repo_root / args.output_dir
    audio_dir = output_dir / "audio"
    cache_dir = output_dir / ".cache"

    output_dir.mkdir(parents=True, exist_ok=True)
    audio_dir.mkdir(parents=True, exist_ok=True)
    cache_dir.mkdir(parents=True, exist_ok=True)

    # Step 1: Download LibriSpeech dev-clean
    librispeech_tar = cache_dir / "dev-clean.tar.gz"
    librispeech_dir = cache_dir / "LibriSpeech" / "dev-clean"

    if not args.skip_download and not librispeech_dir.exists():
        if not librispeech_tar.exists():
            download_file(
                LIBRISPEECH_DEV_CLEAN_URL,
                librispeech_tar,
                "LibriSpeech dev-clean"
            )
        extract_tar_gz(librispeech_tar, cache_dir)

    # Step 2: Download MFA alignments
    alignments_zip = cache_dir / "librispeech-alignments.zip"
    alignments_dir = cache_dir / "alignments"

    if not args.skip_download and not alignments_dir.exists():
        if not alignments_zip.exists():
            download_from_gdrive(
                GDRIVE_FILE_ID,
                alignments_zip,
                "LibriSpeech MFA alignments"
            )
        alignments_dir.mkdir(parents=True, exist_ok=True)
        extract_zip(alignments_zip, alignments_dir)

    # Step 3: Find alignment files for dev-clean
    print("\nParsing alignment files...")
    all_alignments = {}

    # Look for dev-clean alignment files
    for alignment_file in alignments_dir.rglob("*.alignment.txt"):
        if "dev-clean" in str(alignment_file):
            print(f"  Parsing {alignment_file.name}...")
            file_alignments = parse_alignment_file(alignment_file)
            all_alignments.update(file_alignments)

    print(f"  Found {len(all_alignments)} alignments")

    # Step 4: Find corresponding audio files and convert
    print("\nProcessing audio files...")
    manifest = []
    processed = 0

    # Walk through LibriSpeech directory structure: speaker/chapter/utterance.flac
    for flac_file in sorted(librispeech_dir.rglob("*.flac")):
        utterance_id = flac_file.stem  # e.g., "84-121123-0000"

        if utterance_id not in all_alignments:
            continue

        alignment = all_alignments[utterance_id]

        # Skip utterances with no words
        if not alignment["word_times"]:
            continue

        # Convert to WAV
        wav_file = audio_dir / f"{utterance_id}.wav"
        if not wav_file.exists():
            convert_flac_to_wav(flac_file, wav_file)

        # Add to manifest
        manifest.append({
            "utterance_id": utterance_id,
            "audio_path": str(wav_file.relative_to(output_dir)),
            "transcript": " ".join(alignment["words"]),
            "word_times": alignment["word_times"]
        })

        processed += 1
        if processed % 50 == 0:
            print(f"  Processed {processed} utterances...")

        if processed >= args.num_utterances:
            break

    print(f"\nProcessed {len(manifest)} utterances")

    # Step 5: Write manifest
    manifest_path = output_dir / "manifest.json"
    with open(manifest_path, "w") as f:
        json.dump(manifest, f, indent=2)
    print(f"Wrote manifest to {manifest_path}")

    # Summary
    print("\n" + "=" * 60)
    print("Download complete!")
    print("=" * 60)
    print(f"Audio files: {audio_dir}")
    print(f"Manifest: {manifest_path}")
    print(f"Total utterances: {len(manifest)}")

    # Calculate total duration
    total_words = sum(len(item["word_times"]) for item in manifest)
    print(f"Total words: {total_words}")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/benchmark/run_timestamp_benchmark.py
================================================
#!/usr/bin/env python3
# /// script
# dependencies = ["numpy", "jiwer"]
# ///
from __future__ import annotations

"""
Run timestamp accuracy benchmark against LibriSpeech ground truth.

Compares sherpa-onnx Whisper word timestamps against MFA alignments.

Usage:
    PYTHONPATH=build/lib:sherpa-onnx/python python scripts/benchmark/run_timestamp_benchmark.py \
        --encoder ./whisper-tiny-attention/tiny-encoder.onnx \
        --decoder ./whisper-tiny-attention/tiny-decoder.onnx \
        --tokens ./whisper-tiny-attention/tiny-tokens.txt

    # Parallel processing with 4 workers:
    PYTHONPATH=build/lib:sherpa-onnx/python python scripts/benchmark/run_timestamp_benchmark.py \
        --encoder ./whisper-tiny-attention/tiny-encoder.onnx \
        --decoder ./whisper-tiny-attention/tiny-decoder.onnx \
        --tokens ./whisper-tiny-attention/tiny-tokens.txt \
        --num-workers 4

Outputs:
    benchmark_results/details_YYYYMMDD_HHMMSS.csv - Per-word timing errors
    benchmark_results/summary_YYYYMMDD_HHMMSS.csv - Aggregate statistics
"""

import argparse
import csv
import json
import multiprocessing
import os
import re
import sys
import time
import wave
from dataclasses import dataclass
from datetime import datetime
from difflib import SequenceMatcher
from pathlib import Path

import numpy as np

try:
    import sherpa_onnx
except ImportError:
    print("ERROR: sherpa_onnx not found. Please install it using one of the methods at:")
    print("https://k2-fsa.github.io/sherpa/onnx/python/install.html")
    sys.exit(1)

try:
    import jiwer
    from jiwer import wer as compute_wer
except ImportError:
    print("ERROR: jiwer not found. Install with: pip install jiwer")
    sys.exit(1)

# Text normalization for WER calculation
wer_transforms = jiwer.Compose([
    jiwer.ToLowerCase(),
    jiwer.RemovePunctuation(),
    jiwer.ExpandCommonEnglishContractions(),
    jiwer.SubstituteWords({
        "mr": "mister",
        "mrs": "missus",
        "dr": "doctor",
        "st": "saint",
    }),
    jiwer.RemoveMultipleSpaces(),
    jiwer.Strip(),
    jiwer.ReduceToListOfListOfWords(),
])


@dataclass
class WordTiming:
    """A word with its timing information."""
    word: str
    start: float
    end: float


@dataclass
class AlignedWord:
    """A pair of ground truth and predicted words that have been aligned."""
    word: str
    gt_start: float
    gt_end: float
    pred_start: float | None
    pred_end: float | None
    matched: bool


def normalize_word(word: str) -> str:
    """Normalize word for comparison."""
    # Remove punctuation, lowercase
    return re.sub(r'[^\w]', '', word).strip().lower()


def read_wave(wave_filename: str) -> tuple[np.ndarray, int]:
    """Read a wave file and return samples as float32 array."""
    with wave.open(wave_filename) as f:
        assert f.getnchannels() == 1, f"Expected mono, got {f.getnchannels()} channels"
        assert f.getsampwidth() == 2, f"Expected 16-bit, got {f.getsampwidth() * 8}-bit"
        num_samples = f.getnframes()
        samples = f.readframes(num_samples)
        samples_int16 = np.frombuffer(samples, dtype=np.int16)
        samples_float32 = samples_int16.astype(np.float32) / 32768
        return samples_float32, f.getframerate()


def tokens_to_words(
    tokens: list[str],
    timestamps: list[float],
    durations: list[float]
) -> list[WordTiming]:
    """
    Convert token-level timestamps to word-level timestamps.

    Follows OpenAI Whisper's split_tokens_on_spaces logic:
    - Tokens starting with space begin a new word
    - Punctuation-only tokens begin a new word
    - Otherwise append to previous word
    """
    import string

    if not tokens:
        return []

    words = []
    current_word = ""
    current_start = None
    current_end = None

    for token, ts, dur in zip(tokens, timestamps, durations):
        token_end = ts + dur
        token_stripped = token.strip()

        # Determine if this token starts a new word
        with_space = token.startswith(" ")
        is_punctuation = token_stripped in string.punctuation
        is_first = len(words) == 0 and current_word == ""

        if with_space or is_punctuation or is_first:
            # Save previous word if exists
            if current_word.strip():
                words.append(WordTiming(
                    word=current_word.strip(),
                    start=current_start,
                    end=current_end
                ))
            # Start new word
            current_word = token
            current_start = ts
            current_end = token_end
        else:
            # Append to current word
            current_word += token
            current_end = token_end

    # Don't forget the last word
    if current_word.strip():
        words.append(WordTiming(
            word=current_word.strip(),
            start=current_start,
            end=current_end
        ))

    return words


def get_sherpa_word_timestamps(
    recognizer: sherpa_onnx.OfflineRecognizer,
    audio_path: Path
) -> list[WordTiming]:
    """Run sherpa-onnx recognition and return word timestamps."""
    samples, sample_rate = read_wave(str(audio_path))

    stream = recognizer.create_stream()
    stream.accept_waveform(sample_rate, samples)
    recognizer.decode_stream(stream)
    result = stream.result

    # Convert token timestamps to word timestamps
    return tokens_to_words(result.tokens, result.timestamps, result.durations)


# Global recognizer for worker processes
_worker_recognizer = None


def _init_worker(encoder: str, decoder: str, tokens: str, language: str):
    """Initialize recognizer in worker process."""
    global _worker_recognizer
    _worker_recognizer = sherpa_onnx.OfflineRecognizer.from_whisper(
        encoder=encoder,
        decoder=decoder,
        tokens=tokens,
        language=language,
        enable_token_timestamps=True,
    )


def _process_utterance(args: tuple) -> dict:
    """Process a single utterance in a worker process."""
    item, data_dir = args
    utterance_id = item["utterance_id"]
    audio_path = Path(data_dir) / item["audio_path"]

    # Parse ground truth
    gt_words = [
        WordTiming(word=wt["word"], start=wt["start"], end=wt["end"])
        for wt in item["word_times"]
    ]
    gt_transcript = " ".join(w.word for w in gt_words)

    # Get predictions
    pred_words = get_sherpa_word_timestamps(_worker_recognizer, audio_path)
    pred_transcript = " ".join(w.word for w in pred_words)

    # Align words
    aligned = align_words(gt_words, pred_words)

    # Calculate per-utterance stats
    matched = [a for a in aligned if a.matched]

    if matched:
        start_errors = [abs(a.pred_start - a.gt_start) * 1000 for a in matched]
        end_errors = [abs(a.pred_end - a.gt_end) * 1000 for a in matched]

        stats = {
            "utterance_id": utterance_id,
            "num_gt_words": len(gt_words),
            "num_pred_words": len(pred_words),
            "num_matched": len(matched),
            "match_rate": len(matched) / len(gt_words) if gt_words else 0,
            "wer": jiwer.wer(
                gt_transcript,
                pred_transcript,
                reference_transform=wer_transforms,
                hypothesis_transform=wer_transforms,
            ),
            "mean_start_error_ms": np.mean(start_errors),
            "median_start_error_ms": np.median(start_errors),
            "max_start_error_ms": np.max(start_errors),
            "mean_end_error_ms": np.mean(end_errors),
            "median_end_error_ms": np.median(end_errors),
            "max_end_error_ms": np.max(end_errors),
            "pct_within_20ms": sum(1 for e in start_errors if e <= 20) / len(start_errors) * 100,
            "pct_within_50ms": sum(1 for e in start_errors if e <= 50) / len(start_errors) * 100,
        }
    else:
        stats = {
            "utterance_id": utterance_id,
            "num_gt_words": len(gt_words),
            "num_pred_words": len(pred_words),
            "num_matched": 0,
            "match_rate": 0,
            "wer": jiwer.wer(
                gt_transcript,
                pred_transcript,
                reference_transform=wer_transforms,
                hypothesis_transform=wer_transforms,
            ) if gt_transcript else 1.0,
            "mean_start_error_ms": None,
            "median_start_error_ms": None,
            "max_start_error_ms": None,
            "mean_end_error_ms": None,
            "median_end_error_ms": None,
            "max_end_error_ms": None,
            "pct_within_20ms": None,
            "pct_within_50ms": None,
        }

    # Build aligned words for detailed output
    aligned_words = []
    for j, a in enumerate(aligned):
        aligned_words.append({
            "utterance_id": utterance_id,
            "word_index": j,
            "word": a.word,
            "gt_start": a.gt_start,
            "gt_end": a.gt_end,
            "pred_start": a.pred_start if a.pred_start is not None else "",
            "pred_end": a.pred_end if a.pred_end is not None else "",
            "matched": a.matched,
            "start_error_ms": abs(a.pred_start - a.gt_start) * 1000 if a.matched else "",
            "end_error_ms": abs(a.pred_end - a.gt_end) * 1000 if a.matched else "",
        })

    return {"stats": stats, "aligned": aligned_words}


def align_words(
    gt_words: list[WordTiming],
    pred_words: list[WordTiming]
) -> list[AlignedWord]:
    """
    Align ground truth and predicted words using sequence matching.

    Returns list of AlignedWord with timing comparisons for matched words.
    """
    # Normalize words for matching
    gt_normalized = [normalize_word(w.word) for w in gt_words]
    pred_normalized = [normalize_word(w.word) for w in pred_words]

    # Use SequenceMatcher to find matching blocks
    matcher = SequenceMatcher(None, gt_normalized, pred_normalized)

    aligned = []
    matched_pred_indices = set()

    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag == 'equal':
            # Words match
            for gt_idx, pred_idx in zip(range(i1, i2), range(j1, j2)):
                gt_word = gt_words[gt_idx]
                pred_word = pred_words[pred_idx]
                aligned.append(AlignedWord(
                    word=gt_word.word,
                    gt_start=gt_word.start,
                    gt_end=gt_word.end,
                    pred_start=pred_word.start,
                    pred_end=pred_word.end,
                    matched=True
                ))
                matched_pred_indices.add(pred_idx)
        elif tag in ('replace', 'delete'):
            # Ground truth words not matched
            for gt_idx in range(i1, i2):
                gt_word = gt_words[gt_idx]
                aligned.append(AlignedWord(
                    word=gt_word.word,
                    gt_start=gt_word.start,
                    gt_end=gt_word.end,
                    pred_start=None,
                    pred_end=None,
                    matched=False
                ))

    return aligned


def run_benchmark(
    manifest: list[dict],
    data_dir: Path,
    output_dir: Path,
    encoder: str,
    decoder: str,
    tokens: str,
    language: str,
    num_workers: int = 1
):
    """Run benchmark on all utterances in manifest."""
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    details_path = output_dir / f"details_{timestamp}.csv"
    summary_path = output_dir / f"summary_{timestamp}.csv"

    output_dir.mkdir(parents=True, exist_ok=True)

    # Collect all results
    all_aligned = []
    utterance_stats = []

    total = len(manifest)
    start_time = time.time()

    if num_workers > 1:
        # Parallel processing
        print(f"\nProcessing {total} utterances with {num_workers} workers...")

        # Prepare arguments for workers
        work_items = [(item, str(data_dir)) for item in manifest]

        with multiprocessing.Pool(
            processes=num_workers,
            initializer=_init_worker,
            initargs=(encoder, decoder, tokens, language)
        ) as pool:
            completed = 0
            for result in pool.imap(_process_utterance, work_items):
                utterance_stats.append(result["stats"])
                all_aligned.extend(result["aligned"])

                completed += 1
                elapsed = time.time() - start_time
                avg_per_item = elapsed / completed
                remaining = total - completed
                eta_seconds = avg_per_item * remaining

                if eta_seconds >= 3600:
                    eta_str = f"{eta_seconds / 3600:.1f}h"
                elif eta_seconds >= 60:
                    eta_str = f"{eta_seconds / 60:.1f}m"
                else:
                    eta_str = f"{eta_seconds:.0f}s"

                print(f"  [{completed}/{total}] {result['stats']['utterance_id']} - ETA: {eta_str}", flush=True)
    else:
        # Sequential processing (original behavior)
        print(f"\nProcessing {total} utterances...")

        # Initialize recognizer for sequential mode
        recognizer = sherpa_onnx.OfflineRecognizer.from_whisper(
            encoder=encoder,
            decoder=decoder,
            tokens=tokens,
            language=language,
            enable_token_timestamps=True,
        )

        for i, item in enumerate(manifest):
            iter_start = time.time()
            utterance_id = item["utterance_id"]
            audio_path = data_dir / item["audio_path"]

            # Parse ground truth
            gt_words = [
                WordTiming(word=wt["word"], start=wt["start"], end=wt["end"])
                for wt in item["word_times"]
            ]
            gt_transcript = " ".join(w.word for w in gt_words)

            # Get predictions
            pred_words = get_sherpa_word_timestamps(recognizer, audio_path)
            pred_transcript = " ".join(w.word for w in pred_words)

            # Align words
            aligned = align_words(gt_words, pred_words)

            # Calculate per-utterance stats
            matched = [a for a in aligned if a.matched]

            if matched:
                start_errors = [abs(a.pred_start - a.gt_start) * 1000 for a in matched]
                end_errors = [abs(a.pred_end - a.gt_end) * 1000 for a in matched]

                stats = {
                    "utterance_id": utterance_id,
                    "num_gt_words": len(gt_words),
                    "num_pred_words": len(pred_words),
                    "num_matched": len(matched),
                    "match_rate": len(matched) / len(gt_words) if gt_words else 0,
                    "wer": jiwer.wer(
                        gt_transcript,
                        pred_transcript,
                        reference_transform=wer_transforms,
                        hypothesis_transform=wer_transforms,
                    ),
                    "mean_start_error_ms": np.mean(start_errors),
                    "median_start_error_ms": np.median(start_errors),
                    "max_start_error_ms": np.max(start_errors),
                    "mean_end_error_ms": np.mean(end_errors),
                    "median_end_error_ms": np.median(end_errors),
                    "max_end_error_ms": np.max(end_errors),
                    "pct_within_20ms": sum(1 for e in start_errors if e <= 20) / len(start_errors) * 100,
                    "pct_within_50ms": sum(1 for e in start_errors if e <= 50) / len(start_errors) * 100,
                }
            else:
                stats = {
                    "utterance_id": utterance_id,
                    "num_gt_words": len(gt_words),
                    "num_pred_words": len(pred_words),
                    "num_matched": 0,
                    "match_rate": 0,
                    "wer": jiwer.wer(
                        gt_transcript,
                        pred_transcript,
                        reference_transform=wer_transforms,
                        hypothesis_transform=wer_transforms,
                    ) if gt_transcript else 1.0,
                    "mean_start_error_ms": None,
                    "median_start_error_ms": None,
                    "max_start_error_ms": None,
                    "mean_end_error_ms": None,
                    "median_end_error_ms": None,
                    "max_end_error_ms": None,
                    "pct_within_20ms": None,
                    "pct_within_50ms": None,
                }

            utterance_stats.append(stats)

            # Store aligned words for detailed output
            for j, a in enumerate(aligned):
                all_aligned.append({
                    "utterance_id": utterance_id,
                    "word_index": j,
                    "word": a.word,
                    "gt_start": a.gt_start,
                    "gt_end": a.gt_end,
                    "pred_start": a.pred_start if a.pred_start is not None else "",
                    "pred_end": a.pred_end if a.pred_end is not None else "",
                    "matched": a.matched,
                    "start_error_ms": abs(a.pred_start - a.gt_start) * 1000 if a.matched else "",
                    "end_error_ms": abs(a.pred_end - a.gt_end) * 1000 if a.matched else "",
                })

            # Progress with ETA
            completed = i + 1
            elapsed = time.time() - start_time
            avg_per_item = elapsed / completed
            remaining = total - completed
            eta_seconds = avg_per_item * remaining

            if eta_seconds >= 3600:
                eta_str = f"{eta_seconds / 3600:.1f}h"
            elif eta_seconds >= 60:
                eta_str = f"{eta_seconds / 60:.1f}m"
            else:
                eta_str = f"{eta_seconds:.0f}s"

            iter_time = time.time() - iter_start
            print(f"  [{completed}/{total}] {utterance_id} ({iter_time:.1f}s) - ETA: {eta_str}", flush=True)

    # Sort results by utterance_id to ensure consistent output
    utterance_stats.sort(key=lambda x: x["utterance_id"])
    all_aligned.sort(key=lambda x: (x["utterance_id"], x["word_index"]))

    # Write detailed results
    print(f"\nWriting detailed results to {details_path}...")
    with open(details_path, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=[
            "utterance_id", "word_index", "word", "gt_start", "gt_end",
            "pred_start", "pred_end", "matched", "start_error_ms", "end_error_ms"
        ])
        writer.writeheader()
        writer.writerows(all_aligned)

    # Write summary results
    print(f"Writing summary to {summary_path}...")
    with open(summary_path, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=[
            "utterance_id", "num_gt_words", "num_pred_words", "num_matched",
            "match_rate", "wer", "mean_start_error_ms", "median_start_error_ms",
            "max_start_error_ms", "mean_end_error_ms", "median_end_error_ms",
            "max_end_error_ms", "pct_within_20ms", "pct_within_50ms"
        ])
        writer.writeheader()
        writer.writerows(utterance_stats)

    # Print aggregate stats
    matched_stats = [s for s in utterance_stats if s["num_matched"] > 0]
    if matched_stats:
        print("\n" + "=" * 60)
        print("AGGREGATE RESULTS")
        print("=" * 60)
        print(f"Total utterances: {len(manifest)}")
        print(f"Total ground truth words: {sum(s['num_gt_words'] for s in utterance_stats)}")
        print(f"Total matched words: {sum(s['num_matched'] for s in utterance_stats)}")

        all_start_errors = [
            float(r["start_error_ms"]) for r in all_aligned
            if r["matched"] and r["start_error_ms"] != ""
        ]
        all_end_errors = [
            float(r["end_error_ms"]) for r in all_aligned
            if r["matched"] and r["end_error_ms"] != ""
        ]

        if all_start_errors:
            print(f"\nStart Time Errors:")
            print(f"  Mean: {np.mean(all_start_errors):.1f} ms")
            print(f"  Median: {np.median(all_start_errors):.1f} ms")
            print(f"  Max: {np.max(all_start_errors):.1f} ms")
            print(f"  Std: {np.std(all_start_errors):.1f} ms")

            print(f"\nEnd Time Errors:")
            print(f"  Mean: {np.mean(all_end_errors):.1f} ms")
            print(f"  Median: {np.median(all_end_errors):.1f} ms")
            print(f"  Max: {np.max(all_end_errors):.1f} ms")
            print(f"  Std: {np.std(all_end_errors):.1f} ms")

            print(f"\nAccuracy Thresholds (start time):")
            print(f"  Within 20ms: {sum(1 for e in all_start_errors if e <= 20) / len(all_start_errors) * 100:.1f}%")
            print(f"  Within 50ms: {sum(1 for e in all_start_errors if e <= 50) / len(all_start_errors) * 100:.1f}%")
            print(f"  Within 100ms: {sum(1 for e in all_start_errors if e <= 100) / len(all_start_errors) * 100:.1f}%")

        avg_wer = np.mean([s["wer"] for s in utterance_stats])
        print(f"\nWord Error Rate (WER): {avg_wer * 100:.1f}%")

    return details_path, summary_path


def main():
    parser = argparse.ArgumentParser(description="Run timestamp accuracy benchmark")
    parser.add_argument("--encoder", required=True, help="Path to encoder.onnx")
    parser.add_argument("--decoder", required=True, help="Path to decoder.onnx")
    parser.add_argument("--tokens", required=True, help="Path to tokens.txt")
    parser.add_argument(
        "--data-dir",
        default="benchmark_data",
        help="Directory with manifest.json and audio (default: benchmark_data)"
    )
    parser.add_argument(
        "--output-dir",
        default="benchmark_results",
        help="Output directory for CSV files (default: benchmark_results)"
    )
    parser.add_argument(
        "--language",
        default="en",
        help="Language code (default: en)"
    )
    parser.add_argument(
        "--num-workers",
        type=int,
        default=1,
        help="Number of parallel workers (default: 1, sequential). "
             "Use higher values to speed up benchmarks on multi-core machines. "
             "Each worker loads its own model copy, so memory usage scales linearly."
    )
    args = parser.parse_args()

    # Resolve paths
    script_dir = Path(__file__).parent.resolve()
    repo_root = script_dir.parent.parent
    data_dir = repo_root / args.data_dir
    output_dir = repo_root / args.output_dir
    manifest_path = data_dir / "manifest.json"

    # Load manifest
    print(f"Loading manifest from {manifest_path}...")
    with open(manifest_path) as f:
        manifest = json.load(f)
    print(f"  Found {len(manifest)} utterances")

    # Print recognizer info
    print(f"\nRecognizer configuration:")
    print(f"  Encoder: {args.encoder}")
    print(f"  Decoder: {args.decoder}")
    print(f"  Tokens: {args.tokens}")
    if args.num_workers > 1:
        print(f"  Workers: {args.num_workers} (parallel)")
    else:
        print(f"  Workers: 1 (sequential)")

    # Run benchmark
    details_path, summary_path = run_benchmark(
        manifest=manifest,
        data_dir=data_dir,
        output_dir=output_dir,
        encoder=args.encoder,
        decoder=args.decoder,
        tokens=args.tokens,
        language=args.language,
        num_workers=args.num_workers,
    )

    print("\n" + "=" * 60)
    print("Benchmark complete!")
    print("=" * 60)
    print(f"Details: {details_path}")
    print(f"Summary: {summary_path}")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/check_style_cpplint.sh
================================================
#!/bin/bash
#
# Copyright      2020  Mobvoi Inc. (authors: Fangjun Kuang)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Usage:
#
# (1) To check files of the last commit
#  ./scripts/check_style_cpplint.sh
#
# (2) To check changed files not committed yet
#  ./scripts/check_style_cpplint.sh 1
#
# (3) To check all files in the project
#  ./scripts/check_style_cpplint.sh 2


cpplint_version="2.0.2"
cur_dir=$(cd $(dirname $BASH_SOURCE) && pwd)
sherpa_onnx_dir=$(cd $cur_dir/.. && pwd)

build_dir=$sherpa_onnx_dir/build
mkdir -p $build_dir

cpplint_src=$build_dir/cpplint-${cpplint_version}/cpplint.py

if [ ! -d "$build_dir/cpplint-${cpplint_version}" ]; then
  pushd $build_dir
  if command -v wget &> /dev/null; then
    wget https://github.com/cpplint/cpplint/archive/${cpplint_version}.tar.gz
  elif command -v curl &> /dev/null; then
    curl -O -SL https://github.com/cpplint/cpplint/archive/${cpplint_version}.tar.gz
  else
    echo "Please install wget or curl to download cpplint"
    exit 1
  fi
  tar xf ${cpplint_version}.tar.gz
  rm ${cpplint_version}.tar.gz

  # cpplint will report the following error for: __host__ __device__ (
  #
  #     Extra space before ( in function call  [whitespace/parens] [4]
  #
  # the following patch disables the above error
  sed -i "3490i\        not Search(r'__host__ __device__\\\s+\\\(', fncall) and" $cpplint_src
  popd
fi

source $sherpa_onnx_dir/scripts/utils.sh

# return true if the given file is a c++ source file
# return false otherwise
function is_source_code_file() {
  case "$1" in
    *.cc|*.h|*.cu)
      echo true;;
    *)
      echo false;;
  esac
}

function check_style() {
  if [[ $1 == mfc-example* ]]; then
    return
  fi
  python3 $cpplint_src $1 || abort $1
}

function check_last_commit() {
  files=$(git diff HEAD^1 --name-only --diff-filter=ACDMRUXB)
  echo $files
}

function check_current_dir() {
  files=$(git status -s -uno --porcelain | awk '{
  if (NF == 4) {
    # a file has been renamed
    print $NF
  } else {
    print $2
  }}')

  echo $files
}

function do_check() {
  case "$1" in
    1)
      echo "Check changed files"
      files=$(check_current_dir)
      ;;
    2)
      echo "Check all files"
      files=$(find $sherpa_onnx_dir/cxx-api-examples-ignored $sherpa_onnx_dir/c-api-examples-ignored $sherpa_onnx_dir/sherpa-onnx/csrc $sherpa_onnx_dir/sherpa-onnx/python $sherpa_onnx_dir/scripts/node-addon-api/src $sherpa_onnx_dir/sherpa-onnx/jni $sherpa_onnx_dir/sherpa-onnx/c-api -name "*.h" -o -name "*.cc")
      files2=$(find $sherpa_onnx_dir/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/ -name "*.cc")
      ;;
    *)
      echo "Check last commit"
      files=$(check_last_commit)
      ;;
  esac

  for f in $files $files2; do
    need_check=$(is_source_code_file $f)
    if $need_check; then
      [[ -f $f ]] && check_style $f
    fi
  done
}

function main() {
  do_check $1

  ok "Great! Style check passed!"
}

cd $sherpa_onnx_dir

main $1


================================================
FILE: scripts/dart/add-punctuations-pubspec.yaml
================================================
name: add_punctuations

description: >
  This example demonstrates how to use the Dart API to add punctuations to text.

version: 1.0.0

environment:
  sdk: ">=3.0.0 <4.0.0"

dependencies:
  sherpa_onnx:
    path: ../../flutter/sherpa_onnx
  path: ^1.9.0
  args: ^2.5.0

dev_dependencies:
  lints: ^3.0.0


================================================
FILE: scripts/dart/audio-tagging-pubspec.yaml
================================================
name: audio_tagging

description: >
  This example demonstrates how to use the Dart API for audio tagging.

version: 1.0.0

environment:
  sdk: ">=3.0.0 <4.0.0"

dependencies:
  sherpa_onnx:
    path: ../../flutter/sherpa_onnx
  path: ^1.9.0
  args: ^2.5.0

dev_dependencies:
  lints: ^3.0.0


================================================
FILE: scripts/dart/kws-pubspec.yaml
================================================
name: keyword_spotter

description: >
  This example demonstrates how to use the Dart API for keyword spotting

version: 1.0.0

environment:
  sdk: ">=3.0.0 <4.0.0"

dependencies:
  sherpa_onnx:
    path: ../../flutter/sherpa_onnx
  path: ^1.9.0
  args: ^2.5.0

dev_dependencies:
  lints: ^3.0.0


================================================
FILE: scripts/dart/non-streaming-asr-pubspec.yaml
================================================
name: non_streaming_asr
description: >
  This example demonstrates how to use the Dart API for Non-streaming speech recognition. Specifically, we use the following models as examples, whisper, zipformer, and paraformer.

version: 1.0.0

environment:
  sdk: ">=3.0.0 <4.0.0"

# Add regular dependencies here.
dependencies:
  sherpa_onnx:
    path: ../../flutter/sherpa_onnx

  path: ^1.9.0
  args: ^2.5.0

dev_dependencies:
  lints: ^3.0.0


================================================
FILE: scripts/dart/release.sh
================================================
#!/usr/bin/env bash

# see
# https://dart.dev/tools/pub/automated-publishing

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
echo "SCRIPT_DIR: $SCRIPT_DIR"
echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" $SHERPA_ONNX_DIR/CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

src_dir=$SHERPA_ONNX_DIR/sherpa-onnx/flutter
pushd $src_dir

v="version: $SHERPA_ONNX_VERSION"
echo "v: $v"
sed -i.bak s"/^version: .*/$v/" ./pubspec.yaml
rm *.bak
rm notes.md
git status
git diff

HF_MIRROR=hf.co
linux_wheel_filename=sherpa_onnx_core-${SHERPA_ONNX_VERSION}-py3-none-manylinux2014_x86_64.whl
linux_wheel=$src_dir/$linux_wheel_filename

macos_wheel_filename=sherpa_onnx_core-${SHERPA_ONNX_VERSION}-py3-none-macosx_10_15_universal2.whl
macos_wheel=$src_dir/$macos_wheel_filename

windows_x64_wheel_filename=sherpa_onnx_core-${SHERPA_ONNX_VERSION}-py3-none-win_amd64.whl
windows_x64_wheel=$src_dir/$windows_x64_wheel_filename

function process_linux() {
  mkdir -p t
  cd t
  curl -OL https://$HF_MIRROR/csukuangfj2/sherpa-onnx-wheels/resolve/main/cpu/$SHERPA_ONNX_VERSION/$linux_wheel_filename
  unzip $linux_wheel_filename
  cp -v sherpa_onnx/lib/*.so* ../linux
  cd ..
  rm -rf t

  pushd linux

  popd
}

function process_windows_x64() {
  mkdir -p t
  cd t
  curl -OL https://$HF_MIRROR/csukuangfj2/sherpa-onnx-wheels/resolve/main/cpu/$SHERPA_ONNX_VERSION/$windows_x64_wheel_filename
  unzip $windows_x64_wheel_filename
  cp -v sherpa_onnx/lib/*.dll ../windows
  cd ..
  rm -rf t
}

function process_macos() {
  mkdir -p t
  cd t
  curl -OL https://$HF_MIRROR/csukuangfj2/sherpa-onnx-wheels/resolve/main/cpu/$SHERPA_ONNX_VERSION/$macos_wheel_filename
  unzip $macos_wheel_filename
  cp -v sherpa_onnx/lib/*.dylib ../macos
  cd ..
  rm -rf t
}

process_linux
process_windows_x64
process_macos


================================================
FILE: scripts/dart/sherpa-onnx-pubspec.yaml
================================================
name: sherpa_onnx

description: >
  Speech recognition, speech synthesis, and speaker recognition using next-gen Kaldi
  with onnxruntime without Internet connection.

repository: https://github.com/k2-fsa/sherpa-onnx/tree/master/sherpa-onnx/flutter

issue_tracker: https://github.com/k2-fsa/sherpa-onnx/issues
documentation: https://k2-fsa.github.io/sherpa/onnx/

topics:
  - speech-recognition
  - speech-synthesis
  - speaker-identification
  - audio-tagging
  - voice-activity-detection

# remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx.podspec
version: 1.10.20

homepage: https://github.com/k2-fsa/sherpa-onnx

environment:
  sdk: ">=3.0.0 <4.0.0"

dependencies:
  ffi: ^2.1.0
  flutter:
    sdk: flutter

  sherpa_onnx_android:
    path: ../sherpa_onnx_android

  sherpa_onnx_macos:
    path: ../sherpa_onnx_macos

  sherpa_onnx_linux:
    path: ../sherpa_onnx_linux

  sherpa_onnx_windows:
    path: ../sherpa_onnx_windows

flutter:
  plugin:
    platforms:
      android:
        default_package: sherpa_onnx_android

      macos:
        default_package: sherpa_onnx_macos

      linux:
        default_package: sherpa_onnx_linux

      windows:
        default_package: sherpa_onnx_windows


================================================
FILE: scripts/dart/slid-pubspec.yaml
================================================
name: spoken_language_identification

description: >
  This example demonstrates how to use the Dart API for spoken language identification.

version: 1.0.0

environment:
  sdk: ">=3.0.0 <4.0.0"

# Add regular dependencies here.
dependencies:
  sherpa_onnx:
    path: ../../flutter/sherpa_onnx

  path: ^1.9.0
  args: ^2.5.0

dev_dependencies:
  lints: ^3.0.0


================================================
FILE: scripts/dart/speaker-diarization-pubspec.yaml
================================================
name: speaker_diarization
description: >
  This example demonstrates how to use the Dart API for speaker diarization.

version: 1.0.0

environment:
  sdk: ">=3.0.0 <4.0.0"

dependencies:
  sherpa_onnx:
    path: ../../flutter/sherpa_onnx
  path: ^1.9.0

dev_dependencies:
  lints: ^3.0.0


================================================
FILE: scripts/dart/speaker-id-pubspec.yaml
================================================
name: speaker_identification

description: >
  This example demonstrates how to use the Dart API for speaker identification.

version: 1.0.0

environment:
  sdk: ">=3.0.0 <4.0.0"

dependencies:
  sherpa_onnx:
    path: ../../flutter/sherpa_onnx
  path: ^1.9.0
  args: ^2.5.0

dev_dependencies:
  lints: ^3.0.0


================================================
FILE: scripts/dart/speech-enhancement-dpdfnet-pubspec.yaml
================================================
name: speech_enhancement_dpdfnet

description: >
  This example demonstrates how to use the Dart API for DPDFNet speech enhancement/denoising.

version: 1.0.0

environment:
  sdk: ">=3.0.0 <4.0.0"

dependencies:
  sherpa_onnx:
    path: ../../flutter/sherpa_onnx
  path: ^1.9.0
  args: ^2.5.0

dev_dependencies:
  lints: ^3.0.0


================================================
FILE: scripts/dart/speech-enhancement-gtcrn-pubspec.yaml
================================================
name: speech_enhancement_gtcrn

description: >
  This example demonstrates how to use the Dart API for speech enhancement/denoising.

version: 1.0.0

environment:
  sdk: ">=3.0.0 <4.0.0"

dependencies:
  sherpa_onnx:
    path: ../../flutter/sherpa_onnx
  path: ^1.9.0
  args: ^2.5.0

dev_dependencies:
  lints: ^3.0.0


================================================
FILE: scripts/dart/streaming-asr-pubspec.yaml
================================================
name: streaming_asr

description: >
  This example demonstrates how to use the Dart API for streaming speech recognition.

version: 1.0.0
# repository: https://github.com/my_org/my_repo

environment:
  sdk: ">=3.0.0 <4.0.0"

# Add regular dependencies here.
dependencies:
  sherpa_onnx:
    path: ../../flutter/sherpa_onnx

  path: ^1.9.0
  args: ^2.5.0

dev_dependencies:
  lints: ^3.0.0
  test: ^1.24.0


================================================
FILE: scripts/dart/streaming-speech-enhancement-dpdfnet-pubspec.yaml
================================================
name: streaming_speech_enhancement_dpdfnet

description: >
  This example demonstrates how to use the Dart API for streaming speech enhancement/denoising with DPDFNet.

version: 1.0.0

environment:
  sdk: ">=3.0.0 <4.0.0"

dependencies:
  sherpa_onnx:
    path: ../../flutter/sherpa_onnx
  path: ^1.9.0
  args: ^2.5.0

dev_dependencies:
  lints: ^3.0.0


================================================
FILE: scripts/dart/streaming-speech-enhancement-gtcrn-pubspec.yaml
================================================
name: streaming_speech_enhancement_gtcrn

description: >
  This example demonstrates how to use the Dart API for streaming speech enhancement/denoising with GTCRN.

version: 1.0.0

environment:
  sdk: ">=3.0.0 <4.0.0"

dependencies:
  sherpa_onnx:
    path: ../../flutter/sherpa_onnx
  path: ^1.9.0
  args: ^2.5.0

dev_dependencies:
  lints: ^3.0.0


================================================
FILE: scripts/dart/tts-pubspec.yaml
================================================
name: tts
description: A sample command-line application.
version: 1.0.0
# repository: https://github.com/my_org/my_repo

environment:
  sdk: ">=3.0.0 <4.0.0"

# Add regular dependencies here.
dependencies:
  sherpa_onnx:
    path: ../../flutter/sherpa_onnx

  path: ^1.9.0
  args: ^2.5.0

dev_dependencies:
  lints: ^3.0.0


================================================
FILE: scripts/dart/vad-non-streaming-asr-pubspec.yaml
================================================
name: vad_with_non_streaming_asr

description: >
  This example demonstrates how to use the Dart API for VAD (voice activity detection)
  with non-streaming speech recognition.

version: 1.0.0

environment:
  sdk: ">=3.0.0 <4.0.0"

dependencies:
  sherpa_onnx:
    path: ../../flutter/sherpa_onnx
  path: ^1.9.0
  args: ^2.5.0

dev_dependencies:
  lints: ^3.0.0


================================================
FILE: scripts/dart/vad-pubspec.yaml
================================================
name: vad

description: >
  This example demonstrates how to use the Dart API for VAD (voice activity detection).

version: 1.0.0

environment:
  sdk: ">=3.0.0 <4.0.0"

dependencies:
  sherpa_onnx:
    path: ../../flutter/sherpa_onnx

  path: ^1.9.0
  args: ^2.5.0

dev_dependencies:
  lints: ^3.0.0


================================================
FILE: scripts/dotnet/.gitignore
================================================
all
macos-arm64
macos-x64
linux-x64
linux-arm64
windows-arm64
windows-x64
windows-x86
packages
tmp


================================================
FILE: scripts/dotnet/AudioEvent.cs
================================================
﻿/// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)

using System;
using System.Runtime.InteropServices;
using System.Text;

namespace SherpaOnnx
{

    public class AudioEvent
    {
        public AudioEvent(IntPtr handle)
        {
            Impl impl = (Impl)Marshal.PtrToStructure(handle, typeof(Impl));

            // PtrToStringUTF8() requires .net standard 2.1
            // _text = Marshal.PtrToStringUTF8(impl.Text);

            int length = 0;

            unsafe
            {
                byte* buffer = (byte*)impl.Name;
                while (*buffer != 0)
                {
                    ++buffer;
                    length += 1;
                }
            }

            byte[] stringBuffer = new byte[length];
            Marshal.Copy(impl.Name, stringBuffer, 0, length);
            _name = Encoding.UTF8.GetString(stringBuffer);

            _index = impl.Index;
            _prob = impl.Prob;
        }

        [StructLayout(LayoutKind.Sequential)]
        struct Impl
        {
            public IntPtr Name;
            public int Index;
            public float Prob;
        }

        private String _name;
        public String Name => _name;

        private int _index;
        public int Index => _index;

        private float _prob;
        public float Prob => _prob;
    }
}


================================================
FILE: scripts/dotnet/AudioTagging.cs
================================================
﻿/// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)
using System;
using System.Runtime.InteropServices;
using System.Text;
using System.Collections.Generic;

namespace SherpaOnnx
{
    public class AudioTagging : IDisposable
    {
        public AudioTagging(AudioTaggingConfig config)
        {
            IntPtr h = SherpaOnnxCreateAudioTagging(ref config);
            _handle = new HandleRef(this, h);
        }

        public OfflineStream CreateStream()
        {
            IntPtr p = SherpaOnnxAudioTaggingCreateOfflineStream(_handle.Handle);
            return new OfflineStream(p);
        }

        // if topK <= 0, then config.TopK is used
        // if topK > 0, then config.TopK is ignored
        public AudioEvent[] Compute(OfflineStream stream, int topK = -1)
        {
            IntPtr p = SherpaOnnxAudioTaggingCompute(_handle.Handle, stream.Handle, topK);

            var result = new List<AudioEvent>();

            if (p == IntPtr.Zero)
            {
              return result.ToArray();
            }

            int index = 0;
            while (true)
            {
              IntPtr e = Marshal.ReadIntPtr(p, index * IntPtr.Size);
              if (e == IntPtr.Zero)
              {
                break;
              }

              AudioEvent ae = new AudioEvent(e);
              result.Add(ae);

              ++index;
            }

            SherpaOnnxAudioTaggingFreeResults(p);

            return result.ToArray();
        }

        public void Dispose()
        {
            Cleanup();
            // Prevent the object from being placed on the
            // finalization queue
            System.GC.SuppressFinalize(this);
        }

        ~AudioTagging()
        {
            Cleanup();
        }

        private void Cleanup()
        {
            SherpaOnnxDestroyAudioTagging(_handle.Handle);

            // Don't permit the handle to be used again.
            _handle = new HandleRef(this, IntPtr.Zero);
        }

        private HandleRef _handle;


        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxCreateAudioTagging(ref AudioTaggingConfig config);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxDestroyAudioTagging(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxAudioTaggingCreateOfflineStream(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxAudioTaggingCompute(IntPtr handle, IntPtr stream, int topK);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxAudioTaggingFreeResults(IntPtr p);
    }
}


================================================
FILE: scripts/dotnet/AudioTaggingConfig.cs
================================================
/// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct AudioTaggingConfig
    {
        public AudioTaggingConfig()
        {
            Model = new AudioTaggingModelConfig();

            Labels = "";
            TopK = 5;
        }

        public AudioTaggingModelConfig Model;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Labels;

        public int TopK;
    }
}


================================================
FILE: scripts/dotnet/AudioTaggingModelConfig.cs
================================================
/// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct AudioTaggingModelConfig
    {
        public AudioTaggingModelConfig()
        {
            Zipformer = new OfflineZipformerAudioTaggingModelConfig();

            CED = "";
            NumThreads = 1;
            Debug = 0;
            Provider = "cpu";
        }

        public OfflineZipformerAudioTaggingModelConfig Zipformer;

        [MarshalAs(UnmanagedType.LPStr)]
        public string CED;

        public int NumThreads;

        public int Debug;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Provider;
    }
}


================================================
FILE: scripts/dotnet/CircularBuffer.cs
================================================
﻿/// Copyright (c)  2024  Xiaomi Corporation (authors: Fangjun Kuang)

using System;
using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    public class CircularBuffer : IDisposable
    {
        public CircularBuffer(int capacity)
        {
            IntPtr h = SherpaOnnxCreateCircularBuffer(capacity);
            _handle = new HandleRef(this, h);
        }

        public void Push(float[] data)
        {
            SherpaOnnxCircularBufferPush(_handle.Handle, data, data.Length);
        }

        public float[] Get(int startIndex, int n)
        {
            IntPtr p = SherpaOnnxCircularBufferGet(_handle.Handle, startIndex, n);

            float[] ans = new float[n];
            Marshal.Copy(p, ans, 0, n);

            SherpaOnnxCircularBufferFree(p);

            return ans;
        }

        public void Pop(int n)
        {
            SherpaOnnxCircularBufferPop(_handle.Handle, n);
        }

        public int Size
        {
          get
          {
              return SherpaOnnxCircularBufferSize(_handle.Handle);
          }
        }

        public int Head
        {
          get
          {
              return SherpaOnnxCircularBufferHead(_handle.Handle);
          }
        }

        public void Reset()
        {
            SherpaOnnxCircularBufferReset(_handle.Handle);
        }

        public void Dispose()
        {
            Cleanup();
            // Prevent the object from being placed on the
            // finalization queue
            System.GC.SuppressFinalize(this);
        }

        ~CircularBuffer()
        {
            Cleanup();
        }

        private void Cleanup()
        {
            SherpaOnnxDestroyCircularBuffer(_handle.Handle);

            // Don't permit the handle to be used again.
            _handle = new HandleRef(this, IntPtr.Zero);
        }

        private HandleRef _handle;

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxCreateCircularBuffer(int capacity);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxDestroyCircularBuffer(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxCircularBufferPush(IntPtr handle, float[] p, int n);

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxCircularBufferGet(IntPtr handle, int startIndex, int n);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxCircularBufferFree(IntPtr p);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxCircularBufferPop(IntPtr handle, int n);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxCircularBufferSize(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxCircularBufferHead(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxCircularBufferReset(IntPtr handle);
    }
}


================================================
FILE: scripts/dotnet/DenoisedAudio.cs
================================================
﻿/// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)
using System;
using System.Runtime.InteropServices;
using System.Text;

namespace SherpaOnnx
{
    public class DenoisedAudio
    {
        public DenoisedAudio(IntPtr p)
        {
            _handle = new HandleRef(this, p);
        }

        public bool SaveToWaveFile(String filename)
        {
            if (Handle == IntPtr.Zero)
            {
                return false;
            }

            Impl impl = (Impl)Marshal.PtrToStructure(Handle, typeof(Impl));
            byte[] utf8Filename = Encoding.UTF8.GetBytes(filename);
            byte[] utf8FilenameWithNull = new byte[utf8Filename.Length + 1]; // +1 for null terminator
            Array.Copy(utf8Filename, utf8FilenameWithNull, utf8Filename.Length);
            utf8FilenameWithNull[utf8Filename.Length] = 0; // Null terminator
            int status = SherpaOnnxWriteWave(impl.Samples, impl.NumSamples, impl.SampleRate, utf8FilenameWithNull);
            return status == 1;
        }

        ~DenoisedAudio()
        {
            Cleanup();
        }

        public void Dispose()
        {
            Cleanup();
            // Prevent the object from being placed on the
            // finalization queue
            System.GC.SuppressFinalize(this);
        }

        private void Cleanup()
        {
            if (Handle != IntPtr.Zero)
            {
                SherpaOnnxDestroyDenoisedAudio(Handle);
            }

            // Don't permit the handle to be used again.
            _handle = new HandleRef(this, IntPtr.Zero);
        }

        [StructLayout(LayoutKind.Sequential)]
        struct Impl
        {
            public IntPtr Samples;
            public int NumSamples;
            public int SampleRate;
        }

        private HandleRef _handle;
        public IntPtr Handle => _handle.Handle;

        public int NumSamples
        {
            get
            {
                if (Handle == IntPtr.Zero)
                {
                    return 0;
                }

                Impl impl = (Impl)Marshal.PtrToStructure(Handle, typeof(Impl));
                return impl.NumSamples;
            }
        }

        public int SampleRate
        {
            get
            {
                if (Handle == IntPtr.Zero)
                {
                    return 0;
                }

                Impl impl = (Impl)Marshal.PtrToStructure(Handle, typeof(Impl));
                return impl.SampleRate;
            }
        }

        public float[] Samples
        {
            get
            {
                if (Handle == IntPtr.Zero)
                {
                    return new float[0];
                }

                Impl impl = (Impl)Marshal.PtrToStructure(Handle, typeof(Impl));

                float[] samples = new float[impl.NumSamples];
                if (impl.NumSamples > 0 && impl.Samples != IntPtr.Zero)
                {
                    Marshal.Copy(impl.Samples, samples, 0, impl.NumSamples);
                }
                return samples;
            }
        }

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxDestroyDenoisedAudio(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxWriteWave(IntPtr samples, int n, int sample_rate, [MarshalAs(UnmanagedType.LPArray, ArraySubType = UnmanagedType.I1)] byte[] utf8Filename);
    }
}


================================================
FILE: scripts/dotnet/Dll.cs
================================================
/// Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
/// Copyright (c)  2023 by manyeyes
/// Copyright (c)  2024.5 by 东风破

namespace SherpaOnnx
{
    internal static class Dll
    {
        public const string Filename = "sherpa-onnx-c-api";
    }
}

================================================
FILE: scripts/dotnet/FastClusteringConfig.cs
================================================
/// Copyright (c)  2024  Xiaomi Corporation

using System.Runtime.InteropServices;

namespace SherpaOnnx
{

    [StructLayout(LayoutKind.Sequential)]
    public struct FastClusteringConfig
    {
        public FastClusteringConfig()
        {
            NumClusters = -1;
            Threshold = 0.5F;
        }

        public int NumClusters;
        public float Threshold;
    }
}


================================================
FILE: scripts/dotnet/FeatureConfig.cs
================================================
/// Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
/// Copyright (c)  2023 by manyeyes
/// Copyright (c)  2024.5 by 东风破

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    /// It expects 16 kHz 16-bit single channel wave format.
    [StructLayout(LayoutKind.Sequential)]
    public struct FeatureConfig
    {
        public FeatureConfig()
        {
            SampleRate = 16000;
            FeatureDim = 80;
        }
        /// Sample rate of the input data. MUST match the one expected
        /// by the model. For instance, it should be 16000 for models provided
        /// by us.
        public int SampleRate;

        /// Feature dimension of the model.
        /// For instance, it should be 80 for models provided by us.
        public int FeatureDim;
    }

}

================================================
FILE: scripts/dotnet/HomophoneReplacerConfig.cs
================================================
/// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct HomophoneReplacerConfig
    {
        public HomophoneReplacerConfig()
        {
          DictDir = "";
          Lexicon = "";
          RuleFsts = "";
        }

        [MarshalAs(UnmanagedType.LPStr)]
        public string DictDir;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Lexicon;

        [MarshalAs(UnmanagedType.LPStr)]
        public string RuleFsts;
    }
}


================================================
FILE: scripts/dotnet/KeywordResult.cs
================================================
﻿/// Copyright (c)  2024  Xiaomi Corporation

using System;
using System.Runtime.InteropServices;
using System.Text;

namespace SherpaOnnx
{
    public class KeywordResult
    {
        public KeywordResult(IntPtr handle)
        {
            Impl impl = (Impl)Marshal.PtrToStructure(handle, typeof(Impl));

            // PtrToStringUTF8() requires .net standard 2.1
            // _keyword = Marshal.PtrToStringUTF8(impl.Keyword);

            int length = 0;

            unsafe
            {
                byte* buffer = (byte*)impl.Keyword;
                while (*buffer != 0)
                {
                    ++buffer;
                    length += 1;
                }
            }

            byte[] stringBuffer = new byte[length];
            Marshal.Copy(impl.Keyword, stringBuffer, 0, length);
            _keyword = Encoding.UTF8.GetString(stringBuffer);
        }

        [StructLayout(LayoutKind.Sequential)]
        struct Impl
        {
            public IntPtr Keyword;
        }

        private String _keyword;
        public String Keyword => _keyword;
    }
}


================================================
FILE: scripts/dotnet/KeywordSpotter.cs
================================================
﻿/// Copyright (c)  2024  Xiaomi Corporation (authors: Fangjun Kuang)
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Text;

namespace SherpaOnnx
{
    // please see
    // https://www.mono-project.com/docs/advanced/pinvoke/#gc-safe-pinvoke-code
    // https://www.mono-project.com/docs/advanced/pinvoke/#properly-disposing-of-resources
    public class KeywordSpotter : IDisposable
    {
        public KeywordSpotter(KeywordSpotterConfig config)
        {
            IntPtr h = SherpaOnnxCreateKeywordSpotter(ref config);
            _handle = new HandleRef(this, h);
        }

        public OnlineStream CreateStream()
        {
            IntPtr p = SherpaOnnxCreateKeywordStream(_handle.Handle);
            return new OnlineStream(p);
        }

        public OnlineStream CreateStream(string keywords)
        {
            byte[] utf8Bytes = Encoding.UTF8.GetBytes(keywords);
            byte[] utf8BytesWithNull = new byte[utf8Bytes.Length + 1]; // +1 for null terminator
            Array.Copy(utf8Bytes, utf8BytesWithNull, utf8Bytes.Length);
            utf8BytesWithNull[utf8Bytes.Length] = 0; // Null terminator
            IntPtr p = SherpaOnnxCreateKeywordStreamWithKeywords(_handle.Handle, utf8BytesWithNull);
            return new OnlineStream(p);
        }

        /// Return true if the passed stream is ready for decoding.
        public bool IsReady(OnlineStream stream)
        {
            return IsReady(_handle.Handle, stream.Handle) != 0;
        }

        /// You have to ensure that IsReady(stream) returns true before
        /// you call this method
        public void Decode(OnlineStream stream)
        {
            Decode(_handle.Handle, stream.Handle);
        }

        public void Reset(OnlineStream stream)
        {
            Reset(_handle.Handle, stream.Handle);
        }

        // The caller should ensure all passed streams are ready for decoding.
        public void Decode(IEnumerable<OnlineStream> streams)
        {
            // TargetFramework=net20 does not support System.Linq
            // IntPtr[] ptrs = streams.Select(s => s.Handle).ToArray();
            List<IntPtr> list = new List<IntPtr>();
            foreach (OnlineStream s in streams)
            {
              list.Add(s.Handle);
            }

            IntPtr[] ptrs = list.ToArray();
            Decode(_handle.Handle, ptrs, ptrs.Length);
        }

        public KeywordResult GetResult(OnlineStream stream)
        {
            IntPtr h = GetResult(_handle.Handle, stream.Handle);
            KeywordResult result = new KeywordResult(h);
            DestroyResult(h);
            return result;
        }

        public void Dispose()
        {
            Cleanup();
            // Prevent the object from being placed on the
            // finalization queue
            System.GC.SuppressFinalize(this);
        }

        ~KeywordSpotter()
        {
            Cleanup();
        }

        private void Cleanup()
        {
            SherpaOnnxDestroyKeywordSpotter(_handle.Handle);

            // Don't permit the handle to be used again.
            _handle = new HandleRef(this, IntPtr.Zero);
        }

        private HandleRef _handle;

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxCreateKeywordSpotter(ref KeywordSpotterConfig config);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxDestroyKeywordSpotter(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxCreateKeywordStream(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxCreateKeywordStreamWithKeywords(IntPtr handle, [MarshalAs(UnmanagedType.LPArray, ArraySubType = UnmanagedType.I1)] byte[] utf8Keywords);

        [DllImport(Dll.Filename, EntryPoint = "SherpaOnnxIsKeywordStreamReady")]
        private static extern int IsReady(IntPtr handle, IntPtr stream);

        [DllImport(Dll.Filename, EntryPoint = "SherpaOnnxDecodeKeywordStream")]
        private static extern void Decode(IntPtr handle, IntPtr stream);

        [DllImport(Dll.Filename, EntryPoint = "SherpaOnnxResetKeywordStream")]
        private static extern void Reset(IntPtr handle, IntPtr stream);

        [DllImport(Dll.Filename, EntryPoint = "SherpaOnnxDecodeMultipleKeywordStreams")]
        private static extern void Decode(IntPtr handle, IntPtr[] streams, int n);

        [DllImport(Dll.Filename, EntryPoint = "SherpaOnnxGetKeywordResult")]
        private static extern IntPtr GetResult(IntPtr handle, IntPtr stream);

        [DllImport(Dll.Filename, EntryPoint = "SherpaOnnxDestroyKeywordResult")]
        private static extern void DestroyResult(IntPtr result);
    }
}


================================================
FILE: scripts/dotnet/KeywordSpotterConfig.cs
================================================
/// Copyright (c)  2024  Xiaomi Corporation

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct KeywordSpotterConfig
    {
        public KeywordSpotterConfig()
        {
            FeatConfig = new FeatureConfig();
            ModelConfig = new OnlineModelConfig();

            MaxActivePaths = 4;
            NumTrailingBlanks = 1;
            KeywordsScore = 1.0F;
            KeywordsThreshold = 0.25F;
            KeywordsFile = "";
            KeywordsBuf= "";
            KeywordsBufSize= 0;
        }
        public FeatureConfig FeatConfig;
        public OnlineModelConfig ModelConfig;

        public int MaxActivePaths;
        public int NumTrailingBlanks;
        public float KeywordsScore;
        public float KeywordsThreshold;

        [MarshalAs(UnmanagedType.LPStr)]
        public string KeywordsFile;

        [MarshalAs(UnmanagedType.LPStr)]
        public string KeywordsBuf;

        public int KeywordsBufSize;
    }
}


================================================
FILE: scripts/dotnet/OfflineCanaryModelConfig.cs
================================================
/// Copyright (c)  2024.5 by 东风破

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineCanaryModelConfig
    {
        public OfflineCanaryModelConfig()
        {
            Encoder = "";
            Decoder = "";
            SrcLang = "en";
            TgtLang = "en";
            UsePnc = 1;
        }
        [MarshalAs(UnmanagedType.LPStr)]
        public string Encoder;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Decoder;

        [MarshalAs(UnmanagedType.LPStr)]
        public string SrcLang;

        [MarshalAs(UnmanagedType.LPStr)]
        public string TgtLang;

        public int UsePnc;
    }
}


================================================
FILE: scripts/dotnet/OfflineDolphinModelConfig.cs
================================================
/// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineDolphinModelConfig
    {
        public OfflineDolphinModelConfig()
        {
            Model = "";
        }
        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;
    }
}


================================================
FILE: scripts/dotnet/OfflineFireRedAsrCtcModel.cs
================================================
/// Copyright (c)  2026  Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{

    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineFireRedAsrCtcModelConfig
    {
        public OfflineFireRedAsrCtcModelConfig()
        {
            Model = "";
        }
        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;
    }
}


================================================
FILE: scripts/dotnet/OfflineFireRedAsrModelConfig.cs
================================================
/// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineFireRedAsrModelConfig
    {
        public OfflineFireRedAsrModelConfig()
        {
            Encoder = "";
            Decoder = "";
        }
        [MarshalAs(UnmanagedType.LPStr)]
        public string Encoder;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Decoder;
    }
}


================================================
FILE: scripts/dotnet/OfflineFunAsrNanoModel.cs
================================================
/// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{

    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineFunAsrNanoModelConfig
    {
        public OfflineFunAsrNanoModelConfig()
        {
            EncoderAdaptor = "";
            LLM = "";
            Embedding = "";
            Tokenizer = "";
            SystemPrompt = "You are a helpful assistant.";
            UserPrompt = "语音转写：";
            MaxNewTokens = 512;
            Temperature = 1e-6F;
            TopP = 0.8F;
            Seed = 42;
            Language = "";
            Itn = 0;
            Hotwords = "";
        }

        [MarshalAs(UnmanagedType.LPStr)]
        public string EncoderAdaptor;

        [MarshalAs(UnmanagedType.LPStr)]
        public string LLM;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Embedding;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Tokenizer;

        [MarshalAs(UnmanagedType.LPStr)]
        public string SystemPrompt;

        [MarshalAs(UnmanagedType.LPStr)]
        public string UserPrompt;

        public int MaxNewTokens;
        public float Temperature;
        public float TopP;
        public int Seed;
        [MarshalAs(UnmanagedType.LPStr)]
        public string Language;

        public int Itn;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Hotwords;
    }
}


================================================
FILE: scripts/dotnet/OfflineLMConfig.cs
================================================
/// Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
/// Copyright (c)  2023 by manyeyes
/// Copyright (c)  2024.5 by 东风破

using System.Runtime.InteropServices;

namespace SherpaOnnx
{

    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineLMConfig
    {
        public OfflineLMConfig()
        {
            Model = "";
            Scale = 0.5F;
        }
        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;

        public float Scale;
    }

}

================================================
FILE: scripts/dotnet/OfflineMedAsrCtcModel.cs
================================================
/// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{

    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineMedAsrCtcModelConfig
    {
        public OfflineMedAsrCtcModelConfig()
        {
            Model = "";
        }
        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;
    }
}


================================================
FILE: scripts/dotnet/OfflineModelConfig.cs
================================================
/// Copyright (c)  2024.5 by 东风破

using System.Runtime.InteropServices;

namespace SherpaOnnx
{

    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineModelConfig
    {
        public OfflineModelConfig()
        {
            Transducer = new OfflineTransducerModelConfig();
            Paraformer = new OfflineParaformerModelConfig();
            NeMoCtc = new OfflineNemoEncDecCtcModelConfig();
            Whisper = new OfflineWhisperModelConfig();
            Tdnn = new OfflineTdnnModelConfig();
            Tokens = "";
            NumThreads = 1;
            Debug = 0;
            Provider = "cpu";
            ModelType = "";
            ModelingUnit = "cjkchar";
            BpeVocab = "";
            TeleSpeechCtc = "";
            SenseVoice = new OfflineSenseVoiceModelConfig();
            Moonshine = new OfflineMoonshineModelConfig();
            FireRedAsr = new OfflineFireRedAsrModelConfig();
            Dolphin = new OfflineDolphinModelConfig();
            ZipformerCtc = new OfflineZipformerCtcModelConfig();
            Canary = new OfflineCanaryModelConfig();
            WenetCtc = new OfflineWenetCtcModelConfig();
            Omnilingual = new OfflineOmnilingualAsrCtcModelConfig();
            MedAsr = new OfflineMedAsrCtcModelConfig();
            FunAsrNano = new OfflineFunAsrNanoModelConfig();
            FireRedAsrCtc = new OfflineFireRedAsrCtcModelConfig();
        }
        public OfflineTransducerModelConfig Transducer;
        public OfflineParaformerModelConfig Paraformer;
        public OfflineNemoEncDecCtcModelConfig NeMoCtc;
        public OfflineWhisperModelConfig Whisper;
        public OfflineTdnnModelConfig Tdnn;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Tokens;

        public int NumThreads;

        public int Debug;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Provider;

        [MarshalAs(UnmanagedType.LPStr)]
        public string ModelType;

        [MarshalAs(UnmanagedType.LPStr)]
        public string ModelingUnit;

        [MarshalAs(UnmanagedType.LPStr)]
        public string BpeVocab;

        [MarshalAs(UnmanagedType.LPStr)]
        public string TeleSpeechCtc;

        public OfflineSenseVoiceModelConfig SenseVoice;
        public OfflineMoonshineModelConfig Moonshine;
        public OfflineFireRedAsrModelConfig FireRedAsr;
        public OfflineDolphinModelConfig Dolphin;
        public OfflineZipformerCtcModelConfig ZipformerCtc;
        public OfflineCanaryModelConfig Canary;
        public OfflineWenetCtcModelConfig WenetCtc;
        public OfflineOmnilingualAsrCtcModelConfig Omnilingual;
        public OfflineMedAsrCtcModelConfig MedAsr;
        public OfflineFunAsrNanoModelConfig FunAsrNano;
        public OfflineFireRedAsrCtcModelConfig FireRedAsrCtc;
    }
}


================================================
FILE: scripts/dotnet/OfflineMoonshineModelConfig.cs
================================================
/// Copyright (c)  2024-2026  Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

// For Moonshine v1, you need four models:
//  - preprocessor, encoder, cached_decoder, uncached_decoder
//
// For Moonshine v2, you need 2 models:
//  - encoder, merged_decoder
namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineMoonshineModelConfig
    {
        public OfflineMoonshineModelConfig()
        {
            Preprocessor = "";
            Encoder = "";
            UncachedDecoder = "";
            CachedDecoder = "";
            MergedDecoder = "";
        }
        [MarshalAs(UnmanagedType.LPStr)]
        public string Preprocessor;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Encoder;

        [MarshalAs(UnmanagedType.LPStr)]
        public string UncachedDecoder;

        [MarshalAs(UnmanagedType.LPStr)]
        public string CachedDecoder;

        [MarshalAs(UnmanagedType.LPStr)]
        public string MergedDecoder;
    }
}


================================================
FILE: scripts/dotnet/OfflineNemoEncDecCtcModelConfig.cs
================================================
/// Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
/// Copyright (c)  2023 by manyeyes
/// Copyright (c)  2024.5 by 东风破

using System.Runtime.InteropServices;

namespace SherpaOnnx
{

    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineNemoEncDecCtcModelConfig
    {
        public OfflineNemoEncDecCtcModelConfig()
        {
            Model = "";
        }
        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;
    }
}

================================================
FILE: scripts/dotnet/OfflineOmnilingualAsrCtcModel.cs
================================================
/// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{

    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineOmnilingualAsrCtcModelConfig
    {
        public OfflineOmnilingualAsrCtcModelConfig()
        {
            Model = "";
        }
        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;
    }
}


================================================
FILE: scripts/dotnet/OfflineParaformerModelConfig.cs
================================================
/// Copyright (c)  2024.5 by 东风破

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineParaformerModelConfig
    {
        public OfflineParaformerModelConfig()
        {
            Model = "";
        }
        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;
    }

}

================================================
FILE: scripts/dotnet/OfflinePunctuation.cs
================================================
﻿/// Copyright (c)  2024  Xiaomi Corporation (authors: Fangjun Kuang)
using System;
using System.Runtime.InteropServices;
using System.Text;


namespace SherpaOnnx
{
    public class OfflinePunctuation : IDisposable
    {
        public OfflinePunctuation(OfflinePunctuationConfig config)
        {
            IntPtr h = SherpaOnnxCreateOfflinePunctuation(ref config);
            _handle = new HandleRef(this, h);
        }

        public String AddPunct(String text)
        {
            byte[] utf8Bytes = Encoding.UTF8.GetBytes(text);
            byte[] utf8BytesWithNull = new byte[utf8Bytes.Length + 1]; // +1 for null terminator
            Array.Copy(utf8Bytes, utf8BytesWithNull, utf8Bytes.Length);
            utf8BytesWithNull[utf8Bytes.Length] = 0; // Null terminator

            IntPtr p = SherpaOfflinePunctuationAddPunct(_handle.Handle, utf8BytesWithNull);

            string s = "";
            int length = 0;

            unsafe
            {
                byte* b = (byte*)p;
                if (b != null)
                {
                    while (*b != 0)
                    {
                        ++b;
                        length += 1;
                    }
                }
            }

            if (length > 0)
            {
                byte[] stringBuffer = new byte[length];
                Marshal.Copy(p, stringBuffer, 0, length);
                s = Encoding.UTF8.GetString(stringBuffer);
            }

            SherpaOfflinePunctuationFreeText(p);

            return s;
        }

        public void Dispose()
        {
            Cleanup();
            // Prevent the object from being placed on the
            // finalization queue
            System.GC.SuppressFinalize(this);
        }

        ~OfflinePunctuation()
        {
            Cleanup();
        }

        private void Cleanup()
        {
            SherpaOnnxDestroyOfflinePunctuation(_handle.Handle);

            // Don't permit the handle to be used again.
            _handle = new HandleRef(this, IntPtr.Zero);
        }

        private HandleRef _handle;


        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxCreateOfflinePunctuation(ref OfflinePunctuationConfig config);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxDestroyOfflinePunctuation(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOfflinePunctuationAddPunct(IntPtr handle, [MarshalAs(UnmanagedType.LPArray, ArraySubType = UnmanagedType.I1)] byte[] utf8Text);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOfflinePunctuationFreeText(IntPtr p);
    }
}


================================================
FILE: scripts/dotnet/OfflinePunctuationConfig.cs
================================================
/// Copyright (c)  2024  Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflinePunctuationConfig
    {
        public OfflinePunctuationConfig()
        {
            Model = new OfflinePunctuationModelConfig();
        }
        public OfflinePunctuationModelConfig Model;
    }
}


================================================
FILE: scripts/dotnet/OfflinePunctuationModelConfig.cs
================================================
/// Copyright (c)  2024  Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflinePunctuationModelConfig
    {
        public OfflinePunctuationModelConfig()
        {
            CtTransformer = "";
            NumThreads = 1;
            Debug = 0;
            Provider = "cpu";
        }

        [MarshalAs(UnmanagedType.LPStr)]
        public string CtTransformer;

        public int NumThreads;

        public int Debug;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Provider;
    }
}


================================================
FILE: scripts/dotnet/OfflineRecognizer.cs
================================================
﻿/// Copyright (c)  2024.5 by 东风破

using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    public class OfflineRecognizer : IDisposable
    {
        public OfflineRecognizer(OfflineRecognizerConfig config)
        {
            IntPtr h = SherpaOnnxCreateOfflineRecognizer(ref config);
            _handle = new HandleRef(this, h);
        }

        public void SetConfig(OfflineRecognizerConfig config)
        {
            SherpaOnnxOfflineRecognizerSetConfig(_handle.Handle, ref config);
        }

        public OfflineStream CreateStream()
        {
            IntPtr p = SherpaOnnxCreateOfflineStream(_handle.Handle);
            return new OfflineStream(p);
        }

        public void Decode(OfflineStream stream)
        {
            Decode(_handle.Handle, stream.Handle);
        }

        // The caller should ensure all passed streams are ready for decoding.
        public void Decode(IEnumerable<OfflineStream> streams)
        {
            // TargetFramework=net20 does not support System.Linq
            // IntPtr[] ptrs = streams.Select(s => s.Handle).ToArray();
            List<IntPtr> list = new List<IntPtr>();
            foreach (OfflineStream s in streams)
            {
              list.Add(s.Handle);
            }
            IntPtr[] ptrs = list.ToArray();
            Decode(_handle.Handle, ptrs, ptrs.Length);
        }

        public void Dispose()
        {
            Cleanup();
            // Prevent the object from being placed on the
            // finalization queue
            System.GC.SuppressFinalize(this);
        }

        ~OfflineRecognizer()
        {
            Cleanup();
        }

        private void Cleanup()
        {
            SherpaOnnxDestroyOfflineRecognizer(_handle.Handle);

            // Don't permit the handle to be used again.
            _handle = new HandleRef(this, IntPtr.Zero);
        }

        private HandleRef _handle;

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxCreateOfflineRecognizer(ref OfflineRecognizerConfig config);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxOfflineRecognizerSetConfig(IntPtr handle, ref OfflineRecognizerConfig config);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxDestroyOfflineRecognizer(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxCreateOfflineStream(IntPtr handle);

        [DllImport(Dll.Filename, EntryPoint = "SherpaOnnxDecodeOfflineStream")]
        private static extern void Decode(IntPtr handle, IntPtr stream);

        [DllImport(Dll.Filename, EntryPoint = "SherpaOnnxDecodeMultipleOfflineStreams")]
        private static extern void Decode(IntPtr handle, IntPtr[] streams, int n);
    }

}


================================================
FILE: scripts/dotnet/OfflineRecognizerConfig.cs
================================================
/// Copyright (c)  2024.5 by 东风破

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineRecognizerConfig
    {
        public OfflineRecognizerConfig()
        {
            FeatConfig = new FeatureConfig();
            ModelConfig = new OfflineModelConfig();
            LmConfig = new OfflineLMConfig();

            DecodingMethod = "greedy_search";
            MaxActivePaths = 4;
            HotwordsFile = "";
            HotwordsScore = 1.5F;
            RuleFsts = "";
            RuleFars = "";
            BlankPenalty = 0.0F;
            Hr = new HomophoneReplacerConfig();
        }
        public FeatureConfig FeatConfig;
        public OfflineModelConfig ModelConfig;
        public OfflineLMConfig LmConfig;

        [MarshalAs(UnmanagedType.LPStr)]
        public string DecodingMethod;

        public int MaxActivePaths;

        [MarshalAs(UnmanagedType.LPStr)]
        public string HotwordsFile;

        public float HotwordsScore;

        [MarshalAs(UnmanagedType.LPStr)]
        public string RuleFsts;

        [MarshalAs(UnmanagedType.LPStr)]
        public string RuleFars;

        public float BlankPenalty;

        public HomophoneReplacerConfig Hr;
    }
}


================================================
FILE: scripts/dotnet/OfflineRecognizerResult.cs
================================================
﻿/// Copyright (c)  2024.5 by 东风破

using System;
using System.Runtime.InteropServices;
using System.Text;

namespace SherpaOnnx
{

    public class OfflineRecognizerResult
    {
        public OfflineRecognizerResult(IntPtr handle)
        {
            Impl impl = (Impl)Marshal.PtrToStructure(handle, typeof(Impl));

            // PtrToStringUTF8() requires .net standard 2.1
            // _text = Marshal.PtrToStringUTF8(impl.Text);

            int length = 0;

            unsafe
            {
                byte* buffer = (byte*)impl.Text;
                while (*buffer != 0)
                {
                    ++buffer;
                    length += 1;
                }
            }

            byte[] stringBuffer = new byte[length];
            Marshal.Copy(impl.Text, stringBuffer, 0, length);
            _text = Encoding.UTF8.GetString(stringBuffer);

            _tokens = new String[impl.Count];

            unsafe
            {
                byte* buf = (byte*)impl.Tokens;
                for (int i = 0; i < impl.Count; i++)
                {
                    length = 0;
                    byte* start = buf;
                    while (*buf != 0)
                    {
                        ++buf;
                        length += 1;
                    }
                    ++buf;

                    stringBuffer = new byte[length];
                    fixed (byte* pTarget = stringBuffer)
                    {
                        for (int k = 0; k < length; k++)
                        {
                            pTarget[k] = start[k];
                        }
                    }

                    _tokens[i] = Encoding.UTF8.GetString(stringBuffer);
                }
            }

            unsafe
            {
              if (impl.Timestamps != IntPtr.Zero)
              {
                float *t = (float*)impl.Timestamps;
                _timestamps = new float[impl.Count];
                fixed (float* f = _timestamps)
                {
                  for (int k = 0; k < impl.Count; k++)
                  {
                    f[k] = t[k];
                  }
                }
              }
            }

            unsafe
            {
              if (impl.Durations != IntPtr.Zero)
              {
                float *d = (float*)impl.Durations;
                _durations = new float[impl.Count];
                fixed (float* f = _durations)
                {
                  for (int k = 0; k < impl.Count; k++)
                  {
                    f[k] = d[k];
                  }
                }
              }
            }
        }

        [StructLayout(LayoutKind.Sequential)]
        struct Impl
        {
            public IntPtr Text;
            public IntPtr Timestamps;
            public int Count;
            public IntPtr Tokens;
            public IntPtr Durations;
        }

        private String _text;
        public String Text => _text;

        private String[] _tokens;
        public String[] Tokens => _tokens;

        private float[] _timestamps;
        public float[] Timestamps => _timestamps;

        private float[] _durations;
        public float[] Durations => _durations;
    }
}


================================================
FILE: scripts/dotnet/OfflineSenseVoiceModelConfig.cs
================================================
/// Copyright (c)  2024  Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineSenseVoiceModelConfig
    {
        public OfflineSenseVoiceModelConfig()
        {
            Model = "";
            Language = "";
            UseInverseTextNormalization = 0;
        }
        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Language;

        public int UseInverseTextNormalization;
    }
}


================================================
FILE: scripts/dotnet/OfflineSpeakerDiarization.cs
================================================
/// Copyright (c)  2024  Xiaomi Corporation
using System;
using System.Runtime.InteropServices;
using System.Text;

namespace SherpaOnnx
{
    // IntPtr is actually a `const float*` from C++
    public delegate int OfflineSpeakerDiarizationProgressCallback(int numProcessedChunks, int numTotalChunks, IntPtr arg);

    public class OfflineSpeakerDiarization : IDisposable
    {
        public OfflineSpeakerDiarization(OfflineSpeakerDiarizationConfig config)
        {
            IntPtr h = SherpaOnnxCreateOfflineSpeakerDiarization(ref config);
            _handle = new HandleRef(this, h);
        }

        public void SetConfig(OfflineSpeakerDiarizationConfig config)
        {
            SherpaOnnxOfflineSpeakerDiarizationSetConfig(_handle.Handle, ref config);
        }

        public OfflineSpeakerDiarizationSegment[] Process(float[] samples)
        {
            IntPtr result = SherpaOnnxOfflineSpeakerDiarizationProcess(_handle.Handle, samples, samples.Length);
            return ProcessImpl(result);
        }

        public OfflineSpeakerDiarizationSegment[] ProcessWithCallback(float[] samples, OfflineSpeakerDiarizationProgressCallback callback, IntPtr arg)
        {
            IntPtr result = SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback(_handle.Handle, samples, samples.Length, callback, arg);
            return ProcessImpl(result);
        }

        private OfflineSpeakerDiarizationSegment[] ProcessImpl(IntPtr result)
        {
            if (result == IntPtr.Zero)
            {
              return new OfflineSpeakerDiarizationSegment[] {};
            }

            int numSegments = SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(result);
            IntPtr p = SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(result);

            OfflineSpeakerDiarizationSegment[] ans = new OfflineSpeakerDiarizationSegment[numSegments];
            unsafe
            {
              int size = sizeof(float) * 2 + sizeof(int);
              for (int i = 0; i != numSegments; ++i)
              {
                IntPtr t = new IntPtr((byte*)p + i * size);
                ans[i] = new OfflineSpeakerDiarizationSegment(t);

                // The following IntPtr.Add() does not support net20
                // ans[i] = new OfflineSpeakerDiarizationSegment(IntPtr.Add(p, i));
              }
            }


            SherpaOnnxOfflineSpeakerDiarizationDestroySegment(p);
            SherpaOnnxOfflineSpeakerDiarizationDestroyResult(result);

            return ans;

        }

        public void Dispose()
        {
            Cleanup();
            // Prevent the object from being placed on the
            // finalization queue
            System.GC.SuppressFinalize(this);
        }

        ~OfflineSpeakerDiarization()
        {
            Cleanup();
        }

        private void Cleanup()
        {
            SherpaOnnxDestroyOfflineSpeakerDiarization(_handle.Handle);

            // Don't permit the handle to be used again.
            _handle = new HandleRef(this, IntPtr.Zero);
        }

        private HandleRef _handle;

        public int SampleRate
        {
            get
            {
                return SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(_handle.Handle);
            }
        }

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxCreateOfflineSpeakerDiarization(ref OfflineSpeakerDiarizationConfig config);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxDestroyOfflineSpeakerDiarization(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxOfflineSpeakerDiarizationProcess(IntPtr handle, float[] samples, int n);

        [DllImport(Dll.Filename, CallingConvention = CallingConvention.Cdecl)]
        private static extern IntPtr SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback(IntPtr handle, float[] samples, int n, OfflineSpeakerDiarizationProgressCallback callback, IntPtr arg);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxOfflineSpeakerDiarizationDestroyResult(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxOfflineSpeakerDiarizationDestroySegment(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxOfflineSpeakerDiarizationSetConfig(IntPtr handle, ref OfflineSpeakerDiarizationConfig config);
    }
}


================================================
FILE: scripts/dotnet/OfflineSpeakerDiarizationConfig.cs
================================================
/// Copyright (c)  2024  Xiaomi Corporation

using System.Runtime.InteropServices;

namespace SherpaOnnx
{

    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineSpeakerDiarizationConfig
    {
        public OfflineSpeakerDiarizationConfig()
        {
            Segmentation = new OfflineSpeakerSegmentationModelConfig();
            Embedding = new SpeakerEmbeddingExtractorConfig();
            Clustering = new FastClusteringConfig();

            MinDurationOn = 0.3F;
            MinDurationOff = 0.5F;
        }

        public OfflineSpeakerSegmentationModelConfig Segmentation;
        public SpeakerEmbeddingExtractorConfig Embedding;
        public FastClusteringConfig Clustering;

        public float MinDurationOn;
        public float MinDurationOff;
    }
}


================================================
FILE: scripts/dotnet/OfflineSpeakerDiarizationSegment.cs
================================================
/// Copyright (c)  2024  Xiaomi Corporation
using System;
using System.Runtime.InteropServices;
using System.Text;

namespace SherpaOnnx
{

    public class OfflineSpeakerDiarizationSegment
    {
        public OfflineSpeakerDiarizationSegment(IntPtr handle)
        {
          Impl impl = (Impl)Marshal.PtrToStructure(handle, typeof(Impl));

          Start = impl.Start;
          End = impl.End;
          Speaker = impl.Speaker;
        }

        [StructLayout(LayoutKind.Sequential)]
        struct Impl
        {
            public float Start;
            public float End;
            public int Speaker;
        }

        public float Start;
        public float End;
        public int Speaker;
    }
}


================================================
FILE: scripts/dotnet/OfflineSpeakerSegmentationModelConfig.cs
================================================
/// Copyright (c)  2024  Xiaomi Corporation

using System.Runtime.InteropServices;

namespace SherpaOnnx
{

    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineSpeakerSegmentationModelConfig
    {
        public OfflineSpeakerSegmentationModelConfig()
        {
            Pyannote = new OfflineSpeakerSegmentationPyannoteModelConfig();
            NumThreads = 1;
            Debug = 0;
            Provider = "cpu";
        }

        public OfflineSpeakerSegmentationPyannoteModelConfig Pyannote;

        /// Number of threads used to run the neural network model
        public int NumThreads;

        /// true to print debug information of the model
        public int Debug;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Provider;
    }
}


================================================
FILE: scripts/dotnet/OfflineSpeakerSegmentationPyannoteModelConfig.cs
================================================
/// Copyright (c)  2024  Xiaomi Corporation

using System.Runtime.InteropServices;

namespace SherpaOnnx
{

    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineSpeakerSegmentationPyannoteModelConfig
    {
        public OfflineSpeakerSegmentationPyannoteModelConfig()
        {
            Model = "";
        }

        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;
    }
}


================================================
FILE: scripts/dotnet/OfflineSpeechDenoiser.cs
================================================
/// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)

using System;
using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    public class OfflineSpeechDenoiser: IDisposable
    {
        public OfflineSpeechDenoiser(OfflineSpeechDenoiserConfig config)
        {
            IntPtr h = SherpaOnnxCreateOfflineSpeechDenoiser(ref config);
            _handle = new HandleRef(this, h);
        }

        public DenoisedAudio Run(float[] samples, int sampleRate)
        {
            IntPtr p = SherpaOnnxOfflineSpeechDenoiserRun(_handle.Handle, samples, samples.Length, sampleRate);
            return new DenoisedAudio(p);
        }

        public void Dispose()
        {
            Cleanup();
            // Prevent the object from being placed on the
            // finalization queue
            System.GC.SuppressFinalize(this);
        }

        ~OfflineSpeechDenoiser()
        {
            Cleanup();
        }

        private void Cleanup()
        {
            SherpaOnnxDestroyOfflineSpeechDenoiser(_handle.Handle);

            // Don't permit the handle to be used again.
            _handle = new HandleRef(this, IntPtr.Zero);
        }

        private HandleRef _handle;

        public int SampleRate
        {
            get
            {
                return SherpaOnnxOfflineSpeechDenoiserGetSampleRate(_handle.Handle);
            }
        }

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxCreateOfflineSpeechDenoiser(ref OfflineSpeechDenoiserConfig config);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxDestroyOfflineSpeechDenoiser(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxOfflineSpeechDenoiserGetSampleRate(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxOfflineSpeechDenoiserRun(IntPtr handle, float[] samples, int n, int sampleRate);
    }
}


================================================
FILE: scripts/dotnet/OfflineSpeechDenoiserConfig.cs
================================================
/// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineSpeechDenoiserConfig
    {
        public OfflineSpeechDenoiserConfig()
        {
            Model = new OfflineSpeechDenoiserModelConfig();
        }
        public OfflineSpeechDenoiserModelConfig Model;
    }
}


================================================
FILE: scripts/dotnet/OfflineSpeechDenoiserDpdfNetModelConfig.cs
================================================
/// Copyright (c)  2026  Xiaomi Corporation

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineSpeechDenoiserDpdfNetModelConfig
    {
        public OfflineSpeechDenoiserDpdfNetModelConfig()
        {
            Model = "";
        }

        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;
    }
}


================================================
FILE: scripts/dotnet/OfflineSpeechDenoiserGtcrnModelConfig.cs
================================================
/// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineSpeechDenoiserGtcrnModelConfig
    {
        public OfflineSpeechDenoiserGtcrnModelConfig()
        {
            Model = "";
        }
        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;
    }
}


================================================
FILE: scripts/dotnet/OfflineSpeechDenoiserModelConfig.cs
================================================
/// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineSpeechDenoiserModelConfig
    {
        public OfflineSpeechDenoiserModelConfig()
        {
            Gtcrn = new OfflineSpeechDenoiserGtcrnModelConfig();
            Dpdfnet = new OfflineSpeechDenoiserDpdfNetModelConfig();
            NumThreads = 1;
            Debug = 0;
            Provider = "cpu";
        }

        public OfflineSpeechDenoiserGtcrnModelConfig Gtcrn;

        public int NumThreads;

        public int Debug;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Provider;

        public OfflineSpeechDenoiserDpdfNetModelConfig Dpdfnet;
    }
}


================================================
FILE: scripts/dotnet/OfflineStream.cs
================================================
﻿/// Copyright (c)  2024.5 by 东风破

using System;
using System.Runtime.InteropServices;

namespace SherpaOnnx
{

    public class OfflineStream : IDisposable
    {
        public OfflineStream(IntPtr p)
        {
            _handle = new HandleRef(this, p);
        }

        public void AcceptWaveform(int sampleRate, float[] samples)
        {
            AcceptWaveform(Handle, sampleRate, samples, samples.Length);
        }

        public void SetOption(string key, string value)
        {
            SherpaOnnxOfflineStreamSetOption(Handle, key, value);
        }

        public string GetOption(string key)
        {
            IntPtr p = SherpaOnnxOfflineStreamGetOption(Handle, key);
            return Marshal.PtrToStringAnsi(p) ?? "";
        }

        public bool HasOption(string key)
        {
            return SherpaOnnxOfflineStreamHasOption(Handle, key) == 1;
        }

        public OfflineRecognizerResult Result
        {
            get
            {
                IntPtr h = GetResult(_handle.Handle);
                OfflineRecognizerResult result = new OfflineRecognizerResult(h);
                DestroyResult(h);
                return result;
            }
        }

        ~OfflineStream()
        {
            Cleanup();
        }

        public void Dispose()
        {
            Cleanup();
            // Prevent the object from being placed on the
            // finalization queue
            System.GC.SuppressFinalize(this);
        }

        private void Cleanup()
        {
            SherpaOnnxDestroyOfflineStream(Handle);

            // Don't permit the handle to be used again.
            _handle = new HandleRef(this, IntPtr.Zero);
        }

        private HandleRef _handle;
        public IntPtr Handle => _handle.Handle;

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxDestroyOfflineStream(IntPtr handle);

        [DllImport(Dll.Filename, EntryPoint = "SherpaOnnxAcceptWaveformOffline")]
        private static extern void AcceptWaveform(IntPtr handle, int sampleRate, float[] samples, int n);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxOfflineStreamSetOption(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string key, [MarshalAs(UnmanagedType.LPStr)] string value);

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxOfflineStreamGetOption(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string key);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxOfflineStreamHasOption(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string key);

        [DllImport(Dll.Filename, EntryPoint = "SherpaOnnxGetOfflineStreamResult")]
        private static extern IntPtr GetResult(IntPtr handle);

        [DllImport(Dll.Filename, EntryPoint = "SherpaOnnxDestroyOfflineRecognizerResult")]
        private static extern void DestroyResult(IntPtr handle);
    }

}


================================================
FILE: scripts/dotnet/OfflineTdnnModelConfig.cs
================================================
/// Copyright (c)  2024.5 by 东风破

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineTdnnModelConfig
    {
        public OfflineTdnnModelConfig()
        {
            Model = "";
        }
        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;
    }

}

================================================
FILE: scripts/dotnet/OfflineTransducerModelConfig.cs
================================================
/// Copyright (c)  2024.5 by 东风破

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineTransducerModelConfig
    {
        public OfflineTransducerModelConfig()
        {
            Encoder = "";
            Decoder = "";
            Joiner = "";
        }
        [MarshalAs(UnmanagedType.LPStr)]
        public string Encoder;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Decoder;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Joiner;
    }

}

================================================
FILE: scripts/dotnet/OfflineTts.cs
================================================
﻿/// Copyright (c)  2024.5 by 东风破
using System;
using System.Runtime.InteropServices;
using System.Text;

namespace SherpaOnnx
{
    // IntPtr is actually a `const float*` from C++
    [UnmanagedFunctionPointer(CallingConvention.Cdecl)]
    public delegate int OfflineTtsCallback(IntPtr samples, int n);

    [UnmanagedFunctionPointer(CallingConvention.Cdecl)]
    public delegate int OfflineTtsCallbackProgress(IntPtr samples, int n, float progress);

    [UnmanagedFunctionPointer(CallingConvention.Cdecl)]
    public delegate int OfflineTtsCallbackProgressWithArg(IntPtr samples, int n, float progress, IntPtr arg);


    public class OfflineTts : IDisposable
    {
        public OfflineTts(OfflineTtsConfig config)
        {
            IntPtr h = SherpaOnnxCreateOfflineTts(ref config);
            _handle = new HandleRef(this, h);
        }

        public OfflineTtsGeneratedAudio Generate(String text, float speed, int speakerId)
        {
            byte[] utf8Bytes = Encoding.UTF8.GetBytes(text);
            byte[] utf8BytesWithNull = new byte[utf8Bytes.Length + 1]; // +1 for null terminator
            Array.Copy(utf8Bytes, utf8BytesWithNull, utf8Bytes.Length);
            utf8BytesWithNull[utf8Bytes.Length] = 0; // Null terminator
            IntPtr p = SherpaOnnxOfflineTtsGenerate(_handle.Handle, utf8BytesWithNull, speakerId, speed);
            return new OfflineTtsGeneratedAudio(p);
        }

        public OfflineTtsGeneratedAudio GenerateWithCallback(
            String text,
            float speed,
            int speakerId,
            OfflineTtsCallback callback)
        {
            byte[] utf8Bytes = Encoding.UTF8.GetBytes(text);
            byte[] utf8BytesWithNull = new byte[utf8Bytes.Length + 1];
            Array.Copy(utf8Bytes, utf8BytesWithNull, utf8Bytes.Length);
            utf8BytesWithNull[utf8Bytes.Length] = 0;

            GCHandle callbackHandle = default(GCHandle);
            try
            {
                callbackHandle = GCHandle.Alloc(callback);

                IntPtr p = SherpaOnnxOfflineTtsGenerateWithCallback(
                    _handle.Handle,
                    utf8BytesWithNull,
                    speakerId,
                    speed,
                    callback
                );

                return new OfflineTtsGeneratedAudio(p);
            }
            finally
            {
                if (callbackHandle.IsAllocated)
                    callbackHandle.Free();
            }
        }

        public OfflineTtsGeneratedAudio GenerateWithCallbackProgress(
            String text,
            float speed,
            int speakerId,
            OfflineTtsCallbackProgress callback)
        {
            byte[] utf8Bytes = Encoding.UTF8.GetBytes(text);
            byte[] utf8BytesWithNull = new byte[utf8Bytes.Length + 1];
            Array.Copy(utf8Bytes, utf8BytesWithNull, utf8Bytes.Length);
            utf8BytesWithNull[utf8Bytes.Length] = 0;

            GCHandle callbackHandle = default(GCHandle);
            try
            {
                callbackHandle = GCHandle.Alloc(callback);

                IntPtr p = SherpaOnnxOfflineTtsGenerateWithProgressCallback(
                    _handle.Handle,
                    utf8BytesWithNull,
                    speakerId,
                    speed,
                    callback
                );

                return new OfflineTtsGeneratedAudio(p);
            }
            finally
            {
                if (callbackHandle.IsAllocated)
                    callbackHandle.Free();
            }
        }


        public OfflineTtsGeneratedAudio GenerateWithConfig(
            string text,
            OfflineTtsGenerationConfig config,
            OfflineTtsCallbackProgressWithArg callback)
        {
            byte[] utf8Bytes = Encoding.UTF8.GetBytes(text);
            byte[] utf8BytesWithNull = new byte[utf8Bytes.Length + 1];
            Array.Copy(utf8Bytes, utf8BytesWithNull, utf8Bytes.Length);
            utf8BytesWithNull[utf8Bytes.Length] = 0;

            GCHandle callbackHandle = default(GCHandle);
            GCHandle? audioHandle = null;

            var nativeConfig = config.ToNative(out audioHandle);

            try
            {
                callbackHandle = GCHandle.Alloc(callback);

                IntPtr p = SherpaOnnxOfflineTtsGenerateWithConfig(
                    _handle.Handle,
                    utf8BytesWithNull,
                    ref nativeConfig,
                    callback,
                    IntPtr.Zero
                );

                return new OfflineTtsGeneratedAudio(p);
            }
            finally
            {
                if (callbackHandle.IsAllocated)
                    callbackHandle.Free();

                if (audioHandle.HasValue)
                    audioHandle.Value.Free();
            }
        }

        public void Dispose()
        {
            Cleanup();
            // Prevent the object from being placed on the
            // finalization queue
            System.GC.SuppressFinalize(this);
        }

        ~OfflineTts()
        {
            Cleanup();
        }

        private void Cleanup()
        {
            SherpaOnnxDestroyOfflineTts(_handle.Handle);

            // Don't permit the handle to be used again.
            _handle = new HandleRef(this, IntPtr.Zero);
        }

        private HandleRef _handle;

        public int SampleRate
        {
            get
            {
                return SherpaOnnxOfflineTtsSampleRate(_handle.Handle);
            }
        }

        public int NumSpeakers
        {
            get
            {
                return SherpaOnnxOfflineTtsNumSpeakers(_handle.Handle);
            }
        }

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxCreateOfflineTts(ref OfflineTtsConfig config);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxDestroyOfflineTts(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxOfflineTtsSampleRate(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxOfflineTtsNumSpeakers(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxOfflineTtsGenerate(IntPtr handle, [MarshalAs(UnmanagedType.LPArray, ArraySubType = UnmanagedType.I1)] byte[] utf8Text, int sid, float speed);

        [DllImport(Dll.Filename, CallingConvention = CallingConvention.Cdecl)]
        private static extern IntPtr SherpaOnnxOfflineTtsGenerateWithCallback(IntPtr handle, [MarshalAs(UnmanagedType.LPArray, ArraySubType = UnmanagedType.I1)] byte[] utf8Text, int sid, float speed, OfflineTtsCallback callback);

        [DllImport(Dll.Filename, CallingConvention = CallingConvention.Cdecl)]
        private static extern IntPtr SherpaOnnxOfflineTtsGenerateWithProgressCallback(IntPtr handle, [MarshalAs(UnmanagedType.LPArray, ArraySubType = UnmanagedType.I1)] byte[] utf8Text, int sid, float speed, OfflineTtsCallbackProgress callback);

        [DllImport(Dll.Filename, CallingConvention = CallingConvention.Cdecl)]
        private static extern IntPtr SherpaOnnxOfflineTtsGenerateWithConfig(IntPtr handle, [MarshalAs(UnmanagedType.LPArray, ArraySubType = UnmanagedType.I1)] byte[] utf8Text, ref OfflineTtsGenerationConfig.NativeStruct config, OfflineTtsCallbackProgressWithArg callback, IntPtr arg);
    }
}


================================================
FILE: scripts/dotnet/OfflineTtsConfig.cs
================================================
/// Copyright (c)  2024.5 by 东风破

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineTtsConfig
    {
        public OfflineTtsConfig()
        {
            Model = new OfflineTtsModelConfig();
            RuleFsts = "";
            MaxNumSentences = 1;
            RuleFars = "";
            SilenceScale = 0.2F;
        }
        public OfflineTtsModelConfig Model;

        [MarshalAs(UnmanagedType.LPStr)]
        public string RuleFsts;

        public int MaxNumSentences;

        [MarshalAs(UnmanagedType.LPStr)]
        public string RuleFars;

        public float SilenceScale;
    }
}


================================================
FILE: scripts/dotnet/OfflineTtsGeneratedAudio.cs
================================================
﻿/// Copyright (c)  2024.5 by 东风破
using System;
using System.Runtime.InteropServices;
using System.Text;

namespace SherpaOnnx
{
    public class OfflineTtsGeneratedAudio
    {
        public OfflineTtsGeneratedAudio(IntPtr p)
        {
            _handle = new HandleRef(this, p);
        }

        public bool SaveToWaveFile(String filename)
        {
            Impl impl = (Impl)Marshal.PtrToStructure(Handle, typeof(Impl));
            byte[] utf8Filename = Encoding.UTF8.GetBytes(filename);
            byte[] utf8FilenameWithNull = new byte[utf8Filename.Length + 1]; // +1 for null terminator
            Array.Copy(utf8Filename, utf8FilenameWithNull, utf8Filename.Length);
            utf8FilenameWithNull[utf8Filename.Length] = 0; // Null terminator
            int status = SherpaOnnxWriteWave(impl.Samples, impl.NumSamples, impl.SampleRate, utf8FilenameWithNull);
            return status == 1;
        }

        ~OfflineTtsGeneratedAudio()
        {
            Cleanup();
        }

        public void Dispose()
        {
            Cleanup();
            // Prevent the object from being placed on the
            // finalization queue
            System.GC.SuppressFinalize(this);
        }

        private void Cleanup()
        {
            SherpaOnnxDestroyOfflineTtsGeneratedAudio(Handle);

            // Don't permit the handle to be used again.
            _handle = new HandleRef(this, IntPtr.Zero);
        }

        [StructLayout(LayoutKind.Sequential)]
        struct Impl
        {
            public IntPtr Samples;
            public int NumSamples;
            public int SampleRate;
        }

        private HandleRef _handle;
        public IntPtr Handle => _handle.Handle;

        public int NumSamples
        {
            get
            {
                Impl impl = (Impl)Marshal.PtrToStructure(Handle, typeof(Impl));
                return impl.NumSamples;
            }
        }

        public int SampleRate
        {
            get
            {
                Impl impl = (Impl)Marshal.PtrToStructure(Handle, typeof(Impl));
                return impl.SampleRate;
            }
        }

        public float[] Samples
        {
            get
            {
                Impl impl = (Impl)Marshal.PtrToStructure(Handle, typeof(Impl));

                float[] samples = new float[impl.NumSamples];
                Marshal.Copy(impl.Samples, samples, 0, impl.NumSamples);
                return samples;
            }
        }

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxDestroyOfflineTtsGeneratedAudio(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxWriteWave(IntPtr samples, int n, int sample_rate, [MarshalAs(UnmanagedType.LPArray, ArraySubType = UnmanagedType.I1)] byte[] utf8Filename);
    }
}


================================================
FILE: scripts/dotnet/OfflineTtsGenerationConfig.cs
================================================
﻿/// Copyright (c)  2026  Xiaomi Corporation (authors: Fangjun Kuang)

using System;
using System.Collections;
using System.Runtime.InteropServices;
using System.Text;

namespace SherpaOnnx
{
    public class OfflineTtsGenerationConfig
    {
        public OfflineTtsGenerationConfig()
        {
            SilenceScale = 0.2f;
            Speed = 1.0f;
            Sid = 0;
            ReferenceAudio = null;
            ReferenceSampleRate = 0;
            ReferenceText = "";
            NumSteps = 5;
            Extra = new Hashtable();
        }

        public float SilenceScale;
        public float Speed;
        public int Sid;

        public float[] ReferenceAudio;
        public int ReferenceSampleRate;
        public string ReferenceText;
        public int NumSteps;

        /// <summary>
        /// Extra attributes stored as key/value pairs
        /// </summary>
        public Hashtable Extra;

        /// <summary>
        /// Convert to native struct for P/Invoke
        /// </summary>
        internal NativeStruct ToNative(out GCHandle? audioHandle)
        {
            NativeStruct native = new NativeStruct();
            native.SilenceScale = SilenceScale;
            native.Speed = Speed;
            native.Sid = Sid;

            // Handle ReferenceAudio
            audioHandle = null;
            if (ReferenceAudio != null && ReferenceAudio.Length > 0)
            {
                audioHandle = GCHandle.Alloc(ReferenceAudio, GCHandleType.Pinned);
                native.ReferenceAudio = audioHandle.Value.AddrOfPinnedObject();
                native.ReferenceAudioLen = ReferenceAudio.Length;
            }
            else
            {
                native.ReferenceAudio = IntPtr.Zero;
                native.ReferenceAudioLen = 0;
            }

            native.ReferenceSampleRate = ReferenceSampleRate;
            native.ReferenceText = ReferenceText ?? "";
            native.NumSteps = NumSteps;

            // Handle Extra JSON
            native.Extra = "{}";
            if (Extra != null && Extra.Count > 0)
            {
                StringBuilder json = new StringBuilder();
                json.Append("{");
                bool first = true;

                foreach (DictionaryEntry kv in Extra)
                {
                    if (!first) json.Append(",");
                    first = false;

                    string key = JsonEscape(kv.Key.ToString());
                    string val;

                    if (kv.Value is string)
                        val = JsonEscape((string)kv.Value);
                    else if (kv.Value is float || kv.Value is double)
                        val = ((IFormattable)kv.Value).ToString(null, System.Globalization.CultureInfo.InvariantCulture);
                    else if (kv.Value is bool)
                        val = (bool)kv.Value ? "true" : "false";
                    else
                        val = kv.Value.ToString();

                    json.AppendFormat("{0}:{1}", key, val);
                }

                json.Append("}");
                native.Extra = json.ToString();
            }
            return native;
        }

        /// <summary>
        /// Escapes a string for JSON (for .NET 2.0)
        /// </summary>
        private static string JsonEscape(string s)
        {
            if (s == null) return "\"\"";

            StringBuilder sb = new StringBuilder();
            sb.Append('"');
            foreach (char c in s)
            {
                switch (c)
                {
                    case '"': sb.Append("\\\""); break;
                    case '\\': sb.Append("\\\\"); break;
                    case '\b': sb.Append("\\b"); break;
                    case '\f': sb.Append("\\f"); break;
                    case '\n': sb.Append("\\n"); break;
                    case '\r': sb.Append("\\r"); break;
                    case '\t': sb.Append("\\t"); break;
                    default:
                        if (c < 32 || c > 126)
                            sb.AppendFormat("\\u{0:X4}", (int)c);
                        else
                            sb.Append(c);
                        break;
                }
            }
            sb.Append('"');
            return sb.ToString();
        }

        [StructLayout(LayoutKind.Sequential)]
        internal struct NativeStruct
        {
            public float SilenceScale;
            public float Speed;
            public int Sid;

            public IntPtr ReferenceAudio;
            public int ReferenceAudioLen;
            public int ReferenceSampleRate;

            [MarshalAs(UnmanagedType.LPStr)]
            public string ReferenceText;

            public int NumSteps;

            [MarshalAs(UnmanagedType.LPStr)]
            public string Extra;
        }
    }
}


================================================
FILE: scripts/dotnet/OfflineTtsKittenModelConfig.cs
================================================
/// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineTtsKittenModelConfig
    {
        public OfflineTtsKittenModelConfig()
        {
            Model = "";
            Voices = "";
            Tokens = "";
            DataDir = "";

            LengthScale = 1.0F;
        }
        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Voices;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Tokens;

        [MarshalAs(UnmanagedType.LPStr)]
        public string DataDir;

        public float LengthScale;
    }
}


================================================
FILE: scripts/dotnet/OfflineTtsKokoroModelConfig.cs
================================================
/// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineTtsKokoroModelConfig
    {
        public OfflineTtsKokoroModelConfig()
        {
            Model = "";
            Voices = "";
            Tokens = "";
            DataDir = "";

            LengthScale = 1.0F;

            DictDir = "";
            Lexicon = "";
            Lang = "";
        }
        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Voices;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Tokens;

        [MarshalAs(UnmanagedType.LPStr)]
        public string DataDir;

        public float LengthScale;

        [MarshalAs(UnmanagedType.LPStr)]
        public string DictDir;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Lexicon;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Lang;
    }
}


================================================
FILE: scripts/dotnet/OfflineTtsMatchaModelConfig.cs
================================================
/// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineTtsMatchaModelConfig
    {
        public OfflineTtsMatchaModelConfig()
        {
            AcousticModel = "";
            Vocoder = "";
            Lexicon = "";
            Tokens = "";
            DataDir = "";

            NoiseScale = 0.667F;
            LengthScale = 1.0F;

            DictDir = "";
        }
        [MarshalAs(UnmanagedType.LPStr)]
        public string AcousticModel;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Vocoder;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Lexicon;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Tokens;

        [MarshalAs(UnmanagedType.LPStr)]
        public string DataDir;

        public float NoiseScale;
        public float LengthScale;

        [MarshalAs(UnmanagedType.LPStr)]
        public string DictDir;
    }
}


================================================
FILE: scripts/dotnet/OfflineTtsModelConfig.cs
================================================
/// Copyright (c)  2024.5 by 东风破

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineTtsModelConfig
    {
        public OfflineTtsModelConfig()
        {
            Vits = new OfflineTtsVitsModelConfig();
            Matcha = new OfflineTtsMatchaModelConfig();
            Kokoro = new OfflineTtsKokoroModelConfig();
            Kitten = new OfflineTtsKittenModelConfig();
            ZipVoice = new OfflineTtsZipVoiceModelConfig();
            Pocket = new OfflineTtsPocketModelConfig();
            Supertonic = new OfflineTtsSupertonicModelConfig();
            NumThreads = 1;
            Debug = 0;
            Provider = "cpu";
        }

        public OfflineTtsVitsModelConfig Vits;
        public int NumThreads;
        public int Debug;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Provider;

        public OfflineTtsMatchaModelConfig Matcha;
        public OfflineTtsKokoroModelConfig Kokoro;
        public OfflineTtsKittenModelConfig Kitten;
        public OfflineTtsZipVoiceModelConfig ZipVoice;
        public OfflineTtsPocketModelConfig Pocket;
        public OfflineTtsSupertonicModelConfig Supertonic;
    }
}


================================================
FILE: scripts/dotnet/OfflineTtsPocketModelConfig.cs
================================================
/// Copyright (c)  2026  Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineTtsPocketModelConfig
    {
        // Default constructor for convenience
        public OfflineTtsPocketModelConfig()
        {
            LmFlow = "";
            LmMain = "";
            Encoder = "";
            Decoder = "";
            TextConditioner = "";
            VocabJson = "";
            TokenScoresJson = "";
            VoiceEmbeddingCacheCapacity = 50;
        }

        [MarshalAs(UnmanagedType.LPStr)]
        public string LmFlow;

        [MarshalAs(UnmanagedType.LPStr)]
        public string LmMain;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Encoder;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Decoder;

        [MarshalAs(UnmanagedType.LPStr)]
        public string TextConditioner;

        [MarshalAs(UnmanagedType.LPStr)]
        public string VocabJson;

        [MarshalAs(UnmanagedType.LPStr)]
        public string TokenScoresJson;

        public int VoiceEmbeddingCacheCapacity;
    }
}


================================================
FILE: scripts/dotnet/OfflineTtsSupertonicModelConfig.cs
================================================
/// Copyright (c)  2026  Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineTtsSupertonicModelConfig
    {
        public OfflineTtsSupertonicModelConfig()
        {
            DurationPredictor = "";
            TextEncoder = "";
            VectorEstimator = "";
            Vocoder = "";
            TtsJson = "";
            UnicodeIndexer = "";
            VoiceStyle = "";
        }

        [MarshalAs(UnmanagedType.LPStr)]
        public string DurationPredictor;

        [MarshalAs(UnmanagedType.LPStr)]
        public string TextEncoder;

        [MarshalAs(UnmanagedType.LPStr)]
        public string VectorEstimator;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Vocoder;

        [MarshalAs(UnmanagedType.LPStr)]
        public string TtsJson;

        [MarshalAs(UnmanagedType.LPStr)]
        public string UnicodeIndexer;

        [MarshalAs(UnmanagedType.LPStr)]
        public string VoiceStyle;
    }
}


================================================
FILE: scripts/dotnet/OfflineTtsVitsModelConfig.cs
================================================
/// Copyright (c)  2024.5 by 东风破

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineTtsVitsModelConfig
    {
        public OfflineTtsVitsModelConfig()
        {
            Model = "";
            Lexicon = "";
            Tokens = "";
            DataDir = "";

            NoiseScale = 0.667F;
            NoiseScaleW = 0.8F;
            LengthScale = 1.0F;

            DictDir = "";
        }
        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Lexicon;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Tokens;

        [MarshalAs(UnmanagedType.LPStr)]
        public string DataDir;

        public float NoiseScale;
        public float NoiseScaleW;
        public float LengthScale;

        [MarshalAs(UnmanagedType.LPStr)]
        public string DictDir;
    }
}

================================================
FILE: scripts/dotnet/OfflineTtsZipVoiceModelConfig.cs
================================================
/// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineTtsZipVoiceModelConfig
    {
        public OfflineTtsZipVoiceModelConfig()
        {
            Tokens = "";
            Encoder = "";
            Decoder = "";
            Vocoder = "";
            DataDir = "";
            Lexicon = "";

            FeatScale = 0.1F;
            Tshift = 0.5F;
            TargetRms = 0.1F;
            GuidanceScale = 1.0F;
        }
        [MarshalAs(UnmanagedType.LPStr)]
        public string Tokens;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Encoder;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Decoder;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Vocoder;

        [MarshalAs(UnmanagedType.LPStr)]
        public string DataDir;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Lexicon;

        public float FeatScale;
        public float Tshift;
        public float TargetRms;
        public float GuidanceScale;
    }
}


================================================
FILE: scripts/dotnet/OfflineWenetCtcModelConfig.cs
================================================
/// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{

    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineWenetCtcModelConfig
    {
        public OfflineWenetCtcModelConfig()
        {
            Model = "";
        }
        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;
    }
}


================================================
FILE: scripts/dotnet/OfflineWhisperModelConfig.cs
================================================
/// Copyright (c)  2024.5 by 东风破

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineWhisperModelConfig
    {
        public OfflineWhisperModelConfig()
        {
            Encoder = "";
            Decoder = "";
            Language = "";
            Task = "transcribe";
            TailPaddings = -1;
            EnableTokenTimestamps = 0;
            EnableSegmentTimestamps = 0;
        }
        [MarshalAs(UnmanagedType.LPStr)]
        public string Encoder;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Decoder;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Language;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Task;

        public int TailPaddings;
        public int EnableTokenTimestamps;
        public int EnableSegmentTimestamps;
    }

}


================================================
FILE: scripts/dotnet/OfflineZipformerAudioTaggingModelConfig.cs
================================================
/// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{

    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineZipformerAudioTaggingModelConfig
    {
        public OfflineZipformerAudioTaggingModelConfig()
        {
            Model = "";
        }
        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;
    }
}


================================================
FILE: scripts/dotnet/OfflineZipformerCtcModelConfig.cs
================================================
/// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{

    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineZipformerCtcModelConfig
    {
        public OfflineZipformerCtcModelConfig()
        {
            Model = "";
        }
        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;
    }
}


================================================
FILE: scripts/dotnet/OnlineCtcFstDecoderConfig.cs
================================================
/// Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
/// Copyright (c)  2023 by manyeyes
/// Copyright (c)  2024.5 by 东风破

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct OnlineCtcFstDecoderConfig
    {
        public OnlineCtcFstDecoderConfig()
        {
            Graph = "";
            MaxActive = 3000;
        }

        [MarshalAs(UnmanagedType.LPStr)]
        public string Graph;

        public int MaxActive;
    }

}

================================================
FILE: scripts/dotnet/OnlineModelConfig.cs
================================================
/// Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
/// Copyright (c)  2023 by manyeyes
/// Copyright (c)  2024.5 by 东风破

using System.Runtime.InteropServices;

namespace SherpaOnnx
{

    [StructLayout(LayoutKind.Sequential)]
    public struct OnlineModelConfig
    {
        public OnlineModelConfig()
        {
            Transducer = new OnlineTransducerModelConfig();
            Paraformer = new OnlineParaformerModelConfig();
            Zipformer2Ctc = new OnlineZipformer2CtcModelConfig();
            Tokens = "";
            NumThreads = 1;
            Provider = "cpu";
            Debug = 0;
            ModelType = "";
            ModelingUnit = "cjkchar";
            BpeVocab = "";
            TokensBuf = "";
            TokensBufSize = 0;
            NemoCtc = new OnlineNemoCtcModelConfig();
            ToneCtc = new OnlineToneCtcModelConfig();
        }

        public OnlineTransducerModelConfig Transducer;
        public OnlineParaformerModelConfig Paraformer;
        public OnlineZipformer2CtcModelConfig Zipformer2Ctc;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Tokens;

        /// Number of threads used to run the neural network model
        public int NumThreads;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Provider;

        /// true to print debug information of the model
        public int Debug;

        [MarshalAs(UnmanagedType.LPStr)]
        public string ModelType;

        [MarshalAs(UnmanagedType.LPStr)]
        public string ModelingUnit;

        [MarshalAs(UnmanagedType.LPStr)]
        public string BpeVocab;

        [MarshalAs(UnmanagedType.LPStr)]
        public string TokensBuf;

        public int TokensBufSize;

        public OnlineNemoCtcModelConfig NemoCtc;

        public OnlineToneCtcModelConfig ToneCtc;
    }

}


================================================
FILE: scripts/dotnet/OnlineNemoCtcModelConfig.cs
================================================
/// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct OnlineNemoCtcModelConfig
    {
        public OnlineNemoCtcModelConfig()
        {
            Model = "";
        }

        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;
    }
}


================================================
FILE: scripts/dotnet/OnlineParaformerModelConfig.cs
================================================
/// Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
/// Copyright (c)  2023 by manyeyes
/// Copyright (c)  2024.5 by 东风破

using System.Runtime.InteropServices;

namespace SherpaOnnx
{

    [StructLayout(LayoutKind.Sequential)]
    public struct OnlineParaformerModelConfig
    {
        public OnlineParaformerModelConfig()
        {
            Encoder = "";
            Decoder = "";
        }

        [MarshalAs(UnmanagedType.LPStr)]
        public string Encoder;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Decoder;
    }

}

================================================
FILE: scripts/dotnet/OnlineRecognizer.cs
================================================
﻿/// Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
/// Copyright (c)  2023 by manyeyes
/// Copyright (c)  2024.5 by 东风破
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    // please see
    // https://www.mono-project.com/docs/advanced/pinvoke/#gc-safe-pinvoke-code
    // https://www.mono-project.com/docs/advanced/pinvoke/#properly-disposing-of-resources
    public class OnlineRecognizer : IDisposable
    {
        public OnlineRecognizer(OnlineRecognizerConfig config)
        {
            IntPtr h = SherpaOnnxCreateOnlineRecognizer(ref config);
            _handle = new HandleRef(this, h);
        }

        public OnlineStream CreateStream()
        {
            IntPtr p = SherpaOnnxCreateOnlineStream(_handle.Handle);
            return new OnlineStream(p);
        }

        /// Return true if the passed stream is ready for decoding.
        public bool IsReady(OnlineStream stream)
        {
            return IsReady(_handle.Handle, stream.Handle) != 0;
        }

        /// Return true if an endpoint is detected for this stream.
        /// You probably need to invoke Reset(stream) when this method returns
        /// true.
        public bool IsEndpoint(OnlineStream stream)
        {
            return SherpaOnnxOnlineStreamIsEndpoint(_handle.Handle, stream.Handle) != 0;
        }

        /// You have to ensure that IsReady(stream) returns true before
        /// you call this method
        public void Decode(OnlineStream stream)
        {
            Decode(_handle.Handle, stream.Handle);
        }

        // The caller should ensure all passed streams are ready for decoding.
        public void Decode(IEnumerable<OnlineStream> streams)
        {
            // TargetFramework=net20 does not support System.Linq
            // IntPtr[] ptrs = streams.Select(s => s.Handle).ToArray();
            List<IntPtr> list = new List<IntPtr>();
            foreach (OnlineStream s in streams)
            {
              list.Add(s.Handle);
            }

            IntPtr[] ptrs = list.ToArray();
            Decode(_handle.Handle, ptrs, ptrs.Length);
        }

        public OnlineRecognizerResult GetResult(OnlineStream stream)
        {
            IntPtr h = GetResult(_handle.Handle, stream.Handle);
            OnlineRecognizerResult result = new OnlineRecognizerResult(h);
            DestroyResult(h);
            return result;
        }

        /// When this method returns, IsEndpoint(stream) will return false.
        public void Reset(OnlineStream stream)
        {
            SherpaOnnxOnlineStreamReset(_handle.Handle, stream.Handle);
        }

        public void Dispose()
        {
            Cleanup();
            // Prevent the object from being placed on the
            // finalization queue
            System.GC.SuppressFinalize(this);
        }

        ~OnlineRecognizer()
        {
            Cleanup();
        }

        private void Cleanup()
        {
            SherpaOnnxDestroyOnlineRecognizer(_handle.Handle);

            // Don't permit the handle to be used again.
            _handle = new HandleRef(this, IntPtr.Zero);
        }

        private HandleRef _handle;

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxCreateOnlineRecognizer(ref OnlineRecognizerConfig config);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxDestroyOnlineRecognizer(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxCreateOnlineStream(IntPtr handle);

        [DllImport(Dll.Filename, EntryPoint = "SherpaOnnxIsOnlineStreamReady")]
        private static extern int IsReady(IntPtr handle, IntPtr stream);

        [DllImport(Dll.Filename, EntryPoint = "SherpaOnnxDecodeOnlineStream")]
        private static extern void Decode(IntPtr handle, IntPtr stream);

        [DllImport(Dll.Filename, EntryPoint = "SherpaOnnxDecodeMultipleOnlineStreams")]
        private static extern void Decode(IntPtr handle, IntPtr[] streams, int n);

        [DllImport(Dll.Filename, EntryPoint = "SherpaOnnxGetOnlineStreamResult")]
        private static extern IntPtr GetResult(IntPtr handle, IntPtr stream);

        [DllImport(Dll.Filename, EntryPoint = "SherpaOnnxDestroyOnlineRecognizerResult")]
        private static extern void DestroyResult(IntPtr result);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxOnlineStreamReset(IntPtr handle, IntPtr stream);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxOnlineStreamIsEndpoint(IntPtr handle, IntPtr stream);
    }
}


================================================
FILE: scripts/dotnet/OnlineRecognizerConfig.cs
================================================
/// Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
/// Copyright (c)  2023 by manyeyes
/// Copyright (c)  2024.5 by 东风破

using System.Runtime.InteropServices;

namespace SherpaOnnx
{

    [StructLayout(LayoutKind.Sequential)]
    public struct OnlineRecognizerConfig
    {
        public OnlineRecognizerConfig()
        {
            FeatConfig = new FeatureConfig();
            ModelConfig = new OnlineModelConfig();
            DecodingMethod = "greedy_search";
            MaxActivePaths = 4;
            EnableEndpoint = 0;
            Rule1MinTrailingSilence = 1.2F;
            Rule2MinTrailingSilence = 2.4F;
            Rule3MinUtteranceLength = 20.0F;
            HotwordsFile = "";
            HotwordsScore = 1.5F;
            CtcFstDecoderConfig = new OnlineCtcFstDecoderConfig();
            RuleFsts = "";
            RuleFars = "";
            BlankPenalty = 0.0F;
            HotwordsBuf = "";
            HotwordsBufSize = 0;
            Hr = new HomophoneReplacerConfig();
        }
        public FeatureConfig FeatConfig;
        public OnlineModelConfig ModelConfig;

        [MarshalAs(UnmanagedType.LPStr)]
        public string DecodingMethod;

        /// Used only when decoding_method is modified_beam_search
        /// Example value: 4
        public int MaxActivePaths;

        /// 0 to disable endpoint detection.
        /// A non-zero value to enable endpoint detection.
        public int EnableEndpoint;

        /// An endpoint is detected if trailing silence in seconds is larger than
        /// this value even if nothing has been decoded.
        /// Used only when enable_endpoint is not 0.
        public float Rule1MinTrailingSilence;

        /// An endpoint is detected if trailing silence in seconds is larger than
        /// this value after something that is not blank has been decoded.
        /// Used only when enable_endpoint is not 0.
        public float Rule2MinTrailingSilence;

        /// An endpoint is detected if the utterance in seconds is larger than
        /// this value.
        /// Used only when enable_endpoint is not 0.
        public float Rule3MinUtteranceLength;

        /// Path to the hotwords.
        [MarshalAs(UnmanagedType.LPStr)]
        public string HotwordsFile;

        /// Bonus score for each token in hotwords.
        public float HotwordsScore;

        public OnlineCtcFstDecoderConfig CtcFstDecoderConfig;

        [MarshalAs(UnmanagedType.LPStr)]
        public string RuleFsts;

        [MarshalAs(UnmanagedType.LPStr)]
        public string RuleFars;

        public float BlankPenalty;

        [MarshalAs(UnmanagedType.LPStr)]
        public string HotwordsBuf;

        public int HotwordsBufSize;

        public HomophoneReplacerConfig Hr;
    }
}


================================================
FILE: scripts/dotnet/OnlineRecognizerResult.cs
================================================
﻿/// Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
/// Copyright (c)  2023 by manyeyes
/// Copyright (c)  2024.5 by 东风破
using System;
using System.Runtime.InteropServices;
using System.Text;

namespace SherpaOnnx
{

    public class OnlineRecognizerResult
    {
        public OnlineRecognizerResult(IntPtr handle)
        {
            Impl impl = (Impl)Marshal.PtrToStructure(handle, typeof(Impl));
            // PtrToStringUTF8() requires .net standard 2.1
            // _text = Marshal.PtrToStringUTF8(impl.Text);

            int length = 0;

            unsafe
            {
                byte* buffer = (byte*)impl.Text;
                while (*buffer != 0)
                {
                    ++buffer;
                    length += 1;
                }
            }

            byte[] stringBuffer = new byte[length];
            Marshal.Copy(impl.Text, stringBuffer, 0, length);
            _text = Encoding.UTF8.GetString(stringBuffer);

            _tokens = new String[impl.Count];

            unsafe
            {
                byte* buf = (byte*)impl.Tokens;
                for (int i = 0; i < impl.Count; i++)
                {
                    length = 0;
                    byte* start = buf;
                    while (*buf != 0)
                    {
                        ++buf;
                        length += 1;
                    }
                    ++buf;

                    stringBuffer = new byte[length];
                    fixed (byte* pTarget = stringBuffer)
                    {
                        for (int k = 0; k < length; k++)
                        {
                            pTarget[k] = start[k];
                        }
                    }

                    _tokens[i] = Encoding.UTF8.GetString(stringBuffer);
                }
            }

            unsafe
            {
                float* t = (float*)impl.Timestamps;
                if (t != null)
                {
                    _timestamps = new float[impl.Count];
                    fixed (float* pTarget = _timestamps)
                    {
                        for (int i = 0; i < impl.Count; i++)
                        {
                            pTarget[i] = t[i];
                        }
                    }
                }
                else
                {
                    _timestamps = new float[] {};
                }
            }
        }
        [StructLayout(LayoutKind.Sequential)]
        struct Impl
        {
            public IntPtr Text;
            public IntPtr Tokens;
            public IntPtr TokensArr;
            public IntPtr Timestamps;
            public int Count;
        }

        private String _text;
        public String Text => _text;

        private String[] _tokens;
        public String[] Tokens => _tokens;

        private float[] _timestamps;
        public float[] Timestamps => _timestamps;
    }
}


================================================
FILE: scripts/dotnet/OnlineSpeechDenoiser.cs
================================================
/// Copyright (c)  2026  Xiaomi Corporation (authors: Fangjun Kuang)

using System;
using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    public class OnlineSpeechDenoiser: IDisposable
    {
        public OnlineSpeechDenoiser(OnlineSpeechDenoiserConfig config)
        {
            IntPtr h = SherpaOnnxCreateOnlineSpeechDenoiser(ref config);
            _handle = new HandleRef(this, h);
        }

        public DenoisedAudio Run(float[] samples, int sampleRate)
        {
            IntPtr p = SherpaOnnxOnlineSpeechDenoiserRun(_handle.Handle, samples, samples.Length, sampleRate);
            return new DenoisedAudio(p);
        }

        public DenoisedAudio Flush()
        {
            IntPtr p = SherpaOnnxOnlineSpeechDenoiserFlush(_handle.Handle);
            return new DenoisedAudio(p);
        }

        public void Reset()
        {
            SherpaOnnxOnlineSpeechDenoiserReset(_handle.Handle);
        }

        public void Dispose()
        {
            Cleanup();
            System.GC.SuppressFinalize(this);
        }

        ~OnlineSpeechDenoiser()
        {
            Cleanup();
        }

        private void Cleanup()
        {
            SherpaOnnxDestroyOnlineSpeechDenoiser(_handle.Handle);
            _handle = new HandleRef(this, IntPtr.Zero);
        }

        private HandleRef _handle;

        public int SampleRate => SherpaOnnxOnlineSpeechDenoiserGetSampleRate(_handle.Handle);

        public int FrameShiftInSamples =>
            SherpaOnnxOnlineSpeechDenoiserGetFrameShiftInSamples(_handle.Handle);

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxCreateOnlineSpeechDenoiser(ref OnlineSpeechDenoiserConfig config);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxDestroyOnlineSpeechDenoiser(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxOnlineSpeechDenoiserGetSampleRate(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxOnlineSpeechDenoiserGetFrameShiftInSamples(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxOnlineSpeechDenoiserRun(IntPtr handle, float[] samples, int n, int sampleRate);

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxOnlineSpeechDenoiserFlush(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxOnlineSpeechDenoiserReset(IntPtr handle);
    }
}


================================================
FILE: scripts/dotnet/OnlineSpeechDenoiserConfig.cs
================================================
/// Copyright (c)  2026  Xiaomi Corporation (authors: Fangjun Kuang)

namespace SherpaOnnx
{
    public struct OnlineSpeechDenoiserConfig
    {
        public OnlineSpeechDenoiserConfig()
        {
            Model = new OfflineSpeechDenoiserModelConfig();
        }

        public OfflineSpeechDenoiserModelConfig Model;
    }
}


================================================
FILE: scripts/dotnet/OnlineStream.cs
================================================
﻿/// Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
/// Copyright (c)  2023 by manyeyes
/// Copyright (c)  2024.5 by 东风破
using System;
using System.Runtime.InteropServices;

namespace SherpaOnnx
{

    public class OnlineStream : IDisposable
    {
        public OnlineStream(IntPtr p)
        {
            _handle = new HandleRef(this, p);
        }

        public void AcceptWaveform(int sampleRate, float[] samples)
        {
            SherpaOnnxOnlineStreamAcceptWaveform(Handle, sampleRate, samples, samples.Length);
        }

        public void InputFinished()
        {
            SherpaOnnxOnlineStreamInputFinished(Handle);
        }

        public void SetOption(string key, string value)
        {
            SherpaOnnxOnlineStreamSetOption(Handle, key, value);
        }

        public string GetOption(string key)
        {
            IntPtr p = SherpaOnnxOnlineStreamGetOption(Handle, key);
            return Marshal.PtrToStringAnsi(p) ?? "";
        }

        public bool HasOption(string key)
        {
            return SherpaOnnxOnlineStreamHasOption(Handle, key) == 1;
        }

        ~OnlineStream()
        {
            Cleanup();
        }

        public void Dispose()
        {
            Cleanup();
            // Prevent the object from being placed on the
            // finalization queue
            System.GC.SuppressFinalize(this);
        }

        private void Cleanup()
        {
            SherpaOnnxDestroyOnlineStream(Handle);

            // Don't permit the handle to be used again.
            _handle = new HandleRef(this, IntPtr.Zero);
        }

        private HandleRef _handle;
        public IntPtr Handle => _handle.Handle;

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxDestroyOnlineStream(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxOnlineStreamAcceptWaveform(IntPtr handle, int sampleRate, float[] samples, int n);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxOnlineStreamInputFinished(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxOnlineStreamSetOption(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string key, [MarshalAs(UnmanagedType.LPStr)] string value);

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxOnlineStreamGetOption(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string key);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxOnlineStreamHasOption(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string key);
    }

}


================================================
FILE: scripts/dotnet/OnlineToneCtcModelConfig.cs
================================================
/// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct OnlineToneCtcModelConfig
    {
        public OnlineToneCtcModelConfig()
        {
            Model = "";
        }

        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;
    }
}


================================================
FILE: scripts/dotnet/OnlineTransducerModelConfig.cs
================================================
/// Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
/// Copyright (c)  2023 by manyeyes
/// Copyright (c)  2024.5 by 东风破

using System.Runtime.InteropServices;

namespace SherpaOnnx
{

    [StructLayout(LayoutKind.Sequential)]
    public struct OnlineTransducerModelConfig
    {
        public OnlineTransducerModelConfig()
        {
            Encoder = "";
            Decoder = "";
            Joiner = "";
        }

        [MarshalAs(UnmanagedType.LPStr)]
        public string Encoder;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Decoder;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Joiner;
    }

}

================================================
FILE: scripts/dotnet/OnlineZipformer2CtcModelConfig.cs
================================================
/// Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
/// Copyright (c)  2023 by manyeyes
/// Copyright (c)  2024.5 by 东风破

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct OnlineZipformer2CtcModelConfig
    {
        public OnlineZipformer2CtcModelConfig()
        {
            Model = "";
        }

        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;
    }
}

================================================
FILE: scripts/dotnet/README.md
================================================
# Introduction

[sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx) is an open-source
real-time speech recognition toolkit developed
by the Next-gen Kaldi team.

It supports streaming recognition on a variety of
platforms such as Android, iOS, Raspberry, Linux, Windows, macOS, etc.

It does not require Internet connection during recognition.

See the documentation https://k2-fsa.github.io/sherpa/onnx/index.html
for details.

Please see
https://github.com/k2-fsa/sherpa-onnx/tree/dot-net/dotnet-examples
for how to use C# APIs of this package.


================================================
FILE: scripts/dotnet/SileroVadModelConfig.cs
================================================
/// Copyright (c)  2024  Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct SileroVadModelConfig
    {
        public SileroVadModelConfig()
        {
            Model = "";
            Threshold = 0.5F;
            MinSilenceDuration = 0.5F;
            MinSpeechDuration = 0.25F;
            WindowSize = 512;
            MaxSpeechDuration = 5.0F;
        }

        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;

        public float Threshold;

        public float MinSilenceDuration;

        public float MinSpeechDuration;

        public int WindowSize;

        public float MaxSpeechDuration;
    }
}


================================================
FILE: scripts/dotnet/SpeakerEmbeddingExtractor.cs
================================================
﻿/// Copyright (c)  2024.5 by 东风破
using System;
using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    public class SpeakerEmbeddingExtractor : IDisposable
    {
        public SpeakerEmbeddingExtractor(SpeakerEmbeddingExtractorConfig config)
        {
            IntPtr h = SherpaOnnxCreateSpeakerEmbeddingExtractor(ref config);
            _handle = new HandleRef(this, h);
        }

        public OnlineStream CreateStream()
        {
            IntPtr p = SherpaOnnxSpeakerEmbeddingExtractorCreateStream(_handle.Handle);
            return new OnlineStream(p);
        }

        public bool IsReady(OnlineStream stream)
        {
            return SherpaOnnxSpeakerEmbeddingExtractorIsReady(_handle.Handle, stream.Handle) != 0;
        }

        public float[] Compute(OnlineStream stream)
        {
            IntPtr p = SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding(_handle.Handle, stream.Handle);

            int dim = Dim;
            float[] ans = new float[dim];
            Marshal.Copy(p, ans, 0, dim);

            SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(p);

            return ans;
        }

        public int Dim
        {
            get
            {
                return SherpaOnnxSpeakerEmbeddingExtractorDim(_handle.Handle);
            }
        }

        public void Dispose()
        {
            Cleanup();
            // Prevent the object from being placed on the
            // finalization queue
            System.GC.SuppressFinalize(this);
        }

        ~SpeakerEmbeddingExtractor()
        {
            Cleanup();
        }

        private void Cleanup()
        {
            SherpaOnnxDestroySpeakerEmbeddingExtractor(_handle.Handle);

            // Don't permit the handle to be used again.
            _handle = new HandleRef(this, IntPtr.Zero);
        }

        private HandleRef _handle;

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxCreateSpeakerEmbeddingExtractor(ref SpeakerEmbeddingExtractorConfig config);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxDestroySpeakerEmbeddingExtractor(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxSpeakerEmbeddingExtractorDim(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxSpeakerEmbeddingExtractorCreateStream(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxSpeakerEmbeddingExtractorIsReady(IntPtr handle, IntPtr stream);

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding(IntPtr handle, IntPtr stream);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(IntPtr p);
    }

}


================================================
FILE: scripts/dotnet/SpeakerEmbeddingExtractorConfig.cs
================================================
/// Copyright (c)  2024.5 by 东风破

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct SpeakerEmbeddingExtractorConfig
    {
        public SpeakerEmbeddingExtractorConfig()
        {
            Model = "";
            NumThreads = 1;
            Debug = 0;
            Provider = "cpu";
        }

        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;

        public int NumThreads;
        public int Debug;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Provider;
    }

}

================================================
FILE: scripts/dotnet/SpeakerEmbeddingManager.cs
================================================
﻿/// Copyright (c)  2024.5 by 东风破
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Text;

namespace SherpaOnnx
{
    public class SpeakerEmbeddingManager : IDisposable
    {
        public SpeakerEmbeddingManager(int dim)
        {
            IntPtr h = SherpaOnnxCreateSpeakerEmbeddingManager(dim);
            _handle = new HandleRef(this, h);
            this._dim = dim;
        }

        public bool Add(string name, float[] v)
        {
            byte[] utf8Name = Encoding.UTF8.GetBytes(name);
            byte[] utf8NameWithNull = new byte[utf8Name.Length + 1]; // +1 for null terminator
            Array.Copy(utf8Name, utf8NameWithNull, utf8Name.Length);
            utf8NameWithNull[utf8Name.Length] = 0; // Null terminator
            return SherpaOnnxSpeakerEmbeddingManagerAdd(_handle.Handle, utf8NameWithNull, v) == 1;
        }

        public bool Add(string name, ICollection<float[]> v_list)
        {
            int n = v_list.Count;
            float[] v = new float[n * _dim];
            int i = 0;
            foreach (var item in v_list)
            {
                item.CopyTo(v, i);
                i += _dim;
            }

            byte[] utf8Name = Encoding.UTF8.GetBytes(name);
            byte[] utf8NameWithNull = new byte[utf8Name.Length + 1]; // +1 for null terminator
            Array.Copy(utf8Name, utf8NameWithNull, utf8Name.Length);
            utf8NameWithNull[utf8Name.Length] = 0; // Null terminator
            return SherpaOnnxSpeakerEmbeddingManagerAddListFlattened(_handle.Handle, utf8NameWithNull, v, n) == 1;
        }

        public bool Remove(string name)
        {
            byte[] utf8Name = Encoding.UTF8.GetBytes(name);
            byte[] utf8NameWithNull = new byte[utf8Name.Length + 1]; // +1 for null terminator
            Array.Copy(utf8Name, utf8NameWithNull, utf8Name.Length);
            utf8NameWithNull[utf8Name.Length] = 0; // Null terminator
            return SherpaOnnxSpeakerEmbeddingManagerRemove(_handle.Handle, utf8NameWithNull) == 1;
        }

        public string Search(float[] v, float threshold)
        {
            IntPtr p = SherpaOnnxSpeakerEmbeddingManagerSearch(_handle.Handle, v, threshold);

            string s = "";
            int length = 0;

            unsafe
            {
                byte* b = (byte*)p;
                if (b != null)
                {
                    while (*b != 0)
                    {
                        ++b;
                        length += 1;
                    }
                }
            }

            if (length > 0)
            {
                byte[] stringBuffer = new byte[length];
                Marshal.Copy(p, stringBuffer, 0, length);
                s = Encoding.UTF8.GetString(stringBuffer);
            }

            SherpaOnnxSpeakerEmbeddingManagerFreeSearch(p);

            return s;
        }

        public bool Verify(string name, float[] v, float threshold)
        {
            byte[] utf8Name = Encoding.UTF8.GetBytes(name);
            byte[] utf8NameWithNull = new byte[utf8Name.Length + 1]; // +1 for null terminator
            Array.Copy(utf8Name, utf8NameWithNull, utf8Name.Length);
            utf8NameWithNull[utf8Name.Length] = 0; // Null terminator
            return SherpaOnnxSpeakerEmbeddingManagerVerify(_handle.Handle, utf8NameWithNull, v, threshold) == 1;
        }

        public bool Contains(string name)
        {
            byte[] utf8Name = Encoding.UTF8.GetBytes(name);
            byte[] utf8NameWithNull = new byte[utf8Name.Length + 1]; // +1 for null terminator
            Array.Copy(utf8Name, utf8NameWithNull, utf8Name.Length);
            utf8NameWithNull[utf8Name.Length] = 0; // Null terminator
            return SherpaOnnxSpeakerEmbeddingManagerContains(_handle.Handle, utf8NameWithNull) == 1;
        }

        public string[] GetAllSpeakers()
        {
            if (NumSpeakers == 0)
            {
                return new string[] { };
            }

            IntPtr names = SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(_handle.Handle);

            string[] ans = new string[NumSpeakers];

            unsafe
            {
                byte** p = (byte**)names;
                for (int i = 0; i != NumSpeakers; i++)
                {
                    int length = 0;
                    byte* s = p[i];
                    while (*s != 0)
                    {
                        ++s;
                        length += 1;
                    }
                    byte[] stringBuffer = new byte[length];
                    Marshal.Copy((IntPtr)p[i], stringBuffer, 0, length);
                    ans[i] = Encoding.UTF8.GetString(stringBuffer);
                }
            }

            SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(names);

            return ans;
        }

        public void Dispose()
        {
            Cleanup();
            // Prevent the object from being placed on the
            // finalization queue
            System.GC.SuppressFinalize(this);
        }

        ~SpeakerEmbeddingManager()
        {
            Cleanup();
        }

        private void Cleanup()
        {
            SherpaOnnxDestroySpeakerEmbeddingManager(_handle.Handle);

            // Don't permit the handle to be used again.
            _handle = new HandleRef(this, IntPtr.Zero);
        }

        public int NumSpeakers
        {
            get
            {
                return SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(_handle.Handle);
            }
        }

        private HandleRef _handle;
        private int _dim;


        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxCreateSpeakerEmbeddingManager(int dim);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxDestroySpeakerEmbeddingManager(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxSpeakerEmbeddingManagerAdd(IntPtr handle, [MarshalAs(UnmanagedType.LPArray, ArraySubType = UnmanagedType.I1)] byte[] utf8Name, float[] v);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxSpeakerEmbeddingManagerAddListFlattened(IntPtr handle, [MarshalAs(UnmanagedType.LPArray, ArraySubType = UnmanagedType.I1)] byte[] utf8Name, float[] v, int n);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxSpeakerEmbeddingManagerRemove(IntPtr handle, [MarshalAs(UnmanagedType.LPArray, ArraySubType = UnmanagedType.I1)] byte[] utf8Name);

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxSpeakerEmbeddingManagerSearch(IntPtr handle, float[] v, float threshold);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxSpeakerEmbeddingManagerFreeSearch(IntPtr p);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxSpeakerEmbeddingManagerVerify(IntPtr handle, [MarshalAs(UnmanagedType.LPArray, ArraySubType = UnmanagedType.I1)] byte[] utf8Name, float[] v, float threshold);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxSpeakerEmbeddingManagerContains(IntPtr handle, [MarshalAs(UnmanagedType.LPArray, ArraySubType = UnmanagedType.I1)] byte[] utf8Name);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(IntPtr names);
    }
}


================================================
FILE: scripts/dotnet/SpeechSegment.cs
================================================
﻿/// Copyright (c)  2024  Xiaomi Corporation (authors: Fangjun Kuang)
using System;
using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    public class SpeechSegment
    {
        public SpeechSegment(IntPtr handle)
        {
            Impl impl = (Impl)Marshal.PtrToStructure(handle, typeof(Impl));

            _start = impl.Start;

            unsafe
            {
                float* t = (float*)impl.Samples;
                _samples = new float[impl.Count];
                fixed (float* pTarget = _samples)
                {
                    for (int i = 0; i < impl.Count; i++)
                    {
                        pTarget[i] = t[i];
                    }
                }
            }
        }

        public int _start;
        public int Start => _start;

        private float[] _samples;
        public float[] Samples => _samples;

        [StructLayout(LayoutKind.Sequential)]
        struct Impl
        {
            public int Start;
            public IntPtr Samples;
            public int Count;
        }
    }
}


================================================
FILE: scripts/dotnet/SpokenLanguageIdentification.cs
================================================
﻿/// Copyright (c)  2024.5 by 东风破
using System;
using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    public class SpokenLanguageIdentification : IDisposable
{
    public SpokenLanguageIdentification(SpokenLanguageIdentificationConfig config)
    {
        IntPtr h = SherpaOnnxCreateSpokenLanguageIdentification(ref config);
        _handle = new HandleRef(this, h);
    }

    public OfflineStream CreateStream()
    {
        IntPtr p = SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(_handle.Handle);
        return new OfflineStream(p);
    }

    public SpokenLanguageIdentificationResult Compute(OfflineStream stream)
    {
        IntPtr h = SherpaOnnxSpokenLanguageIdentificationCompute(_handle.Handle, stream.Handle);
        SpokenLanguageIdentificationResult result = new SpokenLanguageIdentificationResult(h);
        SherpaOnnxDestroySpokenLanguageIdentificationResult(h);
        return result;
    }

    public void Dispose()
    {
        Cleanup();
        // Prevent the object from being placed on the
        // finalization queue
        System.GC.SuppressFinalize(this);
    }

    ~SpokenLanguageIdentification()
    {
        Cleanup();
    }

    private void Cleanup()
    {
        SherpaOnnxDestroySpokenLanguageIdentification(_handle.Handle);

        // Don't permit the handle to be used again.
        _handle = new HandleRef(this, IntPtr.Zero);
    }

    private HandleRef _handle;

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxCreateSpokenLanguageIdentification(ref SpokenLanguageIdentificationConfig config);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxDestroySpokenLanguageIdentification(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxSpokenLanguageIdentificationCompute(IntPtr handle, IntPtr stream);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxDestroySpokenLanguageIdentificationResult(IntPtr handle);
}
}


================================================
FILE: scripts/dotnet/SpokenLanguageIdentificationConfig.cs
================================================
/// Copyright (c)  2024.5 by 东风破

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    public struct SpokenLanguageIdentificationConfig
    {
        public SpokenLanguageIdentificationConfig()
        {
            Whisper = new SpokenLanguageIdentificationWhisperConfig();
            NumThreads = 1;
            Debug = 0;
            Provider = "cpu";
        }
        public SpokenLanguageIdentificationWhisperConfig Whisper;

        public int NumThreads;
        public int Debug;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Provider;
    }

}

================================================
FILE: scripts/dotnet/SpokenLanguageIdentificationResult.cs
================================================
﻿/// Copyright (c)  2024.5 by 东风破
using System;
using System.Runtime.InteropServices;
using System.Text;

namespace SherpaOnnx
{
    public class SpokenLanguageIdentificationResult
    {
        public SpokenLanguageIdentificationResult(IntPtr handle)
        {
            Impl impl = (Impl)Marshal.PtrToStructure(handle, typeof(Impl));

            // PtrToStringUTF8() requires .net standard 2.1
            // _text = Marshal.PtrToStringUTF8(impl.Text);

            int length = 0;

            unsafe
            {
                byte* buffer = (byte*)impl.Lang;
                while (*buffer != 0)
                {
                    ++buffer;
                    length += 1;
                }
            }

            byte[] stringBuffer = new byte[length];
            Marshal.Copy(impl.Lang, stringBuffer, 0, length);
            _lang = Encoding.UTF8.GetString(stringBuffer);
        }

        [StructLayout(LayoutKind.Sequential)]
        struct Impl
        {
            public IntPtr Lang;
        }

        private String _lang;
        public String Lang => _lang;
    }
}


================================================
FILE: scripts/dotnet/SpokenLanguageIdentificationWhisperConfig.cs
================================================
/// Copyright (c)  2024.5 by 东风破

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct SpokenLanguageIdentificationWhisperConfig
    {
        public SpokenLanguageIdentificationWhisperConfig()
        {
            Encoder = "";
            Decoder = "";
            TailPaddings = -1;
        }

        [MarshalAs(UnmanagedType.LPStr)]
        public string Encoder;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Decoder;

        public int TailPaddings;
    }

}

================================================
FILE: scripts/dotnet/TenVadModelConfig.cs
================================================
/// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct TenVadModelConfig
    {
        public TenVadModelConfig()
        {
            Model = "";
            Threshold = 0.5F;
            MinSilenceDuration = 0.5F;
            MinSpeechDuration = 0.25F;
            WindowSize = 256;
            MaxSpeechDuration = 5.0F;
        }

        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;

        public float Threshold;

        public float MinSilenceDuration;

        public float MinSpeechDuration;

        public int WindowSize;

        public float MaxSpeechDuration;
    }
}


================================================
FILE: scripts/dotnet/VadModelConfig.cs
================================================
/// Copyright (c)  2024  Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    [StructLayout(LayoutKind.Sequential)]
    public struct VadModelConfig
    {
        public VadModelConfig()
        {
            SileroVad = new SileroVadModelConfig();
            SampleRate = 16000;
            NumThreads = 1;
            Provider = "cpu";
            Debug = 0;
            TenVad = new TenVadModelConfig();
        }

        public SileroVadModelConfig SileroVad;

        public int SampleRate;

        public int NumThreads;

        [MarshalAs(UnmanagedType.LPStr)]
        public string Provider;

        public int Debug;

        public TenVadModelConfig TenVad;
    }
}


================================================
FILE: scripts/dotnet/VersionInfo.cs
================================================
/// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)
using System;
using System.Runtime.InteropServices;
using System.Text;


namespace SherpaOnnx
{
    public class VersionInfo
    {
        public static String Version
        {
          get
          {
            IntPtr p = SherpaOnnxGetVersionStr();

            string s = "";
            int length = 0;

            unsafe
            {
                byte* b = (byte*)p;
                if (b != null)
                {
                    while (*b != 0)
                    {
                        ++b;
                        length += 1;
                    }
                }
            }

            if (length > 0)
            {
                byte[] stringBuffer = new byte[length];
                Marshal.Copy(p, stringBuffer, 0, length);
                s = Encoding.UTF8.GetString(stringBuffer);
            }

            return s;
          }
        }

        public static String GitSha1
        {
          get
          {
            IntPtr p = SherpaOnnxGetGitSha1();

            string s = "";
            int length = 0;

            unsafe
            {
                byte* b = (byte*)p;
                if (b != null)
                {
                    while (*b != 0)
                    {
                        ++b;
                        length += 1;
                    }
                }
            }

            if (length > 0)
            {
                byte[] stringBuffer = new byte[length];
                Marshal.Copy(p, stringBuffer, 0, length);
                s = Encoding.UTF8.GetString(stringBuffer);
            }

            return s;
          }
        }

        public static String GitDate
        {
          get
          {
            IntPtr p = SherpaOnnxGetGitDate();

            string s = "";
            int length = 0;

            unsafe
            {
                byte* b = (byte*)p;
                if (b != null)
                {
                    while (*b != 0)
                    {
                        ++b;
                        length += 1;
                    }
                }
            }

            if (length > 0)
            {
                byte[] stringBuffer = new byte[length];
                Marshal.Copy(p, stringBuffer, 0, length);
                s = Encoding.UTF8.GetString(stringBuffer);
            }

            return s;
          }
        }


        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxGetVersionStr();

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxGetGitSha1();

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxGetGitDate();
    }
}


================================================
FILE: scripts/dotnet/VoiceActivityDetector.cs
================================================
﻿/// Copyright (c)  2024  Xiaomi Corporation (authors: Fangjun Kuang)
using System;
using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    public class VoiceActivityDetector : IDisposable
    {
        public VoiceActivityDetector(VadModelConfig config, float bufferSizeInSeconds)
        {
            IntPtr h = SherpaOnnxCreateVoiceActivityDetector(ref config, bufferSizeInSeconds);
            _handle = new HandleRef(this, h);
        }

        public void AcceptWaveform(float[] samples)
        {
            SherpaOnnxVoiceActivityDetectorAcceptWaveform(_handle.Handle, samples, samples.Length);
        }

        public bool IsEmpty()
        {
            return SherpaOnnxVoiceActivityDetectorEmpty(_handle.Handle) == 1;
        }

        public bool IsSpeechDetected()
        {
            return SherpaOnnxVoiceActivityDetectorDetected(_handle.Handle) == 1;
        }

        public void Pop()
        {
            SherpaOnnxVoiceActivityDetectorPop(_handle.Handle);
        }

        public SpeechSegment Front()
        {
            IntPtr p = SherpaOnnxVoiceActivityDetectorFront(_handle.Handle);

            SpeechSegment segment = new SpeechSegment(p);

            SherpaOnnxDestroySpeechSegment(p);

            return segment;
        }

        public void Clear()
        {
            SherpaOnnxVoiceActivityDetectorClear(_handle.Handle);
        }

        public void Reset()
        {
            SherpaOnnxVoiceActivityDetectorReset(_handle.Handle);
        }

        public void Flush()
        {
            SherpaOnnxVoiceActivityDetectorFlush(_handle.Handle);
        }

        public void Dispose()
        {
            Cleanup();
            // Prevent the object from being placed on the
            // finalization queue
            System.GC.SuppressFinalize(this);
        }

        ~VoiceActivityDetector()
        {
            Cleanup();
        }

        private void Cleanup()
        {
            SherpaOnnxDestroyVoiceActivityDetector(_handle.Handle);

            // Don't permit the handle to be used again.
            _handle = new HandleRef(this, IntPtr.Zero);
        }

        private HandleRef _handle;

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxCreateVoiceActivityDetector(ref VadModelConfig config, float bufferSizeInSeconds);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxDestroyVoiceActivityDetector(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxVoiceActivityDetectorAcceptWaveform(IntPtr handle, float[] samples, int n);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxVoiceActivityDetectorEmpty(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxVoiceActivityDetectorDetected(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxVoiceActivityDetectorPop(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxVoiceActivityDetectorClear(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxVoiceActivityDetectorFront(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxDestroySpeechSegment(IntPtr segment);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxVoiceActivityDetectorReset(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxVoiceActivityDetectorFlush(IntPtr handle);
    }
}


================================================
FILE: scripts/dotnet/examples/Common.csproj
================================================
﻿<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <TargetFramework>net8.0</TargetFramework>
    <RestoreSources>/tmp/packages;$(RestoreSources);https://api.nuget.org/v3/index.json</RestoreSources>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
  </ItemGroup>

</Project>


================================================
FILE: scripts/dotnet/examples/README.md
================================================
# Introduction

Files in this directory are used exclusively by CI.


================================================
FILE: scripts/dotnet/generate.py
================================================
#!/usr/bin/env python3
# Copyright (c)  2023  Xiaomi Corporation

import glob
import os
import re
from pathlib import Path

import jinja2

SHERPA_ONNX_DIR = Path(__file__).resolve().parent.parent.parent

src_dir = os.environ.get("src_dir", "/tmp")


def get_version():
    cmake_file = SHERPA_ONNX_DIR / "CMakeLists.txt"
    with open(cmake_file) as f:
        content = f.read()

    version = re.search(r"set\(SHERPA_ONNX_VERSION (.*)\)", content).group(1)
    return version.strip('"')


def read_proj_file(filename):
    with open(filename) as f:
        return f.read()


def get_dict():
    return {
        "version": get_version(),
    }


def process_linux(s, rid):
    libs = [
        "libonnxruntime.so",
        "libsherpa-onnx-c-api.so",
    ]
    prefix = f"{src_dir}/linux-{rid}/"
    libs = [prefix + lib for lib in libs]
    libs = "\n      ;".join(libs)

    d = get_dict()
    d["dotnet_rid"] = f"linux-{rid}"
    d["libs"] = libs

    environment = jinja2.Environment()
    template = environment.from_string(s)
    s = template.render(**d)
    with open(f"./linux-{rid}/sherpa-onnx.runtime.csproj", "w") as f:
        f.write(s)


def process_macos(s, rid):
    lib_dir = os.path.join(src_dir, f"macos-{rid}")
    onnx_libs = glob.glob(os.path.join(lib_dir, "libonnxruntime*.dylib"))
    if not onnx_libs:
        raise FileNotFoundError(f"No libonnxruntime*.dylib found in {lib_dir}")

    other_libs = [os.path.join(lib_dir, "libsherpa-onnx-c-api.dylib")]
    libs = onnx_libs + other_libs
    libs_str = "\n      ;".join(libs)

    d = get_dict()
    d["dotnet_rid"] = f"osx-{rid}"
    d["libs"] = libs_str

    environment = jinja2.Environment()
    template = environment.from_string(s)
    s = template.render(**d)
    with open(f"./macos-{rid}/sherpa-onnx.runtime.csproj", "w") as f:
        f.write(s)


def process_windows(s, rid):
    libs = [
        "onnxruntime.dll",
        "sherpa-onnx-c-api.dll",
    ]

    prefix = f"{src_dir}/windows-{rid}/"
    libs = [prefix + lib for lib in libs]
    libs = "\n      ;".join(libs)

    d = get_dict()
    d["dotnet_rid"] = f"win-{rid}"
    d["libs"] = libs

    environment = jinja2.Environment()
    template = environment.from_string(s)
    s = template.render(**d)
    with open(f"./windows-{rid}/sherpa-onnx.runtime.csproj", "w") as f:
        f.write(s)


def main():
    s = read_proj_file("./sherpa-onnx.csproj.runtime.in")
    process_macos(s, "x64")
    process_macos(s, "arm64")
    process_linux(s, "x64")
    process_linux(s, "arm64")
    process_windows(s, "x64")
    process_windows(s, "x86")
    process_windows(s, "arm64")

    s = read_proj_file("./sherpa-onnx.csproj.in")
    d = get_dict()
    d["packages_dir"] = str(SHERPA_ONNX_DIR / "scripts/dotnet/packages")

    environment = jinja2.Environment()
    template = environment.from_string(s)
    s = template.render(**d)
    with open("./all/sherpa-onnx.csproj", "w") as f:
        f.write(s)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/dotnet/sherpa-onnx.csproj.in
================================================
<Project Sdk="Microsoft.NET.Sdk">
  <PropertyGroup>
    <PackageLicenseExpression>Apache-2.0</PackageLicenseExpression>
    <PackageReadmeFile>README.md</PackageReadmeFile>
    <OutputType>Library</OutputType>
    <LangVersion>10.0</LangVersion>
    <TargetFrameworks>net8.0;net7.0;net6.0;net45;net40;net35;net20;netstandard2.0</TargetFrameworks>
    <RuntimeIdentifiers>linux-x64;linux-arm64;osx-x64;osx-arm64;win-x64;win-x86;win-arm64</RuntimeIdentifiers>
    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
    <AssemblyName>sherpa-onnx</AssemblyName>
    <Version>{{ version }}</Version>

    <PackageProjectUrl>https://github.com/k2-fsa/sherpa-onnx</PackageProjectUrl>
    <RepositoryUrl>https://github.com/k2-fsa/sherpa-onnx</RepositoryUrl>
    <PackageTags>speech recognition voice audio stt asr speech-to-text AI offline
      privacy open-sourced next-gen-kaldi k2 kaldi2 sherpa-onnx</PackageTags>

    <Authors>The Next-gen Kaldi development team</Authors>
    <Owners>The Next-gen Kaldi development team</Owners>
    <Company>Xiaomi Corporation</Company>
    <Copyright>Copyright 2019-2023 Xiaomi Corporation</Copyright>
    <Description>sherpa-onnx is an open-source real-time speech recognition toolkit developed
    by the Next-gen Kaldi team. It supports streaming recognition on a variety of
    platforms such as Android, iOS, Raspberry, Linux, Windows, macOS, etc.

    It does not require Internet connection during recognition.

    See the documentation https://k2-fsa.github.io/sherpa/onnx/index.html
    for details.
    </Description>

    <!-- Pack Option -->
    <Title>sherpa-onnx v{{ version }}</Title>
    <PackageId>org.k2fsa.sherpa.onnx</PackageId>

    <!-- Signing -->
    <SignAssembly>false</SignAssembly>
    <PublicSign>false</PublicSign>
    <DelaySign>false</DelaySign>
  </PropertyGroup>

  <PropertyGroup>
    <RestoreSources>{{ packages_dir }};$(RestoreSources);https://api.nuget.org/v3/index.json</RestoreSources>
  </PropertyGroup>

  <ItemGroup>
    <None Include="../README.md" Pack="true" PackagePath="/"/>
  </ItemGroup>

  <ItemGroup>
    <PackageReference Include="org.k2fsa.sherpa.onnx.runtime.linux-x64" Version="{{ version }}" />
    <PackageReference Include="org.k2fsa.sherpa.onnx.runtime.linux-arm64" Version="{{ version }}" />
    <PackageReference Include="org.k2fsa.sherpa.onnx.runtime.osx-x64"   Version="{{ version }}" />
    <PackageReference Include="org.k2fsa.sherpa.onnx.runtime.osx-arm64" Version="{{ version }}" />
    <PackageReference Include="org.k2fsa.sherpa.onnx.runtime.win-x64"   Version="{{ version }}" />
    <PackageReference Include="org.k2fsa.sherpa.onnx.runtime.win-x86"   Version="{{ version }}" />
    <PackageReference Include="org.k2fsa.sherpa.onnx.runtime.win-arm64" Version="{{ version }}" />
  </ItemGroup>

</Project>


================================================
FILE: scripts/dotnet/sherpa-onnx.csproj.runtime.in
================================================
<Project Sdk="Microsoft.NET.Sdk">
  <PropertyGroup>
    <PackageLicenseExpression>Apache-2.0</PackageLicenseExpression>
    <PackageReadmeFile>README.md</PackageReadmeFile>
    <OutputType>Library</OutputType>
    <TargetFrameworks>net8.0;net7.0;net6.0;net45;net40;net35;net20;netstandard2.0</TargetFrameworks>
    <RuntimeIdentifier>{{ dotnet_rid }}</RuntimeIdentifier>
    <AssemblyName>sherpa-onnx</AssemblyName>
    <Version>{{ version }}</Version>

    <PackageProjectUrl>https://github.com/k2-fsa/sherpa-onnx</PackageProjectUrl>
    <RepositoryUrl>https://github.com/k2-fsa/sherpa-onnx</RepositoryUrl>
    <PackageTags>speech recognition voice audio stt asr speech-to-text AI offline
      privacy open-sourced next-gen-kaldi k2 kaldi2 sherpa-onnx</PackageTags>

    <!-- Nuget Properties -->
    <Description>.NET native {{ dotnet_rid }} wrapper for the sherpa-onnx project.

    In general, you don't need to use this package directly.

    Please use https://www.nuget.org/packages/org.k2fsa.sherpa.onnx instead
    </Description>
    <IncludeBuildOutput>false</IncludeBuildOutput>

    <!-- Pack Option -->
    <Title>sherpa-onnx {{ dotnet_rid }} v{{ version }}</Title>
    <PackageId>org.k2fsa.sherpa.onnx.runtime.{{ dotnet_rid }}</PackageId>

    <!-- Signing -->
    <SignAssembly>false</SignAssembly>
    <PublicSign>false</PublicSign>
    <DelaySign>false</DelaySign>
  </PropertyGroup>

  <ItemGroup>
    <None Include="../README.md" Pack="true" PackagePath="/"/>
  </ItemGroup>

  <ItemGroup>
    <!-- Native library must be in native directory... -->
    <!-- If project is built as a STATIC_LIBRARY (e.g. Windows) then we don't have to include it -->
    <Content Include="
      {{ libs }}
    ">
      <PackagePath>runtimes/{{ dotnet_rid }}/native/%(Filename)%(Extension)</PackagePath>
      <Pack>true</Pack>
      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
    </Content>
  </ItemGroup>
</Project>


================================================
FILE: scripts/export_bpe_vocab.py
================================================
#!/usr/bin/env python3
# Copyright    2024  Xiaomi Corp.        (authors: Wei Kang)
#
# See ../../../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# You can install sentencepiece via:
#
#  pip install sentencepiece
#
# Due to an issue reported in
# https://github.com/google/sentencepiece/pull/642#issuecomment-857972030
#
# Please install a version >=0.1.96

import argparse
import codecs
from typing import Dict

try:
  import sentencepiece as spm
except ImportError:
    print('Please run')
    print('  pip install sentencepiece')
    print('before you continue')
    raise


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--bpe-model",
        type=str,
        help="The path to the bpe model.",
    )

    return parser.parse_args()


def main():
    args = get_args()
    model_file = args.bpe_model

    vocab_file = model_file.replace(".model", ".vocab")

    sp = spm.SentencePieceProcessor()
    sp.Load(model_file)
    vocabs = [sp.id_to_piece(id) for id in range(sp.get_piece_size())]
    with codecs.open(vocab_file, "w", "utf-8") as vfile:
        for v in vocabs:
            id = sp.piece_to_id(v)
            vfile.write(f"{v}\t{sp.get_score(id)}\n")
    print(f"Vocabulary file is written to {vocab_file}")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/flutter/.gitignore
================================================
!*.sh.in


================================================
FILE: scripts/flutter/build-android-streaming-asr.sh.in
================================================
#!/usr/bin/env bash
set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
log "SCRIPT_DIR: $SCRIPT_DIR"
log "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" $SHERPA_ONNX_DIR/CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
log "SHERPA_ONNX_VERSION: $SHERPA_ONNX_VERSION"

if [ -z $arch ]; then
  arch=x86_64
fi

log "arch: $arch"

{% for model in model_list %}
pushd $SHERPA_ONNX_DIR/flutter-examples/streaming_asr/

model_name={{ model.model_name }}
lang={{ model.lang }}
type={{ model.idx }}
short_name={{ model.short_name }}

rm -rf assets
mkdir assets

pushd assets

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/${model_name}.tar.bz2
tar xvf ${model_name}.tar.bz2

{{ model.cmd }}

rm -rf  *.tar.bz2
ls -lh $model_name

ls -lh
ls -lh *

popd

git checkout ./
sed -i.bak "s|   - assets/$|   - assets/\n    - assets/$model_name/|g" ./pubspec.yaml

sed -i.bak "s/final type = .*;$/final type = $type;/g" ./lib/streaming_asr.dart

{% if model.rule_fsts %}
  rule_fsts={{ model.rule_fsts }}
  sed -i.bak "s|ruleFsts: ''|ruleFsts: await copyAssetFile(\'assets/$rule_fsts\')|g"  ./lib/streaming_asr.dart
{% endif %}

git diff .

flutter pub get

flutter build apk --split-per-abi --release

pushd build/app/outputs/flutter-apk
ls -lh


for arch in armeabi-v7a arm64-v8a x86_64; do
  src=app-$arch-release.apk
  dst=$SHERPA_ONNX_DIR/sherpa-onnx-$SHERPA_ONNX_VERSION-$arch-asr-$short_name.apk
  mv $src $dst
done

pushd $SHERPA_ONNX_DIR
ls -lh *.apk
popd

popd

popd

{% endfor %}


================================================
FILE: scripts/flutter/build-android-tts.sh.in
================================================
#!/usr/bin/env bash
set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
log "SCRIPT_DIR: $SCRIPT_DIR"
log "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" $SHERPA_ONNX_DIR/CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
log "SHERPA_ONNX_VERSION: $SHERPA_ONNX_VERSION"

{% for tts_model in tts_model_list %}
pushd $SHERPA_ONNX_DIR/flutter-examples/tts/

git checkout .

rm -rf assets
mkdir assets
pushd assets

model_dir={{ tts_model.model_dir }}
model_name={{ tts_model.model_name }}
lang={{ tts_model.lang }}

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/$model_dir.tar.bz2
tar xf $model_dir.tar.bz2
rm $model_dir.tar.bz2

ls -lh
ls -lh *

popd # assets
# Now we are at the project root directory

./generate-asset-list.py

pushd lib

sed -i.bak "s|modelDir = ''|modelDir = \"$model_dir\"|" ./model.dart
sed -i.bak s/"modelName = ''"/"modelName = \"$model_name\""/ ./model.dart

{% if tts_model.rule_fsts %}
  rule_fsts={{ tts_model.rule_fsts }}
  sed -i.bak "s|ruleFsts = ''|ruleFsts = \"$rule_fsts\"|" ./model.dart
{% endif %}

{% if tts_model.rule_fars %}
  rule_fars={{ tts_model.rule_fars }}
  sed -i.bak "s|ruleFars = ''|ruleFars = \"$rule_fars\"|" ./model.dart
{% endif %}

{% if tts_model.data_dir %}
  data_dir={{ tts_model.data_dir }}
  sed -i.bak "s|dataDir = ''|dataDir = \"$data_dir\"|" ./model.dart
{% elif not tts_model.is_char %}
  sed -i.bak "s|lexicon = ''|lexicon = \"lexicon.txt\"|" ./model.dart
{% endif %}

git status

git diff .

popd #lib

flutter pub get

flutter build apk --split-per-abi --release

pushd build/app/outputs/flutter-apk
ls -lh

for arch in armeabi-v7a arm64-v8a x86_64; do
  src=app-$arch-release.apk
  dst=$SHERPA_ONNX_DIR/sherpa-onnx-$SHERPA_ONNX_VERSION-$arch-$lang-tts-$model_dir.apk
  mv $src $dst
done

pushd $SHERPA_ONNX_DIR
ls -lh *.apk
popd

popd

popd

{% endfor %}


================================================
FILE: scripts/flutter/build-linux-streaming-asr.sh.in
================================================
#!/usr/bin/env bash
set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
log "SCRIPT_DIR: $SCRIPT_DIR"
log "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" $SHERPA_ONNX_DIR/CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
log "SHERPA_ONNX_VERSION: $SHERPA_ONNX_VERSION"

if [ -z $arch ]; then
  arch=x86_64
fi

log "arch: $arch"

{% for model in model_list %}
pushd $SHERPA_ONNX_DIR/flutter-examples/streaming_asr/

model_name={{ model.model_name }}
lang={{ model.lang }}
type={{ model.idx }}
short_name={{ model.short_name }}

rm -rf assets
mkdir assets

pushd assets

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/${model_name}.tar.bz2
tar xvf ${model_name}.tar.bz2

{{ model.cmd }}

rm -rf  *.tar.bz2
ls -lh $model_name

ls -lh
ls -lh *

popd

git checkout ./
sed -i.bak "s|   - assets/$|   - assets/\n    - assets/$model_name/|g" ./pubspec.yaml

sed -i.bak "s/final type = .*;$/final type = $type;/g" ./lib/streaming_asr.dart

{% if model.rule_fsts %}
  rule_fsts={{ model.rule_fsts }}
  sed -i.bak "s|ruleFsts: ''|ruleFsts: await copyAssetFile(\'assets/$rule_fsts\')|g"  ./lib/streaming_asr.dart
{% endif %}

git diff .

flutter pub get
# flutter upgrade
# flutter pub upgrade

export FLUTTER_XCODE_ARCHS=$arch
log "FLUTTER_XCODE_ARCHS: $FLUTTER_XCODE_ARCHS"

flutter build linux

pushd build/linux/x64/release/

ls -lh
echo "----"
ls -lh bundle

echo "----"
ls -lh bundle/lib

echo "----"
ls -lh bundle/data
echo "======"
ls -lh bundle/data/flutter_assets


echo "----"
file bundle/streaming_asr

echo "----"
readelf -d bundle/streaming_asr

echo "----"
ldd bundle/streaming_asr

app=sherpa-onnx-$SHERPA_ONNX_VERSION-linux-$arch-$lang-streaming_asr
app=sherpa-onnx-$SHERPA_ONNX_VERSION-linux-$arch-asr-$lang-$short_name

mv bundle $app
tar cjf $app.tar.bz2 $app
rm -rf $app
mv $app.tar.bz2 $SHERPA_ONNX_DIR

pushd $SHERPA_ONNX_DIR
ls -lh *.tar.bz2
popd

popd

popd

{% endfor %}


================================================
FILE: scripts/flutter/build-linux-tts.sh.in
================================================
#!/usr/bin/env bash
set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
log "SCRIPT_DIR: $SCRIPT_DIR"
log "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" $SHERPA_ONNX_DIR/CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
log "SHERPA_ONNX_VERSION: $SHERPA_ONNX_VERSION"

if [ -z $arch ]; then
  arch=x86_64
fi

log "arch: $arch"

{% for tts_model in tts_model_list %}
pushd $SHERPA_ONNX_DIR/flutter-examples/tts/

git checkout .

rm -rf assets
mkdir assets

pushd assets

model_dir={{ tts_model.model_dir }}
model_name={{ tts_model.model_name }}
lang={{ tts_model.lang }}

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/$model_dir.tar.bz2
tar xf $model_dir.tar.bz2
rm $model_dir.tar.bz2

ls -lh
ls -lh *

popd # assets
# Now we are at the project root directory

./generate-asset-list.py

pushd lib

sed -i.bak "s|modelDir = ''|modelDir = \"$model_dir\"|" ./model.dart
sed -i.bak s/"modelName = ''"/"modelName = \"$model_name\""/ ./model.dart

{% if tts_model.rule_fsts %}
  rule_fsts={{ tts_model.rule_fsts }}
  sed -i.bak "s|ruleFsts = ''|ruleFsts = \"$rule_fsts\"|" ./model.dart
{% endif %}

{% if tts_model.rule_fars %}
  rule_fars={{ tts_model.rule_fars }}
  sed -i.bak "s|ruleFars = ''|ruleFars = \"$rule_fars\"|" ./model.dart
{% endif %}

{% if tts_model.data_dir %}
  data_dir={{ tts_model.data_dir }}
  sed -i.bak "s|dataDir = ''|dataDir = \"$data_dir\"|" ./model.dart
{% elif not tts_model.is_char %}
  sed -i.bak "s|lexicon = ''|lexicon = \"lexicon.txt\"|" ./model.dart
{% endif %}

git status

git diff .

popd #lib

flutter pub get

flutter build linux

pushd build/linux/x64/release/

ls -lh
echo "----"
ls -lh bundle

echo "----"
ls -lh bundle/lib

echo "----"
ls -lh bundle/data
echo "======"
ls -lh bundle/data/flutter_assets


echo "----"
file bundle/tts

echo "----"
readelf -d bundle/tts

echo "----"
ldd bundle/tts

app=sherpa-onnx-$SHERPA_ONNX_VERSION-linux-$arch-$lang-tts-$model_dir

mv bundle $app
tar cjf $app.tar.bz2 $app
rm -rf $app
mv $app.tar.bz2 $SHERPA_ONNX_DIR

pushd $SHERPA_ONNX_DIR
ls -lh *.tar.bz2
popd

popd

popd
{% endfor %}


================================================
FILE: scripts/flutter/build-macos-streaming-asr.sh.in
================================================
#!/usr/bin/env bash
set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
log "SCRIPT_DIR: $SCRIPT_DIR"
log "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" $SHERPA_ONNX_DIR/CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
log "SHERPA_ONNX_VERSION: $SHERPA_ONNX_VERSION"

if [ -z $arch ]; then
  arch=x86_64
fi

log "arch: $arch"

{% for model in model_list %}
pushd $SHERPA_ONNX_DIR/flutter-examples/streaming_asr/

model_name={{ model.model_name }}
lang={{ model.lang }}
type={{ model.idx }}
short_name={{ model.short_name }}

rm -rf assets
mkdir assets

pushd assets

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/${model_name}.tar.bz2
tar xvf ${model_name}.tar.bz2

{{ model.cmd }}

rm -rf  *.tar.bz2
ls -lh $model_name

ls -lh
ls -lh *

popd

git checkout ./
sed -i.bak "s|   - assets/$|   - assets/\n    - assets/$model_name/|g" ./pubspec.yaml

sed -i.bak "s/final type = .*;$/final type = $type;/g" ./lib/streaming_asr.dart

{% if model.rule_fsts %}
  rule_fsts={{ model.rule_fsts }}
  sed -i.bak "s|ruleFsts: ''|ruleFsts: await copyAssetFile(\'assets/$rule_fsts\')|g"  ./lib/streaming_asr.dart
{% endif %}

git diff .

flutter pub get

export FLUTTER_XCODE_ARCHS=$arch
log "FLUTTER_XCODE_ARCHS: $FLUTTER_XCODE_ARCHS"

flutter build macos

pushd build/macos/Build/Products/Release/
ls -lh

app=sherpa-onnx-$SHERPA_ONNX_VERSION-osx-$arch-asr-$lang-$short_name.app
mv streaming_asr.app $app
tar cjf $app.tar.bz2 $app
rm -rf $app
ls -lh
mv $app.tar.bz2 $SHERPA_ONNX_DIR

pushd $SHERPA_ONNX_DIR
ls -lh *.tar.bz2
popd

popd

popd

{% endfor %}


================================================
FILE: scripts/flutter/build-macos-tts.sh.in
================================================
#!/usr/bin/env bash
set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
log "SCRIPT_DIR: $SCRIPT_DIR"
log "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" $SHERPA_ONNX_DIR/CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
log "SHERPA_ONNX_VERSION: $SHERPA_ONNX_VERSION"

if [ -z $arch ]; then
  arch=x86_64
fi

log "arch: $arch"

{% for tts_model in tts_model_list %}
pushd $SHERPA_ONNX_DIR/flutter-examples/tts/

git checkout .

rm -rf assets
mkdir assets

pushd assets

model_dir={{ tts_model.model_dir }}
model_name={{ tts_model.model_name }}
lang={{ tts_model.lang }}

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/$model_dir.tar.bz2
tar xf $model_dir.tar.bz2
rm $model_dir.tar.bz2

ls -lh
ls -lh *

popd # assets
# Now we are at the project root directory

./generate-asset-list.py

pushd lib

sed -i.bak "s|modelDir = ''|modelDir = \"$model_dir\"|" ./model.dart
sed -i.bak s/"modelName = ''"/"modelName = \"$model_name\""/ ./model.dart

{% if tts_model.rule_fsts %}
  rule_fsts={{ tts_model.rule_fsts }}
  sed -i.bak "s|ruleFsts = ''|ruleFsts = \"$rule_fsts\"|" ./model.dart
{% endif %}

{% if tts_model.rule_fars %}
  rule_fars={{ tts_model.rule_fars }}
  sed -i.bak "s|ruleFars = ''|ruleFars = \"$rule_fars\"|" ./model.dart
{% endif %}

{% if tts_model.data_dir %}
  data_dir={{ tts_model.data_dir }}
  sed -i.bak "s|dataDir = ''|dataDir = \"$data_dir\"|" ./model.dart
{% elif not tts_model.is_char %}
  sed -i.bak "s|lexicon = ''|lexicon = \"lexicon.txt\"|" ./model.dart
{% endif %}

git status

git diff .

popd #lib

flutter pub get

export FLUTTER_XCODE_ARCHS=$arch
log "FLUTTER_XCODE_ARCHS: $FLUTTER_XCODE_ARCHS"

flutter build macos

pushd build/macos/Build/Products/Release/
ls -lh

app=sherpa-onnx-$SHERPA_ONNX_VERSION-osx-$arch-$lang-tts-$model_dir.app

mv tts.app $app
tar cjf $app.tar.bz2 $app
ls -lh
mv $app.tar.bz2 $SHERPA_ONNX_DIR
rm -rf $app

pushd $SHERPA_ONNX_DIR
ls -lh *.tar.bz2
popd

popd

popd

{% endfor %}


================================================
FILE: scripts/flutter/build-windows-streaming-asr.sh.in
================================================
#!/usr/bin/env bash
set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
log "SCRIPT_DIR: $SCRIPT_DIR"
log "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" $SHERPA_ONNX_DIR/CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
log "SHERPA_ONNX_VERSION: $SHERPA_ONNX_VERSION"

{% for model in model_list %}
pushd $SHERPA_ONNX_DIR/flutter-examples/streaming_asr/

model_name={{ model.model_name }}
lang={{ model.lang }}
type={{ model.idx }}
short_name={{ model.short_name }}

rm -rf build
rm -rf assets
mkdir assets

pushd assets

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/${model_name}.tar.bz2
tar xvf ${model_name}.tar.bz2

{{ model.cmd }}

rm -rf  *.tar.bz2
ls -lh $model_name

ls -lh
ls -lh *

popd

git checkout ./
sed -i.bak "s|   - assets/$|   - assets/\n    - assets/$model_name/|g" ./pubspec.yaml

sed -i.bak "s/final type = .*;$/final type = $type;/g" ./lib/streaming_asr.dart

{% if model.rule_fsts %}
  rule_fsts={{ model.rule_fsts }}
  sed -i.bak "s|ruleFsts: ''|ruleFsts: await copyAssetFile(\'assets/$rule_fsts\')|g"  ./lib/streaming_asr.dart
{% endif %}

git diff .

flutter pub get

flutter build windows

pushd build/windows/x64/runner/
ls -lh

dst=sherpa-onnx-$SHERPA_ONNX_VERSION-win-x64-asr-$lang-$short_name
mv Release $dst
tar cjf $dst.tar.bz2 ./$dst
rm -rf $dst
mv $dst.tar.bz2 $SHERPA_ONNX_DIR

pushd $SHERPA_ONNX_DIR
ls -lh *.tar.bz2
popd

popd

popd

{% endfor %}


================================================
FILE: scripts/flutter/build-windows-tts.sh.in
================================================
#!/usr/bin/env bash
set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
log "SCRIPT_DIR: $SCRIPT_DIR"
log "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" $SHERPA_ONNX_DIR/CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
log "SHERPA_ONNX_VERSION: $SHERPA_ONNX_VERSION"

if [ -z $arch ]; then
  arch=x64
fi

log "arch: $arch"

{% for tts_model in tts_model_list %}
pushd $SHERPA_ONNX_DIR/flutter-examples/tts/

git checkout .

rm -rf assets

mkdir -p assets

pushd assets

model_dir={{ tts_model.model_dir }}
model_name={{ tts_model.model_name }}
lang={{ tts_model.lang }}

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/$model_dir.tar.bz2
tar xf $model_dir.tar.bz2
rm $model_dir.tar.bz2

ls -lh

ls -lh *

popd # assets
# Now we are at the project root directory

./generate-asset-list.py

pushd lib

sed -i.bak "s|modelDir = ''|modelDir = \"$model_dir\"|" ./model.dart
sed -i.bak s/"modelName = ''"/"modelName = \"$model_name\""/ ./model.dart

{% if tts_model.rule_fsts %}
  rule_fsts={{ tts_model.rule_fsts }}
  sed -i.bak "s|ruleFsts = ''|ruleFsts = \"$rule_fsts\"|" ./model.dart
{% endif %}

{% if tts_model.rule_fars %}
  rule_fars={{ tts_model.rule_fars }}
  sed -i.bak "s|ruleFars = ''|ruleFars = \"$rule_fars\"|" ./model.dart
{% endif %}

{% if tts_model.data_dir %}
  data_dir={{ tts_model.data_dir }}
  sed -i.bak "s|dataDir = ''|dataDir = \"$data_dir\"|" ./model.dart
{% elif not tts_model.is_char %}
  sed -i.bak "s|lexicon = ''|lexicon = \"lexicon.txt\"|" ./model.dart
{% endif %}

git status

git diff .

popd #lib

flutter pub get

flutter build windows

pushd build/windows/x64/runner/
ls -lh
echo "---"

ls -lh ./Release/

dst=sherpa-onnx-$SHERPA_ONNX_VERSION-win-$arch-$lang-tts-$model_dir
mv Release $dst
tar cjf $dst.tar.bz2 ./$dst
rm -rf $dst
mv $dst.tar.bz2 $SHERPA_ONNX_DIR

pushd $SHERPA_ONNX_DIR
ls -lh *.tar.bz2
popd

popd

popd

{% endfor %}


================================================
FILE: scripts/flutter/generate-streaming-asr.py
================================================
#!/usr/bin/env python3

import argparse
from dataclasses import dataclass
import jinja2


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--total",
        type=int,
        default=1,
        help="Number of runners",
    )
    parser.add_argument(
        "--index",
        type=int,
        default=0,
        help="Index of the current runner",
    )

    return parser.parse_args()


@dataclass
class Model:
    # We will download
    # https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/{model_name}.tar.bz2
    model_name: str

    # The type of the model, e..g, 0, 1, 2. It is hardcoded in the flutter code
    # See flutter-example/streaming_asr/lib/online_model.dart
    idx: int

    # e.g., zh, en, zh_en
    lang: str

    # e.g., whisper, paraformer, zipformer
    short_name: str = ""

    # cmd is used to remove extra files from the model directory
    cmd: str = ""

    rule_fsts: str = ""


def get_models():
    models = [
        Model(
            model_name="sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20",
            idx=0,
            lang="bilingual_zh_en",
            short_name="zipformer_2023_02_20",
            rule_fsts="itn_zh_number.fst",
            cmd="""
            if [ ! -f itn_zh_number.fst ]; then
              curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
            fi
            pushd $model_name
            rm -fv encoder-epoch-99-avg-1.onnx
            rm -fv decoder-epoch-99-avg-1.int8.onnx
            rm -fv joiner-epoch-99-avg-1.int8.onnx

            rm -fv *.sh
            rm -fv bpe.model
            rm -fv README.md
            rm -fv .gitattributes
            rm -fv *state*
            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-en-2023-06-26",
            idx=1,
            lang="en",
            short_name="zipformer2_2023_06_26",
            cmd="""
            pushd $model_name
            rm -fv encoder-epoch-99-avg-1-chunk-16-left-128.onnx
            rm -fv decoder-epoch-99-avg-1-chunk-16-left-128.int8.onnx
            rm -fv joiner-epoch-99-avg-1-chunk-16-left-128.int8.onnx

            rm -fv README.md
            rm -fv bpe.model
            rm -rfv test_wavs

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="icefall-asr-zipformer-streaming-wenetspeech-20230615",
            idx=2,
            lang="zh",
            short_name="zipformer2_wenetspeech_2023_06_15",
            rule_fsts="itn_zh_number.fst",
            cmd="""
            if [ ! -f itn_zh_number.fst ]; then
              curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
            fi
            pushd $model_name
            rm -fv exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx
            rm -fv exp/decoder-epoch-12-avg-4-chunk-16-left-128.int8.onnx
            rm -fv exp/joiner-epoch-12-avg-4-chunk-16-left-128.int8.onnx

            rm -fv data/lang_char/lexicon.txt
            rm -fv data/lang_char/words.txt
            rm -rfv test_wavs
            rm -fv README.md

            ls -lh exp/
            ls -lh data/lang_char

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-fr-2023-04-14",
            idx=3,
            lang="fr",
            short_name="zipformer_2023_04_14",
            cmd="""
            pushd $model_name
            rm -fv encoder-epoch-29-avg-9-with-averaged-model.onnx
            rm -fv decoder-epoch-29-avg-9-with-averaged-model.int8.onnx
            rm -fv joiner-epoch-29-avg-9-with-averaged-model.int8.onnx

            rm -fv *.sh
            rm -rfv test_wavs
            rm README.md

            ls -lh

            popd
            """,
        ),
    ]

    return models


def main():
    args = get_args()

    index = args.index
    total = args.total
    assert 0 <= index < total, (index, total)

    all_model_list = get_models()

    num_models = len(all_model_list)

    num_per_runner = num_models // total
    if num_per_runner <= 0:
        raise ValueError(f"num_models: {num_models}, num_runners: {total}")

    start = index * num_per_runner
    end = start + num_per_runner

    remaining = num_models - args.total * num_per_runner

    print(f"{index}/{total}: {start}-{end}/{num_models}")

    d = dict()
    d["model_list"] = all_model_list[start:end]
    if index < remaining:
        s = args.total * num_per_runner + index
        d["model_list"].append(all_model_list[s])
        print(f"{s}/{num_models}")

    filename_list = [
        "./build-macos-streaming-asr.sh",
        "./build-linux-streaming-asr.sh",
        "./build-windows-streaming-asr.sh",
        "./build-android-streaming-asr.sh",
    ]
    for filename in filename_list:
        environment = jinja2.Environment()
        with open(f"{filename}.in") as f:
            s = f.read()
        template = environment.from_string(s)

        s = template.render(**d)
        with open(filename, "w") as f:
            print(s, file=f)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/flutter/generate-tts.py
================================================
#!/usr/bin/env python3

import argparse
from dataclasses import dataclass
from typing import List, Optional

import jinja2


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--total",
        type=int,
        default=1,
        help="Number of runners",
    )
    parser.add_argument(
        "--index",
        type=int,
        default=0,
        help="Index of the current runner",
    )
    return parser.parse_args()


@dataclass
class TtsModel:
    model_dir: str
    model_name: str = ""
    lang: str = ""  # en, zh, fr, de, etc.
    rule_fsts: Optional[List[str]] = None
    rule_fars: Optional[List[str]] = None
    data_dir: Optional[str] = None
    dict_dir: Optional[str] = None
    is_char: bool = False


def get_coqui_models() -> List[TtsModel]:
    # English (coqui-ai/TTS)
    models = [
        TtsModel(model_dir="vits-coqui-en-ljspeech"),
        TtsModel(model_dir="vits-coqui-en-ljspeech-neon"),
        TtsModel(model_dir="vits-coqui-en-vctk"),
        #  TtsModel(model_dir="vits-coqui-en-jenny"),
    ]

    for m in models:
        m.data_dir = m.model_dir + "/" + "espeak-ng-data"
        m.model_name = "model.onnx"
        m.lang = "en"

    character_models = [
        TtsModel(model_dir="vits-coqui-bg-cv", lang="bg"),
        TtsModel(model_dir="vits-coqui-bn-custom_female", lang="bn"),
        TtsModel(model_dir="vits-coqui-cs-cv", lang="cs"),
        TtsModel(model_dir="vits-coqui-da-cv", lang="da"),
        TtsModel(model_dir="vits-coqui-de-css10", lang="de"),
        TtsModel(model_dir="vits-coqui-es-css10", lang="es"),
        TtsModel(model_dir="vits-coqui-et-cv", lang="et"),
        TtsModel(model_dir="vits-coqui-fi-css10", lang="fi"),
        TtsModel(model_dir="vits-coqui-fr-css10", lang="fr"),
        TtsModel(model_dir="vits-coqui-ga-cv", lang="ga"),
        TtsModel(model_dir="vits-coqui-hr-cv", lang="hr"),
        TtsModel(model_dir="vits-coqui-lt-cv", lang="lt"),
        TtsModel(model_dir="vits-coqui-lv-cv", lang="lv"),
        TtsModel(model_dir="vits-coqui-mt-cv", lang="mt"),
        TtsModel(model_dir="vits-coqui-nl-css10", lang="nl"),
        TtsModel(model_dir="vits-coqui-pl-mai_female", lang="pl"),
        TtsModel(model_dir="vits-coqui-pt-cv", lang="pt"),
        TtsModel(model_dir="vits-coqui-ro-cv", lang="ro"),
        TtsModel(model_dir="vits-coqui-sk-cv", lang="sk"),
        TtsModel(model_dir="vits-coqui-sl-cv", lang="sl"),
        TtsModel(model_dir="vits-coqui-sv-cv", lang="sv"),
        TtsModel(model_dir="vits-coqui-uk-mai", lang="uk"),
    ]
    for m in character_models:
        m.is_char = True
        m.model_name = "model.onnx"

    return models + character_models


def get_piper_models() -> List[TtsModel]:
    models = [
        #  TtsModel(model_dir="vits-piper-es_ES-mls_10246-low"),
        #  TtsModel(model_dir="vits-piper-es_ES-mls_9972-low"),
        #  TtsModel(model_dir="vits-piper-pl_PL-mls_6892-low"),
        TtsModel(model_dir="vits-piper-ar_JO-kareem-low"),
        TtsModel(model_dir="vits-piper-ar_JO-kareem-medium"),
        TtsModel(model_dir="vits-piper-ca_ES-upc_ona-medium"),
        TtsModel(model_dir="vits-piper-ca_ES-upc_ona-x_low"),
        TtsModel(model_dir="vits-piper-ca_ES-upc_pau-x_low"),
        TtsModel(model_dir="vits-piper-ca_ES-upc_pau-x_low"),
        TtsModel(model_dir="vits-piper-cs_CZ-jirka-medium"),
        TtsModel(model_dir="vits-piper-cy_GB-gwryw_gogleddol-medium"),
        TtsModel(model_dir="vits-piper-da_DK-talesyntese-medium"),
        TtsModel(model_dir="vits-piper-de_DE-eva_k-x_low"),
        TtsModel(model_dir="vits-piper-de_DE-karlsson-low"),
        TtsModel(model_dir="vits-piper-de_DE-kerstin-low"),
        #  TtsModel(model_dir="vits-piper-de_DE-mls-medium"),
        TtsModel(model_dir="vits-piper-de_DE-pavoque-low"),
        TtsModel(model_dir="vits-piper-de_DE-ramona-low"),
        TtsModel(model_dir="vits-piper-de_DE-thorsten-high"),
        TtsModel(model_dir="vits-piper-de_DE-thorsten-low"),
        TtsModel(model_dir="vits-piper-de_DE-thorsten-medium"),
        TtsModel(model_dir="vits-piper-de_DE-thorsten_emotional-medium"),
        TtsModel(model_dir="vits-piper-el_GR-rapunzelina-low"),
        TtsModel(model_dir="vits-piper-en_GB-alan-low"),
        TtsModel(model_dir="vits-piper-en_GB-alan-medium"),
        TtsModel(model_dir="vits-piper-en_GB-alba-medium"),
        TtsModel(model_dir="vits-piper-en_GB-aru-medium"),
        TtsModel(model_dir="vits-piper-en_GB-cori-high"),
        TtsModel(model_dir="vits-piper-en_GB-cori-medium"),
        TtsModel(model_dir="vits-piper-en_GB-jenny_dioco-medium"),
        TtsModel(model_dir="vits-piper-en_GB-northern_english_male-medium"),
        TtsModel(model_dir="vits-piper-en_GB-semaine-medium"),
        TtsModel(model_dir="vits-piper-en_GB-southern_english_female-low"),
        TtsModel(model_dir="vits-piper-en_GB-southern_english_female-medium"),
        TtsModel(model_dir="vits-piper-en_GB-southern_english_male-medium"),
        TtsModel(model_dir="vits-piper-en_GB-sweetbbak-amy"),
        TtsModel(model_dir="vits-piper-en_GB-vctk-medium"),
        TtsModel(model_dir="vits-piper-en_US-amy-low"),
        TtsModel(model_dir="vits-piper-en_US-amy-medium"),
        TtsModel(model_dir="vits-piper-en_US-arctic-medium"),
        TtsModel(model_dir="vits-piper-en_US-bryce-medium"),
        TtsModel(model_dir="vits-piper-en_US-danny-low"),
        TtsModel(model_dir="vits-piper-en_US-glados"),
        TtsModel(model_dir="vits-piper-en_US-hfc_female-medium"),
        TtsModel(model_dir="vits-piper-en_US-hfc_male-medium"),
        TtsModel(model_dir="vits-piper-en_US-joe-medium"),
        TtsModel(model_dir="vits-piper-en_US-john-medium"),
        TtsModel(model_dir="vits-piper-en_US-kathleen-low"),
        TtsModel(model_dir="vits-piper-en_US-kristin-medium"),
        TtsModel(model_dir="vits-piper-en_US-kusal-medium"),
        TtsModel(model_dir="vits-piper-en_US-l2arctic-medium"),
        TtsModel(model_dir="vits-piper-en_US-lessac-high"),
        TtsModel(model_dir="vits-piper-en_US-lessac-low"),
        TtsModel(model_dir="vits-piper-en_US-lessac-medium"),
        TtsModel(model_dir="vits-piper-en_US-libritts-high"),
        TtsModel(model_dir="vits-piper-en_US-libritts_r-medium"),
        TtsModel(model_dir="vits-piper-en_US-ljspeech-high"),
        TtsModel(model_dir="vits-piper-en_US-ljspeech-medium"),
        TtsModel(model_dir="vits-piper-en_US-norman-medium"),
        TtsModel(model_dir="vits-piper-en_US-ryan-high"),
        TtsModel(model_dir="vits-piper-en_US-ryan-low"),
        TtsModel(model_dir="vits-piper-en_US-ryan-medium"),
        TtsModel(model_dir="vits-piper-es-glados-medium"),
        TtsModel(model_dir="vits-piper-es_ES-carlfm-x_low"),
        TtsModel(model_dir="vits-piper-es_ES-davefx-medium"),
        TtsModel(model_dir="vits-piper-es_ES-sharvard-medium"),
        TtsModel(model_dir="vits-piper-es_MX-ald-medium"),
        TtsModel(model_dir="vits-piper-es_MX-claude-high"),
        TtsModel(model_dir="vits-piper-fa_IR-amir-medium"),
        TtsModel(model_dir="vits-piper-fa_IR-gyro-medium"),
        TtsModel(model_dir="vits-piper-fi_FI-harri-low"),
        TtsModel(model_dir="vits-piper-fi_FI-harri-medium"),
        #  TtsModel(model_dir="vits-piper-fr_FR-mls-medium"),
        TtsModel(model_dir="vits-piper-fr_FR-siwis-low"),
        TtsModel(model_dir="vits-piper-fr_FR-siwis-medium"),
        TtsModel(model_dir="vits-piper-fr_FR-tom-medium"),
        TtsModel(model_dir="vits-piper-fr_FR-upmc-medium"),
        TtsModel(model_dir="vits-piper-hu_HU-anna-medium"),
        TtsModel(model_dir="vits-piper-hu_HU-berta-medium"),
        TtsModel(model_dir="vits-piper-hu_HU-imre-medium"),
        TtsModel(model_dir="vits-piper-is_IS-bui-medium"),
        TtsModel(model_dir="vits-piper-is_IS-salka-medium"),
        TtsModel(model_dir="vits-piper-is_IS-steinn-medium"),
        TtsModel(model_dir="vits-piper-is_IS-ugla-medium"),
        TtsModel(model_dir="vits-piper-it_IT-paola-medium"),
        TtsModel(model_dir="vits-piper-it_IT-riccardo-x_low"),
        TtsModel(model_dir="vits-piper-ka_GE-natia-medium"),
        TtsModel(model_dir="vits-piper-kk_KZ-iseke-x_low"),
        TtsModel(model_dir="vits-piper-kk_KZ-issai-high"),
        TtsModel(model_dir="vits-piper-kk_KZ-raya-x_low"),
        TtsModel(model_dir="vits-piper-lb_LU-marylux-medium"),
        TtsModel(model_dir="vits-piper-ne_NP-google-medium"),
        TtsModel(model_dir="vits-piper-ne_NP-google-x_low"),
        TtsModel(model_dir="vits-piper-nl_BE-nathalie-medium"),
        TtsModel(model_dir="vits-piper-nl_BE-nathalie-x_low"),
        TtsModel(model_dir="vits-piper-nl_BE-rdh-medium"),
        TtsModel(model_dir="vits-piper-nl_BE-rdh-x_low"),
        #  TtsModel(model_dir="vits-piper-nl_NL-mls-medium"),
        #  TtsModel(model_dir="vits-piper-nl_NL-mls_5809-low"),
        #  TtsModel(model_dir="vits-piper-nl_NL-mls_7432-low"),
        TtsModel(model_dir="vits-piper-no_NO-talesyntese-medium"),
        TtsModel(model_dir="vits-piper-pl_PL-darkman-medium"),
        TtsModel(model_dir="vits-piper-pl_PL-gosia-medium"),
        TtsModel(model_dir="vits-piper-pl_PL-mc_speech-medium"),
        TtsModel(model_dir="vits-piper-pt_BR-edresson-low"),
        TtsModel(model_dir="vits-piper-pt_BR-faber-medium"),
        TtsModel(model_dir="vits-piper-pt_PT-tugao-medium"),
        TtsModel(model_dir="vits-piper-ro_RO-mihai-medium"),
        TtsModel(model_dir="vits-piper-ru_RU-denis-medium"),
        TtsModel(model_dir="vits-piper-ru_RU-dmitri-medium"),
        TtsModel(model_dir="vits-piper-ru_RU-irina-medium"),
        TtsModel(model_dir="vits-piper-ru_RU-ruslan-medium"),
        TtsModel(model_dir="vits-piper-sk_SK-lili-medium"),
        TtsModel(model_dir="vits-piper-sl_SI-artur-medium"),
        TtsModel(model_dir="vits-piper-sr_RS-serbski_institut-medium"),
        TtsModel(model_dir="vits-piper-sv_SE-nst-medium"),
        TtsModel(model_dir="vits-piper-sw_CD-lanfrica-medium"),
        TtsModel(model_dir="vits-piper-tr_TR-dfki-medium"),
        TtsModel(model_dir="vits-piper-tr_TR-fahrettin-medium"),
        TtsModel(model_dir="vits-piper-tr_TR-fettah-medium"),
        TtsModel(model_dir="vits-piper-uk_UA-lada-x_low"),
        TtsModel(model_dir="vits-piper-uk_UA-ukrainian_tts-medium"),
        TtsModel(model_dir="vits-piper-vi_VN-25hours_single-low"),
        TtsModel(model_dir="vits-piper-vi_VN-vais1000-medium"),
        TtsModel(model_dir="vits-piper-vi_VN-vivos-x_low"),
        TtsModel(model_dir="vits-piper-zh_CN-huayan-medium"),
    ]

    for m in models:
        m.data_dir = m.model_dir + "/" + "espeak-ng-data"
        m.model_name = m.model_dir[len("vits-piper-") :] + ".onnx"
        m.lang = m.model_dir.split("-")[2][:2]

    return models


def get_mimic3_models() -> List[TtsModel]:
    models = [
        TtsModel(model_dir="vits-mimic3-af_ZA-google-nwu_low"),
        TtsModel(model_dir="vits-mimic3-bn-multi_low"),
        TtsModel(model_dir="vits-mimic3-es_ES-m-ailabs_low"),
        TtsModel(model_dir="vits-mimic3-fa-haaniye_low"),
        TtsModel(model_dir="vits-mimic3-fi_FI-harri-tapani-ylilammi_low"),
        TtsModel(model_dir="vits-mimic3-gu_IN-cmu-indic_low"),
        TtsModel(model_dir="vits-mimic3-hu_HU-diana-majlinger_low"),
        TtsModel(model_dir="vits-mimic3-ko_KO-kss_low"),
        TtsModel(model_dir="vits-mimic3-ne_NP-ne-google_low"),
        TtsModel(model_dir="vits-mimic3-pl_PL-m-ailabs_low"),
        TtsModel(model_dir="vits-mimic3-tn_ZA-google-nwu_low"),
        TtsModel(model_dir="vits-mimic3-vi_VN-vais1000_low"),
    ]
    for m in models:
        m.data_dir = m.model_dir + "/" + "espeak-ng-data"
        m.model_name = m.model_dir[len("vits-mimic3-") :] + ".onnx"
        m.lang = m.model_dir.split("-")[2][:2]

    return models


def get_vits_models() -> List[TtsModel]:
    chinese_models = [
        # Chinese
        TtsModel(
            model_dir="vits-icefall-zh-aishell3",
            model_name="model.onnx",
            lang="zh",
            rule_fsts="vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/number.fst,vits-icefall-zh-aishell3/new_heteronym.fst",
            rule_fars="vits-icefall-zh-aishell3/rule.far",
        ),
        TtsModel(
            model_dir="vits-zh-aishell3",
            model_name="vits-aishell3.onnx",
            lang="zh",
        ),
        TtsModel(
            model_dir="vits-zh-hf-doom",
            model_name="doom.onnx",
            lang="zh",
        ),
        TtsModel(
            model_dir="vits-zh-hf-echo",
            model_name="echo.onnx",
            lang="zh",
        ),
        TtsModel(
            model_dir="vits-zh-hf-zenyatta",
            model_name="zenyatta.onnx",
            lang="zh",
        ),
        TtsModel(
            model_dir="vits-zh-hf-abyssinvoker",
            model_name="abyssinvoker.onnx",
            lang="zh",
        ),
        TtsModel(
            model_dir="vits-zh-hf-keqing",
            model_name="keqing.onnx",
            lang="zh",
        ),
        TtsModel(
            model_dir="vits-zh-hf-eula",
            model_name="eula.onnx",
            lang="zh",
        ),
        TtsModel(
            model_dir="vits-zh-hf-bronya",
            model_name="bronya.onnx",
            lang="zh",
        ),
        TtsModel(
            model_dir="vits-zh-hf-theresa",
            model_name="theresa.onnx",
            lang="zh",
        ),
        TtsModel(
            model_dir="vits-zh-hf-fanchen-wnj",
            model_name="vits-zh-hf-fanchen-wnj.onnx",
            lang="zh",
        ),
        TtsModel(
            model_dir="vits-melo-tts-zh_en",
            model_name="model.onnx",
            lang="zh_en",
        ),
        TtsModel(
            model_dir="vits-zh-hf-fanchen-C",
            model_name="vits-zh-hf-fanchen-C.onnx",
            lang="zh",
        ),
        TtsModel(
            model_dir="vits-zh-hf-fanchen-ZhiHuiLaoZhe",
            model_name="vits-zh-hf-fanchen-ZhiHuiLaoZhe.onnx",
            lang="zh",
        ),
        TtsModel(
            model_dir="vits-zh-hf-fanchen-ZhiHuiLaoZhe_new",
            model_name="vits-zh-hf-fanchen-ZhiHuiLaoZhe_new.onnx",
            lang="zh",
        ),
        TtsModel(
            model_dir="vits-zh-hf-fanchen-unity",
            model_name="vits-zh-hf-fanchen-unity.onnx",
            lang="zh",
        ),
        TtsModel(
            model_dir="sherpa-onnx-vits-zh-ll",
            model_name="model.onnx",
            lang="zh",
        ),
    ]

    rule_fsts = ["phone.fst", "date.fst", "number.fst"]
    for m in chinese_models:
        s = [f"{m.model_dir}/{r}" for r in rule_fsts]
        if (
            "vits-zh-hf" in m.model_dir
            or "sherpa-onnx-vits-zh-ll" == m.model_dir
            or "melo-tts" in m.model_dir
        ):
            m.dict_dir = m.model_dir + "/dict"
        else:
            m.rule_fars = f"{m.model_dir}/rule.far"

        m.rule_fsts = ",".join(s)

    all_models = chinese_models + [
        TtsModel(
            model_dir="vits-cantonese-hf-xiaomaiiwn",
            model_name="vits-cantonese-hf-xiaomaiiwn.onnx",
            lang="cantonese",
            rule_fsts="vits-cantonese-hf-xiaomaiiwn/rule.fst",
        ),
        # English (US)
        TtsModel(model_dir="vits-vctk", model_name="vits-vctk.onnx", lang="en"),
        #  TtsModel(model_dir="vits-ljs", model_name="vits-ljs.onnx", lang="en"),
        # fmt: on
    ]

    return all_models


def main():
    args = get_args()
    index = args.index
    total = args.total
    assert 0 <= index < total, (index, total)
    d = dict()

    all_model_list = get_vits_models()
    all_model_list += get_piper_models()
    all_model_list += get_mimic3_models()
    all_model_list += get_coqui_models()

    num_models = len(all_model_list)

    num_per_runner = num_models // total
    if num_per_runner <= 0:
        raise ValueError(f"num_models: {num_models}, num_runners: {total}")

    start = index * num_per_runner
    end = start + num_per_runner

    remaining = num_models - args.total * num_per_runner

    print(
        "{index}/{total}: {start}-{end}/{num_models}".format(
            index=index,
            total=total,
            start=start,
            end=end,
            num_models=num_models,
        )
    )

    d["tts_model_list"] = all_model_list[start:end]
    if index < remaining:
        s = args.total * num_per_runner + index
        d["tts_model_list"].append(all_model_list[s])
        print("{s}/{num_models}".format(s=s, num_models=num_models))

    filename_list = [
        "./build-macos-tts.sh",
        "./build-linux-tts.sh",
        "./build-android-tts.sh",
        "./build-windows-tts.sh",
    ]
    for filename in filename_list:
        environment = jinja2.Environment()
        with open("{filename}.in".format(filename=filename)) as f:
            s = f.read()
        template = environment.from_string(s)

        s = template.render(**d)
        with open(filename, "w") as f:
            print(s, file=f)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/go/README.md
================================================
# Introduction

- [./_internal](./_internal) is for testing only. As a general user, you don't
need to care about it.


================================================
FILE: scripts/go/_internal/.gitignore
================================================
!*.sh
go.sum


================================================
FILE: scripts/go/_internal/add-punctuation/go.mod
================================================
module add-punctuation

go 1.17

replace github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx => ../


================================================
FILE: scripts/go/_internal/add-punctuation-online/go.mod
================================================
module add-punctuation-online

go 1.17

replace github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx => ../


================================================
FILE: scripts/go/_internal/build_darwin_amd64.go
================================================
//go:build darwin && amd64 && !ios

package sherpa_onnx

// #cgo LDFLAGS: -L ${SRCDIR}/lib/x86_64-apple-darwin -lsherpa-onnx-c-api -lonnxruntime -Wl,-rpath,${SRCDIR}/lib/x86_64-apple-darwin
import "C"


================================================
FILE: scripts/go/_internal/build_darwin_arm64.go
================================================
//go:build darwin && arm64 && !ios

package sherpa_onnx

// #cgo LDFLAGS: -L ${SRCDIR}/lib/aarch64-apple-darwin -lsherpa-onnx-c-api -lonnxruntime -Wl,-rpath,${SRCDIR}/lib/aarch64-apple-darwin
import "C"


================================================
FILE: scripts/go/_internal/build_linux_amd64.go
================================================
//go:build !android && linux && amd64 && !musl

package sherpa_onnx

// #cgo LDFLAGS: -L ${SRCDIR}/lib/x86_64-unknown-linux-gnu -lsherpa-onnx-c-api -lonnxruntime -Wl,-rpath,${SRCDIR}/lib/x86_64-unknown-linux-gnu
import "C"


================================================
FILE: scripts/go/_internal/build_linux_arm.go
================================================
//go:build linux && arm && !arm7

package sherpa_onnx

// #cgo LDFLAGS: -L ${SRCDIR}/lib/arm-unknown-linux-gnueabihf -lsherpa-onnx-c-api -lonnxruntime -Wl,-rpath,${SRCDIR}/lib/arm-unknown-linux-gnueabihf
import "C"


================================================
FILE: scripts/go/_internal/build_linux_arm64.go
================================================
//go:build linux && arm64

package sherpa_onnx

// #cgo LDFLAGS: -L ${SRCDIR}/lib/aarch64-unknown-linux-gnu -lsherpa-onnx-c-api -lonnxruntime -Wl,-rpath,${SRCDIR}/lib/aarch64-unknown-linux-gnu
import "C"


================================================
FILE: scripts/go/_internal/build_windows_386.go
================================================
//go:build windows && 386

package sherpa_onnx

// #cgo LDFLAGS: -L ${SRCDIR}/lib/i686-pc-windows-gnu -lsherpa-onnx-c-api -lonnxruntime
import "C"


================================================
FILE: scripts/go/_internal/build_windows_amd64.go
================================================
//go:build windows && amd64

package sherpa_onnx

// #cgo LDFLAGS: -L ${SRCDIR}/lib/x86_64-pc-windows-gnu -lsherpa-onnx-c-api -lonnxruntime
import "C"


================================================
FILE: scripts/go/_internal/go.mod
================================================
module sherpa_onnx

go 1.17


================================================
FILE: scripts/go/_internal/lib/x86_64-pc-windows-gnu/.gitkeep
================================================


================================================
FILE: scripts/go/_internal/non-streaming-canary-decode-files/go.mod
================================================
module non-streaming-canary-decode-files

go 1.17

replace github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx => ../


================================================
FILE: scripts/go/_internal/non-streaming-funasr-nano-decode-files/go.mod
================================================
module non-streaming-funasr-nano-decode-files

go 1.17

replace github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx => ../


================================================
FILE: scripts/go/_internal/non-streaming-omnilingual-asr-ctc-decode-files/go.mod
================================================
module non-streaming-omnilingual-asr-ctc-decode-files

go 1.17

replace github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx => ../


================================================
FILE: scripts/go/_internal/non-streaming-speaker-diarization/go.mod
================================================
module non-streaming-speaker-diarization

go 1.17

replace github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx => ../


================================================
FILE: scripts/go/_internal/supertonic-tts/go.mod
================================================
module supertonic-tts

go 1.17

replace github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx => ../


================================================
FILE: scripts/go/_internal/vad-speaker-identification/go.mod
================================================
module vad-speaker-identification

go 1.17

replace github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx => ../


================================================
FILE: scripts/go/_internal/zero-shot-zipvoice-tts/go.mod
================================================
module zero-shot-zipvoice-tts

go 1.17

replace github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx => ../


================================================
FILE: scripts/go/_internal/zero-shot-zipvoice-tts-play/go.mod
================================================
module zero-shot-zipvoice-tts-play

go 1.17

replace github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx => ../


================================================
FILE: scripts/go/defines.go.jinja
================================================
{{ golang_header }}
package sherpa_onnx

// ============================================================
// Code Generated Automatically for {{ platform }} platform, DO NOT EDIT MANUALLY!!
// ============================================================

import (
	sherpa "github.com/k2-fsa/sherpa-onnx-go-{{ platform }}"
)

// ============================================================
// Struct/Function Defines
// ============================================================
{% for define in defines -%}
{%- if define.type == 'function' %}
var {{ define.name }} = sherpa.{{ define.name }}
{%- else %}
type {{ define.name }} = sherpa.{{ define.name }}
{%- endif %}
{%- endfor %}

================================================
FILE: scripts/go/generate.py
================================================
#!/usr/bin/env python3

import argparse
import os
import re

import jinja2


def parse_args():
    # set the source code file
    # -s sherpa_onnx.go
    # set the output folder
    # -o ./sherpa-onnx-go
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    # add argv to set source code file
    parser.add_argument("-s", "--source", type=str, required=True)
    # add argv to set output folder
    parser.add_argument("-o", "--output", type=str, required=True)
    return parser.parse_args()


def parse_golang(target):
    with open(target, "r") as file:
        content = file.read()
    defines = []
    struct_pattern = re.compile(r"type\s+([A-Z]\w+)\s+struct", re.DOTALL)
    struct_matches = struct_pattern.findall(content)
    for name in struct_matches:
        c_define = {
            "type": "struct",
            "name": name,
        }
        defines.append(c_define)
    struct_pattern = re.compile(r"type\s+([A-Z][^ =]+)\s*=", re.DOTALL)
    struct_matches = struct_pattern.findall(content)
    for name in struct_matches:
        c_define = {
            "type": "struct",
            "name": name,
        }
        defines.append(c_define)
    func_pattern = re.compile(r"func\s+([A-Z][^ \(]+)\s*\(", re.DOTALL)
    func_matches = func_pattern.findall(content)
    for name in func_matches:
        c_define = {
            "type": "function",
            "name": name,
        }
        defines.append(c_define)
    return defines


def render(output, defines, platform):
    build_info = ""
    if platform == "windows":
        build_info = "//go:build (windows && amd64) || (windows && 386)"
    elif platform == "linux":
        build_info = "//go:build (!android && linux && arm64) || (!android && linux && amd64 && !musl) || (!android && linux && arm && !arm7) || (!android && arm7) || (!android && linux && 386 && !musl) || (!android && musl) || (!android && linux && mips) || (!android && linux && mips64) || (!android && linux && mips64le) || (!android && linux && mipsle)"
    elif platform == "macos":
        build_info = "//go:build (darwin && amd64 && !ios) || (darwin && arm64 && !ios)"
    with open("./defines.go.jinja") as f:
        content = f.read()
    environment = jinja2.Environment()
    template = environment.from_string(content)
    context = {
        "platform": platform,
        "defines": defines,
        "golang_header": build_info,
    }
    rendered = template.render(**context)
    folder = os.path.dirname(output)
    if not os.path.exists(folder):
        os.makedirs(folder)
    with open(output, "w") as f:
        print(rendered, file=f)


def generate(src, output):
    defines = parse_golang(src)
    platform = "linux"
    render(f"{output}/sherpa_onnx/sherpa_onnx_{platform}.go", defines, platform)
    platform = "windows"
    render(f"{output}/sherpa_onnx/sherpa_onnx_{platform}.go", defines, platform)
    platform = "macos"
    render(f"{output}/sherpa_onnx/sherpa_onnx_{platform}.go", defines, platform)


if __name__ == "__main__":
    args = parse_args()
    generate(args.source, args.output)


================================================
FILE: scripts/go/release.sh
================================================
#!/usr/bin/env bash

set -ex

git config --global user.email "csukuangfj@gmail.com"
git config --global user.name "Fangjun Kuang"

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(realpath $SCRIPT_DIR/../..)
echo "SCRIPT_DIR: $SCRIPT_DIR"
echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"


SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" $SHERPA_ONNX_DIR/CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

function linux() {
  echo "Process linux"
  git clone git@github.com:k2-fsa/sherpa-onnx-go-linux.git

  rm -v ./sherpa-onnx-go-linux/*.go

  cp -v ./sherpa_onnx.go ./sherpa-onnx-go-linux/
  cp -v ./_internal/c-api.h ./sherpa-onnx-go-linux/
  cp -v ./_internal/build_linux_*.go ./sherpa-onnx-go-linux/

  rm -rf sherpa-onnx-go-linux/lib/x86_64-unknown-linux-gnu/lib*
  dst=$(realpath sherpa-onnx-go-linux/lib/x86_64-unknown-linux-gnu)
  mkdir t
  cd t
  wget -q https://huggingface.co/csukuangfj2/sherpa-onnx-wheels/resolve/main/cpu/$SHERPA_ONNX_VERSION/sherpa_onnx_core-${SHERPA_ONNX_VERSION}-py3-none-manylinux2014_x86_64.whl
  unzip sherpa_onnx_core-${SHERPA_ONNX_VERSION}-py3-none-manylinux2014_x86_64.whl

  rm -fv $dst/_sherpa*.so
  cp -v sherpa_onnx/lib/lib*.so* $dst

  cd ..
  rm -rf t

  rm -rf sherpa-onnx-go-linux/lib/aarch64-unknown-linux-gnu/lib*
  dst=$(realpath sherpa-onnx-go-linux/lib/aarch64-unknown-linux-gnu)
  mkdir t
  cd t
  wget -q https://huggingface.co/csukuangfj2/sherpa-onnx-wheels/resolve/main/cpu/$SHERPA_ONNX_VERSION/sherpa_onnx_core-${SHERPA_ONNX_VERSION}-py3-none-manylinux2014_aarch64.whl
  unzip ./sherpa_onnx_core-${SHERPA_ONNX_VERSION}-py3-none-manylinux2014_aarch64.whl

  rm -fv $dst/_sherpa*.so
  cp -v sherpa_onnx/lib/lib*.so* $dst

  cd ..
  rm -rf t

  rm -rf sherpa-onnx-go-linux/lib/arm-unknown-linux-gnueabihf/lib*
  dst=$(realpath sherpa-onnx-go-linux/lib/arm-unknown-linux-gnueabihf)
  mkdir t
  cd t
  wget -q https://huggingface.co/csukuangfj2/sherpa-onnx-wheels/resolve/main/cpu/$SHERPA_ONNX_VERSION/sherpa_onnx_core-$SHERPA_ONNX_VERSION-py3-none-manylinux_2_35_armv7l.whl
  unzip ./sherpa_onnx_core-$SHERPA_ONNX_VERSION-py3-none-manylinux_2_35_armv7l.whl

  rm -fv $dst/_sherpa*.so
  cp -v sherpa_onnx/lib/lib*.so* $dst

  cd ..
  rm -rf t

  echo "------------------------------"
  cd sherpa-onnx-go-linux
  git status
  git add .
  git commit -m "Release v$SHERPA_ONNX_VERSION" && \
  git push && \
  git tag v$SHERPA_ONNX_VERSION && \
  git push origin v$SHERPA_ONNX_VERSION || true
  cd ..
  rm -rf sherpa-onnx-go-linux
}

function osx() {
  echo "Process osx-x64"
  git clone git@github.com:k2-fsa/sherpa-onnx-go-macos.git
  rm -v ./sherpa-onnx-go-macos/*.go
  cp -v ./sherpa_onnx.go ./sherpa-onnx-go-macos/
  cp -v ./_internal/c-api.h ./sherpa-onnx-go-macos/
  cp -v ./_internal/build_darwin_*.go ./sherpa-onnx-go-macos/

  rm -rf sherpa-onnx-go-macos/lib/x86_64-apple-darwin/lib*
  dst=$(realpath sherpa-onnx-go-macos/lib/x86_64-apple-darwin/)

  mkdir t
  cd t
  wget -q https://huggingface.co/csukuangfj2/sherpa-onnx-wheels/resolve/main/cpu/$SHERPA_ONNX_VERSION/sherpa_onnx_core-${SHERPA_ONNX_VERSION}-py3-none-macosx_10_15_x86_64.whl
  unzip ./sherpa_onnx_core-${SHERPA_ONNX_VERSION}-py3-none-macosx_10_15_x86_64.whl

  cp -v sherpa_onnx/lib/*.dylib $dst/

  pushd $dst
  cp -v libonnxruntime.*.dylib libonnxruntime.dylib
  popd

  cd ..
  rm -rf t

  echo "process macos arm64"
  rm -rf sherpa-onnx-go-macos/lib/aarch64-apple-darwin/lib*
  dst=$(realpath sherpa-onnx-go-macos/lib/aarch64-apple-darwin)

  mkdir t
  cd t
  wget -q https://huggingface.co/csukuangfj2/sherpa-onnx-wheels/resolve/main/cpu/$SHERPA_ONNX_VERSION/sherpa_onnx_core-${SHERPA_ONNX_VERSION}-py3-none-macosx_11_0_arm64.whl
  unzip ./sherpa_onnx_core-${SHERPA_ONNX_VERSION}-py3-none-macosx_11_0_arm64.whl

  cp -v sherpa_onnx/lib/*.dylib $dst/

  pushd $dst
  cp -v libonnxruntime.*.dylib libonnxruntime.dylib
  popd

  cd ..
  rm -rf t
  echo "------------------------------"
  cd sherpa-onnx-go-macos
  git status
  git add .
  git commit -m "Release v$SHERPA_ONNX_VERSION" && \
  git push && \
  git tag v$SHERPA_ONNX_VERSION && \
  git push origin v$SHERPA_ONNX_VERSION || true
  cd ..
  rm -rf sherpa-onnx-go-macos
}

function windows() {
  echo "Process windows"
  git clone git@github.com:k2-fsa/sherpa-onnx-go-windows.git
  rm -v ./sherpa-onnx-go-windows/*.go
  cp -v ./sherpa_onnx.go ./sherpa-onnx-go-windows/
  cp -v ./_internal/c-api.h ./sherpa-onnx-go-windows/
  cp -v ./_internal/build_windows_*.go ./sherpa-onnx-go-windows/

  rm -fv sherpa-onnx-go-windows/lib/x86_64-pc-windows-gnu/*
  dst=$(realpath sherpa-onnx-go-windows/lib/x86_64-pc-windows-gnu)
  mkdir t
  cd t
  wget -q https://huggingface.co/csukuangfj2/sherpa-onnx-wheels/resolve/main/cpu/$SHERPA_ONNX_VERSION/sherpa_onnx_core-${SHERPA_ONNX_VERSION}-py3-none-win_amd64.whl
  unzip ./sherpa_onnx_core-${SHERPA_ONNX_VERSION}-py3-none-win_amd64.whl

  cp -v sherpa_onnx/lib/*.dll $dst

  cd ..
  rm -rf t

  rm -fv sherpa-onnx-go-windows/lib/i686-pc-windows-gnu/*
  dst=$(realpath sherpa-onnx-go-windows/lib/i686-pc-windows-gnu)
  mkdir t
  cd t
  wget -q https://huggingface.co/csukuangfj2/sherpa-onnx-wheels/resolve/main/cpu/$SHERPA_ONNX_VERSION/sherpa_onnx_core-${SHERPA_ONNX_VERSION}-py3-none-win32.whl
  unzip ./sherpa_onnx_core-${SHERPA_ONNX_VERSION}-py3-none-win32.whl

  cp -v sherpa_onnx/lib/*.dll $dst

  cd ..
  rm -rf t
  echo "------------------------------"
  cd sherpa-onnx-go-windows
  git status
  git add .
  git commit -m "Release v$SHERPA_ONNX_VERSION" && \
  git push && \
  git tag v$SHERPA_ONNX_VERSION && \
  git push origin v$SHERPA_ONNX_VERSION || true
  cd ..
  rm -rf sherpa-onnx-go-windows
}

function basic() {
  echo "Process sherpa-onnx-go"
  git clone git@github.com:k2-fsa/sherpa-onnx-go.git

  python3 ./generate.py -s ./sherpa_onnx.go -o ./sherpa-onnx-go

  echo "------------------------------"
  cd sherpa-onnx-go
  git status
  git add .
  git commit -m "Release v$SHERPA_ONNX_VERSION" && \
    git push && \
    git tag v$SHERPA_ONNX_VERSION && \
    git push origin v$SHERPA_ONNX_VERSION
  cd ..
  rm -rf sherpa-onnx-go
}

basic
windows
linux
osx

rm -fv ~/.ssh/github


================================================
FILE: scripts/go/sherpa_onnx.go
================================================
/*
Speech recognition with [Next-gen Kaldi].

[sherpa-onnx] is an open-source speech recognition framework for [Next-gen Kaldi].
It depends only on [onnxruntime], supporting both streaming and non-streaming
speech recognition.

It does not need to access the network during recognition and everything
runs locally.

It supports a variety of platforms, such as Linux (x86_64, aarch64, arm),
Windows (x86_64, x86), macOS (x86_64, arm64), etc.

Usage examples:

 1. Real-time speech recognition from a microphone

    Please see
    https://github.com/k2-fsa/sherpa-onnx/tree/master/go-api-examples/real-time-speech-recognition-from-microphone

 2. Decode files using a non-streaming model

    Please see
    https://github.com/k2-fsa/sherpa-onnx/tree/master/go-api-examples/non-streaming-decode-files

 3. Decode files using a streaming model

    Please see
    https://github.com/k2-fsa/sherpa-onnx/tree/master/go-api-examples/streaming-decode-files

 4. Convert text to speech using a non-streaming model

    Please see
    https://github.com/k2-fsa/sherpa-onnx/tree/master/go-api-examples/non-streaming-tts

[sherpa-onnx]: https://github.com/k2-fsa/sherpa-onnx
[onnxruntime]: https://github.com/microsoft/onnxruntime
[Next-gen Kaldi]: https://github.com/k2-fsa/
*/
package sherpa_onnx

// #include <stdlib.h>
// #include "c-api.h"
// extern int32_t _cgoGeneratedAudioCallback(float *samples,int32_t n,void *arg);
// extern int32_t _cgoGeneratedAudioProgressCallback(float *samples, int32_t n, float p, void *arg);
import "C"
import (
	"encoding/json"
	"runtime/cgo"
	"unsafe"
)

// Configuration for online/streaming transducer models
//
// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html
// to download pre-trained models
type OnlineTransducerModelConfig struct {
	Encoder string // Path to the encoder model, e.g., encoder.onnx or encoder.int8.onnx
	Decoder string // Path to the decoder model.
	Joiner  string // Path to the joiner model.
}

// Configuration for online/streaming paraformer models
//
// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html
// to download pre-trained models
type OnlineParaformerModelConfig struct {
	Encoder string // Path to the encoder model, e.g., encoder.onnx or encoder.int8.onnx
	Decoder string // Path to the decoder model.
}

// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-ctc/index.html
// to download pre-trained models
type OnlineZipformer2CtcModelConfig struct {
	Model string // Path to the onnx model
}

type OnlineNemoCtcModelConfig struct {
	Model string // Path to the onnx model
}

type OnlineToneCtcModelConfig struct {
	Model string // Path to the onnx model
}

// Configuration for online/streaming models
//
// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html
// to download pre-trained models
type OnlineModelConfig struct {
	Transducer    OnlineTransducerModelConfig
	Paraformer    OnlineParaformerModelConfig
	Zipformer2Ctc OnlineZipformer2CtcModelConfig
	NemoCtc       OnlineNemoCtcModelConfig
	ToneCtc       OnlineToneCtcModelConfig
	Tokens        string // Path to tokens.txt
	NumThreads    int    // Number of threads to use for neural network computation
	Provider      string // Optional. Valid values are: cpu, cuda, coreml
	Debug         int    // 1 to show model meta information while loading it.
	ModelType     string // Optional. You can specify it for faster model initialization
	ModelingUnit  string // Optional. cjkchar, bpe, cjkchar+bpe
	BpeVocab      string // Optional.
	TokensBuf     string // Optional.
	TokensBufSize int    // Optional.
}

// Configuration for the feature extractor
type FeatureConfig struct {
	// Sample rate expected by the model. It is 16000 for all
	// pre-trained models provided by us
	SampleRate int
	// Feature dimension expected by the model. It is 80 for all
	// pre-trained models provided by us
	FeatureDim int
}

type OnlineCtcFstDecoderConfig struct {
	Graph     string
	MaxActive int
}

type HomophoneReplacerConfig struct {
	DictDir  string // unused
	Lexicon  string
	RuleFsts string
}

// Configuration for the online/streaming recognizer.
type OnlineRecognizerConfig struct {
	FeatConfig  FeatureConfig
	ModelConfig OnlineModelConfig

	// Valid decoding methods: greedy_search, modified_beam_search
	DecodingMethod string

	// Used only when DecodingMethod is modified_beam_search. It specifies
	// the maximum number of paths to keep during the search
	MaxActivePaths int

	EnableEndpoint int // 1 to enable endpoint detection.

	// Please see
	// https://k2-fsa.github.io/sherpa/ncnn/endpoint.html
	// for the meaning of Rule1MinTrailingSilence, Rule2MinTrailingSilence
	// and Rule3MinUtteranceLength.
	Rule1MinTrailingSilence float32
	Rule2MinTrailingSilence float32
	Rule3MinUtteranceLength float32
	HotwordsFile            string
	HotwordsScore           float32
	BlankPenalty            float32
	CtcFstDecoderConfig     OnlineCtcFstDecoderConfig
	RuleFsts                string
	RuleFars                string
	HotwordsBuf             string
	HotwordsBufSize         int
	Hr                      HomophoneReplacerConfig
}

// It contains the recognition result for a online stream.
type OnlineRecognizerResult struct {
	Text string
}

// The online recognizer class. It wraps a pointer from C.
type OnlineRecognizer struct {
	impl *C.struct_SherpaOnnxOnlineRecognizer
}

// The online stream class. It wraps a pointer from C.
type OnlineStream struct {
	impl *C.struct_SherpaOnnxOnlineStream
}

// Free the internal pointer inside the recognizer to avoid memory leak.
func DeleteOnlineRecognizer(recognizer *OnlineRecognizer) {
	C.SherpaOnnxDestroyOnlineRecognizer(recognizer.impl)
	recognizer.impl = nil
}

// The user is responsible to invoke [DeleteOnlineRecognizer]() to free
// the returned recognizer to avoid memory leak
func NewOnlineRecognizer(config *OnlineRecognizerConfig) *OnlineRecognizer {
	c := C.struct_SherpaOnnxOnlineRecognizerConfig{}
	c.feat_config.sample_rate = C.int(config.FeatConfig.SampleRate)
	c.feat_config.feature_dim = C.int(config.FeatConfig.FeatureDim)

	c.model_config.transducer.encoder = C.CString(config.ModelConfig.Transducer.Encoder)
	defer C.free(unsafe.Pointer(c.model_config.transducer.encoder))

	c.model_config.transducer.decoder = C.CString(config.ModelConfig.Transducer.Decoder)
	defer C.free(unsafe.Pointer(c.model_config.transducer.decoder))

	c.model_config.transducer.joiner = C.CString(config.ModelConfig.Transducer.Joiner)
	defer C.free(unsafe.Pointer(c.model_config.transducer.joiner))

	c.model_config.paraformer.encoder = C.CString(config.ModelConfig.Paraformer.Encoder)
	defer C.free(unsafe.Pointer(c.model_config.paraformer.encoder))

	c.model_config.paraformer.decoder = C.CString(config.ModelConfig.Paraformer.Decoder)
	defer C.free(unsafe.Pointer(c.model_config.paraformer.decoder))

	c.model_config.zipformer2_ctc.model = C.CString(config.ModelConfig.Zipformer2Ctc.Model)
	defer C.free(unsafe.Pointer(c.model_config.zipformer2_ctc.model))

	c.model_config.nemo_ctc.model = C.CString(config.ModelConfig.NemoCtc.Model)
	defer C.free(unsafe.Pointer(c.model_config.nemo_ctc.model))

	c.model_config.t_one_ctc.model = C.CString(config.ModelConfig.ToneCtc.Model)
	defer C.free(unsafe.Pointer(c.model_config.t_one_ctc.model))

	c.model_config.tokens = C.CString(config.ModelConfig.Tokens)
	defer C.free(unsafe.Pointer(c.model_config.tokens))

	c.model_config.tokens_buf = C.CString(config.ModelConfig.TokensBuf)
	defer C.free(unsafe.Pointer(c.model_config.tokens_buf))

	c.model_config.tokens_buf_size = C.int(config.ModelConfig.TokensBufSize)

	c.model_config.num_threads = C.int(config.ModelConfig.NumThreads)

	c.model_config.provider = C.CString(config.ModelConfig.Provider)
	defer C.free(unsafe.Pointer(c.model_config.provider))

	c.model_config.debug = C.int(config.ModelConfig.Debug)

	c.model_config.model_type = C.CString(config.ModelConfig.ModelType)
	defer C.free(unsafe.Pointer(c.model_config.model_type))

	c.model_config.modeling_unit = C.CString(config.ModelConfig.ModelingUnit)
	defer C.free(unsafe.Pointer(c.model_config.modeling_unit))

	c.model_config.bpe_vocab = C.CString(config.ModelConfig.BpeVocab)
	defer C.free(unsafe.Pointer(c.model_config.bpe_vocab))

	c.decoding_method = C.CString(config.DecodingMethod)
	defer C.free(unsafe.Pointer(c.decoding_method))

	c.max_active_paths = C.int(config.MaxActivePaths)
	c.enable_endpoint = C.int(config.EnableEndpoint)
	c.rule1_min_trailing_silence = C.float(config.Rule1MinTrailingSilence)
	c.rule2_min_trailing_silence = C.float(config.Rule2MinTrailingSilence)
	c.rule3_min_utterance_length = C.float(config.Rule3MinUtteranceLength)

	c.hotwords_file = C.CString(config.HotwordsFile)
	defer C.free(unsafe.Pointer(c.hotwords_file))

	c.hotwords_buf = C.CString(config.HotwordsBuf)
	defer C.free(unsafe.Pointer(c.hotwords_buf))

	c.hotwords_buf_size = C.int(config.HotwordsBufSize)

	c.hotwords_score = C.float(config.HotwordsScore)
	c.blank_penalty = C.float(config.BlankPenalty)

	c.rule_fsts = C.CString(config.RuleFsts)
	defer C.free(unsafe.Pointer(c.rule_fsts))

	c.rule_fars = C.CString(config.RuleFars)
	defer C.free(unsafe.Pointer(c.rule_fars))

	c.ctc_fst_decoder_config.graph = C.CString(config.CtcFstDecoderConfig.Graph)
	defer C.free(unsafe.Pointer(c.ctc_fst_decoder_config.graph))
	c.ctc_fst_decoder_config.max_active = C.int(config.CtcFstDecoderConfig.MaxActive)

	c.hr.lexicon = C.CString(config.Hr.Lexicon)
	defer C.free(unsafe.Pointer(c.hr.lexicon))

	c.hr.rule_fsts = C.CString(config.Hr.RuleFsts)
	defer C.free(unsafe.Pointer(c.hr.rule_fsts))

	impl := C.SherpaOnnxCreateOnlineRecognizer(&c)
	if impl == nil {
		return nil
	}
	recognizer := &OnlineRecognizer{}
	recognizer.impl = impl
	return recognizer
}

// Delete the internal pointer inside the stream to avoid memory leak.
func DeleteOnlineStream(stream *OnlineStream) {
	C.SherpaOnnxDestroyOnlineStream(stream.impl)
	stream.impl = nil
}

// The user is responsible to invoke [DeleteOnlineStream]() to free
// the returned stream to avoid memory leak
func NewOnlineStream(recognizer *OnlineRecognizer) *OnlineStream {
	stream := &OnlineStream{}
	stream.impl = C.SherpaOnnxCreateOnlineStream(recognizer.impl)
	return stream
}

// Input audio samples for the stream.
//
// sampleRate is the actual sample rate of the input audio samples. If it
// is different from the sample rate expected by the feature extractor, we will
// do resampling inside.
//
// samples contains audio samples. Each sample is in the range [-1, 1]
func (s *OnlineStream) AcceptWaveform(sampleRate int, samples []float32) {
	C.SherpaOnnxOnlineStreamAcceptWaveform(s.impl, C.int(sampleRate), (*C.float)(&samples[0]), C.int(len(samples)))
}

// Signal that there will be no incoming audio samples.
// After calling this function, you cannot call [OnlineStream.AcceptWaveform] any longer.
//
// The main purpose of this function is to flush the remaining audio samples
// buffered inside for feature extraction.
func (s *OnlineStream) InputFinished() {
	C.SherpaOnnxOnlineStreamInputFinished(s.impl)
}

// Set a key-value option on the online stream.
// This provides a generic mechanism for passing per-stream runtime parameters
// to the recognizer (e.g., "is_final" for streaming Paraformer).
func (s *OnlineStream) SetOption(key string, value string) {
	cKey := C.CString(key)
	defer C.free(unsafe.Pointer(cKey))
	cValue := C.CString(value)
	defer C.free(unsafe.Pointer(cValue))
	C.SherpaOnnxOnlineStreamSetOption(s.impl, cKey, cValue)
}

// Get a key-value option from the online stream.
// Returns an empty string if the option is not set.
func (s *OnlineStream) GetOption(key string) string {
	cKey := C.CString(key)
	defer C.free(unsafe.Pointer(cKey))
	return C.GoString(C.SherpaOnnxOnlineStreamGetOption(s.impl, cKey))
}

// Check whether the given option exists in the online stream.
// Return true if the option exists. Return false otherwise.
func (s *OnlineStream) HasOption(key string) bool {
	cKey := C.CString(key)
	defer C.free(unsafe.Pointer(cKey))
	return C.SherpaOnnxOnlineStreamHasOption(s.impl, cKey) == 1
}

// Check whether the stream has enough feature frames for decoding.
// Return true if this stream is ready for decoding. Return false otherwise.
//
// You will usually use it like below:
//
//	for recognizer.IsReady(s) {
//	   recognizer.Decode(s)
//	}
func (recognizer *OnlineRecognizer) IsReady(s *OnlineStream) bool {
	return C.SherpaOnnxIsOnlineStreamReady(recognizer.impl, s.impl) == 1
}

// Return true if an endpoint is detected.
//
// You usually use it like below:
//
//	if recognizer.IsEndpoint(s) {
//	   // do your own stuff after detecting an endpoint
//
//	   recognizer.Reset(s)
//	}
func (recognizer *OnlineRecognizer) IsEndpoint(s *OnlineStream) bool {
	return C.SherpaOnnxOnlineStreamIsEndpoint(recognizer.impl, s.impl) == 1
}

// After calling this function, the internal neural network model states
// are reset and IsEndpoint(s) would return false. GetResult(s) would also
// return an empty string.
func (recognizer *OnlineRecognizer) Reset(s *OnlineStream) {
	C.SherpaOnnxOnlineStreamReset(recognizer.impl, s.impl)
}

// Decode the stream. Before calling this function, you have to ensure
// that recognizer.IsReady(s) returns true. Otherwise, you will be SAD.
//
// You usually use it like below:
//
//	for recognizer.IsReady(s) {
//	  recognizer.Decode(s)
//	}
func (recognizer *OnlineRecognizer) Decode(s *OnlineStream) {
	C.SherpaOnnxDecodeOnlineStream(recognizer.impl, s.impl)
}

// Decode multiple streams in parallel, i.e., in batch.
// You have to ensure that each stream is ready for decoding. Otherwise,
// you will be SAD.
func (recognizer *OnlineRecognizer) DecodeStreams(s []*OnlineStream) {
	ss := make([]*C.struct_SherpaOnnxOnlineStream, len(s))
	for i, v := range s {
		ss[i] = v.impl
	}

	C.SherpaOnnxDecodeMultipleOnlineStreams(recognizer.impl, &ss[0], C.int(len(s)))
}

// Get the current result of stream since the last invoke of Reset()
func (recognizer *OnlineRecognizer) GetResult(s *OnlineStream) *OnlineRecognizerResult {
	p := C.SherpaOnnxGetOnlineStreamResult(recognizer.impl, s.impl)
	defer C.SherpaOnnxDestroyOnlineRecognizerResult(p)
	result := &OnlineRecognizerResult{}
	result.Text = C.GoString(p.text)

	return result
}

// Configuration for offline/non-streaming transducer.
//
// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/index.html
// to download pre-trained models
type OfflineTransducerModelConfig struct {
	Encoder string // Path to the encoder model, i.e., encoder.onnx or encoder.int8.onnx
	Decoder string // Path to the decoder model
	Joiner  string // Path to the joiner model
}

// Configuration for offline/non-streaming paraformer.
//
// please refer to
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html
// to download pre-trained models
type OfflineParaformerModelConfig struct {
	Model string // Path to the model, e.g., model.onnx or model.int8.onnx
}

// Configuration for offline/non-streaming NeMo CTC models.
//
// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/index.html
// to download pre-trained models
type OfflineNemoEncDecCtcModelConfig struct {
	Model string // Path to the model, e.g., model.onnx or model.int8.onnx
}

type OfflineZipformerCtcModelConfig struct {
	Model string // Path to the model, e.g., model.onnx or model.int8.onnx
}

type OfflineWenetCtcModelConfig struct {
	Model string // Path to the model, e.g., model.onnx or model.int8.onnx
}

type OfflineOmnilingualAsrCtcModelConfig struct {
	Model string // Path to the model, e.g., model.onnx or model.int8.onnx
}

type OfflineMedAsrCtcModelConfig struct {
	Model string // Path to the model, e.g., model.onnx or model.int8.onnx
}

type OfflineFireRedAsrCtcModelConfig struct {
	Model string // Path to the model, e.g., model.onnx or model.int8.onnx
}

type OfflineDolphinModelConfig struct {
	Model string // Path to the model, e.g., model.onnx or model.int8.onnx
}

type OfflineWhisperModelConfig struct {
	Encoder                 string
	Decoder                 string
	Language                string
	Task                    string
	TailPaddings            int
	EnableTokenTimestamps   int
	EnableSegmentTimestamps int
}

type OfflineCanaryModelConfig struct {
	Encoder string
	Decoder string
	SrcLang string
	TgtLang string
	UsePnc  int
}

type OfflineFireRedAsrModelConfig struct {
	Encoder string
	Decoder string
}

type OfflineFunASRNanoModelConfig struct {
	EncoderAdaptor              string
	LLM                         string
	Embedding                   string
	Tokenizer                   string
	SystemPrompt                string
	UserPrompt                  string
	MaxNewTokens                int
	Temperature                 float32
	TopP                        float32
	Seed                        int
	Language                    string
	UseInverseTextNormalization int
	Hotwords                    string
}

// For Moonshine v1, you need 4 models:
//   - preprocessor, encoder, uncached_decoder, cached_decoder
//
// For Moonshine v2, you need 2 models:
//   - encoder, merged_decoder
type OfflineMoonshineModelConfig struct {
	Preprocessor    string
	Encoder         string
	UncachedDecoder string
	CachedDecoder   string
	MergedDecoder   string
}

type OfflineTdnnModelConfig struct {
	Model string
}

type OfflineSenseVoiceModelConfig struct {
	Model                       string
	Language                    string
	UseInverseTextNormalization int
}

// Configuration for offline LM.
type OfflineLMConfig struct {
	Model string  // Path to the model
	Scale float32 // scale for LM score
}

type OfflineModelConfig struct {
	Transducer    OfflineTransducerModelConfig
	Paraformer    OfflineParaformerModelConfig
	NemoCTC       OfflineNemoEncDecCtcModelConfig
	Whisper       OfflineWhisperModelConfig
	Tdnn          OfflineTdnnModelConfig
	SenseVoice    OfflineSenseVoiceModelConfig
	Moonshine     OfflineMoonshineModelConfig
	FireRedAsr    OfflineFireRedAsrModelConfig
	FunAsrNano    OfflineFunASRNanoModelConfig
	Dolphin       OfflineDolphinModelConfig
	ZipformerCtc  OfflineZipformerCtcModelConfig
	Canary        OfflineCanaryModelConfig
	WenetCtc      OfflineWenetCtcModelConfig
	Omnilingual   OfflineOmnilingualAsrCtcModelConfig
	MedAsr        OfflineMedAsrCtcModelConfig
	FireRedAsrCtc OfflineFireRedAsrCtcModelConfig
	Tokens        string // Path to tokens.txt

	// Number of threads to use for neural network computation
	NumThreads int

	// 1 to print model meta information while loading
	Debug int

	// Optional. Valid values: cpu, cuda, coreml
	Provider string

	// Optional. Specify it for faster model initialization.
	ModelType string

	ModelingUnit  string // Optional. cjkchar, bpe, cjkchar+bpe
	BpeVocab      string // Optional.
	TeleSpeechCtc string // Optional.
}

// Configuration for the offline/non-streaming recognizer.
type OfflineRecognizerConfig struct {
	FeatConfig  FeatureConfig
	ModelConfig OfflineModelConfig
	LmConfig    OfflineLMConfig

	// Valid decoding method: greedy_search, modified_beam_search
	DecodingMethod string

	// Used only when DecodingMethod is modified_beam_search.
	MaxActivePaths int
	HotwordsFile   string
	HotwordsScore  float32
	BlankPenalty   float32
	RuleFsts       string
	RuleFars       string
	Hr             HomophoneReplacerConfig
}

// It wraps a pointer from C
type OfflineRecognizer struct {
	impl *C.struct_SherpaOnnxOfflineRecognizer
}

// It wraps a pointer from C
type OfflineStream struct {
	impl *C.struct_SherpaOnnxOfflineStream
}

// It contains recognition result of an offline stream.
type OfflineRecognizerResult struct {
	Text       string
	Tokens     []string
	Timestamps []float32
	Durations  []float32
	Lang       string
	Emotion    string
	Event      string
}

func newCOfflineRecognizerConfig(config *OfflineRecognizerConfig) *C.struct_SherpaOnnxOfflineRecognizerConfig {
	c := C.struct_SherpaOnnxOfflineRecognizerConfig{}
	c.feat_config.sample_rate = C.int(config.FeatConfig.SampleRate)
	c.feat_config.feature_dim = C.int(config.FeatConfig.FeatureDim)

	c.model_config.transducer.encoder = C.CString(config.ModelConfig.Transducer.Encoder)
	c.model_config.transducer.decoder = C.CString(config.ModelConfig.Transducer.Decoder)
	c.model_config.transducer.joiner = C.CString(config.ModelConfig.Transducer.Joiner)

	c.model_config.paraformer.model = C.CString(config.ModelConfig.Paraformer.Model)

	c.model_config.nemo_ctc.model = C.CString(config.ModelConfig.NemoCTC.Model)

	c.model_config.whisper.encoder = C.CString(config.ModelConfig.Whisper.Encoder)
	c.model_config.whisper.decoder = C.CString(config.ModelConfig.Whisper.Decoder)
	c.model_config.whisper.language = C.CString(config.ModelConfig.Whisper.Language)
	c.model_config.whisper.task = C.CString(config.ModelConfig.Whisper.Task)
	c.model_config.whisper.tail_paddings = C.int(config.ModelConfig.Whisper.TailPaddings)
	c.model_config.whisper.enable_token_timestamps = C.int(config.ModelConfig.Whisper.EnableTokenTimestamps)
	c.model_config.whisper.enable_segment_timestamps = C.int(config.ModelConfig.Whisper.EnableSegmentTimestamps)

	c.model_config.tdnn.model = C.CString(config.ModelConfig.Tdnn.Model)

	c.model_config.sense_voice.model = C.CString(config.ModelConfig.SenseVoice.Model)
	c.model_config.sense_voice.language = C.CString(config.ModelConfig.SenseVoice.Language)
	c.model_config.sense_voice.use_itn = C.int(config.ModelConfig.SenseVoice.UseInverseTextNormalization)

	c.model_config.moonshine.preprocessor = C.CString(config.ModelConfig.Moonshine.Preprocessor)
	c.model_config.moonshine.encoder = C.CString(config.ModelConfig.Moonshine.Encoder)
	c.model_config.moonshine.uncached_decoder = C.CString(config.ModelConfig.Moonshine.UncachedDecoder)
	c.model_config.moonshine.cached_decoder = C.CString(config.ModelConfig.Moonshine.CachedDecoder)
	c.model_config.moonshine.merged_decoder = C.CString(config.ModelConfig.Moonshine.MergedDecoder)

	c.model_config.fire_red_asr.encoder = C.CString(config.ModelConfig.FireRedAsr.Encoder)
	c.model_config.fire_red_asr.decoder = C.CString(config.ModelConfig.FireRedAsr.Decoder)

	c.model_config.funasr_nano.encoder_adaptor = C.CString(config.ModelConfig.FunAsrNano.EncoderAdaptor)
	c.model_config.funasr_nano.llm = C.CString(config.ModelConfig.FunAsrNano.LLM)
	c.model_config.funasr_nano.embedding = C.CString(config.ModelConfig.FunAsrNano.Embedding)
	c.model_config.funasr_nano.tokenizer = C.CString(config.ModelConfig.FunAsrNano.Tokenizer)
	c.model_config.funasr_nano.system_prompt = C.CString(config.ModelConfig.FunAsrNano.SystemPrompt)
	c.model_config.funasr_nano.user_prompt = C.CString(config.ModelConfig.FunAsrNano.UserPrompt)
	c.model_config.funasr_nano.max_new_tokens = C.int(config.ModelConfig.FunAsrNano.MaxNewTokens)
	c.model_config.funasr_nano.temperature = C.float(config.ModelConfig.FunAsrNano.Temperature)
	c.model_config.funasr_nano.top_p = C.float(config.ModelConfig.FunAsrNano.TopP)
	c.model_config.funasr_nano.seed = C.int(config.ModelConfig.FunAsrNano.Seed)
	c.model_config.funasr_nano.language = C.CString(config.ModelConfig.FunAsrNano.Language)
	c.model_config.funasr_nano.itn = C.int(config.ModelConfig.FunAsrNano.UseInverseTextNormalization)
	c.model_config.funasr_nano.hotwords = C.CString(config.ModelConfig.FunAsrNano.Hotwords)

	c.model_config.dolphin.model = C.CString(config.ModelConfig.Dolphin.Model)
	c.model_config.zipformer_ctc.model = C.CString(config.ModelConfig.ZipformerCtc.Model)

	c.model_config.canary.encoder = C.CString(config.ModelConfig.Canary.Encoder)
	c.model_config.canary.decoder = C.CString(config.ModelConfig.Canary.Decoder)
	c.model_config.canary.src_lang = C.CString(config.ModelConfig.Canary.SrcLang)
	c.model_config.canary.tgt_lang = C.CString(config.ModelConfig.Canary.TgtLang)
	c.model_config.canary.use_pnc = C.int(config.ModelConfig.Canary.UsePnc)

	c.model_config.wenet_ctc.model = C.CString(config.ModelConfig.WenetCtc.Model)

	c.model_config.omnilingual.model = C.CString(config.ModelConfig.Omnilingual.Model)
	c.model_config.medasr.model = C.CString(config.ModelConfig.MedAsr.Model)
	c.model_config.fire_red_asr_ctc.model = C.CString(config.ModelConfig.FireRedAsrCtc.Model)

	c.model_config.tokens = C.CString(config.ModelConfig.Tokens)

	c.model_config.num_threads = C.int(config.ModelConfig.NumThreads)

	c.model_config.debug = C.int(config.ModelConfig.Debug)

	c.model_config.provider = C.CString(config.ModelConfig.Provider)

	c.model_config.model_type = C.CString(config.ModelConfig.ModelType)

	c.model_config.modeling_unit = C.CString(config.ModelConfig.ModelingUnit)

	c.model_config.bpe_vocab = C.CString(config.ModelConfig.BpeVocab)

	c.model_config.telespeech_ctc = C.CString(config.ModelConfig.TeleSpeechCtc)

	c.lm_config.model = C.CString(config.LmConfig.Model)
	c.lm_config.scale = C.float(config.LmConfig.Scale)

	c.decoding_method = C.CString(config.DecodingMethod)

	c.max_active_paths = C.int(config.MaxActivePaths)

	c.hotwords_file = C.CString(config.HotwordsFile)
	c.hotwords_score = C.float(config.HotwordsScore)

	c.blank_penalty = C.float(config.BlankPenalty)

	c.rule_fsts = C.CString(config.RuleFsts)
	c.rule_fars = C.CString(config.RuleFars)

	c.hr.lexicon = C.CString(config.Hr.Lexicon)
	c.hr.rule_fsts = C.CString(config.Hr.RuleFsts)
	return &c
}
func freeCOfflineRecognizerConfig(c *C.struct_SherpaOnnxOfflineRecognizerConfig) {
	stringFields := []*(*C.char){
		&c.model_config.transducer.encoder,
		&c.model_config.transducer.decoder,
		&c.model_config.transducer.joiner,
		&c.model_config.paraformer.model,
		&c.model_config.nemo_ctc.model,
		&c.model_config.whisper.encoder,
		&c.model_config.whisper.decoder,
		&c.model_config.whisper.language,
		&c.model_config.whisper.task,
		&c.model_config.tdnn.model,
		&c.model_config.sense_voice.model,
		&c.model_config.sense_voice.language,
		&c.model_config.moonshine.preprocessor,
		&c.model_config.moonshine.encoder,
		&c.model_config.moonshine.uncached_decoder,
		&c.model_config.moonshine.cached_decoder,
		&c.model_config.moonshine.merged_decoder,
		&c.model_config.fire_red_asr.encoder,
		&c.model_config.fire_red_asr.decoder,
		&c.model_config.funasr_nano.encoder_adaptor,
		&c.model_config.funasr_nano.llm,
		&c.model_config.funasr_nano.embedding,
		&c.model_config.funasr_nano.tokenizer,
		&c.model_config.funasr_nano.system_prompt,
		&c.model_config.funasr_nano.user_prompt,
		&c.model_config.funasr_nano.language,
		&c.model_config.funasr_nano.hotwords,
		&c.model_config.dolphin.model,
		&c.model_config.zipformer_ctc.model,
		&c.model_config.canary.encoder,
		&c.model_config.canary.decoder,
		&c.model_config.canary.src_lang,
		&c.model_config.canary.tgt_lang,
		&c.model_config.wenet_ctc.model,
		&c.model_config.medasr.model,
		&c.model_config.fire_red_asr_ctc.model,
		&c.model_config.omnilingual.model,
		&c.model_config.tokens,
		&c.model_config.provider,
		&c.model_config.model_type,
		&c.model_config.modeling_unit,
		&c.model_config.bpe_vocab,
		&c.model_config.telespeech_ctc,
		&c.lm_config.model,
		&c.decoding_method,
		&c.hotwords_file,
		&c.rule_fsts,
		&c.rule_fars,
		&c.hr.lexicon,
		&c.hr.rule_fsts,
	}

	for _, field := range stringFields {
		if *field != nil {
			C.free(unsafe.Pointer(*field))
			*field = nil
		}
	}
}

// Frees the internal pointer of the recognition to avoid memory leak.
func DeleteOfflineRecognizer(recognizer *OfflineRecognizer) {
	C.SherpaOnnxDestroyOfflineRecognizer(recognizer.impl)
	recognizer.impl = nil
}

// The user is responsible to invoke [DeleteOfflineRecognizer]() to free
// the returned recognizer to avoid memory leak
func NewOfflineRecognizer(config *OfflineRecognizerConfig) *OfflineRecognizer {
	c := newCOfflineRecognizerConfig(config)
	defer freeCOfflineRecognizerConfig(c)

	impl := C.SherpaOnnxCreateOfflineRecognizer(c)
	if impl == nil {
		return nil
	}
	recognizer := &OfflineRecognizer{}
	recognizer.impl = impl

	return recognizer
}

// Set new config to replace
func (r *OfflineRecognizer) SetConfig(config *OfflineRecognizerConfig) {
	c := newCOfflineRecognizerConfig(config)
	defer freeCOfflineRecognizerConfig(c)

	C.SherpaOnnxOfflineRecognizerSetConfig(r.impl, c)
}

// Frees the internal pointer of the stream to avoid memory leak.
func DeleteOfflineStream(stream *OfflineStream) {
	C.SherpaOnnxDestroyOfflineStream(stream.impl)
	stream.impl = nil
}

// The user is responsible to invoke [DeleteOfflineStream]() to free
// the returned stream to avoid memory leak
func NewOfflineStream(recognizer *OfflineRecognizer) *OfflineStream {
	stream := &OfflineStream{}
	stream.impl = C.SherpaOnnxCreateOfflineStream(recognizer.impl)
	return stream
}

// Input audio samples for the offline stream.
// Please only call it once. That is, input all samples at once.
//
// sampleRate is the sample rate of the input audio samples. If it is different
// from the value expected by the feature extractor, we will do resampling inside.
//
// samples contains the actual audio samples. Each sample is in the range [-1, 1].
func (s *OfflineStream) AcceptWaveform(sampleRate int, samples []float32) {
	C.SherpaOnnxAcceptWaveformOffline(s.impl, C.int(sampleRate), (*C.float)(&samples[0]), C.int(len(samples)))
}

// Set a key-value option on the offline stream.
// This provides a generic mechanism for passing per-stream runtime parameters
// to the recognizer (e.g., "task", "prompt").
func (s *OfflineStream) SetOption(key string, value string) {
	cKey := C.CString(key)
	defer C.free(unsafe.Pointer(cKey))
	cValue := C.CString(value)
	defer C.free(unsafe.Pointer(cValue))
	C.SherpaOnnxOfflineStreamSetOption(s.impl, cKey, cValue)
}

// Get a key-value option from the offline stream.
// Returns an empty string if the option is not set.
func (s *OfflineStream) GetOption(key string) string {
	cKey := C.CString(key)
	defer C.free(unsafe.Pointer(cKey))
	return C.GoString(C.SherpaOnnxOfflineStreamGetOption(s.impl, cKey))
}

// Check whether the given option exists in the offline stream.
// Return true if the option exists. Return false otherwise.
func (s *OfflineStream) HasOption(key string) bool {
	cKey := C.CString(key)
	defer C.free(unsafe.Pointer(cKey))
	return C.SherpaOnnxOfflineStreamHasOption(s.impl, cKey) == 1
}

// Decode the offline stream.
func (recognizer *OfflineRecognizer) Decode(s *OfflineStream) {
	C.SherpaOnnxDecodeOfflineStream(recognizer.impl, s.impl)
}

// Decode multiple streams in parallel, i.e., in batch.
func (recognizer *OfflineRecognizer) DecodeStreams(s []*OfflineStream) {
	ss := make([]*C.struct_SherpaOnnxOfflineStream, len(s))
	for i, v := range s {
		ss[i] = v.impl
	}

	C.SherpaOnnxDecodeMultipleOfflineStreams(recognizer.impl, &ss[0], C.int(len(s)))
}

// Get the recognition result of the offline stream.
func (s *OfflineStream) GetResult() *OfflineRecognizerResult {
	p := C.SherpaOnnxGetOfflineStreamResult(s.impl)
	defer C.SherpaOnnxDestroyOfflineRecognizerResult(p)
	n := int(p.count)
	if n == 0 {
		return nil
	}
	result := &OfflineRecognizerResult{}
	result.Text = C.GoString(p.text)
	result.Lang = C.GoString(p.lang)
	result.Emotion = C.GoString(p.emotion)
	result.Event = C.GoString(p.event)
	result.Tokens = make([]string, n)
	tokens := unsafe.Slice(p.tokens_arr, n)
	for i := 0; i < n; i++ {
		result.Tokens[i] = C.GoString(tokens[i])
	}
	if p.timestamps != nil {
		result.Timestamps = make([]float32, n)
		timestamps := unsafe.Slice(p.timestamps, n)
		for i := 0; i < n; i++ {
			result.Timestamps[i] = float32(timestamps[i])
		}
	}
	if p.durations != nil {
		result.Durations = make([]float32, n)
		durations := unsafe.Slice(p.durations, n)
		for i := 0; i < n; i++ {
			result.Durations[i] = float32(durations[i])
		}
	}
	return result
}

// Configuration for offline/non-streaming text-to-speech (TTS).
//
// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/index.html
// to download pre-trained models
type OfflineTtsVitsModelConfig struct {
	Model       string  // Path to the VITS onnx model
	Lexicon     string  // Path to lexicon.txt
	Tokens      string  // Path to tokens.txt
	DataDir     string  // Path to espeak-ng-data directory
	NoiseScale  float32 // noise scale for vits models. Please use 0.667 in general
	NoiseScaleW float32 // noise scale for vits models. Please use 0.8 in general
	LengthScale float32 // Please use 1.0 in general. Smaller -> Faster speech speed. Larger -> Slower speech speed
	DictDir     string  // unused
}

type OfflineTtsMatchaModelConfig struct {
	AcousticModel string  // Path to the acoustic model for MatchaTTS
	Vocoder       string  // Path to the vocoder model for MatchaTTS
	Lexicon       string  // Path to lexicon.txt
	Tokens        string  // Path to tokens.txt
	DataDir       string  // Path to espeak-ng-data directory
	NoiseScale    float32 // noise scale for vits models. Please use 0.667 in general
	LengthScale   float32 // Please use 1.0 in general. Smaller -> Faster speech speed. Larger -> Slower speech speed
	DictDir       string  // unused
}

type OfflineTtsKokoroModelConfig struct {
	Model       string  // Path to the model for kokoro
	Voices      string  // Path to the voices.bin for kokoro
	Tokens      string  // Path to tokens.txt
	DataDir     string  // Path to espeak-ng-data directory
	DictDir     string  // unused
	Lexicon     string  // Path to lexicon files
	Lang        string  // Example: es for Spanish, fr-fr for French. Can be empty
	LengthScale float32 // Please use 1.0 in general. Smaller -> Faster speech speed. Larger -> Slower speech speed
}

type OfflineTtsKittenModelConfig struct {
	Model       string  // Path to the model for kitten
	Voices      string  // Path to the voices.bin for kitten
	Tokens      string  // Path to tokens.txt
	DataDir     string  // Path to espeak-ng-data directory
	LengthScale float32 // Please use 1.0 in general. Smaller -> Faster speech speed. Larger -> Slower speech speed
}

type OfflineTtsPocketModelConfig struct {
	LmFlow                      string // lm_flow
	LmMain                      string // lm_main
	Encoder                     string // encoder
	Decoder                     string // decoder
	TextConditioner             string // text_conditioner
	VocabJson                   string // vocab_json
	TokenScoresJson             string // token_scores_json
	VoiceEmbeddingCacheCapacity int    // voice_embedding_cache_capacity
}

type OfflineTtsZipvoiceModelConfig struct {
	Tokens  string // Path to tokens.txt for ZipVoice
	Encoder string // Path to text encoder (e.g. encoder.onnx)
	Decoder string // Path to flow-matching decoder (e.g. fm_decoder.onnx)
	DataDir string // Path to espeak-ng-data
	Lexicon string // Path to lexicon.txt (needed for zh)
	Vocoder string // Path to vocoder (e.g. vocos_24khz.onnx)

	FeatScale     float32 // Feature scale
	TShift        float32 // t-shift (<1 shifts to smaller t)
	TargetRms     float32 // Target RMS for speech normalization
	GuidanceScale float32 // CFG scale
}

type OfflineTtsSupertonicModelConfig struct {
	DurationPredictor string // Path to duration_predictor.onnx
	TextEncoder       string // Path to text_encoder.onnx
	VectorEstimator   string // Path to vector_estimator.onnx
	Vocoder           string // Path to vocoder.onnx
	TtsJson           string // Path to tts.json
	UnicodeIndexer    string // Path to unicode_indexer.bin
	VoiceStyle        string // Path to voice.bin
}

type OfflineTtsModelConfig struct {
	Vits       OfflineTtsVitsModelConfig
	Matcha     OfflineTtsMatchaModelConfig
	Kokoro     OfflineTtsKokoroModelConfig
	Kitten     OfflineTtsKittenModelConfig
	Zipvoice   OfflineTtsZipvoiceModelConfig
	Pocket     OfflineTtsPocketModelConfig
	Supertonic OfflineTtsSupertonicModelConfig

	// Number of threads to use for neural network computation
	NumThreads int

	// 1 to print model meta information while loading
	Debug int

	// Optional. Valid values: cpu, cuda, coreml
	Provider string
}

type OfflineTtsConfig struct {
	Model           OfflineTtsModelConfig
	RuleFsts        string
	RuleFars        string
	MaxNumSentences int
	SilenceScale    float32
}

type GeneratedAudio struct {
	// Normalized samples in the range [-1, 1]
	Samples []float32

	SampleRate int
}

type GenerationConfig struct {
	SilenceScale float32
	Speed        float32
	Sid          int

	ReferenceAudio      []float32
	ReferenceSampleRate int
	ReferenceText       string

	NumSteps int

	// Opaque JSON passed directly to C
	Extra json.RawMessage
}

// The offline tts class. It wraps a pointer from C.
type OfflineTts struct {
	impl *C.struct_SherpaOnnxOfflineTts
}

type sherpaOnnxGeneratedAudioCallbackWithArg func(samples []float32) bool

//export _cgoGeneratedAudioCallback
func _cgoGeneratedAudioCallback(
	samples *C.float,
	n C.int32_t,
	arg unsafe.Pointer,
) C.int32_t {

	h := *(*cgo.Handle)(arg)
	cb := h.Value().(sherpaOnnxGeneratedAudioCallbackWithArg)

	nn := int(n)
	arr := unsafe.Slice(
		(*float32)(unsafe.Pointer(samples)),
		nn,
	)

	all := make([]float32, nn)
	copy(all, arr)

	// Prevent panics from crossing the C boundary
	var ret bool
	func() {
		defer func() {
			if r := recover(); r != nil {
				ret = false
			}
		}()
		ret = cb(all)
	}()

	if ret {
		return 1
	}
	return 0
}

type sherpaOnnxGeneratedAudioProgressCallbackWithArg func(samples []float32, p float32) bool

//export _cgoGeneratedAudioProgressCallback
func _cgoGeneratedAudioProgressCallback(
	samples *C.float,
	n C.int32_t,
	p C.float,
	arg unsafe.Pointer,
) C.int32_t {

	h := *(*cgo.Handle)(arg)
	cb := h.Value().(sherpaOnnxGeneratedAudioProgressCallbackWithArg)

	nn := int(n)
	arr := unsafe.Slice(
		(*float32)(unsafe.Pointer(samples)),
		nn,
	)

	all := make([]float32, nn)
	copy(all, arr)

	// Prevent panics from crossing the C boundary
	var ret bool
	func() {
		defer func() {
			if r := recover(); r != nil {
				ret = false
			}
		}()
		ret = cb(all, float32(p))
	}()

	if ret {
		return 1
	}
	return 0
}

// Free the internal pointer inside the tts to avoid memory leak.
func DeleteOfflineTts(tts *OfflineTts) {
	C.SherpaOnnxDestroyOfflineTts(tts.impl)
	tts.impl = nil
}

// The user is responsible to invoke [DeleteOfflineTts]() to free
// the returned tts to avoid memory leak
func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts {
	c := C.struct_SherpaOnnxOfflineTtsConfig{}

	c.rule_fsts = C.CString(config.RuleFsts)
	defer C.free(unsafe.Pointer(c.rule_fsts))

	c.rule_fars = C.CString(config.RuleFars)
	defer C.free(unsafe.Pointer(c.rule_fars))

	c.max_num_sentences = C.int(config.MaxNumSentences)
	c.silence_scale = C.float(config.SilenceScale)

	// vits
	c.model.vits.model = C.CString(config.Model.Vits.Model)
	defer C.free(unsafe.Pointer(c.model.vits.model))

	c.model.vits.lexicon = C.CString(config.Model.Vits.Lexicon)
	defer C.free(unsafe.Pointer(c.model.vits.lexicon))

	c.model.vits.tokens = C.CString(config.Model.Vits.Tokens)
	defer C.free(unsafe.Pointer(c.model.vits.tokens))

	c.model.vits.data_dir = C.CString(config.Model.Vits.DataDir)
	defer C.free(unsafe.Pointer(c.model.vits.data_dir))

	c.model.vits.noise_scale = C.float(config.Model.Vits.NoiseScale)
	c.model.vits.noise_scale_w = C.float(config.Model.Vits.NoiseScaleW)
	c.model.vits.length_scale = C.float(config.Model.Vits.LengthScale)

	// matcha
	c.model.matcha.acoustic_model = C.CString(config.Model.Matcha.AcousticModel)
	defer C.free(unsafe.Pointer(c.model.matcha.acoustic_model))

	c.model.matcha.vocoder = C.CString(config.Model.Matcha.Vocoder)
	defer C.free(unsafe.Pointer(c.model.matcha.vocoder))

	c.model.matcha.lexicon = C.CString(config.Model.Matcha.Lexicon)
	defer C.free(unsafe.Pointer(c.model.matcha.lexicon))

	c.model.matcha.tokens = C.CString(config.Model.Matcha.Tokens)
	defer C.free(unsafe.Pointer(c.model.matcha.tokens))

	c.model.matcha.data_dir = C.CString(config.Model.Matcha.DataDir)
	defer C.free(unsafe.Pointer(c.model.matcha.data_dir))

	c.model.matcha.noise_scale = C.float(config.Model.Matcha.NoiseScale)
	c.model.matcha.length_scale = C.float(config.Model.Matcha.LengthScale)

	// kokoro
	c.model.kokoro.model = C.CString(config.Model.Kokoro.Model)
	defer C.free(unsafe.Pointer(c.model.kokoro.model))

	c.model.kokoro.voices = C.CString(config.Model.Kokoro.Voices)
	defer C.free(unsafe.Pointer(c.model.kokoro.voices))

	c.model.kokoro.tokens = C.CString(config.Model.Kokoro.Tokens)
	defer C.free(unsafe.Pointer(c.model.kokoro.tokens))

	c.model.kokoro.data_dir = C.CString(config.Model.Kokoro.DataDir)
	defer C.free(unsafe.Pointer(c.model.kokoro.data_dir))

	c.model.kokoro.lexicon = C.CString(config.Model.Kokoro.Lexicon)
	defer C.free(unsafe.Pointer(c.model.kokoro.lexicon))

	c.model.kokoro.lang = C.CString(config.Model.Kokoro.Lang)
	defer C.free(unsafe.Pointer(c.model.kokoro.lang))

	c.model.kokoro.length_scale = C.float(config.Model.Kokoro.LengthScale)

	// kitten
	c.model.kitten.model = C.CString(config.Model.Kitten.Model)
	defer C.free(unsafe.Pointer(c.model.kitten.model))

	c.model.kitten.voices = C.CString(config.Model.Kitten.Voices)
	defer C.free(unsafe.Pointer(c.model.kitten.voices))

	c.model.kitten.tokens = C.CString(config.Model.Kitten.Tokens)
	defer C.free(unsafe.Pointer(c.model.kitten.tokens))

	c.model.kitten.data_dir = C.CString(config.Model.Kitten.DataDir)
	defer C.free(unsafe.Pointer(c.model.kitten.data_dir))

	c.model.kitten.length_scale = C.float(config.Model.Kitten.LengthScale)

	// zipvoice
	c.model.zipvoice.tokens = C.CString(config.Model.Zipvoice.Tokens)
	defer C.free(unsafe.Pointer(c.model.zipvoice.tokens))

	c.model.zipvoice.encoder = C.CString(config.Model.Zipvoice.Encoder)
	defer C.free(unsafe.Pointer(c.model.zipvoice.encoder))

	c.model.zipvoice.decoder = C.CString(config.Model.Zipvoice.Decoder)
	defer C.free(unsafe.Pointer(c.model.zipvoice.decoder))

	c.model.zipvoice.vocoder = C.CString(config.Model.Zipvoice.Vocoder)
	defer C.free(unsafe.Pointer(c.model.zipvoice.vocoder))

	c.model.zipvoice.data_dir = C.CString(config.Model.Zipvoice.DataDir)
	defer C.free(unsafe.Pointer(c.model.zipvoice.data_dir))

	c.model.zipvoice.lexicon = C.CString(config.Model.Zipvoice.Lexicon)
	defer C.free(unsafe.Pointer(c.model.zipvoice.lexicon))

	c.model.zipvoice.feat_scale = C.float(config.Model.Zipvoice.FeatScale)
	c.model.zipvoice.t_shift = C.float(config.Model.Zipvoice.TShift)
	c.model.zipvoice.target_rms = C.float(config.Model.Zipvoice.TargetRms)
	c.model.zipvoice.guidance_scale = C.float(config.Model.Zipvoice.GuidanceScale)

	// pocket
	c.model.pocket.lm_flow = C.CString(config.Model.Pocket.LmFlow)
	defer C.free(unsafe.Pointer(c.model.pocket.lm_flow))

	c.model.pocket.lm_main = C.CString(config.Model.Pocket.LmMain)
	defer C.free(unsafe.Pointer(c.model.pocket.lm_main))

	c.model.pocket.encoder = C.CString(config.Model.Pocket.Encoder)
	defer C.free(unsafe.Pointer(c.model.pocket.encoder))

	c.model.pocket.decoder = C.CString(config.Model.Pocket.Decoder)
	defer C.free(unsafe.Pointer(c.model.pocket.decoder))

	c.model.pocket.text_conditioner = C.CString(config.Model.Pocket.TextConditioner)
	defer C.free(unsafe.Pointer(c.model.pocket.text_conditioner))

	c.model.pocket.vocab_json = C.CString(config.Model.Pocket.VocabJson)
	defer C.free(unsafe.Pointer(c.model.pocket.vocab_json))

	c.model.pocket.token_scores_json = C.CString(config.Model.Pocket.TokenScoresJson)
	defer C.free(unsafe.Pointer(c.model.pocket.token_scores_json))

	c.model.pocket.voice_embedding_cache_capacity = C.int(config.Model.Pocket.VoiceEmbeddingCacheCapacity)

	// supertonic
	c.model.supertonic.duration_predictor = C.CString(config.Model.Supertonic.DurationPredictor)
	defer C.free(unsafe.Pointer(c.model.supertonic.duration_predictor))

	c.model.supertonic.text_encoder = C.CString(config.Model.Supertonic.TextEncoder)
	defer C.free(unsafe.Pointer(c.model.supertonic.text_encoder))

	c.model.supertonic.vector_estimator = C.CString(config.Model.Supertonic.VectorEstimator)
	defer C.free(unsafe.Pointer(c.model.supertonic.vector_estimator))

	c.model.supertonic.vocoder = C.CString(config.Model.Supertonic.Vocoder)
	defer C.free(unsafe.Pointer(c.model.supertonic.vocoder))

	c.model.supertonic.tts_json = C.CString(config.Model.Supertonic.TtsJson)
	defer C.free(unsafe.Pointer(c.model.supertonic.tts_json))

	c.model.supertonic.unicode_indexer = C.CString(config.Model.Supertonic.UnicodeIndexer)
	defer C.free(unsafe.Pointer(c.model.supertonic.unicode_indexer))

	c.model.supertonic.voice_style = C.CString(config.Model.Supertonic.VoiceStyle)
	defer C.free(unsafe.Pointer(c.model.supertonic.voice_style))

	c.model.num_threads = C.int(config.Model.NumThreads)
	c.model.debug = C.int(config.Model.Debug)

	c.model.provider = C.CString(config.Model.Provider)
	defer C.free(unsafe.Pointer(c.model.provider))

	impl := C.SherpaOnnxCreateOfflineTts(&c)
	if impl == nil {
		return nil
	}
	tts := &OfflineTts{}
	tts.impl = impl
	return tts
}

func (tts *OfflineTts) NumSpeakers() int {
	return int(C.SherpaOnnxOfflineTtsNumSpeakers(tts.impl))
}

func (tts *OfflineTts) SampleRate() int {
	return int(C.SherpaOnnxOfflineTtsSampleRate(tts.impl))
}

func (tts *OfflineTts) Generate(text string, sid int, speed float32) *GeneratedAudio {
	s := C.CString(text)
	defer C.free(unsafe.Pointer(s))

	audio := C.SherpaOnnxOfflineTtsGenerate(tts.impl, s, C.int(sid), C.float(speed))

	if audio == nil {
		return nil
	}

	defer C.SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio)

	ans := &GeneratedAudio{}
	ans.SampleRate = int(audio.sample_rate)
	n := int(audio.n)
	ans.Samples = make([]float32, n)

	// see https://stackoverflow.com/questions/48756732/what-does-1-30c-yourtype-do-exactly-in-cgo
	// :n:n means 0:n:n, means low:high:capacity
	samples := unsafe.Slice(
		(*float32)(unsafe.Pointer(audio.samples)),
		n,
	)

	copy(ans.Samples, samples)

	return ans
}

// Deprecated: Use GenerateWithConfig() instead.
func (tts *OfflineTts) GenerateWithZipvoice(
	text, promptText string,
	promptSamples []float32,
	promptSampleRate int,
	speed float32,
	numSteps int,
) *GeneratedAudio {

	cText := C.CString(text)
	defer C.free(unsafe.Pointer(cText))

	cPromptText := C.CString(promptText)
	defer C.free(unsafe.Pointer(cPromptText))

	var p *C.float
	var n C.int
	if len(promptSamples) > 0 {
		p = (*C.float)(unsafe.Pointer(&promptSamples[0]))
		n = C.int(len(promptSamples))
	}

	audio := C.SherpaOnnxOfflineTtsGenerateWithZipvoice(
		tts.impl,
		cText,
		cPromptText,
		p,
		n,
		C.int(promptSampleRate),
		C.float(speed),
		C.int(numSteps),
	)
	if audio == nil {
		return nil
	}
	defer C.SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio)

	nn := int(audio.n)
	arr := unsafe.Slice(
		(*float32)(unsafe.Pointer(audio.samples)),
		nn,
	)

	ans := &GeneratedAudio{
		SampleRate: int(audio.sample_rate),
		Samples:    make([]float32, nn),
	}
	copy(ans.Samples, arr)

	return ans
}

func (tts *OfflineTts) GenerateWithCallback(
	text string,
	sid int,
	speed float32,
	cb sherpaOnnxGeneratedAudioCallbackWithArg,
) *GeneratedAudio {

	s := C.CString(text)
	defer C.free(unsafe.Pointer(s))

	var audio *C.struct_SherpaOnnxGeneratedAudio

	if cb != nil {
		h := cgo.NewHandle(cb)
		defer h.Delete()

		audio = C.SherpaOnnxOfflineTtsGenerateWithCallbackWithArg(
			tts.impl,
			s,
			C.int(sid),
			C.float(speed),
			C.SherpaOnnxGeneratedAudioCallbackWithArg(C._cgoGeneratedAudioCallback),
			unsafe.Pointer(&h),
		)
	} else {
		audio = C.SherpaOnnxOfflineTtsGenerateWithCallbackWithArg(
			tts.impl,
			s,
			C.int(sid),
			C.float(speed),
			nil,
			nil,
		)
	}

	if audio == nil {
		return nil
	}
	defer C.SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio)

	n := int(audio.n)
	samples := unsafe.Slice(
		(*float32)(unsafe.Pointer(audio.samples)),
		n,
	)

	ans := &GeneratedAudio{
		SampleRate: int(audio.sample_rate),
		Samples:    make([]float32, n),
	}
	copy(ans.Samples, samples)

	return ans
}

func (tts *OfflineTts) GenerateWithProgressCallback(
	text string,
	sid int,
	speed float32,
	cb sherpaOnnxGeneratedAudioProgressCallbackWithArg,
) *GeneratedAudio {
	s := C.CString(text)
	defer C.free(unsafe.Pointer(s))

	var audio *C.struct_SherpaOnnxGeneratedAudio

	if cb != nil {
		h := cgo.NewHandle(cb)
		defer h.Delete()

		audio = C.SherpaOnnxOfflineTtsGenerateWithProgressCallbackWithArg(
			tts.impl,
			s,
			C.int(sid),
			C.float(speed),
			C.SherpaOnnxGeneratedAudioProgressCallbackWithArg(
				C._cgoGeneratedAudioProgressCallback,
			),
			unsafe.Pointer(&h),
		)
	} else {
		audio = C.SherpaOnnxOfflineTtsGenerateWithProgressCallbackWithArg(
			tts.impl,
			s,
			C.int(sid),
			C.float(speed),
			nil,
			nil,
		)
	}

	if audio == nil {
		return nil
	}
	defer C.SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio)

	n := int(audio.n)
	samples := unsafe.Slice(
		(*float32)(unsafe.Pointer(audio.samples)),
		n,
	)

	ans := &GeneratedAudio{
		SampleRate: int(audio.sample_rate),
		Samples:    make([]float32, n),
	}
	copy(ans.Samples, samples)

	return ans
}

func (tts *OfflineTts) GenerateWithConfig(
	text string,
	cfg *GenerationConfig,
	cb sherpaOnnxGeneratedAudioProgressCallbackWithArg,
) *GeneratedAudio {
	if cfg == nil {
		cfg = &GenerationConfig{}
	}

	cText := C.CString(text)
	defer C.free(unsafe.Pointer(cText))

	var cCfg C.struct_SherpaOnnxGenerationConfig
	cCfg.silence_scale = C.float(cfg.SilenceScale)
	cCfg.speed = C.float(cfg.Speed)
	cCfg.sid = C.int(cfg.Sid)
	cCfg.num_steps = C.int(cfg.NumSteps)

	var cReferenceAudio *C.float
	if len(cfg.ReferenceAudio) > 0 {
		cReferenceAudio = (*C.float)(C.malloc(C.size_t(len(cfg.ReferenceAudio)) * C.size_t(unsafe.Sizeof(C.float(0)))))
		slice := (*[1 << 30]C.float)(unsafe.Pointer(cReferenceAudio))[:len(cfg.ReferenceAudio):len(cfg.ReferenceAudio)]
		for i, v := range cfg.ReferenceAudio {
			slice[i] = C.float(v)
		}
		cCfg.reference_audio = cReferenceAudio
		cCfg.reference_audio_len = C.int(len(cfg.ReferenceAudio))
		cCfg.reference_sample_rate = C.int(cfg.ReferenceSampleRate)
		defer C.free(unsafe.Pointer(cReferenceAudio)) // free after use
	}

	// Reference text
	if cfg.ReferenceText != "" {
		cCfg.reference_text = C.CString(cfg.ReferenceText)
		defer C.free(unsafe.Pointer(cCfg.reference_text))
	}

	var cExtra *C.char

	if len(cfg.Extra) > 0 {
		cExtra = C.CString(string(cfg.Extra)) // copy Go slice to C memory
		defer C.free(unsafe.Pointer(cExtra))  // free after use
	}

	cCfg.extra = cExtra

	var audio *C.struct_SherpaOnnxGeneratedAudio
	if cb != nil {
		h := cgo.NewHandle(cb)
		defer h.Delete()

		audio = C.SherpaOnnxOfflineTtsGenerateWithConfig(
			tts.impl,
			cText,
			&cCfg,
			C.SherpaOnnxGeneratedAudioProgressCallbackWithArg(
				C._cgoGeneratedAudioProgressCallback,
			),
			unsafe.Pointer(&h),
		)
	} else {
		audio = C.SherpaOnnxOfflineTtsGenerateWithConfig(
			tts.impl,
			cText,
			&cCfg,
			nil,
			nil,
		)
	}

	if audio == nil {
		return nil
	}
	defer C.SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio)

	n := int(audio.n)
	arr := unsafe.Slice(
		(*float32)(unsafe.Pointer(audio.samples)),
		n,
	)

	ans := &GeneratedAudio{
		SampleRate: int(audio.sample_rate),
		Samples:    make([]float32, n),
	}
	copy(ans.Samples, arr)

	return ans
}

func (audio *GeneratedAudio) Save(filename string) bool {
	s := C.CString(filename)
	defer C.free(unsafe.Pointer(s))

	ok := int(C.SherpaOnnxWriteWave((*C.float)(&audio.Samples[0]), C.int(len(audio.Samples)), C.int(audio.SampleRate), s))

	return ok == 1
}

func (audio *GeneratedAudio) ToBuffer() []byte {
	// Similar to Save(): it writes the wave to an allocated buffer;
	// Uses the C API: SHERPA_ONNX_API void SherpaOnnxWriteWaveToBuffer(const float *samples, int32_t n, int32_t sample_rate, char *buffer);
	n := len(audio.Samples)
	if n == 0 {
		return nil
	}
	fs := C.SherpaOnnxWaveFileSize(C.int(n)) // SHERPA_ONNX_API int64_t SherpaOnnxWaveFileSize(int32_t n_samples);
	buf := make([]byte, fs)
	C.SherpaOnnxWriteWaveToBuffer((*C.float)(&audio.Samples[0]), C.int(n), C.int(audio.SampleRate), (*C.char)(unsafe.Pointer(&buf[0])))
	return buf
}

// ============================================================
// For VAD
// ============================================================
type SileroVadModelConfig struct {
	Model              string
	Threshold          float32
	MinSilenceDuration float32
	MinSpeechDuration  float32
	WindowSize         int
	MaxSpeechDuration  float32
}

type TenVadModelConfig struct {
	Model              string
	Threshold          float32
	MinSilenceDuration float32
	MinSpeechDuration  float32
	WindowSize         int
	MaxSpeechDuration  float32
}

type VadModelConfig struct {
	SileroVad  SileroVadModelConfig
	TenVad     TenVadModelConfig
	SampleRate int
	NumThreads int
	Provider   string
	Debug      int
}

type CircularBuffer struct {
	impl *C.struct_SherpaOnnxCircularBuffer
}

func DeleteCircularBuffer(buffer *CircularBuffer) {
	C.SherpaOnnxDestroyCircularBuffer(buffer.impl)
	buffer.impl = nil
}

func NewCircularBuffer(capacity int) *CircularBuffer {
	circularBuffer := &CircularBuffer{}
	circularBuffer.impl = C.SherpaOnnxCreateCircularBuffer(C.int(capacity))
	return circularBuffer
}

func (buffer *CircularBuffer) Push(samples []float32) {
	C.SherpaOnnxCircularBufferPush(buffer.impl, (*C.float)(&samples[0]), C.int(len(samples)))
}

func (buffer *CircularBuffer) Get(start int, n int) []float32 {
	samples := C.SherpaOnnxCircularBufferGet(buffer.impl, C.int(start), C.int(n))
	defer C.SherpaOnnxCircularBufferFree(samples)

	result := make([]float32, n)

	p := unsafe.Slice(samples, n)
	for i := 0; i < n; i++ {
		result[i] = float32(p[i])
	}

	return result
}

func (buffer *CircularBuffer) Pop(n int) {
	C.SherpaOnnxCircularBufferPop(buffer.impl, C.int(n))
}

func (buffer *CircularBuffer) Size() int {
	return int(C.SherpaOnnxCircularBufferSize(buffer.impl))
}

func (buffer *CircularBuffer) Head() int {
	return int(C.SherpaOnnxCircularBufferHead(buffer.impl))
}

func (buffer *CircularBuffer) Reset() {
	C.SherpaOnnxCircularBufferReset(buffer.impl)
}

type SpeechSegment struct {
	Start   int
	Samples []float32
}

type VoiceActivityDetector struct {
	impl *C.struct_SherpaOnnxVoiceActivityDetector
}

func NewVoiceActivityDetector(config *VadModelConfig, bufferSizeInSeconds float32) *VoiceActivityDetector {
	c := C.struct_SherpaOnnxVadModelConfig{}

	c.silero_vad.model = C.CString(config.SileroVad.Model)
	defer C.free(unsafe.Pointer(c.silero_vad.model))

	c.silero_vad.threshold = C.float(config.SileroVad.Threshold)
	c.silero_vad.min_silence_duration = C.float(config.SileroVad.MinSilenceDuration)
	c.silero_vad.min_speech_duration = C.float(config.SileroVad.MinSpeechDuration)
	c.silero_vad.window_size = C.int(config.SileroVad.WindowSize)
	c.silero_vad.max_speech_duration = C.float(config.SileroVad.MaxSpeechDuration)

	c.ten_vad.model = C.CString(config.TenVad.Model)
	defer C.free(unsafe.Pointer(c.ten_vad.model))

	c.ten_vad.threshold = C.float(config.TenVad.Threshold)
	c.ten_vad.min_silence_duration = C.float(config.TenVad.MinSilenceDuration)
	c.ten_vad.min_speech_duration = C.float(config.TenVad.MinSpeechDuration)
	c.ten_vad.window_size = C.int(config.TenVad.WindowSize)
	c.ten_vad.max_speech_duration = C.float(config.TenVad.MaxSpeechDuration)

	c.sample_rate = C.int(config.SampleRate)
	c.num_threads = C.int(config.NumThreads)
	c.provider = C.CString(config.Provider)
	defer C.free(unsafe.Pointer(c.provider))

	c.debug = C.int(config.Debug)

	impl := C.SherpaOnnxCreateVoiceActivityDetector(&c, C.float(bufferSizeInSeconds))
	if impl == nil {
		return nil
	}
	vad := &VoiceActivityDetector{}
	vad.impl = impl
	return vad
}

func DeleteVoiceActivityDetector(vad *VoiceActivityDetector) {
	C.SherpaOnnxDestroyVoiceActivityDetector(vad.impl)
	vad.impl = nil
}

func (vad *VoiceActivityDetector) AcceptWaveform(samples []float32) {
	C.SherpaOnnxVoiceActivityDetectorAcceptWaveform(vad.impl, (*C.float)(&samples[0]), C.int(len(samples)))
}

func (vad *VoiceActivityDetector) IsEmpty() bool {
	return int(C.SherpaOnnxVoiceActivityDetectorEmpty(vad.impl)) == 1
}

func (vad *VoiceActivityDetector) IsSpeech() bool {
	return int(C.SherpaOnnxVoiceActivityDetectorDetected(vad.impl)) == 1
}

func (vad *VoiceActivityDetector) Pop() {
	C.SherpaOnnxVoiceActivityDetectorPop(vad.impl)
}

func (vad *VoiceActivityDetector) Clear() {
	C.SherpaOnnxVoiceActivityDetectorClear(vad.impl)
}

func (vad *VoiceActivityDetector) Front() *SpeechSegment {
	f := C.SherpaOnnxVoiceActivityDetectorFront(vad.impl)
	defer C.SherpaOnnxDestroySpeechSegment(f)

	ans := &SpeechSegment{}
	ans.Start = int(f.start)

	n := int(f.n)
	ans.Samples = make([]float32, n)

	samples := unsafe.Slice(f.samples, n)

	for i := 0; i < n; i++ {
		ans.Samples[i] = float32(samples[i])
	}

	return ans
}

func (vad *VoiceActivityDetector) Reset() {
	C.SherpaOnnxVoiceActivityDetectorReset(vad.impl)
}

func (vad *VoiceActivityDetector) Flush() {
	C.SherpaOnnxVoiceActivityDetectorFlush(vad.impl)
}

// Spoken language identification

type SpokenLanguageIdentificationWhisperConfig struct {
	Encoder      string
	Decoder      string
	TailPaddings int
}

type SpokenLanguageIdentificationConfig struct {
	Whisper    SpokenLanguageIdentificationWhisperConfig
	NumThreads int
	Debug      int
	Provider   string
}

type SpokenLanguageIdentification struct {
	impl *C.struct_SherpaOnnxSpokenLanguageIdentification
}

type SpokenLanguageIdentificationResult struct {
	Lang string
}

func NewSpokenLanguageIdentification(config *SpokenLanguageIdentificationConfig) *SpokenLanguageIdentification {
	c := C.struct_SherpaOnnxSpokenLanguageIdentificationConfig{}

	c.whisper.encoder = C.CString(config.Whisper.Encoder)
	defer C.free(unsafe.Pointer(c.whisper.encoder))

	c.whisper.decoder = C.CString(config.Whisper.Decoder)
	defer C.free(unsafe.Pointer(c.whisper.decoder))

	c.whisper.tail_paddings = C.int(config.Whisper.TailPaddings)

	c.num_threads = C.int(config.NumThreads)
	c.debug = C.int(config.Debug)

	c.provider = C.CString(config.Provider)
	defer C.free(unsafe.Pointer(c.provider))

	slid := &SpokenLanguageIdentification{}
	slid.impl = C.SherpaOnnxCreateSpokenLanguageIdentification(&c)

	return slid
}

func DeleteSpokenLanguageIdentification(slid *SpokenLanguageIdentification) {
	C.SherpaOnnxDestroySpokenLanguageIdentification(slid.impl)
	slid.impl = nil
}

// The user has to invoke DeleteOfflineStream() to free the returned value
// to avoid memory leak
func (slid *SpokenLanguageIdentification) CreateStream() *OfflineStream {
	stream := &OfflineStream{}
	stream.impl = C.SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(slid.impl)
	return stream
}

func (slid *SpokenLanguageIdentification) Compute(stream *OfflineStream) *SpokenLanguageIdentificationResult {
	r := C.SherpaOnnxSpokenLanguageIdentificationCompute(slid.impl, stream.impl)
	// defer C.SherpaOnnxDestroySpokenLanguageIdentificationResult(r)

	ans := &SpokenLanguageIdentificationResult{}
	ans.Lang = C.GoString(r.lang)

	return ans
}

// ============================================================
// For speaker embedding extraction
// ============================================================

type SpeakerEmbeddingExtractorConfig struct {
	Model      string
	NumThreads int
	Debug      int
	Provider   string
}

type SpeakerEmbeddingExtractor struct {
	impl *C.struct_SherpaOnnxSpeakerEmbeddingExtractor
}

// The user has to invoke [DeleteSpeakerEmbeddingExtractor]() to free the returned value
// to avoid memory leak
func NewSpeakerEmbeddingExtractor(config *SpeakerEmbeddingExtractorConfig) *SpeakerEmbeddingExtractor {
	c := C.struct_SherpaOnnxSpeakerEmbeddingExtractorConfig{}

	c.model = C.CString(config.Model)
	defer C.free(unsafe.Pointer(c.model))

	c.num_threads = C.int(config.NumThreads)
	c.debug = C.int(config.Debug)

	c.provider = C.CString(config.Provider)
	defer C.free(unsafe.Pointer(c.provider))

	impl := C.SherpaOnnxCreateSpeakerEmbeddingExtractor(&c)
	if impl == nil {
		return nil
	}
	ex := &SpeakerEmbeddingExtractor{}
	ex.impl = impl
	return ex
}

func DeleteSpeakerEmbeddingExtractor(ex *SpeakerEmbeddingExtractor) {
	C.SherpaOnnxDestroySpeakerEmbeddingExtractor(ex.impl)
	ex.impl = nil
}

func (ex *SpeakerEmbeddingExtractor) Dim() int {
	return int(C.SherpaOnnxSpeakerEmbeddingExtractorDim(ex.impl))
}

// The user is responsible to invoke [DeleteOnlineStream]() to free
// the returned stream to avoid memory leak
func (ex *SpeakerEmbeddingExtractor) CreateStream() *OnlineStream {
	stream := &OnlineStream{}
	stream.impl = C.SherpaOnnxSpeakerEmbeddingExtractorCreateStream(ex.impl)
	return stream
}

func (ex *SpeakerEmbeddingExtractor) IsReady(stream *OnlineStream) bool {
	return int(C.SherpaOnnxSpeakerEmbeddingExtractorIsReady(ex.impl, stream.impl)) == 1
}

func (ex *SpeakerEmbeddingExtractor) Compute(stream *OnlineStream) []float32 {
	embedding := C.SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding(ex.impl, stream.impl)
	defer C.SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(embedding)

	n := ex.Dim()
	ans := make([]float32, n)

	// see https://stackoverflow.com/questions/48756732/what-does-1-30c-yourtype-do-exactly-in-cgo
	// :n:n means 0:n:n, means low:high:capacity
	c := unsafe.Slice(embedding, n)

	for i := 0; i < n; i++ {
		ans[i] = float32(c[i])
	}

	return ans
}

type SpeakerEmbeddingManager struct {
	impl *C.struct_SherpaOnnxSpeakerEmbeddingManager
}

// The user has to invoke [DeleteSpeakerEmbeddingManager]() to free the returned
// value to avoid memory leak
func NewSpeakerEmbeddingManager(dim int) *SpeakerEmbeddingManager {
	impl := C.SherpaOnnxCreateSpeakerEmbeddingManager(C.int(dim))
	if impl == nil {
		return nil
	}
	m := &SpeakerEmbeddingManager{}
	m.impl = impl
	return m
}

func DeleteSpeakerEmbeddingManager(m *SpeakerEmbeddingManager) {
	C.SherpaOnnxDestroySpeakerEmbeddingManager(m.impl)
	m.impl = nil
}

func (m *SpeakerEmbeddingManager) Register(name string, embedding []float32) bool {
	s := C.CString(name)
	defer C.free(unsafe.Pointer(s))

	return C.int(C.SherpaOnnxSpeakerEmbeddingManagerAdd(m.impl, s, (*C.float)(&embedding[0]))) == 1
}

func (m *SpeakerEmbeddingManager) RegisterV(name string, embeddings [][]float32) bool {
	s := C.CString(name)
	defer C.free(unsafe.Pointer(s))

	if len(embeddings) == 0 {
		return false
	}

	dim := len(embeddings[0])
	v := make([]float32, 0, dim*len(embeddings))
	for _, embedding := range embeddings {
		v = append(v, embedding...)
	}

	return C.int(C.SherpaOnnxSpeakerEmbeddingManagerAddListFlattened(m.impl, s, (*C.float)(&v[0]), C.int(len(embeddings)))) == 1
}

func (m *SpeakerEmbeddingManager) Remove(name string) bool {
	s := C.CString(name)
	defer C.free(unsafe.Pointer(s))

	return C.int(C.SherpaOnnxSpeakerEmbeddingManagerRemove(m.impl, s)) == 1
}

func (m *SpeakerEmbeddingManager) Search(embedding []float32, threshold float32) string {
	var s string

	name := C.SherpaOnnxSpeakerEmbeddingManagerSearch(m.impl, (*C.float)(&embedding[0]), C.float(threshold))
	defer C.SherpaOnnxSpeakerEmbeddingManagerFreeSearch(name)

	if name != nil {
		s = C.GoString(name)
	}

	return s
}

func (m *SpeakerEmbeddingManager) Verify(name string, embedding []float32, threshold float32) bool {
	s := C.CString(name)
	defer C.free(unsafe.Pointer(s))

	return C.int(C.SherpaOnnxSpeakerEmbeddingManagerVerify(m.impl, s, (*C.float)(&embedding[0]), C.float(threshold))) == 1
}

func (m *SpeakerEmbeddingManager) Contains(name string) bool {
	s := C.CString(name)
	defer C.free(unsafe.Pointer(s))

	return C.int(C.SherpaOnnxSpeakerEmbeddingManagerContains(m.impl, s)) == 1
}

func (m *SpeakerEmbeddingManager) NumSpeakers() int {
	return int(C.SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(m.impl))
}

func (m *SpeakerEmbeddingManager) AllSpeakers() []string {
	all_speakers := C.SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(m.impl)
	defer C.SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(all_speakers)

	n := m.NumSpeakers()
	if n == 0 {
		return nil
	}

	// https://stackoverflow.com/questions/62012070/convert-array-of-strings-from-cgo-in-go
	p := unsafe.Slice(all_speakers, n)

	ans := make([]string, n)

	for i := 0; i < n; i++ {
		ans[i] = C.GoString(p[i])
	}

	return ans
}

// Wave

// single channel wave
type Wave = GeneratedAudio

func ReadWave(filename string) *Wave {
	s := C.CString(filename)
	defer C.free(unsafe.Pointer(s))

	w := C.SherpaOnnxReadWave(s)
	defer C.SherpaOnnxFreeWave(w)

	if w == nil {
		return nil
	}

	n := int(w.num_samples)
	if n == 0 {
		return nil
	}

	ans := &Wave{}
	ans.SampleRate = int(w.sample_rate)
	samples := unsafe.Slice(w.samples, n)

	ans.Samples = make([]float32, n)

	for i := 0; i < n; i++ {
		ans.Samples[i] = float32(samples[i])
	}

	return ans
}

// ============================================================
// For offline speaker diarization
// ============================================================
type OfflineSpeakerSegmentationPyannoteModelConfig struct {
	Model string
}

type OfflineSpeakerSegmentationModelConfig struct {
	Pyannote   OfflineSpeakerSegmentationPyannoteModelConfig
	NumThreads int
	Debug      int
	Provider   string
}

type FastClusteringConfig struct {
	NumClusters int
	Threshold   float32
}

type OfflineSpeakerDiarizationConfig struct {
	Segmentation   OfflineSpeakerSegmentationModelConfig
	Embedding      SpeakerEmbeddingExtractorConfig
	Clustering     FastClusteringConfig
	MinDurationOn  float32
	MinDurationOff float32
}

type OfflineSpeakerDiarization struct {
	impl *C.struct_SherpaOnnxOfflineSpeakerDiarization
}

func DeleteOfflineSpeakerDiarization(sd *OfflineSpeakerDiarization) {
	C.SherpaOnnxDestroyOfflineSpeakerDiarization(sd.impl)
	sd.impl = nil
}

func NewOfflineSpeakerDiarization(config *OfflineSpeakerDiarizationConfig) *OfflineSpeakerDiarization {
	c := C.struct_SherpaOnnxOfflineSpeakerDiarizationConfig{}
	c.segmentation.pyannote.model = C.CString(config.Segmentation.Pyannote.Model)
	defer C.free(unsafe.Pointer(c.segmentation.pyannote.model))

	c.segmentation.num_threads = C.int(config.Segmentation.NumThreads)

	c.segmentation.debug = C.int(config.Segmentation.Debug)

	c.segmentation.provider = C.CString(config.Segmentation.Provider)
	defer C.free(unsafe.Pointer(c.segmentation.provider))

	c.embedding.model = C.CString(config.Embedding.Model)
	defer C.free(unsafe.Pointer(c.embedding.model))

	c.embedding.num_threads = C.int(config.Embedding.NumThreads)

	c.embedding.debug = C.int(config.Embedding.Debug)

	c.embedding.provider = C.CString(config.Embedding.Provider)
	defer C.free(unsafe.Pointer(c.embedding.provider))

	c.clustering.num_clusters = C.int(config.Clustering.NumClusters)
	c.clustering.threshold = C.float(config.Clustering.Threshold)
	c.min_duration_on = C.float(config.MinDurationOn)
	c.min_duration_off = C.float(config.MinDurationOff)

	p := C.SherpaOnnxCreateOfflineSpeakerDiarization(&c)

	if p == nil {
		return nil
	}

	sd := &OfflineSpeakerDiarization{}
	sd.impl = p

	return sd
}

func (sd *OfflineSpeakerDiarization) SampleRate() int {
	return int(C.SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(sd.impl))
}

// only config.Clustering is used. All other fields are ignored
func (sd *OfflineSpeakerDiarization) SetConfig(config *OfflineSpeakerDiarizationConfig) {
	c := C.struct_SherpaOnnxOfflineSpeakerDiarizationConfig{}

	c.clustering.num_clusters = C.int(config.Clustering.NumClusters)
	c.clustering.threshold = C.float(config.Clustering.Threshold)

	C.SherpaOnnxOfflineSpeakerDiarizationSetConfig(sd.impl, &c)
}

type OfflineSpeakerDiarizationSegment struct {
	Start   float32
	End     float32
	Speaker int
}

func (sd *OfflineSpeakerDiarization) Process(samples []float32) []OfflineSpeakerDiarizationSegment {
	r := C.SherpaOnnxOfflineSpeakerDiarizationProcess(sd.impl, (*C.float)(&samples[0]), C.int(len(samples)))
	defer C.SherpaOnnxOfflineSpeakerDiarizationDestroyResult(r)

	n := int(C.SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(r))

	if n == 0 {
		return nil
	}

	s := C.SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(r)
	defer C.SherpaOnnxOfflineSpeakerDiarizationDestroySegment(s)

	ans := make([]OfflineSpeakerDiarizationSegment, n)

	p := unsafe.Slice(s, n)

	for i := 0; i < n; i++ {
		ans[i].Start = float32(p[i].start)
		ans[i].End = float32(p[i].end)
		ans[i].Speaker = int(p[i].speaker)
	}

	return ans
}

// ============================================================
// For punctuation
// ============================================================
type OfflinePunctuationModelConfig struct {
	CtTransformer string
	NumThreads    int
	Debug         int // true to print debug information of the model
	Provider      string
}

type OfflinePunctuationConfig struct {
	Model OfflinePunctuationModelConfig
}

type OfflinePunctuation struct {
	impl *C.struct_SherpaOnnxOfflinePunctuation
}

func NewOfflinePunctuation(config *OfflinePunctuationConfig) *OfflinePunctuation {
	cfg := C.struct_SherpaOnnxOfflinePunctuationConfig{}
	cfg.model.ct_transformer = C.CString(config.Model.CtTransformer)
	defer C.free(unsafe.Pointer(cfg.model.ct_transformer))

	cfg.model.num_threads = C.int(config.Model.NumThreads)
	cfg.model.debug = C.int(config.Model.Debug)
	cfg.model.provider = C.CString(config.Model.Provider)
	defer C.free(unsafe.Pointer(cfg.model.provider))

	impl := C.SherpaOnnxCreateOfflinePunctuation(&cfg)
	if impl == nil {
		return nil
	}
	punc := &OfflinePunctuation{}
	punc.impl = impl
	return punc
}

func DeleteOfflinePunc(punc *OfflinePunctuation) {
	C.SherpaOnnxDestroyOfflinePunctuation(punc.impl)
	punc.impl = nil
}

func (punc *OfflinePunctuation) AddPunct(text string) string {
	inputText := C.CString(text)
	defer C.free(unsafe.Pointer(inputText))
	p := C.SherpaOfflinePunctuationAddPunct(punc.impl, inputText)
	if p == nil {
		return ""
	}
	defer C.SherpaOfflinePunctuationFreeText(p)

	text_with_punct := C.GoString(p)

	return text_with_punct
}

type OnlinePunctuationModelConfig struct {
	CnnBilstm  string
	BpeVocab   string
	NumThreads int
	Debug      int
	Provider   string
}

type OnlinePunctuationConfig struct {
	Model OnlinePunctuationModelConfig
}

type OnlinePunctuation struct {
	impl *C.struct_SherpaOnnxOnlinePunctuation
}

func NewOnlinePunctuation(config *OnlinePunctuationConfig) *OnlinePunctuation {
	cfg := C.struct_SherpaOnnxOnlinePunctuationConfig{}
	cfg.model.cnn_bilstm = C.CString(config.Model.CnnBilstm)
	defer C.free(unsafe.Pointer(cfg.model.cnn_bilstm))

	cfg.model.bpe_vocab = C.CString(config.Model.BpeVocab)
	defer C.free(unsafe.Pointer(cfg.model.bpe_vocab))

	cfg.model.num_threads = C.int(config.Model.NumThreads)
	cfg.model.debug = C.int(config.Model.Debug)
	cfg.model.provider = C.CString(config.Model.Provider)
	defer C.free(unsafe.Pointer(cfg.model.provider))

	impl := C.SherpaOnnxCreateOnlinePunctuation(&cfg)
	if impl == nil {
		return nil
	}
	punc := &OnlinePunctuation{}
	punc.impl = impl
	return punc
}

func DeleteOnlinePunctuation(punc *OnlinePunctuation) {
	C.SherpaOnnxDestroyOnlinePunctuation(punc.impl)
	punc.impl = nil
}

func (punc *OnlinePunctuation) AddPunct(text string) string {
	inputText := C.CString(text)
	defer C.free(unsafe.Pointer(inputText))

	p := C.SherpaOnnxOnlinePunctuationAddPunct(punc.impl, inputText)
	if p == nil {
		return ""
	}
	defer C.SherpaOnnxOnlinePunctuationFreeText(p)

	textWithPunct := C.GoString(p)

	return textWithPunct
}

// Configuration for the online/streaming recognizer.
type KeywordSpotterConfig struct {
	FeatConfig        FeatureConfig
	ModelConfig       OnlineModelConfig
	MaxActivePaths    int
	KeywordsFile      string
	KeywordsScore     float32
	KeywordsThreshold float32
	KeywordsBuf       string
	KeywordsBufSize   int
}

type KeywordSpotterResult struct {
	Keyword string
}

type KeywordSpotter struct {
	impl *C.struct_SherpaOnnxKeywordSpotter
}

// Free the internal pointer inside the recognizer to avoid memory leak.
func DeleteKeywordSpotter(spotter *KeywordSpotter) {
	C.SherpaOnnxDestroyKeywordSpotter(spotter.impl)
	spotter.impl = nil
}

// The user is responsible to invoke [DeleteKeywordSpotter]() to free
// the returned spotter to avoid memory leak
func NewKeywordSpotter(config *KeywordSpotterConfig) *KeywordSpotter {
	c := C.struct_SherpaOnnxKeywordSpotterConfig{}
	c.feat_config.sample_rate = C.int(config.FeatConfig.SampleRate)
	c.feat_config.feature_dim = C.int(config.FeatConfig.FeatureDim)

	c.model_config.transducer.encoder = C.CString(config.ModelConfig.Transducer.Encoder)
	defer C.free(unsafe.Pointer(c.model_config.transducer.encoder))

	c.model_config.transducer.decoder = C.CString(config.ModelConfig.Transducer.Decoder)
	defer C.free(unsafe.Pointer(c.model_config.transducer.decoder))

	c.model_config.transducer.joiner = C.CString(config.ModelConfig.Transducer.Joiner)
	defer C.free(unsafe.Pointer(c.model_config.transducer.joiner))

	c.model_config.paraformer.encoder = C.CString(config.ModelConfig.Paraformer.Encoder)
	defer C.free(unsafe.Pointer(c.model_config.paraformer.encoder))

	c.model_config.paraformer.decoder = C.CString(config.ModelConfig.Paraformer.Decoder)
	defer C.free(unsafe.Pointer(c.model_config.paraformer.decoder))

	c.model_config.zipformer2_ctc.model = C.CString(config.ModelConfig.Zipformer2Ctc.Model)
	defer C.free(unsafe.Pointer(c.model_config.zipformer2_ctc.model))

	c.model_config.nemo_ctc.model = C.CString(config.ModelConfig.NemoCtc.Model)
	defer C.free(unsafe.Pointer(c.model_config.nemo_ctc.model))

	c.model_config.tokens = C.CString(config.ModelConfig.Tokens)
	defer C.free(unsafe.Pointer(c.model_config.tokens))

	c.model_config.num_threads = C.int(config.ModelConfig.NumThreads)

	c.model_config.provider = C.CString(config.ModelConfig.Provider)
	defer C.free(unsafe.Pointer(c.model_config.provider))

	c.model_config.debug = C.int(config.ModelConfig.Debug)

	c.model_config.model_type = C.CString(config.ModelConfig.ModelType)
	defer C.free(unsafe.Pointer(c.model_config.model_type))

	c.model_config.modeling_unit = C.CString(config.ModelConfig.ModelingUnit)
	defer C.free(unsafe.Pointer(c.model_config.modeling_unit))

	c.model_config.bpe_vocab = C.CString(config.ModelConfig.BpeVocab)
	defer C.free(unsafe.Pointer(c.model_config.bpe_vocab))

	c.model_config.tokens_buf = C.CString(config.ModelConfig.TokensBuf)
	defer C.free(unsafe.Pointer(c.model_config.tokens_buf))

	c.model_config.tokens_buf_size = C.int(config.ModelConfig.TokensBufSize)

	c.max_active_paths = C.int(config.MaxActivePaths)

	c.keywords_file = C.CString(config.KeywordsFile)
	defer C.free(unsafe.Pointer(c.keywords_file))

	c.keywords_score = C.float(config.KeywordsScore)

	c.keywords_threshold = C.float(config.KeywordsThreshold)

	c.keywords_buf = C.CString(config.KeywordsBuf)
	defer C.free(unsafe.Pointer(c.keywords_buf))

	c.keywords_buf_size = C.int(config.KeywordsBufSize)

	impl := C.SherpaOnnxCreateKeywordSpotter(&c)
	if impl == nil {
		return nil
	}
	spotter := &KeywordSpotter{}
	spotter.impl = impl
	return spotter
}

// The user is responsible to invoke [DeleteOnlineStream]() to free
// the returned stream to avoid memory leak
func NewKeywordStream(spotter *KeywordSpotter) *OnlineStream {
	stream := &OnlineStream{}
	stream.impl = C.SherpaOnnxCreateKeywordStream(spotter.impl)
	return stream
}

// The user is responsible to invoke [DeleteOnlineStream]() to free
// the returned stream to avoid memory leak
func NewKeywordStreamWithKeywords(spotter *KeywordSpotter, keywords string) *OnlineStream {
	stream := &OnlineStream{}

	s := C.CString(keywords)
	defer C.free(unsafe.Pointer(s))

	stream.impl = C.SherpaOnnxCreateKeywordStreamWithKeywords(spotter.impl, s)
	return stream
}

// Check whether the stream has enough feature frames for decoding.
// Return true if this stream is ready for decoding. Return false otherwise.
//
// You will usually use it like below:
//
//	for spotter.IsReady(s) {
//	   spotter.Decode(s)
//	}
func (spotter *KeywordSpotter) IsReady(s *OnlineStream) bool {
	return C.SherpaOnnxIsKeywordStreamReady(spotter.impl, s.impl) == 1
}

// Decode the stream. Before calling this function, you have to ensure
// that spotter.IsReady(s) returns true. Otherwise, you will be SAD.
//
// You usually use it like below:
//
//	for spotter.IsReady(s) {
//	  spotter.Decode(s)
//	}
func (spotter *KeywordSpotter) Decode(s *OnlineStream) {
	C.SherpaOnnxDecodeKeywordStream(spotter.impl, s.impl)
}

// You MUST call it right after detecting a keyword
func (spotter *KeywordSpotter) Reset(s *OnlineStream) {
	C.SherpaOnnxResetKeywordStream(spotter.impl, s.impl)
}

// Get the current result of stream since the last invoke of Reset()
func (spotter *KeywordSpotter) GetResult(s *OnlineStream) *KeywordSpotterResult {
	p := C.SherpaOnnxGetKeywordResult(spotter.impl, s.impl)
	defer C.SherpaOnnxDestroyKeywordResult(p)
	result := &KeywordSpotterResult{}
	result.Keyword = C.GoString(p.keyword)
	return result
}

// Configuration for the audio tagging.
type OfflineZipformerAudioTaggingModelConfig struct {
	Model string
}

type AudioTaggingModelConfig struct {
	Zipformer  OfflineZipformerAudioTaggingModelConfig
	Ced        string
	NumThreads int32
	Debug      int32
	Provider   string
}

type AudioTaggingConfig struct {
	Model  AudioTaggingModelConfig
	Labels string
	TopK   int32
}

type AudioTagging struct {
	impl *C.struct_SherpaOnnxAudioTagging
}

type AudioEvent struct {
	Name  string
	Index int
	Prob  float32
}

func DeleteAudioTagging(tagging *AudioTagging) {
	C.SherpaOnnxDestroyAudioTagging(tagging.impl)
	tagging.impl = nil
}

// The user is responsible to invoke [DeleteAudioTagging]() to free
// the returned tagger to avoid memory leak
func NewAudioTagging(config *AudioTaggingConfig) *AudioTagging {
	c := C.struct_SherpaOnnxAudioTaggingConfig{}

	c.model.zipformer.model = C.CString(config.Model.Zipformer.Model)
	defer C.free(unsafe.Pointer(c.model.zipformer.model))

	c.model.ced = C.CString(config.Model.Ced)
	defer C.free(unsafe.Pointer(c.model.ced))

	c.model.num_threads = C.int(config.Model.NumThreads)

	c.model.provider = C.CString(config.Model.Provider)
	defer C.free(unsafe.Pointer(c.model.provider))

	c.model.debug = C.int(config.Model.Debug)

	c.labels = C.CString(config.Labels)
	defer C.free(unsafe.Pointer(c.labels))

	c.top_k = C.int(config.TopK)

	impl := C.SherpaOnnxCreateAudioTagging(&c)
	if impl == nil {
		return nil
	}
	tagging := &AudioTagging{}
	tagging.impl = impl
	return tagging
}

// The user is responsible to invoke [DeleteOfflineStream]() to free
// the returned stream to avoid memory leak
func NewAudioTaggingStream(tagging *AudioTagging) *OfflineStream {
	stream := &OfflineStream{}
	stream.impl = C.SherpaOnnxAudioTaggingCreateOfflineStream(tagging.impl)
	return stream
}

func (tagging *AudioTagging) Compute(s *OfflineStream, topK int32) []AudioEvent {
	r := C.SherpaOnnxAudioTaggingCompute(tagging.impl, s.impl, C.int(topK))
	defer C.SherpaOnnxAudioTaggingFreeResults(r)
	result := make([]AudioEvent, 0)

	p := (*[1 << 25]*C.struct_SherpaOnnxAudioEvent)(unsafe.Pointer(r))
	i := 0
	for {
		if p[i] == nil {
			break
		}
		result = append(result, AudioEvent{
			Name:  C.GoString(p[i].name),
			Index: int(p[i].index),
			Prob:  float32(p[i].prob),
		})
		i += 1
	}
	return result
}

type OfflineSpeechDenoiserGtcrnModelConfig struct {
	Model string
}

type OfflineSpeechDenoiserDpdfNetModelConfig struct {
	Model string
}

type OfflineSpeechDenoiserModelConfig struct {
	Gtcrn      OfflineSpeechDenoiserGtcrnModelConfig
	DpdfNet    OfflineSpeechDenoiserDpdfNetModelConfig
	NumThreads int32
	Debug      int32
	Provider   string
}

type OfflineSpeechDenoiserConfig struct {
	Model OfflineSpeechDenoiserModelConfig
}

type OfflineSpeechDenoiser struct {
	impl *C.struct_SherpaOnnxOfflineSpeechDenoiser
}

type OnlineSpeechDenoiserConfig struct {
	Model OfflineSpeechDenoiserModelConfig
}

type OnlineSpeechDenoiser struct {
	impl *C.struct_SherpaOnnxOnlineSpeechDenoiser
}

type DenoisedAudio struct {
	// Normalized samples in the range [-1, 1]
	Samples []float32

	SampleRate int
}

func floatPointer(samples []float32) *C.float {
	if len(samples) == 0 {
		return nil
	}

	return (*C.float)(&samples[0])
}

func denoisedAudioFromPointer(audio *C.struct_SherpaOnnxDenoisedAudio) *DenoisedAudio {
	if audio == nil {
		return &DenoisedAudio{}
	}

	defer C.SherpaOnnxDestroyDenoisedAudio(audio)

	ans := &DenoisedAudio{}
	ans.SampleRate = int(audio.sample_rate)
	n := int(audio.n)
	ans.Samples = make([]float32, n)

	if n == 0 || audio.samples == nil {
		return ans
	}

	denoisedSamples := unsafe.Slice(audio.samples, n)
	for i := 0; i < n; i++ {
		ans.Samples[i] = float32(denoisedSamples[i])
	}

	return ans
}

// Free the internal pointer inside the OfflineSpeechDenoiser to avoid memory leak.
func DeleteOfflineSpeechDenoiser(sd *OfflineSpeechDenoiser) {
	C.SherpaOnnxDestroyOfflineSpeechDenoiser(sd.impl)
	sd.impl = nil
}

// The user is responsible to invoke [DeleteOfflineSpeechDenoiser]() to free
// the returned tts to avoid memory leak
func NewOfflineSpeechDenoiser(config *OfflineSpeechDenoiserConfig) *OfflineSpeechDenoiser {
	c := C.struct_SherpaOnnxOfflineSpeechDenoiserConfig{}
	c.model.gtcrn.model = C.CString(config.Model.Gtcrn.Model)
	defer C.free(unsafe.Pointer(c.model.gtcrn.model))
	c.model.dpdfnet.model = C.CString(config.Model.DpdfNet.Model)
	defer C.free(unsafe.Pointer(c.model.dpdfnet.model))

	c.model.num_threads = C.int(config.Model.NumThreads)
	c.model.debug = C.int(config.Model.Debug)

	c.model.provider = C.CString(config.Model.Provider)
	defer C.free(unsafe.Pointer(c.model.provider))

	impl := C.SherpaOnnxCreateOfflineSpeechDenoiser(&c)
	if impl == nil {
		return nil
	}

	sd := &OfflineSpeechDenoiser{}
	sd.impl = impl
	return sd
}

func (sd *OfflineSpeechDenoiser) Run(samples []float32, sampleRate int) *DenoisedAudio {
	audio := C.SherpaOnnxOfflineSpeechDenoiserRun(sd.impl, floatPointer(samples), C.int(len(samples)), C.int(sampleRate))
	return denoisedAudioFromPointer(audio)
}

func (audio *DenoisedAudio) Save(filename string) bool {
	s := C.CString(filename)
	defer C.free(unsafe.Pointer(s))

	ok := int(C.SherpaOnnxWriteWave(floatPointer(audio.Samples), C.int(len(audio.Samples)), C.int(audio.SampleRate), s))

	return ok == 1
}

func (sd *OfflineSpeechDenoiser) SampleRate() int {
	return int(C.SherpaOnnxOfflineSpeechDenoiserGetSampleRate(sd.impl))
}

// Free the internal pointer inside the OnlineSpeechDenoiser to avoid memory leak.
func DeleteOnlineSpeechDenoiser(sd *OnlineSpeechDenoiser) {
	C.SherpaOnnxDestroyOnlineSpeechDenoiser(sd.impl)
	sd.impl = nil
}

// The user is responsible to invoke [DeleteOnlineSpeechDenoiser]() to free
// the returned denoiser to avoid memory leak.
func NewOnlineSpeechDenoiser(config *OnlineSpeechDenoiserConfig) *OnlineSpeechDenoiser {
	c := C.struct_SherpaOnnxOnlineSpeechDenoiserConfig{}
	c.model.gtcrn.model = C.CString(config.Model.Gtcrn.Model)
	defer C.free(unsafe.Pointer(c.model.gtcrn.model))
	c.model.dpdfnet.model = C.CString(config.Model.DpdfNet.Model)
	defer C.free(unsafe.Pointer(c.model.dpdfnet.model))

	c.model.num_threads = C.int(config.Model.NumThreads)
	c.model.debug = C.int(config.Model.Debug)

	c.model.provider = C.CString(config.Model.Provider)
	defer C.free(unsafe.Pointer(c.model.provider))

	impl := C.SherpaOnnxCreateOnlineSpeechDenoiser(&c)
	if impl == nil {
		return nil
	}

	sd := &OnlineSpeechDenoiser{}
	sd.impl = impl
	return sd
}

func (sd *OnlineSpeechDenoiser) Run(samples []float32, sampleRate int) *DenoisedAudio {
	audio := C.SherpaOnnxOnlineSpeechDenoiserRun(sd.impl, floatPointer(samples), C.int(len(samples)), C.int(sampleRate))
	return denoisedAudioFromPointer(audio)
}

func (sd *OnlineSpeechDenoiser) Flush() *DenoisedAudio {
	audio := C.SherpaOnnxOnlineSpeechDenoiserFlush(sd.impl)
	return denoisedAudioFromPointer(audio)
}

func (sd *OnlineSpeechDenoiser) Reset() {
	C.SherpaOnnxOnlineSpeechDenoiserReset(sd.impl)
}

func (sd *OnlineSpeechDenoiser) SampleRate() int {
	return int(C.SherpaOnnxOnlineSpeechDenoiserGetSampleRate(sd.impl))
}

func (sd *OnlineSpeechDenoiser) FrameShiftInSamples() int {
	return int(C.SherpaOnnxOnlineSpeechDenoiserGetFrameShiftInSamples(sd.impl))
}

func GetVersion() string {
	return C.GoString(C.SherpaOnnxGetVersionStr())
}

func GetGitSha1() string {
	return C.GoString(C.SherpaOnnxGetGitSha1())
}

func GetGitDate() string {
	return C.GoString(C.SherpaOnnxGetGitDate())
}


================================================
FILE: scripts/go/ssh_config
================================================
Host github.com
  Hostname github.com
  User git
  IdentityFile ~/.ssh/github
  StrictHostKeyChecking no


================================================
FILE: scripts/gtcrn/README.md
================================================
# Introduction

This folder contains scripts for adding metadata to models from
https://github.com/Xiaobin-Rong/gtcrn/blob/main/stream/onnx_models/gtcrn_simple.onnx


================================================
FILE: scripts/gtcrn/add_meta_data.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

"""
NodeArg(name='mix', type='tensor(float)', shape=[1, 257, 1, 2])
NodeArg(name='conv_cache', type='tensor(float)', shape=[2, 1, 16, 16, 33])
NodeArg(name='tra_cache', type='tensor(float)', shape=[2, 3, 1, 1, 16])
NodeArg(name='inter_cache', type='tensor(float)', shape=[2, 1, 33, 16])
-----
NodeArg(name='enh', type='tensor(float)', shape=[1, 257, 1, 2])
NodeArg(name='conv_cache_out', type='tensor(float)', shape=[2, 1, 16, 16, 33])
NodeArg(name='tra_cache_out', type='tensor(float)', shape=[2, 3, 1, 1, 16])
NodeArg(name='inter_cache_out', type='tensor(float)', shape=[2, 1, 33, 16])
"""

import onnx
import onnxruntime as ort


def show(filename):
    session_opts = ort.SessionOptions()
    session_opts.log_severity_level = 3
    sess = ort.InferenceSession(filename, session_opts)
    for i in sess.get_inputs():
        print(i)

    print("-----")

    for i in sess.get_outputs():
        print(i)


def main():
    filename = "./gtcrn_simple.onnx"
    show(filename)
    model = onnx.load(filename)

    meta_data = {
        "model_type": "gtcrn",
        "comment": "gtcrn_simple",
        "version": 1,
        "sample_rate": 16000,
        "model_url": "https://github.com/Xiaobin-Rong/gtcrn/blob/main/stream/onnx_models/gtcrn_simple.onnx",
        "maintainer": "k2-fsa",
        "comment2": "Please see also https://github.com/Xiaobin-Rong/gtcrn",
        "conv_cache_shape": "2,1,16,16,33",
        "tra_cache_shape": "2,3,1,1,16",
        "inter_cache_shape": "2,1,33,16",
        "n_fft": 512,
        "hop_length": 256,
        "window_length": 512,
        "window_type": "hann_sqrt",
    }

    print(model.metadata_props)

    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)
    print("--------------------")

    print(model.metadata_props)

    onnx.save(model, filename)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/gtcrn/show.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import onnxruntime
import onnx

"""
[key: "model_type"
value: "gtcrn"
, key: "comment"
value: "gtcrn_simple"
, key: "version"
value: "1"
, key: "sample_rate"
value: "16000"
, key: "model_url"
value: "https://github.com/Xiaobin-Rong/gtcrn/blob/main/stream/onnx_models/gtcrn_simple.onnx"
, key: "maintainer"
value: "k2-fsa"
, key: "comment2"
value: "Please see also https://github.com/Xiaobin-Rong/gtcrn"
, key: "conv_cache_shape"
value: "2,1,16,16,33"
, key: "tra_cache_shape"
value: "2,3,1,1,16"
, key: "inter_cache_shape"
value: "2,1,33,16"
, key: "n_fft"
value: "512"
, key: "hop_length"
value: "256"
, key: "window_length"
value: "512"
, key: "window_type"
value: "hann_sqrt"
]
"""

"""
NodeArg(name='mix', type='tensor(float)', shape=[1, 257, 1, 2])
NodeArg(name='conv_cache', type='tensor(float)', shape=[2, 1, 16, 16, 33])
NodeArg(name='tra_cache', type='tensor(float)', shape=[2, 3, 1, 1, 16])
NodeArg(name='inter_cache', type='tensor(float)', shape=[2, 1, 33, 16])
-----
NodeArg(name='enh', type='tensor(float)', shape=[1, 257, 1, 2])
NodeArg(name='conv_cache_out', type='tensor(float)', shape=[2, 1, 16, 16, 33])
NodeArg(name='tra_cache_out', type='tensor(float)', shape=[2, 3, 1, 1, 16])
NodeArg(name='inter_cache_out', type='tensor(float)', shape=[2, 1, 33, 16])
"""


def show(filename):
    model = onnx.load(filename)
    print(model.metadata_props)

    session_opts = onnxruntime.SessionOptions()
    session_opts.log_severity_level = 3
    sess = onnxruntime.InferenceSession(
        filename, session_opts, providers=["CPUExecutionProvider"]
    )
    for i in sess.get_inputs():
        print(i)

    print("-----")

    for i in sess.get_outputs():
        print(i)


def main():
    show("./gtcrn_simple.onnx")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/gtcrn/test.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

from typing import Tuple

import kaldi_native_fbank as knf
import numpy as np
import onnxruntime as ort
import soundfile as sf
import torch


def load_audio(filename: str) -> Tuple[np.ndarray, int]:
    data, sample_rate = sf.read(
        filename,
        always_2d=True,
        dtype="float32",
    )
    data = data[:, 0]  # use only the first channel
    samples = np.ascontiguousarray(data)
    return samples, sample_rate


class OnnxModel:
    def __init__(self):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.session_opts = session_opts
        self.model = ort.InferenceSession(
            "./gtcrn_simple.onnx",
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )

        meta = self.model.get_modelmeta().custom_metadata_map
        self.sample_rate = int(meta["sample_rate"])
        self.n_fft = int(meta["n_fft"])
        self.hop_length = int(meta["hop_length"])
        self.window_length = int(meta["window_length"])
        assert meta["window_type"] == "hann_sqrt", meta["window_type"]

        self.window = torch.hann_window(self.window_length).pow(0.5)

    def get_init_states(self):
        meta = self.model.get_modelmeta().custom_metadata_map
        conv_cache_shape = list(map(int, meta["conv_cache_shape"].split(",")))
        tra_cache_shape = list(map(int, meta["tra_cache_shape"].split(",")))
        inter_cache_shape = list(map(int, meta["inter_cache_shape"].split(",")))

        conv_cache_shape = np.zeros(conv_cache_shape, dtype=np.float32)
        tra_cache = np.zeros(tra_cache_shape, dtype=np.float32)
        inter_cache = np.zeros(inter_cache_shape, dtype=np.float32)

        return conv_cache_shape, tra_cache, inter_cache

    def __call__(self, x, states):
        """
        Args:
          x: (1, n_fft/2+1, 1, 2)
        Returns:
          o: (1, n_fft/2+1, 1, 2)
        """
        out, next_conv_cache, next_tra_cache, next_inter_cache = self.model.run(
            [
                self.model.get_outputs()[0].name,
                self.model.get_outputs()[1].name,
                self.model.get_outputs()[2].name,
                self.model.get_outputs()[3].name,
            ],
            {
                self.model.get_inputs()[0].name: x,
                self.model.get_inputs()[1].name: states[0],
                self.model.get_inputs()[2].name: states[1],
                self.model.get_inputs()[3].name: states[2],
            },
        )

        return out, (next_conv_cache, next_tra_cache, next_inter_cache)


def main():
    model = OnnxModel()

    filename = "./inp_16k.wav"
    wave, sample_rate = load_audio(filename)
    if sample_rate != model.sample_rate:
        import librosa

        wave = librosa.resample(wave, orig_sr=sample_rate, target_sr=model.sample_rate)
        sample_rate = model.sample_rate

    stft_config = knf.StftConfig(
        n_fft=model.n_fft,
        hop_length=model.hop_length,
        win_length=model.window_length,
        window=model.window.tolist(),
    )
    stft = knf.Stft(stft_config)
    stft_result = stft(wave)
    num_frames = stft_result.num_frames
    real = np.array(stft_result.real, dtype=np.float32).reshape(num_frames, -1)
    imag = np.array(stft_result.imag, dtype=np.float32).reshape(num_frames, -1)

    states = model.get_init_states()
    outputs = []
    for i in range(num_frames):
        x_real = real[i : i + 1]
        x_imag = imag[i : i + 1]
        x = np.vstack([x_real, x_imag]).transpose()
        x = np.expand_dims(x, axis=0)
        x = np.expand_dims(x, axis=2)

        o, states = model(x, states)
        outputs.append(o)

    outputs = np.concatenate(outputs, axis=2)
    outputs = outputs.squeeze(0).transpose(1, 0, 2)

    enhanced_real = outputs[:, :, 0]
    enhanced_imag = outputs[:, :, 1]
    enhanced_stft_result = knf.StftResult(
        real=enhanced_real.reshape(-1).tolist(),
        imag=enhanced_imag.reshape(-1).tolist(),
        num_frames=enhanced_real.shape[0],
    )

    istft = knf.IStft(stft_config)
    enhanced = istft(enhanced_stft_result)

    sf.write("./enhanced_16k.wav", enhanced, model.sample_rate)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/hap/.gitignore
================================================
!build-*.in


================================================
FILE: scripts/hap/build-hap-vad-asr.sh.in
================================================
#!/usr/bin/env bash
#
# Auto generated! Please DO NOT EDIT!

# Please set the environment variable COMMANDLINE_TOOLS_DIR
# before running this script

# Inside the $COMMANDLINE_TOOL_DIR directory, you can find the following:
#
# command-line-tools fangjun$ ls
# LICENSE.txt NOTICE.txt  bin         codelinter  hstack      hvigor      ohpm        sdk         tool

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)

log "Building streaming VAD + ASR Hap for sherpa-onnx v${SHERPA_ONNX_VERSION}"

export SHERPA_ONNX_ENABLE_TTS=OFF

if [ ! -f $COMMANDLINE_TOOLS_DIR/bin/hvigorw ]; then
  echo "Please first download Command Line Tools for HarmonyOS"
  echo "See https://developer.huawei.com/consumer/cn/download/"
  echo "or"
  echo "https://hf-mirror.com/csukuangfj/harmonyos-commandline-tools/tree/main"
  exit 1
fi

jar=$COMMANDLINE_TOOLS_DIR/sdk/default/openharmony/toolchains/lib/hap-sign-tool.jar

export PATH=$COMMANDLINE_TOOLS_DIR/bin:$PATH

mkdir -p haps

{% for model in model_list %}
pushd ./harmony-os/SherpaOnnxVadAsr/entry/src/main/resources/rawfile
model_name={{ model.model_name }}
type={{ model.idx }}
lang={{ model.lang }}
lang2={{ model.lang2 }}
short_name={{ model.short_name }}

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/${model_name}.tar.bz2
tar xvf ${model_name}.tar.bz2

{{ model.cmd }}

rm -rf  *.tar.bz2
ls -lh $model_name

if [ ! -f ./silero_vad.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

popd
# Now we are at the project root directory

git checkout .
pushd harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/workers/
sed -i.bak s/"const type = 2/const type = $type/" ./NonStreamingAsrWithVadWorker.ets

{% if model.rule_fsts %}
  rule_fsts={{ model.rule_fsts }}
  sed -i.bak s%"ruleFsts = ''"%"ruleFsts = \"$rule_fsts\""% ./NonStreamingAsrWithVadWorker.ets
{% endif %}

git diff
popd

pushd harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/pages
sed -i.bak s/English/$lang2/ ./Index.ets
popd

pushd harmony-os/SherpaOnnxVadAsr

git diff

cd entry
ohpm install
cd ..

hvigorw clean --no-daemon
hvigorw assembleHap --mode module -p product=default -p buildMode=release --no-daemon

ls -lh ./entry/build/default/outputs/default/entry-default-unsigned.hap

in_file=$PWD/entry/build/default/outputs/default/entry-default-unsigned.hap
out_file=$PWD/entry/build/default/outputs/default/entry-default-signed.hap

java -jar $jar sign-app -keyAlias "$HAP_KEY_ALIAS" -signAlg "SHA256withECDSA" -mode "localSign" \
  -appCertFile "/tmp/sherpa_onnx.cer" -profileFile "/tmp/sherpa_onnx_profileRelease.p7b" \
  -inFile $in_file -keystoreFile "/tmp/sherpa_onnx_ohos_key.p12" \
  -outFile $out_file -keyPwd "$HAP_KEY_PWD" -keystorePwd "$HAP_KEY_STORE_PWD" -signCode "1"

ls -l $in_file $out_file
ls -lh $in_file $out_file
rm -rf ./entry/src/main/resources/rawfile/$model_name
popd

# Use unsigned hap
mv $in_file ./haps/sherpa-onnx-${SHERPA_ONNX_VERSION}-vad_asr-$lang-$short_name.hap
# mv $out_file ./haps/sherpa-onnx-${SHERPA_ONNX_VERSION}-vad_asr-$lang-$short_name.hap

ls -lh haps

{% endfor %}

git checkout .

ls -lh haps/


================================================
FILE: scripts/kitten-tts/README.md
================================================
# Introduction

See also https://github.com/KittenML/KittenTTS


================================================
FILE: scripts/kitten-tts/mini_v0_1/add_meta_data.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)


import argparse

import numpy as np
import onnx

from generate_voices_bin import speaker2id


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model", type=str, required=True, help="input and output onnx model"
    )

    return parser.parse_args()


def main():
    args = get_args()
    print(args.model)

    model = onnx.load(args.model)

    style = np.load("./voices.npz")
    style_shape = style[list(style.keys())[0]].shape

    speaker2id_str = ""
    id2speaker_str = ""
    sep = ""
    for s, i in speaker2id.items():
        speaker2id_str += f"{sep}{s}->{i}"
        id2speaker_str += f"{sep}{i}->{s}"
        sep = ","

    meta_data = {
        "model_type": "kitten-tts",
        "language": "English",
        "has_espeak": 1,
        "sample_rate": 24000,
        "version": 1,
        "voice": "en-us",
        "style_dim": ",".join(map(str, style_shape)),
        "n_speakers": len(speaker2id),
        "speaker2id": speaker2id_str,
        "id2speaker": id2speaker_str,
        "speaker_names": ",".join(map(str, speaker2id.keys())),
        "model_url": "https://huggingface.co/KittenML/kitten-tts-nano-0.2",
        "see_also": "https://github.com/KittenML/KittenTTS",
        "maintainer": "k2-fsa",
        "comment": "This is kitten-tts-nano-0.2 and supports only English",
    }

    print(model.metadata_props)

    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)
    print("--------------------")

    print(model.metadata_props)

    onnx.save(model, args.model)

    print(f"Please see {args.model}")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/kitten-tts/mini_v0_1/convert_opset.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

"""
Change the model so that it can be run in onnxruntime 1.17.1
"""

import onnx


def main():
    model = onnx.load("kitten_tts_mini_v0_1.onnx")

    # Print current opsets
    for opset in model.opset_import:
        print(f"Domain: '{opset.domain}', Version: {opset.version}")

    # Modify the opset versions (be careful!)
    for opset in model.opset_import:
        if opset.domain == "":  # ai.onnx domain
            opset.version = 19  # change from 20 to 19
        elif opset.domain == "ai.onnx.ml":
            opset.version = 4  # change from 5 to 4

    # Save the modified model
    onnx.save(model, "model.fp16.onnx")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/kitten-tts/mini_v0_1/generate_samples.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)
"""
Generate samples for
https://k2-fsa.github.io/sherpa/onnx/tts/all/
"""


import sherpa_onnx
import soundfile as sf

from generate_voices_bin import speaker2id

config = sherpa_onnx.OfflineTtsConfig(
    model=sherpa_onnx.OfflineTtsModelConfig(
        kitten=sherpa_onnx.OfflineTtsKittenModelConfig(
            model="kitten-mini-en-v0_1-fp16/model.fp16.onnx",
            voices="kitten-mini-en-v0_1-fp16/voices.bin",
            tokens="kitten-mini-en-v0_1-fp16/tokens.txt",
            data_dir="kitten-mini-en-v0_1-fp16/espeak-ng-data",
        ),
        num_threads=2,
    ),
    max_num_sentences=1,
)

if not config.validate():
    raise ValueError("Please check your config")

tts = sherpa_onnx.OfflineTts(config)
text = "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."

for s, i in speaker2id.items():
    print(s, i, len(speaker2id))
    audio = tts.generate(text, sid=i, speed=1.0)

    sf.write(
        f"./hf/kitten/v0.1-mini/mp3/{i}-{s}.mp3",
        audio.samples,
        samplerate=audio.sample_rate,
    )


================================================
FILE: scripts/kitten-tts/nano_v0_1/add_meta_data.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)


import argparse

import numpy as np
import onnx

from generate_voices_bin import speaker2id


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model", type=str, required=True, help="input and output onnx model"
    )

    return parser.parse_args()


def main():
    args = get_args()
    print(args.model)

    model = onnx.load(args.model)

    style = np.load("./voices.npz")
    style_shape = style[list(style.keys())[0]].shape

    speaker2id_str = ""
    id2speaker_str = ""
    sep = ""
    for s, i in speaker2id.items():
        speaker2id_str += f"{sep}{s}->{i}"
        id2speaker_str += f"{sep}{i}->{s}"
        sep = ","

    meta_data = {
        "model_type": "kitten-tts",
        "language": "English",
        "has_espeak": 1,
        "sample_rate": 24000,
        "version": 1,
        "voice": "en-us",
        "style_dim": ",".join(map(str, style_shape)),
        "n_speakers": len(speaker2id),
        "speaker2id": speaker2id_str,
        "id2speaker": id2speaker_str,
        "speaker_names": ",".join(map(str, speaker2id.keys())),
        "model_url": "https://huggingface.co/KittenML/kitten-tts-nano-0.1",
        "see_also": "https://github.com/KittenML/KittenTTS",
        "maintainer": "k2-fsa",
        "comment": "This is kitten-tts-nano-0.1 and supports only English",
    }

    print(model.metadata_props)

    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)
    print("--------------------")

    print(model.metadata_props)

    onnx.save(model, args.model)

    print(f"Please see {args.model}")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/kitten-tts/nano_v0_1/convert_opset.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

"""
Change the model so that it can be run in onnxruntime 1.17.1
"""

import onnx


def main():
    model = onnx.load("kitten_tts_nano_v0_1.onnx")

    # Print current opsets
    for opset in model.opset_import:
        print(f"Domain: '{opset.domain}', Version: {opset.version}")

    # Modify the opset versions (be careful!)
    for opset in model.opset_import:
        if opset.domain == "":  # ai.onnx domain
            opset.version = 19  # change from 20 to 19
        elif opset.domain == "ai.onnx.ml":
            opset.version = 4  # change from 5 to 4

    # Save the modified model
    onnx.save(model, "model.fp16.onnx")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/kitten-tts/nano_v0_1/generate_samples.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)
"""
Generate samples for
https://k2-fsa.github.io/sherpa/onnx/tts/all/
"""


import sherpa_onnx
import soundfile as sf

from generate_voices_bin import speaker2id

config = sherpa_onnx.OfflineTtsConfig(
    model=sherpa_onnx.OfflineTtsModelConfig(
        kitten=sherpa_onnx.OfflineTtsKittenModelConfig(
            model="kitten-nano-en-v0_1-fp16/model.fp16.onnx",
            voices="kitten-nano-en-v0_1-fp16/voices.bin",
            tokens="kitten-nano-en-v0_1-fp16/tokens.txt",
            data_dir="kitten-nano-en-v0_1-fp16/espeak-ng-data",
        ),
        num_threads=2,
    ),
    max_num_sentences=1,
)

if not config.validate():
    raise ValueError("Please check your config")

tts = sherpa_onnx.OfflineTts(config)
text = "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."

for s, i in speaker2id.items():
    print(s, i, len(speaker2id))
    audio = tts.generate(text, sid=i, speed=1.0)

    sf.write(
        f"./hf/kitten/v0.1-nano/mp3/{i}-{s}.mp3",
        audio.samples,
        samplerate=audio.sample_rate,
    )


================================================
FILE: scripts/kitten-tts/nano_v0_1/generate_tokens.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)


def get_vocab():
    # https://github.com/KittenML/KittenTTS/blob/main/kittentts/onnx_model.py#L17
    _pad = "$"
    _punctuation = ';:,.!?¡¿—…"«»"" '
    _letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    _letters_ipa = "ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ"

    symbols = [_pad] + list(_punctuation) + list(_letters) + list(_letters_ipa)
    dicts = {}
    for i in range(len((symbols))):
        dicts[symbols[i]] = i
    return dicts


def main():
    token2id = get_vocab()
    with open("tokens.txt", "w", encoding="utf-8") as f:
        for s, i in token2id.items():
            f.write(f"{s} {i}\n")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/kitten-tts/nano_v0_1/generate_voices_bin.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)
from pathlib import Path

import numpy as np

speakers = [
    "expr-voice-2-m",
    "expr-voice-2-f",
    "expr-voice-3-m",
    "expr-voice-3-f",
    "expr-voice-4-m",
    "expr-voice-4-f",
    "expr-voice-5-m",
    "expr-voice-5-f",
]

id2speaker = {idx: speaker for idx, speaker in enumerate(speakers)}

speaker2id = {speaker: idx for idx, speaker in id2speaker.items()}


def main():
    if Path("./voices.bin").is_file():
        print("./voices.bin exists - skip")
        return

    voices = np.load("./voices.npz")

    with open("voices.bin", "wb") as f:
        for speaker in speakers:
            v = voices[speaker]
            # v.shape (1, 256)
            f.write(v.tobytes())


if __name__ == "__main__":
    main()


================================================
FILE: scripts/kitten-tts/nano_v0_1/show.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import onnxruntime
import onnx

"""
[key: "onnx.infer"
value: "onnxruntime.quant"
, key: "onnx.quant.pre_process"
value: "onnxruntime.quant"
]
NodeArg(name='input_ids', type='tensor(int64)', shape=[1, 'sequence_length'])
NodeArg(name='style', type='tensor(float)', shape=[1, 256])
NodeArg(name='speed', type='tensor(float)', shape=[1])
-----
NodeArg(name='waveform', type='tensor(float)', shape=['num_samples'])
NodeArg(name='duration', type='tensor(int64)', shape=['Castduration_dim_0'])
"""


def show(filename):
    model = onnx.load(filename)
    print(model.metadata_props)

    session_opts = onnxruntime.SessionOptions()
    session_opts.log_severity_level = 3
    sess = onnxruntime.InferenceSession(
        filename, session_opts, providers=["CPUExecutionProvider"]
    )
    for i in sess.get_inputs():
        print(i)

    print("-----")

    for i in sess.get_outputs():
        print(i)


def main():
    show("./model.fp16.onnx")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/kitten-tts/nano_v0_1/test.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import argparse
import time
from pathlib import Path
from typing import Dict, List

import numpy as np

try:
    from piper_phonemize import phonemize_espeak
except Exception as ex:
    raise RuntimeError(
        f"{ex}\nPlease run\n"
        "pip install piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html"
    )

import onnxruntime as ort
import soundfile as sf


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model",
        type=str,
        required=True,
        help="Path to the model",
    )

    parser.add_argument(
        "--voices-bin",
        type=str,
        required=True,
        help="Path to the voices.bin",
    )

    parser.add_argument(
        "--tokens",
        type=str,
        required=True,
        help="Path to tokens.txt",
    )
    return parser.parse_args()


def show(filename):
    session_opts = ort.SessionOptions()
    session_opts.log_severity_level = 3
    sess = ort.InferenceSession(filename, session_opts)
    for i in sess.get_inputs():
        print(i)

    print("-----")

    for i in sess.get_outputs():
        print(i)


def load_tokens(filename: str) -> Dict[str, int]:
    ans = dict()
    with open(filename, encoding="utf-8") as f:
        for line in f:
            fields = line.strip().split()
            if len(fields) == 2:
                token, idx = fields
                ans[token] = int(idx)
            else:
                assert len(fields) == 1, (len(fields), line)
                ans[" "] = int(fields[0])
    return ans


def load_voices(speaker_names: List[str], dim: List[int], voices_bin: str):
    embedding = (
        np.fromfile(voices_bin, dtype="uint8")
        .view(np.float32)
        .reshape(len(speaker_names), *dim)
    )
    ans = dict()
    for i in range(len(speaker_names)):
        ans[speaker_names[i]] = embedding[i]

    return ans


class OnnxModel:
    def __init__(self, model_filename: str, voices_bin: str, tokens: str):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.session_opts = session_opts
        self.model = ort.InferenceSession(
            model_filename,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )
        self.token2id = load_tokens(tokens)

        meta = self.model.get_modelmeta().custom_metadata_map
        print(meta)
        dim = list(map(int, meta["style_dim"].split(",")))
        speaker_names = meta["speaker_names"].split(",")

        self.voices = load_voices(
            speaker_names=speaker_names, dim=dim, voices_bin=voices_bin
        )

        self.sample_rate = int(meta["sample_rate"])

    def __call__(self, text: str, voice):
        tokens = phonemize_espeak(text, "en-us")
        # tokens is List[List[str]]
        # Each sentence is a List[str]
        # len(tokens) == number of sentences

        flatten = []
        for t in tokens:
            flatten.extend(t)
            # we append a space at the end of a sentence so that there is
            # a pause in the generated audio
            flatten.append(" ")

        tokens = "".join(flatten)

        tokens = list(tokens)

        token_ids = [self.token2id[i] for i in tokens]

        style = self.voices[voice]

        token_ids = [0, *token_ids, 0]
        token_ids = np.array([token_ids], dtype=np.int64)

        speed = np.array([1.0], dtype=np.float32)

        audio = self.model.run(
            [
                self.model.get_outputs()[0].name,
            ],
            {
                self.model.get_inputs()[0].name: token_ids,
                self.model.get_inputs()[1].name: style,
                self.model.get_inputs()[2].name: speed,
            },
        )[0]
        return audio


def main():
    args = get_args()
    print(vars(args))
    show(args.model)

    #  tokens = phonemize_espeak("how are you doing?", "en-us")
    # [['h', 'ˌ', 'a', 'ʊ', ' ', 'ɑ', 'ː', 'ɹ', ' ', 'j', 'u', 'ː', ' ', 'd', 'ˈ', 'u', 'ː', 'ɪ', 'ŋ', '?']]
    m = OnnxModel(
        model_filename=args.model, voices_bin=args.voices_bin, tokens=args.tokens
    )

    text = (
        "Today as always, men fall into two groups: slaves and free men. "
        + " Whoever does not have two-thirds of his day for himself, "
        + "is a slave, whatever he may be: a statesman, a businessman, "
        + "an official, or a scholar."
    )

    for i, voice in enumerate(m.voices.keys(), 1):
        print(f"Testing {i}/{len(m.voices)} - {voice}/{args.model}")

        start = time.time()
        audio = m(text, voice=voice)
        end = time.time()

        elapsed_seconds = end - start
        audio_duration = len(audio) / m.sample_rate
        real_time_factor = elapsed_seconds / audio_duration

        filename = f"{Path(args.model).stem}-{voice}.wav"
        sf.write(
            filename,
            audio,
            samplerate=m.sample_rate,
            subtype="PCM_16",
        )
        print(f" Saved to {filename}")
        print(f" Elapsed seconds: {elapsed_seconds:.3f}")
        print(f" Audio duration in seconds: {audio_duration:.3f}")
        print(
            f" RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}"
        )


if __name__ == "__main__":
    main()


================================================
FILE: scripts/kitten-tts/nano_v0_2/add_meta_data.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)


import argparse

import numpy as np
import onnx

from generate_voices_bin import speaker2id


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model", type=str, required=True, help="input and output onnx model"
    )

    return parser.parse_args()


def main():
    args = get_args()
    print(args.model)

    model = onnx.load(args.model)

    style = np.load("./voices.npz")
    style_shape = style[list(style.keys())[0]].shape

    speaker2id_str = ""
    id2speaker_str = ""
    sep = ""
    for s, i in speaker2id.items():
        speaker2id_str += f"{sep}{s}->{i}"
        id2speaker_str += f"{sep}{i}->{s}"
        sep = ","

    meta_data = {
        "model_type": "kitten-tts",
        "language": "English",
        "has_espeak": 1,
        "sample_rate": 24000,
        "version": 1,
        "voice": "en-us",
        "style_dim": ",".join(map(str, style_shape)),
        "n_speakers": len(speaker2id),
        "speaker2id": speaker2id_str,
        "id2speaker": id2speaker_str,
        "speaker_names": ",".join(map(str, speaker2id.keys())),
        "model_url": "https://huggingface.co/KittenML/kitten-tts-nano-0.2",
        "see_also": "https://github.com/KittenML/KittenTTS",
        "maintainer": "k2-fsa",
        "comment": "This is kitten-tts-nano-0.2 and supports only English",
    }

    print(model.metadata_props)

    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)
    print("--------------------")

    print(model.metadata_props)

    onnx.save(model, args.model)

    print(f"Please see {args.model}")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/kitten-tts/nano_v0_2/convert_opset.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

"""
Change the model so that it can be run in onnxruntime 1.17.1
"""

import onnx


def main():
    model = onnx.load("kitten_tts_nano_v0_2.onnx")

    # Print current opsets
    for opset in model.opset_import:
        print(f"Domain: '{opset.domain}', Version: {opset.version}")

    # Modify the opset versions (be careful!)
    for opset in model.opset_import:
        if opset.domain == "":  # ai.onnx domain
            opset.version = 19  # change from 20 to 19
        elif opset.domain == "ai.onnx.ml":
            opset.version = 4  # change from 5 to 4

    # Save the modified model
    onnx.save(model, "model.fp16.onnx")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/kitten-tts/nano_v0_2/generate_samples.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)
"""
Generate samples for
https://k2-fsa.github.io/sherpa/onnx/tts/all/
"""


import sherpa_onnx
import soundfile as sf

from generate_voices_bin import speaker2id

config = sherpa_onnx.OfflineTtsConfig(
    model=sherpa_onnx.OfflineTtsModelConfig(
        kitten=sherpa_onnx.OfflineTtsKittenModelConfig(
            model="kitten-nano-en-v0_2-fp16/model.fp16.onnx",
            voices="kitten-nano-en-v0_2-fp16/voices.bin",
            tokens="kitten-nano-en-v0_2-fp16/tokens.txt",
            data_dir="kitten-nano-en-v0_2-fp16/espeak-ng-data",
        ),
        num_threads=2,
    ),
    max_num_sentences=1,
)

if not config.validate():
    raise ValueError("Please check your config")

tts = sherpa_onnx.OfflineTts(config)
text = "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."

for s, i in speaker2id.items():
    print(s, i, len(speaker2id))
    audio = tts.generate(text, sid=i, speed=1.0)

    sf.write(
        f"./hf/kitten/v0.2-nano/mp3/{i}-{s}.mp3",
        audio.samples,
        samplerate=audio.sample_rate,
    )


================================================
FILE: scripts/kokoro/.gitignore
================================================
espeak-ng-data
voices.json
voices.bin
README-new.md
lexicon-*.txt
config.json


================================================
FILE: scripts/kokoro/README.md
================================================
# Introduction

Please see also
https://huggingface.co/hexgrad/Kokoro-82M
and
https://huggingface.co/hexgrad/Kokoro-82M/discussions/14


================================================
FILE: scripts/kokoro/v0.19/.gitignore
================================================
kLegacy


================================================
FILE: scripts/kokoro/v0.19/__init__.py
================================================


================================================
FILE: scripts/kokoro/v0.19/add_meta_data.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)


import argparse

import onnx
import torch

from generate_voices_bin import speaker2id


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model", type=str, required=True, help="input and output onnx model"
    )

    return parser.parse_args()


def main():
    args = get_args()
    print(args.model)

    model = onnx.load(args.model)

    style = torch.load(
        "./kLegacy/v0.19/voices/af.pt", weights_only=True, map_location="cpu"
    )

    speaker2id_str = ""
    id2speaker_str = ""
    sep = ""
    for s, i in speaker2id.items():
        speaker2id_str += f"{sep}{s}->{i}"
        id2speaker_str += f"{sep}{i}->{s}"
        sep = ","

    meta_data = {
        "model_type": "kokoro",
        "language": "English",
        "has_espeak": 1,
        "sample_rate": 24000,
        "version": 1,
        "voice": "en-us",
        "style_dim": ",".join(map(str, style.shape)),
        "n_speakers": len(speaker2id),
        "speaker2id": speaker2id_str,
        "id2speaker": id2speaker_str,
        "speaker_names": ",".join(map(str, speaker2id.keys())),
        "model_url": "https://huggingface.co/hexgrad/kLegacy/",
        "see_also": "https://huggingface.co/spaces/hexgrad/Kokoro-TTS",
        "maintainer": "k2-fsa",
        "comment": "This is kokoro v0.19 and supports only English",
    }

    print(model.metadata_props)

    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)
    print("--------------------")

    print(model.metadata_props)

    onnx.save(model, args.model)

    print(f"Please see {args.model}, ./voices.bin, and ./tokens.txt")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/kokoro/v0.19/dynamic_quantization.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

from pathlib import Path

import onnxruntime
from onnxruntime.quantization import QuantType, quantize_dynamic


def show(filename):
    session_opts = onnxruntime.SessionOptions()
    session_opts.log_severity_level = 3
    sess = onnxruntime.InferenceSession(filename, session_opts)
    for i in sess.get_inputs():
        print(i)

    print("-----")

    for i in sess.get_outputs():
        print(i)


"""
NodeArg(name='tokens', type='tensor(int64)', shape=[1, 'tokens1'])
NodeArg(name='style', type='tensor(float)', shape=[1, 256])
NodeArg(name='speed', type='tensor(float)', shape=[1])
-----
NodeArg(name='audio', type='tensor(float)', shape=['audio0'])
"""


def main():
    show("./model.onnx")

    if not Path("./model.int8.onnx").is_file():
        quantize_dynamic(
            model_input="model.onnx",
            model_output="model.int8.onnx",
            #  op_types_to_quantize=["MatMul"],
            weight_type=QuantType.QUInt8,
        )
    else:
        print("./model.int8.onnx exists - skip")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/kokoro/v0.19/generate_samples.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)
"""
Generate samples for
https://k2-fsa.github.io/sherpa/onnx/tts/all/
"""

import sherpa_onnx
import soundfile as sf

from generate_voices_bin import speaker2id

config = sherpa_onnx.OfflineTtsConfig(
    model=sherpa_onnx.OfflineTtsModelConfig(
        kokoro=sherpa_onnx.OfflineTtsKokoroModelConfig(
            model="./model.onnx",
            voices="./voices.bin",
            tokens="./tokens.txt",
            data_dir="./espeak-ng-data",
        ),
        num_threads=2,
    ),
    max_num_sentences=1,
)

if not config.validate():
    raise ValueError("Please check your config")

tts = sherpa_onnx.OfflineTts(config)
text = "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."

for s, i in speaker2id.items():
    print(s, i, len(speaker2id))
    audio = tts.generate(text, sid=i, speed=1.0)

    sf.write(
        f"./hf/kokoro/v0.19/mp3/{i}-{s}.mp3",
        audio.samples,
        samplerate=audio.sample_rate,
    )


================================================
FILE: scripts/kokoro/v0.19/generate_tokens.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)


def get_vocab():
    # https://huggingface.co/hexgrad/kLegacy/blob/main/v0.19/kokoro.py#L75
    _pad = "$"
    _punctuation = ';:,.!?¡¿—…"«»“” '
    _letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    _letters_ipa = "ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ"
    symbols = [_pad] + list(_punctuation) + list(_letters) + list(_letters_ipa)
    dicts = {}
    for i in range(len((symbols))):
        dicts[symbols[i]] = i
    return dicts


def main():
    token2id = get_vocab()
    with open("tokens.txt", "w", encoding="utf-8") as f:
        for s, i in token2id.items():
            f.write(f"{s} {i}\n")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/kokoro/v0.19/generate_voices_bin.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)
import torch
from pathlib import Path


id2speaker = {
    0: "af",
    1: "af_bella",
    2: "af_nicole",
    3: "af_sarah",
    4: "af_sky",
    5: "am_adam",
    6: "am_michael",
    7: "bf_emma",
    8: "bf_isabella",
    9: "bm_george",
    10: "bm_lewis",
}

speaker2id = {speaker: idx for idx, speaker in id2speaker.items()}


def main():
    if Path("./voices.bin").is_file():
        print("./voices.bin exists - skip")
        return

    with open("voices.bin", "wb") as f:
        for _, speaker in id2speaker.items():
            m = torch.load(
                f"kLegacy/v0.19/voices/{speaker}.pt",
                weights_only=True,
                map_location="cpu",
            ).numpy()
            # m.shape (511, 1, 256)

            f.write(m.tobytes())


if __name__ == "__main__":
    main()


================================================
FILE: scripts/kokoro/v0.19/test.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

"""
female (7)
'af', 'af_bella', 'af_nicole','af_sarah', 'af_sky',
'bf_emma', 'bf_isabella',

male (4)
'am_adam',  'am_michael', 'bm_george', 'bm_lewis'
"""

import argparse
import time
from pathlib import Path
from typing import Dict, List

import numpy as np

try:
    from piper_phonemize import phonemize_espeak
except Exception as ex:
    raise RuntimeError(
        f"{ex}\nPlease run\n"
        "pip install piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html"
    )

import onnxruntime as ort
import soundfile as sf


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model",
        type=str,
        required=True,
        help="Path to the model",
    )

    parser.add_argument(
        "--voices-bin",
        type=str,
        required=True,
        help="Path to the voices.bin",
    )

    parser.add_argument(
        "--tokens",
        type=str,
        required=True,
        help="Path to tokens.txt",
    )
    return parser.parse_args()


def show(filename):
    session_opts = ort.SessionOptions()
    session_opts.log_severity_level = 3
    sess = ort.InferenceSession(filename, session_opts)
    for i in sess.get_inputs():
        print(i)

    print("-----")

    for i in sess.get_outputs():
        print(i)


"""
NodeArg(name='tokens', type='tensor(int64)', shape=[1, 'tokens1'])
NodeArg(name='style', type='tensor(float)', shape=[1, 256])
NodeArg(name='speed', type='tensor(float)', shape=[1])
-----
NodeArg(name='audio', type='tensor(float)', shape=['audio0'])
"""


def load_tokens(filename: str) -> Dict[str, int]:
    ans = dict()
    with open(filename, encoding="utf-8") as f:
        for line in f:
            fields = line.strip().split()
            if len(fields) == 2:
                token, idx = fields
                ans[token] = int(idx)
            else:
                assert len(fields) == 1, (len(fields), line)
                ans[" "] = int(fields[0])
    return ans


def load_voices(speaker_names: List[str], dim: List[int], voices_bin: str):
    embedding = (
        np.fromfile(voices_bin, dtype="uint8")
        .view(np.float32)
        .reshape(len(speaker_names), *dim)
    )
    print("embedding.shape", embedding.shape)
    ans = dict()
    for i in range(len(speaker_names)):
        ans[speaker_names[i]] = embedding[i]

    return ans


class OnnxModel:
    def __init__(self, model_filename: str, voices_bin: str, tokens: str):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.session_opts = session_opts
        self.model = ort.InferenceSession(
            model_filename,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )
        self.token2id = load_tokens(tokens)

        meta = self.model.get_modelmeta().custom_metadata_map
        print(meta)
        dim = list(map(int, meta["style_dim"].split(",")))
        speaker_names = meta["speaker_names"].split(",")

        self.voices = load_voices(
            speaker_names=speaker_names, dim=dim, voices_bin=voices_bin
        )

        self.sample_rate = int(meta["sample_rate"])

        print(list(self.voices.keys()))
        # ['af', 'af_bella', 'af_nicole', 'af_sarah', 'af_sky', 'am_adam',
        # 'am_michael', 'bf_emma', 'bf_isabella', 'bm_george', 'bm_lewis']
        # af -> (511, 1, 256)
        self.max_len = self.voices[next(iter(self.voices))].shape[0] - 1

    def __call__(self, text: str, voice):
        tokens = phonemize_espeak(text, "en-us")
        # tokens is List[List[str]]
        # Each sentence is a List[str]
        # len(tokens) == number of sentences

        tokens = sum(tokens, [])  # flatten
        tokens = "".join(tokens)

        tokens = tokens.replace("kəkˈoːɹoʊ", "kˈoʊkəɹoʊ").replace(
            "kəkˈɔːɹəʊ", "kˈəʊkəɹəʊ"
        )

        tokens = list(tokens)

        token_ids = [self.token2id[i] for i in tokens]
        token_ids = token_ids[: self.max_len]

        style = self.voices[voice][len(token_ids)]

        token_ids = [0, *token_ids, 0]
        token_ids = np.array([token_ids], dtype=np.int64)

        speed = np.array([1.0], dtype=np.float32)

        audio = self.model.run(
            [
                self.model.get_outputs()[0].name,
            ],
            {
                self.model.get_inputs()[0].name: token_ids,
                self.model.get_inputs()[1].name: style,
                self.model.get_inputs()[2].name: speed,
            },
        )[0]
        return audio


def main():
    args = get_args()
    print(vars(args))
    show(args.model)

    #  tokens = phonemize_espeak("how are you doing?", "en-us")
    # [['h', 'ˌ', 'a', 'ʊ', ' ', 'ɑ', 'ː', 'ɹ', ' ', 'j', 'u', 'ː', ' ', 'd', 'ˈ', 'u', 'ː', 'ɪ', 'ŋ', '?']]
    m = OnnxModel(
        model_filename=args.model, voices_bin=args.voices_bin, tokens=args.tokens
    )

    text = (
        "Today as always, men fall into two groups: slaves and free men."
        + " Whoever does not have two-thirds of his day for himself, "
        + "is a slave, whatever he may be: a statesman, a businessman, "
        + "an official, or a scholar."
    )

    for i, voice in enumerate(m.voices.keys(), 1):
        print(f"Testing {i}/{len(m.voices)} - {voice}/{args.model}")

        start = time.time()
        audio = m(text, voice=voice)
        end = time.time()

        elapsed_seconds = end - start
        audio_duration = len(audio) / m.sample_rate
        real_time_factor = elapsed_seconds / audio_duration

        filename = f"{Path(args.model).stem}-{voice}.wav"
        sf.write(
            filename,
            audio,
            samplerate=m.sample_rate,
            subtype="PCM_16",
        )
        print(f" Saved to {filename}")
        print(f" Elapsed seconds: {elapsed_seconds:.3f}")
        print(f" Audio duration in seconds: {audio_duration:.3f}")
        print(
            f" RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}"
        )


if __name__ == "__main__":
    main()


================================================
FILE: scripts/kokoro/v1.0/.gitignore
================================================
config.json
*.json
*.txt
.add-meta-data.done
voices


================================================
FILE: scripts/kokoro/v1.0/README.md
================================================
# Introduction

This directory is for kokoro v1.0


================================================
FILE: scripts/kokoro/v1.0/__init__.py
================================================


================================================
FILE: scripts/kokoro/v1.0/add_meta_data.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)


import onnx
import torch

from generate_voices_bin import speaker2id


def main():
    model = onnx.load("./kokoro.onnx")
    style = torch.load(
        "./Kokoro-82M/voices/af_alloy.pt", weights_only=True, map_location="cpu"
    )

    id2speaker_str = ""
    speaker2id_str = ""
    sep = ""
    for s, i in speaker2id.items():
        speaker2id_str += f"{sep}{s}->{i}"
        id2speaker_str += f"{sep}{i}->{s}"
        sep = ","

    meta_data = {
        "model_type": "kokoro",
        "language": "multi-lang, e.g., English, Chinese",
        "has_espeak": 1,
        "sample_rate": 24000,
        "version": 2,
        "voice": "en-us",
        "style_dim": ",".join(map(str, style.shape)),
        "n_speakers": len(speaker2id),
        "id2speaker": id2speaker_str,
        "speaker2id": speaker2id_str,
        "speaker_names": ",".join(map(str, speaker2id.keys())),
        "model_url": "https://github.com/thewh1teagle/kokoro-onnx/releases/tag/model-files",
        "see_also": "https://huggingface.co/spaces/hexgrad/Kokoro-TTS",
        "see_also_2": "https://huggingface.co/hexgrad/Kokoro-82M",
        "maintainer": "k2-fsa",
        "comment": "This is Kokoro v1.0, a multilingual TTS model, supporting English, Chinese, French, Japanese etc.",
    }

    print(model.metadata_props)

    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)
    print("--------------------")

    print(model.metadata_props)

    onnx.save(model, "./kokoro.onnx")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/kokoro/v1.0/dynamic_quantization.py
================================================
#!/usr/bin/env python3
import argparse

import onnxruntime
from onnxruntime.quantization import QuantType, quantize_dynamic


def show(filename):
    session_opts = onnxruntime.SessionOptions()
    session_opts.log_severity_level = 3
    sess = onnxruntime.InferenceSession(filename, session_opts)
    for i in sess.get_inputs():
        print(i)

    print("-----")

    for i in sess.get_outputs():
        print(i)


"""
NodeArg(name='tokens', type='tensor(int64)', shape=[1, 'sequence_length'])
NodeArg(name='style', type='tensor(float)', shape=[1, 256])
NodeArg(name='speed', type='tensor(float)', shape=[1])
-----
NodeArg(name='audio', type='tensor(float)', shape=['audio_length'])
"""


def main():
    show("./kokoro.onnx")

    quantize_dynamic(
        model_input="kokoro.onnx",
        model_output="kokoro.int8.onnx",
        #  op_types_to_quantize=["MatMul"],
        weight_type=QuantType.QUInt8,
    )


if __name__ == "__main__":
    main()


================================================
FILE: scripts/kokoro/v1.0/export_onnx.py
================================================
#!/usr/bin/env python3

import json

import torch
from kokoro import KModel
from kokoro.model import KModelForONNX


@torch.no_grad()
def main():
    with open("Kokoro-82M/config.json") as f:
        config = json.load(f)

    model = (
        KModel(
            repo_id="not-used-any-value-is-ok",
            model="Kokoro-82M/kokoro-v1_0.pth",
            config=config,
            disable_complex=True,
        )
        .to("cpu")
        .eval()
    )

    x = torch.randint(1, 100, (48,)).numpy()
    x = torch.LongTensor([[0, *x, 0]])

    style = torch.rand(1, 256, dtype=torch.float32)
    speed = torch.rand(1)

    print(x.shape, x.dtype)
    print(style.shape, style.dtype)
    print(speed, speed.dtype)

    model2 = KModelForONNX(model)

    torch.onnx.export(
        model2,
        (x, style, speed),
        "kokoro.onnx",
        input_names=["tokens", "style", "speed"],
        output_names=["audio"],
        dynamic_axes={
            "tokens": {1: "sequence_length"},
            "audio": {0: "audio_length"},
        },
        opset_version=14,  # minimum working version for this kokoro model is 14
    )


if __name__ == "__main__":
    main()


================================================
FILE: scripts/kokoro/v1.0/generate_lexicon_en.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import json
from typing import List, Tuple


def generate_english_lexicon(kind: str):
    assert kind in ("us", "gb"), kind
    # If you want to add new words, please add them to
    # the user_defined dict.
    user_defined = {
        "Kokoro": "kˈOkəɹO",
        "Misaki": "misˈɑki",
    }

    user_defined_lower = dict()
    for k, v in user_defined.items():
        user_defined_lower[k.lower()] = v

    with open(f"./{kind}_gold.json", encoding="utf-8") as f:
        gold = json.load(f)

    with open(f"./{kind}_silver.json", encoding="utf-8") as f:
        silver = json.load(f)

    # words in us_gold has a higher priority than those in s_silver, so
    # we put us_gold after us_silver below
    english = {**silver, **gold}

    lexicon = dict()
    for k, v in english.items():
        k_lower = k.lower()

        if k_lower in user_defined_lower:
            print(f"{k} already exist in the user defined dict. Skip adding")
            continue

        if isinstance(v, str):
            lexicon[k_lower] = v
        else:
            assert isinstance(v, dict), (k, v)
            assert "DEFAULT" in v, (k, v)
            lexicon[k_lower] = v["DEFAULT"]

    return list(user_defined_lower.items()) + list(lexicon.items())


def save(filename: str, lexicon: List[Tuple[str, str]]):
    with open(filename, "w", encoding="utf-8") as f:
        for word, phones in lexicon:
            tokens = " ".join(list(phones))
            f.write(f"{word} {tokens}\n")


def main():
    us = generate_english_lexicon("us")
    gb = generate_english_lexicon("gb")

    save("lexicon-us-en.txt", us)
    save("lexicon-gb-en.txt", gb)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/kokoro/v1.0/generate_lexicon_zh.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

from typing import List, Tuple

from misaki import zh
from pypinyin import load_phrases_dict, phrases_dict, pinyin_dict

user_dict = {
    "还田": [["huan2"], ["tian2"]],
    "行长": [["hang2"], ["zhang3"]],
    "银行行长": [["yin2"], ["hang2"], ["hang2"], ["zhang3"]],
}

load_phrases_dict(user_dict)

phrases_dict.phrases_dict.update(**user_dict)


def generate_chinese_lexicon():
    word_dict = pinyin_dict.pinyin_dict
    phrases = phrases_dict.phrases_dict

    g2p = zh.ZHG2P()
    lexicon = []

    for key in word_dict:
        if not (0x4E00 <= key <= 0x9FFF):
            continue
        w = chr(key)
        tokens: str = g2p.word2ipa(w)
        tokens = tokens.replace(chr(815), "")
        lexicon.append((w, tokens))

    for key in phrases:
        tokens: str = g2p.word2ipa(key)
        tokens = tokens.replace(chr(815), "")
        lexicon.append((key, tokens))
    return lexicon


def save(filename: str, lexicon: List[Tuple[str, str]]):
    with open(filename, "w", encoding="utf-8") as f:
        for word, phones in lexicon:
            tokens = " ".join(list(phones))
            f.write(f"{word} {tokens}\n")


def main():
    zh = generate_chinese_lexicon()

    save("lexicon-zh.txt", zh)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/kokoro/v1.0/generate_samples.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)
"""
Generate samples for
https://k2-fsa.github.io/sherpa/onnx/tts/all/
"""

import sherpa_onnx
import soundfile as sf

from generate_voices_bin import speaker2id

config = sherpa_onnx.OfflineTtsConfig(
    model=sherpa_onnx.OfflineTtsModelConfig(
        kokoro=sherpa_onnx.OfflineTtsKokoroModelConfig(
            model="./kokoro.onnx",
            voices="./voices.bin",
            tokens="./tokens.txt",
            data_dir="./espeak-ng-data",
            dict_dir="./dict",
            lexicon="./lexicon-zh.txt,./lexicon-us-en.txt",
        ),
        num_threads=2,
        debug=True,
    ),
    rule_fsts="./phone-zh.fst,./date-zh.fst,./number-zh.fst",
    max_num_sentences=1,
)

if not config.validate():
    raise ValueError("Please check your config")

tts = sherpa_onnx.OfflineTts(config)
text = "This model supports both Chinese and English. 小米的核心价值观是什么？答案是真诚热爱！有困难，请拨打110 或者18601200909。I am learning 机器学习. 我在研究 machine learning。What do you think 中英文说的如何呢? 今天是 2025年6月18号."

print("text", text)

for s, i in speaker2id.items():
    print(s, i, len(speaker2id))
    audio = tts.generate(text, sid=i, speed=1.0)

    sf.write(
        f"./hf/kokoro/v1.0/mp3/{i}-{s}.mp3",
        audio.samples,
        samplerate=audio.sample_rate,
    )


================================================
FILE: scripts/kokoro/v1.0/generate_tokens.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)


import json


def main():
    with open("Kokoro-82M/config.json") as f:
        config = json.load(f)
    vocab = config["vocab"]

    with open("tokens.txt", "w", encoding="utf-8") as f:
        for k, i in vocab.items():
            f.write(f"{k} {i}\n")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/kokoro/v1.0/generate_voices_bin.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)
import torch
from pathlib import Path


id2speaker = {
    0: "af_alloy",
    1: "af_aoede",
    2: "af_bella",
    3: "af_heart",
    4: "af_jessica",
    5: "af_kore",
    6: "af_nicole",
    7: "af_nova",
    8: "af_river",
    9: "af_sarah",
    10: "af_sky",
    11: "am_adam",
    12: "am_echo",
    13: "am_eric",
    14: "am_fenrir",
    15: "am_liam",
    16: "am_michael",
    17: "am_onyx",
    18: "am_puck",
    19: "am_santa",
    20: "bf_alice",
    21: "bf_emma",
    22: "bf_isabella",
    23: "bf_lily",
    24: "bm_daniel",
    25: "bm_fable",
    26: "bm_george",
    27: "bm_lewis",
    28: "ef_dora",
    29: "em_alex",
    30: "ff_siwis",
    31: "hf_alpha",
    32: "hf_beta",
    33: "hm_omega",
    34: "hm_psi",
    35: "if_sara",
    36: "im_nicola",
    37: "jf_alpha",
    38: "jf_gongitsune",
    39: "jf_nezumi",
    40: "jf_tebukuro",
    41: "jm_kumo",
    42: "pf_dora",
    43: "pm_alex",
    44: "pm_santa",
    45: "zf_xiaobei",
    46: "zf_xiaoni",
    47: "zf_xiaoxiao",
    48: "zf_xiaoyi",
    49: "zm_yunjian",
    50: "zm_yunxi",
    51: "zm_yunxia",
    52: "zm_yunyang",
}

speaker2id = {speaker: idx for idx, speaker in id2speaker.items()}


def main():
    if Path("./voices.bin").is_file():
        print("./voices.bin exists - skip")
        return

    with open("voices.bin", "wb") as f:
        for _, speaker in id2speaker.items():
            m = torch.load(
                f"Kokoro-82M/voices/{speaker}.pt",
                weights_only=True,
                map_location="cpu",
            ).numpy()
            # m.shape (510, 1, 256)

            f.write(m.tobytes())


if __name__ == "__main__":
    main()


================================================
FILE: scripts/kokoro/v1.0/test.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)


import re
import time
from typing import Dict, List

import jieba
import numpy as np
import onnxruntime as ort
import soundfile as sf

try:
    from piper_phonemize import phonemize_espeak
except Exception as ex:
    raise RuntimeError(
        f"{ex}\nPlease run\n"
        "pip install piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html"
    )


def show(filename):
    session_opts = ort.SessionOptions()
    session_opts.log_severity_level = 3
    sess = ort.InferenceSession(filename, session_opts)
    for i in sess.get_inputs():
        print(i)

    print("-----")

    for i in sess.get_outputs():
        print(i)


"""
NodeArg(name='tokens', type='tensor(int64)', shape=[1, 'sequence_length'])
NodeArg(name='style', type='tensor(float)', shape=[1, 256])
NodeArg(name='speed', type='tensor(float)', shape=[1])
-----
NodeArg(name='audio', type='tensor(float)', shape=['audio_length'])
"""


def load_voices(speaker_names: List[str], dim: List[int], voices_bin: str):
    embedding = (
        np.fromfile(voices_bin, dtype="uint8")
        .view(np.float32)
        .reshape(len(speaker_names), *dim)
    )
    print("embedding.shape", embedding.shape)
    ans = dict()
    for i in range(len(speaker_names)):
        ans[speaker_names[i]] = embedding[i]

    return ans


def load_tokens(filename: str) -> Dict[str, int]:
    ans = dict()
    with open(filename, encoding="utf-8") as f:
        for line in f:
            fields = line.strip().split()
            if len(fields) == 2:
                token, idx = fields
                ans[token] = int(idx)
            else:
                assert len(fields) == 1, (len(fields), line)
                ans[" "] = int(fields[0])
    return ans


def load_lexicon(filename: str) -> Dict[str, List[str]]:
    ans = dict()
    for lexicon in filename.split(","):
        print(lexicon)
        with open(lexicon, encoding="utf-8") as f:
            for line in f:
                w, tokens = line.strip().split(" ", maxsplit=1)
                ans[w] = "".join(tokens.split())
    return ans


class OnnxModel:
    def __init__(self, model_filename: str, tokens: str, lexicon: str, voices_bin: str):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.session_opts = session_opts
        self.model = ort.InferenceSession(
            model_filename,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )
        self.token2id = load_tokens(tokens)
        self.word2tokens = load_lexicon(lexicon)

        meta = self.model.get_modelmeta().custom_metadata_map
        print(meta)
        dim = list(map(int, meta["style_dim"].split(",")))
        speaker_names = meta["speaker_names"].split(",")
        self.voices = load_voices(
            speaker_names=speaker_names, dim=dim, voices_bin=voices_bin
        )
        self.sample_rate = int(meta["sample_rate"])
        print(list(self.voices.keys()))

        self.sample_rate = 24000
        self.max_len = self.voices[next(iter(self.voices))].shape[0] - 1

    def __call__(self, text: str, voice: str):
        punctuations = ';:,.!?-…()"“”'
        text = text.lower()

        tokens = ""

        for t in re.findall("[\u4E00-\u9FFF]+|[\u0000-\u007f]+", text):
            if ord(t[0]) < 0x7F:
                for w in t.split():
                    while w:
                        if w[0] in punctuations:
                            tokens += w[0] + " "
                            w = w[1:]
                            continue

                        if w[-1] in punctuations:
                            if w[:-1] in self.word2tokens:
                                tokens += self.word2tokens[w[:-1]]
                                tokens += w[-1]
                        else:
                            if w in self.word2tokens:
                                tokens += self.word2tokens[w]
                            else:
                                print(f"Use espeak-ng for word {w}")
                                tokens += "".join(phonemize_espeak(w, "en-us")[0])

                        tokens += " "
                        break
            else:
                # Chinese
                for w in jieba.cut(t):
                    if w in self.word2tokens:
                        tokens += self.word2tokens[w]
                    else:
                        for i in w:
                            if i in self.word2tokens:
                                tokens += self.word2tokens[i]
                            else:
                                print(f"skip {i}")

        token_ids = [self.token2id[i] for i in tokens]
        token_ids = token_ids[: self.max_len]

        style = self.voices[voice][len(token_ids)]

        token_ids = [0, *token_ids, 0]
        token_ids = np.array([token_ids], dtype=np.int64)

        speed = np.array([1.0], dtype=np.float32)

        audio = self.model.run(
            [
                self.model.get_outputs()[0].name,
            ],
            {
                self.model.get_inputs()[0].name: token_ids,
                self.model.get_inputs()[1].name: style,
                self.model.get_inputs()[2].name: speed,
            },
        )[0]
        return audio


def main():
    m = OnnxModel(
        model_filename="./kokoro.onnx",
        tokens="./tokens.txt",
        lexicon="./lexicon-gb-en.txt,./lexicon-zh.txt",
        voices_bin="./voices.bin",
    )
    text = "来听一听, 这个是什么口音? How are you doing? Are you ok? Thank you! 你觉得中英文说得如何呢?"

    text = text.lower()

    voice = "bf_alice"
    start = time.time()
    audio = m(text, voice=voice)
    end = time.time()

    elapsed_seconds = end - start
    audio_duration = len(audio) / m.sample_rate
    real_time_factor = elapsed_seconds / audio_duration

    filename = f"kokoro_v1.0_{voice}_zh_en.wav"
    sf.write(
        filename,
        audio,
        samplerate=m.sample_rate,
        subtype="PCM_16",
    )
    print(f" Saved to {filename}")
    print(f" Elapsed seconds: {elapsed_seconds:.3f}")
    print(f" Audio duration in seconds: {audio_duration:.3f}")
    print(f" RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/kokoro/v1.1-zh/README.md
================================================
# Introduction

This directory is for kokoro v1.1-zh.

See also https://huggingface.co/hexgrad/Kokoro-82M-v1.1-zh


================================================
FILE: scripts/kokoro/v1.1-zh/add_meta_data.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)


import onnx
import torch

from generate_voices_bin import speaker2id


def main():
    model = onnx.load("./kokoro.onnx")
    style = torch.load("./voices/zf_001.pt", weights_only=True, map_location="cpu")

    id2speaker_str = ""
    speaker2id_str = ""
    sep = ""
    for s, i in speaker2id.items():
        speaker2id_str += f"{sep}{s}->{i}"
        id2speaker_str += f"{sep}{i}->{s}"
        sep = ","

    meta_data = {
        "model_type": "kokoro",
        "language": "multi-lang, e.g., English, Chinese",
        "has_espeak": 1,
        "sample_rate": 24000,
        "version": 2,
        "voice": "en-us",
        "style_dim": ",".join(map(str, style.shape)),
        "n_speakers": len(speaker2id),
        "id2speaker": id2speaker_str,
        "speaker2id": speaker2id_str,
        "speaker_names": ",".join(map(str, speaker2id.keys())),
        "model_url": "https://huggingface.co/hexgrad/Kokoro-82M-v1.1-zh",
        "maintainer": "k2-fsa",
        "comment": "This is Kokoro v1.1-zh, a multilingual TTS model, supporting English, Chinese.",
    }

    print(model.metadata_props)

    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)
    print("--------------------")

    print(model.metadata_props)

    onnx.save(model, "./kokoro.onnx")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/kokoro/v1.1-zh/dynamic_quantization.py
================================================
#!/usr/bin/env python3
import argparse

import onnxruntime
from onnxruntime.quantization import QuantType, quantize_dynamic


def show(filename):
    session_opts = onnxruntime.SessionOptions()
    session_opts.log_severity_level = 3
    sess = onnxruntime.InferenceSession(filename, session_opts)
    for i in sess.get_inputs():
        print(i)

    print("-----")

    for i in sess.get_outputs():
        print(i)


"""
NodeArg(name='tokens', type='tensor(int64)', shape=[1, 'sequence_length'])
NodeArg(name='style', type='tensor(float)', shape=[1, 256])
NodeArg(name='speed', type='tensor(float)', shape=[1])
-----
NodeArg(name='audio', type='tensor(float)', shape=['audio_length'])
"""


def main():
    show("./kokoro.onnx")

    quantize_dynamic(
        model_input="kokoro.onnx",
        model_output="kokoro.int8.onnx",
        #  op_types_to_quantize=["MatMul"],
        weight_type=QuantType.QUInt8,
    )


if __name__ == "__main__":
    main()


================================================
FILE: scripts/kokoro/v1.1-zh/export_onnx.py
================================================
#!/usr/bin/env python3

import json

import torch
from kokoro import KModel
from kokoro.model import KModelForONNX


@torch.no_grad()
def main():
    with open("config.json") as f:
        config = json.load(f)

    model = (
        KModel(
            repo_id="not-used-any-value-is-ok",
            model="kokoro-v1_1-zh.pth",
            config=config,
            disable_complex=True,
        )
        .to("cpu")
        .eval()
    )

    x = torch.randint(1, 100, (48,)).numpy()
    x = torch.LongTensor([[0, *x, 0]])

    style = torch.rand(1, 256, dtype=torch.float32)
    speed = torch.rand(1)

    print(x.shape, x.dtype)
    print(style.shape, style.dtype)
    print(speed, speed.dtype)

    model2 = KModelForONNX(model)

    torch.onnx.export(
        model2,
        (x, style, speed),
        "kokoro.onnx",
        input_names=["tokens", "style", "speed"],
        output_names=["audio"],
        dynamic_axes={
            "tokens": {1: "sequence_length"},
            "audio": {0: "audio_length"},
        },
        opset_version=14,  # minimum working version for this kokoro model is 14
    )


if __name__ == "__main__":
    main()


================================================
FILE: scripts/kokoro/v1.1-zh/generate_lexicon_zh.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import re
from typing import List, Tuple

from misaki import zh
from misaki.token import MToken
from misaki.zh_frontend import ZH_MAP
from pypinyin import load_phrases_dict, phrases_dict, pinyin_dict

user_dict = {
    "还田": [["huan2"], ["tian2"]],
    "行长": [["hang2"], ["zhang3"]],
    "银行行长": [["yin2"], ["hang2"], ["hang2"], ["zhang3"]],
}

load_phrases_dict(user_dict)

phrases_dict.phrases_dict.update(**user_dict)


def process_text(self, text, with_erhua=True):
    """
    This function is modified from
    https://github.com/hexgrad/misaki/blob/main/misaki/zh_frontend.py#L155

    Note that we have removed jieba.posseg.lcut().
    """
    seg_cut = [(text, "v")]
    seg_cut = self.tone_modifier.pre_merge_for_modify(seg_cut)
    tokens = []
    seg_cut = self.tone_modifier.pre_merge_for_modify(seg_cut)
    initials = []
    finals = []
    # pypinyin, g2pM
    for word, pos in seg_cut:
        if pos == "x" and "\u4E00" <= min(word) and max(word) <= "\u9FFF":
            pos = "X"
        elif pos != "x" and word in self.punc:
            pos = "x"
        tk = MToken(text=word, tag=pos, whitespace="")
        if pos in ("x", "eng"):
            if not word.isspace():
                if pos == "x" and word in self.punc:
                    tk.phonemes = word
                tokens.append(tk)
            elif tokens:
                tokens[-1].whitespace += word
            continue
        elif (
            tokens and tokens[-1].tag not in ("x", "eng") and not tokens[-1].whitespace
        ):
            tokens[-1].whitespace = "/"

        # g2p
        sub_initials, sub_finals = self._get_initials_finals(word)
        # tone sandhi
        sub_finals = self.tone_modifier.modified_tone(word, pos, sub_finals)
        # er hua
        if with_erhua:
            sub_initials, sub_finals = self._merge_erhua(
                sub_initials, sub_finals, word, pos
            )

        initials.append(sub_initials)
        finals.append(sub_finals)
        # assert len(sub_initials) == len(sub_finals) == len(word)

        # sum(iterable[, start])
        # initials = sum(initials, [])
        # finals = sum(finals, [])

        phones = []
        for c, v in zip(sub_initials, sub_finals):
            # NOTE: post process for pypinyin outputs
            # we discriminate i, ii and iii
            if c:
                phones.append(c)
            # replace punctuation by ` `
            # if c and c in self.punc:
            #     phones.append(c)
            if v and (v not in self.punc or v != c):  # and v not in self.rhy_phns:
                phones.append(v)
        phones = "_".join(phones).replace("_eR", "_er").replace("R", "_R")
        phones = re.sub(r"(?=\d)", "_", phones).split("_")
        tk.phonemes = "".join(ZH_MAP.get(p, self.unk) for p in phones)
        tokens.append(tk)

    result = "".join(
        (self.unk if tk.phonemes is None else tk.phonemes) + tk.whitespace
        for tk in tokens
    )

    return result, tokens


def generate_chinese_lexicon():
    word_dict = pinyin_dict.pinyin_dict
    phrases = phrases_dict.phrases_dict

    g2p = zh.ZHG2P(version="1.1")

    lexicon = []
    for key in word_dict:
        if not (0x4E00 <= key <= 0x9FFF):
            continue
        w = chr(key)
        tokens: str = process_text(g2p.frontend, w)[0]
        lexicon.append((w, tokens))

    for key in phrases:
        tokens: str = process_text(g2p.frontend, key)[0]
        lexicon.append((key, tokens))
    return lexicon


def save(filename: str, lexicon: List[Tuple[str, str]]):
    with open(filename, "w", encoding="utf-8") as f:
        for word, phones in lexicon:
            tokens = " ".join(list(phones))
            f.write(f"{word} {tokens}\n")


def main():
    zh = generate_chinese_lexicon()

    save("lexicon-zh.txt", zh)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/kokoro/v1.1-zh/generate_samples.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)
"""
Generate samples for
https://k2-fsa.github.io/sherpa/onnx/tts/all/
"""

import sherpa_onnx
import soundfile as sf

from generate_voices_bin import speaker2id

config = sherpa_onnx.OfflineTtsConfig(
    model=sherpa_onnx.OfflineTtsModelConfig(
        kokoro=sherpa_onnx.OfflineTtsKokoroModelConfig(
            model="./kokoro.onnx",
            voices="./voices.bin",
            tokens="./tokens.txt",
            data_dir="./espeak-ng-data",
            dict_dir="./dict",
            lexicon="./lexicon-zh.txt,./lexicon-us-en.txt",
        ),
        num_threads=2,
        debug=True,
    ),
    rule_fsts="./phone-zh.fst,./date-zh.fst,./number-zh.fst",
    max_num_sentences=1,
)

if not config.validate():
    raise ValueError("Please check your config")

tts = sherpa_onnx.OfflineTts(config)
text = "This model supports both Chinese and English. 小米的核心价值观是什么？答案是真诚热爱！有困难，请拨打110 或者18601200909。I am learning 机器学习. 我在研究 machine learning。What do you think 中英文说的如何呢? 今天是 2025年6月18号."

print("text", text)

for s, i in speaker2id.items():
    print(s, i, len(speaker2id))
    audio = tts.generate(text, sid=i, speed=1.0)

    sf.write(
        f"./hf/kokoro/v1.1-zh/mp3/{i}-{s}.mp3",
        audio.samples,
        samplerate=audio.sample_rate,
    )


================================================
FILE: scripts/kokoro/v1.1-zh/generate_voices_bin.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)
import torch
from pathlib import Path


speakers = [
    "af_maple",
    "af_sol",
    "bf_vale",
]
for i in range(1, 99 + 1):
    name = "zf_{:03d}".format(i)
    if Path(f"voices/{name}.pt").is_file():
        speakers.append(name)

for i in range(9, 100 + 1):
    name = "zm_{:03d}".format(i)
    if Path(f"voices/{name}.pt").is_file():
        speakers.append(name)


id2speaker = {index: value for index, value in enumerate(speakers)}

speaker2id = {speaker: idx for idx, speaker in id2speaker.items()}


def main():
    if Path("./voices.bin").is_file():
        print("./voices.bin exists - skip")
        return

    with open("voices.bin", "wb") as f:
        for _, speaker in id2speaker.items():
            m = torch.load(
                f"voices/{speaker}.pt",
                weights_only=True,
                map_location="cpu",
            ).numpy()
            # m.shape (510, 1, 256)

            f.write(m.tobytes())


if __name__ == "__main__":
    main()


================================================
FILE: scripts/kokoro/v1.1-zh/test.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)


import re
import time
from typing import Dict, List

import jieba
import numpy as np
import onnxruntime as ort
import soundfile as sf

try:
    from piper_phonemize import phonemize_espeak
except Exception as ex:
    raise RuntimeError(
        f"{ex}\nPlease run\n"
        "pip install piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html"
    )


def show(filename):
    session_opts = ort.SessionOptions()
    session_opts.log_severity_level = 3
    sess = ort.InferenceSession(filename, session_opts)
    for i in sess.get_inputs():
        print(i)

    print("-----")

    for i in sess.get_outputs():
        print(i)


"""
NodeArg(name='tokens', type='tensor(int64)', shape=[1, 'sequence_length'])
NodeArg(name='style', type='tensor(float)', shape=[1, 256])
NodeArg(name='speed', type='tensor(float)', shape=[1])
-----
NodeArg(name='audio', type='tensor(float)', shape=['audio_length'])
"""


def load_voices(speaker_names: List[str], dim: List[int], voices_bin: str):
    embedding = (
        np.fromfile(voices_bin, dtype="uint8")
        .view(np.float32)
        .reshape(len(speaker_names), *dim)
    )
    print("embedding.shape", embedding.shape)
    ans = dict()
    for i in range(len(speaker_names)):
        ans[speaker_names[i]] = embedding[i]

    return ans


def load_tokens(filename: str) -> Dict[str, int]:
    ans = dict()
    with open(filename, encoding="utf-8") as f:
        for line in f:
            fields = line.strip().split()
            if len(fields) == 2:
                token, idx = fields
                ans[token] = int(idx)
            else:
                assert len(fields) == 1, (len(fields), line)
                ans[" "] = int(fields[0])
    return ans


def load_lexicon(filename: str) -> Dict[str, List[str]]:
    ans = dict()
    for lexicon in filename.split(","):
        print(lexicon)
        with open(lexicon, encoding="utf-8") as f:
            for line in f:
                w, tokens = line.strip().split(" ", maxsplit=1)
                ans[w] = "".join(tokens.split())
    return ans


class OnnxModel:
    def __init__(self, model_filename: str, tokens: str, lexicon: str, voices_bin: str):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 3
        session_opts.intra_op_num_threads = 3

        self.session_opts = session_opts
        self.model = ort.InferenceSession(
            model_filename,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )
        self.token2id = load_tokens(tokens)
        self.word2tokens = load_lexicon(lexicon)

        meta = self.model.get_modelmeta().custom_metadata_map
        print(meta)
        dim = list(map(int, meta["style_dim"].split(",")))
        speaker_names = meta["speaker_names"].split(",")
        self.voices = load_voices(
            speaker_names=speaker_names, dim=dim, voices_bin=voices_bin
        )
        self.sample_rate = int(meta["sample_rate"])
        print(list(self.voices.keys()))

        self.sample_rate = 24000
        self.max_len = self.voices[next(iter(self.voices))].shape[0] - 1

    def __call__(self, text: str, voice: str):
        punctuations = ';:,.!?-…()"“”'
        text = text.lower()

        tokens = ""

        for t in re.findall("[\u4E00-\u9FFF]+|[\u0000-\u007f]+", text):
            if ord(t[0]) < 0x7F:
                for w in t.split():
                    while w:
                        if w[0] in punctuations:
                            tokens += w[0] + " "
                            w = w[1:]
                            continue

                        if w[-1] in punctuations:
                            if w[:-1] in self.word2tokens:
                                tokens += self.word2tokens[w[:-1]]
                                tokens += w[-1]
                        else:
                            if w in self.word2tokens:
                                tokens += self.word2tokens[w]
                            else:
                                print(f"Use espeak-ng for word {w}")
                                tokens += "".join(phonemize_espeak(w, "en-us")[0])

                        tokens += " "
                        break
            else:
                # Chinese
                for w in jieba.cut(t):
                    if w in self.word2tokens:
                        tokens += self.word2tokens[w]
                    else:
                        for i in w:
                            if i in self.word2tokens:
                                tokens += self.word2tokens[i]
                            else:
                                print(f"skip {i}")

        token_ids = [self.token2id[i] for i in tokens]
        token_ids = token_ids[: self.max_len]

        style = self.voices[voice][len(token_ids)]

        token_ids = [0, *token_ids, 0]
        token_ids = np.array([token_ids], dtype=np.int64)

        speed = np.array([1.0], dtype=np.float32)

        audio = self.model.run(
            [
                self.model.get_outputs()[0].name,
            ],
            {
                self.model.get_inputs()[0].name: token_ids,
                self.model.get_inputs()[1].name: style,
                self.model.get_inputs()[2].name: speed,
            },
        )[0]
        return audio


def main():
    m = OnnxModel(
        model_filename="./kokoro.onnx",
        tokens="./tokens.txt",
        lexicon="./lexicon-us-en.txt,./lexicon-zh.txt",
        voices_bin="./voices.bin",
    )
    text = "来听一听, 这个是什么口音? How are you doing? Are you ok? Thank you! 你觉得中英文说得如何呢?"

    text = text.lower()

    voice = "zf_001"
    start = time.time()
    audio = m(text, voice=voice)
    end = time.time()

    elapsed_seconds = end - start
    audio_duration = len(audio) / m.sample_rate
    real_time_factor = elapsed_seconds / audio_duration

    filename = f"kokoro_v1.1_{voice}_zh_en.wav"
    sf.write(
        filename,
        audio,
        samplerate=m.sample_rate,
        subtype="PCM_16",
    )
    print(f" Saved to {filename}")
    print(f" Elapsed seconds: {elapsed_seconds:.3f}")
    print(f" Audio duration in seconds: {audio_duration:.3f}")
    print(f" RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/lazarus/generate-subtitles.py
================================================
#!/usr/bin/env python3

import argparse
from dataclasses import dataclass
from typing import List, Optional

import jinja2


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--total",
        type=int,
        default=1,
        help="Number of runners",
    )
    parser.add_argument(
        "--index",
        type=int,
        default=0,
        help="Index of the current runner",
    )
    return parser.parse_args()


@dataclass
class Model:
    model_name: str
    lang: str
    short_name: str = ""
    cmd: str = ""


def get_models():
    models = [
        Model(
            model_name="sherpa-onnx-whisper-tiny.en",
            lang="en",
            short_name="whisper_tiny.en",
            cmd="""
            pushd $model_name
            rm -fv tiny.en-encoder.onnx
            rm -fv tiny.en-decoder.onnx

            mv -v tiny.en-encoder.int8.onnx whisper-encoder.onnx
            mv -v tiny.en-decoder.int8.onnx whisper-decoder.onnx
            mv -v tiny.en-tokens.txt tokens.txt

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-moonshine-tiny-en-int8",
            lang="en",
            short_name="moonshine_tiny",
            cmd="""
            pushd $model_name
            mv -v preprocess.onnx moonshine-preprocessor.onnx
            mv -v encode.int8.onnx moonshine-encoder.onnx
            mv -v uncached_decode.int8.onnx moonshine-uncached-decoder.onnx
            mv -v cached_decode.int8.onnx moonshine-cached-decoder.onnx

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17",
            lang="zh_en_ko_ja_yue",
            short_name="sense_voice",
            cmd="""
            pushd $model_name
            rm -fv model.onnx
            mv -v model.int8.onnx sense-voice.onnx
            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-paraformer-zh-2023-09-14",
            lang="zh_en",
            short_name="paraformer_2023_09_14",
            cmd="""
            pushd $model_name
            rm -fv model.onnx
            mv -v model.int8.onnx paraformer.onnx
            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-paraformer-zh-small-2024-03-09",
            lang="zh_en",
            short_name="paraformer_small_2024_03_09",
            cmd="""
            pushd $model_name
            rm -fv model.onnx
            mv -v model.int8.onnx paraformer.onnx
            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-zipformer-gigaspeech-2023-12-12",
            lang="en",
            short_name="zipformer_gigaspeech_2023_12_12",
            cmd="""
            pushd $model_name
            mv encoder-epoch-30-avg-1.int8.onnx transducer-encoder.onnx
            mv decoder-epoch-30-avg-1.onnx transducer-decoder.onnx
            mv joiner-epoch-30-avg-1.int8.onnx transducer-joiner.onnx

            rm -fv encoder-epoch-30-avg-1.onnx
            rm -fv decoder-epoch-30-avg-1.int8.onnx
            rm -fv joiner-epoch-30-avg-1.onnx

            popd
            """,
        ),
        Model(
            model_name="icefall-asr-zipformer-wenetspeech-20230615",
            lang="zh",
            short_name="zipformer_wenetspeech",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs
            rm -fv README.md
            mv -v data/lang_char/tokens.txt ./
            rm -rfv data/lang_char

            mv -v exp/encoder-epoch-12-avg-4.int8.onnx ./
            mv -v exp/decoder-epoch-12-avg-4.onnx ./
            mv -v exp/joiner-epoch-12-avg-4.int8.onnx ./
            rm -rfv exp

            mv -v encoder-epoch-12-avg-4.int8.onnx transducer-encoder.onnx
            mv -v decoder-epoch-12-avg-4.onnx transducer-decoder.onnx
            mv -v joiner-epoch-12-avg-4.int8.onnx transducer-joiner.onnx

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01",
            lang="ja",
            short_name="zipformer_reazonspeech_2024_08_01",
            cmd="""
            pushd $model_name
            mv encoder-epoch-99-avg-1.int8.onnx transducer-encoder.onnx
            mv decoder-epoch-99-avg-1.onnx transducer-decoder.onnx
            mv joiner-epoch-99-avg-1.int8.onnx transducer-joiner.onnx

            rm -fv encoder-epoch-99-avg-1.onnx
            rm -fv decoder-epoch-99-avg-1.int8.onnx
            rm -fv joiner-epoch-99-avg-1.onnx

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-zipformer-thai-2024-06-20",
            lang="th",
            short_name="zipformer_gigaspeech2",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs
            rm -fv README.md
            rm -fv bpe.model

            mv encoder-epoch-12-avg-5.int8.onnx transducer-encoder.onnx
            mv decoder-epoch-12-avg-5.onnx transducer-decoder.onnx
            mv joiner-epoch-12-avg-5.int8.onnx transducer-joiner.onnx

            rm -fv encoder-epoch-12-avg-5.onnx
            rm -fv decoder-epoch-12-avg-5.int8.onnx
            rm -fv joiner-epoch-12-avg-5.onnx

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04",
            lang="zh",
            short_name="telespeech_ctc",
            cmd="""
            pushd $model_name

            mv model.int8.onnx telespeech.onnx
            rm -fv model.onnx

            ls -lh

            popd
            """,
        ),
        Model(
            model_name="sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8",
            lang="en",
            short_name="parakeet_tdt_0.6b_v2",
            cmd="""
            pushd $model_name

            rm -rfv test_wavs

            mv -v encoder.int8.onnx nemo-transducer-encoder.onnx
            mv -v decoder.int8.onnx nemo-transducer-decoder.onnx
            mv -v joiner.int8.onnx nemo-transducer-joiner.onnx

            ls -lh

            popd
            """,
        ),
    ]
    return models


def main():
    args = get_args()
    index = args.index
    total = args.total
    assert 0 <= index < total, (index, total)

    all_model_list = get_models()

    num_models = len(all_model_list)

    num_per_runner = num_models // total
    if num_per_runner <= 0:
        raise ValueError(f"num_models: {num_models}, num_runners: {total}")

    start = index * num_per_runner
    end = start + num_per_runner

    remaining = num_models - args.total * num_per_runner

    print(f"{index}/{total}: {start}-{end}/{num_models}")

    d = dict()
    d["model_list"] = all_model_list[start:end]
    if index < remaining:
        s = args.total * num_per_runner + index
        d["model_list"].append(all_model_list[s])
        print(f"{s}/{num_models}")

    filename_list = [
        "./build-generate-subtitles.sh",
    ]
    for filename in filename_list:
        environment = jinja2.Environment()
        with open(f"{filename}.in") as f:
            s = f.read()
        template = environment.from_string(s)

        s = template.render(**d)
        with open(filename, "w") as f:
            print(s, file=f)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/matcha-tts/README.md
================================================
# Introduction

This folder contains scripts for adding meta data to tts models
from https://github.com/shivammehta25/Matcha-TTS

Note: If you use icefall to train a MatchaTTS model, you don't need this folder.


================================================
FILE: scripts/matcha-tts/en/generate_samples.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)
"""
Generate samples for
https://k2-fsa.github.io/sherpa/onnx/tts/all/
"""


import sherpa_onnx
import soundfile as sf

config = sherpa_onnx.OfflineTtsConfig(
    model=sherpa_onnx.OfflineTtsModelConfig(
        matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(
            acoustic_model="matcha-icefall-en_US-ljspeech/model-steps-3.onnx",
            vocoder="vocos-22khz-univ.onnx",
            tokens="matcha-icefall-en_US-ljspeech/tokens.txt",
            lexicon="",
            data_dir="matcha-icefall-en_US-ljspeech/espeak-ng-data",
        ),
        num_threads=2,
    ),
    max_num_sentences=1,
)

if not config.validate():
    raise ValueError("Please check your config")

tts = sherpa_onnx.OfflineTts(config)
text = "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."

audio = tts.generate(text, sid=0, speed=1.0)

sf.write(
    "./hf/matcha/icefall-en-ljspeech/mp3/0.mp3",
    audio.samples,
    samplerate=audio.sample_rate,
)


================================================
FILE: scripts/matcha-tts/fa-en/.gitignore
================================================
.add-meta-data.done


================================================
FILE: scripts/matcha-tts/fa-en/README.md
================================================
# Introduction

This folder is for
https://github.com/k2-fsa/sherpa-onnx/issues/1779


================================================
FILE: scripts/matcha-tts/fa-en/add_meta_data.py
================================================
#!/usr/bin/env python3

from typing import Any, Dict

import onnx


def add_meta_data(filename: str, meta_data: Dict[str, Any]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)

    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    onnx.save(model, filename)


def main():
    meta_data = {
        "model_type": "matcha-tts",
        "language": "Persian+English",
        "voice": "fa",
        "has_espeak": 1,
        "jieba": 0,
        "n_speakers": 1,
        "sample_rate": 22050,
        "version": 1,
        "pad_id": 0,
        "use_icefall": 0,
        "model_author": "Ali Mahmoudi (@mah92)",
        "maintainer": "k2-fsa",
        "use_eos_bos": 0,
        "num_ode_steps": 5,
        "see_also": "https://github.com/k2-fsa/sherpa-onnx/issues/1779",
    }
    add_meta_data("./female/model.onnx", meta_data)
    add_meta_data("./male/model.onnx", meta_data)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/matcha-tts/fa-en/test.py
================================================
#!/usr/bin/env python3

"""
AM
NodeArg(name='x', type='tensor(int64)', shape=['batch_size', 'time'])
NodeArg(name='x_lengths', type='tensor(int64)', shape=['batch_size'])
NodeArg(name='scales', type='tensor(float)', shape=[2])
-----
NodeArg(name='mel', type='tensor(float)', shape=['batch_size', 80, 'time'])
NodeArg(name='mel_lengths', type='tensor(int64)', shape=['batch_size'])

Vocoder
NodeArg(name='mel', type='tensor(float)', shape=['N', 80, 'L'])
-----
NodeArg(name='audio', type='tensor(float)', shape=['N', 'L'])
"""

import argparse

import numpy as np
import onnxruntime as ort
import soundfile as sf

try:
    from piper_phonemize import phonemize_espeak
except Exception as ex:
    raise RuntimeError(
        f"{ex}\nPlease run\n"
        "pip install piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html"
    )


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--am", type=str, required=True, help="Path to the acoustic model"
    )

    parser.add_argument(
        "--vocoder", type=str, required=True, help="Path to the vocoder"
    )
    parser.add_argument(
        "--tokens", type=str, required=True, help="Path to the tokens.txt"
    )

    parser.add_argument(
        "--text", type=str, required=True, help="Path to the text for generation"
    )

    parser.add_argument(
        "--out-wav", type=str, required=True, help="Path to save the generated wav"
    )
    return parser.parse_args()


def load_tokens(filename: str):
    ans = dict()
    with open(filename, encoding="utf-8") as f:
        for line in f:
            fields = line.strip().split()
            if len(fields) == 1:
                ans[" "] = int(fields[0])
            else:
                assert len(fields) == 2, (line, fields)
                ans[fields[0]] = int(fields[1])
    return ans


class OnnxHifiGANModel:
    def __init__(
        self,
        filename: str,
    ):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.session_opts = session_opts
        self.model = ort.InferenceSession(
            filename,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )

        for i in self.model.get_inputs():
            print(i)

        print("-----")

        for i in self.model.get_outputs():
            print(i)

    def __call__(self, x: np.ndarray):
        assert x.ndim == 3, x.shape
        assert x.shape[0] == 1, x.shape

        audio = self.model.run(
            [self.model.get_outputs()[0].name],
            {
                self.model.get_inputs()[0].name: x,
            },
        )[0]
        # audio: (batch_size, num_samples)

        return audio


class OnnxModel:
    def __init__(
        self,
        filename: str,
        tokens: str,
    ):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 2

        self.session_opts = session_opts
        self.token2id = load_tokens(tokens)
        self.model = ort.InferenceSession(
            filename,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )

        print(f"{self.model.get_modelmeta().custom_metadata_map}")
        metadata = self.model.get_modelmeta().custom_metadata_map
        self.sample_rate = int(metadata["sample_rate"])

        for i in self.model.get_inputs():
            print(i)

        print("-----")

        for i in self.model.get_outputs():
            print(i)

    def __call__(self, x: np.ndarray):
        assert x.ndim == 2, x.shape
        assert x.shape[0] == 1, x.shape

        x_lengths = np.array([x.shape[1]], dtype=np.int64)

        noise_scale = 1.0
        length_scale = 1.0
        scales = np.array([noise_scale, length_scale], dtype=np.float32)

        mel = self.model.run(
            [self.model.get_outputs()[0].name],
            {
                self.model.get_inputs()[0].name: x,
                self.model.get_inputs()[1].name: x_lengths,
                self.model.get_inputs()[2].name: scales,
            },
        )[0]
        # mel: (batch_size, feat_dim, num_frames)

        return mel


def main():
    args = get_args()
    print(vars(args))
    am = OnnxModel(args.am, args.tokens)
    vocoder = OnnxHifiGANModel(args.vocoder)

    phones = phonemize_espeak(args.text, voice="fa")
    phones = sum(phones, [])
    phone_ids = [am.token2id[i] for i in phones]

    padded_phone_ids = [0] * (len(phone_ids) * 2 + 1)
    padded_phone_ids[1::2] = phone_ids

    tokens = np.array([padded_phone_ids], dtype=np.int64)
    mel = am(tokens)
    audio = vocoder(mel)

    sf.write(args.out_wav, audio[0], am.sample_rate, "PCM_16")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/matcha-tts/zh/generate_samples.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)
"""
Generate samples for
https://k2-fsa.github.io/sherpa/onnx/tts/all/
"""


import sherpa_onnx
import soundfile as sf

config = sherpa_onnx.OfflineTtsConfig(
    model=sherpa_onnx.OfflineTtsModelConfig(
        matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(
            acoustic_model="matcha-icefall-zh-baker/model-steps-3.onnx",
            vocoder="vocos-22khz-univ.onnx",
            lexicon="matcha-icefall-zh-baker/lexicon.txt",
            tokens="matcha-icefall-zh-baker/tokens.txt",
            dict_dir="matcha-icefall-zh-baker/dict",
        ),
        num_threads=2,
    ),
    max_num_sentences=1,
    rule_fsts="./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst",
)

if not config.validate():
    raise ValueError("Please check your config")

tts = sherpa_onnx.OfflineTts(config)
text = "某某银行的副行长和一些行政领导表示，他们去过长江和长白山; 经济不断增长。2024年12月31号，拨打110或者18920240511。123456块钱。当夜幕降临，星光点点，伴随着微风拂面，我在静谧中感受着时光的流转，思念如涟漪荡漾，梦境如画卷展开，我与自然融为一体，沉静在这片宁静的美丽之中，感受着生命的奇迹与温柔."


audio = tts.generate(text, sid=0, speed=1.0)

sf.write(
    "./hf/matcha/icefall-zh/mp3/0.mp3",
    audio.samples,
    samplerate=audio.sample_rate,
)


================================================
FILE: scripts/matcha-tts/zh-en/.gitignore
================================================
vocab_tts.txt


================================================
FILE: scripts/matcha-tts/zh-en/README.md
================================================
# Introduction

Model files are from
https://modelscope.cn/models/dengcunqin/matcha_tts_zh_en_20251010/summary

Note that you have to use
vocos-16khz-univ.onnx

You can download it from
 https://modelscope.cn/models/dengcunqin/matcha_tts_zh_en_20251010/resolve/master/vocos-16khz-univ.onnx
or
 https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-16khz-univ.onnx

```
{'am': './model-steps-3.onnx', 'vocoder': './vocos-16khz-univ.onnx', 'tokens': './tokens.txt', 'lexicon': './lexicon.txt', 'text': '中英文合成测试. It supports both English 和中文合成', 'out_wav': 'generated.wav'}

{'use_eos_bos': '1', 'modelscope_url': 'https://modelscope.cn/models/dengcunqin/matcha_tts_zh_en_20251010', 'sample_rate': '16000', 'language': 'chinese English', 'model_type': 'matcha-tts', 'n_speakers': '1', 'model_author': 'dengcunqin', 'version': '1', 'pad_id': '0', 'voice': 'zh en-us', 'demo_url': 'https://www.tulingyun.com/tts.html', 'num_ode_steps': '3'}

NodeArg(name='x', type='tensor(int64)', shape=['N', 'L'])
NodeArg(name='x_length', type='tensor(int64)', shape=['N'])
NodeArg(name='noise_scale', type='tensor(float)', shape=[1])
NodeArg(name='length_scale', type='tensor(float)', shape=[1])
-----
NodeArg(name='mel', type='tensor(float)', shape=['N', 80, 'L'])

vocos {'modelscope_url': 'https://modelscope.cn/models/dengcunqin/matcha_tts_zh_en_20251010', 'use_eos_bos': '1', 'n_speakers': '1', 'sample_rate': '16000', 'pad_id': '0', 'language': 'chinese English', 'model_type': 'matcha-tts vocos', 'voice': 'zh en-us', 'version': '1', 'demo_url': 'https://www.tulingyun.com/tts.html', 'model_author': 'dengcunqin'}

----------vocos----------
NodeArg(name='mels', type='tensor(float)', shape=['batch_size', 80, 'time'])
-----
NodeArg(name='mag', type='tensor(float)', shape=['batch_size', 'Clipmag_dim_1', 'time'])
NodeArg(name='x', type='tensor(float)', shape=['batch_size', 'Cosx_dim_1', 'time'])
NodeArg(name='y', type='tensor(float)', shape=['batch_size', 'Cosx_dim_1', 'time'])
```


================================================
FILE: scripts/matcha-tts/zh-en/generate_lexicon.py
================================================
#!/usr/bin/env python3

from pypinyin import Style, pinyin, load_phrases_dict, phrases_dict, pinyin_dict

load_phrases_dict(
    {
        "行长": [["hang2"], ["zhang3"]],
        "银行行长": [["yin2"], ["hang2"], ["hang2"], ["zhang3"]],
    }
)
user_defined = {
    "微调": ["wei1", "tiao2"],
    "这个": ["zhe4", "ge4"],
    "方便地": ["fang1", "bian2", "de1"],
}


def main():
    filename = "lexicon.txt"

    word_dict = pinyin_dict.pinyin_dict
    phrases = phrases_dict.phrases_dict

    i = 0
    with open(filename, "w", encoding="utf-8") as f:
        for key in word_dict:
            if not (0x4E00 <= key <= 0x9FFF):
                continue

            w = chr(key)
            tokens = pinyin(w, style=Style.TONE3, neutral_tone_with_five=True)[0][0]

            if tokens == "shei2":
                tokens = "shui2"

            if tokens[-1] not in ("1", "2", "3", "4", "5"):
                tokens += "1"

            f.write(f"{w} {tokens}\n")

        for key, value in user_defined.items():
            f.write(f"{key} {' '.join(value)}\n")

        for key in phrases:
            if key in user_defined:
                continue
            tokens = pinyin(key, style=Style.TONE3, neutral_tone_with_five=True)

            for i in range(len(tokens)):
                if tokens[i][0] == "shei2":
                    tokens[i][0] = "shui2"

                if tokens[i][0][-1] not in ("1", "2", "3", "4", "5"):
                    tokens[i][0] += "1"

            flatten = [t[0] for t in tokens]

            tokens = " ".join(flatten)

            f.write(f"{key} {tokens}\n")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/matcha-tts/zh-en/generate_samples.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)
"""
Generate samples for
https://k2-fsa.github.io/sherpa/onnx/tts/all/
"""


import sherpa_onnx
import soundfile as sf

config = sherpa_onnx.OfflineTtsConfig(
    model=sherpa_onnx.OfflineTtsModelConfig(
        matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(
            acoustic_model="matcha-icefall-zh-en/model-steps-3.onnx",
            vocoder="vocos-16khz-univ.onnx",
            lexicon="matcha-icefall-zh-en/lexicon.txt",
            tokens="matcha-icefall-zh-en/tokens.txt",
            data_dir="matcha-icefall-zh-en/espeak-ng-data",
        ),
        num_threads=2,
    ),
    max_num_sentences=1,
    rule_fsts="./matcha-icefall-zh-en/phone-zh.fst,./matcha-icefall-zh-en/date-zh.fst,./matcha-icefall-zh-en/number-zh.fst",
)

if not config.validate():
    raise ValueError("Please check your config")

tts = sherpa_onnx.OfflineTts(config)
text = "我最近在学习machine learning，希望能够在未来的artificial intelligence领域有所建树。在这次vocation中，我们计划去Paris欣赏埃菲尔铁塔和卢浮宫的美景。某某银行的副行长和一些行政领导表示，他们去过长江和长白山; 经济不断增长。开始数字测试。2025年12月4号，拨打110或者189202512043。123456块钱。在这个快速发展的时代，人工智能技术正在改变我们的生活方式。语音合成作为人工智能的重要应用之一，让机器能够用自然流畅的语音与人类进行交流。"


audio = tts.generate(text, sid=0, speed=1.0)

sf.write(
    "./hf/matcha/icefall-zh-en/mp3/0.mp3",
    audio.samples,
    samplerate=audio.sample_rate,
)


================================================
FILE: scripts/matcha-tts/zh-en/generate_tokens.py
================================================
#!/usr/bin/env python3

token2id = dict()
with open("./vocab_tts.txt", encoding="utf-8") as f:
    for i, line in enumerate(f):
        fields = line.strip().split()
        if len(fields) == 0:
            token2id[" "] = i + 1
        else:
            token2id[fields[0]] = i + 1

with open("./tokens.txt", "w", encoding="utf-8") as f:
    for t, i in token2id.items():
        f.write(f"{t} {i}\n")


================================================
FILE: scripts/matcha-tts/zh-en/test.py
================================================
#!/usr/bin/env python3

"""
AM

NodeArg(name='x', type='tensor(int64)', shape=['N', 'L'])
NodeArg(name='x_length', type='tensor(int64)', shape=['N'])
NodeArg(name='noise_scale', type='tensor(float)', shape=[1])
NodeArg(name='length_scale', type='tensor(float)', shape=[1])
-----
NodeArg(name='mel', type='tensor(float)', shape=['N', 80, 'L'])

Vocoder

NodeArg(name='mels', type='tensor(float)', shape=['batch_size', 80, 'time'])
-----
NodeArg(name='mag', type='tensor(float)', shape=['batch_size', 'Clipmag_dim_1', 'time'])
NodeArg(name='x', type='tensor(float)', shape=['batch_size', 'Cosx_dim_1', 'time'])
NodeArg(name='y', type='tensor(float)', shape=['batch_size', 'Cosx_dim_1', 'time'])
"""

import argparse

import re

import kaldi_native_fbank as knf
import numpy as np
import onnxruntime as ort
import soundfile as sf

try:
    from piper_phonemize import phonemize_espeak
except Exception as ex:
    raise RuntimeError(
        f"{ex}\nPlease run\n"
        "pip install piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html"
    )


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--am",
        type=str,
        default="./model-steps-3.onnx",
        help="Path to the acoustic model",
    )

    parser.add_argument(
        "--vocoder",
        type=str,
        default="./vocos-16khz-univ.onnx",
        help="Path to the vocoder",
    )
    parser.add_argument(
        "--tokens", type=str, default="./tokens.txt", help="Path to the tokens.txt"
    )

    parser.add_argument(
        "--lexicon", type=str, default="./lexicon.txt", help="Path to the lexicon.txt"
    )

    parser.add_argument(
        "--text",
        type=str,
        #  default="这是一个中英文测试. It can also speak English. 你觉得中英文说的如何呀?",
        default="中英文合成测试. It supports both English 和中文合成",
        help="The text for generation",
    )

    parser.add_argument(
        "--out-wav",
        type=str,
        default="generated.wav",
        help="Path to save the generated wav",
    )
    return parser.parse_args()


def load_tokens(filename: str):
    ans = dict()
    with open(filename, encoding="utf-8") as f:
        for line in f:
            fields = line.strip().split()
            if len(fields) == 1:
                ans[" "] = int(fields[0])
            else:
                assert len(fields) == 2, (line, fields)
                ans[fields[0]] = int(fields[1])
    return ans


def load_lexicon(filename: str, token2id):
    ans = dict()
    with open(filename, encoding="utf-8") as f:
        for line in f:
            fields = line.strip().split()
            tokens = fields[1:]
            ids = [token2id[t] for t in tokens]
            ans[fields[0]] = ids
    return ans


class OnnxVocosModel:
    def __init__(
        self,
        filename: str,
    ):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.session_opts = session_opts
        self.model = ort.InferenceSession(
            filename,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )
        print(f"vocos {self.model.get_modelmeta().custom_metadata_map}")

        print("----------vocos----------")
        for i in self.model.get_inputs():
            print(i)

        print("-----")

        for i in self.model.get_outputs():
            print(i)
        print()

    def __call__(self, x: np.ndarray):
        """
        Args:
          x: (N, feat_dim, num_frames)
        Returns:
          mag: (N, n_fft/2+1, num_frames)
          x: (N, n_fft/2+1, num_frames)
          y: (N, n_fft/2+1, num_frames)

        The complex spectrum is mag * (x + j*y)
        """
        assert x.ndim == 3, x.shape
        assert x.shape[0] == 1, x.shape

        mag, x, y = self.model.run(
            [
                self.model.get_outputs()[0].name,
                self.model.get_outputs()[1].name,
                self.model.get_outputs()[2].name,
            ],
            {
                self.model.get_inputs()[0].name: x,
            },
        )

        return mag, x, y


class OnnxModel:
    def __init__(
        self,
        filename: str,
    ):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 2

        self.session_opts = session_opts
        self.model = ort.InferenceSession(
            filename,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )

        print(f"{self.model.get_modelmeta().custom_metadata_map}")
        metadata = self.model.get_modelmeta().custom_metadata_map
        self.sample_rate = int(metadata["sample_rate"])

        for i in self.model.get_inputs():
            print(i)

        print("-----")

        for i in self.model.get_outputs():
            print(i)

    def __call__(self, x: np.ndarray):
        assert x.ndim == 2, x.shape
        assert x.shape[0] == 1, x.shape

        x_lengths = np.array([x.shape[1]], dtype=np.int64)

        noise_scale = 1.0
        length_scale = 1.0

        mel = self.model.run(
            [self.model.get_outputs()[0].name],
            {
                self.model.get_inputs()[0].name: x,
                self.model.get_inputs()[1].name: x_lengths,
                self.model.get_inputs()[2].name: np.array(
                    [noise_scale], dtype=np.float32
                ),
                self.model.get_inputs()[3].name: np.array(
                    [length_scale], dtype=np.float32
                ),
            },
        )[0]
        # mel: (batch_size, feat_dim, num_frames)

        return mel


def main():
    args = get_args()
    print(vars(args))
    am = OnnxModel(args.am)
    vocoder = OnnxVocosModel(args.vocoder)

    token2id = load_tokens(args.tokens)
    id2token = {i: t for t, i in token2id.items()}
    lexicon = load_lexicon(args.lexicon, token2id)

    text = args.text

    pattern = re.compile(r"[\u4e00-\u9fff]+|[a-zA-Z0-9 ,.!\?]+")

    ids = []
    for match in pattern.finditer(text):
        segment = match.group()
        if segment in token2id:
            print(segment)
            ids.append(token2id[segment])
        elif re.match(r"[\u4e00-\u9fff]+", segment):
            # process chinese
            print(segment)
            for w in segment:
                if w in lexicon:
                    ids += lexicon[w]
                else:
                    print(f"Ignore {w}")
        else:
            print(segment)
            segment = segment.strip()
            tokens_list = phonemize_espeak(segment, "en-us")
            tokens = sum(tokens_list, [])
            for t in tokens:
                ids.append(token2id[t])

    tokens = np.array([ids], dtype=np.int64)
    mel = am(tokens)
    print(tokens)
    print(mel.shape)

    mag, x, y = vocoder(mel)
    stft_result = knf.StftResult(
        real=(mag * x)[0].transpose().reshape(-1).tolist(),
        imag=(mag * y)[0].transpose().reshape(-1).tolist(),
        num_frames=mag.shape[2],
    )
    config = knf.StftConfig(
        n_fft=1024,
        hop_length=256,
        win_length=1024,
        window_type="hann",
        center=True,
        pad_mode="reflect",
        normalized=False,
    )
    istft = knf.IStft(config)
    audio_vocos = istft(stft_result)

    audio_vocos = np.array(audio_vocos)

    sf.write(args.out_wav, audio_vocos, am.sample_rate, "PCM_16")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/medasr/README.md
================================================
---
license: other
license_name: health-ai-developer-foundations
license_link: https://developers.google.com/health-ai-developer-foundations/terms
language:
- en
pipeline_tag: automatic-speech-recognition
library_name: transformers
tags:
- medical-asr
- radiology
- medical
---

# Introduction

This directory includes models sourced from:

https://github.com/Google-Health/medasr

All model files are governed by the Health AI Developer Foundations Terms of Use.
For full licensing details, please refer to:

https://developers.google.com/health-ai-developer-foundations/terms


================================================
FILE: scripts/medasr/export_onnx.py
================================================
#!/usr/bin/env python3
# Copyright      2025  Xiaomi Corp.        (authors: Fangjun Kuang)

"""
Make sure you have set the environment variable

    export HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

where hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx is your Huggingface access token.
"""

from typing import Any, Dict

import onnx
import torch
from onnxruntime.quantization import QuantType, quantize_dynamic
from transformers import AutoModelForCTC, AutoProcessor


def add_meta_data(filename: str, meta_data: Dict[str, Any]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)
    model.metadata_props.clear()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    onnx.save(model, filename)


class Wrapper(torch.nn.Module):
    def __init__(self, m):
        super().__init__()
        self.m = m

    def forward(self, x: torch.Tensor, mask: torch.Tensor):
        """
        Args:
          x: (N, T, C), dtype float32
          mask: (N, T), dtype int64. Valid positions are 1. Padding positions are 0.
        Returns:
          logits: (N, T/4, vocob_size), dtype float32
          logits_len: (N,), dtype int64
        """
        o = self.m(x, mask.bool())
        logits_len = self.m._get_subsampling_output_length(mask.sum(-1)).to(torch.int64)
        return o.logits, logits_len


def generate_tokens(tokenizer):
    vocab = tokenizer.get_vocab()
    id2token = {i: t for t, i in vocab.items()}

    with open("tokens.txt", "w", encoding="utf-8") as f:
        for i in range(tokenizer.vocab_size):
            if i == tokenizer.pad_token_id:
                f.write(f"<blk> {i}\n")
            else:
                f.write(f"{id2token[i]} {i}\n")
    print("saved to tokens.txt")


@torch.no_grad()
def main():
    model_id = "google/medasr"
    processor = AutoProcessor.from_pretrained(model_id)

    generate_tokens(processor.tokenizer)

    model = AutoModelForCTC.from_pretrained(model_id)

    w = Wrapper(model)
    w.eval()

    filename = "model.onnx"
    x = torch.rand(1, 100, 128)
    mask = torch.ones(1, x.shape[1], dtype=torch.int64)
    torch.onnx.export(
        w,
        (x, mask),
        filename,
        input_names=["x", "mask"],
        output_names=["logits", "logits_len"],
        dynamic_axes={
            "x": {0: "N", 1: "T"},
            "mask": {0: "N", 1: "T"},
            "logits": {0: "N", 1: "T_4"},
            "logits_len": {0: "N"},
        },
        opset_version=14,
        external_data=False,
        dynamo=False,
    )

    meta_data = {
        "model_type": "medasr_ctc",
        "version": "20251225",
        "model_author": "google",
        "maintainer": "k2-fsa",
        "vocab_size": processor.tokenizer.vocab_size,
        "subsampling_factor": 4,
        "url": "https://github.com/Google-Health/medasr",
        "license": "https://developers.google.com/health-ai-developer-foundations/terms",
    }
    add_meta_data(filename=filename, meta_data=meta_data)

    filename_int8 = "model.int8.onnx"
    quantize_dynamic(
        model_input=filename,
        model_output=filename_int8,
        op_types_to_quantize=["MatMul"],
        # Note that we have to use QUInt8 here.
        #
        # When QInt8 is used, C++ onnxruntime produces incorrect results
        weight_type=QuantType.QUInt8,
    )


if __name__ == "__main__":
    main()


================================================
FILE: scripts/medasr/test_onnx.py
================================================
#!/usr/bin/env python3
# Copyright      2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import argparse
import time

import kaldi_native_fbank as knf
import librosa
import numpy as np
import onnxruntime as ort


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--model",
        type=str,
        required=True,
        help="Path to onnx model file",
    )

    parser.add_argument(
        "--tokens",
        type=str,
        required=True,
        help="Path to tokens.txt",
    )

    parser.add_argument(
        "--wav",
        type=str,
        required=True,
        help="Path to test wav",
    )
    return parser.parse_args()


class OnnxModel:
    def __init__(self, filename):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.session_opts = session_opts

        self.model = ort.InferenceSession(
            filename,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )

    def __call__(self, x, mask):
        """
        Args:
          x: (N, T, C), float32
          mask: (N, T), int64
        Returns:
          logits: (N, T/4, vocab_size), float32
          logits_len: (N,) int64
        """
        logits, logits_len = self.model.run(
            [
                self.model.get_outputs()[0].name,
                self.model.get_outputs()[1].name,
            ],
            {
                self.model.get_inputs()[0].name: x,
                self.model.get_inputs()[1].name: mask,
            },
        )

        return logits, logits_len


def load_tokens(tokens):
    id2token = dict()
    with open(tokens, encoding="utf-8") as f:
        for line in f:
            fields = line.split()
            if len(fields) == 1:
                id2token[int(fields[0])] = " "
            else:
                t, idx = fields
                id2token[int(idx)] = t
    return id2token


def compute_feat(samples):
    opts = knf.FbankOptions()
    opts.frame_opts.dither = 0
    opts.frame_opts.snip_edges = True
    opts.frame_opts.window_type = "hanning"
    opts.frame_opts.samp_freq = 16000
    opts.frame_opts.preemph_coeff = 0
    opts.frame_opts.remove_dc_offset = False
    opts.mel_opts.num_bins = 128

    online_fbank = knf.OnlineFbank(opts)
    online_fbank.accept_waveform(16000, samples.tolist())
    online_fbank.input_finished()

    features = np.stack(
        [online_fbank.get_frame(i) for i in range(online_fbank.num_frames_ready)]
    )
    assert features.dtype == np.float32, features.dtype

    features = np.ascontiguousarray(features)

    return features


def main():
    args = get_args()
    print(vars(args))

    model = OnnxModel(args.model)

    samples, sample_rate = librosa.load(args.wav, sr=16000)

    start = time.time()

    assert sample_rate == 16000, sample_rate
    features = compute_feat(samples)
    mask = np.ones(features.shape[0], dtype=np.int64)[None]
    features = features[None]

    logits, logits_len = model(features, mask)
    idx = logits[0, : logits_len[0]].argmax(axis=-1)

    end = time.time()
    elapsed_seconds = end - start
    audio_duration = samples.shape[0] / 16000
    real_time_factor = elapsed_seconds / audio_duration

    print("idx", idx)

    unique_ids = []
    prev = -1
    for i in idx.tolist():
        if i == prev:
            continue
        unique_ids.append(i)
        prev = i
    print("unique_ids", unique_ids)
    blank_id = 0
    ids = [i for i in unique_ids if i != blank_id]
    print(ids)

    id2token = load_tokens(args.tokens)

    tokens = [id2token[i] for i in ids]
    text = "".join(tokens)
    print(text)
    text = text.replace("▁", " ")
    print(text)
    print(f"RTF: {real_time_factor}")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/melo-tts/README.md
================================================
# Introduction

Models in this directory are converted from
https://github.com/myshell-ai/MeloTTS

Note there is only a single female speaker in the model for Chinese+English TTS.
TTS model, whereas there are 5 female speakers in the model For English TTS.


================================================
FILE: scripts/melo-tts/export-onnx-en.py
================================================
#!/usr/bin/env python3
# This model exports the English-only TTS model.
# It has 5 speakers.
# {'EN-US': 0, 'EN-BR': 1, 'EN_INDIA': 2, 'EN-AU': 3, 'EN-Default': 4}

from typing import Any, Dict

import onnx
import torch
from melo.api import TTS
from melo.text import language_id_map, language_tone_start_map
from melo.text.chinese import pinyin_to_symbol_map
from melo.text.english import eng_dict, refine_syllables
from pypinyin import Style, lazy_pinyin, phrases_dict, pinyin_dict


def generate_tokens(symbol_list):
    with open("tokens.txt", "w", encoding="utf-8") as f:
        for i, s in enumerate(symbol_list):
            f.write(f"{s} {i}\n")


def add_new_english_words(lexicon):
    """
    Args:
      lexicon:
        Please modify it in-place.
    """

    # Please have a look at
    # https://github.com/myshell-ai/MeloTTS/blob/main/melo/text/cmudict.rep

    # We give several examples below about how to add new words

    # Example 1. Add a new word kaldi

    # It does not contain the word kaldi in cmudict.rep
    # so if we add the following line to cmudict.rep
    #
    #  KALDI K AH0 - L D IH0
    #
    # then we need to change the lexicon like below
    lexicon["kaldi"] = [["K", "AH0"], ["L", "D", "IH0"]]
    #
    # K AH0 and L D IH0 are separated by a dash "-", so
    # ["K", "AH0"] is a in list and ["L", "D", "IH0"] is in a separate list

    # Note: Either kaldi or KALDI is fine. You can use either lowercase or
    # uppercase or both

    # Example 2. Add a new word SF
    #
    # If we add the following line to cmudict.rep
    #
    #  SF EH1 S - EH1 F
    #
    # to cmudict.rep, then we need to change the lexicon like below:
    lexicon["SF"] = [["EH1", "S"], ["EH1", "F"]]

    # Please add your new words here

    # No need to return lexicon since it is changed in-place


def generate_lexicon():
    add_new_english_words(eng_dict)
    with open("lexicon.txt", "w", encoding="utf-8") as f:
        for word in eng_dict:
            phones, tones = refine_syllables(eng_dict[word])
            tones = [t + language_tone_start_map["EN"] for t in tones]
            tones = [str(t) for t in tones]

            phones = " ".join(phones)
            tones = " ".join(tones)

            f.write(f"{word.lower()} {phones} {tones}\n")


def add_meta_data(filename: str, meta_data: Dict[str, Any]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)
    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    onnx.save(model, filename)


class ModelWrapper(torch.nn.Module):
    def __init__(self, model: "SynthesizerTrn"):
        super().__init__()
        self.model = model
        self.lang_id = language_id_map[model.language]

    def forward(
        self,
        x,
        x_lengths,
        tones,
        sid,
        noise_scale,
        length_scale,
        noise_scale_w,
        max_len=None,
    ):
        """
        Args:
          x: A 1-D array of dtype np.int64. Its shape is (token_numbers,)
          tones: A 1-D array of dtype np.int64. Its shape is (token_numbers,)
          lang_id: A 1-D array of dtype np.int64. Its shape is (token_numbers,)
          sid: an integer
        """
        bert = torch.zeros(x.shape[0], 1024, x.shape[1], dtype=torch.float32)
        ja_bert = torch.zeros(x.shape[0], 768, x.shape[1], dtype=torch.float32)
        lang_id = torch.zeros_like(x)
        lang_id[:, 1::2] = self.lang_id
        return self.model.model.infer(
            x=x,
            x_lengths=x_lengths,
            sid=sid,
            tone=tones,
            language=lang_id,
            bert=bert,
            ja_bert=ja_bert,
            noise_scale=noise_scale,
            noise_scale_w=noise_scale_w,
            length_scale=length_scale,
        )[0]


def main():
    generate_lexicon()

    language = "EN"
    model = TTS(language=language, device="cpu")

    generate_tokens(model.hps["symbols"])

    torch_model = ModelWrapper(model)

    opset_version = 13
    x = torch.randint(low=0, high=10, size=(60,), dtype=torch.int64)
    print(x.shape)
    x_lengths = torch.tensor([x.size(0)], dtype=torch.int64)
    sid = torch.tensor([1], dtype=torch.int64)
    tones = torch.zeros_like(x)

    noise_scale = torch.tensor([1.0], dtype=torch.float32)
    length_scale = torch.tensor([1.0], dtype=torch.float32)
    noise_scale_w = torch.tensor([1.0], dtype=torch.float32)

    x = x.unsqueeze(0)
    tones = tones.unsqueeze(0)

    filename = "model.onnx"

    torch.onnx.export(
        torch_model,
        (
            x,
            x_lengths,
            tones,
            sid,
            noise_scale,
            length_scale,
            noise_scale_w,
        ),
        filename,
        opset_version=opset_version,
        input_names=[
            "x",
            "x_lengths",
            "tones",
            "sid",
            "noise_scale",
            "length_scale",
            "noise_scale_w",
        ],
        output_names=["y"],
        dynamic_axes={
            "x": {0: "N", 1: "L"},
            "x_lengths": {0: "N"},
            "tones": {0: "N", 1: "L"},
            "y": {0: "N", 1: "S", 2: "T"},
        },
    )

    meta_data = {
        "model_type": "melo-vits",
        "comment": "melo",
        "version": 2,
        "language": "English",
        "add_blank": int(model.hps.data.add_blank),
        "n_speakers": len(model.hps.data.spk2id),  # 5
        "jieba": 0,
        "sample_rate": model.hps.data.sampling_rate,
        "bert_dim": 1024,
        "ja_bert_dim": 768,
        "speaker_id": 0,
        "lang_id": language_id_map[model.language],
        "tone_start": language_tone_start_map[model.language],
        "url": "https://github.com/myshell-ai/MeloTTS",
        "license": "MIT license",
        "description": "MeloTTS is a high-quality multi-lingual text-to-speech library by MyShell.ai",
    }
    add_meta_data(filename, meta_data)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/melo-tts/export-onnx.py
================================================
#!/usr/bin/env python3
# This script exports ZH_EN TTS model, which supports both Chinese and English.
# This model has only 1 speaker.

from typing import Any, Dict

import onnx
import torch
from melo.api import TTS
from melo.text import language_id_map, language_tone_start_map
from melo.text.chinese import pinyin_to_symbol_map
from melo.text.english import eng_dict, refine_syllables
from pypinyin import Style, lazy_pinyin, phrases_dict, pinyin_dict

for k, v in pinyin_to_symbol_map.items():
    if isinstance(v, list):
        break
    pinyin_to_symbol_map[k] = v.split()


def get_initial_final_tone(word: str):
    initials = lazy_pinyin(word, neutral_tone_with_five=True, style=Style.INITIALS)
    finals = lazy_pinyin(word, neutral_tone_with_five=True, style=Style.FINALS_TONE3)

    ans_phone = []
    ans_tone = []

    for c, v in zip(initials, finals):
        raw_pinyin = c + v
        v_without_tone = v[:-1]
        try:
            tone = v[-1]
        except:
            print("skip", word, initials, finals)
            return [], []

        pinyin = c + v_without_tone
        assert tone in "12345"

        if c:
            v_rep_map = {
                "uei": "ui",
                "iou": "iu",
                "uen": "un",
            }
            if v_without_tone in v_rep_map.keys():
                pinyin = c + v_rep_map[v_without_tone]
        else:
            pinyin_rep_map = {
                "ing": "ying",
                "i": "yi",
                "in": "yin",
                "u": "wu",
            }
            if pinyin in pinyin_rep_map.keys():
                pinyin = pinyin_rep_map[pinyin]
            else:
                single_rep_map = {
                    "v": "yu",
                    "e": "e",
                    "i": "y",
                    "u": "w",
                }
                if pinyin[0] in single_rep_map.keys():
                    pinyin = single_rep_map[pinyin[0]] + pinyin[1:]
                    #  print(word, initials, finals, pinyin)

        if pinyin not in pinyin_to_symbol_map:
            print("skip", pinyin, word, c, v, raw_pinyin)
            continue
        phone = pinyin_to_symbol_map[pinyin]
        ans_phone += phone
        ans_tone += [tone] * len(phone)

    return ans_phone, ans_tone


def generate_tokens(symbol_list):
    with open("tokens.txt", "w", encoding="utf-8") as f:
        for i, s in enumerate(symbol_list):
            f.write(f"{s} {i}\n")


def add_new_english_words(lexicon):
    """
    Args:
      lexicon:
        Please modify it in-place.
    """

    # Please have a look at
    # https://github.com/myshell-ai/MeloTTS/blob/main/melo/text/cmudict.rep

    # We give several examples below about how to add new words

    # Example 1. Add a new word kaldi

    # It does not contain the word kaldi in cmudict.rep
    # so if we add the following line to cmudict.rep
    #
    #  KALDI K AH0 - L D IH0
    #
    # then we need to change the lexicon like below
    lexicon["kaldi"] = [["K", "AH0"], ["L", "D", "IH0"]]
    #
    # K AH0 and L D IH0 are separated by a dash "-", so
    # ["K", "AH0"] is a in list and ["L", "D", "IH0"] is in a separate list

    # Note: Either kaldi or KALDI is fine. You can use either lowercase or
    # uppercase or both

    # Example 2. Add a new word SF
    #
    # If we add the following line to cmudict.rep
    #
    #  SF EH1 S - EH1 F
    #
    # to cmudict.rep, then we need to change the lexicon like below:
    lexicon["SF"] = [["EH1", "S"], ["EH1", "F"]]

    # Please add your new words here

    # No need to return lexicon since it is changed in-place


def generate_lexicon():
    word_dict = pinyin_dict.pinyin_dict
    phrases = phrases_dict.phrases_dict
    add_new_english_words(eng_dict)
    with open("lexicon.txt", "w", encoding="utf-8") as f:
        for word in eng_dict:
            phones, tones = refine_syllables(eng_dict[word])
            tones = [t + language_tone_start_map["EN"] for t in tones]
            tones = [str(t) for t in tones]

            phones = " ".join(phones)
            tones = " ".join(tones)

            f.write(f"{word.lower()} {phones} {tones}\n")

        for key in word_dict:
            if not (0x4E00 <= key <= 0x9FA5):
                continue
            w = chr(key)
            phone, tone = get_initial_final_tone(w)
            if not phone:
                continue
            phone = " ".join(phone)
            tone = " ".join(tone)
            f.write(f"{w} {phone} {tone}\n")

        for w in phrases:
            phone, tone = get_initial_final_tone(w)
            if not phone:
                continue
            assert len(phone) == len(tone), (len(phone), len(tone), phone, tone)
            phone = " ".join(phone)
            tone = " ".join(tone)
            f.write(f"{w} {phone} {tone}\n")


def add_meta_data(filename: str, meta_data: Dict[str, Any]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)
    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    onnx.save(model, filename)


class ModelWrapper(torch.nn.Module):
    def __init__(self, model: "SynthesizerTrn"):
        super().__init__()
        self.model = model
        self.lang_id = language_id_map[model.language]

    def forward(
        self,
        x,
        x_lengths,
        tones,
        sid,
        noise_scale,
        length_scale,
        noise_scale_w,
        max_len=None,
    ):
        """
        Args:
          x: A 1-D array of dtype np.int64. Its shape is (token_numbers,)
          tones: A 1-D array of dtype np.int64. Its shape is (token_numbers,)
          lang_id: A 1-D array of dtype np.int64. Its shape is (token_numbers,)
          sid: an integer
        """
        bert = torch.zeros(x.shape[0], 1024, x.shape[1], dtype=torch.float32)
        ja_bert = torch.zeros(x.shape[0], 768, x.shape[1], dtype=torch.float32)
        lang_id = torch.zeros_like(x)
        lang_id[:, 1::2] = self.lang_id
        return self.model.model.infer(
            x=x,
            x_lengths=x_lengths,
            sid=sid,
            tone=tones,
            language=lang_id,
            bert=bert,
            ja_bert=ja_bert,
            noise_scale=noise_scale,
            noise_scale_w=noise_scale_w,
            length_scale=length_scale,
        )[0]


def main():
    generate_lexicon()

    language = "ZH"
    model = TTS(language=language, device="cpu")

    generate_tokens(model.hps["symbols"])

    torch_model = ModelWrapper(model)

    opset_version = 18
    x = torch.randint(low=0, high=10, size=(60,), dtype=torch.int64)
    print(x.shape)
    x_lengths = torch.tensor([x.size(0)], dtype=torch.int64)
    sid = torch.tensor([1], dtype=torch.int64)
    tones = torch.zeros_like(x)

    noise_scale = torch.tensor([1.0], dtype=torch.float32)
    length_scale = torch.tensor([1.0], dtype=torch.float32)
    noise_scale_w = torch.tensor([1.0], dtype=torch.float32)

    x = x.unsqueeze(0)
    tones = tones.unsqueeze(0)

    filename = "model.onnx"

    torch.onnx.export(
        torch_model,
        (
            x,
            x_lengths,
            tones,
            sid,
            noise_scale,
            length_scale,
            noise_scale_w,
        ),
        filename,
        opset_version=opset_version,
        input_names=[
            "x",
            "x_lengths",
            "tones",
            "sid",
            "noise_scale",
            "length_scale",
            "noise_scale_w",
        ],
        output_names=["y"],
        dynamic_axes={
            "x": {0: "N", 1: "L"},
            "x_lengths": {0: "N"},
            "tones": {0: "N", 1: "L"},
            "y": {0: "N", 1: "S", 2: "T"},
        },
    )

    meta_data = {
        "model_type": "melo-vits",
        "comment": "melo",
        "version": 2,
        "language": "Chinese + English",
        "add_blank": int(model.hps.data.add_blank),
        "n_speakers": 1,
        "jieba": 1,
        "sample_rate": model.hps.data.sampling_rate,
        "bert_dim": 1024,
        "ja_bert_dim": 768,
        "speaker_id": list(model.hps.data.spk2id.values())[0],
        "lang_id": language_id_map[model.language],
        "tone_start": language_tone_start_map[model.language],
        "url": "https://github.com/myshell-ai/MeloTTS",
        "license": "MIT license",
        "description": "MeloTTS is a high-quality multi-lingual text-to-speech library by MyShell.ai",
    }
    add_meta_data(filename, meta_data)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/melo-tts/show-info.py
================================================
#!/usr/bin/env python3
# Copyright      2024  Xiaomi Corp.        (authors: Fangjun Kuang)

import onnxruntime


def show(filename):
    session_opts = onnxruntime.SessionOptions()
    session_opts.log_severity_level = 3
    sess = onnxruntime.InferenceSession(filename, session_opts)
    for i in sess.get_inputs():
        print(i)

    print("-----")

    for i in sess.get_outputs():
        print(i)

    meta = sess.get_modelmeta().custom_metadata_map
    print("*****************************************")
    print("meta\n", meta)


def main():
    print("=========model==========")
    show("./model.onnx")


if __name__ == "__main__":
    main()

"""
=========model==========
NodeArg(name='x', type='tensor(int64)', shape=['N', 'L'])
NodeArg(name='x_lengths', type='tensor(int64)', shape=['N'])
NodeArg(name='tones', type='tensor(int64)', shape=['N', 'L'])
NodeArg(name='sid', type='tensor(int64)', shape=[1])
NodeArg(name='noise_scale', type='tensor(float)', shape=[1])
NodeArg(name='length_scale', type='tensor(float)', shape=[1])
NodeArg(name='noise_scale_w', type='tensor(float)', shape=[1])
-----
NodeArg(name='y', type='tensor(float)', shape=['N', 'S', 'T'])
*****************************************
meta
 {'description': 'MeloTTS is a high-quality multi-lingual text-to-speech library by MyShell.ai',
 'model_type': 'melo-vits', 'license': 'MIT license', 'sample_rate': '44100', 'add_blank': '1',
 'n_speakers': '1', 'bert_dim': '1024', 'language': 'Chinese + English',
 'ja_bert_dim': '768', 'speaker_id': '1', 'comment': 'melo', 'lang_id': '3',
 'tone_start': '0', 'url': 'https://github.com/myshell-ai/MeloTTS'}
"""


================================================
FILE: scripts/melo-tts/test.py
================================================
#!/usr/bin/env python3

from typing import Iterable, List, Tuple

import jieba
import onnxruntime as ort
import soundfile as sf
import torch


class Lexicon:
    def __init__(self, lexion_filename: str, tokens_filename: str):
        tokens = dict()
        with open(tokens_filename, encoding="utf-8") as f:
            for line in f:
                s, i = line.split()
                tokens[s] = int(i)
        # Map "v" to "V" token ID (same as post_replace_ph in MeloTTS, only for English models)
        # English models have "V" with token ID 14
        if tokens.get("V") == 14 and "v" in tokens:
            tokens["v"] = tokens["V"]


        lexicon = dict()
        with open(lexion_filename, encoding="utf-8") as f:
            for line in f:
                splits = line.split()
                word_or_phrase = splits[0]
                phone_tone_list = splits[1:]
                assert len(phone_tone_list) & 1 == 0, len(phone_tone_list)
                phones = phone_tone_list[: len(phone_tone_list) // 2]
                phones = [tokens[p] for p in phones]

                tones = phone_tone_list[len(phone_tone_list) // 2 :]
                tones = [int(t) for t in tones]

                lexicon[word_or_phrase] = (phones, tones)
        lexicon["呣"] = lexicon["母"]
        lexicon["嗯"] = lexicon["恩"]
        self.lexicon = lexicon

        punctuation = ["!", "?", "…", ",", ".", "'", "-"]
        for p in punctuation:
            i = tokens[p]
            tone = 0
            self.lexicon[p] = ([i], [tone])
        self.lexicon[" "] = ([tokens["_"]], [0])

    def _convert(self, text: str) -> Tuple[List[int], List[int]]:
        phones = []
        tones = []

        if text == "，":
            text = ","
        elif text == "。":
            text = "."
        elif text == "！":
            text = "!"
        elif text == "？":
            text = "?"

        if text not in self.lexicon:
            print("t", text)
            if len(text) > 1:
                for w in text:
                    print("w", w)
                    p, t = self.convert(w)
                    if p:
                        phones += p
                        tones += t
            return phones, tones

        phones, tones = self.lexicon[text]
        return phones, tones

    def convert(self, text_list: Iterable[str]) -> Tuple[List[int], List[int]]:
        phones = []
        tones = []
        for text in text_list:
            print(text)
            p, t = self._convert(text)
            phones += p
            tones += t
        return phones, tones


class OnnxModel:
    def __init__(self, filename):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 4

        self.session_opts = session_opts
        self.model = ort.InferenceSession(
            filename,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )
        meta = self.model.get_modelmeta().custom_metadata_map
        self.bert_dim = int(meta["bert_dim"])
        self.ja_bert_dim = int(meta["ja_bert_dim"])
        self.add_blank = int(meta["add_blank"])
        self.sample_rate = int(meta["sample_rate"])
        self.speaker_id = int(meta["speaker_id"])
        self.lang_id = int(meta["lang_id"])
        self.sample_rate = int(meta["sample_rate"])

    def __call__(self, x, tones):
        """
        Args:
          x: 1-D int64 torch tensor
          tones: 1-D int64 torch tensor
        """
        x = x.unsqueeze(0)
        tones = tones.unsqueeze(0)

        print(x.shape, tones.shape)
        sid = torch.tensor([self.speaker_id], dtype=torch.int64)
        noise_scale = torch.tensor([0.6], dtype=torch.float32)
        length_scale = torch.tensor([1.0], dtype=torch.float32)
        noise_scale_w = torch.tensor([0.8], dtype=torch.float32)

        x_lengths = torch.tensor([x.shape[-1]], dtype=torch.int64)

        y = self.model.run(
            ["y"],
            {
                "x": x.numpy(),
                "x_lengths": x_lengths.numpy(),
                "tones": tones.numpy(),
                "sid": sid.numpy(),
                "noise_scale": noise_scale.numpy(),
                "noise_scale_w": noise_scale_w.numpy(),
                "length_scale": length_scale.numpy(),
            },
        )[0][0][0]
        return y


def main():
    lexicon = Lexicon(lexion_filename="./lexicon.txt", tokens_filename="./tokens.txt")

    text = "这是一个使用 next generation kaldi 的 text to speech 中英文例子. Thank you! 你觉得如何呢? are you ok? Fantastic! How about you?"
    s = jieba.cut(text, HMM=True)

    phones, tones = lexicon.convert(s)

    model = OnnxModel("./model.onnx")

    if model.add_blank:
        new_phones = [0] * (2 * len(phones) + 1)
        new_tones = [0] * (2 * len(tones) + 1)

        new_phones[1::2] = phones
        new_tones[1::2] = tones

        phones = new_phones
        tones = new_tones

    phones = torch.tensor(phones, dtype=torch.int64)
    tones = torch.tensor(tones, dtype=torch.int64)

    print(phones.shape, tones.shape)

    y = model(x=phones, tones=tones)
    sf.write("./test.wav", y, model.sample_rate)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/mobile-asr-models/.gitignore
================================================
run2.sh


================================================
FILE: scripts/mobile-asr-models/README.md
================================================
# Introduction

This folder contains scripts to convert ASR models for mobile platforms
supporting only batch size equal to 1.

The advantage of fixing the batch size to 1 is that it provides more
opportunities for model optimization and quantization.

To give you a concrete example, for the following model
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english

| | encoder-epoch-99-avg-1.onnx | encoder-epoch-99-avg-1.int8.onnx|
|---|---|---|
|Dynamic batch size| 315 MB| 174 MB|
|Batch size fixed to 1| 242 MB | 100 MB |

The following [colab notebook](https://colab.research.google.com/drive/1RsVZbsxbPjazeGrNNbZNjXCYbEG2F2DU?usp=sharing)
provides examples to use the above two models.

**WARNING**: Tested with `onnxruntime==1.16.3 onnx==1.15.0`.

```bash
pip install onnxruntime==1.16.3 onnx==1.15.0
```

## More examples

### [sherpa-onnx-streaming-zipformer-korean-2024-06-16](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-korean-2024-06-16-korean)


| | encoder-epoch-99-avg-1.onnx | encoder-epoch-99-avg-1.int8.onnx|
|---|---|---|
|Dynamic batch size| 279 MB| 122 MB|
|Batch size fixed to 1| 264 MB | 107 MB |

### [sherpa-onnx-streaming-zipformer-en-20M-2023-02-17](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-en-20m-2023-02-17-english)

| | encoder-epoch-99-avg-1.onnx | encoder-epoch-99-avg-1.int8.onnx|
|---|---|---|
|Dynamic batch size| 85 MB| 41 MB|
|Batch size fixed to 1| 75 MB | 32 MB |

### [sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12-chinese)

| | encoder-epoch-20-avg-1-chunk-16-left-128.onnx | encoder-epoch-20-avg-1-chunk-16-left-128.int8.onnx|
|---|---|---|
|Dynamic batch size| 249 MB| 67 MB|
|Batch size fixed to 1| 247 MB | 65 MB |

### [icefall-asr-zipformer-streaming-wenetspeech-20230615](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#pkufool-icefall-asr-zipformer-streaming-wenetspeech-20230615-chinese)

| | encoder-epoch-12-avg-4-chunk-16-left-128.onnx | encoder-epoch-12-avg-4-chunk-16-left-128.int8.onnx|
|---|---|---|
|Dynamic batch size| 250 MB| 68 MB|
|Batch size fixed to 1| 247 MB | 65 MB |

### [sherpa-onnx-streaming-zipformer-en-2023-06-26](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-en-2023-06-26-english)


| | encoder-epoch-99-avg-1-chunk-16-left-128.onnx | encoder-epoch-99-avg-1-chunk-16-left-128.int8.onnx|
|---|---|---|
|Dynamic batch size| 250 MB| 68 MB|
|Batch size fixed to 1| 247 MB | 65 MB |

### [sherpa-onnx-streaming-zipformer-en-2023-06-21](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-en-2023-06-21-english)

| | encoder-epoch-99-avg-1.onnx | encoder-epoch-99-avg-1.int8.onnx|
|---|---|---|
|Dynamic batch size| 338 MB| 180 MB|
|Batch size fixed to 1| 264 MB | 107 MB |

### [sherpa-onnx-streaming-zipformer-en-2023-02-21](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-en-2023-02-21-english)

| | encoder-epoch-99-avg-1.onnx | encoder-epoch-99-avg-1.int8.onnx|
|---|---|---|
|Dynamic batch size| 279 MB| 122 MB|
|Batch size fixed to 1| 264 MB | 107 MB |

### [sherpa-onnx-streaming-zipformer-fr-2023-04-14](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#shaojieli-sherpa-onnx-streaming-zipformer-fr-2023-04-14-french)

| | encoder-epoch-29-avg-9-with-averaged-model.onnx | encoder-epoch-29-avg-9-with-averaged-model.int8.onnx|
|---|---|---|
|Dynamic batch size| 279 MB| 121 MB|
|Batch size fixed to 1| 264 MB | 107 MB |

### [sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16-bilingual-chinese-english)

| | encoder-epoch-99-avg-1.onnx | encoder-epoch-99-avg-1.int8.onnx|
|---|---|---|
|Dynamic batch size| 85 MB| 41 MB|
|Batch size fixed to 1| 75 MB | 32 MB |

### [sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-zh-14m-2023-02-23-chinese)

| | encoder-epoch-99-avg-1.onnx | encoder-epoch-99-avg-1.int8.onnx|
|---|---|---|
|Dynamic batch size| 40 MB| 21 MB|
|Batch size fixed to 1| 33 MB | 15 MB |

### [sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01](https://k2-fsa.github.io/sherpa/onnx/kws/pretrained_models/index.html#sherpa-onnx-kws-zipformer-wenetspeech-3-3m-2024-01-01-chinese)

| | encoder-epoch-12-avg-2-chunk-16-left-64.onnx | encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx|
|---|---|---|
|Dynamic batch size| 12 MB| 4.6 MB|
|Batch size fixed to 1| 11 MB | 3.9 MB |

### [sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01](https://k2-fsa.github.io/sherpa/onnx/kws/pretrained_models/index.html#sherpa-onnx-kws-zipformer-gigaspeech-3-3m-2024-01-01-english)

| | encoder-epoch-12-avg-2-chunk-16-left-64.onnx | encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx|
|---|---|---|
|Dynamic batch size| 12 MB| 4.6 MB|
|Batch size fixed to 1| 11 MB | 3.9 MB |


================================================
FILE: scripts/mobile-asr-models/dynamic_quantization.py
================================================
#!/usr/bin/env python3
import argparse

import onnxruntime
from onnxruntime.quantization import QuantType, quantize_dynamic


def show(filename):
    session_opts = onnxruntime.SessionOptions()
    session_opts.log_severity_level = 3
    sess = onnxruntime.InferenceSession(filename, session_opts)
    for i in sess.get_inputs():
        print(i)

    print("-----")

    for i in sess.get_outputs():
        print(i)


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--input",
        type=str,
        required=True,
        help="Input onnx model",
    )

    parser.add_argument(
        "--output",
        type=str,
        required=True,
        help="Output onnx model",
    )
    return parser.parse_args()


def main():
    args = get_args()
    print(vars(args))
    print(f"----------{args.input}----------")
    show(args.input)
    print("------------------------------")

    quantize_dynamic(
        model_input=args.input,
        model_output=args.output,
        op_types_to_quantize=["MatMul"],
        weight_type=QuantType.QInt8,
    )


if __name__ == "__main__":
    main()


================================================
FILE: scripts/mobile-asr-models/generate-asr.py
================================================
#!/usr/bin/env python3

import argparse
from dataclasses import dataclass
import jinja2


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--total",
        type=int,
        default=1,
        help="Number of runners",
    )
    parser.add_argument(
        "--index",
        type=int,
        default=0,
        help="Index of the current runner",
    )
    return parser.parse_args()


@dataclass
class Model:
    # We will download
    # https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/{model_name}.tar.bz2
    model_name: str

    cmd: str


def get_streaming_zipformer_transducer_models():
    models = [
        Model(
            model_name="sherpa-onnx-streaming-zipformer-korean-2024-06-16",
            cmd="""
            ./run-impl.sh \
              --input $src/encoder-epoch-99-avg-1.onnx \
              --output1 $dst/encoder-epoch-99-avg-1.onnx \
              --output2 $dst/encoder-epoch-99-avg-1.int8.onnx

            cp -v $src/bpe.model $dst/ || true
            cp -v $src/tokens.txt $dst/
            cp -av $src/test_wavs $dst/
            cp -v $src/decoder-epoch-99-avg-1.onnx $dst/
            cp -v $src/joiner-epoch-99-avg-1.int8.onnx $dst/

            cat > $dst/notes.md <<EOF
# Introduction
This model is converted from
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$src.tar.bz2
and it supports only batch size equal to 1.
EOF
            """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12",
            cmd="""
            ./run-impl.sh \
              --input $src/encoder-epoch-20-avg-1-chunk-16-left-128.onnx \
              --output1 $dst/encoder-epoch-20-avg-1-chunk-16-left-128.onnx \
              --output2 $dst/encoder-epoch-20-avg-1-chunk-16-left-128.int8.onnx

            cp -v $src/bpe.model $dst/ || true
            cp -v $src/README.md $dst/
            cp -v $src/tokens.txt $dst/
            cp -av $src/test_wavs $dst/
            cp -v $src/decoder-epoch-20-avg-1-chunk-16-left-128.onnx $dst/
            cp -v $src/joiner-epoch-20-avg-1-chunk-16-left-128.int8.onnx $dst/

            cat > $dst/notes.md <<EOF
# Introduction
This model is converted from
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$src.tar.bz2
and it supports only batch size equal to 1.
EOF
            """,
        ),
        Model(
            model_name="icefall-asr-zipformer-streaming-wenetspeech-20230615",
            cmd="""
            ./run-impl.sh \
              --input $src/exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx \
              --output1 $dst/encoder-epoch-12-avg-4-chunk-16-left-128.onnx \
              --output2 $dst/encoder-epoch-12-avg-4-chunk-16-left-128.int8.onnx

            cp -fv $src/README.md $dst/
            cp -v $src/data/lang_char/tokens.txt $dst/
            cp -av $src/test_wavs $dst/
            cp -v $src/exp/decoder-epoch-12-avg-4-chunk-16-left-128.onnx $dst/
            cp -v $src/exp/joiner-epoch-12-avg-4-chunk-16-left-128.int8.onnx $dst/

            cat > $dst/notes.md <<EOF
# Introduction
This model is converted from
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$src.tar.bz2
and it supports only batch size equal to 1.
EOF
            """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-en-2023-06-26",
            cmd="""
            ./run-impl.sh \
              --input $src/encoder-epoch-99-avg-1-chunk-16-left-128.onnx \
              --output1 $dst/encoder-epoch-99-avg-1-chunk-16-left-128.onnx \
              --output2 $dst/encoder-epoch-99-avg-1-chunk-16-left-128.int8.onnx

            cp -v $src/bpe.model $dst/ || true
            cp -v $src/README.md $dst/
            cp -v $src/tokens.txt $dst/
            cp -av $src/test_wavs $dst/
            cp -v $src/decoder-epoch-99-avg-1-chunk-16-left-128.onnx $dst/
            cp -v $src/joiner-epoch-99-avg-1-chunk-16-left-128.int8.onnx $dst/

            cat > $dst/notes.md <<EOF
# Introduction
This model is converted from
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$src.tar.bz2
and it supports only batch size equal to 1.
EOF
              """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-en-2023-06-21",
            cmd="""
            ./run-impl.sh \
              --input $src/encoder-epoch-99-avg-1.onnx \
              --output1 $dst/encoder-epoch-99-avg-1.onnx \
              --output2 $dst/encoder-epoch-99-avg-1.int8.onnx

            cp -fv $src/README.md $dst/
            cp -v $src/tokens.txt $dst/
            cp -av $src/test_wavs $dst/
            cp -v $src/decoder-epoch-99-avg-1.onnx $dst/
            cp -v $src/joiner-epoch-99-avg-1.int8.onnx $dst/

            cat > $dst/notes.md <<EOF
# Introduction
This model is converted from
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$src.tar.bz2
and it supports only batch size equal to 1.
EOF
                """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-en-2023-02-21",
            cmd="""
            ./run-impl.sh \
              --input $src/encoder-epoch-99-avg-1.onnx \
              --output1 $dst/encoder-epoch-99-avg-1.onnx \
              --output2 $dst/encoder-epoch-99-avg-1.int8.onnx

            cp -v $src/bpe.model $dst/ || true
            cp -v $src/README.md $dst/ || true
            cp -v $src/tokens.txt $dst/
            cp -av $src/test_wavs $dst/
            cp -v $src/decoder-epoch-99-avg-1.onnx $dst/
            cp -v $src/joiner-epoch-99-avg-1.int8.onnx $dst/

            cat > $dst/notes.md <<EOF
# Introduction
This model is converted from
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$src.tar.bz2
and it supports only batch size equal to 1.
EOF
              """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20",
            cmd="""
            ./run-impl.sh \
              --input $src/encoder-epoch-99-avg-1.onnx \
              --output1 $dst/encoder-epoch-99-avg-1.onnx \
              --output2 $dst/encoder-epoch-99-avg-1.int8.onnx

            cp -v $src/README.md $dst/
            cp -v $src/tokens.txt $dst/
            cp -av $src/test_wavs $dst/
            cp -v $src/decoder-epoch-99-avg-1.onnx $dst/
            cp -v $src/joiner-epoch-99-avg-1.int8.onnx $dst/

            cat > $dst/notes.md <<EOF
# Introduction
This model is converted from
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$src.tar.bz2
and it supports only batch size equal to 1.
EOF
            """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-fr-2023-04-14",
            cmd="""
            ./run-impl.sh \
              --input $src/encoder-epoch-29-avg-9-with-averaged-model.onnx \
              --output1 $dst/encoder-epoch-29-avg-9-with-averaged-model.onnx \
              --output2 $dst/encoder-epoch-29-avg-9-with-averaged-model.int8.onnx

            cp -v $src/bpe.model $dst/ || true
            cp -v $src/README.md $dst/ || true
            cp -v $src/tokens.txt $dst/
            cp -av $src/test_wavs $dst/
            cp -v $src/decoder-epoch-29-avg-9-with-averaged-model.onnx $dst/
            cp -v $src/joiner-epoch-29-avg-9-with-averaged-model.int8.onnx $dst/

            cat > $dst/notes.md <<EOF
# Introduction
This model is converted from
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$src.tar.bz2
and it supports only batch size equal to 1.
EOF
              """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16",
            cmd="""
            ./run-impl.sh \
              --input $src/encoder-epoch-99-avg-1.onnx \
              --output1 $dst/encoder-epoch-99-avg-1.onnx \
              --output2 $dst/encoder-epoch-99-avg-1.int8.onnx

            mkdir $dst/{64,96}

            ./run-impl.sh \
              --input $src/64/encoder-epoch-99-avg-1.onnx \
              --output1 $dst/64/encoder-epoch-99-avg-1.onnx \
              --output2 $dst/64/encoder-epoch-99-avg-1.int8.onnx

            ./run-impl.sh \
              --input $src/96/encoder-epoch-99-avg-1.onnx \
              --output1 $dst/96/encoder-epoch-99-avg-1.onnx \
              --output2 $dst/96/encoder-epoch-99-avg-1.int8.onnx

            cp -v $src/bpe.model $dst/ || true
            cp -v $src/README.md $dst/ || true
            cp -av $src/test_wavs $dst/

            cp -v $src/tokens.txt $dst/
            cp -v $src/decoder-epoch-99-avg-1.onnx $dst/
            cp -v $src/joiner-epoch-99-avg-1.int8.onnx $dst/

            cp -v $src/tokens.txt $dst/64/
            cp -v $src/64/decoder-epoch-99-avg-1.onnx $dst/64/
            cp -v $src/64/joiner-epoch-99-avg-1.int8.onnx $dst/64/

            cp -v $src/tokens.txt $dst/96/
            cp -v $src/96/decoder-epoch-99-avg-1.onnx $dst/96/
            cp -v $src/96/joiner-epoch-99-avg-1.int8.onnx $dst/96/

            cat > $dst/notes.md <<EOF
# Introduction
This model is converted from
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$src.tar.bz2
and it supports only batch size equal to 1.
EOF
              """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23",
            cmd="""
            ./run-impl.sh \
              --input $src/encoder-epoch-99-avg-1.onnx \
              --output1 $dst/encoder-epoch-99-avg-1.onnx \
              --output2 $dst/encoder-epoch-99-avg-1.int8.onnx

            cp -v $src/bpe.model $dst/ || true
            cp -v $src/README.md $dst/ || true
            cp -v $src/tokens.txt $dst/
            cp -av $src/test_wavs $dst/
            cp -v $src/decoder-epoch-99-avg-1.onnx $dst/
            cp -v $src/joiner-epoch-99-avg-1.int8.onnx $dst/

            cat > $dst/notes.md <<EOF
# Introduction
This model is converted from
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$src.tar.bz2
and it supports only batch size equal to 1.
EOF
            """,
        ),
        Model(
            model_name="sherpa-onnx-streaming-zipformer-en-20M-2023-02-17",
            cmd="""
            ./run-impl.sh \
              --input $src/encoder-epoch-99-avg-1.onnx \
              --output1 $dst/encoder-epoch-99-avg-1.onnx \
              --output2 $dst/encoder-epoch-99-avg-1.int8.onnx

            cp -v $src/bpe.model $dst/ || true
            cp -v $src/README.md $dst/ || true
            cp -v $src/tokens.txt $dst/
            cp -av $src/test_wavs $dst/
            cp -v $src/decoder-epoch-99-avg-1.onnx $dst/
            cp -v $src/joiner-epoch-99-avg-1.int8.onnx $dst/

            cat > $dst/notes.md <<EOF
# Introduction
This model is converted from
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$src.tar.bz2
and it supports only batch size equal to 1.
EOF
            """,
        ),
    ]

    return models


def get_models():
    return get_streaming_zipformer_transducer_models()


def main():
    args = get_args()
    index = args.index
    total = args.total
    assert 0 <= index < total, (index, total)

    all_model_list = get_models()

    num_models = len(all_model_list)

    num_per_runner = num_models // total
    if num_per_runner <= 0:
        raise ValueError(f"num_models: {num_models}, num_runners: {total}")

    start = index * num_per_runner
    end = start + num_per_runner

    remaining = num_models - args.total * num_per_runner

    print(f"{index}/{total}: {start}-{end}/{num_models}")

    d = dict()
    d["model_list"] = all_model_list[start:end]
    if index < remaining:
        s = args.total * num_per_runner + index
        d["model_list"].append(all_model_list[s])
        print(f"{s}/{num_models}")

    filename_list = [
        "./run2.sh",
    ]
    for filename in filename_list:
        environment = jinja2.Environment()
        with open(f"{filename}.in") as f:
            s = f.read()
        template = environment.from_string(s)

        s = template.render(**d)
        with open(filename, "w") as f:
            print(s, file=f)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/mobile-asr-models/generate-kws.py
================================================
#!/usr/bin/env python3

import argparse
from dataclasses import dataclass
import jinja2


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--total",
        type=int,
        default=1,
        help="Number of runners",
    )
    parser.add_argument(
        "--index",
        type=int,
        default=0,
        help="Index of the current runner",
    )
    return parser.parse_args()


@dataclass
class Model:
    # We will download
    # https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/{model_name}.tar.bz2
    model_name: str

    cmd: str


def get_kws_models():
    models = [
        Model(
            model_name="sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01",
            cmd="""
            ./run-impl.sh \
              --input $src/encoder-epoch-12-avg-2-chunk-16-left-64.onnx \
              --output1 $dst/encoder-epoch-12-avg-2-chunk-16-left-64.onnx \
              --output2 $dst/encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx

            cp -v $src/README.md $dst/
            cp -v $src/*.txt $dst/
            cp -av $src/test_wavs $dst/
            cp -v $src/decoder-epoch-12-avg-2-chunk-16-left-64.onnx $dst/
            cp -v $src/joiner-epoch-12-avg-2-chunk-16-left-64.int8.onnx $dst/

            cat > $dst/notes.md <<EOF
# Introduction
This model is converted from
https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/$src.tar.bz2
and it supports only batch size equal to 1.
EOF
                  """,
        ),
        Model(
            model_name="sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01",
            cmd="""
            ./run-impl.sh \
              --input $src/encoder-epoch-12-avg-2-chunk-16-left-64.onnx \
              --output1 $dst/encoder-epoch-12-avg-2-chunk-16-left-64.onnx \
              --output2 $dst/encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx

            cp -v $src/bpe.model $dst/
            cp -v $src/README.md $dst/
            cp -v $src/*.txt $dst/
            cp -av $src/test_wavs $dst/
            cp -v $src/decoder-epoch-12-avg-2-chunk-16-left-64.onnx $dst/
            cp -v $src/joiner-epoch-12-avg-2-chunk-16-left-64.int8.onnx $dst/

            cat > $dst/notes.md <<EOF
# Introduction
This model is converted from
https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/$src.tar.bz2
and it supports only batch size equal to 1.
EOF
                  """,
        ),
    ]
    return models


def get_models():
    return get_kws_models()


def main():
    args = get_args()
    index = args.index
    total = args.total
    assert 0 <= index < total, (index, total)

    all_model_list = get_models()

    num_models = len(all_model_list)

    num_per_runner = num_models // total
    if num_per_runner <= 0:
        raise ValueError(f"num_models: {num_models}, num_runners: {total}")

    start = index * num_per_runner
    end = start + num_per_runner

    remaining = num_models - args.total * num_per_runner

    print(f"{index}/{total}: {start}-{end}/{num_models}")

    d = dict()
    d["model_list"] = all_model_list[start:end]
    if index < remaining:
        s = args.total * num_per_runner + index
        d["model_list"].append(all_model_list[s])
        print(f"{s}/{num_models}")

    filename_list = [
        "./run2.sh",
    ]
    for filename in filename_list:
        environment = jinja2.Environment()
        with open(f"{filename}.in") as f:
            s = f.read()
        template = environment.from_string(s)

        s = template.render(**d)
        with open(filename, "w") as f:
            print(s, file=f)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/mobile-asr-models/parse_options.sh
================================================
#!/usr/bin/env bash

# Copyright 2012  Johns Hopkins University (Author: Daniel Povey);
#                 Arnab Ghoshal, Karel Vesely

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.


# Parse command-line options.
# To be sourced by another script (as in ". parse_options.sh").
# Option format is: --option-name arg
# and shell variable "option_name" gets set to value "arg."
# The exception is --help, which takes no arguments, but prints the
# $help_message variable (if defined).


###
### The --config file options have lower priority to command line
### options, so we need to import them first...
###

# Now import all the configs specified by command-line, in left-to-right order
for ((argpos=1; argpos<$#; argpos++)); do
  if [ "${!argpos}" == "--config" ]; then
    argpos_plus1=$((argpos+1))
    config=${!argpos_plus1}
    [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1
    . $config  # source the config file.
  fi
done


###
### Now we process the command line options
###
while true; do
  [ -z "${1:-}" ] && break;  # break if there are no arguments
  case "$1" in
    # If the enclosing script is called with --help option, print the help
    # message and exit.  Scripts should put help messages in $help_message
    --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2;
      else printf "$help_message\n" 1>&2 ; fi;
      exit 0 ;;
    --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'"
      exit 1 ;;
    # If the first command-line argument begins with "--" (e.g. --foo-bar),
    # then work out the variable name as $name, which will equal "foo_bar".
    --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`;
      # Next we test whether the variable in question is undefned-- if so it's
      # an invalid option and we die.  Note: $0 evaluates to the name of the
      # enclosing script.
      # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar
      # is undefined.  We then have to wrap this test inside "eval" because
      # foo_bar is itself inside a variable ($name).
      eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;

      oldval="`eval echo \\$$name`";
      # Work out whether we seem to be expecting a Boolean argument.
      if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then
        was_bool=true;
      else
        was_bool=false;
      fi

      # Set the variable to the right value-- the escaped quotes make it work if
      # the option had spaces, like --cmd "queue.pl -sync y"
      eval $name=\"$2\";

      # Check that Boolean-valued arguments are really Boolean.
      if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
        echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
        exit 1;
      fi
      shift 2;
      ;;
  *) break;
  esac
done


# Check for an empty argument to the --cmd option, which can easily occur as a
# result of scripting errors.
[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1;


true; # so this script returns exit code 0.


================================================
FILE: scripts/mobile-asr-models/run2.sh.in
================================================
#!/usr/bin/env bash
set -e

{% for model in model_list %}

src={{ model.model_name }}

if [[ $src == *kws* ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/$src.tar.bz2

else
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$src.tar.bz2
fi

tar xvf $src.tar.bz2
rm $src.tar.bz2

dst=$src-mobile

mkdir -p $dst

{{ model.cmd }}

echo "---$src---"
ls -lh $src
echo "---$dst---"
ls -lh $dst
rm -rf $src

tar cjfv $dst.tar.bz2 $dst

if [[ $src == *kws* ]]; then
  mkdir -p ../../kws
  mv *.tar.bz2 ../../kws/
else
  mv *.tar.bz2 ../../
fi
rm -rf $dst

{% endfor %}


================================================
FILE: scripts/moonshine/.gitignore
================================================
tokenizer.json


================================================
FILE: scripts/moonshine/README.md
================================================
# Introduction

This directory contains models from
https://github.com/usefulsensors/moonshine

See its license at
https://github.com/usefulsensors/moonshine/blob/main/LICENSE


================================================
FILE: scripts/moonshine/export-onnx.py
================================================
#!/usr/bin/env python3
# Copyright      2024  Xiaomi Corp.        (authors: Fangjun Kuang)

from pathlib import Path

import tokenizers
from onnxruntime.quantization import QuantType, quantize_dynamic


def generate_tokens():
    if Path("./tokens.txt").is_file():
        return
    print("Generating tokens.txt")
    tokenizer = tokenizers.Tokenizer.from_file("./tokenizer.json")
    vocab_size = tokenizer.get_vocab_size()
    with open("tokens.txt", "w", encoding="utf-8") as f:
        for i in range(vocab_size):
            s = tokenizer.id_to_token(i).strip()
            f.write(f"{s}\t{i}\n")


def main():
    generate_tokens()

    # Note(fangjun): Don't use int8 for the preprocessor since it has
    # a larger impact on the accuracy
    for f in ["uncached_decode", "cached_decode", "encode"]:
        if Path(f"{f}.int8.onnx").is_file():
            continue

        print("processing", f)
        quantize_dynamic(
            model_input=f"{f}.onnx",
            model_output=f"{f}.int8.onnx",
            weight_type=QuantType.QInt8,
        )


if __name__ == "__main__":
    main()


================================================
FILE: scripts/moonshine/test.py
================================================
#!/usr/bin/env python3
# Copyright      2024  Xiaomi Corp.        (authors: Fangjun Kuang)
import datetime as dt

import librosa
import numpy as np
import onnxruntime as ort
import soundfile as sf


def display(sess, name):
    print(f"=========={name} Input==========")
    for i in sess.get_inputs():
        print(i)
    print(f"=========={name} Output==========")
    for i in sess.get_outputs():
        print(i)


class OnnxModel:
    def __init__(
        self,
        preprocess: str,
        encode: str,
        uncached_decode: str,
        cached_decode: str,
    ):
        self.init_preprocess(preprocess)
        display(self.preprocess, "preprocess")

        self.init_encode(encode)
        display(self.encode, "encode")

        self.init_uncached_decode(uncached_decode)
        display(self.uncached_decode, "uncached_decode")

        self.init_cached_decode(cached_decode)
        display(self.cached_decode, "cached_decode")

    def init_preprocess(self, preprocess):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.preprocess = ort.InferenceSession(
            preprocess,
            sess_options=session_opts,
            providers=["CPUExecutionProvider"],
        )

    def init_encode(self, encode):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.encode = ort.InferenceSession(
            encode,
            sess_options=session_opts,
            providers=["CPUExecutionProvider"],
        )

    def init_uncached_decode(self, uncached_decode):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.uncached_decode = ort.InferenceSession(
            uncached_decode,
            sess_options=session_opts,
            providers=["CPUExecutionProvider"],
        )

    def init_cached_decode(self, cached_decode):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.cached_decode = ort.InferenceSession(
            cached_decode,
            sess_options=session_opts,
            providers=["CPUExecutionProvider"],
        )

    def run_preprocess(self, audio):
        """
        Args:
          audio: (batch_size, num_samples), float32
        Returns:
          A tensor of shape (batch_size, T, dim), float32
        """
        return self.preprocess.run(
            [
                self.preprocess.get_outputs()[0].name,
            ],
            {
                self.preprocess.get_inputs()[0].name: audio,
            },
        )[0]

    def run_encode(self, features):
        """
        Args:
          features: (batch_size, T, dim)
        Returns:
          A tensor of shape (batch_size, T, dim)
        """
        features_len = np.array([features.shape[1]], dtype=np.int32)

        return self.encode.run(
            [
                self.encode.get_outputs()[0].name,
            ],
            {
                self.encode.get_inputs()[0].name: features,
                self.encode.get_inputs()[1].name: features_len,
            },
        )[0]

    def run_uncached_decode(self, token: int, token_len: int, encoder_out: np.ndarray):
        """
        Args:
          token: The current token
          token_len: Number of predicted tokens so far
          encoder_out: A tensor fo shape (batch_size, T, dim)
        Returns:
          A a tuple:
            - a tensor of shape (batch_size, 1, dim)
            - a list of states
        """
        token_tensor = np.array([[token]], dtype=np.int32)
        token_len_tensor = np.array([token_len], dtype=np.int32)

        num_outs = len(self.uncached_decode.get_outputs())
        out_names = [
            self.uncached_decode.get_outputs()[i].name for i in range(num_outs)
        ]

        out = self.uncached_decode.run(
            out_names,
            {
                self.uncached_decode.get_inputs()[0].name: token_tensor,
                self.uncached_decode.get_inputs()[1].name: encoder_out,
                self.uncached_decode.get_inputs()[2].name: token_len_tensor,
            },
        )

        logits = out[0]
        states = out[1:]

        return logits, states

    def run_cached_decode(
        self, token: int, token_len: int, encoder_out: np.ndarray, states
    ):
        """
        Args:
          token: The current token
          token_len: Number of predicted tokens so far
          encoder_out: A tensor of shape (batch_size, T, dim)
          states: previous states
        Returns:
          A a tuple:
            - a tensor of shape (batch_size, 1, dim)
            - a list of states
        """
        token_tensor = np.array([[token]], dtype=np.int32)
        token_len_tensor = np.array([token_len], dtype=np.int32)

        num_outs = len(self.cached_decode.get_outputs())
        out_names = [self.cached_decode.get_outputs()[i].name for i in range(num_outs)]

        states_inputs = {}
        for i in range(3, len(self.cached_decode.get_inputs())):
            name = self.cached_decode.get_inputs()[i].name
            states_inputs[name] = states[i - 3]

        out = self.cached_decode.run(
            out_names,
            {
                self.cached_decode.get_inputs()[0].name: token_tensor,
                self.cached_decode.get_inputs()[1].name: encoder_out,
                self.cached_decode.get_inputs()[2].name: token_len_tensor,
                **states_inputs,
            },
        )

        logits = out[0]
        states = out[1:]

        return logits, states


def main():
    wave = "./1.wav"
    id2token = dict()
    token2id = dict()
    with open("./tokens.txt", encoding="utf-8") as f:
        for k, line in enumerate(f):
            t, idx = line.split("\t")
            id2token[int(idx)] = t
            token2id[t] = int(idx)

    model = OnnxModel(
        preprocess="./preprocess.onnx",
        encode="./encode.int8.onnx",
        uncached_decode="./uncached_decode.int8.onnx",
        cached_decode="./cached_decode.int8.onnx",
    )

    audio, sample_rate = sf.read(wave, dtype="float32", always_2d=True)
    audio = audio[:, 0]  # only use the first channel
    if sample_rate != 16000:
        audio = librosa.resample(
            audio,
            orig_sr=sample_rate,
            target_sr=16000,
        )
        sample_rate = 16000
    audio = audio[None]  # (1, num_samples)
    print("audio.shape", audio.shape)  # (1, 159414)

    start_t = dt.datetime.now()

    features = model.run_preprocess(audio)  # (1, 413, 288)
    print("features", features.shape)

    sos = token2id["<s>"]
    eos = token2id["</s>"]

    tokens = [sos]

    encoder_out = model.run_encode(features)
    print("encoder_out.shape", encoder_out.shape)  # (1, 413, 288)

    logits, states = model.run_uncached_decode(
        token=tokens[-1],
        token_len=len(tokens),
        encoder_out=encoder_out,
    )

    print("logits.shape", logits.shape)  # (1, 1, 32768)
    print("len(states)", len(states))  # 24

    max_len = int((audio.shape[-1] / 16000) * 6)

    for i in range(max_len):
        token = logits.squeeze().argmax()
        if token == eos:
            break
        tokens.append(token)

        logits, states = model.run_cached_decode(
            token=tokens[-1],
            token_len=len(tokens),
            encoder_out=encoder_out,
            states=states,
        )

    tokens = tokens[1:]  # remove sos
    words = [id2token[i] for i in tokens]
    underline = "▁"
    #  underline = b"\xe2\x96\x81".decode()
    text = "".join(words).replace(underline, " ").strip()

    end_t = dt.datetime.now()
    t = (end_t - start_t).total_seconds()
    rtf = t * 16000 / audio.shape[-1]

    print(text)
    print("RTF:", rtf)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/moonshine/v2/README.md
================================================
# Introduction

This folder contains scripts for moonshine v2 models that use
    - encoder_model.onnx
    - decoder_model_merged.onnx
or
    - encoder_model.ort
    - decoder_model_merged.ort

Note that you need to use [./generate_tokens.py](./generate_tokens.py)
to generate `tokens.txt` from `tokenizer.bin` for moonshine v2 models.

See also https://github.com/moonshine-ai/moonshine/pull/73


================================================
FILE: scripts/moonshine/v2/generate_tokens.py
================================================
#!/usr/bin/env python3
# Copyright      2026  Xiaomi Corp.        (authors: Fangjun Kuang)

import base64
from test import BinTokenizer


def main():
    tokenizer = BinTokenizer("./tokenizer.bin")

    with open("./tokens.txt", "w", encoding="utf-8") as f:
        for idx, token_bytes in enumerate(tokenizer.tokens):
            b64 = base64.b64encode(token_bytes).decode("ascii")
            f.write(f"{b64} {idx}\n")

    print("Saved to ./tokens.txt")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/moonshine/v2/test.py
================================================
#!/usr/bin/env python3
# Copyright      2026  Xiaomi Corp.        (authors: Fangjun Kuang)


import librosa
import numpy as np
import onnxruntime as ort


class BinTokenizer:
    def __init__(self, path):
        self.tokens = self._load(path)

    def _load(self, path):
        tokens = []
        with open(path, "rb") as f:
            data = f.read()

        i = 0
        while i < len(data):
            first = data[i]
            i += 1

            if first == 0:
                tokens.append(b"")  # store as bytes
                continue

            if first < 128:
                length = first
            else:
                second = data[i]
                i += 1
                length = (second * 128) + (first - 128)

            token_bytes = data[i : i + length]
            i += length
            tokens.append(token_bytes)  # store as bytes, do NOT decode here

        return tokens

    def decode(self, ids):
        # join bytes first, then decode as UTF-8
        byte_stream = b"".join(self.tokens[i] for i in ids if i < len(self.tokens))
        text = byte_stream.decode("utf-8", errors="replace")
        return text.replace("▁", " ").strip()


class OnnxModel:
    def __init__(self, encoder, decoder):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.session_opts = session_opts

        self.encoder = ort.InferenceSession(
            encoder,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )

        self.decoder = ort.InferenceSession(
            decoder,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )

        print(f"----{encoder} input----")
        for i in self.encoder.get_inputs():
            print(i)

        print(f"----{encoder} output----")

        for i in self.encoder.get_outputs():
            print(i)

        print(f"----{decoder} input----")
        for i in self.decoder.get_inputs():
            print(i)

        print(f"----{decoder} output----")

        for i in self.decoder.get_outputs():
            print(i)

        self.need_decoder_attention_mask = False

        for n in self.decoder.get_inputs():
            if "key_values" in n.name and not hasattr(self, "num_head"):
                self.num_head = n.shape[1]
                self.head_dim = n.shape[3]

            if "encoder_attention_mask" in n.name:
                self.need_decoder_attention_mask = True
        if self.need_decoder_attention_mask:
            # [ mask, ids, encoder_out, states, use_cache_branch]
            self.num_layers = (len(self.decoder.get_inputs()) - 4) // 4
        else:
            # [ ids, encoder_out, states, use_cache_branch]
            self.num_layers = (len(self.decoder.get_inputs()) - 3) // 4

        self.bos = 1
        self.eos = 2

    def get_decoder_init_states(self):
        states = []
        shape = [1, self.num_head, 0, self.head_dim]
        for i in range(self.num_layers):
            decoder_key = np.zeros(shape, dtype=np.float32)
            decoder_value = np.zeros(shape, dtype=np.float32)
            encoder_key = np.zeros(shape, dtype=np.float32)
            encoder_value = np.zeros(shape, dtype=np.float32)

            states.append(decoder_key)
            states.append(decoder_value)
            states.append(encoder_key)
            states.append(encoder_value)

        return states

    def run_encoder(self, audio):
        audio = audio[None, :]  # batch=1

        if len(self.encoder.get_inputs()) > 1:
            mask = np.ones_like(audio, dtype=np.int64)

            outputs = self.encoder.run(
                [
                    self.encoder.get_outputs()[0].name,
                ],
                {
                    self.encoder.get_inputs()[0].name: audio,
                    self.encoder.get_inputs()[1].name: mask,
                },
            )
        else:
            outputs = self.encoder.run(
                [
                    self.encoder.get_outputs()[0].name,
                ],
                {
                    self.encoder.get_inputs()[0].name: audio,
                },
            )
        return outputs[0]  # last_hidden_state

    def run_decoder(self, token_id, encoder_out, states):
        inputs = dict()
        if self.need_decoder_attention_mask:
            mask = np.ones((1, encoder_out.shape[1]), dtype=np.int64)
            inputs[self.decoder.get_inputs()[0].name] = mask

            inputs[self.decoder.get_inputs()[1].name] = np.array(
                [[token_id]], dtype=np.int64
            )
            inputs[self.decoder.get_inputs()[2].name] = encoder_out

            for i in range(len(states)):
                inputs[self.decoder.get_inputs()[3 + i].name] = states[i]

            inputs[self.decoder.get_inputs()[-1].name] = np.array(
                [token_id != self.bos], dtype=bool
            )
        else:
            inputs[self.decoder.get_inputs()[0].name] = np.array(
                [[token_id]], dtype=np.int64
            )
            inputs[self.decoder.get_inputs()[1].name] = encoder_out

            for i in range(len(states)):
                inputs[self.decoder.get_inputs()[2 + i].name] = states[i]

            inputs[self.decoder.get_inputs()[-1].name] = np.array(
                [token_id != self.bos], dtype=bool
            )

        outputs = self.decoder.run(None, inputs)

        logits = outputs[0]
        if token_id == self.bos:
            states = outputs[1:]
        else:
            for i in range(self.num_layers):
                states[4 * i + 0] = outputs[1 + 4 * i + 0]
                states[4 * i + 1] = outputs[1 + 4 * i + 1]

        return logits, states


def load_audio(filename):
    audio, sample_rate = librosa.load(filename, sr=16000)
    assert sample_rate == 16000, sample_rate
    assert len(audio.shape) == 1, audio.shape

    return np.ascontiguousarray(audio[: 8 * 16000])


def main():
    model = OnnxModel(
        encoder="./tiny/encoder_model.ort",
        decoder="./tiny/decoder_model_merged.ort",
        #
        #  encoder="./tiny-zh/encoder_model.onnx",
        #  decoder="./tiny-zh/decoder_model_merged.onnx",
        #
        #  encoder="./base-zh/encoder_model.ort",
        #  decoder="./base-zh/decoder_model_merged.ort",
    )
    samples = load_audio("./two_cities.wav")
    print("samples.shape", samples.shape)
    encoder_out = model.run_encoder(samples)
    print("encoder_out.shape", encoder_out.shape)
    states = model.get_decoder_init_states()
    tokens = []

    max_len = int(len(samples) / 16000 * 15)

    token_id = model.bos

    for step in range(max_len):
        logits, states = model.run_decoder(token_id, encoder_out, states)
        token_id = int(np.argmax(logits[0, 0]))
        if token_id == model.eos:
            break
        tokens.append(token_id)
    print(tokens)

    tokenizer = BinTokenizer("./base-zh/tokenizer.bin")
    text = tokenizer.decode(tokens)
    print("text", text)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/nemo/.gitignore
================================================
!run-*.sh


================================================
FILE: scripts/nemo/GigaAM/README.md
================================================
# Introduction

This folder contains scripts for converting models from
https://github.com/salute-developers/GigaAM
to sherpa-onnx.

The ASR models are for Russian speech recognition in this folder.

Please see the license of the models at
https://github.com/salute-developers/GigaAM/blob/main/LICENSE


================================================
FILE: scripts/nemo/GigaAM/export-onnx-ctc-v2.py
================================================
#!/usr/bin/env python3
import gigaam
import onnx
import torch
from onnxruntime.quantization import QuantType, quantize_dynamic


def add_meta_data(filename: str, meta_data: dict[str, str]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)
    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    onnx.save(model, filename)


def main() -> None:
    model_name = "v2_ctc"
    model = gigaam.load_model(
        model_name, fp16_encoder=False, use_flash=False, download_root="."
    )

    # use characters
    # space is 0
    # <blk> is the last token
    with open("./tokens.txt", "w", encoding="utf-8") as f:
        for i, s in enumerate(model.cfg["labels"]):
            f.write(f"{s} {i}\n")
        f.write(f"<blk> {i+1}\n")
        print("Saved to tokens.txt")
    model.to_onnx(".")
    meta_data = {
        "vocab_size": len(model.cfg["labels"]) + 1,
        "normalize_type": "",
        "subsampling_factor": 4,
        "model_type": "EncDecCTCModel",
        "version": "1",
        "model_author": "https://github.com/salute-developers/GigaAM",
        "license": "https://github.com/salute-developers/GigaAM/blob/main/LICENSE",
        "language": "Russian",
        "is_giga_am": 1,
    }
    add_meta_data(f"./{model_name}.onnx", meta_data)
    quantize_dynamic(
        model_input=f"./{model_name}.onnx",
        model_output="./model.int8.onnx",
        weight_type=QuantType.QUInt8,
    )


if __name__ == "__main__":
    main()


================================================
FILE: scripts/nemo/GigaAM/export-onnx-ctc-v3-punct.py
================================================
#!/usr/bin/env python3
# Copyright      2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import gigaam
import onnx
import torch
from onnxruntime.quantization import QuantType, quantize_dynamic

"""
==========Input==========
NodeArg(name='features', type='tensor(float)', shape=['batch_size', 64, 'seq_len'])
NodeArg(name='feature_lengths', type='tensor(int64)', shape=['batch_size'])
==========Output==========
NodeArg(name='log_probs', type='tensor(float)', shape=['batch_size', 'seq_len', 257])
"""


def add_meta_data(filename: str, meta_data: dict[str, str]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)
    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    onnx.save(model, filename)


"""
{'model_class': 'ctc', 'sample_rate': 16000, 'preprocessor': {'_target_': 'gigaam.preprocess.FeatureExtractor',
'sample_rate': 16000, 'features': 64, 'win_length': 320, 'hop_length': 160, 'mel_scale': 'htk', 'n_fft': 320,
'mel_norm': None, 'center': False}, 'encoder': {'_target_': 'gigaam.encoder.ConformerEncoder', 'feat_in': 64,
'n_layers': 16, 'd_model': 768, 'subsampling': 'conv1d', 'subs_kernel_size': 5, 'subsampling_factor': 4,
'ff_expansion_factor': 4, 'self_attention_model': 'rotary', 'pos_emb_max_len': 5000, 'n_heads': 16,
'conv_kernel_size': 5, 'flash_attn': False, 'conv_norm_type': 'layer_norm'}, 'head': {'_target_':
'gigaam.decoder.CTCHead', 'feat_in': 768, 'num_classes': 257}, 'decoding': {'_target_':
'gigaam.decoding.CTCGreedyDecoding', 'vocabulary': None,
'model_path': '/root/.cache/gigaam/v3_e2e_ctc_tokenizer.model'},
'model_name': 'v3_e2e_ctc', 'hashes': {'model': 'c15fd0dbca70363a146016d197ee0e2a',
'tokenizer': '2a9cd0c246db42d076e92abb31055deb'}}
"""


def main() -> None:
    model_name = "v3_e2e_ctc"
    model = gigaam.load_model(model_name)

    # <blk> is the last token
    sp = model.decoding.tokenizer.model
    with open("./tokens.txt", "w", encoding="utf-8") as f:
        for i in range(sp.vocab_size()):
            f.write(f"{sp.id_to_piece(i)} {i}\n")

        f.write(f"<blk> {i+1}\n")
        print("Saved to tokens.txt")
    model.to_onnx(".")
    meta_data = {
        "vocab_size": sp.vocab_size() + 1,
        "normalize_type": "",
        "subsampling_factor": 4,
        "model_type": "EncDecCTCModel",
        "version": "1",
        "model_author": "https://github.com/salute-developers/GigaAM",
        "license": "https://github.com/salute-developers/GigaAM/blob/main/LICENSE",
        "language": "Russian",
        "comment": "v3 with puncutations",
        "is_giga_am": 1,
    }
    add_meta_data(f"./{model_name}.onnx", meta_data)
    quantize_dynamic(
        model_input=f"./{model_name}.onnx",
        model_output="./model.int8.onnx",
        weight_type=QuantType.QUInt8,
    )


if __name__ == "__main__":
    main()


================================================
FILE: scripts/nemo/GigaAM/export-onnx-ctc-v3.py
================================================
#!/usr/bin/env python3
# Copyright      2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import gigaam
import onnx
import torch
from onnxruntime.quantization import QuantType, quantize_dynamic

"""
NodeArg(name='features', type='tensor(float)', shape=['batch_size', 64, 'seq_len'])
NodeArg(name='feature_lengths', type='tensor(int64)', shape=['batch_size'])
-----
NodeArg(name='log_probs', type='tensor(float)', shape=['batch_size', 'seq_len', 34])
"""


def add_meta_data(filename: str, meta_data: dict[str, str]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)
    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    onnx.save(model, filename)


"""
{'model_class': 'ctc', 'sample_rate': 16000,
'preprocessor': {'_target_': 'gigaam.preprocess.FeatureExtractor', 'sample_rate': 16000, 'features': 64,
'win_length': 320, 'hop_length': 160, 'mel_scale': 'htk', 'n_fft': 320, 'mel_norm': None, 'center': False},
'encoder': {'_target_': 'gigaam.encoder.ConformerEncoder', 'feat_in': 64, 'n_layers': 16, 'd_model': 768,
'subsampling': 'conv1d', 'subs_kernel_size': 5, 'subsampling_factor': 4, 'ff_expansion_factor': 4,
'self_attention_model': 'rotary', 'pos_emb_max_len': 5000, 'n_heads': 16, 'conv_kernel_size': 5,
'flash_attn': False, 'conv_norm_type': 'layer_norm'}, 'head': {'_target_': 'gigaam.decoder.CTCHead',
'feat_in': 768, 'num_classes': 34}, 'decoding': {'_target_': 'gigaam.decoding.CTCGreedyDecoding',
'vocabulary': [' ', 'а', 'б', 'в', 'г', 'д', 'е', 'ж', 'з', 'и', 'й', 'к', 'л', 'м', 'н', 'о', 'п', 'р', 'с',
'т', 'у', 'ф', 'х', 'ц', 'ч', 'ш', 'щ', 'ъ', 'ы', 'ь', 'э', 'ю', 'я']}, 'model_name': 'v3_ctc',
'hashes': {'model': '1bdc12052560591b7cdf35bef02619fa'}}
"""


def main() -> None:
    model_name = "v3_ctc"
    model = gigaam.load_model(model_name)

    # use characters
    # space is 0
    # <blk> is the last token
    with open("./tokens.txt", "w", encoding="utf-8") as f:
        for i, s in enumerate(model.cfg["decoding"]["vocabulary"]):
            f.write(f"{s} {i}\n")
        f.write(f"<blk> {i+1}\n")
        print("Saved to tokens.txt")
    model.to_onnx(".")
    meta_data = {
        "vocab_size": len(model.cfg["decoding"]["vocabulary"]) + 1,
        "normalize_type": "",
        "subsampling_factor": 4,
        "model_type": "EncDecCTCModel",
        "version": "1",
        "model_author": "https://github.com/salute-developers/GigaAM",
        "license": "https://github.com/salute-developers/GigaAM/blob/main/LICENSE",
        "language": "Russian",
        "comment": "v3",
        "is_giga_am": 1,
    }
    add_meta_data(f"./{model_name}.onnx", meta_data)
    quantize_dynamic(
        model_input=f"./{model_name}.onnx",
        model_output="./model.int8.onnx",
        weight_type=QuantType.QUInt8,
    )


if __name__ == "__main__":
    main()


================================================
FILE: scripts/nemo/GigaAM/export-onnx-ctc.py
================================================
#!/usr/bin/env python3
# Copyright      2024  Xiaomi Corp.        (authors: Fangjun Kuang)
from typing import Dict

import onnx
import torch
import torchaudio
from nemo.collections.asr.models import EncDecCTCModel
from nemo.collections.asr.modules.audio_preprocessing import (
    AudioToMelSpectrogramPreprocessor as NeMoAudioToMelSpectrogramPreprocessor,
)
from nemo.collections.asr.parts.preprocessing.features import (
    FilterbankFeaturesTA as NeMoFilterbankFeaturesTA,
)
from onnxruntime.quantization import QuantType, quantize_dynamic


class FilterbankFeaturesTA(NeMoFilterbankFeaturesTA):
    def __init__(self, mel_scale: str = "htk", wkwargs=None, **kwargs):
        if "window_size" in kwargs:
            del kwargs["window_size"]
        if "window_stride" in kwargs:
            del kwargs["window_stride"]

        super().__init__(**kwargs)

        self._mel_spec_extractor: torchaudio.transforms.MelSpectrogram = (
            torchaudio.transforms.MelSpectrogram(
                sample_rate=self._sample_rate,
                win_length=self.win_length,
                hop_length=self.hop_length,
                n_mels=kwargs["nfilt"],
                window_fn=self.torch_windows[kwargs["window"]],
                mel_scale=mel_scale,
                norm=kwargs["mel_norm"],
                n_fft=kwargs["n_fft"],
                f_max=kwargs.get("highfreq", None),
                f_min=kwargs.get("lowfreq", 0),
                wkwargs=wkwargs,
            )
        )


class AudioToMelSpectrogramPreprocessor(NeMoAudioToMelSpectrogramPreprocessor):
    def __init__(self, mel_scale: str = "htk", **kwargs):
        super().__init__(**kwargs)
        kwargs["nfilt"] = kwargs["features"]
        del kwargs["features"]
        self.featurizer = (
            FilterbankFeaturesTA(  # Deprecated arguments; kept for config compatibility
                mel_scale=mel_scale,
                **kwargs,
            )
        )


def add_meta_data(filename: str, meta_data: Dict[str, str]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)
    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    onnx.save(model, filename)


@torch.no_grad()
def main():
    model = EncDecCTCModel.from_config_file("./ctc_model_config.yaml")
    ckpt = torch.load("./ctc_model_weights.ckpt", map_location="cpu")
    model.load_state_dict(ckpt, strict=False)
    model.eval()

    # use characters
    # space is 0
    # <blk> is the last token
    with open("tokens.txt", "w", encoding="utf-8") as f:
        for i, t in enumerate(model.cfg.labels):
            f.write(f"{t} {i}\n")
        f.write(f"<blk> {i+1}\n")

    filename = "model.onnx"
    model.export(filename)

    meta_data = {
        "vocab_size": len(model.cfg.labels) + 1,
        "normalize_type": "",
        "subsampling_factor": 4,
        "model_type": "EncDecCTCModel",
        "version": "1",
        "model_author": "https://github.com/salute-developers/GigaAM",
        "license": "https://github.com/salute-developers/GigaAM/blob/main/GigaAM%20License_NC.pdf",
        "language": "Russian",
        "is_giga_am": 1,
    }
    add_meta_data(filename, meta_data)

    filename_int8 = "model.int8.onnx"
    quantize_dynamic(
        model_input=filename,
        model_output=filename_int8,
        weight_type=QuantType.QUInt8,
    )


if __name__ == "__main__":
    main()


================================================
FILE: scripts/nemo/GigaAM/export-onnx-rnnt-v2.py
================================================
#!/usr/bin/env python3
# Copyright      2025  Xiaomi Corp.        (authors: Fangjun Kuang)
import os

import gigaam
import onnx
import torch
from gigaam.utils import onnx_converter
from onnxruntime.quantization import QuantType, quantize_dynamic
from torch import Tensor

"""
==========Input==========
NodeArg(name='audio_signal', type='tensor(float)', shape=['batch_size', 64, 'seq_len'])
NodeArg(name='length', type='tensor(int64)', shape=['batch_size'])
==========Output==========
NodeArg(name='encoded', type='tensor(float)', shape=['batch_size', 768, 'Transposeencoded_dim_2'])
NodeArg(name='encoded_len', type='tensor(int32)', shape=['batch_size'])

==========Input==========
NodeArg(name='x', type='tensor(int32)', shape=[1, 1])
NodeArg(name='unused_x_len.1', type='tensor(int32)', shape=[1])
NodeArg(name='h.1', type='tensor(float)', shape=[1, 1, 320])
NodeArg(name='c.1', type='tensor(float)', shape=[1, 1, 320])
==========Output==========
NodeArg(name='dec', type='tensor(float)', shape=[1, 320, 1])
NodeArg(name='unused_x_len', type='tensor(int32)', shape=[1])
NodeArg(name='h', type='tensor(float)', shape=[1, 1, 320])
NodeArg(name='c', type='tensor(float)', shape=[1, 1, 320])

==========Input==========
NodeArg(name='enc', type='tensor(float)', shape=[1, 768, 1])
NodeArg(name='dec', type='tensor(float)', shape=[1, 320, 1])
==========Output==========
NodeArg(name='joint', type='tensor(float)', shape=[1, 1, 1, 34])
"""


def add_meta_data(filename: str, meta_data: dict[str, str]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)
    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    onnx.save(model, filename)


class EncoderWrapper(torch.nn.Module):
    def __init__(self, m):
        super().__init__()
        self.m = m

    def forward(self, audio_signal: Tensor, length: Tensor):
        # https://github.com/salute-developers/GigaAM/blob/main/gigaam/encoder.py#L499
        out, out_len = self.m.encoder(audio_signal, length)

        return out, out_len.to(torch.int64)

    def to_onnx(self, dir_path: str = "."):
        onnx_converter(
            model_name=f"{self.m.cfg.model_name}_encoder",
            out_dir=dir_path,
            module=self.m.encoder,
            dynamic_axes=self.m.encoder.dynamic_axes(),
        )


class DecoderWrapper(torch.nn.Module):
    def __init__(self, m):
        super().__init__()
        self.m = m

    def forward(self, x: Tensor, unused_x_len: Tensor, h: Tensor, c: Tensor):
        # https://github.com/salute-developers/GigaAM/blob/main/gigaam/decoder.py#L110C17-L110C54
        emb = self.m.head.decoder.embed(x)
        g, (h, c) = self.m.head.decoder.lstm(emb.transpose(0, 1), (h, c))
        return g.permute(1, 2, 0), unused_x_len + 1, h, c

    def to_onnx(self, dir_path: str = "."):
        label, hidden_h, hidden_c = self.m.head.decoder.input_example()
        label = label.to(torch.int32)
        label_len = torch.zeros(1, dtype=torch.int32)

        onnx_converter(
            model_name=f"{self.m.cfg.model_name}_decoder",
            out_dir=dir_path,
            module=self,
            dynamic_axes=self.m.encoder.dynamic_axes(),
            inputs=(label, label_len, hidden_h, hidden_c),
            input_names=["x", "unused_x_len.1", "h.1", "c.1"],
            output_names=["dec", "unused_x_len", "h", "c"],
        )


def main() -> None:
    model_name = "v2_rnnt"
    model = gigaam.load_model(
        model_name, fp16_encoder=False, use_flash=False, download_root="."
    )

    # use characters
    # space is 0
    # <blk> is the last token
    with open("./tokens.txt", "w", encoding="utf-8") as f:
        for i, s in enumerate(model.cfg["labels"]):
            f.write(f"{s} {i}\n")
        f.write(f"<blk> {i+1}\n")
        print("Saved to tokens.txt")

    EncoderWrapper(model).to_onnx(".")
    DecoderWrapper(model).to_onnx(".")

    onnx_converter(
        model_name=f"{model.cfg.model_name}_joint",
        out_dir=".",
        module=model.head.joint,
    )
    meta_data = {
        # vocab_size does not include the blank
        # we will increase vocab_size by 1 in the c++ code
        "vocab_size": model.cfg["head"]["decoder"]["num_classes"] - 1,
        "pred_rnn_layers": model.cfg["head"]["decoder"]["pred_rnn_layers"],
        "pred_hidden": model.cfg["head"]["decoder"]["pred_hidden"],
        "normalize_type": "",
        "subsampling_factor": 4,
        "model_type": "EncDecRNNTBPEModel",
        "version": "2",
        "model_author": "https://github.com/salute-developers/GigaAM",
        "license": "https://github.com/salute-developers/GigaAM/blob/main/LICENSE",
        "language": "Russian",
        "is_giga_am": 1,
    }

    add_meta_data(f"./{model_name}_encoder.onnx", meta_data)
    quantize_dynamic(
        model_input=f"./{model_name}_encoder.onnx",
        model_output="./encoder.int8.onnx",
        weight_type=QuantType.QUInt8,
    )
    os.rename(f"./{model_name}_decoder.onnx", "decoder.onnx")
    os.rename(f"./{model_name}_joint.onnx", "joiner.onnx")
    os.remove(f"./{model_name}_encoder.onnx")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/nemo/GigaAM/export-onnx-rnnt-v3-punct.py
================================================
#!/usr/bin/env python3
# Copyright      2025  Xiaomi Corp.        (authors: Fangjun Kuang)
import os

import gigaam
import onnx
import torch
from gigaam.utils import onnx_converter
from onnxruntime.quantization import QuantType, quantize_dynamic
from torch import Tensor

# encoder input length should be of int64
# encder output length can be int64 or int32

"""
==========Input==========
NodeArg(name='audio_signal', type='tensor(float)', shape=['batch_size', 64, 'seq_len'])
NodeArg(name='length', type='tensor(int64)', shape=['batch_size'])
==========Output==========
NodeArg(name='encoded', type='tensor(float)', shape=['batch_size', 768, 'Transposeencoded_dim_2'])
NodeArg(name='encoded_len', type='tensor(int32)', shape=['batch_size'])
==========Input==========
NodeArg(name='x', type='tensor(int32)', shape=[1, 1])
NodeArg(name='unused_x_len.1', type='tensor(int32)', shape=[1])
NodeArg(name='h.1', type='tensor(float)', shape=[1, 1, 320])
NodeArg(name='c.1', type='tensor(float)', shape=[1, 1, 320])
==========Output==========
NodeArg(name='dec', type='tensor(float)', shape=[1, 320, 1])
NodeArg(name='unused_x_len', type='tensor(int32)', shape=[1])
NodeArg(name='h', type='tensor(float)', shape=[1, 1, 320])
NodeArg(name='c', type='tensor(float)', shape=[1, 1, 320])
==========Input==========
NodeArg(name='enc', type='tensor(float)', shape=[1, 768, 1])
NodeArg(name='dec', type='tensor(float)', shape=[1, 320, 1])
==========Output==========
NodeArg(name='joint', type='tensor(float)', shape=[1, 1, 1, 1025])
"""


def add_meta_data(filename: str, meta_data: dict[str, str]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)
    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    onnx.save(model, filename)


class EncoderWrapper(torch.nn.Module):
    def __init__(self, m):
        super().__init__()
        self.m = m

    def forward(self, audio_signal: Tensor, length: Tensor):
        # https://github.com/salute-developers/GigaAM/blob/main/gigaam/encoder.py#L499
        out, out_len = self.m.encoder(audio_signal, length)

        return out, out_len.to(torch.int64)

    def to_onnx(self, dir_path: str = "."):
        onnx_converter(
            model_name=f"{self.m.cfg.model_name}_encoder",
            out_dir=dir_path,
            module=self.m.encoder,
            dynamic_axes=self.m.encoder.dynamic_axes(),
        )


class DecoderWrapper(torch.nn.Module):
    def __init__(self, m):
        super().__init__()
        self.m = m

    def forward(self, x: Tensor, unused_x_len: Tensor, h: Tensor, c: Tensor):
        # https://github.com/salute-developers/GigaAM/blob/main/gigaam/decoder.py#L110C17-L110C54
        emb = self.m.head.decoder.embed(x)
        g, (h, c) = self.m.head.decoder.lstm(emb.transpose(0, 1), (h, c))
        return g.permute(1, 2, 0), unused_x_len + 1, h, c

    def to_onnx(self, dir_path: str = "."):
        label, hidden_h, hidden_c = self.m.head.decoder.input_example()
        label = label.to(torch.int32)
        label_len = torch.zeros(1, dtype=torch.int32)

        onnx_converter(
            model_name=f"{self.m.cfg.model_name}_decoder",
            out_dir=dir_path,
            module=self,
            dynamic_axes=self.m.encoder.dynamic_axes(),
            inputs=(label, label_len, hidden_h, hidden_c),
            input_names=["x", "unused_x_len.1", "h.1", "c.1"],
            output_names=["dec", "unused_x_len", "h", "c"],
        )


"""
{'model_class': 'rnnt', 'sample_rate': 16000,
'preprocessor': {'_target_': 'gigaam.preprocess.FeatureExtractor', 'sample_rate': 16000,
'features': 64, 'win_length': 320, 'hop_length': 160, 'mel_scale': 'htk', 'n_fft': 320,
'mel_norm': None, 'center': False},
'encoder': {'_target_': 'gigaam.encoder.ConformerEncoder', 'feat_in': 64, 'n_layers': 16,
'd_model': 768, 'subsampling_factor': 4, 'ff_expansion_factor': 4,
'self_attention_model': 'rotary', 'pos_emb_max_len': 5000, 'n_heads': 16,
'conv_kernel_size': 5, 'flash_attn': False, 'subs_kernel_size': 5,
'subsampling': 'conv1d', 'conv_norm_type': 'layer_norm'},
'head': {'_target_': 'gigaam.decoder.RNNTHead',
'decoder': {'pred_hidden': 320, 'pred_rnn_layers': 1, 'num_classes': 1025},
'joint': {'enc_hidden': 768, 'pred_hidden': 320, 'joint_hidden': 320, 'num_classes': 1025}},
'decoding': {'_target_': 'gigaam.decoding.RNNTGreedyDecoding',
'vocabulary': None, 'model_path': '/root/.cache/gigaam/v3_e2e_rnnt_tokenizer.model'}, 'model_name': 'v3_e2e_rnnt', 'hashes': {'model': '72e2a9b5c7caad963b2bbfd2f298c252', 'tokenizer': '3b3bf8370e882885d79731592fc99f98'}}
"""


def main() -> None:
    model_name = "v3_e2e_rnnt"
    model = gigaam.load_model(model_name)

    # <blk> is the last token
    sp = model.decoding.tokenizer.model
    with open("./tokens.txt", "w", encoding="utf-8") as f:
        for i in range(sp.vocab_size()):
            f.write(f"{sp.id_to_piece(i)} {i}\n")

        f.write(f"<blk> {i+1}\n")
        print("Saved to tokens.txt")

    EncoderWrapper(model).to_onnx(".")
    DecoderWrapper(model).to_onnx(".")

    onnx_converter(
        model_name=f"{model.cfg.model_name}_joint",
        out_dir=".",
        module=model.head.joint,
    )
    meta_data = {
        # vocab_size does not include the blank
        # we will increase vocab_size by 1 in the c++ code
        "vocab_size": model.cfg["head"]["decoder"]["num_classes"] - 1,
        "pred_rnn_layers": model.cfg["head"]["decoder"]["pred_rnn_layers"],
        "pred_hidden": model.cfg["head"]["decoder"]["pred_hidden"],
        "normalize_type": "",
        "subsampling_factor": 4,
        "model_type": "EncDecRNNTBPEModel",
        "version": "3",
        "model_author": "https://github.com/salute-developers/GigaAM",
        "license": "https://github.com/salute-developers/GigaAM/blob/main/LICENSE",
        "language": "Russian",
        "comment": "v3",
        "is_giga_am": 1,
    }

    add_meta_data(f"./{model_name}_encoder.onnx", meta_data)
    quantize_dynamic(
        model_input=f"./{model_name}_encoder.onnx",
        model_output="./encoder.int8.onnx",
        weight_type=QuantType.QUInt8,
    )
    os.rename(f"./{model_name}_decoder.onnx", "decoder.onnx")
    os.rename(f"./{model_name}_joint.onnx", "joiner.onnx")
    os.remove(f"./{model_name}_encoder.onnx")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/nemo/GigaAM/export-onnx-rnnt-v3.py
================================================
#!/usr/bin/env python3
# Copyright      2025  Xiaomi Corp.        (authors: Fangjun Kuang)
import os

import gigaam
import onnx
import torch
from gigaam.utils import onnx_converter
from onnxruntime.quantization import QuantType, quantize_dynamic
from torch import Tensor

# encoder input length should be of int64
# encder output length can be int64 or int32

"""
==========Input==========
NodeArg(name='audio_signal', type='tensor(float)', shape=['batch_size', 64, 'seq_len'])
NodeArg(name='length', type='tensor(int64)', shape=['batch_size'])
==========Output==========
NodeArg(name='encoded', type='tensor(float)', shape=['batch_size', 768, 'Transposeencoded_dim_2'])
NodeArg(name='encoded_len', type='tensor(int32)', shape=['batch_size'])
==========Input==========
NodeArg(name='x', type='tensor(int32)', shape=[1, 1])
NodeArg(name='unused_x_len.1', type='tensor(int32)', shape=[1])
NodeArg(name='h.1', type='tensor(float)', shape=[1, 1, 320])
NodeArg(name='c.1', type='tensor(float)', shape=[1, 1, 320])
==========Output==========
NodeArg(name='dec', type='tensor(float)', shape=[1, 320, 1])
NodeArg(name='unused_x_len', type='tensor(int32)', shape=[1])
NodeArg(name='h', type='tensor(float)', shape=[1, 1, 320])
NodeArg(name='c', type='tensor(float)', shape=[1, 1, 320])
==========Input==========
NodeArg(name='enc', type='tensor(float)', shape=[1, 768, 1])
NodeArg(name='dec', type='tensor(float)', shape=[1, 320, 1])
==========Output==========
NodeArg(name='joint', type='tensor(float)', shape=[1, 1, 1, 34])
"""


def add_meta_data(filename: str, meta_data: dict[str, str]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)
    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    onnx.save(model, filename)


class EncoderWrapper(torch.nn.Module):
    def __init__(self, m):
        super().__init__()
        self.m = m

    def forward(self, audio_signal: Tensor, length: Tensor):
        # https://github.com/salute-developers/GigaAM/blob/main/gigaam/encoder.py#L499
        out, out_len = self.m.encoder(audio_signal, length)

        return out, out_len.to(torch.int64)

    def to_onnx(self, dir_path: str = "."):
        onnx_converter(
            model_name=f"{self.m.cfg.model_name}_encoder",
            out_dir=dir_path,
            module=self.m.encoder,
            dynamic_axes=self.m.encoder.dynamic_axes(),
        )


class DecoderWrapper(torch.nn.Module):
    def __init__(self, m):
        super().__init__()
        self.m = m

    def forward(self, x: Tensor, unused_x_len: Tensor, h: Tensor, c: Tensor):
        # https://github.com/salute-developers/GigaAM/blob/main/gigaam/decoder.py#L110C17-L110C54
        emb = self.m.head.decoder.embed(x)
        g, (h, c) = self.m.head.decoder.lstm(emb.transpose(0, 1), (h, c))
        return g.permute(1, 2, 0), unused_x_len + 1, h, c

    def to_onnx(self, dir_path: str = "."):
        label, hidden_h, hidden_c = self.m.head.decoder.input_example()
        label = label.to(torch.int32)
        label_len = torch.zeros(1, dtype=torch.int32)

        onnx_converter(
            model_name=f"{self.m.cfg.model_name}_decoder",
            out_dir=dir_path,
            module=self,
            dynamic_axes=self.m.encoder.dynamic_axes(),
            inputs=(label, label_len, hidden_h, hidden_c),
            input_names=["x", "unused_x_len.1", "h.1", "c.1"],
            output_names=["dec", "unused_x_len", "h", "c"],
        )


"""
{'model_class': 'rnnt', 'sample_rate': 16000,
'preprocessor': {'_target_': 'gigaam.preprocess.FeatureExtractor', 'sample_rate': 16000,
'features': 64, 'win_length': 320, 'hop_length': 160, 'mel_scale': 'htk', 'n_fft': 320,
'mel_norm': None, 'center': False},
'encoder': {'_target_': 'gigaam.encoder.ConformerEncoder', 'feat_in': 64, 'n_layers': 16,
'd_model': 768, 'subsampling_factor': 4, 'ff_expansion_factor': 4,
'self_attention_model': 'rotary', 'pos_emb_max_len': 5000, 'n_heads': 16,
'conv_kernel_size': 5, 'flash_attn': False, 'subs_kernel_size': 5,
'subsampling': 'conv1d', 'conv_norm_type': 'layer_norm'},
'head': {'_target_': 'gigaam.decoder.RNNTHead',
'decoder': {'pred_hidden': 320, 'pred_rnn_layers': 1, 'num_classes': 34},
'joint': {'enc_hidden': 768, 'pred_hidden': 320, 'joint_hidden': 320, 'num_classes': 34}},
'decoding': {'_target_': 'gigaam.decoding.RNNTGreedyDecoding',
'vocabulary': [' ', 'а', 'б', 'в', 'г', 'д', 'е', 'ж', 'з', 'и', 'й', 'к', 'л', 'м', 'н',
'о', 'п', 'р', 'с', 'т', 'у', 'ф', 'х', 'ц', 'ч', 'ш', 'щ', 'ъ', 'ы', 'ь', 'э', 'ю', 'я']},
'model_name': 'v3_rnnt', 'hashes': {'model': 'be62a7bc46de1311ec288d3bf8ee2818'}}
"""


def main() -> None:
    model_name = "v3_rnnt"
    model = gigaam.load_model(model_name)

    # use characters
    # space is 0
    # <blk> is the last token
    with open("./tokens.txt", "w", encoding="utf-8") as f:
        for i, s in enumerate(model.cfg["decoding"]["vocabulary"]):
            f.write(f"{s} {i}\n")
        f.write(f"<blk> {i+1}\n")
        print("Saved to tokens.txt")

    EncoderWrapper(model).to_onnx(".")
    DecoderWrapper(model).to_onnx(".")

    onnx_converter(
        model_name=f"{model.cfg.model_name}_joint",
        out_dir=".",
        module=model.head.joint,
    )
    meta_data = {
        # vocab_size does not include the blank
        # we will increase vocab_size by 1 in the c++ code
        "vocab_size": model.cfg["head"]["decoder"]["num_classes"] - 1,
        "pred_rnn_layers": model.cfg["head"]["decoder"]["pred_rnn_layers"],
        "pred_hidden": model.cfg["head"]["decoder"]["pred_hidden"],
        "normalize_type": "",
        "subsampling_factor": 4,
        "model_type": "EncDecRNNTBPEModel",
        "version": "3",
        "model_author": "https://github.com/salute-developers/GigaAM",
        "license": "https://github.com/salute-developers/GigaAM/blob/main/LICENSE",
        "language": "Russian",
        "comment": "v3",
        "is_giga_am": 1,
    }

    add_meta_data(f"./{model_name}_encoder.onnx", meta_data)
    quantize_dynamic(
        model_input=f"./{model_name}_encoder.onnx",
        model_output="./encoder.int8.onnx",
        weight_type=QuantType.QUInt8,
    )
    os.rename(f"./{model_name}_decoder.onnx", "decoder.onnx")
    os.rename(f"./{model_name}_joint.onnx", "joiner.onnx")
    os.remove(f"./{model_name}_encoder.onnx")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/nemo/GigaAM/export-onnx-rnnt.py
================================================
#!/usr/bin/env python3
# Copyright      2024  Xiaomi Corp.        (authors: Fangjun Kuang)

from typing import Dict

import onnx
import torch
import torchaudio
from nemo.collections.asr.models import EncDecRNNTBPEModel
from nemo.collections.asr.modules.audio_preprocessing import (
    AudioToMelSpectrogramPreprocessor as NeMoAudioToMelSpectrogramPreprocessor,
)
from nemo.collections.asr.parts.preprocessing.features import (
    FilterbankFeaturesTA as NeMoFilterbankFeaturesTA,
)
from onnxruntime.quantization import QuantType, quantize_dynamic


def add_meta_data(filename: str, meta_data: Dict[str, str]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)
    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    onnx.save(model, filename)


class FilterbankFeaturesTA(NeMoFilterbankFeaturesTA):
    def __init__(self, mel_scale: str = "htk", wkwargs=None, **kwargs):
        if "window_size" in kwargs:
            del kwargs["window_size"]
        if "window_stride" in kwargs:
            del kwargs["window_stride"]

        super().__init__(**kwargs)

        self._mel_spec_extractor: torchaudio.transforms.MelSpectrogram = (
            torchaudio.transforms.MelSpectrogram(
                sample_rate=self._sample_rate,
                win_length=self.win_length,
                hop_length=self.hop_length,
                n_mels=kwargs["nfilt"],
                window_fn=self.torch_windows[kwargs["window"]],
                mel_scale=mel_scale,
                norm=kwargs["mel_norm"],
                n_fft=kwargs["n_fft"],
                f_max=kwargs.get("highfreq", None),
                f_min=kwargs.get("lowfreq", 0),
                wkwargs=wkwargs,
            )
        )


class AudioToMelSpectrogramPreprocessor(NeMoAudioToMelSpectrogramPreprocessor):
    def __init__(self, mel_scale: str = "htk", **kwargs):
        super().__init__(**kwargs)
        kwargs["nfilt"] = kwargs["features"]
        del kwargs["features"]
        self.featurizer = (
            FilterbankFeaturesTA(  # Deprecated arguments; kept for config compatibility
                mel_scale=mel_scale,
                **kwargs,
            )
        )


@torch.no_grad()
def main():
    model = EncDecRNNTBPEModel.from_config_file("./rnnt_model_config.yaml")
    ckpt = torch.load("./rnnt_model_weights.ckpt", map_location="cpu")
    model.load_state_dict(ckpt, strict=False)
    model.eval()

    # use bpe
    with open("./tokens.txt", "w", encoding="utf-8") as f:
        for i, s in enumerate(model.joint.vocabulary):
            f.write(f"{s} {i}\n")
        f.write(f"<blk> {i+1}\n")
        print("Saved to tokens.txt")

    model.encoder.export("encoder.onnx")
    model.decoder.export("decoder.onnx")
    model.joint.export("joiner.onnx")

    meta_data = {
        # not including the blank
        # we increase vocab_size in the C++ code
        "vocab_size": model.decoder.vocab_size,
        "pred_rnn_layers": model.decoder.pred_rnn_layers,
        "pred_hidden": model.decoder.pred_hidden,
        "normalize_type": "",
        "subsampling_factor": 4,
        "model_type": "EncDecRNNTBPEModel",
        "version": "1",
        "model_author": "https://github.com/salute-developers/GigaAM",
        "license": "https://github.com/salute-developers/GigaAM/blob/main/GigaAM%20License_NC.pdf",
        "language": "Russian",
        "is_giga_am": 1,
    }
    add_meta_data("encoder.onnx", meta_data)

    quantize_dynamic(
        model_input="encoder.onnx",
        model_output="encoder.int8.onnx",
        weight_type=QuantType.QUInt8,
    )


if __name__ == "__main__":
    main()


================================================
FILE: scripts/nemo/GigaAM/run-ctc-v2.sh
================================================
#!/usr/bin/env bash

set -ex

function install_gigaam() {
  curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
  python3 get-pip.py
  pip install torch==2.4.0 torchaudio==2.4.0 -f https://download.pytorch.org/whl/torch_stable.html
  pip install -qq wget text-unidecode "matplotlib>=3.3.2" onnx onnxruntime==1.17.1 pybind11 Cython einops kaldi-native-fbank soundfile librosa

  BRANCH='main'
  python3 -m pip install git+https://github.com/salute-developers/GigaAM.git@$BRANCH#egg=gigaam

  python3 -m pip install -qq kaldi-native-fbank
  pip install numpy==1.26.4
}

function download_files() {
  curl -SL -O https://huggingface.co/csukuangfj/tmp-files/resolve/main/GigaAM/example.wav
  curl -SL -O https://raw.githubusercontent.com/salute-developers/GigaAM/main/LICENSE
}

install_gigaam
download_files

python3 ./export-onnx-ctc-v2.py
ls -lh
python3 ./test-onnx-ctc.py


================================================
FILE: scripts/nemo/GigaAM/run-ctc-v3-punct.sh
================================================
#!/usr/bin/env bash

set -ex

function install_gigaam() {
  curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
  python3 get-pip.py
  pip install torch==2.4.0 torchaudio==2.4.0 -f https://download.pytorch.org/whl/torch_stable.html
  pip install -qq wget text-unidecode "matplotlib>=3.3.2" onnx onnxruntime==1.17.1 pybind11 Cython einops kaldi-native-fbank soundfile librosa

  BRANCH='main'
  python3 -m pip install git+https://github.com/salute-developers/GigaAM.git@$BRANCH#egg=gigaam

  python3 -m pip install -qq kaldi-native-fbank
  pip install numpy==1.26.4
}

function download_files() {
  curl -SL -O https://huggingface.co/csukuangfj/tmp-files/resolve/main/GigaAM/example.wav
  curl -SL -O https://raw.githubusercontent.com/salute-developers/GigaAM/main/LICENSE
}

install_gigaam
download_files

python3 ./export-onnx-ctc-v3-punct.py
ls -lh
python3 ./test-onnx-ctc.py


================================================
FILE: scripts/nemo/GigaAM/run-ctc-v3.sh
================================================
#!/usr/bin/env bash

set -ex

function install_gigaam() {
  curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
  python3 get-pip.py
  pip install torch==2.4.0 torchaudio==2.4.0 -f https://download.pytorch.org/whl/torch_stable.html
  pip install -qq wget text-unidecode "matplotlib>=3.3.2" onnx onnxruntime==1.17.1 pybind11 Cython einops kaldi-native-fbank soundfile librosa

  BRANCH='main'
  python3 -m pip install git+https://github.com/salute-developers/GigaAM.git@$BRANCH#egg=gigaam

  python3 -m pip install -qq kaldi-native-fbank
  pip install numpy==1.26.4
}

function download_files() {
  curl -SL -O https://huggingface.co/csukuangfj/tmp-files/resolve/main/GigaAM/example.wav
  curl -SL -O https://raw.githubusercontent.com/salute-developers/GigaAM/main/LICENSE
}

install_gigaam
download_files

python3 ./export-onnx-ctc-v3.py
ls -lh
python3 ./test-onnx-ctc.py


================================================
FILE: scripts/nemo/GigaAM/run-ctc.sh
================================================
#!/usr/bin/env bash
# Copyright    2024  Xiaomi Corp.        (authors: Fangjun Kuang)

set -ex

function install_nemo() {
  curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
  python3 get-pip.py

  pip install torch==2.4.0 torchaudio==2.4.0 -f https://download.pytorch.org/whl/torch_stable.html

  pip install -qq wget text-unidecode "matplotlib>=3.3.2" onnx onnxruntime==1.17.1 pybind11 Cython einops kaldi-native-fbank soundfile librosa
  pip install -qq ipython

  # sudo apt-get install -q -y sox libsndfile1 ffmpeg python3-pip ipython

  BRANCH='main'
  python3 -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]

  pip install numpy==1.26.4
}

function download_files() {
  # curl -SL -O https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/ctc_model_weights.ckpt
  # curl -SL -O https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/ctc_model_config.yaml
  # curl -SL -O https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/example.wav
  # curl -SL -O https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/long_example.wav
  curl -SL -O https://huggingface.co/csukuangfj/tmp-files/resolve/main/GigaAM/ctc/ctc_model_weights.ckpt
  curl -SL -O https://huggingface.co/csukuangfj/tmp-files/resolve/main/GigaAM/ctc/ctc_model_config.yaml
  curl -SL -O https://huggingface.co/csukuangfj/tmp-files/resolve/main/GigaAM/example.wav
  curl -SL -O https://huggingface.co/csukuangfj/tmp-files/resolve/main/GigaAM/long_example.wav
  curl -SL -O https://huggingface.co/csukuangfj/tmp-files/resolve/main/GigaAM/GigaAM%20License_NC.pdf
}

install_nemo
download_files

python3 ./export-onnx-ctc.py
ls -lh
python3 ./test-onnx-ctc.py


================================================
FILE: scripts/nemo/GigaAM/run-rnnt-v2.sh
================================================
#!/usr/bin/env bash
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

set -ex

function install_gigaam() {
  curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
  python3 get-pip.py
  pip install torch==2.4.0 torchaudio==2.4.0 -f https://download.pytorch.org/whl/torch_stable.html
  pip install -qq wget text-unidecode "matplotlib>=3.3.2" onnx onnxruntime==1.17.1 pybind11 Cython einops kaldi-native-fbank soundfile librosa

  BRANCH='main'
  python3 -m pip install git+https://github.com/salute-developers/GigaAM.git@$BRANCH#egg=gigaam

  python3 -m pip install -qq kaldi-native-fbank
  pip install numpy==1.26.4
}

function download_files() {
  curl -SL -O https://huggingface.co/csukuangfj/tmp-files/resolve/main/GigaAM/example.wav
  curl -SL -O https://raw.githubusercontent.com/salute-developers/GigaAM/main/LICENSE
}

install_gigaam
download_files

python3 ./export-onnx-rnnt-v2.py
ls -lh
python3 ./test-onnx-rnnt.py


================================================
FILE: scripts/nemo/GigaAM/run-rnnt-v3-punct.sh
================================================
#!/usr/bin/env bash
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

set -ex

function install_gigaam() {
  curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
  python3 get-pip.py
  pip install torch==2.4.0 torchaudio==2.4.0 -f https://download.pytorch.org/whl/torch_stable.html
  pip install -qq wget text-unidecode "matplotlib>=3.3.2" onnx onnxruntime==1.17.1 pybind11 Cython einops kaldi-native-fbank soundfile librosa

  BRANCH='main'
  python3 -m pip install git+https://github.com/salute-developers/GigaAM.git@$BRANCH#egg=gigaam

  python3 -m pip install -qq kaldi-native-fbank
  pip install numpy==1.26.4
}

function download_files() {
  curl -SL -O https://huggingface.co/csukuangfj/tmp-files/resolve/main/GigaAM/example.wav
  curl -SL -O https://raw.githubusercontent.com/salute-developers/GigaAM/main/LICENSE
}

install_gigaam
download_files

python3 ./export-onnx-rnnt-v3-punct.py
ls -lh
python3 ./test-onnx-rnnt.py


================================================
FILE: scripts/nemo/GigaAM/run-rnnt-v3.sh
================================================
#!/usr/bin/env bash
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

set -ex

function install_gigaam() {
  curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
  python3 get-pip.py
  pip install torch==2.4.0 torchaudio==2.4.0 -f https://download.pytorch.org/whl/torch_stable.html
  pip install -qq wget text-unidecode "matplotlib>=3.3.2" onnx onnxruntime==1.17.1 pybind11 Cython einops kaldi-native-fbank soundfile librosa

  BRANCH='main'
  python3 -m pip install git+https://github.com/salute-developers/GigaAM.git@$BRANCH#egg=gigaam

  python3 -m pip install -qq kaldi-native-fbank
  pip install numpy==1.26.4
}

function download_files() {
  curl -SL -O https://huggingface.co/csukuangfj/tmp-files/resolve/main/GigaAM/example.wav
  curl -SL -O https://raw.githubusercontent.com/salute-developers/GigaAM/main/LICENSE
}

install_gigaam
download_files

python3 ./export-onnx-rnnt-v3.py
ls -lh
python3 ./test-onnx-rnnt.py


================================================
FILE: scripts/nemo/GigaAM/run-rnnt.sh
================================================
#!/usr/bin/env bash
# Copyright    2024  Xiaomi Corp.        (authors: Fangjun Kuang)

set -ex

function install_nemo() {
  curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
  python3 get-pip.py

  pip install torch==2.4.0 torchaudio==2.4.0 -f https://download.pytorch.org/whl/torch_stable.html

  pip install -qq wget text-unidecode "matplotlib>=3.3.2" onnx onnxruntime==1.17.1 pybind11 Cython einops kaldi-native-fbank soundfile librosa
  pip install -qq ipython

  # sudo apt-get install -q -y sox libsndfile1 ffmpeg python3-pip ipython

  BRANCH='main'
  python3 -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]

  pip install numpy==1.26.4
}

function download_files() {
  # curl -SL -O https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/rnnt_model_weights.ckpt
  # curl -SL -O https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/rnnt_model_config.yaml
  # curl -SL -O https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/example.wav
  # curl -SL -O https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/long_example.wav
  # curl -SL -O https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/tokenizer_all_sets.tar

  curl -SL -O https://huggingface.co/csukuangfj/tmp-files/resolve/main/GigaAM/rnnt/rnnt_model_weights.ckpt
  curl -SL -O https://huggingface.co/csukuangfj/tmp-files/resolve/main/GigaAM/rnnt/rnnt_model_config.yaml
  curl -SL -O https://huggingface.co/csukuangfj/tmp-files/resolve/main/GigaAM/example.wav
  curl -SL -O https://huggingface.co/csukuangfj/tmp-files/resolve/main/GigaAM/long_example.wav
  curl -SL -O https://huggingface.co/csukuangfj/tmp-files/resolve/main/GigaAM/GigaAM%20License_NC.pdf
  curl -SL -O https://huggingface.co/csukuangfj/tmp-files/resolve/main/GigaAM/rnnt/tokenizer_all_sets.tar
  tar -xf tokenizer_all_sets.tar && rm tokenizer_all_sets.tar
  ls -lh
  echo "---"
  ls -lh tokenizer_all_sets
  echo "---"
}

install_nemo
download_files

python3 ./export-onnx-rnnt.py
ls -lh
python3 ./test-onnx-rnnt.py
rm -v encoder.onnx
ls -lh


================================================
FILE: scripts/nemo/GigaAM/test-onnx-ctc.py
================================================
#!/usr/bin/env python3
# Copyright      2024  Xiaomi Corp.        (authors: Fangjun Kuang)

# https://github.com/salute-developers/GigaAM

import kaldi_native_fbank as knf
import librosa
import numpy as np
import onnxruntime as ort
import soundfile as sf
import torch


def create_fbank():
    opts = knf.FbankOptions()
    opts.frame_opts.dither = 0
    opts.frame_opts.remove_dc_offset = False
    opts.frame_opts.preemph_coeff = 0
    opts.frame_opts.window_type = "hann"

    opts.frame_opts.round_to_power_of_two = False

    opts.mel_opts.low_freq = 0
    opts.mel_opts.high_freq = 8000
    opts.mel_opts.num_bins = 64

    fbank = knf.OnlineFbank(opts)
    return fbank


def compute_features(audio, fbank) -> np.ndarray:
    """
    Args:
      audio: (num_samples,), np.float32
      fbank: the fbank extractor
    Returns:
      features: (num_frames, feat_dim), np.float32
    """
    assert len(audio.shape) == 1, audio.shape
    fbank.accept_waveform(16000, audio)
    ans = []
    processed = 0
    while processed < fbank.num_frames_ready:
        ans.append(np.array(fbank.get_frame(processed)))
        processed += 1
    ans = np.stack(ans)
    return ans


def display(sess):
    print("==========Input==========")
    for i in sess.get_inputs():
        print(i)
    print("==========Output==========")
    for i in sess.get_outputs():
        print(i)


"""
==========Input==========
NodeArg(name='audio_signal', type='tensor(float)', shape=['audio_signal_dynamic_axes_1', 64, 'audio_signal_dynamic_axes_2'])
NodeArg(name='length', type='tensor(int64)', shape=['length_dynamic_axes_1'])
==========Output==========
NodeArg(name='logprobs', type='tensor(float)', shape=['logprobs_dynamic_axes_1', 'logprobs_dynamic_axes_2', 34])
"""


class OnnxModel:
    def __init__(
        self,
        filename: str,
    ):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.model = ort.InferenceSession(
            filename,
            sess_options=session_opts,
            providers=["CPUExecutionProvider"],
        )
        display(self.model)

    def __call__(self, x: np.ndarray):
        # x: (T, C)
        x = torch.from_numpy(x)
        x = x.t().unsqueeze(0)
        # x: [1, C, T]
        x_lens = torch.tensor([x.shape[-1]], dtype=torch.int64)

        log_probs = self.model.run(
            [
                self.model.get_outputs()[0].name,
            ],
            {
                self.model.get_inputs()[0].name: x.numpy(),
                self.model.get_inputs()[1].name: x_lens.numpy(),
            },
        )[0]
        # [batch_size, T, dim]
        return log_probs


def main():
    filename = "./model.int8.onnx"
    tokens = "./tokens.txt"
    wav = "./example.wav"

    model = OnnxModel(filename)

    id2token = dict()
    with open(tokens, encoding="utf-8") as f:
        for line in f:
            fields = line.split()
            if len(fields) == 1:
                id2token[int(fields[0])] = " "
            else:
                t, idx = fields
                id2token[int(idx)] = t

    fbank = create_fbank()
    audio, sample_rate = sf.read(wav, dtype="float32", always_2d=True)
    audio = audio[:, 0]  # only use the first channel
    if sample_rate != 16000:
        audio = librosa.resample(
            audio,
            orig_sr=sample_rate,
            target_sr=16000,
        )
        sample_rate = 16000

    features = compute_features(audio, fbank)
    print("features.shape", features.shape)

    blank = len(id2token) - 1
    prev = -1
    ans = []
    log_probs = model(features)
    print("log_probs", log_probs.shape)
    log_probs = torch.from_numpy(log_probs)[0]
    ids = torch.argmax(log_probs, dim=1).tolist()
    for i in ids:
        if i != blank and i != prev:
            ans.append(i)
        prev = i

    tokens = [id2token[i] for i in ans]

    text = "".join(tokens)
    print(wav)
    print(text)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/nemo/GigaAM/test-onnx-rnnt.py
================================================
#!/usr/bin/env python3
# Copyright      2024  Xiaomi Corp.        (authors: Fangjun Kuang)

import argparse
from pathlib import Path

import kaldi_native_fbank as knf
import librosa
import numpy as np
import onnxruntime as ort
import soundfile as sf
import torch


def create_fbank():
    opts = knf.FbankOptions()
    opts.frame_opts.dither = 0
    opts.frame_opts.remove_dc_offset = False
    opts.frame_opts.preemph_coeff = 0
    opts.frame_opts.window_type = "hann"

    opts.frame_opts.round_to_power_of_two = False

    opts.mel_opts.low_freq = 0
    opts.mel_opts.high_freq = 8000
    opts.mel_opts.num_bins = 64

    fbank = knf.OnlineFbank(opts)
    return fbank


def compute_features(audio, fbank):
    assert len(audio.shape) == 1, audio.shape
    fbank.accept_waveform(16000, audio)
    ans = []
    processed = 0
    while processed < fbank.num_frames_ready:
        ans.append(np.array(fbank.get_frame(processed)))
        processed += 1
    ans = np.stack(ans)
    return ans


def display(sess):
    print("==========Input==========")
    for i in sess.get_inputs():
        print(i)
    print("==========Output==========")
    for i in sess.get_outputs():
        print(i)


"""
==========Input==========
NodeArg(name='audio_signal', type='tensor(float)', shape=['audio_signal_dynamic_axes_1', 64, 'audio_signal_dynamic_axes_2'])
NodeArg(name='length', type='tensor(int64)', shape=['length_dynamic_axes_1'])
==========Output==========
NodeArg(name='outputs', type='tensor(float)', shape=['outputs_dynamic_axes_1', 768, 'outputs_dynamic_axes_2'])
NodeArg(name='encoded_lengths', type='tensor(int64)', shape=['encoded_lengths_dynamic_axes_1'])
==========Input==========
NodeArg(name='targets', type='tensor(int32)', shape=['targets_dynamic_axes_1', 'targets_dynamic_axes_2'])
NodeArg(name='target_length', type='tensor(int32)', shape=['target_length_dynamic_axes_1'])
NodeArg(name='states.1', type='tensor(float)', shape=[1, 'states.1_dim_1', 320])
NodeArg(name='onnx::LSTM_3', type='tensor(float)', shape=[1, 1, 320])
==========Output==========
NodeArg(name='outputs', type='tensor(float)', shape=['outputs_dynamic_axes_1', 320, 'outputs_dynamic_axes_2'])
NodeArg(name='prednet_lengths', type='tensor(int32)', shape=['prednet_lengths_dynamic_axes_1'])
NodeArg(name='states', type='tensor(float)', shape=[1, 'states_dynamic_axes_1', 320])
NodeArg(name='74', type='tensor(float)', shape=[1, 'states_dynamic_axes_1', 320])
==========Input==========
NodeArg(name='encoder_outputs', type='tensor(float)', shape=['encoder_outputs_dynamic_axes_1', 768, 'encoder_outputs_dynamic_axes_2'])
NodeArg(name='decoder_outputs', type='tensor(float)', shape=['decoder_outputs_dynamic_axes_1', 320, 'decoder_outputs_dynamic_axes_2'])
==========Output==========
NodeArg(name='outputs', type='tensor(float)', shape=['outputs_dynamic_axes_1', 'outputs_dynamic_axes_2', 'outputs_dynamic_axes_3', 513])
"""


class OnnxModel:
    def __init__(
        self,
        encoder: str,
        decoder: str,
        joiner: str,
    ):
        self.init_encoder(encoder)
        display(self.encoder)
        self.init_decoder(decoder)
        display(self.decoder)
        self.init_joiner(joiner)
        display(self.joiner)

    def init_encoder(self, encoder):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.encoder = ort.InferenceSession(
            encoder,
            sess_options=session_opts,
            providers=["CPUExecutionProvider"],
        )

        meta = self.encoder.get_modelmeta().custom_metadata_map
        self.normalize_type = meta["normalize_type"]
        print(meta)

        self.pred_rnn_layers = int(meta["pred_rnn_layers"])
        self.pred_hidden = int(meta["pred_hidden"])

    def init_decoder(self, decoder):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.decoder = ort.InferenceSession(
            decoder,
            sess_options=session_opts,
            providers=["CPUExecutionProvider"],
        )

    def init_joiner(self, joiner):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.joiner = ort.InferenceSession(
            joiner,
            sess_options=session_opts,
            providers=["CPUExecutionProvider"],
        )

    def get_decoder_state(self):
        batch_size = 1
        state0 = torch.zeros(self.pred_rnn_layers, batch_size, self.pred_hidden).numpy()
        state1 = torch.zeros(self.pred_rnn_layers, batch_size, self.pred_hidden).numpy()
        return state0, state1

    def run_encoder(self, x: np.ndarray):
        # x: (T, C)
        x = torch.from_numpy(x)
        x = x.t().unsqueeze(0)
        # x: [1, C, T]
        x_lens = torch.tensor([x.shape[-1]], dtype=torch.int64)

        (encoder_out, out_len) = self.encoder.run(
            [
                self.encoder.get_outputs()[0].name,
                self.encoder.get_outputs()[1].name,
            ],
            {
                self.encoder.get_inputs()[0].name: x.numpy(),
                self.encoder.get_inputs()[1].name: x_lens.numpy(),
            },
        )
        # [batch_size, dim, T]
        return encoder_out

    def run_decoder(
        self,
        token: int,
        state0: np.ndarray,
        state1: np.ndarray,
    ):
        target = torch.tensor([[token]], dtype=torch.int32).numpy()
        target_len = torch.tensor([1], dtype=torch.int32).numpy()

        (decoder_out, decoder_out_length, state0_next, state1_next,) = self.decoder.run(
            [
                self.decoder.get_outputs()[0].name,
                self.decoder.get_outputs()[1].name,
                self.decoder.get_outputs()[2].name,
                self.decoder.get_outputs()[3].name,
            ],
            {
                self.decoder.get_inputs()[0].name: target,
                self.decoder.get_inputs()[1].name: target_len,
                self.decoder.get_inputs()[2].name: state0,
                self.decoder.get_inputs()[3].name: state1,
            },
        )
        return decoder_out, state0_next, state1_next

    def run_joiner(
        self,
        encoder_out: np.ndarray,
        decoder_out: np.ndarray,
    ):
        # encoder_out: [batch_size,  dim, 1]
        # decoder_out: [batch_size,  dim, 1]
        logit = self.joiner.run(
            [
                self.joiner.get_outputs()[0].name,
            ],
            {
                self.joiner.get_inputs()[0].name: encoder_out,
                self.joiner.get_inputs()[1].name: decoder_out,
            },
        )[0]
        # logit: [batch_size, 1, 1, vocab_size]
        return logit


def main():
    model = OnnxModel("encoder.int8.onnx", "decoder.onnx", "joiner.onnx")

    id2token = dict()
    with open("./tokens.txt", encoding="utf-8") as f:
        for line in f:
            fields = line.split()
            if len(fields) == 1:
                id2token[int(fields[0])] = " "
            else:
                t, idx = fields
                id2token[int(idx)] = t

    fbank = create_fbank()
    audio, sample_rate = sf.read("./example.wav", dtype="float32", always_2d=True)
    audio = audio[:, 0]  # only use the first channel
    if sample_rate != 16000:
        audio = librosa.resample(
            audio,
            orig_sr=sample_rate,
            target_sr=16000,
        )
        sample_rate = 16000

    tail_padding = np.zeros(sample_rate * 2)

    audio = np.concatenate([audio, tail_padding])

    blank = len(id2token) - 1
    ans = [blank]
    state0, state1 = model.get_decoder_state()
    decoder_out, state0_next, state1_next = model.run_decoder(ans[-1], state0, state1)

    features = compute_features(audio, fbank)
    print("audio.shape", audio.shape)
    print("features.shape", features.shape)

    encoder_out = model.run_encoder(features)
    # encoder_out:[batch_size, dim, T)
    for t in range(encoder_out.shape[2]):
        encoder_out_t = encoder_out[:, :, t : t + 1]
        logits = model.run_joiner(encoder_out_t, decoder_out)
        logits = torch.from_numpy(logits)
        logits = logits.squeeze()
        idx = torch.argmax(logits, dim=-1).item()
        if idx != blank:
            ans.append(idx)
            state0 = state0_next
            state1 = state1_next
            decoder_out, state0_next, state1_next = model.run_decoder(
                ans[-1], state0, state1
            )

    ans = ans[1:]  # remove the first blank
    print(ans)
    tokens = [id2token[i] for i in ans]
    underline = "▁"
    #  underline = b"\xe2\x96\x81".decode()
    text = "".join(tokens).replace(underline, " ").strip()
    print("./example.wav")
    print(text)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/nemo/README.md
================================================
# Introduction

This directory contains scripts for exporting models
from [NeMo](https://github.com/NVIDIA/NeMo/) to onnx
so that you can use them in `sherpa-onnx`.

- [./speaker-verification](./speaker-verification) contains models for speaker verification.


================================================
FILE: scripts/nemo/canary/export_onnx_180m_flash.py
================================================
#!/usr/bin/env python3
# Copyright      2025  Xiaomi Corp.        (authors: Fangjun Kuang)

"""
<|en|>
<|pnc|>
<|noitn|>
<|nodiarize|>
<|notimestamp|>
"""

import os
from typing import Dict, Tuple

import nemo
import onnx
import torch
from nemo.collections.common.parts import NEG_INF
from onnxruntime.quantization import QuantType, quantize_dynamic

"""
NotImplemented: [ONNXRuntimeError] : 9 : NOT_IMPLEMENTED :
Could not find an implementation for Trilu(14) node with name '/Trilu'

See also https://github.com/microsoft/onnxruntime/issues/16189#issuecomment-1722219631

So we use fixed_form_attention_mask() to replace
the original form_attention_mask()
"""


def fixed_form_attention_mask(input_mask, diagonal=None):
    """
    Fixed: Build attention mask with optional masking of future tokens we forbid
    to attend to (e.g. as it is in Transformer decoder).

    Args:
        input_mask: binary mask of size B x L with 1s corresponding to valid
            tokens and 0s corresponding to padding tokens
        diagonal: diagonal where triangular future mask starts
            None -- do not mask anything
            0 -- regular translation or language modeling future masking
            1 -- query stream masking as in XLNet architecture
    Returns:
        attention_mask: mask of size B x 1 x L x L with 0s corresponding to
            tokens we plan to attend to and -10000 otherwise
    """

    if input_mask is None:
        return None
    attn_shape = (1, input_mask.shape[1], input_mask.shape[1])
    attn_mask = input_mask.to(dtype=bool).unsqueeze(1)
    if diagonal is not None:
        future_mask = torch.tril(
            torch.ones(
                attn_shape,
                dtype=torch.int64,  # it was torch.bool
                # but onnxruntime does not support torch.int32 or torch.bool
                # in torch.tril
                device=input_mask.device,
            ),
            diagonal,
        ).bool()
        attn_mask = attn_mask & future_mask
    attention_mask = (1 - attn_mask.to(torch.float)) * NEG_INF
    return attention_mask.unsqueeze(1)


nemo.collections.common.parts.form_attention_mask = fixed_form_attention_mask

from nemo.collections.asr.models import EncDecMultiTaskModel


def add_meta_data(filename: str, meta_data: Dict[str, str]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)
    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    onnx.save(model, filename)


def lens_to_mask(lens, max_length):
    """
    Create a mask from a tensor of lengths.
    """
    batch_size = lens.shape[0]
    arange = torch.arange(max_length, device=lens.device)
    mask = arange.expand(batch_size, max_length) < lens.unsqueeze(1)
    return mask


class EncoderWrapper(torch.nn.Module):
    def __init__(self, m):
        super().__init__()
        self.encoder = m.encoder
        self.encoder_decoder_proj = m.encoder_decoder_proj

    def forward(
        self, x: torch.Tensor, x_len: torch.Tensor
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """
        Args:
          x: (N, T, C)
          x_len: (N,)
        Returns:
          - enc_states: (N, T, C)
          - encoded_len: (N,)
          - enc_mask: (N, T)
        """
        x = x.permute(0, 2, 1)
        # x: (N, C, T)
        encoded, encoded_len = self.encoder(audio_signal=x, length=x_len)

        enc_states = encoded.permute(0, 2, 1)

        enc_states = self.encoder_decoder_proj(enc_states)

        enc_mask = lens_to_mask(encoded_len, enc_states.shape[1])

        return enc_states, encoded_len, enc_mask


class DecoderWrapper(torch.nn.Module):
    def __init__(self, m):
        super().__init__()
        self.decoder = m.transf_decoder
        self.log_softmax = m.log_softmax

        # We use only greedy search, so there is no need to compute log_softmax
        self.log_softmax.mlp.log_softmax = False

    def forward(
        self,
        decoder_input_ids: torch.Tensor,
        decoder_mems_list_0: torch.Tensor,
        decoder_mems_list_1: torch.Tensor,
        decoder_mems_list_2: torch.Tensor,
        decoder_mems_list_3: torch.Tensor,
        decoder_mems_list_4: torch.Tensor,
        decoder_mems_list_5: torch.Tensor,
        enc_states: torch.Tensor,
        enc_mask: torch.Tensor,
    ):
        """
        Args:
          decoder_input_ids: (N, num_tokens), torch.int32
          decoder_mems_list_i: (N, num_tokens, 1024)
          enc_states: (N, T, 1024)
          enc_mask: (N, T)
        Returns:
          - logits: (N, 1, vocab_size)
          - decoder_mems_list_i: (N, num_tokens_2, 1024)
        """
        pos = decoder_input_ids[0][-1].item()
        decoder_input_ids = decoder_input_ids[:, :-1]

        decoder_hidden_states = self.decoder.embedding.forward(
            decoder_input_ids, start_pos=pos
        )
        decoder_input_mask = torch.ones_like(decoder_input_ids).float()

        decoder_mems_list = self.decoder.decoder.forward(
            decoder_hidden_states,
            decoder_input_mask,
            enc_states,
            enc_mask,
            [
                decoder_mems_list_0,
                decoder_mems_list_1,
                decoder_mems_list_2,
                decoder_mems_list_3,
                decoder_mems_list_4,
                decoder_mems_list_5,
            ],
            return_mems=True,
        )
        logits = self.log_softmax(hidden_states=decoder_mems_list[-1][:, -1:])

        return logits, decoder_mems_list


def export_encoder(canary_model):
    encoder = EncoderWrapper(canary_model)
    x = torch.rand(1, 4000, 128)
    x_lens = torch.tensor([x.shape[1]], dtype=torch.int64)

    encoder_filename = "encoder.onnx"
    torch.onnx.export(
        encoder,
        (x, x_lens),
        encoder_filename,
        input_names=["x", "x_len"],
        output_names=["enc_states", "enc_len", "enc_mask"],
        opset_version=14,
        dynamic_axes={
            "x": {0: "N", 1: "T"},
            "x_len": {0: "N"},
            "enc_states": {0: "N", 1: "T"},
            "enc_len": {0: "N"},
            "enc_mask": {0: "N", 1: "T"},
        },
    )


def export_decoder(canary_model):
    decoder = DecoderWrapper(canary_model)
    decoder_input_ids = torch.tensor([[1, 0]], dtype=torch.int32)

    decoder_mems_list_0 = torch.zeros(1, 10, 1024)
    decoder_mems_list_1 = torch.zeros(1, 10, 1024)
    decoder_mems_list_2 = torch.zeros(1, 10, 1024)
    decoder_mems_list_3 = torch.zeros(1, 10, 1024)
    decoder_mems_list_4 = torch.zeros(1, 10, 1024)
    decoder_mems_list_5 = torch.zeros(1, 10, 1024)

    enc_states = torch.zeros(1, 1000, 1024)
    enc_mask = torch.ones(1, 1000).bool()

    torch.onnx.export(
        decoder,
        (
            decoder_input_ids,
            decoder_mems_list_0,
            decoder_mems_list_1,
            decoder_mems_list_2,
            decoder_mems_list_3,
            decoder_mems_list_4,
            decoder_mems_list_5,
            enc_states,
            enc_mask,
        ),
        "decoder.onnx",
        dynamo=True,
        opset_version=14,
        external_data=False,
        input_names=[
            "decoder_input_ids",
            "decoder_mems_list_0",
            "decoder_mems_list_1",
            "decoder_mems_list_2",
            "decoder_mems_list_3",
            "decoder_mems_list_4",
            "decoder_mems_list_5",
            "enc_states",
            "enc_mask",
        ],
        output_names=[
            "logits",
            "next_decoder_mem_list_0",
            "next_decoder_mem_list_1",
            "next_decoder_mem_list_2",
            "next_decoder_mem_list_3",
            "next_decoder_mem_list_4",
            "next_decoder_mem_list_5",
        ],
        dynamic_axes={
            "decoder_input_ids": {1: "num_tokens"},
            "decoder_mems_list_0": {1: "num_tokens"},
            "decoder_mems_list_1": {1: "num_tokens"},
            "decoder_mems_list_2": {1: "num_tokens"},
            "decoder_mems_list_3": {1: "num_tokens"},
            "decoder_mems_list_4": {1: "num_tokens"},
            "decoder_mems_list_5": {1: "num_tokens"},
            "enc_states": {1: "T"},
            "enc_mask": {1: "T"},
        },
    )


def export_tokens(canary_model):
    underline = "▁"
    with open("./tokens.txt", "w", encoding="utf-8") as f:
        for i in range(canary_model.tokenizer.vocab_size):
            s = canary_model.tokenizer.ids_to_text([i])

            if s[0] == " ":
                s = underline + s[1:]

            f.write(f"{s} {i}\n")
        print("Saved to tokens.txt")


@torch.no_grad()
def main():
    canary_model = EncDecMultiTaskModel.from_pretrained("nvidia/canary-180m-flash")
    canary_model.eval()

    preprocessor = canary_model.cfg["preprocessor"]
    sample_rate = preprocessor["sample_rate"]
    normalize_type = preprocessor["normalize"]
    window_size = preprocessor["window_size"]  # ms
    window_stride = preprocessor["window_stride"]  # ms
    window = preprocessor["window"]
    features = preprocessor["features"]
    n_fft = preprocessor["n_fft"]
    vocab_size = canary_model.tokenizer.vocab_size  # 5248

    subsampling_factor = canary_model.cfg["encoder"]["subsampling_factor"]

    assert sample_rate == 16000, sample_rate
    assert normalize_type == "per_feature", normalize_type
    assert window_size == 0.025, window_size
    assert window_stride == 0.01, window_stride
    assert window == "hann", window
    assert features == 128, features
    assert n_fft == 512, n_fft
    assert subsampling_factor == 8, subsampling_factor

    export_tokens(canary_model)
    export_encoder(canary_model)
    export_decoder(canary_model)

    for m in ["encoder", "decoder"]:
        quantize_dynamic(
            model_input=f"./{m}.onnx",
            model_output=f"./{m}.int8.onnx",
            weight_type=QuantType.QUInt8,
        )

    meta_data = {
        "vocab_size": vocab_size,
        "normalize_type": normalize_type,
        "subsampling_factor": subsampling_factor,
        "model_type": "EncDecMultiTaskModel",
        "version": "1",
        "model_author": "NeMo",
        "url": "https://huggingface.co/nvidia/canary-180m-flash",
        "feat_dim": features,
    }

    add_meta_data("encoder.onnx", meta_data)
    add_meta_data("encoder.int8.onnx", meta_data)

    """
    To fix the following error with onnxruntime 1.17.1 and 1.16.3:

    onnxruntime.capi.onnxruntime_pybind11_state.Fail: [ONNXRuntimeError] : 1 :FAIL : Load model from ./decoder.int8.onnx failed:/Users/runner/work/1/s/onnxruntime/core/graph/model.cc:150 onnxruntime::Model::Model(onnx::ModelProto &&, const onnxruntime::PathString &, const onnxruntime::IOnnxRuntimeOpSchemaRegistryList *, const logging::Logger &, const onnxruntime::ModelOptions &)
    Unsupported model IR version: 10, max supported IR version: 9
    """
    for filename in ["./decoder.onnx", "./decoder.int8.onnx"]:
        model = onnx.load(filename)
        print("old", model.ir_version)
        model.ir_version = 9
        print("new", model.ir_version)
        onnx.save(model, filename)

    os.system("ls -lh *.onnx")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/nemo/canary/run_180m_flash.sh
================================================
#!/usr/bin/env bash
# Copyright      2025  Xiaomi Corp.        (authors: Fangjun Kuang)

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/de.wav
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/en.wav

pip install \
  nemo_toolkit['asr'] \
  "numpy<2" \
  ipython \
  kaldi-native-fbank \
  librosa \
  onnx==1.17.0 \
  onnxruntime==1.17.1 \
  onnxscript \
  soundfile

python3 ./export_onnx_180m_flash.py
ls -lh *.onnx


log "-----fp32------"

python3 ./test_180m_flash.py \
  --encoder ./encoder.onnx \
  --decoder ./decoder.onnx \
  --source-lang en \
  --target-lang en \
  --tokens ./tokens.txt \
  --wav ./en.wav

python3 ./test_180m_flash.py \
  --encoder ./encoder.onnx \
  --decoder ./decoder.onnx \
  --source-lang en \
  --target-lang de \
  --tokens ./tokens.txt \
  --wav ./en.wav

python3 ./test_180m_flash.py \
  --encoder ./encoder.onnx \
  --decoder ./decoder.onnx \
  --source-lang de \
  --target-lang de \
  --tokens ./tokens.txt \
  --wav ./de.wav

python3 ./test_180m_flash.py \
  --encoder ./encoder.onnx \
  --decoder ./decoder.onnx \
  --source-lang de \
  --target-lang en \
  --tokens ./tokens.txt \
  --wav ./de.wav


log "-----int8------"

python3 ./test_180m_flash.py \
  --encoder ./encoder.int8.onnx \
  --decoder ./decoder.int8.onnx \
  --source-lang en \
  --target-lang en \
  --tokens ./tokens.txt \
  --wav ./en.wav

python3 ./test_180m_flash.py \
  --encoder ./encoder.int8.onnx \
  --decoder ./decoder.int8.onnx \
  --source-lang en \
  --target-lang de \
  --tokens ./tokens.txt \
  --wav ./en.wav

python3 ./test_180m_flash.py \
  --encoder ./encoder.int8.onnx \
  --decoder ./decoder.int8.onnx \
  --source-lang de \
  --target-lang de \
  --tokens ./tokens.txt \
  --wav ./de.wav

python3 ./test_180m_flash.py \
  --encoder ./encoder.int8.onnx \
  --decoder ./decoder.int8.onnx \
  --source-lang de \
  --target-lang en \
  --tokens ./tokens.txt \
  --wav ./de.wav


================================================
FILE: scripts/nemo/canary/test_180m_flash.py
================================================
#!/usr/bin/env python3
# Copyright      2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import argparse
import time
from pathlib import Path
from typing import List

import kaldi_native_fbank as knf
import librosa
import numpy as np
import onnxruntime as ort
import soundfile as sf


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--encoder", type=str, required=True, help="Path to encoder.onnx"
    )
    parser.add_argument(
        "--decoder", type=str, required=True, help="Path to decoder.onnx"
    )

    parser.add_argument("--tokens", type=str, required=True, help="Path to tokens.txt")

    parser.add_argument(
        "--source-lang",
        type=str,
        help="Language of the input wav. Valid values are: en, de, es, fr",
    )
    parser.add_argument(
        "--target-lang",
        type=str,
        help="Language of the recognition result. Valid values are: en, de, es, fr",
    )
    parser.add_argument(
        "--use-pnc",
        type=int,
        default=1,
        help="1 to enable cases and punctuations. 0 to disable that",
    )

    parser.add_argument("--wav", type=str, required=True, help="Path to test.wav")

    return parser.parse_args()


def display(sess, model):
    print(f"=========={model} Input==========")
    for i in sess.get_inputs():
        print(i)
    print(f"=========={model }Output==========")
    for i in sess.get_outputs():
        print(i)


class OnnxModel:
    def __init__(
        self,
        encoder: str,
        decoder: str,
    ):
        self.init_encoder(encoder)
        display(self.encoder, "encoder")

        self.init_decoder(decoder)
        display(self.decoder, "decoder")

    def init_encoder(self, encoder):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.encoder = ort.InferenceSession(
            encoder,
            sess_options=session_opts,
            providers=["CPUExecutionProvider"],
        )

        meta = self.encoder.get_modelmeta().custom_metadata_map
        self.normalize_type = meta["normalize_type"]
        print(meta)

    def init_decoder(self, decoder):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.decoder = ort.InferenceSession(
            decoder,
            sess_options=session_opts,
            providers=["CPUExecutionProvider"],
        )

    def run_encoder(self, x: np.ndarray, x_lens: np.ndarray):
        """
        Args:
          x: (N, T, C), np.float
          x_lens: (N,), np.int64
        Returns:
          enc_states: (N, T, C)
          enc_lens: (N,), np.int64
          enc_masks: (N, T), np.bool
        """
        enc_states, enc_lens, enc_masks = self.encoder.run(
            [
                self.encoder.get_outputs()[0].name,
                self.encoder.get_outputs()[1].name,
                self.encoder.get_outputs()[2].name,
            ],
            {
                self.encoder.get_inputs()[0].name: x,
                self.encoder.get_inputs()[1].name: x_lens,
            },
        )
        return enc_states, enc_lens, enc_masks

    def run_decoder(
        self,
        decoder_input_ids: np.ndarray,
        decoder_mems_list: List[np.ndarray],
        enc_states: np.ndarray,
        enc_mask: np.ndarray,
    ):
        """
        Args:
          decoder_input_ids: (N, num_tokens), int32
          decoder_mems_list: a list of tensors, each of which is (N, num_tokens, C)
          enc_states: (N, T, C), float
          enc_mask: (N, T), bool
        Returns:
          logits: (1, 1, vocab_size), float
          new_decoder_mems_list:
        """
        (logits, *new_decoder_mems_list) = self.decoder.run(
            [
                self.decoder.get_outputs()[0].name,
                self.decoder.get_outputs()[1].name,
                self.decoder.get_outputs()[2].name,
                self.decoder.get_outputs()[3].name,
                self.decoder.get_outputs()[4].name,
                self.decoder.get_outputs()[5].name,
                self.decoder.get_outputs()[6].name,
            ],
            {
                self.decoder.get_inputs()[0].name: decoder_input_ids,
                self.decoder.get_inputs()[1].name: decoder_mems_list[0],
                self.decoder.get_inputs()[2].name: decoder_mems_list[1],
                self.decoder.get_inputs()[3].name: decoder_mems_list[2],
                self.decoder.get_inputs()[4].name: decoder_mems_list[3],
                self.decoder.get_inputs()[5].name: decoder_mems_list[4],
                self.decoder.get_inputs()[6].name: decoder_mems_list[5],
                self.decoder.get_inputs()[7].name: enc_states,
                self.decoder.get_inputs()[8].name: enc_mask,
            },
        )
        return logits, new_decoder_mems_list


def create_fbank():
    opts = knf.FbankOptions()
    opts.frame_opts.dither = 0
    opts.frame_opts.remove_dc_offset = False
    opts.frame_opts.window_type = "hann"

    opts.mel_opts.low_freq = 0
    opts.mel_opts.num_bins = 128

    opts.mel_opts.is_librosa = True

    fbank = knf.OnlineFbank(opts)
    return fbank


def compute_features(audio, fbank):
    assert len(audio.shape) == 1, audio.shape
    fbank.accept_waveform(16000, audio)
    ans = []
    processed = 0
    while processed < fbank.num_frames_ready:
        ans.append(np.array(fbank.get_frame(processed)))
        processed += 1
    ans = np.stack(ans)
    return ans


def main():
    args = get_args()
    assert Path(args.encoder).is_file(), args.encoder
    assert Path(args.decoder).is_file(), args.decoder
    assert Path(args.tokens).is_file(), args.tokens
    assert Path(args.wav).is_file(), args.wav

    print(vars(args))

    id2token = dict()
    token2id = dict()
    with open(args.tokens, encoding="utf-8") as f:
        for line in f:
            fields = line.split()
            if len(fields) == 2:
                t, idx = fields[0], int(fields[1])
                if line[0] == " ":
                    t = " " + t
            else:
                t = " "
                idx = int(fields[0])

            id2token[idx] = t
            token2id[t] = idx

    model = OnnxModel(args.encoder, args.decoder)

    fbank = create_fbank()

    start = time.time()
    audio, sample_rate = sf.read(args.wav, dtype="float32", always_2d=True)
    audio = audio[:, 0]  # only use the first channel
    if sample_rate != 16000:
        audio = librosa.resample(
            audio,
            orig_sr=sample_rate,
            target_sr=16000,
        )
        sample_rate = 16000

    features = compute_features(audio, fbank)
    if model.normalize_type != "":
        assert model.normalize_type == "per_feature", model.normalize_type
        mean = features.mean(axis=0, keepdims=True)
        stddev = features.std(axis=0, keepdims=True) + 1e-5
        features = (features - mean) / stddev

    features = np.expand_dims(features, axis=0)
    # features.shape: (1, 291, 128)

    features_len = np.array([features.shape[1]], dtype=np.int64)

    enc_states, _, enc_masks = model.run_encoder(features, features_len)

    decoder_input_ids = []
    decoder_input_ids.append(token2id["<|startofcontext|>"])
    decoder_input_ids.append(token2id["<|startoftranscript|>"])
    decoder_input_ids.append(token2id["<|emo:undefined|>"])
    if args.source_lang in ("en", "es", "de", "fr"):
        decoder_input_ids.append(token2id[f"<|{args.source_lang}|>"])
    else:
        decoder_input_ids.append(token2id[f"<|en|>"])

    if args.target_lang in ("en", "es", "de", "fr"):
        decoder_input_ids.append(token2id[f"<|{args.target_lang}|>"])
    else:
        decoder_input_ids.append(token2id[f"<|en|>"])

    if args.use_pnc:
        decoder_input_ids.append(token2id[f"<|pnc|>"])
    else:
        decoder_input_ids.append(token2id[f"<|nopnc|>"])

    decoder_input_ids.append(token2id[f"<|noitn|>"])
    decoder_input_ids.append(token2id["<|notimestamp|>"])
    decoder_input_ids.append(token2id["<|nodiarize|>"])

    decoder_mems_list = [np.zeros((1, 0, 1024), dtype=np.float32) for _ in range(6)]

    for pos, decoder_input_id in enumerate(decoder_input_ids):
        logits, decoder_mems_list = model.run_decoder(
            np.array([[decoder_input_id, pos]], dtype=np.int32),
            decoder_mems_list,
            enc_states,
            enc_masks,
        )
    tokens = [logits.argmax()]
    print("decoder_input_ids", decoder_input_ids)
    eos = token2id["<|endoftext|>"]

    for i in range(1, 200):
        decoder_input_ids = [tokens[-1], i]
        logits, decoder_mems_list = model.run_decoder(
            np.array([decoder_input_ids], dtype=np.int32),
            decoder_mems_list,
            enc_states,
            enc_masks,
        )
        t = logits.argmax()
        if t == eos:
            break
        tokens.append(t)
    print("len(tokens)", len(tokens))
    print("tokens", tokens)

    text = "".join([id2token[i] for i in tokens])

    underline = "▁"
    #  underline = b"\xe2\x96\x81".decode()

    text = text.replace(underline, " ").strip()
    print("text:", text)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/nemo/fast-conformer-hybrid-transducer-ctc/README.md
================================================
# Introduction

This folder contains scripts for exporting models from

  - https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_en_fastconformer_hybrid_large_streaming_80ms
  - https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_en_fastconformer_hybrid_large_streaming_480ms
  - https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_en_fastconformer_hybrid_large_streaming_1040ms

  - # https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_en_fastconformer_ctc_large
  - # https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_enes_conformer_transducer_large_codesw
  - # https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_en_fastconformer_transducer_large
  - # https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_enzh_fastconformer_transducer_large_codesw


  - # https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_fa_fastconformer_hybrid_large
  - # https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_it_fastconformer_hybrid_large_pc
  - # https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_pl_fastconformer_hybrid_large_pc
  - # https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_ua_fastconformer_hybrid_large_pc

  - https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_en_fastconformer_hybrid_large_pc
  - https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_es_fastconformer_hybrid_large_pc
  - https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_multilingual_fastconformer_hybrid_large_pc_blend_eu
  - https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_multilingual_fastconformer_hybrid_large_pc

  - https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/parakeet-tdt_ctc-110m
  - https://huggingface.co/nvidia/parakeet-tdt_ctc-0.6b-ja
  - https://huggingface.co/nvidia/stt_pt_fastconformer_hybrid_large_pc
  - https://huggingface.co/nvidia/stt_de_fastconformer_hybrid_large_pc

to `sherpa-onnx`.


================================================
FILE: scripts/nemo/fast-conformer-hybrid-transducer-ctc/export-onnx-ctc-non-streaming.py
================================================
#!/usr/bin/env python3
# Copyright      2024  Xiaomi Corp.        (authors: Fangjun Kuang)
import argparse
from typing import Dict

import nemo.collections.asr as nemo_asr
import onnx
import torch
from onnxruntime.quantization import QuantType, quantize_dynamic


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model",
        type=str,
        required=True,
    )
    parser.add_argument(
        "--doc",
        type=str,
        default="",
    )
    return parser.parse_args()


def add_meta_data(filename: str, meta_data: Dict[str, str]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)
    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    onnx.save(model, filename)


@torch.no_grad()
def main():
    args = get_args()
    model_name = args.model

    asr_model = nemo_asr.models.ASRModel.from_pretrained(model_name=model_name)
    print(asr_model.cfg)
    print(asr_model)

    with open("./tokens.txt", "w", encoding="utf-8") as f:
        for i, s in enumerate(asr_model.joint.vocabulary):
            f.write(f"{s} {i}\n")
        f.write(f"<blk> {i+1}\n")
        print("Saved to tokens.txt")

    decoder_type = "ctc"
    asr_model.change_decoding_strategy(decoder_type=decoder_type)
    asr_model.eval()

    asr_model.set_export_config({"decoder_type": "ctc"})

    filename = "model.onnx"

    asr_model.export(filename)

    normalize_type = asr_model.cfg.preprocessor.normalize
    if normalize_type == "NA":
        normalize_type = ""

    meta_data = {
        "vocab_size": asr_model.decoder.vocab_size,
        "normalize_type": normalize_type,
        "subsampling_factor": 8,
        "model_type": "EncDecHybridRNNTCTCBPEModel",
        "version": "1",
        "model_author": "NeMo",
        "url": f"https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/{model_name}"
        if "/" in model_name
        else f"https://huggingface.co/{model_name}",
        "comment": "Only the CTC branch is exported",
        "doc": args.doc,
    }
    add_meta_data(filename, meta_data)

    quantize_dynamic(
        model_input="./model.onnx",
        model_output="./model.int8.onnx",
        weight_type=QuantType.QUInt8,
    )

    print("preprocessor", asr_model.cfg.preprocessor)
    print(meta_data)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/nemo/fast-conformer-hybrid-transducer-ctc/export-onnx-ctc.py
================================================
#!/usr/bin/env python3
# Copyright      2024  Xiaomi Corp.        (authors: Fangjun Kuang)
import argparse
from typing import Dict

import nemo.collections.asr as nemo_asr
import onnx
import torch
from onnxruntime.quantization import QuantType, quantize_dynamic


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model",
        type=str,
        required=True,
        choices=["80", "480", "1040"],
    )
    return parser.parse_args()


def add_meta_data(filename: str, meta_data: Dict[str, str]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)
    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    onnx.save(model, filename)


@torch.no_grad()
def main():
    args = get_args()
    model_name = f"stt_en_fastconformer_hybrid_large_streaming_{args.model}ms"

    asr_model = nemo_asr.models.ASRModel.from_pretrained(model_name=model_name)

    with open("./tokens.txt", "w", encoding="utf-8") as f:
        for i, s in enumerate(asr_model.joint.vocabulary):
            f.write(f"{s} {i}\n")
        f.write(f"<blk> {i+1}\n")
        print("Saved to tokens.txt")

    decoder_type = "ctc"
    asr_model.change_decoding_strategy(decoder_type=decoder_type)
    asr_model.eval()

    assert asr_model.encoder.streaming_cfg is not None
    if isinstance(asr_model.encoder.streaming_cfg.chunk_size, list):
        chunk_size = asr_model.encoder.streaming_cfg.chunk_size[1]
    else:
        chunk_size = asr_model.encoder.streaming_cfg.chunk_size

    if isinstance(asr_model.encoder.streaming_cfg.pre_encode_cache_size, list):
        pre_encode_cache_size = asr_model.encoder.streaming_cfg.pre_encode_cache_size[1]
    else:
        pre_encode_cache_size = asr_model.encoder.streaming_cfg.pre_encode_cache_size
    window_size = chunk_size + pre_encode_cache_size

    print("chunk_size", chunk_size)
    print("pre_encode_cache_size", pre_encode_cache_size)
    print("window_size", window_size)

    chunk_shift = chunk_size

    # cache_last_channel: (batch_size, dim1, dim2, dim3)
    cache_last_channel_dim1 = len(asr_model.encoder.layers)
    cache_last_channel_dim2 = asr_model.encoder.streaming_cfg.last_channel_cache_size
    cache_last_channel_dim3 = asr_model.encoder.d_model

    # cache_last_time: (batch_size, dim1, dim2, dim3)
    cache_last_time_dim1 = len(asr_model.encoder.layers)
    cache_last_time_dim2 = asr_model.encoder.d_model
    cache_last_time_dim3 = asr_model.encoder.conv_context_size[0]

    asr_model.set_export_config({"decoder_type": "ctc", "cache_support": True})

    filename = "model.onnx"

    asr_model.export(filename)

    normalize_type = asr_model.cfg.preprocessor.normalize
    if normalize_type == "NA":
        normalize_type = ""

    meta_data = {
        "vocab_size": asr_model.decoder.vocab_size,
        "window_size": window_size,
        "chunk_shift": chunk_shift,
        "normalize_type": normalize_type,
        "cache_last_channel_dim1": cache_last_channel_dim1,
        "cache_last_channel_dim2": cache_last_channel_dim2,
        "cache_last_channel_dim3": cache_last_channel_dim3,
        "cache_last_time_dim1": cache_last_time_dim1,
        "cache_last_time_dim2": cache_last_time_dim2,
        "cache_last_time_dim3": cache_last_time_dim3,
        "subsampling_factor": 8,
        "model_type": "EncDecHybridRNNTCTCBPEModel",
        "version": "1",
        "model_author": "NeMo",
        "url": f"https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/{model_name}",
        "comment": "Only the CTC branch is exported",
    }
    add_meta_data(filename, meta_data)
    quantize_dynamic(
        model_input="./model.onnx",
        model_output="./model.int8.onnx",
        weight_type=QuantType.QUInt8,
    )

    print(meta_data)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/nemo/fast-conformer-hybrid-transducer-ctc/export-onnx-transducer-non-streaming.py
================================================
#!/usr/bin/env python3
# Copyright      2024  Xiaomi Corp.        (authors: Fangjun Kuang)
import argparse
from typing import Dict

import nemo.collections.asr as nemo_asr
import onnx
import torch
from onnxruntime.quantization import QuantType, quantize_dynamic


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model",
        type=str,
        required=True,
    )
    parser.add_argument(
        "--doc",
        type=str,
        default="",
    )
    return parser.parse_args()


def add_meta_data(filename: str, meta_data: Dict[str, str]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)
    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    onnx.save(model, filename)


@torch.no_grad()
def main():
    args = get_args()
    model_name = args.model

    asr_model = nemo_asr.models.ASRModel.from_pretrained(model_name=model_name)

    with open("./tokens.txt", "w", encoding="utf-8") as f:
        for i, s in enumerate(asr_model.joint.vocabulary):
            f.write(f"{s} {i}\n")
        f.write(f"<blk> {i+1}\n")
        print("Saved to tokens.txt")

    decoder_type = "rnnt"
    asr_model.change_decoding_strategy(decoder_type=decoder_type)
    asr_model.eval()

    asr_model.set_export_config({"decoder_type": "rnnt"})

    # asr_model.export("model.onnx")
    asr_model.encoder.export("encoder.onnx")
    asr_model.decoder.export("decoder.onnx")
    asr_model.joint.export("joiner.onnx")
    # model.onnx is a suffix.
    # It will generate two files:
    # encoder-model.onnx
    # decoder_joint-model.onnx

    normalize_type = asr_model.cfg.preprocessor.normalize
    if normalize_type == "NA":
        normalize_type = ""
    meta_data = {
        "vocab_size": asr_model.decoder.vocab_size,
        "normalize_type": normalize_type,
        "pred_rnn_layers": asr_model.decoder.pred_rnn_layers,
        "pred_hidden": asr_model.decoder.pred_hidden,
        "subsampling_factor": 8,
        "model_type": "EncDecHybridRNNTCTCBPEModel",
        "version": "1",
        "model_author": "NeMo",
        "url": f"https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/{model_name}"
        if "/" in model_name
        else f"https://huggingface.co/{model_name}",
        "comment": "Only the transducer branch is exported",
        "doc": args.doc,
    }
    add_meta_data("encoder.onnx", meta_data)

    for m in ["encoder", "decoder", "joiner"]:
        quantize_dynamic(
            model_input=f"{m}.onnx",
            model_output=f"{m}.int8.onnx",
            weight_type=QuantType.QUInt8,
        )

    print(meta_data)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/nemo/fast-conformer-hybrid-transducer-ctc/export-onnx-transducer.py
================================================
#!/usr/bin/env python3
# Copyright      2024  Xiaomi Corp.        (authors: Fangjun Kuang)
import argparse
from typing import Dict

import nemo.collections.asr as nemo_asr
import onnx
import torch
from onnxruntime.quantization import QuantType, quantize_dynamic


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model",
        type=str,
        required=True,
        choices=["80", "480", "1040"],
    )
    return parser.parse_args()


def add_meta_data(filename: str, meta_data: Dict[str, str]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)
    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    onnx.save(model, filename)


@torch.no_grad()
def main():
    args = get_args()
    model_name = f"stt_en_fastconformer_hybrid_large_streaming_{args.model}ms"

    asr_model = nemo_asr.models.ASRModel.from_pretrained(model_name=model_name)

    with open("./tokens.txt", "w", encoding="utf-8") as f:
        for i, s in enumerate(asr_model.joint.vocabulary):
            f.write(f"{s} {i}\n")
        f.write(f"<blk> {i+1}\n")
        print("Saved to tokens.txt")

    decoder_type = "rnnt"
    asr_model.change_decoding_strategy(decoder_type=decoder_type)
    asr_model.eval()

    assert asr_model.encoder.streaming_cfg is not None
    if isinstance(asr_model.encoder.streaming_cfg.chunk_size, list):
        chunk_size = asr_model.encoder.streaming_cfg.chunk_size[1]
    else:
        chunk_size = asr_model.encoder.streaming_cfg.chunk_size

    if isinstance(asr_model.encoder.streaming_cfg.pre_encode_cache_size, list):
        pre_encode_cache_size = asr_model.encoder.streaming_cfg.pre_encode_cache_size[1]
    else:
        pre_encode_cache_size = asr_model.encoder.streaming_cfg.pre_encode_cache_size
    window_size = chunk_size + pre_encode_cache_size

    print("chunk_size", chunk_size)
    print("pre_encode_cache_size", pre_encode_cache_size)
    print("window_size", window_size)

    chunk_shift = chunk_size

    # cache_last_channel: (batch_size, dim1, dim2, dim3)
    cache_last_channel_dim1 = len(asr_model.encoder.layers)
    cache_last_channel_dim2 = asr_model.encoder.streaming_cfg.last_channel_cache_size
    cache_last_channel_dim3 = asr_model.encoder.d_model

    # cache_last_time: (batch_size, dim1, dim2, dim3)
    cache_last_time_dim1 = len(asr_model.encoder.layers)
    cache_last_time_dim2 = asr_model.encoder.d_model
    cache_last_time_dim3 = asr_model.encoder.conv_context_size[0]

    asr_model.set_export_config({"decoder_type": "rnnt", "cache_support": True})

    # asr_model.export("model.onnx")
    asr_model.encoder.export("encoder.onnx")
    asr_model.decoder.export("decoder.onnx")
    asr_model.joint.export("joiner.onnx")
    # model.onnx is a suffix.
    # It will generate two files:
    # encoder-model.onnx
    # decoder_joint-model.onnx

    normalize_type = asr_model.cfg.preprocessor.normalize
    if normalize_type == "NA":
        normalize_type = ""

    meta_data = {
        "vocab_size": asr_model.decoder.vocab_size,
        "window_size": window_size,
        "chunk_shift": chunk_shift,
        "normalize_type": normalize_type,
        "cache_last_channel_dim1": cache_last_channel_dim1,
        "cache_last_channel_dim2": cache_last_channel_dim2,
        "cache_last_channel_dim3": cache_last_channel_dim3,
        "cache_last_time_dim1": cache_last_time_dim1,
        "cache_last_time_dim2": cache_last_time_dim2,
        "cache_last_time_dim3": cache_last_time_dim3,
        "pred_rnn_layers": asr_model.decoder.pred_rnn_layers,
        "pred_hidden": asr_model.decoder.pred_hidden,
        "subsampling_factor": 8,
        "model_type": "EncDecHybridRNNTCTCBPEModel",
        "version": "1",
        "model_author": "NeMo",
        "url": f"https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/{model_name}",
        "comment": "Only the transducer branch is exported",
    }
    add_meta_data("encoder.onnx", meta_data)

    for m in ["encoder", "decoder", "joiner"]:
        quantize_dynamic(
            model_input=f"{m}.onnx",
            model_output=f"{m}.int8.onnx",
            weight_type=QuantType.QUInt8,
        )

    print(meta_data)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/nemo/fast-conformer-hybrid-transducer-ctc/run-ctc-non-streaming-2.sh
================================================
#!/usr/bin/env bash
# Copyright      2024  Xiaomi Corp.        (authors: Fangjun Kuang)

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

# 2200 hours of Portuguese speech
url=https://huggingface.co/nvidia/stt_pt_fastconformer_hybrid_large_pc
name=$(basename $url)
name="nvidia/$name"
doc="STT PT FastConformer Hybrid Transducer-CTC Large transcribes text in upper and lower case Portuguese alphabet along with spaces, period, comma, question mark. This collection contains the Brazilian Portuguese FastConformer Hybrid (Transducer and CTC) Large model (around 115M parameters) with punctuation and capitalization trained on around 2200h hours of Portuguese speech. "

log "Process $name at $url"
./export-onnx-ctc-non-streaming.py --model $name --doc "$doc"
d=sherpa-onnx-nemo-stt_pt_fastconformer_hybrid_large_pc
mkdir -p $d
mv -v model.onnx $d/
cp -v tokens.txt $d/
ls -lh $d

mkdir test_wavs
pushd test_wavs
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/pt_br.wav
popd
cp -a test_wavs $d

d=sherpa-onnx-nemo-stt_pt_fastconformer_hybrid_large_pc-int8
mkdir -p $d
mv -v model.int8.onnx $d/
mv -v tokens.txt $d/
ls -lh $d
mv test_wavs $d

python3 ./test-onnx-ctc-non-streaming.py \
  --model $d/model.int8.onnx \
  --tokens $d/tokens.txt \
  --wav $d/test_wavs/pt_br.wav


# 2500 hours of German speech
url=https://huggingface.co/nvidia/stt_de_fastconformer_hybrid_large_pc
name=$(basename $url)
name="nvidia/$name"
doc="This model transcribes speech in upper and lower case German alphabet along with spaces, periods, commas, and question marks. It is a 'large' version of FastConformer Transducer-CTC (around 115M parameters) model. This is a hybrid model trained on two losses: Transducer (default) and CTC."

log "Process $name at $url"
./export-onnx-ctc-non-streaming.py --model $name --doc "$doc"
d=sherpa-onnx-nemo-stt_de_fastconformer_hybrid_large_pc
mkdir -p $d
mv -v model.onnx $d/
cp -v tokens.txt $d/
ls -lh $d

mkdir test_wavs
pushd test_wavs
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/de.wav
popd
cp -a test_wavs $d

d=sherpa-onnx-nemo-stt_de_fastconformer_hybrid_large_pc-int8
mkdir -p $d
mv -v model.int8.onnx $d/
mv -v tokens.txt $d/
ls -lh $d
mv test_wavs $d

python3 ./test-onnx-ctc-non-streaming.py \
  --model $d/model.int8.onnx \
  --tokens $d/tokens.txt \
  --wav $d/test_wavs/de.wav


================================================
FILE: scripts/nemo/fast-conformer-hybrid-transducer-ctc/run-ctc-non-streaming.sh
================================================
#!/usr/bin/env bash
# Copyright      2024  Xiaomi Corp.        (authors: Fangjun Kuang)

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

# 36000 hours of English data
url=https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/parakeet-tdt_ctc-110m
name=$(basename $url)
doc="parakeet-tdt_ctc-110m is an ASR model that transcribes speech with Punctuations and Capitalizations of the English alphabet. It was trained on 36K hours of English speech collected and prepared by NVIDIA NeMo and Suno teams."

log "Process $name at $url"
./export-onnx-ctc-non-streaming.py --model $name --doc "$doc"
d=sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000
mkdir -p $d
mv -v model.onnx $d/
cp -v tokens.txt $d/
ls -lh $d

d=sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8
mkdir -p $d
mv -v model.int8.onnx $d/
mv -v tokens.txt $d/
ls -lh $d

# 8500 hours of English speech
url=https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_en_fastconformer_hybrid_large_pc
name=$(basename $url)
doc="This collection contains the English FastConformer Hybrid (Transducer and CTC) Large model (around 114M parameters) with Punctuation and Capitalization on NeMo ASRSet En PC with around 8500 hours of English speech (SPGI 1k, VoxPopuli, MCV11, Europarl-ASR, Fisher, LibriSpeech, NSC1, MLS). It utilizes a Google SentencePiece [1] tokenizer with a vocabulary size of 1024. It transcribes text in upper and lower case English alphabet along with spaces, periods, commas, question marks, and a few other characters."

log "Process $name at $url"
./export-onnx-ctc-non-streaming.py --model $name --doc "$doc"

d=sherpa-onnx-nemo-fast-conformer-ctc-en-24500
mkdir -p $d
mv -v model.onnx $d/
cp -v tokens.txt $d/
ls -lh $d

d=sherpa-onnx-nemo-fast-conformer-ctc-en-24500-int8
mkdir -p $d
mv -v model.int8.onnx $d/
mv -v tokens.txt $d/
ls -lh $d

url=https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_es_fastconformer_hybrid_large_pc
name=$(basename $url)
doc="This collection contains the Spanish FastConformer Hybrid (CTC and Transducer) Large model (around 114M parameters) with Punctuation and Capitalization. It is trained on the NeMo PnC ES ASRSET (Fisher, MCV12, MLS, Voxpopuli) containing 1424 hours of Spanish speech. It utilizes a Google SentencePiece [1] tokenizer with vocabulary size 1024, and transcribes text in upper and lower case Spanish alphabet along with spaces, period, comma, question mark and inverted question mark."

./export-onnx-ctc-non-streaming.py --model $name --doc "$doc"

d=sherpa-onnx-nemo-fast-conformer-ctc-es-1424
mkdir -p $d
mv -v model.onnx $d/
cp -v tokens.txt $d/
ls -lh $d

d=sherpa-onnx-nemo-fast-conformer-ctc-es-1424-int8
mkdir -p $d
mv -v model.int8.onnx $d/
mv -v tokens.txt $d/
ls -lh $d

url=https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_multilingual_fastconformer_hybrid_large_pc_blend_eu
name=$(basename $url)
doc="This collection contains the Multilingual FastConformer Hybrid (Transducer and CTC) Large model (around 114M parameters) with Punctuation and Capitalization. It is trained on the NeMo PnC German, English, Spanish, and French ASR sets that contain 14,288 hours of speech in total. It utilizes a Google SentencePiece [1] tokenizer with vocabulary size 256 per language and transcribes text in upper and lower case along with spaces, periods, commas, question marks and a few other language-specific characters. The total tokenizer size is 2560, of which 1024 tokens are allocated to English, German, French, and Spanish. The remaining tokens are reserved for future languages."

./export-onnx-ctc-non-streaming.py --model $name --doc "$doc"

d=sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288
mkdir -p $d
mv -v model.onnx $d/
cp -v tokens.txt $d/
ls -lh $d

d=sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288-int8
mkdir -p $d
mv -v model.int8.onnx $d/
mv -v tokens.txt $d/
ls -lh $d

url=https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_multilingual_fastconformer_hybrid_large_pc
name=$(basename $url)
doc="This collection contains the Multilingual FastConformer Hybrid (Transducer and CTC) Large model (around 114M parameters) with Punctuation and Capitalization. It is trained on the NeMo PnC Belarusian, German, English, Spanish, French, Croatian, Italian, Polish, Russian, and Ukrainian ASR sets that contain ~20,000 hours of speech in total. It utilizes a Google SentencePiece [1] tokenizer with vocabulary size 256 per language (2560 total), and transcribes text in upper and lower case along with spaces, periods, commas, question marks and a few other language-specific characters."

./export-onnx-ctc-non-streaming.py --model $name --doc "$doc"

d=sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k
mkdir -p $d
mv -v model.onnx $d/
cp -v tokens.txt $d/
ls -lh $d

d=sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8
mkdir -p $d
mv -v model.int8.onnx $d/
mv -v tokens.txt $d/
ls -lh $d

# Now test the exported model
log "Download test data"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/spoken-language-identification-test-wavs.tar.bz2
tar xvf spoken-language-identification-test-wavs.tar.bz2
rm spoken-language-identification-test-wavs.tar.bz2
data=spoken-language-identification-test-wavs

curl -SL -O https://dldata-public.s3.us-east-2.amazonaws.com/2086-149220-0033.wav
mv 2086-149220-0033.wav en.wav

d=sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000
python3 ./test-onnx-ctc-non-streaming.py \
  --model $d/model.onnx \
  --tokens $d/tokens.txt \
  --wav $data/en-english.wav
mkdir -p $d/test_wavs

cp en.wav $d/test_wavs/0.wav
cp -v $data/en-english.wav $d/test_wavs/1.wav

d=sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8
python3 ./test-onnx-ctc-non-streaming.py \
  --model $d/model.int8.onnx \
  --tokens $d/tokens.txt \
  --wav $data/en-english.wav
mkdir -p $d/test_wavs

cp en.wav $d/test_wavs/0.wav
cp -v $data/en-english.wav $d/test_wavs/1.wav

d=sherpa-onnx-nemo-fast-conformer-ctc-en-24500
python3 ./test-onnx-ctc-non-streaming.py \
  --model $d/model.onnx \
  --tokens $d/tokens.txt \
  --wav $data/en-english.wav
mkdir -p $d/test_wavs
cp en.wav $d/test_wavs/0.wav
cp -v $data/en-english.wav $d/test_wavs

d=sherpa-onnx-nemo-fast-conformer-ctc-en-24500-int8
python3 ./test-onnx-ctc-non-streaming.py \
  --model $d/model.int8.onnx \
  --tokens $d/tokens.txt \
  --wav $data/en-english.wav
mkdir -p $d/test_wavs
cp en.wav $d/test_wavs/0.wav
cp -v $data/en-english.wav $d/test_wavs

d=sherpa-onnx-nemo-fast-conformer-ctc-es-1424
python3 ./test-onnx-ctc-non-streaming.py \
  --model $d/model.onnx \
  --tokens $d/tokens.txt \
  --wav $data/es-spanish.wav
mkdir -p $d/test_wavs
cp -v $data/es-spanish.wav $d/test_wavs

d=sherpa-onnx-nemo-fast-conformer-ctc-es-1424-int8
python3 ./test-onnx-ctc-non-streaming.py \
  --model $d/model.int8.onnx \
  --tokens $d/tokens.txt \
  --wav $data/es-spanish.wav
mkdir -p $d/test_wavs
cp -v $data/es-spanish.wav $d/test_wavs

d=sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288
mkdir -p $d/test_wavs
for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav; do
  python3 ./test-onnx-ctc-non-streaming.py \
    --model $d/model.onnx \
    --tokens $d/tokens.txt \
    --wav $data/$w
  cp -v $data/$w $d/test_wavs
done

d=sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288-int8
mkdir -p $d/test_wavs
for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav; do
  python3 ./test-onnx-ctc-non-streaming.py \
    --model $d/model.int8.onnx \
    --tokens $d/tokens.txt \
    --wav $data/$w
  cp -v $data/$w $d/test_wavs
done

d=sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k
mkdir -p $d/test_wavs
for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav hr-croatian.wav it-italian.wav po-polish.wav ru-russian.wav uk-ukrainian.wav; do
  python3 ./test-onnx-ctc-non-streaming.py \
    --model $d/model.onnx \
    --tokens $d/tokens.txt \
    --wav $data/$w
  cp -v $data/$w $d/test_wavs
done

d=sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8
mkdir -p $d/test_wavs
for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav hr-croatian.wav it-italian.wav po-polish.wav ru-russian.wav uk-ukrainian.wav; do
  python3 ./test-onnx-ctc-non-streaming.py \
    --model $d/model.int8.onnx \
    --tokens $d/tokens.txt \
    --wav $data/$w
  cp -v $data/$w $d/test_wavs
done


================================================
FILE: scripts/nemo/fast-conformer-hybrid-transducer-ctc/run-ctc.sh
================================================
#!/usr/bin/env bash
# Copyright      2024  Xiaomi Corp.        (authors: Fangjun Kuang)

set -ex

if [ ! -e ./0.wav ]; then
  # curl -SL -O https://hf-mirror.com/csukuangfj/icefall-asr-librispeech-streaming-zipformer-small-2024-03-18/resolve/main/test_wavs/0.wav
  curl -SL -O https://huggingface.co/csukuangfj/icefall-asr-librispeech-streaming-zipformer-small-2024-03-18/resolve/main/test_wavs/0.wav
fi

ms=(
80
480
1040
)

for m in ${ms[@]}; do
  ./export-onnx-ctc.py --model $m
  d=sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-${m}ms

  d_int8=sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-${m}ms-int8

  if [ ! -f $d/model.onnx ]; then
    mkdir -p $d $d_int8
    mv -v model.onnx $d/
    cp -v tokens.txt $d/

    mv -v model.int8.onnx $d_int8/
    mv -v tokens.txt $d_int8/

    echo "---$d---"
    ls -lh $d

    echo "---$d_int8---"
    ls -lh $d_int8
  fi
done

# Now test the exported models

for m in ${ms[@]}; do
  d=sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-${m}ms
  echo "---$d---"
  python3 ./test-onnx-ctc.py \
    --model $d/model.onnx \
    --tokens $d/tokens.txt \
    --wav ./0.wav

  d=sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-${m}ms-int8
  echo "---$d---"
  python3 ./test-onnx-ctc.py \
    --model $d/model.int8.onnx \
    --tokens $d/tokens.txt \
    --wav ./0.wav
done


================================================
FILE: scripts/nemo/fast-conformer-hybrid-transducer-ctc/run-transducer-non-streaming-2.sh
================================================
#!/usr/bin/env bash
# Copyright      2024  Xiaomi Corp.        (authors: Fangjun Kuang)

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

# 2200 hours of Portuguese speech
url=https://huggingface.co/nvidia/stt_pt_fastconformer_hybrid_large_pc
name=$(basename $url)
name="nvidia/$name"
doc="STT PT FastConformer Hybrid Transducer-CTC Large transcribes text in upper and lower case Portuguese alphabet along with spaces, period, comma, question mark. This collection contains the Brazilian Portuguese FastConformer Hybrid (Transducer and CTC) Large model (around 115M parameters) with punctuation and capitalization trained on around 2200h hours of Portuguese speech. "

log "Process $name at $url"
./export-onnx-transducer-non-streaming.py --model $name --doc "$doc"
d=sherpa-onnx-nemo-transducer-stt_pt_fastconformer_hybrid_large_pc
mkdir -p $d
mv -v encoder.onnx $d/
mv -v decoder.onnx $d/
mv -v joiner.onnx $d/
cp -v tokens.txt $d/
ls -lh $d

mkdir test_wavs
pushd test_wavs
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/pt_br.wav
popd
cp -a test_wavs $d

d=sherpa-onnx-nemo-transducer-stt_pt_fastconformer_hybrid_large_pc-int8
mkdir -p $d
mv -v encoder.int8.onnx $d/
mv -v decoder.int8.onnx $d/
mv -v joiner.int8.onnx $d/
mv -v tokens.txt $d/
ls -lh $d
mv test_wavs $d

python3 ./test-onnx-transducer-non-streaming.py \
  --encoder $d/encoder.int8.onnx \
  --decoder $d/decoder.int8.onnx \
  --joiner $d/joiner.int8.onnx \
  --tokens $d/tokens.txt \
  --wav $d/test_wavs/pt_br.wav

# 2500 hours of German speech
url=https://huggingface.co/nvidia/stt_de_fastconformer_hybrid_large_pc
name=$(basename $url)
name="nvidia/$name"
doc="This model transcribes speech in upper and lower case German alphabet along with spaces, periods, commas, and question marks. It is a 'large' version of FastConformer Transducer-CTC (around 115M parameters) model. This is a hybrid model trained on two losses: Transducer (default) and CTC."

log "Process $name at $url"
./export-onnx-transducer-non-streaming.py --model $name --doc "$doc"
d=sherpa-onnx-nemo-transducer-stt_de_fastconformer_hybrid_large_pc
mkdir -p $d
mv -v encoder.onnx $d/
mv -v decoder.onnx $d/
mv -v joiner.onnx $d/
cp -v tokens.txt $d/
ls -lh $d

mkdir test_wavs
pushd test_wavs
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/de.wav
popd
cp -a test_wavs $d

d=sherpa-onnx-nemo-transducer-stt_de_fastconformer_hybrid_large_pc-int8
mkdir -p $d
mv -v encoder.int8.onnx $d/
mv -v decoder.int8.onnx $d/
mv -v joiner.int8.onnx $d/
mv -v tokens.txt $d/
ls -lh $d
mv test_wavs $d

python3 ./test-onnx-transducer-non-streaming.py \
  --encoder $d/encoder.int8.onnx \
  --decoder $d/decoder.int8.onnx \
  --joiner $d/joiner.int8.onnx \
  --tokens $d/tokens.txt \
  --wav $d/test_wavs/de.wav


================================================
FILE: scripts/nemo/fast-conformer-hybrid-transducer-ctc/run-transducer-non-streaming.sh
================================================
#!/usr/bin/env bash
# Copyright      2024  Xiaomi Corp.        (authors: Fangjun Kuang)

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

# 36000 hours of English data
url=https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/parakeet-tdt_ctc-110m
name=$(basename $url)
doc="parakeet-tdt_ctc-110m is an ASR model that transcribes speech with Punctuations and Capitalizations of the English alphabet. It was trained on 36K hours of English speech collected and prepared by NVIDIA NeMo and Suno teams."

log "Process $name at $url"
./export-onnx-transducer-non-streaming.py --model $name --doc "$doc"
d=sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000
mkdir -p $d
mv -v encoder.onnx $d/
mv -v decoder.onnx $d/
mv -v joiner.onnx $d/
cp -v tokens.txt $d/
ls -lh $d

d=sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000-int8
mkdir -p $d
mv -v encoder.int8.onnx $d/
mv -v decoder.int8.onnx $d/
mv -v joiner.int8.onnx $d/
mv -v tokens.txt $d/
ls -lh $d

# 8500 hours of English speech
url=https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_en_fastconformer_hybrid_large_pc
name=$(basename $url)
doc="This collection contains the English FastConformer Hybrid (Transducer and CTC) Large model (around 114M parameters) with Punctuation and Capitalization on NeMo ASRSet En PC with around 8500 hours of English speech (SPGI 1k, VoxPopuli, MCV11, Europarl-ASR, Fisher, LibriSpeech, NSC1, MLS). It utilizes a Google SentencePiece [1] tokenizer with a vocabulary size of 1024. It transcribes text in upper and lower case English alphabet along with spaces, periods, commas, question marks, and a few other characters."

log "Process $name at $url"
./export-onnx-transducer-non-streaming.py --model $name --doc "$doc"

d=sherpa-onnx-nemo-fast-conformer-transducer-en-24500
mkdir -p $d
mv -v encoder.onnx $d/
mv -v decoder.onnx $d/
mv -v joiner.onnx $d/
cp -v tokens.txt $d/
ls -lh $d

d=sherpa-onnx-nemo-fast-conformer-transducer-en-24500-int8
mkdir -p $d
mv -v encoder.int8.onnx $d/
mv -v decoder.int8.onnx $d/
mv -v joiner.int8.onnx $d/
mv -v tokens.txt $d/
ls -lh $d

url=https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_es_fastconformer_hybrid_large_pc
name=$(basename $url)
doc="This collection contains the Spanish FastConformer Hybrid (CTC and Transducer) Large model (around 114M parameters) with Punctuation and Capitalization. It is trained on the NeMo PnC ES ASRSET (Fisher, MCV12, MLS, Voxpopuli) containing 1424 hours of Spanish speech. It utilizes a Google SentencePiece [1] tokenizer with vocabulary size 1024, and transcribes text in upper and lower case Spanish alphabet along with spaces, period, comma, question mark and inverted question mark."

./export-onnx-transducer-non-streaming.py --model $name --doc "$doc"

d=sherpa-onnx-nemo-fast-conformer-transducer-es-1424
mkdir -p $d
mv -v encoder.onnx $d/
mv -v decoder.onnx $d/
mv -v joiner.onnx $d/
cp -v tokens.txt $d/
ls -lh $d

d=sherpa-onnx-nemo-fast-conformer-transducer-es-1424-int8
mkdir -p $d
mv -v encoder.int8.onnx $d/
mv -v decoder.int8.onnx $d/
mv -v joiner.int8.onnx $d/
mv -v tokens.txt $d/
ls -lh $d

url=https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_multilingual_fastconformer_hybrid_large_pc_blend_eu
name=$(basename $url)
doc="This collection contains the Multilingual FastConformer Hybrid (Transducer and CTC) Large model (around 114M parameters) with Punctuation and Capitalization. It is trained on the NeMo PnC German, English, Spanish, and French ASR sets that contain 14,288 hours of speech in total. It utilizes a Google SentencePiece [1] tokenizer with vocabulary size 256 per language and transcribes text in upper and lower case along with spaces, periods, commas, question marks and a few other language-specific characters. The total tokenizer size is 2560, of which 1024 tokens are allocated to English, German, French, and Spanish. The remaining tokens are reserved for future languages."

./export-onnx-transducer-non-streaming.py --model $name --doc "$doc"

d=sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288
mkdir -p $d
mv -v encoder.onnx $d/
mv -v decoder.onnx $d/
mv -v joiner.onnx $d/
cp -v tokens.txt $d/
ls -lh $d

d=sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288-int8
mkdir -p $d
mv -v encoder.int8.onnx $d/
mv -v decoder.int8.onnx $d/
mv -v joiner.int8.onnx $d/
mv -v tokens.txt $d/
ls -lh $d

url=https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_multilingual_fastconformer_hybrid_large_pc
name=$(basename $url)
doc="This collection contains the Multilingual FastConformer Hybrid (Transducer and CTC) Large model (around 114M parameters) with Punctuation and Capitalization. It is trained on the NeMo PnC Belarusian, German, English, Spanish, French, Croatian, Italian, Polish, Russian, and Ukrainian ASR sets that contain ~20,000 hours of speech in total. It utilizes a Google SentencePiece [1] tokenizer with vocabulary size 256 per language (2560 total), and transcribes text in upper and lower case along with spaces, periods, commas, question marks and a few other language-specific characters."

./export-onnx-transducer-non-streaming.py --model $name --doc "$doc"

d=sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k
mkdir -p $d
mv -v encoder.onnx $d/
mv -v decoder.onnx $d/
mv -v joiner.onnx $d/
cp -v tokens.txt $d/
ls -lh $d

d=sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8
mkdir -p $d
mv -v encoder.int8.onnx $d/
mv -v decoder.int8.onnx $d/
mv -v joiner.int8.onnx $d/
mv -v tokens.txt $d/
ls -lh $d

# Now test the exported model
log "Download test data"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/spoken-language-identification-test-wavs.tar.bz2
tar xvf spoken-language-identification-test-wavs.tar.bz2
rm spoken-language-identification-test-wavs.tar.bz2
data=spoken-language-identification-test-wavs

curl -SL -O https://dldata-public.s3.us-east-2.amazonaws.com/2086-149220-0033.wav
mv 2086-149220-0033.wav en.wav

d=sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000
python3 ./test-onnx-transducer-non-streaming.py \
  --encoder $d/encoder.onnx \
  --decoder $d/decoder.onnx \
  --joiner $d/joiner.onnx \
  --tokens $d/tokens.txt \
  --wav $data/en-english.wav

python3 ./test-onnx-transducer-non-streaming.py \
  --encoder $d/encoder.onnx \
  --decoder $d/decoder.onnx \
  --joiner $d/joiner.onnx \
  --tokens $d/tokens.txt \
  --wav ./en.wav

mkdir -p $d/test_wavs
cp en.wav $d/test_wavs/0.wav
cp -v $data/en-english.wav $d/test_wavs

d=sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000-int8
python3 ./test-onnx-transducer-non-streaming.py \
  --encoder $d/encoder.int8.onnx \
  --decoder $d/decoder.int8.onnx \
  --joiner $d/joiner.int8.onnx \
  --tokens $d/tokens.txt \
  --wav $data/en-english.wav

python3 ./test-onnx-transducer-non-streaming.py \
  --encoder $d/encoder.int8.onnx \
  --decoder $d/decoder.int8.onnx \
  --joiner $d/joiner.int8.onnx \
  --tokens $d/tokens.txt \
  --wav ./en.wav

mkdir -p $d/test_wavs
cp en.wav $d/test_wavs/0.wav
cp -v $data/en-english.wav $d/test_wavs

d=sherpa-onnx-nemo-fast-conformer-transducer-en-24500
python3 ./test-onnx-transducer-non-streaming.py \
  --encoder $d/encoder.onnx \
  --decoder $d/decoder.onnx \
  --joiner $d/joiner.onnx \
  --tokens $d/tokens.txt \
  --wav $data/en-english.wav
mkdir -p $d/test_wavs
cp en.wav $d/test_wavs/0.wav
cp -v $data/en-english.wav $d/test_wavs

d=sherpa-onnx-nemo-fast-conformer-transducer-en-24500-int8
python3 ./test-onnx-transducer-non-streaming.py \
  --encoder $d/encoder.int8.onnx \
  --decoder $d/decoder.int8.onnx \
  --joiner $d/joiner.int8.onnx \
  --tokens $d/tokens.txt \
  --wav $data/en-english.wav
mkdir -p $d/test_wavs
cp en.wav $d/test_wavs/0.wav
cp -v $data/en-english.wav $d/test_wavs

d=sherpa-onnx-nemo-fast-conformer-transducer-es-1424
python3 ./test-onnx-transducer-non-streaming.py \
  --encoder $d/encoder.onnx \
  --decoder $d/decoder.onnx \
  --joiner $d/joiner.onnx \
  --tokens $d/tokens.txt \
  --wav $data/es-spanish.wav
mkdir -p $d/test_wavs
cp -v $data/es-spanish.wav $d/test_wavs

d=sherpa-onnx-nemo-fast-conformer-transducer-es-1424-int8
python3 ./test-onnx-transducer-non-streaming.py \
  --encoder $d/encoder.int8.onnx \
  --decoder $d/decoder.int8.onnx \
  --joiner $d/joiner.int8.onnx \
  --tokens $d/tokens.txt \
  --wav $data/es-spanish.wav
mkdir -p $d/test_wavs
cp -v $data/es-spanish.wav $d/test_wavs

d=sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288
mkdir -p $d/test_wavs
for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav; do
  python3 ./test-onnx-transducer-non-streaming.py \
    --encoder $d/encoder.onnx \
    --decoder $d/decoder.onnx \
    --joiner $d/joiner.onnx \
    --tokens $d/tokens.txt \
    --wav $data/$w
  cp -v $data/$w $d/test_wavs
done

d=sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288-int8
mkdir -p $d/test_wavs
for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav; do
  python3 ./test-onnx-transducer-non-streaming.py \
    --encoder $d/encoder.int8.onnx \
    --decoder $d/decoder.int8.onnx \
    --joiner $d/joiner.int8.onnx \
    --tokens $d/tokens.txt \
    --wav $data/$w
  cp -v $data/$w $d/test_wavs
done

d=sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k
mkdir -p $d/test_wavs
for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav hr-croatian.wav it-italian.wav po-polish.wav ru-russian.wav uk-ukrainian.wav; do
  python3 ./test-onnx-transducer-non-streaming.py \
    --encoder $d/encoder.onnx \
    --decoder $d/decoder.onnx \
    --joiner $d/joiner.onnx \
    --tokens $d/tokens.txt \
    --wav $data/$w
  cp -v $data/$w $d/test_wavs
done

d=sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8
mkdir -p $d/test_wavs
for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav hr-croatian.wav it-italian.wav po-polish.wav ru-russian.wav uk-ukrainian.wav; do
  python3 ./test-onnx-transducer-non-streaming.py \
    --encoder $d/encoder.int8.onnx \
    --decoder $d/decoder.int8.onnx \
    --joiner $d/joiner.int8.onnx \
    --tokens $d/tokens.txt \
    --wav $data/$w
  cp -v $data/$w $d/test_wavs
done


================================================
FILE: scripts/nemo/fast-conformer-hybrid-transducer-ctc/run-transducer.sh
================================================
#!/usr/bin/env bash
# Copyright      2024  Xiaomi Corp.        (authors: Fangjun Kuang)

set -ex

if [ ! -e ./0.wav ]; then
  # curl -SL -O https://hf-mirror.com/csukuangfj/icefall-asr-librispeech-streaming-zipformer-small-2024-03-18/resolve/main/test_wavs/0.wav
  curl -SL -O https://huggingface.co/csukuangfj/icefall-asr-librispeech-streaming-zipformer-small-2024-03-18/resolve/main/test_wavs/0.wav
fi

ms=(
80
480
1040
)

for m in ${ms[@]}; do
  ./export-onnx-transducer.py --model $m
  d=sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-${m}ms
  d_int8=sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-${m}ms-int8
  if [ ! -f $d/encoder.onnx ]; then
    mkdir -p $d $d_int8
    mv -v encoder.onnx $d/
    mv -v decoder.onnx $d/
    mv -v joiner.onnx $d/
    cp -v tokens.txt $d/

    mv -v encoder.int8.onnx $d_int8/
    mv -v decoder.int8.onnx $d_int8/
    mv -v joiner.int8.onnx $d_int8/
    mv -v tokens.txt $d_int8/

    echo "---$d---"
    ls -lh $d

    echo "---$d_int8---"
    ls -lh $d_int8
  fi
done

# Now test the exported models

for m in ${ms[@]}; do
  d=sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-${m}ms
  python3 ./test-onnx-transducer.py \
    --encoder $d/encoder.onnx \
    --decoder $d/decoder.onnx \
    --joiner $d/joiner.onnx \
    --tokens $d/tokens.txt \
    --wav ./0.wav

  d=sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-${m}ms-int8
  python3 ./test-onnx-transducer.py \
    --encoder $d/encoder.int8.onnx \
    --decoder $d/decoder.int8.onnx \
    --joiner $d/joiner.int8.onnx \
    --tokens $d/tokens.txt \
    --wav ./0.wav
done


================================================
FILE: scripts/nemo/fast-conformer-hybrid-transducer-ctc/show-onnx-transudcer.py
================================================
#!/usr/bin/env python3
# Copyright      2024  Xiaomi Corp.        (authors: Fangjun Kuang)

import onnxruntime


def show(filename):
    session_opts = onnxruntime.SessionOptions()
    session_opts.log_severity_level = 3
    sess = onnxruntime.InferenceSession(filename, session_opts)
    for i in sess.get_inputs():
        print(i)

    print("-----")

    for i in sess.get_outputs():
        print(i)


def main():
    print("=========encoder==========")
    show("./encoder.onnx")

    print("=========decoder==========")
    show("./decoder.onnx")

    print("=========joiner==========")
    show("./joiner.onnx")


if __name__ == "__main__":
    main()

"""
=========encoder==========
NodeArg(name='audio_signal', type='tensor(float)', shape=['audio_signal_dynamic_axes_1', 80, 'audio_signal_dynamic_axes_2'])
NodeArg(name='length', type='tensor(int64)', shape=['length_dynamic_axes_1'])
NodeArg(name='cache_last_channel', type='tensor(float)', shape=['cache_last_channel_dynamic_axes_1', 17, 'cache_last_channel_dynamic_axes_2', 512])
NodeArg(name='cache_last_time', type='tensor(float)', shape=['cache_last_time_dynamic_axes_1', 17, 512, 'cache_last_time_dynamic_axes_2'])
NodeArg(name='cache_last_channel_len', type='tensor(int64)', shape=['cache_last_channel_len_dynamic_axes_1'])
-----
NodeArg(name='outputs', type='tensor(float)', shape=['outputs_dynamic_axes_1', 512, 'outputs_dynamic_axes_2'])
NodeArg(name='encoded_lengths', type='tensor(int64)', shape=['encoded_lengths_dynamic_axes_1'])
NodeArg(name='cache_last_channel_next', type='tensor(float)', shape=['cache_last_channel_next_dynamic_axes_1', 17, 'cache_last_channel_next_dynamic_axes_2', 512])
NodeArg(name='cache_last_time_next', type='tensor(float)', shape=['cache_last_time_next_dynamic_axes_1', 17, 512, 'cache_last_time_next_dynamic_axes_2'])
NodeArg(name='cache_last_channel_next_len', type='tensor(int64)', shape=['cache_last_channel_next_len_dynamic_axes_1'])
=========decoder==========
NodeArg(name='targets', type='tensor(int32)', shape=['targets_dynamic_axes_1', 'targets_dynamic_axes_2'])
NodeArg(name='target_length', type='tensor(int32)', shape=['target_length_dynamic_axes_1'])
NodeArg(name='states.1', type='tensor(float)', shape=[1, 'states.1_dim_1', 640])
NodeArg(name='onnx::LSTM_3', type='tensor(float)', shape=[1, 1, 640])
-----
NodeArg(name='outputs', type='tensor(float)', shape=['outputs_dynamic_axes_1', 640, 'outputs_dynamic_axes_2'])
NodeArg(name='prednet_lengths', type='tensor(int32)', shape=['prednet_lengths_dynamic_axes_1'])
NodeArg(name='states', type='tensor(float)', shape=[1, 'states_dynamic_axes_1', 640])
NodeArg(name='74', type='tensor(float)', shape=[1, 'LSTM74_dim_1', 640])
=========joiner==========
NodeArg(name='encoder_outputs', type='tensor(float)', shape=['encoder_outputs_dynamic_axes_1', 512, 'encoder_outputs_dynamic_axes_2'])
NodeArg(name='decoder_outputs', type='tensor(float)', shape=['decoder_outputs_dynamic_axes_1', 640, 'decoder_outputs_dynamic_axes_2'])
-----
NodeArg(name='outputs', type='tensor(float)', shape=['outputs_dynamic_axes_1', 'outputs_dynamic_axes_2', 'outputs_dynamic_axes_3', 1025])

"""


================================================
FILE: scripts/nemo/fast-conformer-hybrid-transducer-ctc/test-onnx-ctc-non-streaming.py
================================================
#!/usr/bin/env python3
# Copyright      2024  Xiaomi Corp.        (authors: Fangjun Kuang)

import argparse
from pathlib import Path

import kaldi_native_fbank as knf
import numpy as np
import onnxruntime as ort
import torch
import soundfile as sf
import librosa


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str, required=True, help="Path to model.onnx")

    parser.add_argument("--tokens", type=str, required=True, help="Path to tokens.txt")

    parser.add_argument("--wav", type=str, required=True, help="Path to test.wav")

    return parser.parse_args()


def create_fbank():
    opts = knf.FbankOptions()
    opts.frame_opts.dither = 0
    opts.frame_opts.remove_dc_offset = False
    opts.frame_opts.window_type = "hann"

    opts.mel_opts.low_freq = 0
    opts.mel_opts.num_bins = 80

    opts.mel_opts.is_librosa = True

    fbank = knf.OnlineFbank(opts)
    return fbank


def compute_features(audio, fbank):
    assert len(audio.shape) == 1, audio.shape
    fbank.accept_waveform(16000, audio)
    ans = []
    processed = 0
    while processed < fbank.num_frames_ready:
        ans.append(np.array(fbank.get_frame(processed)))
        processed += 1
    ans = np.stack(ans)
    return ans


class OnnxModel:
    def __init__(
        self,
        filename: str,
    ):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.session_opts = session_opts

        self.model = ort.InferenceSession(
            filename,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )
        print("==========Input==========")
        for i in self.model.get_inputs():
            print(i)
        print("==========Output==========")
        for i in self.model.get_outputs():
            print(i)
        """
        ==========Input==========
        NodeArg(name='audio_signal', type='tensor(float)', shape=['audio_signal_dynamic_axes_1', 80, 'audio_signal_dynamic_axes_2'])
        NodeArg(name='length', type='tensor(int64)', shape=['length_dynamic_axes_1'])
        ==========Output==========
        NodeArg(name='logprobs', type='tensor(float)', shape=['logprobs_dynamic_axes_1', 'logprobs_dynamic_axes_2', 1025])
        """

        meta = self.model.get_modelmeta().custom_metadata_map
        self.normalize_type = meta["normalize_type"]
        print(meta)

    def __call__(self, x: np.ndarray):
        # x: (T, C)
        x = torch.from_numpy(x)
        x = x.t().unsqueeze(0)
        # x: [1, C, T]
        x_lens = torch.tensor([x.shape[-1]], dtype=torch.int64)

        log_probs = self.model.run(
            [
                self.model.get_outputs()[0].name,
            ],
            {
                self.model.get_inputs()[0].name: x.numpy(),
                self.model.get_inputs()[1].name: x_lens.numpy(),
            },
        )[0]
        # [batch_size, T, vocab_size]
        return torch.from_numpy(log_probs)


def main():
    args = get_args()
    assert Path(args.model).is_file(), args.model
    assert Path(args.tokens).is_file(), args.tokens
    assert Path(args.wav).is_file(), args.wav

    print(vars(args))

    model = OnnxModel(args.model)

    id2token = dict()
    with open(args.tokens, encoding="utf-8") as f:
        for line in f:
            t, idx = line.split()
            id2token[int(idx)] = t

    fbank = create_fbank()
    audio, sample_rate = sf.read(args.wav, dtype="float32", always_2d=True)
    audio = audio[:, 0]  # only use the first channel
    if sample_rate != 16000:
        audio = librosa.resample(
            audio,
            orig_sr=sample_rate,
            target_sr=16000,
        )
        sample_rate = 16000

    blank = len(id2token) - 1
    ans = []
    prev = -1

    print(audio.shape)
    features = compute_features(audio, fbank)
    if model.normalize_type != "":
        assert model.normalize_type == "per_feature", model.normalize_type
        features = torch.from_numpy(features)
        mean = features.mean(dim=0, keepdims=True)
        stddev = features.std(dim=0, keepdims=True) + 1e-5
        features = (features - mean) / stddev
        features = features.numpy()

    print("features.shape", features.shape)
    log_probs = model(features)

    print("log_probs.shape", log_probs.shape)

    log_probs = log_probs[0, :, :]  # remove batch dim
    ids = torch.argmax(log_probs, dim=1).tolist()
    for k in ids:
        if k != blank and k != prev:
            ans.append(k)
        prev = k

    tokens = [id2token[i] for i in ans]
    underline = "▁"
    #  underline = b"\xe2\x96\x81".decode()
    text = "".join(tokens).replace(underline, " ").strip()
    print(args.wav)
    print(text)


main()


================================================
FILE: scripts/nemo/fast-conformer-hybrid-transducer-ctc/test-onnx-ctc.py
================================================
#!/usr/bin/env python3
# Copyright      2024  Xiaomi Corp.        (authors: Fangjun Kuang)

import argparse
from pathlib import Path

import kaldi_native_fbank as knf
import numpy as np
import onnxruntime as ort
import torch
import soundfile as sf
import librosa


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str, required=True, help="Path to model.onnx")

    parser.add_argument("--tokens", type=str, required=True, help="Path to tokens.txt")

    parser.add_argument("--wav", type=str, required=True, help="Path to test.wav")

    return parser.parse_args()


def create_fbank():
    opts = knf.FbankOptions()
    opts.frame_opts.dither = 0
    opts.frame_opts.remove_dc_offset = False
    opts.frame_opts.window_type = "hann"

    opts.mel_opts.low_freq = 0
    opts.mel_opts.num_bins = 80

    opts.mel_opts.is_librosa = True

    fbank = knf.OnlineFbank(opts)
    return fbank


def compute_features(audio, fbank):
    assert len(audio.shape) == 1, audio.shape
    fbank.accept_waveform(16000, audio)
    ans = []
    processed = 0
    while processed < fbank.num_frames_ready:
        ans.append(np.array(fbank.get_frame(processed)))
        processed += 1
    ans = np.stack(ans)
    return ans


class OnnxModel:
    def __init__(
        self,
        filename: str,
    ):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.session_opts = session_opts

        self.model = ort.InferenceSession(
            filename,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )

        meta = self.model.get_modelmeta().custom_metadata_map
        print(meta)

        self.window_size = int(meta["window_size"])
        self.chunk_shift = int(meta["chunk_shift"])

        self.cache_last_channel_dim1 = int(meta["cache_last_channel_dim1"])
        self.cache_last_channel_dim2 = int(meta["cache_last_channel_dim2"])
        self.cache_last_channel_dim3 = int(meta["cache_last_channel_dim3"])

        self.cache_last_time_dim1 = int(meta["cache_last_time_dim1"])
        self.cache_last_time_dim2 = int(meta["cache_last_time_dim2"])
        self.cache_last_time_dim3 = int(meta["cache_last_time_dim3"])

        self.init_cache_state()

    def init_cache_state(self):
        self.cache_last_channel = torch.zeros(
            1,
            self.cache_last_channel_dim1,
            self.cache_last_channel_dim2,
            self.cache_last_channel_dim3,
            dtype=torch.float32,
        ).numpy()

        self.cache_last_time = torch.zeros(
            1,
            self.cache_last_time_dim1,
            self.cache_last_time_dim2,
            self.cache_last_time_dim3,
            dtype=torch.float32,
        ).numpy()

        self.cache_last_channel_len = torch.zeros([1], dtype=torch.int64).numpy()

    def __call__(self, x: np.ndarray):
        # x: (T, C)
        x = torch.from_numpy(x)
        x = x.t().unsqueeze(0)
        # x: [1, C, T]
        x_lens = torch.tensor([x.shape[-1]], dtype=torch.int64)

        (
            log_probs,
            log_probs_len,
            cache_last_channel_next,
            cache_last_time_next,
            cache_last_channel_len_next,
        ) = self.model.run(
            [
                self.model.get_outputs()[0].name,
                self.model.get_outputs()[1].name,
                self.model.get_outputs()[2].name,
                self.model.get_outputs()[3].name,
                self.model.get_outputs()[4].name,
            ],
            {
                self.model.get_inputs()[0].name: x.numpy(),
                self.model.get_inputs()[1].name: x_lens.numpy(),
                self.model.get_inputs()[2].name: self.cache_last_channel,
                self.model.get_inputs()[3].name: self.cache_last_time,
                self.model.get_inputs()[4].name: self.cache_last_channel_len,
            },
        )
        self.cache_last_channel = cache_last_channel_next
        self.cache_last_time = cache_last_time_next
        self.cache_last_channel_len = cache_last_channel_len_next

        # [T, vocab_size]
        return torch.from_numpy(log_probs).squeeze(0)


def main():
    args = get_args()
    assert Path(args.model).is_file(), args.model
    assert Path(args.tokens).is_file(), args.tokens
    assert Path(args.wav).is_file(), args.wav

    print(vars(args))

    model = OnnxModel(args.model)

    id2token = dict()
    with open(args.tokens, encoding="utf-8") as f:
        for line in f:
            t, idx = line.split()
            id2token[int(idx)] = t

    fbank = create_fbank()
    audio, sample_rate = sf.read(args.wav, dtype="float32", always_2d=True)
    audio = audio[:, 0]  # only use the first channel
    if sample_rate != 16000:
        audio = librosa.resample(
            audio,
            orig_sr=sample_rate,
            target_sr=16000,
        )
        sample_rate = 16000

    window_size = model.window_size
    chunk_shift = model.chunk_shift

    blank = len(id2token) - 1
    prev = -1
    ans = []

    features = compute_features(audio, fbank)
    num_chunks = (features.shape[0] - window_size) // chunk_shift + 1
    for i in range(num_chunks):
        start = i * chunk_shift
        end = start + window_size
        chunk = features[start:end, :]

        log_probs = model(chunk)
        ids = torch.argmax(log_probs, dim=1).tolist()
        for i in ids:
            if i != blank and i != prev:
                ans.append(i)
            prev = i

    tokens = [id2token[i] for i in ans]
    underline = "▁"
    #  underline = b"\xe2\x96\x81".decode()
    text = "".join(tokens).replace(underline, " ").strip()
    print(args.wav)
    print(text)


main()


================================================
FILE: scripts/nemo/fast-conformer-hybrid-transducer-ctc/test-onnx-transducer-non-streaming.py
================================================
#!/usr/bin/env python3
# Copyright      2024  Xiaomi Corp.        (authors: Fangjun Kuang)

import argparse
from pathlib import Path

import kaldi_native_fbank as knf
import librosa
import numpy as np
import onnxruntime as ort
import soundfile as sf
import torch


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--encoder", type=str, required=True, help="Path to encoder.onnx"
    )
    parser.add_argument(
        "--decoder", type=str, required=True, help="Path to decoder.onnx"
    )
    parser.add_argument("--joiner", type=str, required=True, help="Path to joiner.onnx")

    parser.add_argument("--tokens", type=str, required=True, help="Path to tokens.txt")

    parser.add_argument("--wav", type=str, required=True, help="Path to test.wav")

    return parser.parse_args()


def create_fbank():
    opts = knf.FbankOptions()
    opts.frame_opts.dither = 0
    opts.frame_opts.remove_dc_offset = False
    opts.frame_opts.window_type = "hann"

    opts.mel_opts.low_freq = 0
    opts.mel_opts.num_bins = 80

    opts.mel_opts.is_librosa = True

    fbank = knf.OnlineFbank(opts)
    return fbank


def compute_features(audio, fbank):
    assert len(audio.shape) == 1, audio.shape
    fbank.accept_waveform(16000, audio)
    ans = []
    processed = 0
    while processed < fbank.num_frames_ready:
        ans.append(np.array(fbank.get_frame(processed)))
        processed += 1
    ans = np.stack(ans)
    return ans


def display(sess):
    print("==========Input==========")
    for i in sess.get_inputs():
        print(i)
    print("==========Output==========")
    for i in sess.get_outputs():
        print(i)


"""
encoder
==========Input==========
NodeArg(name='audio_signal', type='tensor(float)', shape=['audio_signal_dynamic_axes_1', 80, 'audio_signal_dynamic_axes_2'])
NodeArg(name='length', type='tensor(int64)', shape=['length_dynamic_axes_1'])
==========Output==========
NodeArg(name='outputs', type='tensor(float)', shape=['outputs_dynamic_axes_1', 512, 'outputs_dynamic_axes_2'])
NodeArg(name='encoded_lengths', type='tensor(int64)', shape=['encoded_lengths_dynamic_axes_1'])

decoder
==========Input==========
NodeArg(name='targets', type='tensor(int32)', shape=['targets_dynamic_axes_1', 'targets_dynamic_axes_2'])
NodeArg(name='target_length', type='tensor(int32)', shape=['target_length_dynamic_axes_1'])
NodeArg(name='states.1', type='tensor(float)', shape=[1, 'states.1_dim_1', 640])
NodeArg(name='onnx::LSTM_3', type='tensor(float)', shape=[1, 1, 640])
==========Output==========
NodeArg(name='outputs', type='tensor(float)', shape=['outputs_dynamic_axes_1', 640, 'outputs_dynamic_axes_2'])
NodeArg(name='prednet_lengths', type='tensor(int32)', shape=['prednet_lengths_dynamic_axes_1'])
NodeArg(name='states', type='tensor(float)', shape=[1, 'states_dynamic_axes_1', 640])
NodeArg(name='74', type='tensor(float)', shape=[1, 'LSTM74_dim_1', 640])

joiner
==========Input==========
NodeArg(name='encoder_outputs', type='tensor(float)', shape=['encoder_outputs_dynamic_axes_1', 512, 'encoder_outputs_dynamic_axes_2'])
NodeArg(name='decoder_outputs', type='tensor(float)', shape=['decoder_outputs_dynamic_axes_1', 640, 'decoder_outputs_dynamic_axes_2'])
==========Output==========
NodeArg(name='outputs', type='tensor(float)', shape=['outputs_dynamic_axes_1', 'outputs_dynamic_axes_2', 'outputs_dynamic_axes_3', 1025])
"""


class OnnxModel:
    def __init__(
        self,
        encoder: str,
        decoder: str,
        joiner: str,
    ):
        self.init_encoder(encoder)
        display(self.encoder)
        self.init_decoder(decoder)
        display(self.decoder)
        self.init_joiner(joiner)
        display(self.joiner)

    def init_encoder(self, encoder):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.encoder = ort.InferenceSession(
            encoder,
            sess_options=session_opts,
            providers=["CPUExecutionProvider"],
        )

        meta = self.encoder.get_modelmeta().custom_metadata_map
        self.normalize_type = meta["normalize_type"]
        print(meta)

        self.pred_rnn_layers = int(meta["pred_rnn_layers"])
        self.pred_hidden = int(meta["pred_hidden"])

    def init_decoder(self, decoder):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.decoder = ort.InferenceSession(
            decoder,
            sess_options=session_opts,
            providers=["CPUExecutionProvider"],
        )

    def init_joiner(self, joiner):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.joiner = ort.InferenceSession(
            joiner,
            sess_options=session_opts,
            providers=["CPUExecutionProvider"],
        )

    def get_decoder_state(self):
        batch_size = 1
        state0 = torch.zeros(self.pred_rnn_layers, batch_size, self.pred_hidden).numpy()
        state1 = torch.zeros(self.pred_rnn_layers, batch_size, self.pred_hidden).numpy()
        return state0, state1

    def run_encoder(self, x: np.ndarray):
        # x: (T, C)
        x = torch.from_numpy(x)
        x = x.t().unsqueeze(0)
        # x: [1, C, T]
        x_lens = torch.tensor([x.shape[-1]], dtype=torch.int64)

        (encoder_out, out_len) = self.encoder.run(
            [
                self.encoder.get_outputs()[0].name,
                self.encoder.get_outputs()[1].name,
            ],
            {
                self.encoder.get_inputs()[0].name: x.numpy(),
                self.encoder.get_inputs()[1].name: x_lens.numpy(),
            },
        )
        # [batch_size, dim, T]
        return encoder_out

    def run_decoder(
        self,
        token: int,
        state0: np.ndarray,
        state1: np.ndarray,
    ):
        target = torch.tensor([[token]], dtype=torch.int32).numpy()
        target_len = torch.tensor([1], dtype=torch.int32).numpy()

        (decoder_out, decoder_out_length, state0_next, state1_next,) = self.decoder.run(
            [
                self.decoder.get_outputs()[0].name,
                self.decoder.get_outputs()[1].name,
                self.decoder.get_outputs()[2].name,
                self.decoder.get_outputs()[3].name,
            ],
            {
                self.decoder.get_inputs()[0].name: target,
                self.decoder.get_inputs()[1].name: target_len,
                self.decoder.get_inputs()[2].name: state0,
                self.decoder.get_inputs()[3].name: state1,
            },
        )
        return decoder_out, state0_next, state1_next

    def run_joiner(
        self,
        encoder_out: np.ndarray,
        decoder_out: np.ndarray,
    ):
        # encoder_out: [batch_size,  dim, 1]
        # decoder_out: [batch_size,  dim, 1]
        logit = self.joiner.run(
            [
                self.joiner.get_outputs()[0].name,
            ],
            {
                self.joiner.get_inputs()[0].name: encoder_out,
                self.joiner.get_inputs()[1].name: decoder_out,
            },
        )[0]
        # logit: [batch_size, 1, 1, vocab_size]
        return logit


def main():
    args = get_args()
    assert Path(args.encoder).is_file(), args.encoder
    assert Path(args.decoder).is_file(), args.decoder
    assert Path(args.joiner).is_file(), args.joiner
    assert Path(args.tokens).is_file(), args.tokens
    assert Path(args.wav).is_file(), args.wav

    print(vars(args))

    model = OnnxModel(args.encoder, args.decoder, args.joiner)

    id2token = dict()
    with open(args.tokens, encoding="utf-8") as f:
        for line in f:
            t, idx = line.split()
            id2token[int(idx)] = t

    fbank = create_fbank()
    audio, sample_rate = sf.read(args.wav, dtype="float32", always_2d=True)
    audio = audio[:, 0]  # only use the first channel
    if sample_rate != 16000:
        audio = librosa.resample(
            audio,
            orig_sr=sample_rate,
            target_sr=16000,
        )
        sample_rate = 16000

    tail_padding = np.zeros(sample_rate * 2)

    audio = np.concatenate([audio, tail_padding])

    blank = len(id2token) - 1
    ans = [blank]
    state0, state1 = model.get_decoder_state()
    decoder_out, state0_next, state1_next = model.run_decoder(ans[-1], state0, state1)

    features = compute_features(audio, fbank)
    if model.normalize_type != "":
        assert model.normalize_type == "per_feature", model.normalize_type
        features = torch.from_numpy(features)
        mean = features.mean(dim=0, keepdims=True)
        stddev = features.std(dim=0, keepdims=True) + 1e-5
        features = (features - mean) / stddev
        features = features.numpy()
    print(audio.shape)
    print("features.shape", features.shape)

    encoder_out = model.run_encoder(features)
    # encoder_out:[batch_size, dim, T)
    for t in range(encoder_out.shape[2]):
        encoder_out_t = encoder_out[:, :, t : t + 1]
        logits = model.run_joiner(encoder_out_t, decoder_out)
        logits = torch.from_numpy(logits)
        logits = logits.squeeze()
        idx = torch.argmax(logits, dim=-1).item()
        if idx != blank:
            ans.append(idx)
            state0 = state0_next
            state1 = state1_next
            decoder_out, state0_next, state1_next = model.run_decoder(
                ans[-1], state0, state1
            )

    ans = ans[1:]  # remove the first blank
    print(ans)
    tokens = [id2token[i] for i in ans]
    underline = "▁"
    #  underline = b"\xe2\x96\x81".decode()
    text = "".join(tokens).replace(underline, " ").strip()
    print(args.wav)
    print(text)


main()


================================================
FILE: scripts/nemo/fast-conformer-hybrid-transducer-ctc/test-onnx-transducer.py
================================================
#!/usr/bin/env python3
# Copyright      2024  Xiaomi Corp.        (authors: Fangjun Kuang)

import argparse
from pathlib import Path

import kaldi_native_fbank as knf
import librosa
import numpy as np
import onnxruntime as ort
import soundfile as sf
import torch


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--encoder", type=str, required=True, help="Path to encoder.onnx"
    )
    parser.add_argument(
        "--decoder", type=str, required=True, help="Path to decoder.onnx"
    )
    parser.add_argument("--joiner", type=str, required=True, help="Path to joiner.onnx")

    parser.add_argument("--tokens", type=str, required=True, help="Path to tokens.txt")

    parser.add_argument("--wav", type=str, required=True, help="Path to test.wav")

    return parser.parse_args()


def create_fbank():
    opts = knf.FbankOptions()
    opts.frame_opts.dither = 0
    opts.frame_opts.remove_dc_offset = False
    opts.frame_opts.window_type = "hann"

    opts.mel_opts.low_freq = 0
    opts.mel_opts.num_bins = 80

    opts.mel_opts.is_librosa = True

    fbank = knf.OnlineFbank(opts)
    return fbank


def compute_features(audio, fbank):
    assert len(audio.shape) == 1, audio.shape
    fbank.accept_waveform(16000, audio)
    ans = []
    processed = 0
    while processed < fbank.num_frames_ready:
        ans.append(np.array(fbank.get_frame(processed)))
        processed += 1
    ans = np.stack(ans)
    return ans


class OnnxModel:
    def __init__(
        self,
        encoder: str,
        decoder: str,
        joiner: str,
    ):
        self.init_encoder(encoder)
        self.init_decoder(decoder)
        self.init_joiner(joiner)

    def init_encoder(self, encoder):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.encoder = ort.InferenceSession(
            encoder,
            sess_options=session_opts,
            providers=["CPUExecutionProvider"],
        )

        meta = self.encoder.get_modelmeta().custom_metadata_map
        print(meta)

        self.window_size = int(meta["window_size"])
        self.chunk_shift = int(meta["chunk_shift"])

        self.cache_last_channel_dim1 = int(meta["cache_last_channel_dim1"])
        self.cache_last_channel_dim2 = int(meta["cache_last_channel_dim2"])
        self.cache_last_channel_dim3 = int(meta["cache_last_channel_dim3"])

        self.cache_last_time_dim1 = int(meta["cache_last_time_dim1"])
        self.cache_last_time_dim2 = int(meta["cache_last_time_dim2"])
        self.cache_last_time_dim3 = int(meta["cache_last_time_dim3"])

        self.pred_rnn_layers = int(meta["pred_rnn_layers"])
        self.pred_hidden = int(meta["pred_hidden"])

        self.init_cache_state()

    def init_decoder(self, decoder):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.decoder = ort.InferenceSession(
            decoder,
            sess_options=session_opts,
            providers=["CPUExecutionProvider"],
        )

    def init_joiner(self, joiner):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.joiner = ort.InferenceSession(
            joiner,
            sess_options=session_opts,
            providers=["CPUExecutionProvider"],
        )

    def get_decoder_state(self):
        batch_size = 1
        state0 = torch.zeros(self.pred_rnn_layers, batch_size, self.pred_hidden).numpy()
        state1 = torch.zeros(self.pred_rnn_layers, batch_size, self.pred_hidden).numpy()
        return state0, state1

    def init_cache_state(self):
        self.cache_last_channel = torch.zeros(
            1,
            self.cache_last_channel_dim1,
            self.cache_last_channel_dim2,
            self.cache_last_channel_dim3,
            dtype=torch.float32,
        ).numpy()

        self.cache_last_time = torch.zeros(
            1,
            self.cache_last_time_dim1,
            self.cache_last_time_dim2,
            self.cache_last_time_dim3,
            dtype=torch.float32,
        ).numpy()

        self.cache_last_channel_len = torch.zeros([1], dtype=torch.int64).numpy()

    def run_encoder(self, x: np.ndarray):
        # x: (T, C)
        x = torch.from_numpy(x)
        x = x.t().unsqueeze(0)
        # x: [1, C, T]
        x_lens = torch.tensor([x.shape[-1]], dtype=torch.int64)

        (
            encoder_out,
            out_len,
            cache_last_channel_next,
            cache_last_time_next,
            cache_last_channel_len_next,
        ) = self.encoder.run(
            [
                self.encoder.get_outputs()[0].name,
                self.encoder.get_outputs()[1].name,
                self.encoder.get_outputs()[2].name,
                self.encoder.get_outputs()[3].name,
                self.encoder.get_outputs()[4].name,
            ],
            {
                self.encoder.get_inputs()[0].name: x.numpy(),
                self.encoder.get_inputs()[1].name: x_lens.numpy(),
                self.encoder.get_inputs()[2].name: self.cache_last_channel,
                self.encoder.get_inputs()[3].name: self.cache_last_time,
                self.encoder.get_inputs()[4].name: self.cache_last_channel_len,
            },
        )
        self.cache_last_channel = cache_last_channel_next
        self.cache_last_time = cache_last_time_next
        self.cache_last_channel_len = cache_last_channel_len_next

        # [batch_size, dim, T]
        return encoder_out

    def run_decoder(
        self,
        token: int,
        state0: np.ndarray,
        state1: np.ndarray,
    ):
        target = torch.tensor([[token]], dtype=torch.int32).numpy()
        target_len = torch.tensor([1], dtype=torch.int32).numpy()

        (
            decoder_out,
            decoder_out_length,
            state0_next,
            state1_next,
        ) = self.decoder.run(
            [
                self.decoder.get_outputs()[0].name,
                self.decoder.get_outputs()[1].name,
                self.decoder.get_outputs()[2].name,
                self.decoder.get_outputs()[3].name,
            ],
            {
                self.decoder.get_inputs()[0].name: target,
                self.decoder.get_inputs()[1].name: target_len,
                self.decoder.get_inputs()[2].name: state0,
                self.decoder.get_inputs()[3].name: state1,
            },
        )
        return decoder_out, state0_next, state1_next

    def run_joiner(
        self,
        encoder_out: np.ndarray,
        decoder_out: np.ndarray,
    ):
        # encoder_out: [batch_size,  dim, 1]
        # decoder_out: [batch_size,  dim, 1]
        logit = self.joiner.run(
            [
                self.joiner.get_outputs()[0].name,
            ],
            {
                self.joiner.get_inputs()[0].name: encoder_out,
                self.joiner.get_inputs()[1].name: decoder_out,
            },
        )[0]
        # logit: [batch_size, 1, 1, vocab_size]
        return logit


def main():
    args = get_args()
    assert Path(args.encoder).is_file(), args.encoder
    assert Path(args.decoder).is_file(), args.decoder
    assert Path(args.joiner).is_file(), args.joiner
    assert Path(args.tokens).is_file(), args.tokens
    assert Path(args.wav).is_file(), args.wav

    print(vars(args))

    model = OnnxModel(args.encoder, args.decoder, args.joiner)

    id2token = dict()
    with open(args.tokens, encoding="utf-8") as f:
        for line in f:
            t, idx = line.split()
            id2token[int(idx)] = t

    fbank = create_fbank()
    audio, sample_rate = sf.read(args.wav, dtype="float32", always_2d=True)
    audio = audio[:, 0]  # only use the first channel
    if sample_rate != 16000:
        audio = librosa.resample(
            audio,
            orig_sr=sample_rate,
            target_sr=16000,
        )
        sample_rate = 16000

    tail_padding = np.zeros(sample_rate * 2)

    audio = np.concatenate([audio, tail_padding])

    window_size = model.window_size
    chunk_shift = model.chunk_shift

    blank = len(id2token) - 1
    ans = [blank]
    state0, state1 = model.get_decoder_state()
    decoder_out, state0_next, state1_next = model.run_decoder(ans[-1], state0, state1)

    features = compute_features(audio, fbank)
    num_chunks = (features.shape[0] - window_size) // chunk_shift + 1
    for i in range(num_chunks):
        start = i * chunk_shift
        end = start + window_size
        chunk = features[start:end, :]

        encoder_out = model.run_encoder(chunk)
        # encoder_out:[batch_size, dim, T)
        for t in range(encoder_out.shape[2]):
            encoder_out_t = encoder_out[:, :, t : t + 1]
            logits = model.run_joiner(encoder_out_t, decoder_out)
            logits = torch.from_numpy(logits)
            logits = logits.squeeze()
            idx = torch.argmax(logits, dim=-1).item()
            if idx != blank:
                ans.append(idx)
                state0 = state0_next
                state1 = state1_next
                decoder_out, state0_next, state1_next = model.run_decoder(
                    ans[-1], state0, state1
                )

    ans = ans[1:]  # remove the first blank
    tokens = [id2token[i] for i in ans]
    underline = "▁"
    #  underline = b"\xe2\x96\x81".decode()
    text = "".join(tokens).replace(underline, " ").strip()
    print(args.wav)
    print(text)


main()


================================================
FILE: scripts/nemo/generate_bpe_vocab.py
================================================
#!/usr/bin/env python3
# Copyright (c)  2026  github.com/nullbio
#
# Generate bpe.vocab file from a NeMo model for use with hotwords in sherpa-onnx.
#
# The bpe.vocab file contains BPE tokens with their scores (merge priorities),
# which is required for hotword/keyword boosting with modified beam search.
#
# Usage:
#   # From a pretrained model name:
#   python generate_bpe_vocab.py --model nvidia/parakeet-tdt-0.6b-v2
#
#   # From a local .nemo file:
#   python generate_bpe_vocab.py --model ./parakeet-tdt-0.6b-v2.nemo
#
#   # Specify output path:
#   python generate_bpe_vocab.py --model nvidia/parakeet-tdt-0.6b-v2 --output ./bpe.vocab

import argparse
from pathlib import Path


def generate_bpe_vocab_from_tokenizer(sp, output_path: str):
    """
    Generate bpe.vocab file from a sentencepiece processor.

    Uses the original scores from the SentencePiece model, which represent
    BPE merge priorities. These scores ensure correct tokenization order
    when encoding hotwords.

    Args:
        sp: SentencePiece processor object (from tokenizer.tokenizer)
        output_path: Output path for bpe.vocab file
    """
    vocab_size = sp.get_piece_size()
    print(f"Vocabulary size: {vocab_size}")

    print(f"Writing bpe.vocab to: {output_path}")
    with open(output_path, "w", encoding="utf-8") as f:
        for token_id in range(vocab_size):
            token = sp.id_to_piece(token_id)
            score = sp.get_score(token_id)
            f.write(f"{token}\t{score}\n")

    print("Done!")
    return output_path


def generate_bpe_vocab_from_model(asr_model, output_path: str):
    """
    Generate bpe.vocab file from a loaded NeMo ASR model.

    Args:
        asr_model: Loaded NeMo ASR model object
        output_path: Output path for bpe.vocab file
    """
    sp = asr_model.tokenizer.tokenizer
    return generate_bpe_vocab_from_tokenizer(sp, output_path)


def generate_bpe_vocab(model_path: str, output_path: str):
    """
    Generate bpe.vocab file from a NeMo ASR model.

    Args:
        model_path: Path to .nemo file or HuggingFace model name (e.g., nvidia/parakeet-tdt-0.6b-v2)
        output_path: Output path for bpe.vocab file
    """
    import nemo.collections.asr as nemo_asr

    # Load model
    print(f"Loading model: {model_path}")
    if Path(model_path).is_file():
        asr_model = nemo_asr.models.ASRModel.restore_from(restore_path=model_path)
    else:
        asr_model = nemo_asr.models.ASRModel.from_pretrained(model_name=model_path)

    return generate_bpe_vocab_from_model(asr_model, output_path)


def main():
    parser = argparse.ArgumentParser(
        description="Generate bpe.vocab file from a NeMo ASR model for hotword support",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # From HuggingFace model:
  python generate_bpe_vocab.py --model nvidia/parakeet-tdt-0.6b-v2

  # From local .nemo file:
  python generate_bpe_vocab.py --model ./my_model.nemo --output ./bpe.vocab
        """,
    )
    parser.add_argument(
        "--model",
        type=str,
        required=True,
        help="NeMo model name (e.g., nvidia/parakeet-tdt-0.6b-v2) or path to .nemo file",
    )
    parser.add_argument(
        "--output",
        type=str,
        default="./bpe.vocab",
        help="Output path for bpe.vocab file (default: ./bpe.vocab)",
    )

    args = parser.parse_args()

    generate_bpe_vocab(
        model_path=args.model,
        output_path=args.output,
    )


if __name__ == "__main__":
    main()


================================================
FILE: scripts/nemo/nemotron-speech-streaming-en-0.6b/export_onnx.py
================================================
#!/usr/bin/env python3
# Copyright      2026  Xiaomi Corp.        (authors: Fangjun Kuang)
from typing import Dict

import nemo.collections.asr as nemo_asr
import onnx
import torch
from onnxruntime.quantization import QuantType, quantize_dynamic

"""
'_target_': 'nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor',
'sample_rate': 16000, 'normalize': 'NA', 'window_size': 0.025, 'window_stride': 0.01,
'window': 'hann', 'features': 128, 'n_fft': 512, 'log': True,
'frame_splicing': 1, 'dither': 1e-05, 'pad_to': 0, 'pad_value': 0.0}

"""


def add_meta_data(filename: str, meta_data: Dict[str, str]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)

    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    external_filename = filename.split(".onnx")[0]
    onnx.save(
        model,
        filename,
        save_as_external_data=True,
        all_tensors_to_one_file=True,
        location=external_filename + ".data",
    )


@torch.no_grad()
def main():
    model_name = "nvidia/nemotron-speech-streaming-en-0.6b"

    asr_model = nemo_asr.models.ASRModel.from_pretrained(model_name=model_name)

    with open("./tokens.txt", "w", encoding="utf-8") as f:
        for i, s in enumerate(asr_model.joint.vocabulary):
            f.write(f"{s} {i}\n")
        f.write(f"<blk> {i+1}\n")
        print("Saved to tokens.txt")

    asr_model.eval()

    assert asr_model.encoder.streaming_cfg is not None
    if isinstance(asr_model.encoder.streaming_cfg.chunk_size, list):
        chunk_size = asr_model.encoder.streaming_cfg.chunk_size[1]
    else:
        chunk_size = asr_model.encoder.streaming_cfg.chunk_size

    if isinstance(asr_model.encoder.streaming_cfg.pre_encode_cache_size, list):
        pre_encode_cache_size = asr_model.encoder.streaming_cfg.pre_encode_cache_size[1]
    else:
        pre_encode_cache_size = asr_model.encoder.streaming_cfg.pre_encode_cache_size
    window_size = chunk_size + pre_encode_cache_size

    print("chunk_size", chunk_size)
    print("pre_encode_cache_size", pre_encode_cache_size)
    print("window_size", window_size)

    chunk_shift = chunk_size

    # cache_last_channel: (batch_size, dim1, dim2, dim3)
    cache_last_channel_dim1 = len(asr_model.encoder.layers)
    cache_last_channel_dim2 = asr_model.encoder.streaming_cfg.last_channel_cache_size
    cache_last_channel_dim3 = asr_model.encoder.d_model

    # cache_last_time: (batch_size, dim1, dim2, dim3)
    cache_last_time_dim1 = len(asr_model.encoder.layers)
    cache_last_time_dim2 = asr_model.encoder.d_model
    cache_last_time_dim3 = asr_model.encoder.conv_context_size[0]

    asr_model.set_export_config({"cache_support": True})

    asr_model.encoder.export("encoder.onnx")
    asr_model.decoder.export("decoder.onnx")
    asr_model.joint.export("joiner.onnx")

    normalize_type = asr_model.cfg.preprocessor.normalize
    if normalize_type == "NA":
        normalize_type = ""

    meta_data = {
        "vocab_size": asr_model.decoder.vocab_size,
        "window_size": window_size,
        "chunk_shift": chunk_shift,
        "normalize_type": normalize_type,
        "cache_last_channel_dim1": cache_last_channel_dim1,
        "cache_last_channel_dim2": cache_last_channel_dim2,
        "cache_last_channel_dim3": cache_last_channel_dim3,
        "cache_last_time_dim1": cache_last_time_dim1,
        "cache_last_time_dim2": cache_last_time_dim2,
        "cache_last_time_dim3": cache_last_time_dim3,
        "pred_rnn_layers": asr_model.decoder.pred_rnn_layers,
        "pred_hidden": asr_model.decoder.pred_hidden,
        "subsampling_factor": 8,
        "feat_dim": 128,
        "model_type": "EncDecHybridRNNTCTCBPEModel",
        "version": "1",
        "model_author": "NeMo",
        "url": "https://huggingface.co/nvidia/nemotron-speech-streaming-en-0.6b",
        "comment": "Only the transducer branch is exported",
    }
    add_meta_data("encoder.onnx", meta_data)

    for m in ["encoder", "decoder", "joiner"]:
        quantize_dynamic(
            model_input=f"{m}.onnx",
            model_output=f"{m}.int8.onnx",
            weight_type=QuantType.QUInt8,
        )

    print(meta_data)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/nemo/parakeet-tdt-0.6b-v2/export_onnx.py
================================================
#!/usr/bin/env python3
# Copyright      2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import os
import sys
from pathlib import Path
from typing import Dict

import nemo.collections.asr as nemo_asr
import onnx
import torch
from onnxruntime.quantization import QuantType, quantize_dynamic

# Add parent directory to path to import generate_bpe_vocab
sys.path.insert(0, str(Path(__file__).parent.parent))
from generate_bpe_vocab import generate_bpe_vocab_from_model


def add_meta_data(filename: str, meta_data: Dict[str, str]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)
    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    if filename == "encoder.onnx":
        external_filename = "encoder"
        onnx.save(
            model,
            filename,
            save_as_external_data=True,
            all_tensors_to_one_file=True,
            location=external_filename + ".weights",
        )
    else:
        onnx.save(model, filename)


@torch.no_grad()
def main():
    if Path("./parakeet-tdt-0.6b-v2.nemo").is_file():
        asr_model = nemo_asr.models.ASRModel.restore_from(
            restore_path="./parakeet-tdt-0.6b-v2.nemo"
        )
    else:
        asr_model = nemo_asr.models.ASRModel.from_pretrained(
            model_name="nvidia/parakeet-tdt-0.6b-v2"
        )

    asr_model.eval()

    with open("./tokens.txt", "w", encoding="utf-8") as f:
        for i, s in enumerate(asr_model.joint.vocabulary):
            f.write(f"{s} {i}\n")
        f.write(f"<blk> {i+1}\n")
        print("Saved to tokens.txt")

    # Generate bpe.vocab for hotword support
    print("Generating bpe.vocab for hotword support...")
    generate_bpe_vocab_from_model(
        asr_model=asr_model,
        output_path="./bpe.vocab",
    )

    asr_model.encoder.export("encoder.onnx")
    asr_model.decoder.export("decoder.onnx")
    asr_model.joint.export("joiner.onnx")
    os.system("ls -lh *.onnx")

    normalize_type = asr_model.cfg.preprocessor.normalize
    if normalize_type == "NA":
        normalize_type = ""

    meta_data = {
        "vocab_size": asr_model.decoder.vocab_size,
        "normalize_type": normalize_type,
        "pred_rnn_layers": asr_model.decoder.pred_rnn_layers,
        "pred_hidden": asr_model.decoder.pred_hidden,
        "subsampling_factor": 8,
        "model_type": "EncDecRNNTBPEModel",
        "version": "2",
        "model_author": "NeMo",
        "url": "https://huggingface.co/nvidia/parakeet-tdt-0.6b-v2",
        "comment": "Only the transducer branch is exported",
        "feat_dim": 128,
    }

    for m in ["encoder", "decoder", "joiner"]:
        quantize_dynamic(
            model_input=f"./{m}.onnx",
            model_output=f"./{m}.int8.onnx",
            weight_type=QuantType.QUInt8 if m == "encoder" else QuantType.QInt8,
        )
        os.system("ls -lh *.onnx")

    add_meta_data("encoder.int8.onnx", meta_data)
    add_meta_data("encoder.onnx", meta_data)
    print("meta_data", meta_data)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/nemo/parakeet-tdt-0.6b-v2/test_onnx.py
================================================
#!/usr/bin/env python3
# Copyright      2025  Xiaomi Corp.        (authors: Fangjun Kuang)
import argparse
from pathlib import Path

import kaldi_native_fbank as knf
import librosa
import numpy as np
import onnxruntime as ort
import soundfile as sf
import torch
import time


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--encoder", type=str, required=True, help="Path to encoder.onnx"
    )
    parser.add_argument(
        "--decoder", type=str, required=True, help="Path to decoder.onnx"
    )
    parser.add_argument("--joiner", type=str, required=True, help="Path to joiner.onnx")

    parser.add_argument("--tokens", type=str, required=True, help="Path to tokens.txt")

    parser.add_argument("--wav", type=str, required=True, help="Path to test.wav")

    return parser.parse_args()


def create_fbank():
    opts = knf.FbankOptions()
    opts.frame_opts.dither = 0
    opts.frame_opts.remove_dc_offset = False
    opts.frame_opts.window_type = "hann"

    opts.mel_opts.low_freq = 0
    opts.mel_opts.num_bins = 128

    opts.mel_opts.is_librosa = True

    fbank = knf.OnlineFbank(opts)
    return fbank


def compute_features(audio, fbank):
    assert len(audio.shape) == 1, audio.shape
    fbank.accept_waveform(16000, audio)
    ans = []
    processed = 0
    while processed < fbank.num_frames_ready:
        ans.append(np.array(fbank.get_frame(processed)))
        processed += 1
    ans = np.stack(ans)
    return ans


def display(sess, model):
    print(f"=========={model} Input==========")
    for i in sess.get_inputs():
        print(i)
    print(f"=========={model }Output==========")
    for i in sess.get_outputs():
        print(i)


class OnnxModel:
    def __init__(
        self,
        encoder: str,
        decoder: str,
        joiner: str,
    ):
        self.init_encoder(encoder)
        display(self.encoder, "encoder")
        self.init_decoder(decoder)
        display(self.decoder, "decoder")
        self.init_joiner(joiner)
        display(self.joiner, "joiner")

    def init_encoder(self, encoder):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.encoder = ort.InferenceSession(
            encoder,
            sess_options=session_opts,
            providers=["CPUExecutionProvider"],
        )

        meta = self.encoder.get_modelmeta().custom_metadata_map
        self.normalize_type = meta["normalize_type"]
        print(meta)

        self.pred_rnn_layers = int(meta["pred_rnn_layers"])
        self.pred_hidden = int(meta["pred_hidden"])

    def init_decoder(self, decoder):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.decoder = ort.InferenceSession(
            decoder,
            sess_options=session_opts,
            providers=["CPUExecutionProvider"],
        )

    def init_joiner(self, joiner):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.joiner = ort.InferenceSession(
            joiner,
            sess_options=session_opts,
            providers=["CPUExecutionProvider"],
        )

    def get_decoder_state(self):
        batch_size = 1
        state0 = torch.zeros(self.pred_rnn_layers, batch_size, self.pred_hidden).numpy()
        state1 = torch.zeros(self.pred_rnn_layers, batch_size, self.pred_hidden).numpy()
        return state0, state1

    def run_encoder(self, x: np.ndarray):
        # x: (T, C)
        x = torch.from_numpy(x)
        x = x.t().unsqueeze(0)
        # x: [1, C, T]
        x_lens = torch.tensor([x.shape[-1]], dtype=torch.int64)

        (encoder_out, out_len) = self.encoder.run(
            [
                self.encoder.get_outputs()[0].name,
                self.encoder.get_outputs()[1].name,
            ],
            {
                self.encoder.get_inputs()[0].name: x.numpy(),
                self.encoder.get_inputs()[1].name: x_lens.numpy(),
            },
        )
        # [batch_size, dim, T]
        return encoder_out

    def run_decoder(
        self,
        token: int,
        state0: np.ndarray,
        state1: np.ndarray,
    ):
        target = torch.tensor([[token]], dtype=torch.int32).numpy()
        target_len = torch.tensor([1], dtype=torch.int32).numpy()

        (decoder_out, decoder_out_length, state0_next, state1_next,) = self.decoder.run(
            [
                self.decoder.get_outputs()[0].name,
                self.decoder.get_outputs()[1].name,
                self.decoder.get_outputs()[2].name,
                self.decoder.get_outputs()[3].name,
            ],
            {
                self.decoder.get_inputs()[0].name: target,
                self.decoder.get_inputs()[1].name: target_len,
                self.decoder.get_inputs()[2].name: state0,
                self.decoder.get_inputs()[3].name: state1,
            },
        )
        return decoder_out, state0_next, state1_next

    def run_joiner(
        self,
        encoder_out: np.ndarray,
        decoder_out: np.ndarray,
    ):
        # encoder_out: [batch_size,  dim, 1]
        # decoder_out: [batch_size,  dim, 1]
        logit = self.joiner.run(
            [
                self.joiner.get_outputs()[0].name,
            ],
            {
                self.joiner.get_inputs()[0].name: encoder_out,
                self.joiner.get_inputs()[1].name: decoder_out,
            },
        )[0]
        # logit: [batch_size, 1, 1, vocab_size]
        return logit


def main():
    args = get_args()
    assert Path(args.encoder).is_file(), args.encoder
    assert Path(args.decoder).is_file(), args.decoder
    assert Path(args.joiner).is_file(), args.joiner
    assert Path(args.tokens).is_file(), args.tokens
    assert Path(args.wav).is_file(), args.wav

    print(vars(args))

    model = OnnxModel(args.encoder, args.decoder, args.joiner)

    id2token = dict()
    with open(args.tokens, encoding="utf-8") as f:
        for line in f:
            t, idx = line.split()
            id2token[int(idx)] = t
    vocab_size = len(id2token)

    start = time.time()
    fbank = create_fbank()
    audio, sample_rate = sf.read(args.wav, dtype="float32", always_2d=True)
    audio = audio[:, 0]  # only use the first channel
    if sample_rate != 16000:
        audio = librosa.resample(
            audio,
            orig_sr=sample_rate,
            target_sr=16000,
        )
        sample_rate = 16000

    tail_padding = np.zeros(sample_rate * 2)

    audio = np.concatenate([audio, tail_padding])

    blank = len(id2token) - 1
    ans = [blank]
    state0, state1 = model.get_decoder_state()
    decoder_out, state0_next, state1_next = model.run_decoder(ans[-1], state0, state1)

    features = compute_features(audio, fbank)
    if model.normalize_type != "":
        assert model.normalize_type == "per_feature", model.normalize_type
        features = torch.from_numpy(features)
        mean = features.mean(dim=0, keepdims=True)
        stddev = features.std(dim=0, keepdims=True) + 1e-5
        features = (features - mean) / stddev
        features = features.numpy()
    print(audio.shape)
    print("features.shape", features.shape)

    encoder_out = model.run_encoder(features)
    # encoder_out:[batch_size, dim, T)
    t = 0
    while t < encoder_out.shape[2]:
        encoder_out_t = encoder_out[:, :, t : t + 1]
        logits = model.run_joiner(encoder_out_t, decoder_out)
        logits = torch.from_numpy(logits)
        logits = logits.squeeze()

        token_logits = logits[:vocab_size]
        duration_logits = logits[vocab_size:]

        idx = torch.argmax(token_logits, dim=-1).item()
        skip = torch.argmax(duration_logits, dim=-1).item()
        if skip == 0:
            skip = 1

        if idx != blank:
            ans.append(idx)
            state0 = state0_next
            state1 = state1_next
            decoder_out, state0_next, state1_next = model.run_decoder(
                ans[-1], state0, state1
            )
        t += skip

    end = time.time()

    elapsed_seconds = end - start
    audio_duration = audio.shape[0] / 16000
    real_time_factor = elapsed_seconds / audio_duration

    ans = ans[1:]  # remove the first blank
    tokens = [id2token[i] for i in ans]
    underline = "▁"
    #  underline = b"\xe2\x96\x81".decode()
    text = "".join(tokens).replace(underline, " ").strip()

    print(ans)
    print(args.wav)
    print(text)
    print(f"RTF: {real_time_factor}")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/nemo/parakeet-tdt-0.6b-v3/export_onnx.py
================================================
#!/usr/bin/env python3
# Copyright      2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import os
import sys
from pathlib import Path
from typing import Dict

import nemo.collections.asr as nemo_asr
import onnx
import torch
from onnxruntime.quantization import QuantType, quantize_dynamic

# Add parent directory to path to import generate_bpe_vocab
sys.path.insert(0, str(Path(__file__).parent.parent))
from generate_bpe_vocab import generate_bpe_vocab_from_model


def add_meta_data(filename: str, meta_data: Dict[str, str]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)
    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    if filename == "encoder.onnx":
        external_filename = "encoder"
        onnx.save(
            model,
            filename,
            save_as_external_data=True,
            all_tensors_to_one_file=True,
            location=external_filename + ".weights",
        )
    else:
        onnx.save(model, filename)


@torch.no_grad()
def main():
    if Path("./parakeet-tdt-0.6b-v3.nemo").is_file():
        asr_model = nemo_asr.models.ASRModel.restore_from(
            restore_path="./parakeet-tdt-0.6b-v3.nemo"
        )
    else:
        asr_model = nemo_asr.models.ASRModel.from_pretrained(
            model_name="nvidia/parakeet-tdt-0.6b-v3"
        )

    asr_model.eval()

    with open("./tokens.txt", "w", encoding="utf-8") as f:
        for i, s in enumerate(asr_model.joint.vocabulary):
            f.write(f"{s} {i}\n")
        f.write(f"<blk> {i+1}\n")
        print("Saved to tokens.txt")

    # Generate bpe.vocab for hotword support
    print("Generating bpe.vocab for hotword support...")
    generate_bpe_vocab_from_model(
        asr_model=asr_model,
        output_path="./bpe.vocab",
    )

    asr_model.encoder.export("encoder.onnx")
    asr_model.decoder.export("decoder.onnx")
    asr_model.joint.export("joiner.onnx")
    os.system("ls -lh *.onnx")

    normalize_type = asr_model.cfg.preprocessor.normalize
    if normalize_type == "NA":
        normalize_type = ""

    meta_data = {
        "vocab_size": asr_model.decoder.vocab_size,
        "normalize_type": normalize_type,
        "pred_rnn_layers": asr_model.decoder.pred_rnn_layers,
        "pred_hidden": asr_model.decoder.pred_hidden,
        "subsampling_factor": 8,
        "model_type": "EncDecRNNTBPEModel",
        "version": "2",
        "model_author": "NeMo",
        "url": "https://huggingface.co/nvidia/parakeet-tdt-0.6b-v3",
        "comment": "Only the transducer branch is exported",
        "feat_dim": 128,
    }

    for m in ["encoder", "decoder", "joiner"]:
        quantize_dynamic(
            model_input=f"./{m}.onnx",
            model_output=f"./{m}.int8.onnx",
            weight_type=QuantType.QUInt8 if m == "encoder" else QuantType.QInt8,
        )
        os.system("ls -lh *.onnx")

    add_meta_data("encoder.int8.onnx", meta_data)
    add_meta_data("encoder.onnx", meta_data)
    print("meta_data", meta_data)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/nemo/parakeet-tdt_ctc-0.6b-ja/export-onnx-ctc.py
================================================
#!/usr/bin/env python3
# Copyright      2025  Xiaomi Corp.        (authors: Fangjun Kuang)
import os
from typing import Dict

import nemo.collections.asr as nemo_asr
import onnx
import torch
from onnxruntime.quantization import QuantType, quantize_dynamic


def add_meta_data(filename: str, meta_data: Dict[str, str]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)
    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    onnx.save(model, filename)


@torch.no_grad()
def main():
    asr_model = nemo_asr.models.ASRModel.from_pretrained(
        model_name="nvidia/parakeet-tdt_ctc-0.6b-ja"
    )

    print(asr_model.cfg)
    print(asr_model)

    with open("./tokens.txt", "w", encoding="utf-8") as f:
        for i, s in enumerate(asr_model.joint.vocabulary):
            f.write(f"{s} {i}\n")
        f.write(f"<blk> {i+1}\n")
        print("Saved to tokens.txt")

    decoder_type = "ctc"
    asr_model.change_decoding_strategy(decoder_type=decoder_type)
    asr_model.eval()

    asr_model.set_export_config({"decoder_type": "ctc"})

    filename = "model.onnx"

    asr_model.export(filename, onnx_opset_version=18)

    normalize_type = asr_model.cfg.preprocessor.normalize
    if normalize_type == "NA":
        normalize_type = ""

    meta_data = {
        "vocab_size": asr_model.decoder.vocab_size,
        "normalize_type": normalize_type,
        "subsampling_factor": 8,
        "model_type": "EncDecHybridRNNTCTCBPEModel",
        "version": "1",
        "model_author": "NeMo",
        "url": "https://huggingface.co/nvidia/parakeet-tdt_ctc-0.6b-ja",
        "comment": "Only the CTC branch is exported",
        "doc": "See https://huggingface.co/nvidia/parakeet-tdt_ctc-0.6b-ja",
    }

    os.system("ls -lh *.onnx")

    quantize_dynamic(
        model_input="./model.onnx",
        model_output="./model.int8.onnx",
        weight_type=QuantType.QUInt8,
    )

    add_meta_data("model.int8.onnx", meta_data)

    os.system("ls -lh *.onnx")

    print("preprocessor", asr_model.cfg.preprocessor)
    print(meta_data)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/nemo/parakeet-tdt_ctc-0.6b-ja/run-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

python3 ./export-onnx-ctc.py

ls -lh *.onnx

mkdir -p test_wavs
pushd test_wavs
curl -SL -O https://huggingface.co/csukuangfj/reazonspeech-k2-v2-ja-en/resolve/main/test_wavs/transcripts.txt
curl -SL -O https://hf-mirror.com/csukuangfj/reazonspeech-k2-v2-ja-en/resolve/main/test_wavs/test_ja_1.wav
curl -SL -O https://hf-mirror.com/csukuangfj/reazonspeech-k2-v2-ja-en/resolve/main/test_wavs/test_ja_2.wav
popd

d=sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8

mkdir -p $d
mv -v model.int8.onnx $d/
cp -v tokens.txt $d/
cp -av test_wavs $d
ls -lh $d


d=sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8
python3 ./test-onnx-ctc-non-streaming.py \
  --model $d/model.int8.onnx \
  --tokens $d/tokens.txt \
  --wav $d/test_wavs/test_ja_1.wav

python3 ./test-onnx-ctc-non-streaming.py \
  --model $d/model.int8.onnx \
  --tokens $d/tokens.txt \
  --wav $d/test_wavs/test_ja_2.wav


================================================
FILE: scripts/nemo/speaker-verification/README.md
================================================
# Introduction

This directory contains script for exporting speaker verification models
from [NeMo](https://github.com/NVIDIA/NeMo/) to onnx
so that you can use them in `sherpa-onnx`.

Specifically, the following 4 models are exported to `sherpa-onnx`
from
[this page](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/asr/speaker_recognition/results.html#speaker-recognition-models):

  - [titanet_large](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/titanet_large),
  - [titanet_small](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/titanet_small)
  - [speakerverification_speakernet](https://ngc.nvidia.com/catalog/models/nvidia:nemo:speakerverification_speakernet)
  - [ecapa_tdnn](https://ngc.nvidia.com/catalog/models/nvidia:nemo:ecapa_tdnn)


================================================
FILE: scripts/nemo/speaker-verification/export-onnx.py
================================================
#!/usr/bin/env python3
# Copyright    2024  Xiaomi Corp.        (authors: Fangjun Kuang)

import argparse
from typing import Dict

import nemo.collections.asr as nemo_asr
import onnx
import torch


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model",
        type=str,
        required=True,
        choices=[
            "speakerverification_speakernet",
            "titanet_large",
            "titanet_small",
            "ecapa_tdnn",
        ],
    )
    return parser.parse_args()


def add_meta_data(filename: str, meta_data: Dict[str, str]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)
    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    onnx.save(model, filename)


@torch.no_grad()
def main():
    args = get_args()
    speaker_model_config = nemo_asr.models.EncDecSpeakerLabelModel.from_pretrained(
        model_name=args.model, return_config=True
    )
    preprocessor_config = speaker_model_config["preprocessor"]

    print(args.model)
    print(speaker_model_config)
    print(preprocessor_config)

    assert preprocessor_config["n_fft"] == 512, preprocessor_config

    assert (
        preprocessor_config["_target_"]
        == "nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor"
    ), preprocessor_config

    assert preprocessor_config["frame_splicing"] == 1, preprocessor_config

    speaker_model = nemo_asr.models.EncDecSpeakerLabelModel.from_pretrained(
        model_name=args.model
    )
    speaker_model.eval()
    filename = f"nemo_en_{args.model}.onnx"
    speaker_model.export(filename)

    print(f"Adding metadata to {filename}")

    comment = "This model is from NeMo."
    url = {
        "titanet_large": "https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/titanet_large",
        "titanet_small": "https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/titanet_small",
        "speakerverification_speakernet": "https://ngc.nvidia.com/catalog/models/nvidia:nemo:speakerverification_speakernet",
        "ecapa_tdnn": "https://ngc.nvidia.com/catalog/models/nvidia:nemo:ecapa_tdnn",
    }[args.model]

    language = "English"

    meta_data = {
        "framework": "nemo",
        "language": language,
        "url": url,
        "comment": comment,
        "sample_rate": preprocessor_config["sample_rate"],
        "output_dim": speaker_model_config["decoder"]["emb_sizes"],
        "feature_normalize_type": preprocessor_config["normalize"],
        "window_size_ms": int(float(preprocessor_config["window_size"]) * 1000),
        "window_stride_ms": int(float(preprocessor_config["window_stride"]) * 1000),
        "window_type": preprocessor_config["window"],  # e.g., hann
        "feat_dim": preprocessor_config["features"],
    }
    print(meta_data)
    add_meta_data(filename=filename, meta_data=meta_data)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/nemo/speaker-verification/test-onnx.py
================================================
#!/usr/bin/env python3
# Copyright      2023-2024  Xiaomi Corp.        (authors: Fangjun Kuang)

"""
This script computes speaker similarity score in the range [0-1]
of two wave files using a speaker embedding model.
"""
import argparse
import wave
from pathlib import Path

import kaldi_native_fbank as knf
import numpy as np
import onnxruntime as ort
from numpy.linalg import norm


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model",
        type=str,
        required=True,
        help="Path to the input onnx model. Example value: model.onnx",
    )

    parser.add_argument(
        "--file1",
        type=str,
        required=True,
        help="Input wave 1",
    )

    parser.add_argument(
        "--file2",
        type=str,
        required=True,
        help="Input wave 2",
    )

    return parser.parse_args()


def read_wavefile(filename, expected_sample_rate: int = 16000) -> np.ndarray:
    """
    Args:
      filename:
        Path to a wave file, which must be of 16-bit and 16kHz.
     expected_sample_rate:
       Expected sample rate of the wave file.
    Returns:
      Return a 1-D float32 array containing audio samples. Each sample is in
      the range [-1, 1].
    """
    filename = str(filename)
    with wave.open(filename) as f:
        wave_file_sample_rate = f.getframerate()
        assert wave_file_sample_rate == expected_sample_rate, (
            wave_file_sample_rate,
            expected_sample_rate,
        )

        num_channels = f.getnchannels()
        assert f.getsampwidth() == 2, f.getsampwidth()  # it is in bytes
        num_samples = f.getnframes()
        samples = f.readframes(num_samples)
        samples_int16 = np.frombuffer(samples, dtype=np.int16)
        samples_int16 = samples_int16.reshape(-1, num_channels)[:, 0]
        samples_float32 = samples_int16.astype(np.float32)

        samples_float32 = samples_float32 / 32768

        return samples_float32


def compute_features(samples: np.ndarray, model: "OnnxModel") -> np.ndarray:
    fbank_opts = knf.FbankOptions()
    fbank_opts.frame_opts.samp_freq = model.sample_rate
    fbank_opts.frame_opts.frame_length_ms = model.window_size_ms
    fbank_opts.frame_opts.frame_shift_ms = model.window_stride_ms
    fbank_opts.frame_opts.dither = 0
    fbank_opts.frame_opts.remove_dc_offset = False
    fbank_opts.frame_opts.window_type = model.window_type

    fbank_opts.mel_opts.num_bins = model.feat_dim
    fbank_opts.mel_opts.low_freq = 0
    fbank_opts.mel_opts.is_librosa = True

    fbank = knf.OnlineFbank(fbank_opts)
    fbank.accept_waveform(model.sample_rate, samples)
    fbank.input_finished()

    features = []
    for i in range(fbank.num_frames_ready):
        f = fbank.get_frame(i)
        features.append(f)
    features = np.stack(features, axis=0)
    # at this point, the shape of features is (T, C)

    if model.feature_normalize_type != "":
        assert model.feature_normalize_type == "per_feature"
        mean = np.mean(features, axis=0, keepdims=True)
        std = np.std(features, axis=0, keepdims=True)
        features = (features - mean) / std

    feature_len = features.shape[0]
    pad = 16 - feature_len % 16

    if pad > 0:
        padding = np.zeros((pad, features.shape[1]), dtype=np.float32)
        features = np.concatenate([features, padding])

    features = np.expand_dims(features, axis=0)

    return features, feature_len


class OnnxModel:
    def __init__(
        self,
        filename: str,
    ):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.session_opts = session_opts

        self.model = ort.InferenceSession(
            filename,
            sess_options=self.session_opts,
        )

        meta = self.model.get_modelmeta().custom_metadata_map
        self.framework = meta["framework"]
        self.sample_rate = int(meta["sample_rate"])
        self.output_dim = int(meta["output_dim"])
        self.feature_normalize_type = meta["feature_normalize_type"]
        self.window_size_ms = int(meta["window_size_ms"])
        self.window_stride_ms = int(meta["window_stride_ms"])
        self.window_type = meta["window_type"]
        self.feat_dim = int(meta["feat_dim"])
        print(meta)

        assert self.framework == "nemo", self.framework

    def __call__(self, x: np.ndarray, x_lens: int) -> np.ndarray:
        """
        Args:
          x:
            A 2-D float32 tensor of shape (T, C).
          y:
            A 1-D float32 tensor containing model output.
        """
        x = x.transpose(0, 2, 1)  # (B, T, C) -> (B, C, T)
        x_lens = np.asarray([x_lens], dtype=np.int64)

        return self.model.run(
            [
                self.model.get_outputs()[1].name,
            ],
            {
                self.model.get_inputs()[0].name: x,
                self.model.get_inputs()[1].name: x_lens,
            },
        )[0][0]


def main():
    args = get_args()
    print(args)
    filename = Path(args.model)
    file1 = Path(args.file1)
    file2 = Path(args.file2)
    assert filename.is_file(), filename
    assert file1.is_file(), file1
    assert file2.is_file(), file2

    model = OnnxModel(filename)
    wave1 = read_wavefile(file1, model.sample_rate)
    wave2 = read_wavefile(file2, model.sample_rate)

    features1, features1_len = compute_features(wave1, model)
    features2, features2_len = compute_features(wave2, model)

    output1 = model(features1, features1_len)
    output2 = model(features2, features2_len)

    similarity = np.dot(output1, output2) / (norm(output1) * norm(output2))
    print(f"similarity in the range [0-1]: {similarity}")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/node-addon-api/.gitignore
================================================
docs


================================================
FILE: scripts/node-addon-api/CMakeLists.txt
================================================
# See also https://github.com/cmake-js/cmake-js
# npm install cmake-js
# ./node_modules/.bin/cmake-js --help
# ./node_modules/.bin/cmake-js --version
# ./node_modules/.bin/cmake-js compile --help
# ./node_modules/.bin/cmake-js compile --log-level
# ./node_modules/.bin/cmake-js compile --log-level verbose
cmake_minimum_required(VERSION 3.15)
cmake_policy(SET CMP0091 NEW)
cmake_policy(SET CMP0042 NEW)

project(sherpa-onnx)

set(CMAKE_CXX_STANDARD 17)

if(NOT WIN32)
  set(CMAKE_SKIP_BUILD_RPATH FALSE)
  set(BUILD_RPATH_USE_ORIGIN TRUE)
  set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
endif()

if(NOT APPLE)
  set(SHERPA_ONNX_RPATH_ORIGIN "$ORIGIN")
else()
  set(SHERPA_ONNX_RPATH_ORIGIN "@loader_path")
endif()

if(NOT WIN32)
  set(CMAKE_INSTALL_RPATH ${SHERPA_ONNX_RPATH_ORIGIN})
  set(CMAKE_BUILD_RPATH ${SHERPA_ONNX_RPATH_ORIGIN})
endif()

include_directories(${CMAKE_JS_INC})

set(srcs
  src/audio-tagging.cc
  src/keyword-spotting.cc
  src/non-streaming-asr.cc
  src/non-streaming-speaker-diarization.cc
  src/non-streaming-speech-denoiser.cc
  src/non-streaming-tts.cc
  src/offline-punctuation.cc
  src/online-punctuation.cc
  src/streaming-speech-denoiser.cc
  src/sherpa-onnx-node-addon-api.cc
  src/speaker-identification.cc
  src/spoken-language-identification.cc
  src/streaming-asr.cc
  src/vad.cc
  src/version.cc
  src/wave-reader.cc
  src/wave-writer.cc
)

if(NOT DEFINED ENV{SHERPA_ONNX_INSTALL_DIR})
  message(FATAL_ERROR "
Please run:
git clone https://github.com/k2-fsa/sherpa-onnx
cd sherpa-onnx
mkdir build
cd build
cmake -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX=./install ..
make install
export SHERPA_ONNX_INSTALL_DIR=$PWD/install
  ")
endif()

include_directories($ENV{SHERPA_ONNX_INSTALL_DIR}/include)

# See https://nodejs.github.io/node-addon-examples/build-tools/cmake-js
# Include Node-API wrappers
execute_process(
  COMMAND node -p "require('node-addon-api').include"
    WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
    OUTPUT_VARIABLE NODE_ADDON_API_DIR
)

string(REPLACE "\n" "" NODE_ADDON_API_DIR ${NODE_ADDON_API_DIR})
string(REPLACE "\"" "" NODE_ADDON_API_DIR ${NODE_ADDON_API_DIR})
include_directories(${NODE_ADDON_API_DIR})

link_directories($ENV{SHERPA_ONNX_INSTALL_DIR}/lib)

add_library(${PROJECT_NAME} SHARED ${srcs} ${CMAKE_JS_SRC})
set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" SUFFIX ".node")
target_link_libraries(${PROJECT_NAME} ${CMAKE_JS_LIB})

target_link_libraries(${PROJECT_NAME}
  sherpa-onnx-c-api
  onnxruntime
  -Wl,-rpath,$ENV{SHERPA_ONNX_INSTALL_DIR}/lib
  -Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}
)

if(MSVC AND CMAKE_JS_NODELIB_DEF AND CMAKE_JS_NODELIB_TARGET)
  # Generate node.lib
  execute_process(COMMAND ${CMAKE_AR} /def:${CMAKE_JS_NODELIB_DEF} /out:${CMAKE_JS_NODELIB_TARGET} ${CMAKE_STATIC_LINKER_FLAGS})
endif()


================================================
FILE: scripts/node-addon-api/README.md
================================================
# Introduction

This folder contains `node-addon-api` wrapper for `sherpa-onnx`.

Caution: This folder is for developer only.

## Usage

```bash
git clone https://github.com/k2-fsa/sherpa-onnx
cd sherpa-onnx
mkdir build
cd build
cmake -DCMAKE_INSTALL_PREFIX=./install -DBUILD_SHARED_LIBS=ON ..
make -j install
export PKG_CONFIG_PATH=$PWD/install:$PKG_CONFIG_PATH
cd ../scripts/node-addon-api/
npm i
./node_modules/.bin/cmake-js compile --log-level verbose

# see test/test_asr_streaming_transducer.js
# for usages
```


================================================
FILE: scripts/node-addon-api/lib/addon-static-import.js
================================================
const os = require('os');

let addon = null;

const platform = os.platform() === 'win32' ? 'win' : os.platform();
const arch = os.arch();

try {
  if (arch === 'x64') {
    if (platform === 'win') {
      // @ts-expect-error
      addon = require('../sherpa-onnx-win-x64/sherpa-onnx.node')
    } else if (platform === 'darwin') {
      // @ts-expect-error
      addon = require('../sherpa-onnx-darwin-x64/sherpa-onnx.node')
    } else if (platform === 'linux') {
      // @ts-expect-error
      addon = require('../sherpa-onnx-linux-x64/sherpa-onnx.node')
    }
  } else if (arch === 'arm64') {
    if (platform === 'darwin') {
      // @ts-expect-error
      addon = require('../sherpa-onnx-darwin-arm64/sherpa-onnx.node')
    } else if (platform === 'linux') {
      // @ts-expect-error
      addon = require('../sherpa-onnx-linux-arm64/sherpa-onnx.node')
    }
  } else if (arch === 'ia32') {
    if (platform === 'win') {
      // @ts-expect-error
      addon = require('../sherpa-onnx-win-ia32/sherpa-onnx.node')
    }
  }
} catch (error) {
  //
}

if (!addon) {
  try {
    if (arch === 'x64') {
      if (platform === 'win') {
        // @ts-expect-error
        addon = require('./node_modules/sherpa-onnx-win-x64/sherpa-onnx.node')
      } else if (platform === 'darwin') {
        // @ts-expect-error
        addon = require('./node_modules/sherpa-onnx-darwin-x64/sherpa-onnx.node')
      } else if (platform === 'linux') {
        // @ts-expect-error
        addon = require('./node_modules/sherpa-onnx-linux-x64/sherpa-onnx.node')
      }
    } else if (arch === 'arm64') {
      if (platform === 'darwin') {
        // @ts-expect-error
        addon = require('./node_modules/sherpa-onnx-darwin-arm64/sherpa-onnx.node')
      } else if (platform === 'linux') {
        // @ts-expect-error
        addon = require('./node_modules/sherpa-onnx-linux-arm64/sherpa-onnx.node')
      }
    } else if (arch === 'ia32') {
      if (platform === 'win') {
        // @ts-expect-error
        addon = require('./node_modules/sherpa-onnx-win-ia32/sherpa-onnx.node')
      }
    }
  } catch (error) {
    //
  }
}
 
module.exports = addon;

================================================
FILE: scripts/node-addon-api/lib/addon.js
================================================
/** @typedef {import('./types').WaveObject} WaveObject */

const os = require('os');
const path = require('path');
const addonStaticImport = require('./addon-static-import');

// Package name triggered spam for sherpa-onnx-win32-x64
// so we have renamed it to sherpa-onnx-win-x64
const platform = os.platform() === 'win32' ? 'win' : os.platform();
const arch = os.arch();
const platform_arch = `${platform}-${arch}`;
const possible_paths = [
  '../build/Release/sherpa-onnx.node',
  '../build/Debug/sherpa-onnx.node',
  `./node_modules/sherpa-onnx-${platform_arch}/sherpa-onnx.node`,
  `../sherpa-onnx-${platform_arch}/sherpa-onnx.node`,
  './sherpa-onnx.node',
];

let addon = addonStaticImport;

if (!addon) {
  for (const p of possible_paths) {
    try {
      addon = require(p);
      break;
    } catch (error) {
      // do nothing; try the next option
      ;
    }
  }
}

module.exports = addon;

if (!addon) {
  let addon_path =
      `${process.env.PWD}/node_modules/sherpa-onnx-${platform_arch}`;
  const pnpmIndex = __dirname.indexOf(`node_modules${path.sep}.pnpm`);
  if (pnpmIndex !== -1) {
    const parts = __dirname.slice(pnpmIndex).split(path.sep);
    parts.pop();
    addon_path =
        `${process.env.PWD}/${parts.join('/')}/sherpa-onnx-${platform_arch}`;
  }

  let msg = `Could not find sherpa-onnx-node. Tried\n\n  ${
      possible_paths.join('\n  ')}\n`
  if (os.platform() == 'darwin' &&
      (!process.env.DYLD_LIBRARY_PATH ||
       !process.env.DYLD_LIBRARY_PATH.includes(
           `node_modules/sherpa-onnx-${platform_arch}`))) {
    msg +=
        'Please remember to set the following environment variable and try again:\n';

    msg += `export DYLD_LIBRARY_PATH=${addon_path}`;

    msg += ':$DYLD_LIBRARY_PATH\n';
  }

  if (os.platform() == 'linux' &&
      (!process.env.LD_LIBRARY_PATH ||
       !process.env.LD_LIBRARY_PATH.includes(
           `node_modules/sherpa-onnx-${platform_arch}`))) {
    msg +=
        'Please remember to set the following environment variable and try again:\n';

    msg += `export LD_LIBRARY_PATH=${addon_path}`;

    msg += ':$LD_LIBRARY_PATH\n';
  }

  throw new Error(msg)
}

/**
 * Read a wave file from disk.
 * @function module.exports.readWave
 * @param {string} filename
 * @param {boolean} [enableExternalBuffer=true]
 * @returns {WaveObject}
 */

/**
 * Read a wave from binary buffer.
 * @function module.exports.readWaveFromBinary
 * @param {Uint8Array} data - Binary contents of a wave file.
 * @param {boolean} [enableExternalBuffer=true]
 * @returns {WaveObject}
 */

/**
 * Write a wave file to disk.
 * @function module.exports.writeWave
 * @param {string} filename
 * @param {WaveObject} obj - { samples: Float32Array, sampleRate: number }
 * @returns {boolean}
 */


================================================
FILE: scripts/node-addon-api/lib/audio-tagg.js
================================================
/** @typedef {import('./types').AudioTaggingConfig} AudioTaggingConfig */
/** @typedef {import('./types').AudioEvent} AudioEvent */
/** @typedef {import('./types').AudioTaggingHandle} AudioTaggingHandle */
/** @typedef {import('./non-streaming-asr').OfflineStream} OfflineStream */

const addon = require('./addon.js');
const non_streaming_asr = require('./non-streaming-asr.js');

/**
 * AudioTagging utility.
 * @class
 */
class AudioTagging {
  /**
   * Create an AudioTagging instance.
   * @param {AudioTaggingConfig} config
   */
  constructor(config) {
    this.handle = addon.createAudioTagging(config);
    this.config = config;
  }

  /**
   * Create an offline stream bound to this AudioTagging instance.
   * @returns {OfflineStream}
   */
  createStream() {
    return new non_streaming_asr.OfflineStream(
        addon.audioTaggingCreateOfflineStream(this.handle));
  }

  /**
   * Compute audio tags from an offline stream.
   * @param {OfflineStream} stream - An offline stream created by `AudioTagging.createStream()`.
   * @param {number} [topK=-1] - Return top K results; -1 for all.
   * @returns {AudioEvent[]}
   */
  compute(stream, topK = -1) {
    return addon.audioTaggingCompute(this.handle, stream.handle, topK);
  }
}

module.exports = {
  AudioTagging,
}


================================================
FILE: scripts/node-addon-api/lib/keyword-spotter.js
================================================
/** @typedef {import('./types').KeywordSpotterConfig} KeywordSpotterConfig */
/** @typedef {import('./types').KeywordResult} KeywordResult */
/** @typedef {import('./streaming-asr').OnlineStream} OnlineStream */

const addon = require('./addon.js');
const streaming_asr = require('./streaming-asr.js');

/**
 * KeywordSpotter handles keyword detection.
 */
class KeywordSpotter {
  /**
   * @param {KeywordSpotterConfig} config
   */
  constructor(config) {
    this.handle = addon.createKeywordSpotter(config);
    this.config = config
  }

  /**
   * Create an OnlineStream for the spotter.
   * @returns {OnlineStream}
   */
  createStream() {
    const handle = addon.createKeywordStream(this.handle);
    return new streaming_asr.OnlineStream(handle);
  }

  /**
   * @param {OnlineStream} stream
   * @returns {boolean}
   */
  isReady(stream) {
    return addon.isKeywordStreamReady(this.handle, stream.handle);
  }

  /**
   * Trigger decode on a stream.
   * @param {OnlineStream} stream
   */
  decode(stream) {
    addon.decodeKeywordStream(this.handle, stream.handle);
  }

  /**
   * Reset a stream.
   * @param {OnlineStream} stream
   */
  reset(stream) {
    addon.resetKeywordStream(this.handle, stream.handle);
  }

  /**
   * Get the keyword result for a stream.
   * @param {OnlineStream} stream
   * @returns {KeywordResult}
   */
  getResult(stream) {
    const jsonStr = addon.getKeywordResultAsJson(this.handle, stream.handle);

    return JSON.parse(jsonStr);
  }
}

module.exports = {
  KeywordSpotter,
}


================================================
FILE: scripts/node-addon-api/lib/non-streaming-asr.js
================================================
/** @typedef {import('./types').OfflineStreamObject} OfflineStreamObject */
/** @typedef {import('./types').Waveform} Waveform */
/**
 * @typedef {import('./types').OfflineRecognizerConfig} OfflineRecognizerConfig
 */
/**
 * @typedef {import('./types').OfflineRecognizerResult} OfflineRecognizerResult
 */

const addon = require('./addon.js');

/**
 * Internal symbol to mark async-created recognizers.
 * Not accessible unless someone has a reference to this Symbol.
 */
const kFromAsyncFactory = Symbol('OfflineRecognizer.fromAsync');

/**
 * OfflineStream represents a synchronous offline audio stream.
 */
class OfflineStream {
  /**
   * @param {OfflineStreamObject|Object} handle
   */
  constructor(handle) {
    this.handle = handle;
  }

  /**
   * Accept a chunk of waveform samples.
   * @param {Waveform} obj - { samples: Float32Array, sampleRate: number }
   */
  acceptWaveform(obj) {
    addon.acceptWaveformOffline(this.handle, obj);
  }
}

/**
 * OfflineRecognizer wraps the native offline recognizer.
 */
class OfflineRecognizer {
  /**
   * Constructor (SYNC path).
   *
   * Users call:
   *   new OfflineRecognizer(config)
   *
   * Async factory calls this with an internal descriptor.
   *
   * @param {OfflineRecognizerConfig | Object} configOrInternal
   */
  constructor(configOrInternal) {
    // ----- async factory path -----
    if (configOrInternal && typeof configOrInternal === 'object' &&
        configOrInternal[kFromAsyncFactory]) {
      this.handle = configOrInternal.handle;
      this.config = configOrInternal.config;
      return;
    }

    // ----- sync constructor path -----
    this.config = configOrInternal;
    this.handle = addon.createOfflineRecognizer(this.config);
  }

  /**
   * Create an OfflineRecognizer asynchronously (non-blocking).
   *
   * @param {OfflineRecognizerConfig} config
   * @returns {Promise<OfflineRecognizer>}
   */
  static async createAsync(config) {
    const handle = await addon.createOfflineRecognizerAsync(config);

    return new OfflineRecognizer({
      [kFromAsyncFactory]: true,
      handle,
      config,
    });
  }

  /**
   * Create a new OfflineStream bound to this recognizer.
   * @returns {OfflineStream}
   */
  createStream() {
    const handle = addon.createOfflineStream(this.handle);
    return new OfflineStream(handle);
  }

  /**
   * Replace the recognizer config at runtime.
   * @param {OfflineRecognizerConfig} config
   */
  setConfig(config) {
    this.config = config;
    addon.offlineRecognizerSetConfig(this.handle, config);
  }

  /**
   * Decode an offline stream (synchronous).
   * @param {OfflineStream} stream
   */
  decode(stream) {
    addon.decodeOfflineStream(this.handle, stream.handle);
  }

  /**
   * Decode an offline stream asynchronously (non-blocking).
   * @param {OfflineStream} stream
   * @returns {Promise<OfflineRecognizerResult>}
   */
  async decodeAsync(stream) {
    const jsonStr =
        await addon.decodeOfflineStreamAsync(this.handle, stream.handle);
    return JSON.parse(jsonStr);
  }

  /**
   * Get recognition result for a stream.
   * @param {OfflineStream} stream
   * @returns {OfflineRecognizerResult}
   */
  getResult(stream) {
    const jsonStr = addon.getOfflineStreamResultAsJson(stream.handle);
    return JSON.parse(jsonStr);
  }
}

module.exports = {
  OfflineRecognizer,
  OfflineStream,
};


================================================
FILE: scripts/node-addon-api/lib/non-streaming-speaker-diarization.js
================================================
/** @typedef {import('./types').OfflineSpeakerDiarizationConfig} OfflineSpeakerDiarizationConfig */
/** @typedef {import('./types').SpeakerDiarizationSegment} SpeakerDiarizationSegment */

const addon = require('./addon.js');

class OfflineSpeakerDiarization {
  /**
   * @param {OfflineSpeakerDiarizationConfig} config
   */
  constructor(config) {
    this.handle = addon.createOfflineSpeakerDiarization(config);
    this.config = config;

    this.sampleRate = addon.getOfflineSpeakerDiarizationSampleRate(this.handle);
  }

  /**
   * @param {Float32Array} samples - 1-D float32 array in [-1, 1]
   * @returns {SpeakerDiarizationSegment[]}
   */
  process(samples) {
    return addon.offlineSpeakerDiarizationProcess(this.handle, samples);
  }

  /**
   * Set clustering configuration.
   * @param {{clustering: import('./types').FastClusteringConfig}} config
   */
  setConfig(config) {
    addon.offlineSpeakerDiarizationSetConfig(this.handle, config);
    this.config.clustering = config.clustering;
  }
}

module.exports = {
  OfflineSpeakerDiarization,
} 

================================================
FILE: scripts/node-addon-api/lib/non-streaming-speech-denoiser.js
================================================
/** @typedef {import('./types').OfflineSpeechDenoiserConfig} OfflineSpeechDenoiserConfig */
/** @typedef {import('./types').GeneratedAudio} GeneratedAudio */
/** @typedef {import('./types').AudioProcessRequest} AudioProcessRequest */

const addon = require('./addon.js');

class OfflineSpeechDenoiser {
  /**
   * @param {OfflineSpeechDenoiserConfig} config
   */
  constructor(config) {
    this.handle = addon.createOfflineSpeechDenoiser(config);
    this.config = config;

    this.sampleRate =
        addon.offlineSpeechDenoiserGetSampleRateWrapper(this.handle);
  }

  /**
   * Run denoiser synchronously.
   * @param {AudioProcessRequest} obj - { samples: Float32Array, sampleRate: number, enableExternalBuffer?: boolean }
   * @returns {GeneratedAudio}
   */
  run(obj) {
    return addon.offlineSpeechDenoiserRunWrapper(this.handle, obj);
  }
}

module.exports = {
  OfflineSpeechDenoiser,
} 

================================================
FILE: scripts/node-addon-api/lib/non-streaming-tts.js
================================================
/** @typedef {import('./types').OfflineTtsConfig} OfflineTtsConfig */
/** @typedef {import('./types').TtsRequest} TtsRequest */
/** @typedef {import('./types').GeneratedAudio} GeneratedAudio */

const addon = require('./addon.js');

/**
 * Internal symbol to mark async-created TTS instances.
 */
const kFromAsyncFactory = Symbol('OfflineTts.fromAsync');


class GenerationConfig {
  constructor(opts = {}) {
    Object.assign(this, opts);
  }
}


class OfflineTts {
  /**
   * Constructor (sync path).
   *
   * Users call:
   *   new OfflineTts(config)
   *
   * Async factory calls this with an internal descriptor.
   *
   * @param {OfflineTtsConfig|Object} configOrInternal
   */
  constructor(configOrInternal) {
    if (configOrInternal && typeof configOrInternal === 'object' &&
        configOrInternal[kFromAsyncFactory]) {
      // ----- async factory path -----
      this.handle = configOrInternal.handle;
      this.config = configOrInternal.config;
    } else {
      // ----- sync constructor path -----
      this.config = configOrInternal;
      this.handle = addon.createOfflineTts(this.config);
    }

    // Common initialization
    this.numSpeakers = addon.getOfflineTtsNumSpeakers(this.handle);
    this.sampleRate = addon.getOfflineTtsSampleRate(this.handle);
  }

  /**
   * Create an OfflineTts asynchronously (non-blocking).
   * @param {OfflineTtsConfig} config
   * @returns {Promise<OfflineTts>}
   */
  static async createAsync(config) {
    const handle = await addon.createOfflineTtsAsync(config);
    return new OfflineTts({
      [kFromAsyncFactory]: true,
      handle,
      config,
    });
  }

  /**
   * Generate audio synchronously.
   * @param {TtsRequest} obj
   * @returns {GeneratedAudio}
   */
  generate(obj) {
    if (!obj || typeof obj !== 'object') {
      throw new TypeError('generate() expects an object');
    }

    // If generationConfig is present, use new API
    if (obj.generationConfig !== undefined) {
      return addon.offlineTtsGenerateWithConfig(this.handle, obj);
    }

    // Fallback to legacy path
    return addon.offlineTtsGenerate(this.handle, obj);
  }
  /**
   * Generate audio asynchronously with optional generationConfig and progress
   * callback
   *
   * The progress callback receives streaming audio chunks.
   *
   * @param {TtsRequest & { generationConfig?: object, onProgress?: (info: {
   *     samples: Float32Array, progress: number }) => number | boolean | void
   *     }} obj
   * @returns {Promise<GeneratedAudio>}
   */
  generateAsync(obj) {
    const {onProgress, ...rest} = obj;

    const hasConfig = obj.generationConfig !== undefined;

    const fn = hasConfig ? addon.offlineTtsGenerateAsyncWithConfig :
                           addon.offlineTtsGenerateAsync;

    return fn(this.handle, {
      ...rest,
      callback: typeof onProgress === 'function' ?
          (info) => {
            const ret = onProgress(info);
            return ret === 0 || ret === false ? 0 : 1;
          } :
          undefined,
    });
  }
}


module.exports = {
  OfflineTts,
  GenerationConfig,
}


================================================
FILE: scripts/node-addon-api/lib/online-speech-denoiser.js
================================================
/** @typedef {import('./types').OnlineSpeechDenoiserConfig} OnlineSpeechDenoiserConfig */
/** @typedef {import('./types').GeneratedAudio} GeneratedAudio */
/** @typedef {import('./types').AudioProcessRequest} AudioProcessRequest */

const addon = require('./addon.js');

class OnlineSpeechDenoiser {
  /**
   * @param {OnlineSpeechDenoiserConfig} config
   */
  constructor(config) {
    this.handle = addon.createOnlineSpeechDenoiser(config);
    this.config = config;

    this.sampleRate =
        addon.onlineSpeechDenoiserGetSampleRateWrapper(this.handle);
    this.frameShiftInSamples =
        addon.onlineSpeechDenoiserGetFrameShiftInSamplesWrapper(this.handle);
  }

  /**
   * @param {AudioProcessRequest} obj
   * @returns {GeneratedAudio}
   */
  run(obj) {
    return addon.onlineSpeechDenoiserRunWrapper(this.handle, obj);
  }

  /**
   * @param {boolean} [enableExternalBuffer=true]
   * @returns {GeneratedAudio}
   */
  flush(enableExternalBuffer = true) {
    return addon.onlineSpeechDenoiserFlushWrapper(
        this.handle, enableExternalBuffer);
  }

  reset() {
    addon.onlineSpeechDenoiserResetWrapper(this.handle);
  }
}

module.exports = {
  OnlineSpeechDenoiser,
};


================================================
FILE: scripts/node-addon-api/lib/punctuation.js
================================================
/** @typedef {import('./types').OfflinePunctuationHandle} OfflinePunctuationHandle */
/** @typedef {import('./types').OfflinePunctuationConfig} OfflinePunctuationConfig */
/** @typedef {import('./types').OnlinePunctuationConfig} OnlinePunctuationConfig */

const addon = require('./addon.js');

class OfflinePunctuation {
  /**
   * @param {OfflinePunctuationConfig} config
   */
  constructor(config) {
    this.handle = addon.createOfflinePunctuation(config);
    this.config = config;
  }
  /**
   * Add punctuation to `text` and return the punctuated text.
   * @param {string} text
   * @returns {string}
   */
  addPunct(text) {
    return addon.offlinePunctuationAddPunct(this.handle, text);
  }
}

class OnlinePunctuation {
  /**
   * @param {OnlinePunctuationConfig} config
   */
  constructor(config) {
    this.handle = addon.createOnlinePunctuation(config);
    this.config = config;
  }
  /** @param {string} text @returns {string} */
  addPunct(text) {
    return addon.onlinePunctuationAddPunct(this.handle, text);
  }
}

module.exports = {
  OfflinePunctuation,
  OnlinePunctuation,
} 


================================================
FILE: scripts/node-addon-api/lib/sherpa-onnx.js
================================================
/** @typedef {import('./types').WaveObject} WaveObject */
/**
 * @typedef {import('./types').OnlineRecognizerResult} OnlineRecognizerResult
 */
/**
 * @typedef {import('./types').OfflineRecognizerResult} OfflineRecognizerResult
 */

const addon = require('./addon.js')
const streaming_asr = require('./streaming-asr.js');
const non_streaming_asr = require('./non-streaming-asr.js');
const non_streaming_tts = require('./non-streaming-tts.js');
const vad = require('./vad.js');
const slid = require('./spoken-language-identification.js');
const sid = require('./speaker-identification.js');
const at = require('./audio-tagg.js');
const punct = require('./punctuation.js');
const kws = require('./keyword-spotter.js');
const sd = require('./non-streaming-speaker-diarization.js');
const speech_denoiser = require('./non-streaming-speech-denoiser.js');
const online_speech_denoiser = require('./online-speech-denoiser.js');

module.exports = {
  OnlineRecognizer : streaming_asr.OnlineRecognizer,
  OfflineRecognizer : non_streaming_asr.OfflineRecognizer,
  OfflineTts : non_streaming_tts.OfflineTts,
  GenerationConfig : non_streaming_tts.GenerationConfig,
  readWave : addon.readWave,
  writeWave : addon.writeWave,
  Display : streaming_asr.Display,
  Vad : vad.Vad,
  CircularBuffer : vad.CircularBuffer,
  SpokenLanguageIdentification : slid.SpokenLanguageIdentification,
  SpeakerEmbeddingExtractor : sid.SpeakerEmbeddingExtractor,
  SpeakerEmbeddingManager : sid.SpeakerEmbeddingManager,
  AudioTagging : at.AudioTagging,
  OfflinePunctuation : punct.OfflinePunctuation,
  OnlinePunctuation : punct.OnlinePunctuation,
  KeywordSpotter : kws.KeywordSpotter,
  OfflineSpeakerDiarization : sd.OfflineSpeakerDiarization,
  OfflineSpeechDenoiser : speech_denoiser.OfflineSpeechDenoiser,
  OnlineSpeechDenoiser : online_speech_denoiser.OnlineSpeechDenoiser,
  version : addon.version,
  gitSha1 : addon.gitSha1,
  gitDate : addon.gitDate,
}


================================================
FILE: scripts/node-addon-api/lib/speaker-identification.js
================================================
/** @typedef {import('./types').SpeakerEmbeddingEntry} SpeakerEmbeddingEntry */
/** @typedef {import('./types').SpeakerEmbeddingManagerSearchObj} SpeakerEmbeddingManagerSearchObj */
/** @typedef {import('./types').SpeakerEmbeddingManagerVerifyObj} SpeakerEmbeddingManagerVerifyObj */
/** @typedef {import('./types').SpeakerEmbeddingExtractorConfig} SpeakerEmbeddingExtractorConfig */
/** @typedef {import('./streaming-asr').OnlineStream} OnlineStream */

const addon = require('./addon.js');
const streaming_asr = require('./streaming-asr.js');

/**
 * SpeakerEmbeddingExtractor wraps native speaker embedding extractor.
 */
class SpeakerEmbeddingExtractor {
  /**
   * @param {SpeakerEmbeddingExtractorConfig} config
   */
  constructor(config) {
    this.handle = addon.createSpeakerEmbeddingExtractor(config);
    this.config = config;
    this.dim = addon.speakerEmbeddingExtractorDim(this.handle);
  }

  /**
   * @returns {OnlineStream}
   */
  createStream() {
    return new streaming_asr.OnlineStream(
        addon.speakerEmbeddingExtractorCreateStream(this.handle));
  }

  /**
   * @param {OnlineStream} stream
   * @returns {boolean}
   */
  isReady(stream) {
    return addon.speakerEmbeddingExtractorIsReady(this.handle, stream.handle);
  }

  /**
   * Compute embedding and return a Float32Array
   * @param {OnlineStream} stream
   * @param {boolean} [enableExternalBuffer=true]
   * @returns {Float32Array}
   */
  compute(stream, enableExternalBuffer = true) {
    return addon.speakerEmbeddingExtractorComputeEmbedding(
        this.handle, stream.handle, enableExternalBuffer);
  }
}

/**
 * Flattens an array of Float32Arrays into a single Float32Array.
 * @param {Float32Array[]} arrayList
 * @returns {Float32Array}
 */
function flatten(arrayList) {
  let n = 0;
  for (let i = 0; i < arrayList.length; ++i) {
    n += arrayList[i].length;
  }
  let ans = new Float32Array(n);

  let offset = 0;
  for (let i = 0; i < arrayList.length; ++i) {
    ans.set(arrayList[i], offset);
    offset += arrayList[i].length;
  }
  return ans;
}

/**
 * Manager for speaker embeddings.
 */
class SpeakerEmbeddingManager {
  /**
   * @param {number} dim - The embedding dimension
   */
  constructor(dim) {
    this.handle = addon.createSpeakerEmbeddingManager(dim);
    this.dim = dim;
  }

  /**
   * @param {SpeakerEmbeddingEntry} obj
   * @returns {boolean}
   */
  add(obj) {
    return addon.speakerEmbeddingManagerAdd(this.handle, obj);
  }

  /**
   * @param {{name:string, v: Float32Array[]}} obj
   * @returns {boolean}
   */
  addMulti(obj) {
    const c = {
      name: obj.name,
      vv: flatten(obj.v),
      n: obj.v.length,
    };
    return addon.speakerEmbeddingManagerAddListFlattened(this.handle, c);
  }

  /**
   * @param {string} name
   * @returns {boolean}
   */
  remove(name) {
    return addon.speakerEmbeddingManagerRemove(this.handle, name);
  }

  /**
   * @param {SpeakerEmbeddingManagerSearchObj} obj
   * @returns {string}
   */
  search(obj) {
    return addon.speakerEmbeddingManagerSearch(this.handle, obj);
  }

  /**
   * @param {SpeakerEmbeddingManagerVerifyObj} obj
   * @returns {boolean}
   */
  verify(obj) {
    return addon.speakerEmbeddingManagerVerify(this.handle, obj);
  }

  /**
   * @param {string} name
   * @returns {boolean}
   */
  contains(name) {
    return addon.speakerEmbeddingManagerContains(this.handle, name);
  }

  /** @returns {number} */
  getNumSpeakers() {
    return addon.speakerEmbeddingManagerNumSpeakers(this.handle);
  }

  /** @returns {string[]} */
  getAllSpeakerNames() {
    return addon.speakerEmbeddingManagerGetAllSpeakers(this.handle);
  }
}

module.exports = {
  SpeakerEmbeddingExtractor,
  SpeakerEmbeddingManager,
}


================================================
FILE: scripts/node-addon-api/lib/spoken-language-identification.js
================================================
/** @typedef {import('./types').SpokenLanguageIdentificationConfig} SpokenLanguageIdentificationConfig */
/** @typedef {import('./non-streaming-asr').OfflineStream} OfflineStream */

const addon = require('./addon.js');
const non_streaming_asr = require('./non-streaming-asr.js');

class SpokenLanguageIdentification {
  /**
   * @param {SpokenLanguageIdentificationConfig} config
   */
  constructor(config) {
    this.handle = addon.createSpokenLanguageIdentification(config);
    this.config = config;
  }

  /**
   * @returns {OfflineStream}
   */
  createStream() {
    return new non_streaming_asr.OfflineStream(
        addon.createSpokenLanguageIdentificationOfflineStream(this.handle));
  }

  /**
   * Return a 2-letter language code, e.g. 'en', 'de', 'fr', 'es', 'zh'
   * @param {OfflineStream} stream
   * @returns {string}
   */
  compute(stream) {
    return addon.spokenLanguageIdentificationCompute(
        this.handle, stream.handle);
  }
}

module.exports = {
  SpokenLanguageIdentification,
} 

================================================
FILE: scripts/node-addon-api/lib/streaming-asr.js
================================================
/** @typedef {import('./types').OnlineStreamObject} OnlineStreamObject */
/** @typedef {import('./types').OnlineRecognizerHandle} OnlineRecognizerHandle */
/** @typedef {import('./types').DisplayObject} DisplayObject */
/** @typedef {import('./types').OnlineRecognizerConfig} OnlineRecognizerConfig */
/** @typedef {import('./types').Waveform} Waveform */
/** @typedef {import('./types').OnlineRecognizerResult} OnlineRecognizerResult */

const addon = require('./addon.js');

/**
 * Display helper for printing recognized words.
 */
class Display {
  /**
   * @param {number} maxWordPerline
   */
  constructor(maxWordPerline) {
    this.handle = addon.createDisplay(maxWordPerline);
  }

  /**
   * Print text to display.
   * @param {number} idx
   * @param {string} text
   */
  print(idx, text) {
    addon.print(this.handle, idx, text)
  }
}

/**
 * OnlineStream holds an active online stream handle.
 */
class OnlineStream {
  /**
   * @param {OnlineStreamObject|Object} handle - object with `handle` property
   */
  constructor(handle) {
    this.handle = handle;
  }

  /**
   * Accept waveform data
   * @param {Waveform} obj - { samples: Float32Array, sampleRate: number }
   */
  acceptWaveform(obj) {
    addon.acceptWaveformOnline(this.handle, obj)
  }

  /** Notify the stream input has finished. */
  inputFinished() {
    addon.inputFinished(this.handle)
  }
}

/**
 * OnlineRecognizer wraps native online recognizer.
 */
class OnlineRecognizer {
  /**
   * @param {OnlineRecognizerConfig} config - online recognizer config (see C++ for fields)
   */
  constructor(config) {
    this.handle = addon.createOnlineRecognizer(config);
    this.config = config
  }

  /**
   * Create a new OnlineStream.
   * @returns {OnlineStream}
   */
  createStream() {
    const handle = addon.createOnlineStream(this.handle);
    return new OnlineStream(handle);
  }

  /**
   * Check whether a stream is ready.
   * @param {OnlineStream} stream
   * @returns {boolean}
   */
  isReady(stream) {
    return addon.isOnlineStreamReady(this.handle, stream.handle);
  }

  /**
   * Trigger decoding on a stream.
   * @param {OnlineStream} stream
   */
  decode(stream) {
    addon.decodeOnlineStream(this.handle, stream.handle);
  }

  /**
   * Check endpoint condition for a stream.
   * @param {OnlineStream} stream
   * @returns {boolean}
   */
  isEndpoint(stream) {
    return addon.isEndpoint(this.handle, stream.handle);
  }

  /**
   * Reset a stream.
   * @param {OnlineStream} stream
   */
  reset(stream) {
    addon.reset(this.handle, stream.handle);
  }

  /**
   * Get recognition result for a stream.
   * @param {OnlineStream} stream
   * @returns {OnlineRecognizerResult}
   */
  getResult(stream) {
    const jsonStr =
        addon.getOnlineStreamResultAsJson(this.handle, stream.handle);

    return JSON.parse(jsonStr);
  }
}

module.exports = {
  OnlineRecognizer,
  OnlineStream,
  Display
}


================================================
FILE: scripts/node-addon-api/lib/types.js
================================================
/**
 * Centralized JSDoc typedefs for the Node addon API.
 * These typedefs mirror the shapes produced/consumed by the C++ bindings
 * in `scripts/node-addon-api/src/*` and by the underlying SherpaOnnx C API.
 *
 * Keep these typedefs specialized (no `any`/`unknown`) and concise.
 */

/**
 * Opaque handle types returned by native constructors. These are opaque
 * JavaScript objects backed by native pointers. Do not introspect or
 * mutate their internals; pass them to the API functions as-is.
 *
 * @typedef {Object} OfflineStreamHandle
 * @see src/non-streaming-asr.cc
 */

/**
 * @typedef {Object} OnlineStreamHandle
 * @see src/streaming-asr.cc
 */

/**
 * @typedef {Object} OfflineRecognizerHandle
 * @see src/non-streaming-asr.cc
 */

/**
 * @typedef {Object} OnlineRecognizerHandle
 * @see src/streaming-asr.cc
 */

/**
 * @typedef {Object} DisplayHandle
 * @see src/streaming-asr.cc
 */

/**
 * @typedef {Object} CircularBufferHandle
 * @see src/vad.cc
 */

/**
 * @typedef {Object} VoiceActivityDetectorHandle
 * @see src/vad.cc
 */

/**
 * @typedef {Object} AudioTaggingHandle
 * @see src/audio-tagging.cc
 */

/**
 * @typedef {Object} OfflinePunctuationHandle
 * @see src/offline-punctuation.cc
 */

/**
 * A single audio event returned by AudioTagging.compute().
 * @typedef {Object} AudioEvent
 * @property {string} name - The event name.
 * @property {number} prob - Probability in [0,1].
 * @property {number} index - Index (integer) of the event.
 */

/**
 * AudioTagging specific model config for Zipformer variant
 * @typedef {Object} AudioTaggingZipformerModelConfig
 * @property {string} [model]
 */

/**
 * AudioTagging model config.
 * @typedef {Object} AudioTaggingModelConfig
 * @property {AudioTaggingZipformerModelConfig} [zipformer]
 * @property {string} [ced]
 * @property {number} [numThreads]
 * @property {boolean|number} [debug]
 * @property {string} [provider]
 */

/**
 * AudioTagging configuration passed to constructor.
 * @typedef {Object} AudioTaggingConfig
 * @property {AudioTaggingModelConfig} [model]
 * @property {string} [labels]
 * @property {number} [topK]
 */

/**
 * Waveform input object used by acceptWaveform methods.
 * @typedef {Object} Waveform
 * @property {Float32Array} samples - Float32Array of samples in [-1, 1].
 * @property {number} sampleRate - Sample rate as an integer (e.g., 16000).
 */

/**
 * Feature config used by recognizers and models.
 * @typedef {Object} FeatureConfig
 * @property {number} [sampleRate]
 * @property {number} [featureDim]
 */

/**
 * Silero VAD model config
 * @typedef {Object} SileroVadModelConfig
 * @property {string} [model]
 * @property {number} [threshold]
 * @property {number} [minSilenceDuration]
 * @property {number} [minSpeechDuration]
 * @property {number} [windowSize]
 * @property {number} [maxSpeechDuration]
 */

/**
 * Ten-VAD model config
 * @typedef {Object} TenVadModelConfig
 * @property {string} [model]
 * @property {number} [threshold]
 * @property {number} [minSilenceDuration]
 * @property {number} [minSpeechDuration]
 * @property {number} [windowSize]
 * @property {number} [maxSpeechDuration]
 */

/**
 * Voice activity detector configuration.
 * @typedef {Object} VadConfig
 * @property {SileroVadModelConfig} [sileroVad]
 * @property {TenVadModelConfig} [tenVad]
 * @property {number} [sampleRate]
 * @property {number} [numThreads]
 * @property {string} [provider]
 * @property {boolean|number} [debug]
 */

/**
 * Offline Transducer model config
 * @typedef {Object} OfflineTransducerModelConfig
 * @property {string} [encoder]
 * @property {string} [decoder]
 * @property {string} [joiner]
 */

/**
 * Offline Paraformer model config
 * @typedef {Object} OfflineParaformerModelConfig
 * @property {string} [model]
 */

/**
 * Offline Zipformer CTC model config
 * @typedef {Object} OfflineZipformerCtcModelConfig
 * @property {string} [model]
 */

/**
 * Offline Wenet CTC model config
 * @typedef {Object} OfflineWenetCtcModelConfig
 * @property {string} [model]
 */

/**
 * Offline Omnilingual ASR CTC model config
 * @typedef {Object} OfflineOmnilingualAsrCtcModelConfig
 * @property {string} [model]
 */

/**
 * Offline Med ASR CTC model config
 * @typedef {Object} OfflineMedAsrCtcModelConfig
 * @property {string} [model]
 */

/**
 * Offline Dolphin model config
 * @typedef {Object} OfflineDolphinModelConfig
 * @property {string} [model]
 */

/**
 * Offline NeMo CTC model config
 * @typedef {Object} OfflineNeMoCtcModelConfig
 * @property {string} [model]
 */

/**
 * Offline Canary model config
 * @typedef {Object} OfflineCanaryModelConfig
 * @property {string} [encoder]
 * @property {string} [decoder]
 * @property {string} [srcLang]
 * @property {string} [tgtLang]
 * @property {number} [usePnc]
 */

/**
 * Offline Whisper model config
 * @typedef {Object} OfflineWhisperModelConfig
 * @property {string} [encoder]
 * @property {string} [decoder]
 * @property {string} [language]
 * @property {string} [task]
 * @property {number} [tailPaddings]
 */

/**
 * Offline FireRed ASR model config
 * @typedef {Object} OfflineFireRedAsrModelConfig
 * @property {string} [encoder]
 * @property {string} [decoder]
 */

/**
 * Offline Moonshine model config
 * @typedef {Object} OfflineMoonshineModelConfig
 * @property {string} [preprocessor]
 * @property {string} [encoder]
 * @property {string} [uncachedDecoder]
 * @property {string} [cachedDecoder]
 */

/**
 * Offline TDNN model config
 * @typedef {Object} OfflineTdnnModelConfig
 * @property {string} [model]
 */

/**
 * Offline SenseVoice model config
 * @typedef {Object} OfflineSenseVoiceModelConfig
 * @property {string} [model]
 * @property {string} [language]
 * @property {number} [useInverseTextNormalization]
 */

/**
 * Offline model config.
 * @typedef {Object} OfflineModelConfig
 * @property {OfflineTransducerModelConfig} [transducer]
 * @property {OfflineParaformerModelConfig} [paraformer]
 * @property {OfflineZipformerCtcModelConfig} [zipformerCtc]
 * @property {OfflineWenetCtcModelConfig} [wenetCtc]
 * @property {OfflineOmnilingualAsrCtcModelConfig} [omnilingual]
 * @property {OfflineMedAsrCtcModelConfig} [medasr]
 * @property {OfflineDolphinModelConfig} [dolphin]
 * @property {OfflineNeMoCtcModelConfig} [nemoCtc]
 * @property {OfflineCanaryModelConfig} [canary]
 * @property {OfflineWhisperModelConfig} [whisper]
 * @property {OfflineFireRedAsrModelConfig} [fireRedAsr]
 * @property {OfflineMoonshineModelConfig} [moonshine]
 * @property {OfflineTdnnModelConfig} [tdnn]
 * @property {OfflineSenseVoiceModelConfig} [senseVoice]
 * @property {string} [tokens]
 * @property {number} [numThreads]
 * @property {boolean|number} [debug]
 * @property {string} [provider]
 */

/**
 * Transducer model config
 * @typedef {Object} TransducerModelConfig
 * @property {string} [encoder]
 * @property {string} [decoder]
 * @property {string} [joiner]
 */

/**
 * Paraformer model config
 * @typedef {Object} ParaformerModelConfig
 * @property {string} [encoder]
 * @property {string} [decoder]
 */

/**
 * Zipformer2 CTC model config
 * @typedef {Object} Zipformer2CtcModelConfig
 * @property {string} [model]
 */

/**
 * NeMo CTC model config
 * @typedef {Object} NemoCtcModelConfig
 * @property {string} [model]
 */

/**
 * Tone CTC model config
 * @typedef {Object} ToneCtcModelConfig
 * @property {string} [model]
 */

/**
 * Online model config (subset of C++ `OnlineModelConfig`).
 * @typedef {Object} OnlineModelConfig
 * @property {TransducerModelConfig} [transducer]
 * @property {ParaformerModelConfig} [paraformer]
 * @property {Zipformer2CtcModelConfig} [zipformer2Ctc]
 * @property {NemoCtcModelConfig} [nemoCtc]
 * @property {ToneCtcModelConfig} [toneCtc]
 * @property {string} [tokens]
 * @property {number} [numThreads]
 * @property {boolean|number} [debug]
 * @property {string} [provider]
 * @property {string} [modelType]
 * @property {string} [modelingUnit]
 * @property {string} [bpeVocab]
 * @property {string} [tokensBuf]
 * @property {number} [tokensBufSize]
 */

/**
 * Homophone replacer configuration used both in online and offline recognizers.
 * @typedef {Object} HomophoneReplacerConfig
 * @property {string} [lexicon]
 * @property {string} [ruleFsts]
 */

/**
 * Online recognizer configuration passed to createOnlineRecognizer.
 * @typedef {Object} OnlineRecognizerConfig
 * @property {FeatureConfig} [featConfig]
 * @property {OnlineModelConfig} [modelConfig]
 * @property {HomophoneReplacerConfig} [hr]
 * @property {string} [decodingMethod]
 * @property {number} [maxActivePaths]
 * @property {boolean|number} [enableEndpoint]
 * @property {number} [rule1MinTrailingSilence]
 * @property {number} [rule2MinTrailingSilence]
 * @property {number} [rule3MinUtteranceLength]
 * @property {string} [hotwordsFile]
 * @property {number} [hotwordsScore]
 * @property {string} [ruleFsts]
 * @property {string} [ruleFars]
 * @property {number} [blankPenalty]
 */

/**
 * Offline recognizer config passed to createOfflineRecognizer.
 * @typedef {Object} OfflineRecognizerConfig
 * @property {FeatureConfig} [featConfig]
 * @property {OfflineModelConfig} [modelConfig]
 */

/**
 * Wave object returned by readWave and used by writeWave.
 * @typedef {Object} WaveObject
 * @property {Float32Array} samples - 1-D float32 samples in [-1, 1].
 * @property {number} sampleRate - Sample rate as an integer (e.g., 16000).
 * @see src/wave-reader.cc
 */

/**
 * Speech segment returned by Vad.front().
 * @typedef {Object} SpeechSegment
 * @property {number} start - Start index (int32) of this segment.
 * @property {Float32Array} samples - Float32Array of samples.
 * @see src/vad.cc
 */

/**
 * Audio returned by TTS and speech denoiser.
 * @typedef {Object} GeneratedAudio
 * @property {Float32Array} samples - The generated/denoised audio samples.
 * @property {number} sampleRate - Sample rate in Hz.
 * @see src/non-streaming-tts.cc
 * @see src/non-streaming-speech-denoiser.cc
 */

/**
 * @typedef {Object} GenerationConfig
 * @property {number=} silenceScale
 * @property {number=} speed
 * @property {number=} sid
 * @property {number=} numSteps
 *
 * @property {Float32Array=} referenceAudio
 * @property {number=} referenceSampleRate
 * @property {string=} referenceText
 *
 * @property {{[key: string]: number | string}} [extra]
 */


/**
 * TTS request object passed to generate/generateAsync.
 * @typedef {Object} TtsRequest
 * @property {string} text - Input text to synthesize.
 * @property {number} sid - Speaker id (integer).
 * @property {number} speed - Playback speed (float).
 * @property {boolean} [enableExternalBuffer=true] - Whether to use an external
 *           buffer.
 * @property {GenerationConfig=} generationConfig - Optional
 */

/**
 * Spoken Language ID whisper config
 * @typedef {Object} SpokenLanguageIdentificationWhisperConfig
 * @property {string} [encoder]
 * @property {string} [decoder]
 * @property {number} [tailPaddings]
 */

/**
 * SpokenLanguageIdentification config
 * @typedef {Object} SpokenLanguageIdentificationConfig
 * @property {SpokenLanguageIdentificationWhisperConfig} [whisper]
 * @property {number} [numThreads]
 * @property {boolean|number} [debug]
 * @property {string} [provider]
 */

/**
 * Speaker embedding extractor config
 * @typedef {Object} SpeakerEmbeddingExtractorConfig
 * @property {string} [model]
 * @property {number} [numThreads]
 * @property {boolean|number} [debug]
 * @property {string} [provider]
 */

/**
 * Offline punctuation model config
 * @typedef {Object} OfflinePunctuationModelConfig
 * @property {string} [ctTransformer]
 * @property {number} [numThreads]
 * @property {boolean|number} [debug]
 * @property {string} [provider]
 */

/**
 * Offline punctuation config
 * @typedef {Object} OfflinePunctuationConfig
 * @property {OfflinePunctuationModelConfig} [model]
 */

/**
 * Online punctuation model config
 * @typedef {Object} OnlinePunctuationModelConfig
 * @property {string} [cnnBilstm]
 * @property {string} [bpeVocab]
 * @property {number} [numThreads]
 * @property {boolean|number} [debug]
 * @property {string} [provider]
 */

/**
 * Online punctuation config
 * @typedef {Object} OnlinePunctuationConfig
 * @property {OnlinePunctuationModelConfig} [model]
 */

/**
 * Generic audio processing request used by denoisers/tts generators.
 * @typedef {Object} AudioProcessRequest
 * @property {Float32Array} samples
 * @property {number} sampleRate
 * @property {boolean} [enableExternalBuffer]
 */

/**
 * Offline TTS model configs
 * @typedef {Object} OfflineTtsVitsModelConfig
 * @property {string} [model]
 * @property {string} [lexicon]
 * @property {string} [tokens]
 * @property {string} [dataDir]
 * @property {number} [noiseScale]
 * @property {number} [noiseScaleW]
 * @property {number} [lengthScale]
 */

/**
 * @typedef {Object} OfflineTtsMatchaModelConfig
 * @property {string} [acousticModel]
 * @property {string} [vocoder]
 * @property {string} [lexicon]
 * @property {string} [tokens]
 * @property {string} [dataDir]
 * @property {number} [noiseScale]
 * @property {number} [lengthScale]
 */

/**
 * @typedef {Object} OfflineTtsKokoroModelConfig
 * @property {string} [model]
 * @property {string} [voices]
 * @property {string} [tokens]
 * @property {string} [dataDir]
 * @property {number} [lengthScale]
 * @property {string} [lexicon]
 * @property {string} [lang]
 */

/**
 * @typedef {Object} OfflineTtsKittenModelConfig
 * @property {string} [model]
 * @property {string} [voices]
 * @property {string} [tokens]
 * @property {string} [dataDir]
 * @property {number} [lengthScale]
 */

/**
 * @typedef {Object} OfflineTtsZipvoiceModelConfig
 * @property {string} [tokens]
 * @property {string} [encoder]
 * @property {string} [decoder]
 * @property {string} [vocoder]
 * @property {string} [dataDir]
 * @property {string} [lexicon]
 * @property {number} [featScale]
 * @property {number} [tShift]
 * @property {number} [targetRms]
 * @property {number} [guidanceScale]
 */

/**
 * @typedef {Object} OfflineTtsPocketModelConfig
 * @property {string} [lmFlow]
 * @property {string} [lmMain]
 * @property {string} [encoder]
 * @property {string} [decoder]
 * @property {string} [textConditioner]
 * @property {string} [vocabJson]
 * @property {string} [tokenScoresJson]
 * @property {number} [voiceEmbeddingCacheCapacity]
 */

/**
 * Offline TTS model config
 * @typedef {Object} OfflineTtsModelConfig
 * @property {OfflineTtsVitsModelConfig} [vits]
 * @property {OfflineTtsMatchaModelConfig} [matcha]
 * @property {OfflineTtsKokoroModelConfig} [kokoro]
 * @property {OfflineTtsKittenModelConfig} [kitten]
 * @property {OfflineTtsZipvoiceModelConfig} [zipvoice]
 * @property {OfflineTtsPocketModelConfig} [pocket]
 */

/**
 * Offline TTS configuration (partial, commonly used props).
 * @typedef {Object} OfflineTtsConfig
 * @property {OfflineTtsModelConfig} [model]
 * @property {number} [maxNumSentences]
 * @property {number} [silenceScale]
 * @property {number} [numThreads]
 * @property {string} [provider]
 */

/**
 * Offline Speech Denoiser model config
 * @typedef {Object} OfflineSpeechDenoiserGtcrnModelConfig
 * @property {string} [model]
 */

/**
 * Offline Speech Denoiser model config
 * @typedef {Object} OfflineSpeechDenoiserDpdfNetModelConfig
 * @property {string} [model]
 */

/**
 * Offline Speech Denoiser model config
 * @typedef {Object} OfflineSpeechDenoiserModelConfig
 * @property {OfflineSpeechDenoiserGtcrnModelConfig} [gtcrn]
 * @property {OfflineSpeechDenoiserDpdfNetModelConfig} [dpdfnet]
 * @property {number} [numThreads]
 * @property {boolean|number} [debug]
 * @property {string} [provider]
 */

/**
 * Offline Speech Denoiser configuration (partial).
 * @typedef {Object} OfflineSpeechDenoiserConfig
 * @property {OfflineSpeechDenoiserModelConfig} [model]
 */

/**
 * Online Speech Denoiser configuration (partial).
 * @typedef {Object} OnlineSpeechDenoiserConfig
 * @property {OfflineSpeechDenoiserModelConfig} [model]
 */

/**
 * Offline speaker segmentation (pyannote) model config
 * @typedef {Object} OfflineSpeakerSegmentationPyannoteModelConfig
 * @property {string} [model]
 */

/**
 * Offline speaker segmentation model config
 * @typedef {Object} OfflineSpeakerSegmentationModelConfig
 * @property {OfflineSpeakerSegmentationPyannoteModelConfig} [pyannote]
 * @property {number} [numThreads]
 * @property {boolean|number} [debug]
 * @property {string} [provider]
 */

/**
 * Offline Speaker Diarization configuration (partial).
 * @typedef {Object} OfflineSpeakerDiarizationConfig
 * @property {OfflineSpeakerSegmentationModelConfig} [segmentation]
 * @property {SpeakerEmbeddingExtractorConfig} [embedding]
 * @property {FastClusteringConfig} [clustering]
 * @property {number} [minDurationOn]
 * @property {number} [minDurationOff]
 */

/**
 * Fast clustering configuration used by diarization.
 * @typedef {Object} FastClusteringConfig
 * @property {number} [numClusters]
 * @property {number} [threshold]
 */

/**
 * SpeakerEmbeddingManager add-multi flattened object
 * @typedef {Object} SpeakerEmbeddingManagerAddListFlattenedObj
 * @property {string} name
 * @property {Float32Array} vv
 * @property {number} n
 */

/**
 * SpeakerEmbeddingManager search object
 * @typedef {Object} SpeakerEmbeddingManagerSearchObj
 * @property {Float32Array} v
 * @property {number} threshold
 */

/**
 * SpeakerEmbeddingManager verify object
 * @typedef {Object} SpeakerEmbeddingManagerVerifyObj
 * @property {string} name
 * @property {Float32Array} v
 * @property {number} threshold
 */

/**
 * KeywordSpotter config (partial)
 * @typedef {Object} KeywordSpotterConfig
 * @property {FeatureConfig} [featConfig]
 * @property {OfflineModelConfig} [modelConfig]
 * @property {number} [maxActivePaths]
 * @property {number} [numTrailingBlanks]
 * @property {number} [keywordsScore]
 * @property {number} [keywordsThreshold]
 * @property {string} [keywordsFile]
 */

/**
 * Offline recognition result returned by `getOfflineStreamResultAsJson`.
 * See `OfflineRecognitionResult::AsJsonString()` in C++ for precise fields.
 * @typedef {Object} OfflineRecognizerResult
 * @property {string} lang
 * @property {string} emotion
 * @property {string} event
 * @property {string} text
 * @property {number[]} timestamps
 * @property {number[]} durations
 * @property {string[]} tokens
 * @property {number[]} ys_log_probs
 * @property {number[]} words
 */

/**
 * Online recognition result returned by `getOnlineStreamResultAsJson`.
 * See `OnlineRecognizerResult::AsJsonString()` in C++.
 * @typedef {Object} OnlineRecognizerResult
 * @property {string} text
 * @property {string[]} tokens
 * @property {number[]} timestamps
 * @property {number[]} ys_probs
 * @property {number[]} lm_probs
 * @property {number[]} context_scores
 * @property {number} segment
 * @property {number[]} words
 * @property {number} start_time
 * @property {boolean} is_final
 * @property {boolean} is_eof
 */

/**
 * Keyword spotter result returned by `getKeywordResultAsJson`.
 * @typedef {Object} KeywordResult
 * @property {number} start_time
 * @property {string} keyword
 * @property {number[]} timestamps
 * @property {string[]} tokens
 */

/**
 * Speaker diarization segment returned by `offlineSpeakerDiarizationProcess`.
 * @typedef {Object} SpeakerDiarizationSegment
 * @property {number} start - start time in seconds
 * @property {number} end - end time in seconds
 * @property {number} speaker - speaker id (integer)
 */

/**
 * Speaker embedding entry used by SpeakerEmbeddingManager.add
 * @typedef {Object} SpeakerEmbeddingEntry
 * @property {string} name - speaker name
 * @property {Float32Array} v - embedding vector
 */

/**
 * @typedef {Object} OfflineStreamObject
 * @property {OfflineStreamHandle} handle
 */

/**
 * @typedef {Object} OnlineStreamObject
 * @property {OnlineStreamHandle} handle
 */

/**
 * @typedef {Object} DisplayObject
 * @property {DisplayHandle} handle
 */

// Export typedefs so they can be referenced by require('./types.js')
module.exports = {};


================================================
FILE: scripts/node-addon-api/lib/vad.js
================================================
/** @typedef {import('./types').CircularBufferHandle} CircularBufferHandle */
/** @typedef {import('./types').SpeechSegment} SpeechSegment */
/** @typedef {import('./types').VadConfig} VadConfig */

const addon = require('./addon.js');

/**
 * CircularBuffer stores float32 samples internally.
 */
class CircularBuffer {
  /**
   * @param {number} capacity - capacity in samples (integer)
   */
  constructor(capacity) {
    this.handle = addon.createCircularBuffer(capacity);
  }

  /**
   * Push samples into the buffer.
   * @param {Float32Array} samples
   */
  push(samples) {
    addon.circularBufferPush(this.handle, samples);
  }

  /**
   * Get a slice of samples.
   * @param {number} startIndex
   * @param {number} n
   * @param {boolean} [enableExternalBuffer=true]
   * @returns {Float32Array}
   */
  get(startIndex, n, enableExternalBuffer = true) {
    return addon.circularBufferGet(
        this.handle, startIndex, n, enableExternalBuffer);
  }

  /**
   * Pop n samples from the buffer.
   * @param {number} n
   */
  pop(n) {
    return addon.circularBufferPop(this.handle, n);
  }

  /**
   * Get current size in samples.
   * @returns {number}
   */
  size() {
    return addon.circularBufferSize(this.handle);
  }

  /**
   * Get head index.
   * @returns {number}
   */
  head() {
    return addon.circularBufferHead(this.handle);
  }

  /** Reset the buffer. */
  reset() {
    addon.circularBufferReset(this.handle);
  }
}

/**
 * Voice Activity Detector (VAD).
 */
class Vad {
  /**
   * @param {VadConfig} config
   * @param {number} bufferSizeInSeconds
   */
  constructor(config, bufferSizeInSeconds) {
    this.handle =
        addon.createVoiceActivityDetector(config, bufferSizeInSeconds);
    this.config = config;
  }

  /**
   * Accept raw waveform samples.
   * @param {Float32Array} samples
   */
  acceptWaveform(samples) {
    addon.voiceActivityDetectorAcceptWaveform(this.handle, samples);
  }

  /** @returns {boolean} */
  isEmpty() {
    return addon.voiceActivityDetectorIsEmpty(this.handle);
  }

  /** @returns {boolean} */
  isDetected() {
    return addon.voiceActivityDetectorIsDetected(this.handle);
  }

  /** Pop the earliest detected speech segment. */
  pop() {
    addon.voiceActivityDetectorPop(this.handle);
  }

  /** Clear internal state. */
  clear() {
    addon.voiceActivityDetectorClear(this.handle);
  }

  /**
   * Get the front speech segment.
   * @param {boolean} [enableExternalBuffer=true]
   * @returns {SpeechSegment}
   */
  front(enableExternalBuffer = true) {
    return addon.voiceActivityDetectorFront(this.handle, enableExternalBuffer);
  }

  /** Reset detector state. */
  reset() {
    addon.voiceActivityDetectorReset(this.handle);
  }

  /** Flush pending internal buffer. */
  flush() {
    addon.voiceActivityDetectorFlush(this.handle);
  }
}

module.exports = {
  Vad,
  CircularBuffer,
}


================================================
FILE: scripts/node-addon-api/package.json
================================================
{
  "main": "lib/sherpa-onnx.js",
  "version": "1.0.0",
  "description": "Speech-to-text, text-to-speech, and speaker diarization using Next-gen Kaldi without internet connection",
  "dependencies": {
    "cmake-js": "^7.3.0",
    "node-addon-api": "^8.3.0",
    "perf_hooks": "*"
  },
  "scripts": {
    "install": "cmake-js compile --log-level verbose",
    "postinstall": "npm run typecheck",
    "test": "node --napi-modules ./test/test_binding.js",
    "typecheck": "tsc"
  },
  "repository": {
    "type": "git",
    "url": "git+https://github.com/k2-fsa/sherpa-onnx.git"
  },
  "keywords": [
    "speech to text",
    "text to speech",
    "transcription",
    "real-time speech recognition",
    "without internet connection",
    "locally",
    "local",
    "embedded systems",
    "open source",
    "diarization",
    "speaker diarization",
    "speaker recognition",
    "speaker",
    "speaker segmentation",
    "speaker verification",
    "spoken language identification",
    "sherpa",
    "zipformer",
    "asr",
    "tts",
    "stt",
    "c++",
    "onnxruntime",
    "onnx",
    "ai",
    "next-gen kaldi",
    "offline",
    "privacy",
    "open source",
    "streaming speech recognition",
    "speech",
    "recognition",
    "vad",
    "node-addon-api",
    "speaker id",
    "language id"
  ],
  "author": "The next-gen Kaldi team",
  "license": "Apache-2.0",
  "gypfile": true,
  "name": "sherpa-onnx-node-addon-api",
  "bugs": {
    "url": "https://github.com/k2-fsa/sherpa-onnx/issues"
  },
  "homepage": "https://github.com/k2-fsa/sherpa-onnx#readme",
  "devDependencies": {
    "@types/node": "^24.10.4",
    "typescript": "^5.9.3"
  }
}


================================================
FILE: scripts/node-addon-api/test/test_asr_streaming_transducer.js
================================================
// Copyright (c)  2024  Xiaomi Corporation
const sherpa_onnx = require('../lib/sherpa-onnx.js');
const performance = require('perf_hooks').performance;

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'transducer': {
      'encoder':
          './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx',
      'decoder':
          './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx',
      'joiner':
          './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx',
    },
    'tokens':
        './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
    'modelType': 'zipformer',
  }
};

const waveFilename =
    './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav';

const recognizer = new sherpa_onnx.OnlineRecognizer(config);
console.log('Started')
let start = performance.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({samples: wave.samples, sampleRate: wave.sampleRate});

const tailPadding = new Float32Array(wave.sampleRate * 0.4);
stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});

while (recognizer.isReady(stream)) {
  recognizer.decode(stream);
}
result = recognizer.getResult(stream)
let stop = performance.now();
console.log('Done')

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'secodns')
console.log('Elapsed', elapsed_seconds.toFixed(3), 'secodns')
console.log('RTF', real_time_factor.toFixed(3))
console.log('result', result.text)


================================================
FILE: scripts/node-addon-api/test/test_binding.js
================================================
const sherpa_onnx = require('../lib/sherpa-onnx.js');
console.log(sherpa_onnx)

console.log('Tests passed- everything looks OK!');


================================================
FILE: scripts/node-addon-api/tsconfig.json
================================================
{
  "compilerOptions": {
    "allowJs": true,
    "checkJs": true,
    "noEmit": true,
    "skipLibCheck": true,
    "esModuleInterop": true,
    "module": "commonjs",
    "target": "ES2019",
    "lib": ["ES2020"],
    "types": ["node"]
  },
  "include": ["lib/**/*.js"]
}


================================================
FILE: scripts/nodejs/README.md
================================================
# Introduction

Text-to-speech and speech-to-text with [Next-gen Kaldi](https://github.com/k2-fsa/).

It processes everything locally without accessing the Internet.

Please refer to
https://github.com/k2-fsa/sherpa-onnx/tree/master/nodejs-examples
for examples.

You need Node >= 18 for this package.


================================================
FILE: scripts/nodejs/index.js
================================================
// Copyright (c)  2023-2024  Xiaomi Corporation (authors: Fangjun Kuang)
'use strict'

const wasmModule = require('./sherpa-onnx-wasm-nodejs.js')();
const sherpa_onnx_asr = require('./sherpa-onnx-asr.js');
const sherpa_onnx_tts = require('./sherpa-onnx-tts.js');
const sherpa_onnx_kws = require('./sherpa-onnx-kws.js');
const sherpa_onnx_wave = require('./sherpa-onnx-wave.js');
const sherpa_onnx_vad = require('./sherpa-onnx-vad.js');
const sherpa_onnx_speaker_diarization =
    require('./sherpa-onnx-speaker-diarization.js');
const sherpa_onnx_speech_enhancement =
    require('./sherpa-onnx-speech-enhancement.js');


function createOnlineRecognizer(config) {
  return sherpa_onnx_asr.createOnlineRecognizer(wasmModule, config);
}

function createOfflineRecognizer(config) {
  return new sherpa_onnx_asr.OfflineRecognizer(config, wasmModule);
}

function createOfflineTts(config) {
  return sherpa_onnx_tts.createOfflineTts(wasmModule, config);
}

function createKws(config) {
  return sherpa_onnx_kws.createKws(wasmModule, config);
}

function createCircularBuffer(capacity) {
  return new sherpa_onnx_vad.CircularBuffer(capacity, wasmModule);
}

function createVad(config) {
  return sherpa_onnx_vad.createVad(wasmModule, config);
}

function createOfflineSpeakerDiarization(config) {
  return sherpa_onnx_speaker_diarization.createOfflineSpeakerDiarization(
      wasmModule, config);
}

function readWave(filename) {
  return sherpa_onnx_wave.readWave(filename, wasmModule);
}

function writeWave(filename, data) {
  sherpa_onnx_wave.writeWave(filename, data, wasmModule);
}

function readWaveFromBinaryData(uint8Array) {
  return sherpa_onnx_wave.readWaveFromBinaryData(uint8Array, wasmModule);
}

function createOfflineSpeechDenoiser(config) {
  return sherpa_onnx_speech_enhancement.createOfflineSpeechDenoiser(
      wasmModule, config);
}

function createOnlineSpeechDenoiser(config) {
  return sherpa_onnx_speech_enhancement.createOnlineSpeechDenoiser(
      wasmModule, config);
}

function getVersion() {
  const v = wasmModule._SherpaOnnxGetVersionStr();
  return wasmModule.UTF8ToString(v);
}

function getGitSha1() {
  const v = wasmModule._SherpaOnnxGetGitSha1();
  return wasmModule.UTF8ToString(v);
}

function getGitDate() {
  const v = wasmModule._SherpaOnnxGetGitDate();
  return wasmModule.UTF8ToString(v);
}

// Note: online means streaming and offline means non-streaming here.
// Both of them don't require internet connection.
module.exports = {
  createOnlineRecognizer,
  createOfflineRecognizer,
  createOfflineTts,
  createKws,
  readWave,
  readWaveFromBinaryData,
  writeWave,
  createCircularBuffer,
  createVad,
  createOfflineSpeakerDiarization,
  createOfflineSpeechDenoiser,
  createOnlineSpeechDenoiser,
  version: getVersion(),
  gitSha1: getGitSha1(),
  gitDate: getGitDate(),
};


================================================
FILE: scripts/nodejs/package.json
================================================
{
  "name": "sherpa-onnx",
  "version": "SHERPA_ONNX_VERSION",
  "description": "Speech-to-text, text-to-speech, speaker diarization, and speech enhancement using Next-gen Kaldi without internet connection",
  "main": "index.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "repository": {
    "type": "git",
    "url": "git+https://github.com/k2-fsa/sherpa-onnx.git"
  },
  "keywords": [
    "speech to text",
    "text to speech",
    "transcription",
    "real-time speech recognition",
    "without internet connection",
    "embedded systems",
    "open source",
    "zipformer",
    "asr",
    "tts",
    "stt",
    "c++",
    "onnxruntime",
    "onnx",
    "ai",
    "next-gen kaldi",
    "offline",
    "privacy",
    "open source",
    "streaming speech recognition",
    "speech",
    "recognition",
    "WebAssembly",
    "wasm",
    "speech enhancement",
    "denoising"
  ],
  "author": "The next-gen Kaldi team",
  "license": "Apache-2.0",
  "bugs": {
    "url": "https://github.com/k2-fsa/sherpa-onnx/issues"
  },
  "homepage": "https://github.com/k2-fsa/sherpa-onnx#readme",
  "dependencies": {
  }
}


================================================
FILE: scripts/omnilingual-asr/README.md
================================================
# Introduction

This folder contains script to export
https://github.com/facebookresearch/omnilingual-asr
to sherpa-onnx

See
https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/export-omnilingual-asr-to-onnx.yaml
for usage.

```
num_frames = round(num_samples / 318 - 1.5)
num_samples = round(318 * num_frames + 477)

or
num_frames = round(num_samples / 320)

```

20ms per frame


================================================
FILE: scripts/omnilingual-asr/export-onnx.py
================================================
#!/usr/bin/env python3
# Copyright      2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import argparse
from typing import Dict

import onnx
import torch
from fairseq2.nn.batch_layout import BatchLayout
from omnilingual_asr.models.inference.pipeline import ASRInferencePipeline
from onnxruntime.quantization import QuantType, quantize_dynamic


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "--model-card",
        type=str,
        required=True,
        choices=[
            "omniASR_CTC_300M",
            "omniASR_CTC_300M_v2",
            "omniASR_CTC_1B",
            "omniASR_CTC_1B_v2",
        ],
        help="The model card to export.",
    )
    return parser.parse_args()


def add_meta_data(filename: str, meta_data: Dict[str, str], model_card: str):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)
    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    if "300M" in model_card:
        onnx.save(model, filename)
    else:
        external_filename = filename.split(".onnx")[0]
        onnx.save(
            model,
            filename,
            save_as_external_data=True,
            all_tensors_to_one_file=True,
            location=external_filename + ".weights",
        )


class ModelWrapper(torch.nn.Module):
    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, x):
        """
        Args:
          x: (N, num_samples), float32
        """
        batch_layout = BatchLayout(shape=x.shape, seq_lens=[x.shape[1]])
        logits, _ = self.model(x, batch_layout)
        return logits


@torch.no_grad()
def main():
    args = get_args()
    print(vars(args))
    pipeline = ASRInferencePipeline(
        model_card=args.model_card,
        device="cpu",
        dtype=torch.float32,
    )

    vocab_size = pipeline.tokenizer._model.vocabulary_size

    with open("tokens.txt", "w") as f:
        for i in range(pipeline.tokenizer._model.vocabulary_size):
            f.write(f"{pipeline.tokenizer._model.index_to_token(i)} {i}\n")

    print("saved to tokens.txt")

    wrapper = ModelWrapper(pipeline.model)
    wrapper.eval()

    x = torch.rand(1, 16000 * 10)
    torch.onnx.export(
        wrapper,
        x,
        "model.onnx",
        opset_version=14,
        input_names=["x"],
        output_names=["logits"],
        dynamic_axes={
            "x": {0: "N", 1: "num_samples"},
            "logits": {0: "N", 1: "num_frames"},
        },
    )

    meta_data = {
        "vocab_size": vocab_size,
        "model_type": "omnilingual-asr",
        "version": "1",
        "sample_rate": 16000,
        "model_author": "facebookresearch",
        "url": "https://github.com/facebookresearch/omnilingual-asr",
        "comment": "300M-CTC",
    }

    add_meta_data("model.onnx", meta_data, args.model_card)
    print("saved to model.onnx")

    quantize_dynamic(
        model_input="./model.onnx",
        model_output="./model.int8.onnx",
        op_types_to_quantize=["MatMul"],
        weight_type=QuantType.QUInt8,
    )
    print("saved to model.int8.onnx")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/omnilingual-asr/test.py
================================================
#!/usr/bin/env python3
# Copyright      2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import time

import numpy as np
import onnxruntime as ort
import soundfile as sf


def display(sess):
    print("==========Input==========")
    for i in sess.get_inputs():
        print(i)
    print("==========Output==========")
    for i in sess.get_outputs():
        print(i)


class OnnxModel:
    def __init__(
        self,
        filename: str,
    ):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.model = ort.InferenceSession(
            filename,
            sess_options=session_opts,
            providers=["CPUExecutionProvider"],
        )
        display(self.model)

    def __call__(self, x: np.ndarray):
        logits = self.model.run(
            [
                self.model.get_outputs()[0].name,
            ],
            {
                self.model.get_inputs()[0].name: x,
            },
        )[0]
        # [batch_size, T, vocab_size]
        return logits


def load_tokens():
    id2token = dict()
    with open("./tokens.txt", encoding="utf-8") as f:
        for line in f:
            fields = line.split()
            if len(fields) == 1:
                id2token[int(fields[0])] = " "
            else:
                t, idx = fields
                id2token[int(idx)] = t
    return id2token


def load_audio(filename):
    samples, sr = sf.read(filename, always_2d=True, dtype="float32")
    samples = samples[:, 0]  # only use the first channel
    if sr != 16000:
        import librosa

        samples = librosa.resample(samples, orig_sr=sr, target_sr=16000)
    if len(samples) / 16000 > 40:
        raise ValueError(f"{filename} is too long. Support at most 40 seconds")

    mean = np.mean(samples, axis=0, keepdims=True)
    var = np.var(samples, axis=0, keepdims=True)

    eps = 1e-5
    return (samples - mean) / np.sqrt(var + eps)


def test(filename, wav_file_list, num_iter=1):
    id2token = load_tokens()
    model = OnnxModel(filename)

    for it in range(num_iter):
        for wav in wav_file_list:
            print(f"---test {filename} with {wav}----iter---{it}")
            start = time.time()
            samples = load_audio(wav)

            logits = model(samples[None])
            ids = logits[0].argmax(axis=-1)
            ans = []
            prev = -1
            blank = 0
            for i in ids:
                if i != blank and i != prev:
                    ans.append(i)
                prev = i

            words = [id2token[k] for k in ans]
            end = time.time()
            elapsed_seconds = end - start
            audio_duration = samples.shape[0] / 16000
            real_time_factor = elapsed_seconds / audio_duration

            print("---> text is----", "".join(words))
            print(f"RTF: {real_time_factor}")
            print()


def main():
    wav_file_list = ["./en.wav", "./de.wav", "./es.wav", "./fr.wav"]
    test("./model.onnx", wav_file_list)

    test("./model.int8.onnx", wav_file_list)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/paraformer/.gitignore
================================================
seg_dict
tokens.json


================================================
FILE: scripts/paraformer/ascend-npu/export_decoder_onnx.py
================================================
#!/usr/bin/env python3
# Copyright (c)  2025  Xiaomi Corporation

import torch

from export_encoder_onnx import load_model


@torch.no_grad()
def main():
    print("loading model")
    model = load_model()

    encoder_out = torch.randn(1, 100, 512, dtype=torch.float32)
    acoustic_embedding = torch.randn(1, 50, 512, dtype=torch.float32)

    opset_version = 14
    filename = "decoder.onnx"
    torch.onnx.export(
        model.decoder,
        (encoder_out, acoustic_embedding),
        filename,
        opset_version=opset_version,
        input_names=["encoder_out", "acoustic_embedding"],
        output_names=["decoder_out"],
        dynamic_axes={
            "encoder_out": {1: "T"},
            "acoustic_embedding": {1: "num_tokens"},
            "decoder_out": {1: "num_tokens"},
        },
    )
    print(f"Saved to {filename}")


if __name__ == "__main__":
    torch.manual_seed(20251008)
    main()


================================================
FILE: scripts/paraformer/ascend-npu/export_encoder_onnx.py
================================================
#!/usr/bin/env python3
# Copyright (c)  2025  Xiaomi Corporation

from typing import List, Tuple

import torch
import yaml

from torch_model import Paraformer


def load_cmvn(filename) -> Tuple[List[float], List[float]]:
    neg_mean = None
    inv_stddev = None

    with open(filename) as f:
        for line in f:
            if not line.startswith("<LearnRateCoef>"):
                continue
            t = line.split()[3:-1]

            if neg_mean is None:
                neg_mean = list(map(lambda x: float(x), t))
            else:
                inv_stddev = list(map(lambda x: float(x), t))

    return neg_mean, inv_stddev


def load_model():
    with open("./config.yaml", "r", encoding="utf-8") as f:
        config = yaml.safe_load(f)

    print("creating model")

    neg_mean, inv_stddev = load_cmvn("./am.mvn")

    neg_mean = torch.tensor(neg_mean, dtype=torch.float32)
    inv_stddev = torch.tensor(inv_stddev, dtype=torch.float32)

    m = Paraformer(
        neg_mean=neg_mean,
        inv_stddev=inv_stddev,
        input_size=560,
        vocab_size=8404,
        encoder_conf=config["encoder_conf"],
        decoder_conf=config["decoder_conf"],
        predictor_conf=config["predictor_conf"],
    )
    m.eval()

    print("loading state dict")
    state_dict = torch.load("./model_state_dict.pt", map_location="cpu")
    if "state_dict" in state_dict:
        state_dict = state_dict["state_dict"]

    m.load_state_dict(state_dict)
    del state_dict

    return m


@torch.no_grad()
def main():
    print("loading model")
    model = load_model()

    x = torch.randn(1, 100, 560, dtype=torch.float32)

    opset_version = 14
    filename = "encoder.onnx"
    torch.onnx.export(
        model.encoder,
        x,
        filename,
        opset_version=opset_version,
        input_names=["x"],
        output_names=["encoder_out"],
        dynamic_axes={
            "x": {1: "T"},
            "encoder_out": {1: "T"},
        },
    )

    print(f"Saved to {filename}")


if __name__ == "__main__":
    torch.manual_seed(20251013)
    main()


================================================
FILE: scripts/paraformer/ascend-npu/export_predictor_onnx.py
================================================
#!/usr/bin/env python3
# Copyright (c)  2025  Xiaomi Corporation

import torch

from export_encoder_onnx import load_model
from torch_model import CifPredictorV2

if __name__ == "__main__":

    def modified_predictor_forward(self: CifPredictorV2, hidden: torch.Tensor):
        h = hidden
        context = h.transpose(1, 2)
        queries = self.pad(context)
        output = torch.relu(self.cif_conv1d(queries))
        output = output.transpose(1, 2)

        output = self.cif_output(output)
        alphas = torch.sigmoid(output)
        alphas = torch.nn.functional.relu(
            alphas * self.smooth_factor - self.noise_threshold
        )

        alphas = alphas.squeeze(-1)

        return alphas

    CifPredictorV2.forward = modified_predictor_forward


@torch.no_grad()
def main():
    print("loading model")
    model = load_model()

    x = torch.randn(1, 100, 512, dtype=torch.float32)

    opset_version = 14
    filename = "predictor.onnx"
    torch.onnx.export(
        model.predictor,
        x,
        filename,
        opset_version=opset_version,
        input_names=["encoder_out"],
        output_names=["alphas"],
        dynamic_axes={
            "encoder_out": {1: "T"},
            "alphas": {1: "T"},
            },
    )
    print(f"Saved to {filename}")


if __name__ == "__main__":
    torch.manual_seed(20251008)
    main()


================================================
FILE: scripts/paraformer/ascend-npu/test_om.py
================================================
#!/usr/bin/env python3
# Copyright (c)  2025  Xiaomi Corporation

import kaldi_native_fbank as knf
import librosa
import numpy as np
from ais_bench.infer.interface import InferSession


def compute_feat(filename):
    sample_rate = 16000
    samples, _ = librosa.load(filename, sr=sample_rate)
    opts = knf.FbankOptions()
    opts.frame_opts.dither = 0
    opts.frame_opts.snip_edges = False
    opts.frame_opts.samp_freq = sample_rate
    opts.mel_opts.num_bins = 80

    online_fbank = knf.OnlineFbank(opts)
    online_fbank.accept_waveform(sample_rate, (samples * 32768).tolist())
    online_fbank.input_finished()

    features = np.stack(
        [online_fbank.get_frame(i) for i in range(online_fbank.num_frames_ready)]
    )
    assert features.data.contiguous is True
    assert features.dtype == np.float32, features.dtype
    print("features sum", features.sum(), features.shape)

    window_size = 7  # lfr_m
    window_shift = 6  # lfr_n

    T = (features.shape[0] - window_size) // window_shift + 1
    features = np.lib.stride_tricks.as_strided(
        features,
        shape=(T, features.shape[1] * window_size),
        strides=((window_shift * features.shape[1]) * 4, 4),
    )
    return np.copy(features)


def load_tokens():
    ans = dict()
    i = 0
    with open("tokens.txt", encoding="utf-8") as f:
        for line in f:
            ans[i] = line.strip().split()[0]
            i += 1
    return ans


class OmModel:
    def __init__(self):
        print("init encoder")
        self.encoder = InferSession(device_id=0, model_path="./encoder.om", debug=False)
        self.decoder = InferSession(device_id=1, model_path="./decoder.om", debug=False)
        self.predictor = InferSession(
            device_id=0, model_path="./predictor.om", debug=False
        )

        print("---encoder---")
        for i in self.encoder.get_inputs():
            print(i.name, i.datatype, i.shape)

        print("-----")

        for i in self.encoder.get_outputs():
            print(i.name, i.datatype, i.shape)

        print("---decoder---")
        for i in self.decoder.get_inputs():
            print(i.name, i.datatype, i.shape)

        print("-----")

        for i in self.decoder.get_outputs():
            print(i.name, i.datatype, i.shape)

        print("---predictor---")
        for i in self.predictor.get_inputs():
            print(i.name, i.datatype, i.shape)

        print("-----")

        for i in self.predictor.get_outputs():
            print(i.name, i.datatype, i.shape)

    def run_encoder(self, features):
        encoder_out = self.encoder.infer([features], mode="dymshape")[0]
        return encoder_out

    def run_predictor(self, encoder_out):
        alphas = self.predictor.infer([encoder_out], mode="dymshape")[0]
        return alphas

    def run_decoder(self, encoder_out, acoustic_embedding):
        decoder_out = self.decoder.infer(
            [encoder_out, acoustic_embedding], mode="dymshape"
        )[0]
        return decoder_out


def get_acoustic_embedding(alpha: np.array, hidden: np.array):
    """
    Args:
      alpha: (T,)
      hidden: (T, C)
    Returns:
      acoustic_embeds: (num_tokens, C)
    """
    alpha = alpha.tolist()
    acc = 0

    embeddings = []
    cur_embedding = np.zeros((hidden.shape[1],), dtype=np.float32)

    for i, w in enumerate(alpha):
        acc += w
        if acc >= 1:
            overflow = acc - 1
            remain = w - overflow
            cur_embedding += remain * hidden[i]
            embeddings.append(cur_embedding)

            cur_embedding = overflow * hidden[i]
            acc = overflow
        else:
            cur_embedding += w * hidden[i]

    if len(embeddings) == 0:
        raise ValueError("No speech in the audio file")

    embeddings = np.array(embeddings)
    return embeddings


def main():
    features = compute_feat("./test_wavs/1.wav")
    print("here", features.shape, features.shape[0] > 83)

    print("features.shape", features.shape)

    print("sum", features.sum(), features.mean())

    model = OmModel()

    encoder_out = model.run_encoder(features[None])
    print("encoder_out.shape", encoder_out.shape)
    print("encoder_out.sum", encoder_out.sum(), encoder_out.mean())

    alpha = model.run_predictor(encoder_out)
    print("alpha.shape", alpha.shape)
    print("alpha.sum()", alpha.sum(), alpha.mean())

    acoustic_embedding = get_acoustic_embedding(alpha[0], encoder_out[0])
    print("acoustic_embedding.shape", acoustic_embedding.shape)
    num_tokens = acoustic_embedding.shape[0]
    print("num_tokens", num_tokens)

    print("acoustic_embedding.sum", acoustic_embedding.sum(), acoustic_embedding.mean())

    decoder_out = model.run_decoder(encoder_out, acoustic_embedding[None])
    print("decoder_out", decoder_out.shape)
    print("decoder_out.sum", decoder_out.sum(), decoder_out.mean())
    yseq = decoder_out[0, :num_tokens].argmax(axis=-1).tolist()
    print(yseq, "-->", len(yseq))

    tokens = load_tokens()
    words = [tokens[i] for i in yseq if i not in (1, 2)]
    print(words)
    text = "".join(words)
    print(text)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/paraformer/qnn/.gitignore
================================================
*.raw
*-list.txt


================================================
FILE: scripts/paraformer/qnn/convert_decoder.sh
================================================
#!/usr/bin/env bash

if [ -z $t ]; then
  echo "Please run export t=num_input_seconds"
  exit 1
fi

if [ -z $soc ]; then
  echo "Please run export soc=SM8850, etc."
  exit 1
fi

if [ -z $QNN_SDK_ROOT ]; then
  echo "Please run setup QNN first"
  exit 1
fi

echo "Export to onnx with num_seconds $t"

python3 ./export_decoder_onnx.py --input-len-in-seconds $t --opset-version 17 --float-mask 0

ls -lh decoder-*.onnx

python3 ../../pyannote/segmentation/show-onnx.py --filename ./decoder-$t-seconds.onnx

echo "Generate test data"

python3 ./generate_decoder_data.py --input-len-in-seconds $t

ls -lh decoder-*

echo "---"
cat ./decoder-input-list.txt
echo "---"

echo "Convert onnx to qnn"


qnn-onnx-converter \
  --input_network ./decoder-$t-seconds.onnx \
  --output_path ./decoder-$t-seconds-quantized \
  --input_list ./decoder-input-list.txt \
  --use_native_input_files  \
  --input_dtype encoder_out float32 \
  --input_dtype acoustic_embedding float32 \
  --input_dtype mask int32 \
  --act_bitwidth 16 \
  --bias_bitwidth 32

  # Note(fangjun): It throws an error if we specify the layout for decoder inputs.
  # --input_layout encoder_out NTF

ls -lh

mv -v decoder-$t-seconds-quantized decoder-$t-seconds-quantized.cpp

python3 ../../qnn/generate_config.py \
    --soc $soc \
    --graph-name "decoder_${t}_seconds_quantized" \
    --output-dir ./my-config-3 \
    --qnn-sdk-root $QNN_SDK_ROOT

ls -lh my-config-3

head -n100 ./my-config-3/*.json

python3 "${QNN_SDK_ROOT}/bin/x86_64-linux-clang/qnn-model-lib-generator" \
    -c "decoder-$t-seconds-quantized.cpp" \
    -b "decoder-$t-seconds-quantized.bin" \
    -o model_libs
    # -t x86_64-linux-clang \

ls -lh model_libs/x86_64-linux-clang/

$QNN_SDK_ROOT/bin/x86_64-linux-clang/qnn-context-binary-generator \
  --backend $QNN_SDK_ROOT/lib/x86_64-linux-clang/libQnnHtp.so \
  --model ./model_libs/x86_64-linux-clang/libdecoder-${t}-seconds-quantized.so \
  --output_dir ./binary \
  --binary_file decoder \
  --config_file ./my-config-3/htp_backend_extensions.json

ls -lh binary

echo "Finish exporting decoder"


================================================
FILE: scripts/paraformer/qnn/convert_encoder.sh
================================================
#!/usr/bin/env bash

if [ -z $t ]; then
  echo "Please run export t=num_input_seconds"
  exit 1
fi

if [ -z $soc ]; then
  echo "Please run export soc=SM8850, etc."
  exit 1
fi

if [ -z $QNN_SDK_ROOT ]; then
  echo "Please run setup QNN first"
  exit 1
fi

echo "Export to onnx with num_seconds $t"

python3 ./export_encoder_onnx.py --input-len-in-seconds $t --opset-version 17

ls -lh encoder-*.onnx

python3 ../../pyannote/segmentation/show-onnx.py --filename ./encoder-$t-seconds.onnx

echo "Generate test data"

python3 ./generate_encoder_data.py --input-len-in-seconds $t

ls -lh encoder-*

echo "---"
cat ./encoder-input-list.txt
echo "---"

echo "Convert onnx to qnn"


qnn-onnx-converter \
  --input_network ./encoder-$t-seconds.onnx \
  --output_path ./encoder-$t-seconds-quantized \
  --out_node encoder_out \
  --input_list ./encoder-input-list.txt \
  --use_native_input_files  \
  --input_dtype x float32 \
  --act_bitwidth 16 \
  --bias_bitwidth 32 \
  --input_layout x NTF

ls -lh

mv -v encoder-$t-seconds-quantized encoder-$t-seconds-quantized.cpp

python3 ../../qnn/generate_config.py \
    --soc $soc \
    --graph-name "encoder_${t}_seconds_quantized" \
    --output-dir ./my-config \
    --qnn-sdk-root $QNN_SDK_ROOT

ls -lh my-config

head -n100 ./my-config/*.json

python3 "${QNN_SDK_ROOT}/bin/x86_64-linux-clang/qnn-model-lib-generator" \
    -c "encoder-$t-seconds-quantized.cpp" \
    -b "encoder-$t-seconds-quantized.bin" \
    -o model_libs
    # -t x86_64-linux-clang \

ls -lh model_libs/x86_64-linux-clang/

$QNN_SDK_ROOT/bin/x86_64-linux-clang/qnn-context-binary-generator \
  --backend $QNN_SDK_ROOT/lib/x86_64-linux-clang/libQnnHtp.so \
  --model ./model_libs/x86_64-linux-clang/libencoder-${t}-seconds-quantized.so \
  --output_dir ./binary \
  --binary_file encoder \
  --config_file ./my-config/htp_backend_extensions.json

ls -lh binary

echo "Finish exporting encoder"


================================================
FILE: scripts/paraformer/qnn/convert_predictor.sh
================================================
#!/usr/bin/env bash

if [ -z $t ]; then
  echo "Please run export t=num_input_seconds"
  exit 1
fi

if [ -z $soc ]; then
  echo "Please run export soc=SM8850, etc."
  exit 1
fi

if [ -z $QNN_SDK_ROOT ]; then
  echo "Please run setup QNN first"
  exit 1
fi

echo "Export to onnx with num_seconds $t"

python3 ./export_predictor_onnx.py --input-len-in-seconds $t --opset-version 17

ls -lh predictor-*.onnx

python3 ../../pyannote/segmentation/show-onnx.py --filename ./predictor-$t-seconds.onnx

echo "Generate test data"

python3 ./generate_predictor_data.py --input-len-in-seconds $t

ls -lh predictor-*

echo "---"
cat ./predictor-input-list.txt
echo "---"

echo "Convert onnx to qnn"


qnn-onnx-converter \
  --input_network ./predictor-$t-seconds.onnx \
  --output_path ./predictor-$t-seconds-quantized \
  --input_list ./predictor-input-list.txt \
  --use_native_input_files  \
  --input_dtype encoder_out float32 \
  --act_bitwidth 16 \
  --bias_bitwidth 32

  # Note(fangjun): It throws an error if we specify the layout for predictor input.
  # --input_layout encoder_out NTF

ls -lh

mv -v predictor-$t-seconds-quantized predictor-$t-seconds-quantized.cpp

python3 ../../qnn/generate_config.py \
    --soc $soc \
    --graph-name "predictor_${t}_seconds_quantized" \
    --output-dir ./my-config-2 \
    --qnn-sdk-root $QNN_SDK_ROOT

ls -lh my-config-2

head -n100 ./my-config-2/*.json

python3 "${QNN_SDK_ROOT}/bin/x86_64-linux-clang/qnn-model-lib-generator" \
    -c "predictor-$t-seconds-quantized.cpp" \
    -b "predictor-$t-seconds-quantized.bin" \
    -o model_libs
    # -t x86_64-linux-clang \

ls -lh model_libs/x86_64-linux-clang/

$QNN_SDK_ROOT/bin/x86_64-linux-clang/qnn-context-binary-generator \
  --backend $QNN_SDK_ROOT/lib/x86_64-linux-clang/libQnnHtp.so \
  --model ./model_libs/x86_64-linux-clang/libpredictor-${t}-seconds-quantized.so \
  --output_dir ./binary \
  --binary_file predictor \
  --config_file ./my-config-2/htp_backend_extensions.json

ls -lh binary

echo "Finish exporting predictor"


================================================
FILE: scripts/paraformer/qnn/generate_decoder_data.py
================================================
#!/usr/bin/env python3
# Copyright (c)  2025  Xiaomi Corporation

import glob
from pathlib import Path

import numpy as np
import torch

from export_encoder_onnx import get_args, get_num_input_frames, load_model
from export_predictor_onnx import modified_predictor_forward
from test_onnx import compute_feat, get_acoustic_embedding
from torch_model import CifPredictorV2

CifPredictorV2.forward = modified_predictor_forward


def pad(features, max_len):
    if features.shape[0] > max_len:
        return features[:max_len]
    elif features.shape[0] < max_len:
        features = np.pad(
            features,
            ((0, max_len - features.shape[0]), (0, 0)),
            mode="constant",
            constant_values=0,
        )
    return features


@torch.no_grad()
def main():
    args = get_args()
    print(vars(args))

    input_len_in_seconds = int(args.input_len_in_seconds)
    num_input_frames = get_num_input_frames(input_len_in_seconds)

    wav_files = glob.glob("*.wav")

    model = load_model()

    name_list = []
    for w in wav_files:
        f = compute_feat(w)
        print(w, f.shape)
        f = pad(f, num_input_frames)
        f = f[None]
        print(f.shape)

        f = torch.from_numpy(f)

        encoder_out = model.encoder(f)
        alpha = model.predictor(encoder_out)

        acoustic_embedding = get_acoustic_embedding(
            alpha[0].numpy(), encoder_out[0].numpy()
        )
        acoustic_embedding = torch.from_numpy(acoustic_embedding[None])
        num_tokens = acoustic_embedding.shape[1]

        acoustic_embedding = torch.nn.functional.pad(
            acoustic_embedding,
            (0, 0, 0, encoder_out.shape[1] - num_tokens),
            "constant",
            0,
        )

        mask = torch.zeros(1, encoder_out.shape[1], dtype=torch.int32)

        mask[0, :num_tokens] = 1

        # NOTE(Fangjun): We have to transpose the data since QNN expects
        # (N, C, T) for the decoder model
        # Not sure why it has such a requirement.

        encoder_out = encoder_out.permute(0, 2, 1).clone().numpy()
        acoustic_embedding = acoustic_embedding.permute(0, 2, 1).clone().numpy()

        print("inputs: ", encoder_out.shape, acoustic_embedding.shape, mask.shape)

        name = Path(w).stem

        first = f"decoder-input-{name}-0.raw"
        second = f"decoder-input-{name}-1.raw"
        third = f"decoder-input-{name}-2.raw"
        encoder_out.tofile(first)
        acoustic_embedding.tofile(second)
        mask.numpy().tofile(third)

        name_list.append((first, second, third))

    with open("decoder-input-list.txt", "w") as f:
        for first, second, third in name_list:
            f.write(f"{first} {second} {third}\n")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/paraformer/qnn/generate_encoder_data.py
================================================
#!/usr/bin/env python3
# Copyright (c)  2025  Xiaomi Corporation

import glob
from pathlib import Path

import numpy as np

from export_encoder_onnx import get_args, get_num_input_frames
from test_onnx import compute_feat


def pad(features, max_len):
    if features.shape[0] > max_len:
        return features[:max_len]
    elif features.shape[0] < max_len:
        features = np.pad(
            features,
            ((0, max_len - features.shape[0]), (0, 0)),
            mode="constant",
            constant_values=0,
        )
    return features


def main():
    args = get_args()
    print(vars(args))

    input_len_in_seconds = int(args.input_len_in_seconds)
    num_input_frames = get_num_input_frames(input_len_in_seconds)

    wav_files = glob.glob("*.wav")
    features_name = []
    for w in wav_files:
        f = compute_feat(w)
        print(w, f.shape)
        f = pad(f, num_input_frames)
        print(f.shape)
        print()
        name = Path(w).stem

        s = f"encoder-input-{name}.raw"
        f.tofile(s)
        features_name.append(s)

    with open("encoder-input-list.txt", "w") as f:
        for line in features_name:
            f.write(f"{line}\n")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/paraformer/qnn/generate_predictor_data.py
================================================
#!/usr/bin/env python3
# Copyright (c)  2025  Xiaomi Corporation

import glob
from pathlib import Path

import numpy as np
import torch

from export_encoder_onnx import get_args, get_num_input_frames, load_model
from export_predictor_onnx import modified_predictor_forward
from test_onnx import compute_feat
from torch_model import CifPredictorV2

CifPredictorV2.forward = modified_predictor_forward


def pad(features, max_len):
    if features.shape[0] > max_len:
        return features[:max_len]
    elif features.shape[0] < max_len:
        features = np.pad(
            features,
            ((0, max_len - features.shape[0]), (0, 0)),
            mode="constant",
            constant_values=0,
        )
    return features


@torch.no_grad()
def main():
    args = get_args()
    print(vars(args))

    input_len_in_seconds = int(args.input_len_in_seconds)
    num_input_frames = get_num_input_frames(input_len_in_seconds)

    wav_files = glob.glob("*.wav")

    model = load_model()

    name_list = []
    for w in wav_files:
        f = compute_feat(w)
        print(w, f.shape)
        f = pad(f, num_input_frames)
        f = f[None]
        print(f.shape)

        f = torch.from_numpy(f)

        encoder_out = model.encoder(f)

        # NOTE(Fangjun): We have to transpose the data since QNN expects
        # (N, C, T) for the predictor model
        # Not sure why it has such a requirement.

        encoder_out = encoder_out.transpose(1, 2).clone().numpy()

        print("encoder_out", encoder_out.shape)

        name = Path(w).stem

        s = f"predictor-input-{name}.raw"
        encoder_out.tofile(s)
        name_list.append(s)
        print(encoder_out.shape)

    with open("predictor-input-list.txt", "w") as f:
        for line in name_list:
            f.write(f"{line}\n")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/paraformer/qnn/test_qnn.py
================================================
#!/usr/bin/env python3
# Copyright      2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import numpy as np
import torch

from export_encoder_onnx import load_model
from export_predictor_onnx import modified_predictor_forward
from test_onnx import get_acoustic_embedding
from torch_model import CifPredictorV2

CifPredictorV2.forward = modified_predictor_forward


def load_tokens():
    id2token = dict()
    with open("./tokens.txt") as f:
        for line in f:
            fields = line.strip().split()
            id2token[int(fields[1])] = fields[0]
    return id2token


@torch.no_grad()
def main():
    model = load_model()
    encoder_params = sum(p.numel() for p in model.encoder.parameters())
    predictor_params = sum(p.numel() for p in model.predictor.parameters())
    decoder_params = sum(p.numel() for p in model.decoder.parameters())
    print("encoder params (M)", encoder_params / 1024 / 1024)
    print("predictor params (M)", predictor_params / 1024 / 1024)
    print("decoder params (M)", decoder_params / 1024 / 1024)

    features = np.fromfile("./encoder-input-zh.raw", dtype=np.float32).reshape(
        (1, -1, 560)
    )
    features = torch.from_numpy(features)
    encoder_out = model.encoder(features)
    encoder_out.permute(0, 2, 1).numpy().tofile("predictor-in.raw")

    alpha = model.predictor(encoder_out)

    acoustic_embedding = get_acoustic_embedding(
        alpha[0].numpy(), encoder_out[0].numpy()
    )
    acoustic_embedding = torch.from_numpy(acoustic_embedding[None])

    num_tokens = acoustic_embedding.shape[1]

    acoustic_embedding = torch.nn.functional.pad(
        acoustic_embedding,
        (0, 0, 0, encoder_out.shape[1] - num_tokens),
        "constant",
        0,
    )

    mask = torch.zeros(1, encoder_out.shape[1], dtype=torch.float32)

    mask[0, :num_tokens] = 1
    logits = model.decoder(encoder_out, acoustic_embedding, mask)
    print("encoder_out", encoder_out.shape)
    print("acoustic_embedding", acoustic_embedding.shape)
    print("mask", mask.shape)

    encoder_out.permute(0, 2, 1).numpy().tofile("encoder_out.raw")
    acoustic_embedding.permute(0, 2, 1).numpy().tofile("acoustic_embedding.raw")
    mask.to(torch.int32).numpy().tofile("mask.raw")

    yseq = logits[0, :num_tokens].argmax(axis=-1).tolist()
    print(yseq, "-->", len(yseq))

    id2token = load_tokens()
    text = [id2token[i] for i in yseq]
    print(text)

    if False:
        qnn_encoder_out = np.fromfile("./encoder_out.raw", dtype=np.float32).reshape(
            1, -1, 512
        )

        qnn_encoder_out = torch.from_numpy(qnn_encoder_out)

        qnn_alpha = np.fromfile("./alphas.raw", dtype=np.float32).reshape(1, -1)
        qnn_alpha = torch.from_numpy(qnn_alpha)

        acoustic_embedding = get_acoustic_embedding(
            qnn_alpha[0].numpy(), qnn_encoder_out[0].numpy()
        )
        acoustic_embedding = torch.from_numpy(acoustic_embedding[None])

        num_tokens = acoustic_embedding.shape[1]

        acoustic_embedding = torch.nn.functional.pad(
            acoustic_embedding,
            (0, 0, 0, qnn_encoder_out.shape[1] - num_tokens),
            "constant",
            0,
        )

        mask = torch.zeros(1, qnn_encoder_out.shape[1], dtype=torch.float32)

        mask[0, :num_tokens] = 1

        logits = model.decoder(qnn_encoder_out, acoustic_embedding, mask)
    else:
        logits = np.fromfile("./decoder_out.raw", dtype=np.float32).reshape(
            1,
            -1,
            encoder_out.shape[1],
        )
        logits = torch.from_numpy(logits)
        logits = logits.permute(0, 2, 1)

    yseq = logits[0, :num_tokens].argmax(axis=-1).tolist()
    print(yseq, "-->", len(yseq))
    text = [id2token[i] for i in yseq]
    print(text)


if __name__ == "__main__":
    torch.manual_seed(20251013)
    main()


================================================
FILE: scripts/paraformer/rknn/download-example-model.sh
================================================
#!/usr/bin/env bash

wget https://hf-mirror.com/csukuangfj/WSChuan-ASR/resolve/main/Paraformer-large-Chuan/am.mvn
wget https://hf-mirror.com/csukuangfj/WSChuan-ASR/resolve/main/Paraformer-large-Chuan/config.yaml
wget https://hf-mirror.com/csukuangfj/WSChuan-ASR/resolve/main/Paraformer-large-Chuan/tokens.json
wget https://hf-mirror.com/csukuangfj/WSChuan-ASR/resolve/main/Paraformer-large-Chuan/seg_dict
wget https://hf-mirror.com/csukuangfj/WSChuan-ASR/resolve/main/Paraformer-large-Chuan/model_state_dict.pt

python3 ./export_encoder_onnx.py  --input-len-in-seconds 5
python3 ./export_rknn.py --target-platform rk3588 --in-model ./encoder-5-seconds.onnx --out-model ./encoder-5-seconds.rknn

python3 ./export_predictor_onnx.py  --input-len-in-seconds 5
python3 ./export_rknn.py --target-platform rk3588 --in-model ./predictor-5-seconds.onnx --out-model ./predictor-5-seconds.rknn

python3 ./export_decoder_onnx.py  --input-len-in-seconds 5
python3 ./export_rknn.py --target-platform rk3588 --in-model ./decoder-5-seconds.onnx --out-model ./decoder-5-seconds.rknn


================================================
FILE: scripts/paraformer/rknn/export_decoder_onnx.py
================================================
#!/usr/bin/env python3
# Copyright (c)  2025  Xiaomi Corporation

import torch

from export_encoder_onnx import load_model, get_num_input_frames

import argparse


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--input-len-in-seconds",
        type=int,
        required=True,
        help="""RKNN/QNN does not support dynamic shape, so we need to hard-code
        how long the model can process.
        """,
    )

    parser.add_argument(
        "--float-mask",
        type=int,
        default=1,
        help="1 to use float mask. 0 to use int32 mask",
    )

    parser.add_argument(
        "--opset-version",
        type=int,
        default=14,
    )
    return parser.parse_args()


@torch.no_grad()
def main():
    print("loading model")
    model = load_model()

    args = get_args()

    input_len_in_seconds = int(args.input_len_in_seconds)
    num_input_frames = get_num_input_frames(input_len_in_seconds)

    encoder_out = torch.randn(1, num_input_frames, 512, dtype=torch.float32)
    acoustic_embedding = torch.randn(1, num_input_frames, 512, dtype=torch.float32)
    if args.float_mask == 1:
        mask = torch.ones([num_input_frames], dtype=torch.float32)
    else:
        mask = torch.ones([num_input_frames], dtype=torch.int32)

    d = model.decoder(encoder_out, acoustic_embedding)
    print("d", d.shape)

    opset_version = args.opset_version
    filename = f"decoder-{input_len_in_seconds}-seconds.onnx"
    torch.onnx.export(
        model.decoder,
        (encoder_out, acoustic_embedding, mask),
        filename,
        opset_version=opset_version,
        input_names=["encoder_out", "acoustic_embedding", "mask"],
        output_names=["decoder_out"],
        dynamic_axes={},
    )
    print(f"Saved to {filename}")


if __name__ == "__main__":
    torch.manual_seed(20251008)
    main()


================================================
FILE: scripts/paraformer/rknn/export_encoder_onnx.py
================================================
#!/usr/bin/env python3
# Copyright (c)  2025  Xiaomi Corporation

import argparse
import os
from typing import Any, Dict, List, Tuple

import onnx
import torch
import yaml

from torch_model import Paraformer, SANMEncoder


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--input-len-in-seconds",
        type=int,
        required=True,
        help="""RKNN does not support dynamic shape, so we need to hard-code
        how long the model can process.
        """,
    )

    parser.add_argument(
        "--opset-version",
        type=int,
        default=14,
    )
    return parser.parse_args()


def load_cmvn(filename) -> Tuple[List[float], List[float]]:
    neg_mean = None
    inv_stddev = None

    with open(filename) as f:
        for line in f:
            if not line.startswith("<LearnRateCoef>"):
                continue
            t = line.split()[3:-1]

            if neg_mean is None:
                neg_mean = list(map(lambda x: float(x), t))
            else:
                inv_stddev = list(map(lambda x: float(x), t))

    return neg_mean, inv_stddev


if __name__ == "__main__":

    def modified_sanm_encoder_forward(
        self: SANMEncoder, xs_pad: torch.Tensor, pos: torch.Tensor
    ):
        print("xs pad", xs_pad.shape)
        xs_pad = (xs_pad + self.neg_mean) * self.inv_stddev

        xs_pad = xs_pad * self.output_size() ** 0.5

        xs_pad = xs_pad + pos

        xs_pad = self.encoders0(xs_pad)[0]

        xs_pad = self.encoders(xs_pad)[0]

        if self.normalize_before:
            xs_pad = self.after_norm(xs_pad)

        print("xs pad--->", xs_pad.shape, pos.shape)

        return xs_pad

    #  SANMEncoder.forward = modified_sanm_encoder_forward


def load_model():
    with open("./config.yaml", "r", encoding="utf-8") as f:
        config = yaml.safe_load(f)

    print("creating model")

    neg_mean, inv_stddev = load_cmvn("./am.mvn")

    neg_mean = torch.tensor(neg_mean, dtype=torch.float32)
    inv_stddev = torch.tensor(inv_stddev, dtype=torch.float32)

    m = Paraformer(
        neg_mean=neg_mean,
        inv_stddev=inv_stddev,
        input_size=560,
        vocab_size=8404,
        encoder_conf=config["encoder_conf"],
        decoder_conf=config["decoder_conf"],
        predictor_conf=config["predictor_conf"],
    )
    m.eval()

    print("loading state dict")
    state_dict = torch.load("./model_state_dict.pt", map_location="cpu")
    if "state_dict" in state_dict:
        state_dict = state_dict["state_dict"]

    m.load_state_dict(state_dict)
    del state_dict

    return m


def add_meta_data(filename: str, meta_data: Dict[str, Any]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)
    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    onnx.save(model, filename)


lfr_window_size = 7
lfr_window_shift = 6


def get_num_input_frames(input_len_in_seconds):
    num_frames = input_len_in_seconds * 100
    print("num_frames", num_frames)

    # num_input_frames is an approximate number
    num_input_frames = int(num_frames / lfr_window_shift + 0.5)
    print("num_input_frames", num_input_frames)
    return num_input_frames


@torch.no_grad()
def main():
    args = get_args()
    print(vars(args))

    print("loading model")
    model = load_model()

    # frame shift is 10ms, 1 second has about 100 feature frames
    input_len_in_seconds = int(args.input_len_in_seconds)
    num_input_frames = get_num_input_frames(input_len_in_seconds)

    x = torch.randn(1, num_input_frames, 560, dtype=torch.float32)
    pos_emb = torch.rand(1, x.shape[1], 560, dtype=torch.float32)

    opset_version = args.opset_version
    filename = f"encoder-{input_len_in_seconds}-seconds.onnx"
    torch.onnx.export(
        model.encoder,
        #  (x, pos_emb),
        x,
        filename,
        opset_version=opset_version,
        #  input_names=["x", "pos_emb"],
        input_names=["x"],
        output_names=["encoder_out"],
        dynamic_axes={},
    )

    model_author = os.environ.get("model_author", "iic")
    comment = os.environ.get(
        "comment",
        "iic/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
    )
    url = os.environ.get("url", "https://github.com/alibaba-damo-academy/FunASR")

    meta_data = {
        "lfr_window_size": lfr_window_size,
        "lfr_window_shift": lfr_window_shift,
        "num_input_frames": num_input_frames,
        "normalize_samples": 0,  # input should be in the range [-32768, 32767]
        "model_type": "paraformer",
        "version": "1",
        "model_author": model_author,
        "maintainer": "k2-fsa",
        "vocab_size": 8404,
        "comment": comment,
        "url": url,
        "rknn": 1,
    }

    add_meta_data(filename=filename, meta_data=meta_data)
    print(f"Saved to {filename}")


if __name__ == "__main__":
    torch.manual_seed(20251013)
    main()


================================================
FILE: scripts/paraformer/rknn/export_predictor_onnx.py
================================================
#!/usr/bin/env python3
# Copyright (c)  2025  Xiaomi Corporation

import torch

from export_encoder_onnx import load_model, get_args, get_num_input_frames
from torch_model import CifPredictorV2


def modified_predictor_forward(self: CifPredictorV2, hidden: torch.Tensor):
    h = hidden
    context = h.transpose(1, 2)
    queries = self.pad(context)
    output = torch.relu(self.cif_conv1d(queries))
    output = output.transpose(1, 2)

    output = self.cif_output(output)
    alphas = torch.sigmoid(output)
    alphas = torch.nn.functional.relu(
        alphas * self.smooth_factor - self.noise_threshold
    )

    alphas = alphas.squeeze(-1)

    return alphas


if __name__ == "__main__":
    CifPredictorV2.forward = modified_predictor_forward


@torch.no_grad()
def main():
    print("loading model")
    model = load_model()

    args = get_args()

    input_len_in_seconds = int(args.input_len_in_seconds)
    num_input_frames = get_num_input_frames(input_len_in_seconds)

    x = torch.randn(1, num_input_frames, 512, dtype=torch.float32)

    opset_version = args.opset_version
    filename = f"predictor-{input_len_in_seconds}-seconds.onnx"
    torch.onnx.export(
        model.predictor,
        x,
        filename,
        opset_version=opset_version,
        input_names=["encoder_out"],
        output_names=["alphas"],
        dynamic_axes={},
    )
    print(f"Saved to {filename}")


if __name__ == "__main__":
    torch.manual_seed(20251008)
    main()


================================================
FILE: scripts/paraformer/rknn/export_rknn.py
================================================
#!/usr/bin/env python3
# Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)

import argparse
import logging
from pathlib import Path

from rknn.api import RKNN

logging.basicConfig(level=logging.WARNING)

g_platforms = [
    #  "rk3562",
    #  "rk3566",
    #  "rk3568",
    #  "rk3576",
    "rk3588",
]


def get_parser():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--target-platform",
        type=str,
        required=True,
        help=f"Supported values are: {','.join(g_platforms)}",
    )

    parser.add_argument(
        "--in-model",
        type=str,
        required=True,
        help="Path to the input onnx model",
    )

    parser.add_argument(
        "--out-model",
        type=str,
        required=True,
        help="Path to the output rknn model",
    )

    return parser


def get_meta_data(model: str):
    import onnxruntime

    session_opts = onnxruntime.SessionOptions()
    session_opts.inter_op_num_threads = 1
    session_opts.intra_op_num_threads = 1

    m = onnxruntime.InferenceSession(
        model,
        sess_options=session_opts,
        providers=["CPUExecutionProvider"],
    )

    for i in m.get_inputs():
        print(i)

    print("-----")

    for i in m.get_outputs():
        print(i)
    print()

    meta = m.get_modelmeta().custom_metadata_map
    s = ""
    sep = ""
    for key, value in meta.items():
        s = s + sep + f"{key}={value}"
        sep = ";"
    assert len(s) < 1024, len(s)

    print("len(s)", len(s), s)

    return s


def export_rknn(rknn, filename):
    ret = rknn.export_rknn(filename)
    if ret != 0:
        exit(f"Export rknn model to {filename} failed!")


def init_model(filename: str, target_platform: str, custom_string=None):
    rknn = RKNN(verbose=False)

    rknn.config(
        optimization_level=0,
        target_platform=target_platform,
        custom_string=custom_string,
    )
    if not Path(filename).is_file():
        exit(f"{filename} does not exist")

    ret = rknn.load_onnx(model=filename)
    if ret != 0:
        exit(f"Load model {filename} failed!")

    ret = rknn.build(do_quantization=False)
    if ret != 0:
        exit(f"Build model {filename} failed!")

    return rknn


class RKNNModel:
    def __init__(
        self,
        model: str,
        target_platform: str,
    ):
        meta = get_meta_data(model)
        print(meta)

        self.model = init_model(
            model,
            target_platform=target_platform,
            custom_string=meta,
        )

    def export_rknn(self, model):
        export_rknn(self.model, model)

    def release(self):
        self.model.release()


def main():
    args = get_parser().parse_args()
    print(vars(args))

    model = RKNNModel(
        model=args.in_model,
        target_platform=args.target_platform,
    )

    model.export_rknn(
        model=args.out_model,
    )

    model.release()


if __name__ == "__main__":
    main()


================================================
FILE: scripts/paraformer/rknn/test_onnx.py
================================================
#!/usr/bin/env python3
# Copyright (c)  2025  Xiaomi Corporation

import kaldi_native_fbank as knf
import onnxruntime as ort
import librosa
import torch
import numpy as np


class SinusoidalPositionEncoder(torch.nn.Module):
    def encode(
        self,
        positions: torch.Tensor = None,
        depth: int = None,
        dtype: torch.dtype = torch.float32,
    ):
        """
        Args:
          positions: (batch_size, )
        """
        batch_size = positions.size(0)
        positions = positions.type(dtype)
        device = positions.device
        log_timescale_increment = torch.log(
            torch.tensor([10000], dtype=dtype, device=device)
        ) / (depth / 2 - 1)
        inv_timescales = torch.exp(
            torch.arange(depth / 2, device=device).type(dtype)
            * (-log_timescale_increment)
        )
        inv_timescales = torch.reshape(inv_timescales, [batch_size, -1])
        scaled_time = torch.reshape(positions, [1, -1, 1]) * torch.reshape(
            inv_timescales, [1, 1, -1]
        )
        encoding = torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)], dim=2)
        return encoding.type(dtype)

    def forward(self, batch_size, timesteps, input_dim):
        positions = torch.arange(1, timesteps + 1)[None, :]
        position_encoding = self.encode(positions, input_dim, torch.float32)

        return position_encoding


def compute_feat(filename):
    sample_rate = 16000
    samples, _ = librosa.load(filename, sr=sample_rate)
    opts = knf.FbankOptions()
    opts.frame_opts.dither = 0
    opts.frame_opts.snip_edges = False
    opts.frame_opts.samp_freq = sample_rate
    opts.mel_opts.num_bins = 80

    online_fbank = knf.OnlineFbank(opts)
    online_fbank.accept_waveform(sample_rate, (samples * 32768).tolist())
    online_fbank.input_finished()

    features = np.stack(
        [online_fbank.get_frame(i) for i in range(online_fbank.num_frames_ready)]
    )
    assert features.data.contiguous is True
    assert features.dtype == np.float32, features.dtype
    #  print("features sum", features.sum(), features.shape)

    window_size = 7  # lfr_m
    window_shift = 6  # lfr_n

    T = (features.shape[0] - window_size) // window_shift + 1
    features = np.lib.stride_tricks.as_strided(
        features,
        shape=(T, features.shape[1] * window_size),
        strides=((window_shift * features.shape[1]) * 4, 4),
    )
    return np.copy(features)


def load_tokens():
    ans = dict()
    i = 0
    with open("tokens.txt", encoding="utf-8") as f:
        for line in f:
            ans[i] = line.strip().split()[0]
            i += 1
    return ans


class OnnxModel:
    def __init__(self):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.session_opts = session_opts

        print("init encoder")
        self.encoder = ort.InferenceSession(
            "./encoder-5-seconds.onnx",
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )

        print("init decoder")
        self.decoder = ort.InferenceSession(
            "./decoder-5-seconds.onnx",
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )

        print("init predictor")
        self.predictor = ort.InferenceSession(
            "./predictor-5-seconds.onnx",
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )

        print("---encoder---")
        for i in self.encoder.get_inputs():
            print(i)

        print("-----")

        for i in self.encoder.get_outputs():
            print(i)

        print("---decoder---")
        for i in self.decoder.get_inputs():
            print(i)

        print("-----")

        for i in self.decoder.get_outputs():
            print(i)

        print("---predictor---")
        for i in self.predictor.get_inputs():
            print(i)

        print("-----")

        for i in self.predictor.get_outputs():
            print(i)

    #  def run_encoder(self, features, pos_emb):
    def run_encoder(self, features):
        (encoder_out,) = self.encoder.run(
            [
                self.encoder.get_outputs()[0].name,
            ],
            {
                self.encoder.get_inputs()[0].name: features,
                #  self.encoder.get_inputs()[1].name: pos_emb,
            },
        )
        return encoder_out

    def run_predictor(self, encoder_out):
        (alphas,) = self.predictor.run(
            [
                self.predictor.get_outputs()[0].name,
            ],
            {
                self.predictor.get_inputs()[0].name: encoder_out,
            },
        )
        return alphas

    #  def run_decoder(self, encoder_out, acoustic_embedding, mask):
    def run_decoder(self, encoder_out, acoustic_embedding, mask):
        print(
            self.decoder.get_outputs()[0].name,
            self.decoder.get_inputs()[0].name,
            self.decoder.get_inputs()[1].name,
        )
        (decoder_out,) = self.decoder.run(
            [
                self.decoder.get_outputs()[0].name,
            ],
            {
                self.decoder.get_inputs()[0].name: encoder_out,
                self.decoder.get_inputs()[1].name: acoustic_embedding,
                self.decoder.get_inputs()[2].name: mask,
            },
        )
        return decoder_out


def get_acoustic_embedding(alpha: np.array, hidden: np.array):
    """
    Args:
      alpha: (T,)
      hidden: (T, C)
    Returns:
      acoustic_embeds: (num_tokens, C)
    """
    alpha = alpha.tolist()
    acc = 0
    num_tokens = 0

    embeddings = []
    cur_embedding = np.zeros((hidden.shape[1],), dtype=np.float32)

    for i, w in enumerate(alpha):
        acc += w
        if acc >= 1:
            overflow = acc - 1
            remain = w - overflow
            cur_embedding += remain * hidden[i]
            embeddings.append(cur_embedding)

            cur_embedding = overflow * hidden[i]
            acc = overflow
        else:
            cur_embedding += w * hidden[i]

    if len(embeddings) == 0:
        raise ValueError("No speech in the audio file")

    embeddings = np.array(embeddings)
    return embeddings


def main():
    features = compute_feat("./1.wav")
    print("here", features.shape, features.shape[0] > 83)
    if features.shape[0] >= 83:
        features = features[:83]
    else:
        padding = features[-(83 - features.shape[0]) :]
        print("padding", features.shape, padding.shape)
        features = np.concatenate([features, padding])

    pos_emb = (
        SinusoidalPositionEncoder()(1, features.shape[0], features.shape[1])
        .squeeze(0)
        .numpy()
    )

    print("features.shape", features.shape, pos_emb.shape)

    print("sum", features.sum(), features.mean(), pos_emb.sum(), pos_emb.mean())

    model = OnnxModel()

    #  encoder_out = model.run_encoder(features[None], pos_emb[None])
    encoder_out = model.run_encoder(features[None])
    print("encoder_out.shape", encoder_out.shape)
    print("encoder_out.sum", encoder_out.sum(), encoder_out.mean())

    alpha = model.run_predictor(encoder_out)
    print("alpha.shape", alpha.shape)
    print("alpha.sum()", alpha.sum(), alpha.mean())

    acoustic_embedding = get_acoustic_embedding(alpha[0], encoder_out[0])
    print("acoustic_embedding.shape", acoustic_embedding.shape)
    num_tokens = acoustic_embedding.shape[0]

    padding = np.zeros((83 - acoustic_embedding.shape[0], 512), dtype=np.float32)
    print("padding.shape", padding.shape, acoustic_embedding.shape)

    acoustic_embedding = np.concatenate([acoustic_embedding, padding], axis=0)
    print("acoustic_embedding.shape", acoustic_embedding.shape)
    print("acoustic_embedding.sum", acoustic_embedding.sum(), acoustic_embedding.mean())

    mask = np.zeros((83,), dtype=np.float32)
    mask[:num_tokens] = 1
    print(mask)

    decoder_out = model.run_decoder(encoder_out, acoustic_embedding[None], mask)
    #  decoder_out = model.run_decoder(encoder_out, acoustic_embedding[None])
    print("decoder_out", decoder_out.shape)
    print("decoder_out.sum", decoder_out.sum(), decoder_out.mean())
    yseq = decoder_out[0, :num_tokens].argmax(axis=-1).tolist()
    print(yseq, "-->", len(yseq))

    tokens = load_tokens()
    words = [tokens[i] for i in yseq if i not in (1, 2)]
    print(words)
    text = "".join(words)
    print(text)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/paraformer/rknn/torch_model.py
================================================
#!/usr/bin/env python3
"""
Code in this file is copied and modified from
https://github.com/modelscope/FunASR
"""
import math
from typing import Dict, List, Optional, Tuple

import torch
import torch.nn as nn


class EncoderLayerSANM(nn.Module):
    def __init__(
        self,
        in_size,
        size,
        self_attn,
        feed_forward,
        dropout_rate,
        normalize_before=True,
        concat_after=False,
        stochastic_depth_rate=0.0,
    ):
        """Construct an EncoderLayer object."""
        super().__init__()
        self.self_attn = self_attn
        self.feed_forward = feed_forward
        self.norm1 = torch.nn.LayerNorm(in_size)
        self.norm2 = torch.nn.LayerNorm(size)
        self.dropout = nn.Dropout(dropout_rate)
        self.in_size = in_size
        self.size = size
        self.normalize_before = normalize_before
        self.concat_after = concat_after
        if self.concat_after:
            self.concat_linear = nn.Linear(size + size, size)
        self.stochastic_depth_rate = stochastic_depth_rate
        self.dropout_rate = dropout_rate

    def forward(
        self,
        x,
        mask=None,
        cache=None,
        mask_shfit_chunk=None,
        mask_att_chunk_encoder=None,
    ):
        """Compute encoded features.

        Args:
            x_input (torch.Tensor): Input tensor (#batch, time, size).
            mask (torch.Tensor): Mask tensor for the input (#batch, time).
            cache (torch.Tensor): Cache tensor of the input (#batch, time - 1, size).

        Returns:
            torch.Tensor: Output tensor (#batch, time, size).
            torch.Tensor: Mask tensor (#batch, time).

        """
        residual = x
        if self.normalize_before:
            x = self.norm1(x)

        if self.in_size == self.size:
            x = residual + self.dropout(
                self.self_attn(
                    x,
                    mask,
                    mask_shfit_chunk=mask_shfit_chunk,
                    mask_att_chunk_encoder=mask_att_chunk_encoder,
                )
            )
        else:
            x = self.dropout(
                self.self_attn(
                    x,
                    mask,
                    mask_shfit_chunk=mask_shfit_chunk,
                    mask_att_chunk_encoder=mask_att_chunk_encoder,
                )
            )

        if not self.normalize_before:
            x = self.norm1(x)

        residual = x
        if self.normalize_before:
            x = self.norm2(x)

        x = residual + self.dropout(self.feed_forward(x))

        if not self.normalize_before:
            x = self.norm2(x)

        x = torch.clamp(x, -60000.0, 60000.0)

        return x, mask, cache, mask_shfit_chunk, mask_att_chunk_encoder


class MultiSequential(torch.nn.Sequential):
    """Multi-input multi-output torch.nn.Sequential."""

    def __init__(self, *args, layer_drop_rate=0.0):
        """Initialize MultiSequential with layer_drop.

        Args:
            layer_drop_rate (float): Probability of dropping out each fn (layer).

        """
        super().__init__(*args)
        self.layer_drop_rate = layer_drop_rate

    def forward(self, *args):
        """Repeat."""
        for idx, m in enumerate(self):
            args = m(*args)
        return args


def repeat(N, fn, layer_drop_rate=0.0):
    """Repeat module N times.

    Args:
        N (int): Number of repeat time.
        fn (Callable): Function to generate module.
        layer_drop_rate (float): Probability of dropping out each fn (layer).

    Returns:
        MultiSequential: Repeated model instance.

    """
    return MultiSequential(*[fn(n) for n in range(N)], layer_drop_rate=layer_drop_rate)


class MultiHeadedAttentionSANM(nn.Module):
    """Multi-Head Attention layer.

    Args:
        n_head (int): The number of heads.
        n_feat (int): The number of features.
        dropout_rate (float): Dropout rate.

    """

    def __init__(
        self,
        n_head,
        in_feat,
        n_feat,
        dropout_rate,
        kernel_size,
        sanm_shfit=0,
        lora_list=None,
        lora_rank=8,
        lora_alpha=16,
        lora_dropout=0.1,
    ):
        """Construct an MultiHeadedAttention object."""
        super().__init__()

        assert lora_list is None

        assert n_feat % n_head == 0, (n_feat, n_head)

        # We assume d_v always equals d_k
        self.d_k = n_feat // n_head
        self.h = n_head
        # self.linear_q = nn.Linear(n_feat, n_feat)
        # self.linear_k = nn.Linear(n_feat, n_feat)
        # self.linear_v = nn.Linear(n_feat, n_feat)

        self.linear_out = nn.Linear(n_feat, n_feat)
        self.linear_q_k_v = nn.Linear(in_feat, n_feat * 3)
        self.dropout = nn.Dropout(p=dropout_rate)

        self.fsmn_block = nn.Conv1d(
            n_feat, n_feat, kernel_size, stride=1, padding=0, groups=n_feat, bias=False
        )
        # padding
        left_padding = (kernel_size - 1) // 2
        if sanm_shfit > 0:
            left_padding = left_padding + sanm_shfit
        right_padding = kernel_size - 1 - left_padding
        self.pad_fn = nn.ConstantPad1d((left_padding, right_padding), 0.0)

    def forward_fsmn(self, inputs, mask=None, mask_shfit_chunk=None):
        b, t, d = inputs.size()
        if mask is not None:
            mask = torch.reshape(mask, (b, -1, 1))
            if mask_shfit_chunk is not None:
                mask = mask * mask_shfit_chunk
            inputs = inputs * mask

        x = inputs.transpose(1, 2)
        x = self.pad_fn(x)
        x = self.fsmn_block(x)
        x = x.transpose(1, 2)
        x += inputs
        x = self.dropout(x)
        if mask is not None:
            x = x * mask
        return x

    def forward_qkv(self, x):
        """Transform query, key and value.

        Args:
            query (torch.Tensor): Query tensor (#batch, time1, size).
            key (torch.Tensor): Key tensor (#batch, time2, size).
            value (torch.Tensor): Value tensor (#batch, time2, size).

        Returns:
            torch.Tensor: Transformed query tensor (#batch, n_head, time1, d_k).
            torch.Tensor: Transformed key tensor (#batch, n_head, time2, d_k).
            torch.Tensor: Transformed value tensor (#batch, n_head, time2, d_k).

        """
        b, t, d = x.size()
        q_k_v = self.linear_q_k_v(x)
        q, k, v = torch.split(q_k_v, int(self.h * self.d_k), dim=-1)
        q_h = torch.reshape(q, (b, t, self.h, self.d_k)).transpose(
            1, 2
        )  # (batch, head, time1, d_k)
        k_h = torch.reshape(k, (b, t, self.h, self.d_k)).transpose(
            1, 2
        )  # (batch, head, time2, d_k)
        v_h = torch.reshape(v, (b, t, self.h, self.d_k)).transpose(
            1, 2
        )  # (batch, head, time2, d_k)

        return q_h, k_h, v_h, v

    def forward_attention(self, value, scores, mask=None, mask_att_chunk_encoder=None):
        """Compute attention context vector.

        Args:
            value (torch.Tensor): Transformed value (#batch, n_head, time2, d_k).
            scores (torch.Tensor): Attention score (#batch, n_head, time1, time2).
            mask (torch.Tensor): Mask (#batch, 1, time2) or (#batch, time1, time2).

        Returns:
            torch.Tensor: Transformed value (#batch, time1, d_model)
                weighted by the attention score (#batch, time1, time2).

        """
        n_batch = value.size(0)
        if mask is not None:
            if mask_att_chunk_encoder is not None:
                mask = mask * mask_att_chunk_encoder

            mask = mask.unsqueeze(1).eq(0)  # (batch, 1, *, time2)

            min_value = -float(
                "inf"
            )  # float(numpy.finfo(torch.tensor(0, dtype=scores.dtype).numpy().dtype).min)
            scores = scores.masked_fill(mask, min_value)
            attn = torch.softmax(scores, dim=-1).masked_fill(
                mask, 0.0
            )  # (batch, head, time1, time2)
        else:
            attn = torch.softmax(scores, dim=-1)  # (batch, head, time1, time2)

        p_attn = self.dropout(attn)
        x = torch.matmul(p_attn, value)  # (batch, head, time1, d_k)
        x = (
            x.transpose(1, 2).contiguous().view(n_batch, -1, self.h * self.d_k)
        )  # (batch, time1, d_model)

        return self.linear_out(x)  # (batch, time1, d_model)

    def forward(self, x, mask=None, mask_shfit_chunk=None, mask_att_chunk_encoder=None):
        """Compute scaled dot product attention.

        Args:
            query (torch.Tensor): Query tensor (#batch, time1, size).
            key (torch.Tensor): Key tensor (#batch, time2, size).
            value (torch.Tensor): Value tensor (#batch, time2, size).
            mask (torch.Tensor): Mask tensor (#batch, 1, time2) or
                (#batch, time1, time2).

        Returns:
            torch.Tensor: Output tensor (#batch, time1, d_model).

        """
        q_h, k_h, v_h, v = self.forward_qkv(x)
        fsmn_memory = self.forward_fsmn(v, mask, mask_shfit_chunk)
        q_h = q_h * self.d_k ** (-0.5)
        scores = torch.matmul(q_h, k_h.transpose(-2, -1))
        att_outs = self.forward_attention(v_h, scores, mask, mask_att_chunk_encoder)
        return att_outs + fsmn_memory


class SinusoidalPositionEncoder(torch.nn.Module):
    """ """

    def __init__(self, d_model=80, dropout_rate=0.1):
        super().__init__()
        pass

    def encode(
        self,
        positions: torch.Tensor = None,
        depth: int = None,
        dtype: torch.dtype = torch.float32,
    ):
        batch_size = positions.size(0)
        positions = positions.type(dtype)
        device = positions.device
        log_timescale_increment = torch.log(
            torch.tensor([10000], dtype=dtype, device=device)
        ) / (depth / 2 - 1)
        inv_timescales = torch.exp(
            torch.arange(depth / 2, device=device).type(dtype)
            * (-log_timescale_increment)
        )
        inv_timescales = torch.reshape(inv_timescales, [batch_size, -1])
        scaled_time = torch.reshape(positions, [1, -1, 1]) * torch.reshape(
            inv_timescales, [1, 1, -1]
        )
        encoding = torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)], dim=2)
        return encoding.type(dtype)

    def forward(self, x):
        batch_size, timesteps, input_dim = x.size()
        positions = torch.arange(1, timesteps + 1, device=x.device)[None, :]
        position_encoding = self.encode(positions, input_dim, x.dtype).to(x.device)

        return x + position_encoding


class PositionwiseFeedForward(torch.nn.Module):
    """Positionwise feed forward layer.

    Args:
        idim (int): Input dimension.
        hidden_units (int): The number of hidden units.
        dropout_rate (float): Dropout rate.

    """

    def __init__(self, idim, hidden_units, dropout_rate, activation=torch.nn.ReLU()):
        """Construct an PositionwiseFeedForward object."""
        super().__init__()
        self.w_1 = torch.nn.Linear(idim, hidden_units)
        self.w_2 = torch.nn.Linear(hidden_units, idim)
        self.dropout = torch.nn.Dropout(dropout_rate)
        self.activation = activation

    def forward(self, x):
        """Forward function."""
        return self.w_2(self.dropout(self.activation(self.w_1(x))))


class SANMEncoder(nn.Module):
    """
    Author: Zhifu Gao, Shiliang Zhang, Ming Lei, Ian McLoughlin
    San-m: Memory equipped self-attention for end-to-end speech recognition
    https://arxiv.org/abs/2006.01713
    """

    def __init__(
        self,
        neg_mean: torch.Tensor,
        inv_stddev: torch.Tensor,
        input_size: int,
        output_size: int = 256,
        attention_heads: int = 4,
        linear_units: int = 2048,
        num_blocks: int = 6,
        dropout_rate: float = 0.1,
        positional_dropout_rate: float = 0.1,
        attention_dropout_rate: float = 0.0,
        input_layer: Optional[str] = "conv2d",
        pos_enc_class=SinusoidalPositionEncoder,
        normalize_before: bool = True,
        concat_after: bool = False,
        positionwise_layer_type: str = "linear",
        positionwise_conv_kernel_size: int = 1,
        padding_idx: int = -1,
        interctc_layer_idx: List[int] = [],
        interctc_use_conditioning: bool = False,
        kernel_size: int = 11,
        sanm_shfit: int = 0,
        lora_list: List[str] = None,
        lora_rank: int = 8,
        lora_alpha: int = 16,
        lora_dropout: float = 0.1,
        selfattention_layer_type: str = "sanm",
        tf2torch_tensor_name_prefix_torch: str = "encoder",
        tf2torch_tensor_name_prefix_tf: str = "seq2seq/encoder",
    ):
        super().__init__()
        self.neg_mean = neg_mean
        self.inv_stddev = inv_stddev
        self._output_size = output_size
        assert input_layer == "pe", input_layer

        self.embed = SinusoidalPositionEncoder()
        self.normalize_before = normalize_before

        assert positionwise_layer_type == "linear", positionwise_layer_type
        positionwise_layer = PositionwiseFeedForward
        positionwise_layer_args = (
            output_size,
            linear_units,
            dropout_rate,
        )

        assert selfattention_layer_type == "sanm", selfattention_layer_type

        encoder_selfattn_layer = MultiHeadedAttentionSANM
        encoder_selfattn_layer_args0 = (
            attention_heads,
            input_size,
            output_size,
            attention_dropout_rate,
            kernel_size,
            sanm_shfit,
            lora_list,
            lora_rank,
            lora_alpha,
            lora_dropout,
        )

        encoder_selfattn_layer_args = (
            attention_heads,
            output_size,
            output_size,
            attention_dropout_rate,
            kernel_size,
            sanm_shfit,
            lora_list,
            lora_rank,
            lora_alpha,
            lora_dropout,
        )

        self.encoders0 = repeat(
            1,
            lambda lnum: EncoderLayerSANM(
                input_size,
                output_size,
                encoder_selfattn_layer(*encoder_selfattn_layer_args0),
                positionwise_layer(*positionwise_layer_args),
                dropout_rate,
                normalize_before,
                concat_after,
            ),
        )

        self.encoders = repeat(
            num_blocks - 1,
            lambda lnum: EncoderLayerSANM(
                output_size,
                output_size,
                encoder_selfattn_layer(*encoder_selfattn_layer_args),
                positionwise_layer(*positionwise_layer_args),
                dropout_rate,
                normalize_before,
                concat_after,
            ),
        )

        if self.normalize_before:
            self.after_norm = torch.nn.LayerNorm(output_size)

        self.interctc_layer_idx = interctc_layer_idx

        assert len(interctc_layer_idx) == 0, len(interctc_layer_idx)
        self.interctc_use_conditioning = interctc_use_conditioning
        self.conditioning_layer = None
        self.dropout = nn.Dropout(dropout_rate)
        self.tf2torch_tensor_name_prefix_torch = tf2torch_tensor_name_prefix_torch
        self.tf2torch_tensor_name_prefix_tf = tf2torch_tensor_name_prefix_tf

    def output_size(self) -> int:
        return self._output_size

    def forward(
        self,
        xs_pad: torch.Tensor,
    ) -> torch.Tensor:
        """Embed positions in tensor.

        Args:
            xs_pad: input tensor (B, L, D)
        Returns:
            position embedded tensor and mask
        """
        print("in xs_pad.shape", xs_pad.shape)
        xs_pad = (xs_pad + self.neg_mean) * self.inv_stddev
        masks = None
        xs_pad = xs_pad * self.output_size() ** 0.5

        xs_pad = self.embed(xs_pad)

        # xs_pad = self.dropout(xs_pad)
        encoder_outs = self.encoders0(xs_pad, masks)
        xs_pad, masks = encoder_outs[0], encoder_outs[1]
        encoder_outs = self.encoders(xs_pad, masks)
        xs_pad, masks = encoder_outs[0], encoder_outs[1]

        if self.normalize_before:
            xs_pad = self.after_norm(xs_pad)

        print("out xs_pad.shape", xs_pad.shape)
        return xs_pad


def _pre_hook(
    state_dict,
    prefix,
    local_metadata,
    strict,
    missing_keys,
    unexpected_keys,
    error_msgs,
):
    """Perform pre-hook in load_state_dict for backward compatibility.

    Note:
        We saved self.pe until v.0.5.2 but we have omitted it later.
        Therefore, we remove the item "pe" from `state_dict` for backward compatibility.

    """
    k = prefix + "pe"
    if k in state_dict:
        state_dict.pop(k)


class DecoderLayerSANM(torch.nn.Module):
    """Single decoder layer module.

    Args:
        size (int): Input dimension.
        self_attn (torch.nn.Module): Self-attention module instance.
            `MultiHeadedAttention` instance can be used as the argument.
        src_attn (torch.nn.Module): Self-attention module instance.
            `MultiHeadedAttention` instance can be used as the argument.
        feed_forward (torch.nn.Module): Feed-forward module instance.
            `PositionwiseFeedForward`, `MultiLayeredConv1d`, or `Conv1dLinear` instance
            can be used as the argument.
        dropout_rate (float): Dropout rate.
        normalize_before (bool): Whether to use layer_norm before the first block.
        concat_after (bool): Whether to concat attention layer's input and output.
            if True, additional linear will be applied.
            i.e. x -> x + linear(concat(x, att(x)))
            if False, no additional linear will be applied. i.e. x -> x + att(x)


    """

    def __init__(
        self,
        size,
        self_attn,
        src_attn,
        feed_forward,
        dropout_rate,
        normalize_before=True,
        concat_after=False,
    ):
        """Construct an DecoderLayer object."""
        super(DecoderLayerSANM, self).__init__()
        self.size = size
        self.self_attn = self_attn
        self.src_attn = src_attn
        self.feed_forward = feed_forward
        self.norm1 = torch.nn.LayerNorm(size)
        if self_attn is not None:
            self.norm2 = torch.nn.LayerNorm(size)
        if src_attn is not None:
            self.norm3 = torch.nn.LayerNorm(size)
        self.dropout = torch.nn.Dropout(dropout_rate)
        self.normalize_before = normalize_before
        self.concat_after = concat_after
        if self.concat_after:
            self.concat_linear1 = torch.nn.Linear(size + size, size)
            self.concat_linear2 = torch.nn.Linear(size + size, size)
        self.reserve_attn = False

    def forward(self, tgt, tgt_mask, memory, memory_mask=None, cache=None):
        """Compute decoded features.

        Args:
            tgt (torch.Tensor): Input tensor (#batch, maxlen_out, size).
            tgt_mask (torch.Tensor): Mask for input tensor (#batch, maxlen_out).
            memory (torch.Tensor): Encoded memory, float32 (#batch, maxlen_in, size).
            memory_mask (torch.Tensor): Encoded memory mask (#batch, maxlen_in).
            cache (List[torch.Tensor]): List of cached tensors.
                Each tensor shape should be (#batch, maxlen_out - 1, size).

        Returns:
            torch.Tensor: Output tensor(#batch, maxlen_out, size).
            torch.Tensor: Mask for output tensor (#batch, maxlen_out).
            torch.Tensor: Encoded memory (#batch, maxlen_in, size).
            torch.Tensor: Encoded memory mask (#batch, maxlen_in).

        """
        # tgt = self.dropout(tgt)
        residual = tgt
        if self.normalize_before:
            tgt = self.norm1(tgt)
        tgt = self.feed_forward(tgt)

        x = tgt
        if self.self_attn:
            if self.normalize_before:
                tgt = self.norm2(tgt)
            x, _ = self.self_attn(tgt, tgt_mask)
            x = residual + self.dropout(x)

        if self.src_attn is not None:
            residual = x
            if self.normalize_before:
                x = self.norm3(x)

            x_src_attn = self.src_attn(x, memory, memory_mask, ret_attn=False)
            x = residual + self.dropout(x_src_attn)
            # x = residual + self.dropout(self.src_attn(x, memory, memory_mask))

        return x, tgt_mask, memory, memory_mask, cache


class MultiHeadedAttentionSANMDecoder(nn.Module):
    """Multi-Head Attention layer.

    Args:
        n_head (int): The number of heads.
        n_feat (int): The number of features.
        dropout_rate (float): Dropout rate.

    """

    def __init__(self, n_feat, dropout_rate, kernel_size, sanm_shfit=0):
        """Construct an MultiHeadedAttention object."""
        super().__init__()

        self.dropout = nn.Dropout(p=dropout_rate)

        self.fsmn_block = nn.Conv1d(
            n_feat, n_feat, kernel_size, stride=1, padding=0, groups=n_feat, bias=False
        )
        # padding
        # padding
        left_padding = (kernel_size - 1) // 2
        if sanm_shfit > 0:
            left_padding = left_padding + sanm_shfit
        right_padding = kernel_size - 1 - left_padding
        self.pad_fn = nn.ConstantPad1d((left_padding, right_padding), 0.0)
        self.kernel_size = kernel_size

    def forward(self, inputs, mask, cache=None, mask_shfit_chunk=None):
        """
        :param x: (#batch, time1, size).
        :param mask: Mask tensor (#batch, 1, time)
        :return:
        """
        # print("in fsmn, inputs", inputs.size())
        b, t, d = inputs.size()
        # logging.info(
        #     "mask: {}".format(mask.size()))
        if mask is not None:
            mask = torch.reshape(mask, (b, -1, 1))
            # logging.info("in fsmn, mask: {}, {}".format(mask.size(), mask[0:100:50, :, :]))
            if mask_shfit_chunk is not None:
                # logging.info("in fsmn, mask_fsmn: {}, {}".format(mask_shfit_chunk.size(), mask_shfit_chunk[0:100:50, :, :]))
                mask = mask * mask_shfit_chunk
            # logging.info("in fsmn, mask_after_fsmn: {}, {}".format(mask.size(), mask[0:100:50, :, :]))
            # print("in fsmn, mask", mask.size())
            # print("in fsmn, inputs", inputs.size())
            inputs = inputs * mask

        x = inputs.transpose(1, 2)
        b, d, t = x.size()
        if cache is None:
            # print("in fsmn, cache is None, x", x.size())

            x = self.pad_fn(x)
            if not self.training:
                cache = x
        else:
            # print("in fsmn, cache is not None, x", x.size())
            # x = torch.cat((x, cache), dim=2)[:, :, :-1]
            # if t < self.kernel_size:
            #     x = self.pad_fn(x)
            x = torch.cat((cache[:, :, 1:], x), dim=2)
            x = x[:, :, -(self.kernel_size + t - 1) :]
            # print("in fsmn, cache is not None, x_cat", x.size())
            cache = x
        x = self.fsmn_block(x)
        x = x.transpose(1, 2)
        # print("in fsmn, fsmn_out", x.size())
        if x.size(1) != inputs.size(1):
            inputs = inputs[:, -1, :]

        x = x + inputs
        x = self.dropout(x)
        if mask is not None:
            x = x * mask
        return x, cache


class MultiHeadedAttentionCrossAtt(nn.Module):
    """Multi-Head Attention layer.

    Args:
        n_head (int): The number of heads.
        n_feat (int): The number of features.
        dropout_rate (float): Dropout rate.

    """

    def __init__(
        self,
        n_head,
        n_feat,
        dropout_rate,
        lora_list=None,
        lora_rank=8,
        lora_alpha=16,
        lora_dropout=0.1,
        encoder_output_size=None,
    ):
        """Construct an MultiHeadedAttention object."""
        super().__init__()
        assert n_feat % n_head == 0
        # We assume d_v always equals d_k
        self.d_k = n_feat // n_head
        self.h = n_head
        self.linear_q = nn.Linear(n_feat, n_feat)
        self.linear_k_v = nn.Linear(
            n_feat if encoder_output_size is None else encoder_output_size,
            n_feat * 2,
        )
        self.linear_out = nn.Linear(n_feat, n_feat)
        self.attn = None
        self.dropout = nn.Dropout(p=dropout_rate)

    def forward_qkv(self, x, memory):
        """Transform query, key and value.

        Args:
            query (torch.Tensor): Query tensor (#batch, time1, size).
            key (torch.Tensor): Key tensor (#batch, time2, size).
            value (torch.Tensor): Value tensor (#batch, time2, size).

        Returns:
            torch.Tensor: Transformed query tensor (#batch, n_head, time1, d_k).
            torch.Tensor: Transformed key tensor (#batch, n_head, time2, d_k).
            torch.Tensor: Transformed value tensor (#batch, n_head, time2, d_k).

        """

        # print("in forward_qkv, x", x.size())
        b = x.size(0)
        q = self.linear_q(x)
        q_h = torch.reshape(q, (b, -1, self.h, self.d_k)).transpose(
            1, 2
        )  # (batch, head, time1, d_k)

        k_v = self.linear_k_v(memory)
        k, v = torch.split(k_v, int(self.h * self.d_k), dim=-1)
        k_h = torch.reshape(k, (b, -1, self.h, self.d_k)).transpose(
            1, 2
        )  # (batch, head, time2, d_k)
        v_h = torch.reshape(v, (b, -1, self.h, self.d_k)).transpose(
            1, 2
        )  # (batch, head, time2, d_k)

        return q_h, k_h, v_h

    def forward_attention(self, value, scores, mask, ret_attn=False):
        """Compute attention context vector.

        Args:
            value (torch.Tensor): Transformed value (#batch, n_head, time2, d_k).
            scores (torch.Tensor): Attention score (#batch, n_head, time1, time2).
            mask (torch.Tensor): Mask (#batch, 1, time2) or (#batch, time1, time2).

        Returns:
            torch.Tensor: Transformed value (#batch, time1, d_model)
                weighted by the attention score (#batch, time1, time2).

        """
        n_batch = value.size(0)
        if mask is not None:
            mask = mask.unsqueeze(1).eq(0)  # (batch, 1, *, time2)
            min_value = -float(
                "inf"
            )  # float(numpy.finfo(torch.tensor(0, dtype=scores.dtype).numpy().dtype).min)
            # logging.info(
            #     "scores: {}, mask_size: {}".format(scores.size(), mask.size()))
            scores = scores.masked_fill(mask, min_value)
            attn = torch.softmax(scores, dim=-1).masked_fill(
                mask, 0.0
            )  # (batch, head, time1, time2)
        else:
            attn = torch.softmax(scores, dim=-1)  # (batch, head, time1, time2)
        p_attn = self.dropout(attn)
        x = torch.matmul(p_attn, value)  # (batch, head, time1, d_k)
        x = (
            x.transpose(1, 2).contiguous().view(n_batch, -1, self.h * self.d_k)
        )  # (batch, time1, d_model)
        if ret_attn:
            return self.linear_out(x), attn  # (batch, time1, d_model)
        return self.linear_out(x)  # (batch, time1, d_model)

    def forward(self, x, memory, memory_mask, ret_attn=False):
        """Compute scaled dot product attention.

        Args:
            query (torch.Tensor): Query tensor (#batch, time1, size).
            key (torch.Tensor): Key tensor (#batch, time2, size).
            value (torch.Tensor): Value tensor (#batch, time2, size).
            mask (torch.Tensor): Mask tensor (#batch, 1, time2) or
                (#batch, time1, time2).

        Returns:
            torch.Tensor: Output tensor (#batch, time1, d_model).

        """
        q_h, k_h, v_h = self.forward_qkv(x, memory)
        q_h = q_h * self.d_k ** (-0.5)
        scores = torch.matmul(q_h, k_h.transpose(-2, -1))
        return self.forward_attention(v_h, scores, memory_mask, ret_attn=ret_attn)


class PositionwiseFeedForwardDecoderSANM(torch.nn.Module):
    """Positionwise feed forward layer.

    Args:
        idim (int): Input dimension.
        hidden_units (int): The number of hidden units.
        dropout_rate (float): Dropout rate.

    """

    def __init__(
        self, idim, hidden_units, dropout_rate, adim=None, activation=torch.nn.ReLU()
    ):
        """Construct an PositionwiseFeedForward object."""
        super(PositionwiseFeedForwardDecoderSANM, self).__init__()
        self.w_1 = torch.nn.Linear(idim, hidden_units)
        self.w_2 = torch.nn.Linear(
            hidden_units, idim if adim is None else adim, bias=False
        )
        self.dropout = torch.nn.Dropout(dropout_rate)
        self.activation = activation
        self.norm = torch.nn.LayerNorm(hidden_units)

    def forward(self, x):
        """Forward function."""
        return self.w_2(self.norm(self.dropout(self.activation(self.w_1(x)))))


class ParaformerSANMDecoder(torch.nn.Module):
    """
    Author: Speech Lab of DAMO Academy, Alibaba Group
    Paraformer: Fast and Accurate Parallel Transformer for Non-autoregressive End-to-End Speech Recognition
    https://arxiv.org/abs/2006.01713
    """

    def __init__(
        self,
        vocab_size: int,
        encoder_output_size: int,
        attention_heads: int = 4,
        linear_units: int = 2048,
        num_blocks: int = 6,
        dropout_rate: float = 0.1,
        positional_dropout_rate: float = 0.1,
        self_attention_dropout_rate: float = 0.0,
        src_attention_dropout_rate: float = 0.0,
        input_layer: str = "embed",
        use_output_layer: bool = True,
        wo_input_layer: bool = False,
        pos_enc_class="PositionalEncoding",
        normalize_before: bool = True,
        concat_after: bool = False,
        att_layer_num: int = 6,
        kernel_size: int = 21,
        sanm_shfit: int = 0,
        lora_list: List[str] = None,
        lora_rank: int = 8,
        lora_alpha: int = 16,
        lora_dropout: float = 0.1,
        chunk_multiply_factor: tuple = (1,),
        tf2torch_tensor_name_prefix_torch: str = "decoder",
        tf2torch_tensor_name_prefix_tf: str = "seq2seq/decoder",
    ):
        super().__init__()

        attention_dim = encoder_output_size

        assert wo_input_layer is False
        assert input_layer == "embed", input_layer

        # Note: self.embed is not used
        self.embed = torch.nn.Sequential(
            torch.nn.Embedding(vocab_size, attention_dim),
            # pos_enc_class(attention_dim, positional_dropout_rate),
        )

        self.normalize_before = normalize_before
        if self.normalize_before:
            self.after_norm = torch.nn.LayerNorm(attention_dim)
        if use_output_layer:
            self.output_layer = torch.nn.Linear(attention_dim, vocab_size)
        else:
            self.output_layer = None

        self.att_layer_num = att_layer_num
        self.num_blocks = num_blocks
        if sanm_shfit is None:
            sanm_shfit = (kernel_size - 1) // 2
        self.decoders = repeat(
            att_layer_num,
            lambda lnum: DecoderLayerSANM(
                attention_dim,
                MultiHeadedAttentionSANMDecoder(
                    attention_dim,
                    self_attention_dropout_rate,
                    kernel_size,
                    sanm_shfit=sanm_shfit,
                ),
                MultiHeadedAttentionCrossAtt(
                    attention_heads,
                    attention_dim,
                    src_attention_dropout_rate,
                    lora_list,
                    lora_rank,
                    lora_alpha,
                    lora_dropout,
                ),
                PositionwiseFeedForwardDecoderSANM(
                    attention_dim, linear_units, dropout_rate
                ),
                dropout_rate,
                normalize_before,
                concat_after,
            ),
        )
        if num_blocks - att_layer_num <= 0:
            self.decoders2 = None
        else:
            self.decoders2 = repeat(
                num_blocks - att_layer_num,
                lambda lnum: DecoderLayerSANM(
                    attention_dim,
                    MultiHeadedAttentionSANMDecoder(
                        attention_dim,
                        self_attention_dropout_rate,
                        kernel_size,
                        sanm_shfit=0,
                    ),
                    None,
                    PositionwiseFeedForwardDecoderSANM(
                        attention_dim, linear_units, dropout_rate
                    ),
                    dropout_rate,
                    normalize_before,
                    concat_after,
                ),
            )

        self.decoders3 = repeat(
            1,
            lambda lnum: DecoderLayerSANM(
                attention_dim,
                None,
                None,
                PositionwiseFeedForwardDecoderSANM(
                    attention_dim, linear_units, dropout_rate
                ),
                dropout_rate,
                normalize_before,
                concat_after,
            ),
        )
        self.tf2torch_tensor_name_prefix_torch = tf2torch_tensor_name_prefix_torch
        self.tf2torch_tensor_name_prefix_tf = tf2torch_tensor_name_prefix_tf
        self.chunk_multiply_factor = chunk_multiply_factor

    def forward(
        self,
        hs_pad: torch.Tensor,
        ys_in_pad: torch.Tensor,
        tgt_mask: torch.Tensor = None,
        chunk_mask: torch.Tensor = None,
        return_hidden: bool = False,
        return_both: bool = False,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Forward decoder.

        Args:
            hs_pad: encoded memory, float32  (batch, maxlen_in, feat)
            ys_in_pad:
                input token ids, int64 (batch, maxlen_out)
                if input_layer == "embed"
                input tensor (batch, maxlen_out, #mels) in the other cases
        Returns:
            (tuple): tuple containing:

            x: decoded token score before softmax (batch, maxlen_out, token)
                if use_output_layer is True,
        """
        tgt = ys_in_pad

        memory = hs_pad
        memory_mask = None

        x = tgt
        x, tgt_mask, memory, memory_mask, _ = self.decoders(
            x, tgt_mask, memory, memory_mask
        )
        if self.decoders2 is not None:
            x, tgt_mask, memory, memory_mask, _ = self.decoders2(
                x, tgt_mask, memory, memory_mask
            )
        x, tgt_mask, memory, memory_mask, _ = self.decoders3(
            x, tgt_mask, memory, memory_mask
        )
        if self.normalize_before:
            hidden = self.after_norm(x)

        print("hidden", hidden.shape)
        print("self.output_layer", self.output_layer)
        x = self.output_layer(hidden)
        print("x", x.shape)
        return x


def cif_wo_hidden_v1(alphas, threshold, return_fire_idxs=False):
    batch_size, len_time = alphas.size()
    device = alphas.device
    dtype = alphas.dtype

    threshold = torch.tensor([threshold], dtype=alphas.dtype).to(alphas.device)

    fires = torch.zeros(batch_size, len_time, dtype=dtype, device=device)

    # prefix_sum = torch.cumsum(alphas, dim=1)
    prefix_sum = torch.cumsum(alphas, dim=1, dtype=torch.float64).to(
        torch.float32
    )  # cumsum precision degradation cause wrong result in extreme
    prefix_sum_floor = torch.floor(prefix_sum)
    dislocation_prefix_sum = torch.roll(prefix_sum, 1, dims=1)
    dislocation_prefix_sum_floor = torch.floor(dislocation_prefix_sum)

    dislocation_prefix_sum_floor[:, 0] = 0
    dislocation_diff = prefix_sum_floor - dislocation_prefix_sum_floor

    fire_idxs = dislocation_diff > 0
    fires[fire_idxs] = 1
    fires = fires + prefix_sum - prefix_sum_floor
    if return_fire_idxs:
        return fires, fire_idxs
    return fires


def cif_v1(hidden, alphas, threshold):
    fires, fire_idxs = cif_wo_hidden_v1(alphas, threshold, return_fire_idxs=True)

    device = hidden.device
    dtype = hidden.dtype
    batch_size, len_time, hidden_size = hidden.size()
    # frames = torch.zeros(batch_size, len_time, hidden_size, dtype=dtype, device=device)
    # prefix_sum_hidden = torch.cumsum(alphas.unsqueeze(-1).tile((1, 1, hidden_size)) * hidden, dim=1)
    frames = torch.zeros(batch_size, len_time, hidden_size, dtype=dtype, device=device)
    prefix_sum_hidden = torch.cumsum(
        alphas.unsqueeze(-1).repeat((1, 1, hidden_size)) * hidden, dim=1
    )

    frames = prefix_sum_hidden[fire_idxs]
    shift_frames = torch.roll(frames, 1, dims=0)

    batch_len = fire_idxs.sum(1)
    batch_idxs = torch.cumsum(batch_len, dim=0)
    shift_batch_idxs = torch.roll(batch_idxs, 1, dims=0)
    shift_batch_idxs[0] = 0
    shift_frames[shift_batch_idxs] = 0

    remains = fires - torch.floor(fires)
    # remain_frames = remains[fire_idxs].unsqueeze(-1).tile((1, hidden_size)) * hidden[fire_idxs]
    remain_frames = (
        remains[fire_idxs].unsqueeze(-1).repeat((1, hidden_size)) * hidden[fire_idxs]
    )

    shift_remain_frames = torch.roll(remain_frames, 1, dims=0)
    shift_remain_frames[shift_batch_idxs] = 0

    frames = frames - shift_frames + shift_remain_frames - remain_frames

    # max_label_len = batch_len.max()
    max_label_len = (
        torch.round(alphas.sum(-1)).int().max()
    )  # torch.round to calculate the max length

    # frame_fires = torch.zeros(batch_size, max_label_len, hidden_size, dtype=dtype, device=device)
    frame_fires = torch.zeros(
        batch_size, max_label_len, hidden_size, dtype=dtype, device=device
    )
    indices = torch.arange(max_label_len, device=device).expand(batch_size, -1)
    frame_fires_idxs = indices < batch_len.unsqueeze(1)
    frame_fires[frame_fires_idxs] = frames
    return frame_fires, fires


class CifPredictorV2(torch.nn.Module):
    def __init__(
        self,
        idim,
        l_order,
        r_order,
        threshold=1.0,
        dropout=0.1,
        smooth_factor=1.0,
        noise_threshold=0,
        tail_threshold=0.0,
        tf2torch_tensor_name_prefix_torch="predictor",
        tf2torch_tensor_name_prefix_tf="seq2seq/cif",
        tail_mask=True,
    ):
        super().__init__()

        self.pad = torch.nn.ConstantPad1d((l_order, r_order), 0)
        self.cif_conv1d = torch.nn.Conv1d(idim, idim, l_order + r_order + 1)
        self.cif_output = torch.nn.Linear(idim, 1)
        self.dropout = torch.nn.Dropout(p=dropout)
        self.threshold = threshold
        self.smooth_factor = smooth_factor
        self.noise_threshold = noise_threshold
        self.tail_threshold = tail_threshold
        self.tf2torch_tensor_name_prefix_torch = tf2torch_tensor_name_prefix_torch
        self.tf2torch_tensor_name_prefix_tf = tf2torch_tensor_name_prefix_tf
        self.tail_mask = tail_mask

    def forward(
        self,
        hidden,
        target_label=None,
        mask=None,
        ignore_id=-1,
        mask_chunk_predictor=None,
        target_label_length=None,
    ):
        h = hidden
        context = h.transpose(1, 2)
        queries = self.pad(context)
        output = torch.relu(self.cif_conv1d(queries))
        output = output.transpose(1, 2)

        output = self.cif_output(output)
        alphas = torch.sigmoid(output)
        alphas = torch.nn.functional.relu(
            alphas * self.smooth_factor - self.noise_threshold
        )
        if mask is not None:
            mask = mask.transpose(-1, -2).float()
            alphas = alphas * mask
        if mask_chunk_predictor is not None:
            alphas = alphas * mask_chunk_predictor

        alphas = alphas.squeeze(-1)
        if mask is not None:
            mask = mask.squeeze(-1)

        if target_label_length is not None:
            target_length = target_label_length.squeeze(-1)
        elif target_label is not None:
            target_length = (target_label != ignore_id).float().sum(-1)
        else:
            target_length = None
        token_num = alphas.sum(-1)
        if target_length is not None:
            alphas *= (target_length / token_num)[:, None].repeat(1, alphas.size(1))
        elif self.tail_threshold > 0.0:
            if self.tail_mask:
                hidden, alphas, token_num = self.tail_process_fn(
                    hidden, alphas, token_num, mask=mask
                )
            else:
                hidden, alphas, token_num = self.tail_process_fn(
                    hidden, alphas, token_num, mask=None
                )

        acoustic_embeds, cif_peak = cif_v1(hidden, alphas, self.threshold)
        if target_length is None and self.tail_threshold > 0.0:
            token_num_int = torch.max(token_num).type(torch.int32).item()
            acoustic_embeds = acoustic_embeds[:, :token_num_int, :]

        return acoustic_embeds, token_num, alphas, cif_peak

    def tail_process_fn(self, hidden, alphas, token_num=None, mask=None):
        b, t, d = hidden.size()
        tail_threshold = self.tail_threshold
        if mask is not None:
            zeros_t = torch.zeros((b, 1), dtype=torch.float32, device=alphas.device)
            ones_t = torch.ones_like(zeros_t)
            mask_1 = torch.cat([mask, zeros_t], dim=1)
            mask_2 = torch.cat([ones_t, mask], dim=1)
            mask = mask_2 - mask_1
            tail_threshold = mask * tail_threshold
            alphas = torch.cat([alphas, zeros_t], dim=1)
            alphas = torch.add(alphas, tail_threshold)
        else:
            tail_threshold = torch.tensor([tail_threshold], dtype=alphas.dtype).to(
                alphas.device
            )
            tail_threshold = torch.reshape(tail_threshold, (1, 1))
            if b > 1:
                alphas = torch.cat([alphas, tail_threshold.repeat(b, 1)], dim=1)
            else:
                alphas = torch.cat([alphas, tail_threshold], dim=1)
        zeros = torch.zeros((b, 1, d), dtype=hidden.dtype).to(hidden.device)
        hidden = torch.cat([hidden, zeros], dim=1)
        token_num = alphas.sum(dim=-1)
        token_num_floor = torch.floor(token_num)

        return hidden, alphas, token_num_floor


class Paraformer(torch.nn.Module):
    """
    Author: Speech Lab of DAMO Academy, Alibaba Group
    Paraformer: Fast and Accurate Parallel Transformer for Non-autoregressive End-to-End Speech Recognition
    https://arxiv.org/abs/2206.08317
    """

    def __init__(
        self,
        neg_mean: torch.Tensor,
        inv_stddev: torch.Tensor,
        input_size: int,
        vocab_size: int,
        ignore_id=-1,
        encoder_conf: Optional[Dict] = None,
        decoder_conf: Optional[Dict] = None,
        predictor_conf: Optional[Dict] = None,
    ):
        super().__init__()

        self.ignore_id = ignore_id
        self.encoder = SANMEncoder(
            neg_mean, inv_stddev, input_size=input_size, **encoder_conf
        )
        encoder_output_size = self.encoder.output_size()

        self.decoder = ParaformerSANMDecoder(
            vocab_size=vocab_size,
            encoder_output_size=encoder_output_size,
            **decoder_conf,
        )
        self.predictor = CifPredictorV2(**predictor_conf)

    def forward(self, x):
        """
        Args:
          x: (N, T, C)
        """
        encoder_out = self.encoder(x)

        encoder_out_mask = None

        pre_acoustic_embeds, pre_token_length, alphas, pre_peak_index = self.predictor(
            encoder_out, None, encoder_out_mask, ignore_id=self.ignore_id
        )
        # pre_acoustic_embeds: (N, num_tokens, C)
        # pre_token_length: [num_tokens,]
        # alphas: (N, T)
        # pre_peak_index: (N, T)

        pre_token_length = pre_token_length.round().long()
        if torch.max(pre_token_length) < 1:
            return []

        decoder_outs = self.decoder(encoder_out, pre_acoustic_embeds)
        # decoder_outs: (N, num_tokens, vocab_size)
        return decoder_outs, pre_token_length


@torch.no_grad()
def test():
    import yaml

    with open("./config.yaml", "r", encoding="utf-8") as f:
        config = yaml.safe_load(f)
    print(config["encoder_conf"])

    neg_mean = torch.rand(560)
    inv_stddev = torch.rand(560)

    m = Paraformer(
        neg_mean=neg_mean,
        inv_stddev=inv_stddev,
        input_size=560,
        vocab_size=8404,
        encoder_conf=config["encoder_conf"],
        decoder_conf=config["decoder_conf"],
        predictor_conf=config["predictor_conf"],
    )
    m.eval()
    print(m.decoder)

    state_dict = torch.load("./model_state_dict.pt", map_location="cpu")["state_dict"]
    m.load_state_dict(state_dict)
    del state_dict
    print(m)


if __name__ == "__main__":
    test()


================================================
FILE: scripts/peng-cheng-starling/.gitignore
================================================
bpe.model
*.wav
*.onnx


================================================
FILE: scripts/peng-cheng-starling/README.md
================================================
# Introduction

This folder contains scripts for files from
https://github.com/yangb05/PengChengStarling


================================================
FILE: scripts/peng-cheng-starling/quantize_models.py
================================================
#!/usr/bin/env python3
from onnxruntime.quantization import QuantType, quantize_dynamic
from pathlib import Path


def main():
    suffix = "epoch-75-avg-11-chunk-16-left-128"

    for m in ["encoder", "joiner"]:
        if Path(f"{m}-{suffix}.int8.onnx").is_file():
            continue

        quantize_dynamic(
            model_input=f"./{m}-{suffix}.onnx",
            model_output=f"./{m}-{suffix}.int8.onnx",
            op_types_to_quantize=["MatMul"],
            weight_type=QuantType.QInt8,
        )


if __name__ == "__main__":
    main()


================================================
FILE: scripts/piper/.gitignore
================================================
*.sh
*.onnx
*.json
MODEL_CARD
generate_samples-vits-piper*.py


================================================
FILE: scripts/piper/add_meta_data.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import argparse
import json
from typing import Any, Dict

import onnx
from iso639 import Lang


def get_args():
    # For en_GB-semaine-medium
    # --name semaine
    # --kind medium
    # --lang en_GB
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        "--name",
        type=str,
        required=True,
    )

    parser.add_argument(
        "--kind",
        type=str,
        required=True,
    )

    parser.add_argument(
        "--lang",
        type=str,
        required=True,
    )
    return parser.parse_args()


def add_meta_data(filename: str, meta_data: Dict[str, Any]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)

    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    onnx.save(model, filename)


def load_config(filename):
    with open(filename, "r") as file:
        config = json.load(file)
    return config


def generate_tokens(config):
    id_map = config["phoneme_id_map"]
    with open("tokens.txt", "w", encoding="utf-8") as f:
        for s, i in id_map.items():
            if s == "\n":
                continue
            if isinstance(i, list):
                i = i[0]
            print(f"{s} {i}")
            f.write(f"{s} {i}\n")
    print("Generated tokens.txt")


# for en_US-lessac-medium.onnx
# export LANG=en_US
# export TYPE=lessac
# export NAME=medium
def main():
    args = get_args()
    print(args)
    lang = args.lang

    lang_iso = Lang(lang.split("_")[0])
    print(lang, lang_iso)

    kind = args.kind

    name = args.name

    # en_GB-alan-low.onnx.json
    config = load_config(f"{lang}-{name}-{kind}.onnx.json")

    print("generate tokens")
    generate_tokens(config)

    sample_rate = config["audio"]["sample_rate"]
    if sample_rate == 22500:
        print("Change sample rate from 22500 to 22050")
        sample_rate = 22050

    if "lang_code" in config:
        voice = config["lang_code"]
    else:
        voice = config["espeak"]["voice"]

    print("add model metadata")
    meta_data = {
        "model_type": "vits",
        "comment": "piper",  # must be piper for models from piper
        "language": lang_iso.name,
        "voice": voice,  # e.g., en-us
        "has_espeak": 1,
        "n_speakers": config["num_speakers"],
        "sample_rate": sample_rate,
    }
    print(meta_data)
    add_meta_data(f"{lang}-{name}-{kind}.onnx", meta_data)


main()


================================================
FILE: scripts/piper/dynamic_quantization.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import argparse

import onnxmltools
from onnxmltools.utils.float16_converter import convert_float_to_float16
from onnxruntime.quantization import QuantType, quantize_dynamic


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--input",
        type=str,
        required=True,
    )
    parser.add_argument(
        "--output-fp16",
        type=str,
        required=True,
    )

    parser.add_argument(
        "--output-int8",
        type=str,
        required=True,
    )
    return parser.parse_args()


# for op_block_list, see also
# https://github.com/microsoft/onnxruntime/blob/089c52e4522491312e6839af146a276f2351972e/onnxruntime/python/tools/transformers/float16.py#L115
#
# libc++abi: terminating with uncaught exception of type Ort::Exception:
# Type Error: Type (tensor(float16)) of output arg (/dp/RandomNormalLike_output_0)
# of node (/dp/RandomNormalLike) does not match expected type (tensor(float)).
#
# libc++abi: terminating with uncaught exception of type Ort::Exception:
# This is an invalid model. Type Error: Type 'tensor(float16)' of input
# parameter (/enc_p/encoder/attn_layers.0/Constant_84_output_0) of
# operator (Range) in node (/Range_1) is invalid.
def export_onnx_fp16(onnx_fp32_path, onnx_fp16_path):
    onnx_fp32_model = onnxmltools.utils.load_model(onnx_fp32_path)
    onnx_fp16_model = convert_float_to_float16(
        onnx_fp32_model,
        keep_io_types=True,
        op_block_list=[
            "RandomNormalLike",
            "Range",
        ],
    )
    onnxmltools.utils.save_model(onnx_fp16_model, onnx_fp16_path)


def main():
    args = get_args()
    print(args)

    in_filename = args.input
    output_fp16 = args.output_fp16
    output_int8 = args.output_int8

    quantize_dynamic(
        model_input=in_filename,
        model_output=output_int8,
        weight_type=QuantType.QUInt8,
    )

    export_onnx_fp16(in_filename, output_fp16)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/piper/generate.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import argparse
from dataclasses import dataclass
from pathlib import Path

import jinja2

"""
TODO:
 - add https://huggingface.co/csukuangfj/vits-piper-en_US-glados
"""


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--total",
        type=int,
        default=1,
        help="Number of runners",
    )
    parser.add_argument(
        "--index",
        type=int,
        default=0,
        help="Index of the current runner",
    )
    return parser.parse_args()


@dataclass
class PiperModel:
    # For en_GB-semaine-medium
    name: str  # semaine
    kind: str  # e.g. medium
    sr: int  # sample rate
    ns: int  # number of speakers
    lang: str = ""  # e.g., en_GB
    cmd: str = ""
    model_name: str = ""
    text: str = ""
    index: int = 0
    url: str = ""


# arabic
def get_ar_models():
    ar_jo = [
        PiperModel(name="kareem", kind="low", sr=16000, ns=1),
        PiperModel(name="kareem", kind="medium", sr=22050, ns=1),
    ]
    ar_jo += [
        PiperModel(
            name="SA_miro",
            kind="high",
            sr=22050,
            ns=1,
            cmd="""
                   wget -qq https://huggingface.co/OpenVoiceOS/phoonnx_ar-SA_miro_espeak/blob/main/README.md

                   echo "\n\nSee https://huggingface.co/OpenVoiceOS/phoonnx_ar-SA_miro_espeak" >> README.md
                   echo "and https://github.com/OHF-Voice/piper1-gpl/discussions/27" >> README.md
                   echo "\n\n# License\n\n" >> README.md

                   echo "See also https://github.com/k2-fsa/sherpa-onnx/pull/2480\n\n" >> README.md
                   echo "This model is licensed under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License (CC BY-NC-SA 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/).\n" >> README.md

                   echo "- ✅ Always free for regular (non-commercial) users  \n" >> README.md
                   echo "- ❌ Commercial use is not allowed at this time  \n" >> README.md
                   echo "- 🔄 The author may relax the restrictions in the future (e.g., allow commercial use), but will not make them stricter  \n\n" >> README.md
                   echo "**Important:** You must include this license when redistributing the model or any derivatives.\n" >> README.md


                   wget -qq https://huggingface.co/OpenVoiceOS/phoonnx_ar-SA_miro_espeak/resolve/main/miro_ar-SA.onnx
                   wget -qq https://huggingface.co/OpenVoiceOS/phoonnx_ar-SA_miro_espeak/resolve/main/miro_ar-SA.onnx.json
                   mv miro_ar-SA.onnx ar_JO-SA_miro-high.onnx
                   mv miro_ar-SA.onnx.json ar_JO-SA_miro-high.onnx.json
                   """,
            url="https://huggingface.co/OpenVoiceOS/phoonnx_ar-SA_miro_espeak",
        ),
        PiperModel(
            name="SA_dii",
            kind="high",
            sr=22050,
            ns=1,
            cmd="""
                   wget -qq https://huggingface.co/OpenVoiceOS/phoonnx_ar-SA_dii_espeak/resolve/main/README.md

                   echo "\n\nSee https://huggingface.co/OpenVoiceOS/phoonnx_ar-SA_dii_espeak" >> README.md
                   echo "and https://github.com/OHF-Voice/piper1-gpl/discussions/27" >> README.md
                   echo "\n\n# License\n\n" >> README.md

                   echo "See also https://github.com/k2-fsa/sherpa-onnx/pull/2480\n\n" >> README.md
                   echo "This model is licensed under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License (CC BY-NC-SA 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/).\n" >> README.md

                   echo "- ✅ Always free for regular (non-commercial) users  \n" >> README.md
                   echo "- ❌ Commercial use is not allowed at this time  \n" >> README.md
                   echo "- 🔄 The author may relax the restrictions in the future (e.g., allow commercial use), but will not make them stricter  \n\n" >> README.md
                   echo "**Important:** You must include this license when redistributing the model or any derivatives.\n" >> README.md


                   wget -qq https://huggingface.co/OpenVoiceOS/phoonnx_ar-SA_dii_espeak/resolve/main/dii_ar-SA.onnx
                   wget -qq https://huggingface.co/OpenVoiceOS/phoonnx_ar-SA_dii_espeak/resolve/main/dii_ar-SA.onnx.json
                   mv dii_ar-SA.onnx ar_JO-SA_dii-high.onnx
                   mv dii_ar-SA.onnx.json ar_JO-SA_dii-high.onnx.json
                   """,
            url="https://huggingface.co/OpenVoiceOS/phoonnx_ar-SA_dii_espeak",
        ),
        PiperModel(
            name="SA_miro_V2",
            kind="high",
            sr=22050,
            ns=1,
            cmd="""
                   wget -qq https://huggingface.co/OpenVoiceOS/phoonnx_ar-SA_miro_espeak_V2/blob/main/README.md

                   echo "\n\nSee https://huggingface.co/OpenVoiceOS/phoonnx_ar-SA_miro_espeak_V2" >> README.md
                   echo "and https://github.com/OHF-Voice/piper1-gpl/discussions/27" >> README.md
                   echo "\n\n# License\n\n" >> README.md

                   echo "See also https://github.com/k2-fsa/sherpa-onnx/pull/2480\n\n" >> README.md
                   echo "This model is licensed under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License (CC BY-NC-SA 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/).\n" >> README.md

                   echo "- ✅ Always free for regular (non-commercial) users  \n" >> README.md
                   echo "- ❌ Commercial use is not allowed at this time  \n" >> README.md
                   echo "- 🔄 The author may relax the restrictions in the future (e.g., allow commercial use), but will not make them stricter  \n\n" >> README.md
                   echo "**Important:** You must include this license when redistributing the model or any derivatives.\n" >> README.md


                   wget -qq https://huggingface.co/OpenVoiceOS/phoonnx_ar-SA_miro_espeak_V2/resolve/main/miro_ar-SA.onnx.onnx
                   wget -qq https://huggingface.co/OpenVoiceOS/phoonnx_ar-SA_miro_espeak_V2/resolve/main/miro_ar-SA.onnx.json
                   mv miro_ar-SA.onnx.onnx ar_JO-SA_miro_V2-high.onnx
                   mv miro_ar-SA.onnx.json ar_JO-SA_miro_V2-high.onnx.json
                   """,
            url="https://huggingface.co/OpenVoiceOS/phoonnx_ar-SA_miro_espeak_V2",
        ),
    ]

    for m in ar_jo:
        m.lang = "ar_JO"
        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

    ans = ar_jo

    for m in ans:
        m.text = "كيف حالك اليوم؟"
        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# catlan
def get_ca_models():
    ca_es = [
        PiperModel(name="upc_ona", kind="medium", sr=22050, ns=1),
        PiperModel(name="upc_ona", kind="x_low", sr=16000, ns=1),
        PiperModel(name="upc_pau", kind="x_low", sr=16000, ns=1),
    ]

    for m in ca_es:
        m.lang = "ca_ES"
        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

    ans = ca_es

    for m in ans:
        m.text = "Si vols estar ben servit, fes-te tu mateix el llit"
        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# czech
def get_cs_models():
    cs_cz = [
        PiperModel(name="jirka", kind="low", sr=16000, ns=1),
        PiperModel(name="jirka", kind="medium", sr=22050, ns=1),
    ]

    for m in cs_cz:
        m.lang = "cs_CZ"
        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

    ans = cs_cz

    for m in ans:
        m.text = "Co můžeš udělat dnes, neodkládej na zítřek. "
        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# welsh
def get_cy_models():
    cy_gb = [
        PiperModel(name="bu_tts", kind="medium", sr=22050, ns=7),
        PiperModel(name="gwryw_gogleddol", kind="medium", sr=22050, ns=1),
    ]

    for m in cy_gb:
        m.lang = "cy_GB"
        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

    ans = cy_gb

    for m in ans:
        m.text = "Ni all y gwynt ei hunan ei ddilyn, ac felly mae’n rhaid i’r gŵyr ddod i’r gorwel i weld y llwybr yn gyfarwydd"
        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# danish
def get_da_models():
    da_dk = [
        PiperModel(name="talesyntese", kind="medium", sr=22050, ns=1),
    ]

    for m in da_dk:
        m.lang = "da_DK"
        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

    ans = da_dk

    for m in ans:
        m.text = (
            "Hvis du går langsomt, men aldrig stopper, når du ender frem til dit mål."
        )
        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# greek
def get_el_models():
    el_gr = [
        PiperModel(name="rapunzelina", kind="low", sr=16000, ns=1),
    ]

    for m in el_gr:
        m.lang = "el_GR"
        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

    ans = el_gr

    for m in ans:
        m.text = (
            "Όταν το δέντρο είναι μικρό, το στρέβλεις· όταν είναι μεγάλο, το λυγίζεις."
        )
        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# spanish
def get_es_models():
    es_ES = [
        PiperModel(name="carlfm", kind="x_low", sr=16000, ns=1),
        PiperModel(name="davefx", kind="medium", sr=22050, ns=1),
        PiperModel(name="sharvard", kind="medium", sr=22050, ns=2),
    ]

    es_ES.extend(
        [
            # https://github.com/rhasspy/piper/issues/187#issuecomment-1802216304
            # https://drive.google.com/file/d/12tNCCyd0Hf5jsyqCw8828kLSHHx5LOw9/view
            PiperModel(
                name="glados",
                kind="medium",
                sr=22050,
                ns=1,
                cmd="""
                   wget -qq https://huggingface.co/csukuangfj/vits-piper-es_ES-glados-medium/resolve/main/es_ES-glados-medium.onnx
                   wget -qq https://huggingface.co/csukuangfj/vits-piper-es_ES-glados-medium/resolve/main/es_ES-glados-medium.onnx.json
                   wget -qq https://huggingface.co/csukuangfj/vits-piper-es_ES-glados-medium/resolve/main/README.md
                   """,
                url="https://github.com/rhasspy/piper/issues/187#issuecomment-1802216304",
            ),
        ]
    )

    es_ES.extend(
        [
            PiperModel(
                name="miro",
                kind="high",
                sr=22050,
                ns=1,
                cmd="""
                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_es-ES_miro/resolve/main/README.md

                   echo "\n\nSee https://huggingface.co/OpenVoiceOS/pipertts_es-ES_miro" >> README.md
                   echo "and https://github.com/OHF-Voice/piper1-gpl/discussions/27" >> README.md
                   echo "\n\n# License\n\n" >> README.md

                   echo "See also https://github.com/k2-fsa/sherpa-onnx/pull/2480\n\n" >> README.md
                   echo "This model is licensed under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License (CC BY-NC-SA 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/).\n" >> README.md

                   echo "- ✅ Always free for regular (non-commercial) users  \n" >> README.md
                   echo "- ❌ Commercial use is not allowed at this time  \n" >> README.md
                   echo "- 🔄 The author may relax the restrictions in the future (e.g., allow commercial use), but will not make them stricter  \n\n" >> README.md
                   echo "**Important:** You must include this license when redistributing the model or any derivatives.\n" >> README.md


                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_es-ES_miro/resolve/main/miro_es-ES.onnx
                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_es-ES_miro/resolve/main/miro_es-ES.onnx.json

                   mv miro_es-ES.onnx es_ES-miro-high.onnx
                   mv miro_es-ES.onnx.json es_ES-miro-high.onnx.json
                   """,
                url="https://huggingface.co/OpenVoiceOS/pipertts_es-ES_miro",
            ),
        ]
    )

    es_MX = [
        PiperModel(name="ald", kind="medium", sr=22050, ns=1),
        PiperModel(name="claude", kind="high", sr=22050, ns=1),
    ]

    # Argentina
    es_AR = [
        PiperModel(name="daniela", kind="high", sr=22050, ns=1),
    ]

    for m in es_ES:
        m.lang = "es_ES"

    for m in es_MX:
        m.lang = "es_MX"

    for m in es_AR:
        m.lang = "es_AR"

    ans = es_ES + es_MX + es_AR

    for m in ans:
        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

        m.text = "Cuando te encuentres ante una puerta cerrada, no olvides que a veces el destino cierra una puerta para que te desvíes hacia un camino que lleva a una ventana que nunca habrías encontrado por tu cuenta."
        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# persian
def get_fa_models():
    fa_IR = [
        PiperModel(name="amir", kind="medium", sr=22050, ns=1),
        PiperModel(name="ganji", kind="medium", sr=22050, ns=1),
        PiperModel(name="ganji_adabi", kind="medium", sr=22050, ns=1),
        PiperModel(name="gyro", kind="medium", sr=22050, ns=1),
        PiperModel(name="reza_ibrahim", kind="medium", sr=22050, ns=1),
    ]

    for m in fa_IR:
        m.lang = "fa_IR"
        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

    ans = fa_IR

    for m in ans:
        m.text = "همانطور که کوه ها در برابر باد و باران پایدارند، اما به مرور زمان خرد و پخش می شوند، انسان نیز باید در برابر مشکلات قوی باشد، اما با خرد و خویشتن داری در زندگی به پیش برود."
        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# finnish
def get_fi_models():
    fi_FI = [
        PiperModel(name="harri", kind="low", sr=16000, ns=1),
        PiperModel(name="harri", kind="medium", sr=22050, ns=1),
    ]

    for m in fi_FI:
        m.lang = "fi_FI"
        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

    ans = fi_FI

    for m in ans:
        m.text = "Sateenkaaren päässä on kultaa, mutta vain ne, jotka siihen uskovat, voivat sen löytää."
        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# french
def get_fr_models():
    fr_FR = [
        PiperModel(name="gilles", kind="low", sr=16000, ns=1),
        PiperModel(name="siwis", kind="low", sr=16000, ns=1),
        PiperModel(name="siwis", kind="medium", sr=22050, ns=1),
        PiperModel(name="tom", kind="medium", sr=44100, ns=1),
        PiperModel(name="upmc", kind="medium", sr=22050, ns=2),
    ]

    fr_FR.extend(
        [
            PiperModel(
                name="tjiho",
                kind=f"model{k}",
                sr=44100,
                ns=1,
                cmd=f"""
                   wget -qq https://huggingface.co/csukuangfj/vits-piper-fr_FR-tjiho-model{k}/resolve/main/fr_FR-tjiho-model{k}.onnx
                   wget -qq https://huggingface.co/csukuangfj/vits-piper-fr_FR-tjiho-model{k}/resolve/main/fr_FR-tjiho-model{k}.onnx.json
                   wget -qq https://huggingface.co/csukuangfj/vits-piper-fr_FR-tjiho-model{k}/resolve/main/LICENSE.txt
                   wget -qq https://huggingface.co/csukuangfj/vits-piper-fr_FR-tjiho-model{k}/resolve/main/MODEL_CARD
                   """,
                url=f"https://huggingface.co/csukuangfj/vits-piper-fr_FR-tjiho-model{k}/tree/main",
            )
            for k in [1, 2, 3]
        ]
    )

    fr_FR += [
        PiperModel(
            name="miro",
            kind="high",
            sr=22050,
            ns=1,
            cmd="""
                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_fr-FR_miro/resolve/main/README.md

                   echo "\n\nSee https://huggingface.co/OpenVoiceOS/pipertts_fr-FR_miro" >> README.md
                   echo "and https://github.com/OHF-Voice/piper1-gpl/discussions/27" >> README.md
                   echo "\n\n# License\n\n" >> README.md

                   echo "See also https://github.com/k2-fsa/sherpa-onnx/pull/2480\n\n" >> README.md
                   echo "This model is licensed under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License (CC BY-NC-SA 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/).\n" >> README.md

                   echo "- ✅ Always free for regular (non-commercial) users  \n" >> README.md
                   echo "- ❌ Commercial use is not allowed at this time  \n" >> README.md
                   echo "- 🔄 The author may relax the restrictions in the future (e.g., allow commercial use), but will not make them stricter  \n\n" >> README.md
                   echo "**Important:** You must include this license when redistributing the model or any derivatives.\n" >> README.md


                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_fr-FR_miro/resolve/main/miro_fr-FR.onnx
                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_fr-FR_miro/resolve/main/miro_fr-FR.onnx.json

                   mv miro_fr-FR.onnx fr_FR-miro-high.onnx
                   mv miro_fr-FR.onnx.json fr_FR-miro-high.onnx.json
                   """,
            url="https://huggingface.co/OpenVoiceOS/pipertts_fr-FR_miro",
        ),
    ]

    for m in fr_FR:
        m.lang = "fr_FR"
        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

    ans = fr_FR

    for m in ans:
        m.text = "Pas de nouvelles, bonnes nouvelles."
        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# hindi
def get_hi_models():
    hi_IN = [
        PiperModel(name="pratham", kind="medium", sr=22050, ns=1),
        PiperModel(name="priyamvada", kind="medium", sr=22050, ns=1),
        PiperModel(name="rohan", kind="medium", sr=22050, ns=1),
    ]

    for m in hi_IN:
        m.lang = "hi_IN"
        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

    ans = hi_IN

    for m in ans:
        m.text = "यह मत पूछो कि तुम्हारा देश तुम्हारे लिए क्या कर सकता है। यह पूछो कि तुम अपने देश के लिए क्या कर सकते हो।"
        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# hungarian
def get_hu_models():
    hu_HU = [
        PiperModel(name="anna", kind="medium", sr=22050, ns=1),
        PiperModel(name="berta", kind="medium", sr=22050, ns=1),
        PiperModel(name="imre", kind="medium", sr=22050, ns=1),
    ]

    for m in hu_HU:
        m.lang = "hu_HU"
        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

    ans = hu_HU

    for m in ans:
        m.text = "Ha északról fúj a szél, a lányok nem lógnak együtt."
        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# icelandic
def get_is_models():
    is_IS = [
        PiperModel(name="bui", kind="medium", sr=22050, ns=1),
        PiperModel(name="salka", kind="medium", sr=22050, ns=1),
        PiperModel(name="steinn", kind="medium", sr=22050, ns=1),
        PiperModel(name="ugla", kind="medium", sr=22050, ns=1),
    ]

    for m in is_IS:
        m.lang = "is_IS"
        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

    ans = is_IS

    for m in ans:
        m.text = "Farðu með allt, eða farðu ekki."
        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# italian
def get_it_models():
    it_IT = [
        PiperModel(name="paola", kind="medium", sr=22050, ns=1),
        PiperModel(name="riccardo", kind="x_low", sr=16000, ns=1),
    ]

    it_IT += [
        PiperModel(
            name="miro",
            kind="high",
            sr=22050,
            ns=1,
            cmd="""
                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_it-IT_miro/resolve/main/README.md

                   echo "\n\nSee https://huggingface.co/OpenVoiceOS/pipertts_it-IT_miro" >> README.md
                   echo "and https://github.com/OHF-Voice/piper1-gpl/discussions/27" >> README.md
                   echo "\n\n# License\n\n" >> README.md

                   echo "See also https://github.com/k2-fsa/sherpa-onnx/pull/2480\n\n" >> README.md
                   echo "This model is licensed under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License (CC BY-NC-SA 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/).\n" >> README.md

                   echo "- ✅ Always free for regular (non-commercial) users  \n" >> README.md
                   echo "- ❌ Commercial use is not allowed at this time  \n" >> README.md
                   echo "- 🔄 The author may relax the restrictions in the future (e.g., allow commercial use), but will not make them stricter  \n\n" >> README.md
                   echo "**Important:** You must include this license when redistributing the model or any derivatives.\n" >> README.md


                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_it-IT_miro/resolve/main/miro_it-IT.onnx
                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_it-IT_miro/resolve/main/miro_it-IT.onnx.json

                   mv miro_it-IT.onnx it_IT-miro-high.onnx
                   mv miro_it-IT.onnx.json it_IT-miro-high.onnx.json
                   """,
            url="https://huggingface.co/OpenVoiceOS/pipertts_it-IT_miro",
        ),
        PiperModel(
            name="dii",
            kind="high",
            sr=22050,
            ns=1,
            cmd="""
                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_it-IT_dii/resolve/main/README.md

                   echo "\n\nSee https://huggingface.co/OpenVoiceOS/pipertts_it-IT_dii" >> README.md
                   echo "and https://github.com/OHF-Voice/piper1-gpl/discussions/27" >> README.md
                   echo "\n\n# License\n\n" >> README.md

                   echo "See also https://github.com/k2-fsa/sherpa-onnx/pull/2480\n\n" >> README.md
                   echo "This model is licensed under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License (CC BY-NC-SA 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/).\n" >> README.md

                   echo "- ✅ Always free for regular (non-commercial) users  \n" >> README.md
                   echo "- ❌ Commercial use is not allowed at this time  \n" >> README.md
                   echo "- 🔄 The author may relax the restrictions in the future (e.g., allow commercial use), but will not make them stricter  \n\n" >> README.md
                   echo "**Important:** You must include this license when redistributing the model or any derivatives.\n" >> README.md


                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_it-IT_dii/resolve/main/dii_it-IT.onnx
                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_it-IT_dii/resolve/main/dii_it-IT.onnx.json

                   mv dii_it-IT.onnx it_IT-dii-high.onnx
                   mv dii_it-IT.onnx.json it_IT-dii-high.onnx.json
                   """,
            url="https://huggingface.co/OpenVoiceOS/pipertts_it-IT_dii",
        ),
    ]

    for m in it_IT:
        m.lang = "it_IT"
        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

    ans = it_IT

    for m in ans:
        m.text = (
            "Se vuoi andare veloce, vai da solo; se vuoi andare lontano, vai insieme."
        )
        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# georgian
def get_ka_models():
    ka_GE = [
        PiperModel(name="natia", kind="medium", sr=22050, ns=1),
    ]

    for m in ka_GE:
        m.lang = "ka_GE"
        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

    ans = ka_GE

    for m in ans:
        m.text = "ღვინო თბილისში, საქართველო სამტრედში"
        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# kazakh
def get_kk_models():
    kk_KZ = [
        PiperModel(name="iseke", kind="x_low", sr=16000, ns=1),
        PiperModel(name="issai", kind="high", sr=22050, ns=6),
        PiperModel(name="raya", kind="x_low", sr=16000, ns=1),
    ]

    for m in kk_KZ:
        m.lang = "kk_KZ"
        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

    ans = kk_KZ

    for m in ans:
        m.text = "Әлемнің жұлдыздары сенің көзің, жаным."
        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# luxembourgish
def get_lb_models():
    lb_LU = [
        PiperModel(name="marylux", kind="medium", sr=22050, ns=1),
    ]

    for m in lb_LU:
        m.lang = "lb_LU"
        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

    ans = lb_LU

    for m in ans:
        m.text = "Op der Haaptstrooss sinn all Stroossen Brécken, awer d'Dier kann iwwerall erreecht ginn."
        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# latvian
def get_lv_models():
    lv_LV = [
        PiperModel(name="aivars", kind="medium", sr=22050, ns=1),
    ]

    for m in lv_LV:
        m.lang = "lv_LV"
        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

    ans = lv_LV

    for m in ans:
        m.text = "Zeme nenes augļus, ja tēvs sēj, bet māte auž."
        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# malayalam
def get_ml_models():
    ml_IN = [
        PiperModel(name="arjun", kind="medium", sr=22050, ns=1),
        PiperModel(name="meera", kind="medium", sr=22050, ns=1),
    ]

    for m in ml_IN:
        m.lang = "ml_IN"
        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

    ans = ml_IN

    for m in ans:
        m.text = "മണ്ണ് മരിക്കുമ്പോൾ കാട്ടിലെ വെള്ളവും മരിക്കുന്നു."
        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# Nepali
def get_ne_models():
    ne_NP = [
        PiperModel(name="chitwan", kind="medium", sr=22050, ns=1),
        PiperModel(name="google", kind="medium", sr=22050, ns=18),
        PiperModel(name="google", kind="x_low", sr=16000, ns=18),
    ]

    for m in ne_NP:
        m.lang = "ne_NP"
        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

    ans = ne_NP

    for m in ans:
        m.text = "घाँसको पातले पहाडलाई अभिवादन गर्दै झुक्छ।"
        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# dutch
def get_nl_models():
    nl_BE = [
        PiperModel(name="nathalie", kind="medium", sr=22050, ns=1),
        PiperModel(name="nathalie", kind="x_low", sr=16000, ns=1),
    ]

    nl_NL = [
        PiperModel(name="pim", kind="medium", sr=22050, ns=1),
        PiperModel(name="ronnie", kind="medium", sr=22050, ns=1),
    ]

    nl_NL += [
        PiperModel(
            name="miro",
            kind="high",
            sr=22050,
            ns=1,
            cmd="""
                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_nl-NL_miro/resolve/main/README.md

                   echo "\n\nSee https://huggingface.co/OpenVoiceOS/pipertts_nl-NL_miro" >> README.md
                   echo "and https://github.com/OHF-Voice/piper1-gpl/discussions/27" >> README.md
                   echo "\n\n# License\n\n" >> README.md

                   echo "See also https://github.com/k2-fsa/sherpa-onnx/pull/2480\n\n" >> README.md
                   echo "This model is licensed under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License (CC BY-NC-SA 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/).\n" >> README.md

                   echo "- ✅ Always free for regular (non-commercial) users  \n" >> README.md
                   echo "- ❌ Commercial use is not allowed at this time  \n" >> README.md
                   echo "- 🔄 The author may relax the restrictions in the future (e.g., allow commercial use), but will not make them stricter  \n\n" >> README.md
                   echo "**Important:** You must include this license when redistributing the model or any derivatives.\n" >> README.md


                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_nl-NL_miro/resolve/main/miro_nl-NL.onnx
                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_nl-NL_miro/resolve/main/miro_nl-NL.onnx.json

                   mv miro_nl-NL.onnx nl_NL-miro-high.onnx
                   mv miro_nl-NL.onnx.json nl_NL-miro-high.onnx.json
                   """,
            url="https://huggingface.co/OpenVoiceOS/pipertts_nl-NL_miro",
        ),
        PiperModel(
            name="dii",
            kind="high",
            sr=22050,
            ns=1,
            cmd="""
                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_nl-NL_dii/resolve/main/README.md

                   echo "\n\nSee https://huggingface.co/OpenVoiceOS/pipertts_nl-NL_dii" >> README.md
                   echo "and https://github.com/OHF-Voice/piper1-gpl/discussions/27" >> README.md
                   echo "\n\n# License\n\n" >> README.md

                   echo "See also https://github.com/k2-fsa/sherpa-onnx/pull/2480\n\n" >> README.md
                   echo "This model is licensed under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License (CC BY-NC-SA 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/).\n" >> README.md

                   echo "- ✅ Always free for regular (non-commercial) users  \n" >> README.md
                   echo "- ❌ Commercial use is not allowed at this time  \n" >> README.md
                   echo "- 🔄 The author may relax the restrictions in the future (e.g., allow commercial use), but will not make them stricter  \n\n" >> README.md
                   echo "**Important:** You must include this license when redistributing the model or any derivatives.\n" >> README.md


                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_nl-NL_dii/resolve/main/dii_nl-NL.onnx
                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_nl-NL_dii/resolve/main/dii_nl-NL.onnx.json

                   mv dii_nl-NL.onnx nl_NL-dii-high.onnx
                   mv dii_nl-NL.onnx.json nl_NL-dii-high.onnx.json
                   """,
            url="https://huggingface.co/OpenVoiceOS/pipertts_nl-NL_dii",
        ),
    ]

    for m in nl_BE:
        m.lang = "nl_BE"

    for m in nl_NL:
        m.lang = "nl_NL"

    ans = nl_BE + nl_NL

    for m in ans:
        m.text = "God schiep het water, maar de Nederlander schiep de dijk"

        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# norwegian
def get_no_models():
    no_NO = [
        PiperModel(name="talesyntese", kind="medium", sr=22050, ns=1),
    ]

    for m in no_NO:
        m.lang = "no_NO"

    ans = no_NO

    for m in ans:
        m.text = "Uskyldig kan stormen veroorzaken"

        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# polish
def get_pl_models():
    pl_PL = [
        PiperModel(name="darkman", kind="medium", sr=22050, ns=1),
        PiperModel(name="gosia", kind="medium", sr=22050, ns=1),
        PiperModel(name="mc_speech", kind="medium", sr=22050, ns=1),
    ]

    pl_PL.extend(
        [
            PiperModel(
                name="jarvis_wg_glos",
                kind="medium",
                sr=22050,
                ns=1,
                cmd="""
                   wget -qq https://huggingface.co/WitoldG/polish_piper_models/resolve/main/pl_PL-jarvis_wg_glos-medium.onnx
                   wget -qq https://huggingface.co/WitoldG/polish_piper_models/resolve/main/pl_PL-jarvis_wg_glos-medium.onnx.json
                   wget -qq https://huggingface.co/WitoldG/polish_piper_models/resolve/main/README.md
                   """,
                url="https://github.com/k2-fsa/sherpa-onnx/issues/2402",
            ),
            PiperModel(
                name="justyna_wg_glos",
                kind="medium",
                sr=22050,
                ns=1,
                cmd="""
                   wget -qq https://huggingface.co/WitoldG/polish_piper_models/resolve/main/pl_PL-justyna_wg_glos-medium.onnx
                   wget -qq https://huggingface.co/WitoldG/polish_piper_models/resolve/main/pl_PL-justyna_wg_glos-medium.onnx.json
                   wget -qq https://huggingface.co/WitoldG/polish_piper_models/resolve/main/README.md
                   """,
                url="https://github.com/k2-fsa/sherpa-onnx/issues/2402",
            ),
            PiperModel(
                name="meski_wg_glos",
                kind="medium",
                sr=22050,
                ns=1,
                cmd="""
                   wget -qq https://huggingface.co/WitoldG/polish_piper_models/resolve/main/pl_PL-meski_wg_glos-medium.onnx
                   wget -qq https://huggingface.co/WitoldG/polish_piper_models/resolve/main/pl_PL-meski_wg_glos-medium.onnx.json
                   wget -qq https://huggingface.co/WitoldG/polish_piper_models/resolve/main/README.md
                   """,
                url="https://github.com/k2-fsa/sherpa-onnx/issues/2402",
            ),
            PiperModel(
                name="zenski_wg_glos",
                kind="medium",
                sr=22050,
                ns=1,
                cmd="""
                   wget -qq https://huggingface.co/WitoldG/polish_piper_models/resolve/main/pl_PL-zenski_wg_glos-medium.onnx
                   wget -qq https://huggingface.co/WitoldG/polish_piper_models/resolve/main/pl_PL-zenski_wg_glos-medium.onnx.json
                   wget -qq https://huggingface.co/WitoldG/polish_piper_models/resolve/main/README.md
                   """,
                url="https://github.com/k2-fsa/sherpa-onnx/issues/2402",
            ),
        ]
    )

    for m in pl_PL:
        m.lang = "pl_PL"

    ans = pl_PL

    for m in ans:
        m.text = "Nieważne, za kogo walczysz, i tak popełnisz błąd"

        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# Portuguese
def get_pt_models():
    pt_BR = [
        PiperModel(name="cadu", kind="medium", sr=22050, ns=1),
        PiperModel(name="edresson", kind="low", sr=16000, ns=1),
        PiperModel(name="faber", kind="medium", sr=22050, ns=1),
        PiperModel(name="jeff", kind="medium", sr=22050, ns=1),
    ]

    pt_PT = [
        PiperModel(
            name="tugao",
            kind="medium",
            sr=22050,
            ns=1,
            cmd="""
                    wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/pt/pt_PT/tugão/medium/pt_PT-tugão-medium.onnx
                    wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/pt/pt_PT/tugão/medium/pt_PT-tugão-medium.onnx.json
                    wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/pt/pt_PT/tugão/medium/MODEL_CARD

                    mv pt_PT-tugão-medium.onnx pt_PT-tugao-medium.onnx
                    mv pt_PT-tugão-medium.onnx.json pt_PT-tugao-medium.onnx.json
                   """,
            url="https://huggingface.co/rhasspy/piper-voices/tree/main/pt/pt_PT/tugão/medium",
        ),
    ]

    pt_PT += [
        PiperModel(
            name="miro",
            kind="high",
            sr=22050,
            ns=1,
            cmd="""
                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_pt-PT_miro/resolve/main/README.md

                   echo "\n\nSee https://huggingface.co/OpenVoiceOS/pipertts_pt-PT_miro" >> README.md
                   echo "and https://github.com/OHF-Voice/piper1-gpl/discussions/27" >> README.md
                   echo "\n\n# License\n\n" >> README.md

                   echo "See also https://github.com/k2-fsa/sherpa-onnx/pull/2480\n\n" >> README.md
                   echo "This model is licensed under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License (CC BY-NC-SA 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/).\n" >> README.md

                   echo "- ✅ Always free for regular (non-commercial) users  \n" >> README.md
                   echo "- ❌ Commercial use is not allowed at this time  \n" >> README.md
                   echo "- 🔄 The author may relax the restrictions in the future (e.g., allow commercial use), but will not make them stricter  \n\n" >> README.md
                   echo "**Important:** You must include this license when redistributing the model or any derivatives.\n" >> README.md


                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_pt-PT_miro/resolve/main/miro_pt-PT.onnx
                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_pt-PT_miro/resolve/main/miro_pt-PT.onnx.json

                   mv miro_pt-PT.onnx pt_PT-miro-high.onnx
                   mv miro_pt-PT.onnx.json pt_PT-miro-high.onnx.json
                   """,
            url="https://huggingface.co/OpenVoiceOS/pipertts_pt-PT_miro",
        ),
        PiperModel(
            name="dii",
            kind="high",
            sr=22050,
            ns=1,
            cmd="""
                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_pt-PT_dii/resolve/main/README.md

                   echo "\n\nSee https://huggingface.co/OpenVoiceOS/pipertts_pt-PT_dii" >> README.md
                   echo "and https://github.com/OHF-Voice/piper1-gpl/discussions/27" >> README.md
                   echo "\n\n# License\n\n" >> README.md

                   echo "See also https://github.com/k2-fsa/sherpa-onnx/pull/2480\n\n" >> README.md
                   echo "This model is licensed under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License (CC BY-NC-SA 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/).\n" >> README.md

                   echo "- ✅ Always free for regular (non-commercial) users  \n" >> README.md
                   echo "- ❌ Commercial use is not allowed at this time  \n" >> README.md
                   echo "- 🔄 The author may relax the restrictions in the future (e.g., allow commercial use), but will not make them stricter  \n\n" >> README.md
                   echo "**Important:** You must include this license when redistributing the model or any derivatives.\n" >> README.md


                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_pt-PT_dii/resolve/main/dii_pt-PT.onnx
                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_pt-PT_dii/resolve/main/dii_pt-PT.onnx.json

                   mv dii_pt-PT.onnx pt_PT-dii-high.onnx
                   mv dii_pt-PT.onnx.json pt_PT-dii-high.onnx.json
                   """,
            url="https://huggingface.co/OpenVoiceOS/pipertts_pt-PT_dii",
        ),
    ]

    pt_BR += [
        PiperModel(
            name="miro",
            kind="high",
            sr=22050,
            ns=1,
            cmd="""
                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_pt-BR_miro/resolve/main/README.md

                   echo "\n\nSee https://huggingface.co/OpenVoiceOS/pipertts_pt-BR_miro" >> README.md
                   echo "and https://github.com/OHF-Voice/piper1-gpl/discussions/27" >> README.md
                   echo "\n\n# License\n\n" >> README.md

                   echo "See also https://github.com/k2-fsa/sherpa-onnx/pull/2480\n\n" >> README.md
                   echo "This model is licensed under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License (CC BY-NC-SA 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/).\n" >> README.md

                   echo "- ✅ Always free for regular (non-commercial) users  \n" >> README.md
                   echo "- ❌ Commercial use is not allowed at this time  \n" >> README.md
                   echo "- 🔄 The author may relax the restrictions in the future (e.g., allow commercial use), but will not make them stricter  \n\n" >> README.md
                   echo "**Important:** You must include this license when redistributing the model or any derivatives.\n" >> README.md


                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_pt-BR_miro/resolve/main/miro_pt-BR.onnx
                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_pt-BR_miro/resolve/main/miro_pt-BR.onnx.json

                   mv miro_pt-BR.onnx pt_BR-miro-high.onnx
                   mv miro_pt-BR.onnx.json pt_BR-miro-high.onnx.json
                   """,
            url="https://huggingface.co/OpenVoiceOS/pipertts_pt-BR_miro",
        ),
        PiperModel(
            name="dii",
            kind="high",
            sr=22050,
            ns=1,
            cmd="""
                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_pt-BR_dii/resolve/main/README.md

                   echo "\n\nSee https://huggingface.co/OpenVoiceOS/pipertts_pt-BR_dii" >> README.md
                   echo "and https://github.com/OHF-Voice/piper1-gpl/discussions/27" >> README.md
                   echo "\n\n# License\n\n" >> README.md

                   echo "See also https://github.com/k2-fsa/sherpa-onnx/pull/2480\n\n" >> README.md
                   echo "This model is licensed under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License (CC BY-NC-SA 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/).\n" >> README.md

                   echo "- ✅ Always free for regular (non-commercial) users  \n" >> README.md
                   echo "- ❌ Commercial use is not allowed at this time  \n" >> README.md
                   echo "- 🔄 The author may relax the restrictions in the future (e.g., allow commercial use), but will not make them stricter  \n\n" >> README.md
                   echo "**Important:** You must include this license when redistributing the model or any derivatives.\n" >> README.md


                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_pt-BR_dii/resolve/main/dii_pt-BR.onnx
                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_pt-BR_dii/resolve/main/dii_pt-BR.onnx.json

                   mv dii_pt-BR.onnx pt_BR-dii-high.onnx
                   mv dii_pt-BR.onnx.json pt_BR-dii-high.onnx.json
                   """,
            url="https://huggingface.co/OpenVoiceOS/pipertts_pt-BR_dii",
        ),
    ]

    for m in pt_BR:
        m.lang = "pt_BR"

    for m in pt_PT:
        m.lang = "pt_PT"

    ans = pt_BR + pt_PT

    for m in ans:
        m.text = "Marinha sem vento, não chega a porto"

        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# Romanian
def get_ro_models():
    ro_RO = [
        PiperModel(name="mihai", kind="medium", sr=22050, ns=1),
    ]

    for m in ro_RO:
        m.lang = "ro_RO"

    ans = ro_RO

    for m in ans:
        m.text = "Un foc fără lemne se stinge, o lume fără poveste moare."

        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# Russian
def get_ru_models():
    ru_RU = [
        PiperModel(name="denis", kind="medium", sr=22050, ns=1),
        PiperModel(name="dmitri", kind="medium", sr=22050, ns=1),
        PiperModel(name="irina", kind="medium", sr=22050, ns=1),
        PiperModel(name="ruslan", kind="medium", sr=22050, ns=1),
    ]

    for m in ru_RU:
        m.lang = "ru_RU"

    ans = ru_RU

    for m in ans:
        m.text = "Если курица укусит, ей отрубят голову."

        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# Slovak
def get_sk_models():
    sk_SK = [
        PiperModel(name="lili", kind="medium", sr=22050, ns=1),
    ]

    for m in sk_SK:
        m.lang = "sk_SK"

    ans = sk_SK

    for m in ans:
        m.text = "Kto nepozná strach, nepozná vôľu."

        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# Slovenian
def get_sl_models():
    sl_SI = [
        PiperModel(name="artur", kind="medium", sr=22050, ns=1),
    ]

    for m in sl_SI:
        m.lang = "sl_SI"

    ans = sl_SI

    for m in ans:
        m.text = "Kto sa nebojí, nie je hlúpy."

        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# Serbian
def get_sr_models():
    sr_RS = [
        PiperModel(name="serbski_institut", kind="medium", sr=22050, ns=2),
    ]

    for m in sr_RS:
        m.lang = "sr_RS"

    ans = sr_RS

    for m in ans:
        m.text = "Круг не може постојати без свог центра, а нација не може постојати без својих хероја."

        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# Swedish
def get_sv_models():
    sv_SE = [
        PiperModel(name="lisa", kind="medium", sr=22050, ns=1),
        PiperModel(name="nst", kind="medium", sr=22050, ns=1),
    ]

    for m in sv_SE:
        m.lang = "sv_SE"

    ans = sv_SE

    for m in ans:
        m.text = "Liten skog, med många träd"

        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# Swahili
def get_sw_models():
    sw_CD = [
        PiperModel(name="lanfrica", kind="medium", sr=22050, ns=1),
    ]

    for m in sw_CD:
        m.lang = "sw_CD"

    ans = sw_CD

    for m in ans:
        m.text = "Mtu mmoja hawezi kuiba mazingira."

        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# Turkish
def get_tr_models():
    tr_TR = [
        PiperModel(name="dfki", kind="medium", sr=22050, ns=1),
        PiperModel(name="fahrettin", kind="medium", sr=22050, ns=1),
        PiperModel(name="fettah", kind="medium", sr=22050, ns=1),
    ]

    for m in tr_TR:
        m.lang = "tr_TR"

    ans = tr_TR

    for m in ans:
        m.text = "Bir evin duvarları, bir adamın sözü, bir kadının gülü kırılmaz"

        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# Ukrainian
def get_uk_models():
    uk_UA = [
        PiperModel(name="lada", kind="x_low", sr=16000, ns=1),
        PiperModel(name="ukrainian_tts", kind="medium", sr=22050, ns=3),
    ]

    for m in uk_UA:
        m.lang = "uk_UA"

    ans = uk_UA

    for m in ans:
        m.text = "Ви не можете навчити коня, якщо не відвикнете від годівлі."

        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# Vietnamese
def get_vi_models():
    vi_VN = [
        PiperModel(name="25hours_single", kind="low", sr=16000, ns=1),
        PiperModel(name="vais1000", kind="medium", sr=22050, ns=1),
        PiperModel(name="vivos", kind="x_low", sr=16000, ns=65),
    ]

    for m in vi_VN:
        m.lang = "vi_VN"

    ans = vi_VN

    for m in ans:
        m.text = "Nước cũ đào gỗ mới, sông cũ chảy nước mới"

        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


# Indonesian
def get_id_models():
    id_ID = [
        PiperModel(name="news_tts", kind="medium", sr=22050, ns=1),
    ]

    for m in id_ID:
        m.lang = "id_ID"

    ans = id_ID

    for m in ans:
        m.text = "Jangan tanyakan apa yang negara bisa berikan kepadamu, tapi tanyakan apa yang bisa kamu berikan untuk negaramu."

        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


def get_en_models():
    en_gb = [
        PiperModel(name="alan", kind="low", sr=16000, ns=1),
        PiperModel(name="alan", kind="medium", sr=22050, ns=1),
        PiperModel(name="alba", kind="medium", sr=22050, ns=1),
        PiperModel(name="aru", kind="medium", sr=22050, ns=12),
        PiperModel(name="cori", kind="high", sr=22050, ns=1),
        PiperModel(name="cori", kind="medium", sr=22050, ns=1),
        PiperModel(name="jenny_dioco", kind="medium", sr=22050, ns=1),
        PiperModel(name="northern_english_male", kind="medium", sr=22050, ns=1),
        PiperModel(name="semaine", kind="medium", sr=22050, ns=4),
        PiperModel(name="southern_english_female", kind="low", sr=16000, ns=1),
        PiperModel(name="vctk", kind="medium", sr=22050, ns=109),
    ]
    en_us = [
        PiperModel(name="amy", kind="low", sr=16000, ns=1),
        PiperModel(name="amy", kind="medium", sr=22050, ns=1),
        PiperModel(name="arctic", kind="medium", sr=22050, ns=18),
        PiperModel(name="bryce", kind="medium", sr=22050, ns=1),
        PiperModel(name="danny", kind="low", sr=16000, ns=1),
        PiperModel(name="hfc_female", kind="medium", sr=22050, ns=1),
        PiperModel(name="hfc_male", kind="medium", sr=22050, ns=1),
        PiperModel(name="joe", kind="medium", sr=22050, ns=1),
        PiperModel(name="john", kind="medium", sr=22050, ns=1),
        PiperModel(name="kathleen", kind="low", sr=16000, ns=1),
        PiperModel(name="kristin", kind="medium", sr=22050, ns=1),
        PiperModel(name="kusal", kind="medium", sr=22050, ns=1),
        PiperModel(name="l2arctic", kind="medium", sr=22050, ns=24),
        PiperModel(name="lessac", kind="high", sr=22050, ns=1),
        PiperModel(name="lessac", kind="low", sr=16000, ns=1),
        PiperModel(name="lessac", kind="medium", sr=22050, ns=1),
        PiperModel(name="libritts", kind="high", sr=22050, ns=904),
        PiperModel(name="libritts_r", kind="medium", sr=22050, ns=904),
        PiperModel(name="ljspeech", kind="high", sr=22050, ns=1),
        PiperModel(name="ljspeech", kind="medium", sr=22050, ns=1),
        PiperModel(name="norman", kind="medium", sr=22050, ns=1),
        PiperModel(name="reza_ibrahim", kind="medium", sr=22050, ns=1),
        PiperModel(name="ryan", kind="high", sr=22050, ns=1),
        PiperModel(name="ryan", kind="low", sr=16000, ns=1),
        PiperModel(name="ryan", kind="medium", sr=22050, ns=1),
        PiperModel(name="sam", kind="medium", sr=22050, ns=1),
    ]

    en_gb.extend(
        [
            PiperModel(
                name="southern_english_female",
                kind="medium",
                sr=22050,
                ns=6,
                cmd="""
                   wget -qq https://huggingface.co/csukuangfj/vits-piper-en_GB-southern_english_female-medium/resolve/main/en_GB-southern_english_female-medium.onnx
                   wget -qq https://huggingface.co/csukuangfj/vits-piper-en_GB-southern_english_female-medium/resolve/main/en_GB-southern_english_female-medium.onnx.json
                   """,
                url="https://huggingface.co/csukuangfj/vits-piper-en_GB-southern_english_female-medium",
            ),
            PiperModel(
                name="southern_english_male",
                kind="medium",
                sr=22050,
                ns=8,
                cmd="""
                   wget -qq https://huggingface.co/csukuangfj/vits-piper-en_GB-southern_english_male-medium/resolve/main/en_GB-southern_english_male-medium.onnx
                   wget -qq https://huggingface.co/csukuangfj/vits-piper-en_GB-southern_english_male-medium/resolve/main/en_GB-southern_english_male-medium.onnx.json
                   """,
                url="https://huggingface.co/csukuangfj/vits-piper-en_GB-southern_english_male-medium",
            ),
        ]
    )

    en_gb += [
        PiperModel(
            name="miro",
            kind="high",
            sr=22050,
            ns=1,
            cmd="""
                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_en-GB_miro/resolve/main/README.md

                   echo "\n\nSee https://huggingface.co/OpenVoiceOS/pipertts_en-GB_miro" >> README.md
                   echo "and https://github.com/OHF-Voice/piper1-gpl/discussions/27" >> README.md
                   echo "\n\n# License\n\n" >> README.md

                   echo "See also https://github.com/k2-fsa/sherpa-onnx/pull/2480\n\n" >> README.md
                   echo "This model is licensed under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License (CC BY-NC-SA 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/).\n" >> README.md

                   echo "- ✅ Always free for regular (non-commercial) users  \n" >> README.md
                   echo "- ❌ Commercial use is not allowed at this time  \n" >> README.md
                   echo "- 🔄 The author may relax the restrictions in the future (e.g., allow commercial use), but will not make them stricter  \n\n" >> README.md
                   echo "**Important:** You must include this license when redistributing the model or any derivatives.\n" >> README.md


                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_en-GB_miro/resolve/main/miro_en-GB.onnx
                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_en-GB_miro/resolve/main/miro_en-GB.onnx.json

                   mv miro_en-GB.onnx en_GB-miro-high.onnx
                   mv miro_en-GB.onnx.json en_GB-miro-high.onnx.json
                   """,
            url="https://huggingface.co/OpenVoiceOS/pipertts_en-GB_miro",
        ),
        PiperModel(
            name="dii",
            kind="high",
            sr=22050,
            ns=1,
            cmd="""
                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_en-GB_dii/resolve/main/README.md

                   echo "\n\nSee https://huggingface.co/OpenVoiceOS/pipertts_en-GB_dii" >> README.md
                   echo "and https://github.com/OHF-Voice/piper1-gpl/discussions/27" >> README.md
                   echo "\n\n# License\n\n" >> README.md

                   echo "See also https://github.com/k2-fsa/sherpa-onnx/pull/2480\n\n" >> README.md
                   echo "This model is licensed under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License (CC BY-NC-SA 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/).\n" >> README.md

                   echo "- ✅ Always free for regular (non-commercial) users  \n" >> README.md
                   echo "- ❌ Commercial use is not allowed at this time  \n" >> README.md
                   echo "- 🔄 The author may relax the restrictions in the future (e.g., allow commercial use), but will not make them stricter  \n\n" >> README.md
                   echo "**Important:** You must include this license when redistributing the model or any derivatives.\n" >> README.md


                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_en-GB_dii/resolve/main/dii_en-GB.onnx
                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_en-GB_dii/resolve/main/dii_en-GB.onnx.json

                   mv dii_en-GB.onnx en_GB-dii-high.onnx
                   mv dii_en-GB.onnx.json en_GB-dii-high.onnx.json
                   """,
            url="https://huggingface.co/OpenVoiceOS/pipertts_en-GB_dii",
        ),
    ]

    en_us.extend(
        [
            # https://github.com/rhasspy/piper/issues/187#issuecomment-1805709037
            # https://drive.google.com/file/d/1t2D7zP-e2flduS5duHm__UMB9RjuGqWK/view
            PiperModel(
                name="glados",
                kind="high",
                sr=22050,
                ns=1,
                cmd="""
                   wget -qq https://huggingface.co/csukuangfj/en_US-glados-high/resolve/main/en_US-glados-high.onnx
                   wget -qq https://huggingface.co/csukuangfj/en_US-glados-high/resolve/main/en_US-glados-high.onnx.json
                   wget -qq https://huggingface.co/csukuangfj/en_US-glados-high/resolve/main/README.md
                   wget -qq https://huggingface.co/csukuangfj/en_US-glados-high/resolve/main/MODEL_CARD
                   """,
                url="https://github.com/rhasspy/piper/issues/187#issuecomment-1805709037",
            ),
        ]
    )

    en_us += [
        PiperModel(
            name="miro",
            kind="high",
            sr=22050,
            ns=1,
            cmd="""
                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_en-US_miro/resolve/main/README.md

                   echo "\n\nSee https://huggingface.co/OpenVoiceOS/pipertts_en-US_miro" >> README.md
                   echo "and https://github.com/OHF-Voice/piper1-gpl/discussions/27" >> README.md
                   echo "\n\n# License\n\n" >> README.md

                   echo "See also https://github.com/k2-fsa/sherpa-onnx/pull/2480\n\n" >> README.md
                   echo "This model is licensed under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License (CC BY-NC-SA 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/).\n" >> README.md

                   echo "- ✅ Always free for regular (non-commercial) users  \n" >> README.md
                   echo "- ❌ Commercial use is not allowed at this time  \n" >> README.md
                   echo "- 🔄 The author may relax the restrictions in the future (e.g., allow commercial use), but will not make them stricter  \n\n" >> README.md
                   echo "**Important:** You must include this license when redistributing the model or any derivatives.\n" >> README.md


                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_en-US_miro/resolve/main/miro_en-US.onnx
                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_en-US_miro/resolve/main/miro_en-US.onnx.json

                   mv miro_en-US.onnx en_US-miro-high.onnx
                   mv miro_en-US.onnx.json en_US-miro-high.onnx.json
                   """,
            url="https://huggingface.co/OpenVoiceOS/pipertts_en-US_miro",
        ),
    ]

    for m in en_gb:
        m.lang = "en_GB"
        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

    for m in en_us:
        m.lang = "en_US"
        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

    ans = en_gb + en_us

    for m in ans:
        m.text = "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """
        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


def get_de_models():
    de_de = [
        PiperModel(name="eva_k", kind="x_low", sr=16000, ns=1),
        PiperModel(name="karlsson", kind="low", sr=16000, ns=1),
        PiperModel(name="kerstin", kind="low", sr=16000, ns=1),
        PiperModel(name="pavoque", kind="low", sr=16000, ns=1),
        PiperModel(name="ramona", kind="low", sr=16000, ns=1),
        PiperModel(name="thorsten", kind="high", sr=22050, ns=1),
        PiperModel(name="thorsten", kind="low", sr=16000, ns=1),
        PiperModel(name="thorsten", kind="medium", sr=22050, ns=1),
        PiperModel(name="thorsten_emotional", kind="medium", sr=22050, ns=8),
        # https://github.com/rhasspy/piper/issues/187#issuecomment-2691653607
        PiperModel(
            name="glados",
            kind="high",
            sr=22050,
            ns=1,
            cmd="""
               wget -qq https://huggingface.co/systemofapwne/piper-de-glados/resolve/main/de/de_DE/glados/high/de_DE-glados-high.onnx
               wget -qq https://huggingface.co/systemofapwne/piper-de-glados/resolve/main/de/de_DE/glados/high/de_DE-glados-high.onnx.json
               wget -qq https://huggingface.co/systemofapwne/piper-de-glados/resolve/main/de/de_DE/glados/high/MODEL_CARD
               wget -qq https://huggingface.co/csukuangfj/vits-piper-de_DE-glados-high/resolve/main/README.md
               """,
            url="https://huggingface.co/systemofapwne/piper-de-glados",
        ),
        PiperModel(
            name="glados",
            kind="low",
            sr=16000,
            ns=1,
            cmd="""
               wget -qq https://huggingface.co/systemofapwne/piper-de-glados/resolve/main/de/de_DE/glados/low/de_DE-glados-low.onnx
               wget -qq https://huggingface.co/systemofapwne/piper-de-glados/resolve/main/de/de_DE/glados/low/de_DE-glados-low.onnx.json
               wget -qq https://huggingface.co/systemofapwne/piper-de-glados/resolve/main/de/de_DE/glados/low/MODEL_CARD
               wget -qq https://huggingface.co/csukuangfj/vits-piper-de_DE-glados-low/resolve/main/README.md
               """,
            url="https://huggingface.co/systemofapwne/piper-de-glados",
        ),
        PiperModel(
            name="glados",
            kind="medium",
            sr=22050,
            ns=1,
            cmd="""
               wget -qq https://huggingface.co/systemofapwne/piper-de-glados/resolve/main/de/de_DE/glados/medium/de_DE-glados-medium.onnx
               wget -qq https://huggingface.co/systemofapwne/piper-de-glados/resolve/main/de/de_DE/glados/medium/de_DE-glados-medium.onnx.json
               wget -qq https://huggingface.co/systemofapwne/piper-de-glados/resolve/main/de/de_DE/glados/medium/MODEL_CARD
               wget -qq https://huggingface.co/csukuangfj/vits-piper-de_DE-glados-medium/resolve/main/README.md
               """,
            url="https://huggingface.co/systemofapwne/piper-de-glados",
        ),
        PiperModel(
            name="glados_turret",
            kind="high",
            sr=22050,
            ns=1,
            cmd="""
               wget -qq https://huggingface.co/systemofapwne/piper-de-glados/resolve/main/de/de_DE/glados-turret/high/de_DE-glados-turret-high.onnx
               mv de_DE-glados-turret-high.onnx de_DE-glados_turret-high.onnx
               wget -qq https://huggingface.co/systemofapwne/piper-de-glados/resolve/main/de/de_DE/glados-turret/high/de_DE-glados-turret-high.onnx.json
               mv de_DE-glados-turret-high.onnx.json de_DE-glados_turret-high.onnx.json
               wget -qq https://huggingface.co/systemofapwne/piper-de-glados/resolve/main/de/de_DE/glados-turret/high/MODEL_CARD
               wget https://huggingface.co/csukuangfj/vits-piper-de_DE-glados_turret-high/resolve/main/README.md
               """,
            url="https://huggingface.co/systemofapwne/piper-de-glados",
        ),
        PiperModel(
            name="glados_turret",
            kind="low",
            sr=16000,
            ns=1,
            cmd="""
               wget -qq https://huggingface.co/systemofapwne/piper-de-glados/resolve/main/de/de_DE/glados-turret/low/de_DE-glados-turret-low.onnx
               mv de_DE-glados-turret-low.onnx de_DE-glados_turret-low.onnx
               wget -qq https://huggingface.co/systemofapwne/piper-de-glados/resolve/main/de/de_DE/glados-turret/low/de_DE-glados-turret-low.onnx.json
               mv de_DE-glados-turret-low.onnx.json de_DE-glados_turret-low.onnx.json
               wget -qq https://huggingface.co/systemofapwne/piper-de-glados/resolve/main/de/de_DE/glados-turret/low/MODEL_CARD
               wget https://huggingface.co/csukuangfj/vits-piper-de_DE-glados_turret-low/resolve/main/README.md
               """,
            url="https://huggingface.co/systemofapwne/piper-de-glados",
        ),
        PiperModel(
            name="glados_turret",
            kind="medium",
            sr=22050,
            ns=1,
            cmd="""
               wget -qq https://huggingface.co/systemofapwne/piper-de-glados/resolve/main/de/de_DE/glados-turret/medium/de_DE-glados-turret-medium.onnx
               mv de_DE-glados-turret-medium.onnx de_DE-glados_turret-medium.onnx
               wget -qq https://huggingface.co/systemofapwne/piper-de-glados/resolve/main/de/de_DE/glados-turret/medium/de_DE-glados-turret-medium.onnx.json
               mv de_DE-glados-turret-medium.onnx.json de_DE-glados_turret-medium.onnx.json
               wget -qq https://huggingface.co/systemofapwne/piper-de-glados/resolve/main/de/de_DE/glados-turret/medium/MODEL_CARD
               wget https://huggingface.co/csukuangfj/vits-piper-de_DE-glados_turret-medium/resolve/main/README.md
               """,
            url="https://huggingface.co/systemofapwne/piper-de-glados",
        ),
    ]

    de_de += [
        PiperModel(
            name="miro",
            kind="high",
            sr=22050,
            ns=1,
            cmd="""
                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_de-DE_miro/resolve/main/README.md

                   echo "\n\nSee https://huggingface.co/OpenVoiceOS/pipertts_de-DE_miro" >> README.md
                   echo "and https://github.com/OHF-Voice/piper1-gpl/discussions/27" >> README.md
                   echo "\n\n# License\n\n" >> README.md

                   echo "See also https://github.com/k2-fsa/sherpa-onnx/pull/2480\n\n" >> README.md
                   echo "This model is licensed under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License (CC BY-NC-SA 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/).\n" >> README.md

                   echo "- ✅ Always free for regular (non-commercial) users  \n" >> README.md
                   echo "- ❌ Commercial use is not allowed at this time  \n" >> README.md
                   echo "- 🔄 The author may relax the restrictions in the future (e.g., allow commercial use), but will not make them stricter  \n\n" >> README.md
                   echo "**Important:** You must include this license when redistributing the model or any derivatives.\n" >> README.md


                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_de-DE_miro/resolve/main/miro_de-DE.onnx
                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_de-DE_miro/resolve/main/miro_de-DE.onnx.json

                   mv miro_de-DE.onnx de_DE-miro-high.onnx
                   mv miro_de-DE.onnx.json de_DE-miro-high.onnx.json
                   """,
            url="https://huggingface.co/OpenVoiceOS/pipertts_de-DE_miro",
        ),
        PiperModel(
            name="dii",
            kind="high",
            sr=22050,
            ns=1,
            cmd="""
                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_de-DE_dii/resolve/main/README.md

                   echo "\n\nSee https://huggingface.co/OpenVoiceOS/pipertts_de-DE_dii" >> README.md
                   echo "and https://github.com/OHF-Voice/piper1-gpl/discussions/27" >> README.md
                   echo "\n\n# License\n\n" >> README.md

                   echo "See also https://github.com/k2-fsa/sherpa-onnx/pull/2480\n\n" >> README.md
                   echo "This model is licensed under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License (CC BY-NC-SA 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/).\n" >> README.md

                   echo "- ✅ Always free for regular (non-commercial) users  \n" >> README.md
                   echo "- ❌ Commercial use is not allowed at this time  \n" >> README.md
                   echo "- 🔄 The author may relax the restrictions in the future (e.g., allow commercial use), but will not make them stricter  \n\n" >> README.md
                   echo "**Important:** You must include this license when redistributing the model or any derivatives.\n" >> README.md


                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_de-DE_dii/resolve/main/dii_de-DE.onnx
                   wget -qq https://huggingface.co/OpenVoiceOS/pipertts_de-DE_dii/resolve/main/dii_de-DE.onnx.json

                   mv dii_de-DE.onnx de_DE-dii-high.onnx
                   mv dii_de-DE.onnx.json de_DE-dii-high.onnx.json
                   """,
            url="https://huggingface.co/OpenVoiceOS/pipertts_de-DE_dii",
        ),
    ]

    for m in de_de:
        m.lang = "de_DE"
        if m.model_name == "":
            m.model_name = f"{m.lang}-{m.name}-{m.kind}.onnx"

    ans = de_de

    for m in ans:
        m.text = "Alles hat ein Ende, nur die Wurst hat zwei."
        code = m.lang[:2]
        if m.cmd == "":
            m.cmd = f"""
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/{m.model_name}.json
            wget -qq https://huggingface.co/rhasspy/piper-voices/resolve/main/{code}/{m.lang}/{m.name}/{m.kind}/MODEL_CARD
            """

        if m.url == "":
            m.url = f"https://huggingface.co/rhasspy/piper-voices/tree/main/{code}/{m.lang}/{m.name}/{m.kind}"

    return ans


def get_all_models():
    ans = []
    ans += get_ar_models()
    ans += get_ca_models()
    ans += get_cs_models()
    ans += get_cy_models()
    ans += get_da_models()
    ans += get_de_models()
    ans += get_el_models()
    ans += get_en_models()
    ans += get_es_models()
    ans += get_fa_models()
    ans += get_fi_models()
    ans += get_fr_models()
    ans += get_hi_models()
    ans += get_id_models()
    ans += get_hu_models()
    ans += get_is_models()
    ans += get_it_models()
    ans += get_ka_models()
    ans += get_kk_models()
    ans += get_lb_models()
    ans += get_lv_models()
    ans += get_ml_models()
    ans += get_ne_models()
    ans += get_nl_models()
    ans += get_no_models()
    ans += get_pl_models()
    ans += get_pt_models()
    ans += get_ro_models()
    ans += get_ru_models()
    ans += get_sk_models()
    ans += get_sl_models()
    ans += get_sr_models()
    ans += get_sv_models()
    ans += get_sw_models()
    ans += get_tr_models()
    ans += get_uk_models()
    ans += get_vi_models()


    for i, m in enumerate(ans):
        m.index = i

    return ans


def main():
    args = get_args()
    index = args.index
    total = args.total
    assert 0 <= index < total, (index, total)

    all_model_list = get_all_models()

    print(all_model_list)

    num_models = len(all_model_list)
    num_per_runner = num_models // total
    if num_per_runner <= 0:
        raise ValueError(f"num_models: {num_models}, num_runners: {total}")

    start = index * num_per_runner
    end = start + num_per_runner

    remaining = num_models - args.total * num_per_runner

    print(f"{index}/{total}: {start}-{end}/{num_models}")

    d = dict()
    d["model_list"] = all_model_list[start:end]

    if index < remaining:
        s = args.total * num_per_runner + index
        d["model_list"].append(all_model_list[s])
        print(f"{s}/{num_models}")

    filename_list = [
        "./generate.sh",
    ]
    for filename in filename_list:
        environment = jinja2.Environment()
        if not Path(f"{filename}.in").is_file():
            print(f"skip {filename}")
            continue

        with open(f"{filename}.in") as f:
            s = f.read()
        template = environment.from_string(s)

        s = template.render(**d)
        with open(filename, "w") as f:
            print(s, file=f)

    print(f"There are {len(all_model_list)} models")
    for m in all_model_list:
        print(m.index, m.model_name)

    if Path("hf").is_dir():
        with open("./generate_samples.py.in") as f:
            s = f.read()
        template = environment.from_string(s)
        for m in all_model_list:
            model_dir = f"vits-piper-{m.lang}-{m.name}-{m.kind}"
            d = {
                "model": f"{model_dir}/{m.model_name}",
                "data_dir": f"{model_dir}/espeak-ng-data",
                "tokens": f"{model_dir}/tokens.txt",
                "text": m.text,
            }
            for i in range(m.ns):
                s = template.render(
                    **d,
                    sid=i,
                    output_filename=f"hf/piper/mp3/{m.lang}/{model_dir}/{i}.mp3",
                )

                with open(f"generate_samples-{model_dir}-{i}.py", "w") as f:
                    print(s, file=f)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/piper/generate.sh.in
================================================
#!/usr/bin/env bash
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)
#
# Auto generated! Do NOT edit!

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
tar xf espeak-ng-data.tar.bz2
rm espeak-ng-data.tar.bz2

mkdir -p release

{% for model in model_list %}

name={{ model.name }}
kind={{ model.kind }}
lang={{ model.lang }}
model_name={{ model.model_name }}
text="{{ model.text }}"
num_speakers={{ model.ns }}
sample_rate={{ model.sr }}

{{ model.cmd }}

echo "files"

ls -lh
echo "---"

python3 ./add_meta_data.py \
  --name $name \
  --kind $kind \
  --lang $lang

dst=vits-piper-$lang-$name-$kind
dst_int8=vits-piper-$lang-$name-$kind-int8
dst_fp16=vits-piper-$lang-$name-$kind-fp16
mkdir -p $dst

mv -v tokens.txt  $dst/
mv -v MODEL_CARD $dst/ || true
mv -v README $dst/ || true
mv -v README.md $dst/ || true
mv -v LICENSE.txt $dst/ || true
mv -v *.json  $dst/
cp -a ./espeak-ng-data $dst/

cp -a $dst $dst_int8
cp -a $dst $dst_fp16

mv -v *.onnx  $dst/

python3 ./dynamic_quantization.py \
  --input $dst/$model_name \
  --output-int8 $dst_int8/$model_name \
  --output-fp16 $dst_fp16/$model_name >/dev/null 2>&1

echo "---fp32---"
ls -lh $dst

echo "---int8---"
ls -lh $dst_int8

echo "---fp16---"
ls -lh $dst_fp16

tar cjf ${dst}.tar.bz2 $dst
tar cjf ${dst_int8}.tar.bz2 $dst_int8
tar cjf ${dst_fp16}.tar.bz2 $dst_fp16

if [ -d hf ]; then
  mkdir -p hf/piper/mp3/$lang/vits-piper-$lang-$name-$kind
  for i in $(seq $num_speakers); do
    i=$((i-1))
    python3 ./generate_samples-$dst-$i.py
  done
  ls -lh hf/piper/mp3/$lang/vits-piper-$lang-$name-$kind
fi

mv $dst release
mv $dst_int8 release
mv $dst_fp16 release

ls -lh release/*

{% endfor %}


================================================
FILE: scripts/piper/generate_samples.py.in
================================================
import sherpa_onnx
import soundfile as sf

config = sherpa_onnx.OfflineTtsConfig(
    model=sherpa_onnx.OfflineTtsModelConfig(
        vits=sherpa_onnx.OfflineTtsVitsModelConfig(
            model="{{ model }}",
            lexicon="",
            data_dir="{{ data_dir }}",
            tokens="{{ tokens }}",
        ),
        num_threads=1,
    ),
)

if not config.validate():
    raise ValueError("Please check your config")

tts = sherpa_onnx.OfflineTts(config)
audio = tts.generate(text="{{text}}", sid={{sid}}, speed=1.0)

sf.write("{{ output_filename }}", audio.samples, samplerate=audio.sample_rate)


================================================
FILE: scripts/pocket-tts/.gitignore
================================================
*.json
*.model


================================================
FILE: scripts/pocket-tts/README.md
================================================
# Introduction

- [./convert_tokenizer.py](./convert_tokenizer.py) It produces `./token_scores.json`
  and `./vocab.json` from [./tokenizer.model](https://huggingface.co/KevinAHM/pocket-tts-onnx/resolve/main/tokenizer.model)

- [./test_tokenizer.py](./test_tokenizer.py) is used to test the exported `./token_scores.json`
  and `./vocab.json`

In C++, we don't need to use the [sentencepiece](https://github.com/google/sentencepiece) or protobuf for the tokenizer.


================================================
FILE: scripts/pocket-tts/convert_tokenizer.py
================================================
#!/usr/bin/env python3
#
# Copyright (c)  2026  Xiaomi Corporation

import json

import sentencepiece as spm

sp = spm.SentencePieceProcessor(model_file="tokenizer.model")

token2id = {}
token2score = {}

for i in range(sp.get_piece_size()):
    tok = sp.id_to_piece(i)
    token2id[tok] = i
    token2score[tok] = sp.get_score(i)

with open("vocab.json", "w", encoding="utf-8") as f:
    json.dump(token2id, f, indent=2, ensure_ascii=False)

with open("token_scores.json", "w", encoding="utf-8") as f:
    json.dump(token2score, f, indent=2, ensure_ascii=False)


================================================
FILE: scripts/pocket-tts/test_tokenizer.py
================================================
#!/usr/bin/env python3
#
# Copyright (c)  2026  Xiaomi Corporation

import json

import sentencepiece as spm


class SentencePieceBPETokenizer:
    def __init__(self, vocab_json, token_scores_json):
        with open(vocab_json, encoding="utf-8") as f:
            self.token2id = json.load(f)

        with open(token_scores_json, encoding="utf-8") as f:
            self.token2score = json.load(f)

        self.id2token = {v: k for k, v in self.token2id.items()}

        # index tokens by first char for speed
        self.by_first_char = {}
        for tok in self.token2id:
            if tok:
                self.by_first_char.setdefault(tok[0], []).append(tok)

        # byte fallback <0xNN>
        self.byte_token = {b: f"<0x{b:02X}>" for b in range(256)}

    def encode(self, text, return_type="ids"):
        text = text.replace(" ", "▁")
        if not text.startswith("▁"):
            text = "▁" + text

        n = len(text)
        dp = [-1e30] * (n + 1)
        back = [None] * (n + 1)
        dp[n] = 0.0

        for i in range(n - 1, -1, -1):
            c = text[i]

            for tok in self.by_first_char.get(c, []):
                if text.startswith(tok, i):
                    j = i + len(tok)
                    score = self.token2score[tok] + dp[j]
                    if score > dp[i]:
                        dp[i] = score
                        back[i] = tok

            # byte fallback
            if back[i] is None:
                b = text[i].encode("utf-8")[0]
                tok = self.byte_token[b]
                dp[i] = self.token2score[tok] + dp[i + 1]
                back[i] = tok

        # reconstruct
        tokens = []
        i = 0
        while i < n:
            tok = back[i]
            tokens.append(tok)
            i += len(tok)

        if return_type == "tokens":
            return tokens
        return [self.token2id[t] for t in tokens]


def main():
    tokenizer = SentencePieceBPETokenizer(
        vocab_json="./vocab.json", token_scores_json="./token_scores.json"
    )
    s = "Yesterday, I bought 3 apples, 2 bananas, and a dozen oranges. Wow! That's amazing—did you see it too? I can't believe it's already 10:30 p.m."

    tokens = tokenizer.encode(s, return_type="tokens")
    token_ids = tokenizer.encode(s, return_type="int")
    print(tokens)
    print(token_ids)
    sp = spm.SentencePieceProcessor(model_file="tokenizer.model")
    #  print(help(sp.encode))

    gt_tokens = sp.encode(s, out_type=str)
    gt_token_ids = sp.encode(s, out_type=int)
    print(gt_tokens)
    print(len(tokens), len(gt_tokens))
    a = []
    for k, p in zip(tokens, gt_tokens):
        a.append(k == p)
    print(a)

    print(token_ids)
    print(gt_token_ids)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/pyannote/segmentation/.gitignore
================================================
*.bin
*.onnx


================================================
FILE: scripts/pyannote/segmentation/README.md
================================================
# File description

Please download test wave files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models

## 0-four-speakers-zh.wav

It is recorded by @csukuangfj

## 1-two-speakers-en.wav

This file is from
https://github.com/pengzhendong/pyannote-onnx/blob/master/data/test_16k.wav
and it contains speeches from two speakers.

Note that we have renamed it from `test_16k.wav` to `1-two-speakers-en.wav`


## 2-two-speakers-en.wav
This file is from
https://huggingface.co/spaces/Xenova/whisper-speaker-diarization

Note that the original file is `./fcf059e3-689f-47ec-a000-bdace87f0113.mp4`.
We use the following commands to convert it to `2-two-speakers-en.wav`.

```bash
ffmpeg -i ./fcf059e3-689f-47ec-a000-bdace87f0113.mp4 -ac 1 -ar 16000 ./2-two-speakers-en.wav
```

## 3-two-speakers-en.wav

This file is from
https://aws.amazon.com/blogs/machine-learning/deploy-a-hugging-face-pyannote-speaker-diarization-model-on-amazon-sagemaker-as-an-asynchronous-endpoint/

Note that the original file is `ML16091-Audio.mp3`. We use the following
commands to convert it to `3-two-speakers-en.wav`


```bash
sox ML16091-Audio.mp3 -r 16k 3-two-speakers-en.wav
```


================================================
FILE: scripts/pyannote/segmentation/export-onnx.py
================================================
#!/usr/bin/env python3
# Copyright      2024  Xiaomi Corp.        (authors: Fangjun Kuang)

import os
from typing import Any, Dict

import onnx
import torch
from onnxruntime.quantization import QuantType, quantize_dynamic
from pyannote.audio import Model
from pyannote.audio.core.task import Problem, Resolution


def add_meta_data(filename: str, meta_data: Dict[str, Any]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)

    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    onnx.save(model, filename)


@torch.no_grad()
def main():
    # You can download ./pytorch_model.bin from
    # https://hf-mirror.com/csukuangfj/pyannote-models/tree/main/segmentation-3.0
    # or from
    # https://huggingface.co/Revai/reverb-diarization-v1/tree/main
    pt_filename = "./pytorch_model.bin"
    model = Model.from_pretrained(pt_filename)
    model.eval()
    assert model.dimension == 7, model.dimension
    print(model.specifications)

    assert (
        model.specifications.problem == Problem.MONO_LABEL_CLASSIFICATION
    ), model.specifications.problem

    assert (
        model.specifications.resolution == Resolution.FRAME
    ), model.specifications.resolution

    assert model.specifications.duration == 10.0, model.specifications.duration

    assert model.audio.sample_rate == 16000, model.audio.sample_rate

    # (batch, num_channels, num_samples)
    assert list(model.example_input_array.shape) == [
        1,
        1,
        16000 * 10,
    ], model.example_input_array.shape

    example_output = model(model.example_input_array)

    # (batch, num_frames, num_classes)
    assert list(example_output.shape) == [1, 589, 7], example_output.shape

    assert model.receptive_field.step == 0.016875, model.receptive_field.step
    assert model.receptive_field.duration == 0.0619375, model.receptive_field.duration
    assert model.receptive_field.step * 16000 == 270, model.receptive_field.step * 16000
    assert model.receptive_field.duration * 16000 == 991, (
        model.receptive_field.duration * 16000
    )

    opset_version = 13

    filename = "model.onnx"
    torch.onnx.export(
        model,
        model.example_input_array,
        filename,
        opset_version=opset_version,
        input_names=["x"],
        output_names=["y"],
        dynamic_axes={
            "x": {0: "N", 2: "T"},
            "y": {0: "N", 1: "T"},
        },
    )

    sample_rate = model.audio.sample_rate

    window_size = int(model.specifications.duration) * 16000
    receptive_field_size = int(model.receptive_field.duration * 16000)
    receptive_field_shift = int(model.receptive_field.step * 16000)

    is_revai = os.getenv("SHERPA_ONNX_IS_REVAI", "")
    if is_revai == "":
        url_1 = "https://huggingface.co/pyannote/segmentation-3.0"
        url_2 = "https://huggingface.co/csukuangfj/pyannote-models/tree/main/segmentation-3.0"
        license_url = (
            "https://huggingface.co/pyannote/segmentation-3.0/blob/main/LICENSE"
        )
        model_author = "pyannote-audio"
    else:
        url_1 = "https://huggingface.co/Revai/reverb-diarization-v1"
        url_2 = "https://huggingface.co/csukuangfj/sherpa-onnx-reverb-diarization-v1"
        license_url = (
            "https://huggingface.co/Revai/reverb-diarization-v1/blob/main/LICENSE"
        )
        model_author = "Revai"

    meta_data = {
        "num_speakers": len(model.specifications.classes),
        "powerset_max_classes": model.specifications.powerset_max_classes,
        "num_classes": model.dimension,
        "sample_rate": sample_rate,
        "window_size": window_size,
        "receptive_field_size": receptive_field_size,
        "receptive_field_shift": receptive_field_shift,
        "model_type": "pyannote-segmentation-3.0",
        "version": "1",
        "model_author": model_author,
        "maintainer": "k2-fsa",
        "url_1": url_1,
        "url_2": url_2,
        "license": license_url,
    }
    add_meta_data(filename=filename, meta_data=meta_data)

    print("Generate int8 quantization models")

    filename_int8 = "model.int8.onnx"
    quantize_dynamic(
        model_input=filename,
        model_output=filename_int8,
        weight_type=QuantType.QUInt8,
    )

    print(f"Saved to {filename} and {filename_int8}")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/pyannote/segmentation/notes.md
================================================

# config.yaml


```yaml
task:
  _target_: pyannote.audio.tasks.SpeakerDiarization
  duration: 10.0
  max_speakers_per_chunk: 3
  max_speakers_per_frame: 2
model:
  _target_: pyannote.audio.models.segmentation.PyanNet
  sample_rate: 16000
  num_channels: 1
  sincnet:
    stride: 10
  lstm:
    hidden_size: 128
    num_layers: 4
    bidirectional: true
    monolithic: true
  linear:
    hidden_size: 128
    num_layers: 2
```

# Model architecture of ./pytorch_model.bin

`print(model)`:

```python3
PyanNet(
  (sincnet): SincNet(
    (wav_norm1d): InstanceNorm1d(1, eps=1e-05, momentum=0.1, affine=True, track_running_stats=False)
    (conv1d): ModuleList(
      (0): Encoder(
        (filterbank): ParamSincFB()
      )
      (1): Conv1d(80, 60, kernel_size=(5,), stride=(1,))
      (2): Conv1d(60, 60, kernel_size=(5,), stride=(1,))
    )
    (pool1d): ModuleList(
      (0-2): 3 x MaxPool1d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)
    )
    (norm1d): ModuleList(
      (0): InstanceNorm1d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=False)
      (1-2): 2 x InstanceNorm1d(60, eps=1e-05, momentum=0.1, affine=True, track_running_stats=False)
    )
  )
  (lstm): LSTM(60, 128, num_layers=4, batch_first=True, dropout=0.5, bidirectional=True)
  (linear): ModuleList(
    (0): Linear(in_features=256, out_features=128, bias=True)
    (1): Linear(in_features=128, out_features=128, bias=True)
  )
  (classifier): Linear(in_features=128, out_features=7, bias=True)
  (activation): LogSoftmax(dim=-1)
)
```

```python3
>>> list(model.specifications)
[Specifications(problem=<Problem.MONO_LABEL_CLASSIFICATION: 1>, resolution=<Resolution.FRAME: 1>, duration=10.0, min_duration=None, warm_up=(0.0, 0.0), classes=['speaker#1', 'speaker#2', 'speaker#3'], powerset_max_classes=2, permutation_invariant=True)]
```

```python3
>>> model.hparams
"linear":       {'hidden_size': 128, 'num_layers': 2}
"lstm":         {'hidden_size': 128, 'num_layers': 4, 'bidirectional': True, 'monolithic': True, 'dropout': 0.5, 'batch_first': True}
"num_channels": 1
"sample_rate":  16000
"sincnet":      {'stride': 10, 'sample_rate': 16000}
```

## Papers

- [pyannote.audio 2.1 speaker diarization pipeline: principle, benchmark, and recipe](https://hal.science/hal-04247212/document)
- [pyannote.audio speaker diarization pipeline at VoxSRC 2023](https://mmai.io/datasets/voxceleb/voxsrc/data_workshop_2023/reports/pyannote_report.pdf)


================================================
FILE: scripts/pyannote/segmentation/preprocess.sh
================================================
#!/usr/bin/env bash
# Copyright      2024  Xiaomi Corp.        (authors: Fangjun Kuang)


python3 -m onnxruntime.quantization.preprocess --input model.onnx --output tmp.preprocessed.onnx
mv ./tmp.preprocessed.onnx ./model.onnx
./show-onnx.py --filename ./model.onnx

<<EOF
=========./model.onnx==========
NodeArg(name='x', type='tensor(float)', shape=[1, 1, 'T'])
-----
NodeArg(name='y', type='tensor(float)', shape=[1, 'floor(floor(floor(floor(T/10 - 251/10)/3 - 2/3)/3)/3 - 8/3) + 1', 7])

  floor(floor(floor(floor(T/10 - 251/10)/3 - 2/3)/3)/3 - 8/3) + 1
= floor(floor(floor(floor(T - 251)/30 - 2/3)/3)/3 - 8/3) + 1
= floor(floor(floor(floor(T - 271)/30)/3)/3 - 8/3) + 1
= floor(floor(floor(floor(T - 271)/90))/3 - 8/3) + 1
= floor(floor(floor(T - 271)/90)/3 - 8/3) + 1
= floor(floor((T - 271)/90)/3 - 8/3) + 1
= floor(floor((T - 271)/90 - 8)/3) + 1
= floor(floor((T - 271 - 720)/90)/3) + 1
= floor(floor((T - 991)/90)/3) + 1
= floor(floor((T - 991)/270)) + 1
= (T - 991)/270 + 1
= (T - 991 + 270)/270
= (T - 721)/270

It means:
 - Number of input samples should be at least 721
 - One frame corresponds to 270 samples. (If we use T + 270, it outputs one more frame)
EOF


================================================
FILE: scripts/pyannote/segmentation/show-onnx.py
================================================
#!/usr/bin/env python3
# Copyright      2024  Xiaomi Corp.        (authors: Fangjun Kuang)

import onnxruntime
import argparse


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--filename",
        type=str,
        required=True,
        help="Path to model.onnx",
    )

    return parser.parse_args()


def show(filename):
    session_opts = onnxruntime.SessionOptions()
    session_opts.log_severity_level = 3
    sess = onnxruntime.InferenceSession(filename, session_opts)
    for i in sess.get_inputs():
        print(i)

    print("-----")

    for i in sess.get_outputs():
        print(i)


def main():
    args = get_args()
    print(f"========={args.filename}==========")
    show(args.filename)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/pyannote/segmentation/speaker-diarization-onnx.py
================================================
#!/usr/bin/env python3
# Copyright    2024  Xiaomi Corp.        (authors: Fangjun Kuang)

"""
Please refer to
https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/speaker-diarization.yaml
for usages.
"""

import argparse
from datetime import timedelta
from pathlib import Path
from typing import List

import librosa
import numpy as np
import onnxruntime as ort
import sherpa_onnx
import soundfile as sf
from numpy.lib.stride_tricks import as_strided


class Segment:
    def __init__(
        self,
        start,
        end,
        speaker,
    ):
        assert start < end
        self.start = start
        self.end = end
        self.speaker = speaker

    def merge(self, other, gap=0.5):
        assert self.speaker == other.speaker, (self.speaker, other.speaker)
        if self.end < other.start and self.end + gap >= other.start:
            return Segment(start=self.start, end=other.end, speaker=self.speaker)
        elif other.end < self.start and other.end + gap >= self.start:
            return Segment(start=other.start, end=self.end, speaker=self.speaker)
        else:
            return None

    @property
    def duration(self):
        return self.end - self.start

    def __str__(self):
        s = f"{timedelta(seconds=self.start)}"[:-3]
        s += " --> "
        s += f"{timedelta(seconds=self.end)}"[:-3]
        s += f" speaker_{self.speaker:02d}"
        return s


def merge_segment_list(in_out: List[Segment], min_duration_off: float):
    changed = True
    while changed:
        changed = False
        for i in range(len(in_out)):
            if i + 1 >= len(in_out):
                continue

            new_segment = in_out[i].merge(in_out[i + 1], gap=min_duration_off)
            if new_segment is None:
                continue
            del in_out[i + 1]
            in_out[i] = new_segment
            changed = True
            break


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--seg-model",
        type=str,
        required=True,
        help="Path to model.onnx for segmentation",
    )
    parser.add_argument(
        "--speaker-embedding-model",
        type=str,
        required=True,
        help="Path to model.onnx for speaker embedding extractor",
    )
    parser.add_argument("--wav", type=str, required=True, help="Path to test.wav")

    return parser.parse_args()


class OnnxSegmentationModel:
    def __init__(self, filename):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.session_opts = session_opts

        self.model = ort.InferenceSession(
            filename,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )

        meta = self.model.get_modelmeta().custom_metadata_map
        print(meta)

        self.window_size = int(meta["window_size"])
        self.sample_rate = int(meta["sample_rate"])
        self.window_shift = int(0.1 * self.window_size)
        self.receptive_field_size = int(meta["receptive_field_size"])
        self.receptive_field_shift = int(meta["receptive_field_shift"])
        self.num_speakers = int(meta["num_speakers"])
        self.powerset_max_classes = int(meta["powerset_max_classes"])
        self.num_classes = int(meta["num_classes"])

    def __call__(self, x):
        """
        Args:
          x: (N, num_samples)
        Returns:
          A tensor of shape (N, num_frames, num_classes)
        """
        x = np.expand_dims(x, axis=1)

        (y,) = self.model.run(
            [self.model.get_outputs()[0].name], {self.model.get_inputs()[0].name: x}
        )

        return y


def load_wav(filename, expected_sample_rate) -> np.ndarray:
    audio, sample_rate = sf.read(filename, dtype="float32", always_2d=True)
    audio = audio[:, 0]  # only use the first channel
    if sample_rate != expected_sample_rate:
        audio = librosa.resample(
            audio,
            orig_sr=sample_rate,
            target_sr=expected_sample_rate,
        )
    return audio


def get_powerset_mapping(num_classes, num_speakers, powerset_max_classes):
    mapping = np.zeros((num_classes, num_speakers))

    k = 1
    for i in range(1, powerset_max_classes + 1):
        if i == 1:
            for j in range(0, num_speakers):
                mapping[k, j] = 1
                k += 1
        elif i == 2:
            for j in range(0, num_speakers):
                for m in range(j + 1, num_speakers):
                    mapping[k, j] = 1
                    mapping[k, m] = 1
                    k += 1
        elif i == 3:
            raise RuntimeError("Unsupported")

    return mapping


def to_multi_label(y, mapping):
    """
    Args:
      y: (num_chunks, num_frames, num_classes)
    Returns:
      A tensor of shape (num_chunks, num_frames, num_speakers)
    """
    y = np.argmax(y, axis=-1)
    labels = mapping[y.reshape(-1)].reshape(y.shape[0], y.shape[1], -1)
    return labels


# speaker count per frame
def speaker_count(labels, seg_m):
    """
    Args:
      labels: (num_chunks, num_frames, num_speakers)
      seg_m: Segmentation model
    Returns:
      A integer array of shape (num_total_frames,)
    """
    labels = labels.sum(axis=-1)
    # Now labels: (num_chunks, num_frames)

    num_frames = (
        int(
            (seg_m.window_size + (labels.shape[0] - 1) * seg_m.window_shift)
            / seg_m.receptive_field_shift
        )
        + 1
    )
    ans = np.zeros((num_frames,))
    count = np.zeros((num_frames,))

    for i in range(labels.shape[0]):
        this_chunk = labels[i]
        start = int(i * seg_m.window_shift / seg_m.receptive_field_shift + 0.5)
        end = start + this_chunk.shape[0]
        ans[start:end] += this_chunk
        count[start:end] += 1

    ans /= np.maximum(count, 1e-12)

    return (ans + 0.5).astype(np.int8)


def load_speaker_embedding_model(filename):
    config = sherpa_onnx.SpeakerEmbeddingExtractorConfig(
        model=filename,
        num_threads=1,
        debug=0,
    )
    if not config.validate():
        raise ValueError(f"Invalid config. {config}")
    extractor = sherpa_onnx.SpeakerEmbeddingExtractor(config)
    return extractor


def get_embeddings(embedding_filename, audio, labels, seg_m, exclude_overlap):
    """
    Args:
      embedding_filename: Path to the speaker embedding extractor model
      audio: (num_samples,)
      labels: (num_chunks, num_frames, num_speakers)
      seg_m: segmentation model
    Returns:
      Return (num_chunks, num_speakers, embedding_dim)
    """
    if exclude_overlap:
        labels = labels * (labels.sum(axis=-1, keepdims=True) < 2)

    extractor = load_speaker_embedding_model(embedding_filename)
    buffer = np.empty(seg_m.window_size)
    num_chunks, num_frames, num_speakers = labels.shape

    ans_chunk_speaker_pair = []
    ans_embeddings = []

    for i in range(num_chunks):
        labels_T = labels[i].T
        # t: (num_speakers, num_frames)

        sample_offset = i * seg_m.window_shift

        for j in range(num_speakers):
            frames = labels_T[j]
            if frames.sum() < 10:
                # skip segment less than 20 frames, i.e., about 0.2 seconds
                continue

            start = None
            start_samples = 0
            idx = 0
            for k in range(num_frames):
                if frames[k] != 0:
                    if start is None:
                        start = k
                elif start is not None:
                    start_samples = (
                        int(start / num_frames * seg_m.window_size) + sample_offset
                    )
                    end_samples = (
                        int(k / num_frames * seg_m.window_size) + sample_offset
                    )
                    num_samples = end_samples - start_samples
                    buffer[idx : idx + num_samples] = audio[start_samples:end_samples]
                    idx += num_samples

                    start = None
            if start is not None:
                start_samples = (
                    int(start / num_frames * seg_m.window_size) + sample_offset
                )
                end_samples = int(k / num_frames * seg_m.window_size) + sample_offset
                num_samples = end_samples - start_samples
                buffer[idx : idx + num_samples] = audio[start_samples:end_samples]
                idx += num_samples

            stream = extractor.create_stream()
            stream.accept_waveform(sample_rate=seg_m.sample_rate, waveform=buffer[:idx])
            stream.input_finished()

            assert extractor.is_ready(stream)
            embedding = extractor.compute(stream)
            embedding = np.array(embedding)

            ans_chunk_speaker_pair.append([i, j])
            ans_embeddings.append(embedding)

    assert len(ans_chunk_speaker_pair) == len(ans_embeddings), (
        len(ans_chunk_speaker_pair),
        len(ans_embeddings),
    )
    return ans_chunk_speaker_pair, np.array(ans_embeddings)


def main():
    args = get_args()
    assert Path(args.seg_model).is_file(), args.seg_model
    assert Path(args.wav).is_file(), args.wav

    seg_m = OnnxSegmentationModel(args.seg_model)
    audio = load_wav(args.wav, seg_m.sample_rate)
    # audio: (num_samples,)

    num = (audio.shape[0] - seg_m.window_size) // seg_m.window_shift + 1

    samples = as_strided(
        audio,
        shape=(num, seg_m.window_size),
        strides=(seg_m.window_shift * audio.strides[0], audio.strides[0]),
    )

    # or use torch.Tensor.unfold
    #  samples = torch.from_numpy(audio).unfold(0, seg_m.window_size, seg_m.window_shift).numpy()

    if (
        audio.shape[0] < seg_m.window_size
        or (audio.shape[0] - seg_m.window_size) % seg_m.window_shift > 0
    ):
        has_last_chunk = True
    else:
        has_last_chunk = False

    num_chunks = samples.shape[0]
    batch_size = 32
    output = []
    for i in range(0, num_chunks, batch_size):
        start = i
        end = i + batch_size
        # it's perfectly ok to use end > num_chunks
        y = seg_m(samples[start:end])
        output.append(y)

    if has_last_chunk:
        last_chunk = audio[num_chunks * seg_m.window_shift :]  # noqa
        pad_size = seg_m.window_size - last_chunk.shape[0]
        last_chunk = np.pad(last_chunk, (0, pad_size))
        last_chunk = np.expand_dims(last_chunk, axis=0)
        y = seg_m(last_chunk)
        output.append(y)

    y = np.vstack(output)
    # y: (num_chunks, num_frames, num_classes)

    mapping = get_powerset_mapping(
        num_classes=seg_m.num_classes,
        num_speakers=seg_m.num_speakers,
        powerset_max_classes=seg_m.powerset_max_classes,
    )
    labels = to_multi_label(y, mapping=mapping)
    # labels: (num_chunks, num_frames, num_speakers)

    inactive = (labels.sum(axis=1) == 0).astype(np.int8)
    # inactive: (num_chunks, num_speakers)

    speakers_per_frame = speaker_count(labels=labels, seg_m=seg_m)
    # speakers_per_frame: (num_frames, speakers_per_frame)

    if speakers_per_frame.max() == 0:
        print("No speakers found in the audio file!")
        return

    # if users specify only 1 speaker for clustering, then return the
    # result directly

    # Now, get embeddings
    chunk_speaker_pair, embeddings = get_embeddings(
        args.speaker_embedding_model,
        audio=audio,
        labels=labels,
        seg_m=seg_m,
        #  exclude_overlap=True,
        exclude_overlap=False,
    )
    # chunk_speaker_pair: a list of (chunk_idx, speaker_idx)
    # embeddings: (batch_size, embedding_dim)

    # Please change num_clusters or threshold by yourself.
    clustering_config = sherpa_onnx.FastClusteringConfig(num_clusters=2)
    #  clustering_config = sherpa_onnx.FastClusteringConfig(threshold=0.8)
    clustering = sherpa_onnx.FastClustering(clustering_config)
    cluster_labels = clustering(embeddings)

    chunk_speaker_to_cluster = dict()
    for (chunk_idx, speaker_idx), cluster_idx in zip(
        chunk_speaker_pair, cluster_labels
    ):
        if inactive[chunk_idx, speaker_idx] == 1:
            print("skip ", chunk_idx, speaker_idx)
            continue
        chunk_speaker_to_cluster[(chunk_idx, speaker_idx)] = cluster_idx

    num_speakers = max(cluster_labels) + 1
    relabels = np.zeros((labels.shape[0], labels.shape[1], num_speakers))
    for i in range(labels.shape[0]):
        for j in range(labels.shape[1]):
            for k in range(labels.shape[2]):
                if (i, k) not in chunk_speaker_to_cluster:
                    continue
                t = chunk_speaker_to_cluster[(i, k)]

                if labels[i, j, k] == 1:
                    relabels[i, j, t] = 1

    num_frames = (
        int(
            (seg_m.window_size + (relabels.shape[0] - 1) * seg_m.window_shift)
            / seg_m.receptive_field_shift
        )
        + 1
    )

    count = np.zeros((num_frames, relabels.shape[-1]))
    for i in range(relabels.shape[0]):
        this_chunk = relabels[i]
        start = int(i * seg_m.window_shift / seg_m.receptive_field_shift + 0.5)
        end = start + this_chunk.shape[0]
        count[start:end] += this_chunk

    if has_last_chunk:
        stop_frame = int(audio.shape[0] / seg_m.receptive_field_shift)
        count = count[:stop_frame]

    sorted_count = np.argsort(-count, axis=-1)
    final = np.zeros((count.shape[0], count.shape[1]))

    for i, (c, sc) in enumerate(zip(speakers_per_frame, sorted_count)):
        for k in range(c):
            final[i, sc[k]] = 1

    min_duration_off = 0.5
    min_duration_on = 0.3
    onset = 0.5
    offset = 0.5
    # final: (num_frames, num_speakers)

    final = final.T
    for kk in range(final.shape[0]):
        segment_list = []
        frames = final[kk]

        is_active = frames[0] > onset

        start = None
        if is_active:
            start = 0
        scale = seg_m.receptive_field_shift / seg_m.sample_rate
        scale_offset = seg_m.receptive_field_size / seg_m.sample_rate * 0.5
        for i in range(1, len(frames)):
            if is_active:
                if frames[i] < offset:
                    segment = Segment(
                        start=start * scale + scale_offset,
                        end=i * scale + scale_offset,
                        speaker=kk,
                    )
                    segment_list.append(segment)
                    is_active = False
            else:
                if frames[i] > onset:
                    start = i
                    is_active = True

        if is_active:
            segment = Segment(
                start=start * scale + scale_offset,
                end=(len(frames) - 1) * scale + scale_offset,
                speaker=kk,
            )
            segment_list.append(segment)

        if len(segment_list) > 1:
            merge_segment_list(segment_list, min_duration_off=min_duration_off)
            for s in segment_list:
                if s.duration < min_duration_on:
                    continue
                print(s)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/pyannote/segmentation/speaker-diarization-torch.py
================================================
#!/usr/bin/env python3
# Copyright      2024  Xiaomi Corp.        (authors: Fangjun Kuang)

"""
Please refer to
https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/speaker-diarization.yaml
for usages.
"""

"""
1. Go to https://huggingface.co/hbredin/wespeaker-voxceleb-resnet34-LM/tree/main
wget https://huggingface.co/hbredin/wespeaker-voxceleb-resnet34-LM/resolve/main/speaker-embedding.onnx

2. Change line 166 of pyannote/audio/pipelines/speaker_diarization.py

```
            #  self._embedding = PretrainedSpeakerEmbedding(
            #      self.embedding, use_auth_token=use_auth_token
            #  )
            self._embedding = embedding
```
"""

import argparse
from pathlib import Path

import torch
from pyannote.audio import Model
from pyannote.audio.pipelines import SpeakerDiarization as SpeakerDiarizationPipeline
from pyannote.audio.pipelines.speaker_verification import (
    ONNXWeSpeakerPretrainedSpeakerEmbedding,
)


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--wav", type=str, required=True, help="Path to test.wav")

    return parser.parse_args()


def build_pipeline():
    embedding_filename = "./speaker-embedding.onnx"
    if Path(embedding_filename).is_file():
        # You need to modify line 166
        # of pyannote/audio/pipelines/speaker_diarization.py
        # Please see the comments at the start of this script for details
        embedding = ONNXWeSpeakerPretrainedSpeakerEmbedding(embedding_filename)
    else:
        embedding = "hbredin/wespeaker-voxceleb-resnet34-LM"

    pt_filename = "./pytorch_model.bin"
    segmentation = Model.from_pretrained(pt_filename)
    segmentation.eval()

    pipeline = SpeakerDiarizationPipeline(
        segmentation=segmentation,
        embedding=embedding,
        embedding_exclude_overlap=True,
    )

    params = {
        "clustering": {
            "method": "centroid",
            "min_cluster_size": 12,
            "threshold": 0.7045654963945799,
        },
        "segmentation": {"min_duration_off": 0.5},
    }

    pipeline.instantiate(params)
    return pipeline


@torch.no_grad()
def main():
    args = get_args()
    assert Path(args.wav).is_file(), args.wav
    pipeline = build_pipeline()
    print(pipeline)
    t = pipeline(args.wav)
    print(type(t))
    print(t)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/pyannote/segmentation/vad-onnx.py
================================================
#!/usr/bin/env python3

"""
./export-onnx.py
./preprocess.sh

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
./vad-onnx.py --model ./model.onnx --wav ./lei-jun-test.wav
"""

import argparse
from pathlib import Path

import librosa
import numpy as np
import onnxruntime as ort
import soundfile as sf
from numpy.lib.stride_tricks import as_strided


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str, required=True, help="Path to model.onnx")
    parser.add_argument("--wav", type=str, required=True, help="Path to test.wav")

    return parser.parse_args()


class OnnxModel:
    def __init__(self, filename):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.session_opts = session_opts

        self.model = ort.InferenceSession(
            filename,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )

        meta = self.model.get_modelmeta().custom_metadata_map
        print(meta)

        self.window_size = int(meta["window_size"])
        self.sample_rate = int(meta["sample_rate"])
        self.window_shift = int(0.1 * self.window_size)
        self.receptive_field_size = int(meta["receptive_field_size"])
        self.receptive_field_shift = int(meta["receptive_field_shift"])
        self.num_speakers = int(meta["num_speakers"])
        self.powerset_max_classes = int(meta["powerset_max_classes"])
        self.num_classes = int(meta["num_classes"])

    def __call__(self, x):
        """
        Args:
          x: (N, num_samples)
        Returns:
          A tensor of shape (N, num_frames, num_classes)
        """
        x = np.expand_dims(x, axis=1)

        (y,) = self.model.run(
            [self.model.get_outputs()[0].name], {self.model.get_inputs()[0].name: x}
        )

        return y


def load_wav(filename, expected_sample_rate) -> np.ndarray:
    audio, sample_rate = sf.read(filename, dtype="float32", always_2d=True)
    audio = audio[:, 0]  # only use the first channel
    if sample_rate != expected_sample_rate:
        audio = librosa.resample(
            audio,
            orig_sr=sample_rate,
            target_sr=expected_sample_rate,
        )
    return audio


def get_powerset_mapping(num_classes, num_speakers, powerset_max_classes):
    mapping = np.zeros((num_classes, num_speakers))

    k = 1
    for i in range(1, powerset_max_classes + 1):
        if i == 1:
            for j in range(0, num_speakers):
                mapping[k, j] = 1
                k += 1
        elif i == 2:
            for j in range(0, num_speakers):
                for m in range(j + 1, num_speakers):
                    mapping[k, j] = 1
                    mapping[k, m] = 1
                    k += 1
        elif i == 3:
            raise RuntimeError("Unsupported")

    return mapping


def to_multi_label(y, mapping):
    """
    Args:
      y: (num_chunks, num_frames, num_classes)
    Returns:
      A tensor of shape (num_chunks, num_frames, num_speakers)
    """
    y = np.argmax(y, axis=-1)
    labels = mapping[y.reshape(-1)].reshape(y.shape[0], y.shape[1], -1)
    return labels


def main():
    args = get_args()
    assert Path(args.model).is_file(), args.model
    assert Path(args.wav).is_file(), args.wav

    m = OnnxModel(args.model)
    audio = load_wav(args.wav, m.sample_rate)
    # audio: (num_samples,)
    print("audio", audio.shape, audio.min(), audio.max(), audio.sum())

    num = (audio.shape[0] - m.window_size) // m.window_shift + 1

    samples = as_strided(
        audio,
        shape=(num, m.window_size),
        strides=(m.window_shift * audio.strides[0], audio.strides[0]),
    )

    # or use torch.Tensor.unfold
    #  samples = torch.from_numpy(audio).unfold(0, m.window_size, m.window_shift).numpy()

    print(
        "samples",
        samples.shape,
        samples.mean(),
        samples.sum(),
        samples[:3, :3].sum(axis=-1),
    )

    if (
        audio.shape[0] < m.window_size
        or (audio.shape[0] - m.window_size) % m.window_shift > 0
    ):
        has_last_chunk = True
    else:
        has_last_chunk = False

    num_chunks = samples.shape[0]
    batch_size = 32
    output = []
    for i in range(0, num_chunks, batch_size):
        start = i
        end = i + batch_size
        # it's perfectly ok to use end > num_chunks
        y = m(samples[start:end])
        output.append(y)

    if has_last_chunk:
        last_chunk = audio[num_chunks * m.window_shift :]  # noqa
        pad_size = m.window_size - last_chunk.shape[0]
        last_chunk = np.pad(last_chunk, (0, pad_size))
        last_chunk = np.expand_dims(last_chunk, axis=0)
        y = m(last_chunk)
        output.append(y)

    y = np.vstack(output)
    # y: (num_chunks, num_frames, num_classes)

    mapping = get_powerset_mapping(
        num_classes=m.num_classes,
        num_speakers=m.num_speakers,
        powerset_max_classes=m.powerset_max_classes,
    )
    labels = to_multi_label(y, mapping=mapping)
    # labels: (num_chunks, num_frames, num_speakers)

    # binary classification
    labels = np.max(labels, axis=-1)
    # labels: (num_chunk, num_frames)

    num_frames = (
        int(
            (m.window_size + (labels.shape[0] - 1) * m.window_shift)
            / m.receptive_field_shift
        )
        + 1
    )

    count = np.zeros((num_frames,))
    classification = np.zeros((num_frames,))
    weight = np.hamming(labels.shape[1])

    for i in range(labels.shape[0]):
        this_chunk = labels[i]
        start = int(i * m.window_shift / m.receptive_field_shift + 0.5)
        end = start + this_chunk.shape[0]

        classification[start:end] += this_chunk * weight
        count[start:end] += weight

    classification /= np.maximum(count, 1e-12)

    if has_last_chunk:
        stop_frame = int(audio.shape[0] / m.receptive_field_shift)
        classification = classification[:stop_frame]

    classification = classification.tolist()

    onset = 0.5
    offset = 0.5

    is_active = classification[0] > onset
    start = None
    if is_active:
        start = 0

    scale = m.receptive_field_shift / m.sample_rate
    scale_offset = m.receptive_field_size / m.sample_rate * 0.5

    for i in range(len(classification)):
        if is_active:
            if classification[i] < offset:
                print(
                    f"{start*scale + scale_offset:.3f} -- {i*scale + scale_offset:.3f}"
                )
                is_active = False
        else:
            if classification[i] > onset:
                start = i
                is_active = True

    if is_active:
        print(
            f"{start*scale + scale_offset:.3f} -- {(len(classification)-1)*scale + scale_offset:.3f}"
        )


if __name__ == "__main__":
    main()


================================================
FILE: scripts/pyannote/segmentation/vad-torch.py
================================================
#!/usr/bin/env python3

import torch
from pyannote.audio import Model
from pyannote.audio.pipelines import (
    VoiceActivityDetection as VoiceActivityDetectionPipeline,
)


@torch.no_grad()
def main():
    # Please download it from
    # https://huggingface.co/csukuangfj/pyannote-models/tree/main/segmentation-3.0
    pt_filename = "./pytorch_model.bin"
    model = Model.from_pretrained(pt_filename)
    model.eval()

    pipeline = VoiceActivityDetectionPipeline(segmentation=model)

    # https://huggingface.co/pyannote/voice-activity-detection/blob/main/config.yaml
    # https://github.com/pyannote/pyannote-audio/issues/1215
    initial_params = {
        "min_duration_on": 0.0,
        "min_duration_off": 0.0,
    }
    pipeline.onset = 0.5
    pipeline.offset = 0.5

    pipeline.instantiate(initial_params)

    # wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
    t = pipeline("./lei-jun-test.wav")
    print(type(t))
    print(t)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/qnn/__init__.py
================================================


================================================
FILE: scripts/qnn/device_info.py
================================================
#!/usr/bin/env python3
from dataclasses import dataclass
from enum import IntEnum, unique

"""
See also
https://docs.qualcomm.com/doc/80-63442-10/topic/QNN_general_overview.html#supported-snapdragon-devices

SA8255 soc_id    52 dsp_arch     v73 vtcm_size (MB)      8
SA8295 soc_id    39 dsp_arch     v68 vtcm_size (MB)      8
SM8350 soc_id    35 dsp_arch     v68 vtcm_size (MB)      4
SM8450 soc_id    36 dsp_arch     v69 vtcm_size (MB)      8
SM8475 soc_id    42 dsp_arch     v69 vtcm_size (MB)      8
SM8550 soc_id    43 dsp_arch     v73 vtcm_size (MB)      8
SM8650 soc_id    57 dsp_arch     v75 vtcm_size (MB)      8
SM8750 soc_id    69 dsp_arch     v79 vtcm_size (MB)      8
SM8850 soc_id    87 dsp_arch     v81 vtcm_size (MB)      8
SSG2115P soc_id  46 dsp_arch     v73 vtcm_size (MB)      2
SSG2125P soc_id  58 dsp_arch     v73 vtcm_size (MB)      2
SXR1230P soc_id  45 dsp_arch     v73 vtcm_size (MB)      2
SXR2230P soc_id  53 dsp_arch     v69 vtcm_size (MB)      8
SXR2330P soc_id  75 dsp_arch     v79 vtcm_size (MB)      8
QCS9100 soc_id   77 dsp_arch     v73 vtcm_size (MB)      8
SAR2230P soc_id  95 dsp_arch     v81 vtcm_size (MB)      4
SW6100 soc_id    96 dsp_arch     v81 vtcm_size (MB)      4
"""


@unique
class Chipset(IntEnum):
    # see https://github.com/pytorch/executorch/blob/main/backends/qualcomm/serialization/qc_schema.py#L41
    # SA8255, soc_id 52,  dsp_arch v73
    SA8255 = 52  # v73
    SA8295 = 39  # v68
    SM8350 = 35  # v68
    SM8450 = 36  # v69
    SM8475 = 42  # v69
    SM8550 = 43  # v73
    SM8650 = 57  # v75
    SM8750 = 69  # v79
    SM8850 = 87  # v81
    #  SSG2115P = 46  # v73
    #  SSG2125P = 58  # v73
    #  SXR1230P = 45  # v73
    #  SXR2230P = 53  # v69
    #  SXR2330P = 75  # v79
    QCS9100 = 77  # v73
    #  SAR2230P = 95  # v81
    #  SW6100 = 96  # v81


@unique
class HtpArch(IntEnum):
    v68 = 68
    v69 = 69
    v73 = 73
    v75 = 75
    v79 = 79
    v81 = 81
    v87 = 87


@dataclass
class HtpInfo:
    arch: HtpArch
    vtcm_size_in_mb: int


@dataclass
class SocInfo:
    model: Chipset
    info: HtpInfo


soc_info_list = [
    SocInfo(Chipset.SA8255, HtpInfo(HtpArch.v73, 8)),
    SocInfo(Chipset.SA8295, HtpInfo(HtpArch.v68, 8)),
    SocInfo(Chipset.SM8350, HtpInfo(HtpArch.v68, 4)),
    SocInfo(Chipset.SM8450, HtpInfo(HtpArch.v69, 8)),
    SocInfo(Chipset.SM8475, HtpInfo(HtpArch.v69, 8)),
    SocInfo(Chipset.SM8550, HtpInfo(HtpArch.v73, 8)),
    SocInfo(Chipset.SM8650, HtpInfo(HtpArch.v75, 8)),
    SocInfo(Chipset.SM8750, HtpInfo(HtpArch.v79, 8)),
    SocInfo(Chipset.SM8850, HtpInfo(HtpArch.v81, 8)),
    #  SocInfo(Chipset.SSG2115P, HtpInfo(HtpArch.v73, 2)),
    #  SocInfo(Chipset.SSG2125P, HtpInfo(HtpArch.v73, 2)),
    #  SocInfo(Chipset.SXR1230P, HtpInfo(HtpArch.v73, 2)),
    #  SocInfo(Chipset.SXR2230P, HtpInfo(HtpArch.v69, 8)),
    #  SocInfo(Chipset.SXR2330P, HtpInfo(HtpArch.v79, 8)),
    SocInfo(Chipset.QCS9100, HtpInfo(HtpArch.v73, 8)),
    #  SocInfo(Chipset.SAR2230P, HtpInfo(HtpArch.v81, 4)),
    #  SocInfo(Chipset.SW6100, HtpInfo(HtpArch.v81, 4)),
]

soc_info_dict = {soc.model.name: soc for soc in soc_info_list}


def _test():
    for soc in soc_info_list:
        print(
            soc.model.name,
            "soc_id\t",
            soc.model.value,
            "dsp_arch\t",
            soc.info.arch.name,
            "vtcm_size (MB)\t",
            soc.info.vtcm_size_in_mb,
        )


if __name__ == "__main__":
    _test()


================================================
FILE: scripts/qnn/generate_config.py
================================================
#!/usr/bin/env python3

# see
# https://github.com/MollySophia/rwkv-qualcomm/blob/2a82c641c90ee130cbd7038ca7449b2fa818de71/utils/htp_devices_config.py
# https://docs.qualcomm.com/bundle/publicresource/topics/80-64748-1/model_prep_linux.html#QNN-HTP-context-binary

import argparse
import json
from pathlib import Path

from device_info import soc_info_dict


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--soc",
        type=str,
        required=True,
        help="SM8850, SA8295, etc",
    )

    parser.add_argument(
        "--graph-name",
        type=str,
        required=True,
        help="Graph name",
    )

    parser.add_argument(
        "--output-dir",
        type=str,
        required=True,
        help="Output directory to save the generated json files",
    )

    parser.add_argument(
        "--qnn-sdk-root",
        type=str,
        required=True,
        help="Path to qnn sdk",
    )

    return parser.parse_args()


def generate_config(
    soc_name: str,
    graph_name: str,
    output_dir: str,
    qnn_sdk_root: str,
):
    if soc_name not in soc_info_dict:
        raise ValueError(
            f"Unsupported SOC {soc_name}. Supported: - {sorted(list(soc_info_dict.keys()))}"
        )
    soc = soc_info_dict[soc_name]

    output_dir = Path(output_dir).absolute()
    output_dir.mkdir(parents=True, exist_ok=True)

    htp_backend_extensions_data = {
        "backend_extensions": {
            "shared_library_path": f"{qnn_sdk_root}/lib/x86_64-linux-clang/libQnnHtpNetRunExtensions.so",
            "config_file_path": f"{output_dir}/htp_config.json",
        }
    }

    htp_backend_config_data = {
        "graphs": [
            {
                "vtcm_mb": soc.info.vtcm_size_in_mb,
                "O": 3,
                "graph_names": [graph_name],
            }
        ],
        "devices": [
            {
                "device_id": 0,
                "soc_id": soc.model.value,
                "dsp_arch": soc.info.arch.name,
                "cores": [
                    {
                        "core_id": 0,
                        "perf_profile": "burst",
                        "rpc_control_latency": 200,
                    }
                ],
            }
        ],
    }

    with open(str(output_dir / "htp_backend_extensions.json"), "w") as f:
        json.dump(htp_backend_extensions_data, f, indent=4)

    with open(str(output_dir / "htp_config.json"), "w") as f:
        json.dump(htp_backend_config_data, f, indent=4)


def _test():
    qnn_sdk_root = "/home/fangjun/open-source/qairt/2.40.0.251030"
    generate_config(
        soc_name="SM8850",
        graph_name="model_10_seconds_quantized",
        output_dir="./tmp",
        qnn_sdk_root=qnn_sdk_root,
    )


if __name__ == "__main__":
    #  _test()

    args = get_args()
    print(vars(args))
    generate_config(
        soc_name=args.soc,
        graph_name=args.graph_name,
        output_dir=args.output_dir,
        qnn_sdk_root=args.qnn_sdk_root,
    )

# ./generate_config.py  --soc SM8850 --graph-name abc --output-dir ./tmp2 --qnn-sdk-root $QNN_SDK_ROOT


================================================
FILE: scripts/sense-voice/README-nano.md
================================================
# Introduction

This directory contains models converted from
https://huggingface.co/FunAudioLLM/Fun-ASR-Nano-2512

## Core Features

> From  https://huggingface.co/FunAudioLLM/Fun-ASR-Nano-2512

    - Far-field High-noise Recognition: Deeply optimized for far-distance sound pickup and high-noise scenarios (such as conference rooms, in-vehicle environments, industrial sites, etc.), improving recognition accuracy to 93%.

    - Chinese Dialects and Regional Accents:

        - Supports 7 major dialects: Wu, Cantonese, Min, Hakka, Gan, Xiang, Jin
        - Covers 26 regional accents: including Henan, Shaanxi, Hubei, Sichuan, Chongqing, Yunnan, Guizhou, Guangdong, Guangxi and more than 20 other regions

    - Multi-language Free Speech: Supports recognition of 31 languages, with focused optimization on East and Southeast Asian languages, supporting free language switching and mixed recognition.
    - Music Background Lyric Recognition: Enhanced speech recognition performance under music background interference, supporting accurate recognition of lyric content in songs.


## 核心特性

> From https://huggingface.co/FunAudioLLM/Fun-ASR-Nano-2512/blob/main/README_zh.md

    - 远场高噪声识别： 针对远距离拾音及高噪声场景（如会议室、车载环境、工业现场等）进行深度优化，识别准确率提升至 **93%**。
    - 中文方言与地方口音：

        - 支持 7 大方言：吴语、粤语、闽语、客家话、赣语、湘语、晋语
        - 覆盖 26 个地区口音：包括河南、陕西、湖北、四川、重庆、云南、贵州、广东、广西等 20 多个地区

    - 多语言自由说： 支持 31 种语言识别，重点优化东亚与东南亚语种，支持语种自由切换和混合识别。
    - 音乐背景歌词识别： 强化在音乐背景干扰下的语音识别性能，支持对歌曲中歌词内容的精准识别。


================================================
FILE: scripts/sense-voice/README.md
================================================
# Introduction

This directory contains models converted from
https://github.com/FunAudioLLM/SenseVoice


================================================
FILE: scripts/sense-voice/ascend-npu/export_onnx.py
================================================
#!/usr/bin/env python3
# Copyright      2025  Xiaomi Corp.        (authors: Fangjun Kuang)

from typing import List, Tuple

import sentencepiece as spm
import torch

from torch_model import SenseVoiceSmall


def load_cmvn(filename) -> Tuple[List[float], List[float]]:
    neg_mean = None
    inv_stddev = None

    with open(filename) as f:
        for line in f:
            if not line.startswith("<LearnRateCoef>"):
                continue
            t = line.split()[3:-1]

            if neg_mean is None:
                neg_mean = list(map(lambda x: float(x), t))
            else:
                inv_stddev = list(map(lambda x: float(x), t))

    return neg_mean, inv_stddev


def generate_tokens(sp):
    with open("tokens.txt", "w", encoding="utf-8") as f:
        for i in range(sp.vocab_size()):
            f.write(f"{sp.id_to_piece(i)} {i}\n")
    print("saved to tokens.txt")


class ModelWrapper(torch.nn.Module):
    def __init__(self, m):
        super().__init__()
        self.m = m

    def forward(self, x, prompt):
        logits = self.m(x[None], prompt)[0]
        part1 = logits[:4]
        part2 = logits[4:]
        part1 = part1.reshape(4, 25055)
        part2 = part2.reshape(x.size(0), 25055)
        return part1, part2


@torch.no_grad()
def main():
    sp = spm.SentencePieceProcessor()
    sp.load("./chn_jpn_yue_eng_ko_spectok.bpe.model")
    generate_tokens(sp)

    print("loading model")

    state_dict = torch.load("./model.pt", map_location="cpu")
    if "state_dict" in state_dict:
        state_dict = state_dict["state_dict"]

    neg_mean, inv_stddev = load_cmvn("./am.mvn")

    neg_mean = torch.tensor(neg_mean, dtype=torch.float32)
    inv_stddev = torch.tensor(inv_stddev, dtype=torch.float32)

    model = SenseVoiceSmall(neg_mean=neg_mean, inv_stddev=inv_stddev)
    model.load_state_dict(state_dict)
    model.eval()
    del state_dict

    model = ModelWrapper(model)
    model.eval()

    x = torch.randn(1, 93, 560, dtype=torch.float32)

    language = 3
    text_norm = 15
    prompt = torch.tensor([language, 1, 2, text_norm], dtype=torch.int32)

    opset_version = 14
    filename = "model.onnx"
    torch.onnx.export(
        model.m,
        (x, prompt),
        filename,
        opset_version=opset_version,
        input_names=["x", "prompt"],
        output_names=["logits"],
        dynamic_axes={
            "x": {0: "N", 1: "T"},
            "logits": {0: "N", 1: "T_4"},
        },
    )
    print(f"saved to {filename}")


if __name__ == "__main__":
    torch.manual_seed(20251018)
    main()


================================================
FILE: scripts/sense-voice/ascend-npu/export_onnx_static_shape.py
================================================
#!/usr/bin/env python3
# Copyright      2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import argparse
from typing import List, Tuple

import sentencepiece as spm
import torch

from torch_model import SenseVoiceSmall


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--input-len-in-seconds",
        type=int,
        required=True,
        help="""Some Ascend NPU does not support dynamic shape, so we need to hard-code
        how long the model can process.
        """,
    )
    return parser.parse_args()


def load_cmvn(filename) -> Tuple[List[float], List[float]]:
    neg_mean = None
    inv_stddev = None

    with open(filename) as f:
        for line in f:
            if not line.startswith("<LearnRateCoef>"):
                continue
            t = line.split()[3:-1]

            if neg_mean is None:
                neg_mean = list(map(lambda x: float(x), t))
            else:
                inv_stddev = list(map(lambda x: float(x), t))

    return neg_mean, inv_stddev


def generate_tokens(sp):
    with open("tokens.txt", "w", encoding="utf-8") as f:
        for i in range(sp.vocab_size()):
            f.write(f"{sp.id_to_piece(i)} {i}\n")
    print("saved to tokens.txt")


class ModelWrapper(torch.nn.Module):
    def __init__(self, m):
        super().__init__()
        self.m = m

    def forward(self, x, prompt):
        logits = self.m(x[None], prompt)[0]
        part1 = logits[:4]
        part2 = logits[4:]
        part1 = part1.reshape(4, 25055)
        part2 = part2.reshape(x.size(0), 25055)
        return part1, part2


@torch.no_grad()
def main():
    args = get_args()
    print(vars(args))

    sp = spm.SentencePieceProcessor()
    sp.load("./chn_jpn_yue_eng_ko_spectok.bpe.model")
    generate_tokens(sp)

    print("loading model")

    state_dict = torch.load("./model.pt", map_location="cpu")
    if "state_dict" in state_dict:
        state_dict = state_dict["state_dict"]

    neg_mean, inv_stddev = load_cmvn("./am.mvn")

    neg_mean = torch.tensor(neg_mean, dtype=torch.float32)
    inv_stddev = torch.tensor(inv_stddev, dtype=torch.float32)

    model = SenseVoiceSmall(neg_mean=neg_mean, inv_stddev=inv_stddev)
    model.load_state_dict(state_dict)
    model.eval()
    del state_dict

    model = ModelWrapper(model)
    model.eval()

    lfr_window_size = 7
    lfr_window_shift = 6

    # frame shift is 10ms, 1 second has about 100 feature frames
    input_len_in_seconds = int(args.input_len_in_seconds)
    num_frames = input_len_in_seconds * 100
    print("num_frames", num_frames)

    # num_input_frames is an approximate number
    num_input_frames = int(num_frames / lfr_window_shift + 0.5)
    print("num_input_frames", num_input_frames)

    x = torch.randn(1, num_input_frames, 560, dtype=torch.float32)
    print("x.shape", x.shape)

    language = 3
    text_norm = 15
    prompt = torch.tensor([language, 1, 2, text_norm], dtype=torch.int32)

    opset_version = 14
    filename = "model.onnx"
    torch.onnx.export(
        model.m,
        (x, prompt),
        filename,
        opset_version=opset_version,
        input_names=["x", "prompt"],
        output_names=["logits"],
        dynamic_axes={},
    )
    print(f"saved to {filename}")


if __name__ == "__main__":
    torch.manual_seed(20251018)
    main()


================================================
FILE: scripts/sense-voice/ascend-npu/test_om.py
================================================
#!/usr/bin/env python3
# Copyright      2025  Xiaomi Corp.        (authors: Fangjun Kuang)

from typing import Tuple

import kaldi_native_fbank as knf
import numpy as np
import soundfile as sf
from ais_bench.infer.interface import InferSession


class OmModel:
    def __init__(self):
        self.model = InferSession(device_id=0, model_path="./model.om", debug=False)

        print("---model---")
        for i in self.model.get_inputs():
            print(i.name, i.datatype, i.shape)

        print("-----")

        for i in self.model.get_outputs():
            print(i.name, i.datatype, i.shape)

    def __call__(self, x, prompt=None, language=None, text_norm=None):
        return self.model.infer([x, prompt], mode="dymshape", custom_sizes=10000000)[0][
            0
        ]


def load_audio(filename: str) -> Tuple[np.ndarray, int]:
    data, sample_rate = sf.read(
        filename,
        always_2d=True,
        dtype="float32",
    )
    data = data[:, 0]  # use only the first channel
    samples = np.ascontiguousarray(data)
    return samples, sample_rate


def load_tokens(filename):
    ans = dict()
    i = 0
    with open(filename, encoding="utf-8") as f:
        for line in f:
            ans[i] = line.strip().split()[0]
            i += 1
    return ans


def compute_feat(
    samples,
    sample_rate,
    window_size: int = 7,  # lfr_m
    window_shift: int = 6,  # lfr_n
):
    opts = knf.FbankOptions()
    opts.frame_opts.dither = 0
    opts.frame_opts.snip_edges = False
    opts.frame_opts.window_type = "hamming"
    opts.frame_opts.samp_freq = sample_rate
    opts.mel_opts.num_bins = 80

    online_fbank = knf.OnlineFbank(opts)
    online_fbank.accept_waveform(sample_rate, (samples * 32768).tolist())
    online_fbank.input_finished()

    features = np.stack(
        [online_fbank.get_frame(i) for i in range(online_fbank.num_frames_ready)]
    )
    assert features.data.contiguous is True
    assert features.dtype == np.float32, features.dtype

    T = (features.shape[0] - window_size) // window_shift + 1
    features = np.lib.stride_tricks.as_strided(
        features,
        shape=(T, features.shape[1] * window_size),
        strides=((window_shift * features.shape[1]) * 4, 4),
    )

    return np.copy(features)


def main():
    samples, sample_rate = load_audio("./test_wavs/zh.wav")
    if sample_rate != 16000:
        import librosa

        samples = librosa.resample(samples, orig_sr=sample_rate, target_sr=16000)
        sample_rate = 16000

    model = OmModel()

    features = compute_feat(
        samples=samples,
        sample_rate=sample_rate,
    )
    print("features.shape", features.shape)

    language_auto = 0
    language_zh = 3
    language_en = 4
    language_yue = 7
    language_ya = 11
    language_ko = 12
    language_nospeech = 13

    language = language_auto

    with_itn = 14
    without_itn = 15

    text_norm = with_itn

    prompt = np.array([language, 1, 2, text_norm], dtype=np.int32)

    print("prompt", prompt.shape)

    logits = model(
        x=features[None],
        prompt=prompt,
    )
    print("logits.shape", logits.shape, type(logits))

    idx = logits.argmax(axis=-1)
    print(idx)
    print(len(idx))
    prev = -1
    ids = []
    for i in idx:
        if i != prev:
            ids.append(i)
        prev = i
    ids = [i for i in ids if i != 0]
    print(ids)

    tokens = load_tokens("./tokens.txt")
    text = "".join([tokens[i] for i in ids])

    text = text.replace("▁", " ")
    print(text)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/sense-voice/ascend-npu/test_om_static.py
================================================
#!/usr/bin/env python3
# Copyright      2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import argparse
from typing import Tuple

import kaldi_native_fbank as knf
import numpy as np
import soundfile as sf
import torch
from ais_bench.infer.interface import InferSession


class OmModel:
    def __init__(self):
        self.model = InferSession(device_id=0, model_path="./model.om", debug=False)

        print("---model---")
        for i in self.model.get_inputs():
            print(i.name, i.datatype, i.shape)

        print("-----")

        for i in self.model.get_outputs():
            print(i.name, i.datatype, i.shape)

        self.num_frames = self.model.get_inputs()[0].shape[1]

    def __call__(self, x, prompt=None, language=None, text_norm=None):
        return self.model.infer([x, prompt], mode="static", custom_sizes=10000000)[0][0]
        return logits


def load_audio(filename: str) -> Tuple[np.ndarray, int]:
    data, sample_rate = sf.read(
        filename,
        always_2d=True,
        dtype="float32",
    )
    data = data[:, 0]  # use only the first channel
    samples = np.ascontiguousarray(data)
    return samples, sample_rate


def load_tokens(filename):
    ans = dict()
    i = 0
    with open(filename, encoding="utf-8") as f:
        for line in f:
            ans[i] = line.strip().split()[0]
            i += 1
    return ans


def compute_feat(
    samples,
    sample_rate,
    window_size: int = 7,  # lfr_m
    window_shift: int = 6,  # lfr_n
):
    opts = knf.FbankOptions()
    opts.frame_opts.dither = 0
    opts.frame_opts.snip_edges = False
    opts.frame_opts.window_type = "hamming"
    opts.frame_opts.samp_freq = sample_rate
    opts.mel_opts.num_bins = 80

    online_fbank = knf.OnlineFbank(opts)
    online_fbank.accept_waveform(sample_rate, (samples * 32768).tolist())
    online_fbank.input_finished()

    features = np.stack(
        [online_fbank.get_frame(i) for i in range(online_fbank.num_frames_ready)]
    )
    assert features.data.contiguous is True
    assert features.dtype == np.float32, features.dtype

    T = (features.shape[0] - window_size) // window_shift + 1
    features = np.lib.stride_tricks.as_strided(
        features,
        shape=(T, features.shape[1] * window_size),
        strides=((window_shift * features.shape[1]) * 4, 4),
    )

    return np.copy(features)


def main():
    samples, sample_rate = load_audio("./test_wavs/zh.wav")
    if sample_rate != 16000:
        import librosa

        samples = librosa.resample(samples, orig_sr=sample_rate, target_sr=16000)
        sample_rate = 16000

    model = OmModel()

    features = compute_feat(
        samples=samples,
        sample_rate=sample_rate,
    )
    print("features.shape", features.shape)
    if model.num_frames > 0:
        if features.shape[0] < model.num_frames:
            features = np.pad(
                features,
                ((0, model.num_frames - features.shape[0]), (0, 0)),
                mode="constant",
                constant_values=0,
            )
        elif features.shape[0] > model.num_frames:
            features = features[: model.num_frames]

        print("features.shape (new)", features.shape)

    language_auto = 0
    language_zh = 3
    language_en = 4
    language_yue = 7
    language_ya = 11
    language_ko = 12
    language_nospeech = 13

    language = language_auto

    with_itn = 14
    without_itn = 15

    text_norm = with_itn

    prompt = np.array([language, 1, 2, text_norm], dtype=np.int32)
    # language = np.array([language], dtype=np.int32)
    # text_norm = np.array([text_norm], dtype=np.int32)

    print("prompt", prompt.shape)

    logits = model(
        x=features[None],
        prompt=prompt,
        # language=language,
        ##text_norm=text_norm,
    )
    print("logits.shape", logits.shape, type(logits))

    idx = logits.argmax(axis=-1)
    print(idx)
    print(len(idx))
    prev = -1
    ids = []
    for i in idx:
        if i != prev:
            ids.append(i)
        prev = i
    ids = [i for i in ids if i != 0]
    print(ids)

    tokens = load_tokens("./tokens.txt")
    text = "".join([tokens[i] for i in ids])

    text = text.replace("▁", " ")
    print(text)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/sense-voice/export-onnx.py
================================================
#!/usr/bin/env python3
# Copyright      2024  Xiaomi Corp.        (authors: Fangjun Kuang)

"""
We use
https://hf-mirror.com/yuekai/model_repo_sense_voice_small/blob/main/export_onnx.py
as a reference while writing this file.

Thanks to https://github.com/yuekaizhang for making the file public.

You should install FunASR before you run this file.
"""

import os
from typing import Any, Dict, Tuple

import onnx
import torch
from model import SenseVoiceSmall
from onnxruntime.quantization import QuantType, quantize_dynamic


def add_meta_data(filename: str, meta_data: Dict[str, Any]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)
    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    onnx.save(model, filename)


def modified_forward(
    self,
    x: torch.Tensor,
    x_length: torch.Tensor,
    language: torch.Tensor,
    text_norm: torch.Tensor,
):
    """
    Args:
      x:
        A 3-D tensor of shape (N, T, C) with dtype torch.float32
      x_length:
        A 1-D tensor of shape (N,) with dtype torch.int32
      language:
        A 1-D tensor of shape (N,) with dtype torch.int32
        See also https://github.com/FunAudioLLM/SenseVoice/blob/a80e676461b24419cf1130a33d4dd2f04053e5cc/model.py#L640
      text_norm:
        A 1-D tensor of shape (N,) with dtype torch.int32
        See also https://github.com/FunAudioLLM/SenseVoice/blob/a80e676461b24419cf1130a33d4dd2f04053e5cc/model.py#L642
    """
    language_query = self.embed(language).unsqueeze(1)
    text_norm_query = self.embed(text_norm).unsqueeze(1)

    event_emo_query = self.embed(torch.LongTensor([[1, 2]])).repeat(x.size(0), 1, 1)

    x = torch.cat((language_query, event_emo_query, text_norm_query, x), dim=1)
    x_length += 4

    encoder_out, encoder_out_lens = self.encoder(x, x_length)
    if isinstance(encoder_out, tuple):
        encoder_out = encoder_out[0]

    ctc_logits = self.ctc.ctc_lo(encoder_out)

    return ctc_logits


def load_cmvn(filename) -> Tuple[str, str]:
    neg_mean = None
    inv_stddev = None

    with open(filename) as f:
        for line in f:
            if not line.startswith("<LearnRateCoef>"):
                continue
            t = line.split()[3:-1]

            if neg_mean is None:
                neg_mean = ",".join(t)
            else:
                inv_stddev = ",".join(t)

    return neg_mean, inv_stddev


def generate_tokens(params):
    sp = params["tokenizer"].sp
    with open("tokens.txt", "w", encoding="utf-8") as f:
        for i in range(sp.vocab_size()):
            f.write(f"{sp.id_to_piece(i)} {i}\n")

    os.system("head tokens.txt; tail -n200 tokens.txt")


def display_params(params):
    print("----------params----------")
    print(params)

    print("----------frontend_conf----------")
    print(params["frontend_conf"])

    os.system(f"cat {params['frontend_conf']['cmvn_file']}")

    print("----------config----------")
    print(params["config"])

    os.system(f"cat {params['config']}")


@torch.no_grad()
def main():
    model, params = SenseVoiceSmall.from_pretrained(
        model="iic/SenseVoiceSmall", device="cpu"
    )
    model.eval()

    display_params(params)

    generate_tokens(params)

    model.__class__.forward = modified_forward

    x = torch.randn(2, 100, 560, dtype=torch.float32)
    x_length = torch.tensor([80, 100], dtype=torch.int32)
    language = torch.tensor([0, 3], dtype=torch.int32)
    text_norm = torch.tensor([14, 15], dtype=torch.int32)

    opset_version = 13
    filename = "model.onnx"
    torch.onnx.export(
        model,
        (x, x_length, language, text_norm),
        filename,
        opset_version=opset_version,
        input_names=["x", "x_length", "language", "text_norm"],
        output_names=["logits"],
        dynamic_axes={
            "x": {0: "N", 1: "T"},
            "x_length": {0: "N"},
            "language": {0: "N"},
            "text_norm": {0: "N"},
            "logits": {0: "N", 1: "T"},
        },
    )

    lfr_window_size = params["frontend_conf"]["lfr_m"]
    lfr_window_shift = params["frontend_conf"]["lfr_n"]

    neg_mean, inv_stddev = load_cmvn(params["frontend_conf"]["cmvn_file"])
    vocab_size = params["tokenizer"].sp.vocab_size()

    meta_data = {
        "lfr_window_size": lfr_window_size,
        "lfr_window_shift": lfr_window_shift,
        "normalize_samples": 0,  # input should be in the range [-32768, 32767]
        "neg_mean": neg_mean,
        "inv_stddev": inv_stddev,
        "model_type": "sense_voice_ctc",
        # version 1: Use QInt8
        # version 2: Use QUInt8
        "version": "2",
        "model_author": "iic",
        "maintainer": "k2-fsa",
        "vocab_size": vocab_size,
        "comment": "iic/SenseVoiceSmall",
        "lang_auto": model.lid_dict["auto"],
        "lang_zh": model.lid_dict["zh"],
        "lang_en": model.lid_dict["en"],
        "lang_yue": model.lid_dict["yue"],  # cantonese
        "lang_ja": model.lid_dict["ja"],
        "lang_ko": model.lid_dict["ko"],
        "lang_nospeech": model.lid_dict["nospeech"],
        "with_itn": model.textnorm_dict["withitn"],
        "without_itn": model.textnorm_dict["woitn"],
        "url": "https://huggingface.co/FunAudioLLM/SenseVoiceSmall",
    }
    add_meta_data(filename=filename, meta_data=meta_data)

    filename_int8 = "model.int8.onnx"
    quantize_dynamic(
        model_input=filename,
        model_output=filename_int8,
        op_types_to_quantize=["MatMul"],
        # Note that we have to use QUInt8 here.
        #
        # When QInt8 is used, C++ onnxruntime produces incorrect results
        weight_type=QuantType.QUInt8,
    )


if __name__ == "__main__":
    torch.manual_seed(20240717)
    main()


================================================
FILE: scripts/sense-voice/export_onnx_nano.py
================================================
#!/usr/bin/env python3
# Copyright      2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import argparse
import os
from typing import Any, Dict

import onnx
import torch
from onnxruntime.quantization import QuantType, quantize_dynamic

from test_nano_torch import load_tokens, load_torch_model


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--opset-version",
        type=int,
        default=13,
    )
    return parser.parse_args()


def add_meta_data(filename: str, meta_data: Dict[str, Any]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)
    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    onnx.save(model, filename)


@torch.no_grad()
def main():
    args = get_args()
    print(vars(args))
    id2tokens = load_tokens()

    vocab_size = len(id2tokens)
    blank_id = vocab_size - 1

    print("loading model")

    model = load_torch_model()
    model.eval()

    x = torch.randn(1, 30, 560, dtype=torch.float32)

    opset_version = args.opset_version
    filename = "model.onnx"
    torch.onnx.export(
        model,
        x,
        filename,
        opset_version=opset_version,
        input_names=["x"],
        output_names=["logits"],
        dynamic_axes={
            "x": {1: "T"},
        },
    )

    model_author = "FunAudioLLM"
    comment = os.environ.get("comment", "FunAudioLLM/Fun-ASR-Nano-2512")
    url = "https://huggingface.co/FunAudioLLM/Fun-ASR-Nano-2512"

    meta_data = {
        "lfr_window_size": 7,
        "lfr_window_shift": 6,
        "normalize_samples": 0,  # input should be in the range [-32768, 32767]
        "model_type": "sense_voice_ctc",
        "version": "1",
        "model_author": model_author,
        "maintainer": "k2-fsa",
        "vocab_size": vocab_size,
        "blank_id": blank_id,
        "comment": comment,
        "url": url,
    }
    add_meta_data(filename=filename, meta_data=meta_data)

    filename_int8 = "model.int8.onnx"
    quantize_dynamic(
        model_input=filename,
        model_output=filename_int8,
        op_types_to_quantize=["MatMul"],
        # Note that we have to use QUInt8 here.
        #
        # When QInt8 is used, C++ onnxruntime produces incorrect results
        weight_type=QuantType.QUInt8,
    )


if __name__ == "__main__":
    torch.manual_seed(20251217)
    main()


================================================
FILE: scripts/sense-voice/qnn/.gitignore
================================================
*.raw


================================================
FILE: scripts/sense-voice/qnn/decode_logits.py
================================================
#!/usr/bin/env python3
# Copyright      2025  Xiaomi Corp.        (authors: Fangjun Kuang)
import numpy as np


def load_tokens(filename):
    ans = dict()
    i = 0
    with open(filename, encoding="utf-8") as f:
        for line in f:
            ans[i] = line.strip().split()[0]
            i += 1
    return ans


logits = np.fromfile("./logits.raw", dtype=np.float32).reshape((-1, 25055))

idx = logits.argmax(axis=-1)
print("idx", idx)
print(len(idx))
prev = -1
ids = []
for i in idx:
    if i != prev:
        ids.append(i)
    prev = i
ids = [i for i in ids if i != 0]
print(ids)

tokens = load_tokens("./tokens.txt")
text = "".join([tokens[i] for i in ids])

text = text.replace("_", " ")
print(text)


================================================
FILE: scripts/sense-voice/qnn/generate_test_data.py
================================================
#!/usr/bin/env python3
# Copyright      2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import argparse
from typing import Tuple

import kaldi_native_fbank as knf
import numpy as np
import soundfile as sf


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--num-frames",
        type=int,
        required=True,
    )

    parser.add_argument(
        "--wav",
        type=str,
        required=True,
    )
    return parser.parse_args()


def load_audio(filename: str) -> Tuple[np.ndarray, int]:
    data, sample_rate = sf.read(
        filename,
        always_2d=True,
        dtype="float32",
    )
    data = data[:, 0]  # use only the first channel
    samples = np.ascontiguousarray(data)
    return samples, sample_rate


def compute_feat(
    samples,
    sample_rate,
    window_size: int = 7,  # lfr_m
    window_shift: int = 6,  # lfr_n
):
    opts = knf.FbankOptions()
    opts.frame_opts.dither = 0
    opts.frame_opts.snip_edges = False
    opts.frame_opts.window_type = "hamming"
    opts.frame_opts.samp_freq = sample_rate
    opts.mel_opts.num_bins = 80

    online_fbank = knf.OnlineFbank(opts)
    online_fbank.accept_waveform(sample_rate, (samples * 32768).tolist())
    online_fbank.input_finished()

    features = np.stack(
        [online_fbank.get_frame(i) for i in range(online_fbank.num_frames_ready)]
    )
    assert features.data.contiguous is True
    assert features.dtype == np.float32, features.dtype

    T = (features.shape[0] - window_size) // window_shift + 1
    features = np.lib.stride_tricks.as_strided(
        features,
        shape=(T, features.shape[1] * window_size),
        strides=((window_shift * features.shape[1]) * 4, 4),
    )

    return np.copy(features)


def main():
    args = get_args()
    print(vars(args))

    samples, sample_rate = load_audio(args.wav)
    if sample_rate != 16000:
        import librosa

        samples = librosa.resample(samples, orig_sr=sample_rate, target_sr=16000)
        sample_rate = 16000

    features = compute_feat(
        samples=samples,
        sample_rate=sample_rate,
    )
    print("features.shape", features.shape)
    if features.shape[0] > args.num_frames:
        features = features[: args.num_frames]
    elif features.shape[0] < args.num_frames:
        pad_width = ((0, args.num_frames - features.shape[0]), (0, 0))
        features = np.pad(features, pad_width, mode="constant", constant_values=0)

    features.tofile("input0.raw")

    language_auto = 0
    language_zh = 3
    language_en = 4
    language_yue = 7
    language_ya = 11
    language_ko = 12
    language_nospeech = 13

    language = language_auto

    with_itn = 14
    without_itn = 15

    text_norm = with_itn

    prompt = np.array([language, 1, 2, text_norm], dtype=np.int32)
    prompt.tofile("input1.raw")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/sense-voice/rknn/adaptor.py
================================================
import torch
from torch import nn

import torch_model


class MultiHeadedAttention(nn.Module):
    """
    This class is copied and modified from
    https://github.com/modelscope/FunASR/blob/main/funasr/models/transformer/attention.py
    """

    def __init__(self, n_head, n_feat, dropout_rate):
        super().__init__()
        assert n_feat % n_head == 0

        # We assume d_v always equals d_k
        self.d_k = n_feat // n_head
        self.h = n_head
        self.linear_q = nn.Linear(n_feat, n_feat)
        self.linear_k = nn.Linear(n_feat, n_feat)
        self.linear_v = nn.Linear(n_feat, n_feat)
        self.linear_out = nn.Linear(n_feat, n_feat)
        self.attn = None
        self.dropout = nn.Dropout(p=dropout_rate)

    def forward_qkv(self, query, key, value):
        """Transform query, key and value.

        Args:
            query (torch.Tensor): Query tensor (#batch, time1, size).
            key (torch.Tensor): Key tensor (#batch, time2, size).
            value (torch.Tensor): Value tensor (#batch, time2, size).

        Returns:
            torch.Tensor: Transformed query tensor (#batch, n_head, time1, d_k).
            torch.Tensor: Transformed key tensor (#batch, n_head, time2, d_k).
            torch.Tensor: Transformed value tensor (#batch, n_head, time2, d_k).

        """
        n_batch = query.size(0)
        q = self.linear_q(query).view(n_batch, -1, self.h, self.d_k)
        k = self.linear_k(key).view(n_batch, -1, self.h, self.d_k)
        v = self.linear_v(value).view(n_batch, -1, self.h, self.d_k)
        q = q.transpose(1, 2)  # (batch, head, time1, d_k)
        k = k.transpose(1, 2)  # (batch, head, time2, d_k)
        v = v.transpose(1, 2)  # (batch, head, time2, d_k)

        return q, k, v

    def forward_attention(self, value, scores, mask):
        """Compute attention context vector.

        Args:
            value (torch.Tensor): Transformed value (#batch, n_head, time2, d_k).
            scores (torch.Tensor): Attention score (#batch, n_head, time1, time2).
            mask (torch.Tensor): Mask (#batch, 1, time2) or (#batch, time1, time2).

        Returns:
            torch.Tensor: Transformed value (#batch, time1, d_model)
                weighted by the attention score (#batch, time1, time2).

        """
        n_batch = value.size(0)
        if mask is not None:
            mask = mask.unsqueeze(1).eq(0)  # (batch, 1, *, time2)

            min_value = -float(
                "inf"
            )  # min_value = float(np.finfo(torch.tensor(0, dtype=qk.dtype).numpy().dtype).min)
            scores = scores.masked_fill(mask, min_value)
            attn = torch.softmax(scores, dim=-1).masked_fill(
                mask, 0.0
            )  # (batch, head, time1, time2)
        else:
            attn = torch.softmax(scores, dim=-1)  # (batch, head, time1, time2)

        p_attn = self.dropout(attn)
        x = torch.matmul(p_attn, value)  # (batch, head, time1, d_k)
        x = (
            x.transpose(1, 2).contiguous().view(n_batch, -1, self.h * self.d_k)
        )  # (batch, time1, d_model)

        return self.linear_out(x)  # (batch, time1, d_model)

    def forward(self, query, key, value, mask):
        """Compute scaled dot product attention.

        Args:
            query (torch.Tensor): Query tensor (#batch, time1, size).
            key (torch.Tensor): Key tensor (#batch, time2, size).
            value (torch.Tensor): Value tensor (#batch, time2, size).
            mask (torch.Tensor): Mask tensor (#batch, 1, time2) or
                (#batch, time1, time2).

        Returns:
            torch.Tensor: Output tensor (#batch, time1, d_model).

        """
        q, k, v = self.forward_qkv(query, key, value)
        #  scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k)
        scores = torch.matmul(q, k.transpose(-2, -1)) * self.d_k ** (-0.5)

        return self.forward_attention(v, scores, mask)


class EncoderLayer(nn.Module):
    """
    This class is copied and modified from
    https://github.com/modelscope/FunASR/blob/main/funasr/models/transformer/encoder.py
    """

    def __init__(
        self,
        size,
        self_attn,
        feed_forward,
        dropout_rate,
        normalize_before=True,
        concat_after=False,
        stochastic_depth_rate=0.0,
    ):
        super().__init__()

        self.self_attn = self_attn
        self.feed_forward = feed_forward
        self.norm1 = nn.LayerNorm(size, eps=1e-12)
        self.norm2 = nn.LayerNorm(size, eps=1e-12)
        self.dropout = nn.Dropout(dropout_rate)
        self.size = size
        self.normalize_before = normalize_before
        self.concat_after = concat_after
        if self.concat_after:
            self.concat_linear = nn.Linear(size + size, size)
        self.stochastic_depth_rate = stochastic_depth_rate

    def forward(self, x, mask=None, cache=None):
        """Compute encoded features.

        Args:
            x_input (torch.Tensor): Input tensor (#batch, time, size).
            mask (torch.Tensor): Mask tensor for the input (#batch, time).
            cache (torch.Tensor): Cache tensor of the input (#batch, time - 1, size).

        Returns:
            torch.Tensor: Output tensor (#batch, time, size).
            torch.Tensor: Mask tensor (#batch, time).

        """
        skip_layer = False
        # with stochastic depth, residual connection `x + f(x)` becomes
        # `x <- x + 1 / (1 - p) * f(x)` at training time.
        stoch_layer_coeff = 1.0

        if skip_layer:
            if cache is not None:
                x = torch.cat([cache, x], dim=1)
            return x, mask

        residual = x
        if self.normalize_before:
            x = self.norm1(x)

        if cache is None:
            x_q = x
        else:
            assert cache.shape == (x.shape[0], x.shape[1] - 1, self.size)
            x_q = x[:, -1:, :]
            residual = residual[:, -1:, :]
            mask = None if mask is None else mask[:, -1:, :]

        if self.concat_after:
            x_concat = torch.cat((x, self.self_attn(x_q, x, x, mask)), dim=-1)
            x = residual + stoch_layer_coeff * self.concat_linear(x_concat)
        else:
            x = residual + stoch_layer_coeff * self.dropout(
                self.self_attn(x_q, x, x, mask)
            )
        if not self.normalize_before:
            x = self.norm1(x)

        residual = x
        if self.normalize_before:
            x = self.norm2(x)
        x = residual + stoch_layer_coeff * self.dropout(self.feed_forward(x))
        if not self.normalize_before:
            x = self.norm2(x)

        if cache is not None:
            x = torch.cat([cache, x], dim=1)

        return x, mask


class Transformer(nn.Module):
    # This class is copied and modified from
    # https://github.com/modelscope/FunASR/blob/main/funasr/models/llm_asr/adaptor.py
    def __init__(
        self,
        downsample_rate=1,
        encoder_dim=512,
        llm_dim=512,
        ffn_dim: int = 2048,
        n_layer: int = 5,
        **kwargs
    ):
        super().__init__()
        assert downsample_rate == 1, downsample_rate
        self.k = downsample_rate
        self.encoder_dim = encoder_dim
        self.llm_dim = llm_dim
        self.linear1 = nn.Linear(self.encoder_dim * self.k, ffn_dim)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(ffn_dim, self.llm_dim)

        self.blocks = None
        if n_layer > 0:
            self.blocks = nn.ModuleList(
                [
                    EncoderLayer(
                        llm_dim,
                        MultiHeadedAttention(
                            kwargs.get("attention_heads", 8),
                            llm_dim,
                            kwargs.get("attention_dropout_rate", 0.0),
                        ),
                        torch_model.PositionwiseFeedForward(
                            llm_dim,
                            llm_dim // 4,
                            kwargs.get("dropout_rate", 0.0),
                        ),
                        kwargs.get("dropout_rate", 0.0),
                    )
                    for i in range(n_layer)
                ]
            )

    def forward(self, x):
        x = self.linear1(x)
        x = self.relu(x)
        x = self.linear2(x)

        masks = None

        if self.blocks is not None:
            for layer, block in enumerate(self.blocks):
                x, masks = block(x, masks)
        return x


================================================
FILE: scripts/sense-voice/rknn/export-onnx.py
================================================
#!/usr/bin/env python3
# Copyright      2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import argparse
import os
from typing import Any, Dict, List, Tuple

import onnx
import sentencepiece as spm
import torch

from torch_model import SenseVoiceSmall


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--input-len-in-seconds",
        type=int,
        required=True,
        help="""RKNN does not support dynamic shape, so we need to hard-code
        how long the model can process.
        """,
    )

    parser.add_argument(
        "--opset-version",
        type=int,
        default=13,
    )
    return parser.parse_args()


def add_meta_data(filename: str, meta_data: Dict[str, Any]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)
    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    onnx.save(model, filename)


def load_cmvn(filename) -> Tuple[List[float], List[float]]:
    neg_mean = None
    inv_stddev = None

    with open(filename) as f:
        for line in f:
            if not line.startswith("<LearnRateCoef>"):
                continue
            t = line.split()[3:-1]

            if neg_mean is None:
                neg_mean = list(map(lambda x: float(x), t))
            else:
                inv_stddev = list(map(lambda x: float(x), t))

    return neg_mean, inv_stddev


def generate_tokens(sp):
    with open("tokens.txt", "w", encoding="utf-8") as f:
        for i in range(sp.vocab_size()):
            f.write(f"{sp.id_to_piece(i)} {i}\n")
    print("saved to tokens.txt")


@torch.no_grad()
def main():
    args = get_args()
    print(vars(args))

    sp = spm.SentencePieceProcessor()
    sp.load("./chn_jpn_yue_eng_ko_spectok.bpe.model")
    vocab_size = sp.vocab_size()
    generate_tokens(sp)

    print("loading model")

    state_dict = torch.load("./model.pt", map_location="cpu")
    if "state_dict" in state_dict:
        state_dict = state_dict["state_dict"]

    neg_mean, inv_stddev = load_cmvn("./am.mvn")

    neg_mean = torch.tensor(neg_mean, dtype=torch.float32)
    inv_stddev = torch.tensor(inv_stddev, dtype=torch.float32)

    model = SenseVoiceSmall(neg_mean=neg_mean, inv_stddev=inv_stddev)
    model.load_state_dict(state_dict)
    model.eval()
    del state_dict

    lfr_window_size = 7
    lfr_window_shift = 6

    # frame shift is 10ms, 1 second has about 100 feature frames
    input_len_in_seconds = int(args.input_len_in_seconds)
    num_frames = input_len_in_seconds * 100
    print("num_frames", num_frames)

    # num_input_frames is an approximate number
    num_input_frames = int(num_frames / lfr_window_shift + 0.5)
    print("num_input_frames", num_input_frames)

    x = torch.randn(1, num_input_frames, 560, dtype=torch.float32)

    language = 3
    text_norm = 15
    prompt = torch.tensor([language, 1, 2, text_norm], dtype=torch.int32)

    opset_version = args.opset_version
    filename = f"model-{input_len_in_seconds}-seconds.onnx"
    torch.onnx.export(
        model,
        (x, prompt),
        filename,
        opset_version=opset_version,
        input_names=["x", "prompt"],
        output_names=["logits"],
        dynamic_axes={},
    )

    model_author = os.environ.get("model_author", "iic")
    comment = os.environ.get("comment", "iic/SenseVoiceSmall")
    url = os.environ.get("url", "https://huggingface.co/FunAudioLLM/SenseVoiceSmall")

    meta_data = {
        "lfr_window_size": lfr_window_size,
        "lfr_window_shift": lfr_window_shift,
        "num_input_frames": num_input_frames,
        "normalize_samples": 0,  # input should be in the range [-32768, 32767]
        "model_type": "sense_voice_ctc",
        "version": "1",
        "model_author": model_author,
        "maintainer": "k2-fsa",
        "vocab_size": vocab_size,
        "comment": comment,
        "lang_auto": model.lid_dict["auto"],
        "lang_zh": model.lid_dict["zh"],
        "lang_en": model.lid_dict["en"],
        "lang_yue": model.lid_dict["yue"],  # cantonese
        "lang_ja": model.lid_dict["ja"],
        "lang_ko": model.lid_dict["ko"],
        "lang_nospeech": model.lid_dict["nospeech"],
        "with_itn": model.textnorm_dict["withitn"],
        "without_itn": model.textnorm_dict["woitn"],
        "url": url,
    }
    add_meta_data(filename=filename, meta_data=meta_data)


if __name__ == "__main__":
    torch.manual_seed(20250717)
    main()


================================================
FILE: scripts/sense-voice/rknn/export-rknn.py
================================================
#!/usr/bin/env python3
# Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)

import argparse
import logging
from pathlib import Path

from rknn.api import RKNN

logging.basicConfig(level=logging.WARNING)

g_platforms = [
    #  "rv1103",
    #  "rv1103b",
    #  "rv1106",
    #  "rk2118",
    "rk3562",
    "rk3566",
    "rk3568",
    "rk3576",
    "rk3588",
]


def get_parser():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--target-platform",
        type=str,
        required=True,
        help=f"Supported values are: {','.join(g_platforms)}",
    )

    parser.add_argument(
        "--in-model",
        type=str,
        required=True,
        help="Path to the input onnx model",
    )

    parser.add_argument(
        "--out-model",
        type=str,
        required=True,
        help="Path to the output rknn model",
    )

    return parser


def get_meta_data(model: str):
    import onnxruntime

    session_opts = onnxruntime.SessionOptions()
    session_opts.inter_op_num_threads = 1
    session_opts.intra_op_num_threads = 1

    m = onnxruntime.InferenceSession(
        model,
        sess_options=session_opts,
        providers=["CPUExecutionProvider"],
    )

    for i in m.get_inputs():
        print(i)

    print("-----")

    for i in m.get_outputs():
        print(i)
    print()

    meta = m.get_modelmeta().custom_metadata_map
    s = ""
    sep = ""
    for key, value in meta.items():
        if key in ("neg_mean", "inv_stddev"):
            continue
        s = s + sep + f"{key}={value}"
        sep = ";"
    assert len(s) < 1024, len(s)

    print("len(s)", len(s), s)

    return s


def export_rknn(rknn, filename):
    ret = rknn.export_rknn(filename)
    if ret != 0:
        exit(f"Export rknn model to {filename} failed!")


def init_model(filename: str, target_platform: str, custom_string=None):
    rknn = RKNN(verbose=False)

    rknn.config(
        optimization_level=0,
        target_platform=target_platform,
        custom_string=custom_string,
    )
    if not Path(filename).is_file():
        exit(f"{filename} does not exist")

    ret = rknn.load_onnx(model=filename)
    if ret != 0:
        exit(f"Load model {filename} failed!")

    ret = rknn.build(do_quantization=False)
    if ret != 0:
        exit(f"Build model {filename} failed!")

    return rknn


class RKNNModel:
    def __init__(
        self,
        model: str,
        target_platform: str,
    ):
        meta = get_meta_data(model)
        print(meta)

        self.model = init_model(
            model,
            target_platform=target_platform,
            custom_string=meta,
        )

    def export_rknn(self, model):
        export_rknn(self.model, model)

    def release(self):
        self.model.release()


def main():
    args = get_parser().parse_args()
    print(vars(args))

    model = RKNNModel(
        model=args.in_model,
        target_platform=args.target_platform,
    )

    model.export_rknn(
        model=args.out_model,
    )

    model.release()


if __name__ == "__main__":
    main()


================================================
FILE: scripts/sense-voice/rknn/nano.py
================================================
#!/usr/bin/env python3
# Copyright      2025  Xiaomi Corp.        (authors: Fangjun Kuang)

from torch import nn

import adaptor
import torch_model


class Nano(nn.Module):
    def __init__(self, vocab_size: int = 60515):
        super().__init__()
        self.audio_encoder = torch_model.SenseVoiceEncoderSmall()
        self.ctc_decoder = adaptor.Transformer()
        # blank is 60514, i.e., the last token id
        self.ctc = torch_model.CTC(
            odim=vocab_size,
            encoder_output_size=self.audio_encoder.output_size,
        )

    def forward(self, x):
        """
        Args:
          x: (N, T, C)
        Returns:
          - logits: (N, T, vocab_size)
        """
        encoder_out = self.audio_encoder(x)
        encoder_out = self.ctc_decoder(encoder_out)
        logits = self.ctc.ctc_lo(encoder_out)
        return logits


================================================
FILE: scripts/sense-voice/rknn/test_nano_torch.py
================================================
#!/usr/bin/env python3
# Copyright      2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import base64
from pathlib import Path

import torch

import nano
import test_onnx


def load_tokens(filename: str = "./tokens.txt"):
    id2token = dict()
    with open(filename, encoding="utf-8") as f:
        for line in f:
            try:
                f = line.strip().split()
                if len(f) == 2:
                    t, i = f
                else:
                    t = " "
                    i = f[0]
                id2token[int(i)] = t
            except Exception as ex:
                print(ex)
                raise
    return id2token


def load_torch_model():
    if not Path("./model.pt").is_file():
        raise ValueError(
            "Please download files from https://huggingface.co/csukuangfj/funasr-nano-with-ctc"
        )
    model = nano.Nano()

    state_dict = torch.load("./model.pt", map_location="cpu")

    to_delete = [k for k in state_dict if "llm" in k or "audio_adaptor" in k]

    for k in to_delete:
        del state_dict[k]

    model.load_state_dict(state_dict, strict=True)
    model.eval()

    del state_dict

    return model


@torch.no_grad()
def main():
    model = load_torch_model()
    num_params = sum(p.numel() for p in model.parameters())
    print("num_params (M)", num_params, num_params / 1000000)

    samples, sample_rate = test_onnx.load_audio("./zh.wav")
    assert sample_rate == 16000, sample_rate

    features = test_onnx.compute_feat(samples=samples, sample_rate=sample_rate)
    x = torch.from_numpy(features)[None]
    logits = model(x)

    idx = logits.squeeze(0).argmax(dim=-1)
    print(idx)
    idx = torch.unique_consecutive(idx).tolist()
    print(idx)

    id2token = load_tokens("./tokens.txt")
    blank_id = len(id2token) - 1

    idx = [i for i in idx if i != blank_id]
    print(idx)

    s = b""
    for i in idx:
        s += base64.b64decode(id2token[i])

    text = s.decode().strip()
    print(text)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/sense-voice/rknn/test_onnx.py
================================================
#!/usr/bin/env python3
# Copyright      2025  Xiaomi Corp.        (authors: Fangjun Kuang)

"""
Note: This is for testing the onnx models that would be later used to export
to RKNN
"""

import argparse
from typing import Tuple

import kaldi_native_fbank as knf
import numpy as np
import onnxruntime as ort
import soundfile as sf
import torch


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--model",
        type=str,
        required=True,
        help="Path to model.onnx",
    )

    parser.add_argument(
        "--tokens",
        type=str,
        required=True,
        help="Path to tokens.txt",
    )

    parser.add_argument(
        "--wave",
        type=str,
        required=True,
        help="The input wave to be recognized",
    )

    parser.add_argument(
        "--language",
        type=str,
        default="auto",
        help="the language of the input wav file. Supported values: zh, en, ja, ko, yue, auto",
    )

    parser.add_argument(
        "--use-itn",
        type=int,
        default=0,
        help="1 to use inverse text normalization. 0 to not use inverse text normalization",
    )

    return parser.parse_args()


class OnnxModel:
    def __init__(self, filename):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.session_opts = session_opts

        self.model = ort.InferenceSession(
            filename,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )

        meta = self.model.get_modelmeta().custom_metadata_map

        self.window_size = int(meta["lfr_window_size"])  # lfr_m
        self.window_shift = int(meta["lfr_window_shift"])  # lfr_n

        lang_zh = int(meta["lang_zh"])
        lang_en = int(meta["lang_en"])
        lang_ja = int(meta["lang_ja"])
        lang_ko = int(meta["lang_ko"])
        lang_yue = int(meta["lang_yue"])
        lang_auto = int(meta["lang_auto"])

        self.lang_id = {
            "zh": lang_zh,
            "en": lang_en,
            "ja": lang_ja,
            "ko": lang_ko,
            "yue": lang_yue,
            "auto": lang_auto,
        }
        self.with_itn = int(meta["with_itn"])
        self.without_itn = int(meta["without_itn"])

        self.max_len = self.model.get_inputs()[0].shape[1]

    def __call__(self, x, prompt):
        logits = self.model.run(
            [
                self.model.get_outputs()[0].name,
            ],
            {
                self.model.get_inputs()[0].name: x.numpy(),
                self.model.get_inputs()[1].name: prompt.numpy(),
            },
        )[0]

        return torch.from_numpy(logits)


def load_audio(filename: str) -> Tuple[np.ndarray, int]:
    data, sample_rate = sf.read(
        filename,
        always_2d=True,
        dtype="float32",
    )
    data = data[:, 0]  # use only the first channel
    samples = np.ascontiguousarray(data)
    return samples, sample_rate


def load_tokens(filename):
    ans = dict()
    i = 0
    with open(filename, encoding="utf-8") as f:
        for line in f:
            ans[i] = line.strip().split()[0]
            i += 1
    return ans


def compute_feat(
    samples,
    sample_rate,
    max_len: int = -1,
    window_size: int = 7,  # lfr_m
    window_shift: int = 6,  # lfr_n
):
    opts = knf.FbankOptions()
    opts.frame_opts.dither = 0
    opts.frame_opts.snip_edges = False
    opts.frame_opts.window_type = "hamming"
    opts.frame_opts.samp_freq = sample_rate
    opts.mel_opts.num_bins = 80

    online_fbank = knf.OnlineFbank(opts)
    online_fbank.accept_waveform(sample_rate, (samples * 32768).tolist())
    online_fbank.input_finished()

    features = np.stack(
        [online_fbank.get_frame(i) for i in range(online_fbank.num_frames_ready)]
    )
    assert features.data.contiguous is True
    assert features.dtype == np.float32, features.dtype

    T = (features.shape[0] - window_size) // window_shift + 1
    features = np.lib.stride_tricks.as_strided(
        features,
        shape=(T, features.shape[1] * window_size),
        strides=((window_shift * features.shape[1]) * 4, 4),
    )

    print("features.shape", features.shape)

    if max_len > 0:
        if features.shape[0] > max_len:
            features = features[:max_len]
        elif features.shape[0] < max_len:
            features = np.pad(
                features,
                ((0, max_len - features.shape[0]), (0, 0)),
                mode="constant",
                constant_values=0,
            )

    print("features.shape", features.shape)
    features = np.ascontiguousarray(features)

    return features


def main():
    args = get_args()
    print(vars(args))
    samples, sample_rate = load_audio(args.wave)
    if sample_rate != 16000:
        import librosa

        samples = librosa.resample(samples, orig_sr=sample_rate, target_sr=16000)
        sample_rate = 16000

    model = OnnxModel(filename=args.model)

    features = compute_feat(
        samples=samples,
        sample_rate=sample_rate,
        max_len=model.max_len,
        window_size=model.window_size,
        window_shift=model.window_shift,
    )

    features = torch.from_numpy(features).unsqueeze(0)

    language = model.lang_id["auto"]
    if args.language in model.lang_id:
        language = model.lang_id[args.language]
    else:
        print(f"Invalid language: '{args.language}'")
        print("Use auto")

    if args.use_itn:
        text_norm = model.with_itn
    else:
        text_norm = model.without_itn

    prompt = torch.tensor([language, 1, 2, text_norm], dtype=torch.int32)

    logits = model(
        x=features,
        prompt=prompt,
    )

    idx = logits.squeeze(0).argmax(dim=-1)
    # idx is of shape (T,)
    idx = torch.unique_consecutive(idx)

    blank_id = 0
    idx = idx[idx != blank_id].tolist()

    tokens = load_tokens(args.tokens)
    text = "".join([tokens[i] for i in idx])

    text = text.replace("▁", " ")
    print(text)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/sense-voice/rknn/torch_model.py
================================================
# This file is modified from
# https://github.com/modelscope/FunASR/blob/main/funasr/models/sense_voice/model.py

import torch
import torch.nn
import torch.nn as nn
import torch.nn.functional as F


class SinusoidalPositionEncoder(nn.Module):
    def __init__(self, d_model=80, dropout_rate=0.1):
        super().__init__()

    def encode(
        self,
        positions: torch.Tensor = None,
        depth: int = None,
        dtype: torch.dtype = torch.float32,
    ):
        """
        Args:
          positions: (batch_size, )
        """
        batch_size = positions.size(0)
        positions = positions.type(dtype)
        device = positions.device
        log_timescale_increment = torch.log(
            torch.tensor([10000], dtype=dtype, device=device)
        ) / (depth / 2 - 1)
        inv_timescales = torch.exp(
            torch.arange(depth / 2, device=device).type(dtype)
            * (-log_timescale_increment)
        )
        inv_timescales = torch.reshape(inv_timescales, [batch_size, -1])
        scaled_time = torch.reshape(positions, [1, -1, 1]) * torch.reshape(
            inv_timescales, [1, 1, -1]
        )
        encoding = torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)], dim=2)
        return encoding.type(dtype)

    def forward(self, x):
        batch_size, timesteps, input_dim = x.size()
        positions = torch.arange(1, timesteps + 1, device=x.device)[None, :]
        position_encoding = self.encode(positions, input_dim, x.dtype).to(x.device)

        return x + position_encoding


class PositionwiseFeedForward(nn.Module):
    """Positionwise feed forward layer.

    Args:
        idim (int): Input dimension.
        hidden_units (int): The number of hidden units.
        dropout_rate (float): Dropout rate.

    """

    def __init__(self, idim, hidden_units, dropout_rate, activation=None):
        super().__init__()
        self.w_1 = torch.nn.Linear(idim, hidden_units)
        self.w_2 = torch.nn.Linear(hidden_units, idim)
        self.dropout = torch.nn.Dropout(dropout_rate)
        if activation is None:
            activation = torch.nn.ReLU()
        self.activation = activation

    def forward(self, x):
        """Forward function."""
        return self.w_2(self.dropout(self.activation(self.w_1(x))))


class MultiHeadedAttentionSANM(nn.Module):
    """Multi-Head Attention layer.

    Args:
        n_head (int): The number of heads.
        n_feat (int): The number of features.
        dropout_rate (float): Dropout rate.

    """

    def __init__(
        self,
        n_head,
        in_feat,
        n_feat,
        dropout_rate,
        kernel_size,
        sanm_shfit=0,
        lora_list=None,
        lora_rank=8,
        lora_alpha=16,
        lora_dropout=0.1,
    ):
        super().__init__()
        assert n_feat % n_head == 0
        # We assume d_v always equals d_k
        self.d_k = n_feat // n_head
        self.h = n_head
        self.linear_out = nn.Linear(n_feat, n_feat)
        self.linear_q_k_v = nn.Linear(in_feat, n_feat * 3)
        self.attn = None
        self.dropout = nn.Dropout(p=dropout_rate)

        self.fsmn_block = nn.Conv1d(
            n_feat, n_feat, kernel_size, stride=1, padding=0, groups=n_feat, bias=False
        )
        # padding
        left_padding = (kernel_size - 1) // 2
        if sanm_shfit > 0:
            left_padding = left_padding + sanm_shfit
        right_padding = kernel_size - 1 - left_padding
        self.pad_fn = nn.ConstantPad1d((left_padding, right_padding), 0.0)

    def forward_fsmn(self, inputs, mask, mask_shfit_chunk=None):
        b, t, d = inputs.size()
        if mask is not None:
            mask = torch.reshape(mask, (b, -1, 1))
            if mask_shfit_chunk is not None:
                mask = mask * mask_shfit_chunk
            inputs = inputs * mask

        x = inputs.transpose(1, 2)
        x = self.pad_fn(x)
        x = self.fsmn_block(x)
        x = x.transpose(1, 2)
        x += inputs
        x = self.dropout(x)
        if mask is not None:
            x = x * mask
        return x

    def forward_qkv(self, x):
        """Transform query, key and value.

        Args:
            query (torch.Tensor): Query tensor (#batch, time1, size).
            key (torch.Tensor): Key tensor (#batch, time2, size).
            value (torch.Tensor): Value tensor (#batch, time2, size).

        Returns:
            torch.Tensor: Transformed query tensor (#batch, n_head, time1, d_k).
            torch.Tensor: Transformed key tensor (#batch, n_head, time2, d_k).
            torch.Tensor: Transformed value tensor (#batch, n_head, time2, d_k).

        """
        b, t, d = x.size()
        q_k_v = self.linear_q_k_v(x)
        q, k, v = torch.split(q_k_v, int(self.h * self.d_k), dim=-1)
        q_h = torch.reshape(q, (b, t, self.h, self.d_k)).transpose(
            1, 2
        )  # (batch, head, time1, d_k)
        k_h = torch.reshape(k, (b, t, self.h, self.d_k)).transpose(
            1, 2
        )  # (batch, head, time2, d_k)
        v_h = torch.reshape(v, (b, t, self.h, self.d_k)).transpose(
            1, 2
        )  # (batch, head, time2, d_k)

        return q_h, k_h, v_h, v

    def forward_attention(self, value, scores, mask, mask_att_chunk_encoder=None):
        """Compute attention context vector.

        Args:
            value (torch.Tensor): Transformed value (#batch, n_head, time2, d_k).
            scores (torch.Tensor): Attention score (#batch, n_head, time1, time2).
            mask (torch.Tensor): Mask (#batch, 1, time2) or (#batch, time1, time2).

        Returns:
            torch.Tensor: Transformed value (#batch, time1, d_model)
                weighted by the attention score (#batch, time1, time2).

        """
        n_batch = value.size(0)
        if mask is not None:
            if mask_att_chunk_encoder is not None:
                mask = mask * mask_att_chunk_encoder

            mask = mask.unsqueeze(1).eq(0)  # (batch, 1, *, time2)

            min_value = -float(
                "inf"
            )  # float(numpy.finfo(torch.tensor(0, dtype=scores.dtype).numpy().dtype).min)
            scores = scores.masked_fill(mask, min_value)
            attn = torch.softmax(scores, dim=-1).masked_fill(
                mask, 0.0
            )  # (batch, head, time1, time2)
        else:
            attn = torch.softmax(scores, dim=-1)  # (batch, head, time1, time2)

        p_attn = self.dropout(attn)
        x = torch.matmul(p_attn, value)  # (batch, head, time1, d_k)
        x = (
            x.transpose(1, 2).contiguous().view(n_batch, -1, self.h * self.d_k)
        )  # (batch, time1, d_model)

        return self.linear_out(x)  # (batch, time1, d_model)

    def forward(self, x, mask, mask_shfit_chunk=None, mask_att_chunk_encoder=None):
        """Compute scaled dot product attention.

        Args:
            query (torch.Tensor): Query tensor (#batch, time1, size).
            key (torch.Tensor): Key tensor (#batch, time2, size).
            value (torch.Tensor): Value tensor (#batch, time2, size).
            mask (torch.Tensor): Mask tensor (#batch, 1, time2) or
                (#batch, time1, time2).

        Returns:
            torch.Tensor: Output tensor (#batch, time1, d_model).

        """
        q_h, k_h, v_h, v = self.forward_qkv(x)
        fsmn_memory = self.forward_fsmn(v, mask, mask_shfit_chunk)
        q_h = q_h * self.d_k ** (-0.5)
        scores = torch.matmul(q_h, k_h.transpose(-2, -1))
        att_outs = self.forward_attention(v_h, scores, mask, mask_att_chunk_encoder)
        return att_outs + fsmn_memory


class EncoderLayerSANM(nn.Module):
    def __init__(
        self,
        in_size,
        size,
        self_attn,
        feed_forward,
        dropout_rate,
        normalize_before=True,
        concat_after=False,
        stochastic_depth_rate=0.0,
    ):
        super().__init__()
        self.self_attn = self_attn
        self.feed_forward = feed_forward
        self.norm1 = LayerNorm(in_size)
        self.norm2 = LayerNorm(size)
        self.dropout = nn.Dropout(dropout_rate)
        self.in_size = in_size
        self.size = size
        self.normalize_before = normalize_before
        self.concat_after = concat_after
        if self.concat_after:
            self.concat_linear = nn.Linear(size + size, size)
        self.stochastic_depth_rate = stochastic_depth_rate
        self.dropout_rate = dropout_rate

    def forward(
        self, x, mask, cache=None, mask_shfit_chunk=None, mask_att_chunk_encoder=None
    ):
        """Compute encoded features.

        Args:
            x_input (torch.Tensor): Input tensor (#batch, time, size).
            mask (torch.Tensor): Mask tensor for the input (#batch, time).
            cache (torch.Tensor): Cache tensor of the input (#batch, time - 1, size).

        Returns:
            torch.Tensor: Output tensor (#batch, time, size).
            torch.Tensor: Mask tensor (#batch, time).

        """
        skip_layer = False
        # with stochastic depth, residual connection `x + f(x)` becomes
        # `x <- x + 1 / (1 - p) * f(x)` at training time.
        stoch_layer_coeff = 1.0
        if self.training and self.stochastic_depth_rate > 0:
            skip_layer = torch.rand(1).item() < self.stochastic_depth_rate
            stoch_layer_coeff = 1.0 / (1 - self.stochastic_depth_rate)

        if skip_layer:
            if cache is not None:
                x = torch.cat([cache, x], dim=1)
            return x, mask

        residual = x
        if self.normalize_before:
            x = self.norm1(x)

        if self.concat_after:
            x_concat = torch.cat(
                (
                    x,
                    self.self_attn(
                        x,
                        mask,
                        mask_shfit_chunk=mask_shfit_chunk,
                        mask_att_chunk_encoder=mask_att_chunk_encoder,
                    ),
                ),
                dim=-1,
            )
            if self.in_size == self.size:
                x = residual + stoch_layer_coeff * self.concat_linear(x_concat)
            else:
                x = stoch_layer_coeff * self.concat_linear(x_concat)
        else:
            if self.in_size == self.size:
                x = residual + stoch_layer_coeff * self.dropout(
                    self.self_attn(
                        x,
                        mask,
                        mask_shfit_chunk=mask_shfit_chunk,
                        mask_att_chunk_encoder=mask_att_chunk_encoder,
                    )
                )
            else:
                x = stoch_layer_coeff * self.dropout(
                    self.self_attn(
                        x,
                        mask,
                        mask_shfit_chunk=mask_shfit_chunk,
                        mask_att_chunk_encoder=mask_att_chunk_encoder,
                    )
                )
                return x, mask
        if not self.normalize_before:
            x = self.norm1(x)

        residual = x
        if self.normalize_before:
            x = self.norm2(x)
        x = residual + stoch_layer_coeff * self.dropout(self.feed_forward(x))
        if not self.normalize_before:
            x = self.norm2(x)

        return x, mask, cache, mask_shfit_chunk, mask_att_chunk_encoder


class LayerNorm(nn.LayerNorm):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def forward(self, input):
        output = F.layer_norm(
            input.float(),
            self.normalized_shape,
            self.weight.float() if self.weight is not None else None,
            self.bias.float() if self.bias is not None else None,
            self.eps,
        )
        return output.type_as(input)


class SenseVoiceEncoderSmall(nn.Module):
    def __init__(self):
        super().__init__()
        self.input_size = 80 * 7
        self.output_size = 512
        self.attention_heads = 4
        self.linear_units = 2048
        self.num_blocks = 50
        self.tp_blocks = 20
        self.input_layer = "pe"
        self.pos_enc_class = "SinusoidalPositionEncoder"
        self.normalize_before = True
        self.kernel_size = 11
        self.sanm_shfit = 0
        self.concat_after = False
        self.positionwise_layer_type = "linear"
        self.positionwise_conv_kernel_size = 1
        self.padding_idx = -1
        self.selfattention_layer_type = "sanm"
        self.dropout_rate = 0.1
        self.attention_dropout_rate = 0.1

        self._output_size = self.output_size

        self.embed = SinusoidalPositionEncoder()

        positionwise_layer = PositionwiseFeedForward
        positionwise_layer_args = (
            self.output_size,
            self.linear_units,
            self.dropout_rate,
        )

        encoder_selfattn_layer = MultiHeadedAttentionSANM
        encoder_selfattn_layer_args0 = (
            self.attention_heads,
            self.input_size,
            self.output_size,
            self.attention_dropout_rate,
            self.kernel_size,
            self.sanm_shfit,
        )
        encoder_selfattn_layer_args = (
            self.attention_heads,
            self.output_size,
            self.output_size,
            self.attention_dropout_rate,
            self.kernel_size,
            self.sanm_shfit,
        )

        self.encoders0 = nn.ModuleList(
            [
                EncoderLayerSANM(
                    self.input_size,
                    self.output_size,
                    encoder_selfattn_layer(*encoder_selfattn_layer_args0),
                    positionwise_layer(*positionwise_layer_args),
                    self.dropout_rate,
                )
                for i in range(1)
            ]
        )

        self.encoders = nn.ModuleList(
            [
                EncoderLayerSANM(
                    self.output_size,
                    self.output_size,
                    encoder_selfattn_layer(*encoder_selfattn_layer_args),
                    positionwise_layer(*positionwise_layer_args),
                    self.dropout_rate,
                )
                for i in range(self.num_blocks - 1)
            ]
        )

        self.tp_encoders = nn.ModuleList(
            [
                EncoderLayerSANM(
                    self.output_size,
                    self.output_size,
                    encoder_selfattn_layer(*encoder_selfattn_layer_args),
                    positionwise_layer(*positionwise_layer_args),
                    self.dropout_rate,
                )
                for i in range(self.tp_blocks)
            ]
        )

        self.after_norm = LayerNorm(self.output_size)

        self.tp_norm = LayerNorm(self.output_size)

    def forward(
        self,
        xs_pad: torch.Tensor,
    ):
        masks = None

        xs_pad *= self.output_size**0.5

        xs_pad = self.embed(xs_pad)

        # forward encoder1
        for layer_idx, encoder_layer in enumerate(self.encoders0):
            encoder_outs = encoder_layer(xs_pad, masks)
            xs_pad, masks = encoder_outs[0], encoder_outs[1]

        for layer_idx, encoder_layer in enumerate(self.encoders):
            encoder_outs = encoder_layer(xs_pad, masks)
            xs_pad, masks = encoder_outs[0], encoder_outs[1]

        xs_pad = self.after_norm(xs_pad)

        for layer_idx, encoder_layer in enumerate(self.tp_encoders):
            encoder_outs = encoder_layer(xs_pad, masks)
            xs_pad, masks = encoder_outs[0], encoder_outs[1]

        xs_pad = self.tp_norm(xs_pad)
        return xs_pad


class CTC(nn.Module):
    def __init__(
        self,
        odim: int,
        encoder_output_size: int,
        dropout_rate: float = 0.0,
        ctc_type: str = "builtin",
        reduce: bool = True,
        ignore_nan_grad: bool = True,
        extra_linear: bool = True,
    ):
        super().__init__()
        eprojs = encoder_output_size
        self.dropout_rate = dropout_rate

        if extra_linear:
            self.ctc_lo = torch.nn.Linear(eprojs, odim)
        else:
            self.ctc_lo = None

    def softmax(self, hs_pad):
        """softmax of frame activations

        Args:
            Tensor hs_pad: 3d tensor (B, Tmax, eprojs)
        Returns:
            torch.Tensor: softmax applied 3d tensor (B, Tmax, odim)
        """
        if self.ctc_lo is not None:
            return F.softmax(self.ctc_lo(hs_pad), dim=2)
        else:
            return F.softmax(hs_pad, dim=2)

    def log_softmax(self, hs_pad):
        """log_softmax of frame activations

        Args:
            Tensor hs_pad: 3d tensor (B, Tmax, eprojs)
        Returns:
            torch.Tensor: log softmax applied 3d tensor (B, Tmax, odim)
        """
        if self.ctc_lo is not None:
            return F.log_softmax(self.ctc_lo(hs_pad), dim=2)
        else:
            return F.log_softmax(hs_pad, dim=2)

    def argmax(self, hs_pad):
        """argmax of frame activations

        Args:
            torch.Tensor hs_pad: 3d tensor (B, Tmax, eprojs)
        Returns:
            torch.Tensor: argmax applied 2d tensor (B, Tmax)
        """
        if self.ctc_lo is not None:
            return torch.argmax(self.ctc_lo(hs_pad), dim=2)
        else:
            return torch.argmax(hs_pad, dim=2)


class SenseVoiceSmall(nn.Module):
    def __init__(self, neg_mean: torch.Tensor, inv_stddev: torch.Tensor):
        super().__init__()
        self.sos = 1
        self.eos = 2
        self.length_normalized_loss = True
        self.ignore_id = -1
        self.blank_id = 0
        self.input_size = 80 * 7
        self.vocab_size = 25055

        self.neg_mean = neg_mean.unsqueeze(0).unsqueeze(0)
        self.inv_stddev = inv_stddev.unsqueeze(0).unsqueeze(0)

        self.lid_dict = {
            "auto": 0,
            "zh": 3,
            "en": 4,
            "yue": 7,
            "ja": 11,
            "ko": 12,
            "nospeech": 13,
        }
        self.lid_int_dict = {
            24884: 3,
            24885: 4,
            24888: 7,
            24892: 11,
            24896: 12,
            24992: 13,
        }
        self.textnorm_dict = {"withitn": 14, "woitn": 15}
        self.textnorm_int_dict = {25016: 14, 25017: 15}

        self.emo_dict = {
            "unk": 25009,
            "happy": 25001,
            "sad": 25002,
            "angry": 25003,
            "neutral": 25004,
        }

        self.encoder = SenseVoiceEncoderSmall()
        self.ctc = CTC(
            odim=self.vocab_size,
            encoder_output_size=self.encoder.output_size,
        )
        self.embed = torch.nn.Embedding(
            7 + len(self.lid_dict) + len(self.textnorm_dict), self.input_size
        )

    def forward(self, x, prompt):
        input_query = self.embed(prompt).unsqueeze(0)

        # for export, we always assume x and self.neg_mean are on CPU
        x = (x + self.neg_mean) * self.inv_stddev
        x = torch.cat((input_query, x), dim=1)

        encoder_out = self.encoder(x)
        logits = self.ctc.ctc_lo(encoder_out)

        return logits


================================================
FILE: scripts/sense-voice/show-info.py
================================================
#!/usr/bin/env python3
# Copyright      2024  Xiaomi Corp.        (authors: Fangjun Kuang)

import onnxruntime


def show(filename):
    session_opts = onnxruntime.SessionOptions()
    session_opts.log_severity_level = 3
    sess = onnxruntime.InferenceSession(filename, session_opts)
    for i in sess.get_inputs():
        print(i)

    print("-----")

    for i in sess.get_outputs():
        print(i)

    meta = sess.get_modelmeta().custom_metadata_map
    print("*****************************************")
    print("meta\n", meta)


def main():
    print("=========model==========")
    show("./model.onnx")


if __name__ == "__main__":
    main()
"""
=========model==========
NodeArg(name='x', type='tensor(float)', shape=['N', 'T', 560])
NodeArg(name='x_length', type='tensor(int32)', shape=['N'])
NodeArg(name='language', type='tensor(int32)', shape=['N'])
NodeArg(name='text_norm', type='tensor(int32)', shape=['N'])
-----
NodeArg(name='logits', type='tensor(float)', shape=['N', 'T', 25055])
*****************************************
"""


================================================
FILE: scripts/sense-voice/test.py
================================================
#!/usr/bin/env python3
# Copyright      2024  Xiaomi Corp.        (authors: Fangjun Kuang)

import argparse
from typing import Tuple

import kaldi_native_fbank as knf
import numpy as np
import onnxruntime
import onnxruntime as ort
import soundfile as sf
import torch


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--model",
        type=str,
        required=True,
        help="Path to model.onnx",
    )

    parser.add_argument(
        "--tokens",
        type=str,
        required=True,
        help="Path to tokens.txt",
    )

    parser.add_argument(
        "--wave",
        type=str,
        required=True,
        help="The input wave to be recognized",
    )

    parser.add_argument(
        "--language",
        type=str,
        default="auto",
        help="the language of the input wav file. Supported values: zh, en, ja, ko, yue, auto",
    )

    parser.add_argument(
        "--use-itn",
        type=int,
        default=0,
        help="1 to use inverse text normalization. 0 to not use inverse text normalization",
    )

    return parser.parse_args()


class OnnxModel:
    def __init__(self, filename):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.session_opts = session_opts

        self.model = ort.InferenceSession(
            filename,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )

        meta = self.model.get_modelmeta().custom_metadata_map

        self.window_size = int(meta["lfr_window_size"])  # lfr_m
        self.window_shift = int(meta["lfr_window_shift"])  # lfr_n

        lang_zh = int(meta["lang_zh"])
        lang_en = int(meta["lang_en"])
        lang_ja = int(meta["lang_ja"])
        lang_ko = int(meta["lang_ko"])
        lang_auto = int(meta["lang_auto"])

        self.lang_id = {
            "zh": lang_zh,
            "en": lang_en,
            "ja": lang_ja,
            "ko": lang_ko,
            "auto": lang_auto,
        }
        self.with_itn = int(meta["with_itn"])
        self.without_itn = int(meta["without_itn"])

        neg_mean = meta["neg_mean"].split(",")
        neg_mean = list(map(lambda x: float(x), neg_mean))

        inv_stddev = meta["inv_stddev"].split(",")
        inv_stddev = list(map(lambda x: float(x), inv_stddev))

        self.neg_mean = np.array(neg_mean, dtype=np.float32)
        self.inv_stddev = np.array(inv_stddev, dtype=np.float32)

    def __call__(self, x, x_length, language, text_norm):
        logits = self.model.run(
            [
                self.model.get_outputs()[0].name,
            ],
            {
                self.model.get_inputs()[0].name: x.numpy(),
                self.model.get_inputs()[1].name: x_length.numpy(),
                self.model.get_inputs()[2].name: language.numpy(),
                self.model.get_inputs()[3].name: text_norm.numpy(),
            },
        )[0]

        return torch.from_numpy(logits)


def load_audio(filename: str) -> Tuple[np.ndarray, int]:
    data, sample_rate = sf.read(
        filename,
        always_2d=True,
        dtype="float32",
    )
    data = data[:, 0]  # use only the first channel
    samples = np.ascontiguousarray(data)
    return samples, sample_rate


def load_tokens(filename):
    ans = dict()
    i = 0
    with open(filename, encoding="utf-8") as f:
        for line in f:
            ans[i] = line.strip().split()[0]
            i += 1
    return ans


def compute_feat(
    samples,
    sample_rate,
    neg_mean: np.ndarray,
    inv_stddev: np.ndarray,
    window_size: int = 7,  # lfr_m
    window_shift: int = 6,  # lfr_n
):
    opts = knf.FbankOptions()
    opts.frame_opts.dither = 0
    opts.frame_opts.snip_edges = False
    opts.frame_opts.window_type = "hamming"
    opts.frame_opts.samp_freq = sample_rate
    opts.mel_opts.num_bins = 80

    online_fbank = knf.OnlineFbank(opts)
    online_fbank.accept_waveform(sample_rate, (samples * 32768).tolist())
    online_fbank.input_finished()

    features = np.stack(
        [online_fbank.get_frame(i) for i in range(online_fbank.num_frames_ready)]
    )
    assert features.data.contiguous is True
    assert features.dtype == np.float32, features.dtype

    T = (features.shape[0] - window_size) // window_shift + 1
    features = np.lib.stride_tricks.as_strided(
        features,
        shape=(T, features.shape[1] * window_size),
        strides=((window_shift * features.shape[1]) * 4, 4),
    )

    features = (features + neg_mean) * inv_stddev

    return features


def main():
    args = get_args()
    print(vars(args))
    samples, sample_rate = load_audio(args.wave)
    if sample_rate != 16000:
        import librosa

        samples = librosa.resample(samples, orig_sr=sample_rate, target_sr=16000)
        sample_rate = 16000

    model = OnnxModel(filename=args.model)

    features = compute_feat(
        samples=samples,
        sample_rate=sample_rate,
        neg_mean=model.neg_mean,
        inv_stddev=model.inv_stddev,
        window_size=model.window_size,
        window_shift=model.window_shift,
    )

    features = torch.from_numpy(features).unsqueeze(0)
    features_length = torch.tensor([features.size(1)], dtype=torch.int32)

    language = model.lang_id["auto"]
    if args.language in model.lang_id:
        language = model.lang_id[args.language]
    else:
        print(f"Invalid language: '{args.language}'")
        print("Use auto")

    if args.use_itn:
        text_norm = model.with_itn
    else:
        text_norm = model.without_itn

    language = torch.tensor([language], dtype=torch.int32)
    text_norm = torch.tensor([text_norm], dtype=torch.int32)

    logits = model(
        x=features,
        x_length=features_length,
        language=language,
        text_norm=text_norm,
    )

    idx = logits.squeeze(0).argmax(dim=-1)
    # idx is of shape (T,)
    idx = torch.unique_consecutive(idx)

    blank_id = 0
    idx = idx[idx != blank_id].tolist()

    tokens = load_tokens(args.tokens)
    text = "".join([tokens[i] for i in idx])

    text = text.replace("▁", " ")
    print(text)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/sense-voice/test_onnx_nano.py
================================================
#!/usr/bin/env python3
# Copyright      2025  Xiaomi Corp.        (authors: Fangjun Kuang)

"""
=========./model.onnx==========
NodeArg(name='x', type='tensor(float)', shape=[1, 'T', 560])
-----
NodeArg(name='logits', type='tensor(float)', shape=['Addlogits_dim_0', 'Addlogits_dim_1', 60515])

=========./model.int8.onnx==========
NodeArg(name='x', type='tensor(float)', shape=[1, 'T', 560])
-----
NodeArg(name='logits', type='tensor(float)', shape=['Addlogits_dim_0', 'Addlogits_dim_1', 60515])
"""

import argparse
import base64
from typing import Tuple

from test_onnx import compute_feat, load_audio

import onnxruntime as ort
import librosa


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--model",
        type=str,
        required=True,
        help="Path to model.onnx",
    )

    parser.add_argument(
        "--tokens",
        type=str,
        required=True,
        help="Path to tokens.txt",
    )

    parser.add_argument(
        "--wave",
        type=str,
        required=True,
        help="The input wave to be recognized",
    )

    return parser.parse_args()


class OnnxModel:
    def __init__(self, filename):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.session_opts = session_opts

        self.model = ort.InferenceSession(
            filename,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )

        meta = self.model.get_modelmeta().custom_metadata_map

        self.window_size = int(meta["lfr_window_size"])  # lfr_m
        self.window_shift = int(meta["lfr_window_shift"])  # lfr_n
        self.blank_id = int(meta["blank_id"])

    def __call__(self, x):
        logits = self.model.run(
            [
                self.model.get_outputs()[0].name,
            ],
            {
                self.model.get_inputs()[0].name: x,
            },
        )[0]

        return logits


def load_tokens(filename: str):
    ans = dict()
    i = 0
    with open(filename, encoding="utf-8") as f:
        for line in f:
            ans[i] = line.strip().split()[0]
            i += 1
    return ans


def main():
    args = get_args()
    print(vars(args))
    samples, sample_rate = load_audio(args.wave)
    if sample_rate != 16000:
        samples = librosa.resample(samples, orig_sr=sample_rate, target_sr=16000)
        sample_rate = 16000

    model = OnnxModel(filename=args.model)

    features = compute_feat(
        samples=samples,
        sample_rate=sample_rate,
        window_size=model.window_size,
        window_shift=model.window_shift,
    )

    logits = model(
        x=features[None],
    )

    idx = logits[0].argmax(axis=-1)
    print("initial ids", idx)
    id2token = load_tokens(args.tokens)
    blank_id = model.blank_id
    print("blank_id", blank_id)

    unique_ids = []
    prev = -1
    for i in idx:
        if i == prev:
            continue
        unique_ids.append(i)
        prev = i
    print("unique_ids", unique_ids)

    ids = [i for i in unique_ids if i != blank_id]

    print("ids without blank", ids)
    s = b""
    for i in ids:
        s += base64.b64decode(id2token[i])

    text = s.decode().strip()
    print(text)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/silero_vad/v4/README.md
================================================
# Introduction

This folder contains script for exporting
[silero_vad v4](https://github.com/snakers4/silero-vad/tree/v4.0)
to rknn.

# Steps to run

## 1. Download a jit model
You can download it from <https://github.com/snakers4/silero-vad/blob/v4.0/files/silero_vad.jit>

```bash
wget https://github.com/snakers4/silero-vad/raw/refs/tags/v4.0/files/silero_vad.jit
```

```bash
ls -lh silero_vad.jit
-rw-r--r-- 1 kuangfangjun root 1.4M Mar 30 11:04 silero_vad.jit
```

## 2. Export it to onnx
```bash
./export-onnx.py
```

It will generate a file `./m.onnx`

```bash
 ls -lh m.onnx
-rw-r--r-- 1 kuangfangjun root 627K Mar 30 11:13 m.onnx
```

## 3. Test the onnx model

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
./test-onnx.py  --model ./m.onnx --wav ./lei-jun-test.wav
```

## 4. Convert the onnx model to RKNN format

We assume you have installed rknn toolkit 2.1
```bash
./export-rknn.py --in-model ./m.onnx --out-model m.rknn  --target-platform rk3588
```

It will generate a file `./m.rknn`

```bash
ls -lh m.rknn
-rw-r--r-- 1 kuangfangjun root 2.2M Mar 30 11:19 m.rknn
```


================================================
FILE: scripts/silero_vad/v4/export-onnx.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import onnx
import torch
from onnxsim import simplify

import torch
from torch import Tensor


def simple_pad(x: Tensor, pad: int) -> Tensor:
    #  _0 = torch.slice(torch.slice(torch.slice(x), 1), 2, 1, torch.add(1, pad))
    _0 = x[:, :, 1 : 1 + pad]

    left_pad = torch.flip(_0, [-1])
    #  _1 = torch.slice(torch.slice(torch.slice(x), 1), 2, torch.sub(-1, pad), -1)

    _1 = x[:, :, (-1 - pad) : -1]

    right_pad = torch.flip(_1, [-1])
    _2 = torch.cat([left_pad, x, right_pad], 2)
    return _2


class MyModule(torch.nn.Module):
    def __init__(self, m):
        super().__init__()
        self.m = m

    def adaptive_normalization_forward(self, spect):
        m = self.m._model.adaptive_normalization
        _0 = simple_pad

        # Note(fangjun): rknn uses fp16 by default, whose max value is 65504
        # so we need to re-write the computation for spect0
        #  spect0 = torch.log1p(torch.mul(spect, 1048576))
        spect0 = torch.log1p(spect) + 13.86294

        _1 = torch.eq(len(spect0.shape), 2)
        if _1:
            _2 = torch.unsqueeze(spect0, 0)
            spect1 = _2
        else:
            spect1 = spect0
        mean = torch.mean(spect1, [1], True)
        to_pad = m.to_pad
        mean0 = _0(
            mean,
            to_pad,
        )
        filter_ = m.filter_
        mean1 = torch.conv1d(mean0, filter_)
        mean_mean = torch.mean(mean1, [-1], True)
        spect2 = torch.add(spect1, torch.neg(mean_mean))
        return spect2

    def forward(self, x: torch.Tensor, h: torch.Tensor, c: torch.Tensor):
        m = self.m._model

        feature_extractor = m.feature_extractor
        x0 = (feature_extractor).forward(
            x,
        )
        norm = self.adaptive_normalization_forward(x0)
        x1 = torch.cat([x0, norm], 1)
        first_layer = m.first_layer
        x2 = (first_layer).forward(
            x1,
        )
        encoder = m.encoder
        x3 = (encoder).forward(
            x2,
        )
        decoder = m.decoder
        x4, h0, c0, = (decoder).forward(
            x3,
            h,
            c,
        )
        _0 = torch.mean(torch.squeeze(x4, 1), [1])
        out = torch.unsqueeze(_0, 1)
        return (out, h0, c0)


@torch.no_grad()
def main():
    m = torch.jit.load("./silero_vad.jit")
    m = MyModule(m)
    x = torch.rand((1, 512), dtype=torch.float32)
    h = torch.rand((2, 1, 64), dtype=torch.float32)
    c = torch.rand((2, 1, 64), dtype=torch.float32)
    m = torch.jit.script(m)
    torch.onnx.export(
        m,
        (x, h, c),
        "m.onnx",
        input_names=["x", "h", "c"],
        output_names=["prob", "next_h", "next_c"],
    )

    print("simplifying ...")
    model = onnx.load("m.onnx")

    meta_data = {
        "model_type": "silero-vad-v4",
        "sample_rate": 16000,
        "version": 4,
        "h_shape": "2,1,64",
        "c_shape": "2,1,64",
    }

    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)
    print("--------------------")
    print(model.metadata_props)

    model_simp, check = simplify(model)
    onnx.save(model_simp, "m.onnx")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/silero_vad/v4/export-rknn.py
================================================
#!/usr/bin/env python3
# Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)

import argparse
import logging
from pathlib import Path

from rknn.api import RKNN

logging.basicConfig(level=logging.WARNING)

g_platforms = [
    #  "rv1103",
    #  "rv1103b",
    #  "rv1106",
    #  "rk2118",
    "rk3562",
    "rk3566",
    "rk3568",
    "rk3576",
    "rk3588",
]


def get_parser():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--target-platform",
        type=str,
        required=True,
        help=f"Supported values are: {','.join(g_platforms)}",
    )

    parser.add_argument(
        "--in-model",
        type=str,
        required=True,
        help="Path to the input onnx model",
    )

    parser.add_argument(
        "--out-model",
        type=str,
        required=True,
        help="Path to the output rknn model",
    )

    return parser


def get_meta_data(model: str):
    import onnxruntime

    session_opts = onnxruntime.SessionOptions()
    session_opts.inter_op_num_threads = 1
    session_opts.intra_op_num_threads = 1

    m = onnxruntime.InferenceSession(
        model,
        sess_options=session_opts,
        providers=["CPUExecutionProvider"],
    )

    for i in m.get_inputs():
        print(i)

    print("-----")

    for i in m.get_outputs():
        print(i)
    print()

    meta = m.get_modelmeta().custom_metadata_map
    s = ""
    sep = ""
    for key, value in meta.items():
        s = s + sep + f"{key}={value}"
        sep = ";"
    assert len(s) < 1024

    return s


def export_rknn(rknn, filename):
    ret = rknn.export_rknn(filename)
    if ret != 0:
        exit("Export rknn model to {filename} failed!")


def init_model(filename: str, target_platform: str, custom_string=None):
    rknn = RKNN(verbose=False)

    rknn.config(
        optimization_level=0,
        target_platform=target_platform,
        custom_string=custom_string,
    )
    if not Path(filename).is_file():
        exit(f"{filename} does not exist")

    ret = rknn.load_onnx(model=filename)
    if ret != 0:
        exit(f"Load model {filename} failed!")

    ret = rknn.build(do_quantization=False)
    if ret != 0:
        exit("Build model {filename} failed!")

    return rknn


class RKNNModel:
    def __init__(
        self,
        model: str,
        target_platform: str,
    ):
        meta = get_meta_data(model)
        print(meta)

        self.model = init_model(
            model,
            target_platform=target_platform,
            custom_string=meta,
        )

    def export_rknn(self, model):
        export_rknn(self.model, model)

    def release(self):
        self.model.release()


def main():
    args = get_parser().parse_args()
    print(vars(args))

    model = RKNNModel(
        model=args.in_model,
        target_platform=args.target_platform,
    )

    model.export_rknn(
        model=args.out_model,
    )

    model.release()


if __name__ == "__main__":
    main()


================================================
FILE: scripts/silero_vad/v4/show.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import onnxruntime
import onnx

"""
[key: "model_type"
value: "silero-vad-v4"
, key: "sample_rate"
value: "16000"
, key: "version"
value: "4"
, key: "h_shape"
value: "2,1,64"
, key: "c_shape"
value: "2,1,64"
]
NodeArg(name='x', type='tensor(float)', shape=[1, 512])
NodeArg(name='h', type='tensor(float)', shape=[2, 1, 64])
NodeArg(name='c', type='tensor(float)', shape=[2, 1, 64])
-----
NodeArg(name='prob', type='tensor(float)', shape=[1, 1])
NodeArg(name='next_h', type='tensor(float)', shape=[2, 1, 64])
NodeArg(name='next_c', type='tensor(float)', shape=[2, 1, 64])
"""


def show(filename):
    model = onnx.load(filename)
    print(model.metadata_props)

    session_opts = onnxruntime.SessionOptions()
    session_opts.log_severity_level = 3
    sess = onnxruntime.InferenceSession(
        filename, session_opts, providers=["CPUExecutionProvider"]
    )
    for i in sess.get_inputs():
        print(i)

    print("-----")

    for i in sess.get_outputs():
        print(i)


def main():
    show("./m.onnx")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/silero_vad/v4/test-on-rk3588-board.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

# Please run this file on your rk3588 board

try:
    from rknnlite.api import RKNNLite
except:
    print("Please run this file on your board (linux + aarch64 + npu)")
    print("You need to install rknn_toolkit_lite2")
    print(
        " from https://github.com/airockchip/rknn-toolkit2/tree/master/rknn-toolkit-lite2/packages"
    )
    print(
        "https://github.com/airockchip/rknn-toolkit2/blob/v2.1.0/rknn-toolkit-lite2/packages/rknn_toolkit_lite2-2.1.0-cp310-cp310-linux_aarch64.whl"
    )
    print("is known to work")
    raise

import time
from pathlib import Path
from typing import Tuple

import numpy as np
import soundfile as sf


def load_audio(filename: str) -> Tuple[np.ndarray, int]:
    data, sample_rate = sf.read(
        filename,
        always_2d=True,
        dtype="float32",
    )
    data = data[:, 0]  # use only the first channel

    samples = np.ascontiguousarray(data)
    return samples, sample_rate


def init_model(filename, target_platform="rk3588"):
    if not Path(filename).is_file():
        exit(f"{filename} does not exist")

    rknn_lite = RKNNLite(verbose=False)
    ret = rknn_lite.load_rknn(path=filename)
    if ret != 0:
        exit(f"Load model {filename} failed!")

    ret = rknn_lite.init_runtime(core_mask=RKNNLite.NPU_CORE_0)
    if ret != 0:
        exit(f"Failed to init rknn runtime for {filename}")
    return rknn_lite


class RKNNModel:
    def __init__(self, model: str, target_platform="rk3588"):
        self.model = init_model(model)

    def release(self):
        self.model.release()

    def __call__(self, x: np.ndarray, h: np.ndarray, c: np.ndarray):
        """
        Args:
          x: (1, 512), np.float32
          h: (2, 1, 64), np.float32
          c: (2, 1, 64), np.float32
        Returns:
          prob:
          next_h:
          next_c
        """
        out, next_h, next_c = self.model.inference(inputs=[x, h, c])
        return out.item(), next_h, next_c


def main():
    model = RKNNModel(model="./m.rknn")
    for i in range(1):
        test(model)


def test(model):
    print("started")
    start = time.time()
    samples, sample_rate = load_audio("./lei-jun-test.wav")
    assert sample_rate == 16000, sample_rate

    window_size = 512

    h = np.zeros((2, 1, 64), dtype=np.float32)
    c = np.zeros((2, 1, 64), dtype=np.float32)

    threshold = 0.5
    num_windows = samples.shape[0] // window_size
    out = []
    for i in range(num_windows):
        print(i, num_windows)
        this_samples = samples[i * window_size : (i + 1) * window_size]
        prob, h, c = model(this_samples[None], h, c)
        out.append(prob > threshold)

    min_speech_duration = 0.25 * sample_rate / window_size
    min_silence_duration = 0.25 * sample_rate / window_size

    result = []
    last = -1
    for k, f in enumerate(out):
        if f >= threshold:
            if last == -1:
                last = k
        elif last != -1:
            if k - last > min_speech_duration:
                result.append((last, k))
            last = -1

    if last != -1 and k - last > min_speech_duration:
        result.append((last, k))

    if not result:
        print("Empty for ./lei-jun-test.wav")
        return

    print(result)

    final = [result[0]]
    for r in result[1:]:
        f = final[-1]
        if r[0] - f[1] < min_silence_duration:
            final[-1] = (f[0], r[1])
        else:
            final.append(r)

    for f in final:
        start = f[0] * window_size / sample_rate
        end = f[1] * window_size / sample_rate
        print("{:.3f} -- {:.3f}".format(start, end))


if __name__ == "__main__":
    main()


================================================
FILE: scripts/silero_vad/v4/test-onnx.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)
import onnxruntime as ort
import argparse
import soundfile as sf
from typing import Tuple
import numpy as np


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model",
        type=str,
        required=True,
        help="Path to the onnx model",
    )

    parser.add_argument(
        "--wav",
        type=str,
        required=True,
        help="Path to the input wav",
    )
    return parser.parse_args()


class OnnxModel:
    def __init__(
        self,
        model: str,
    ):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1
        self.model = ort.InferenceSession(
            model,
            sess_options=session_opts,
            providers=["CPUExecutionProvider"],
        )

    def get_init_states(self):
        h = np.zeros((2, 1, 64), dtype=np.float32)
        c = np.zeros((2, 1, 64), dtype=np.float32)
        return h, c

    def __call__(self, x, h, c):
        """
        Args:
          x: (1, 512)
          h: (2, 1, 64)
          c: (2, 1, 64)
        Returns:
          prob: (1, 1)
          next_h: (2, 1, 64)
          next_c: (2, 1, 64)
        """
        x = x[None]
        out, next_h, next_c = self.model.run(
            [
                self.model.get_outputs()[0].name,
                self.model.get_outputs()[1].name,
                self.model.get_outputs()[2].name,
            ],
            {
                self.model.get_inputs()[0].name: x,
                self.model.get_inputs()[1].name: h,
                self.model.get_inputs()[2].name: c,
            },
        )
        return out, next_h, next_c


def load_audio(filename: str) -> Tuple[np.ndarray, int]:
    data, sample_rate = sf.read(
        filename,
        always_2d=True,
        dtype="float32",
    )
    data = data[:, 0]  # use only the first channel
    samples = np.ascontiguousarray(data)
    return samples, sample_rate


def main():
    args = get_args()

    samples, sample_rate = load_audio(args.wav)
    if sample_rate != 16000:
        import librosa

        samples = librosa.resample(samples, orig_sr=sample_rate, target_sr=16000)
        sample_rate = 16000

    model = OnnxModel(args.model)
    probs = []
    h, c = model.get_init_states()
    window_size = 512
    num_windows = samples.shape[0] // window_size

    for i in range(num_windows):
        start = i * window_size
        end = start + window_size

        p, h, c = model(samples[start:end], h, c)

        probs.append(p[0].item())

    threshold = 0.5
    out = np.array(probs) > threshold
    out = out.tolist()
    min_speech_duration = 0.25 * sample_rate / window_size
    min_silence_duration = 0.25 * sample_rate / window_size

    result = []
    last = -1
    for k, f in enumerate(out):
        if f >= threshold:
            if last == -1:
                last = k
        elif last != -1:
            if k - last > min_speech_duration:
                result.append((last, k))
            last = -1

    if last != -1 and k - last > min_speech_duration:
        result.append((last, k))

    if not result:
        print(f"Empty for {args.wav}")
        return

    print(result)

    final = [result[0]]
    for r in result[1:]:
        f = final[-1]
        if r[0] - f[1] < min_silence_duration:
            final[-1] = (f[0], r[1])
        else:
            final.append(r)

    for f in final:
        start = f[0] * window_size / sample_rate
        end = f[1] * window_size / sample_rate
        print("{:.3f} -- {:.3f}".format(start, end))


if __name__ == "__main__":
    main()


================================================
FILE: scripts/spleeter/.gitignore
================================================
2stems.tar.gz
2stems


================================================
FILE: scripts/spleeter/__init__.py
================================================


================================================
FILE: scripts/spleeter/convert_to_pb.py
================================================
#!/usr/bin/env python3

# Code in this file is modified from
# https://blog.metaflow.fr/tensorflow-how-to-freeze-a-model-and-serve-it-with-a-python-api-d4f3596b3adc
#
# Please see ./run.sh for usages
import argparse

import tensorflow as tf


def freeze_graph(model_dir, output_node_names, output_filename):
    """Extract the sub graph defined by the output nodes and convert all its
    variables into constant

    Args:
      model_dir:
        the root folder containing the checkpoint state file
      output_node_names:
        a string, containing all the output node's names, comma separated
      output_filename:
        Filename to save the graph.
    """
    if not tf.compat.v1.gfile.Exists(model_dir):
        raise AssertionError(
            "Export directory doesn't exists. Please specify an export "
            "directory: %s" % model_dir
        )

    if not output_node_names:
        print("You need to supply the name of a node to --output_node_names.")
        return -1

    # We retrieve our checkpoint fullpath
    checkpoint = tf.train.get_checkpoint_state(model_dir)
    input_checkpoint = checkpoint.model_checkpoint_path

    # We precise the file fullname of our freezed graph
    output_graph = output_filename

    # We clear devices to allow TensorFlow to control on which device it will load operations
    clear_devices = True

    # We start a session using a temporary fresh Graph
    with tf.compat.v1.Session(graph=tf.Graph()) as sess:
        # We import the meta graph in the current default Graph
        saver = tf.compat.v1.train.import_meta_graph(
            input_checkpoint + ".meta", clear_devices=clear_devices
        )

        # We restore the weights
        saver.restore(sess, input_checkpoint)

        # We use a built-in TF helper to export variables to constants
        output_graph_def = tf.compat.v1.graph_util.convert_variables_to_constants(
            sess,  # The session is used to retrieve the weights
            tf.compat.v1.get_default_graph().as_graph_def(),  # The graph_def is used to retrieve the nodes
            output_node_names.split(
                ","
            ),  # The output node names are used to select the useful nodes
        )

        # Finally we serialize and dump the output graph to the filesystem
        with tf.compat.v1.gfile.GFile(output_graph, "wb") as f:
            f.write(output_graph_def.SerializeToString())
        print("%d ops in the final graph." % len(output_graph_def.node))

    return output_graph_def


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model-dir", type=str, default="", help="Model folder to export"
    )
    parser.add_argument(
        "--output-node-names",
        type=str,
        default="vocals_spectrogram/mul,accompaniment_spectrogram/mul",
        help="The name of the output nodes, comma separated.",
    )

    parser.add_argument(
        "--output-filename",
        type=str,
    )
    args = parser.parse_args()

    freeze_graph(args.model_dir, args.output_node_names, args.output_filename)


================================================
FILE: scripts/spleeter/convert_to_torch.py
================================================
#!/usr/bin/env python3
# Copyright    2023  Xiaomi Corp.        (authors: Fangjun Kuang)

# Please see ./run.sh for usage

import argparse

import numpy as np
import tensorflow as tf
import torch

from unet import UNet


def load_graph(frozen_graph_filename):
    # This function is modified from
    # https://blog.metaflow.fr/tensorflow-how-to-freeze-a-model-and-serve-it-with-a-python-api-d4f3596b3adc

    # We load the protobuf file from the disk and parse it to retrieve the
    # unserialized graph_def
    with tf.compat.v1.gfile.GFile(frozen_graph_filename, "rb") as f:
        graph_def = tf.compat.v1.GraphDef()
        graph_def.ParseFromString(f.read())

    # Then, we import the graph_def into a new Graph and returns it
    with tf.Graph().as_default() as graph:
        # The name var will prefix every op/nodes in your graph
        # Since we load everything in a new graph, this is not needed
        #  tf.import_graph_def(graph_def, name="prefix")
        tf.import_graph_def(graph_def, name="")
    return graph


def generate_waveform():
    np.random.seed(20230821)
    waveform = np.random.rand(60 * 44100).astype(np.float32)

    # (num_samples, num_channels)
    waveform = waveform.reshape(-1, 2)
    return waveform


def get_param(graph, name):
    with tf.compat.v1.Session(graph=graph) as sess:
        constant_ops = [op for op in sess.graph.get_operations() if op.type == "Const"]
        for constant_op in constant_ops:
            if constant_op.name != name:
                continue

            value = sess.run(constant_op.outputs[0])
            return torch.from_numpy(value)


@torch.no_grad()
def main(name):
    graph = load_graph(f"./2stems/frozen_{name}_model.pb")
    #  for op in graph.get_operations():
    #      print(op.name)
    x = graph.get_tensor_by_name("waveform:0")
    #  y = graph.get_tensor_by_name("Reshape:0")
    y0 = graph.get_tensor_by_name("strided_slice_3:0")
    #  y1 = graph.get_tensor_by_name("leaky_re_lu_5/LeakyRelu:0")
    #  y1 = graph.get_tensor_by_name("conv2d_5/BiasAdd:0")
    #  y1 = graph.get_tensor_by_name("conv2d_transpose/BiasAdd:0")
    #  y1 = graph.get_tensor_by_name("re_lu/Relu:0")
    #  y1 = graph.get_tensor_by_name("batch_normalization_6/cond/FusedBatchNorm_1:0")
    #  y1 = graph.get_tensor_by_name("concatenate/concat:0")
    #  y1 = graph.get_tensor_by_name("concatenate_1/concat:0")
    #  y1 = graph.get_tensor_by_name("concatenate_4/concat:0")
    #  y1 = graph.get_tensor_by_name("batch_normalization_11/cond/FusedBatchNorm_1:0")
    #  y1 = graph.get_tensor_by_name("conv2d_6/Sigmoid:0")
    y1 = graph.get_tensor_by_name(f"{name}_spectrogram/mul:0")

    unet = UNet()
    unet.eval()

    # For the conv2d in tensorflow, weight shape is (kernel_h, kernel_w, in_channel, out_channel)
    # default input shape is NHWC

    # For the conv2d in torch, weight shape is (out_channel, in_channel, kernel_h, kernel_w)
    # default input shape is NCHW
    state_dict = unet.state_dict()
    #  print(list(state_dict.keys()))

    if name == "vocals":
        state_dict["conv.weight"] = get_param(graph, "conv2d/kernel").permute(
            3, 2, 0, 1
        )
        state_dict["conv.bias"] = get_param(graph, "conv2d/bias")

        state_dict["bn.weight"] = get_param(graph, "batch_normalization/gamma")
        state_dict["bn.bias"] = get_param(graph, "batch_normalization/beta")
        state_dict["bn.running_mean"] = get_param(
            graph, "batch_normalization/moving_mean"
        )
        state_dict["bn.running_var"] = get_param(
            graph, "batch_normalization/moving_variance"
        )

        conv_offset = 0
        bn_offset = 0
    else:
        state_dict["conv.weight"] = get_param(graph, "conv2d_7/kernel").permute(
            3, 2, 0, 1
        )
        state_dict["conv.bias"] = get_param(graph, "conv2d_7/bias")

        state_dict["bn.weight"] = get_param(graph, "batch_normalization_12/gamma")
        state_dict["bn.bias"] = get_param(graph, "batch_normalization_12/beta")
        state_dict["bn.running_mean"] = get_param(
            graph, "batch_normalization_12/moving_mean"
        )
        state_dict["bn.running_var"] = get_param(
            graph, "batch_normalization_12/moving_variance"
        )
        conv_offset = 7
        bn_offset = 12

    for i in range(1, 6):
        state_dict[f"conv{i}.weight"] = get_param(
            graph, f"conv2d_{i+conv_offset}/kernel"
        ).permute(3, 2, 0, 1)
        state_dict[f"conv{i}.bias"] = get_param(graph, f"conv2d_{i+conv_offset}/bias")
        if i >= 5:
            continue
        state_dict[f"bn{i}.weight"] = get_param(
            graph, f"batch_normalization_{i+bn_offset}/gamma"
        )
        state_dict[f"bn{i}.bias"] = get_param(
            graph, f"batch_normalization_{i+bn_offset}/beta"
        )
        state_dict[f"bn{i}.running_mean"] = get_param(
            graph, f"batch_normalization_{i+bn_offset}/moving_mean"
        )
        state_dict[f"bn{i}.running_var"] = get_param(
            graph, f"batch_normalization_{i+bn_offset}/moving_variance"
        )

    if name == "vocals":
        state_dict["up1.weight"] = get_param(graph, "conv2d_transpose/kernel").permute(
            3, 2, 0, 1
        )
        state_dict["up1.bias"] = get_param(graph, "conv2d_transpose/bias")

        state_dict["bn5.weight"] = get_param(graph, "batch_normalization_6/gamma")
        state_dict["bn5.bias"] = get_param(graph, "batch_normalization_6/beta")
        state_dict["bn5.running_mean"] = get_param(
            graph, "batch_normalization_6/moving_mean"
        )
        state_dict["bn5.running_var"] = get_param(
            graph, "batch_normalization_6/moving_variance"
        )
        conv_offset = 0
        bn_offset = 0
    else:
        state_dict["up1.weight"] = get_param(
            graph, "conv2d_transpose_6/kernel"
        ).permute(3, 2, 0, 1)
        state_dict["up1.bias"] = get_param(graph, "conv2d_transpose_6/bias")

        state_dict["bn5.weight"] = get_param(graph, "batch_normalization_18/gamma")
        state_dict["bn5.bias"] = get_param(graph, "batch_normalization_18/beta")
        state_dict["bn5.running_mean"] = get_param(
            graph, "batch_normalization_18/moving_mean"
        )
        state_dict["bn5.running_var"] = get_param(
            graph, "batch_normalization_18/moving_variance"
        )
        conv_offset = 6
        bn_offset = 12

    for i in range(1, 6):
        state_dict[f"up{i+1}.weight"] = get_param(
            graph, f"conv2d_transpose_{i+conv_offset}/kernel"
        ).permute(3, 2, 0, 1)

        state_dict[f"up{i+1}.bias"] = get_param(
            graph, f"conv2d_transpose_{i+conv_offset}/bias"
        )

        state_dict[f"bn{5+i}.weight"] = get_param(
            graph, f"batch_normalization_{6+i+bn_offset}/gamma"
        )
        state_dict[f"bn{5+i}.bias"] = get_param(
            graph, f"batch_normalization_{6+i+bn_offset}/beta"
        )
        state_dict[f"bn{5+i}.running_mean"] = get_param(
            graph, f"batch_normalization_{6+i+bn_offset}/moving_mean"
        )
        state_dict[f"bn{5+i}.running_var"] = get_param(
            graph, f"batch_normalization_{6+i+bn_offset}/moving_variance"
        )

    if name == "vocals":
        state_dict["up7.weight"] = get_param(graph, "conv2d_6/kernel").permute(
            3, 2, 0, 1
        )
        state_dict["up7.bias"] = get_param(graph, "conv2d_6/bias")
    else:
        state_dict["up7.weight"] = get_param(graph, "conv2d_13/kernel").permute(
            3, 2, 0, 1
        )
        state_dict["up7.bias"] = get_param(graph, "conv2d_13/bias")

    unet.load_state_dict(state_dict)

    with tf.compat.v1.Session(graph=graph) as sess:
        y0_out, y1_out = sess.run([y0, y1], feed_dict={x: generate_waveform()})
        #  y0_out = sess.run(y0, feed_dict={x: generate_waveform()})
        #  y1_out = sess.run(y1, feed_dict={x: generate_waveform()})
        #  print(y0_out.shape)
        #  print(y1_out.shape)

    # for the batchnormalization in tensorflow,
    # default input shape is NHWC

    # for the batchnormalization in torch,
    # default input shape is NCHW

    torch_y1_out = unet(torch.from_numpy(y0_out).permute(3, 0, 1, 2))
    torch_y1_out = torch_y1_out.permute(1, 0, 2, 3)

    #  print(torch_y1_out.shape, torch.from_numpy(y1_out).permute(0, 3, 1, 2).shape)
    assert torch.allclose(
        torch_y1_out, torch.from_numpy(y1_out).permute(0, 3, 1, 2), atol=1e-1
    ), ((torch_y1_out - torch.from_numpy(y1_out).permute(0, 3, 1, 2)).abs().max())
    torch.save(unet.state_dict(), f"2stems/{name}.pt")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--name",
        type=str,
        required=True,
        choices=["vocals", "accompaniment"],
    )
    args = parser.parse_args()
    print(vars(args))
    main(args.name)


================================================
FILE: scripts/spleeter/export_onnx.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import onnx
import onnxmltools
import torch
from onnxmltools.utils.float16_converter import convert_float_to_float16
from onnxruntime.quantization import QuantType, quantize_dynamic

from unet import UNet


def export_onnx_fp16(onnx_fp32_path, onnx_fp16_path):
    onnx_fp32_model = onnxmltools.utils.load_model(onnx_fp32_path)
    onnx_fp16_model = convert_float_to_float16(onnx_fp32_model, keep_io_types=True)
    onnxmltools.utils.save_model(onnx_fp16_model, onnx_fp16_path)


def add_meta_data(filename, prefix):
    meta_data = {
        "model_type": "spleeter",
        "sample_rate": 41000,
        "version": 1,
        "model_url": "https://github.com/deezer/spleeter",
        "stems": 2,
        "comment": prefix,
        "model_name": "2stems.tar.gz",
    }
    model = onnx.load(filename)

    print(model.metadata_props)

    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)
    print("--------------------")

    print(model.metadata_props)

    onnx.save(model, filename)


def export(model, prefix):
    num_splits = 1
    x = torch.rand(2, num_splits, 512, 1024, dtype=torch.float32)

    filename = f"./2stems/{prefix}.onnx"
    torch.onnx.export(
        model,
        x,
        filename,
        input_names=["x"],
        output_names=["y"],
        dynamic_axes={
            "x": {1: "num_splits"},
        },
        opset_version=13,
    )

    add_meta_data(filename, prefix)

    filename_int8 = f"./2stems/{prefix}.int8.onnx"
    quantize_dynamic(
        model_input=filename,
        model_output=filename_int8,
        weight_type=QuantType.QUInt8,
    )

    filename_fp16 = f"./2stems/{prefix}.fp16.onnx"
    export_onnx_fp16(filename, filename_fp16)


@torch.no_grad()
def main():
    vocals = UNet()
    state_dict = torch.load("./2stems/vocals.pt", map_location="cpu")
    vocals.load_state_dict(state_dict)
    vocals.eval()

    accompaniment = UNet()
    state_dict = torch.load("./2stems/accompaniment.pt", map_location="cpu")
    accompaniment.load_state_dict(state_dict)
    accompaniment.eval()

    export(vocals, "vocals")
    export(accompaniment, "accompaniment")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/spleeter/separate.py
================================================
#!/usr/bin/env python3
# Copyright    2023  Xiaomi Corp.        (authors: Fangjun Kuang)

# Please see ./run.sh for usage

from typing import Optional

import ffmpeg
import numpy as np
import soundfile as sf
import torch
from pydub import AudioSegment

from unet import UNet


def load_audio(filename, sample_rate: Optional[int] = 44100):
    probe = ffmpeg.probe(filename)
    if "streams" not in probe or len(probe["streams"]) == 0:
        raise ValueError("No stream was found with ffprobe")

    metadata = next(
        stream for stream in probe["streams"] if stream["codec_type"] == "audio"
    )
    n_channels = metadata["channels"]

    if sample_rate is None:
        sample_rate = metadata["sample_rate"]

    process = (
        ffmpeg.input(filename)
        .output("pipe:", format="f32le", ar=sample_rate)
        .run_async(pipe_stdout=True, pipe_stderr=True)
    )
    buffer, _ = process.communicate()
    waveform = np.frombuffer(buffer, dtype="<f4").reshape(-1, n_channels)

    waveform = torch.from_numpy(np.copy(waveform)).to(torch.float32)
    if n_channels == 1:
        waveform = waveform.tile(1, 2)

    if n_channels > 2:
        waveform = waveform[:, :2]

    return waveform, sample_rate


@torch.no_grad()
def main():
    vocals = UNet()
    vocals.eval()
    state_dict = torch.load("./2stems/vocals.pt", map_location="cpu")
    vocals.load_state_dict(state_dict)

    accompaniment = UNet()
    accompaniment.eval()
    state_dict = torch.load("./2stems/accompaniment.pt", map_location="cpu")
    accompaniment.load_state_dict(state_dict)

    #
    #  waveform, sample_rate = load_audio("./audio_example.mp3")

    # You can download the following two mp3 from
    # https://huggingface.co/spaces/csukuangfj/music-source-separation/tree/main/examples
    waveform, sample_rate = load_audio("./qi-feng-le.mp3")
    #  waveform, sample_rate = load_audio("./Yesterday_Once_More-Carpenters.mp3")
    assert waveform.shape[1] == 2, waveform.shape

    waveform = torch.nn.functional.pad(waveform, (0, 0, 0, 4096))

    # torch.stft requires a 2-D input of shape (N, T), so we transpose waveform
    stft = torch.stft(
        waveform.t(),
        n_fft=4096,
        hop_length=1024,
        window=torch.hann_window(4096, periodic=True),
        center=False,
        onesided=True,
        return_complex=True,
    )
    print("stft", stft.shape)

    # stft: (2, 2049, 465)
    # stft is a complex tensor
    y = stft.permute(2, 1, 0)
    print("y0", y.shape)
    # (465, 2049, 2)

    y = y[:, :1024, :]
    # (465, 1024, 2)

    tensor_size = y.shape[0] - int(y.shape[0] / 512) * 512
    pad_size = 512 - tensor_size
    y = torch.nn.functional.pad(y, (0, 0, 0, 0, 0, pad_size))
    # (512, 1024, 2)
    print("y1", y.shape, y.dtype)

    num_splits = int(y.shape[0] / 512)
    y = y.reshape([num_splits, 512] + list(y.shape[1:]))
    # y: (1, 512, 1024, 2)
    print("y2", y.shape, y.dtype)

    y = y.abs()

    y = y.permute(3, 0, 1, 2)
    # (2, 1, 512, 1024)
    print("y3", y.shape, y.dtype)

    vocals_spec = vocals(y)
    accompaniment_spec = accompaniment(y)

    vocals_spec = vocals_spec.permute(1, 0, 2, 3)
    accompaniment_spec = accompaniment_spec.permute(1, 0, 2, 3)

    sum_spec = (vocals_spec**2 + accompaniment_spec**2) + 1e-10
    print(
        "vocals_spec",
        vocals_spec.shape,
        accompaniment_spec.shape,
        sum_spec.shape,
        vocals_spec.dtype,
    )

    vocals_spec = (vocals_spec**2 + 1e-10 / 2) / sum_spec
    # (1, 2, 512, 1024)

    accompaniment_spec = (accompaniment_spec**2 + 1e-10 / 2) / sum_spec
    # (1, 2, 512, 1024)

    for name, spec in zip(
        ["vocals", "accompaniment"], [vocals_spec, accompaniment_spec]
    ):
        spec = torch.nn.functional.pad(spec, (0, 2049 - 1024, 0, 0, 0, 0, 0, 0))
        # (1, 2, 512, 2049)

        spec = spec.permute(0, 2, 3, 1)
        # (1, 512, 2049, 2)
        print("here00", spec.shape)

        spec = spec.reshape(-1, spec.shape[2], spec.shape[3])
        # (512, 2049, 2)

        print("here2", spec.shape)
        # (512, 2049, 2)

        spec = spec[: stft.shape[2], :, :]
        # (465, 2049, 2)
        print("here 3", spec.shape, stft.shape)

        spec = spec.permute(2, 1, 0)
        # (2, 2049, 465)

        masked_stft = spec * stft

        wave = torch.istft(
            masked_stft,
            4096,
            1024,
            window=torch.hann_window(4096, periodic=True),
            onesided=True,
        ) * (2 / 3)

        print(wave.shape, wave.dtype)
        sf.write(f"{name}.wav", wave.t(), 44100)

        wave = (wave.t() * 32768).to(torch.int16)
        sound = AudioSegment(
            data=wave.numpy().tobytes(), sample_width=2, frame_rate=44100, channels=2
        )
        sound.export(f"{name}.mp3", format="mp3", bitrate="128k")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/spleeter/separate_onnx.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)
import time

import kaldi_native_fbank as knf
import numpy as np
import onnxruntime as ort
import soundfile as sf
import torch

from separate import load_audio

"""
----------inputs for ./2stems/vocals.onnx----------
NodeArg(name='x', type='tensor(float)', shape=[2, 'num_splits', 512, 1024])
----------outputs for ./2stems/vocals.onnx----------
NodeArg(name='y', type='tensor(float)', shape=[2, 'Transposey_dim_1', 512, 1024])

----------inputs for ./2stems/accompaniment.onnx----------
NodeArg(name='x', type='tensor(float)', shape=[2, 'num_splits', 512, 1024])
----------outputs for ./2stems/accompaniment.onnx----------
NodeArg(name='y', type='tensor(float)', shape=[2, 'Transposey_dim_1', 512, 1024])
"""


class OnnxModel:
    def __init__(self, filename):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.session_opts = session_opts
        self.model = ort.InferenceSession(
            filename,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )

        print(f"----------inputs for {filename}----------")
        for i in self.model.get_inputs():
            print(i)

        print(f"----------outputs for {filename}----------")

        for i in self.model.get_outputs():
            print(i)
        print("--------------------")

    def __call__(self, x):
        """
        Args:
          x: (num_splits, 2, 512, 1024)
        """
        spec = self.model.run(
            [
                self.model.get_outputs()[0].name,
            ],
            {
                self.model.get_inputs()[0].name: x.numpy(),
            },
        )[0]

        return torch.from_numpy(spec)


def main():
    vocals = OnnxModel("./2stems/vocals.onnx")
    accompaniment = OnnxModel("./2stems/accompaniment.onnx")

    waveform, sample_rate = load_audio("./qi-feng-le.mp3")
    waveform = waveform[: 44100 * 10, :]

    stft_config = knf.StftConfig(
        n_fft=4096,
        hop_length=1024,
        win_length=4096,
        center=False,
        window_type="hann",
    )
    knf_stft = knf.Stft(stft_config)
    knf_istft = knf.IStft(stft_config)

    start = time.time()

    stft_result_c0 = knf_stft(waveform[:, 0].tolist())
    stft_result_c1 = knf_stft(waveform[:, 1].tolist())
    print("c0 stft", stft_result_c0.num_frames)

    orig_real0 = np.array(stft_result_c0.real, dtype=np.float32).reshape(
        stft_result_c0.num_frames, -1
    )
    orig_imag0 = np.array(stft_result_c0.imag, dtype=np.float32).reshape(
        stft_result_c0.num_frames, -1
    )

    orig_real1 = np.array(stft_result_c1.real, dtype=np.float32).reshape(
        stft_result_c1.num_frames, -1
    )
    orig_imag1 = np.array(stft_result_c1.imag, dtype=np.float32).reshape(
        stft_result_c1.num_frames, -1
    )

    real0 = torch.from_numpy(orig_real0)
    imag0 = torch.from_numpy(orig_imag0)
    real1 = torch.from_numpy(orig_real1)
    imag1 = torch.from_numpy(orig_imag1)
    # (num_frames, n_fft/2_1)
    print("real0", real0.shape)

    # keep only the first 1024 bins
    real0 = real0[:, :1024]
    imag0 = imag0[:, :1024]
    real1 = real1[:, :1024]
    imag1 = imag1[:, :1024]

    stft0 = (real0.square() + imag0.square()).sqrt()
    stft1 = (real1.square() + imag1.square()).sqrt()

    # pad it to multiple of 512
    padding = 512 - real0.shape[0] % 512
    print("padding", padding)
    if padding > 0:
        stft0 = torch.nn.functional.pad(stft0, (0, 0, 0, padding))
        stft1 = torch.nn.functional.pad(stft1, (0, 0, 0, padding))
    stft0 = stft0.reshape(1, -1, 512, 1024)
    stft1 = stft1.reshape(1, -1, 512, 1024)

    stft_01 = torch.cat([stft0, stft1], axis=0)

    print("stft_01", stft_01.shape, stft_01.dtype)

    vocals_spec = vocals(stft_01)
    accompaniment_spec = accompaniment(stft_01)
    # (num_channels, num_splits, 512, 1024)

    sum_spec = (vocals_spec.square() + accompaniment_spec.square()) + 1e-10

    vocals_spec = (vocals_spec**2 + 1e-10 / 2) / sum_spec
    accompaniment_spec = (accompaniment_spec**2 + 1e-10 / 2) / sum_spec

    for name, spec in zip(
        ["vocals", "accompaniment"], [vocals_spec, accompaniment_spec]
    ):
        spec_c0 = spec[0]
        spec_c1 = spec[1]

        spec_c0 = spec_c0.reshape(-1, 1024)
        spec_c1 = spec_c1.reshape(-1, 1024)

        spec_c0 = spec_c0[: stft_result_c0.num_frames, :]
        spec_c1 = spec_c1[: stft_result_c0.num_frames, :]

        spec_c0 = torch.nn.functional.pad(spec_c0, (0, 2049 - 1024, 0, 0))
        spec_c1 = torch.nn.functional.pad(spec_c1, (0, 2049 - 1024, 0, 0))

        spec_c0_real = spec_c0 * orig_real0
        spec_c0_imag = spec_c0 * orig_imag0

        spec_c1_real = spec_c1 * orig_real1
        spec_c1_imag = spec_c1 * orig_imag1

        result0 = knf.StftResult(
            real=spec_c0_real.reshape(-1).tolist(),
            imag=spec_c0_imag.reshape(-1).tolist(),
            num_frames=orig_real0.shape[0],
        )

        result1 = knf.StftResult(
            real=spec_c1_real.reshape(-1).tolist(),
            imag=spec_c1_imag.reshape(-1).tolist(),
            num_frames=orig_real1.shape[0],
        )

        wav0 = knf_istft(result0)
        wav1 = knf_istft(result1)

        wav = np.array([wav0, wav1], dtype=np.float32)
        wav = np.transpose(wav)
        # now wav is (num_samples, num_channels)

        sf.write(f"./onnx-{name}.wav", wav, 44100)

        print(f"Saved to ./onnx-{name}.wav")

    end = time.time()
    elapsed_seconds = end - start
    audio_duration = waveform.shape[0] / sample_rate
    real_time_factor = elapsed_seconds / audio_duration

    print(f"Elapsed seconds: {elapsed_seconds:.3f}")
    print(f"Audio duration in seconds: {audio_duration:.3f}")
    print(f"RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/spleeter/unet.py
================================================
# Copyright    2023  Xiaomi Corp.        (authors: Fangjun Kuang)

import torch


class UNet(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = torch.nn.Conv2d(2, 16, kernel_size=5, stride=(2, 2), padding=0)
        self.bn = torch.nn.BatchNorm2d(
            16, track_running_stats=True, eps=1e-3, momentum=0.01
        )
        #
        self.conv1 = torch.nn.Conv2d(16, 32, kernel_size=5, stride=(2, 2), padding=0)
        self.bn1 = torch.nn.BatchNorm2d(
            32, track_running_stats=True, eps=1e-3, momentum=0.01
        )

        self.conv2 = torch.nn.Conv2d(32, 64, kernel_size=5, stride=(2, 2), padding=0)
        self.bn2 = torch.nn.BatchNorm2d(
            64, track_running_stats=True, eps=1e-3, momentum=0.01
        )

        self.conv3 = torch.nn.Conv2d(64, 128, kernel_size=5, stride=(2, 2), padding=0)
        self.bn3 = torch.nn.BatchNorm2d(
            128, track_running_stats=True, eps=1e-3, momentum=0.01
        )

        self.conv4 = torch.nn.Conv2d(128, 256, kernel_size=5, stride=(2, 2), padding=0)
        self.bn4 = torch.nn.BatchNorm2d(
            256, track_running_stats=True, eps=1e-3, momentum=0.01
        )

        self.conv5 = torch.nn.Conv2d(256, 512, kernel_size=5, stride=(2, 2), padding=0)

        self.up1 = torch.nn.ConvTranspose2d(512, 256, kernel_size=5, stride=2)
        self.bn5 = torch.nn.BatchNorm2d(
            256, track_running_stats=True, eps=1e-3, momentum=0.01
        )

        self.up2 = torch.nn.ConvTranspose2d(512, 128, kernel_size=5, stride=2)
        self.bn6 = torch.nn.BatchNorm2d(
            128, track_running_stats=True, eps=1e-3, momentum=0.01
        )

        self.up3 = torch.nn.ConvTranspose2d(256, 64, kernel_size=5, stride=2)
        self.bn7 = torch.nn.BatchNorm2d(
            64, track_running_stats=True, eps=1e-3, momentum=0.01
        )

        self.up4 = torch.nn.ConvTranspose2d(128, 32, kernel_size=5, stride=2)
        self.bn8 = torch.nn.BatchNorm2d(
            32, track_running_stats=True, eps=1e-3, momentum=0.01
        )

        self.up5 = torch.nn.ConvTranspose2d(64, 16, kernel_size=5, stride=2)
        self.bn9 = torch.nn.BatchNorm2d(
            16, track_running_stats=True, eps=1e-3, momentum=0.01
        )

        self.up6 = torch.nn.ConvTranspose2d(32, 1, kernel_size=5, stride=2)
        self.bn10 = torch.nn.BatchNorm2d(
            1, track_running_stats=True, eps=1e-3, momentum=0.01
        )

        # output logit is False, so we need self.up7
        self.up7 = torch.nn.Conv2d(1, 2, kernel_size=4, dilation=2, padding=3)

    def forward(self, x):
        """
        Args:
          x: (num_audio_channels, num_splits, 512, 1024)
        Returns:
          y: (num_audio_channels, num_splits, 512, 1024)
        """
        x = x.permute(1, 0, 2, 3)

        in_x = x
        # in_x is (3, 2, 512, 1024) = (T, 2, 512, 1024)
        x = torch.nn.functional.pad(x, (1, 2, 1, 2), "constant", 0)
        conv1 = self.conv(x)
        batch1 = self.bn(conv1)
        rel1 = torch.nn.functional.leaky_relu(batch1, negative_slope=0.2)

        x = torch.nn.functional.pad(rel1, (1, 2, 1, 2), "constant", 0)
        conv2 = self.conv1(x)  # (3, 32, 128, 256)
        batch2 = self.bn1(conv2)
        rel2 = torch.nn.functional.leaky_relu(
            batch2, negative_slope=0.2
        )  # (3, 32, 128, 256)

        x = torch.nn.functional.pad(rel2, (1, 2, 1, 2), "constant", 0)
        conv3 = self.conv2(x)  # (3, 64, 64, 128)
        batch3 = self.bn2(conv3)
        rel3 = torch.nn.functional.leaky_relu(
            batch3, negative_slope=0.2
        )  # (3, 64, 64, 128)

        x = torch.nn.functional.pad(rel3, (1, 2, 1, 2), "constant", 0)
        conv4 = self.conv3(x)  # (3, 128, 32, 64)
        batch4 = self.bn3(conv4)
        rel4 = torch.nn.functional.leaky_relu(
            batch4, negative_slope=0.2
        )  # (3, 128, 32, 64)

        x = torch.nn.functional.pad(rel4, (1, 2, 1, 2), "constant", 0)
        conv5 = self.conv4(x)  # (3, 256, 16, 32)
        batch5 = self.bn4(conv5)
        rel6 = torch.nn.functional.leaky_relu(
            batch5, negative_slope=0.2
        )  # (3, 256, 16, 32)

        x = torch.nn.functional.pad(rel6, (1, 2, 1, 2), "constant", 0)
        conv6 = self.conv5(x)  # (3, 512, 8, 16)

        up1 = self.up1(conv6)
        up1 = up1[:, :, 1:-2, 1:-2]  # (3, 256, 16, 32)
        up1 = torch.nn.functional.relu(up1)
        batch7 = self.bn5(up1)
        merge1 = torch.cat([conv5, batch7], axis=1)  # (3, 512, 16, 32)

        up2 = self.up2(merge1)
        up2 = up2[:, :, 1:-2, 1:-2]
        up2 = torch.nn.functional.relu(up2)
        batch8 = self.bn6(up2)

        merge2 = torch.cat([conv4, batch8], axis=1)  # (3, 256, 32, 64)

        up3 = self.up3(merge2)
        up3 = up3[:, :, 1:-2, 1:-2]
        up3 = torch.nn.functional.relu(up3)
        batch9 = self.bn7(up3)

        merge3 = torch.cat([conv3, batch9], axis=1)  # (3, 128, 64, 128)

        up4 = self.up4(merge3)
        up4 = up4[:, :, 1:-2, 1:-2]
        up4 = torch.nn.functional.relu(up4)
        batch10 = self.bn8(up4)

        merge4 = torch.cat([conv2, batch10], axis=1)  # (3, 64, 128, 256)

        up5 = self.up5(merge4)
        up5 = up5[:, :, 1:-2, 1:-2]
        up5 = torch.nn.functional.relu(up5)
        batch11 = self.bn9(up5)

        merge5 = torch.cat([conv1, batch11], axis=1)  # (3, 32, 256, 512)

        up6 = self.up6(merge5)
        up6 = up6[:, :, 1:-2, 1:-2]
        up6 = torch.nn.functional.relu(up6)
        batch12 = self.bn10(up6)  # (3, 1, 512, 1024)  = (T, 1, 512, 1024)

        up7 = self.up7(batch12)
        up7 = torch.sigmoid(up7)  # (3, 2, 512, 1024)

        ans = up7 * in_x
        return ans.permute(1, 0, 2, 3)


================================================
FILE: scripts/supertonic/README.md
================================================
# Supertonic TTS INT8 Quantization

Quantize [Supertonic](https://github.com/supertone-inc/supertonic) TTS ONNX models to INT8 for on-device deployment.

## Overview

- **Pipeline**: `gen_calib_configs` → `dump_inputs` → `convert`; stage 4 generates **.bin** assets when JSONs exist: `generate_voices_bin.py`, `generate_indexer_bin.py`. Runtime loads **tts.json** for TTS config.
- **Quantization**: duration_predictor, text_encoder, vector_estimator → dynamic INT8; vocoder → static INT8 (calibration from dumped data).
- **Voice**: Runtime loads one **`voice.bin`**. Generate with `python3 generate_voices_bin.py [input_dir] [output_bin]`. Pass `--supertonic-voice-style=/path/to/voice.bin`. Use `--sid` 0..N-1 to select speaker.
- **Unicode indexer**: Runtime uses **`unicode_indexer.bin`**. Generate with `python3 generate_indexer_bin.py [json_path] [bin_path]`. Pass `--supertonic-unicode-indexer=/path/to/unicode_indexer.bin`.
- **TTS config**: Runtime loads **`tts.json`**. Pass `--supertonic-tts-json=/path/to/tts.json`.

## Usage

```bash
./run.sh              # Run all stages (0–4)
./run.sh 4            # Only generate voice.bin, unicode_indexer.bin
```

**Stages:** 0 = download models, 1 = gen calib configs, 2 = dump calib data, 3 = quantize, 4 = generate `voice.bin`, `unicode_indexer.bin`. 


================================================
FILE: scripts/supertonic/convert.py
================================================
#!/usr/bin/env python3
# Copyright (c)  2026 zengyw

"""
Quantize Supertonic TTS ONNX models (duration_predictor, text_encoder,
vector_estimator, vocoder) to int8.
See also https://github.com/supertone-inc/supertonic
"""

import argparse
import glob
import inspect
import os
import shutil
import tempfile
from typing import Dict, List, Optional, Tuple

import numpy as np
import onnx
from onnx import numpy_helper
import onnxruntime as ort
from onnxruntime.quantization import (
    CalibrationDataReader,
    QuantFormat,
    QuantType,
    quantize_dynamic,
    quantize_static,
)

try:
    from onnxruntime.quantization import CalibrationMethod
except Exception:
    CalibrationMethod = None

_quant_pre_process = None
try:
    from onnxruntime.quantization.shape_inference import quant_pre_process as _qpp
    _quant_pre_process = _qpp
except Exception:
    try:
        from onnxruntime.quantization import quant_pre_process as _qpp
        _quant_pre_process = _qpp
    except Exception:
        _quant_pre_process = None


def ensure_graph_names(m: onnx.ModelProto) -> None:
    def fix_graph(g: onnx.GraphProto, prefix: str) -> None:
        if not g.name:
            g.name = prefix
        for node in g.node:
            for attr in node.attribute:
                if attr.type == onnx.AttributeProto.GRAPH and attr.g is not None:
                    fix_graph(attr.g, f"{prefix}_{node.name or node.op_type}_g")
                elif attr.type == onnx.AttributeProto.GRAPHS:
                    for i, sg in enumerate(attr.graphs):
                        fix_graph(sg, f"{prefix}_{node.name or node.op_type}_gs{i}")

    if not m.graph.name:
        m.graph.name = "graph"
    fix_graph(m.graph, m.graph.name)


def ensure_node_names(m: onnx.ModelProto) -> None:
    for i, n in enumerate(m.graph.node):
        if not n.name:
            n.name = f"{n.op_type}_{i}"


def save_clean(path: str) -> None:
    m = onnx.load(path)
    ensure_graph_names(m)
    ensure_node_names(m)
    onnx.save_model(m, path, save_as_external_data=False)


def preprocess(src: str, dst: str, mode: str) -> str:
    if mode == "none":
        return src
    if mode == "onnx":
        m = onnx.load(src)
        ensure_graph_names(m)
        ensure_node_names(m)
        try:
            m = onnx.shape_inference.infer_shapes(m)
        except Exception:
            pass
        onnx.save_model(m, dst, save_as_external_data=False)
        return dst
    if mode == "ort":
        if _quant_pre_process is None:
            return preprocess(src, dst, "onnx")
        sig = inspect.signature(_quant_pre_process)
        allowed = set(sig.parameters.keys())
        kwargs = {}
        if "skip_symbolic_shape_inference" in allowed:
            kwargs["skip_symbolic_shape_inference"] = True
        if "skip_onnx_shape_inference" in allowed:
            kwargs["skip_onnx_shape_inference"] = False
        if "skip_optimization" in allowed:
            kwargs["skip_optimization"] = False
        try:
            _quant_pre_process(src, dst, **kwargs)
            save_clean(dst)
            return dst
        except Exception:
            return preprocess(src, dst, "onnx")
    raise ValueError(f"Unknown preprocess mode: {mode}")


def pick_calib_method(name: str):
    # fallback to name (str) when CalibrationMethod unavailable
    if CalibrationMethod is None:
        print(f"CalibrationMethod is None, using {name}")
        return name
    return getattr(CalibrationMethod, name, CalibrationMethod.MinMax)


def get_io_names(model_path: str) -> Tuple[List[str], List[str]]:
    so = ort.SessionOptions()
    so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_DISABLE_ALL
    sess = ort.InferenceSession(model_path, sess_options=so, providers=["CPUExecutionProvider"])
    ins = [i.name for i in sess.get_inputs()]
    outs = [o.name for o in sess.get_outputs()]
    return ins, outs


def onnx_int8_name(src_name: str) -> str:
    return os.path.splitext(src_name)[0] + ".int8.onnx"


def _detect_variable_axis(shapes: List[Tuple[int, ...]]) -> Optional[int]:
    # return axis index if exactly one axis varies across shapes, else None
    if not shapes:
        return None
    nd = len(shapes[0])
    if any(len(s) != nd for s in shapes):
        return None
    var_axes = []
    for ax in range(nd):
        vals = {s[ax] for s in shapes}
        if len(vals) > 1:
            var_axes.append(ax)
    if len(var_axes) == 1:
        return var_axes[0]
    return None


def _crop_center(arr: np.ndarray, axis: int, target: int) -> np.ndarray:
    cur = arr.shape[axis]
    if cur <= target:
        return arr
    start = (cur - target) // 2
    sl = [slice(None)] * arr.ndim
    sl[axis] = slice(start, start + target)
    return arr[tuple(sl)]


def _pad(arr: np.ndarray, axis: int, target: int, pad_value: float) -> np.ndarray:
    cur = arr.shape[axis]
    if cur >= target:
        return arr
    pad_width = [(0, 0)] * arr.ndim
    pad_width[axis] = (0, target - cur)
    return np.pad(arr, pad_width, mode="constant", constant_values=pad_value)


def _pad_or_crop(arr: np.ndarray, axis: int, target: int, pad_value: float) -> np.ndarray:
    cur = arr.shape[axis]
    if cur > target:
        return _crop_center(arr, axis, target)
    if cur < target:
        return _pad(arr, axis, target, pad_value)
    return arr


def _pad_value_for(name: str, dtype: np.dtype):
    n = name.lower()
    if "mask" in n:
        return 0
    if np.issubdtype(dtype, np.integer):
        return 0
    return 0.0


def _build_pad_plan_percentile(
    folder: str,
    input_names: List[str],
    limit: int,
    pad_percentile: int,
    pad_max: int,
) -> Dict[str, Tuple[int, int]]:
    files = sorted(glob.glob(os.path.join(folder, "*.npz")))
    files = files[:limit] if limit > 0 else files
    if not files:
        raise RuntimeError(f"No npz in: {folder}")

    shapes_per_in: Dict[str, List[Tuple[int, ...]]] = {n: [] for n in input_names}
    for f in files:
        d = np.load(f, allow_pickle=False)
        for n in input_names:
            if n not in d:
                raise KeyError(f"{f} missing '{n}', keys={list(d.keys())}")
            shapes_per_in[n].append(tuple(d[n].shape))

    plan: Dict[str, Tuple[int, int]] = {}
    for n, shapes in shapes_per_in.items():
        ax = _detect_variable_axis(shapes)
        if ax is None:
            continue
        lens = np.array([s[ax] for s in shapes], dtype=np.int64)
        tgt = int(np.percentile(lens, pad_percentile))
        tgt = max(1, tgt)
        if pad_max > 0:
            tgt = min(tgt, pad_max)
        plan[n] = (ax, tgt)
    return plan


class PaddedNpzDataReader(CalibrationDataReader):
    def __init__(self, folder: str, input_names: List[str], limit: int, pad_percentile: int, pad_max: int):
        self.files = sorted(glob.glob(os.path.join(folder, "*.npz")))
        if limit > 0:
            self.files = self.files[:limit]
        if not self.files:
            raise RuntimeError(f"No calibration npz in: {folder}")
        self.input_names = input_names
        self.pad_plan = _build_pad_plan_percentile(folder, input_names, limit, pad_percentile, pad_max)
        self._iter = iter(self.files)

    def get_next(self) -> Optional[Dict[str, np.ndarray]]:
        try:
            p = next(self._iter)
        except StopIteration:
            return None
        d = np.load(p, allow_pickle=False)
        feeds: Dict[str, np.ndarray] = {}
        for n in self.input_names:
            x = d[n]
            if x.dtype == np.float64:
                x = x.astype(np.float32)
            if n in self.pad_plan:
                axis, tgt = self.pad_plan[n]
                pv = _pad_value_for(n, x.dtype)
                x = _pad_or_crop(x, axis, tgt, pv)
            feeds[n] = x
        return feeds

    def rewind(self) -> None:
        self._iter = iter(self.files)


def safe_copy(src: str, dst: str) -> None:
    shutil.copy2(src, dst)
    try:
        save_clean(dst)
    except Exception:
        pass


def quantize_dynamic_safe(fp32_path: str, out_path: str, op_types: List[str], wt_type: QuantType) -> None:
    try:
        quantize_dynamic(
            model_input=fp32_path,
            model_output=out_path,
            op_types_to_quantize=op_types,
            weight_type=wt_type,
            per_channel=False,
            reduce_range=False,
            use_external_data_format=False,
        )
        save_clean(out_path)
    except Exception as e:
        print(f"[WARN] dynamic quant failed for {os.path.basename(fp32_path)}: {e} -> fallback copy")
        safe_copy(fp32_path, out_path)


def quantize_static_safe(
    fp32_path: str,
    out_path: str,
    calib_folder: str,
    preprocess_mode: str,
    calib_limit: int,
    calibrate_method: str,
    act_type: QuantType,
    wt_type: QuantType,
    per_channel: bool,
    reduce_range: bool,
    op_types: List[str],
    nodes_to_exclude: Optional[List[str]],
    pad_percentile: int,
    pad_max: int,
) -> None:
    with tempfile.TemporaryDirectory(prefix="st_q_") as td:
        pre_path = os.path.join(td, "pre.onnx")
        fp32_for_quant = preprocess(fp32_path, pre_path, preprocess_mode)
        ins, _ = get_io_names(fp32_for_quant)

        extra = {"WeightSymmetric": True}
        extra["ActivationSymmetric"] = (act_type == QuantType.QInt8)

        def _run(method: str) -> None:
            sig = inspect.signature(quantize_static)
            allowed = set(sig.parameters.keys())
            kwargs = dict(
                quant_format=QuantFormat.QDQ,
                op_types_to_quantize=op_types,
                per_channel=per_channel,
                reduce_range=reduce_range,
                activation_type=act_type,
                weight_type=wt_type,
                optimize_model=False,
                use_external_data_format=False,
                extra_options=extra,
                calibration_providers=["CPUExecutionProvider"],
            )
            cm = pick_calib_method(method)
            if "calibrate_method" in allowed:
                kwargs["calibrate_method"] = cm
            if nodes_to_exclude and "nodes_to_exclude" in allowed:
                kwargs["nodes_to_exclude"] = nodes_to_exclude
            kwargs = {k: v for k, v in kwargs.items() if k in allowed}

            dr = PaddedNpzDataReader(calib_folder, ins, calib_limit, pad_percentile, pad_max)
            quantize_static(fp32_for_quant, out_path, dr, **kwargs)
            save_clean(out_path)

        try:
            _run(calibrate_method)
        except Exception as e:
            msg = str(e)
            if "inhomogeneous shape" in msg or "setting an array element with a sequence" in msg:
                print(f"[WARN] calib shape issue on {os.path.basename(fp32_path)} -> fallback MinMax")
                _run("MinMax")
            else:
                print(f"[WARN] static quant failed for {os.path.basename(fp32_path)}: {e} -> fallback copy")
                safe_copy(fp32_path, out_path)


def _name_exists(model: onnx.ModelProto, name: str) -> bool:
    for t in model.graph.initializer:
        if t.name == name:
            return True
    for v in list(model.graph.value_info) + list(model.graph.input) + list(model.graph.output):
        if v.name == name:
            return True
    for n in model.graph.node:
        if name in n.output:
            return True
    return False


def _unique_name(model: onnx.ModelProto, base: str) -> str:
    if not _name_exists(model, base):
        return base
    i = 0
    while True:
        cand = f"{base}_{i}"
        if not _name_exists(model, cand):
            return cand
        i += 1


def _w8dq_quantize_per_channel_s8(w: np.ndarray, axis: int = 0) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    w = w.astype(np.float32)
    w_abs = np.max(np.abs(w), axis=tuple(i for i in range(w.ndim) if i != axis), keepdims=False)
    w_abs = np.maximum(w_abs, 1e-8)
    scale = (w_abs / 127.0).astype(np.float32)
    zp = np.zeros_like(scale, dtype=np.int8)

    shape = [1] * w.ndim
    shape[axis] = w.shape[axis]
    scale_b = scale.reshape(shape)
    w_q = np.round(w / scale_b).clip(-127, 127).astype(np.int8)
    return w_q, scale, zp


def apply_w8dq_to_conv_weights(
    model_in: str,
    model_out: str,
    exclude_last_conv: int,
    only_fp32: bool = True,
) -> None:
    m = onnx.load(model_in)
    ensure_graph_names(m)
    ensure_node_names(m)

    convs_all = [n for n in m.graph.node if n.op_type == "Conv"]
    if exclude_last_conv > 0 and len(convs_all) >= exclude_last_conv:
        convs_use = convs_all[:-exclude_last_conv]
    else:
        convs_use = convs_all

    imap = {t.name: t for t in m.graph.initializer}

    def remove_initializer(name: str) -> None:
        keep = [t for t in m.graph.initializer if t.name != name]
        del m.graph.initializer[:]
        m.graph.initializer.extend(keep)

    new_nodes = []
    changed = 0

    for node in m.graph.node:
        if node.op_type != "Conv":
            continue
        if node not in convs_use:
            continue
        if len(node.input) < 2:
            continue

        w_name = node.input[1]
        if w_name not in imap:
            continue
        w_t = imap[w_name]
        w = numpy_helper.to_array(w_t)
        if only_fp32 and w.dtype != np.float32:
            continue

        w_q, scale, zp = _w8dq_quantize_per_channel_s8(w, axis=0)

        wq_name = _unique_name(m, w_name + "_wq")
        sc_name = _unique_name(m, w_name + "_scale")
        zp_name = _unique_name(m, w_name + "_zp")
        dq_out = _unique_name(m, w_name + "_dq")

        m.graph.initializer.extend([numpy_helper.from_array(w_q, name=wq_name)])
        m.graph.initializer.extend([numpy_helper.from_array(scale.astype(np.float32), name=sc_name)])
        m.graph.initializer.extend([numpy_helper.from_array(zp.astype(np.int8), name=zp_name)])

        dq = onnx.helper.make_node(
            "DequantizeLinear",
            inputs=[wq_name, sc_name, zp_name],
            outputs=[dq_out],
            name=_unique_name(m, "DQ_" + w_name),
            axis=0,
        )
        new_nodes.append(dq)

        node.input[1] = dq_out
        remove_initializer(w_name)
        changed += 1

    if new_nodes:
        old_nodes = list(m.graph.node)
        del m.graph.node[:]
        m.graph.node.extend(new_nodes + old_nodes)

    onnx.checker.check_model(m)
    onnx.save_model(m, model_out, save_as_external_data=False)
    save_clean(model_out)
    print(f"[W8-DQ] conv weights compressed: {changed} (exclude_last_conv={exclude_last_conv})")


def infer_vocoder_latent_shape(vocoder_fp32: str, voc_calib_dir: str) -> Optional[Tuple[int, ...]]:
    try:
        voc_in, _ = get_io_names(vocoder_fp32)
        if len(voc_in) != 1:
            return None
        inp = voc_in[0]
        files = sorted(glob.glob(os.path.join(voc_calib_dir, "*.npz")))
        if not files:
            return None
        d = np.load(files[0], allow_pickle=False)
        if inp not in d:
            return None
        return tuple(d[inp].shape)
    except Exception:
        return None


def pick_ve_output_index(ve_model_path: str, ve_calib_dir: str, voc_latent_shape: Optional[Tuple[int, ...]]) -> int:
    ve_in, _ = get_io_names(ve_model_path)
    files = sorted(glob.glob(os.path.join(ve_calib_dir, "*.npz")))
    if not files:
        return 0
    d = np.load(files[0], allow_pickle=False)
    feeds = {}
    for n in ve_in:
        x = d[n]
        if x.dtype == np.float64:
            x = x.astype(np.float32)
        feeds[n] = x

    so = ort.SessionOptions()
    so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    sess = ort.InferenceSession(ve_model_path, sess_options=so, providers=["CPUExecutionProvider"])
    outs = sess.run(None, feeds)

    best = 0
    if voc_latent_shape is not None:
        vrank = len(voc_latent_shape)
        for i, y in enumerate(outs):
            y = np.asarray(y)
            if not np.issubdtype(y.dtype, np.floating):
                continue
            if y.ndim != vrank:
                continue
            # Supertonic VE latent dim 512, pick output matching vocoder input
            if 512 in y.shape:
                best = i
                break
        return best

    for i, y in enumerate(outs):
        y = np.asarray(y)
        if np.issubdtype(y.dtype, np.floating) and y.ndim == 3 and (512 in y.shape):  # latent dim
            best = i
            break
    return best


def build_vocoder_calib_from_ve(
    ve_model_path: str,
    ve_calib_dir: str,
    vocoder_fp32: str,
    out_dir: str,
    ve_output_index: int,
    limit: int,
    pad_percentile: int,
    pad_max: int,
) -> None:
    os.makedirs(out_dir, exist_ok=True)
    voc_in, _ = get_io_names(vocoder_fp32)
    if len(voc_in) != 1:
        raise RuntimeError(f"vocoder inputs != 1, got {voc_in}")
    voc_in_name = voc_in[0]

    ve_in, _ = get_io_names(ve_model_path)
    files = sorted(glob.glob(os.path.join(ve_calib_dir, "*.npz")))
    files = files[:limit] if limit > 0 else files
    if not files:
        raise RuntimeError(f"No npz in {ve_calib_dir}")

    so = ort.SessionOptions()
    so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    sess = ort.InferenceSession(ve_model_path, sess_options=so, providers=["CPUExecutionProvider"])

    ve_pad_plan = _build_pad_plan_percentile(ve_calib_dir, ve_in, limit, pad_percentile, pad_max)

    latents = []
    for f in files:
        d = np.load(f, allow_pickle=False)
        feeds = {}
        for n in ve_in:
            x = d[n]
            if x.dtype == np.float64:
                x = x.astype(np.float32)
            if n in ve_pad_plan:
                axis, tgt = ve_pad_plan[n]
                pv = _pad_value_for(n, x.dtype)
                x = _pad_or_crop(x, axis, tgt, pv)
            feeds[n] = x
        y = np.asarray(sess.run(None, feeds)[ve_output_index], dtype=np.float32)
        latents.append(y)

    shapes = [tuple(z.shape) for z in latents]
    ax = _detect_variable_axis(shapes)
    if ax is not None:
        lens = np.array([s[ax] for s in shapes], dtype=np.int64)
        tgt = int(np.percentile(lens, pad_percentile))
        tgt = max(1, tgt)
        if pad_max > 0:
            tgt = min(tgt, pad_max)
        latents = [_pad_or_crop(z, ax, tgt, 0.0) for z in latents]

    for i, y in enumerate(latents):
        np.savez(os.path.join(out_dir, f"{i:05d}.npz"), **{voc_in_name: y})


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--src-dir", type=str, required=True, help="source model dir"
    )
    parser.add_argument(
        "--dst-dir", type=str, required=True, help="output model dir"
    )
    parser.add_argument(
        "--calib-dir", type=str, required=True, help="calibration npz dir"
    )
    parser.add_argument(
        "--preprocess", choices=["onnx", "ort", "none"], default="ort"
    )
    parser.add_argument("--duration-predictor", default="duration_predictor.onnx")
    parser.add_argument("--text-encoder", default="text_encoder.onnx")
    parser.add_argument("--vector-estimator", default="vector_estimator.onnx")
    parser.add_argument("--vocoder", default="vocoder.onnx")
    parser.add_argument("--dp-mode", choices=["copy", "dynamic"], default="copy")
    parser.add_argument("--te-mode", choices=["copy", "dynamic"], default="copy")
    parser.add_argument(
        "--dp-te-weight-type", choices=["qint8", "quint8"], default="qint8"
    )
    parser.add_argument("--ve-mode", choices=["copy", "dynamic"], default="dynamic")
    parser.add_argument("--ve-conv-w8dq", action="store_true", default=True)
    parser.add_argument("--ve-w8dq-exclude-last-conv", type=int, default=6)
    parser.add_argument("--ve-calib-limit", type=int, default=100)
    parser.add_argument("--vocoder-calib-limit", type=int, default=100)
    parser.add_argument(
        "--vocoder-calibrate-method",
        choices=["MinMax", "Entropy", "Percentile"],
        default="Percentile",
    )
    parser.add_argument("--vocoder-act", choices=["qint8", "quint8"], default="quint8")
    parser.add_argument("--vocoder-wt", choices=["qint8", "quint8"], default="qint8")
    parser.add_argument("--vocoder-per-channel", action="store_true", default=True)
    parser.add_argument("--vocoder-reduce-range", action="store_true", default=True)
    parser.add_argument("--exclude-last-conv", type=int, default=8)
    parser.add_argument("--vocoder-tail-w8dq", action="store_true", default=True)
    parser.add_argument(
        "--vocoder-tail-w8dq-exclude-last-conv", type=int, default=0
    )
    parser.add_argument("--vocoder-calib-from-ve", action="store_true", default=True)
    parser.add_argument("--ve-output-index", type=int, default=-1)
    parser.add_argument("--pad-percentile", type=int, default=90)
    parser.add_argument("--pad-max", type=int, default=0)
    return parser.parse_args()


def main():
    args = get_args()
    os.makedirs(args.dst_dir, exist_ok=True)

    print("ORT:", ort.__version__, "providers:", ort.get_available_providers())

    dp_fp32 = os.path.join(args.src_dir, args.duration_predictor)
    te_fp32 = os.path.join(args.src_dir, args.text_encoder)
    ve_fp32 = os.path.join(args.src_dir, args.vector_estimator)
    voc_fp32 = os.path.join(args.src_dir, args.vocoder)

    dp_out = os.path.join(args.dst_dir, onnx_int8_name(args.duration_predictor))
    te_out = os.path.join(args.dst_dir, onnx_int8_name(args.text_encoder))
    ve_out = os.path.join(args.dst_dir, onnx_int8_name(args.vector_estimator))
    voc_out = os.path.join(args.dst_dir, onnx_int8_name(args.vocoder))

    dp_te_wt = QuantType.QInt8 if args.dp_te_weight_type == "qint8" else QuantType.QUInt8
    voc_act = QuantType.QInt8 if args.vocoder_act == "qint8" else QuantType.QUInt8
    voc_wt = QuantType.QInt8 if args.vocoder_wt == "qint8" else QuantType.QUInt8

    if args.dp_mode == "copy":
        safe_copy(dp_fp32, dp_out)
    else:
        quantize_dynamic_safe(dp_fp32, dp_out, ["MatMul", "Gemm"], dp_te_wt)

    if args.te_mode == "copy":
        safe_copy(te_fp32, te_out)
    else:
        quantize_dynamic_safe(te_fp32, te_out, ["MatMul", "Gemm"], dp_te_wt)

    if args.ve_mode == "copy":
        safe_copy(ve_fp32, ve_out)
    else:
        quantize_dynamic_safe(ve_fp32, ve_out, ["MatMul", "Gemm"], QuantType.QInt8)

    if args.ve_conv_w8dq:
        apply_w8dq_to_conv_weights(
            model_in=ve_out,
            model_out=ve_out,
            exclude_last_conv=args.ve_w8dq_exclude_last_conv,
            only_fp32=True,
        )

    ve_calib = os.path.join(args.calib_dir, os.path.splitext(args.vector_estimator)[0])
    voc_calib_dir = os.path.join(args.calib_dir, os.path.splitext(args.vocoder)[0])

    voc_lat_shape = infer_vocoder_latent_shape(voc_fp32, voc_calib_dir)

    nodes_excl = None
    if args.exclude_last_conv > 0:
        with tempfile.TemporaryDirectory(prefix="voc_pre_") as td:
            pre_voc = os.path.join(td, "voc_pre.onnx")
            voc_for = preprocess(voc_fp32, pre_voc, args.preprocess)
            m = onnx.load(voc_for)
            ensure_node_names(m)
            convs = [n.name for n in m.graph.node if n.op_type == "Conv"]
            if len(convs) >= args.exclude_last_conv:
                nodes_excl = convs[-args.exclude_last_conv:]

    def _run_vocoder_quantize(calib_folder: str) -> None:
        quantize_static_safe(
            fp32_path=voc_fp32,
            out_path=voc_out,
            calib_folder=calib_folder,
            preprocess_mode=args.preprocess,
            calib_limit=args.vocoder_calib_limit,
            calibrate_method=args.vocoder_calibrate_method,
            act_type=voc_act,
            wt_type=voc_wt,
            per_channel=args.vocoder_per_channel,
            reduce_range=args.vocoder_reduce_range,
            op_types=["Conv"],
            nodes_to_exclude=nodes_excl,
            pad_percentile=args.pad_percentile,
            pad_max=args.pad_max,
        )
        if args.vocoder_tail_w8dq and args.exclude_last_conv > 0:
            apply_w8dq_to_conv_weights(
                model_in=voc_out,
                model_out=voc_out,
                exclude_last_conv=args.vocoder_tail_w8dq_exclude_last_conv,
                only_fp32=True,
            )

    if args.vocoder_calib_from_ve:
        with tempfile.TemporaryDirectory(prefix="vocoder_calib_") as tmp_voc_calib:
            ve_idx = args.ve_output_index
            if ve_idx < 0:
                ve_idx = pick_ve_output_index(ve_out, ve_calib, voc_lat_shape)
            print(f"[INFO] VE output index for vocoder calib: {ve_idx}")
            build_vocoder_calib_from_ve(
                ve_model_path=ve_out,
                ve_calib_dir=ve_calib,
                vocoder_fp32=voc_fp32,
                out_dir=tmp_voc_calib,
                ve_output_index=ve_idx,
                limit=args.vocoder_calib_limit,
                pad_percentile=args.pad_percentile,
                pad_max=args.pad_max,
            )
            _run_vocoder_quantize(tmp_voc_calib)
    else:
        _run_vocoder_quantize(voc_calib_dir)

    print("Quantization completed!")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/supertonic/dump_inputs.py
================================================
#!/usr/bin/env python3
# Copyright (c)  2026 zengyw

"""
Dump Supertonic TTS model inputs to npz for calibration.
See also https://github.com/supertone-inc/supertonic
"""

import argparse
import os

import numpy as np
import onnxruntime as ort

from helper import (
    UnicodeProcessor,
    Style,
    TextToSpeech,
    load_onnx_all,
    load_cfgs,
    load_text_processor,
    load_voice_style,
    chunk_text
)


class DumpTextToSpeech(TextToSpeech):
    """TTS with input dumping capability."""

    def __init__(
        self,
        cfgs: dict,
        text_processor: UnicodeProcessor,
        dp_ort: ort.InferenceSession,
        text_enc_ort: ort.InferenceSession,
        vector_est_ort: ort.InferenceSession,
        vocoder_ort: ort.InferenceSession,
        dump_dir: str = "calib",
    ):
        super().__init__(
            cfgs, text_processor, dp_ort, text_enc_ort, vector_est_ort, vocoder_ort
        )
        self.dump_dir = dump_dir

        self.dump_dirs = {
            "duration_predictor": os.path.join(dump_dir, "duration_predictor"),
            "text_encoder": os.path.join(dump_dir, "text_encoder"),
            "vector_estimator": os.path.join(dump_dir, "vector_estimator"),
            "vocoder": os.path.join(dump_dir, "vocoder"),
        }
        for d in self.dump_dirs.values():
            os.makedirs(d, exist_ok=True)
        self.counters = {k: 0 for k in self.dump_dirs}

    def _save_inputs(self, model_name: str, inputs: dict):
        """Save input tensors to npz file."""
        counter = self.counters[model_name]
        output_path = os.path.join(self.dump_dirs[model_name], f"{counter:03d}.npz")
        np.savez(output_path, **inputs)
        self.counters[model_name] += 1
        print(f"  Saved {model_name} inputs to {output_path}")

    def _infer(
        self,
        text_list: list[str],
        lang_list: list[str],
        style: Style,
        total_step: int,
        speed: float = 1.05,
    ) -> tuple[np.ndarray, np.ndarray]:
        """Run inference with input dumping."""
        assert (
            len(text_list) == style.ttl.shape[0]
        ), "Number of texts must match number of style vectors"
        bsz = len(text_list)

        text_ids, text_mask = self.text_processor(text_list, lang_list)
        dp_inputs = {
            "text_ids": text_ids,
            "style_dp": style.dp,
            "text_mask": text_mask,
        }
        self._save_inputs("duration_predictor", dp_inputs)
        dur_onnx, *_ = self.dp_ort.run(None, dp_inputs)
        dur_onnx = dur_onnx / speed
        text_emb_onnx, *_ = self.text_enc_ort.run(
            None,
            {
                "text_ids": text_ids,
                "style_ttl": style.ttl,
                "text_mask": text_mask,
            },
        )
        self._save_inputs("text_encoder", {
            "text_ids": text_ids,
            "style_ttl": style.ttl,
            "text_mask": text_mask,
        })
        xt, latent_mask = self.sample_noisy_latent(dur_onnx)
        total_step_np = np.array([total_step] * bsz, dtype=np.float32)

        # dump vector_estimator inputs at last step (most informative)
        for step in range(total_step):
            current_step = np.array([step] * bsz, dtype=np.float32)
            ve_inputs = {
                "noisy_latent": xt,
                "text_emb": text_emb_onnx,
                "style_ttl": style.ttl,
                "text_mask": text_mask,
                "latent_mask": latent_mask,
                "current_step": current_step,
                "total_step": total_step_np,
            }
            if step == total_step - 1:
                self._save_inputs("vector_estimator", ve_inputs)
            xt, *_ = self.vector_est_ort.run(None, ve_inputs)

        # Vocoder inputs and run
        vocoder_inputs = {"latent": xt}
        self._save_inputs("vocoder", vocoder_inputs)
        wav, *_ = self.vocoder_ort.run(None, vocoder_inputs)

        return wav, dur_onnx

    def __call__(
        self,
        text: str,
        lang: str,
        style: Style,
        total_step: int,
        speed: float = 1.05,
        silence_duration: float = 0.3,
    ) -> tuple[np.ndarray, np.ndarray]:
        """Single text to speech with input dumping."""
        assert (
            style.ttl.shape[0] == 1
        ), "Single speaker text to speech only supports single style"
        max_len = 120 if lang == "ko" else 300
        text_list = chunk_text(text, max_len=max_len)
        wav_cat = None
        dur_cat = None

        for i, text_chunk in enumerate(text_list):
            print(f"Processing chunk {i+1}/{len(text_list)}: '{text_chunk[:50]}...'")
            wav, dur_onnx = self._infer([text_chunk], [lang], style, total_step, speed)
            if wav_cat is None:
                wav_cat = wav
                dur_cat = dur_onnx
            else:
                silence = np.zeros(
                    (1, int(silence_duration * self.sample_rate)), dtype=np.float32
                )
                wav_cat = np.concatenate([wav_cat, silence, wav], axis=1)
                dur_cat += dur_onnx + silence_duration
        return wav_cat, dur_cat

    def batch(
        self,
        text_list: list[str],
        lang_list: list[str],
        style: Style,
        total_step: int,
        speed: float = 1.05,
    ) -> tuple[np.ndarray, np.ndarray]:
        """Batch inference with input dumping."""
        return self._infer(text_list, lang_list, style, total_step, speed)


def load_dump_text_to_speech(
    onnx_dir: str, dump_dir: str = "calib", use_gpu: bool = False
) -> DumpTextToSpeech:
    """Load TTS model for dumping inputs."""
    opts = ort.SessionOptions()
    if use_gpu:
        raise NotImplementedError("GPU mode is not fully tested")
    else:
        providers = ["CPUExecutionProvider"]
        print("Using CPU for inference")

    cfgs = load_cfgs(onnx_dir)
    dp_ort, text_enc_ort, vector_est_ort, vocoder_ort = load_onnx_all(
        onnx_dir, opts, providers
    )
    text_processor = load_text_processor(onnx_dir)
    return DumpTextToSpeech(
        cfgs, text_processor, dp_ort, text_enc_ort, vector_est_ort, vocoder_ort, dump_dir
    )


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--onnx-dir", type=str, default="assets/onnx", help="onnx model dir"
    )
    parser.add_argument(
        "--dump-dir", type=str, default="calib", help="output npz dir"
    )
    parser.add_argument(
        "--total-step", type=int, default=5, help="denoising steps"
    )
    parser.add_argument(
        "--speed", type=float, default=1.05, help="speech speed"
    )
    parser.add_argument(
        "--n_test", type=int, default=1, help="num sentences"
    )
    parser.add_argument("--batch", action="store_true", help="batch mode")
    parser.add_argument(
        "--voice_style",
        type=str,
        nargs="+",
        default=["assets/voice_styles/M1.json"],
        help="voice style json path(s)",
    )
    parser.add_argument(
        "--text",
        type=str,
        nargs="+",
        default=[
            "This morning, I took a walk in the park, and the sound of the birds and the breeze was so pleasant."
        ],
        help="text(s) to synthesize",
    )
    parser.add_argument(
        "--lang", type=str, nargs="+", default=["en"], help="language(s)"
    )
    parser.add_argument("--clear", action="store_true", help="clear dump dir")
    parser.add_argument(
        "--config-file",
        type=str, default=None, dest="config_file", help="batch config json"
    )
    return parser.parse_args()


def main():
    args = get_args()

    if args.clear and os.path.exists(args.dump_dir):
        import shutil
        shutil.rmtree(args.dump_dir)
        print(f"Cleared existing directory: {args.dump_dir}")

    # Load TTS with dumping
    print(f"Loading models from {args.onnx_dir}...")

    if args.config_file:
        import json
        with open(args.config_file, "r") as f:
            configs = json.load(f)

        print(f"Loaded {len(configs)} configurations from {args.config_file}")

        # Process each configuration one by one
        tts = load_dump_text_to_speech(args.onnx_dir, args.dump_dir, use_gpu=False)

        print(f"\nProcessing {len(configs)} sentence(s)...")
        for i, cfg in enumerate(configs):
            print(f"\n[{i+1}/{len(configs)}] voice={cfg['voice'].split('/')[-1]}, lang={cfg['lang']}")
            voice = load_voice_style([cfg["voice"]])
            _wav, _duration = tts(cfg["text"], cfg["lang"], voice, args.total_step, args.speed)
    else:
        # Validate inputs for non-batch mode
        if args.batch:
            assert len(args.voice_style) == len(args.text), (
                f"Number of voice styles ({len(args.voice_style)}) must match "
                f"number of texts ({len(args.text)})"
            )

        tts = load_dump_text_to_speech(args.onnx_dir, args.dump_dir, use_gpu=False)

        # Load voice style
        style = load_voice_style(args.voice_style, verbose=True)

        # Process sentences
        print(f"\nProcessing {args.n_test} sentence(s)...")
        for n in range(args.n_test):
            print(f"\n[{n+1}/{args.n_test}]")

            if args.batch:
                wav, duration = tts.batch(args.text, args.lang, style, args.total_step, args.speed)
            else:
                wav, duration = tts(args.text[0], args.lang[0], style, args.total_step, args.speed)

    # Print summary
    print("\n" + "=" * 50)
    print("Dumping completed!")
    print("=" * 50)
    print("\nGenerated files:")
    for model_name, counter in tts.counters.items():
        dump_dir = tts.dump_dirs[model_name]
        if os.path.exists(dump_dir):
            files = sorted(os.listdir(dump_dir))
            print(f"  {model_name}: {len(files)} files in {dump_dir}/")
            for f in files[:5]:
                print(f"    - {f}")
            if len(files) > 5:
                print(f"    ... and {len(files) - 5} more")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/supertonic/gen_calib_configs.py
================================================
#!/usr/bin/env python3
# Copyright (c)  2026 zengyw
# Generate calibration configs (voice/text/lang) with diverse voices and text.

import json
import random
from collections import Counter

SENTENCES = {
    "en": [
        "Hello world.",
        "How are you today?",
        "The sky is blue.",
        "I love machine learning.",
        "Python is awesome.",
        "Good morning everyone.",
        "Artificial intelligence is growing.",
        "Speech synthesis is fascinating.",
        "Neural networks are powerful.",
        "Text to speech converts text to audio.",
        "The quick brown fox jumps over the lazy dog.",
        "Machine learning enables computers to learn from data.",
        "Natural language processing helps machines understand text.",
        "Deep learning has revolutionized artificial intelligence.",
        "Speech synthesis technology has advanced significantly.",
        "Neural voice cloning can replicate speaking styles.",
        "Text normalization is important for proper pronunciation.",
        "Voice assistants help us interact with technology naturally.",
        "Modern TTS systems use deep learning for high-quality speech.",
        "Human computer interaction has become more intuitive.",
    ],
    "es": [
        "Hola mundo.",
        "¿Cómo estás hoy?",
        "El cielo es azul.",
        "Me encanta el aprendizaje automático.",
        "Python es increíble.",
        "Buenos días a todos.",
        "La inteligencia artificial está creciendo.",
        "La síntesis de voz es fascinante.",
        "Las redes neuronales son poderosas.",
        "El texto a voz convierte texto en audio.",
        "El veloz marrón salta sobre el perro perezoso.",
        "El aprendizaje automático permite a las computadoras aprender.",
        "El procesamiento del lenguaje natural ayuda a las máquinas.",
        "El aprendizaje profundo ha revolucionado la inteligencia artificial.",
        "La tecnología de síntesis de voz ha avanzado significativamente.",
        "La clonación de voz neuronal puede replicar estilos de habla.",
        "La normalización de texto es importante para la pronunciación.",
        "Los asistentes de voz nos ayudan a interactuar con la tecnología.",
        "Los sistemas TTS modernos utilizan aprendizaje profundo.",
        "La interacción humano computadora se ha vuelto más intuitiva.",
    ],
    "pt": [
        "Olá mundo.",
        "Como você está hoje?",
        "O céu é azul.",
        "Eu amo aprendizado de máquina.",
        "Python é incrível.",
        "Bom dia a todos.",
        "A inteligência artificial está crescendo.",
        "A síntese de voz é fascinante.",
        "As redes neurais são poderosas.",
        "Texto para voz converte texto em áudio.",
        "A rápida raposa marrom salta sobre o cachorro preguiçoso.",
        "O aprendizado de máquina permite que computadores aprendam.",
        "O processamento de linguagem natural ajuda máquinas a entender.",
        "O aprendizado profundo revolucionou a inteligência artificial.",
        "A tecnologia de síntese de voz avançou significativamente.",
        "A clonagem de voz neural pode replicar estilos de fala.",
        "A normalização de texto é importante para pronúncia.",
        "Assistentes de voz nos ajudam a interagir com tecnologia.",
        "Sistemas TTS modernos usam aprendizado profundo para áudio.",
        "A interação humano computador tornou-se mais intuitiva.",
    ],
    "fr": [
        "Bonjour le monde.",
        "Comment allez-vous aujourd'hui?",
        "Le ciel est bleu.",
        "J'aime l'apprentissage automatique.",
        "Python est incroyable.",
        "Bonjour à tous.",
        "L'intelligence artificielle grandit.",
        "La synthèse vocale est fascinante.",
        "Les réseaux neuronaux sont puissants.",
        "Le texte en voix convertit le texte en audio.",
        "Le rapide renard brun saute par-dessus le chien paresseux.",
        "L'apprentissage automatique permet aux ordinateurs d'apprendre.",
        "Le traitement du langage naturel aide les machines à comprendre.",
        "L'apprentissage profond a révolutionné l'intelligence artificielle.",
        "La technologie de synthèse vocale a considérablement progressé.",
        "Le clonage vocal neuronal peut reproduire les styles de parole.",
        "La normalisation du texte est importante pour la prononciation.",
        "Les assistants vocaux nous aident à interagir avec la technologie.",
        "Les systèmes TTS modernes utilisent l'apprentissage profond.",
        "L'interaction homme machine est devenue plus intuitive.",
    ],
    "ko": [
        "안녕하세요 세계.",
        "오늘 어떻게 지내세요?",
        "하늘이 푸릅니다.",
        "기계학습을 사랑합니다.",
        "파이썬은 놀라워요.",
        "모든 분께 좋은 아침입니다.",
        "인공지능이 성장하고 있습니다.",
        "음성 합성은 매력적입니다.",
        "신경망은 강력합니다.",
        "텍스트 음성 변환이 텍스트를 오디오로 변환합니다.",
        "빠른 갈색 여우가 게으른 개를 뛰어넘습니다.",
        "기계학습이 컴퓨터가 데이터로 학습할 수 있게 합니다.",
        "자연어 처리가 기계를 이해하도록 돕습니다.",
        "딥러닝이 인공지능을 혁신했습니다.",
        "음성 합성 기술이 크게 발전했습니다.",
        "음성 클로닝이 음성 스타일을 복제할 수 있습니다.",
        "텍스트 정규화가 올바른 발음에 중요합니다.",
        "음성 비서가 기술과 상호작용하는 데 도움이 됩니다.",
        "최신 TTS 시스템이 고품질 음성을 생성합니다.",
        "인간 컴퓨터 상호작용이 더 직관적이 되었습니다.",
    ],
}

VOICE_STYLES = {
    "M": [
        "assets/voice_styles/M1.json",
        "assets/voice_styles/M2.json",
        "assets/voice_styles/M3.json",
        "assets/voice_styles/M4.json",
        "assets/voice_styles/M5.json",
    ],
    "F": [
        "assets/voice_styles/F1.json",
        "assets/voice_styles/F2.json",
        "assets/voice_styles/F3.json",
        "assets/voice_styles/F4.json",
        "assets/voice_styles/F5.json",
    ],
}

SAMPLES_PER_LANG = 20

def generate_config():
    configs = []
    random.seed(42)

    for lang, sentences in SENTENCES.items():
        voice_pool = VOICE_STYLES["M"] + VOICE_STYLES["F"]
        random.shuffle(voice_pool)

        for i in range(SAMPLES_PER_LANG):
            voice = voice_pool[i % len(voice_pool)]
            sentence_idx = i % len(sentences)
            sentence = sentences[sentence_idx]

            if i % 3 == 0:
                sentence2 = sentences[(sentence_idx + 1) % len(sentences)]
                sentence = sentence + " " + sentence2
            if i % 5 == 0:
                sentence3 = sentences[(sentence_idx + 2) % len(sentences)]
                sentence = sentence + " " + sentence3

            configs.append({
                "voice": voice,
                "text": sentence,
                "lang": lang,
            })

    random.shuffle(configs)
    return configs


def main():
    configs = generate_config()
    with open("calib_configs.json", "w", encoding="utf-8") as f:
        json.dump(configs, f, ensure_ascii=False, indent=2)

    print(f"Generated {len(configs)} configurations saved to calib_configs.json")
    print("\nDistribution:")
    voices = [c["voice"].split("/")[-1] for c in configs]
    langs = [c["lang"] for c in configs]
    lens = [len(c["text"]) for c in configs]

    print("\nVoice distribution:")
    for v, c in Counter(voices).items():
        print(f"  {v}: {c}")

    print("\nLanguage distribution:")
    for lang, c in Counter(langs).items():
        print(f"  {lang}: {c}")

    print("\nText length stats:")
    print(f"  min: {min(lens)}, max: {max(lens)}, avg: {sum(lens)/len(lens):.1f}")

    print("\nSample configs:")
    for i in range(0, len(configs), 20):
        c = configs[i]
        print(f"  [{i//20 + 1}] lang={c['lang']}, voice={c['voice'].split('/')[-1]}, text='{c['text'][:30]}...'")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/supertonic/generate_indexer_bin.py
================================================
#!/usr/bin/env python3
# Copyright    2026  zengyw
# Generate unicode_indexer.bin from unicode_indexer.json.

import json
import sys
from pathlib import Path

import numpy as np


def main():
    script_dir = Path(__file__).parent
    default_json = script_dir.parent.parent / "assets" / "onnx" / "unicode_indexer.json"
    json_path = Path(sys.argv[1]) if len(sys.argv) > 1 else default_json
    bin_path = Path(sys.argv[2]) if len(sys.argv) > 2 else json_path.with_suffix(".bin")

    if not json_path.exists():
        print(f"Error: {json_path} does not exist")
        return 1

    try:
        with open(json_path, "r", encoding="utf-8") as f:
            arr = json.load(f)
    except Exception as e:
        print(f"Error: failed to read JSON {json_path}: {e}")
        return 1

    if not isinstance(arr, list):
        print(f"Error: JSON must be an array of integers, got {type(arr)}")
        return 1

    for i, x in enumerate(arr):
        if isinstance(x, bool) or not isinstance(x, (int, np.integer)):
            print(f"Error: JSON element {i} is not an integer: {x} (type={type(x)})")
            return 1
        if x < np.iinfo(np.int32).min or x > np.iinfo(np.int32).max:
            print(f"Error: JSON element {i} out of int32 range: {x}")
            return 1

    array = np.asarray(arr, dtype=np.int32)

    try:
        with open(bin_path, "wb") as f:
            f.write(array.tobytes(order="C"))
    except Exception as e:
        print(f"Error: failed to write {bin_path}: {e}")
        return 1

    print(f"Wrote {array.size} int32 -> {bin_path}")
    return 0


if __name__ == "__main__":
    sys.exit(main())


================================================
FILE: scripts/supertonic/generate_voices_bin.py
================================================
#!/usr/bin/env python3
# Copyright    2026  zengyw
# Merge Supertonic voice style JSONs from a directory into one voice.bin

import json
import sys
from pathlib import Path

import numpy as np


def load_one_json(json_path):
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    if "style_ttl" not in data:
        raise ValueError(f"{json_path}: missing key 'style_ttl'")
    if "style_dp" not in data:
        raise ValueError(f"{json_path}: missing key 'style_dp'")

    style_ttl = data["style_ttl"]
    if "dims" not in style_ttl or "data" not in style_ttl:
        raise ValueError(f"{json_path}: 'style_ttl' must contain keys 'dims' and 'data'")
    ttl_dims = tuple(int(x) for x in style_ttl["dims"])
    ttl_arr = np.asarray(style_ttl["data"], dtype=np.float32)

    ttl_size = int(np.prod(ttl_dims)) if len(ttl_dims) > 0 else 0
    if ttl_arr.size != ttl_size:
        raise ValueError(
            f"{json_path}: ttl size {ttl_arr.size} != prod(ttl_dims) {ttl_size} (ttl_dims={ttl_dims})"
        )
    ttl_arr = ttl_arr.reshape(ttl_dims)
    if not np.all(np.isfinite(ttl_arr)):
        raise ValueError(f"{json_path}: ttl contains NaN/Inf")

    style_dp = data["style_dp"]
    if "dims" not in style_dp or "data" not in style_dp:
        raise ValueError(f"{json_path}: 'style_dp' must contain keys 'dims' and 'data'")
    dp_dims = tuple(int(x) for x in style_dp["dims"])
    dp_arr = np.asarray(style_dp["data"], dtype=np.float32)

    dp_size = int(np.prod(dp_dims)) if len(dp_dims) > 0 else 0
    if dp_arr.size != dp_size:
        raise ValueError(
            f"{json_path}: dp size {dp_arr.size} != prod(dp_dims) {dp_size} (dp_dims={dp_dims})"
        )
    dp_arr = dp_arr.reshape(dp_dims)
    if not np.all(np.isfinite(dp_arr)):
        raise ValueError(f"{json_path}: dp contains NaN/Inf")
    return ttl_dims, ttl_arr, dp_dims, dp_arr


def merge_jsons_to_binary(json_paths, output_path):
    if not json_paths:
        raise ValueError("No JSON paths given")
    ttl_arrays = []
    dp_arrays = []
    ref_ttl = ref_dp = None
    for p in json_paths:
        ttl_dims, ttl_arr, dp_dims, dp_arr = load_one_json(p)
        if len(ttl_dims) != 3 or ttl_dims[0] != 1:
            raise ValueError(
                f"{p}: expected ttl dims [1, d1, d2], got {ttl_dims}"
            )
        if len(dp_dims) != 3 or dp_dims[0] != 1:
            raise ValueError(
                f"{p}: expected dp dims [1, d1, d2], got {dp_dims}"
            )
        if ref_ttl is None:
            ref_ttl, ref_dp = ttl_dims, dp_dims
        elif ttl_dims[1:] != ref_ttl[1:] or dp_dims[1:] != ref_dp[1:]:
            raise ValueError(
                f"File {p} has dims ttl{ttl_dims} dp{dp_dims}; "
                f"expected ttl[1:]={ref_ttl[1:]}, dp[1:]={ref_dp[1:]}"
            )
        ttl_arrays.append(ttl_arr)
        dp_arrays.append(dp_arr)

    n = len(json_paths)
    ttl_stack = np.concatenate(ttl_arrays, axis=0)
    dp_stack = np.concatenate(dp_arrays, axis=0)
    out_ttl_dims = np.array([n, ref_ttl[1], ref_ttl[2]], dtype=np.int64)
    out_dp_dims = np.array([n, ref_dp[1], ref_dp[2]], dtype=np.int64)

    with open(output_path, "wb") as f:
        f.write(out_ttl_dims.tobytes())
        f.write(out_dp_dims.tobytes())
        f.write(ttl_stack.ravel().tobytes())
        f.write(dp_stack.ravel().tobytes())
    print(f"Merged {n} voice(s) -> {output_path} (sid 0..{n - 1})")


def main():
    script_dir = Path(__file__).parent
    default_input = script_dir / "assets" / "voice_styles"
    input_dir = Path(sys.argv[1]) if len(sys.argv) > 1 else default_input
    if len(sys.argv) > 2:
        output_path = Path(sys.argv[2])
    else:
        output_path = input_dir / "voice.bin"

    if not input_dir.exists() or not input_dir.is_dir():
        print(f"Error: input dir does not exist or not a directory: {input_dir}")
        return 1
    json_files = sorted(input_dir.glob("*.json"))

    if not json_files:
        print(f"No JSON files found in {input_dir}")
        return 1

    try:
        merge_jsons_to_binary([str(p) for p in json_files], str(output_path))
    except Exception as e:
        print(f"Error: {e}")
        return 1

    return 0


if __name__ == "__main__":
    exit(main())


================================================
FILE: scripts/t-one/README.md
================================================
# Introduction

This folder contains scripts for exporting models from
https://github.com/voicekit-team/T-one
to sherpa-onnx.


================================================
FILE: scripts/t-one/add_meta_data.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)


import onnx


def main():
    meta_data = {
        "model_type": "t-one",
        "language": "Russian",
        "version": 1,
        "maintainer": "k2-fsa",
        "sample_rate": 8000,
        "frame_length_ms": 300,  # chunk_duration_ms
        "state_dim": 219729,
        "comment": "This is a streaming CTC model for Russian with expected audio sample rate 8000",
        "url": "https://github.com/voicekit-team/T-one",
        "see_also": "https://huggingface.co/t-tech/T-one",
    }
    model = onnx.load("./model.onnx")

    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)
    print("--------------------")

    print(model.metadata_props)

    onnx.save(model, "./model.onnx")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/t-one/generate_tokens.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import json


def main():
    with open("vocab.json") as f:
        token2id = json.load(f)

    with open("tokens.txt", "w", encoding="utf-8") as f:
        for s, i in token2id.items():
            if s == "|":
                s = " "
            if s == "[PAD]":
                s = "<blk>"

            f.write(f"{s} {i}\n")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/t-one/test.py
================================================
#!/usr/bin/env python3
# Copyright      2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import argparse
from typing import Tuple

import kaldi_native_fbank as knf
import numpy as np
import onnxruntime as ort
import soundfile as sf


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--model",
        type=str,
        required=True,
        help="Path to model.onnx",
    )

    parser.add_argument(
        "--tokens",
        type=str,
        required=True,
        help="Path to tokens.txt",
    )

    parser.add_argument(
        "--wave",
        type=str,
        required=True,
        help="The input wave to be recognized",
    )

    return parser.parse_args()


class OnnxModel:
    def __init__(self, filename):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.session_opts = session_opts

        self.model = ort.InferenceSession(
            filename,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )

        meta = self.model.get_modelmeta().custom_metadata_map

        self.frame_length_ms = int(meta["frame_length_ms"])
        self.sample_rate = int(meta["sample_rate"])
        self.state_dim = int(meta["state_dim"])

    def get_init_state(self, batch_size=1):
        return np.zeros((batch_size, self.state_dim), dtype=np.float16)

    def __call__(self, x, state):
        """
        Args:
          x: (batch_size, num_samples, 1), int32
          state: (batch_size, 219729)
        Returns:
          log_probs: (batch_size, num_frames, vocab_size)
          next_state: (batch_size, 219729)
        """
        log_prob, next_state = self.model.run(
            [
                self.model.get_outputs()[0].name,
                self.model.get_outputs()[1].name,
            ],
            {
                self.model.get_inputs()[0].name: x,
                self.model.get_inputs()[1].name: state,
            },
        )
        return log_prob, next_state


def load_audio(filename: str) -> Tuple[np.ndarray, int]:
    data, sample_rate = sf.read(
        filename,
        always_2d=True,
        dtype="float32",
    )
    data = data[:, 0]  # use only the first channel
    samples = np.ascontiguousarray(data)
    return samples, sample_rate


def load_tokens(filename):
    ans = dict()
    with open(filename, encoding="utf-8") as f:
        for line in f:
            fields = line.strip().split()
            if len(fields) == 1:
                ans[int(fields[0])] = " "
            else:
                ans[int(fields[1])] = fields[0]
    return ans


def compute_feat(
    samples,
    sample_rate,
    frame_length_ms: int,
):
    opts = knf.RawAudioSamplesOptions()
    opts.frame_opts.samp_freq = sample_rate
    opts.frame_opts.frame_length_ms = frame_length_ms
    opts.frame_opts.frame_shift_ms = frame_length_ms

    raw_audio_samples = knf.OnlineRawAudioSamples(opts)

    raw_audio_samples.accept_waveform(sample_rate, samples)
    raw_audio_samples.input_finished()

    features = []

    for i in range(raw_audio_samples.num_frames_ready):
        f = raw_audio_samples.get_frame(i)
        features.append(f)

    return (np.array(features, dtype=np.float32) * 32768).astype(np.int32)


def main():
    args = get_args()
    print(vars(args))

    model = OnnxModel(filename=args.model)

    samples, sample_rate = load_audio(args.wave)
    if sample_rate != model.sample_rate:
        import librosa

        samples = librosa.resample(
            samples, orig_sr=sample_rate, target_sr=model.sample_rate
        )
        sample_rate = model.sample_rate

    # Pad 0.5 seconds
    samples = np.pad(samples, (2400, 2400))

    features = compute_feat(
        samples=samples,
        sample_rate=sample_rate,
        frame_length_ms=model.frame_length_ms,
    )

    id2token = load_tokens(args.tokens)

    blank = -2
    for idx, token in id2token.items():
        if token == "<blk>":
            blank = idx

    state = model.get_init_state()
    token_id_list = []
    for f in features:
        log_probs, state = model(f[None, :, None], state)

        max_token_ids = log_probs[0].argmax(axis=-1).tolist()
        token_id_list += max_token_ids

    unique_ids = []
    prev = -1
    for t in token_id_list:
        if t == blank:
            prev = t
            continue

        if t == prev:
            continue

        prev = t
        unique_ids.append(prev)
    text = "".join([id2token[i] for i in unique_ids])
    print(text)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/tele-speech/.gitignore
================================================
*.json


================================================
FILE: scripts/tele-speech/README.md
================================================
# Introduction

This folder contains scripts about adding metadata to
onnx models from
https://hf-mirror.com/lovemefan/telespeech/tree/main

Please see

  - https://github.com/Tele-AI/TeleSpeech-ASR
  - https://github.com/lovemefan/telespeech-asr-python
  - [TeleSpeech模型社区许可协议.pdf](https://github.com/Tele-AI/TeleSpeech-ASR/blob/master/TeleSpeech%E6%A8%A1%E5%9E%8B%E7%A4%BE%E5%8C%BA%E8%AE%B8%E5%8F%AF%E5%8D%8F%E8%AE%AE.pdf)

for more details.


================================================
FILE: scripts/tele-speech/add-metadata.py
================================================
#!/usr/bin/env python3

import json
from typing import Dict

import onnx
from onnxruntime.quantization import QuantType, quantize_dynamic


def add_meta_data(filename: str, meta_data: Dict[str, str]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)

    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = value

    onnx.save(model, filename)


def main():
    with open("./vocab.json", "r", encoding="utf-8") as f:
        tokens = json.load(f)

    vocab_size = len(tokens)
    with open("tokens.txt", "w", encoding="utf-8") as f:
        for token, idx in tokens.items():
            if idx == 0:
                f.write("<blk> 0\n")
            else:
                f.write(f"{token} {idx}\n")

    filename = "model.onnx"
    meta_data = {
        "model_type": "telespeech_ctc",
        "version": "1",
        "model_author": "Tele-AI",
        "comment": "See also https://github.com/lovemefan/telespeech-asr-python",
        "license": "https://github.com/Tele-AI/TeleSpeech-ASR/blob/master/TeleSpeech%E6%A8%A1%E5%9E%8B%E7%A4%BE%E5%8C%BA%E8%AE%B8%E5%8F%AF%E5%8D%8F%E8%AE%AE.pdf",
        "url": "https://github.com/Tele-AI/TeleSpeech-ASR",
    }

    add_meta_data(filename, meta_data)

    filename_int8 = f"model.int8.onnx"
    quantize_dynamic(
        model_input=filename,
        model_output=filename_int8,
        op_types_to_quantize=["MatMul"],
        weight_type=QuantType.QInt8,
    )

    #  filename_uint8 = f"model.uint8.onnx"
    #  quantize_dynamic(
    #      model_input=filename,
    #      model_output=filename_uint8,
    #      op_types_to_quantize=["MatMul"],
    #      weight_type=QuantType.QUInt8,
    #  )


if __name__ == "__main__":
    main()


================================================
FILE: scripts/tele-speech/test.py
================================================
#!/usr/bin/env python3
# Copyright      2024  Xiaomi Corp.        (authors: Fangjun Kuang)

from typing import Tuple

import kaldi_native_fbank as knf
import numpy as np
import onnxruntime as ort
import soundfile as sf

"""
NodeArg(name='feats', type='tensor(float)', shape=[1, 'T', 40])
-----
NodeArg(name='logits', type='tensor(float)', shape=['Addlogits_dim_0', 1, 7535])
"""


class OnnxModel:
    def __init__(
        self,
        filename: str,
    ):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.session_opts = session_opts

        self.model = ort.InferenceSession(
            filename,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )

        self.show()

    def show(self):
        for i in self.model.get_inputs():
            print(i)

        print("-----")

        for i in self.model.get_outputs():
            print(i)

    def __call__(self, x):
        """
        Args:
          x: a float32 tensor of shape (N, T, C)
        """
        logits = self.model.run(
            [
                self.model.get_outputs()[0].name,
            ],
            {
                self.model.get_inputs()[0].name: x,
            },
        )[0]

        return logits


def load_audio(filename: str) -> Tuple[np.ndarray, int]:
    data, sample_rate = sf.read(
        filename,
        always_2d=True,
        dtype="float32",
    )
    data = data[:, 0]  # use only the first channel
    samples = np.ascontiguousarray(data)
    return samples, sample_rate


def get_features(test_wav_filename):
    samples, sample_rate = load_audio(test_wav_filename)

    if sample_rate != 16000:
        import librosa

        samples = librosa.resample(samples, orig_sr=sample_rate, target_sr=16000)
        sample_rate = 16000

    samples *= 32768

    opts = knf.MfccOptions()
    # See https://github.com/Tele-AI/TeleSpeech-ASR/blob/master/mfcc_hires.conf
    opts.frame_opts.dither = 0

    opts.num_ceps = 40
    opts.use_energy = False

    opts.mel_opts.num_bins = 40
    opts.mel_opts.low_freq = 40
    opts.mel_opts.high_freq = -200

    mfcc = knf.OnlineMfcc(opts)
    mfcc.accept_waveform(16000, samples)
    frames = []
    for i in range(mfcc.num_frames_ready):
        frames.append(mfcc.get_frame(i))

    frames = np.stack(frames, axis=0)
    return frames


def cmvn(features):
    # See https://github.com/Tele-AI/TeleSpeech-ASR/blob/master/wenet_representation/conf/train_d2v2_ark_conformer.yaml#L70
    # https://github.com/Tele-AI/TeleSpeech-ASR/blob/master/wenet_representation/wenet/dataset/dataset.py#L184
    # https://github.com/Tele-AI/TeleSpeech-ASR/blob/master/wenet_representation/wenet/dataset/processor.py#L278
    mean = features.mean(axis=0, keepdims=True)
    std = features.std(axis=0, keepdims=True)
    return (features - mean) / (std + 1e-5)


def main():
    # Please download the test data from
    # https://hf-mirror.com/csukuangfj/sherpa-onnx-paraformer-zh-small-2024-03-09/tree/main/test_wavs
    test_wav_filename = "./3-sichuan.wav"
    test_wav_filename = "./4-tianjin.wav"
    test_wav_filename = "./5-henan.wav"

    features = get_features(test_wav_filename)

    features = cmvn(features)

    features = np.expand_dims(features, axis=0)  # (T, C) -> (N, T, C)

    model_filename = "./model.int8.onnx"
    model = OnnxModel(model_filename)
    logits = model(features)
    logits = logits.squeeze(axis=1)  # remove batch axis
    ids = logits.argmax(axis=-1)

    id2token = dict()
    with open("./tokens.txt", encoding="utf-8") as f:
        for line in f:
            t, idx = line.split()
            id2token[int(idx)] = t

    tokens = []

    blank = 0
    prev = -1

    for k in ids:
        if k != blank and k != prev:
            tokens.append(k)
        prev = k

    tokens = [id2token[i] for i in tokens]
    text = "".join(tokens)
    print(text)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/text2token.py
================================================
#!/usr/bin/env python3

"""
This script encode the texts (given line by line through `text`) to tokens and
write the results to the file given by ``output``.

Usage:
If the tokens_type is bpe:

python3 ./text2token.py \
          --text texts.txt \
          --tokens tokens.txt \
          --tokens-type bpe \
          --bpe-model bpe.model \
          --output hotwords.txt

If the tokens_type is cjkchar:

python3 ./text2token.py \
          --text texts.txt \
          --tokens tokens.txt \
          --tokens-type cjkchar \
          --output hotwords.txt

If the tokens_type is cjkchar+bpe:

python3 ./text2token.py \
          --text texts.txt \
          --tokens tokens.txt \
          --tokens-type cjkchar+bpe \
          --bpe-model bpe.model \
          --output hotwords.txt

"""
import argparse

from sherpa_onnx import text2token


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--text",
        type=str,
        required=True,
        help="""Path to the input texts.

        Each line in the texts contains the original phrase, it might also contain some
        extra items, for example, the boosting score (starting with :), the triggering
        threshold (starting with #, only used in keyword spotting task) and the original
        phrase (starting with @). Note: extra items will be kept in the output.

        example input 1 (tokens_type = ppinyin):

        小爱同学 :2.0 #0.6 @小爱同学
        你好问问 :3.5 @你好问问
        小艺小艺 #0.6 @小艺小艺

        example output 1:

        x iǎo ài t óng x ué :2.0 #0.6 @小爱同学
        n ǐ h ǎo w èn w èn :3.5 @你好问问
        x iǎo y ì x iǎo y ì #0.6 @小艺小艺

        example input 2 (tokens_type = bpe):

        HELLO WORLD :1.5 #0.4
        HI GOOGLE :2.0 #0.8
        HEY SIRI #0.35

        example output 2:

        ▁HE LL O ▁WORLD :1.5 #0.4
        ▁HI ▁GO O G LE :2.0 #0.8
        ▁HE Y ▁S I RI #0.35
        """,
    )

    parser.add_argument(
        "--tokens",
        type=str,
        required=True,
        help="The path to tokens.txt.",
    )

    parser.add_argument(
        "--tokens-type",
        type=str,
        required=True,
        choices=[
            "cjkchar",
            "bpe",
            "cjkchar+bpe",
            "fpinyin",
            "ppinyin",
            "phone+ppinyin",
        ],
        help="""The type of modeling units, should be cjkchar, bpe, cjkchar+bpe, fpinyin
        ppinyin or phone+ppinyin.
        fpinyin means full pinyin, each cjkchar has a pinyin(with tone).
        ppinyin means partial pinyin, it splits pinyin into initial and final,
        phone means English phonemes in CMU dictionary format.
        """,
    )

    parser.add_argument(
        "--bpe-model",
        type=str,
        help="The path to bpe.model. Only required when tokens-type is bpe or cjkchar+bpe.",
    )

    parser.add_argument(
        "--lexicon",
        type=str,
        help="The path to lexicon.txt. Only required when tokens-type is phone+ppinyin.",
    )

    parser.add_argument(
        "--output",
        type=str,
        required=True,
        help="Path where the encoded tokens will be written to.",
    )

    return parser.parse_args()


def main():
    args = get_args()

    texts = []
    # extra information like boosting score (start with :), triggering threshold (start with #)
    # original keyword (start with @)
    extra_info = []
    with open(args.text, "r", encoding="utf8") as f:
        for line in f:
            extra = []
            text = []
            toks = line.strip().split()
            for tok in toks:
                if tok[0] == ":" or tok[0] == "#" or tok[0] == "@":
                    extra.append(tok)
                else:
                    text.append(tok)
            texts.append(" ".join(text))
            extra_info.append(extra)
    encoded_texts = text2token(
        texts,
        tokens=args.tokens,
        tokens_type=args.tokens_type,
        bpe_model=args.bpe_model,
        lexicon=args.lexicon,
    )
    with open(args.output, "w", encoding="utf8") as f:
        for i, txt in enumerate(encoded_texts):
            txt += extra_info[i]
            f.write(" ".join(txt) + "\n")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/utils.sh
================================================
#!/bin/bash

default='\033[0m'
bold='\033[1m'
red='\033[31m'
green='\033[32m'

function ok() {
  printf "${bold}${green}[OK]${default} $1\n"
}

function error() {
  printf "${bold}${red}[FAILED]${default} $1\n"
}

function abort() {
  printf "${bold}${red}[FAILED]${default} $1\n"
  exit 1
}


================================================
FILE: scripts/uvr_mdx/READEME.md
================================================
# Introduction

This folder contains scripts for converting models from
https://github.com/TRvlvr/model_repo/releases/tag/all_public_uvr_models
to sherpa-onnx.


================================================
FILE: scripts/uvr_mdx/add_meta_data_and_quantize.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import argparse
from pathlib import Path

import onnx
import onnxmltools
import onnxruntime
from onnxmltools.utils.float16_converter import convert_float_to_float16
from onnxruntime.quantization import QuantType, quantize_dynamic


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--filename",
        type=str,
        required=True,
        help="Path to onnx model",
    )

    return parser.parse_args()


def export_onnx_fp16(onnx_fp32_path, onnx_fp16_path):
    onnx_fp32_model = onnxmltools.utils.load_model(onnx_fp32_path)
    onnx_fp16_model = convert_float_to_float16(onnx_fp32_model, keep_io_types=True)
    onnxmltools.utils.save_model(onnx_fp16_model, onnx_fp16_path)


def validate(model: onnxruntime.InferenceSession):
    for i in model.get_inputs():
        print(i)

    print("-----")

    for i in model.get_outputs():
        print(i)

    assert len(model.get_inputs()) == 1, len(model.get_inputs())
    assert len(model.get_outputs()) == 1, len(model.get_outputs())

    inp = model.get_inputs()[0]
    outp = model.get_outputs()[0]

    assert len(inp.shape) == 4, inp.shape
    assert len(outp.shape) == 4, outp.shape

    assert inp.shape[1:] == outp.shape[1:], (inp.shape, outp.shape)


def add_meta_data(filename, meta_data):
    model = onnx.load(filename)

    print(model.metadata_props)

    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)
    print("--------------------")

    print(model.metadata_props)

    onnx.save(model, filename)


def main():
    args = get_args()
    filename = Path(args.filename)
    if not filename.is_file():
        raise ValueError(f"{filename} does not exist")

    name = filename.stem
    print("name", name)

    model = onnx.load(str(filename))

    session_opts = onnxruntime.SessionOptions()
    session_opts.log_severity_level = 3
    sess = onnxruntime.InferenceSession(
        str(filename), session_opts, providers=["CPUExecutionProvider"]
    )
    validate(sess)

    inp = sess.get_inputs()[0]
    outp = sess.get_outputs()[0]

    meta_data = {
        "model_type": "UVR",
        "model_name": name,
        "sample_rate": 44100,
        "comment": "This model is downloaded from https://github.com/TRvlvr/model_repo/releases",
        "n_fft": inp.shape[2] * 2,
        "center": 1,
        "window_type": "hann",
        "win_length": inp.shape[2] * 2,
        "hop_length": 1024,
        "dim_t": inp.shape[3],
        "dim_f": inp.shape[2],
        "dim_c": inp.shape[1],
        "stems": 2,
    }
    add_meta_data(str(filename), meta_data)

    filename_fp16 = f"./{name}.fp16.onnx"
    export_onnx_fp16(filename, filename_fp16)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/uvr_mdx/show.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import onnxruntime
import onnx

"""
[]
NodeArg(name='input', type='tensor(float)', shape=['batch_size', 4, 3072, 256])
-----
NodeArg(name='output', type='tensor(float)', shape=['batch_size', 4, 3072, 256])
"""


def show(filename):
    model = onnx.load(filename)
    print(model.metadata_props)

    session_opts = onnxruntime.SessionOptions()
    session_opts.log_severity_level = 3
    sess = onnxruntime.InferenceSession(
        filename, session_opts, providers=["CPUExecutionProvider"]
    )
    for i in sess.get_inputs():
        print(i)

    print("-----")

    for i in sess.get_outputs():
        print(i)


def main():
    #  show("./UVR-MDX-NET-Voc_FT.onnx")
    show("./UVR_MDXNET_1_9703.onnx")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/uvr_mdx/test.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import time

import argparse
import kaldi_native_fbank as knf
import librosa
import numpy as np
import onnxruntime as ort
import soundfile as sf


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--model-filename",
        type=str,
        required=True,
        help="Path to onnx model",
    )

    parser.add_argument(
        "--audio-filename",
        type=str,
        required=True,
        help="Path to input audio file",
    )

    return parser.parse_args()


class OnnxModel:
    def __init__(self, filename):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 4
        session_opts.intra_op_num_threads = 4

        self.session_opts = session_opts
        self.model = ort.InferenceSession(
            filename,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )

        self.dim_t = self.model.get_outputs()[0].shape[3]

        self.dim_f = self.model.get_outputs()[0].shape[2]

        self.n_fft = self.dim_f * 2

        self.dim_c = self.model.get_outputs()[0].shape[1]
        assert self.dim_c == 4, self.dim_c

        self.hop = 1024
        self.n_bins = self.n_fft // 2 + 1
        self.chunk_size = self.hop * (self.dim_t - 1)

        self.freq_pad = np.zeros([1, self.dim_c, self.n_bins - self.dim_f, self.dim_t])

        print(f"----------inputs for {filename}----------")
        for i in self.model.get_inputs():
            print(i)

        print(f"----------outputs for {filename}----------")

        for i in self.model.get_outputs():
            print(i)
            print(i.shape)
        print("--------------------")

    def __call__(self, x):
        """
        Args:
          x: (batch_size, 4, self.dim_f, self.dim_t)
        Returns:
          spec: (batch_size, 4, self.dim_f, self.dim_t)
        """
        spec = self.model.run(
            [
                self.model.get_outputs()[0].name,
            ],
            {
                self.model.get_inputs()[0].name: x,
            },
        )[0]

        return spec


def main():
    args = get_args()
    m = OnnxModel(args.model_filename)

    stft_config = knf.StftConfig(
        n_fft=m.n_fft,
        hop_length=m.hop,
        win_length=m.n_fft,
        center=True,
        window_type="hann",
    )
    knf_stft = knf.Stft(stft_config)
    knf_istft = knf.IStft(stft_config)

    sample_rate = 44100

    samples, rate = librosa.load(args.audio_filename, mono=False, sr=sample_rate)

    start_time = time.time()

    assert rate == sample_rate, (rate, sample_rate)

    # samples: (2, 479832) , (num_channels, num_samples), 44100, 10.88
    print("samples", samples.shape, rate, samples.shape[1] / rate)

    assert samples.ndim == 2, samples.shape
    assert samples.shape[0] == 2, samples.shape

    margin = sample_rate

    num_chunks = 15
    chunk_size = num_chunks * sample_rate

    # if they are too few samples, reset chunk_size
    if samples.shape[1] < chunk_size:
        chunk_size = samples.shape[1]

    if margin > chunk_size:
        margin = chunk_size

    segments = []
    for skip in range(0, samples.shape[1], chunk_size):
        start = max(0, skip - margin)
        end = min(skip + chunk_size + margin, samples.shape[1])
        segments.append(samples[:, start:end])
        if end == samples.shape[1]:
            break

    sources = []
    for kk, s in enumerate(segments):
        num_samples = s.shape[1]
        trim = m.n_fft // 2
        gen_size = m.chunk_size - 2 * trim
        pad = gen_size - s.shape[1] % gen_size
        mix_p = np.concatenate(
            (
                np.zeros((2, trim)),
                s,
                np.zeros((2, pad)),
                np.zeros((2, trim)),
            ),
            axis=1,
        )

        chunk_list = []
        i = 0
        while i < s.shape[1] + pad:
            chunk_list.append(mix_p[:, i : i + m.chunk_size])
            i += gen_size

        mix_waves = np.array(chunk_list)

        mix_waves_reshaped = mix_waves.reshape(-1, m.chunk_size)
        stft_results = []
        for w in mix_waves_reshaped:
            stft = knf_stft(w)
            stft_results.append(stft)
        real = np.array(
            [np.array(s.real).reshape(s.num_frames, -1) for s in stft_results],
            dtype=np.float32,
        )[:, :, :-1]
        # real: (6, 256, 3072)

        real = real.transpose(0, 2, 1)
        # real: (6, 3072, 256)

        imag = np.array(
            [np.array(s.imag).reshape(s.num_frames, -1) for s in stft_results],
            dtype=np.float32,
        )[:, :, :-1]
        imag = imag.transpose(0, 2, 1)
        # imag: (6, 3072, 256)

        x = np.stack([real, imag], axis=1)
        # x: (6, 2, 3072, 256) -> (batch_size, real_imag, 3072, 256)
        x = x.reshape(-1, m.dim_c, m.dim_f, m.dim_t)
        # x: (3, 4, 3072, 256)
        spec = m(x)

        freq_pad = np.repeat(m.freq_pad, spec.shape[0], axis=0)

        x = np.concatenate([spec, freq_pad], axis=2)
        # x: (3, 4, 3073, 256)
        x = x.reshape(-1, 2, m.n_bins, m.dim_t)
        # x: (6, 2, 3073, 256)
        x = x.transpose(0, 1, 3, 2)
        # x: (6, 2, 256, 3073)
        num_frames = x.shape[2]

        x = x.reshape(x.shape[0], x.shape[1], -1)
        wav_list = []
        for k in range(x.shape[0]):
            istft_result = knf.StftResult(
                real=x[k, 0].reshape(-1).tolist(),
                imag=x[k, 1].reshape(-1).tolist(),
                num_frames=num_frames,
            )
            wav = knf_istft(istft_result)
            wav_list.append(wav)
        wav = np.array(wav_list, dtype=np.float32)
        # wav: (6, 261120)

        wav = wav.reshape(-1, 2, wav.shape[-1])
        # wav: (3, 2, 261120)

        wav = wav[:, :, trim:-trim]
        # wav: (3, 2, 254976)

        wav = wav.transpose(1, 0, 2)
        # wav: (2, 3, 254976)

        wav = wav.reshape(2, -1)
        # wav: (2, 764928)

        wav = wav[:, :-pad]
        # wav: 2, 705600)
        if kk == 0:
            start = 0
        else:
            start = margin

        if kk == len(segments) - 1:
            end = None
        else:
            end = -margin

        sources.append(wav[:, start:end])

    sources = np.concatenate(sources, axis=-1)

    vocals = sources
    non_vocals = samples - vocals
    end_time = time.time()
    elapsed_seconds = end_time - start_time
    print(f"Elapsed seconds: {elapsed_seconds:.3f}")

    audio_duration = samples.shape[1] / sample_rate
    real_time_factor = elapsed_seconds / audio_duration
    print(f"Elapsed seconds: {elapsed_seconds:.3f}")
    print(f"Audio duration in seconds: {audio_duration:.3f}")
    print(f"RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}")

    sf.write(f"./vocals.mp3", np.transpose(vocals), sample_rate)
    sf.write(f"./non_vocals.mp3", np.transpose(non_vocals), sample_rate)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/vits/.gitignore
================================================
tokens-ljs.txt
tokens-vctk.txt


================================================
FILE: scripts/vits/__init__.py
================================================


================================================
FILE: scripts/vits/export-onnx-ljs.py
================================================
#!/usr/bin/env python3
# Copyright    2023  Xiaomi Corp.        (authors: Fangjun Kuang)

"""
This script converts vits models trained using the LJ Speech dataset.

Usage:

(1) Download vits

cd /Users/fangjun/open-source
git clone https://github.com/jaywalnut310/vits

(2) Download pre-trained models from
https://huggingface.co/csukuangfj/vits-ljs/tree/main

wget https://huggingface.co/csukuangfj/vits-ljs/resolve/main/pretrained_ljs.pth

(3) Run this file

./export-onnx-ljs.py  \
  --config ~/open-source//vits/configs/ljs_base.json \
  --checkpoint ~/open-source/icefall-models/vits-ljs/pretrained_ljs.pth

It will generate the following two files:

$ ls -lh *.onnx
-rw-r--r--  1 fangjun  staff    36M Oct 10 20:48 vits-ljs.int8.onnx
-rw-r--r--  1 fangjun  staff   109M Oct 10 20:48 vits-ljs.onnx
"""
import sys

# Please change this line to point to the vits directory.
# You can download vits from
# https://github.com/jaywalnut310/vits
sys.path.insert(0, "/Users/fangjun/open-source/vits")  # noqa

import argparse
from pathlib import Path
from typing import Dict, Any

import commons
import onnx
import torch
import utils
from models import SynthesizerTrn
from onnxruntime.quantization import QuantType, quantize_dynamic
from text import text_to_sequence
from text.symbols import symbols
from text.symbols import _punctuation


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--config",
        type=str,
        required=True,
        help="""Path to ljs_base.json.
        You can find it at
        https://huggingface.co/csukuangfj/vits-ljs/resolve/main/ljs_base.json
        """,
    )

    parser.add_argument(
        "--checkpoint",
        type=str,
        required=True,
        help="""Path to the checkpoint file.
        You can find it at
        https://huggingface.co/csukuangfj/vits-ljs/resolve/main/pretrained_ljs.pth

        """,
    )

    return parser.parse_args()


class OnnxModel(torch.nn.Module):
    def __init__(self, model: SynthesizerTrn):
        super().__init__()
        self.model = model

    def forward(
        self,
        x,
        x_lengths,
        noise_scale=1,
        length_scale=1,
        noise_scale_w=1.0,
        sid=None,
        max_len=None,
    ):
        return self.model.infer(
            x=x,
            x_lengths=x_lengths,
            sid=sid,
            noise_scale=noise_scale,
            length_scale=length_scale,
            noise_scale_w=noise_scale_w,
            max_len=max_len,
        )[0]


def get_text(text, hps):
    text_norm = text_to_sequence(text, hps.data.text_cleaners)
    if hps.data.add_blank:
        text_norm = commons.intersperse(text_norm, 0)
    text_norm = torch.LongTensor(text_norm)
    return text_norm


def check_args(args):
    assert Path(args.config).is_file(), args.config
    assert Path(args.checkpoint).is_file(), args.checkpoint


def add_meta_data(filename: str, meta_data: Dict[str, Any]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)
    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    onnx.save(model, filename)


def generate_tokens():
    with open("tokens-ljs.txt", "w", encoding="utf-8") as f:
        for i, s in enumerate(symbols):
            f.write(f"{s} {i}\n")
    print("Generated tokens-ljs.txt")


@torch.no_grad()
def main():
    args = get_args()
    check_args(args)

    generate_tokens()

    hps = utils.get_hparams_from_file(args.config)

    net_g = SynthesizerTrn(
        len(symbols),
        hps.data.filter_length // 2 + 1,
        hps.train.segment_size // hps.data.hop_length,
        **hps.model,
    )
    _ = net_g.eval()

    _ = utils.load_checkpoint(args.checkpoint, net_g, None)

    x = get_text("Liliana is the most beautiful assistant", hps)
    x = x.unsqueeze(0)

    x_length = torch.tensor([x.shape[1]], dtype=torch.int64)
    noise_scale = torch.tensor([1], dtype=torch.float32)
    length_scale = torch.tensor([1], dtype=torch.float32)
    noise_scale_w = torch.tensor([1], dtype=torch.float32)

    model = OnnxModel(net_g)

    opset_version = 13

    filename = "vits-ljs.onnx"

    torch.onnx.export(
        model,
        (x, x_length, noise_scale, length_scale, noise_scale_w),
        filename,
        opset_version=opset_version,
        input_names=["x", "x_length", "noise_scale", "length_scale", "noise_scale_w"],
        output_names=["y"],
        dynamic_axes={
            "x": {0: "N", 1: "L"},  # n_audio is also known as batch_size
            "x_length": {0: "N"},
            "y": {0: "N", 2: "L"},
        },
    )
    meta_data = {
        "model_type": "vits",
        "comment": "ljspeech",
        "language": "English",
        "add_blank": int(hps.data.add_blank),
        "n_speakers": int(hps.data.n_speakers),
        "sample_rate": hps.data.sampling_rate,
        "punctuation": " ".join(list(_punctuation)),
    }
    print("meta_data", meta_data)
    add_meta_data(filename=filename, meta_data=meta_data)

    print("Generate int8 quantization models")

    filename_int8 = "vits-ljs.int8.onnx"
    quantize_dynamic(
        model_input=filename,
        model_output=filename_int8,
        weight_type=QuantType.QUInt8,
    )

    print(f"Saved to {filename} and {filename_int8}")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/vits/export-onnx-vctk.py
================================================
#!/usr/bin/env python3
# Copyright    2023  Xiaomi Corp.        (authors: Fangjun Kuang)

"""
This script converts vits models trained using the VCTK dataset.

Usage:

(1) Download vits

cd /Users/fangjun/open-source
git clone https://github.com/jaywalnut310/vits

(2) Download pre-trained models from
https://huggingface.co/csukuangfj/vits-vctk/tree/main

wget https://huggingface.co/csukuangfj/vits-vctk/resolve/main/pretrained_vctk.pth

(3) Run this file

./export-onnx-vctk.py  \
  --config ~/open-source//vits/configs/vctk_base.json \
  --checkpoint ~/open-source/icefall-models/vits-vctk/pretrained_vctk.pth

It will generate the following two files:

$ ls -lh *.onnx
-rw-r--r--  1 fangjun  staff    37M Oct 16 10:57 vits-vctk.int8.onnx
-rw-r--r--  1 fangjun  staff   116M Oct 16 10:57 vits-vctk.onnx
"""
import sys

# Please change this line to point to the vits directory.
# You can download vits from
# https://github.com/jaywalnut310/vits
sys.path.insert(0, "/Users/fangjun/open-source/vits")  # noqa

import argparse
from pathlib import Path
from typing import Dict, Any

import commons
import onnx
import torch
import utils
from models import SynthesizerTrn
from onnxruntime.quantization import QuantType, quantize_dynamic
from text import text_to_sequence
from text.symbols import symbols
from text.symbols import _punctuation


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--config",
        type=str,
        required=True,
        help="""Path to vctk_base.json.
        You can find it at
        https://huggingface.co/csukuangfj/vits-vctk/resolve/main/vctk_base.json
        """,
    )

    parser.add_argument(
        "--checkpoint",
        type=str,
        required=True,
        help="""Path to the checkpoint file.
        You can find it at
        https://huggingface.co/csukuangfj/vits-vctk/resolve/main/pretrained_vctk.pth
        """,
    )

    return parser.parse_args()


class OnnxModel(torch.nn.Module):
    def __init__(self, model: SynthesizerTrn):
        super().__init__()
        self.model = model

    def forward(
        self,
        x,
        x_lengths,
        noise_scale=1,
        length_scale=1,
        noise_scale_w=1.0,
        sid=0,
        max_len=None,
    ):
        return self.model.infer(
            x=x,
            x_lengths=x_lengths,
            sid=sid,
            noise_scale=noise_scale,
            length_scale=length_scale,
            noise_scale_w=noise_scale_w,
            max_len=max_len,
        )[0]


def get_text(text, hps):
    text_norm = text_to_sequence(text, hps.data.text_cleaners)
    if hps.data.add_blank:
        text_norm = commons.intersperse(text_norm, 0)
    text_norm = torch.LongTensor(text_norm)
    return text_norm


def check_args(args):
    assert Path(args.config).is_file(), args.config
    assert Path(args.checkpoint).is_file(), args.checkpoint


def add_meta_data(filename: str, meta_data: Dict[str, Any]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)
    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    onnx.save(model, filename)


def generate_tokens():
    with open("tokens-vctk.txt", "w", encoding="utf-8") as f:
        for i, s in enumerate(symbols):
            f.write(f"{s} {i}\n")
    print("Generated tokens-vctk.txt")


@torch.no_grad()
def main():
    args = get_args()
    check_args(args)

    generate_tokens()

    hps = utils.get_hparams_from_file(args.config)

    net_g = SynthesizerTrn(
        len(symbols),
        hps.data.filter_length // 2 + 1,
        hps.train.segment_size // hps.data.hop_length,
        n_speakers=hps.data.n_speakers,
        **hps.model,
    )
    _ = net_g.eval()

    _ = utils.load_checkpoint(args.checkpoint, net_g, None)

    x = get_text("Liliana is the most beautiful assistant", hps)
    x = x.unsqueeze(0)

    x_length = torch.tensor([x.shape[1]], dtype=torch.int64)
    noise_scale = torch.tensor([1], dtype=torch.float32)
    length_scale = torch.tensor([1], dtype=torch.float32)
    noise_scale_w = torch.tensor([1], dtype=torch.float32)
    sid = torch.tensor([0], dtype=torch.int64)

    model = OnnxModel(net_g)

    opset_version = 13

    filename = "vits-vctk.onnx"

    torch.onnx.export(
        model,
        (x, x_length, noise_scale, length_scale, noise_scale_w, sid),
        filename,
        opset_version=opset_version,
        input_names=[
            "x",
            "x_length",
            "noise_scale",
            "length_scale",
            "noise_scale_w",
            "sid",
        ],
        output_names=["y"],
        dynamic_axes={
            "x": {0: "N", 1: "L"},  # n_audio is also known as batch_size
            "x_length": {0: "N"},
            "y": {0: "N", 2: "L"},
        },
    )
    meta_data = {
        "model_type": "vits",
        "comment": "vctk",
        "language": "English",
        "add_blank": int(hps.data.add_blank),
        "n_speakers": int(hps.data.n_speakers),
        "sample_rate": hps.data.sampling_rate,
        "punctuation": " ".join(list(_punctuation)),
    }
    print("meta_data", meta_data)
    add_meta_data(filename=filename, meta_data=meta_data)

    print("Generate int8 quantization models")

    filename_int8 = "vits-vctk.int8.onnx"
    quantize_dynamic(
        model_input=filename,
        model_output=filename_int8,
        weight_type=QuantType.QUInt8,
    )

    print(f"Saved to {filename} and {filename_int8}")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/vocos/README.md
================================================
# Introduction

This folder contains script to export the ONNX model from
https://huggingface.co/BSC-LT
to sherpa-onnx


================================================
FILE: scripts/vocos/add_meta_data.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)


import argparse

import onnx


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--in-model", type=str, required=True, help="input onnx model")

    parser.add_argument(
        "--out-model", type=str, required=True, help="output onnx model"
    )

    return parser.parse_args()


def main():
    args = get_args()
    print(args.in_model, args.out_model)

    model = onnx.load(args.in_model)

    meta_data = {
        "model_type": "vocos",
        "model_filename": "mel_spec_22khz_univ.onnx",
        "sample_rate": 22050,
        "version": 1,
        "model_author": "BSC-LT",
        "maintainer": "k2-fsa",
        "n_fft": 1024,
        "hop_length": 256,
        "win_length": 1024,
        "window_type": "hann",
        "center": 1,
        "pad_mode": "reflect",
        "normalized": 0,
        "url1": "https://huggingface.co/BSC-LT/vocos-mel-22khz",
        "url2": "https://github.com/gemelo-ai/vocos",
    }

    print(model.metadata_props)

    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)
    print("--------------------")

    print(model.metadata_props)

    onnx.save(model, args.out_model)

    print(f"Saved to {args.out_model}")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/vocos/test.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import datetime as dt

import kaldi_native_fbank as knf
import numpy as np
import onnxruntime as ort
import soundfile as sf

try:
    from piper_phonemize import phonemize_espeak
except Exception as ex:
    raise RuntimeError(
        f"{ex}\nPlease run\n"
        "pip install piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html"
    )


class OnnxVocosModel:
    def __init__(
        self,
        filename: str,
    ):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.session_opts = session_opts
        self.model = ort.InferenceSession(
            filename,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )

        print("----------vocos----------")
        for i in self.model.get_inputs():
            print(i)

        print("-----")

        for i in self.model.get_outputs():
            print(i)
        print()

    def __call__(self, x: np.ndarray):
        """
        Args:
          x: (N, feat_dim, num_frames)
        Returns:
          mag: (N, n_fft/2+1, num_frames)
          x: (N, n_fft/2+1, num_frames)
          y: (N, n_fft/2+1, num_frames)

        The complex spectrum is mag * (x + j*y)
        """
        assert x.ndim == 3, x.shape
        assert x.shape[0] == 1, x.shape

        mag, x, y = self.model.run(
            [
                self.model.get_outputs()[0].name,
                self.model.get_outputs()[1].name,
                self.model.get_outputs()[2].name,
            ],
            {
                self.model.get_inputs()[0].name: x,
            },
        )

        return mag, x, y


class OnnxHifiGANModel:
    def __init__(
        self,
        filename: str,
    ):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.session_opts = session_opts
        self.model = ort.InferenceSession(
            filename,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )

        print("----------hifigan----------")
        for i in self.model.get_inputs():
            print(i)

        print("-----")

        for i in self.model.get_outputs():
            print(i)
        print()

    def __call__(self, x: np.ndarray):
        """
        Args:
          x: (N, feat_dim, num_frames)
        Returns:
          audio: (N, num_samples)
        """
        assert x.ndim == 3, x.shape
        assert x.shape[0] == 1, x.shape

        audio = self.model.run(
            [self.model.get_outputs()[0].name],
            {
                self.model.get_inputs()[0].name: x,
            },
        )[0]
        # audio: (batch_size, num_samples)

        return audio


def load_tokens(filename):
    token2id = dict()
    with open(filename, encoding="utf-8") as f:
        for line in f:
            fields = line.strip().split()
            if len(fields) == 1:
                t = " "
                idx = int(fields[0])
            else:
                t, idx = line.strip().split()
            token2id[t] = int(idx)
    return token2id


class OnnxModel:
    def __init__(
        self,
        filename: str,
        tokens: str,
    ):
        self.token2id = load_tokens(tokens)
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.session_opts = session_opts
        self.model = ort.InferenceSession(
            filename,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )

        print(f"{self.model.get_modelmeta().custom_metadata_map}")
        metadata = self.model.get_modelmeta().custom_metadata_map
        self.sample_rate = int(metadata["sample_rate"])

        print("----------matcha----------")
        for i in self.model.get_inputs():
            print(i)

        print("-----")

        for i in self.model.get_outputs():
            print(i)
        print()

    def __call__(self, x: np.ndim):
        """
        Args:
        """
        assert x.ndim == 2, x.shape
        assert x.shape[0] == 1, x.shape

        x_lengths = np.array([x.shape[1]], dtype=np.int64)

        noise_scale = np.array([1.0], dtype=np.float32)
        length_scale = np.array([1.0], dtype=np.float32)

        mel = self.model.run(
            [self.model.get_outputs()[0].name],
            {
                self.model.get_inputs()[0].name: x,
                self.model.get_inputs()[1].name: x_lengths,
                self.model.get_inputs()[2].name: noise_scale,
                self.model.get_inputs()[3].name: length_scale,
            },
        )[0]
        # mel: (batch_size, feat_dim, num_frames)

        return mel


def main():
    am = OnnxModel(
        filename="./matcha-icefall-en_US-ljspeech/model-steps-3.onnx",
        tokens="./matcha-icefall-en_US-ljspeech/tokens.txt",
    )
    vocoder = OnnxHifiGANModel("./hifigan_v2.onnx")
    vocos = OnnxVocosModel("./mel_spec_22khz_univ.onnx")

    text = "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar."
    tokens_list = phonemize_espeak(text, "en-us")
    print(tokens_list)
    tokens = []
    for t in tokens_list:
        tokens.extend(t)

    token_ids = []
    for t in tokens:
        if t not in am.token2id:
            print(f"Skip OOV '{t}'")
            continue
        token_ids.append(am.token2id[t])

    token_ids2 = [am.token2id["_"]] * (len(token_ids) * 2 + 1)
    token_ids2[1::2] = token_ids
    token_ids = token_ids2
    x = np.array([token_ids], dtype=np.int64)

    mel_start_t = dt.datetime.now()
    mel = am(x)
    mel_end_t = dt.datetime.now()

    print("mel", mel.shape)
    # mel:(1, 80, 78)

    vocos_start_t = dt.datetime.now()
    mag, x, y = vocos(mel)
    stft_result = knf.StftResult(
        real=(mag * x)[0].transpose().reshape(-1).tolist(),
        imag=(mag * y)[0].transpose().reshape(-1).tolist(),
        num_frames=mag.shape[2],
    )
    config = knf.StftConfig(
        n_fft=1024,
        hop_length=256,
        win_length=1024,
        window_type="hann",
        center=True,
        pad_mode="reflect",
        normalized=False,
    )
    istft = knf.IStft(config)
    audio_vocos = istft(stft_result)
    vocos_end_t = dt.datetime.now()

    audio_vocos = np.array(audio_vocos)
    #  audio = audio / 2
    print("vocos max/min", np.max(audio_vocos), np.min(audio_vocos))

    sf.write("vocos.wav", audio_vocos, am.sample_rate, "PCM_16")

    hifigan_start_t = dt.datetime.now()
    audio_hifigan = vocoder(mel)
    hifigan_end_t = dt.datetime.now()
    audio_hifigan = audio_hifigan.squeeze()

    print("hifigan max/min", np.max(audio_hifigan), np.min(audio_hifigan))

    sample_rate = am.sample_rate
    sf.write("hifigan-v2.wav", audio_hifigan, sample_rate, "PCM_16")

    am_t = (mel_end_t - mel_start_t).total_seconds()
    vocos_t = (vocos_end_t - vocos_start_t).total_seconds()
    hifigan_t = (hifigan_end_t - hifigan_start_t).total_seconds()

    mean_audio_duration = (
        (audio_vocos.shape[-1] + audio_hifigan.shape[-1]) / 2 / sample_rate
    )
    rtf_am = am_t / mean_audio_duration

    rtf_vocos = vocos_t * sample_rate / audio_vocos.shape[-1]
    rtf_hifigan = hifigan_t * sample_rate / audio_hifigan.shape[-1]

    print(
        "Audio duration for vocos {:.3f} s".format(audio_vocos.shape[-1] / sample_rate)
    )
    print(
        "Audio duration for hifigan {:.3f} s".format(
            audio_hifigan.shape[-1] / sample_rate
        )
    )
    print("Mean audio duration: {:.3f} s".format(mean_audio_duration))
    print("RTF for acoustic model {:.3f}".format(rtf_am))
    print("RTF for vocos {:.3f}".format(rtf_vocos))
    print("RTF for hifigan {:.3f}".format(rtf_hifigan))


if __name__ == "__main__":
    main()


================================================
FILE: scripts/wasm/generate-tts.py
================================================
#!/usr/bin/env python3

import argparse
from dataclasses import dataclass

import jinja2


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--total",
        type=int,
        default=1,
        help="Number of runners",
    )
    parser.add_argument(
        "--index",
        type=int,
        default=0,
        help="Index of the current runner",
    )
    return parser.parse_args()


@dataclass
class Model:
    model_name: str
    hf: str  # huggingface space name
    ms: str  # modelscope space name
    cmd: str = ""


def get_models():
    models = [
        Model(
            model_name="vits-piper-de_DE-thorsten_emotional-medium",
            hf="k2-fsa/web-assembly-tts-sherpa-onnx-de",
            ms="k2-fsa/web-assembly-tts-sherpa-onnx-de",
            cmd="""
            pushd $model_name

            mv -v *.onnx ../
            mv -v tokens.txt ../
            mv -v espeak-ng-data ../
            popd


            git checkout .

            rm -rf $model_name
            git diff
            """,
        ),
        Model(
            model_name="vits-piper-en_US-libritts_r-medium",
            hf="k2-fsa/web-assembly-tts-sherpa-onnx-en",
            ms="k2-fsa/web-assembly-tts-sherpa-onnx-en",
            cmd="""
            pushd $model_name

            mv -v *.onnx ../
            mv -v tokens.txt ../
            mv -v espeak-ng-data ../
            popd


            git checkout .

            rm -rf $model_name
            git diff
            """,
        ),
        Model(
            model_name="matcha-icefall-zh-en",
            hf="k2-fsa/web-assembly-zh-en-tts-matcha",
            ms="csukuangfj/web-assembly-zh-en-tts-matcha",
            cmd="""
            pushd $model_name

            mv -v *.fst ../
            mv -v *.onnx ../
            mv -v tokens.txt ../
            mv -v lexicon.txt ../
            mv -v espeak-ng-data ../
            popd

            curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-16khz-univ.onnx

            git checkout .
            sed -i.bak 's/let modelType = 0/let modelType = 1/g' ../sherpa-onnx-tts.js

            rm -rf $model_name
            git diff
            """,
        ),
        Model(
            model_name="matcha-icefall-zh-baker",
            hf="k2-fsa/web-assembly-zh-tts-matcha",
            ms="csukuangfj/web-assembly-zh-tts-matcha",
            cmd="""
            pushd $model_name

            mv -v *.fst ../
            mv -v *.onnx ../
            mv -v tokens.txt ../
            mv -v lexicon.txt ../
            popd

            curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx


            git checkout .
            sed -i.bak 's/let modelType = 0/let modelType = 2/g' ../sherpa-onnx-tts.js

            rm -rf $model_name
            git diff
            """,
        ),
        Model(
            model_name="matcha-icefall-en_US-ljspeech",
            hf="k2-fsa/web-assembly-en-tts-matcha",
            ms="csukuangfj/web-assembly-en-tts-matcha",
            cmd="""
            pushd $model_name

            mv -v *.onnx ../
            mv -v tokens.txt ../
            mv -v espeak-ng-data ../
            popd

            curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx


            git checkout .
            sed -i.bak 's/let modelType = 0/let modelType = 3/g' ../sherpa-onnx-tts.js

             rm -rf $model_name
             git diff
             """,
        ),
        Model(
            model_name="sherpa-onnx-zipvoice-distill-int8-zh-en-emilia",
            hf="k2-fsa/web-assembly-zh-en-tts-zipvoice",
            ms="csukuangfj/web-assembly-zh-en-tts-zipvoice",
            cmd="""
            pushd $model_name

            mv -v encoder.int8.onnx ../
            mv -v decoder.int8.onnx ../
            mv -v tokens.txt ../
            mv -v lexicon.txt ../
            mv -v espeak-ng-data ../
            popd

            curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos_24khz.onnx

            git checkout .
            sed -i.bak 's/let modelType = 0/let modelType = 4/g' ../sherpa-onnx-tts.js
            rm -rf $model_name
            git diff
            """,
        ),
        Model(
            model_name="sherpa-onnx-pocket-tts-int8-2026-01-26",
            hf="k2-fsa/web-assembly-en-tts-pocket",
            ms="csukuangfj/web-assembly-en-tts-pocket",
            cmd="""
            pushd $model_name

            mv -v lm_flow.int8.onnx ../
            mv -v lm_main.int8.onnx ../
            mv -v encoder.onnx ../
            mv -v decoder.int8.onnx ../
            mv -v text_conditioner.onnx ../
            mv -v vocab.json ../
            mv -v token_scores.json ../
            popd

            git checkout .
            sed -i.bak 's/let modelType = 0/let modelType = 5/g' ../sherpa-onnx-tts.js
            rm -rf $model_name
            git diff
            """,
        ),
    ]
    return models


def main():
    args = get_args()
    index = args.index
    total = args.total
    assert 0 <= index < total, (index, total)

    all_model_list = get_models()

    num_models = len(all_model_list)

    num_per_runner = num_models // total
    if num_per_runner <= 0:
        raise ValueError(f"num_models: {num_models}, num_runners: {total}")

    start = index * num_per_runner
    end = start + num_per_runner

    remaining = num_models - args.total * num_per_runner

    print(f"{index}/{total}: {start}-{end}/{num_models}")

    d = dict()
    d["model_list"] = all_model_list[start:end]
    if index < remaining:
        s = args.total * num_per_runner + index
        d["model_list"].append(all_model_list[s])
        print(f"{s}/{num_models}")

    filename_list = [
        "./run-tts.sh",
    ]
    for filename in filename_list:
        environment = jinja2.Environment()
        with open(f"{filename}.in") as f:
            s = f.read()
        template = environment.from_string(s)

        s = template.render(**d)
        with open(filename, "w") as f:
            print(s, file=f)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/wasm/generate-vad-asr.py
================================================
#!/usr/bin/env python3

import argparse
from dataclasses import dataclass

import jinja2


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--total",
        type=int,
        default=1,
        help="Number of runners",
    )
    parser.add_argument(
        "--index",
        type=int,
        default=0,
        help="Index of the current runner",
    )
    return parser.parse_args()


@dataclass
class Model:
    model_name: str
    hf: str  # huggingface space name
    ms: str  # modelscope space name
    short_name: str
    cmd: str = ""


def get_models():
    models = [
        Model(
            model_name="sherpa-onnx-whisper-tiny.en",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-whisper-tiny",
            ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-en-whisper-tiny",
            short_name="vad-asr-en-whisper_tiny",
            cmd="""
            pushd $model_name
            mv -v tiny.en-encoder.int8.onnx ../whisper-encoder.onnx
            mv -v tiny.en-decoder.int8.onnx ../whisper-decoder.onnx
            mv -v tiny.en-tokens.txt ../tokens.txt
            popd
            rm -rf $model_name
            sed -i.bak 's/Zipformer/Whisper tiny.en supporting English 英文/g' ../index.html
            git diff
            """,
        ),
        Model(
            model_name="sherpa-onnx-moonshine-tiny-en-int8",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-moonshine-tiny",
            ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-en-moonshine-tiny",
            short_name="vad-asr-en-moonshine_tiny",
            cmd="""
            pushd $model_name
            mv -v preprocess.onnx ../moonshine-preprocessor.onnx
            mv -v encode.int8.onnx ../moonshine-encoder.onnx
            mv -v uncached_decode.int8.onnx ../moonshine-uncached-decoder.onnx
            mv -v cached_decode.int8.onnx ../moonshine-cached-decoder.onnx
            mv -v tokens.txt ../
            popd
            rm -rf $model_name
            sed -i.bak 's/Zipformer/Moonshine tiny supporting English 英文/g' ../index.html
            git diff
            """,
        ),
        Model(
            model_name="sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-moonshine-v2-tiny-en",
            ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-moonshine-v2-tiny-en",
            short_name="vad-asr-moonshine-v2-tiny-en",
            cmd="""
            pushd $model_name
            mv -v encoder_model.ort ../moonshine-encoder.ort
            mv -v decoder_model_merged.ort ../moonshine-merged-decoder.ort
            mv -v tokens.txt ../
            mv -v LICENSE ../
            popd
            rm -rf $model_name
            sed -i.bak 's/Zipformer/Moonshine v2 tiny-en supporting English 英语/g' ../index.html
            git diff
            """,
        ),
        Model(
            model_name="sherpa-onnx-moonshine-tiny-ja-quantized-2026-02-27",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-moonshine-v2-tiny-ja",
            ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-moonshine-v2-tiny-ja",
            short_name="vad-asr-moonshine-v2-tiny-ja",
            cmd="""
            pushd $model_name
            mv -v encoder_model.ort ../moonshine-encoder.ort
            mv -v decoder_model_merged.ort ../moonshine-merged-decoder.ort
            mv -v tokens.txt ../
            mv -v LICENSE ../
            popd
            rm -rf $model_name
            sed -i.bak 's/Zipformer/Moonshine v2 tiny-ja supporting Japanese 日语/g' ../index.html
            git diff
            """,
        ),
        Model(
            model_name="sherpa-onnx-moonshine-tiny-ko-quantized-2026-02-27",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-moonshine-v2-tiny-ko",
            ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-moonshine-v2-tiny-ko",
            short_name="vad-asr-moonshine-v2-tiny-ko",
            cmd="""
            pushd $model_name
            mv -v encoder_model.ort ../moonshine-encoder.ort
            mv -v decoder_model_merged.ort ../moonshine-merged-decoder.ort
            mv -v tokens.txt ../
            mv -v LICENSE ../
            popd
            rm -rf $model_name
            sed -i.bak 's/Zipformer/Moonshine v2 tiny-ko supporting Korean 韩语/g' ../index.html
            git diff
            """,
        ),
        Model(
            model_name="sherpa-onnx-moonshine-base-en-quantized-2026-02-27",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-moonshine-v2-base-en",
            ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-moonshine-v2-base-en",
            short_name="vad-asr-moonshine-v2-base-en",
            cmd="""
            pushd $model_name
            mv -v encoder_model.ort ../moonshine-encoder.ort
            mv -v decoder_model_merged.ort ../moonshine-merged-decoder.ort
            mv -v tokens.txt ../
            mv -v LICENSE ../
            popd
            rm -rf $model_name
            sed -i.bak 's/Zipformer/Moonshine v2 base-en supporting English 英语/g' ../index.html
            git diff
            """,
        ),
        Model(
            model_name="sherpa-onnx-moonshine-base-zh-quantized-2026-02-27",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-moonshine-v2-base-zh",
            ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-moonshine-v2-base-zh",
            short_name="vad-asr-moonshine-v2-base-zh",
            cmd="""
            pushd $model_name
            mv -v encoder_model.ort ../moonshine-encoder.ort
            mv -v decoder_model_merged.ort ../moonshine-merged-decoder.ort
            mv -v tokens.txt ../
            mv -v LICENSE ../
            popd
            rm -rf $model_name
            sed -i.bak 's/Zipformer/Moonshine v2 base-zh supporting Chinese 普通话/g' ../index.html
            git diff
            """,
        ),
        Model(
            model_name="sherpa-onnx-moonshine-base-ja-quantized-2026-02-27",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-moonshine-v2-base-ja",
            ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-moonshine-v2-base-ja",
            short_name="vad-asr-moonshine-v2-base-ja",
            cmd="""
            pushd $model_name
            mv -v encoder_model.ort ../moonshine-encoder.ort
            mv -v decoder_model_merged.ort ../moonshine-merged-decoder.ort
            mv -v tokens.txt ../
            mv -v LICENSE ../
            popd
            rm -rf $model_name
            sed -i.bak 's/Zipformer/Moonshine v2 base-ja supporting Japanese 日文/g' ../index.html
            git diff
            """,
        ),
        Model(
            model_name="sherpa-onnx-moonshine-base-vi-quantized-2026-02-27",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-moonshine-v2-base-vi",
            ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-moonshine-v2-base-vi",
            short_name="vad-asr-moonshine-v2-base-vi",
            cmd="""
            pushd $model_name
            mv -v encoder_model.ort ../moonshine-encoder.ort
            mv -v decoder_model_merged.ort ../moonshine-merged-decoder.ort
            mv -v tokens.txt ../
            mv -v LICENSE ../
            popd
            rm -rf $model_name
            sed -i.bak 's/Zipformer/Moonshine v2 base-vi supporting Vietnamese 越南语/g' ../index.html
            git diff
            """,
        ),
        Model(
            model_name="sherpa-onnx-moonshine-base-es-quantized-2026-02-27",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-moonshine-v2-base-es",
            ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-moonshine-v2-base-es",
            short_name="vad-asr-moonshine-v2-base-es",
            cmd="""
            pushd $model_name
            mv -v encoder_model.ort ../moonshine-encoder.ort
            mv -v decoder_model_merged.ort ../moonshine-merged-decoder.ort
            mv -v tokens.txt ../
            mv -v LICENSE ../
            popd
            rm -rf $model_name
            sed -i.bak 's/Zipformer/Moonshine v2 base-es supporting Spanish 西班牙语/g' ../index.html
            git diff
            """,
        ),
        Model(
            model_name="sherpa-onnx-moonshine-base-ar-quantized-2026-02-27",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-moonshine-v2-base-ar",
            ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-moonshine-v2-base-ar",
            short_name="vad-asr-moonshine-v2-base-ar",
            cmd="""
            pushd $model_name
            mv -v encoder_model.ort ../moonshine-encoder.ort
            mv -v decoder_model_merged.ort ../moonshine-merged-decoder.ort
            mv -v tokens.txt ../
            mv -v LICENSE ../
            popd
            rm -rf $model_name
            sed -i.bak 's/Zipformer/Moonshine v2 base-ar supporting Arabic 阿拉伯语/g' ../index.html
            git diff
            """,
        ),
        Model(
            model_name="sherpa-onnx-moonshine-base-uk-quantized-2026-02-27",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-moonshine-v2-base-uk",
            ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-moonshine-v2-base-uk",
            short_name="vad-asr-moonshine-v2-base-uk",
            cmd="""
            pushd $model_name
            mv -v encoder_model.ort ../moonshine-encoder.ort
            mv -v decoder_model_merged.ort ../moonshine-merged-decoder.ort
            mv -v tokens.txt ../
            mv -v LICENSE ../
            popd
            rm -rf $model_name
            sed -i.bak 's/Zipformer/Moonshine v2 base-uk supporting Ukrainian 乌克兰语/g' ../index.html
            git diff
            """,
        ),
        Model(
            model_name="sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-ja-ko-cantonese-sense-voice",
            ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-zh-en-jp-ko-cantonese-sense-voice",
            short_name="vad-asr-zh_en_ja_ko_cantonese-sense_voice_small",
            cmd="""
            pushd $model_name
            mv -v model.int8.onnx ../sense-voice.onnx
            mv -v tokens.txt ../
            popd
            rm -rf $model_name
            sed -i.bak 's/Zipformer/SenseVoice Small supporting English, Chinese, Japanese, Korean, Cantonese 中英日韩粤/g' ../index.html
            git diff
            """,
        ),
        Model(
            model_name="sherpa-onnx-paraformer-zh-2023-09-14",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer",
            ms="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer",
            short_name="vad-asr-zh_en-paraformer_large",
            cmd="""
            pushd $model_name
            mv -v model.int8.onnx ../paraformer.onnx
            mv -v tokens.txt ../
            popd
            rm -rf $model_name
            sed -i.bak 's/Zipformer/Paraformer supporting Chinese, English 中英/g' ../index.html
            git diff
            """,
        ),
        Model(
            model_name="sherpa-onnx-paraformer-zh-small-2024-03-09",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer-small",
            ms="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer-small",
            short_name="vad-asr-zh_en-paraformer_small",
            cmd="""
            pushd $model_name
            mv -v model.int8.onnx ../paraformer.onnx
            mv -v tokens.txt ../
            popd
            rm -rf $model_name
            sed -i.bak 's/Zipformer/Paraformer-small supporting Chinese, English 中英文/g' ../index.html
            git diff
            """,
        ),
        Model(
            model_name="sherpa-onnx-zipformer-gigaspeech-2023-12-12",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-zipformer-gigaspeech",
            ms="k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-zipformer-gigaspeech",
            short_name="vad-asr-en-zipformer_gigaspeech",
            cmd="""
            pushd $model_name
            mv encoder-epoch-30-avg-1.int8.onnx ../transducer-encoder.onnx
            mv decoder-epoch-30-avg-1.onnx ../transducer-decoder.onnx
            mv joiner-epoch-30-avg-1.int8.onnx ../transducer-joiner.onnx
            mv tokens.txt ../
            popd
            rm -rf $model_name
            sed -i.bak 's/Zipformer/Zipformer supporting English 英语/g' ../index.html
            git diff
            """,
        ),
        Model(
            model_name="icefall-asr-zipformer-wenetspeech-20230615",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-wenetspeech",
            ms="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-wenetspeech",
            short_name="vad-asr-zh-zipformer_wenetspeech",
            cmd="""
            pushd $model_name
            mv -v data/lang_char/tokens.txt ../
            mv -v exp/encoder-epoch-12-avg-4.int8.onnx ../transducer-encoder.onnx
            mv -v exp/decoder-epoch-12-avg-4.onnx ../transducer-decoder.onnx
            mv -v exp/joiner-epoch-12-avg-4.int8.onnx ../transducer-joiner.onnx
            popd
            rm -rf $model_name
            sed -i.bak 's/Zipformer/Zipformer supporting Chinese 中文/g' ../index.html
            git diff
            """,
        ),
        Model(
            model_name="sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-ja-zipformer",
            ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-ja-zipformer",
            short_name="vad-asr-ja-zipformer_reazonspeech",
            cmd="""
            pushd $model_name
            mv encoder-epoch-99-avg-1.int8.onnx ../transducer-encoder.onnx
            mv decoder-epoch-99-avg-1.onnx ../transducer-decoder.onnx
            mv joiner-epoch-99-avg-1.int8.onnx ../transducer-joiner.onnx
            mv tokens.txt ../
            popd
            rm -rf $model_name
            sed -i.bak 's/Zipformer/Zipformer supporting Japanese 日语/g' ../index.html
            git diff
            """,
        ),
        Model(
            model_name="sherpa-onnx-zipformer-thai-2024-06-20",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-th-zipformer",
            ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-th-zipformer",
            short_name="vad-asr-th-zipformer_gigaspeech2",
            cmd="""
            pushd $model_name
            mv encoder-epoch-12-avg-5.int8.onnx ../transducer-encoder.onnx
            mv decoder-epoch-12-avg-5.onnx ../transducer-decoder.onnx
            mv joiner-epoch-12-avg-5.int8.onnx ../transducer-joiner.onnx
            mv tokens.txt ../
            popd
            rm -rf $model_name
            sed -i.bak 's/Zipformer/Zipformer supporting Thai 泰语/g' ../index.html
            git diff
            """,
        ),
        Model(
            model_name="sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-telespeech",
            ms="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-telespeech",
            short_name="vad-asr-zh-telespeech",
            cmd="""
            pushd $model_name
            mv model.int8.onnx ../telespeech.onnx
            mv tokens.txt ../
            popd
            rm -rf $model_name
            sed -i.bak 's/Zipformer/TeleSpeech-ASR supporting Chinese 多种中文方言/g' ../index.html
            git diff
            """,
        ),
        Model(
            model_name="sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-multi-lang-dophin-ctc",
            ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-multi-lang-dophin-ctc",
            short_name="vad-asr-multi_lang-dolphin_ctc",
            cmd="""
            pushd $model_name
            mv model.int8.onnx ../dolphin.onnx
            mv tokens.txt ../
            popd
            rm -rf $model_name
            sed -i.bak 's%Zipformer%<a href="https://github.com/DataoceanAI/Dolphin">Dolphin</a> (多种中文方言及非常多种语言)%g' ../index.html
            git diff
            """,
        ),
        Model(
            model_name="sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-ctc",
            ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-ctc",
            short_name="vad-asr-zh-zipformer-ctc",
            cmd="""
            pushd $model_name
            mv model.int8.onnx ../zipformer-ctc.onnx
            mv tokens.txt ../
            popd
            rm -rf $model_name
            sed -i.bak 's/Zipformer/Zipformer CTC supporting Chinese 中文/g' ../index.html
            git diff
            """,
        ),
    ]
    return models


def main():
    args = get_args()
    index = args.index
    total = args.total
    assert 0 <= index < total, (index, total)

    all_model_list = get_models()

    num_models = len(all_model_list)

    num_per_runner = num_models // total
    if num_per_runner <= 0:
        raise ValueError(f"num_models: {num_models}, num_runners: {total}")

    start = index * num_per_runner
    end = start + num_per_runner

    remaining = num_models - args.total * num_per_runner

    print(f"{index}/{total}: {start}-{end}/{num_models}")

    d = dict()
    d["model_list"] = all_model_list[start:end]
    if index < remaining:
        s = args.total * num_per_runner + index
        d["model_list"].append(all_model_list[s])
        print(f"{s}/{num_models}")

    filename_list = [
        "./run-vad-asr.sh",
    ]
    for filename in filename_list:
        environment = jinja2.Environment()
        with open(f"{filename}.in") as f:
            s = f.read()
        template = environment.from_string(s)

        s = template.render(**d)
        with open(filename, "w") as f:
            print(s, file=f)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/wasm/run-tts.sh.in
================================================
#!/usr/bin/env bash
#
# Build WebAssembly APPs for huggingface spaces and modelscope spaces

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)


{% for model in model_list %}
model_name={{ model.model_name }}
hf_name={{ model.hf }}
ms_name={{ model.ms }}

pushd wasm/tts
git checkout .
rm -rf assets
mkdir assets
cd assets
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/${model_name}.tar.bz2
tar xvf ${model_name}.tar.bz2
rm ${model_name}.tar.bz2

{{ model.cmd }}

popd

ls -lh wasm/tts/assets

rm -rf build-wasm-simd-tts/install
rm -rf build-wasm-simd-tts/wasm

./build-wasm-simd-tts.sh

dst=sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-${model_name}
mv build-wasm-simd-tts/install/bin/wasm/tts $dst
ls -lh $dst
tar cjfv $dst.tar.bz2 ./$dst
ls -lh *.tar.bz2

git config --global user.email "csukuangfj@gmail.com"
git config --global user.name "Fangjun Kuang"

export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false

rm -rf ms
git clone https://www.modelscope.cn/studios/$ms_name.git ms

cd ms
cp -v ../$dst/* .

git status
git lfs track "*.data"
git lfs track "*.wasm"
ls -lh

git add .
git commit -m "update model" || true
git push https://oauth2:${MS_TOKEN}@www.modelscope.cn/studios/$ms_name.git || true
cd ..
rm -rf ms

rm -rf huggingface

git clone https://huggingface.co/spaces/$hf_name huggingface
cd huggingface
cp -v ../$dst/* .

git status
git lfs track "*.data"
git lfs track "*.wasm"
ls -lh

git add .
git commit -m "update model" || true
git push https://csukuangfj:$HF_TOKEN@huggingface.co/spaces/$hf_name main || true
cd ..
rm -rf huggingface
rm -rf $dst

ls -lh *.tar.bz2

{% endfor %}


================================================
FILE: scripts/wasm/run-vad-asr.sh.in
================================================
#!/usr/bin/env bash
#
# Build WebAssembly APPs for huggingface spaces and modelscope spaces

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)


{% for model in model_list %}
model_name={{ model.model_name }}
short_name={{ model.short_name }}
hf_name={{ model.hf }}
ms_name={{ model.ms }}

pushd wasm/vad-asr
git checkout .
rm -rf assets
mkdir assets
cd assets
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/${model_name}.tar.bz2
tar xvf ${model_name}.tar.bz2
rm ${model_name}.tar.bz2

{{ model.cmd }}

popd

ls -lh wasm/vad-asr/assets

rm -rf build-wasm-simd-vad-asr/install
rm -rf build-wasm-simd-vad-asr/wasm

./build-wasm-simd-vad-asr.sh

dst=sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-${short_name}
mv build-wasm-simd-vad-asr/install/bin/wasm/vad-asr $dst
ls -lh $dst
tar cjfv $dst.tar.bz2 ./$dst
ls -lh *.tar.bz2

git config --global user.email "csukuangfj@gmail.com"
git config --global user.name "Fangjun Kuang"

export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false

if [ x"$ms_name" != x"" ]; then
  rm -rf ms
  git clone https://www.modelscope.cn/studios/$ms_name.git ms

  cd ms
  cp -v ../$dst/* .

  git status
  git lfs track "*.data"
  git lfs track "*.wasm"
  ls -lh

  git add .
  git commit -m "update model" || true
  git push https://oauth2:${MS_TOKEN}@www.modelscope.cn/studios/$ms_name.git || true
  cd ..
  rm -rf ms
fi

rm -rf huggingface

git clone https://huggingface.co/spaces/$hf_name huggingface
cd huggingface
cp -v ../$dst/* .

git status
git lfs track "*.data"
git lfs track "*.wasm"
ls -lh

git add .
git commit -m "update model" || true
git push https://csukuangfj2:$HF_TOKEN@huggingface.co/spaces/$hf_name main || true
cd ..
rm -rf huggingface
rm -rf $dst

ls -lh *.tar.bz2

{% endfor %}


================================================
FILE: scripts/wenet/README.md
================================================
# Introduction

This folder contains script for exporting models
from [wenet](https://github.com/wenet-e2e/wenet)
to onnx. You can use the exported models in sherpa-onnx.

Note that both **streaming** and **non-streaming** models are supported.

We only use the CTC branch. Rescore with the attention decoder
is not supported, though decoding with H, HL, and HLG is supported.


================================================
FILE: scripts/wenet/export-onnx-streaming.py
================================================
#!/usr/bin/env python3
# Copyright      2023  Xiaomi Corp.        (authors: Fangjun Kuang)

# pip install git+https://github.com/wenet-e2e/wenet.git
# pip install onnxruntime onnx pyyaml
# cp -a ~/open-source/wenet/wenet/transducer/search .
# cp -a ~/open-source//wenet/wenet/e_branchformer .
# cp -a ~/open-source/wenet/wenet/ctl_model .

import os
from typing import Dict

import onnx
import torch
import yaml
from onnxruntime.quantization import QuantType, quantize_dynamic

from wenet.utils.init_model import init_model


def add_meta_data(filename: str, meta_data: Dict[str, str]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)

    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    #  model = onnx.version_converter.convert_version(model, 21)

    onnx.save(model, filename)


class OnnxModel(torch.nn.Module):
    def __init__(self, encoder: torch.nn.Module, ctc: torch.nn.Module):
        super().__init__()
        self.encoder = encoder
        self.ctc = ctc

    def forward(
        self,
        x: torch.Tensor,
        offset: torch.Tensor,
        required_cache_size: torch.Tensor,
        attn_cache: torch.Tensor,
        conv_cache: torch.Tensor,
        attn_mask: torch.Tensor,
    ):
        """
        Args:
          x:
            A 3-D float32 tensor of shape (N, T, C). It supports only N == 1.
          offset:
            A scalar of dtype torch.int64.
          required_cache_size:
            A scalar of dtype torch.int64.
          attn_cache:
            A 4-D float32 tensor of shape (num_blocks, head, required_cache_size, encoder_output_size / head /2).
          conv_cache:
            A 4-D float32 tensor of shape (num_blocks, N, encoder_output_size, cnn_module_kernel - 1).
          attn_mask:
            A 3-D bool tensor of shape (N, 1, required_cache_size + chunk_size)
        Returns:
          Return a tuple of 3 tensors:
            - A 3-D float32 tensor of shape (N, T, C) containing log_probs
            - next_attn_cache
            - next_conv_cache
        """
        encoder_out, next_att_cache, next_conv_cache = self.encoder.forward_chunk(
            xs=x,
            offset=offset,
            required_cache_size=required_cache_size,
            att_cache=attn_cache,
            cnn_cache=conv_cache,
            att_mask=attn_mask,
        )
        log_probs = self.ctc.log_softmax(encoder_out)

        return log_probs, next_att_cache, next_conv_cache


class Foo:
    pass


@torch.no_grad()
def main():
    args = Foo()
    args.checkpoint = "./final.pt"
    config_file = "./train.yaml"

    with open(config_file, "r") as fin:
        configs = yaml.load(fin, Loader=yaml.FullLoader)
    torch_model, configs = init_model(args, configs)
    torch_model.eval()

    head = configs["encoder_conf"]["attention_heads"]
    num_blocks = configs["encoder_conf"]["num_blocks"]
    output_size = configs["encoder_conf"]["output_size"]
    cnn_module_kernel = configs["encoder_conf"].get("cnn_module_kernel", 1)

    right_context = torch_model.right_context()
    subsampling_factor = torch_model.encoder.embed.subsampling_rate
    chunk_size = 16
    left_chunks = 4

    decoding_window = (chunk_size - 1) * subsampling_factor + right_context + 1

    required_cache_size = chunk_size * left_chunks

    offset = required_cache_size

    attn_cache = torch.zeros(
        num_blocks,
        head,
        required_cache_size,
        output_size // head * 2,
        dtype=torch.float32,
    )

    attn_mask = torch.ones(1, 1, required_cache_size + chunk_size, dtype=torch.bool)
    attn_mask[:, :, :required_cache_size] = 0

    conv_cache = torch.zeros(
        num_blocks, 1, output_size, cnn_module_kernel - 1, dtype=torch.float32
    )

    sos = torch_model.sos_symbol()
    eos = torch_model.eos_symbol()

    onnx_model = OnnxModel(
        encoder=torch_model.encoder,
        ctc=torch_model.ctc,
    )
    filename = "model-streaming.onnx"

    N = 1
    T = decoding_window
    C = 80
    x = torch.rand(N, T, C, dtype=torch.float32)
    offset = torch.tensor([offset], dtype=torch.int64)
    required_cache_size = torch.tensor([required_cache_size], dtype=torch.int64)

    opset_version = 13
    torch.onnx.export(
        onnx_model,
        (x, offset, required_cache_size, attn_cache, conv_cache, attn_mask),
        filename,
        opset_version=opset_version,
        input_names=[
            "x",
            "offset",
            "required_cache_size",
            "attn_cache",
            "conv_cache",
            "attn_mask",
        ],
        output_names=["log_probs", "next_att_cache", "next_conv_cache"],
        dynamic_axes={
            "x": {0: "N", 1: "T"},
            "attn_cache": {2: "T"},
            "attn_mask": {2: "T"},
            "log_probs": {0: "N"},
            "new_attn_cache": {2: "T"},
        },
    )

    # https://wenet.org.cn/downloads?models=wenet&version=aishell_u2pp_conformer_exp.tar.gz
    url = os.environ.get("WENET_URL", "")
    meta_data = {
        "model_type": "wenet_ctc",
        "version": "1",
        "model_author": "wenet",
        "comment": "streaming",
        "url": "https://wenet.org.cn/downloads?models=wenet&version=aishell_u2pp_conformer_exp.tar.gz",
        "chunk_size": chunk_size,
        "left_chunks": left_chunks,
        "head": head,
        "num_blocks": num_blocks,
        "output_size": output_size,
        "cnn_module_kernel": cnn_module_kernel,
        "right_context": right_context,
        "subsampling_factor": subsampling_factor,
        "vocab_size": torch_model.ctc.ctc_lo.weight.shape[0],
    }
    add_meta_data(filename=filename, meta_data=meta_data)

    print("Generate int8 quantization models")

    filename_int8 = f"model-streaming.int8.onnx"
    quantize_dynamic(
        model_input=filename,
        model_output=filename_int8,
        op_types_to_quantize=["MatMul"],
        weight_type=QuantType.QInt8,
    )


if __name__ == "__main__":
    main()


================================================
FILE: scripts/wenet/export-onnx.py
================================================
#!/usr/bin/env python3
# Copyright      2023  Xiaomi Corp.        (authors: Fangjun Kuang)

# pip install git+https://github.com/wenet-e2e/wenet.git
# pip install onnxruntime onnx pyyaml
# cp -a ~/open-source/wenet/wenet/transducer/search .
# cp -a ~/open-source//wenet/wenet/e_branchformer .
# cp -a ~/open-source/wenet/wenet/ctl_model .

import os
from typing import Dict

import onnx
import torch
import yaml
from onnxruntime.quantization import QuantType, quantize_dynamic

from wenet.utils.init_model import init_model


class Foo:
    pass


def add_meta_data(filename: str, meta_data: Dict[str, str]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)

    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    #  model = onnx.version_converter.convert_version(model, 21)

    onnx.save(model, filename)


class OnnxModel(torch.nn.Module):
    def __init__(self, encoder: torch.nn.Module, ctc: torch.nn.Module):
        super().__init__()
        self.encoder = encoder
        self.ctc = ctc

    def forward(self, x, x_lens):
        """
        Args:
          x:
            A 3-D tensor of shape (N, T, C)
          x_lens:
            A 1-D tensor of shape (N,) containing valid lengths in x before
            padding. Its type is torch.int64
        """
        encoder_out, encoder_out_mask = self.encoder(
            x,
            x_lens,
            decoding_chunk_size=-1,
            num_decoding_left_chunks=-1,
        )
        log_probs = self.ctc.log_softmax(encoder_out)
        log_probs_lens = encoder_out_mask.int().squeeze(1).sum(1)

        return log_probs, log_probs_lens


@torch.no_grad()
def main():
    args = Foo()
    args.checkpoint = "./final.pt"
    config_file = "./train.yaml"

    with open(config_file, "r") as fin:
        configs = yaml.load(fin, Loader=yaml.FullLoader)
    torch_model, configs = init_model(args, configs)
    torch_model.eval()

    onnx_model = OnnxModel(encoder=torch_model.encoder, ctc=torch_model.ctc)
    filename = "model.onnx"

    N = 1
    T = 1000
    C = 80
    x = torch.rand(N, T, C, dtype=torch.float)
    x_lens = torch.full((N,), fill_value=T, dtype=torch.int64)

    # https://github.com/pytorch/pytorch/issues/114801
    opset_version = 13
    onnx_model = torch.jit.script(onnx_model)
    torch.onnx.export(
        onnx_model,
        (x, x_lens),
        filename,
        opset_version=opset_version,
        input_names=["x", "x_lens"],
        output_names=["log_probs", "log_probs_lens"],
        dynamic_axes={
            "x": {0: "N", 1: "T"},
            "x_lens": {0: "N"},
            "log_probs": {0: "N", 1: "T"},
            "log_probs_lens": {0: "N"},
        },
    )

    # https://wenet.org.cn/downloads?models=wenet&version=aishell_u2pp_conformer_exp.tar.gz
    url = os.environ.get("WENET_URL", "")
    meta_data = {
        "model_type": "wenet_ctc",
        "version": "1",
        "model_author": "wenet",
        "comment": "non-streaming",
        "subsampling_factor": torch_model.encoder.embed.subsampling_rate,
        "vocab_size": torch_model.ctc.ctc_lo.weight.shape[0],
        "url": url,
    }
    add_meta_data(filename=filename, meta_data=meta_data)

    print("Generate int8 quantization models")

    filename_int8 = f"model.int8.onnx"
    quantize_dynamic(
        model_input=filename,
        model_output=filename_int8,
        op_types_to_quantize=["MatMul"],
        weight_type=QuantType.QInt8,
    )


if __name__ == "__main__":
    main()


================================================
FILE: scripts/wenet/test-onnx-streaming.py
================================================
#!/usr/bin/env python3
# Copyright      2023  Xiaomi Corp.        (authors: Fangjun Kuang)

import kaldi_native_fbank as knf
import onnxruntime as ort
import torch
import torchaudio
from torch.nn.utils.rnn import pad_sequence


class OnnxModel:
    def __init__(
        self,
        filename: str,
    ):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 4

        self.session_opts = session_opts

        self.model = ort.InferenceSession(
            filename,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )

        meta = self.model.get_modelmeta().custom_metadata_map
        self.left_chunks = int(meta["left_chunks"])
        self.num_blocks = int(meta["num_blocks"])
        self.chunk_size = int(meta["chunk_size"])
        self.head = int(meta["head"])
        self.output_size = int(meta["output_size"])
        self.cnn_module_kernel = int(meta["cnn_module_kernel"])
        self.right_context = int(meta["right_context"])
        self.subsampling_factor = int(meta["subsampling_factor"])

        self._init_cache()

    def _init_cache(self):
        required_cache_size = self.chunk_size * self.left_chunks

        self.attn_cache = torch.zeros(
            self.num_blocks,
            self.head,
            required_cache_size,
            self.output_size // self.head * 2,
            dtype=torch.float32,
        ).numpy()

        self.conv_cache = torch.zeros(
            self.num_blocks,
            1,
            self.output_size,
            self.cnn_module_kernel - 1,
            dtype=torch.float32,
        ).numpy()

        self.offset = torch.tensor([required_cache_size], dtype=torch.int64).numpy()

        self.required_cache_size = torch.tensor(
            [self.chunk_size * self.left_chunks], dtype=torch.int64
        ).numpy()

    def __call__(self, x: torch.Tensor) -> torch.Tensor:
        """
        Args:
          x:
            A 2-D tensor of shape (T, C)
        Returns:
          Return a 2-D tensor of shape (T, C) containing log_probs.
        """
        attn_mask = torch.ones(
            1, 1, int(self.required_cache_size + self.chunk_size), dtype=torch.bool
        )
        chunk_idx = self.offset // self.chunk_size - self.left_chunks
        if chunk_idx < self.left_chunks:
            attn_mask[
                :, :, : int(self.required_cache_size - chunk_idx * self.chunk_size)
            ] = False

        log_probs, new_attn_cache, new_conv_cache = self.model.run(
            [
                self.model.get_outputs()[0].name,
                self.model.get_outputs()[1].name,
                self.model.get_outputs()[2].name,
            ],
            {
                self.model.get_inputs()[0].name: x.unsqueeze(0).numpy(),
                self.model.get_inputs()[1].name: self.offset,
                self.model.get_inputs()[2].name: self.required_cache_size,
                self.model.get_inputs()[3].name: self.attn_cache,
                self.model.get_inputs()[4].name: self.conv_cache,
                self.model.get_inputs()[5].name: attn_mask.numpy(),
            },
        )

        self.attn_cache = new_attn_cache
        self.conv_cache = new_conv_cache

        log_probs = torch.from_numpy(log_probs)

        self.offset += log_probs.shape[1]

        return log_probs.squeeze(0)


def get_features(test_wav_filename):
    wave, sample_rate = torchaudio.load(test_wav_filename)
    audio = wave[0].contiguous()  # only use the first channel
    if sample_rate != 16000:
        audio = torchaudio.functional.resample(
            audio, orig_freq=sample_rate, new_freq=16000
        )
    audio *= 32768

    opts = knf.FbankOptions()
    opts.frame_opts.dither = 0
    opts.mel_opts.num_bins = 80
    opts.frame_opts.snip_edges = False
    opts.mel_opts.debug_mel = False

    fbank = knf.OnlineFbank(opts)
    fbank.accept_waveform(16000, audio.numpy())
    frames = []
    for i in range(fbank.num_frames_ready):
        frames.append(torch.from_numpy(fbank.get_frame(i)))
    frames = torch.stack(frames)
    return frames


def main():
    model_filename = "./model-streaming.onnx"
    model = OnnxModel(model_filename)

    filename = "./0.wav"
    x = get_features(filename)

    padding = torch.zeros(50, 80)
    x = torch.cat([x, padding], dim=0)

    chunk_length = (
        (model.chunk_size - 1) * model.subsampling_factor + model.right_context + 1
    )
    chunk_length = int(chunk_length)
    chunk_shift = int(model.chunk_size * model.subsampling_factor)
    print(chunk_length, chunk_shift)

    num_frames = x.shape[0]
    n = (num_frames - chunk_length) // chunk_shift + 1
    tokens = []
    for i in range(n):
        start = i * chunk_shift
        end = start + chunk_length
        frames = x[start:end, :]
        log_probs = model(frames)

        indexes = log_probs.argmax(dim=1)
        indexes = torch.unique_consecutive(indexes)
        indexes = indexes[indexes != 0].tolist()
        if indexes:
            tokens.extend(indexes)

    id2word = dict()
    with open("./units.txt", encoding="utf-8") as f:
        for line in f:
            word, idx = line.strip().split()
            id2word[int(idx)] = word
    text = "".join([id2word[i] for i in tokens])
    print(text)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/wenet/test-onnx.py
================================================
#!/usr/bin/env python3
# Copyright      2023  Xiaomi Corp.        (authors: Fangjun Kuang)

import kaldi_native_fbank as knf
import onnxruntime as ort
import torch
import torchaudio
from torch.nn.utils.rnn import pad_sequence


class OnnxModel:
    def __init__(
        self,
        filename: str,
    ):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 4

        self.session_opts = session_opts

        self.model = ort.InferenceSession(
            filename,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )

    def __call__(self, x: torch.Tensor, x_lens: torch.Tensor) -> torch.Tensor:
        """
        Args:
          x:
            A 3-D tensor of shape (N, T, C)
          x_lens:
            A 1-D tensor of shape (N,). Its dtype is torch.int64
        Returns:
          Return a 3-D tensor of shape (N, T, C) containing log_probs.
        """
        log_probs, log_probs_lens = self.model.run(
            [self.model.get_outputs()[0].name, self.model.get_outputs()[1].name],
            {
                self.model.get_inputs()[0].name: x.numpy(),
                self.model.get_inputs()[1].name: x_lens.numpy(),
            },
        )
        return torch.from_numpy(log_probs), torch.from_numpy(log_probs_lens)


def get_features(test_wav_filename):
    wave, sample_rate = torchaudio.load(test_wav_filename)
    audio = wave[0].contiguous()  # only use the first channel
    if sample_rate != 16000:
        audio = torchaudio.functional.resample(
            audio, orig_freq=sample_rate, new_freq=16000
        )
    audio *= 32768

    opts = knf.FbankOptions()
    opts.frame_opts.dither = 0
    opts.mel_opts.num_bins = 80
    opts.frame_opts.snip_edges = False
    opts.mel_opts.debug_mel = False

    fbank = knf.OnlineFbank(opts)
    fbank.accept_waveform(16000, audio.numpy())
    frames = []
    for i in range(fbank.num_frames_ready):
        frames.append(torch.from_numpy(fbank.get_frame(i)))
    frames = torch.stack(frames)
    return frames


def main():
    model_filename = "./model.onnx"
    model = OnnxModel(model_filename)

    filename = "./0.wav"
    x = get_features(filename)
    x = x.unsqueeze(0)

    # Note: It supports only batch size == 1
    x_lens = torch.tensor([x.shape[1]], dtype=torch.int64)

    print(x.shape, x_lens)

    log_probs, log_probs_lens = model(x, x_lens)
    log_probs = log_probs[0]
    print(log_probs.shape)

    indexes = log_probs.argmax(dim=1)
    print(indexes)
    indexes = torch.unique_consecutive(indexes)
    indexes = indexes[indexes != 0].tolist()

    id2word = dict()
    with open("./units.txt", encoding="utf-8") as f:
        for line in f:
            word, idx = line.strip().split()
            id2word[int(idx)] = word
    text = "".join([id2word[i] for i in indexes])
    print(text)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/wespeaker/README.md
================================================
# Introduction

This folder contains script for adding meta data to onnx models from
https://github.com/wenet-e2e/wespeaker/blob/master/docs/pretrained.md

You can use the models with metadata in sherpa-onnx.


**Caution**: You have to add model meta data to `*.onnx` since we plan
to support models from different frameworks.


================================================
FILE: scripts/wespeaker/add_meta_data.py
================================================
#!/usr/bin/env python3
# Copyright      2023  Xiaomi Corp.        (authors: Fangjun Kuang)

"""
This script adds meta data to a model so that it can be used in sherpa-onnx.

Usage:
./add_meta_data.py --model ./voxceleb_resnet34.onnx  --language English
"""

import argparse
from pathlib import Path
from typing import Dict

import onnx
import onnxruntime


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model",
        type=str,
        required=True,
        help="Path to the input onnx model. Example value: model.onnx",
    )

    parser.add_argument(
        "--language",
        type=str,
        required=True,
        help="""Supported language of the input model.
        Example value: Chinese, English.
        """,
    )

    parser.add_argument(
        "--url",
        type=str,
        default="https://github.com/wenet-e2e/wespeaker/blob/master/docs/pretrained.md",
        help="Where the model is downloaded",
    )

    parser.add_argument(
        "--comment",
        type=str,
        default="no comment",
        help="Comment about the model",
    )

    parser.add_argument(
        "--sample-rate",
        type=int,
        default=16000,
        help="Sample rate expected by the model",
    )

    return parser.parse_args()


def add_meta_data(filename: str, meta_data: Dict[str, str]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)
    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    onnx.save(model, filename)


def get_output_dim(filename) -> int:
    filename = str(filename)
    session_opts = onnxruntime.SessionOptions()
    session_opts.log_severity_level = 3  # error level
    sess = onnxruntime.InferenceSession(filename, session_opts)

    for i in sess.get_inputs():
        print(i)

    print("----------")

    for o in sess.get_outputs():
        print(o)

    print("----------")

    assert len(sess.get_inputs()) == 1
    assert len(sess.get_outputs()) == 1

    i = sess.get_inputs()[0]
    o = sess.get_outputs()[0]

    assert i.shape[:2] == ["B", "T"], i.shape
    assert o.shape[0] == "B"

    assert i.shape[2] == 80, i.shape

    return o.shape[1]


def main():
    args = get_args()
    model = Path(args.model)
    language = args.language
    url = args.url
    comment = args.comment
    sample_rate = args.sample_rate

    if not model.is_file():
        raise ValueError(f"{model} does not exist")

    assert len(language) > 0, len(language)
    assert len(url) > 0, len(url)

    output_dim = get_output_dim(model)

    # all models from wespeaker expect input samples in the range
    # [-32768, 32767]
    normalize_samples = 0

    meta_data = {
        "framework": "wespeaker",
        "language": language,
        "url": url,
        "comment": comment,
        "sample_rate": sample_rate,
        "output_dim": output_dim,
        "normalize_samples": normalize_samples,
    }
    print(meta_data)
    add_meta_data(filename=str(model), meta_data=meta_data)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/wespeaker/test.py
================================================
#!/usr/bin/env python3
# Copyright      2023  Xiaomi Corp.        (authors: Fangjun Kuang)

"""
This script computes speaker similarity score in the range [0-1]
of two wave files using a speaker embedding model.
"""
import argparse
import wave
from pathlib import Path

import kaldi_native_fbank as knf
import numpy as np
import onnxruntime as ort
from numpy.linalg import norm


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model",
        type=str,
        required=True,
        help="Path to the input onnx model. Example value: model.onnx",
    )

    parser.add_argument(
        "--file1",
        type=str,
        required=True,
        help="Input wave 1",
    )

    parser.add_argument(
        "--file2",
        type=str,
        required=True,
        help="Input wave 2",
    )

    return parser.parse_args()


def read_wavefile(filename, expected_sample_rate: int = 16000) -> np.ndarray:
    """
    Args:
      filename:
        Path to a wave file, which must be of 16-bit and 16kHz.
     expected_sample_rate:
       Expected sample rate of the wave file.
    Returns:
      Return a 1-D float32 array containing audio samples. Each sample is in
      the range [-1, 1].
    """
    filename = str(filename)
    with wave.open(filename) as f:
        wave_file_sample_rate = f.getframerate()
        assert wave_file_sample_rate == expected_sample_rate, (
            wave_file_sample_rate,
            expected_sample_rate,
        )

        num_channels = f.getnchannels()
        assert f.getsampwidth() == 2, f.getsampwidth()  # it is in bytes
        num_samples = f.getnframes()
        samples = f.readframes(num_samples)
        samples_int16 = np.frombuffer(samples, dtype=np.int16)
        samples_int16 = samples_int16.reshape(-1, num_channels)[:, 0]
        samples_float32 = samples_int16.astype(np.float32)

        samples_float32 = samples_float32 / 32768

        return samples_float32


def compute_features(samples: np.ndarray, sample_rate: int) -> np.ndarray:
    opts = knf.FbankOptions()
    opts.frame_opts.dither = 0
    opts.frame_opts.samp_freq = sample_rate
    opts.frame_opts.snip_edges = False

    opts.mel_opts.num_bins = 80
    opts.mel_opts.debug_mel = False

    fbank = knf.OnlineFbank(opts)
    fbank.accept_waveform(sample_rate, samples)
    fbank.input_finished()

    features = []
    for i in range(fbank.num_frames_ready):
        f = fbank.get_frame(i)
        features.append(f)
    features = np.stack(features, axis=0)

    return features


class OnnxModel:
    def __init__(
        self,
        filename: str,
    ):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.session_opts = session_opts

        self.model = ort.InferenceSession(
            filename,
            sess_options=self.session_opts,
        )

        meta = self.model.get_modelmeta().custom_metadata_map
        self.normalize_samples = int(meta["normalize_samples"])
        self.sample_rate = int(meta["sample_rate"])
        self.output_dim = int(meta["output_dim"])

    def __call__(self, x: np.ndarray) -> np.ndarray:
        """
        Args:
          x:
            A 2-D float32 tensor of shape (T, C).
          y:
            A 1-D float32 tensor containing model output.
        """
        x = np.expand_dims(x, axis=0)

        return self.model.run(
            [
                self.model.get_outputs()[0].name,
            ],
            {
                self.model.get_inputs()[0].name: x,
            },
        )[0][0]


def main():
    args = get_args()
    filename = Path(args.model)
    file1 = Path(args.file1)
    file2 = Path(args.file2)
    assert filename.is_file(), filename
    assert file1.is_file(), file1
    assert file2.is_file(), file2

    model = OnnxModel(filename)
    wave1 = read_wavefile(file1, model.sample_rate)
    wave2 = read_wavefile(file2, model.sample_rate)

    if not model.normalize_samples:
        wave1 = wave1 * 32768
        wave2 = wave2 * 32768

    features1 = compute_features(wave1, model.sample_rate)
    features2 = compute_features(wave2, model.sample_rate)

    output1 = model(features1)
    output2 = model(features2)

    similarity = np.dot(output1, output2) / (norm(output1) * norm(output2))
    print(f"similarity in the range [0-1]: {similarity}")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/wheel/README.md
================================================
# Introduction

This folder is for developers only.

## sherpa-onnx-core

It contains the scripts for building the package sherpa-onnx-core.

```
python3 setup.py bdist_wheel --plat-name=macosx_10_15_x86_64
python3 setup.py bdist_wheel --plat-name=macosx_11_0_arm64
python3 setup.py bdist_wheel --plat-name=macosx_11_0_universal2
python3 setup.py bdist_wheel --plat-name=macosx_10_15_universal2

python3 setup.py bdist_wheel --plat-name=win_amd64
python3 setup.py bdist_wheel --plat-name=win32

python3 setup.py bdist_wheel --plat-name=manylinux2014_x86_64
python3 setup.py bdist_wheel --plat-name=manylinux2014_aarch64
python3 setup.py bdist_wheel --plat-name=linux_armv7l
```

## sherpa-onnx-bin


================================================
FILE: scripts/wheel/patch_wheel.py
================================================
#!/usr/bin/env python3
# Copyright    2023  Xiaomi Corp.        (authors: Fangjun Kuang)

import argparse
import glob
import shutil
import subprocess
import sys
from pathlib import Path


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--in-dir",
        type=Path,
        required=True,
        help="Input directory.",
    )

    parser.add_argument(
        "--out-dir",
        type=Path,
        required=True,
        help="Output directory.",
    )
    return parser.parse_args()


def process(out_dir: Path, whl: Path):
    tmp_dir = out_dir / "tmp"
    subprocess.check_call(f"unzip {whl} -d {tmp_dir}", shell=True)
    if "cp37" in str(whl):
        py_version = "3.7"
    elif "cp38" in str(whl):
        py_version = "3.8"
    elif "cp39" in str(whl):
        py_version = "3.9"
    elif "cp310" in str(whl):
        py_version = "3.10"
    elif "cp311" in str(whl):
        py_version = "3.11"
    elif "cp312" in str(whl):
        py_version = "3.12"
    elif "cp313" in str(whl):
        py_version = "3.13"
    elif "cp314" in str(whl):
        py_version = "3.14"
    elif "py3-none" in str(whl):
        py_version = None
    else:
        assert False, f"Unknown python version in {whl}"

    if py_version:
        rpath_list = [
            f"$ORIGIN/../lib/python{py_version}/site-packages/sherpa_onnx/lib",
            f"$ORIGIN/../lib/python{py_version}/dist-packages/sherpa_onnx/lib",
            #
            f"$ORIGIN/../lib/python{py_version}/site-packages/sherpa_onnx/lib64",
            f"$ORIGIN/../lib/python{py_version}/dist-packages/sherpa_onnx/lib64",
            #
            f"$ORIGIN/../lib/python{py_version}/site-packages/sherpa_onnx.libs",
        ]
    else:
        rpath_list = []
        for p in ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]:
            rpath_list.extend(
                [
                    f"$ORIGIN/../lib/python{p}/site-packages/sherpa_onnx/lib",
                    f"$ORIGIN/../lib/python{p}/dist-packages/sherpa_onnx/lib",
                ]
            )

    rpaths = ":".join(rpath_list)

    for filename in glob.glob(f"{tmp_dir}/sherpa_onnx*data/data/bin/*", recursive=True):
        print(filename)
        existing_rpath = (
            subprocess.check_output(["patchelf", "--print-rpath", filename])
            .decode()
            .strip()
        )
        target_rpaths = rpaths + ":" + existing_rpath
        subprocess.check_call(
            f"patchelf --force-rpath --set-rpath '{target_rpaths}' {filename}",
            shell=True,
        )

    outwheel = Path(shutil.make_archive(whl, "zip", tmp_dir))
    Path(outwheel).rename(out_dir / whl.name)

    shutil.rmtree(tmp_dir)


def main():
    args = get_args()
    print(args)
    in_dir = args.in_dir
    out_dir = args.out_dir
    out_dir.mkdir(exist_ok=True, parents=True)

    for whl in in_dir.glob("*.whl"):
        process(out_dir, whl)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/wheel/sherpa-onnx-bin/setup.py
================================================
import glob
import platform

from setuptools import setup


def is_windows():
    return platform.system() == "Windows"


bin_files = glob.glob("bin/*")
print("bin_files", bin_files)

setup(
    name="sherpa-onnx-bin",
    version="1.12.31",
    description="Binary executables for sherpa-onnx",
    author="The sherpa-onnx development team",
    url="https://github.com/k2-fsa/sherpa-onnx",
    author_email="dpovey@gmail.com",
    zip_safe=False,
    license="Apache 2.0",
    packages=[],
    data_files=[("Scripts", bin_files) if is_windows() else ("bin", bin_files)],
    install_requires=[
        "sherpa-onnx-core==1.12.31",
    ],
    classifiers=[
        "Programming Language :: Python :: 3",
        "Operating System :: Microsoft :: Windows",
        "Operating System :: POSIX :: Linux",
        "Operating System :: MacOS :: MacOS X",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
    ],
)


================================================
FILE: scripts/wheel/sherpa-onnx-core/.gitignore
================================================


================================================
FILE: scripts/wheel/sherpa-onnx-core/MANIFEST.in
================================================
recursive-include sherpa_onnx/lib *
recursive-include sherpa_onnx/include *


================================================
FILE: scripts/wheel/sherpa-onnx-core/setup.py
================================================
import platform

from setuptools import setup


def is_windows():
    return platform.system() == "Windows"


def get_binaries():
    if not is_windows():
        return None
    libs = [
        "onnxruntime.dll",
        "sherpa-onnx-c-api.dll",
        "sherpa-onnx-cxx-api.dll",
        "sherpa-onnx-c-api.lib",
        "sherpa-onnx-cxx-api.lib",
    ]
    prefix = "./sherpa_onnx/lib"
    return [f"{prefix}/{lib}" for lib in libs]


setup(
    name="sherpa-onnx-core",
    version="1.12.31",
    description="Core shared libraries for sherpa-onnx",
    packages=["sherpa_onnx"],
    include_package_data=True,
    data_files=[("Scripts", get_binaries())] if get_binaries() else None,
    author="The sherpa-onnx development team",
    url="https://github.com/k2-fsa/sherpa-onnx",
    author_email="dpovey@gmail.com",
    zip_safe=False,
    license="Apache-2.0",
    classifiers=[
        "Programming Language :: Python :: 3",
        "Operating System :: Microsoft :: Windows",
        "Operating System :: POSIX :: Linux",
        "Operating System :: MacOS :: MacOS X",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
    ],
)


================================================
FILE: scripts/wheel/sherpa-onnx-core/sherpa_onnx/__main__.py
================================================
import sys
from . import _info


def main():
    args = sys.argv[1:]
    if not args:
        print(
            "Usage: python3 -m sherpa_onnx [--cflags|--c-api-libs|--c-api-libs-only-L|--c-api-libs-only-l|--cxx-api-libs|--cxx-api-libs-only-L|--cxx-api-libs-only-l]"
        )
        sys.exit(1)

    if "--cflags" in args:
        print(f"-I{_info.get_include_dir()}")
    elif "--c-api-libs" in args:
        lib_flags = " ".join(f"-l{lib}" for lib in _info.get_c_api_libs())
        print(f"-L{_info.get_libs_dir()} {lib_flags}")
    elif "--c-api-libs-only-L" in args:
        print(f"-L{_info.get_libs_dir()}")
    elif "--c-api-libs-only-l" in args:
        print(" ".join(f"-l{lib}" for lib in _info.get_c_api_libs()))
    elif "--cxx-api-libs" in args:
        lib_flags = " ".join(f"-l{lib}" for lib in _info.get_cxx_api_libs())
        print(f"-L{_info.get_libs_dir()} {lib_flags}")
    elif "--cxx-api-libs-only-L" in args:
        print(f"-L{_info.get_libs_dir()}")
    elif "--cxx-api-libs-only-l" in args:
        print(" ".join(f"-l{lib}" for lib in _info.get_cxx_api_libs()))
    else:
        print("Unknown option:", args[0])
        sys.exit(1)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/wheel/sherpa-onnx-core/sherpa_onnx/_info.py
================================================
from pathlib import Path
from typing import List

_pkg_dir = Path(__file__).parent
libs_dir = _pkg_dir / "lib"
include_dir = _pkg_dir / "include"

# List of libraries (without "lib" prefix, without extension)
# Adjust to match your actual .so/.dll/.dylib files
onnxruntime_lib = ["onnxruntime"]
c_lib = ["sherpa-onnx-c-api"] + onnxruntime_lib
cxx_lib = ["sherpa-onnx-cxx-api"] + c_lib


def get_include_dir() -> str:
    return str(include_dir)


def get_libs_dir() -> str:
    return str(libs_dir)


def get_c_api_libs() -> List[str]:
    return c_lib


def get_cxx_api_libs() -> List[str]:
    return cxx_lib


================================================
FILE: scripts/whisper/.gitignore
================================================
*.onnx
*.config
*.ort
*-tokens.txt
*.bias
*.weights
*.weight
*.*embedding
_Const*
onnx__*


================================================
FILE: scripts/whisper/README.md
================================================
# Introduction

This folder contains code showing how to convert [Whisper][whisper] to onnx
and use onnxruntime to replace PyTorch for speech recognition.

You can use [sherpa-onnx][sherpa-onnx] to run the converted model.

Please see
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/export-onnx.html
for details.

## Finding Alignment Heads for Word Timestamps

The `export-onnx-with-attention.py` script exports Whisper models with
cross-attention weights for word-level timestamps. It requires knowing which
attention heads are "alignment heads" - heads that show monotonically increasing
attention patterns useful for aligning audio to text.

For standard OpenAI Whisper models, alignment heads are defined in the
`ALIGNMENT_HEADS` dict in the export script. For new or custom models (like
distil-whisper variants), you can discover alignment heads using:

```bash
python find_alignment_heads.py --model <model-name> --audio <test-audio.wav>
```

This script analyzes all attention heads and ranks them by:
- **Monotonicity**: Whether attention peaks move forward as tokens are decoded
- **Diagonal score**: Correlation with expected diagonal attention pattern

Example output:
```
Top 15 alignment head candidates:
------------------------------------------------------------
 Layer   Head    Monotonic     Diagonal     Combined
------------------------------------------------------------
     3      2        0.846        0.985        0.915
     0      0        0.962        0.617        0.789
     ...
```

Heads with high combined scores (>0.7) are good candidates. A single head with
a very high diagonal score (>0.9) is often sufficient for accurate timestamps.

[whisper]: https://github.com/openai/whisper
[sherpa-onnx]: https://github.com/k2-fsa/sherpa-onnx


================================================
FILE: scripts/whisper/ascend-npu/test_om.py
================================================
#!/usr/bin/env python3
# Copyright    2026  Xiaomi Corp.        (authors: Fangjun Kuang)

"""
Usage example:

./test_om.py \
  --encoder ./tiny.en-encoder.om \
  --decoder ./tiny.en-decoder.om \
  --tokens ./tiny.en-tokens.txt \
  --wav  ./test_wavs/0.wav
"""

import argparse
import base64
from typing import List

import kaldi_native_fbank as knf
import librosa
import numpy as np
from ais_bench.infer.interface import InferSession


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--encoder",
        type=str,
        required=True,
        help="Path to the encoder",
    )

    parser.add_argument(
        "--decoder",
        type=str,
        required=True,
        help="Path to the decoder",
    )

    parser.add_argument(
        "--tokens",
        type=str,
        required=True,
        help="Path to the tokens",
    )

    parser.add_argument(
        "--wav",
        type=str,
        required=True,
        help="Path to the test wav",
    )

    return parser.parse_args()


def causal_mask_1d(n: int, L: int):
    """
    Returns a 1-D int mask of shape (L,) with:
      0 -> allowed
      1 -> masked (will be converted to -inf later)
    """
    mask = np.ones((L,), dtype=np.int32)
    if n > 0:
        mask[:n] = 0
    return mask


def load_audio(filename: str) -> np.ndarray:
    samples, _ = librosa.load(filename, sr=16000)

    samples = np.ascontiguousarray(samples)
    return samples


def compute_features(samples: np.ndarray, dim: int = 80) -> np.ndarray:
    """
    Returns:
      Return a 1-D float32 tensor of shape (1, 80, 3000) containing the features.
    """
    features = []
    opts = knf.WhisperFeatureOptions()
    opts.dim = dim
    online_whisper_fbank = knf.OnlineWhisperFbank(opts)
    online_whisper_fbank.accept_waveform(16000, samples)
    online_whisper_fbank.input_finished()

    features = np.stack(
        [
            online_whisper_fbank.get_frame(i)
            for i in range(online_whisper_fbank.num_frames_ready)
        ]
    )
    log_spec = np.log10(np.clip(features, a_min=1e-10, a_max=None))
    log_spec = np.maximum(log_spec, log_spec.max() - 8.0)
    mel = (log_spec + 4.0) / 4.0
    num_frames = mel.shape[0]
    target = 3000
    if num_frames < target:
        mel = np.pad(
            mel,
            pad_width=((0, target - num_frames), (0, 0)),
            mode="constant",
            constant_values=0,
        )

    mel = np.expand_dims(mel.T, axis=0)
    mel = np.ascontiguousarray(mel)

    return mel


def load_tokens(filename):
    tokens = dict()
    with open(filename, "r") as f:
        for line in f:
            t, i = line.split()
            tokens[int(i)] = t
    return tokens


class OmModel:
    def __init__(self, encoder: str, decoder: str):
        self.encoder = InferSession(device_id=0, model_path=encoder, debug=False)
        self.decoder = InferSession(device_id=0, model_path=decoder, debug=False)

        name = self.encoder.get_inputs()[0].name

        if ".en" in name:
            self.sot_sequence = [50257, 50362]
            self.eot = 50256
        else:
            self.sot_sequence = [50258, 50259, 50359, 50363]
            self.eot = 50257

        if "tiny" in name:
            self.n_text_layer = 4
            self.n_text_ctx = 448
            self.n_text_state = 384
        elif "base" in name:
            self.n_text_layer = 6
            self.n_text_ctx = 448
            self.n_text_state = 512
        elif "small" in name:
            self.n_text_layer = 12
            self.n_text_ctx = 448
            self.n_text_state = 768
        elif "medium" in name:
            self.n_text_layer = 24
            self.n_text_ctx = 448
            self.n_text_state = 1024
        else:
            assert False, f"Unsupported encoder input {name}"

        print("---encoder---")
        for i in self.encoder.get_inputs():
            print(i.name, i.datatype, i.shape)

        print("-----")

        for i in self.encoder.get_outputs():
            print(i.name, i.datatype, i.shape)

        print("---decoder---")
        for i in self.decoder.get_inputs():
            print(i.name, i.datatype, i.shape)

        print("-----")

        for i in self.decoder.get_outputs():
            print(i.name, i.datatype, i.shape)

    def get_self_cache(self) -> List[np.ndarray]:
        self_cache = []
        batch_size = 1
        for i in range(self.n_text_layer):
            k = np.zeros(
                (batch_size, self.n_text_ctx, self.n_text_state), dtype=np.float32
            )
            v = np.zeros(
                (batch_size, self.n_text_ctx, self.n_text_state), dtype=np.float32
            )
            self_cache.extend([k, v])
        return self_cache

    def run_encoder(self, x: np.ndarray):
        """
        Args:
          x: (1, 80, 3000), np.float32
        Returns:
          cross_kv:
           - (k, v) for layer 0
           - (k, v) for layer 1
           - (k, v) for layer 2
           - (k, v) for layer 3
        """
        out = self.encoder.infer([x])
        return out

    def run_decoder(self, tokens: np.ndarray, self_kv, cross_kv, offset, mask):
        """
        Args:
          tokens: (1, 1), np.int32
          offset: (1,), np.int32
          mask: (model.n_text_ctx,), np.int32
        Returns:
          logit: (1, 1, vocab_size)
          this_self_kv
        """
        return self.decoder.infer([tokens] + self_kv + cross_kv + [offset, mask])


def main():
    args = get_args()
    print(vars(args))
    samples = load_audio(args.wav)
    features = compute_features(samples)
    print("features", features.shape)

    model = OmModel(args.encoder, args.decoder)

    cross_kv = model.run_encoder(features)

    self_kv = model.get_self_cache()

    offset = np.array([0], dtype=np.int32)
    for t in model.sot_sequence:
        token = np.array([[t]], dtype=np.int32)  # sot
        mask = causal_mask_1d(offset.item(), model.n_text_ctx)
        print(t, model.sot_sequence, token, mask.shape, len(cross_kv), len(self_kv))

        out = model.run_decoder(
            tokens=token, self_kv=self_kv, cross_kv=cross_kv, offset=offset, mask=mask
        )

        for i in range(1, len(out)):
            self_kv[i - 1][:, offset.item() : offset.item() + 1, :] = out[i]

        offset += 1

    idx = out[0][0, 0].argmax()

    eot = model.eot

    ans = []

    while idx != eot and offset.item() < 100:
        ans.append(idx)
        token = np.array([[idx]], dtype=np.int32)

        mask = causal_mask_1d(offset.item(), model.n_text_ctx)

        out = model.run_decoder(
            tokens=token, self_kv=self_kv, cross_kv=cross_kv, offset=offset, mask=mask
        )

        for i in range(1, len(out)):
            self_kv[i - 1][:, offset.item() : offset.item() + 1, :] = out[i]

        offset += 1
        idx = out[0][0, 0].argmax()

    print(ans)
    id2token = load_tokens(args.tokens)

    s = b""
    for i in ans:
        if i in id2token:
            s += base64.b64decode(id2token[i])

    print(s.decode().strip())
    return


if __name__ == "__main__":
    main()


================================================
FILE: scripts/whisper/export-onnx-with-attention.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Posit Software, PBC
# flake8: noqa

"""
Export Whisper ONNX models with cross-attention weights for word-level timestamps.

This script exports Whisper models that include cross-attention weights from
alignment heads as an additional decoder output. These weights can be used
with Dynamic Time Warping (DTW) to compute word-level timestamps.

Based on the original export-onnx.py script.

Usage:
  python export-onnx-with-attention.py --model tiny

The exported decoder will have 4 outputs instead of 3:
  - logits
  - out_n_layer_self_k_cache
  - out_n_layer_self_v_cache
  - cross_attention_weights  (NEW: shape [n_alignment_heads, n_audio_ctx])
"""

import argparse
import importlib.util
import os
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import onnx
import torch
import torch.nn.functional as F
from onnxruntime.quantization import QuantType, quantize_dynamic
from torch import Tensor, nn

import whisper
from whisper.model import (
    MultiHeadAttention,
    ResidualAttentionBlock,
    TextDecoder,
)

from export_onnx import add_meta_data, load_model, AudioEncoderTensorCache


# Sentinel value indicating alignment heads should be read from model metadata
USE_MODEL_METADATA = True

# Alignment heads for each model variant.
# For official OpenAI models, we use USE_MODEL_METADATA to read from the model.
# For distil-whisper models, we use empirically-determined heads since their
# metadata includes all heads in certain layers rather than curated ones.
ALIGNMENT_HEADS = {
    # TODO: ["medium-aishell"]
    # Official OpenAI models - trust their metadata
    "tiny.en": USE_MODEL_METADATA,
    "tiny": USE_MODEL_METADATA,
    "base.en": USE_MODEL_METADATA,
    "base": USE_MODEL_METADATA,
    "small.en": USE_MODEL_METADATA,
    "small": USE_MODEL_METADATA,
    "medium.en": USE_MODEL_METADATA,
    "medium": USE_MODEL_METADATA,
    "large-v1": USE_MODEL_METADATA,
    "large-v2": USE_MODEL_METADATA,
    "large-v3": USE_MODEL_METADATA,
    "large": USE_MODEL_METADATA,
    "turbo": USE_MODEL_METADATA,
    # Distil-whisper models (alignment heads discovered empirically)
    # distil-small.en has 4 decoder layers; head (3,2) has 0.985 diagonal score
    "distil-small.en": [(3, 2)],
    # distil-medium.en has 2 decoder layers; head (1,11) has 0.804 diagonal score
    "distil-medium.en": [(1, 11)],
    # distil-large-v2 has 2 decoder layers; head (1,12) has 0.806 diagonal score
    "distil-large-v2": [(1, 12)],
    # distil-large-v3 has 2 decoder layers; head (1,3) has 0.623 diagonal score
    "distil-large-v3": [(1, 3)],
    # distil-large-v3.5 has 2 decoder layers; head (1,3) has 0.483 diagonal score
    "distil-large-v3.5": [(1, 3)],
}

def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model",
        type=str,
        required=True,
        choices=list(ALIGNMENT_HEADS.keys()),
        help="Whisper model name (must have known alignment heads)",
    )
    return parser.parse_args()


def extract_alignment_heads_from_model(model) -> List[Tuple[int, int]]:
    """Extract alignment heads from model metadata.

    Official OpenAI whisper models store alignment heads as a sparse boolean
    tensor with shape (n_layers, n_heads) where True indicates an alignment head.

    Returns:
        List of (layer, head) tuples.

    Raises:
        ValueError: If alignment heads cannot be extracted from model.
    """
    if not hasattr(model, "alignment_heads") or model.alignment_heads is None:
        raise ValueError("Model does not have alignment_heads metadata")

    ah = model.alignment_heads
    if not hasattr(ah, "indices"):
        raise ValueError("Model alignment_heads is not a sparse tensor")

    indices = ah.indices()
    return list(zip(indices[0].tolist(), indices[1].tolist()))


def get_alignment_heads(name: str, model) -> List[Tuple[int, int]]:
    """Get alignment heads for a model.

    If ALIGNMENT_HEADS[name] is USE_MODEL_METADATA, alignment heads are read
    from the model's metadata. Otherwise, the explicit list is used.

    Args:
        name: Model name
        model: Loaded whisper model

    Returns:
        List of (layer, head) tuples for alignment heads.

    Raises:
        ValueError: If no alignment heads can be determined for the model.
    """
    if name not in ALIGNMENT_HEADS:
        raise ValueError(
            f"No alignment heads defined for model '{name}'. "
            f"Supported models: {', '.join(sorted(ALIGNMENT_HEADS.keys()))}"
        )

    heads = ALIGNMENT_HEADS[name]

    if heads is USE_MODEL_METADATA:
        print("Reading alignment heads from model metadata")
        return extract_alignment_heads_from_model(model)
    else:
        print("Using alignment heads from ALIGNMENT_HEADS table")
        return heads


def convert_tokens(name: str, model):
    """Convert and save tokens file."""
    whisper_dir = Path(whisper.__file__).parent
    multilingual = model.is_multilingual
    tokenizer = (
        whisper_dir
        / "assets"
        / (multilingual and "multilingual.tiktoken" or "gpt2.tiktoken")
    )
    if not tokenizer.is_file():
        raise ValueError(f"Cannot find {tokenizer}")

    with open(tokenizer, "r") as f:
        contents = f.read()
        tokens = {
            token: int(rank)
            for token, rank in (line.split() for line in contents.splitlines() if line)
        }

    output_path = f"{name}-tokens.txt"
    with open(output_path, "w") as f:
        for t, i in tokens.items():
            f.write(f"{t} {i}\n")


# =============================================================================
# Attention-enabled decoder classes
# =============================================================================


class MultiHeadAttentionCrossWithWeights(nn.Module):
    """Cross-attention that returns both output and attention weights."""

    def __init__(
        self,
        inMultiHeadAttention: MultiHeadAttention,
        layer_index: int,
        alignment_heads: List[Tuple[int, int]],
    ):
        super().__init__()
        self.multiHeadAttention = inMultiHeadAttention
        self.layer_index = layer_index
        # Find which heads in this layer are alignment heads
        self.alignment_head_indices = [
            head_idx for (layer_idx, head_idx) in alignment_heads
            if layer_idx == layer_index
        ]
        self.n_head = inMultiHeadAttention.n_head

    def forward(
        self,
        x: Tensor,
        k: Tensor,
        v: Tensor,
    ) -> Tuple[Tensor, Optional[Tensor]]:
        q = self.multiHeadAttention.query(x)

        # Compute attention weights manually (don't use SDPA)
        n_batch, n_ctx, n_state = q.shape
        scale = (n_state // self.n_head) ** -0.25

        q = q.view(*q.shape[:2], self.n_head, -1).permute(0, 2, 1, 3)
        k_reshaped = k.view(*k.shape[:2], self.n_head, -1).permute(0, 2, 1, 3)
        v_reshaped = v.view(*v.shape[:2], self.n_head, -1).permute(0, 2, 1, 3)

        # Compute QK^T with scaling
        qk = (q * scale) @ (k_reshaped * scale).transpose(-1, -2)
        qk = qk.float()

        # Softmax to get attention weights
        w = F.softmax(qk, dim=-1).to(q.dtype)

        # Compute output
        out = (w @ v_reshaped).permute(0, 2, 1, 3).flatten(start_dim=2)
        out = self.multiHeadAttention.out(out)

        # Extract alignment head weights if this layer has any
        if self.alignment_head_indices:
            # w shape: (batch, n_head, n_ctx, n_audio_ctx)
            # Select only the alignment heads for this layer
            # Output shape: (batch, n_alignment_heads, n_ctx, n_audio_ctx)
            alignment_weights = w[:, self.alignment_head_indices, :, :]
        else:
            alignment_weights = None

        return out, alignment_weights


class MultiHeadAttentionSelfManual(nn.Module):
    """Self-attention with KV cache support and manual attention computation."""

    def __init__(self, inMultiHeadAttention: MultiHeadAttention):
        super().__init__()
        self.multiHeadAttention = inMultiHeadAttention
        self.n_head = inMultiHeadAttention.n_head

    def forward(
        self,
        x: Tensor,
        k_cache: Tensor,
        v_cache: Tensor,
        mask: Tensor,
    ):
        q = self.multiHeadAttention.query(x)
        k = self.multiHeadAttention.key(x)
        v = self.multiHeadAttention.value(x)

        k_cache[:, -k.shape[1] :, :] = k
        v_cache[:, -v.shape[1] :, :] = v

        # Manual attention computation (avoid SDPA for ONNX compatibility)
        n_batch, n_ctx, n_state = q.shape
        scale = (n_state // self.n_head) ** -0.25

        q = q.view(*q.shape[:2], self.n_head, -1).permute(0, 2, 1, 3)
        k_reshaped = k_cache.view(*k_cache.shape[:2], self.n_head, -1).permute(0, 2, 1, 3)
        v_reshaped = v_cache.view(*v_cache.shape[:2], self.n_head, -1).permute(0, 2, 1, 3)

        qk = (q * scale) @ (k_reshaped * scale).transpose(-1, -2)
        if mask is not None:
            qk = qk + mask[:n_ctx, :n_ctx]
        qk = qk.float()

        w = F.softmax(qk, dim=-1).to(q.dtype)
        out = (w @ v_reshaped).permute(0, 2, 1, 3).flatten(start_dim=2)

        return self.multiHeadAttention.out(out), k_cache, v_cache


class ResidualAttentionBlockWithWeights(nn.Module):
    """Residual attention block that returns cross-attention weights."""

    def __init__(
        self,
        inResidualAttentionBlock: ResidualAttentionBlock,
        layer_index: int,
        alignment_heads: List[Tuple[int, int]],
    ):
        super().__init__()
        self.originalBlock = inResidualAttentionBlock
        self.attn = MultiHeadAttentionSelfManual(inResidualAttentionBlock.attn)
        self.cross_attn = (
            MultiHeadAttentionCrossWithWeights(
                inResidualAttentionBlock.cross_attn,
                layer_index,
                alignment_heads,
            )
            if inResidualAttentionBlock.cross_attn
            else None
        )

    def forward(
        self,
        x: Tensor,
        self_k_cache: Tensor,
        self_v_cache: Tensor,
        cross_k: Tensor,
        cross_v: Tensor,
        mask: Tensor,
    ) -> Tuple[Tensor, Tensor, Tensor, Optional[Tensor]]:
        self_attn_x, self_k_cache_updated, self_v_cache_updated = self.attn(
            self.originalBlock.attn_ln(x), self_k_cache, self_v_cache, mask=mask
        )
        x = x + self_attn_x

        cross_attention_weights = None
        if self.cross_attn:
            cross_out, cross_attention_weights = self.cross_attn(
                self.originalBlock.cross_attn_ln(x), cross_k, cross_v
            )
            x = x + cross_out

        x = x + self.originalBlock.mlp(self.originalBlock.mlp_ln(x))
        return x, self_k_cache_updated, self_v_cache_updated, cross_attention_weights


class TextDecoderWithAttention(nn.Module):
    """Text decoder that outputs cross-attention weights from alignment heads."""

    def __init__(
        self,
        inTextDecoder: TextDecoder,
        in_n_ctx: int,
        alignment_heads: List[Tuple[int, int]],
    ):
        super().__init__()
        self.textDecoder = inTextDecoder
        self.n_ctx = in_n_ctx
        self.alignment_heads = alignment_heads

        self.blocks = nn.ModuleList()
        for i, original_block in enumerate(self.textDecoder.blocks):
            self.blocks.append(
                ResidualAttentionBlockWithWeights(original_block, i, alignment_heads)
            )

    def forward(
        self,
        tokens: Tensor,
        n_layer_self_k_cache: Tensor,
        n_layer_self_v_cache: Tensor,
        n_layer_cross_k: Tensor,
        n_layer_cross_v: Tensor,
        offset: Tensor,
    ) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
        x = (
            self.textDecoder.token_embedding(tokens)
            + self.textDecoder.positional_embedding[
                offset[0] : offset[0] + tokens.shape[-1]
            ]
        )
        x = x.to(n_layer_cross_k[0].dtype)

        # Collect attention weights from alignment heads across all layers
        all_attention_weights = []

        for i, block in enumerate(self.blocks):
            self_k_cache = n_layer_self_k_cache[i, :, : offset[0] + tokens.shape[-1], :]
            self_v_cache = n_layer_self_v_cache[i, :, : offset[0] + tokens.shape[-1], :]

            x, self_k_cache, self_v_cache, attn_weights = block(
                x,
                self_k_cache=self_k_cache,
                self_v_cache=self_v_cache,
                cross_k=n_layer_cross_k[i],
                cross_v=n_layer_cross_v[i],
                mask=self.textDecoder.mask,
            )

            n_layer_self_k_cache[i, :, : offset[0] + tokens.shape[-1], :] = self_k_cache
            n_layer_self_v_cache[i, :, : offset[0] + tokens.shape[-1], :] = self_v_cache

            if attn_weights is not None:
                all_attention_weights.append(attn_weights)

        x = self.textDecoder.ln(x)

        logits = (
            torch.matmul(
                self.textDecoder.token_embedding.weight.to(x.dtype),
                x.permute(0, 2, 1),
            )
            .permute(0, 2, 1)
            .float()
        )

        # Stack attention weights from all alignment heads
        # Shape: (batch, total_alignment_heads, n_tokens, n_audio_ctx)
        if all_attention_weights:
            cross_attention_weights = torch.cat(all_attention_weights, dim=1)
        else:
            # Fallback: create dummy tensor if no alignment heads configured
            cross_attention_weights = torch.zeros(
                tokens.shape[0], 1, tokens.shape[1], n_layer_cross_k.shape[2],
                device=tokens.device, dtype=logits.dtype
            )

        return logits, n_layer_self_k_cache, n_layer_self_v_cache, cross_attention_weights


# =============================================================================
# Main export function
# =============================================================================


@torch.no_grad()
def main():
    args = get_args()
    name = args.model

    print(f"Exporting {name} with cross-attention weights")

    opset_version = 13

    # Load model
    model = load_model(name)
    print(f"Model dimensions: {model.dims}")
    print(f"Total parameters: {sum(p.numel() for p in model.parameters()):,}")

    # Get alignment heads for this model
    alignment_heads = get_alignment_heads(name, model)
    print(f"Using {len(alignment_heads)} alignment heads: {alignment_heads}")

    convert_tokens(name=name, model=model)

    tokenizer = whisper.tokenizer.get_tokenizer(
        model.is_multilingual, num_languages=model.num_languages
    )

    model.eval()

    # Prepare test input
    audio = torch.rand(16000 * 2)
    audio = whisper.pad_or_trim(audio)

    if name in ("distil-large-v3", "distil-large-v3.5"):
        n_mels = 128
    elif name in ("large", "large-v3", "turbo"):
        n_mels = 128
    else:
        n_mels = 80

    mel = whisper.log_mel_spectrogram(audio, n_mels=n_mels).to(model.device).unsqueeze(0)
    batch_size = 1

    # Export encoder (same as original)
    encoder = AudioEncoderTensorCache(model.encoder, model.decoder)
    n_layer_cross_k, n_layer_cross_v = encoder(mel)

    encoder_filename = f"{name}-encoder.onnx"
    torch.onnx.export(
        encoder,
        mel,
        encoder_filename,
        opset_version=opset_version,
        input_names=["mel"],
        output_names=["n_layer_cross_k", "n_layer_cross_v"],
        dynamic_axes={
            "mel": {0: "n_audio", 2: "T"},
            "n_layer_cross_k": {1: "n_audio", 2: "T"},
            "n_layer_cross_v": {1: "n_audio", 2: "T"},
        },
    )

    encoder_meta_data = {
        "model_type": f"whisper-{name}",
        "version": "2",  # Version 2 indicates attention-enabled
        "maintainer": "k2-fsa",
        "n_mels": model.dims.n_mels,
        "n_audio_ctx": model.dims.n_audio_ctx,
        "n_audio_state": model.dims.n_audio_state,
        "n_audio_head": model.dims.n_audio_head,
        "n_audio_layer": model.dims.n_audio_layer,
        "n_vocab": model.dims.n_vocab,
        "n_text_ctx": model.dims.n_text_ctx,
        "n_text_state": model.dims.n_text_state,
        "n_text_head": model.dims.n_text_head,
        "n_text_layer": model.dims.n_text_layer,
        "sot_sequence": ",".join(list(map(str, tokenizer.sot_sequence))),
        "all_language_tokens": ",".join(list(map(str, tokenizer.all_language_tokens))),
        "all_language_codes": ",".join(tokenizer.all_language_codes),
        "sot": tokenizer.sot,
        "sot_index": tokenizer.sot_sequence.index(tokenizer.sot),
        "eot": tokenizer.eot,
        "blank_id": tokenizer.encode(" ")[0],
        "is_multilingual": int(model.is_multilingual),
        "no_speech": tokenizer.no_speech,
        "non_speech_tokens": ",".join(list(map(str, tokenizer.non_speech_tokens))),
        "transcribe": tokenizer.transcribe,
        "translate": tokenizer.translate,
        "sot_prev": tokenizer.sot_prev,
        "sot_lm": tokenizer.sot_lm,
        "no_timestamps": tokenizer.no_timestamps,
        # Attention-specific metadata
        "n_alignment_heads": len(alignment_heads),
        "alignment_heads": ",".join([f"{l}:{h}" for l, h in alignment_heads]),
    }
    print(f"Encoder metadata: {encoder_meta_data}")
    add_meta_data(filename=encoder_filename, meta_data=encoder_meta_data)

    # Export decoder with attention outputs
    n_audio = mel.shape[0]
    tokens = torch.tensor(
        [[tokenizer.sot, tokenizer.sot, tokenizer.sot]] * n_audio
    ).to(mel.device)

    decoder = TextDecoderWithAttention(
        model.decoder, model.dims.n_text_ctx, alignment_heads
    )

    n_layer_self_k_cache = torch.zeros(
        (
            len(model.decoder.blocks),
            n_audio,
            model.dims.n_text_ctx,
            model.dims.n_text_state,
        ),
        device=mel.device,
    )
    n_layer_self_v_cache = torch.zeros(
        (
            len(model.decoder.blocks),
            n_audio,
            model.dims.n_text_ctx,
            model.dims.n_text_state,
        ),
        device=mel.device,
    )
    offset = torch.zeros(1, dtype=torch.int64).to(mel.device)

    # Test forward pass
    logits, _, _, cross_attn_weights = decoder(
        tokens,
        n_layer_self_k_cache.clone(),
        n_layer_self_v_cache.clone(),
        n_layer_cross_k,
        n_layer_cross_v,
        offset,
    )

    print(f"Logits shape: {logits.shape}")
    print(f"Cross-attention weights shape: {cross_attn_weights.shape}")
    assert cross_attn_weights.shape == (
        n_audio, len(alignment_heads), tokens.shape[1], model.dims.n_audio_ctx
    ), f"Unexpected attention shape: {cross_attn_weights.shape}"

    # Export with single token input (for autoregressive decoding)
    offset = torch.tensor([tokens.shape[1]], dtype=torch.int64).to(mel.device)
    tokens_single = torch.tensor([[tokenizer.sot]] * n_audio).to(mel.device)

    decoder_filename = f"{name}-decoder.onnx"
    torch.onnx.export(
        decoder,
        (
            tokens_single,
            n_layer_self_k_cache,
            n_layer_self_v_cache,
            n_layer_cross_k,
            n_layer_cross_v,
            offset,
        ),
        decoder_filename,
        opset_version=opset_version,
        input_names=[
            "tokens",
            "in_n_layer_self_k_cache",
            "in_n_layer_self_v_cache",
            "n_layer_cross_k",
            "n_layer_cross_v",
            "offset",
        ],
        output_names=[
            "logits",
            "out_n_layer_self_k_cache",
            "out_n_layer_self_v_cache",
            "cross_attention_weights",
        ],
        dynamic_axes={
            "tokens": {0: "n_audio", 1: "n_tokens"},
            "in_n_layer_self_k_cache": {1: "n_audio"},
            "in_n_layer_self_v_cache": {1: "n_audio"},
            "n_layer_cross_k": {1: "n_audio", 2: "T"},
            "n_layer_cross_v": {1: "n_audio", 2: "T"},
            "cross_attention_weights": {0: "n_audio", 2: "n_tokens", 3: "T"},
        },
    )

    if "large" in name:
        decoder_external_filename = decoder_filename.split(".onnx")[0]
        decoder_model = onnx.load(decoder_filename)
        onnx.save(
            decoder_model,
            decoder_filename,
            save_as_external_data=True,
            all_tensors_to_one_file=True,
            location=decoder_external_filename + ".weights",
        )

    # Generate int8 quantized models
    print("Generating int8 quantized models...")

    encoder_filename_int8 = f"{name}-encoder.int8.onnx"
    quantize_dynamic(
        model_input=encoder_filename,
        model_output=encoder_filename_int8,
        op_types_to_quantize=["MatMul"],
        weight_type=QuantType.QInt8,
    )

    decoder_filename_int8 = f"{name}-decoder.int8.onnx"
    quantize_dynamic(
        model_input=decoder_filename,
        model_output=decoder_filename_int8,
        op_types_to_quantize=["MatMul"],
        weight_type=QuantType.QInt8,
    )

    print(f"\nExported files:")
    print(f"  - {encoder_filename}")
    print(f"  - {encoder_filename_int8}")
    print(f"  - {decoder_filename}")
    print(f"  - {decoder_filename_int8}")
    print(f"  - {name}-tokens.txt")
    print(f"\nDecoder has 4 outputs including cross_attention_weights")


if __name__ == "__main__":
    torch.set_num_threads(1)
    torch.set_num_interop_threads(1)
    # To fix
    # TypeError: scaled_dot_product_attention(): argument 'is_causal' must be bool, not Tensor
    # See also https://github.com/k2-fsa/sherpa-onnx/issues/1764
    from whisper.model import disable_sdpa

    with disable_sdpa():
        main()


================================================
FILE: scripts/whisper/export-onnx.py
================================================
#!/usr/bin/env python3
# Copyright    2023  Xiaomi Corp.        (authors: Fangjun Kuang)
# flake8: noqa

"""
Note: Code in this file is modified from
https://github.com/TadaoYamaoka/whisper/blob/main/to_onnx.py

Thanks to https://github.com/TadaoYamaoka
for making the onnx export script public.

Note that we have removed the 30 seconds constraint from whisper. You can
use any T <= 30.
"""

import argparse
import os
from pathlib import Path
from typing import Any, Dict, Optional

import onnx
import torch
import torch.nn.functional as F
from onnxruntime.quantization import QuantType, quantize_dynamic
from torch import Tensor, nn

import whisper
from whisper.model import (
    AudioEncoder,
    MultiHeadAttention,
    ResidualAttentionBlock,
    TextDecoder,
)


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model",
        type=str,
        required=True,
        # fmt: off
        choices=[
            "tiny", "tiny.en", "base", "base.en",
            "small", "small.en", "medium", "medium.en",
            "large-v1", "large-v2",
            "large", "large-v3", "turbo", # these three have feature dim 128
            "distil-medium.en", "distil-small.en", "distil-large-v2",
            "distil-large-v3",
            "distil-large-v3.5",
            # for fine-tuned models from icefall
            "medium-aishell",
            ],
        # fmt: on
    )
    return parser.parse_args()


def add_meta_data(filename: str, meta_data: Dict[str, Any]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)

    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    if "large" in filename or "turbo" in filename:
        external_filename = filename.split(".onnx")[0]
        onnx.save(
            model,
            filename,
            save_as_external_data=True,
            all_tensors_to_one_file=True,
            location=external_filename + ".weights",
        )
    else:
        onnx.save(model, filename)


def modified_audio_encoder_forward(self: AudioEncoder, x: torch.Tensor):
    """
    x : torch.Tensor, shape = (batch_size, n_mels, n_ctx)
        the mel spectrogram of the audio
    """
    x = F.gelu(self.conv1(x))
    x = F.gelu(self.conv2(x))
    x = x.permute(0, 2, 1)

    if False:
        # This branch contains the original code
        assert x.shape[1:] == self.positional_embedding.shape, "incorrect audio shape"
        x = (x + self.positional_embedding).to(x.dtype)
    else:
        # This branch contains the actual changes
        assert (
            x.shape[2] == self.positional_embedding.shape[1]
        ), f"incorrect audio shape: {x.shape}, {self.positional_embedding.shape}"
        assert (
            x.shape[1] == self.positional_embedding.shape[0]
        ), f"incorrect audio shape: {x.shape}, {self.positional_embedding.shape}"
        x = (x + self.positional_embedding[: x.shape[1]]).to(x.dtype)

    for block in self.blocks:
        x = block(x)

    x = self.ln_post(x)
    return x


AudioEncoder.forward = modified_audio_encoder_forward


class AudioEncoderTensorCache(nn.Module):
    def __init__(self, inAudioEncoder: AudioEncoder, inTextDecoder: TextDecoder):
        super().__init__()
        self.audioEncoder = inAudioEncoder
        self.textDecoder = inTextDecoder

    def forward(self, x: Tensor):
        audio_features = self.audioEncoder(x)

        n_layer_cross_k_list = []
        n_layer_cross_v_list = []
        for block in self.textDecoder.blocks:
            n_layer_cross_k_list.append(block.cross_attn.key(audio_features))
            n_layer_cross_v_list.append(block.cross_attn.value(audio_features))

        return torch.stack(n_layer_cross_k_list), torch.stack(n_layer_cross_v_list)


class MultiHeadAttentionCross(nn.Module):
    def __init__(self, inMultiHeadAttention: MultiHeadAttention):
        super().__init__()
        self.multiHeadAttention = inMultiHeadAttention

    def forward(
        self,
        x: Tensor,
        k: Tensor,
        v: Tensor,
        mask: Optional[Tensor] = None,
    ):
        q = self.multiHeadAttention.query(x)
        wv, qk = self.multiHeadAttention.qkv_attention(q, k, v, mask)
        return self.multiHeadAttention.out(wv)


class MultiHeadAttentionSelf(nn.Module):
    def __init__(self, inMultiHeadAttention: MultiHeadAttention):
        super().__init__()
        self.multiHeadAttention = inMultiHeadAttention

    def forward(
        self,
        x: Tensor,  # (b, n_ctx      , n_state)
        k_cache: Tensor,  # (b, n_ctx_cache, n_state)
        v_cache: Tensor,  # (b, n_ctx_cache, n_state)
        mask: Tensor,
    ):
        q = self.multiHeadAttention.query(x)  # (b, n_ctx, n_state)
        k = self.multiHeadAttention.key(x)  # (b, n_ctx, n_state)
        v = self.multiHeadAttention.value(x)  # (b, n_ctx, n_state)

        k_cache[:, -k.shape[1] :, :] = k  # (b, n_ctx_cache + n_ctx, n_state)
        v_cache[:, -v.shape[1] :, :] = v  # (b, n_ctx_cache + n_ctx, n_state)

        wv, qk = self.multiHeadAttention.qkv_attention(q, k_cache, v_cache, mask)
        return self.multiHeadAttention.out(wv), k_cache, v_cache


class ResidualAttentionBlockTensorCache(nn.Module):
    def __init__(self, inResidualAttentionBlock: ResidualAttentionBlock):
        super().__init__()
        self.originalBlock = inResidualAttentionBlock
        self.attn = MultiHeadAttentionSelf(inResidualAttentionBlock.attn)
        self.cross_attn = (
            MultiHeadAttentionCross(inResidualAttentionBlock.cross_attn)
            if inResidualAttentionBlock.cross_attn
            else None
        )

    def forward(
        self,
        x: Tensor,
        self_k_cache: Tensor,
        self_v_cache: Tensor,
        cross_k: Tensor,
        cross_v: Tensor,
        mask: Tensor,
    ):
        self_attn_x, self_k_cache_updated, self_v_cache_updated = self.attn(
            self.originalBlock.attn_ln(x), self_k_cache, self_v_cache, mask=mask
        )
        x = x + self_attn_x

        if self.cross_attn:
            x = x + self.cross_attn(
                self.originalBlock.cross_attn_ln(x), cross_k, cross_v
            )

        x = x + self.originalBlock.mlp(self.originalBlock.mlp_ln(x))
        return x, self_k_cache_updated, self_v_cache_updated


class TextDecoderTensorCache(nn.Module):
    def __init__(self, inTextDecoder: TextDecoder, in_n_ctx: int):
        super().__init__()
        self.textDecoder = inTextDecoder
        self.n_ctx = in_n_ctx

        self.blocks = []
        for orginal_block in self.textDecoder.blocks:
            self.blocks.append(ResidualAttentionBlockTensorCache(orginal_block))

    def forward(
        self,
        tokens: Tensor,
        n_layer_self_k_cache: Tensor,
        n_layer_self_v_cache: Tensor,
        n_layer_cross_k: Tensor,
        n_layer_cross_v: Tensor,
        offset: Tensor,
    ):
        x = (
            self.textDecoder.token_embedding(tokens)
            + self.textDecoder.positional_embedding[
                offset[0] : offset[0] + tokens.shape[-1]
            ]
        )
        x = x.to(n_layer_cross_k[0].dtype)

        i = 0
        for block in self.blocks:
            self_k_cache = n_layer_self_k_cache[i, :, : offset[0] + tokens.shape[-1], :]
            self_v_cache = n_layer_self_v_cache[i, :, : offset[0] + tokens.shape[-1], :]
            x, self_k_cache, self_v_cache = block(
                x,
                self_k_cache=self_k_cache,
                self_v_cache=self_v_cache,
                cross_k=n_layer_cross_k[i],
                cross_v=n_layer_cross_v[i],
                mask=self.textDecoder.mask,
            )
            n_layer_self_k_cache[i, :, : offset[0] + tokens.shape[-1], :] = self_k_cache
            n_layer_self_v_cache[i, :, : offset[0] + tokens.shape[-1], :] = self_v_cache
            i += 1

        x = self.textDecoder.ln(x)

        if False:
            # x.shape (1, 3, 384)
            # weight.shape (51684, 384)

            logits = (
                x
                @ torch.transpose(
                    self.textDecoder.token_embedding.weight.to(x.dtype), 0, 1
                )
            ).float()
        else:
            logits = (
                torch.matmul(
                    self.textDecoder.token_embedding.weight.to(x.dtype),
                    x.permute(0, 2, 1),
                )
                .permute(0, 2, 1)
                .float()
            )

        return logits, n_layer_self_k_cache, n_layer_self_v_cache


# ref: https://github.com/ggerganov/whisper.cpp/blob/master/models/convert-pt-to-ggml.py#L232
def convert_tokens(name, model):
    whisper_dir = Path(whisper.__file__).parent
    multilingual = model.is_multilingual
    tokenizer = (
        whisper_dir
        / "assets"
        / (multilingual and "multilingual.tiktoken" or "gpt2.tiktoken")
    )
    if not tokenizer.is_file():
        raise ValueError(f"Cannot find {tokenizer}")

    #  import base64

    with open(tokenizer, "r") as f:
        contents = f.read()
        #  tokens = {
        #      base64.b64decode(token): int(rank)
        #      for token, rank in (line.split() for line in contents.splitlines() if line)
        #  }
        tokens = {
            token: int(rank)
            for token, rank in (line.split() for line in contents.splitlines() if line)
        }

    with open(f"{name}-tokens.txt", "w") as f:
        for t, i in tokens.items():
            f.write(f"{t} {i}\n")


def load_model(name: str):
    """Load a Whisper model by name.

    For standard OpenAI models (tiny, base, small, medium, large, etc.),
    this uses whisper.load_model() directly.

    For distil-whisper and fine-tuned models, this expects the checkpoint
    file to be pre-downloaded to the current directory with a specific name.

    Args:
        name: Model name (e.g., "tiny", "distil-small.en", "medium-aishell")

    Returns:
        The loaded whisper model.

    Raises:
        ValueError: If a required checkpoint file is not found.
    """
    if name == "distil-medium.en":
        filename = "./distil-medium-en-original-model.bin"
        if not Path(filename).is_file():
            raise ValueError(
                """
                Please go to https://huggingface.co/distil-whisper/distil-medium.en
                to download original-model.bin
                You can use the following command to do that:

                wget -O distil-medium-en-original-model.bin https://huggingface.co/distil-whisper/distil-medium.en/resolve/main/original-model.bin
            """
            )
        return whisper.load_model(filename)
    elif name == "distil-large-v2":
        filename = "./distil-large-v2-original-model.bin"
        if not Path(filename).is_file():
            raise ValueError(
                """
                Please go to https://huggingface.co/distil-whisper/distil-large-v2
                to download original-model.bin
                You can use the following command to do that:

                wget -O distil-large-v2-original-model.bin https://huggingface.co/distil-whisper/distil-large-v2/resolve/main/original-model.bin
            """
            )
        return whisper.load_model(filename)
    elif name == "distil-large-v3":
        filename = "./distil-large-v3-original-model.bin"
        if not Path(filename).is_file():
            raise ValueError(
                """
                Please go to https://huggingface.co/distil-whisper/distil-large-v3-openai
                to download model.bin
                You can use the following command to do that:

                wget -O distil-large-v3-original-model.bin https://huggingface.co/distil-whisper/distil-large-v3-openai/resolve/main/model.bin
            """
            )
        return whisper.load_model(filename)
    elif name == "distil-large-v3.5":
        filename = "./distil-large-v3.5-original-model.bin"
        if not Path(filename).is_file():
            raise ValueError(
                """
                Please go to https://huggingface.co/distil-whisper/distil-large-v3.5-openai/
                to download model.bin
                You can use the following command to do that:

                wget -O distil-large-v3.5-original-model.bin https://huggingface.co/distil-whisper/distil-large-v3.5-openai/resolve/main/model.bin
            """
            )
        return whisper.load_model(filename)
    elif name == "distil-small.en":
        filename = "./distil-small-en-original-model.bin"
        if not Path(filename).is_file():
            raise ValueError(
                """
                Please go to https://huggingface.co/distil-whisper/distil-small.en
                to download original-model.bin
                You can use the following command to do that:

                wget -O distil-small-en-original-model.bin https://huggingface.co/distil-whisper/distil-small.en/resolve/main/original-model.bin
            """
            )
        return whisper.load_model(filename)
    elif name == "medium-aishell":
        filename = "./medium-aishell.pt"
        if not Path(filename).is_file():
            raise ValueError(
                """
                Please go to https://huggingface.co/yuekai/icefall_asr_aishell_whisper/tree/main/exp_medium
                to download whisper-medium-aishell1-epoch-10-avg-4.pt
                You can use the following command to do that:

                wget -O medium-aishell.pt https://huggingface.co/yuekai/icefall_asr_aishell_whisper/resolve/main/exp_medium/whisper-medium-aishell1-epoch-10-avg-4.pt
            """
            )
        return whisper.load_model(filename)
    else:
        return whisper.load_model(name)


@torch.no_grad()
def main():
    args = get_args()
    name = args.model
    print(args)
    print(name)

    opset_version = 17

    model = load_model(name)
    print(model.dims)

    print(
        f"number of model parameters: {name}",
        sum(p.numel() for p in model.parameters()),
    )
    print(
        f"number of encoder parameters: {name}",
        sum(p.numel() for p in model.encoder.parameters()),
    )
    print(
        f"number of decoder parameters: {name}",
        sum(p.numel() for p in model.decoder.parameters()),
    )

    convert_tokens(name=name, model=model)

    # write tokens

    tokenizer = whisper.tokenizer.get_tokenizer(
        model.is_multilingual, num_languages=model.num_languages
    )

    model.eval()
    print(model.dims)
    audio = torch.rand(16000 * 2)
    audio = whisper.pad_or_trim(audio)
    assert audio.shape == (16000 * 30,), audio.shape

    if args.model in ("distil-large-v3", "distil-large-v3.5"):
        n_mels = 128
    elif args.model in (
        "large",
        "large-v3",
        "turbo",
    ):
        n_mels = 128
    else:
        n_mels = 80

    mel = (
        whisper.log_mel_spectrogram(audio, n_mels=n_mels).to(model.device).unsqueeze(0)
    )
    batch_size = 1
    assert mel.shape == (batch_size, n_mels, 30 * 100), mel.shape

    encoder = AudioEncoderTensorCache(model.encoder, model.decoder)

    n_layer_cross_k, n_layer_cross_v = encoder(mel)
    assert n_layer_cross_k.shape == (
        model.dims.n_text_layer,
        batch_size,
        model.dims.n_audio_ctx,
        model.dims.n_text_state,
    ), (n_layer_cross_k.shape, model.dims)
    assert n_layer_cross_v.shape == (
        model.dims.n_text_layer,
        batch_size,
        model.dims.n_audio_ctx,
        model.dims.n_text_state,
    ), (n_layer_cross_v.shape, model.dims)

    encoder_filename = f"{name}-encoder.onnx"
    torch.onnx.export(
        encoder,
        mel,
        encoder_filename,
        opset_version=opset_version,
        input_names=["mel"],
        output_names=["n_layer_cross_k", "n_layer_cross_v"],
        dynamic_axes={
            "mel": {0: "n_audio", 2: "T"},  # n_audio is also known as batch_size
            "n_layer_cross_k": {1: "n_audio", 2: "T"},
            "n_layer_cross_v": {1: "n_audio", 2: "T"},
        },
    )

    encoder_meta_data = {
        "model_type": f"whisper-{name}",
        "version": "1",
        "maintainer": "k2-fsa",
        "n_mels": model.dims.n_mels,
        "n_audio_ctx": model.dims.n_audio_ctx,
        "n_audio_state": model.dims.n_audio_state,
        "n_audio_head": model.dims.n_audio_head,
        "n_audio_layer": model.dims.n_audio_layer,
        "n_vocab": model.dims.n_vocab,
        "n_text_ctx": model.dims.n_text_ctx,
        "n_text_state": model.dims.n_text_state,
        "n_text_head": model.dims.n_text_head,
        "n_text_layer": model.dims.n_text_layer,
        "sot_sequence": ",".join(list(map(str, tokenizer.sot_sequence))),
        "all_language_tokens": ",".join(
            list(map(str, tokenizer.all_language_tokens))
        ),  # a list of ids
        "all_language_codes": ",".join(
            tokenizer.all_language_codes
        ),  # e.g., en, de, zh, fr
        "sot": tokenizer.sot,
        "sot_index": tokenizer.sot_sequence.index(tokenizer.sot),
        "eot": tokenizer.eot,
        "blank_id": tokenizer.encode(" ")[0],
        "is_multilingual": int(model.is_multilingual),
        "no_speech": tokenizer.no_speech,
        "non_speech_tokens": ",".join(list(map(str, tokenizer.non_speech_tokens))),
        "transcribe": tokenizer.transcribe,
        "translate": tokenizer.translate,
        "sot_prev": tokenizer.sot_prev,
        "sot_lm": tokenizer.sot_lm,
        "no_timestamps": tokenizer.no_timestamps,
    }
    print(f"encoder_meta_data: {encoder_meta_data}")
    add_meta_data(filename=encoder_filename, meta_data=encoder_meta_data)

    n_audio = mel.shape[0]
    tokens = torch.tensor([[tokenizer.sot, tokenizer.sot, tokenizer.sot]] * n_audio).to(
        mel.device
    )  # [n_audio, 3]
    decoder = TextDecoderTensorCache(model.decoder, model.dims.n_text_ctx)
    n_layer_self_k_cache = torch.zeros(
        (
            len(model.decoder.blocks),
            n_audio,
            model.dims.n_text_ctx,
            model.dims.n_text_state,
        ),
        device=mel.device,
    )
    n_layer_self_v_cache = torch.zeros(
        (
            len(model.decoder.blocks),
            n_audio,
            model.dims.n_text_ctx,
            model.dims.n_text_state,
        ),
        device=mel.device,
    )
    offset = torch.zeros(1, dtype=torch.int64).to(mel.device)
    logits, n_layer_self_k_cache, n_layer_self_v_cache = decoder(
        tokens,
        n_layer_self_k_cache,
        n_layer_self_v_cache,
        n_layer_cross_k,
        n_layer_cross_v,
        offset,
    )
    assert logits.shape == (n_audio, tokens.shape[1], model.dims.n_vocab)
    assert n_layer_self_k_cache.shape == (
        model.dims.n_text_layer,
        n_audio,
        model.dims.n_text_ctx,
        model.dims.n_text_state,
    )
    assert n_layer_self_v_cache.shape == (
        model.dims.n_text_layer,
        n_audio,
        model.dims.n_text_ctx,
        model.dims.n_text_state,
    )

    offset = torch.tensor([tokens.shape[1]], dtype=torch.int64).to(mel.device)
    tokens = torch.tensor([[tokenizer.sot]] * n_audio).to(mel.device)  # [n_audio, 1]

    logits, out_n_layer_self_k_cache, out_n_layer_self_v_cache = decoder(
        tokens,
        n_layer_self_k_cache,
        n_layer_self_v_cache,
        n_layer_cross_k,
        n_layer_cross_v,
        offset,
    )

    decoder_filename = f"{name}-decoder.onnx"
    torch.onnx.export(
        decoder,
        (
            tokens,
            n_layer_self_k_cache,
            n_layer_self_v_cache,
            n_layer_cross_k,
            n_layer_cross_v,
            offset,
        ),
        decoder_filename,
        opset_version=opset_version,
        input_names=[
            "tokens",
            "in_n_layer_self_k_cache",
            "in_n_layer_self_v_cache",
            "n_layer_cross_k",
            "n_layer_cross_v",
            "offset",
        ],
        output_names=["logits", "out_n_layer_self_k_cache", "out_n_layer_self_v_cache"],
        dynamic_axes={
            "tokens": {0: "n_audio", 1: "n_tokens"},
            "in_n_layer_self_k_cache": {1: "n_audio"},
            "in_n_layer_self_v_cache": {1: "n_audio"},
            "n_layer_cross_k": {1: "n_audio", 2: "T"},
            "n_layer_cross_v": {1: "n_audio", 2: "T"},
        },
    )

    if "large" in args.model:
        decoder_external_filename = decoder_filename.split(".onnx")[0]
        decoder_model = onnx.load(decoder_filename)
        onnx.save(
            decoder_model,
            decoder_filename,
            save_as_external_data=True,
            all_tensors_to_one_file=True,
            location=decoder_external_filename + ".weights",
        )

    # Generate int8 quantization models
    # See https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html#data-type-selection

    print("Generate int8 quantization models")

    encoder_filename_int8 = f"{name}-encoder.int8.onnx"
    quantize_dynamic(
        model_input=encoder_filename,
        model_output=encoder_filename_int8,
        op_types_to_quantize=["MatMul"],
        weight_type=QuantType.QInt8,
    )

    decoder_filename_int8 = f"{name}-decoder.int8.onnx"
    quantize_dynamic(
        model_input=decoder_filename,
        model_output=decoder_filename_int8,
        op_types_to_quantize=["MatMul"],
        weight_type=QuantType.QInt8,
    )


if __name__ == "__main__":
    torch.set_num_threads(1)
    torch.set_num_interop_threads(1)
    # To fix
    # TypeError: scaled_dot_product_attention(): argument 'is_causal' must be bool, not Tensor
    # See also https://github.com/k2-fsa/sherpa-onnx/issues/1764
    from whisper.model import disable_sdpa

    with disable_sdpa():
        main()


================================================
FILE: scripts/whisper/find_alignment_heads.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Posit Software, PBC

"""
Find alignment heads for a Whisper model by analyzing cross-attention patterns.

Alignment heads are attention heads that show monotonically increasing patterns,
meaning they attend to progressively later parts of the audio as more text tokens
are decoded. These heads are useful for computing word-level timestamps.

Usage:
    python find_alignment_heads.py --model distil-small.en --audio 0.wav
"""

import argparse
from collections import defaultdict
from typing import Dict, List, Tuple

import numpy as np
import torch
import whisper
from whisper.audio import load_audio, log_mel_spectrogram, pad_or_trim

from export_onnx import load_model


def get_args():
    parser = argparse.ArgumentParser(description="Find alignment heads in Whisper models")
    parser.add_argument("--model", type=str, required=True, help="Model name (e.g., distil-small.en)")
    parser.add_argument("--audio", type=str, required=True, help="Path to audio file")
    parser.add_argument("--top-k", type=int, default=10, help="Number of top heads to report")
    return parser.parse_args()


@torch.no_grad()
def compute_cross_attention_weights(
    model: whisper.Whisper,
    audio_path: str,
) -> Tuple[Dict[Tuple[int, int], np.ndarray], List[int], str]:
    """
    Run transcription and capture cross-attention weights from all heads.

    Returns:
        attention_weights: Dict mapping (layer, head) to attention matrix [n_tokens, n_audio_frames]
        token_ids: List of decoded token IDs
        text: Transcribed text
    """
    # Load and preprocess audio
    audio = load_audio(audio_path)
    audio = pad_or_trim(audio)

    n_mels = model.dims.n_mels
    mel = log_mel_spectrogram(audio, n_mels=n_mels).to(model.device)

    # Encode audio
    audio_features = model.encoder(mel.unsqueeze(0))

    # Get tokenizer
    tokenizer = whisper.tokenizer.get_tokenizer(
        model.is_multilingual,
        num_languages=getattr(model, 'num_languages', None) or (99 if model.is_multilingual else None),
        task="transcribe",
    )

    # Initial tokens (SOT sequence + no_timestamps)
    # The no_timestamps token is required for proper decoding
    tokens = list(tokenizer.sot_sequence) + [tokenizer.no_timestamps]

    # Storage for attention weights per (layer, head)
    all_attention_weights: Dict[Tuple[int, int], List[np.ndarray]] = defaultdict(list)

    n_layers = len(model.decoder.blocks)
    n_heads = model.dims.n_text_head

    print(f"Model has {n_layers} decoder layers with {n_heads} attention heads each")

    # Decode with attention capture
    max_tokens = 448  # max context length

    for i in range(max_tokens):
        tokens_tensor = torch.tensor([tokens]).to(model.device)

        # We need to manually run through decoder blocks to capture attention
        x = model.decoder.token_embedding(tokens_tensor) + model.decoder.positional_embedding[:tokens_tensor.shape[1]]
        x = x.to(audio_features.dtype)

        for layer_idx, block in enumerate(model.decoder.blocks):
            # Self-attention (we don't need this for alignment)
            x = x + block.attn(block.attn_ln(x), mask=model.decoder.mask)[0]

            # Cross-attention - compute manually to get weights
            cross_attn = block.cross_attn
            ln_output = block.cross_attn_ln(x)

            q = cross_attn.query(ln_output)
            k = cross_attn.key(audio_features)
            v = cross_attn.value(audio_features)

            # Reshape for multi-head attention
            batch_size, n_ctx, n_state = q.shape
            head_dim = n_state // n_heads

            q = q.view(batch_size, n_ctx, n_heads, head_dim).permute(0, 2, 1, 3)
            k = k.view(batch_size, -1, n_heads, head_dim).permute(0, 2, 1, 3)
            v = v.view(batch_size, -1, n_heads, head_dim).permute(0, 2, 1, 3)

            # Compute attention weights
            scale = head_dim ** -0.25
            qk = (q * scale) @ (k * scale).transpose(-1, -2)
            attn_weights = torch.softmax(qk.float(), dim=-1)  # [batch, heads, n_ctx, n_audio]

            # Store attention weights for each head (only the last token's attention)
            for head_idx in range(n_heads):
                # Get attention from the last decoded token
                weights = attn_weights[0, head_idx, -1, :].detach().cpu().numpy()
                all_attention_weights[(layer_idx, head_idx)].append(weights)

            # Compute attention output
            attn_output = (attn_weights.to(v.dtype) @ v).permute(0, 2, 1, 3).flatten(start_dim=2)
            attn_output = cross_attn.out(attn_output)
            x = x + attn_output

            # MLP
            x = x + block.mlp(block.mlp_ln(x))

        x = model.decoder.ln(x)
        logits = (x @ model.decoder.token_embedding.weight.T).float()

        # Get next token
        next_token = logits[0, -1].argmax().item()

        if next_token == tokenizer.eot:
            break

        tokens.append(next_token)

    # Convert to numpy arrays [n_tokens, n_audio_frames]
    attention_matrices = {}
    for key, weights_list in all_attention_weights.items():
        attention_matrices[key] = np.stack(weights_list, axis=0)

    # Decode text
    text = tokenizer.decode(tokens[len(tokenizer.sot_sequence):])

    return attention_matrices, tokens, text


def compute_monotonicity_score(attention: np.ndarray) -> float:
    """
    Compute how monotonically increasing the attention pattern is.

    For each token, find the frame with maximum attention (argmax).
    A good alignment head should have these argmax positions increasing
    monotonically (or nearly so) as tokens progress.

    Returns a score between 0 and 1, where 1 is perfectly monotonic.
    """
    n_tokens, n_frames = attention.shape

    if n_tokens < 2:
        return 0.0

    # Get the frame with maximum attention for each token
    peak_positions = np.argmax(attention, axis=1)

    # Count how many times position increases (or stays same)
    increases = 0
    for i in range(1, len(peak_positions)):
        if peak_positions[i] >= peak_positions[i - 1]:
            increases += 1

    monotonicity = increases / (len(peak_positions) - 1)
    return monotonicity


def compute_diagonal_score(attention: np.ndarray) -> float:
    """
    Compute how diagonal the attention pattern is.

    A diagonal pattern means token i attends mostly to audio frame i*scale,
    where scale = n_frames / n_tokens.
    """
    n_tokens, n_frames = attention.shape

    if n_tokens < 2:
        return 0.0

    # Expected diagonal positions
    scale = n_frames / n_tokens
    expected_positions = np.arange(n_tokens) * scale

    # Actual peak positions
    peak_positions = np.argmax(attention, axis=1)

    # Compute correlation between expected and actual
    if np.std(peak_positions) < 1e-6:
        return 0.0

    correlation = np.corrcoef(expected_positions, peak_positions)[0, 1]

    # Handle NaN
    if np.isnan(correlation):
        return 0.0

    return max(0, correlation)  # Only positive correlations indicate good alignment


def analyze_attention_heads(
    attention_matrices: Dict[Tuple[int, int], np.ndarray],
    top_k: int = 10,
) -> List[Tuple[Tuple[int, int], float, float, float]]:
    """
    Analyze all attention heads and rank them by alignment quality.

    Returns list of ((layer, head), monotonicity_score, diagonal_score, combined_score)
    sorted by combined score descending.
    """
    results = []

    for (layer, head), attention in attention_matrices.items():
        mono_score = compute_monotonicity_score(attention)
        diag_score = compute_diagonal_score(attention)
        combined_score = (mono_score + diag_score) / 2
        results.append(((layer, head), mono_score, diag_score, combined_score))

    # Sort by combined score (descending)
    results.sort(key=lambda x: x[3], reverse=True)

    return results[:top_k]


def main():
    args = get_args()

    # Load model
    print(f"Loading model: {args.model}")
    model = load_model(args.model)
    model.eval()  # Set to evaluation mode

    print(f"Model dimensions: {model.dims}")

    # Check if model already has alignment heads
    if hasattr(model, 'alignment_heads') and model.alignment_heads is not None:
        indices = model.alignment_heads.indices()
        existing_heads = list(zip(indices[0].tolist(), indices[1].tolist()))
        print(f"Model has pre-defined alignment heads: {existing_heads}")

    # Run transcription and capture attention
    print(f"\nTranscribing: {args.audio}")
    attention_matrices, tokens, text = compute_cross_attention_weights(model, args.audio)

    print(f"\nTranscription: {text}")
    print(f"Number of tokens: {len(tokens)}")

    # Analyze heads
    print(f"\nAnalyzing {len(attention_matrices)} attention heads...")
    top_heads = analyze_attention_heads(attention_matrices, args.top_k)

    print(f"\nTop {args.top_k} alignment head candidates:")
    print("-" * 60)
    print(f"{'Layer':>6} {'Head':>6} {'Monotonic':>12} {'Diagonal':>12} {'Combined':>12}")
    print("-" * 60)

    for (layer, head), mono, diag, combined in top_heads:
        print(f"{layer:>6} {head:>6} {mono:>12.3f} {diag:>12.3f} {combined:>12.3f}")

    # Generate Python code for the best heads
    print("\n" + "=" * 60)
    print("Suggested ALIGNMENT_HEADS entry:")
    print("=" * 60)

    # Use heads with combined score > 0.7 (or top 6 if fewer qualify)
    good_heads = [(l, h) for (l, h), m, d, c in top_heads if c > 0.7]
    if len(good_heads) < 6:
        good_heads = [(l, h) for (l, h), m, d, c in top_heads[:6]]

    print(f'"{args.model}": {good_heads},')


if __name__ == "__main__":
    main()


================================================
FILE: scripts/whisper/model-info.md
================================================
# tiny/tiny.en
```
ModelDimensions(
    n_mels=80,
    n_audio_ctx=1500,
    n_audio_state=384,
    n_audio_head=6,
    n_audio_layer=4,
    n_vocab=51865,
    n_text_ctx=448,
    n_text_state=384,
    n_text_head=6,
    n_text_layer=4
)
```

# base/base.en
```
ModelDimensions(
    n_mels=80,
    n_audio_ctx=1500,
    n_audio_state=512,
    n_audio_head=8,
    n_audio_layer=6,
    n_vocab=51865,
    n_text_ctx=448,
    n_text_state=512,
    n_text_head=8,
    n_text_layer=6
)
```

# small/small.en
```
ModelDimensions(
    n_mels=80,
    n_audio_ctx=1500,
    n_audio_state=768,
    n_audio_head=12,
    n_audio_layer=12,
    n_vocab=51865,
    n_text_ctx=448,
    n_text_state=768,
    n_text_head=12,
    n_text_layer=12
)
```


# medium/medium.en
```
ModelDimensions(
    n_mels=80,
    n_audio_ctx=1500,
    n_audio_state=1024,
    n_audio_head=16,
    n_audio_layer=24,
    n_vocab=51865,
    n_text_ctx=448,
    n_text_state=1024,
    n_text_head=16,
    n_text_layer=24
)
```

# large
```
ModelDimensions(
    n_mels=80,
    n_audio_ctx=1500,
    n_audio_state=1280,
    n_audio_head=20,
    n_audio_layer=32,
    n_vocab=51865,
    n_text_ctx=448,
    n_text_state=1280,
    n_text_head=20,
    n_text_layer=32
)
```


================================================
FILE: scripts/whisper/requirements.txt
================================================
openai-whisper


================================================
FILE: scripts/whisper/rknn/README.md
================================================
# Usage

You can find pre-exported rknn models for rk3588 at

https://modelscope.cn/models/csukuangfj/2026-01-05-rknn/files


# Download test wave

```
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/en.wav
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/en-16k.wav
```

## Export to onnx

```
./export_onnx.py --model tiny.en
```

## Test onnx

```
./test_onnx.py --model tiny.en
```

## Export to rknn

```
python3 ./export_rknn.py --target-platform rk3588  --in-model ./tiny.en-encoder.onnx --out-model ./tiny.en-encoder.rknn

python3 ./export_rknn.py --target-platform rk3588  --in-model ./tiny.en-decoder.onnx --out-model ./tiny.en-decoder.rknn
```

```
ls -lh tiny.en-*.rknn

-rw-r--r-- 1 kuangfangjun root 95M Jan  5 16:16 tiny.en-decoder.rknn
-rw-r--r-- 1 kuangfangjun root 22M Jan  5 16:15 tiny.en-encoder.rknn
```

## Run it on your rk3588 board

```
wget https://huggingface.co/csukuangfj/sherpa-onnx-whisper-tiny.en/resolve/main/tiny.en-tokens.txt

./test_on_rk3588_board.py  --encoder ./tiny.en-encoder.rknn --decoder ./tiny.en-decoder.rknn --tokens ./tiny.en-tokens.txt --wav ./en-16k.wav
```


================================================
FILE: scripts/whisper/rknn/export_onnx.py
================================================
#!/usr/bin/env python3
# Copyright    2023  Xiaomi Corp.        (authors: Fangjun Kuang)
# flake8: noqa

"""
Note: Code in this file is modified from
https://github.com/TadaoYamaoka/whisper/blob/main/to_onnx.py

Thanks to https://github.com/TadaoYamaoka
for making the onnx export script public.

Note that we have removed the 30 seconds constraint from whisper. You can
use any T <= 30.
"""

import argparse
import inspect
import os
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

import onnx
import torch
import torch.nn.functional as F
import whisper
from onnxruntime.quantization import QuantType, quantize_dynamic
from torch import Tensor, nn
from whisper.model import (
    AudioEncoder,
    MultiHeadAttention,
    ResidualAttentionBlock,
    TextDecoder,
)


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model",
        type=str,
        required=True,
        # fmt: off
        choices=[
            "tiny", "tiny.en", "base", "base.en",
            "small", "small.en", "medium", "medium.en",
            "large-v1", "large-v2",
            "large", "large-v3", "turbo", # these three have feature dim 128
            "distil-medium.en", "distil-small.en", "distil-large-v2",
            "distil-large-v3",
            "distil-large-v3.5",
            # for fine-tuned models from icefall
            "medium-aishell",
            ],
        # fmt: on
    )
    return parser.parse_args()


def causal_mask_1d(n: int, L: int, device=None, dtype=torch.int32):
    """
    Returns a 1-D int mask of shape (L,) with:
      0 -> allowed
      1 -> masked (will be converted to -inf later)
    """
    mask = torch.ones((L,), device=device, dtype=dtype)
    if n > 0:
        mask[:n] = 0
    return mask


def add_meta_data(filename: str, meta_data: Dict[str, Any]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)

    while len(model.metadata_props):
        model.metadata_props.pop()

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)

    if "large" in filename or "turbo" in filename:
        external_filename = filename.split(".onnx")[0]
        onnx.save(
            model,
            filename,
            save_as_external_data=True,
            all_tensors_to_one_file=True,
            location=external_filename + ".weights",
        )
    else:
        onnx.save(model, filename)


def modified_self_qkv_attention(
    self,
    q: Tensor,
    k_cache: Tensor,
    v_cache: Tensor,
    k1: Tensor,
    v1: Tensor,
    mask: Tensor,
) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
    assert mask is not None

    n_batch, n_ctx, n_state = q.shape

    scale = (n_state // self.n_head) ** -0.25
    q = q.view(*q.shape[:2], self.n_head, -1).permute(0, 2, 1, 3)
    k_cache = k_cache.view(*k_cache.shape[:2], self.n_head, -1).permute(0, 2, 1, 3)
    v_cache = v_cache.view(*v_cache.shape[:2], self.n_head, -1).permute(0, 2, 1, 3)

    k1 = k1.view(*k1.shape[:2], self.n_head, -1).permute(0, 2, 1, 3)
    v1 = v1.view(*v1.shape[:2], self.n_head, -1).permute(0, 2, 1, 3)

    qk = (q * scale) @ (k_cache * scale).transpose(-1, -2)  # (1, 6, 1, 448)

    qk1 = (q * scale) @ (k1 * scale).transpose(-1, -2)  # (1, 6, 1, 1)

    #  qk = qk + mask
    #  qk.masked_fill_(mask.to(torch.bool), float("-inf"))
    qk.masked_fill_(mask.to(torch.bool), -60000)

    qk = qk.float()
    qk1 = qk1.float()

    qk_total = torch.cat([qk, qk1], dim=-1)

    w_total = F.softmax(qk_total, dim=-1).to(q.dtype)
    w = w_total[:, :, :, :-1]
    w1 = w_total[:, :, :, -1:]

    out = (w @ v_cache).permute(0, 2, 1, 3).flatten(start_dim=2)
    out1 = (w1 @ v1).permute(0, 2, 1, 3).flatten(start_dim=2)
    out = out + out1

    qk = qk.detach()

    return out, qk


MultiHeadAttention.qkv_attention_self = modified_self_qkv_attention


def modified_audio_encoder_forward(self: AudioEncoder, x: torch.Tensor):
    """
    x : torch.Tensor, shape = (batch_size, n_mels, n_ctx)
        the mel spectrogram of the audio
    """
    x = F.gelu(self.conv1(x))
    x = F.gelu(self.conv2(x))
    x = x.permute(0, 2, 1)

    if False:
        # This branch contains the original code
        assert x.shape[1:] == self.positional_embedding.shape, "incorrect audio shape"
        x = (x + self.positional_embedding).to(x.dtype)
    else:
        #  print(x.shape, self.positional_embedding.shape)
        # This branch contains the actual changes
        assert (
            x.shape[2] == self.positional_embedding.shape[1]
        ), f"incorrect audio shape: {x.shape}, {self.positional_embedding.shape}"
        assert (
            x.shape[1] == self.positional_embedding.shape[0]
        ), f"incorrect audio shape: {x.shape}, {self.positional_embedding.shape}"
        x = (x + self.positional_embedding[: x.shape[1]]).to(x.dtype)

    for block in self.blocks:
        x = block(x)

    x = self.ln_post(x)
    return x


AudioEncoder.forward = modified_audio_encoder_forward


class AudioEncoderTensorCache(nn.Module):
    def __init__(self, inAudioEncoder: AudioEncoder, inTextDecoder: TextDecoder):
        super().__init__()
        self.audioEncoder = inAudioEncoder
        self.textDecoder = inTextDecoder

    def forward(self, x: Tensor) -> List[Tuple[Tensor, Tensor]]:
        """
        Args:
          x: (1, 80, 3000)
          cross_kv_pair:
            - the i-th entry contains kv cache for the i-th layer
        """
        audio_features = self.audioEncoder(x)

        n_layer_cross_k_list = []
        n_layer_cross_v_list = []

        cross_kv_pair = []
        for block in self.textDecoder.blocks:
            k = block.cross_attn.key(audio_features)  # (batch_size, 1500, 384)
            v = block.cross_attn.value(audio_features)  # (batch_size, 1500, 384)

            cross_kv_pair.append((k, v))

        return cross_kv_pair


class MultiHeadAttentionCross(nn.Module):
    def __init__(self, inMultiHeadAttention: MultiHeadAttention):
        super().__init__()
        self.multiHeadAttention = inMultiHeadAttention

    def forward(
        self,
        x: Tensor,
        k: Tensor,
        v: Tensor,
        mask: Optional[Tensor] = None,
    ):
        q = self.multiHeadAttention.query(x)
        wv, qk = self.multiHeadAttention.qkv_attention(q, k, v, mask)
        return self.multiHeadAttention.out(wv)


class MultiHeadAttentionSelf(nn.Module):
    def __init__(self, inMultiHeadAttention: MultiHeadAttention):
        super().__init__()
        self.multiHeadAttention = inMultiHeadAttention

    def forward(
        self,
        x: Tensor,  # (1, 1      , 384)
        k_cache: Tensor,  # (1, 448, 384)
        v_cache: Tensor,  # (1, 448, 384)
        mask: Tensor,  # (448,)
    ):
        q = self.multiHeadAttention.query(x)  # (1, 1, 384)
        k = self.multiHeadAttention.key(x)  # (1, 1, 384)
        v = self.multiHeadAttention.value(x)  # (1, 1, 384)

        #  k_cache[:, offset : offset + 1, :] = k  # (b, n_ctx_cache + n_ctx, n_state)
        #  v_cache[:, offset : offset + 1, :] = v  # (b, n_ctx_cache + n_ctx, n_state)

        wv, qk = self.multiHeadAttention.qkv_attention_self(
            q,
            k_cache=k_cache,
            v_cache=v_cache,
            k1=k,
            v1=v,
            mask=mask,
        )

        return self.multiHeadAttention.out(wv), k, v


class ResidualAttentionBlockTensorCache(nn.Module):
    def __init__(self, inResidualAttentionBlock: ResidualAttentionBlock):
        super().__init__()
        self.originalBlock = inResidualAttentionBlock
        self.attn = MultiHeadAttentionSelf(inResidualAttentionBlock.attn)
        self.cross_attn = (
            MultiHeadAttentionCross(inResidualAttentionBlock.cross_attn)
            if inResidualAttentionBlock.cross_attn
            else None
        )

    def forward(
        self,
        x: Tensor,
        self_k_cache: Tensor,
        self_v_cache: Tensor,
        cross_k: Tensor,
        cross_v: Tensor,
        offset: Tensor,
        mask: Tensor,
    ):
        self_attn_x, self_k, self_v = self.attn(
            self.originalBlock.attn_ln(x),
            self_k_cache,
            self_v_cache,
            mask=mask,
        )
        x = x + self_attn_x

        if self.cross_attn:
            x = x + self.cross_attn(
                self.originalBlock.cross_attn_ln(x), cross_k, cross_v
            )

        x = x + self.originalBlock.mlp(self.originalBlock.mlp_ln(x))
        return x, self_k, self_v


class TextDecoderTensorCache(nn.Module):
    def __init__(self, inTextDecoder: TextDecoder, in_n_ctx: int):
        super().__init__()
        self.textDecoder = inTextDecoder
        self.n_ctx = in_n_ctx

        self.blocks = []
        for orginal_block in self.textDecoder.blocks:
            self.blocks.append(ResidualAttentionBlockTensorCache(orginal_block))

    def forward(
        self,
        tokens: Tensor,
        self_kv_pair: List[Tuple[Tensor, Tensor]],
        cross_kv_pair: List[Tuple[Tensor, Tensor]],
        offset: Tensor,
        mask: Tensor,
    ) -> Tuple[Tensor, List[Tuple[Tensor, Tensor]]]:
        """
        tokens: (batch_size, 1)
        self_kv_pair:
            - [i][0]: layer_i_self_k_cache, (batch_size, 448, dim)
            - [i][1]: layer_i_self_v_cache, (batch_size, 448, dim)
        Returns:
          - logits
          - this_self_kv_pair
        """
        assert tokens.shape == (1, 1), tokens.shape
        x = self.textDecoder.token_embedding(
            tokens
        ) + self.textDecoder.positional_embedding[offset.to(torch.int64)].unsqueeze(0)

        i = 0
        this_self_kv_pair = []
        for block in self.blocks:
            self_k_cache = self_kv_pair[i][0]
            self_v_cache = self_kv_pair[i][1]

            x, self_k, self_v = block(
                x,
                #  self_k_cache=self_k_cache[:, : offset + 1],
                #  self_v_cache=self_v_cache[:, : offset + 1],
                self_k_cache=self_k_cache,
                self_v_cache=self_v_cache,
                cross_k=cross_kv_pair[i][0],
                cross_v=cross_kv_pair[i][1],
                offset=offset,
                #  mask=self.textDecoder.mask,
                mask=mask,
            )
            #  self_k_cache[:, : offset + 1] = updated_self_k_cache
            #  self_v_cache[:, : offset + 1] = updated_self_v_cache
            #  updated_self_kv_pair.append((self_k_cache, self_v_cache))
            this_self_kv_pair.append((self_k, self_v))

            i += 1

        x = self.textDecoder.ln(x)

        if False:
            # x.shape (1, 3, 384)
            # weight.shape (51684, 384)

            logits = (
                x
                @ torch.transpose(
                    self.textDecoder.token_embedding.weight.to(x.dtype), 0, 1
                )
            ).float()
        else:
            logits = (
                torch.matmul(
                    self.textDecoder.token_embedding.weight.to(x.dtype),
                    x.permute(0, 2, 1),
                )
                .permute(0, 2, 1)
                .float()
            )

        return logits, this_self_kv_pair


# ref: https://github.com/ggerganov/whisper.cpp/blob/master/models/convert-pt-to-ggml.py#L232
def convert_tokens(name, model):
    whisper_dir = Path(whisper.__file__).parent
    multilingual = model.is_multilingual
    tokenizer = (
        whisper_dir
        / "assets"
        / (multilingual and "multilingual.tiktoken" or "gpt2.tiktoken")
    )
    if not tokenizer.is_file():
        raise ValueError(f"Cannot find {tokenizer}")

    #  import base64

    with open(tokenizer, "r") as f:
        contents = f.read()
        #  tokens = {
        #      base64.b64decode(token): int(rank)
        #      for token, rank in (line.split() for line in contents.splitlines() if line)
        #  }
        tokens = {
            token: int(rank)
            for token, rank in (line.split() for line in contents.splitlines() if line)
        }

    with open(f"{name}-tokens.txt", "w") as f:
        for t, i in tokens.items():
            f.write(f"{t} {i}\n")
    print(f"Saved to {name}-tokens.txt")


@torch.no_grad()
def main():
    args = get_args()
    name = args.model
    print(args)
    print(name)

    opset_version = 17

    if name == "distil-medium.en":
        filename = "./distil-medium-en-original-model.bin"
        if not Path(filename).is_file():
            raise ValueError(
                """
                Please go to https://huggingface.co/distil-whisper/distil-medium.en
                to download original-model.bin
                You can use the following command to do that:

                wget -O distil-medium-en-original-model.bin https://huggingface.co/distil-whisper/distil-medium.en/resolve/main/original-model.bin
            """
            )
        model = whisper.load_model(filename)
    elif name == "distil-large-v2":
        filename = "./distil-large-v2-original-model.bin"
        if not Path(filename).is_file():
            raise ValueError(
                """
                Please go to https://huggingface.co/distil-whisper/distil-large-v2
                to download original-model.bin
                You can use the following command to do that:

                wget -O distil-large-v2-original-model.bin https://huggingface.co/distil-whisper/distil-large-v2/resolve/main/original-model.bin
            """
            )
        model = whisper.load_model(filename)
    elif name == "distil-large-v3":
        filename = "./distil-large-v3-original-model.bin"
        if not Path(filename).is_file():
            raise ValueError(
                """
                Please go to https://huggingface.co/distil-whisper/distil-large-v3-openai
                to download model.bin
                You can use the following command to do that:

                wget -O distil-large-v3-original-model.bin https://huggingface.co/distil-whisper/distil-large-v3-openai/resolve/main/model.bin
            """
            )
        model = whisper.load_model(filename)
    elif name == "distil-large-v3.5":
        filename = "./distil-large-v3.5-original-model.bin"
        if not Path(filename).is_file():
            raise ValueError(
                """
                Please go to https://huggingface.co/distil-whisper/distil-large-v3.5-openai/
                to download model.bin
                You can use the following command to do that:

                wget -O distil-large-v3.5-original-model.bin https://huggingface.co/distil-whisper/distil-large-v3.5-openai/resolve/main/model.bin
            """
            )
        model = whisper.load_model(filename)
    elif name == "distil-small.en":
        filename = "./distil-small-en-original-model.bin"
        if not Path(filename).is_file():
            raise ValueError(
                """
                Please go to https://huggingface.co/distil-whisper/distil-small.en
                to download original-model.bin
                You can use the following command to do that:

                wget -O distil-small-en-original-model.bin https://huggingface.co/distil-whisper/distil-small.en/resolve/main/original-model.bin
            """
            )
        model = whisper.load_model(filename)
    elif name == "medium-aishell":
        filename = "./medium-aishell.pt"
        if not Path(filename).is_file():
            raise ValueError(
                """
                Please go to https://huggingface.co/yuekai/icefall_asr_aishell_whisper/tree/main/exp_medium
                to download whisper-medium-aishell1-epoch-10-avg-4.pt
                You can use the following command to do that:

                wget -O medium-aishell.pt https://huggingface.co/yuekai/icefall_asr_aishell_whisper/resolve/main/exp_medium/whisper-medium-aishell1-epoch-10-avg-4.pt
            """
            )
        model = whisper.load_model(filename)
    else:
        model = whisper.load_model(name)
    model.to("cpu")

    num_params = sum(p.numel() for p in model.parameters())
    num_encoder_params = sum(p.numel() for p in model.encoder.parameters())
    num_decoder_params = sum(p.numel() for p in model.decoder.parameters())
    print(f"{name} model parameters: {num_params} (or {num_params/1000/1000} M)")
    print(
        f"{name} encoder parameters: {num_encoder_params} (or {num_encoder_params/1000/1000} M)"
    )
    print(
        f"{name} decoder parameters: {num_decoder_params} (or {num_decoder_params/1000/1000} M)"
    )

    convert_tokens(name=name, model=model)

    # write tokens

    tokenizer = whisper.tokenizer.get_tokenizer(
        model.is_multilingual, num_languages=model.num_languages
    )
    # tiny: <|startoftranscript|><|en|><|transcribe|> (50258, 50259, 50359)
    # base: <|startoftranscript|><|en|><|transcribe|> (50258, 50259, 50359)
    # tiny.en: <|startoftranscript|> (50257,)
    print(tokenizer.decode(tokenizer.sot_sequence), tokenizer.sot_sequence)

    # tiny: <|notimestamps|> 50363
    # base: <|notimestamps|> 50363
    # tiny.en: <|notimestamps|> 50362
    print(tokenizer.decode([tokenizer.no_timestamps]), tokenizer.no_timestamps)

    model.eval()
    print(model.dims)
    audio = torch.rand(16000 * 2)
    audio = whisper.pad_or_trim(audio)
    assert audio.shape == (16000 * 30,), audio.shape

    if args.model in ("distil-large-v3", "distil-large-v3.5"):
        n_mels = 128
    elif args.model in (
        "large",
        "large-v3",
        "turbo",
    ):
        n_mels = 128
    else:
        n_mels = 80

    mel = (
        whisper.log_mel_spectrogram(audio, n_mels=n_mels).to(model.device).unsqueeze(0)
    )
    batch_size = 1
    assert mel.shape == (batch_size, n_mels, 30 * 100), mel.shape

    encoder = AudioEncoderTensorCache(model.encoder, model.decoder)

    cross_kv_pair = encoder(mel)
    assert len(cross_kv_pair) == model.dims.n_text_layer, (
        len(cross_kv_pair),
        model.dims.n_text_layer,
    )

    output_names = []
    for i in range(model.dims.n_text_layer):
        k = f"cross_k_{i}"
        v = f"cross_v_{i}"
        output_names.append(k)
        output_names.append(v)

    export_sig = inspect.signature(torch.onnx.export)

    kwargs = dict()
    if "dynamo" in export_sig.parameters:
        kwargs["dynamo"] = False

    if "external_data" in export_sig.parameters:
        kwargs["external_data"] = False

    encoder_filename = f"{name}-encoder.onnx"
    torch.onnx.export(
        encoder,
        mel,
        encoder_filename,
        opset_version=opset_version,
        input_names=[f"{name}-mel"],
        output_names=output_names,
        **kwargs,
    )

    encoder_meta_data = {
        "model_type": f"whisper-{name}",
        "version": "1",
        "maintainer": "k2-fsa",
        "n_mels": model.dims.n_mels,
        "n_audio_ctx": model.dims.n_audio_ctx,
        "n_audio_state": model.dims.n_audio_state,
        "n_audio_head": model.dims.n_audio_head,
        "n_audio_layer": model.dims.n_audio_layer,
        "n_vocab": model.dims.n_vocab,
        "n_text_ctx": model.dims.n_text_ctx,
        "n_text_state": model.dims.n_text_state,
        "n_text_head": model.dims.n_text_head,
        "n_text_layer": model.dims.n_text_layer,
        "sot_sequence": ",".join(list(map(str, tokenizer.sot_sequence))),
        #  "all_language_tokens": ",".join(
        #      list(map(str, tokenizer.all_language_tokens))
        #  ),  # a list of ids
        #  "all_language_codes": ",".join(
        #      tokenizer.all_language_codes
        #  ),  # e.g., en, de, zh, fr
        "sot": tokenizer.sot,
        "sot_index": tokenizer.sot_sequence.index(tokenizer.sot),
        "eot": tokenizer.eot,
        "blank_id": tokenizer.encode(" ")[0],
        "is_multilingual": int(model.is_multilingual),
        "no_speech": tokenizer.no_speech,
        "non_speech_tokens": ",".join(list(map(str, tokenizer.non_speech_tokens))),
        "transcribe": tokenizer.transcribe,
        "translate": tokenizer.translate,
        "sot_prev": tokenizer.sot_prev,
        "sot_lm": tokenizer.sot_lm,
        "no_timestamps": tokenizer.no_timestamps,
    }
    print(f"encoder_meta_data: {encoder_meta_data}")
    add_meta_data(filename=encoder_filename, meta_data=encoder_meta_data)

    tokens = torch.tensor([[tokenizer.sot]], dtype=torch.int32)
    decoder = TextDecoderTensorCache(model.decoder, model.dims.n_text_ctx)

    self_kv_pair = []
    batch_size = 1
    for i in range(model.dims.n_text_layer):
        k = torch.zeros(batch_size, model.dims.n_text_ctx, model.dims.n_text_state)
        v = torch.zeros(batch_size, model.dims.n_text_ctx, model.dims.n_text_state)
        self_kv_pair.append((k, v))

    offset = torch.zeros(1, dtype=torch.int32)
    mask = causal_mask_1d(offset.item(), model.dims.n_text_ctx)

    logits, this_self_kv_pair = decoder(
        tokens,
        self_kv_pair,
        cross_kv_pair,
        offset,
        mask,
    )

    assert logits.shape == (batch_size, tokens.shape[1], model.dims.n_vocab)
    assert len(this_self_kv_pair) == model.dims.n_text_layer, (
        len(this_self_kv_pair),
        model.dims.n_text_layer,
    )

    input_names = [f"{name}-tokens"]
    for i in range(model.dims.n_text_layer):
        k = f"{name}-self_k_{i}"
        v = f"{name}-self_v_{i}"
        input_names.append(k)
        input_names.append(v)

    for i in range(model.dims.n_text_layer):
        k = f"{name}-cross_k_{i}"
        v = f"{name}-cross_v_{i}"
        input_names.append(k)
        input_names.append(v)
    input_names.append(f"{name}-offset")
    input_names.append(f"{name}-mask")

    output_names = [f"{name}-logits"]
    for i in range(model.dims.n_text_layer):
        k = f"{name}-this_self_k_{i}"
        v = f"{name}-this_self_v_{i}"
        output_names.append(k)
        output_names.append(v)

    decoder_filename = f"{name}-decoder.onnx"
    torch.onnx.export(
        decoder,
        (
            tokens,
            self_kv_pair,
            cross_kv_pair,
            offset,
            mask,
        ),
        decoder_filename,
        opset_version=opset_version,
        input_names=input_names,
        output_names=output_names,
        **kwargs,
    )

    if "large" in args.model:
        decoder_external_filename = decoder_filename.split(".onnx")[0]
        decoder_model = onnx.load(decoder_filename)
        onnx.save(
            decoder_model,
            decoder_filename,
            save_as_external_data=True,
            all_tensors_to_one_file=True,
            location=decoder_external_filename + ".weights",
        )

    if False:
        # Generate int8 quantization models
        # See https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html#data-type-selection

        print("Generate int8 quantization models")

        encoder_filename_int8 = f"{name}-encoder.int8.onnx"
        quantize_dynamic(
            model_input=encoder_filename,
            model_output=encoder_filename_int8,
            op_types_to_quantize=["MatMul"],
            weight_type=QuantType.QInt8,
        )

        decoder_filename_int8 = f"{name}-decoder.int8.onnx"
        quantize_dynamic(
            model_input=decoder_filename,
            model_output=decoder_filename_int8,
            op_types_to_quantize=["MatMul"],
            weight_type=QuantType.QInt8,
        )


if __name__ == "__main__":
    torch.set_num_threads(1)
    torch.set_num_interop_threads(1)
    try:
        # To fix
        # TypeError: scaled_dot_product_attention(): argument 'is_causal' must be bool, not Tensor
        # See also https://github.com/k2-fsa/sherpa-onnx/issues/1764
        from whisper.model import disable_sdpa

        with disable_sdpa():
            main()
    except:
        main()


================================================
FILE: scripts/whisper/rknn/export_rknn.py
================================================
#!/usr/bin/env python3
# Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)

import argparse
import logging
from pathlib import Path

from rknn.api import RKNN

logging.basicConfig(level=logging.WARNING)

g_platforms = [
    "rk3562",
    "rk3566",
    "rk3568",
    "rk3576",
    "rk3588",
]


def get_parser():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--target-platform",
        type=str,
        required=True,
        help=f"Supported values are: {','.join(g_platforms)}",
    )

    parser.add_argument(
        "--in-model",
        type=str,
        required=True,
        help="Path to the input onnx model",
    )

    parser.add_argument(
        "--out-model",
        type=str,
        required=True,
        help="Path to the output rknn model",
    )

    return parser


def get_meta_data(model: str):
    import onnxruntime

    session_opts = onnxruntime.SessionOptions()
    session_opts.inter_op_num_threads = 1
    session_opts.intra_op_num_threads = 1

    m = onnxruntime.InferenceSession(
        model,
        sess_options=session_opts,
        providers=["CPUExecutionProvider"],
    )

    for i in m.get_inputs():
        print(i)

    print("-----")

    for i in m.get_outputs():
        print(i)
    print()

    meta = m.get_modelmeta().custom_metadata_map
    s = ""
    sep = ""
    for key, value in meta.items():
        s = s + sep + f"{key}={value}"
        sep = ";"
    assert len(s) < 1024, len(s)

    print("len(s)", len(s), s)

    return s


def export_rknn(rknn, filename):
    ret = rknn.export_rknn(filename)
    if ret != 0:
        exit(f"Export rknn model to {filename} failed!")


def init_model(filename: str, target_platform: str, custom_string=None):
    rknn = RKNN(verbose=False)

    rknn.config(
        optimization_level=0,
        target_platform=target_platform,
        custom_string=custom_string,
    )
    if not Path(filename).is_file():
        exit(f"{filename} does not exist")

    ret = rknn.load_onnx(model=filename)
    if ret != 0:
        exit(f"Load model {filename} failed!")

    ret = rknn.build(do_quantization=False)
    if ret != 0:
        exit(f"Build model {filename} failed!")

    return rknn


class RKNNModel:
    def __init__(
        self,
        model: str,
        target_platform: str,
    ):
        meta = get_meta_data(model)
        print(meta)

        self.model = init_model(
            model,
            target_platform=target_platform,
            custom_string=meta,
        )

    def export_rknn(self, model):
        export_rknn(self.model, model)

    def release(self):
        self.model.release()


def main():
    args = get_parser().parse_args()
    print(vars(args))

    model = RKNNModel(
        model=args.in_model,
        target_platform=args.target_platform,
    )

    model.export_rknn(
        model=args.out_model,
    )

    model.release()


if __name__ == "__main__":
    main()


================================================
FILE: scripts/whisper/rknn/generate_decoder_data.py
================================================
#!/usr/bin/env python3
# Copyright (c)  2025  Xiaomi Corporation

import glob
from dataclasses import dataclass
from pathlib import Path
from typing import List, Tuple

import numpy as np
import torch
import whisper

from export_onnx import AudioEncoderTensorCache, TextDecoderTensorCache, causal_mask_1d
from test_torch import compute_feat

# we need to transpose cross_kv to (1, 384, 1500) when using it as an input
# we need to transpose self_kv to (1, 384, 448) when using it as an input


def deepcopy_pair(pair):
    return [(a.clone(), b.clone()) for a, b in pair]


def to_file(tensor, filename, debug):
    if debug:
        print(filename, tensor.shape, tensor.dtype)
    tensor.numpy().tofile(filename)


@dataclass
class DecoderInput:
    tokens: torch.Tensor
    self_kv_pair: List[Tuple[torch.Tensor, torch.Tensor]]
    cross_kv_pair: List[Tuple[torch.Tensor, torch.Tensor]]
    offset: torch.Tensor
    mask: torch.Tensor

    def save_to_file(self, prefix, debug):
        ans = []
        to_file(self.tokens.to(torch.int32), f"{prefix}-tokens.raw", debug)
        ans.append(f"{prefix}-tokens.raw")

        for i, (k, v) in enumerate(self.self_kv_pair):
            to_file(k.permute(0, 2, 1), f"{prefix}-self_k_{i}.raw", debug)
            ans.append(f"{prefix}-self_k_{i}.raw")

            to_file(v.permute(0, 2, 1), f"{prefix}-self_v_{i}.raw", debug)
            ans.append(f"{prefix}-self_v_{i}.raw")

        for i, (k, v) in enumerate(self.cross_kv_pair):
            to_file(k.permute(0, 2, 1), f"{prefix}-cross_k_{i}.raw", debug)
            ans.append(f"{prefix}-cross_k_{i}.raw")

            to_file(v.permute(0, 2, 1), f"{prefix}-cross_v_{i}.raw", debug)
            ans.append(f"{prefix}-cross_v_{i}.raw")

        to_file(self.offset.to(torch.int32), f"{prefix}-offset.raw", debug)
        ans.append(f"{prefix}-offset.raw")

        to_file(self.mask.to(torch.int32), f"{prefix}-mask.raw", debug)
        ans.append(f"{prefix}-mask.raw")

        return ans


def process(model, tokenizer, w):
    mel = compute_feat(w)

    encoder = AudioEncoderTensorCache(model.encoder, model.decoder)
    cross_kv_pair = encoder(mel)

    # cross_kv_pair[0][0]: (1, 1500, 384)
    # cross_kv_pair[0][1]: (1, 1500, 384)

    ans = []

    decoder = TextDecoderTensorCache(model.decoder, model.dims.n_text_ctx)

    batch_size = 1
    self_kv_pair = []
    for i in range(model.dims.n_text_layer):
        k = torch.zeros(batch_size, model.dims.n_text_ctx, model.dims.n_text_state)
        v = torch.zeros(batch_size, model.dims.n_text_ctx, model.dims.n_text_state)

        self_kv_pair.append((k, v))
    # self_kv_pair[0][0]: (1, 448, 384)
    # self_kv_pair[0][1]: (1, 448, 384)

    offset = torch.zeros(1, dtype=torch.int64).to(mel.device)
    mask = causal_mask_1d(offset.item(), model.dims.n_text_ctx)

    tokens = torch.tensor([[tokenizer.sot]])

    ans.append(
        DecoderInput(
            tokens=tokens.clone(),
            self_kv_pair=deepcopy_pair(self_kv_pair),
            cross_kv_pair=deepcopy_pair(cross_kv_pair),
            offset=offset.clone(),
            mask=mask.clone(),
        )
    )

    logits, this_self_kv_pair = decoder(
        tokens,
        self_kv_pair,
        cross_kv_pair,
        offset,
        mask,
    )
    for (k_cache, v_cache), (k, v) in zip(self_kv_pair, this_self_kv_pair):
        k_cache[:, offset : offset + 1] = k
        v_cache[:, offset : offset + 1] = v

    offset += 1

    mask = causal_mask_1d(offset.item(), model.dims.n_text_ctx)

    tokens = torch.tensor([[tokenizer.no_timestamps]])
    logits, this_self_kv_pair = decoder(
        tokens, self_kv_pair, cross_kv_pair, offset, mask
    )

    ans.append(
        DecoderInput(
            tokens=tokens.clone(),
            self_kv_pair=deepcopy_pair(self_kv_pair),
            cross_kv_pair=deepcopy_pair(cross_kv_pair),
            offset=offset.clone(),
            mask=mask.clone(),
        )
    )

    for (k_cache, v_cache), (k, v) in zip(self_kv_pair, this_self_kv_pair):
        k_cache[:, offset : offset + 1] = k
        v_cache[:, offset : offset + 1] = v

    assert logits.shape == (1, tokens.shape[1], model.dims.n_vocab)

    print("logits.shape", logits.shape)  # (1, 3, 51864)
    idx = logits[0, -1].argmax().item()

    steps = 0
    results = []
    while idx != tokenizer.eot and steps < 50:
        results.append(idx)
        tokens = torch.tensor([[results[-1]]])

        offset += 1
        mask = causal_mask_1d(offset.item(), model.dims.n_text_ctx)

        logits, this_self_kv_pair = decoder(
            tokens, self_kv_pair, cross_kv_pair, offset, mask
        )

        ans.append(
            DecoderInput(
                tokens=tokens.clone(),
                self_kv_pair=deepcopy_pair(self_kv_pair),
                cross_kv_pair=deepcopy_pair(cross_kv_pair),
                offset=offset.clone(),
                mask=mask.clone(),
            )
        )

        for (k_cache, v_cache), (k, v) in zip(self_kv_pair, this_self_kv_pair):
            k_cache[:, offset : offset + 1] = k
            v_cache[:, offset : offset + 1] = v

        idx = logits[0, -1].argmax().item()
        steps += 1

    print(results)
    print(tokenizer.decode(results))
    return ans


@torch.no_grad()
def main():
    model = whisper.load_model("tiny.en")
    model.eval()
    tokenizer = whisper.tokenizer.get_tokenizer(
        model.is_multilingual, num_languages=model.num_languages
    )

    wav_files = glob.glob("*.wav")
    features_name = []
    for w in wav_files:
        decoder_input_list = process(model, tokenizer, w)
        print(len(decoder_input_list))

        name = Path(w).stem
        files = [
            d.save_to_file(f"{name}-decoder-iter-{k:02d}", k == 0)
            for k, d in enumerate(decoder_input_list)
        ]

        features_name.extend(files)

    with open("decoder-input-list.txt", "w") as f:
        for line in features_name:
            line = " ".join(line)
            f.write(f"{line}\n")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/whisper/rknn/generate_encoder_data.py
================================================
#!/usr/bin/env python3
# Copyright (c)  2025  Xiaomi Corporation

import glob
from pathlib import Path

import numpy as np

from test_torch import compute_feat


@torch.no_grad()
def main():
    wav_files = glob.glob("*.wav")
    features_name = []
    for w in wav_files:
        f = compute_feat(w)

        # Note: qnn expects (1, 3000, 80) as input
        f = f.permute(0, 2, 1)  # (1, 80, 3000) -> (1, 3000, 80)

        f = f.numpy()
        print(w, f.shape)
        name = Path(w).stem

        s = f"encoder-input-{name}.raw"
        f.tofile(s)
        features_name.append(s)

    with open("encoder-input-list.txt", "w") as f:
        for line in features_name:
            f.write(f"{line}\n")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/whisper/rknn/notes.md
================================================
# Note

## Encoder
```
=========./tiny.en-encoder.onnx==========
NodeArg(name='tiny.en-mel', type='tensor(float)', shape=[1, 80, 3000])
-----
NodeArg(name='cross_k_0', type='tensor(float)', shape=[1, 1500, 384])
NodeArg(name='cross_v_0', type='tensor(float)', shape=[1, 1500, 384])
NodeArg(name='cross_k_1', type='tensor(float)', shape=[1, 1500, 384])
NodeArg(name='cross_v_1', type='tensor(float)', shape=[1, 1500, 384])
NodeArg(name='cross_k_2', type='tensor(float)', shape=[1, 1500, 384])
NodeArg(name='cross_v_2', type='tensor(float)', shape=[1, 1500, 384])
NodeArg(name='cross_k_3', type='tensor(float)', shape=[1, 1500, 384])
NodeArg(name='cross_v_3', type='tensor(float)', shape=[1, 1500, 384])
```

## Decoder

```
=========./tiny.en-decoder.onnx==========
NodeArg(name='tiny.en-tokens', type='tensor(int32)', shape=[1, 1])
NodeArg(name='tiny.en-self_k_0', type='tensor(float)', shape=[1, 448, 384])
NodeArg(name='tiny.en-self_v_0', type='tensor(float)', shape=[1, 448, 384])
NodeArg(name='tiny.en-self_k_1', type='tensor(float)', shape=[1, 448, 384])
NodeArg(name='tiny.en-self_v_1', type='tensor(float)', shape=[1, 448, 384])
NodeArg(name='tiny.en-self_k_2', type='tensor(float)', shape=[1, 448, 384])
NodeArg(name='tiny.en-self_v_2', type='tensor(float)', shape=[1, 448, 384])
NodeArg(name='tiny.en-self_k_3', type='tensor(float)', shape=[1, 448, 384])
NodeArg(name='tiny.en-self_v_3', type='tensor(float)', shape=[1, 448, 384])
NodeArg(name='tiny.en-cross_k_0', type='tensor(float)', shape=[1, 1500, 384])
NodeArg(name='tiny.en-cross_v_0', type='tensor(float)', shape=[1, 1500, 384])
NodeArg(name='tiny.en-cross_k_1', type='tensor(float)', shape=[1, 1500, 384])
NodeArg(name='tiny.en-cross_v_1', type='tensor(float)', shape=[1, 1500, 384])
NodeArg(name='tiny.en-cross_k_2', type='tensor(float)', shape=[1, 1500, 384])
NodeArg(name='tiny.en-cross_v_2', type='tensor(float)', shape=[1, 1500, 384])
NodeArg(name='tiny.en-cross_k_3', type='tensor(float)', shape=[1, 1500, 384])
NodeArg(name='tiny.en-cross_v_3', type='tensor(float)', shape=[1, 1500, 384])
NodeArg(name='tiny.en-offset', type='tensor(int64)', shape=[1])
NodeArg(name='tiny.en-mask', type='tensor(float)', shape=[448])
-----
NodeArg(name='tiny.en-logits', type='tensor(float)', shape=['Casttiny.en-logits_dim_0', 'Casttiny.en-logits_dim_1', 51864])
NodeArg(name='tiny.en-this_self_k_0', type='tensor(float)', shape=[1, 'MatMultiny.en-this_self_k_0_dim_1', 384])
NodeArg(name='tiny.en-this_self_v_0', type='tensor(float)', shape=[1, 'MatMultiny.en-this_self_k_0_dim_1', 384])
NodeArg(name='tiny.en-this_self_k_1', type='tensor(float)', shape=['MatMultiny.en-this_self_k_1_dim_0', 'MatMultiny.en-this_self_k_1_dim_1', 384])
NodeArg(name='tiny.en-this_self_v_1', type='tensor(float)', shape=['MatMultiny.en-this_self_k_1_dim_0', 'MatMultiny.en-this_self_k_1_dim_1', 384])
NodeArg(name='tiny.en-this_self_k_2', type='tensor(float)', shape=['MatMultiny.en-this_self_k_2_dim_0', 'MatMultiny.en-this_self_k_2_dim_1', 384])
NodeArg(name='tiny.en-this_self_v_2', type='tensor(float)', shape=['MatMultiny.en-this_self_k_2_dim_0', 'MatMultiny.en-this_self_k_2_dim_1', 384])
NodeArg(name='tiny.en-this_self_k_3', type='tensor(float)', shape=['MatMultiny.en-this_self_k_3_dim_0', 'MatMultiny.en-this_self_k_3_dim_1', 384])
NodeArg(name='tiny.en-this_self_v_3', type='tensor(float)', shape=['MatMultiny.en-this_self_k_3_dim_0', 'MatMultiny.en-this_self_k_3_dim_1', 384])
```


================================================
FILE: scripts/whisper/rknn/test_on_rk3588_board.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

"""
usage:

./test_on_rk3588_board.py  --encoder ./base-encoder.rknn --decoder ./base-decoder.rknn --tokens ./base-tokens.txt --wav ./en-16k.wav

./test_on_rk3588_board.py  --encoder ./base.en-encoder.rknn --decoder ./base.en-decoder.rknn --tokens ./base.en-tokens.txt --wav ./en-16k.wav
"""

try:
    from rknnlite.api import RKNNLite
except:
    print("Please run this file on your board (linux + aarch64 + npu)")
    print("You need to install rknn_toolkit_lite2")
    print(
        " from https://github.com/airockchip/rknn-toolkit2/tree/master/rknn-toolkit-lite2/packages"
    )
    print(
        "https://github.com/airockchip/rknn-toolkit2/blob/v2.1.0/rknn-toolkit-lite2/packages/rknn_toolkit_lite2-2.1.0-cp310-cp310-linux_aarch64.whl"
    )
    print("is known to work")
    raise

import argparse
import base64
import time
from pathlib import Path
from typing import List, Tuple

import kaldi_native_fbank as knf
import numpy as np
import soundfile as sf
import torch


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--encoder",
        type=str,
        required=True,
        help="Path to the encoder",
    )

    parser.add_argument(
        "--decoder",
        type=str,
        required=True,
        help="Path to the decoder",
    )

    parser.add_argument(
        "--tokens",
        type=str,
        required=True,
        help="Path to the tokens",
    )

    parser.add_argument(
        "--wav",
        type=str,
        required=True,
        help="Path to the test wav",
    )

    return parser.parse_args()


def causal_mask_1d(n: int, L: int):
    """
    Returns a 1-D int mask of shape (L,) with:
      0 -> allowed
      1 -> masked (will be converted to -inf later)
    """
    mask = np.ones((L,), dtype=np.int32)
    if n > 0:
        mask[:n] = 0
    return mask


def load_audio(filename: str) -> Tuple[np.ndarray, int]:
    data, sample_rate = sf.read(
        filename,
        always_2d=True,
        dtype="float32",
    )
    data = data[:, 0]  # use only the first channel

    samples = np.ascontiguousarray(data)
    return samples, sample_rate


def compute_features(samples: np.ndarray, dim: int = 80) -> np.ndarray:
    """
    Returns:
      Return a 1-D float32 tensor of shape (1, 80, 3000) containing the features.
    """
    features = []
    opts = knf.WhisperFeatureOptions()
    opts.dim = dim
    online_whisper_fbank = knf.OnlineWhisperFbank(opts)
    online_whisper_fbank.accept_waveform(16000, samples)
    online_whisper_fbank.input_finished()
    for i in range(online_whisper_fbank.num_frames_ready):
        f = online_whisper_fbank.get_frame(i)
        f = torch.from_numpy(f)
        features.append(f)

    features = torch.stack(features)

    log_spec = torch.clamp(features, min=1e-10).log10()
    log_spec = torch.maximum(log_spec, log_spec.max() - 8.0)
    mel = (log_spec + 4.0) / 4.0
    # mel (T, 80)

    # We pad 1500 frames at the end so that it is able to detect eot
    # You can use another value instead of 1500.
    mel = torch.nn.functional.pad(mel, (0, 0, 0, 1500), "constant", 0)
    # Note that if it throws for a multilingual model,
    # please use a larger value, say 300

    target = 3000
    if mel.shape[0] > target:
        # -50 so that there are some zero tail paddings.
        mel = mel[: target - 50]
        mel = torch.nn.functional.pad(mel, (0, 0, 0, 50), "constant", 0)
    elif mel.shape[0] < target:
        mel = torch.nn.functional.pad(
            mel, (0, 0, 0, target - mel.shape[0]), "constant", 0
        )

    mel = mel.t().unsqueeze(0)

    return mel


def load_tokens(filename):
    tokens = dict()
    with open(filename, "r") as f:
        for line in f:
            t, i = line.split()
            tokens[int(i)] = t
    return tokens


def init_model(filename, target_platform="rk3588"):

    if not Path(filename).is_file():
        exit(f"{filename} does not exist")

    rknn_lite = RKNNLite(verbose=False)
    ret = rknn_lite.load_rknn(path=filename)
    if ret != 0:
        exit(f"Load model {filename} failed!")

    ret = rknn_lite.init_runtime(core_mask=RKNNLite.NPU_CORE_0)
    if ret != 0:
        exit(f"Failed to init rknn runtime for {filename}")
    return rknn_lite


class RKNNModel:
    def __init__(
        self,
        encoder: str,
        decoder: str,
        sot_sequence: List[int],
        eot: int,
        n_text_layer: int,
        n_text_ctx: int,
        n_text_state: int,
        target_platform="rk3588",
    ):
        self.sot_sequence = sot_sequence
        self.eot = eot
        self.n_text_layer = n_text_layer
        self.n_text_ctx = n_text_ctx
        self.n_text_state = n_text_state

        print("sot_sequence", self.sot_sequence)
        print("eot", self.eot)

        self.encoder = init_model(encoder)
        self.decoder = init_model(decoder)

    def release(self):
        self.encoder.release()
        self.decoder.release()

    def run_encoder(self, x: np.ndarray):
        """
        Args:
          x: (1, 80, 3000), np.float32
        Returns:
          cross_kv:
           - (k, v) for layer 0
           - (k, v) for layer 1
           - (k, v) for layer 2
           - (k, v) for layer 3
        """
        out = self.encoder.inference(inputs=[x.numpy()])
        return out

    def get_self_cache(self) -> List[np.ndarray]:
        self_cache = []
        batch_size = 1
        for i in range(self.n_text_layer):
            k = np.zeros(
                (batch_size, self.n_text_ctx, self.n_text_state), dtype=np.float32
            )
            v = np.zeros(
                (batch_size, self.n_text_ctx, self.n_text_state), dtype=np.float32
            )
            self_cache.extend([k, v])
        return self_cache

    def run_decoder(self, tokens: np.ndarray, self_kv, cross_kv, offset, mask):
        """
        Args:
          tokens: (1, 1), np.int32
          offset: (1,), np.int32
          mask: (model.n_text_ctx,), np.int32
        Returns:
          logit: (1, 1, vocab_size)
          this_self_kv
        """
        return self.decoder.inference(
            inputs=[tokens] + self_kv + cross_kv + [offset, mask]
        )


def main():
    args = get_args()
    print(vars(args))

    id2token = load_tokens(args.tokens)

    if ".en" in args.encoder:
        sot_sequence = [50257, 50362]
        eot = 50256
    else:
        sot_sequence = [50258, 50259, 50359, 50363]
        eot = 50257

    if "tiny" in args.encoder:
        n_text_layer = 4
        n_text_ctx = 448
        n_text_state = 384
    elif "base" in args.encoder:
        n_text_layer = 6
        n_text_ctx = 448
        n_text_state = 512
    elif "small" in args.encoder:
        n_text_layer = 12
        n_text_ctx = 448
        n_text_state = 768
    elif "medium" in args.encoder:
        n_text_layer = 24
        n_text_ctx = 448
        n_text_state = 1024
    else:
        assert False, f"Unsupported encoder {args.encoder}"

    model = RKNNModel(
        encoder=args.encoder,
        decoder=args.decoder,
        sot_sequence=sot_sequence,
        eot=eot,
        n_text_layer=n_text_layer,
        n_text_ctx=n_text_ctx,
        n_text_state=n_text_state,
    )

    for i in range(1):
        test(model, id2token)


def test(model, id2token):

    start = time.time()
    samples, sample_rate = load_audio("./en-16k.wav")
    assert sample_rate == 16000, sample_rate

    features = compute_features(samples)
    print(features.shape)
    cross_kv = model.run_encoder(features)

    self_kv = model.get_self_cache()

    offset = np.array([0], dtype=np.int32)
    for t in model.sot_sequence:
        token = np.array([[t]], dtype=np.int32)  # sot
        mask = causal_mask_1d(offset.item(), model.n_text_ctx)

        out = model.run_decoder(
            tokens=token, self_kv=self_kv, cross_kv=cross_kv, offset=offset, mask=mask
        )

        for i in range(1, len(out)):
            self_kv[i - 1][:, offset.item() : offset.item() + 1, :] = out[i]

        offset += 1

    idx = out[0][0, 0].argmax()

    eot = model.eot

    ans = []

    while idx != eot and offset.item() < 100:
        ans.append(idx)
        token = np.array([[idx]], dtype=np.int32)

        mask = causal_mask_1d(offset.item(), model.n_text_ctx)

        out = model.run_decoder(
            tokens=token, self_kv=self_kv, cross_kv=cross_kv, offset=offset, mask=mask
        )

        for i in range(1, len(out)):
            self_kv[i - 1][:, offset.item() : offset.item() + 1, :] = out[i]

        offset += 1
        idx = out[0][0, 0].argmax()

    print(ans)

    s = b""
    for i in ans:
        if i in id2token:
            s += base64.b64decode(id2token[i])

    print(s.decode().strip())


if __name__ == "__main__":
    main()


================================================
FILE: scripts/whisper/rknn/test_onnx.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

from typing import List, Tuple

import numpy as np
import onnxruntime as ort
import torch
import whisper

from test_torch import compute_feat
from export_onnx import causal_mask_1d, get_args


class OnnxModel:
    def __init__(
        self,
        encoder: str,
        decoder: str,
    ):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 4

        self.session_opts = session_opts

        self.init_encoder(encoder)
        self.init_decoder(decoder)

    def init_encoder(self, encoder: str):
        self.encoder = ort.InferenceSession(
            encoder,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )

        self.encoder_input_names = []
        self.encoder_output_names = []

        print(f"-----{encoder}-----")
        print(f"----input----")
        for i in self.encoder.get_inputs():
            print(i)
            self.encoder_input_names.append(i.name)

        print("-----output-----")

        for i in self.encoder.get_outputs():
            print(i)
            self.encoder_output_names.append(i.name)

        meta = self.encoder.get_modelmeta().custom_metadata_map
        self.n_text_layer = int(meta["n_text_layer"])
        self.n_text_ctx = int(meta["n_text_ctx"])
        self.n_text_state = int(meta["n_text_state"])

    def init_decoder(self, decoder: str):
        self.decoder = ort.InferenceSession(
            decoder,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )

        self.decoder_input_names = []
        self.decoder_output_names = []

        print(f"-----{decoder}-----")
        print(f"----input----")
        for i in self.decoder.get_inputs():
            print(i)
            self.decoder_input_names.append(i.name)

        print("-----output-----")

        for i in self.decoder.get_outputs():
            print(i)
            self.decoder_output_names.append(i.name)

    def run_encoder(
        self,
        mel: np.ndarray,
    ) -> List[np.ndarray]:
        cross_kv = self.encoder.run(
            self.encoder_output_names,
            {
                self.encoder.get_inputs()[0].name: mel,
            },
        )
        return cross_kv

    def run_decoder(self, inputs: List[np.ndarray]) -> List[np.ndarray]:
        feed = {
            self.decoder.get_inputs()[i].name: inputs[i] for i in range(len(inputs))
        }

        out = self.decoder.run(
            self.decoder_output_names,
            feed,
        )
        return out

    def get_self_cache(self) -> List[np.ndarray]:
        self_cache = []
        batch_size = 1
        for i in range(self.n_text_layer):
            k = np.zeros(
                (batch_size, self.n_text_ctx, self.n_text_state), dtype=np.float32
            )
            v = np.zeros(
                (batch_size, self.n_text_ctx, self.n_text_state), dtype=np.float32
            )
            self_cache.extend([k, v])
        return self_cache


def main():
    args = get_args()
    print(vars(args))

    torch_model = whisper.load_model(args.model)
    tokenizer = whisper.tokenizer.get_tokenizer(
        torch_model.is_multilingual, num_languages=torch_model.num_languages
    )

    mel = compute_feat("./en-16k.wav").numpy()
    print(mel.shape)  # (1, 80. 3000)
    model = OnnxModel(f"./{args.model}-encoder.onnx", f"./{args.model}-decoder.onnx")

    sot_sequence = list(tokenizer.sot_sequence) + [tokenizer.no_timestamps]

    # tiny.en: [50257, 50362]
    # tiny: [50258, 50259, 50359, 50363]
    print("sot sequence", sot_sequence)

    cross_kv = model.run_encoder(mel)
    print(len(cross_kv))  # 8

    self_kv = model.get_self_cache()

    # tiny.en: 50256
    # tiny: 50257
    eot = tokenizer.eot
    print("eot", eot)

    offset = np.array([0], dtype=np.int32)
    for t in sot_sequence:
        token = np.array([[t]], dtype=np.int32)  # sot
        mask = causal_mask_1d(offset.item(), model.n_text_ctx).numpy()

        out = model.run_decoder([token] + self_kv + cross_kv + [offset, mask])

        for i in range(1, len(out)):
            self_kv[i - 1][:, offset.item() : offset.item() + 1, :] = out[i]

        offset += 1

    idx = out[0][0, 0].argmax()

    ans = []

    while idx != eot and offset.item() < 200:
        ans.append(idx)
        token = np.array([[idx]], dtype=np.int32)  # no_timestamps
        for i in range(1, len(out)):
            self_kv[i - 1][:, offset.item() : offset.item() + 1, :] = out[i]

        mask = causal_mask_1d(offset.item(), model.n_text_ctx).numpy()

        out = model.run_decoder([token] + self_kv + cross_kv + [offset, mask])
        idx = out[0][0, 0].argmax()

        offset += 1

    print(ans)
    text = "".join(tokenizer.decode(ans)).strip()
    print(text)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/whisper/rknn/test_qnn.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

from typing import Tuple

import numpy as np
import soundfile as sf
import torch
import whisper

from export_onnx import AudioEncoderTensorCache, TextDecoderTensorCache, causal_mask_1d
from test_torch import compute_feat


@torch.no_grad()
def main():
    mel = compute_feat("en.wav")

    model = whisper.load_model("tiny.en")
    tokenizer = whisper.tokenizer.get_tokenizer(
        model.is_multilingual, num_languages=model.num_languages
    )

    model.eval()

    cross_kv_pair = []
    for i in range(4):
        k = features = np.fromfile(f"./cross_k_{i}.raw", dtype=np.float32).reshape(
            1, 1500, 384
        )
        v = features = np.fromfile(f"./cross_v_{i}.raw", dtype=np.float32).reshape(
            1, 1500, 384
        )

        k = torch.from_numpy(k)
        v = torch.from_numpy(v)

        cross_kv_pair.append((k, v))

    n_audio = mel.shape[0]

    decoder = TextDecoderTensorCache(model.decoder, model.dims.n_text_ctx)

    self_kv_pair = []
    for i in range(model.dims.n_text_layer):
        k = torch.zeros(n_audio, model.dims.n_text_ctx, model.dims.n_text_state)
        v = torch.zeros(n_audio, model.dims.n_text_ctx, model.dims.n_text_state)
        self_kv_pair.append((k, v))

    offset = torch.zeros(1, dtype=torch.int64).to(mel.device)

    mask = causal_mask_1d(offset.item(), model.dims.n_text_ctx)

    tokens = torch.tensor([[tokenizer.sot]])
    logits, this_self_kv_pair = decoder(
        tokens,
        self_kv_pair,
        cross_kv_pair,
        offset,
        mask,
    )
    for (k_cache, v_cache), (k, v) in zip(self_kv_pair, this_self_kv_pair):
        k_cache[:, offset : offset + 1] = k
        v_cache[:, offset : offset + 1] = v

    offset += 1

    mask = causal_mask_1d(offset.item(), model.dims.n_text_ctx)

    tokens = torch.tensor([[tokenizer.no_timestamps]])
    logits, this_self_kv_pair = decoder(
        tokens, self_kv_pair, cross_kv_pair, offset, mask
    )

    for (k_cache, v_cache), (k, v) in zip(self_kv_pair, this_self_kv_pair):
        k_cache[:, offset : offset + 1] = k
        v_cache[:, offset : offset + 1] = v

    assert logits.shape == (n_audio, tokens.shape[1], model.dims.n_vocab)

    print("logits.shape", logits.shape)  # (1, 3, 51864)
    idx = logits[0, -1].argmax().item()

    steps = 0
    results = []
    while idx != tokenizer.eot and steps < 50:
        results.append(idx)
        tokens = torch.tensor([[results[-1]]])

        offset += 1
        mask = causal_mask_1d(offset.item(), model.dims.n_text_ctx)

        logits, this_self_kv_pair = decoder(
            tokens, self_kv_pair, cross_kv_pair, offset, mask
        )

        for (k_cache, v_cache), (k, v) in zip(self_kv_pair, this_self_kv_pair):
            k_cache[:, offset : offset + 1] = k
            v_cache[:, offset : offset + 1] = v

        idx = logits[0, -1].argmax().item()
        steps += 1

    print(results)
    print(tokenizer.decode(results))


if __name__ == "__main__":
    torch.set_num_threads(1)
    torch.set_num_interop_threads(1)
    # To fix
    # TypeError: scaled_dot_product_attention(): argument 'is_causal' must be bool, not Tensor
    # See also https://github.com/k2-fsa/sherpa-onnx/issues/1764
    from whisper.model import disable_sdpa

    with disable_sdpa():
        main()


================================================
FILE: scripts/whisper/rknn/test_torch.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

from typing import Tuple

import numpy as np
import soundfile as sf
import torch
import whisper

from export_onnx import (
    AudioEncoderTensorCache,
    TextDecoderTensorCache,
    causal_mask_1d,
    get_args,
)


def load_audio(filename: str) -> Tuple[np.ndarray, int]:
    data, sample_rate = sf.read(
        filename,
        always_2d=True,
        dtype="float32",
    )
    data = data[:, 0]  # use only the first channel
    samples = np.ascontiguousarray(data)
    return samples, sample_rate


def compute_feat(filename: str):
    wave, sample_rate = load_audio(filename)
    if sample_rate != 16000:
        import librosa

        wave = librosa.resample(wave, orig_sr=sample_rate, target_sr=16000)
        sample_rate = 16000

    audio = whisper.pad_or_trim(wave)
    assert audio.shape == (16000 * 30,), audio.shape

    mel = whisper.log_mel_spectrogram(audio, n_mels=80).unsqueeze(0)
    assert mel.shape == (1, 80, 3000), mel.shape

    return mel


@torch.no_grad()
def main():
    args = get_args()
    print(vars(args))
    mel = compute_feat("en.wav")

    model = whisper.load_model(args.model, device="cpu")
    tokenizer = whisper.tokenizer.get_tokenizer(
        model.is_multilingual, num_languages=model.num_languages
    )

    model.eval()

    encoder = AudioEncoderTensorCache(model.encoder, model.decoder)

    cross_kv_pair = encoder(mel)

    n_audio = mel.shape[0]

    decoder = TextDecoderTensorCache(model.decoder, model.dims.n_text_ctx)

    self_kv_pair = []
    for i in range(model.dims.n_text_layer):
        k = torch.zeros(n_audio, model.dims.n_text_ctx, model.dims.n_text_state)
        v = torch.zeros(n_audio, model.dims.n_text_ctx, model.dims.n_text_state)
        self_kv_pair.append((k, v))

    offset = torch.zeros(1, dtype=torch.int64).to(mel.device)

    mask = causal_mask_1d(offset.item(), model.dims.n_text_ctx)

    tokens = torch.tensor([[tokenizer.sot]])
    logits, this_self_kv_pair = decoder(
        tokens,
        self_kv_pair,
        cross_kv_pair,
        offset,
        mask,
    )
    for (k_cache, v_cache), (k, v) in zip(self_kv_pair, this_self_kv_pair):
        k_cache[:, offset : offset + 1] = k
        v_cache[:, offset : offset + 1] = v

    offset += 1

    mask = causal_mask_1d(offset.item(), model.dims.n_text_ctx)

    tokens = torch.tensor([[tokenizer.no_timestamps]])
    logits, this_self_kv_pair = decoder(
        tokens, self_kv_pair, cross_kv_pair, offset, mask
    )

    for (k_cache, v_cache), (k, v) in zip(self_kv_pair, this_self_kv_pair):
        k_cache[:, offset : offset + 1] = k
        v_cache[:, offset : offset + 1] = v

    assert logits.shape == (n_audio, tokens.shape[1], model.dims.n_vocab)

    print("logits.shape", logits.shape)  # (1, 3, 51864)
    idx = logits[0, -1].argmax().item()

    steps = 0
    results = []
    while idx != tokenizer.eot and steps < 50:
        results.append(idx)
        tokens = torch.tensor([[results[-1]]])

        offset += 1
        mask = causal_mask_1d(offset.item(), model.dims.n_text_ctx)

        logits, this_self_kv_pair = decoder(
            tokens, self_kv_pair, cross_kv_pair, offset, mask
        )

        for (k_cache, v_cache), (k, v) in zip(self_kv_pair, this_self_kv_pair):
            k_cache[:, offset : offset + 1] = k
            v_cache[:, offset : offset + 1] = v

        idx = logits[0, -1].argmax().item()
        steps += 1

    print(results)
    print(tokenizer.decode(results))


if __name__ == "__main__":
    torch.set_num_threads(1)
    torch.set_num_interop_threads(1)
    try:
        # To fix
        # TypeError: scaled_dot_product_attention(): argument 'is_causal' must be bool, not Tensor
        # See also https://github.com/k2-fsa/sherpa-onnx/issues/1764
        from whisper.model import disable_sdpa

        with disable_sdpa():
            main()
    except:
        main()


================================================
FILE: scripts/whisper/rknn/tiny-en-onnx-info.md
================================================
# tiny.en encoder

```
----input----
NodeArg(name='tiny.en-mel', type='tensor(float)', shape=[1, 80, 3000])

-----output-----
NodeArg(name='cross_k_0', type='tensor(float)', shape=[1, 1500, 384])
NodeArg(name='cross_v_0', type='tensor(float)', shape=[1, 1500, 384])
NodeArg(name='cross_k_1', type='tensor(float)', shape=[1, 1500, 384])
NodeArg(name='cross_v_1', type='tensor(float)', shape=[1, 1500, 384])
NodeArg(name='cross_k_2', type='tensor(float)', shape=[1, 1500, 384])
NodeArg(name='cross_v_2', type='tensor(float)', shape=[1, 1500, 384])
NodeArg(name='cross_k_3', type='tensor(float)', shape=[1, 1500, 384])
NodeArg(name='cross_v_3', type='tensor(float)', shape=[1, 1500, 384])
```

# tiny.en decoder

```
----input----
NodeArg(name='tiny.en-tokens', type='tensor(int32)', shape=[1, 1])
NodeArg(name='tiny.en-self_k_0', type='tensor(float)', shape=[1, 448, 384])
NodeArg(name='tiny.en-self_v_0', type='tensor(float)', shape=[1, 448, 384])
NodeArg(name='tiny.en-self_k_1', type='tensor(float)', shape=[1, 448, 384])
NodeArg(name='tiny.en-self_v_1', type='tensor(float)', shape=[1, 448, 384])
NodeArg(name='tiny.en-self_k_2', type='tensor(float)', shape=[1, 448, 384])
NodeArg(name='tiny.en-self_v_2', type='tensor(float)', shape=[1, 448, 384])
NodeArg(name='tiny.en-self_k_3', type='tensor(float)', shape=[1, 448, 384])
NodeArg(name='tiny.en-self_v_3', type='tensor(float)', shape=[1, 448, 384])
NodeArg(name='tiny.en-cross_k_0', type='tensor(float)', shape=[1, 1500, 384])
NodeArg(name='tiny.en-cross_v_0', type='tensor(float)', shape=[1, 1500, 384])
NodeArg(name='tiny.en-cross_k_1', type='tensor(float)', shape=[1, 1500, 384])
NodeArg(name='tiny.en-cross_v_1', type='tensor(float)', shape=[1, 1500, 384])
NodeArg(name='tiny.en-cross_k_2', type='tensor(float)', shape=[1, 1500, 384])
NodeArg(name='tiny.en-cross_v_2', type='tensor(float)', shape=[1, 1500, 384])
NodeArg(name='tiny.en-cross_k_3', type='tensor(float)', shape=[1, 1500, 384])
NodeArg(name='tiny.en-cross_v_3', type='tensor(float)', shape=[1, 1500, 384])
NodeArg(name='tiny.en-offset', type='tensor(int32)', shape=[1])
NodeArg(name='tiny.en-mask', type='tensor(int32)', shape=[448])

-----output-----

NodeArg(name='tiny.en-logits', type='tensor(float)', shape=[1, 1, 51864])
NodeArg(name='tiny.en-this_self_k_0', type='tensor(float)', shape=[1, 1, 384])
NodeArg(name='tiny.en-this_self_v_0', type='tensor(float)', shape=[1, 1, 384])
NodeArg(name='tiny.en-this_self_k_1', type='tensor(float)', shape=[1, 1, 384])
NodeArg(name='tiny.en-this_self_v_1', type='tensor(float)', shape=[1, 1, 384])
NodeArg(name='tiny.en-this_self_k_2', type='tensor(float)', shape=[1, 1, 384])
NodeArg(name='tiny.en-this_self_v_2', type='tensor(float)', shape=[1, 1, 384])
NodeArg(name='tiny.en-this_self_k_3', type='tensor(float)', shape=[1, 1, 384])
NodeArg(name='tiny.en-this_self_v_3', type='tensor(float)', shape=[1, 1, 384])
```


================================================
FILE: scripts/whisper/test.py
================================================
#!/usr/bin/env python3
# Copyright    2023  Xiaomi Corp.        (authors: Fangjun Kuang)
"""
Please first run ./export-onnx.py
before you run this script
"""
import argparse
import base64
from typing import Tuple

import kaldi_native_fbank as knf
import numpy as np
import onnxruntime as ort
import soundfile as sf
import torch


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--encoder",
        type=str,
        required=True,
        help="Path to the encoder",
    )

    parser.add_argument(
        "--decoder",
        type=str,
        required=True,
        help="Path to the decoder",
    )

    parser.add_argument(
        "--tokens",
        type=str,
        required=True,
        help="Path to the tokens",
    )

    parser.add_argument(
        "--language",
        type=str,
        help="""The actual spoken language in the audio.
        Example values, en, de, zh, jp, fr.
        If None, we will detect the language using the first 30s of the
        input audio
        """,
    )

    parser.add_argument(
        "--task",
        choices=["transcribe", "translate"],
        type=str,
        default="transcribe",
        help="Valid values are: transcribe, translate",
    )

    parser.add_argument(
        "--test-attention",
        action="store_true",
        help="Test cross-attention outputs (requires attention-enabled model)",
    )

    parser.add_argument(
        "sound_file",
        type=str,
        help="Path to the test wave",
    )
    return parser.parse_args()


class OnnxModel:
    def __init__(
        self,
        encoder: str,
        decoder: str,
    ):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 4

        self.session_opts = session_opts

        self.init_encoder(encoder)
        self.init_decoder(decoder)

    def init_encoder(self, encoder: str):
        self.encoder = ort.InferenceSession(
            encoder,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )

        meta = self.encoder.get_modelmeta().custom_metadata_map
        self.n_text_layer = int(meta["n_text_layer"])
        self.n_text_ctx = int(meta["n_text_ctx"])
        self.n_text_state = int(meta["n_text_state"])
        self.n_mels = int(meta["n_mels"])
        self.sot = int(meta["sot"])
        self.eot = int(meta["eot"])
        self.translate = int(meta["translate"])
        self.transcribe = int(meta["transcribe"])
        self.no_timestamps = int(meta["no_timestamps"])
        self.no_speech = int(meta["no_speech"])
        self.blank = int(meta["blank_id"])

        self.sot_sequence = list(map(int, meta["sot_sequence"].split(",")))
        self.sot_sequence.append(self.no_timestamps)

        self.all_language_tokens = list(
            map(int, meta["all_language_tokens"].split(","))
        )
        self.all_language_codes = meta["all_language_codes"].split(",")
        self.lang2id = dict(zip(self.all_language_codes, self.all_language_tokens))
        self.id2lang = dict(zip(self.all_language_tokens, self.all_language_codes))

        self.is_multilingual = int(meta["is_multilingual"]) == 1

    def init_decoder(self, decoder: str):
        self.decoder = ort.InferenceSession(
            decoder,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )

    def run_encoder(
        self,
        mel: torch.Tensor,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        n_layer_cross_k, n_layer_cross_v = self.encoder.run(
            [
                self.encoder.get_outputs()[0].name,
                self.encoder.get_outputs()[1].name,
            ],
            {
                self.encoder.get_inputs()[0].name: mel.numpy(),
            },
        )
        return torch.from_numpy(n_layer_cross_k), torch.from_numpy(n_layer_cross_v)

    def run_decoder(
        self,
        tokens: torch.Tensor,
        n_layer_self_k_cache: torch.Tensor,
        n_layer_self_v_cache: torch.Tensor,
        n_layer_cross_k: torch.Tensor,
        n_layer_cross_v: torch.Tensor,
        offset: torch.Tensor,
        return_attention: bool = False,
    ):
        # Caller must verify decoder has 4 outputs before passing return_attention=True
        logits, out_n_layer_self_k_cache, out_n_layer_self_v_cache, *rest = self.decoder.run(
            [
                self.decoder.get_outputs()[0].name,
                self.decoder.get_outputs()[1].name,
                self.decoder.get_outputs()[2].name,
                *([self.decoder.get_outputs()[3].name] if return_attention else []),
            ],
            {
                self.decoder.get_inputs()[0].name: tokens.numpy(),
                self.decoder.get_inputs()[1].name: n_layer_self_k_cache.numpy(),
                self.decoder.get_inputs()[2].name: n_layer_self_v_cache.numpy(),
                self.decoder.get_inputs()[3].name: n_layer_cross_k.numpy(),
                self.decoder.get_inputs()[4].name: n_layer_cross_v.numpy(),
                self.decoder.get_inputs()[5].name: offset.numpy(),
            },
        )
        return (
            torch.from_numpy(logits),
            torch.from_numpy(out_n_layer_self_k_cache),
            torch.from_numpy(out_n_layer_self_v_cache),
            torch.from_numpy(rest[0]) if return_attention else None,
        )

    def get_self_cache(self) -> Tuple[torch.Tensor, torch.Tensor]:
        batch_size = 1
        n_layer_self_k_cache = torch.zeros(
            self.n_text_layer,
            batch_size,
            self.n_text_ctx,
            self.n_text_state,
        )
        n_layer_self_v_cache = torch.zeros(
            self.n_text_layer,
            batch_size,
            self.n_text_ctx,
            self.n_text_state,
        )
        return n_layer_self_k_cache, n_layer_self_v_cache

    def suppress_tokens(self, logits, is_initial: bool) -> None:
        # suppress blank
        if is_initial:
            logits[self.eot] = float("-inf")
            logits[self.blank] = float("-inf")

        # suppress <|notimestamps|>
        logits[self.no_timestamps] = float("-inf")

        logits[self.sot] = float("-inf")
        logits[self.no_speech] = float("-inf")

        # logits is changed in-place
        logits[self.translate] = float("-inf")

    def detect_language(
        self, n_layer_cross_k: torch.Tensor, n_layer_cross_v: torch.Tensor
    ) -> int:
        tokens = torch.tensor([[self.sot]], dtype=torch.int64)
        offset = torch.zeros(1, dtype=torch.int64)
        n_layer_self_k_cache, n_layer_self_v_cache = self.get_self_cache()

        logits, n_layer_self_k_cache, n_layer_self_v_cache, _ = self.run_decoder(
            tokens=tokens,
            n_layer_self_k_cache=n_layer_self_k_cache,
            n_layer_self_v_cache=n_layer_self_v_cache,
            n_layer_cross_k=n_layer_cross_k,
            n_layer_cross_v=n_layer_cross_v,
            offset=offset,
        )
        logits = logits.reshape(-1)
        mask = torch.ones(logits.shape[0], dtype=torch.int64)
        mask[self.all_language_tokens] = 0
        logits[mask != 0] = float("-inf")
        lang_id = logits.argmax().item()
        print("detected language: ", self.id2lang[lang_id])
        return lang_id


def load_tokens(filename):
    tokens = dict()
    with open(filename, "r") as f:
        for line in f:
            t, i = line.split()
            tokens[int(i)] = t
    return tokens


def verify_attention(attention_weights, n_audio_ctx, tokens, token_table):
    """Verify attention weights and print approximate timestamps."""
    if not attention_weights:
        print("No attention weights to verify")
        return

    n_heads = attention_weights[0].shape[1]
    print("\n--- Attention Verification ---")
    print(f"Alignment heads: {n_heads}, Audio frames: {n_audio_ctx}, Tokens: {len(tokens)}")

    for i, attn in enumerate(attention_weights):
        expected = (1, n_heads, 1, n_audio_ctx)
        if tuple(attn.shape) != expected:
            print(f"  Token {i}: expected shape {expected}, got {tuple(attn.shape)}")

    print("\n--- Approximate Timestamps ---")
    for i, attn in enumerate(attention_weights):
        peak_frame = attn.mean(dim=1).squeeze().argmax().item()
        timestamp = peak_frame * 0.02
        token_str = token_table.get(tokens[i], f"<{tokens[i]}>")
        try:
            token_display = base64.b64decode(token_str).decode()
        except Exception:
            token_display = token_str
        print(f"  Token {i} ({token_display!r}): ~{timestamp:.2f}s")


def load_audio(filename: str) -> Tuple[np.ndarray, int]:
    data, sample_rate = sf.read(
        filename,
        always_2d=True,
        dtype="float32",
    )
    data = data[:, 0]  # use only the first channel
    samples = np.ascontiguousarray(data)
    return samples, sample_rate


def compute_features(filename: str, dim: int = 80) -> torch.Tensor:
    """
    Args:
      filename:
        Path to an audio file.
    Returns:
      Return a 1-D float32 tensor of shape (1, 80, 3000) containing the features.
    """
    wave, sample_rate = load_audio(filename)
    if sample_rate != 16000:
        import librosa

        wave = librosa.resample(wave, orig_sr=sample_rate, target_sr=16000)
        sample_rate = 16000

    features = []
    opts = knf.WhisperFeatureOptions()
    opts.dim = dim
    online_whisper_fbank = knf.OnlineWhisperFbank(opts)
    online_whisper_fbank.accept_waveform(16000, wave)
    online_whisper_fbank.input_finished()
    for i in range(online_whisper_fbank.num_frames_ready):
        f = online_whisper_fbank.get_frame(i)
        f = torch.from_numpy(f)
        features.append(f)

    features = torch.stack(features)

    log_spec = torch.clamp(features, min=1e-10).log10()
    log_spec = torch.maximum(log_spec, log_spec.max() - 8.0)
    mel = (log_spec + 4.0) / 4.0
    # mel (T, 80)

    # We pad 1500 frames at the end so that it is able to detect eot
    # You can use another value instead of 1500.
    mel = torch.nn.functional.pad(mel, (0, 0, 0, 1500), "constant", 0)
    # Note that if it throws for a multilingual model,
    # please use a larger value, say 300

    target = 3000
    if mel.shape[0] > target:
        # -50 so that there are some zero tail paddings.
        mel = mel[: target - 50]
        mel = torch.nn.functional.pad(mel, (0, 0, 0, 50), "constant", 0)

    # We don't need to pad it to 30 seconds now!
    #  mel = torch.nn.functional.pad(mel, (0, 0, 0, target - mel.shape[0]), "constant", 0)

    mel = mel.t().unsqueeze(0)

    return mel


def main():
    args = get_args()

    model = OnnxModel(args.encoder, args.decoder)

    if args.test_attention and len(model.decoder.get_outputs()) < 4:
        raise RuntimeError(
            "--test-attention requires a model with cross-attention outputs. "
            "Use export-onnx-with-attention.py to export a compatible model."
        )

    n_mels = model.n_mels

    mel = compute_features(args.sound_file, dim=n_mels)

    n_layer_cross_k, n_layer_cross_v = model.run_encoder(mel)

    if args.language is not None:
        if model.is_multilingual is False and args.language != "en":
            print(f"This model supports only English. Given: {args.language}")
            return

        if args.language not in model.lang2id:
            print(f"Invalid language: {args.language}")
            print(f"Valid values are: {list(model.lang2id.keys())}")
            return

        # [sot, lang, task, notimestamps]
        model.sot_sequence[1] = model.lang2id[args.language]
    elif model.is_multilingual is True:
        print("detecting language")
        lang = model.detect_language(n_layer_cross_k, n_layer_cross_v)
        model.sot_sequence[1] = lang

    if args.task is not None:
        if model.is_multilingual is False and args.task != "transcribe":
            print("This model supports only English. Please use --task=transcribe")
            return
        assert args.task in ["transcribe", "translate"], args.task

        if args.task == "translate":
            model.sot_sequence[2] = model.translate

    n_layer_self_k_cache, n_layer_self_v_cache = model.get_self_cache()

    print(model.sot_sequence)
    tokens = torch.tensor([model.sot_sequence], dtype=torch.int64)
    offset = torch.zeros(1, dtype=torch.int64)
    logits, n_layer_self_k_cache, n_layer_self_v_cache, _ = model.run_decoder(
        tokens=tokens,
        n_layer_self_k_cache=n_layer_self_k_cache,
        n_layer_self_v_cache=n_layer_self_v_cache,
        n_layer_cross_k=n_layer_cross_k,
        n_layer_cross_v=n_layer_cross_v,
        offset=offset,
    )
    offset += len(model.sot_sequence)
    # logits.shape (batch_size, tokens.shape[1], vocab_size)
    logits = logits[0, -1]
    model.suppress_tokens(logits, is_initial=True)
    #  logits = logits.softmax(dim=-1)
    # for greedy search, we don't need to compute softmax or log_softmax
    max_token_id = logits.argmax(dim=-1)
    results = []
    all_attention_weights = []
    for i in range(model.n_text_ctx):
        if max_token_id == model.eot:
            break
        results.append(max_token_id.item())
        tokens = torch.tensor([[results[-1]]])

        logits, n_layer_self_k_cache, n_layer_self_v_cache, attn = model.run_decoder(
            tokens=tokens,
            n_layer_self_k_cache=n_layer_self_k_cache,
            n_layer_self_v_cache=n_layer_self_v_cache,
            n_layer_cross_k=n_layer_cross_k,
            n_layer_cross_v=n_layer_cross_v,
            offset=offset,
            return_attention=args.test_attention,
        )
        if attn is not None:
            all_attention_weights.append(attn)
        offset += 1
        logits = logits[0, -1]
        model.suppress_tokens(logits, is_initial=False)
        max_token_id = logits.argmax(dim=-1)
    token_table = load_tokens(args.tokens)
    s = b""
    for i in results:
        if i in token_table:
            s += base64.b64decode(token_table[i])

    print(s.decode().strip())

    if args.test_attention:
        n_audio_ctx = n_layer_cross_k.shape[2]
        verify_attention(all_attention_weights, n_audio_ctx, results, token_table)


if __name__ == "__main__":
    main()


================================================
FILE: scripts/whisper/test_torch.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)

import torch


from export_onnx import AudioEncoderTensorCache, TextDecoderTensorCache
from test import load_audio

import whisper


@torch.no_grad()
def main():
    wave, sample_rate = load_audio("en.wav")
    if sample_rate != 16000:
        import librosa

        wave = librosa.resample(wave, orig_sr=sample_rate, target_sr=16000)
        sample_rate = 16000

    audio = whisper.pad_or_trim(wave)
    assert audio.shape == (16000 * 30,), audio.shape

    mel = whisper.log_mel_spectrogram(audio, n_mels=80).unsqueeze(0)
    assert mel.shape == (1, 80, 3000), mel.shape

    model = whisper.load_model("tiny.en")
    tokenizer = whisper.tokenizer.get_tokenizer(
        model.is_multilingual, num_languages=model.num_languages
    )

    model.eval()

    encoder = AudioEncoderTensorCache(model.encoder, model.decoder)

    n_layer_cross_k, n_layer_cross_v = encoder(mel)
    print("n_layer_cross_k", n_layer_cross_k.shape)  # (4, 1, 1500, 384)
    print("n_layer_cross_v", n_layer_cross_v.shape)  # (4, 1, 1500, 384)

    n_audio = mel.shape[0]
    tokens = torch.tensor([[tokenizer.sot, tokenizer.sot, tokenizer.sot]] * n_audio).to(
        mel.device
    )  # [n_audio, 3]

    decoder = TextDecoderTensorCache(model.decoder, model.dims.n_text_ctx)

    n_layer_self_k_cache = torch.zeros(
        (
            len(model.decoder.blocks),
            n_audio,
            model.dims.n_text_ctx,
            model.dims.n_text_state,
        ),
        device=mel.device,
    )
    n_layer_self_v_cache = torch.zeros(
        (
            len(model.decoder.blocks),
            n_audio,
            model.dims.n_text_ctx,
            model.dims.n_text_state,
        ),
        device=mel.device,
    )
    offset = torch.zeros(1, dtype=torch.int64).to(mel.device)
    logits, n_layer_self_k_cache, n_layer_self_v_cache = decoder(
        tokens,
        n_layer_self_k_cache,
        n_layer_self_v_cache,
        n_layer_cross_k,
        n_layer_cross_v,
        offset,
    )
    assert logits.shape == (n_audio, tokens.shape[1], model.dims.n_vocab)
    assert n_layer_self_k_cache.shape == (
        model.dims.n_text_layer,
        n_audio,
        model.dims.n_text_ctx,
        model.dims.n_text_state,
    )
    assert n_layer_self_v_cache.shape == (
        model.dims.n_text_layer,
        n_audio,
        model.dims.n_text_ctx,
        model.dims.n_text_state,
    )

    offset = torch.zeros(1, dtype=torch.int64).to(mel.device)

    offset += len(tokenizer.sot_sequence)
    print("logits.shape", logits.shape)  # (1, 3, 51864)
    idx = logits[0, -1].argmax().item()

    steps = 0
    results = []
    while idx != tokenizer.eot and steps < 50:
        results.append(idx)
        tokens = torch.tensor([[results[-1]]])
        offset += 1

        logits, n_layer_self_k_cache, n_layer_self_v_cache = decoder(
            tokens,
            n_layer_self_k_cache,
            n_layer_self_v_cache,
            n_layer_cross_k,
            n_layer_cross_v,
            offset,
        )
        idx = logits[0, -1].argmax().item()
        print("idx", idx, "step", steps)
        steps += 1

    print(results)
    print(tokenizer.decode(results))


if __name__ == "__main__":
    torch.set_num_threads(1)
    torch.set_num_interop_threads(1)
    # To fix
    # TypeError: scaled_dot_product_attention(): argument 'is_causal' must be bool, not Tensor
    # See also https://github.com/k2-fsa/sherpa-onnx/issues/1764
    from whisper.model import disable_sdpa

    with disable_sdpa():
        main()


================================================
FILE: scripts/whisper/tools/timestamp_viewer.html
================================================
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Whisper Timestamp Viewer</title>
    <style>
        * {
            box-sizing: border-box;
        }
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
            max-width: 1400px;
            margin: 0 auto;
            padding: 20px;
            background: #f5f5f5;
        }
        h1 {
            text-align: center;
            color: #333;
        }
        .controls {
            background: white;
            padding: 20px;
            border-radius: 8px;
            margin-bottom: 20px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        .controls label {
            display: block;
            margin-bottom: 10px;
            font-weight: 600;
        }
        .controls input[type="file"] {
            margin-bottom: 15px;
        }
        .player-container {
            background: white;
            padding: 20px;
            border-radius: 8px;
            margin-bottom: 20px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
            position: sticky;
            top: 0;
            z-index: 100;
        }
        audio {
            width: 100%;
        }
        .current-time {
            text-align: center;
            font-size: 24px;
            font-weight: bold;
            color: #333;
            margin-top: 10px;
            font-family: monospace;
        }
        .columns-container {
            display: flex;
            gap: 20px;
            overflow-x: auto;
        }
        .column {
            flex: 1;
            min-width: 250px;
            background: white;
            border-radius: 8px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
            overflow: hidden;
        }
        .column-header {
            background: #4a90d9;
            color: white;
            padding: 10px 15px;
            font-weight: 600;
            position: sticky;
            top: 0;
            z-index: 10;
        }
        .column-content {
        }
        .token-row {
            display: flex;
            padding: 6px 12px;
            border-bottom: 1px solid #eee;
            font-size: 14px;
            transition: background-color 0.1s;
            cursor: pointer;
        }
        .token-row:hover {
            background: #f0f0f0;
        }
        .token-row:active {
            background: #e0e0e0;
        }
        .token-row:focus {
            outline: 2px solid #4a90d9;
            outline-offset: -2px;
            background: #e8f0fa;
        }
        .token-row.active {
            background: #fff3cd;
        }
        .token-row.past {
            background: #e8f5e9;
        }
        .token-text {
            flex: 1;
            font-family: monospace;
            white-space: pre;
        }
        .token-time {
            color: #666;
            font-family: monospace;
            font-size: 12px;
            margin-left: 10px;
        }
        .instructions {
            background: #e3f2fd;
            padding: 15px;
            border-radius: 8px;
            margin-bottom: 20px;
            color: #1565c0;
        }
        .csv-list {
            margin-top: 10px;
        }
        .csv-item {
            display: inline-flex;
            align-items: center;
            background: #e0e0e0;
            padding: 5px 10px;
            border-radius: 4px;
            margin: 5px 5px 5px 0;
        }
        .csv-item button {
            background: none;
            border: none;
            color: #666;
            cursor: pointer;
            margin-left: 8px;
            font-size: 16px;
        }
        .csv-item button:hover {
            color: #c00;
        }
    </style>
</head>
<body>
    <h1>Whisper Timestamp Viewer</h1>

    <div class="instructions">
        <strong>Instructions:</strong> Upload a WAV file and one or more CSV files containing token timestamps.
        CSV files should have columns: token, timestamp, duration.
        <br><br>
        <strong>Click</strong> a token to play just that token's audio.
        <strong>Shift+click</strong> to play continuously from that point.
    </div>

    <div class="controls">
        <label>
            Audio File (WAV):
            <input type="file" id="audioInput" accept=".wav,.mp3,.ogg,.m4a">
        </label>

        <label>
            CSV Files (can select multiple):
            <input type="file" id="csvInput" accept=".csv" multiple>
        </label>

        <div class="csv-list" id="csvList"></div>
    </div>

    <div class="player-container" id="playerContainer" style="display: none;">
        <audio id="audioPlayer" controls></audio>
        <div class="current-time" id="currentTime">0.000s</div>
    </div>

    <div class="columns-container" id="columnsContainer"></div>

    <script>
        const audioInput = document.getElementById('audioInput');
        const csvInput = document.getElementById('csvInput');
        const csvList = document.getElementById('csvList');
        const playerContainer = document.getElementById('playerContainer');
        const audioPlayer = document.getElementById('audioPlayer');
        const currentTimeDisplay = document.getElementById('currentTime');
        const columnsContainer = document.getElementById('columnsContainer');

        let csvData = [];  // Array of {name, tokens: [{token, timestamp, duration}]}

        // Handle audio file
        audioInput.addEventListener('change', (e) => {
            const file = e.target.files[0];
            if (file) {
                // Revoke previous URL if exists
                if (audioPlayer.src.startsWith('blob:')) {
                    URL.revokeObjectURL(audioPlayer.src);
                }
                const url = URL.createObjectURL(file);
                audioPlayer.src = url;
                playerContainer.style.display = 'block';
            }
        });

        // Handle CSV files
        csvInput.addEventListener('change', async (e) => {
            const files = Array.from(e.target.files);
            for (const file of files) {
                const text = await file.text();
                const tokens = parseCSV(text);
                csvData.push({name: file.name, tokens});
            }
            updateCsvList();
            renderColumns();
        });

        function parseCSV(text) {
            const lines = text.trim().split('\n');
            const tokens = [];

            // Skip header
            for (let i = 1; i < lines.length; i++) {
                const line = lines[i];
                // Handle CSV properly (quoted strings may contain commas)
                let token, rest;

                if (line.startsWith('"')) {
                    // Token is quoted
                    const endQuote = line.indexOf('",', 1);
                    if (endQuote !== -1) {
                        token = line.substring(1, endQuote);
                        rest = line.substring(endQuote + 2);
                    } else {
                        continue;
                    }
                } else {
                    // Token is not quoted
                    const firstComma = line.indexOf(',');
                    if (firstComma !== -1) {
                        token = line.substring(0, firstComma);
                        rest = line.substring(firstComma + 1);
                    } else {
                        continue;
                    }
                }

                const [timestamp, duration] = rest.split(',').map(s => parseFloat(s.trim()));
                if (!isNaN(timestamp) && !isNaN(duration)) {
                    tokens.push({token, timestamp, duration});
                }
            }

            return tokens;
        }

        function updateCsvList() {
            csvList.innerHTML = csvData.map((csv, i) => `
                <span class="csv-item">
                    ${escapeHtml(csv.name)}
                    <button onclick="removeCSV(${i})">&times;</button>
                </span>
            `).join('');
        }

        function removeCSV(index) {
            csvData.splice(index, 1);
            updateCsvList();
            renderColumns();
        }

        function escapeHtml(text) {
            const div = document.createElement('div');
            div.textContent = text;
            return div.innerHTML;
        }

        // Play a segment from start to end time, then pause and seek back to start
        let segmentStartTime = null;
        let segmentEndTime = null;
        let isPlayingSegment = false;

        function playSegment(start, end) {
            isPlayingSegment = true;
            segmentStartTime = start;
            segmentEndTime = end;
            audioPlayer.currentTime = start;
            audioPlayer.play();
        }

        function checkSegmentEnd() {
            if (segmentEndTime !== null && audioPlayer.currentTime >= segmentEndTime) {
                audioPlayer.pause();
                const seekBackTo = segmentStartTime;
                segmentStartTime = null;
                segmentEndTime = null;
                if (seekBackTo !== null) {
                    audioPlayer.currentTime = seekBackTo;
                }
            }
        }

        function renderColumns() {
            columnsContainer.innerHTML = csvData.map((csv, colIndex) => `
                <div class="column">
                    <div class="column-header">${escapeHtml(csv.name)}</div>
                    <div class="column-content" id="column-${colIndex}">
                        ${csv.tokens.map((t, i) => `
                            <div class="token-row" data-col="${colIndex}" data-idx="${i}"
                                 data-start="${t.timestamp}" data-end="${t.timestamp + t.duration}"
                                 tabindex="0" role="button">
                                <span class="token-text">${escapeHtml(t.token)}</span>
                                <span class="token-time">${t.timestamp.toFixed(2)}s</span>
                            </div>
                        `).join('')}
                    </div>
                </div>
            `).join('');

            // Add click and keyboard handlers to play just that token's timespan
            // Shift+click plays continuously from that point
            document.querySelectorAll('.token-row').forEach(row => {
                function activateRow(e) {
                    const start = parseFloat(row.dataset.start);
                    const end = parseFloat(row.dataset.end);
                    if (e.shiftKey) {
                        // Shift+click: play continuously from this point
                        segmentEndTime = null;
                        audioPlayer.currentTime = start;
                        audioPlayer.play();
                    } else {
                        // Regular click: play just this token
                        playSegment(start, end);
                    }
                }

                row.addEventListener('click', activateRow);
                row.addEventListener('keydown', (e) => {
                    if (e.key === 'Enter' || e.key === ' ') {
                        e.preventDefault();
                        activateRow(e);
                    }
                });
            });
        }

        // Update highlighting during playback
        audioPlayer.addEventListener('timeupdate', () => {
            const currentTime = audioPlayer.currentTime;
            currentTimeDisplay.textContent = currentTime.toFixed(3) + 's';

            document.querySelectorAll('.token-row').forEach(row => {
                const start = parseFloat(row.dataset.start);
                const end = parseFloat(row.dataset.end);

                row.classList.remove('active', 'past');

                if (currentTime >= start && currentTime < end) {
                    row.classList.add('active');
                } else if (currentTime >= end) {
                    row.classList.add('past');
                }
            });
        });

        // More frequent updates for smoother highlighting
        let animationFrame;
        let lastScrolledToken = null;

        audioPlayer.addEventListener('play', () => {
            function update() {
                const currentTime = audioPlayer.currentTime;
                currentTimeDisplay.textContent = currentTime.toFixed(3) + 's';

                let firstActiveRow = null;
                document.querySelectorAll('.token-row').forEach(row => {
                    const start = parseFloat(row.dataset.start);
                    const end = parseFloat(row.dataset.end);

                    row.classList.remove('active', 'past');

                    if (currentTime >= start && currentTime < end) {
                        row.classList.add('active');
                        if (!firstActiveRow) {
                            firstActiveRow = row;
                        }
                    } else if (currentTime >= end) {
                        row.classList.add('past');
                    }
                });

                // Auto-scroll to keep active token visible
                if (firstActiveRow && firstActiveRow !== lastScrolledToken) {
                    const rect = firstActiveRow.getBoundingClientRect();
                    const playerHeight = playerContainer.offsetHeight;
                    // If active token is below the visible area or too close to player
                    if (rect.top < playerHeight + 20 || rect.bottom > window.innerHeight - 50) {
                        firstActiveRow.scrollIntoView({behavior: 'smooth', block: 'center'});
                        lastScrolledToken = firstActiveRow;
                    }
                }

                // Check if we should stop at segment end
                checkSegmentEnd();

                if (!audioPlayer.paused) {
                    animationFrame = requestAnimationFrame(update);
                }
            }
            update();
        });

        audioPlayer.addEventListener('pause', () => {
            cancelAnimationFrame(animationFrame);
        });

        audioPlayer.addEventListener('seeked', () => {
            // Clear segment on manual seek (but not if triggered by playSegment)
            if (!isPlayingSegment && segmentEndTime !== null) {
                segmentStartTime = null;
                segmentEndTime = null;
            }
            // Clear the flag now that seek has completed
            isPlayingSegment = false;
            // Update highlighting immediately after seek
            const currentTime = audioPlayer.currentTime;
            lastScrolledToken = null;  // Reset so we scroll to new position
            let firstActiveRow = null;

            document.querySelectorAll('.token-row').forEach(row => {
                const start = parseFloat(row.dataset.start);
                const end = parseFloat(row.dataset.end);

                row.classList.remove('active', 'past');

                if (currentTime >= start && currentTime < end) {
                    row.classList.add('active');
                    if (!firstActiveRow) {
                        firstActiveRow = row;
                    }
                } else if (currentTime >= end) {
                    row.classList.add('past');
                }
            });

            // Scroll to active token on seek
            if (firstActiveRow) {
                firstActiveRow.scrollIntoView({behavior: 'smooth', block: 'center'});
            }
        });
    </script>
</body>
</html>


================================================
FILE: scripts/whisper/tools/whisper_timestamps_csv.py
================================================
#!/usr/bin/env python3
"""
Generate CSV file with token timestamps from a Whisper model.

Usage:
    python whisper_timestamps_csv.py \
        --encoder path/to/encoder.onnx \
        --decoder path/to/decoder.onnx \
        --tokens path/to/tokens.txt \
        --audio path/to/audio.wav \
        --output timestamps.csv \
        [--enable-segment-timestamps]
"""

import argparse
import csv
import wave
import numpy as np
import sherpa_onnx


def main():
    parser = argparse.ArgumentParser(
        description="Generate CSV with token timestamps from Whisper model"
    )
    parser.add_argument("--encoder", required=True, help="Path to encoder ONNX model")
    parser.add_argument("--decoder", required=True, help="Path to decoder ONNX model")
    parser.add_argument("--tokens", required=True, help="Path to tokens.txt file")
    parser.add_argument("--audio", required=True, help="Path to input WAV file")
    parser.add_argument("--output", required=True, help="Path to output CSV file")
    parser.add_argument(
        "--enable-segment-timestamps",
        action="store_true",
        help="Enable segment-level timestamps",
    )
    parser.add_argument(
        "--language", default="en", help="Language code (default: en)"
    )
    parser.add_argument(
        "--num-threads", type=int, default=4, help="Number of threads (default: 4)"
    )
    args = parser.parse_args()

    # Create recognizer
    recognizer = sherpa_onnx.OfflineRecognizer.from_whisper(
        encoder=args.encoder,
        decoder=args.decoder,
        tokens=args.tokens,
        language=args.language,
        task="transcribe",
        enable_token_timestamps=True,
        enable_segment_timestamps=args.enable_segment_timestamps,
        num_threads=args.num_threads,
    )

    # Load audio
    with wave.open(args.audio, "rb") as f:
        assert f.getnchannels() == 1, "Audio must be mono"
        assert f.getsampwidth() == 2, "Audio must be 16-bit"
        sample_rate = f.getframerate()
        samples = f.readframes(f.getnframes())

    samples = np.frombuffer(samples, dtype=np.int16).astype(np.float32) / 32768.0

    # Run recognition
    stream = recognizer.create_stream()
    stream.accept_waveform(sample_rate, samples)
    recognizer.decode_stream(stream)
    result = stream.result

    # Write CSV
    with open(args.output, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["token", "timestamp", "duration"])
        for token, ts, dur in zip(result.tokens, result.timestamps, result.durations):
            writer.writerow([token, f"{ts:.3f}", f"{dur:.3f}"])

    print(f"Wrote {len(result.tokens)} tokens to {args.output}")
    print(f"Full text: {result.text}")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/zipformer-ctc/ascend/2025-07-03/onnx_test.py
================================================
#!/usr/bin/env python3
# Copyright      2025  Xiaomi Corp.        (authors: Fangjun Kuang)

from typing import Tuple

import kaldi_native_fbank as knf
import numpy as np
import onnxruntime as ort
import soundfile as sf

BPE_UNK = chr(8263)
PRINTABLE_BASE_CHARS = (
    list(range(256, 287 + 1))
    + list(range(32, 126 + 1))
    + list(range(288, 305 + 1))
    + list(range(308, 318 + 1))
    + list(range(321, 328 + 1))
    + list(range(330, 382 + 1))
    + list(range(384, 422 + 1))
)


BYTE_TO_BCHAR = {b: chr(PRINTABLE_BASE_CHARS[b]) for b in range(256)}
BCHAR_TO_BYTE = {bc: b for b, bc in BYTE_TO_BCHAR.items()}
BCHAR_TO_BYTE[BPE_UNK] = 32  # map unk to space


def load_tokens(filename):
    ans = dict()
    i = 0
    with open(filename, encoding="utf-8") as f:
        for line in f:
            ans[i] = line.strip().split()[0]
            i += 1
    return ans


def load_audio(filename: str) -> Tuple[np.ndarray, int]:
    data, sample_rate = sf.read(
        filename,
        always_2d=True,
        dtype="float32",
    )
    data = data[:, 0]  # use only the first channel

    if sample_rate != 16000:
        import librosa

        data = librosa.resample(data, orig_sr=sample_rate, target_sr=16000)
        sample_rate = 16000

    samples = np.ascontiguousarray(data)
    return samples, sample_rate


def compute_feat(
    samples: np.ndarray,
    sample_rate: int,
    max_len: int,
):
    opts = knf.FbankOptions()
    opts.frame_opts.dither = 0
    opts.frame_opts.snip_edges = False
    opts.frame_opts.window_type = "povey"
    opts.frame_opts.samp_freq = sample_rate
    opts.mel_opts.num_bins = 80

    online_fbank = knf.OnlineFbank(opts)
    online_fbank.accept_waveform(sample_rate, samples.tolist())
    online_fbank.input_finished()

    features = np.stack(
        [online_fbank.get_frame(i) for i in range(online_fbank.num_frames_ready)]
    )

    if features.shape[0] > max_len:
        features = features[:max_len]
    elif features.shape[0] < max_len:
        features = np.pad(
            features,
            ((0, max_len - features.shape[0]), (0, 0)),
            mode="constant",
            constant_values=0,
        )

    features = np.ascontiguousarray(features)

    assert features.data.contiguous is True
    assert features.dtype == np.float32, features.dtype

    return features


class OnnxModel:
    def __init__(self, filename):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.session_opts = session_opts

        self.model = ort.InferenceSession(
            filename,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )
        shape = self.model.get_inputs()[0].shape
        self.max_len = shape[1]

        for i in self.model.get_inputs():
            print(i)

        print("-----")

        for i in self.model.get_outputs():
            print(i)

    def __call__(self, x):
        log_probs = self.model.run(
            [
                self.model.get_outputs()[0].name,
            ],
            {self.model.get_inputs()[0].name: x[None]},
        )[0]

        return log_probs


def main():
    wave = "./0.wav"
    wave = "./1.wav"
    samples, sample_rate = load_audio(wave)

    model = OnnxModel("./model.onnx")

    features = compute_feat(
        samples=samples,
        sample_rate=sample_rate,
        max_len=model.max_len,
    )
    print("features", features.shape)

    log_probs = model(features)

    idx = log_probs[0].argmax(axis=-1)
    print("idx", idx)
    print(len(idx))
    prev = -1
    ids = []
    for i in idx:
        if i != prev:
            ids.append(i)
        prev = i
    ids = [i for i in ids if i != 0]
    print(ids)

    tokens = load_tokens("./tokens.txt")
    text = "".join([tokens[i] for i in ids])

    s = b""
    for t in text:
        if t == "▁":
            continue
        elif t in BCHAR_TO_BYTE:
            s += bytes([BCHAR_TO_BYTE[t]])
        else:
            print("skip OOV", t)

    print(s.decode())


if __name__ == "__main__":
    main()


================================================
FILE: scripts/zipformer-ctc/ascend/2025-07-03/test_om.py
================================================
#!/usr/bin/env python3
# Copyright      2025  Xiaomi Corp.        (authors: Fangjun Kuang)

from ais_bench.infer.interface import InferSession

from onnx_test import BCHAR_TO_BYTE, compute_feat, load_audio, load_tokens


class OmModel:
    def __init__(self):
        self.model = InferSession(device_id=0, model_path="./model.om", debug=False)

        self.max_len = self.model.get_inputs()[0].shape[1]
        print("---model---")
        for i in self.model.get_inputs():
            print(i.name, i.datatype, i.shape)

        print("-----")

        for i in self.model.get_outputs():
            print(i.name, i.datatype, i.shape)

    def __call__(self, x):
        """
        Args:
          x: (N, T, C)
        Returns:
          log_probs: (N, T, vocab_size)
        """
        return self.model.infer([x], mode="static", custom_sizes=10000000)[0]


def main():
    samples, sample_rate = load_audio("./test_wavs/0.wav")
    model = OmModel()

    features = compute_feat(
        samples=samples, sample_rate=sample_rate, max_len=model.max_len
    )
    print("features.shape", features.shape)

    log_probs = model(x=features[None])
    print("log_probs.shape", log_probs.shape, type(log_probs))

    idx = log_probs[0].argmax(axis=-1)
    print("idx", idx)
    print(len(idx))
    prev = -1
    ids = []
    for i in idx:
        if i != prev:
            ids.append(i)
        prev = i
    ids = [i for i in ids if i != 0]
    print(ids)

    tokens = load_tokens("./tokens.txt")
    text = "".join([tokens[i] for i in ids])

    s = b""
    for t in text:
        if t == "▁":
            continue
        elif t in BCHAR_TO_BYTE:
            s += bytes([BCHAR_TO_BYTE[t]])
        else:
            print("skip OOV", t)

    print(s.decode())


if __name__ == "__main__":
    main()


================================================
FILE: scripts/zipvoice/zh-en/generate_lexicon.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)


from pypinyin import Style, lazy_pinyin, load_phrases_dict, phrases_dict, pinyin_dict
from pypinyin.contrib.tone_convert import to_finals_tone3, to_initials

load_phrases_dict(
    {
        "行长": [["hang2"], ["zhang3"]],
        "银行行长": [["yin2"], ["hang2"], ["hang2"], ["zhang3"]],
    }
)
user_defined = {
    "微调": ["wei1", "tiao2"],
    "这个": ["zhe4", "ge4"],
    "方便地": ["fang1", "bian2", "de1"],
}


def get_initial_final(token):
    if isinstance(token, list):
        ans = ""
        sep = ""
        for t in token:
            ans += sep + get_initial_final(t)
            sep = " "
        return ans

    initial = to_initials(token, strict=False)

    final = to_finals_tone3(
        token,
        strict=False,
        neutral_tone_with_five=True,
    )

    ans = ""
    if initial:
        ans = initial + "0"

    if final:
        ans += f" {final}"

    return ans


def main():
    filename = "lexicon.txt"

    word_dict = pinyin_dict.pinyin_dict
    phrases = phrases_dict.phrases_dict

    with open(filename, "w", encoding="utf-8") as f:
        for key in word_dict:
            if not (0x4E00 <= key <= 0x9FFF):
                continue

            w = chr(key)
            token = lazy_pinyin(
                w,
                style=Style.TONE3,
                tone_sandhi=True,
                neutral_tone_with_five=True,
            )[0]

            initial_final = get_initial_final(token)

            f.write(f"{w} {initial_final}\n")

        for key, value in user_defined.items():
            initial_final = get_initial_final(value)
            f.write(f"{key} {initial_final}\n")

        for key in phrases:
            if key in user_defined:
                continue
            token = lazy_pinyin(
                key,
                style=Style.TONE3,
                tone_sandhi=True,
                neutral_tone_with_five=True,
            )
            initial_final = get_initial_final(token)

            f.write(f"{key} {initial_final}\n")


if __name__ == "__main__":
    main()


================================================
FILE: scripts/zipvoice/zh-en/test_onnx.py
================================================
#!/usr/bin/env python3
# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)


import kaldi_native_fbank as knf
import numpy as np
import onnxruntime as ort
import soundfile as sf


def compute_features(samples):
    stft_config = knf.StftConfig(
        n_fft=1024,
        hop_length=256,
        win_length=1024,
        center=True,
        window_type="hann",
    )
    knf_stft = knf.Stft(stft_config)
    stft_result = knf_stft(samples.tolist())
    real = np.array(stft_result.real, dtype=np.float32).reshape(
        stft_result.num_frames, -1
    )
    imag = np.array(stft_result.imag, dtype=np.float32).reshape(
        stft_result.num_frames, -1
    )

    mag = np.sqrt(real * real + imag * imag).astype(np.float32)

    mel_opts = knf.MelBanksOptions()
    mel_opts.num_bins = 100
    mel_opts.low_freq = 0
    mel_opts.high_freq = 24000 // 2
    mel_opts.is_librosa = True
    mel_opts.norm = ""
    mel_opts.use_slaney_mel_scale = False

    frame_opts = knf.FrameExtractionOptions()
    frame_opts.samp_freq = 24000
    #  frame_opts.frame_length_ms = 1024 * 1000 / 24000
    #  frame_opts.frame_shift_ms = 256 * 1000 / 24000

    mel_filters = knf.MelBanks(mel_opts, frame_opts)
    mel_features = np.zeros((mag.shape[0], 100))
    for i in range(mag.shape[0]):
        mel_features[i] = mel_filters.compute(mag[i])
    print("sum", np.sum(mel_features), np.mean(mel_features))

    mel_features = np.log(mel_features + 1e-10)
    return mel_features


class OnnxModel:
    def __init__(
        self,
        text_encoder_path: str,
        fm_decoder_path: str,
        num_thread: int = 1,
    ):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = num_thread
        session_opts.intra_op_num_threads = num_thread

        self.session_opts = session_opts

        self.init_text_encoder(text_encoder_path)
        self.init_fm_decoder(fm_decoder_path)

    def init_text_encoder(self, model_path: str):
        self.text_encoder = ort.InferenceSession(
            model_path,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )

    def init_fm_decoder(self, model_path: str):
        self.fm_decoder = ort.InferenceSession(
            model_path,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )
        meta = self.fm_decoder.get_modelmeta().custom_metadata_map
        self.feat_dim = int(meta["feat_dim"])

    def run_text_encoder(
        self,
        tokens: np.ndarray,
        prompt_tokens: np.ndarray,
        prompt_features_len: np.ndarray,
        speed: np.ndarray,
    ) -> np.ndarray:
        out = self.text_encoder.run(
            [
                self.text_encoder.get_outputs()[0].name,
            ],
            {
                self.text_encoder.get_inputs()[0].name: tokens,
                self.text_encoder.get_inputs()[1].name: prompt_tokens,
                self.text_encoder.get_inputs()[2].name: prompt_features_len,
                self.text_encoder.get_inputs()[3].name: speed,
            },
        )
        return out[0]

    def run_fm_decoder(
        self,
        t: np.ndarray,
        x: np.ndarray,
        text_condition: np.ndarray,
        speech_condition: np.ndarray,
        guidance_scale: np.ndarray,
    ) -> np.ndarray:
        out = self.fm_decoder.run(
            [
                self.fm_decoder.get_outputs()[0].name,
            ],
            {
                self.fm_decoder.get_inputs()[0].name: t,
                self.fm_decoder.get_inputs()[1].name: x,
                self.fm_decoder.get_inputs()[2].name: text_condition,
                self.fm_decoder.get_inputs()[3].name: speech_condition,
                self.fm_decoder.get_inputs()[4].name: guidance_scale,
            },
        )
        return out[0]


class OnnxVocosModel:
    def __init__(
        self,
        filename: str,
    ):
        session_opts = ort.SessionOptions()
        session_opts.inter_op_num_threads = 1
        session_opts.intra_op_num_threads = 1

        self.session_opts = session_opts
        self.model = ort.InferenceSession(
            filename,
            sess_options=self.session_opts,
            providers=["CPUExecutionProvider"],
        )
        print(f"vocos {self.model.get_modelmeta().custom_metadata_map}")

        print("----------vocos----------")
        for i in self.model.get_inputs():
            print(i)

        print("-----")

        for i in self.model.get_outputs():
            print(i)
        print()

    def __call__(self, x: np.ndarray):
        """
        Args:
          x: (N, feat_dim, num_frames)
        Returns:
          mag: (N, n_fft/2+1, num_frames)
          x: (N, n_fft/2+1, num_frames)
          y: (N, n_fft/2+1, num_frames)

        The complex spectrum is mag * (x + j*y)
        """
        assert x.ndim == 3, x.shape
        assert x.shape[0] == 1, x.shape

        mag, x, y = self.model.run(
            [
                self.model.get_outputs()[0].name,
                self.model.get_outputs()[1].name,
                self.model.get_outputs()[2].name,
            ],
            {
                self.model.get_inputs()[0].name: x,
            },
        )

        return mag, x, y


def get_phones(text):
    if text[-1] != ".":
        text += "."

    word2tokens = dict()
    with open("./lexicon.txt", encoding="utf-8") as f:
        for line in f:
            fields = line.split()
            word = fields[0]
            tokens = fields[1:]
            word2tokens[word] = tokens

    token2id = dict()
    with open("./tokens.txt", encoding="utf-8") as f:
        for line in f:
            fields = line.strip().split()
            if len(fields) == 1:
                token2id[" "] = int(fields[0])
            else:
                token2id[fields[0]] = int(fields[1])

    tokens = []
    for w in text:
        if w in word2tokens:
            tokens += word2tokens[w]
        else:
            tokens.append(w)
    ids = []
    for t in tokens:
        if t in token2id:
            ids.append(token2id[t])
        else:
            print(f"skip {t}")

    return ids


def compute_rms(features):
    return np.sqrt(np.mean(np.square(features)))


def get_timestamps(num_steps, t_shift=1):
    steps = np.linspace(0, 1, num_steps + 1)
    if t_shift != 1:
        steps = t_shift * steps / (1 + (t_shift - 1) * steps)

    return steps.tolist()


def trim_leading_silence_energy(samples, frame_size=2048, hop=512, energy_thresh=0.5):
    energies = [
        np.sum(np.abs(samples[i : i + frame_size]) ** 2)
        for i in range(0, len(samples) - frame_size, hop)
    ]
    #  print(energies)
    # First frame whose energy exceeds threshold
    frame_index = next((i for i, e in enumerate(energies) if e > energy_thresh), 0)
    frame_index = max(frame_index - 3, 0)
    start_sample = frame_index * hop
    return samples[start_sample:]


def main():
    vocoder = OnnxVocosModel("./vocos_24khz.onnx")

    prompt_text = "各位村民, 大家新年好! 近期, 湖北省武汉市等多个地区"
    prompt_wav_filename = "news-female.wav"

    prompt_text = "本台消息, 中共中央国务院, 近日印发关于构建数据基础制度, 更好发挥数据要素作用的意见."
    prompt_wav_filename = "news-female-2.wav"

    prompt_text = "那还是三十六年前, 一九八七年. 我呢考上了武汉大学的计算机系."
    prompt_wav_filename = "leijun-1.wav"

    prompt_ids = get_phones(prompt_text)

    text = "小米的价值观是真诚, 热爱. 真诚，就是不欺人也不自欺. 热爱, 就是全心投入并享受其中."

    ids = get_phones(text)

    data, sample_rate = sf.read(
        prompt_wav_filename,
        always_2d=True,
        dtype="float32",
    )
    data = data[:, 0]  # use only the first channel
    samples = np.ascontiguousarray(data)
    if sample_rate != 24000:
        import librosa

        samples = librosa.resample(
            samples,
            orig_sr=sample_rate,
            target_sr=24000,
        )
        sample_rate = 24000

    assert len(samples.shape) == 1, samples.shape

    rms = compute_rms(samples)
    print("rms", rms)

    target_rms = 0.1
    if rms < target_rms:
        samples = samples * target_rms / rms
    new_rms = compute_rms(samples)

    print("new_rms", new_rms)

    prompt_features = compute_features(samples)
    print("features.shape", prompt_features.shape)

    feat_scale = 0.1
    prompt_features = prompt_features * feat_scale

    model = OnnxModel(
        text_encoder_path="./text_encoder_int8.onnx",
        fm_decoder_path="./fm_decoder_int8.onnx",
    )

    tokens = np.array([ids], dtype=np.int64)
    assert len(tokens.shape) == 2, tokens.shape

    prompt_tokens = np.array([prompt_ids], dtype=np.int64)
    assert len(prompt_tokens.shape) == 2, prompt_tokens.shape
    prompt_features_len = np.array(prompt_features.shape[0], dtype=np.int64)
    speed = np.array(1.0, dtype=np.float32)

    print(tokens.shape, prompt_tokens.shape, prompt_features_len)

    text_condition = model.run_text_encoder(
        tokens=tokens,
        prompt_tokens=prompt_tokens,
        prompt_features_len=prompt_features_len,
        speed=speed,
    )

    x = np.random.randn(*text_condition.shape).astype(np.float32)

    speech_condition = np.pad(
        prompt_features,
        pad_width=((0, x.shape[1] - prompt_features.shape[0]), (0, 0)),
        mode="constant",
        constant_values=0,
    )[None].astype(np.float32)

    print(speech_condition.shape, prompt_features.shape)

    guidance_scale = np.array(1.0, dtype=np.float32)

    num_steps = 8
    steps = get_timestamps(num_steps=num_steps, t_shift=0.5)
    for i in range(num_steps):
        t = np.array(steps[i], dtype=np.float32)
        v = model.run_fm_decoder(
            t=t,
            x=x,
            text_condition=text_condition,
            speech_condition=speech_condition,
            guidance_scale=guidance_scale,
        )
        x = x + v * (steps[i + 1] - steps[i])
    print("prompt_features", prompt_features.shape)
    x = x[:, prompt_features.shape[0] :]
    print("x", x.shape)

    x = x / feat_scale
    mel = x.transpose(0, 2, 1)
    mag, x, y = vocoder(mel)
    print("mag", mag.shape, x.shape, y.shape)

    stft_result = knf.StftResult(
        real=(mag * x)[0].transpose().reshape(-1).tolist(),
        imag=(mag * y)[0].transpose().reshape(-1).tolist(),
        num_frames=mag.shape[2],
    )
    config = knf.StftConfig(
        n_fft=1024,
        hop_length=256,
        win_length=1024,
        window_type="hann",
        center=True,
        pad_mode="reflect",
        normalized=False,
    )
    istft = knf.IStft(config)
    audio_vocos = istft(stft_result)

    audio_vocos = np.array(audio_vocos)
    audio_vocos = trim_leading_silence_energy(audio_vocos)

    #  if rms < target_rms:
    #      audio_vocos = audio_vocos / target_rms * rms

    sf.write("generated.wav", audio_vocos, sample_rate, "PCM_16")


if __name__ == "__main__":
    main()


================================================
FILE: setup.py
================================================
#!/usr/bin/env python3

import os
import re
from pathlib import Path

import setuptools

from cmake.cmake_extension import (
    BuildExtension,
    bdist_wheel,
    cmake_extension,
    get_binaries,
    is_windows,
    need_split_package,
)


def read_long_description():
    with open("README.md", encoding="utf8") as f:
        readme = f.read()
    return readme


def get_package_version():
    with open("CMakeLists.txt") as f:
        content = f.read()

    match = re.search(r"set\(SHERPA_ONNX_VERSION (.*)\)", content)
    latest_version = match.group(1).strip('"')

    cmake_args = os.environ.get("SHERPA_ONNX_CMAKE_ARGS", "")
    extra_version = ""
    if "-DSHERPA_ONNX_ENABLE_GPU=ON" in cmake_args:
        extra_version = "+cuda"

    cuda_version = os.environ.get("SHERPA_ONNX_CUDA_VERSION", "")
    if cuda_version:
        extra_version += cuda_version

    latest_version += extra_version

    return latest_version


package_name = "sherpa_onnx"

with open("sherpa-onnx/python/sherpa_onnx/__init__.py", "a") as f:
    f.write(f"__version__ = '{get_package_version()}'\n")


def get_binaries_to_install():
    if need_split_package():
        return None

    cmake_args = os.environ.get("SHERPA_ONNX_CMAKE_ARGS", "")
    if "-DSHERPA_ONNX_ENABLE_BINARY=OFF" in cmake_args:
        return None

    bin_dir = Path("build") / "sherpa_onnx" / "bin"
    bin_dir.mkdir(parents=True, exist_ok=True)
    suffix = ".exe" if is_windows() else ""

    binaries = get_binaries()

    exe = []
    for f in binaries:
        suffix = "" if (".dll" in f or ".lib" in f) else suffix
        t = bin_dir / (f + suffix)
        exe.append(str(t))
    return exe


setuptools.setup(
    name=package_name,
    python_requires=">=3.7",
    version=get_package_version(),
    author="The sherpa-onnx development team",
    author_email="dpovey@gmail.com",
    package_dir={
        "sherpa_onnx": "sherpa-onnx/python/sherpa_onnx",
    },
    packages=["sherpa_onnx"],
    data_files=(
        [
            (
                ("Scripts", get_binaries_to_install())
                if is_windows()
                else ("bin", get_binaries_to_install())
            )
        ]
        if get_binaries_to_install()
        else None
    ),
    url="https://github.com/k2-fsa/sherpa-onnx",
    long_description=read_long_description(),
    long_description_content_type="text/markdown",
    ext_modules=[cmake_extension("_sherpa_onnx")],
    cmdclass={"build_ext": BuildExtension, "bdist_wheel": bdist_wheel},
    zip_safe=False,
    classifiers=[
        "Programming Language :: C++",
        "Programming Language :: Python",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
    ],
    entry_points={
        "console_scripts": [
            "sherpa-onnx-cli=sherpa_onnx.cli:cli",
        ],
    },
    license="Apache licensed, as found in the LICENSE file",
    install_requires=["sherpa-onnx-core==1.12.31"] if need_split_package() else None,
)

with open("sherpa-onnx/python/sherpa_onnx/__init__.py", "r") as f:
    lines = f.readlines()

with open("sherpa-onnx/python/sherpa_onnx/__init__.py", "w") as f:
    for line in lines:
        if "__version__" in line:
            # skip __version__ = "x.x.x"
            continue
        f.write(line)


================================================
FILE: sherpa-onnx/CMakeLists.txt
================================================
add_subdirectory(csrc)
if(SHERPA_ONNX_ENABLE_PYTHON)
  add_subdirectory(python)
endif()

if(SHERPA_ONNX_ENABLE_JNI)
  add_subdirectory(jni)
endif()

if(SHERPA_ONNX_ENABLE_C_API)
  add_subdirectory(c-api)
endif()


================================================
FILE: sherpa-onnx/c-api/CMakeLists.txt
================================================
include_directories(${PROJECT_SOURCE_DIR})
add_library(sherpa-onnx-c-api c-api.cc)
target_link_libraries(sherpa-onnx-c-api sherpa-onnx-core)
target_include_directories(sherpa-onnx-c-api PUBLIC ${PROJECT_SOURCE_DIR})

if(BUILD_SHARED_LIBS)
  target_compile_definitions(sherpa-onnx-c-api PUBLIC SHERPA_ONNX_BUILD_SHARED_LIBS=1)
  target_compile_definitions(sherpa-onnx-c-api PUBLIC SHERPA_ONNX_BUILD_MAIN_LIB=1)
endif()

add_library(sherpa-onnx-cxx-api cxx-api.cc)
target_link_libraries(sherpa-onnx-cxx-api sherpa-onnx-c-api)
target_include_directories(sherpa-onnx-cxx-api PUBLIC ${PROJECT_SOURCE_DIR})

if(ANDROID OR (UNIX AND NOT APPLE))
  set_target_properties(sherpa-onnx-c-api PROPERTIES
    LINK_FLAGS "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/sherpa-onnx-symbols-c.lds"
  )
elseif(APPLE)
  set_target_properties(sherpa-onnx-c-api PROPERTIES
    LINK_FLAGS "-Wl,-exported_symbols_list,${CMAKE_CURRENT_SOURCE_DIR}/sherpa-onnx-symbols-c.exp"
  )
endif()

install(
  TARGETS
    sherpa-onnx-c-api
    sherpa-onnx-cxx-api
  DESTINATION
    lib
)

install(
  FILES
    c-api.h
    cxx-api.h
  DESTINATION
    include/sherpa-onnx/c-api
)


================================================
FILE: sherpa-onnx/c-api/Doxyfile
================================================
# Doxygen configuration for sherpa-onnx C and C++ public APIs.
# Run from this directory with:
#
#   doxygen Doxyfile
#
# HTML output is generated under ./doxygen-docs/html/.

DOXYFILE_ENCODING      = UTF-8
PROJECT_NAME           = "sherpa-onnx C API"
PROJECT_BRIEF          = "Public C API and C++ wrapper for sherpa-onnx"
PROJECT_NUMBER         = 1.0
OUTPUT_DIRECTORY       = doxygen-docs
CREATE_SUBDIRS         = NO
ALLOW_UNICODE_NAMES    = YES
OUTPUT_LANGUAGE        = English
BRIEF_MEMBER_DESC      = YES
REPEAT_BRIEF           = NO
ALWAYS_DETAILED_SEC    = NO
INLINE_INHERITED_MEMB  = NO
FULL_PATH_NAMES        = YES
STRIP_FROM_PATH        = ..
SHORT_NAMES            = NO
JAVADOC_AUTOBRIEF      = NO
QT_AUTOBRIEF           = NO
MULTILINE_CPP_IS_BRIEF = NO
INHERIT_DOCS           = YES
SEPARATE_MEMBER_PAGES  = NO
TAB_SIZE               = 2
OPTIMIZE_OUTPUT_FOR_C  = NO
OPTIMIZE_OUTPUT_JAVA   = NO
OPTIMIZE_FOR_FORTRAN   = NO
OPTIMIZE_OUTPUT_VHDL   = NO
MARKDOWN_SUPPORT       = YES
AUTOLINK_SUPPORT       = YES
BUILTIN_STL_SUPPORT    = YES
CPP_CLI_SUPPORT        = NO
SIP_SUPPORT            = NO
IDL_PROPERTY_SUPPORT   = YES
DISTRIBUTE_GROUP_DOC   = NO
GROUP_NESTED_COMPOUNDS = NO
SUBGROUPING            = YES
INLINE_GROUPED_CLASSES = NO
INLINE_SIMPLE_STRUCTS  = NO
TYPEDEF_HIDES_STRUCT   = NO
LOOKUP_CACHE_SIZE      = 2

EXTRACT_ALL            = YES
EXTRACT_PRIVATE        = NO
EXTRACT_PRIV_VIRTUAL   = NO
EXTRACT_PACKAGE        = NO
EXTRACT_STATIC         = NO
EXTRACT_LOCAL_CLASSES  = YES
EXTRACT_LOCAL_METHODS  = NO
EXTRACT_ANON_NSPACES   = NO
HIDE_UNDOC_MEMBERS     = NO
HIDE_UNDOC_CLASSES     = NO
HIDE_FRIEND_COMPOUNDS  = NO
HIDE_IN_BODY_DOCS      = NO
INTERNAL_DOCS          = NO
CASE_SENSE_NAMES       = YES
HIDE_SCOPE_NAMES       = NO
HIDE_COMPOUND_REFERENCE = NO
SHOW_HEADERFILE        = YES
SHOW_INCLUDE_FILES     = YES
SHOW_GROUPED_MEMB_INC  = NO
FORCE_LOCAL_INCLUDES   = NO
INLINE_INFO            = YES
SORT_MEMBER_DOCS       = YES
SORT_BRIEF_DOCS        = NO
SORT_MEMBERS_CTORS_1ST = NO
SORT_GROUP_NAMES       = NO
SORT_BY_SCOPE_NAME     = NO
STRICT_PROTO_MATCHING  = NO
GENERATE_TODOLIST      = YES
GENERATE_TESTLIST      = NO
GENERATE_BUGLIST       = NO
GENERATE_DEPRECATEDLIST = YES
ENABLED_SECTIONS       =
MAX_INITIALIZER_LINES  = 30
SHOW_USED_FILES        = YES
SHOW_FILES             = YES
SHOW_NAMESPACES        = YES
FILE_VERSION_FILTER    =
LAYOUT_FILE            =
CITE_BIB_FILES         =

QUIET                  = NO
WARNINGS               = YES
WARN_IF_UNDOCUMENTED   = NO
WARN_IF_DOC_ERROR      = YES
WARN_IF_INCOMPLETE_DOC = YES
WARN_NO_PARAMDOC       = NO
WARN_AS_ERROR          = NO
WARN_FORMAT            = "$file:$line: $text"
WARN_LOGFILE           =

INPUT                  = mainpage.md \
                         c-api.h \
                         cxx-api.h
INPUT_ENCODING         = UTF-8
FILE_PATTERNS          = *.h
RECURSIVE              = NO
EXCLUDE                =
EXCLUDE_SYMLINKS       = NO
EXCLUDE_PATTERNS       =
EXCLUDE_SYMBOLS        =
EXAMPLE_PATH           = ../../c-api-examples \
                         ../../cxx-api-examples
EXAMPLE_PATTERNS       = *.c \
                         *.cc \
                         *.h
EXAMPLE_RECURSIVE      = NO
IMAGE_PATH             =
INPUT_FILTER           =
FILTER_PATTERNS        =
FILTER_SOURCE_FILES    = NO
FILTER_SOURCE_PATTERNS =
USE_MDFILE_AS_MAINPAGE = mainpage.md
SOURCE_BROWSER         = YES
INLINE_SOURCES         = NO
STRIP_CODE_COMMENTS    = YES
REFERENCED_BY_RELATION = YES
REFERENCES_RELATION    = YES
REFERENCES_LINK_SOURCE = YES
SOURCE_TOOLTIPS        = YES
USE_HTAGS              = NO
VERBATIM_HEADERS       = YES

CLANG_ASSISTED_PARSING = NO
CLANG_ADD_INC_PATHS    = YES
CLANG_OPTIONS          =

ALPHABETICAL_INDEX     = YES
COLS_IN_ALPHA_INDEX    = 5
IGNORE_PREFIX          = SherpaOnnx

HTML_OUTPUT            = html
HTML_FILE_EXTENSION    = .html
HTML_HEADER            =
HTML_FOOTER            =
HTML_STYLESHEET        =
HTML_EXTRA_STYLESHEET  =
HTML_EXTRA_FILES       =
HTML_COLORSTYLE        = LIGHT
HTML_COLORSTYLE_HUE    = 220
HTML_COLORSTYLE_SAT    = 100
HTML_COLORSTYLE_GAMMA  = 80
HTML_TIMESTAMP         = NO
HTML_DYNAMIC_MENUS     = YES
HTML_DYNAMIC_SECTIONS  = YES
HTML_INDEX_NUM_ENTRIES = 100
GENERATE_DOCSET        = NO
GENERATE_HTMLHELP      = NO
GENERATE_CHI           = NO
GENERATE_QHP           = NO
GENERATE_ECLIPSEHELP   = NO
DISABLE_INDEX          = NO
GENERATE_TREEVIEW      = YES
ENUM_VALUES_PER_LINE   = 1
TREEVIEW_WIDTH         = 250
EXT_LINKS_IN_WINDOW    = NO
OBFUSCATE_EMAILS       = YES
HTML_FORMULA_FORMAT    = svg
FORMULA_FONTSIZE       = 10
FORMULA_MACROFILE      =
USE_MATHJAX            = NO
SEARCHENGINE           = YES
SERVER_BASED_SEARCH    = NO
EXTERNAL_SEARCH        = NO
SEARCHENGINE_URL       =
SEARCHDATA_FILE        = searchdata.xml
EXTERNAL_SEARCH_ID     =
EXTRA_SEARCH_MAPPINGS  =

LATEX_OUTPUT           = latex
GENERATE_LATEX         = NO
GENERATE_RTF           = NO
GENERATE_MAN           = NO
GENERATE_XML           = NO
GENERATE_DOCBOOK       = NO
GENERATE_AUTOGEN_DEF   = NO
GENERATE_PERLMOD       = NO

ENABLE_PREPROCESSING   = YES
MACRO_EXPANSION        = YES
EXPAND_ONLY_PREDEF     = YES
SEARCH_INCLUDES        = YES
INCLUDE_PATH           = ..
INCLUDE_FILE_PATTERNS  =
PREDEFINED             = SHERPA_ONNX_API= \
                         SHERPA_ONNX_EXPORT= \
                         SHERPA_ONNX_IMPORT= \
                         SHERPA_ONNX_DEPRECATED(x)=
EXPAND_AS_DEFINED      =
SKIP_FUNCTION_MACROS   = YES

TAGFILES               =
GENERATE_TAGFILE       =
ALLEXTERNALS           = NO
EXTERNAL_GROUPS        = YES
EXTERNAL_PAGES         = YES

CLASS_DIAGRAMS         = YES
HIDE_UNDOC_RELATIONS   = YES
HAVE_DOT               = YES
CLASS_GRAPH            = YES
COLLABORATION_GRAPH    = YES
GROUP_GRAPHS           = YES
UML_LOOK               = NO
UML_LIMIT_NUM_FIELDS   = 10
DOT_NUM_THREADS        = 0
DOT_FONTNAME           = Helvetica
DOT_FONTSIZE           = 10
DOT_FONTPATH           =
CLASS_GRAPH_WIDTH      = 1024
DOT_GRAPH_MAX_NODES    = 50
MAX_DOT_GRAPH_DEPTH    = 0
DOT_TRANSPARENT        = NO
DOT_MULTI_TARGETS      = NO
GENERATE_LEGEND        = YES
DOT_CLEANUP            = YES


================================================
FILE: sherpa-onnx/c-api/README.md
================================================
# Introduction


## View doc

You can find documentation for C API and CXX API at the following address:
<https://k2-fsa.github.io/sherpa/onnx/c-api/html/index.html>

## Generate doc

```bash
sudo apt install doxygen graphviz      # Ubuntu/Debian
brew install doxygen graphviz          # macOS
```

```bash
doxygen ./Doxyfile
```


================================================
FILE: sherpa-onnx/c-api/c-api.cc
================================================
// sherpa-onnx/c-api/c-api.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/c-api/c-api.h"

#include <algorithm>
#include <cstring>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "nlohmann/json.hpp"

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/audio-tagging.h"
#include "sherpa-onnx/csrc/circular-buffer.h"
#include "sherpa-onnx/csrc/display.h"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/keyword-spotter.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-punctuation.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/offline-speech-denoiser.h"
#include "sherpa-onnx/csrc/online-punctuation.h"
#include "sherpa-onnx/csrc/online-recognizer.h"
#include "sherpa-onnx/csrc/online-speech-denoiser.h"
#include "sherpa-onnx/csrc/resample.h"
#include "sherpa-onnx/csrc/speaker-embedding-extractor.h"
#include "sherpa-onnx/csrc/speaker-embedding-manager.h"
#include "sherpa-onnx/csrc/spoken-language-identification.h"
#include "sherpa-onnx/csrc/text-utils.h"
#include "sherpa-onnx/csrc/version.h"
#include "sherpa-onnx/csrc/voice-activity-detector.h"
#include "sherpa-onnx/csrc/wave-reader.h"
#include "sherpa-onnx/csrc/wave-writer.h"

#if SHERPA_ONNX_ENABLE_TTS == 1
#include "sherpa-onnx/csrc/offline-tts.h"
#endif

#if SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION == 1
#include "sherpa-onnx/csrc/offline-speaker-diarization.h"
#endif

const char *SherpaOnnxGetVersionStr() { return sherpa_onnx::GetVersionStr(); }
const char *SherpaOnnxGetGitSha1() { return sherpa_onnx::GetGitSha1(); }
const char *SherpaOnnxGetGitDate() { return sherpa_onnx::GetGitDate(); }

struct SherpaOnnxOnlineRecognizer {
  std::unique_ptr<sherpa_onnx::OnlineRecognizer> impl;
};

struct SherpaOnnxOnlineStream {
  std::unique_ptr<sherpa_onnx::OnlineStream> impl;
  explicit SherpaOnnxOnlineStream(std::unique_ptr<sherpa_onnx::OnlineStream> p)
      : impl(std::move(p)) {}
};

struct SherpaOnnxDisplay {
  std::unique_ptr<sherpa_onnx::Display> impl;
};

#define SHERPA_ONNX_OR(x, y) (x ? x : y)

static sherpa_onnx::OnlineRecognizerConfig GetOnlineRecognizerConfig(
    const SherpaOnnxOnlineRecognizerConfig *config) {
  sherpa_onnx::OnlineRecognizerConfig recognizer_config;

  recognizer_config.feat_config.sampling_rate =
      SHERPA_ONNX_OR(config->feat_config.sample_rate, 16000);
  recognizer_config.feat_config.feature_dim =
      SHERPA_ONNX_OR(config->feat_config.feature_dim, 80);

  recognizer_config.model_config.transducer.encoder =
      SHERPA_ONNX_OR(config->model_config.transducer.encoder, "");
  recognizer_config.model_config.transducer.decoder =
      SHERPA_ONNX_OR(config->model_config.transducer.decoder, "");
  recognizer_config.model_config.transducer.joiner =
      SHERPA_ONNX_OR(config->model_config.transducer.joiner, "");

  recognizer_config.model_config.paraformer.encoder =
      SHERPA_ONNX_OR(config->model_config.paraformer.encoder, "");
  recognizer_config.model_config.paraformer.decoder =
      SHERPA_ONNX_OR(config->model_config.paraformer.decoder, "");

  recognizer_config.model_config.zipformer2_ctc.model =
      SHERPA_ONNX_OR(config->model_config.zipformer2_ctc.model, "");

  recognizer_config.model_config.tokens =
      SHERPA_ONNX_OR(config->model_config.tokens, "");
  if (config->model_config.tokens_buf &&
      config->model_config.tokens_buf_size > 0) {
    recognizer_config.model_config.tokens_buf = std::string(
        config->model_config.tokens_buf, config->model_config.tokens_buf_size);
  }

  recognizer_config.model_config.nemo_ctc.model =
      SHERPA_ONNX_OR(config->model_config.nemo_ctc.model, "");

  recognizer_config.model_config.t_one_ctc.model =
      SHERPA_ONNX_OR(config->model_config.t_one_ctc.model, "");

  recognizer_config.model_config.num_threads =
      SHERPA_ONNX_OR(config->model_config.num_threads, 1);
  recognizer_config.model_config.provider_config.provider =
      SHERPA_ONNX_OR(config->model_config.provider, "cpu");

  if (recognizer_config.model_config.provider_config.provider.empty()) {
    recognizer_config.model_config.provider_config.provider = "cpu";
  }

  recognizer_config.model_config.model_type =
      SHERPA_ONNX_OR(config->model_config.model_type, "");
  recognizer_config.model_config.debug = config->model_config.debug;
  recognizer_config.model_config.modeling_unit =
      SHERPA_ONNX_OR(config->model_config.modeling_unit, "cjkchar");

  if (recognizer_config.model_config.modeling_unit.empty()) {
    recognizer_config.model_config.modeling_unit = "cjkchar";
  }

  recognizer_config.model_config.bpe_vocab =
      SHERPA_ONNX_OR(config->model_config.bpe_vocab, "");

  recognizer_config.decoding_method =
      SHERPA_ONNX_OR(config->decoding_method, "greedy_search");
  if (recognizer_config.decoding_method.empty()) {
    recognizer_config.decoding_method = "greedy_search";
  }

  recognizer_config.max_active_paths =
      SHERPA_ONNX_OR(config->max_active_paths, 4);

  recognizer_config.enable_endpoint =
      SHERPA_ONNX_OR(config->enable_endpoint, 0);

  recognizer_config.endpoint_config.rule1.min_trailing_silence =
      SHERPA_ONNX_OR(config->rule1_min_trailing_silence, 2.4);

  recognizer_config.endpoint_config.rule2.min_trailing_silence =
      SHERPA_ONNX_OR(config->rule2_min_trailing_silence, 1.2);

  recognizer_config.endpoint_config.rule3.min_utterance_length =
      SHERPA_ONNX_OR(config->rule3_min_utterance_length, 20);

  recognizer_config.hotwords_file = SHERPA_ONNX_OR(config->hotwords_file, "");
  recognizer_config.hotwords_score =
      SHERPA_ONNX_OR(config->hotwords_score, 1.5);
  if (config->hotwords_buf && config->hotwords_buf_size > 0) {
    recognizer_config.hotwords_buf =
        std::string(config->hotwords_buf, config->hotwords_buf_size);
  }

  recognizer_config.blank_penalty = config->blank_penalty;

  recognizer_config.ctc_fst_decoder_config.graph =
      SHERPA_ONNX_OR(config->ctc_fst_decoder_config.graph, "");
  recognizer_config.ctc_fst_decoder_config.max_active =
      SHERPA_ONNX_OR(config->ctc_fst_decoder_config.max_active, 3000);

  recognizer_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, "");
  recognizer_config.rule_fars = SHERPA_ONNX_OR(config->rule_fars, "");

  recognizer_config.hr.lexicon = SHERPA_ONNX_OR(config->hr.lexicon, "");
  recognizer_config.hr.rule_fsts = SHERPA_ONNX_OR(config->hr.rule_fsts, "");

  if (config->model_config.debug) {
#if __OHOS__
    auto str_vec = sherpa_onnx::SplitString(recognizer_config.ToString(), 128);
    for (const auto &s : str_vec) {
      SHERPA_ONNX_LOGE("%{public}s\n", s.c_str());
      SHERPA_ONNX_LOGE("%s\n", s.c_str());
    }
#else
    SHERPA_ONNX_LOGE("%s", recognizer_config.ToString().c_str());
#endif
  }

  return recognizer_config;
}

const SherpaOnnxOnlineRecognizer *SherpaOnnxCreateOnlineRecognizer(
    const SherpaOnnxOnlineRecognizerConfig *config) {
  sherpa_onnx::OnlineRecognizerConfig recognizer_config =
      GetOnlineRecognizerConfig(config);

  if (!recognizer_config.Validate()) {
    SHERPA_ONNX_LOGE("Errors in config!");
    return nullptr;
  }

  SherpaOnnxOnlineRecognizer *recognizer = new SherpaOnnxOnlineRecognizer;

  recognizer->impl =
      std::make_unique<sherpa_onnx::OnlineRecognizer>(recognizer_config);

  return recognizer;
}

void SherpaOnnxDestroyOnlineRecognizer(
    const SherpaOnnxOnlineRecognizer *recognizer) {
  if (!recognizer) return;
  delete recognizer;
}

const SherpaOnnxOnlineStream *SherpaOnnxCreateOnlineStream(
    const SherpaOnnxOnlineRecognizer *recognizer) {
  SherpaOnnxOnlineStream *stream =
      new SherpaOnnxOnlineStream(recognizer->impl->CreateStream());
  return stream;
}

const SherpaOnnxOnlineStream *SherpaOnnxCreateOnlineStreamWithHotwords(
    const SherpaOnnxOnlineRecognizer *recognizer, const char *hotwords) {
  SherpaOnnxOnlineStream *stream =
      new SherpaOnnxOnlineStream(recognizer->impl->CreateStream(hotwords));
  return stream;
}

void SherpaOnnxDestroyOnlineStream(const SherpaOnnxOnlineStream *stream) {
  if (!stream) return;
  delete stream;
}

void SherpaOnnxOnlineStreamAcceptWaveform(const SherpaOnnxOnlineStream *stream,
                                          int32_t sample_rate,
                                          const float *samples, int32_t n) {
  stream->impl->AcceptWaveform(sample_rate, samples, n);
}

int32_t SherpaOnnxIsOnlineStreamReady(
    const SherpaOnnxOnlineRecognizer *recognizer,
    const SherpaOnnxOnlineStream *stream) {
  return recognizer->impl->IsReady(stream->impl.get());
}

void SherpaOnnxDecodeOnlineStream(const SherpaOnnxOnlineRecognizer *recognizer,
                                  const SherpaOnnxOnlineStream *stream) {
  recognizer->impl->DecodeStream(stream->impl.get());
}

void SherpaOnnxDecodeMultipleOnlineStreams(
    const SherpaOnnxOnlineRecognizer *recognizer,
    const SherpaOnnxOnlineStream **streams, int32_t n) {
  std::vector<sherpa_onnx::OnlineStream *> ss(n);
  for (int32_t i = 0; i != n; ++i) {
    ss[i] = streams[i]->impl.get();
  }
  recognizer->impl->DecodeStreams(ss.data(), n);
}

const SherpaOnnxOnlineRecognizerResult *SherpaOnnxGetOnlineStreamResult(
    const SherpaOnnxOnlineRecognizer *recognizer,
    const SherpaOnnxOnlineStream *stream) {
  sherpa_onnx::OnlineRecognizerResult result =
      recognizer->impl->GetResult(stream->impl.get());
  const auto &text = result.text;

  auto r = new SherpaOnnxOnlineRecognizerResult;
  memset(r, 0, sizeof(SherpaOnnxOnlineRecognizerResult));

  // copy text
  char *pText = new char[text.size() + 1];
  std::copy(text.begin(), text.end(), pText);
  pText[text.size()] = 0;
  r->text = pText;

  // copy json
  std::string json = result.AsJsonString();
  char *pJson = new char[json.size() + 1];
  std::copy(json.begin(), json.end(), pJson);
  pJson[json.size()] = 0;
  r->json = pJson;

  // copy tokens
  auto count = result.tokens.size();
  if (count > 0) {
    size_t total_length = 0;
    for (const auto &token : result.tokens) {
      // +1 for the null character at the end of each token
      total_length += token.size() + 1;
    }

    r->count = count;
    // Each word ends with nullptr
    char *tokens = new char[total_length]{};
    char **tokens_temp = new char *[r->count];
    int32_t pos = 0;
    for (int32_t i = 0; i < r->count; ++i) {
      tokens_temp[i] = tokens + pos;
      memcpy(tokens + pos, result.tokens[i].c_str(), result.tokens[i].size());
      // +1 to move past the null character
      pos += result.tokens[i].size() + 1;
    }
    r->tokens_arr = tokens_temp;

    if (!result.timestamps.empty() && result.timestamps.size() == r->count) {
      r->timestamps = new float[r->count];
      std::copy(result.timestamps.begin(), result.timestamps.end(),
                r->timestamps);
    } else {
      r->timestamps = nullptr;
    }

    r->tokens = tokens;
  } else {
    r->count = 0;
    r->timestamps = nullptr;
    r->tokens = nullptr;
    r->tokens_arr = nullptr;
  }

  return r;
}

void SherpaOnnxDestroyOnlineRecognizerResult(
    const SherpaOnnxOnlineRecognizerResult *r) {
  if (r) {
    delete[] r->text;
    delete[] r->json;
    delete[] r->tokens;
    delete[] r->tokens_arr;
    delete[] r->timestamps;
    delete r;
  }
}

const char *SherpaOnnxGetOnlineStreamResultAsJson(
    const SherpaOnnxOnlineRecognizer *recognizer,
    const SherpaOnnxOnlineStream *stream) {
  sherpa_onnx::OnlineRecognizerResult result =
      recognizer->impl->GetResult(stream->impl.get());
  std::string json = result.AsJsonString();
  char *pJson = new char[json.size() + 1];
  std::copy(json.begin(), json.end(), pJson);
  pJson[json.size()] = 0;
  return pJson;
}

void SherpaOnnxDestroyOnlineStreamResultJson(const char *s) {
  if (!s) return;
  delete[] s;
}

void SherpaOnnxOnlineStreamReset(const SherpaOnnxOnlineRecognizer *recognizer,
                                 const SherpaOnnxOnlineStream *stream) {
  recognizer->impl->Reset(stream->impl.get());
}

void SherpaOnnxOnlineStreamInputFinished(const SherpaOnnxOnlineStream *stream) {
  stream->impl->InputFinished();
}

void SherpaOnnxOnlineStreamSetOption(const SherpaOnnxOnlineStream *stream,
                                     const char *key, const char *value) {
  if (!stream || !key || !value) return;
  stream->impl->SetOption(key, value);
}

const char *SherpaOnnxOnlineStreamGetOption(
    const SherpaOnnxOnlineStream *stream, const char *key) {
  if (!stream || !key) return nullptr;
  return stream->impl->GetOption(key).c_str();
}

int32_t SherpaOnnxOnlineStreamHasOption(const SherpaOnnxOnlineStream *stream,
                                        const char *key) {
  if (!stream || !key) return 0;
  return stream->impl->HasOption(key);
}

int32_t SherpaOnnxOnlineStreamIsEndpoint(
    const SherpaOnnxOnlineRecognizer *recognizer,
    const SherpaOnnxOnlineStream *stream) {
  return recognizer->impl->IsEndpoint(stream->impl.get());
}

const SherpaOnnxDisplay *SherpaOnnxCreateDisplay(int32_t max_word_per_line) {
  SherpaOnnxDisplay *ans = new SherpaOnnxDisplay;
  ans->impl = std::make_unique<sherpa_onnx::Display>(max_word_per_line);
  return ans;
}

void SherpaOnnxDestroyDisplay(const SherpaOnnxDisplay *display) {
  if (!display) return;
  delete display;
}

void SherpaOnnxPrint(const SherpaOnnxDisplay *display, int32_t idx,
                     const char *s) {
  display->impl->Print(idx, s);
}

// ============================================================
// For offline ASR (i.e., non-streaming ASR)
// ============================================================
//
struct SherpaOnnxOfflineRecognizer {
  std::unique_ptr<sherpa_onnx::OfflineRecognizer> impl;
};

struct SherpaOnnxOfflineStream {
  std::unique_ptr<sherpa_onnx::OfflineStream> impl;
  explicit SherpaOnnxOfflineStream(
      std::unique_ptr<sherpa_onnx::OfflineStream> p)
      : impl(std::move(p)) {}
};

static sherpa_onnx::OfflineRecognizerConfig GetOfflineRecognizerConfig(
    const SherpaOnnxOfflineRecognizerConfig *config) {
  sherpa_onnx::OfflineRecognizerConfig recognizer_config;

  recognizer_config.feat_config.sampling_rate =
      SHERPA_ONNX_OR(config->feat_config.sample_rate, 16000);

  recognizer_config.feat_config.feature_dim =
      SHERPA_ONNX_OR(config->feat_config.feature_dim, 80);

  recognizer_config.model_config.transducer.encoder_filename =
      SHERPA_ONNX_OR(config->model_config.transducer.encoder, "");

  recognizer_config.model_config.transducer.decoder_filename =
      SHERPA_ONNX_OR(config->model_config.transducer.decoder, "");

  recognizer_config.model_config.transducer.joiner_filename =
      SHERPA_ONNX_OR(config->model_config.transducer.joiner, "");

  recognizer_config.model_config.paraformer.model =
      SHERPA_ONNX_OR(config->model_config.paraformer.model, "");

  recognizer_config.model_config.nemo_ctc.model =
      SHERPA_ONNX_OR(config->model_config.nemo_ctc.model, "");

  recognizer_config.model_config.whisper.encoder =
      SHERPA_ONNX_OR(config->model_config.whisper.encoder, "");

  recognizer_config.model_config.whisper.decoder =
      SHERPA_ONNX_OR(config->model_config.whisper.decoder, "");

  recognizer_config.model_config.whisper.language =
      SHERPA_ONNX_OR(config->model_config.whisper.language, "");

  recognizer_config.model_config.whisper.task =
      SHERPA_ONNX_OR(config->model_config.whisper.task, "transcribe");
  if (recognizer_config.model_config.whisper.task.empty()) {
    recognizer_config.model_config.whisper.task = "transcribe";
  }

  recognizer_config.model_config.whisper.tail_paddings =
      SHERPA_ONNX_OR(config->model_config.whisper.tail_paddings, -1);

  recognizer_config.model_config.whisper.enable_token_timestamps =
      config->model_config.whisper.enable_token_timestamps;

  recognizer_config.model_config.whisper.enable_segment_timestamps =
      config->model_config.whisper.enable_segment_timestamps;

  recognizer_config.model_config.tdnn.model =
      SHERPA_ONNX_OR(config->model_config.tdnn.model, "");

  recognizer_config.model_config.tokens =
      SHERPA_ONNX_OR(config->model_config.tokens, "");
  recognizer_config.model_config.num_threads =
      SHERPA_ONNX_OR(config->model_config.num_threads, 1);
  recognizer_config.model_config.debug = config->model_config.debug;
  recognizer_config.model_config.provider =
      SHERPA_ONNX_OR(config->model_config.provider, "cpu");
  if (recognizer_config.model_config.provider.empty()) {
    recognizer_config.model_config.provider = "cpu";
  }

  recognizer_config.model_config.model_type =
      SHERPA_ONNX_OR(config->model_config.model_type, "");
  recognizer_config.model_config.modeling_unit =
      SHERPA_ONNX_OR(config->model_config.modeling_unit, "cjkchar");

  if (recognizer_config.model_config.modeling_unit.empty()) {
    recognizer_config.model_config.modeling_unit = "cjkchar";
  }

  recognizer_config.model_config.bpe_vocab =
      SHERPA_ONNX_OR(config->model_config.bpe_vocab, "");

  recognizer_config.model_config.telespeech_ctc =
      SHERPA_ONNX_OR(config->model_config.telespeech_ctc, "");

  recognizer_config.model_config.sense_voice.model =
      SHERPA_ONNX_OR(config->model_config.sense_voice.model, "");

  recognizer_config.model_config.sense_voice.language =
      SHERPA_ONNX_OR(config->model_config.sense_voice.language, "");

  recognizer_config.model_config.sense_voice.use_itn =
      config->model_config.sense_voice.use_itn;

  recognizer_config.model_config.moonshine.preprocessor =
      SHERPA_ONNX_OR(config->model_config.moonshine.preprocessor, "");

  recognizer_config.model_config.moonshine.encoder =
      SHERPA_ONNX_OR(config->model_config.moonshine.encoder, "");

  recognizer_config.model_config.moonshine.uncached_decoder =
      SHERPA_ONNX_OR(config->model_config.moonshine.uncached_decoder, "");

  recognizer_config.model_config.moonshine.cached_decoder =
      SHERPA_ONNX_OR(config->model_config.moonshine.cached_decoder, "");

  recognizer_config.model_config.moonshine.merged_decoder =
      SHERPA_ONNX_OR(config->model_config.moonshine.merged_decoder, "");

  recognizer_config.model_config.fire_red_asr.encoder =
      SHERPA_ONNX_OR(config->model_config.fire_red_asr.encoder, "");

  recognizer_config.model_config.fire_red_asr.decoder =
      SHERPA_ONNX_OR(config->model_config.fire_red_asr.decoder, "");

  recognizer_config.model_config.dolphin.model =
      SHERPA_ONNX_OR(config->model_config.dolphin.model, "");

  recognizer_config.model_config.zipformer_ctc.model =
      SHERPA_ONNX_OR(config->model_config.zipformer_ctc.model, "");

  recognizer_config.model_config.canary.encoder =
      SHERPA_ONNX_OR(config->model_config.canary.encoder, "");

  recognizer_config.model_config.canary.decoder =
      SHERPA_ONNX_OR(config->model_config.canary.decoder, "");

  recognizer_config.model_config.canary.src_lang =
      SHERPA_ONNX_OR(config->model_config.canary.src_lang, "");

  recognizer_config.model_config.canary.tgt_lang =
      SHERPA_ONNX_OR(config->model_config.canary.tgt_lang, "");

  recognizer_config.model_config.canary.use_pnc =
      config->model_config.canary.use_pnc;

  recognizer_config.model_config.wenet_ctc.model =
      SHERPA_ONNX_OR(config->model_config.wenet_ctc.model, "");

  recognizer_config.model_config.omnilingual.model =
      SHERPA_ONNX_OR(config->model_config.omnilingual.model, "");

  recognizer_config.model_config.medasr.model =
      SHERPA_ONNX_OR(config->model_config.medasr.model, "");

  recognizer_config.model_config.funasr_nano.encoder_adaptor =
      SHERPA_ONNX_OR(config->model_config.funasr_nano.encoder_adaptor, "");
  recognizer_config.model_config.funasr_nano.llm =
      SHERPA_ONNX_OR(config->model_config.funasr_nano.llm, "");
  recognizer_config.model_config.funasr_nano.embedding =
      SHERPA_ONNX_OR(config->model_config.funasr_nano.embedding, "");
  recognizer_config.model_config.funasr_nano.tokenizer =
      SHERPA_ONNX_OR(config->model_config.funasr_nano.tokenizer, "");
  recognizer_config.model_config.funasr_nano.system_prompt =
      SHERPA_ONNX_OR(config->model_config.funasr_nano.system_prompt,
                     "You are a helpful assistant.");
  recognizer_config.model_config.funasr_nano.user_prompt = SHERPA_ONNX_OR(
      config->model_config.funasr_nano.user_prompt, "语音转写：");
  recognizer_config.model_config.funasr_nano.language =
      SHERPA_ONNX_OR(config->model_config.funasr_nano.language, "");
  recognizer_config.model_config.funasr_nano.itn =
      config->model_config.funasr_nano.itn;
  recognizer_config.model_config.funasr_nano.hotwords =
      SHERPA_ONNX_OR(config->model_config.funasr_nano.hotwords, "");
  recognizer_config.model_config.funasr_nano.max_new_tokens =
      SHERPA_ONNX_OR(config->model_config.funasr_nano.max_new_tokens, 512);
  recognizer_config.model_config.funasr_nano.temperature =
      SHERPA_ONNX_OR(config->model_config.funasr_nano.temperature, 1e-6f);
  recognizer_config.model_config.funasr_nano.top_p =
      SHERPA_ONNX_OR(config->model_config.funasr_nano.top_p, 0.8f);
  recognizer_config.model_config.funasr_nano.seed =
      SHERPA_ONNX_OR(config->model_config.funasr_nano.seed, 42);

  recognizer_config.model_config.fire_red_asr_ctc.model =
      SHERPA_ONNX_OR(config->model_config.fire_red_asr_ctc.model, "");

  recognizer_config.lm_config.model =
      SHERPA_ONNX_OR(config->lm_config.model, "");
  recognizer_config.lm_config.scale =
      SHERPA_ONNX_OR(config->lm_config.scale, 1.0);

  recognizer_config.decoding_method =
      SHERPA_ONNX_OR(config->decoding_method, "greedy_search");

  if (recognizer_config.decoding_method.empty()) {
    recognizer_config.decoding_method = "greedy_search";
  }

  recognizer_config.max_active_paths =
      SHERPA_ONNX_OR(config->max_active_paths, 4);

  recognizer_config.hotwords_file = SHERPA_ONNX_OR(config->hotwords_file, "");
  recognizer_config.hotwords_score =
      SHERPA_ONNX_OR(config->hotwords_score, 1.5);

  recognizer_config.blank_penalty = config->blank_penalty;

  recognizer_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, "");
  recognizer_config.rule_fars = SHERPA_ONNX_OR(config->rule_fars, "");

  recognizer_config.hr.lexicon = SHERPA_ONNX_OR(config->hr.lexicon, "");
  recognizer_config.hr.rule_fsts = SHERPA_ONNX_OR(config->hr.rule_fsts, "");

  if (config->model_config.debug) {
#if __OHOS__
    auto str_vec = sherpa_onnx::SplitString(recognizer_config.ToString(), 128);
    for (const auto &s : str_vec) {
      SHERPA_ONNX_LOGE("%{public}s\n", s.c_str());
      SHERPA_ONNX_LOGE("%s\n", s.c_str());
    }
#else
    SHERPA_ONNX_LOGE("%s", recognizer_config.ToString().c_str());
#endif
  }

  return recognizer_config;
}

const SherpaOnnxOfflineRecognizer *SherpaOnnxCreateOfflineRecognizer(
    const SherpaOnnxOfflineRecognizerConfig *config) {
  sherpa_onnx::OfflineRecognizerConfig recognizer_config =
      GetOfflineRecognizerConfig(config);

  if (!recognizer_config.Validate()) {
    SHERPA_ONNX_LOGE("Errors in config");
    return nullptr;
  }

  SherpaOnnxOfflineRecognizer *recognizer = new SherpaOnnxOfflineRecognizer;

  recognizer->impl =
      std::make_unique<sherpa_onnx::OfflineRecognizer>(recognizer_config);

  return recognizer;
}

void SherpaOnnxOfflineRecognizerSetConfig(
    const SherpaOnnxOfflineRecognizer *recognizer,
    const SherpaOnnxOfflineRecognizerConfig *config) {
  sherpa_onnx::OfflineRecognizerConfig recognizer_config =
      GetOfflineRecognizerConfig(config);
  recognizer->impl->SetConfig(recognizer_config);
}

void SherpaOnnxDestroyOfflineRecognizer(
    const SherpaOnnxOfflineRecognizer *recognizer) {
  if (!recognizer) return;
  delete recognizer;
}

const SherpaOnnxOfflineStream *SherpaOnnxCreateOfflineStream(
    const SherpaOnnxOfflineRecognizer *recognizer) {
  SherpaOnnxOfflineStream *stream =
      new SherpaOnnxOfflineStream(recognizer->impl->CreateStream());
  return stream;
}

const SherpaOnnxOfflineStream *SherpaOnnxCreateOfflineStreamWithHotwords(
    const SherpaOnnxOfflineRecognizer *recognizer, const char *hotwords) {
  SherpaOnnxOfflineStream *stream =
      new SherpaOnnxOfflineStream(recognizer->impl->CreateStream(hotwords));
  return stream;
}

void SherpaOnnxDestroyOfflineStream(const SherpaOnnxOfflineStream *stream) {
  if (!stream) return;
  delete stream;
}

void SherpaOnnxAcceptWaveformOffline(const SherpaOnnxOfflineStream *stream,
                                     int32_t sample_rate, const float *samples,
                                     int32_t n) {
  stream->impl->AcceptWaveform(sample_rate, samples, n);
}

void SherpaOnnxOfflineStreamSetOption(const SherpaOnnxOfflineStream *stream,
                                      const char *key, const char *value) {
  if (!stream || !key || !value) return;
  stream->impl->SetOption(key, value);
}

const char *SherpaOnnxOfflineStreamGetOption(
    const SherpaOnnxOfflineStream *stream, const char *key) {
  if (!stream || !key) return nullptr;
  return stream->impl->GetOption(key).c_str();
}

int32_t SherpaOnnxOfflineStreamHasOption(const SherpaOnnxOfflineStream *stream,
                                         const char *key) {
  if (!stream || !key) return 0;
  return stream->impl->HasOption(key);
}

void SherpaOnnxDecodeOfflineStream(
    const SherpaOnnxOfflineRecognizer *recognizer,
    const SherpaOnnxOfflineStream *stream) {
  recognizer->impl->DecodeStream(stream->impl.get());
}

void SherpaOnnxDecodeMultipleOfflineStreams(
    const SherpaOnnxOfflineRecognizer *recognizer,
    const SherpaOnnxOfflineStream **streams, int32_t n) {
  std::vector<sherpa_onnx::OfflineStream *> ss(n);
  for (int32_t i = 0; i != n; ++i) {
    ss[i] = streams[i]->impl.get();
  }
  recognizer->impl->DecodeStreams(ss.data(), n);
}

const SherpaOnnxOfflineRecognizerResult *SherpaOnnxGetOfflineStreamResult(
    const SherpaOnnxOfflineStream *stream) {
  const sherpa_onnx::OfflineRecognitionResult &result =
      stream->impl->GetResult();
  const auto &text = result.text;

  auto r = new SherpaOnnxOfflineRecognizerResult;
  memset(r, 0, sizeof(SherpaOnnxOfflineRecognizerResult));

  char *pText = new char[text.size() + 1];
  std::copy(text.begin(), text.end(), pText);
  pText[text.size()] = 0;
  r->text = pText;

  // lang
  const auto &lang = result.lang;
  char *c_lang = new char[lang.size() + 1];
  std::copy(lang.begin(), lang.end(), c_lang);
  c_lang[lang.size()] = '\0';
  r->lang = c_lang;

  // emotion
  const auto &emotion = result.emotion;
  char *c_emotion = new char[emotion.size() + 1];
  std::copy(emotion.begin(), emotion.end(), c_emotion);
  c_emotion[emotion.size()] = '\0';
  r->emotion = c_emotion;

  // event
  const auto &event = result.event;
  char *c_event = new char[event.size() + 1];
  std::copy(event.begin(), event.end(), c_event);
  c_event[event.size()] = '\0';
  r->event = c_event;

  // copy json
  std::string json = result.AsJsonString();
  char *pJson = new char[json.size() + 1];
  std::copy(json.begin(), json.end(), pJson);
  pJson[json.size()] = 0;
  r->json = pJson;

  // copy tokens
  auto count = result.tokens.size();
  if (count > 0) {
    size_t total_length = 0;
    for (const auto &token : result.tokens) {
      // +1 for the null character at the end of each token
      total_length += token.size() + 1;
    }

    r->count = count;
    // Each word ends with nullptr
    char *tokens = new char[total_length]{};
    char **tokens_temp = new char *[r->count];
    int32_t pos = 0;
    for (int32_t i = 0; i < r->count; ++i) {
      tokens_temp[i] = tokens + pos;
      memcpy(tokens + pos, result.tokens[i].c_str(), result.tokens[i].size());
      // +1 to move past the null character
      pos += result.tokens[i].size() + 1;
    }
    r->tokens_arr = tokens_temp;

    if (!result.timestamps.empty() && result.timestamps.size() == r->count) {
      r->timestamps = new float[r->count];
      std::copy(result.timestamps.begin(), result.timestamps.end(),
                r->timestamps);
    } else {
      r->timestamps = nullptr;
    }

    if (!result.durations.empty() && result.durations.size() == r->count) {
      r->durations = new float[r->count];
      std::copy(result.durations.begin(), result.durations.end(), r->durations);
    } else {
      r->durations = nullptr;
    }

    if (!result.ys_log_probs.empty() &&
        result.ys_log_probs.size() == r->count) {
      r->ys_log_probs = new float[r->count];
      std::copy(result.ys_log_probs.begin(), result.ys_log_probs.end(),
                r->ys_log_probs);
    } else {
      r->ys_log_probs = nullptr;
    }

    r->tokens = tokens;
  } else {
    r->count = 0;
    r->timestamps = nullptr;
    r->tokens = nullptr;
    r->tokens_arr = nullptr;
    r->ys_log_probs = nullptr;
  }

  // Copy segment-level timestamps (from Whisper with segment timestamps)
  auto segment_count = result.segment_texts.size();
  if (segment_count > 0 && result.segment_timestamps.size() == segment_count &&
      result.segment_durations.size() == segment_count) {
    r->segment_count = segment_count;

    // Copy segment timestamps
    float *timestamps = new float[segment_count];
    std::copy(result.segment_timestamps.begin(),
              result.segment_timestamps.end(), timestamps);
    r->segment_timestamps = timestamps;

    // Copy segment durations
    float *durations = new float[segment_count];
    std::copy(result.segment_durations.begin(), result.segment_durations.end(),
              durations);
    r->segment_durations = durations;

    // Copy segment texts (similar to tokens)
    size_t total_length = 0;
    for (const auto &seg_text : result.segment_texts) {
      total_length += seg_text.size() + 1;  // +1 for null terminator
    }

    char *segment_texts = new char[total_length]{};
    char **segment_texts_temp = new char *[segment_count];
    int32_t pos = 0;
    for (int32_t i = 0; i < static_cast<int32_t>(segment_count); ++i) {
      segment_texts_temp[i] = segment_texts + pos;
      memcpy(segment_texts + pos, result.segment_texts[i].c_str(),
             result.segment_texts[i].size());
      pos += result.segment_texts[i].size() + 1;
    }
    r->segment_texts = segment_texts;
    r->segment_texts_arr = segment_texts_temp;
  } else {
    r->segment_count = 0;
    r->segment_timestamps = nullptr;
    r->segment_durations = nullptr;
    r->segment_texts = nullptr;
    r->segment_texts_arr = nullptr;
  }

  return r;
}

void SherpaOnnxDestroyOfflineRecognizerResult(
    const SherpaOnnxOfflineRecognizerResult *r) {
  if (r) {
    delete[] r->text;
    delete[] r->timestamps;
    delete[] r->durations;
    delete[] r->ys_log_probs;
    delete[] r->tokens;
    delete[] r->tokens_arr;
    delete[] r->json;
    delete[] r->lang;
    delete[] r->emotion;
    delete[] r->event;
    delete[] r->segment_timestamps;
    delete[] r->segment_durations;
    delete[] r->segment_texts;
    delete[] r->segment_texts_arr;
    delete r;
  }
}

const char *SherpaOnnxGetOfflineStreamResultAsJson(
    const SherpaOnnxOfflineStream *stream) {
  const sherpa_onnx::OfflineRecognitionResult &result =
      stream->impl->GetResult();
  std::string json = result.AsJsonString();
  char *pJson = new char[json.size() + 1];
  std::copy(json.begin(), json.end(), pJson);
  pJson[json.size()] = 0;
  return pJson;
}

void SherpaOnnxDestroyOfflineStreamResultJson(const char *s) {
  if (!s) return;
  delete[] s;
}

// ============================================================
// For Keyword Spot
// ============================================================

struct SherpaOnnxKeywordSpotter {
  std::unique_ptr<sherpa_onnx::KeywordSpotter> impl;
};

static sherpa_onnx::KeywordSpotterConfig GetKeywordSpotterConfig(
    const SherpaOnnxKeywordSpotterConfig *config) {
  sherpa_onnx::KeywordSpotterConfig spotter_config;

  spotter_config.feat_config.sampling_rate =
      SHERPA_ONNX_OR(config->feat_config.sample_rate, 16000);
  spotter_config.feat_config.feature_dim =
      SHERPA_ONNX_OR(config->feat_config.feature_dim, 80);

  spotter_config.model_config.transducer.encoder =
      SHERPA_ONNX_OR(config->model_config.transducer.encoder, "");
  spotter_config.model_config.transducer.decoder =
      SHERPA_ONNX_OR(config->model_config.transducer.decoder, "");
  spotter_config.model_config.transducer.joiner =
      SHERPA_ONNX_OR(config->model_config.transducer.joiner, "");

  spotter_config.model_config.paraformer.encoder =
      SHERPA_ONNX_OR(config->model_config.paraformer.encoder, "");
  spotter_config.model_config.paraformer.decoder =
      SHERPA_ONNX_OR(config->model_config.paraformer.decoder, "");

  spotter_config.model_config.zipformer2_ctc.model =
      SHERPA_ONNX_OR(config->model_config.zipformer2_ctc.model, "");

  spotter_config.model_config.nemo_ctc.model =
      SHERPA_ONNX_OR(config->model_config.nemo_ctc.model, "");

  spotter_config.model_config.tokens =
      SHERPA_ONNX_OR(config->model_config.tokens, "");
  if (config->model_config.tokens_buf &&
      config->model_config.tokens_buf_size > 0) {
    spotter_config.model_config.tokens_buf = std::string(
        config->model_config.tokens_buf, config->model_config.tokens_buf_size);
  }

  spotter_config.model_config.num_threads =
      SHERPA_ONNX_OR(config->model_config.num_threads, 1);
  spotter_config.model_config.provider_config.provider =
      SHERPA_ONNX_OR(config->model_config.provider, "cpu");
  if (spotter_config.model_config.provider_config.provider.empty()) {
    spotter_config.model_config.provider_config.provider = "cpu";
  }

  spotter_config.model_config.model_type =
      SHERPA_ONNX_OR(config->model_config.model_type, "");
  spotter_config.model_config.debug = config->model_config.debug;

  spotter_config.max_active_paths = SHERPA_ONNX_OR(config->max_active_paths, 4);

  spotter_config.num_trailing_blanks =
      SHERPA_ONNX_OR(config->num_trailing_blanks, 1);

  spotter_config.keywords_score = SHERPA_ONNX_OR(config->keywords_score, 1.0);

  spotter_config.keywords_threshold =
      SHERPA_ONNX_OR(config->keywords_threshold, 0.25);

  spotter_config.keywords_file = SHERPA_ONNX_OR(config->keywords_file, "");
  if (config->keywords_buf && config->keywords_buf_size > 0) {
    spotter_config.keywords_buf =
        std::string(config->keywords_buf, config->keywords_buf_size);
  }

  if (spotter_config.model_config.debug) {
#if __OHOS__
    SHERPA_ONNX_LOGE("%{public}s\n", spotter_config.ToString().c_str());
#else
    SHERPA_ONNX_LOGE("%s\n", spotter_config.ToString().c_str());
#endif
  }

  return spotter_config;
}

const SherpaOnnxKeywordSpotter *SherpaOnnxCreateKeywordSpotter(
    const SherpaOnnxKeywordSpotterConfig *config) {
  auto spotter_config = GetKeywordSpotterConfig(config);
  if (!spotter_config.Validate()) {
    SHERPA_ONNX_LOGE("Errors in config!");
    return nullptr;
  }

  SherpaOnnxKeywordSpotter *spotter = new SherpaOnnxKeywordSpotter;

  spotter->impl = std::make_unique<sherpa_onnx::KeywordSpotter>(spotter_config);

  return spotter;
}

void SherpaOnnxDestroyKeywordSpotter(const SherpaOnnxKeywordSpotter *spotter) {
  if (!spotter) return;
  delete spotter;
}

const SherpaOnnxOnlineStream *SherpaOnnxCreateKeywordStream(
    const SherpaOnnxKeywordSpotter *spotter) {
  SherpaOnnxOnlineStream *stream =
      new SherpaOnnxOnlineStream(spotter->impl->CreateStream());
  return stream;
}

const SherpaOnnxOnlineStream *SherpaOnnxCreateKeywordStreamWithKeywords(
    const SherpaOnnxKeywordSpotter *spotter, const char *keywords) {
  SherpaOnnxOnlineStream *stream =
      new SherpaOnnxOnlineStream(spotter->impl->CreateStream(keywords));
  return stream;
}

int32_t SherpaOnnxIsKeywordStreamReady(const SherpaOnnxKeywordSpotter *spotter,
                                       const SherpaOnnxOnlineStream *stream) {
  return spotter->impl->IsReady(stream->impl.get());
}

void SherpaOnnxDecodeKeywordStream(const SherpaOnnxKeywordSpotter *spotter,
                                   const SherpaOnnxOnlineStream *stream) {
  spotter->impl->DecodeStream(stream->impl.get());
}

void SherpaOnnxResetKeywordStream(const SherpaOnnxKeywordSpotter *spotter,
                                  const SherpaOnnxOnlineStream *stream) {
  spotter->impl->Reset(stream->impl.get());
}

void SherpaOnnxDecodeMultipleKeywordStreams(
    const SherpaOnnxKeywordSpotter *spotter,
    const SherpaOnnxOnlineStream **streams, int32_t n) {
  std::vector<sherpa_onnx::OnlineStream *> ss(n);
  for (int32_t i = 0; i != n; ++i) {
    ss[i] = streams[i]->impl.get();
  }
  spotter->impl->DecodeStreams(ss.data(), n);
}

const SherpaOnnxKeywordResult *SherpaOnnxGetKeywordResult(
    const SherpaOnnxKeywordSpotter *spotter,
    const SherpaOnnxOnlineStream *stream) {
  const sherpa_onnx::KeywordResult &result =
      spotter->impl->GetResult(stream->impl.get());
  const auto &keyword = result.keyword;

  auto r = new SherpaOnnxKeywordResult;
  memset(r, 0, sizeof(SherpaOnnxKeywordResult));

  r->start_time = result.start_time;

  // copy keyword
  char *pKeyword = new char[keyword.size() + 1];
  std::copy(keyword.begin(), keyword.end(), pKeyword);
  pKeyword[keyword.size()] = 0;
  r->keyword = pKeyword;

  // copy json
  std::string json = result.AsJsonString();
  char *pJson = new char[json.size() + 1];
  std::copy(json.begin(), json.end(), pJson);
  pJson[json.size()] = 0;
  r->json = pJson;

  // copy tokens
  auto count = result.tokens.size();
  if (count > 0) {
    size_t total_length = 0;
    for (const auto &token : result.tokens) {
      // +1 for the null character at the end of each token
      total_length += token.size() + 1;
    }

    r->count = count;
    // Each word ends with nullptr
    char *pTokens = new char[total_length]{};
    char **tokens_temp = new char *[r->count];
    int32_t pos = 0;
    for (int32_t i = 0; i < r->count; ++i) {
      tokens_temp[i] = pTokens + pos;
      memcpy(pTokens + pos, result.tokens[i].c_str(), result.tokens[i].size());
      // +1 to move past the null character
      pos += result.tokens[i].size() + 1;
    }
    r->tokens = pTokens;
    r->tokens_arr = tokens_temp;

    if (!result.timestamps.empty()) {
      r->timestamps = new float[result.timestamps.size()];
      std::copy(result.timestamps.begin(), result.timestamps.end(),
                r->timestamps);
    } else {
      r->timestamps = nullptr;
    }

  } else {
    r->count = 0;
    r->timestamps = nullptr;
    r->tokens = nullptr;
    r->tokens_arr = nullptr;
  }

  return r;
}

void SherpaOnnxDestroyKeywordResult(const SherpaOnnxKeywordResult *r) {
  if (r) {
    delete[] r->keyword;
    delete[] r->json;
    delete[] r->tokens;
    delete[] r->tokens_arr;
    delete[] r->timestamps;
    delete r;
  }
}

const char *SherpaOnnxGetKeywordResultAsJson(
    const SherpaOnnxKeywordSpotter *spotter,
    const SherpaOnnxOnlineStream *stream) {
  const sherpa_onnx::KeywordResult &result =
      spotter->impl->GetResult(stream->impl.get());

  std::string json = result.AsJsonString();
  char *pJson = new char[json.size() + 1];
  std::copy(json.begin(), json.end(), pJson);
  pJson[json.size()] = 0;
  return pJson;
}

void SherpaOnnxFreeKeywordResultJson(const char *s) {
  if (!s) return;
  delete[] s;
}

// ============================================================
// For VAD
// ============================================================
//
struct SherpaOnnxCircularBuffer {
  std::unique_ptr<sherpa_onnx::CircularBuffer> impl;
};

const SherpaOnnxCircularBuffer *SherpaOnnxCreateCircularBuffer(
    int32_t capacity) {
  SherpaOnnxCircularBuffer *buffer = new SherpaOnnxCircularBuffer;
  buffer->impl = std::make_unique<sherpa_onnx::CircularBuffer>(capacity);
  return buffer;
}

void SherpaOnnxDestroyCircularBuffer(const SherpaOnnxCircularBuffer *buffer) {
  if (!buffer) return;
  delete buffer;
}

void SherpaOnnxCircularBufferPush(const SherpaOnnxCircularBuffer *buffer,
                                  const float *p, int32_t n) {
  buffer->impl->Push(p, n);
}

const float *SherpaOnnxCircularBufferGet(const SherpaOnnxCircularBuffer *buffer,
                                         int32_t start_index, int32_t n) {
  std::vector<float> v = buffer->impl->Get(start_index, n);

  float *p = new float[n];
  std::copy(v.begin(), v.end(), p);
  return p;
}

void SherpaOnnxCircularBufferFree(const float *p) {
  if (!p) return;
  delete[] p;
}

void SherpaOnnxCircularBufferPop(const SherpaOnnxCircularBuffer *buffer,
                                 int32_t n) {
  buffer->impl->Pop(n);
}

int32_t SherpaOnnxCircularBufferSize(const SherpaOnnxCircularBuffer *buffer) {
  return buffer->impl->Size();
}

int32_t SherpaOnnxCircularBufferHead(const SherpaOnnxCircularBuffer *buffer) {
  return buffer->impl->Head();
}

void SherpaOnnxCircularBufferReset(const SherpaOnnxCircularBuffer *buffer) {
  buffer->impl->Reset();
}

struct SherpaOnnxVoiceActivityDetector {
  std::unique_ptr<sherpa_onnx::VoiceActivityDetector> impl;
};

static sherpa_onnx::VadModelConfig GetVadModelConfig(
    const SherpaOnnxVadModelConfig *config) {
  sherpa_onnx::VadModelConfig vad_config;

  vad_config.silero_vad.model = SHERPA_ONNX_OR(config->silero_vad.model, "");
  vad_config.silero_vad.threshold =
      SHERPA_ONNX_OR(config->silero_vad.threshold, 0.5);

  vad_config.silero_vad.min_silence_duration =
      SHERPA_ONNX_OR(config->silero_vad.min_silence_duration, 0.5);

  vad_config.silero_vad.min_speech_duration =
      SHERPA_ONNX_OR(config->silero_vad.min_speech_duration, 0.25);

  vad_config.silero_vad.window_size =
      SHERPA_ONNX_OR(config->silero_vad.window_size, 512);

  vad_config.silero_vad.max_speech_duration =
      SHERPA_ONNX_OR(config->silero_vad.max_speech_duration, 20);

  vad_config.ten_vad.model = SHERPA_ONNX_OR(config->ten_vad.model, "");
  vad_config.ten_vad.threshold = SHERPA_ONNX_OR(config->ten_vad.threshold, 0.5);

  vad_config.ten_vad.min_silence_duration =
      SHERPA_ONNX_OR(config->ten_vad.min_silence_duration, 0.5);

  vad_config.ten_vad.min_speech_duration =
      SHERPA_ONNX_OR(config->ten_vad.min_speech_duration, 0.25);

  vad_config.ten_vad.window_size =
      SHERPA_ONNX_OR(config->ten_vad.window_size, 256);

  vad_config.ten_vad.max_speech_duration =
      SHERPA_ONNX_OR(config->ten_vad.max_speech_duration, 20);

  vad_config.sample_rate = SHERPA_ONNX_OR(config->sample_rate, 16000);
  vad_config.num_threads = SHERPA_ONNX_OR(config->num_threads, 1);
  vad_config.provider = SHERPA_ONNX_OR(config->provider, "cpu");
  if (vad_config.provider.empty()) {
    vad_config.provider = "cpu";
  }

  vad_config.debug = config->debug;

  if (vad_config.debug) {
#if __OHOS__
    SHERPA_ONNX_LOGE("%{public}s\n", vad_config.ToString().c_str());
#else
    SHERPA_ONNX_LOGE("%s\n", vad_config.ToString().c_str());
#endif
  }

  return vad_config;
}

const SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetector(
    const SherpaOnnxVadModelConfig *config, float buffer_size_in_seconds) {
  if (!config) {
    SHERPA_ONNX_LOGE("vad config is nullptr");
    return nullptr;
  }

  auto vad_config = GetVadModelConfig(config);

  if (!vad_config.Validate()) {
    SHERPA_ONNX_LOGE("Errors in config");
    return nullptr;
  }

  SherpaOnnxVoiceActivityDetector *p = new SherpaOnnxVoiceActivityDetector;
  p->impl = std::make_unique<sherpa_onnx::VoiceActivityDetector>(
      vad_config, buffer_size_in_seconds);

  return p;
}

void SherpaOnnxDestroyVoiceActivityDetector(
    const SherpaOnnxVoiceActivityDetector *p) {
  if (!p) return;
  delete p;
}

void SherpaOnnxVoiceActivityDetectorAcceptWaveform(
    const SherpaOnnxVoiceActivityDetector *p, const float *samples, int32_t n) {
  if (!p) {
    SHERPA_ONNX_LOGE("vad is nullptr");
    return;
  }

  if (!samples) {
    SHERPA_ONNX_LOGE("samples is nullptr");
    return;
  }

  p->impl->AcceptWaveform(samples, n);
}

int32_t SherpaOnnxVoiceActivityDetectorEmpty(
    const SherpaOnnxVoiceActivityDetector *p) {
  if (!p) {
    SHERPA_ONNX_LOGE("vad is nullptr");
    return 1;  // 1 means it is empty
  }

  return p->impl->Empty();
}

int32_t SherpaOnnxVoiceActivityDetectorDetected(
    const SherpaOnnxVoiceActivityDetector *p) {
  if (!p) {
    SHERPA_ONNX_LOGE("vad is nullptr");
    return 0;
  }

  return p->impl->IsSpeechDetected();
}

void SherpaOnnxVoiceActivityDetectorPop(
    const SherpaOnnxVoiceActivityDetector *p) {
  if (!p) {
    SHERPA_ONNX_LOGE("vad is nullptr");
    return;
  }

  p->impl->Pop();
}

void SherpaOnnxVoiceActivityDetectorClear(
    const SherpaOnnxVoiceActivityDetector *p) {
  if (!p) {
    SHERPA_ONNX_LOGE("vad is nullptr");
    return;
  }

  p->impl->Clear();
}

const SherpaOnnxSpeechSegment *SherpaOnnxVoiceActivityDetectorFront(
    const SherpaOnnxVoiceActivityDetector *p) {
  if (!p) {
    SHERPA_ONNX_LOGE("vad is nullptr");
    return nullptr;
  }

  if (SherpaOnnxVoiceActivityDetectorEmpty(p)) {
    return nullptr;
  }

  const sherpa_onnx::SpeechSegment &segment = p->impl->Front();

  SherpaOnnxSpeechSegment *ans = new SherpaOnnxSpeechSegment;
  ans->start = segment.start;
  ans->samples = new float[segment.samples.size()];
  std::copy(segment.samples.begin(), segment.samples.end(), ans->samples);
  ans->n = segment.samples.size();

  return ans;
}

void SherpaOnnxDestroySpeechSegment(const SherpaOnnxSpeechSegment *p) {
  if (p) {
    delete[] p->samples;
    delete p;
  }
}

void SherpaOnnxVoiceActivityDetectorReset(
    const SherpaOnnxVoiceActivityDetector *p) {
  if (!p) {
    SHERPA_ONNX_LOGE("vad is nullptr");
    return;
  }

  p->impl->Reset();
}

void SherpaOnnxVoiceActivityDetectorFlush(
    const SherpaOnnxVoiceActivityDetector *p) {
  if (!p) {
    SHERPA_ONNX_LOGE("vad is nullptr");
    return;
  }

  p->impl->Flush();
}

#if SHERPA_ONNX_ENABLE_TTS == 1
struct SherpaOnnxOfflineTts {
  std::unique_ptr<sherpa_onnx::OfflineTts> impl;
};

static sherpa_onnx::OfflineTtsConfig GetOfflineTtsConfig(
    const SherpaOnnxOfflineTtsConfig *config) {
  sherpa_onnx::OfflineTtsConfig tts_config;

  // vits
  tts_config.model.vits.model = SHERPA_ONNX_OR(config->model.vits.model, "");
  tts_config.model.vits.lexicon =
      SHERPA_ONNX_OR(config->model.vits.lexicon, "");
  tts_config.model.vits.tokens = SHERPA_ONNX_OR(config->model.vits.tokens, "");
  tts_config.model.vits.data_dir =
      SHERPA_ONNX_OR(config->model.vits.data_dir, "");
  tts_config.model.vits.noise_scale =
      SHERPA_ONNX_OR(config->model.vits.noise_scale, 0.667);
  tts_config.model.vits.noise_scale_w =
      SHERPA_ONNX_OR(config->model.vits.noise_scale_w, 0.8);
  tts_config.model.vits.length_scale =
      SHERPA_ONNX_OR(config->model.vits.length_scale, 1.0);

  // matcha
  tts_config.model.matcha.acoustic_model =
      SHERPA_ONNX_OR(config->model.matcha.acoustic_model, "");
  tts_config.model.matcha.vocoder =
      SHERPA_ONNX_OR(config->model.matcha.vocoder, "");
  tts_config.model.matcha.lexicon =
      SHERPA_ONNX_OR(config->model.matcha.lexicon, "");
  tts_config.model.matcha.tokens =
      SHERPA_ONNX_OR(config->model.matcha.tokens, "");
  tts_config.model.matcha.data_dir =
      SHERPA_ONNX_OR(config->model.matcha.data_dir, "");
  tts_config.model.matcha.noise_scale =
      SHERPA_ONNX_OR(config->model.matcha.noise_scale, 0.667);
  tts_config.model.matcha.length_scale =
      SHERPA_ONNX_OR(config->model.matcha.length_scale, 1.0);

  // kokoro
  tts_config.model.kokoro.model =
      SHERPA_ONNX_OR(config->model.kokoro.model, "");
  tts_config.model.kokoro.voices =
      SHERPA_ONNX_OR(config->model.kokoro.voices, "");
  tts_config.model.kokoro.tokens =
      SHERPA_ONNX_OR(config->model.kokoro.tokens, "");
  tts_config.model.kokoro.data_dir =
      SHERPA_ONNX_OR(config->model.kokoro.data_dir, "");
  tts_config.model.kokoro.length_scale =
      SHERPA_ONNX_OR(config->model.kokoro.length_scale, 1.0);
  tts_config.model.kokoro.lexicon =
      SHERPA_ONNX_OR(config->model.kokoro.lexicon, "");
  tts_config.model.kokoro.lang = SHERPA_ONNX_OR(config->model.kokoro.lang, "");

  // kitten
  tts_config.model.kitten.model =
      SHERPA_ONNX_OR(config->model.kitten.model, "");
  tts_config.model.kitten.voices =
      SHERPA_ONNX_OR(config->model.kitten.voices, "");
  tts_config.model.kitten.tokens =
      SHERPA_ONNX_OR(config->model.kitten.tokens, "");
  tts_config.model.kitten.data_dir =
      SHERPA_ONNX_OR(config->model.kitten.data_dir, "");
  tts_config.model.kitten.length_scale =
      SHERPA_ONNX_OR(config->model.kitten.length_scale, 1.0);

  // zipvoice
  tts_config.model.zipvoice.tokens =
      SHERPA_ONNX_OR(config->model.zipvoice.tokens, "");
  tts_config.model.zipvoice.encoder =
      SHERPA_ONNX_OR(config->model.zipvoice.encoder, "");
  tts_config.model.zipvoice.decoder =
      SHERPA_ONNX_OR(config->model.zipvoice.decoder, "");
  tts_config.model.zipvoice.vocoder =
      SHERPA_ONNX_OR(config->model.zipvoice.vocoder, "");
  tts_config.model.zipvoice.data_dir =
      SHERPA_ONNX_OR(config->model.zipvoice.data_dir, "");
  tts_config.model.zipvoice.lexicon =
      SHERPA_ONNX_OR(config->model.zipvoice.lexicon, "");
  tts_config.model.zipvoice.feat_scale =
      SHERPA_ONNX_OR(config->model.zipvoice.feat_scale, 0.1f);
  tts_config.model.zipvoice.t_shift =
      SHERPA_ONNX_OR(config->model.zipvoice.t_shift, 0.5f);
  tts_config.model.zipvoice.target_rms =
      SHERPA_ONNX_OR(config->model.zipvoice.target_rms, 0.1f);
  tts_config.model.zipvoice.guidance_scale =
      SHERPA_ONNX_OR(config->model.zipvoice.guidance_scale, 1.0f);

  // pocket
  tts_config.model.pocket.lm_flow =
      SHERPA_ONNX_OR(config->model.pocket.lm_flow, "");
  tts_config.model.pocket.lm_main =
      SHERPA_ONNX_OR(config->model.pocket.lm_main, "");
  tts_config.model.pocket.encoder =
      SHERPA_ONNX_OR(config->model.pocket.encoder, "");
  tts_config.model.pocket.decoder =
      SHERPA_ONNX_OR(config->model.pocket.decoder, "");
  tts_config.model.pocket.text_conditioner =
      SHERPA_ONNX_OR(config->model.pocket.text_conditioner, "");
  tts_config.model.pocket.vocab_json =
      SHERPA_ONNX_OR(config->model.pocket.vocab_json, "");
  tts_config.model.pocket.token_scores_json =
      SHERPA_ONNX_OR(config->model.pocket.token_scores_json, "");
  if (config->model.pocket.voice_embedding_cache_capacity >= 0) {
    tts_config.model.pocket.voice_embedding_cache_capacity =
        config->model.pocket.voice_embedding_cache_capacity;
  } else {
    tts_config.model.pocket.voice_embedding_cache_capacity = 50;
  }

  // supertonic
  tts_config.model.supertonic.duration_predictor =
      SHERPA_ONNX_OR(config->model.supertonic.duration_predictor, "");
  tts_config.model.supertonic.text_encoder =
      SHERPA_ONNX_OR(config->model.supertonic.text_encoder, "");
  tts_config.model.supertonic.vector_estimator =
      SHERPA_ONNX_OR(config->model.supertonic.vector_estimator, "");
  tts_config.model.supertonic.vocoder =
      SHERPA_ONNX_OR(config->model.supertonic.vocoder, "");
  tts_config.model.supertonic.tts_json =
      SHERPA_ONNX_OR(config->model.supertonic.tts_json, "");
  tts_config.model.supertonic.unicode_indexer =
      SHERPA_ONNX_OR(config->model.supertonic.unicode_indexer, "");
  tts_config.model.supertonic.voice_style =
      SHERPA_ONNX_OR(config->model.supertonic.voice_style, "");

  tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
  tts_config.model.debug = config->model.debug;
  tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
  if (tts_config.model.provider.empty()) {
    tts_config.model.provider = "cpu";
  }

  tts_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, "");
  tts_config.rule_fars = SHERPA_ONNX_OR(config->rule_fars, "");
  tts_config.max_num_sentences = SHERPA_ONNX_OR(config->max_num_sentences, 1);
  tts_config.silence_scale = SHERPA_ONNX_OR(config->silence_scale, 0.2);

  if (tts_config.model.debug) {
#if __OHOS__
    SHERPA_ONNX_LOGE("%{public}s\n", tts_config.ToString().c_str());
#else
    SHERPA_ONNX_LOGE("%s\n", tts_config.ToString().c_str());
#endif
  }

  return tts_config;
}

const SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
    const SherpaOnnxOfflineTtsConfig *config) {
  auto tts_config = GetOfflineTtsConfig(config);

  if (!tts_config.Validate()) {
    SHERPA_ONNX_LOGE("Errors in config");
    return nullptr;
  }

  SherpaOnnxOfflineTts *tts = new SherpaOnnxOfflineTts;

  tts->impl = std::make_unique<sherpa_onnx::OfflineTts>(tts_config);

  return tts;
}

void SherpaOnnxDestroyOfflineTts(const SherpaOnnxOfflineTts *tts) {
  if (!tts) return;
  delete tts;
}

int32_t SherpaOnnxOfflineTtsSampleRate(const SherpaOnnxOfflineTts *tts) {
  return tts->impl->SampleRate();
}

int32_t SherpaOnnxOfflineTtsNumSpeakers(const SherpaOnnxOfflineTts *tts) {
  return tts->impl->NumSpeakers();
}

static const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateInternal(
    const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
    std::function<int32_t(const float *, int32_t, float)> callback) {
  sherpa_onnx::GeneratedAudio audio =
      tts->impl->Generate(text, sid, speed, callback);

  if (audio.samples.empty()) {
    return nullptr;
  }

  SherpaOnnxGeneratedAudio *ans = new SherpaOnnxGeneratedAudio;

  float *samples = new float[audio.samples.size()];
  std::copy(audio.samples.begin(), audio.samples.end(), samples);

  ans->samples = samples;
  ans->n = audio.samples.size();
  ans->sample_rate = audio.sample_rate;

  return ans;
}

static const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateInternal(
    const SherpaOnnxOfflineTts *tts, const char *text,
    const SherpaOnnxGenerationConfig *config,
    std::function<int32_t(const float *, int32_t, float)> callback) {
  sherpa_onnx::GenerationConfig cfg;
  if (config->reference_audio) {
    if (config->reference_audio_len <= 0) {
      SHERPA_ONNX_LOGE("Invalid reference audio len: %d",
                       config->reference_audio_len);
      return nullptr;
    }

    cfg.reference_audio.assign(
        config->reference_audio,
        config->reference_audio + config->reference_audio_len);
  }

  cfg.silence_scale = SHERPA_ONNX_OR(config->silence_scale, 0.2);
  cfg.speed = SHERPA_ONNX_OR(config->speed, 1.0);
  cfg.sid = config->sid;

  cfg.reference_sample_rate = config->reference_sample_rate;

  cfg.reference_text = SHERPA_ONNX_OR(config->reference_text, "");
  cfg.num_steps = SHERPA_ONNX_OR(config->num_steps, 5);

  if (config->extra && !std::string(config->extra).empty()) {
    try {
      auto json = nlohmann::json::parse(config->extra);
      for (auto &[k, v] : json.items()) {
        std::string val = v.is_string() ? v.get<std::string>() : v.dump();
        cfg.extra.insert_or_assign(std::string(k), std::move(val));
      }
    } catch (const nlohmann::json::parse_error &e) {
      SHERPA_ONNX_LOGE("Failed to parse extra JSON: '%s'", e.what());
      SHERPA_ONNX_LOGE("Ignore the extra opt");
    }
  }

  sherpa_onnx::GeneratedAudio audio = tts->impl->Generate(text, cfg, callback);

  if (audio.samples.empty()) {
    return nullptr;
  }

  SherpaOnnxGeneratedAudio *ans = new SherpaOnnxGeneratedAudio;

  float *samples = new float[audio.samples.size()];
  std::copy(audio.samples.begin(), audio.samples.end(), samples);

  ans->samples = samples;
  ans->n = audio.samples.size();
  ans->sample_rate = audio.sample_rate;

  return ans;
}

const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerate(
    const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid,
    float speed) {
  if (!tts) {
    SHERPA_ONNX_LOGE("tts is nullptr");
    return nullptr;
  }

  if (!text) {
    SHERPA_ONNX_LOGE("text is nullptr");
    return nullptr;
  }

  return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed, nullptr);
}

const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithCallback(
    const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
    SherpaOnnxGeneratedAudioCallback callback) {
  if (!tts) {
    SHERPA_ONNX_LOGE("tts is nullptr");
    return nullptr;
  }

  if (!text) {
    SHERPA_ONNX_LOGE("text is nullptr");
    return nullptr;
  }

  if (callback) {
    auto wrapper = [callback](const float *samples, int32_t n,
                              float /*progress*/) {
      return callback(samples, n);
    };

    return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed,
                                                std::move(wrapper));
  } else {
    return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed, nullptr);
  }
}

const SherpaOnnxGeneratedAudio *
SherpaOnnxOfflineTtsGenerateWithProgressCallback(
    const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
    SherpaOnnxGeneratedAudioProgressCallback callback) {
  if (!tts) {
    SHERPA_ONNX_LOGE("tts is nullptr");
    return nullptr;
  }

  if (!text) {
    SHERPA_ONNX_LOGE("text is nullptr");
    return nullptr;
  }

  if (callback) {
    auto wrapper = [callback](const float *samples, int32_t n, float progress) {
      return callback(samples, n, progress);
    };
    return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed,
                                                std::move(wrapper));
  } else {
    return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed, nullptr);
  }
}

const SherpaOnnxGeneratedAudio *
SherpaOnnxOfflineTtsGenerateWithProgressCallbackWithArg(
    const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
    SherpaOnnxGeneratedAudioProgressCallbackWithArg callback, void *arg) {
  if (!tts) {
    SHERPA_ONNX_LOGE("tts is nullptr");
    return nullptr;
  }

  if (!text) {
    SHERPA_ONNX_LOGE("text is nullptr");
    return nullptr;
  }

  if (callback) {
    auto wrapper = [callback, arg](const float *samples, int32_t n,
                                   float progress) {
      return callback(samples, n, progress, arg);
    };
    return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed,
                                                std::move(wrapper));
  } else {
    return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed, nullptr);
  }
}

const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithCallbackWithArg(
    const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
    SherpaOnnxGeneratedAudioCallbackWithArg callback, void *arg) {
  if (!tts) {
    SHERPA_ONNX_LOGE("tts is nullptr");
    return nullptr;
  }

  if (!text) {
    SHERPA_ONNX_LOGE("text is nullptr");
    return nullptr;
  }

  if (callback) {
    auto wrapper = [callback, arg](const float *samples, int32_t n,
                                   float /*progress*/) {
      return callback(samples, n, arg);
    };

    return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed,
                                                std::move(wrapper));
  } else {
    return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed, nullptr);
  }
}

const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithZipvoice(
    const SherpaOnnxOfflineTts *tts, const char *text, const char *prompt_text,
    const float *prompt_samples, int32_t n_prompt, int32_t prompt_sr,
    float speed, int32_t num_steps) {
  if (!tts) {
    SHERPA_ONNX_LOGE("tts is nullptr");
    return nullptr;
  }

  if (!text) {
    SHERPA_ONNX_LOGE("text is nullptr");
    return nullptr;
  }

  if (!prompt_text) {
    SHERPA_ONNX_LOGE("prompt_text is nullptr");
    return nullptr;
  }

  if (!prompt_samples) {
    SHERPA_ONNX_LOGE("prompt_samples is nullptr");
    return nullptr;
  }

  std::string text_s = text;
  std::string ptext_s = prompt_text;

  std::vector<float> prompt_vec;
  if (n_prompt > 0) {
    prompt_vec.assign(prompt_samples,
                      prompt_samples + static_cast<size_t>(n_prompt));
  }

  auto out = tts->impl->Generate(text_s, ptext_s, prompt_vec, prompt_sr, speed,
                                 num_steps,
                                 /*callback=*/nullptr);

  if (out.samples.empty()) {
    return nullptr;
  }

  auto *ans = new SherpaOnnxGeneratedAudio;
  ans->sample_rate = static_cast<int32_t>(out.sample_rate);
  ans->n = static_cast<int32_t>(out.samples.size());

  float *buf = new float[out.samples.size()];
  std::copy(out.samples.begin(), out.samples.end(), buf);
  ans->samples = buf;

  return ans;
}

const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithConfig(
    const SherpaOnnxOfflineTts *tts, const char *text,
    const SherpaOnnxGenerationConfig *config,
    SherpaOnnxGeneratedAudioProgressCallbackWithArg callback, void *arg) {
  if (!tts) {
    SHERPA_ONNX_LOGE("tts is nullptr");
    return nullptr;
  }

  if (!text) {
    SHERPA_ONNX_LOGE("text is nullptr");
    return nullptr;
  }

  if (!config) {
    SHERPA_ONNX_LOGE("config is nullptr");
    return nullptr;
  }

  if (callback) {
    auto wrapper = [callback, arg](const float *samples, int32_t n,
                                   float progress) {
      return callback(samples, n, progress, arg);
    };

    return SherpaOnnxOfflineTtsGenerateInternal(tts, text, config,
                                                std::move(wrapper));
  } else {
    return SherpaOnnxOfflineTtsGenerateInternal(tts, text, config, nullptr);
  }
}

void SherpaOnnxDestroyOfflineTtsGeneratedAudio(
    const SherpaOnnxGeneratedAudio *p) {
  if (p) {
    delete[] p->samples;
    delete p;
  }
}
#else
const SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
    const SherpaOnnxOfflineTtsConfig *config) {
  SHERPA_ONNX_LOGE("TTS is not enabled. Please rebuild sherpa-onnx");
  return nullptr;
}

void SherpaOnnxDestroyOfflineTts(const SherpaOnnxOfflineTts *tts) {
  SHERPA_ONNX_LOGE("TTS is not enabled. Please rebuild sherpa-onnx");
}

int32_t SherpaOnnxOfflineTtsSampleRate(const SherpaOnnxOfflineTts *tts) {
  SHERPA_ONNX_LOGE("TTS is not enabled. Please rebuild sherpa-onnx");
  return 0;
}

int32_t SherpaOnnxOfflineTtsNumSpeakers(const SherpaOnnxOfflineTts *tts) {
  SHERPA_ONNX_LOGE("TTS is not enabled. Please rebuild sherpa-onnx");
  return 0;
}

const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerate(
    const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid,
    float speed) {
  SHERPA_ONNX_LOGE("TTS is not enabled. Please rebuild sherpa-onnx");
  return nullptr;
}

const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithCallback(
    const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
    SherpaOnnxGeneratedAudioCallback callback) {
  SHERPA_ONNX_LOGE("TTS is not enabled. Please rebuild sherpa-onnx");
  return nullptr;
}

const SherpaOnnxGeneratedAudio *
SherpaOnnxOfflineTtsGenerateWithProgressCallback(
    const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
    SherpaOnnxGeneratedAudioProgressCallback callback) {
  SHERPA_ONNX_LOGE("TTS is not enabled. Please rebuild sherpa-onnx");
  return nullptr;
}

const SherpaOnnxGeneratedAudio *
SherpaOnnxOfflineTtsGenerateWithProgressCallbackWithArg(
    const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
    SherpaOnnxGeneratedAudioProgressCallbackWithArg callback, void *arg) {
  SHERPA_ONNX_LOGE("TTS is not enabled. Please rebuild sherpa-onnx");
  return nullptr;
}

const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithCallbackWithArg(
    const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
    SherpaOnnxGeneratedAudioCallbackWithArg callback, void *arg) {
  SHERPA_ONNX_LOGE("TTS is not enabled. Please rebuild sherpa-onnx");
  return nullptr;
}

const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithZipvoice(
    const SherpaOnnxOfflineTts *tts, const char *text, const char *prompt_text,
    const float *prompt_samples, int32_t n_prompt, int32_t prompt_sr,
    float speed, int32_t num_steps) {
  SHERPA_ONNX_LOGE("TTS is not enabled. Please rebuild sherpa-onnx");
  return nullptr;
}

const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithConfig(
    const SherpaOnnxOfflineTts *tts, const char *text,
    const SherpaOnnxGenerationConfig *config,
    SherpaOnnxGeneratedAudioProgressCallbackWithArg callback, void *arg) {
  SHERPA_ONNX_LOGE("TTS is not enabled. Please rebuild sherpa-onnx");
  return nullptr;
}

void SherpaOnnxDestroyOfflineTtsGeneratedAudio(
    const SherpaOnnxGeneratedAudio *p) {
  SHERPA_ONNX_LOGE("TTS is not enabled. Please rebuild sherpa-onnx");
}
#endif  // SHERPA_ONNX_ENABLE_TTS == 1

int32_t SherpaOnnxWriteWave(const float *samples, int32_t n,
                            int32_t sample_rate, const char *filename) {
  return sherpa_onnx::WriteWave(filename, sample_rate, samples, n);
}

int64_t SherpaOnnxWaveFileSize(int32_t n_samples) {
  return sherpa_onnx::WaveFileSize(n_samples);
}

void SherpaOnnxWriteWaveToBuffer(const float *samples, int32_t n,
                                 int32_t sample_rate, char *buffer) {
  sherpa_onnx::WriteWave(buffer, sample_rate, samples, n);
}

const SherpaOnnxWave *SherpaOnnxReadWave(const char *filename) {
  int32_t sample_rate = -1;
  bool is_ok = false;
  std::vector<float> samples =
      sherpa_onnx::ReadWave(filename, &sample_rate, &is_ok);
  if (!is_ok) {
    return nullptr;
  }

  float *c_samples = new float[samples.size()];
  std::copy(samples.begin(), samples.end(), c_samples);

  SherpaOnnxWave *wave = new SherpaOnnxWave;
  wave->samples = c_samples;
  wave->sample_rate = sample_rate;
  wave->num_samples = samples.size();
  return wave;
}

const SherpaOnnxWave *SherpaOnnxReadWaveFromBinaryData(const char *data,
                                                       int32_t n) {
  if (!data || n <= 0) {
    return nullptr;
  }

  int32_t sample_rate = -1;
  bool is_ok = false;

  std::istringstream is(std::string(data, n));

  std::vector<float> samples = sherpa_onnx::ReadWave(is, &sample_rate, &is_ok);
  if (!is_ok) {
    return nullptr;
  }

  float *c_samples = new float[samples.size()];
  std::copy(samples.begin(), samples.end(), c_samples);

  SherpaOnnxWave *wave = new SherpaOnnxWave;
  wave->samples = c_samples;
  wave->sample_rate = sample_rate;
  wave->num_samples = samples.size();
  return wave;
}

void SherpaOnnxFreeWave(const SherpaOnnxWave *wave) {
  if (wave) {
    delete[] wave->samples;
    delete wave;
  }
}

struct SherpaOnnxSpokenLanguageIdentification {
  std::unique_ptr<sherpa_onnx::SpokenLanguageIdentification> impl;
};

const SherpaOnnxSpokenLanguageIdentification *
SherpaOnnxCreateSpokenLanguageIdentification(
    const SherpaOnnxSpokenLanguageIdentificationConfig *config) {
  sherpa_onnx::SpokenLanguageIdentificationConfig slid_config;
  slid_config.whisper.encoder = SHERPA_ONNX_OR(config->whisper.encoder, "");
  slid_config.whisper.decoder = SHERPA_ONNX_OR(config->whisper.decoder, "");
  slid_config.whisper.tail_paddings =
      SHERPA_ONNX_OR(config->whisper.tail_paddings, -1);
  slid_config.num_threads = SHERPA_ONNX_OR(config->num_threads, 1);
  slid_config.debug = config->debug;
  slid_config.provider = SHERPA_ONNX_OR(config->provider, "cpu");
  if (slid_config.provider.empty()) {
    slid_config.provider = "cpu";
  }

  if (slid_config.debug) {
#if __OHOS__
    SHERPA_ONNX_LOGE("%{public}s\n", slid_config.ToString().c_str());
#else
    SHERPA_ONNX_LOGE("%s\n", slid_config.ToString().c_str());
#endif
  }

  if (!slid_config.Validate()) {
    SHERPA_ONNX_LOGE("Errors in config");
    return nullptr;
  }

  SherpaOnnxSpokenLanguageIdentification *slid =
      new SherpaOnnxSpokenLanguageIdentification;
  slid->impl =
      std::make_unique<sherpa_onnx::SpokenLanguageIdentification>(slid_config);

  return slid;
}

void SherpaOnnxDestroySpokenLanguageIdentification(
    const SherpaOnnxSpokenLanguageIdentification *slid) {
  if (!slid) return;
  delete slid;
}

SherpaOnnxOfflineStream *
SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(
    const SherpaOnnxSpokenLanguageIdentification *slid) {
  SherpaOnnxOfflineStream *stream =
      new SherpaOnnxOfflineStream(slid->impl->CreateStream());
  return stream;
}

const SherpaOnnxSpokenLanguageIdentificationResult *
SherpaOnnxSpokenLanguageIdentificationCompute(
    const SherpaOnnxSpokenLanguageIdentification *slid,
    const SherpaOnnxOfflineStream *s) {
  std::string lang = slid->impl->Compute(s->impl.get());
  char *c_lang = new char[lang.size() + 1];
  std::copy(lang.begin(), lang.end(), c_lang);
  c_lang[lang.size()] = '\0';
  SherpaOnnxSpokenLanguageIdentificationResult *r =
      new SherpaOnnxSpokenLanguageIdentificationResult;
  r->lang = c_lang;
  return r;
}

void SherpaOnnxDestroySpokenLanguageIdentificationResult(
    const SherpaOnnxSpokenLanguageIdentificationResult *r) {
  if (r) {
    delete[] r->lang;
    delete r;
  }
}

struct SherpaOnnxSpeakerEmbeddingExtractor {
  std::unique_ptr<sherpa_onnx::SpeakerEmbeddingExtractor> impl;
};

static sherpa_onnx::SpeakerEmbeddingExtractorConfig
GetSpeakerEmbeddingExtractorConfig(
    const SherpaOnnxSpeakerEmbeddingExtractorConfig *config) {
  sherpa_onnx::SpeakerEmbeddingExtractorConfig c;
  c.model = SHERPA_ONNX_OR(config->model, "");

  c.num_threads = SHERPA_ONNX_OR(config->num_threads, 1);
  c.debug = config->debug;
  c.provider = SHERPA_ONNX_OR(config->provider, "cpu");
  if (c.provider.empty()) {
    c.provider = "cpu";
  }

  if (config->debug) {
#if __OHOS__
    SHERPA_ONNX_LOGE("%{public}s\n", c.ToString().c_str());
#else
    SHERPA_ONNX_LOGE("%s\n", c.ToString().c_str());
#endif
  }

  return c;
}

const SherpaOnnxSpeakerEmbeddingExtractor *
SherpaOnnxCreateSpeakerEmbeddingExtractor(
    const SherpaOnnxSpeakerEmbeddingExtractorConfig *config) {
  auto c = GetSpeakerEmbeddingExtractorConfig(config);

  if (!c.Validate()) {
    SHERPA_ONNX_LOGE("Errors in config!");
    return nullptr;
  }

  auto p = new SherpaOnnxSpeakerEmbeddingExtractor;

  p->impl = std::make_unique<sherpa_onnx::SpeakerEmbeddingExtractor>(c);

  return p;
}

void SherpaOnnxDestroySpeakerEmbeddingExtractor(
    const SherpaOnnxSpeakerEmbeddingExtractor *p) {
  if (!p) return;
  delete p;
}

int32_t SherpaOnnxSpeakerEmbeddingExtractorDim(
    const SherpaOnnxSpeakerEmbeddingExtractor *p) {
  return p->impl->Dim();
}

const SherpaOnnxOnlineStream *SherpaOnnxSpeakerEmbeddingExtractorCreateStream(
    const SherpaOnnxSpeakerEmbeddingExtractor *p) {
  SherpaOnnxOnlineStream *stream =
      new SherpaOnnxOnlineStream(p->impl->CreateStream());
  return stream;
}

int32_t SherpaOnnxSpeakerEmbeddingExtractorIsReady(
    const SherpaOnnxSpeakerEmbeddingExtractor *p,
    const SherpaOnnxOnlineStream *s) {
  return p->impl->IsReady(s->impl.get());
}

const float *SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding(
    const SherpaOnnxSpeakerEmbeddingExtractor *p,
    const SherpaOnnxOnlineStream *s) {
  std::vector<float> v = p->impl->Compute(s->impl.get());
  float *ans = new float[v.size()];
  std::copy(v.begin(), v.end(), ans);
  return ans;
}

void SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(const float *v) {
  if (!v) return;
  delete[] v;
}

struct SherpaOnnxSpeakerEmbeddingManager {
  std::unique_ptr<sherpa_onnx::SpeakerEmbeddingManager> impl;
};

const SherpaOnnxSpeakerEmbeddingManager *
SherpaOnnxCreateSpeakerEmbeddingManager(int32_t dim) {
  auto p = new SherpaOnnxSpeakerEmbeddingManager;
  p->impl = std::make_unique<sherpa_onnx::SpeakerEmbeddingManager>(dim);
  return p;
}

void SherpaOnnxDestroySpeakerEmbeddingManager(
    const SherpaOnnxSpeakerEmbeddingManager *p) {
  if (!p) return;
  delete p;
}

int32_t SherpaOnnxSpeakerEmbeddingManagerAdd(
    const SherpaOnnxSpeakerEmbeddingManager *p, const char *name,
    const float *v) {
  return p->impl->Add(name, v);
}

int32_t SherpaOnnxSpeakerEmbeddingManagerAddList(
    const SherpaOnnxSpeakerEmbeddingManager *p, const char *name,
    const float **v) {
  int32_t n = 0;
  auto q = v;
  while (q && q[0]) {
    ++n;
    ++q;
  }

  if (n == 0) {
    SHERPA_ONNX_LOGE("Empty embedding!");
    return 0;
  }

  std::vector<std::vector<float>> vec(n);
  int32_t dim = p->impl->Dim();

  for (int32_t i = 0; i != n; ++i) {
    vec[i] = std::vector<float>(v[i], v[i] + dim);
  }

  return p->impl->Add(name, vec);
}

int32_t SherpaOnnxSpeakerEmbeddingManagerAddListFlattened(
    const SherpaOnnxSpeakerEmbeddingManager *p, const char *name,
    const float *v, int32_t n) {
  std::vector<std::vector<float>> vec(n);

  int32_t dim = p->impl->Dim();

  for (int32_t i = 0; i != n; ++i, v += dim) {
    vec[i] = std::vector<float>(v, v + dim);
  }

  return p->impl->Add(name, vec);
}

int32_t SherpaOnnxSpeakerEmbeddingManagerRemove(
    const SherpaOnnxSpeakerEmbeddingManager *p, const char *name) {
  return p->impl->Remove(name);
}

const char *SherpaOnnxSpeakerEmbeddingManagerSearch(
    const SherpaOnnxSpeakerEmbeddingManager *p, const float *v,
    float threshold) {
  auto r = p->impl->Search(v, threshold);
  if (r.empty()) {
    return nullptr;
  }

  char *name = new char[r.size() + 1];
  std::copy(r.begin(), r.end(), name);
  name[r.size()] = '\0';

  return name;
}

void SherpaOnnxSpeakerEmbeddingManagerFreeSearch(const char *name) {
  if (!name) return;
  delete[] name;
}

const SherpaOnnxSpeakerEmbeddingManagerBestMatchesResult *
SherpaOnnxSpeakerEmbeddingManagerGetBestMatches(
    const SherpaOnnxSpeakerEmbeddingManager *p, const float *v, float threshold,
    int32_t n) {
  auto matches = p->impl->GetBestMatches(v, threshold, n);

  if (matches.empty()) {
    return nullptr;
  }

  auto resultMatches =
      new SherpaOnnxSpeakerEmbeddingManagerSpeakerMatch[matches.size()];
  for (int i = 0; i < matches.size(); ++i) {
    resultMatches[i].score = matches[i].score;

    char *name = new char[matches[i].name.size() + 1];
    std::copy(matches[i].name.begin(), matches[i].name.end(), name);
    name[matches[i].name.size()] = '\0';

    resultMatches[i].name = name;
  }

  auto *result = new SherpaOnnxSpeakerEmbeddingManagerBestMatchesResult();
  result->count = matches.size();
  result->matches = resultMatches;

  return result;
}

void SherpaOnnxSpeakerEmbeddingManagerFreeBestMatches(
    const SherpaOnnxSpeakerEmbeddingManagerBestMatchesResult *r) {
  if (r == nullptr) {
    return;
  }

  for (int32_t i = 0; i < r->count; ++i) {
    delete[] r->matches[i].name;
  }
  delete[] r->matches;
  delete r;
}

int32_t SherpaOnnxSpeakerEmbeddingManagerVerify(
    const SherpaOnnxSpeakerEmbeddingManager *p, const char *name,
    const float *v, float threshold) {
  return p->impl->Verify(name, v, threshold);
}

int32_t SherpaOnnxSpeakerEmbeddingManagerContains(
    const SherpaOnnxSpeakerEmbeddingManager *p, const char *name) {
  return p->impl->Contains(name);
}

int32_t SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(
    const SherpaOnnxSpeakerEmbeddingManager *p) {
  return p->impl->NumSpeakers();
}

const char *const *SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(
    const SherpaOnnxSpeakerEmbeddingManager *manager) {
  std::vector<std::string> all_speakers = manager->impl->GetAllSpeakers();
  int32_t num_speakers = all_speakers.size();
  char **p = new char *[num_speakers + 1];
  p[num_speakers] = nullptr;

  int32_t i = 0;
  for (const auto &name : all_speakers) {
    p[i] = new char[name.size() + 1];
    std::copy(name.begin(), name.end(), p[i]);
    p[i][name.size()] = '\0';

    i += 1;
  }
  return p;
}

void SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(
    const char *const *names) {
  auto p = names;

  while (p && p[0]) {
    delete[] p[0];
    ++p;
  }

  delete[] names;
}

struct SherpaOnnxAudioTagging {
  std::unique_ptr<sherpa_onnx::AudioTagging> impl;
};

const SherpaOnnxAudioTagging *SherpaOnnxCreateAudioTagging(
    const SherpaOnnxAudioTaggingConfig *config) {
  sherpa_onnx::AudioTaggingConfig ac;
  ac.model.zipformer.model = SHERPA_ONNX_OR(config->model.zipformer.model, "");
  ac.model.ced = SHERPA_ONNX_OR(config->model.ced, "");
  ac.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
  ac.model.debug = config->model.debug;
  ac.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
  if (ac.model.provider.empty()) {
    ac.model.provider = "cpu";
  }

  ac.labels = SHERPA_ONNX_OR(config->labels, "");
  ac.top_k = SHERPA_ONNX_OR(config->top_k, 5);

  if (ac.model.debug) {
#if __OHOS__
    SHERPA_ONNX_LOGE("%{public}s\n", ac.ToString().c_str());
#else
    SHERPA_ONNX_LOGE("%s\n", ac.ToString().c_str());
#endif
  }

  if (!ac.Validate()) {
    SHERPA_ONNX_LOGE("Errors in config");
    return nullptr;
  }

  SherpaOnnxAudioTagging *tagger = new SherpaOnnxAudioTagging;
  tagger->impl = std::make_unique<sherpa_onnx::AudioTagging>(ac);

  return tagger;
}

void SherpaOnnxDestroyAudioTagging(const SherpaOnnxAudioTagging *tagger) {
  if (!tagger) return;
  delete tagger;
}

const SherpaOnnxOfflineStream *SherpaOnnxAudioTaggingCreateOfflineStream(
    const SherpaOnnxAudioTagging *tagger) {
  const SherpaOnnxOfflineStream *stream =
      new SherpaOnnxOfflineStream(tagger->impl->CreateStream());
  return stream;
}

const SherpaOnnxAudioEvent *const *SherpaOnnxAudioTaggingCompute(
    const SherpaOnnxAudioTagging *tagger, const SherpaOnnxOfflineStream *s,
    int32_t top_k) {
  std::vector<sherpa_onnx::AudioEvent> events =
      tagger->impl->Compute(s->impl.get(), top_k);

  int32_t n = static_cast<int32_t>(events.size());
  SherpaOnnxAudioEvent **ans = new SherpaOnnxAudioEvent *[n + 1];
  ans[n] = nullptr;

  int32_t i = 0;
  for (const auto &e : events) {
    SherpaOnnxAudioEvent *p = new SherpaOnnxAudioEvent;

    char *name = new char[e.name.size() + 1];
    std::copy(e.name.begin(), e.name.end(), name);
    name[e.name.size()] = 0;

    p->name = name;

    p->index = e.index;
    p->prob = e.prob;

    ans[i] = p;
    i += 1;
  }

  return ans;
}

void SherpaOnnxAudioTaggingFreeResults(
    const SherpaOnnxAudioEvent *const *events) {
  auto p = events;

  while (p && *p) {
    auto e = *p;

    delete[] e->name;
    delete e;

    ++p;
  }

  delete[] events;
}

struct SherpaOnnxOfflinePunctuation {
  std::unique_ptr<sherpa_onnx::OfflinePunctuation> impl;
};

static sherpa_onnx::OfflinePunctuationConfig GetOfflinePunctuationConfig(
    const SherpaOnnxOfflinePunctuationConfig *config) {
  sherpa_onnx::OfflinePunctuationConfig c;
  c.model.ct_transformer = SHERPA_ONNX_OR(config->model.ct_transformer, "");
  c.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
  c.model.debug = config->model.debug;
  c.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
  if (c.model.provider.empty()) {
    c.model.provider = "cpu";
  }

  if (config->model.debug) {
#if __OHOS__
    SHERPA_ONNX_LOGE("%{public}s\n", c.ToString().c_str());
#else
    SHERPA_ONNX_LOGE("%s\n", c.ToString().c_str());
#endif
  }

  return c;
}

const SherpaOnnxOfflinePunctuation *SherpaOnnxCreateOfflinePunctuation(
    const SherpaOnnxOfflinePunctuationConfig *config) {
  if (config == nullptr) {
    return nullptr;
  }

  auto c = GetOfflinePunctuationConfig(config);

  if (!c.Validate()) {
    SHERPA_ONNX_LOGE("Errors in config");
    return nullptr;
  }

  SherpaOnnxOfflinePunctuation *punct = new SherpaOnnxOfflinePunctuation;
  punct->impl = std::make_unique<sherpa_onnx::OfflinePunctuation>(c);

  return punct;
}

void SherpaOnnxDestroyOfflinePunctuation(
    const SherpaOnnxOfflinePunctuation *punct) {
  if (!punct) return;
  delete punct;
}

const char *SherpaOfflinePunctuationAddPunct(
    const SherpaOnnxOfflinePunctuation *punct, const char *text) {
  if (!punct || !text) return nullptr;
  std::string text_with_punct = punct->impl->AddPunctuation(text);

  char *ans = new char[text_with_punct.size() + 1];
  std::copy(text_with_punct.begin(), text_with_punct.end(), ans);
  ans[text_with_punct.size()] = 0;

  return ans;
}

void SherpaOfflinePunctuationFreeText(const char *text) {
  if (!text) return;
  delete[] text;
}

struct SherpaOnnxOnlinePunctuation {
  std::unique_ptr<sherpa_onnx::OnlinePunctuation> impl;
};

static sherpa_onnx::OnlinePunctuationConfig GetOnlinePunctuationConfig(
    const SherpaOnnxOnlinePunctuationConfig *config) {
  sherpa_onnx::OnlinePunctuationConfig punctuation_config;
  punctuation_config.model.cnn_bilstm =
      SHERPA_ONNX_OR(config->model.cnn_bilstm, "");
  punctuation_config.model.bpe_vocab =
      SHERPA_ONNX_OR(config->model.bpe_vocab, "");
  punctuation_config.model.num_threads =
      SHERPA_ONNX_OR(config->model.num_threads, 1);
  punctuation_config.model.debug = config->model.debug;
  punctuation_config.model.provider =
      SHERPA_ONNX_OR(config->model.provider, "cpu");

  if (config->model.debug) {
#if __OHOS__
    auto str_vec = sherpa_onnx::SplitString(punctuation_config.ToString(), 128);
    for (const auto &s : str_vec) {
      SHERPA_ONNX_LOGE("%{public}s\n", s.c_str());
      SHERPA_ONNX_LOGE("%s\n", s.c_str());
    }
#else
    SHERPA_ONNX_LOGE("%s", punctuation_config.ToString().c_str());
#endif
  }

  return punctuation_config;
}

const SherpaOnnxOnlinePunctuation *SherpaOnnxCreateOnlinePunctuation(
    const SherpaOnnxOnlinePunctuationConfig *config) {
  if (config == nullptr) {
    return nullptr;
  }

  auto punctuation_config = GetOnlinePunctuationConfig(config);
  if (!punctuation_config.Validate()) {
    SHERPA_ONNX_LOGE("Errors in config");
    return nullptr;
  }

  auto *p = new SherpaOnnxOnlinePunctuation;
  p->impl =
      std::make_unique<sherpa_onnx::OnlinePunctuation>(punctuation_config);
  return p;
}

void SherpaOnnxDestroyOnlinePunctuation(const SherpaOnnxOnlinePunctuation *p) {
  if (!p) return;
  delete p;
}

const char *SherpaOnnxOnlinePunctuationAddPunct(
    const SherpaOnnxOnlinePunctuation *punctuation, const char *text) {
  if (!punctuation || !text) return nullptr;

  try {
    std::string s = punctuation->impl->AddPunctuationWithCase(text);
    char *p = new char[s.size() + 1];
    std::copy(s.begin(), s.end(), p);
    p[s.size()] = '\0';
    return p;
  } catch (const std::exception &e) {
    SHERPA_ONNX_LOGE("Failed to add punctuation: %s", e.what());
    return nullptr;
  }
}

void SherpaOnnxOnlinePunctuationFreeText(const char *text) {
  if (!text) return;
  delete[] text;
}

struct SherpaOnnxLinearResampler {
  std::unique_ptr<sherpa_onnx::LinearResample> impl;
};

const SherpaOnnxLinearResampler *SherpaOnnxCreateLinearResampler(
    int32_t samp_rate_in_hz, int32_t samp_rate_out_hz, float filter_cutoff_hz,
    int32_t num_zeros) {
  SherpaOnnxLinearResampler *p = new SherpaOnnxLinearResampler;
  p->impl = std::make_unique<sherpa_onnx::LinearResample>(
      samp_rate_in_hz, samp_rate_out_hz, filter_cutoff_hz, num_zeros);

  return p;
}

void SherpaOnnxDestroyLinearResampler(const SherpaOnnxLinearResampler *p) {
  if (!p) return;
  delete p;
}

const SherpaOnnxResampleOut *SherpaOnnxLinearResamplerResample(
    const SherpaOnnxLinearResampler *p, const float *input, int32_t input_dim,
    int32_t flush) {
  std::vector<float> o;
  p->impl->Resample(input, input_dim, flush, &o);

  float *s = new float[o.size()];
  std::copy(o.begin(), o.end(), s);

  SherpaOnnxResampleOut *ans = new SherpaOnnxResampleOut;
  ans->samples = s;
  ans->n = static_cast<int32_t>(o.size());

  return ans;
}

void SherpaOnnxLinearResamplerResampleFree(const SherpaOnnxResampleOut *p) {
  if (!p) return;
  delete[] p->samples;
  delete p;
}

int32_t SherpaOnnxLinearResamplerResampleGetInputSampleRate(
    const SherpaOnnxLinearResampler *p) {
  return p->impl->GetInputSamplingRate();
}

int32_t SherpaOnnxLinearResamplerResampleGetOutputSampleRate(
    const SherpaOnnxLinearResampler *p) {
  return p->impl->GetOutputSamplingRate();
}

void SherpaOnnxLinearResamplerReset(const SherpaOnnxLinearResampler *p) {
  p->impl->Reset();
}

int32_t SherpaOnnxFileExists(const char *filename) {
  return sherpa_onnx::FileExists(filename);
}

struct SherpaOnnxOfflineSpeechDenoiser {
  std::unique_ptr<sherpa_onnx::OfflineSpeechDenoiser> impl;
};

static const SherpaOnnxDenoisedAudio *CreateDenoisedAudio(
    const sherpa_onnx::DenoisedAudio &audio) {
  auto ans = new SherpaOnnxDenoisedAudio;

  float *denoised_samples = nullptr;
  if (!audio.samples.empty()) {
    denoised_samples = new float[audio.samples.size()];
    std::copy(audio.samples.begin(), audio.samples.end(), denoised_samples);
  }

  ans->samples = denoised_samples;
  ans->n = audio.samples.size();
  ans->sample_rate = audio.sample_rate;

  return ans;
}

static sherpa_onnx::OfflineSpeechDenoiserConfig GetOfflineSpeechDenoiserConfig(
    const SherpaOnnxOfflineSpeechDenoiserConfig *config) {
  sherpa_onnx::OfflineSpeechDenoiserConfig c;
  c.model.gtcrn.model = SHERPA_ONNX_OR(config->model.gtcrn.model, "");
  c.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
  c.model.debug = config->model.debug;
  c.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
  c.model.dpdfnet.model = SHERPA_ONNX_OR(config->model.dpdfnet.model, "");

  if (c.model.debug) {
#if __OHOS__
    SHERPA_ONNX_LOGE("%{public}s\n", c.ToString().c_str());
#else
    SHERPA_ONNX_LOGE("%s\n", c.ToString().c_str());
#endif
  }

  return c;
}

const SherpaOnnxOfflineSpeechDenoiser *SherpaOnnxCreateOfflineSpeechDenoiser(
    const SherpaOnnxOfflineSpeechDenoiserConfig *config) {
  if (config == nullptr) {
    return nullptr;
  }

  auto sd_config = GetOfflineSpeechDenoiserConfig(config);

  if (!sd_config.Validate()) {
    SHERPA_ONNX_LOGE("Errors in config");
    return nullptr;
  }

  SherpaOnnxOfflineSpeechDenoiser *sd = new SherpaOnnxOfflineSpeechDenoiser;

  sd->impl = std::make_unique<sherpa_onnx::OfflineSpeechDenoiser>(sd_config);

  return sd;
}

void SherpaOnnxDestroyOfflineSpeechDenoiser(
    const SherpaOnnxOfflineSpeechDenoiser *sd) {
  if (!sd) return;
  delete sd;
}

int32_t SherpaOnnxOfflineSpeechDenoiserGetSampleRate(
    const SherpaOnnxOfflineSpeechDenoiser *sd) {
  if (sd == nullptr) {
    return 0;
  }

  return sd->impl->GetSampleRate();
}

const SherpaOnnxDenoisedAudio *SherpaOnnxOfflineSpeechDenoiserRun(
    const SherpaOnnxOfflineSpeechDenoiser *sd, const float *samples, int32_t n,
    int32_t sample_rate) {
  if (sd == nullptr) {
    return nullptr;
  }

  if (samples == nullptr && n > 0) {
    return nullptr;
  }

  auto audio = sd->impl->Run(samples, n, sample_rate);
  return CreateDenoisedAudio(audio);
}

void SherpaOnnxDestroyDenoisedAudio(const SherpaOnnxDenoisedAudio *p) {
  if (!p) return;
  delete[] p->samples;
  delete p;
}

struct SherpaOnnxOnlineSpeechDenoiser {
  std::unique_ptr<sherpa_onnx::OnlineSpeechDenoiser> impl;
};

static sherpa_onnx::OnlineSpeechDenoiserConfig GetOnlineSpeechDenoiserConfig(
    const SherpaOnnxOnlineSpeechDenoiserConfig *config) {
  sherpa_onnx::OnlineSpeechDenoiserConfig c;
  c.model.gtcrn.model = SHERPA_ONNX_OR(config->model.gtcrn.model, "");
  c.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
  c.model.debug = config->model.debug;
  c.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
  c.model.dpdfnet.model = SHERPA_ONNX_OR(config->model.dpdfnet.model, "");

  if (c.model.debug) {
#if __OHOS__
    SHERPA_ONNX_LOGE("%{public}s\n", c.ToString().c_str());
#else
    SHERPA_ONNX_LOGE("%s\n", c.ToString().c_str());
#endif
  }

  return c;
}

const SherpaOnnxOnlineSpeechDenoiser *SherpaOnnxCreateOnlineSpeechDenoiser(
    const SherpaOnnxOnlineSpeechDenoiserConfig *config) {
  if (config == nullptr) {
    return nullptr;
  }

  auto sd_config = GetOnlineSpeechDenoiserConfig(config);

  if (!sd_config.Validate()) {
    SHERPA_ONNX_LOGE("Errors in config");
    return nullptr;
  }

  auto *sd = new SherpaOnnxOnlineSpeechDenoiser;
  sd->impl = std::make_unique<sherpa_onnx::OnlineSpeechDenoiser>(sd_config);
  return sd;
}

void SherpaOnnxDestroyOnlineSpeechDenoiser(
    const SherpaOnnxOnlineSpeechDenoiser *sd) {
  if (!sd) return;
  delete sd;
}

int32_t SherpaOnnxOnlineSpeechDenoiserGetSampleRate(
    const SherpaOnnxOnlineSpeechDenoiser *sd) {
  if (sd == nullptr) {
    return 0;
  }

  return sd->impl->GetSampleRate();
}

int32_t SherpaOnnxOnlineSpeechDenoiserGetFrameShiftInSamples(
    const SherpaOnnxOnlineSpeechDenoiser *sd) {
  if (sd == nullptr) {
    return 0;
  }

  return sd->impl->GetFrameShiftInSamples();
}

const SherpaOnnxDenoisedAudio *SherpaOnnxOnlineSpeechDenoiserRun(
    const SherpaOnnxOnlineSpeechDenoiser *sd, const float *samples, int32_t n,
    int32_t sample_rate) {
  if (sd == nullptr) {
    return nullptr;
  }

  if (samples == nullptr && n > 0) {
    return nullptr;
  }

  auto audio = sd->impl->Run(samples, n, sample_rate);

  if (audio.samples.empty()) {
    return nullptr;
  }

  return CreateDenoisedAudio(audio);
}

const SherpaOnnxDenoisedAudio *SherpaOnnxOnlineSpeechDenoiserFlush(
    const SherpaOnnxOnlineSpeechDenoiser *sd) {
  if (sd == nullptr) {
    return nullptr;
  }

  auto audio = sd->impl->Flush();

  if (audio.samples.empty()) {
    return nullptr;
  }

  return CreateDenoisedAudio(audio);
}

void SherpaOnnxOnlineSpeechDenoiserReset(
    const SherpaOnnxOnlineSpeechDenoiser *sd) {
  if (sd == nullptr) {
    return;
  }

  sd->impl->Reset();
}

#if SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION == 1

struct SherpaOnnxOfflineSpeakerDiarization {
  std::unique_ptr<sherpa_onnx::OfflineSpeakerDiarization> impl;
};

struct SherpaOnnxOfflineSpeakerDiarizationResult {
  sherpa_onnx::OfflineSpeakerDiarizationResult impl;
};

static sherpa_onnx::OfflineSpeakerDiarizationConfig
GetOfflineSpeakerDiarizationConfig(
    const SherpaOnnxOfflineSpeakerDiarizationConfig *config) {
  sherpa_onnx::OfflineSpeakerDiarizationConfig sd_config;

  sd_config.segmentation.pyannote.model =
      SHERPA_ONNX_OR(config->segmentation.pyannote.model, "");
  sd_config.segmentation.num_threads =
      SHERPA_ONNX_OR(config->segmentation.num_threads, 1);
  sd_config.segmentation.debug = config->segmentation.debug;
  sd_config.segmentation.provider =
      SHERPA_ONNX_OR(config->segmentation.provider, "cpu");
  if (sd_config.segmentation.provider.empty()) {
    sd_config.segmentation.provider = "cpu";
  }

  sd_config.embedding.model = SHERPA_ONNX_OR(config->embedding.model, "");
  sd_config.embedding.num_threads =
      SHERPA_ONNX_OR(config->embedding.num_threads, 1);
  sd_config.embedding.debug = config->embedding.debug;
  sd_config.embedding.provider =
      SHERPA_ONNX_OR(config->embedding.provider, "cpu");
  if (sd_config.embedding.provider.empty()) {
    sd_config.embedding.provider = "cpu";
  }

  sd_config.clustering.num_clusters =
      SHERPA_ONNX_OR(config->clustering.num_clusters, -1);

  sd_config.clustering.threshold =
      SHERPA_ONNX_OR(config->clustering.threshold, 0.5);

  sd_config.min_duration_on = SHERPA_ONNX_OR(config->min_duration_on, 0.3);

  sd_config.min_duration_off = SHERPA_ONNX_OR(config->min_duration_off, 0.5);

  if (sd_config.segmentation.debug || sd_config.embedding.debug) {
#if __OHOS__
    SHERPA_ONNX_LOGE("%{public}s\n", sd_config.ToString().c_str());
#else
    SHERPA_ONNX_LOGE("%s\n", sd_config.ToString().c_str());
#endif
  }

  return sd_config;
}

const SherpaOnnxOfflineSpeakerDiarization *
SherpaOnnxCreateOfflineSpeakerDiarization(
    const SherpaOnnxOfflineSpeakerDiarizationConfig *config) {
  auto sd_config = GetOfflineSpeakerDiarizationConfig(config);

  if (!sd_config.Validate()) {
    SHERPA_ONNX_LOGE("Errors in config");
    return nullptr;
  }

  SherpaOnnxOfflineSpeakerDiarization *sd =
      new SherpaOnnxOfflineSpeakerDiarization;

  sd->impl =
      std::make_unique<sherpa_onnx::OfflineSpeakerDiarization>(sd_config);

  return sd;
}

void SherpaOnnxDestroyOfflineSpeakerDiarization(
    const SherpaOnnxOfflineSpeakerDiarization *sd) {
  if (!sd) return;
  delete sd;
}

int32_t SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(
    const SherpaOnnxOfflineSpeakerDiarization *sd) {
  return sd->impl->SampleRate();
}

void SherpaOnnxOfflineSpeakerDiarizationSetConfig(
    const SherpaOnnxOfflineSpeakerDiarization *sd,
    const SherpaOnnxOfflineSpeakerDiarizationConfig *config) {
  sherpa_onnx::OfflineSpeakerDiarizationConfig sd_config;

  sd_config.clustering.num_clusters =
      SHERPA_ONNX_OR(config->clustering.num_clusters, -1);

  sd_config.clustering.threshold =
      SHERPA_ONNX_OR(config->clustering.threshold, 0.5);

  sd->impl->SetConfig(sd_config);
}

int32_t SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers(
    const SherpaOnnxOfflineSpeakerDiarizationResult *r) {
  return r->impl.NumSpeakers();
}

int32_t SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(
    const SherpaOnnxOfflineSpeakerDiarizationResult *r) {
  return r->impl.NumSegments();
}

const SherpaOnnxOfflineSpeakerDiarizationSegment *
SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(
    const SherpaOnnxOfflineSpeakerDiarizationResult *r) {
  if (r->impl.NumSegments() == 0) {
    return nullptr;
  }

  auto segments = r->impl.SortByStartTime();

  int32_t n = segments.size();
  SherpaOnnxOfflineSpeakerDiarizationSegment *ans =
      new SherpaOnnxOfflineSpeakerDiarizationSegment[n];

  for (int32_t i = 0; i != n; ++i) {
    const auto &s = segments[i];

    ans[i].start = s.Start();
    ans[i].end = s.End();
    ans[i].speaker = s.Speaker();
  }

  return ans;
}

void SherpaOnnxOfflineSpeakerDiarizationDestroySegment(
    const SherpaOnnxOfflineSpeakerDiarizationSegment *s) {
  if (!s) return;
  delete[] s;
}

const SherpaOnnxOfflineSpeakerDiarizationResult *
SherpaOnnxOfflineSpeakerDiarizationProcess(
    const SherpaOnnxOfflineSpeakerDiarization *sd, const float *samples,
    int32_t n) {
  auto ans = new SherpaOnnxOfflineSpeakerDiarizationResult;
  ans->impl = sd->impl->Process(samples, n);

  return ans;
}

void SherpaOnnxOfflineSpeakerDiarizationDestroyResult(
    const SherpaOnnxOfflineSpeakerDiarizationResult *r) {
  if (!r) return;
  delete r;
}

const SherpaOnnxOfflineSpeakerDiarizationResult *
SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback(
    const SherpaOnnxOfflineSpeakerDiarization *sd, const float *samples,
    int32_t n, SherpaOnnxOfflineSpeakerDiarizationProgressCallback callback,
    void *arg) {
  auto ans = new SherpaOnnxOfflineSpeakerDiarizationResult;
  ans->impl = sd->impl->Process(samples, n, callback, arg);

  return ans;
}

const SherpaOnnxOfflineSpeakerDiarizationResult *
SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg(
    const SherpaOnnxOfflineSpeakerDiarization *sd, const float *samples,
    int32_t n,
    SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg callback) {
  auto wrapper = [callback](int32_t num_processed_chunks,
                            int32_t num_total_chunks, void *) {
    return callback(num_processed_chunks, num_total_chunks);
  };

  auto ans = new SherpaOnnxOfflineSpeakerDiarizationResult;
  ans->impl = sd->impl->Process(samples, n, wrapper);

  return ans;
}
#else

const SherpaOnnxOfflineSpeakerDiarization *
SherpaOnnxCreateOfflineSpeakerDiarization(
    const SherpaOnnxOfflineSpeakerDiarizationConfig *config) {
  SHERPA_ONNX_LOGE(
      "Speaker diarization is not enabled. Please rebuild sherpa-onnx");
  return nullptr;
}

void SherpaOnnxDestroyOfflineSpeakerDiarization(
    const SherpaOnnxOfflineSpeakerDiarization *sd) {
  SHERPA_ONNX_LOGE(
      "Speaker diarization is not enabled. Please rebuild sherpa-onnx");
}

int32_t SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(
    const SherpaOnnxOfflineSpeakerDiarization *sd) {
  SHERPA_ONNX_LOGE(
      "Speaker diarization is not enabled. Please rebuild sherpa-onnx");
  return 0;
}

void SherpaOnnxOfflineSpeakerDiarizationSetConfig(
    const SherpaOnnxOfflineSpeakerDiarization *sd,
    const SherpaOnnxOfflineSpeakerDiarizationConfig *config) {
  SHERPA_ONNX_LOGE(
      "Speaker diarization is not enabled. Please rebuild sherpa-onnx");
}

int32_t SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers(
    const SherpaOnnxOfflineSpeakerDiarizationResult *r) {
  SHERPA_ONNX_LOGE(
      "Speaker diarization is not enabled. Please rebuild sherpa-onnx");
  return 0;
}

int32_t SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(
    const SherpaOnnxOfflineSpeakerDiarizationResult *r) {
  SHERPA_ONNX_LOGE(
      "Speaker diarization is not enabled. Please rebuild sherpa-onnx");
  return 0;
}

const SherpaOnnxOfflineSpeakerDiarizationSegment *
SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(
    const SherpaOnnxOfflineSpeakerDiarizationResult *r) {
  SHERPA_ONNX_LOGE(
      "Speaker diarization is not enabled. Please rebuild sherpa-onnx");
  return nullptr;
}

void SherpaOnnxOfflineSpeakerDiarizationDestroySegment(
    const SherpaOnnxOfflineSpeakerDiarizationSegment *s) {
  SHERPA_ONNX_LOGE(
      "Speaker diarization is not enabled. Please rebuild sherpa-onnx");
}

const SherpaOnnxOfflineSpeakerDiarizationResult *
SherpaOnnxOfflineSpeakerDiarizationProcess(
    const SherpaOnnxOfflineSpeakerDiarization *sd, const float *samples,
    int32_t n) {
  SHERPA_ONNX_LOGE(
      "Speaker diarization is not enabled. Please rebuild sherpa-onnx");
  return nullptr;
}

const SherpaOnnxOfflineSpeakerDiarizationResult *
SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback(
    const SherpaOnnxOfflineSpeakerDiarization *sd, const float *samples,
    int32_t n, SherpaOnnxOfflineSpeakerDiarizationProgressCallback callback,
    void *arg) {
  SHERPA_ONNX_LOGE(
      "Speaker diarization is not enabled. Please rebuild sherpa-onnx");
  return nullptr;
}

const SherpaOnnxOfflineSpeakerDiarizationResult *
SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg(
    const SherpaOnnxOfflineSpeakerDiarization *sd, const float *samples,
    int32_t n,
    SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg callback) {
  SHERPA_ONNX_LOGE(
      "Speaker diarization is not enabled. Please rebuild sherpa-onnx");
  return nullptr;
}

void SherpaOnnxOfflineSpeakerDiarizationDestroyResult(
    const SherpaOnnxOfflineSpeakerDiarizationResult *r) {
  SHERPA_ONNX_LOGE(
      "Speaker diarization is not enabled. Please rebuild sherpa-onnx");
}

#endif

#ifdef __OHOS__

const SherpaOnnxOfflineSpeechDenoiser *
SherpaOnnxCreateOfflineSpeechDenoiserOHOS(
    const SherpaOnnxOfflineSpeechDenoiserConfig *config,
    NativeResourceManager *mgr) {
  if (config == nullptr) {
    return nullptr;
  }

  if (!mgr) {
    return SherpaOnnxCreateOfflineSpeechDenoiser(config);
  }

  auto sd_config = GetOfflineSpeechDenoiserConfig(config);

  SherpaOnnxOfflineSpeechDenoiser *sd = new SherpaOnnxOfflineSpeechDenoiser;

  sd->impl =
      std::make_unique<sherpa_onnx::OfflineSpeechDenoiser>(mgr, sd_config);

  return sd;
}

const SherpaOnnxOnlineSpeechDenoiser *SherpaOnnxCreateOnlineSpeechDenoiserOHOS(
    const SherpaOnnxOnlineSpeechDenoiserConfig *config,
    NativeResourceManager *mgr) {
  if (config == nullptr) {
    return nullptr;
  }

  if (mgr == nullptr) {
    return SherpaOnnxCreateOnlineSpeechDenoiser(config);
  }

  auto sd_config = GetOnlineSpeechDenoiserConfig(config);

  auto *sd = new SherpaOnnxOnlineSpeechDenoiser;
  sd->impl =
      std::make_unique<sherpa_onnx::OnlineSpeechDenoiser>(mgr, sd_config);

  return sd;
}

const SherpaOnnxOnlineRecognizer *SherpaOnnxCreateOnlineRecognizerOHOS(
    const SherpaOnnxOnlineRecognizerConfig *config,
    NativeResourceManager *mgr) {
  if (!mgr) {
    return SherpaOnnxCreateOnlineRecognizer(config);
  }

  sherpa_onnx::OnlineRecognizerConfig recognizer_config =
      GetOnlineRecognizerConfig(config);

  SherpaOnnxOnlineRecognizer *recognizer = new SherpaOnnxOnlineRecognizer;

  recognizer->impl =
      std::make_unique<sherpa_onnx::OnlineRecognizer>(mgr, recognizer_config);

  return recognizer;
}

const SherpaOnnxOnlinePunctuation *SherpaOnnxCreateOnlinePunctuationOHOS(
    const SherpaOnnxOnlinePunctuationConfig *config,
    NativeResourceManager *mgr) {
  if (config == nullptr) {
    return nullptr;
  }

  if (mgr == nullptr) {
    return SherpaOnnxCreateOnlinePunctuation(config);
  }

  auto punctuation_config = GetOnlinePunctuationConfig(config);
  auto *p = new SherpaOnnxOnlinePunctuation;
  p->impl =
      std::make_unique<sherpa_onnx::OnlinePunctuation>(mgr, punctuation_config);
  return p;
}

const SherpaOnnxOfflineRecognizer *SherpaOnnxCreateOfflineRecognizerOHOS(
    const SherpaOnnxOfflineRecognizerConfig *config,
    NativeResourceManager *mgr) {
  if (mgr == nullptr) {
    return SherpaOnnxCreateOfflineRecognizer(config);
  }

  sherpa_onnx::OfflineRecognizerConfig recognizer_config =
      GetOfflineRecognizerConfig(config);

  SherpaOnnxOfflineRecognizer *recognizer = new SherpaOnnxOfflineRecognizer;

  recognizer->impl =
      std::make_unique<sherpa_onnx::OfflineRecognizer>(mgr, recognizer_config);

  return recognizer;
}

const SherpaOnnxVoiceActivityDetector *
SherpaOnnxCreateVoiceActivityDetectorOHOS(
    const SherpaOnnxVadModelConfig *config, float buffer_size_in_seconds,
    NativeResourceManager *mgr) {
  if (mgr == nullptr) {
    return SherpaOnnxCreateVoiceActivityDetector(config,
                                                 buffer_size_in_seconds);
  }

  auto vad_config = GetVadModelConfig(config);

  SherpaOnnxVoiceActivityDetector *p = new SherpaOnnxVoiceActivityDetector;
  p->impl = std::make_unique<sherpa_onnx::VoiceActivityDetector>(
      mgr, vad_config, buffer_size_in_seconds);

  return p;
}

const SherpaOnnxSpeakerEmbeddingExtractor *
SherpaOnnxCreateSpeakerEmbeddingExtractorOHOS(
    const SherpaOnnxSpeakerEmbeddingExtractorConfig *config,
    NativeResourceManager *mgr) {
  if (!mgr) {
    return SherpaOnnxCreateSpeakerEmbeddingExtractor(config);
  }

  auto c = GetSpeakerEmbeddingExtractorConfig(config);

  auto p = new SherpaOnnxSpeakerEmbeddingExtractor;

  p->impl = std::make_unique<sherpa_onnx::SpeakerEmbeddingExtractor>(mgr, c);

  return p;
}

const SherpaOnnxKeywordSpotter *SherpaOnnxCreateKeywordSpotterOHOS(
    const SherpaOnnxKeywordSpotterConfig *config, NativeResourceManager *mgr) {
  if (!mgr) {
    return SherpaOnnxCreateKeywordSpotter(config);
  }

  auto spotter_config = GetKeywordSpotterConfig(config);

  SherpaOnnxKeywordSpotter *spotter = new SherpaOnnxKeywordSpotter;

  spotter->impl =
      std::make_unique<sherpa_onnx::KeywordSpotter>(mgr, spotter_config);

  return spotter;
}

#if SHERPA_ONNX_ENABLE_TTS == 1
const SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS(
    const SherpaOnnxOfflineTtsConfig *config, NativeResourceManager *mgr) {
  if (!mgr) {
    return SherpaOnnxCreateOfflineTts(config);
  }

  auto tts_config = GetOfflineTtsConfig(config);

  SherpaOnnxOfflineTts *tts = new SherpaOnnxOfflineTts;

  tts->impl = std::make_unique<sherpa_onnx::OfflineTts>(mgr, tts_config);

  return tts;
}
#else
const SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS(
    const SherpaOnnxOfflineTtsConfig *config, NativeResourceManager *mgr) {
  SHERPA_ONNX_LOGE("TTS is not enabled. Please rebuild sherpa-onnx");
  return nullptr;
}
#endif  // #if SHERPA_ONNX_ENABLE_TTS == 1

const SherpaOnnxOfflinePunctuation *SherpaOnnxCreateOfflinePunctuationOHOS(
    const SherpaOnnxOfflinePunctuationConfig *config,
    NativeResourceManager *mgr) {
  if (config == nullptr) {
    return nullptr;
  }

  if (!mgr) {
    return SherpaOnnxCreateOfflinePunctuation(config);
  }

  auto c = GetOfflinePunctuationConfig(config);
  if (c.model.ct_transformer.empty()) {
    SHERPA_ONNX_LOGE("Please specify a punctuation model! Return a null pointer");
    return nullptr;
  }

  auto *punct = new SherpaOnnxOfflinePunctuation;
  punct->impl = std::make_unique<sherpa_onnx::OfflinePunctuation>(mgr, c);

  return punct;
}

#if SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION == 1
const SherpaOnnxOfflineSpeakerDiarization *
SherpaOnnxCreateOfflineSpeakerDiarizationOHOS(
    const SherpaOnnxOfflineSpeakerDiarizationConfig *config,
    NativeResourceManager *mgr) {
  if (!mgr) {
    return SherpaOnnxCreateOfflineSpeakerDiarization(config);
  }

  auto sd_config = GetOfflineSpeakerDiarizationConfig(config);

  SherpaOnnxOfflineSpeakerDiarization *sd =
      new SherpaOnnxOfflineSpeakerDiarization;

  sd->impl =
      std::make_unique<sherpa_onnx::OfflineSpeakerDiarization>(mgr, sd_config);

  return sd;
}
#else

const SherpaOnnxOfflineSpeakerDiarization *
SherpaOnnxCreateOfflineSpeakerDiarizationOHOS(
    const SherpaOnnxOfflineSpeakerDiarizationConfig *config,
    NativeResourceManager *mgr) {
  SHERPA_ONNX_LOGE(
      "Speaker diarization is not enabled. Please rebuild sherpa-onnx");
  return nullptr;
}

#endif  // #if SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION == 1

#endif  // #ifdef __OHOS__


================================================
FILE: sherpa-onnx/c-api/c-api.h
================================================
// sherpa-onnx/c-api/c-api.h
//
// Copyright (c)  2023  Xiaomi Corporation
/**
 * @file c-api.h
 * @brief Public C API for sherpa-onnx.
 *
 * This header exposes the main sherpa-onnx inference features through a stable
 * C interface. It is intended for native C/C++ applications and for language
 * bindings that need a C ABI.
 *
 * The file is organized by feature family. The major API groups are:
 *
 * - Utility helpers: version/build information, file checks, WAVE I/O, and a
 *   display helper for incremental text output
 * - Streaming ASR: online recognizers, online streams, endpointing, and
 *   per-stream runtime options
 * - Non-streaming ASR: offline recognizers, offline streams, batch decode, and
 *   result retrieval
 * - Keyword spotting: streaming keyword detection, custom keyword streams, and
 *   keyword result snapshots
 * - Voice activity detection: Silero/Ten VAD models, speech segment buffers,
 *   and detector state management
 * - Text-to-speech: offline TTS model families, generation configuration, and
 *   generated audio helpers
 * - Spoken language identification
 * - Speaker embedding extraction and speaker enrollment/search/verification
 * - Audio tagging
 * - Offline and online punctuation restoration
 * - Linear resampling
 * - Offline speaker diarization
 * - Offline and online speech enhancement / denoising
 * - HarmonyOS-specific constructor variants
 *
 * Common ownership rules:
 *
 * - Opaque handles created by `SherpaOnnxCreate*()` functions are generally
 *   destroyed with a matching `SherpaOnnxDestroy*()` function
 * - Snapshot/result objects returned by query functions usually need explicit
 *   destruction as documented on each API
 * - Strings or arrays returned by helper/query functions are either:
 *   - statically owned by the library and must not be freed, or
 *   - heap-allocated for the caller and must be released with the matching
 *     `Free`/`Destroy` API
 *
 * General usage pattern:
 *
 * 1. Zero-initialize a config struct with `memset(&config, 0, sizeof(config))`
 * 2. Fill in the required model paths and runtime options
 * 3. Create the corresponding engine with `SherpaOnnxCreate*()`
 * 4. Create a stream if the feature uses one
 * 5. Feed audio or text, run the compute/decode API, and retrieve results
 * 6. Release every returned object with the documented matching API
 *
 * The examples in `c-api-examples/` show complete end-to-end usage. Useful
 * starting points include:
 *
 * - `decode-file-c-api.c` for ASR
 * - `kws-c-api.c` for keyword spotting
 * - `vad-whisper-c-api.c` for VAD
 * - `offline-tts-c-api.c` and `kokoro-tts-en-c-api.c` for TTS
 * - `speaker-identification-c-api.c` for speaker embedding and verification
 * - `audio-tagging-c-api.c` for audio tagging
 * - `add-punctuation-c-api.c` and `add-punctuation-online-c-api.c` for
 *   punctuation
 * - `offline-sepaker-diarization-c-api.c` for diarization
 * - `speech-enhancement-gtcrn-c-api.c` and
 *   `online-speech-enhancement-gtcrn-c-api.c` for speech enhancement
 */

#ifndef SHERPA_ONNX_C_API_C_API_H_
#define SHERPA_ONNX_C_API_C_API_H_

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

// See https://github.com/pytorch/pytorch/blob/main/c10/macros/Export.h
// We will set SHERPA_ONNX_BUILD_SHARED_LIBS and SHERPA_ONNX_BUILD_MAIN_LIB in
// CMakeLists.txt

#if defined(__GNUC__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wattributes"
#endif

#if defined(_WIN32)
#if defined(SHERPA_ONNX_BUILD_SHARED_LIBS)
#define SHERPA_ONNX_EXPORT __declspec(dllexport)
#define SHERPA_ONNX_IMPORT __declspec(dllimport)
#else
#define SHERPA_ONNX_EXPORT
#define SHERPA_ONNX_IMPORT
#endif
#else  // WIN32
#define SHERPA_ONNX_EXPORT __attribute__((visibility("default")))

#define SHERPA_ONNX_IMPORT SHERPA_ONNX_EXPORT
#endif  // WIN32

#if defined(SHERPA_ONNX_BUILD_MAIN_LIB)
#define SHERPA_ONNX_API SHERPA_ONNX_EXPORT
#else
#define SHERPA_ONNX_API SHERPA_ONNX_IMPORT
#endif

#ifndef SHERPA_ONNX_DEPRECATED
#if defined(_MSC_VER)
#define SHERPA_ONNX_DEPRECATED(msg) __declspec(deprecated(msg))
#elif defined(__GNUC__) || defined(__clang__)
#define SHERPA_ONNX_DEPRECATED(msg) __attribute__((deprecated(msg)))
#else
#define SHERPA_ONNX_DEPRECATED(msg)
#endif
#endif

/**
 * @brief Return the sherpa-onnx version string.
 *
 * The returned pointer refers to statically allocated memory owned by the
 * library. Do not free it and do not modify it.
 *
 * @return Version string, for example `"1.12.1"`.
 *
 * @code
 * printf("sherpa-onnx version: %s\n", SherpaOnnxGetVersionStr());
 * @endcode
 */
SHERPA_ONNX_API const char *SherpaOnnxGetVersionStr();

/**
 * @brief Return the Git SHA1 used to build the library.
 *
 * The returned pointer refers to statically allocated memory owned by the
 * library. Do not free it and do not modify it.
 *
 * @return Short Git SHA1 string, for example `"6982b86c"`.
 */
SHERPA_ONNX_API const char *SherpaOnnxGetGitSha1();

/**
 * @brief Return the Git build date used to build the library.
 *
 * The returned pointer refers to statically allocated memory owned by the
 * library. Do not free it and do not modify it.
 *
 * @return Build date string, for example `"Fri Jun 20 11:22:52 2025"`.
 */
SHERPA_ONNX_API const char *SherpaOnnxGetGitDate();

/**
 * @brief Check whether a file exists.
 *
 * @param filename File path to test.
 * @return 1 if the file exists; otherwise 0.
 *
 * @code
 * if (!SherpaOnnxFileExists("./Obama.wav")) {
 *   fprintf(stderr, "Please download Obama.wav\n");
 * }
 * @endcode
 */
SHERPA_ONNX_API int32_t SherpaOnnxFileExists(const char *filename);

/**
 * @brief Configuration for a streaming transducer model.
 *
 * Please refer to
 * https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
 * to download compatible pre-trained models.
 */
typedef struct SherpaOnnxOnlineTransducerModelConfig {
  /** Path to the encoder ONNX model. */
  const char *encoder;
  /** Path to the decoder ONNX model. */
  const char *decoder;
  /** Path to the joiner ONNX model. */
  const char *joiner;
} SherpaOnnxOnlineTransducerModelConfig;

/**
 * @brief Configuration for a streaming Paraformer model.
 *
 * Please visit
 * https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html
 * to download compatible models.
 */
typedef struct SherpaOnnxOnlineParaformerModelConfig {
  /** Path to the encoder ONNX model. */
  const char *encoder;
  /** Path to the decoder ONNX model. */
  const char *decoder;
} SherpaOnnxOnlineParaformerModelConfig;

/**
 * @brief Configuration for a streaming Zipformer2 CTC model.
 */
typedef struct SherpaOnnxOnlineZipformer2CtcModelConfig {
  /** Path to the ONNX model. */
  const char *model;
} SherpaOnnxOnlineZipformer2CtcModelConfig;

/** @brief Configuration for a streaming NeMo CTC model. */
typedef struct SherpaOnnxOnlineNemoCtcModelConfig {
  /** Path to the ONNX model. */
  const char *model;
} SherpaOnnxOnlineNemoCtcModelConfig;

/** @brief Configuration for a streaming T-One CTC model. */
typedef struct SherpaOnnxOnlineToneCtcModelConfig {
  /** Path to the ONNX model. */
  const char *model;
} SherpaOnnxOnlineToneCtcModelConfig;

/**
 * @brief Model configuration shared by streaming ASR recognizers.
 *
 * Zero-initialize this struct before use, then fill in the sub-config for the
 * model family you want to use together with the shared fields such as
 * @c tokens, @c provider, and @c num_threads.
 *
 * Exactly one model family should be configured for each recognizer. For
 * example, set only one of @c transducer, @c paraformer, @c zipformer2_ctc,
 * @c nemo_ctc, or @c t_one_ctc.
 *
 * If multiple model families are configured at the same time, the
 * implementation will choose one of them, and which one is used is
 * implementation-defined. Do not rely on any precedence rule.
 */
typedef struct SherpaOnnxOnlineModelConfig {
  /** Streaming transducer model files. */
  SherpaOnnxOnlineTransducerModelConfig transducer;
  /** Streaming Paraformer model files. */
  SherpaOnnxOnlineParaformerModelConfig paraformer;
  /** Streaming Zipformer2 CTC model files. */
  SherpaOnnxOnlineZipformer2CtcModelConfig zipformer2_ctc;
  /** Path to the tokens file. */
  const char *tokens;
  /** Number of threads used by the ONNX Runtime backend. */
  int32_t num_threads;
  /** Execution provider, for example "cpu", "cuda", or "coreml". */
  const char *provider;
  /** Non-zero to print model debug information. */
  int32_t debug;
  /** Optional explicit model type override. */
  const char *model_type;
  /**
   * Modeling unit used by the tokens.
   *
   * Valid values include:
   * - "cjkchar"
   * - "bpe"
   * - "cjkchar+bpe"
   */
  const char *modeling_unit;
  /** Path to the BPE vocabulary file when BPE is used. */
  const char *bpe_vocab;
  /** Optional in-memory tokens data. Used instead of @c tokens when non-NULL.
   */
  const char *tokens_buf;
  /** Size in bytes of @c tokens_buf, excluding the trailing '\0'. */
  int32_t tokens_buf_size;
  /** Streaming NeMo CTC model files. */
  SherpaOnnxOnlineNemoCtcModelConfig nemo_ctc;
  /** Streaming T-One CTC model files. */
  SherpaOnnxOnlineToneCtcModelConfig t_one_ctc;
} SherpaOnnxOnlineModelConfig;

/**
 * @brief Feature extraction settings for ASR.
 *
 * The bundled ASR models typically expect 16 kHz mono audio and 80-bin
 * features.
 */
typedef struct SherpaOnnxFeatureConfig {
  /** Sample rate expected by the model, for example 16000. */
  int32_t sample_rate;

  /** Feature dimension expected by the model, for example 80. */
  int32_t feature_dim;
} SherpaOnnxFeatureConfig;

/** @brief Configuration for HLG/FST-based online CTC decoding. */
typedef struct SherpaOnnxOnlineCtcFstDecoderConfig {
  /** Path to the decoding graph. */
  const char *graph;
  /** Decoder max-active setting. */
  int32_t max_active;
} SherpaOnnxOnlineCtcFstDecoderConfig;

/** @brief Configuration for homophone replacement. */
typedef struct SherpaOnnxHomophoneReplacerConfig {
  /** Unused legacy field kept for ABI compatibility. */
  const char *dict_dir;
  /** Path to the lexicon used by the homophone replacer. */
  const char *lexicon;
  /** Path to the replacement rule FST file. */
  const char *rule_fsts;
} SherpaOnnxHomophoneReplacerConfig;

/**
 * @brief Configuration for a streaming ASR recognizer.
 *
 * Zero-initialize this struct before use. Then fill in @c feat_config,
 * @c model_config, and any optional decoding, endpoint, or hotword settings.
 *
 * Example model package:
 * `sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20`
 *
 * @code
 * SherpaOnnxOnlineRecognizerConfig config;
 * memset(&config, 0, sizeof(config));
 *
 * config.feat_config.sample_rate = 16000;
 * config.feat_config.feature_dim = 80;
 *
 * config.model_config.transducer.encoder =
 *     "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
 *     "encoder-epoch-99-avg-1.int8.onnx";
 * config.model_config.transducer.decoder =
 *     "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
 *     "decoder-epoch-99-avg-1.onnx";
 * config.model_config.transducer.joiner =
 *     "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
 *     "joiner-epoch-99-avg-1.int8.onnx";
 * config.model_config.tokens =
 *     "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
 *     "tokens.txt";
 * config.model_config.provider = "cpu";
 * config.model_config.num_threads = 1;
 *
 * config.decoding_method = "greedy_search";
 * @endcode
 */
typedef struct SherpaOnnxOnlineRecognizerConfig {
  /** Feature extraction settings. */
  SherpaOnnxFeatureConfig feat_config;
  /** Streaming model configuration. */
  SherpaOnnxOnlineModelConfig model_config;

  /** Decoding method, for example "greedy_search" or "modified_beam_search". */
  const char *decoding_method;

  /** Number of active paths for modified beam search. */
  int32_t max_active_paths;

  /** Set to non-zero to enable endpoint detection. */
  int32_t enable_endpoint;

  /** Endpoint rule 1 trailing silence threshold in seconds. */
  float rule1_min_trailing_silence;

  /** Endpoint rule 2 trailing silence threshold in seconds. */
  float rule2_min_trailing_silence;

  /** Endpoint rule 3 utterance-length threshold in seconds. */
  float rule3_min_utterance_length;

  /** Path to a hotwords file. */
  const char *hotwords_file;

  /** Bonus score added to each hotword token during decoding. */
  float hotwords_score;

  /** Optional HLG/FST online CTC decoder configuration. */
  SherpaOnnxOnlineCtcFstDecoderConfig ctc_fst_decoder_config;
  /** Path to punctuation or text-processing rule FSTs. */
  const char *rule_fsts;
  /** Path to FAR archives used by text-processing rules. */
  const char *rule_fars;
  /** Optional blank penalty applied during decoding. */
  float blank_penalty;

  /** Optional in-memory hotwords text used instead of @c hotwords_file. */
  const char *hotwords_buf;
  /** Size in bytes of @c hotwords_buf, excluding the trailing '\0'. */
  int32_t hotwords_buf_size;
  /** Optional homophone replacement configuration. */
  SherpaOnnxHomophoneReplacerConfig hr;
} SherpaOnnxOnlineRecognizerConfig;

/**
 * @brief Incremental recognition result for a streaming ASR stream.
 *
 * All pointers in this struct are owned by the result object returned from
 * SherpaOnnxGetOnlineStreamResult() and become invalid after
 * SherpaOnnxDestroyOnlineRecognizerResult() is called.
 */
typedef struct SherpaOnnxOnlineRecognizerResult {
  /** Recognized text accumulated so far. */
  const char *text;

  /**
   * Contiguous memory block containing token strings separated by '\0'.
   *
   * Use @c tokens_arr for convenient indexed access.
   */
  const char *tokens;

  /** Array of @c count pointers into @c tokens. */
  const char *const *tokens_arr;

  /**
   * Optional token timestamps in seconds.
   *
   * This field may be NULL when the model does not provide timestamps.
   * When non-NULL, it contains @c count entries and is parallel to
   * @c tokens_arr.
   */
  float *timestamps;

  /** Number of entries in @c tokens_arr and, when available, @c timestamps. */
  int32_t count;

  /** JSON serialization of the result. */
  const char *json;
} SherpaOnnxOnlineRecognizerResult;

/** @brief Streaming recognizer handle. */
typedef struct SherpaOnnxOnlineRecognizer SherpaOnnxOnlineRecognizer;
/** @brief Streaming decoding state for one utterance or stream. */
typedef struct SherpaOnnxOnlineStream SherpaOnnxOnlineStream;

/**
 * @brief Create a streaming ASR recognizer.
 *
 * The returned recognizer runs locally and does not require Internet access.
 *
 * @param config Recognizer configuration.
 * @return A recognizer handle on success, or NULL if the configuration is
 *         invalid. The caller owns the returned object and must free it with
 *         SherpaOnnxDestroyOnlineRecognizer().
 *
 * @code
 * SherpaOnnxOnlineRecognizerConfig config;
 * memset(&config, 0, sizeof(config));
 * config.feat_config.sample_rate = 16000;
 * config.feat_config.feature_dim = 80;
 * config.model_config.transducer.encoder =
 *     "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
 *     "encoder-epoch-99-avg-1.int8.onnx";
 * config.model_config.transducer.decoder =
 *     "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
 *     "decoder-epoch-99-avg-1.onnx";
 * config.model_config.transducer.joiner =
 *     "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
 *     "joiner-epoch-99-avg-1.int8.onnx";
 * config.model_config.tokens =
 *     "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
 *     "tokens.txt";
 * config.model_config.provider = "cpu";
 * config.model_config.num_threads = 1;
 * config.decoding_method = "greedy_search";
 *
 * const SherpaOnnxOnlineRecognizer *recognizer =
 *     SherpaOnnxCreateOnlineRecognizer(&config);
 * @endcode
 */
SHERPA_ONNX_API const SherpaOnnxOnlineRecognizer *
SherpaOnnxCreateOnlineRecognizer(
    const SherpaOnnxOnlineRecognizerConfig *config);

/**
 * @brief Destroy a streaming recognizer.
 *
 * @param recognizer A pointer returned by SherpaOnnxCreateOnlineRecognizer().
 *
 * @code
 * SherpaOnnxDestroyOnlineRecognizer(recognizer);
 * recognizer = NULL;
 * @endcode
 */
SHERPA_ONNX_API void SherpaOnnxDestroyOnlineRecognizer(
    const SherpaOnnxOnlineRecognizer *recognizer);

/**
 * @brief Create a streaming ASR state object.
 *
 * One stream corresponds to one decoding state. Reuse the same recognizer to
 * create multiple streams.
 *
 * @param recognizer A pointer returned by SherpaOnnxCreateOnlineRecognizer().
 * @return A newly created stream. The caller owns the returned object and must
 *         free it with SherpaOnnxDestroyOnlineStream().
 *
 * @code
 * const SherpaOnnxWave *wave = SherpaOnnxReadWave(
 *     "./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav");
 * const SherpaOnnxOnlineStream *stream =
 *     SherpaOnnxCreateOnlineStream(recognizer);
 * @endcode
 */
SHERPA_ONNX_API const SherpaOnnxOnlineStream *SherpaOnnxCreateOnlineStream(
    const SherpaOnnxOnlineRecognizer *recognizer);

/**
 * @brief Create a streaming ASR state object with per-stream hotwords.
 *
 * @param recognizer A pointer returned by SherpaOnnxCreateOnlineRecognizer().
 * @param hotwords Hotwords text to associate with the stream.
 * @return A newly created stream. The caller owns the returned object and must
 *         free it with SherpaOnnxDestroyOnlineStream().
 *
 * @code
 * const SherpaOnnxOnlineStream *stream =
 *     SherpaOnnxCreateOnlineStreamWithHotwords(recognizer,
 *                                              "▁HELLO ▁WORLD");
 * @endcode
 */
SHERPA_ONNX_API const SherpaOnnxOnlineStream *
SherpaOnnxCreateOnlineStreamWithHotwords(
    const SherpaOnnxOnlineRecognizer *recognizer, const char *hotwords);

/**
 * @brief Destroy a streaming ASR state object.
 *
 * @param stream A pointer returned by SherpaOnnxCreateOnlineStream() or
 *               SherpaOnnxCreateOnlineStreamWithHotwords().
 *
 * @code
 * SherpaOnnxDestroyOnlineStream(stream);
 * stream = NULL;
 * @endcode
 */
SHERPA_ONNX_API void SherpaOnnxDestroyOnlineStream(
    const SherpaOnnxOnlineStream *stream);

/**
 * @brief Append audio samples to a streaming ASR stream.
 *
 * The input is mono floating-point PCM normalized to the range [-1, 1].
 * If @p sample_rate differs from the recognizer feature sample rate,
 * sherpa-onnx resamples internally.
 *
 * @param stream A pointer returned by SherpaOnnxCreateOnlineStream().
 * @param sample_rate Sample rate of @p samples.
 * @param samples Pointer to @p n samples in the range [-1, 1].
 * @param n Number of samples.
 *
 * @code
 * int32_t start = 0;
 * int32_t chunk_size = 3200;  // 0.2 seconds at 16 kHz
 * SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate,
 *                                      wave->samples + start, chunk_size);
 * @endcode
 */
SHERPA_ONNX_API void SherpaOnnxOnlineStreamAcceptWaveform(
    const SherpaOnnxOnlineStream *stream, int32_t sample_rate,
    const float *samples, int32_t n);

/**
 * @brief Check whether a streaming ASR stream is ready to decode.
 *
 * @param recognizer A pointer returned by SherpaOnnxCreateOnlineRecognizer().
 * @param stream A pointer returned by SherpaOnnxCreateOnlineStream().
 * @return 1 if enough frames are available for decoding; otherwise 0.
 *
 * @code
 * if (SherpaOnnxIsOnlineStreamReady(recognizer, stream)) {
 *   SherpaOnnxDecodeOnlineStream(recognizer, stream);
 * }
 * @endcode
 */
SHERPA_ONNX_API int32_t
SherpaOnnxIsOnlineStreamReady(const SherpaOnnxOnlineRecognizer *recognizer,
                              const SherpaOnnxOnlineStream *stream);

/**
 * @brief Decode one step of a streaming ASR stream.
 *
 * Call this only when SherpaOnnxIsOnlineStreamReady() returns 1.
 *
 * @param recognizer A pointer returned by SherpaOnnxCreateOnlineRecognizer().
 * @param stream A pointer returned by SherpaOnnxCreateOnlineStream().
 *
 * @code
 * SherpaOnnxOnlineStreamAcceptWaveform(stream, sample_rate, samples, n);
 * while (SherpaOnnxIsOnlineStreamReady(recognizer, stream)) {
 *   SherpaOnnxDecodeOnlineStream(recognizer, stream);
 * }
 * @endcode
 */
SHERPA_ONNX_API void SherpaOnnxDecodeOnlineStream(
    const SherpaOnnxOnlineRecognizer *recognizer,
    const SherpaOnnxOnlineStream *stream);

/**
 * @brief Decode multiple streaming ASR streams in parallel.
 *
 * The caller must ensure every stream in @p streams is ready before calling
 * this function.
 *
 * @param recognizer A pointer returned by SherpaOnnxCreateOnlineRecognizer().
 * @param streams Array of @p n stream pointers.
 * @param n Number of streams in @p streams.
 *
 * @code
 * const SherpaOnnxOnlineStream *streams[2] = {stream1, stream2};
 * SherpaOnnxDecodeMultipleOnlineStreams(recognizer, streams, 2);
 * @endcode
 */
SHERPA_ONNX_API void SherpaOnnxDecodeMultipleOnlineStreams(
    const SherpaOnnxOnlineRecognizer *recognizer,
    const SherpaOnnxOnlineStream **streams, int32_t n);

/**
 * @brief Get the current streaming ASR result for a stream.
 *
 * The returned snapshot is independent from the stream state. The caller owns
 * it and must free it with SherpaOnnxDestroyOnlineRecognizerResult().
 *
 * @param recognizer A pointer returned by SherpaOnnxCreateOnlineRecognizer().
 * @param stream A pointer returned by SherpaOnnxCreateOnlineStream().
 * @return A newly allocated result snapshot.
 *
 * @code
 * const SherpaOnnxOnlineRecognizerResult *r =
 *     SherpaOnnxGetOnlineStreamResult(recognizer, stream);
 * printf("%s\n", r->text);
 * // r->tokens_arr[i] and r->timestamps[i] are parallel when timestamps
 * // are available.
 * SherpaOnnxDestroyOnlineRecognizerResult(r);
 * @endcode
 */
SHERPA_ONNX_API const SherpaOnnxOnlineRecognizerResult *
SherpaOnnxGetOnlineStreamResult(const SherpaOnnxOnlineRecognizer *recognizer,
                                const SherpaOnnxOnlineStream *stream);

/**
 * @brief Destroy a result returned by SherpaOnnxGetOnlineStreamResult().
 *
 * @param r A pointer returned by SherpaOnnxGetOnlineStreamResult().
 *
 * @code
 * SherpaOnnxDestroyOnlineRecognizerResult(r);
 * r = NULL;
 * @endcode
 */
SHERPA_ONNX_API void SherpaOnnxDestroyOnlineRecognizerResult(
    const SherpaOnnxOnlineRecognizerResult *r);

/**
 * @brief Get the current streaming ASR result as JSON.
 *
 * @param recognizer A pointer returned by SherpaOnnxCreateOnlineRecognizer().
 * @param stream A pointer returned by SherpaOnnxCreateOnlineStream().
 * @return A newly allocated JSON string. Free it with
 *         SherpaOnnxDestroyOnlineStreamResultJson().
 *
 * @code
 * const char *json =
 *     SherpaOnnxGetOnlineStreamResultAsJson(recognizer, stream);
 * puts(json);
 * SherpaOnnxDestroyOnlineStreamResultJson(json);
 * @endcode
 */
SHERPA_ONNX_API const char *SherpaOnnxGetOnlineStreamResultAsJson(
    const SherpaOnnxOnlineRecognizer *recognizer,
    const SherpaOnnxOnlineStream *stream);

/**
 * @brief Free a JSON string returned by
 * SherpaOnnxGetOnlineStreamResultAsJson().
 *
 * @param s A pointer returned by SherpaOnnxGetOnlineStreamResultAsJson().
 *
 * @code
 * SherpaOnnxDestroyOnlineStreamResultJson(json);
 * json = NULL;
 * @endcode
 */
SHERPA_ONNX_API void SherpaOnnxDestroyOnlineStreamResultJson(const char *s);

/**
 * @brief Reset a streaming ASR stream after an endpoint or utterance boundary.
 *
 * This clears the decoder state for the stream so that it can be reused for a
 * new utterance.
 *
 * @param recognizer A pointer returned by SherpaOnnxCreateOnlineRecognizer().
 * @param stream A pointer returned by SherpaOnnxCreateOnlineStream().
 *
 * @code
 * if (SherpaOnnxOnlineStreamIsEndpoint(recognizer, stream)) {
 *   SherpaOnnxOnlineStreamReset(recognizer, stream);
 * }
 * @endcode
 */
SHERPA_ONNX_API void SherpaOnnxOnlineStreamReset(
    const SherpaOnnxOnlineRecognizer *recognizer,
    const SherpaOnnxOnlineStream *stream);

/**
 * @brief Signal end-of-input for a streaming ASR stream.
 *
 * After calling this function, do not append more samples to the stream.
 *
 * @param stream A pointer returned by SherpaOnnxCreateOnlineStream().
 *
 * @code
 * SherpaOnnxOnlineStreamInputFinished(stream);
 * @endcode
 */
SHERPA_ONNX_API void SherpaOnnxOnlineStreamInputFinished(
    const SherpaOnnxOnlineStream *stream);

/**
 * @brief Set a per-stream runtime option.
 *
 * This is a generic extension point for model-specific or runtime-specific
 * options such as "is_final" for streaming Paraformer.
 *
 * @param stream A pointer returned by SherpaOnnxCreateOnlineStream().
 * @param key Option name.
 * @param value Option value represented as text.
 *
 * @code
 * SherpaOnnxOnlineStreamSetOption(stream, "is_final", "1");
 * @endcode
 */
SHERPA_ONNX_API void SherpaOnnxOnlineStreamSetOption(
    const SherpaOnnxOnlineStream *stream, const char *key, const char *value);

/**
 * @brief Get a per-stream runtime option.
 *
 * @param stream A pointer returned by SherpaOnnxCreateOnlineStream().
 * @param key Option name.
 * @return The option value. The returned pointer is owned by the stream, must
 *         not be freed by the caller, and may be invalidated if the option is
 *         overwritten or the stream is destroyed.
 *
 * @code
 * const char *value = SherpaOnnxOnlineStreamGetOption(stream, "is_final");
 * @endcode
 */
SHERPA_ONNX_API const char *SherpaOnnxOnlineStreamGetOption(
    const SherpaOnnxOnlineStream *stream, const char *key);

/**
 * @brief Check whether a per-stream runtime option exists.
 *
 * @param stream A pointer returned by SherpaOnnxCreateOnlineStream().
 * @param key Option name.
 * @return 1 if the option exists; otherwise 0.
 *
 * @code
 * int32_t has_option = SherpaOnnxOnlineStreamHasOption(stream, "is_final");
 * @endcode
 */
SHERPA_ONNX_API int32_t SherpaOnnxOnlineStreamHasOption(
    const SherpaOnnxOnlineStream *stream, const char *key);

/**
 * @brief Check whether endpoint detection has triggered for a stream.
 *
 * @param recognizer A pointer returned by SherpaOnnxCreateOnlineRecognizer().
 * @param stream A pointer returned by SherpaOnnxCreateOnlineStream().
 * @return 1 if an endpoint is detected; otherwise 0.
 *
 * @code
 * if (SherpaOnnxOnlineStreamIsEndpoint(recognizer, stream)) {
 *   SherpaOnnxOnlineStreamReset(recognizer, stream);
 * }
 * @endcode
 */
SHERPA_ONNX_API int32_t
SherpaOnnxOnlineStreamIsEndpoint(const SherpaOnnxOnlineRecognizer *recognizer,
                                 const SherpaOnnxOnlineStream *stream);

/**
 * @brief Helper for pretty-printing incremental recognition results.
 *
 * This utility is mainly used by example programs on Linux and macOS.
 */
typedef struct SherpaOnnxDisplay SherpaOnnxDisplay;

/**
 * @brief Create a display helper.
 *
 * @param max_word_per_line Maximum number of words to show per line.
 * @return A newly allocated display helper. Free it with
 *         SherpaOnnxDestroyDisplay().
 *
 * @code
 * const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50);
 * @endcode
 */
SHERPA_ONNX_API const SherpaOnnxDisplay *SherpaOnnxCreateDisplay(
    int32_t max_word_per_line);

/**
 * @brief Destroy a display helper.
 *
 * @param display A pointer returned by SherpaOnnxCreateDisplay().
 */
SHERPA_ONNX_API void SherpaOnnxDestroyDisplay(const SherpaOnnxDisplay *display);

/**
 * @brief Print one line of text using the display helper.
 *
 * @param display A pointer returned by SherpaOnnxCreateDisplay().
 * @param idx Segment or utterance index to print.
 * @param s Text to print.
 *
 * @code
 * SherpaOnnxPrint(display, segment_id, r->text);
 * @endcode
 */
SHERPA_ONNX_API void SherpaOnnxPrint(const SherpaOnnxDisplay *display,
                                     int32_t idx, const char *s);
// ============================================================
// For offline ASR (i.e., non-streaming ASR)
// ============================================================

/**
 * @brief Configuration for a non-streaming transducer model.
 */
typedef struct SherpaOnnxOfflineTransducerModelConfig {
  /** Path to the encoder ONNX model. */
  const char *encoder;
  /** Path to the decoder ONNX model. */
  const char *decoder;
  /** Path to the joiner ONNX model. */
  const char *joiner;
} SherpaOnnxOfflineTransducerModelConfig;

/** @brief Configuration for a non-streaming Paraformer model. */
typedef struct SherpaOnnxOfflineParaformerModelConfig {
  /** Path to the ONNX model. */
  const char *model;
} SherpaOnnxOfflineParaformerModelConfig;

/** @brief Configuration for a non-streaming NeMo CTC model. */
typedef struct SherpaOnnxOfflineNemoEncDecCtcModelConfig {
  /** Path to the ONNX model. */
  const char *model;
} SherpaOnnxOfflineNemoEncDecCtcModelConfig;

/**
 * @brief Configuration for a non-streaming Whisper model.
 */
typedef struct SherpaOnnxOfflineWhisperModelConfig {
  /** Path to the encoder ONNX model. */
  const char *encoder;
  /** Path to the decoder ONNX model. */
  const char *decoder;
  /** Optional language hint, for example "en" or "zh". */
  const char *language;
  /** Optional Whisper task such as "transcribe" or "translate". */
  const char *task;
  /** Number of tail padding frames appended internally. */
  int32_t tail_paddings;

  /** Non-zero to enable token-level timestamps when supported by the model. */
  int32_t enable_token_timestamps;

  /** Non-zero to enable Whisper segment-level timestamps. */
  int32_t enable_segment_timestamps;
} SherpaOnnxOfflineWhisperModelConfig;

/** @brief Configuration for a Canary model. */
typedef struct SherpaOnnxOfflineCanaryModelConfig {
  /** Path to the encoder ONNX model. */
  const char *encoder;
  /** Path to the decoder ONNX model. */
  const char *decoder;
  /** Source language hint. */
  const char *src_lang;
  /** Target language hint. */
  const char *tgt_lang;
  /** Non-zero to enable punctuation and capitalization when supported. */
  int32_t use_pnc;
} SherpaOnnxOfflineCanaryModelConfig;

/** @brief Configuration for a FireRedAsr encoder/decoder model. */
typedef struct SherpaOnnxOfflineFireRedAsrModelConfig {
  /** Path to the encoder ONNX model. */
  const char *encoder;
  /** Path to the decoder ONNX model. */
  const char *decoder;
} SherpaOnnxOfflineFireRedAsrModelConfig;

/** @brief Configuration for a FireRedAsr CTC model. */
typedef struct SherpaOnnxOfflineFireRedAsrCtcModelConfig {
  /** Path to the ONNX model. */
  const char *model;
} SherpaOnnxOfflineFireRedAsrCtcModelConfig;

/** @brief Configuration for a Moonshine model. */
typedef struct SherpaOnnxOfflineMoonshineModelConfig {
  /** Path to the preprocessor ONNX model. */
  const char *preprocessor;
  /** Path to the encoder ONNX model. */
  const char *encoder;
  /** Path to the uncached decoder ONNX model. */
  const char *uncached_decoder;
  /** Path to the cached decoder ONNX model. */
  const char *cached_decoder;
  /** Path to the merged decoder ONNX model. */
  const char *merged_decoder;
} SherpaOnnxOfflineMoonshineModelConfig;

/** @brief Configuration for a TDNN model. */
typedef struct SherpaOnnxOfflineTdnnModelConfig {
  /** Path to the ONNX model. */
  const char *model;
} SherpaOnnxOfflineTdnnModelConfig;

/** @brief Configuration for an offline language model. */
typedef struct SherpaOnnxOfflineLMConfig {
  /** Path to the language model. */
  const char *model;
  /** Interpolation scale for the language model. */
  float scale;
} SherpaOnnxOfflineLMConfig;

/** @brief Configuration for a SenseVoice model. */
typedef struct SherpaOnnxOfflineSenseVoiceModelConfig {
  /** Path to the ONNX model. */
  const char *model;
  /** Optional language hint. */
  const char *language;
  /** Non-zero to enable inverse text normalization. */
  int32_t use_itn;
} SherpaOnnxOfflineSenseVoiceModelConfig;

/** @brief Configuration for a Dolphin model. */
typedef struct SherpaOnnxOfflineDolphinModelConfig {
  /** Path to the ONNX model. */
  const char *model;
} SherpaOnnxOfflineDolphinModelConfig;

/** @brief Configuration for an offline Zipformer CTC model. */
typedef struct SherpaOnnxOfflineZipformerCtcModelConfig {
  /** Path to the ONNX model. */
  const char *model;
} SherpaOnnxOfflineZipformerCtcModelConfig;

/** @brief Configuration for an offline WeNet CTC model. */
typedef struct SherpaOnnxOfflineWenetCtcModelConfig {
  /** Path to the ONNX model. */
  const char *model;
} SherpaOnnxOfflineWenetCtcModelConfig;

/** @brief Configuration for an omnilingual offline CTC model. */
typedef struct SherpaOnnxOfflineOmnilingualAsrCtcModelConfig {
  /** Path to the ONNX model. */
  const char *model;
} SherpaOnnxOfflineOmnilingualAsrCtcModelConfig;

/** @brief Configuration for an offline FunASR Nano model. */
typedef struct SherpaOnnxOfflineFunASRNanoModelConfig {
  /** Path to the encoder adaptor. */
  const char *encoder_adaptor;
  /** Path to the LLM ONNX model. */
  const char *llm;
  /** Path to the embedding model. */
  const char *embedding;
  /** Path to the tokenizer file. */
  const char *tokenizer;
  /** System prompt. */
  const char *system_prompt;
  /** User prompt. */
  const char *user_prompt;
  /** Maximum number of generated tokens. */
  int32_t max_new_tokens;
  /** Sampling temperature. */
  float temperature;
  /** Top-p sampling threshold. */
  float top_p;
  /** Random seed. */
  int32_t seed;
  /** Optional language hint. */
  const char *language;
  /** Non-zero to enable inverse text normalization. */
  int32_t itn;
  /** Optional hotwords text. */
  const char *hotwords;
} SherpaOnnxOfflineFunASRNanoModelConfig;

/** @brief Configuration for a MedASR CTC model. */
typedef struct SherpaOnnxOfflineMedAsrCtcModelConfig {
  /** Path to the ONNX model. */
  const char *model;
} SherpaOnnxOfflineMedAsrCtcModelConfig;

/**
 * @brief Model configuration shared by offline ASR recognizers.
 *
 * Zero-initialize this struct before use, then fill in exactly the sub-config
 * needed by the model family you want to run.
 *
 * Exactly one model family should be configured for each recognizer. For
 * example, set only one of @c transducer, @c paraformer, @c nemo_ctc,
 * @c whisper, @c tdnn, @c sense_voice, @c moonshine, @c fire_red_asr,
 * @c dolphin, @c zipformer_ctc, @c canary, @c wenet_ctc, @c omnilingual,
 * @c medasr, @c funasr_nano, or @c fire_red_asr_ctc.
 *
 * If multiple model families are configured at the same time, the
 * implementation will choose one of them, and which one is used is
 * implementation-defined. Do not rely on any precedence rule.
 */
typedef struct SherpaOnnxOfflineModelConfig {
  /** Non-streaming transducer model files. */
  SherpaOnnxOfflineTransducerModelConfig transducer;
  /** Non-streaming Paraformer model files. */
  SherpaOnnxOfflineParaformerModelConfig paraformer;
  /** Non-streaming NeMo CTC model files. */
  SherpaOnnxOfflineNemoEncDecCtcModelConfig nemo_ctc;
  /** Whisper model files and options. */
  SherpaOnnxOfflineWhisperModelConfig whisper;
  /** TDNN model files. */
  SherpaOnnxOfflineTdnnModelConfig tdnn;

  /** Path to the tokens file. */
  const char *tokens;
  /** Number of backend threads. */
  int32_t num_threads;
  /** Non-zero to print debug information. */
  int32_t debug;
  /** Execution provider, for example "cpu" or "cuda". */
  const char *provider;
  /** Optional explicit model type override. */
  const char *model_type;
  /** Modeling unit, such as "cjkchar", "bpe", or "cjkchar+bpe". */
  const char *modeling_unit;
  /** Path to the BPE vocabulary file when BPE is used. */
  const char *bpe_vocab;
  /** Path to the TeleSpeech CTC model. */
  const char *telespeech_ctc;
  /** SenseVoice configuration. */
  SherpaOnnxOfflineSenseVoiceModelConfig sense_voice;
  /** Moonshine configuration. */
  SherpaOnnxOfflineMoonshineModelConfig moonshine;
  /** FireRedAsr configuration. */
  SherpaOnnxOfflineFireRedAsrModelConfig fire_red_asr;
  /** Dolphin configuration. */
  SherpaOnnxOfflineDolphinModelConfig dolphin;
  /** Zipformer CTC configuration. */
  SherpaOnnxOfflineZipformerCtcModelConfig zipformer_ctc;
  /** Canary configuration. */
  SherpaOnnxOfflineCanaryModelConfig canary;
  /** WeNet CTC configuration. */
  SherpaOnnxOfflineWenetCtcModelConfig wenet_ctc;
  /** Omnilingual CTC configuration. */
  SherpaOnnxOfflineOmnilingualAsrCtcModelConfig omnilingual;
  /** MedASR configuration. */
  SherpaOnnxOfflineMedAsrCtcModelConfig medasr;
  /** FunASR Nano configuration. */
  SherpaOnnxOfflineFunASRNanoModelConfig funasr_nano;
  /** FireRedAsr CTC configuration. */
  SherpaOnnxOfflineFireRedAsrCtcModelConfig fire_red_asr_ctc;
} SherpaOnnxOfflineModelConfig;

/**
 * @brief Configuration for a non-streaming ASR recognizer.
 *
 * Zero-initialize this struct before use.
 *
 * Example using Whisper:
 *
 * @code
 * SherpaOnnxOfflineRecognizerConfig config;
 * memset(&config, 0, sizeof(config));
 *
 * config.feat_config.sample_rate = 16000;
 * config.feat_config.feature_dim = 80;
 *
 * config.model_config.whisper.encoder =
 *     "./sherpa-onnx-whisper-tiny/tiny-encoder.onnx";
 * config.model_config.whisper.decoder =
 *     "./sherpa-onnx-whisper-tiny/tiny-decoder.onnx";
 * config.model_config.whisper.language = "en";
 * config.model_config.whisper.task = "transcribe";
 * config.model_config.tokens =
 *     "./sherpa-onnx-whisper-tiny/tiny-tokens.txt";
 * config.model_config.provider = "cpu";
 * config.model_config.num_threads = 1;
 *
 * config.decoding_method = "greedy_search";
 * @endcode
 *
 * Example using SenseVoice:
 *
 * @code
 * config.model_config.sense_voice.model =
 *     "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8/model.int8.onnx";
 * config.model_config.sense_voice.language = "auto";
 * config.model_config.sense_voice.use_itn = 1;
 * config.model_config.tokens =
 *     "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8/tokens.txt";
 * @endcode
 *
 * Example using Parakeet TDT:
 *
 * @code
 * config.model_config.transducer.encoder =
 *     "./sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8/encoder.int8.onnx";
 * config.model_config.transducer.decoder =
 *     "./sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8/decoder.int8.onnx";
 * config.model_config.transducer.joiner =
 *     "./sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8/joiner.int8.onnx";
 * config.model_config.tokens =
 *     "./sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8/tokens.txt";
 * config.model_config.model_type = "nemo_transducer";
 * @endcode
 */
typedef struct SherpaOnnxOfflineRecognizerConfig {
  /** Feature extraction settings. */
  SherpaOnnxFeatureConfig feat_config;
  /** Offline model configuration. */
  SherpaOnnxOfflineModelConfig model_config;
  /** Optional language model configuration. */
  SherpaOnnxOfflineLMConfig lm_config;

  /** Decoding method, for example "greedy_search" or "modified_beam_search". */
  const char *decoding_method;
  /** Number of active paths for modified beam search. */
  int32_t max_active_paths;

  /** Path to a hotwords file. */
  const char *hotwords_file;

  /** Bonus score added to each hotword token. */
  float hotwords_score;
  /** Path to punctuation or text-processing rule FSTs. */
  const char *rule_fsts;
  /** Path to FAR archives used by text-processing rules. */
  const char *rule_fars;
  /** Optional blank penalty applied during decoding. */
  float blank_penalty;

  /** Optional homophone replacement configuration. */
  SherpaOnnxHomophoneReplacerConfig hr;
} SherpaOnnxOfflineRecognizerConfig;

/** @brief Non-streaming recognizer handle. */
typedef struct SherpaOnnxOfflineRecognizer SherpaOnnxOfflineRecognizer;

/** @brief Non-streaming decoding state for one utterance. */
typedef struct SherpaOnnxOfflineStream SherpaOnnxOfflineStream;

/**
 * @brief Create a non-streaming ASR recognizer.
 *
 * @param config Recognizer configuration.
 * @return A recognizer handle on success, or NULL if the configuration is
 *         invalid. The caller owns the returned object and must free it with
 *         SherpaOnnxDestroyOfflineRecognizer().
 *
 * Whisper example:
 *
 * @code
 * SherpaOnnxOfflineRecognizerConfig config;
 * memset(&config, 0, sizeof(config));
 * config.feat_config.sample_rate = 16000;
 * config.feat_config.feature_dim = 80;
 * config.model_config.whisper.encoder =
 *     "./sherpa-onnx-whisper-tiny/tiny-encoder.onnx";
 * config.model_config.whisper.decoder =
 *     "./sherpa-onnx-whisper-tiny/tiny-decoder.onnx";
 * config.model_config.whisper.language = "en";
 * config.model_config.whisper.task = "transcribe";
 * config.model_config.tokens =
 *     "./sherpa-onnx-whisper-tiny/tiny-tokens.txt";
 * config.model_config.provider = "cpu";
 * config.model_config.num_threads = 1;
 * config.decoding_method = "greedy_search";
 *
 * const SherpaOnnxOfflineRecognizer *recognizer =
 *     SherpaOnnxCreateOfflineRecognizer(&config);
 * @endcode
 *
 * SenseVoice example:
 *
 * @code
 * config.model_config.sense_voice.model =
 *     "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8/model.int8.onnx";
 * config.model_config.sense_voice.language = "auto";
 * config.model_config.sense_voice.use_itn = 1;
 * config.model_config.tokens =
 *     "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8/tokens.txt";
 * @endcode
 *
 * Parakeet TDT example:
 *
 * @code
 * config.model_config.transducer.encoder =
 *     "./sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8/encoder.int8.onnx";
 * config.model_config.transducer.decoder =
 *     "./sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8/decoder.int8.onnx";
 * config.model_config.transducer.joiner =
 *     "./sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8/joiner.int8.onnx";
 * config.model_config.tokens =
 *     "./sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8/tokens.txt";
 * config.model_config.model_type = "nemo_transducer";
 * @endcode
 */
SHERPA_ONNX_API const SherpaOnnxOfflineRecognizer *
SherpaOnnxCreateOfflineRecognizer(
    const SherpaOnnxOfflineRecognizerConfig *config);

/**
 * @brief Update the configuration of an existing offline recognizer.
 *
 * @param recognizer Recognizer handle.
 * @param config New recognizer configuration.
 *
 * @code
 * SherpaOnnxOfflineRecognizerSetConfig(recognizer, &config);
 * @endcode
 */
SHERPA_ONNX_API void SherpaOnnxOfflineRecognizerSetConfig(
    const SherpaOnnxOfflineRecognizer *recognizer,
    const SherpaOnnxOfflineRecognizerConfig *config);

/**
 * @brief Destroy a non-streaming recognizer.
 *
 * @param recognizer A pointer returned by SherpaOnnxCreateOfflineRecognizer().
 *
 * @code
 * SherpaOnnxDestroyOfflineRecognizer(recognizer);
 * recognizer = NULL;
 * @endcode
 */
SHERPA_ONNX_API void SherpaOnnxDestroyOfflineRecognizer(
    const SherpaOnnxOfflineRecognizer *recognizer);

/**
 * @brief Create a non-streaming ASR input stream.
 *
 * @param recognizer A pointer returned by SherpaOnnxCreateOfflineRecognizer().
 * @return A newly created stream. The caller owns the returned object and must
 *         free it with SherpaOnnxDestroyOfflineStream().
 *
 * @code
 * const SherpaOnnxWave *wave =
 *     SherpaOnnxReadWave("./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav");
 * const SherpaOnnxOfflineStream *stream =
 *     SherpaOnnxCreateOfflineStream(recognizer);
 * @endcode
 */
SHERPA_ONNX_API const SherpaOnnxOfflineStream *SherpaOnnxCreateOfflineStream(
    const SherpaOnnxOfflineRecognizer *recognizer);

/**
 * @brief Create a non-streaming ASR input stream with per-stream hotwords.
 *
 * @param recognizer A pointer returned by SherpaOnnxCreateOfflineRecognizer().
 * @param hotwords Hotwords text to associate with the stream.
 * @return A newly created stream. The caller owns the returned object and must
 *         free it with SherpaOnnxDestroyOfflineStream().
 *
 * @code
 * const SherpaOnnxOfflineStream *stream =
 *     SherpaOnnxCreateOfflineStreamWithHotwords(recognizer,
 *                                               "▁HELLO ▁WORLD");
 * @endcode
 */
SHERPA_ONNX_API const SherpaOnnxOfflineStream *
SherpaOnnxCreateOfflineStreamWithHotwords(
    const SherpaOnnxOfflineRecognizer *recognizer, const char *hotwords);

/**
 * @brief Destroy a non-streaming ASR stream.
 *
 * @param stream A pointer returned by SherpaOnnxCreateOfflineStream() or
 *               SherpaOnnxCreateOfflineStreamWithHotwords().
 *
 * @code
 * SherpaOnnxDestroyOfflineStream(stream);
 * stream = NULL;
 * @endcode
 */
SHERPA_ONNX_API void SherpaOnnxDestroyOfflineStream(
    const SherpaOnnxOfflineStream *stream);

/**
 * @brief Provide the full utterance to an offline ASR stream.
 *
 * The input is mono floating-point PCM normalized to the range [-1, 1].
 * If @p sample_rate differs from the recognizer feature sample rate,
 * sherpa-onnx resamples internally.
 *
 * @warning Call this function at most once for each offline stream. Offline
 * recognition expects the entire utterance in a single call.
 *
 * @param stream A pointer returned by SherpaOnnxCreateOfflineStream().
 * @param sample_rate Sample rate of @p samples.
 * @param samples Pointer to @p n samples in the range [-1, 1].
 * @param n Number of samples.
 *
 * @code
 * const SherpaOnnxWave *wave =
 *     SherpaOnnxReadWave("./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav");
 * const SherpaOnnxOfflineStream *stream =
 *     SherpaOnnxCreateOfflineStream(recognizer);
 * SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate,
 *                                 wave->samples, wave->num_samples);
 * SherpaOnnxDecodeOfflineStream(recognizer, stream);
 * @endcode
 */
SHERPA_ONNX_API void SherpaOnnxAcceptWaveformOffline(
    const SherpaOnnxOfflineStream *stream, int32_t sample_rate,
    const float *samples, int32_t n);

/**
 * @brief Set a per-stream runtime option for offline ASR.
 *
 * @param stream A pointer returned by SherpaOnnxCreateOfflineStream().
 * @param key Option name.
 * @param value Option value represented as text.
 *
 * @code
 * SherpaOnnxOfflineStreamSetOption(stream, "language", "en");
 * @endcode
 */
SHERPA_ONNX_API void SherpaOnnxOfflineStreamSetOption(
    const SherpaOnnxOfflineStream *stream, const char *key, const char *value);

/**
 * @brief Get a per-stream runtime option for offline ASR.
 *
 * @param stream A pointer returned by SherpaOnnxCreateOfflineStream().
 * @param key Option name.
 * @return The option value. The returned pointer is owned by the stream, must
 *         not be freed by the caller, and may be invalidated if the option is
 *         overwritten or the stream is destroyed.
 *
 * @code
 * const char *value = SherpaOnnxOfflineStreamGetOption(stream, "language");
 * @endcode
 */
SHERPA_ONNX_API const char *SherpaOnnxOfflineStreamGetOption(
    const SherpaOnnxOfflineStream *stream, const char *key);

/**
 * @brief Check whether a per-stream runtime option exists.
 *
 * @param stream A pointer returned by SherpaOnnxCreateOfflineStream().
 * @param key Option name.
 * @return 1 if the option exists; otherwise 0.
 *
 * @code
 * int32_t has_language =
 *     SherpaOnnxOfflineStreamHasOption(stream, "language");
 * @endcode
 */
SHERPA_ONNX_API int32_t SherpaOnnxOfflineStreamHasOption(
    const SherpaOnnxOfflineStream *stream, const char *key);

/**
 * @brief Run offline ASR on one stream.
 *
 * Call this after SherpaOnnxAcceptWaveformOffline().
 *
 * @param recognizer A pointer returned by SherpaOnnxCreateOfflineRecognizer().
 * @param stream A pointer returned by SherpaOnnxCreateOfflineStream().
 *
 * @code
 * SherpaOnnxDecodeOfflineStream(recognizer, stream);
 * @endcode
 */
SHERPA_ONNX_API void SherpaOnnxDecodeOfflineStream(
    const SherpaOnnxOfflineRecognizer *recognizer,
    const SherpaOnnxOfflineStream *stream);

/**
 * @brief Run offline ASR on multiple streams in parallel.
 *
 * The caller must have already provided one utterance to each stream via
 * SherpaOnnxAcceptWaveformOffline().
 *
 * @param recognizer A pointer returned by SherpaOnnxCreateOfflineRecognizer().
 * @param streams Array of @p n offline stream pointers.
 * @param n Number of streams in @p streams.
 *
 * @code
 * const SherpaOnnxOfflineStream *streams[2] = {stream1, stream2};
 * SherpaOnnxDecodeMultipleOfflineStreams(recognizer, streams, 2);
 * @endcode
 */
SHERPA_ONNX_API void SherpaOnnxDecodeMultipleOfflineStreams(
    const SherpaOnnxOfflineRecognizer *recognizer,
    const SherpaOnnxOfflineStream **streams, int32_t n);

/**
 * @brief Recognition result for a non-streaming ASR stream.
 *
 * All pointers in this struct are owned by the result object returned from
 * SherpaOnnxGetOfflineStreamResult() and become invalid after
 * SherpaOnnxDestroyOfflineRecognizerResult() is called.
 */
typedef struct SherpaOnnxOfflineRecognizerResult {
  /** Recognized text. */
  const char *text;

  /**
   * Optional token timestamps in seconds.
   *
   * This field may be NULL when the model does not provide token timestamps.
   * When non-NULL, it contains @c count entries and is parallel to
   * @c tokens_arr.
   */
  float *timestamps;

  /** Number of token entries in @c tokens_arr and related per-token arrays. */
  int32_t count;

  /**
   * Contiguous memory block containing token strings separated by '\0'.
   *
   * Use @c tokens_arr for convenient indexed access.
   */
  const char *tokens;

  /** Array of @c count pointers into @c tokens. */
  const char *const *tokens_arr;

  /** JSON serialization of the result. */
  const char *json;

  /** Optional recognized language label. */
  const char *lang;

  /** Optional recognized emotion label. */
  const char *emotion;

  /** Optional recognized event label. */
  const char *event;

  /** Optional token durations in seconds, parallel to @c tokens_arr. */
  float *durations;

  /** Optional token log probabilities, parallel to @c tokens_arr. */
  float *ys_log_probs;

  /** Optional segment start times in seconds, parallel to @c segment_texts_arr.
   */
  const float *segment_timestamps;

  /** Optional segment durations in seconds, parallel to @c segment_texts_arr.
   */
  const float *segment_durations;

  /** Contiguous memory block containing segment texts separated by '\0'. */
  const char *segment_texts;

  /** Array of @c segment_count pointers into @c segment_texts. */
  const char *const *segment_texts_arr;

  /** Number of segment entries in the segment-level arrays. */
  int32_t segment_count;
} SherpaOnnxOfflineRecognizerResult;

/**
 * @brief Get the recognition result for an offline ASR stream.
 *
 * Call this after SherpaOnnxDecodeOfflineStream() or
 * SherpaOnnxDecodeMultipleOfflineStreams().
 *
 * @param stream A pointer returned by SherpaOnnxCreateOfflineStream().
 * @return A newly allocated result snapshot. Free it with
 *         SherpaOnnxDestroyOfflineRecognizerResult().
 *
 * @code
 * const SherpaOnnxOfflineRecognizerResult *r =
 *     SherpaOnnxGetOfflineStreamResult(stream);
 * printf("%s\n", r->text);
 * if (r->timestamps) {
 *   printf("First token starts at %.3f seconds\n", r->timestamps[0]);
 * }
 * SherpaOnnxDestroyOfflineRecognizerResult(r);
 * @endcode
 */
SHERPA_ONNX_API const SherpaOnnxOfflineRecognizerResult *
SherpaOnnxGetOfflineStreamResult(const SherpaOnnxOfflineStream *stream);

/**
 * @brief Destroy a result returned by SherpaOnnxGetOfflineStreamResult().
 *
 * @param r A pointer returned by SherpaOnnxGetOfflineStreamResult().
 *
 * @code
 * SherpaOnnxDestroyOfflineRecognizerResult(r);
 * r = NULL;
 * @endcode
 */
SHERPA_ONNX_API void SherpaOnnxDestroyOfflineRecognizerResult(
    const SherpaOnnxOfflineRecognizerResult *r);

/**
 * @brief Get the offline ASR result as JSON.
 *
 * @param stream A pointer returned by SherpaOnnxCreateOfflineStream().
 * @return A newly allocated JSON string. Free it with
 *         SherpaOnnxDestroyOfflineStreamResultJson().
 *
 * @code
 * const char *json = SherpaOnnxGetOfflineStreamResultAsJson(stream);
 * puts(json);
 * SherpaOnnxDestroyOfflineStreamResultJson(json);
 * @endcode
 */
SHERPA_ONNX_API const char *SherpaOnnxGetOfflineStreamResultAsJson(
    const SherpaOnnxOfflineStream *stream);

/**
 * @brief Free a JSON string returned by
 * SherpaOnnxGetOfflineStreamResultAsJson().
 *
 * @param s A pointer returned by SherpaOnnxGetOfflineStreamResultAsJson().
 *
 * @code
 * SherpaOnnxDestroyOfflineStreamResultJson(json);
 * json = NULL;
 * @endcode
 */
SHERPA_ONNX_API void SherpaOnnxDestroyOfflineStreamResultJson(const char *s);

// ============================================================
// For keyword spotting
// ============================================================
/**
 * @brief Snapshot of the current keyword spotting result.
 *
 * Free this object with SherpaOnnxDestroyKeywordResult().
 */
typedef struct SherpaOnnxKeywordResult {
  /**
   * Triggered keyword text.
   *
   * For English models this is usually space-separated words. For Chinese
   * models it is typically the surface form without spaces.
   */
  const char *keyword;

  /**
   * Token sequence as a single string.
   *
   * For BPE-based models this contains the decoded BPE tokens.
   */
  const char *tokens;

  /**
   * Token sequence as an array.
   *
   * The array length is @c count. Each string is owned by this result object.
   */
  const char *const *tokens_arr;

  /** Number of decoded tokens in @c tokens_arr and @c timestamps. */
  int32_t count;

  /**
   * Per-token timestamps in seconds.
   *
   * This array has @c count elements. Element @c i corresponds to
   * `tokens_arr[i]`.
   */
  float *timestamps;

  /** Start time of the current segment in seconds. */
  float start_time;

  /**
   * JSON representation of the result.
   *
   * The JSON includes `keyword`, `tokens`, `timestamps`, and `start_time`.
   */
  const char *json;
} SherpaOnnxKeywordResult;

/**
 * @brief Configuration for keyword spotting.
 *
 * The acoustic model is configured through @c model_config. In practice this is
 * usually a streaming transducer model.
 *
 * Keyword definitions can be provided either through @c keywords_file or
 * through @c keywords_buf/@c keywords_buf_size. If both are set, the buffer is
 * used.
 *
 * Example using
 * `sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile`:
 *
 * @code
 * SherpaOnnxKeywordSpotterConfig config;
 * memset(&config, 0, sizeof(config));
 *
 * config.model_config.transducer.encoder =
 *     "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
 *     "encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx";
 * config.model_config.transducer.decoder =
 *     "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
 *     "decoder-epoch-12-avg-2-chunk-16-left-64.onnx";
 * config.model_config.transducer.joiner =
 *     "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
 *     "joiner-epoch-12-avg-2-chunk-16-left-64.int8.onnx";
 * config.model_config.tokens =
 *     "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
 *     "tokens.txt";
 * config.model_config.provider = "cpu";
 * config.model_config.num_threads = 1;
 *
 * config.keywords_file =
 *     "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/"
 *     "test_wavs/test_keywords.txt";
 * config.max_active_paths = 4;
 * config.keywords_score = 3.0f;
 * config.keywords_threshold = 0.1f;
 * @endcode
 */
typedef struct SherpaOnnxKeywordSpotterConfig {
  /** Feature extraction parameters. */
  SherpaOnnxFeatureConfig feat_config;
  /** Streaming acoustic model configuration. */
  SherpaOnnxOnlineModelConfig model_config;
  /** Maximum number of active decoding paths. */
  int32_t max_active_paths;
  /** Number of trailing blank symbols required before trigger finalization. */
  int32_t num_trailing_blanks;
  /** Bonus score applied to keywords during search. */
  float keywords_score;
  /** Detection threshold. Larger values are more conservative. */
  float keywords_threshold;
  /** Optional keyword file. */
  const char *keywords_file;
  /** Optional in-memory keyword data. If non-null, it overrides @c
   * keywords_file. */
  const char *keywords_buf;
  /** Size in bytes of @c keywords_buf, excluding any trailing `'\0'`. */
  int32_t keywords_buf_size;
} SherpaOnnxKeywordSpotterConfig;

/** @brief Opaque keyword spotter handle. */
typedef struct SherpaOnnxKeywordSpotter SherpaOnnxKeywordSpotter;

/**
 * @brief Create a keyword spotter.
 *
 * @param config Keyword spotter configuration.
 * @return A newly allocated keyword spotter on success, or NULL on error. Free
 *         it with SherpaOnnxDestroyKeywordSpotter().
 */
SHERPA_ONNX_API const SherpaOnnxKeywordSpotter *SherpaOnnxCreateKeywordSpotter(
    const SherpaOnnxKeywordSpotterConfig *config);

/**
 * @brief Destroy a keyword spotter.
 *
 * @param spotter A pointer returned by SherpaOnnxCreateKeywordSpotter().
 */
SHERPA_ONNX_API void SherpaOnnxDestroyKeywordSpotter(
    const SherpaOnnxKeywordSpotter *spotter);

/**
 * @brief Create a keyword spotting stream using the spotter's built-in keyword
 * list.
 *
 * @param spotter A pointer returned by SherpaOnnxCreateKeywordSpotter().
 * @return A newly allocated stream. Free it with
 * SherpaOnnxDestroyOnlineStream().
 */
SHERPA_ONNX_API const SherpaOnnxOnlineStream *SherpaOnnxCreateKeywordStream(
    const SherpaOnnxKeywordSpotter *spotter);

/**
 * @brief Create a keyword spotting stream with extra or replacement keywords.
 *
 * The @p keywords string uses the same textual format as the keyword files used
 * by the examples. For instance:
 *
 * @code
 * const SherpaOnnxOnlineStream *stream =
 *     SherpaOnnxCreateKeywordStreamWithKeywords(
 *         kws, "y ǎn y uán @演员/zh ī m íng @知名");
 * @endcode
 *
 * @param spotter A pointer returned by SherpaOnnxCreateKeywordSpotter().
 * @param keywords Inline keyword definition string.
 * @return A newly allocated stream. Free it with
 * SherpaOnnxDestroyOnlineStream().
 */
SHERPA_ONNX_API const SherpaOnnxOnlineStream *
SherpaOnnxCreateKeywordStreamWithKeywords(
    const SherpaOnnxKeywordSpotter *spotter, const char *keywords);

/**
 * @brief Check whether a keyword stream has enough audio for decoding.
 *
 * @param spotter A pointer returned by SherpaOnnxCreateKeywordSpotter().
 * @param stream A pointer returned by SherpaOnnxCreateKeywordStream() or
 *               SherpaOnnxCreateKeywordStreamWithKeywords().
 * @return 1 if the stream is ready to decode; otherwise 0.
 */
SHERPA_ONNX_API int32_t
SherpaOnnxIsKeywordStreamReady(const SherpaOnnxKeywordSpotter *spotter,
                               const SherpaOnnxOnlineStream *stream);

/**
 * @brief Decode one ready keyword stream.
 *
 * Call this only when SherpaOnnxIsKeywordStreamReady() returns 1.
 *
 * @param spotter A pointer returned by SherpaOnnxCreateKeywordSpotter().
 * @param stream A pointer returned by SherpaOnnxCreateKeywordStream() or
 *               SherpaOnnxCreateKeywordStreamWithKeywords().
 */
SHERPA_ONNX_API void SherpaOnnxDecodeKeywordStream(
    const SherpaOnnxKeywordSpotter *spotter,
    const SherpaOnnxOnlineStream *stream);

/**
 * @brief Reset a keyword stream after a keyword is detected.
 *
 * The examples call this immediately after a successful trigger so the next
 * keyword can be detected independently.
 *
 * @param spotter A pointer returned by SherpaOnnxCreateKeywordSpotter().
 * @param stream A pointer returned by SherpaOnnxCreateKeywordStream() or
 *               SherpaOnnxCreateKeywordStreamWithKeywords().
 */
SHERPA_ONNX_API void SherpaOnnxResetKeywordStream(
    const SherpaOnnxKeywordSpotter *spotter,
    const SherpaOnnxOnlineStream *stream);

/**
 * @brief Decode multiple ready keyword streams in parallel.
 *
 * The caller must ensure every stream in @p streams is ready before calling
 * this function.
 *
 * @param spotter A pointer returned by SherpaOnnxCreateKeywordSpotter().
 * @param streams Array of ready streams.
 * @param n Number of elements in @p streams.
 */
SHERPA_ONNX_API void SherpaOnnxDecodeMultipleKeywordStreams(
    const SherpaOnnxKeywordSpotter *spotter,
    const SherpaOnnxOnlineStream **streams, int32_t n);

/**
 * @brief Get the current keyword spotting result for a stream.
 *
 * The returned snapshot may represent either "no trigger yet" or a detected
 * keyword. A common pattern is to check whether `strlen(r->keyword) != 0`.
 *
 * @param spotter A pointer returned by SherpaOnnxCreateKeywordSpotter().
 * @param stream A pointer returned by SherpaOnnxCreateKeywordStream() or
 *               SherpaOnnxCreateKeywordStreamWithKeywords().
 * @return A newly allocated result snapshot. Free it with
 *         SherpaOnnxDestroyKeywordResult().
 *
 * @code
 * const SherpaOnnxKeywordResult *r = SherpaOnnxGetKeywordResult(kws, stream);
 * if (r && r->json && strlen(r->keyword)) {
 *   fprintf(stderr, "Detected keyword: %s\n", r->json);
 *   SherpaOnnxResetKeywordStream(kws, stream);
 * }
 * SherpaOnnxDestroyKeywordResult(r);
 * @endcode
 */
SHERPA_ONNX_API const SherpaOnnxKeywordResult *SherpaOnnxGetKeywordResult(
    const SherpaOnnxKeywordSpotter *spotter,
    const SherpaOnnxOnlineStream *stream);

/**
 * @brief Destroy a keyword result snapshot.
 *
 * @param r A pointer returned by SherpaOnnxGetKeywordResult().
 */
SHERPA_ONNX_API void SherpaOnnxDestroyKeywordResult(
    const SherpaOnnxKeywordResult *r);

/**
 * @brief Get the current keyword spotting result as JSON.
 *
 * @param spotter A pointer returned by SherpaOnnxCreateKeywordSpotter().
 * @param stream A pointer returned by SherpaOnnxCreateKeywordStream() or
 *               SherpaOnnxCreateKeywordStreamWithKeywords().
 * @return A newly allocated JSON string. Free it with
 *         SherpaOnnxFreeKeywordResultJson().
 */
SHERPA_ONNX_API const char *SherpaOnnxGetKeywordResultAsJson(
    const SherpaOnnxKeywordSpotter *spotter,
    const SherpaOnnxOnlineStream *stream);

/**
 * @brief Free a JSON string returned by SherpaOnnxGetKeywordResultAsJson().
 *
 * @param s A pointer returned by SherpaOnnxGetKeywordResultAsJson().
 */
SHERPA_ONNX_API void SherpaOnnxFreeKeywordResultJson(const char *s);

// ============================================================
// For VAD
// ============================================================

/** @brief Configuration for a Silero VAD model. */
typedef struct SherpaOnnxSileroVadModelConfig {
  /** Path to `silero_vad.onnx`. */
  const char *model;
  /** Speech probability threshold. Frames above this value are speech. */
  float threshold;
  /** Minimum silence duration in seconds used to close a speech segment. */
  float min_silence_duration;
  /** Minimum speech duration in seconds to keep a detected segment. */
  float min_speech_duration;
  /** Input window size in samples. A common value is 512. */
  int32_t window_size;
  /**
   * Maximum speech duration in seconds.
   *
   * When a segment exceeds this value, the detector temporarily uses a higher
   * threshold to encourage a split.
   */
  float max_speech_duration;
} SherpaOnnxSileroVadModelConfig;

/** @brief Configuration for a Ten VAD model. */
typedef struct SherpaOnnxTenVadModelConfig {
  /** Path to `ten-vad.onnx`. */
  const char *model;
  /** Speech probability threshold. Frames above this value are speech. */
  float threshold;
  /** Minimum silence duration in seconds used to close a speech segment. */
  float min_silence_duration;
  /** Minimum speech duration in seconds to keep a detected segment. */
  float min_speech_duration;
  /** Input window size in samples. A common value is 256. */
  int32_t window_size;
  /**
   * Maximum speech duration in seconds.
   *
   * When a segment exceeds this value, the detector temporarily uses a higher
   * threshold to encourage a split.
   */
  float max_speech_duration;
} SherpaOnnxTenVadModelConfig;

/**
 * @brief Configuration shared by voice activity detectors.
 *
 * Exactly one VAD model family should be configured. Set either
 * @c silero_vad.model or @c ten_vad.model.
 *
 * If both are configured, the implementation will choose one of them, and
 * which one is used is implementation-defined. Do not rely on any precedence
 * rule.
 *
 * Example model files:
 * - `./silero_vad.onnx`
 * - `./ten-vad.onnx`
 *
 * @code
 * SherpaOnnxVadModelConfig config;
 * memset(&config, 0, sizeof(config));
 *
 * config.silero_vad.model = "./silero_vad.onnx";
 * config.silero_vad.threshold = 0.25f;
 * config.silero_vad.min_silence_duration = 0.5f;
 * config.silero_vad.min_speech_duration = 0.5f;
 * config.silero_vad.max_speech_duration = 10.0f;
 * config.silero_vad.window_size = 512;
 *
 * config.sample_rate = 16000;
 * config.num_threads = 1;
 * config.provider = "cpu";
 * config.debug = 0;
 * @endcode
 */
typedef struct SherpaOnnxVadModelConfig {
  /** Silero VAD configuration. */
  SherpaOnnxSileroVadModelConfig silero_vad;
  /** Input sample rate expected by the detector, usually 16000. */
  int32_t sample_rate;
  /** Number of backend threads. */
  int32_t num_threads;
  /** Execution provider, for example "cpu" or "cuda". */
  const char *provider;
  /** Non-zero to print debug information. */
  int32_t debug;
  /** Ten VAD configuration. */
  SherpaOnnxTenVadModelConfig ten_vad;
} SherpaOnnxVadModelConfig;

/** @brief Opaque circular-buffer handle used by helper APIs. */
typedef struct SherpaOnnxCircularBuffer SherpaOnnxCircularBuffer;

/**
 * @brief Create a floating-point circular buffer.
 *
 * @param capacity Maximum number of samples the buffer can keep.
 * @return A newly allocated buffer. Free it with
 *         SherpaOnnxDestroyCircularBuffer().
 *
 * @code
 * const SherpaOnnxCircularBuffer *buffer =
 *     SherpaOnnxCreateCircularBuffer(16000 * 30);
 * @endcode
 */
SHERPA_ONNX_API const SherpaOnnxCircularBuffer *SherpaOnnxCreateCircularBuffer(
    int32_t capacity);

/**
 * @brief Destroy a circular buffer.
 *
 * @param buffer A pointer returned by SherpaOnnxCreateCircularBuffer().
 *
 * @code
 * SherpaOnnxDestroyCircularBuffer(buffer);
 * buffer = NULL;
 * @endcode
 */
SHERPA_ONNX_API void SherpaOnnxDestroyCircularBuffer(
    const SherpaOnnxCircularBuffer *buffer);

/**
 * @brief Append samples to a circular buffer.
 *
 * @param buffer A pointer returned by SherpaOnnxCreateCircularBuffer().
 * @param p Pointer to @p n samples.
 * @param n Number of samples.
 *
 * @code
 * SherpaOnnxCircularBufferPush(buffer, wave->samples, wave->num_samples);
 * @endcode
 */
SHERPA_ONNX_API void SherpaOnnxCircularBufferPush(
    const SherpaOnnxCircularBuffer *buffer, const float *p, int32_t n);

/**
 * @brief Copy out a slice of samples from a circular buffer.
 *
 * @param buffer A pointer returned by SherpaOnnxCreateCircularBuffer().
 * @param start_index Absolute start index in the buffer timeline.
 * @param n Number of samples to copy.
 * @return A newly allocated array containing @p n samples. Free it with
 *         SherpaOnnxCircularBufferFree().
 *
 * @code
 * const float *samples = SherpaOnnxCircularBufferGet(buffer, start, 3200);
 * SherpaOnnxCircularBufferFree(samples);
 * @endcode
 */
SHERPA_ONNX_API const float *SherpaOnnxCircularBufferGet(
    const SherpaOnnxCircularBuffer *buffer, int32_t start_index, int32_t n);

/** @brief Free an array returned by SherpaOnnxCircularBufferGet(). */
SHERPA_ONNX_API void SherpaOnnxCircularBufferFree(const float *p);

/**
 * @brief Drop samples from the front of a circular buffer.
 *
 * @param buffer A pointer returned by SherpaOnnxCreateCircularBuffer().
 * @param n Number of samples to remove.
 */
SHERPA_ONNX_API void SherpaOnnxCircularBufferPop(
    const SherpaOnnxCircularBuffer *buffer, int32_t n);

/**
 * @brief Return the number of currently stored samples.
 *
 * @param buffer A pointer returned by SherpaOnnxCreateCircularBuffer().
 * @return Number of samples currently in the buffer.
 */
SHERPA_ONNX_API int32_t
SherpaOnnxCircularBufferSize(const SherpaOnnxCircularBuffer *buffer);

/**
 * @brief Return the current head index of the buffer timeline.
 *
 * The value is monotonically non-decreasing until
 * SherpaOnnxCircularBufferReset() is called.
 *
 * @param buffer A pointer returned by SherpaOnnxCreateCircularBuffer().
 * @return The current head index.
 */
SHERPA_ONNX_API int32_t
SherpaOnnxCircularBufferHead(const SherpaOnnxCircularBuffer *buffer);

/**
 * @brief Clear a circular buffer and reset its head index.
 *
 * @param buffer A pointer returned by SherpaOnnxCreateCircularBuffer().
 */
SHERPA_ONNX_API void SherpaOnnxCircularBufferReset(
    const SherpaOnnxCircularBuffer *buffer);

/**
 * @brief One detected speech segment returned by the VAD.
 *
 * The segment owns @c samples. Free the whole object with
 * SherpaOnnxDestroySpeechSegment().
 */
typedef struct SherpaOnnxSpeechSegment {
  /** Start index, in input samples, of this segment. */
  int32_t start;
  /** Newly allocated mono samples for this segment. */
  float *samples;
  /** Number of samples in @c samples. */
  int32_t n;
} SherpaOnnxSpeechSegment;

/** @brief Opaque voice activity detector handle. */
typedef struct SherpaOnnxVoiceActivityDetector SherpaOnnxVoiceActivityDetector;

/**
 * @brief Create a voice activity detector.
 *
 * Example model files are shown in `c-api-examples/vad-whisper-c-api.c`.
 *
 * @param config VAD configuration.
 * @param buffer_size_in_seconds Internal buffering capacity in seconds.
 * @return A newly allocated detector on success, or NULL on configuration
 *         error. Free it with SherpaOnnxDestroyVoiceActivityDetector().
 *
 * @code
 * SherpaOnnxVadModelConfig config;
 * memset(&config, 0, sizeof(config));
 * config.silero_vad.model = "./silero_vad.onnx";
 * config.silero_vad.threshold = 0.25f;
 * config.silero_vad.min_silence_duration = 0.5f;
 * config.silero_vad.min_speech_duration = 0.5f;
 * config.silero_vad.max_speech_duration = 10.0f;
 * config.silero_vad.window_size = 512;
 * config.sample_rate = 16000;
 * config.num_threads = 1;
 *
 * const SherpaOnnxVoiceActivityDetector *vad =
 *     SherpaOnnxCreateVoiceActivityDetector(&config, 30.0f);
 * @endcode
 */
SHERPA_ONNX_API const SherpaOnnxVoiceActivityDetector *
SherpaOnnxCreateVoiceActivityDetector(const SherpaOnnxVadModelConfig *config,
                                      float buffer_size_in_seconds);

/**
 * @brief Destroy a voice activity detector.
 *
 * @param p A pointer returned by SherpaOnnxCreateVoiceActivityDetector().
 */
SHERPA_ONNX_API void SherpaOnnxDestroyVoiceActivityDetector(
    const SherpaOnnxVoiceActivityDetector *p);

/**
 * @brief Feed audio samples to the VAD.
 *
 * Input samples are mono floating-point PCM in the range [-1, 1].
 *
 * @param p A pointer returned by SherpaOnnxCreateVoiceActivityDetector().
 * @param samples Pointer to @p n samples.
 * @param n Number of samples.
 *
 * @code
 * SherpaOnnxVoiceActivityDetectorAcceptWaveform(vad,
 *                                               wave->samples + i,
 *                                               window_size);
 * @endcode
 */
SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorAcceptWaveform(
    const SherpaOnnxVoiceActivityDetector *p, const float *samples, int32_t n);

/**
 * @brief Check whether the detector currently has any completed speech segment.
 *
 * @param p A pointer returned by SherpaOnnxCreateVoiceActivityDetector().
 * @return 1 if no completed speech segment is available; otherwise 0.
 */
SHERPA_ONNX_API int32_t
SherpaOnnxVoiceActivityDetectorEmpty(const SherpaOnnxVoiceActivityDetector *p);

/**
 * @brief Check whether the detector is currently inside speech.
 *
 * @param p A pointer returned by SherpaOnnxCreateVoiceActivityDetector().
 * @return 1 if speech is currently detected; otherwise 0.
 */
SHERPA_ONNX_API int32_t SherpaOnnxVoiceActivityDetectorDetected(
    const SherpaOnnxVoiceActivityDetector *p);

/**
 * @brief Remove the front speech segment from the detector queue.
 *
 * Call this after consuming the segment returned by
 * SherpaOnnxVoiceActivityDetectorFront().
 *
 * @param p A pointer returned by SherpaOnnxCreateVoiceActivityDetector().
 *
 * @code
 * const SherpaOnnxSpeechSegment *segment =
 *     SherpaOnnxVoiceActivityDetectorFront(vad);
 * // ... use segment ...
 * SherpaOnnxDestroySpeechSegment(segment);
 * SherpaOnnxVoiceActivityDetectorPop(vad);
 * @endcode
 */
SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorPop(
    const SherpaOnnxVoiceActivityDetector *p);

/**
 * @brief Remove all queued speech segments.
 *
 * @param p A pointer returned by SherpaOnnxCreateVoiceActivityDetector().
 */
SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorClear(
    const SherpaOnnxVoiceActivityDetector *p);

/**
 * @brief Get the first queued speech segment.
 *
 * The returned segment is a copy owned by the caller. Free it with
 * SherpaOnnxDestroySpeechSegment().
 *
 * @param p A pointer returned by SherpaOnnxCreateVoiceActivityDetector().
 * @return The first queued speech segment, or NULL if none is available.
 *
 * @code
 * while (!SherpaOnnxVoiceActivityDetectorEmpty(vad)) {
 *   const SherpaOnnxSpeechSegment *segment =
 *       SherpaOnnxVoiceActivityDetectorFront(vad);
 *   printf("start=%d, samples=%d\n", segment->start, segment->n);
 *   SherpaOnnxDestroySpeechSegment(segment);
 *   SherpaOnnxVoiceActivityDetectorPop(vad);
 * }
 * @endcode
 */
SHERPA_ONNX_API const SherpaOnnxSpeechSegment *
SherpaOnnxVoiceActivityDetectorFront(const SherpaOnnxVoiceActivityDetector *p);

/**
 * @brief Destroy a speech segment returned by
 * SherpaOnnxVoiceActivityDetectorFront().
 *
 * @param p A pointer returned by SherpaOnnxVoiceActivityDetectorFront().
 */
SHERPA_ONNX_API void SherpaOnnxDestroySpeechSegment(
    const SherpaOnnxSpeechSegment *p);

/**
 * @brief Reset a voice activity detector so it can process a new stream.
 *
 * @param p A pointer returned by SherpaOnnxCreateVoiceActivityDetector().
 */
SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorReset(
    const SherpaOnnxVoiceActivityDetector *p);

/**
 * @brief Flush buffered tail samples and force final segmentation.
 *
 * Call this after the last chunk of input has been fed.
 *
 * @param p A pointer returned by SherpaOnnxCreateVoiceActivityDetector().
 *
 * @code
 * SherpaOnnxVoiceActivityDetectorFlush(vad);
 * @endcode
 */
SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorFlush(
    const SherpaOnnxVoiceActivityDetector *p);

// ============================================================
// For offline Text-to-Speech (i.e., non-streaming TTS)
// ============================================================

/** @brief Configuration for a VITS TTS model. */
typedef struct SherpaOnnxOfflineTtsVitsModelConfig {
  /** Path to the VITS ONNX model, for example `./vits-ljs.onnx`. */
  const char *model;
  /** Path to the lexicon file. Ignored if @c data_dir is provided. */
  const char *lexicon;
  /** Path to the tokens file. */
  const char *tokens;
  /** Optional path to espeak-ng-data. */
  const char *data_dir;
  /** VITS noise scale. */
  float noise_scale;
  /** VITS duration noise scale. */
  float noise_scale_w;
  /** Speech rate scale. Values < 1 are slower; values > 1 are faster. */
  float length_scale;
  /** Unused legacy field kept for ABI compatibility. */
  const char *dict_dir;
} SherpaOnnxOfflineTtsVitsModelConfig;

/** @brief Configuration for a Matcha TTS model. */
typedef struct SherpaOnnxOfflineTtsMatchaModelConfig {
  /** Path to the Matcha acoustic model. */
  const char *acoustic_model;
  /** Path to the vocoder model, for example `./vocos-22khz-univ.onnx`. */
  const char *vocoder;
  /** Path to the lexicon file. */
  const char *lexicon;
  /** Path to the tokens file. */
  const char *tokens;
  /** Optional path to espeak-ng-data. */
  const char *data_dir;
  /** Matcha noise scale. */
  float noise_scale;
  /** Speech rate scale. Values < 1 are slower; values > 1 are faster. */
  float length_scale;
  /** Unused legacy field kept for ABI compatibility. */
  const char *dict_dir;
} SherpaOnnxOfflineTtsMatchaModelConfig;

/** @brief Configuration for a Kokoro TTS model. */
typedef struct SherpaOnnxOfflineTtsKokoroModelConfig {
  /** Path to the Kokoro model, for example `./kokoro-en-v0_19/model.onnx`. */
  const char *model;
  /** Path to the Kokoro voices file. */
  const char *voices;
  /** Path to the tokens file. */
  const char *tokens;
  /** Optional path to espeak-ng-data. */
  const char *data_dir;
  /** Speech rate scale. Values < 1 are slower; values > 1 are faster. */
  float length_scale;
  /** Unused legacy field kept for ABI compatibility. */
  const char *dict_dir;
  /** Optional lexicon file. */
  const char *lexicon;
  /** Optional language hint. */
  const char *lang;
} SherpaOnnxOfflineTtsKokoroModelConfig;

/** @brief Configuration for a Kitten TTS model. */
typedef struct SherpaOnnxOfflineTtsKittenModelConfig {
  /** Path to the Kitten model. */
  const char *model;
  /** Path to the Kitten voices file. */
  const char *voices;
  /** Path to the tokens file. */
  const char *tokens;
  /** Optional path to espeak-ng-data. */
  const char *data_dir;
  /** Speech rate scale. Values < 1 are slower; values > 1 are faster. */
  float length_scale;
} SherpaOnnxOfflineTtsKittenModelConfig;

/** @brief Configuration for a ZipVoice TTS model. */
typedef struct SherpaOnnxOfflineTtsZipvoiceModelConfig {
  /** Path to the tokens file. */
  const char *tokens;
  /** Path to the ZipVoice encoder model. */
  const char *encoder;
  /** Path to the ZipVoice decoder model. */
  const char *decoder;
  /** Path to the vocoder model. */
  const char *vocoder;
  /** Optional path to espeak-ng-data. */
  const char *data_dir;
  /** Path to the lexicon file. */
  const char *lexicon;
  /** Feature scaling factor. */
  float feat_scale;
  /** Time shift parameter. */
  float t_shift;
  /** Target RMS parameter. */
  float target_rms;
  /** Guidance scale parameter. */
  float guidance_scale;
} SherpaOnnxOfflineTtsZipvoiceModelConfig;

/** @brief Configuration for a Pocket TTS model. */
typedef struct SherpaOnnxOfflineTtsPocketModelConfig {
  /** Path to `lm_flow*.onnx`. */
  const char *lm_flow;
  /** Path to `lm_main*.onnx`. */
  const char *lm_main;
  /** Path to the Pocket encoder model. */
  const char *encoder;
  /** Path to the Pocket decoder model. */
  const char *decoder;
  /** Path to the text conditioner model. */
  const char *text_conditioner;
  /** Path to `vocab.json`. */
  const char *vocab_json;
  /** Path to `token_scores.json`. */
  const char *token_scores_json;
  /** Voice embedding cache capacity. */
  int32_t voice_embedding_cache_capacity;
} SherpaOnnxOfflineTtsPocketModelConfig;

/** @brief Configuration for a Supertonic TTS model. */
typedef struct SherpaOnnxOfflineTtsSupertonicModelConfig {
  /** Path to the duration predictor model. */
  const char *duration_predictor;
  /** Path to the text encoder model. */
  const char *text_encoder;
  /** Path to the vector estimator model. */
  const char *vector_estimator;
  /** Path to the vocoder model. */
  const char *vocoder;
  /** Path to `tts.json`. */
  const char *tts_json;
  /** Path to the unicode indexer file. */
  const char *unicode_indexer;
  /** Path to the voice style file. */
  const char *voice_style;
} SherpaOnnxOfflineTtsSupertonicModelConfig;

/**
 * @brief Configuration shared by offline TTS models.
 *
 * Exactly one TTS model family should be configured. For example, set only one
 * of @c vits, @c matcha, @c kokoro, @c kitten, @c zipvoice, @c pocket, or
 * @c supertonic.
 *
 * If multiple model families are configured at the same time, the
 * implementation will choose one of them, and which one is used is
 * implementation-defined. Do not rely on any precedence rule.
 *
 * Concrete example model packages in this repository include:
 * - `kokoro-en-v0_19`
 * - `sherpa-onnx-pocket-tts-int8-2026-01-26`
 * - `matcha-icefall-en_US-ljspeech`
 * - `sherpa-onnx-zipvoice-distill-int8-zh-en-emilia`
 */
typedef struct SherpaOnnxOfflineTtsModelConfig {
  /** VITS configuration. */
  SherpaOnnxOfflineTtsVitsModelConfig vits;
  /** Number of backend threads. */
  int32_t num_threads;
  /** Non-zero to print debug information. */
  int32_t debug;
  /** Execution provider, for example "cpu" or "cuda". */
  const char *provider;
  /** Matcha configuration. */
  SherpaOnnxOfflineTtsMatchaModelConfig matcha;
  /** Kokoro configuration. */
  SherpaOnnxOfflineTtsKokoroModelConfig kokoro;
  /** Kitten configuration. */
  SherpaOnnxOfflineTtsKittenModelConfig kitten;
  /** ZipVoice configuration. */
  SherpaOnnxOfflineTtsZipvoiceModelConfig zipvoice;
  /** Pocket configuration. */
  SherpaOnnxOfflineTtsPocketModelConfig pocket;
  /** Supertonic configuration. */
  SherpaOnnxOfflineTtsSupertonicModelConfig supertonic;
} SherpaOnnxOfflineTtsModelConfig;

/**
 * @brief Configuration for offline text-to-speech.
 *
 * @code
 * SherpaOnnxOfflineTtsConfig config;
 * memset(&config, 0, sizeof(config));
 *
 * config.model.kokoro.model = "./kokoro-en-v0_19/model.onnx";
 * config.model.kokoro.voices = "./kokoro-en-v0_19/voices.bin";
 * config.model.kokoro.tokens = "./kokoro-en-v0_19/tokens.txt";
 * config.model.kokoro.data_dir = "./kokoro-en-v0_19/espeak-ng-data";
 * config.model.num_threads = 2;
 * config.model.provider = "cpu";
 * config.model.debug = 0;
 * config.max_num_sentences = 2;
 * @endcode
 */
typedef struct SherpaOnnxOfflineTtsConfig {
  /** TTS model configuration. */
  SherpaOnnxOfflineTtsModelConfig model;
  /** Optional comma-separated rule FST list. */
  const char *rule_fsts;
  /** Maximum number of sentences processed per chunk. */
  int32_t max_num_sentences;
  /** Optional FAR archives used by text normalization rules. */
  const char *rule_fars;
  /** Default silence scale between sentences. */
  float silence_scale;
} SherpaOnnxOfflineTtsConfig;

/**
 * @brief Generated waveform returned by TTS APIs.
 *
 * The returned structure owns @c samples. Free the whole object with
 * SherpaOnnxDestroyOfflineTtsGeneratedAudio().
 */
typedef struct SherpaOnnxGeneratedAudio {
  /** Generated mono samples in the range [-1, 1]. */
  const float *samples;
  /** Number of samples in @c samples. */
  int32_t n;
  /** Output sample rate. */
  int32_t sample_rate;
} SherpaOnnxGeneratedAudio;

/**
 * @brief Callback invoked during incremental generation.
 *
 * Return 1 to continue generation. Return 0 to stop early.
 *
 * The @p samples pointer is only valid during the callback. Copy the samples if
 * you need to keep them after the callback returns.
 */
typedef int32_t (*SherpaOnnxGeneratedAudioCallback)(const float *samples,
                                                    int32_t n);

/**
 * @brief Same as SherpaOnnxGeneratedAudioCallback but with an extra user
 * pointer.
 */
typedef int32_t (*SherpaOnnxGeneratedAudioCallbackWithArg)(const float *samples,
                                                           int32_t n,
                                                           void *arg);

/**
 * @brief Progress callback invoked during incremental generation.
 *
 * @param samples Newly generated samples valid only during the callback.
 * @param n Number of samples in @p samples.
 * @param p Progress in the range [0, 1].
 * @return Return 1 to continue generation. Return 0 to stop early.
 */
typedef int32_t (*SherpaOnnxGeneratedAudioProgressCallback)(
    const float *samples, int32_t n, float p);

/**
 * @brief Same as SherpaOnnxGeneratedAudioProgressCallback but with an extra
 * user pointer.
 */
typedef int32_t (*SherpaOnnxGeneratedAudioProgressCallbackWithArg)(
    const float *samples, int32_t n, float p, void *arg);

/** @brief Opaque offline TTS handle. */
typedef struct SherpaOnnxOfflineTts SherpaOnnxOfflineTts;

/**
 * @brief Create an offline TTS engine.
 *
 * @param config TTS configuration.
 * @return A newly allocated TTS engine on success, or NULL on configuration
 *         error. Free it with SherpaOnnxDestroyOfflineTts().
 *
 * @code
 * SherpaOnnxOfflineTtsConfig config;
 * memset(&config, 0, sizeof(config));
 * config.model.kokoro.model = "./kokoro-en-v0_19/model.onnx";
 * config.model.kokoro.voices = "./kokoro-en-v0_19/voices.bin";
 * config.model.kokoro.tokens = "./kokoro-en-v0_19/tokens.txt";
 * config.model.kokoro.data_dir = "./kokoro-en-v0_19/espeak-ng-data";
 * config.model.num_threads = 2;
 *
 * const SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
 * @endcode
 */
SHERPA_ONNX_API const SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
    const SherpaOnnxOfflineTtsConfig *config);

/**
 * @brief Destroy an offline TTS engine.
 *
 * @param tts A pointer returned by SherpaOnnxCreateOfflineTts().
 */
SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTts(
    const SherpaOnnxOfflineTts *tts);

/**
 * @brief Return the output sample rate of a TTS engine.
 *
 * @param tts A pointer returned by SherpaOnnxCreateOfflineTts().
 * @return Output sample rate in Hz.
 */
SHERPA_ONNX_API int32_t
SherpaOnnxOfflineTtsSampleRate(const SherpaOnnxOfflineTts *tts);

/**
 * @brief Return the number of available speaker IDs.
 *
 * Single-speaker models often return 1.
 *
 * @param tts A pointer returned by SherpaOnnxCreateOfflineTts().
 * @return Number of speakers supported by the model.
 */
SHERPA_ONNX_API int32_t
SherpaOnnxOfflineTtsNumSpeakers(const SherpaOnnxOfflineTts *tts);

/**
 * @brief Generate speech from text using the simple sid/speed interface.
 *
 * @deprecated Use SherpaOnnxOfflineTtsGenerateWithConfig() instead.
 *
 * @param tts A pointer returned by SherpaOnnxCreateOfflineTts().
 * @param text Input text.
 * @param sid Speaker ID for multi-speaker models.
 * @param speed Speech rate. Values > 1 are faster.
 * @return Generated audio, or NULL on error. Free it with
 *         SherpaOnnxDestroyOfflineTtsGeneratedAudio().
 *
 * @code
 * const SherpaOnnxGeneratedAudio *audio =
 *     SherpaOnnxOfflineTtsGenerate(tts, "Hello from sherpa-onnx!", 0, 1.0f);
 * SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate,
 *                     "./generated.wav");
 * SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
 * @endcode
 */
SHERPA_ONNX_API SHERPA_ONNX_DEPRECATED(
    "Use SherpaOnnxOfflineTtsGenerateWithConfig() instead") const
    SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerate(
        const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid,
        float speed);

/**
 * @brief Generate speech and receive incremental audio chunks through a
 * callback.
 *
 * @deprecated Use SherpaOnnxOfflineTtsGenerateWithConfig() instead.
 *
 * The callback receives newly generated samples. The sample pointer is valid
 * only for the duration of the callback.
 *
 * @param tts A pointer returned by SherpaOnnxCreateOfflineTts().
 * @param text Input text.
 * @param sid Speaker ID for multi-speaker models.
 * @param speed Speech rate. Values > 1 are faster.
 * @param callback Incremental callback. Return 0 to stop generation early.
 * @return Final generated audio, or NULL on error. Free it with
 *         SherpaOnnxDestroyOfflineTtsGeneratedAudio().
 */
SHERPA_ONNX_API SHERPA_ONNX_DEPRECATED(
    "Use SherpaOnnxOfflineTtsGenerateWithConfig() instead") const
    SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithCallback(
        const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid,
        float speed, SherpaOnnxGeneratedAudioCallback callback);

/**
 * @brief Generate speech with a progress callback.
 *
 * @deprecated Use SherpaOnnxOfflineTtsGenerateWithConfig() instead.
 *
 * @param tts A pointer returned by SherpaOnnxCreateOfflineTts().
 * @param text Input text.
 * @param sid Speaker ID for multi-speaker models.
 * @param speed Speech rate. Values > 1 are faster.
 * @param callback Progress callback. Return 0 to stop generation early.
 * @return Final generated audio, or NULL on error. Free it with
 *         SherpaOnnxDestroyOfflineTtsGeneratedAudio().
 *
 * @code
 * int32_t Progress(const float *samples, int32_t n, float p) {
 *   fprintf(stderr, "Progress: %.2f%%\n", p * 100);
 *   return 1;
 * }
 *
 * const SherpaOnnxGeneratedAudio *audio =
 *     SherpaOnnxOfflineTtsGenerateWithProgressCallback(tts, text, 0, 1.0f,
 *                                                      Progress);
 * @endcode
 */
SHERPA_ONNX_API SHERPA_ONNX_DEPRECATED(
    "Use SherpaOnnxOfflineTtsGenerateWithConfig() instead") const
    SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithProgressCallback(
        const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid,
        float speed, SherpaOnnxGeneratedAudioProgressCallback callback);

/**
 * @brief Generate speech with a progress callback that receives a user pointer.
 *
 * @deprecated Use SherpaOnnxOfflineTtsGenerateWithConfig() instead.
 *
 * @param tts A pointer returned by SherpaOnnxCreateOfflineTts().
 * @param text Input text.
 * @param sid Speaker ID for multi-speaker models.
 * @param speed Speech rate. Values > 1 are faster.
 * @param callback Progress callback with user pointer. Return 0 to stop early.
 * @param arg User pointer forwarded to @p callback.
 * @return Final generated audio, or NULL on error. Free it with
 *         SherpaOnnxDestroyOfflineTtsGeneratedAudio().
 */
SHERPA_ONNX_API SHERPA_ONNX_DEPRECATED(
    "Use SherpaOnnxOfflineTtsGenerateWithConfig() instead") const
    SherpaOnnxGeneratedAudio
        *SherpaOnnxOfflineTtsGenerateWithProgressCallbackWithArg(
            const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid,
            float speed,
            SherpaOnnxGeneratedAudioProgressCallbackWithArg callback,
            void *arg);

/**
 * @brief Same as SherpaOnnxOfflineTtsGenerateWithCallback() but with a user
 * pointer.
 *
 * @deprecated Use SherpaOnnxOfflineTtsGenerateWithConfig() instead.
 *
 * @param tts A pointer returned by SherpaOnnxCreateOfflineTts().
 * @param text Input text.
 * @param sid Speaker ID for multi-speaker models.
 * @param speed Speech rate. Values > 1 are faster.
 * @param callback Incremental callback with user pointer.
 * @param arg User pointer forwarded to @p callback.
 * @return Final generated audio, or NULL on error. Free it with
 *         SherpaOnnxDestroyOfflineTtsGeneratedAudio().
 */
SHERPA_ONNX_API SHERPA_ONNX_DEPRECATED(
    "Use SherpaOnnxOfflineTtsGenerateWithConfig() instead") const
    SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithCallbackWithArg(
        const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid,
        float speed, SherpaOnnxGeneratedAudioCallbackWithArg callback,
        void *arg);

/**
 * @brief Deprecated ZipVoice-specific generation API.
 *
 * Use SherpaOnnxOfflineTtsGenerateWithConfig() instead.
 */
SHERPA_ONNX_API SHERPA_ONNX_DEPRECATED(
    "Use SherpaOnnxOfflineTtsGenerateWithConfig() instead") const
    SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithZipvoice(
        const SherpaOnnxOfflineTts *tts, const char *text,
        const char *prompt_text, const float *prompt_samples, int32_t n_prompt,
        int32_t prompt_sr, float speed, int32_t num_steps);

/**
 * @brief Generation-time parameters shared by advanced TTS APIs.
 *
 * This struct supports both simple multi-speaker synthesis and more advanced
 * zero-shot or reference-conditioned models.
 *
 * Example for Pocket TTS:
 *
 * @code
 * SherpaOnnxGenerationConfig cfg;
 * memset(&cfg, 0, sizeof(cfg));
 * cfg.speed = 1.0f;
 * cfg.reference_audio = wave->samples;
 * cfg.reference_audio_len = wave->num_samples;
 * cfg.reference_sample_rate = wave->sample_rate;
 * cfg.extra = "{\"max_reference_audio_len\": 10.0, \"seed\": 42}";
 * @endcode
 */
typedef struct SherpaOnnxGenerationConfig {
  /** Silence scale between sentences. */
  float silence_scale;
  /** Speech rate. Used only by models that support it. */
  float speed;
  /** Speaker ID for multi-speaker models. */
  int32_t sid;
  /** Optional reference audio for zero-shot or voice-cloning models. */
  const float *reference_audio;
  /** Length of @c reference_audio in samples. */
  int32_t reference_audio_len;
  /** Sample rate of @c reference_audio. */
  int32_t reference_sample_rate;
  /** Optional reference text associated with @c reference_audio. */
  const char *reference_text;
  /** Optional number of flow-matching steps. */
  int32_t num_steps;
  /** Optional model-specific JSON string with extra key/value pairs. */
  const char *extra;
} SherpaOnnxGenerationConfig;

/**
 * @brief Generate speech using the advanced configuration interface.
 *
 * This is the preferred API for new integrations. It supports callback-based
 * progress reporting and model-specific options such as reference audio.
 *
 * @param tts A pointer returned by SherpaOnnxCreateOfflineTts().
 * @param text Input text.
 * @param config Generation-time configuration.
 * @param callback Optional progress callback with user pointer. Return 0 to
 *                 stop early.
 * @param arg User pointer forwarded to @p callback.
 * @return Generated audio, or NULL on error. Free it with
 *         SherpaOnnxDestroyOfflineTtsGeneratedAudio().
 *
 * @code
 * SherpaOnnxGenerationConfig cfg;
 * memset(&cfg, 0, sizeof(cfg));
 * cfg.sid = 0;
 * cfg.speed = 1.0f;
 * cfg.silence_scale = 0.2f;
 *
 * const SherpaOnnxGeneratedAudio *audio =
 *     SherpaOnnxOfflineTtsGenerateWithConfig(tts,
 *         "Today as always, men fall into two groups.",
 *         &cfg, NULL, NULL);
 * @endcode
 */
SHERPA_ONNX_API const SherpaOnnxGeneratedAudio *
SherpaOnnxOfflineTtsGenerateWithConfig(
    const SherpaOnnxOfflineTts *tts, const char *text,
    const SherpaOnnxGenerationConfig *config,
    SherpaOnnxGeneratedAudioProgressCallbackWithArg callback, void *arg);

/**
 * @brief Destroy audio returned by a TTS generation API.
 *
 * @param p A pointer returned by one of the SherpaOnnxOfflineTtsGenerate*
 *          functions.
 */
SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTtsGeneratedAudio(
    const SherpaOnnxGeneratedAudio *p);

/**
 * @brief Write floating-point PCM to a mono 16-bit WAVE file.
 *
 * @param samples Pointer to @p n samples in the range [-1, 1].
 * @param n Number of samples.
 * @param sample_rate Sample rate in Hz.
 * @param filename Output filename.
 * @return 1 on success; 0 on failure.
 *
 * @code
 * SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate,
 *                     "./generated-kokoro-en.wav");
 * @endcode
 */
SHERPA_ONNX_API int32_t SherpaOnnxWriteWave(const float *samples, int32_t n,
                                            int32_t sample_rate,
                                            const char *filename);

/**
 * @brief Return the number of bytes needed for a mono 16-bit WAVE file.
 *
 * @param n_samples Number of PCM samples.
 * @return Required buffer size in bytes.
 */
SHERPA_ONNX_API int64_t SherpaOnnxWaveFileSize(int32_t n_samples);

/**
 * @brief Write a mono 16-bit WAVE file to a caller-provided buffer.
 *
 * Allocate at least SherpaOnnxWaveFileSize(@p n) bytes before calling.
 *
 * @param samples Pointer to @p n samples in the range [-1, 1].
 * @param n Number of samples.
 * @param sample_rate Sample rate in Hz.
 * @param buffer Output buffer.
 */
SHERPA_ONNX_API void SherpaOnnxWriteWaveToBuffer(const float *samples,
                                                 int32_t n, int32_t sample_rate,
                                                 char *buffer);

/**
 * @brief Decoded mono WAVE file content.
 *
 * Free this object with SherpaOnnxFreeWave().
 */
typedef struct SherpaOnnxWave {
  /** Samples normalized to the range [-1, 1]. */
  const float *samples;
  /** Sample rate in Hz. */
  int32_t sample_rate;
  /** Number of samples. */
  int32_t num_samples;
} SherpaOnnxWave;

/**
 * @brief Read a mono 16-bit PCM WAVE file.
 *
 * @param filename Input WAVE filename.
 * @return A newly allocated wave object, or NULL on error. Free it with
 *         SherpaOnnxFreeWave().
 *
 * @code
 * const SherpaOnnxWave *wave = SherpaOnnxReadWave("./Obama.wav");
 * if (wave) {
 *   printf("sample_rate=%d, num_samples=%d\n",
 *          wave->sample_rate, wave->num_samples);
 *   SherpaOnnxFreeWave(wave);
 * }
 * @endcode
 */
SHERPA_ONNX_API const SherpaOnnxWave *SherpaOnnxReadWave(const char *filename);

/**
 * @brief Read a mono 16-bit PCM WAVE file from binary memory.
 *
 * @param data Pointer to the WAVE file bytes.
 * @param n Size of @p data in bytes.
 * @return A newly allocated wave object, or NULL on error. Free it with
 *         SherpaOnnxFreeWave().
 */
SHERPA_ONNX_API const SherpaOnnxWave *SherpaOnnxReadWaveFromBinaryData(
    const char *data, int32_t n);

/**
 * @brief Destroy a wave object returned by SherpaOnnxReadWave() or
 * SherpaOnnxReadWaveFromBinaryData().
 */
SHERPA_ONNX_API void SherpaOnnxFreeWave(const SherpaOnnxWave *wave);

// ============================================================
// For spoken language identification
// ============================================================

/**
 * @brief Whisper-based model files for spoken language identification.
 *
 * Example:
 *
 * @code
 * SherpaOnnxSpokenLanguageIdentificationWhisperConfig whisper;
 * memset(&whisper, 0, sizeof(whisper));
 * whisper.encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx";
 * whisper.decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx";
 * @endcode
 */
typedef struct SherpaOnnxSpokenLanguageIdentificationWhisperConfig {
  /** Whisper encoder model. */
  const char *encoder;
  /** Whisper decoder model. */
  const char *decoder;
  /** Optional tail padding in samples appended internally before inference. */
  int32_t tail_paddings;
} SherpaOnnxSpokenLanguageIdentificationWhisperConfig;

/**
 * @brief Configuration for spoken language identification.
 *
 * The current implementation uses Whisper-based models.
 *
 * Example using `sherpa-onnx-whisper-tiny`:
 *
 * @code
 * SherpaOnnxSpokenLanguageIdentificationConfig config;
 * memset(&config, 0, sizeof(config));
 * config.whisper.encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx";
 * config.whisper.decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx";
 * config.num_threads = 1;
 * config.provider = "cpu";
 * @endcode
 */
typedef struct SherpaOnnxSpokenLanguageIdentificationConfig {
  /** Whisper model configuration. */
  SherpaOnnxSpokenLanguageIdentificationWhisperConfig whisper;
  /** Number of inference threads. */
  int32_t num_threads;
  /** Non-zero to print debug information. */
  int32_t debug;
  /** Execution provider such as `"cpu"`. */
  const char *provider;
} SherpaOnnxSpokenLanguageIdentificationConfig;

/** @brief Opaque spoken-language identification handle. */
typedef struct SherpaOnnxSpokenLanguageIdentification
    SherpaOnnxSpokenLanguageIdentification;

/**
 * @brief Create a spoken-language identifier.
 *
 * @param config Spoken-language identification configuration.
 * @return A newly allocated identifier on success, or NULL on error. Free it
 *         with SherpaOnnxDestroySpokenLanguageIdentification().
 */
SHERPA_ONNX_API const SherpaOnnxSpokenLanguageIdentification *
SherpaOnnxCreateSpokenLanguageIdentification(
    const SherpaOnnxSpokenLanguageIdentificationConfig *config);

/**
 * @brief Destroy a spoken-language identifier.
 *
 * @param slid A pointer returned by
 * SherpaOnnxCreateSpokenLanguageIdentification().
 */
SHERPA_ONNX_API void SherpaOnnxDestroySpokenLanguageIdentification(
    const SherpaOnnxSpokenLanguageIdentification *slid);

/**
 * @brief Create an offline stream for spoken-language identification.
 *
 * Feed audio to the returned stream with SherpaOnnxAcceptWaveformOffline(), and
 * then call SherpaOnnxSpokenLanguageIdentificationCompute().
 *
 * @param slid A pointer returned by
 * SherpaOnnxCreateSpokenLanguageIdentification().
 * @return A newly allocated offline stream. Free it with
 *         SherpaOnnxDestroyOfflineStream().
 */
SHERPA_ONNX_API SherpaOnnxOfflineStream *
SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(
    const SherpaOnnxSpokenLanguageIdentification *slid);

/**
 * @brief Result of spoken-language identification.
 *
 * Free this object with SherpaOnnxDestroySpokenLanguageIdentificationResult().
 */
typedef struct SherpaOnnxSpokenLanguageIdentificationResult {
  /**
   * Predicted language code such as `"en"`, `"de"`, `"zh"`, or `"es"`.
   */
  const char *lang;
} SherpaOnnxSpokenLanguageIdentificationResult;

/**
 * @brief Run spoken-language identification on an offline stream.
 *
 * Example:
 *
 * @code
 * SherpaOnnxOfflineStream *stream =
 *     SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(slid);
 * SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
 *                                 wave->num_samples);
 * const SherpaOnnxSpokenLanguageIdentificationResult *result =
 *     SherpaOnnxSpokenLanguageIdentificationCompute(slid, stream);
 * printf("lang=%s\n", result->lang);
 * SherpaOnnxDestroySpokenLanguageIdentificationResult(result);
 * SherpaOnnxDestroyOfflineStream(stream);
 * @endcode
 *
 * @param slid A pointer returned by
 * SherpaOnnxCreateSpokenLanguageIdentification().
 * @param s A pointer returned by
 *          SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream().
 * @return A newly allocated result object. Free it with
 *         SherpaOnnxDestroySpokenLanguageIdentificationResult().
 */
SHERPA_ONNX_API const SherpaOnnxSpokenLanguageIdentificationResult *
SherpaOnnxSpokenLanguageIdentificationCompute(
    const SherpaOnnxSpokenLanguageIdentification *slid,
    const SherpaOnnxOfflineStream *s);

/**
 * @brief Destroy a spoken-language identification result.
 *
 * @param r A pointer returned by
 * SherpaOnnxSpokenLanguageIdentificationCompute().
 */
SHERPA_ONNX_API void SherpaOnnxDestroySpokenLanguageIdentificationResult(
    const SherpaOnnxSpokenLanguageIdentificationResult *r);

// ============================================================
// For speaker embedding extraction
// ============================================================
/**
 * @brief Configuration for speaker embedding extraction.
 *
 * Example using
 * `3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx`:
 *
 * @code
 * SherpaOnnxSpeakerEmbeddingExtractorConfig config;
 * memset(&config, 0, sizeof(config));
 * config.model = "./3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx";
 * config.num_threads = 1;
 * config.provider = "cpu";
 * @endcode
 */
typedef struct SherpaOnnxSpeakerEmbeddingExtractorConfig {
  /** Speaker embedding model file. */
  const char *model;
  /** Number of inference threads. */
  int32_t num_threads;
  /** Non-zero to print debug information. */
  int32_t debug;
  /** Execution provider such as `"cpu"`. */
  const char *provider;
} SherpaOnnxSpeakerEmbeddingExtractorConfig;

/** @brief Opaque speaker embedding extractor handle. */
typedef struct SherpaOnnxSpeakerEmbeddingExtractor
    SherpaOnnxSpeakerEmbeddingExtractor;

/**
 * @brief Create a speaker embedding extractor.
 *
 * @param config Speaker embedding extractor configuration.
 * @return A newly allocated extractor on success, or NULL on error. Free it
 *         with SherpaOnnxDestroySpeakerEmbeddingExtractor().
 */
SHERPA_ONNX_API const SherpaOnnxSpeakerEmbeddingExtractor *
SherpaOnnxCreateSpeakerEmbeddingExtractor(
    const SherpaOnnxSpeakerEmbeddingExtractorConfig *config);

/**
 * @brief Destroy a speaker embedding extractor.
 *
 * @param p A pointer returned by SherpaOnnxCreateSpeakerEmbeddingExtractor().
 */
SHERPA_ONNX_API void SherpaOnnxDestroySpeakerEmbeddingExtractor(
    const SherpaOnnxSpeakerEmbeddingExtractor *p);

/**
 * @brief Return the embedding dimension produced by the extractor.
 *
 * @param p A pointer returned by SherpaOnnxCreateSpeakerEmbeddingExtractor().
 * @return Embedding dimension.
 */
SHERPA_ONNX_API int32_t SherpaOnnxSpeakerEmbeddingExtractorDim(
    const SherpaOnnxSpeakerEmbeddingExtractor *p);

/**
 * @brief Create a streaming feature buffer for embedding extraction.
 *
 * Feed samples with SherpaOnnxOnlineStreamAcceptWaveform(), then call
 * SherpaOnnxSpeakerEmbeddingExtractorIsReady() and
 * SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding().
 *
 * @param p A pointer returned by SherpaOnnxCreateSpeakerEmbeddingExtractor().
 * @return A newly allocated online stream. Free it with
 *         SherpaOnnxDestroyOnlineStream().
 */
SHERPA_ONNX_API const SherpaOnnxOnlineStream *
SherpaOnnxSpeakerEmbeddingExtractorCreateStream(
    const SherpaOnnxSpeakerEmbeddingExtractor *p);

/**
 * @brief Check whether enough audio has been provided to compute an embedding.
 *
 * @param p A pointer returned by SherpaOnnxCreateSpeakerEmbeddingExtractor().
 * @param s A pointer returned by
 * SherpaOnnxSpeakerEmbeddingExtractorCreateStream().
 * @return 1 if the stream is ready; otherwise 0.
 */
SHERPA_ONNX_API int32_t SherpaOnnxSpeakerEmbeddingExtractorIsReady(
    const SherpaOnnxSpeakerEmbeddingExtractor *p,
    const SherpaOnnxOnlineStream *s);

/**
 * @brief Compute the embedding for a stream.
 *
 * The returned vector has `SherpaOnnxSpeakerEmbeddingExtractorDim(p)` elements.
 * Free it with SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding().
 *
 * @param p A pointer returned by SherpaOnnxCreateSpeakerEmbeddingExtractor().
 * @param s A pointer returned by
 * SherpaOnnxSpeakerEmbeddingExtractorCreateStream().
 * @return A newly allocated embedding vector.
 *
 * @code
 * const SherpaOnnxOnlineStream *stream =
 *     SherpaOnnxSpeakerEmbeddingExtractorCreateStream(ex);
 * SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate,
 * wave->samples, wave->num_samples);
 * SherpaOnnxOnlineStreamInputFinished(stream);
 * if (SherpaOnnxSpeakerEmbeddingExtractorIsReady(ex, stream)) {
 *   const float *v =
 *       SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding(ex, stream);
 *   SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(v);
 * }
 * SherpaOnnxDestroyOnlineStream(stream);
 * @endcode
 */
SHERPA_ONNX_API const float *
SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding(
    const SherpaOnnxSpeakerEmbeddingExtractor *p,
    const SherpaOnnxOnlineStream *s);

/**
 * @brief Destroy an embedding vector returned by
 * SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding().
 *
 * @param v A pointer returned by
 *          SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding().
 */
SHERPA_ONNX_API void SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(
    const float *v);

/** @brief Opaque speaker embedding manager handle. */
typedef struct SherpaOnnxSpeakerEmbeddingManager
    SherpaOnnxSpeakerEmbeddingManager;

/**
 * @brief Create a speaker embedding manager.
 *
 * The manager stores enrolled speaker embeddings and supports speaker search
 * and verification.
 *
 * @param dim Embedding dimension. This should match
 *            SherpaOnnxSpeakerEmbeddingExtractorDim().
 * @return A newly allocated manager. Free it with
 *         SherpaOnnxDestroySpeakerEmbeddingManager().
 */
SHERPA_ONNX_API const SherpaOnnxSpeakerEmbeddingManager *
SherpaOnnxCreateSpeakerEmbeddingManager(int32_t dim);

/**
 * @brief Destroy a speaker embedding manager.
 *
 * @param p A pointer returned by SherpaOnnxCreateSpeakerEmbeddingManager().
 */
SHERPA_ONNX_API void SherpaOnnxDestroySpeakerEmbeddingManager(
    const SherpaOnnxSpeakerEmbeddingManager *p);

/**
 * @brief Add one enrollment embedding for a speaker.
 *
 * @param p A pointer returned by SherpaOnnxCreateSpeakerEmbeddingManager().
 * @param name Speaker name.
 * @param v Embedding vector with exactly `dim` elements.
 * @return 1 on success; 0 on error.
 */
SHERPA_ONNX_API int32_t
SherpaOnnxSpeakerEmbeddingManagerAdd(const SherpaOnnxSpeakerEmbeddingManager *p,
                                     const char *name, const float *v);

/**
 * @brief Add multiple enrollment embeddings for one speaker.
 *
 * @p v is a NULL-terminated array of embedding pointers:
 * `v[0]`, `v[1]`, ..., `v[n - 1]`, followed by `v[n] == NULL`.
 *
 * @param p A pointer returned by SherpaOnnxCreateSpeakerEmbeddingManager().
 * @param name Speaker name.
 * @param v NULL-terminated array of embedding pointers.
 * @return 1 on success; 0 on error.
 *
 * @code
 * const float *spk1_vec[4] = {e1, e2, e3, NULL};
 * SherpaOnnxSpeakerEmbeddingManagerAddList(manager, "fangjun", spk1_vec);
 * @endcode
 */
SHERPA_ONNX_API int32_t SherpaOnnxSpeakerEmbeddingManagerAddList(
    const SherpaOnnxSpeakerEmbeddingManager *p, const char *name,
    const float **v);

/**
 * @brief Add multiple enrollment embeddings packed in one flat array.
 *
 * The input contains @p n embeddings laid out consecutively, so the total
 * array length must be `n * dim`.
 *
 * @param p A pointer returned by SherpaOnnxCreateSpeakerEmbeddingManager().
 * @param name Speaker name.
 * @param v Flattened embedding array.
 * @param n Number of embeddings in @p v.
 * @return 1 on success; 0 on error.
 */
SHERPA_ONNX_API int32_t SherpaOnnxSpeakerEmbeddingManagerAddListFlattened(
    const SherpaOnnxSpeakerEmbeddingManager *p, const char *name,
    const float *v, int32_t n);

/**
 * @brief Remove a speaker from the manager.
 *
 * @param p A pointer returned by SherpaOnnxCreateSpeakerEmbeddingManager().
 * @param name Speaker name to remove.
 * @return 1 if removed; otherwise 0. Returns 0 if the speaker does not exist.
 */
SHERPA_ONNX_API int32_t SherpaOnnxSpeakerEmbeddingManagerRemove(
    const SherpaOnnxSpeakerEmbeddingManager *p, const char *name);

/**
 * @brief Search for the best matching enrolled speaker.
 *
 * @param p A pointer returned by SherpaOnnxCreateSpeakerEmbeddingManager().
 * @param v Query embedding vector.
 * @param threshold Minimum similarity threshold in the range [0, 1].
 * @return A newly allocated speaker name on match, or NULL if no speaker
 *         passes the threshold. Free the returned name with
 *         SherpaOnnxSpeakerEmbeddingManagerFreeSearch().
 */
SHERPA_ONNX_API const char *SherpaOnnxSpeakerEmbeddingManagerSearch(
    const SherpaOnnxSpeakerEmbeddingManager *p, const float *v,
    float threshold);

/**
 * @brief Free a string returned by SherpaOnnxSpeakerEmbeddingManagerSearch().
 *
 * @param name A pointer returned by
 *             SherpaOnnxSpeakerEmbeddingManagerSearch().
 */
SHERPA_ONNX_API void SherpaOnnxSpeakerEmbeddingManagerFreeSearch(
    const char *name);

/**
 * @brief One speaker match returned by the best-matches API.
 */
typedef struct SherpaOnnxSpeakerEmbeddingManagerSpeakerMatch {
  /** Similarity score. Larger means more similar. */
  float score;
  /** Speaker name. */
  const char *name;
} SherpaOnnxSpeakerEmbeddingManagerSpeakerMatch;

/**
 * @brief Collection of best speaker matches.
 *
 * Free this object with SherpaOnnxSpeakerEmbeddingManagerFreeBestMatches().
 */
typedef struct SherpaOnnxSpeakerEmbeddingManagerBestMatchesResult {
  /** Pointer to an array of @c count matches. */
  const SherpaOnnxSpeakerEmbeddingManagerSpeakerMatch *matches;
  /** Number of valid entries in @c matches. */
  int32_t count;
} SherpaOnnxSpeakerEmbeddingManagerBestMatchesResult;

/**
 * @brief Return up to @p n best matches above a similarity threshold.
 *
 * @param p A pointer returned by SherpaOnnxCreateSpeakerEmbeddingManager().
 * @param v Query embedding vector.
 * @param threshold Minimum similarity threshold in the range [0, 1].
 * @param n Maximum number of matches to return.
 * @return A newly allocated result object, or NULL if no matches are found.
 *         Free it with SherpaOnnxSpeakerEmbeddingManagerFreeBestMatches().
 */
SHERPA_ONNX_API const SherpaOnnxSpeakerEmbeddingManagerBestMatchesResult *
SherpaOnnxSpeakerEmbeddingManagerGetBestMatches(
    const SherpaOnnxSpeakerEmbeddingManager *p, const float *v, float threshold,
    int32_t n);

/**
 * @brief Destroy a best-matches result.
 *
 * @param r A pointer returned by
 * SherpaOnnxSpeakerEmbeddingManagerGetBestMatches().
 */
SHERPA_ONNX_API void SherpaOnnxSpeakerEmbeddingManagerFreeBestMatches(
    const SherpaOnnxSpeakerEmbeddingManagerBestMatchesResult *r);

/**
 * @brief Verify whether a query embedding matches a named speaker.
 *
 * @param p A pointer returned by SherpaOnnxCreateSpeakerEmbeddingManager().
 * @param name Speaker name to compare against.
 * @param v Query embedding vector.
 * @param threshold Minimum similarity threshold in the range [0, 1].
 * @return 1 if the speaker matches; otherwise 0.
 */
SHERPA_ONNX_API int32_t SherpaOnnxSpeakerEmbeddingManagerVerify(
    const SherpaOnnxSpeakerEmbeddingManager *p, const char *name,
    const float *v, float threshold);

/**
 * @brief Check whether a speaker is enrolled.
 *
 * @param p A pointer returned by SherpaOnnxCreateSpeakerEmbeddingManager().
 * @param name Speaker name.
 * @return 1 if the speaker exists; otherwise 0.
 */
SHERPA_ONNX_API int32_t SherpaOnnxSpeakerEmbeddingManagerContains(
    const SherpaOnnxSpeakerEmbeddingManager *p, const char *name);

/**
 * @brief Return the number of enrolled speakers.
 *
 * @param p A pointer returned by SherpaOnnxCreateSpeakerEmbeddingManager().
 * @return Number of enrolled speakers.
 */
SHERPA_ONNX_API int32_t SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(
    const SherpaOnnxSpeakerEmbeddingManager *p);

/**
 * @brief Return all enrolled speaker names.
 *
 * The returned array is NULL-terminated. If no speakers are enrolled, the
 * returned array still exists and its first element is NULL.
 *
 * @param p A pointer returned by SherpaOnnxCreateSpeakerEmbeddingManager().
 * @return A newly allocated NULL-terminated array of speaker names. Free it
 *         with SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers().
 */
SHERPA_ONNX_API const char *const *
SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(
    const SherpaOnnxSpeakerEmbeddingManager *p);

/**
 * @brief Free an array returned by
 * SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers().
 *
 * @param names A pointer returned by
 * SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers().
 */
SHERPA_ONNX_API void SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(
    const char *const *names);

// ============================================================
// For audio tagging
// ============================================================
/** @brief Zipformer audio-tagging model configuration. */
typedef struct SherpaOnnxOfflineZipformerAudioTaggingModelConfig {
  /** Model filename. */
  const char *model;
} SherpaOnnxOfflineZipformerAudioTaggingModelConfig;

/**
 * @brief Audio-tagging model configuration.
 *
 * Configure exactly one model family. If multiple model families are provided,
 * one of them will be used and the choice is implementation-defined.
 *
 * Example using
 * `sherpa-onnx-zipformer-audio-tagging-2024-04-09`:
 *
 * @code
 * SherpaOnnxAudioTaggingModelConfig model;
 * memset(&model, 0, sizeof(model));
 * model.zipformer.model =
 *     "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/model.int8.onnx";
 * model.num_threads = 1;
 * model.provider = "cpu";
 * @endcode
 */
typedef struct SherpaOnnxAudioTaggingModelConfig {
  /** Zipformer model configuration. */
  SherpaOnnxOfflineZipformerAudioTaggingModelConfig zipformer;
  /** Alternative CED model file. */
  const char *ced;
  /** Number of inference threads. */
  int32_t num_threads;
  /** Non-zero to print debug information. */
  int32_t debug;
  /** Execution provider such as `"cpu"`. */
  const char *provider;
} SherpaOnnxAudioTaggingModelConfig;

/**
 * @brief Configuration for audio tagging.
 *
 * @code
 * SherpaOnnxAudioTaggingConfig config;
 * memset(&config, 0, sizeof(config));
 * config.model.zipformer.model =
 *     "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/model.int8.onnx";
 * config.model.num_threads = 1;
 * config.model.provider = "cpu";
 * config.labels =
 *     "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/class_labels_indices.csv";
 * config.top_k = 5;
 * @endcode
 */
typedef struct SherpaOnnxAudioTaggingConfig {
  /** Acoustic model configuration. */
  SherpaOnnxAudioTaggingModelConfig model;
  /** CSV file containing class labels. */
  const char *labels;
  /** Default number of results to return when `top_k == -1` at inference time.
   */
  int32_t top_k;
} SherpaOnnxAudioTaggingConfig;

/**
 * @brief One audio-tagging prediction.
 */
typedef struct SherpaOnnxAudioEvent {
  /** Event label. */
  const char *name;
  /** Integer label index. */
  int32_t index;
  /** Probability or confidence score. */
  float prob;
} SherpaOnnxAudioEvent;

/** @brief Opaque audio tagger handle. */
typedef struct SherpaOnnxAudioTagging SherpaOnnxAudioTagging;

/**
 * @brief Create an audio tagger.
 *
 * @param config Audio-tagging configuration.
 * @return A newly allocated audio tagger on success, or NULL on error. Free it
 *         with SherpaOnnxDestroyAudioTagging().
 */
SHERPA_ONNX_API const SherpaOnnxAudioTagging *SherpaOnnxCreateAudioTagging(
    const SherpaOnnxAudioTaggingConfig *config);

/**
 * @brief Destroy an audio tagger.
 *
 * @param tagger A pointer returned by SherpaOnnxCreateAudioTagging().
 */
SHERPA_ONNX_API void SherpaOnnxDestroyAudioTagging(
    const SherpaOnnxAudioTagging *tagger);

/**
 * @brief Create an offline stream for audio tagging.
 *
 * @param tagger A pointer returned by SherpaOnnxCreateAudioTagging().
 * @return A newly allocated offline stream. Free it with
 *         SherpaOnnxDestroyOfflineStream().
 */
SHERPA_ONNX_API const SherpaOnnxOfflineStream *
SherpaOnnxAudioTaggingCreateOfflineStream(const SherpaOnnxAudioTagging *tagger);

/**
 * @brief Run audio tagging on an offline stream.
 *
 * The returned array is NULL-terminated. If @p top_k is -1, the value stored in
 * `config.top_k` is used instead.
 *
 * @param tagger A pointer returned by SherpaOnnxCreateAudioTagging().
 * @param s A pointer returned by SherpaOnnxAudioTaggingCreateOfflineStream().
 * @param top_k Number of top results to return, or -1 to use the configured
 *              default.
 * @return A newly allocated NULL-terminated array of result pointers ordered by
 *         descending probability. Free it with
 *         SherpaOnnxAudioTaggingFreeResults().
 *
 * @code
 * const SherpaOnnxAudioEvent *const *results =
 *     SherpaOnnxAudioTaggingCompute(tagger, stream, 5);
 * for (int32_t i = 0; results[i] != NULL; ++i) {
 *   printf("%d %.3f %s\n", results[i]->index, results[i]->prob,
 *          results[i]->name);
 * }
 * SherpaOnnxAudioTaggingFreeResults(results);
 * @endcode
 */
SHERPA_ONNX_API const SherpaOnnxAudioEvent *const *
SherpaOnnxAudioTaggingCompute(const SherpaOnnxAudioTagging *tagger,
                              const SherpaOnnxOfflineStream *s, int32_t top_k);

/**
 * @brief Destroy results returned by SherpaOnnxAudioTaggingCompute().
 *
 * @param p A pointer returned by SherpaOnnxAudioTaggingCompute().
 */
SHERPA_ONNX_API void SherpaOnnxAudioTaggingFreeResults(
    const SherpaOnnxAudioEvent *const *p);

// ============================================================
// For punctuation
// ============================================================

/**
 * @brief Offline punctuation model configuration.
 *
 * Example:
 *
 * @code
 * SherpaOnnxOfflinePunctuationModelConfig model;
 * memset(&model, 0, sizeof(model));
 * model.ct_transformer =
 *     "./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx";
 * model.num_threads = 1;
 * model.provider = "cpu";
 * @endcode
 */
typedef struct SherpaOnnxOfflinePunctuationModelConfig {
  /** Offline punctuation model file. */
  const char *ct_transformer;
  /** Number of inference threads. */
  int32_t num_threads;
  /** Non-zero to print debug information. */
  int32_t debug;
  /** Execution provider such as `"cpu"`. */
  const char *provider;
} SherpaOnnxOfflinePunctuationModelConfig;

/** @brief Configuration for offline punctuation. */
typedef struct SherpaOnnxOfflinePunctuationConfig {
  /** Model configuration. */
  SherpaOnnxOfflinePunctuationModelConfig model;
} SherpaOnnxOfflinePunctuationConfig;

/** @brief Opaque offline punctuation handle. */
typedef struct SherpaOnnxOfflinePunctuation SherpaOnnxOfflinePunctuation;

/**
 * @brief Create an offline punctuation processor.
 *
 * @param config Offline punctuation configuration.
 * @return A newly allocated punctuation processor on success, or NULL on
 *         error. Free it with SherpaOnnxDestroyOfflinePunctuation().
 */
SHERPA_ONNX_API const SherpaOnnxOfflinePunctuation *
SherpaOnnxCreateOfflinePunctuation(
    const SherpaOnnxOfflinePunctuationConfig *config);

/**
 * @brief Destroy an offline punctuation processor.
 *
 * @param punct A pointer returned by SherpaOnnxCreateOfflinePunctuation().
 */
SHERPA_ONNX_API void SherpaOnnxDestroyOfflinePunctuation(
    const SherpaOnnxOfflinePunctuation *punct);

/**
 * @brief Add punctuation to a complete input text.
 *
 * @param punct A pointer returned by SherpaOnnxCreateOfflinePunctuation().
 * @param text Input text without punctuation.
 * @return A newly allocated punctuated string. Free it with
 *         SherpaOfflinePunctuationFreeText().
 */
SHERPA_ONNX_API const char *SherpaOfflinePunctuationAddPunct(
    const SherpaOnnxOfflinePunctuation *punct, const char *text);

/**
 * @brief Free a string returned by SherpaOfflinePunctuationAddPunct().
 *
 * @param text A pointer returned by SherpaOfflinePunctuationAddPunct().
 */
SHERPA_ONNX_API void SherpaOfflinePunctuationFreeText(const char *text);

/**
 * @brief Online punctuation model configuration.
 *
 * Example using `sherpa-onnx-online-punct-en-2024-08-06`:
 *
 * @code
 * SherpaOnnxOnlinePunctuationModelConfig model;
 * memset(&model, 0, sizeof(model));
 * model.cnn_bilstm =
 * "./sherpa-onnx-online-punct-en-2024-08-06/model.int8.onnx"; model.bpe_vocab =
 * "./sherpa-onnx-online-punct-en-2024-08-06/bpe.vocab"; model.num_threads = 1;
 * model.provider = "cpu";
 * @endcode
 */
typedef struct SherpaOnnxOnlinePunctuationModelConfig {
  /** Online punctuation model file. */
  const char *cnn_bilstm;
  /** BPE vocabulary used by the model. */
  const char *bpe_vocab;
  /** Number of inference threads. */
  int32_t num_threads;
  /** Non-zero to print debug information. */
  int32_t debug;
  /** Execution provider such as `"cpu"`. */
  const char *provider;
} SherpaOnnxOnlinePunctuationModelConfig;

/** @brief Configuration for online punctuation. */
typedef struct SherpaOnnxOnlinePunctuationConfig {
  /** Model configuration. */
  SherpaOnnxOnlinePunctuationModelConfig model;
} SherpaOnnxOnlinePunctuationConfig;

/** @brief Opaque online punctuation handle. */
typedef struct SherpaOnnxOnlinePunctuation SherpaOnnxOnlinePunctuation;

/**
 * @brief Create an online punctuation processor.
 *
 * @param config Online punctuation configuration.
 * @return A newly allocated punctuation processor on success, or NULL on
 *         error. Free it with SherpaOnnxDestroyOnlinePunctuation().
 */
SHERPA_ONNX_API const SherpaOnnxOnlinePunctuation *
SherpaOnnxCreateOnlinePunctuation(
    const SherpaOnnxOnlinePunctuationConfig *config);

/**
 * @brief Destroy an online punctuation processor.
 *
 * @param punctuation A pointer returned by SherpaOnnxCreateOnlinePunctuation().
 */
SHERPA_ONNX_API void SherpaOnnxDestroyOnlinePunctuation(
    const SherpaOnnxOnlinePunctuation *punctuation);

/**
 * @brief Add punctuation to one text chunk using the online punctuation model.
 *
 * @param punctuation A pointer returned by SherpaOnnxCreateOnlinePunctuation().
 * @param text Input text chunk.
 * @return A newly allocated punctuated string. Free it with
 *         SherpaOnnxOnlinePunctuationFreeText().
 *
 * @code
 * const char *out =
 *     SherpaOnnxOnlinePunctuationAddPunct(punct,
 *         "how are you i am fine thank you");
 * printf("%s\n", out);
 * SherpaOnnxOnlinePunctuationFreeText(out);
 * @endcode
 */
SHERPA_ONNX_API const char *SherpaOnnxOnlinePunctuationAddPunct(
    const SherpaOnnxOnlinePunctuation *punctuation, const char *text);

/**
 * @brief Free a string returned by SherpaOnnxOnlinePunctuationAddPunct().
 *
 * @param text A pointer returned by SherpaOnnxOnlinePunctuationAddPunct().
 */
SHERPA_ONNX_API void SherpaOnnxOnlinePunctuationFreeText(const char *text);

// For resampling
/** @brief Opaque linear resampler handle. */
typedef struct SherpaOnnxLinearResampler SherpaOnnxLinearResampler;

/**
 * @brief Create a linear resampler.
 *
 * A common choice is:
 *
 * @code
 * float min_freq = samp_rate_in_hz < samp_rate_out_hz ? samp_rate_in_hz
 *                                                 : samp_rate_out_hz;
 * float filter_cutoff_hz = 0.99f * 0.5f * min_freq;
 * int32_t num_zeros = 6;
 * @endcode
 *
 * @param samp_rate_in_hz Input sample rate in Hz.
 * @param samp_rate_out_hz Output sample rate in Hz.
 * @param filter_cutoff_hz Low-pass cutoff frequency in Hz.
 * @param num_zeros Low-pass filter width control parameter.
 * @return A newly allocated resampler. Free it with
 *         SherpaOnnxDestroyLinearResampler().
 */
SHERPA_ONNX_API const SherpaOnnxLinearResampler *
SherpaOnnxCreateLinearResampler(int32_t samp_rate_in_hz,
                                int32_t samp_rate_out_hz,
                                float filter_cutoff_hz, int32_t num_zeros);

/**
 * @brief Destroy a linear resampler.
 *
 * @param p A pointer returned by SherpaOnnxCreateLinearResampler().
 */
SHERPA_ONNX_API void SherpaOnnxDestroyLinearResampler(
    const SherpaOnnxLinearResampler *p);

/**
 * @brief Reset a linear resampler to its initial state.
 *
 * @param p A pointer returned by SherpaOnnxCreateLinearResampler().
 */
SHERPA_ONNX_API void SherpaOnnxLinearResamplerReset(
    const SherpaOnnxLinearResampler *p);

/**
 * @brief Output chunk returned by SherpaOnnxLinearResamplerResample().
 *
 * Free this object with SherpaOnnxLinearResamplerResampleFree().
 */
typedef struct SherpaOnnxResampleOut {
  /** Output samples. */
  const float *samples;
  /** Number of output samples. */
  int32_t n;
} SherpaOnnxResampleOut;

/**
 * @brief Resample one chunk of input audio.
 *
 * Set @p flush to 1 for the final chunk so buffered samples are emitted.
 *
 * @param p A pointer returned by SherpaOnnxCreateLinearResampler().
 * @param input Input sample array.
 * @param input_dim Number of input samples.
 * @param flush 1 if this is the final chunk; otherwise 0.
 * @return A newly allocated output chunk. Free it with
 *         SherpaOnnxLinearResamplerResampleFree().
 */
SHERPA_ONNX_API const SherpaOnnxResampleOut *SherpaOnnxLinearResamplerResample(
    const SherpaOnnxLinearResampler *p, const float *input, int32_t input_dim,
    int32_t flush);

/**
 * @brief Destroy a resampler output chunk.
 *
 * @param p A pointer returned by SherpaOnnxLinearResamplerResample().
 */
SHERPA_ONNX_API void SherpaOnnxLinearResamplerResampleFree(
    const SherpaOnnxResampleOut *p);

/**
 * @brief Return the resampler input sample rate.
 *
 * @param p A pointer returned by SherpaOnnxCreateLinearResampler().
 * @return Input sample rate in Hz.
 */
SHERPA_ONNX_API int32_t SherpaOnnxLinearResamplerResampleGetInputSampleRate(
    const SherpaOnnxLinearResampler *p);

/**
 * @brief Return the resampler output sample rate.
 *
 * @param p A pointer returned by SherpaOnnxCreateLinearResampler().
 * @return Output sample rate in Hz.
 */
SHERPA_ONNX_API int32_t SherpaOnnxLinearResamplerResampleGetOutputSampleRate(
    const SherpaOnnxLinearResampler *p);

// =========================================================================
// For offline speaker diarization (i.e., non-streaming speaker diarization)
// =========================================================================
/** @brief Pyannote speaker-segmentation model configuration. */
typedef struct SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig {
  /** Segmentation model filename. */
  const char *model;
} SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig;

/**
 * @brief Segmentation model configuration for offline speaker diarization.
 *
 * Configure exactly one model family. If multiple model families are provided,
 * one is chosen and the choice is implementation-defined.
 */
typedef struct SherpaOnnxOfflineSpeakerSegmentationModelConfig {
  /** Pyannote segmentation model configuration. */
  SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig pyannote;
  /** Number of inference threads. */
  int32_t num_threads;
  /** Non-zero to print debug information. */
  int32_t debug;
  /** Execution provider such as `"cpu"`. */
  const char *provider;
} SherpaOnnxOfflineSpeakerSegmentationModelConfig;

/**
 * @brief Fast clustering configuration.
 *
 * If @c num_clusters is greater than 0, @c threshold is ignored. When the
 * number of speakers is known in advance, setting @c num_clusters is strongly
 * recommended.
 */
typedef struct SherpaOnnxFastClusteringConfig {
  /** Known number of speakers. If > 0, threshold-based clustering is bypassed.
   */
  int32_t num_clusters;
  /** Distance threshold used when the number of speakers is unknown. */
  float threshold;
} SherpaOnnxFastClusteringConfig;

/**
 * @brief Configuration for offline speaker diarization.
 *
 * Example based on `offline-sepaker-diarization-c-api.c`:
 *
 * @code
 * SherpaOnnxOfflineSpeakerDiarizationConfig config;
 * memset(&config, 0, sizeof(config));
 * config.segmentation.pyannote.model =
 *     "./sherpa-onnx-pyannote-segmentation-3-0/model.onnx";
 * config.embedding.model =
 *     "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx";
 * config.clustering.num_clusters = 4;
 * @endcode
 */
typedef struct SherpaOnnxOfflineSpeakerDiarizationConfig {
  /** Speaker segmentation model configuration. */
  SherpaOnnxOfflineSpeakerSegmentationModelConfig segmentation;
  /** Speaker embedding extractor configuration. */
  SherpaOnnxSpeakerEmbeddingExtractorConfig embedding;
  /** Clustering configuration. */
  SherpaOnnxFastClusteringConfig clustering;
  /** Segments shorter than this duration in seconds are discarded. */
  float min_duration_on;
  /** Small gaps shorter than this duration in seconds may be merged. */
  float min_duration_off;
} SherpaOnnxOfflineSpeakerDiarizationConfig;

/** @brief Opaque offline speaker diarization handle. */
typedef struct SherpaOnnxOfflineSpeakerDiarization
    SherpaOnnxOfflineSpeakerDiarization;

/**
 * @brief Create an offline speaker diarization pipeline.
 *
 * @param config Offline speaker diarization configuration.
 * @return A newly allocated diarizer on success, or NULL on error. Free it
 *         with SherpaOnnxDestroyOfflineSpeakerDiarization().
 */
SHERPA_ONNX_API const SherpaOnnxOfflineSpeakerDiarization *
SherpaOnnxCreateOfflineSpeakerDiarization(
    const SherpaOnnxOfflineSpeakerDiarizationConfig *config);

/**
 * @brief Destroy an offline speaker diarizer.
 *
 * @param sd A pointer returned by SherpaOnnxCreateOfflineSpeakerDiarization().
 */
SHERPA_ONNX_API void SherpaOnnxDestroyOfflineSpeakerDiarization(
    const SherpaOnnxOfflineSpeakerDiarization *sd);

/**
 * @brief Return the expected input sample rate.
 *
 * @param sd A pointer returned by SherpaOnnxCreateOfflineSpeakerDiarization().
 * @return Required input sample rate in Hz.
 */
SHERPA_ONNX_API int32_t SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(
    const SherpaOnnxOfflineSpeakerDiarization *sd);

/**
 * @brief Update clustering-related settings of an existing diarizer.
 *
 * Only `config->clustering` is used. Other fields are ignored.
 *
 * @param sd A pointer returned by SherpaOnnxCreateOfflineSpeakerDiarization().
 * @param config Configuration whose `clustering` field will be applied.
 */
SHERPA_ONNX_API void SherpaOnnxOfflineSpeakerDiarizationSetConfig(
    const SherpaOnnxOfflineSpeakerDiarization *sd,
    const SherpaOnnxOfflineSpeakerDiarizationConfig *config);

/** @brief Opaque offline speaker diarization result. */
typedef struct SherpaOnnxOfflineSpeakerDiarizationResult
    SherpaOnnxOfflineSpeakerDiarizationResult;

/**
 * @brief One diarization segment.
 */
typedef struct SherpaOnnxOfflineSpeakerDiarizationSegment {
  /** Segment start time in seconds. */
  float start;
  /** Segment end time in seconds. */
  float end;
  /** Speaker label, typically an integer cluster ID. */
  int32_t speaker;
} SherpaOnnxOfflineSpeakerDiarizationSegment;

/**
 * @brief Return the number of speakers in a diarization result.
 *
 * @param r A pointer returned by one of the
 *          SherpaOnnxOfflineSpeakerDiarizationProcess*() functions.
 * @return Number of speaker clusters.
 */
SHERPA_ONNX_API int32_t SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers(
    const SherpaOnnxOfflineSpeakerDiarizationResult *r);

/**
 * @brief Return the number of diarization segments.
 *
 * @param r A pointer returned by one of the
 *          SherpaOnnxOfflineSpeakerDiarizationProcess*() functions.
 * @return Number of segments.
 */
SHERPA_ONNX_API int32_t SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(
    const SherpaOnnxOfflineSpeakerDiarizationResult *r);

/**
 * @brief Return segments sorted by start time.
 *
 * The returned array contains exactly
 * SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments() entries.
 *
 * @param r A pointer returned by one of the
 *          SherpaOnnxOfflineSpeakerDiarizationProcess*() functions.
 * @return A newly allocated segment array. Free it with
 *         SherpaOnnxOfflineSpeakerDiarizationDestroySegment().
 */
SHERPA_ONNX_API const SherpaOnnxOfflineSpeakerDiarizationSegment *
SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(
    const SherpaOnnxOfflineSpeakerDiarizationResult *r);

/**
 * @brief Destroy a segment array returned by
 * SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime().
 *
 * @param s A pointer returned by
 *          SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime().
 */
SHERPA_ONNX_API void SherpaOnnxOfflineSpeakerDiarizationDestroySegment(
    const SherpaOnnxOfflineSpeakerDiarizationSegment *s);

/**
 * @brief Progress callback for offline speaker diarization.
 *
 * The current implementation reports progress but ignores the callback's
 * return value.
 */
typedef int32_t (*SherpaOnnxOfflineSpeakerDiarizationProgressCallback)(
    int32_t num_processed_chunks, int32_t num_total_chunks, void *arg);

/**
 * @brief Same as SherpaOnnxOfflineSpeakerDiarizationProgressCallback but
 * without a user pointer.
 */
typedef int32_t (*SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg)(
    int32_t num_processed_chunks, int32_t num_total_chunks);

/**
 * @brief Run offline speaker diarization.
 *
 * @param sd A pointer returned by SherpaOnnxCreateOfflineSpeakerDiarization().
 * @param samples Input mono PCM samples normalized to [-1, 1].
 * @param n Number of input samples.
 * @return A newly allocated diarization result. Free it with
 *         SherpaOnnxOfflineSpeakerDiarizationDestroyResult().
 */
SHERPA_ONNX_API const SherpaOnnxOfflineSpeakerDiarizationResult *
SherpaOnnxOfflineSpeakerDiarizationProcess(
    const SherpaOnnxOfflineSpeakerDiarization *sd, const float *samples,
    int32_t n);

/**
 * @brief Run offline speaker diarization with a progress callback.
 *
 * @param sd A pointer returned by SherpaOnnxCreateOfflineSpeakerDiarization().
 * @param samples Input mono PCM samples normalized to [-1, 1].
 * @param n Number of input samples.
 * @param callback Progress callback.
 * @param arg User pointer forwarded to @p callback.
 * @return A newly allocated diarization result. Free it with
 *         SherpaOnnxOfflineSpeakerDiarizationDestroyResult().
 *
 * @code
 * static int32_t ProgressCallback(int32_t done, int32_t total, void *arg) {
 *   fprintf(stderr, "progress %.2f%%\n", 100.0f * done / total);
 *   return 0;
 * }
 * @endcode
 */
SHERPA_ONNX_API const SherpaOnnxOfflineSpeakerDiarizationResult *
SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback(
    const SherpaOnnxOfflineSpeakerDiarization *sd, const float *samples,
    int32_t n, SherpaOnnxOfflineSpeakerDiarizationProgressCallback callback,
    void *arg);

/**
 * @brief Run offline speaker diarization with a progress callback that has no
 * user pointer.
 *
 * @param sd A pointer returned by SherpaOnnxCreateOfflineSpeakerDiarization().
 * @param samples Input mono PCM samples normalized to [-1, 1].
 * @param n Number of input samples.
 * @param callback Progress callback.
 * @return A newly allocated diarization result. Free it with
 *         SherpaOnnxOfflineSpeakerDiarizationDestroyResult().
 */
SHERPA_ONNX_API const SherpaOnnxOfflineSpeakerDiarizationResult *
SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg(
    const SherpaOnnxOfflineSpeakerDiarization *sd, const float *samples,
    int32_t n,
    SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg callback);

/**
 * @brief Destroy a diarization result.
 *
 * @param r A pointer returned by one of the
 *          SherpaOnnxOfflineSpeakerDiarizationProcess*() functions.
 */
SHERPA_ONNX_API void SherpaOnnxOfflineSpeakerDiarizationDestroyResult(
    const SherpaOnnxOfflineSpeakerDiarizationResult *r);

// =========================================================================
// For offline speech enhancement
// =========================================================================
/** @brief GTCRN offline denoiser model configuration. */
typedef struct SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig {
  /** Model filename. */
  const char *model;
} SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig;

/** @brief DPDFNet offline denoiser model configuration. */
typedef struct SherpaOnnxOfflineSpeechDenoiserDpdfNetModelConfig {
  /** Model filename. */
  const char *model;
} SherpaOnnxOfflineSpeechDenoiserDpdfNetModelConfig;

/**
 * @brief Speech denoiser model configuration shared by offline and online APIs.
 *
 * Configure exactly one model family. If multiple model families are provided,
 * one is chosen and the choice is implementation-defined.
 */
typedef struct SherpaOnnxOfflineSpeechDenoiserModelConfig {
  /** GTCRN model configuration. */
  SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig gtcrn;
  /** Number of inference threads. */
  int32_t num_threads;
  /** Non-zero to print debug information. */
  int32_t debug;
  /** Execution provider such as `"cpu"`. */
  const char *provider;
  /** DPDFNet model configuration. */
  SherpaOnnxOfflineSpeechDenoiserDpdfNetModelConfig dpdfnet;
} SherpaOnnxOfflineSpeechDenoiserModelConfig;

/** @brief Configuration for offline speech denoising. */
typedef struct SherpaOnnxOfflineSpeechDenoiserConfig {
  /** Model configuration. */
  SherpaOnnxOfflineSpeechDenoiserModelConfig model;
} SherpaOnnxOfflineSpeechDenoiserConfig;

/** @brief Opaque offline speech denoiser handle. */
typedef struct SherpaOnnxOfflineSpeechDenoiser SherpaOnnxOfflineSpeechDenoiser;

/**
 * @brief Create an offline speech denoiser.
 *
 * Example using `gtcrn_simple.onnx`:
 *
 * @code
 * SherpaOnnxOfflineSpeechDenoiserConfig config;
 * memset(&config, 0, sizeof(config));
 * config.model.gtcrn.model = "./gtcrn_simple.onnx";
 * @endcode
 *
 * @param config Offline denoiser configuration.
 * @return A newly allocated denoiser on success, or NULL on error. Free it
 *         with SherpaOnnxDestroyOfflineSpeechDenoiser().
 */
SHERPA_ONNX_API const SherpaOnnxOfflineSpeechDenoiser *
SherpaOnnxCreateOfflineSpeechDenoiser(
    const SherpaOnnxOfflineSpeechDenoiserConfig *config);

/**
 * @brief Destroy an offline speech denoiser.
 *
 * @param sd A pointer returned by SherpaOnnxCreateOfflineSpeechDenoiser().
 */
SHERPA_ONNX_API void SherpaOnnxDestroyOfflineSpeechDenoiser(
    const SherpaOnnxOfflineSpeechDenoiser *sd);

/**
 * @brief Return the expected sample rate for the denoiser.
 *
 * @param sd A pointer returned by SherpaOnnxCreateOfflineSpeechDenoiser().
 * @return Required input sample rate in Hz.
 */
SHERPA_ONNX_API int32_t SherpaOnnxOfflineSpeechDenoiserGetSampleRate(
    const SherpaOnnxOfflineSpeechDenoiser *sd);

/**
 * @brief Denoised audio returned by offline or online speech enhancement APIs.
 *
 * Free this object with SherpaOnnxDestroyDenoisedAudio().
 */
typedef struct SherpaOnnxDenoisedAudio {
  /** Output samples in the range [-1, 1]. */
  const float *samples;
  /** Number of output samples. */
  int32_t n;
  /** Output sample rate in Hz. */
  int32_t sample_rate;
} SherpaOnnxDenoisedAudio;

/**
 * @brief Run offline speech denoising on a complete waveform.
 *
 * @param sd A pointer returned by SherpaOnnxCreateOfflineSpeechDenoiser().
 * @param samples Input mono PCM samples normalized to [-1, 1].
 * @param n Number of input samples.
 * @param sample_rate Input sample rate in Hz.
 * @return A newly allocated denoised waveform. Free it with
 *         SherpaOnnxDestroyDenoisedAudio().
 *
 * @code
 * const SherpaOnnxDenoisedAudio *denoised =
 *     SherpaOnnxOfflineSpeechDenoiserRun(sd, wave->samples, wave->num_samples,
 *                                        wave->sample_rate);
 * SherpaOnnxWriteWave(denoised->samples, denoised->n, denoised->sample_rate,
 *                     "./enhanced.wav");
 * SherpaOnnxDestroyDenoisedAudio(denoised);
 * @endcode
 */
SHERPA_ONNX_API const SherpaOnnxDenoisedAudio *
SherpaOnnxOfflineSpeechDenoiserRun(const SherpaOnnxOfflineSpeechDenoiser *sd,
                                   const float *samples, int32_t n,
                                   int32_t sample_rate);

/**
 * @brief Destroy denoised audio returned by a speech enhancement API.
 *
 * @param p A pointer returned by SherpaOnnxOfflineSpeechDenoiserRun(),
 *          SherpaOnnxOnlineSpeechDenoiserRun(), or
 *          SherpaOnnxOnlineSpeechDenoiserFlush().
 */
SHERPA_ONNX_API void SherpaOnnxDestroyDenoisedAudio(
    const SherpaOnnxDenoisedAudio *p);

// =========================================================================
// For streaming speech enhancement
// =========================================================================
/** @brief Configuration for streaming speech denoising. */
typedef struct SherpaOnnxOnlineSpeechDenoiserConfig {
  /** Model configuration. */
  SherpaOnnxOfflineSpeechDenoiserModelConfig model;
} SherpaOnnxOnlineSpeechDenoiserConfig;

/** @brief Opaque online speech denoiser handle. */
typedef struct SherpaOnnxOnlineSpeechDenoiser SherpaOnnxOnlineSpeechDenoiser;

/**
 * @brief Create an online speech denoiser.
 *
 * @param config Online denoiser configuration.
 * @return A newly allocated denoiser on success, or NULL on error. Free it
 *         with SherpaOnnxDestroyOnlineSpeechDenoiser().
 */
SHERPA_ONNX_API const SherpaOnnxOnlineSpeechDenoiser *
SherpaOnnxCreateOnlineSpeechDenoiser(
    const SherpaOnnxOnlineSpeechDenoiserConfig *config);

/**
 * @brief Destroy an online speech denoiser.
 *
 * @param sd A pointer returned by SherpaOnnxCreateOnlineSpeechDenoiser().
 */
SHERPA_ONNX_API void SherpaOnnxDestroyOnlineSpeechDenoiser(
    const SherpaOnnxOnlineSpeechDenoiser *sd);

/**
 * @brief Return the expected input sample rate for the online denoiser.
 *
 * @param sd A pointer returned by SherpaOnnxCreateOnlineSpeechDenoiser().
 * @return Required input sample rate in Hz.
 */
SHERPA_ONNX_API int32_t SherpaOnnxOnlineSpeechDenoiserGetSampleRate(
    const SherpaOnnxOnlineSpeechDenoiser *sd);

/**
 * @brief Return the recommended chunk size in samples for streaming input.
 *
 * Example programs feed audio to the online denoiser in this chunk size.
 *
 * @param sd A pointer returned by SherpaOnnxCreateOnlineSpeechDenoiser().
 * @return Frame shift in samples.
 */
SHERPA_ONNX_API int32_t SherpaOnnxOnlineSpeechDenoiserGetFrameShiftInSamples(
    const SherpaOnnxOnlineSpeechDenoiser *sd);

/**
 * @brief Process one chunk of streaming audio.
 *
 * This function is not thread-safe. It may return NULL when not enough input
 * has been accumulated to produce denoised output yet.
 *
 * @param sd A pointer returned by SherpaOnnxCreateOnlineSpeechDenoiser().
 * @param samples Input chunk normalized to [-1, 1].
 * @param n Number of input samples.
 * @param sample_rate Input sample rate in Hz.
 * @return A newly allocated denoised chunk, or NULL if no output is available
 *         yet. Free non-NULL results with SherpaOnnxDestroyDenoisedAudio().
 */
SHERPA_ONNX_API const SherpaOnnxDenoisedAudio *
SherpaOnnxOnlineSpeechDenoiserRun(const SherpaOnnxOnlineSpeechDenoiser *sd,
                                  const float *samples, int32_t n,
                                  int32_t sample_rate);

/**
 * @brief Flush buffered samples and reset the online denoiser.
 *
 * This also resets the denoiser so it can be reused for a new utterance.
 *
 * @param sd A pointer returned by SherpaOnnxCreateOnlineSpeechDenoiser().
 * @return A newly allocated denoised chunk, or NULL if no buffered output
 *         remains. Free non-NULL results with SherpaOnnxDestroyDenoisedAudio().
 */
SHERPA_ONNX_API const SherpaOnnxDenoisedAudio *
SherpaOnnxOnlineSpeechDenoiserFlush(const SherpaOnnxOnlineSpeechDenoiser *sd);

/**
 * @brief Reset an online denoiser so it can process a new stream.
 *
 * @param sd A pointer returned by SherpaOnnxCreateOnlineSpeechDenoiser().
 */
SHERPA_ONNX_API void SherpaOnnxOnlineSpeechDenoiserReset(
    const SherpaOnnxOnlineSpeechDenoiser *sd);

#ifdef __OHOS__

/**
 * @brief HarmonyOS native resource manager type.
 *
 * Pass the resource manager provided by the HarmonyOS application runtime when
 * using the `*OHOS()` constructors below.
 */
typedef struct NativeResourceManager NativeResourceManager;

/**
 * @brief Create an offline speech denoiser on HarmonyOS.
 *
 * This is the HarmonyOS counterpart of SherpaOnnxCreateOfflineSpeechDenoiser().
 *
 * @param config Offline denoiser configuration.
 * @param mgr HarmonyOS resource manager used to resolve bundled assets.
 * @return A newly allocated denoiser, or NULL on error. Free it with
 *         SherpaOnnxDestroyOfflineSpeechDenoiser().
 */
SHERPA_ONNX_API const SherpaOnnxOfflineSpeechDenoiser *
SherpaOnnxCreateOfflineSpeechDenoiserOHOS(
    const SherpaOnnxOfflineSpeechDenoiserConfig *config,
    NativeResourceManager *mgr);

/**
 * @brief Create an online speech denoiser on HarmonyOS.
 *
 * This is the HarmonyOS counterpart of SherpaOnnxCreateOnlineSpeechDenoiser().
 *
 * @param config Online denoiser configuration.
 * @param mgr HarmonyOS resource manager used to resolve bundled assets.
 * @return A newly allocated denoiser, or NULL on error. Free it with
 *         SherpaOnnxDestroyOnlineSpeechDenoiser().
 */
SHERPA_ONNX_API const SherpaOnnxOnlineSpeechDenoiser *
SherpaOnnxCreateOnlineSpeechDenoiserOHOS(
    const SherpaOnnxOnlineSpeechDenoiserConfig *config,
    NativeResourceManager *mgr);

/**
 * @brief Create an online recognizer on HarmonyOS.
 *
 * This is the HarmonyOS counterpart of SherpaOnnxCreateOnlineRecognizer().
 *
 * @param config Recognizer configuration.
 * @param mgr HarmonyOS resource manager used to resolve bundled assets.
 * @return A newly allocated recognizer, or NULL on error. Free it with
 *         SherpaOnnxDestroyOnlineRecognizer().
 */
SHERPA_ONNX_API const SherpaOnnxOnlineRecognizer *
SherpaOnnxCreateOnlineRecognizerOHOS(
    const SherpaOnnxOnlineRecognizerConfig *config, NativeResourceManager *mgr);

/**
 * @brief Create an offline recognizer on HarmonyOS.
 *
 * This is the HarmonyOS counterpart of SherpaOnnxCreateOfflineRecognizer().
 *
 * @param config Recognizer configuration.
 * @param mgr HarmonyOS resource manager used to resolve bundled assets.
 * @return A newly allocated recognizer, or NULL on error. Free it with
 *         SherpaOnnxDestroyOfflineRecognizer().
 */
SHERPA_ONNX_API const SherpaOnnxOfflineRecognizer *
SherpaOnnxCreateOfflineRecognizerOHOS(
    const SherpaOnnxOfflineRecognizerConfig *config,
    NativeResourceManager *mgr);

/**
 * @brief Create a voice activity detector on HarmonyOS.
 *
 * This is the HarmonyOS counterpart of SherpaOnnxCreateVoiceActivityDetector().
 *
 * @param config VAD model configuration.
 * @param buffer_size_in_seconds Internal buffer duration in seconds.
 * @param mgr HarmonyOS resource manager used to resolve bundled assets.
 * @return A newly allocated VAD instance, or NULL on error. Free it with
 *         SherpaOnnxDestroyVoiceActivityDetector().
 */
SHERPA_ONNX_API const SherpaOnnxVoiceActivityDetector *
SherpaOnnxCreateVoiceActivityDetectorOHOS(
    const SherpaOnnxVadModelConfig *config, float buffer_size_in_seconds,
    NativeResourceManager *mgr);

/**
 * @brief Create an offline TTS engine on HarmonyOS.
 *
 * This is the HarmonyOS counterpart of SherpaOnnxCreateOfflineTts().
 *
 * @param config Offline TTS configuration.
 * @param mgr HarmonyOS resource manager used to resolve bundled assets.
 * @return A newly allocated TTS engine, or NULL on error. Free it with
 *         SherpaOnnxDestroyOfflineTts().
 */
SHERPA_ONNX_API const SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS(
    const SherpaOnnxOfflineTtsConfig *config, NativeResourceManager *mgr);

/**
 * @brief Create an offline punctuation processor on HarmonyOS.
 *
 * This is the HarmonyOS counterpart of SherpaOnnxCreateOfflinePunctuation().
 *
 * @param config Offline punctuation configuration.
 * @param mgr HarmonyOS resource manager used to resolve bundled assets.
 * @return A newly allocated punctuation processor, or NULL on error. Free it
 *         with SherpaOnnxDestroyOfflinePunctuation().
 */
SHERPA_ONNX_API const SherpaOnnxOfflinePunctuation *
SherpaOnnxCreateOfflinePunctuationOHOS(
    const SherpaOnnxOfflinePunctuationConfig *config,
    NativeResourceManager *mgr);

/**
 * @brief Create an online punctuation processor on HarmonyOS.
 *
 * This is the HarmonyOS counterpart of SherpaOnnxCreateOnlinePunctuation().
 *
 * @param config Online punctuation configuration.
 * @param mgr HarmonyOS resource manager used to resolve bundled assets.
 * @return A newly allocated punctuation processor, or NULL on error. Free it
 *         with SherpaOnnxDestroyOnlinePunctuation().
 */
SHERPA_ONNX_API const SherpaOnnxOnlinePunctuation *
SherpaOnnxCreateOnlinePunctuationOHOS(
    const SherpaOnnxOnlinePunctuationConfig *config,
    NativeResourceManager *mgr);

/**
 * @brief Create a speaker embedding extractor on HarmonyOS.
 *
 * This is the HarmonyOS counterpart of
 * SherpaOnnxCreateSpeakerEmbeddingExtractor().
 *
 * @param config Speaker embedding extractor configuration.
 * @param mgr HarmonyOS resource manager used to resolve bundled assets.
 * @return A newly allocated extractor, or NULL on error. Free it with
 *         SherpaOnnxDestroySpeakerEmbeddingExtractor().
 */
SHERPA_ONNX_API const SherpaOnnxSpeakerEmbeddingExtractor *
SherpaOnnxCreateSpeakerEmbeddingExtractorOHOS(
    const SherpaOnnxSpeakerEmbeddingExtractorConfig *config,
    NativeResourceManager *mgr);

/**
 * @brief Create a keyword spotter on HarmonyOS.
 *
 * This is the HarmonyOS counterpart of SherpaOnnxCreateKeywordSpotter().
 *
 * @param config Keyword spotter configuration.
 * @param mgr HarmonyOS resource manager used to resolve bundled assets.
 * @return A newly allocated keyword spotter, or NULL on error. Free it with
 *         SherpaOnnxDestroyKeywordSpotter().
 */
SHERPA_ONNX_API const SherpaOnnxKeywordSpotter *
SherpaOnnxCreateKeywordSpotterOHOS(const SherpaOnnxKeywordSpotterConfig *config,
                                   NativeResourceManager *mgr);

/**
 * @brief Create an offline speaker diarizer on HarmonyOS.
 *
 * This is the HarmonyOS counterpart of
 * SherpaOnnxCreateOfflineSpeakerDiarization().
 *
 * @param config Offline speaker diarization configuration.
 * @param mgr HarmonyOS resource manager used to resolve bundled assets.
 * @return A newly allocated diarizer, or NULL on error. Free it with
 *         SherpaOnnxDestroyOfflineSpeakerDiarization().
 */
SHERPA_ONNX_API const SherpaOnnxOfflineSpeakerDiarization *
SherpaOnnxCreateOfflineSpeakerDiarizationOHOS(
    const SherpaOnnxOfflineSpeakerDiarizationConfig *config,
    NativeResourceManager *mgr);
#endif

#if defined(__GNUC__)
#pragma GCC diagnostic pop
#endif

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif  // SHERPA_ONNX_C_API_C_API_H_


================================================
FILE: sherpa-onnx/c-api/cxx-api.cc
================================================
// sherpa-onnx/c-api/cxx-api.cc
//
// Copyright (c)  2024  Xiaomi Corporation
#include "sherpa-onnx/c-api/cxx-api.h"

#include <algorithm>
#include <cstring>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "nlohmann/json.hpp"

namespace sherpa_onnx::cxx {

static void FillSpeechDenoiserModelConfig(
    const OfflineSpeechDenoiserModelConfig &src,
    SherpaOnnxOfflineSpeechDenoiserModelConfig *dst) {
  memset(dst, 0, sizeof(*dst));
  dst->gtcrn.model = src.gtcrn.model.c_str();
  dst->dpdfnet.model = src.dpdfnet.model.c_str();
  dst->num_threads = src.num_threads;
  dst->provider = src.provider.c_str();
  dst->debug = src.debug;
}

Wave ReadWave(const std::string &filename) {
  auto p = SherpaOnnxReadWave(filename.c_str());

  Wave ans;
  if (p) {
    ans.samples.resize(p->num_samples);

    std::copy(p->samples, p->samples + p->num_samples, ans.samples.data());

    ans.sample_rate = p->sample_rate;
    SherpaOnnxFreeWave(p);
  }

  return ans;
}

bool WriteWave(const std::string &filename, const Wave &wave) {
  return SherpaOnnxWriteWave(wave.samples.data(), wave.samples.size(),
                             wave.sample_rate, filename.c_str());
}

OnlineStream::OnlineStream(const SherpaOnnxOnlineStream *p)
    : MoveOnly<OnlineStream, SherpaOnnxOnlineStream>(p) {}

void OnlineStream::Destroy(const SherpaOnnxOnlineStream *p) const {
  SherpaOnnxDestroyOnlineStream(p);
}

void OnlineStream::AcceptWaveform(int32_t sample_rate, const float *samples,
                                  int32_t n) const {
  SherpaOnnxOnlineStreamAcceptWaveform(p_, sample_rate, samples, n);
}

void OnlineStream::InputFinished() const {
  SherpaOnnxOnlineStreamInputFinished(p_);
}

void OnlineStream::SetOption(const char *key, const char *value) const {
  SherpaOnnxOnlineStreamSetOption(p_, key, value);
}

const char *OnlineStream::GetOption(const char *key) const {
  return SherpaOnnxOnlineStreamGetOption(p_, key);
}

int32_t OnlineStream::HasOption(const char *key) const {
  return SherpaOnnxOnlineStreamHasOption(p_, key);
}

OnlineRecognizer OnlineRecognizer::Create(
    const OnlineRecognizerConfig &config) {
  struct SherpaOnnxOnlineRecognizerConfig c;
  memset(&c, 0, sizeof(c));

  c.feat_config.sample_rate = config.feat_config.sample_rate;
  c.feat_config.feature_dim = config.feat_config.feature_dim;

  c.model_config.transducer.encoder =
      config.model_config.transducer.encoder.c_str();
  c.model_config.transducer.decoder =
      config.model_config.transducer.decoder.c_str();
  c.model_config.transducer.joiner =
      config.model_config.transducer.joiner.c_str();

  c.model_config.paraformer.encoder =
      config.model_config.paraformer.encoder.c_str();
  c.model_config.paraformer.decoder =
      config.model_config.paraformer.decoder.c_str();

  c.model_config.zipformer2_ctc.model =
      config.model_config.zipformer2_ctc.model.c_str();

  c.model_config.nemo_ctc.model = config.model_config.nemo_ctc.model.c_str();
  c.model_config.t_one_ctc.model = config.model_config.t_one_ctc.model.c_str();

  c.model_config.tokens = config.model_config.tokens.c_str();
  c.model_config.num_threads = config.model_config.num_threads;
  c.model_config.provider = config.model_config.provider.c_str();
  c.model_config.debug = config.model_config.debug;
  c.model_config.model_type = config.model_config.model_type.c_str();
  c.model_config.modeling_unit = config.model_config.modeling_unit.c_str();
  c.model_config.bpe_vocab = config.model_config.bpe_vocab.c_str();
  c.model_config.tokens_buf = config.model_config.tokens_buf.c_str();
  c.model_config.tokens_buf_size = config.model_config.tokens_buf.size();

  c.decoding_method = config.decoding_method.c_str();
  c.max_active_paths = config.max_active_paths;
  c.enable_endpoint = config.enable_endpoint;
  c.rule1_min_trailing_silence = config.rule1_min_trailing_silence;
  c.rule2_min_trailing_silence = config.rule2_min_trailing_silence;
  c.rule3_min_utterance_length = config.rule3_min_utterance_length;
  c.hotwords_file = config.hotwords_file.c_str();
  c.hotwords_score = config.hotwords_score;

  c.ctc_fst_decoder_config.graph = config.ctc_fst_decoder_config.graph.c_str();
  c.ctc_fst_decoder_config.max_active =
      config.ctc_fst_decoder_config.max_active;

  c.rule_fsts = config.rule_fsts.c_str();
  c.rule_fars = config.rule_fars.c_str();

  c.blank_penalty = config.blank_penalty;

  c.hotwords_buf = config.hotwords_buf.c_str();
  c.hotwords_buf_size = config.hotwords_buf.size();

  c.hr.lexicon = config.hr.lexicon.c_str();
  c.hr.rule_fsts = config.hr.rule_fsts.c_str();

  auto p = SherpaOnnxCreateOnlineRecognizer(&c);
  return OnlineRecognizer(p);
}

OnlineRecognizer::OnlineRecognizer(const SherpaOnnxOnlineRecognizer *p)
    : MoveOnly<OnlineRecognizer, SherpaOnnxOnlineRecognizer>(p) {}

void OnlineRecognizer::Destroy(const SherpaOnnxOnlineRecognizer *p) const {
  SherpaOnnxDestroyOnlineRecognizer(p);
}

OnlineStream OnlineRecognizer::CreateStream() const {
  auto s = SherpaOnnxCreateOnlineStream(p_);
  return OnlineStream{s};
}

OnlineStream OnlineRecognizer::CreateStream(const std::string &hotwords) const {
  auto s = SherpaOnnxCreateOnlineStreamWithHotwords(p_, hotwords.c_str());
  return OnlineStream{s};
}

bool OnlineRecognizer::IsReady(const OnlineStream *s) const {
  return SherpaOnnxIsOnlineStreamReady(p_, s->Get());
}

void OnlineRecognizer::Decode(const OnlineStream *s) const {
  SherpaOnnxDecodeOnlineStream(p_, s->Get());
}

void OnlineRecognizer::Reset(const OnlineStream *s) const {
  SherpaOnnxOnlineStreamReset(p_, s->Get());
}

bool OnlineRecognizer::IsEndpoint(const OnlineStream *s) const {
  return SherpaOnnxOnlineStreamIsEndpoint(p_, s->Get());
}

void OnlineRecognizer::Decode(const OnlineStream *ss, int32_t n) const {
  if (n <= 0) {
    return;
  }

  std::vector<const SherpaOnnxOnlineStream *> streams(n);
  for (int32_t i = 0; i != n; ++i) {
    streams[i] = ss[i].Get();
  }

  SherpaOnnxDecodeMultipleOnlineStreams(p_, streams.data(), n);
}

OnlineRecognizerResult OnlineRecognizer::GetResult(
    const OnlineStream *s) const {
  auto r = SherpaOnnxGetOnlineStreamResult(p_, s->Get());

  OnlineRecognizerResult ans;
  ans.text = r->text;

  ans.tokens.resize(r->count);
  for (int32_t i = 0; i != r->count; ++i) {
    ans.tokens[i] = r->tokens_arr[i];
  }

  if (r->timestamps) {
    ans.timestamps.resize(r->count);
    std::copy(r->timestamps, r->timestamps + r->count, ans.timestamps.data());
  }

  ans.json = r->json;

  SherpaOnnxDestroyOnlineRecognizerResult(r);

  return ans;
}

// ============================================================================
// Non-streaming ASR
// ============================================================================
OfflineStream::OfflineStream(const SherpaOnnxOfflineStream *p)
    : MoveOnly<OfflineStream, SherpaOnnxOfflineStream>(p) {}

void OfflineStream::Destroy(const SherpaOnnxOfflineStream *p) const {
  SherpaOnnxDestroyOfflineStream(p);
}

void OfflineStream::AcceptWaveform(int32_t sample_rate, const float *samples,
                                   int32_t n) const {
  SherpaOnnxAcceptWaveformOffline(p_, sample_rate, samples, n);
}

void OfflineStream::SetOption(const char *key, const char *value) const {
  SherpaOnnxOfflineStreamSetOption(p_, key, value);
}

const char *OfflineStream::GetOption(const char *key) const {
  return SherpaOnnxOfflineStreamGetOption(p_, key);
}

int32_t OfflineStream::HasOption(const char *key) const {
  return SherpaOnnxOfflineStreamHasOption(p_, key);
}

static SherpaOnnxOfflineRecognizerConfig Convert(
    const OfflineRecognizerConfig &config) {
  struct SherpaOnnxOfflineRecognizerConfig c;
  memset(&c, 0, sizeof(c));

  c.feat_config.sample_rate = config.feat_config.sample_rate;
  c.feat_config.feature_dim = config.feat_config.feature_dim;
  c.model_config.transducer.encoder =
      config.model_config.transducer.encoder.c_str();
  c.model_config.transducer.decoder =
      config.model_config.transducer.decoder.c_str();
  c.model_config.transducer.joiner =
      config.model_config.transducer.joiner.c_str();

  c.model_config.paraformer.model =
      config.model_config.paraformer.model.c_str();

  c.model_config.nemo_ctc.model = config.model_config.nemo_ctc.model.c_str();

  c.model_config.whisper.encoder = config.model_config.whisper.encoder.c_str();
  c.model_config.whisper.decoder = config.model_config.whisper.decoder.c_str();
  c.model_config.whisper.language =
      config.model_config.whisper.language.c_str();
  c.model_config.whisper.task = config.model_config.whisper.task.c_str();
  c.model_config.whisper.tail_paddings =
      config.model_config.whisper.tail_paddings;
  c.model_config.whisper.enable_token_timestamps =
      config.model_config.whisper.enable_token_timestamps;
  c.model_config.whisper.enable_segment_timestamps =
      config.model_config.whisper.enable_segment_timestamps;

  c.model_config.tdnn.model = config.model_config.tdnn.model.c_str();

  c.model_config.tokens = config.model_config.tokens.c_str();
  c.model_config.num_threads = config.model_config.num_threads;
  c.model_config.debug = config.model_config.debug;
  c.model_config.provider = config.model_config.provider.c_str();
  c.model_config.model_type = config.model_config.model_type.c_str();
  c.model_config.modeling_unit = config.model_config.modeling_unit.c_str();
  c.model_config.bpe_vocab = config.model_config.bpe_vocab.c_str();
  c.model_config.telespeech_ctc = config.model_config.telespeech_ctc.c_str();

  c.model_config.sense_voice.model =
      config.model_config.sense_voice.model.c_str();
  c.model_config.sense_voice.language =
      config.model_config.sense_voice.language.c_str();
  c.model_config.sense_voice.use_itn = config.model_config.sense_voice.use_itn;

  c.model_config.moonshine.preprocessor =
      config.model_config.moonshine.preprocessor.c_str();
  c.model_config.moonshine.encoder =
      config.model_config.moonshine.encoder.c_str();
  c.model_config.moonshine.uncached_decoder =
      config.model_config.moonshine.uncached_decoder.c_str();
  c.model_config.moonshine.cached_decoder =
      config.model_config.moonshine.cached_decoder.c_str();
  c.model_config.moonshine.merged_decoder =
      config.model_config.moonshine.merged_decoder.c_str();

  c.model_config.fire_red_asr.encoder =
      config.model_config.fire_red_asr.encoder.c_str();
  c.model_config.fire_red_asr.decoder =
      config.model_config.fire_red_asr.decoder.c_str();

  c.model_config.dolphin.model = config.model_config.dolphin.model.c_str();

  c.model_config.zipformer_ctc.model =
      config.model_config.zipformer_ctc.model.c_str();

  c.model_config.canary.encoder = config.model_config.canary.encoder.c_str();
  c.model_config.canary.decoder = config.model_config.canary.decoder.c_str();
  c.model_config.canary.src_lang = config.model_config.canary.src_lang.c_str();
  c.model_config.canary.tgt_lang = config.model_config.canary.tgt_lang.c_str();
  c.model_config.canary.use_pnc = config.model_config.canary.use_pnc;

  c.model_config.wenet_ctc.model = config.model_config.wenet_ctc.model.c_str();

  c.model_config.omnilingual.model =
      config.model_config.omnilingual.model.c_str();

  c.model_config.funasr_nano.encoder_adaptor =
      config.model_config.funasr_nano.encoder_adaptor.c_str();
  c.model_config.funasr_nano.llm = config.model_config.funasr_nano.llm.c_str();
  c.model_config.funasr_nano.embedding =
      config.model_config.funasr_nano.embedding.c_str();
  c.model_config.funasr_nano.tokenizer =
      config.model_config.funasr_nano.tokenizer.c_str();
  c.model_config.funasr_nano.system_prompt =
      config.model_config.funasr_nano.system_prompt.c_str();
  c.model_config.funasr_nano.user_prompt =
      config.model_config.funasr_nano.user_prompt.c_str();
  c.model_config.funasr_nano.max_new_tokens =
      config.model_config.funasr_nano.max_new_tokens;
  c.model_config.funasr_nano.temperature =
      config.model_config.funasr_nano.temperature;
  c.model_config.funasr_nano.top_p = config.model_config.funasr_nano.top_p;
  c.model_config.funasr_nano.seed = config.model_config.funasr_nano.seed;
  c.model_config.funasr_nano.language =
      config.model_config.funasr_nano.language.c_str();
  c.model_config.funasr_nano.itn = config.model_config.funasr_nano.itn ? 1 : 0;
  c.model_config.funasr_nano.hotwords =
      config.model_config.funasr_nano.hotwords.c_str();
  c.model_config.medasr.model = config.model_config.medasr.model.c_str();

  c.model_config.fire_red_asr_ctc.model =
      config.model_config.fire_red_asr_ctc.model.c_str();

  c.lm_config.model = config.lm_config.model.c_str();
  c.lm_config.scale = config.lm_config.scale;

  c.decoding_method = config.decoding_method.c_str();
  c.max_active_paths = config.max_active_paths;
  c.hotwords_file = config.hotwords_file.c_str();
  c.hotwords_score = config.hotwords_score;

  c.rule_fsts = config.rule_fsts.c_str();
  c.rule_fars = config.rule_fars.c_str();

  c.blank_penalty = config.blank_penalty;

  c.hr.lexicon = config.hr.lexicon.c_str();
  c.hr.rule_fsts = config.hr.rule_fsts.c_str();

  return c;
}

OfflineRecognizer OfflineRecognizer::Create(
    const OfflineRecognizerConfig &config) {
  auto c = Convert(config);

  auto p = SherpaOnnxCreateOfflineRecognizer(&c);
  return OfflineRecognizer(p);
}

void OfflineRecognizer::SetConfig(const OfflineRecognizerConfig &config) const {
  auto c = Convert(config);
  SherpaOnnxOfflineRecognizerSetConfig(p_, &c);
}

OfflineRecognizer::OfflineRecognizer(const SherpaOnnxOfflineRecognizer *p)
    : MoveOnly<OfflineRecognizer, SherpaOnnxOfflineRecognizer>(p) {}

void OfflineRecognizer::Destroy(const SherpaOnnxOfflineRecognizer *p) const {
  SherpaOnnxDestroyOfflineRecognizer(p);
}

OfflineStream OfflineRecognizer::CreateStream() const {
  auto s = SherpaOnnxCreateOfflineStream(p_);
  return OfflineStream{s};
}

OfflineStream OfflineRecognizer::CreateStream(
    const std::string &hotwords) const {
  auto s = SherpaOnnxCreateOfflineStreamWithHotwords(p_, hotwords.c_str());
  return OfflineStream{s};
}

void OfflineRecognizer::Decode(const OfflineStream *s) const {
  SherpaOnnxDecodeOfflineStream(p_, s->Get());
}

void OfflineRecognizer::Decode(const OfflineStream *ss, int32_t n) const {
  if (n <= 0) {
    return;
  }

  std::vector<const SherpaOnnxOfflineStream *> streams(n);
  for (int32_t i = 0; i != n; ++i) {
    streams[i] = ss[i].Get();
  }

  SherpaOnnxDecodeMultipleOfflineStreams(p_, streams.data(), n);
}

OfflineRecognizerResult OfflineRecognizer::GetResult(
    const OfflineStream *s) const {
  auto r = SherpaOnnxGetOfflineStreamResult(s->Get());

  OfflineRecognizerResult ans;
  if (r) {
    ans.text = r->text;

    if (r->timestamps) {
      ans.timestamps.resize(r->count);
      std::copy(r->timestamps, r->timestamps + r->count, ans.timestamps.data());
    }

    ans.tokens.resize(r->count);
    for (int32_t i = 0; i != r->count; ++i) {
      ans.tokens[i] = r->tokens_arr[i];
    }

    ans.json = r->json;
    ans.lang = r->lang ? r->lang : "";
    ans.emotion = r->emotion ? r->emotion : "";
    ans.event = r->event ? r->event : "";

    if (r->durations) {
      ans.durations.resize(r->count);
      std::copy(r->durations, r->durations + r->count, ans.durations.data());
    }
  }

  SherpaOnnxDestroyOfflineRecognizerResult(r);

  return ans;
}

std::shared_ptr<OfflineRecognizerResult> OfflineRecognizer::GetResultPtr(
    const OfflineStream *s) const {
  auto r = GetResult(s);
  return std::make_shared<OfflineRecognizerResult>(r);
}

OfflineTts OfflineTts::Create(const OfflineTtsConfig &config) {
  struct SherpaOnnxOfflineTtsConfig c;
  memset(&c, 0, sizeof(c));

  c.model.vits.model = config.model.vits.model.c_str();
  c.model.vits.lexicon = config.model.vits.lexicon.c_str();
  c.model.vits.tokens = config.model.vits.tokens.c_str();
  c.model.vits.data_dir = config.model.vits.data_dir.c_str();
  c.model.vits.noise_scale = config.model.vits.noise_scale;
  c.model.vits.noise_scale_w = config.model.vits.noise_scale_w;
  c.model.vits.length_scale = config.model.vits.length_scale;

  c.model.matcha.acoustic_model = config.model.matcha.acoustic_model.c_str();
  c.model.matcha.vocoder = config.model.matcha.vocoder.c_str();
  c.model.matcha.lexicon = config.model.matcha.lexicon.c_str();
  c.model.matcha.tokens = config.model.matcha.tokens.c_str();
  c.model.matcha.data_dir = config.model.matcha.data_dir.c_str();
  c.model.matcha.noise_scale = config.model.matcha.noise_scale;
  c.model.matcha.length_scale = config.model.matcha.length_scale;

  c.model.kokoro.model = config.model.kokoro.model.c_str();
  c.model.kokoro.voices = config.model.kokoro.voices.c_str();
  c.model.kokoro.tokens = config.model.kokoro.tokens.c_str();
  c.model.kokoro.data_dir = config.model.kokoro.data_dir.c_str();
  c.model.kokoro.length_scale = config.model.kokoro.length_scale;
  c.model.kokoro.lexicon = config.model.kokoro.lexicon.c_str();
  c.model.kokoro.lang = config.model.kokoro.lang.c_str();

  c.model.kitten.model = config.model.kitten.model.c_str();
  c.model.kitten.voices = config.model.kitten.voices.c_str();
  c.model.kitten.tokens = config.model.kitten.tokens.c_str();
  c.model.kitten.data_dir = config.model.kitten.data_dir.c_str();
  c.model.kitten.length_scale = config.model.kitten.length_scale;

  c.model.zipvoice.tokens = config.model.zipvoice.tokens.c_str();
  c.model.zipvoice.encoder = config.model.zipvoice.encoder.c_str();
  c.model.zipvoice.decoder = config.model.zipvoice.decoder.c_str();
  c.model.zipvoice.vocoder = config.model.zipvoice.vocoder.c_str();
  c.model.zipvoice.data_dir = config.model.zipvoice.data_dir.c_str();
  c.model.zipvoice.lexicon = config.model.zipvoice.lexicon.c_str();
  c.model.zipvoice.feat_scale = config.model.zipvoice.feat_scale;
  c.model.zipvoice.t_shift = config.model.zipvoice.t_shift;
  c.model.zipvoice.target_rms = config.model.zipvoice.target_rms;
  c.model.zipvoice.guidance_scale = config.model.zipvoice.guidance_scale;

  c.model.pocket.lm_flow = config.model.pocket.lm_flow.c_str();
  c.model.pocket.lm_main = config.model.pocket.lm_main.c_str();
  c.model.pocket.encoder = config.model.pocket.encoder.c_str();
  c.model.pocket.decoder = config.model.pocket.decoder.c_str();
  c.model.pocket.text_conditioner =
      config.model.pocket.text_conditioner.c_str();

  c.model.pocket.vocab_json = config.model.pocket.vocab_json.c_str();

  c.model.pocket.token_scores_json =
      config.model.pocket.token_scores_json.c_str();

  c.model.pocket.voice_embedding_cache_capacity =
      config.model.pocket.voice_embedding_cache_capacity;

  c.model.supertonic.duration_predictor =
      config.model.supertonic.duration_predictor.c_str();
  c.model.supertonic.text_encoder =
      config.model.supertonic.text_encoder.c_str();
  c.model.supertonic.vector_estimator =
      config.model.supertonic.vector_estimator.c_str();
  c.model.supertonic.vocoder = config.model.supertonic.vocoder.c_str();
  c.model.supertonic.tts_json = config.model.supertonic.tts_json.c_str();
  c.model.supertonic.unicode_indexer =
      config.model.supertonic.unicode_indexer.c_str();
  c.model.supertonic.voice_style = config.model.supertonic.voice_style.c_str();

  c.model.num_threads = config.model.num_threads;
  c.model.debug = config.model.debug;
  c.model.provider = config.model.provider.c_str();

  c.rule_fsts = config.rule_fsts.c_str();
  c.max_num_sentences = config.max_num_sentences;
  c.silence_scale = config.silence_scale;
  c.rule_fars = config.rule_fars.c_str();

  auto p = SherpaOnnxCreateOfflineTts(&c);
  return OfflineTts(p);
}

OfflineTts::OfflineTts(const SherpaOnnxOfflineTts *p)
    : MoveOnly<OfflineTts, SherpaOnnxOfflineTts>(p) {}

void OfflineTts::Destroy(const SherpaOnnxOfflineTts *p) const {
  SherpaOnnxDestroyOfflineTts(p);
}

int32_t OfflineTts::SampleRate() const {
  return SherpaOnnxOfflineTtsSampleRate(p_);
}

int32_t OfflineTts::NumSpeakers() const {
  return SherpaOnnxOfflineTtsNumSpeakers(p_);
}

GeneratedAudio OfflineTts::Generate(const std::string &text,
                                    int32_t sid /*= 0*/, float speed /*= 1.0*/,
                                    OfflineTtsCallback callback /*= nullptr*/,
                                    void *arg /*= nullptr*/) const {
  const SherpaOnnxGeneratedAudio *audio;
  if (!callback) {
    audio = SherpaOnnxOfflineTtsGenerate(p_, text.c_str(), sid, speed);
  } else {
    audio = SherpaOnnxOfflineTtsGenerateWithProgressCallbackWithArg(
        p_, text.c_str(), sid, speed, callback, arg);
  }

  GeneratedAudio ans;

  if (!audio) {
    return ans;
  }

  ans.samples = std::vector<float>{audio->samples, audio->samples + audio->n};
  ans.sample_rate = audio->sample_rate;

  SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
  return ans;
}

GeneratedAudio OfflineTts::Generate(const std::string &text,
                                    const GenerationConfig &config,
                                    OfflineTtsCallback callback /*= nullptr*/,
                                    void *arg /*= nullptr*/) const {
  SherpaOnnxGenerationConfig c;
  memset(&c, 0, sizeof(c));

  c.silence_scale = config.silence_scale;
  c.speed = config.speed;
  c.sid = config.sid;
  c.reference_audio = config.reference_audio.data();
  c.reference_audio_len = config.reference_audio.size();
  c.reference_sample_rate = config.reference_sample_rate;
  c.reference_text = config.reference_text.c_str();
  c.num_steps = config.num_steps;

  nlohmann::json j = config.extra;
  std::string s = j.dump();
  c.extra = s.c_str();

  const SherpaOnnxGeneratedAudio *audio =
      SherpaOnnxOfflineTtsGenerateWithConfig(p_, text.c_str(), &c, callback,
                                             arg);
  GeneratedAudio ans;

  if (!audio) {
    return ans;
  }

  ans.samples = std::vector<float>{audio->samples, audio->samples + audio->n};
  ans.sample_rate = audio->sample_rate;
  SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
  return ans;
}

std::shared_ptr<GeneratedAudio> OfflineTts::Generate2(
    const std::string &text, int32_t sid /*= 0*/, float speed /*= 1.0*/,
    OfflineTtsCallback callback /*= nullptr*/, void *arg /*= nullptr*/) const {
  auto audio = Generate(text, sid, speed, callback, arg);

  GeneratedAudio *ans = new GeneratedAudio;
  ans->samples = std::move(audio.samples);
  ans->sample_rate = audio.sample_rate;

  return std::shared_ptr<GeneratedAudio>(ans);
}

std::shared_ptr<GeneratedAudio> OfflineTts::Generate2(
    const std::string &text, const GenerationConfig &config,
    OfflineTtsCallback callback /*= nullptr*/, void *arg /*= nullptr*/) const {
  auto audio = Generate(text, config, callback, arg);

  GeneratedAudio *ans = new GeneratedAudio;
  ans->samples = std::move(audio.samples);
  ans->sample_rate = audio.sample_rate;

  return std::shared_ptr<GeneratedAudio>(ans);
}

KeywordSpotter KeywordSpotter::Create(const KeywordSpotterConfig &config) {
  struct SherpaOnnxKeywordSpotterConfig c;
  memset(&c, 0, sizeof(c));

  c.feat_config.sample_rate = config.feat_config.sample_rate;

  c.model_config.transducer.encoder =
      config.model_config.transducer.encoder.c_str();
  c.model_config.transducer.decoder =
      config.model_config.transducer.decoder.c_str();
  c.model_config.transducer.joiner =
      config.model_config.transducer.joiner.c_str();
  c.feat_config.feature_dim = config.feat_config.feature_dim;

  c.model_config.paraformer.encoder =
      config.model_config.paraformer.encoder.c_str();
  c.model_config.paraformer.decoder =
      config.model_config.paraformer.decoder.c_str();

  c.model_config.zipformer2_ctc.model =
      config.model_config.zipformer2_ctc.model.c_str();

  c.model_config.nemo_ctc.model = config.model_config.nemo_ctc.model.c_str();

  c.model_config.tokens = config.model_config.tokens.c_str();
  c.model_config.num_threads = config.model_config.num_threads;
  c.model_config.provider = config.model_config.provider.c_str();
  c.model_config.debug = config.model_config.debug;
  c.model_config.model_type = config.model_config.model_type.c_str();
  c.model_config.modeling_unit = config.model_config.modeling_unit.c_str();
  c.model_config.bpe_vocab = config.model_config.bpe_vocab.c_str();
  c.model_config.tokens_buf = config.model_config.tokens_buf.c_str();
  c.model_config.tokens_buf_size = config.model_config.tokens_buf.size();

  c.max_active_paths = config.max_active_paths;
  c.num_trailing_blanks = config.num_trailing_blanks;
  c.keywords_score = config.keywords_score;
  c.keywords_threshold = config.keywords_threshold;
  c.keywords_file = config.keywords_file.c_str();
  c.keywords_buf = config.keywords_buf.c_str();
  c.keywords_buf_size = static_cast<int32_t>(config.keywords_buf.size());

  auto p = SherpaOnnxCreateKeywordSpotter(&c);
  return KeywordSpotter(p);
}

KeywordSpotter::KeywordSpotter(const SherpaOnnxKeywordSpotter *p)
    : MoveOnly<KeywordSpotter, SherpaOnnxKeywordSpotter>(p) {}

void KeywordSpotter::Destroy(const SherpaOnnxKeywordSpotter *p) const {
  SherpaOnnxDestroyKeywordSpotter(p);
}

OnlineStream KeywordSpotter::CreateStream() const {
  auto s = SherpaOnnxCreateKeywordStream(p_);
  return OnlineStream{s};
}

OnlineStream KeywordSpotter::CreateStream(const std::string &keywords) const {
  auto s = SherpaOnnxCreateKeywordStreamWithKeywords(p_, keywords.c_str());
  return OnlineStream{s};
}

bool KeywordSpotter::IsReady(const OnlineStream *s) const {
  return SherpaOnnxIsKeywordStreamReady(p_, s->Get());
}

void KeywordSpotter::Decode(const OnlineStream *s) const {
  return SherpaOnnxDecodeKeywordStream(p_, s->Get());
}

void KeywordSpotter::Decode(const OnlineStream *ss, int32_t n) const {
  if (n <= 0) {
    return;
  }

  std::vector<const SherpaOnnxOnlineStream *> streams(n);
  for (int32_t i = 0; i != n; ++i) {
    streams[i] = ss[i].Get();
  }

  SherpaOnnxDecodeMultipleKeywordStreams(p_, streams.data(), n);
}

KeywordResult KeywordSpotter::GetResult(const OnlineStream *s) const {
  auto r = SherpaOnnxGetKeywordResult(p_, s->Get());

  KeywordResult ans;
  ans.keyword = r->keyword;

  ans.tokens.resize(r->count);
  for (int32_t i = 0; i < r->count; ++i) {
    ans.tokens[i] = r->tokens_arr[i];
  }

  if (r->timestamps) {
    ans.timestamps.resize(r->count);
    std::copy(r->timestamps, r->timestamps + r->count, ans.timestamps.data());
  }

  ans.start_time = r->start_time;
  ans.json = r->json;

  SherpaOnnxDestroyKeywordResult(r);

  return ans;
}

void KeywordSpotter::Reset(const OnlineStream *s) const {
  SherpaOnnxResetKeywordStream(p_, s->Get());
}

// ============================================================
// For Offline Speech Enhancement
// ============================================================

OfflineSpeechDenoiser OfflineSpeechDenoiser::Create(
    const OfflineSpeechDenoiserConfig &config) {
  struct SherpaOnnxOfflineSpeechDenoiserConfig c;
  FillSpeechDenoiserModelConfig(config.model, &c.model);

  auto p = SherpaOnnxCreateOfflineSpeechDenoiser(&c);

  return OfflineSpeechDenoiser(p);
}

void OfflineSpeechDenoiser::Destroy(
    const SherpaOnnxOfflineSpeechDenoiser *p) const {
  SherpaOnnxDestroyOfflineSpeechDenoiser(p);
}

OfflineSpeechDenoiser::OfflineSpeechDenoiser(
    const SherpaOnnxOfflineSpeechDenoiser *p)
    : MoveOnly<OfflineSpeechDenoiser, SherpaOnnxOfflineSpeechDenoiser>(p) {}

DenoisedAudio OfflineSpeechDenoiser::Run(const float *samples, int32_t n,
                                         int32_t sample_rate) const {
  auto audio = SherpaOnnxOfflineSpeechDenoiserRun(p_, samples, n, sample_rate);
  if (audio == nullptr) {
    return {};
  }

  DenoisedAudio ans;
  ans.samples = {audio->samples, audio->samples + audio->n};
  ans.sample_rate = audio->sample_rate;
  SherpaOnnxDestroyDenoisedAudio(audio);

  return ans;
}

int32_t OfflineSpeechDenoiser::GetSampleRate() const {
  return SherpaOnnxOfflineSpeechDenoiserGetSampleRate(p_);
}

OnlineSpeechDenoiser OnlineSpeechDenoiser::Create(
    const OnlineSpeechDenoiserConfig &config) {
  struct SherpaOnnxOnlineSpeechDenoiserConfig c;
  FillSpeechDenoiserModelConfig(config.model, &c.model);

  auto p = SherpaOnnxCreateOnlineSpeechDenoiser(&c);
  return OnlineSpeechDenoiser(p);
}

void OnlineSpeechDenoiser::Destroy(
    const SherpaOnnxOnlineSpeechDenoiser *p) const {
  SherpaOnnxDestroyOnlineSpeechDenoiser(p);
}

OnlineSpeechDenoiser::OnlineSpeechDenoiser(
    const SherpaOnnxOnlineSpeechDenoiser *p)
    : MoveOnly<OnlineSpeechDenoiser, SherpaOnnxOnlineSpeechDenoiser>(p) {}

DenoisedAudio OnlineSpeechDenoiser::Run(const float *samples, int32_t n,
                                        int32_t sample_rate) const {
  auto audio = SherpaOnnxOnlineSpeechDenoiserRun(p_, samples, n, sample_rate);
  if (audio == nullptr) {
    return {};
  }

  DenoisedAudio ans;
  ans.samples = {audio->samples, audio->samples + audio->n};
  ans.sample_rate = audio->sample_rate;
  SherpaOnnxDestroyDenoisedAudio(audio);
  return ans;
}

DenoisedAudio OnlineSpeechDenoiser::Flush() const {
  auto audio = SherpaOnnxOnlineSpeechDenoiserFlush(p_);
  if (audio == nullptr) {
    return {};
  }

  DenoisedAudio ans;
  ans.samples = {audio->samples, audio->samples + audio->n};
  ans.sample_rate = audio->sample_rate;
  SherpaOnnxDestroyDenoisedAudio(audio);
  return ans;
}

void OnlineSpeechDenoiser::Reset() const {
  SherpaOnnxOnlineSpeechDenoiserReset(p_);
}

int32_t OnlineSpeechDenoiser::GetSampleRate() const {
  return SherpaOnnxOnlineSpeechDenoiserGetSampleRate(p_);
}

int32_t OnlineSpeechDenoiser::GetFrameShiftInSamples() const {
  return SherpaOnnxOnlineSpeechDenoiserGetFrameShiftInSamples(p_);
}

CircularBuffer CircularBuffer::Create(int32_t capacity) {
  auto p = SherpaOnnxCreateCircularBuffer(capacity);
  return CircularBuffer(p);
}

CircularBuffer::CircularBuffer(const SherpaOnnxCircularBuffer *p)
    : MoveOnly<CircularBuffer, SherpaOnnxCircularBuffer>(p) {}

void CircularBuffer::Destroy(const SherpaOnnxCircularBuffer *p) const {
  SherpaOnnxDestroyCircularBuffer(p);
}

void CircularBuffer::Push(const float *samples, int32_t n) const {
  SherpaOnnxCircularBufferPush(p_, samples, n);
}

std::vector<float> CircularBuffer::Get(int32_t start_index, int32_t n) const {
  const float *samples = SherpaOnnxCircularBufferGet(p_, start_index, n);
  std::vector<float> ans(n);
  std::copy(samples, samples + n, ans.begin());

  SherpaOnnxCircularBufferFree(samples);
  return ans;
}

void CircularBuffer::Pop(int32_t n) const {
  SherpaOnnxCircularBufferPop(p_, n);
}

int32_t CircularBuffer::Size() const {
  return SherpaOnnxCircularBufferSize(p_);
}

int32_t CircularBuffer::Head() const {
  return SherpaOnnxCircularBufferHead(p_);
}

void CircularBuffer::Reset() const { SherpaOnnxCircularBufferReset(p_); }

VoiceActivityDetector VoiceActivityDetector::Create(
    const VadModelConfig &config, float buffer_size_in_seconds) {
  struct SherpaOnnxVadModelConfig c;
  memset(&c, 0, sizeof(c));

  c.silero_vad.model = config.silero_vad.model.c_str();
  c.silero_vad.threshold = config.silero_vad.threshold;
  c.silero_vad.min_silence_duration = config.silero_vad.min_silence_duration;
  c.silero_vad.min_speech_duration = config.silero_vad.min_speech_duration;
  c.silero_vad.window_size = config.silero_vad.window_size;
  c.silero_vad.max_speech_duration = config.silero_vad.max_speech_duration;

  c.ten_vad.model = config.ten_vad.model.c_str();
  c.ten_vad.threshold = config.ten_vad.threshold;
  c.ten_vad.min_silence_duration = config.ten_vad.min_silence_duration;
  c.ten_vad.min_speech_duration = config.ten_vad.min_speech_duration;
  c.ten_vad.window_size = config.ten_vad.window_size;
  c.ten_vad.max_speech_duration = config.ten_vad.max_speech_duration;

  c.sample_rate = config.sample_rate;
  c.num_threads = config.num_threads;
  c.provider = config.provider.c_str();
  c.debug = config.debug;

  auto p = SherpaOnnxCreateVoiceActivityDetector(&c, buffer_size_in_seconds);
  return VoiceActivityDetector(p);
}

VoiceActivityDetector::VoiceActivityDetector(
    const SherpaOnnxVoiceActivityDetector *p)
    : MoveOnly<VoiceActivityDetector, SherpaOnnxVoiceActivityDetector>(p) {}

void VoiceActivityDetector::Destroy(
    const SherpaOnnxVoiceActivityDetector *p) const {
  SherpaOnnxDestroyVoiceActivityDetector(p);
}

void VoiceActivityDetector::AcceptWaveform(const float *samples,
                                           int32_t n) const {
  SherpaOnnxVoiceActivityDetectorAcceptWaveform(p_, samples, n);
}

bool VoiceActivityDetector::IsEmpty() const {
  return SherpaOnnxVoiceActivityDetectorEmpty(p_);
}

bool VoiceActivityDetector ::IsDetected() const {
  return SherpaOnnxVoiceActivityDetectorDetected(p_);
}

void VoiceActivityDetector::Pop() const {
  SherpaOnnxVoiceActivityDetectorPop(p_);
}

void VoiceActivityDetector::Clear() const {
  SherpaOnnxVoiceActivityDetectorClear(p_);
}

SpeechSegment VoiceActivityDetector::Front() const {
  auto f = SherpaOnnxVoiceActivityDetectorFront(p_);

  SpeechSegment segment;
  if (!f) return segment;
  segment.start = f->start;
  segment.samples = std::vector<float>{f->samples, f->samples + f->n};

  SherpaOnnxDestroySpeechSegment(f);

  return segment;
}

std::shared_ptr<SpeechSegment> VoiceActivityDetector::FrontPtr() const {
  auto segment = Front();
  return std::make_shared<SpeechSegment>(segment);
}

void VoiceActivityDetector::Reset() const {
  SherpaOnnxVoiceActivityDetectorReset(p_);
}

void VoiceActivityDetector::Flush() const {
  SherpaOnnxVoiceActivityDetectorFlush(p_);
}

LinearResampler LinearResampler::Create(int32_t samp_rate_in_hz,
                                        int32_t samp_rate_out_hz,
                                        float filter_cutoff_hz,
                                        int32_t num_zeros) {
  auto p = SherpaOnnxCreateLinearResampler(samp_rate_in_hz, samp_rate_out_hz,
                                           filter_cutoff_hz, num_zeros);
  return LinearResampler(p);
}

LinearResampler::LinearResampler(const SherpaOnnxLinearResampler *p)
    : MoveOnly<LinearResampler, SherpaOnnxLinearResampler>(p) {}

void LinearResampler::Destroy(const SherpaOnnxLinearResampler *p) const {
  SherpaOnnxDestroyLinearResampler(p);
}

void LinearResampler::Reset() const { SherpaOnnxLinearResamplerReset(p_); }

std::vector<float> LinearResampler::Resample(const float *input,
                                             int32_t input_dim,
                                             bool flush) const {
  auto out = SherpaOnnxLinearResamplerResample(p_, input, input_dim, flush);

  std::vector<float> ans{out->samples, out->samples + out->n};

  SherpaOnnxLinearResamplerResampleFree(out);

  return ans;
}

int32_t LinearResampler::GetInputSamplingRate() const {
  return SherpaOnnxLinearResamplerResampleGetInputSampleRate(p_);
}

int32_t LinearResampler::GetOutputSamplingRate() const {
  return SherpaOnnxLinearResamplerResampleGetOutputSampleRate(p_);
}

std::string GetVersionStr() { return SherpaOnnxGetVersionStr(); }

std::string GetGitSha1() { return SherpaOnnxGetGitSha1(); }

std::string GetGitDate() { return SherpaOnnxGetGitDate(); }

bool FileExists(const std::string &filename) {
  return SherpaOnnxFileExists(filename.c_str());
}

// ============================================================
// For Offline Punctuation
// ============================================================
OfflinePunctuation OfflinePunctuation::Create(
    const OfflinePunctuationConfig &config) {
  struct SherpaOnnxOfflinePunctuationConfig c;
  memset(&c, 0, sizeof(c));
  c.model.ct_transformer = config.model.ct_transformer.c_str();
  c.model.num_threads = config.model.num_threads;
  c.model.debug = config.model.debug;
  c.model.provider = config.model.provider.c_str();

  const SherpaOnnxOfflinePunctuation *punct =
      SherpaOnnxCreateOfflinePunctuation(&c);
  return OfflinePunctuation(punct);
}

OfflinePunctuation::OfflinePunctuation(const SherpaOnnxOfflinePunctuation *p)
    : MoveOnly<OfflinePunctuation, SherpaOnnxOfflinePunctuation>(p) {}

void OfflinePunctuation::Destroy(const SherpaOnnxOfflinePunctuation *p) const {
  SherpaOnnxDestroyOfflinePunctuation(p);
}

std::string OfflinePunctuation::AddPunctuation(const std::string &text) const {
  const char *result = SherpaOfflinePunctuationAddPunct(p_, text.c_str());
  if (!result) return {};
  std::string ans(result);
  SherpaOfflinePunctuationFreeText(result);
  return ans;
}

// ============================================================
// For Online Punctuation
// ============================================================
OnlinePunctuation OnlinePunctuation::Create(
    const OnlinePunctuationConfig &config) {
  struct SherpaOnnxOnlinePunctuationConfig c;
  memset(&c, 0, sizeof(c));
  c.model.cnn_bilstm = config.model.cnn_bilstm.c_str();
  c.model.bpe_vocab = config.model.bpe_vocab.c_str();
  c.model.num_threads = config.model.num_threads;
  c.model.debug = config.model.debug;
  c.model.provider = config.model.provider.c_str();

  const SherpaOnnxOnlinePunctuation *punct =
      SherpaOnnxCreateOnlinePunctuation(&c);
  return OnlinePunctuation(punct);
}

OnlinePunctuation::OnlinePunctuation(const SherpaOnnxOnlinePunctuation *p)
    : MoveOnly<OnlinePunctuation, SherpaOnnxOnlinePunctuation>(p) {}

void OnlinePunctuation::Destroy(const SherpaOnnxOnlinePunctuation *p) const {
  SherpaOnnxDestroyOnlinePunctuation(p);
}

std::string OnlinePunctuation::AddPunctuation(const std::string &text) const {
  const char *result = SherpaOnnxOnlinePunctuationAddPunct(p_, text.c_str());
  if (!result) return {};
  std::string ans(result);
  SherpaOnnxOnlinePunctuationFreeText(result);
  return ans;
}

// ============================================================
// For Audio tagging
// ============================================================
AudioTagging AudioTagging::Create(const AudioTaggingConfig &config) {
  struct SherpaOnnxAudioTaggingConfig c;
  memset(&c, 0, sizeof(c));

  c.model.zipformer.model = config.model.zipformer.model.c_str();
  c.model.ced = config.model.ced.c_str();
  c.model.num_threads = config.model.num_threads;
  c.model.debug = config.model.debug;
  c.model.provider = config.model.provider.c_str();
  c.labels = config.labels.c_str();
  c.top_k = config.top_k;

  const SherpaOnnxAudioTagging *tagger = SherpaOnnxCreateAudioTagging(&c);
  return AudioTagging(tagger);
}

AudioTagging::AudioTagging(const SherpaOnnxAudioTagging *p)
    : MoveOnly<AudioTagging, SherpaOnnxAudioTagging>(p) {}

void AudioTagging::Destroy(const SherpaOnnxAudioTagging *p) const {
  SherpaOnnxDestroyAudioTagging(p);
}

OfflineStream AudioTagging::CreateStream() const {
  auto s = SherpaOnnxAudioTaggingCreateOfflineStream(p_);
  return OfflineStream{s};
}

std::vector<AudioEvent> AudioTagging::Compute(const OfflineStream *s,
                                              int32_t top_k /*= -1*/) {
  auto events = SherpaOnnxAudioTaggingCompute(p_, s->Get(), top_k);
  std::vector<AudioEvent> ans;

  auto pe = events;
  while (pe && *pe) {
    AudioEvent e;
    e.name = (*pe)->name;
    e.index = (*pe)->index;
    e.prob = (*pe)->prob;
    ans.push_back(std::move(e));
    ++pe;
  }

  SherpaOnnxAudioTaggingFreeResults(events);

  return ans;
}

std::shared_ptr<std::vector<AudioEvent>> AudioTagging::ComputePtr(
    const OfflineStream *s, int32_t top_k /*= -1*/) {
  auto events = Compute(s, top_k);
  return std::make_shared<std::vector<AudioEvent>>(events);
}

}  // namespace sherpa_onnx::cxx


================================================
FILE: sherpa-onnx/c-api/cxx-api.h
================================================
// sherpa-onnx/c-api/cxx-api.h
//
// Copyright (c)  2024  Xiaomi Corporation
/**
 * @file cxx-api.h
 * @brief Public C++ wrapper for the sherpa-onnx C API.
 *
 * This header provides a lightweight C++ interface on top of `c-api.h`. The
 * wrapper follows a few simple design rules:
 *
 * - Configuration objects are plain structs with `std::string`,
 *   `std::vector`, and default values
 * - Runtime handles are move-only RAII classes that automatically release the
 *   underlying C handle
 * - Result objects are copied into standard C++ containers so callers do not
 *   need to manage C-allocated memory manually
 * - The API mirrors the C API closely, while offering a more idiomatic C++
 *   surface
 *
 * Major feature families available in this file:
 *
 * - Streaming ASR
 * - Non-streaming ASR
 * - Non-streaming TTS
 * - Keyword spotting
 * - Offline and online speech enhancement
 * - VAD and circular buffering
 * - Linear resampling
 * - Version/file/WAVE helpers
 * - Offline and online punctuation
 * - Audio tagging
 *
 * Typical usage pattern:
 *
 * 1. Fill a config struct
 * 2. Create the corresponding RAII wrapper with `Class::Create(...)`
 * 3. Check `wrapper.Get()` for success
 * 4. Feed audio or text, run inference, and retrieve results as C++ objects
 * 5. Let destructors clean up automatically
 *
 * Example programs are available in `cxx-api-examples/` and show concrete model
 * packages and end-to-end usage.
 */
#ifndef SHERPA_ONNX_C_API_CXX_API_H_
#define SHERPA_ONNX_C_API_CXX_API_H_

#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include "sherpa-onnx/c-api/c-api.h"

namespace sherpa_onnx::cxx {

// ============================================================================
// Streaming ASR
// ============================================================================
/** @brief Streaming transducer model files. */
struct OnlineTransducerModelConfig {
  /** Encoder ONNX model. */
  std::string encoder;
  /** Decoder ONNX model. */
  std::string decoder;
  /** Joiner ONNX model. */
  std::string joiner;
};

/** @brief Streaming Paraformer model files. */
struct OnlineParaformerModelConfig {
  /** Encoder ONNX model. */
  std::string encoder;
  /** Decoder ONNX model. */
  std::string decoder;
};

/** @brief Streaming Zipformer2 CTC model file. */
struct OnlineZipformer2CtcModelConfig {
  /** Model ONNX file. */
  std::string model;
};

/** @brief Streaming NeMo CTC model file. */
struct OnlineNemoCtcModelConfig {
  /** Model ONNX file. */
  std::string model;
};

/** @brief Streaming T-One CTC model file. */
struct OnlineToneCtcModelConfig {
  /** Model ONNX file. */
  std::string model;
};

/**
 * @brief Acoustic model configuration for streaming ASR.
 *
 * Configure exactly one model family. If multiple model families are set, one
 * of them will be chosen and the choice is implementation-defined.
 *
 * Example using
 * `sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20`:
 *
 * @code
 * OnlineModelConfig model;
 * model.transducer.encoder =
 *     "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
 *     "encoder-epoch-99-avg-1.int8.onnx";
 * model.transducer.decoder =
 *     "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
 *     "decoder-epoch-99-avg-1.onnx";
 * model.transducer.joiner =
 *     "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
 *     "joiner-epoch-99-avg-1.int8.onnx";
 * model.tokens =
 *     "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt";
 * model.num_threads = 1;
 * @endcode
 */
struct OnlineModelConfig {
  /** Streaming transducer configuration. */
  OnlineTransducerModelConfig transducer;
  /** Streaming Paraformer configuration. */
  OnlineParaformerModelConfig paraformer;
  /** Streaming Zipformer2 CTC configuration. */
  OnlineZipformer2CtcModelConfig zipformer2_ctc;
  /** Streaming NeMo CTC configuration. */
  OnlineNemoCtcModelConfig nemo_ctc;
  /** Streaming T-One CTC configuration. */
  OnlineToneCtcModelConfig t_one_ctc;
  /** Token file path. */
  std::string tokens;
  /** Number of inference threads. */
  int32_t num_threads = 1;
  /** Execution provider such as `"cpu"`. */
  std::string provider = "cpu";
  /** Enable verbose debug logging. */
  bool debug = false;
  /** Optional explicit model type hint. */
  std::string model_type;
  /** Modeling unit such as `"cjkchar"` or `"bpe"`. */
  std::string modeling_unit = "cjkchar";
  /** Optional BPE vocabulary. */
  std::string bpe_vocab;
  /** Optional in-memory token content. If non-empty, it is used instead of a
   * file. */
  std::string tokens_buf;
};

/** @brief Feature extraction settings shared by ASR and KWS wrappers. */
struct FeatureConfig {
  /** Input sample rate in Hz. */
  int32_t sample_rate = 16000;
  /** Number of features per frame. */
  int32_t feature_dim = 80;
};

/** @brief Decoder graph configuration for online CTC + FST decoding. */
struct OnlineCtcFstDecoderConfig {
  /** FST graph file. */
  std::string graph;
  /** Maximum number of active states during search. */
  int32_t max_active = 3000;
};

/** @brief Homophone replacement resources used by some Chinese ASR setups. */
struct HomophoneReplacerConfig {
  /** Reserved field. Currently unused by the wrapper. */
  std::string dict_dir;
  /** Lexicon file used by the replacer. */
  std::string lexicon;
  /** Rule FST file used for replacement. */
  std::string rule_fsts;
};

/**
 * @brief Configuration for streaming ASR.
 *
 * Example:
 *
 * @code
 * OnlineRecognizerConfig config;
 * config.model_config.transducer.encoder =
 *     "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
 *     "encoder-epoch-99-avg-1.int8.onnx";
 * config.model_config.transducer.decoder =
 *     "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
 *     "decoder-epoch-99-avg-1.onnx";
 * config.model_config.transducer.joiner =
 *     "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/"
 *     "joiner-epoch-99-avg-1.int8.onnx";
 * config.model_config.tokens =
 *     "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt";
 * config.model_config.num_threads = 1;
 * config.hr.lexicon = "./lexicon.txt";
 * config.hr.rule_fsts = "./replace.fst";
 * @endcode
 */
struct OnlineRecognizerConfig {
  /** Feature extraction configuration. */
  FeatureConfig feat_config;
  /** Acoustic model configuration. */
  OnlineModelConfig model_config;

  /** Decoding method such as `"greedy_search"` or `"modified_beam_search"`. */
  std::string decoding_method = "greedy_search";

  /** Maximum number of active paths for beam-search-style decoding. */
  int32_t max_active_paths = 4;

  /** Enable endpoint detection. */
  bool enable_endpoint = false;

  /** Endpointing rule 1 trailing silence threshold in seconds. */
  float rule1_min_trailing_silence = 2.4;

  /** Endpointing rule 2 trailing silence threshold in seconds. */
  float rule2_min_trailing_silence = 1.2;

  /** Endpointing rule 3 minimum utterance length in seconds. */
  float rule3_min_utterance_length = 20;

  /** Optional hotword file. */
  std::string hotwords_file;

  /** Hotword boost score. */
  float hotwords_score = 1.5;

  /** Optional CTC+FST decoder configuration. */
  OnlineCtcFstDecoderConfig ctc_fst_decoder_config;
  /** Optional ITN rule FST archive. */
  std::string rule_fsts;
  /** Optional ITN rule FAR archive. */
  std::string rule_fars;
  /** Optional blank penalty applied during decoding. */
  float blank_penalty = 0;

  /** Optional in-memory hotword definitions. */
  std::string hotwords_buf;
  /** Optional homophone replacement configuration. */
  HomophoneReplacerConfig hr;
};

/** @brief Current streaming ASR result copied into C++ containers. */
struct OnlineRecognizerResult {
  /** Decoded text. */
  std::string text;
  /** Token sequence. */
  std::vector<std::string> tokens;
  /** Per-token timestamps in seconds. */
  std::vector<float> timestamps;
  /** JSON representation of the result. */
  std::string json;
};

/** @brief Mono PCM waveform used by the helper I/O functions. */
struct Wave {
  /** Samples normalized to `[-1, 1]`. */
  std::vector<float> samples;
  /** Sample rate in Hz. */
  int32_t sample_rate = 0;
};

/**
 * @brief Read a mono WAVE file into a C++ value object.
 *
 * On failure, the returned wave has `samples.empty() == true`.
 *
 * @param filename Input WAVE filename.
 * @return Decoded wave data.
 */
SHERPA_ONNX_API Wave ReadWave(const std::string &filename);

/**
 * @brief Write a mono WAVE file from a C++ value object.
 *
 * @param filename Output filename.
 * @param wave PCM samples and sample rate to write.
 * @return `true` on success; `false` on failure.
 */
SHERPA_ONNX_API bool WriteWave(const std::string &filename, const Wave &wave);

/**
 * @brief Base class for move-only RAII wrappers around C handles.
 *
 * Derived classes implement `Destroy(const T *) const` and inherit automatic
 * destruction, `Get()`, and `Release()`.
 */
template <typename Derived, typename T>
class SHERPA_ONNX_API MoveOnly {
 public:
  /** @brief Construct an empty wrapper. */
  MoveOnly() = default;
  /** @brief Construct a wrapper from a raw C handle. */
  explicit MoveOnly(const T *p) : p_(p) {}

  /** @brief Destroy the wrapped handle if present. */
  ~MoveOnly() { Destroy(); }

  MoveOnly(const MoveOnly &) = delete;

  MoveOnly &operator=(const MoveOnly &) = delete;

  MoveOnly(MoveOnly &&other) : p_(other.Release()) {}

  MoveOnly &operator=(MoveOnly &&other) {
    if (&other == this) {
      return *this;
    }

    Destroy();

    p_ = other.Release();

    return *this;
  }

  /** @brief Return the wrapped raw pointer without transferring ownership. */
  const T *Get() const { return p_; }

  /** @brief Release ownership of the wrapped raw pointer. */
  const T *Release() {
    const T *p = p_;
    p_ = nullptr;
    return p;
  }

 private:
  void Destroy() {
    if (p_ == nullptr) {
      return;
    }

    static_cast<Derived *>(this)->Destroy(p_);

    p_ = nullptr;
  }

 protected:
  const T *p_ = nullptr;
};

class SHERPA_ONNX_API OnlineStream
    : public MoveOnly<OnlineStream, SherpaOnnxOnlineStream> {
 public:
  /** @brief Wrap an existing C online stream handle. */
  explicit OnlineStream(const SherpaOnnxOnlineStream *p);

  /** @brief Append audio samples to the stream. */
  void AcceptWaveform(int32_t sample_rate, const float *samples,
                      int32_t n) const;

  /** @brief Indicate that no more input audio will be provided. */
  void InputFinished() const;

  /** @brief Set a per-stream string option. */
  void SetOption(const char *key, const char *value) const;
  /** @brief Get a per-stream string option. */
  const char *GetOption(const char *key) const;
  /** @brief Check whether a per-stream option exists. */
  int32_t HasOption(const char *key) const;

  /** @brief Destroy the wrapped C handle. */
  void Destroy(const SherpaOnnxOnlineStream *p) const;
};

/**
 * @brief RAII wrapper for a streaming recognizer.
 *
 * Example:
 *
 * @code
 * OnlineRecognizer recognizer = OnlineRecognizer::Create(config);
 * OnlineStream stream = recognizer.CreateStream();
 * stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
 *                       wave.samples.size());
 * stream.InputFinished();
 * while (recognizer.IsReady(&stream)) {
 *   recognizer.Decode(&stream);
 * }
 * auto result = recognizer.GetResult(&stream);
 * @endcode
 */
class SHERPA_ONNX_API OnlineRecognizer
    : public MoveOnly<OnlineRecognizer, SherpaOnnxOnlineRecognizer> {
 public:
  /** @brief Create a streaming recognizer from a config struct. */
  static OnlineRecognizer Create(const OnlineRecognizerConfig &config);

  /** @brief Destroy the wrapped C handle. */
  void Destroy(const SherpaOnnxOnlineRecognizer *p) const;

  /** @brief Create a stream that uses the recognizer's configured hotwords. */
  OnlineStream CreateStream() const;

  /** @brief Create a stream with inline hotwords. */
  OnlineStream CreateStream(const std::string &hotwords) const;

  /** @brief Check whether the given stream has enough data to decode. */
  bool IsReady(const OnlineStream *s) const;

  /** @brief Decode one ready stream. */
  void Decode(const OnlineStream *s) const;

  /** @brief Decode multiple ready streams in parallel. */
  void Decode(const OnlineStream *ss, int32_t n) const;

  /** @brief Return the current recognition result for a stream. */
  OnlineRecognizerResult GetResult(const OnlineStream *s) const;

  /** @brief Reset a stream after endpointing or utterance completion. */
  void Reset(const OnlineStream *s) const;

  /** @brief Check whether endpointing has triggered for a stream. */
  bool IsEndpoint(const OnlineStream *s) const;

 private:
  explicit OnlineRecognizer(const SherpaOnnxOnlineRecognizer *p);
};

// ============================================================================
// Non-streaming ASR
// ============================================================================
/** @brief Offline transducer model files. */
struct OfflineTransducerModelConfig {
  /** Encoder ONNX model. */
  std::string encoder;
  /** Decoder ONNX model. */
  std::string decoder;
  /** Joiner ONNX model. */
  std::string joiner;
};

/** @brief Offline Paraformer model file. */
struct OfflineParaformerModelConfig {
  /** Model ONNX file. */
  std::string model;
};

/** @brief Offline NeMo EncDec CTC model file. */
struct OfflineNemoEncDecCtcModelConfig {
  /** Model ONNX file. */
  std::string model;
};

/** @brief Offline Whisper model configuration. */
struct OfflineWhisperModelConfig {
  /** Encoder ONNX model. */
  std::string encoder;
  /** Decoder ONNX model. */
  std::string decoder;
  /** Whisper language string such as `"en"` or `"zh"`. */
  std::string language;
  /** Task such as `"transcribe"` or `"translate"`. */
  std::string task = "transcribe";
  /** Optional tail paddings in samples. */
  int32_t tail_paddings = -1;
  /** Enable token timestamps in the result. */
  bool enable_token_timestamps = false;
  /** Enable segment timestamps in the result JSON. */
  bool enable_segment_timestamps = false;
};

/** @brief Offline Canary model configuration. */
struct OfflineCanaryModelConfig {
  /** Encoder ONNX model. */
  std::string encoder;
  /** Decoder ONNX model. */
  std::string decoder;
  /** Source language code. */
  std::string src_lang;
  /** Target language code. */
  std::string tgt_lang;
  /** Whether punctuation/casing is enabled by the model. */
  bool use_pnc = true;
};

/** @brief Offline FireRed ASR model files. */
struct OfflineFireRedAsrModelConfig {
  /** Encoder ONNX model. */
  std::string encoder;
  /** Decoder ONNX model. */
  std::string decoder;
};

/** @brief Offline FireRed ASR CTC model file. */
struct OfflineFireRedAsrCtcModelConfig {
  /** Model ONNX file. */
  std::string model;
};

/** @brief Offline TDNN model file. */
struct OfflineTdnnModelConfig {
  /** Model ONNX file. */
  std::string model;
};

/** @brief Offline SenseVoice model configuration. */
struct OfflineSenseVoiceModelConfig {
  /** Model ONNX file. */
  std::string model;
  /** Language hint. */
  std::string language;
  /** Enable inverse text normalization. */
  bool use_itn = false;
};

/** @brief Offline Dolphin model file. */
struct OfflineDolphinModelConfig {
  /** Model ONNX file. */
  std::string model;
};

/** @brief Offline Zipformer CTC model file. */
struct OfflineZipformerCtcModelConfig {
  /** Model ONNX file. */
  std::string model;
};

/** @brief Offline WeNet CTC model file. */
struct OfflineWenetCtcModelConfig {
  /** Model ONNX file. */
  std::string model;
};

/** @brief Offline omnilingual ASR CTC model file. */
struct OfflineOmnilingualAsrCtcModelConfig {
  /** Model ONNX file. */
  std::string model;
};

/** @brief Offline MedASR CTC model file. */
struct OfflineMedAsrCtcModelConfig {
  /** Model ONNX file. */
  std::string model;
};

/** @brief Offline Moonshine model configuration. */
struct OfflineMoonshineModelConfig {
  /** Preprocessor model file. */
  std::string preprocessor;
  /** Encoder model file. */
  std::string encoder;
  /** Uncached decoder model file. */
  std::string uncached_decoder;
  /** Cached decoder model file. */
  std::string cached_decoder;
  /** Merged decoder model file. */
  std::string merged_decoder;
};

/** @brief Offline FunASR Nano model configuration. */
struct OfflineFunASRNanoModelConfig {
  /** Encoder adaptor model file. */
  std::string encoder_adaptor;
  /** LLM model file. */
  std::string llm;
  /** Embedding model file. */
  std::string embedding;
  /** Tokenizer file. */
  std::string tokenizer;
  /** System prompt passed to the model. */
  std::string system_prompt = "You are a helpful assistant.";
  /** User prompt prefix passed to the model. */
  std::string user_prompt = "语音转写：";
  /** Maximum number of generated tokens. */
  int32_t max_new_tokens = 512;
  /** Sampling temperature. */
  float temperature = 1e-6f;
  /** Top-p sampling parameter. */
  float top_p = 0.8f;
  /** Random seed. */
  int32_t seed = 42;
  /** Language hint. */
  std::string language;
  /** Enable inverse text normalization. */
  bool itn = true;
  /** Optional hotwords string. */
  std::string hotwords;
};

/**
 * @brief Acoustic model configuration for offline ASR.
 *
 * Configure exactly one model family. If multiple model families are set, one
 * is chosen and the choice is implementation-defined.
 */
struct OfflineModelConfig {
  /** Offline transducer configuration. */
  OfflineTransducerModelConfig transducer;
  /** Offline Paraformer configuration. */
  OfflineParaformerModelConfig paraformer;
  /** Offline NeMo CTC configuration. */
  OfflineNemoEncDecCtcModelConfig nemo_ctc;
  /** Offline Whisper configuration. */
  OfflineWhisperModelConfig whisper;
  /** Offline TDNN configuration. */
  OfflineTdnnModelConfig tdnn;

  /** Token file. */
  std::string tokens;
  /** Number of inference threads. */
  int32_t num_threads = 1;
  /** Enable verbose debug logging. */
  bool debug = false;
  /** Execution provider such as `"cpu"`. */
  std::string provider = "cpu";
  /** Optional explicit model type hint. */
  std::string model_type;
  /** Modeling unit such as `"cjkchar"` or `"bpe"`. */
  std::string modeling_unit = "cjkchar";
  /** Optional BPE vocabulary. */
  std::string bpe_vocab;
  /** Telespeech CTC model file. */
  std::string telespeech_ctc;
  /** SenseVoice configuration. */
  OfflineSenseVoiceModelConfig sense_voice;
  /** Moonshine configuration. */
  OfflineMoonshineModelConfig moonshine;
  /** FireRed transducer configuration. */
  OfflineFireRedAsrModelConfig fire_red_asr;
  /** Dolphin configuration. */
  OfflineDolphinModelConfig dolphin;
  /** Zipformer CTC configuration. */
  OfflineZipformerCtcModelConfig zipformer_ctc;
  /** Canary configuration. */
  OfflineCanaryModelConfig canary;
  /** WeNet CTC configuration. */
  OfflineWenetCtcModelConfig wenet_ctc;
  /** Omnilingual ASR configuration. */
  OfflineOmnilingualAsrCtcModelConfig omnilingual;
  /** MedASR configuration. */
  OfflineMedAsrCtcModelConfig medasr;
  /** FunASR Nano configuration. */
  OfflineFunASRNanoModelConfig funasr_nano;
  /** FireRed CTC configuration. */
  OfflineFireRedAsrCtcModelConfig fire_red_asr_ctc;
};

/** @brief Optional language-model rescoring configuration for offline ASR. */
struct OfflineLMConfig {
  /** LM model file. */
  std::string model;
  /** LM scale. */
  float scale = 1.0;
};

/**
 * @brief Configuration for offline ASR.
 *
 * Example using SenseVoice:
 *
 * @code
 * OfflineRecognizerConfig config;
 * config.model_config.sense_voice.model =
 *     "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8/model.int8.onnx";
 * config.model_config.sense_voice.language = "auto";
 * config.model_config.sense_voice.use_itn = true;
 * config.model_config.tokens =
 *     "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8/tokens.txt";
 * config.model_config.num_threads = 1;
 * @endcode
 *
 * Example using Parakeet TDT v2:
 *
 * @code
 * OfflineRecognizerConfig config;
 * config.model_config.transducer.encoder =
 *     "./sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/encoder.int8.onnx";
 * config.model_config.transducer.decoder =
 *     "./sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/decoder.int8.onnx";
 * config.model_config.transducer.joiner =
 *     "./sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/joiner.int8.onnx";
 * config.model_config.tokens =
 *     "./sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/tokens.txt";
 * config.model_config.model_type = "nemo_transducer";
 * config.model_config.num_threads = 1;
 * @endcode
 */
struct OfflineRecognizerConfig {
  /** Feature extraction configuration. */
  FeatureConfig feat_config;
  /** Acoustic model configuration. */
  OfflineModelConfig model_config;
  /** Optional LM configuration. */
  OfflineLMConfig lm_config;

  /** Decoding method such as `"greedy_search"` or `"modified_beam_search"`. */
  std::string decoding_method = "greedy_search";
  /** Maximum number of active paths for beam-search-style decoding. */
  int32_t max_active_paths = 4;

  /** Optional hotword file. */
  std::string hotwords_file;

  /** Hotword boost score. */
  float hotwords_score = 1.5;
  /** Optional ITN rule FST archive. */
  std::string rule_fsts;
  /** Optional ITN rule FAR archive. */
  std::string rule_fars;
  /** Optional blank penalty applied during decoding. */
  float blank_penalty = 0;
  /** Optional homophone replacement configuration. */
  HomophoneReplacerConfig hr;
};

/** @brief Offline ASR result copied into C++ containers. */
struct OfflineRecognizerResult {
  /** Decoded text. */
  std::string text;
  /** Per-token timestamps in seconds when available. */
  std::vector<float> timestamps;
  /** Token sequence. */
  std::vector<std::string> tokens;
  /** JSON representation of the result. */
  std::string json;
  /** Detected language when provided by the model. */
  std::string lang;
  /** Detected emotion when provided by the model. */
  std::string emotion;
  /** Detected event when provided by the model. */
  std::string event;

  /** Non-empty only for TDT-style models. */
  std::vector<float> durations;
};

/** @brief RAII wrapper for an offline decoding stream. */
class SHERPA_ONNX_API OfflineStream
    : public MoveOnly<OfflineStream, SherpaOnnxOfflineStream> {
 public:
  /** @brief Wrap an existing C offline stream handle. */
  explicit OfflineStream(const SherpaOnnxOfflineStream *p);

  /** @brief Provide the complete waveform for offline decoding. */
  void AcceptWaveform(int32_t sample_rate, const float *samples,
                      int32_t n) const;

  /** @brief Set a per-stream string option. */
  void SetOption(const char *key, const char *value) const;
  /** @brief Get a per-stream string option. */
  const char *GetOption(const char *key) const;
  /** @brief Check whether a per-stream option exists. */
  int32_t HasOption(const char *key) const;

  /** @brief Destroy the wrapped C handle. */
  void Destroy(const SherpaOnnxOfflineStream *p) const;
};

/**
 * @brief RAII wrapper for an offline recognizer.
 *
 * For most offline models, call `AcceptWaveform()` once per stream, then call
 * `Decode()` and `GetResult()`.
 */
class SHERPA_ONNX_API OfflineRecognizer
    : public MoveOnly<OfflineRecognizer, SherpaOnnxOfflineRecognizer> {
 public:
  /** @brief Create an offline recognizer from a config struct. */
  static OfflineRecognizer Create(const OfflineRecognizerConfig &config);

  /** @brief Destroy the wrapped C handle. */
  void Destroy(const SherpaOnnxOfflineRecognizer *p) const;

  /** @brief Create a stream using the recognizer's configured hotwords. */
  OfflineStream CreateStream() const;

  /** @brief Create a stream with inline hotwords. */
  OfflineStream CreateStream(const std::string &hotwords) const;

  /** @brief Decode one offline stream. */
  void Decode(const OfflineStream *s) const;

  /** @brief Decode multiple offline streams in parallel. */
  void Decode(const OfflineStream *ss, int32_t n) const;

  /** @brief Return the copied recognition result for one stream. */
  OfflineRecognizerResult GetResult(const OfflineStream *s) const;

  /**
   * @brief Convenience wrapper that returns the result inside a shared pointer.
   *
   * This helper exists mainly for integration environments that prefer owning
   * pointers, such as Unreal Engine.
   */
  std::shared_ptr<OfflineRecognizerResult> GetResultPtr(
      const OfflineStream *s) const;

  /** @brief Update recognizer runtime configuration after creation. */
  void SetConfig(const OfflineRecognizerConfig &config) const;

 private:
  explicit OfflineRecognizer(const SherpaOnnxOfflineRecognizer *p);
};

// ============================================================================
// Non-streaming TTS
// ============================================================================
/** @brief VITS model configuration. */
struct OfflineTtsVitsModelConfig {
  /** Acoustic model file. */
  std::string model;
  /** Lexicon file. */
  std::string lexicon;
  /** Token file. */
  std::string tokens;
  /** Data directory such as `espeak-ng-data`. */
  std::string data_dir;
  /** Reserved field. Currently unused by the wrapper. */
  std::string dict_dir;

  /** VITS noise scale. */
  float noise_scale = 0.667;
  /** VITS noise scale for duration prediction. */
  float noise_scale_w = 0.8;
  /** Length scale. Values < 1 are faster; values > 1 are slower. */
  float length_scale = 1.0;
};

/** @brief Matcha model configuration. */
struct OfflineTtsMatchaModelConfig {
  /** Acoustic model file. */
  std::string acoustic_model;
  /** Vocoder model file. */
  std::string vocoder;
  /** Lexicon file. */
  std::string lexicon;
  /** Token file. */
  std::string tokens;
  /** Data directory such as `espeak-ng-data`. */
  std::string data_dir;
  /** Reserved field. Currently unused by the wrapper. */
  std::string dict_dir;

  /** Matcha noise scale. */
  float noise_scale = 0.667;
  /** Length scale. Values < 1 are faster; values > 1 are slower. */
  float length_scale = 1.0;
};

/** @brief Kokoro model configuration. */
struct OfflineTtsKokoroModelConfig {
  /** Acoustic model file. */
  std::string model;
  /** Voices file. */
  std::string voices;
  /** Token file. */
  std::string tokens;
  /** Data directory such as `espeak-ng-data`. */
  std::string data_dir;
  /** Reserved field. Currently unused by the wrapper. */
  std::string dict_dir;
  /** Optional lexicon file. */
  std::string lexicon;
  /** Language/voice family hint. */
  std::string lang;

  /** Length scale. Values < 1 are faster; values > 1 are slower. */
  float length_scale = 1.0;
};

/** @brief Kitten model configuration. */
struct OfflineTtsKittenModelConfig {
  /** Acoustic model file. */
  std::string model;
  /** Voices file. */
  std::string voices;
  /** Token file. */
  std::string tokens;
  /** Data directory. */
  std::string data_dir;

  /** Length scale. Values < 1 are faster; values > 1 are slower. */
  float length_scale = 1.0;
};

/** @brief ZipVoice model configuration. */
struct OfflineTtsZipvoiceModelConfig {
  /** Token file. */
  std::string tokens;
  /** Encoder model file. */
  std::string encoder;
  /** Decoder model file. */
  std::string decoder;
  /** Vocoder model file. */
  std::string vocoder;
  /** Data directory. */
  std::string data_dir;
  /** Lexicon file. */
  std::string lexicon;

  /** Feature scale. */
  float feat_scale = 0.1;
  /** Time shift. */
  float t_shift = 0.5;
  /** Target RMS. */
  float target_rms = 0.1;
  /** Guidance scale. */
  float guidance_scale = 1.0;
};

/** @brief Pocket TTS model configuration. */
struct OfflineTtsPocketModelConfig {
  /** Flow model file. */
  std::string lm_flow;
  /** Main language model file. */
  std::string lm_main;
  /** Encoder model file. */
  std::string encoder;
  /** Decoder model file. */
  std::string decoder;
  /** Text conditioner model file. */
  std::string text_conditioner;

  /** Vocabulary JSON file. */
  std::string vocab_json;
  /** Token scores JSON file. */
  std::string token_scores_json;
  /** Voice embedding cache size. */
  int32_t voice_embedding_cache_capacity = 50;
};

/** @brief Supertonic model configuration. */
struct OfflineTtsSupertonicModelConfig {
  /** Duration predictor model file. */
  std::string duration_predictor;
  /** Text encoder model file. */
  std::string text_encoder;
  /** Vector estimator model file. */
  std::string vector_estimator;
  /** Vocoder model file. */
  std::string vocoder;
  /** Model metadata JSON. */
  std::string tts_json;
  /** Unicode indexer resource. */
  std::string unicode_indexer;
  /** Voice style resource. */
  std::string voice_style;
};

/**
 * @brief Model configuration for offline TTS.
 *
 * Configure exactly one model family. If multiple model families are set, one
 * is chosen and the choice is implementation-defined.
 */
struct OfflineTtsModelConfig {
  /** VITS configuration. */
  OfflineTtsVitsModelConfig vits;
  /** Matcha configuration. */
  OfflineTtsMatchaModelConfig matcha;
  /** Kokoro configuration. */
  OfflineTtsKokoroModelConfig kokoro;
  /** Kitten configuration. */
  OfflineTtsKittenModelConfig kitten;
  /** ZipVoice configuration. */
  OfflineTtsZipvoiceModelConfig zipvoice;
  /** Pocket configuration. */
  OfflineTtsPocketModelConfig pocket;
  /** Supertonic configuration. */
  OfflineTtsSupertonicModelConfig supertonic;

  /** Number of inference threads. */
  int32_t num_threads = 1;
  /** Enable verbose debug logging. */
  bool debug = false;
  /** Execution provider such as `"cpu"`. */
  std::string provider = "cpu";
};

/** @brief Generation-time options for advanced TTS synthesis. */
struct GenerationConfig {
  /** Silence scale between sentences. */
  float silence_scale = 0.2;
  /** Speech speed. Used only by some models. */
  float speed = 1.0;
  /** Speaker ID for multi-speaker models. */
  int32_t sid = 0;
  /** Reference audio samples for zero-shot or voice-cloning models. */
  std::vector<float> reference_audio;
  /** Sample rate of `reference_audio`. */
  int32_t reference_sample_rate = 0;
  /** Optional reference text. Not all models require it. */
  std::string reference_text;
  /** Number of flow-matching steps when supported. */
  int32_t num_steps = 5;

  /** Model-specific extra attributes serialized to JSON internally. */
  std::unordered_map<std::string, std::string> extra;
};

/** @brief Configuration for offline TTS. */
struct OfflineTtsConfig {
  /** Model configuration. */
  OfflineTtsModelConfig model;
  /** Optional ITN rule FST archive. */
  std::string rule_fsts;
  /** Optional ITN rule FAR archive. */
  std::string rule_fars;
  /** Sentence chunking limit for generation. */
  int32_t max_num_sentences = 1;
  /** Silence scale between generated sentences. */
  float silence_scale = 0.2;
};

/** @brief Generated audio returned by the C++ TTS wrapper. */
struct GeneratedAudio {
  /** Output samples normalized to `[-1, 1]`. */
  std::vector<float> samples;
  /** Output sample rate in Hz. */
  int32_t sample_rate = 0;
};

/**
 * @brief TTS progress callback.
 *
 * Return 1 to continue generating and 0 to stop early.
 */
using OfflineTtsCallback = int32_t (*)(const float *samples,
                                       int32_t num_samples, float progress,
                                       void *arg);

/**
 * @brief RAII wrapper for offline TTS.
 *
 * Example using Pocket TTS:
 *
 * @code
 * OfflineTtsConfig config;
 * config.model.pocket.lm_flow =
 *     "./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_flow.int8.onnx";
 * config.model.pocket.lm_main =
 *     "./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_main.int8.onnx";
 * config.model.pocket.encoder =
 *     "./sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx";
 * config.model.pocket.decoder =
 *     "./sherpa-onnx-pocket-tts-int8-2026-01-26/decoder.int8.onnx";
 * config.model.pocket.text_conditioner =
 *     "./sherpa-onnx-pocket-tts-int8-2026-01-26/text_conditioner.onnx";
 * config.model.pocket.vocab_json =
 *     "./sherpa-onnx-pocket-tts-int8-2026-01-26/vocab.json";
 * config.model.pocket.token_scores_json =
 *     "./sherpa-onnx-pocket-tts-int8-2026-01-26/token_scores.json";
 * @endcode
 */
class SHERPA_ONNX_API OfflineTts
    : public MoveOnly<OfflineTts, SherpaOnnxOfflineTts> {
 public:
  /** @brief Create an offline TTS engine. */
  static OfflineTts Create(const OfflineTtsConfig &config);

  /** @brief Destroy the wrapped C handle. */
  void Destroy(const SherpaOnnxOfflineTts *p) const;

  /** @brief Return the output sample rate of generated audio. */
  int32_t SampleRate() const;

  /** @brief Return the number of supported speakers. */
  int32_t NumSpeakers() const;

  /**
   * @brief Generate speech using the simple speaker-id and speed interface.
   *
   * This overload mirrors the legacy/simple TTS API. Prefer the
   * `GenerationConfig` overload for new code.
   */
  GeneratedAudio Generate(const std::string &text, int32_t sid = 0,
                          float speed = 1.0,
                          OfflineTtsCallback callback = nullptr,
                          void *arg = nullptr) const;

  /** @brief Generate speech using the advanced generation configuration. */
  GeneratedAudio Generate(const std::string &text,
                          const GenerationConfig &config,
                          OfflineTtsCallback callback = nullptr,
                          void *arg = nullptr) const;

  /** @brief Like Generate(), but returns a shared pointer to the result. */
  std::shared_ptr<GeneratedAudio> Generate2(
      const std::string &text, int32_t sid = 0, float speed = 1.0,
      OfflineTtsCallback callback = nullptr, void *arg = nullptr) const;

  /** @brief Like the advanced Generate() overload, but returns a shared
   * pointer. */
  std::shared_ptr<GeneratedAudio> Generate2(
      const std::string &text, const GenerationConfig &config,
      OfflineTtsCallback callback = nullptr, void *arg = nullptr) const;

 private:
  explicit OfflineTts(const SherpaOnnxOfflineTts *p);
};

// ============================================================
// For Keyword Spotter
// ============================================================

/** @brief Current keyword spotting result copied into C++ containers. */
struct KeywordResult {
  /** Triggered keyword text. */
  std::string keyword;
  /** Decoded token sequence. */
  std::vector<std::string> tokens;
  /** Per-token timestamps in seconds. */
  std::vector<float> timestamps;
  /** Segment start time in seconds. */
  float start_time = 0.0f;
  /** JSON representation of the result. */
  std::string json;
};

/** @brief Configuration for the C++ keyword spotting wrapper. */
struct KeywordSpotterConfig {
  /** Feature extraction configuration. */
  FeatureConfig feat_config;
  /** Streaming acoustic model configuration. */
  OnlineModelConfig model_config;
  /** Maximum number of active paths. */
  int32_t max_active_paths = 4;
  /** Number of trailing blanks required before finalizing a trigger. */
  int32_t num_trailing_blanks = 1;
  /** Keyword score bonus. */
  float keywords_score = 1.0f;
  /** Detection threshold. */
  float keywords_threshold = 0.25f;
  /** Keyword file. */
  std::string keywords_file;
  /** In-memory keyword definitions. */
  std::string keywords_buf;
};

/** @brief RAII wrapper for keyword spotting. */
class SHERPA_ONNX_API KeywordSpotter
    : public MoveOnly<KeywordSpotter, SherpaOnnxKeywordSpotter> {
 public:
  /** @brief Create a keyword spotter from a config struct. */
  static KeywordSpotter Create(const KeywordSpotterConfig &config);

  /** @brief Destroy the wrapped C handle. */
  void Destroy(const SherpaOnnxKeywordSpotter *p) const;

  /** @brief Create a keyword stream using configured keywords. */
  OnlineStream CreateStream() const;

  /** @brief Create a keyword stream with inline extra or replacement keywords.
   */
  OnlineStream CreateStream(const std::string &keywords) const;

  /** @brief Check whether the stream has enough data to decode. */
  bool IsReady(const OnlineStream *s) const;

  /** @brief Decode one ready stream. */
  void Decode(const OnlineStream *s) const;

  /** @brief Decode multiple ready streams in parallel. */
  void Decode(const OnlineStream *ss, int32_t n) const;

  /** @brief Reset a stream after a keyword trigger. */
  void Reset(const OnlineStream *s) const;

  /** @brief Return the copied keyword spotting result for a stream. */
  KeywordResult GetResult(const OnlineStream *s) const;

 private:
  explicit KeywordSpotter(const SherpaOnnxKeywordSpotter *p);
};

/** @brief GTCRN speech denoiser model configuration. */
struct OfflineSpeechDenoiserGtcrnModelConfig {
  /** Model ONNX file. */
  std::string model;
};

/** @brief DPDFNet speech denoiser model configuration. */
struct OfflineSpeechDenoiserDpdfNetModelConfig {
  /** Model ONNX file. */
  std::string model;
};

/**
 * @brief Speech denoiser model configuration.
 *
 * Configure exactly one model family. If multiple model families are set, one
 * is chosen and the choice is implementation-defined.
 */
struct OfflineSpeechDenoiserModelConfig {
  /** GTCRN configuration. */
  OfflineSpeechDenoiserGtcrnModelConfig gtcrn;
  /** DPDFNet configuration. */
  OfflineSpeechDenoiserDpdfNetModelConfig dpdfnet;
  /** Number of inference threads. */
  int32_t num_threads = 1;
  /** Enable verbose debug logging. */
  bool debug = false;
  /** Execution provider such as `"cpu"`. */
  std::string provider = "cpu";
};

/** @brief Configuration for offline speech denoising. */
struct OfflineSpeechDenoiserConfig {
  /** Model configuration. */
  OfflineSpeechDenoiserModelConfig model;
};

/** @brief Denoised waveform returned by speech enhancement wrappers. */
struct DenoisedAudio {
  /** Output samples normalized to `[-1, 1]`. */
  std::vector<float> samples;
  /** Output sample rate in Hz. */
  int32_t sample_rate = 0;
};

/** @brief RAII wrapper for offline speech denoising. */
class SHERPA_ONNX_API OfflineSpeechDenoiser
    : public MoveOnly<OfflineSpeechDenoiser, SherpaOnnxOfflineSpeechDenoiser> {
 public:
  /** @brief Create an offline speech denoiser. */
  static OfflineSpeechDenoiser Create(
      const OfflineSpeechDenoiserConfig &config);

  /** @brief Destroy the wrapped C handle. */
  void Destroy(const SherpaOnnxOfflineSpeechDenoiser *p) const;

  /** @brief Run denoising on a complete waveform. */
  DenoisedAudio Run(const float *samples, int32_t n, int32_t sample_rate) const;

  /** @brief Return the expected input sample rate. */
  int32_t GetSampleRate() const;

 private:
  explicit OfflineSpeechDenoiser(const SherpaOnnxOfflineSpeechDenoiser *p);
};

/** @brief Configuration for online speech denoising. */
struct OnlineSpeechDenoiserConfig {
  /** Model configuration. */
  OfflineSpeechDenoiserModelConfig model;
};

/** @brief RAII wrapper for online speech denoising. */
class SHERPA_ONNX_API OnlineSpeechDenoiser
    : public MoveOnly<OnlineSpeechDenoiser, SherpaOnnxOnlineSpeechDenoiser> {
 public:
  /** @brief Create an online speech denoiser. */
  static OnlineSpeechDenoiser Create(const OnlineSpeechDenoiserConfig &config);

  /** @brief Destroy the wrapped C handle. */
  void Destroy(const SherpaOnnxOnlineSpeechDenoiser *p) const;

  /** @brief Process one chunk of streaming audio. */
  DenoisedAudio Run(const float *samples, int32_t n, int32_t sample_rate) const;

  /** @brief Flush buffered audio and reset the denoiser. */
  DenoisedAudio Flush() const;

  /** @brief Reset the denoiser for a new stream. */
  void Reset() const;

  /** @brief Return the expected input sample rate. */
  int32_t GetSampleRate() const;

  /** @brief Return the recommended frame shift in samples for streaming input.
   */
  int32_t GetFrameShiftInSamples() const;

 private:
  explicit OnlineSpeechDenoiser(const SherpaOnnxOnlineSpeechDenoiser *p);
};

// ==============================
// VAD
// ==============================

/** @brief Silero VAD model configuration. */
struct SileroVadModelConfig {
  /** Model ONNX file. */
  std::string model;
  /** Detection threshold. */
  float threshold = 0.5;
  /** Minimum silence duration in seconds. */
  float min_silence_duration = 0.5;
  /** Minimum speech duration in seconds. */
  float min_speech_duration = 0.25;
  /** Window size in samples. */
  int32_t window_size = 512;
  /** Maximum speech duration in seconds before forced split. */
  float max_speech_duration = 20;
};

/** @brief Ten VAD model configuration. */
struct TenVadModelConfig {
  /** Model ONNX file. */
  std::string model;
  /** Detection threshold. */
  float threshold = 0.5;
  /** Minimum silence duration in seconds. */
  float min_silence_duration = 0.5;
  /** Minimum speech duration in seconds. */
  float min_speech_duration = 0.25;
  /** Window size in samples. */
  int32_t window_size = 256;
  /** Maximum speech duration in seconds before forced split. */
  float max_speech_duration = 20;
};

/**
 * @brief VAD model configuration.
 *
 * Configure exactly one model family. If multiple model families are set, one
 * is chosen and the choice is implementation-defined.
 */
struct VadModelConfig {
  /** Silero VAD configuration. */
  SileroVadModelConfig silero_vad;
  /** Ten VAD configuration. */
  TenVadModelConfig ten_vad;

  /** Input sample rate in Hz. */
  int32_t sample_rate = 16000;
  /** Number of inference threads. */
  int32_t num_threads = 1;
  /** Execution provider such as `"cpu"`. */
  std::string provider = "cpu";
  /** Enable verbose debug logging. */
  bool debug = false;
};

/** @brief One speech segment produced by the VAD wrapper. */
struct SpeechSegment {
  /** Start sample index relative to the processed audio timeline. */
  int32_t start = 0;
  /** Speech samples for the segment. */
  std::vector<float> samples;
};

/** @brief RAII wrapper for the circular buffer helper used by VAD. */
class SHERPA_ONNX_API CircularBuffer
    : public MoveOnly<CircularBuffer, SherpaOnnxCircularBuffer> {
 public:
  /** @brief Create a circular buffer with the given capacity in samples. */
  static CircularBuffer Create(int32_t capacity);

  /** @brief Destroy the wrapped C handle. */
  void Destroy(const SherpaOnnxCircularBuffer *p) const;

  /** @brief Append samples to the buffer. */
  void Push(const float *p, int32_t n) const;

  /** @brief Copy a contiguous span from the buffer. */
  std::vector<float> Get(int32_t start_index, int32_t n) const;

  /** @brief Remove samples from the head of the buffer. */
  void Pop(int32_t n) const;

  /** @brief Return the number of stored samples. */
  int32_t Size() const;

  /** @brief Return the current head index. */
  int32_t Head() const;

  /** @brief Reset the buffer to empty. */
  void Reset() const;

 private:
  explicit CircularBuffer(const SherpaOnnxCircularBuffer *p);
};

/**
 * @brief RAII wrapper for voice activity detection.
 *
 * The wrapper collects detected speech segments internally. Use `IsEmpty()`,
 * `Front()`, and `Pop()` to consume them.
 */
class SHERPA_ONNX_API VoiceActivityDetector
    : public MoveOnly<VoiceActivityDetector, SherpaOnnxVoiceActivityDetector> {
 public:
  /** @brief Create a VAD instance. */
  static VoiceActivityDetector Create(const VadModelConfig &config,
                                      float buffer_size_in_seconds);

  /** @brief Destroy the wrapped C handle. */
  void Destroy(const SherpaOnnxVoiceActivityDetector *p) const;

  /** @brief Feed more audio samples to the detector. */
  void AcceptWaveform(const float *samples, int32_t n) const;

  /** @brief Check whether no speech segments are currently queued. */
  bool IsEmpty() const;

  /** @brief Check whether speech is currently detected. */
  bool IsDetected() const;

  /** @brief Remove the front queued speech segment. */
  void Pop() const;

  /** @brief Remove all queued speech segments. */
  void Clear() const;

  /** @brief Return the front queued speech segment. */
  SpeechSegment Front() const;

  /** @brief Like Front(), but returns the segment in a shared pointer. */
  std::shared_ptr<SpeechSegment> FrontPtr() const;

  /** @brief Reset the detector state. */
  void Reset() const;

  /** @brief Flush buffered context at end of input. */
  void Flush() const;

 private:
  explicit VoiceActivityDetector(const SherpaOnnxVoiceActivityDetector *p);
};

/** @brief RAII wrapper for linear resampling. */
class SHERPA_ONNX_API LinearResampler
    : public MoveOnly<LinearResampler, SherpaOnnxLinearResampler> {
 public:
  /** @brief Construct an empty wrapper. */
  LinearResampler() = default;
  /** @brief Create a linear resampler. */
  static LinearResampler Create(int32_t samp_rate_in_hz,
                                int32_t samp_rate_out_hz,
                                float filter_cutoff_hz, int32_t num_zeros);

  /** @brief Destroy the wrapped C handle. */
  void Destroy(const SherpaOnnxLinearResampler *p) const;

  /** @brief Reset the resampler state. */
  void Reset() const;

  /** @brief Resample one chunk of input audio. */
  std::vector<float> Resample(const float *input, int32_t input_dim,
                              bool flush) const;

  /** @brief Return the input sample rate in Hz. */
  int32_t GetInputSamplingRate() const;
  /** @brief Return the output sample rate in Hz. */
  int32_t GetOutputSamplingRate() const;

 private:
  explicit LinearResampler(const SherpaOnnxLinearResampler *p);
};

/** @brief Return the sherpa-onnx version string as a C++ string. */
SHERPA_ONNX_API std::string GetVersionStr();
/** @brief Return the build Git SHA1 as a C++ string. */
SHERPA_ONNX_API std::string GetGitSha1();
/** @brief Return the build Git date as a C++ string. */
SHERPA_ONNX_API std::string GetGitDate();
/** @brief Return `true` if a file exists. */
SHERPA_ONNX_API bool FileExists(const std::string &filename);

// ============================================================================
// Offline Punctuation
// ============================================================================
/** @brief Offline punctuation model configuration. */
struct OfflinePunctuationModelConfig {
  /** Model file. */
  std::string ct_transformer;
  /** Number of inference threads. */
  int32_t num_threads = 1;
  /** Enable verbose debug logging. */
  bool debug = false;
  /** Execution provider such as `"cpu"`. */
  std::string provider = "cpu";
};

/** @brief Configuration for offline punctuation. */
struct OfflinePunctuationConfig {
  /** Model configuration. */
  OfflinePunctuationModelConfig model;
};

/** @brief RAII wrapper for offline punctuation restoration. */
class SHERPA_ONNX_API OfflinePunctuation
    : public MoveOnly<OfflinePunctuation, SherpaOnnxOfflinePunctuation> {
 public:
  /** @brief Create an offline punctuation model. */
  static OfflinePunctuation Create(const OfflinePunctuationConfig &config);

  /** @brief Destroy the wrapped C handle. */
  void Destroy(const SherpaOnnxOfflinePunctuation *p) const;

  /** @brief Add punctuation to a complete input text. */
  std::string AddPunctuation(const std::string &text) const;

 private:
  explicit OfflinePunctuation(const SherpaOnnxOfflinePunctuation *p);
};

// ============================================================================
// Online Punctuation
// ============================================================================
/** @brief Online punctuation model configuration. */
struct OnlinePunctuationModelConfig {
  /** Model file. */
  std::string cnn_bilstm;
  /** BPE vocabulary file. */
  std::string bpe_vocab;
  /** Number of inference threads. */
  int32_t num_threads = 1;
  /** Enable verbose debug logging. */
  bool debug = false;
  /** Execution provider such as `"cpu"`. */
  std::string provider = "cpu";
};

/** @brief Configuration for online punctuation. */
struct OnlinePunctuationConfig {
  /** Model configuration. */
  OnlinePunctuationModelConfig model;
};

/** @brief RAII wrapper for online punctuation restoration. */
class SHERPA_ONNX_API OnlinePunctuation
    : public MoveOnly<OnlinePunctuation, SherpaOnnxOnlinePunctuation> {
 public:
  /** @brief Create an online punctuation model. */
  static OnlinePunctuation Create(const OnlinePunctuationConfig &config);

  /** @brief Destroy the wrapped C handle. */
  void Destroy(const SherpaOnnxOnlinePunctuation *p) const;

  /** @brief Add punctuation to one input text chunk. */
  std::string AddPunctuation(const std::string &text) const;

 private:
  explicit OnlinePunctuation(const SherpaOnnxOnlinePunctuation *p);
};

// ============================================================================
// Audio tagging
// ============================================================================
/** @brief Zipformer audio-tagging model configuration. */
struct OfflineZipformerAudioTaggingModelConfig {
  /** Model file. */
  std::string model;
};

/**
 * @brief Audio-tagging model configuration.
 *
 * Configure exactly one model family. If multiple model families are set, one
 * is chosen and the choice is implementation-defined.
 */
struct AudioTaggingModelConfig {
  /** Zipformer model configuration. */
  OfflineZipformerAudioTaggingModelConfig zipformer;
  /** Alternative CED model file. */
  std::string ced;
  /** Number of inference threads. */
  int32_t num_threads = 1;
  /** Enable verbose debug logging. */
  bool debug = false;
  /** Execution provider such as `"cpu"`. */
  std::string provider = "cpu";
};

/** @brief Configuration for audio tagging. */
struct AudioTaggingConfig {
  /** Model configuration. */
  AudioTaggingModelConfig model;
  /** CSV file containing label names. */
  std::string labels;
  /** Default number of results to return. */
  int32_t top_k = 5;
};

/** @brief One audio-tagging event returned by the C++ wrapper. */
struct AudioEvent {
  /** Event label. */
  std::string name;
  /** Class index. */
  int32_t index;
  /** Probability or confidence score. */
  float prob;
};

/** @brief RAII wrapper for audio tagging. */
class SHERPA_ONNX_API AudioTagging
    : public MoveOnly<AudioTagging, SherpaOnnxAudioTagging> {
 public:
  /** @brief Create an audio tagger. */
  static AudioTagging Create(const AudioTaggingConfig &config);

  /** @brief Destroy the wrapped C handle. */
  void Destroy(const SherpaOnnxAudioTagging *p) const;

  /** @brief Create an offline stream for tagging. */
  OfflineStream CreateStream() const;
  /**
   * @brief Run audio tagging and return copied results.
   *
   * When `top_k == -1`, the wrapper uses `config.top_k`. When `top_k > 0`,
   * that argument overrides the configured default.
   */
  std::vector<AudioEvent> Compute(const OfflineStream *s, int32_t top_k = -1);

  /** @brief Like Compute(), but returns the result vector in a shared pointer.
   */
  std::shared_ptr<std::vector<AudioEvent>> ComputePtr(const OfflineStream *s,
                                                      int32_t top_k = -1);

 private:
  explicit AudioTagging(const SherpaOnnxAudioTagging *p);
};

}  // namespace sherpa_onnx::cxx

#endif  // SHERPA_ONNX_C_API_CXX_API_H_


================================================
FILE: sherpa-onnx/c-api/generate.sh
================================================
#!/usr/bin/env bash
set -ex

nm -g ../../build/lib/libsherpa-onnx-c-api.dylib | awk '$2=="T" && $3 ~ /^_Sherpa/ {print $3}' | sort  > ./sherpa-onnx-symbols-c.exp


================================================
FILE: sherpa-onnx/c-api/mainpage.md
================================================
# sherpa-onnx public API documentation

This documentation covers the public native APIs shipped in:

- `c-api.h` — the C API
- `cxx-api.h` — the C++ wrapper built on top of the C API

These headers expose the main sherpa-onnx inference features for native
applications and for language bindings that need a stable ABI.

## What is documented here

The generated docs include the public APIs for:

- streaming ASR
- non-streaming ASR
- keyword spotting
- voice activity detection
- offline text-to-speech
- spoken language identification
- speaker embedding extraction and speaker management
- audio tagging
- offline and online punctuation
- linear resampling
- offline speaker diarization
- offline and online speech enhancement

The C API also includes HarmonyOS-specific constructor variants where
applicable.

## Which header should I use?

Use `c-api.h` if you are:

- writing C code
- building FFI bindings for other languages
- integrating through a plain C ABI

Use `cxx-api.h` if you are:

- writing C++ code directly
- preferring RAII wrappers over manual destroy/free calls
- preferring `std::string`, `std::vector`, and move-only wrapper classes

## Common ownership rules

For the C API:

- objects created by `SherpaOnnxCreate*()` are usually destroyed with a
  matching `SherpaOnnxDestroy*()`
- result snapshots, returned strings, and returned arrays must be released with
  the specific matching free/destroy function documented on each API
- some helpers return pointers to statically owned strings; those must not be
  freed

For the C++ API:

- wrapper classes are move-only and use RAII
- copied result objects are returned as standard C++ value types
- callers normally do not need to manage the underlying C pointers directly

## Typical workflow

For both APIs, the usual flow is:

1. create and fill a config object
2. create the engine or recognizer
3. create a stream if the feature is stream-based
4. feed audio or text
5. run decode/compute/generate
6. read back results
7. destroy resources, or let the C++ wrappers clean them up automatically

## Recommended entry points

Start with:

- [`c-api.h`](https://github.com/k2-fsa/sherpa-onnx/blob/master/sherpa-onnx/c-api/c-api.h)
  for the plain C API
- [`cxx-api.h`](https://github.com/k2-fsa/sherpa-onnx/blob/master/sherpa-onnx/c-api/cxx-api.h)
  for the C++ wrapper

Representative example programs live in:

- [`c-api-examples/`](https://github.com/k2-fsa/sherpa-onnx/tree/master/c-api-examples)
- [`cxx-api-examples/`](https://github.com/k2-fsa/sherpa-onnx/tree/master/cxx-api-examples)

Useful examples include:

- [`decode-file-c-api.c`](https://github.com/k2-fsa/sherpa-onnx/blob/master/c-api-examples/decode-file-c-api.c)
- [`whisper-c-api.c`](https://github.com/k2-fsa/sherpa-onnx/blob/master/c-api-examples/whisper-c-api.c)
- [`sense-voice-c-api.c`](https://github.com/k2-fsa/sherpa-onnx/blob/master/c-api-examples/sense-voice-c-api.c)
- [`nemo-parakeet-c-api.c`](https://github.com/k2-fsa/sherpa-onnx/blob/master/c-api-examples/nemo-parakeet-c-api.c)
- [`streaming-zipformer-with-hr-cxx-api.cc`](https://github.com/k2-fsa/sherpa-onnx/blob/master/cxx-api-examples/streaming-zipformer-with-hr-cxx-api.cc)
- [`sense-voice-cxx-api.cc`](https://github.com/k2-fsa/sherpa-onnx/blob/master/cxx-api-examples/sense-voice-cxx-api.cc)
- [`pocket-tts-en-cxx-api.cc`](https://github.com/k2-fsa/sherpa-onnx/blob/master/cxx-api-examples/pocket-tts-en-cxx-api.cc)
- [`vad-cxx-api.cc`](https://github.com/k2-fsa/sherpa-onnx/blob/master/cxx-api-examples/vad-cxx-api.cc)

## Generating the documentation

From `sherpa-onnx/c-api/`, run:

```bash
doxygen Doxyfile
```

HTML output is written to:

```text
doxygen-docs/html/
```


================================================
FILE: sherpa-onnx/c-api/sherpa-onnx-symbols-c.exp
================================================
_SherpaOfflinePunctuationAddPunct
_SherpaOfflinePunctuationFreeText
_SherpaOnnxAcceptWaveformOffline
_SherpaOnnxAudioTaggingCompute
_SherpaOnnxAudioTaggingCreateOfflineStream
_SherpaOnnxAudioTaggingFreeResults
_SherpaOnnxCircularBufferFree
_SherpaOnnxCircularBufferGet
_SherpaOnnxCircularBufferHead
_SherpaOnnxCircularBufferPop
_SherpaOnnxCircularBufferPush
_SherpaOnnxCircularBufferReset
_SherpaOnnxCircularBufferSize
_SherpaOnnxCreateAudioTagging
_SherpaOnnxCreateCircularBuffer
_SherpaOnnxCreateDisplay
_SherpaOnnxCreateKeywordSpotter
_SherpaOnnxCreateKeywordStream
_SherpaOnnxCreateKeywordStreamWithKeywords
_SherpaOnnxCreateLinearResampler
_SherpaOnnxCreateOfflinePunctuation
_SherpaOnnxCreateOfflineRecognizer
_SherpaOnnxCreateOfflineSpeakerDiarization
_SherpaOnnxCreateOfflineSpeechDenoiser
_SherpaOnnxCreateOfflineStream
_SherpaOnnxCreateOfflineStreamWithHotwords
_SherpaOnnxCreateOfflineTts
_SherpaOnnxCreateOnlineSpeechDenoiser
_SherpaOnnxCreateOnlinePunctuation
_SherpaOnnxCreateOnlineRecognizer
_SherpaOnnxCreateOnlineStream
_SherpaOnnxCreateOnlineStreamWithHotwords
_SherpaOnnxCreateSpeakerEmbeddingExtractor
_SherpaOnnxCreateSpeakerEmbeddingManager
_SherpaOnnxCreateSpokenLanguageIdentification
_SherpaOnnxCreateVoiceActivityDetector
_SherpaOnnxDecodeKeywordStream
_SherpaOnnxDecodeMultipleKeywordStreams
_SherpaOnnxDecodeMultipleOfflineStreams
_SherpaOnnxDecodeMultipleOnlineStreams
_SherpaOnnxDecodeOfflineStream
_SherpaOnnxDecodeOnlineStream
_SherpaOnnxDestroyAudioTagging
_SherpaOnnxDestroyCircularBuffer
_SherpaOnnxDestroyDenoisedAudio
_SherpaOnnxDestroyDisplay
_SherpaOnnxDestroyKeywordResult
_SherpaOnnxDestroyKeywordSpotter
_SherpaOnnxDestroyLinearResampler
_SherpaOnnxDestroyOfflinePunctuation
_SherpaOnnxDestroyOfflineRecognizer
_SherpaOnnxDestroyOfflineRecognizerResult
_SherpaOnnxDestroyOfflineSpeakerDiarization
_SherpaOnnxDestroyOfflineSpeechDenoiser
_SherpaOnnxDestroyOfflineStream
_SherpaOnnxDestroyOfflineStreamResultJson
_SherpaOnnxDestroyOfflineTts
_SherpaOnnxDestroyOfflineTtsGeneratedAudio
_SherpaOnnxDestroyOnlineSpeechDenoiser
_SherpaOnnxDestroyOnlinePunctuation
_SherpaOnnxDestroyOnlineRecognizer
_SherpaOnnxDestroyOnlineRecognizerResult
_SherpaOnnxDestroyOnlineStream
_SherpaOnnxDestroyOnlineStreamResultJson
_SherpaOnnxDestroySpeakerEmbeddingExtractor
_SherpaOnnxDestroySpeakerEmbeddingManager
_SherpaOnnxDestroySpeechSegment
_SherpaOnnxDestroySpokenLanguageIdentification
_SherpaOnnxDestroySpokenLanguageIdentificationResult
_SherpaOnnxDestroyVoiceActivityDetector
_SherpaOnnxFileExists
_SherpaOnnxFreeKeywordResultJson
_SherpaOnnxFreeWave
_SherpaOnnxGetGitDate
_SherpaOnnxGetGitSha1
_SherpaOnnxGetKeywordResult
_SherpaOnnxGetKeywordResultAsJson
_SherpaOnnxGetOfflineStreamResult
_SherpaOnnxGetOfflineStreamResultAsJson
_SherpaOnnxGetOnlineStreamResult
_SherpaOnnxGetOnlineStreamResultAsJson
_SherpaOnnxGetVersionStr
_SherpaOnnxIsKeywordStreamReady
_SherpaOnnxIsOnlineStreamReady
_SherpaOnnxLinearResamplerResample
_SherpaOnnxLinearResamplerResampleFree
_SherpaOnnxLinearResamplerResampleGetInputSampleRate
_SherpaOnnxLinearResamplerResampleGetOutputSampleRate
_SherpaOnnxLinearResamplerReset
_SherpaOnnxOfflineRecognizerSetConfig
_SherpaOnnxOfflineStreamGetOption
_SherpaOnnxOfflineStreamHasOption
_SherpaOnnxOfflineStreamSetOption
_SherpaOnnxOfflineSpeakerDiarizationDestroyResult
_SherpaOnnxOfflineSpeakerDiarizationDestroySegment
_SherpaOnnxOfflineSpeakerDiarizationGetSampleRate
_SherpaOnnxOfflineSpeakerDiarizationProcess
_SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback
_SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg
_SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments
_SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers
_SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime
_SherpaOnnxOfflineSpeakerDiarizationSetConfig
_SherpaOnnxOfflineSpeechDenoiserGetSampleRate
_SherpaOnnxOfflineSpeechDenoiserRun
_SherpaOnnxOfflineTtsGenerate
_SherpaOnnxOfflineTtsGenerateWithCallback
_SherpaOnnxOfflineTtsGenerateWithCallbackWithArg
_SherpaOnnxOfflineTtsGenerateWithConfig
_SherpaOnnxOfflineTtsGenerateWithProgressCallback
_SherpaOnnxOfflineTtsGenerateWithProgressCallbackWithArg
_SherpaOnnxOfflineTtsGenerateWithZipvoice
_SherpaOnnxOfflineTtsNumSpeakers
_SherpaOnnxOfflineTtsSampleRate
_SherpaOnnxOnlinePunctuationAddPunct
_SherpaOnnxOnlinePunctuationFreeText
_SherpaOnnxOnlineSpeechDenoiserFlush
_SherpaOnnxOnlineSpeechDenoiserGetFrameShiftInSamples
_SherpaOnnxOnlineSpeechDenoiserGetSampleRate
_SherpaOnnxOnlineSpeechDenoiserReset
_SherpaOnnxOnlineSpeechDenoiserRun
_SherpaOnnxOnlineStreamAcceptWaveform
_SherpaOnnxOnlineStreamGetOption
_SherpaOnnxOnlineStreamHasOption
_SherpaOnnxOnlineStreamInputFinished
_SherpaOnnxOnlineStreamSetOption
_SherpaOnnxOnlineStreamIsEndpoint
_SherpaOnnxOnlineStreamReset
_SherpaOnnxPrint
_SherpaOnnxReadWave
_SherpaOnnxReadWaveFromBinaryData
_SherpaOnnxResetKeywordStream
_SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding
_SherpaOnnxSpeakerEmbeddingExtractorCreateStream
_SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding
_SherpaOnnxSpeakerEmbeddingExtractorDim
_SherpaOnnxSpeakerEmbeddingExtractorIsReady
_SherpaOnnxSpeakerEmbeddingManagerAdd
_SherpaOnnxSpeakerEmbeddingManagerAddList
_SherpaOnnxSpeakerEmbeddingManagerAddListFlattened
_SherpaOnnxSpeakerEmbeddingManagerContains
_SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers
_SherpaOnnxSpeakerEmbeddingManagerFreeBestMatches
_SherpaOnnxSpeakerEmbeddingManagerFreeSearch
_SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers
_SherpaOnnxSpeakerEmbeddingManagerGetBestMatches
_SherpaOnnxSpeakerEmbeddingManagerNumSpeakers
_SherpaOnnxSpeakerEmbeddingManagerRemove
_SherpaOnnxSpeakerEmbeddingManagerSearch
_SherpaOnnxSpeakerEmbeddingManagerVerify
_SherpaOnnxSpokenLanguageIdentificationCompute
_SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream
_SherpaOnnxVoiceActivityDetectorAcceptWaveform
_SherpaOnnxVoiceActivityDetectorClear
_SherpaOnnxVoiceActivityDetectorDetected
_SherpaOnnxVoiceActivityDetectorEmpty
_SherpaOnnxVoiceActivityDetectorFlush
_SherpaOnnxVoiceActivityDetectorFront
_SherpaOnnxVoiceActivityDetectorPop
_SherpaOnnxVoiceActivityDetectorReset
_SherpaOnnxWaveFileSize
_SherpaOnnxWriteWave
_SherpaOnnxWriteWaveToBuffer


================================================
FILE: sherpa-onnx/c-api/sherpa-onnx-symbols-c.lds
================================================
{
  global:
    SherpaOnnx*;
    # For offline punctuation.
    SherpaOffline*;
  local:
    *;
};


================================================
FILE: sherpa-onnx/csrc/.gitignore
================================================
*.cc-bak
*.h-bak


================================================
FILE: sherpa-onnx/csrc/CMakeLists.txt
================================================
include_directories(${PROJECT_SOURCE_DIR})

if(SHERPA_ONNX_ENABLE_PYTHON)
  message(STATUS "PYTHON_EXECUTABLE: ${PYTHON_EXECUTABLE}")
  execute_process(
    COMMAND "${PYTHON_EXECUTABLE}" -c "import sys; print('.'.join(sys.version.split('.')[:2]))"
    OUTPUT_STRIP_TRAILING_WHITESPACE
    OUTPUT_VARIABLE PYTHON_VERSION
  )
  message(STATUS "PYTHON_VERSION: ${PYTHON_VERSION}")
endif()

set(sources
  base64-decode.cc
  bbpe.cc
  cat.cc
  circular-buffer.cc
  context-graph.cc
  endpoint.cc
  features.cc
  file-utils.cc
  fst-utils.cc
  homophone-replacer.cc
  hypothesis.cc
  keyword-spotter-impl.cc
  keyword-spotter.cc
  lodr-fst.cc
  math.cc
  normal-data-generator.cc
  offline-canary-model-config.cc
  offline-canary-model.cc
  offline-ctc-fst-decoder-config.cc
  offline-ctc-fst-decoder.cc
  offline-ctc-greedy-search-decoder.cc
  offline-ctc-model.cc
  offline-dolphin-model-config.cc
  offline-dolphin-model.cc
  offline-fire-red-asr-ctc-model-config.cc
  offline-fire-red-asr-ctc-model.cc
  offline-fire-red-asr-greedy-search-decoder.cc
  offline-fire-red-asr-model-config.cc
  offline-fire-red-asr-model.cc
  offline-lm-config.cc
  offline-lm.cc
  offline-medasr-ctc-model-config.cc
  offline-medasr-ctc-model.cc
  offline-model-config.cc
  offline-moonshine-greedy-search-decoder.cc
  offline-moonshine-v2-greedy-search-decoder.cc
  offline-moonshine-model-config.cc
  offline-moonshine-model-v2.cc
  offline-moonshine-model.cc
  offline-nemo-enc-dec-ctc-model-config.cc
  offline-nemo-enc-dec-ctc-model.cc
  offline-omnilingual-asr-ctc-model-config.cc
  offline-omnilingual-asr-ctc-model.cc
  offline-paraformer-greedy-search-decoder.cc
  offline-paraformer-model-config.cc
  offline-paraformer-model.cc
  offline-recognizer-impl.cc
  offline-recognizer.cc
  offline-rnn-lm.cc
  offline-sense-voice-model-config.cc
  offline-sense-voice-model.cc
  offline-source-separation-impl.cc
  offline-source-separation-model-config.cc
  offline-source-separation-spleeter-model-config.cc
  offline-source-separation-spleeter-model.cc
  offline-source-separation-uvr-model-config.cc
  offline-source-separation-uvr-model.cc
  offline-source-separation.cc
  offline-stream.cc
  offline-tdnn-ctc-model.cc
  offline-tdnn-model-config.cc
  offline-telespeech-ctc-model.cc
  offline-transducer-greedy-search-decoder.cc
  offline-transducer-greedy-search-nemo-decoder.cc
  offline-transducer-model-config.cc
  offline-transducer-model.cc
  offline-transducer-modified-beam-search-decoder.cc
  offline-transducer-modified-beam-search-nemo-decoder.cc
  offline-transducer-nemo-model.cc
  offline-wenet-ctc-model-config.cc
  offline-wenet-ctc-model.cc
  offline-whisper-dtw.cc
  offline-whisper-greedy-search-decoder.cc
  offline-whisper-model-config.cc
  offline-whisper-model.cc
  offline-whisper-timestamp-rules.cc
  offline-zipformer-ctc-model-config.cc
  offline-zipformer-ctc-model.cc
  online-conformer-transducer-model.cc
  online-ctc-fst-decoder-config.cc
  online-ctc-fst-decoder.cc
  online-ctc-greedy-search-decoder.cc
  online-ctc-model.cc
  online-ebranchformer-transducer-model.cc
  online-lm-config.cc
  online-lm.cc
  online-lstm-transducer-model.cc
  online-model-config.cc
  online-nemo-ctc-model-config.cc
  online-nemo-ctc-model.cc
  online-paraformer-model-config.cc
  online-paraformer-model.cc
  online-recognizer-impl.cc
  online-recognizer.cc
  online-rnn-lm.cc
  online-stream.cc
  online-t-one-ctc-model-config.cc
  online-t-one-ctc-model.cc
  online-transducer-decoder.cc
  online-transducer-greedy-search-decoder.cc
  online-transducer-greedy-search-nemo-decoder.cc
  online-transducer-model-config.cc
  online-transducer-model.cc
  online-transducer-modified-beam-search-decoder.cc
  online-transducer-nemo-model.cc
  online-wenet-ctc-model-config.cc
  online-wenet-ctc-model.cc
  online-zipformer-transducer-model.cc
  online-zipformer2-ctc-model-config.cc
  online-zipformer2-ctc-model.cc
  online-zipformer2-transducer-model.cc
  onnx-utils.cc
  packed-sequence.cc
  pad-sequence.cc
  parse-options.cc
  phrase-matcher.cc
  provider-config.cc
  provider.cc
  resample.cc
  session.cc
  silero-vad-model-config.cc
  silero-vad-model.cc
  slice.cc
  spoken-language-identification-impl.cc
  spoken-language-identification.cc
  stack.cc
  symbol-table.cc
  ten-vad-model-config.cc
  ten-vad-model.cc
  text-utils.cc
  timer.cc
  transducer-keyword-decoder.cc
  transpose.cc
  unbind.cc
  utils.cc
  vad-model-config.cc
  vad-model.cc
  version.cc
  voice-activity-detector.cc
  wave-reader.cc
  wave-writer.cc
)

# speaker embedding extractor
list(APPEND sources
  speaker-embedding-extractor-impl.cc
  speaker-embedding-extractor-model.cc
  speaker-embedding-extractor-nemo-model.cc
  speaker-embedding-extractor.cc
  speaker-embedding-manager.cc
)

# audio tagging
list(APPEND sources
  audio-tagging-impl.cc
  audio-tagging-label-file.cc
  audio-tagging-model-config.cc
  audio-tagging.cc
  offline-ced-model.cc
  offline-zipformer-audio-tagging-model-config.cc
  offline-zipformer-audio-tagging-model.cc
)

list(APPEND sources
  qnn-config.cc
)

# punctuation
list(APPEND sources
  offline-ct-transformer-model.cc
  offline-punctuation-impl.cc
  offline-punctuation-model-config.cc
  offline-punctuation.cc
  online-cnn-bilstm-model.cc
  online-punctuation-impl.cc
  online-punctuation-model-config.cc
  online-punctuation.cc
)
if(SHERPA_ONNX_ENABLE_RKNN)
  list(APPEND sources
    ./rknn/context-blocking-queue-rknn.cc
    ./rknn/offline-sense-voice-model-rknn.cc
    ./rknn/offline-paraformer-model-rknn.cc
    ./rknn/online-stream-rknn.cc
    ./rknn/online-transducer-greedy-search-decoder-rknn.cc
    ./rknn/online-transducer-modified-beam-search-decoder-rknn.cc
    ./rknn/online-zipformer-ctc-model-rknn.cc
    ./rknn/online-zipformer-transducer-model-rknn.cc
    ./rknn/silero-vad-model-rknn.cc
    ./rknn/transducer-keyword-decoder-rknn.cc
    ./rknn/utils.cc
  )
endif()

if(SHERPA_ONNX_ENABLE_AXERA)
  list(APPEND sources
    ./axera/ax-engine-guard.cc
    ./axera/offline-sense-voice-model-axera.cc
    ./axera/utils.cc
  )
endif()

if(SHERPA_ONNX_ENABLE_AXCL)
  list(APPEND sources
    ./axcl/axcl-engine-guard.cc
    ./axcl/axcl-engine-io-guard.cc
    ./axcl/axcl-engine-io-info-guard.cc
    ./axcl/axcl-manager.cc
    ./axcl/axcl-model.cc
    ./axcl/offline-sense-voice-model-axcl.cc
    ./axcl/utils.cc
  )
endif()

if(SHERPA_ONNX_ENABLE_RKNN OR SHERPA_ONNX_ENABLE_ASCEND_NPU OR SHERPA_ONNX_ENABLE_QNN OR SHERPA_ONNX_ENABLE_AXERA OR SHERPA_ONNX_ENABLE_AXCL)
  list(APPEND sources
    ./rknn/offline-ctc-greedy-search-decoder-rknn.cc
  )
endif()

if(SHERPA_ONNX_ENABLE_ASCEND_NPU)
  list(APPEND sources
    ./ascend/offline-paraformer-model-ascend.cc
    ./ascend/offline-sense-voice-model-ascend.cc
    ./ascend/offline-whisper-model-ascend.cc
    ./ascend/offline-zipformer-ctc-model-ascend.cc
    ./ascend/utils.cc
  )
endif()

if(SHERPA_ONNX_ENABLE_QNN)
  list(APPEND sources
    ./qnn/offline-sense-voice-model-qnn.cc
    ./qnn/offline-paraformer-model-qnn.cc
    ./qnn/offline-zipformer-ctc-model-qnn.cc
    ./qnn/qnn-backend.cc
    ./qnn/qnn-model.cc
    ./qnn/utils.cc
  )
endif()

list(APPEND sources
  offline-funasr-nano-model-config.cc
  offline-funasr-nano-model.cc
  offline-recognizer-funasr-nano-impl.cc
  funasr-nano-tokenizer.cc
)

if(SHERPA_ONNX_ENABLE_TTS)
  list(APPEND sources
    character-lexicon.cc
    hifigan-vocoder.cc
    kokoro-multi-lang-lexicon.cc
    lexicon.cc
    matcha-tts-lexicon.cc
    melo-tts-lexicon.cc
    offline-tts-character-frontend.cc
    offline-tts-frontend.cc
    offline-tts-impl.cc
    offline-tts-kitten-model-config.cc
    offline-tts-kitten-model.cc
    offline-tts-kokoro-model-config.cc
    offline-tts-kokoro-model.cc
    offline-tts-matcha-model-config.cc
    offline-tts-matcha-model.cc
    offline-tts-model-config.cc
    offline-tts-pocket-model-config.cc
    offline-tts-pocket-model.cc
    offline-tts-supertonic-impl.cc
    offline-tts-supertonic-model-config.cc
    offline-tts-supertonic-model.cc
    offline-tts-supertonic-unicode-processor.cc
    offline-tts-vits-model-config.cc
    offline-tts-vits-model.cc
    offline-tts-zipvoice-model-config.cc
    offline-tts-zipvoice-model.cc
    offline-tts.cc
    piper-phonemize-lexicon.cc
    sentence-piece-tokenizer.cc
    vocoder.cc
    vocos-vocoder.cc
  )
endif()

list(APPEND sources
  offline-speech-denoiser-dpdfnet-model-config.cc
  offline-speech-denoiser-dpdfnet-model.cc
  offline-speech-denoiser-gtcrn-model-config.cc
  offline-speech-denoiser-gtcrn-model.cc
  offline-speech-denoiser-impl.cc
  offline-speech-denoiser-model-config.cc
  offline-speech-denoiser.cc
  online-speech-denoiser-impl.cc
  online-speech-denoiser.cc
)

if(SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION)
  list(APPEND sources
    fast-clustering-config.cc
    fast-clustering.cc
    offline-speaker-diarization-impl.cc
    offline-speaker-diarization-result.cc
    offline-speaker-diarization.cc
    offline-speaker-segmentation-model-config.cc
    offline-speaker-segmentation-pyannote-model-config.cc
    offline-speaker-segmentation-pyannote-model.cc
  )
endif()

if(SHERPA_ONNX_ENABLE_CHECK)
  list(APPEND sources log.cc)
endif()

# Always static build
add_library(sherpa-onnx-core STATIC ${sources})


if(WIN32 AND SHERPA_ONNX_LINK_D3D)
    target_link_libraries(sherpa-onnx-core dxguid.lib d3d12.lib dxgi.lib dxcore.lib)
endif()


if(TARGET directml)
    target_link_libraries(sherpa-onnx-core directml)
endif()

set_target_properties(
    sherpa-onnx-core
  PROPERTIES
    POSITION_INDEPENDENT_CODE ON
    C_VISIBILITY_PRESET hidden
    CXX_VISIBILITY_PRESET hidden
)

if(APPLE)
  target_compile_options(sherpa-onnx-core PRIVATE
    -Wno-deprecated-declarations
  )
endif()

if(ANDROID_NDK)
  target_link_libraries(sherpa-onnx-core android log)
endif()

target_link_libraries(sherpa-onnx-core
  kaldi-native-fbank-core
  kaldi-decoder-core
  ssentencepiece_core
)
if(DEFINED OHOS AND x${OHOS} STREQUAL xOHOS)
  target_link_libraries(sherpa-onnx-core
    hilog_ndk.z
    rawfile.z
  )
endif()

if(SHERPA_ONNX_ENABLE_RKNN)
  if(DEFINED ENV{SHERPA_ONNX_RKNN_TOOLKIT2_LIB_DIR})
    target_link_libraries(sherpa-onnx-core -L$ENV{SHERPA_ONNX_RKNN_TOOLKIT2_LIB_DIR} -lrknnrt)
  else()
    target_link_libraries(sherpa-onnx-core rknnrt)
  endif()
endif()

if(SHERPA_ONNX_ENABLE_AXERA)
  if(DEFINED ENV{SHERPA_ONNX_AXERA_LIB_DIR})
    target_link_libraries(sherpa-onnx-core
      -L$ENV{SHERPA_ONNX_AXERA_LIB_DIR}
      -lax_engine
      -lax_interpreter
      -lax_sys
      -lpthread
    )
  else()
    target_link_libraries(sherpa-onnx-core
      ax_engine
      ax_interpreter
      ax_sys
      pthread
    )
  endif()
endif()

if(SHERPA_ONNX_ENABLE_AXCL)
  if(DEFINED ENV{SHERPA_ONNX_AXCL_LIB_DIR})
    target_link_libraries(sherpa-onnx-core
      -L$ENV{SHERPA_ONNX_AXCL_LIB_DIR}
      -laxcl_rt
      )
  else()
    target_link_libraries(sherpa-onnx-core
      axcl_rt
    )
  endif()
endif()

if(SHERPA_ONNX_ENABLE_ASCEND_NPU)
    target_include_directories(sherpa-onnx-core PRIVATE ${ASCEND_TOOLKIT_HOME}/include)
    target_link_libraries(sherpa-onnx-core
      -L${ASCEND_TOOLKIT_HOME}/lib64
      -lascendcl
    )
endif()

if(SHERPA_ONNX_ENABLE_QNN)
  target_include_directories(sherpa-onnx-core PRIVATE ${QNN_SDK_ROOT}/include/QNN)
endif()

if(SHERPA_ONNX_ENABLE_SPACEMIT)
  if(TARGET spacemit_ep)
    target_link_libraries(sherpa-onnx-core spacemit_ep)
  else()
    target_link_libraries(sherpa-onnx-core ${spacemit_ep_lib_files})
  endif()
endif()

if(TARGET onnxruntime)
  target_link_libraries(sherpa-onnx-core onnxruntime)
else()
  target_link_libraries(sherpa-onnx-core ${onnxruntime_lib_files})
endif()

if(NOT WIN32)
  target_link_libraries(sherpa-onnx-core -lm)
endif()

if(NOT BUILD_SHARED_LIBS AND APPLE)
  target_link_libraries(sherpa-onnx-core "-framework Foundation")
endif()

target_link_libraries(sherpa-onnx-core fstfar fst)

if(SHERPA_ONNX_ENABLE_TTS)
  target_link_libraries(sherpa-onnx-core
    piper_phonemize)
endif()

if(SHERPA_ONNX_ENABLE_CHECK)
  target_compile_definitions(sherpa-onnx-core PUBLIC SHERPA_ONNX_ENABLE_CHECK=1)

  if(SHERPA_ONNX_HAVE_EXECINFO_H)
    target_compile_definitions(sherpa-onnx-core PRIVATE SHERPA_ONNX_HAVE_EXECINFO_H=1)
  endif()

  if(SHERPA_ONNX_HAVE_CXXABI_H)
    target_compile_definitions(sherpa-onnx-core PRIVATE SHERPA_ONNX_HAVE_CXXABI_H=1)
  endif()
endif()

if(NOT BUILD_SHARED_LIBS AND CMAKE_SYSTEM_NAME STREQUAL Linux)
  # This is for linux arm32 and arm64
  target_link_libraries(sherpa-onnx-core -ldl)
endif()

if(NOT WIN32 AND NOT SHERPA_ONNX_ENABLE_WASM AND CMAKE_SYSTEM_NAME STREQUAL Linux)
  target_link_libraries(sherpa-onnx-core -pthread)
endif()

if(SHERPA_ONNX_ENABLE_BINARY)
  add_executable(sherpa-onnx sherpa-onnx.cc)
  add_executable(sherpa-onnx-keyword-spotter sherpa-onnx-keyword-spotter.cc)
  add_executable(sherpa-onnx-offline sherpa-onnx-offline.cc)
  add_executable(sherpa-onnx-offline-audio-tagging sherpa-onnx-offline-audio-tagging.cc)
  add_executable(sherpa-onnx-offline-denoiser sherpa-onnx-offline-denoiser.cc)
  add_executable(sherpa-onnx-offline-language-identification sherpa-onnx-offline-language-identification.cc)
  add_executable(sherpa-onnx-offline-parallel sherpa-onnx-offline-parallel.cc)
  add_executable(sherpa-onnx-offline-punctuation sherpa-onnx-offline-punctuation.cc)
  add_executable(sherpa-onnx-offline-source-separation sherpa-onnx-offline-source-separation.cc)
  add_executable(sherpa-onnx-online-denoiser sherpa-onnx-online-denoiser.cc)
  add_executable(sherpa-onnx-online-punctuation sherpa-onnx-online-punctuation.cc)
  add_executable(sherpa-onnx-version sherpa-onnx-version.cc version.cc)
  add_executable(sherpa-onnx-vad sherpa-onnx-vad.cc)

  if(SHERPA_ONNX_ENABLE_TTS)
    add_executable(sherpa-onnx-offline-tts sherpa-onnx-offline-tts.cc)
  endif()

  if(SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION)
    add_executable(sherpa-onnx-offline-speaker-diarization sherpa-onnx-offline-speaker-diarization.cc)
  endif()

  set(main_exes
    sherpa-onnx
    sherpa-onnx-keyword-spotter
    sherpa-onnx-offline
    sherpa-onnx-offline-audio-tagging
    sherpa-onnx-offline-denoiser
    sherpa-onnx-offline-language-identification
    sherpa-onnx-offline-parallel
    sherpa-onnx-offline-punctuation
    sherpa-onnx-offline-source-separation
    sherpa-onnx-online-denoiser
    sherpa-onnx-online-punctuation
    sherpa-onnx-vad
  )
  if(SHERPA_ONNX_ENABLE_TTS)
    list(APPEND main_exes
      sherpa-onnx-offline-tts
    )
  endif()

  if(SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION)
    list(APPEND main_exes
      sherpa-onnx-offline-speaker-diarization
    )
  endif()

  foreach(exe IN LISTS main_exes)
    target_link_libraries(${exe} sherpa-onnx-core)
  endforeach()

  if(NOT WIN32)
    foreach(exe IN LISTS main_exes)
      target_link_libraries(${exe} "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../lib")
      target_link_libraries(${exe} "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../../../sherpa_onnx/lib")

      if(SHERPA_ONNX_ENABLE_PYTHON)
        target_link_libraries(${exe} "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../lib/python${PYTHON_VERSION}/site-packages/sherpa_onnx/lib")
      elseif(SHERPA_ONNX_SPLIT_PYTHON_PACKAGE)
        foreach(ver in ITEMS 3.8 3.9 3.10 3.11 3.12 3.13 3.14)
          target_link_libraries(${exe} "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../lib/python${ver}/site-packages/sherpa_onnx/lib")
        endforeach()
      endif()
    endforeach()
  endif()
endif()

if(NOT BUILD_SHARED_LIBS)
  install(TARGETS sherpa-onnx-core DESTINATION lib)
endif()

if(SHERPA_ONNX_ENABLE_BINARY)
  install(
    TARGETS
      ${main_exes}
      sherpa-onnx-version
    DESTINATION
      bin
  )
endif()

if(SHERPA_ONNX_HAS_ALSA AND SHERPA_ONNX_ENABLE_BINARY)
  add_executable(sherpa-onnx-alsa sherpa-onnx-alsa.cc alsa.cc)
  add_executable(sherpa-onnx-alsa-offline sherpa-onnx-alsa-offline.cc alsa.cc)
  add_executable(sherpa-onnx-alsa-offline-audio-tagging sherpa-onnx-alsa-offline-audio-tagging.cc alsa.cc)
  add_executable(sherpa-onnx-alsa-offline-speaker-identification sherpa-onnx-alsa-offline-speaker-identification.cc alsa.cc)
  add_executable(sherpa-onnx-keyword-spotter-alsa sherpa-onnx-keyword-spotter-alsa.cc alsa.cc)
  add_executable(sherpa-onnx-vad-alsa sherpa-onnx-vad-alsa.cc alsa.cc)
  add_executable(sherpa-onnx-vad-alsa-offline-asr sherpa-onnx-vad-alsa-offline-asr.cc alsa.cc)


  if(SHERPA_ONNX_ENABLE_TTS)
    add_executable(sherpa-onnx-offline-tts-play-alsa sherpa-onnx-offline-tts-play-alsa.cc alsa-play.cc)
  endif()

  set(exes
    sherpa-onnx-alsa
    sherpa-onnx-alsa-offline
    sherpa-onnx-alsa-offline-speaker-identification
    sherpa-onnx-keyword-spotter-alsa
    sherpa-onnx-vad-alsa
    sherpa-onnx-vad-alsa-offline-asr
    sherpa-onnx-alsa-offline-audio-tagging
  )

  if(SHERPA_ONNX_ENABLE_TTS)
    list(APPEND exes
      sherpa-onnx-offline-tts-play-alsa
    )
  endif()

  #   # To fix the following error for Windows when building exe
  #   #  mismatch detected for 'RuntimeLibrary': value 'MT_StaticRelease' doesn't match value 'MD_Dynamic Release'

  foreach(exe IN LISTS exes)
    target_link_libraries(${exe} sherpa-onnx-core)
  endforeach()

  foreach(exe IN LISTS exes)
    if(DEFINED ENV{SHERPA_ONNX_ALSA_LIB_DIR})
      target_link_libraries(${exe} -L$ENV{SHERPA_ONNX_ALSA_LIB_DIR} -lasound)
    else()
      target_link_libraries(${exe} asound)
    endif()
  endforeach()

  if(NOT WIN32)
    foreach(exe IN LISTS exes)
      target_link_libraries(${exe} "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../lib")
      target_link_libraries(${exe} "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../../../sherpa_onnx/lib")
    endforeach()

    if(SHERPA_ONNX_ENABLE_PYTHON)
      foreach(exe IN LISTS exes)
        target_link_libraries(${exe} "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../lib/python${PYTHON_VERSION}/site-packages/sherpa_onnx/lib")
      endforeach()
    elseif(SHERPA_ONNX_SPLIT_PYTHON_PACKAGE)
      foreach(exe IN LISTS exes)
        foreach(ver in ITEMS 3.8 3.9 3.10 3.11 3.12 3.13 3.14)
          target_link_libraries(${exe} "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../lib/python${ver}/site-packages/sherpa_onnx/lib")
        endforeach()
      endforeach()
    endif()
  endif()

  install(
    TARGETS ${exes}
    DESTINATION
      bin
  )
endif()

if(SHERPA_ONNX_ENABLE_PORTAUDIO AND SHERPA_ONNX_ENABLE_BINARY)
  if(SHERPA_ONNX_ENABLE_TTS)
    add_executable(sherpa-onnx-offline-tts-play
      sherpa-onnx-offline-tts-play.cc
      microphone.cc
    )
  endif()

  add_executable(sherpa-onnx-keyword-spotter-microphone
    sherpa-onnx-keyword-spotter-microphone.cc
    microphone.cc
  )

  add_executable(sherpa-onnx-microphone
    sherpa-onnx-microphone.cc
    microphone.cc
  )


  add_executable(sherpa-onnx-microphone-offline
    sherpa-onnx-microphone-offline.cc
    microphone.cc
  )

  add_executable(sherpa-onnx-vad-microphone
    sherpa-onnx-vad-microphone.cc
    microphone.cc
  )

  add_executable(sherpa-onnx-vad-microphone-simulated-streaming-asr
    sherpa-onnx-vad-microphone-simulated-streaming-asr.cc
    microphone.cc
  )

  add_executable(sherpa-onnx-vad-with-offline-asr
    sherpa-onnx-vad-with-offline-asr.cc
  )

  add_executable(sherpa-onnx-vad-with-online-asr
    sherpa-onnx-vad-with-online-asr.cc
  )

  add_executable(sherpa-onnx-vad-microphone-offline-asr
    sherpa-onnx-vad-microphone-offline-asr.cc
    microphone.cc
  )

  add_executable(sherpa-onnx-microphone-offline-speaker-identification
    sherpa-onnx-microphone-offline-speaker-identification.cc
    microphone.cc
  )

  add_executable(sherpa-onnx-microphone-offline-audio-tagging
    sherpa-onnx-microphone-offline-audio-tagging.cc
    microphone.cc
  )

  set(exes
    sherpa-onnx-keyword-spotter-microphone
    sherpa-onnx-microphone
    sherpa-onnx-microphone-offline
    sherpa-onnx-microphone-offline-audio-tagging
    sherpa-onnx-microphone-offline-speaker-identification
    sherpa-onnx-vad-microphone
    sherpa-onnx-vad-microphone-simulated-streaming-asr
    sherpa-onnx-vad-microphone-offline-asr
    sherpa-onnx-vad-with-offline-asr
    sherpa-onnx-vad-with-online-asr
  )
  if(SHERPA_ONNX_ENABLE_TTS)
    list(APPEND exes
      sherpa-onnx-offline-tts-play
    )
  endif()

  foreach(exe IN LISTS exes)
    target_link_libraries(${exe} portaudio_static sherpa-onnx-core)
  endforeach()

  if(NOT WIN32)
    foreach(exe IN LISTS exes)
      target_link_libraries(${exe} "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../lib")
      target_link_libraries(${exe} "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../../../sherpa_onnx/lib")
    endforeach()

    if(SHERPA_ONNX_ENABLE_PYTHON)
      foreach(exe IN LISTS exes)
        target_link_libraries(${exe} "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../lib/python${PYTHON_VERSION}/site-packages/sherpa_onnx/lib")
      endforeach()
    elseif(SHERPA_ONNX_SPLIT_PYTHON_PACKAGE)
      foreach(exe IN LISTS exes)
        foreach(ver in ITEMS 3.8 3.9 3.10 3.11 3.12 3.13 3.14)
          target_link_libraries(${exe} "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../lib/python${ver}/site-packages/sherpa_onnx/lib")
        endforeach()
      endforeach()
    endif()
  endif()

  install(
    TARGETS ${exes}
    DESTINATION
      bin
  )
endif()

if(SHERPA_ONNX_ENABLE_WEBSOCKET AND SHERPA_ONNX_ENABLE_BINARY)
  add_definitions(-DASIO_STANDALONE)
  add_definitions(-D_WEBSOCKETPP_CPP11_STL_)

  add_executable(sherpa-onnx-online-websocket-server
    online-websocket-server-impl.cc
    online-websocket-server.cc
  )
  target_link_libraries(sherpa-onnx-online-websocket-server sherpa-onnx-core)

  add_executable(sherpa-onnx-online-websocket-client
    online-websocket-client.cc
  )
  target_link_libraries(sherpa-onnx-online-websocket-client sherpa-onnx-core)

  if(NOT WIN32)
    target_compile_options(sherpa-onnx-online-websocket-server PRIVATE -Wno-deprecated-declarations)

    target_compile_options(sherpa-onnx-online-websocket-client PRIVATE -Wno-deprecated-declarations)
  endif()

  # For offline websocket
  add_executable(sherpa-onnx-offline-websocket-server
    offline-websocket-server-impl.cc
    offline-websocket-server.cc
  )
  target_link_libraries(sherpa-onnx-offline-websocket-server sherpa-onnx-core)

  if(NOT WIN32)
    target_compile_options(sherpa-onnx-offline-websocket-server PRIVATE -Wno-deprecated-declarations)
  endif()

  if(NOT WIN32)
    target_link_libraries(sherpa-onnx-online-websocket-server "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../lib")
    target_link_libraries(sherpa-onnx-online-websocket-server "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../../../sherpa_onnx/lib")

    target_link_libraries(sherpa-onnx-online-websocket-client "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../lib")
    target_link_libraries(sherpa-onnx-online-websocket-client "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../../../sherpa_onnx/lib")

    target_link_libraries(sherpa-onnx-offline-websocket-server "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../lib")
    target_link_libraries(sherpa-onnx-offline-websocket-server "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../../../sherpa_onnx/lib")

    if(SHERPA_ONNX_ENABLE_PYTHON AND NOT WIN32)
      target_link_libraries(sherpa-onnx-online-websocket-server "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../lib/python${PYTHON_VERSION}/site-packages/sherpa_onnx/lib")
      target_link_libraries(sherpa-onnx-online-websocket-client "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../lib/python${PYTHON_VERSION}/site-packages/sherpa_onnx/lib")
      target_link_libraries(sherpa-onnx-offline-websocket-server "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../lib/python${PYTHON_VERSION}/site-packages/sherpa_onnx/lib")
    elseif(SHERPA_ONNX_SPLIT_PYTHON_PACKAGE)
        foreach(ver in ITEMS 3.8 3.9 3.10 3.11 3.12 3.13 3.14)
          target_link_libraries(sherpa-onnx-online-websocket-server "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../lib/python${ver}/site-packages/sherpa_onnx/lib")
          target_link_libraries(sherpa-onnx-online-websocket-client "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../lib/python${ver}/site-packages/sherpa_onnx/lib")
          target_link_libraries(sherpa-onnx-offline-websocket-server "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../lib/python${ver}/site-packages/sherpa_onnx/lib")
        endforeach()
    endif()
  endif()

  install(
    TARGETS
      sherpa-onnx-online-websocket-server
      sherpa-onnx-online-websocket-client
      sherpa-onnx-offline-websocket-server
    DESTINATION
      bin
  )
endif()

if(SHERPA_ONNX_ENABLE_TESTS)
  set(sherpa_onnx_test_srcs
    cat-test.cc
    circular-buffer-test.cc
    context-graph-test.cc
    math-test.cc
    offline-whisper-timestamp-rules-test.cc
    packed-sequence-test.cc
    pad-sequence-test.cc
    regex-lang-test.cc
    slice-test.cc
    stack-test.cc
    text-utils-test.cc
    text2token-test.cc
    transpose-test.cc
    unbind-test.cc
    utfcpp-test.cc
    wave-reader-test.cc
  )
  if(SHERPA_ONNX_ENABLE_TTS)
    list(APPEND sherpa_onnx_test_srcs
      sentence-piece-tokenizer-test.cc
      piper-phonemize-test.cc
    )
  endif()

  if(SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION)
    list(APPEND sherpa_onnx_test_srcs
      fast-clustering-test.cc
    )
  endif()

  list(APPEND sherpa_onnx_test_srcs
    speaker-embedding-manager-test.cc
  )

  function(sherpa_onnx_add_test source)
    get_filename_component(name ${source} NAME_WE)
    set(target_name ${name})
    add_executable(${target_name} "${source}")

    target_link_libraries(${target_name}
      PRIVATE
        gtest
        gtest_main
        sherpa-onnx-core
    )

    add_test(NAME "${target_name}"
      COMMAND
        $<TARGET_FILE:${target_name}>
    )
  endfunction()

  foreach(source IN LISTS sherpa_onnx_test_srcs)
    sherpa_onnx_add_test(${source})
  endforeach()
endif()

set(srcs_to_check)
foreach(s IN LISTS sources)
  list(APPEND srcs_to_check ${CMAKE_CURRENT_LIST_DIR}/${s})
endforeach()

# For clang-tidy
add_custom_target(
  clang-tidy-check
  clang-tidy -p ${CMAKE_BINARY_DIR}/compile_commands.json --config-file ${PROJECT_SOURCE_DIR}/.clang-tidy ${srcs_to_check}
  DEPENDS ${sources})

add_custom_target(check DEPENDS clang-tidy-check)


================================================
FILE: sherpa-onnx/csrc/CPPLINT.cfg
================================================
exclude_files=tee-stream.h


================================================
FILE: sherpa-onnx/csrc/README.md
================================================
# File descriptions

- [./sherpa-onnx-alsa.cc](./sherpa-onnx-alsa.cc) For Linux only, especially for
  embedded Linux, e.g., Raspberry Pi; it uses a streaming model for real-time
  speech recognition with a microphone.

- [./sherpa-onnx-microphone.cc](./sherpa-onnx-microphone.cc)
  For Linux/Windows/macOS; it uses a streaming model for real-time speech
  recognition with a microphone.

- [./sherpa-onnx-microphone-offline.cc](./sherpa-onnx-microphone-offline.cc)
  For Linux/Windows/macOS; it uses a non-streaming model for speech
  recognition with a microphone.

- [./sherpa-onnx.cc](./sherpa-onnx.cc)
  It uses a streaming model to decode wave files

- [./sherpa-onnx-offline.cc](./sherpa-onnx-offline.cc)
  It uses a non-streaming model to decode wave files

- [./online-websocket-server.cc](./online-websocket-server.cc)
  WebSocket server for streaming models.

- [./offline-websocket-server.cc](./offline-websocket-server.cc)
  WebSocket server for non-streaming models.

- [./sherpa-onnx-vad-microphone.cc](./sherpa-onnx-vad-microphone.cc)
  Use silero VAD to detect speeches with a microphone.


================================================
FILE: sherpa-onnx/csrc/alsa-play.cc
================================================
// sherpa-onnx/csrc/alsa-play.cc
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#ifdef SHERPA_ONNX_ENABLE_ALSA

#include "sherpa-onnx/csrc/alsa-play.h"

#include <algorithm>
#include <cstdio>
#include <memory>
#include <vector>

namespace sherpa_onnx {

AlsaPlay::AlsaPlay(const char *device_name, int32_t sample_rate) {
  int32_t err = snd_pcm_open(&handle_, device_name, SND_PCM_STREAM_PLAYBACK, 0);

  if (err) {
    fprintf(stderr, "Unable to open: %s. %s\n", device_name, snd_strerror(err));
    exit(-1);
  }

  SetParameters(sample_rate);
}

AlsaPlay::~AlsaPlay() {
  if (handle_) {
    int32_t err = snd_pcm_close(handle_);
    if (err < 0) {
      printf("Failed to close pcm: %s\n", snd_strerror(err));
    }
  }
}

void AlsaPlay::SetParameters(int32_t sample_rate) {
  // set the following parameters
  // 1. sample_rate
  // 2. sample format: int16_t
  // 3. num_channels: 1
  snd_pcm_hw_params_t *params;
  snd_pcm_hw_params_alloca(&params);
  snd_pcm_hw_params_any(handle_, params);

  int32_t err = snd_pcm_hw_params_set_access(handle_, params,
                                             SND_PCM_ACCESS_RW_INTERLEAVED);
  if (err < 0) {
    printf("SND_PCM_ACCESS_RW_INTERLEAVED is not supported: %s\n",
           snd_strerror(err));
    exit(-1);
  }

  err = snd_pcm_hw_params_set_format(handle_, params, SND_PCM_FORMAT_S16_LE);

  if (err < 0) {
    printf("Can't set format to 16-bit: %s\n", snd_strerror(err));
    exit(-1);
  }

  err = snd_pcm_hw_params_set_channels(handle_, params, 1);

  if (err < 0) {
    printf("Can't set channel number to 1: %s\n", snd_strerror(err));
  }

  uint32_t rate = sample_rate;
  err = snd_pcm_hw_params_set_rate_near(handle_, params, &rate, 0);
  if (err < 0) {
    printf("Can't set rate to %d. %s\n", rate, snd_strerror(err));
  }

  err = snd_pcm_hw_params(handle_, params);
  if (err < 0) {
    printf("Can't set hardware parameters. %s\n", snd_strerror(err));
    exit(-1);
  }

  uint32_t tmp;
  snd_pcm_hw_params_get_rate(params, &tmp, 0);
  int32_t actual_sample_rate = tmp;
  if (actual_sample_rate != sample_rate) {
    fprintf(stderr,
            "Creating a resampler:\n"
            "   in_sample_rate: %d\n"
            "   output_sample_rate: %d\n",
            sample_rate, actual_sample_rate);

    float min_freq = std::min(actual_sample_rate, sample_rate);
    float lowpass_cutoff = 0.99 * 0.5 * min_freq;

    int32_t lowpass_filter_width = 6;
    resampler_ = std::make_unique<LinearResample>(
        sample_rate, actual_sample_rate, lowpass_cutoff, lowpass_filter_width);
  }

  snd_pcm_uframes_t frames;
  snd_pcm_hw_params_get_period_size(params, &frames, 0);
  buf_.resize(frames);
}

void AlsaPlay::Play(const std::vector<float> &samples) {
  std::vector<float> tmp;
  const float *p = samples.data();
  int32_t num_samples = samples.size();
  if (resampler_) {
    resampler_->Resample(samples.data(), samples.size(), false, &tmp);
    p = tmp.data();
    num_samples = tmp.size();
  }

  int32_t frames = buf_.size();
  int32_t i = 0;
  for (; i + frames < num_samples; i += frames) {
    for (int32_t k = 0; k != frames; ++k) {
      buf_[k] = p[i + k] * 32767;
    }

    int32_t err = snd_pcm_writei(handle_, buf_.data(), frames);
    if (err == -EPIPE) {
      printf("XRUN.\n");
      snd_pcm_prepare(handle_);
    } else if (err < 0) {
      printf("Can't write to PCM device: %s\n", snd_strerror(err));
      exit(-1);
    }
  }

  if (i < num_samples) {
    for (int32_t k = 0; k + i < num_samples; ++k) {
      buf_[k] = p[i + k] * 32767;
    }

    int32_t err = snd_pcm_writei(handle_, buf_.data(), num_samples - i);
    if (err == -EPIPE) {
      printf("XRUN.\n");
      snd_pcm_prepare(handle_);
    } else if (err < 0) {
      printf("Can't write to PCM device: %s\n", snd_strerror(err));
      exit(-1);
    }
  }
}

void AlsaPlay::Drain() {
  int32_t err = snd_pcm_drain(handle_);
  if (err < 0) {
    printf("Failed to drain pcm. %s\n", snd_strerror(err));
  }
}

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_ENABLE_ALSA


================================================
FILE: sherpa-onnx/csrc/alsa-play.h
================================================
// sherpa-onnx/csrc/alsa-play.h
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_ALSA_PLAY_H_
#define SHERPA_ONNX_CSRC_ALSA_PLAY_H_

#include <cstdint>
#include <memory>
#include <vector>

#include "alsa/asoundlib.h"
#include "sherpa-onnx/csrc/resample.h"

namespace sherpa_onnx {

class AlsaPlay {
 public:
  AlsaPlay(const char *device_name, int32_t sample_rate);
  ~AlsaPlay();
  void Play(const std::vector<float> &samples);

  // wait for all the samples to be played
  void Drain();

 private:
  void SetParameters(int32_t sample_rate);

 private:
  snd_pcm_t *handle_ = nullptr;
  std::unique_ptr<LinearResample> resampler_;
  std::vector<int16_t> buf_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ALSA_PLAY_H_


================================================
FILE: sherpa-onnx/csrc/alsa.cc
================================================
// sherpa-onnx/csrc/sherpa-alsa.cc
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#ifdef SHERPA_ONNX_ENABLE_ALSA

#include "sherpa-onnx/csrc/alsa.h"

#include <algorithm>
#include <cstdio>
#include <memory>
#include <vector>

#include "alsa/asoundlib.h"

namespace sherpa_onnx {

void ToFloat(const std::vector<int16_t> &in, int32_t num_channels,
             std::vector<float> *out) {
  out->resize(in.size() / num_channels);

  int32_t n = in.size();
  for (int32_t i = 0, k = 0; i < n; i += num_channels, ++k) {
    (*out)[k] = in[i] / 32768.;
  }
}

Alsa::Alsa(const char *device_name) {
  const char *kDeviceHelp = R"(
Please use the command:

  arecord -l

to list all available devices. For instance, if the output is:

**** List of CAPTURE Hardware Devices ****
card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
  Subdevices: 1/1
  Subdevice #0: subdevice #0

and if you want to select card 3 and device 0 on that card, please use:

  plughw:3,0

  )";

  int32_t err =
      snd_pcm_open(&capture_handle_, device_name, SND_PCM_STREAM_CAPTURE, 0);
  if (err) {
    fprintf(stderr, "Unable to open: %s. %s\n", device_name, snd_strerror(err));
    fprintf(stderr, "%s\n", kDeviceHelp);
    exit(-1);
  }

  snd_pcm_hw_params_t *hw_params;
  snd_pcm_hw_params_alloca(&hw_params);

  err = snd_pcm_hw_params_any(capture_handle_, hw_params);
  if (err) {
    fprintf(stderr, "Failed to initialize hw_params: %s\n", snd_strerror(err));
    exit(-1);
  }

  err = snd_pcm_hw_params_set_access(capture_handle_, hw_params,
                                     SND_PCM_ACCESS_RW_INTERLEAVED);
  if (err) {
    fprintf(stderr, "Failed to set access type: %s\n", snd_strerror(err));
    exit(-1);
  }

  err = snd_pcm_hw_params_set_format(capture_handle_, hw_params,
                                     SND_PCM_FORMAT_S16_LE);
  if (err) {
    fprintf(stderr, "Failed to set format: %s\n", snd_strerror(err));
    exit(-1);
  }

  // mono
  err = snd_pcm_hw_params_set_channels(capture_handle_, hw_params, 1);
  if (err) {
    fprintf(stderr, "Failed to set number of channels to 1. %s\n",
            snd_strerror(err));

    err = snd_pcm_hw_params_set_channels(capture_handle_, hw_params, 2);
    if (err) {
      fprintf(stderr, "Failed to set number of channels to 2. %s\n",
              snd_strerror(err));

      exit(-1);
    }
    actual_channel_count_ = 2;
    fprintf(stderr,
            "Channel count is set to 2. Will use only 1 channel of it.\n");
  }

  uint32_t actual_sample_rate = expected_sample_rate_;

  int32_t dir = 0;
  err = snd_pcm_hw_params_set_rate_near(capture_handle_, hw_params,
                                        &actual_sample_rate, &dir);
  if (err) {
    fprintf(stderr, "Failed to set sample rate to, %d: %s\n",
            expected_sample_rate_, snd_strerror(err));
    exit(-1);
  }
  actual_sample_rate_ = actual_sample_rate;

  if (actual_sample_rate_ != expected_sample_rate_) {
    fprintf(stderr, "Failed to set sample rate to %d\n", expected_sample_rate_);
    fprintf(stderr, "Current sample rate is %d\n", actual_sample_rate_);
    fprintf(stderr,
            "Creating a resampler:\n"
            "   in_sample_rate: %d\n"
            "   output_sample_rate: %d\n",
            actual_sample_rate_, expected_sample_rate_);

    float min_freq = std::min(actual_sample_rate_, expected_sample_rate_);
    float lowpass_cutoff = 0.99 * 0.5 * min_freq;

    int32_t lowpass_filter_width = 6;
    resampler_ = std::make_unique<LinearResample>(
        actual_sample_rate_, expected_sample_rate_, lowpass_cutoff,
        lowpass_filter_width);
  } else {
    fprintf(stderr, "Current sample rate: %d\n", actual_sample_rate_);
  }

  err = snd_pcm_hw_params(capture_handle_, hw_params);
  if (err) {
    fprintf(stderr, "Failed to set hw params: %s\n", snd_strerror(err));
    exit(-1);
  }

  err = snd_pcm_prepare(capture_handle_);
  if (err) {
    fprintf(stderr, "Failed to prepare for recording: %s\n", snd_strerror(err));
    exit(-1);
  }

  fprintf(stderr, "Recording started!\n");
}

Alsa::~Alsa() { snd_pcm_close(capture_handle_); }

const std::vector<float> &Alsa::Read(int32_t num_samples) {
  samples_.resize(num_samples * actual_channel_count_);

  // count is in frames. Each frame contains actual_channel_count_ samples
  int32_t count = snd_pcm_readi(capture_handle_, samples_.data(), num_samples);
  if (count == -EPIPE) {
    static int32_t n = 0;
    if (++n > 5) {
      fprintf(
          stderr,
          "Too many overruns. It is very likely that the RTF on your board is "
          "larger than 1. Please use ./bin/sherpa-onnx to compute the RTF.\n");
      exit(-1);
    }
    fprintf(stderr, "XRUN.\n");
    snd_pcm_prepare(capture_handle_);

    static std::vector<float> tmp;
    return tmp;
  } else if (count < 0) {
    fprintf(stderr, "Can't read PCM device: %s\n", snd_strerror(count));
    exit(-1);
  }

  samples_.resize(count * actual_channel_count_);

  ToFloat(samples_, actual_channel_count_, &samples1_);

  if (!resampler_) {
    return samples1_;
  }

  resampler_->Resample(samples1_.data(), samples_.size(), false, &samples2_);
  return samples2_;
}

}  // namespace sherpa_onnx

#endif


================================================
FILE: sherpa-onnx/csrc/alsa.h
================================================
// sherpa-onnx/csrc/sherpa-alsa.h
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_ALSA_H_
#define SHERPA_ONNX_CSRC_ALSA_H_

#include <memory>
#include <vector>

#include "alsa/asoundlib.h"
#include "sherpa-onnx/csrc/resample.h"

namespace sherpa_onnx {

class Alsa {
 public:
  explicit Alsa(const char *device_name);
  ~Alsa();

  // This is a blocking read.
  //
  // @param num_samples  Number of samples to read.
  //
  // The returned value is valid until the next call to Read().
  const std::vector<float> &Read(int32_t num_samples);

  int32_t GetExpectedSampleRate() const { return expected_sample_rate_; }
  int32_t GetActualSampleRate() const { return actual_sample_rate_; }

 private:
  snd_pcm_t *capture_handle_;
  int32_t expected_sample_rate_ = 16000;
  int32_t actual_sample_rate_;

  int32_t actual_channel_count_ = 1;

  std::unique_ptr<LinearResample> resampler_;
  std::vector<int16_t> samples_;  // directly from the microphone
  std::vector<float> samples1_;   // normalized version of samples_
  std::vector<float> samples2_;   // possibly resampled from samples1_
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ALSA_H_


================================================
FILE: sherpa-onnx/csrc/ascend/macros.h
================================================
// sherpa-onnx/csrc/ascend/macros.h
//
// Copyright      2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_ASCEND_MACROS_H_
#define SHERPA_ONNX_CSRC_ASCEND_MACROS_H_

#include "sherpa-onnx/csrc/macros.h"

#define SHERPA_ONNX_ASCEND_CHECK(ret, msg, ...)    \
  do {                                             \
    if (ret != ACL_ERROR_NONE) {                   \
      const char *_msg = aclGetRecentErrMsg();     \
      SHERPA_ONNX_LOGE("Return code is: %d", ret); \
      SHERPA_ONNX_LOGE("Error message: %s", _msg); \
      SHERPA_ONNX_LOGE(msg, ##__VA_ARGS__);        \
      SHERPA_ONNX_EXIT(-1);                        \
    }                                              \
  } while (0)

#endif  // SHERPA_ONNX_CSRC_ASCEND_MACROS_H_


================================================
FILE: sherpa-onnx/csrc/ascend/offline-paraformer-model-ascend.cc
================================================
// sherpa-onnx/csrc/ascend/offline-paraformer-model-ascend.cc
//
// Copyright (c)  2025  Xiaomi Corporation

// References:
// https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/83RC1alpha003/API/appdevgapi/aclcppdevg_03_0298.html
#include "sherpa-onnx/csrc/ascend/offline-paraformer-model-ascend.h"

#include <algorithm>
#include <array>
#include <memory>
#include <mutex>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/ascend/macros.h"
#include "sherpa-onnx/csrc/ascend/utils.h"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/math.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class OfflineParaformerModelAscend::Impl {
 public:
  explicit Impl(const OfflineModelConfig &config) : config_(config) {
    PreInit();

    std::vector<std::string> filenames;
    SplitStringToVector(config_.paraformer.model, ",", false, &filenames);
    if (filenames.size() != 3) {
      SHERPA_ONNX_LOGE("Invalid paraformer ascend NPU model '%s'",
                       config_.paraformer.model.c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    InitEncoder(filenames[0]);
    InitPredictor(filenames[1]);
    InitDecoder(filenames[2]);

    PostInit();
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineModelConfig &config) : config_(config) {
    PreInit();

    std::vector<std::string> filenames;
    SplitStringToVector(config_.paraformer.model, ",", false, &filenames);
    if (filenames.size() != 3) {
      SHERPA_ONNX_LOGE("Invalid paraformer ascend NPU model '%s'",
                       config_.paraformer.model.c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    {
      auto buf = ReadFile(mgr, filenames[0]);
      InitEncoder(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(mgr, filenames[1]);
      InitPredictor(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(mgr, filenames[2]);
      InitDecoder(buf.data(), buf.size());
    }

    PostInit();
  }

  std::vector<float> Run(std::vector<float> features) {
    // TODO(fangjun): Support multi clients
    std::lock_guard<std::mutex> lock(mutex_);

    features = ApplyLFR(std::move(features));
    if (features.empty()) {
      return {};
    }

    int32_t num_frames = features.size() / 560;

    RunEncoder(std::move(features));

    std::vector<float> encoder_out_cpu(num_frames * encoder_dim_);
    aclError ret = aclrtMemcpy(
        encoder_out_cpu.data(), num_frames * encoder_dim_ * sizeof(float),
        *encoder_out_ptr_, num_frames * encoder_dim_ * sizeof(float),
        ACL_MEMCPY_DEVICE_TO_HOST);
    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclrtMemcpy");

    RunPredictor(num_frames);

    std::vector<float> alphas_cpu(num_frames);

    ret =
        aclrtMemcpy(alphas_cpu.data(), num_frames * sizeof(float), *alphas_ptr_,
                    num_frames * sizeof(float), ACL_MEMCPY_DEVICE_TO_HOST);
    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclrtMemcpy");

    std::vector<float> acoustic_embedding =
        ComputeAcousticEmbedding(encoder_out_cpu, alphas_cpu, encoder_dim_);
    if (acoustic_embedding.empty()) {
      // no speech in the audio file
      return {};
    }

    encoder_out_cpu.clear();
    alphas_cpu.clear();

    int32_t num_tokens = acoustic_embedding.size() / encoder_dim_;

    RunDecoder(num_frames, std::move(acoustic_embedding));

    std::vector<float> logits(num_tokens * vocab_size_);

    ret = aclrtMemcpy(logits.data(), num_tokens * vocab_size_ * sizeof(float),
                      *logits_ptr_, num_tokens * vocab_size_ * sizeof(float),
                      ACL_MEMCPY_DEVICE_TO_HOST);

    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclrtMemcpy");

    return logits;
  }

  int32_t VocabSize() const { return vocab_size_; }

 private:
  void RunEncoder(std::vector<float> features) {
    int32_t num_frames = features.size() / 560;

    aclError ret = aclrtMemcpy(*features_ptr_, features.size() * sizeof(float),
                               features.data(), features.size() * sizeof(float),
                               ACL_MEMCPY_HOST_TO_DEVICE);

    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclrtMemcpy");

    AclMdlDataset input_dataset;
    AclDataBuffer features_buf(*features_ptr_, features.size() * sizeof(float));
    input_dataset.AddBuffer(features_buf);

    // dynamic shape input
    // https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/83RC1alpha003/appdevg/acldevg/aclcppdevg_000044.html

    std::array<int64_t, 3> features_shape = {1, num_frames, 560};
    AclTensorDesc features_desc(ACL_FLOAT, features_shape.size(),
                                features_shape.data(), ACL_FORMAT_ND);
    input_dataset.SetTensorDesc(features_desc, 0);

    AclMdlDataset output_dataset;

    AclDataBuffer encoder_out(*encoder_out_ptr_,
                              num_frames * encoder_dim_ * sizeof(float));
    output_dataset.AddBuffer(encoder_out);

    ret = aclmdlExecute(*encoder_model_, input_dataset, output_dataset);
    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclmdlExecute for encoder");
  }

  void RunPredictor(int32_t num_frames) {
    AclMdlDataset input_dataset;
    AclDataBuffer encoder_out_buf(*encoder_out_ptr_,
                                  num_frames * encoder_dim_ * sizeof(float));
    input_dataset.AddBuffer(encoder_out_buf);

    std::array<int64_t, 3> encoder_out_shape = {1, num_frames, encoder_dim_};
    AclTensorDesc encoder_out_desc(ACL_FLOAT, encoder_out_shape.size(),
                                   encoder_out_shape.data(), ACL_FORMAT_ND);
    input_dataset.SetTensorDesc(encoder_out_desc, 0);

    AclMdlDataset output_dataset;
    AclDataBuffer alphas_buf(*alphas_ptr_, num_frames * sizeof(float));
    output_dataset.AddBuffer(alphas_buf);

    aclError ret =
        aclmdlExecute(*predictor_model_, input_dataset, output_dataset);
    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclmdlExecute for predictor");
  }

  void RunDecoder(int32_t num_frames, std::vector<float> acoustic_embedding) {
    aclError ret = aclrtMemcpy(
        *acoustic_embedding_ptr_, acoustic_embedding.size() * sizeof(float),
        acoustic_embedding.data(), acoustic_embedding.size() * sizeof(float),
        ACL_MEMCPY_HOST_TO_DEVICE);
    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclrtMemcpy");

    int32_t num_tokens = acoustic_embedding.size() / encoder_dim_;

    AclMdlDataset input_dataset;
    AclDataBuffer encoder_out_buf(*encoder_out_ptr_,
                                  num_frames * encoder_dim_ * sizeof(float));
    input_dataset.AddBuffer(encoder_out_buf);

    std::array<int64_t, 3> encoder_out_shape = {1, num_frames, encoder_dim_};
    AclTensorDesc encoder_out_desc(ACL_FLOAT, encoder_out_shape.size(),
                                   encoder_out_shape.data(), ACL_FORMAT_ND);
    input_dataset.SetTensorDesc(encoder_out_desc, 0);

    AclDataBuffer acoustic_embedding_buf(
        *acoustic_embedding_ptr_, num_tokens * encoder_dim_ * sizeof(float));
    input_dataset.AddBuffer(acoustic_embedding_buf);

    std::array<int64_t, 3> acoustic_embedding_shape = {1, num_tokens,
                                                       encoder_dim_};
    AclTensorDesc acoustic_embedding_desc(
        ACL_FLOAT, acoustic_embedding_shape.size(),
        acoustic_embedding_shape.data(), ACL_FORMAT_ND);
    input_dataset.SetTensorDesc(acoustic_embedding_desc, 1);

    AclMdlDataset output_dataset;
    AclDataBuffer logits_buf(*logits_ptr_,
                             num_tokens * vocab_size_ * sizeof(float));
    output_dataset.AddBuffer(logits_buf);

    ret = aclmdlExecute(*decoder_model_, input_dataset, output_dataset);

    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclmdlExecute for decoder");
  }

  void InitEncoder(const std::string &filename) {
    encoder_model_ = std::make_unique<AclModel>(filename);
    if (config_.debug) {
      auto s = encoder_model_->GetInfo();

      SHERPA_ONNX_LOGE("----encoder----\n%s\n", s.c_str());
    }
  }

  void InitPredictor(const std::string &filename) {
    predictor_model_ = std::make_unique<AclModel>(filename);
    if (config_.debug) {
      auto s = predictor_model_->GetInfo();

      SHERPA_ONNX_LOGE("----predictor----\n%s\n", s.c_str());
    }
  }

  void InitDecoder(const std::string &filename) {
    decoder_model_ = std::make_unique<AclModel>(filename);
    if (config_.debug) {
      auto s = decoder_model_->GetInfo();

      SHERPA_ONNX_LOGE("----decoder----\n%s\n", s.c_str());
    }
  }

  void InitEncoder(void *data, size_t size) {
    encoder_model_ = std::make_unique<AclModel>(data, size);
    if (config_.debug) {
      auto s = encoder_model_->GetInfo();
      SHERPA_ONNX_LOGE("----encoder----\n%s\n", s.c_str());
    }
  }

  void InitPredictor(void *data, size_t size) {
    predictor_model_ = std::make_unique<AclModel>(data, size);
    if (config_.debug) {
      auto s = predictor_model_->GetInfo();
      SHERPA_ONNX_LOGE("----predictor----\n%s\n", s.c_str());
    }
  }

  void InitDecoder(void *data, size_t size) {
    decoder_model_ = std::make_unique<AclModel>(data, size);
    if (config_.debug) {
      auto s = decoder_model_->GetInfo();
      SHERPA_ONNX_LOGE("----decoder----\n%s\n", s.c_str());
    }
  }

  void PreInit() {
    int32_t device_id = 0;
    aclError ret = aclrtSetDevice(device_id);
    SHERPA_ONNX_ASCEND_CHECK(
        ret, "Failed to call aclrtSetDevice with device id: %d", device_id);

    context_ = std::make_unique<AclContext>(device_id);

    ret = aclrtSetCurrentContext(*context_);
    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclrtSetCurrentContext");
  }

  void PostInit() {
    encoder_dim_ = encoder_model_->GetOutputShapes()[0].back();
    vocab_size_ = decoder_model_->GetOutputShapes()[0].back();

    Preallocate();
  }

  void Preallocate() {
    // max 30 seconds
    max_num_frames_ = (30 * 100 - 7) / 6 + 1;

    features_ptr_ = std::make_unique<AclDevicePtr>(max_num_frames_ * feat_dim_ *
                                                   sizeof(float));

    encoder_out_ptr_ = std::make_unique<AclDevicePtr>(
        max_num_frames_ * encoder_dim_ * sizeof(float));

    alphas_ptr_ =
        std::make_unique<AclDevicePtr>(max_num_frames_ * sizeof(float));

    acoustic_embedding_ptr_ = std::make_unique<AclDevicePtr>(
        max_num_frames_ * encoder_dim_ * sizeof(float));

    logits_ptr_ = std::make_unique<AclDevicePtr>(max_num_frames_ * vocab_size_ *
                                                 sizeof(float));
  }

  std::vector<float> ApplyLFR(std::vector<float> in) const {
    int32_t lfr_window_size = 7;
    int32_t lfr_window_shift = 6;
    int32_t in_feat_dim = 80;

    int32_t in_num_frames = in.size() / in_feat_dim;
    if (in_num_frames < lfr_window_size) {
      return {};
    }

    int32_t out_num_frames =
        (in_num_frames - lfr_window_size) / lfr_window_shift + 1;

    if (out_num_frames > max_num_frames_) {
      SHERPA_ONNX_LOGE(
          "Number of input frames %d is too large. Truncate it to %d frames.",
          out_num_frames, max_num_frames_);

      SHERPA_ONNX_LOGE(
          "Recognition result may be truncated/incomplete. Please select a "
          "model accepting longer audios.");

      out_num_frames = max_num_frames_;
    }

    int32_t out_feat_dim = in_feat_dim * lfr_window_size;

    std::vector<float> out(out_num_frames * out_feat_dim);

    const float *p_in = in.data();
    float *p_out = out.data();

    for (int32_t i = 0; i != out_num_frames; ++i) {
      std::copy(p_in, p_in + out_feat_dim, p_out);

      p_out += out_feat_dim;
      p_in += lfr_window_shift * in_feat_dim;
    }

    return out;
  }

 private:
  std::mutex mutex_;
  Acl acl_;

  std::unique_ptr<AclContext> context_;

  OfflineModelConfig config_;

  std::unique_ptr<AclModel> encoder_model_;
  std::unique_ptr<AclModel> predictor_model_;
  std::unique_ptr<AclModel> decoder_model_;

  int32_t encoder_dim_ = 0;
  int32_t vocab_size_ = 0;
  int32_t max_num_frames_ = 0;
  int32_t feat_dim_ = 560;

  std::unique_ptr<AclDevicePtr> features_ptr_;
  std::unique_ptr<AclDevicePtr> encoder_out_ptr_;
  std::unique_ptr<AclDevicePtr> alphas_ptr_;
  std::unique_ptr<AclDevicePtr> acoustic_embedding_ptr_;
  std::unique_ptr<AclDevicePtr> logits_ptr_;
};

OfflineParaformerModelAscend::OfflineParaformerModelAscend(
    const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineParaformerModelAscend::OfflineParaformerModelAscend(
    Manager *mgr, const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineParaformerModelAscend::~OfflineParaformerModelAscend() = default;

std::vector<float> OfflineParaformerModelAscend::Run(
    std::vector<float> features) const {
  return impl_->Run(std::move(features));
}

int32_t OfflineParaformerModelAscend::VocabSize() const {
  return impl_->VocabSize();
}

#if __ANDROID_API__ >= 9
template OfflineParaformerModelAscend::OfflineParaformerModelAscend(
    AAssetManager *mgr, const OfflineModelConfig &config);
#endif

#if __OHOS__
template OfflineParaformerModelAscend::OfflineParaformerModelAscend(
    NativeResourceManager *mgr, const OfflineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/ascend/offline-paraformer-model-ascend.h
================================================
// sherpa-onnx/csrc/ascend/offline-paraformer-model-ascend.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_ASCEND_OFFLINE_PARAFORMER_MODEL_ASCEND_H_
#define SHERPA_ONNX_CSRC_ASCEND_OFFLINE_PARAFORMER_MODEL_ASCEND_H_

#include <memory>
#include <vector>

#include "sherpa-onnx/csrc/offline-model-config.h"

namespace sherpa_onnx {

class OfflineParaformerModelAscend {
 public:
  ~OfflineParaformerModelAscend();

  explicit OfflineParaformerModelAscend(const OfflineModelConfig &config);

  template <typename Manager>
  OfflineParaformerModelAscend(Manager *mgr, const OfflineModelConfig &config);

  /**
   * @param features A tensor of shape (num_frames, feature_dim)
   *                 before applying LFR.
   * @returns Return a tensor of shape (num_output_frames, vocab_size)
   */
  std::vector<float> Run(std::vector<float> features) const;

  int32_t VocabSize() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ASCEND_OFFLINE_PARAFORMER_MODEL_ASCEND_H_


================================================
FILE: sherpa-onnx/csrc/ascend/offline-recognizer-zipformer-ctc-ascend-impl.h
================================================
// sherpa-onnx/csrc/ascend/offline-recognizer-zipformer-ctc-ascend-impl.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_ASCEND_OFFLINE_RECOGNIZER_ZIPFORMER_CTC_ASCEND_IMPL_H_
#define SHERPA_ONNX_CSRC_ASCEND_OFFLINE_RECOGNIZER_ZIPFORMER_CTC_ASCEND_IMPL_H_

#include <ios>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/ascend/offline-zipformer-ctc-model-ascend.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-model-config.h"
#include "sherpa-onnx/csrc/offline-recognizer-impl.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/rknn/offline-ctc-greedy-search-decoder-rknn.h"
#include "sherpa-onnx/csrc/symbol-table.h"

namespace sherpa_onnx {

// defined in ../offline-recognizer-ctc-impl.h
OfflineRecognitionResult Convert(const OfflineCtcDecoderResult &src,
                                 const SymbolTable &sym_table,
                                 int32_t frame_shift_ms,
                                 int32_t subsampling_factor);

class OfflineRecognizerZipformerCtcAscendImpl : public OfflineRecognizerImpl {
 public:
  explicit OfflineRecognizerZipformerCtcAscendImpl(
      const OfflineRecognizerConfig &config)
      : OfflineRecognizerImpl(config),
        config_(config),
        symbol_table_(config_.model_config.tokens),
        model_(std::make_unique<OfflineZipformerCtcModelAscend>(
            config.model_config)) {
    Init();
  }

  template <typename Manager>
  OfflineRecognizerZipformerCtcAscendImpl(Manager *mgr,
                                          const OfflineRecognizerConfig &config)
      : OfflineRecognizerImpl(mgr, config),
        config_(config),
        symbol_table_(mgr, config_.model_config.tokens),
        model_(std::make_unique<OfflineZipformerCtcModelAscend>(
            mgr, config.model_config)) {
    Init();
  }

  void Init() {
    if (config_.decoding_method == "greedy_search") {
      if (!symbol_table_.Contains("<blk>") &&
          !symbol_table_.Contains("<eps>") &&
          !symbol_table_.Contains("<blank>") &&
          config_.model_config.omnilingual.model.empty()) {
        // for omnilingual asr, its blank id is 0
        SHERPA_ONNX_LOGE(
            "We expect that tokens.txt contains "
            "the symbol <blk> or <eps> or <blank> and its ID.");
        SHERPA_ONNX_EXIT(-1);
      }

      int32_t blank_id = 0;
      if (symbol_table_.Contains("<blk>")) {
        blank_id = symbol_table_["<blk>"];
      } else if (symbol_table_.Contains("<eps>")) {
        // for tdnn models of the yesno recipe from icefall
        blank_id = symbol_table_["<eps>"];
      } else if (symbol_table_.Contains("<blank>")) {
        // for Wenet CTC models
        blank_id = symbol_table_["<blank>"];
      }

      decoder_ = std::make_unique<OfflineCtcGreedySearchDecoderRknn>(blank_id);
    } else {
      SHERPA_ONNX_LOGE("Only greedy_search is supported at present. Given %s",
                       config_.decoding_method.c_str());
      SHERPA_ONNX_EXIT(-1);
    }
  }

  std::unique_ptr<OfflineStream> CreateStream() const override {
    return std::make_unique<OfflineStream>(config_.feat_config);
  }

  void DecodeStreams(OfflineStream **ss, int32_t n) const override {
    for (int32_t i = 0; i != n; ++i) {
      DecodeStream(ss[i]);
    }
  }

  OfflineRecognizerConfig GetConfig() const override { return config_; }

 private:
  // Decode a single stream.
  // Some models do not support batch size > 1, e.g., WeNet CTC models.
  void DecodeStream(OfflineStream *s) const {
    std::vector<float> f = s->GetFrames();

    int32_t vocab_size = model_->VocabSize();

    std::vector<float> log_probs = model_->Run(std::move(f));
    int32_t num_out_frames = log_probs.size() / vocab_size;

    auto result =
        decoder_->Decode(log_probs.data(), num_out_frames, vocab_size);

    int32_t frame_shift_ms = 10;

    auto r = Convert(result, symbol_table_, frame_shift_ms,
                     model_->SubsamplingFactor());
    r.text = ApplyInverseTextNormalization(std::move(r.text));
    r.text = ApplyHomophoneReplacer(std::move(r.text));
    s->SetResult(r);
  }

 private:
  OfflineRecognizerConfig config_;
  SymbolTable symbol_table_;
  std::unique_ptr<OfflineZipformerCtcModelAscend> model_;
  std::unique_ptr<OfflineCtcGreedySearchDecoderRknn> decoder_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ASCEND_OFFLINE_RECOGNIZER_ZIPFORMER_CTC_ASCEND_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/ascend/offline-sense-voice-model-ascend.cc
================================================
// sherpa-onnx/csrc/ascend/offline-sense-voice-model-ascend.cc
//
// Copyright (c)  2025  Xiaomi Corporation

// References:
// https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/83RC1alpha003/API/appdevgapi/aclcppdevg_03_0298.html
#include "sherpa-onnx/csrc/ascend/offline-sense-voice-model-ascend.h"

#include <algorithm>
#include <array>
#include <memory>
#include <mutex>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/ascend/macros.h"
#include "sherpa-onnx/csrc/ascend/utils.h"
#include "sherpa-onnx/csrc/file-utils.h"

namespace sherpa_onnx {

class OfflineSenseVoiceModelAscend::Impl {
 public:
  explicit Impl(const OfflineModelConfig &config) : config_(config) {
    PreInit();
    InitModel(config_.sense_voice.model);
    PostInit();
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineModelConfig &config) : config_(config) {
    PreInit();
    {
      auto buf = ReadFile(mgr, config_.sense_voice.model);
      InitModel(buf.data(), buf.size());
    }
    PostInit();
  }

  const OfflineSenseVoiceModelMetaData &GetModelMetadata() const {
    return meta_data_;
  }

  std::vector<float> Run(std::vector<float> features, int32_t language,
                         int32_t text_norm) {
    // TODO(fangjun): Support multi clients
    std::lock_guard<std::mutex> lock(mutex_);

    features = ApplyLFR(std::move(features));
    if (features.empty()) {
      return {};
    }

    int32_t num_frames = features.size() / 560;

    aclError ret =
        aclrtMemcpy(*x_ptr_, features.size() * sizeof(float), features.data(),
                    features.size() * sizeof(float), ACL_MEMCPY_HOST_TO_DEVICE);
    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclrtMemcpy");

    std::array<int32_t, 4> prompt_array{language, 1, 2, text_norm};
    ret = aclrtMemcpy(*prompt_ptr_, prompt_ptr_->Size(), prompt_array.data(),
                      prompt_ptr_->Size(), ACL_MEMCPY_HOST_TO_DEVICE);
    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclrtMemcpy");

    AclMdlDataset input_dataset;
    AclDataBuffer x_buf(*x_ptr_, features.size() * sizeof(float));
    input_dataset.AddBuffer(x_buf);

    AclDataBuffer prompt_buf(*prompt_ptr_, prompt_ptr_->Size());
    input_dataset.AddBuffer(prompt_buf);

    // dynamic shape input
    // https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/83RC1alpha003/appdevg/acldevg/aclcppdevg_000044.html

    std::array<int64_t, 3> x_shape = {1, num_frames, 560};
    AclTensorDesc x_desc(ACL_FLOAT, x_shape.size(), x_shape.data(),
                         ACL_FORMAT_ND);
    input_dataset.SetTensorDesc(x_desc, 0);

    std::array<int64_t, 1> prompt_shape = {4};
    AclTensorDesc prompt_desc(ACL_INT32, prompt_shape.size(),
                              prompt_shape.data(), ACL_FORMAT_ND);
    input_dataset.SetTensorDesc(prompt_desc, 1);

    AclMdlDataset output_dataset;

    AclDataBuffer logits_buf(*logits_ptr_,
                             num_frames * vocab_size_ * sizeof(float));
    output_dataset.AddBuffer(logits_buf);

    ret = aclmdlExecute(*model_, input_dataset, output_dataset);
    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclmdlExecute");

    std::vector<float> logits(num_frames * vocab_size_);
    ret = aclrtMemcpy(logits.data(), num_frames * vocab_size_ * sizeof(float),
                      *logits_ptr_, num_frames * vocab_size_ * sizeof(float),
                      ACL_MEMCPY_DEVICE_TO_HOST);
    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclrtMemcpy");

    return logits;
  }

 private:
  void InitModel(const std::string &filename) {
    model_ = std::make_unique<AclModel>(filename);
    if (config_.debug) {
      auto s = model_->GetInfo();
      SHERPA_ONNX_LOGE("%s", s.c_str());
    }
  }

  void InitModel(void *data, size_t size) {
    model_ = std::make_unique<AclModel>(data, size);
    if (config_.debug) {
      auto s = model_->GetInfo();
      SHERPA_ONNX_LOGE("%s", s.c_str());
    }
  }

  void PreInit() {
    int32_t device_id = 0;
    aclError ret = aclrtSetDevice(device_id);
    SHERPA_ONNX_ASCEND_CHECK(
        ret, "Failed to call aclrtSetDevice with device id: %d", device_id);

    context_ = std::make_unique<AclContext>(device_id);

    ret = aclrtSetCurrentContext(*context_);
    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclrtSetCurrentContext");
  }

  void PostInit() {
    vocab_size_ = model_->GetOutputShapes()[0].back();

    Preallocate();
  }

  void Preallocate() {
    // max 30 seconds
    max_num_frames_ = (30 * 100 - 7) / 6 + 1;
    x_ptr_ = std::make_unique<AclDevicePtr>(max_num_frames_ * feat_dim_ *
                                            sizeof(float));

    prompt_ptr_ = std::make_unique<AclDevicePtr>(4 * sizeof(int32_t));

    logits_ptr_ = std::make_unique<AclDevicePtr>((max_num_frames_ + 4) *
                                                 vocab_size_ * sizeof(float));
  }

  std::vector<float> ApplyLFR(std::vector<float> in) const {
    int32_t lfr_window_size = meta_data_.window_size;
    int32_t lfr_window_shift = meta_data_.window_shift;
    int32_t in_feat_dim = 80;

    int32_t in_num_frames = in.size() / in_feat_dim;
    if (in_num_frames < lfr_window_size) {
      return {};
    }

    int32_t out_num_frames =
        (in_num_frames - lfr_window_size) / lfr_window_shift + 1;

    if (out_num_frames > max_num_frames_) {
      SHERPA_ONNX_LOGE(
          "Number of input frames %d is too large. Truncate it to %d frames.",
          out_num_frames, max_num_frames_);

      SHERPA_ONNX_LOGE(
          "Recognition result may be truncated/incomplete. Please select a "
          "model accepting longer audios.");

      out_num_frames = max_num_frames_;
    }

    int32_t out_feat_dim = in_feat_dim * lfr_window_size;

    std::vector<float> out(out_num_frames * out_feat_dim);

    const float *p_in = in.data();
    float *p_out = out.data();

    for (int32_t i = 0; i != out_num_frames; ++i) {
      std::copy(p_in, p_in + out_feat_dim, p_out);

      p_out += out_feat_dim;
      p_in += lfr_window_shift * in_feat_dim;
    }

    return out;
  }

 private:
  std::mutex mutex_;
  Acl acl_;

  std::unique_ptr<AclContext> context_;

  OfflineModelConfig config_;
  OfflineSenseVoiceModelMetaData meta_data_;

  std::unique_ptr<AclModel> model_;
  int32_t vocab_size_ = 0;
  int32_t max_num_frames_ = 0;
  int32_t feat_dim_ = 560;

  std::unique_ptr<AclDevicePtr> x_ptr_;
  std::unique_ptr<AclDevicePtr> prompt_ptr_;
  std::unique_ptr<AclDevicePtr> logits_ptr_;
};

OfflineSenseVoiceModelAscend::OfflineSenseVoiceModelAscend(
    const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineSenseVoiceModelAscend::OfflineSenseVoiceModelAscend(
    Manager *mgr, const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineSenseVoiceModelAscend::~OfflineSenseVoiceModelAscend() = default;

std::vector<float> OfflineSenseVoiceModelAscend::Run(
    std::vector<float> features, int32_t language, int32_t text_norm) const {
  return impl_->Run(std::move(features), language, text_norm);
}

const OfflineSenseVoiceModelMetaData &
OfflineSenseVoiceModelAscend::GetModelMetadata() const {
  return impl_->GetModelMetadata();
}

#if __ANDROID_API__ >= 9
template OfflineSenseVoiceModelAscend::OfflineSenseVoiceModelAscend(
    AAssetManager *mgr, const OfflineModelConfig &config);
#endif

#if __OHOS__
template OfflineSenseVoiceModelAscend::OfflineSenseVoiceModelAscend(
    NativeResourceManager *mgr, const OfflineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/ascend/offline-sense-voice-model-ascend.h
================================================
// sherpa-onnx/csrc/ascend/offline-sense-voice-model-ascend.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_ASCEND_OFFLINE_SENSE_VOICE_MODEL_ASCEND_H_
#define SHERPA_ONNX_CSRC_ASCEND_OFFLINE_SENSE_VOICE_MODEL_ASCEND_H_

#include <memory>
#include <vector>

#include "sherpa-onnx/csrc/offline-model-config.h"
#include "sherpa-onnx/csrc/offline-sense-voice-model-meta-data.h"

namespace sherpa_onnx {

class OfflineSenseVoiceModelAscend {
 public:
  ~OfflineSenseVoiceModelAscend();

  explicit OfflineSenseVoiceModelAscend(const OfflineModelConfig &config);

  template <typename Manager>
  OfflineSenseVoiceModelAscend(Manager *mgr, const OfflineModelConfig &config);

  /**
   * @param features A tensor of shape (num_frames, feature_dim)
   *                 before applying LFR.
   * @param language
   * @param text_norm
   * @returns Return a tensor of shape (num_output_frames, vocab_size)
   */
  std::vector<float> Run(std::vector<float> features, int32_t language,
                         int32_t text_norm) const;

  const OfflineSenseVoiceModelMetaData &GetModelMetadata() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ASCEND_OFFLINE_SENSE_VOICE_MODEL_ASCEND_H_


================================================
FILE: sherpa-onnx/csrc/ascend/offline-whisper-model-ascend.cc
================================================
// sherpa-onnx/csrc/ascend/offline-whisper-model-ascend.cc
//
// Copyright (c)  2026  Xiaomi Corporation
#include "sherpa-onnx/csrc/ascend/offline-whisper-model-ascend.h"

#include <algorithm>
#include <array>
#include <memory>
#include <mutex>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/ascend/macros.h"
#include "sherpa-onnx/csrc/ascend/utils.h"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/math.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

// masked positions: 1
// unmasked positions: 0
static void UpdateCausalMask(int32_t offset, int32_t capacity, int32_t *p) {
  std::fill(p, p + offset, 0);
  std::fill(p + offset, p + capacity, 1);
}

static WhisperModelType ParseWhisperModelFromString(const std::string &s) {
  auto pos = s.find('-');
  if (pos == std::string::npos) {
    SHERPA_ONNX_LOGE("Unexpected model input '%s'", s.c_str());
    SHERPA_ONNX_EXIT(-1);
  }

  if (s.substr(pos + 1) != "mel") {
    SHERPA_ONNX_LOGE("Unexpected model input '%s'", s.c_str());
    SHERPA_ONNX_EXIT(-1);
  }

  if (pos == 0) {
    SHERPA_ONNX_LOGE("Empty model name in '%s'", s.c_str());
    SHERPA_ONNX_EXIT(-1);
  }

  return ParseWhisperModelType(s.substr(0, pos));
}

class OfflineWhisperModelAscend::Impl {
 public:
  explicit Impl(const OfflineModelConfig &config) : config_(config) {
    PreInit();

    InitEncoder(config_.whisper.encoder);
    InitDecoder(config_.whisper.decoder);

    PostInit();
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineModelConfig &config) : config_(config) {
    PreInit();

    {
      auto buf = ReadFile(mgr, config_.whisper.encoder);
      InitEncoder(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(mgr, config_.whisper.decoder);
      InitDecoder(buf.data(), buf.size());
    }

    PostInit();
  }

  OfflineWhisperDecoderResult Run(std::vector<float> features) {
    // TODO(fangjun): Support multi clients
    std::lock_guard<std::mutex> lock(mutex_);

    OfflineWhisperDecoderResult r;

    if (features.empty()) {
      return r;
    }

    int32_t num_frames = features.size() / feat_dim_;
    if (num_frames > num_frames_) {
      SHERPA_ONNX_LOGE(
          "Number of input frames %d is too large. Truncate it to %d frames.",
          num_frames, num_frames_);

      SHERPA_ONNX_LOGE(
          "Recognition result may be truncated/incomplete. Please select a "
          "model accepting longer audios or use VAD to cut your audio into "
          "small chunks.");

      num_frames = num_frames_;
    }

    // assume at most 6 tokens per second
    int32_t num_possible_tokens = num_frames / 100.0 * 6;
    num_possible_tokens =
        std::min<int32_t>(num_possible_tokens, n_text_ctx_ / 2);

    features.resize(num_frames_ * feat_dim_, 0);

    // (num_frames_, feat_dim_) -> (feat_dim_, num_frames_)
    features = Transpose(features.data(), num_frames_, feat_dim_);

    RunEncoder(std::move(features));

    // Note(fangjun): No need to initialize the self kv cache to 0

    std::vector<int32_t> sot_sequence(sot_sequence_);

    if (IsMultilingual(model_type_)) {
      if (config_.whisper.task == "translate") {
        sot_sequence[2] = translate_;
      } else if (config_.whisper.task != "transcribe") {
        SHERPA_ONNX_LOGE(
            "Valid task values are: translate, transcribe. Given: '%s'",
            config_.whisper.task.c_str());
        SHERPA_ONNX_EXIT(-1);
      }

      if (!config_.whisper.language.empty()) {
        int32_t lang_id = GetWhisperLanguageTokenId(config_.whisper.language);
        if (lang_id < 0) {
          SHERPA_ONNX_LOGE("Unsupported language: '%s'",
                           config_.whisper.language.c_str());
          SHERPA_ONNX_EXIT(-1);
        }
        r.lang = config_.whisper.language;

        sot_sequence[1] = lang_id;
      } else {
        // detect language
        if (config_.debug) {
          SHERPA_ONNX_LOGE("Detecting language.");
        }
        token_offset_mask_cpu_[0] = sot_sequence_[0];
        token_offset_mask_cpu_[1] = 0;
        UpdateCausalMask(0, n_text_ctx_, token_offset_mask_cpu_.data() + 2);

        int32_t lang_id = DetectLanguage();
        r.lang = GetWhisperLanguageCode(lang_id);

        if (config_.debug) {
          SHERPA_ONNX_LOGE("Detected Language: %s", r.lang.c_str());
        }

        sot_sequence[1] = lang_id;
      }
    }

    int32_t &token = token_offset_mask_cpu_[0];
    int32_t &offset = token_offset_mask_cpu_[1];
    offset = 0;

    int32_t *p_mask = token_offset_mask_cpu_.data() + 2;
    UpdateCausalMask(offset, n_text_ctx_, p_mask);

    for (int32_t i = 0; i < sot_sequence.size(); ++i) {
      token = sot_sequence[i];
      token = RunDecoder();
      p_mask[offset] = 0;

      offset += 1;
    }

    if (token == eot_) {
      return r;
    }

    r.tokens.reserve(num_possible_tokens);

    while (offset < num_possible_tokens && token != eot_) {
      r.tokens.push_back(token);
      token = RunDecoder();

      p_mask[offset] = 0;
      offset += 1;
    }

    return r;
  }

  int32_t FeatureDim() const { return feat_dim_; }

 private:
  void RunEncoder(std::vector<float> features) {
    aclError ret = aclrtMemcpy(features_ptr_, features.size() * sizeof(float),
                               features.data(), features.size() * sizeof(float),
                               ACL_MEMCPY_HOST_TO_DEVICE);

    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclrtMemcpy");

    AclMdlDataset input_dataset;
    input_dataset.AddBuffer(encoder_input_buffer_[0]);

    AclMdlDataset output_dataset;

    for (auto &p : encoder_output_buffer_) {
      output_dataset.AddBuffer(p);
    }

    ret = aclmdlExecute(*encoder_model_, input_dataset, output_dataset);
    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclmdlExecute");
  }

  int32_t RunDecoder() {
    RunDecoderImpl();

    UpdateSelfKvCache();

    auto ret = aclrtMemcpy(
        logits_cpu_.data(), logits_cpu_.size() * sizeof(float), logits_ptr_,
        logits_cpu_.size() * sizeof(float), ACL_MEMCPY_DEVICE_TO_HOST);

    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclrtMemcpy");

    return MaxElementIndex(logits_cpu_.data(), logits_cpu_.size());
  }

  int32_t DetectLanguage() {
    RunDecoderImpl();

    // No need to update the Self KV cache

    auto ret = aclrtMemcpy(
        logits_cpu_.data(), logits_cpu_.size() * sizeof(float), logits_ptr_,
        logits_cpu_.size() * sizeof(float), ACL_MEMCPY_DEVICE_TO_HOST);

    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclrtMemcpy");

    const auto &all_lang_ids = GetAllWhisperLanguageTokenIds();
    int32_t lang_id = all_lang_ids[0];
    float this_logit = logits_cpu_[lang_id];

    for (int32_t i = 1; i != all_lang_ids.size(); ++i) {
      int32_t id = all_lang_ids[i];
      float p = logits_cpu_[id];

      if (p > this_logit) {
        this_logit = p;
        lang_id = id;
      }
    }

    return lang_id;
  }

  void RunDecoderImpl() {
    aclError ret =
        aclrtMemcpy(token_ptr_, token_offset_mask_cpu_.size() * sizeof(int32_t),
                    token_offset_mask_cpu_.data(),
                    token_offset_mask_cpu_.size() * sizeof(int32_t),
                    ACL_MEMCPY_HOST_TO_DEVICE);

    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclrtMemcpy");

    AclMdlDataset input_dataset;

    for (auto &p : decoder_input_buffer_) {
      input_dataset.AddBuffer(p);
    }

    AclMdlDataset output_dataset;

    for (auto &p : decoder_output_buffer_) {
      output_dataset.AddBuffer(p);
    }

    ret = aclmdlExecute(*decoder_model_, input_dataset, output_dataset);
    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclmdlExecute");
  }

  void UpdateSelfKvCache() {
    int32_t offset = token_offset_mask_cpu_[1];
    for (int32_t i = 0; i < n_text_layer_ * 2; ++i) {
      const float *src = delta_kv_ptr_[i];
      float *dst = self_kv_ptr_[i] + offset * n_text_state_;

      auto ret = aclrtMemcpy(dst, n_text_state_ * sizeof(float), src,
                             n_text_state_ * sizeof(float),
                             ACL_MEMCPY_DEVICE_TO_DEVICE);
      SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclrtMemcpy");
    }
  }

  void PreInit() {
    int32_t device_id = 0;
    aclError ret = aclrtSetDevice(device_id);
    SHERPA_ONNX_ASCEND_CHECK(
        ret, "Failed to call aclrtSetDevice with device id: %d", device_id);

    context_ = std::make_unique<AclContext>(device_id);

    ret = aclrtSetCurrentContext(*context_);
    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclrtSetCurrentContext");
  }

  void PostInit() {
    PostInitEncoder();
    PostInitDecoder();
    Preallocate();
    InitSotSequence();

    InitEncoderBuffer();
    InitDecoderBuffer();
  }

  void InitEncoderBuffer() {
    AclDataBuffer features_buf(features_ptr_,
                               feat_dim_ * num_frames_ * sizeof(float));
    encoder_input_buffer_.clear();
    encoder_input_buffer_.push_back(std::move(features_buf));

    encoder_output_buffer_.reserve(cross_kv_ptr_.size());
    for (auto p : cross_kv_ptr_) {
      AclDataBuffer tmp_buffer(p,
                               num_out_frames_ * n_text_state_ * sizeof(float));
      encoder_output_buffer_.push_back(std::move(tmp_buffer));
    }
  }

  void InitDecoderBuffer() {
    decoder_input_buffer_.reserve(1 + 2 * n_text_layer_ + 2 * n_text_layer_ +
                                  1 + 1);
    // token, self_kv, cross_kv, offset, mask

    AclDataBuffer token_buf(token_ptr_, sizeof(int32_t));
    decoder_input_buffer_.push_back(std::move(token_buf));

    for (auto &p : self_kv_ptr_) {
      AclDataBuffer tmp_buffer(p, n_text_ctx_ * n_text_state_ * sizeof(float));
      decoder_input_buffer_.push_back(std::move(tmp_buffer));
    }

    for (auto &p : cross_kv_ptr_) {
      AclDataBuffer tmp_buffer(p,
                               num_out_frames_ * n_text_state_ * sizeof(float));
      decoder_input_buffer_.push_back(std::move(tmp_buffer));
    }

    AclDataBuffer offset_buf(offset_ptr_, sizeof(int32_t));
    decoder_input_buffer_.push_back(std::move(offset_buf));

    AclDataBuffer mask_buf(mask_ptr_, n_text_ctx_ * sizeof(int32_t));
    decoder_input_buffer_.push_back(std::move(mask_buf));

    decoder_output_buffer_.reserve(1 + 2 * n_text_layer_);
    AclDataBuffer logits_buf(logits_ptr_, vocab_size_ * sizeof(float));
    decoder_output_buffer_.push_back(std::move(logits_buf));

    for (auto &p : delta_kv_ptr_) {
      AclDataBuffer tmp_buffer(p, n_text_state_ * sizeof(float));
      decoder_output_buffer_.push_back(std::move(tmp_buffer));
    }
  }

  void InitSotSequence() {
    switch (model_type_) {
      case WhisperModelType::TinyEn:
        // fallthrough
      case WhisperModelType::BaseEn:
        // fallthrough
      case WhisperModelType::SmallEn:
        // fallthrough
      case WhisperModelType::MediumEn:
        // fallthrough
        // <|startoftranscript|><|notimestamps|>
        sot_sequence_ = {50257, 50362};
        eot_ = 50256;
        break;
      case WhisperModelType::Tiny:
      case WhisperModelType::Base:
        // fallthrough
      case WhisperModelType::Small:
        // fallthrough
      case WhisperModelType::Medium:
        // fallthrough
      case WhisperModelType::Large:
        // <|startoftranscript|><|en|><|transcribe|><|notimestamps|>
        sot_sequence_ = {50258, 50259, 50359, 50363};
        eot_ = 50257;
        translate_ = 50358;
        break;
      default:
        SHERPA_ONNX_LOGE("Unsupported model type: '%s'",
                         ToString(model_type_).c_str());
        SHERPA_ONNX_EXIT(-1);
    }

    if (config_.debug) {
      std::ostringstream os;
      os << "sot_sequence: ";
      for (auto i : sot_sequence_) {
        os << i << " ";
      }
      os << "\n";
      os << "eot: " << eot_ << "\n";
      SHERPA_ONNX_LOGE("%s", os.str().c_str());
    }
  }

  void Preallocate() {
    // Allocate a single big block.
    int32_t total = 0;

    // features: (1, feat_dim_, num_frames_)
    total += num_frames_ * feat_dim_ * sizeof(float);
    // token: (1,)
    total += sizeof(int32_t);
    // offset: (1,)
    total += sizeof(int32_t);

    // mask: (1, n_text_ctx_)
    total += n_text_ctx_ * sizeof(int32_t);

    // logits: (1, 1, vocab_size_)
    total += vocab_size_ * sizeof(float);

    // cross_kv: n_text_layer_ * 2 * (num_out_frames_, n_text_state_)

    total +=
        n_text_layer_ * 2 * num_out_frames_ * n_text_state_ * sizeof(float);

    // self_kv: n_text_layer_ * 2 * (n_text_ctx_, n_text_state_)
    total += n_text_layer_ * 2 * n_text_ctx_ * n_text_state_ * sizeof(float);

    // delta_kv: n_text_layer_ * 2 * (1, 1, n_text_state_)
    total += n_text_layer_ * 2 * n_text_state_ * sizeof(float);

    ptr_ = std::make_unique<AclDevicePtr>(total);
    float *start = ptr_->Get<float>();
    int32_t *start_int32 = ptr_->Get<int32_t>();
    int32_t offset = 0;

    // (1, feat_dim_, num_frames_)
    features_ptr_ = start + offset;
    offset += feat_dim_ * num_frames_;  // in float or in int32_t, not in bytes

    // make sure token,offset,mask are contiguous in device memory

    // (1,)
    token_ptr_ = start_int32 + offset;
    offset += 1;

    // (1,)
    offset_ptr_ = start_int32 + offset;
    offset += 1;

    // (1, n_text_ctx_)
    mask_ptr_ = start_int32 + offset;
    offset += n_text_ctx_;

    // (1, 1, vocab_size_)
    logits_ptr_ = start + offset;
    offset += vocab_size_;

    // (1, num_frames_, n_text_state_)
    cross_kv_ptr_.reserve(n_text_layer_ * 2);
    for (int32_t i = 0; i < n_text_layer_ * 2; ++i) {
      auto p = start + offset;
      offset += num_out_frames_ * n_text_state_;
      cross_kv_ptr_.push_back(std::move(p));
    }

    // (1, n_text_ctx_, n_text_state_)
    self_kv_ptr_.reserve(n_text_layer_ * 2);
    for (int32_t i = 0; i < n_text_layer_ * 2; ++i) {
      auto p = start + offset;
      offset += n_text_ctx_ * n_text_state_;
      self_kv_ptr_.push_back(std::move(p));
    }

    // (1, 1, n_text_state_)
    delta_kv_ptr_.reserve(n_text_layer_ * 2);
    for (int32_t i = 0; i < n_text_layer_ * 2; ++i) {
      auto p = start + offset;
      offset += n_text_state_;
      delta_kv_ptr_.push_back(std::move(p));
    }

    if (config_.debug) {
      SHERPA_ONNX_LOGE("Allocated %d bytes, or %.3f MB", total,
                       total / 1024. / 1024.);
    }
  }

  void PostInitEncoder() {
    const std::vector<std::string> &names = encoder_model_->GetInputNames();
    model_type_ = ParseWhisperModelFromString(names[0]);
    if (config_.debug) {
      SHERPA_ONNX_LOGE("model type: %s", ToString(model_type_).c_str());
    }

    const std::vector<std::vector<int64_t>> &input_shapes =
        encoder_model_->GetInputShapes();

    const auto &mel_shape = input_shapes[0];
    if (mel_shape[0] != 1) {
      SHERPA_ONNX_LOGE("It supports only batch size == 1. Given: %d",
                       static_cast<int32_t>(mel_shape[0]));
      SHERPA_ONNX_EXIT(-1);
    }

    feat_dim_ = mel_shape[1];
    num_frames_ = mel_shape[2];

    const std::vector<std::vector<int64_t>> &output_shapes =
        encoder_model_->GetOutputShapes();

    n_text_layer_ = output_shapes.size() / 2;

    num_out_frames_ = output_shapes[0][1];
    n_text_state_ = output_shapes[0].back();

    if (config_.debug) {
      SHERPA_ONNX_LOGE("feat_dim_: %d", feat_dim_);
      SHERPA_ONNX_LOGE("num_frames_: %d", num_frames_);
      SHERPA_ONNX_LOGE("num_out_frames_: %d", num_out_frames_);
      SHERPA_ONNX_LOGE("n_text_layer_: %d", n_text_layer_);
      SHERPA_ONNX_LOGE("n_text_state_: %d", n_text_state_);
    }
  }

  void PostInitDecoder() {
    const std::vector<std::vector<int64_t>> &input_shapes =
        decoder_model_->GetInputShapes();
    // tokens, self_kv, cross_kv, offset, mask
    int32_t expected_num_inputs = 1 + 2 * n_text_layer_ + 2 * n_text_layer_ + 2;
    if (input_shapes.size() != expected_num_inputs) {
      SHERPA_ONNX_LOGE("Expect %d inputs. Actual: %d", expected_num_inputs,
                       static_cast<int32_t>(input_shapes.size()));
      SHERPA_ONNX_EXIT(-1);
    }

    const auto &s = input_shapes[1];
    if (s[0] != 1) {
      SHERPA_ONNX_LOGE("Support only batch size 1. Given: %d",
                       static_cast<int32_t>(s[0]));
      SHERPA_ONNX_EXIT(-1);
    }

    n_text_ctx_ = s[1];
    token_offset_mask_cpu_.resize(1 + 1 + n_text_ctx_);

    if (s[2] != n_text_state_) {
      SHERPA_ONNX_LOGE("Expect n_text_state_ %d. Given: %d", n_text_state_,
                       static_cast<int32_t>(s[2]));
      SHERPA_ONNX_EXIT(-1);
    }

    if (config_.debug) {
      SHERPA_ONNX_LOGE("n_text_ctx_: %d", n_text_ctx_);
    }

    const std::vector<std::vector<int64_t>> &output_shapes =
        decoder_model_->GetOutputShapes();

    vocab_size_ = output_shapes[0].back();
    logits_cpu_.resize(vocab_size_);

    if (config_.debug) {
      SHERPA_ONNX_LOGE("vocab_size: %d", vocab_size_);
    }
  }

  void InitEncoder(const std::string &filename) {
    encoder_model_ = std::make_unique<AclModel>(filename);
    if (config_.debug) {
      auto s = encoder_model_->GetInfo();

      SHERPA_ONNX_LOGE("----encoder----\n%s\n", s.c_str());
    }
  }

  void InitEncoder(void *data, size_t size) {
    encoder_model_ = std::make_unique<AclModel>(data, size);
    if (config_.debug) {
      auto s = encoder_model_->GetInfo();
      SHERPA_ONNX_LOGE("----encoder----\n%s\n", s.c_str());
    }
  }

  void InitDecoder(const std::string &filename) {
    decoder_model_ = std::make_unique<AclModel>(filename);
    if (config_.debug) {
      auto s = decoder_model_->GetInfo();

      SHERPA_ONNX_LOGE("----decoder----\n%s\n", s.c_str());
    }
  }

  void InitDecoder(void *data, size_t size) {
    decoder_model_ = std::make_unique<AclModel>(data, size);
    if (config_.debug) {
      auto s = decoder_model_->GetInfo();
      SHERPA_ONNX_LOGE("----decoder----\n%s\n", s.c_str());
    }
  }

 private:
  std::mutex mutex_;
  Acl acl_;

  std::unique_ptr<AclContext> context_;

  std::unique_ptr<AclModel> encoder_model_;
  std::unique_ptr<AclModel> decoder_model_;

  OfflineModelConfig config_;

  // tiny, tiny.en, base.en, base, etc
  WhisperModelType model_type_;
  int32_t feat_dim_ = 0;
  int32_t num_frames_ = 0;
  int32_t num_out_frames_ = 0;
  int32_t n_text_layer_ = 0;
  int32_t n_text_ctx_ = 0;
  int32_t n_text_state_ = 0;
  int32_t vocab_size_ = 0;

  std::unique_ptr<AclDevicePtr> ptr_;

  // All of the following raw pointers will point to some already allocated
  // device memory. No need to free them.
  float *features_ptr_ = nullptr;
  int32_t *token_ptr_ = nullptr;
  int32_t *offset_ptr_ = nullptr;
  int32_t *mask_ptr_ = nullptr;
  float *logits_ptr_ = nullptr;

  std::vector<float *> cross_kv_ptr_;
  std::vector<float *> self_kv_ptr_;
  std::vector<float *> delta_kv_ptr_;

  std::vector<int32_t> token_offset_mask_cpu_;
  std::vector<float> logits_cpu_;

  std::vector<int32_t> sot_sequence_;
  int32_t eot_ = 0;
  int32_t translate_ = 0;

  std::vector<AclDataBuffer> encoder_input_buffer_;
  std::vector<AclDataBuffer> encoder_output_buffer_;

  std::vector<AclDataBuffer> decoder_input_buffer_;
  std::vector<AclDataBuffer> decoder_output_buffer_;
};

OfflineWhisperModelAscend::OfflineWhisperModelAscend(
    const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineWhisperModelAscend::OfflineWhisperModelAscend(
    Manager *mgr, const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineWhisperModelAscend::~OfflineWhisperModelAscend() = default;

OfflineWhisperDecoderResult OfflineWhisperModelAscend::Run(
    std::vector<float> features) const {
  return impl_->Run(std::move(features));
}

int32_t OfflineWhisperModelAscend::FeatureDim() const {
  return impl_->FeatureDim();
}

#if __ANDROID_API__ >= 9
template OfflineWhisperModelAscend::OfflineWhisperModelAscend(
    AAssetManager *mgr, const OfflineModelConfig &config);
#endif

#if __OHOS__
template OfflineWhisperModelAscend::OfflineWhisperModelAscend(
    NativeResourceManager *mgr, const OfflineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/ascend/offline-whisper-model-ascend.h
================================================
// sherpa-onnx/csrc/ascend/offline-whisper-model-ascend.h
//
// Copyright (c)  2026  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_ASCEND_OFFLINE_WHISPER_MODEL_ASCEND_H_
#define SHERPA_ONNX_CSRC_ASCEND_OFFLINE_WHISPER_MODEL_ASCEND_H_

#include <cstdint>
#include <memory>
#include <vector>

#include "sherpa-onnx/csrc/offline-model-config.h"

namespace sherpa_onnx {

class OfflineWhisperModelAscend {
 public:
  ~OfflineWhisperModelAscend();

  explicit OfflineWhisperModelAscend(const OfflineModelConfig &config);

  template <typename Manager>
  OfflineWhisperModelAscend(Manager *mgr, const OfflineModelConfig &config);

  /**
   * @param features A tensor of shape (1, num_frames, feat_dim)
   */
  OfflineWhisperDecoderResult Run(std::vector<float> features) const;

  int32_t FeatureDim() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ASCEND_OFFLINE_WHISPER_MODEL_ASCEND_H_


================================================
FILE: sherpa-onnx/csrc/ascend/offline-zipformer-ctc-model-ascend.cc
================================================
// sherpa-onnx/csrc/ascend/offline-zipformer-ctc-model-ascend.cc
//
// Copyright (c)  2025  Xiaomi Corporation

// References:
// https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/83RC1alpha003/API/appdevgapi/aclcppdevg_03_0298.html
#include "sherpa-onnx/csrc/ascend/offline-zipformer-ctc-model-ascend.h"

#include <algorithm>
#include <array>
#include <memory>
#include <mutex>  // NOLINT
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/ascend/macros.h"
#include "sherpa-onnx/csrc/ascend/utils.h"
#include "sherpa-onnx/csrc/file-utils.h"

namespace sherpa_onnx {

class OfflineZipformerCtcModelAscend::Impl {
 public:
  explicit Impl(const OfflineModelConfig &config) : config_(config) {
    PreInit();
    InitModel(config_.zipformer_ctc.model);
    PostInit();
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineModelConfig &config) : config_(config) {
    PreInit();
    {
      auto buf = ReadFile(mgr, config_.zipformer_ctc.model);
      InitModel(buf.data(), buf.size());
    }
    PostInit();
  }

  std::vector<float> Run(std::vector<float> features) {
    // TODO(fangjun): Support multi clients
    std::lock_guard<std::mutex> lock(mutex_);

    int32_t num_frames = features.size() / feat_dim_;

    if (num_frames != max_num_frames_) {
      if (num_frames > max_num_frames_) {
        SHERPA_ONNX_LOGE(
            "Number of input frames %d is too large. Truncate it to %d frames.",
            num_frames, max_num_frames_);

        SHERPA_ONNX_LOGE(
            "Recognition result may be truncated/incomplete. Please select a "
            "model accepting longer audios.");
      }

      features.resize(max_num_frames_ * feat_dim_, 0);

      num_frames = max_num_frames_;
    }

    aclError ret =
        aclrtMemcpy(*x_ptr_, features.size() * sizeof(float), features.data(),
                    features.size() * sizeof(float), ACL_MEMCPY_HOST_TO_DEVICE);
    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclrtMemcpy");

    AclMdlDataset input_dataset;
    AclDataBuffer x_buf(*x_ptr_, features.size() * sizeof(float));
    input_dataset.AddBuffer(x_buf);

    AclMdlDataset output_dataset;

    AclDataBuffer logits_buf(*log_probs_ptr_,
                             num_output_frames_ * vocab_size_ * sizeof(float));
    output_dataset.AddBuffer(logits_buf);

    ret = aclmdlExecute(*model_, input_dataset, output_dataset);
    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclmdlExecute");

    std::vector<float> log_probs(num_output_frames_ * vocab_size_);
    ret = aclrtMemcpy(
        log_probs.data(), num_output_frames_ * vocab_size_ * sizeof(float),
        *log_probs_ptr_, num_output_frames_ * vocab_size_ * sizeof(float),
        ACL_MEMCPY_DEVICE_TO_HOST);
    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclrtMemcpy");

    return log_probs;
  }

  int32_t VocabSize() const { return vocab_size_; }

  int32_t SubsamplingFactor() const { return subsampling_factor_; }

 private:
  void InitModel(const std::string &filename) {
    model_ = std::make_unique<AclModel>(filename);
    if (config_.debug) {
      auto s = model_->GetInfo();
      SHERPA_ONNX_LOGE("%s", s.c_str());
    }
  }

  void InitModel(void *data, size_t size) {
    model_ = std::make_unique<AclModel>(data, size);
    if (config_.debug) {
      auto s = model_->GetInfo();
      SHERPA_ONNX_LOGE("%s", s.c_str());
    }
  }

  void PreInit() {
    int32_t device_id = 0;
    aclError ret = aclrtSetDevice(device_id);
    SHERPA_ONNX_ASCEND_CHECK(
        ret, "Failed to call aclrtSetDevice with device id: %d", device_id);

    context_ = std::make_unique<AclContext>(device_id);

    ret = aclrtSetCurrentContext(*context_);
    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclrtSetCurrentContext");
  }

  void PostInit() {
    auto in_shape = model_->GetInputShapes()[0];

    max_num_frames_ = in_shape[1];
    feat_dim_ = in_shape[2];

    auto out_shape = model_->GetOutputShapes()[0];

    num_output_frames_ = out_shape[1];
    vocab_size_ = out_shape[2];

    subsampling_factor_ = max_num_frames_ / out_shape[1];
    if (config_.debug) {
      SHERPA_ONNX_LOGE("max_num_frames: %d", max_num_frames_);
      SHERPA_ONNX_LOGE("feat_dim: %d", feat_dim_);
      SHERPA_ONNX_LOGE("vocab_size: %d", vocab_size_);
      SHERPA_ONNX_LOGE("subsampling_factor: %d", subsampling_factor_);
    }

    Preallocate();
  }

  void Preallocate() {
    x_ptr_ = std::make_unique<AclDevicePtr>(max_num_frames_ * feat_dim_ *
                                            sizeof(float));

    log_probs_ptr_ = std::make_unique<AclDevicePtr>(
        num_output_frames_ * vocab_size_ * sizeof(float));
  }

 private:
  std::mutex mutex_;
  Acl acl_;

  std::unique_ptr<AclContext> context_;

  OfflineModelConfig config_;

  std::unique_ptr<AclModel> model_;
  int32_t vocab_size_ = 0;
  int32_t max_num_frames_ = 0;
  int32_t num_output_frames_ = 0;
  int32_t feat_dim_ = 0;
  int32_t subsampling_factor_ = 0;

  std::unique_ptr<AclDevicePtr> x_ptr_;
  std::unique_ptr<AclDevicePtr> log_probs_ptr_;
};

OfflineZipformerCtcModelAscend::OfflineZipformerCtcModelAscend(
    const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineZipformerCtcModelAscend::OfflineZipformerCtcModelAscend(
    Manager *mgr, const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineZipformerCtcModelAscend::~OfflineZipformerCtcModelAscend() = default;

std::vector<float> OfflineZipformerCtcModelAscend::Run(
    std::vector<float> features) const {
  return impl_->Run(std::move(features));
}

int32_t OfflineZipformerCtcModelAscend::VocabSize() const {
  return impl_->VocabSize();
}

int32_t OfflineZipformerCtcModelAscend::SubsamplingFactor() const {
  return impl_->SubsamplingFactor();
}

#if __ANDROID_API__ >= 9
template OfflineZipformerCtcModelAscend::OfflineZipformerCtcModelAscend(
    AAssetManager *mgr, const OfflineModelConfig &config);
#endif

#if __OHOS__
template OfflineZipformerCtcModelAscend::OfflineZipformerCtcModelAscend(
    NativeResourceManager *mgr, const OfflineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/ascend/offline-zipformer-ctc-model-ascend.h
================================================
// sherpa-onnx/csrc/ascend/offline-zipformer-ctc-model-ascend.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_ASCEND_OFFLINE_ZIPFORMER_CTC_MODEL_ASCEND_H_
#define SHERPA_ONNX_CSRC_ASCEND_OFFLINE_ZIPFORMER_CTC_MODEL_ASCEND_H_

#include <memory>
#include <vector>

#include "sherpa-onnx/csrc/offline-model-config.h"

namespace sherpa_onnx {

class OfflineZipformerCtcModelAscend {
 public:
  ~OfflineZipformerCtcModelAscend();

  explicit OfflineZipformerCtcModelAscend(const OfflineModelConfig &config);

  template <typename Manager>
  OfflineZipformerCtcModelAscend(Manager *mgr,
                                 const OfflineModelConfig &config);

  /**
   * @param features A tensor of shape (num_frames, feature_dim)
   * @returns Return a tensor of shape (num_output_frames, vocab_size)
   */
  std::vector<float> Run(std::vector<float> features) const;

  int32_t VocabSize() const;
  int32_t SubsamplingFactor() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ASCEND_OFFLINE_ZIPFORMER_CTC_MODEL_ASCEND_H_


================================================
FILE: sherpa-onnx/csrc/ascend/utils.cc
================================================
// sherpa-onnx/csrc/ascend/utils.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/ascend/utils.h"

#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/ascend/macros.h"

namespace sherpa_onnx {

static const char *AclDataTypeToString(aclDataType data_type) {
  switch (data_type) {
    case ACL_DT_UNDEFINED:
      return "ACL_DT_UNDEFINED";
    case ACL_FLOAT:
      return "ACL_FLOAT";
    case ACL_FLOAT16:
      return "ACL_FLOAT16";
    case ACL_INT8:
      return "ACL_INT8";
    case ACL_INT32:
      return "ACL_INT32";
    case ACL_UINT8:
      return "ACL_UINT8";
    case ACL_INT16:
      return "ACL_INT16";
    case ACL_UINT16:
      return "ACL_UINT16";
    case ACL_UINT32:
      return "ACL_UINT32";
    case ACL_INT64:
      return "ACL_INT64";
    case ACL_UINT64:
      return "ACL_UINT64";
    case ACL_DOUBLE:
      return "ACL_DOUBLE";
    case ACL_BOOL:
      return "ACL_BOOL";
    case ACL_STRING:
      return "ACL_STRING";
    case ACL_COMPLEX64:
      return "ACL_COMPLEX64";
    case ACL_COMPLEX128:
      return "ACL_COMPLEX128";
    case ACL_BF16:
      return "ACL_BF16";
#if defined(ACL_INT4)
    case ACL_INT4:
      return "ACL_INT4";
#endif
    case ACL_UINT1:
      return "ACL_UINT1";
    case ACL_COMPLEX32:
      return "ACL_COMPLEX32";
    default:
      return "unknown";
  }
}

static const char *AclFormatToString(aclFormat format) {
  switch (format) {
    case ACL_FORMAT_UNDEFINED:
      return "ACL_FORMAT_UNDEFINED";
    case ACL_FORMAT_NCHW:
      return "ACL_FORMAT_NCHW";
    case ACL_FORMAT_NHWC:
      return "ACL_FORMAT_NHWC";
    case ACL_FORMAT_ND:
      return "ACL_FORMAT_ND";
    case ACL_FORMAT_NC1HWC0:
      return "ACL_FORMAT_NC1HWC0";
    case ACL_FORMAT_FRACTAL_Z:
      return "ACL_FORMAT_FRACTAL_Z";
    case ACL_FORMAT_NC1HWC0_C04:
      return "ACL_FORMAT_NC1HWC0_C04";
    case ACL_FORMAT_HWCN:
      return "ACL_FORMAT_HWCN";
    case ACL_FORMAT_NDHWC:
      return "ACL_FORMAT_NDHWC";
    case ACL_FORMAT_FRACTAL_NZ:
      return "ACL_FORMAT_FRACTAL_NZ";
    case ACL_FORMAT_NCDHW:
      return "ACL_FORMAT_NCDHW";
    case ACL_FORMAT_NDC1HWC0:
      return "ACL_FORMAT_NDC1HWC0";
    case ACL_FRACTAL_Z_3D:
      return "ACL_FRACTAL_Z_3D";
    case ACL_FORMAT_NC:
      return "ACL_FORMAT_NC";
    case ACL_FORMAT_NCL:
      return "ACL_FORMAT_NCL";
    default:
      return "unknown";
  }
}

Acl::Acl() {
  aclError ret = aclInit(nullptr);
  SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclInit");
  initialized_ = true;
}

Acl::~Acl() {
  if (initialized_) {
    aclError ret = aclFinalize();
    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclFinalize");
  }
}

AclContext::AclContext(int32_t device_id) {
  aclError ret = aclrtCreateContext(&context_, device_id);
  SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclrtCreateContext");
}

AclContext::~AclContext() {
  if (context_) {
    aclError ret = aclrtDestroyContext(context_);
    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclrtDestroyContext");
  }
}

aclrtContext AclContext::Get() const { return context_; }

AclDevicePtr::AclDevicePtr(
    size_t size, aclrtMemMallocPolicy policy /*= ACL_MEM_MALLOC_HUGE_FIRST*/) {
  if (size > 0) {
    aclError ret = aclrtMalloc(&p_, size, policy);

    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclrtMalloc with size: %zu",
                             size);
  }
  size_ = size;
}

AclDevicePtr::~AclDevicePtr() {
  if (p_) {
    aclError ret = aclrtFree(p_);
    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclrtFree");
  }
}

AclModelDesc::AclModelDesc(uint32_t model_id) {
  p_ = aclmdlCreateDesc();
  if (!p_) {
    SHERPA_ONNX_LOGE("Failed to call aclmdlCreateDesc");
    SHERPA_ONNX_EXIT(-1);
  }

  aclError ret = aclmdlGetDesc(p_, model_id);
  SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclmdlGetDesc");
}

AclModelDesc::~AclModelDesc() {
  if (p_) {
    aclError ret = aclmdlDestroyDesc(p_);
    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclmdlDestroyDesc");
  }
}

AclModel::AclModel(const std::string &model_path) {
  aclError ret = aclmdlLoadFromFile(model_path.c_str(), &model_id_);
  SHERPA_ONNX_ASCEND_CHECK(ret,
                           "Failed to call aclmdlLoadFromFile from file '%s'",
                           model_path.c_str());

  Init();
}

AclModel::AclModel(const void *model, size_t model_size) {
  aclError ret = aclmdlLoadFromMem(model, model_size, &model_id_);
  SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclmdlLoadFromMem");

  Init();
}

AclModel::~AclModel() {
  if (model_id_ != 0) {
    aclError ret = aclmdlUnload(model_id_);
    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclmdlUnload");
  }
}

void AclModel::Init() {
  desc_ = std::make_unique<AclModelDesc>(model_id_);

  InitInputNames();
  InitInputShapes();

  InitOutputNames();
  InitOutputShapes();
}

void AclModel::InitInputNames() {
  size_t num_inputs = aclmdlGetNumInputs(desc_->Get());
  input_names_.resize(num_inputs);

  for (int32_t i = 0; i < num_inputs; ++i) {
    const char *name = aclmdlGetInputNameByIndex(desc_->Get(), i);
    input_names_[i] = name;
  }
}

void AclModel::InitInputShapes() {
  size_t num_inputs = aclmdlGetNumInputs(desc_->Get());
  input_shapes_.resize(num_inputs);

  std::vector<int64_t> shape;
  for (int32_t i = 0; i < num_inputs; ++i) {
    aclmdlIODims dims;
    aclError ret = aclmdlGetInputDims(desc_->Get(), i, &dims);
    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclmdlGetInputDims");

    shape.resize(dims.dimCount);
    for (int32_t k = 0; k < dims.dimCount; ++k) {
      shape[k] = dims.dims[k];
    }
    input_shapes_[i] = std::move(shape);
  }
}

void AclModel::InitOutputNames() {
  size_t num_outputs = aclmdlGetNumOutputs(desc_->Get());
  output_names_.resize(num_outputs);
  for (int32_t i = 0; i < num_outputs; ++i) {
    const char *name = aclmdlGetOutputNameByIndex(desc_->Get(), i);
    output_names_[i] = name;
  }
}

void AclModel::InitOutputShapes() {
  size_t num_outputs = aclmdlGetNumOutputs(desc_->Get());
  output_shapes_.resize(num_outputs);

  std::vector<int64_t> shape;
  for (int32_t i = 0; i < num_outputs; ++i) {
    aclmdlIODims dims;
    aclError ret = aclmdlGetOutputDims(desc_->Get(), i, &dims);
    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclmdlGetOutputDims");

    shape.resize(dims.dimCount);
    for (int32_t k = 0; k < dims.dimCount; ++k) {
      shape[k] = dims.dims[k];
    }
    output_shapes_[i] = std::move(shape);
  }
}

std::string AclModel::GetInfo() const {
  size_t num_inputs = aclmdlGetNumInputs(desc_->Get());
  size_t num_outputs = aclmdlGetNumOutputs(desc_->Get());

  std::ostringstream os;
  os << "Model id: " << model_id_ << "\n";
  os << "Num inputs: " << num_inputs << "\n";
  os << "Num outputs: " << num_outputs << "\n";

  for (int32_t i = 0; i < num_inputs; ++i) {
    os << "---input " << i << "---\n";

    size_t size_in_bytes = aclmdlGetInputSizeByIndex(desc_->Get(), i);

    os << " size in bytes: " << size_in_bytes << "\n";
    os << " size in MB:    " << size_in_bytes / 1024. / 1024 << "\n";

    const char *name = aclmdlGetInputNameByIndex(desc_->Get(), i);
    os << " name: " << name << "\n";

    aclFormat format = aclmdlGetInputFormat(desc_->Get(), i);

    os << " format: " << AclFormatToString(format) << "\n";
    aclDataType type = aclmdlGetInputDataType(desc_->Get(), i);
    os << " data type: " << AclDataTypeToString(type) << "\n";

    aclmdlIODims dims;
    aclError ret = aclmdlGetInputDims(desc_->Get(), i, &dims);
    os << " dim: " << dims.dimCount << "\n";
    for (size_t d = 0; d < dims.dimCount; ++d) {
      os << "  " << d << " -> " << dims.name << ", " << dims.dims[d] << "\n";
    }
  }

  for (int32_t i = 0; i < num_outputs; ++i) {
    os << "---output " << i << "---\n";

    size_t size_out_bytes = aclmdlGetOutputSizeByIndex(desc_->Get(), i);

    os << " size out bytes: " << size_out_bytes << "\n";
    os << " size out MB:    " << size_out_bytes / 1024 / 1024 << "\n";

    const char *name = aclmdlGetOutputNameByIndex(desc_->Get(), i);
    os << " name: " << name << "\n";

    aclFormat format = aclmdlGetOutputFormat(desc_->Get(), i);

    os << " format: " << AclFormatToString(format) << "\n";
    aclDataType type = aclmdlGetOutputDataType(desc_->Get(), i);
    os << " data type: " << AclDataTypeToString(type) << "\n";

    aclmdlIODims dims;
    aclError ret = aclmdlGetOutputDims(desc_->Get(), i, &dims);
    os << " dim: " << dims.dimCount << "\n";
    for (size_t d = 0; d < dims.dimCount; ++d) {
      os << "  " << d << " -> " << dims.name << ", " << dims.dims[d] << "\n";
    }
  }

  return os.str();
}

AclMdlDataset::AclMdlDataset() {
  p_ = aclmdlCreateDataset();
  if (!p_) {
    SHERPA_ONNX_LOGE("Failed to call aclmdlCreateDataset");
    SHERPA_ONNX_EXIT(-1);
  }
}

AclMdlDataset::~AclMdlDataset() {
  if (p_) {
    aclError ret = aclmdlDestroyDataset(p_);
    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclmdlDestroyDataset");
  }
}

void AclMdlDataset::AddBuffer(aclDataBuffer *buffer) const {
  aclError ret = aclmdlAddDatasetBuffer(p_, buffer);
  SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclmdlAddDatasetBuffer");
}

void AclMdlDataset::SetTensorDesc(aclTensorDesc *tensor_desc,
                                  size_t index) const {
  aclError ret = aclmdlSetDatasetTensorDesc(p_, tensor_desc, index);

  SHERPA_ONNX_ASCEND_CHECK(
      ret, "Failed to call aclmdlSetDatasetTensorDesc for input %zu", index);
}

AclDataBuffer::AclDataBuffer(void *data, size_t size) {
  p_ = aclCreateDataBuffer(data, size);

  if (!p_) {
    SHERPA_ONNX_LOGE("Failed to call aclCreateDataBuffer");
    SHERPA_ONNX_EXIT(-1);
  }
}

AclDataBuffer::~AclDataBuffer() { Release(); }

void AclDataBuffer::Release() {
  if (p_) {
    aclError ret = aclDestroyDataBuffer(p_);
    SHERPA_ONNX_ASCEND_CHECK(ret, "Failed to call aclDestroyDataBuffer");
  }
  p_ = nullptr;
}

AclTensorDesc::AclTensorDesc(aclDataType data_type, int num_dims,
                             const int64_t *dims, aclFormat format) {
  p_ = aclCreateTensorDesc(data_type, num_dims, dims, format);
  if (!p_) {
    SHERPA_ONNX_LOGE("Failed to call aclCreateTensorDesc");
    SHERPA_ONNX_EXIT(-1);
  }
}

AclTensorDesc::~AclTensorDesc() {
  if (p_) {
    aclDestroyTensorDesc(p_);
  }
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/ascend/utils.h
================================================
// sherpa-onnx/csrc/ascend/utils.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_ASCEND_UTILS_H_
#define SHERPA_ONNX_CSRC_ASCEND_UTILS_H_

#include <memory>
#include <string>
#include <vector>

#include "acl/acl.h"

namespace sherpa_onnx {

class Acl {
 public:
  Acl();
  ~Acl();

  Acl(const Acl &) = delete;
  Acl &operator=(const Acl &) = delete;

  Acl(Acl &&) = delete;
  Acl &operator=(Acl &&) = delete;

 private:
  bool initialized_ = false;
};

class AclContext {
 public:
  explicit AclContext(int32_t device_id);

  ~AclContext();

  AclContext(const AclContext &) = delete;
  AclContext &operator=(const AclContext &) = delete;

  AclContext(AclContext &&) = delete;
  AclContext &operator=(AclContext &&) = delete;

  aclrtContext Get() const;
  operator aclrtContext() { return context_; }

 private:
  aclrtContext context_ = nullptr;
};

class AclDevicePtr {
 public:
  explicit AclDevicePtr(
      size_t size, aclrtMemMallocPolicy policy = ACL_MEM_MALLOC_HUGE_FIRST);

  ~AclDevicePtr();

  AclDevicePtr(const AclDevicePtr &) = delete;
  AclDevicePtr &operator=(const AclDevicePtr &) = delete;

  AclDevicePtr(AclDevicePtr &&) = delete;
  AclDevicePtr &operator=(AclDevicePtr &&) = delete;

  void *Get() const { return p_; }

  template <typename T>
  T *Get() const {
    return reinterpret_cast<T *>(p_);
  }

  operator void *() { return p_; }

  size_t Size() const { return size_; }

 private:
  void *p_ = nullptr;
  size_t size_ = 0;
};

class AclModelDesc {
 public:
  explicit AclModelDesc(uint32_t model_id);

  ~AclModelDesc();

  AclModelDesc(const AclModelDesc &) = delete;
  AclModelDesc &operator=(const AclModelDesc &) = delete;

  AclModelDesc(AclModelDesc &&) = delete;
  AclModelDesc &operator=(AclModelDesc &&) = delete;

  aclmdlDesc *Get() const { return p_; }
  operator aclmdlDesc *() const { return p_; }

  size_t Size() const { return size_; }

 private:
  aclmdlDesc *p_ = nullptr;
  size_t size_ = 0;
};

class AclModel {
 public:
  explicit AclModel(const std::string &model_path);
  AclModel(const void *model, size_t model_size);
  ~AclModel();

  uint32_t Get() const { return model_id_; }
  operator uint32_t() const { return model_id_; }

  AclModel(const AclModel &) = delete;
  AclModel &operator=(const AclModel &) = delete;

  AclModel(AclModel &&) = delete;
  AclModel &operator=(AclModel &&) = delete;

  std::string GetInfo() const;

  const std::vector<std::string> &GetInputNames() const { return input_names_; }

  const std::vector<std::vector<int64_t>> &GetInputShapes() const {
    return input_shapes_;
  }

  const std::vector<std::string> &GetOutputNames() const {
    return output_names_;
  }

  const std::vector<std::vector<int64_t>> &GetOutputShapes() const {
    return output_shapes_;
  }

 private:
  void Init();
  void InitInputNames();
  void InitInputShapes();

  void InitOutputNames();
  void InitOutputShapes();

 private:
  uint32_t model_id_ = 0;
  std::unique_ptr<AclModelDesc> desc_;

  std::vector<std::string> input_names_;
  std::vector<std::vector<int64_t>> input_shapes_;

  std::vector<std::string> output_names_;
  std::vector<std::vector<int64_t>> output_shapes_;
};

class AclMdlDataset {
 public:
  AclMdlDataset();
  ~AclMdlDataset();

  AclMdlDataset(const AclMdlDataset &) = delete;
  AclMdlDataset &operator=(const AclMdlDataset &) = delete;

  AclMdlDataset(AclMdlDataset &&) = delete;
  AclMdlDataset &operator=(AclMdlDataset &&) = delete;

  void AddBuffer(aclDataBuffer *buffer) const;
  void SetTensorDesc(aclTensorDesc *tensor_desc, size_t index) const;

  aclmdlDataset *Get() const { return p_; }
  operator aclmdlDataset *() const { return p_; }

 private:
  aclmdlDataset *p_ = nullptr;
};

class AclDataBuffer {
 public:
  AclDataBuffer(void *data, size_t size);
  ~AclDataBuffer();

  AclDataBuffer(const AclDataBuffer &) = delete;
  AclDataBuffer &operator=(const AclDataBuffer &) = delete;

  AclDataBuffer(AclDataBuffer &&other) {
    p_ = other.p_;
    other.p_ = nullptr;
  }
  AclDataBuffer &operator=(AclDataBuffer &&other) {
    if (this == &other) {
      return *this;
    }

    Release();

    p_ = other.p_;
    other.p_ = nullptr;
    return *this;
  }

  void Release();

  aclDataBuffer *Get() const { return p_; }
  operator aclDataBuffer *() const { return p_; }

 private:
  aclDataBuffer *p_ = nullptr;
};

class AclTensorDesc {
 public:
  AclTensorDesc(aclDataType data_type, int num_dims, const int64_t *dims,
                aclFormat format);
  ~AclTensorDesc();

  AclTensorDesc(const AclTensorDesc &) = delete;
  AclTensorDesc &operator=(const AclTensorDesc &) = delete;

  AclTensorDesc(AclTensorDesc &&) = delete;
  AclTensorDesc &operator=(AclTensorDesc &&) = delete;

  aclTensorDesc *Get() const { return p_; }
  operator aclTensorDesc *() const { return p_; }

 private:
  aclTensorDesc *p_ = nullptr;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ASCEND_UTILS_H_


================================================
FILE: sherpa-onnx/csrc/audio-tagging-ced-impl.h
================================================
// sherpa-onnx/csrc/audio-tagging-ced-impl.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_AUDIO_TAGGING_CED_IMPL_H_
#define SHERPA_ONNX_CSRC_AUDIO_TAGGING_CED_IMPL_H_

#include <assert.h>

#include <memory>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#include "sherpa-onnx/csrc/audio-tagging-impl.h"
#include "sherpa-onnx/csrc/audio-tagging-label-file.h"
#include "sherpa-onnx/csrc/audio-tagging.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/math.h"
#include "sherpa-onnx/csrc/offline-ced-model.h"

namespace sherpa_onnx {

class AudioTaggingCEDImpl : public AudioTaggingImpl {
 public:
  explicit AudioTaggingCEDImpl(const AudioTaggingConfig &config)
      : config_(config), model_(config.model), labels_(config.labels) {
    if (model_.NumEventClasses() != labels_.NumEventClasses()) {
      SHERPA_ONNX_LOGE("number of classes: %d (model) != %d (label file)",
                       model_.NumEventClasses(), labels_.NumEventClasses());
      exit(-1);
    }
  }

#if __ANDROID_API__ >= 9
  explicit AudioTaggingCEDImpl(AAssetManager *mgr,
                               const AudioTaggingConfig &config)
      : config_(config),
        model_(mgr, config.model),
        labels_(mgr, config.labels) {
    if (model_.NumEventClasses() != labels_.NumEventClasses()) {
      SHERPA_ONNX_LOGE("number of classes: %d (model) != %d (label file)",
                       model_.NumEventClasses(), labels_.NumEventClasses());
      exit(-1);
    }
  }
#endif

  std::unique_ptr<OfflineStream> CreateStream() const override {
    return std::make_unique<OfflineStream>(CEDTag{});
  }

  std::vector<AudioEvent> Compute(OfflineStream *s,
                                  int32_t top_k = -1) const override {
    if (top_k < 0) {
      top_k = config_.top_k;
    }

    int32_t num_event_classes = model_.NumEventClasses();

    if (top_k > num_event_classes) {
      top_k = num_event_classes;
    }

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    // WARNING(fangjun): It is fixed to 64 for CED models
    int32_t feat_dim = 64;
    std::vector<float> f = s->GetFrames();

    int32_t num_frames = f.size() / feat_dim;
    assert(feat_dim * num_frames == static_cast<int32_t>(f.size()));

    std::array<int64_t, 3> shape = {1, num_frames, feat_dim};

    Ort::Value x = Ort::Value::CreateTensor(memory_info, f.data(), f.size(),
                                            shape.data(), shape.size());

    Ort::Value probs = model_.Forward(std::move(x));

    const float *p = probs.GetTensorData<float>();

    std::vector<int32_t> top_k_indexes = TopkIndex(p, num_event_classes, top_k);

    std::vector<AudioEvent> ans(top_k);

    int32_t i = 0;

    for (int32_t index : top_k_indexes) {
      ans[i].name = labels_.GetEventName(index);
      ans[i].index = index;
      ans[i].prob = p[index];
      i += 1;
    }

    return ans;
  }

 private:
  AudioTaggingConfig config_;
  OfflineCEDModel model_;
  AudioTaggingLabels labels_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_AUDIO_TAGGING_CED_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/audio-tagging-impl.cc
================================================
// sherpa-onnx/csrc/audio-tagging-impl.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/audio-tagging-impl.h"

#include <memory>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#include "sherpa-onnx/csrc/audio-tagging-ced-impl.h"
#include "sherpa-onnx/csrc/audio-tagging-zipformer-impl.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

std::unique_ptr<AudioTaggingImpl> AudioTaggingImpl::Create(
    const AudioTaggingConfig &config) {
  if (!config.model.zipformer.model.empty()) {
    return std::make_unique<AudioTaggingZipformerImpl>(config);
  } else if (!config.model.ced.empty()) {
    return std::make_unique<AudioTaggingCEDImpl>(config);
  }

  SHERPA_ONNX_LOGE(
      "Please specify an audio tagging model! Return a null pointer");
  return nullptr;
}

#if __ANDROID_API__ >= 9
std::unique_ptr<AudioTaggingImpl> AudioTaggingImpl::Create(
    AAssetManager *mgr, const AudioTaggingConfig &config) {
  if (!config.model.zipformer.model.empty()) {
    return std::make_unique<AudioTaggingZipformerImpl>(mgr, config);
  } else if (!config.model.ced.empty()) {
    return std::make_unique<AudioTaggingCEDImpl>(mgr, config);
  }

  SHERPA_ONNX_LOGE(
      "Please specify an audio tagging model! Return a null pointer");
  return nullptr;
}
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/audio-tagging-impl.h
================================================
// sherpa-onnx/csrc/audio-tagging-impl.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_AUDIO_TAGGING_IMPL_H_
#define SHERPA_ONNX_CSRC_AUDIO_TAGGING_IMPL_H_

#include <memory>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#include "sherpa-onnx/csrc/audio-tagging.h"

namespace sherpa_onnx {

class AudioTaggingImpl {
 public:
  virtual ~AudioTaggingImpl() = default;

  static std::unique_ptr<AudioTaggingImpl> Create(
      const AudioTaggingConfig &config);

#if __ANDROID_API__ >= 9
  static std::unique_ptr<AudioTaggingImpl> Create(
      AAssetManager *mgr, const AudioTaggingConfig &config);
#endif

  virtual std::unique_ptr<OfflineStream> CreateStream() const = 0;

  virtual std::vector<AudioEvent> Compute(OfflineStream *s,
                                          int32_t top_k = -1) const = 0;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_AUDIO_TAGGING_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/audio-tagging-label-file.cc
================================================
// sherpa-onnx/csrc/audio-tagging-label-file.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/audio-tagging-label-file.h"

#include <fstream>
#include <sstream>
#include <string>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

AudioTaggingLabels::AudioTaggingLabels(const std::string &filename) {
  std::ifstream is(filename);
  Init(is);
}

#if __ANDROID_API__ >= 9
AudioTaggingLabels::AudioTaggingLabels(AAssetManager *mgr,
                                       const std::string &filename) {
  auto buf = ReadFile(mgr, filename);
  std::istringstream is(std::string(buf.data(), buf.size()));
  Init(is);
}
#endif

// Format of a label file
/*
index,mid,display_name
0,/m/09x0r,"Speech"
1,/m/05zppz,"Male speech, man speaking"
*/
void AudioTaggingLabels::Init(std::istream &is) {
  std::string line;
  std::getline(is, line);  // skip the header

  std::string index;
  std::string tmp;
  std::string name;

  while (std::getline(is, line)) {
    index.clear();
    name.clear();
    std::istringstream input2(line);

    std::getline(input2, index, ',');
    std::getline(input2, tmp, ',');
    std::getline(input2, name);

    std::size_t pos{};
    int32_t i = std::stoi(index, &pos);
    if (index.empty() || pos != index.size()) {
      SHERPA_ONNX_LOGE("Invalid line: %s", line.c_str());
      exit(-1);
    }

    if (i != static_cast<int32_t>(names_.size())) {
      SHERPA_ONNX_LOGE(
          "Index should be sorted and contiguous. Expected index: %d, given: "
          "%d.",
          static_cast<int32_t>(names_.size()), i);
    }
    if (name.empty() || name.front() != '"' || name.back() != '"') {
      SHERPA_ONNX_LOGE("Invalid line: %s", line.c_str());
      exit(-1);
    }

    names_.emplace_back(name.begin() + 1, name.end() - 1);
  }
}

const std::string &AudioTaggingLabels::GetEventName(int32_t index) const {
  return names_.at(index);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/audio-tagging-label-file.h
================================================
// sherpa-onnx/csrc/audio-tagging-label-file.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_AUDIO_TAGGING_LABEL_FILE_H_
#define SHERPA_ONNX_CSRC_AUDIO_TAGGING_LABEL_FILE_H_

#include <istream>
#include <string>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

namespace sherpa_onnx {

class AudioTaggingLabels {
 public:
  explicit AudioTaggingLabels(const std::string &filename);
#if __ANDROID_API__ >= 9
  AudioTaggingLabels(AAssetManager *mgr, const std::string &filename);
#endif

  // Return the event name for the given index.
  // The returned reference is valid as long as this object is alive
  const std::string &GetEventName(int32_t index) const;
  int32_t NumEventClasses() const { return names_.size(); }

 private:
  void Init(std::istream &is);

 private:
  std::vector<std::string> names_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_AUDIO_TAGGING_LABEL_FILE_H_


================================================
FILE: sherpa-onnx/csrc/audio-tagging-model-config.cc
================================================
// sherpa-onnx/csrc/audio-tagging-model-config.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/audio-tagging-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void AudioTaggingModelConfig::Register(ParseOptions *po) {
  zipformer.Register(po);

  po->Register("ced-model", &ced,
               "Path to CED model. Only need to pass one of --zipformer-model "
               "or --ced-model");

  po->Register("num-threads", &num_threads,
               "Number of threads to run the neural network");

  po->Register("debug", &debug,
               "true to print model information while loading it.");

  po->Register("provider", &provider,
               "Specify a provider to use: cpu, cuda, coreml");
}

bool AudioTaggingModelConfig::Validate() const {
  if (!zipformer.model.empty() && !zipformer.Validate()) {
    return false;
  }

  if (!ced.empty() && !FileExists(ced)) {
    SHERPA_ONNX_LOGE("CED model file '%s' does not exist", ced.c_str());
    return false;
  }

  if (zipformer.model.empty() && ced.empty()) {
    SHERPA_ONNX_LOGE("Please provide either --zipformer-model or --ced-model");
    return false;
  }

  return true;
}

std::string AudioTaggingModelConfig::ToString() const {
  std::ostringstream os;

  os << "AudioTaggingModelConfig(";
  os << "zipformer=" << zipformer.ToString() << ", ";
  os << "ced=\"" << ced << "\", ";
  os << "num_threads=" << num_threads << ", ";
  os << "debug=" << (debug ? "True" : "False") << ", ";
  os << "provider=\"" << provider << "\")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/audio-tagging-model-config.h
================================================
// sherpa-onnx/csrc/audio-tagging-model-config.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_AUDIO_TAGGING_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_AUDIO_TAGGING_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/offline-zipformer-audio-tagging-model-config.h"
#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct AudioTaggingModelConfig {
  struct OfflineZipformerAudioTaggingModelConfig zipformer;
  std::string ced;

  int32_t num_threads = 1;
  bool debug = false;
  std::string provider = "cpu";

  AudioTaggingModelConfig() = default;

  AudioTaggingModelConfig(
      const OfflineZipformerAudioTaggingModelConfig &zipformer,
      const std::string &ced, int32_t num_threads, bool debug,
      const std::string &provider)
      : zipformer(zipformer),
        ced(ced),
        num_threads(num_threads),
        debug(debug),
        provider(provider) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_AUDIO_TAGGING_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/audio-tagging-zipformer-impl.h
================================================
// sherpa-onnx/csrc/audio-tagging-zipformer-impl.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_AUDIO_TAGGING_ZIPFORMER_IMPL_H_
#define SHERPA_ONNX_CSRC_AUDIO_TAGGING_ZIPFORMER_IMPL_H_

#include <assert.h>

#include <memory>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#include "sherpa-onnx/csrc/audio-tagging-impl.h"
#include "sherpa-onnx/csrc/audio-tagging-label-file.h"
#include "sherpa-onnx/csrc/audio-tagging.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/math.h"
#include "sherpa-onnx/csrc/offline-zipformer-audio-tagging-model.h"

namespace sherpa_onnx {

class AudioTaggingZipformerImpl : public AudioTaggingImpl {
 public:
  explicit AudioTaggingZipformerImpl(const AudioTaggingConfig &config)
      : config_(config), model_(config.model), labels_(config.labels) {
    if (model_.NumEventClasses() != labels_.NumEventClasses()) {
      SHERPA_ONNX_LOGE("number of classes: %d (model) != %d (label file)",
                       model_.NumEventClasses(), labels_.NumEventClasses());
      exit(-1);
    }
  }

#if __ANDROID_API__ >= 9
  explicit AudioTaggingZipformerImpl(AAssetManager *mgr,
                                     const AudioTaggingConfig &config)
      : config_(config),
        model_(mgr, config.model),
        labels_(mgr, config.labels) {
    if (model_.NumEventClasses() != labels_.NumEventClasses()) {
      SHERPA_ONNX_LOGE("number of classes: %d (model) != %d (label file)",
                       model_.NumEventClasses(), labels_.NumEventClasses());
      exit(-1);
    }
  }
#endif

  std::unique_ptr<OfflineStream> CreateStream() const override {
    return std::make_unique<OfflineStream>();
  }

  std::vector<AudioEvent> Compute(OfflineStream *s,
                                  int32_t top_k = -1) const override {
    if (top_k < 0) {
      top_k = config_.top_k;
    }

    int32_t num_event_classes = model_.NumEventClasses();

    if (top_k > num_event_classes) {
      top_k = num_event_classes;
    }

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    // WARNING(fangjun): It is fixed to 80 for all models from icefall
    int32_t feat_dim = 80;
    std::vector<float> f = s->GetFrames();

    int32_t num_frames = f.size() / feat_dim;

    assert(feat_dim * num_frames == static_cast<int32_t>(f.size()));

    std::array<int64_t, 3> shape = {1, num_frames, feat_dim};

    Ort::Value x = Ort::Value::CreateTensor(memory_info, f.data(), f.size(),
                                            shape.data(), shape.size());

    int64_t x_length_scalar = num_frames;
    std::array<int64_t, 1> x_length_shape = {1};
    Ort::Value x_length =
        Ort::Value::CreateTensor(memory_info, &x_length_scalar, 1,
                                 x_length_shape.data(), x_length_shape.size());

    Ort::Value probs = model_.Forward(std::move(x), std::move(x_length));

    const float *p = probs.GetTensorData<float>();

    std::vector<int32_t> top_k_indexes = TopkIndex(p, num_event_classes, top_k);

    std::vector<AudioEvent> ans(top_k);

    int32_t i = 0;

    for (int32_t index : top_k_indexes) {
      ans[i].name = labels_.GetEventName(index);
      ans[i].index = index;
      ans[i].prob = p[index];
      i += 1;
    }

    return ans;
  }

 private:
  AudioTaggingConfig config_;
  OfflineZipformerAudioTaggingModel model_;
  AudioTaggingLabels labels_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_AUDIO_TAGGING_ZIPFORMER_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/audio-tagging.cc
================================================
// sherpa-onnx/csrc/audio-tagging.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/audio-tagging.h"

#include <memory>
#include <string>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#include "sherpa-onnx/csrc/audio-tagging-impl.h"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

std::string AudioEvent::ToString() const {
  std::ostringstream os;
  os << "AudioEvent(";
  os << "name=\"" << name << "\", ";
  os << "index=" << index << ", ";
  os << "prob=" << prob << ")";
  return os.str();
}

void AudioTaggingConfig::Register(ParseOptions *po) {
  model.Register(po);
  po->Register("labels", &labels, "Event label file");
  po->Register("top-k", &top_k, "Top k events to return in the result");
}

bool AudioTaggingConfig::Validate() const {
  if (!model.Validate()) {
    return false;
  }

  if (top_k < 1) {
    SHERPA_ONNX_LOGE("--top-k should be >= 1. Given: %d", top_k);
    return false;
  }

  if (labels.empty()) {
    SHERPA_ONNX_LOGE("Please provide --labels");
    return false;
  }

  if (!FileExists(labels)) {
    SHERPA_ONNX_LOGE("--labels '%s' does not exist", labels.c_str());
    return false;
  }

  return true;
}
std::string AudioTaggingConfig::ToString() const {
  std::ostringstream os;

  os << "AudioTaggingConfig(";
  os << "model=" << model.ToString() << ", ";
  os << "labels=\"" << labels << "\", ";
  os << "top_k=" << top_k << ")";

  return os.str();
}

AudioTagging::AudioTagging(const AudioTaggingConfig &config)
    : impl_(AudioTaggingImpl::Create(config)) {}

#if __ANDROID_API__ >= 9
AudioTagging::AudioTagging(AAssetManager *mgr, const AudioTaggingConfig &config)
    : impl_(AudioTaggingImpl::Create(mgr, config)) {}
#endif

AudioTagging::~AudioTagging() = default;

std::unique_ptr<OfflineStream> AudioTagging::CreateStream() const {
  return impl_->CreateStream();
}

std::vector<AudioEvent> AudioTagging::Compute(OfflineStream *s,
                                              int32_t top_k /*= -1*/) const {
  return impl_->Compute(s, top_k);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/audio-tagging.h
================================================
// sherpa-onnx/csrc/audio-tagging.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_AUDIO_TAGGING_H_
#define SHERPA_ONNX_CSRC_AUDIO_TAGGING_H_

#include <memory>
#include <string>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#include "sherpa-onnx/csrc/audio-tagging-model-config.h"
#include "sherpa-onnx/csrc/offline-stream.h"
#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct AudioTaggingConfig {
  AudioTaggingModelConfig model;
  std::string labels;

  int32_t top_k = 5;

  AudioTaggingConfig() = default;

  AudioTaggingConfig(const AudioTaggingModelConfig &model,
                     const std::string &labels, int32_t top_k)
      : model(model), labels(labels), top_k(top_k) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

struct AudioEvent {
  std::string name;  // name of the event
  int32_t index;     // index of the event in the label file
  float prob;        // probability of the event

  std::string ToString() const;
};

class AudioTaggingImpl;

class AudioTagging {
 public:
  explicit AudioTagging(const AudioTaggingConfig &config);

#if __ANDROID_API__ >= 9
  AudioTagging(AAssetManager *mgr, const AudioTaggingConfig &config);
#endif

  ~AudioTagging();

  std::unique_ptr<OfflineStream> CreateStream() const;

  // If top_k is -1, then config.top_k is used.
  // Otherwise, config.top_k is ignored
  //
  // Return top_k AudioEvent. ans[0].prob is the largest of all returned events.
  std::vector<AudioEvent> Compute(OfflineStream *s, int32_t top_k = -1) const;

 private:
  std::unique_ptr<AudioTaggingImpl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_AUDIO_TAGGING_H_


================================================
FILE: sherpa-onnx/csrc/axcl/axcl-engine-guard.cc
================================================
// sherpa-onnx/csrc/axcl/axcl-engine-guard.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/axcl/axcl-engine-guard.h"

#include <cstdint>

#include "axcl.h"  // NOLINT
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

AxclEngineGuard::AxclEngineGuard(
    axclrtEngineVNpuKind npuKind /*= AXCL_VNPU_DISABLE*/) {
  axclError ret = axclrtEngineInit(npuKind);
  if (ret != 0) {
    SHERPA_ONNX_LOGE("Failed to call axclrtEngineInit(). Return code is: %d",
                     static_cast<int32_t>(ret));
    SHERPA_ONNX_EXIT(-1);
  }

  initialized_ = true;
}

AxclEngineGuard::~AxclEngineGuard() {
  if (initialized_) {
    auto ret = axclrtEngineFinalize();

    if (ret != 0) {
      SHERPA_ONNX_LOGE(
          "Failed to call axclrtEngineFinalize(). Return code is: %d",
          static_cast<int32_t>(ret));
      SHERPA_ONNX_EXIT(-1);
    }
  }
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/axcl/axcl-engine-guard.h
================================================
// sherpa-onnx/csrc/axcl/axcl-engine-guard.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_AXCL_AXCL_ENGINE_GUARD_H_
#define SHERPA_ONNX_CSRC_AXCL_AXCL_ENGINE_GUARD_H_
#include "axcl.h"  // NOLINT

namespace sherpa_onnx {

class AxclEngineGuard {
 public:
  explicit AxclEngineGuard(axclrtEngineVNpuKind npuKind = AXCL_VNPU_DISABLE);
  ~AxclEngineGuard();

  AxclEngineGuard(const AxclEngineGuard &) = delete;
  AxclEngineGuard &operator=(const AxclEngineGuard &) = delete;
  AxclEngineGuard(AxclEngineGuard &&) = delete;
  AxclEngineGuard &operator=(AxclEngineGuard &&) = delete;

 private:
  bool initialized_ = false;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_AXCL_AXCL_ENGINE_GUARD_H_


================================================
FILE: sherpa-onnx/csrc/axcl/axcl-engine-io-guard.cc
================================================
// sherpa-onnx/csrc/axcl/axcl-engine-io-guard.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/axcl/axcl-engine-io-guard.h"

#include <cstdint>

#include "axcl.h"  // NOLINT
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

AxclEngineIOGuard::AxclEngineIOGuard(axclrtEngineIOInfo io_info) {
  axclError ret = axclrtEngineCreateIO(io_info, &io_);
  if (ret != 0) {
    SHERPA_ONNX_LOGE(
        "Failed to call axclrtEngineCreateIO(). Return code is: %d",
        static_cast<int32_t>(ret));
    SHERPA_ONNX_EXIT(-1);
  }

  initialized_ = true;
}

AxclEngineIOGuard::~AxclEngineIOGuard() {
  if (initialized_) {
    auto ret = axclrtEngineDestroyIO(io_);

    if (ret != 0) {
      SHERPA_ONNX_LOGE(
          "Failed to call axclrtEngineDestroyIO(). Return code is: %d",
          static_cast<int32_t>(ret));
      SHERPA_ONNX_EXIT(-1);
    }
  }
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/axcl/axcl-engine-io-guard.h
================================================
// sherpa-onnx/csrc/axcl/axcl-engine-io-guard.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_AXCL_AXCL_ENGINE_IO_GUARD_H_
#define SHERPA_ONNX_CSRC_AXCL_AXCL_ENGINE_IO_GUARD_H_
#include "axcl.h"  // NOLINT

namespace sherpa_onnx {

class AxclEngineIOGuard {
 public:
  explicit AxclEngineIOGuard(axclrtEngineIOInfo io_info);
  ~AxclEngineIOGuard();

  AxclEngineIOGuard(const AxclEngineIOGuard &) = delete;
  AxclEngineIOGuard &operator=(const AxclEngineIOGuard &) = delete;
  AxclEngineIOGuard(AxclEngineIOGuard &&) = delete;
  AxclEngineIOGuard &operator=(AxclEngineIOGuard &&) = delete;

  operator axclrtEngineIO() { return io_; }

 private:
  bool initialized_ = false;
  axclrtEngineIO io_ = nullptr;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_AXCL_AXCL_ENGINE_IO_GUARD_H_


================================================
FILE: sherpa-onnx/csrc/axcl/axcl-engine-io-info-guard.cc
================================================
// sherpa-onnx/csrc/axcl/axcl-engine-io-info-guard.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/axcl/axcl-engine-io-info-guard.h"

#include <cstdint>

#include "axcl.h"  // NOLINT
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

AxclEngineIOInfoGuard::AxclEngineIOInfoGuard(uint64_t model_id) {
  axclError ret = axclrtEngineGetIOInfo(model_id, &io_info_);
  if (ret != 0) {
    SHERPA_ONNX_LOGE(
        "Failed to call axclrtEngineGetIOInfo(). Return code is: %d",
        static_cast<int32_t>(ret));
    SHERPA_ONNX_EXIT(-1);
  }

  initialized_ = true;
}

AxclEngineIOInfoGuard::~AxclEngineIOInfoGuard() {
  if (initialized_) {
    auto ret = axclrtEngineDestroyIOInfo(io_info_);

    if (ret != 0) {
      SHERPA_ONNX_LOGE(
          "Failed to call axclrtEngineDestroyIOInfo(). Return code is: %d",
          static_cast<int32_t>(ret));
      SHERPA_ONNX_EXIT(-1);
    }
  }
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/axcl/axcl-engine-io-info-guard.h
================================================
// sherpa-onnx/csrc/axcl/axcl-engine-io-info-guard.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_AXCL_AXCL_ENGINE_IO_INFO_GUARD_H_
#define SHERPA_ONNX_CSRC_AXCL_AXCL_ENGINE_IO_INFO_GUARD_H_
#include <cstdint>

#include "axcl.h"  // NOLINT

namespace sherpa_onnx {

class AxclEngineIOInfoGuard {
 public:
  explicit AxclEngineIOInfoGuard(uint64_t model_id);
  ~AxclEngineIOInfoGuard();

  AxclEngineIOInfoGuard(const AxclEngineIOInfoGuard &) = delete;
  AxclEngineIOInfoGuard &operator=(const AxclEngineIOInfoGuard &) = delete;
  AxclEngineIOInfoGuard(AxclEngineIOInfoGuard &&) = delete;
  AxclEngineIOInfoGuard &operator=(AxclEngineIOInfoGuard &&) = delete;

  operator axclrtEngineIOInfo() { return io_info_; }

 private:
  bool initialized_ = false;
  axclrtEngineIOInfo io_info_ = nullptr;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_AXCL_AXCL_ENGINE_IO_INFO_GUARD_H_


================================================
FILE: sherpa-onnx/csrc/axcl/axcl-manager.cc
================================================
// sherpa-onnx/csrc/axcl/axcl-manager.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/axcl/axcl-manager.h"

#include <cstdint>

#include "axcl.h"  // NOLINT
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

std::mutex AxclManager::mutex_;

int32_t AxclManager::count_{0};

AxclManager::AxclManager(const char *config /*= nullptr*/) {
  std::lock_guard<std::mutex> lock(mutex_);
  if (count_ == 0) {
    auto ret = axclInit(config);
    if (ret != 0) {
      SHERPA_ONNX_LOGE("Failed to call axclInit(). Return code: %d",
                       static_cast<int32_t>(ret));
      SHERPA_ONNX_EXIT(-1);
    }
  }

  ++count_;
}

AxclManager::~AxclManager() {
  std::lock_guard<std::mutex> lock(mutex_);
  if (--count_ == 0) {
    auto ret = axclFinalize();

    if (ret != 0) {
      SHERPA_ONNX_LOGE("Failed to call axclFinalize(). Return code: %d",
                       static_cast<int32_t>(ret));
      SHERPA_ONNX_EXIT(-1);
    }
  }
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/axcl/axcl-manager.h
================================================
// sherpa-onnx/csrc/axcl/axcl-manager.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_AXCL_AXCL_MANAGER_H_
#define SHERPA_ONNX_CSRC_AXCL_AXCL_MANAGER_H_

#include <cstdint>
#include <mutex>

namespace sherpa_onnx {

class AxclManager {
 public:
  explicit AxclManager(const char *config = nullptr);
  ~AxclManager();

  AxclManager(const AxclManager &) = delete;
  AxclManager &operator=(const AxclManager &) = delete;

  AxclManager(AxclManager &&) = delete;
  AxclManager &operator=(AxclManager &&) = delete;

 private:
  static std::mutex mutex_;
  static int32_t count_;
};
}  // namespace sherpa_onnx
#endif  // SHERPA_ONNX_CSRC_AXCL_AXCL_MANAGER_H_


================================================
FILE: sherpa-onnx/csrc/axcl/axcl-model.cc
================================================
// sherpa-onnx/csrc/axcl/axcl-model.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/axcl/axcl-model.h"

#include <memory>
#include <string>
#include <vector>

#include "axcl.h"  // NOLINT
#include "sherpa-onnx/csrc/axcl/axcl-engine-guard.h"
#include "sherpa-onnx/csrc/axcl/axcl-engine-io-guard.h"
#include "sherpa-onnx/csrc/axcl/axcl-engine-io-info-guard.h"
#include "sherpa-onnx/csrc/axcl/axcl-manager.h"
#include "sherpa-onnx/csrc/axcl/utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

/*
Initialization step:

1. AxclInit()
2. set device
3. init engine
4. axclrtEngineLoadFromMem or axclrtEngineLoadFromFile
5. axclrtEngineCreateContext
 */

class AxclModel::Impl {
 public:
  Impl(const std::string &filename, int32_t device_id) {
    if (!SetDevice(device_id)) {
      return;
    }

    InitEngine();

    axclError ret = axclrtEngineLoadFromFile(filename.c_str(), &model_id_);
    if (ret != 0) {
      SHERPA_ONNX_LOGE(
          "Failed to call axclrtEngineLoadFromFile() with file: %s. Return "
          "code is: %d",
          filename.c_str(), static_cast<int32_t>(ret));
      SHERPA_ONNX_EXIT(-1);
    }

    model_loaded_ = true;

    PostInit();
  }

  Impl(const void *cpu_buf, size_t buf_len_in_bytes, int32_t device_id) {
    if (!SetDevice(device_id)) {
      return;
    }

    InitEngine();

    {
      AxclDevicePtr device_ptr(buf_len_in_bytes, AXCL_MEM_MALLOC_NORMAL_ONLY);
      auto ret = axclrtMemcpy(device_ptr, cpu_buf, buf_len_in_bytes,
                              AXCL_MEMCPY_HOST_TO_DEVICE);
      if (ret != 0) {
        SHERPA_ONNX_LOGE("Failed to call axclrtMemcpy(). Return code is: %d",
                         static_cast<int32_t>(ret));
        return;
      }

      ret = axclrtEngineLoadFromMem(device_ptr, buf_len_in_bytes, &model_id_);
      if (ret != 0) {
        SHERPA_ONNX_LOGE(
            "Failed to call axclrtEngineLoadFromMem(). Return code is: %d",
            static_cast<int32_t>(ret));
        return;
      }
    }

    model_loaded_ = true;

    PostInit();
  }

  ~Impl() {
    if (model_loaded_) {
      axclError ret = axclrtEngineUnload(model_id_);

      if (ret != 0) {
        SHERPA_ONNX_LOGE(
            "Failed to call axclrtEngineUnload(). Return code is: %d",
            static_cast<int32_t>(ret));
        SHERPA_ONNX_EXIT(-1);
      }
    }
  }

  const std::vector<std::string> &InputTensorNames() const {
    return input_tensor_names_;
  }
  const std::vector<std::string> &OutputTensorNames() const {
    return output_tensor_names_;
  }

  std::vector<int32_t> TensorShape(const std::string &name) const {
    for (size_t i = 0; i < input_tensor_names_.size(); ++i) {
      if (input_tensor_names_[i] == name) {
        return input_tensor_shapes_[i];
      }
    }

    for (size_t i = 0; i < output_tensor_names_.size(); ++i) {
      if (output_tensor_names_[i] == name) {
        return output_tensor_shapes_[i];
      }
    }

    SHERPA_ONNX_LOGE("Found no tensor with name: '%s'", name.c_str());
    return {};
  }

  int32_t TensorSizeInBytes(const std::string &name) const {
    for (size_t i = 0; i < input_tensor_names_.size(); ++i) {
      if (input_tensor_names_[i] == name) {
        return input_tensors_[i].Size();
      }
    }

    for (size_t i = 0; i < output_tensor_names_.size(); ++i) {
      if (output_tensor_names_[i] == name) {
        return output_tensors_[i].Size();
      }
    }

    SHERPA_ONNX_LOGE("Found no tensor with name: '%s'", name.c_str());
    return 0;
  }

  bool HasTensor(const std::string &name) const {
    for (size_t i = 0; i < input_tensor_names_.size(); ++i) {
      if (input_tensor_names_[i] == name) {
        return true;
      }
    }

    for (size_t i = 0; i < output_tensor_names_.size(); ++i) {
      if (output_tensor_names_[i] == name) {
        return true;
      }
    }

    return false;
  }

  template <typename T>
  bool SetInputTensorData(const std::string &name, const T *p,
                          int32_t n) const {
    for (size_t i = 0; i < input_tensor_names_.size(); ++i) {
      if (input_tensor_names_[i] == name) {
        if (n * sizeof(T) != input_tensors_[i].Size()) {
          SHERPA_ONNX_LOGE("Expected size: %zu, given: %zu",
                           input_tensors_[i].Size(), n * sizeof(T));
          return false;
        }

        auto ret =
            axclrtMemcpy(input_tensors_[i].Get(), p, input_tensors_[i].Size(),
                         AXCL_MEMCPY_HOST_TO_DEVICE);
        if (ret != 0) {
          SHERPA_ONNX_LOGE(
              "Failed to call axclrtMemcpy(). tensor name: '%s', return code: "
              "%d",
              name.c_str(), static_cast<int32_t>(ret));
          return false;
        }

        return true;
      }
    }

    SHERPA_ONNX_LOGE("Found no tensor with name: '%s'", name.c_str());

    return false;
  }

  std::vector<float> GetOutputTensorData(const std::string &name) const {
    for (size_t i = 0; i < output_tensor_names_.size(); ++i) {
      if (output_tensor_names_[i] == name) {
        size_t bytes = output_tensors_[i].Size();
        std::vector<float> out(bytes / sizeof(float));

        auto ret = axclrtMemcpy(out.data(), output_tensors_[i].Get(), bytes,
                                AXCL_MEMCPY_DEVICE_TO_HOST);
        if (ret != 0) {
          SHERPA_ONNX_LOGE(
              "Failed to call axclrtMemcpy(). tensor name: '%s', return code: "
              "%d",
              name.c_str(), static_cast<int32_t>(ret));
          return {};
        }

        return out;
      }
    }

    SHERPA_ONNX_LOGE("Found no tensor with name: '%s'", name.c_str());

    return {};
  }

  bool Run() const {
    uint32_t group = 0;
    auto ret =
        axclrtEngineExecute(model_id_, context_id_, group, *engine_io_guard_);
    if (ret != 0) {
      SHERPA_ONNX_LOGE("Failed to call axclrtEngineExecute(), return code: %d",
                       static_cast<int32_t>(ret));
      return false;
    }
    return true;
  }

  bool IsInitialized() const { return model_loaded_; }

 private:
  bool SetDevice(int32_t device_id) {
    axclrtDeviceList lst;
    auto ret = axclrtGetDeviceList(&lst);
    if (ret != 0) {
      SHERPA_ONNX_LOGE(
          "Failed to call axclrtGetDeviceList(). Return code is: %d",
          static_cast<int32_t>(ret));
      return false;
    }

    if (lst.num == 0) {
      SHERPA_ONNX_LOGE("Found 0 device.");
      return false;
    }

    // device_id counts from 0
    if (device_id < 0 || device_id >= lst.num) {
      SHERPA_ONNX_LOGE("Invalid device_id: %d. Valid range: 0-%d", device_id,
                       lst.num - 1);
      return false;
    }

    ret = axclrtSetDevice(lst.devices[device_id]);
    if (ret != 0) {
      SHERPA_ONNX_LOGE("Failed to call axclrtSetDevice(). Return code is: %d",
                       static_cast<int32_t>(ret));
      return false;
    }

    return true;
  }

  void InitEngine() { engine_guard_ = std::make_unique<AxclEngineGuard>(); }

  void PostInit() {
    InitContext();

    io_info_guard_ = std::make_unique<AxclEngineIOInfoGuard>(model_id_);

    int32_t count = 0;
    auto ret = axclrtEngineGetShapeGroupsCount(*io_info_guard_, &count);
    if (ret != 0) {
      SHERPA_ONNX_LOGE(
          "Failed to call axclrtEngineGetShapeGroupsCount(). Return code is: "
          "%d",
          static_cast<int32_t>(ret));
      SHERPA_ONNX_EXIT(-1);
    }

    if (count != 1) {
      SHERPA_ONNX_LOGE("Only support 1 group at present. Given: %d", count);
      SHERPA_ONNX_EXIT(-1);
    }

    engine_io_guard_ = std::make_unique<AxclEngineIOGuard>(*io_info_guard_);

    InitInput();
    InitOutput();
  }

  void InitContext() {
    // Note(fangjun): No need to destroy context_id_
    auto ret = axclrtEngineCreateContext(model_id_, &context_id_);
    if (ret != 0) {
      SHERPA_ONNX_LOGE(
          "Failed to call axclrtEngineCreateContext(). Return code is: %d",
          static_cast<int32_t>(ret));
      SHERPA_ONNX_EXIT(-1);
    }
  }

  void InitInput() {
    uint32_t group = 0;

    int32_t num_inputs = axclrtEngineGetNumInputs(*io_info_guard_);

    input_tensor_names_.resize(num_inputs);
    input_tensor_shapes_.reserve(num_inputs);

    for (int32_t i = 0; i < num_inputs; ++i) {
      size_t size_in_bytes =
          axclrtEngineGetInputSizeByIndex(*io_info_guard_, group, i);
      input_tensors_.emplace_back(size_in_bytes, AXCL_MEM_MALLOC_HUGE_FIRST);

      axclrtEngineIODims dims;
      auto ret = axclrtEngineGetInputDims(*io_info_guard_, group, i, &dims);
      if (ret != 0) {
        SHERPA_ONNX_LOGE(
            "Failed to call axclrtEngineGetInputDims(). Return code is: %d",
            static_cast<int32_t>(ret));
        SHERPA_ONNX_EXIT(-1);
      }

      input_tensor_shapes_.emplace_back(dims.dims, dims.dims + dims.dimCount);

      input_tensor_names_[i] =
          axclrtEngineGetInputNameByIndex(*io_info_guard_, i);

      ret = axclrtEngineSetInputBufferByIndex(*engine_io_guard_, i,
                                              input_tensors_[i], size_in_bytes);
      if (ret != 0) {
        SHERPA_ONNX_LOGE(
            "Failed to call axclrtEngineSetInputBufferByIndex(). Return code "
            "is: %d",
            static_cast<int32_t>(ret));
        SHERPA_ONNX_EXIT(-1);
      }
    }
  }

  void InitOutput() {
    uint32_t group = 0;

    int32_t num_outputs = axclrtEngineGetNumOutputs(*io_info_guard_);

    output_tensor_names_.resize(num_outputs);
    output_tensor_shapes_.reserve(num_outputs);

    for (int32_t i = 0; i < num_outputs; ++i) {
      auto size_in_bytes =
          axclrtEngineGetOutputSizeByIndex(*io_info_guard_, group, i);
      output_tensors_.emplace_back(size_in_bytes, AXCL_MEM_MALLOC_HUGE_FIRST);

      axclrtEngineIODims dims;
      auto ret = axclrtEngineGetOutputDims(*io_info_guard_, group, i, &dims);
      if (ret != 0) {
        SHERPA_ONNX_LOGE(
            "Failed to call axclrtEngineGetOutputDims(). Return code is: %d",
            static_cast<int32_t>(ret));
        SHERPA_ONNX_EXIT(-1);
      }

      output_tensor_shapes_.emplace_back(dims.dims, dims.dims + dims.dimCount);
      output_tensor_names_[i] =
          axclrtEngineGetOutputNameByIndex(*io_info_guard_, i);

      ret = axclrtEngineSetOutputBufferByIndex(
          *engine_io_guard_, i, output_tensors_[i], size_in_bytes);
      if (ret != 0) {
        SHERPA_ONNX_LOGE(
            "Failed to call axclrtEngineSetOutputBufferByIndex(). Return code "
            "is: %d",
            static_cast<int32_t>(ret));
        SHERPA_ONNX_EXIT(-1);
      }
    }
  }

 private:
  AxclManager manager_;
  std::unique_ptr<AxclEngineGuard> engine_guard_;
  std::unique_ptr<AxclEngineIOGuard> engine_io_guard_;
  std::unique_ptr<AxclEngineIOInfoGuard> io_info_guard_;

  bool model_loaded_ = false;
  uint64_t model_id_ = 0;
  uint64_t context_id_ = 0;

  std::vector<std::string> input_tensor_names_;
  std::vector<std::string> output_tensor_names_;

  std::vector<AxclDevicePtr> input_tensors_;
  std::vector<AxclDevicePtr> output_tensors_;

  std::vector<std::vector<int32_t>> input_tensor_shapes_;
  std::vector<std::vector<int32_t>> output_tensor_shapes_;
};

AxclModel::AxclModel(const std::string &filename, int32_t device_id /*= 0*/)
    : impl_(std::make_unique<Impl>(filename, device_id)) {}

AxclModel::AxclModel(const void *cpu_buf, size_t buf_len_in_bytes,
                     int32_t device_id /*= 0*/)
    : impl_(std::make_unique<Impl>(cpu_buf, buf_len_in_bytes, device_id)) {}

AxclModel::~AxclModel() = default;

const std::vector<std::string> &AxclModel::InputTensorNames() const {
  return impl_->InputTensorNames();
}
const std::vector<std::string> &AxclModel::OutputTensorNames() const {
  return impl_->OutputTensorNames();
}

std::vector<int32_t> AxclModel::TensorShape(const std::string &name) const {
  return impl_->TensorShape(name);
}

int32_t AxclModel::TensorSizeInBytes(const std::string &name) const {
  return impl_->TensorSizeInBytes(name);
}

bool AxclModel::HasTensor(const std::string &name) const {
  return impl_->HasTensor(name);
}

bool AxclModel::SetInputTensorData(const std::string &name, const float *p,
                                   int32_t n) const {
  return impl_->SetInputTensorData(name, p, n);
}

bool AxclModel::SetInputTensorData(const std::string &name, const int32_t *p,
                                   int32_t n) const {
  return impl_->SetInputTensorData(name, p, n);
}

std::vector<float> AxclModel::GetOutputTensorData(
    const std::string &name) const {
  return impl_->GetOutputTensorData(name);
}

bool AxclModel::Run() const { return impl_->Run(); }

bool AxclModel::IsInitialized() const { return impl_->IsInitialized(); }

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/axcl/axcl-model.h
================================================
// sherpa-onnx/csrc/axcl/axcl-model.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_AXCL_AXCL_MODEL_H_
#define SHERPA_ONNX_CSRC_AXCL_AXCL_MODEL_H_

#include <cstdint>
#include <memory>
#include <string>
#include <vector>

namespace sherpa_onnx {

class AxclModel {
 public:
  explicit AxclModel(const std::string &filename, int32_t device_id = 0);

  AxclModel(const void *cpu_buf, size_t buf_len_in_bytes,
            int32_t device_id = 0);
  ~AxclModel();

  const std::vector<std::string> &InputTensorNames() const;
  const std::vector<std::string> &OutputTensorNames() const;

  std::vector<int32_t> TensorShape(const std::string &name) const;
  int32_t TensorSizeInBytes(const std::string &name) const;

  bool HasTensor(const std::string &name) const;

  bool SetInputTensorData(const std::string &name, const float *p,
                          int32_t n) const;

  bool SetInputTensorData(const std::string &name, const int32_t *p,
                          int32_t n) const;

  std::vector<float> GetOutputTensorData(const std::string &name) const;

  bool Run() const;
  bool IsInitialized() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_AXCL_AXCL_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/axcl/offline-sense-voice-model-axcl.cc
================================================
// sherpa-onnx/csrc/axcl/offline-sense-voice-model-axcl.cc
//
// Copyright (c)  2025  M5Stack Technology CO LTD

#include "sherpa-onnx/csrc/axcl/offline-sense-voice-model-axcl.h"

#include <algorithm>
#include <array>
#include <cstring>
#include <memory>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/axcl/axcl-model.h"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

class OfflineSenseVoiceModelAxcl::Impl {
 public:
  explicit Impl(const OfflineModelConfig &config) : config_(config) {
    model_ = std::make_unique<AxclModel>(config_.sense_voice.model);

    PostInit();
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineModelConfig &config) : config_(config) {
    auto buf = ReadFile(mgr, config_.sense_voice.model);
    model_ = std::make_unique<AxclModel>(buf.data(), buf.size());

    PostInit();
  }

  const OfflineSenseVoiceModelMetaData &GetModelMetadata() const {
    return meta_data_;
  }

  std::vector<float> Run(std::vector<float> features, int32_t language,
                         int32_t text_norm) {
    features = ApplyLFR(std::move(features));
    std::array<int32_t, 4> prompt{language, 1, 2, text_norm};

    model_->SetInputTensorData("x", features.data(), features.size());
    model_->SetInputTensorData("prompt", prompt.data(), prompt.size());
    model_->Run();
    return model_->GetOutputTensorData("logits");
  }

 private:
  void PostInit() {
    if (!model_->IsInitialized()) {
      SHERPA_ONNX_LOGE("Failed to initialize the model with '%s'",
                       config_.sense_voice.model.c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    num_input_frames_ = model_->TensorShape("x")[1];

    if (config_.debug) {
      SHERPA_ONNX_LOGE("  num_input_frames_ = %d", num_input_frames_);
    }
  }

  std::vector<float> ApplyLFR(std::vector<float> in) const {
    int32_t lfr_window_size = meta_data_.window_size;
    int32_t lfr_window_shift = meta_data_.window_shift;
    int32_t in_feat_dim = 80;
    int32_t in_num_frames = in.size() / in_feat_dim;
    int32_t out_num_frames =
        (in_num_frames - lfr_window_size) / lfr_window_shift + 1;

    if (out_num_frames > num_input_frames_) {
      SHERPA_ONNX_LOGE(
          "Number of input frames %d is too large. Truncate it to %d frames.",
          out_num_frames, num_input_frames_);
      SHERPA_ONNX_LOGE(
          "Recognition result may be truncated/incomplete. Please select a "
          "model accepting longer audios.");
      out_num_frames = num_input_frames_;
    }

    int32_t out_feat_dim = in_feat_dim * lfr_window_size;
    std::vector<float> out(num_input_frames_ * out_feat_dim);
    const float *p_in = in.data();
    float *p_out = out.data();
    for (int32_t i = 0; i != out_num_frames; ++i) {
      std::copy(p_in, p_in + out_feat_dim, p_out);
      p_out += out_feat_dim;
      p_in += lfr_window_shift * in_feat_dim;
    }
    return out;
  }

 private:
  OfflineModelConfig config_;
  std::unique_ptr<AxclModel> model_;
  OfflineSenseVoiceModelMetaData meta_data_;
  int32_t num_input_frames_ = -1;
};

OfflineSenseVoiceModelAxcl::~OfflineSenseVoiceModelAxcl() = default;

OfflineSenseVoiceModelAxcl::OfflineSenseVoiceModelAxcl(
    const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineSenseVoiceModelAxcl::OfflineSenseVoiceModelAxcl(
    Manager *mgr, const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

std::vector<float> OfflineSenseVoiceModelAxcl::Run(std::vector<float> features,
                                                   int32_t language,
                                                   int32_t text_norm) const {
  return impl_->Run(std::move(features), language, text_norm);
}

const OfflineSenseVoiceModelMetaData &
OfflineSenseVoiceModelAxcl::GetModelMetadata() const {
  return impl_->GetModelMetadata();
}

#if __ANDROID_API__ >= 9
template OfflineSenseVoiceModelAxcl::OfflineSenseVoiceModelAxcl(
    AAssetManager *mgr, const OfflineModelConfig &config);
#endif

#if __OHOS__
template OfflineSenseVoiceModelAxcl::OfflineSenseVoiceModelAxcl(
    NativeResourceManager *mgr, const OfflineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/axcl/offline-sense-voice-model-axcl.h
================================================
// sherpa-onnx/csrc/axcl/offline-sense-voice-model-axcl.h
//
// Copyright (c)  2025  M5Stack Technology CO LTD

#ifndef SHERPA_ONNX_CSRC_AXCL_OFFLINE_SENSE_VOICE_MODEL_AXCL_H_
#define SHERPA_ONNX_CSRC_AXCL_OFFLINE_SENSE_VOICE_MODEL_AXCL_H_

#include <memory>
#include <vector>

#include "sherpa-onnx/csrc/offline-model-config.h"
#include "sherpa-onnx/csrc/offline-sense-voice-model-meta-data.h"

namespace sherpa_onnx {

class OfflineSenseVoiceModelAxcl {
 public:
  ~OfflineSenseVoiceModelAxcl();

  explicit OfflineSenseVoiceModelAxcl(const OfflineModelConfig &config);

  template <typename Manager>
  OfflineSenseVoiceModelAxcl(Manager *mgr, const OfflineModelConfig &config);

  std::vector<float> Run(std::vector<float> features, int32_t language,
                         int32_t text_norm) const;

  const OfflineSenseVoiceModelMetaData &GetModelMetadata() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_AXCL_OFFLINE_SENSE_VOICE_MODEL_AXCL_H_


================================================
FILE: sherpa-onnx/csrc/axcl/utils.cc
================================================
// sherpa-onnx/csrc/axcl/utils.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/axcl/utils.h"

#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

AxclDevicePtr::AxclDevicePtr(
    size_t size,
    axclrtMemMallocPolicy policy /*= AXCL_MEM_MALLOC_HUGE_FIRST*/) {
  auto ret = axclrtMalloc(&p_, size, policy);
  if (ret != 0) {
    SHERPA_ONNX_LOGE("Failed to call axclrtMalloc(). Return code: %d",
                     static_cast<int32_t>(ret));
    SHERPA_ONNX_EXIT(-1);
  }

  size_ = size;
}

void AxclDevicePtr::Release() {
  if (!p_) {
    return;
  }

  auto ret = axclrtFree(p_);
  if (ret != 0) {
    SHERPA_ONNX_LOGE("Failed to call axclrtFree(). Return code: %d",
                     static_cast<int32_t>(ret));
    SHERPA_ONNX_EXIT(-1);
  }
  p_ = nullptr;
  size_ = 0;
}

AxclDevicePtr::~AxclDevicePtr() { Release(); }

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/axcl/utils.h
================================================
// sherpa-onnx/csrc/axcl/utils.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_AXCL_UTILS_H_
#define SHERPA_ONNX_CSRC_AXCL_UTILS_H_

#include "axcl.h"  // NOLINT

namespace sherpa_onnx {

class AxclDevicePtr {
 public:
  explicit AxclDevicePtr(
      size_t size, axclrtMemMallocPolicy policy = AXCL_MEM_MALLOC_HUGE_FIRST);

  ~AxclDevicePtr();

  AxclDevicePtr(const AxclDevicePtr &) = delete;
  AxclDevicePtr &operator=(const AxclDevicePtr &) = delete;

  AxclDevicePtr(AxclDevicePtr &&other) {
    p_ = other.p_;
    size_ = other.size_;

    other.p_ = nullptr;
    other.size_ = 0;
  }
  AxclDevicePtr &operator=(AxclDevicePtr &&other) {
    if (this == &other) {
      return *this;
    }
    Release();
    p_ = other.p_;
    size_ = other.size_;

    other.p_ = nullptr;
    other.size_ = 0;

    return *this;
  }

  void Release();

  void *Get() const { return p_; }
  operator void *() { return p_; }

  size_t Size() const { return size_; }

 private:
  void *p_ = nullptr;
  size_t size_ = 0;  // in bytes
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_AXCL_UTILS_H_


================================================
FILE: sherpa-onnx/csrc/axera/ax-engine-guard.cc
================================================
// sherpa-onnx/csrc/axera/ax-engine-guard.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/axera/ax-engine-guard.h"

#include <cstring>

#include "ax_engine_api.h"  // NOLINT
#include "ax_sys_api.h"     // NOLINT
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

thread_local int32_t AxEngineGuard::count_ = 0;

AxEngineGuard::AxEngineGuard() {
  if (count_ == 0) {
    auto ret = AX_SYS_Init();
    if (ret != 0) {
      SHERPA_ONNX_LOGE("Failed to call AX_SYS_Init. ret code: %d",
                       static_cast<int32_t>(ret));

      SHERPA_ONNX_EXIT(-1);
    }

    AX_ENGINE_NPU_ATTR_T npu_attr;
    memset(&npu_attr, 0, sizeof(npu_attr));
    npu_attr.eHardMode = AX_ENGINE_VIRTUAL_NPU_DISABLE;
    ret = AX_ENGINE_Init(&npu_attr);

    if (ret != 0) {
      SHERPA_ONNX_LOGE("Failed to call AX_ENGINE_Init. ret code: %d",
                       static_cast<int32_t>(ret));

      SHERPA_ONNX_EXIT(-1);
    }
  }

  ++count_;
}

AxEngineGuard::~AxEngineGuard() {
  --count_;
  if (count_ == 0) {
    AX_ENGINE_Deinit();
    AX_SYS_Deinit();
  }
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/axera/ax-engine-guard.h
================================================
// sherpa-onnx/csrc/axera/ax-engine-guard.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_AXERA_AX_ENGINE_GUARD_H_
#define SHERPA_ONNX_CSRC_AXERA_AX_ENGINE_GUARD_H_
#include <cstdint>

namespace sherpa_onnx {

class AxEngineGuard {
 public:
  AxEngineGuard();
  ~AxEngineGuard();

  AxEngineGuard(const AxEngineGuard &) = delete;
  AxEngineGuard &operator=(const AxEngineGuard &) = delete;

  AxEngineGuard(AxEngineGuard &&) = delete;
  AxEngineGuard &operator=(AxEngineGuard &&) = delete;

 private:
  static thread_local int32_t count_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_AXERA_AX_ENGINE_GUARD_H_


================================================
FILE: sherpa-onnx/csrc/axera/offline-sense-voice-model-axera.cc
================================================
// sherpa-onnx/csrc/axera/offline-sense-voice-model-axera.cc
//
// Copyright (c)  2025  M5Stack Technology CO LTD

#include "sherpa-onnx/csrc/axera/offline-sense-voice-model-axera.h"

#include <algorithm>
#include <array>
#include <cstring>
#include <mutex>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "ax_engine_api.h"  // NOLINT
#include "ax_sys_api.h"     // NOLINT
#include "sherpa-onnx/csrc/axera/ax-engine-guard.h"
#include "sherpa-onnx/csrc/axera/utils.h"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

class OfflineSenseVoiceModelAxera::Impl {
 public:
  ~Impl() {
    FreeIO(&io_data_);
    if (handle_) {
      AX_ENGINE_DestroyHandle(handle_);
    }
  }

  explicit Impl(const OfflineModelConfig &config) : config_(config) {
    auto buf = ReadFile(config_.sense_voice.model);
    Init(buf.data(), buf.size());
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineModelConfig &config) : config_(config) {
    auto buf = ReadFile(mgr, config_.sense_voice.model);
    Init(buf.data(), buf.size());
  }

  const OfflineSenseVoiceModelMetaData &GetModelMetadata() const {
    return meta_data_;
  }

  std::vector<float> Run(std::vector<float> features, int32_t language,
                         int32_t text_norm) {
    // TODO(fangjun): Support multi clients
    std::lock_guard<std::mutex> lock(mutex_);

    features = ApplyLFR(std::move(features));

    std::array<int32_t, 4> prompt{language, 1, 2, text_norm};

    const auto &in0_meta = io_info_->pInputs[0];
    size_t bytes0 = in0_meta.nSize;

    if (bytes0 != features.size() * sizeof(float)) {
      SHERPA_ONNX_LOGE(
          "Feature size mismatch. model expects %u bytes, but got %zu bytes",
          in0_meta.nSize, features.size() * sizeof(float));
      SHERPA_ONNX_EXIT(-1);
    }

    std::memcpy(io_data_.pInputs[0].pVirAddr, features.data(), bytes0);

    const auto &in1_meta = io_info_->pInputs[1];
    size_t bytes1 = in1_meta.nSize;
    if (bytes1 != prompt.size() * sizeof(int32_t)) {
      SHERPA_ONNX_LOGE(
          "Prompt size mismatch. model expects %u bytes, but got %zu bytes",
          in1_meta.nSize, prompt.size() * sizeof(int32_t));
      SHERPA_ONNX_EXIT(-1);
    }
    std::memcpy(io_data_.pInputs[1].pVirAddr, prompt.data(), bytes1);

    auto ret = AX_ENGINE_RunSync(handle_, &io_data_);
    if (ret != 0) {
      SHERPA_ONNX_LOGE("AX_ENGINE_RunSync failed, ret = %d", ret);
      SHERPA_ONNX_EXIT(-1);
    }

    const auto &out_meta = io_info_->pOutputs[0];
    auto &out_buf = io_data_.pOutputs[0];

    size_t out_elems = out_meta.nSize / sizeof(float);
    std::vector<float> out(out_elems);

    std::memcpy(out.data(), out_buf.pVirAddr, out_meta.nSize);

    return out;
  }

 private:
  void Init(void *model_data, size_t model_data_length) {
    InitContext(model_data, model_data_length, config_.debug, &handle_);

    InitInputOutputAttrs(handle_, config_.debug, &io_info_);

    PrepareIO(io_info_, &io_data_, config_.debug);

    if (!io_info_ || io_info_->nInputSize != 2 || !io_info_->pInputs) {
      SHERPA_ONNX_LOGE("No input tensor in Axera model");
      SHERPA_ONNX_EXIT(-1);
    }

    auto &in0 = io_info_->pInputs[0];
    if (in0.nShapeSize < 2) {
      SHERPA_ONNX_LOGE("Input tensor rank is too small (nShapeSize = %u)",
                       in0.nShapeSize);
      SHERPA_ONNX_EXIT(-1);
    }
    num_input_frames_ = in0.pShape[1];

    if (io_info_->nOutputSize != 1) {
      SHERPA_ONNX_LOGE("Axera sense voice model expected only 1 output tensor");
      SHERPA_ONNX_EXIT(-1);
    }

    if (config_.debug) {
      SHERPA_ONNX_LOGE("Axera SenseVoice model init done.");
      SHERPA_ONNX_LOGE("  num_input_frames_ = %d", num_input_frames_);
    }
  }

  std::vector<float> ApplyLFR(std::vector<float> in) const {
    int32_t lfr_window_size = meta_data_.window_size;
    int32_t lfr_window_shift = meta_data_.window_shift;
    int32_t in_feat_dim = 80;
    int32_t in_num_frames = in.size() / in_feat_dim;
    int32_t out_num_frames =
        (in_num_frames - lfr_window_size) / lfr_window_shift + 1;

    if (out_num_frames > num_input_frames_) {
      SHERPA_ONNX_LOGE(
          "Number of input frames %d is too large. Truncate it to %d frames.",
          out_num_frames, num_input_frames_);
      SHERPA_ONNX_LOGE(
          "Recognition result may be truncated/incomplete. Please select a "
          "model accepting longer audios.");
      out_num_frames = num_input_frames_;
    }

    int32_t out_feat_dim = in_feat_dim * lfr_window_size;
    std::vector<float> out(num_input_frames_ * out_feat_dim);
    const float *p_in = in.data();
    float *p_out = out.data();

    for (int32_t i = 0; i != out_num_frames; ++i) {
      std::copy(p_in, p_in + out_feat_dim, p_out);
      p_out += out_feat_dim;
      p_in += lfr_window_shift * in_feat_dim;
    }

    return out;
  }

 private:
  std::mutex mutex_;
  AxEngineGuard ax_engine_guard_;

  OfflineModelConfig config_;
  AX_ENGINE_HANDLE handle_ = nullptr;
  AX_ENGINE_IO_INFO_T *io_info_ = nullptr;
  AX_ENGINE_IO_T io_data_;
  OfflineSenseVoiceModelMetaData meta_data_;
  int32_t num_input_frames_ = -1;
};

OfflineSenseVoiceModelAxera::~OfflineSenseVoiceModelAxera() = default;

OfflineSenseVoiceModelAxera::OfflineSenseVoiceModelAxera(
    const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineSenseVoiceModelAxera::OfflineSenseVoiceModelAxera(
    Manager *mgr, const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

std::vector<float> OfflineSenseVoiceModelAxera::Run(std::vector<float> features,
                                                    int32_t language,
                                                    int32_t text_norm) const {
  return impl_->Run(std::move(features), language, text_norm);
}

const OfflineSenseVoiceModelMetaData &
OfflineSenseVoiceModelAxera::GetModelMetadata() const {
  return impl_->GetModelMetadata();
}

#if __ANDROID_API__ >= 9
template OfflineSenseVoiceModelAxera::OfflineSenseVoiceModelAxera(
    AAssetManager *mgr, const OfflineModelConfig &config);
#endif

#if __OHOS__
template OfflineSenseVoiceModelAxera::OfflineSenseVoiceModelAxera(
    NativeResourceManager *mgr, const OfflineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/axera/offline-sense-voice-model-axera.h
================================================
// sherpa-onnx/csrc/axera/offline-sense-voice-model-axera.h
//
// Copyright (c)  2025  M5Stack Technology CO LTD

#ifndef SHERPA_ONNX_CSRC_AXERA_OFFLINE_SENSE_VOICE_MODEL_AXERA_H_
#define SHERPA_ONNX_CSRC_AXERA_OFFLINE_SENSE_VOICE_MODEL_AXERA_H_

#include <memory>
#include <vector>

#include "sherpa-onnx/csrc/offline-model-config.h"
#include "sherpa-onnx/csrc/offline-sense-voice-model-meta-data.h"

namespace sherpa_onnx {

class OfflineSenseVoiceModelAxera {
 public:
  ~OfflineSenseVoiceModelAxera();

  explicit OfflineSenseVoiceModelAxera(const OfflineModelConfig &config);

  template <typename Manager>
  OfflineSenseVoiceModelAxera(Manager *mgr, const OfflineModelConfig &config);

  std::vector<float> Run(std::vector<float> features, int32_t language,
                         int32_t text_norm) const;

  const OfflineSenseVoiceModelMetaData &GetModelMetadata() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_AXERA_OFFLINE_SENSE_VOICE_MODEL_AXERA_H_


================================================
FILE: sherpa-onnx/csrc/axera/utils.cc
================================================
// sherpa-onnx/csrc/axera/utils.cc
//
// Copyright (c)  2025  M5Stack Technology CO LTD

#include "sherpa-onnx/csrc/axera/utils.h"

#include <string.h>

#include <sstream>
#include <string>
#include <utility>

#include "ax_engine_api.h"   // NOLINT
#include "ax_engine_type.h"  // NOLINT
#include "ax_sys_api.h"      // NOLINT
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/text-utils.h"

#define SHERPA_ONNX_TO_STRING(type) \
  case type:                        \
    return #type

namespace sherpa_onnx {

static constexpr int32_t kCmnAlignSize = 128;
static const char *kSherpaOnnxAxeraSessionName = "sherpa-onnx-axera";

static std::string VectorToString(AX_S32 *arr, AX_U8 n) {
  std::ostringstream os;
  std::string sep;
  os << "[";
  for (AX_U8 i = 0; i < n; ++i) {
    os << sep << arr[i];
    sep = ", ";
  }
  os << "]";

  return os.str();
}

static const char *AxEngineDataTypeToString(AX_ENGINE_DATA_TYPE_T type) {
  switch (type) {
    SHERPA_ONNX_TO_STRING(AX_ENGINE_DT_UNKNOWN);
    SHERPA_ONNX_TO_STRING(AX_ENGINE_DT_UINT8);
    SHERPA_ONNX_TO_STRING(AX_ENGINE_DT_UINT16);
    SHERPA_ONNX_TO_STRING(AX_ENGINE_DT_FLOAT32);
    SHERPA_ONNX_TO_STRING(AX_ENGINE_DT_SINT16);
    SHERPA_ONNX_TO_STRING(AX_ENGINE_DT_SINT8);
    SHERPA_ONNX_TO_STRING(AX_ENGINE_DT_SINT32);
    SHERPA_ONNX_TO_STRING(AX_ENGINE_DT_UINT32);
    SHERPA_ONNX_TO_STRING(AX_ENGINE_DT_FLOAT64);
    SHERPA_ONNX_TO_STRING(AX_ENGINE_DT_UINT10_PACKED);
    SHERPA_ONNX_TO_STRING(AX_ENGINE_DT_UINT12_PACKED);
    SHERPA_ONNX_TO_STRING(AX_ENGINE_DT_UINT14_PACKED);
    SHERPA_ONNX_TO_STRING(AX_ENGINE_DT_UINT16_PACKED);
    default:
      return "Unknown data type";
  }
}

static const char *AxEngineTensorLayoutToString(
    AX_ENGINE_TENSOR_LAYOUT_T layout) {
  switch (layout) {
    SHERPA_ONNX_TO_STRING(AX_ENGINE_TENSOR_LAYOUT_UNKNOWN);
    SHERPA_ONNX_TO_STRING(AX_ENGINE_TENSOR_LAYOUT_NHWC);
    SHERPA_ONNX_TO_STRING(AX_ENGINE_TENSOR_LAYOUT_NCHW);
    default:
      return "Unknown data layout";
  }
}

static const char *AxEngineMemoryTypeToString(AX_ENGINE_MEMORY_TYPE_T type) {
  switch (type) {
    SHERPA_ONNX_TO_STRING(AX_ENGINE_MT_PHYSICAL);
    SHERPA_ONNX_TO_STRING(AX_ENGINE_MT_VIRTUAL);
    SHERPA_ONNX_TO_STRING(AX_ENGINE_MT_OCM);
    default:
      return "Unknown memory type";
  }
}

/*
num_inputs: 2
num_outputs: 1
max_bach_size: 1
dynamic_bach_size: false
---input 0---
 name: x
 shape: [1, 167, 560]
 layout: AX_ENGINE_TENSOR_LAYOUT_NCHW
 memory_type: AX_ENGINE_MT_PHYSICAL
 data_type: AX_ENGINE_DT_FLOAT32
 n_size (number of bytes): 374080
---input 1---
 name: prompt
 shape: [4]
 layout: AX_ENGINE_TENSOR_LAYOUT_NCHW
 memory_type: AX_ENGINE_MT_PHYSICAL
 data_type: AX_ENGINE_DT_SINT32
 n_size (number of bytes): 16

---output 0---
 name: logits
 shape: [1, 171, 25055]
 layout: AX_ENGINE_TENSOR_LAYOUT_UNKNOWN
 memory_type: AX_ENGINE_MT_PHYSICAL
 data_type: AX_ENGINE_DT_FLOAT32
 n_size: 17137620
 */
static std::string ToString(const AX_ENGINE_IO_INFO_T *io_info) {
  std::ostringstream os;
  os << "num_inputs: " << io_info->nInputSize << "\n";
  os << "num_outputs: " << io_info->nOutputSize << "\n";
  os << "max_bach_size: " << io_info->nMaxBatchSize << "\n";
  os << "dynamic_bach_size: " << (io_info->bDynamicBatchSize ? "true" : "false")
     << "\n";

  for (AX_U32 i = 0; i < io_info->nInputSize; ++i) {
    const auto &input = io_info->pInputs[i];
    os << "---input " << i << "---\n";
    os << " name: " << input.pName << "\n";
    os << " shape: " << VectorToString(input.pShape, input.nShapeSize) << "\n";
    os << " layout: " << AxEngineTensorLayoutToString(input.eLayout) << "\n";
    os << " memory_type: " << AxEngineMemoryTypeToString(input.eMemoryType)
       << "\n";
    os << " data_type: " << AxEngineDataTypeToString(input.eDataType) << "\n";
    os << " n_size (number of bytes): " << input.nSize << "\n";
  }
  os << "\n";

  for (AX_U32 i = 0; i < io_info->nOutputSize; ++i) {
    const auto &output = io_info->pOutputs[i];
    os << "---output " << i << "---\n";
    os << " name: " << output.pName << "\n";
    os << " shape: " << VectorToString(output.pShape, output.nShapeSize)
       << "\n";
    os << " layout: " << AxEngineTensorLayoutToString(output.eLayout) << "\n";
    os << " memory_type: " << AxEngineMemoryTypeToString(output.eMemoryType)
       << "\n";
    os << " data_type: " << AxEngineDataTypeToString(output.eDataType) << "\n";
    os << " n_size: " << output.nSize << "\n";
  }

  return os.str();
}

void InitContext(const void *model_data, size_t model_data_length, bool debug,
                 AX_ENGINE_HANDLE *handle) {
  if (!handle) {
    SHERPA_ONNX_LOGE("InitContext: handle is null");
    SHERPA_ONNX_EXIT(-1);
  }

  auto ret = AX_ENGINE_CreateHandle(handle, model_data, model_data_length);
  if (ret != 0) {
    SHERPA_ONNX_LOGE("AX_ENGINE_CreateHandle failed, ret = %d", ret);
    SHERPA_ONNX_EXIT(-1);
  }

  if (debug) {
    SHERPA_ONNX_LOGE("AX_ENGINE_CreateHandle done. handle = %p", *handle);
  }

  ret = AX_ENGINE_CreateContext(*handle);
  if (ret != 0) {
    SHERPA_ONNX_LOGE("AX_ENGINE_CreateContext failed, ret = %d", ret);
    SHERPA_ONNX_EXIT(-1);
  }

  if (debug) {
    SHERPA_ONNX_LOGE("AX_ENGINE_CreateContext done.");
  }
}

void InitInputOutputAttrs(AX_ENGINE_HANDLE handle, bool debug,
                          AX_ENGINE_IO_INFO_T **io_info) {
  if (!io_info) {
    SHERPA_ONNX_LOGE("InitInputOutputAttrs: io_info is null");
    SHERPA_ONNX_EXIT(-1);
  }

  // Note(fangjun): No need to free *io_info
  auto ret = AX_ENGINE_GetIOInfo(handle, io_info);
  if (ret != 0) {
    SHERPA_ONNX_LOGE("AX_ENGINE_GetIOInfo failed, ret = %d", ret);
    SHERPA_ONNX_EXIT(-1);
  }

  if (debug) {
    SHERPA_ONNX_LOGE("AX_ENGINE_GetIOInfo done.");
    SHERPA_ONNX_LOGE("IO_INFO:\n%s", ToString(*io_info).c_str());
  }
}

void PrepareIO(AX_ENGINE_IO_INFO_T *io_info, AX_ENGINE_IO_T *io_data,
               bool debug) {
  if (!io_info || !io_data) {
    SHERPA_ONNX_LOGE("PrepareIO: io_info or io_data is null");
    SHERPA_ONNX_EXIT(-1);
  }

  memset(io_data, 0, sizeof(AX_ENGINE_IO_T));

  io_data->pInputs = new AX_ENGINE_IO_BUFFER_T[io_info->nInputSize];

  memset(io_data->pInputs, 0,
         sizeof(AX_ENGINE_IO_BUFFER_T) * io_info->nInputSize);

  io_data->nInputSize = io_info->nInputSize;

  for (AX_U32 i = 0; i < io_info->nInputSize; ++i) {
    const auto &input = io_info->pInputs[i];
    auto &buffer = io_data->pInputs[i];

    buffer.nSize = input.nSize;

    auto ret = AX_SYS_MemAlloc(
        reinterpret_cast<AX_U64 *>(&buffer.phyAddr), &buffer.pVirAddr,
        input.nSize, kCmnAlignSize,
        reinterpret_cast<const AX_S8 *>(kSherpaOnnxAxeraSessionName));

    if (ret != 0) {
      SHERPA_ONNX_LOGE("Failed to allocate memory for Input %d",
                       static_cast<int32_t>(i));
      SHERPA_ONNX_EXIT(-1);
    }
  }

  io_data->pOutputs = new AX_ENGINE_IO_BUFFER_T[io_info->nOutputSize];

  memset(io_data->pOutputs, 0,
         sizeof(AX_ENGINE_IO_BUFFER_T) * io_info->nOutputSize);

  io_data->nOutputSize = io_info->nOutputSize;

  for (AX_U32 i = 0; i < io_info->nOutputSize; ++i) {
    const auto &output = io_info->pOutputs[i];
    auto &buffer = io_data->pOutputs[i];
    buffer.nSize = output.nSize;
    auto ret = AX_SYS_MemAllocCached(
        reinterpret_cast<AX_U64 *>(&buffer.phyAddr), &buffer.pVirAddr,
        output.nSize, kCmnAlignSize,
        reinterpret_cast<const AX_S8 *>(kSherpaOnnxAxeraSessionName));

    if (ret != 0) {
      SHERPA_ONNX_LOGE("Failed to allocate memory for Output %d",
                       static_cast<int32_t>(i));
      SHERPA_ONNX_EXIT(-1);
    }
  }
}

void FreeIO(AX_ENGINE_IO_T *io_data) {
  for (AX_U32 i = 0; i < io_data->nInputSize; ++i) {
    auto &buf = io_data->pInputs[i];
    AX_SYS_MemFree(buf.phyAddr, buf.pVirAddr);
  }

  for (AX_U32 i = 0; i < io_data->nOutputSize; ++i) {
    auto &buf = io_data->pOutputs[i];
    AX_SYS_MemFree(buf.phyAddr, buf.pVirAddr);
  }
  delete[] io_data->pInputs;
  delete[] io_data->pOutputs;
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/axera/utils.h
================================================
// sherpa-onnx/csrc/axera/utils.h
//
// Copyright (c)  2025  M5Stack Technology CO LTD

#ifndef SHERPA_ONNX_CSRC_AXERA_UTILS_H_
#define SHERPA_ONNX_CSRC_AXERA_UTILS_H_

#include <cstddef>

#include "ax_engine_api.h"  // NOLINT

namespace sherpa_onnx {

void InitContext(const void *model_data, size_t model_data_length, bool debug,
                 AX_ENGINE_HANDLE *handle);

void InitInputOutputAttrs(AX_ENGINE_HANDLE handle, bool debug,
                          AX_ENGINE_IO_INFO_T **io_info);

void PrepareIO(AX_ENGINE_IO_INFO_T *io_info, AX_ENGINE_IO_T *io_data,
               bool debug);

void FreeIO(AX_ENGINE_IO_T *io_data);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_AXERA_UTILS_H_


================================================
FILE: sherpa-onnx/csrc/base64-decode.cc
================================================
// sherpa-onnx/csrc/base64-decode.cc
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/base64-decode.h"

#include <string>

#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

static int32_t Ord(char c) {
  if (c >= 'A' && c <= 'Z') {
    return c - 'A';
  } else if (c >= 'a' && c <= 'z') {
    return c - 'a' + ('Z' - 'A') + 1;
  } else if (c >= '0' && c <= '9') {
    return c - '0' + ('Z' - 'A') + ('z' - 'a') + 2;
  } else if (c == '+') {
    return 62;
  } else if (c == '/') {
    return 63;
  }

  SHERPA_ONNX_LOGE("Unknown character %d, %c\n", c, c);

  exit(-1);
}

// see
// https://github.com/ReneNyffenegger/cpp-base64/blob/master/base64.cpp#L243
std::string Base64Decode(const std::string &s) {
  if (s.empty()) {
    SHERPA_ONNX_LOGE("Empty string!");
    exit(-1);
  }

  int32_t n = static_cast<int32_t>(s.size()) / 4 * 3;

  std::string ans;
  ans.reserve(n);

  int32_t i = 0;
  while (i < static_cast<int32_t>(s.size())) {
    if (s[i] == '=') {
      return " ";
    }

    int32_t first = (Ord(s[i]) << 2) + ((Ord(s[i + 1]) & 0x30) >> 4);
    ans.push_back(static_cast<char>(first));

    if (i + 2 < static_cast<int32_t>(s.size()) && s[i + 2] != '=') {
      int32_t second =
          ((Ord(s[i + 1]) & 0x0f) << 4) + ((Ord(s[i + 2]) & 0x3c) >> 2);
      ans.push_back(static_cast<char>(second));

      if (i + 3 < static_cast<int32_t>(s.size()) && s[i + 3] != '=') {
        int32_t third = ((Ord(s[i + 2]) & 0x03) << 6) + Ord(s[i + 3]);
        ans.push_back(static_cast<char>(third));
      }
    }
    i += 4;
  }

  return ans;
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/base64-decode.h
================================================
// sherpa-onnx/csrc/base64-decode.h
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_BASE64_DECODE_H_
#define SHERPA_ONNX_CSRC_BASE64_DECODE_H_

#include <string>

namespace sherpa_onnx {

/** @param s A base64 encoded string.
 *  @return Return the decoded string.
 */
std::string Base64Decode(const std::string &s);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_BASE64_DECODE_H_


================================================
FILE: sherpa-onnx/csrc/bbpe.cc
================================================
// sherpa-onnx/csrc/bbpe.cc
//
// Copyright (c)  2024 Xiaomi Corporation

// Auto-generated! DO NOT EDIT

#include "sherpa-onnx/csrc/bbpe.h"

#include <cstdint>
#include <string>
#include <unordered_map>

const std::unordered_map<std::string, uint8_t> &GetByteBpeTable() {
  static const std::unordered_map<std::string, uint8_t> table = {
      {"Ā", 0},   {"ā", 1},   {"Ă", 2},   {"ă", 3},   {"Ą", 4},   {"ą", 5},
      {"Ć", 6},   {"ć", 7},   {"Ĉ", 8},   {"ĉ", 9},   {"Ċ", 10},  {"ċ", 11},
      {"Č", 12},  {"č", 13},  {"Ď", 14},  {"ď", 15},  {"Đ", 16},  {"đ", 17},
      {"Ē", 18},  {"ē", 19},  {"Ĕ", 20},  {"ĕ", 21},  {"Ė", 22},  {"ė", 23},
      {"Ę", 24},  {"ę", 25},  {"Ě", 26},  {"ě", 27},  {"Ĝ", 28},  {"ĝ", 29},
      {"Ğ", 30},  {"ğ", 31},  {" ", 32},  {"!", 33},  {"\"", 34}, {"#", 35},
      {"$", 36},  {"%", 37},  {"&", 38},  {"'", 39},  {"(", 40},  {")", 41},
      {"*", 42},  {"+", 43},  {",", 44},  {"-", 45},  {".", 46},  {"/", 47},
      {"0", 48},  {"1", 49},  {"2", 50},  {"3", 51},  {"4", 52},  {"5", 53},
      {"6", 54},  {"7", 55},  {"8", 56},  {"9", 57},  {":", 58},  {";", 59},
      {"<", 60},  {"=", 61},  {">", 62},  {"?", 63},  {"@", 64},  {"A", 65},
      {"B", 66},  {"C", 67},  {"D", 68},  {"E", 69},  {"F", 70},  {"G", 71},
      {"H", 72},  {"I", 73},  {"J", 74},  {"K", 75},  {"L", 76},  {"M", 77},
      {"N", 78},  {"O", 79},  {"P", 80},  {"Q", 81},  {"R", 82},  {"S", 83},
      {"T", 84},  {"U", 85},  {"V", 86},  {"W", 87},  {"X", 88},  {"Y", 89},
      {"Z", 90},  {"[", 91},  {"\\", 92}, {"]", 93},  {"^", 94},  {"_", 95},
      {"`", 96},  {"a", 97},  {"b", 98},  {"c", 99},  {"d", 100}, {"e", 101},
      {"f", 102}, {"g", 103}, {"h", 104}, {"i", 105}, {"j", 106}, {"k", 107},
      {"l", 108}, {"m", 109}, {"n", 110}, {"o", 111}, {"p", 112}, {"q", 113},
      {"r", 114}, {"s", 115}, {"t", 116}, {"u", 117}, {"v", 118}, {"w", 119},
      {"x", 120}, {"y", 121}, {"z", 122}, {"{", 123}, {"|", 124}, {"}", 125},
      {"~", 126}, {"Ġ", 127}, {"ġ", 128}, {"Ģ", 129}, {"ģ", 130}, {"Ĥ", 131},
      {"ĥ", 132}, {"Ħ", 133}, {"ħ", 134}, {"Ĩ", 135}, {"ĩ", 136}, {"Ī", 137},
      {"ī", 138}, {"Ĭ", 139}, {"ĭ", 140}, {"Į", 141}, {"į", 142}, {"İ", 143},
      {"ı", 144}, {"Ĵ", 145}, {"ĵ", 146}, {"Ķ", 147}, {"ķ", 148}, {"ĸ", 149},
      {"Ĺ", 150}, {"ĺ", 151}, {"Ļ", 152}, {"ļ", 153}, {"Ľ", 154}, {"ľ", 155},
      {"Ł", 156}, {"ł", 157}, {"Ń", 158}, {"ń", 159}, {"Ņ", 160}, {"ņ", 161},
      {"Ň", 162}, {"ň", 163}, {"Ŋ", 164}, {"ŋ", 165}, {"Ō", 166}, {"ō", 167},
      {"Ŏ", 168}, {"ŏ", 169}, {"Ő", 170}, {"ő", 171}, {"Œ", 172}, {"œ", 173},
      {"Ŕ", 174}, {"ŕ", 175}, {"Ŗ", 176}, {"ŗ", 177}, {"Ř", 178}, {"ř", 179},
      {"Ś", 180}, {"ś", 181}, {"Ŝ", 182}, {"ŝ", 183}, {"Ş", 184}, {"ş", 185},
      {"Š", 186}, {"š", 187}, {"Ţ", 188}, {"ţ", 189}, {"Ť", 190}, {"ť", 191},
      {"Ŧ", 192}, {"ŧ", 193}, {"Ũ", 194}, {"ũ", 195}, {"Ū", 196}, {"ū", 197},
      {"Ŭ", 198}, {"ŭ", 199}, {"Ů", 200}, {"ů", 201}, {"Ű", 202}, {"ű", 203},
      {"Ų", 204}, {"ų", 205}, {"Ŵ", 206}, {"ŵ", 207}, {"Ŷ", 208}, {"ŷ", 209},
      {"Ÿ", 210}, {"Ź", 211}, {"ź", 212}, {"Ż", 213}, {"ż", 214}, {"Ž", 215},
      {"ž", 216}, {"ƀ", 217}, {"Ɓ", 218}, {"Ƃ", 219}, {"ƃ", 220}, {"Ƅ", 221},
      {"ƅ", 222}, {"Ɔ", 223}, {"Ƈ", 224}, {"ƈ", 225}, {"Ɖ", 226}, {"Ɗ", 227},
      {"Ƌ", 228}, {"ƌ", 229}, {"ƍ", 230}, {"Ǝ", 231}, {"Ə", 232}, {"Ɛ", 233},
      {"Ƒ", 234}, {"ƒ", 235}, {"Ɠ", 236}, {"Ɣ", 237}, {"ƕ", 238}, {"Ɩ", 239},
      {"Ɨ", 240}, {"Ƙ", 241}, {"ƙ", 242}, {"ƚ", 243}, {"ƛ", 244}, {"Ɯ", 245},
      {"Ɲ", 246}, {"ƞ", 247}, {"Ɵ", 248}, {"Ơ", 249}, {"ơ", 250}, {"Ƣ", 251},
      {"ƣ", 252}, {"Ƥ", 253}, {"ƥ", 254}, {"Ʀ", 255}, {"⁇", 32},
  };

  return table;
}

const std::unordered_map<uint8_t, std::string> &GetByteBpeTableId2Token() {
  static const std::unordered_map<uint8_t, std::string> table = {
      {0, "Ā"},   {1, "ā"},   {2, "Ă"},   {3, "ă"},   {4, "Ą"},   {5, "ą"},
      {6, "Ć"},   {7, "ć"},   {8, "Ĉ"},   {9, "ĉ"},   {10, "Ċ"},  {11, "ċ"},
      {12, "Č"},  {13, "č"},  {14, "Ď"},  {15, "ď"},  {16, "Đ"},  {17, "đ"},
      {18, "Ē"},  {19, "ē"},  {20, "Ĕ"},  {21, "ĕ"},  {22, "Ė"},  {23, "ė"},
      {24, "Ę"},  {25, "ę"},  {26, "Ě"},  {27, "ě"},  {28, "Ĝ"},  {29, "ĝ"},
      {30, "Ğ"},  {31, "ğ"},  {32, " "},  {33, "!"},  {34, "\""}, {35, "#"},
      {36, "$"},  {37, "%"},  {38, "&"},  {39, "'"},  {40, "("},  {41, ")"},
      {42, "*"},  {43, "+"},  {44, ","},  {45, "-"},  {46, "."},  {47, "/"},
      {48, "0"},  {49, "1"},  {50, "2"},  {51, "3"},  {52, "4"},  {53, "5"},
      {54, "6"},  {55, "7"},  {56, "8"},  {57, "9"},  {58, ":"},  {59, ";"},
      {60, "<"},  {61, "="},  {62, ">"},  {63, "?"},  {64, "@"},  {65, "A"},
      {66, "B"},  {67, "C"},  {68, "D"},  {69, "E"},  {70, "F"},  {71, "G"},
      {72, "H"},  {73, "I"},  {74, "J"},  {75, "K"},  {76, "L"},  {77, "M"},
      {78, "N"},  {79, "O"},  {80, "P"},  {81, "Q"},  {82, "R"},  {83, "S"},
      {84, "T"},  {85, "U"},  {86, "V"},  {87, "W"},  {88, "X"},  {89, "Y"},
      {90, "Z"},  {91, "["},  {92, "\\"}, {93, "]"},  {94, "^"},  {95, "_"},
      {96, "`"},  {97, "a"},  {98, "b"},  {99, "c"},  {100, "d"}, {101, "e"},
      {102, "f"}, {103, "g"}, {104, "h"}, {105, "i"}, {106, "j"}, {107, "k"},
      {108, "l"}, {109, "m"}, {110, "n"}, {111, "o"}, {112, "p"}, {113, "q"},
      {114, "r"}, {115, "s"}, {116, "t"}, {117, "u"}, {118, "v"}, {119, "w"},
      {120, "x"}, {121, "y"}, {122, "z"}, {123, "{"}, {124, "|"}, {125, "}"},
      {126, "~"}, {127, "Ġ"}, {128, "ġ"}, {129, "Ģ"}, {130, "ģ"}, {131, "Ĥ"},
      {132, "ĥ"}, {133, "Ħ"}, {134, "ħ"}, {135, "Ĩ"}, {136, "ĩ"}, {137, "Ī"},
      {138, "ī"}, {139, "Ĭ"}, {140, "ĭ"}, {141, "Į"}, {142, "į"}, {143, "İ"},
      {144, "ı"}, {145, "Ĵ"}, {146, "ĵ"}, {147, "Ķ"}, {148, "ķ"}, {149, "ĸ"},
      {150, "Ĺ"}, {151, "ĺ"}, {152, "Ļ"}, {153, "ļ"}, {154, "Ľ"}, {155, "ľ"},
      {156, "Ł"}, {157, "ł"}, {158, "Ń"}, {159, "ń"}, {160, "Ņ"}, {161, "ņ"},
      {162, "Ň"}, {163, "ň"}, {164, "Ŋ"}, {165, "ŋ"}, {166, "Ō"}, {167, "ō"},
      {168, "Ŏ"}, {169, "ŏ"}, {170, "Ő"}, {171, "ő"}, {172, "Œ"}, {173, "œ"},
      {174, "Ŕ"}, {175, "ŕ"}, {176, "Ŗ"}, {177, "ŗ"}, {178, "Ř"}, {179, "ř"},
      {180, "Ś"}, {181, "ś"}, {182, "Ŝ"}, {183, "ŝ"}, {184, "Ş"}, {185, "ş"},
      {186, "Š"}, {187, "š"}, {188, "Ţ"}, {189, "ţ"}, {190, "Ť"}, {191, "ť"},
      {192, "Ŧ"}, {193, "ŧ"}, {194, "Ũ"}, {195, "ũ"}, {196, "Ū"}, {197, "ū"},
      {198, "Ŭ"}, {199, "ŭ"}, {200, "Ů"}, {201, "ů"}, {202, "Ű"}, {203, "ű"},
      {204, "Ų"}, {205, "ų"}, {206, "Ŵ"}, {207, "ŵ"}, {208, "Ŷ"}, {209, "ŷ"},
      {210, "Ÿ"}, {211, "Ź"}, {212, "ź"}, {213, "Ż"}, {214, "ż"}, {215, "Ž"},
      {216, "ž"}, {217, "ƀ"}, {218, "Ɓ"}, {219, "Ƃ"}, {220, "ƃ"}, {221, "Ƅ"},
      {222, "ƅ"}, {223, "Ɔ"}, {224, "Ƈ"}, {225, "ƈ"}, {226, "Ɖ"}, {227, "Ɗ"},
      {228, "Ƌ"}, {229, "ƌ"}, {230, "ƍ"}, {231, "Ǝ"}, {232, "Ə"}, {233, "Ɛ"},
      {234, "Ƒ"}, {235, "ƒ"}, {236, "Ɠ"}, {237, "Ɣ"}, {238, "ƕ"}, {239, "Ɩ"},
      {240, "Ɨ"}, {241, "Ƙ"}, {242, "ƙ"}, {243, "ƚ"}, {244, "ƛ"}, {245, "Ɯ"},
      {246, "Ɲ"}, {247, "ƞ"}, {248, "Ɵ"}, {249, "Ơ"}, {250, "ơ"}, {251, "Ƣ"},
      {252, "ƣ"}, {253, "Ƥ"}, {254, "ƥ"}, {255, "Ʀ"},
  };

  return table;
}


================================================
FILE: sherpa-onnx/csrc/bbpe.h
================================================
// sherpa-onnx/csrc/bbpe.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_BBPE_H_
#define SHERPA_ONNX_CSRC_BBPE_H_
#include <cstdint>
#include <string>
#include <unordered_map>

// It is equivalent to the map BCHAR_TO_BYTE
// from
// https://github.com/k2-fsa/icefall/blob/master/icefall/byte_utils.py#L280
const std::unordered_map<std::string, uint8_t> &GetByteBpeTable();

const std::unordered_map<uint8_t, std::string> &GetByteBpeTableId2Token();

#endif  // SHERPA_ONNX_CSRC_BBPE_H_


================================================
FILE: sherpa-onnx/csrc/cat-test.cc
================================================
// sherpa-onnx/csrc/cat-test.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/cat.h"

#include "gtest/gtest.h"
#include "sherpa-onnx/csrc/onnx-utils.h"

namespace sherpa_onnx {

TEST(Cat, Test1DTensors) {
  Ort::AllocatorWithDefaultOptions allocator;

  std::array<int64_t, 1> a_shape{3};
  std::array<int64_t, 1> b_shape{6};

  Ort::Value a = Ort::Value::CreateTensor<float>(allocator, a_shape.data(),
                                                 a_shape.size());

  Ort::Value b = Ort::Value::CreateTensor<float>(allocator, b_shape.data(),
                                                 b_shape.size());
  float *pa = a.GetTensorMutableData<float>();
  float *pb = b.GetTensorMutableData<float>();
  for (int32_t i = 0; i != static_cast<int32_t>(a_shape[0]); ++i) {
    pa[i] = i;
  }
  for (int32_t i = 0; i != static_cast<int32_t>(b_shape[0]); ++i) {
    pb[i] = i + 10;
  }

  Ort::Value ans = Cat(allocator, {&a, &b}, 0);

  const float *pans = ans.GetTensorData<float>();
  for (int32_t i = 0; i != static_cast<int32_t>(a_shape[0]); ++i) {
    EXPECT_EQ(pa[i], pans[i]);
  }

  for (int32_t i = 0; i != static_cast<int32_t>(b_shape[0]); ++i) {
    EXPECT_EQ(pb[i], pans[i + a_shape[0]]);
  }

  Print1D(&a);
  Print1D(&b);
  Print1D(&ans);
}

TEST(Cat, Test2DTensorsDim0) {
  Ort::AllocatorWithDefaultOptions allocator;

  std::array<int64_t, 2> a_shape{2, 3};
  std::array<int64_t, 2> b_shape{4, 3};

  Ort::Value a = Ort::Value::CreateTensor<float>(allocator, a_shape.data(),
                                                 a_shape.size());

  Ort::Value b = Ort::Value::CreateTensor<float>(allocator, b_shape.data(),
                                                 b_shape.size());

  float *pa = a.GetTensorMutableData<float>();
  float *pb = b.GetTensorMutableData<float>();
  for (int32_t i = 0; i != static_cast<int32_t>(a_shape[0] * a_shape[1]); ++i) {
    pa[i] = i;
  }
  for (int32_t i = 0; i != static_cast<int32_t>(b_shape[0] * b_shape[1]); ++i) {
    pb[i] = i + 10;
  }

  Ort::Value ans = Cat(allocator, {&a, &b}, 0);

  const float *pans = ans.GetTensorData<float>();
  for (int32_t i = 0; i != static_cast<int32_t>(a_shape[0] * a_shape[1]); ++i) {
    EXPECT_EQ(pa[i], pans[i]);
  }
  for (int32_t i = 0; i != static_cast<int32_t>(b_shape[0] * b_shape[1]); ++i) {
    EXPECT_EQ(pb[i], pans[i + a_shape[0] * a_shape[1]]);
  }

  Print2D(&a);
  Print2D(&b);
  Print2D(&ans);
}

TEST(Cat, Test2DTensorsDim1) {
  Ort::AllocatorWithDefaultOptions allocator;

  std::array<int64_t, 2> a_shape{4, 3};
  std::array<int64_t, 2> b_shape{4, 2};

  Ort::Value a = Ort::Value::CreateTensor<float>(allocator, a_shape.data(),
                                                 a_shape.size());

  Ort::Value b = Ort::Value::CreateTensor<float>(allocator, b_shape.data(),
                                                 b_shape.size());

  float *pa = a.GetTensorMutableData<float>();
  float *pb = b.GetTensorMutableData<float>();
  for (int32_t i = 0; i != static_cast<int32_t>(a_shape[0] * a_shape[1]); ++i) {
    pa[i] = i;
  }
  for (int32_t i = 0; i != static_cast<int32_t>(b_shape[0] * b_shape[1]); ++i) {
    pb[i] = i + 10;
  }

  Ort::Value ans = Cat(allocator, {&a, &b}, 1);

  const float *pans = ans.GetTensorData<float>();

  for (int32_t r = 0; r != static_cast<int32_t>(a_shape[0]); ++r) {
    for (int32_t i = 0; i != static_cast<int32_t>(a_shape[1]);
         ++i, ++pa, ++pans) {
      EXPECT_EQ(*pa, *pans);
    }

    for (int32_t i = 0; i != static_cast<int32_t>(b_shape[1]);
         ++i, ++pb, ++pans) {
      EXPECT_EQ(*pb, *pans);
    }
  }

  Print2D(&a);
  Print2D(&b);
  Print2D(&ans);
}

TEST(Cat, Test3DTensorsDim0) {
  Ort::AllocatorWithDefaultOptions allocator;

  std::array<int64_t, 3> a_shape{2, 3, 2};
  std::array<int64_t, 3> b_shape{4, 3, 2};

  Ort::Value a = Ort::Value::CreateTensor<float>(allocator, a_shape.data(),
                                                 a_shape.size());

  Ort::Value b = Ort::Value::CreateTensor<float>(allocator, b_shape.data(),
                                                 b_shape.size());

  float *pa = a.GetTensorMutableData<float>();
  float *pb = b.GetTensorMutableData<float>();
  for (int32_t i = 0;
       i != static_cast<int32_t>(a_shape[0] * a_shape[1] * a_shape[2]); ++i) {
    pa[i] = i;
  }
  for (int32_t i = 0;
       i != static_cast<int32_t>(b_shape[0] * b_shape[1] * b_shape[2]); ++i) {
    pb[i] = i + 10;
  }

  Ort::Value ans = Cat(allocator, {&a, &b}, 0);

  const float *pans = ans.GetTensorData<float>();
  for (int32_t i = 0;
       i != static_cast<int32_t>(a_shape[0] * a_shape[1] * a_shape[2]); ++i) {
    EXPECT_EQ(pa[i], pans[i]);
  }
  for (int32_t i = 0;
       i != static_cast<int32_t>(b_shape[0] * b_shape[1] * b_shape[2]); ++i) {
    EXPECT_EQ(pb[i], pans[i + a_shape[0] * a_shape[1] * a_shape[2]]);
  }

  Print3D(&a);
  Print3D(&b);
  Print3D(&ans);
}

TEST(Cat, Test3DTensorsDim1) {
  Ort::AllocatorWithDefaultOptions allocator;

  std::array<int64_t, 3> a_shape{2, 2, 3};
  std::array<int64_t, 3> b_shape{2, 4, 3};

  Ort::Value a = Ort::Value::CreateTensor<float>(allocator, a_shape.data(),
                                                 a_shape.size());

  Ort::Value b = Ort::Value::CreateTensor<float>(allocator, b_shape.data(),
                                                 b_shape.size());

  float *pa = a.GetTensorMutableData<float>();
  float *pb = b.GetTensorMutableData<float>();
  for (int32_t i = 0;
       i != static_cast<int32_t>(a_shape[0] * a_shape[1] * a_shape[2]); ++i) {
    pa[i] = i;
  }
  for (int32_t i = 0;
       i != static_cast<int32_t>(b_shape[0] * b_shape[1] * b_shape[2]); ++i) {
    pb[i] = i + 10;
  }

  Ort::Value ans = Cat(allocator, {&a, &b}, 1);

  const float *pans = ans.GetTensorData<float>();

  for (int32_t i = 0; i != static_cast<int32_t>(a_shape[0]); ++i) {
    for (int32_t k = 0; k != static_cast<int32_t>(a_shape[1] * a_shape[2]);
         ++k, ++pa, ++pans) {
      EXPECT_EQ(*pa, *pans);
    }

    for (int32_t k = 0; k != static_cast<int32_t>(b_shape[1] * b_shape[2]);
         ++k, ++pb, ++pans) {
      EXPECT_EQ(*pb, *pans);
    }
  }

  Print3D(&a);
  Print3D(&b);
  Print3D(&ans);
}

TEST(Cat, Test3DTensorsDim2) {
  Ort::AllocatorWithDefaultOptions allocator;

  std::array<int64_t, 3> a_shape{2, 3, 4};
  std::array<int64_t, 3> b_shape{2, 3, 5};

  Ort::Value a = Ort::Value::CreateTensor<float>(allocator, a_shape.data(),
                                                 a_shape.size());

  Ort::Value b = Ort::Value::CreateTensor<float>(allocator, b_shape.data(),
                                                 b_shape.size());

  float *pa = a.GetTensorMutableData<float>();
  float *pb = b.GetTensorMutableData<float>();
  for (int32_t i = 0;
       i != static_cast<int32_t>(a_shape[0] * a_shape[1] * a_shape[2]); ++i) {
    pa[i] = i;
  }
  for (int32_t i = 0;
       i != static_cast<int32_t>(b_shape[0] * b_shape[1] * b_shape[2]); ++i) {
    pb[i] = i + 10;
  }

  Ort::Value ans = Cat(allocator, {&a, &b}, 2);

  const float *pans = ans.GetTensorData<float>();

  for (int32_t i = 0; i != static_cast<int32_t>(a_shape[0] * a_shape[1]); ++i) {
    for (int32_t k = 0; k != static_cast<int32_t>(a_shape[2]);
         ++k, ++pa, ++pans) {
      EXPECT_EQ(*pa, *pans);
    }

    for (int32_t k = 0; k != static_cast<int32_t>(b_shape[2]);
         ++k, ++pb, ++pans) {
      EXPECT_EQ(*pb, *pans);
    }
  }

  Print3D(&a);
  Print3D(&b);
  Print3D(&ans);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/cat.cc
================================================
// sherpa-onnx/csrc/cat.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/cat.h"

#include <algorithm>
#include <functional>
#include <numeric>
#include <sstream>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"

namespace sherpa_onnx {

static bool Compare(const std::vector<int64_t> &a,
                    const std::vector<int64_t> &b, int32_t skip_dim) {
  if (a.size() != b.size()) return false;

  for (int32_t i = 0; i != static_cast<int32_t>(a.size()); ++i) {
    if (i == skip_dim) continue;

    if (a[i] != b[i]) return false;
  }

  return true;
}

static void PrintShape(const std::vector<int64_t> &a) {
  std::ostringstream os;
  for (auto i : a) {
    os << i << " ";
  }
  os << "\n";
  SHERPA_ONNX_LOGE("%s", os.str().c_str());
}

template <typename T /*=float*/>
Ort::Value Cat(OrtAllocator *allocator,
               const std::vector<const Ort::Value *> &values, int32_t dim) {
  if (values.size() == 1u) {
    return Clone(allocator, values[0]);
  }

  std::vector<int64_t> v0_shape =
      values[0]->GetTensorTypeAndShapeInfo().GetShape();

  int64_t total_dim = v0_shape[dim];

  for (int32_t i = 1; i != static_cast<int32_t>(values.size()); ++i) {
    auto s = values[i]->GetTensorTypeAndShapeInfo().GetShape();
    total_dim += s[dim];

    bool ret = Compare(v0_shape, s, dim);
    if (!ret) {
      SHERPA_ONNX_LOGE("Incorrect shape in Cat !\n");

      SHERPA_ONNX_LOGE("Shape for tensor 0: ");
      PrintShape(v0_shape);

      SHERPA_ONNX_LOGE("Shape for tensor %d: ", i);
      PrintShape(s);

      SHERPA_ONNX_EXIT(-1);
    }
  }

  std::vector<int64_t> ans_shape;
  ans_shape.reserve(v0_shape.size());
  ans_shape.insert(ans_shape.end(), v0_shape.data(), v0_shape.data() + dim);
  ans_shape.push_back(total_dim);
  ans_shape.insert(ans_shape.end(), v0_shape.data() + dim + 1,
                   v0_shape.data() + v0_shape.size());

  auto leading_size = static_cast<int32_t>(std::accumulate(
      v0_shape.begin(), v0_shape.begin() + dim, 1, std::multiplies<int64_t>()));

  auto trailing_size = static_cast<int32_t>(
      std::accumulate(v0_shape.begin() + dim + 1, v0_shape.end(), 1,
                      std::multiplies<int64_t>()));

  Ort::Value ans = Ort::Value::CreateTensor<T>(allocator, ans_shape.data(),
                                               ans_shape.size());
  T *dst = ans.GetTensorMutableData<T>();

  for (int32_t i = 0; i != leading_size; ++i) {
    for (auto value : values) {
      auto this_dim = value->GetTensorTypeAndShapeInfo().GetShape()[dim];
      const T *src = value->GetTensorData<T>();
      src += i * this_dim * trailing_size;

      std::copy(src, src + this_dim * trailing_size, dst);
      dst += this_dim * trailing_size;
    }
  }

  return ans;
}

template Ort::Value Cat<float>(OrtAllocator *allocator,
                               const std::vector<const Ort::Value *> &values,
                               int32_t dim);

template Ort::Value Cat<uint16_t>(OrtAllocator *allocator,
                                  const std::vector<const Ort::Value *> &values,
                                  int32_t dim);

template Ort::Value Cat<int64_t>(OrtAllocator *allocator,
                                 const std::vector<const Ort::Value *> &values,
                                 int32_t dim);

Ort::Value CatFloat16(OrtAllocator *allocator,
                      const std::vector<const Ort::Value *> &values,
                      int32_t dim) {
  if (values.size() == 1u) {
    return Clone(allocator, values[0]);
  }

  std::vector<int64_t> v0_shape =
      values[0]->GetTensorTypeAndShapeInfo().GetShape();

  int64_t total_dim = v0_shape[dim];

  for (int32_t i = 1; i != static_cast<int32_t>(values.size()); ++i) {
    auto s = values[i]->GetTensorTypeAndShapeInfo().GetShape();
    total_dim += s[dim];

    bool ret = Compare(v0_shape, s, dim);
    if (!ret) {
      SHERPA_ONNX_LOGE("Incorrect shape in Cat !\n");

      SHERPA_ONNX_LOGE("Shape for tensor 0: ");
      PrintShape(v0_shape);

      SHERPA_ONNX_LOGE("Shape for tensor %d: ", i);
      PrintShape(s);

      SHERPA_ONNX_EXIT(-1);
    }
  }

  std::vector<int64_t> ans_shape;
  ans_shape.reserve(v0_shape.size());
  ans_shape.insert(ans_shape.end(), v0_shape.data(), v0_shape.data() + dim);
  ans_shape.push_back(total_dim);
  ans_shape.insert(ans_shape.end(), v0_shape.data() + dim + 1,
                   v0_shape.data() + v0_shape.size());

  auto leading_size = static_cast<int32_t>(std::accumulate(
      v0_shape.begin(), v0_shape.begin() + dim, 1, std::multiplies<int64_t>()));

  auto trailing_size = static_cast<int32_t>(
      std::accumulate(v0_shape.begin() + dim + 1, v0_shape.end(), 1,
                      std::multiplies<int64_t>()));

  Ort::Value ans =
      Ort::Value::CreateTensor(allocator, ans_shape.data(), ans_shape.size(),
                               ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16);
  using T = uint16_t;

  T *dst = ans.GetTensorMutableData<T>();

  for (int32_t i = 0; i != leading_size; ++i) {
    for (auto value : values) {
      auto this_dim = value->GetTensorTypeAndShapeInfo().GetShape()[dim];
      const T *src = value->GetTensorData<T>();
      src += i * this_dim * trailing_size;

      std::copy(src, src + this_dim * trailing_size, dst);
      dst += this_dim * trailing_size;
    }
  }

  return ans;
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/cat.h
================================================
// sherpa-onnx/csrc/cat.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_CAT_H_
#define SHERPA_ONNX_CSRC_CAT_H_

#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT

namespace sherpa_onnx {

/** Cat a list of tensors along the given dim.
 *
 * @param allocator Allocator to allocate space for the returned tensor
 * @param values  Pointer to a list of tensors. The shape of the tensor must
 *                be the same except on the dim to be concatenated.
 * @param dim  The dim along which to concatenate the input tensors
 *
 * @return Return the concatenated tensor
 */
template <typename T = float>
Ort::Value Cat(OrtAllocator *allocator,
               const std::vector<const Ort::Value *> &values, int32_t dim);

Ort::Value CatFloat16(OrtAllocator *allocator,
                      const std::vector<const Ort::Value *> &values,
                      int32_t dim);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_CAT_H_


================================================
FILE: sherpa-onnx/csrc/character-lexicon.cc
================================================
// sherpa-onnx/csrc/character-lexicon.cc
//
// Copyright (c)  2022-2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/character-lexicon.h"

#include <algorithm>
#include <fstream>
#include <memory>
#include <regex>  // NOLINT
#include <sstream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/phrase-matcher.h"
#include "sherpa-onnx/csrc/symbol-table.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class CharacterLexicon::Impl {
 public:
  Impl(const std::string &lexicon, const std::string &tokens, bool debug)
      : debug_(debug) {
    if (lexicon.empty()) {
      SHERPA_ONNX_LOGE("Please provide lexicon.txt for this model");
      SHERPA_ONNX_EXIT(-1);
    }

    {
      std::ifstream is(tokens);
      InitTokens(is);
    }

    {
      std::ifstream is(lexicon);
      InitLexicon(is);
    }
  }

  template <typename Manager>
  Impl(Manager *mgr, const std::string &lexicon, const std::string &tokens,
       bool debug)
      : debug_(debug) {
    if (lexicon.empty()) {
      SHERPA_ONNX_LOGE("Please provide lexicon.txt for this model");
      SHERPA_ONNX_EXIT(-1);
    }

    {
      auto buf = ReadFile(mgr, tokens);
      std::istringstream is(std::string(buf.data(), buf.size()));

      InitTokens(is);
    }

    {
      auto buf = ReadFile(mgr, lexicon);
      std::istringstream is(std::string(buf.data(), buf.size()));
      InitLexicon(is);
    }
  }

  std::vector<TokenIDs> ConvertTextToTokenIds(const std::string &text) const {
    // see
    // https://github.com/Plachtaa/VITS-fast-fine-tuning/blob/main/text/mandarin.py#L244
    std::regex punct_re{"：|、|；"};
    std::string s = std::regex_replace(text, punct_re, "，");

    std::regex punct_re2("[.]");
    s = std::regex_replace(s, punct_re2, "。");

    std::regex punct_re3("[?]");
    s = std::regex_replace(s, punct_re3, "？");

    std::regex punct_re4("[!]");
    s = std::regex_replace(s, punct_re4, "！");

    std::vector<std::string> words = SplitUtf8(text);

    if (debug_) {
#if __OHOS__
      SHERPA_ONNX_LOGE("input text:\n%{public}s", text.c_str());
      SHERPA_ONNX_LOGE("after replacing punctuations:\n%{public}s", s.c_str());
#else
      SHERPA_ONNX_LOGE("input text:\n%s", text.c_str());
      SHERPA_ONNX_LOGE("after replacing punctuations:\n%s", s.c_str());
#endif

      std::ostringstream os;
      std::string sep = "";
      for (const auto &w : words) {
        os << sep << w;
        sep = "_";
      }

#if __OHOS__
      SHERPA_ONNX_LOGE("after splitting into UTF8:\n%{public}s",
                       os.str().c_str());
#else
      SHERPA_ONNX_LOGE("after splitting into UTF8:\n%s", os.str().c_str());
#endif
    }

    // remove spaces after punctuations
    std::vector<std::string> words2 = std::move(words);
    words.reserve(words2.size());

    for (int32_t i = 0; i < words2.size(); ++i) {
      if (i == 0) {
        words.push_back(std::move(words2[i]));
      } else if (words2[i] == " ") {
        if (words.back() == " " || IsPunct(words.back())) {
          continue;
        } else {
          words.push_back(std::move(words2[i]));
        }
      } else if (IsPunct(words2[i])) {
        if (words.back() == " " || IsPunct(words.back())) {
          continue;
        } else {
          words.push_back(std::move(words2[i]));
        }
      } else {
        words.push_back(std::move(words2[i]));
      }
    }

    if (debug_) {
      std::ostringstream os;
      std::string sep = "";
      for (const auto &w : words) {
        os << sep << w;
        sep = "_";
      }

#if __OHOS__
      SHERPA_ONNX_LOGE("after removing spaces after punctuations:\n%{public}s",
                       os.str().c_str());
#else
      SHERPA_ONNX_LOGE("after removing spaces after punctuations:\n%s",
                       os.str().c_str());
#endif
    }

    std::vector<TokenIDs> ans;
    std::vector<int64_t> this_sentence;

    PhraseMatcher matcher(&all_words_, words, debug_);

    for (const std::string &w : matcher) {
      auto ids = ConvertWordToIds(w);
      if (ids.empty()) {
#if __OHOS__
        SHERPA_ONNX_LOGE("Ignore OOV '%{public}s'", w.c_str());
#else
        SHERPA_ONNX_LOGE("Ignore OOV '%s'", w.c_str());
#endif
        continue;
      }

      this_sentence.insert(this_sentence.end(), ids.begin(), ids.end());

      if (IsPunct(w)) {
        ans.emplace_back(std::move(this_sentence));
        this_sentence = {};
      }
    }  // for (const std::string &w : matcher)

    if (!this_sentence.empty()) {
      ans.emplace_back(std::move(this_sentence));
    }

    return ans;
  }

 private:
  std::vector<int32_t> ConvertWordToIds(const std::string &w) const {
    std::vector<int32_t> ans;

    if (word2ids_.count(w)) {
      ans = word2ids_.at(w);
    } else if (token2id_.count(w)) {
      ans = {token2id_.at(w)};
    } else {
      std::vector<std::string> words = SplitUtf8(w);
      for (const auto &word : words) {
        if (word2ids_.count(word)) {
          auto ids = ConvertWordToIds(word);
          ans.insert(ans.end(), ids.begin(), ids.end());
        }
      }
    }
    if (debug_) {
      std::ostringstream os;
      os << w << ": ";
      for (auto i : ans) {
        os << id2token_.at(i) << " ";
      }
      os << "\n";
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
    }

    return ans;
  }

  void InitTokens(std::istream &is) {
    token2id_ = ReadTokens(is);

    std::vector<std::pair<std::string, std::string>> puncts = {
        {",", "，"}, {".", "。"}, {"!", "！"}, {"?", "？"}, {":", "："},
        {"\"", "“"}, {"\"", "”"}, {"'", "‘"},  {"'", "’"},  {";", "；"},
    };

    for (const auto &p : puncts) {
      if (token2id_.count(p.first) && !token2id_.count(p.second)) {
        token2id_[p.second] = token2id_[p.first];
      }

      if (!token2id_.count(p.first) && token2id_.count(p.second)) {
        token2id_[p.first] = token2id_[p.second];
      }
    }

    if (!token2id_.count("、") && token2id_.count("，")) {
      token2id_["、"] = token2id_["，"];
    }

    if (!token2id_.count(";") && token2id_.count(",")) {
      token2id_[";"] = token2id_[","];
    }

    if (debug_) {
      for (const auto &p : token2id_) {
        id2token_[p.second] = p.first;
      }
    }
  }

  void InitLexicon(std::istream &is) {
    std::string word;
    std::vector<std::string> token_list;
    std::string line;
    std::string phone;
    int32_t line_num = 0;

    while (std::getline(is, line)) {
      ++line_num;
      if (line.find_first_not_of(" \t\n\v\f\r") == std::string::npos) {
        // Line is empty or only spaces/tabs, skip it
        continue;
      }

      std::istringstream iss(line);

      token_list.clear();

      iss >> word;
      ToLowerCase(&word);

      if (word2ids_.count(word)) {
#if __OHOS__
        SHERPA_ONNX_LOGE(
            "Duplicated word: %{public}s at line %{public}d:%{public}s. Ignore "
            "it.",
            word.c_str(), line_num, line.c_str());
#else
        SHERPA_ONNX_LOGE("Duplicated word: %s at line %d:%s. Ignore it.",
                         word.c_str(), line_num, line.c_str());
#endif
        continue;
      }

      while (iss >> phone) {
        token_list.push_back(std::move(phone));
      }

      std::vector<int32_t> ids = ConvertTokensToIds(token2id_, token_list);
      if (ids.empty()) {
        if (debug_) {
#if __OHOS__
          SHERPA_ONNX_LOGE("Empty token ids for '%{public}s'", line.c_str());
#else
          SHERPA_ONNX_LOGE("Empty token ids for '%s'", line.c_str());
#endif
        }
        continue;
      }

      word2ids_.insert({std::move(word), std::move(ids)});
    }

    for (const auto &[key, _] : word2ids_) {
      all_words_.insert(key);
    }
  }

 private:
  // lexicon.txt is saved in word2ids_
  std::unordered_map<std::string, std::vector<int32_t>> word2ids_;
  std::unordered_set<std::string> all_words_;

  // tokens.txt is saved in token2id_
  std::unordered_map<std::string, int32_t> token2id_;

  std::unordered_map<int32_t, std::string> id2token_;

  bool debug_ = false;
};

CharacterLexicon::~CharacterLexicon() = default;

CharacterLexicon::CharacterLexicon(const std::string &lexicon,
                                   const std::string &tokens, bool debug)
    : impl_(std::make_unique<Impl>(lexicon, tokens, debug)) {}

template <typename Manager>
CharacterLexicon::CharacterLexicon(Manager *mgr, const std::string &lexicon,
                                   const std::string &tokens, bool debug)
    : impl_(std::make_unique<Impl>(mgr, lexicon, tokens, debug)) {}

std::vector<TokenIDs> CharacterLexicon::ConvertTextToTokenIds(
    const std::string &text, const std::string & /*unused_voice = ""*/) const {
  return impl_->ConvertTextToTokenIds(text);
}

#if __ANDROID_API__ >= 9
template CharacterLexicon::CharacterLexicon(AAssetManager *mgr,
                                            const std::string &lexicon,
                                            const std::string &tokens,
                                            bool debug);
#endif

#if __OHOS__
template CharacterLexicon::CharacterLexicon(NativeResourceManager *mgr,
                                            const std::string &lexicon,
                                            const std::string &tokens,
                                            bool debug);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/character-lexicon.h
================================================
// sherpa-onnx/csrc/character-lexicon.h
//
// Copyright (c)  2022-2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_CHARACTER_LEXICON_H_
#define SHERPA_ONNX_CSRC_CHARACTER_LEXICON_H_

#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include "sherpa-onnx/csrc/offline-tts-frontend.h"

namespace sherpa_onnx {

class CharacterLexicon : public OfflineTtsFrontend {
 public:
  ~CharacterLexicon() override;

  CharacterLexicon(const std::string &lexicon, const std::string &tokens,
                   bool debug);

  template <typename Manager>
  CharacterLexicon(Manager *mgr, const std::string &lexicon,
                   const std::string &tokens, bool debug);

  std::vector<TokenIDs> ConvertTextToTokenIds(
      const std::string &text,
      const std::string &unused_voice = "") const override;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_CHARACTER_LEXICON_H_


================================================
FILE: sherpa-onnx/csrc/circular-buffer-test.cc
================================================
// sherpa-onnx/csrc/circular-buffer-test.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/circular-buffer.h"

#include <vector>

#include "gtest/gtest.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

TEST(CircularBuffer, Push) {
  CircularBuffer buffer(10);
  EXPECT_EQ(buffer.Size(), 0);
  EXPECT_EQ(buffer.Head(), 0);
  EXPECT_EQ(buffer.Tail(), 0);

  std::vector<float> a = {0, 1, 2, 3, 4, 5};
  buffer.Push(a.data(), a.size());

  EXPECT_EQ(buffer.Size(), 6);
  EXPECT_EQ(buffer.Head(), 0);
  EXPECT_EQ(buffer.Tail(), 6);

  auto c = buffer.Get(0, a.size());
  EXPECT_EQ(a.size(), c.size());
  for (int32_t i = 0; i != a.size(); ++i) {
    EXPECT_EQ(a[i], c[i]);
  }

  std::vector<float> d = {-6, -7, -8, -9};
  buffer.Push(d.data(), d.size());

  c = buffer.Get(a.size(), d.size());
  EXPECT_EQ(d.size(), c.size());
  for (int32_t i = 0; i != d.size(); ++i) {
    EXPECT_EQ(d[i], c[i]);
  }
}

TEST(CircularBuffer, PushAndPop) {
  CircularBuffer buffer(5);
  std::vector<float> a = {0, 1, 2, 3};
  buffer.Push(a.data(), a.size());

  EXPECT_EQ(buffer.Size(), 4);
  EXPECT_EQ(buffer.Head(), 0);
  EXPECT_EQ(buffer.Tail(), 4);

  buffer.Pop(2);

  EXPECT_EQ(buffer.Size(), 2);
  EXPECT_EQ(buffer.Head(), 2);
  EXPECT_EQ(buffer.Tail(), 4);

  auto c = buffer.Get(2, 2);
  EXPECT_EQ(c.size(), 2);
  EXPECT_EQ(c[0], 2);
  EXPECT_EQ(c[1], 3);

  a = {10, 20, 30};
  buffer.Push(a.data(), a.size());
  EXPECT_EQ(buffer.Size(), 5);
  EXPECT_EQ(buffer.Head(), 2);
  EXPECT_EQ(buffer.Tail(), 7);

  c = buffer.Get(2, 5);
  EXPECT_EQ(c.size(), 5);
  EXPECT_EQ(c[0], 2);
  EXPECT_EQ(c[1], 3);
  EXPECT_EQ(c[2], 10);
  EXPECT_EQ(c[3], 20);
  EXPECT_EQ(c[4], 30);

  c = buffer.Get(3, 4);
  EXPECT_EQ(c.size(), 4);
  EXPECT_EQ(c[0], 3);
  EXPECT_EQ(c[1], 10);
  EXPECT_EQ(c[2], 20);
  EXPECT_EQ(c[3], 30);

  c = buffer.Get(4, 3);
  EXPECT_EQ(c.size(), 3);
  EXPECT_EQ(c[0], 10);
  EXPECT_EQ(c[1], 20);
  EXPECT_EQ(c[2], 30);

  buffer.Pop(4);
  EXPECT_EQ(buffer.Size(), 1);
  EXPECT_EQ(buffer.Head(), 6);
  EXPECT_EQ(buffer.Tail(), 7);

  c = buffer.Get(6, 1);
  EXPECT_EQ(c.size(), 1);
  EXPECT_EQ(c[0], 30);

  a = {100, 200, 300, 400};
  buffer.Push(a.data(), a.size());
  EXPECT_EQ(buffer.Size(), 5);

  EXPECT_EQ(buffer.Size(), 5);
  EXPECT_EQ(buffer.Head(), 6);
  EXPECT_EQ(buffer.Tail(), 11);

  c = buffer.Get(6, 5);
  EXPECT_EQ(c.size(), 5);
  EXPECT_EQ(c[0], 30);
  EXPECT_EQ(c[1], 100);
  EXPECT_EQ(c[2], 200);
  EXPECT_EQ(c[3], 300);
  EXPECT_EQ(c[4], 400);

  buffer.Pop(3);
  EXPECT_EQ(buffer.Size(), 2);
  EXPECT_EQ(buffer.Head(), 9);
  EXPECT_EQ(buffer.Tail(), 11);

  c = buffer.Get(10, 1);
  EXPECT_EQ(c.size(), 1);
  EXPECT_EQ(c[0], 400);

  a = {1000, 2000, 3000};
  buffer.Push(a.data(), a.size());

  EXPECT_EQ(buffer.Size(), 5);
  EXPECT_EQ(buffer.Head(), 9);
  EXPECT_EQ(buffer.Tail(), 14);

  buffer.Pop(1);

  EXPECT_EQ(buffer.Size(), 4);
  EXPECT_EQ(buffer.Head(), 10);
  EXPECT_EQ(buffer.Tail(), 14);

  a = {4000};

  buffer.Push(a.data(), a.size());
  EXPECT_EQ(buffer.Size(), 5);
  EXPECT_EQ(buffer.Head(), 10);
  EXPECT_EQ(buffer.Tail(), 15);

  c = buffer.Get(13, 2);
  EXPECT_EQ(c.size(), 2);
  EXPECT_EQ(c[0], 3000);
  EXPECT_EQ(c[1], 4000);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/circular-buffer.cc
================================================
// sherpa-onnx/csrc/circular-buffer.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/circular-buffer.h"

#include <algorithm>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

CircularBuffer::CircularBuffer(int32_t capacity) {
  if (capacity <= 0) {
    SHERPA_ONNX_LOGE("Please specify a positive capacity. Given: %d\n",
                     capacity);
    exit(-1);
  }
  buffer_.resize(capacity);
}

void CircularBuffer::Resize(int32_t new_capacity) {
  int32_t capacity = static_cast<int32_t>(buffer_.size());
  if (new_capacity <= capacity) {
#if __OHOS__
    SHERPA_ONNX_LOGE(
        "new_capacity (%{public}d) <= original capacity (%{public}d). Skip it.",
        new_capacity, capacity);
#else
    SHERPA_ONNX_LOGE("new_capacity (%d) <= original capacity (%d). Skip it.",
                     new_capacity, capacity);
#endif
    return;
  }

  int32_t size = Size();
  if (size == 0) {
    buffer_.resize(new_capacity);
    return;
  }

  std::vector<float> new_buffer(new_capacity);
  int32_t start = head_ % capacity;
  int32_t dest = head_ % new_capacity;

  if (start + size <= capacity) {
    if (dest + size <= new_capacity) {
      std::copy(buffer_.begin() + start, buffer_.begin() + start + size,
                new_buffer.begin() + dest);
    } else {
      int32_t part1_size = new_capacity - dest;

      // copy [start, start+part1_size] to new_buffer
      std::copy(buffer_.begin() + start, buffer_.begin() + start + part1_size,
                new_buffer.begin() + dest);

      // copy [start+part1_size, start+size] to new_buffer
      std::copy(buffer_.begin() + start + part1_size,
                buffer_.begin() + start + size, new_buffer.begin());
    }
  } else {
    int32_t part1_size = capacity - start;
    int32_t part2_size = size - part1_size;

    // copy [start, start+part1_size] to new_buffer
    if (dest + part1_size <= new_capacity) {
      std::copy(buffer_.begin() + start, buffer_.begin() + start + part1_size,
                new_buffer.begin() + dest);
    } else {
      int32_t first_part = new_capacity - dest;
      std::copy(buffer_.begin() + start, buffer_.begin() + start + first_part,
                new_buffer.begin() + dest);

      std::copy(buffer_.begin() + start + first_part,
                buffer_.begin() + start + part1_size, new_buffer.begin());
    }

    int32_t new_dest = (dest + part1_size) % new_capacity;

    if (new_dest + part2_size <= new_capacity) {
      std::copy(buffer_.begin(), buffer_.begin() + part2_size,
                new_buffer.begin() + new_dest);
    } else {
      int32_t first_part = new_capacity - new_dest;
      std::copy(buffer_.begin(), buffer_.begin() + first_part,
                new_buffer.begin() + new_dest);
      std::copy(buffer_.begin() + first_part, buffer_.begin() + part2_size,
                new_buffer.begin());
    }
  }
  buffer_.swap(new_buffer);
}

void CircularBuffer::Push(const float *p, int32_t n) {
  int32_t capacity = static_cast<int32_t>(buffer_.size());
  int32_t size = Size();
  if (n + size > capacity) {
    int32_t new_capacity = std::max(capacity * 2, n + size);
#if __OHOS__
    SHERPA_ONNX_LOGE(
        "Overflow! n: %{public}d, size: %{public}d, n+size: %{public}d, "
        "capacity: %{public}d. Increase "
        "capacity to: %{public}d. (Original data is copied. No data loss!)",
        n, size, n + size, capacity, new_capacity);
#else
    SHERPA_ONNX_LOGE(
        "Overflow! n: %d, size: %d, n+size: %d, capacity: %d. Increase "
        "capacity to: %d. (Original data is copied. No data loss!)",
        n, size, n + size, capacity, new_capacity);
#endif
    Resize(new_capacity);

    capacity = new_capacity;
  }

  int32_t start = tail_ % capacity;

  tail_ += n;

  if (start + n < capacity) {
    std::copy(p, p + n, buffer_.begin() + start);
    return;
  }

  int32_t part1_size = capacity - start;

  std::copy(p, p + part1_size, buffer_.begin() + start);

  std::copy(p + part1_size, p + n, buffer_.begin());
}

std::vector<float> CircularBuffer::Get(int32_t start_index, int32_t n) const {
  if (start_index < head_ || start_index >= tail_) {
    SHERPA_ONNX_LOGE("Invalid start_index: %d. head_: %d, tail_: %d",
                     start_index, head_, tail_);
    return {};
  }

  int32_t size = Size();
  if (n < 0 || n > size) {
    SHERPA_ONNX_LOGE("Invalid n: %d. size: %d", n, size);
    return {};
  }

  int32_t capacity = static_cast<int32_t>(buffer_.size());

  if (start_index - head_ + n > size) {
    SHERPA_ONNX_LOGE("Invalid start_index: %d and n: %d. head_: %d, size: %d",
                     start_index, n, head_, size);
    return {};
  }

  int32_t start = start_index % capacity;

  if (start + n < capacity) {
    return {buffer_.begin() + start, buffer_.begin() + start + n};
  }

  std::vector<float> ans(n);

  std::copy(buffer_.begin() + start, buffer_.end(), ans.begin());

  int32_t part1_size = capacity - start;
  int32_t part2_size = n - part1_size;
  std::copy(buffer_.begin(), buffer_.begin() + part2_size,
            ans.begin() + part1_size);

  return ans;
}

void CircularBuffer::Pop(int32_t n) {
  int32_t size = Size();
  if (n < 0 || n > size) {
    SHERPA_ONNX_LOGE("Invalid n: %d. size: %d", n, size);
    return;
  }

  head_ += n;
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/circular-buffer.h
================================================
// sherpa-onnx/csrc/circular-buffer.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_CIRCULAR_BUFFER_H_
#define SHERPA_ONNX_CSRC_CIRCULAR_BUFFER_H_

#include <cstdint>
#include <vector>

namespace sherpa_onnx {

class CircularBuffer {
 public:
  // Capacity of this buffer. Should be large enough.
  // If it is full, we just print a message and exit the program.
  explicit CircularBuffer(int32_t capacity);

  // Push an array
  //
  // @param p Pointer to the start address of the array
  // @param n Number of elements in the array
  //
  // Note: If n + Size() > capacity, we print an error message and exit.
  void Push(const float *p, int32_t n);

  // @param start_index Should in the range [head_, tail_)
  // @param n Number of elements to get
  // @return Return a vector of size n containing the requested elements
  std::vector<float> Get(int32_t start_index, int32_t n) const;

  // Remove n elements from the buffer
  //
  // @param n Should be in the range [0, size_]
  void Pop(int32_t n);

  // Number of elements in the buffer.
  int32_t Size() const { return tail_ - head_; }

  // Current position of the head
  int32_t Head() const { return head_; }

  // Current position of the tail
  int32_t Tail() const { return tail_; }

  void Reset() {
    head_ = 0;
    tail_ = 0;
  }

  void Resize(int32_t new_capacity);

 private:
  std::vector<float> buffer_;

  int32_t head_ = 0;  // linear index; always increasing; never wraps around
  int32_t tail_ = 0;  // linear index, always increasing; never wraps around.
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_CIRCULAR_BUFFER_H_


================================================
FILE: sherpa-onnx/csrc/context-graph-test.cc
================================================
// sherpa-onnx/csrc/context-graph-test.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/context-graph.h"

#include <chrono>
#include <cmath>
#include <map>
#include <random>
#include <string>
#include <utility>
#include <vector>

#include "gtest/gtest.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

static void TestHelper(const std::map<std::string, float> &queries, float score,
                       bool strict_mode) {
  std::vector<std::string> contexts_str(
      {"S", "HE", "SHE", "SHELL", "HIS", "HERS", "HELLO", "THIS", "THEM"});
  std::vector<std::vector<int32_t>> contexts;
  std::vector<float> scores;
  for (int32_t i = 0; i < contexts_str.size(); ++i) {
    contexts.emplace_back(contexts_str[i].begin(), contexts_str[i].end());
    scores.push_back(std::round(score / contexts_str[i].size() * 100) / 100);
  }
  auto context_graph = ContextGraph(contexts, 1, scores);

  for (const auto &iter : queries) {
    float total_scores = 0;
    auto state = context_graph.Root();
    for (auto q : iter.first) {
      auto res = context_graph.ForwardOneStep(state, q, strict_mode);
      total_scores += std::get<0>(res);
      state = std::get<1>(res);
    }
    auto res = context_graph.Finalize(state);
    EXPECT_EQ(res.second->token, -1);
    total_scores += res.first;
    EXPECT_EQ(total_scores, iter.second);
  }
}

TEST(ContextGraph, TestBasic) {
  auto queries = std::map<std::string, float>{
      {"HEHERSHE", 14}, {"HERSHE", 12}, {"HISHE", 9},
      {"SHED", 6},      {"SHELF", 6},   {"HELL", 2},
      {"HELLO", 7},     {"DHRHISQ", 4}, {"THEN", 2}};
  TestHelper(queries, 0, true);
}

TEST(ContextGraph, TestBasicNonStrict) {
  auto queries = std::map<std::string, float>{
      {"HEHERSHE", 7}, {"HERSHE", 5}, {"HISHE", 5},   {"SHED", 3}, {"SHELF", 3},
      {"HELL", 2},     {"HELLO", 2},  {"DHRHISQ", 3}, {"THEN", 2}};
  TestHelper(queries, 0, false);
}

TEST(ContextGraph, TestCustomize) {
  auto queries = std::map<std::string, float>{
      {"HEHERSHE", 35.84}, {"HERSHE", 30.84},  {"HISHE", 24.18},
      {"SHED", 18.34},     {"SHELF", 18.34},   {"HELL", 5},
      {"HELLO", 13},       {"DHRHISQ", 10.84}, {"THEN", 5}};
  TestHelper(queries, 5, true);
}

TEST(ContextGraph, TestCustomizeNonStrict) {
  auto queries = std::map<std::string, float>{
      {"HEHERSHE", 20}, {"HERSHE", 15},    {"HISHE", 10.84},
      {"SHED", 10},     {"SHELF", 10},     {"HELL", 5},
      {"HELLO", 5},     {"DHRHISQ", 5.84}, {"THEN", 5}};
  TestHelper(queries, 5, false);
}

TEST(ContextGraph, Benchmark) {
  std::random_device rd;
  std::mt19937 mt(rd());
  std::uniform_int_distribution<int32_t> char_dist(0, 25);
  std::uniform_int_distribution<int32_t> len_dist(3, 8);
  for (int32_t num = 10; num <= 10000; num *= 10) {
    std::vector<std::vector<int32_t>> contexts;
    for (int32_t i = 0; i < num; ++i) {
      std::vector<int32_t> tmp;
      int32_t word_len = len_dist(mt);
      for (int32_t j = 0; j < word_len; ++j) {
        tmp.push_back(char_dist(mt));
      }
      contexts.push_back(std::move(tmp));
    }
    auto start = std::chrono::high_resolution_clock::now();
    auto context_graph = ContextGraph(contexts, 1);
    auto stop = std::chrono::high_resolution_clock::now();
    auto duration =
        std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
    SHERPA_ONNX_LOGE("Construct context graph for %d item takes %d us.", num,
                     static_cast<int32_t>(duration.count()));
  }
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/context-graph.cc
================================================
// sherpa-onnx/csrc/context-graph.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/context-graph.h"

#include <algorithm>
#include <cassert>
#include <memory>
#include <queue>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {
void ContextGraph::Build(const std::vector<std::vector<int32_t>> &token_ids,
                         const std::vector<float> &scores,
                         const std::vector<std::string> &phrases,
                         const std::vector<float> &ac_thresholds) const {
  if (!scores.empty()) {
    SHERPA_ONNX_CHECK_EQ(token_ids.size(), scores.size());
  }
  if (!phrases.empty()) {
    SHERPA_ONNX_CHECK_EQ(token_ids.size(), phrases.size());
  }
  if (!ac_thresholds.empty()) {
    SHERPA_ONNX_CHECK_EQ(token_ids.size(), ac_thresholds.size());
  }
  for (int32_t i = 0; i < static_cast<int32_t>(token_ids.size()); ++i) {
    auto node = root_.get();
    float score = scores.empty() ? 0.0f : scores[i];
    score = score == 0.0f ? context_score_ : score;
    float ac_threshold = ac_thresholds.empty() ? 0.0f : ac_thresholds[i];
    ac_threshold = ac_threshold == 0.0f ? ac_threshold_ : ac_threshold;
    std::string phrase = phrases.empty() ? std::string() : phrases[i];

    for (int32_t j = 0; j < static_cast<int32_t>(token_ids[i].size()); ++j) {
      int32_t token = token_ids[i][j];
      if (0 == node->next.count(token)) {
        bool is_end = j == (static_cast<int32_t>(token_ids[i].size()) - 1);
        node->next[token] = std::make_unique<ContextState>(
            token, score, node->node_score + score,
            is_end ? node->node_score + score : 0, j + 1,
            is_end ? ac_threshold : 0.0f, is_end,
            is_end ? phrase : std::string());
      } else {
        float token_score = std::max(score, node->next[token]->token_score);
        node->next[token]->token_score = token_score;
        float node_score = node->node_score + token_score;
        node->next[token]->node_score = node_score;
        bool is_end = (j == static_cast<int32_t>(token_ids[i].size()) - 1) ||
                      node->next[token]->is_end;
        node->next[token]->output_score = is_end ? node_score : 0.0f;
        node->next[token]->is_end = is_end;
        if (j == static_cast<int32_t>(token_ids[i].size()) - 1) {
          node->next[token]->phrase = phrase;
          node->next[token]->ac_threshold = ac_threshold;
        }
      }
      node = node->next[token].get();
    }
  }
  FillFailOutput();
}

std::tuple<float, const ContextState *, const ContextState *>
ContextGraph::ForwardOneStep(const ContextState *state, int32_t token,
                             bool strict_mode /*= true*/) const {
  const ContextState *node = nullptr;
  float score = 0;
  if (1 == state->next.count(token)) {
    node = state->next.at(token).get();
    score = node->token_score;
  } else {
    node = state->fail;
    while (0 == node->next.count(token)) {
      node = node->fail;
      if (-1 == node->token) break;  // root
    }
    if (1 == node->next.count(token)) {
      node = node->next.at(token).get();
    }
    score = node->node_score - state->node_score;
  }

  if (!node) {
    SHERPA_ONNX_LOGE("Some bad things happened.");
    exit(-1);
  }

  const ContextState *matched_node =
      node->is_end ? node : (node->output != nullptr ? node->output : nullptr);

  if (!strict_mode && node->output_score != 0) {
    SHERPA_ONNX_CHECK(nullptr != matched_node);
    float output_score =
        node->is_end ? node->node_score
                     : (node->output != nullptr ? node->output->node_score
                                                : node->node_score);
    return std::make_tuple(score + output_score - node->node_score, root_.get(),
                           matched_node);
  }
  return std::make_tuple(score + node->output_score, node, matched_node);
}

std::pair<float, const ContextState *> ContextGraph::Finalize(
    const ContextState *state) const {
  float score = -state->node_score;
  return std::make_pair(score, root_.get());
}

std::pair<bool, const ContextState *> ContextGraph::IsMatched(
    const ContextState *state) const {
  bool status = false;
  const ContextState *node = nullptr;
  if (state->is_end) {
    status = true;
    node = state;
  } else {
    if (state->output != nullptr) {
      status = true;
      node = state->output;
    }
  }
  return std::make_pair(status, node);
}

void ContextGraph::FillFailOutput() const {
  std::queue<const ContextState *> node_queue;
  for (auto &kv : root_->next) {
    kv.second->fail = root_.get();
    node_queue.push(kv.second.get());
  }
  while (!node_queue.empty()) {
    auto current_node = node_queue.front();
    node_queue.pop();
    for (auto &kv : current_node->next) {
      auto fail = current_node->fail;
      if (1 == fail->next.count(kv.first)) {
        fail = fail->next.at(kv.first).get();
      } else {
        fail = fail->fail;
        while (0 == fail->next.count(kv.first)) {
          fail = fail->fail;
          if (-1 == fail->token) break;
        }
        if (1 == fail->next.count(kv.first))
          fail = fail->next.at(kv.first).get();
      }
      kv.second->fail = fail;
      // fill the output arc
      auto output = fail;
      while (!output->is_end) {
        output = output->fail;
        if (-1 == output->token) {
          output = nullptr;
          break;
        }
      }
      kv.second->output = output;
      kv.second->output_score += output == nullptr ? 0 : output->output_score;
      node_queue.push(kv.second.get());
    }
  }
}
}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/context-graph.h
================================================
// sherpa-onnx/csrc/context-graph.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_CONTEXT_GRAPH_H_
#define SHERPA_ONNX_CSRC_CONTEXT_GRAPH_H_

#include <memory>
#include <string>
#include <tuple>
#include <unordered_map>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/log.h"

namespace sherpa_onnx {

class ContextGraph;
using ContextGraphPtr = std::shared_ptr<ContextGraph>;

struct ContextState {
  int32_t token;
  float token_score;
  float node_score;
  float output_score;
  int32_t level;
  float ac_threshold;
  bool is_end;
  std::string phrase;
  std::unordered_map<int32_t, std::unique_ptr<ContextState>> next;
  const ContextState *fail = nullptr;
  const ContextState *output = nullptr;

  ContextState() = default;
  ContextState(int32_t token, float token_score, float node_score,
               float output_score, int32_t level = 0, float ac_threshold = 0.0f,
               bool is_end = false, const std::string &phrase = {})
      : token(token),
        token_score(token_score),
        node_score(node_score),
        output_score(output_score),
        level(level),
        ac_threshold(ac_threshold),
        is_end(is_end),
        phrase(phrase) {}
};

class ContextGraph {
 public:
  ContextGraph() = default;
  ContextGraph(const std::vector<std::vector<int32_t>> &token_ids,
               float context_score, float ac_threshold,
               const std::vector<float> &scores = {},
               const std::vector<std::string> &phrases = {},
               const std::vector<float> &ac_thresholds = {})
      : context_score_(context_score), ac_threshold_(ac_threshold) {
    root_ = std::make_unique<ContextState>(-1, 0, 0, 0);
    root_->fail = root_.get();
    Build(token_ids, scores, phrases, ac_thresholds);
  }

  ContextGraph(const std::vector<std::vector<int32_t>> &token_ids,
               float context_score, const std::vector<float> &scores = {})
      : ContextGraph(token_ids, context_score, 0.0f, scores,
                     std::vector<std::string>(), std::vector<float>()) {}

  std::tuple<float, const ContextState *, const ContextState *> ForwardOneStep(
      const ContextState *state, int32_t token_id,
      bool strict_mode = true) const;

  std::pair<bool, const ContextState *> IsMatched(
      const ContextState *state) const;

  std::pair<float, const ContextState *> Finalize(
      const ContextState *state) const;

  const ContextState *Root() const { return root_.get(); }

 private:
  float context_score_;
  float ac_threshold_;
  std::unique_ptr<ContextState> root_;
  void Build(const std::vector<std::vector<int32_t>> &token_ids,
             const std::vector<float> &scores,
             const std::vector<std::string> &phrases,
             const std::vector<float> &ac_thresholds) const;
  void FillFailOutput() const;
};

}  // namespace sherpa_onnx
#endif  // SHERPA_ONNX_CSRC_CONTEXT_GRAPH_H_


================================================
FILE: sherpa-onnx/csrc/display.h
================================================
// sherpa-onnx/csrc/display.h
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_DISPLAY_H_
#define SHERPA_ONNX_CSRC_DISPLAY_H_
#include <stdio.h>

#include <string>

namespace sherpa_onnx {

class Display {
 public:
  explicit Display(int32_t max_word_per_line = 60)
      : max_word_per_line_(max_word_per_line) {}

  void Print(int32_t segment_id, const std::string &s) {
#ifdef _MSC_VER
    if (segment_id != -1) {
      fprintf(stderr, "%d:%s\n", segment_id, s.c_str());
    } else {
      fprintf(stderr, "%s\n", s.c_str());
    }
    return;
#endif
    if (last_segment_ == segment_id) {
      Clear();
    } else {
      if (last_segment_ != -1) {
        fprintf(stderr, "\n\r");
      }
      last_segment_ = segment_id;
      num_previous_lines_ = 0;
    }

    if (segment_id != -1) {
      fprintf(stderr, "\r%d:", segment_id);
    }

    int32_t i = 0;
    for (size_t n = 0; n < s.size();) {
      if (s[n] > 0 && s[n] < 0x7f) {
        fprintf(stderr, "%c", s[n]);
        ++n;
      } else {
        // Each Chinese character occupies 3 bytes for UTF-8 encoding.
        std::string tmp(s.begin() + n, s.begin() + n + 3);
        fprintf(stderr, "%s", tmp.data());
        n += 3;
      }

      ++i;
      if (i >= max_word_per_line_ && n + 1 < s.size() &&
          (s[n] == ' ' || s[n] < 0)) {
        fprintf(stderr, "\n\r ");
        ++num_previous_lines_;
        i = 0;
      }
    }
  }

 private:
  // Clear the output for the current segment
  void Clear() {
    ClearCurrentLine();
    while (num_previous_lines_ > 0) {
      GoUpOneLine();
      ClearCurrentLine();
      --num_previous_lines_;
    }
  }

  // Clear the current line
  void ClearCurrentLine() const { fprintf(stderr, "\33[2K\r"); }

  // Move the cursor to the previous line
  void GoUpOneLine() const { fprintf(stderr, "\033[1A\r"); }

 private:
  int32_t max_word_per_line_;
  int32_t num_previous_lines_ = 0;
  int32_t last_segment_ = -1;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_DISPLAY_H_


================================================
FILE: sherpa-onnx/csrc/endpoint.cc
================================================
// sherpa-onnx/csrc/endpoint.cc
//
// Copyright (c)  2022  (authors: Pingfeng Luo)
//                2022-2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/endpoint.h"

#include <string>

#include "sherpa-onnx/csrc/log.h"
#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

static bool RuleActivated(const EndpointRule &rule,
                          const std::string &rule_name, float trailing_silence,
                          float utterance_length) {
  bool contain_nonsilence = utterance_length > trailing_silence;
  bool ans = (contain_nonsilence || !rule.must_contain_nonsilence) &&
             trailing_silence >= rule.min_trailing_silence &&
             utterance_length >= rule.min_utterance_length;
  if (ans) {
    SHERPA_ONNX_LOG(DEBUG) << "Endpointing rule " << rule_name << " activated: "
                           << (contain_nonsilence ? "true" : "false") << ','
                           << trailing_silence << ',' << utterance_length;
  }
  return ans;
}

static void RegisterEndpointRule(ParseOptions *po, EndpointRule *rule,
                                 const std::string &rule_name) {
  po->Register(
      rule_name + "-must-contain-nonsilence", &rule->must_contain_nonsilence,
      "If True, for this endpointing " + rule_name +
          " to apply there must be nonsilence in the best-path traceback. "
          "For decoding, a non-blank token is considered as non-silence");
  po->Register(rule_name + "-min-trailing-silence", &rule->min_trailing_silence,
               "This endpointing " + rule_name +
                   " requires duration of trailing silence in seconds) to "
                   "be >= this value.");
  po->Register(rule_name + "-min-utterance-length", &rule->min_utterance_length,
               "This endpointing " + rule_name +
                   " requires utterance-length (in seconds) to be >= this "
                   "value.");
}

std::string EndpointRule::ToString() const {
  std::ostringstream os;

  os << "EndpointRule(";
  os << "must_contain_nonsilence="
     << (must_contain_nonsilence ? "True" : "False") << ", ";
  os << "min_trailing_silence=" << min_trailing_silence << ", ";
  os << "min_utterance_length=" << min_utterance_length << ")";

  return os.str();
}

void EndpointConfig::Register(ParseOptions *po) {
  RegisterEndpointRule(po, &rule1, "rule1");
  RegisterEndpointRule(po, &rule2, "rule2");
  RegisterEndpointRule(po, &rule3, "rule3");
}

std::string EndpointConfig::ToString() const {
  std::ostringstream os;

  os << "EndpointConfig(";
  os << "rule1=" << rule1.ToString() << ", ";
  os << "rule2=" << rule2.ToString() << ", ";
  os << "rule3=" << rule3.ToString() << ")";

  return os.str();
}

bool Endpoint::IsEndpoint(int32_t num_frames_decoded,
                          int32_t trailing_silence_frames,
                          float frame_shift_in_seconds) const {
  float utterance_length =
      static_cast<float>(num_frames_decoded) * frame_shift_in_seconds;

  float trailing_silence =
      static_cast<float>(trailing_silence_frames) * frame_shift_in_seconds;

  if (RuleActivated(config_.rule1, "rule1", trailing_silence,
                    utterance_length) ||
      RuleActivated(config_.rule2, "rule2", trailing_silence,
                    utterance_length) ||
      RuleActivated(config_.rule3, "rule3", trailing_silence,
                    utterance_length)) {
    return true;
  }
  return false;
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/endpoint.h
================================================
// sherpa-onnx/csrc/endpoint.h
//
// Copyright (c)  2022  (authors: Pingfeng Luo)
//                2022-2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_ENDPOINT_H_
#define SHERPA_ONNX_CSRC_ENDPOINT_H_

#include <string>
#include <vector>

namespace sherpa_onnx {

struct EndpointRule {
  // If True, for this endpointing rule to apply there must
  // be nonsilence in the best-path traceback.
  // For decoding, a non-blank token is considered as non-silence
  bool must_contain_nonsilence = true;
  // This endpointing rule requires duration of trailing silence
  // (in seconds) to be >= this value.
  float min_trailing_silence = 2.0;
  // This endpointing rule requires utterance-length (in seconds)
  // to be >= this value.
  float min_utterance_length = 0.0f;

  EndpointRule() = default;

  EndpointRule(bool must_contain_nonsilence, float min_trailing_silence,
               float min_utterance_length)
      : must_contain_nonsilence(must_contain_nonsilence),
        min_trailing_silence(min_trailing_silence),
        min_utterance_length(min_utterance_length) {}

  std::string ToString() const;
};

class ParseOptions;

struct EndpointConfig {
  // For default setting,
  // rule1 times out after 2.4 seconds of silence, even if we decoded nothing.
  // rule2 times out after 1.2 seconds of silence after decoding something.
  // rule3 times out after the utterance is 20 seconds long, regardless of
  // anything else.
  EndpointRule rule1;
  EndpointRule rule2;
  EndpointRule rule3;

  void Register(ParseOptions *po);

  EndpointConfig()
      : rule1{false, 2.4, 0}, rule2{true, 1.2, 0}, rule3{false, 0, 20} {}

  EndpointConfig(const EndpointRule &rule1, const EndpointRule &rule2,
                 const EndpointRule &rule3)
      : rule1(rule1), rule2(rule2), rule3(rule3) {}

  std::string ToString() const;
};

class Endpoint {
 public:
  explicit Endpoint(const EndpointConfig &config) : config_(config) {}

  /// This function returns true if this set of endpointing rules thinks we
  /// should terminate decoding.
  bool IsEndpoint(int32_t num_frames_decoded, int32_t trailing_silence_frames,
                  float frame_shift_in_seconds) const;

 private:
  EndpointConfig config_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ENDPOINT_H_


================================================
FILE: sherpa-onnx/csrc/fast-clustering-config.cc
================================================
// sherpa-onnx/csrc/fast-clustering-config.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/fast-clustering-config.h"

#include <sstream>
#include <string>

#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {
std::string FastClusteringConfig::ToString() const {
  std::ostringstream os;

  os << "FastClusteringConfig(";
  os << "num_clusters=" << num_clusters << ", ";
  os << "threshold=" << threshold << ")";

  return os.str();
}

void FastClusteringConfig::Register(ParseOptions *po) {
  po->Register(
      "num-clusters", &num_clusters,
      "Number of cluster. If greater than 0, then cluster threshold is "
      "ignored. Please provide it if you know the actual number of "
      "clusters in advance.");

  po->Register("cluster-threshold", &threshold,
               "If num_clusters is not specified, then it specifies the "
               "distance threshold for clustering. smaller value -> more "
               "clusters. larger value -> fewer clusters");
}

bool FastClusteringConfig::Validate() const {
  if (num_clusters < 1 && threshold < 0) {
    SHERPA_ONNX_LOGE("Please provide either num_clusters or threshold");
    return false;
  }

  return true;
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/fast-clustering-config.h
================================================
// sherpa-onnx/csrc/fast-clustering-config.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_FAST_CLUSTERING_CONFIG_H_
#define SHERPA_ONNX_CSRC_FAST_CLUSTERING_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct FastClusteringConfig {
  // If greater than 0, then threshold is ignored.
  //
  // We strongly recommend that you set it if you know the number of clusters
  // in advance
  int32_t num_clusters = -1;

  // distance threshold.
  //
  // The smaller, the more clusters it will generate.
  // The larger, the fewer clusters it will generate.
  float threshold = 0.5;

  FastClusteringConfig() = default;

  FastClusteringConfig(int32_t num_clusters, float threshold)
      : num_clusters(num_clusters), threshold(threshold) {}

  std::string ToString() const;

  void Register(ParseOptions *po);
  bool Validate() const;
};

}  // namespace sherpa_onnx
#endif  // SHERPA_ONNX_CSRC_FAST_CLUSTERING_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/fast-clustering-test.cc
================================================
// sherpa-onnx/csrc/fast-clustering-test.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/fast-clustering.h"

#include <iostream>
#include <vector>

#include "gtest/gtest.h"

namespace sherpa_onnx {

TEST(FastClustering, TestTwoClusters) {
  std::vector<float> features = {
      // point 0
      0.1,
      0.1,
      // point 2
      0.4,
      -0.5,
      // point 3
      0.6,
      -0.7,
      // point 1
      0.2,
      0.3,
  };

  FastClusteringConfig config;
  config.num_clusters = 2;

  FastClustering clustering(config);
  auto labels = clustering.Cluster(features.data(), 4, 2);
  int32_t k = 0;
  for (auto i : labels) {
    std::cout << "point " << k << ": label " << i << "\n";
    ++k;
  }
}

TEST(FastClustering, TestClusteringWithThreshold) {
  std::vector<float> features = {
      // point 0
      0.1,
      0.1,
      // point 2
      0.4,
      -0.5,
      // point 3
      0.6,
      -0.7,
      // point 1
      0.2,
      0.3,
  };

  FastClusteringConfig config;
  config.threshold = 0.5;

  FastClustering clustering(config);
  auto labels = clustering.Cluster(features.data(), 4, 2);
  int32_t k = 0;
  for (auto i : labels) {
    std::cout << "point " << k << ": label " << i << "\n";
    ++k;
  }
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/fast-clustering.cc
================================================
// sherpa-onnx/csrc/fast-clustering.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/fast-clustering.h"

#include <vector>

#include "Eigen/Dense"
#include "fastcluster-all-in-one.h"  // NOLINT

namespace sherpa_onnx {

class FastClustering::Impl {
 public:
  explicit Impl(const FastClusteringConfig &config) : config_(config) {}

  std::vector<int32_t> Cluster(float *features, int32_t num_rows,
                               int32_t num_cols) const {
    if (num_rows <= 0) {
      return {};
    }

    if (num_rows == 1) {
      return {0};
    }

    Eigen::Map<
        Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
        m(features, num_rows, num_cols);
    m.rowwise().normalize();

    std::vector<double> distance((num_rows * (num_rows - 1)) / 2);

    int32_t k = 0;
    for (int32_t i = 0; i != num_rows; ++i) {
      auto v = m.row(i);
      for (int32_t j = i + 1; j != num_rows; ++j) {
        double cosine_similarity = v.dot(m.row(j));
        double consine_dissimilarity = 1 - cosine_similarity;

        if (consine_dissimilarity < 0) {
          consine_dissimilarity = 0;
        }

        distance[k] = consine_dissimilarity;
        ++k;
      }
    }

    std::vector<int32_t> merge(2 * (num_rows - 1));
    std::vector<double> height(num_rows - 1);

    fastclustercpp::hclust_fast(num_rows, distance.data(),
                                fastclustercpp::HCLUST_METHOD_COMPLETE,
                                merge.data(), height.data());

    std::vector<int32_t> labels(num_rows);
    if (config_.num_clusters > 0) {
      fastclustercpp::cutree_k(num_rows, merge.data(), config_.num_clusters,
                               labels.data());
    } else {
      fastclustercpp::cutree_cdist(num_rows, merge.data(), height.data(),
                                   config_.threshold, labels.data());
    }

    return labels;
  }

 private:
  FastClusteringConfig config_;
};

FastClustering::FastClustering(const FastClusteringConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

FastClustering::~FastClustering() = default;

std::vector<int32_t> FastClustering::Cluster(float *features, int32_t num_rows,
                                             int32_t num_cols) const {
  return impl_->Cluster(features, num_rows, num_cols);
}
}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/fast-clustering.h
================================================
// sherpa-onnx/csrc/fast-clustering.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_FAST_CLUSTERING_H_
#define SHERPA_ONNX_CSRC_FAST_CLUSTERING_H_

#include <memory>
#include <vector>

#include "sherpa-onnx/csrc/fast-clustering-config.h"

namespace sherpa_onnx {

class FastClustering {
 public:
  explicit FastClustering(const FastClusteringConfig &config);
  ~FastClustering();

  /**
   * @param features Pointer to a 2-D feature matrix in row major. Each row
   *                 is a feature frame. It is changed in-place. We will
   *                 convert each feature frame to a normalized vector.
   *                 That is, the L2-norm of each vector will be equal to 1.
   *                 It uses cosine dissimilarity,
   *                 which is 1 - (cosine similarity)
   * @param num_rows Number of feature frames
   * @param num-cols The feature dimension.
   *
   * @return Return a vector of size num_rows. ans[i] contains the label
   *         for the i-th feature frame, i.e., the i-th row of the feature
   *         matrix.
   */
  std::vector<int32_t> Cluster(float *features, int32_t num_rows,
                               int32_t num_cols) const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx
#endif  // SHERPA_ONNX_CSRC_FAST_CLUSTERING_H_


================================================
FILE: sherpa-onnx/csrc/features.cc
================================================
// sherpa-onnx/csrc/features.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/features.h"

#include <algorithm>
#include <memory>
#include <mutex>
#include <sstream>
#include <string>
#include <vector>

#include "kaldi-native-fbank/csrc/online-feature.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/resample.h"

namespace sherpa_onnx {

void FeatureExtractorConfig::Register(ParseOptions *po) {
  po->Register("sample-rate", &sampling_rate,
               "Sampling rate of the input waveform. "
               "Note: You can have a different "
               "sample rate for the input waveform. We will do resampling "
               "inside the feature extractor");

  po->Register("feat-dim", &feature_dim,
               "Feature dimension. Must match the one expected by the model. "
               "Not used by whisper and CED models");

  po->Register("low-freq", &low_freq, "Low cutoff frequency for mel bins");

  po->Register("high-freq", &high_freq,
               "High cutoff frequency for mel bins "
               "(if <= 0, offset from Nyquist)");

  po->Register("dither", &dither,
               "Dithering constant (0.0 means no dither). "
               "By default the audio samples are in range [-1,+1], "
               "so 0.00003 is a good value, "
               "equivalent to the default 1.0 from kaldi");
}

std::string FeatureExtractorConfig::ToString() const {
  std::ostringstream os;

  os << "FeatureExtractorConfig(";
  os << "sampling_rate=" << sampling_rate << ", ";
  os << "feature_dim=" << feature_dim << ", ";
  os << "low_freq=" << low_freq << ", ";
  os << "high_freq=" << high_freq << ", ";
  os << "dither=" << dither << ", ";
  os << "normalize_samples=" << (normalize_samples ? "True" : "False") << ", ";
  os << "snip_edges=" << (snip_edges ? "True" : "False") << ")";

  return os.str();
}

class FeatureExtractor::Impl {
 public:
  explicit Impl(const FeatureExtractorConfig &config) : config_(config) {
    if (config_.is_mfcc) {
      InitMfcc();
    } else if (config_.is_whisper) {
      InitWhisper();
    } else if (config_.is_t_one) {
      InitRawAudioSamples();
    } else {
      InitFbank();
    }
  }

  void AcceptWaveform(int32_t sampling_rate, const float *waveform, int32_t n) {
    if (config_.normalize_samples) {
      AcceptWaveformImpl(sampling_rate, waveform, n);
    } else {
      std::vector<float> buf(n);
      for (int32_t i = 0; i != n; ++i) {
        buf[i] = waveform[i] * 32768;
      }
      AcceptWaveformImpl(sampling_rate, buf.data(), n);
    }
  }

  void AcceptWaveformImpl(int32_t sampling_rate, const float *waveform,
                          int32_t n) {
    std::lock_guard<std::mutex> lock(mutex_);

    if (resampler_) {
      if (sampling_rate != resampler_->GetInputSamplingRate()) {
        SHERPA_ONNX_LOGE(
            "You changed the input sampling rate!! Expected: %d, given: "
            "%d",
            resampler_->GetInputSamplingRate(), sampling_rate);
        exit(-1);
      }

      std::vector<float> samples;
      resampler_->Resample(waveform, n, false, &samples);

      AcceptWaveformWrapper(config_.sampling_rate, samples.data(),
                            samples.size());
      return;
    }

    if (sampling_rate != config_.sampling_rate) {
      SHERPA_ONNX_LOGE(
          "Creating a resampler:\n"
          "   in_sample_rate: %d\n"
          "   output_sample_rate: %d\n",
          sampling_rate, static_cast<int32_t>(config_.sampling_rate));

      float min_freq = std::min<int32_t>(sampling_rate, config_.sampling_rate);
      float lowpass_cutoff = 0.99 * 0.5 * min_freq;

      int32_t lowpass_filter_width = 6;
      resampler_ = std::make_unique<LinearResample>(
          sampling_rate, config_.sampling_rate, lowpass_cutoff,
          lowpass_filter_width);

      std::vector<float> samples;
      resampler_->Resample(waveform, n, false, &samples);

      AcceptWaveformWrapper(config_.sampling_rate, samples.data(),
                            samples.size());

      return;
    }

    AcceptWaveformWrapper(sampling_rate, waveform, n);
  }

  void InputFinished() const {
    std::lock_guard<std::mutex> lock(mutex_);
    if (fbank_) {
      fbank_->InputFinished();
      return;
    } else if (whisper_fbank_) {
      whisper_fbank_->InputFinished();
      return;
    } else if (raw_audio_) {
      raw_audio_->InputFinished();
      return;
    } else if (mfcc_) {
      mfcc_->InputFinished();
      return;
    }

    SHERPA_ONNX_LOGE("unreachable code");
    SHERPA_ONNX_EXIT(-1);
  }

  int32_t NumFramesReady() const {
    if (fbank_) {
      return fbank_->NumFramesReady();
    } else if (whisper_fbank_) {
      return whisper_fbank_->NumFramesReady();
    } else if (raw_audio_) {
      return raw_audio_->NumFramesReady();
    } else if (mfcc_) {
      return mfcc_->NumFramesReady();
    }
    SHERPA_ONNX_LOGE("unreachable code");
    SHERPA_ONNX_EXIT(-1);
    return -1;
  }

  bool IsLastFrame(int32_t frame) const {
    std::lock_guard<std::mutex> lock(mutex_);
    if (fbank_) {
      return fbank_->IsLastFrame(frame);
    } else if (whisper_fbank_) {
      return whisper_fbank_->IsLastFrame(frame);
    } else if (raw_audio_) {
      return raw_audio_->IsLastFrame(frame);
    } else if (mfcc_) {
      return mfcc_->IsLastFrame(frame);
    }

    SHERPA_ONNX_LOGE("unreachable code");
    SHERPA_ONNX_EXIT(-1);
    return false;
  }

  std::vector<float> GetFrames(int32_t frame_index, int32_t n) {
    std::lock_guard<std::mutex> lock(mutex_);
    if (frame_index + n > NumFramesReady()) {
      SHERPA_ONNX_LOGE("%d + %d > %d\n", frame_index, n, NumFramesReady());
      SHERPA_ONNX_EXIT(-1);
    }

    int32_t discard_num = frame_index - last_frame_index_;
    if (discard_num < 0) {
      SHERPA_ONNX_LOGE("last_frame_index_: %d, frame_index_: %d",
                       last_frame_index_, frame_index);
      SHERPA_ONNX_EXIT(-1);
    }

    PopWrapper(discard_num);

    int32_t feature_dim = FeatureDim();
    std::vector<float> features(feature_dim * n);

    float *p = features.data();

    for (int32_t i = 0; i != n; ++i) {
      const float *f = GetFrameWrapper(i + frame_index);
      std::copy(f, f + feature_dim, p);
      p += feature_dim;
    }

    last_frame_index_ = frame_index;

    return features;
  }

  int32_t FeatureDim() const {
    if (fbank_ || whisper_fbank_) {
      return opts_.mel_opts.num_bins;
    } else if (mfcc_) {
      return mfcc_opts_.num_ceps;
    } else if (raw_audio_) {
      return raw_audio_->Dim();
    }

    SHERPA_ONNX_LOGE("unreachable code");
    SHERPA_ONNX_EXIT(-1);
    return -1;
  }

 private:
  void AcceptWaveformWrapper(float sampling_rate, const float *waveform,
                             int32_t n) const {
    if (fbank_) {
      fbank_->AcceptWaveform(sampling_rate, waveform, n);
      return;
    } else if (whisper_fbank_) {
      whisper_fbank_->AcceptWaveform(sampling_rate, waveform, n);
      return;
    } else if (raw_audio_) {
      raw_audio_->AcceptWaveform(sampling_rate, waveform, n);
      return;
    } else if (mfcc_) {
      mfcc_->AcceptWaveform(sampling_rate, waveform, n);
      return;
    }

    SHERPA_ONNX_LOGE("unreachable code");
    SHERPA_ONNX_EXIT(-1);
  }

  const float *GetFrameWrapper(int32_t frame_index) const {
    if (fbank_) {
      return fbank_->GetFrame(frame_index);
    } else if (whisper_fbank_) {
      return whisper_fbank_->GetFrame(frame_index);
    } else if (raw_audio_) {
      return raw_audio_->GetFrame(frame_index);
    } else if (mfcc_) {
      return mfcc_->GetFrame(frame_index);
    }

    SHERPA_ONNX_LOGE("unreachable code");
    SHERPA_ONNX_EXIT(-1);
    return nullptr;
  }

  void PopWrapper(int32_t discard_num) const {
    if (fbank_) {
      fbank_->Pop(discard_num);
      return;
    } else if (whisper_fbank_) {
      whisper_fbank_->Pop(discard_num);
      return;
    } else if (raw_audio_) {
      raw_audio_->Pop(discard_num);
      return;
    } else if (mfcc_) {
      mfcc_->Pop(discard_num);
      return;
    }

    SHERPA_ONNX_LOGE("unreachable code");
    SHERPA_ONNX_EXIT(-1);
  }

  void InitFbank() {
    opts_.frame_opts.dither = config_.dither;
    opts_.frame_opts.snip_edges = config_.snip_edges;
    opts_.frame_opts.samp_freq = config_.sampling_rate;
    opts_.frame_opts.frame_shift_ms = config_.frame_shift_ms;
    opts_.frame_opts.frame_length_ms = config_.frame_length_ms;
    opts_.frame_opts.remove_dc_offset = config_.remove_dc_offset;
    opts_.frame_opts.preemph_coeff = config_.preemph_coeff;
    opts_.frame_opts.window_type = config_.window_type;
    opts_.frame_opts.round_to_power_of_two = config_.round_to_power_of_two;

    opts_.mel_opts.num_bins = config_.feature_dim;

    opts_.mel_opts.high_freq = config_.high_freq;
    opts_.mel_opts.low_freq = config_.low_freq;

    opts_.mel_opts.is_librosa = config_.is_librosa;

    fbank_ = std::make_unique<knf::OnlineFbank>(opts_);
  }

  void InitMfcc() {
    mfcc_opts_.frame_opts.dither = config_.dither;
    mfcc_opts_.frame_opts.snip_edges = config_.snip_edges;
    mfcc_opts_.frame_opts.samp_freq = config_.sampling_rate;
    mfcc_opts_.frame_opts.frame_shift_ms = config_.frame_shift_ms;
    mfcc_opts_.frame_opts.frame_length_ms = config_.frame_length_ms;
    mfcc_opts_.frame_opts.remove_dc_offset = config_.remove_dc_offset;
    mfcc_opts_.frame_opts.preemph_coeff = config_.preemph_coeff;
    mfcc_opts_.frame_opts.window_type = config_.window_type;
    mfcc_opts_.frame_opts.round_to_power_of_two = config_.round_to_power_of_two;

    mfcc_opts_.mel_opts.num_bins = config_.feature_dim;

    mfcc_opts_.mel_opts.high_freq = config_.high_freq;
    mfcc_opts_.mel_opts.low_freq = config_.low_freq;

    mfcc_opts_.mel_opts.is_librosa = config_.is_librosa;

    mfcc_opts_.num_ceps = config_.num_ceps;
    mfcc_opts_.use_energy = config_.use_energy;

    mfcc_ = std::make_unique<knf::OnlineMfcc>(mfcc_opts_);
  }

  void InitWhisper() {
    config_.normalize_samples = true;
    opts_.frame_opts.samp_freq = 16000;
    opts_.mel_opts.num_bins = config_.feature_dim;

    knf::WhisperFeatureOptions whisper_opts;
    whisper_opts.frame_opts = opts_.frame_opts;
    whisper_opts.dim = config_.feature_dim;

    whisper_fbank_ = std::make_unique<knf::OnlineWhisperFbank>(whisper_opts);
    config_.sampling_rate = opts_.frame_opts.samp_freq;
  }

  void InitRawAudioSamples() {
    opts_raw_audio_.frame_opts.samp_freq = config_.sampling_rate;
    opts_raw_audio_.frame_opts.frame_length_ms = config_.frame_length_ms;
    opts_raw_audio_.frame_opts.frame_shift_ms = config_.frame_shift_ms;

    raw_audio_ = std::make_unique<knf::OnlineRawAudioSamples>(opts_raw_audio_);
  }

 private:
  std::unique_ptr<knf::OnlineFbank> fbank_;
  std::unique_ptr<knf::OnlineMfcc> mfcc_;
  std::unique_ptr<knf::OnlineWhisperFbank> whisper_fbank_;
  std::unique_ptr<knf::OnlineRawAudioSamples> raw_audio_;
  knf::FbankOptions opts_;
  knf::RawAudioSamplesOptions opts_raw_audio_;
  knf::MfccOptions mfcc_opts_;
  FeatureExtractorConfig config_;
  mutable std::mutex mutex_;
  std::unique_ptr<LinearResample> resampler_;
  int32_t last_frame_index_ = 0;
};

FeatureExtractor::FeatureExtractor(const FeatureExtractorConfig &config /*={}*/)
    : impl_(std::make_unique<Impl>(config)) {}

FeatureExtractor::~FeatureExtractor() = default;

void FeatureExtractor::AcceptWaveform(int32_t sampling_rate,
                                      const float *waveform, int32_t n) const {
  impl_->AcceptWaveform(sampling_rate, waveform, n);
}

void FeatureExtractor::InputFinished() const { impl_->InputFinished(); }

int32_t FeatureExtractor::NumFramesReady() const {
  return impl_->NumFramesReady();
}

bool FeatureExtractor::IsLastFrame(int32_t frame) const {
  return impl_->IsLastFrame(frame);
}

std::vector<float> FeatureExtractor::GetFrames(int32_t frame_index,
                                               int32_t n) const {
  return impl_->GetFrames(frame_index, n);
}

int32_t FeatureExtractor::FeatureDim() const { return impl_->FeatureDim(); }

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/features.h
================================================
// sherpa-onnx/csrc/features.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_FEATURES_H_
#define SHERPA_ONNX_CSRC_FEATURES_H_

#include <memory>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct FeatureExtractorConfig {
  // Sampling rate used by the feature extractor. If it is different from
  // the sampling rate of the input waveform, we will do resampling inside.
  int32_t sampling_rate = 16000;

  // num_mel_bins
  //
  // Note: for mfcc, this value is also for num_mel_bins.
  // The actual feature dimension is num_ceps
  int32_t feature_dim = 80;

  // minimal frequency for Mel-filterbank, in Hz
  float low_freq = 20.0f;

  // maximal frequency of Mel-filterbank
  // in Hz; negative value is subtracted from Nyquist freq.:
  // i.e. for sampling_rate 16000 / 2 - 400 = 7600Hz
  //
  // Please see
  // https://github.com/lhotse-speech/lhotse/blob/master/lhotse/features/fbank.py#L27
  // and
  // https://github.com/k2-fsa/sherpa-onnx/issues/514
  float high_freq = -400.0f;

  // dithering constant, useful for signals with hard-zeroes in non-speech parts
  // this prevents large negative values in log-mel filterbanks
  //
  // In k2, audio samples are in range [-1..+1], in kaldi the range was
  // [-32k..+32k], so the value 0.00003 is equivalent to kaldi default 1.0
  //
  float dither = 0.0f;  // dithering disabled by default

  // Set internally by some models, e.g., paraformer sets it to false.
  // This parameter is not exposed to users from the commandline
  // If true, the feature extractor expects inputs to be normalized to
  // the range [-1, 1].
  // If false, we will multiply the inputs by 32768
  bool normalize_samples = true;

  bool snip_edges = false;
  float frame_shift_ms = 10.0f;   // in milliseconds.
  float frame_length_ms = 25.0f;  // in milliseconds.
  bool is_librosa = false;
  bool remove_dc_offset = true;       // Subtract mean of wave before FFT.
  float preemph_coeff = 0.97f;        // Preemphasis coefficient.
  std::string window_type = "povey";  // e.g. Hamming window

  // For models from NeMo
  // This option is not exposed and is set internally when loading models.
  // Possible values:
  // - per_feature
  // - all_features (not implemented yet)
  // - fixed_mean (not implemented)
  // - fixed_std (not implemented)
  // - or just leave it to empty
  // See
  // https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/asr/parts/preprocessing/features.py#L59
  // for details
  std::string nemo_normalize_type;

  // for MFCC
  int32_t num_ceps = 13;
  bool use_energy = true;

  bool is_mfcc = false;

  bool is_whisper = false;

  bool is_t_one = false;

  bool round_to_power_of_two = true;

  std::string ToString() const;

  void Register(ParseOptions *po);
};

class FeatureExtractor {
 public:
  explicit FeatureExtractor(const FeatureExtractorConfig &config = {});
  ~FeatureExtractor();

  /**
     @param sampling_rate The sampling_rate of the input waveform. If it does
                          not equal to  config.sampling_rate, we will do
                          resampling inside.
     @param waveform Pointer to a 1-D array of size n. It must be normalized to
                     the range [-1, 1].
     @param n Number of entries in waveform
   */
  void AcceptWaveform(int32_t sampling_rate, const float *waveform,
                      int32_t n) const;

  /**
   * InputFinished() tells the class you won't be providing any
   * more waveform.  This will help flush out the last frame or two
   * of features, in the case where snip-edges == false; it also
   * affects the return value of IsLastFrame().
   */
  void InputFinished() const;

  int32_t NumFramesReady() const;

  /** Note: IsLastFrame() will only ever return true if you have called
   * InputFinished() (and this frame is the last frame).
   */
  bool IsLastFrame(int32_t frame) const;

  /** Get n frames starting from the given frame index.
   *
   * @param frame_index  The starting frame index
   * @param n  Number of frames to get.
   * @return Return a 2-D tensor of shape (n, feature_dim).
   *         which is flattened into a 1-D vector (flattened in row major)
   */
  std::vector<float> GetFrames(int32_t frame_index, int32_t n) const;

  /// Return feature dim of this extractor
  int32_t FeatureDim() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_FEATURES_H_


================================================
FILE: sherpa-onnx/csrc/file-utils.cc
================================================
// sherpa-onnx/csrc/file-utils.cc
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/file-utils.h"

#include <fstream>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

#ifdef _WIN32
#include <windows.h>
#else
#include <limits.h>
#include <stdlib.h>
#endif

#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

bool FileExists(const std::string &filename) {
  return std::ifstream(filename).good();
}

void AssertFileExists(const std::string &filename) {
  if (!FileExists(filename)) {
    SHERPA_ONNX_LOGE("filename '%s' does not exist", filename.c_str());
    SHERPA_ONNX_EXIT(-1);
  }
}

std::vector<char> ReadFile(const std::string &filename) {
  std::ifstream file(filename, std::ios::binary | std::ios::ate);
  if (!file.is_open()) {
    return {};
  }

  std::streamsize size = file.tellg();
  file.seekg(0, std::ios::beg);

  std::vector<char> buffer(size);
  if (!file.read(buffer.data(), size)) {
    return {};
  }

  return buffer;
}

#if __ANDROID_API__ >= 9
std::vector<char> ReadFile(AAssetManager *mgr, const std::string &filename) {
  if (!filename.empty() && filename[0] == '/') {
    SHERPA_ONNX_LOGE(
        "You are using an absolute path '%s', but assetManager is NOT set to "
        "null.",
        filename.c_str());

    SHERPA_ONNX_LOGE(
        "Please set assetManager to null when you load model files from the SD "
        "card");

    SHERPA_ONNX_LOGE(
        "See also https://github.com/k2-fsa/sherpa-onnx/issues/2562");
  }

  AAsset *asset = AAssetManager_open(mgr, filename.c_str(), AASSET_MODE_BUFFER);
  if (!asset) {
    __android_log_print(ANDROID_LOG_FATAL, "sherpa-onnx",
                        "Read binary file: Load '%s' failed", filename.c_str());
    exit(-1);
  }

  auto p = reinterpret_cast<const char *>(AAsset_getBuffer(asset));
  size_t asset_length = AAsset_getLength(asset);

  std::vector<char> buffer(p, p + asset_length);
  AAsset_close(asset);

  return buffer;
}
#endif

#if __OHOS__
std::vector<char> ReadFile(NativeResourceManager *mgr,
                           const std::string &filename) {
  std::unique_ptr<RawFile, decltype(&OH_ResourceManager_CloseRawFile)> fp(
      OH_ResourceManager_OpenRawFile(mgr, filename.c_str()),
      OH_ResourceManager_CloseRawFile);

  if (!fp) {
    std::ostringstream os;
    os << "Read file '" << filename << "' failed.";
    SHERPA_ONNX_LOGE("%s", os.str().c_str());
    return {};
  }

  auto len = static_cast<int32_t>(OH_ResourceManager_GetRawFileSize(fp.get()));

  std::vector<char> buffer(len);

  int32_t n = OH_ResourceManager_ReadRawFile(fp.get(), buffer.data(), len);

  if (n != len) {
    std::ostringstream os;
    os << "Read file '" << filename << "' failed. Number of bytes read: " << n
       << ". Expected bytes to read: " << len;
    SHERPA_ONNX_LOGE("%s", os.str().c_str());
    return {};
  }

  return buffer;
}
#endif

std::string ResolveAbsolutePath(const std::string &path) {
  if (path.empty()) {
    return path;
  }

#ifdef _WIN32
  // Check if path is already absolute (drive letter or UNC path)
  if ((path.size() > 1 && path[1] == ':') ||
      (path.size() > 1 && path[0] == '\\' && path[1] == '\\')) {
    return path;
  }

  char buffer[MAX_PATH];
  if (GetFullPathNameA(path.c_str(), MAX_PATH, buffer, nullptr)) {
    return std::string(buffer);
  }

  return path;  // fallback on failure

#else
  // POSIX: absolute paths start with '/'
  if (path[0] == '/') {
    return path;
  }

  char buffer[PATH_MAX];
  if (realpath(path.c_str(), buffer)) {
    return std::string(buffer);
  }

  return path;  // fallback on failure
#endif
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/file-utils.h
================================================
// sherpa-onnx/csrc/file-utils.h
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_FILE_UTILS_H_
#define SHERPA_ONNX_CSRC_FILE_UTILS_H_

#include <fstream>
#include <string>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

namespace sherpa_onnx {

/** Check whether a given path is a file or not
 *
 * @param filename Path to check.
 * @return Return true if the given path is a file; return false otherwise.
 */
bool FileExists(const std::string &filename);

/** Abort if the file does not exist.
 *
 * @param filename The file to check.
 */
void AssertFileExists(const std::string &filename);

std::vector<char> ReadFile(const std::string &filename);

#if __ANDROID_API__ >= 9
std::vector<char> ReadFile(AAssetManager *mgr, const std::string &filename);
#endif

#if __OHOS__
std::vector<char> ReadFile(NativeResourceManager *mgr,
                           const std::string &filename);
#endif

std::string ResolveAbsolutePath(const std::string &path);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_FILE_UTILS_H_


================================================
FILE: sherpa-onnx/csrc/fst-utils.cc
================================================
// sherpa-onnx/csrc/fst-utils.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/fst-utils.h"

#include <string>

#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

// This function is copied from kaldi.
//
// @param filename Path to a StdVectorFst or StdConstFst graph
// @return The caller should free the returned pointer using `delete` to
//         avoid memory leak.
fst::Fst<fst::StdArc> *ReadGraph(const std::string &filename) {
  // read decoding network FST
  std::ifstream is(filename, std::ios::binary);
  if (!is.good()) {
    SHERPA_ONNX_LOGE("Could not open decoding-graph FST %s", filename.c_str());
  }

  fst::FstHeader hdr;
  if (!hdr.Read(is, "<unknown>")) {
    SHERPA_ONNX_LOGE("Reading FST: error reading FST header.");
  }

  if (hdr.ArcType() != fst::StdArc::Type()) {
    SHERPA_ONNX_LOGE("FST with arc type %s not supported",
                     hdr.ArcType().c_str());
  }
  fst::FstReadOptions ropts("<unspecified>", &hdr);

  fst::Fst<fst::StdArc> *decode_fst = nullptr;

  if (hdr.FstType() == "vector") {
    decode_fst = fst::VectorFst<fst::StdArc>::Read(is, ropts);
  } else if (hdr.FstType() == "const") {
    decode_fst = fst::ConstFst<fst::StdArc>::Read(is, ropts);
  } else {
    SHERPA_ONNX_LOGE("Reading FST: unsupported FST type: %s",
                     hdr.FstType().c_str());
  }

  if (decode_fst == nullptr) {  // fst code will warn.
    SHERPA_ONNX_LOGE("Error reading FST (after reading header).");
    return nullptr;
  } else {
    return decode_fst;
  }
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/fst-utils.h
================================================
// sherpa-onnx/csrc/fst-utils.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_FST_UTILS_H_
#define SHERPA_ONNX_CSRC_FST_UTILS_H_

#include <string>

#include "fst/fstlib.h"

namespace sherpa_onnx {

fst::Fst<fst::StdArc> *ReadGraph(const std::string &filename);

}

#endif  // SHERPA_ONNX_CSRC_FST_UTILS_H_


================================================
FILE: sherpa-onnx/csrc/funasr-nano-tokenizer.cc
================================================
// sherpa-onnx/csrc/funasr-nano-tokenizer.cc
//
// Copyright (c)  2025  zengyw

#include "sherpa-onnx/csrc/funasr-nano-tokenizer.h"

#include <algorithm>
#include <cctype>
#include <cstdint>
#include <cstring>
#include <limits>
#include <sstream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

namespace {

static std::string FindTokenizerJson(const std::string &tokenizer_dir) {
  std::string p = tokenizer_dir + "/tokenizer.json";
  if (FileExists(p)) return p;
  return "";
}

static std::string FindVocabJson(const std::string &tokenizer_dir) {
  std::string p = tokenizer_dir + "/vocab.json";
  if (FileExists(p)) return p;
  return "";
}

static std::string FindMergesTxt(const std::string &tokenizer_dir) {
  std::string p = tokenizer_dir + "/merges.txt";
  if (FileExists(p)) return p;
  return "";
}

static std::string LoadBytesFromFile(const std::string &path) {
  std::vector<char> data = ReadFile(path);
  if (data.empty()) return "";
  return std::string(data.data(), data.size());
}

#if __ANDROID_API__ >= 9
static std::string LoadBytesFromFile(AAssetManager *mgr,
                                     const std::string &path) {
  std::vector<char> data = ReadFile(mgr, path);
  if (data.empty()) return "";
  return std::string(data.data(), data.size());
}
#endif

#if __OHOS__
static std::string LoadBytesFromFile(NativeResourceManager *mgr,
                                     const std::string &path) {
  std::vector<char> data = ReadFile(mgr, path);
  if (data.empty()) return "";
  return std::string(data.data(), data.size());
}
#endif

static inline void TrimInPlace(std::string *s) {
  if (!s) return;
  auto &x = *s;
  size_t b = x.find_first_not_of(" \t\r\n");
  if (b == std::string::npos) {
    x.clear();
    return;
  }
  size_t e = x.find_last_not_of(" \t\r\n");
  x = x.substr(b, e - b + 1);
}

static inline void AppendUtf8(uint32_t cp, std::string *out) {
  if (!out) return;
  if (cp <= 0x7Fu) {
    out->push_back(static_cast<char>(cp));
  } else if (cp <= 0x7FFu) {
    out->push_back(static_cast<char>(0xC0u | ((cp >> 6) & 0x1Fu)));
    out->push_back(static_cast<char>(0x80u | (cp & 0x3Fu)));
  } else if (cp <= 0xFFFFu) {
    out->push_back(static_cast<char>(0xE0u | ((cp >> 12) & 0x0Fu)));
    out->push_back(static_cast<char>(0x80u | ((cp >> 6) & 0x3Fu)));
    out->push_back(static_cast<char>(0x80u | (cp & 0x3Fu)));
  } else {
    out->push_back(static_cast<char>(0xF0u | ((cp >> 18) & 0x07u)));
    out->push_back(static_cast<char>(0x80u | ((cp >> 12) & 0x3Fu)));
    out->push_back(static_cast<char>(0x80u | ((cp >> 6) & 0x3Fu)));
    out->push_back(static_cast<char>(0x80u | (cp & 0x3Fu)));
  }
}

static inline bool Utf8Next(const std::string &s, size_t *i, uint32_t *cp,
                            size_t *nbytes) {
  if (!i || !cp || !nbytes) return false;
  if (*i >= s.size()) return false;
  const unsigned char c = static_cast<unsigned char>(s[*i]);
  if (c < 0x80) {
    *cp = c;
    *nbytes = 1;
    return true;
  }
  if ((c >> 5) == 0x6) {  // 110xxxxx
    if (*i + 1 >= s.size()) return false;
    const unsigned char c1 = static_cast<unsigned char>(s[*i + 1]);
    if ((c1 >> 6) != 0x2) return false;
    *cp = ((c & 0x1F) << 6) | (c1 & 0x3F);
    *nbytes = 2;
    return true;
  }
  if ((c >> 4) == 0xE) {  // 1110xxxx
    if (*i + 2 >= s.size()) return false;
    const unsigned char c1 = static_cast<unsigned char>(s[*i + 1]);
    const unsigned char c2 = static_cast<unsigned char>(s[*i + 2]);
    if ((c1 >> 6) != 0x2 || (c2 >> 6) != 0x2) return false;
    *cp = ((c & 0x0F) << 12) | ((c1 & 0x3F) << 6) | (c2 & 0x3F);
    *nbytes = 3;
    return true;
  }
  if ((c >> 3) == 0x1E) {  // 11110xxx
    if (*i + 3 >= s.size()) return false;
    const unsigned char c1 = static_cast<unsigned char>(s[*i + 1]);
    const unsigned char c2 = static_cast<unsigned char>(s[*i + 2]);
    const unsigned char c3 = static_cast<unsigned char>(s[*i + 3]);
    if ((c1 >> 6) != 0x2 || (c2 >> 6) != 0x2 || (c3 >> 6) != 0x2) return false;
    *cp = ((c & 0x07) << 18) | ((c1 & 0x3F) << 12) | ((c2 & 0x3F) << 6) |
          (c3 & 0x3F);
    *nbytes = 4;
    return true;
  }
  return false;
}

enum class Utf8ConsumeStatus {
  kOk = 0,
  kIncomplete = 1,
  kInvalid = 2,
};

struct Utf8ConsumeResult {
  std::string prefix;
  Utf8ConsumeStatus status;
};

static Utf8ConsumeResult ConsumeValidUtf8Prefix(std::string *pending) {
  Utf8ConsumeResult r;
  if (!pending || pending->empty()) {
    r.status = Utf8ConsumeStatus::kOk;
    return r;
  }

  const auto is_cont = [](uint8_t b) -> bool { return (b & 0xC0u) == 0x80u; };

  const std::string &s = *pending;
  const size_t n = s.size();

  size_t i = 0;
  size_t last_good = 0;

  while (i < n) {
    uint8_t b0 = static_cast<uint8_t>(s[i]);

    if (b0 < 0x80u) {
      ++i;
      last_good = i;
      continue;
    }

    size_t need = 0;

    if (b0 >= 0xC2u && b0 <= 0xDFu) {
      need = 2;
      if (i + need > n) {
        r.status = Utf8ConsumeStatus::kIncomplete;
        break;
      }
      uint8_t b1 = static_cast<uint8_t>(s[i + 1]);
      if (!is_cont(b1)) {
        r.status = Utf8ConsumeStatus::kInvalid;
        break;
      }
      i += need;
      last_good = i;
      continue;
    }

    if (b0 >= 0xE0u && b0 <= 0xEFu) {
      need = 3;
      if (i + need > n) {
        r.status = Utf8ConsumeStatus::kIncomplete;
        break;
      }
      uint8_t b1 = static_cast<uint8_t>(s[i + 1]);
      uint8_t b2 = static_cast<uint8_t>(s[i + 2]);
      if (!is_cont(b1) || !is_cont(b2)) {
        r.status = Utf8ConsumeStatus::kInvalid;
        break;
      }

      if (b0 == 0xE0u && b1 < 0xA0u) {
        r.status = Utf8ConsumeStatus::kInvalid;
        break;
      }
      if (b0 == 0xEDu && b1 > 0x9Fu) {
        r.status = Utf8ConsumeStatus::kInvalid;
        break;
      }

      i += need;
      last_good = i;
      continue;
    }

    if (b0 >= 0xF0u && b0 <= 0xF4u) {
      need = 4;
      if (i + need > n) {
        r.status = Utf8ConsumeStatus::kIncomplete;
        break;
      }
      uint8_t b1 = static_cast<uint8_t>(s[i + 1]);
      uint8_t b2 = static_cast<uint8_t>(s[i + 2]);
      uint8_t b3 = static_cast<uint8_t>(s[i + 3]);
      if (!is_cont(b1) || !is_cont(b2) || !is_cont(b3)) {
        r.status = Utf8ConsumeStatus::kInvalid;
        break;
      }

      if (b0 == 0xF0u && b1 < 0x90u) {
        r.status = Utf8ConsumeStatus::kInvalid;
        break;
      }
      if (b0 == 0xF4u && b1 > 0x8Fu) {
        r.status = Utf8ConsumeStatus::kInvalid;
        break;
      }

      i += need;
      last_good = i;
      continue;
    }

    r.status = Utf8ConsumeStatus::kInvalid;
    break;
  }

  if (i == n) {
    r.status = Utf8ConsumeStatus::kOk;
    last_good = n;
  }

  if (last_good > 0) {
    r.prefix = pending->substr(0, last_good);
    pending->erase(0, last_good);
  } else {
    r.prefix.clear();
  }

  return r;
}

static inline void ByteLevelDecodeTokenToBytes(
    const std::string &token,
    const std::unordered_map<std::string, uint8_t> &unicode_to_byte,
    std::string *out_bytes) {
  if (!out_bytes) return;

  size_t i = 0;
  while (i < token.size()) {
    size_t t = i;
    uint32_t cp = 0;
    size_t n = 0;
    if (!Utf8Next(token, &t, &cp, &n) || n == 0) {
      out_bytes->push_back(token[i]);
      i += 1;
      continue;
    }
    std::string ch = token.substr(i, n);
    auto it = unicode_to_byte.find(ch);
    if (it != unicode_to_byte.end()) {
      out_bytes->push_back(static_cast<char>(it->second));
    } else {
      out_bytes->append(ch);
    }
    i += n;
  }
}

static inline bool IsNewline(uint32_t cp) { return cp == '\n' || cp == '\r'; }

static inline bool IsAsciiSpace(uint32_t cp) { return cp == ' '; }

static inline bool IsWhitespace(uint32_t cp) {
  return cp == ' ' || cp == '\t' || cp == '\n' || cp == '\r' || cp == '\v' ||
         cp == '\f';
}

static inline bool IsAsciiAlpha(uint32_t cp) {
  return (cp >= 'a' && cp <= 'z') || (cp >= 'A' && cp <= 'Z');
}

static inline bool IsAsciiDigit(uint32_t cp) {
  return (cp >= '0' && cp <= '9');
}

// A light-weight unicode letter/number approximation good enough for
// Qwen3(English/Chinese/Japanese/Korean + common scripts).
static inline bool IsLetter(uint32_t cp) {
  if (IsAsciiAlpha(cp)) return true;

  // CJK Unified Ideographs
  if (cp >= 0x4E00 && cp <= 0x9FFF) return true;
  // CJK Extension A
  if (cp >= 0x3400 && cp <= 0x4DBF) return true;
  // Hiragana/Katakana
  if (cp >= 0x3040 && cp <= 0x30FF) return true;
  // Hangul syllables
  if (cp >= 0xAC00 && cp <= 0xD7AF) return true;
  // Hangul Jamo
  if (cp >= 0x1100 && cp <= 0x11FF) return true;

  // Latin-1 Supplement + Latin Extended (covers most European letters)
  if (cp >= 0x00C0 && cp <= 0x02AF) return true;

  return false;
}

static inline bool IsNumber(uint32_t cp) {
  if (IsAsciiDigit(cp)) return true;
  // Fullwidth digits
  if (cp >= 0xFF10 && cp <= 0xFF19) return true;
  return false;
}

class JsonReader {
 public:
  explicit JsonReader(const std::string &s) : s_(s), p_(0) {}

  bool SeekToKey(const std::string &key) {
    std::string needle = "\"" + key + "\"";
    size_t pos = s_.find(needle);
    if (pos == std::string::npos) return false;
    p_ = pos + needle.size();
    return true;
  }

  void SkipWs() {
    while (p_ < s_.size()) {
      char c = s_[p_];
      if (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
        ++p_;
      } else {
        break;
      }
    }
  }

  bool Consume(char c) {
    SkipWs();
    if (p_ < s_.size() && s_[p_] == c) {
      ++p_;
      return true;
    }
    return false;
  }

  bool Peek(char *c) const {
    if (!c) return false;
    size_t q = p_;
    while (q < s_.size()) {
      char x = s_[q];
      if (x == ' ' || x == '\t' || x == '\r' || x == '\n') {
        ++q;
        continue;
      }
      *c = x;
      return true;
    }
    return false;
  }

  bool ParseString(std::string *out) {
    if (!out) return false;
    SkipWs();
    if (p_ >= s_.size() || s_[p_] != '"') return false;
    ++p_;
    std::string r;
    while (p_ < s_.size()) {
      char c = s_[p_++];
      if (c == '"') {
        *out = std::move(r);
        return true;
      }
      if (c != '\\') {
        r.push_back(c);
        continue;
      }
      if (p_ >= s_.size()) return false;
      char esc = s_[p_++];
      switch (esc) {
        case '"':
          r.push_back('"');
          break;
        case '\\':
          r.push_back('\\');
          break;
        case '/':
          r.push_back('/');
          break;
        case 'b':
          r.push_back('\b');
          break;
        case 'f':
          r.push_back('\f');
          break;
        case 'n':
          r.push_back('\n');
          break;
        case 'r':
          r.push_back('\r');
          break;
        case 't':
          r.push_back('\t');
          break;
        case 'u': {
          if (p_ + 4 > s_.size()) return false;
          uint32_t u = 0;
          for (int i = 0; i < 4; ++i) {
            char h = s_[p_++];
            u <<= 4;
            if (h >= '0' && h <= '9')
              u |= (h - '0');
            else if (h >= 'a' && h <= 'f')
              u |= (h - 'a' + 10);
            else if (h >= 'A' && h <= 'F')
              u |= (h - 'A' + 10);
            else
              return false;
          }
          if (u >= 0xD800 && u <= 0xDBFF) {
            size_t save = p_;
            if (p_ + 6 <= s_.size() && s_[p_] == '\\' && s_[p_ + 1] == 'u') {
              p_ += 2;
              uint32_t v = 0;
              for (int i = 0; i < 4; ++i) {
                char h = s_[p_++];
                v <<= 4;
                if (h >= '0' && h <= '9')
                  v |= (h - '0');
                else if (h >= 'a' && h <= 'f')
                  v |= (h - 'a' + 10);
                else if (h >= 'A' && h <= 'F')
                  v |= (h - 'A' + 10);
                else
                  return false;
              }
              if (v >= 0xDC00 && v <= 0xDFFF) {
                uint32_t cp = 0x10000 + (((u - 0xD800) << 10) | (v - 0xDC00));
                AppendUtf8(cp, &r);
                break;
              }
            }
            p_ = save;
          }
          AppendUtf8(u, &r);
          break;
        }
        default:
          return false;
      }
    }
    return false;
  }

  bool ParseBool(bool *out) {
    if (!out) return false;
    SkipWs();
    if (p_ + 4 <= s_.size() && s_.compare(p_, 4, "true") == 0) {
      p_ += 4;
      *out = true;
      return true;
    }
    if (p_ + 5 <= s_.size() && s_.compare(p_, 5, "false") == 0) {
      p_ += 5;
      *out = false;
      return true;
    }
    return false;
  }

  bool ParseInt64(int64_t *out) {
    if (!out) return false;
    SkipWs();
    if (p_ >= s_.size()) return false;
    bool neg = false;
    if (s_[p_] == '-') {
      neg = true;
      ++p_;
    }
    if (p_ >= s_.size() || !std::isdigit(static_cast<unsigned char>(s_[p_]))) {
      return false;
    }
    int64_t v = 0;
    while (p_ < s_.size() && std::isdigit(static_cast<unsigned char>(s_[p_]))) {
      int d = s_[p_] - '0';
      if (v > (std::numeric_limits<int64_t>::max() - d) / 10) return false;
      v = v * 10 + d;
      ++p_;
    }
    *out = neg ? -v : v;
    return true;
  }

  bool SkipValue() {
    SkipWs();
    if (p_ >= s_.size()) return false;
    char c = s_[p_];
    if (c == '"') {
      std::string tmp;
      return ParseString(&tmp);
    }
    if (c == '{') return SkipObject();
    if (c == '[') return SkipArray();
    if (c == 't' || c == 'f') {
      bool b = false;
      return ParseBool(&b);
    }
    if (c == 'n') {
      if (p_ + 4 <= s_.size() && s_.compare(p_, 4, "null") == 0) {
        p_ += 4;
        return true;
      }
      return false;
    }
    int64_t v = 0;
    return ParseInt64(&v);
  }

 private:
  bool SkipObject() {
    if (!Consume('{')) return false;
    SkipWs();
    if (Consume('}')) return true;
    while (true) {
      std::string k;
      if (!ParseString(&k)) return false;
      if (!Consume(':')) return false;
      if (!SkipValue()) return false;
      SkipWs();
      if (Consume('}')) return true;
      if (!Consume(',')) return false;
    }
  }

  bool SkipArray() {
    if (!Consume('[')) return false;
    SkipWs();
    if (Consume(']')) return true;
    while (true) {
      if (!SkipValue()) return false;
      SkipWs();
      if (Consume(']')) return true;
      if (!Consume(',')) return false;
    }
  }

 private:
  const std::string &s_;
  size_t p_;
};

namespace {
static inline int64_t TokenToIdOrDefault(
    const std::unordered_map<std::string, int32_t> &vocab,
    const std::string &tok, int64_t def_val) {
  auto it = vocab.find(tok);
  if (it == vocab.end()) return def_val;
  return static_cast<int64_t>(it->second);
}
}  // namespace

// Build bytes_to_unicode mapping (ByteLevel encoder/decoder).
static void BuildBytesToUnicode(
    std::string byte_to_unicode[256],
    std::unordered_map<std::string, uint8_t> *unicode_to_byte) {
  std::vector<uint32_t> bs;
  bs.reserve(256);
  for (uint32_t c = 33; c <= 126; ++c) bs.push_back(c);
  for (uint32_t c = 161; c <= 172; ++c) bs.push_back(c);
  for (uint32_t c = 174; c <= 255; ++c) bs.push_back(c);

  std::vector<uint32_t> cs = bs;
  cs.reserve(256);
  uint32_t n = 0;
  auto contains = [&](uint32_t b) -> bool {
    return std::find(bs.begin(), bs.end(), b) != bs.end();
  };
  for (uint32_t b = 0; b <= 255; ++b) {
    if (!contains(b)) {
      bs.push_back(b);
      cs.push_back(256 + n);
      ++n;
    }
  }

  if (unicode_to_byte) unicode_to_byte->clear();
  for (size_t i = 0; i < bs.size(); ++i) {
    uint32_t b = bs[i];
    uint32_t c = cs[i];
    std::string u;
    AppendUtf8(c, &u);
    byte_to_unicode[b] = u;
    if (unicode_to_byte) {
      (*unicode_to_byte)[u] = static_cast<uint8_t>(b);
    }
  }
}

// Parse vocab.json: {"token": id, ...}
static bool ParseVocabJson(const std::string &blob,
                           std::unordered_map<std::string, int32_t> *out) {
  if (!out) return false;
  out->clear();
  JsonReader r(blob);
  r.SkipWs();
  if (!r.Consume('{')) return false;
  r.SkipWs();
  if (r.Consume('}')) return true;

  while (true) {
    std::string key;
    if (!r.ParseString(&key)) return false;
    if (!r.Consume(':')) return false;
    int64_t id64 = 0;
    if (!r.ParseInt64(&id64)) return false;
    if (id64 < 0 || id64 > std::numeric_limits<int32_t>::max()) return false;
    (*out)[key] = static_cast<int32_t>(id64);

    r.SkipWs();
    if (r.Consume('}')) return true;
    if (!r.Consume(',')) return false;
  }
}

// Parse merges.txt: each non-comment line: "left right"
static bool ParseMergesTxt(const std::string &blob,
                           std::unordered_map<std::string, int32_t> *out) {
  if (!out) return false;
  out->clear();
  std::istringstream is(blob);
  std::string line;
  int32_t rank = 0;
  while (std::getline(is, line)) {
    if (line.empty()) continue;
    if (line.rfind("#version", 0) == 0) continue;
    std::string left, right;
    {
      std::istringstream ls(line);
      if (!(ls >> left >> right)) continue;
    }
    std::string key = left;
    key.push_back('\t');
    key.append(right);
    (*out)[key] = rank++;
  }
  return true;
}

static inline bool IsWordChar(uint32_t cp) {
  return IsLetter(cp) || IsNumber(cp) || cp == '_';
}

// A manual approximation for Qwen3 tokenizer Split regex.
// The regex is in tokenizer.json pre_tokenizer Split. We avoid std::regex
// due to missing \p{L}/\p{N} support in libc++/libstdc++ regex.
static std::vector<std::string> SplitByQwen3Pattern(const std::string &text) {
  std::vector<std::string> out;
  out.reserve(text.size() / 2 + 1);

  size_t i = 0;
  while (i < text.size()) {
    if (text[i] == '\'') {
      auto lower = [](char c) -> char {
        return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
      };
      if (i + 1 < text.size()) {
        char c1 = lower(text[i + 1]);
        if (c1 == 's' || c1 == 't' || c1 == 'm' || c1 == 'd') {
          out.push_back(text.substr(i, 2));
          i += 2;
          continue;
        }
        if (i + 2 < text.size()) {
          char c2 = lower(text[i + 2]);
          if (c1 == 'r' && c2 == 'e') {
            out.push_back(text.substr(i, 3));
            i += 3;
            continue;
          }
          if (c1 == 'v' && c2 == 'e') {
            out.push_back(text.substr(i, 3));
            i += 3;
            continue;
          }
          if (c1 == 'l' && c2 == 'l') {
            out.push_back(text.substr(i, 3));
            i += 3;
            continue;
          }
        }
      }
    }

    size_t cur = i;
    uint32_t cp = 0;
    size_t n = 0;
    if (!Utf8Next(text, &cur, &cp, &n) || n == 0) {
      out.push_back(text.substr(i, 1));
      i += 1;
      continue;
    }

    auto peek_next_cp = [&](size_t pos, uint32_t *cp2, size_t *n2) -> bool {
      size_t t = pos;
      uint32_t x = 0;
      size_t nn = 0;
      if (!Utf8Next(text, &t, &x, &nn)) return false;
      if (cp2) *cp2 = x;
      if (n2) *n2 = nn;
      return true;
    };

    {
      uint32_t next_cp = 0;
      size_t next_n = 0;
      bool has_next = peek_next_cp(i + n, &next_cp, &next_n);

      bool cur_ok_prefix = (!IsNewline(cp) && !IsLetter(cp) && !IsNumber(cp));
      bool cur_is_letter = IsLetter(cp);

      if (cur_is_letter || (cur_ok_prefix && has_next && IsLetter(next_cp))) {
        size_t start = i;
        size_t j = i;
        if (!cur_is_letter) {
          j += n;
          while (j < text.size()) {
            size_t t = j;
            uint32_t cpl = 0;
            size_t nl = 0;
            if (!Utf8Next(text, &t, &cpl, &nl)) break;
            if (!IsLetter(cpl)) break;
            j += nl;
          }
        } else {
          j = i;
          while (j < text.size()) {
            size_t t = j;
            uint32_t cpl = 0;
            size_t nl = 0;
            if (!Utf8Next(text, &t, &cpl, &nl)) break;
            if (!IsLetter(cpl)) break;
            j += nl;
          }
        }
        out.push_back(text.substr(start, j - start));
        i = j;
        continue;
      }
    }

    if (IsNumber(cp)) {
      out.push_back(text.substr(i, n));
      i += n;
      continue;
    }

    {
      bool starts_with_space_prefix = IsAsciiSpace(cp);
      size_t start = i;
      size_t j = i;

      auto is_punct_like = [&](uint32_t x) -> bool {
        return (!IsWhitespace(x) && !IsLetter(x) && !IsNumber(x));
      };

      if (starts_with_space_prefix) {
        uint32_t next_cp = 0;
        size_t next_n = 0;
        if (peek_next_cp(i + n, &next_cp, &next_n) && is_punct_like(next_cp)) {
          j += n;
          while (j < text.size()) {
            size_t t = j;
            uint32_t cx = 0;
            size_t nx = 0;
            if (!Utf8Next(text, &t, &cx, &nx)) break;
            if (!is_punct_like(cx)) break;
            j += nx;
          }
          while (j < text.size()) {
            size_t t = j;
            uint32_t cx = 0;
            size_t nx = 0;
            if (!Utf8Next(text, &t, &cx, &nx)) break;
            if (!IsNewline(cx)) break;
            j += nx;
          }
          out.push_back(text.substr(start, j - start));
          i = j;
          continue;
        }
      } else if (is_punct_like(cp)) {
        while (j < text.size()) {
          size_t t = j;
          uint32_t cx = 0;
          size_t nx = 0;
          if (!Utf8Next(text, &t, &cx, &nx)) break;
          if (!is_punct_like(cx)) break;
          j += nx;
        }
        while (j < text.size()) {
          size_t t = j;
          uint32_t cx = 0;
          size_t nx = 0;
          if (!Utf8Next(text, &t, &cx, &nx)) break;
          if (!IsNewline(cx)) break;
          j += nx;
        }
        out.push_back(text.substr(start, j - start));
        i = j;
        continue;
      }
    }

    {
      if (IsWhitespace(cp)) {
        size_t start = i;
        size_t j = i;

        bool saw_newline = false;
        while (j < text.size()) {
          size_t t = j;
          uint32_t cx = 0;
          size_t nx = 0;
          if (!Utf8Next(text, &t, &cx, &nx)) break;
          if (IsNewline(cx)) {
            saw_newline = true;
            break;
          }
          if (!IsWhitespace(cx)) break;
          j += nx;
        }

        if (saw_newline) {
          while (j < text.size()) {
            size_t t = j;
            uint32_t cx = 0;
            size_t nx = 0;
            if (!Utf8Next(text, &t, &cx, &nx)) break;
            if (!IsNewline(cx)) break;
            j += nx;
          }
          out.push_back(text.substr(start, j - start));
          i = j;
          continue;
        }
      }
    }

    if (IsWhitespace(cp)) {
      bool only_ws_to_end = true;
      size_t j = i;
      while (j < text.size()) {
        size_t t = j;
        uint32_t cx = 0;
        size_t nx = 0;
        if (!Utf8Next(text, &t, &cx, &nx)) break;
        if (!IsWhitespace(cx)) {
          only_ws_to_end = false;
          break;
        }
        j += nx;
      }
      if (only_ws_to_end) {
        out.push_back(text.substr(i));
        break;
      }
    }

    if (IsWhitespace(cp)) {
      size_t start = i;
      size_t j = i;
      while (j < text.size()) {
        size_t t = j;
        uint32_t cx = 0;
        size_t nx = 0;
        if (!Utf8Next(text, &t, &cx, &nx)) break;
        if (!IsWhitespace(cx)) break;
        j += nx;
      }
      out.push_back(text.substr(start, j - start));
      i = j;
      continue;
    }

    out.push_back(text.substr(i, n));
    i += n;
  }

  return out;
}

static std::vector<std::string> SplitUtf8ToChars(const std::string &s) {
  std::vector<std::string> out;
  out.reserve(s.size());
  size_t i = 0;
  while (i < s.size()) {
    size_t t = i;
    uint32_t cp = 0;
    size_t n = 0;
    if (!Utf8Next(s, &t, &cp, &n) || n == 0) {
      out.push_back(s.substr(i, 1));
      i += 1;
      continue;
    }
    out.push_back(s.substr(i, n));
    i += n;
  }
  return out;
}

static inline std::string MakeMergeKey(const std::string &a,
                                       const std::string &b) {
  std::string k = a;
  k.push_back('\t');
  k.append(b);
  return k;
}

}  // namespace

// Parse tokenizer.json added_tokens: extract objects with {id, content, ...}
static bool ParseAddedTokensFromTokenizerJson(
    const std::string &blob,
    std::vector<FunASRNanoTokenizer::AddedToken> *out) {
  if (!out) return false;
  out->clear();

  JsonReader r(blob);
  if (!r.SeekToKey("added_tokens")) return true;
  if (!r.Consume(':')) return false;
  if (!r.Consume('[')) return false;

  r.SkipWs();
  if (r.Consume(']')) return true;

  while (true) {
    if (!r.Consume('{')) return false;
    FunASRNanoTokenizer::AddedToken t;

    r.SkipWs();
    if (!r.Consume('}')) {
      while (true) {
        std::string k;
        if (!r.ParseString(&k)) return false;
        if (!r.Consume(':')) return false;

        if (k == "id") {
          int64_t v = 0;
          if (!r.ParseInt64(&v)) return false;
          t.id = static_cast<int32_t>(v);
        } else if (k == "content") {
          if (!r.ParseString(&t.content)) return false;
        } else if (k == "single_word") {
          if (!r.ParseBool(&t.single_word)) return false;
        } else if (k == "lstrip") {
          if (!r.ParseBool(&t.lstrip)) return false;
        } else if (k == "rstrip") {
          if (!r.ParseBool(&t.rstrip)) return false;
        } else if (k == "normalized") {
          if (!r.ParseBool(&t.normalized)) return false;
        } else if (k == "special") {
          if (!r.ParseBool(&t.special)) return false;
        } else {
          if (!r.SkipValue()) return false;
        }

        r.SkipWs();
        if (r.Consume('}')) break;
        if (!r.Consume(',')) return false;
      }
    }

    if (t.id >= 0 && !t.content.empty()) {
      out->push_back(std::move(t));
    }

    r.SkipWs();
    if (r.Consume(']')) return true;
    if (!r.Consume(',')) return false;
  }
}

// Build trie for AddedTokens longest match (byte-wise).
void BuildAddedTokensTrie(
    const std::vector<FunASRNanoTokenizer::AddedToken> &tokens,
    std::vector<FunASRNanoTokenizer::TrieNode> *trie) {
  if (!trie) return;
  trie->clear();
  trie->push_back(FunASRNanoTokenizer::TrieNode{});
  for (int32_t i = 0; i < static_cast<int32_t>(tokens.size()); ++i) {
    const auto &tok = tokens[i];
    int32_t node = 0;
    for (uint8_t b :
         std::vector<uint8_t>(tok.content.begin(), tok.content.end())) {
      auto it = (*trie)[node].next.find(b);
      if (it == (*trie)[node].next.end()) {
        int32_t new_node = static_cast<int32_t>(trie->size());
        trie->push_back(FunASRNanoTokenizer::TrieNode{});
        (*trie)[node].next.emplace(b, new_node);
        node = new_node;
      } else {
        node = it->second;
      }
    }
    (*trie)[node].token_index = i;
  }
}

static void MergeVocabAndAddedTokens(
    std::unordered_map<std::string, int32_t> *vocab,
    const std::vector<FunASRNanoTokenizer::AddedToken> &added,
    std::unordered_set<std::string> *added_contents) {
  if (!vocab) return;
  if (added_contents) added_contents->clear();

  int32_t overwritten = 0;
  for (const auto &t : added) {
    if (t.id < 0 || t.content.empty()) continue;
    if (added_contents) added_contents->insert(t.content);

    auto it = vocab->find(t.content);
    if (it != vocab->end() && it->second != t.id) {
      ++overwritten;
    }
    (*vocab)[t.content] = t.id;
  }

  if (overwritten > 0) {
    SHERPA_ONNX_LOGE(
        "AddedTokens overwrote %d vocab entries with different ids. "
        "This is expected for some tokenizers; keeping added-token ids.",
        overwritten);
  }
}

void BuildIdToToken(const std::unordered_map<std::string, int32_t> &vocab,
                    const std::unordered_set<std::string> &added_contents,
                    std::vector<std::string> *id2token) {
  if (!id2token) return;
  int32_t max_id = -1;
  for (const auto &kv : vocab) {
    max_id = std::max(max_id, kv.second);
  }
  if (max_id < 0) {
    id2token->clear();
    return;
  }
  id2token->assign(static_cast<size_t>(max_id) + 1, std::string{});

  int32_t dup = 0;
  for (const auto &kv : vocab) {
    const std::string &tok = kv.first;
    int32_t id = kv.second;
    if (id < 0) continue;
    std::string &slot = (*id2token)[static_cast<size_t>(id)];
    if (slot.empty()) {
      slot = tok;
      continue;
    }
    if (slot == tok) continue;

    bool slot_is_added = added_contents.count(slot) > 0;
    bool tok_is_added = added_contents.count(tok) > 0;
    if (!slot_is_added && tok_is_added) {
      slot = tok;
    }
    ++dup;
  }

  if (dup > 0) {
    SHERPA_ONNX_LOGE(
        "Detected %d duplicated id->token collisions while building id2token. "
        "Kept added_tokens' string when possible.",
        dup);
  }
}

// Try to match an AddedToken at byte-position `pos`.
// Returns (matched_len_bytes, token_index) or (0, -1) if no match.
std::pair<int32_t, int32_t> MatchAddedToken(
    const std::string &text, size_t pos,
    const std::vector<FunASRNanoTokenizer::TrieNode> &trie) {
  if (trie.empty()) return {0, -1};
  int32_t node = 0;
  int32_t best_idx = -1;
  int32_t best_len = 0;

  size_t i = pos;
  while (i < text.size()) {
    uint8_t b = static_cast<uint8_t>(text[i]);
    auto it = trie[node].next.find(b);
    if (it == trie[node].next.end()) break;
    node = it->second;
    ++i;
    if (trie[node].token_index >= 0) {
      best_idx = trie[node].token_index;
      best_len = static_cast<int32_t>(i - pos);
    }
  }
  return {best_len, best_idx};
}

FunASRNanoTokenizer::FunASRNanoTokenizer(const std::string &tokenizer_dir) {
  Init(tokenizer_dir);
}

#if __ANDROID_API__ >= 9
FunASRNanoTokenizer::FunASRNanoTokenizer(AAssetManager *mgr,
                                         const std::string &tokenizer_dir) {
  Init(mgr, tokenizer_dir);
}
#endif

#if __OHOS__
FunASRNanoTokenizer::FunASRNanoTokenizer(NativeResourceManager *mgr,
                                         const std::string &tokenizer_dir) {
  Init(mgr, tokenizer_dir);
}
#endif

void FunASRNanoTokenizer::Init(const std::string &tokenizer_dir) {
  std::string tok_json = FindTokenizerJson(tokenizer_dir);
  if (tok_json.empty()) {
    SHERPA_ONNX_LOGE("Cannot find tokenizer.json in: %s",
                     tokenizer_dir.c_str());
    SHERPA_ONNX_EXIT(-1);
  }
  std::string vocab_json = FindVocabJson(tokenizer_dir);
  if (vocab_json.empty()) {
    SHERPA_ONNX_LOGE("Cannot find vocab.json in: %s", tokenizer_dir.c_str());
    SHERPA_ONNX_EXIT(-1);
  }
  std::string merges_txt = FindMergesTxt(tokenizer_dir);
  if (merges_txt.empty()) {
    SHERPA_ONNX_LOGE("Cannot find merges.txt in: %s", tokenizer_dir.c_str());
    SHERPA_ONNX_EXIT(-1);
  }

  const std::string tok_blob = LoadBytesFromFile(tok_json);
  const std::string vocab_blob = LoadBytesFromFile(vocab_json);
  const std::string merges_blob = LoadBytesFromFile(merges_txt);

  if (tok_blob.empty() || vocab_blob.empty() || merges_blob.empty()) {
    SHERPA_ONNX_LOGE("Failed to read tokenizer files from: %s",
                     tokenizer_dir.c_str());
    SHERPA_ONNX_EXIT(-1);
  }

  // Build ByteLevel bytes_to_unicode mapping
  BuildBytesToUnicode(byte_to_unicode_, &unicode_to_byte_);

  if (!ParseVocabJson(vocab_blob, &token2id_)) {
    SHERPA_ONNX_LOGE("Failed to parse vocab.json: %s", vocab_json.c_str());
    SHERPA_ONNX_EXIT(-1);
  }
  if (!ParseMergesTxt(merges_blob, &merges_rank_)) {
    SHERPA_ONNX_LOGE("Failed to parse merges.txt: %s", merges_txt.c_str());
    SHERPA_ONNX_EXIT(-1);
  }

  if (!ParseAddedTokensFromTokenizerJson(tok_blob, &added_tokens_)) {
    SHERPA_ONNX_LOGE("Failed to parse added_tokens from tokenizer.json: %s",
                     tok_json.c_str());
    SHERPA_ONNX_EXIT(-1);
  }
  MergeVocabAndAddedTokens(&token2id_, added_tokens_, &added_token_contents_);

  BuildIdToToken(token2id_, added_token_contents_, &id2token_);

  BuildAddedTokensTrie(added_tokens_, &trie_);

  FinalizeSpecialIds();
}

#if __ANDROID_API__ >= 9
void FunASRNanoTokenizer::Init(AAssetManager *mgr,
                               const std::string &tokenizer_dir) {
  std::string tok_json = tokenizer_dir + "/tokenizer.json";
  std::string vocab_json = tokenizer_dir + "/vocab.json";
  std::string merges_txt = tokenizer_dir + "/merges.txt";

  const std::string tok_blob = LoadBytesFromFile(mgr, tok_json);
  const std::string vocab_blob = LoadBytesFromFile(mgr, vocab_json);
  const std::string merges_blob = LoadBytesFromFile(mgr, merges_txt);

  if (tok_blob.empty() || vocab_blob.empty() || merges_blob.empty()) {
    SHERPA_ONNX_LOGE("Failed to read tokenizer files from assets: %s",
                     tokenizer_dir.c_str());
    SHERPA_ONNX_EXIT(-1);
  }

  BuildBytesToUnicode(byte_to_unicode_, &unicode_to_byte_);

  if (!ParseVocabJson(vocab_blob, &token2id_)) {
    SHERPA_ONNX_LOGE("Failed to parse vocab.json from assets: %s",
                     vocab_json.c_str());
    SHERPA_ONNX_EXIT(-1);
  }
  if (!ParseMergesTxt(merges_blob, &merges_rank_)) {
    SHERPA_ONNX_LOGE("Failed to parse merges.txt from assets: %s",
                     merges_txt.c_str());
    SHERPA_ONNX_EXIT(-1);
  }

  if (!ParseAddedTokensFromTokenizerJson(tok_blob, &added_tokens_)) {
    SHERPA_ONNX_LOGE("Failed to parse added_tokens from assets tokenizer.json");
    SHERPA_ONNX_EXIT(-1);
  }
  MergeVocabAndAddedTokens(&token2id_, added_tokens_, &added_token_contents_);
  BuildIdToToken(token2id_, added_token_contents_, &id2token_);
  BuildAddedTokensTrie(added_tokens_, &trie_);
  FinalizeSpecialIds();
}
#endif

#if __OHOS__
void FunASRNanoTokenizer::Init(NativeResourceManager *mgr,
                               const std::string &tokenizer_dir) {
  std::string tok_json = tokenizer_dir + "/tokenizer.json";
  std::string vocab_json = tokenizer_dir + "/vocab.json";
  std::string merges_txt = tokenizer_dir + "/merges.txt";

  const std::string tok_blob = LoadBytesFromFile(mgr, tok_json);
  const std::string vocab_blob = LoadBytesFromFile(mgr, vocab_json);
  const std::string merges_blob = LoadBytesFromFile(mgr, merges_txt);

  if (tok_blob.empty() || vocab_blob.empty() || merges_blob.empty()) {
    SHERPA_ONNX_LOGE("Failed to read tokenizer files from rawfile: %s",
                     tokenizer_dir.c_str());
    SHERPA_ONNX_EXIT(-1);
  }

  BuildBytesToUnicode(byte_to_unicode_, &unicode_to_byte_);

  if (!ParseVocabJson(vocab_blob, &token2id_)) {
    SHERPA_ONNX_LOGE("Failed to parse vocab.json from rawfile: %s",
                     vocab_json.c_str());
    SHERPA_ONNX_EXIT(-1);
  }
  if (!ParseMergesTxt(merges_blob, &merges_rank_)) {
    SHERPA_ONNX_LOGE("Failed to parse merges.txt from rawfile: %s",
                     merges_txt.c_str());
    SHERPA_ONNX_EXIT(-1);
  }

  if (!ParseAddedTokensFromTokenizerJson(tok_blob, &added_tokens_)) {
    SHERPA_ONNX_LOGE(
        "Failed to parse added_tokens from rawfile tokenizer.json");
    SHERPA_ONNX_EXIT(-1);
  }
  MergeVocabAndAddedTokens(&token2id_, added_tokens_, &added_token_contents_);
  BuildIdToToken(token2id_, added_token_contents_, &id2token_);
  BuildAddedTokensTrie(added_tokens_, &trie_);
  FinalizeSpecialIds();
}
#endif

void FunASRNanoTokenizer::FinalizeSpecialIds() {
  im_end_token_id_ = TokenToIdOrDefault(token2id_, "<|im_end|>", 151645);
  eos_token_id_ = TokenToIdOrDefault(token2id_, "<|endoftext|>", -1);
  if (eos_token_id_ < 0) eos_token_id_ = im_end_token_id_;

  pad_token_id_ = TokenToIdOrDefault(token2id_, "<|pad|>", -1);
  if (pad_token_id_ < 0) pad_token_id_ = eos_token_id_;

  special_ids_.clear();
  special_ids_.insert(static_cast<int32_t>(eos_token_id_));
  special_ids_.insert(static_cast<int32_t>(im_end_token_id_));
  special_ids_.insert(static_cast<int32_t>(pad_token_id_));

  int64_t im_start = TokenToIdOrDefault(token2id_, "<|im_start|>", -1);
  if (im_start >= 0) special_ids_.insert(static_cast<int32_t>(im_start));
}

static inline bool CheckSingleWordBoundary(const std::string &text, size_t pos,
                                           size_t end) {
  auto prev_is_word = [&]() -> bool {
    if (pos == 0) return false;
    size_t j = pos;
    while (j > 0 && (static_cast<unsigned char>(text[j - 1]) & 0xC0) == 0x80)
      --j;
    if (j == 0) return false;
    size_t t = j - 1;
    while (t > 0 && (static_cast<unsigned char>(text[t]) & 0xC0) == 0x80) --t;
    size_t k = t;
    uint32_t cp = 0;
    size_t nb = 0;
    if (!Utf8Next(text, &k, &cp, &nb)) return false;
    return IsWordChar(cp);
  };

  auto next_is_word = [&]() -> bool {
    if (end >= text.size()) return false;
    size_t k = end;
    uint32_t cp = 0;
    size_t nb = 0;
    if (!Utf8Next(text, &k, &cp, &nb)) return false;
    return IsWordChar(cp);
  };

  return !(prev_is_word() || next_is_word());
}

// ByteLevel encode: map each byte to unicode char (bytes_to_unicode).
static inline std::string ByteLevelEncode(
    const std::string &token, const std::string byte_to_unicode[256]) {
  std::string out;
  out.reserve(token.size() * 2);
  for (unsigned char b : token) {
    out.append(byte_to_unicode[b]);
  }
  return out;
}

// BPE encode (with cache): bytelevel_word to merged token strings.
static std::vector<std::string> BpeEncodeWithCache(
    const std::string &word,
    const std::unordered_map<std::string, int32_t> &merges_rank,
    std::unordered_map<std::string, std::vector<std::string>> *cache) {
  if (!cache) return {};
  auto it = cache->find(word);
  if (it != cache->end()) return it->second;

  std::vector<std::string> symbols = SplitUtf8ToChars(word);
  if (symbols.empty()) {
    (*cache)[word] = {};
    return {};
  }
  if (symbols.size() == 1) {
    (*cache)[word] = symbols;
    return symbols;
  }

  while (symbols.size() > 1) {
    int32_t best_rank = std::numeric_limits<int32_t>::max();
    int32_t best_pos = -1;

    for (int32_t i = 0; i + 1 < static_cast<int32_t>(symbols.size()); ++i) {
      std::string key = MakeMergeKey(symbols[i], symbols[i + 1]);
      auto it2 = merges_rank.find(key);
      if (it2 != merges_rank.end()) {
        int32_t r = it2->second;
        if (r < best_rank) {
          best_rank = r;
          best_pos = i;
        }
      }
    }

    if (best_pos < 0) break;

    // Merge best pair
    symbols[best_pos].append(symbols[best_pos + 1]);
    symbols.erase(symbols.begin() + best_pos + 1);
  }

  (*cache)[word] = symbols;
  return symbols;
}

std::vector<int64_t> FunASRNanoTokenizer::Encode(const std::string &text) {
  if (token2id_.empty()) {
    SHERPA_ONNX_LOGE("Tokenizer not initialized");
    SHERPA_ONNX_EXIT(-1);
  }

  std::vector<int64_t> out;
  if (text.empty()) return out;

  size_t pos = 0;
  size_t last = 0;
  while (pos < text.size()) {
    auto m = MatchAddedToken(text, pos, trie_);
    int32_t mlen = m.first;
    int32_t tidx = m.second;

    if (mlen > 0 && tidx >= 0) {
      const auto &tok = added_tokens_[static_cast<size_t>(tidx)];

      if (tok.single_word) {
        if (!CheckSingleWordBoundary(text, pos, pos + mlen)) {
          mlen = 0;
          tidx = -1;
        }
      }
    }

    if (mlen > 0 && tidx >= 0) {
      if (pos > last) {
        std::string seg = text.substr(last, pos - last);
        auto pieces = SplitByQwen3Pattern(seg);
        for (const auto &p : pieces) {
          std::string bl = ByteLevelEncode(p, byte_to_unicode_);
          auto bpe_toks = BpeEncodeWithCache(bl, merges_rank_, &bpe_cache_);
          for (const auto &bt : bpe_toks) {
            auto it = token2id_.find(bt);
            if (it == token2id_.end()) {
              continue;
            }
            out.push_back(static_cast<int64_t>(it->second));
          }
        }
      }

      const auto &atok = added_tokens_[static_cast<size_t>(tidx)];
      out.push_back(static_cast<int64_t>(atok.id));

      pos += static_cast<size_t>(mlen);
      last = pos;
      continue;
    }

    ++pos;
  }

  if (last < text.size()) {
    std::string seg = text.substr(last);
    auto pieces = SplitByQwen3Pattern(seg);
    for (const auto &p : pieces) {
      std::string bl = ByteLevelEncode(p, byte_to_unicode_);
      auto bpe_toks = BpeEncodeWithCache(bl, merges_rank_, &bpe_cache_);
      for (const auto &bt : bpe_toks) {
        auto it = token2id_.find(bt);
        if (it == token2id_.end()) continue;
        out.push_back(static_cast<int64_t>(it->second));
      }
    }
  }

  return out;
}

std::string FunASRNanoTokenizer::GetTokenStringStreaming(
    int64_t token_id, std::string *pending_bytes) const {
  if (!pending_bytes) return "";

  if (id2token_.empty()) {
    SHERPA_ONNX_LOGE("Tokenizer not initialized");
    SHERPA_ONNX_EXIT(-1);
  }

  int32_t id = static_cast<int32_t>(token_id);
  if (id < 0 || static_cast<size_t>(id) >= id2token_.size()) return "";

  if (!special_ids_.empty() && special_ids_.count(id)) return "";

  const std::string &token = id2token_[static_cast<size_t>(id)];
  if (token.empty()) return "";

  ByteLevelDecodeTokenToBytes(token, unicode_to_byte_, pending_bytes);

  std::string out;

  while (!pending_bytes->empty()) {
    Utf8ConsumeResult c = ConsumeValidUtf8Prefix(pending_bytes);
    out.append(c.prefix);

    if (c.status == Utf8ConsumeStatus::kOk) {
      break;
    }

    if (c.status == Utf8ConsumeStatus::kIncomplete) {
      break;
    }

    if (c.status == Utf8ConsumeStatus::kInvalid) {
      if (!pending_bytes->empty()) {
        pending_bytes->erase(0, 1);
      }
      out.append("\xEF\xBF\xBD");
      continue;
    }
  }

  return out;
}

std::string FunASRNanoTokenizer::Decode(const std::vector<int64_t> &token_ids) {
  if (id2token_.empty()) {
    SHERPA_ONNX_LOGE("Tokenizer not initialized");
    SHERPA_ONNX_EXIT(-1);
  }
  if (token_ids.empty()) return "";

  std::vector<std::string> toks;
  toks.reserve(token_ids.size());
  for (int64_t v : token_ids) {
    if (v < 0) continue;
    if (v > static_cast<int64_t>(std::numeric_limits<int32_t>::max())) continue;
    int32_t id = static_cast<int32_t>(v);
    if (!special_ids_.empty() && special_ids_.count(id)) continue;
    if (id < 0 || static_cast<size_t>(id) >= id2token_.size()) continue;
    const std::string &t = id2token_[static_cast<size_t>(id)];
    if (!t.empty()) toks.push_back(t);
  }

  std::string merged;
  {
    size_t total = 0;
    for (const auto &t : toks) total += t.size();
    merged.reserve(total);
    for (const auto &t : toks) merged.append(t);
  }

  std::vector<uint8_t> bytes;
  bytes.reserve(merged.size());

  size_t i = 0;
  while (i < merged.size()) {
    size_t t = i;
    uint32_t cp = 0;
    size_t n = 0;
    if (!Utf8Next(merged, &t, &cp, &n) || n == 0) {
      bytes.push_back(static_cast<uint8_t>(merged[i]));
      i += 1;
      continue;
    }
    std::string ch = merged.substr(i, n);
    auto it = unicode_to_byte_.find(ch);
    if (it != unicode_to_byte_.end()) {
      bytes.push_back(it->second);
    } else {
      for (unsigned char b : ch) bytes.push_back(b);
    }
    i += n;
  }

  std::string out(reinterpret_cast<const char *>(bytes.data()), bytes.size());

  for (const char *sp : {"<|im_end|>", "<|im_start|>", "<|endoftext|>"}) {
    std::string needle(sp);
    size_t pos = 0;
    while ((pos = out.find(needle, pos)) != std::string::npos) {
      out.erase(pos, needle.size());
    }
  }

  TrimInPlace(&out);
  return out;
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/funasr-nano-tokenizer.h
================================================
// sherpa-onnx/csrc/funasr-nano-tokenizer.h
//
// Copyright (c)  2025  zengyw
//
// A self-contained Qwen3 ByteLevel-BPE tokenizer implementation.
// - No dependency on tokenizers-cpp / HF tokenizers
// - Loads vocab.json + merges.txt + tokenizer.json(added_tokens)
// - Supports AddedTokens via Trie longest-match
// - ByteLevel bytes_to_unicode encode/decode

#ifndef SHERPA_ONNX_CSRC_FUNASR_NANO_TOKENIZER_H_
#define SHERPA_ONNX_CSRC_FUNASR_NANO_TOKENIZER_H_

#include <cstdint>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#if __ANDROID_API__ >= 9
#include <android/asset_manager.h>
#endif

#if __OHOS__
struct NativeResourceManager;
#endif

namespace sherpa_onnx {

class FunASRNanoTokenizer {
 public:
  explicit FunASRNanoTokenizer(const std::string &tokenizer_dir);

#if __ANDROID_API__ >= 9
  FunASRNanoTokenizer(AAssetManager *mgr, const std::string &tokenizer_dir);
#endif

#if __OHOS__
  FunASRNanoTokenizer(NativeResourceManager *mgr,
                      const std::string &tokenizer_dir);
#endif

  std::vector<int64_t> Encode(const std::string &text);
  std::string Decode(const std::vector<int64_t> &token_ids);
  std::string GetTokenStringStreaming(int64_t token_id,
                                      std::string *pending_bytes) const;

  int64_t GetEosTokenId() const { return eos_token_id_; }
  int64_t GetPadTokenId() const { return pad_token_id_; }
  int64_t GetImEndTokenId() const { return im_end_token_id_; }

  // Public structures for helper functions
  struct AddedToken {
    std::string content;
    int32_t id = -1;
    bool single_word = false;
    bool lstrip = false;
    bool rstrip = false;
    bool normalized = false;
    bool special = false;
  };

  struct TrieNode {
    std::unordered_map<uint8_t, int32_t> next;
    int32_t token_index = -1;  // index in added_tokens_ if terminal
  };

 private:
  void Init(const std::string &tokenizer_dir);

#if __ANDROID_API__ >= 9
  void Init(AAssetManager *mgr, const std::string &tokenizer_dir);
#endif

#if __OHOS__
  void Init(NativeResourceManager *mgr, const std::string &tokenizer_dir);
#endif

  void FinalizeSpecialIds();

 private:
  // Special ids
  int64_t eos_token_id_ = -1;
  int64_t pad_token_id_ = -1;
  int64_t im_end_token_id_ = -1;

  std::unordered_set<int32_t> special_ids_;

  // Vocab: token <-> id
  std::unordered_map<std::string, int32_t> token2id_;
  std::vector<std::string> id2token_;

  // merges ranks: "left\tright" -> rank
  std::unordered_map<std::string, int32_t> merges_rank_;

  // BPE cache: bytelevel_word -> list of merged tokens
  std::unordered_map<std::string, std::vector<std::string>> bpe_cache_;

  // bytes_to_unicode mapping (ByteLevel)
  std::string byte_to_unicode_[256];
  std::unordered_map<std::string, uint8_t> unicode_to_byte_;

  // AddedTokens
  std::vector<AddedToken> added_tokens_;
  std::vector<TrieNode> trie_;
  std::unordered_set<std::string> added_token_contents_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_FUNASR_NANO_TOKENIZER_H_


================================================
FILE: sherpa-onnx/csrc/hifigan-vocoder.cc
================================================
// sherpa-onnx/csrc/hifigan-vocoder.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/hifigan-vocoder.h"

#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"

namespace sherpa_onnx {

class HifiganVocoder::Impl {
 public:
  explicit Impl(int32_t num_threads, const std::string &provider,
                const std::string &model)
      : env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(num_threads, provider)),
        allocator_{} {
    auto buf = ReadFile(model);
    Init(buf.data(), buf.size());
  }

  template <typename Manager>
  explicit Impl(Manager *mgr, int32_t num_threads, const std::string &provider,
                const std::string &model)
      : env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(num_threads, provider)),
        allocator_{} {
    auto buf = ReadFile(mgr, model);
    Init(buf.data(), buf.size());
  }

  std::vector<float> Run(Ort::Value mel) const {
    auto out = sess_->Run({}, input_names_ptr_.data(), &mel, 1,
                          output_names_ptr_.data(), output_names_ptr_.size());

    std::vector<int64_t> audio_shape =
        out[0].GetTensorTypeAndShapeInfo().GetShape();

    int64_t total = 1;
    // The output shape may be (1, 1, total) or (1, total) or (total,)
    for (auto i : audio_shape) {
      total *= i;
    }

    const float *p = out[0].GetTensorData<float>();
    return {p, p + total};
  }

 private:
  void Init(void *model_data, size_t model_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);

    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);
  }

 private:
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;
};

HifiganVocoder::HifiganVocoder(int32_t num_threads, const std::string &provider,
                               const std::string &model)
    : impl_(std::make_unique<Impl>(num_threads, provider, model)) {}

template <typename Manager>
HifiganVocoder::HifiganVocoder(Manager *mgr, int32_t num_threads,
                               const std::string &provider,
                               const std::string &model)
    : impl_(std::make_unique<Impl>(mgr, num_threads, provider, model)) {}

HifiganVocoder::~HifiganVocoder() = default;

std::vector<float> HifiganVocoder::Run(Ort::Value mel) const {
  return impl_->Run(std::move(mel));
}

#if __ANDROID_API__ >= 9
template HifiganVocoder::HifiganVocoder(AAssetManager *mgr, int32_t num_threads,
                                        const std::string &provider,
                                        const std::string &model);
#endif

#if __OHOS__
template HifiganVocoder::HifiganVocoder(NativeResourceManager *mgr,
                                        int32_t num_threads,
                                        const std::string &provider,
                                        const std::string &model);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/hifigan-vocoder.h
================================================
// sherpa-onnx/csrc/hifigan-vocoder.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_HIFIGAN_VOCODER_H_
#define SHERPA_ONNX_CSRC_HIFIGAN_VOCODER_H_

#include <memory>
#include <string>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/vocoder.h"

namespace sherpa_onnx {

class HifiganVocoder : public Vocoder {
 public:
  ~HifiganVocoder() override;

  HifiganVocoder(int32_t num_threads, const std::string &provider,
                 const std::string &model);

  template <typename Manager>
  HifiganVocoder(Manager *mgr, int32_t num_threads, const std::string &provider,
                 const std::string &model);

  /** @param mel A float32 tensor of shape (batch_size, feat_dim, num_frames).
   *  @return Return a float32 tensor of shape (batch_size, num_samples).
   */
  std::vector<float> Run(Ort::Value mel) const override;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_HIFIGAN_VOCODER_H_


================================================
FILE: sherpa-onnx/csrc/homophone-replacer.cc
================================================
// sherpa-onnx/csrc/homophone-replacer.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/homophone-replacer.h"

#include <cctype>
#include <fstream>
#include <memory>
#include <sstream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "kaldifst/csrc/text-normalizer.h"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/phrase-matcher.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

void HomophoneReplacerConfig::Register(ParseOptions *po) {
  po->Register("hr-dict-dir", &dict_dir,
               "Not used. You don't need to provide a value for it");

  po->Register("hr-lexicon", &lexicon,
               "Path to lexicon.txt used by HomophoneReplacer.");

  po->Register("hr-rule-fsts", &rule_fsts,
               "Fst files for HomophoneReplacer. If there are multiple, they "
               "are separated by a comma. E.g., a.fst,b.fst,c.fst");
}

bool HomophoneReplacerConfig::Validate() const {
  if (!dict_dir.empty()) {
    SHERPA_ONNX_LOGE(
        "From sherpa-onnx v1.12.15, you don't need to provide dict_dir for "
        "this model. Ignore it");
  }

  if (!lexicon.empty() && !FileExists(lexicon)) {
    SHERPA_ONNX_LOGE("--hr-lexicon: '%s' does not exist", lexicon.c_str());
    return false;
  }

  if (!rule_fsts.empty()) {
    std::vector<std::string> files;
    SplitStringToVector(rule_fsts, ",", false, &files);

    if (files.size() > 1) {
      SHERPA_ONNX_LOGE("Only 1 file is supported now.");
      SHERPA_ONNX_EXIT(-1);
    }

    for (const auto &f : files) {
      if (!FileExists(f)) {
        SHERPA_ONNX_LOGE("Rule fst '%s' does not exist. ", f.c_str());
        return false;
      }
    }
  }

  return true;
}

std::string HomophoneReplacerConfig::ToString() const {
  std::ostringstream os;

  os << "HomophoneReplacerConfig(";
  os << "lexicon=\"" << lexicon << "\", ";
  os << "rule_fsts=\"" << rule_fsts << "\")";

  return os.str();
}

class HomophoneReplacer::Impl {
 public:
  explicit Impl(const HomophoneReplacerConfig &config) : config_(config) {
    {
      std::ifstream is(config.lexicon);
      InitLexicon(is);
    }

    if (!config.rule_fsts.empty()) {
      std::vector<std::string> files;
      SplitStringToVector(config.rule_fsts, ",", false, &files);
      replacer_list_.reserve(files.size());
      for (const auto &f : files) {
        if (config_.debug) {
          SHERPA_ONNX_LOGE("hr rule fst: %s", f.c_str());
        }
        replacer_list_.push_back(std::make_unique<kaldifst::TextNormalizer>(f));
      }
    }
  }

  template <typename Manager>
  Impl(Manager *mgr, const HomophoneReplacerConfig &config) : config_(config) {
    {
      auto buf = ReadFile(mgr, config.lexicon);

      std::istringstream is(std::string(buf.data(), buf.size()));
      InitLexicon(is);
    }

    if (!config.rule_fsts.empty()) {
      std::vector<std::string> files;
      SplitStringToVector(config.rule_fsts, ",", false, &files);
      replacer_list_.reserve(files.size());
      for (const auto &f : files) {
        if (config_.debug) {
          SHERPA_ONNX_LOGE("hr rule fst: %s", f.c_str());
        }
        auto buf = ReadFile(mgr, f);
        std::istringstream is(std::string(buf.data(), buf.size()));
        replacer_list_.push_back(
            std::make_unique<kaldifst::TextNormalizer>(is));
      }
    }
  }

  std::string Apply(const std::string &text) const {
    std::string ans;

    if (text.empty()) {
      return ans;
    }

    std::vector<std::string> words = SplitUtf8(text);

    if (config_.debug) {
#if __OHOS__
      SHERPA_ONNX_LOGE("Input text: '%{public}s'", text.c_str());
#else
      SHERPA_ONNX_LOGE("Input text: '%s'", text.c_str());
#endif
      std::ostringstream os;
      os << "After splitting into UTF8: ";
      std::string sep;
      for (const auto &w : words) {
        os << sep << w;
        sep = "_";
      }

#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
    }

    // convert words to pronunciations
    std::vector<std::string> current_words;
    std::vector<std::string> current_pronunciations;

    PhraseMatcher matcher(&all_words_, words, config_.debug);

    for (const std::string &w : matcher) {
      if (w.size() < 3 ||
          reinterpret_cast<const uint8_t *>(w.data())[0] < 128) {
        if (!current_words.empty()) {
          ans += ApplyImpl(current_words, current_pronunciations);
          current_words.clear();
          current_pronunciations.clear();
        }
        ans += w;
        if (isalpha(w[0])) {
          ans.push_back(' ');
        }
        continue;
      }

      auto p = ConvertWordToPronunciation(w);
      if (config_.debug) {
        SHERPA_ONNX_LOGE("%s %s", w.c_str(), p.c_str());
      }

      current_words.push_back(w);
      current_pronunciations.push_back(std::move(p));
    }  // for (const std::string &w : matcher) {

    if (!current_words.empty()) {
      ans += ApplyImpl(current_words, current_pronunciations);
    }

    if (config_.debug) {
      SHERPA_ONNX_LOGE("Output text: '%s'", ans.c_str());
    }

    if (!ans.empty() && ans.back() == ' ') {
      ans.pop_back();
    }

    return ans;
  }

 private:
  std::string ApplyImpl(const std::vector<std::string> &words,
                        const std::vector<std::string> &pronunciations) const {
    std::string ans;
    for (const auto &r : replacer_list_) {
      ans = r->Normalize(words, pronunciations);
      // TODO(fangjun): We support only 1 rule fst at present.
      break;
    }
    return ans;
  }
  std::string ConvertWordToPronunciation(const std::string &word) const {
    if (word2pron_.count(word)) {
      return word2pron_.at(word);
    }

    if (word.size() <= 3) {
      // not a Chinese character
      return word;
    }

    std::vector<std::string> words = SplitUtf8(word);
    std::string ans;
    for (const auto &w : words) {
      if (word2pron_.count(w)) {
        ans.append(word2pron_.at(w));
      } else {
        ans.append(w);
      }
    }

    return ans;
  }

  void InitLexicon(std::istream &is) {
    std::string word;
    std::string pron;
    std::string p;

    std::string line;
    int32_t line_num = 0;
    int32_t num_warn = 0;
    while (std::getline(is, line)) {
      ++line_num;
      std::istringstream iss(line);

      pron.clear();
      iss >> word;
      ToLowerCase(&word);

      if (word2pron_.count(word)) {
        num_warn += 1;
        if (num_warn < 10) {
          SHERPA_ONNX_LOGE("Duplicated word: %s at line %d:%s. Ignore it.",
                           word.c_str(), line_num, line.c_str());
        }
        continue;
      }

      while (iss >> p) {
        if (p.back() > '4') {
          p.push_back('1');
        }
        pron.append(std::move(p));
      }

      if (pron.empty()) {
        SHERPA_ONNX_LOGE(
            "Empty pronunciation for word '%s' at line %d:%s. Ignore it.",
            word.c_str(), line_num, line.c_str());
        continue;
      }

      word2pron_.insert({std::move(word), std::move(pron)});
    }

    for (const auto &[key, _] : word2pron_) {
      all_words_.insert(key);
    }
  }

 private:
  HomophoneReplacerConfig config_;
  std::vector<std::unique_ptr<kaldifst::TextNormalizer>> replacer_list_;
  std::unordered_map<std::string, std::string> word2pron_;
  std::unordered_set<std::string> all_words_;
};

HomophoneReplacer::HomophoneReplacer(const HomophoneReplacerConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
HomophoneReplacer::HomophoneReplacer(Manager *mgr,
                                     const HomophoneReplacerConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

HomophoneReplacer::~HomophoneReplacer() = default;

std::string HomophoneReplacer::Apply(const std::string &text) const {
  return RemoveInvalidUtf8Sequences(impl_->Apply(text));
}

#if __ANDROID_API__ >= 9
template HomophoneReplacer::HomophoneReplacer(
    AAssetManager *mgr, const HomophoneReplacerConfig &config);
#endif

#if __OHOS__
template HomophoneReplacer::HomophoneReplacer(
    NativeResourceManager *mgr, const HomophoneReplacerConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/homophone-replacer.h
================================================
// sherpa-onnx/csrc/homophone-replacer.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_HOMOPHONE_REPLACER_H_
#define SHERPA_ONNX_CSRC_HOMOPHONE_REPLACER_H_

#include <memory>
#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct HomophoneReplacerConfig {
  std::string dict_dir;  // unused
  std::string lexicon;

  // comma separated fst files, e.g. a.fst,b.fst,c.fst
  std::string rule_fsts;

  bool debug;

  HomophoneReplacerConfig() = default;

  HomophoneReplacerConfig(const std::string &dict_dir,
                          const std::string &lexicon,
                          const std::string &rule_fsts, bool debug)
      : dict_dir(dict_dir),
        lexicon(lexicon),
        rule_fsts(rule_fsts),
        debug(debug) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

class HomophoneReplacer {
 public:
  explicit HomophoneReplacer(const HomophoneReplacerConfig &config);

  template <typename Manager>
  HomophoneReplacer(Manager *mgr, const HomophoneReplacerConfig &config);

  ~HomophoneReplacer();

  std::string Apply(const std::string &text) const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_HOMOPHONE_REPLACER_H_


================================================
FILE: sherpa-onnx/csrc/hypothesis.cc
================================================
/**
 * Copyright (c)  2023  Xiaomi Corporation
 * Copyright (c)  2023  Pingfeng Luo
 */

#include "sherpa-onnx/csrc/hypothesis.h"

#include <algorithm>
#include <utility>
#include <vector>

namespace sherpa_onnx {

void Hypotheses::Add(Hypothesis hyp) {
  auto key = hyp.Key();
  auto it = hyps_dict_.find(key);
  if (it == hyps_dict_.end()) {
    hyps_dict_[key] = std::move(hyp);
  } else {
    it->second.log_prob = LogAdd<double>()(it->second.log_prob, hyp.log_prob);
  }
}

Hypothesis Hypotheses::GetMostProbable(bool length_norm) const {
  if (length_norm == false) {
    return std::max_element(hyps_dict_.begin(), hyps_dict_.end(),
                            [](const auto &left, auto &right) -> bool {
                              return left.second.TotalLogProb() <
                                     right.second.TotalLogProb();
                            })
        ->second;
  } else {
    // for length_norm is true
    return std::max_element(
               hyps_dict_.begin(), hyps_dict_.end(),
               [](const auto &left, const auto &right) -> bool {
                 return left.second.TotalLogProb() / left.second.ys.size() <
                        right.second.TotalLogProb() / right.second.ys.size();
               })
        ->second;
  }
}

std::vector<Hypothesis> Hypotheses::GetTopK(int32_t k, bool length_norm) const {
  k = std::max(k, 1);
  k = std::min(k, Size());

  std::vector<Hypothesis> all_hyps = Vec();

  if (length_norm == false) {
    std::partial_sort(all_hyps.begin(), all_hyps.begin() + k, all_hyps.end(),
                      [](const auto &a, const auto &b) {
                        return a.TotalLogProb() > b.TotalLogProb();
                      });
  } else {
    // for length_norm is true
    std::partial_sort(all_hyps.begin(), all_hyps.begin() + k, all_hyps.end(),
                      [](const auto &a, const auto &b) {
                        return a.TotalLogProb() / a.ys.size() >
                               b.TotalLogProb() / b.ys.size();
                      });
  }

  return {all_hyps.begin(), all_hyps.begin() + k};
}

const std::vector<int32_t> GetHypsRowSplits(
    const std::vector<Hypotheses> &hyps) {
  std::vector<int32_t> row_splits;
  row_splits.reserve(hyps.size() + 1);

  row_splits.push_back(0);
  int32_t s = 0;
  for (const auto &h : hyps) {
    s += h.Size();
    row_splits.push_back(s);
  }

  return row_splits;
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/hypothesis.h
================================================
/**
 * Copyright (c)  2023  Xiaomi Corporation
 * Copyright (c)  2023  Pingfeng Luo
 *
 */

#ifndef SHERPA_ONNX_CSRC_HYPOTHESIS_H_
#define SHERPA_ONNX_CSRC_HYPOTHESIS_H_

#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include <memory>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/context-graph.h"
#include "sherpa-onnx/csrc/lodr-fst.h"
#include "sherpa-onnx/csrc/math.h"
#include "sherpa-onnx/csrc/onnx-utils.h"

namespace sherpa_onnx {

struct Hypothesis {
  // The predicted tokens so far. Newly predicated tokens are appended.
  std::vector<int64_t> ys;

  // timestamps[i] contains the frame number after subsampling
  // on which ys[i] is decoded.
  std::vector<int32_t> timestamps;

  // The acoustic probability for each token in ys.
  // Used for keyword spotting task.
  // For transducer modified beam-search and greedy-search,
  // this is filled with log_posterior scores.
  std::vector<float> ys_probs;

  // lm_probs[i] contains the lm score for each token in ys.
  // Used only in transducer modified beam-search.
  // Elements filled only if LM is used.
  std::vector<float> lm_probs;

  // context_scores[i] contains the context-graph score for each token in ys.
  // Used only in transducer modified beam-search.
  // Elements filled only if `ContextGraph` is used.
  std::vector<float> context_scores;

  // The total score of ys in log space.
  // It contains only acoustic scores
  double log_prob = 0;

  // LM log prob if any.
  double lm_log_prob = 0;

  // the nn lm score for next token given the current ys,
  // when using shallow fusion
  CopyableOrtValue nn_lm_scores;

  // cur scored tokens by RNN LM, when rescoring
  int32_t cur_scored_pos = 0;

  // the nn lm states
  std::vector<CopyableOrtValue> nn_lm_states;

  // the LODR states
  std::shared_ptr<LodrStateCost> lodr_state;

  const ContextState *context_state;

  // TODO(fangjun): Make it configurable
  // the minimum of tokens in a chunk for streaming RNN LM
  int32_t lm_rescore_min_chunk = 2;  // a const

  int32_t num_trailing_blanks = 0;

  Hypothesis() = default;
  Hypothesis(const std::vector<int64_t> &ys, double log_prob,
             const ContextState *context_state = nullptr)
      : ys(ys), log_prob(log_prob), context_state(context_state) {}

  double TotalLogProb() const { return log_prob + lm_log_prob; }

  // If two Hypotheses have the same `Key`, then they contain
  // the same token sequence.
  std::string Key() const {
    // TODO(fangjun): Use a hash function?
    std::ostringstream os;
    std::string sep;
    for (auto i : ys) {
      os << sep << i;
      sep = "-";
    }
    return os.str();
  }

  // For debugging
  std::string ToString() const {
    std::ostringstream os;
    os << "(" << Key() << ", " << log_prob << ")";
    return os.str();
  }
};

class Hypotheses {
 public:
  Hypotheses() = default;

  explicit Hypotheses(std::vector<Hypothesis> hyps) {
    for (auto &h : hyps) {
      hyps_dict_[h.Key()] = std::move(h);
    }
  }

  explicit Hypotheses(std::unordered_map<std::string, Hypothesis> hyps_dict)
      : hyps_dict_(std::move(hyps_dict)) {}

  // Add hyp to this object. If it already exists, its log_prob
  // is updated with the given hyp using log-sum-exp.
  void Add(Hypothesis hyp);

  // Get the hyp that has the largest log_prob.
  // If length_norm is true, hyp's log_prob is divided by
  // len(hyp.ys) before comparison.
  Hypothesis GetMostProbable(bool length_norm) const;

  // Get the k hyps that have the largest log_prob.
  // If length_norm is true, hyp's log_prob is divided by
  // len(hyp.ys) before comparison.
  std::vector<Hypothesis> GetTopK(int32_t k, bool length_norm) const;

  int32_t Size() const { return hyps_dict_.size(); }

  std::string ToString() const {
    std::ostringstream os;
    for (const auto &p : hyps_dict_) {
      os << p.second.ToString() << "\n";
    }
    return os.str();
  }

  auto begin() const { return hyps_dict_.begin(); }
  auto end() const { return hyps_dict_.end(); }

  auto begin() { return hyps_dict_.begin(); }
  auto end() { return hyps_dict_.end(); }

  void Clear() { hyps_dict_.clear(); }

  // Return a list of hyps contained in this object.
  std::vector<Hypothesis> Vec() const {
    std::vector<Hypothesis> ans;
    ans.reserve(hyps_dict_.size());
    for (const auto &p : hyps_dict_) {
      ans.push_back(p.second);
    }
    return ans;
  }

 private:
  using Map = std ::unordered_map<std::string, Hypothesis>;
  Map hyps_dict_;
};

const std::vector<int32_t> GetHypsRowSplits(
    const std::vector<Hypotheses> &hyps);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_HYPOTHESIS_H_


================================================
FILE: sherpa-onnx/csrc/keyword-spotter-impl.cc
================================================
// sherpa-onnx/csrc/keyword-spotter-impl.cc
//
// Copyright (c)  2023-2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/keyword-spotter-impl.h"

#include <memory>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/keyword-spotter-transducer-impl.h"
#include "sherpa-onnx/csrc/macros.h"

#if SHERPA_ONNX_ENABLE_RKNN
#include "sherpa-onnx/csrc/rknn/keyword-spotter-transducer-rknn-impl.h"
#endif

namespace sherpa_onnx {

std::unique_ptr<KeywordSpotterImpl> KeywordSpotterImpl::Create(
    const KeywordSpotterConfig &config) {
  if (config.model_config.provider_config.provider == "rknn") {
#if SHERPA_ONNX_ENABLE_RKNN
    if (!config.model_config.transducer.encoder.empty()) {
      return std::make_unique<KeywordSpotterTransducerRknnImpl>(config);
    }
#else
    SHERPA_ONNX_LOGE(
        "Please rebuild sherpa-onnx with -DSHERPA_ONNX_ENABLE_RKNN=ON if you "
        "want to use rknn.");
    SHERPA_ONNX_EXIT(-1);
    return nullptr;
#endif
  }

  if (!config.model_config.transducer.encoder.empty()) {
    return std::make_unique<KeywordSpotterTransducerImpl>(config);
  }

  SHERPA_ONNX_LOGE("Please specify a model");
  SHERPA_ONNX_EXIT(-1);
}

template <typename Manager>
std::unique_ptr<KeywordSpotterImpl> KeywordSpotterImpl::Create(
    Manager *mgr, const KeywordSpotterConfig &config) {
  if (config.model_config.provider_config.provider == "rknn") {
#if SHERPA_ONNX_ENABLE_RKNN
    if (!config.model_config.transducer.encoder.empty()) {
      return std::make_unique<KeywordSpotterTransducerRknnImpl>(mgr, config);
    }
#else
    SHERPA_ONNX_LOGE(
        "Please rebuild sherpa-onnx with -DSHERPA_ONNX_ENABLE_RKNN=ON if you "
        "want to use rknn.");
    SHERPA_ONNX_EXIT(-1);
    return nullptr;
#endif
  }

  if (!config.model_config.transducer.encoder.empty()) {
    return std::make_unique<KeywordSpotterTransducerImpl>(mgr, config);
  }

  SHERPA_ONNX_LOGE("Please specify a model");
  exit(-1);
}

#if __ANDROID_API__ >= 9
template std::unique_ptr<KeywordSpotterImpl> KeywordSpotterImpl::Create(
    AAssetManager *mgr, const KeywordSpotterConfig &config);
#endif

#if __OHOS__
template std::unique_ptr<KeywordSpotterImpl> KeywordSpotterImpl::Create(
    NativeResourceManager *mgr, const KeywordSpotterConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/keyword-spotter-impl.h
================================================
// sherpa-onnx/csrc/keyword-spotter-impl.h
//
// Copyright (c)  2023-2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_KEYWORD_SPOTTER_IMPL_H_
#define SHERPA_ONNX_CSRC_KEYWORD_SPOTTER_IMPL_H_

#include <memory>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/keyword-spotter.h"
#include "sherpa-onnx/csrc/online-stream.h"

namespace sherpa_onnx {

class KeywordSpotterImpl {
 public:
  static std::unique_ptr<KeywordSpotterImpl> Create(
      const KeywordSpotterConfig &config);

  template <typename Manager>
  static std::unique_ptr<KeywordSpotterImpl> Create(
      Manager *mgr, const KeywordSpotterConfig &config);

  virtual ~KeywordSpotterImpl() = default;

  virtual std::unique_ptr<OnlineStream> CreateStream() const = 0;

  virtual std::unique_ptr<OnlineStream> CreateStream(
      const std::string &keywords) const = 0;

  virtual bool IsReady(OnlineStream *s) const = 0;

  virtual void Reset(OnlineStream *s) const = 0;

  virtual void DecodeStreams(OnlineStream **ss, int32_t n) const = 0;

  virtual KeywordResult GetResult(OnlineStream *s) const = 0;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_KEYWORD_SPOTTER_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/keyword-spotter-transducer-impl.h
================================================
// sherpa-onnx/csrc/keyword-spotter-transducer-impl.h
//
// Copyright (c)  2023-2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_KEYWORD_SPOTTER_TRANSDUCER_IMPL_H_
#define SHERPA_ONNX_CSRC_KEYWORD_SPOTTER_TRANSDUCER_IMPL_H_

#include <algorithm>
#include <memory>
#include <regex>  // NOLINT
#include <string>
#include <sstream>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/keyword-spotter-impl.h"
#include "sherpa-onnx/csrc/keyword-spotter.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/online-transducer-model.h"
#include "sherpa-onnx/csrc/symbol-table.h"
#include "sherpa-onnx/csrc/transducer-keyword-decoder.h"
#include "sherpa-onnx/csrc/utils.h"

namespace sherpa_onnx {

KeywordResult Convert(const TransducerKeywordResult &src,
                      const SymbolTable &sym_table, float frame_shift_ms,
                      int32_t subsampling_factor, int32_t frames_since_start) {
  KeywordResult r;
  r.tokens.reserve(src.tokens.size());
  r.timestamps.reserve(src.tokens.size());
  r.keyword = src.keyword;
  bool from_tokens = src.keyword.empty();

  for (auto i : src.tokens) {
    auto sym = sym_table[i];
    if (from_tokens) {
      r.keyword.append(sym);
    }
    r.tokens.push_back(std::move(sym));
  }
  if (from_tokens && r.keyword.size()) {
    r.keyword = r.keyword.substr(1);
  }

  float frame_shift_s = frame_shift_ms / 1000. * subsampling_factor;
  for (auto t : src.timestamps) {
    float time = frame_shift_s * t;
    r.timestamps.push_back(time);
  }

  r.start_time = frames_since_start * frame_shift_ms / 1000.;

  return r;
}

class KeywordSpotterTransducerImpl : public KeywordSpotterImpl {
 public:
  explicit KeywordSpotterTransducerImpl(const KeywordSpotterConfig &config)
      : config_(config),
        model_(OnlineTransducerModel::Create(config.model_config)) {
    if (!config.model_config.tokens_buf.empty()) {
      sym_ = SymbolTable(config.model_config.tokens_buf, false);
    } else {
      /// assuming tokens_buf and tokens are guaranteed not being both empty
      sym_ = SymbolTable(config.model_config.tokens, true);
    }

    if (sym_.Contains("<unk>")) {
      unk_id_ = sym_["<unk>"];
    }

    model_->SetFeatureDim(config.feat_config.feature_dim);

    if (config.keywords_buf.empty()) {
      InitKeywords();
    } else {
      InitKeywordsFromBufStr();
    }

    decoder_ = std::make_unique<TransducerKeywordDecoder>(
        model_.get(), config_.max_active_paths, config_.num_trailing_blanks,
        unk_id_);
  }

  template <typename Manager>
  KeywordSpotterTransducerImpl(Manager *mgr, const KeywordSpotterConfig &config)
      : config_(config),
        model_(OnlineTransducerModel::Create(mgr, config.model_config)),
        sym_(mgr, config.model_config.tokens) {
    if (sym_.Contains("<unk>")) {
      unk_id_ = sym_["<unk>"];
    }

    model_->SetFeatureDim(config.feat_config.feature_dim);

    InitKeywords(mgr);

    decoder_ = std::make_unique<TransducerKeywordDecoder>(
        model_.get(), config_.max_active_paths, config_.num_trailing_blanks,
        unk_id_);
  }

  std::unique_ptr<OnlineStream> CreateStream() const override {
    auto stream =
        std::make_unique<OnlineStream>(config_.feat_config, keywords_graph_);
    InitOnlineStream(stream.get());
    return stream;
  }

  std::unique_ptr<OnlineStream> CreateStream(
      const std::string &keywords) const override {
    auto kws = std::regex_replace(keywords, std::regex("/"), "\n");
    std::istringstream is(kws);

    std::vector<std::vector<int32_t>> current_ids;
    std::vector<std::string> current_kws;
    std::vector<float> current_scores;
    std::vector<float> current_thresholds;

    if (!EncodeKeywords(is, sym_, &current_ids, &current_kws, &current_scores,
                        &current_thresholds)) {
#if __OHOS__
      SHERPA_ONNX_LOGE("Encode keywords '%{public}s' failed.",
                       keywords.c_str());
#else
      SHERPA_ONNX_LOGE("Encode keywords '%s' failed.", keywords.c_str());
#endif
      return nullptr;
    }

    int32_t num_kws = current_ids.size();
    int32_t num_default_kws = keywords_id_.size();

    current_ids.insert(current_ids.end(), keywords_id_.begin(),
                       keywords_id_.end());

    if (!current_kws.empty() && !keywords_.empty()) {
      current_kws.insert(current_kws.end(), keywords_.begin(), keywords_.end());
    } else if (!current_kws.empty() && keywords_.empty()) {
      current_kws.insert(current_kws.end(), num_default_kws, std::string());
    } else if (current_kws.empty() && !keywords_.empty()) {
      current_kws.insert(current_kws.end(), num_kws, std::string());
      current_kws.insert(current_kws.end(), keywords_.begin(), keywords_.end());
    } else {
      // Do nothing.
    }

    if (!current_scores.empty() && !boost_scores_.empty()) {
      current_scores.insert(current_scores.end(), boost_scores_.begin(),
                            boost_scores_.end());
    } else if (!current_scores.empty() && boost_scores_.empty()) {
      current_scores.insert(current_scores.end(), num_default_kws,
                            config_.keywords_score);
    } else if (current_scores.empty() && !boost_scores_.empty()) {
      current_scores.insert(current_scores.end(), num_kws,
                            config_.keywords_score);
      current_scores.insert(current_scores.end(), boost_scores_.begin(),
                            boost_scores_.end());
    } else {
      // Do nothing.
    }

    if (!current_thresholds.empty() && !thresholds_.empty()) {
      current_thresholds.insert(current_thresholds.end(), thresholds_.begin(),
                                thresholds_.end());
    } else if (!current_thresholds.empty() && thresholds_.empty()) {
      current_thresholds.insert(current_thresholds.end(), num_default_kws,
                                config_.keywords_threshold);
    } else if (current_thresholds.empty() && !thresholds_.empty()) {
      current_thresholds.insert(current_thresholds.end(), num_kws,
                                config_.keywords_threshold);
      current_thresholds.insert(current_thresholds.end(), thresholds_.begin(),
                                thresholds_.end());
    } else {
      // Do nothing.
    }

    auto keywords_graph = std::make_shared<ContextGraph>(
        current_ids, config_.keywords_score, config_.keywords_threshold,
        current_scores, current_kws, current_thresholds);

    auto stream =
        std::make_unique<OnlineStream>(config_.feat_config, keywords_graph);
    InitOnlineStream(stream.get());
    return stream;
  }

  bool IsReady(OnlineStream *s) const override {
    return s->GetNumProcessedFrames() + model_->ChunkSize() <
           s->NumFramesReady();
  }
  void Reset(OnlineStream *s) const override { InitOnlineStream(s); }

  void DecodeStreams(OnlineStream **ss, int32_t n) const override {
    for (int32_t i = 0; i < n; ++i) {
      auto s = ss[i];
      auto r = s->GetKeywordResult(true);
      int32_t num_trailing_blanks = r.num_trailing_blanks;
      // assume subsampling_factor is 4
      // assume frameshift is 0.01 second
      float trailing_silence = num_trailing_blanks * 4 * 0.01;

      // it resets automatically after detecting 1.5 seconds of silence
      float threshold = 1.5;
      if (trailing_silence > threshold) {
        Reset(s);
      }
    }

    int32_t chunk_size = model_->ChunkSize();
    int32_t chunk_shift = model_->ChunkShift();

    int32_t feature_dim = ss[0]->FeatureDim();

    std::vector<TransducerKeywordResult> results(n);
    std::vector<float> features_vec(n * chunk_size * feature_dim);
    std::vector<std::vector<Ort::Value>> states_vec(n);
    std::vector<int64_t> all_processed_frames(n);

    for (int32_t i = 0; i != n; ++i) {
      SHERPA_ONNX_CHECK(ss[i]->GetContextGraph() != nullptr);

      const auto num_processed_frames = ss[i]->GetNumProcessedFrames();
      std::vector<float> features =
          ss[i]->GetFrames(num_processed_frames, chunk_size);

      // Question: should num_processed_frames include chunk_shift?
      ss[i]->GetNumProcessedFrames() += chunk_shift;

      std::copy(features.begin(), features.end(),
                features_vec.data() + i * chunk_size * feature_dim);

      results[i] = std::move(ss[i]->GetKeywordResult());
      states_vec[i] = std::move(ss[i]->GetStates());
      all_processed_frames[i] = num_processed_frames;
    }

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::array<int64_t, 3> x_shape{n, chunk_size, feature_dim};

    Ort::Value x = Ort::Value::CreateTensor(memory_info, features_vec.data(),
                                            features_vec.size(), x_shape.data(),
                                            x_shape.size());

    std::array<int64_t, 1> processed_frames_shape{
        static_cast<int64_t>(all_processed_frames.size())};

    Ort::Value processed_frames = Ort::Value::CreateTensor(
        memory_info, all_processed_frames.data(), all_processed_frames.size(),
        processed_frames_shape.data(), processed_frames_shape.size());

    auto states = model_->StackStates(states_vec);

    auto pair = model_->RunEncoder(std::move(x), std::move(states),
                                   std::move(processed_frames));

    decoder_->Decode(std::move(pair.first), ss, &results);

    std::vector<std::vector<Ort::Value>> next_states =
        model_->UnStackStates(pair.second);

    for (int32_t i = 0; i != n; ++i) {
      ss[i]->SetKeywordResult(results[i]);
      ss[i]->SetStates(std::move(next_states[i]));
    }
  }

  KeywordResult GetResult(OnlineStream *s) const override {
    TransducerKeywordResult decoder_result = s->GetKeywordResult(true);

    // TODO(fangjun): Remember to change these constants if needed
    int32_t frame_shift_ms = 10;
    int32_t subsampling_factor = 4;
    return Convert(decoder_result, sym_, frame_shift_ms, subsampling_factor,
                   s->GetNumFramesSinceStart());
  }

 private:
  void InitKeywords(std::istream &is) {
    if (!EncodeKeywords(is, sym_, &keywords_id_, &keywords_, &boost_scores_,
                        &thresholds_)) {
      SHERPA_ONNX_LOGE("Encode keywords failed.");
      exit(-1);
    }
    keywords_graph_ = std::make_shared<ContextGraph>(
        keywords_id_, config_.keywords_score, config_.keywords_threshold,
        boost_scores_, keywords_, thresholds_);
  }

  void InitKeywords() {
#ifdef SHERPA_ONNX_ENABLE_WASM_KWS
    // Due to the limitations of the wasm file system,
    // the keyword_file variable is directly parsed as a string of keywords
    // if WASM KWS on
    std::istringstream is(config_.keywords_file);
    InitKeywords(is);
#else
    // each line in keywords_file contains space-separated words
    std::ifstream is(config_.keywords_file);
    if (!is) {
#if __OHOS__
      SHERPA_ONNX_LOGE("Open keywords file failed: '%{public}s'",
                       config_.keywords_file.c_str());
#else
      SHERPA_ONNX_LOGE("Open keywords file failed: '%s'",
                       config_.keywords_file.c_str());
#endif
      exit(-1);
    }
    InitKeywords(is);
#endif
  }

  template <typename Manager>
  void InitKeywords(Manager *mgr) {
    // each line in keywords_file contains space-separated words

    auto buf = ReadFile(mgr, config_.keywords_file);

    std::istringstream is(std::string(buf.data(), buf.size()));

    if (!is) {
#if __OHOS__
      SHERPA_ONNX_LOGE("Open keywords file failed: '%{public}s'",
                       config_.keywords_file.c_str());
#else
      SHERPA_ONNX_LOGE("Open keywords file failed: '%s'",
                       config_.keywords_file.c_str());
#endif
      exit(-1);
    }
    InitKeywords(is);
  }

  void InitKeywordsFromBufStr() {
    // keywords_buf's content is supposed to be same as the keywords_file's
    std::istringstream is(config_.keywords_buf);
    InitKeywords(is);
  }

  void InitOnlineStream(OnlineStream *stream) const {
    auto r = decoder_->GetEmptyResult();
    SHERPA_ONNX_CHECK_EQ(r.hyps.Size(), 1);

    SHERPA_ONNX_CHECK(stream->GetContextGraph() != nullptr);
    r.hyps.begin()->second.context_state = stream->GetContextGraph()->Root();

    stream->SetKeywordResult(r);
    stream->SetStates(model_->GetEncoderInitStates());
  }

 private:
  KeywordSpotterConfig config_;
  std::vector<std::vector<int32_t>> keywords_id_;
  std::vector<float> boost_scores_;
  std::vector<float> thresholds_;
  std::vector<std::string> keywords_;
  ContextGraphPtr keywords_graph_;
  std::unique_ptr<OnlineTransducerModel> model_;
  std::unique_ptr<TransducerKeywordDecoder> decoder_;
  SymbolTable sym_;
  int32_t unk_id_ = -1;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_KEYWORD_SPOTTER_TRANSDUCER_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/keyword-spotter.cc
================================================
// sherpa-onnx/csrc/keyword-spotter.cc
//
// Copyright (c)  2023-2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/keyword-spotter.h"

#include <algorithm>
#include <cassert>
#include <fstream>
#include <iomanip>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/keyword-spotter-impl.h"

namespace sherpa_onnx {

std::string KeywordResult::AsJsonString() const {
  std::ostringstream os;
  os << "{";
  os << "\"start_time\":" << std::fixed << std::setprecision(2) << start_time
     << ", ";

  os << "\"keyword\""
     << ": ";
  os << "\"" << keyword << "\""
     << ", ";

  os << "\""
     << "timestamps"
     << "\""
     << ": ";
  os << "[";

  std::string sep = "";
  for (auto t : timestamps) {
    os << sep << std::fixed << std::setprecision(2) << t;
    sep = ", ";
  }
  os << "], ";

  os << "\""
     << "tokens"
     << "\""
     << ":";
  os << "[";

  sep = "";
  auto oldFlags = os.flags();
  for (const auto &t : tokens) {
    if (t.size() == 1 && static_cast<uint8_t>(t[0]) > 0x7f) {
      const uint8_t *p = reinterpret_cast<const uint8_t *>(t.c_str());
      os << sep << "\""
         << "<0x" << std::hex << std::uppercase << static_cast<uint32_t>(p[0])
         << ">"
         << "\"";
      os.flags(oldFlags);
    } else {
      os << sep << "\"" << t << "\"";
    }
    sep = ", ";
  }
  os << "]";
  os << "}";

  return os.str();
}

void KeywordSpotterConfig::Register(ParseOptions *po) {
  feat_config.Register(po);
  model_config.Register(po);

  po->Register("max-active-paths", &max_active_paths,
               "beam size used in modified beam search.");
  po->Register("num-trailing-blanks", &num_trailing_blanks,
               "The number of trailing blanks should have after the keyword.");
  po->Register("keywords-score", &keywords_score,
               "The bonus score for each token in context word/phrase.");
  po->Register("keywords-threshold", &keywords_threshold,
               "The acoustic threshold (probability) to trigger the keywords.");
  po->Register(
      "keywords-file", &keywords_file,
      "The file containing keywords, one word/phrase per line, and for each"
      "phrase the bpe/cjkchar are separated by a space. For example: "
      "▁HE LL O ▁WORLD"
      "你 好 世 界");
}

bool KeywordSpotterConfig::Validate() const {
  if (!keywords_file.empty() && !keywords_buf.empty()) {
    SHERPA_ONNX_LOGE(
        "you can not provide a keywords_buf and a keywords file: '%s', "
        "at the same time, which is confusing",
        keywords_file.c_str());
    return false;
  }

  if (keywords_file.empty() && keywords_buf.empty()) {
    SHERPA_ONNX_LOGE(
        "Please provide either a keywords-file or the keywords-buf");
    return false;
  }

#ifndef SHERPA_ONNX_ENABLE_WASM_KWS
  // due to the limitations of the wasm file system,
  // keywords file will be packaged into the sherpa-onnx-wasm-kws-main.data file
  // Solution: take keyword_file variable is directly
  // parsed as a string of keywords
  if (keywords_buf.empty() && !std::ifstream(keywords_file.c_str()).good()) {
    SHERPA_ONNX_LOGE("Keywords file '%s' does not exist.",
                     keywords_file.c_str());
    return false;
  }
#endif

  return model_config.Validate();
}

std::string KeywordSpotterConfig::ToString() const {
  std::ostringstream os;

  os << "KeywordSpotterConfig(";
  os << "feat_config=" << feat_config.ToString() << ", ";
  os << "model_config=" << model_config.ToString() << ", ";
  os << "max_active_paths=" << max_active_paths << ", ";
  os << "num_trailing_blanks=" << num_trailing_blanks << ", ";
  os << "keywords_score=" << keywords_score << ", ";
  os << "keywords_threshold=" << keywords_threshold << ", ";
  os << "keywords_file=\"" << keywords_file << "\")";

  return os.str();
}

KeywordSpotter::KeywordSpotter(const KeywordSpotterConfig &config)
    : impl_(KeywordSpotterImpl::Create(config)) {}

template <typename Manager>
KeywordSpotter::KeywordSpotter(Manager *mgr, const KeywordSpotterConfig &config)
    : impl_(KeywordSpotterImpl::Create(mgr, config)) {}

KeywordSpotter::~KeywordSpotter() = default;

std::unique_ptr<OnlineStream> KeywordSpotter::CreateStream() const {
  return impl_->CreateStream();
}

std::unique_ptr<OnlineStream> KeywordSpotter::CreateStream(
    const std::string &keywords) const {
  return impl_->CreateStream(keywords);
}

bool KeywordSpotter::IsReady(OnlineStream *s) const {
  return impl_->IsReady(s);
}

void KeywordSpotter::Reset(OnlineStream *s) const { impl_->Reset(s); }

void KeywordSpotter::DecodeStreams(OnlineStream **ss, int32_t n) const {
  impl_->DecodeStreams(ss, n);
}

KeywordResult KeywordSpotter::GetResult(OnlineStream *s) const {
  return impl_->GetResult(s);
}

#if __ANDROID_API__ >= 9
template KeywordSpotter::KeywordSpotter(AAssetManager *mgr,
                                        const KeywordSpotterConfig &config);
#endif

#if __OHOS__
template KeywordSpotter::KeywordSpotter(NativeResourceManager *mgr,
                                        const KeywordSpotterConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/keyword-spotter.h
================================================
// sherpa-onnx/csrc/keyword-spotter.h
//
// Copyright (c)  2023-2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_KEYWORD_SPOTTER_H_
#define SHERPA_ONNX_CSRC_KEYWORD_SPOTTER_H_

#include <memory>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/features.h"
#include "sherpa-onnx/csrc/online-model-config.h"
#include "sherpa-onnx/csrc/online-stream.h"
#include "sherpa-onnx/csrc/online-transducer-model-config.h"
#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct KeywordResult {
  /// The triggered keyword.
  /// For English, it consists of space separated words.
  /// For Chinese, it consists of Chinese words without spaces.
  /// Example 1: "hello world"
  /// Example 2: "你好世界"
  std::string keyword;

  /// Decoded results at the token level.
  /// For instance, for BPE-based models it consists of a list of BPE tokens.
  std::vector<std::string> tokens;

  /// timestamps.size() == tokens.size()
  /// timestamps[i] records the time in seconds when tokens[i] is decoded.
  std::vector<float> timestamps;

  /// Starting time of this segment.
  /// When an endpoint is detected, it will change
  float start_time = 0;

  /** Return a json string.
   *
   * The returned string contains:
   *   {
   *     "keyword": "The triggered keyword",
   *     "tokens": [x, x, x],
   *     "timestamps": [x, x, x],
   *     "start_time": x,
   *   }
   */
  std::string AsJsonString() const;
};

struct KeywordSpotterConfig {
  FeatureExtractorConfig feat_config;
  OnlineModelConfig model_config;

  int32_t max_active_paths = 4;

  int32_t num_trailing_blanks = 1;

  float keywords_score = 1.0;

  float keywords_threshold = 0.25;

  std::string keywords_file;

  /// if keywords_buf is non-empty,
  /// the keywords will be loaded from the buffer instead of from the
  /// "keywrods_file"
  std::string keywords_buf;

  KeywordSpotterConfig() = default;

  KeywordSpotterConfig(const FeatureExtractorConfig &feat_config,
                       const OnlineModelConfig &model_config,
                       int32_t max_active_paths, int32_t num_trailing_blanks,
                       float keywords_score, float keywords_threshold,
                       const std::string &keywords_file)
      : feat_config(feat_config),
        model_config(model_config),
        max_active_paths(max_active_paths),
        num_trailing_blanks(num_trailing_blanks),
        keywords_score(keywords_score),
        keywords_threshold(keywords_threshold),
        keywords_file(keywords_file) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

class KeywordSpotterImpl;

class KeywordSpotter {
 public:
  explicit KeywordSpotter(const KeywordSpotterConfig &config);

  template <typename Manager>
  KeywordSpotter(Manager *mgr, const KeywordSpotterConfig &config);

  ~KeywordSpotter();

  /** Create a stream for decoding.
   *
   */
  std::unique_ptr<OnlineStream> CreateStream() const;

  /** Create a stream for decoding.
   *
   *  @param The keywords for this string, it might contain several keywords,
   *         the keywords are separated by "/". In each of the keywords, there
   *         are cjkchars or bpes, the bpe/cjkchar are separated by space (" ").
   *         For example, keywords I LOVE YOU and HELLO WORLD, looks like:
   *
   *         "▁I ▁LOVE ▁YOU/▁HE LL O ▁WORLD"
   */
  std::unique_ptr<OnlineStream> CreateStream(const std::string &keywords) const;

  /**
   * Return true if the given stream has enough frames for decoding.
   * Return false otherwise
   */
  bool IsReady(OnlineStream *s) const;

  // Remember to call it after detecting a keyword
  void Reset(OnlineStream *s) const;

  /** Decode a single stream. */
  void DecodeStream(OnlineStream *s) const {
    OnlineStream *ss[1] = {s};
    DecodeStreams(ss, 1);
  }

  /** Decode multiple streams in parallel
   *
   * @param ss Pointer array containing streams to be decoded.
   * @param n Number of streams in `ss`.
   */
  void DecodeStreams(OnlineStream **ss, int32_t n) const;

  KeywordResult GetResult(OnlineStream *s) const;

 private:
  std::unique_ptr<KeywordSpotterImpl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_KEYWORD_SPOTTER_H_


================================================
FILE: sherpa-onnx/csrc/kokoro-multi-lang-lexicon.cc
================================================
// sherpa-onnx/csrc/kokoro-multi-lang-lexicon.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/kokoro-multi-lang-lexicon.h"

#include <fstream>
#include <regex>
#include <sstream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "espeak-ng/speak_lib.h"
#include "phoneme_ids.hpp"  // NOLINT
#include "phonemize.hpp"    // NOLINT
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/phrase-matcher.h"
#include "sherpa-onnx/csrc/symbol-table.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

void CallPhonemizeEspeak(const std::string &text,
                         piper::eSpeakPhonemeConfig &config,  // NOLINT
                         std::vector<std::vector<piper::Phoneme>> *phonemes);

class KokoroMultiLangLexicon::Impl {
 public:
  Impl(const std::string &tokens, const std::string &lexicon,
       const std::string &data_dir,
       const OfflineTtsKokoroModelMetaData &meta_data, bool debug)
      : meta_data_(meta_data), debug_(debug) {
    InitTokens(tokens);

    InitLexicon(lexicon);

    InitEspeak(data_dir);  // See ./piper-phonemize-lexicon.cc
  }

  template <typename Manager>
  Impl(Manager *mgr, const std::string &tokens, const std::string &lexicon,
       const std::string &data_dir,
       const OfflineTtsKokoroModelMetaData &meta_data, bool debug)
      : meta_data_(meta_data), debug_(debug) {
    InitTokens(mgr, tokens);

    InitLexicon(mgr, lexicon);

    // we assume you have copied data_dir from assets to some path

    InitEspeak(data_dir);  // See ./piper-phonemize-lexicon.cc
  }

  std::vector<TokenIDs> ConvertTextToTokenIds(const std::string &_text,
                                              const std::string &voice) const {
    // we cannot convert text to lowercase here since it will affect
    // how piper_phonemize handles punctuations inside the text
    std::string text = _text;

    std::vector<std::pair<std::string, std::string>> replace_str_pairs = {
        {"，", ","}, {":", ","},  {"、", ","}, {"；", ";"},   {"：", ":"},
        {"。", "."}, {"？", "?"}, {"！", "!"}, {"\\s+", " "},
    };
    for (const auto &p : replace_str_pairs) {
      std::regex re(p.first);
      text = std::regex_replace(text, re, p.second);
    }

    if (debug_) {
      SHERPA_ONNX_LOGE("After replacing punctuations and merging spaces:\n%s",
                       text.c_str());
    }

    // https://en.cppreference.com/w/cpp/regex
    // https://stackoverflow.com/questions/37989081/how-to-use-unicode-range-in-c-regex
    std::string expr_chinese = "([\\u4e00-\\u9fff]+)";
    std::string expr_not_chinese = "([^\\u4e00-\\u9fff]+)";

    std::string expr_both = expr_chinese + "|" + expr_not_chinese;

    auto ws = ToWideString(text);
    std::wstring wexpr_both = ToWideString(expr_both);
    std::wregex we_both(wexpr_both);

    std::wstring wexpr_zh = ToWideString(expr_chinese);
    std::wregex we_zh(wexpr_zh);

    auto begin = std::wsregex_iterator(ws.begin(), ws.end(), we_both);
    auto end = std::wsregex_iterator();

    std::vector<TokenIDs> ans;

    for (std::wsregex_iterator i = begin; i != end; ++i) {
      std::wsmatch match = *i;
      std::wstring match_str = match.str();

      auto ms = ToString(match_str);
      uint8_t c = reinterpret_cast<const uint8_t *>(ms.data())[0];

      std::vector<std::vector<int32_t>> ids_vec;
      if (std::regex_match(match_str, we_zh)) {
        if (debug_) {
          SHERPA_ONNX_LOGE("Chinese: %s", ms.c_str());
        }
        ids_vec = ConvertChineseToTokenIDs(ms);
      } else {
        if (debug_) {
          SHERPA_ONNX_LOGE("Non-Chinese: %s", ms.c_str());
        }

        ids_vec = ConvertNonChineseToTokenIDs(ms, voice);
      }

      for (const auto &ids : ids_vec) {
        if (ids.size() > 10 + 2) {
          ans.emplace_back(ids);
        } else {
          if (ans.empty()) {
            ans.emplace_back(ids);
          } else {
            if ((ans.back().tokens.size() + ids.size() < 50) ||
                (ids.size() < 5)) {
              ans.back().tokens.back() = ids[1];
              ans.back().tokens.insert(ans.back().tokens.end(), ids.begin() + 2,
                                       ids.end());
            } else {
              ans.emplace_back(ids);
            }
          }
        }
      }
    }

    if (debug_) {
      for (const auto &v : ans) {
        std::ostringstream os;
        os << "\n";
        std::string sep;
        for (auto i : v.tokens) {
          os << sep << i;
          sep = " ";
        }
        os << "\n";
        SHERPA_ONNX_LOGE("%s", os.str().c_str());
      }
    }

    return ans;
  }

 private:
  bool IsPunctuation(const std::string &text) const {
    if (text == ";" || text == ":" || text == "," || text == "." ||
        text == "!" || text == "?" || text == "—" || text == "…" ||
        text == "\"" || text == "(" || text == ")" || text == "“" ||
        text == "”") {
      return true;
    }

    return false;
  }

  std::vector<int32_t> ConvertWordToIds(const std::string &w) const {
    std::vector<int32_t> ans;
    if (word2ids_.count(w)) {
      ans = word2ids_.at(w);
    } else {
      std::vector<std::string> words = SplitUtf8(w);
      for (const auto &word : words) {
        if (word2ids_.count(word)) {
          auto ids = ConvertWordToIds(word);
          ans.insert(ans.end(), ids.begin(), ids.end());
        } else {
          if (debug_) {
            SHERPA_ONNX_LOGE("Skip OOV: '%s'", word.c_str());
          }
        }
      }
    }

    if (debug_ && !ans.empty()) {
      std::ostringstream os;
      os << w << ": ";
      for (auto i : ans) {
        os << id2token_.at(i) << " ";
      }
      os << "\n";
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
    }

    return ans;
  }

  std::vector<std::vector<int32_t>> ConvertChineseToTokenIDs(
      const std::string &text) const {
    std::vector<std::string> words = SplitUtf8(text);

    if (debug_) {
      std::ostringstream os;
      std::string sep = "";
      for (const auto &w : words) {
        os << sep << w;
        sep = "_";
      }

#if __OHOS__
      SHERPA_ONNX_LOGE("after splitting into UTF8:\n%{public}s",
                       os.str().c_str());
#else
      SHERPA_ONNX_LOGE("after splitting into UTF8:\n%s", os.str().c_str());
#endif
    }

    std::vector<std::vector<int32_t>> ans;
    std::vector<int32_t> this_sentence;
    int32_t max_len = meta_data_.max_token_len;

    this_sentence.push_back(0);

    PhraseMatcher matcher(&all_words_, words, debug_);

    for (const std::string &w : matcher) {
      auto ids = ConvertWordToIds(w);
      if (ids.empty()) {
#if __OHOS__
        SHERPA_ONNX_LOGE("Ignore OOV '%{public}s'", w.c_str());
#else
        SHERPA_ONNX_LOGE("Ignore OOV '%s'", w.c_str());
#endif
        continue;
      }

      if (this_sentence.size() + ids.size() > max_len - 2) {
        this_sentence.push_back(0);
        ans.push_back(std::move(this_sentence));

        this_sentence.push_back(0);
      }

      this_sentence.insert(this_sentence.end(), ids.begin(), ids.end());
    }  // for (const std::string &w : matcher)

    if (this_sentence.size() > 1) {
      this_sentence.push_back(0);
      ans.push_back(std::move(this_sentence));
    }

    if (debug_) {
      for (const auto &v : ans) {
        std::ostringstream os;
        os << "\n";
        std::string sep;
        for (auto i : v) {
          os << sep << i;
          sep = " ";
        }
        os << "\n";
        SHERPA_ONNX_LOGE("%s", os.str().c_str());
      }
    }

    return ans;
  }

  std::vector<std::vector<int32_t>> ConvertTextToTokenIDsWithEspeak(
      const std::string &text, const std::string &voice) const {
    auto temp = ConvertTextToTokenIdsKokoroOrKitten(
        phoneme2id_, meta_data_.max_token_len, text, voice);
    std::vector<std::vector<int32_t>> ans;
    ans.reserve(temp.size());

    for (const auto &i : temp) {
      ans.emplace_back(i.tokens.begin(), i.tokens.end());
    }

    return ans;
  }

  std::vector<std::vector<int32_t>> ConvertNonChineseToTokenIDs(
      const std::string &text, const std::string &voice) const {
    if (IsPunctuation(text)) {
      return {std::vector<int32_t>{0, token2id_.at(text), 0}};
    }

    if (!voice.empty()) {
      return ConvertTextToTokenIDsWithEspeak(text, voice);
    }

    // If voice is empty, we split the text into words and use the lexicon
    // to lookup the pronunciation of each word, fallback to espeak if
    // a word is not in the lexicon.

    std::vector<std::string> words = SplitUtf8(text);
    if (debug_) {
      std::ostringstream os;
      os << "After splitting to words: ";
      std::string sep;
      for (const auto &w : words) {
        os << sep << w;
        sep = "_";
      }
      SHERPA_ONNX_LOGE("%s", os.str().c_str());
    }

    std::vector<std::vector<int32_t>> ans;
    int32_t max_len = meta_data_.max_token_len;
    std::vector<int32_t> this_sentence;

    int32_t space_id = token2id_.at(" ");

    this_sentence.push_back(0);

    for (const auto &_word : words) {
      auto word = ToLowerCase(_word);
      if (IsPunctuation(word)) {
        this_sentence.push_back(token2id_.at(word));

        if (this_sentence.size() > max_len - 2) {
          // this sentence is too long, split it
          this_sentence.push_back(0);
          ans.push_back(std::move(this_sentence));

          this_sentence.push_back(0);
          continue;
        }

        if (word == "." || word == "!" || word == "?" || word == ";") {
          // Note: You can add more punctuations here to split the text
          // into sentences. We just use four here: .!?;
          this_sentence.push_back(0);
          ans.push_back(std::move(this_sentence));

          this_sentence.push_back(0);
        }
      } else if (word2ids_.count(word)) {
        const auto &ids = word2ids_.at(word);
        if (this_sentence.size() + ids.size() + 3 > max_len - 2) {
          this_sentence.push_back(0);
          ans.push_back(std::move(this_sentence));

          this_sentence.push_back(0);
        }

        this_sentence.insert(this_sentence.end(), ids.begin(), ids.end());
        this_sentence.push_back(space_id);
      } else {
        if (debug_) {
          SHERPA_ONNX_LOGE("Use espeak-ng to handle the OOV: '%s'",
                           word.c_str());
        }

        piper::eSpeakPhonemeConfig config;

        config.voice = meta_data_.voice;

        std::vector<std::vector<piper::Phoneme>> phonemes;

        CallPhonemizeEspeak(word, config, &phonemes);
        // Note phonemes[i] contains a vector of unicode codepoints;
        // we need to convert them to utf8

        std::vector<int32_t> ids;
        for (const auto &v : phonemes) {
          for (const auto p : v) {
            auto token = Utf32ToUtf8(p);
            if (token2id_.count(token)) {
              ids.push_back(token2id_.at(token));
            } else {
              if (debug_) {
                SHERPA_ONNX_LOGE("Skip OOV token '%s' from '%s'", token.c_str(),
                                 word.c_str());
              }
            }
          }
        }

        if (this_sentence.size() + ids.size() + 3 > max_len - 2) {
          this_sentence.push_back(0);
          ans.push_back(std::move(this_sentence));

          this_sentence.push_back(0);
        }

        this_sentence.insert(this_sentence.end(), ids.begin(), ids.end());
        this_sentence.push_back(space_id);
      }
    }

    if (this_sentence.size() > 1) {
      this_sentence.push_back(0);
      ans.push_back(std::move(this_sentence));
    }

    if (debug_) {
      for (const auto &v : ans) {
        std::ostringstream os;
        os << "\n";
        std::string sep;
        for (auto i : v) {
          os << sep << i;
          sep = " ";
        }
        os << "\n";
        SHERPA_ONNX_LOGE("%s", os.str().c_str());
      }
    }

    return ans;
  }

  void InitTokens(const std::string &tokens) {
    std::ifstream is(tokens);
    InitTokens(is);
  }

  template <typename Manager>
  void InitTokens(Manager *mgr, const std::string &tokens) {
    auto buf = ReadFile(mgr, tokens);

    std::istringstream is(std::string(buf.data(), buf.size()));
    InitTokens(is);
  }

  void InitTokens(std::istream &is) {
    token2id_ = ReadTokens(is);  // defined in ./symbol-table.cc

    if (debug_) {
      for (const auto &p : token2id_) {
        id2token_[p.second] = p.first;
      }
    }

    std::u32string s;
    for (const auto &p : token2id_) {
      s = Utf8ToUtf32(p.first);

      if (s.size() != 1) {
        SHERPA_ONNX_LOGE("Error for token %s with id %d", p.first.c_str(),
                         p.second);
        SHERPA_ONNX_EXIT(-1);
      }

      char32_t c = s[0];
      phoneme2id_.insert({c, p.second});
    }
  }

  void InitLexicon(const std::string &lexicon) {
    if (lexicon.empty()) {
      return;
    }

    std::vector<std::string> files;
    SplitStringToVector(lexicon, ",", false, &files);
    for (const auto &f : files) {
      std::ifstream is(f);
      InitLexicon(is);
    }
  }

  template <typename Manager>
  void InitLexicon(Manager *mgr, const std::string &lexicon) {
    if (lexicon.empty()) {
      return;
    }

    std::vector<std::string> files;
    SplitStringToVector(lexicon, ",", false, &files);
    for (const auto &f : files) {
      auto buf = ReadFile(mgr, f);

      std::istringstream is(std::string(buf.data(), buf.size()));
      InitLexicon(is);
    }
  }

  void InitLexicon(std::istream &is) {
    std::string word;
    std::vector<std::string> token_list;
    std::string token;

    std::string line;
    int32_t line_num = 0;
    int32_t num_warn = 0;
    while (std::getline(is, line)) {
      ++line_num;
      std::istringstream iss(line);

      token_list.clear();
      iss >> word;
      ToLowerCase(&word);

      if (word2ids_.count(word)) {
        num_warn += 1;
        if (num_warn < 10) {
          SHERPA_ONNX_LOGE("Duplicated word: %s at line %d:%s. Ignore it.",
                           word.c_str(), line_num, line.c_str());
        }
        continue;
      }

      while (iss >> token) {
        token_list.push_back(std::move(token));
      }

      std::vector<int32_t> ids = ConvertTokensToIds(token2id_, token_list);

      if (ids.empty() && word != "呣") {
        SHERPA_ONNX_LOGE(
            "Invalid pronunciation for word '%s' at line %d:%s. Ignore it",
            word.c_str(), line_num, line.c_str());
        continue;
      }

      word2ids_.insert({std::move(word), std::move(ids)});
    }

    for (const auto &[key, _] : word2ids_) {
      all_words_.insert(key);
    }
  }

 private:
  OfflineTtsKokoroModelMetaData meta_data_;

  // word to token IDs
  std::unordered_map<std::string, std::vector<int32_t>> word2ids_;
  std::unordered_set<std::string> all_words_;

  // tokens.txt is saved in token2id_
  std::unordered_map<std::string, int32_t> token2id_;
  std::unordered_map<int32_t, std::string> id2token_;

  std::unordered_map<char32_t, int32_t> phoneme2id_;

  bool debug_ = false;
};

KokoroMultiLangLexicon::~KokoroMultiLangLexicon() = default;

KokoroMultiLangLexicon::KokoroMultiLangLexicon(
    const std::string &tokens, const std::string &lexicon,
    const std::string &data_dir, const OfflineTtsKokoroModelMetaData &meta_data,
    bool debug)
    : impl_(std::make_unique<Impl>(tokens, lexicon, data_dir, meta_data,
                                   debug)) {}  // NOLINT

template <typename Manager>
KokoroMultiLangLexicon::KokoroMultiLangLexicon(
    Manager *mgr, const std::string &tokens, const std::string &lexicon,
    const std::string &data_dir, const OfflineTtsKokoroModelMetaData &meta_data,
    bool debug)
    : impl_(std::make_unique<Impl>(mgr, tokens, lexicon, data_dir, meta_data,
                                   debug)) {}  // NOLINT

std::vector<TokenIDs> KokoroMultiLangLexicon::ConvertTextToTokenIds(
    const std::string &text, const std::string &voice /*= ""*/) const {
  return impl_->ConvertTextToTokenIds(text, voice);
}

#if __ANDROID_API__ >= 9
template KokoroMultiLangLexicon::KokoroMultiLangLexicon(
    AAssetManager *mgr, const std::string &tokens, const std::string &lexicon,
    const std::string &data_dir, const OfflineTtsKokoroModelMetaData &meta_data,
    bool debug);
#endif

#if __OHOS__
template KokoroMultiLangLexicon::KokoroMultiLangLexicon(
    NativeResourceManager *mgr, const std::string &tokens,
    const std::string &lexicon, const std::string &data_dir,
    const OfflineTtsKokoroModelMetaData &meta_data, bool debug);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/kokoro-multi-lang-lexicon.h
================================================
// sherpa-onnx/csrc/kokoro-multi-lang-lexicon.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_KOKORO_MULTI_LANG_LEXICON_H_
#define SHERPA_ONNX_CSRC_KOKORO_MULTI_LANG_LEXICON_H_

#include <memory>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/offline-tts-frontend.h"
#include "sherpa-onnx/csrc/offline-tts-kokoro-model-meta-data.h"

namespace sherpa_onnx {

class KokoroMultiLangLexicon : public OfflineTtsFrontend {
 public:
  ~KokoroMultiLangLexicon() override;

  KokoroMultiLangLexicon(const std::string &tokens, const std::string &lexicon,
                         const std::string &data_dir,
                         const OfflineTtsKokoroModelMetaData &meta_data,
                         bool debug);

  template <typename Manager>
  KokoroMultiLangLexicon(Manager *mgr, const std::string &tokens,
                         const std::string &lexicon,
                         const std::string &data_dir,
                         const OfflineTtsKokoroModelMetaData &meta_data,
                         bool debug);

  std::vector<TokenIDs> ConvertTextToTokenIds(
      const std::string &text, const std::string &voice = "") const override;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_KOKORO_MULTI_LANG_LEXICON_H_


================================================
FILE: sherpa-onnx/csrc/lexicon.cc
================================================
// sherpa-onnx/csrc/lexicon.cc
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/lexicon.h"

#include <algorithm>
#include <cctype>
#include <fstream>
#include <iomanip>
#include <memory>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/symbol-table.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

static std::vector<std::string> ProcessHeteronyms(
    const std::vector<std::string> &words) {
  std::vector<std::string> ans;
  ans.reserve(words.size());

  int32_t num_words = static_cast<int32_t>(words.size());
  int32_t i = 0;
  int32_t prev = -1;
  while (i < num_words) {
    // start of a phrase #$|
    if ((i + 2 < num_words) && words[i] == "#" && words[i + 1] == "$" &&
        words[i + 2] == "|") {
      if (prev == -1) {
        prev = i + 3;
      }
      i = i + 3;
      continue;
    }

    // end of a phrase |$#
    if ((i + 2 < num_words) && words[i] == "|" && words[i + 1] == "$" &&
        words[i + 2] == "#") {
      if (prev != -1) {
        std::ostringstream os;
        for (int32_t k = prev; k < i; ++k) {
          if (words[k] != "|" && words[k] != "$" && words[k] != "#") {
            os << words[k];
          }
        }
        ans.push_back(os.str());

        prev = -1;
      }

      i += 3;
      continue;
    }

    if (prev == -1) {
      // not inside a phrase
      ans.push_back(words[i]);
    }

    ++i;
  }

  return ans;
}

std::vector<int32_t> ConvertTokensToIds(
    const std::unordered_map<std::string, int32_t> &token2id,
    const std::vector<std::string> &tokens) {
  std::vector<int32_t> ids;
  ids.reserve(tokens.size());
  for (const auto &s : tokens) {
    if (!token2id.count(s)) {
#if __OHOS__
      SHERPA_ONNX_LOGE("Unknown token: %{public}s", s.c_str());
#else
      SHERPA_ONNX_LOGE("Unknown token: %s", s.c_str());
#endif
      return {};
    }
    int32_t id = token2id.at(s);
    ids.push_back(id);
  }

  return ids;
}

Lexicon::Lexicon(const std::string &lexicon, const std::string &tokens,
                 const std::string &punctuations, const std::string &language,
                 bool debug /*= false*/)
    : debug_(debug) {
  InitLanguage(language);

  {
    std::ifstream is(tokens);
    InitTokens(is);
  }

  {
    std::ifstream is(lexicon);
    InitLexicon(is);
  }

  InitPunctuations(punctuations);
}

template <typename Manager>
Lexicon::Lexicon(Manager *mgr, const std::string &lexicon,
                 const std::string &tokens, const std::string &punctuations,
                 const std::string &language, bool debug /*= false*/
                 )
    : debug_(debug) {
  InitLanguage(language);

  {
    auto buf = ReadFile(mgr, tokens);
    std::istringstream is(std::string(buf.data(), buf.size()));
    InitTokens(is);
  }

  {
    auto buf = ReadFile(mgr, lexicon);
    std::istringstream is(std::string(buf.data(), buf.size()));
    InitLexicon(is);
  }

  InitPunctuations(punctuations);
}

std::vector<TokenIDs> Lexicon::ConvertTextToTokenIds(
    const std::string &text, const std::string & /*voice*/ /*= ""*/) const {
  switch (language_) {
    case Language::kChinese:
      return ConvertTextToTokenIdsChinese(text);
    case Language::kNotChinese:
      return ConvertTextToTokenIdsNotChinese(text);
    default:
      SHERPA_ONNX_LOGE("Unknown language: %d", static_cast<int32_t>(language_));
      SHERPA_ONNX_EXIT(-1);
  }

  return {};
}

std::vector<TokenIDs> Lexicon::ConvertTextToTokenIdsChinese(
    const std::string &_text) const {
  std::string text(_text);
  ToLowerCase(&text);

  std::vector<std::string> words = SplitUtf8(text);
  words = ProcessHeteronyms(words);

  if (debug_) {
    std::ostringstream os;

    os << "Input text in string: " << text << "\n";
    os << "Input text in bytes:";
    for (uint8_t c : text) {
      os << " 0x" << std::setfill('0') << std::setw(2) << std::right << std::hex
         << static_cast<int32_t>(c);
    }
    os << "\n";
    os << "After splitting to words:";
    for (const auto &w : words) {
      os << " " << w;
    }
    os << "\n";

#if __OHOS__
    SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
    SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
  }

  std::vector<TokenIDs> ans;
  std::vector<int64_t> this_sentence;

  int32_t sil = -1;
  int32_t eos = -1;
  if (token2id_.count("sil")) {
    sil = token2id_.at("sil");
    eos = token2id_.at("eos");
  }

  int32_t pad = -1;
  if (token2id_.count("#0")) {
    pad = token2id_.at("#0");
  }

  if (sil != -1) {
    this_sentence.push_back(sil);
  }

  for (const auto &w : words) {
    if (w == "." || w == ";" || w == "!" || w == "?" || w == "-" || w == ":" ||
        w == "。" || w == "；" || w == "！" || w == "？" || w == "：" ||
        w == "”" ||
        // not sentence break
        w == "," || w == "“" || w == "，" || w == "、") {
      if (punctuations_.count(w)) {
        if (token2id_.count(w)) {
          this_sentence.push_back(token2id_.at(w));
        } else if (pad != -1) {
          this_sentence.push_back(pad);
        } else if (sil != -1) {
          this_sentence.push_back(sil);
        }
      }

      if (w != "," && w != "“" && w != "，" && w != "、") {
        if (eos != -1) {
          this_sentence.push_back(eos);
        }
        ans.emplace_back(std::move(this_sentence));
        this_sentence = {};

        if (sil != -1) {
          this_sentence.push_back(sil);
        }
      }
      continue;
    }

    if (!word2ids_.count(w)) {
      SHERPA_ONNX_LOGE("OOV %s. Ignore it!", w.c_str());
      continue;
    }

    const auto &token_ids = word2ids_.at(w);
    this_sentence.insert(this_sentence.end(), token_ids.begin(),
                         token_ids.end());
  }

  if (sil != -1) {
    this_sentence.push_back(sil);
  }

  if (eos != -1) {
    this_sentence.push_back(eos);
  }

  if (!this_sentence.empty()) {
    ans.emplace_back(std::move(this_sentence));
  }

  return ans;
}

std::vector<TokenIDs> Lexicon::ConvertTextToTokenIdsNotChinese(
    const std::string &_text) const {
  std::string text(_text);
  ToLowerCase(&text);

  std::vector<std::string> words = SplitUtf8(text);

  if (debug_) {
    std::ostringstream os;

    os << "Input text (lowercase) in string: " << text << "\n";
    os << "Input text in bytes:";
    for (uint8_t c : text) {
      os << " 0x" << std::setfill('0') << std::setw(2) << std::right << std::hex
         << static_cast<int32_t>(c);
    }
    os << "\n";
    os << "After splitting to words:";
    for (const auto &w : words) {
      os << " " << w;
    }
    os << "\n";

#if __OHOS__
    SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
    SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
  }

  int32_t blank = token2id_.at(" ");

  std::vector<TokenIDs> ans;
  std::vector<int64_t> this_sentence;

  for (const auto &w : words) {
    if (w == "." || w == ";" || w == "!" || w == "?" || w == "-" || w == ":" ||
        // not sentence break
        w == ",") {
      if (punctuations_.count(w)) {
        this_sentence.push_back(token2id_.at(w));
      }

      if (w != ",") {
        this_sentence.push_back(blank);
        ans.emplace_back(std::move(this_sentence));
        this_sentence = {};
      }

      continue;
    }

    if (!word2ids_.count(w)) {
      SHERPA_ONNX_LOGE("OOV %s. Ignore it!", w.c_str());
      continue;
    }

    const auto &token_ids = word2ids_.at(w);
    this_sentence.insert(this_sentence.end(), token_ids.begin(),
                         token_ids.end());
    this_sentence.push_back(blank);
  }

  if (!this_sentence.empty()) {
    // remove the last blank
    this_sentence.resize(this_sentence.size() - 1);
  }

  if (!this_sentence.empty()) {
    ans.emplace_back(std::move(this_sentence));
  }

  return ans;
}

void Lexicon::InitTokens(std::istream &is) { token2id_ = ReadTokens(is); }

void Lexicon::InitLanguage(const std::string &_lang) {
  std::string lang(_lang);
  ToLowerCase(&lang);
  if (lang == "chinese") {
    language_ = Language::kChinese;
  } else if (!lang.empty()) {
    language_ = Language::kNotChinese;
  } else {
#if __OHOS__
    SHERPA_ONNX_LOGE("Unknown language: %{public}s", _lang.c_str());
#else
    SHERPA_ONNX_LOGE("Unknown language: %s", _lang.c_str());
#endif
    SHERPA_ONNX_EXIT(-1);
  }
}

void Lexicon::InitLexicon(std::istream &is) {
  std::string word;
  std::vector<std::string> token_list;
  std::string line;
  std::string phone;

  while (std::getline(is, line)) {
    std::istringstream iss(line);

    token_list.clear();

    iss >> word;
    ToLowerCase(&word);

    if (word2ids_.count(word)) {
      SHERPA_ONNX_LOGE("Duplicated word: %s. Ignore it.", word.c_str());
      continue;
    }

    while (iss >> phone) {
      token_list.push_back(std::move(phone));
    }

    std::vector<int32_t> ids = ConvertTokensToIds(token2id_, token_list);
    if (ids.empty()) {
      continue;
    }

    word2ids_.insert({std::move(word), std::move(ids)});
  }
}

void Lexicon::InitPunctuations(const std::string &punctuations) {
  std::vector<std::string> punctuation_list;
  SplitStringToVector(punctuations, " ", false, &punctuation_list);
  for (auto &s : punctuation_list) {
    punctuations_.insert(std::move(s));
  }
}

#if __ANDROID_API__ >= 9
template Lexicon::Lexicon(AAssetManager *mgr, const std::string &lexicon,
                          const std::string &tokens,
                          const std::string &punctuations,
                          const std::string &language, bool debug = false);
#endif

#if __OHOS__
template Lexicon::Lexicon(NativeResourceManager *mgr,
                          const std::string &lexicon, const std::string &tokens,
                          const std::string &punctuations,
                          const std::string &language, bool debug = false);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/lexicon.h
================================================
// sherpa-onnx/csrc/lexicon.h
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_LEXICON_H_
#define SHERPA_ONNX_CSRC_LEXICON_H_

#include <cstdint>
#include <istream>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "sherpa-onnx/csrc/offline-tts-frontend.h"

namespace sherpa_onnx {

class Lexicon : public OfflineTtsFrontend {
 public:
  Lexicon() = default;  // for subclasses
                        //
  // Note: for models from piper, we won't use this class.
  Lexicon(const std::string &lexicon, const std::string &tokens,
          const std::string &punctuations, const std::string &language,
          bool debug = false);

  template <typename Manager>
  Lexicon(Manager *mgr, const std::string &lexicon, const std::string &tokens,
          const std::string &punctuations, const std::string &language,
          bool debug = false);

  std::vector<TokenIDs> ConvertTextToTokenIds(
      const std::string &text, const std::string &voice = "") const override;

 private:
  std::vector<TokenIDs> ConvertTextToTokenIdsNotChinese(
      const std::string &text) const;

  std::vector<TokenIDs> ConvertTextToTokenIdsChinese(
      const std::string &text) const;

  void InitLanguage(const std::string &lang);
  void InitTokens(std::istream &is);
  void InitLexicon(std::istream &is);
  void InitPunctuations(const std::string &punctuations);

 private:
  enum class Language {
    kNotChinese,
    kChinese,
    kUnknown,
  };

 private:
  std::unordered_map<std::string, std::vector<int32_t>> word2ids_;
  std::unordered_set<std::string> punctuations_;
  std::unordered_map<std::string, int32_t> token2id_;
  Language language_ = Language::kUnknown;
  bool debug_ = false;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_LEXICON_H_


================================================
FILE: sherpa-onnx/csrc/lodr-fst.cc
================================================
// sherpa-onnx/csrc/lodr-fst.cc
//
// Contains code copied from icefall/utils/ngram_lm.py
// Copyright (c)  2023 Xiaomi Corporation
//
// Copyright (c)  2025 Tilde SIA (Askars Salimbajevs)

#include "sherpa-onnx/csrc/lodr-fst.h"

#include <algorithm>
#include <limits>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/hypothesis.h"
#include "sherpa-onnx/csrc/log.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

int32_t LodrFst::FindBackoffId() {
  // assume that the backoff id is the only input label with epsilon output

  for (int32_t state = 0; state < fst_->NumStates(); ++state) {
    fst::ArcIterator<fst::StdConstFst> arc_iter(*fst_, state);
    for (; !arc_iter.Done(); arc_iter.Next()) {
      const auto &arc = arc_iter.Value();
      if (arc.olabel == 0) {  // Check if the output label is epsilon (0)
        return arc.ilabel;    // Return the input label
      }
    }
  }

  return -1;  // Return -1 if no such input symbol is found
}

LodrFst::LodrFst(const std::string &fst_path, int32_t backoff_id)
    : backoff_id_(backoff_id) {
  fst_ = std::unique_ptr<fst::StdConstFst>(
      CastOrConvertToConstFst(fst::StdVectorFst::Read(fst_path)));

  if (backoff_id < 0) {
    // backoff_id_ is not provided, find it automatically
    backoff_id_ = FindBackoffId();
    if (backoff_id_ < 0) {
      std::string err_msg = "Failed to initialize LODR: No backoff arc found";
      SHERPA_ONNX_LOGE("%s", err_msg.c_str());
      SHERPA_ONNX_EXIT(-1);
    }
  }
}

std::vector<std::tuple<int32_t, float>> LodrFst::ProcessBackoffArcs(
    int32_t state, float cost) {
  std::vector<std::tuple<int32_t, float>> ans;
  auto next = GetNextStatesCostsNoBackoff(state, backoff_id_);
  if (!next.has_value()) {
    return ans;
  }
  auto [next_state, next_cost] = next.value();
  ans.emplace_back(next_state, next_cost + cost);
  auto recursive_result = ProcessBackoffArcs(next_state, next_cost + cost);
  ans.insert(ans.end(), recursive_result.begin(), recursive_result.end());
  return ans;
}

std::optional<std::tuple<int32_t, float>> LodrFst::GetNextStatesCostsNoBackoff(
    int32_t state, int32_t label) {
  fst::ArcIterator<fst::StdConstFst> arc_iter(*fst_, state);
  int32_t num_arcs = fst_->NumArcs(state);

  int32_t left = 0, right = num_arcs - 1;
  while (left <= right) {
    int32_t mid = (left + right) / 2;
    arc_iter.Seek(mid);
    auto arc = arc_iter.Value();
    if (arc.ilabel < label) {
      left = mid + 1;
    } else if (arc.ilabel > label) {
      right = mid - 1;
    } else {
      return std::make_tuple(arc.nextstate, arc.weight.Value());
    }
  }
  return std::nullopt;
}

std::pair<std::vector<int32_t>, std::vector<float>> LodrFst::GetNextStateCosts(
    int32_t state, int32_t label) {
  std::vector<int32_t> states = {state};
  std::vector<float> costs = {0};

  auto extra_states_costs = ProcessBackoffArcs(state, 0);
  for (const auto &[s, c] : extra_states_costs) {
    states.push_back(s);
    costs.push_back(c);
  }

  std::vector<int32_t> next_states;
  std::vector<float> next_costs;
  for (size_t i = 0; i < states.size(); ++i) {
    auto next = GetNextStatesCostsNoBackoff(states[i], label);
    if (next.has_value()) {
      auto [ns, nc] = next.value();
      next_states.push_back(ns);
      next_costs.push_back(costs[i] + nc);
    }
  }

  return std::make_pair(next_states, next_costs);
}

void LodrFst::ComputeScore(float scale, Hypothesis *hyp, int32_t offset) {
  if (scale == 0) {
    return;
  }

  hyp->lodr_state = std::make_unique<LodrStateCost>(this);

  // Walk through the FST with the input text from the hypothesis
  for (size_t i = offset; i < hyp->ys.size(); ++i) {
    *hyp->lodr_state = hyp->lodr_state->ForwardOneStep(hyp->ys[i]);
  }

  float lodr_score = hyp->lodr_state->FinalScore();

  if (lodr_score == -std::numeric_limits<float>::infinity()) {
    SHERPA_ONNX_LOGE("Failed to compute LODR. Empty or mismatched FST?");
    return;
  }

  // Update the hyp score
  hyp->log_prob += scale * lodr_score;
}

float LodrFst::GetFinalCost(int32_t state) {
  auto final_weight = fst_->Final(state);
  if (final_weight == fst::StdArc::Weight::Zero()) {
    return 0.0;
  }
  return final_weight.Value();
}

LodrStateCost::LodrStateCost(
    LodrFst *fst, const std::unordered_map<int32_t, float> &state_cost)
    : fst_(fst) {
  if (state_cost.empty()) {
    state_cost_[0] = 0.0;
  } else {
    state_cost_ = state_cost;
  }
}

LodrStateCost LodrStateCost::ForwardOneStep(int32_t label) {
  std::unordered_map<int32_t, float> state_cost;
  for (const auto &[s, c] : state_cost_) {
    auto [next_states, next_costs] = fst_->GetNextStateCosts(s, label);
    for (size_t i = 0; i < next_states.size(); ++i) {
      int32_t ns = next_states[i];
      float nc = next_costs[i];
      if (state_cost.find(ns) == state_cost.end()) {
        state_cost[ns] = std::numeric_limits<float>::infinity();
      }
      state_cost[ns] = std::min(state_cost[ns], c + nc);
    }
  }
  return LodrStateCost(fst_, state_cost);
}

float LodrStateCost::Score() const {
  if (state_cost_.empty()) {
    return -std::numeric_limits<float>::infinity();
  }
  auto min_cost = std::min_element(
      state_cost_.begin(), state_cost_.end(),
      [](const auto &a, const auto &b) { return a.second < b.second; });
  return -min_cost->second;
}

float LodrStateCost::FinalScore() const {
  if (state_cost_.empty()) {
    return -std::numeric_limits<float>::infinity();
  }
  auto min_cost = std::min_element(
      state_cost_.begin(), state_cost_.end(),
      [](const auto &a, const auto &b) { return a.second < b.second; });
  return -(min_cost->second + fst_->GetFinalCost(min_cost->first));
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/lodr-fst.h
================================================
// sherpa-onnx/csrc/lodr-fst.h
//
// Contains code copied from icefall/utils/ngram_lm.py
// Copyright (c)  2023 Xiaomi Corporation
//
// Copyright (c)  2025 Tilde SIA (Askars Salimbajevs)


#ifndef SHERPA_ONNX_CSRC_LODR_FST_H_
#define SHERPA_ONNX_CSRC_LODR_FST_H_

#include <memory>
#include <string>
#include <vector>
#include <optional>
#include <tuple>
#include <unordered_map>
#include <limits>
#include <algorithm>
#include <utility>

#include "kaldifst/csrc/kaldi-fst-io.h"

namespace sherpa_onnx {

struct Hypothesis;

class LodrFst {
 public:
  explicit LodrFst(const std::string &fst_path, int32_t backoff_id = -1);

  std::pair<std::vector<int32_t>, std::vector<float>> GetNextStateCosts(
    int32_t state, int32_t label);

  float GetFinalCost(int32_t state);

  void ComputeScore(float scale, Hypothesis *hyp, int32_t offset);

 private:
  fst::StdVectorFst YsToFst(const std::vector<int64_t> &ys, int32_t offset);

  std::vector<std::tuple<int32_t, float>> ProcessBackoffArcs(
    int32_t state, float cost);

  std::optional<std::tuple<int32_t, float>> GetNextStatesCostsNoBackoff(
    int32_t state, int32_t label);

  int32_t FindBackoffId();


  int32_t backoff_id_ = -1;
  std::unique_ptr<fst::StdConstFst> fst_;  // owned by this class
};

class LodrStateCost {
 public:
  explicit LodrStateCost(
    LodrFst* fst,
    const std::unordered_map<int32_t, float> &state_cost = {});

    LodrStateCost ForwardOneStep(int32_t label);

  float Score() const;
  float FinalScore() const;

 private:
  // The fst_ is not owned by this class and borrowed from the caller
  // (e.g. OnlineRnnLM).
  LodrFst* fst_;
  std::unordered_map<int32_t, float> state_cost_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_LODR_FST_H_


================================================
FILE: sherpa-onnx/csrc/log.cc
================================================
// sherpa-onnx/csrc/log.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/log.h"

#ifdef SHERPA_ONNX_HAVE_EXECINFO_H
#include <execinfo.h>  // To get stack trace in error messages.
#ifdef SHERPA_ONNX_HAVE_CXXABI_H
#include <cxxabi.h>  // For name demangling.
// Useful to decode the stack trace, but only used if we have execinfo.h
#endif  // SHERPA_ONNX_HAVE_CXXABI_H
#endif  // SHERPA_ONNX_HAVE_EXECINFO_H

#include <stdlib.h>

#include <ctime>
#include <iomanip>
#include <string>

namespace sherpa_onnx {

std::string GetDateTimeStr() {
  std::ostringstream os;
  std::time_t t = std::time(nullptr);
  std::tm tm = *std::localtime(&t);
  os << std::put_time(&tm, "%F %T");  // yyyy-mm-dd hh:mm:ss
  return os.str();
}

static bool LocateSymbolRange(const std::string &trace_name, std::size_t *begin,
                              std::size_t *end) {
  // Find the first '_' with leading ' ' or '('.
  *begin = std::string::npos;
  for (std::size_t i = 1; i < trace_name.size(); ++i) {
    if (trace_name[i] != '_') {
      continue;
    }
    if (trace_name[i - 1] == ' ' || trace_name[i - 1] == '(') {
      *begin = i;
      break;
    }
  }
  if (*begin == std::string::npos) {
    return false;
  }
  *end = trace_name.find_first_of(" +", *begin);
  return *end != std::string::npos;
}

#ifdef SHERPA_ONNX_HAVE_EXECINFO_H
static std::string Demangle(const std::string &trace_name) {
#ifndef SHERPA_ONNX_HAVE_CXXABI_H
  return trace_name;
#else   // SHERPA_ONNX_HAVE_CXXABI_H
  // Try demangle the symbol. We are trying to support the following formats
  // produced by different platforms:
  //
  // Linux:
  //   ./kaldi-error-test(_ZN5kaldi13UnitTestErrorEv+0xb) [0x804965d]
  //
  // Mac:
  //   0 server 0x000000010f67614d _ZNK5kaldi13MessageLogger10LogMessageEv + 813
  //
  // We want to extract the name e.g., '_ZN5kaldi13UnitTestErrorEv' and
  // demangle it info a readable name like kaldi::UnitTextError.
  std::size_t begin, end;
  if (!LocateSymbolRange(trace_name, &begin, &end)) {
    return trace_name;
  }
  std::string symbol = trace_name.substr(begin, end - begin);
  int status;
  char *demangled_name = abi::__cxa_demangle(symbol.c_str(), 0, 0, &status);
  if (status == 0 && demangled_name != nullptr) {
    symbol = demangled_name;
    free(demangled_name);
  }
  return trace_name.substr(0, begin) + symbol +
         trace_name.substr(end, std::string::npos);
#endif  // SHERPA_ONNX_HAVE_CXXABI_H
}
#endif  // SHERPA_ONNX_HAVE_EXECINFO_H

std::string GetStackTrace() {
  std::string ans;
#ifdef SHERPA_ONNX_HAVE_EXECINFO_H
  constexpr const std::size_t kMaxTraceSize = 50;
  constexpr const std::size_t kMaxTracePrint = 50;  // Must be even.
                                                    // Buffer for the trace.
  void *trace[kMaxTraceSize];
  // Get the trace.
  std::size_t size = backtrace(trace, kMaxTraceSize);
  // Get the trace symbols.
  char **trace_symbol = backtrace_symbols(trace, size);
  if (trace_symbol == nullptr) return ans;

  // Compose a human-readable backtrace string.
  ans += "[ Stack-Trace: ]\n";
  if (size <= kMaxTracePrint) {
    for (std::size_t i = 0; i < size; ++i) {
      ans += Demangle(trace_symbol[i]) + "\n";
    }
  } else {  // Print out first+last (e.g.) 5.
    for (std::size_t i = 0; i < kMaxTracePrint / 2; ++i) {
      ans += Demangle(trace_symbol[i]) + "\n";
    }
    ans += ".\n.\n.\n";
    for (std::size_t i = size - kMaxTracePrint / 2; i < size; ++i) {
      ans += Demangle(trace_symbol[i]) + "\n";
    }
    if (size == kMaxTraceSize)
      ans += ".\n.\n.\n";  // Stack was too long, probably a bug.
  }

  // We must free the array of pointers allocated by backtrace_symbols(),
  // but not the strings themselves.
  free(trace_symbol);
#endif  // SHERPA_ONNX_HAVE_EXECINFO_H
  return ans;
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/log.h
================================================
// sherpa-onnx/csrc/log.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_LOG_H_
#define SHERPA_ONNX_CSRC_LOG_H_

#include <stdio.h>

#include <mutex>  // NOLINT
#include <sstream>
#include <string>

namespace sherpa_onnx {

#if SHERPA_ONNX_ENABLE_CHECK

#if defined(NDEBUG)
constexpr bool kDisableDebug = true;
#else
constexpr bool kDisableDebug = false;
#endif

enum class LogLevel {
  kTrace = 0,
  kDebug = 1,
  kInfo = 2,
  kWarning = 3,
  kError = 4,
  kFatal = 5,  // print message and abort the program
};

// They are used in SHERPA_ONNX_LOG(xxx), so their names
// do not follow the google c++ code style
//
// You can use them in the following way:
//
//  SHERPA_ONNX_LOG(TRACE) << "some message";
//  SHERPA_ONNX_LOG(DEBUG) << "some message";
#ifndef _MSC_VER
constexpr LogLevel TRACE = LogLevel::kTrace;
constexpr LogLevel DEBUG = LogLevel::kDebug;
constexpr LogLevel INFO = LogLevel::kInfo;
constexpr LogLevel WARNING = LogLevel::kWarning;
constexpr LogLevel ERROR = LogLevel::kError;
constexpr LogLevel FATAL = LogLevel::kFatal;
#else
#define TRACE LogLevel::kTrace
#define DEBUG LogLevel::kDebug
#define INFO LogLevel::kInfo
#define WARNING LogLevel::kWarning
#define ERROR LogLevel::kError
#define FATAL LogLevel::kFatal
#endif

std::string GetStackTrace();

/* Return the current log level.


   If the current log level is TRACE, then all logged messages are printed out.

   If the current log level is DEBUG, log messages with "TRACE" level are not
   shown and all other levels are printed out.

   Similarly, if the current log level is INFO, log message with "TRACE" and
   "DEBUG" are not shown and all other levels are printed out.

   If it is FATAL, then only FATAL messages are shown.
 */
inline LogLevel GetCurrentLogLevel() {
  static LogLevel log_level = INFO;
  static std::once_flag init_flag;
  std::call_once(init_flag, []() {
    const char *env_log_level = std::getenv("SHERPA_ONNX_LOG_LEVEL");
    if (env_log_level == nullptr) return;

    std::string s = env_log_level;
    if (s == "TRACE")
      log_level = TRACE;
    else if (s == "DEBUG")
      log_level = DEBUG;
    else if (s == "INFO")
      log_level = INFO;
    else if (s == "WARNING")
      log_level = WARNING;
    else if (s == "ERROR")
      log_level = ERROR;
    else if (s == "FATAL")
      log_level = FATAL;
    else
      fprintf(stderr,
              "Unknown SHERPA_ONNX_LOG_LEVEL: %s"
              "\nSupported values are: "
              "TRACE, DEBUG, INFO, WARNING, ERROR, FATAL",
              s.c_str());
  });
  return log_level;
}

inline bool EnableAbort() {
  static std::once_flag init_flag;
  static bool enable_abort = false;
  std::call_once(init_flag, []() {
    enable_abort = (std::getenv("SHERPA_ONNX_ABORT") != nullptr);
  });
  return enable_abort;
}

class Logger {
 public:
  Logger(const char *filename, const char *func_name, uint32_t line_num,
         LogLevel level)
      : filename_(filename),
        func_name_(func_name),
        line_num_(line_num),
        level_(level) {
    cur_level_ = GetCurrentLogLevel();
    switch (level) {
      case TRACE:
        if (cur_level_ <= TRACE) fprintf(stderr, "[T] ");
        break;
      case DEBUG:
        if (cur_level_ <= DEBUG) fprintf(stderr, "[D] ");
        break;
      case INFO:
        if (cur_level_ <= INFO) fprintf(stderr, "[I] ");
        break;
      case WARNING:
        if (cur_level_ <= WARNING) fprintf(stderr, "[W] ");
        break;
      case ERROR:
        if (cur_level_ <= ERROR) fprintf(stderr, "[E] ");
        break;
      case FATAL:
        if (cur_level_ <= FATAL) fprintf(stderr, "[F] ");
        break;
    }

    if (cur_level_ <= level_) {
      fprintf(stderr, "%s:%u:%s ", filename, line_num, func_name);
    }
  }

  ~Logger() noexcept(false) {
    static constexpr const char *kErrMsg = R"(
    Some bad things happened. Please read the above error messages and stack
    trace. If you are using Python, the following command may be helpful:

      gdb --args python /path/to/your/code.py

    (You can use `gdb` to debug the code. Please consider compiling
    a debug version of sherpa_onnx.).

    If you are unable to fix it, please open an issue at:

      https://github.com/csukuangfj/kaldi-native-fbank/issues/new
    )";
    if (level_ == FATAL) {
      fprintf(stderr, "\n");
      std::string stack_trace = GetStackTrace();
      if (!stack_trace.empty()) {
        fprintf(stderr, "\n\n%s\n", stack_trace.c_str());
      }

      fflush(nullptr);

#ifndef __ANDROID_API__
      if (EnableAbort()) {
        // NOTE: abort() will terminate the program immediately without
        // printing the Python stack backtrace.
        abort();
      }

      throw std::runtime_error(kErrMsg);
#else
      abort();
#endif
    }
  }

  const Logger &operator<<(bool b) const {
    if (cur_level_ <= level_) {
      fprintf(stderr, b ? "true" : "false");
    }
    return *this;
  }

  const Logger &operator<<(int8_t i) const {
    if (cur_level_ <= level_) fprintf(stderr, "%d", i);
    return *this;
  }

  const Logger &operator<<(const char *s) const {
    if (cur_level_ <= level_) fprintf(stderr, "%s", s);
    return *this;
  }

  const Logger &operator<<(int32_t i) const {
    if (cur_level_ <= level_) fprintf(stderr, "%d", i);
    return *this;
  }

  const Logger &operator<<(uint32_t i) const {
    if (cur_level_ <= level_) fprintf(stderr, "%u", i);
    return *this;
  }

  const Logger &operator<<(uint64_t i) const {
    if (cur_level_ <= level_)
      fprintf(stderr, "%llu", (long long unsigned int)i);  // NOLINT
    return *this;
  }

  const Logger &operator<<(int64_t i) const {
    if (cur_level_ <= level_)
      fprintf(stderr, "%lli", (long long int)i);  // NOLINT
    return *this;
  }

  const Logger &operator<<(float f) const {
    if (cur_level_ <= level_) fprintf(stderr, "%f", f);
    return *this;
  }

  const Logger &operator<<(double d) const {
    if (cur_level_ <= level_) fprintf(stderr, "%f", d);
    return *this;
  }

  template <typename T>
  const Logger &operator<<(const T &t) const {
    // require T overloads operator<<
    std::ostringstream os;
    os << t;
    return *this << os.str().c_str();
  }

  // specialization to fix compile error: `stringstream << nullptr` is ambiguous
  const Logger &operator<<(const std::nullptr_t &null) const {
    if (cur_level_ <= level_) *this << "(null)";
    return *this;
  }

 private:
  const char *filename_;
  const char *func_name_;
  uint32_t line_num_;
  LogLevel level_;
  LogLevel cur_level_;
};
#endif  // SHERPA_ONNX_ENABLE_CHECK

class Voidifier {
 public:
#if SHERPA_ONNX_ENABLE_CHECK
  void operator&(const Logger &) const {}
#endif
};
#if !defined(SHERPA_ONNX_ENABLE_CHECK)
template <typename T>
const Voidifier &operator<<(const Voidifier &v, T &&) {
  return v;
}
#endif

}  // namespace sherpa_onnx

#define SHERPA_ONNX_STATIC_ASSERT(x) static_assert(x, "")

#ifdef SHERPA_ONNX_ENABLE_CHECK

#if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) || \
    defined(__PRETTY_FUNCTION__)
// for clang and GCC
#define SHERPA_ONNX_FUNC __PRETTY_FUNCTION__
#else
// for other compilers
#define SHERPA_ONNX_FUNC __func__
#endif

#define SHERPA_ONNX_CHECK(x)                                            \
  (x) ? (void)0                                                         \
      : ::sherpa_onnx::Voidifier() &                                    \
            ::sherpa_onnx::Logger(__FILE__, SHERPA_ONNX_FUNC, __LINE__, \
                                  ::sherpa_onnx::FATAL)                 \
                << "Check failed: " << #x << " "

// WARNING: x and y may be evaluated multiple times, but this happens only
// when the check fails. Since the program aborts if it fails, we don't think
// the extra evaluation of x and y matters.
//
// CAUTION: we recommend the following use case:
//
//      auto x = Foo();
//      auto y = Bar();
//      SHERPA_ONNX_CHECK_EQ(x, y) << "Some message";
//
//  And please avoid
//
//      SHERPA_ONNX_CHECK_EQ(Foo(), Bar());
//
//  if `Foo()` or `Bar()` causes some side effects, e.g., changing some
//  local static variables or global variables.
#define _SHERPA_ONNX_CHECK_OP(x, y, op)                                        \
  ((x)op(y)) ? (void)0                                                         \
             : ::sherpa_onnx::Voidifier() &                                    \
                   ::sherpa_onnx::Logger(__FILE__, SHERPA_ONNX_FUNC, __LINE__, \
                                         ::sherpa_onnx::FATAL)                 \
                       << "Check failed: " << #x << " " << #op << " " << #y    \
                       << " (" << (x) << " vs. " << (y) << ") "

#define SHERPA_ONNX_CHECK_EQ(x, y) _SHERPA_ONNX_CHECK_OP(x, y, ==)
#define SHERPA_ONNX_CHECK_NE(x, y) _SHERPA_ONNX_CHECK_OP(x, y, !=)
#define SHERPA_ONNX_CHECK_LT(x, y) _SHERPA_ONNX_CHECK_OP(x, y, <)
#define SHERPA_ONNX_CHECK_LE(x, y) _SHERPA_ONNX_CHECK_OP(x, y, <=)
#define SHERPA_ONNX_CHECK_GT(x, y) _SHERPA_ONNX_CHECK_OP(x, y, >)
#define SHERPA_ONNX_CHECK_GE(x, y) _SHERPA_ONNX_CHECK_OP(x, y, >=)

#define SHERPA_ONNX_LOG(x) \
  ::sherpa_onnx::Logger(__FILE__, SHERPA_ONNX_FUNC, __LINE__, ::sherpa_onnx::x)

// ------------------------------------------------------------
//       For debug check
// ------------------------------------------------------------
// If you define the macro "-D NDEBUG" while compiling kaldi-native-fbank,
// the following macros are in fact empty and does nothing.

#define SHERPA_ONNX_DCHECK(x) \
  ::sherpa_onnx::kDisableDebug ? (void)0 : SHERPA_ONNX_CHECK(x)

#define SHERPA_ONNX_DCHECK_EQ(x, y) \
  ::sherpa_onnx::kDisableDebug ? (void)0 : SHERPA_ONNX_CHECK_EQ(x, y)

#define SHERPA_ONNX_DCHECK_NE(x, y) \
  ::sherpa_onnx::kDisableDebug ? (void)0 : SHERPA_ONNX_CHECK_NE(x, y)

#define SHERPA_ONNX_DCHECK_LT(x, y) \
  ::sherpa_onnx::kDisableDebug ? (void)0 : SHERPA_ONNX_CHECK_LT(x, y)

#define SHERPA_ONNX_DCHECK_LE(x, y) \
  ::sherpa_onnx::kDisableDebug ? (void)0 : SHERPA_ONNX_CHECK_LE(x, y)

#define SHERPA_ONNX_DCHECK_GT(x, y) \
  ::sherpa_onnx::kDisableDebug ? (void)0 : SHERPA_ONNX_CHECK_GT(x, y)

#define SHERPA_ONNX_DCHECK_GE(x, y) \
  ::sherpa_onnx::kDisableDebug ? (void)0 : SHERPA_ONNX_CHECK_GE(x, y)

#define SHERPA_ONNX_DLOG(x)    \
  ::sherpa_onnx::kDisableDebug \
      ? (void)0                \
      : ::sherpa_onnx::Voidifier() & SHERPA_ONNX_LOG(x)

#else

#define SHERPA_ONNX_CHECK(x) ::sherpa_onnx::Voidifier()
#define SHERPA_ONNX_LOG(x) ::sherpa_onnx::Voidifier()

#define SHERPA_ONNX_CHECK_EQ(x, y) ::sherpa_onnx::Voidifier()
#define SHERPA_ONNX_CHECK_NE(x, y) ::sherpa_onnx::Voidifier()
#define SHERPA_ONNX_CHECK_LT(x, y) ::sherpa_onnx::Voidifier()
#define SHERPA_ONNX_CHECK_LE(x, y) ::sherpa_onnx::Voidifier()
#define SHERPA_ONNX_CHECK_GT(x, y) ::sherpa_onnx::Voidifier()
#define SHERPA_ONNX_CHECK_GE(x, y) ::sherpa_onnx::Voidifier()

#define SHERPA_ONNX_DCHECK(x) ::sherpa_onnx::Voidifier()
#define SHERPA_ONNX_DLOG(x) ::sherpa_onnx::Voidifier()
#define SHERPA_ONNX_DCHECK_EQ(x, y) ::sherpa_onnx::Voidifier()
#define SHERPA_ONNX_DCHECK_NE(x, y) ::sherpa_onnx::Voidifier()
#define SHERPA_ONNX_DCHECK_LT(x, y) ::sherpa_onnx::Voidifier()
#define SHERPA_ONNX_DCHECK_LE(x, y) ::sherpa_onnx::Voidifier()
#define SHERPA_ONNX_DCHECK_GT(x, y) ::sherpa_onnx::Voidifier()
#define SHERPA_ONNX_DCHECK_GE(x, y) ::sherpa_onnx::Voidifier()

#endif  // SHERPA_ONNX_CHECK_NE

#endif  // SHERPA_ONNX_CSRC_LOG_H_


================================================
FILE: sherpa-onnx/csrc/macros.h
================================================
// sherpa-onnx/csrc/macros.h
//
// Copyright      2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_MACROS_H_
#define SHERPA_ONNX_CSRC_MACROS_H_
#include <stdio.h>
#include <stdlib.h>

#include <utility>
#if __OHOS__
#include "hilog/log.h"

#undef LOG_DOMAIN
#undef LOG_TAG

// https://gitee.com/openharmony/docs/blob/145a084f0b742e4325915e32f8184817927d1251/en/contribute/OpenHarmony-Log-guide.md#hilog-api-usage-specifications
#define LOG_DOMAIN 0x6666
#define LOG_TAG "sherpa_onnx"
#endif

#if __ANDROID_API__ >= 8
#include "android/log.h"
#define SHERPA_ONNX_LOGE(...)                                                  \
  do {                                                                         \
    fprintf(stderr, "%s:%s:%d ", __FILE__, __func__,                           \
            static_cast<int32_t>(__LINE__));                                   \
    fprintf(stderr, ##__VA_ARGS__);                                            \
    fprintf(stderr, "\n");                                                     \
    __android_log_print(ANDROID_LOG_WARN, "sherpa-onnx", "%s:%s:%d", __FILE__, \
                        __func__, static_cast<int32_t>(__LINE__));             \
    __android_log_print(ANDROID_LOG_WARN, "sherpa-onnx", ##__VA_ARGS__);       \
  } while (0)
#elif defined(__OHOS__)
#define SHERPA_ONNX_LOGE(...) OH_LOG_INFO(LOG_APP, ##__VA_ARGS__)
#elif SHERPA_ONNX_ENABLE_WASM
#define SHERPA_ONNX_LOGE(...)                        \
  do {                                               \
    fprintf(stdout, "%s:%s:%d ", __FILE__, __func__, \
            static_cast<int>(__LINE__));             \
    fprintf(stdout, ##__VA_ARGS__);                  \
    fprintf(stdout, "\n");                           \
  } while (0)
#else
#define SHERPA_ONNX_LOGE(...)                        \
  do {                                               \
    fprintf(stderr, "%s:%s:%d ", __FILE__, __func__, \
            static_cast<int>(__LINE__));             \
    fprintf(stderr, ##__VA_ARGS__);                  \
    fprintf(stderr, "\n");                           \
  } while (0)
#endif

#define SHERPA_ONNX_EXIT(code) exit(code)

// Read an integer
#define SHERPA_ONNX_READ_META_DATA(dst, src_key)                           \
  do {                                                                     \
    auto value = LookupCustomModelMetaData(meta_data, src_key, allocator); \
    if (value.empty()) {                                                   \
      SHERPA_ONNX_LOGE("'%s' does not exist in the metadata", src_key);    \
      SHERPA_ONNX_EXIT(-1);                                                \
    }                                                                      \
                                                                           \
    dst = atoi(value.c_str());                                             \
    if (dst < 0) {                                                         \
      SHERPA_ONNX_LOGE("Invalid value %d for '%s'", dst, src_key);         \
      SHERPA_ONNX_EXIT(-1);                                                \
    }                                                                      \
  } while (0)

#define SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(dst, src_key, default_value) \
  do {                                                                       \
    auto value = LookupCustomModelMetaData(meta_data, src_key, allocator);   \
    if (value.empty()) {                                                     \
      dst = default_value;                                                   \
    } else {                                                                 \
      dst = atoi(value.c_str());                                             \
      if (dst < 0) {                                                         \
        SHERPA_ONNX_LOGE("Invalid value %d for '%s'", dst, src_key);         \
        SHERPA_ONNX_EXIT(-1);                                                \
      }                                                                      \
    }                                                                        \
  } while (0)

// read a vector of integers
#define SHERPA_ONNX_READ_META_DATA_VEC(dst, src_key)                           \
  do {                                                                         \
    auto value = LookupCustomModelMetaData(meta_data, src_key, allocator);     \
    if (value.empty()) {                                                       \
      SHERPA_ONNX_LOGE("'%s' does not exist in the metadata", src_key);        \
      SHERPA_ONNX_EXIT(-1);                                                    \
    }                                                                          \
                                                                               \
    bool ret = SplitStringToIntegers(value.c_str(), ",", true, &dst);          \
    if (!ret) {                                                                \
      SHERPA_ONNX_LOGE("Invalid value '%s' for '%s'", value.c_str(), src_key); \
      SHERPA_ONNX_EXIT(-1);                                                    \
    }                                                                          \
  } while (0)

// read a vector of floats
#define SHERPA_ONNX_READ_META_DATA_VEC_FLOAT(dst, src_key)                     \
  do {                                                                         \
    auto value = LookupCustomModelMetaData(meta_data, src_key, allocator);     \
    if (value.empty()) {                                                       \
      SHERPA_ONNX_LOGE("%s does not exist in the metadata", src_key);          \
      SHERPA_ONNX_EXIT(-1);                                                    \
    }                                                                          \
                                                                               \
    bool ret = SplitStringToFloats(value.c_str(), ",", true, &dst);            \
    if (!ret) {                                                                \
      SHERPA_ONNX_LOGE("Invalid value '%s' for '%s'", value.c_str(), src_key); \
      SHERPA_ONNX_EXIT(-1);                                                    \
    }                                                                          \
  } while (0)

// read a vector of strings
#define SHERPA_ONNX_READ_META_DATA_VEC_STRING(dst, src_key)                \
  do {                                                                     \
    auto value = LookupCustomModelMetaData(meta_data, src_key, allocator); \
    if (value.empty()) {                                                   \
      SHERPA_ONNX_LOGE("'%s' does not exist in the metadata", src_key);    \
      SHERPA_ONNX_EXIT(-1);                                                \
    }                                                                      \
    SplitStringToVector(value.c_str(), ",", false, &dst);                  \
                                                                           \
    if (dst.empty()) {                                                     \
      SHERPA_ONNX_LOGE("Invalid value '%s' for '%s'. Empty vector!",       \
                       value.c_str(), src_key);                            \
      SHERPA_ONNX_EXIT(-1);                                                \
    }                                                                      \
  } while (0)

// read a vector of strings separated by sep
#define SHERPA_ONNX_READ_META_DATA_VEC_STRING_SEP(dst, src_key, sep)       \
  do {                                                                     \
    auto value = LookupCustomModelMetaData(meta_data, src_key, allocator); \
    if (value.empty()) {                                                   \
      SHERPA_ONNX_LOGE("'%s' does not exist in the metadata", src_key);    \
      SHERPA_ONNX_EXIT(-1);                                                \
    }                                                                      \
    SplitStringToVector(value.c_str(), sep, false, &dst);                  \
                                                                           \
    if (dst.empty()) {                                                     \
      SHERPA_ONNX_LOGE("Invalid value '%s' for '%s'. Empty vector!",       \
                       value.c_str(), src_key);                            \
      SHERPA_ONNX_EXIT(-1);                                                \
    }                                                                      \
  } while (0)

// Read a string
#define SHERPA_ONNX_READ_META_DATA_STR(dst, src_key)                       \
  do {                                                                     \
    auto value = LookupCustomModelMetaData(meta_data, src_key, allocator); \
    if (value.empty()) {                                                   \
      SHERPA_ONNX_LOGE("'%s' does not exist in the metadata", src_key);    \
      SHERPA_ONNX_EXIT(-1);                                                \
    }                                                                      \
                                                                           \
    dst = std::move(value);                                                \
    if (dst.empty()) {                                                     \
      SHERPA_ONNX_LOGE("Invalid value for '%s'\n", src_key);               \
      SHERPA_ONNX_EXIT(-1);                                                \
    }                                                                      \
  } while (0)

#define SHERPA_ONNX_READ_META_DATA_STR_ALLOW_EMPTY(dst, src_key)           \
  do {                                                                     \
    auto value = LookupCustomModelMetaData(meta_data, src_key, allocator); \
                                                                           \
    dst = std::move(value);                                                \
  } while (0)

#define SHERPA_ONNX_READ_META_DATA_STR_WITH_DEFAULT(dst, src_key,          \
                                                    default_value)         \
  do {                                                                     \
    auto value = LookupCustomModelMetaData(meta_data, src_key, allocator); \
    if (value.empty()) {                                                   \
      dst = default_value;                                                 \
    } else {                                                               \
      dst = std::move(value);                                              \
      if (dst.empty()) {                                                   \
        SHERPA_ONNX_LOGE("Invalid value for '%s'\n", src_key);             \
        SHERPA_ONNX_EXIT(-1);                                              \
      }                                                                    \
    }                                                                      \
  } while (0)

#endif  // SHERPA_ONNX_CSRC_MACROS_H_


================================================
FILE: sherpa-onnx/csrc/matcha-tts-lexicon.cc
================================================
// sherpa-onnx/csrc/matcha-tts-lexicon.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/matcha-tts-lexicon.h"

#include <ctype.h>

#include <algorithm>
#include <fstream>
#include <memory>
#include <regex>  // NOLINT
#include <sstream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "espeak-ng/speak_lib.h"
#include "phoneme_ids.hpp"  // NOLINT
#include "phonemize.hpp"    // NOLINT
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/phrase-matcher.h"
#include "sherpa-onnx/csrc/symbol-table.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

namespace {
// Please see https://github.com/k2-fsa/sherpa-onnx/pull/2853
// for why we need to do the replacement
static const std::vector<std::pair<std::string, std::string>> kReplacements = {
    {"ɝ", "ɜɹ"}, {"ɚ", "əɹ"},

    {"eɪ", "A"}, {"aɪ", "I"}, {"ɔɪ", "Y"},
    {"oʊ", "O"}, {"əʊ", "O"}, {"aʊ", "W"},

    {"tʃ", "ʧ"}, {"dʒ", "ʤ"},

    {"ː", ""},

    {"g", "ɡ"},  {"r", "ɹ"},

    {"e", "ɛ"},
};

std::vector<std::string> ConvertPhonemesToUTF8(
    const std::vector<std::vector<char32_t>> &phonemes) {
  std::vector<std::string> out;

  for (const auto &word : phonemes) {
    for (char32_t cp : word) {
      out.push_back(Utf32ToUtf8(cp));
    }
  }

  return out;
}

std::string ApplyReplacements(std::string s) {
  for (const auto &p : kReplacements) {
    const std::string &from = p.first;
    const std::string &to = p.second;

    size_t pos = 0;
    while ((pos = s.find(from, pos)) != std::string::npos) {
      s.replace(pos, from.size(), to);
      pos += to.size();
    }
  }
  return s;
}

std::vector<std::string> SplitTokensUTF8(const std::string &s) {
  std::vector<std::string> out;

  for (size_t i = 0; i < s.size();) {
    unsigned char c = s[i];
    size_t len = (c < 0x80) ? 1 : (c < 0xE0) ? 2 : (c < 0xF0) ? 3 : 4;

    out.push_back(s.substr(i, len));
    i += len;
  }

  return out;
}

std::vector<std::string> ProcessPhonemes(
    const std::vector<std::vector<char32_t>> &phonemes, bool skip_replacement) {
  auto tokens = ConvertPhonemesToUTF8(phonemes);
  if (skip_replacement) {
    return tokens;
  }

  std::string joined = Join(tokens);
  std::string replaced = ApplyReplacements(joined);
  return SplitTokensUTF8(replaced);
}

}  // namespace

void CallPhonemizeEspeak(const std::string &text,
                         piper::eSpeakPhonemeConfig &config,  // NOLINT
                         std::vector<std::vector<piper::Phoneme>> *phonemes);

class MatchaTtsLexicon::Impl {
 public:
  Impl(const std::string &lexicon, const std::string &tokens,
       const std::string &data_dir, bool debug, bool skip_replacement)
      : debug_(debug), skip_replacement_(skip_replacement) {
    if (lexicon.empty()) {
      SHERPA_ONNX_LOGE("Please provide lexicon.txt for this model");
      SHERPA_ONNX_EXIT(-1);
    }

    {
      std::ifstream is(tokens);
      InitTokens(is);
    }

    InitLexicon(lexicon);

    if (data_dir.empty()) {
      SHERPA_ONNX_LOGE("Please provide data dir for this model");
      SHERPA_ONNX_EXIT(-1);
    }

    InitEspeak(data_dir);  // See ./piper-phonemize-lexicon.cc
  }

  template <typename Manager>
  Impl(Manager *mgr, const std::string &lexicon, const std::string &tokens,
       const std::string &data_dir, bool debug, bool skip_replacement)
      : debug_(debug), skip_replacement_(skip_replacement) {
    if (lexicon.empty()) {
      SHERPA_ONNX_LOGE("Please provide lexicon.txt for this model");
      SHERPA_ONNX_EXIT(-1);
    }

    {
      auto buf = ReadFile(mgr, tokens);
      std::istringstream is(std::string(buf.data(), buf.size()));

      InitTokens(is);
    }

    std::vector<std::string> files;
    SplitStringToVector(lexicon, ",", false, &files);
    for (const auto &f : files) {
      auto buf = ReadFile(mgr, f);

      std::istringstream is(std::string(buf.data(), buf.size()));
      InitLexicon(is);
    }

    if (data_dir.empty()) {
      SHERPA_ONNX_LOGE("Please provide data dir for this model");
      SHERPA_ONNX_EXIT(-1);
    }

    InitEspeak(data_dir);  // See ./piper-phonemize-lexicon.cc
  }

  std::vector<TokenIDs> ConvertTextToTokenIds(const std::string &_text) const {
    std::string text = _text;
    std::vector<std::pair<std::string, std::string>> replace_str_pairs = {
        {"，", ","}, {"、", ","}, {"；", ";"}, {"：", ","},   {":", ","},
        {"。", "."}, {"？", "?"}, {"！", "!"}, {"\\s+", " "},
    };
    for (const auto &p : replace_str_pairs) {
      std::regex re(p.first);
      text = std::regex_replace(text, re, p.second);
    }

    if (debug_) {
      SHERPA_ONNX_LOGE("After replacing punctuations and merging spaces:\n%s",
                       text.c_str());
    }

    std::vector<std::string> words = SplitUtf8(text);

    if (debug_) {
#if __OHOS__
      SHERPA_ONNX_LOGE("input text:\n%{public}s", _text.c_str());
      SHERPA_ONNX_LOGE("after replacing punctuations:\n%{public}s",
                       text.c_str());
#else
      SHERPA_ONNX_LOGE("input text:\n%s", _text.c_str());
      SHERPA_ONNX_LOGE("after replacing punctuations:\n%s", text.c_str());
#endif

      std::ostringstream os;
      std::string sep = "";
      for (const auto &w : words) {
        os << sep << w;
        sep = "_";
      }

#if __OHOS__
      SHERPA_ONNX_LOGE("after splitting into UTF8:\n%{public}s",
                       os.str().c_str());
#else
      SHERPA_ONNX_LOGE("after splitting into UTF8:\n%s", os.str().c_str());
#endif
    }

    // remove spaces after punctuations
    std::vector<std::string> words2 = std::move(words);
    words.reserve(words2.size());

    for (int32_t i = 0; i < words2.size(); ++i) {
      if (i == 0) {
        words.push_back(std::move(words2[i]));
      } else if (words2[i] == " ") {
        if (words.back() == " " || IsPunct(words.back())) {
          continue;
        } else {
          words.push_back(std::move(words2[i]));
        }
      } else if (IsPunct(words2[i])) {
        if (words.back() == " " || IsPunct(words.back())) {
          continue;
        } else {
          words.push_back(std::move(words2[i]));
        }
      } else {
        words.push_back(std::move(words2[i]));
      }
    }

    if (debug_) {
      std::ostringstream os;
      std::string sep = "";
      for (const auto &w : words) {
        os << sep << w;
        sep = "_";
      }

#if __OHOS__
      SHERPA_ONNX_LOGE("after removing spaces after punctuations:\n%{public}s",
                       os.str().c_str());
#else
      SHERPA_ONNX_LOGE("after removing spaces after punctuations:\n%s",
                       os.str().c_str());
#endif
    }

    std::vector<TokenIDs> ans;
    std::vector<int64_t> this_sentence;

    PhraseMatcher matcher(&all_words_, words, debug_);

    int32_t blank = token2id_.at(" ");

    std::vector<int32_t> ids;
    std::string last_word;
    for (const std::string &w : matcher) {
      ids = ConvertWordToIds(w);

      if (ids.empty()) {
#if __OHOS__
        SHERPA_ONNX_LOGE("Ignore OOV '%{public}s'", w.c_str());
#else
        SHERPA_ONNX_LOGE("Ignore OOV '%s'", w.c_str());
#endif

        last_word = w;
        continue;
      }

      if (!last_word.empty() && isalpha(last_word[0])) {
        this_sentence.push_back(blank);
      }

      this_sentence.insert(this_sentence.end(), ids.begin(), ids.end());

      if (IsPunct(w)) {
        if (debug_) {
          std::ostringstream os;
          std::string sep;
          os << "new sentence: [";
          for (auto i : this_sentence) {
            os << sep << i;
            sep = ", ";
          }
          os << "]";
          SHERPA_ONNX_LOGE("%s", os.str().c_str());
        }

        ans.emplace_back(std::move(this_sentence));
        this_sentence = {};
      }

      last_word = w;
    }  // for (const std::string &w : matcher)

    if (!this_sentence.empty()) {
      ans.emplace_back(std::move(this_sentence));
    }

    return ans;
  }

 private:
  std::vector<int32_t> ConvertWordToIds(const std::string &w) const {
    std::vector<int32_t> ans;
    if (word2ids_.count(w)) {
      ans = word2ids_.at(w);
    } else if (token2id_.count(w)) {
      ans = {token2id_.at(w)};
    } else {
      if (ContainsCJK(w)) {
        std::vector<std::string> words = SplitUtf8(w);
        for (const auto &word : words) {
          if (word2ids_.count(word)) {
            auto ids = ConvertWordToIds(word);
            ans.insert(ans.end(), ids.begin(), ids.end());
          }
        }
      } else {
        if (debug_) {
          SHERPA_ONNX_LOGE("use espeak for %s", w.c_str());
        }
        // use espeak
        piper::eSpeakPhonemeConfig config;
        config.voice = "en-us";
        std::vector<std::vector<piper::Phoneme>> phonemes;
        CallPhonemizeEspeak(w, config, &phonemes);

        auto pp = ProcessPhonemes(phonemes, skip_replacement_);

        for (const auto &p : pp) {
          if (token2id_.count(p)) {
            ans.push_back(token2id_.at(p));
          } else {
            SHERPA_ONNX_LOGE("Skip token: %s", p.c_str());
          }
        }
      }
    }

    if (debug_) {
      std::ostringstream os;
      os << w << ": ";
      for (auto i : ans) {
        os << "'" << id2token_.at(i) << "'(" << i << ")" << ",";
      }
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
    }

    return ans;
  }

  void InitTokens(std::istream &is) {
    token2id_ = ReadTokens(is);

    if (debug_) {
      for (const auto &p : token2id_) {
        id2token_[p.second] = p.first;
      }
    }
  }

  void InitLexicon(const std::string &lexicon) {
    if (lexicon.empty()) {
      SHERPA_ONNX_LOGE("Empty lexicon!");
      return;
    }

    std::vector<std::string> files;
    SplitStringToVector(lexicon, ",", false, &files);
    for (const auto &f : files) {
      std::ifstream is(f);
      InitLexicon(is);
    }
  }

  void InitLexicon(std::istream &is) {
    std::string word;
    std::vector<std::string> token_list;
    std::string line;
    std::string phone;
    int32_t line_num = 0;

    while (std::getline(is, line)) {
      ++line_num;

      std::istringstream iss(line);

      token_list.clear();

      iss >> word;
      ToLowerCase(&word);

      if (word2ids_.count(word)) {
#if __OHOS__
        SHERPA_ONNX_LOGE(
            "Duplicated word: %{public}s at line %{public}d:%{public}s. Ignore "
            "it.",
            word.c_str(), line_num, line.c_str());
#else
        SHERPA_ONNX_LOGE("Duplicated word: %s at line %d:%s. Ignore it.",
                         word.c_str(), line_num, line.c_str());
#endif
        continue;
      }

      while (iss >> phone) {
        token_list.push_back(std::move(phone));
      }

      std::vector<int32_t> ids = ConvertTokensToIds(token2id_, token_list);
      if (ids.empty()) {
        if (debug_) {
#if __OHOS__
          SHERPA_ONNX_LOGE("Empty token ids for '%{public}s'", line.c_str());
#else
          SHERPA_ONNX_LOGE("Empty token ids for '%s'", line.c_str());
#endif
        }
        continue;
      }

      word2ids_.insert({std::move(word), std::move(ids)});
    }

    for (const auto &[key, _] : word2ids_) {
      all_words_.insert(key);
    }
  }

 private:
  // lexicon.txt is saved in word2ids_
  std::unordered_map<std::string, std::vector<int32_t>> word2ids_;
  std::unordered_set<std::string> all_words_;

  // tokens.txt is saved in token2id_
  std::unordered_map<std::string, int32_t> token2id_;

  std::unordered_map<int32_t, std::string> id2token_;

  bool debug_ = false;
  bool skip_replacement_ = false;
};  // namespace sherpa_onnx

MatchaTtsLexicon::~MatchaTtsLexicon() = default;

MatchaTtsLexicon::MatchaTtsLexicon(const std::string &lexicon,
                                   const std::string &tokens,
                                   const std::string &data_dir, bool debug,
                                   bool skip_replacement)
    : impl_(std::make_unique<Impl>(lexicon, tokens, data_dir, debug,
                                   skip_replacement)) {}  // NOLINT

template <typename Manager>
MatchaTtsLexicon::MatchaTtsLexicon(Manager *mgr, const std::string &lexicon,
                                   const std::string &tokens,
                                   const std::string &data_dir, bool debug,
                                   bool skip_replacement)
    : impl_(std::make_unique<Impl>(mgr, lexicon, tokens, data_dir, debug,
                                   skip_replacement)) {}  // NOLINT

std::vector<TokenIDs> MatchaTtsLexicon::ConvertTextToTokenIds(
    const std::string &text, const std::string & /*unused_voice = ""*/) const {
  return impl_->ConvertTextToTokenIds(text);
}

#if __ANDROID_API__ >= 9
template MatchaTtsLexicon::MatchaTtsLexicon(AAssetManager *mgr,
                                            const std::string &lexicon,
                                            const std::string &tokens,
                                            const std::string &data_dir,
                                            bool debug, bool skip_replacement);
#endif

#if __OHOS__
template MatchaTtsLexicon::MatchaTtsLexicon(NativeResourceManager *mgr,
                                            const std::string &lexicon,
                                            const std::string &tokens,
                                            const std::string &data_dir,
                                            bool debug, bool skip_replacement);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/matcha-tts-lexicon.h
================================================
// sherpa-onnx/csrc/matcha-tts-lexicon.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_MATCHA_TTS_LEXICON_H_
#define SHERPA_ONNX_CSRC_MATCHA_TTS_LEXICON_H_

#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include "sherpa-onnx/csrc/offline-tts-frontend.h"

namespace sherpa_onnx {

// For Chinese+English matcha tts
class MatchaTtsLexicon : public OfflineTtsFrontend {
 public:
  ~MatchaTtsLexicon() override;

  MatchaTtsLexicon(const std::string &lexicon, const std::string &tokens,
                   const std::string &data_dir, bool debug,
                   bool skip_replacement);

  template <typename Manager>
  MatchaTtsLexicon(Manager *mgr, const std::string &lexicon,
                   const std::string &tokens, const std::string &data_dir,
                   bool debug, bool skip_replacement);

  std::vector<TokenIDs> ConvertTextToTokenIds(
      const std::string &text,
      const std::string &unused_voice = "") const override;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_MATCHA_TTS_LEXICON_H_


================================================
FILE: sherpa-onnx/csrc/math-test.cc
================================================
// sherpa-onnx/csrc/math-test.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/math.h"

#include <vector>

#include "gtest/gtest.h"

namespace sherpa_onnx {

TEST(Transpose, Case1) {
  // 0 1 2
  // 3 4 5
  std::vector<float> in = {0, 1, 2, 3, 4, 5};
  std::vector<float> out = Transpose(in.data(), 2, 3);

  // 0 3
  // 1 4
  // 2 5
  std::vector<float> expected_out = {0, 3, 1, 4, 2, 5};
  EXPECT_EQ(out, expected_out);
}

TEST(Transpose, Case2) {
  // 0 1
  // 2 3
  // 4 5
  std::vector<float> in = {0, 1, 2, 3, 4, 5};
  std::vector<float> out = Transpose(in.data(), 3, 2);

  // 0 2 4
  // 1 3 5
  std::vector<float> expected_out = {0, 2, 4, 1, 3, 5};
  EXPECT_EQ(out, expected_out);
}

TEST(ScaleAdd, Case1) {
  std::vector<float> src = {1, 2, 3};
  float scale = 10;
  std::vector<float> in_out = {5, 6, 0};
  ScaleAdd(src.data(), scale, src.size(), in_out.data());

  std::vector<float> expected = {10 + 5, 20 + 6, 30 + 0};
  EXPECT_EQ(in_out, expected);
}

TEST(Scale, Case1) {
  std::vector<float> src = {1, 2, 3};
  float scale = 10;
  std::vector<float> in_out = {5, 6, 0};
  Scale(src.data(), scale, src.size(), in_out.data());

  std::vector<float> expected = {10, 20, 30};
  EXPECT_EQ(in_out, expected);
}

TEST(Scale, Case2InPlace) {
  std::vector<float> src = {1, 2, 3};
  float scale = 10;
  Scale(src.data(), scale, src.size(), src.data());

  std::vector<float> expected = {10, 20, 30};
  EXPECT_EQ(src, expected);
}

/*

import numpy as np

def compute_mean_and_inv_std(p: np.ndarray):
    mean = p.mean(axis=0)
    var = np.maximum((p**2).mean(axis=0) - mean**2, 0.0)
    std = np.sqrt(var)
    inv_std = 1.0 / (std + 1e-5)
    return mean.astype(np.float32), inv_std.astype(np.float32)

def dump_cpp_vector(name: str, arr: np.ndarray):
    flat = arr.flatten()
    print(f"std::vector<float> {name} = {{")
    line = ""
    for i, v in enumerate(flat):
        line += f"{v:.8f}f, "
        if (i + 1) % 8 == 0:
            print("  " + line)
            line = ""
    if line:
        print("  " + line)
    print("};\n")

np.random.seed(42)
num_rows, num_cols = 4, 6
x = np.random.randn(num_rows, num_cols).astype(np.float32)

mean, inv_std = compute_mean_and_inv_std(x)

dump_cpp_vector("x", x)
dump_cpp_vector("mean", mean)
dump_cpp_vector("inv_std", inv_std)

 */

TEST(ComputeMeanAndInvStd, Case1) {
  std::vector<float> x = {
      0.49671414f,  -0.13826430f, 0.64768857f, 1.52302980f,  -0.23415338f,
      -0.23413695f, 1.57921278f,  0.76743472f, -0.46947438f, 0.54256004f,
      -0.46341768f, -0.46572974f, 0.24196227f, -1.91328025f, -1.72491789f,
      -0.56228751f, -1.01283109f, 0.31424734f, -0.90802407f, -1.41230369f,
      1.46564877f,  -0.22577630f, 0.06752820f, -1.42474818f,
  };

  std::vector<float> expected_mean = {
      0.35246629f, -0.67410338f, -0.02026373f,
      0.31938151f, -0.41071847f, -0.45259190f,
  };

  std::vector<float> expected_inv_std = {
      1.13103926f, 0.94854516f, 0.83320111f,
      1.24679470f, 2.52932906f, 1.59057319f,
  };

  std::vector<float> mean;
  std::vector<float> inv_std;

  int32_t num_rows = 4;
  int32_t num_cols = 6;

  ComputeMeanAndInvStd(x.data(), num_rows, num_cols, &mean, &inv_std);

  ASSERT_EQ(mean.size(), num_cols);
  ASSERT_EQ(inv_std.size(), num_cols);

  for (int32_t i = 0; i < num_cols; ++i) {
    EXPECT_NEAR(mean[i], expected_mean[i], 1e-6f) << "at index " << i;
    EXPECT_NEAR(inv_std[i], expected_inv_std[i], 1e-6f) << "at index " << i;
  }
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/math.cc
================================================
// sherpa-onnx/csrc/math.cc
//
// Copyright (c)  2025  Xiaomi Corporation
#include "sherpa-onnx/csrc/math.h"

#include <vector>

#include "Eigen/Dense"

namespace sherpa_onnx {

void ScaleAdd(const float *src, float scale, int32_t n, float *in_out) {
  Eigen::Map<const Eigen::ArrayXf> src_vec(src, n);
  Eigen::Map<Eigen::ArrayXf> inout_vec(in_out, n);

  inout_vec += scale * src_vec;
}

void Scale(const float *src, float scale, int32_t n, float *out) {
  Eigen::Map<const Eigen::ArrayXf> src_vec(src, n);
  Eigen::Map<Eigen::ArrayXf> out_vec(out, n);

  out_vec = scale * src_vec;
}

std::vector<float> MakeVorbisWindow(int32_t window_length) {
  constexpr float kPi = 3.14159265358979323846f;
  std::vector<float> window(window_length);
  const float half = window_length / 2.0f;
  for (int32_t i = 0; i != window_length; ++i) {
    float s = std::sin(0.5f * kPi * (i + 0.5f) / half);
    window[i] = std::sin(0.5f * kPi * s * s);
  }

  return window;
}

// this if for Paraformer
std::vector<float> ComputeAcousticEmbedding(
    const std::vector<float> &encoder_out, const std::vector<float> &alphas,
    int32_t encoder_dim) {
  std::vector<float> ans;
  ans.reserve(encoder_out.size());

  float acc = 0;
  std::vector<float> cur_emb(encoder_dim);
  for (int32_t i = 0; i < static_cast<int32_t>(alphas.size()); ++i) {
    float w = alphas[i];

    acc += w;
    if (acc >= 1) {
      float overflow = acc - 1;
      float remain = w - overflow;

      ScaleAdd(encoder_out.data() + i * encoder_dim, remain, encoder_dim,
               cur_emb.data());

      ans.insert(ans.end(), cur_emb.begin(), cur_emb.end());

      Scale(encoder_out.data() + i * encoder_dim, overflow, encoder_dim,
            cur_emb.data());

      acc = overflow;
    } else {
      ScaleAdd(encoder_out.data() + i * encoder_dim, w, encoder_dim,
               cur_emb.data());
    }
  }
  // TODO(fangjun): The last cur_emb is not used

  return ans;
}

std::vector<float> Transpose(const float *input, int32_t rows, int32_t cols) {
  std::vector<float> output(cols * rows);

  Eigen::Map<const Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic,
                                 Eigen::RowMajor>>
      in(input, rows, cols);

  Eigen::Map<
      Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
      out(output.data(), cols, rows);

  out.noalias() = in.transpose();

  return output;
}

void ComputeMeanAndInvStd(const float *p, int32_t num_rows, int32_t num_cols,
                          std::vector<float> *mean,
                          std::vector<float> *inv_stddev) {
  using RowMajorMat =
      Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;

  Eigen::Map<const RowMajorMat> X(p, num_rows, num_cols);

  Eigen::RowVectorXf mean_vec = X.colwise().mean();

  Eigen::RowVectorXf mean_sq = X.array().square().colwise().mean();

  Eigen::RowVectorXf var = mean_sq.array() - mean_vec.array().square();

  Eigen::RowVectorXf stddev = var.array().max(0.0f).sqrt();

  Eigen::RowVectorXf inv_std = (stddev.array() + 1e-5f).inverse();

  mean->assign(mean_vec.data(), mean_vec.data() + num_cols);

  inv_stddev->assign(inv_std.data(), inv_std.data() + num_cols);
}

void NormalizeWhisperFeatures(float *features, int32_t num_frames,
                              int32_t feat_dim) {
  // log_spec = torch.clamp(features, min=1e-10).log10()
  // log_spec = torch.maximum(log_spec, log_spec.max() - 8.0)
  // mel = (log_spec + 4.0) / 4.0

  using Eigen::ArrayXXf;
  using Eigen::Map;

  Map<ArrayXXf, Eigen::RowMajor> feats(features, num_frames, feat_dim);

  feats = feats.max(1e-10f).log10();

  float max_v = feats.maxCoeff() - 8.0f;

  feats = feats.max(max_v);
  feats = (feats + 4.0f) / 4.0f;
}

int32_t MaxElementIndex(const float *v, int32_t n) {
  // Map raw pointer to an Eigen vector (no copy)
  Eigen::Map<const Eigen::VectorXf> vec(v, n);

  Eigen::Index maxIndex;
  vec.maxCoeff(&maxIndex);

  return static_cast<int32_t>(maxIndex);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/math.h
================================================
/**
 * Copyright (c)  2022  Xiaomi Corporation (authors: Daniel Povey)
 * Copyright (c)  2023                     (Pingfeng Luo)
 *
 */
// This file is copied from k2/csrc/utils.h
#ifndef SHERPA_ONNX_CSRC_MATH_H_
#define SHERPA_ONNX_CSRC_MATH_H_

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cmath>
#include <numeric>
#include <vector>

#include "Eigen/Dense"

namespace sherpa_onnx {

// logf(FLT_EPSILON)
#define SHERPA_ONNX_MIN_LOG_DIFF_FLOAT -15.9423847198486328125f

// log(DBL_EPSILON)
#define SHERPA_ONNX_MIN_LOG_DIFF_DOUBLE \
  -36.0436533891171535515240975655615329742431640625

template <typename T>
struct LogAdd;

template <>
struct LogAdd<double> {
  double operator()(double x, double y) const {
    double diff;

    if (x < y) {
      diff = x - y;
      x = y;
    } else {
      diff = y - x;
    }
    // diff is negative.  x is now the larger one.

    if (diff >= SHERPA_ONNX_MIN_LOG_DIFF_DOUBLE) {
      double res;
      res = x + log1p(exp(diff));
      return res;
    }

    return x;  // return the larger one.
  }
};

template <>
struct LogAdd<float> {
  float operator()(float x, float y) const {
    float diff;

    if (x < y) {
      diff = x - y;
      x = y;
    } else {
      diff = y - x;
    }
    // diff is negative.  x is now the larger one.

    if (diff >= SHERPA_ONNX_MIN_LOG_DIFF_DOUBLE) {
      float res;
      res = x + log1pf(expf(diff));
      return res;
    }

    return x;  // return the larger one.
  }
};

template <class T>
void LogSoftmax(T *input, int32_t input_len) {
  assert(input);

  T m = *std::max_element(input, input + input_len);

  T sum = 0.0;
  for (int32_t i = 0; i < input_len; i++) {
    sum += exp(input[i] - m);
  }

  T offset = m + log(sum);
  for (int32_t i = 0; i < input_len; i++) {
    input[i] -= offset;
  }
}

template <typename T>
void LogSoftmax(T *in, int32_t w, int32_t h) {
  for (int32_t i = 0; i != h; ++i) {
    LogSoftmax(in, w);
    in += w;
  }
}

template <typename T>
void SubtractBlank(T *in, int32_t w, int32_t h, int32_t blank_idx,
                   float blank_penalty) {
  for (int32_t i = 0; i != h; ++i) {
    in[blank_idx] -= blank_penalty;
    in += w;
  }
}

template <class T>
std::vector<int32_t> TopkIndex(const T *vec, int32_t size, int32_t topk) {
  std::vector<int32_t> vec_index(size);
  std::iota(vec_index.begin(), vec_index.end(), 0);

  std::partial_sort(vec_index.begin(), vec_index.begin() + topk,
                    vec_index.end(), [vec](int32_t index_1, int32_t index_2) {
                      return vec[index_1] > vec[index_2];
                    });

  int32_t k_num = std::min<int32_t>(size, topk);
  return {vec_index.begin(), vec_index.begin() + k_num};
}

template <class T>
std::vector<int32_t> TopkIndex(const std::vector<std::vector<T>> &vec,
                               int32_t topk) {
  std::vector<T> flatten;
  flatten.reserve(vec.size() * vec[0].size());
  for (const auto &v : vec) {
    flatten.insert(flatten.end(), v.begin(), v.end());
  }

  return TopkIndex(flatten.data(), flatten.size(), topk);
}

// in_out[i] += src[i] * scale
void ScaleAdd(const float *src, float scale, int32_t n, float *in_out);

// out[i] = src[i] * scale
void Scale(const float *src, float scale, int32_t n, float *out);

std::vector<float> MakeVorbisWindow(int32_t window_length);

// For Paraformer
std::vector<float> ComputeAcousticEmbedding(
    const std::vector<float> &encoder_out, const std::vector<float> &alphas,
    int32_t encoder_dim);

// Transpose a 2-D matrix in row-major
std::vector<float> Transpose(const float *input, int32_t rows, int32_t cols);

/* Compute mean and inverse stddev over rows.
 *
 * @param p  A pointer to a 2-d array of shape (num_rows, num_cols)
 * @param num_rows Number of rows
 * @param num_cols Number of columns
 * @param mean On return, it contains p.mean(axis=0). You don't need to
 *             pre-allocate space for it.
 * @param inv_stddev On return, it contains 1/p.std(axis=0) You don't need to
 *                   pre-allocate space for it.
 */
void ComputeMeanAndInvStd(const float *p, int32_t num_rows, int32_t num_cols,
                          std::vector<float> *mean,
                          std::vector<float> *inv_stddev);

void NormalizeWhisperFeatures(float *features, int32_t num_frames,
                              int32_t feat_dim);

int32_t MaxElementIndex(const float *v, int32_t n);

}  // namespace sherpa_onnx
#endif  // SHERPA_ONNX_CSRC_MATH_H_


================================================
FILE: sherpa-onnx/csrc/melo-tts-lexicon.cc
================================================
// sherpa-onnx/csrc/melo-tts-lexicon.cc
//
// Copyright (c)  2022-2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/melo-tts-lexicon.h"

#include <fstream>
#include <regex>  // NOLINT
#include <sstream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/phrase-matcher.h"
#include "sherpa-onnx/csrc/symbol-table.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class MeloTtsLexicon::Impl {
 public:
  Impl(const std::string &lexicon, const std::string &tokens,
       const OfflineTtsVitsModelMetaData &meta_data, bool debug)
      : meta_data_(meta_data), debug_(debug) {
    {
      std::ifstream is(tokens);
      InitTokens(is);
    }

    {
      std::ifstream is(lexicon);
      InitLexicon(is);
    }
  }

  template <typename Manager>
  Impl(Manager *mgr, const std::string &lexicon, const std::string &tokens,
       const OfflineTtsVitsModelMetaData &meta_data, bool debug)
      : meta_data_(meta_data), debug_(debug) {
    {
      auto buf = ReadFile(mgr, tokens);

      std::istringstream is(std::string(buf.data(), buf.size()));
      InitTokens(is);
    }

    {
      auto buf = ReadFile(mgr, lexicon);

      std::istringstream is(std::string(buf.data(), buf.size()));
      InitLexicon(is);
    }
  }

  std::vector<TokenIDs> ConvertTextToTokenIds(const std::string &_text) const {
    std::string text = ToLowerCase(_text);
    // see
    // https://github.com/Plachtaa/VITS-fast-fine-tuning/blob/main/text/mandarin.py#L244
    std::regex punct_re{"：|、|；"};
    std::string s = std::regex_replace(text, punct_re, ",");

    std::regex punct_re2("。");
    s = std::regex_replace(s, punct_re2, ".");

    std::regex punct_re3("？");
    s = std::regex_replace(s, punct_re3, "?");

    std::regex punct_re4("！");
    s = std::regex_replace(s, punct_re4, "!");

    std::vector<std::string> words = SplitUtf8(text);

    if (debug_) {
#if __OHOS__
      SHERPA_ONNX_LOGE("input text:\n%{public}s", text.c_str());
      SHERPA_ONNX_LOGE("after replacing punctuations:\n%{public}s", s.c_str());
#else
      SHERPA_ONNX_LOGE("input text:\n%s", text.c_str());
      SHERPA_ONNX_LOGE("after replacing punctuations:\n%s", s.c_str());
#endif

      std::ostringstream os;
      std::string sep = "";
      for (const auto &w : words) {
        os << sep << w;
        sep = "_";
      }

#if __OHOS__
      SHERPA_ONNX_LOGE("after splitting into UTF8:\n%{public}s",
                       os.str().c_str());
#else
      SHERPA_ONNX_LOGE("after splitting into UTF8:\n%s", os.str().c_str());
#endif
    }

    std::vector<TokenIDs> ans;
    TokenIDs this_sentence;

    PhraseMatcher matcher(&all_words_, words, debug_);

    for (const std::string &w : matcher) {
      auto ids = ConvertWordToIds(w);
      if (ids.tokens.empty()) {
#if __OHOS__
        SHERPA_ONNX_LOGE("Ignore OOV '%{public}s'", w.c_str());
#else
        SHERPA_ONNX_LOGE("Ignore OOV '%s'", w.c_str());
#endif
        continue;
      }

      if (debug_) {
        std::ostringstream os;
        os << w << ": ";
        for (auto i : ids.tokens) {
          os << id2token_.at(i) << " ";
        }

        for (auto i : ids.tones) {
          os << i << " ";
        }
        os << "\n";
#if __OHOS__
        SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
        SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
      }

      this_sentence.tokens.insert(this_sentence.tokens.end(),
                                  ids.tokens.begin(), ids.tokens.end());
      this_sentence.tones.insert(this_sentence.tones.end(), ids.tones.begin(),
                                 ids.tones.end());

      if (w == "." || w == "!" || w == "?" || w == "," || w == "。" ||
          w == "！" || w == "？" || w == "，") {
        ans.push_back(std::move(this_sentence));
        this_sentence = {};
      }
    }  // for (const std::string &w : matcher)

    if (!this_sentence.tokens.empty()) {
      ans.push_back(std::move(this_sentence));
    }

    return ans;
  }

 private:
  TokenIDs ConvertWordToIds(const std::string &w) const {
    if (word2ids_.count(w)) {
      return word2ids_.at(w);
    }

    if (token2id_.count(w)) {
      return {{token2id_.at(w)}, {0}};
    }

    TokenIDs ans;

    std::vector<std::string> words = SplitUtf8(w);
    for (const auto &word : words) {
      if (word2ids_.count(word)) {
        auto ids = ConvertWordToIds(word);
        ans.tokens.insert(ans.tokens.end(), ids.tokens.begin(),
                          ids.tokens.end());
        ans.tones.insert(ans.tones.end(), ids.tones.begin(), ids.tones.end());
      } else {
        // If the lexicon does not contain the word, we split the word into
        // characters.
        //
        // For instance, if the word is TTS and it is does not exist
        // in the lexicon, we split it into 3 characters: T T S
        std::string s;
        for (char c : word) {
          s = c;
          if (word2ids_.count(s)) {
            const auto &t = word2ids_.at(s);
            ans.tokens.insert(ans.tokens.end(), t.tokens.begin(),
                              t.tokens.end());
            ans.tones.insert(ans.tones.end(), t.tones.begin(), t.tones.end());
          }
        }
      }
    }

    return ans;
  }

  void InitTokens(std::istream &is) {
    token2id_ = ReadTokens(is);

    if (debug_) {
      for (const auto &p : token2id_) {
        id2token_[p.second] = p.first;
      }
    }

    token2id_[" "] = token2id_["_"];

    std::vector<std::pair<std::string, std::string>> puncts = {
        {",", "，"}, {".", "。"}, {"!", "！"}, {"?", "？"}};

    for (const auto &p : puncts) {
      if (token2id_.count(p.first) && !token2id_.count(p.second)) {
        token2id_[p.second] = token2id_[p.first];
      }

      if (!token2id_.count(p.first) && token2id_.count(p.second)) {
        token2id_[p.first] = token2id_[p.second];
      }
    }

    if (!token2id_.count("、") && token2id_.count("，")) {
      token2id_["、"] = token2id_["，"];
    }

    // Map 'v' to 'V' token (same as post_replace_ph in MeloTTS)
    // Only for English models
    if (meta_data_.language == "en" && token2id_.count("V")) {
      token2id_["v"] = token2id_["V"];
    }
  }

  void InitLexicon(std::istream &is) {
    std::string word;
    std::vector<std::string> token_list;

    std::vector<std::string> phone_list;
    std::vector<int64_t> tone_list;

    std::string line;
    std::string phone;
    int32_t line_num = 0;

    while (std::getline(is, line)) {
      ++line_num;

      std::istringstream iss(line);

      token_list.clear();
      phone_list.clear();
      tone_list.clear();

      iss >> word;
      ToLowerCase(&word);

      if (word2ids_.count(word)) {
        SHERPA_ONNX_LOGE("Duplicated word: %s at line %d:%s. Ignore it.",
                         word.c_str(), line_num, line.c_str());
        continue;
      }

      while (iss >> phone) {
        token_list.push_back(std::move(phone));
      }

      if ((token_list.size() & 1) != 0) {
        SHERPA_ONNX_LOGE("Invalid line %d: '%s'", line_num, line.c_str());
        exit(-1);
      }

      int32_t num_phones = token_list.size() / 2;
      phone_list.reserve(num_phones);
      tone_list.reserve(num_phones);

      for (int32_t i = 0; i != num_phones; ++i) {
        phone_list.push_back(std::move(token_list[i]));
        tone_list.push_back(std::stoi(token_list[i + num_phones], nullptr));
        if (tone_list.back() < 0 || tone_list.back() > 50) {
          SHERPA_ONNX_LOGE("Invalid line %d: '%s'", line_num, line.c_str());
          exit(-1);
        }
      }

      std::vector<int32_t> ids = ConvertTokensToIds(token2id_, phone_list);
      if (ids.empty()) {
        continue;
      }

      if (ids.size() != num_phones) {
        SHERPA_ONNX_LOGE("Invalid line %d: '%s'", line_num, line.c_str());
        exit(-1);
      }

      std::vector<int64_t> ids64{ids.begin(), ids.end()};

      word2ids_.insert(
          {std::move(word), TokenIDs{std::move(ids64), std::move(tone_list)}});
    }

    // For Chinese+English MeloTTS
    word2ids_["呣"] = word2ids_["母"];
    word2ids_["嗯"] = word2ids_["恩"];

    for (const auto &[key, _] : word2ids_) {
      all_words_.insert(key);
    }
  }

 private:
  // lexicon.txt is saved in word2ids_
  std::unordered_map<std::string, TokenIDs> word2ids_;
  std::unordered_set<std::string> all_words_;

  // tokens.txt is saved in token2id_
  std::unordered_map<std::string, int32_t> token2id_;
  std::unordered_map<int32_t, std::string> id2token_;

  OfflineTtsVitsModelMetaData meta_data_;

  bool debug_ = false;
};

MeloTtsLexicon::~MeloTtsLexicon() = default;

MeloTtsLexicon::MeloTtsLexicon(const std::string &lexicon,
                               const std::string &tokens,
                               const OfflineTtsVitsModelMetaData &meta_data,
                               bool debug)
    : impl_(std::make_unique<Impl>(lexicon, tokens, meta_data, debug)) {}

template <typename Manager>
MeloTtsLexicon::MeloTtsLexicon(Manager *mgr, const std::string &lexicon,
                               const std::string &tokens,
                               const OfflineTtsVitsModelMetaData &meta_data,
                               bool debug)
    : impl_(std::make_unique<Impl>(mgr, lexicon, tokens, meta_data, debug)) {}

std::vector<TokenIDs> MeloTtsLexicon::ConvertTextToTokenIds(
    const std::string &text, const std::string & /*unused_voice = ""*/) const {
  return impl_->ConvertTextToTokenIds(text);
}

#if __ANDROID_API__ >= 9
template MeloTtsLexicon::MeloTtsLexicon(
    AAssetManager *mgr, const std::string &lexicon, const std::string &tokens,
    const OfflineTtsVitsModelMetaData &meta_data, bool debug);
#endif

#if __OHOS__
template MeloTtsLexicon::MeloTtsLexicon(
    NativeResourceManager *mgr, const std::string &lexicon,
    const std::string &tokens, const OfflineTtsVitsModelMetaData &meta_data,
    bool debug);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/melo-tts-lexicon.h
================================================
// sherpa-onnx/csrc/melo-tts-lexicon.h
//
// Copyright (c)  2022-2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_MELO_TTS_LEXICON_H_
#define SHERPA_ONNX_CSRC_MELO_TTS_LEXICON_H_

#include <memory>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/offline-tts-frontend.h"
#include "sherpa-onnx/csrc/offline-tts-vits-model-meta-data.h"

namespace sherpa_onnx {

class MeloTtsLexicon : public OfflineTtsFrontend {
 public:
  ~MeloTtsLexicon() override;
  MeloTtsLexicon(const std::string &lexicon, const std::string &tokens,
                 const OfflineTtsVitsModelMetaData &meta_data, bool debug);

  template <typename Manager>
  MeloTtsLexicon(Manager *mgr, const std::string &lexicon,
                 const std::string &tokens,
                 const OfflineTtsVitsModelMetaData &meta_data, bool debug);

  std::vector<TokenIDs> ConvertTextToTokenIds(
      const std::string &text,
      const std::string &unused_voice = "") const override;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_MELO_TTS_LEXICON_H_


================================================
FILE: sherpa-onnx/csrc/microphone.cc
================================================
// sherpa-onnx/csrc/microphone.cc
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/microphone.h"

#include <stdio.h>
#include <stdlib.h>

namespace sherpa_onnx {

Microphone::Microphone() {
  PaError err = Pa_Initialize();
  if (err != paNoError) {
    fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
    exit(-1);
  }
}

Microphone::~Microphone() {
  CloseDevice();
  PaError err = Pa_Terminate();
  if (err != paNoError) {
    fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
  }
}

int Microphone::GetDeviceCount() const { return Pa_GetDeviceCount(); }

int Microphone::GetDefaultInputDevice() const {
  return Pa_GetDefaultInputDevice();
}

void Microphone::PrintDevices(int device_index) const {
  int num_devices = Pa_GetDeviceCount();
  fprintf(stderr, "Num devices: %d\n", num_devices);
  for (int i = 0; i != num_devices; ++i) {
    const PaDeviceInfo *info = Pa_GetDeviceInfo(i);
    fprintf(stderr, " %s %d %s\n", (i == device_index) ? "*" : " ", i,
            info->name);
  }
}

bool Microphone::OpenDevice(int index, int sample_rate, int channel,
                            PaStreamCallback cb, void *userdata) {
  if (index < 0 || index >= Pa_GetDeviceCount()) {
    fprintf(stderr, "Invalid device index: %d\n", index);
    return false;
  }

  const PaDeviceInfo *info = Pa_GetDeviceInfo(index);
  if (!info) {
    fprintf(stderr, "No device info found for index: %d\n", index);
    return false;
  }

  CloseDevice();

  fprintf(stderr, "Use device: %d\n", index);
  fprintf(stderr, "  Name: %s\n", info->name);
  fprintf(stderr, "  Max input channels: %d\n", info->maxInputChannels);

  PaStreamParameters param;
  param.device = index;
  param.channelCount = channel;
  param.sampleFormat = paFloat32;
  param.suggestedLatency = info->defaultLowInputLatency;
  param.hostApiSpecificStreamInfo = nullptr;

  PaError err =
      Pa_OpenStream(&stream, &param, nullptr, /* &outputParameters, */
                    sample_rate,
                    0,          // frames per buffer
                    paClipOff,  // we won't output out of range samples
                                // so don't bother clipping them
                    cb, userdata);
  if (err != paNoError) {
    fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
    return false;
  }

  err = Pa_StartStream(stream);
  fprintf(stderr, "Started\n");

  if (err != paNoError) {
    fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
    CloseDevice();
    return false;
  }
  return true;
}

void Microphone::CloseDevice() {
  if (stream) {
    PaError err = Pa_CloseStream(stream);
    if (err != paNoError) {
      fprintf(stderr, "Pa_CloseStream error: %s\n", Pa_GetErrorText(err));
    }
    stream = nullptr;
  }
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/microphone.h
================================================
// sherpa-onnx/csrc/microphone.h
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_MICROPHONE_H_
#define SHERPA_ONNX_CSRC_MICROPHONE_H_
#include <cstdint>

#include "portaudio.h"  // NOLINT
namespace sherpa_onnx {

class Microphone {
 public:
  Microphone();
  ~Microphone();

  int32_t GetDeviceCount() const;
  int32_t GetDefaultInputDevice() const;
  void PrintDevices(int32_t sel) const;

  bool OpenDevice(int32_t index, int32_t sample_rate, int32_t channel,
                  PaStreamCallback cb, void *userdata);

  void CloseDevice();

 private:
  PaStream *stream = nullptr;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_MICROPHONE_H_


================================================
FILE: sherpa-onnx/csrc/normal-data-generator.cc
================================================
// sherpa-onnx/csrc/normal-data-generator.cc
//
// Copyright      2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/normal-data-generator.h"

#include <random>
#include <thread>

namespace sherpa_onnx {

// Helper type hidden in translation unit
namespace {
struct RNGHolder {
  std::mt19937 rng;
  std::normal_distribution<float> dist;

  RNGHolder()
      : rng([] {
          std::random_device rd;
          std::seed_seq seq{rd(),
                            static_cast<unsigned>(std::hash<std::thread::id>{}(
                                std::this_thread::get_id()))};
          return std::mt19937(seq);
        }()),
        dist() {}
};
}  // namespace

NormalDataGenerator::NormalDataGenerator(float mean /* = 0.0f*/,
                                         float stddev /* = 1.0f*/,
                                         int32_t seed /* = -1*/)
    : mean_(mean), stddev_(stddev), seed_(seed) {
  if (seed_ >= 0) {
    rng_.seed(static_cast<unsigned>(seed_));
  }
}

void NormalDataGenerator::Fill(float *data, std::size_t size) const {
  if (seed_ >= 0) {
    // Deterministic mode: use instance-level RNG
    std::normal_distribution<float> dist(mean_, stddev_);
    for (std::size_t i = 0; i < size; ++i) {
      data[i] = dist(rng_);
    }
  } else {
    // Original behavior: thread-local random device
    static thread_local RNGHolder holder;

    holder.dist.param(
        std::normal_distribution<float>::param_type(mean_, stddev_));

    for (std::size_t i = 0; i < size; ++i) {
      data[i] = holder.dist(holder.rng);
    }
  }
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/normal-data-generator.h
================================================
// sherpa-onnx/csrc/normal-data-generator.h
//
// Copyright      2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_NORMAL_DATA_GENERATOR_H_
#define SHERPA_ONNX_CSRC_NORMAL_DATA_GENERATOR_H_

#include <cstddef>
#include <cstdint>
#include <random>

namespace sherpa_onnx {

class NormalDataGenerator {
 public:
  explicit NormalDataGenerator(float mean = 0.0f, float stddev = 1.0f,
                               int32_t seed = -1);

  // Fill pre-allocated memory
  void Fill(float *data, std::size_t size) const;

 private:
  float mean_;
  float stddev_;
  int32_t seed_ = -1;         // -1 = use thread-local random device (default)
  mutable std::mt19937 rng_;  // used if seed_ >= 0
};

}  // namespace sherpa_onnx
#endif  // SHERPA_ONNX_CSRC_NORMAL_DATA_GENERATOR_H_


================================================
FILE: sherpa-onnx/csrc/offline-canary-model-config.cc
================================================
// sherpa-onnx/csrc/offline-canary-model-config.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-canary-model-config.h"

#include <sstream>
#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OfflineCanaryModelConfig::Register(ParseOptions *po) {
  po->Register("canary-encoder", &encoder,
               "Path to onnx encoder of Canary, e.g., encoder.int8.onnx");

  po->Register("canary-decoder", &decoder,
               "Path to onnx decoder of Canary, e.g., decoder.int8.onnx");

  po->Register("canary-src-lang", &src_lang,
               "Valid values: en, de, es, fr. If empty, default to use en");

  po->Register("canary-tgt-lang", &tgt_lang,
               "Valid values: en, de, es, fr. If empty, default to use en");

  po->Register("canary-use-pnc", &use_pnc,
               "true to enable punctuations and casing. false to disable them");
}

bool OfflineCanaryModelConfig::Validate() const {
  if (encoder.empty()) {
    SHERPA_ONNX_LOGE("Please provide --canary-encoder");
    return false;
  }

  if (!FileExists(encoder)) {
    SHERPA_ONNX_LOGE("Canary encoder file '%s' does not exist",
                     encoder.c_str());
    return false;
  }

  if (decoder.empty()) {
    SHERPA_ONNX_LOGE("Please provide --canary-decoder");
    return false;
  }

  if (!FileExists(decoder)) {
    SHERPA_ONNX_LOGE("Canary decoder file '%s' does not exist",
                     decoder.c_str());
    return false;
  }

  if (!src_lang.empty()) {
    if (src_lang != "en" && src_lang != "de" && src_lang != "es" &&
        src_lang != "fr") {
      SHERPA_ONNX_LOGE("Please use en, de, es, or fr for --canary-src-lang");
      return false;
    }
  }

  if (!tgt_lang.empty()) {
    if (tgt_lang != "en" && tgt_lang != "de" && tgt_lang != "es" &&
        tgt_lang != "fr") {
      SHERPA_ONNX_LOGE("Please use en, de, es, or fr for --canary-tgt-lang");
      return false;
    }
  }

  return true;
}

std::string OfflineCanaryModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineCanaryModelConfig(";
  os << "encoder=\"" << encoder << "\", ";
  os << "decoder=\"" << decoder << "\", ";
  os << "src_lang=\"" << src_lang << "\", ";
  os << "tgt_lang=\"" << tgt_lang << "\", ";
  os << "use_pnc=" << (use_pnc ? "True" : "False") << ")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-canary-model-config.h
================================================
// sherpa-onnx/csrc/offline-canary-model-config.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_CANARY_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_CANARY_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineCanaryModelConfig {
  std::string encoder;
  std::string decoder;

  // en, de, es, fr, or leave it empty to use en
  std::string src_lang;

  // en, de, es, fr, or leave it empty to use en
  std::string tgt_lang;

  // true to enable punctuations and casing
  // false to disable punctuations and casing
  bool use_pnc = true;

  OfflineCanaryModelConfig() = default;
  OfflineCanaryModelConfig(const std::string &encoder,
                           const std::string &decoder,
                           const std::string &src_lang,
                           const std::string &tgt_lang, bool use_pnc)
      : encoder(encoder),
        decoder(decoder),
        src_lang(src_lang),
        tgt_lang(tgt_lang),
        use_pnc(use_pnc) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_CANARY_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-canary-model-meta-data.h
================================================
// sherpa-onnx/csrc/offline-canary-model-meta-data.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_CANARY_MODEL_META_DATA_H_
#define SHERPA_ONNX_CSRC_OFFLINE_CANARY_MODEL_META_DATA_H_

#include <string>
#include <unordered_map>
#include <vector>

namespace sherpa_onnx {

struct OfflineCanaryModelMetaData {
  int32_t vocab_size;
  int32_t subsampling_factor = 8;
  int32_t feat_dim = 120;
  int32_t num_decoder_layers = 6;
  int32_t decoder_hidden_size = 1024;
  std::string normalize_type;
  std::unordered_map<std::string, int32_t> lang2id;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_CANARY_MODEL_META_DATA_H_


================================================
FILE: sherpa-onnx/csrc/offline-canary-model.cc
================================================
// sherpa-onnx/csrc/offline-canary-model.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-canary-model.h"

#include <algorithm>
#include <cmath>
#include <memory>
#include <string>
#include <tuple>
#include <unordered_map>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/offline-canary-model-meta-data.h"

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class OfflineCanaryModel::Impl {
 public:
  explicit Impl(const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    {
      auto buf = ReadFile(config.canary.encoder);
      InitEncoder(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(config.canary.decoder);
      InitDecoder(buf.data(), buf.size());
    }
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    {
      auto buf = ReadFile(mgr, config.canary.encoder);
      InitEncoder(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(mgr, config.canary.decoder);
      InitDecoder(buf.data(), buf.size());
    }
  }

  std::vector<Ort::Value> ForwardEncoder(Ort::Value features,
                                         Ort::Value features_length) {
    std::array<Ort::Value, 2> encoder_inputs = {std::move(features),
                                                std::move(features_length)};

    auto encoder_out = encoder_sess_->Run(
        {}, encoder_input_names_ptr_.data(), encoder_inputs.data(),
        encoder_inputs.size(), encoder_output_names_ptr_.data(),
        encoder_output_names_ptr_.size());

    return encoder_out;
  }

  std::pair<Ort::Value, std::vector<Ort::Value>> ForwardDecoder(
      Ort::Value tokens, std::vector<Ort::Value> decoder_states,
      Ort::Value encoder_states, Ort::Value enc_mask) {
    std::vector<Ort::Value> decoder_inputs;
    decoder_inputs.reserve(3 + decoder_states.size());

    decoder_inputs.push_back(std::move(tokens));
    for (auto &s : decoder_states) {
      decoder_inputs.push_back(std::move(s));
    }

    decoder_inputs.push_back(std::move(encoder_states));
    decoder_inputs.push_back(std::move(enc_mask));

    auto decoder_outputs = decoder_sess_->Run(
        {}, decoder_input_names_ptr_.data(), decoder_inputs.data(),
        decoder_inputs.size(), decoder_output_names_ptr_.data(),
        decoder_output_names_ptr_.size());

    Ort::Value logits = std::move(decoder_outputs[0]);

    std::vector<Ort::Value> output_decoder_states;
    output_decoder_states.reserve(decoder_states.size());

    int32_t i = 0;
    for (auto &s : decoder_outputs) {
      i += 1;
      if (i == 1) {
        continue;
      }
      output_decoder_states.push_back(std::move(s));
    }

    return {std::move(logits), std::move(output_decoder_states)};
  }

  std::vector<Ort::Value> GetInitialDecoderStates() {
    int32_t num_layers = meta_.num_decoder_layers;
    int64_t hidden_size = meta_.decoder_hidden_size;
    std::array<int64_t, 3> shape{1, 0, hidden_size};

    std::vector<Ort::Value> ans;
    ans.reserve(num_layers);
    for (int32_t i = 0; i < num_layers; ++i) {
      Ort::Value state = Ort::Value::CreateTensor<float>(
          Allocator(), shape.data(), shape.size());

      ans.push_back(std::move(state));
    }

    return ans;
  }

  OrtAllocator *Allocator() { return allocator_; }

  const OfflineCanaryModelMetaData &GetModelMetadata() const { return meta_; }

  OfflineCanaryModelMetaData &GetModelMetadata() { return meta_; }

 private:
  void InitEncoder(void *model_data, size_t model_data_length) {
    encoder_sess_ = std::make_unique<Ort::Session>(
        env_, model_data, model_data_length, sess_opts_);

    GetInputNames(encoder_sess_.get(), &encoder_input_names_,
                  &encoder_input_names_ptr_);

    GetOutputNames(encoder_sess_.get(), &encoder_output_names_,
                   &encoder_output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = encoder_sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      os << "---encoder---\n";
      PrintModelMetadata(os, meta_data);
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below

    std::string model_type;
    SHERPA_ONNX_READ_META_DATA_STR(model_type, "model_type");

    if (model_type != "EncDecMultiTaskModel") {
      SHERPA_ONNX_LOGE(
          "Expected model type 'EncDecMultiTaskModel'. Given: '%s'",
          model_type.c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    SHERPA_ONNX_READ_META_DATA(meta_.vocab_size, "vocab_size");
    SHERPA_ONNX_READ_META_DATA_STR_ALLOW_EMPTY(meta_.normalize_type,
                                               "normalize_type");
    SHERPA_ONNX_READ_META_DATA(meta_.subsampling_factor, "subsampling_factor");
    SHERPA_ONNX_READ_META_DATA(meta_.feat_dim, "feat_dim");

    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_.num_decoder_layers,
                                            "num_decoder_layers", 6);
    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_.decoder_hidden_size,
                                            "decoder_hidden_size", 1024);
  }

  void InitDecoder(void *model_data, size_t model_data_length) {
    decoder_sess_ = std::make_unique<Ort::Session>(
        env_, model_data, model_data_length, sess_opts_);

    GetInputNames(decoder_sess_.get(), &decoder_input_names_,
                  &decoder_input_names_ptr_);

    GetOutputNames(decoder_sess_.get(), &decoder_output_names_,
                   &decoder_output_names_ptr_);
  }

 private:
  OfflineCanaryModelMetaData meta_;
  OfflineModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> encoder_sess_;
  std::unique_ptr<Ort::Session> decoder_sess_;

  std::vector<std::string> encoder_input_names_;
  std::vector<const char *> encoder_input_names_ptr_;

  std::vector<std::string> encoder_output_names_;
  std::vector<const char *> encoder_output_names_ptr_;

  std::vector<std::string> decoder_input_names_;
  std::vector<const char *> decoder_input_names_ptr_;

  std::vector<std::string> decoder_output_names_;
  std::vector<const char *> decoder_output_names_ptr_;
};

OfflineCanaryModel::OfflineCanaryModel(const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineCanaryModel::OfflineCanaryModel(Manager *mgr,
                                       const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineCanaryModel::~OfflineCanaryModel() = default;

std::vector<Ort::Value> OfflineCanaryModel::ForwardEncoder(
    Ort::Value features, Ort::Value features_length) const {
  return impl_->ForwardEncoder(std::move(features), std::move(features_length));
}

std::pair<Ort::Value, std::vector<Ort::Value>>
OfflineCanaryModel::ForwardDecoder(Ort::Value tokens,
                                   std::vector<Ort::Value> decoder_states,
                                   Ort::Value encoder_states,
                                   Ort::Value enc_mask) const {
  return impl_->ForwardDecoder(std::move(tokens), std::move(decoder_states),
                               std::move(encoder_states), std::move(enc_mask));
}

std::vector<Ort::Value> OfflineCanaryModel::GetInitialDecoderStates() const {
  return impl_->GetInitialDecoderStates();
}

OrtAllocator *OfflineCanaryModel::Allocator() const {
  return impl_->Allocator();
}

const OfflineCanaryModelMetaData &OfflineCanaryModel::GetModelMetadata() const {
  return impl_->GetModelMetadata();
}
OfflineCanaryModelMetaData &OfflineCanaryModel::GetModelMetadata() {
  return impl_->GetModelMetadata();
}

#if __ANDROID_API__ >= 9
template OfflineCanaryModel::OfflineCanaryModel(
    AAssetManager *mgr, const OfflineModelConfig &config);
#endif

#if __OHOS__
template OfflineCanaryModel::OfflineCanaryModel(
    NativeResourceManager *mgr, const OfflineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-canary-model.h
================================================
// sherpa-onnx/csrc/offline-canary-model.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_CANARY_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_CANARY_MODEL_H_

#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-canary-model-meta-data.h"
#include "sherpa-onnx/csrc/offline-model-config.h"

namespace sherpa_onnx {

// see
// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/nemo/canary/test_180m_flash.py
class OfflineCanaryModel {
 public:
  explicit OfflineCanaryModel(const OfflineModelConfig &config);

  template <typename Manager>
  OfflineCanaryModel(Manager *mgr, const OfflineModelConfig &config);

  ~OfflineCanaryModel();

  /** Run the encoder.
   *
   * @param features  A tensor of shape (N, T, C) of dtype float32.
   * @param features_length  A 1-D tensor of shape (N,) containing number of
   *                         valid frames in `features` before padding.
   *                         Its dtype is int64_t.
   *
   * @return Return a vector containing:
   *  - encoder_states: A 3-D tensor of shape (N, T', encoder_dim)
   *  - encoder_len: A 1-D tensor of shape (N,) containing number
   *                        of frames in `encoder_out` before padding.
   *                        Its dtype is int64_t
   *  - enc_mask: A 2-D tensor of shape (N, T') with dtype bool
   */
  std::vector<Ort::Value> ForwardEncoder(Ort::Value features,
                                         Ort::Value features_length) const;

  /** Run the decoder model.
   *
   * @param tokens A int32 tensor of shape (N, num_tokens)
   * @param decoder_states std::vector<Ort::Value>
   * @param encoder_states Output from ForwardEncoder()
   * @param enc_mask Output from ForwardEncoder()
   *
   * @return Return a pair:
   *
   *  - logits A 3-D tensor of shape (N, num_words, vocab_size)
   *  - new_decoder_states: Can be used as input for ForwardDecoder()
   */
  std::pair<Ort::Value, std::vector<Ort::Value>> ForwardDecoder(
      Ort::Value tokens, std::vector<Ort::Value> decoder_states,
      Ort::Value encoder_states, Ort::Value enc_mask) const;

  // The return value can be used as input for ForwardDecoder()
  std::vector<Ort::Value> GetInitialDecoderStates() const;

  /** Return an allocator for allocating memory
   */
  OrtAllocator *Allocator() const;

  const OfflineCanaryModelMetaData &GetModelMetadata() const;

  OfflineCanaryModelMetaData &GetModelMetadata();

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_CANARY_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/offline-ced-model.cc
================================================
// sherpa-onnx/csrc/offline-ced-model.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-ced-model.h"

#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"
#include "sherpa-onnx/csrc/transpose.h"

namespace sherpa_onnx {

class OfflineCEDModel::Impl {
 public:
  explicit Impl(const AudioTaggingModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(config_.ced);
    Init(buf.data(), buf.size());
  }

#if __ANDROID_API__ >= 9
  Impl(AAssetManager *mgr, const AudioTaggingModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(mgr, config_.ced);
    Init(buf.data(), buf.size());
  }
#endif

  Ort::Value Forward(Ort::Value features) {
    features = Transpose12(allocator_, &features);

    auto ans = sess_->Run({}, input_names_ptr_.data(), &features, 1,
                          output_names_ptr_.data(), output_names_ptr_.size());
    return std::move(ans[0]);
  }

  int32_t NumEventClasses() const { return num_event_classes_; }

  OrtAllocator *Allocator() { return allocator_; }

 private:
  void Init(void *model_data, size_t model_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);

    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      PrintModelMetadata(os, meta_data);
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
    }

    // get num_event_classes from the output[0].shape,
    // which is (N, num_event_classes)
    num_event_classes_ =
        sess_->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape()[1];
  }

 private:
  AudioTaggingModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;

  int32_t num_event_classes_ = 0;
};

OfflineCEDModel::OfflineCEDModel(const AudioTaggingModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

#if __ANDROID_API__ >= 9
OfflineCEDModel::OfflineCEDModel(AAssetManager *mgr,
                                 const AudioTaggingModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}
#endif

OfflineCEDModel::~OfflineCEDModel() = default;

Ort::Value OfflineCEDModel::Forward(Ort::Value features) const {
  return impl_->Forward(std::move(features));
}

int32_t OfflineCEDModel::NumEventClasses() const {
  return impl_->NumEventClasses();
}

OrtAllocator *OfflineCEDModel::Allocator() const { return impl_->Allocator(); }

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-ced-model.h
================================================
// sherpa-onnx/csrc/offline-ced-model.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_CED_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_CED_MODEL_H_
#include <memory>
#include <utility>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/audio-tagging-model-config.h"

namespace sherpa_onnx {

/** This class implements the CED model from
 * https://github.com/RicherMans/CED/blob/main/export_onnx.py
 */
class OfflineCEDModel {
 public:
  explicit OfflineCEDModel(const AudioTaggingModelConfig &config);

#if __ANDROID_API__ >= 9
  OfflineCEDModel(AAssetManager *mgr, const AudioTaggingModelConfig &config);
#endif

  ~OfflineCEDModel();

  /** Run the forward method of the model.
   *
   * @param features  A tensor of shape (N, T, C).
   *
   * @return Return a tensor
   *  - probs: A 2-D tensor of shape (N, num_event_classes).
   */
  Ort::Value Forward(Ort::Value features) const;

  /** Return the number of event classes of the model
   */
  int32_t NumEventClasses() const;

  /** Return an allocator for allocating memory
   */
  OrtAllocator *Allocator() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_CED_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/offline-ct-transformer-model-meta-data.h
================================================
// sherpa-onnx/csrc/offline-ct-transformer-model-meta-data.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_CT_TRANSFORMER_MODEL_META_DATA_H_
#define SHERPA_ONNX_CSRC_OFFLINE_CT_TRANSFORMER_MODEL_META_DATA_H_

#include <string>
#include <unordered_map>
#include <vector>

namespace sherpa_onnx {

struct OfflineCtTransformerModelMetaData {
  std::unordered_map<std::string, int32_t> token2id;
  std::unordered_map<std::string, int32_t> punct2id;
  std::vector<std::string> id2punct;

  int32_t unk_id;
  int32_t dot_id;
  int32_t comma_id;
  int32_t quest_id;
  int32_t pause_id;
  int32_t underline_id;
  int32_t num_punctuations;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_CT_TRANSFORMER_MODEL_META_DATA_H_


================================================
FILE: sherpa-onnx/csrc/offline-ct-transformer-model.cc
================================================
// sherpa-onnx/csrc/offline-ct-transformer-model.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-ct-transformer-model.h"

#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class OfflineCtTransformerModel::Impl {
 public:
  explicit Impl(const OfflinePunctuationModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(config_.ct_transformer);
    Init(buf.data(), buf.size());
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflinePunctuationModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(mgr, config_.ct_transformer);
    Init(buf.data(), buf.size());
  }

  Ort::Value Forward(Ort::Value text, Ort::Value text_len) {
    std::array<Ort::Value, 2> inputs = {std::move(text), std::move(text_len)};

    auto ans =
        sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(),
                   output_names_ptr_.data(), output_names_ptr_.size());
    return std::move(ans[0]);
  }

  OrtAllocator *Allocator() { return allocator_; }

  const OfflineCtTransformerModelMetaData &GetModelMetadata() const {
    return meta_data_;
  }

 private:
  void Init(void *model_data, size_t model_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);

    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below

    std::vector<std::string> tokens;
    SHERPA_ONNX_READ_META_DATA_VEC_STRING_SEP(tokens, "tokens", "|");

    int32_t vocab_size = 0;
    SHERPA_ONNX_READ_META_DATA(vocab_size, "vocab_size");
    if (static_cast<int32_t>(tokens.size()) != vocab_size) {
      SHERPA_ONNX_LOGE("tokens.size() %d != vocab_size %d",
                       static_cast<int32_t>(tokens.size()), vocab_size);
      exit(-1);
    }

    SHERPA_ONNX_READ_META_DATA_VEC_STRING_SEP(meta_data_.id2punct,
                                              "punctuations", "|");

    std::string unk_symbol;
    SHERPA_ONNX_READ_META_DATA_STR(unk_symbol, "unk_symbol");

    // output shape is (N, T, num_punctuations)
    meta_data_.num_punctuations =
        sess_->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape()[2];

    int32_t i = 0;
    for (const auto &t : tokens) {
      meta_data_.token2id[t] = i;
      i += 1;
    }

    i = 0;
    for (const auto &p : meta_data_.id2punct) {
      meta_data_.punct2id[p] = i;
      i += 1;
    }

    meta_data_.unk_id = meta_data_.token2id.at(unk_symbol);

    meta_data_.dot_id = meta_data_.punct2id.at("。");
    meta_data_.comma_id = meta_data_.punct2id.at("，");
    meta_data_.quest_id = meta_data_.punct2id.at("？");
    meta_data_.pause_id = meta_data_.punct2id.at("、");
    meta_data_.underline_id = meta_data_.punct2id.at("_");

    if (config_.debug) {
      std::ostringstream os;
      os << "vocab_size: " << meta_data_.token2id.size() << "\n";
      os << "num_punctuations: " << meta_data_.num_punctuations << "\n";
      os << "punctuations: ";
      for (const auto &s : meta_data_.id2punct) {
        os << s << " ";
      }
      os << "\n";
      SHERPA_ONNX_LOGE("\n%s\n", os.str().c_str());
    }
  }

 private:
  OfflinePunctuationModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;

  OfflineCtTransformerModelMetaData meta_data_;
};

OfflineCtTransformerModel::OfflineCtTransformerModel(
    const OfflinePunctuationModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineCtTransformerModel::OfflineCtTransformerModel(
    Manager *mgr, const OfflinePunctuationModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

#if __ANDROID_API__ >= 9
template OfflineCtTransformerModel::OfflineCtTransformerModel(
    AAssetManager *mgr, const OfflinePunctuationModelConfig &config);
#endif

#if __OHOS__
template OfflineCtTransformerModel::OfflineCtTransformerModel(
    NativeResourceManager *mgr, const OfflinePunctuationModelConfig &config);
#endif

OfflineCtTransformerModel::~OfflineCtTransformerModel() = default;

Ort::Value OfflineCtTransformerModel::Forward(Ort::Value text,
                                              Ort::Value text_len) const {
  return impl_->Forward(std::move(text), std::move(text_len));
}

OrtAllocator *OfflineCtTransformerModel::Allocator() const {
  return impl_->Allocator();
}

const OfflineCtTransformerModelMetaData &
OfflineCtTransformerModel::GetModelMetadata() const {
  return impl_->GetModelMetadata();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-ct-transformer-model.h
================================================
// sherpa-onnx/csrc/offline-ct-transformer-model.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_CT_TRANSFORMER_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_CT_TRANSFORMER_MODEL_H_
#include <memory>
#include <utility>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-ct-transformer-model-meta-data.h"
#include "sherpa-onnx/csrc/offline-punctuation-model-config.h"

namespace sherpa_onnx {

/** This class implements
 * https://github.com/alibaba-damo-academy/FunASR/blob/main/runtime/python/onnxruntime/funasr_onnx/punc_bin.py#L17
 * from FunASR
 */
class OfflineCtTransformerModel {
 public:
  explicit OfflineCtTransformerModel(
      const OfflinePunctuationModelConfig &config);

  template <typename Manager>
  OfflineCtTransformerModel(Manager *mgr,
                            const OfflinePunctuationModelConfig &config);

  ~OfflineCtTransformerModel();

  /** Run the forward method of the model.
   *
   * @param text  A tensor of shape (N, T) of dtype int32.
   * @param text  A tensor of shape (N) of dtype int32.
   *
   * @return Return a tensor
   *  - punctuation_ids: A 2-D tensor of shape (N, T).
   */
  Ort::Value Forward(Ort::Value text, Ort::Value text_len) const;

  /** Return an allocator for allocating memory
   */
  OrtAllocator *Allocator() const;

  const OfflineCtTransformerModelMetaData &GetModelMetadata() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_CT_TRANSFORMER_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/offline-ctc-decoder.h
================================================
// sherpa-onnx/csrc/offline-ctc-decoder.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_CTC_DECODER_H_
#define SHERPA_ONNX_CSRC_OFFLINE_CTC_DECODER_H_

#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT

namespace sherpa_onnx {

struct OfflineCtcDecoderResult {
  /// The decoded token IDs
  std::vector<int64_t> tokens;

  /// The decoded word IDs
  /// Note: tokens.size() is usually not equal to words.size()
  /// words is empty for greedy search decoding.
  /// it is not empty when an HLG graph or an HLG graph is used.
  std::vector<int32_t> words;

  /// timestamps[i] contains the output frame index where tokens[i] is decoded.
  /// Note: The index is after subsampling
  ///
  /// tokens.size() == timestamps.size()
  std::vector<int32_t> timestamps;
};

class OfflineCtcDecoder {
 public:
  virtual ~OfflineCtcDecoder() = default;

  /** Run CTC decoding given the output from the encoder model.
   *
   * @param log_probs A 3-D tensor of shape (N, T, vocab_size) containing
   *                  lob_probs.
   * @param log_probs_length A 1-D tensor of shape (N,) containing number
   *                         of valid frames in log_probs before padding.
   *
   * @return Return a vector of size `N` containing the decoded results.
   */
  virtual std::vector<OfflineCtcDecoderResult> Decode(
      Ort::Value log_probs, Ort::Value log_probs_length) = 0;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_CTC_DECODER_H_


================================================
FILE: sherpa-onnx/csrc/offline-ctc-fst-decoder-config.cc
================================================
// sherpa-onnx/csrc/offline-ctc-fst-decoder-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-ctc-fst-decoder-config.h"

#include <sstream>
#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

std::string OfflineCtcFstDecoderConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineCtcFstDecoderConfig(";
  os << "graph=\"" << graph << "\", ";
  os << "max_active=" << max_active << ")";

  return os.str();
}

void OfflineCtcFstDecoderConfig::Register(ParseOptions *po) {
  std::string prefix = "ctc";
  ParseOptions p(prefix, po);

  p.Register("graph", &graph, "Path to H.fst, HL.fst, or HLG.fst");

  p.Register("max-active", &max_active,
             "Decoder max active states.  Larger->slower; more accurate");
}

bool OfflineCtcFstDecoderConfig::Validate() const {
  if (!graph.empty() && !FileExists(graph)) {
    SHERPA_ONNX_LOGE("graph: '%s' does not exist", graph.c_str());
    return false;
  }
  return true;
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-ctc-fst-decoder-config.h
================================================
// sherpa-onnx/csrc/offline-ctc-fst-decoder-config.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_CTC_FST_DECODER_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_CTC_FST_DECODER_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineCtcFstDecoderConfig {
  // Path to H.fst, HL.fst or HLG.fst
  std::string graph;
  int32_t max_active = 3000;

  OfflineCtcFstDecoderConfig() = default;

  OfflineCtcFstDecoderConfig(const std::string &graph, int32_t max_active)
      : graph(graph), max_active(max_active) {}

  std::string ToString() const;

  void Register(ParseOptions *po);
  bool Validate() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_CTC_FST_DECODER_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-ctc-fst-decoder.cc
================================================
// sherpa-onnx/csrc/offline-ctc-fst-decoder.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-ctc-fst-decoder.h"

#include <string>
#include <utility>
#include <vector>

#include "fst/fstlib.h"
#include "kaldi-decoder/csrc/decodable-ctc.h"
#include "kaldi-decoder/csrc/eigen.h"
#include "kaldi-decoder/csrc/faster-decoder.h"
#include "sherpa-onnx/csrc/fst-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

/**
 * @param decoder
 * @param p Pointer to a 2-d array of shape (num_frames, vocab_size)
 * @param num_frames Number of rows in the 2-d array.
 * @param vocab_size Number of columns in the 2-d array.
 * @return Return the decoded result.
 */
static OfflineCtcDecoderResult DecodeOne(kaldi_decoder::FasterDecoder *decoder,
                                         const float *p, int32_t num_frames,
                                         int32_t vocab_size) {
  OfflineCtcDecoderResult r;
  kaldi_decoder::DecodableCtc decodable(p, num_frames, vocab_size);

  decoder->Decode(&decodable);

  if (!decoder->ReachedFinal()) {
    SHERPA_ONNX_LOGE("Not reached final!");
    return r;
  }

  fst::VectorFst<fst::LatticeArc> decoded;  // linear FST.
  decoder->GetBestPath(&decoded);

  if (decoded.NumStates() == 0) {
    SHERPA_ONNX_LOGE("Empty best path!");
    return r;
  }

  auto cur_state = decoded.Start();

  int32_t blank_id = 0;

  for (int32_t t = 0, prev = -1; decoded.NumArcs(cur_state) == 1; ++t) {
    fst::ArcIterator<fst::Fst<fst::LatticeArc>> iter(decoded, cur_state);
    const auto &arc = iter.Value();

    cur_state = arc.nextstate;

    if (arc.ilabel == prev) {
      continue;
    }

    // 0 is epsilon here
    if (arc.ilabel == 0 || arc.ilabel == blank_id + 1) {
      prev = arc.ilabel;
      continue;
    }

    // -1 here since the input labels are incremented during graph
    // construction
    r.tokens.push_back(arc.ilabel - 1);
    if (arc.olabel != 0) {
      r.words.push_back(arc.olabel);
    }

    r.timestamps.push_back(t);
    prev = arc.ilabel;
  }

  return r;
}

OfflineCtcFstDecoder::OfflineCtcFstDecoder(
    const OfflineCtcFstDecoderConfig &config)
    : config_(config), fst_(ReadGraph(config_.graph)) {}

std::vector<OfflineCtcDecoderResult> OfflineCtcFstDecoder::Decode(
    Ort::Value log_probs, Ort::Value log_probs_length) {
  std::vector<int64_t> shape = log_probs.GetTensorTypeAndShapeInfo().GetShape();

  assert(static_cast<int32_t>(shape.size()) == 3);
  int32_t batch_size = shape[0];
  int32_t T = shape[1];
  int32_t vocab_size = shape[2];

  std::vector<int64_t> length_shape =
      log_probs_length.GetTensorTypeAndShapeInfo().GetShape();
  assert(static_cast<int32_t>(length_shape.size()) == 1);

  assert(shape[0] == length_shape[0]);

  kaldi_decoder::FasterDecoderOptions opts;
  opts.max_active = config_.max_active;
  kaldi_decoder::FasterDecoder faster_decoder(*fst_, opts);

  const float *start = log_probs.GetTensorData<float>();

  std::vector<OfflineCtcDecoderResult> ans;
  ans.reserve(batch_size);

  for (int32_t i = 0; i != batch_size; ++i) {
    const float *p = start + i * T * vocab_size;
    int32_t num_frames = log_probs_length.GetTensorData<int64_t>()[i];
    auto r = DecodeOne(&faster_decoder, p, num_frames, vocab_size);
    ans.push_back(std::move(r));
  }

  return ans;
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-ctc-fst-decoder.h
================================================
// sherpa-onnx/csrc/offline-ctc-fst-decoder.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_CTC_FST_DECODER_H_
#define SHERPA_ONNX_CSRC_OFFLINE_CTC_FST_DECODER_H_

#include <memory>
#include <vector>

#include "fst/fst.h"
#include "sherpa-onnx/csrc/offline-ctc-decoder.h"
#include "sherpa-onnx/csrc/offline-ctc-fst-decoder-config.h"
#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

class OfflineCtcFstDecoder : public OfflineCtcDecoder {
 public:
  explicit OfflineCtcFstDecoder(const OfflineCtcFstDecoderConfig &config);

  std::vector<OfflineCtcDecoderResult> Decode(
      Ort::Value log_probs, Ort::Value log_probs_length) override;

 private:
  OfflineCtcFstDecoderConfig config_;

  std::unique_ptr<fst::Fst<fst::StdArc>> fst_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_CTC_FST_DECODER_H_


================================================
FILE: sherpa-onnx/csrc/offline-ctc-greedy-search-decoder.cc
================================================
// sherpa-onnx/csrc/offline-ctc-greedy-search-decoder.h
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-ctc-greedy-search-decoder.h"

#include <algorithm>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

std::vector<OfflineCtcDecoderResult> OfflineCtcGreedySearchDecoder::Decode(
    Ort::Value log_probs, Ort::Value log_probs_length) {
  std::vector<int64_t> shape = log_probs.GetTensorTypeAndShapeInfo().GetShape();
  int32_t batch_size = static_cast<int32_t>(shape[0]);
  int32_t num_frames = static_cast<int32_t>(shape[1]);
  int32_t vocab_size = static_cast<int32_t>(shape[2]);

  const int64_t *p_log_probs_length = log_probs_length.GetTensorData<int64_t>();

  std::vector<OfflineCtcDecoderResult> ans;
  ans.reserve(batch_size);

  for (int32_t b = 0; b != batch_size; ++b) {
    const float *p_log_probs =
        log_probs.GetTensorData<float>() + b * num_frames * vocab_size;

    OfflineCtcDecoderResult r;
    int64_t prev_id = -1;

    for (int32_t t = 0; t != static_cast<int32_t>(p_log_probs_length[b]); ++t) {
      auto y = static_cast<int64_t>(std::distance(
          static_cast<const float *>(p_log_probs),
          std::max_element(
              static_cast<const float *>(p_log_probs),
              static_cast<const float *>(p_log_probs) + vocab_size)));
      p_log_probs += vocab_size;

      if (y != blank_id_ && y != prev_id) {
        r.tokens.push_back(y);
        r.timestamps.push_back(t);
      }
      prev_id = y;
    }  // for (int32_t t = 0; ...)

    ans.push_back(std::move(r));
  }
  return ans;
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-ctc-greedy-search-decoder.h
================================================
// sherpa-onnx/csrc/offline-ctc-greedy-search-decoder.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_CTC_GREEDY_SEARCH_DECODER_H_
#define SHERPA_ONNX_CSRC_OFFLINE_CTC_GREEDY_SEARCH_DECODER_H_

#include <vector>

#include "sherpa-onnx/csrc/offline-ctc-decoder.h"

namespace sherpa_onnx {

class OfflineCtcGreedySearchDecoder : public OfflineCtcDecoder {
 public:
  explicit OfflineCtcGreedySearchDecoder(int32_t blank_id)
      : blank_id_(blank_id) {}

  std::vector<OfflineCtcDecoderResult> Decode(
      Ort::Value log_probs, Ort::Value log_probs_length) override;

 private:
  int32_t blank_id_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_CTC_GREEDY_SEARCH_DECODER_H_


================================================
FILE: sherpa-onnx/csrc/offline-ctc-model.cc
================================================
// sherpa-onnx/csrc/offline-ctc-model.cc
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-ctc-model.h"

#include <algorithm>
#include <memory>
#include <sstream>
#include <string>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-dolphin-model.h"
#include "sherpa-onnx/csrc/offline-fire-red-asr-ctc-model.h"
#include "sherpa-onnx/csrc/offline-medasr-ctc-model.h"
#include "sherpa-onnx/csrc/offline-nemo-enc-dec-ctc-model.h"
#include "sherpa-onnx/csrc/offline-omnilingual-asr-ctc-model.h"
#include "sherpa-onnx/csrc/offline-tdnn-ctc-model.h"
#include "sherpa-onnx/csrc/offline-telespeech-ctc-model.h"
#include "sherpa-onnx/csrc/offline-wenet-ctc-model.h"
#include "sherpa-onnx/csrc/offline-zipformer-ctc-model.h"
#include "sherpa-onnx/csrc/onnx-utils.h"

namespace {

enum class ModelType : std::uint8_t {
  kEncDecCTCModelBPE,
  kEncDecCTCModel,
  kEncDecHybridRNNTCTCBPEModel,
  kTdnn,
  kZipformerCtc,
  kWenetCtc,
  kTeleSpeechCtc,
  kUnknown,
};

}  // namespace

namespace sherpa_onnx {

static ModelType GetModelType(char *model_data, size_t model_data_length,
                              bool debug) {
  Ort::Env env(ORT_LOGGING_LEVEL_ERROR);
  Ort::SessionOptions sess_opts;
  sess_opts.SetIntraOpNumThreads(1);
  sess_opts.SetInterOpNumThreads(1);

  auto sess = std::make_unique<Ort::Session>(env, model_data, model_data_length,
                                             sess_opts);

  Ort::ModelMetadata meta_data = sess->GetModelMetadata();
  if (debug) {
    std::ostringstream os;
    PrintModelMetadata(os, meta_data);
#if __OHOS__
    SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
    SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
  }

  Ort::AllocatorWithDefaultOptions allocator;
  auto model_type =
      LookupCustomModelMetaData(meta_data, "model_type", allocator);
  if (model_type.empty()) {
    SHERPA_ONNX_LOGE(
        "No model_type in the metadata!\n"
        "If you are using models from NeMo, please refer to\n"
        "https://huggingface.co/csukuangfj/"
        "sherpa-onnx-nemo-ctc-en-citrinet-512/blob/main/add-model-metadata.py\n"
        "or "
        "https://github.com/k2-fsa/sherpa-onnx/tree/master/scripts/nemo/"
        "fast-conformer-hybrid-transducer-ctc\n"
        "If you are using models from WeNet, please refer to\n"
        "https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/wenet/"
        "run.sh\n"
        "If you are using models from TeleSpeech, please refer to\n"
        "https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/tele-speech/"
        "add-metadata.py"
        "\n"
        "for how to add metadata to model.onnx\n");
    return ModelType::kUnknown;
  }

  if (model_type == "EncDecCTCModelBPE") {
    return ModelType::kEncDecCTCModelBPE;
  } else if (model_type == "EncDecCTCModel") {
    return ModelType::kEncDecCTCModel;
  } else if (model_type == "EncDecHybridRNNTCTCBPEModel") {
    return ModelType::kEncDecHybridRNNTCTCBPEModel;
  } else if (model_type == "tdnn") {
    return ModelType::kTdnn;
  } else if (model_type == "zipformer2_ctc") {
    return ModelType::kZipformerCtc;
  } else if (model_type == "wenet_ctc") {
    return ModelType::kWenetCtc;
  } else if (model_type == "telespeech_ctc") {
    return ModelType::kTeleSpeechCtc;
  } else {
    SHERPA_ONNX_LOGE("Unsupported model_type: %s", model_type.c_str());
    return ModelType::kUnknown;
  }
}

std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
    const OfflineModelConfig &config) {
  if (!config.dolphin.model.empty()) {
    return std::make_unique<OfflineDolphinModel>(config);
  } else if (!config.nemo_ctc.model.empty()) {
    return std::make_unique<OfflineNemoEncDecCtcModel>(config);
  } else if (!config.tdnn.model.empty()) {
    return std::make_unique<OfflineTdnnCtcModel>(config);
  } else if (!config.zipformer_ctc.model.empty()) {
    return std::make_unique<OfflineZipformerCtcModel>(config);
  } else if (!config.wenet_ctc.model.empty()) {
    return std::make_unique<OfflineWenetCtcModel>(config);
  } else if (!config.telespeech_ctc.empty()) {
    return std::make_unique<OfflineTeleSpeechCtcModel>(config);
  } else if (!config.omnilingual.model.empty()) {
    return std::make_unique<OfflineOmnilingualAsrCtcModel>(config);
  } else if (!config.medasr.model.empty()) {
    return std::make_unique<OfflineMedAsrCtcModel>(config);
  } else if (!config.fire_red_asr_ctc.model.empty()) {
    return std::make_unique<OfflineFireRedAsrCtcModel>(config);
  }

  // TODO(fangjun): Refactor it. We don't need to use model_type here
  ModelType model_type = ModelType::kUnknown;

  std::string filename;
  if (!config.nemo_ctc.model.empty()) {
    filename = config.nemo_ctc.model;
  } else if (!config.tdnn.model.empty()) {
    filename = config.tdnn.model;
  } else if (!config.zipformer_ctc.model.empty()) {
    filename = config.zipformer_ctc.model;
  } else if (!config.wenet_ctc.model.empty()) {
    filename = config.wenet_ctc.model;
  } else if (!config.telespeech_ctc.empty()) {
    filename = config.telespeech_ctc;
  } else {
    SHERPA_ONNX_LOGE("Please specify a CTC model");
    exit(-1);
  }

  {
    auto buffer = ReadFile(filename);

    model_type = GetModelType(buffer.data(), buffer.size(), config.debug);
  }

  switch (model_type) {
    case ModelType::kEncDecCTCModelBPE:
    case ModelType::kEncDecCTCModel:
      return std::make_unique<OfflineNemoEncDecCtcModel>(config);
    case ModelType::kEncDecHybridRNNTCTCBPEModel:
      return std::make_unique<OfflineNemoEncDecHybridRNNTCTCBPEModel>(config);
    case ModelType::kTdnn:
      return std::make_unique<OfflineTdnnCtcModel>(config);
    case ModelType::kZipformerCtc:
      return std::make_unique<OfflineZipformerCtcModel>(config);
    case ModelType::kWenetCtc:
      return std::make_unique<OfflineWenetCtcModel>(config);
    case ModelType::kTeleSpeechCtc:
      return std::make_unique<OfflineTeleSpeechCtcModel>(config);
    case ModelType::kUnknown:
      SHERPA_ONNX_LOGE("Unknown model type in offline CTC!");
      return nullptr;
  }

  return nullptr;
}

template <typename Manager>
std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
    Manager *mgr, const OfflineModelConfig &config) {
  if (!config.dolphin.model.empty()) {
    return std::make_unique<OfflineDolphinModel>(mgr, config);
  } else if (!config.nemo_ctc.model.empty()) {
    return std::make_unique<OfflineNemoEncDecCtcModel>(mgr, config);
  } else if (!config.tdnn.model.empty()) {
    return std::make_unique<OfflineTdnnCtcModel>(mgr, config);
  } else if (!config.zipformer_ctc.model.empty()) {
    return std::make_unique<OfflineZipformerCtcModel>(mgr, config);
  } else if (!config.wenet_ctc.model.empty()) {
    return std::make_unique<OfflineWenetCtcModel>(mgr, config);
  } else if (!config.telespeech_ctc.empty()) {
    return std::make_unique<OfflineTeleSpeechCtcModel>(mgr, config);
  } else if (!config.omnilingual.model.empty()) {
    return std::make_unique<OfflineOmnilingualAsrCtcModel>(mgr, config);
  } else if (!config.medasr.model.empty()) {
    return std::make_unique<OfflineMedAsrCtcModel>(mgr, config);
  } else if (!config.fire_red_asr_ctc.model.empty()) {
    return std::make_unique<OfflineFireRedAsrCtcModel>(mgr, config);
  }

  // TODO(fangjun): Refactor it. We don't need to use model_type here
  ModelType model_type = ModelType::kUnknown;

  std::string filename;
  if (!config.nemo_ctc.model.empty()) {
    filename = config.nemo_ctc.model;
  } else if (!config.tdnn.model.empty()) {
    filename = config.tdnn.model;
  } else if (!config.zipformer_ctc.model.empty()) {
    filename = config.zipformer_ctc.model;
  } else if (!config.wenet_ctc.model.empty()) {
    filename = config.wenet_ctc.model;
  } else if (!config.telespeech_ctc.empty()) {
    filename = config.telespeech_ctc;
  } else {
    SHERPA_ONNX_LOGE("Please specify a CTC model");
    exit(-1);
  }

  {
    auto buffer = ReadFile(mgr, filename);

    model_type = GetModelType(buffer.data(), buffer.size(), config.debug);
  }

  switch (model_type) {
    case ModelType::kEncDecCTCModelBPE:
    case ModelType::kEncDecCTCModel:
      return std::make_unique<OfflineNemoEncDecCtcModel>(mgr, config);
    case ModelType::kEncDecHybridRNNTCTCBPEModel:
      return std::make_unique<OfflineNemoEncDecHybridRNNTCTCBPEModel>(mgr,
                                                                      config);
    case ModelType::kTdnn:
      return std::make_unique<OfflineTdnnCtcModel>(mgr, config);
    case ModelType::kZipformerCtc:
      return std::make_unique<OfflineZipformerCtcModel>(mgr, config);
    case ModelType::kWenetCtc:
      return std::make_unique<OfflineWenetCtcModel>(mgr, config);
    case ModelType::kTeleSpeechCtc:
      return std::make_unique<OfflineTeleSpeechCtcModel>(mgr, config);
    case ModelType::kUnknown:
      SHERPA_ONNX_LOGE("Unknown model type in offline CTC!");
      return nullptr;
  }

  return nullptr;
}

#if __ANDROID_API__ >= 9
template std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
    AAssetManager *mgr, const OfflineModelConfig &config);
#endif

#if __OHOS__
template std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
    NativeResourceManager *mgr, const OfflineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-ctc-model.h
================================================
// sherpa-onnx/csrc/offline-ctc-model.h
//
// Copyright (c)  2022-2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_CTC_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_CTC_MODEL_H_

#include <memory>
#include <string>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-model-config.h"

namespace sherpa_onnx {

class OfflineCtcModel {
 public:
  virtual ~OfflineCtcModel() = default;

  static std::unique_ptr<OfflineCtcModel> Create(
      const OfflineModelConfig &config);

  template <typename Manager>
  static std::unique_ptr<OfflineCtcModel> Create(
      Manager *mgr, const OfflineModelConfig &config);

  /** Run the forward method of the model.
   *
   * @param features  A tensor of shape (N, T, C).
   * @param features_length  A 1-D tensor of shape (N,) containing number of
   *                         valid frames in `features` before padding.
   *                         Its dtype is int64_t.
   *
   * @return Return a vector containing:
   *  - log_probs: A 3-D tensor of shape (N, T', vocab_size).
   *  - log_probs_length A 1-D tensor of shape (N,). Its dtype is int64_t
   */
  virtual std::vector<Ort::Value> Forward(Ort::Value features,
                                          Ort::Value features_length) = 0;

  /** Return the vocabulary size of the model
   */
  virtual int32_t VocabSize() const = 0;

  /** SubsamplingFactor of the model
   *
   * For NeMo Citrinet, the subsampling factor is usually 4.
   * For NeMo Conformer CTC, the subsampling factor is usually 8.
   */
  virtual int32_t SubsamplingFactor() const { return 1; }

  /** Return an allocator for allocating memory
   */
  virtual OrtAllocator *Allocator() const = 0;

  /** For some models, e.g., those from NeMo, they require some preprocessing
   * for the features.
   */
  virtual std::string FeatureNormalizationMethod() const { return {}; }

  // Return true if the model supports batch size > 1
  virtual bool SupportBatchProcessing() const { return true; }

  // return true for models from https://github.com/salute-developers/GigaAM
  // return false otherwise
  virtual bool IsGigaAM() const { return false; }

  // For Dolphin and FireRedASR CTC models, they use global CMVN
  virtual void NormalizeFeatures(float *features, int32_t num_frames,
                                 int32_t feat_dim) const {}
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_CTC_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/offline-dolphin-model-config.cc
================================================
// sherpa-onnx/csrc/offline-dolphin-model-config.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-dolphin-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OfflineDolphinModelConfig::Register(ParseOptions *po) {
  po->Register("dolphin-model", &model,
               "Path to model.onnx of Dolphin CTC branch.");
}

bool OfflineDolphinModelConfig::Validate() const {
  if (!FileExists(model)) {
    SHERPA_ONNX_LOGE("Dolphin model '%s' does not exist", model.c_str());
    return false;
  }

  return true;
}

std::string OfflineDolphinModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineDolphinModelConfig(";
  os << "model=\"" << model << "\")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-dolphin-model-config.h
================================================
// sherpa-onnx/csrc/offline-dolphin-model-config.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineDolphinModelConfig {
  std::string model;

  OfflineDolphinModelConfig() = default;
  explicit OfflineDolphinModelConfig(const std::string &model) : model(model) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-dolphin-model-meta-data.h
================================================
// sherpa-onnx/csrc/offline-dolphin-model-meta-data.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_META_DATA_H_
#define SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_META_DATA_H_

#include <string>
#include <vector>

namespace sherpa_onnx {

struct OfflineDolphinModelMetaData {
  int32_t vocab_size;
  int32_t subsampling_factor = 4;
  std::vector<float> mean;
  std::vector<float> inv_stddev;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_META_DATA_H_


================================================
FILE: sherpa-onnx/csrc/offline-dolphin-model.cc
================================================
// sherpa-onnx/csrc/offline-dolphin-model.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-dolphin-model.h"

#include <algorithm>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "Eigen/Dense"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class OfflineDolphinModel::Impl {
 public:
  explicit Impl(const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(config_.dolphin.model);
    Init(buf.data(), buf.size());
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(mgr, config_.dolphin.model);
    Init(buf.data(), buf.size());
  }

  std::vector<Ort::Value> Forward(Ort::Value features,
                                  Ort::Value features_length) {
    std::array<Ort::Value, 2> inputs = {
        std::move(features),
        std::move(features_length),
    };

    return sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(),
                      output_names_ptr_.data(), output_names_ptr_.size());
  }

  int32_t VocabSize() const { return meta_data_.vocab_size; }

  int32_t SubsamplingFactor() const { return meta_data_.subsampling_factor; }

  void NormalizeFeatures(float *features, int32_t num_frames,
                         int32_t feat_dim) const {
    using RowMajorMat =
        Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
    Eigen::Map<RowMajorMat> x(features, num_frames, feat_dim);

    Eigen::Map<const Eigen::RowVectorXf> mean(meta_data_.mean.data(), feat_dim);
    Eigen::Map<const Eigen::RowVectorXf> inv_std(meta_data_.inv_stddev.data(),
                                                 feat_dim);
    x.array() =
        (x.array().rowwise() - mean.array()).rowwise() * inv_std.array();
  }

  OrtAllocator *Allocator() { return allocator_; }

 private:
  void Init(void *model_data, size_t model_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);

    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      PrintModelMetadata(os, meta_data);
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
    SHERPA_ONNX_READ_META_DATA(meta_data_.vocab_size, "vocab_size");

    SHERPA_ONNX_READ_META_DATA_VEC_FLOAT(meta_data_.mean, "mean");
    SHERPA_ONNX_READ_META_DATA_VEC_FLOAT(meta_data_.inv_stddev, "invstd");
  }

 private:
  OfflineModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;

  OfflineDolphinModelMetaData meta_data_;
};

OfflineDolphinModel::OfflineDolphinModel(const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineDolphinModel::OfflineDolphinModel(Manager *mgr,
                                         const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineDolphinModel::~OfflineDolphinModel() = default;

std::vector<Ort::Value> OfflineDolphinModel::Forward(
    Ort::Value features, Ort::Value features_length) {
  return impl_->Forward(std::move(features), std::move(features_length));
}

int32_t OfflineDolphinModel::VocabSize() const { return impl_->VocabSize(); }

int32_t OfflineDolphinModel::SubsamplingFactor() const {
  return impl_->SubsamplingFactor();
}

void OfflineDolphinModel::NormalizeFeatures(float *features, int32_t num_frames,
                                            int32_t feat_dim) const {
  return impl_->NormalizeFeatures(features, num_frames, feat_dim);
}

OrtAllocator *OfflineDolphinModel::Allocator() const {
  return impl_->Allocator();
}

#if __ANDROID_API__ >= 9
template OfflineDolphinModel::OfflineDolphinModel(
    AAssetManager *mgr, const OfflineModelConfig &config);
#endif

#if __OHOS__
template OfflineDolphinModel::OfflineDolphinModel(
    NativeResourceManager *mgr, const OfflineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-dolphin-model.h
================================================
// sherpa-onnx/csrc/offline-dolphin-model.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_H_

#include <memory>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-ctc-model.h"
#include "sherpa-onnx/csrc/offline-dolphin-model-meta-data.h"
#include "sherpa-onnx/csrc/offline-model-config.h"

namespace sherpa_onnx {

class OfflineDolphinModel : public OfflineCtcModel {
 public:
  explicit OfflineDolphinModel(const OfflineModelConfig &config);

  template <typename Manager>
  OfflineDolphinModel(Manager *mgr, const OfflineModelConfig &config);

  ~OfflineDolphinModel() override;

  /** Run the forward method of the model.
   *
   * @param features  A tensor of shape (N, T, C).
   * @param features_length  A 1-D tensor of shape (N,) containing number of
   *                         valid frames in `features` before padding.
   *                         Its dtype is int64_t.
   *
   * @return Return a vector containing:
   *  - log_probs: A 3-D tensor of shape (N, T', vocab_size).
   *  - log_probs_length A 1-D tensor of shape (N,). Its dtype is int64_t
   */
  std::vector<Ort::Value> Forward(Ort::Value features,
                                  Ort::Value features_length) override;

  /** Return the vocabulary size of the model
   */
  int32_t VocabSize() const override;

  /** SubsamplingFactor of the model
   *
   * For Citrinet, the subsampling factor is usually 4.
   * For Conformer CTC, the subsampling factor is usually 8.
   */
  int32_t SubsamplingFactor() const override;

  /** Return an allocator for allocating memory
   */
  OrtAllocator *Allocator() const override;

  bool SupportBatchProcessing() const override { return true; }

  void NormalizeFeatures(float *features, int32_t num_frames,
                         int32_t feat_dim) const override;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/offline-fire-red-asr-ctc-model-config.cc
================================================
// sherpa-onnx/csrc/offline-fire-red-asr-ctc-model-config.cc
//
// Copyright (c)  2026  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-fire-red-asr-ctc-model-config.h"

#include <sstream>
#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OfflineFireRedAsrCtcModelConfig::Register(ParseOptions *po) {
  po->Register(
      "fire-red-asr-ctc", &model,
      "Path to model.onnx from FireRedASR CTC. "
      "Please see "
      "https://k2-fsa.github.io/sherpa/onnx/FireRedAsr/pretrained.html "
      "for available FireRedASR CTC models");
}

bool OfflineFireRedAsrCtcModelConfig::Validate() const {
  if (!FileExists(model)) {
    SHERPA_ONNX_LOGE("FireRedASR CTC model: '%s' does not exist",
                     model.c_str());
    return false;
  }

  return true;
}

std::string OfflineFireRedAsrCtcModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineFireRedAsrCtcModelConfig(";
  os << "model=\"" << model << "\")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-fire-red-asr-ctc-model-config.h
================================================
// sherpa-onnx/csrc/offline-fire-red-asr-ctc-model-config.h
//
// Copyright (c)  2026  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_FIRE_RED_ASR_CTC_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_FIRE_RED_ASR_CTC_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineFireRedAsrCtcModelConfig {
  std::string model;

  OfflineFireRedAsrCtcModelConfig() = default;
  explicit OfflineFireRedAsrCtcModelConfig(const std::string &model)
      : model(model) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_FIRE_RED_ASR_CTC_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-fire-red-asr-ctc-model.cc
================================================
// sherpa-onnx/csrc/offline-fire-red-asr-ctc-model.cc
//
// Copyright (c)  2026  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-fire-red-asr-ctc-model.h"

#include <algorithm>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "Eigen/Dense"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class OfflineFireRedAsrCtcModel::Impl {
 public:
  explicit Impl(const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    sess_ = std::make_unique<Ort::Session>(
        env_, SHERPA_ONNX_TO_ORT_PATH(config.fire_red_asr_ctc.model),
        sess_opts_);
    Init(nullptr, 0);
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(mgr, config_.fire_red_asr_ctc.model);
    Init(buf.data(), buf.size());
  }

  std::vector<Ort::Value> Forward(Ort::Value features,
                                  Ort::Value features_length) {
    std::array<Ort::Value, 2> inputs = {std::move(features),
                                        std::move(features_length)};

    return sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(),
                      output_names_ptr_.data(), output_names_ptr_.size());
  }

  int32_t VocabSize() const { return vocab_size_; }

  int32_t SubsamplingFactor() const { return subsampling_factor_; }

  OrtAllocator *Allocator() { return allocator_; }

  void NormalizeFeatures(float *features, int32_t num_frames,
                         int32_t feat_dim) const {
    if (static_cast<int32_t>(mean_.size()) != feat_dim) {
      SHERPA_ONNX_LOGE("Bad things happened");
      SHERPA_ONNX_LOGE("Wrong feat dim %d. Expect: %d", feat_dim,
                       static_cast<int32_t>(mean_.size()));
      SHERPA_ONNX_EXIT(-1);
    }

    using RowMajorMat =
        Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
    Eigen::Map<RowMajorMat> x(features, num_frames, feat_dim);

    Eigen::Map<const Eigen::RowVectorXf> mean(mean_.data(), feat_dim);
    Eigen::Map<const Eigen::RowVectorXf> inv_std(inv_stddev_.data(), feat_dim);
    x.array() =
        (x.array().rowwise() - mean.array()).rowwise() * inv_std.array();
  }

 private:
  void Init(void *model_data, size_t model_data_length) {
    if (model_data) {
      sess_ = std::make_unique<Ort::Session>(env_, model_data,
                                             model_data_length, sess_opts_);
    } else if (!sess_) {
      SHERPA_ONNX_LOGE(
          "Please pass buffer data or initialize session outside of this "
          "function");
      SHERPA_ONNX_EXIT(-1);
    }

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);

    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      PrintModelMetadata(os, meta_data);
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below

    std::string model_type;
    SHERPA_ONNX_READ_META_DATA_STR(model_type, "model_type");
    if (model_type != "fire-red-asr-2-ctc") {
      SHERPA_ONNX_LOGE("Expect model type fire-red-asr-2-ctc. Given: '%s'",
                       model_type.c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(subsampling_factor_,
                                            "subsampling_factor", 4);

    auto shape =
        sess_->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
    vocab_size_ = shape.back();

    if (config_.debug) {
#if __OHOS__
      SHERPA_ONNX_LOGE("subsampling_factor: %{public}d", subsampling_factor_);
      SHERPA_ONNX_LOGE("vocab_size: %{public}d", vocab_size_);
#else
      SHERPA_ONNX_LOGE("subsampling_factor: %d", subsampling_factor_);
      SHERPA_ONNX_LOGE("vocab_size: %d", vocab_size_);
#endif
    }

    SHERPA_ONNX_READ_META_DATA_VEC_FLOAT(mean_, "cmvn_mean");
    SHERPA_ONNX_READ_META_DATA_VEC_FLOAT(inv_stddev_, "cmvn_inv_stddev");
    if (mean_.size() != inv_stddev_.size()) {
      SHERPA_ONNX_LOGE("Incorrect cmvn. mean size: %d, inv_stddev size: %d",
                       static_cast<int32_t>(mean_.size()),
                       static_cast<int32_t>(inv_stddev_.size()));
      SHERPA_ONNX_EXIT(-1);
    }
  }

 private:
  OfflineModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;

  int32_t vocab_size_ = 0;
  int32_t subsampling_factor_ = 0;

  std::vector<float> mean_;
  std::vector<float> inv_stddev_;
};

OfflineFireRedAsrCtcModel::OfflineFireRedAsrCtcModel(
    const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineFireRedAsrCtcModel::OfflineFireRedAsrCtcModel(
    Manager *mgr, const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineFireRedAsrCtcModel::~OfflineFireRedAsrCtcModel() = default;

std::vector<Ort::Value> OfflineFireRedAsrCtcModel::Forward(
    Ort::Value features, Ort::Value features_length) {
  return impl_->Forward(std::move(features), std::move(features_length));
}

int32_t OfflineFireRedAsrCtcModel::VocabSize() const {
  return impl_->VocabSize();
}

int32_t OfflineFireRedAsrCtcModel::SubsamplingFactor() const {
  return impl_->SubsamplingFactor();
}

OrtAllocator *OfflineFireRedAsrCtcModel::Allocator() const {
  return impl_->Allocator();
}

void OfflineFireRedAsrCtcModel::NormalizeFeatures(float *features,
                                                  int32_t num_frames,
                                                  int32_t feat_dim) const {
  return impl_->NormalizeFeatures(features, num_frames, feat_dim);
}

#if __ANDROID_API__ >= 9
template OfflineFireRedAsrCtcModel::OfflineFireRedAsrCtcModel(
    AAssetManager *mgr, const OfflineModelConfig &config);
#endif

#if __OHOS__
template OfflineFireRedAsrCtcModel::OfflineFireRedAsrCtcModel(
    NativeResourceManager *mgr, const OfflineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-fire-red-asr-ctc-model.h
================================================
// sherpa-onnx/csrc/offline-fire-red-asr-ctc-model.h
//
// Copyright (c)  2026  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_FIRE_RED_ASR_CTC_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_FIRE_RED_ASR_CTC_MODEL_H_
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-ctc-model.h"
#include "sherpa-onnx/csrc/offline-model-config.h"

namespace sherpa_onnx {

/** This class implements the CTC model from FIRE_RED_ASR.
 */
class OfflineFireRedAsrCtcModel : public OfflineCtcModel {
 public:
  explicit OfflineFireRedAsrCtcModel(const OfflineModelConfig &config);

  template <typename Manager>
  OfflineFireRedAsrCtcModel(Manager *mgr, const OfflineModelConfig &config);

  ~OfflineFireRedAsrCtcModel() override;

  /** Run the forward method of the model.
   *
   * @param features  A tensor of shape (N, T, C).
   * @param features_length  A 1-D tensor of shape (N,) containing number of
   *                         valid frames in `features` before padding.
   *                         Its dtype is int64_t.
   *
   * @return Return a vector containing:
   *  - log_probs: A 3-D tensor of shape (N, T', vocab_size).
   *  - log_probs_length A 1-D tensor of shape (N,). Its dtype is int64_t
   */
  std::vector<Ort::Value> Forward(Ort::Value features,
                                  Ort::Value features_length) override;

  /** Return the vocabulary size of the model
   */
  int32_t VocabSize() const override;

  int32_t SubsamplingFactor() const override;

  /** Return an allocator for allocating memory
   */
  OrtAllocator *Allocator() const override;

  void NormalizeFeatures(float *features, int32_t num_frames,
                         int32_t feat_dim) const override;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_FIRE_RED_ASR_CTC_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/offline-fire-red-asr-decoder.h
================================================
// sherpa-onnx/csrc/offline-fire-red-asr-decoder.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_FIRE_RED_ASR_DECODER_H_
#define SHERPA_ONNX_CSRC_OFFLINE_FIRE_RED_ASR_DECODER_H_

#include <cstdint>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT

namespace sherpa_onnx {

struct OfflineFireRedAsrDecoderResult {
  /// The decoded token IDs
  std::vector<int32_t> tokens;
};

class OfflineFireRedAsrDecoder {
 public:
  virtual ~OfflineFireRedAsrDecoder() = default;

  /** Run beam search given the output from the FireRedAsr encoder model.
   *
   * @param n_layer_cross_k       A 4-D tensor of shape
   *                              (num_decoder_layers, N, T, d_model).
   * @param n_layer_cross_v       A 4-D tensor of shape
   *                              (num_decoder_layers, N, T, d_model).
   *
   * @return Return a vector of size `N` containing the decoded results.
   */
  virtual std::vector<OfflineFireRedAsrDecoderResult> Decode(
      Ort::Value n_layer_cross_k, Ort::Value n_layer_cross_v,
      int32_t num_feature_frames) = 0;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_FIRE_RED_ASR_DECODER_H_


================================================
FILE: sherpa-onnx/csrc/offline-fire-red-asr-greedy-search-decoder.cc
================================================
// sherpa-onnx/csrc/offline-fire-red-asr-greedy-search-decoder.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-fire-red-asr-greedy-search-decoder.h"

#include <algorithm>
#include <tuple>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"

namespace sherpa_onnx {

// Note: this functions works only for batch size == 1 at present
std::vector<OfflineFireRedAsrDecoderResult>
OfflineFireRedAsrGreedySearchDecoder::Decode(Ort::Value cross_k,
                                             Ort::Value cross_v,
                                             int32_t num_feature_frames) {
  const auto &meta_data = model_->GetModelMetadata();

  auto memory_info =
      Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

  // For multilingual models, initial_tokens contains [sot, language, task]
  //   - language is English by default
  //   - task is transcribe by default
  //
  // For non-multilingual models, initial_tokens contains [sot]
  std::array<int64_t, 2> token_shape = {1, 1};
  int64_t token = meta_data.sos_id;

  int32_t batch_size = 1;

  Ort::Value tokens = Ort::Value::CreateTensor(
      memory_info, &token, 1, token_shape.data(), token_shape.size());

  std::array<int64_t, 1> offset_shape{1};
  Ort::Value offset = Ort::Value::CreateTensor<int64_t>(
      model_->Allocator(), offset_shape.data(), offset_shape.size());
  *(offset.GetTensorMutableData<int64_t>()) = 0;

  std::vector<OfflineFireRedAsrDecoderResult> ans(1);

  auto self_kv_cache = model_->GetInitialSelfKVCache();

  std::tuple<Ort::Value, Ort::Value, Ort::Value, Ort::Value, Ort::Value,
             Ort::Value>
      decoder_out = {Ort::Value{nullptr},
                     std::move(self_kv_cache.first),
                     std::move(self_kv_cache.second),
                     std::move(cross_k),
                     std::move(cross_v),
                     std::move(offset)};

  // assume at most 6 tokens per second
  int32_t num_possible_tokens = num_feature_frames / 100.0 * 6;
  num_possible_tokens =
      std::min<int32_t>(num_possible_tokens, meta_data.max_len / 2);

  for (int32_t i = 0; i < num_possible_tokens; ++i) {
    decoder_out = model_->ForwardDecoder(View(&tokens),
                                         std::move(std::get<1>(decoder_out)),
                                         std::move(std::get<2>(decoder_out)),
                                         std::move(std::get<3>(decoder_out)),
                                         std::move(std::get<4>(decoder_out)),
                                         std::move(std::get<5>(decoder_out)));

    const auto &logits = std::get<0>(decoder_out);
    const float *p_logits = logits.GetTensorData<float>();

    auto logits_shape = logits.GetTensorTypeAndShapeInfo().GetShape();
    int32_t vocab_size = logits_shape[2];

    int32_t max_token_id = static_cast<int32_t>(std::distance(
        p_logits, std::max_element(p_logits, p_logits + vocab_size)));
    if (max_token_id == meta_data.eos_id) {
      break;
    }

    ans[0].tokens.push_back(max_token_id);

    token = max_token_id;

    // increment offset
    *(std::get<5>(decoder_out).GetTensorMutableData<int64_t>()) += 1;
  }

  return ans;
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-fire-red-asr-greedy-search-decoder.h
================================================
// sherpa-onnx/csrc/offline-fire-red-asr-greedy-search-decoder.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_FIRE_RED_ASR_GREEDY_SEARCH_DECODER_H_
#define SHERPA_ONNX_CSRC_OFFLINE_FIRE_RED_ASR_GREEDY_SEARCH_DECODER_H_

#include <vector>

#include "sherpa-onnx/csrc/offline-fire-red-asr-decoder.h"
#include "sherpa-onnx/csrc/offline-fire-red-asr-model.h"

namespace sherpa_onnx {

class OfflineFireRedAsrGreedySearchDecoder : public OfflineFireRedAsrDecoder {
 public:
  explicit OfflineFireRedAsrGreedySearchDecoder(OfflineFireRedAsrModel *model)
      : model_(model) {}

  std::vector<OfflineFireRedAsrDecoderResult> Decode(
      Ort::Value cross_k, Ort::Value cross_v,
      int32_t num_feature_frames) override;

 private:
  OfflineFireRedAsrModel *model_;  // not owned
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_FIRE_RED_ASR_GREEDY_SEARCH_DECODER_H_


================================================
FILE: sherpa-onnx/csrc/offline-fire-red-asr-model-config.cc
================================================
// sherpa-onnx/csrc/offline-fire-red-asr-model-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-fire-red-asr-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OfflineFireRedAsrModelConfig::Register(ParseOptions *po) {
  po->Register("fire-red-asr-encoder", &encoder,
               "Path to onnx encoder of FireRedAsr");

  po->Register("fire-red-asr-decoder", &decoder,
               "Path to onnx decoder of FireRedAsr");
}

bool OfflineFireRedAsrModelConfig::Validate() const {
  if (encoder.empty()) {
    SHERPA_ONNX_LOGE("Please provide --fire-red-asr-encoder");
    return false;
  }

  if (!FileExists(encoder)) {
    SHERPA_ONNX_LOGE("FireRedAsr encoder file '%s' does not exist",
                     encoder.c_str());
    return false;
  }

  if (decoder.empty()) {
    SHERPA_ONNX_LOGE("Please provide --fire-red-asr-decoder");
    return false;
  }

  if (!FileExists(decoder)) {
    SHERPA_ONNX_LOGE("FireRedAsr decoder file '%s' does not exist",
                     decoder.c_str());
    return false;
  }

  return true;
}

std::string OfflineFireRedAsrModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineFireRedAsrModelConfig(";
  os << "encoder=\"" << encoder << "\", ";
  os << "decoder=\"" << decoder << "\")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-fire-red-asr-model-config.h
================================================
// sherpa-onnx/csrc/offline-fire-red-asr-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_FIRE_RED_ASR_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_FIRE_RED_ASR_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

// see https://github.com/FireRedTeam/FireRedASR
struct OfflineFireRedAsrModelConfig {
  std::string encoder;
  std::string decoder;

  OfflineFireRedAsrModelConfig() = default;
  OfflineFireRedAsrModelConfig(const std::string &encoder,
                               const std::string &decoder)
      : encoder(encoder), decoder(decoder) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_FIRE_RED_ASR_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-fire-red-asr-model-meta-data.h
================================================
// sherpa-onnx/csrc/offline-fire-red-asr-model-meta-data.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_FIRE_RED_ASR_MODEL_META_DATA_H_
#define SHERPA_ONNX_CSRC_OFFLINE_FIRE_RED_ASR_MODEL_META_DATA_H_

#include <string>
#include <unordered_map>
#include <vector>

namespace sherpa_onnx {

struct OfflineFireRedAsrModelMetaData {
  int32_t sos_id;
  int32_t eos_id;
  int32_t max_len;

  int32_t num_decoder_layers;
  int32_t num_head;
  int32_t head_dim;

  std::vector<float> mean;
  std::vector<float> inv_stddev;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_FIRE_RED_ASR_MODEL_META_DATA_H_


================================================
FILE: sherpa-onnx/csrc/offline-fire-red-asr-model.cc
================================================
// sherpa-onnx/csrc/offline-fire-red-asr-model.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-fire-red-asr-model.h"

#include <algorithm>
#include <cmath>
#include <memory>
#include <string>
#include <tuple>
#include <unordered_map>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

namespace {

static inline bool IsCudaProvider(const std::string &provider) {
  return provider == "cuda";
}

}  // namespace

class OfflineFireRedAsrModel::Impl {
 public:
  explicit Impl(const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{},
        cpu_mem_info_(
            Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault)),
        is_cpu_provider_(config.provider == "cpu" || config.provider.empty()) {
    {
      auto buf = ReadFile(config.fire_red_asr.encoder);
      InitEncoder(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(config.fire_red_asr.decoder);
      InitDecoder(buf.data(), buf.size());
    }

    InitCudaIOBinding();
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{},
        cpu_mem_info_(
            Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault)),
        is_cpu_provider_(config.provider == "cpu" || config.provider.empty()) {
    {
      auto buf = ReadFile(mgr, config.fire_red_asr.encoder);
      InitEncoder(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(mgr, config.fire_red_asr.decoder);
      InitDecoder(buf.data(), buf.size());
    }

    InitCudaIOBinding();
  }

  std::pair<Ort::Value, Ort::Value> ForwardEncoder(Ort::Value features,
                                                   Ort::Value features_length) {
    std::array<Ort::Value, 2> inputs{std::move(features),
                                     std::move(features_length)};

    std::vector<Ort::Value> encoder_out;

    if (use_cuda_iobinding_) {
      // Encoder outputs (cross_k, cross_v) are used multiple times in decoder
      // steps, so keep them on GPU to avoid device<->host copies.
      Ort::IoBinding binding(*encoder_sess_);
      binding.BindInput(encoder_input_names_ptr_[0], inputs[0]);
      binding.BindInput(encoder_input_names_ptr_[1], inputs[1]);

      binding.BindOutput(encoder_output_names_ptr_[0], *cuda_mem_info_);
      binding.BindOutput(encoder_output_names_ptr_[1], *cuda_mem_info_);

      binding.SynchronizeInputs();
      encoder_sess_->Run(Ort::RunOptions{nullptr}, binding);
      binding.SynchronizeOutputs();
      encoder_out = binding.GetOutputValues();
    } else {
      encoder_out = encoder_sess_->Run(
          {}, encoder_input_names_ptr_.data(), inputs.data(), inputs.size(),
          encoder_output_names_ptr_.data(), encoder_output_names_ptr_.size());
    }

    return {std::move(encoder_out[0]), std::move(encoder_out[1])};
  }

  std::tuple<Ort::Value, Ort::Value, Ort::Value, Ort::Value, Ort::Value,
             Ort::Value>
  ForwardDecoder(Ort::Value tokens, Ort::Value n_layer_self_k_cache,
                 Ort::Value n_layer_self_v_cache, Ort::Value n_layer_cross_k,
                 Ort::Value n_layer_cross_v, Ort::Value offset) {
    std::array<Ort::Value, 6> decoder_input = {std::move(tokens),
                                               std::move(n_layer_self_k_cache),
                                               std::move(n_layer_self_v_cache),
                                               std::move(n_layer_cross_k),
                                               std::move(n_layer_cross_v),
                                               std::move(offset)};

    std::vector<Ort::Value> decoder_out;

    if (use_cuda_iobinding_) {
      // CPU-side sampling needs logits on CPU, while self KV cache should
      // remain on GPU to avoid large device<->host copies between decode steps.
      Ort::IoBinding binding(*decoder_sess_);
      for (size_t i = 0; i < decoder_input.size(); ++i) {
        binding.BindInput(decoder_input_names_ptr_[i], decoder_input[i]);
      }

      binding.BindOutput(decoder_output_names_ptr_[0], cpu_mem_info_);
      binding.BindOutput(decoder_output_names_ptr_[1], *cuda_mem_info_);
      binding.BindOutput(decoder_output_names_ptr_[2], *cuda_mem_info_);

      binding.SynchronizeInputs();
      decoder_sess_->Run(Ort::RunOptions{nullptr}, binding);
      binding.SynchronizeOutputs();
      decoder_out = binding.GetOutputValues();
    } else {
      decoder_out = decoder_sess_->Run(
          {}, decoder_input_names_ptr_.data(), decoder_input.data(),
          decoder_input.size(), decoder_output_names_ptr_.data(),
          decoder_output_names_ptr_.size());
    }

    return std::tuple<Ort::Value, Ort::Value, Ort::Value, Ort::Value,
                      Ort::Value, Ort::Value>{
        std::move(decoder_out[0]),   std::move(decoder_out[1]),
        std::move(decoder_out[2]),   std::move(decoder_input[3]),
        std::move(decoder_input[4]), std::move(decoder_input[5])};
  }

  std::pair<Ort::Value, Ort::Value> GetInitialSelfKVCache() {
    int32_t batch_size = 1;
    std::array<int64_t, 5> shape{meta_data_.num_decoder_layers, batch_size,
                                 meta_data_.max_len, meta_data_.num_head,
                                 meta_data_.head_dim};

    Ort::Value n_layer_self_k_cache = Ort::Value::CreateTensor<float>(
        Allocator(), shape.data(), shape.size());

    Ort::Value n_layer_self_v_cache = Ort::Value::CreateTensor<float>(
        Allocator(), shape.data(), shape.size());

    auto n = shape[0] * shape[1] * shape[2] * shape[3] * shape[4];

    float *p_k = n_layer_self_k_cache.GetTensorMutableData<float>();
    float *p_v = n_layer_self_v_cache.GetTensorMutableData<float>();

    memset(p_k, 0, sizeof(float) * n);
    memset(p_v, 0, sizeof(float) * n);

    return {std::move(n_layer_self_k_cache), std::move(n_layer_self_v_cache)};
  }

  OrtAllocator *Allocator() { return allocator_; }

  const OfflineFireRedAsrModelMetaData &GetModelMetadata() const {
    return meta_data_;
  }

 private:
  void InitEncoder(void *model_data, size_t model_data_length) {
    encoder_sess_ = std::make_unique<Ort::Session>(
        env_, model_data, model_data_length, sess_opts_);

    GetInputNames(encoder_sess_.get(), &encoder_input_names_,
                  &encoder_input_names_ptr_);

    GetOutputNames(encoder_sess_.get(), &encoder_output_names_,
                   &encoder_output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = encoder_sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      os << "---encoder---\n";
      PrintModelMetadata(os, meta_data);
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
    SHERPA_ONNX_READ_META_DATA(meta_data_.num_decoder_layers,
                               "num_decoder_layers");
    SHERPA_ONNX_READ_META_DATA(meta_data_.num_head, "num_head");
    SHERPA_ONNX_READ_META_DATA(meta_data_.head_dim, "head_dim");
    SHERPA_ONNX_READ_META_DATA(meta_data_.sos_id, "sos");
    SHERPA_ONNX_READ_META_DATA(meta_data_.eos_id, "eos");
    SHERPA_ONNX_READ_META_DATA(meta_data_.max_len, "max_len");

    SHERPA_ONNX_READ_META_DATA_VEC_FLOAT(meta_data_.mean, "cmvn_mean");
    SHERPA_ONNX_READ_META_DATA_VEC_FLOAT(meta_data_.inv_stddev,
                                         "cmvn_inv_stddev");
  }

  void InitDecoder(void *model_data, size_t model_data_length) {
    decoder_sess_ = std::make_unique<Ort::Session>(
        env_, model_data, model_data_length, sess_opts_);

    GetInputNames(decoder_sess_.get(), &decoder_input_names_,
                  &decoder_input_names_ptr_);

    GetOutputNames(decoder_sess_.get(), &decoder_output_names_,
                   &decoder_output_names_ptr_);
  }

  void InitCudaIOBinding() {
    use_cuda_iobinding_ =
        (!is_cpu_provider_ && IsCudaProvider(config_.provider));
    if (use_cuda_iobinding_) {
      // Use device 0 by default. SessionOptions() in sherpa-onnx usually
      // configures the CUDA EP device; binding here only affects output memory.
      cuda_mem_info_ = std::make_unique<Ort::MemoryInfo>(
          "Cuda", OrtDeviceAllocator, 0, OrtMemTypeDefault);
    }
  }

 private:
  OfflineModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  Ort::MemoryInfo cpu_mem_info_;
  std::unique_ptr<Ort::MemoryInfo> cuda_mem_info_;
  bool use_cuda_iobinding_ = false;
  bool is_cpu_provider_ = false;

  std::unique_ptr<Ort::Session> encoder_sess_;
  std::unique_ptr<Ort::Session> decoder_sess_;

  std::vector<std::string> encoder_input_names_;
  std::vector<const char *> encoder_input_names_ptr_;

  std::vector<std::string> encoder_output_names_;
  std::vector<const char *> encoder_output_names_ptr_;

  std::vector<std::string> decoder_input_names_;
  std::vector<const char *> decoder_input_names_ptr_;

  std::vector<std::string> decoder_output_names_;
  std::vector<const char *> decoder_output_names_ptr_;

  OfflineFireRedAsrModelMetaData meta_data_;
};

OfflineFireRedAsrModel::OfflineFireRedAsrModel(const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineFireRedAsrModel::OfflineFireRedAsrModel(Manager *mgr,
                                               const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineFireRedAsrModel::~OfflineFireRedAsrModel() = default;

std::pair<Ort::Value, Ort::Value> OfflineFireRedAsrModel::ForwardEncoder(
    Ort::Value features, Ort::Value features_length) const {
  return impl_->ForwardEncoder(std::move(features), std::move(features_length));
}

std::tuple<Ort::Value, Ort::Value, Ort::Value, Ort::Value, Ort::Value,
           Ort::Value>
OfflineFireRedAsrModel::ForwardDecoder(Ort::Value tokens,
                                       Ort::Value n_layer_self_k_cache,
                                       Ort::Value n_layer_self_v_cache,
                                       Ort::Value n_layer_cross_k,
                                       Ort::Value n_layer_cross_v,
                                       Ort::Value offset) const {
  return impl_->ForwardDecoder(
      std::move(tokens), std::move(n_layer_self_k_cache),
      std::move(n_layer_self_v_cache), std::move(n_layer_cross_k),
      std::move(n_layer_cross_v), std::move(offset));
}

std::pair<Ort::Value, Ort::Value>
OfflineFireRedAsrModel::GetInitialSelfKVCache() const {
  return impl_->GetInitialSelfKVCache();
}

OrtAllocator *OfflineFireRedAsrModel::Allocator() const {
  return impl_->Allocator();
}

const OfflineFireRedAsrModelMetaData &OfflineFireRedAsrModel::GetModelMetadata()
    const {
  return impl_->GetModelMetadata();
}

#if __ANDROID_API__ >= 9
template OfflineFireRedAsrModel::OfflineFireRedAsrModel(
    AAssetManager *mgr, const OfflineModelConfig &config);
#endif

#if __OHOS__
template OfflineFireRedAsrModel::OfflineFireRedAsrModel(
    NativeResourceManager *mgr, const OfflineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-fire-red-asr-model.h
================================================
// sherpa-onnx/csrc/offline-fire-red-asr-model.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_FIRE_RED_ASR_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_FIRE_RED_ASR_MODEL_H_

#include <memory>
#include <string>
#include <tuple>
#include <unordered_map>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-fire-red-asr-model-meta-data.h"
#include "sherpa-onnx/csrc/offline-model-config.h"

namespace sherpa_onnx {

class OfflineFireRedAsrModel {
 public:
  explicit OfflineFireRedAsrModel(const OfflineModelConfig &config);

  template <typename Manager>
  OfflineFireRedAsrModel(Manager *mgr, const OfflineModelConfig &config);

  ~OfflineFireRedAsrModel();

  /** Run the encoder model.
   *
   * @param features  A tensor of shape (N, T, C).
   * @param features_len  A tensor of shape (N,) with dtype int64.
   *
   * @return Return a pair containing:
   *  - n_layer_cross_k: A 4-D tensor of shape
   *                     (num_decoder_layers, N, T, d_model)
   *  - n_layer_cross_v: A 4-D tensor of shape
   *                     (num_decoder_layers, N, T, d_model)
   */
  std::pair<Ort::Value, Ort::Value> ForwardEncoder(
      Ort::Value features, Ort::Value features_length) const;

  /** Run the decoder model.
   *
   * @param tokens A int64 tensor of shape (N, num_words)
   * @param n_layer_self_k_cache  A 5-D tensor of shape
   *                       (num_decoder_layers, N, max_len, num_head, head_dim).
   * @param n_layer_self_v_cache  A 5-D tensor of shape
   *                       (num_decoder_layers, N, max_len, num_head, head_dim).
   * @param n_layer_cross_k       A 5-D tensor of shape
   *                              (num_decoder_layers, N, T, d_model).
   * @param n_layer_cross_v       A 5-D tensor of shape
   *                              (num_decoder_layers, N, T, d_model).
   * @param offset A int64 tensor of shape (N,)
   *
   * @return Return a tuple containing 6 tensors:
   *
   *  - logits A 3-D tensor of shape (N, num_words, vocab_size)
   *  - out_n_layer_self_k_cache Same shape as n_layer_self_k_cache
   *  - out_n_layer_self_v_cache Same shape as n_layer_self_v_cache
   *  - out_n_layer_cross_k Same as n_layer_cross_k
   *  - out_n_layer_cross_v Same as n_layer_cross_v
   *  - out_offset Same as offset
   */
  std::tuple<Ort::Value, Ort::Value, Ort::Value, Ort::Value, Ort::Value,
             Ort::Value>
  ForwardDecoder(Ort::Value tokens, Ort::Value n_layer_self_k_cache,
                 Ort::Value n_layer_self_v_cache, Ort::Value n_layer_cross_k,
                 Ort::Value n_layer_cross_v, Ort::Value offset) const;

  /** Return the initial self kv cache in a pair
   *  - n_layer_self_k_cache A 5-D tensor of shape
   *                       (num_decoder_layers, N, max_len, num_head, head_dim).
   *  - n_layer_self_v_cache A 5-D tensor of shape
   *                       (num_decoder_layers, N, max_len, num_head, head_dim).
   */
  std::pair<Ort::Value, Ort::Value> GetInitialSelfKVCache() const;

  const OfflineFireRedAsrModelMetaData &GetModelMetadata() const;

  /** Return an allocator for allocating memory
   */
  OrtAllocator *Allocator() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_FIRE_RED_ASR_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/offline-funasr-nano-model-config.cc
================================================
// sherpa-onnx/csrc/offline-funasr-nano-model-config.cc
//
// Copyright (c)  2025  zengyw

#include "sherpa-onnx/csrc/offline-funasr-nano-model-config.h"

#include <sstream>
#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OfflineFunASRNanoModelConfig::Register(ParseOptions *po) {
  po->Register("funasr-nano-encoder-adaptor", &encoder_adaptor,
               "Path to encoder_adaptor.onnx for FunASR-nano");

  po->Register("funasr-nano-llm", &llm,
               "Path to llm.onnx for FunASR-nano (KV cache mode)");

  po->Register("funasr-nano-embedding", &embedding,
               "Path to embedding.onnx for FunASR-nano");

  po->Register(
      "funasr-nano-tokenizer", &tokenizer,
      "Path to tokenizer directory (e.g., Qwen3-0.6B) for FunASR-nano");

  po->Register("funasr-nano-system-prompt", &system_prompt,
               "System prompt for FunASR-nano");

  po->Register("funasr-nano-user-prompt", &user_prompt,
               "User prompt template for FunASR-nano");

  po->Register("funasr-nano-max-new-tokens", &max_new_tokens,
               "Maximum number of new tokens to generate for FunASR-nano");

  po->Register("funasr-nano-temperature", &temperature,
               "Sampling temperature for FunASR-nano");

  po->Register("funasr-nano-top-p", &top_p,
               "Top-p (nucleus) sampling threshold for FunASR-nano");

  po->Register("funasr-nano-seed", &seed, "Random seed for FunASR-nano");

  po->Register("funasr-nano-language", &language,
               "Language for transcription (empty string means None)");

  po->Register("funasr-nano-itn", &itn,
               "Whether to apply inverse text normalization (default: true)");

  po->Register("funasr-nano-hotwords", &hotwords,
               "Hotwords (comma-separated, e.g., \"Sherpa,FunASR\")");
}

bool OfflineFunASRNanoModelConfig::Validate() const {
  if (encoder_adaptor.empty()) {
    SHERPA_ONNX_LOGE("--funasr-nano-encoder-adaptor is required");
    return false;
  }

  if (!FileExists(encoder_adaptor)) {
    SHERPA_ONNX_LOGE("--funasr-nano-encoder-adaptor: '%s' does not exist",
                     encoder_adaptor.c_str());
    return false;
  }

  if (llm.empty()) {
    SHERPA_ONNX_LOGE("--funasr-nano-llm is required");
    return false;
  }

  if (!FileExists(llm)) {
    SHERPA_ONNX_LOGE("--funasr-nano-llm: '%s' does not exist", llm.c_str());
    return false;
  }

  if (tokenizer.empty()) {
    SHERPA_ONNX_LOGE("--funasr-nano-tokenizer is required");
    return false;
  }

  if (!FileExists(tokenizer + "/vocab.json")) {
    SHERPA_ONNX_LOGE(
        "'%s/vocab.json' does not exist. Please check --funasr-nano-tokenizer",
        tokenizer.c_str());
    return false;
  }

  if (!FileExists(tokenizer + "/merges.txt")) {
    SHERPA_ONNX_LOGE(
        "'%s/merges.txt' does not exist. Please check --funasr-nano-tokenizer",
        tokenizer.c_str());
    return false;
  }

  if (!FileExists(tokenizer + "/tokenizer.json")) {
    SHERPA_ONNX_LOGE(
        "'%s/tokenizer.json' does not exist. Please check "
        "--funasr-nano-tokenizer",
        tokenizer.c_str());
    return false;
  }

  if (embedding.empty()) {
    SHERPA_ONNX_LOGE("--funasr-nano-embedding is required");
    return false;
  }

  if (!FileExists(embedding)) {
    SHERPA_ONNX_LOGE("--funasr-nano-embedding: '%s' does not exist",
                     embedding.c_str());
    return false;
  }

  if (max_new_tokens <= 0) {
    SHERPA_ONNX_LOGE("--funasr-nano-max-new-tokens should be > 0. Given: %d",
                     max_new_tokens);
    return false;
  }

  if (temperature < 0.0f) {
    SHERPA_ONNX_LOGE("--funasr-nano-temperature should be >= 0.0. Given: %f",
                     temperature);
    return false;
  }

  if (top_p < 0.0f || top_p > 1.0f) {
    SHERPA_ONNX_LOGE("--funasr-nano-top-p should be in [0.0, 1.0]. Given: %f",
                     top_p);
    return false;
  }

  return true;
}

std::string OfflineFunASRNanoModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineFunASRNanoModelConfig(";
  os << "encoder_adaptor=\"" << encoder_adaptor << "\", ";
  os << "llm=\"" << llm << "\", ";
  os << "embedding=\"" << embedding << "\", ";
  os << "tokenizer=\"" << tokenizer << "\", ";
  os << "system_prompt=\"" << system_prompt << "\", ";
  os << "user_prompt=\"" << user_prompt << "\", ";
  os << "max_new_tokens=" << max_new_tokens << ", ";
  os << "temperature=" << temperature << ", ";
  os << "top_p=" << top_p << ", ";
  os << "seed=" << seed << ", ";
  os << "language=\"" << language << "\", ";
  os << "itn=" << (itn ? "True" : "False") << ", ";
  os << "hotwords=\"" << hotwords << "\")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-funasr-nano-model-config.h
================================================
// sherpa-onnx/csrc/offline-funasr-nano-model-config.h
//
// Copyright (c)  2025  zengyw

#ifndef SHERPA_ONNX_CSRC_OFFLINE_FUNASR_NANO_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_FUNASR_NANO_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineFunASRNanoModelConfig {
  // Path to encoder_adaptor.onnx
  std::string encoder_adaptor;

  // Path to llm.onnx (KV cache model)
  std::string llm;

  // Path to embedding.onnx
  std::string embedding;

  // Path to tokenizer directory (e.g., Qwen3-0.6B)
  std::string tokenizer;

  // System prompt
  std::string system_prompt = "You are a helpful assistant.";

  // User prompt template (will be filled with audio tokens)
  std::string user_prompt = "语音转写：";

  // Maximum number of new tokens to generate
  int32_t max_new_tokens = 512;

  // Sampling temperature
  float temperature = 1e-6f;

  // Top-p (nucleus) sampling threshold
  float top_p = 0.8f;

  // Random seed for reproducibility
  int32_t seed = 42;

  // Language for transcription (empty string means None)
  std::string language;

  // Whether to apply inverse text normalization (ITN)
  bool itn = true;

  // Hotwords
  std::string hotwords;

  OfflineFunASRNanoModelConfig() = default;

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_FUNASR_NANO_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-funasr-nano-model.cc
================================================
// sherpa-onnx/csrc/offline-funasr-nano-model.cc
//
// Copyright (c)  2025  zengyw

#include "sherpa-onnx/csrc/offline-funasr-nano-model.h"

#include <algorithm>
#include <cctype>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

namespace {

// Calculate the total number of elements from a tensor shape.
static inline size_t NumelFromShape(const std::vector<int64_t> &shape) {
  if (shape.empty()) return 0;
  size_t n = 1;
  for (auto d : shape) {
    if (d <= 0) return 0;
    n *= static_cast<size_t>(d);
  }
  return n;
}

#if ORT_API_VERSION >= 14
static inline void AssertTensorIsCpu(const Ort::Value &v, const char *what) {
  if (!v.IsTensor()) return;
  auto mi = v.GetTensorMemoryInfo();
  if (mi.GetDeviceType() != OrtMemoryInfoDeviceType_CPU) {
    SHERPA_ONNX_LOGE(
        "%s: expected CPU tensor but got device_type=%d device_id=%d", what,
        (int)mi.GetDeviceType(), mi.GetDeviceId());
    SHERPA_ONNX_EXIT(-1);
  }
}
#else
static inline void AssertTensorIsCpu(const Ort::Value &v, const char *what) {
  if (!v.IsTensor()) return;

  const OrtValue *v_ptr = reinterpret_cast<const OrtValue *>(&v);
  const OrtMemoryInfo *memory_info = nullptr;

  // 1. Get memory info
  OrtStatus *status = Ort::GetApi().GetTensorMemoryInfo(v_ptr, &memory_info);
  if (status) {
    const char *msg = Ort::GetApi().GetErrorMessage(status);
    Ort::GetApi().ReleaseStatus(status);
    SHERPA_ONNX_LOGE("%s: failed to get tensor memory info: %s", what, msg);
    SHERPA_ONNX_EXIT(-1);
  }

  // 2. Get memory type (OrtMemType)
  OrtMemType mem_type;
  status = Ort::GetApi().MemoryInfoGetMemType(memory_info, &mem_type);
  if (status) {
    const char *msg = Ort::GetApi().GetErrorMessage(status);
    Ort::GetApi().ReleaseStatus(status);
    SHERPA_ONNX_LOGE("%s: failed to get mem type: %s", what, msg);
    SHERPA_ONNX_EXIT(-1);
  }

  // 3. Check CPU
  if (mem_type != OrtMemTypeCPU) {
    int device_id = 0;
    status = Ort::GetApi().MemoryInfoGetId(memory_info, &device_id);
    if (status) {
      const char *msg = Ort::GetApi().GetErrorMessage(status);
      Ort::GetApi().ReleaseStatus(status);
      SHERPA_ONNX_LOGE("%s: failed to get device id: %s", what, msg);
      SHERPA_ONNX_EXIT(-1);
    }

    SHERPA_ONNX_LOGE("%s: expected CPU tensor but got mem_type=%d device_id=%d",
                     what, static_cast<int>(mem_type), device_id);
    SHERPA_ONNX_EXIT(-1);
  }
}
#endif

static inline std::string ToLower(std::string s) {
  std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) -> char {
    return static_cast<char>(std::tolower(c));
  });
  return s;
}

static inline bool IsCudaProvider(const std::string &provider) {
  auto p = ToLower(provider);
  // Keep it conservative. We only enable IO binding policy below when we
  // are on CUDA; other EPs keep the existing behavior.
  return p == "cuda" || (p.size() > 4 && p.find("cuda") == 0);
}

// Get the element type of a session input tensor.
static inline ONNXTensorElementDataType GetSessionInputElemType(
    Ort::Session *sess, size_t input_index) {
  auto ti = sess->GetInputTypeInfo(input_index);
  auto t = ti.GetTensorTypeAndShapeInfo();
  return static_cast<ONNXTensorElementDataType>(t.GetElementType());
}

template <typename T>
static Ort::Value AllocTensor(OrtAllocator *alloc,
                              const std::vector<int64_t> &shape) {
  return Ort::Value::CreateTensor<T>(alloc, shape.data(), shape.size());
}

template <>
Ort::Value AllocTensor<uint16_t>(OrtAllocator *alloc,
                                 const std::vector<int64_t> &shape) {
  return Ort::Value::CreateTensor(alloc, shape.data(), shape.size(),
                                  ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16);
}

// Allocate tensor by ONNX elem type (float/float16 only).
static inline Ort::Value AllocTensorByElemType(
    OrtAllocator *alloc, const std::vector<int64_t> &shape,
    ONNXTensorElementDataType t) {
  if (t == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
    return AllocTensor<float>(alloc, shape);
  }
  if (t == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16 ||
      t == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16) {
    return AllocTensor<uint16_t>(alloc, shape);
  }
  SHERPA_ONNX_LOGE("AllocTensorByElemType: unsupported elem_type=%d", (int)t);
  SHERPA_ONNX_EXIT(-1);
  return AllocTensor<float>(alloc, shape);
}

// Convert tensor to float32, handling both float16 and float32 inputs.
// NOTE: This helper assumes the input tensor is on CPU memory.
// The caller must ensure the tensor is on CPU (e.g., via IO Binding).
static Ort::Value CastToFloat32(Ort::Value in, OrtAllocator *alloc) {
  if (!in.IsTensor()) return in;
  auto info = in.GetTensorTypeAndShapeInfo();
  auto shape = info.GetShape();
  size_t n = NumelFromShape(shape);
  if (n == 0) return in;
  auto et = info.GetElementType();

  AssertTensorIsCpu(in, "CastToFloat32");

  Ort::Value out = AllocTensor<float>(alloc, shape);
  float *dst = out.GetTensorMutableData<float>();
  if (et == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
    const float *src = in.GetTensorData<float>();
    std::memcpy(dst, src, n * sizeof(float));
    return out;
  }
  if (et == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16 ||
      et == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16) {
    const uint16_t *src = in.GetTensorData<uint16_t>();
    for (size_t i = 0; i < n; ++i) dst[i] = HalfBitsToFloat(src[i]);
    return out;
  }
  SHERPA_ONNX_LOGE("CastToFloat32: unsupported input elem_type=%d", (int)et);
  return in;
}

// Convert tensor to float16, handling both float16 and float32 inputs.
// NOTE: This helper assumes the input tensor is on CPU memory.
static Ort::Value CastToFloat16(Ort::Value in, OrtAllocator *alloc) {
  if (!in.IsTensor()) return in;
  auto info = in.GetTensorTypeAndShapeInfo();
  auto shape = info.GetShape();
  size_t n = NumelFromShape(shape);
  if (n == 0) return in;
  auto et = static_cast<ONNXTensorElementDataType>(info.GetElementType());

  AssertTensorIsCpu(in, "CastToFloat16");

  Ort::Value out = AllocTensor<uint16_t>(alloc, shape);
  uint16_t *dst = out.GetTensorMutableData<uint16_t>();
  if (et == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16 ||
      et == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16) {
    const uint16_t *src = in.GetTensorData<uint16_t>();
    std::memcpy(dst, src, n * sizeof(uint16_t));
    return out;
  }
  if (et == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
    const float *src = in.GetTensorData<float>();
    for (size_t i = 0; i < n; ++i) dst[i] = FloatToHalfBits(src[i]);
    return out;
  }
  SHERPA_ONNX_LOGE("CastToFloat16: unsupported input elem_type=%d", (int)et);
  return in;
}

// Cast tensor to the expected element type (float16 or float32).
// Returns the input unchanged if it already matches the expected type.
static Ort::Value CastFloatLikeForExpected(Ort::Value in,
                                           ONNXTensorElementDataType expected,
                                           OrtAllocator *alloc) {
  if (!in.IsTensor()) return in;
  auto info = in.GetTensorTypeAndShapeInfo();
  auto actual = static_cast<ONNXTensorElementDataType>(info.GetElementType());
  if (actual == expected) return in;
  if (expected == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16) {
    return CastToFloat16(std::move(in), alloc);
  }
  if (expected == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
    return CastToFloat32(std::move(in), alloc);
  }
  SHERPA_ONNX_LOGE(
      "CastFloatLikeForExpected: unsupported expected elem_type=%d",
      (int)expected);
  return in;
}

static inline bool NeedsTypeConversion(Ort::Value &in,
                                       ONNXTensorElementDataType expected) {
  if (!in.IsTensor()) return false;
  auto info = in.GetTensorTypeAndShapeInfo();
  auto actual = static_cast<ONNXTensorElementDataType>(info.GetElementType());
  return actual != expected;
}

// Cast attention mask tensor to int64 if needed.
// Supports int32 to int64 conversion.
// NOTE: This helper assumes the input tensor is on CPU memory.
static Ort::Value CastMaskToInt64IfNeeded(Ort::Value in, OrtAllocator *alloc) {
  if (!in.IsTensor()) return in;
  auto info = in.GetTensorTypeAndShapeInfo();
  auto shape = info.GetShape();
  size_t n = NumelFromShape(shape);
  if (n == 0) return in;
  auto et = static_cast<ONNXTensorElementDataType>(info.GetElementType());
  if (et == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) return in;

  AssertTensorIsCpu(in, "CastMaskToInt64IfNeeded");

  if (et == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) {
    const int32_t *src = in.GetTensorData<int32_t>();
    Ort::Value out = AllocTensor<int64_t>(alloc, shape);
    int64_t *dst = out.GetTensorMutableData<int64_t>();
    for (size_t i = 0; i < n; ++i) dst[i] = static_cast<int64_t>(src[i]);
    return out;
  }

  SHERPA_ONNX_LOGE("attention_mask elem_type=%d not supported, expected int64",
                   (int)et);
  return in;
}

// Ensure attention_mask is [batch, target_len] on CPU, int64.
// If shorter: pad with 0. If longer: truncate.
static Ort::Value NormalizeAttentionMask(Ort::Value mask, int64_t target_len,
                                         OrtAllocator *alloc) {
  if (!mask.IsTensor()) return mask;
  AssertTensorIsCpu(mask, "NormalizeAttentionMask");

  auto info = mask.GetTensorTypeAndShapeInfo();
  auto shape = info.GetShape();
  if (shape.size() != 2) return mask;

  int64_t b = shape[0];
  int64_t l = shape[1];
  if (b <= 0 || l <= 0) return mask;

  if (static_cast<ONNXTensorElementDataType>(info.GetElementType()) !=
      ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) {
    mask = CastMaskToInt64IfNeeded(std::move(mask), alloc);
    info = mask.GetTensorTypeAndShapeInfo();
    shape = info.GetShape();
    if (shape.size() != 2) return mask;
    b = shape[0];
    l = shape[1];
  }

  if (l == target_len) return mask;

  std::vector<int64_t> new_shape = {b, target_len};
  Ort::Value out = AllocTensor<int64_t>(alloc, new_shape);
  int64_t *dst = out.GetTensorMutableData<int64_t>();
  const int64_t *src = mask.GetTensorData<int64_t>();

  std::memset(dst, 0,
              static_cast<size_t>(b) * static_cast<size_t>(target_len) *
                  sizeof(int64_t));

  int64_t copy_len = std::min<int64_t>(l, target_len);
  for (int64_t bi = 0; bi < b; ++bi) {
    const int64_t *srow = src + bi * l;
    int64_t *drow = dst + bi * target_len;
    std::memcpy(drow, srow, static_cast<size_t>(copy_len) * sizeof(int64_t));
  }

  return out;
}

}  // namespace

// Implementation class for OfflineFunASRNanoModel.
// Manages ONNX sessions for encoder, KV cache LLM, and embedding models.
class OfflineFunASRNanoModel::Impl {
 public:
  explicit Impl(const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR, "funasr-nano"),
        sess_opts_encoder_(GetSessionOptions(config)),
        sess_opts_llm_(GetSessionOptions(config)),
        sess_opts_embedding_(GetSessionOptions(config)),
        allocator_(),
        cpu_mem_info_(
            Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault)),
        is_cpu_provider_(config.provider == "cpu" || config.provider.empty()) {
    const auto &c = config_.funasr_nano;

    if (c.encoder_adaptor.empty()) {
      SHERPA_ONNX_LOGE("funasr_nano.encoder_adaptor is empty");
      SHERPA_ONNX_EXIT(-1);
    }

    if (c.llm.empty()) {
      SHERPA_ONNX_LOGE("funasr_nano.llm is required for KV cache mode");
      SHERPA_ONNX_EXIT(-1);
    }

    InitEncoderAdaptor(c.encoder_adaptor);
    InitLLM(c.llm);
    InitEmbedding(c.embedding);
    has_embedding_model_ = true;

    // FunASR-nano uses CPU-side sampling. When running on CUDA, we bind
    // logits to CPU (so sampling can read it safely).
    use_cuda_iobinding_ =
        (!is_cpu_provider_ && IsCudaProvider(config_.provider));
    if (use_cuda_iobinding_) {
      // Use device 0 by default. SessionOptions() in sherpa-onnx usually
      // configures the CUDA EP device; binding here only affects output memory.
      cuda_mem_info_ = std::make_unique<Ort::MemoryInfo>(
          "Cuda", OrtDeviceAllocator, 0, OrtMemTypeDefault);
    }
    CheckFp16OnCuda();
  }

  void InitEncoderAdaptorFromMemory(void *model_data,
                                    size_t model_data_length) {
    encoder_sess_ = std::make_unique<Ort::Session>(
        env_, model_data, model_data_length, sess_opts_encoder_);
    GetInputNames(encoder_sess_.get(), &encoder_input_names_,
                  &encoder_input_names_ptr_);
    GetOutputNames(encoder_sess_.get(), &encoder_output_names_,
                   &encoder_output_names_ptr_);
    encoder_in_type_ = GetSessionInputElemType(encoder_sess_.get(), 0);
    Ort::ModelMetadata meta_data = encoder_sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      PrintModelMetadata(os, meta_data);
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }
    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
    SHERPA_ONNX_READ_META_DATA(lfr_window_size_, "lfr_window_size");
    SHERPA_ONNX_READ_META_DATA(lfr_window_shift_, "lfr_window_shift");
    SHERPA_ONNX_READ_META_DATA(hidden_size_, "llm_dim");
  }

  void SetupLlmFromSession() {
    GetInputNames(llm_sess_.get(), &llm_input_names_, &llm_input_names_ptr_);
    GetOutputNames(llm_sess_.get(), &llm_output_names_, &llm_output_names_ptr_);

    llm_embeds_in_type_ = GetSessionInputElemType(llm_sess_.get(), 0);
    if (llm_embeds_in_type_ != ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
      SHERPA_ONNX_LOGE("LLM inputs_embeds must be float32, got elem_type=%d",
                       (int)llm_embeds_in_type_);
      SHERPA_ONNX_EXIT(-1);
    }

    Ort::ModelMetadata meta_data = llm_sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      PrintModelMetadata(os, meta_data);
#if __OHOS__
      SHERPA_ONNX_LOGE("LLM model metadata:\n%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("LLM model metadata:\n%s\n", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;
    SHERPA_ONNX_READ_META_DATA(vocab_size_, "vocab_size");
    if (hidden_size_ == 0) {
      SHERPA_ONNX_READ_META_DATA(hidden_size_, "hidden_size");
    }

    // Detect KV delta model type (model_type metadata should contain
    // "kv_delta")
    auto model_type_value =
        LookupCustomModelMetaData(meta_data, "model_type", allocator);
    is_kv_delta_model_ =
        (!model_type_value.empty() &&
         model_type_value.find("kv_delta") != std::string::npos);

    int32_t num_outputs = static_cast<int32_t>(llm_output_names_.size());
    if (num_outputs < 1 || (num_outputs - 1) % 2 != 0) {
      SHERPA_ONNX_LOGE(
          "LLM model must have 1 logits output + 2*num_layers KV outputs, got "
          "%d outputs",
          num_outputs);
      SHERPA_ONNX_EXIT(-1);
    }
    int32_t inferred_layers = (num_outputs - 1) / 2;

    auto num_layers_value =
        LookupCustomModelMetaData(meta_data, "num_layers", allocator);
    if (!num_layers_value.empty()) {
      num_layers_ = atoi(num_layers_value.c_str());
      if (num_layers_ <= 0) {
        SHERPA_ONNX_LOGE("Invalid num_layers=%d from metadata", num_layers_);
        SHERPA_ONNX_EXIT(-1);
      }
      if (num_layers_ != inferred_layers) {
        SHERPA_ONNX_LOGE("LLM num_layers mismatch: metadata=%d, inferred=%d",
                         num_layers_, inferred_layers);
        SHERPA_ONNX_EXIT(-1);
      }
    } else {
      num_layers_ = inferred_layers;
    }

    // Read KV cache capacity from metadata.
    auto max_total_len_value =
        LookupCustomModelMetaData(meta_data, "max_total_len", allocator);
    if (!max_total_len_value.empty()) {
      max_total_len_ = atoi(max_total_len_value.c_str());
    } else {
      auto attn_len_value =
          LookupCustomModelMetaData(meta_data, "attention_mask_len", allocator);
      if (!attn_len_value.empty())
        max_total_len_ = atoi(attn_len_value.c_str());
    }
    if (max_total_len_ <= 0) {
      // Fallback: use input[1] shape
      auto ti = llm_sess_->GetInputTypeInfo(1);
      auto shp = ti.GetTensorTypeAndShapeInfo().GetShape();
      if (shp.size() == 2 && shp[1] > 0) {
        max_total_len_ = static_cast<int32_t>(shp[1]);
      }
      if (max_total_len_ <= 0) {
        SHERPA_ONNX_LOGE(
            "Failed to determine max_total_len from metadata or input shape");
        SHERPA_ONNX_EXIT(-1);
      }
    }

    // Only KV delta models are supported
    if (!is_kv_delta_model_) {
      SHERPA_ONNX_LOGE(
          "Only KV delta models are supported, but model_type does not contain "
          "'kv_delta'");
      SHERPA_ONNX_EXIT(-1);
    }

    // Validate input layout: 0 embeds, 1 attention_mask, 2 cache_position, 3+
    // KV cache
    if (llm_input_names_.size() < 3u) {
      SHERPA_ONNX_LOGE(
          "LLM model inputs must be >=3 (embeds,mask,cache_position)");
      SHERPA_ONNX_EXIT(-1);
    }

    cache_position_input_index_ = 2;
    past_kv_input_start_index_ = 3;

    int32_t expected_inputs = 3 + 2 * num_layers_;
    int32_t actual_inputs = static_cast<int32_t>(llm_input_names_.size());
    if (actual_inputs != expected_inputs) {
      if (actual_inputs == 2 + 2 * num_layers_) {
        SHERPA_ONNX_LOGE(
            "LLM model inputs mismatch: expected %d (=3+2*num_layers with "
            "cache_position) "
            "got %d (=2+2*num_layers without cache_position). "
            "Please use a model exported with cache_position support.",
            expected_inputs, actual_inputs);
      } else {
        SHERPA_ONNX_LOGE(
            "LLM model inputs mismatch: expected %d (=3+2*num_layers) got %d",
            expected_inputs, actual_inputs);
      }
      SHERPA_ONNX_EXIT(-1);
    }

    // KV input element type (should be float16 or float32).
    kv_in_type_ =
        GetSessionInputElemType(llm_sess_.get(), past_kv_input_start_index_);
    kv_in_type_v_ = GetSessionInputElemType(llm_sess_.get(),
                                            past_kv_input_start_index_ + 1);
    if (!(kv_in_type_ == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT ||
          kv_in_type_ == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16 ||
          kv_in_type_ == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16)) {
      SHERPA_ONNX_LOGE("LLM past_key elem_type=%d not supported",
                       (int)kv_in_type_);
      SHERPA_ONNX_EXIT(-1);
    }
    if (!(kv_in_type_v_ == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT ||
          kv_in_type_v_ == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16 ||
          kv_in_type_v_ == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16)) {
      SHERPA_ONNX_LOGE("LLM past_value elem_type=%d not supported",
                       (int)kv_in_type_v_);
      SHERPA_ONNX_EXIT(-1);
    }

    // Templates for KV shapes from session inputs.
    auto past_key_ti = llm_sess_->GetInputTypeInfo(past_kv_input_start_index_);
    past_key_shape_tpl_ = past_key_ti.GetTensorTypeAndShapeInfo().GetShape();

    auto past_value_ti =
        llm_sess_->GetInputTypeInfo(past_kv_input_start_index_ + 1);
    past_value_shape_tpl_ =
        past_value_ti.GetTensorTypeAndShapeInfo().GetShape();

    // Pre-allocate buffers for CPU IoBinding (decode step: [1, 1, vocab_size]
    // and [1, 1, kv_h, hd])
    int64_t kv_h = past_key_shape_tpl_[2];
    int64_t hd = past_key_shape_tpl_[3];
    std::vector<int64_t> logits_shape = {1, 1,
                                         static_cast<int64_t>(vocab_size_)};
    logits_buffer_ = AllocTensor<float>(allocator_, logits_shape);

    kv_delta_buffers_.reserve(num_layers_);
    std::vector<int64_t> kv_delta_shape = {1, 1, kv_h, hd};
    for (int32_t i = 0; i < num_layers_; ++i) {
      Ort::Value key_delta =
          AllocTensorByElemType(allocator_, kv_delta_shape, kv_in_type_);
      Ort::Value value_delta =
          AllocTensorByElemType(allocator_, kv_delta_shape, kv_in_type_v_);
      kv_delta_buffers_.emplace_back(std::move(key_delta),
                                     std::move(value_delta));
    }
    has_decode_buffers_ = true;
  }

  void InitLLMFromMemory(void *model_data, size_t model_data_length) {
    try {
      llm_sess_ = std::make_unique<Ort::Session>(
          env_, model_data, model_data_length, sess_opts_llm_);
    } catch (const Ort::Exception &e) {
      SHERPA_ONNX_LOGE("InitLLMFromMemory: failed to create session: %s",
                       e.what());
      if (std::string(e.what()).find("external data") != std::string::npos ||
          std::string(e.what()).find("External data") != std::string::npos) {
        SHERPA_ONNX_LOGE(
            "LLM model requires external data (.data file) but loaded from "
            "memory. "
            "Please use fp16/int8 single-file model or load by file path "
            "instead.");
        SHERPA_ONNX_EXIT(-1);
      }
      throw;
    }

    SetupLlmFromSession();
  }

  void InitEmbeddingFromMemory(void *model_data, size_t model_data_length) {
    embedding_sess_ = std::make_unique<Ort::Session>(
        env_, model_data, model_data_length, sess_opts_embedding_);
    GetInputNames(embedding_sess_.get(), &embedding_input_names_,
                  &embedding_input_names_ptr_);
    GetOutputNames(embedding_sess_.get(), &embedding_output_names_,
                   &embedding_output_names_ptr_);
    Ort::ModelMetadata meta_data = embedding_sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      PrintModelMetadata(os, meta_data);
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }
    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
    if (hidden_size_ == 0) {
      SHERPA_ONNX_READ_META_DATA(hidden_size_, "hidden_size");
    }
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR, "funasr-nano"),
        sess_opts_encoder_(GetSessionOptions(config)),
        sess_opts_llm_(GetSessionOptions(config)),
        sess_opts_embedding_(GetSessionOptions(config)),
        allocator_(),
        cpu_mem_info_(
            Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault)),
        is_cpu_provider_(config.provider == "cpu" || config.provider.empty()) {
    const auto &c = config_.funasr_nano;

    if (c.encoder_adaptor.empty()) {
      SHERPA_ONNX_LOGE("funasr_nano.encoder_adaptor is empty");
      SHERPA_ONNX_EXIT(-1);
    }

    if (c.llm.empty()) {
      SHERPA_ONNX_LOGE("funasr_nano.llm is required for KV cache mode");
      SHERPA_ONNX_EXIT(-1);
    }

    auto buf_encoder = ReadFile(mgr, c.encoder_adaptor);
    InitEncoderAdaptorFromMemory(buf_encoder.data(), buf_encoder.size());

    auto buf_llm = ReadFile(mgr, c.llm);
    InitLLMFromMemory(buf_llm.data(), buf_llm.size());

    auto buf_embedding = ReadFile(mgr, c.embedding);
    InitEmbeddingFromMemory(buf_embedding.data(), buf_embedding.size());
    has_embedding_model_ = true;

    use_cuda_iobinding_ =
        (!is_cpu_provider_ && IsCudaProvider(config_.provider));
    if (use_cuda_iobinding_) {
      cuda_mem_info_ = std::make_unique<Ort::MemoryInfo>(
          "Cuda", OrtDeviceAllocator, 0, OrtMemTypeDefault);
    }
    CheckFp16OnCuda();
  }

  // Forward pass through encoder adaptor model.
  // Converts audio features to embeddings compatible with the LLM.
  Ort::Value ForwardEncoderAdaptor(Ort::Value features) {
    if (NeedsTypeConversion(features, encoder_in_type_)) {
      features = CastFloatLikeForExpected(std::move(features), encoder_in_type_,
                                          allocator_);
    }

    // Encoder output is consumed by CPU-side code (embedding packing), so we
    // bind it to CPU when running on CUDA to avoid returning a CUDA pointer.
    if (use_cuda_iobinding_) {
      Ort::IoBinding binding(*encoder_sess_);
      binding.BindInput(encoder_input_names_ptr_[0], features);
      binding.BindOutput(encoder_output_names_ptr_[0], cpu_mem_info_);
      binding.SynchronizeInputs();
      encoder_sess_->Run(Ort::RunOptions{nullptr}, binding);
      binding.SynchronizeOutputs();
      auto outs = binding.GetOutputValues();

      if (outs.empty()) {
        SHERPA_ONNX_LOGE("ForwardEncoderAdaptor: empty outputs");
        SHERPA_ONNX_EXIT(-1);
      }
      return std::move(outs[0]);
    }

    std::array<Ort::Value, 1> inputs = {std::move(features)};
    auto outputs = encoder_sess_->Run(
        {}, encoder_input_names_ptr_.data(), inputs.data(), inputs.size(),
        encoder_output_names_ptr_.data(), encoder_output_names_ptr_.size());
    return std::move(outputs[0]);
  }

  std::vector<std::pair<Ort::Value, Ort::Value>> CreateEmptyKVCache(
      int64_t batch) {
    std::vector<std::pair<Ort::Value, Ort::Value>> kv_cache;
    kv_cache.reserve(num_layers_);

    // Read kv_h, hd from input shape template (dim2, dim3)
    auto &tpl = past_key_shape_tpl_;
    if (tpl.size() < 4) {
      SHERPA_ONNX_LOGE("Invalid KV cache shape template, expected >=4 dims");
      SHERPA_ONNX_EXIT(-1);
    }
    int64_t kv_h = tpl[2];
    int64_t hd = tpl[3];
    std::vector<int64_t> key_shape = {
        batch, static_cast<int64_t>(max_total_len_), kv_h, hd};
    std::vector<int64_t> value_shape = key_shape;

    size_t key_numel = NumelFromShape(key_shape);
    size_t value_numel = NumelFromShape(value_shape);

    for (int32_t i = 0; i < num_layers_; ++i) {
      Ort::Value key_tensor =
          AllocTensorByElemType(allocator_, key_shape, kv_in_type_);
      Ort::Value value_tensor =
          AllocTensorByElemType(allocator_, value_shape, kv_in_type_);

      // Zero-initialize cache
      if (key_numel > 0) {
        if (kv_in_type_ == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
          std::memset(key_tensor.GetTensorMutableData<float>(), 0,
                      key_numel * sizeof(float));
        } else {
          std::memset(key_tensor.GetTensorMutableData<uint16_t>(), 0,
                      key_numel * sizeof(uint16_t));
        }
      }

      if (value_numel > 0) {
        if (kv_in_type_ == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
          std::memset(value_tensor.GetTensorMutableData<float>(), 0,
                      value_numel * sizeof(float));
        } else {
          std::memset(value_tensor.GetTensorMutableData<uint16_t>(), 0,
                      value_numel * sizeof(uint16_t));
        }
      }

      kv_cache.emplace_back(std::move(key_tensor), std::move(value_tensor));
    }
    return kv_cache;
  }

  std::pair<Ort::Value, std::vector<std::pair<Ort::Value, Ort::Value>>>
  ForwardLLM(Ort::Value inputs_embeds, Ort::Value attention_mask,
             const Ort::Value &cache_position,
             const std::vector<std::pair<Ort::Value, Ort::Value>> &cache_kv) {
    if (static_cast<int32_t>(cache_kv.size()) != num_layers_) {
      SHERPA_ONNX_LOGE("ForwardLLM: cache_kv size (%zu) != num_layers (%d)",
                       cache_kv.size(), num_layers_);
      SHERPA_ONNX_EXIT(-1);
    }

    if (!inputs_embeds.IsTensor()) {
      SHERPA_ONNX_LOGE("ForwardLLM: inputs_embeds is not a tensor");
      SHERPA_ONNX_EXIT(-1);
    }

    auto embeds_info = inputs_embeds.GetTensorTypeAndShapeInfo();
    auto embeds_type =
        static_cast<ONNXTensorElementDataType>(embeds_info.GetElementType());
    if (embeds_type != ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
      SHERPA_ONNX_LOGE(
          "ForwardLLM: inputs_embeds must be float32, got elem_type=%d",
          (int)embeds_type);
      SHERPA_ONNX_EXIT(-1);
    }

    // Prepare attention_mask: int64, truncate if length exceeds max_total_len
    if (attention_mask.IsTensor()) {
      auto mask_info = attention_mask.GetTensorTypeAndShapeInfo();
      auto mask_type =
          static_cast<ONNXTensorElementDataType>(mask_info.GetElementType());
      if (mask_type != ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) {
        attention_mask =
            CastMaskToInt64IfNeeded(std::move(attention_mask), allocator_);
        mask_info = attention_mask.GetTensorTypeAndShapeInfo();
      }

      auto mask_shape = mask_info.GetShape();
      if (mask_shape.size() == 2 && mask_shape[1] > max_total_len_) {
        // Truncate attention_mask if it exceeds max_total_len
        attention_mask = NormalizeAttentionMask(std::move(attention_mask),
                                                max_total_len_, allocator_);
      }
    }

    std::vector<Ort::Value> inputs;
    inputs.reserve(3 + 2 * cache_kv.size());
    inputs.push_back(std::move(inputs_embeds));
    inputs.push_back(std::move(attention_mask));
    inputs.push_back(View(const_cast<Ort::Value *>(&cache_position)));

    for (const auto &kv : cache_kv) {
      inputs.push_back(View(const_cast<Ort::Value *>(&kv.first)));
      inputs.push_back(View(const_cast<Ort::Value *>(&kv.second)));
    }

    std::vector<const char *> input_names_ptr;
    input_names_ptr.reserve(3 + 2 * cache_kv.size());
    input_names_ptr.push_back(llm_input_names_ptr_[0]);  // inputs_embeds
    input_names_ptr.push_back(llm_input_names_ptr_[1]);  // attention_mask
    input_names_ptr.push_back(llm_input_names_ptr_[2]);  // cache_position
    for (size_t i = 0; i < cache_kv.size(); ++i) {
      input_names_ptr.push_back(
          llm_input_names_ptr_[past_kv_input_start_index_ + 2 * i]);
      input_names_ptr.push_back(
          llm_input_names_ptr_[past_kv_input_start_index_ + 2 * i + 1]);
    }

    // Check if this is a decode step (seq_len == 1) for CPU buffer reuse
    auto embeds_shape = embeds_info.GetShape();
    bool is_decode_step = (embeds_shape.size() == 3 && embeds_shape[1] == 1);
    bool use_cpu_decode_buffers =
        (is_decode_step && has_decode_buffers_ && !use_cuda_iobinding_);

    std::vector<Ort::Value> outputs;

    if (use_cuda_iobinding_) {
      Ort::IoBinding binding(*llm_sess_);
      for (size_t i = 0; i < inputs.size(); ++i) {
        binding.BindInput(input_names_ptr[i], inputs[i]);
      }

      // logits must be CPU (we will read it on CPU).
      binding.BindOutput(llm_output_names_ptr_[0], cpu_mem_info_);

      // KV outputs: bind to CPU so ApplyKvDeltaInplace can work with CPU cache
      for (size_t i = 1; i < llm_output_names_ptr_.size(); ++i) {
        binding.BindOutput(llm_output_names_ptr_[i], cpu_mem_info_);
      }

      binding.SynchronizeInputs();
      llm_sess_->Run(Ort::RunOptions{nullptr}, binding);
      binding.SynchronizeOutputs();
      outputs = binding.GetOutputValues();
    } else if (use_cpu_decode_buffers) {
      // CPU path: use IoBinding with pre-allocated buffers for decode step
      Ort::IoBinding binding(*llm_sess_);
      for (size_t i = 0; i < inputs.size(); ++i) {
        binding.BindInput(input_names_ptr[i], inputs[i]);
      }

      // Bind outputs to pre-allocated buffers
      binding.BindOutput(llm_output_names_ptr_[0], logits_buffer_);
      for (size_t i = 0; i < kv_delta_buffers_.size(); ++i) {
        binding.BindOutput(llm_output_names_ptr_[1 + 2 * i],
                           kv_delta_buffers_[i].first);
        binding.BindOutput(llm_output_names_ptr_[1 + 2 * i + 1],
                           kv_delta_buffers_[i].second);
      }

      binding.SynchronizeInputs();
      llm_sess_->Run(Ort::RunOptions{nullptr}, binding);
      binding.SynchronizeOutputs();
      outputs = binding.GetOutputValues();
    } else {
      // Prefill step or buffers not initialized: use regular Run
      outputs = llm_sess_->Run({}, input_names_ptr.data(), inputs.data(),
                               inputs.size(), llm_output_names_ptr_.data(),
                               llm_output_names_ptr_.size());
    }

    Ort::Value logits{nullptr};
    if (use_cpu_decode_buffers) {
      // For decode step with pre-allocated buffer, create a view to return
      // (outputs will be destroyed but buffer persists)
      logits = View(&logits_buffer_);
    } else {
      if (outputs.empty()) {
        SHERPA_ONNX_LOGE("ForwardLLM: empty outputs");
        SHERPA_ONNX_EXIT(-1);
      }
      logits = std::move(outputs[0]);
    }

    if (!logits.IsTensor()) {
      SHERPA_ONNX_LOGE("ForwardLLM: logits is not a tensor");
      SHERPA_ONNX_EXIT(-1);
    }

    AssertTensorIsCpu(logits, "ForwardLLM logits");

    auto logits_info = logits.GetTensorTypeAndShapeInfo();
    auto logits_type =
        static_cast<ONNXTensorElementDataType>(logits_info.GetElementType());
    if (logits_type != ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
      SHERPA_ONNX_LOGE("ForwardLLM: logits must be float32, got elem_type=%d",
                       (int)logits_type);
      SHERPA_ONNX_EXIT(-1);
    }

    int32_t inferred_layers;
    if (use_cpu_decode_buffers) {
      // For decode step with pre-allocated buffers, we know the number of
      // layers
      inferred_layers = num_layers_;
    } else {
      if ((outputs.size() - 1) % 2 != 0) {
        SHERPA_ONNX_LOGE("ForwardLLM: invalid KV cache outputs size=%d",
                         static_cast<int>(outputs.size()));
        SHERPA_ONNX_EXIT(-1);
      }
      inferred_layers = static_cast<int32_t>((outputs.size() - 1) / 2);
      if (inferred_layers != num_layers_) {
        SHERPA_ONNX_LOGE(
            "ForwardLLM: KV outputs layers mismatch: expected=%d, got=%d",
            num_layers_, inferred_layers);
        SHERPA_ONNX_EXIT(-1);
      }
    }

    std::vector<std::pair<Ort::Value, Ort::Value>> kv_outputs;
    kv_outputs.reserve(num_layers_);
    if (use_cpu_decode_buffers) {
      // For decode step with pre-allocated buffers, create views
      for (int32_t i = 0; i < num_layers_; ++i) {
        kv_outputs.emplace_back(View(&kv_delta_buffers_[i].first),
                                View(&kv_delta_buffers_[i].second));
      }
    } else {
      for (int32_t i = 0; i < num_layers_; ++i) {
        kv_outputs.emplace_back(std::move(outputs[1 + 2 * i]),
                                std::move(outputs[1 + 2 * i + 1]));
      }
    }

    return {std::move(logits), std::move(kv_outputs)};
  }

  // Apply KV delta in-place to the KV cache.
  // Copy key_delta/value_delta into cache_key/value at positions [pos0:pos0+S)
  void ApplyKvDeltaInplace(
      std::vector<std::pair<Ort::Value, Ort::Value>> *cache_kv,
      const std::vector<std::pair<Ort::Value, Ort::Value>> &kv_delta,
      const Ort::Value &cache_position) const {
    if (!cache_kv || cache_kv->size() != static_cast<size_t>(num_layers_) ||
        kv_delta.size() != static_cast<size_t>(num_layers_)) {
      SHERPA_ONNX_LOGE(
          "ApplyKvDeltaInplace: invalid kv sizes: cache=%zu delta=%zu",
          cache_kv ? cache_kv->size() : 0, kv_delta.size());
      SHERPA_ONNX_EXIT(-1);
    }

    // cache_position: [S], first element is pos0 (contiguous write)
    auto pos_info = cache_position.GetTensorTypeAndShapeInfo();
    auto pos_shape = pos_info.GetShape();
    int64_t S = pos_shape.empty() ? 0 : pos_shape[0];
    if (S <= 0) {
      SHERPA_ONNX_LOGE("ApplyKvDeltaInplace: cache_position has invalid shape");
      SHERPA_ONNX_EXIT(-1);
    }

    const int64_t *pos_data = cache_position.GetTensorData<int64_t>();
    int64_t pos0 = pos_data[0];

    if (pos0 < 0) {
      SHERPA_ONNX_LOGE("ApplyKvDeltaInplace: pos0 < 0 (%d)",
                       static_cast<int32_t>(pos0));
      SHERPA_ONNX_EXIT(-1);
    }
    if (pos0 + S > max_total_len_) {
      SHERPA_ONNX_LOGE(
          "ApplyKvDeltaInplace: pos0+S exceeds max_total_len_ (%d + %d > "
          "%d), clamping S",
          static_cast<int32_t>(pos0), static_cast<int32_t>(S), max_total_len_);
      S = max_total_len_ - pos0;
      if (S <= 0) return;
    }

    for (int32_t layer = 0; layer < num_layers_; ++layer) {
      Ort::Value &cache_key = (*cache_kv)[layer].first;
      Ort::Value &cache_val = (*cache_kv)[layer].second;

      const Ort::Value &delta_key = kv_delta[layer].first;
      const Ort::Value &delta_val = kv_delta[layer].second;

      auto ck_info = cache_key.GetTensorTypeAndShapeInfo();
      auto dk_info = delta_key.GetTensorTypeAndShapeInfo();

      auto ck_shape = ck_info.GetShape();  // [B, max_total_len, kv_h, hd]
      auto dk_shape = dk_info.GetShape();  // [B, S, kv_h, hd]

      int64_t B = ck_shape[0];
      int64_t kv_h = ck_shape[2];
      int64_t hd = ck_shape[3];

      // bytes per element
      auto elem_type = ck_info.GetElementType();
      size_t elem_bytes = 0;
      switch (elem_type) {
        case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT:
          elem_bytes = 4;
          break;
        case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16:
        case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16:
          elem_bytes = 2;
          break;
        default:
          SHERPA_ONNX_LOGE("ApplyKvDeltaInplace: unsupported elem_type=%d",
                           elem_type);
          SHERPA_ONNX_EXIT(-1);
      }

      size_t bytes_per_pos =
          static_cast<size_t>(kv_h) * static_cast<size_t>(hd) * elem_bytes;

      void *dst_k = cache_key.GetTensorMutableData<void>();
      void *dst_v = cache_val.GetTensorMutableData<void>();
      const void *src_k = delta_key.GetTensorData<void>();
      const void *src_v = delta_val.GetTensorData<void>();

      for (int64_t b = 0; b < B; ++b) {
        size_t dst_off =
            (static_cast<size_t>(b) * static_cast<size_t>(max_total_len_) +
             static_cast<size_t>(pos0)) *
            bytes_per_pos;
        size_t src_off =
            (static_cast<size_t>(b) * static_cast<size_t>(dk_shape[1])) *
            bytes_per_pos;

        size_t copy_bytes = static_cast<size_t>(S) * bytes_per_pos;

        uint8_t *dst_k_ptr = static_cast<uint8_t *>(dst_k) + dst_off;
        uint8_t *dst_v_ptr = static_cast<uint8_t *>(dst_v) + dst_off;
        const uint8_t *src_k_ptr =
            static_cast<const uint8_t *>(src_k) + src_off;
        const uint8_t *src_v_ptr =
            static_cast<const uint8_t *>(src_v) + src_off;

        std::memcpy(dst_k_ptr, src_k_ptr, copy_bytes);
        std::memcpy(dst_v_ptr, src_v_ptr, copy_bytes);
      }
    }
  }

  // Forward pass through embedding model.
  // Converts token IDs to embeddings.
  Ort::Value ForwardEmbedding(Ort::Value input_ids) {
    // Embedding output is consumed by CPU-side packing code; bind it to CPU
    // when running on CUDA to avoid returning a CUDA pointer.
    if (use_cuda_iobinding_) {
      Ort::IoBinding binding(*embedding_sess_);
      binding.BindInput(embedding_input_names_ptr_[0], input_ids);
      binding.BindOutput(embedding_output_names_ptr_[0], cpu_mem_info_);
      binding.SynchronizeInputs();
      embedding_sess_->Run(Ort::RunOptions{nullptr}, binding);
      binding.SynchronizeOutputs();
      auto outs = binding.GetOutputValues();

      if (outs.empty()) {
        SHERPA_ONNX_LOGE("ForwardEmbedding: empty outputs");
        SHERPA_ONNX_EXIT(-1);
      }
      return std::move(outs[0]);
    }

    std::array<Ort::Value, 1> inputs = {std::move(input_ids)};
    auto outputs = embedding_sess_->Run(
        {}, embedding_input_names_ptr_.data(), inputs.data(), inputs.size(),
        embedding_output_names_ptr_.data(), embedding_output_names_ptr_.size());
    return std::move(outputs[0]);
  }

  int32_t VocabSize() const { return vocab_size_; }
  int32_t HiddenSize() const { return hidden_size_; }
  int32_t GetMaxTotalLen() const { return max_total_len_; }
  int32_t LfrWindowSize() const { return lfr_window_size_; }
  int32_t LfrWindowShift() const { return lfr_window_shift_; }
  OrtAllocator *Allocator() { return allocator_; }
  bool HasEmbeddingModel() const { return has_embedding_model_; }
  bool UseKVCache() const { return true; }
  bool IsCpuProvider() const { return is_cpu_provider_; }

 private:
  void CheckFp16OnCuda() {
    if (use_cuda_iobinding_) {
      Ort::ModelMetadata meta_data = llm_sess_->GetModelMetadata();
      Ort::AllocatorWithDefaultOptions allocator;
      auto quant_type =
          LookupCustomModelMetaData(meta_data, "quantization_type", allocator);

      if (!quant_type.empty() && quant_type == "fp16") {
        SHERPA_ONNX_LOGE(
            "fp16 LLM models are not supported on CUDA yet. Please use "
            "fp32/int8 models.");
        SHERPA_ONNX_EXIT(-1);
      }
    }
  }

  void InitEncoderAdaptor(const std::string &model_path) {
    encoder_sess_ = std::make_unique<Ort::Session>(
        env_, SHERPA_ONNX_TO_ORT_PATH(model_path), sess_opts_encoder_);
    GetInputNames(encoder_sess_.get(), &encoder_input_names_,
                  &encoder_input_names_ptr_);
    GetOutputNames(encoder_sess_.get(), &encoder_output_names_,
                   &encoder_output_names_ptr_);
    encoder_in_type_ = GetSessionInputElemType(encoder_sess_.get(), 0);
    Ort::ModelMetadata meta_data = encoder_sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      PrintModelMetadata(os, meta_data);
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }
    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
    SHERPA_ONNX_READ_META_DATA(lfr_window_size_, "lfr_window_size");
    SHERPA_ONNX_READ_META_DATA(lfr_window_shift_, "lfr_window_shift");
    SHERPA_ONNX_READ_META_DATA(hidden_size_, "llm_dim");
  }

  void InitLLM(const std::string &model_path) {
    // For fp32 models: check for .data file by replacing .onnx with .data
    // int8 and fp16 models don't have .data files, so no need to check
    std::string data_path = model_path;
    if (data_path.size() >= 5 &&
        data_path.substr(data_path.size() - 5) == ".onnx") {
      data_path = data_path.substr(0, data_path.size() - 5) + ".data";
    } else {
      data_path = model_path + ".data";
    }
    bool has_external_data = FileExists(data_path);

    // Resolve absolute path for model file
    std::string abs_model_path = ResolveAbsolutePath(model_path);

    if (has_external_data) {
      // When external data exists, use absolute file path to create session.
      // ONNX Runtime will automatically find .data file in the same directory
      // as the model file when using absolute path.
      llm_sess_ = std::make_unique<Ort::Session>(
          env_, SHERPA_ONNX_TO_ORT_PATH(abs_model_path), sess_opts_llm_);
    } else {
      // No external data: load entire model into memory
      std::vector<char> model_data = ReadFile(model_path);
      llm_sess_ = std::make_unique<Ort::Session>(
          env_, model_data.data(), model_data.size(), sess_opts_llm_);
    }

    SetupLlmFromSession();
  }

  void InitEmbedding(const std::string &model_path) {
    embedding_sess_ = std::make_unique<Ort::Session>(
        env_, SHERPA_ONNX_TO_ORT_PATH(model_path), sess_opts_embedding_);
    GetInputNames(embedding_sess_.get(), &embedding_input_names_,
                  &embedding_input_names_ptr_);
    GetOutputNames(embedding_sess_.get(), &embedding_output_names_,
                   &embedding_output_names_ptr_);
    Ort::ModelMetadata meta_data = embedding_sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      PrintModelMetadata(os, meta_data);
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }
    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
    if (hidden_size_ == 0) {
      SHERPA_ONNX_READ_META_DATA(hidden_size_, "hidden_size");
    }
  }

 private:
  OfflineModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_encoder_;
  Ort::SessionOptions sess_opts_llm_;
  Ort::SessionOptions sess_opts_embedding_;
  Ort::AllocatorWithDefaultOptions allocator_;

  Ort::MemoryInfo cpu_mem_info_;
  std::unique_ptr<Ort::MemoryInfo> cuda_mem_info_;
  bool use_cuda_iobinding_ = false;

  std::unique_ptr<Ort::Session> encoder_sess_;
  std::unique_ptr<Ort::Session> llm_sess_;
  std::unique_ptr<Ort::Session> embedding_sess_;

  std::vector<std::string> encoder_input_names_;
  std::vector<const char *> encoder_input_names_ptr_;
  std::vector<std::string> encoder_output_names_;
  std::vector<const char *> encoder_output_names_ptr_;

  std::vector<std::string> llm_input_names_;
  std::vector<const char *> llm_input_names_ptr_;
  std::vector<std::string> llm_output_names_;
  std::vector<const char *> llm_output_names_ptr_;

  std::vector<std::string> embedding_input_names_;
  std::vector<const char *> embedding_input_names_ptr_;
  std::vector<std::string> embedding_output_names_;
  std::vector<const char *> embedding_output_names_ptr_;

  int32_t vocab_size_ = 0;
  int32_t hidden_size_ = 0;
  int32_t lfr_window_size_ = 0;
  int32_t lfr_window_shift_ = 0;

  int32_t num_layers_ = 0;
  int32_t max_total_len_ = 0;  // attention_mask length / cache capacity
  bool has_embedding_model_ = false;

  ONNXTensorElementDataType encoder_in_type_ =
      ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED;
  ONNXTensorElementDataType llm_embeds_in_type_ =
      ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED;

  // KV input element types (for CreateEmptyKVCache).
  ONNXTensorElementDataType kv_in_type_ = ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  ONNXTensorElementDataType kv_in_type_v_ = ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;

  // Input indices for KV cache LLM.
  size_t cache_position_input_index_ = 2;
  size_t past_kv_input_start_index_ = 3;

  std::vector<int64_t> past_key_shape_tpl_;
  std::vector<int64_t> past_value_shape_tpl_;

  bool is_cpu_provider_ = false;
  bool is_kv_delta_model_ = false;

  // Pre-allocated buffers for CPU IoBinding (decode step reuse)
  bool has_decode_buffers_ = false;
  Ort::Value logits_buffer_{nullptr};
  std::vector<std::pair<Ort::Value, Ort::Value>> kv_delta_buffers_;
};

OfflineFunASRNanoModel::OfflineFunASRNanoModel(const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineFunASRNanoModel::OfflineFunASRNanoModel(Manager *mgr,
                                               const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineFunASRNanoModel::~OfflineFunASRNanoModel() = default;

Ort::Value OfflineFunASRNanoModel::ForwardEncoderAdaptor(Ort::Value features) {
  return impl_->ForwardEncoderAdaptor(std::move(features));
}

std::pair<Ort::Value, std::vector<std::pair<Ort::Value, Ort::Value>>>
OfflineFunASRNanoModel::ForwardLLM(
    Ort::Value inputs_embeds, Ort::Value attention_mask,
    const Ort::Value &cache_position,
    const std::vector<std::pair<Ort::Value, Ort::Value>> &cache_kv) {
  return impl_->ForwardLLM(std::move(inputs_embeds), std::move(attention_mask),
                           std::move(cache_position), cache_kv);
}

std::vector<std::pair<Ort::Value, Ort::Value>>
OfflineFunASRNanoModel::CreateEmptyKVCache(int64_t batch) {
  return impl_->CreateEmptyKVCache(batch);
}

void OfflineFunASRNanoModel::ApplyKvDeltaInplace(
    std::vector<std::pair<Ort::Value, Ort::Value>> *cache_kv,
    const std::vector<std::pair<Ort::Value, Ort::Value>> &kv_delta,
    const Ort::Value &cache_position) {
  return impl_->ApplyKvDeltaInplace(cache_kv, kv_delta, cache_position);
}

bool OfflineFunASRNanoModel::UseKVCache() const { return impl_->UseKVCache(); }

Ort::Value OfflineFunASRNanoModel::ForwardEmbedding(Ort::Value input_ids) {
  return impl_->ForwardEmbedding(std::move(input_ids));
}

int32_t OfflineFunASRNanoModel::VocabSize() const { return impl_->VocabSize(); }
int32_t OfflineFunASRNanoModel::HiddenSize() const {
  return impl_->HiddenSize();
}
int32_t OfflineFunASRNanoModel::GetMaxTotalLen() const {
  return impl_->GetMaxTotalLen();
}

int32_t OfflineFunASRNanoModel::LfrWindowSize() const {
  return impl_->LfrWindowSize();
}
int32_t OfflineFunASRNanoModel::LfrWindowShift() const {
  return impl_->LfrWindowShift();
}

OrtAllocator *OfflineFunASRNanoModel::Allocator() const {
  return impl_->Allocator();
}

bool OfflineFunASRNanoModel::HasEmbeddingModel() const {
  return impl_->HasEmbeddingModel();
}

#if __ANDROID_API__ >= 9
template OfflineFunASRNanoModel::OfflineFunASRNanoModel(
    AAssetManager *mgr, const OfflineModelConfig &config);
#endif

#if __OHOS__
template OfflineFunASRNanoModel::OfflineFunASRNanoModel(
    NativeResourceManager *mgr, const OfflineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-funasr-nano-model.h
================================================
// sherpa-onnx/csrc/offline-funasr-nano-model.h
//
// Copyright (c)  2025  zengyw

#ifndef SHERPA_ONNX_CSRC_OFFLINE_FUNASR_NANO_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_FUNASR_NANO_MODEL_H_

#include <cstdint>
#include <memory>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-funasr-nano-model-config.h"
#include "sherpa-onnx/csrc/offline-model-config.h"

namespace sherpa_onnx {

class OfflineFunASRNanoModel {
 public:
  explicit OfflineFunASRNanoModel(const OfflineModelConfig &config);

  template <typename Manager>
  OfflineFunASRNanoModel(Manager *mgr, const OfflineModelConfig &config);

  ~OfflineFunASRNanoModel();

  /** Run the encoder+adaptor model.
   *
   * @param features  A tensor of shape (N, T, C). Audio features.
   * @return Return embeddings of shape (N, T', hidden_size)
   */
  Ort::Value ForwardEncoderAdaptor(Ort::Value features);

  /** Run the LLM model (KV cache mode).
   *
   * @param inputs_embeds  A tensor of shape (N, T, hidden_size), float32.
   * @param attention_mask  A tensor of shape (N, T) containing attention mask,
   * int64.
   * @param cache_position  A tensor of shape (T,) containing cache positions,
   * int64.
   * @param cache_kv  Fixed-size KV cache, vector of (key, value) pairs.
   * @return Return tuple (logits, kv_outputs...). Logits shape (N, T,
   * vocab_size), float32. kv_outputs is a vector of (key_delta, value_delta)
   * pairs for each layer.
   */
  std::pair<Ort::Value, std::vector<std::pair<Ort::Value, Ort::Value>>>
  ForwardLLM(Ort::Value inputs_embeds, Ort::Value attention_mask,
             const Ort::Value &cache_position,
             const std::vector<std::pair<Ort::Value, Ort::Value>> &cache_kv);

  /** Create fixed-size KV cache buffer.
   *
   * @param batch  Batch size (usually 1).
   * @return Return vector of (key, value) pairs with fixed cache dimensions [B,
   * max_total_len, kv_h, hd].
   */
  std::vector<std::pair<Ort::Value, Ort::Value>> CreateEmptyKVCache(
      int64_t batch);

  /** Apply KV delta in-place to KV cache buffer.
   *
   * @param cache_kv  Fixed-size KV cache to update, vector of (key, value)
   * pairs.
   * @param kv_delta  KV deltas from current step, vector of (key_delta,
   * value_delta) pairs.
   * @param cache_position  Cache position tensor indicating where to write
   * deltas.
   */
  void ApplyKvDeltaInplace(
      std::vector<std::pair<Ort::Value, Ort::Value>> *cache_kv,
      const std::vector<std::pair<Ort::Value, Ort::Value>> &kv_delta,
      const Ort::Value &cache_position);

  /** Check if using KV cache mode. Always returns true for FunASR-nano.
   */
  bool UseKVCache() const;

  /** Run the embedding model.
   *
   * @param input_ids  A tensor of shape (N, T) containing token IDs.
   * @return Return embeddings of shape (N, T, hidden_size)
   */
  Ort::Value ForwardEmbedding(Ort::Value input_ids);

  /** Return the vocabulary size of the model
   */
  int32_t VocabSize() const;

  /** Return the hidden size of the model
   */
  int32_t HiddenSize() const;

  /** Return the maximum total sequence length (from metadata)
   */
  int32_t GetMaxTotalLen() const;

  /** It is lfr_window_size in metadata
   */
  int32_t LfrWindowSize() const;

  /** It is lfr_window_shift in metadata
   */
  int32_t LfrWindowShift() const;

  /** Return an allocator for allocating memory
   */
  OrtAllocator *Allocator() const;

  /** Check if embedding model is available
   */
  bool HasEmbeddingModel() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_FUNASR_NANO_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/offline-lm-config.cc
================================================
// sherpa-onnx/csrc/offline-lm-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-lm-config.h"

#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OfflineLMConfig::Register(ParseOptions *po) {
  po->Register("lm", &model, "Path to LM model.");
  po->Register("lm-scale", &scale, "LM scale.");
  po->Register("lm-num-threads", &lm_num_threads,
               "Number of threads to run the neural network of LM model");
  po->Register("lm-provider", &lm_provider,
               "Specify a provider to LM model use: cpu, cuda, coreml");
  po->Register("lodr-fst", &lodr_fst, "Path to LODR FST model.");
  po->Register("lodr-scale", &lodr_scale, "LODR scale.");
  po->Register("lodr-backoff-id", &lodr_backoff_id,
               "ID of the backoff in the LODR FST. -1 means autodetect");
}

bool OfflineLMConfig::Validate() const {
  if (!FileExists(model)) {
    SHERPA_ONNX_LOGE("'%s' does not exist", model.c_str());
    return false;
  }

  if (!lodr_fst.empty() && !FileExists(lodr_fst)) {
    SHERPA_ONNX_LOGE("'%s' does not exist", lodr_fst.c_str());
    return false;
  }

  return true;
}

std::string OfflineLMConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineLMConfig(";
  os << "model=\"" << model << "\", ";
  os << "scale=" << scale << ", ";
  os << "lodr_scale=" << lodr_scale << ", ";
  os << "lodr_fst=\"" << lodr_fst << "\", ";
  os << "lodr_backoff_id=" << lodr_backoff_id << ")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-lm-config.h
================================================
// sherpa-onnx/csrc/offline-lm-config.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_LM_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_LM_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineLMConfig {
  // path to the onnx model
  std::string model;

  // LM scale
  float scale = 0.5;
  int32_t lm_num_threads = 1;
  std::string lm_provider = "cpu";

  // LODR
  std::string lodr_fst;
  float lodr_scale = 0.01;
  int32_t lodr_backoff_id = -1;  // -1 means not set

  OfflineLMConfig() = default;

  OfflineLMConfig(const std::string &model, float scale, int32_t lm_num_threads,
                  const std::string &lm_provider, const std::string &lodr_fst,
                  float lodr_scale, int32_t lodr_backoff_id)
      : model(model),
        scale(scale),
        lm_num_threads(lm_num_threads),
        lm_provider(lm_provider),
        lodr_fst(lodr_fst),
        lodr_scale(lodr_scale),
        lodr_backoff_id(lodr_backoff_id) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_LM_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-lm.cc
================================================
// sherpa-onnx/csrc/offline-lm.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-lm.h"

#include <algorithm>
#include <memory>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/lodr-fst.h"
#include "sherpa-onnx/csrc/offline-rnn-lm.h"

namespace sherpa_onnx {

std::unique_ptr<OfflineLM> OfflineLM::Create(const OfflineLMConfig &config) {
  return std::make_unique<OfflineRnnLM>(config);
}

template <typename Manager>
std::unique_ptr<OfflineLM> OfflineLM::Create(Manager *mgr,
                                             const OfflineLMConfig &config) {
  return std::make_unique<OfflineRnnLM>(mgr, config);
}

void OfflineLM::ComputeLMScore(float scale, int32_t context_size,
                               std::vector<Hypotheses> *hyps) {
  // compute the max token seq so that we know how much space to allocate
  int32_t max_token_seq = 0;
  int32_t num_hyps = 0;

  // we subtract context_size below since each token sequence is prepended
  // with context_size blanks
  for (const auto &h : *hyps) {
    num_hyps += h.Size();
    for (const auto &t : h) {
      max_token_seq =
          std::max<int32_t>(max_token_seq, t.second.ys.size() - context_size);
    }
  }

  Ort::AllocatorWithDefaultOptions allocator;
  std::array<int64_t, 2> x_shape{num_hyps, max_token_seq};
  Ort::Value x = Ort::Value::CreateTensor<int64_t>(allocator, x_shape.data(),
                                                   x_shape.size());

  std::array<int64_t, 1> x_lens_shape{num_hyps};
  Ort::Value x_lens = Ort::Value::CreateTensor<int64_t>(
      allocator, x_lens_shape.data(), x_lens_shape.size());

  int64_t *p = x.GetTensorMutableData<int64_t>();
  std::fill(p, p + num_hyps * max_token_seq, 0);

  int64_t *p_lens = x_lens.GetTensorMutableData<int64_t>();

  for (const auto &h : *hyps) {
    for (const auto &t : h) {
      const auto &ys = t.second.ys;
      int32_t len = ys.size() - context_size;
      std::copy(ys.begin() + context_size, ys.end(), p);
      *p_lens = len;

      p += max_token_seq;
      ++p_lens;
    }
  }
  auto negative_loglike = Rescore(std::move(x), std::move(x_lens));
  const float *p_nll = negative_loglike.GetTensorData<float>();
  // We scale LODR scale with LM scale to replicate Icefall code
  auto lodr_scale = config_.lodr_scale * scale;
  for (auto &h : *hyps) {
    for (auto &t : h) {
      // Use -scale here since we want to change negative loglike to loglike.
      t.second.lm_log_prob = -scale * (*p_nll);
      ++p_nll;
      // apply LODR to hyp score
      if (lodr_fst_ != nullptr) {
        lodr_fst_->ComputeScore(lodr_scale, &t.second, context_size);
      }
    }
  }
}

#if __ANDROID_API__ >= 9
template std::unique_ptr<OfflineLM> OfflineLM::Create(
    AAssetManager *mgr, const OfflineLMConfig &config);
#endif

#if __OHOS__
template std::unique_ptr<OfflineLM> OfflineLM::Create(
    NativeResourceManager *mgr, const OfflineLMConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-lm.h
================================================
// sherpa-onnx/csrc/offline-lm.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_LM_H_
#define SHERPA_ONNX_CSRC_OFFLINE_LM_H_

#include <memory>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/hypothesis.h"
#include "sherpa-onnx/csrc/lodr-fst.h"
#include "sherpa-onnx/csrc/offline-lm-config.h"

namespace sherpa_onnx {

class OfflineLM {
 public:
  explicit OfflineLM(const OfflineLMConfig &config) : config_(config) {
    if (!config_.lodr_fst.empty()) {
      try {
        lodr_fst_ = std::make_unique<LodrFst>(LodrFst(config_.lodr_fst,
                                                    config_.lodr_backoff_id));
      } catch (const std::exception& e) {
        throw std::runtime_error("Failed to load LODR FST from: " +
                                  config_.lodr_fst + ". Error: " + e.what());
      }
    }
  }
  virtual ~OfflineLM() = default;

  static std::unique_ptr<OfflineLM> Create(const OfflineLMConfig &config);

  template <typename Manager>
  static std::unique_ptr<OfflineLM> Create(Manager *mgr,
                                           const OfflineLMConfig &config);

  /** Rescore a batch of sentences.
   *
   * @param x A 2-D tensor of shape (N, L) with data type int64.
   * @param x_lens A 1-D tensor of shape (N,) with data type int64.
   *               It contains number of valid tokens in x before padding.
   * @return Return a 1-D tensor of shape (N,) containing the negative log
   *         likelihood of each utterance. Its data type is float32.
   *
   * Caution: It returns negative log likelihood (nll), not log likelihood
   */
  virtual Ort::Value Rescore(Ort::Value x, Ort::Value x_lens) = 0;

  // This function updates hyp.lm_lob_prob of hyps.
  //
  // @param scale LM score
  // @param context_size Context size of the transducer decoder model
  // @param hyps It is changed in-place.
  void ComputeLMScore(float scale, int32_t context_size,
                      std::vector<Hypotheses> *hyps);

 private:
  std::unique_ptr<LodrFst> lodr_fst_;
  float lodr_scale_;
  OfflineLMConfig config_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_LM_H_


================================================
FILE: sherpa-onnx/csrc/offline-medasr-ctc-model-config.cc
================================================
// sherpa-onnx/csrc/offline-medasr-ctc-model-config.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-medasr-ctc-model-config.h"

#include <sstream>
#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OfflineMedAsrCtcModelConfig::Register(ParseOptions *po) {
  po->Register(
      "medasr", &model,
      "Path to model.onnx from MedASR. Please see "
      "https://github.com/k2-fsa/sherpa-onnx/pull/2934 for available models");
}

bool OfflineMedAsrCtcModelConfig::Validate() const {
  if (!FileExists(model)) {
    SHERPA_ONNX_LOGE("MedASR model: '%s' does not exist", model.c_str());
    return false;
  }

  return true;
}

std::string OfflineMedAsrCtcModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineMedAsrCtcModelConfig(";
  os << "model=\"" << model << "\")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-medasr-ctc-model-config.h
================================================
// sherpa-onnx/csrc/offline-medasr-ctc-model-config.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_MEDASR_CTC_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_MEDASR_CTC_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineMedAsrCtcModelConfig {
  std::string model;

  OfflineMedAsrCtcModelConfig() = default;
  explicit OfflineMedAsrCtcModelConfig(const std::string &model)
      : model(model) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_MEDASR_CTC_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-medasr-ctc-model.cc
================================================
// sherpa-onnx/csrc/offline-medasr-ctc-model.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-medasr-ctc-model.h"

#include <algorithm>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

namespace {

std::vector<int64_t> GetMask(Ort::Value length) {
  auto shape = length.GetTensorTypeAndShapeInfo().GetShape();
  if (shape.size() != 1) {
    SHERPA_ONNX_LOGE("Invalid length dim %zu", shape.size());
    SHERPA_ONNX_EXIT(-1);
  }

  auto batch_size = shape[0];

  const int64_t *p = length.GetTensorData<int64_t>();

  int64_t max_len = *std::max_element(p, p + batch_size);

  std::vector<int64_t> ans(batch_size * max_len, 0);

  int64_t *p_mask = ans.data();

  for (int32_t i = 0; i < batch_size; ++i) {
    auto len = p[i];
    std::fill(p_mask, p_mask + len, 1);

    p_mask += max_len;
  }

  return ans;
}

}  // namespace

class OfflineMedAsrCtcModel::Impl {
 public:
  explicit Impl(const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(config_.medasr.model);
    Init(buf.data(), buf.size());
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(mgr, config_.medasr.model);
    Init(buf.data(), buf.size());
  }

  std::vector<Ort::Value> Forward(Ort::Value features,
                                  Ort::Value features_length) {
    std::vector<int64_t> mask = GetMask(std::move(features_length));

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::vector<int64_t> shape =
        features.GetTensorTypeAndShapeInfo().GetShape();
    shape.resize(2);

    Ort::Value mask_tensor = Ort::Value::CreateTensor<int64_t>(
        memory_info, mask.data(), mask.size(), shape.data(), shape.size());

    std::array<Ort::Value, 2> inputs = {std::move(features),
                                        std::move(mask_tensor)};

    return sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(),
                      output_names_ptr_.data(), output_names_ptr_.size());
  }

  int32_t VocabSize() const { return vocab_size_; }

  int32_t SubsamplingFactor() const { return subsampling_factor_; }

  OrtAllocator *Allocator() { return allocator_; }

 private:
  void Init(void *model_data, size_t model_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);

    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      PrintModelMetadata(os, meta_data);
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below

    std::string model_type;
    SHERPA_ONNX_READ_META_DATA_STR(model_type, "model_type");
    if (model_type != "medasr_ctc") {
      SHERPA_ONNX_LOGE("Expect model type medasr_ctc. Given: '%s'",
                       model_type.c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    SHERPA_ONNX_READ_META_DATA(vocab_size_, "vocab_size");
    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(subsampling_factor_,
                                            "subsampling_factor", 4);
  }

 private:
  OfflineModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;

  int32_t vocab_size_ = 0;
  int32_t subsampling_factor_ = 0;
};

OfflineMedAsrCtcModel::OfflineMedAsrCtcModel(const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineMedAsrCtcModel::OfflineMedAsrCtcModel(Manager *mgr,
                                             const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineMedAsrCtcModel::~OfflineMedAsrCtcModel() = default;

std::vector<Ort::Value> OfflineMedAsrCtcModel::Forward(
    Ort::Value features, Ort::Value features_length) {
  return impl_->Forward(std::move(features), std::move(features_length));
}

int32_t OfflineMedAsrCtcModel::VocabSize() const { return impl_->VocabSize(); }

int32_t OfflineMedAsrCtcModel::SubsamplingFactor() const {
  return impl_->SubsamplingFactor();
}

OrtAllocator *OfflineMedAsrCtcModel::Allocator() const {
  return impl_->Allocator();
}

#if __ANDROID_API__ >= 9
template OfflineMedAsrCtcModel::OfflineMedAsrCtcModel(
    AAssetManager *mgr, const OfflineModelConfig &config);
#endif

#if __OHOS__
template OfflineMedAsrCtcModel::OfflineMedAsrCtcModel(
    NativeResourceManager *mgr, const OfflineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-medasr-ctc-model.h
================================================
// sherpa-onnx/csrc/offline-medasr-ctc-model.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_MEDASR_CTC_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_MEDASR_CTC_MODEL_H_
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-ctc-model.h"
#include "sherpa-onnx/csrc/offline-model-config.h"

namespace sherpa_onnx {

/** This class implements the CTC model from MedASR.
 *
 * See
 * https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/medasr/export_onnx.py
 * https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/medasr/test_onnx.py
 * https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/medasr/run.sh
 *
 */
class OfflineMedAsrCtcModel : public OfflineCtcModel {
 public:
  explicit OfflineMedAsrCtcModel(const OfflineModelConfig &config);

  template <typename Manager>
  OfflineMedAsrCtcModel(Manager *mgr, const OfflineModelConfig &config);

  ~OfflineMedAsrCtcModel() override;

  /** Run the forward method of the model.
   *
   * @param features  A tensor of shape (N, T, C).
   * @param features_length  A 1-D tensor of shape (N,) containing number of
   *                         valid frames in `features` before padding.
   *                         Its dtype is int64_t.
   *
   * @return Return a vector containing:
   *  - log_probs: A 3-D tensor of shape (N, T', vocab_size).
   *  - log_probs_length A 1-D tensor of shape (N,). Its dtype is int64_t
   */
  std::vector<Ort::Value> Forward(Ort::Value features,
                                  Ort::Value features_length) override;

  /** Return the vocabulary size of the model
   */
  int32_t VocabSize() const override;

  int32_t SubsamplingFactor() const override;

  /** Return an allocator for allocating memory
   */
  OrtAllocator *Allocator() const override;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_MEDASR_CTC_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/offline-model-config.cc
================================================
// sherpa-onnx/csrc/offline-model-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation
#include "sherpa-onnx/csrc/offline-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

void OfflineModelConfig::Register(ParseOptions *po) {
  transducer.Register(po);
  paraformer.Register(po);
  nemo_ctc.Register(po);
  whisper.Register(po);
  fire_red_asr.Register(po);
  tdnn.Register(po);
  zipformer_ctc.Register(po);
  wenet_ctc.Register(po);
  sense_voice.Register(po);
  moonshine.Register(po);
  dolphin.Register(po);
  canary.Register(po);
  omnilingual.Register(po);
  funasr_nano.Register(po);
  medasr.Register(po);
  fire_red_asr_ctc.Register(po);

  po->Register("telespeech-ctc", &telespeech_ctc,
               "Path to model.onnx for telespeech ctc");

  po->Register("tokens", &tokens, "Path to tokens.txt");

  po->Register("num-threads", &num_threads,
               "Number of threads to run the neural network");

  po->Register("debug", &debug,
               "true to print model information while loading it.");

  po->Register("provider", &provider,
               "Specify a provider to use: cpu, cuda, coreml");

  po->Register("model-type", &model_type,
               "Specify it to reduce model initialization time. "
               "Valid values are: transducer, paraformer, nemo_ctc, whisper, "
               "tdnn, zipformer2_ctc, telespeech_ctc, fire_red_asr."
               "All other values lead to loading the model twice.");
  po->Register(
      "modeling-unit", &modeling_unit,
      "The modeling unit of the model, commonly used units are bpe, "
      "bbpe, cjkchar, cjkchar+bpe, etc. Currently, it is needed only when "
      "hotwords are provided, we need it to encode the hotwords into "
      "token sequence.");
  po->Register("bpe-vocab", &bpe_vocab,
               "The vocabulary generated by google's sentencepiece program. "
               "It is a file has two columns, one is the token, the other is "
               "the log probability, you can get it from the directory where "
               "your bpe model is generated. Only used when hotwords provided "
               "and the modeling unit is bpe, bbpe, or cjkchar+bpe");
}

bool OfflineModelConfig::Validate() const {
  // For RK NPU, we reinterpret num_threads:
  //
  // For RK3588 only
  // num_threads == 1 -> Select a core randomly
  // num_threads == 0 -> Use NPU core 0
  // num_threads == -1 -> Use NPU core 1
  // num_threads == -2 -> Use NPU core 2
  // num_threads == -3 -> Use NPU core 0 and core 1
  // num_threads == -4 -> Use NPU core 0, core 1, and core 2
  if (provider != "rknn") {
    if (num_threads < 1) {
      SHERPA_ONNX_LOGE("num_threads should be > 0. Given %d", num_threads);
      return false;
    }
    if (!sense_voice.model.empty() && (EndsWith(sense_voice.model, ".rknn"))) {
      SHERPA_ONNX_LOGE(
          "--provider is %s, which is not rknn, but you pass a rknn model "
          "filename. model: '%s'",
          provider.c_str(), sense_voice.model.c_str());
      return false;
    }
  }

  if (provider == "rknn") {
    if (!sense_voice.model.empty() && (EndsWith(sense_voice.model, ".onnx"))) {
      SHERPA_ONNX_LOGE(
          "--provider is rknn, but you pass an onnx model "
          "filename. model: '%s'",
          sense_voice.model.c_str());
      return false;
    }
  }

  // For FunASR-nano, tokens file is not required (tokenizer is loaded from
  // directory) Check tokens file only if not using funasr_nano
  if (funasr_nano.encoder_adaptor.empty()) {
    if (!FileExists(tokens)) {
      SHERPA_ONNX_LOGE("tokens: '%s' does not exist", tokens.c_str());
      return false;
    }
  }

  if (!modeling_unit.empty() &&
      (modeling_unit == "bpe" || modeling_unit == "cjkchar+bpe" ||
       modeling_unit == "bbpe")) {
    if (!FileExists(bpe_vocab)) {
      SHERPA_ONNX_LOGE("bpe_vocab: '%s' does not exist", bpe_vocab.c_str());
      return false;
    }
  }

  if (!paraformer.model.empty()) {
    return paraformer.Validate();
  }

  if (!nemo_ctc.model.empty()) {
    return nemo_ctc.Validate();
  }

  if (!whisper.encoder.empty()) {
    return whisper.Validate();
  }

  if (!fire_red_asr.encoder.empty()) {
    return fire_red_asr.Validate();
  }

  if (!tdnn.model.empty()) {
    return tdnn.Validate();
  }

  if (!zipformer_ctc.model.empty()) {
    return zipformer_ctc.Validate();
  }

  if (!wenet_ctc.model.empty()) {
    return wenet_ctc.Validate();
  }

  if (!sense_voice.model.empty() ||
      !sense_voice.qnn_config.context_binary.empty()) {
    return sense_voice.Validate();
  }

  if (!moonshine.encoder.empty()) {
    return moonshine.Validate();
  }

  if (!dolphin.model.empty()) {
    return dolphin.Validate();
  }

  if (!canary.encoder.empty()) {
    return canary.Validate();
  }

  if (!omnilingual.model.empty()) {
    return omnilingual.Validate();
  }

  if (!funasr_nano.encoder_adaptor.empty()) {
    return funasr_nano.Validate();
  }

  if (!medasr.model.empty()) {
    return medasr.Validate();
  }

  if (!fire_red_asr_ctc.model.empty()) {
    return fire_red_asr_ctc.Validate();
  }

  if (!telespeech_ctc.empty() && !FileExists(telespeech_ctc)) {
    SHERPA_ONNX_LOGE("telespeech_ctc: '%s' does not exist",
                     telespeech_ctc.c_str());
    return false;
  }

  if (!transducer.encoder_filename.empty()) {
    return transducer.Validate();
  }

  return true;
}

std::string OfflineModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineModelConfig(";
  os << "transducer=" << transducer.ToString() << ", ";
  os << "paraformer=" << paraformer.ToString() << ", ";
  os << "nemo_ctc=" << nemo_ctc.ToString() << ", ";
  os << "whisper=" << whisper.ToString() << ", ";
  os << "fire_red_asr=" << fire_red_asr.ToString() << ", ";
  os << "tdnn=" << tdnn.ToString() << ", ";
  os << "zipformer_ctc=" << zipformer_ctc.ToString() << ", ";
  os << "wenet_ctc=" << wenet_ctc.ToString() << ", ";
  os << "sense_voice=" << sense_voice.ToString() << ", ";
  os << "moonshine=" << moonshine.ToString() << ", ";
  os << "dolphin=" << dolphin.ToString() << ", ";
  os << "canary=" << canary.ToString() << ", ";
  os << "omnilingual=" << omnilingual.ToString() << ", ";
  os << "funasr_nano=" << funasr_nano.ToString() << ", ";
  os << "medasr=" << medasr.ToString() << ", ";
  os << "fire_red_asr_ctc=" << fire_red_asr_ctc.ToString() << ", ";
  os << "telespeech_ctc=\"" << telespeech_ctc << "\", ";
  os << "tokens=\"" << tokens << "\", ";
  os << "num_threads=" << num_threads << ", ";
  os << "debug=" << (debug ? "True" : "False") << ", ";
  os << "provider=\"" << provider << "\", ";
  os << "model_type=\"" << model_type << "\", ";
  os << "modeling_unit=\"" << modeling_unit << "\", ";
  os << "bpe_vocab=\"" << bpe_vocab << "\")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-model-config.h
================================================
// sherpa-onnx/csrc/offline-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/offline-canary-model-config.h"
#include "sherpa-onnx/csrc/offline-dolphin-model-config.h"
#include "sherpa-onnx/csrc/offline-fire-red-asr-ctc-model-config.h"
#include "sherpa-onnx/csrc/offline-fire-red-asr-model-config.h"
#include "sherpa-onnx/csrc/offline-funasr-nano-model-config.h"
#include "sherpa-onnx/csrc/offline-medasr-ctc-model-config.h"
#include "sherpa-onnx/csrc/offline-moonshine-model-config.h"
#include "sherpa-onnx/csrc/offline-nemo-enc-dec-ctc-model-config.h"
#include "sherpa-onnx/csrc/offline-omnilingual-asr-ctc-model-config.h"
#include "sherpa-onnx/csrc/offline-paraformer-model-config.h"
#include "sherpa-onnx/csrc/offline-sense-voice-model-config.h"
#include "sherpa-onnx/csrc/offline-tdnn-model-config.h"
#include "sherpa-onnx/csrc/offline-transducer-model-config.h"
#include "sherpa-onnx/csrc/offline-wenet-ctc-model-config.h"
#include "sherpa-onnx/csrc/offline-whisper-model-config.h"
#include "sherpa-onnx/csrc/offline-zipformer-ctc-model-config.h"

namespace sherpa_onnx {

struct OfflineModelConfig {
  OfflineTransducerModelConfig transducer;
  OfflineParaformerModelConfig paraformer;
  OfflineNemoEncDecCtcModelConfig nemo_ctc;
  OfflineWhisperModelConfig whisper;
  OfflineFireRedAsrModelConfig fire_red_asr;
  OfflineTdnnModelConfig tdnn;
  OfflineZipformerCtcModelConfig zipformer_ctc;
  OfflineWenetCtcModelConfig wenet_ctc;
  OfflineSenseVoiceModelConfig sense_voice;
  OfflineMoonshineModelConfig moonshine;
  OfflineDolphinModelConfig dolphin;
  OfflineCanaryModelConfig canary;
  OfflineOmnilingualAsrCtcModelConfig omnilingual;
  OfflineFunASRNanoModelConfig funasr_nano;
  OfflineMedAsrCtcModelConfig medasr;
  OfflineFireRedAsrCtcModelConfig fire_red_asr_ctc;
  std::string telespeech_ctc;

  std::string tokens;
  int32_t num_threads = 2;
  bool debug = false;
  std::string provider = "cpu";

  // With the help of this field, we only need to load the model once
  // instead of twice; and therefore it reduces initialization time.
  //
  // Valid values:
  //  - transducer. The given model is from icefall
  //  - paraformer. It is a paraformer model
  //  - nemo_ctc. It is a NeMo CTC model.
  //
  // All other values are invalid and lead to loading the model twice.
  std::string model_type;

  std::string modeling_unit = "cjkchar";
  std::string bpe_vocab;

  OfflineModelConfig() = default;
  OfflineModelConfig(const OfflineTransducerModelConfig &transducer,
                     const OfflineParaformerModelConfig &paraformer,
                     const OfflineNemoEncDecCtcModelConfig &nemo_ctc,
                     const OfflineWhisperModelConfig &whisper,
                     const OfflineFireRedAsrModelConfig &fire_red_asr,
                     const OfflineTdnnModelConfig &tdnn,
                     const OfflineZipformerCtcModelConfig &zipformer_ctc,
                     const OfflineWenetCtcModelConfig &wenet_ctc,
                     const OfflineSenseVoiceModelConfig &sense_voice,
                     const OfflineMoonshineModelConfig &moonshine,
                     const OfflineDolphinModelConfig &dolphin,
                     const OfflineCanaryModelConfig &canary,
                     const OfflineOmnilingualAsrCtcModelConfig &omnilingual,
                     const OfflineFunASRNanoModelConfig &funasr_nano,
                     const OfflineMedAsrCtcModelConfig &medasr,
                     const OfflineFireRedAsrCtcModelConfig &fire_red_asr_ctc,
                     const std::string &telespeech_ctc,
                     const std::string &tokens, int32_t num_threads, bool debug,
                     const std::string &provider, const std::string &model_type,
                     const std::string &modeling_unit,
                     const std::string &bpe_vocab)
      : transducer(transducer),
        paraformer(paraformer),
        nemo_ctc(nemo_ctc),
        whisper(whisper),
        fire_red_asr(fire_red_asr),
        tdnn(tdnn),
        zipformer_ctc(zipformer_ctc),
        wenet_ctc(wenet_ctc),
        sense_voice(sense_voice),
        moonshine(moonshine),
        dolphin(dolphin),
        canary(canary),
        omnilingual(omnilingual),
        funasr_nano(funasr_nano),
        medasr(medasr),
        fire_red_asr_ctc(fire_red_asr_ctc),
        telespeech_ctc(telespeech_ctc),
        tokens(tokens),
        num_threads(num_threads),
        debug(debug),
        provider(provider),
        model_type(model_type),
        modeling_unit(modeling_unit),
        bpe_vocab(bpe_vocab) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-moonshine-decoder.h
================================================
// sherpa-onnx/csrc/offline-moonshine-decoder.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_MOONSHINE_DECODER_H_
#define SHERPA_ONNX_CSRC_OFFLINE_MOONSHINE_DECODER_H_

#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT

namespace sherpa_onnx {

struct OfflineMoonshineDecoderResult {
  /// The decoded token IDs
  std::vector<int32_t> tokens;
};

class OfflineMoonshineDecoder {
 public:
  virtual ~OfflineMoonshineDecoder() = default;

  /** Run beam search given the output from the moonshine encoder model.
   *
   * @param encoder_out A 3-D tensor of shape (batch_size, T, dim)
   * @return Return a vector of size `N` containing the decoded results.
   */
  virtual std::vector<OfflineMoonshineDecoderResult> Decode(
      Ort::Value encoder_out) = 0;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_MOONSHINE_DECODER_H_


================================================
FILE: sherpa-onnx/csrc/offline-moonshine-greedy-search-decoder.cc
================================================
// sherpa-onnx/csrc/offline-moonshine-greedy-search-decoder.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-moonshine-greedy-search-decoder.h"

#include <algorithm>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"

namespace sherpa_onnx {

std::vector<OfflineMoonshineDecoderResult>
OfflineMoonshineGreedySearchDecoder::Decode(Ort::Value encoder_out) {
  auto encoder_out_shape = encoder_out.GetTensorTypeAndShapeInfo().GetShape();
  if (encoder_out_shape[0] != 1) {
    SHERPA_ONNX_LOGE("Support only batch size == 1. Given: %d\n",
                     static_cast<int32_t>(encoder_out_shape[0]));
    return {};
  }

  auto memory_info =
      Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

  // encoder_out_shape[1] * 384 is the number of audio samples
  // 16000 is the sample rate
  //
  //
  // 384 is from the moonshine paper
  int32_t max_len =
      static_cast<int32_t>(encoder_out_shape[1] * 384 / 16000.0 * 6);

  int32_t sos = 1;
  int32_t eos = 2;
  int32_t seq_len = 1;

  std::vector<int32_t> tokens;

  std::array<int64_t, 2> token_shape = {1, 1};
  int64_t seq_len_shape = 1;

  Ort::Value token_tensor = Ort::Value::CreateTensor(
      memory_info, &sos, 1, token_shape.data(), token_shape.size());

  Ort::Value seq_len_tensor =
      Ort::Value::CreateTensor(memory_info, &seq_len, 1, &seq_len_shape, 1);

  Ort::Value logits{nullptr};
  std::vector<Ort::Value> states;

  std::tie(logits, states) = model_->ForwardUnCachedDecoder(
      std::move(token_tensor), std::move(seq_len_tensor), View(&encoder_out));

  int32_t vocab_size = logits.GetTensorTypeAndShapeInfo().GetShape()[2];

  for (int32_t i = 0; i != max_len; ++i) {
    const float *p = logits.GetTensorData<float>();

    int32_t max_token_id = static_cast<int32_t>(
        std::distance(p, std::max_element(p, p + vocab_size)));
    if (max_token_id == eos) {
      break;
    }
    tokens.push_back(max_token_id);

    seq_len += 1;

    token_tensor = Ort::Value::CreateTensor(
        memory_info, &tokens.back(), 1, token_shape.data(), token_shape.size());

    seq_len_tensor =
        Ort::Value::CreateTensor(memory_info, &seq_len, 1, &seq_len_shape, 1);

    // To fix the false alarm of clang-tidy
    // error: 'states' used after it was moved
    // [bugprone-use-after-move,-warnings-as-errors]
    // we use a tmp_states here
    std::vector<Ort::Value> tmp_states{std::move(states)};

    std::tie(logits, states) = model_->ForwardCachedDecoder(
        std::move(token_tensor), std::move(seq_len_tensor), View(&encoder_out),
        std::move(tmp_states));
  }

  OfflineMoonshineDecoderResult ans;
  ans.tokens = std::move(tokens);

  return {ans};
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-moonshine-greedy-search-decoder.h
================================================
// sherpa-onnx/csrc/offline-moonshine-greedy-search-decoder.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_MOONSHINE_GREEDY_SEARCH_DECODER_H_
#define SHERPA_ONNX_CSRC_OFFLINE_MOONSHINE_GREEDY_SEARCH_DECODER_H_

#include <vector>

#include "sherpa-onnx/csrc/offline-moonshine-decoder.h"
#include "sherpa-onnx/csrc/offline-moonshine-model.h"

namespace sherpa_onnx {

class OfflineMoonshineGreedySearchDecoder : public OfflineMoonshineDecoder {
 public:
  explicit OfflineMoonshineGreedySearchDecoder(OfflineMoonshineModel *model)
      : model_(model) {}

  std::vector<OfflineMoonshineDecoderResult> Decode(
      Ort::Value encoder_out) override;

 private:
  OfflineMoonshineModel *model_;  // not owned
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_MOONSHINE_GREEDY_SEARCH_DECODER_H_


================================================
FILE: sherpa-onnx/csrc/offline-moonshine-model-config.cc
================================================
// sherpa-onnx/csrc/offline-moonshine-model-config.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-moonshine-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OfflineMoonshineModelConfig::Register(ParseOptions *po) {
  po->Register(
      "moonshine-preprocessor", &preprocessor,
      "Path to onnx preprocessor of moonshine v1, e.g., preprocess.onnx");

  po->Register("moonshine-encoder", &encoder,
               "Path to onnx encoder of moonshine v1 or v2, e.g., encode.onnx "
               "for v1, encoder_model.onnx for v2");

  po->Register("moonshine-uncached-decoder", &uncached_decoder,
               "Path to onnx uncached_decoder of moonshine v1, e.g., "
               "uncached_decode.onnx");

  po->Register(
      "moonshine-cached-decoder", &cached_decoder,
      "Path to onnx cached_decoder of moonshine v1, e.g., cached_decode.onnx");

  po->Register("moonshine-merged-decoder", &merged_decoder,
               "Path to onnx merged decoder of moonshine v2, e.g., "
               "decoder_model_merged.onnx");
}

bool OfflineMoonshineModelConfig::Validate() const {
  // both v1 and v2 require a encoder model
  if (encoder.empty()) {
    SHERPA_ONNX_LOGE("Please provide --moonshine-encoder");
    return false;
  }

  if (!FileExists(encoder)) {
    SHERPA_ONNX_LOGE("moonshine encoder file '%s' does not exist",
                     encoder.c_str());
    return false;
  }

  if (merged_decoder.empty()) {
    // for v1
    if (preprocessor.empty()) {
      SHERPA_ONNX_LOGE(
          "Please provide --moonshine-preprocessor for v1 or "
          "--moonshine-merged-decoder for v2");
      return false;
    }

    if (!FileExists(preprocessor)) {
      SHERPA_ONNX_LOGE("moonshine preprocessor file '%s' does not exist",
                       preprocessor.c_str());
      return false;
    }

    if (uncached_decoder.empty()) {
      SHERPA_ONNX_LOGE("Please provide --moonshine-uncached-decoder for v1");
      return false;
    }

    if (!FileExists(uncached_decoder)) {
      SHERPA_ONNX_LOGE("moonshine uncached decoder file '%s' does not exist",
                       uncached_decoder.c_str());
      return false;
    }

    if (cached_decoder.empty()) {
      SHERPA_ONNX_LOGE("Please provide --moonshine-cached-decoder for v1");
      return false;
    }

    if (!FileExists(cached_decoder)) {
      SHERPA_ONNX_LOGE("moonshine cached decoder file '%s' does not exist",
                       cached_decoder.c_str());
      return false;
    }
  } else {
    // v2
    if (!preprocessor.empty()) {
      SHERPA_ONNX_LOGE("Please don't provide preprocessor for moonshine v2");
      return false;
    }

    if (!uncached_decoder.empty()) {
      SHERPA_ONNX_LOGE(
          "Please don't provide uncached decoder for moonshine v2");
      return false;
    }

    if (!cached_decoder.empty()) {
      SHERPA_ONNX_LOGE("Please don't provide cached decoder for moonshine v2");
      return false;
    }

    if (!FileExists(merged_decoder)) {
      SHERPA_ONNX_LOGE(
          "moonshine v2 merged_decoder decoder file '%s' does not exist",
          merged_decoder.c_str());
      return false;
    }
  }

  return true;
}

std::string OfflineMoonshineModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineMoonshineModelConfig(";
  os << "preprocessor=\"" << preprocessor << "\", ";
  os << "encoder=\"" << encoder << "\", ";
  os << "uncached_decoder=\"" << uncached_decoder << "\", ";
  os << "cached_decoder=\"" << cached_decoder << "\", ";
  os << "merged_decoder=\"" << merged_decoder << "\")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-moonshine-model-config.h
================================================
// sherpa-onnx/csrc/offline-moonshine-model-config.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_MOONSHINE_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_MOONSHINE_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineMoonshineModelConfig {
  // For moonshine v1, it has 4 models:
  // preprocessor, encoder, uncached_decoder, cached_decoder
  //
  // For moonshine v2, it has 2 models:
  // encoder, merged_decoder
  //
  // You can choose either v1 by providing 4 models or
  // select v2 by providing 2 models, but not both

  std::string preprocessor;
  std::string encoder;
  std::string uncached_decoder;
  std::string cached_decoder;

  std::string merged_decoder;

  OfflineMoonshineModelConfig() = default;
  OfflineMoonshineModelConfig(const std::string &preprocessor,
                              const std::string &encoder,
                              const std::string &uncached_decoder,
                              const std::string &cached_decoder,
                              const std::string &merged_decoder)
      : preprocessor(preprocessor),
        encoder(encoder),
        uncached_decoder(uncached_decoder),
        cached_decoder(cached_decoder),
        merged_decoder(merged_decoder) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_MOONSHINE_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-moonshine-model-v2.cc
================================================
// sherpa-onnx/csrc/offline-moonshine-model-v2.cc
//
// Copyright (c)  2024-2026  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-moonshine-model-v2.h"

#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class OfflineMoonshineModelV2::Impl {
 public:
  explicit Impl(const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    encoder_sess_ = std::make_unique<Ort::Session>(
        env_, SHERPA_ONNX_TO_ORT_PATH(config.moonshine.encoder), sess_opts_);
    InitEncoder(nullptr, 0);

    decoder_sess_ = std::make_unique<Ort::Session>(
        env_, SHERPA_ONNX_TO_ORT_PATH(config.moonshine.merged_decoder),
        sess_opts_);
    InitDecoder(nullptr, 0);
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    {
      auto buf = ReadFile(mgr, config.moonshine.encoder);
      InitEncoder(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(mgr, config.moonshine.merged_decoder);
      InitDecoder(buf.data(), buf.size());
    }
  }

  Ort::Value ForwardEncoder(Ort::Value audio) {
    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::vector<int64_t> mask;
    std::vector<Ort::Value> inputs;

    inputs.push_back(std::move(audio));

    if (encoder_input_names_.size() > 1) {
      std::vector<int64_t> shape =
          inputs.back().GetTensorTypeAndShapeInfo().GetShape();

      mask.resize(shape[1], 1);

      Ort::Value mask_tensor = Ort::Value::CreateTensor<int64_t>(
          memory_info, mask.data(), mask.size(), shape.data(), shape.size());
      inputs.push_back(std::move(mask_tensor));
    }

    auto features = encoder_sess_->Run(
        {}, encoder_input_names_ptr_.data(), inputs.data(), inputs.size(),
        encoder_output_names_ptr_.data(), encoder_output_names_ptr_.size());

    return std::move(features[0]);
  }

  std::pair<Ort::Value, std::vector<Ort::Value>> ForwardDecoder(
      Ort::Value tokens, Ort::Value encoder_out,
      std::vector<Ort::Value> states) {
    auto encoder_seq_len = states[2].GetTensorTypeAndShapeInfo().GetShape()[2];
    bool use_cache_branch = encoder_seq_len > 1;

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::vector<int64_t> mask;

    std::vector<Ort::Value> inputs;

    inputs.reserve(4 + states.size());

    if (decoder_needs_mask_) {
      mask.resize(encoder_out.GetTensorTypeAndShapeInfo().GetShape()[1], 1);
      std::array<int64_t, 2> shape = {
          1, encoder_out.GetTensorTypeAndShapeInfo().GetShape()[1]};

      Ort::Value mask_tensor = Ort::Value::CreateTensor<int64_t>(
          memory_info, mask.data(), mask.size(), shape.data(), shape.size());

      inputs.push_back(std::move(mask_tensor));
    }

    inputs.push_back(std::move(tokens));
    inputs.push_back(std::move(encoder_out));

    for (auto &s : states) {
      inputs.push_back(View(&s));
    }

    int64_t shape = 1;

    Ort::Value tensor = Ort::Value::CreateTensor<bool>(
        memory_info, &use_cache_branch, 1, &shape, 1);

    inputs.push_back(std::move(tensor));

    auto out = decoder_sess_->Run(
        {}, decoder_input_names_ptr_.data(), inputs.data(), inputs.size(),
        decoder_output_names_ptr_.data(), decoder_output_names_ptr_.size());

    if (!use_cache_branch) {
      // update encoder and decoder
      for (int32_t i = 0; i < static_cast<int32_t>(states_.size()); ++i) {
        states[i] = std::move(out[1 + i]);
      }
    } else {
      // only update decoder kv
      for (int32_t i = 0; i < num_layers_; ++i) {
        states[4 * i + 0] = std::move(out[1 + 4 * i + 0]);
        states[4 * i + 1] = std::move(out[1 + 4 * i + 1]);
      }
    }

    return {std::move(out[0]), std::move(states)};
  }

  std::vector<Ort::Value> GetDecoderInitStates() {
    std::vector<Ort::Value> ans;

    ans.reserve(states_.size());

    for (auto &s : states_) {
      ans.push_back(View(&s));
    }

    return ans;
  }

  OrtAllocator *Allocator() { return allocator_; }

 private:
  void InitEncoder(void *model_data, size_t model_data_length) {
    if (model_data) {
      encoder_sess_ = std::make_unique<Ort::Session>(
          env_, model_data, model_data_length, sess_opts_);
    } else if (!encoder_sess_) {
      SHERPA_ONNX_LOGE(
          "Please pass model data or initialize the encoder session outside of "
          "this function");
      SHERPA_ONNX_EXIT(-1);
    }

    GetInputNames(encoder_sess_.get(), &encoder_input_names_,
                  &encoder_input_names_ptr_);

    GetOutputNames(encoder_sess_.get(), &encoder_output_names_,
                   &encoder_output_names_ptr_);
  }

  void InitDecoder(void *model_data, size_t model_data_length) {
    if (model_data) {
      decoder_sess_ = std::make_unique<Ort::Session>(
          env_, model_data, model_data_length, sess_opts_);
    } else if (!decoder_sess_) {
      SHERPA_ONNX_LOGE(
          "Please pass model data or initialize the decoder session outside of "
          "this function");
      SHERPA_ONNX_EXIT(-1);
    }

    GetInputNames(decoder_sess_.get(), &decoder_input_names_,
                  &decoder_input_names_ptr_);

    GetOutputNames(decoder_sess_.get(), &decoder_output_names_,
                   &decoder_output_names_ptr_);

    for (const auto &s : decoder_input_names_) {
      if (Contains(s, "encoder_attention_mask")) {
        decoder_needs_mask_ = true;
      }
    }

    int32_t k = 0;
    for (const auto &s : decoder_input_names_) {
      if (Contains(s, "key_values")) {
        auto shape = decoder_sess_->GetInputTypeInfo(k)
                         .GetTensorTypeAndShapeInfo()
                         .GetShape();
        if (static_cast<int32_t>(shape.size()) != 4) {
          SHERPA_ONNX_LOGE("The shape for %s should be 4-d. Given: %d-d",
                           s.c_str(), static_cast<int32_t>(shape.size()));
          SHERPA_ONNX_EXIT(-1);
        }

        num_head_ = shape[1];
        head_dim_ = shape[3];
        break;
      }
      k += 1;
    }

    if (decoder_needs_mask_) {
      // [ mask, ids, encoder_out, states, use_cache_branch]
      num_layers_ = (static_cast<int32_t>(decoder_input_names_.size()) - 4) / 4;
    } else {
      // [ ids, encoder_out, states, use_cache_branch]
      num_layers_ = (static_cast<int32_t>(decoder_input_names_.size()) - 3) / 4;
    }

    if (config_.debug) {
      SHERPA_ONNX_LOGE("need attention mask: %d",
                       static_cast<int32_t>(decoder_needs_mask_));
      SHERPA_ONNX_LOGE("num_head: %d", num_head_);
      SHERPA_ONNX_LOGE("head_dim: %d", head_dim_);
      SHERPA_ONNX_LOGE("num_layers: %d", num_layers_);
    }

    InitDecoderStates();
  }

  void InitDecoderStates() {
    states_.reserve(num_layers_ * 4);
    std::array<int64_t, 4> shape{1, num_head_, 0, head_dim_};

    auto n = shape[0] * shape[1] * shape[2] * shape[3];

    for (int32_t i = 0; i < 4 * num_layers_; ++i) {
      Ort::Value v = Ort::Value::CreateTensor<float>(Allocator(), shape.data(),
                                                     shape.size());

      float *p = v.GetTensorMutableData<float>();
      memset(p, 0, sizeof(float) * n);
      states_.push_back(std::move(v));
    }
  }

 private:
  OfflineModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> encoder_sess_;
  std::unique_ptr<Ort::Session> decoder_sess_;

  std::vector<std::string> encoder_input_names_;
  std::vector<const char *> encoder_input_names_ptr_;

  std::vector<std::string> encoder_output_names_;
  std::vector<const char *> encoder_output_names_ptr_;

  std::vector<std::string> decoder_input_names_;
  std::vector<const char *> decoder_input_names_ptr_;

  std::vector<std::string> decoder_output_names_;
  std::vector<const char *> decoder_output_names_ptr_;

  std::vector<Ort::Value> states_;

  int32_t num_head_ = 0;
  int32_t head_dim_ = 0;
  int32_t num_layers_ = 0;
  bool decoder_needs_mask_ = false;
};

OfflineMoonshineModelV2::OfflineMoonshineModelV2(
    const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineMoonshineModelV2::OfflineMoonshineModelV2(
    Manager *mgr, const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineMoonshineModelV2::~OfflineMoonshineModelV2() = default;

Ort::Value OfflineMoonshineModelV2::ForwardEncoder(Ort::Value audio) const {
  return impl_->ForwardEncoder(std::move(audio));
}

std::pair<Ort::Value, std::vector<Ort::Value>>
OfflineMoonshineModelV2::ForwardDecoder(Ort::Value token,
                                        Ort::Value encoder_out,
                                        std::vector<Ort::Value> states) const {
  return impl_->ForwardDecoder(std::move(token), std::move(encoder_out),
                               std::move(states));
}

std::vector<Ort::Value> OfflineMoonshineModelV2::GetDecoderInitStates() const {
  return impl_->GetDecoderInitStates();
}

OrtAllocator *OfflineMoonshineModelV2::Allocator() const {
  return impl_->Allocator();
}

#if __ANDROID_API__ >= 9
template OfflineMoonshineModelV2::OfflineMoonshineModelV2(
    AAssetManager *mgr, const OfflineModelConfig &config);
#endif

#if __OHOS__
template OfflineMoonshineModelV2::OfflineMoonshineModelV2(
    NativeResourceManager *mgr, const OfflineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-moonshine-model-v2.h
================================================
// sherpa-onnx/csrc/offline-moonshine-model-v2.h
//
// Copyright (c)  2024-2026  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_MOONSHINE_MODEL_V2_H_
#define SHERPA_ONNX_CSRC_OFFLINE_MOONSHINE_MODEL_V2_H_

#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-model-config.h"

namespace sherpa_onnx {

// please see
// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/moonshine/v2/test.py
class OfflineMoonshineModelV2 {
 public:
  explicit OfflineMoonshineModelV2(const OfflineModelConfig &config);

  template <typename Manager>
  OfflineMoonshineModelV2(Manager *mgr, const OfflineModelConfig &config);

  ~OfflineMoonshineModelV2();

  /** Run the encoder model.
   *
   * @param audio A float32 tensor of shape (batch_size, num_samples)
   *
   * @return Return a float32 tensor of shape (batch_size, T, dim) that
   *         can be used as the input of ForwardDecoder()
   *
   * Note it currently supports only batch size 1.
   */
  Ort::Value ForwardEncoder(Ort::Value audio) const;

  /** Run the merged decoder.
   *
   * @param token A int64 tensor of shape (batch_size, num_tokens)
   * @param encoder_out A float32 tensor of shape (batch_size, T, dim)
   * @param states Model States
   *
   * @returns Return a pair:
   *
   *          - logits, a float32 tensor of shape (batch_size, 1, dim)
   *          - states, a list of states
   *
   * Note it supports only batch_size 1.
   */
  std::pair<Ort::Value, std::vector<Ort::Value>> ForwardDecoder(
      Ort::Value token, Ort::Value encoder_out,
      std::vector<Ort::Value> states) const;

  std::vector<Ort::Value> GetDecoderInitStates() const;

  /** Return an allocator for allocating memory
   */
  OrtAllocator *Allocator() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_MOONSHINE_MODEL_V2_H_


================================================
FILE: sherpa-onnx/csrc/offline-moonshine-model.cc
================================================
// sherpa-onnx/csrc/offline-moonshine-model.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-moonshine-model.h"

#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class OfflineMoonshineModel::Impl {
 public:
  explicit Impl(const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    {
      auto buf = ReadFile(config.moonshine.preprocessor);
      InitPreprocessor(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(config.moonshine.encoder);
      InitEncoder(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(config.moonshine.uncached_decoder);
      InitUnCachedDecoder(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(config.moonshine.cached_decoder);
      InitCachedDecoder(buf.data(), buf.size());
    }
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    {
      auto buf = ReadFile(mgr, config.moonshine.preprocessor);
      InitPreprocessor(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(mgr, config.moonshine.encoder);
      InitEncoder(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(mgr, config.moonshine.uncached_decoder);
      InitUnCachedDecoder(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(mgr, config.moonshine.cached_decoder);
      InitCachedDecoder(buf.data(), buf.size());
    }
  }

  Ort::Value ForwardPreprocessor(Ort::Value audio) {
    auto features = preprocessor_sess_->Run(
        {}, preprocessor_input_names_ptr_.data(), &audio, 1,
        preprocessor_output_names_ptr_.data(),
        preprocessor_output_names_ptr_.size());

    return std::move(features[0]);
  }

  Ort::Value ForwardEncoder(Ort::Value features, Ort::Value features_len) {
    std::array<Ort::Value, 2> encoder_inputs{std::move(features),
                                             std::move(features_len)};
    auto encoder_out = encoder_sess_->Run(
        {}, encoder_input_names_ptr_.data(), encoder_inputs.data(),
        encoder_inputs.size(), encoder_output_names_ptr_.data(),
        encoder_output_names_ptr_.size());

    return std::move(encoder_out[0]);
  }

  std::pair<Ort::Value, std::vector<Ort::Value>> ForwardUnCachedDecoder(
      Ort::Value tokens, Ort::Value seq_len, Ort::Value encoder_out) {
    std::array<Ort::Value, 3> uncached_decoder_input = {
        std::move(tokens),
        std::move(encoder_out),
        std::move(seq_len),
    };

    auto uncached_decoder_out = uncached_decoder_sess_->Run(
        {}, uncached_decoder_input_names_ptr_.data(),
        uncached_decoder_input.data(), uncached_decoder_input.size(),
        uncached_decoder_output_names_ptr_.data(),
        uncached_decoder_output_names_ptr_.size());

    std::vector<Ort::Value> states;
    states.reserve(uncached_decoder_out.size() - 1);

    int32_t i = -1;
    for (auto &s : uncached_decoder_out) {
      ++i;
      if (i == 0) {
        continue;
      }

      states.push_back(std::move(s));
    }

    return {std::move(uncached_decoder_out[0]), std::move(states)};
  }

  std::pair<Ort::Value, std::vector<Ort::Value>> ForwardCachedDecoder(
      Ort::Value tokens, Ort::Value seq_len, Ort::Value encoder_out,
      std::vector<Ort::Value> states) {
    std::vector<Ort::Value> cached_decoder_input;
    cached_decoder_input.reserve(3 + states.size());
    cached_decoder_input.push_back(std::move(tokens));
    cached_decoder_input.push_back(std::move(encoder_out));
    cached_decoder_input.push_back(std::move(seq_len));

    for (auto &s : states) {
      cached_decoder_input.push_back(std::move(s));
    }

    auto cached_decoder_out = cached_decoder_sess_->Run(
        {}, cached_decoder_input_names_ptr_.data(), cached_decoder_input.data(),
        cached_decoder_input.size(), cached_decoder_output_names_ptr_.data(),
        cached_decoder_output_names_ptr_.size());

    std::vector<Ort::Value> next_states;
    next_states.reserve(cached_decoder_out.size() - 1);

    int32_t i = -1;
    for (auto &s : cached_decoder_out) {
      ++i;
      if (i == 0) {
        continue;
      }

      next_states.push_back(std::move(s));
    }

    return {std::move(cached_decoder_out[0]), std::move(next_states)};
  }

  OrtAllocator *Allocator() { return allocator_; }

 private:
  void InitPreprocessor(void *model_data, size_t model_data_length) {
    preprocessor_sess_ = std::make_unique<Ort::Session>(
        env_, model_data, model_data_length, sess_opts_);

    GetInputNames(preprocessor_sess_.get(), &preprocessor_input_names_,
                  &preprocessor_input_names_ptr_);

    GetOutputNames(preprocessor_sess_.get(), &preprocessor_output_names_,
                   &preprocessor_output_names_ptr_);
  }

  void InitEncoder(void *model_data, size_t model_data_length) {
    encoder_sess_ = std::make_unique<Ort::Session>(
        env_, model_data, model_data_length, sess_opts_);

    GetInputNames(encoder_sess_.get(), &encoder_input_names_,
                  &encoder_input_names_ptr_);

    GetOutputNames(encoder_sess_.get(), &encoder_output_names_,
                   &encoder_output_names_ptr_);
  }

  void InitUnCachedDecoder(void *model_data, size_t model_data_length) {
    uncached_decoder_sess_ = std::make_unique<Ort::Session>(
        env_, model_data, model_data_length, sess_opts_);

    GetInputNames(uncached_decoder_sess_.get(), &uncached_decoder_input_names_,
                  &uncached_decoder_input_names_ptr_);

    GetOutputNames(uncached_decoder_sess_.get(),
                   &uncached_decoder_output_names_,
                   &uncached_decoder_output_names_ptr_);
  }

  void InitCachedDecoder(void *model_data, size_t model_data_length) {
    cached_decoder_sess_ = std::make_unique<Ort::Session>(
        env_, model_data, model_data_length, sess_opts_);

    GetInputNames(cached_decoder_sess_.get(), &cached_decoder_input_names_,
                  &cached_decoder_input_names_ptr_);

    GetOutputNames(cached_decoder_sess_.get(), &cached_decoder_output_names_,
                   &cached_decoder_output_names_ptr_);
  }

 private:
  OfflineModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> preprocessor_sess_;
  std::unique_ptr<Ort::Session> encoder_sess_;
  std::unique_ptr<Ort::Session> uncached_decoder_sess_;
  std::unique_ptr<Ort::Session> cached_decoder_sess_;

  std::vector<std::string> preprocessor_input_names_;
  std::vector<const char *> preprocessor_input_names_ptr_;

  std::vector<std::string> preprocessor_output_names_;
  std::vector<const char *> preprocessor_output_names_ptr_;

  std::vector<std::string> encoder_input_names_;
  std::vector<const char *> encoder_input_names_ptr_;

  std::vector<std::string> encoder_output_names_;
  std::vector<const char *> encoder_output_names_ptr_;

  std::vector<std::string> uncached_decoder_input_names_;
  std::vector<const char *> uncached_decoder_input_names_ptr_;

  std::vector<std::string> uncached_decoder_output_names_;
  std::vector<const char *> uncached_decoder_output_names_ptr_;

  std::vector<std::string> cached_decoder_input_names_;
  std::vector<const char *> cached_decoder_input_names_ptr_;

  std::vector<std::string> cached_decoder_output_names_;
  std::vector<const char *> cached_decoder_output_names_ptr_;
};

OfflineMoonshineModel::OfflineMoonshineModel(const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineMoonshineModel::OfflineMoonshineModel(Manager *mgr,
                                             const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineMoonshineModel::~OfflineMoonshineModel() = default;

Ort::Value OfflineMoonshineModel::ForwardPreprocessor(Ort::Value audio) const {
  return impl_->ForwardPreprocessor(std::move(audio));
}

Ort::Value OfflineMoonshineModel::ForwardEncoder(
    Ort::Value features, Ort::Value features_len) const {
  return impl_->ForwardEncoder(std::move(features), std::move(features_len));
}

std::pair<Ort::Value, std::vector<Ort::Value>>
OfflineMoonshineModel::ForwardUnCachedDecoder(Ort::Value token,
                                              Ort::Value seq_len,
                                              Ort::Value encoder_out) const {
  return impl_->ForwardUnCachedDecoder(std::move(token), std::move(seq_len),
                                       std::move(encoder_out));
}

std::pair<Ort::Value, std::vector<Ort::Value>>
OfflineMoonshineModel::ForwardCachedDecoder(
    Ort::Value token, Ort::Value seq_len, Ort::Value encoder_out,
    std::vector<Ort::Value> states) const {
  return impl_->ForwardCachedDecoder(std::move(token), std::move(seq_len),
                                     std::move(encoder_out), std::move(states));
}

OrtAllocator *OfflineMoonshineModel::Allocator() const {
  return impl_->Allocator();
}

#if __ANDROID_API__ >= 9
template OfflineMoonshineModel::OfflineMoonshineModel(
    AAssetManager *mgr, const OfflineModelConfig &config);
#endif

#if __OHOS__
template OfflineMoonshineModel::OfflineMoonshineModel(
    NativeResourceManager *mgr, const OfflineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-moonshine-model.h
================================================
// sherpa-onnx/csrc/offline-moonshine-model.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_MOONSHINE_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_MOONSHINE_MODEL_H_

#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-model-config.h"

namespace sherpa_onnx {

// please see
// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/moonshine/test.py
class OfflineMoonshineModel {
 public:
  explicit OfflineMoonshineModel(const OfflineModelConfig &config);

  template <typename Manager>
  OfflineMoonshineModel(Manager *mgr, const OfflineModelConfig &config);

  ~OfflineMoonshineModel();

  /** Run the preprocessor model.
   *
   * @param audio A float32 tensor of shape (batch_size, num_samples)
   *
   * @return Return a float32 tensor of shape (batch_size, T, dim) that
   *         can be used as the input of ForwardEncoder()
   */
  Ort::Value ForwardPreprocessor(Ort::Value audio) const;

  /** Run the encoder model.
   *
   * @param features A float32 tensor of shape (batch_size, T, dim)
   * @param features_len A int32 tensor of shape (batch_size,)
   * @returns A float32 tensor of shape (batch_size, T, dim).
   */
  Ort::Value ForwardEncoder(Ort::Value features, Ort::Value features_len) const;

  /** Run the uncached decoder.
   *
   * @param token A int32 tensor of shape (batch_size, num_tokens)
   * @param seq_len A int32 tensor of shape (batch_size,) containing number
   *                of predicted tokens so far
   * @param encoder_out A float32 tensor of shape (batch_size, T, dim)
   *
   * @returns Return a pair:
   *
   *          - logits, a float32 tensor of shape (batch_size, 1, dim)
   *          - states, a list of states
   */
  std::pair<Ort::Value, std::vector<Ort::Value>> ForwardUnCachedDecoder(
      Ort::Value token, Ort::Value seq_len, Ort::Value encoder_out) const;

  /** Run the cached decoder.
   *
   * @param token A int32 tensor of shape (batch_size, num_tokens)
   * @param seq_len A int32 tensor of shape (batch_size,) containing number
   *                of predicted tokens so far
   * @param encoder_out A float32 tensor of shape (batch_size, T, dim)
   * @param states A list of previous states
   *
   * @returns Return a pair:
   *          - logits, a float32 tensor of shape (batch_size, 1, dim)
   *          - states, a list of new states
   */
  std::pair<Ort::Value, std::vector<Ort::Value>> ForwardCachedDecoder(
      Ort::Value token, Ort::Value seq_len, Ort::Value encoder_out,
      std::vector<Ort::Value> states) const;

  /** Return an allocator for allocating memory
   */
  OrtAllocator *Allocator() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_MOONSHINE_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/offline-moonshine-v2-greedy-search-decoder.cc
================================================
// sherpa-onnx/csrc/offline-moonshine-v2-greedy-search-decoder.cc
//
// Copyright (c)  2024-2026  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-moonshine-v2-greedy-search-decoder.h"

#include <algorithm>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"

namespace sherpa_onnx {

std::vector<OfflineMoonshineDecoderResult>
OfflineMoonshineV2GreedySearchDecoder::Decode(Ort::Value encoder_out) {
  auto encoder_out_shape = encoder_out.GetTensorTypeAndShapeInfo().GetShape();
  if (encoder_out_shape[0] != 1) {
    SHERPA_ONNX_LOGE("Support only batch size == 1. Given: %d\n",
                     static_cast<int32_t>(encoder_out_shape[0]));
    return {};
  }

  auto memory_info =
      Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

  // encoder_out_shape[1] * 384 is the number of audio samples
  // 16000 is the sample rate
  //
  //
  // 384 is from the moonshine paper
  int32_t max_len =
      static_cast<int32_t>(encoder_out_shape[1] * 384 / 16000.0 * 15);

  int64_t sos = 1;
  int32_t eos = 2;
  int32_t seq_len = 1;

  std::vector<int32_t> tokens;

  std::array<int64_t, 2> token_shape = {1, 1};

  Ort::Value token_tensor = Ort::Value::CreateTensor(
      memory_info, &sos, 1, token_shape.data(), token_shape.size());

  Ort::Value logits{nullptr};
  std::vector<Ort::Value> states = model_->GetDecoderInitStates();

  // To fix the false alarm of clang-tidy
  // error: 'states' used after it was moved
  // [bugprone-use-after-move,-warnings-as-errors]
  // we use a tmp_states here
  std::vector<Ort::Value> tmp_states{std::move(states)};

  std::tie(logits, states) = model_->ForwardDecoder(
      std::move(token_tensor), View(&encoder_out), std::move(tmp_states));

  int32_t vocab_size = logits.GetTensorTypeAndShapeInfo().GetShape()[2];

  int64_t max_token_id;

  for (int32_t i = 0; i != max_len; ++i) {
    const float *p = logits.GetTensorData<float>();

    max_token_id = static_cast<int64_t>(
        std::distance(p, std::max_element(p, p + vocab_size)));

    if (max_token_id == eos) {
      break;
    }

    tokens.push_back(max_token_id);

    seq_len += 1;

    token_tensor = Ort::Value::CreateTensor(
        memory_info, &max_token_id, 1, token_shape.data(), token_shape.size());

    tmp_states = std::move(states);

    std::tie(logits, states) = model_->ForwardDecoder(
        std::move(token_tensor), View(&encoder_out), std::move(tmp_states));
  }

  OfflineMoonshineDecoderResult ans;
  ans.tokens = std::move(tokens);

  return {ans};
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-moonshine-v2-greedy-search-decoder.h
================================================
// sherpa-onnx/csrc/offline-moonshine-v2-greedy-search-decoder.h
//
// Copyright (c)  2024-2026  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_MOONSHINE_V2_GREEDY_SEARCH_DECODER_H_
#define SHERPA_ONNX_CSRC_OFFLINE_MOONSHINE_V2_GREEDY_SEARCH_DECODER_H_

#include <vector>

#include "sherpa-onnx/csrc/offline-moonshine-decoder.h"
#include "sherpa-onnx/csrc/offline-moonshine-model-v2.h"

namespace sherpa_onnx {

class OfflineMoonshineV2GreedySearchDecoder : public OfflineMoonshineDecoder {
 public:
  explicit OfflineMoonshineV2GreedySearchDecoder(OfflineMoonshineModelV2 *model)
      : model_(model) {}

  std::vector<OfflineMoonshineDecoderResult> Decode(
      Ort::Value encoder_out) override;

 private:
  OfflineMoonshineModelV2 *model_;  // not owned
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_MOONSHINE_V2_GREEDY_SEARCH_DECODER_H_


================================================
FILE: sherpa-onnx/csrc/offline-nemo-enc-dec-ctc-model-config.cc
================================================
// sherpa-onnx/csrc/offline-nemo-enc-dec-ctc-model-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-nemo-enc-dec-ctc-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OfflineNemoEncDecCtcModelConfig::Register(ParseOptions *po) {
  po->Register("nemo-ctc-model", &model,
               "Path to model.onnx of Nemo EncDecCtcModel.");
}

bool OfflineNemoEncDecCtcModelConfig::Validate() const {
  if (!FileExists(model)) {
    SHERPA_ONNX_LOGE("NeMo model: '%s' does not exist", model.c_str());
    return false;
  }

  return true;
}

std::string OfflineNemoEncDecCtcModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineNemoEncDecCtcModelConfig(";
  os << "model=\"" << model << "\")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-nemo-enc-dec-ctc-model-config.h
================================================
// sherpa-onnx/csrc/offline-nemo-enc-dec-ctc-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_NEMO_ENC_DEC_CTC_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_NEMO_ENC_DEC_CTC_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineNemoEncDecCtcModelConfig {
  std::string model;

  OfflineNemoEncDecCtcModelConfig() = default;
  explicit OfflineNemoEncDecCtcModelConfig(const std::string &model)
      : model(model) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_NEMO_ENC_DEC_CTC_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-nemo-enc-dec-ctc-model.cc
================================================
// sherpa-onnx/csrc/offline-nemo-enc-dec-ctc-model.cc
//
// Copyright (c)  2023-2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-nemo-enc-dec-ctc-model.h"

#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"
#include "sherpa-onnx/csrc/transpose.h"

namespace sherpa_onnx {

class OfflineNemoEncDecCtcModel::Impl {
 public:
  explicit Impl(const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(config_.nemo_ctc.model);
    Init(buf.data(), buf.size());
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(mgr, config_.nemo_ctc.model);
    Init(buf.data(), buf.size());
  }

  std::vector<Ort::Value> Forward(Ort::Value features,
                                  Ort::Value features_length) {
    std::vector<int64_t> shape =
        features_length.GetTensorTypeAndShapeInfo().GetShape();

    Ort::Value out_features_length = Ort::Value::CreateTensor<int64_t>(
        allocator_, shape.data(), shape.size());

    const int64_t *src = features_length.GetTensorData<int64_t>();
    int64_t *dst = out_features_length.GetTensorMutableData<int64_t>();
    for (int64_t i = 0; i != shape[0]; ++i) {
      dst[i] = src[i] / subsampling_factor_;
    }

    // (B, T, C) -> (B, C, T)
    features = Transpose12(allocator_, &features);

    std::array<Ort::Value, 2> inputs = {std::move(features),
                                        std::move(features_length)};
    auto out =
        sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(),
                   output_names_ptr_.data(), output_names_ptr_.size());

    std::vector<Ort::Value> ans;
    ans.reserve(2);
    ans.push_back(std::move(out[0]));
    ans.push_back(std::move(out_features_length));
    return ans;
  }

  int32_t VocabSize() const { return vocab_size_; }

  int32_t SubsamplingFactor() const { return subsampling_factor_; }

  OrtAllocator *Allocator() { return allocator_; }

  std::string FeatureNormalizationMethod() const { return normalize_type_; }

  bool IsGigaAM() const { return is_giga_am_; }

 private:
  void Init(void *model_data, size_t model_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);

    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      PrintModelMetadata(os, meta_data);
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
    SHERPA_ONNX_READ_META_DATA(vocab_size_, "vocab_size");
    SHERPA_ONNX_READ_META_DATA(subsampling_factor_, "subsampling_factor");
    SHERPA_ONNX_READ_META_DATA_STR_ALLOW_EMPTY(normalize_type_,
                                               "normalize_type");
    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(is_giga_am_, "is_giga_am", 0);
  }

 private:
  OfflineModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;

  int32_t vocab_size_ = 0;
  int32_t subsampling_factor_ = 0;
  std::string normalize_type_;

  // it is 1 for models from
  // https://github.com/salute-developers/GigaAM
  int32_t is_giga_am_ = 0;
};

OfflineNemoEncDecCtcModel::OfflineNemoEncDecCtcModel(
    const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineNemoEncDecCtcModel::OfflineNemoEncDecCtcModel(
    Manager *mgr, const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineNemoEncDecCtcModel::~OfflineNemoEncDecCtcModel() = default;

std::vector<Ort::Value> OfflineNemoEncDecCtcModel::Forward(
    Ort::Value features, Ort::Value features_length) {
  return impl_->Forward(std::move(features), std::move(features_length));
}

int32_t OfflineNemoEncDecCtcModel::VocabSize() const {
  return impl_->VocabSize();
}
int32_t OfflineNemoEncDecCtcModel::SubsamplingFactor() const {
  return impl_->SubsamplingFactor();
}

OrtAllocator *OfflineNemoEncDecCtcModel::Allocator() const {
  return impl_->Allocator();
}

std::string OfflineNemoEncDecCtcModel::FeatureNormalizationMethod() const {
  return impl_->FeatureNormalizationMethod();
}

bool OfflineNemoEncDecCtcModel::IsGigaAM() const { return impl_->IsGigaAM(); }

#if __ANDROID_API__ >= 9
template OfflineNemoEncDecCtcModel::OfflineNemoEncDecCtcModel(
    AAssetManager *mgr, const OfflineModelConfig &config);
#endif

#if __OHOS__
template OfflineNemoEncDecCtcModel::OfflineNemoEncDecCtcModel(
    NativeResourceManager *mgr, const OfflineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-nemo-enc-dec-ctc-model.h
================================================
// sherpa-onnx/csrc/offline-nemo-enc-dec-ctc-model.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_NEMO_ENC_DEC_CTC_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_NEMO_ENC_DEC_CTC_MODEL_H_
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-ctc-model.h"
#include "sherpa-onnx/csrc/offline-model-config.h"

namespace sherpa_onnx {

/** This class implements the EncDecCTCModelBPE model from NeMo.
 *
 * See
 * https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/asr/models/ctc_bpe_models.py
 * https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/asr/models/ctc_models.py
 */
class OfflineNemoEncDecCtcModel : public OfflineCtcModel {
 public:
  explicit OfflineNemoEncDecCtcModel(const OfflineModelConfig &config);

  template <typename Manager>
  OfflineNemoEncDecCtcModel(Manager *mgr, const OfflineModelConfig &config);

  ~OfflineNemoEncDecCtcModel() override;

  /** Run the forward method of the model.
   *
   * @param features  A tensor of shape (N, T, C).
   * @param features_length  A 1-D tensor of shape (N,) containing number of
   *                         valid frames in `features` before padding.
   *                         Its dtype is int64_t.
   *
   * @return Return a vector containing:
   *  - log_probs: A 3-D tensor of shape (N, T', vocab_size).
   *  - log_probs_length A 1-D tensor of shape (N,). Its dtype is int64_t
   */
  std::vector<Ort::Value> Forward(Ort::Value features,
                                  Ort::Value features_length) override;

  /** Return the vocabulary size of the model
   */
  int32_t VocabSize() const override;

  /** SubsamplingFactor of the model
   *
   * For Citrinet, the subsampling factor is usually 4.
   * For Conformer CTC, the subsampling factor is usually 8.
   */
  int32_t SubsamplingFactor() const override;

  /** Return an allocator for allocating memory
   */
  OrtAllocator *Allocator() const override;

  // Possible values:
  // - per_feature
  // - all_features (not implemented yet)
  // - fixed_mean (not implemented)
  // - fixed_std (not implemented)
  // - or just leave it to empty
  // See
  // https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/asr/parts/preprocessing/features.py#L59
  // for details
  std::string FeatureNormalizationMethod() const override;

  bool IsGigaAM() const override;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

using OfflineNemoEncDecHybridRNNTCTCBPEModel = OfflineNemoEncDecCtcModel;

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_NEMO_ENC_DEC_CTC_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/offline-omnilingual-asr-ctc-model-config.cc
================================================
// sherpa-onnx/csrc/offline-omnilingual-asr-ctc-model-config.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-omnilingual-asr-ctc-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OfflineOmnilingualAsrCtcModelConfig::Register(ParseOptions *po) {
  po->Register("omnilingual-asr-model", &model,
               "Path to Omnilingual ASR CTC model");
}

bool OfflineOmnilingualAsrCtcModelConfig::Validate() const {
  if (!FileExists(model)) {
    SHERPA_ONNX_LOGE("Omnilingual ASR CTC model file '%s' does not exist",
                     model.c_str());
    return false;
  }

  return true;
}

std::string OfflineOmnilingualAsrCtcModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineOmnilingualAsrCtcModelConfig(";
  os << "model=\"" << model << "\")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-omnilingual-asr-ctc-model-config.h
================================================
// sherpa-onnx/csrc/offline-omnilingual-asr-ctc-model-config.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_OMNILINGUAL_ASR_CTC_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_OMNILINGUAL_ASR_CTC_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

// for
// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/omnilingual-asr/test.py
struct OfflineOmnilingualAsrCtcModelConfig {
  std::string model;

  OfflineOmnilingualAsrCtcModelConfig() = default;

  explicit OfflineOmnilingualAsrCtcModelConfig(const std::string &model)
      : model(model) {}

  void Register(ParseOptions *po);

  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_OMNILINGUAL_ASR_CTC_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-omnilingual-asr-ctc-model.cc
================================================
// sherpa-onnx/csrc/offline-omnilingual-asr-ctc-model.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-omnilingual-asr-ctc-model.h"

#include <algorithm>
#include <cmath>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "Eigen/Dense"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"
#include "sherpa-onnx/csrc/transpose.h"

namespace sherpa_onnx {

class OfflineOmnilingualAsrCtcModel::Impl {
 public:
  explicit Impl(const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    sess_ = std::make_unique<Ort::Session>(
        env_, SHERPA_ONNX_TO_ORT_PATH(config_.omnilingual.model), sess_opts_);
    Init(nullptr, 0);
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(mgr, config_.omnilingual.model);
    Init(buf.data(), buf.size());
  }

  std::vector<Ort::Value> Forward(Ort::Value features,
                                  Ort::Value /*/features_length*/) {
    auto out_vec =
        sess_->Run({}, input_names_ptr_.data(), &features, 1,
                   output_names_ptr_.data(), output_names_ptr_.size());
    std::vector<int64_t> logits_shape =
        out_vec[0].GetTensorTypeAndShapeInfo().GetShape();

    std::vector<int64_t> num_frames(logits_shape[0], logits_shape[1]);

    int64_t shape = logits_shape[0];

    Ort::Value logits_len =
        Ort::Value::CreateTensor<int64_t>(allocator_, &shape, 1);
    std::copy(num_frames.begin(), num_frames.end(),
              logits_len.GetTensorMutableData<int64_t>());

    out_vec.push_back(std::move(logits_len));

    return out_vec;
  }

  int32_t VocabSize() const { return vocab_size_; }

  OrtAllocator *Allocator() { return allocator_; }

  static void NormalizeFeatures(float *features, int32_t num_frames,
                                int32_t feat_dim) {
    if (num_frames != 1) {
      SHERPA_ONNX_LOGE(
          "Unexpected error in collecting samples for Omnilingual ASR models!");
      return;
    }

    // Map the single-row feature vector
    Eigen::Map<Eigen::ArrayXf> x(features, feat_dim);
    float mean = x.mean();
    float var = (x.square().mean() - mean * mean);
    var = std::max(var, 0.0f);
    float inv_stddev = 1.0f / std::sqrt(var + 1e-5f);

    x = (x - mean) * inv_stddev;
  }

 private:
  void Init(void *model_data, size_t model_data_length) {
    // For models with 1B parameters, weights are saved externally
    // in model.weights
    // We cannot create session from buffer in this case.
    if (model_data) {
      sess_ = std::make_unique<Ort::Session>(env_, model_data,
                                             model_data_length, sess_opts_);
    } else if (!sess_) {
      SHERPA_ONNX_LOGE(
          "Please pass buffer data or initialize session outside of this "
          "function");
      SHERPA_ONNX_EXIT(-1);
    }

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);

    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      PrintModelMetadata(os, meta_data);
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }

    // get vocab size from the output[0].shape, which is (N, T, vocab_size)
    vocab_size_ =
        sess_->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape()[2];
  }

 private:
  OfflineModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;

  int32_t vocab_size_ = 0;
};

OfflineOmnilingualAsrCtcModel::OfflineOmnilingualAsrCtcModel(
    const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineOmnilingualAsrCtcModel::OfflineOmnilingualAsrCtcModel(
    Manager *mgr, const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineOmnilingualAsrCtcModel::~OfflineOmnilingualAsrCtcModel() = default;

std::vector<Ort::Value> OfflineOmnilingualAsrCtcModel::Forward(
    Ort::Value features, Ort::Value features_length) {
  return impl_->Forward(std::move(features), std::move(features_length));
}

int32_t OfflineOmnilingualAsrCtcModel::VocabSize() const {
  return impl_->VocabSize();
}

OrtAllocator *OfflineOmnilingualAsrCtcModel::Allocator() const {
  return impl_->Allocator();
}

void OfflineOmnilingualAsrCtcModel::NormalizeFeatures(float *features,
                                                      int32_t num_frames,
                                                      int32_t feat_dim) const {
  return impl_->NormalizeFeatures(features, num_frames, feat_dim);
}

#if __ANDROID_API__ >= 9
template OfflineOmnilingualAsrCtcModel::OfflineOmnilingualAsrCtcModel(
    AAssetManager *mgr, const OfflineModelConfig &config);
#endif

#if __OHOS__
template OfflineOmnilingualAsrCtcModel::OfflineOmnilingualAsrCtcModel(
    NativeResourceManager *mgr, const OfflineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-omnilingual-asr-ctc-model.h
================================================
// sherpa-onnx/csrc/offline-omnilingual-asr-ctc-model.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_OMNILINGUAL_ASR_CTC_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_OMNILINGUAL_ASR_CTC_MODEL_H_
#include <memory>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-ctc-model.h"
#include "sherpa-onnx/csrc/offline-model-config.h"

namespace sherpa_onnx {

/** This class implements the Omnilingual ASR CTC model
 * from
 * https://github.com/facebookresearch/omnilingual-asr
 *
 * See
 * https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/omnilingual-asr/export-onnx.py
 */
class OfflineOmnilingualAsrCtcModel : public OfflineCtcModel {
 public:
  explicit OfflineOmnilingualAsrCtcModel(const OfflineModelConfig &config);

  template <typename Manager>
  OfflineOmnilingualAsrCtcModel(Manager *mgr, const OfflineModelConfig &config);

  ~OfflineOmnilingualAsrCtcModel() override;

  /** Run the forward method of the model.
   *
   * @param features  A tensor of shape (N, T, C).
   * @param features_length  A 1-D tensor of shape (N,) containing number of
   *                         valid frames in `features` before padding.
   *                         Its dtype is int64_t.
   *
   * @return Return a vector containing:
   *  - log_probs: A 3-D tensor of shape (N, T', vocab_size).
   *  - log_probs_length A 1-D tensor of shape (N,). Its dtype is int64_t
   */
  std::vector<Ort::Value> Forward(Ort::Value features,
                                  Ort::Value features_length) override;

  /** Return the vocabulary size of the model
   */
  int32_t VocabSize() const override;

  /** Return an allocator for allocating memory
   */
  OrtAllocator *Allocator() const override;

  void NormalizeFeatures(float *features, int32_t num_frames,
                         int32_t feat_dim) const override;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_OMNILINGUAL_ASR_CTC_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/offline-paraformer-decoder.h
================================================
// sherpa-onnx/csrc/offline-paraformer-decoder.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_PARAFORMER_DECODER_H_
#define SHERPA_ONNX_CSRC_OFFLINE_PARAFORMER_DECODER_H_

#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT

namespace sherpa_onnx {

struct OfflineParaformerDecoderResult {
  /// The decoded token IDs
  std::vector<int64_t> tokens;

  // it contains the start time of each token in seconds
  //
  // len(timestamps) == len(tokens)
  std::vector<float> timestamps;
};

class OfflineParaformerDecoder {
 public:
  virtual ~OfflineParaformerDecoder() = default;

  /** Run beam search given the output from the paraformer model.
   *
   * @param log_probs A 3-D tensor of shape (N, T, vocab_size)
   * @param token_num A 1-D tensor of shape (N). token_num equals to T.
   *
   * @return Return a vector of size `N` containing the decoded results.
   */
  virtual std::vector<OfflineParaformerDecoderResult> Decode(
      Ort::Value log_probs, Ort::Value token_num,
      Ort::Value us_cif_peak = Ort::Value(nullptr)) = 0;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_PARAFORMER_DECODER_H_


================================================
FILE: sherpa-onnx/csrc/offline-paraformer-greedy-search-decoder.cc
================================================
// sherpa-onnx/csrc/offline-paraformer-greedy-search-decoder.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-paraformer-greedy-search-decoder.h"

#include <algorithm>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

std::vector<OfflineParaformerDecoderResult>
OfflineParaformerGreedySearchDecoder::Decode(
    Ort::Value log_probs, Ort::Value /*token_num*/,
    Ort::Value us_cif_peak /*=Ort::Value(nullptr)*/
) {
  std::vector<int64_t> shape = log_probs.GetTensorTypeAndShapeInfo().GetShape();
  int32_t batch_size = shape[0];
  int32_t num_tokens = shape[1];
  int32_t vocab_size = shape[2];

  std::vector<OfflineParaformerDecoderResult> results(batch_size);

  for (int32_t i = 0; i != batch_size; ++i) {
    const float *p =
        log_probs.GetTensorData<float>() + i * num_tokens * vocab_size;
    for (int32_t k = 0; k != num_tokens; ++k) {
      auto max_idx = static_cast<int64_t>(
          std::distance(p, std::max_element(p, p + vocab_size)));
      if (max_idx == eos_id_) {
        break;
      }

      results[i].tokens.push_back(max_idx);

      p += vocab_size;
    }

    if (us_cif_peak) {
      int32_t dim = us_cif_peak.GetTensorTypeAndShapeInfo().GetShape().back();

      const auto *peak = us_cif_peak.GetTensorData<float>() + i * dim;
      std::vector<float> timestamps;
      timestamps.reserve(results[i].tokens.size());

      // 10.0: frameshift is 10 milliseconds
      // 6: LfrWindowSize
      // 3: us_cif_peak is upsampled by a factor of 3
      // 1000: milliseconds to seconds
      float scale = 10.0 * 6 / 3 / 1000;

      for (int32_t k = 0; k != dim; ++k) {
        if (peak[k] > 1 - 1e-4) {
          timestamps.push_back(k * scale);
        }
      }

      if (!timestamps.empty()) {
        timestamps.pop_back();
      }

      if (timestamps.size() == results[i].tokens.size()) {
        results[i].timestamps = std::move(timestamps);
      }
    }
  }

  return results;
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-paraformer-greedy-search-decoder.h
================================================
// sherpa-onnx/csrc/offline-paraformer-greedy-search-decoder.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_PARAFORMER_GREEDY_SEARCH_DECODER_H_
#define SHERPA_ONNX_CSRC_OFFLINE_PARAFORMER_GREEDY_SEARCH_DECODER_H_

#include <vector>

#include "sherpa-onnx/csrc/offline-paraformer-decoder.h"

namespace sherpa_onnx {

class OfflineParaformerGreedySearchDecoder : public OfflineParaformerDecoder {
 public:
  explicit OfflineParaformerGreedySearchDecoder(int32_t eos_id)
      : eos_id_(eos_id) {}

  std::vector<OfflineParaformerDecoderResult> Decode(
      Ort::Value log_probs, Ort::Value token_num,
      Ort::Value us_cif_peak = Ort::Value(nullptr)) override;

 private:
  int32_t eos_id_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_PARAFORMER_GREEDY_SEARCH_DECODER_H_


================================================
FILE: sherpa-onnx/csrc/offline-paraformer-model-config.cc
================================================
// sherpa-onnx/csrc/offline-paraformer-model-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-paraformer-model-config.h"

#include <memory>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

void OfflineParaformerModelConfig::Register(ParseOptions *po) {
  po->Register(
      "paraformer", &model,
      "Path to model.onnx of Paraformer. If you use Ascend NPU, it is "
      "/path/to/encoder.om,/path/to/predictor.om,/path/to/decoder.om"
      "If you use RK NPU, it is "
      "/path/to/encoder.rknn,/path/to/predictor.rknn,/path/to/decoder.rknn");

  std::string prefix = "paraformer";
  ParseOptions p(prefix, po);

  qnn_config.Register(&p);
}

bool OfflineParaformerModelConfig::Validate() const {
  if (EndsWith(model, ".onnx")) {
    if (!FileExists(model)) {
      SHERPA_ONNX_LOGE("Paraformer model '%s' does not exist", model.c_str());
      return false;
    }
    return true;
  }

  if (EndsWith(model, ".om")) {
    std::vector<std::string> filenames;
    SplitStringToVector(model, ",", false, &filenames);
    if (filenames.size() != 3 || !EndsWith(filenames[0], "encoder.om") ||
        !EndsWith(filenames[1], "predictor.om") ||
        !EndsWith(filenames[2], "decoder.om")) {
      SHERPA_ONNX_LOGE(
          "For Ascend NPU, you should pass "
          "/path/to/encoder.om,/path/to/predictor.om,/path/to/decoder.om. "
          "Given '%s'",
          model.c_str());
      return false;
    }

    for (const auto &name : filenames) {
      if (!FileExists(name)) {
        SHERPA_ONNX_LOGE("Paraformer model '%s' does not exist", name.c_str());
        return false;
      }
    }

    return true;
  }

  if (EndsWith(model, ".rknn")) {
    std::vector<std::string> filenames;
    SplitStringToVector(model, ",", false, &filenames);
    if (filenames.size() != 3 || !EndsWith(filenames[0], "encoder.rknn") ||
        !EndsWith(filenames[1], "predictor.rknn") ||
        !EndsWith(filenames[2], "decoder.rknn")) {
      SHERPA_ONNX_LOGE(
          "For RKNN, you should pass "
          "/path/encoder.rknn,/path/predictor.rknn,/path/decoder.rknn. "
          "Given '%s'",
          model.c_str());
      return false;
    }

    for (const auto &name : filenames) {
      if (!FileExists(name)) {
        SHERPA_ONNX_LOGE("Paraformer model '%s' does not exist", name.c_str());
        return false;
      }
    }

    return true;
  }

  if (EndsWith(model, ".so")) {
    std::vector<std::string> filenames;
    SplitStringToVector(model, ",", false, &filenames);
    if (filenames.size() != 3 || !EndsWith(filenames[0], "encoder.so") ||
        !EndsWith(filenames[1], "predictor.so") ||
        !EndsWith(filenames[2], "decoder.so")) {
      SHERPA_ONNX_LOGE(
          "For QNN, you should pass "
          "/path/libencoder.so,/path/libpredictor.so,/path/libdecoder.so. "
          "Given '%s'",
          model.c_str());
      return false;
    }

    for (const auto &name : filenames) {
      if (!FileExists(name)) {
        SHERPA_ONNX_LOGE("Paraformer model '%s' does not exist", name.c_str());
        return false;
      }
    }

    if (!qnn_config.Validate()) {
      return false;
    }

    return true;
  }

  if (model.empty() && !qnn_config.context_binary.empty()) {
    // we require that the context_binary exists
    if (!FileExists(qnn_config.context_binary)) {
      SHERPA_ONNX_LOGE(
          "Model is empty, but you provide a context binary that does not "
          "exist");
      return false;
    }

    std::vector<std::string> filenames;
    SplitStringToVector(model, ",", false, &filenames);
    if (filenames.size() != 3) {
      SHERPA_ONNX_LOGE(
          "For Paraformer with QNN, you should pass "
          "/path/encoder.bin,/path/predictor.bin,/path/decoder.bin"
          "Given '%s'",
          model.c_str());
      return false;
    }

    for (const auto &name : filenames) {
      if (!FileExists(name)) {
        SHERPA_ONNX_LOGE("Paraformer context binary '%s' does not exist",
                         name.c_str());
        return false;
      }
    }

    if (!qnn_config.Validate()) {
      return false;
    }

    return true;
  }

  SHERPA_ONNX_LOGE(
      "Please pass *.onnx, *.om, *.rknn, or *.so models. Given '%s'",
      model.c_str());
  return false;
}

std::string OfflineParaformerModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineParaformerModelConfig(";
  os << "model=\"" << model << "\"";

  if (!qnn_config.backend_lib.empty()) {
    os << ", qnn_config=" << qnn_config.ToString();
  }

  os << ")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-paraformer-model-config.h
================================================
// sherpa-onnx/csrc/offline-paraformer-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_PARAFORMER_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_PARAFORMER_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"
#include "sherpa-onnx/csrc/qnn-config.h"

namespace sherpa_onnx {

struct OfflineParaformerModelConfig {
  // for ascend npu,
  // model is "/path/to/encoder.om,/path/to/predictor.om,/path/to/decoder.om"
  //
  // for rknn,
  // model is
  // "/path/to/encoder.rknn,/path/to/predictor.rknn,/path/to/decoder.rknn"
  //
  // for qnn with shared libs, model is
  // model is
  // "/path/to/libencoder.so,/path/to/libpredictor.so,/path/to/libdecoder.so"
  std::string model;

  QnnConfig qnn_config;

  OfflineParaformerModelConfig() = default;
  explicit OfflineParaformerModelConfig(const std::string &model)
      : model(model) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_PARAFORMER_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-paraformer-model.cc
================================================
// sherpa-onnx/csrc/offline-paraformer-model.cc
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-paraformer-model.h"

#include <algorithm>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class OfflineParaformerModel::Impl {
 public:
  explicit Impl(const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(config_.paraformer.model);
    Init(buf.data(), buf.size());
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(mgr, config_.paraformer.model);
    Init(buf.data(), buf.size());
  }

  std::vector<Ort::Value> Forward(Ort::Value features,
                                  Ort::Value features_length) {
    std::array<Ort::Value, 2> inputs = {std::move(features),
                                        std::move(features_length)};

    return sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(),
                      output_names_ptr_.data(), output_names_ptr_.size());
  }

  int32_t VocabSize() const { return vocab_size_; }

  int32_t LfrWindowSize() const { return lfr_window_size_; }

  int32_t LfrWindowShift() const { return lfr_window_shift_; }

  const std::vector<float> &NegativeMean() const { return neg_mean_; }

  const std::vector<float> &InverseStdDev() const { return inv_stddev_; }

  OrtAllocator *Allocator() { return allocator_; }

 private:
  void Init(void *model_data, size_t model_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);

    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      PrintModelMetadata(os, meta_data);
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
    SHERPA_ONNX_READ_META_DATA(vocab_size_, "vocab_size");
    SHERPA_ONNX_READ_META_DATA(lfr_window_size_, "lfr_window_size");
    SHERPA_ONNX_READ_META_DATA(lfr_window_shift_, "lfr_window_shift");

    SHERPA_ONNX_READ_META_DATA_VEC_FLOAT(neg_mean_, "neg_mean");
    SHERPA_ONNX_READ_META_DATA_VEC_FLOAT(inv_stddev_, "inv_stddev");
  }

 private:
  OfflineModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;

  std::vector<float> neg_mean_;
  std::vector<float> inv_stddev_;

  int32_t vocab_size_ = 0;  // initialized in Init
  int32_t lfr_window_size_ = 0;
  int32_t lfr_window_shift_ = 0;
};

OfflineParaformerModel::OfflineParaformerModel(const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineParaformerModel::OfflineParaformerModel(Manager *mgr,
                                               const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineParaformerModel::~OfflineParaformerModel() = default;

std::vector<Ort::Value> OfflineParaformerModel::Forward(
    Ort::Value features, Ort::Value features_length) {
  return impl_->Forward(std::move(features), std::move(features_length));
}

int32_t OfflineParaformerModel::VocabSize() const { return impl_->VocabSize(); }

int32_t OfflineParaformerModel::LfrWindowSize() const {
  return impl_->LfrWindowSize();
}
int32_t OfflineParaformerModel::LfrWindowShift() const {
  return impl_->LfrWindowShift();
}
const std::vector<float> &OfflineParaformerModel::NegativeMean() const {
  return impl_->NegativeMean();
}
const std::vector<float> &OfflineParaformerModel::InverseStdDev() const {
  return impl_->InverseStdDev();
}

OrtAllocator *OfflineParaformerModel::Allocator() const {
  return impl_->Allocator();
}

#if __ANDROID_API__ >= 9
template OfflineParaformerModel::OfflineParaformerModel(
    AAssetManager *mgr, const OfflineModelConfig &config);
#endif

#if __OHOS__
template OfflineParaformerModel::OfflineParaformerModel(
    NativeResourceManager *mgr, const OfflineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-paraformer-model.h
================================================
// sherpa-onnx/csrc/offline-paraformer-model.h
//
// Copyright (c)  2022-2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_PARAFORMER_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_PARAFORMER_MODEL_H_

#include <memory>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-model-config.h"

namespace sherpa_onnx {

class OfflineParaformerModel {
 public:
  explicit OfflineParaformerModel(const OfflineModelConfig &config);

  template <typename Manager>
  OfflineParaformerModel(Manager *mgr, const OfflineModelConfig &config);

  ~OfflineParaformerModel();

  /** Run the forward method of the model.
   *
   * @param features  A tensor of shape (N, T, C). It is changed in-place.
   * @param features_length  A 1-D tensor of shape (N,) containing number of
   *                         valid frames in `features` before padding.
   *                         Its dtype is int32_t.
   *
   * @return Return a vector containing:
   *  - log_probs: A 3-D tensor of shape (N, T', vocab_size)
   *  - token_num: A 1-D tensor of shape (N, T') containing number
   *               of valid tokens in each utterance. Its dtype is int64_t.
   *  If it is a model supporting timestamps, then there are additional two
   *  outputs:
   *   - us_alphas
   *   - us_cif_peak
   */
  std::vector<Ort::Value> Forward(Ort::Value features,
                                  Ort::Value features_length);

  /** Return the vocabulary size of the model
   */
  int32_t VocabSize() const;

  /** It is lfr_m in config.yaml
   */
  int32_t LfrWindowSize() const;

  /** It is lfr_n in config.yaml
   */
  int32_t LfrWindowShift() const;

  /** Return negative mean for CMVN
   */
  const std::vector<float> &NegativeMean() const;

  /** Return inverse stddev for CMVN
   */
  const std::vector<float> &InverseStdDev() const;

  /** Return an allocator for allocating memory
   */
  OrtAllocator *Allocator() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_PARAFORMER_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/offline-punctuation-ct-transformer-impl.h
================================================
// sherpa-onnx/csrc/offline-punctuation-ct-transformer-impl.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_PUNCTUATION_CT_TRANSFORMER_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_PUNCTUATION_CT_TRANSFORMER_IMPL_H_

#include <math.h>

#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/math.h"
#include "sherpa-onnx/csrc/offline-ct-transformer-model.h"
#include "sherpa-onnx/csrc/offline-punctuation-impl.h"
#include "sherpa-onnx/csrc/offline-punctuation.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class OfflinePunctuationCtTransformerImpl : public OfflinePunctuationImpl {
 public:
  explicit OfflinePunctuationCtTransformerImpl(
      const OfflinePunctuationConfig &config)
      : config_(config), model_(config.model) {}

  template <typename Manager>
  OfflinePunctuationCtTransformerImpl(Manager *mgr,
                                      const OfflinePunctuationConfig &config)
      : config_(config), model_(mgr, config.model) {}

  std::string AddPunctuation(const std::string &text) const override {
    if (text.empty()) {
      return {};
    }

    std::vector<std::string> tokens = SplitUtf8(text);
    std::vector<int32_t> token_ids;
    token_ids.reserve(tokens.size());

    const auto &meta_data = model_.GetModelMetadata();

    for (const auto &t : tokens) {
      std::string token = ToLowerCase(t);
      if (meta_data.token2id.count(token)) {
        token_ids.push_back(meta_data.token2id.at(token));
      } else {
        token_ids.push_back(meta_data.unk_id);
      }
    }

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    int32_t segment_size = 20;
    int32_t max_len = 200;
    int32_t num_segments =
        ceil((static_cast<float>(token_ids.size()) + segment_size - 1) /
             segment_size);

    std::vector<int32_t> punctuations;
    int32_t last = -1;
    for (int32_t i = 0; i != num_segments; ++i) {
      int32_t this_start = i * segment_size;         // included
      int32_t this_end = this_start + segment_size;  // not included
      if (this_end > static_cast<int32_t>(token_ids.size())) {
        this_end = token_ids.size();
      }

      if (last != -1) {
        this_start = last;
      }
      // token_ids[this_start:this_end] is sent to the model

      std::array<int64_t, 2> x_shape = {1, this_end - this_start};
      Ort::Value x =
          Ort::Value::CreateTensor(memory_info, token_ids.data() + this_start,
                                   x_shape[1], x_shape.data(), x_shape.size());

      int64_t len_shape = 1;
      int32_t len = x_shape[1];
      Ort::Value x_len =
          Ort::Value::CreateTensor(memory_info, &len, 1, &len_shape, 1);

      Ort::Value out = model_.Forward(std::move(x), std::move(x_len));

      // [N, T, num_punctuations]
      std::vector<int64_t> out_shape =
          out.GetTensorTypeAndShapeInfo().GetShape();

      assert(out_shape[0] == 1);
      assert(out_shape[1] == len);
      assert(out_shape[2] == meta_data.num_punctuations);

      std::vector<int32_t> this_punctuations;
      this_punctuations.reserve(len);

      const float *p = out.GetTensorData<float>();
      for (int32_t k = 0; k != len; ++k, p += meta_data.num_punctuations) {
        auto index = static_cast<int32_t>(std::distance(
            p, std::max_element(p, p + meta_data.num_punctuations)));
        this_punctuations.push_back(index);
      }  // for (int32_t k = 0; k != len; ++k, p += meta_data.num_punctuations)

      int32_t dot_index = -1;
      int32_t comma_index = -1;

      for (int32_t m = static_cast<int32_t>(this_punctuations.size()) - 2;
           m >= 1; --m) {
        int32_t punct_id = this_punctuations[m];

        if (punct_id == meta_data.dot_id || punct_id == meta_data.quest_id) {
          dot_index = m;
          break;
        }

        if (comma_index == -1 && punct_id == meta_data.comma_id) {
          comma_index = m;
        }
      }  // for (int32_t k = this_punctuations.size() - 1; k >= 1; --k)

      if (dot_index == -1 && len >= max_len && comma_index != -1) {
        dot_index = comma_index;
        this_punctuations[dot_index] = meta_data.dot_id;
      }

      if (dot_index == -1) {
        if (last == -1) {
          last = this_start;
        }

        if (i == num_segments - 1) {
          dot_index = static_cast<int32_t>(this_punctuations.size()) - 1;
        }
      } else {
        last = this_start + dot_index + 1;
      }

      if (dot_index != -1) {
        punctuations.insert(punctuations.end(), this_punctuations.begin(),
                            this_punctuations.begin() + (dot_index + 1));
      }
    }  // for (int32_t i = 0; i != num_segments; ++i)

    if (punctuations.empty()) {
      return text + meta_data.id2punct[meta_data.dot_id];
    }
    std::vector<std::string> words_punct;

    for (int32_t i = 0; i != static_cast<int32_t>(punctuations.size()); ++i) {
      if (i >= static_cast<int32_t>(tokens.size())) {
        break;
      }
      std::string &w = tokens[i];
      if (i > 0 && !(words_punct.back()[0] & 0x80) && !(w[0] & 0x80)) {
        words_punct.push_back(" ");
      }
      words_punct.push_back(std::move(w));

      if (punctuations[i] != meta_data.underline_id) {
        words_punct.push_back(meta_data.id2punct[punctuations[i]]);
      }
    }

    if (words_punct.back() == meta_data.id2punct[meta_data.comma_id] ||
        words_punct.back() == meta_data.id2punct[meta_data.pause_id]) {
      words_punct.back() = meta_data.id2punct[meta_data.dot_id];
    }

    if (words_punct.back() != meta_data.id2punct[meta_data.dot_id] &&
        words_punct.back() != meta_data.id2punct[meta_data.quest_id]) {
      words_punct.push_back(meta_data.id2punct[meta_data.dot_id]);
    }

    std::string ans;
    for (const auto &w : words_punct) {
      ans.append(w);
    }
    return ans;
  }

 private:
  OfflinePunctuationConfig config_;
  OfflineCtTransformerModel model_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_PUNCTUATION_CT_TRANSFORMER_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-punctuation-impl.cc
================================================
// sherpa-onnx/csrc/offline-punctuation-impl.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-punctuation-impl.h"

#include <memory>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-punctuation-ct-transformer-impl.h"

namespace sherpa_onnx {

std::unique_ptr<OfflinePunctuationImpl> OfflinePunctuationImpl::Create(
    const OfflinePunctuationConfig &config) {
  if (!config.model.ct_transformer.empty()) {
    return std::make_unique<OfflinePunctuationCtTransformerImpl>(config);
  }

  SHERPA_ONNX_LOGE("Please specify a punctuation model! Return a null pointer");
  return nullptr;
}

template <typename Manager>
std::unique_ptr<OfflinePunctuationImpl> OfflinePunctuationImpl::Create(
    Manager *mgr, const OfflinePunctuationConfig &config) {
  if (!config.model.ct_transformer.empty()) {
    return std::make_unique<OfflinePunctuationCtTransformerImpl>(mgr, config);
  }

  SHERPA_ONNX_LOGE("Please specify a punctuation model! Return a null pointer");
  return nullptr;
}

#if __ANDROID_API__ >= 9
template std::unique_ptr<OfflinePunctuationImpl> OfflinePunctuationImpl::Create(
    AAssetManager *mgr, const OfflinePunctuationConfig &config);
#endif

#if __OHOS__
template std::unique_ptr<OfflinePunctuationImpl> OfflinePunctuationImpl::Create(
    NativeResourceManager *mgr, const OfflinePunctuationConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-punctuation-impl.h
================================================
// sherpa-onnx/csrc/offline-punctuation-impl.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_PUNCTUATION_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_PUNCTUATION_IMPL_H_

#include <memory>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/offline-punctuation.h"

namespace sherpa_onnx {

class OfflinePunctuationImpl {
 public:
  virtual ~OfflinePunctuationImpl() = default;

  static std::unique_ptr<OfflinePunctuationImpl> Create(
      const OfflinePunctuationConfig &config);

  template <typename Manager>
  static std::unique_ptr<OfflinePunctuationImpl> Create(
      Manager *mgr, const OfflinePunctuationConfig &config);

  virtual std::string AddPunctuation(const std::string &text) const = 0;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_PUNCTUATION_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-punctuation-model-config.cc
================================================
// sherpa-onnx/csrc/offline-punctuation-model-config.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-punctuation-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OfflinePunctuationModelConfig::Register(ParseOptions *po) {
  po->Register("ct-transformer", &ct_transformer,
               "Path to the controllable time-delay (CT) transformer model");

  po->Register("num-threads", &num_threads,
               "Number of threads to run the neural network");

  po->Register("debug", &debug,
               "true to print model information while loading it.");

  po->Register("provider", &provider,
               "Specify a provider to use: cpu, cuda, coreml");
}

bool OfflinePunctuationModelConfig::Validate() const {
  if (ct_transformer.empty()) {
    SHERPA_ONNX_LOGE("Please provide --ct-transformer");
    return false;
  }

  if (!FileExists(ct_transformer)) {
    SHERPA_ONNX_LOGE("--ct-transformer %s does not exist",
                     ct_transformer.c_str());
    return false;
  }

  return true;
}

std::string OfflinePunctuationModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflinePunctuationModelConfig(";
  os << "ct_transformer=\"" << ct_transformer << "\", ";
  os << "num_threads=" << num_threads << ", ";
  os << "debug=" << (debug ? "True" : "False") << ", ";
  os << "provider=\"" << provider << "\")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-punctuation-model-config.h
================================================
// sherpa-onnx/csrc/offline-punctuation-model-config.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_PUNCTUATION_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_PUNCTUATION_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflinePunctuationModelConfig {
  std::string ct_transformer;

  int32_t num_threads = 1;
  bool debug = false;
  std::string provider = "cpu";

  OfflinePunctuationModelConfig() = default;

  OfflinePunctuationModelConfig(const std::string &ct_transformer,
                                int32_t num_threads, bool debug,
                                const std::string &provider)
      : ct_transformer(ct_transformer),
        num_threads(num_threads),
        debug(debug),
        provider(provider) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_PUNCTUATION_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-punctuation.cc
================================================
// sherpa-onnx/csrc/offline-punctuation.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-punctuation.h"

#include <string>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-punctuation-impl.h"

namespace sherpa_onnx {

void OfflinePunctuationConfig::Register(ParseOptions *po) {
  model.Register(po);
}

bool OfflinePunctuationConfig::Validate() const {
  if (!model.Validate()) {
    return false;
  }

  return true;
}

std::string OfflinePunctuationConfig::ToString() const {
  std::ostringstream os;

  os << "OfflinePunctuationConfig(";
  os << "model=" << model.ToString() << ")";

  return os.str();
}

OfflinePunctuation::OfflinePunctuation(const OfflinePunctuationConfig &config)
    : impl_(OfflinePunctuationImpl::Create(config)) {}

template <typename Manager>
OfflinePunctuation::OfflinePunctuation(Manager *mgr,
                                       const OfflinePunctuationConfig &config)
    : impl_(OfflinePunctuationImpl::Create(mgr, config)) {}

#if __ANDROID_API__ >= 9
template OfflinePunctuation::OfflinePunctuation(
    AAssetManager *mgr, const OfflinePunctuationConfig &config);
#endif

#if __OHOS__
template OfflinePunctuation::OfflinePunctuation(
    NativeResourceManager *mgr, const OfflinePunctuationConfig &config);
#endif

OfflinePunctuation::~OfflinePunctuation() = default;

std::string OfflinePunctuation::AddPunctuation(const std::string &text) const {
  return impl_->AddPunctuation(text);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-punctuation.h
================================================
// sherpa-onnx/csrc/offline-punctuation.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_PUNCTUATION_H_
#define SHERPA_ONNX_CSRC_OFFLINE_PUNCTUATION_H_

#include <memory>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/offline-punctuation-model-config.h"
#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflinePunctuationConfig {
  OfflinePunctuationModelConfig model;

  OfflinePunctuationConfig() = default;

  explicit OfflinePunctuationConfig(const OfflinePunctuationModelConfig &model)
      : model(model) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

class OfflinePunctuationImpl;

class OfflinePunctuation {
 public:
  explicit OfflinePunctuation(const OfflinePunctuationConfig &config);

  template <typename Manager>
  OfflinePunctuation(Manager *mgr, const OfflinePunctuationConfig &config);

  ~OfflinePunctuation();

  // Add punctuation to the input text and return it.
  std::string AddPunctuation(const std::string &text) const;

 private:
  std::unique_ptr<OfflinePunctuationImpl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_PUNCTUATION_H_


================================================
FILE: sherpa-onnx/csrc/offline-recognizer-canary-impl.h
================================================
// sherpa-onnx/csrc/offline-recognizer-canary-impl.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_CANARY_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_CANARY_IMPL_H_

#include <algorithm>
#include <ios>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-canary-model.h"
#include "sherpa-onnx/csrc/offline-recognizer-impl.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/symbol-table.h"
#include "sherpa-onnx/csrc/utils.h"

namespace sherpa_onnx {

class OfflineRecognizerCanaryImpl : public OfflineRecognizerImpl {
 public:
  explicit OfflineRecognizerCanaryImpl(const OfflineRecognizerConfig &config)
      : OfflineRecognizerImpl(config),
        config_(config),
        symbol_table_(config_.model_config.tokens),
        model_(std::make_unique<OfflineCanaryModel>(config_.model_config)) {
    PostInit();
  }

  template <typename Manager>
  explicit OfflineRecognizerCanaryImpl(Manager *mgr,
                                       const OfflineRecognizerConfig &config)
      : OfflineRecognizerImpl(mgr, config),
        config_(config),
        symbol_table_(mgr, config_.model_config.tokens),
        model_(
            std::make_unique<OfflineCanaryModel>(mgr, config_.model_config)) {
    PostInit();
  }

  std::unique_ptr<OfflineStream> CreateStream() const override {
    return std::make_unique<OfflineStream>(config_.feat_config);
  }

  void DecodeStreams(OfflineStream **ss, int32_t n) const override {
    for (int32_t i = 0; i < n; ++i) {
      DecodeStream(ss[i]);
    }
  }

  void DecodeStream(OfflineStream *s) const {
    auto meta = model_->GetModelMetadata();
    auto enc_out = RunEncoder(s);
    Ort::Value enc_states = std::move(enc_out[0]);
    Ort::Value enc_mask = std::move(enc_out[2]);
    // enc_out[1] is discarded
    std::vector<int32_t> decoder_input = GetInitialDecoderInput();
    auto decoder_states = model_->GetInitialDecoderStates();
    Ort::Value logits{nullptr};

    for (int32_t i = 0; i < decoder_input.size(); ++i) {
      std::tie(logits, decoder_states) =
          RunDecoder(decoder_input[i], i, std::move(decoder_states),
                     View(&enc_states), View(&enc_mask));
    }

    int32_t max_token_id = GetMaxTokenId(&logits);
    int32_t eos = symbol_table_["<|endoftext|>"];

    int32_t num_feature_frames =
        enc_states.GetTensorTypeAndShapeInfo().GetShape()[1] *
        meta.subsampling_factor;

    std::vector<int32_t> tokens = {max_token_id};

    // Assume 30 tokens per second. It is to avoid the following for loop
    // running indefinitely.
    int32_t num_tokens =
        static_cast<int32_t>(num_feature_frames / 100.0 * 30) + 1;

    for (int32_t i = 1; i <= num_tokens; ++i) {
      if (tokens.back() == eos) {
        break;
      }

      std::tie(logits, decoder_states) =
          RunDecoder(tokens.back(), i, std::move(decoder_states),
                     View(&enc_states), View(&enc_mask));
      tokens.push_back(GetMaxTokenId(&logits));
    }

    // remove the last eos token
    tokens.pop_back();

    auto r = Convert(tokens);

    r.text = ApplyInverseTextNormalization(std::move(r.text));
    r.text = ApplyHomophoneReplacer(std::move(r.text));

    s->SetResult(r);
  }

  OfflineRecognizerConfig GetConfig() const override { return config_; }

  void SetConfig(const OfflineRecognizerConfig &config) override {
    config_.model_config.canary.src_lang = config.model_config.canary.src_lang;
    config_.model_config.canary.tgt_lang = config.model_config.canary.tgt_lang;
    config_.model_config.canary.use_pnc = config.model_config.canary.use_pnc;

    // we don't change the config_ in the base class
  }

 private:
  OfflineRecognitionResult Convert(const std::vector<int32_t> &tokens) const {
    OfflineRecognitionResult r;
    r.tokens.reserve(tokens.size());

    std::string text;
    for (auto i : tokens) {
      if (!symbol_table_.Contains(i)) {
        continue;
      }

      const auto &s = symbol_table_[i];
      text += s;
      r.tokens.push_back(s);
    }

    r.text = std::move(text);

    return r;
  }

  int32_t GetMaxTokenId(Ort::Value *logits) const {
    // logits is of shape (1, 1, vocab_size)
    auto meta = model_->GetModelMetadata();
    const float *p_logits = logits->GetTensorData<float>();

    int32_t max_token_id = static_cast<int32_t>(std::distance(
        p_logits, std::max_element(p_logits, p_logits + meta.vocab_size)));

    return max_token_id;
  }

  std::vector<Ort::Value> RunEncoder(OfflineStream *s) const {
    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    int32_t feat_dim = config_.feat_config.feature_dim;
    std::vector<float> f = s->GetFrames();

    int32_t num_frames = f.size() / feat_dim;

    std::array<int64_t, 3> shape = {1, num_frames, feat_dim};

    Ort::Value x = Ort::Value::CreateTensor(memory_info, f.data(), f.size(),
                                            shape.data(), shape.size());

    int64_t x_length_scalar = num_frames;
    std::array<int64_t, 1> x_length_shape = {1};
    Ort::Value x_length =
        Ort::Value::CreateTensor(memory_info, &x_length_scalar, 1,
                                 x_length_shape.data(), x_length_shape.size());
    return model_->ForwardEncoder(std::move(x), std::move(x_length));
  }

  std::pair<Ort::Value, std::vector<Ort::Value>> RunDecoder(
      int32_t token, int32_t pos, std::vector<Ort::Value> decoder_states,
      Ort::Value enc_states, Ort::Value enc_mask) const {
    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::array<int64_t, 2> shape = {1, 2};
    std::array<int32_t, 2> _decoder_input = {token, pos};

    Ort::Value decoder_input = Ort::Value::CreateTensor(
        memory_info, _decoder_input.data(), _decoder_input.size(), shape.data(),
        shape.size());

    return model_->ForwardDecoder(std::move(decoder_input),
                                  std::move(decoder_states),
                                  std::move(enc_states), std::move(enc_mask));
  }

  // see
  // https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/nemo/canary/test_180m_flash.py#L242
  std::vector<int32_t> GetInitialDecoderInput() const {
    auto canary_config = config_.model_config.canary;
    const auto &meta = model_->GetModelMetadata();

    std::vector<int32_t> decoder_input(9);
    decoder_input[0] = symbol_table_["<|startofcontext|>"];
    decoder_input[1] = symbol_table_["<|startoftranscript|>"];
    decoder_input[2] = symbol_table_["<|emo:undefined|>"];

    if (canary_config.src_lang.empty() ||
        !meta.lang2id.count(canary_config.src_lang)) {
      decoder_input[3] = meta.lang2id.at("en");
    } else {
      decoder_input[3] = meta.lang2id.at(canary_config.src_lang);
    }

    if (canary_config.tgt_lang.empty() ||
        !meta.lang2id.count(canary_config.tgt_lang)) {
      decoder_input[4] = meta.lang2id.at("en");
    } else {
      decoder_input[4] = meta.lang2id.at(canary_config.tgt_lang);
    }

    if (canary_config.use_pnc) {
      decoder_input[5] = symbol_table_["<|pnc|>"];
    } else {
      decoder_input[5] = symbol_table_["<|nopnc|>"];
    }

    decoder_input[6] = symbol_table_["<|noitn|>"];
    decoder_input[7] = symbol_table_["<|notimestamp|>"];
    decoder_input[8] = symbol_table_["<|nodiarize|>"];

    return decoder_input;
  }

 private:
  void PostInit() {
    auto &meta = model_->GetModelMetadata();
    config_.feat_config.feature_dim = meta.feat_dim;

    config_.feat_config.nemo_normalize_type = meta.normalize_type;

    config_.feat_config.dither = 0;
    config_.feat_config.remove_dc_offset = false;
    config_.feat_config.low_freq = 0;
    config_.feat_config.window_type = "hann";
    config_.feat_config.is_librosa = true;

    meta.lang2id["en"] = symbol_table_["<|en|>"];
    meta.lang2id["es"] = symbol_table_["<|es|>"];
    meta.lang2id["de"] = symbol_table_["<|de|>"];
    meta.lang2id["fr"] = symbol_table_["<|fr|>"];

    if (symbol_table_.NumSymbols() != meta.vocab_size) {
      SHERPA_ONNX_LOGE("number of lines in tokens.txt %d != %d (vocab_size)",
                       symbol_table_.NumSymbols(), meta.vocab_size);
      SHERPA_ONNX_EXIT(-1);
    }
  }

 private:
  OfflineRecognizerConfig config_;
  SymbolTable symbol_table_;
  std::unique_ptr<OfflineCanaryModel> model_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_CANARY_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-recognizer-ctc-impl.h
================================================
// sherpa-onnx/csrc/offline-recognizer-ctc-impl.h
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_CTC_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_CTC_IMPL_H_

#include <ios>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-ctc-decoder.h"
#include "sherpa-onnx/csrc/offline-ctc-fst-decoder.h"
#include "sherpa-onnx/csrc/offline-ctc-greedy-search-decoder.h"
#include "sherpa-onnx/csrc/offline-ctc-model.h"
#include "sherpa-onnx/csrc/offline-recognizer-impl.h"
#include "sherpa-onnx/csrc/pad-sequence.h"
#include "sherpa-onnx/csrc/symbol-table.h"

namespace sherpa_onnx {

OfflineRecognitionResult Convert(const OfflineCtcDecoderResult &src,
                                 const SymbolTable &sym_table,
                                 int32_t frame_shift_ms,
                                 int32_t subsampling_factor) {
  OfflineRecognitionResult r;
  r.tokens.reserve(src.tokens.size());
  r.timestamps.reserve(src.timestamps.size());

  std::string text;

  for (int32_t i = 0; i != src.tokens.size(); ++i) {
    if (sym_table.Contains("SIL") && src.tokens[i] == sym_table["SIL"]) {
      // tdnn models from yesno have a SIL token, we should remove it.
      continue;
    }

    if (sym_table.Contains("</s>") && src.tokens[i] == sym_table["</s>"]) {
      // Skip </s> for Google MedASR
      continue;
    }
    auto sym = sym_table[src.tokens[i]];
    text.append(sym);

    if (sym.size() == 1 && (sym[0] < 0x20 || sym[0] > 0x7e)) {
      // for bpe models with byte_fallback
      // (but don't rewrite printable characters 0x20..0x7e,
      //  which collide with standard BPE units)
      std::ostringstream os;
      os << "<0x" << std::hex << std::uppercase
         << (static_cast<int32_t>(sym[0]) & 0xff) << ">";
      sym = os.str();
    }

    r.tokens.push_back(std::move(sym));
  }

  if (sym_table.IsByteBpe()) {
    text = sym_table.DecodeByteBpe(text);
  }

  if (!text.empty() && text.front() == ' ') {
    text.erase(0, 1);
  }

  r.text = std::move(text);

  float frame_shift_s = frame_shift_ms / 1000. * subsampling_factor;
  for (auto t : src.timestamps) {
    float time = frame_shift_s * t;
    r.timestamps.push_back(time);
  }

  r.words = std::move(src.words);

  return r;
}

class OfflineRecognizerCtcImpl : public OfflineRecognizerImpl {
 public:
  explicit OfflineRecognizerCtcImpl(const OfflineRecognizerConfig &config)
      : OfflineRecognizerImpl(config),
        config_(config),
        symbol_table_(config_.model_config.tokens),
        model_(OfflineCtcModel::Create(config_.model_config)) {
    Init();
  }

  template <typename Manager>
  OfflineRecognizerCtcImpl(Manager *mgr, const OfflineRecognizerConfig &config)
      : OfflineRecognizerImpl(mgr, config),
        config_(config),
        symbol_table_(mgr, config_.model_config.tokens),
        model_(OfflineCtcModel::Create(mgr, config_.model_config)) {
    Init();
  }

  void Init() {
    if (!config_.model_config.telespeech_ctc.empty()) {
      config_.feat_config.snip_edges = true;
      config_.feat_config.num_ceps = 40;
      config_.feat_config.feature_dim = 40;
      config_.feat_config.low_freq = 40;
      config_.feat_config.high_freq = -200;
      config_.feat_config.use_energy = false;
      config_.feat_config.normalize_samples = false;
      config_.feat_config.is_mfcc = true;
    }

    if (!config_.model_config.nemo_ctc.model.empty()) {
      if (model_->IsGigaAM()) {
        config_.feat_config.low_freq = 0;
        config_.feat_config.high_freq = 8000;
        config_.feat_config.remove_dc_offset = false;
        config_.feat_config.preemph_coeff = 0;
        config_.feat_config.window_type = "hann";
        config_.feat_config.feature_dim = 64;

        // see
        // https://github.com/salute-developers/GigaAM/blob/main/gigaam/preprocess.py#L68
        //
        // GigaAM uses n_fft 400
        config_.feat_config.round_to_power_of_two = false;
      } else {
        config_.feat_config.low_freq = 0;
        config_.feat_config.high_freq = 0;
        config_.feat_config.is_librosa = true;
        config_.feat_config.remove_dc_offset = false;
        config_.feat_config.window_type = "hann";
      }
    }

    if (!config_.model_config.dolphin.model.empty()) {
      config_.feat_config.low_freq = 0;
      config_.feat_config.high_freq = 8000;
      config_.feat_config.remove_dc_offset = false;
      config_.feat_config.dither = 0;
      config_.feat_config.preemph_coeff = 0;
      config_.feat_config.window_type = "hann";
      config_.feat_config.feature_dim = 80;
      config_.feat_config.is_librosa = true;
      config_.feat_config.frame_length_ms = 31.25;  // 16000/512 = 31.25
      config_.feat_config.snip_edges = false;
    }

    if (!config_.model_config.wenet_ctc.model.empty()) {
      // WeNet CTC models assume input samples are in the range
      // [-32768, 32767], so we set normalize_samples to false
      config_.feat_config.normalize_samples = false;
      config_.feat_config.dither = 1;
    }

    if (!config_.model_config.medasr.model.empty()) {
      config_.feat_config.low_freq = 125;
      config_.feat_config.high_freq = 7500;
      config_.feat_config.remove_dc_offset = false;
      config_.feat_config.dither = 0;
      config_.feat_config.preemph_coeff = 0;
      config_.feat_config.window_type = "hanning";
      config_.feat_config.feature_dim = 128;
      config_.feat_config.snip_edges = true;
    }

    if (!config_.model_config.fire_red_asr_ctc.model.empty()) {
      config_.feat_config.normalize_samples = false;
      config_.feat_config.high_freq = 0;
      config_.feat_config.snip_edges = true;
    }

    config_.feat_config.nemo_normalize_type =
        model_->FeatureNormalizationMethod();

    if (!config_.ctc_fst_decoder_config.graph.empty()) {
      // TODO(fangjun): Support android to read the graph from
      // asset_manager
      decoder_ = std::make_unique<OfflineCtcFstDecoder>(
          config_.ctc_fst_decoder_config);
    } else if (config_.decoding_method == "greedy_search") {
      if (!symbol_table_.Contains("<blk>") &&
          !symbol_table_.Contains("<eps>") &&
          !symbol_table_.Contains("<blank>") &&
          config_.model_config.omnilingual.model.empty()) {
        // for omnilingual asr, its blank id is 0
        SHERPA_ONNX_LOGE(
            "We expect that tokens.txt contains "
            "the symbol <blk> or <eps> or <blank> and its ID.");
        SHERPA_ONNX_EXIT(-1);
      }

      int32_t blank_id = 0;
      if (symbol_table_.Contains("<blk>")) {
        blank_id = symbol_table_["<blk>"];
      } else if (symbol_table_.Contains("<eps>")) {
        // for tdnn models of the yesno recipe from icefall
        blank_id = symbol_table_["<eps>"];
      } else if (symbol_table_.Contains("<blank>")) {
        // for Wenet CTC models
        blank_id = symbol_table_["<blank>"];
      }

      decoder_ = std::make_unique<OfflineCtcGreedySearchDecoder>(blank_id);
    } else {
      SHERPA_ONNX_LOGE("Only greedy_search is supported at present. Given %s",
                       config_.decoding_method.c_str());
      SHERPA_ONNX_EXIT(-1);
    }
  }

  std::unique_ptr<OfflineStream> CreateStream() const override {
    if (config_.model_config.omnilingual.model.empty()) {
      return std::make_unique<OfflineStream>(config_.feat_config);
    } else {
      return std::make_unique<OfflineStream>(OmnilingualAsrTag{});
    }
  }

  void DecodeStreams(OfflineStream **ss, int32_t n) const override {
    if (!model_->SupportBatchProcessing() || (n == 1) ||
        !config_.model_config.omnilingual.model.empty()) {
      // If the model does not support batch processing,
      // we process each stream independently.
      //
      // omnilingual asr is disabled for batch processing at present
      for (int32_t i = 0; i != n; ++i) {
        DecodeStream(ss[i]);
      }
      return;
    }

    // Even if the omnilingual asr model can process batch input, the following
    // code does not support batching raw audio samples.

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    int32_t feat_dim = ss[0]->FeatureDim();

    std::vector<Ort::Value> features;
    features.reserve(n);

    std::vector<std::vector<float>> features_vec(n);
    std::vector<int64_t> features_length_vec(n);

    for (int32_t i = 0; i != n; ++i) {
      std::vector<float> f = ss[i]->GetFrames();

      int32_t num_frames = f.size() / feat_dim;

      model_->NormalizeFeatures(f.data(), num_frames, feat_dim);

      features_vec[i] = std::move(f);

      features_length_vec[i] = num_frames;

      std::array<int64_t, 2> shape = {num_frames, feat_dim};

      Ort::Value x = Ort::Value::CreateTensor(
          memory_info, features_vec[i].data(), features_vec[i].size(),
          shape.data(), shape.size());
      features.push_back(std::move(x));
    }  // for (int32_t i = 0; i != n; ++i)

    std::vector<const Ort::Value *> features_pointer(n);
    for (int32_t i = 0; i != n; ++i) {
      features_pointer[i] = &features[i];
    }

    std::array<int64_t, 1> features_length_shape = {n};
    Ort::Value x_length = Ort::Value::CreateTensor(
        memory_info, features_length_vec.data(), n,
        features_length_shape.data(), features_length_shape.size());

    Ort::Value x = PadSequence(model_->Allocator(), features_pointer,
                               -23.025850929940457f);
    auto t = model_->Forward(std::move(x), std::move(x_length));

    auto results = decoder_->Decode(std::move(t[0]), std::move(t[1]));

    int32_t frame_shift_ms = 10;
    for (int32_t i = 0; i != n; ++i) {
      auto r = Convert(results[i], symbol_table_, frame_shift_ms,
                       model_->SubsamplingFactor());
      r.text = ApplyInverseTextNormalization(std::move(r.text));
      r.text = ApplyHomophoneReplacer(std::move(r.text));
      ss[i]->SetResult(r);
    }
  }

  OfflineRecognizerConfig GetConfig() const override { return config_; }

 private:
  // Decode a single stream.
  // Some models do not support batch size > 1, e.g., WeNet CTC models.
  void DecodeStream(OfflineStream *s) const {
    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    int32_t feat_dim = s->FeatureDim();
    std::vector<float> f = s->GetFrames();

    int32_t num_frames = f.size() / feat_dim;

    model_->NormalizeFeatures(f.data(), num_frames, feat_dim);

    std::vector<int64_t> shape = {1, num_frames, feat_dim};
    if (!config_.model_config.omnilingual.model.empty()) {
      shape = {1, feat_dim};
    }

    Ort::Value x = Ort::Value::CreateTensor(memory_info, f.data(), f.size(),
                                            shape.data(), shape.size());

    int64_t x_length_scalar = num_frames;
    std::array<int64_t, 1> x_length_shape = {1};
    Ort::Value x_length =
        Ort::Value::CreateTensor(memory_info, &x_length_scalar, 1,
                                 x_length_shape.data(), x_length_shape.size());

    auto t = model_->Forward(std::move(x), std::move(x_length));
    auto results = decoder_->Decode(std::move(t[0]), std::move(t[1]));
    int32_t frame_shift_ms = 10;

    if (!config_.model_config.omnilingual.model.empty()) {
      frame_shift_ms = 20;
    }

    auto r = Convert(results[0], symbol_table_, frame_shift_ms,
                     model_->SubsamplingFactor());
    r.text = ApplyInverseTextNormalization(std::move(r.text));
    r.text = ApplyHomophoneReplacer(std::move(r.text));
    s->SetResult(r);
  }

 private:
  OfflineRecognizerConfig config_;
  SymbolTable symbol_table_;
  std::unique_ptr<OfflineCtcModel> model_;
  std::unique_ptr<OfflineCtcDecoder> decoder_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_CTC_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-recognizer-fire-red-asr-impl.h
================================================
// sherpa-onnx/csrc/offline-recognizer-fire-red-asr-impl.h
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_FIRE_RED_ASR_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_FIRE_RED_ASR_IMPL_H_

#include <algorithm>
#include <cmath>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "Eigen/Dense"
#include "sherpa-onnx/csrc/offline-fire-red-asr-decoder.h"
#include "sherpa-onnx/csrc/offline-fire-red-asr-greedy-search-decoder.h"
#include "sherpa-onnx/csrc/offline-fire-red-asr-model.h"
#include "sherpa-onnx/csrc/offline-model-config.h"
#include "sherpa-onnx/csrc/offline-recognizer-impl.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/symbol-table.h"
#include "sherpa-onnx/csrc/transpose.h"

namespace sherpa_onnx {

static OfflineRecognitionResult Convert(
    const OfflineFireRedAsrDecoderResult &src, const SymbolTable &sym_table) {
  OfflineRecognitionResult r;
  r.tokens.reserve(src.tokens.size());

  std::string text;
  for (auto i : src.tokens) {
    if (!sym_table.Contains(i)) {
      continue;
    }

    const auto &s = sym_table[i];
    text += s;
    r.tokens.push_back(s);
  }

  r.text = std::move(text);

  return r;
}

class OfflineRecognizerFireRedAsrImpl : public OfflineRecognizerImpl {
 public:
  explicit OfflineRecognizerFireRedAsrImpl(
      const OfflineRecognizerConfig &config)
      : OfflineRecognizerImpl(config),
        config_(config),
        symbol_table_(config_.model_config.tokens),
        model_(std::make_unique<OfflineFireRedAsrModel>(config.model_config)) {
    Init();
  }

  template <typename Manager>
  OfflineRecognizerFireRedAsrImpl(Manager *mgr,
                                  const OfflineRecognizerConfig &config)
      : OfflineRecognizerImpl(mgr, config),
        config_(config),
        symbol_table_(mgr, config_.model_config.tokens),
        model_(std::make_unique<OfflineFireRedAsrModel>(mgr,
                                                        config.model_config)) {
    Init();
  }

  void Init() {
    if (config_.decoding_method == "greedy_search") {
      decoder_ =
          std::make_unique<OfflineFireRedAsrGreedySearchDecoder>(model_.get());
    } else {
      SHERPA_ONNX_LOGE(
          "Only greedy_search is supported at present for FireRedAsr. Given %s",
          config_.decoding_method.c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    const auto &meta_data = model_->GetModelMetadata();

    config_.feat_config.normalize_samples = false;
    config_.feat_config.high_freq = 0;
    config_.feat_config.snip_edges = true;
  }

  std::unique_ptr<OfflineStream> CreateStream() const override {
    return std::make_unique<OfflineStream>(config_.feat_config);
  }

  void DecodeStreams(OfflineStream **ss, int32_t n) const override {
    // batch decoding is not implemented yet
    for (int32_t i = 0; i != n; ++i) {
      DecodeStream(ss[i]);
    }
  }

  OfflineRecognizerConfig GetConfig() const override { return config_; }

 private:
  void DecodeStream(OfflineStream *s) const {
    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    int32_t feat_dim = s->FeatureDim();
    std::vector<float> f = s->GetFrames();
    ApplyCMVN(&f);

    int64_t num_frames = f.size() / feat_dim;

    std::array<int64_t, 3> shape{1, num_frames, feat_dim};

    Ort::Value x = Ort::Value::CreateTensor(memory_info, f.data(), f.size(),
                                            shape.data(), shape.size());

    int64_t len_shape = 1;
    Ort::Value x_len =
        Ort::Value::CreateTensor(memory_info, &num_frames, 1, &len_shape, 1);

    auto cross_kv = model_->ForwardEncoder(std::move(x), std::move(x_len));

    auto results = decoder_->Decode(std::move(cross_kv.first),
                                    std::move(cross_kv.second), num_frames);

    auto r = Convert(results[0], symbol_table_);

    r.text = ApplyInverseTextNormalization(std::move(r.text));
    r.text = ApplyHomophoneReplacer(std::move(r.text));
    s->SetResult(r);
  }

  void ApplyCMVN(std::vector<float> *v) const {
    const auto &meta_data = model_->GetModelMetadata();
    const auto &mean_vec = meta_data.mean;
    const auto &inv_stddev_vec = meta_data.inv_stddev;
    int32_t feat_dim = static_cast<int32_t>(mean_vec.size());
    int32_t num_frames = static_cast<int32_t>(v->size()) / feat_dim;
    Eigen::Map<
        Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
        mat(v->data(), num_frames, feat_dim);
    Eigen::Map<const Eigen::RowVectorXf> mean(mean_vec.data(), feat_dim);
    Eigen::Map<const Eigen::RowVectorXf> inv_std(inv_stddev_vec.data(),
                                                 feat_dim);

    mat.array() =
        (mat.array().rowwise() - mean.array()).rowwise() * inv_std.array();
  }

 private:
  OfflineRecognizerConfig config_;
  SymbolTable symbol_table_;
  std::unique_ptr<OfflineFireRedAsrModel> model_;
  std::unique_ptr<OfflineFireRedAsrDecoder> decoder_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_FIRE_RED_ASR_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-recognizer-funasr-nano-impl.cc
================================================
// sherpa-onnx/csrc/offline-recognizer-funasr-nano-impl.cc
//
// Copyright (c)  2025  zengyw

#include "sherpa-onnx/csrc/offline-recognizer-funasr-nano-impl.h"

#include <algorithm>
#include <cctype>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <limits>
#include <memory>
#include <random>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"

namespace sherpa_onnx {

namespace {
// Build cache_position tensor from attention_mask.
// Creates a [S] int64_t tensor where the first element is the starting position
// (pos0) for writing KV deltas. The remaining elements are consecutive
// positions [pos0, pos0+1, ..., pos0+S-1].
// For prefill: pos0 = 0, S = context_len
// For decode: pos0 = valid_len, S = 1 (mask_len = valid_len + 1)
static Ort::Value BuildCachePositionFromMask(const Ort::Value &attention_mask,
                                             int32_t seq_len,
                                             OrtAllocator *allocator) {
  auto mask_info = attention_mask.GetTensorTypeAndShapeInfo();
  auto mask_shape = mask_info.GetShape();

  // Get the current position from attention_mask length
  // mask_shape is [1, mask_len], where mask_len = past_len + seq_len
  int64_t pos0 = 0;
  if (mask_shape.size() == 2 && mask_shape[1] > 0) {
    // pos0 is the current position in cache (past length = mask_len - seq_len)
    pos0 = static_cast<int64_t>(mask_shape[1]) - seq_len;
  }
  if (pos0 < 0) pos0 = 0;

  // Create tensor using allocator
  std::array<int64_t, 1> pos_shape{seq_len};
  Ort::Value cache_position = Ort::Value::CreateTensor<int64_t>(
      allocator, pos_shape.data(), pos_shape.size());

  // Fill the tensor with position values
  int64_t *p = cache_position.GetTensorMutableData<int64_t>();
  for (int32_t i = 0; i < seq_len; ++i) {
    p[i] = pos0 + i;
  }

  return cache_position;
}

// Create attention_mask tensor view from pre-allocated buffer.
// Returns a tensor with shape [1, mask_len] (dynamic length).
static Ort::Value CreateAttentionMaskView(
    std::vector<int64_t> *attention_mask_vec, int32_t mask_len,
    const Ort::MemoryInfo &memory_info, bool update_new_pos = false) {
  if (update_new_pos && mask_len > 0) {
    (*attention_mask_vec)[mask_len - 1] = 1;
  }
  std::array<int64_t, 2> mask_shape{1, mask_len};
  return Ort::Value::CreateTensor<int64_t>(
      memory_info, attention_mask_vec->data(), static_cast<size_t>(mask_len),
      mask_shape.data(), mask_shape.size());
}

static inline void TrimInplace(std::string *s) {
  if (!s) return;
  auto &str = *s;
  auto not_space = [](unsigned char c) { return !std::isspace(c); };

  str.erase(str.begin(), std::find_if(str.begin(), str.end(), not_space));
  str.erase(std::find_if(str.rbegin(), str.rend(), not_space).base(),
            str.end());
}

static std::vector<std::string> ParseHotwordsCsv(const std::string &csv) {
  std::vector<std::string> out;
  std::string cur;
  cur.reserve(csv.size());

  for (size_t i = 0; i < csv.size(); ++i) {
    unsigned char ch = static_cast<unsigned char>(csv[i]);
    // Support both ASCII and Chinese separators
    // Check for Chinese comma (，) and semicolon (；) - UTF-8 encoding
    bool is_separator = false;
    if (ch == ',' || ch == ';' || ch == '\n' || ch == '\r' || ch == '\t') {
      is_separator = true;
    } else if (ch == 0xEF) {
      // Check for UTF-8 encoded Chinese comma (，) = EF BC 8C or semicolon (；)
      // = EF BC 9B. Otherwise consume full 3-byte sequence to avoid corrupting
      // other UTF-8 chars (e.g. 0xEF 0xBE 0xAD).
      if (i + 2 < csv.size()) {
        unsigned char ch1 = static_cast<unsigned char>(csv[i + 1]);
        unsigned char ch2 = static_cast<unsigned char>(csv[i + 2]);
        if (ch1 == 0xBC && (ch2 == 0x8C || ch2 == 0x9B)) {
          is_separator = true;
          i += 2;  // Skip the remaining UTF-8 bytes
        } else if (ch1 >= 0x80 && ch1 <= 0xBF && ch2 >= 0x80 && ch2 <= 0xBF) {
          cur.push_back(csv[i]);
          cur.push_back(csv[i + 1]);
          cur.push_back(csv[i + 2]);
          i += 2;
          continue;
        }
      }
    }

    if (is_separator) {
      TrimInplace(&cur);
      if (!cur.empty()) out.push_back(cur);
      cur.clear();
    } else {
      cur.push_back(csv[i]);
    }
  }
  TrimInplace(&cur);
  if (!cur.empty()) out.push_back(cur);
  return out;
}

static std::string JoinWithComma(const std::vector<std::string> &xs) {
  std::string s;
  for (size_t i = 0; i < xs.size(); ++i) {
    if (i) s += ", ";
    s += xs[i];
  }
  return s;
}

// Build user prompt based on hotwords, language, and itn settings.
static std::string BuildUserPrompt(const std::vector<std::string> &hotwords,
                                   const std::string *language, bool itn,
                                   const std::string *user_prompt) {
  const bool has_override =
      !hotwords.empty() || (language && !language->empty()) || !itn;
  if (user_prompt && !user_prompt->empty() && !has_override) {
    return *user_prompt;
  }

  std::string prefix;
  if (!hotwords.empty()) {
    std::string hw = JoinWithComma(hotwords);
    prefix =
        "请结合上下文信息，更加准确地完成语音转写任务。如果没有相关信息，我们会"
        "留空。\n\n\n"
        "**上下文信息：**\n\n\n";
    prefix += "热词列表：[" + hw + "]\n";
  }

  std::string task =
      (!language || language->empty()) ? "语音转写" : "语音转写成" + *language;
  if (!itn) {
    task += "，不进行文本规整";
  }
  task += "：";

  return prefix + task;
}

}  // namespace

OfflineRecognizerFunASRNanoImpl::OfflineRecognizerFunASRNanoImpl(
    const OfflineRecognizerConfig &config)
    : OfflineRecognizerImpl(config),
      config_(config),
      model_(std::make_unique<OfflineFunASRNanoModel>(config.model_config)),
      tokenizer_(std::make_unique<FunASRNanoTokenizer>(
          config.model_config.funasr_nano.tokenizer)),
      rng_(config.model_config.funasr_nano.seed) {
  InitFeatConfig();
}

template <typename Manager>
OfflineRecognizerFunASRNanoImpl::OfflineRecognizerFunASRNanoImpl(
    Manager *mgr, const OfflineRecognizerConfig &config)
    : OfflineRecognizerImpl(mgr, config),
      config_(config),
      model_(
          std::make_unique<OfflineFunASRNanoModel>(mgr, config.model_config)),
      tokenizer_(std::make_unique<FunASRNanoTokenizer>(
          mgr, config.model_config.funasr_nano.tokenizer)),
      rng_(config.model_config.funasr_nano.seed) {
  InitFeatConfig();
}

std::unique_ptr<OfflineStream> OfflineRecognizerFunASRNanoImpl::CreateStream()
    const {
  return std::make_unique<OfflineStream>(config_.feat_config);
}

// Initialize feature extraction configuration for FunASR-nano.
// Sets normalization, window type, and disables edge snipping and dithering
// to match the model's expected input format.
void OfflineRecognizerFunASRNanoImpl::InitFeatConfig() {
  config_.feat_config.normalize_samples = false;
  config_.feat_config.window_type = "hamming";
  config_.feat_config.snip_edges = false;
  config_.feat_config.dither = 0.0f;
}

// Apply Low Frame Rate (LFR) processing to reduce temporal resolution.
// Concatenates multiple consecutive frames into a single frame.
std::vector<float> OfflineRecognizerFunASRNanoImpl::ApplyLFR(
    const std::vector<float> &in) const {
  int32_t lfr_window_size = model_->LfrWindowSize();
  int32_t lfr_window_shift = model_->LfrWindowShift();
  int32_t in_feat_dim = config_.feat_config.feature_dim;
  int32_t in_num_frames = static_cast<int32_t>(in.size() / in_feat_dim);
  int32_t out_num_frames =
      (in_num_frames - lfr_window_size) / lfr_window_shift + 1;
  if (out_num_frames <= 0) return {};
  int32_t out_feat_dim = in_feat_dim * lfr_window_size;
  std::vector<float> out(out_num_frames * out_feat_dim);
  const float *p_in = in.data();
  float *p_out = out.data();
  for (int32_t i = 0; i != out_num_frames; ++i) {
    std::copy(p_in, p_in + out_feat_dim, p_out);
    p_out += out_feat_dim;
    p_in += lfr_window_shift * in_feat_dim;
  }
  return out;
}

// Build source token IDs with chat template format:
// [system_prompt] [user_prompt] [audio_tokens] [assistant_prompt]
// Returns the token sequence and sets fbank_beg_idx to the start position
// of audio tokens in the sequence.
std::vector<int64_t> OfflineRecognizerFunASRNanoImpl::BuildSourceIds(
    const std::string &system_prompt, const std::string &user_prompt,
    int32_t audio_token_len, int32_t &fbank_beg_idx,
    int32_t &fake_token_len) const {
  const std::string system_text =
      "<|im_start|>system\n" + system_prompt + "<|im_end|>\n";
  const std::string user_text = "<|im_start|>user\n" + user_prompt;
  const std::string after_text = "<|im_end|>\n<|im_start|>assistant\n";
  std::vector<int64_t> ids_before = tokenizer_->Encode(system_text + user_text);
  std::vector<int64_t> ids_after = tokenizer_->Encode(after_text);
  fbank_beg_idx = static_cast<int32_t>(ids_before.size());
  fake_token_len = audio_token_len;
  int64_t pad_id = tokenizer_->GetPadTokenId();
  if (pad_id < 0) pad_id = tokenizer_->GetEosTokenId();
  std::vector<int64_t> source_ids;
  source_ids.reserve(ids_before.size() + audio_token_len + ids_after.size());
  source_ids.insert(source_ids.end(), ids_before.begin(), ids_before.end());
  // Use pad tokens as placeholders for audio embeddings
  source_ids.insert(source_ids.end(), audio_token_len, pad_id);
  source_ids.insert(source_ids.end(), ids_after.begin(), ids_after.end());
  return source_ids;
}

// Sample token from logits using greedy decoding (argmax).
// Handles both FP16 and FP32 logits, skipping NaN/Inf values.
// Returns token ID 0 as fallback if all logits are invalid.
int64_t OfflineRecognizerFunASRNanoImpl::SampleTokenFromLogitsFp16OrFp32(
    const void *logits, bool is_fp16, int32_t vocab_size) const {
  int32_t best = 0;
  float best_val = -1e30f;
  bool found_valid = false;
  if (is_fp16) {
    const uint16_t *p = reinterpret_cast<const uint16_t *>(logits);
    for (int32_t i = 0; i < vocab_size; ++i) {
      float v = HalfBitsToFloat(p[i]);
      if (std::isfinite(v) && v > best_val) {
        best_val = v;
        best = i;
        found_valid = true;
      }
    }
  } else {
    const float *p = reinterpret_cast<const float *>(logits);
    for (int32_t i = 0; i < vocab_size; ++i) {
      if (std::isfinite(p[i]) && p[i] > best_val) {
        best_val = p[i];
        best = i;
        found_valid = true;
      }
    }
  }
  if (!found_valid) {
    return 0;
  }
  return static_cast<int64_t>(best);
}

// Sample token from logits using temperature and top-p (nucleus) sampling.
// Handles both FP16 and FP32 logits.
// Returns token ID 0 as fallback if all logits are invalid.
// If temperature is very small (<= 1e-6) or invalid, falls back to greedy
// decoding. If top_p >= 1.0, samples from all tokens without sorting (full
// vocabulary).
int64_t OfflineRecognizerFunASRNanoImpl::SampleTokenWithTemperatureAndTopP(
    const void *logits, bool is_fp16, int32_t vocab_size, float temperature,
    float top_p) const {
  if (temperature <= 1e-6f || !std::isfinite(temperature)) {
    return SampleTokenFromLogitsFp16OrFp32(logits, is_fp16, vocab_size);
  }

  if (!std::isfinite(top_p) || top_p <= 0.0f) {
    return SampleTokenFromLogitsFp16OrFp32(logits, is_fp16, vocab_size);
  }
  if (top_p > 1.0f) top_p = 1.0f;

  thread_local std::vector<float> probs;
  thread_local std::vector<int32_t> idx;

  probs.resize(vocab_size);
  idx.resize(vocab_size);

  float max_logit = -std::numeric_limits<float>::infinity();
  bool found_valid = false;

  if (is_fp16) {
    const uint16_t *p = reinterpret_cast<const uint16_t *>(logits);
    for (int32_t i = 0; i < vocab_size; ++i) {
      float v = HalfBitsToFloat(p[i]);
      if (std::isfinite(v)) {
        v /= temperature;
        probs[i] = v;
        if (v > max_logit) max_logit = v;
        found_valid = true;
      } else {
        probs[i] = -1e30f;
      }
      idx[i] = i;
    }
  } else {
    const float *p = reinterpret_cast<const float *>(logits);
    for (int32_t i = 0; i < vocab_size; ++i) {
      float v = p[i];
      if (std::isfinite(v)) {
        v /= temperature;
        probs[i] = v;
        if (v > max_logit) max_logit = v;
        found_valid = true;
      } else {
        probs[i] = -1e30f;
      }
      idx[i] = i;
    }
  }

  if (!found_valid) return 0;

  float sum_exp = 0.0f;
  for (int32_t i = 0; i < vocab_size; ++i) {
    float e = std::exp(probs[i] - max_logit);
    probs[i] = e;
    sum_exp += e;
  }
  if (sum_exp <= 0.0f || !std::isfinite(sum_exp)) return 0;
  for (int32_t i = 0; i < vocab_size; ++i) {
    probs[i] /= sum_exp;
  }

  if (top_p >= 1.0f) {
    std::uniform_real_distribution<float> dist(0.0f, 1.0f);
    float sample = dist(rng_);
    float cumsum = 0.0f;
    for (int32_t i = 0; i < vocab_size; ++i) {
      cumsum += probs[i];
      if (sample <= cumsum) return static_cast<int64_t>(i);
    }
    return static_cast<int64_t>(vocab_size - 1);
  }

  int32_t k = std::min<int32_t>(256, vocab_size);
  float cum_k = 0.0f;
  while (true) {
    std::partial_sort(
        idx.begin(), idx.begin() + k, idx.end(),
        [&](int32_t a, int32_t b) { return probs[a] > probs[b]; });

    cum_k = 0.0f;
    for (int32_t i = 0; i < k; ++i) cum_k += probs[idx[i]];

    if (cum_k >= top_p || k == vocab_size) break;

    int32_t new_k = std::min(vocab_size, k * 2);
    if (new_k == k) break;
    k = new_k;
  }

  float cumsum = 0.0f;
  int32_t cutoff = k;
  for (int32_t i = 0; i < k; ++i) {
    cumsum += probs[idx[i]];
    if (cumsum >= top_p) {
      cutoff = i + 1;
      break;
    }
  }

  float renorm_sum = 0.0f;
  for (int32_t i = 0; i < cutoff; ++i) renorm_sum += probs[idx[i]];
  if (renorm_sum <= 0.0f) return 0;

  std::uniform_real_distribution<float> dist(0.0f, renorm_sum);
  float sample = dist(rng_);
  float cumsum_sample = 0.0f;
  for (int32_t i = 0; i < cutoff; ++i) {
    cumsum_sample += probs[idx[i]];
    if (sample <= cumsum_sample) return static_cast<int64_t>(idx[i]);
  }
  return static_cast<int64_t>(idx[cutoff - 1]);
}

OfflineRecognitionResult OfflineRecognizerFunASRNanoImpl::GenerateText(
    Ort::Value encoder_out, const std::string &system_prompt,
    const std::string &user_prompt) const {
  OfflineRecognitionResult result;
  auto memory_info =
      Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);
  const auto &funasr_config = config_.model_config.funasr_nano;
  auto enc_shape = encoder_out.GetTensorTypeAndShapeInfo().GetShape();
  int32_t audio_token_len = static_cast<int32_t>(enc_shape[1]);
  int32_t hidden_size = static_cast<int32_t>(enc_shape[2]);
  int32_t fbank_beg_idx = 0;
  int32_t fake_token_len = 0;
  std::vector<int64_t> source_ids =
      BuildSourceIds(system_prompt, user_prompt, audio_token_len, fbank_beg_idx,
                     fake_token_len);
  int32_t context_len = static_cast<int32_t>(source_ids.size());

  // Create KV cache buffer [B, max_total_len, kv_h, hd].
  // This stores the accumulated KV cache. Model outputs are deltas that get
  // applied in-place.
  std::vector<std::pair<Ort::Value, Ort::Value>> cache_kv =
      model_->CreateEmptyKVCache(1);
  int32_t max_seq_len = model_->GetMaxTotalLen();
  if (max_seq_len <= 0) {
    SHERPA_ONNX_LOGE("Invalid max_seq_len=%d", max_seq_len);
    result.text = "";
    return result;
  }

  // If context exceeds KV capacity: prioritize truncating audio placeholders
  // (keep prompt scaffold intact).
  if (context_len > max_seq_len) {
    int32_t before_len = fbank_beg_idx;
    int32_t after_len = context_len - before_len - fake_token_len;
    if (after_len < 0) after_len = 0;

    int32_t keep_audio = max_seq_len - before_len - after_len;
    if (keep_audio < 0) {
      SHERPA_ONNX_LOGE(
          "Context_len (%d) too large for KV capacity (%d) and prompts already "
          "exceed capacity. "
          "Falling back to keep last %d tokens.",
          context_len, max_seq_len, max_seq_len);
      SHERPA_ONNX_LOGE(
          "The model max_total_len (%d) limits total context (prompt + audio "
          "tokens). Suggestions:",
          max_seq_len);
      SHERPA_ONNX_LOGE(
          "  1) Reduce hotwords: fewer or shorter hotwords shorten the "
          "prompt.");
      SHERPA_ONNX_LOGE(
          "  2) Shorten audio: use shorter clips so audio_token_len "
          "decreases.");
      SHERPA_ONNX_LOGE(
          "  3) Use a model with larger max_total_len: export with "
          "max_total_len>%d via scripts in "
          "https://github.com/Wasser1462/FunASR-nano-onnx , or download "
          "from https://modelscope.cn/models/zengshuishui/FunASR-nano-onnx/",
          max_seq_len);
      // Fallback: keep the suffix.
      source_ids.erase(source_ids.begin(), source_ids.end() - max_seq_len);
      // Audio alignment is no longer controllable, skip injecting audio
      // embeddings.
      fbank_beg_idx = -1;
      fake_token_len = 0;
      context_len = static_cast<int32_t>(source_ids.size());
    } else {
      if (keep_audio > audio_token_len) keep_audio = audio_token_len;

      SHERPA_ONNX_LOGE(
          "Context_len (%d) exceeds KV capacity (%d). Truncating audio "
          "placeholders: "
          "audio_token_len=%d -> keep_audio=%d (before=%d after=%d).",
          context_len, max_seq_len, audio_token_len, keep_audio, before_len,
          after_len);
      SHERPA_ONNX_LOGE(
          "The model max_total_len (%d) limits total context (prompt + audio "
          "tokens). Suggestions:",
          max_seq_len);
      SHERPA_ONNX_LOGE(
          "  1) Reduce hotwords: fewer or shorter hotwords shorten the "
          "prompt.");
      SHERPA_ONNX_LOGE(
          "  2) Shorten audio: use shorter clips so audio_token_len "
          "decreases.");
      SHERPA_ONNX_LOGE(
          "  3) Use a model with larger max_total_len: export with "
          "max_total_len>%d via scripts in "
          "https://github.com/Wasser1462/FunASR-nano-onnx , or download "
          "from https://modelscope.cn/models/zengshuishui/FunASR-nano-onnx/",
          max_seq_len);

      // Rebuild ids_before/ids_after using slices.
      std::vector<int64_t> ids_before(source_ids.begin(),
                                      source_ids.begin() + before_len);
      std::vector<int64_t> ids_after(source_ids.end() - after_len,
                                     source_ids.end());

      int64_t pad_id = tokenizer_->GetPadTokenId();
      if (pad_id < 0) pad_id = tokenizer_->GetEosTokenId();

      source_ids.clear();
      source_ids.reserve(before_len + keep_audio + after_len);
      source_ids.insert(source_ids.end(), ids_before.begin(), ids_before.end());
      source_ids.insert(source_ids.end(), keep_audio, pad_id);
      source_ids.insert(source_ids.end(), ids_after.begin(), ids_after.end());

      fake_token_len = keep_audio;
      fbank_beg_idx = before_len;
      context_len = static_cast<int32_t>(source_ids.size());
    }
  }

  // Get text embeddings for the prompt tokens
  std::vector<int64_t> input_ids = source_ids;
  std::array<int64_t, 2> ids_shape{1, context_len};
  Ort::Value input_ids_tensor =
      Ort::Value::CreateTensor(memory_info, input_ids.data(), input_ids.size(),
                               ids_shape.data(), ids_shape.size());

  Ort::Value text_embeds =
      model_->ForwardEmbedding(std::move(input_ids_tensor));

  auto te_info = text_embeds.GetTensorTypeAndShapeInfo();
  const auto te_type = te_info.GetElementType();
  const bool te_fp16 = (te_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16);

  // Allocate inputs_embeds only for prefill (context_len * hidden_size).
  // Decode steps will use a separate reusable buffer.
  std::vector<float> inputs_embeds_fp32(
      static_cast<size_t>(context_len) * hidden_size, 0.0f);

  // Copy text embeddings.
  if (te_fp16) {
    const uint16_t *p = text_embeds.GetTensorData<uint16_t>();
    const size_t total = static_cast<size_t>(context_len) * hidden_size;
    for (size_t i = 0; i < total; ++i) {
      inputs_embeds_fp32[i] = HalfBitsToFloat(p[i]);
    }
  } else {
    const float *p = text_embeds.GetTensorData<float>();
    const size_t total = static_cast<size_t>(context_len) * hidden_size;
    std::memcpy(inputs_embeds_fp32.data(), p, total * sizeof(float));
  }

  // Inject audio embeddings into placeholder region (if alignment is still
  // possible).
  auto enc_info2 = encoder_out.GetTensorTypeAndShapeInfo();
  auto enc_et =
      static_cast<ONNXTensorElementDataType>(enc_info2.GetElementType());
  int32_t copy_len = std::min(fake_token_len, audio_token_len);

  if (copy_len > 0 && fbank_beg_idx >= 0) {
    if (enc_et == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16) {
      const uint16_t *enc = encoder_out.GetTensorData<uint16_t>();
      const size_t hidden_size_u = static_cast<size_t>(hidden_size);
      for (int32_t t = 0; t < copy_len; ++t) {
        const uint16_t *src = enc + static_cast<size_t>(t) * hidden_size_u;
        float *dst = inputs_embeds_fp32.data() +
                     static_cast<size_t>(fbank_beg_idx + t) * hidden_size_u;
        for (size_t d = 0; d < hidden_size_u; ++d) {
          dst[d] = HalfBitsToFloat(src[d]);
        }
      }
    } else if (enc_et == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
      const float *enc = encoder_out.GetTensorData<float>();
      const size_t hidden_size_u = static_cast<size_t>(hidden_size);
      for (int32_t t = 0; t < copy_len; ++t) {
        const float *src = enc + static_cast<size_t>(t) * hidden_size_u;
        float *dst = inputs_embeds_fp32.data() +
                     static_cast<size_t>(fbank_beg_idx + t) * hidden_size_u;
        std::memcpy(dst, src, hidden_size_u * sizeof(float));
      }
    } else {
      SHERPA_ONNX_LOGE("encoder_out elem_type=%d not supported", (int)enc_et);
      result.text = "";
      return result;
    }
  }

  // Pre-allocate attention_mask buffer to avoid per-step allocations
  std::vector<int64_t> attention_mask_vec(static_cast<size_t>(max_seq_len), 0);
  // Initialize first context_len positions to 1 for prefill
  std::fill(attention_mask_vec.begin(),
            attention_mask_vec.begin() + context_len, 1);

  // Pre-allocate reusable buffer for decode step embeddings (hidden_size)
  std::vector<float> next_embed_fp32(static_cast<size_t>(hidden_size));

  int32_t valid_len = context_len;

  std::vector<int64_t> generated_ids;
  generated_ids.reserve(funasr_config.max_new_tokens);

  const int64_t eos_id = tokenizer_->GetEosTokenId();
  const int64_t im_end_id = tokenizer_->GetImEndTokenId();
  const int32_t max_new_tokens = funasr_config.max_new_tokens;

  bool is_first_step = true;

  for (int32_t step = 0; step < max_new_tokens; ++step) {
    // valid_len represents the mask_len for the next decode step (= past +
    // current).
    if (valid_len >= max_seq_len) break;

    Ort::Value logits{nullptr};

    if (is_first_step) {
      // Prefill: seq = context_len, mask_len = context_len.
      if (config_.model_config.debug) {
        SHERPA_ONNX_LOGE(
            "GenerateText: starting prefill with context_len=%d, "
            "inputs_embeds_fp32.size()=%zu",
            context_len, inputs_embeds_fp32.size());
      }

      std::array<int64_t, 3> embeds_shape{1, context_len, hidden_size};
      Ort::Value inputs_embeds_tensor = Ort::Value::CreateTensor<float>(
          memory_info, inputs_embeds_fp32.data(),
          static_cast<size_t>(context_len) * hidden_size, embeds_shape.data(),
          embeds_shape.size());

      // Use pre-allocated attention_mask buffer (first context_len positions
      // already set to 1)
      Ort::Value attention_mask_view = CreateAttentionMaskView(
          &attention_mask_vec, context_len, memory_info, false);

      Ort::Value cache_position = BuildCachePositionFromMask(
          attention_mask_view, context_len, model_->Allocator());

      auto tmp = model_->ForwardLLM(std::move(inputs_embeds_tensor),
                                    std::move(attention_mask_view),
                                    cache_position, cache_kv);
      logits = std::move(tmp.first);
      auto kv_outputs = std::move(tmp.second);

      // Apply KV deltas to cache buffer in-place.
      // kv_outputs contains deltas that update cache_kv at positions specified
      // by cache_position.
      model_->ApplyKvDeltaInplace(&cache_kv, kv_outputs, cache_position);

    } else {
      // Decode: seq = 1, mask_len = valid_len + 1 (past + current)
      int64_t last_token_id = generated_ids.back();
      std::vector<int64_t> one_id{last_token_id};
      std::array<int64_t, 2> one_shape{1, 1};
      Ort::Value one_tensor =
          Ort::Value::CreateTensor(memory_info, one_id.data(), one_id.size(),
                                   one_shape.data(), one_shape.size());

      Ort::Value next_embed = model_->ForwardEmbedding(std::move(one_tensor));
      auto ne_info = next_embed.GetTensorTypeAndShapeInfo();
      bool ne_fp16 =
          (ne_info.GetElementType() == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16);

      // Reuse pre-allocated buffer for decode step embedding
      if (ne_fp16) {
        const uint16_t *src = next_embed.GetTensorData<uint16_t>();
        for (size_t d = 0; d < static_cast<size_t>(hidden_size); ++d) {
          next_embed_fp32[d] = HalfBitsToFloat(src[d]);
        }
      } else {
        const float *src = next_embed.GetTensorData<float>();
        std::memcpy(next_embed_fp32.data(), src,
                    static_cast<size_t>(hidden_size) * sizeof(float));
      }

      std::array<int64_t, 3> embeds_shape{1, 1, hidden_size};
      Ort::Value inputs_embeds_tensor = Ort::Value::CreateTensor<float>(
          memory_info, next_embed_fp32.data(), static_cast<size_t>(hidden_size),
          embeds_shape.data(), embeds_shape.size());

      // mask_len must equal kv_seq_len (= past + current = valid_len + 1).
      // Use pre-allocated attention_mask buffer, update new position to 1
      int32_t mask_len = valid_len + 1;
      Ort::Value attention_mask_view = CreateAttentionMaskView(
          &attention_mask_vec, mask_len, memory_info, true);

      Ort::Value cache_position = BuildCachePositionFromMask(
          attention_mask_view, 1, model_->Allocator());

      auto tmp = model_->ForwardLLM(std::move(inputs_embeds_tensor),
                                    std::move(attention_mask_view),
                                    cache_position, cache_kv);
      logits = std::move(tmp.first);
      auto kv_outputs = std::move(tmp.second);

      // Apply KV deltas to cache buffer in-place.
      model_->ApplyKvDeltaInplace(&cache_kv, kv_outputs, cache_position);
    }

    auto log_info = logits.GetTensorTypeAndShapeInfo();
    auto log_shape = log_info.GetShape();

    // logits are [B, S, V]. Always pick the last available step.
    if (log_shape.size() < 3) {
      SHERPA_ONNX_LOGE("Unexpected logits rank=%zu", log_shape.size());
      result.text = "";
      return result;
    }

    int32_t time_dim = static_cast<int32_t>(log_shape[1]);
    int32_t vocab_size = static_cast<int32_t>(log_shape[2]);
    if (time_dim <= 0 || vocab_size <= 0) {
      SHERPA_ONNX_LOGE("Invalid logits shape [%d,%d,%d]",
                       static_cast<int32_t>(log_shape[0]),
                       static_cast<int32_t>(log_shape[1]),
                       static_cast<int32_t>(log_shape[2]));
      result.text = "";
      return result;
    }

    const bool log_fp16 =
        (log_info.GetElementType() == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16);

    int32_t last_idx = time_dim - 1;

    const void *base = nullptr;
    if (log_fp16)
      base = logits.GetTensorData<uint16_t>();
    else
      base = logits.GetTensorData<float>();

    const size_t offset = static_cast<size_t>(last_idx) * vocab_size;
    const void *last_logits =
        log_fp16 ? static_cast<const void *>(
                       reinterpret_cast<const uint16_t *>(base) + offset)
                 : static_cast<const void *>(
                       reinterpret_cast<const float *>(base) + offset);

    int64_t next_id = SampleTokenWithTemperatureAndTopP(
        last_logits, log_fp16, vocab_size, funasr_config.temperature,
        funasr_config.top_p);

    if (next_id == eos_id || next_id == im_end_id) break;

    generated_ids.push_back(next_id);

    if (is_first_step) is_first_step = false;

    // valid_len represents the kv_seq_len for the next decode step.
    valid_len += 1;
  }

  result.text = tokenizer_->Decode(generated_ids);

  if (funasr_config.itn) {
    result.text = ApplyInverseTextNormalization(std::move(result.text));
    result.text = ApplyHomophoneReplacer(std::move(result.text));
  }

  if (config_.model_config.debug) {
    SHERPA_ONNX_LOGE("GenerateText: generated %zu tokens: %s",
                     generated_ids.size(), result.text.c_str());
    std::string token_str;
    for (size_t i = 0; i < generated_ids.size() && i < 10; ++i) {
      if (i > 0) token_str += ",";
      token_str += std::to_string(generated_ids[i]);
    }
    SHERPA_ONNX_LOGE("GenerateText: token ids: %s%s", token_str.c_str(),
                     generated_ids.size() > 10 ? "..." : "");
  }

  if (!generated_ids.empty()) {
    result.tokens.reserve(generated_ids.size());
    std::string pending_bytes;
    for (int64_t token_id : generated_ids) {
      // Use GetTokenStringStreaming() to handle cross-token UTF-8 sequences
      // This properly handles cases where a single character is split across
      // multiple BPE tokens
      std::string s =
          tokenizer_->GetTokenStringStreaming(token_id, &pending_bytes);
      result.tokens.push_back(std::move(s));
    }

    if (!pending_bytes.empty() && !result.tokens.empty()) {
      // Handle any remaining bytes from the last token, treating them as
      // invalid.
      std::string replacement_chars;
      replacement_chars.reserve(pending_bytes.size() * 3);
      for (size_t i = 0; i < pending_bytes.size(); ++i) {
        replacement_chars.append("\xEF\xBF\xBD");
      }
      result.tokens.back().append(replacement_chars);
    }

    // Calculate timestamps based on effective audio coverage duration
    // Use copy_len (actual injected audio token count) to determine
    result.timestamps.reserve(generated_ids.size());
    if (fbank_beg_idx >= 0 && copy_len > 0 && !generated_ids.empty()) {
      float frame_shift_ms = config_.feat_config.frame_shift_ms;

      int32_t lfr_shift = model_->LfrWindowShift();
      float token_time_sec =
          frame_shift_ms * static_cast<float>(lfr_shift) / 1000.0f;

      float effective_audio_duration =
          static_cast<float>(copy_len) * token_time_sec;

      if (effective_audio_duration > 0) {
        if (generated_ids.size() == 1) {
          result.timestamps.push_back(effective_audio_duration / 2.0f);
        } else {
          // Distribute timestamps evenly across effective_audio_duration
          // Use (size - 1) so the last timestamp equals
          // effective_audio_duration
          float time_per_token = effective_audio_duration /
                                 static_cast<float>(generated_ids.size() - 1);
          for (size_t i = 0; i < generated_ids.size(); ++i) {
            result.timestamps.push_back(static_cast<float>(i) * time_per_token);
          }
        }
      }
    }
  }

  return result;
}

// Decode multiple audio streams in batch.
// Applies LFR processing, runs encoder, and generates text for each stream.
void OfflineRecognizerFunASRNanoImpl::DecodeStreams(OfflineStream **ss,
                                                    int32_t n) const {
  auto memory_info =
      Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);
  const auto &funasr_config = config_.model_config.funasr_nano;
  for (int32_t i = 0; i != n; ++i) {
    std::vector<float> f = ss[i]->GetFrames();
    f = ApplyLFR(f);
    int32_t num_frames = static_cast<int32_t>(
        f.size() / (config_.feat_config.feature_dim * model_->LfrWindowSize()));
    if (num_frames <= 0) {
      OfflineRecognitionResult r;
      r.text = "";
      ss[i]->SetResult(r);
      continue;
    }

    std::array<int64_t, 3> shape{1, num_frames,
                                 static_cast<int64_t>(f.size() / num_frames)};

    Ort::Value features = Ort::Value::CreateTensor<float>(
        memory_info, const_cast<float *>(f.data()), f.size(), shape.data(),
        shape.size());

    Ort::Value encoder_out = model_->ForwardEncoderAdaptor(std::move(features));

    // Parse hotwords parameter
    std::vector<std::string> hotwords =
        ParseHotwordsCsv(funasr_config.hotwords);

    // language is empty means None
    const std::string *lang_ptr =
        funasr_config.language.empty() ? nullptr : &funasr_config.language;

    // Build user prompt: respect funasr_config.user_prompt; merge with
    // hotwords/language/itn when provided.
    std::string user_prompt_dyn = BuildUserPrompt(
        hotwords, lang_ptr, funasr_config.itn, &funasr_config.user_prompt);

    if (config_.model_config.debug) {
      SHERPA_ONNX_LOGE(
          "DecodeStreams: hotwords=%zu, language=%s, itn=%d", hotwords.size(),
          funasr_config.language.empty() ? "(empty)"
                                         : funasr_config.language.c_str(),
          funasr_config.itn ? 1 : 0);
      SHERPA_ONNX_LOGE("DecodeStreams: user_prompt_dyn=%s",
                       user_prompt_dyn.c_str());
    }

    OfflineRecognitionResult r = GenerateText(
        std::move(encoder_out), funasr_config.system_prompt, user_prompt_dyn);

    ss[i]->SetResult(r);
  }
}

#if __ANDROID_API__ >= 9
template OfflineRecognizerFunASRNanoImpl::OfflineRecognizerFunASRNanoImpl(
    AAssetManager *mgr, const OfflineRecognizerConfig &config);
#endif

#if __OHOS__
template OfflineRecognizerFunASRNanoImpl::OfflineRecognizerFunASRNanoImpl(
    NativeResourceManager *mgr, const OfflineRecognizerConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-recognizer-funasr-nano-impl.h
================================================
// sherpa-onnx/csrc/offline-recognizer-funasr-nano-impl.h
//
// Copyright (c)  2025  zengyw

#ifndef SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_FUNASR_NANO_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_FUNASR_NANO_IMPL_H_

#include <algorithm>
#include <memory>
#include <random>
#include <string>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/funasr-nano-tokenizer.h"
#include "sherpa-onnx/csrc/offline-funasr-nano-model.h"
#include "sherpa-onnx/csrc/offline-model-config.h"
#include "sherpa-onnx/csrc/offline-recognizer-impl.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/pad-sequence.h"

namespace sherpa_onnx {

class OfflineRecognizerFunASRNanoImpl : public OfflineRecognizerImpl {
 public:
  explicit OfflineRecognizerFunASRNanoImpl(
      const OfflineRecognizerConfig &config);

  template <typename Manager>
  OfflineRecognizerFunASRNanoImpl(Manager *mgr,
                                  const OfflineRecognizerConfig &config);

  std::unique_ptr<OfflineStream> CreateStream() const override;

  void DecodeStreams(OfflineStream **ss, int32_t n) const override;

  OfflineRecognizerConfig GetConfig() const override { return config_; }

 private:
  void InitFeatConfig();
  std::vector<float> ApplyLFR(const std::vector<float> &in) const;

  std::vector<int64_t> BuildSourceIds(const std::string &system_prompt,
                                      const std::string &user_prompt,
                                      int32_t audio_token_len,
                                      int32_t &fbank_beg_idx,
                                      int32_t &fake_token_len) const;

  int64_t SampleTokenFromLogitsFp16OrFp32(const void *logits,
                                         bool is_fp16,
                                         int32_t vocab_size) const;

  int64_t SampleTokenWithTemperatureAndTopP(const void *logits,
                                            bool is_fp16,
                                            int32_t vocab_size,
                                            float temperature,
                                            float top_p) const;

  OfflineRecognitionResult GenerateText(Ort::Value encoder_out,
                                       const std::string &system_prompt,
                                       const std::string &user_prompt) const;

  OfflineRecognizerConfig config_;
  std::unique_ptr<OfflineFunASRNanoModel> model_;
  std::unique_ptr<FunASRNanoTokenizer> tokenizer_;
  mutable std::mt19937 rng_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_FUNASR_NANO_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-recognizer-impl.cc
================================================
// sherpa-onnx/csrc/offline-recognizer-impl.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-recognizer-impl.h"

#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9

#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "fst/extensions/far/far.h"
#include "kaldifst/csrc/kaldi-fst-io.h"
#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-recognizer-canary-impl.h"
#include "sherpa-onnx/csrc/offline-recognizer-ctc-impl.h"
#include "sherpa-onnx/csrc/offline-recognizer-fire-red-asr-impl.h"
#include "sherpa-onnx/csrc/offline-recognizer-funasr-nano-impl.h"
#include "sherpa-onnx/csrc/offline-recognizer-moonshine-impl.h"
#include "sherpa-onnx/csrc/offline-recognizer-moonshine-v2-impl.h"
#include "sherpa-onnx/csrc/offline-recognizer-paraformer-impl.h"
#include "sherpa-onnx/csrc/offline-recognizer-paraformer-tpl-impl.h"
#include "sherpa-onnx/csrc/offline-recognizer-sense-voice-impl.h"
#include "sherpa-onnx/csrc/offline-recognizer-sense-voice-tpl-impl.h"
#include "sherpa-onnx/csrc/offline-recognizer-transducer-impl.h"
#include "sherpa-onnx/csrc/offline-recognizer-transducer-nemo-impl.h"
#include "sherpa-onnx/csrc/offline-recognizer-whisper-impl.h"
#include "sherpa-onnx/csrc/offline-recognizer-whisper-tpl-impl.h"
#include "sherpa-onnx/csrc/text-utils.h"

#if SHERPA_ONNX_ENABLE_RKNN
#include "sherpa-onnx/csrc/rknn/offline-paraformer-model-rknn.h"
#include "sherpa-onnx/csrc/rknn/offline-sense-voice-model-rknn.h"
#endif

#if SHERPA_ONNX_ENABLE_AXERA
#include "sherpa-onnx/csrc/axera/offline-sense-voice-model-axera.h"
#endif

#if SHERPA_ONNX_ENABLE_AXCL
#include "sherpa-onnx/csrc/axcl/offline-sense-voice-model-axcl.h"
#endif

#if SHERPA_ONNX_ENABLE_ASCEND_NPU
#include "sherpa-onnx/csrc/ascend/offline-paraformer-model-ascend.h"
#include "sherpa-onnx/csrc/ascend/offline-recognizer-zipformer-ctc-ascend-impl.h"
#include "sherpa-onnx/csrc/ascend/offline-sense-voice-model-ascend.h"
#include "sherpa-onnx/csrc/ascend/offline-whisper-model-ascend.h"
#endif

#if SHERPA_ONNX_ENABLE_QNN
#include "sherpa-onnx/csrc/qnn/offline-paraformer-model-qnn.h"
#include "sherpa-onnx/csrc/qnn/offline-recognizer-zipformer-ctc-qnn-impl.h"
#include "sherpa-onnx/csrc/qnn/offline-sense-voice-model-qnn.h"
#endif

namespace sherpa_onnx {

std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create(
    const OfflineRecognizerConfig &config) {
  if (config.model_config.provider == "rknn") {
#if SHERPA_ONNX_ENABLE_RKNN
    if (!config.model_config.sense_voice.model.empty()) {
      return std::make_unique<
          OfflineRecognizerSenseVoiceTplImpl<OfflineSenseVoiceModelRknn>>(
          config);
    } else if (!config.model_config.paraformer.model.empty()) {
      return std::make_unique<
          OfflineRecognizerParaformerTplImpl<OfflineParaformerModelRknn>>(
          config);
    } else {
      SHERPA_ONNX_LOGE(
          "Only SenseVoice and Paraformer models are currently supported "
          "by rknn for non-streaming ASR.");
      SHERPA_ONNX_EXIT(-1);
      return nullptr;
    }
#else
    SHERPA_ONNX_LOGE(
        "Please rebuild sherpa-onnx with -DSHERPA_ONNX_ENABLE_RKNN=ON if you "
        "want to use rknn. See also "
        "https://k2-fsa.github.io/sherpa/onnx/rknn/install.html");
    SHERPA_ONNX_EXIT(-1);
    return nullptr;
#endif
  }

  if (config.model_config.provider == "axera") {
#if SHERPA_ONNX_ENABLE_AXERA
    if (!config.model_config.sense_voice.model.empty()) {
      return std::make_unique<
          OfflineRecognizerSenseVoiceTplImpl<OfflineSenseVoiceModelAxera>>(
          config);
    } else {
      SHERPA_ONNX_LOGE(
          "Only SenseVoice models are currently supported by Axera NPU for "
          "non-streaming ASR.");
      SHERPA_ONNX_EXIT(-1);
      return nullptr;
    }
#else
    SHERPA_ONNX_LOGE(
        "Please rebuild sherpa-onnx with -DSHERPA_ONNX_ENABLE_AXERA=ON if you "
        "want to use axera. See also "
        "https://k2-fsa.github.io/sherpa/onnx/axera/install.html");
    SHERPA_ONNX_EXIT(-1);
    return nullptr;
#endif
  }

  if (config.model_config.provider == "axcl") {
#if SHERPA_ONNX_ENABLE_AXCL
    if (!config.model_config.sense_voice.model.empty()) {
      return std::make_unique<
          OfflineRecognizerSenseVoiceTplImpl<OfflineSenseVoiceModelAxcl>>(
          config);
    } else {
      SHERPA_ONNX_LOGE(
          "Only SenseVoice models are currently supported by axcl for "
          "non-streaming ASR.");
      SHERPA_ONNX_EXIT(-1);
      return nullptr;
    }

#else
    SHERPA_ONNX_LOGE(
        "Please rebuild sherpa-onnx with -DSHERPA_ONNX_ENABLE_AXCL=ON if you "
        "want to use axcl. See also "
        "https://k2-fsa.github.io/sherpa/onnx/axcl/install.html");
    SHERPA_ONNX_EXIT(-1);
    return nullptr;
#endif
  }

  if (config.model_config.provider == "ascend") {
#if SHERPA_ONNX_ENABLE_ASCEND_NPU
    if (!config.model_config.sense_voice.model.empty()) {
      return std::make_unique<
          OfflineRecognizerSenseVoiceTplImpl<OfflineSenseVoiceModelAscend>>(
          config);
    } else if (!config.model_config.paraformer.model.empty()) {
      return std::make_unique<
          OfflineRecognizerParaformerTplImpl<OfflineParaformerModelAscend>>(
          config);
    } else if (!config.model_config.zipformer_ctc.model.empty()) {
      return std::make_unique<OfflineRecognizerZipformerCtcAscendImpl>(config);
    } else if (!config.model_config.whisper.encoder.empty()) {
      return std::make_unique<
          OfflineRecognizerWhisperTplImpl<OfflineWhisperModelAscend>>(config);
    } else {
      SHERPA_ONNX_LOGE(
          "Only SenseVoice, Paraformer, Whisper, and Zipformer CTC models are "
          "currently supported by Ascend NPU for non-streaming ASR.");
      SHERPA_ONNX_EXIT(-1);
      return nullptr;
    }
#else
    SHERPA_ONNX_LOGE(
        "Please rebuild sherpa-onnx with -DSHERPA_ONNX_ENABLE_ASCEND_NPU=ON if "
        "you want to use Ascend NPU. See also "
        "https://k2-fsa.github.io/sherpa/onnx/ascend/install.html");
    SHERPA_ONNX_EXIT(-1);
    return nullptr;
#endif
  }

  if (config.model_config.provider == "qnn") {
#if SHERPA_ONNX_ENABLE_QNN
    if (!config.model_config.sense_voice.model.empty() ||
        !config.model_config.sense_voice.qnn_config.context_binary.empty()) {
      return std::make_unique<
          OfflineRecognizerSenseVoiceTplImpl<OfflineSenseVoiceModelQnn>>(
          config);
    } else if (!config.model_config.zipformer_ctc.model.empty() ||
               !config.model_config.zipformer_ctc.qnn_config.context_binary
                    .empty()) {
      return std::make_unique<OfflineRecognizerZipformerCtcQnnImpl>(config);
    } else if (!config.model_config.paraformer.model.empty() ||
               !config.model_config.paraformer.qnn_config.context_binary
                    .empty()) {
      return std::make_unique<
          OfflineRecognizerParaformerTplImpl<OfflineParaformerModelQnn>>(
          config);
    } else {
      SHERPA_ONNX_LOGE(
          "Only SenseVoice, Paraformer, and Zipformer CTC models are currently "
          "supported by QNN for non-streaming ASR.");
      SHERPA_ONNX_EXIT(-1);
      return nullptr;
    }
#else
    SHERPA_ONNX_LOGE(
        "Please rebuild sherpa-onnx with -DSHERPA_ONNX_ENABLE_QNN=ON if "
        "you want to use qnn. See also "
        "https://k2-fsa.github.io/sherpa/onnx/qnn/build.html");
    SHERPA_ONNX_EXIT(-1);
    return nullptr;
#endif
  }

  if (!config.model_config.sense_voice.model.empty()) {
    return std::make_unique<OfflineRecognizerSenseVoiceImpl>(config);
  }

  if (!config.model_config.funasr_nano.encoder_adaptor.empty()) {
    return std::make_unique<OfflineRecognizerFunASRNanoImpl>(config);
  }

  if (!config.model_config.paraformer.model.empty()) {
    return std::make_unique<OfflineRecognizerParaformerImpl>(config);
  }

  if (!config.model_config.nemo_ctc.model.empty() ||
      !config.model_config.zipformer_ctc.model.empty() ||
      !config.model_config.tdnn.model.empty() ||
      !config.model_config.wenet_ctc.model.empty() ||
      !config.model_config.omnilingual.model.empty() ||
      !config.model_config.medasr.model.empty() ||
      !config.model_config.fire_red_asr_ctc.model.empty() ||
      !config.model_config.dolphin.model.empty()) {
    return std::make_unique<OfflineRecognizerCtcImpl>(config);
  }

  if (!config.model_config.whisper.encoder.empty()) {
    return std::make_unique<OfflineRecognizerWhisperImpl>(config);
  }

  if (!config.model_config.fire_red_asr.encoder.empty()) {
    return std::make_unique<OfflineRecognizerFireRedAsrImpl>(config);
  }

  if (!config.model_config.moonshine.preprocessor.empty()) {
    return std::make_unique<OfflineRecognizerMoonshineImpl>(config);
  }

  if (!config.model_config.moonshine.merged_decoder.empty()) {
    return std::make_unique<OfflineRecognizerMoonshineV2Impl>(config);
  }

  if (!config.model_config.canary.encoder.empty()) {
    return std::make_unique<OfflineRecognizerCanaryImpl>(config);
  }

  // TODO(fangjun): Refactor it. We only need to use model type for the
  // following models:
  //  1. transducer and nemo_transducer
  if (!config.model_config.model_type.empty()) {
    const auto &model_type = config.model_config.model_type;
    if (model_type == "transducer") {
      return std::make_unique<OfflineRecognizerTransducerImpl>(config);
    } else if (model_type == "nemo_transducer") {
      return std::make_unique<OfflineRecognizerTransducerNeMoImpl>(config);
    } else if (model_type == "paraformer") {
      return std::make_unique<OfflineRecognizerParaformerImpl>(config);
    } else if (model_type == "nemo_ctc" || model_type == "tdnn" ||
               model_type == "zipformer2_ctc" || model_type == "wenet_ctc" ||
               model_type == "telespeech_ctc") {
      return std::make_unique<OfflineRecognizerCtcImpl>(config);
    } else if (model_type == "whisper") {
      // unreachable
      return std::make_unique<OfflineRecognizerWhisperImpl>(config);
    } else if (model_type == "moonshine") {
      // unreachable
      return std::make_unique<OfflineRecognizerMoonshineImpl>(config);
    } else {
      SHERPA_ONNX_LOGE(
          "Invalid model_type: %s. Trying to load the model to get its type",
          model_type.c_str());
    }
  }

  Ort::Env env(ORT_LOGGING_LEVEL_ERROR);

  Ort::SessionOptions sess_opts;
  sess_opts.SetIntraOpNumThreads(1);
  sess_opts.SetInterOpNumThreads(1);

  std::string model_filename;
  if (!config.model_config.transducer.encoder_filename.empty()) {
    model_filename = config.model_config.transducer.encoder_filename;
  } else if (!config.model_config.paraformer.model.empty()) {
    model_filename = config.model_config.paraformer.model;
  } else if (!config.model_config.nemo_ctc.model.empty()) {
    model_filename = config.model_config.nemo_ctc.model;
  } else if (!config.model_config.telespeech_ctc.empty()) {
    model_filename = config.model_config.telespeech_ctc;
  } else if (!config.model_config.tdnn.model.empty()) {
    model_filename = config.model_config.tdnn.model;
  } else if (!config.model_config.zipformer_ctc.model.empty()) {
    model_filename = config.model_config.zipformer_ctc.model;
  } else if (!config.model_config.wenet_ctc.model.empty()) {
    model_filename = config.model_config.wenet_ctc.model;
  } else if (!config.model_config.whisper.encoder.empty()) {
    model_filename = config.model_config.whisper.encoder;
  } else {
    SHERPA_ONNX_LOGE("Please provide a model");
    SHERPA_ONNX_EXIT(-1);
  }

  auto buf = ReadFile(model_filename);

  auto encoder_sess =
      std::make_unique<Ort::Session>(env, buf.data(), buf.size(), sess_opts);

  Ort::ModelMetadata meta_data = encoder_sess->GetModelMetadata();

  Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below

  auto model_type =
      LookupCustomModelMetaData(meta_data, "model_type", allocator);
  if (model_type.empty()) {
    SHERPA_ONNX_LOGE(
        "No model_type in the metadata!\n\n"
        "Please refer to the following URLs to add metadata"
        "\n"
        "(0) Transducer models from icefall"
        "\n    "
        "https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/"
        "pruned_transducer_stateless7/export-onnx.py#L303"
        "\n"
        "(1) Nemo CTC models\n    "
        "https://huggingface.co/csukuangfj/"
        "sherpa-onnx-nemo-ctc-en-citrinet-512/blob/main/add-model-metadata.py"
        "\n"
        "(2) Paraformer"
        "\n    "
        "https://huggingface.co/csukuangfj/"
        "paraformer-onnxruntime-python-example/blob/main/add-model-metadata.py"
        "\n    "
        "(3) Whisper"
        "\n    "
        "(4) Tdnn models of the yesno recipe from icefall"
        "\n    "
        "https://github.com/k2-fsa/icefall/tree/master/egs/yesno/ASR/tdnn"
        "\n"
        "(5) Zipformer CTC models from icefall"
        "\n    "
        "https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/"
        "zipformer/export-onnx-ctc.py"
        "\n"
        "(6) CTC models from WeNet"
        "\n    "
        "https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/wenet/run.sh"
        "\n"
        "(7) CTC models from TeleSpeech"
        "\n    "
        "https://github.com/Tele-AI/TeleSpeech-ASR"
        "\n"
        "\n");
    SHERPA_ONNX_EXIT(-1);
  }

  if (model_type == "conformer" || model_type == "zipformer" ||
      model_type == "zipformer2") {
    return std::make_unique<OfflineRecognizerTransducerImpl>(config);
  }

  if (model_type == "paraformer") {
    return std::make_unique<OfflineRecognizerParaformerImpl>(config);
  }

  if ((model_type == "EncDecHybridRNNTCTCBPEModel" ||
       model_type == "EncDecRNNTBPEModel") &&
      !config.model_config.transducer.decoder_filename.empty() &&
      !config.model_config.transducer.joiner_filename.empty()) {
    return std::make_unique<OfflineRecognizerTransducerNeMoImpl>(config);
  }

  if (model_type == "EncDecCTCModelBPE" || model_type == "EncDecCTCModel" ||
      model_type == "EncDecHybridRNNTCTCBPEModel" || model_type == "tdnn" ||
      model_type == "zipformer2_ctc" || model_type == "wenet_ctc" ||
      model_type == "telespeech_ctc") {
    return std::make_unique<OfflineRecognizerCtcImpl>(config);
  }

  if (strncmp(model_type.c_str(), "whisper", 7) == 0) {
    return std::make_unique<OfflineRecognizerWhisperImpl>(config);
  }

  SHERPA_ONNX_LOGE(
      "\nUnsupported model_type: %s\n"
      "We support only the following model types at present: \n"
      " - Non-streaming transducer models from icefall\n"
      " - Non-streaming Paraformer models from FunASR\n"
      " - EncDecCTCModelBPE models from NeMo\n"
      " - EncDecCTCModel models from NeMo\n"
      " - EncDecHybridRNNTCTCBPEModel models from NeMo\n"
      " - EncDecRNNTBPEModel models from NeMO"
      " - Whisper models\n"
      " - Tdnn models\n"
      " - Zipformer CTC models\n"
      " - WeNet CTC models\n"
      " - TeleSpeech CTC models\n",
      model_type.c_str());

  SHERPA_ONNX_EXIT(-1);
}

template <typename Manager>
std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create(
    Manager *mgr, const OfflineRecognizerConfig &config) {
  if (config.model_config.provider == "rknn") {
#if SHERPA_ONNX_ENABLE_RKNN
    if (!config.model_config.sense_voice.model.empty()) {
      return std::make_unique<
          OfflineRecognizerSenseVoiceTplImpl<OfflineSenseVoiceModelRknn>>(
          mgr, config);
    } else if (!config.model_config.paraformer.model.empty()) {
      return std::make_unique<
          OfflineRecognizerParaformerTplImpl<OfflineParaformerModelRknn>>(
          mgr, config);
    } else {
      SHERPA_ONNX_LOGE(
          "Only SenseVoice and Paraformer models are currently supported "
          "by rknn for non-streaming ASR.");
      SHERPA_ONNX_EXIT(-1);
      return nullptr;
    }
#else
    SHERPA_ONNX_LOGE(
        "Please rebuild sherpa-onnx with -DSHERPA_ONNX_ENABLE_RKNN=ON if you "
        "want to use rknn. See also "
        "https://k2-fsa.github.io/sherpa/onnx/rknn/install.html");
    SHERPA_ONNX_EXIT(-1);
    return nullptr;
#endif
  }

  if (config.model_config.provider == "axera") {
#if SHERPA_ONNX_ENABLE_AXERA
    if (!config.model_config.sense_voice.model.empty()) {
      return std::make_unique<
          OfflineRecognizerSenseVoiceTplImpl<OfflineSenseVoiceModelAxera>>(
          mgr, config);
    } else {
      SHERPA_ONNX_LOGE(
          "Only SenseVoice models are currently supported by Axera NPU for "
          "non-streaming ASR.");
      SHERPA_ONNX_EXIT(-1);
      return nullptr;
    }
#else
    SHERPA_ONNX_LOGE(
        "Please rebuild sherpa-onnx with -DSHERPA_ONNX_ENABLE_AXERA=ON if you "
        "want to use axera. See also "
        "https://k2-fsa.github.io/sherpa/onnx/axera/install.html");
    SHERPA_ONNX_EXIT(-1);
    return nullptr;
#endif
  }

  if (config.model_config.provider == "axcl") {
#if SHERPA_ONNX_ENABLE_AXCL
    if (!config.model_config.sense_voice.model.empty()) {
      return std::make_unique<
          OfflineRecognizerSenseVoiceTplImpl<OfflineSenseVoiceModelAxcl>>(
          mgr, config);
    } else {
      SHERPA_ONNX_LOGE(
          "Only SenseVoice models are currently supported by axcl for "
          "non-streaming ASR.");
      SHERPA_ONNX_EXIT(-1);
      return nullptr;
    }

#else
    SHERPA_ONNX_LOGE(
        "Please rebuild sherpa-onnx with -DSHERPA_ONNX_ENABLE_AXCL=ON if you "
        "want to use axcl. See also "
        "https://k2-fsa.github.io/sherpa/onnx/axcl/install.html");
    SHERPA_ONNX_EXIT(-1);
    return nullptr;
#endif
  }

  if (config.model_config.provider == "ascend") {
#if SHERPA_ONNX_ENABLE_ASCEND_NPU
    if (!config.model_config.sense_voice.model.empty()) {
      return std::make_unique<
          OfflineRecognizerSenseVoiceTplImpl<OfflineSenseVoiceModelAscend>>(
          mgr, config);
    } else if (!config.model_config.paraformer.model.empty()) {
      return std::make_unique<
          OfflineRecognizerParaformerTplImpl<OfflineParaformerModelAscend>>(
          mgr, config);
    } else if (!config.model_config.zipformer_ctc.model.empty()) {
      return std::make_unique<OfflineRecognizerZipformerCtcAscendImpl>(mgr,
                                                                       config);
    } else if (!config.model_config.whisper.encoder.empty()) {
      return std::make_unique<
          OfflineRecognizerWhisperTplImpl<OfflineWhisperModelAscend>>(mgr,
                                                                      config);
    } else {
      SHERPA_ONNX_LOGE(
          "Only SenseVoice, Paraformer, Whisper, and Zipformer CTC models are "
          "currently supported by Ascend NPU for non-streaming ASR.");
      SHERPA_ONNX_EXIT(-1);
      return nullptr;
    }
#else
    SHERPA_ONNX_LOGE(
        "Please rebuild sherpa-onnx with -DSHERPA_ONNX_ENABLE_ASCEND_NPU=ON if "
        "you want to use Ascend NPU. See also "
        "https://k2-fsa.github.io/sherpa/onnx/ascend/install.html");
    SHERPA_ONNX_EXIT(-1);
    return nullptr;
#endif
  }

  if (config.model_config.provider == "qnn") {
#if SHERPA_ONNX_ENABLE_QNN
    if (!config.model_config.sense_voice.model.empty() ||
        !config.model_config.sense_voice.qnn_config.context_binary.empty()) {
      return std::make_unique<
          OfflineRecognizerSenseVoiceTplImpl<OfflineSenseVoiceModelQnn>>(
          mgr, config);
    } else if (!config.model_config.zipformer_ctc.model.empty() ||
               !config.model_config.zipformer_ctc.qnn_config.context_binary
                    .empty()) {
      return std::make_unique<OfflineRecognizerZipformerCtcQnnImpl>(mgr,
                                                                    config);
    } else if (!config.model_config.paraformer.model.empty() ||
               !config.model_config.paraformer.qnn_config.context_binary
                    .empty()) {
      return std::make_unique<
          OfflineRecognizerParaformerTplImpl<OfflineParaformerModelQnn>>(
          mgr, config);
    } else {
      SHERPA_ONNX_LOGE(
          "Only SenseVoice, Paraformer, and Zipformer CTC models are currently "
          "supported by QNN for non-streaming ASR.");
      SHERPA_ONNX_EXIT(-1);
      return nullptr;
    }
#else
    SHERPA_ONNX_LOGE(
        "Please rebuild sherpa-onnx with -DSHERPA_ONNX_ENABLE_QNN=ON if "
        "you want to use qnn. See also "
        "https://k2-fsa.github.io/sherpa/onnx/qnn/build.html");
    SHERPA_ONNX_EXIT(-1);
    return nullptr;
#endif
  }

  if (!config.model_config.sense_voice.model.empty()) {
    return std::make_unique<OfflineRecognizerSenseVoiceImpl>(mgr, config);
  }

  if (!config.model_config.funasr_nano.encoder_adaptor.empty()) {
    return std::make_unique<OfflineRecognizerFunASRNanoImpl>(mgr, config);
  }

  if (!config.model_config.paraformer.model.empty()) {
    return std::make_unique<OfflineRecognizerParaformerImpl>(mgr, config);
  }

  if (!config.model_config.nemo_ctc.model.empty() ||
      !config.model_config.zipformer_ctc.model.empty() ||
      !config.model_config.tdnn.model.empty() ||
      !config.model_config.wenet_ctc.model.empty() ||
      !config.model_config.omnilingual.model.empty() ||
      !config.model_config.medasr.model.empty() ||
      !config.model_config.fire_red_asr_ctc.model.empty() ||
      !config.model_config.dolphin.model.empty()) {
    return std::make_unique<OfflineRecognizerCtcImpl>(mgr, config);
  }

  if (!config.model_config.whisper.encoder.empty()) {
    return std::make_unique<OfflineRecognizerWhisperImpl>(mgr, config);
  }

  if (!config.model_config.fire_red_asr.encoder.empty()) {
    return std::make_unique<OfflineRecognizerFireRedAsrImpl>(mgr, config);
  }

  if (!config.model_config.moonshine.preprocessor.empty()) {
    return std::make_unique<OfflineRecognizerMoonshineImpl>(mgr, config);
  }

  if (!config.model_config.moonshine.merged_decoder.empty()) {
    return std::make_unique<OfflineRecognizerMoonshineV2Impl>(mgr, config);
  }

  if (!config.model_config.canary.encoder.empty()) {
    return std::make_unique<OfflineRecognizerCanaryImpl>(mgr, config);
  }

  // TODO(fangjun): Refactor it. We only need to use model type for the
  // following models:
  //  1. transducer and nemo_transducer
  if (!config.model_config.model_type.empty()) {
    const auto &model_type = config.model_config.model_type;
    if (model_type == "transducer") {
      return std::make_unique<OfflineRecognizerTransducerImpl>(mgr, config);
    } else if (model_type == "nemo_transducer") {
      return std::make_unique<OfflineRecognizerTransducerNeMoImpl>(mgr, config);
    } else if (model_type == "paraformer") {
      return std::make_unique<OfflineRecognizerParaformerImpl>(mgr, config);
    } else if (model_type == "nemo_ctc" || model_type == "tdnn" ||
               model_type == "zipformer2_ctc" || model_type == "wenet_ctc" ||
               model_type == "telespeech_ctc") {
      return std::make_unique<OfflineRecognizerCtcImpl>(mgr, config);
    } else if (model_type == "whisper") {
      return std::make_unique<OfflineRecognizerWhisperImpl>(mgr, config);
    } else if (model_type == "moonshine") {
      // unreachable code
      return std::make_unique<OfflineRecognizerMoonshineImpl>(mgr, config);
    } else {
      SHERPA_ONNX_LOGE(
          "Invalid model_type: %s. Trying to load the model to get its type",
          model_type.c_str());
    }
  }

  Ort::Env env(ORT_LOGGING_LEVEL_ERROR);

  Ort::SessionOptions sess_opts;
  sess_opts.SetIntraOpNumThreads(1);
  sess_opts.SetInterOpNumThreads(1);

  std::string model_filename;
  if (!config.model_config.transducer.encoder_filename.empty()) {
    model_filename = config.model_config.transducer.encoder_filename;
  } else if (!config.model_config.paraformer.model.empty()) {
    model_filename = config.model_config.paraformer.model;
  } else if (!config.model_config.nemo_ctc.model.empty()) {
    model_filename = config.model_config.nemo_ctc.model;
  } else if (!config.model_config.tdnn.model.empty()) {
    model_filename = config.model_config.tdnn.model;
  } else if (!config.model_config.zipformer_ctc.model.empty()) {
    model_filename = config.model_config.zipformer_ctc.model;
  } else if (!config.model_config.wenet_ctc.model.empty()) {
    model_filename = config.model_config.wenet_ctc.model;
  } else if (!config.model_config.telespeech_ctc.empty()) {
    model_filename = config.model_config.telespeech_ctc;
  } else if (!config.model_config.whisper.encoder.empty()) {
    model_filename = config.model_config.whisper.encoder;
  } else {
    SHERPA_ONNX_LOGE("Please provide a model");
    SHERPA_ONNX_EXIT(-1);
  }

  auto buf = ReadFile(mgr, model_filename);

  auto encoder_sess =
      std::make_unique<Ort::Session>(env, buf.data(), buf.size(), sess_opts);

  Ort::ModelMetadata meta_data = encoder_sess->GetModelMetadata();

  Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below

  auto model_type =
      LookupCustomModelMetaData(meta_data, "model_type", allocator);
  if (model_type.empty()) {
    SHERPA_ONNX_LOGE(
        "No model_type in the metadata!\n\n"
        "Please refer to the following URLs to add metadata"
        "\n"
        "(0) Transducer models from icefall"
        "\n    "
        "https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/"
        "pruned_transducer_stateless7/export-onnx.py#L303"
        "\n"
        "(1) Nemo CTC models\n    "
        "https://huggingface.co/csukuangfj/"
        "sherpa-onnx-nemo-ctc-en-citrinet-512/blob/main/add-model-metadata.py"
        "\n"
        "(2) Paraformer"
        "\n    "
        "https://huggingface.co/csukuangfj/"
        "paraformer-onnxruntime-python-example/blob/main/add-model-metadata.py"
        "\n    "
        "(3) Whisper"
        "\n    "
        "(4) Tdnn models of the yesno recipe from icefall"
        "\n    "
        "https://github.com/k2-fsa/icefall/tree/master/egs/yesno/ASR/tdnn"
        "\n"
        "(5) Zipformer CTC models from icefall"
        "\n    "
        "https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/"
        "zipformer/export-onnx-ctc.py"
        "\n"
        "(6) CTC models from WeNet"
        "\n    "
        "https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/wenet/run.sh"
        "\n"
        "(7) CTC models from TeleSpeech"
        "\n    "
        "https://github.com/Tele-AI/TeleSpeech-ASR"
        "\n"
        "\n");
    SHERPA_ONNX_EXIT(-1);
  }

  if (model_type == "conformer" || model_type == "zipformer" ||
      model_type == "zipformer2") {
    return std::make_unique<OfflineRecognizerTransducerImpl>(mgr, config);
  }

  if (model_type == "paraformer") {
    return std::make_unique<OfflineRecognizerParaformerImpl>(mgr, config);
  }

  if ((model_type == "EncDecHybridRNNTCTCBPEModel" ||
       model_type == "EncDecRNNTBPEModel") &&
      !config.model_config.transducer.decoder_filename.empty() &&
      !config.model_config.transducer.joiner_filename.empty()) {
    return std::make_unique<OfflineRecognizerTransducerNeMoImpl>(mgr, config);
  }

  if (model_type == "EncDecCTCModelBPE" || model_type == "EncDecCTCModel" ||
      model_type == "EncDecHybridRNNTCTCBPEModel" || model_type == "tdnn" ||
      model_type == "zipformer2_ctc" || model_type == "wenet_ctc" ||
      model_type == "telespeech_ctc") {
    return std::make_unique<OfflineRecognizerCtcImpl>(mgr, config);
  }

  if (strncmp(model_type.c_str(), "whisper", 7) == 0) {
    return std::make_unique<OfflineRecognizerWhisperImpl>(mgr, config);
  }

  SHERPA_ONNX_LOGE(
      "\nUnsupported model_type: %s\n"
      "We support only the following model types at present: \n"
      " - Non-streaming transducer models from icefall\n"
      " - Non-streaming Paraformer models from FunASR\n"
      " - EncDecCTCModelBPE models from NeMo\n"
      " - EncDecCTCModel models from NeMo\n"
      " - EncDecHybridRNNTCTCBPEModel models from NeMo\n"
      " - EncDecRNNTBPEModel models from NeMo\n"
      " - Whisper models\n"
      " - Tdnn models\n"
      " - Zipformer CTC models\n"
      " - WeNet CTC models\n"
      " - TeleSpeech CTC models\n",
      model_type.c_str());

  SHERPA_ONNX_EXIT(-1);
}

OfflineRecognizerImpl::OfflineRecognizerImpl(
    const OfflineRecognizerConfig &config)
    : config_(config) {
  // TODO(fangjun): Refactor this function

  if (!config.rule_fsts.empty()) {
    std::vector<std::string> files;
    SplitStringToVector(config.rule_fsts, ",", false, &files);
    itn_list_.reserve(files.size());
    for (const auto &f : files) {
      if (config.model_config.debug) {
        SHERPA_ONNX_LOGE("rule fst: %s", f.c_str());
      }
      itn_list_.push_back(std::make_unique<kaldifst::TextNormalizer>(f));
    }
  }

  if (!config.rule_fars.empty()) {
    if (config.model_config.debug) {
      SHERPA_ONNX_LOGE("Loading FST archives");
    }
    std::vector<std::string> files;
    SplitStringToVector(config.rule_fars, ",", false, &files);

    itn_list_.reserve(files.size() + itn_list_.size());

    for (const auto &f : files) {
      if (config.model_config.debug) {
        SHERPA_ONNX_LOGE("rule far: %s", f.c_str());
      }
      std::unique_ptr<fst::FarReader<fst::StdArc>> reader(
          fst::FarReader<fst::StdArc>::Open(f));
      for (; !reader->Done(); reader->Next()) {
        std::unique_ptr<fst::StdConstFst> r(
            fst::CastOrConvertToConstFst(reader->GetFst()->Copy()));

        itn_list_.push_back(
            std::make_unique<kaldifst::TextNormalizer>(std::move(r)));
      }
    }

    if (config.model_config.debug) {
      SHERPA_ONNX_LOGE("FST archives loaded!");
    }
  }

  if (!config.hr.lexicon.empty() && !config.hr.rule_fsts.empty()) {
    auto hr_config = config.hr;
    hr_config.debug = config.model_config.debug;
    hr_ = std::make_unique<HomophoneReplacer>(hr_config);
  }
}

template <typename Manager>
OfflineRecognizerImpl::OfflineRecognizerImpl(
    Manager *mgr, const OfflineRecognizerConfig &config)
    : config_(config) {
  if (!config.rule_fsts.empty()) {
    std::vector<std::string> files;
    SplitStringToVector(config.rule_fsts, ",", false, &files);
    itn_list_.reserve(files.size());
    for (const auto &f : files) {
      if (config.model_config.debug) {
        SHERPA_ONNX_LOGE("rule fst: %s", f.c_str());
      }
      auto buf = ReadFile(mgr, f);
      std::istringstream is(std::string(buf.data(), buf.size()));
      itn_list_.push_back(std::make_unique<kaldifst::TextNormalizer>(is));
    }
  }

  if (!config.rule_fars.empty()) {
    std::vector<std::string> files;
    SplitStringToVector(config.rule_fars, ",", false, &files);
    itn_list_.reserve(files.size() + itn_list_.size());

    for (const auto &f : files) {
      if (config.model_config.debug) {
        SHERPA_ONNX_LOGE("rule far: %s", f.c_str());
      }

      auto buf = ReadFile(mgr, f);

      std::unique_ptr<std::istream> s(
          new std::istringstream(std::string(buf.data(), buf.size())));

      std::unique_ptr<fst::FarReader<fst::StdArc>> reader(
          fst::FarReader<fst::StdArc>::Open(std::move(s)));

      for (; !reader->Done(); reader->Next()) {
        std::unique_ptr<fst::StdConstFst> r(
            fst::CastOrConvertToConstFst(reader->GetFst()->Copy()));

        itn_list_.push_back(
            std::make_unique<kaldifst::TextNormalizer>(std::move(r)));
      }  // for (; !reader->Done(); reader->Next())
    }  // for (const auto &f : files)
  }  // if (!config.rule_fars.empty())

  if (!config.hr.lexicon.empty() && !config.hr.rule_fsts.empty()) {
    auto hr_config = config.hr;
    hr_config.debug = config.model_config.debug;
    hr_ = std::make_unique<HomophoneReplacer>(mgr, hr_config);
  }
}

std::string OfflineRecognizerImpl::ApplyInverseTextNormalization(
    std::string text) const {
  text = RemoveInvalidUtf8Sequences(text);

  if (!itn_list_.empty()) {
    for (const auto &tn : itn_list_) {
      text = tn->Normalize(text);
    }
  }

  return text;
}

std::string OfflineRecognizerImpl::ApplyHomophoneReplacer(
    std::string text) const {
  if (hr_) {
    text = hr_->Apply(text);
  }

  return text;
}

void OfflineRecognizerImpl::SetConfig(const OfflineRecognizerConfig &config) {
  config_ = config;
}

#if __ANDROID_API__ >= 9
template OfflineRecognizerImpl::OfflineRecognizerImpl(
    AAssetManager *mgr, const OfflineRecognizerConfig &config);

template std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create(
    AAssetManager *mgr, const OfflineRecognizerConfig &config);
#endif

#if __OHOS__
template OfflineRecognizerImpl::OfflineRecognizerImpl(
    NativeResourceManager *mgr, const OfflineRecognizerConfig &config);
template std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create(
    NativeResourceManager *mgr, const OfflineRecognizerConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-recognizer-impl.h
================================================
// sherpa-onnx/csrc/offline-recognizer-impl.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_IMPL_H_

#include <memory>
#include <string>
#include <vector>

#include "kaldifst/csrc/text-normalizer.h"
#include "sherpa-onnx/csrc/homophone-replacer.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/offline-stream.h"

namespace sherpa_onnx {

class OfflineRecognizerImpl {
 public:
  explicit OfflineRecognizerImpl(const OfflineRecognizerConfig &config);

  static std::unique_ptr<OfflineRecognizerImpl> Create(
      const OfflineRecognizerConfig &config);

  template <typename Manager>
  OfflineRecognizerImpl(Manager *mgr, const OfflineRecognizerConfig &config);

  template <typename Manager>
  static std::unique_ptr<OfflineRecognizerImpl> Create(
      Manager *mgr, const OfflineRecognizerConfig &config);

  virtual ~OfflineRecognizerImpl() = default;

  virtual std::unique_ptr<OfflineStream> CreateStream(
      const std::string &hotwords) const {
    SHERPA_ONNX_LOGE("Only transducer models support contextual biasing.");
    exit(-1);
  }

  virtual std::unique_ptr<OfflineStream> CreateStream() const = 0;

  virtual void DecodeStreams(OfflineStream **ss, int32_t n) const = 0;

  virtual void SetConfig(const OfflineRecognizerConfig &config);

  virtual OfflineRecognizerConfig GetConfig() const = 0;

  std::string ApplyInverseTextNormalization(std::string text) const;

  std::string ApplyHomophoneReplacer(std::string text) const;

 protected:
  OfflineRecognizerConfig config_;
  // for inverse text normalization. Used only if
  // config.rule_fsts is not empty or
  // config.rule_fars is not empty
  std::vector<std::unique_ptr<kaldifst::TextNormalizer>> itn_list_;
  std::unique_ptr<HomophoneReplacer> hr_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-recognizer-moonshine-impl.h
================================================
// sherpa-onnx/csrc/offline-recognizer-moonshine-impl.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_MOONSHINE_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_MOONSHINE_IMPL_H_

#include <algorithm>
#include <cmath>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-model-config.h"
#include "sherpa-onnx/csrc/offline-moonshine-decoder.h"
#include "sherpa-onnx/csrc/offline-moonshine-greedy-search-decoder.h"
#include "sherpa-onnx/csrc/offline-moonshine-model.h"
#include "sherpa-onnx/csrc/offline-recognizer-impl.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/symbol-table.h"
#include "sherpa-onnx/csrc/transpose.h"

namespace sherpa_onnx {

OfflineRecognitionResult Convert(const OfflineMoonshineDecoderResult &src,
                                 const SymbolTable &sym_table) {
  OfflineRecognitionResult r;
  r.tokens.reserve(src.tokens.size());

  std::string text;
  for (auto i : src.tokens) {
    if (!sym_table.Contains(i)) {
      continue;
    }

    const auto &s = sym_table[i];
    text += s;
    r.tokens.push_back(s);
  }

  r.text = text;

  return r;
}

class OfflineRecognizerMoonshineImpl : public OfflineRecognizerImpl {
 public:
  explicit OfflineRecognizerMoonshineImpl(const OfflineRecognizerConfig &config)
      : OfflineRecognizerImpl(config),
        config_(config),
        symbol_table_(config_.model_config.tokens),
        model_(std::make_unique<OfflineMoonshineModel>(config.model_config)) {
    Init();
  }

  template <typename Manager>
  OfflineRecognizerMoonshineImpl(Manager *mgr,
                                 const OfflineRecognizerConfig &config)
      : OfflineRecognizerImpl(mgr, config),
        config_(config),
        symbol_table_(mgr, config_.model_config.tokens),
        model_(
            std::make_unique<OfflineMoonshineModel>(mgr, config.model_config)) {
    Init();
  }

  void Init() {
    if (config_.decoding_method == "greedy_search") {
      decoder_ =
          std::make_unique<OfflineMoonshineGreedySearchDecoder>(model_.get());
    } else {
      SHERPA_ONNX_LOGE(
          "Only greedy_search is supported at present for moonshine. Given %s",
          config_.decoding_method.c_str());
      SHERPA_ONNX_EXIT(-1);
    }
  }

  std::unique_ptr<OfflineStream> CreateStream() const override {
    MoonshineTag tag;
    return std::make_unique<OfflineStream>(tag);
  }

  void DecodeStreams(OfflineStream **ss, int32_t n) const override {
    // batch decoding is not implemented yet
    for (int32_t i = 0; i != n; ++i) {
      DecodeStream(ss[i]);
    }
  }

  OfflineRecognizerConfig GetConfig() const override { return config_; }

 private:
  void DecodeStream(OfflineStream *s) const {
    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::vector<float> audio = s->GetFrames();

    try {
      std::array<int64_t, 2> shape{1, static_cast<int64_t>(audio.size())};

      Ort::Value audio_tensor = Ort::Value::CreateTensor(
          memory_info, audio.data(), audio.size(), shape.data(), shape.size());

      Ort::Value features =
          model_->ForwardPreprocessor(std::move(audio_tensor));

      int32_t features_len = features.GetTensorTypeAndShapeInfo().GetShape()[1];

      int64_t features_shape = 1;

      Ort::Value features_len_tensor = Ort::Value::CreateTensor(
          memory_info, &features_len, 1, &features_shape, 1);

      Ort::Value encoder_out = model_->ForwardEncoder(
          std::move(features), std::move(features_len_tensor));

      auto results = decoder_->Decode(std::move(encoder_out));

      auto r = Convert(results[0], symbol_table_);
      r.text = ApplyInverseTextNormalization(std::move(r.text));
      r.text = ApplyHomophoneReplacer(std::move(r.text));
      s->SetResult(r);
    } catch (const Ort::Exception &ex) {
      SHERPA_ONNX_LOGE(
          "\n\nCaught exception:\n\n%s\n\nReturn an empty result. Number of "
          "audio samples: %d",
          ex.what(), static_cast<int32_t>(audio.size()));
      return;
    }
  }

 private:
  OfflineRecognizerConfig config_;
  SymbolTable symbol_table_;
  std::unique_ptr<OfflineMoonshineModel> model_;
  std::unique_ptr<OfflineMoonshineDecoder> decoder_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_MOONSHINE_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-recognizer-moonshine-v2-impl.h
================================================
// sherpa-onnx/csrc/offline-recognizer-moonshine-v2-impl.h
//
// Copyright (c)  2024-2026  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_MOONSHINE_V2_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_MOONSHINE_V2_IMPL_H_

#include <algorithm>
#include <cmath>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-model-config.h"
#include "sherpa-onnx/csrc/offline-moonshine-decoder.h"
#include "sherpa-onnx/csrc/offline-moonshine-model-v2.h"
#include "sherpa-onnx/csrc/offline-moonshine-v2-greedy-search-decoder.h"
#include "sherpa-onnx/csrc/offline-recognizer-impl.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/symbol-table.h"
#include "sherpa-onnx/csrc/transpose.h"

namespace sherpa_onnx {

// defined in ./offline-recognizer-moonshine-impl.h
OfflineRecognitionResult Convert(const OfflineMoonshineDecoderResult &src,
                                 const SymbolTable &sym_table);

class OfflineRecognizerMoonshineV2Impl : public OfflineRecognizerImpl {
 public:
  explicit OfflineRecognizerMoonshineV2Impl(
      const OfflineRecognizerConfig &config)
      : OfflineRecognizerImpl(config),
        config_(config),
        symbol_table_(config_.model_config.tokens),
        model_(std::make_unique<OfflineMoonshineModelV2>(config.model_config)) {
    Init();
  }

  template <typename Manager>
  OfflineRecognizerMoonshineV2Impl(Manager *mgr,
                                   const OfflineRecognizerConfig &config)
      : OfflineRecognizerImpl(mgr, config),
        config_(config),
        symbol_table_(mgr, config_.model_config.tokens),
        model_(std::make_unique<OfflineMoonshineModelV2>(mgr,
                                                         config.model_config)) {
    Init();
  }

  void Init() {
    // tokens.txt from whisper is base64 encoded, so we need to decode it
    // See also ../../scripts/moonshine/v2/generate_tokens.py
    symbol_table_.ApplyBase64Decode();

    if (config_.decoding_method == "greedy_search") {
      decoder_ =
          std::make_unique<OfflineMoonshineV2GreedySearchDecoder>(model_.get());
    } else {
      SHERPA_ONNX_LOGE(
          "Only greedy_search is supported at present for moonshine. Given %s",
          config_.decoding_method.c_str());
      SHERPA_ONNX_EXIT(-1);
    }
  }

  std::unique_ptr<OfflineStream> CreateStream() const override {
    MoonshineTag tag;
    return std::make_unique<OfflineStream>(tag);
  }

  void DecodeStreams(OfflineStream **ss, int32_t n) const override {
    // batch decoding is not implemented yet
    for (int32_t i = 0; i != n; ++i) {
      DecodeStream(ss[i]);
    }
  }

  OfflineRecognizerConfig GetConfig() const override { return config_; }

 private:
  void DecodeStream(OfflineStream *s) const {
    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::vector<float> audio = s->GetFrames();

    try {
      std::array<int64_t, 2> shape{1, static_cast<int64_t>(audio.size())};

      Ort::Value audio_tensor = Ort::Value::CreateTensor(
          memory_info, audio.data(), audio.size(), shape.data(), shape.size());

      Ort::Value encoder_out = model_->ForwardEncoder(std::move(audio_tensor));

      auto results = decoder_->Decode(std::move(encoder_out));

      auto r = Convert(results[0], symbol_table_);
      r.text = ApplyInverseTextNormalization(std::move(r.text));
      r.text = ApplyHomophoneReplacer(std::move(r.text));
      s->SetResult(r);
    } catch (const Ort::Exception &ex) {
      SHERPA_ONNX_LOGE(
          "\n\nCaught exception:\n\n%s\n\nReturn an empty result. Number of "
          "audio samples: %d",
          ex.what(), static_cast<int32_t>(audio.size()));
      return;
    }
  }

 private:
  OfflineRecognizerConfig config_;
  SymbolTable symbol_table_;
  std::unique_ptr<OfflineMoonshineModelV2> model_;
  std::unique_ptr<OfflineMoonshineDecoder> decoder_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_MOONSHINE_V2_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-recognizer-paraformer-impl.h
================================================
// sherpa-onnx/csrc/offline-recognizer-paraformer-impl.h
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_PARAFORMER_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_PARAFORMER_IMPL_H_

#include <algorithm>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "Eigen/Dense"
#include "sherpa-onnx/csrc/offline-model-config.h"
#include "sherpa-onnx/csrc/offline-paraformer-decoder.h"
#include "sherpa-onnx/csrc/offline-paraformer-greedy-search-decoder.h"
#include "sherpa-onnx/csrc/offline-paraformer-model.h"
#include "sherpa-onnx/csrc/offline-recognizer-impl.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/pad-sequence.h"
#include "sherpa-onnx/csrc/symbol-table.h"

namespace sherpa_onnx {

OfflineRecognitionResult Convert(const OfflineParaformerDecoderResult &src,
                                 const SymbolTable &sym_table) {
  OfflineRecognitionResult r;
  r.tokens.reserve(src.tokens.size());
  r.timestamps = src.timestamps;

  std::string text;

  // When the current token ends with "@@" we set mergeable to true
  bool mergeable = false;

  for (int32_t i = 0; i != src.tokens.size(); ++i) {
    auto sym = sym_table[src.tokens[i]];
    r.tokens.push_back(sym);

    if ((sym.back() != '@') || (sym.size() > 2 && sym[sym.size() - 2] != '@')) {
      // sym does not end with "@@"
      const uint8_t *p = reinterpret_cast<const uint8_t *>(sym.c_str());
      if (p[0] < 0x80) {
        // an ascii
        if (mergeable) {
          mergeable = false;
          text.append(sym);
        } else {
          text.append(" ");
          text.append(sym);
        }
      } else {
        // not an ascii
        mergeable = false;

        if (i > 0) {
          const uint8_t p = reinterpret_cast<const uint8_t *>(
              sym_table[src.tokens[i - 1]].c_str())[0];
          if (p < 0x80) {
            // put a space between ascii and non-ascii
            text.append(" ");
          }
        }
        text.append(sym);
      }
    } else {
      // this sym ends with @@
      sym = std::string(sym.data(), sym.size() - 2);
      if (mergeable) {
        text.append(sym);
      } else {
        text.append(" ");
        text.append(sym);
        mergeable = true;
      }
    }
  }
  r.text = std::move(text);

  return r;
}

class OfflineRecognizerParaformerImpl : public OfflineRecognizerImpl {
 public:
  explicit OfflineRecognizerParaformerImpl(
      const OfflineRecognizerConfig &config)
      : OfflineRecognizerImpl(config),
        config_(config),
        symbol_table_(config_.model_config.tokens),
        model_(std::make_unique<OfflineParaformerModel>(config.model_config)) {
    if (config.decoding_method == "greedy_search") {
      int32_t eos_id = symbol_table_["</s>"];
      decoder_ = std::make_unique<OfflineParaformerGreedySearchDecoder>(eos_id);
    } else {
      SHERPA_ONNX_LOGE("Only greedy_search is supported at present. Given %s",
                       config.decoding_method.c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    InitFeatConfig();
  }

  template <typename Manager>
  OfflineRecognizerParaformerImpl(Manager *mgr,
                                  const OfflineRecognizerConfig &config)
      : OfflineRecognizerImpl(mgr, config),
        config_(config),
        symbol_table_(mgr, config_.model_config.tokens),
        model_(std::make_unique<OfflineParaformerModel>(mgr,
                                                        config.model_config)) {
    if (config.decoding_method == "greedy_search") {
      int32_t eos_id = symbol_table_["</s>"];
      decoder_ = std::make_unique<OfflineParaformerGreedySearchDecoder>(eos_id);
    } else {
      SHERPA_ONNX_LOGE("Only greedy_search is supported at present. Given %s",
                       config.decoding_method.c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    InitFeatConfig();
  }

  std::unique_ptr<OfflineStream> CreateStream() const override {
    return std::make_unique<OfflineStream>(config_.feat_config);
  }

  void DecodeStreams(OfflineStream **ss, int32_t n) const override {
    // 1. Apply LFR
    // 2. Apply CMVN
    //
    // Please refer to
    // https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/45555.pdf
    // for what LFR means
    //
    // "Lower Frame Rate Neural Network Acoustic Models"
    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::vector<Ort::Value> features;
    features.reserve(n);

    int32_t feat_dim =
        config_.feat_config.feature_dim * model_->LfrWindowSize();

    std::vector<std::vector<float>> features_vec(n);
    std::vector<int32_t> features_length_vec(n);
    for (int32_t i = 0; i != n; ++i) {
      std::vector<float> f = ss[i]->GetFrames();

      f = ApplyLFR(f);
      ApplyCMVN(&f);

      int32_t num_frames = f.size() / feat_dim;
      features_vec[i] = std::move(f);

      features_length_vec[i] = num_frames;

      std::array<int64_t, 2> shape = {num_frames, feat_dim};

      Ort::Value x = Ort::Value::CreateTensor(
          memory_info, features_vec[i].data(), features_vec[i].size(),
          shape.data(), shape.size());
      features.push_back(std::move(x));
    }

    std::vector<const Ort::Value *> features_pointer(n);
    for (int32_t i = 0; i != n; ++i) {
      features_pointer[i] = &features[i];
    }

    std::array<int64_t, 1> features_length_shape = {n};
    Ort::Value x_length = Ort::Value::CreateTensor(
        memory_info, features_length_vec.data(), n,
        features_length_shape.data(), features_length_shape.size());

    // Caution(fangjun): We cannot pad it with log(eps),
    // i.e., -23.025850929940457f
    Ort::Value x = PadSequence(model_->Allocator(), features_pointer, 0);

    std::vector<Ort::Value> t;
    try {
      t = model_->Forward(std::move(x), std::move(x_length));
    } catch (const Ort::Exception &ex) {
      SHERPA_ONNX_LOGE("\n\nCaught exception:\n\n%s\n\nReturn an empty result",
                       ex.what());
      return;
    }

    std::vector<OfflineParaformerDecoderResult> results;
    if (t.size() == 2) {
      results = decoder_->Decode(std::move(t[0]), std::move(t[1]));
    } else {
      results =
          decoder_->Decode(std::move(t[0]), std::move(t[1]), std::move(t[3]));
    }

    for (int32_t i = 0; i != n; ++i) {
      auto r = Convert(results[i], symbol_table_);
      r.text = ApplyInverseTextNormalization(std::move(r.text));
      r.text = ApplyHomophoneReplacer(std::move(r.text));
      ss[i]->SetResult(r);
    }
  }

  OfflineRecognizerConfig GetConfig() const override { return config_; }

 private:
  void InitFeatConfig() {
    // Paraformer models assume input samples are in the range
    // [-32768, 32767], so we set normalize_samples to false
    config_.feat_config.normalize_samples = false;
    config_.feat_config.window_type = "hamming";
    config_.feat_config.high_freq = 0;
    config_.feat_config.snip_edges = true;
  }

  std::vector<float> ApplyLFR(const std::vector<float> &in) const {
    int32_t lfr_window_size = model_->LfrWindowSize();
    int32_t lfr_window_shift = model_->LfrWindowShift();
    int32_t in_feat_dim = config_.feat_config.feature_dim;

    int32_t in_num_frames = in.size() / in_feat_dim;
    int32_t out_num_frames =
        (in_num_frames - lfr_window_size) / lfr_window_shift + 1;
    int32_t out_feat_dim = in_feat_dim * lfr_window_size;

    std::vector<float> out(out_num_frames * out_feat_dim);

    const float *p_in = in.data();
    float *p_out = out.data();

    for (int32_t i = 0; i != out_num_frames; ++i) {
      std::copy(p_in, p_in + out_feat_dim, p_out);

      p_out += out_feat_dim;
      p_in += lfr_window_shift * in_feat_dim;
    }

    return out;
  }

  void ApplyCMVN(std::vector<float> *v) const {
    const std::vector<float> &neg_mean = model_->NegativeMean();
    const std::vector<float> &inv_stddev = model_->InverseStdDev();
    int32_t dim = static_cast<int32_t>(neg_mean.size());
    int32_t num_frames = static_cast<int32_t>(v->size()) / dim;

    Eigen::Map<
        Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
        mat(v->data(), num_frames, dim);

    Eigen::Map<const Eigen::RowVectorXf> neg_mean_vec(neg_mean.data(), dim);
    Eigen::Map<const Eigen::RowVectorXf> inv_stddev_vec(inv_stddev.data(), dim);

    mat.array() = (mat.array().rowwise() + neg_mean_vec.array()).rowwise() *
                  inv_stddev_vec.array();
  }

  OfflineRecognizerConfig config_;
  SymbolTable symbol_table_;
  std::unique_ptr<OfflineParaformerModel> model_;
  std::unique_ptr<OfflineParaformerDecoder> decoder_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_PARAFORMER_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-recognizer-paraformer-tpl-impl.h
================================================
// sherpa-onnx/csrc/offline-recognizer-paraformer-tpl-impl.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_PARAFORMER_TPL_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_PARAFORMER_TPL_IMPL_H_

#include <memory>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-model-config.h"
#include "sherpa-onnx/csrc/offline-recognizer-impl.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/symbol-table.h"

namespace sherpa_onnx {

// defined in ../offline-recognizer-paraformer-impl.h
OfflineRecognitionResult Convert(const OfflineParaformerDecoderResult &src,
                                 const SymbolTable &sym_table);

template <typename ParaformerModel>
class OfflineRecognizerParaformerTplImpl : public OfflineRecognizerImpl {
 public:
  explicit OfflineRecognizerParaformerTplImpl(
      const OfflineRecognizerConfig &config)
      : OfflineRecognizerImpl(config),
        config_(config),
        symbol_table_(config_.model_config.tokens),
        model_(std::make_unique<ParaformerModel>(config.model_config)) {
    if (config.decoding_method != "greedy_search") {
      SHERPA_ONNX_LOGE("Only greedy_search is supported at present. Given %s",
                       config.decoding_method.c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    InitFeatConfig();
  }

  template <typename Manager>
  OfflineRecognizerParaformerTplImpl(Manager *mgr,
                                     const OfflineRecognizerConfig &config)
      : OfflineRecognizerImpl(mgr, config),
        config_(config),
        symbol_table_(mgr, config_.model_config.tokens),
        model_(std::make_unique<ParaformerModel>(mgr, config.model_config)) {
    if (config.decoding_method != "greedy_search") {
      SHERPA_ONNX_LOGE("Only greedy_search is supported at present. Given %s",
                       config.decoding_method.c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    InitFeatConfig();
  }

  std::unique_ptr<OfflineStream> CreateStream() const override {
    return std::make_unique<OfflineStream>(config_.feat_config);
  }

  void DecodeStreams(OfflineStream **ss, int32_t n) const override {
    for (int32_t i = 0; i < n; ++i) {
      DecodeOneStream(ss[i]);
    }
  }

  OfflineRecognizerConfig GetConfig() const override { return config_; }

 private:
  void InitFeatConfig() {
    config_.feat_config.normalize_samples = false;
    config_.feat_config.window_type = "hamming";
    config_.feat_config.high_freq = 0;
    config_.feat_config.snip_edges = true;
  }

  void DecodeOneStream(OfflineStream *s) const {
    std::vector<float> f = s->GetFrames();

    std::vector<float> logits = model_->Run(std::move(f));
    if (logits.empty()) {
      SHERPA_ONNX_LOGE("No speech detected");
      return;
    }

    int32_t vocab_size = model_->VocabSize();
    int32_t num_tokens = logits.size() / vocab_size;

    int32_t eos_id = symbol_table_["</s>"];

    OfflineParaformerDecoderResult r;
    const float *p = logits.data();
    for (int32_t i = 0; i < num_tokens; ++i) {
      auto max_idx = static_cast<int64_t>(
          std::distance(p, std::max_element(p, p + vocab_size)));

      if (max_idx == eos_id) {
        break;
      }
      r.tokens.push_back(max_idx);
      p += vocab_size;
    }

    auto result = Convert(r, symbol_table_);
    result.text = ApplyInverseTextNormalization(std::move(result.text));
    result.text = ApplyHomophoneReplacer(std::move(result.text));
    s->SetResult(result);
  }

 private:
  OfflineRecognizerConfig config_;
  SymbolTable symbol_table_;
  std::unique_ptr<ParaformerModel> model_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_PARAFORMER_TPL_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-recognizer-sense-voice-impl.h
================================================
// sherpa-onnx/csrc/offline-recognizer-sense-voice-impl.h
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_SENSE_VOICE_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_SENSE_VOICE_IMPL_H_

#include <algorithm>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "Eigen/Dense"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-ctc-greedy-search-decoder.h"
#include "sherpa-onnx/csrc/offline-model-config.h"
#include "sherpa-onnx/csrc/offline-recognizer-impl.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/offline-sense-voice-model.h"
#include "sherpa-onnx/csrc/pad-sequence.h"
#include "sherpa-onnx/csrc/symbol-table.h"

namespace sherpa_onnx {

OfflineRecognitionResult ConvertSenseVoiceResult(
    const OfflineCtcDecoderResult &src, const SymbolTable &sym_table,
    int32_t frame_shift_ms, int32_t subsampling_factor,
    bool is_funasr_nano = false) {
  OfflineRecognitionResult r;
  r.tokens.reserve(src.tokens.size());
  r.timestamps.reserve(src.timestamps.size());

  std::string text;

  // Funasr NanO does not support emotion, event, language, etc.
  int32_t start = is_funasr_nano ? 0 : 4;

  for (int32_t i = start; i < src.tokens.size(); ++i) {
    auto sym = sym_table[src.tokens[i]];
    text.append(sym);

    r.tokens.push_back(std::move(sym));
  }
  r.text = std::move(text);

  float frame_shift_s = frame_shift_ms / 1000. * subsampling_factor;

  for (int32_t i = start; i < src.timestamps.size(); ++i) {
    float time = frame_shift_s * (src.timestamps[i] - start);
    r.timestamps.push_back(time);
  }

  r.words = std::move(src.words);

  if (!is_funasr_nano) {
    // parse lang, emotion and event from tokens.
    if (src.tokens.size() >= 3) {
      r.lang = sym_table[src.tokens[0]];
      r.emotion = sym_table[src.tokens[1]];
      r.event = sym_table[src.tokens[2]];
    }
  }

  return r;
}

class OfflineRecognizerSenseVoiceImpl : public OfflineRecognizerImpl {
 public:
  explicit OfflineRecognizerSenseVoiceImpl(
      const OfflineRecognizerConfig &config)
      : OfflineRecognizerImpl(config),
        symbol_table_(config_.model_config.tokens),
        model_(std::make_unique<OfflineSenseVoiceModel>(config.model_config)) {
    const auto &meta_data = model_->GetModelMetadata();
    if (config.decoding_method == "greedy_search") {
      decoder_ =
          std::make_unique<OfflineCtcGreedySearchDecoder>(meta_data.blank_id);
    } else {
      SHERPA_ONNX_LOGE("Only greedy_search is supported at present. Given %s",
                       config.decoding_method.c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    PostInit();
  }

  template <typename Manager>
  OfflineRecognizerSenseVoiceImpl(Manager *mgr,
                                  const OfflineRecognizerConfig &config)
      : OfflineRecognizerImpl(mgr, config),
        symbol_table_(mgr, config_.model_config.tokens),
        model_(std::make_unique<OfflineSenseVoiceModel>(mgr,
                                                        config.model_config)) {
    const auto &meta_data = model_->GetModelMetadata();
    if (config.decoding_method == "greedy_search") {
      decoder_ =
          std::make_unique<OfflineCtcGreedySearchDecoder>(meta_data.blank_id);
    } else {
      SHERPA_ONNX_LOGE("Only greedy_search is supported at present. Given %s",
                       config.decoding_method.c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    PostInit();
  }

  std::unique_ptr<OfflineStream> CreateStream() const override {
    return std::make_unique<OfflineStream>(config_.feat_config);
  }

  void DecodeStreams(OfflineStream **ss, int32_t n) const override {
    const auto &meta_data = model_->GetModelMetadata();

    if (meta_data.is_funasr_nano) {
      for (int32_t i = 0; i < n; ++i) {
        DecodeOneStreamFunAsrNano(ss[i]);
      }

      return;
    }

    if (n == 1) {
      DecodeOneStream(ss[0]);
      return;
    }

    // 1. Apply LFR
    // 2. Apply CMVN
    //
    // Please refer to
    // https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/45555.pdf
    // for what LFR means
    //
    // "Lower Frame Rate Neural Network Acoustic Models"
    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::vector<Ort::Value> features;
    features.reserve(n);

    int32_t feat_dim = config_.feat_config.feature_dim * meta_data.window_size;

    std::vector<std::vector<float>> features_vec(n);
    std::vector<int32_t> features_length_vec(n);
    for (int32_t i = 0; i != n; ++i) {
      std::vector<float> f = ss[i]->GetFrames();

      f = ApplyLFR(f);
      ApplyCMVN(&f);

      int32_t num_frames = f.size() / feat_dim;
      features_vec[i] = std::move(f);

      features_length_vec[i] = num_frames;

      std::array<int64_t, 2> shape = {num_frames, feat_dim};

      Ort::Value x = Ort::Value::CreateTensor(
          memory_info, features_vec[i].data(), features_vec[i].size(),
          shape.data(), shape.size());
      features.push_back(std::move(x));
    }

    std::vector<const Ort::Value *> features_pointer(n);
    for (int32_t i = 0; i != n; ++i) {
      features_pointer[i] = &features[i];
    }

    std::array<int64_t, 1> features_length_shape = {n};
    Ort::Value x_length = Ort::Value::CreateTensor(
        memory_info, features_length_vec.data(), n,
        features_length_shape.data(), features_length_shape.size());

    // Caution(fangjun): We cannot pad it with log(eps),
    // i.e., -23.025850929940457f
    Ort::Value x = PadSequence(model_->Allocator(), features_pointer, 0);

    int32_t language = 0;
    if (config_.model_config.sense_voice.language.empty()) {
      language = 0;
    } else if (meta_data.lang2id.count(
                   config_.model_config.sense_voice.language)) {
      language =
          meta_data.lang2id.at(config_.model_config.sense_voice.language);
    } else {
      SHERPA_ONNX_LOGE("Unknown language: %s. Use 0 instead.",
                       config_.model_config.sense_voice.language.c_str());
    }

    std::vector<int32_t> language_array(n);
    std::fill(language_array.begin(), language_array.end(), language);

    std::vector<int32_t> text_norm_array(n);
    std::fill(text_norm_array.begin(), text_norm_array.end(),
              config_.model_config.sense_voice.use_itn
                  ? meta_data.with_itn_id
                  : meta_data.without_itn_id);

    Ort::Value language_tensor = Ort::Value::CreateTensor(
        memory_info, language_array.data(), n, features_length_shape.data(),
        features_length_shape.size());

    Ort::Value text_norm_tensor = Ort::Value::CreateTensor(
        memory_info, text_norm_array.data(), n, features_length_shape.data(),
        features_length_shape.size());

    Ort::Value logits{nullptr};
    try {
      logits = model_->Forward(std::move(x), std::move(x_length),
                               std::move(language_tensor),
                               std::move(text_norm_tensor));
    } catch (const Ort::Exception &ex) {
      SHERPA_ONNX_LOGE("\n\nCaught exception:\n\n%s\n\nReturn an empty result",
                       ex.what());
      return;
    }

    // decoder_->Decode() requires that logits_length is of dtype int64
    std::vector<int64_t> features_length_vec_64;
    features_length_vec_64.reserve(n);
    for (auto i : features_length_vec) {
      i += 4;
      features_length_vec_64.push_back(i);
    }

    Ort::Value logits_length = Ort::Value::CreateTensor(
        memory_info, features_length_vec_64.data(), n,
        features_length_shape.data(), features_length_shape.size());

    auto results =
        decoder_->Decode(std::move(logits), std::move(logits_length));

    int32_t frame_shift_ms = 10;
    int32_t subsampling_factor = meta_data.window_shift;
    for (int32_t i = 0; i != n; ++i) {
      auto r = ConvertSenseVoiceResult(results[i], symbol_table_,
                                       frame_shift_ms, subsampling_factor);
      r.text = ApplyInverseTextNormalization(std::move(r.text));
      r.text = ApplyHomophoneReplacer(std::move(r.text));
      ss[i]->SetResult(r);
    }
  }

  OfflineRecognizerConfig GetConfig() const override { return config_; }

 private:
  void DecodeOneStreamFunAsrNano(OfflineStream *s) const {
    const auto &meta_data = model_->GetModelMetadata();
    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    int32_t feat_dim = config_.feat_config.feature_dim * meta_data.window_size;
    std::vector<float> f = s->GetFrames();
    f = ApplyLFR(f);

    int32_t num_frames = f.size() / feat_dim;
    std::array<int64_t, 3> shape = {1, num_frames, feat_dim};
    Ort::Value x = Ort::Value::CreateTensor(memory_info, f.data(), f.size(),
                                            shape.data(), shape.size());

    Ort::Value logits{nullptr};
    try {
      logits = model_->Forward(std::move(x));
    } catch (const Ort::Exception &ex) {
      SHERPA_ONNX_LOGE("\n\nCaught exception:\n\n%s\n\nReturn an empty result",
                       ex.what());
      return;
    }

    int64_t new_num_frames = logits.GetTensorTypeAndShapeInfo().GetShape()[1];
    int64_t num_frame_shape = 1;
    Ort::Value logits_length = Ort::Value::CreateTensor(
        memory_info, &new_num_frames, 1, &num_frame_shape, 1);

    auto results =
        decoder_->Decode(std::move(logits), std::move(logits_length));

    int32_t frame_shift_ms = 10;
    int32_t subsampling_factor = meta_data.window_shift;
    auto r = ConvertSenseVoiceResult(results[0], symbol_table_, frame_shift_ms,
                                     subsampling_factor, true);

    r.text = ApplyInverseTextNormalization(std::move(r.text));
    r.text = ApplyHomophoneReplacer(std::move(r.text));
    s->SetResult(r);
  }

  void DecodeOneStream(OfflineStream *s) const {
    const auto &meta_data = model_->GetModelMetadata();

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    int32_t feat_dim = config_.feat_config.feature_dim * meta_data.window_size;
    std::vector<float> f = s->GetFrames();
    f = ApplyLFR(f);
    ApplyCMVN(&f);
    int32_t num_frames = f.size() / feat_dim;
    std::array<int64_t, 3> shape = {1, num_frames, feat_dim};
    Ort::Value x = Ort::Value::CreateTensor(memory_info, f.data(), f.size(),
                                            shape.data(), shape.size());

    int64_t scale_shape = 1;

    Ort::Value x_length =
        Ort::Value::CreateTensor(memory_info, &num_frames, 1, &scale_shape, 1);

    int32_t language = 0;
    if (config_.model_config.sense_voice.language.empty()) {
      language = 0;
    } else if (meta_data.lang2id.count(
                   config_.model_config.sense_voice.language)) {
      language =
          meta_data.lang2id.at(config_.model_config.sense_voice.language);
    } else {
      SHERPA_ONNX_LOGE("Unknown language: %s. Use 0 instead.",
                       config_.model_config.sense_voice.language.c_str());
    }

    int32_t text_norm = config_.model_config.sense_voice.use_itn
                            ? meta_data.with_itn_id
                            : meta_data.without_itn_id;

    Ort::Value language_tensor =
        Ort::Value::CreateTensor(memory_info, &language, 1, &scale_shape, 1);

    Ort::Value text_norm_tensor =
        Ort::Value::CreateTensor(memory_info, &text_norm, 1, &scale_shape, 1);

    Ort::Value logits{nullptr};
    try {
      logits = model_->Forward(std::move(x), std::move(x_length),
                               std::move(language_tensor),
                               std::move(text_norm_tensor));
    } catch (const Ort::Exception &ex) {
      SHERPA_ONNX_LOGE("\n\nCaught exception:\n\n%s\n\nReturn an empty result",
                       ex.what());
      return;
    }

    int64_t new_num_frames = num_frames + 4;
    Ort::Value logits_length = Ort::Value::CreateTensor(
        memory_info, &new_num_frames, 1, &scale_shape, 1);

    auto results =
        decoder_->Decode(std::move(logits), std::move(logits_length));

    int32_t frame_shift_ms = 10;
    int32_t subsampling_factor = meta_data.window_shift;
    auto r = ConvertSenseVoiceResult(results[0], symbol_table_, frame_shift_ms,
                                     subsampling_factor);

    r.text = ApplyInverseTextNormalization(std::move(r.text));
    r.text = ApplyHomophoneReplacer(std::move(r.text));
    s->SetResult(r);
  }

  void PostInit() {
    InitFeatConfig();

    const auto &meta_data = model_->GetModelMetadata();
    if (meta_data.is_funasr_nano) {
      symbol_table_.ApplyBase64Decode();
    }
  }

  void InitFeatConfig() {
    const auto &meta_data = model_->GetModelMetadata();

    config_.feat_config.normalize_samples = meta_data.normalize_samples;
    config_.feat_config.window_type = "hamming";
    config_.feat_config.high_freq = 0;
    config_.feat_config.snip_edges = true;
  }

  std::vector<float> ApplyLFR(const std::vector<float> &in) const {
    const auto &meta_data = model_->GetModelMetadata();

    int32_t lfr_window_size = meta_data.window_size;
    int32_t lfr_window_shift = meta_data.window_shift;
    int32_t in_feat_dim = config_.feat_config.feature_dim;

    int32_t in_num_frames = in.size() / in_feat_dim;
    int32_t out_num_frames =
        (in_num_frames - lfr_window_size) / lfr_window_shift + 1;
    int32_t out_feat_dim = in_feat_dim * lfr_window_size;

    std::vector<float> out(out_num_frames * out_feat_dim);

    const float *p_in = in.data();
    float *p_out = out.data();

    for (int32_t i = 0; i != out_num_frames; ++i) {
      std::copy(p_in, p_in + out_feat_dim, p_out);

      p_out += out_feat_dim;
      p_in += lfr_window_shift * in_feat_dim;
    }

    return out;
  }

  void ApplyCMVN(std::vector<float> *v) const {
    const auto &meta_data = model_->GetModelMetadata();
    const std::vector<float> &neg_mean = meta_data.neg_mean;
    const std::vector<float> &inv_stddev = meta_data.inv_stddev;
    int32_t dim = static_cast<int32_t>(neg_mean.size());
    int32_t num_frames = static_cast<int32_t>(v->size()) / dim;
    Eigen::Map<
        Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
        mat(v->data(), num_frames, dim);
    Eigen::Map<const Eigen::RowVectorXf> neg_mean_vec(neg_mean.data(), dim);

    Eigen::Map<const Eigen::RowVectorXf> inv_stddev_vec(inv_stddev.data(), dim);
    mat.array() = (mat.array().rowwise() + neg_mean_vec.array()).rowwise() *
                  inv_stddev_vec.array();
  }

  SymbolTable symbol_table_;
  std::unique_ptr<OfflineSenseVoiceModel> model_;
  std::unique_ptr<OfflineCtcDecoder> decoder_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_SENSE_VOICE_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-recognizer-sense-voice-tpl-impl.h
================================================
// sherpa-onnx/csrc/offline-recognizer-sense-voice-tpl-impl.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_SENSE_VOICE_TPL_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_SENSE_VOICE_TPL_IMPL_H_

#include <memory>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-model-config.h"
#include "sherpa-onnx/csrc/offline-recognizer-impl.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/rknn/offline-ctc-greedy-search-decoder-rknn.h"
#include "sherpa-onnx/csrc/symbol-table.h"

namespace sherpa_onnx {

// defined in ../offline-recognizer-sense-voice-impl.h
OfflineRecognitionResult ConvertSenseVoiceResult(
    const OfflineCtcDecoderResult &src, const SymbolTable &sym_table,
    int32_t frame_shift_ms, int32_t subsampling_factor,
    bool is_funasr_nano /*= false*/);

template <typename SenseVoiceModel>
class OfflineRecognizerSenseVoiceTplImpl : public OfflineRecognizerImpl {
 public:
  explicit OfflineRecognizerSenseVoiceTplImpl(
      const OfflineRecognizerConfig &config)
      : OfflineRecognizerImpl(config),
        config_(config),
        symbol_table_(config_.model_config.tokens),
        model_(std::make_unique<SenseVoiceModel>(config.model_config)) {
    const auto &meta_data = model_->GetModelMetadata();
    if (config.decoding_method == "greedy_search") {
      decoder_ = std::make_unique<OfflineCtcGreedySearchDecoderRknn>(
          meta_data.blank_id);
    } else {
      SHERPA_ONNX_LOGE("Only greedy_search is supported at present. Given %s",
                       config.decoding_method.c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    InitFeatConfig();
  }

  template <typename Manager>
  OfflineRecognizerSenseVoiceTplImpl(Manager *mgr,
                                     const OfflineRecognizerConfig &config)
      : OfflineRecognizerImpl(mgr, config),
        config_(config),
        symbol_table_(mgr, config_.model_config.tokens),
        model_(std::make_unique<SenseVoiceModel>(mgr, config.model_config)) {
    const auto &meta_data = model_->GetModelMetadata();
    if (config.decoding_method == "greedy_search") {
      decoder_ = std::make_unique<OfflineCtcGreedySearchDecoderRknn>(
          meta_data.blank_id);
    } else {
      SHERPA_ONNX_LOGE("Only greedy_search is supported at present. Given %s",
                       config.decoding_method.c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    InitFeatConfig();
  }

  std::unique_ptr<OfflineStream> CreateStream() const override {
    return std::make_unique<OfflineStream>(config_.feat_config);
  }

  void DecodeStreams(OfflineStream **ss, int32_t n) const override {
    for (int32_t i = 0; i < n; ++i) {
      DecodeOneStream(ss[i]);
    }
  }

  OfflineRecognizerConfig GetConfig() const override { return config_; }

 private:
  void InitFeatConfig() {
    const auto &meta_data = model_->GetModelMetadata();

    config_.feat_config.normalize_samples = meta_data.normalize_samples;
    config_.feat_config.window_type = "hamming";
    config_.feat_config.high_freq = 0;
    config_.feat_config.snip_edges = true;
  }

  void DecodeOneStream(OfflineStream *s) const {
    const auto &meta_data = model_->GetModelMetadata();

    std::vector<float> f = s->GetFrames();

    int32_t language = 0;
    if (config_.model_config.sense_voice.language.empty()) {
      language = 0;
    } else if (meta_data.lang2id.count(
                   config_.model_config.sense_voice.language)) {
      language =
          meta_data.lang2id.at(config_.model_config.sense_voice.language);
    } else {
      SHERPA_ONNX_LOGE("Unknown language: %s. Use 0 instead.",
                       config_.model_config.sense_voice.language.c_str());
    }

    int32_t text_norm = config_.model_config.sense_voice.use_itn
                            ? meta_data.with_itn_id
                            : meta_data.without_itn_id;

    std::vector<float> logits = model_->Run(std::move(f), language, text_norm);
    if (logits.empty()) {
      return;
    }

    int32_t num_out_frames = logits.size() / meta_data.vocab_size;

    auto result =
        decoder_->Decode(logits.data(), num_out_frames, meta_data.vocab_size);

    int32_t frame_shift_ms = 10;
    int32_t subsampling_factor = meta_data.window_shift;
    auto r = ConvertSenseVoiceResult(result, symbol_table_, frame_shift_ms,
                                     subsampling_factor);

    r.text = ApplyInverseTextNormalization(std::move(r.text));
    r.text = ApplyHomophoneReplacer(std::move(r.text));
    s->SetResult(r);
  }

 private:
  OfflineRecognizerConfig config_;
  SymbolTable symbol_table_;
  std::unique_ptr<SenseVoiceModel> model_;
  std::unique_ptr<OfflineCtcGreedySearchDecoderRknn> decoder_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_SENSE_VOICE_TPL_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-recognizer-transducer-impl.h
================================================
// sherpa-onnx/csrc/offline-recognizer-transducer-impl.h
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_TRANSDUCER_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_TRANSDUCER_IMPL_H_

#include <fstream>
#include <ios>
#include <memory>
#include <regex>  // NOLINT
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/context-graph.h"
#include "sherpa-onnx/csrc/log.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-recognizer-impl.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/offline-transducer-decoder.h"
#include "sherpa-onnx/csrc/offline-transducer-greedy-search-decoder.h"
#include "sherpa-onnx/csrc/offline-transducer-model.h"
#include "sherpa-onnx/csrc/offline-transducer-modified-beam-search-decoder.h"
#include "sherpa-onnx/csrc/pad-sequence.h"
#include "sherpa-onnx/csrc/symbol-table.h"
#include "sherpa-onnx/csrc/utils.h"
#include "ssentencepiece/csrc/ssentencepiece.h"

namespace sherpa_onnx {

static OfflineRecognitionResult Convert(
    const OfflineTransducerDecoderResult &src, const SymbolTable &sym_table,
    int32_t frame_shift_ms, int32_t subsampling_factor) {
  OfflineRecognitionResult r;
  r.tokens.reserve(src.tokens.size());
  r.timestamps.reserve(src.timestamps.size());
  r.durations.reserve(src.durations.size());

  std::string text;
  for (auto i : src.tokens) {
    auto sym = sym_table[i];
    text.append(sym);

    if (sym.size() == 1 && (sym[0] < 0x20 || sym[0] > 0x7e)) {
      // for bpe models with byte_fallback,
      // (but don't rewrite printable characters 0x20..0x7e,
      //  which collide with standard BPE units)
      std::ostringstream os;
      os << "<0x" << std::hex << std::uppercase
         << (static_cast<int32_t>(sym[0]) & 0xff) << ">";
      sym = os.str();
    }

    r.tokens.push_back(std::move(sym));
  }
  if (sym_table.IsByteBpe()) {
    text = sym_table.DecodeByteBpe(text);
  }

  r.text = std::move(text);

  float frame_shift_s = frame_shift_ms / 1000. * subsampling_factor;
  for (auto t : src.timestamps) {
    float time = frame_shift_s * t;
    r.timestamps.push_back(time);
  }

  // Copy durations (if present)
  for (auto d : src.durations) {
    r.durations.push_back(d * frame_shift_s);
  }

  // Copy token log probabilities (confidence scores)
  r.ys_log_probs = src.ys_log_probs;

  return r;
}

class OfflineRecognizerTransducerImpl : public OfflineRecognizerImpl {
 public:
  explicit OfflineRecognizerTransducerImpl(
      const OfflineRecognizerConfig &config)
      : OfflineRecognizerImpl(config),
        config_(config),
        symbol_table_(config_.model_config.tokens),
        model_(std::make_unique<OfflineTransducerModel>(config_.model_config)) {
    if (symbol_table_.Contains("<unk>")) {
      unk_id_ = symbol_table_["<unk>"];
    }

    if (config_.decoding_method == "greedy_search") {
      decoder_ = std::make_unique<OfflineTransducerGreedySearchDecoder>(
          model_.get(), unk_id_, config_.blank_penalty);
    } else if (config_.decoding_method == "modified_beam_search") {
      if (!config_.lm_config.model.empty()) {
        lm_ = OfflineLM::Create(config.lm_config);
      }

      if (!config_.model_config.bpe_vocab.empty()) {
        bpe_encoder_ = std::make_unique<ssentencepiece::Ssentencepiece>(
            config_.model_config.bpe_vocab);
      }

      if (!config_.hotwords_file.empty()) {
        InitHotwords();
      }

      decoder_ = std::make_unique<OfflineTransducerModifiedBeamSearchDecoder>(
          model_.get(), lm_.get(), config_.max_active_paths,
          config_.lm_config.scale, unk_id_, config_.blank_penalty);
    } else {
      SHERPA_ONNX_LOGE("Unsupported decoding method: %s",
                       config_.decoding_method.c_str());
      exit(-1);
    }
  }

  template <typename Manager>
  explicit OfflineRecognizerTransducerImpl(
      Manager *mgr, const OfflineRecognizerConfig &config)
      : OfflineRecognizerImpl(mgr, config),
        config_(config),
        symbol_table_(mgr, config_.model_config.tokens),
        model_(std::make_unique<OfflineTransducerModel>(mgr,
                                                        config_.model_config)) {
    if (symbol_table_.Contains("<unk>")) {
      unk_id_ = symbol_table_["<unk>"];
    }

    if (config_.decoding_method == "greedy_search") {
      decoder_ = std::make_unique<OfflineTransducerGreedySearchDecoder>(
          model_.get(), unk_id_, config_.blank_penalty);
    } else if (config_.decoding_method == "modified_beam_search") {
      if (!config_.lm_config.model.empty()) {
        lm_ = OfflineLM::Create(mgr, config.lm_config);
      }

      if (!config_.model_config.bpe_vocab.empty()) {
        auto buf = ReadFile(mgr, config_.model_config.bpe_vocab);
        std::istringstream iss(std::string(buf.begin(), buf.end()));
        bpe_encoder_ = std::make_unique<ssentencepiece::Ssentencepiece>(iss);
      }

      if (!config_.hotwords_file.empty()) {
        InitHotwords(mgr);
      }

      decoder_ = std::make_unique<OfflineTransducerModifiedBeamSearchDecoder>(
          model_.get(), lm_.get(), config_.max_active_paths,
          config_.lm_config.scale, unk_id_, config_.blank_penalty);
    } else {
      SHERPA_ONNX_LOGE("Unsupported decoding method: %s",
                       config_.decoding_method.c_str());
      exit(-1);
    }
  }

  std::unique_ptr<OfflineStream> CreateStream(
      const std::string &hotwords) const override {
    auto hws = std::regex_replace(hotwords, std::regex("/"), "\n");
    std::istringstream is(hws);
    std::vector<std::vector<int32_t>> current;
    std::vector<float> current_scores;
    if (!EncodeHotwords(is, config_.model_config.modeling_unit, symbol_table_,
                        bpe_encoder_.get(), &current, &current_scores)) {
      SHERPA_ONNX_LOGE("Encode hotwords failed, skipping, hotwords are : '%s'",
                       hotwords.c_str());
    }

    int32_t num_default_hws = hotwords_.size();
    int32_t num_hws = current.size();

    current.insert(current.end(), hotwords_.begin(), hotwords_.end());

    if (!current_scores.empty() && !boost_scores_.empty()) {
      current_scores.insert(current_scores.end(), boost_scores_.begin(),
                            boost_scores_.end());
    } else if (!current_scores.empty() && boost_scores_.empty()) {
      current_scores.insert(current_scores.end(), num_default_hws,
                            config_.hotwords_score);
    } else if (current_scores.empty() && !boost_scores_.empty()) {
      current_scores.insert(current_scores.end(), num_hws,
                            config_.hotwords_score);
      current_scores.insert(current_scores.end(), boost_scores_.begin(),
                            boost_scores_.end());
    } else {
      // Do nothing.
    }

    auto context_graph = std::make_shared<ContextGraph>(
        current, config_.hotwords_score, current_scores);
    return std::make_unique<OfflineStream>(config_.feat_config, context_graph);
  }

  std::unique_ptr<OfflineStream> CreateStream() const override {
    return std::make_unique<OfflineStream>(config_.feat_config,
                                           hotwords_graph_);
  }

  void DecodeStreams(OfflineStream **ss, int32_t n) const override {
    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    int32_t feat_dim = ss[0]->FeatureDim();

    std::vector<Ort::Value> features;

    features.reserve(n);

    std::vector<std::vector<float>> features_vec(n);
    std::vector<int64_t> features_length_vec(n);
    for (int32_t i = 0; i != n; ++i) {
      auto f = ss[i]->GetFrames();
      int32_t num_frames = f.size() / feat_dim;

      features_length_vec[i] = num_frames;
      features_vec[i] = std::move(f);

      std::array<int64_t, 2> shape = {num_frames, feat_dim};

      Ort::Value x = Ort::Value::CreateTensor(
          memory_info, features_vec[i].data(), features_vec[i].size(),
          shape.data(), shape.size());
      features.push_back(std::move(x));
    }

    std::vector<const Ort::Value *> features_pointer(n);
    for (int32_t i = 0; i != n; ++i) {
      features_pointer[i] = &features[i];
    }

    std::array<int64_t, 1> features_length_shape = {n};
    Ort::Value x_length = Ort::Value::CreateTensor(
        memory_info, features_length_vec.data(), n,
        features_length_shape.data(), features_length_shape.size());

    Ort::Value x = PadSequence(model_->Allocator(), features_pointer,
                               -23.025850929940457f);

    auto t = model_->RunEncoder(std::move(x), std::move(x_length));
    auto results =
        decoder_->Decode(std::move(t.first), std::move(t.second), ss, n);

    int32_t frame_shift_ms = 10;
    for (int32_t i = 0; i != n; ++i) {
      auto r = Convert(results[i], symbol_table_, frame_shift_ms,
                       model_->SubsamplingFactor());
      r.text = ApplyInverseTextNormalization(std::move(r.text));
      r.text = ApplyHomophoneReplacer(std::move(r.text));

      ss[i]->SetResult(r);
    }
  }

  OfflineRecognizerConfig GetConfig() const override { return config_; }

  void InitHotwords() {
    // each line in hotwords_file contains space-separated words

    std::ifstream is(config_.hotwords_file);
    if (!is) {
      SHERPA_ONNX_LOGE("Open hotwords file failed: '%s'",
                       config_.hotwords_file.c_str());
      exit(-1);
    }

    if (!EncodeHotwords(is, config_.model_config.modeling_unit, symbol_table_,
                        bpe_encoder_.get(), &hotwords_, &boost_scores_)) {
      SHERPA_ONNX_LOGE(
          "Failed to encode some hotwords, skip them already, see logs above "
          "for details.");
    }
    hotwords_graph_ = std::make_shared<ContextGraph>(
        hotwords_, config_.hotwords_score, boost_scores_);
  }

  template <typename Manager>
  void InitHotwords(Manager *mgr) {
    // each line in hotwords_file contains space-separated words

    auto buf = ReadFile(mgr, config_.hotwords_file);

    std::istringstream is(std::string(buf.begin(), buf.end()));

    if (!is) {
      SHERPA_ONNX_LOGE("Open hotwords file failed: '%s'",
                       config_.hotwords_file.c_str());
      exit(-1);
    }

    if (!EncodeHotwords(is, config_.model_config.modeling_unit, symbol_table_,
                        bpe_encoder_.get(), &hotwords_, &boost_scores_)) {
      SHERPA_ONNX_LOGE(
          "Failed to encode some hotwords, skip them already, see logs above "
          "for details.");
    }
    hotwords_graph_ = std::make_shared<ContextGraph>(
        hotwords_, config_.hotwords_score, boost_scores_);
  }

 private:
  OfflineRecognizerConfig config_;
  SymbolTable symbol_table_;
  std::vector<std::vector<int32_t>> hotwords_;
  std::vector<float> boost_scores_;
  ContextGraphPtr hotwords_graph_;
  std::unique_ptr<ssentencepiece::Ssentencepiece> bpe_encoder_;
  std::unique_ptr<OfflineTransducerModel> model_;
  std::unique_ptr<OfflineTransducerDecoder> decoder_;
  std::unique_ptr<OfflineLM> lm_;
  int32_t unk_id_ = -1;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_TRANSDUCER_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-recognizer-transducer-nemo-impl.h
================================================
// sherpa-onnx/csrc/offline-recognizer-transducer-nemo-impl.h
//
// Copyright (c)  2022-2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_TRANSDUCER_NEMO_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_TRANSDUCER_NEMO_IMPL_H_

#include <fstream>
#include <ios>
#include <memory>
#include <regex>  // NOLINT
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-recognizer-impl.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/offline-transducer-greedy-search-nemo-decoder.h"
#include "sherpa-onnx/csrc/offline-transducer-modified-beam-search-nemo-decoder.h"
#include "sherpa-onnx/csrc/offline-transducer-nemo-model.h"
#include "sherpa-onnx/csrc/pad-sequence.h"
#include "sherpa-onnx/csrc/symbol-table.h"
#include "sherpa-onnx/csrc/transpose.h"
#include "sherpa-onnx/csrc/utils.h"
#include "ssentencepiece/csrc/ssentencepiece.h"

namespace sherpa_onnx {

// defined in ./offline-recognizer-transducer-impl.h
OfflineRecognitionResult Convert(const OfflineTransducerDecoderResult &src,
                                 const SymbolTable &sym_table,
                                 int32_t frame_shift_ms,
                                 int32_t subsampling_factor);

class OfflineRecognizerTransducerNeMoImpl : public OfflineRecognizerImpl {
 public:
  explicit OfflineRecognizerTransducerNeMoImpl(
      const OfflineRecognizerConfig &config)
      : OfflineRecognizerImpl(config),
        config_(config),
        symbol_table_(config_.model_config.tokens),
        model_(std::make_unique<OfflineTransducerNeMoModel>(
            config_.model_config)) {
    if (symbol_table_.Contains("<unk>")) {
      unk_id_ = symbol_table_["<unk>"];
    }

    if (config_.decoding_method == "greedy_search") {
      decoder_ = std::make_unique<OfflineTransducerGreedySearchNeMoDecoder>(
          model_.get(), config_.blank_penalty, model_->IsTDT());
    } else if (config_.decoding_method == "modified_beam_search") {
      // Initialize BPE encoder if provided
      if (!config_.model_config.bpe_vocab.empty()) {
        bpe_encoder_ = std::make_unique<ssentencepiece::Ssentencepiece>(
            config_.model_config.bpe_vocab);
      }

      // Initialize hotwords if provided
      if (!config_.hotwords_file.empty()) {
        InitHotwords();
      }

      decoder_ =
          std::make_unique<OfflineTransducerModifiedBeamSearchNeMoDecoder>(
              model_.get(), config_.max_active_paths, unk_id_,
              config_.blank_penalty, model_->IsTDT(), config_.hotwords_score);
    } else {
      SHERPA_ONNX_LOGE("Unsupported decoding method: %s",
                       config_.decoding_method.c_str());
      SHERPA_ONNX_EXIT(-1);
    }
    PostInit();
  }

  template <typename Manager>
  explicit OfflineRecognizerTransducerNeMoImpl(
      Manager *mgr, const OfflineRecognizerConfig &config)
      : OfflineRecognizerImpl(mgr, config),
        config_(config),
        symbol_table_(mgr, config_.model_config.tokens),
        model_(std::make_unique<OfflineTransducerNeMoModel>(
            mgr, config_.model_config)) {
    if (symbol_table_.Contains("<unk>")) {
      unk_id_ = symbol_table_["<unk>"];
    }

    if (config_.decoding_method == "greedy_search") {
      decoder_ = std::make_unique<OfflineTransducerGreedySearchNeMoDecoder>(
          model_.get(), config_.blank_penalty, model_->IsTDT());
    } else if (config_.decoding_method == "modified_beam_search") {
      // Initialize BPE encoder if provided
      if (!config_.model_config.bpe_vocab.empty()) {
        auto buf = ReadFile(mgr, config_.model_config.bpe_vocab);
        std::istringstream iss(std::string(buf.begin(), buf.end()));
        bpe_encoder_ = std::make_unique<ssentencepiece::Ssentencepiece>(iss);
      }

      // Initialize hotwords if provided
      if (!config_.hotwords_file.empty()) {
        InitHotwords(mgr);
      }

      decoder_ =
          std::make_unique<OfflineTransducerModifiedBeamSearchNeMoDecoder>(
              model_.get(), config_.max_active_paths, unk_id_,
              config_.blank_penalty, model_->IsTDT(), config_.hotwords_score);
    } else {
      SHERPA_ONNX_LOGE("Unsupported decoding method: %s",
                       config_.decoding_method.c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    PostInit();
  }

  std::unique_ptr<OfflineStream> CreateStream(
      const std::string &hotwords) const override {
    auto hws = std::regex_replace(hotwords, std::regex("/"), "\n");
    std::istringstream is(hws);
    std::vector<std::vector<int32_t>> current;
    std::vector<float> current_scores;
    if (!EncodeHotwords(is, config_.model_config.modeling_unit, symbol_table_,
                        bpe_encoder_.get(), &current, &current_scores)) {
      SHERPA_ONNX_LOGE("Encode hotwords failed, skipping, hotwords are : '%s'",
                       hotwords.c_str());
    }

    int32_t num_default_hws = hotwords_.size();
    int32_t num_hws = current.size();

    current.insert(current.end(), hotwords_.begin(), hotwords_.end());

    if (!current_scores.empty() && !boost_scores_.empty()) {
      current_scores.insert(current_scores.end(), boost_scores_.begin(),
                            boost_scores_.end());
    } else if (!current_scores.empty() && boost_scores_.empty()) {
      current_scores.insert(current_scores.end(), num_default_hws,
                            config_.hotwords_score);
    } else if (current_scores.empty() && !boost_scores_.empty()) {
      current_scores.insert(current_scores.end(), num_hws,
                            config_.hotwords_score);
      current_scores.insert(current_scores.end(), boost_scores_.begin(),
                            boost_scores_.end());
    } else {
      // Do nothing.
    }

    auto context_graph = std::make_shared<ContextGraph>(
        current, config_.hotwords_score, current_scores);
    return std::make_unique<OfflineStream>(config_.feat_config, context_graph);
  }

  std::unique_ptr<OfflineStream> CreateStream() const override {
    return std::make_unique<OfflineStream>(config_.feat_config,
                                           hotwords_graph_);
  }

  void DecodeStreams(OfflineStream **ss, int32_t n) const override {
    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    int32_t feat_dim = ss[0]->FeatureDim();

    std::vector<Ort::Value> features;

    features.reserve(n);

    std::vector<std::vector<float>> features_vec(n);
    std::vector<int64_t> features_length_vec(n);
    for (int32_t i = 0; i != n; ++i) {
      auto f = ss[i]->GetFrames();
      int32_t num_frames = f.size() / feat_dim;

      features_length_vec[i] = num_frames;
      features_vec[i] = std::move(f);

      std::array<int64_t, 2> shape = {num_frames, feat_dim};

      Ort::Value x = Ort::Value::CreateTensor(
          memory_info, features_vec[i].data(), features_vec[i].size(),
          shape.data(), shape.size());
      features.push_back(std::move(x));
    }

    std::vector<const Ort::Value *> features_pointer(n);
    for (int32_t i = 0; i != n; ++i) {
      features_pointer[i] = &features[i];
    }

    std::array<int64_t, 1> features_length_shape = {n};
    Ort::Value x_length = Ort::Value::CreateTensor(
        memory_info, features_length_vec.data(), n,
        features_length_shape.data(), features_length_shape.size());

    Ort::Value x = PadSequence(model_->Allocator(), features_pointer, 0);

    auto t = model_->RunEncoder(std::move(x), std::move(x_length));
    // t[0] encoder_out, float tensor, (batch_size, dim, T)
    // t[1] encoder_out_length, int64 tensor, (batch_size,)

    Ort::Value encoder_out = Transpose12(model_->Allocator(), &t[0]);

    auto results =
        decoder_->Decode(std::move(encoder_out), std::move(t[1]), ss, n);

    int32_t frame_shift_ms = 10;
    for (int32_t i = 0; i != n; ++i) {
      auto r = Convert(results[i], symbol_table_, frame_shift_ms,
                       model_->SubsamplingFactor());

      // Remove leading space from BPE tokenization
      if (!r.text.empty() && r.text.front() == ' ') {
        r.text.erase(0, 1);
      }

      r.text = ApplyInverseTextNormalization(std::move(r.text));
      r.text = ApplyHomophoneReplacer(std::move(r.text));

      ss[i]->SetResult(r);
    }
  }

  OfflineRecognizerConfig GetConfig() const override { return config_; }

 private:
  void PostInit() {
    int32_t feat_dim = model_->FeatureDim();

    if (feat_dim > 0) {
      config_.feat_config.feature_dim = feat_dim;
    }

    config_.feat_config.nemo_normalize_type =
        model_->FeatureNormalizationMethod();

    if (model_->IsGigaAM()) {
      config_.feat_config.low_freq = 0;
      config_.feat_config.high_freq = 8000;
      config_.feat_config.remove_dc_offset = false;
      config_.feat_config.preemph_coeff = 0;
      config_.feat_config.window_type = "hann";
      config_.feat_config.feature_dim = 64;

      // see
      // https://github.com/salute-developers/GigaAM/blob/main/gigaam/preprocess.py#L68
      //
      // GigaAM uses n_fft 400
      config_.feat_config.round_to_power_of_two = false;
    } else {
      config_.feat_config.low_freq = 0;
      // config_.feat_config.high_freq = 8000;
      config_.feat_config.is_librosa = true;
      config_.feat_config.remove_dc_offset = false;
      // config_.feat_config.window_type = "hann";
    }

    int32_t vocab_size = model_->VocabSize();

    // check the blank ID
    if (!symbol_table_.Contains("<blk>")) {
      SHERPA_ONNX_LOGE("tokens.txt does not include the blank token <blk>");
      SHERPA_ONNX_EXIT(-1);
    }

    if (symbol_table_["<blk>"] != vocab_size - 1) {
      SHERPA_ONNX_LOGE("<blk> is not the last token!");
      SHERPA_ONNX_EXIT(-1);
    }

    if (symbol_table_.NumSymbols() != vocab_size) {
      SHERPA_ONNX_LOGE("number of lines in tokens.txt %d != %d (vocab_size)",
                       symbol_table_.NumSymbols(), vocab_size);
      SHERPA_ONNX_EXIT(-1);
    }
  }

  void InitHotwords() {
    // each line in hotwords_file contains space-separated words

    std::ifstream is(config_.hotwords_file);
    if (!is) {
      SHERPA_ONNX_LOGE("Open hotwords file failed: '%s'",
                       config_.hotwords_file.c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    if (!EncodeHotwords(is, config_.model_config.modeling_unit, symbol_table_,
                        bpe_encoder_.get(), &hotwords_, &boost_scores_)) {
      SHERPA_ONNX_LOGE(
          "Some hotwords failed to encode and were skipped. See above for "
          "details.");
    }
    hotwords_graph_ = std::make_shared<ContextGraph>(
        hotwords_, config_.hotwords_score, boost_scores_);
  }

  template <typename Manager>
  void InitHotwords(Manager *mgr) {
    // each line in hotwords_file contains space-separated words

    auto buf = ReadFile(mgr, config_.hotwords_file);

    std::istringstream is(std::string(buf.begin(), buf.end()));

    if (!EncodeHotwords(is, config_.model_config.modeling_unit, symbol_table_,
                        bpe_encoder_.get(), &hotwords_, &boost_scores_)) {
      SHERPA_ONNX_LOGE(
          "Some hotwords failed to encode and were skipped. See above for "
          "details.");
    }
    hotwords_graph_ = std::make_shared<ContextGraph>(
        hotwords_, config_.hotwords_score, boost_scores_);
  }

 private:
  OfflineRecognizerConfig config_;
  SymbolTable symbol_table_;
  std::vector<std::vector<int32_t>> hotwords_;
  std::vector<float> boost_scores_;
  ContextGraphPtr hotwords_graph_;
  std::unique_ptr<ssentencepiece::Ssentencepiece> bpe_encoder_;
  std::unique_ptr<OfflineTransducerNeMoModel> model_;
  std::unique_ptr<OfflineTransducerDecoder> decoder_;
  int32_t unk_id_ = -1;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_TRANSDUCER_NEMO_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-recognizer-whisper-impl.h
================================================
// sherpa-onnx/csrc/offline-recognizer-whisper-impl.h
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_WHISPER_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_WHISPER_IMPL_H_

#include <algorithm>
#include <cmath>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/offline-model-config.h"
#include "sherpa-onnx/csrc/offline-recognizer-impl.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/offline-whisper-decoder.h"
#include "sherpa-onnx/csrc/offline-whisper-dtw.h"
#include "sherpa-onnx/csrc/offline-whisper-greedy-search-decoder.h"
#include "sherpa-onnx/csrc/offline-whisper-model.h"
#include "sherpa-onnx/csrc/symbol-table.h"
#include "sherpa-onnx/csrc/transpose.h"

namespace sherpa_onnx {

class OfflineRecognizerWhisperImpl : public OfflineRecognizerImpl {
 public:
  explicit OfflineRecognizerWhisperImpl(const OfflineRecognizerConfig &config)
      : OfflineRecognizerImpl(config),
        config_(config),
        symbol_table_(config_.model_config.tokens),
        model_(std::make_unique<OfflineWhisperModel>(config.model_config)) {
    Init();
  }

  template <typename Manager>
  OfflineRecognizerWhisperImpl(Manager *mgr,
                               const OfflineRecognizerConfig &config)
      : OfflineRecognizerImpl(mgr, config),
        config_(config),
        symbol_table_(mgr, config_.model_config.tokens),
        model_(
            std::make_unique<OfflineWhisperModel>(mgr, config.model_config)) {
    Init();
  }

  void Init() {
    // tokens.txt from whisper is base64 encoded, so we need to decode it
    symbol_table_.ApplyBase64Decode();

    if (config_.decoding_method == "greedy_search") {
      decoder_ = std::make_unique<OfflineWhisperGreedySearchDecoder>(
          config_.model_config.whisper, model_.get());
    } else {
      SHERPA_ONNX_LOGE(
          "Only greedy_search is supported at present for whisper. Given %s",
          config_.decoding_method.c_str());
      exit(-1);
    }
  }

  std::unique_ptr<OfflineStream> CreateStream() const override {
    WhisperTag tag;
    tag.dim = model_->FeatureDim();
    return std::make_unique<OfflineStream>(tag);
  }

  void DecodeStreams(OfflineStream **ss, int32_t n) const override {
    // batch decoding is not implemented yet
    for (int32_t i = 0; i != n; ++i) {
      DecodeStream(ss[i]);
    }
  }

  void SetConfig(const OfflineRecognizerConfig &config) override {
    config_.model_config.whisper = config.model_config.whisper;
  }

  OfflineRecognizerConfig GetConfig() const override { return config_; }

 private:
  void DecodeStream(OfflineStream *s) const {
    decoder_->SetConfig(config_.model_config.whisper);

    int32_t max_num_frames = 3000;
    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    int32_t feat_dim = s->FeatureDim();
    std::vector<float> f = s->GetFrames();
    int32_t num_frames = f.size() / feat_dim;

    // we use 50 here so that there will be some zero tail paddings
    if (num_frames >= max_num_frames - 50) {
      SHERPA_ONNX_LOGE(
          "Only waves less than 30 seconds are supported. We process only the "
          "first 30 seconds and discard the remaining data");
      num_frames = max_num_frames - 50;
    }

    model_->NormalizeFeatures(f.data(), num_frames, feat_dim);

    // note that 1000 is an experience-value.
    // You can replace 1000 by other values, say, 100.
    //
    // Since we have removed the 30 seconds constraint, we need
    // tail_padding_frames so that whisper is able to detect the eot token.
    int32_t tail_padding_frames = 1000;

    if (config_.model_config.whisper.tail_paddings > 0) {
      tail_padding_frames = config_.model_config.whisper.tail_paddings;
    }

    int32_t actual_frames =
        std::min(num_frames + tail_padding_frames, max_num_frames);

    std::array<int64_t, 3> shape{1, actual_frames, feat_dim};

    Ort::Value mel = Ort::Value::CreateTensor<float>(
        model_->Allocator(), shape.data(), shape.size());

    float *p_mel = mel.GetTensorMutableData<float>();
    std::copy(f.data(), f.data() + num_frames * feat_dim, p_mel);

    std::fill_n(p_mel + num_frames * feat_dim,
                (actual_frames - num_frames) * feat_dim, 0);

    mel = Transpose12(model_->Allocator(), &mel);

    try {
      auto cross_kv = model_->ForwardEncoder(std::move(mel));

      auto results = decoder_->Decode(std::move(cross_kv.first),
                                      std::move(cross_kv.second), num_frames);

      auto r = Convert(results[0], symbol_table_);
      s->SetResult(r);
    } catch (const Ort::Exception &ex) {
      SHERPA_ONNX_LOGE(
          "\n\nCaught exception:\n\n%s\n\nReturn an empty result. Number of "
          "input frames: %d, Current tail "
          "paddings: %d. If you see a lot of such exceptions, please consider "
          "using a larger --whisper-tail-paddings",
          ex.what(), num_frames, tail_padding_frames);
      return;
    }
  }

 private:
  OfflineRecognitionResult Convert(const OfflineWhisperDecoderResult &src,
                                   const SymbolTable &sym_table) const {
    OfflineRecognitionResult r;
    r.tokens.reserve(src.tokens.size());

    std::string text;

    // Get timestamp begin token ID to filter out timestamp tokens
    int32_t timestamp_begin = model_->TimestampBegin();
    bool enable_segment_timestamps =
        config_.model_config.whisper.enable_segment_timestamps;

    // Build text, skipping timestamp tokens if in segment timestamp mode
    for (auto i : src.tokens) {
      // Skip timestamp tokens (they are >= timestamp_begin)
      if (enable_segment_timestamps && i >= timestamp_begin) {
        continue;
      }

      if (!sym_table.Contains(i)) {
        continue;
      }

      std::string s = sym_table[i];
      s = ApplyInverseTextNormalization(s);
      s = ApplyHomophoneReplacer(std::move(s));

      text += s;
      r.tokens.push_back(s);
    }

    r.text = text;
    r.lang = src.lang;

    // Convert segments from segment timestamp mode to parallel vectors
    if (enable_segment_timestamps && !src.segments.empty()) {
      r.segment_timestamps.reserve(src.segments.size());
      r.segment_durations.reserve(src.segments.size());
      r.segment_texts.reserve(src.segments.size());

      // Total audio duration for fallback when segment has no explicit end time
      float total_audio_duration = src.num_audio_frames * 0.02f;

      for (const auto &seg : src.segments) {
        r.segment_timestamps.push_back(seg.start_time);
        // Use remaining audio duration if end_time is sentinel (-1.0f)
        float duration = (seg.end_time == -1.0f)
                             ? (total_audio_duration - seg.start_time)
                             : (seg.end_time - seg.start_time);
        // Clamp to non-negative to handle rounding/model quirks
        duration = std::max(0.0f, duration);
        r.segment_durations.push_back(duration);

        // Convert token IDs to text
        std::string seg_text;
        for (int32_t tok_id : seg.token_ids) {
          if (sym_table.Contains(tok_id)) {
            std::string s = sym_table[tok_id];
            s = ApplyInverseTextNormalization(s);
            s = ApplyHomophoneReplacer(std::move(s));
            seg_text += s;
          }
        }
        r.segment_texts.push_back(std::move(seg_text));
      }
    }

    // Compute token-level timestamps using DTW if enabled
    if (config_.model_config.whisper.enable_token_timestamps &&
        !src.attention_weights.empty() &&
        !r.tokens.empty()) {
      ComputeTimestamps(src, r);
    }

    return r;
  }

  // Compute token-level timestamps using cross-attention DTW
  void ComputeTimestamps(const OfflineWhisperDecoderResult &src,
                         OfflineRecognitionResult &r) const {
    WhisperDTW dtw;

    // Note: src.attention includes all tokens (initial + decoded)
    // The first few are SOT sequence tokens which DTW will skip.
    // Initial tokens are: [sot, lang, task, no_timestamps] for multilingual,
    // or [sot, no_timestamps] for English-only models.
    int32_t sot_sequence_length =
        static_cast<int32_t>(model_->GetInitialTokens().size());

    // Use ComputeTokenTimings which extracts both start times and durations
    // directly from the DTW jump_times, following OpenAI's approach:
    //   start_times[i] = jump_times[i]
    //   end_times[i] = jump_times[i+1]
    //   durations[i] = end_times[i] - start_times[i]
    // Pass timestamp_token_indices to filter out timestamp tokens from DTW
    // (needed when enable_segment_timestamps=true to avoid alignment issues)
    TokenTimingResult timing = dtw.ComputeTokenTimings(
        src.attention_weights.data(), src.attention_n_heads,
        src.attention_n_tokens, src.attention_n_frames, src.num_audio_frames,
        sot_sequence_length, static_cast<int32_t>(r.tokens.size()),
        src.timestamp_token_indices);

    // Populate timestamps and durations
    r.timestamps = std::move(timing.start_times);
    r.durations = std::move(timing.durations);

    // Ensure vectors match token count
    if (r.timestamps.size() != r.tokens.size()) {
      SHERPA_ONNX_LOGE(
          "DTW returned %zu timestamps for %zu tokens, padding/truncating",
          r.timestamps.size(), r.tokens.size());
    }
    float fill_time = r.timestamps.empty() ? 0.0f : r.timestamps.back();
    r.timestamps.resize(r.tokens.size(), fill_time);
    r.durations.resize(r.tokens.size(), 0.0f);

    // Clamp token end times to segment boundaries (like OpenAI timing.py)
    // If a token ends more than 0.5s after segment end, truncate it.
    // This prevents DTW-derived timings from extending past segment bounds.
    if (!src.segments.empty() && !r.timestamps.empty()) {
      float segment_end = src.segments.back().end_time;
      if (segment_end > 0) {
        for (size_t i = 0; i < r.timestamps.size(); ++i) {
          float token_end = r.timestamps[i] + r.durations[i];
          // Like OpenAI: if token_end > segment_end + 0.5, clamp it
          if (token_end > segment_end + 0.5f) {
            r.durations[i] = std::max(0.0f, segment_end - r.timestamps[i]);
          }
        }
      }
    }
  }

 private:
  OfflineRecognizerConfig config_;
  SymbolTable symbol_table_;
  std::unique_ptr<OfflineWhisperModel> model_;
  std::unique_ptr<OfflineWhisperDecoder> decoder_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_WHISPER_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-recognizer-whisper-tpl-impl.h
================================================
// sherpa-onnx/csrc/offline-recognizer-whisper-tpl-impl.h
//
// Copyright (c)  2026  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_WHISPER_TPL_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_WHISPER_TPL_IMPL_H_

#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/math.h"
#include "sherpa-onnx/csrc/offline-model-config.h"
#include "sherpa-onnx/csrc/offline-recognizer-impl.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/symbol-table.h"

namespace sherpa_onnx {

template <typename WhisperModel>
class OfflineRecognizerWhisperTplImpl : public OfflineRecognizerImpl {
 public:
  explicit OfflineRecognizerWhisperTplImpl(
      const OfflineRecognizerConfig &config)
      : OfflineRecognizerImpl(config),
        config_(config),
        symbol_table_(config_.model_config.tokens),
        model_(std::make_unique<WhisperModel>(config.model_config)) {
    Init();
  }

  template <typename Manager>
  OfflineRecognizerWhisperTplImpl(Manager *mgr,
                                  const OfflineRecognizerConfig &config)
      : OfflineRecognizerImpl(mgr, config),
        config_(config),
        symbol_table_(mgr, config_.model_config.tokens),
        model_(std::make_unique<WhisperModel>(mgr, config.model_config)) {
    Init();
  }

  std::unique_ptr<OfflineStream> CreateStream() const override {
    WhisperTag tag;
    tag.dim = model_->FeatureDim();
    return std::make_unique<OfflineStream>(tag);
  }

  void DecodeStreams(OfflineStream **ss, int32_t n) const override {
    // batch decoding is not implemented yet
    for (int32_t i = 0; i != n; ++i) {
      DecodeStream(ss[i]);
    }
  }

  void SetConfig(const OfflineRecognizerConfig &config) override {
    config_.model_config.whisper = config.model_config.whisper;
  }

  OfflineRecognizerConfig GetConfig() const override { return config_; }

 private:
  void Init() {
    // tokens.txt from whisper is base64 encoded, so we need to decode it
    symbol_table_.ApplyBase64Decode();

    if (config_.decoding_method == "greedy_search") {
      SHERPA_ONNX_LOGE("use greedy_search");
    } else {
      SHERPA_ONNX_LOGE(
          "Only greedy_search is supported at present for whisper. Given '%s'",
          config_.decoding_method.c_str());
      SHERPA_ONNX_EXIT(-1);
    }
  }

  void DecodeStream(OfflineStream *s) const {
    int32_t feat_dim = s->FeatureDim();
    std::vector<float> f = s->GetFrames();
    int32_t num_frames = f.size() / feat_dim;

    NormalizeWhisperFeatures(f.data(), num_frames, feat_dim);

    auto r = model_->Run(std::move(f));
    auto res = Convert(r, symbol_table_);

    s->SetResult(res);
  }

  OfflineRecognitionResult Convert(const OfflineWhisperDecoderResult &src,
                                   const SymbolTable &sym_table) const {
    OfflineRecognitionResult r;
    r.tokens.reserve(src.tokens.size());

    std::string text;
    for (auto i : src.tokens) {
      if (!sym_table.Contains(i)) {
        continue;
      }

      std::string s = sym_table[i];
      s = ApplyInverseTextNormalization(s);
      s = ApplyHomophoneReplacer(std::move(s));

      text += s;
      r.tokens.push_back(s);
    }

    r.text = text;
    r.lang = src.lang;

    return r;
  }

 private:
  OfflineRecognizerConfig config_;
  SymbolTable symbol_table_;
  std::unique_ptr<WhisperModel> model_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_WHISPER_TPL_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-recognizer.cc
================================================
// sherpa-onnx/csrc/offline-recognizer.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-recognizer.h"

#include <memory>
#include <string>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-lm-config.h"
#include "sherpa-onnx/csrc/offline-recognizer-impl.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

void OfflineRecognizerConfig::Register(ParseOptions *po) {
  feat_config.Register(po);
  model_config.Register(po);
  lm_config.Register(po);
  ctc_fst_decoder_config.Register(po);
  hr.Register(po);

  po->Register(
      "decoding-method", &decoding_method,
      "decoding method,"
      "Valid values: greedy_search, modified_beam_search. "
      "modified_beam_search is applicable only for transducer models.");

  po->Register("max-active-paths", &max_active_paths,
               "Used only when decoding_method is modified_beam_search");

  po->Register("blank-penalty", &blank_penalty,
               "The penalty applied on blank symbol during decoding. "
               "Note: It is a positive value. "
               "Increasing value will lead to lower deletion at the cost"
               "of higher insertions. "
               "Currently only applicable for transducer models.");

  po->Register(
      "hotwords-file", &hotwords_file,
      "The file containing hotwords, one words/phrases per line, For example: "
      "HELLO WORLD"
      "你好世界");

  po->Register("hotwords-score", &hotwords_score,
               "The bonus score for each token in context word/phrase. "
               "Used only when decoding_method is modified_beam_search");

  po->Register(
      "rule-fsts", &rule_fsts,
      "If not empty, it specifies fsts for inverse text normalization. "
      "If there are multiple fsts, they are separated by a comma.");

  po->Register(
      "rule-fars", &rule_fars,
      "If not empty, it specifies fst archives for inverse text normalization. "
      "If there are multiple archives, they are separated by a comma.");
}

bool OfflineRecognizerConfig::Validate() const {
  if (decoding_method == "modified_beam_search" && !lm_config.model.empty()) {
    if (max_active_paths <= 0) {
      SHERPA_ONNX_LOGE("max_active_paths is less than 0! Given: %d",
                       max_active_paths);
      return false;
    }
    if (!lm_config.Validate()) {
      return false;
    }
  }

  if (!hotwords_file.empty() && decoding_method != "modified_beam_search") {
    SHERPA_ONNX_LOGE(
        "Please use --decoding-method=modified_beam_search if you"
        " provide --hotwords-file. Given --decoding-method='%s'",
        decoding_method.c_str());
    return false;
  }

  if (!ctc_fst_decoder_config.graph.empty() &&
      !ctc_fst_decoder_config.Validate()) {
    SHERPA_ONNX_LOGE("Errors in fst_decoder");
    return false;
  }

  if (!hotwords_file.empty() && !FileExists(hotwords_file)) {
    SHERPA_ONNX_LOGE("--hotwords-file: '%s' does not exist",
                     hotwords_file.c_str());
    return false;
  }

  if (!rule_fsts.empty()) {
    std::vector<std::string> files;
    SplitStringToVector(rule_fsts, ",", false, &files);
    for (const auto &f : files) {
      if (!FileExists(f)) {
        SHERPA_ONNX_LOGE("Rule fst '%s' does not exist. ", f.c_str());
        return false;
      }
    }
  }

  if (!rule_fars.empty()) {
    std::vector<std::string> files;
    SplitStringToVector(rule_fars, ",", false, &files);
    for (const auto &f : files) {
      if (!FileExists(f)) {
        SHERPA_ONNX_LOGE("Rule far '%s' does not exist. ", f.c_str());
        return false;
      }
    }
  }

  if (!hr.lexicon.empty() && !hr.rule_fsts.empty() && !hr.Validate()) {
    return false;
  }

  return model_config.Validate();
}

std::string OfflineRecognizerConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineRecognizerConfig(";
  os << "feat_config=" << feat_config.ToString() << ", ";
  os << "model_config=" << model_config.ToString() << ", ";
  os << "lm_config=" << lm_config.ToString() << ", ";
  os << "ctc_fst_decoder_config=" << ctc_fst_decoder_config.ToString() << ", ";

  os << "decoding_method=\"" << decoding_method << "\", ";
  os << "max_active_paths=" << max_active_paths << ", ";
  os << "hotwords_file=\"" << hotwords_file << "\", ";
  os << "hotwords_score=" << hotwords_score << ", ";
  os << "blank_penalty=" << blank_penalty << ", ";
  os << "rule_fsts=\"" << rule_fsts << "\", ";
  os << "rule_fars=\"" << rule_fars << "\", ";
  os << "hr=" << hr.ToString() << ")";

  return os.str();
}

template <typename Manager>
OfflineRecognizer::OfflineRecognizer(Manager *mgr,
                                     const OfflineRecognizerConfig &config)
    : impl_(OfflineRecognizerImpl::Create(mgr, config)) {}

OfflineRecognizer::OfflineRecognizer(const OfflineRecognizerConfig &config)
    : impl_(OfflineRecognizerImpl::Create(config)) {}

OfflineRecognizer::~OfflineRecognizer() = default;

std::unique_ptr<OfflineStream> OfflineRecognizer::CreateStream(
    const std::string &hotwords) const {
  return impl_->CreateStream(hotwords);
}

std::unique_ptr<OfflineStream> OfflineRecognizer::CreateStream() const {
  return impl_->CreateStream();
}

void OfflineRecognizer::DecodeStreams(OfflineStream **ss, int32_t n) const {
  impl_->DecodeStreams(ss, n);
}

void OfflineRecognizer::SetConfig(const OfflineRecognizerConfig &config) {
  impl_->SetConfig(config);
}

OfflineRecognizerConfig OfflineRecognizer::GetConfig() const {
  return impl_->GetConfig();
}

#if __ANDROID_API__ >= 9
template OfflineRecognizer::OfflineRecognizer(
    AAssetManager *mgr, const OfflineRecognizerConfig &config);
#endif

#if __OHOS__
template OfflineRecognizer::OfflineRecognizer(
    NativeResourceManager *mgr, const OfflineRecognizerConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-recognizer.h
================================================
// sherpa-onnx/csrc/offline-recognizer.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_H_
#define SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_H_

#include <memory>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/features.h"
#include "sherpa-onnx/csrc/homophone-replacer.h"
#include "sherpa-onnx/csrc/offline-ctc-fst-decoder-config.h"
#include "sherpa-onnx/csrc/offline-lm-config.h"
#include "sherpa-onnx/csrc/offline-model-config.h"
#include "sherpa-onnx/csrc/offline-stream.h"
#include "sherpa-onnx/csrc/offline-transducer-model-config.h"
#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineRecognitionResult;

struct OfflineRecognizerConfig {
  FeatureExtractorConfig feat_config;
  OfflineModelConfig model_config;
  OfflineLMConfig lm_config;
  OfflineCtcFstDecoderConfig ctc_fst_decoder_config;

  std::string decoding_method = "greedy_search";
  int32_t max_active_paths = 4;

  std::string hotwords_file;
  float hotwords_score = 1.5;

  float blank_penalty = 0.0;

  // If there are multiple rules, they are applied from left to right.
  std::string rule_fsts;

  // If there are multiple FST archives, they are applied from left to right.
  std::string rule_fars;
  HomophoneReplacerConfig hr;

  // only greedy_search is implemented
  // TODO(fangjun): Implement modified_beam_search

  OfflineRecognizerConfig() = default;
  OfflineRecognizerConfig(
      const FeatureExtractorConfig &feat_config,
      const OfflineModelConfig &model_config, const OfflineLMConfig &lm_config,
      const OfflineCtcFstDecoderConfig &ctc_fst_decoder_config,
      const std::string &decoding_method, int32_t max_active_paths,
      const std::string &hotwords_file, float hotwords_score,
      float blank_penalty, const std::string &rule_fsts,
      const std::string &rule_fars, const HomophoneReplacerConfig &hr)
      : feat_config(feat_config),
        model_config(model_config),
        lm_config(lm_config),
        ctc_fst_decoder_config(ctc_fst_decoder_config),
        decoding_method(decoding_method),
        max_active_paths(max_active_paths),
        hotwords_file(hotwords_file),
        hotwords_score(hotwords_score),
        blank_penalty(blank_penalty),
        rule_fsts(rule_fsts),
        rule_fars(rule_fars),
        hr(hr) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

class OfflineRecognizerImpl;

class OfflineRecognizer {
 public:
  ~OfflineRecognizer();

  template <typename Manager>
  OfflineRecognizer(Manager *mgr, const OfflineRecognizerConfig &config);

  explicit OfflineRecognizer(const OfflineRecognizerConfig &config);

  /// Create a stream for decoding.
  std::unique_ptr<OfflineStream> CreateStream() const;

  /** Create a stream for decoding.
   *
   *  @param The hotwords for this string, it might contain several hotwords,
   *         the hotwords are separated by "/". In each of the hotwords, there
   *         are cjkchars or bpes, the bpe/cjkchar are separated by space (" ").
   *         For example, hotwords I LOVE YOU and HELLO WORLD, looks like:
   *
   *         "▁I ▁LOVE ▁YOU/▁HE LL O ▁WORLD"
   */
  std::unique_ptr<OfflineStream> CreateStream(
      const std::string &hotwords) const;

  /** Decode a single stream
   *
   * @param s The stream to decode.
   */
  void DecodeStream(OfflineStream *s) const {
    OfflineStream *ss[1] = {s};
    DecodeStreams(ss, 1);
  }

  /** Decode a list of streams.
   *
   * @param ss Pointer to an array of streams.
   * @param n  Size of the input array.
   */
  void DecodeStreams(OfflineStream **ss, int32_t n) const;

  /** Onnxruntime Session objects are not affected by this method.
   * The exact behavior can be defined by a specific recognizer impl.
   * For instance, for the whisper recognizer, you can retrieve the language and
   * task from the config and ignore any remaining fields in `config`.
   */
  void SetConfig(const OfflineRecognizerConfig &config);

  OfflineRecognizerConfig GetConfig() const;

 private:
  std::unique_ptr<OfflineRecognizerImpl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_RECOGNIZER_H_


================================================
FILE: sherpa-onnx/csrc/offline-rnn-lm.cc
================================================
// sherpa-onnx/csrc/offline-rnn-lm.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-rnn-lm.h"

#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class OfflineRnnLM::Impl {
 public:
  explicit Impl(const OfflineLMConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_{GetSessionOptions(config)},
        allocator_{} {
    auto buf = ReadFile(config_.model);
    Init(buf.data(), buf.size());
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineLMConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_{GetSessionOptions(config)},
        allocator_{} {
    auto buf = ReadFile(mgr, config_.model);
    Init(buf.data(), buf.size());
  }

  Ort::Value Rescore(Ort::Value x, Ort::Value x_lens) {
    std::array<Ort::Value, 2> inputs = {std::move(x), std::move(x_lens)};

    auto out =
        sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(),
                   output_names_ptr_.data(), output_names_ptr_.size());

    return std::move(out[0]);
  }

 private:
  void Init(void *model_data, size_t model_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);

    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);
  }

 private:
  OfflineLMConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;
};

OfflineRnnLM::OfflineRnnLM(const OfflineLMConfig &config)
    : impl_(std::make_unique<Impl>(config)), OfflineLM(config) {}

template <typename Manager>
OfflineRnnLM::OfflineRnnLM(Manager *mgr, const OfflineLMConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)), OfflineLM(config) {}

OfflineRnnLM::~OfflineRnnLM() = default;

Ort::Value OfflineRnnLM::Rescore(Ort::Value x, Ort::Value x_lens) {
  return impl_->Rescore(std::move(x), std::move(x_lens));
}

#if __ANDROID_API__ >= 9
template OfflineRnnLM::OfflineRnnLM(AAssetManager *mgr,
                                    const OfflineLMConfig &config);
#endif

#if __OHOS__
template OfflineRnnLM::OfflineRnnLM(NativeResourceManager *mgr,
                                    const OfflineLMConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-rnn-lm.h
================================================
// sherpa-onnx/csrc/offline-rnn-lm.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_RNN_LM_H_
#define SHERPA_ONNX_CSRC_OFFLINE_RNN_LM_H_

#include <memory>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-lm-config.h"
#include "sherpa-onnx/csrc/offline-lm.h"

namespace sherpa_onnx {

class OfflineRnnLM : public OfflineLM {
 public:
  ~OfflineRnnLM() override;

  explicit OfflineRnnLM(const OfflineLMConfig &config);

  template <typename Manager>
  OfflineRnnLM(Manager *mgr, const OfflineLMConfig &config);

  /** Rescore a batch of sentences.
   *
   * @param x A 2-D tensor of shape (N, L) with data type int64.
   * @param x_lens A 1-D tensor of shape (N,) with data type int64.
   *               It contains number of valid tokens in x before padding.
   * @return Return a 1-D tensor of shape (N,) containing the log likelihood
   *         of each utterance. Its data type is float32.
   *
   * Caution: It returns log likelihood, not negative log likelihood (nll).
   */
  Ort::Value Rescore(Ort::Value x, Ort::Value x_lens) override;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_RNN_LM_H_


================================================
FILE: sherpa-onnx/csrc/offline-sense-voice-model-config.cc
================================================
// sherpa-onnx/csrc/offline-sense-voice-model-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-sense-voice-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

void OfflineSenseVoiceModelConfig::Register(ParseOptions *po) {
  po->Register("sense-voice-model", &model,
               "Path to model.onnx of SenseVoice.");
  po->Register(
      "sense-voice-language", &language,
      "Valid values: auto, zh, en, ja, ko, yue. If left empty, auto is used");
  po->Register(
      "sense-voice-use-itn", &use_itn,
      "True to enable inverse text normalization. False to disable it.");

  std::string prefix = "sense-voice";
  ParseOptions p(prefix, po);

  qnn_config.Register(&p);
}

bool OfflineSenseVoiceModelConfig::Validate() const {
  if (qnn_config.context_binary.empty()) {
    if (model.empty()) {
      SHERPA_ONNX_LOGE("Please provide a senseVoice model");
      return false;
    }

    if (!FileExists(model)) {
      SHERPA_ONNX_LOGE("SenseVoice model '%s' does not exist", model.c_str());
      return false;
    }
  }

  if (!language.empty()) {
    if (language != "auto" && language != "zh" && language != "en" &&
        language != "ja" && language != "ko" && language != "yue") {
      SHERPA_ONNX_LOGE(
          "Invalid sense-voice-language: '%s'. Valid values are: auto, zh, en, "
          "ja, ko, yue. Or you can leave it empty to use 'auto'",
          language.c_str());

      return false;
    }
  }

  if (model.empty() && !qnn_config.context_binary.empty()) {
    // we require that the context_binary exists
    if (!FileExists(qnn_config.context_binary)) {
      SHERPA_ONNX_LOGE(
          "Model is empty, but you provide a context binary that does not "
          "exist");
      return false;
    }
  }

  if (EndsWith(model, ".so") || EndsWith(model, ".bin") ||
      (model.empty() && !qnn_config.context_binary.empty())) {
    return qnn_config.Validate();
  }

  return true;
}

std::string OfflineSenseVoiceModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineSenseVoiceModelConfig(";
  os << "model=\"" << model << "\", ";

  if (!qnn_config.backend_lib.empty()) {
    os << "qnn_config=" << qnn_config.ToString() << ", ";
  }

  os << "language=\"" << language << "\", ";
  os << "use_itn=" << (use_itn ? "True" : "False") << ")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-sense-voice-model-config.h
================================================
// sherpa-onnx/csrc/offline-sense-voice-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_SENSE_VOICE_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SENSE_VOICE_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"
#include "sherpa-onnx/csrc/qnn-config.h"

namespace sherpa_onnx {

struct OfflineSenseVoiceModelConfig {
  std::string model;

  // "" or "auto" to let the model recognize the language
  // valid values:
  //  zh, en, ja, ko, yue, auto
  std::string language = "auto";

  // true to use inverse text normalization
  // false to not use inverse text normalization
  bool use_itn = false;

  QnnConfig qnn_config;

  OfflineSenseVoiceModelConfig() = default;
  OfflineSenseVoiceModelConfig(const std::string &model,
                               const std::string &language, bool use_itn)
      : model(model), language(language), use_itn(use_itn) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_SENSE_VOICE_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-sense-voice-model-meta-data.h
================================================
// sherpa-onnx/csrc/offline-sense-voice-model-meta-data.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_SENSE_VOICE_MODEL_META_DATA_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SENSE_VOICE_MODEL_META_DATA_H_

#include <string>
#include <unordered_map>
#include <vector>

namespace sherpa_onnx {

struct OfflineSenseVoiceModelMetaData {
  // ID for using inverse text normalization
  int32_t with_itn_id = 14;

  // ID for not using inverse text normalization
  int32_t without_itn_id = 15;

  int32_t window_size = 7;   // lfr_m
  int32_t window_shift = 6;  // lfr_n
  int32_t vocab_size = 25055;

  int32_t subsampling_factor = 1;

  // Usually 0 for SenseVoice models.
  // 0 means samples are scaled to [-32768, 32767] before are sent to the
  // feature extractor
  int32_t normalize_samples = 0;

  int32_t blank_id = 0;

  // possible values:
  // zh, en, ja, ko, yue, auto
  // where
  //  zh is Chinese (Mandarin)
  //  en is English
  //  ja is Japanese
  //  ko is Korean
  //  yue is Cantonese
  //  auto is to let the model recognize the language
  std::unordered_map<std::string, int32_t> lang2id{
      {"auto", 0}, {"zh", 3}, {"en", 4}, {"yue", 7}, {"ja", 11}, {"ko", 12},
  };

  std::vector<float> neg_mean;    // not used in rk npu and ascend npu
  std::vector<float> inv_stddev;  // not used in rk npu and ascend npu

  bool is_funasr_nano = false;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_SENSE_VOICE_MODEL_META_DATA_H_


================================================
FILE: sherpa-onnx/csrc/offline-sense-voice-model.cc
================================================
// sherpa-onnx/csrc/offline-sense-voice-model.cc
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-sense-voice-model.h"

#include <algorithm>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class OfflineSenseVoiceModel::Impl {
 public:
  explicit Impl(const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(config_.sense_voice.model);
    Init(buf.data(), buf.size());
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(mgr, config_.sense_voice.model);
    Init(buf.data(), buf.size());
  }

  Ort::Value Forward(Ort::Value features, Ort::Value features_length,
                     Ort::Value language, Ort::Value text_norm) {
    std::array<Ort::Value, 4> inputs = {
        std::move(features),
        std::move(features_length),
        std::move(language),
        std::move(text_norm),
    };

    auto ans =
        sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(),
                   output_names_ptr_.data(), output_names_ptr_.size());
    return std::move(ans[0]);
  }

  Ort::Value Forward(Ort::Value features) {
    auto ans = sess_->Run({}, input_names_ptr_.data(), &features, 1,
                          output_names_ptr_.data(), output_names_ptr_.size());
    return std::move(ans[0]);
  }

  const OfflineSenseVoiceModelMetaData &GetModelMetadata() const {
    return meta_data_;
  }

  OrtAllocator *Allocator() { return allocator_; }

 private:
  void Init(void *model_data, size_t model_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);

    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      PrintModelMetadata(os, meta_data);
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below

    std::string comment;
    SHERPA_ONNX_READ_META_DATA_STR_ALLOW_EMPTY(comment, "comment");

    meta_data_.is_funasr_nano = Contains(comment, "Nano");

    SHERPA_ONNX_READ_META_DATA(meta_data_.vocab_size, "vocab_size");
    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.blank_id, "blank_id", 0);

    SHERPA_ONNX_READ_META_DATA(meta_data_.window_size, "lfr_window_size");
    SHERPA_ONNX_READ_META_DATA(meta_data_.window_shift, "lfr_window_shift");
    SHERPA_ONNX_READ_META_DATA(meta_data_.normalize_samples,
                               "normalize_samples");

    if (!meta_data_.is_funasr_nano) {
      SHERPA_ONNX_READ_META_DATA(meta_data_.with_itn_id, "with_itn");

      SHERPA_ONNX_READ_META_DATA(meta_data_.without_itn_id, "without_itn");

      int32_t lang_auto = 0;
      int32_t lang_zh = 0;
      int32_t lang_en = 0;
      int32_t lang_ja = 0;
      int32_t lang_ko = 0;
      int32_t lang_yue = 0;

      SHERPA_ONNX_READ_META_DATA(lang_auto, "lang_auto");
      SHERPA_ONNX_READ_META_DATA(lang_zh, "lang_zh");
      SHERPA_ONNX_READ_META_DATA(lang_en, "lang_en");
      SHERPA_ONNX_READ_META_DATA(lang_ja, "lang_ja");
      SHERPA_ONNX_READ_META_DATA(lang_ko, "lang_ko");
      SHERPA_ONNX_READ_META_DATA(lang_yue, "lang_yue");

      meta_data_.lang2id = {
          {"auto", lang_auto}, {"zh", lang_zh}, {"en", lang_en},
          {"ja", lang_ja},     {"ko", lang_ko}, {"yue", lang_yue},
      };

      SHERPA_ONNX_READ_META_DATA_VEC_FLOAT(meta_data_.neg_mean, "neg_mean");
      SHERPA_ONNX_READ_META_DATA_VEC_FLOAT(meta_data_.inv_stddev, "inv_stddev");
    }
  }

 private:
  OfflineModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;

  OfflineSenseVoiceModelMetaData meta_data_;
};

OfflineSenseVoiceModel::OfflineSenseVoiceModel(const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineSenseVoiceModel::OfflineSenseVoiceModel(Manager *mgr,
                                               const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineSenseVoiceModel::~OfflineSenseVoiceModel() = default;

Ort::Value OfflineSenseVoiceModel::Forward(Ort::Value features,
                                           Ort::Value features_length,
                                           Ort::Value language,
                                           Ort::Value text_norm) const {
  return impl_->Forward(std::move(features), std::move(features_length),
                        std::move(language), std::move(text_norm));
}

Ort::Value OfflineSenseVoiceModel::Forward(Ort::Value features) const {
  return impl_->Forward(std::move(features));
}

const OfflineSenseVoiceModelMetaData &OfflineSenseVoiceModel::GetModelMetadata()
    const {
  return impl_->GetModelMetadata();
}

OrtAllocator *OfflineSenseVoiceModel::Allocator() const {
  return impl_->Allocator();
}

#if __ANDROID_API__ >= 9
template OfflineSenseVoiceModel::OfflineSenseVoiceModel(
    AAssetManager *mgr, const OfflineModelConfig &config);
#endif

#if __OHOS__
template OfflineSenseVoiceModel::OfflineSenseVoiceModel(
    NativeResourceManager *mgr, const OfflineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-sense-voice-model.h
================================================
// sherpa-onnx/csrc/offline-sense-voice-model.h
//
// Copyright (c)  2022-2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_SENSE_VOICE_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SENSE_VOICE_MODEL_H_

#include <memory>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-model-config.h"
#include "sherpa-onnx/csrc/offline-sense-voice-model-meta-data.h"

namespace sherpa_onnx {

class OfflineSenseVoiceModel {
 public:
  explicit OfflineSenseVoiceModel(const OfflineModelConfig &config);

  template <typename Manager>
  OfflineSenseVoiceModel(Manager *mgr, const OfflineModelConfig &config);

  ~OfflineSenseVoiceModel();

  /** Run the forward method of the model.
   *
   * @param features  A tensor of shape (N, T, C). It is changed in-place.
   * @param features_length  A 1-D tensor of shape (N,) containing number of
   *                         valid frames in `features` before padding.
   *                         Its dtype is int32_t.
   * @param language A 1-D tensor of shape (N,) with dtype int32_t
   * @param text_norm A 1-D tensor of shape (N,) with dtype int32_t
   *
   * @return Return logits of shape (N, T, C) with dtype float
   *
   * Note: The subsampling factor is 1 for SenseVoice, so there is
   *       no need to output logits_length.
   */
  Ort::Value Forward(Ort::Value features, Ort::Value features_length,
                     Ort::Value language, Ort::Value text_norm) const;

  /** For FunASR-Nano
   *
   * @param features A tensor of shape (1, T, C) with dtype float32
   * @return Return logits of shape (1, T, C) with dtype float32
   */
  Ort::Value Forward(Ort::Value features) const;

  const OfflineSenseVoiceModelMetaData &GetModelMetadata() const;

  /** Return an allocator for allocating memory
   */
  OrtAllocator *Allocator() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_SENSE_VOICE_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/offline-source-separation-impl.cc
================================================
// sherpa-onnx/csrc/offline-source-separation-impl.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-source-separation-impl.h"

#include <algorithm>
#include <memory>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/offline-source-separation-spleeter-impl.h"
#include "sherpa-onnx/csrc/offline-source-separation-uvr-impl.h"
#include "sherpa-onnx/csrc/resample.h"

namespace sherpa_onnx {

std::unique_ptr<OfflineSourceSeparationImpl>
OfflineSourceSeparationImpl::Create(
    const OfflineSourceSeparationConfig &config) {
  if (!config.model.spleeter.vocals.empty()) {
    return std::make_unique<OfflineSourceSeparationSpleeterImpl>(config);
  }

  if (!config.model.uvr.model.empty()) {
    return std::make_unique<OfflineSourceSeparationUvrImpl>(config);
  }

  SHERPA_ONNX_LOGE("Please provide a separation model!");

  return nullptr;
}

template <typename Manager>
std::unique_ptr<OfflineSourceSeparationImpl>
OfflineSourceSeparationImpl::Create(
    Manager *mgr, const OfflineSourceSeparationConfig &config) {
  if (!config.model.spleeter.vocals.empty()) {
    return std::make_unique<OfflineSourceSeparationSpleeterImpl>(mgr, config);
  }

  if (!config.model.uvr.model.empty()) {
    return std::make_unique<OfflineSourceSeparationUvrImpl>(mgr, config);
  }

  SHERPA_ONNX_LOGE("Please provide a separation model!");

  return nullptr;
}

OfflineSourceSeparationInput OfflineSourceSeparationImpl::Resample(
    const OfflineSourceSeparationInput &input, bool debug /*= false*/) const {
  const OfflineSourceSeparationInput *p_input = &input;
  OfflineSourceSeparationInput tmp_input;

  int32_t output_sample_rate = GetOutputSampleRate();

  if (input.sample_rate != output_sample_rate) {
    SHERPA_ONNX_LOGE(
        "Creating a resampler:\n"
        "   in_sample_rate: %d\n"
        "   output_sample_rate: %d\n",
        input.sample_rate, output_sample_rate);

    float min_freq = std::min<int32_t>(input.sample_rate, output_sample_rate);
    float lowpass_cutoff = 0.99 * 0.5 * min_freq;

    int32_t lowpass_filter_width = 6;
    auto resampler =
        std::make_unique<LinearResample>(input.sample_rate, output_sample_rate,
                                         lowpass_cutoff, lowpass_filter_width);

    std::vector<float> s;
    for (const auto &samples : input.samples.data) {
      resampler->Reset();
      resampler->Resample(samples.data(), samples.size(), true, &s);
      tmp_input.samples.data.push_back(std::move(s));
    }

    tmp_input.sample_rate = output_sample_rate;
    p_input = &tmp_input;
  }

  if (p_input->samples.data.size() > 1) {
    if (debug) {
      SHERPA_ONNX_LOGE("input ch1 samples size: %d",
                       static_cast<int32_t>(p_input->samples.data[1].size()));
    }

    if (p_input->samples.data[0].size() != p_input->samples.data[1].size()) {
      SHERPA_ONNX_LOGE("ch0 samples size %d vs ch1 samples size %d",
                       static_cast<int32_t>(p_input->samples.data[0].size()),
                       static_cast<int32_t>(p_input->samples.data[1].size()));

      SHERPA_ONNX_EXIT(-1);
    }
  }

  return *p_input;
}

#if __ANDROID_API__ >= 9
template std::unique_ptr<OfflineSourceSeparationImpl>
OfflineSourceSeparationImpl::Create(
    AAssetManager *mgr, const OfflineSourceSeparationConfig &config);
#endif

#if __OHOS__
template std::unique_ptr<OfflineSourceSeparationImpl>
OfflineSourceSeparationImpl::Create(
    NativeResourceManager *mgr, const OfflineSourceSeparationConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-source-separation-impl.h
================================================
// sherpa-onnx/csrc/offline-source-separation-impl.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_IMPL_H_

#include <memory>
#include <vector>

#include "sherpa-onnx/csrc/offline-source-separation.h"

namespace sherpa_onnx {

class OfflineSourceSeparationImpl {
 public:
  static std::unique_ptr<OfflineSourceSeparationImpl> Create(
      const OfflineSourceSeparationConfig &config);

  template <typename Manager>
  static std::unique_ptr<OfflineSourceSeparationImpl> Create(
      Manager *mgr, const OfflineSourceSeparationConfig &config);

  virtual ~OfflineSourceSeparationImpl() = default;

  virtual OfflineSourceSeparationOutput Process(
      const OfflineSourceSeparationInput &input) const = 0;

  virtual int32_t GetOutputSampleRate() const = 0;

  virtual int32_t GetNumberOfStems() const = 0;

  OfflineSourceSeparationInput Resample(
      const OfflineSourceSeparationInput &input, bool debug = false) const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-source-separation-model-config.cc
================================================
// sherpa-onnx/csrc/offline-source-separation-model-config.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-source-separation-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OfflineSourceSeparationModelConfig::Register(ParseOptions *po) {
  spleeter.Register(po);
  uvr.Register(po);

  po->Register("num-threads", &num_threads,
               "Number of threads to run the neural network");

  po->Register("debug", &debug,
               "true to print model information while loading it.");

  po->Register("provider", &provider,
               "Specify a provider to use: cpu, cuda, coreml");
}

bool OfflineSourceSeparationModelConfig::Validate() const {
  if (!spleeter.vocals.empty()) {
    return spleeter.Validate();
  }

  if (!uvr.model.empty()) {
    return uvr.Validate();
  }

  SHERPA_ONNX_LOGE("Please specify a source separation model");

  return false;
}

std::string OfflineSourceSeparationModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineSourceSeparationModelConfig(";
  os << "spleeter=" << spleeter.ToString() << ", ";
  os << "uvr=" << uvr.ToString() << ", ";
  os << "num_threads=" << num_threads << ", ";
  os << "debug=" << (debug ? "True" : "False") << ", ";
  os << "provider=\"" << provider << "\")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-source-separation-model-config.h
================================================
// sherpa-onnx/csrc/offline-source-separation-model-config.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/offline-source-separation-spleeter-model-config.h"
#include "sherpa-onnx/csrc/offline-source-separation-uvr-model-config.h"
#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineSourceSeparationModelConfig {
  OfflineSourceSeparationSpleeterModelConfig spleeter;
  OfflineSourceSeparationUvrModelConfig uvr;

  int32_t num_threads = 1;
  bool debug = false;
  std::string provider = "cpu";

  OfflineSourceSeparationModelConfig() = default;

  OfflineSourceSeparationModelConfig(
      const OfflineSourceSeparationSpleeterModelConfig &spleeter,
      const OfflineSourceSeparationUvrModelConfig &uvr, int32_t num_threads,
      bool debug, const std::string &provider)
      : spleeter(spleeter),
        uvr(uvr),
        num_threads(num_threads),
        debug(debug),
        provider(provider) {}

  void Register(ParseOptions *po);

  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-source-separation-spleeter-impl.h
================================================
// sherpa-onnx/csrc/offline-source-separation-spleeter-impl.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_IMPL_H_

#include <algorithm>
#include <utility>
#include <vector>

#include "Eigen/Dense"
#include "kaldi-native-fbank/csrc/istft.h"
#include "kaldi-native-fbank/csrc/stft.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-source-separation-spleeter-model.h"
#include "sherpa-onnx/csrc/offline-source-separation.h"
#include "sherpa-onnx/csrc/onnx-utils.h"

namespace sherpa_onnx {

class OfflineSourceSeparationSpleeterImpl : public OfflineSourceSeparationImpl {
 public:
  explicit OfflineSourceSeparationSpleeterImpl(
      const OfflineSourceSeparationConfig &config)
      : config_(config), model_(config_.model) {}

  template <typename Manager>
  OfflineSourceSeparationSpleeterImpl(
      Manager *mgr, const OfflineSourceSeparationConfig &config)
      : config_(config), model_(mgr, config_.model) {}

  OfflineSourceSeparationOutput Process(
      const OfflineSourceSeparationInput &_input) const override {
    auto input = Resample(_input, config_.model.debug);

    auto stft_ch0 = ComputeStft(input, 0);

    auto stft_ch1 = ComputeStft(input, 1);
    knf::StftResult *p_stft_ch1 = stft_ch1.real.empty() ? &stft_ch0 : &stft_ch1;

    int32_t num_frames = stft_ch0.num_frames;
    int32_t fft_bins = stft_ch0.real.size() / num_frames;

    int32_t pad = 512 - (stft_ch0.num_frames % 512);
    if (pad < 512) {
      num_frames += pad;
    }

    if (num_frames % 512) {
      SHERPA_ONNX_LOGE("num_frames should be multiple of 512, actual: %d. %d",
                       num_frames, num_frames % 512);
      SHERPA_ONNX_EXIT(-1);
    }

    Eigen::VectorXf real(2 * num_frames * 1024);
    Eigen::VectorXf imag(2 * num_frames * 1024);
    real.setZero();
    imag.setZero();

    float *p_real = &real[0];
    float *p_imag = &imag[0];

    // copy stft result of channel 0
    for (int32_t i = 0; i != stft_ch0.num_frames; ++i) {
      std::copy(stft_ch0.real.data() + i * fft_bins,
                stft_ch0.real.data() + i * fft_bins + 1024, p_real + 1024 * i);

      std::copy(stft_ch0.imag.data() + i * fft_bins,
                stft_ch0.imag.data() + i * fft_bins + 1024, p_imag + 1024 * i);
    }

    p_real += num_frames * 1024;
    p_imag += num_frames * 1024;

    // copy stft result of channel 1
    for (int32_t i = 0; i != stft_ch1.num_frames; ++i) {
      std::copy(p_stft_ch1->real.data() + i * fft_bins,
                p_stft_ch1->real.data() + i * fft_bins + 1024,
                p_real + 1024 * i);

      std::copy(p_stft_ch1->imag.data() + i * fft_bins,
                p_stft_ch1->imag.data() + i * fft_bins + 1024,
                p_imag + 1024 * i);
    }

    Eigen::VectorXf x = (real.array().square() + imag.array().square()).sqrt();

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::array<int64_t, 4> x_shape{2, num_frames / 512, 512, 1024};
    Ort::Value x_tensor = Ort::Value::CreateTensor(
        memory_info, &x[0], x.size(), x_shape.data(), x_shape.size());

    Ort::Value vocals_spec_tensor = model_.RunVocals(View(&x_tensor));
    Ort::Value accompaniment_spec_tensor =
        model_.RunAccompaniment(std::move(x_tensor));

    Eigen::VectorXf vocals_spec = Eigen::Map<Eigen::VectorXf>(
        vocals_spec_tensor.GetTensorMutableData<float>(), x.size());

    Eigen::VectorXf accompaniment_spec = Eigen::Map<Eigen::VectorXf>(
        accompaniment_spec_tensor.GetTensorMutableData<float>(), x.size());

    Eigen::VectorXf sum_spec = vocals_spec.array().square() +
                               accompaniment_spec.array().square() + 1e-10;

    vocals_spec = (vocals_spec.array().square() + 1e-10 / 2) / sum_spec.array();

    accompaniment_spec =
        (accompaniment_spec.array().square() + 1e-10 / 2) / sum_spec.array();

    auto vocals_samples_ch0 = ProcessSpec(vocals_spec, stft_ch0, 0);
    auto vocals_samples_ch1 = ProcessSpec(vocals_spec, *p_stft_ch1, 1);

    auto accompaniment_samples_ch0 =
        ProcessSpec(accompaniment_spec, stft_ch0, 0);
    auto accompaniment_samples_ch1 =
        ProcessSpec(accompaniment_spec, *p_stft_ch1, 1);

    OfflineSourceSeparationOutput ans;
    ans.sample_rate = GetOutputSampleRate();

    ans.stems.resize(2);
    ans.stems[0].data.reserve(2);
    ans.stems[1].data.reserve(2);

    ans.stems[0].data.push_back(std::move(vocals_samples_ch0));
    ans.stems[0].data.push_back(std::move(vocals_samples_ch1));

    ans.stems[1].data.push_back(std::move(accompaniment_samples_ch0));
    ans.stems[1].data.push_back(std::move(accompaniment_samples_ch1));

    return ans;
  }

  int32_t GetOutputSampleRate() const override {
    return model_.GetMetaData().sample_rate;
  }

  int32_t GetNumberOfStems() const override {
    return model_.GetMetaData().num_stems;
  }

 private:
  // spec is of shape (2, num_chunks, 512, 1024)
  std::vector<float> ProcessSpec(const Eigen::VectorXf &spec,
                                 const knf::StftResult &stft,
                                 int32_t channel) const {
    int32_t fft_bins = stft.real.size() / stft.num_frames;

    Eigen::VectorXf mask(stft.real.size());
    mask.setZero();

    float *p_mask = &mask[0];

    // assume there are 2 channels
    const float *p_spec = &spec[0] + (spec.size() / 2) * channel;

    for (int32_t i = 0; i != stft.num_frames; ++i) {
      std::copy(p_spec + i * 1024, p_spec + (i + 1) * 1024,
                p_mask + i * fft_bins);
    }

    knf::StftResult masked_stft;

    masked_stft.num_frames = stft.num_frames;
    masked_stft.real.resize(stft.real.size());
    masked_stft.imag.resize(stft.imag.size());

    Eigen::Map<Eigen::VectorXf>(masked_stft.real.data(),
                                masked_stft.real.size()) =
        mask.array() *
        Eigen::Map<Eigen::VectorXf>(const_cast<float *>(stft.real.data()),
                                    stft.real.size())
            .array();

    Eigen::Map<Eigen::VectorXf>(masked_stft.imag.data(),
                                masked_stft.imag.size()) =
        mask.array() *
        Eigen::Map<Eigen::VectorXf>(const_cast<float *>(stft.imag.data()),
                                    stft.imag.size())
            .array();

    auto stft_config = GetStftConfig();
    knf::IStft istft(stft_config);

    return istft.Compute(masked_stft);
  }

  knf::StftResult ComputeStft(const OfflineSourceSeparationInput &input,
                              int32_t ch) const {
    if (ch >= input.samples.data.size()) {
      SHERPA_ONNX_LOGE("Invalid channel %d. Max %d", ch,
                       static_cast<int32_t>(input.samples.data.size()));
      SHERPA_ONNX_EXIT(-1);
    }

    if (input.samples.data[ch].empty()) {
      return {};
    }

    return ComputeStft(input.samples.data[ch]);
  }

  knf::StftResult ComputeStft(const std::vector<float> &samples) const {
    auto stft_config = GetStftConfig();
    knf::Stft stft(stft_config);

    return stft.Compute(samples.data(), samples.size());
  }

  knf::StftConfig GetStftConfig() const {
    const auto &meta = model_.GetMetaData();

    knf::StftConfig stft_config;
    stft_config.n_fft = meta.n_fft;
    stft_config.hop_length = meta.hop_length;
    stft_config.win_length = meta.window_length;
    stft_config.window_type = meta.window_type;
    stft_config.center = meta.center;

    return stft_config;
  }

 private:
  OfflineSourceSeparationConfig config_;
  OfflineSourceSeparationSpleeterModel model_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-source-separation-spleeter-model-config.cc
================================================
// sherpa-onnx/csrc/offline-source-separation-spleeter-model-config.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-source-separation-spleeter-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OfflineSourceSeparationSpleeterModelConfig::Register(ParseOptions *po) {
  po->Register("spleeter-vocals", &vocals, "Path to the spleeter vocals model");

  po->Register("spleeter-accompaniment", &accompaniment,
               "Path to the spleeter accompaniment model");
}

bool OfflineSourceSeparationSpleeterModelConfig::Validate() const {
  if (vocals.empty()) {
    SHERPA_ONNX_LOGE("Please provide --spleeter-vocals");
    return false;
  }

  if (!FileExists(vocals)) {
    SHERPA_ONNX_LOGE("spleeter vocals '%s' does not exist. ", vocals.c_str());
    return false;
  }

  if (accompaniment.empty()) {
    SHERPA_ONNX_LOGE("Please provide --spleeter-accompaniment");
    return false;
  }

  if (!FileExists(accompaniment)) {
    SHERPA_ONNX_LOGE("spleeter accompaniment '%s' does not exist. ",
                     accompaniment.c_str());
    return false;
  }

  return true;
}

std::string OfflineSourceSeparationSpleeterModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineSourceSeparationSpleeterModelConfig(";
  os << "vocals=\"" << vocals << "\", ";
  os << "accompaniment=\"" << accompaniment << "\")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-source-separation-spleeter-model-config.h
================================================
// sherpa-onnx/csrc/offline-source-separation-spleeter-model-config.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/offline-source-separation-spleeter-model-config.h"
#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineSourceSeparationSpleeterModelConfig {
  std::string vocals;

  std::string accompaniment;

  OfflineSourceSeparationSpleeterModelConfig() = default;

  OfflineSourceSeparationSpleeterModelConfig(const std::string &vocals,
                                             const std::string &accompaniment)
      : vocals(vocals), accompaniment(accompaniment) {}

  void Register(ParseOptions *po);

  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-source-separation-spleeter-model-meta-data.h
================================================
// sherpa-onnx/csrc/offline-source-separation-spleeter-model-meta-data.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_MODEL_META_DATA_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_MODEL_META_DATA_H_

#include <string>
#include <unordered_map>
#include <vector>

namespace sherpa_onnx {

// See also
// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/spleeter/separate_onnx.py
struct OfflineSourceSeparationSpleeterModelMetaData {
  int32_t sample_rate = 44100;
  int32_t num_stems = 2;

  int32_t n_fft = 4096;
  int32_t hop_length = 1024;
  int32_t window_length = 4096;
  bool center = false;
  std::string window_type = "hann";
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_MODEL_META_DATA_H_


================================================
FILE: sherpa-onnx/csrc/offline-source-separation-spleeter-model.cc
================================================
// sherpa-onnx/csrc/offline-source-separation-spleeter-model.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-source-separation-spleeter-model.h"

#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class OfflineSourceSeparationSpleeterModel::Impl {
 public:
  explicit Impl(const OfflineSourceSeparationModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    {
      auto buf = ReadFile(config.spleeter.vocals);
      InitVocals(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(config.spleeter.accompaniment);
      InitAccompaniment(buf.data(), buf.size());
    }
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineSourceSeparationModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    {
      auto buf = ReadFile(mgr, config.spleeter.vocals);
      InitVocals(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(mgr, config.spleeter.accompaniment);
      InitAccompaniment(buf.data(), buf.size());
    }
  }

  const OfflineSourceSeparationSpleeterModelMetaData &GetMetaData() const {
    return meta_;
  }

  Ort::Value RunVocals(Ort::Value x) const {
    auto out = vocals_sess_->Run({}, vocals_input_names_ptr_.data(), &x, 1,
                                 vocals_output_names_ptr_.data(),
                                 vocals_output_names_ptr_.size());
    return std::move(out[0]);
  }

  Ort::Value RunAccompaniment(Ort::Value x) const {
    auto out =
        accompaniment_sess_->Run({}, accompaniment_input_names_ptr_.data(), &x,
                                 1, accompaniment_output_names_ptr_.data(),
                                 accompaniment_output_names_ptr_.size());
    return std::move(out[0]);
  }

 private:
  void InitVocals(void *model_data, size_t model_data_length) {
    vocals_sess_ = std::make_unique<Ort::Session>(
        env_, model_data, model_data_length, sess_opts_);

    GetInputNames(vocals_sess_.get(), &vocals_input_names_,
                  &vocals_input_names_ptr_);

    GetOutputNames(vocals_sess_.get(), &vocals_output_names_,
                   &vocals_output_names_ptr_);

    Ort::ModelMetadata meta_data = vocals_sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      os << "---vocals model---\n";
      PrintModelMetadata(os, meta_data);

      os << "----------input names----------\n";
      int32_t i = 0;
      for (const auto &s : vocals_input_names_) {
        os << i << " " << s << "\n";
        ++i;
      }
      os << "----------output names----------\n";
      i = 0;
      for (const auto &s : vocals_output_names_) {
        os << i << " " << s << "\n";
        ++i;
      }

#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below

    std::string model_type;
    SHERPA_ONNX_READ_META_DATA_STR(model_type, "model_type");
    if (model_type != "spleeter") {
      SHERPA_ONNX_LOGE("Expect model type 'spleeter'. Given: '%s'",
                       model_type.c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    SHERPA_ONNX_READ_META_DATA(meta_.num_stems, "stems");
    if (meta_.num_stems != 2) {
      SHERPA_ONNX_LOGE("Only 2stems is supported. Given %d stems",
                       meta_.num_stems);
      SHERPA_ONNX_EXIT(-1);
    }
  }

  void InitAccompaniment(void *model_data, size_t model_data_length) {
    accompaniment_sess_ = std::make_unique<Ort::Session>(
        env_, model_data, model_data_length, sess_opts_);

    GetInputNames(accompaniment_sess_.get(), &accompaniment_input_names_,
                  &accompaniment_input_names_ptr_);

    GetOutputNames(accompaniment_sess_.get(), &accompaniment_output_names_,
                   &accompaniment_output_names_ptr_);
  }

 private:
  OfflineSourceSeparationModelConfig config_;
  OfflineSourceSeparationSpleeterModelMetaData meta_;

  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> vocals_sess_;

  std::vector<std::string> vocals_input_names_;
  std::vector<const char *> vocals_input_names_ptr_;

  std::vector<std::string> vocals_output_names_;
  std::vector<const char *> vocals_output_names_ptr_;

  std::unique_ptr<Ort::Session> accompaniment_sess_;

  std::vector<std::string> accompaniment_input_names_;
  std::vector<const char *> accompaniment_input_names_ptr_;

  std::vector<std::string> accompaniment_output_names_;
  std::vector<const char *> accompaniment_output_names_ptr_;
};

OfflineSourceSeparationSpleeterModel::~OfflineSourceSeparationSpleeterModel() =
    default;  // NOLINT

OfflineSourceSeparationSpleeterModel::OfflineSourceSeparationSpleeterModel(
    const OfflineSourceSeparationModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineSourceSeparationSpleeterModel::OfflineSourceSeparationSpleeterModel(
    Manager *mgr, const OfflineSourceSeparationModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

Ort::Value OfflineSourceSeparationSpleeterModel::RunVocals(Ort::Value x) const {
  return impl_->RunVocals(std::move(x));
}

Ort::Value OfflineSourceSeparationSpleeterModel::RunAccompaniment(
    Ort::Value x) const {
  return impl_->RunAccompaniment(std::move(x));
}

const OfflineSourceSeparationSpleeterModelMetaData &
OfflineSourceSeparationSpleeterModel::GetMetaData() const {
  return impl_->GetMetaData();
}

#if __ANDROID_API__ >= 9
template OfflineSourceSeparationSpleeterModel::
    OfflineSourceSeparationSpleeterModel(
        AAssetManager *mgr, const OfflineSourceSeparationModelConfig &config);
#endif

#if __OHOS__
template OfflineSourceSeparationSpleeterModel::
    OfflineSourceSeparationSpleeterModel(
        NativeResourceManager *mgr,
        const OfflineSourceSeparationModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-source-separation-spleeter-model.h
================================================
// sherpa-onnx/csrc/offline-source-separation-spleeter-model.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_MODEL_H_
#include <memory>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-source-separation-model-config.h"
#include "sherpa-onnx/csrc/offline-source-separation-spleeter-model-meta-data.h"

namespace sherpa_onnx {

class OfflineSourceSeparationSpleeterModel {
 public:
  ~OfflineSourceSeparationSpleeterModel();

  explicit OfflineSourceSeparationSpleeterModel(
      const OfflineSourceSeparationModelConfig &config);

  template <typename Manager>
  OfflineSourceSeparationSpleeterModel(
      Manager *mgr, const OfflineSourceSeparationModelConfig &config);

  Ort::Value RunVocals(Ort::Value x) const;
  Ort::Value RunAccompaniment(Ort::Value x) const;

  const OfflineSourceSeparationSpleeterModelMetaData &GetMetaData() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/offline-source-separation-uvr-impl.h
================================================
// sherpa-onnx/csrc/offline-source-separation-uvr-impl.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_IMPL_H_

#include <algorithm>
#include <utility>
#include <vector>

#include "Eigen/Dense"
#include "kaldi-native-fbank/csrc/istft.h"
#include "kaldi-native-fbank/csrc/stft.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-source-separation-uvr-model.h"
#include "sherpa-onnx/csrc/offline-source-separation.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/resample.h"

namespace sherpa_onnx {

class OfflineSourceSeparationUvrImpl : public OfflineSourceSeparationImpl {
 public:
  explicit OfflineSourceSeparationUvrImpl(
      const OfflineSourceSeparationConfig &config)
      : config_(config), model_(config_.model) {}

  template <typename Manager>
  OfflineSourceSeparationUvrImpl(Manager *mgr,
                                 const OfflineSourceSeparationConfig &config)
      : config_(config), model_(mgr, config_.model) {}

  OfflineSourceSeparationOutput Process(
      const OfflineSourceSeparationInput &_input) const override {
    auto input = Resample(_input, config_.model.debug);

    auto chunks_ch0 = SplitIntoChunks(input.samples.data[0]);

    std::vector<std::vector<float>> chunks_ch1;
    if (input.samples.data.size() > 1) {
      chunks_ch1 = SplitIntoChunks(input.samples.data[1]);
    }

    std::vector<float> samples_ch0;
    std::vector<float> samples_ch1;

    for (int32_t i = 0; i != static_cast<int32_t>(chunks_ch0.size()); ++i) {
      bool is_first_chunk = (i == 0);
      bool is_last_chunk = (i == static_cast<int32_t>(chunks_ch0.size()) - 1);

      auto s = ProcessChunk(
          chunks_ch0[i],
          chunks_ch1.empty() ? std::vector<float>{} : chunks_ch1[i],
          is_first_chunk, is_last_chunk);

      samples_ch0.insert(samples_ch0.end(), s.first.begin(), s.first.end());
      samples_ch1.insert(samples_ch1.end(), s.second.begin(), s.second.end());
    }

    auto &vocals_ch0 = samples_ch0;
    auto &vocals_ch1 = samples_ch1;

    std::vector<float> non_vocals_ch0(vocals_ch0.size());
    std::vector<float> non_vocals_ch1(vocals_ch1.size());

    Eigen::Map<Eigen::VectorXf>(non_vocals_ch0.data(), non_vocals_ch0.size()) =
        Eigen::Map<Eigen::VectorXf>(input.samples.data[0].data(),
                                    input.samples.data[0].size())
            .array() -
        Eigen::Map<Eigen::VectorXf>(vocals_ch0.data(), vocals_ch0.size())
            .array();

    if (input.samples.data.size() > 1) {
      Eigen::Map<Eigen::VectorXf>(non_vocals_ch1.data(),
                                  non_vocals_ch1.size()) =
          Eigen::Map<Eigen::VectorXf>(input.samples.data[1].data(),
                                      input.samples.data[1].size())
              .array() -
          Eigen::Map<Eigen::VectorXf>(vocals_ch1.data(), vocals_ch1.size())
              .array();
    } else {
      Eigen::Map<Eigen::VectorXf>(non_vocals_ch1.data(),
                                  non_vocals_ch1.size()) =
          Eigen::Map<Eigen::VectorXf>(input.samples.data[0].data(),
                                      input.samples.data[0].size())
              .array() -
          Eigen::Map<Eigen::VectorXf>(vocals_ch1.data(), vocals_ch1.size())
              .array();
    }

    OfflineSourceSeparationOutput ans;
    ans.sample_rate = GetOutputSampleRate();

    ans.stems.resize(2);
    ans.stems[0].data.reserve(2);
    ans.stems[1].data.reserve(2);

    ans.stems[0].data.push_back(std::move(vocals_ch0));
    ans.stems[0].data.push_back(std::move(vocals_ch1));

    ans.stems[1].data.push_back(std::move(non_vocals_ch0));
    ans.stems[1].data.push_back(std::move(non_vocals_ch1));

    return ans;
  }

  int32_t GetOutputSampleRate() const override {
    return model_.GetMetaData().sample_rate;
  }

  int32_t GetNumberOfStems() const override {
    return model_.GetMetaData().num_stems;
  }

 private:
  std::pair<std::vector<float>, std::vector<float>> ProcessChunk(
      const std::vector<float> &chunk_ch0, const std::vector<float> &chunk_ch1,
      bool is_first_chunk, bool is_last_chunk) const {
    int32_t pad0 = 0;

    auto stft_results_ch0 = ComputeStft(chunk_ch0, &pad0);

    int32_t pad1 = pad0;
    std::vector<knf::StftResult> stft_results_ch1;

    if (!chunk_ch1.empty()) {
      stft_results_ch1 = ComputeStft(chunk_ch1, &pad1);
    } else {
      stft_results_ch1 = stft_results_ch0;
    }

    const auto &meta_ = model_.GetMetaData();

    int32_t num_frames = stft_results_ch0[0].num_frames;
    int32_t dim_f = meta_.dim_f;
    int32_t dim_t = meta_.dim_t;
    int32_t n_fft_bin = meta_.n_fft / 2 + 1;
    if (num_frames != dim_t) {
      SHERPA_ONNX_LOGE("num_frames(%d) != dim_t(%d)", num_frames, dim_t);
      SHERPA_ONNX_EXIT(-1);
    }

    // the first 2: number of channels
    // the second 2: real and image
    std::vector<float> x(stft_results_ch0.size() * 2 * 2 * dim_f * dim_t);
    float *px = x.data();

    for (int32_t i = 0; i != static_cast<int32_t>(stft_results_ch0.size());
         ++i) {
      const auto &ch0 = stft_results_ch0[i];
      const auto &ch1 = stft_results_ch1[i];

      const float *p_real_ch0 = ch0.real.data();
      const float *p_imag_ch0 = ch0.imag.data();

      const float *p_real_ch1 = ch1.real.data();
      const float *p_imag_ch1 = ch1.imag.data();

      for (int32_t j = 0; j != dim_f; ++j) {
        for (int32_t k = 0; k != num_frames; ++k) {
          *px = p_real_ch0[k * n_fft_bin + j];
          ++px;
        }
      }

      for (int32_t j = 0; j != dim_f; ++j) {
        for (int32_t k = 0; k != num_frames; ++k) {
          *px = p_imag_ch0[k * n_fft_bin + j];
          ++px;
        }
      }

      for (int32_t j = 0; j != dim_f; ++j) {
        for (int32_t k = 0; k != num_frames; ++k) {
          *px = p_real_ch1[k * n_fft_bin + j];
          ++px;
        }
      }

      for (int32_t j = 0; j != dim_f; ++j) {
        for (int32_t k = 0; k != num_frames; ++k) {
          *px = p_imag_ch1[k * n_fft_bin + j];
          ++px;
        }
      }
    }  // for (int32_t i = 0; i !=

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::array<int64_t, 4> x_shape{
        static_cast<int32_t>(stft_results_ch0.size()) * 4 / meta_.dim_c,
        meta_.dim_c, dim_f, dim_t};

    Ort::Value x_tensor = Ort::Value::CreateTensor(
        memory_info, x.data(), x.size(), x_shape.data(), x_shape.size());

    Ort::Value spec = model_.Run(std::move(x_tensor));

    const float *p_spec = spec.GetTensorData<float>();

    for (int32_t i = 0; i != static_cast<int32_t>(stft_results_ch0.size());
         ++i) {
      auto &ch0 = stft_results_ch0[i];
      auto &ch1 = stft_results_ch1[i];

      float *p_real_ch0 = ch0.real.data();
      float *p_imag_ch0 = ch0.imag.data();

      float *p_real_ch1 = ch1.real.data();
      float *p_imag_ch1 = ch1.imag.data();

      for (int32_t j = 0; j != dim_f; ++j) {
        for (int32_t k = 0; k != num_frames; ++k) {
          p_real_ch0[k * n_fft_bin + j] = *p_spec;
          ++p_spec;
        }
      }

      for (int32_t j = 0; j != dim_f; ++j) {
        for (int32_t k = 0; k != num_frames; ++k) {
          p_imag_ch0[k * n_fft_bin + j] = *p_spec;
          ++p_spec;
        }
      }

      for (int32_t j = 0; j != dim_f; ++j) {
        for (int32_t k = 0; k != num_frames; ++k) {
          p_real_ch1[k * n_fft_bin + j] = *p_spec;
          ++p_spec;
        }
      }

      for (int32_t j = 0; j != dim_f; ++j) {
        for (int32_t k = 0; k != num_frames; ++k) {
          p_imag_ch1[k * n_fft_bin + j] = *p_spec;
          ++p_spec;
        }
      }

      for (int32_t k = 0; k != num_frames; ++k) {
        for (int32_t j = dim_f; j != n_fft_bin; ++j) {
          p_real_ch0[k * n_fft_bin + j] = 0;
          p_real_ch1[k * n_fft_bin + j] = 0;

          p_imag_ch0[k * n_fft_bin + j] = 0;
          p_imag_ch1[k * n_fft_bin + j] = 0;
        }
      }
    }

    auto samples_ch0 = ComputeInverseStft(stft_results_ch0, pad0,
                                          is_first_chunk, is_last_chunk);

    auto samples_ch1 = ComputeInverseStft(stft_results_ch1, pad1,
                                          is_first_chunk, is_last_chunk);

    return {std::move(samples_ch0), std::move(samples_ch1)};
  }

  std::vector<float> ComputeInverseStft(
      const std::vector<knf::StftResult> &stft_result, int32_t pad,
      bool is_first_chunk, bool is_last_chunk) const {
    const auto &meta_ = model_.GetMetaData();
    int32_t trim = meta_.n_fft / 2;

    int32_t margin = meta_.margin;

    int32_t chunk_size = meta_.num_chunks * meta_.sample_rate;

    if (margin > chunk_size) {
      margin = chunk_size;
    }

    auto stft_config = GetStftConfig();
    knf::IStft istft(stft_config);

    std::vector<float> ans;

    for (int32_t i = 0; i != static_cast<int32_t>(stft_result.size()); ++i) {
      auto samples = istft.Compute(stft_result[i]);
      int32_t num_samples = static_cast<int32_t>(samples.size());

      ans.insert(ans.end(), samples.begin() + trim,
                 samples.begin() + (num_samples - trim));
    }

    int32_t start = is_first_chunk ? 0 : margin;
    int32_t end =
        is_last_chunk ? (ans.size() - pad) : (ans.size() - pad - margin);

    return {ans.begin() + start, ans.begin() + end};
  }

  std::vector<knf::StftResult> ComputeStft(const std::vector<float> &chunk,
                                           int32_t *pad) const {
    const auto &meta_ = model_.GetMetaData();

    int32_t num_samples = static_cast<int32_t>(chunk.size());
    int32_t trim = meta_.n_fft / 2;
    int32_t chunk_size = meta_.hop_length * (meta_.dim_t - 1);
    int32_t gen_size = chunk_size - 2 * trim;
    *pad = gen_size - num_samples % gen_size;

    std::vector<float> samples(trim + chunk.size() + *pad + trim);
    std::copy(chunk.begin(), chunk.end(), samples.begin() + trim);

    auto stft_config = GetStftConfig();
    knf::Stft stft(stft_config);

    std::vector<knf::StftResult> stft_results;
    // split the chunk into short segments
    for (int32_t i = 0; i < num_samples + *pad; i += gen_size) {
      auto r = stft.Compute(samples.data() + i, chunk_size);
      stft_results.push_back(std::move(r));
    }

    return stft_results;
  }

  std::vector<std::vector<float>> SplitIntoChunks(
      const std::vector<float> &samples) const {
    std::vector<std::vector<float>> ans;

    if (samples.empty()) {
      return ans;
    }

    const auto &meta_ = model_.GetMetaData();
    int32_t margin = meta_.margin;

    int32_t chunk_size = meta_.num_chunks * meta_.sample_rate;

    if (static_cast<int32_t>(samples.size()) < chunk_size) {
      chunk_size = samples.size();
    }

    if (margin > chunk_size) {
      margin = chunk_size;
    }

    for (int32_t i = 0; i < static_cast<int32_t>(samples.size());
         i += chunk_size) {
      int32_t start = std::max<int32_t>(0, i - margin);
      int32_t end = std::min<int32_t>(i + chunk_size + margin,
                                      static_cast<int32_t>(samples.size()));
      if (start >= end) {
        break;
      }

      ans.emplace_back(samples.begin() + start, samples.begin() + end);

      if (end == static_cast<int32_t>(samples.size())) {
        break;
      }
    }

    return ans;
  }

  knf::StftConfig GetStftConfig() const {
    const auto &meta = model_.GetMetaData();

    knf::StftConfig stft_config;
    stft_config.n_fft = meta.n_fft;
    stft_config.hop_length = meta.hop_length;
    stft_config.win_length = meta.window_length;
    stft_config.window_type = meta.window_type;
    stft_config.center = meta.center;

    return stft_config;
  }

 private:
  OfflineSourceSeparationConfig config_;
  OfflineSourceSeparationUvrModel model_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-source-separation-uvr-model-config.cc
================================================
// sherpa-onnx/csrc/offline-source-separation-uvr-model-config.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-source-separation-uvr-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OfflineSourceSeparationUvrModelConfig::Register(ParseOptions *po) {
  po->Register("uvr-model", &model, "Path to the UVR model");
}

bool OfflineSourceSeparationUvrModelConfig::Validate() const {
  if (model.empty()) {
    SHERPA_ONNX_LOGE("Please provide --uvr-model");
    return false;
  }

  if (!FileExists(model)) {
    SHERPA_ONNX_LOGE("UVR model '%s' does not exist. ", model.c_str());
    return false;
  }

  return true;
}

std::string OfflineSourceSeparationUvrModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineSourceSeparationUvrModelConfig(";
  os << "model=\"" << model << "\")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-source-separation-uvr-model-config.h
================================================
// sherpa-onnx/csrc/offline-source-separation-uvr-model-config.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/offline-source-separation-uvr-model-config.h"
#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineSourceSeparationUvrModelConfig {
  std::string model;

  OfflineSourceSeparationUvrModelConfig() = default;

  explicit OfflineSourceSeparationUvrModelConfig(const std::string &model)
      : model(model) {}

  void Register(ParseOptions *po);

  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-source-separation-uvr-model-meta-data.h
================================================
// sherpa-onnx/csrc/offline-source-separation-uvr-model-meta-data.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_META_DATA_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_META_DATA_H_

#include <string>
#include <unordered_map>
#include <vector>

namespace sherpa_onnx {

// See also
// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/uvr_mdx/test.py
// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/uvr_mdx/add_meta_data_and_quantize.py
struct OfflineSourceSeparationUvrModelMetaData {
  int32_t sample_rate = 44100;
  int32_t num_stems = 2;
  int32_t dim_c = -1;
  int32_t dim_f = -1;
  int32_t dim_t = -1;

  int32_t n_fft = -1;
  int32_t hop_length = 1024;

  int32_t window_length = -1;
  int32_t center = 1;
  std::string window_type = "hann";

  // the following fields are preconfigured. Please see
  // https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/uvr_mdx/test.py
  int32_t margin = 0;  // changed in ./offline-source-separation-uvr-model.cc
  const int32_t num_chunks = 15;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_META_DATA_H_


================================================
FILE: sherpa-onnx/csrc/offline-source-separation-uvr-model.cc
================================================
// sherpa-onnx/csrc/offline-source-separation-uvr-model.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-source-separation-uvr-model.h"

#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class OfflineSourceSeparationUvrModel::Impl {
 public:
  explicit Impl(const OfflineSourceSeparationModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(config.uvr.model);
    Init(buf.data(), buf.size());
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineSourceSeparationModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(mgr, config.uvr.model);
    Init(buf.data(), buf.size());
  }

  const OfflineSourceSeparationUvrModelMetaData &GetMetaData() const {
    return meta_;
  }

  Ort::Value Run(Ort::Value x) const {
    auto out = sess_->Run({}, input_names_ptr_.data(), &x, 1,
                          output_names_ptr_.data(), output_names_ptr_.size());
    return std::move(out[0]);
  }

 private:
  void Init(void *model_data, size_t model_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);

    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      os << "---UVR model---\n";
      PrintModelMetadata(os, meta_data);

      os << "----------input names----------\n";
      int32_t i = 0;
      for (const auto &s : input_names_) {
        os << i << " " << s << "\n";
        ++i;
      }
      os << "----------output names----------\n";
      i = 0;
      for (const auto &s : output_names_) {
        os << i << " " << s << "\n";
        ++i;
      }

#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below

    std::string model_type;
    SHERPA_ONNX_READ_META_DATA_STR(model_type, "model_type");
    if (model_type != "UVR") {
      SHERPA_ONNX_LOGE("Expect model type 'UVR'. Given: '%s'",
                       model_type.c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    SHERPA_ONNX_READ_META_DATA(meta_.num_stems, "stems");
    if (meta_.num_stems != 2) {
      SHERPA_ONNX_LOGE("Only 2stems is supported. Given %d stems",
                       meta_.num_stems);
      SHERPA_ONNX_EXIT(-1);
    }

    SHERPA_ONNX_READ_META_DATA(meta_.sample_rate, "sample_rate");
    SHERPA_ONNX_READ_META_DATA(meta_.n_fft, "n_fft");
    SHERPA_ONNX_READ_META_DATA(meta_.center, "center");
    SHERPA_ONNX_READ_META_DATA(meta_.window_length, "win_length");
    SHERPA_ONNX_READ_META_DATA(meta_.hop_length, "hop_length");
    SHERPA_ONNX_READ_META_DATA(meta_.dim_t, "dim_t");
    SHERPA_ONNX_READ_META_DATA(meta_.dim_f, "dim_f");
    SHERPA_ONNX_READ_META_DATA(meta_.dim_c, "dim_c");
    SHERPA_ONNX_READ_META_DATA_STR(meta_.window_type, "window_type");

    meta_.margin = meta_.sample_rate;
  }

 private:
  OfflineSourceSeparationModelConfig config_;
  OfflineSourceSeparationUvrModelMetaData meta_;

  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;
};

OfflineSourceSeparationUvrModel::~OfflineSourceSeparationUvrModel() = default;

OfflineSourceSeparationUvrModel::OfflineSourceSeparationUvrModel(
    const OfflineSourceSeparationModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineSourceSeparationUvrModel::OfflineSourceSeparationUvrModel(
    Manager *mgr, const OfflineSourceSeparationModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

Ort::Value OfflineSourceSeparationUvrModel::Run(Ort::Value x) const {
  return impl_->Run(std::move(x));
}

const OfflineSourceSeparationUvrModelMetaData &
OfflineSourceSeparationUvrModel::GetMetaData() const {
  return impl_->GetMetaData();
}

#if __ANDROID_API__ >= 9
template OfflineSourceSeparationUvrModel::OfflineSourceSeparationUvrModel(
    AAssetManager *mgr, const OfflineSourceSeparationModelConfig &config);
#endif

#if __OHOS__
template OfflineSourceSeparationUvrModel::OfflineSourceSeparationUvrModel(
    NativeResourceManager *mgr,
    const OfflineSourceSeparationModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-source-separation-uvr-model.h
================================================
// sherpa-onnx/csrc/offline-source-separation-uvr-model.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_H_
#include <memory>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-source-separation-model-config.h"
#include "sherpa-onnx/csrc/offline-source-separation-uvr-model-meta-data.h"

namespace sherpa_onnx {

class OfflineSourceSeparationUvrModel {
 public:
  ~OfflineSourceSeparationUvrModel();

  explicit OfflineSourceSeparationUvrModel(
      const OfflineSourceSeparationModelConfig &config);

  template <typename Manager>
  OfflineSourceSeparationUvrModel(
      Manager *mgr, const OfflineSourceSeparationModelConfig &config);

  Ort::Value Run(Ort::Value x) const;

  const OfflineSourceSeparationUvrModelMetaData &GetMetaData() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/offline-source-separation.cc
================================================
// sherpa-onnx/csrc/offline-source-separation.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-source-separation.h"

#include <memory>
#include <string>

#include "sherpa-onnx/csrc/offline-source-separation-impl.h"

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

namespace sherpa_onnx {

void OfflineSourceSeparationConfig::Register(ParseOptions *po) {
  model.Register(po);
}

bool OfflineSourceSeparationConfig::Validate() const {
  return model.Validate();
}

std::string OfflineSourceSeparationConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineSourceSeparationConfig(";
  os << "model=" << model.ToString() << ")";

  return os.str();
}

template <typename Manager>
OfflineSourceSeparation::OfflineSourceSeparation(
    Manager *mgr, const OfflineSourceSeparationConfig &config)
    : impl_(OfflineSourceSeparationImpl::Create(mgr, config)) {}

OfflineSourceSeparation::OfflineSourceSeparation(
    const OfflineSourceSeparationConfig &config)
    : impl_(OfflineSourceSeparationImpl::Create(config)) {}

OfflineSourceSeparation::~OfflineSourceSeparation() = default;

OfflineSourceSeparationOutput OfflineSourceSeparation::Process(
    const OfflineSourceSeparationInput &input) const {
  return impl_->Process(input);
}

int32_t OfflineSourceSeparation::GetOutputSampleRate() const {
  return impl_->GetOutputSampleRate();
}

// e.g., it is 2 for 2stems from spleeter
int32_t OfflineSourceSeparation::GetNumberOfStems() const {
  return impl_->GetNumberOfStems();
}

#if __ANDROID_API__ >= 9
template OfflineSourceSeparation::OfflineSourceSeparation(
    AAssetManager *mgr, const OfflineSourceSeparationConfig &config);
#endif

#if __OHOS__
template OfflineSourceSeparation::OfflineSourceSeparation(
    NativeResourceManager *mgr, const OfflineSourceSeparationConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-source-separation.h
================================================
// sherpa-onnx/csrc/offline-source-separation.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_H_

#include <memory>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/offline-source-separation-model-config.h"
#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineSourceSeparationConfig {
  OfflineSourceSeparationModelConfig model;

  OfflineSourceSeparationConfig() = default;

  explicit OfflineSourceSeparationConfig(
      const OfflineSourceSeparationModelConfig &model)
      : model(model) {}

  void Register(ParseOptions *po);

  bool Validate() const;

  std::string ToString() const;
};

struct MultiChannelSamples {
  // data[i] is for the i-th channel
  //
  // each sample is in the range [-1, 1]
  std::vector<std::vector<float>> data;
};

struct OfflineSourceSeparationInput {
  MultiChannelSamples samples;

  int32_t sample_rate;
};

struct OfflineSourceSeparationOutput {
  std::vector<MultiChannelSamples> stems;

  int32_t sample_rate;
};

class OfflineSourceSeparationImpl;

class OfflineSourceSeparation {
 public:
  ~OfflineSourceSeparation();

  explicit OfflineSourceSeparation(const OfflineSourceSeparationConfig &config);

  template <typename Manager>
  OfflineSourceSeparation(Manager *mgr,
                          const OfflineSourceSeparationConfig &config);

  OfflineSourceSeparationOutput Process(
      const OfflineSourceSeparationInput &input) const;

  int32_t GetOutputSampleRate() const;

  // e.g., it is 2 for 2stems from spleeter
  int32_t GetNumberOfStems() const;

 private:
  std::unique_ptr<OfflineSourceSeparationImpl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_SOURCE_SEPARATION_H_


================================================
FILE: sherpa-onnx/csrc/offline-speaker-diarization-impl.cc
================================================
// sherpa-onnx/csrc/offline-speaker-diarization-impl.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-speaker-diarization-impl.h"

#include <memory>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-speaker-diarization-pyannote-impl.h"

namespace sherpa_onnx {

std::unique_ptr<OfflineSpeakerDiarizationImpl>
OfflineSpeakerDiarizationImpl::Create(
    const OfflineSpeakerDiarizationConfig &config) {
  if (!config.segmentation.pyannote.model.empty()) {
    return std::make_unique<OfflineSpeakerDiarizationPyannoteImpl>(config);
  }

  SHERPA_ONNX_LOGE("Please specify a speaker segmentation model.");

  return nullptr;
}

template <typename Manager>
std::unique_ptr<OfflineSpeakerDiarizationImpl>
OfflineSpeakerDiarizationImpl::Create(
    Manager *mgr, const OfflineSpeakerDiarizationConfig &config) {
  if (!config.segmentation.pyannote.model.empty()) {
    return std::make_unique<OfflineSpeakerDiarizationPyannoteImpl>(mgr, config);
  }

  SHERPA_ONNX_LOGE("Please specify a speaker segmentation model.");

  return nullptr;
}

#if __ANDROID_API__ >= 9
template std::unique_ptr<OfflineSpeakerDiarizationImpl>
OfflineSpeakerDiarizationImpl::Create(
    AAssetManager *mgr, const OfflineSpeakerDiarizationConfig &config);
#endif

#if __OHOS__
template std::unique_ptr<OfflineSpeakerDiarizationImpl>
OfflineSpeakerDiarizationImpl::Create(
    NativeResourceManager *mgr, const OfflineSpeakerDiarizationConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-speaker-diarization-impl.h
================================================
// sherpa-onnx/csrc/offline-speaker-diarization-impl.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_SPEAKER_DIARIZATION_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SPEAKER_DIARIZATION_IMPL_H_

#include <functional>
#include <memory>

#include "sherpa-onnx/csrc/offline-speaker-diarization.h"
namespace sherpa_onnx {

class OfflineSpeakerDiarizationImpl {
 public:
  static std::unique_ptr<OfflineSpeakerDiarizationImpl> Create(
      const OfflineSpeakerDiarizationConfig &config);

  template <typename Manager>
  static std::unique_ptr<OfflineSpeakerDiarizationImpl> Create(
      Manager *mgr, const OfflineSpeakerDiarizationConfig &config);

  virtual ~OfflineSpeakerDiarizationImpl() = default;

  virtual int32_t SampleRate() const = 0;

  // Note: Only config.clustering is used. All other fields in config are
  // ignored
  virtual void SetConfig(const OfflineSpeakerDiarizationConfig &config) = 0;

  virtual OfflineSpeakerDiarizationResult Process(
      const float *audio, int32_t n,
      OfflineSpeakerDiarizationProgressCallback callback = nullptr,
      void *callback_arg = nullptr) const = 0;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_SPEAKER_DIARIZATION_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-speaker-diarization-pyannote-impl.h
================================================
// sherpa-onnx/csrc/offline-speaker-diarization-pyannote-impl.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_SPEAKER_DIARIZATION_PYANNOTE_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SPEAKER_DIARIZATION_PYANNOTE_IMPL_H_

#include <algorithm>
#include <cmath>
#include <memory>
#include <unordered_map>
#include <utility>
#include <vector>

#include "Eigen/Dense"
#include "sherpa-onnx/csrc/fast-clustering.h"
#include "sherpa-onnx/csrc/math.h"
#include "sherpa-onnx/csrc/offline-speaker-diarization-impl.h"
#include "sherpa-onnx/csrc/offline-speaker-segmentation-pyannote-model.h"
#include "sherpa-onnx/csrc/speaker-embedding-extractor.h"

namespace sherpa_onnx {

namespace {  // NOLINT

// copied from https://github.com/k2-fsa/k2/blob/master/k2/csrc/host/util.h#L41
template <class T>
inline void hash_combine(std::size_t *seed, const T &v) {  // NOLINT
  std::hash<T> hasher;
  *seed ^= hasher(v) + 0x9e3779b9 + ((*seed) << 6) + ((*seed) >> 2);  // NOLINT
}

// copied from https://github.com/k2-fsa/k2/blob/master/k2/csrc/host/util.h#L47
struct PairHash {
  template <class T1, class T2>
  std::size_t operator()(const std::pair<T1, T2> &pair) const {
    std::size_t result = 0;
    hash_combine(&result, pair.first);
    hash_combine(&result, pair.second);
    return result;
  }
};
}  // namespace

using Matrix2D = Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic,
                               Eigen::RowMajor>;  // NOLINT

using Matrix2DInt32 = Eigen::Matrix<int32_t, Eigen::Dynamic, Eigen::Dynamic,
                                    Eigen::RowMajor>;  // NOLINT

using FloatRowVector = Eigen::Matrix<float, 1, Eigen::Dynamic>;
using Int32RowVector = Eigen::Matrix<int32_t, 1, Eigen::Dynamic>;

using Int32Pair = std::pair<int32_t, int32_t>;

class OfflineSpeakerDiarizationPyannoteImpl
    : public OfflineSpeakerDiarizationImpl {
 public:
  ~OfflineSpeakerDiarizationPyannoteImpl() override = default;

  explicit OfflineSpeakerDiarizationPyannoteImpl(
      const OfflineSpeakerDiarizationConfig &config)
      : config_(config),
        segmentation_model_(config_.segmentation),
        embedding_extractor_(config_.embedding),
        clustering_(std::make_unique<FastClustering>(config_.clustering)) {
    Init();
  }

  template <typename Manager>
  OfflineSpeakerDiarizationPyannoteImpl(
      Manager *mgr, const OfflineSpeakerDiarizationConfig &config)
      : config_(config),
        segmentation_model_(mgr, config_.segmentation),
        embedding_extractor_(mgr, config_.embedding),
        clustering_(std::make_unique<FastClustering>(config_.clustering)) {
    Init();
  }

  int32_t SampleRate() const override {
    const auto &meta_data = segmentation_model_.GetModelMetaData();

    return meta_data.sample_rate;
  }

  void SetConfig(const OfflineSpeakerDiarizationConfig &config) override {
    if (!config.clustering.Validate()) {
      SHERPA_ONNX_LOGE("Invalid clustering config. Skip it");
      return;
    }
    clustering_ = std::make_unique<FastClustering>(config.clustering);
    config_.clustering = config.clustering;
  }

  OfflineSpeakerDiarizationResult Process(
      const float *audio, int32_t n,
      OfflineSpeakerDiarizationProgressCallback callback = nullptr,
      void *callback_arg = nullptr) const override {
    std::vector<Matrix2D> segmentations = RunSpeakerSegmentationModel(audio, n);
    // segmentations[i] is for chunk_i
    // Each matrix is of shape (num_frames, num_powerset_classes)
    if (segmentations.empty()) {
      return {};
    }

    std::vector<Matrix2DInt32> labels;
    labels.reserve(segmentations.size());

    for (const auto &m : segmentations) {
      labels.push_back(ToMultiLabel(m));
    }

    segmentations.clear();

    if (labels.size() == 1) {
      if (callback) {
        callback(1, 1, callback_arg);
      }

      return HandleOneChunkSpecialCase(labels[0], n);
    }

    // labels[i] is a 0-1 matrix of shape (num_frames, num_speakers)

    // speaker count per frame
    Int32RowVector speakers_per_frame = ComputeSpeakersPerFrame(labels);

    if (speakers_per_frame.maxCoeff() == 0) {
      SHERPA_ONNX_LOGE("No speakers found in the audio samples");
      return {};
    }

    auto chunk_speaker_samples_list_pair = GetChunkSpeakerSampleIndexes(labels);

    // The embedding model may output NaN. valid_indexes contains indexes
    // in chunk_speaker_samples_list_pair.second that don't lead to
    // NaN embeddings.
    std::vector<int32_t> valid_indexes;
    valid_indexes.reserve(chunk_speaker_samples_list_pair.second.size());

    Matrix2D embeddings =
        ComputeEmbeddings(audio, n, chunk_speaker_samples_list_pair.second,
                          &valid_indexes, std::move(callback), callback_arg);

    if (valid_indexes.size() != chunk_speaker_samples_list_pair.second.size()) {
      std::vector<Int32Pair> chunk_speaker_pair;
      std::vector<std::vector<Int32Pair>> sample_indexes;

      chunk_speaker_pair.reserve(valid_indexes.size());
      sample_indexes.reserve(valid_indexes.size());
      for (auto i : valid_indexes) {
        chunk_speaker_pair.push_back(chunk_speaker_samples_list_pair.first[i]);
        sample_indexes.push_back(
            std::move(chunk_speaker_samples_list_pair.second[i]));
      }

      chunk_speaker_samples_list_pair.first = std::move(chunk_speaker_pair);
      chunk_speaker_samples_list_pair.second = std::move(sample_indexes);
    }

    std::vector<int32_t> cluster_labels = clustering_->Cluster(
        &embeddings(0, 0), embeddings.rows(), embeddings.cols());

    if (cluster_labels.empty()) {
      SHERPA_ONNX_LOGE("No speakers found in the audio samples");
      return {};
    }

    int32_t max_cluster_index =
        *std::max_element(cluster_labels.begin(), cluster_labels.end());

    auto chunk_speaker_to_cluster = ConvertChunkSpeakerToCluster(
        chunk_speaker_samples_list_pair.first, cluster_labels);

    auto new_labels =
        ReLabel(labels, max_cluster_index, chunk_speaker_to_cluster);

    Matrix2DInt32 speaker_count = ComputeSpeakerCount(new_labels, n);

    Matrix2DInt32 final_labels =
        FinalizeLabels(speaker_count, speakers_per_frame);

    auto result = ComputeResult(final_labels);

    return result;
  }

 private:
  void Init() { InitPowersetMapping(); }

  // see also
  // https://github.com/pyannote/pyannote-audio/blob/develop/pyannote/audio/utils/powerset.py#L68
  void InitPowersetMapping() {
    const auto &meta_data = segmentation_model_.GetModelMetaData();
    int32_t num_classes = meta_data.num_classes;
    int32_t powerset_max_classes = meta_data.powerset_max_classes;
    int32_t num_speakers = meta_data.num_speakers;

    powerset_mapping_ = Matrix2DInt32(num_classes, num_speakers);
    powerset_mapping_.setZero();

    int32_t k = 1;
    for (int32_t i = 1; i <= powerset_max_classes; ++i) {
      if (i == 1) {
        for (int32_t j = 0; j != num_speakers; ++j, ++k) {
          powerset_mapping_(k, j) = 1;
        }
      } else if (i == 2) {
        for (int32_t j = 0; j != num_speakers; ++j) {
          for (int32_t m = j + 1; m < num_speakers; ++m, ++k) {
            powerset_mapping_(k, j) = 1;
            powerset_mapping_(k, m) = 1;
          }
        }
      } else {
#if __OHOS__
        SHERPA_ONNX_LOGE(
            "powerset_max_classes = %{public}d is currently not supported!", i);
#else
        SHERPA_ONNX_LOGE(
            "powerset_max_classes = %d is currently not supported!", i);
#endif
        SHERPA_ONNX_EXIT(-1);
      }
    }
  }

  std::vector<Matrix2D> RunSpeakerSegmentationModel(const float *audio,
                                                    int32_t n) const {
    std::vector<Matrix2D> ans;

    const auto &meta_data = segmentation_model_.GetModelMetaData();
    int32_t window_size = meta_data.window_size;
    int32_t window_shift = meta_data.window_shift;

    if (n <= 0) {
#if __OHOS__
      SHERPA_ONNX_LOGE(
          "number of audio samples is %{public}d (<= 0). Please provide a "
          "positive number",
          n);
#else
      SHERPA_ONNX_LOGE(
          "number of audio samples is %d (<= 0). Please provide a positive "
          "number",
          n);
#endif
      return {};
    }

    if (n <= window_size) {
      std::vector<float> buf(window_size);
      // NOTE: buf is zero initialized by default

      std::copy(audio, audio + n, buf.data());

      Matrix2D m = ProcessChunk(buf.data());

      ans.push_back(std::move(m));

      return ans;
    }

    int32_t num_chunks = (n - window_size) / window_shift + 1;
    bool has_last_chunk = ((n - window_size) % window_shift) > 0;

    ans.reserve(num_chunks + has_last_chunk);

    const float *p = audio;

    for (int32_t i = 0; i != num_chunks; ++i, p += window_shift) {
      Matrix2D m = ProcessChunk(p);

      ans.push_back(std::move(m));
    }

    if (has_last_chunk) {
      std::vector<float> buf(window_size);
      std::copy(p, audio + n, buf.data());

      Matrix2D m = ProcessChunk(buf.data());

      ans.push_back(std::move(m));
    }

    return ans;
  }

  Matrix2D ProcessChunk(const float *p) const {
    const auto &meta_data = segmentation_model_.GetModelMetaData();
    int32_t window_size = meta_data.window_size;

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::array<int64_t, 3> shape = {1, 1, window_size};

    Ort::Value x =
        Ort::Value::CreateTensor(memory_info, const_cast<float *>(p),
                                 window_size, shape.data(), shape.size());

    Ort::Value out = segmentation_model_.Forward(std::move(x));
    std::vector<int64_t> out_shape = out.GetTensorTypeAndShapeInfo().GetShape();
    Matrix2D m(out_shape[1], out_shape[2]);
    std::copy(out.GetTensorData<float>(), out.GetTensorData<float>() + m.size(),
              &m(0, 0));
    return m;
  }

  Matrix2DInt32 ToMultiLabel(const Matrix2D &m) const {
    int32_t num_rows = m.rows();
    Matrix2DInt32 ans(num_rows, powerset_mapping_.cols());

    std::ptrdiff_t col_id;

    for (int32_t i = 0; i != num_rows; ++i) {
      m.row(i).maxCoeff(&col_id);
      ans.row(i) = powerset_mapping_.row(col_id);
    }

    return ans;
  }

  // See also
  // https://github.com/pyannote/pyannote-audio/blob/develop/pyannote/audio/pipelines/utils/diarization.py#L122
  Int32RowVector ComputeSpeakersPerFrame(
      const std::vector<Matrix2DInt32> &labels) const {
    const auto &meta_data = segmentation_model_.GetModelMetaData();
    int32_t window_size = meta_data.window_size;
    int32_t window_shift = meta_data.window_shift;
    int32_t receptive_field_shift = meta_data.receptive_field_shift;

    int32_t num_chunks = labels.size();

    int32_t num_frames = (window_size + (num_chunks - 1) * window_shift) /
                             receptive_field_shift +
                         1;

    FloatRowVector count(num_frames);
    FloatRowVector weight(num_frames);
    count.setZero();
    weight.setZero();

    for (int32_t i = 0; i != num_chunks; ++i) {
      int32_t start =
          static_cast<float>(i) * window_shift / receptive_field_shift + 0.5;

      auto seq = Eigen::seqN(start, labels[i].rows());

      count(seq).array() += labels[i].rowwise().sum().array().cast<float>();

      weight(seq).array() += 1;
    }

    return ((count.array() / (weight.array() + 1e-12f)) + 0.5).cast<int32_t>();
  }

  // ans.first: a list of (chunk_id, speaker_id)
  // ans.second: a list of list of (start_sample_index, end_sample_index)
  //
  // ans.first[i] corresponds to ans.second[i]
  std::pair<std::vector<Int32Pair>, std::vector<std::vector<Int32Pair>>>
  GetChunkSpeakerSampleIndexes(const std::vector<Matrix2DInt32> &labels) const {
    auto new_labels = ExcludeOverlap(labels);

    std::vector<Int32Pair> chunk_speaker_list;
    std::vector<std::vector<Int32Pair>> samples_index_list;

    const auto &meta_data = segmentation_model_.GetModelMetaData();
    int32_t window_size = meta_data.window_size;
    int32_t window_shift = meta_data.window_shift;
    int32_t receptive_field_shift = meta_data.receptive_field_shift;
    int32_t num_speakers = meta_data.num_speakers;

    int32_t chunk_index = 0;
    for (const auto &label : new_labels) {
      Matrix2DInt32 tmp = label.transpose();
      // tmp: (num_speakers, num_frames)
      int32_t num_frames = tmp.cols();

      int32_t sample_offset = chunk_index * window_shift;

      for (int32_t speaker_index = 0; speaker_index != num_speakers;
           ++speaker_index) {
        auto d = tmp.row(speaker_index);
        if (d.sum() < 10) {
          // skip segments less than 10 frames
          continue;
        }

        Int32Pair this_chunk_speaker = {chunk_index, speaker_index};
        std::vector<Int32Pair> this_speaker_samples;

        bool is_active = false;
        int32_t start_index;

        for (int32_t k = 0; k != num_frames; ++k) {
          if (d[k] != 0) {
            if (!is_active) {
              is_active = true;
              start_index = k;
            }
          } else if (is_active) {
            is_active = false;

            int32_t start_samples =
                static_cast<float>(start_index) / num_frames * window_size +
                sample_offset;
            int32_t end_samples =
                static_cast<float>(k) / num_frames * window_size +
                sample_offset;

            this_speaker_samples.emplace_back(start_samples, end_samples);
          }
        }

        if (is_active) {
          int32_t start_samples =
              static_cast<float>(start_index) / num_frames * window_size +
              sample_offset;
          int32_t end_samples =
              static_cast<float>(num_frames - 1) / num_frames * window_size +
              sample_offset;
          this_speaker_samples.emplace_back(start_samples, end_samples);
        }

        chunk_speaker_list.push_back(std::move(this_chunk_speaker));
        samples_index_list.push_back(std::move(this_speaker_samples));
      }  // for (int32_t speaker_index = 0;
      chunk_index += 1;
    }  // for (const auto &label : new_labels)

    return {chunk_speaker_list, samples_index_list};
  }

  // If there are multiple speakers at a frame, then this frame is excluded.
  std::vector<Matrix2DInt32> ExcludeOverlap(
      const std::vector<Matrix2DInt32> &labels) const {
    int32_t num_chunks = labels.size();
    std::vector<Matrix2DInt32> ans;
    ans.reserve(num_chunks);

    for (const auto &label : labels) {
      Matrix2DInt32 new_label(label.rows(), label.cols());
      new_label.setZero();
      Int32RowVector v = label.rowwise().sum();

      for (int32_t i = 0; i != v.cols(); ++i) {
        if (v[i] < 2) {
          new_label.row(i) = label.row(i);
        }
      }

      ans.push_back(std::move(new_label));
    }

    return ans;
  }

  /**
   * @param sample_indexes[i] contains the sample segment start and end indexes
   *                          for the i-th (chunk, speaker) pair
   * @return Return a matrix of shape (sample_indexes.size(), embedding_dim)
   *         where ans.row[i] contains the embedding for the
   *         i-th (chunk, speaker) pair
   */
  Matrix2D ComputeEmbeddings(
      const float *audio, int32_t n,
      const std::vector<std::vector<Int32Pair>> &sample_indexes,
      std::vector<int32_t> *valid_indexes,
      OfflineSpeakerDiarizationProgressCallback callback,
      void *callback_arg) const {
    const auto &meta_data = segmentation_model_.GetModelMetaData();
    int32_t sample_rate = meta_data.sample_rate;
    Matrix2D ans(sample_indexes.size(), embedding_extractor_.Dim());

    auto IsNaNWrapper = [](float f) -> bool { return std::isnan(f); };

    int32_t k = 0;
    int32_t cur_row_index = 0;
    for (const auto &v : sample_indexes) {
      auto stream = embedding_extractor_.CreateStream();
      for (const auto &p : v) {
        int32_t end = (p.second <= n) ? p.second : n;
        int32_t num_samples = end - p.first;

        if (num_samples > 0) {
          stream->AcceptWaveform(sample_rate, audio + p.first, num_samples);
        }
      }

      stream->InputFinished();
      if (!embedding_extractor_.IsReady(stream.get())) {
        SHERPA_ONNX_LOGE(
            "This segment is too short, which should not happen since we have "
            "already filtered short segments");
        SHERPA_ONNX_EXIT(-1);
      }

      std::vector<float> embedding = embedding_extractor_.Compute(stream.get());

      if (std::none_of(embedding.begin(), embedding.end(), IsNaNWrapper)) {
        // a valid embedding
        std::copy(embedding.begin(), embedding.end(), &ans(cur_row_index, 0));
        cur_row_index += 1;
        valid_indexes->push_back(k);
      }

      k += 1;

      if (callback) {
        callback(k, ans.rows(), callback_arg);
      }
    }

    if (k != cur_row_index) {
      auto seq = Eigen::seqN(0, cur_row_index);
      ans = ans(seq, Eigen::all);
    }

    return ans;
  }

  std::unordered_map<Int32Pair, int32_t, PairHash> ConvertChunkSpeakerToCluster(
      const std::vector<Int32Pair> &chunk_speaker_pair,
      const std::vector<int32_t> &cluster_labels) const {
    std::unordered_map<Int32Pair, int32_t, PairHash> ans;

    int32_t k = 0;
    for (const auto &p : chunk_speaker_pair) {
      ans[p] = cluster_labels[k];
      k += 1;
    }

    return ans;
  }

  std::vector<Matrix2DInt32> ReLabel(
      const std::vector<Matrix2DInt32> &labels, int32_t max_cluster_index,
      std::unordered_map<Int32Pair, int32_t, PairHash> chunk_speaker_to_cluster)
      const {
    std::vector<Matrix2DInt32> new_labels;
    new_labels.reserve(labels.size());

    int32_t chunk_index = 0;
    for (const auto &label : labels) {
      Matrix2DInt32 new_label(label.rows(), max_cluster_index + 1);
      new_label.setZero();

      Matrix2DInt32 t = label.transpose();
      // t: (num_speakers, num_frames)

      for (int32_t speaker_index = 0; speaker_index != t.rows();
           ++speaker_index) {
        if (chunk_speaker_to_cluster.count({chunk_index, speaker_index}) == 0) {
          continue;
        }

        int32_t new_speaker_index =
            chunk_speaker_to_cluster.at({chunk_index, speaker_index});

        for (int32_t k = 0; k != t.cols(); ++k) {
          if (t(speaker_index, k) == 1) {
            new_label(k, new_speaker_index) = 1;
          }
        }
      }

      new_labels.push_back(std::move(new_label));

      chunk_index += 1;
    }

    return new_labels;
  }

  Matrix2DInt32 ComputeSpeakerCount(const std::vector<Matrix2DInt32> &labels,
                                    int32_t num_samples) const {
    const auto &meta_data = segmentation_model_.GetModelMetaData();
    int32_t window_size = meta_data.window_size;
    int32_t window_shift = meta_data.window_shift;
    int32_t receptive_field_shift = meta_data.receptive_field_shift;

    int32_t num_chunks = labels.size();

    int32_t num_frames = (window_size + (num_chunks - 1) * window_shift) /
                             receptive_field_shift +
                         1;

    Matrix2DInt32 count(num_frames, labels[0].cols());
    count.setZero();

    for (int32_t i = 0; i != num_chunks; ++i) {
      int32_t start =
          static_cast<float>(i) * window_shift / receptive_field_shift + 0.5;

      auto seq = Eigen::seqN(start, labels[i].rows());

      count(seq, Eigen::all).array() += labels[i].array();
    }

    bool has_last_chunk = ((num_samples - window_size) % window_shift) > 0;

    if (!has_last_chunk) {
      return count;
    }

    int32_t last_frame = num_samples / receptive_field_shift;
    return count(Eigen::seq(0, last_frame), Eigen::all);
  }

  Matrix2DInt32 FinalizeLabels(const Matrix2DInt32 &count,
                               const Int32RowVector &speakers_per_frame) const {
    int32_t num_rows = count.rows();
    int32_t num_cols = count.cols();

    Matrix2DInt32 ans(num_rows, num_cols);
    ans.setZero();

    for (int32_t i = 0; i != num_rows; ++i) {
      int32_t k = speakers_per_frame[i];
      if (k == 0) {
        continue;
      }
      auto top_k = TopkIndex(&count(i, 0), num_cols, k);

      for (int32_t m : top_k) {
        ans(i, m) = 1;
      }
    }

    return ans;
  }

  OfflineSpeakerDiarizationResult ComputeResult(
      const Matrix2DInt32 &final_labels) const {
    Matrix2DInt32 final_labels_t = final_labels.transpose();
    int32_t num_speakers = final_labels_t.rows();
    int32_t num_frames = final_labels_t.cols();

    const auto &meta_data = segmentation_model_.GetModelMetaData();
    int32_t window_size = meta_data.window_size;
    int32_t window_shift = meta_data.window_shift;
    int32_t receptive_field_shift = meta_data.receptive_field_shift;
    int32_t receptive_field_size = meta_data.receptive_field_size;
    int32_t sample_rate = meta_data.sample_rate;

    float scale = static_cast<float>(receptive_field_shift) / sample_rate;
    float scale_offset = 0.5 * receptive_field_size / sample_rate;

    OfflineSpeakerDiarizationResult ans;

    for (int32_t speaker_index = 0; speaker_index != num_speakers;
         ++speaker_index) {
      std::vector<OfflineSpeakerDiarizationSegment> this_speaker;

      bool is_active = final_labels_t(speaker_index, 0) > 0;
      int32_t start_index = is_active ? 0 : -1;

      for (int32_t frame_index = 1; frame_index != num_frames; ++frame_index) {
        if (is_active) {
          if (final_labels_t(speaker_index, frame_index) == 0) {
            float start_time = start_index * scale + scale_offset;
            float end_time = frame_index * scale + scale_offset;

            OfflineSpeakerDiarizationSegment segment(start_time, end_time,
                                                     speaker_index);
            this_speaker.push_back(segment);

            is_active = false;
          }
        } else if (final_labels_t(speaker_index, frame_index) == 1) {
          is_active = true;
          start_index = frame_index;
        }
      }

      if (is_active) {
        float start_time = start_index * scale + scale_offset;
        float end_time = (num_frames - 1) * scale + scale_offset;

        OfflineSpeakerDiarizationSegment segment(start_time, end_time,
                                                 speaker_index);
        this_speaker.push_back(segment);
      }

      // merge segments if the gap between them is less than min_duration_off
      MergeSegments(&this_speaker);

      for (const auto &seg : this_speaker) {
        if (seg.Duration() > config_.min_duration_on) {
          ans.Add(seg);
        }
      }
    }  // for (int32_t speaker_index = 0; speaker_index != num_speakers;

    return ans;
  }

  OfflineSpeakerDiarizationResult HandleOneChunkSpecialCase(
      const Matrix2DInt32 &final_labels, int32_t num_samples) const {
    const auto &meta_data = segmentation_model_.GetModelMetaData();
    int32_t window_size = meta_data.window_size;
    int32_t window_shift = meta_data.window_shift;
    int32_t receptive_field_shift = meta_data.receptive_field_shift;

    bool has_last_chunk = (num_samples - window_size) % window_shift > 0;
    if (!has_last_chunk) {
      return ComputeResult(final_labels);
    }

    int32_t num_frames = final_labels.rows();

    int32_t new_num_frames = num_samples / receptive_field_shift;

    num_frames = (new_num_frames <= num_frames) ? new_num_frames : num_frames;

    return ComputeResult(final_labels(Eigen::seq(0, num_frames), Eigen::all));
  }

  void MergeSegments(
      std::vector<OfflineSpeakerDiarizationSegment> *segments) const {
    float min_duration_off = config_.min_duration_off;
    bool changed = true;
    while (changed) {
      changed = false;
      for (int32_t i = 0; i < static_cast<int32_t>(segments->size()) - 1; ++i) {
        auto s = (*segments)[i].Merge((*segments)[i + 1], min_duration_off);
        if (s) {
          (*segments)[i] = s.value();
          segments->erase(segments->begin() + i + 1);

          changed = true;
          break;
        }
      }
    }
  }

 private:
  OfflineSpeakerDiarizationConfig config_;
  OfflineSpeakerSegmentationPyannoteModel segmentation_model_;
  SpeakerEmbeddingExtractor embedding_extractor_;
  std::unique_ptr<FastClustering> clustering_;
  Matrix2DInt32 powerset_mapping_;
};

}  // namespace sherpa_onnx
#endif  // SHERPA_ONNX_CSRC_OFFLINE_SPEAKER_DIARIZATION_PYANNOTE_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-speaker-diarization-result.cc
================================================
// sherpa-onnx/csrc/offline-speaker-diarization-result.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-speaker-diarization-result.h"

#include <algorithm>
#include <array>
#include <cstdio>
#include <sstream>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

OfflineSpeakerDiarizationSegment::OfflineSpeakerDiarizationSegment(
    float start, float end, int32_t speaker, const std::string &text /*= {}*/) {
  if (start > end) {
    SHERPA_ONNX_LOGE("start %.3f should be less than end %.3f", start, end);
    SHERPA_ONNX_EXIT(-1);
  }

  start_ = start;
  end_ = end;
  speaker_ = speaker;
  text_ = text;
}

std::optional<OfflineSpeakerDiarizationSegment>
OfflineSpeakerDiarizationSegment::Merge(
    const OfflineSpeakerDiarizationSegment &other, float gap) const {
  if (other.speaker_ != speaker_) {
    SHERPA_ONNX_LOGE(
        "The two segments should have the same speaker. this->speaker: %d, "
        "other.speaker: %d",
        speaker_, other.speaker_);
    return std::nullopt;
  }

  if (end_ < other.start_ && end_ + gap >= other.start_) {
    return OfflineSpeakerDiarizationSegment(start_, other.end_, speaker_);
  } else if (other.end_ < start_ && other.end_ + gap >= start_) {
    return OfflineSpeakerDiarizationSegment(other.start_, end_, speaker_);
  } else {
    return std::nullopt;
  }
}

std::string OfflineSpeakerDiarizationSegment::ToString() const {
  std::array<char, 128> s{};

  snprintf(s.data(), s.size(), "%.3f -- %.3f speaker_%02d", start_, end_,
           speaker_);

  std::ostringstream os;
  os << s.data();

  if (!text_.empty()) {
    os << " " << text_;
  }

  return os.str();
}

void OfflineSpeakerDiarizationResult::Add(
    const OfflineSpeakerDiarizationSegment &segment) {
  segments_.push_back(segment);
}

int32_t OfflineSpeakerDiarizationResult::NumSpeakers() const {
  std::unordered_set<int32_t> count;
  for (const auto &s : segments_) {
    count.insert(s.Speaker());
  }

  return count.size();
}

int32_t OfflineSpeakerDiarizationResult::NumSegments() const {
  return segments_.size();
}

// Return a list of segments sorted by segment.start time
std::vector<OfflineSpeakerDiarizationSegment>
OfflineSpeakerDiarizationResult::SortByStartTime() const {
  auto ans = segments_;
  std::sort(ans.begin(), ans.end(), [](const auto &a, const auto &b) {
    return (a.Start() < b.Start()) ||
           ((a.Start() == b.Start()) && (a.Speaker() < b.Speaker()));
  });

  return ans;
}

std::vector<std::vector<OfflineSpeakerDiarizationSegment>>
OfflineSpeakerDiarizationResult::SortBySpeaker() const {
  auto tmp = segments_;
  std::sort(tmp.begin(), tmp.end(), [](const auto &a, const auto &b) {
    return (a.Speaker() < b.Speaker()) ||
           ((a.Speaker() == b.Speaker()) && (a.Start() < b.Start()));
  });

  std::vector<std::vector<OfflineSpeakerDiarizationSegment>> ans(NumSpeakers());
  for (auto &s : tmp) {
    ans[s.Speaker()].push_back(std::move(s));
  }

  return ans;
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-speaker-diarization-result.h
================================================
// sherpa-onnx/csrc/offline-speaker-diarization-result.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_SPEAKER_DIARIZATION_RESULT_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SPEAKER_DIARIZATION_RESULT_H_

#include <cstdint>
#include <optional>
#include <string>
#include <vector>

namespace sherpa_onnx {

class OfflineSpeakerDiarizationSegment {
 public:
  OfflineSpeakerDiarizationSegment(float start, float end, int32_t speaker,
                                   const std::string &text = {});

  // If the gap between the two segments is less than the given gap, then we
  // merge them and return a new segment. Otherwise, it returns null.
  std::optional<OfflineSpeakerDiarizationSegment> Merge(
      const OfflineSpeakerDiarizationSegment &other, float gap) const;

  float Start() const { return start_; }
  float End() const { return end_; }
  int32_t Speaker() const { return speaker_; }
  const std::string &Text() const { return text_; }
  float Duration() const { return end_ - start_; }

  void SetText(const std::string &text) { text_ = text; }

  std::string ToString() const;

 private:
  float start_;       // in seconds
  float end_;         // in seconds
  int32_t speaker_;   // ID of the speaker, starting from 0
  std::string text_;  // If not empty, it contains the speech recognition result
                      // of this segment
};

class OfflineSpeakerDiarizationResult {
 public:
  // Add a new segment
  void Add(const OfflineSpeakerDiarizationSegment &segment);

  // Number of distinct speakers contained in this object at this point
  int32_t NumSpeakers() const;

  int32_t NumSegments() const;

  // Return a list of segments sorted by segment.start time
  std::vector<OfflineSpeakerDiarizationSegment> SortByStartTime() const;

  // ans.size() == NumSpeakers().
  // ans[i] is for speaker_i and is sorted by start time
  std::vector<std::vector<OfflineSpeakerDiarizationSegment>> SortBySpeaker()
      const;

 private:
  std::vector<OfflineSpeakerDiarizationSegment> segments_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_SPEAKER_DIARIZATION_RESULT_H_


================================================
FILE: sherpa-onnx/csrc/offline-speaker-diarization.cc
================================================
// sherpa-onnx/csrc/offline-speaker-diarization.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-speaker-diarization.h"

#include <string>
#include <utility>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/offline-speaker-diarization-impl.h"

namespace sherpa_onnx {

void OfflineSpeakerDiarizationConfig::Register(ParseOptions *po) {
  ParseOptions po_segmentation("segmentation", po);
  segmentation.Register(&po_segmentation);

  ParseOptions po_embedding("embedding", po);
  embedding.Register(&po_embedding);

  ParseOptions po_clustering("clustering", po);
  clustering.Register(&po_clustering);

  po->Register("min-duration-on", &min_duration_on,
               "if a segment is less than this value, then it is discarded. "
               "Set it to 0 so that no segment is discarded");

  po->Register("min-duration-off", &min_duration_off,
               "if the gap between to segments of the same speaker is less "
               "than this value, then these two segments are merged into a "
               "single segment. We do it recursively.");
}

bool OfflineSpeakerDiarizationConfig::Validate() const {
  if (!segmentation.Validate()) {
    return false;
  }

  if (!embedding.Validate()) {
    return false;
  }

  if (!clustering.Validate()) {
    return false;
  }

  if (min_duration_on < 0) {
    SHERPA_ONNX_LOGE("min_duration_on %.3f is negative", min_duration_on);
    return false;
  }

  if (min_duration_off < 0) {
    SHERPA_ONNX_LOGE("min_duration_off %.3f is negative", min_duration_off);
    return false;
  }

  return true;
}

std::string OfflineSpeakerDiarizationConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineSpeakerDiarizationConfig(";
  os << "segmentation=" << segmentation.ToString() << ", ";
  os << "embedding=" << embedding.ToString() << ", ";
  os << "clustering=" << clustering.ToString() << ", ";
  os << "min_duration_on=" << min_duration_on << ", ";
  os << "min_duration_off=" << min_duration_off << ")";

  return os.str();
}

OfflineSpeakerDiarization::OfflineSpeakerDiarization(
    const OfflineSpeakerDiarizationConfig &config)
    : impl_(OfflineSpeakerDiarizationImpl::Create(config)) {}

template <typename Manager>
OfflineSpeakerDiarization::OfflineSpeakerDiarization(
    Manager *mgr, const OfflineSpeakerDiarizationConfig &config)
    : impl_(OfflineSpeakerDiarizationImpl::Create(mgr, config)) {}

OfflineSpeakerDiarization::~OfflineSpeakerDiarization() = default;

int32_t OfflineSpeakerDiarization::SampleRate() const {
  return impl_->SampleRate();
}

void OfflineSpeakerDiarization::SetConfig(
    const OfflineSpeakerDiarizationConfig &config) {
  impl_->SetConfig(config);
}

OfflineSpeakerDiarizationResult OfflineSpeakerDiarization::Process(
    const float *audio, int32_t n,
    OfflineSpeakerDiarizationProgressCallback callback /*= nullptr*/,
    void *callback_arg /*= nullptr*/) const {
  return impl_->Process(audio, n, std::move(callback), callback_arg);
}

#if __ANDROID_API__ >= 9
template OfflineSpeakerDiarization::OfflineSpeakerDiarization(
    AAssetManager *mgr, const OfflineSpeakerDiarizationConfig &config);
#endif

#if __OHOS__
template OfflineSpeakerDiarization::OfflineSpeakerDiarization(
    NativeResourceManager *mgr, const OfflineSpeakerDiarizationConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-speaker-diarization.h
================================================
// sherpa-onnx/csrc/offline-speaker-diarization.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_SPEAKER_DIARIZATION_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SPEAKER_DIARIZATION_H_

#include <functional>
#include <memory>
#include <string>

#include "sherpa-onnx/csrc/fast-clustering-config.h"
#include "sherpa-onnx/csrc/offline-speaker-diarization-result.h"
#include "sherpa-onnx/csrc/offline-speaker-segmentation-model-config.h"
#include "sherpa-onnx/csrc/speaker-embedding-extractor.h"

namespace sherpa_onnx {

struct OfflineSpeakerDiarizationConfig {
  OfflineSpeakerSegmentationModelConfig segmentation;
  SpeakerEmbeddingExtractorConfig embedding;
  FastClusteringConfig clustering;

  // if a segment is less than this value, then it is discarded
  float min_duration_on = 0.3;  // in seconds

  // if the gap between to segments of the same speaker is less than this value,
  // then these two segments are merged into a single segment.
  // We do this recursively.
  float min_duration_off = 0.5;  // in seconds

  OfflineSpeakerDiarizationConfig() = default;

  OfflineSpeakerDiarizationConfig(
      const OfflineSpeakerSegmentationModelConfig &segmentation,
      const SpeakerEmbeddingExtractorConfig &embedding,
      const FastClusteringConfig &clustering, float min_duration_on,
      float min_duration_off)
      : segmentation(segmentation),
        embedding(embedding),
        clustering(clustering),
        min_duration_on(min_duration_on),
        min_duration_off(min_duration_off) {}

  void Register(ParseOptions *po);
  bool Validate() const;
  std::string ToString() const;
};

class OfflineSpeakerDiarizationImpl;

using OfflineSpeakerDiarizationProgressCallback = std::function<int32_t(
    int32_t processed_chunks, int32_t num_chunks, void *arg)>;

class OfflineSpeakerDiarization {
 public:
  explicit OfflineSpeakerDiarization(
      const OfflineSpeakerDiarizationConfig &config);

  template <typename Manager>
  OfflineSpeakerDiarization(Manager *mgr,
                            const OfflineSpeakerDiarizationConfig &config);

  ~OfflineSpeakerDiarization();

  // Expected sample rate of the input audio samples
  int32_t SampleRate() const;

  // Note: Only config.clustering is used. All other fields in config are
  // ignored
  void SetConfig(const OfflineSpeakerDiarizationConfig &config);

  OfflineSpeakerDiarizationResult Process(
      const float *audio, int32_t n,
      OfflineSpeakerDiarizationProgressCallback callback = nullptr,
      void *callback_arg = nullptr) const;

 private:
  std::unique_ptr<OfflineSpeakerDiarizationImpl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_SPEAKER_DIARIZATION_H_


================================================
FILE: sherpa-onnx/csrc/offline-speaker-segmentation-model-config.cc
================================================
// sherpa-onnx/csrc/offline-speaker-segmentation-model-config.cc
//
// Copyright (c)  2024  Xiaomi Corporation
#include "sherpa-onnx/csrc/offline-speaker-segmentation-model-config.h"

#include <sstream>
#include <string>

#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OfflineSpeakerSegmentationModelConfig::Register(ParseOptions *po) {
  pyannote.Register(po);

  po->Register("num-threads", &num_threads,
               "Number of threads to run the neural network");

  po->Register("debug", &debug,
               "true to print model information while loading it.");

  po->Register("provider", &provider,
               "Specify a provider to use: cpu, cuda, coreml");
}

bool OfflineSpeakerSegmentationModelConfig::Validate() const {
  if (num_threads < 1) {
    SHERPA_ONNX_LOGE("num_threads should be > 0. Given %d", num_threads);
    return false;
  }

  if (!pyannote.model.empty()) {
    return pyannote.Validate();
  }

  if (pyannote.model.empty()) {
    SHERPA_ONNX_LOGE(
        "You have to provide at least one speaker segmentation model");
    return false;
  }

  return true;
}

std::string OfflineSpeakerSegmentationModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineSpeakerSegmentationModelConfig(";
  os << "pyannote=" << pyannote.ToString() << ", ";
  os << "num_threads=" << num_threads << ", ";
  os << "debug=" << (debug ? "True" : "False") << ", ";
  os << "provider=\"" << provider << "\")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-speaker-segmentation-model-config.h
================================================
// sherpa-onnx/csrc/offline-speaker-segmentation-model-config.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_SPEAKER_SEGMENTATION_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SPEAKER_SEGMENTATION_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/offline-speaker-segmentation-pyannote-model-config.h"
#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineSpeakerSegmentationModelConfig {
  OfflineSpeakerSegmentationPyannoteModelConfig pyannote;

  int32_t num_threads = 1;
  bool debug = false;
  std::string provider = "cpu";

  OfflineSpeakerSegmentationModelConfig() = default;

  explicit OfflineSpeakerSegmentationModelConfig(
      const OfflineSpeakerSegmentationPyannoteModelConfig &pyannote,
      int32_t num_threads, bool debug, const std::string &provider)
      : pyannote(pyannote),
        num_threads(num_threads),
        debug(debug),
        provider(provider) {}

  void Register(ParseOptions *po);

  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_SPEAKER_SEGMENTATION_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-speaker-segmentation-pyannote-model-config.cc
================================================
// sherpa-onnx/csrc/offline-speaker-segmentation-pyannote-model-config.cc
//
// Copyright (c)  2024  Xiaomi Corporation
#include "sherpa-onnx/csrc/offline-speaker-segmentation-pyannote-model-config.h"

#include <sstream>
#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OfflineSpeakerSegmentationPyannoteModelConfig::Register(ParseOptions *po) {
  po->Register("pyannote-model", &model,
               "Path to model.onnx of the Pyannote segmentation model.");
}

bool OfflineSpeakerSegmentationPyannoteModelConfig::Validate() const {
  if (!FileExists(model)) {
    SHERPA_ONNX_LOGE("Pyannote segmentation model: '%s' does not exist",
                     model.c_str());
    return false;
  }

  return true;
}

std::string OfflineSpeakerSegmentationPyannoteModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineSpeakerSegmentationPyannoteModelConfig(";
  os << "model=\"" << model << "\")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-speaker-segmentation-pyannote-model-config.h
================================================
// sherpa-onnx/csrc/offline-speaker-segmentation-pyannote-model-config.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_SPEAKER_SEGMENTATION_PYANNOTE_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SPEAKER_SEGMENTATION_PYANNOTE_MODEL_CONFIG_H_
#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineSpeakerSegmentationPyannoteModelConfig {
  std::string model;

  OfflineSpeakerSegmentationPyannoteModelConfig() = default;

  explicit OfflineSpeakerSegmentationPyannoteModelConfig(
      const std::string &model)
      : model(model) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_SPEAKER_SEGMENTATION_PYANNOTE_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-speaker-segmentation-pyannote-model-meta-data.h
================================================
// sherpa-onnx/csrc/offline-speaker-segmentation-pyannote-model-meta-data.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_SPEAKER_SEGMENTATION_PYANNOTE_MODEL_META_DATA_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SPEAKER_SEGMENTATION_PYANNOTE_MODEL_META_DATA_H_

#include <cstdint>
#include <string>

namespace sherpa_onnx {

// If you are not sure what each field means, please
// have a look of the Python file in the model directory that
// you have downloaded.
struct OfflineSpeakerSegmentationPyannoteModelMetaData {
  int32_t sample_rate = 0;
  int32_t window_size = 0;            // in samples
  int32_t window_shift = 0;           // in samples
  int32_t receptive_field_size = 0;   // in samples
  int32_t receptive_field_shift = 0;  // in samples
  int32_t num_speakers = 0;
  int32_t powerset_max_classes = 0;
  int32_t num_classes = 0;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_SPEAKER_SEGMENTATION_PYANNOTE_MODEL_META_DATA_H_


================================================
FILE: sherpa-onnx/csrc/offline-speaker-segmentation-pyannote-model.cc
================================================
// sherpa-onnx/csrc/offline-speaker-segmentation-pyannote-model.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-speaker-segmentation-pyannote-model.h"

#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"

namespace sherpa_onnx {

class OfflineSpeakerSegmentationPyannoteModel::Impl {
 public:
  explicit Impl(const OfflineSpeakerSegmentationModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(config_.pyannote.model);
    Init(buf.data(), buf.size());
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineSpeakerSegmentationModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(mgr, config_.pyannote.model);
    Init(buf.data(), buf.size());
  }

  const OfflineSpeakerSegmentationPyannoteModelMetaData &GetModelMetaData()
      const {
    return meta_data_;
  }

  Ort::Value Forward(Ort::Value x) {
    auto out = sess_->Run({}, input_names_ptr_.data(), &x, 1,
                          output_names_ptr_.data(), output_names_ptr_.size());

    return std::move(out[0]);
  }

 private:
  void Init(void *model_data, size_t model_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);

    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      PrintModelMetadata(os, meta_data);
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
    SHERPA_ONNX_READ_META_DATA(meta_data_.sample_rate, "sample_rate");
    SHERPA_ONNX_READ_META_DATA(meta_data_.window_size, "window_size");

    meta_data_.window_shift =
        static_cast<int32_t>(0.1 * meta_data_.window_size);

    SHERPA_ONNX_READ_META_DATA(meta_data_.receptive_field_size,
                               "receptive_field_size");
    SHERPA_ONNX_READ_META_DATA(meta_data_.receptive_field_shift,
                               "receptive_field_shift");
    SHERPA_ONNX_READ_META_DATA(meta_data_.num_speakers, "num_speakers");
    SHERPA_ONNX_READ_META_DATA(meta_data_.powerset_max_classes,
                               "powerset_max_classes");
    SHERPA_ONNX_READ_META_DATA(meta_data_.num_classes, "num_classes");
  }

 private:
  OfflineSpeakerSegmentationModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;

  OfflineSpeakerSegmentationPyannoteModelMetaData meta_data_;
};

OfflineSpeakerSegmentationPyannoteModel::
    OfflineSpeakerSegmentationPyannoteModel(  // NOLINT
        const OfflineSpeakerSegmentationModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}  // NOLINT

template <typename Manager>
OfflineSpeakerSegmentationPyannoteModel::
    OfflineSpeakerSegmentationPyannoteModel(  // NOLINT
        Manager *mgr, const OfflineSpeakerSegmentationModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}  // NOLINT

OfflineSpeakerSegmentationPyannoteModel::
    ~OfflineSpeakerSegmentationPyannoteModel() = default;  // NOLINT

const OfflineSpeakerSegmentationPyannoteModelMetaData &
OfflineSpeakerSegmentationPyannoteModel::GetModelMetaData() const {
  return impl_->GetModelMetaData();
}

Ort::Value OfflineSpeakerSegmentationPyannoteModel::Forward(
    Ort::Value x) const {
  return impl_->Forward(std::move(x));
}

#if __ANDROID_API__ >= 9
template OfflineSpeakerSegmentationPyannoteModel::
    OfflineSpeakerSegmentationPyannoteModel(  // NOLINT
        AAssetManager *mgr,
        const OfflineSpeakerSegmentationModelConfig &config);
#endif

#if __OHOS__
template OfflineSpeakerSegmentationPyannoteModel::
    OfflineSpeakerSegmentationPyannoteModel(  // NOLINT
        NativeResourceManager *mgr,
        const OfflineSpeakerSegmentationModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-speaker-segmentation-pyannote-model.h
================================================
// sherpa-onnx/csrc/offline-speaker-segmentation-pyannote-model.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_SPEAKER_SEGMENTATION_PYANNOTE_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SPEAKER_SEGMENTATION_PYANNOTE_MODEL_H_

#include <memory>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-speaker-segmentation-model-config.h"
#include "sherpa-onnx/csrc/offline-speaker-segmentation-pyannote-model-meta-data.h"

namespace sherpa_onnx {

class OfflineSpeakerSegmentationPyannoteModel {
 public:
  explicit OfflineSpeakerSegmentationPyannoteModel(
      const OfflineSpeakerSegmentationModelConfig &config);

  template <typename Manager>
  OfflineSpeakerSegmentationPyannoteModel(
      Manager *mgr, const OfflineSpeakerSegmentationModelConfig &config);

  ~OfflineSpeakerSegmentationPyannoteModel();

  const OfflineSpeakerSegmentationPyannoteModelMetaData &GetModelMetaData()
      const;

  /**
   * @param x A 3-D float tensor of shape (batch_size, 1, num_samples)
   * @return Return a float tensor of
   *         shape (batch_size, num_frames, num_speakers). Note that
   *         num_speakers here uses powerset encoding.
   */
  Ort::Value Forward(Ort::Value x) const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_SPEAKER_SEGMENTATION_PYANNOTE_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/offline-speech-denoiser-dpdfnet-impl.h
================================================
// sherpa-onnx/csrc/offline-speech-denoiser-dpdfnet-impl.h
//
// Copyright (c)  2026  Ceva Inc

#ifndef SHERPA_ONNX_CSRC_OFFLINE_SPEECH_DENOISER_DPDFNET_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SPEECH_DENOISER_DPDFNET_IMPL_H_

#include <algorithm>
#include <array>
#include <cmath>
#include <memory>
#include <utility>
#include <vector>

#include "kaldi-native-fbank/csrc/istft.h"
#include "kaldi-native-fbank/csrc/stft.h"
#include "sherpa-onnx/csrc/math.h"
#include "sherpa-onnx/csrc/offline-speech-denoiser-dpdfnet-model.h"
#include "sherpa-onnx/csrc/offline-speech-denoiser-impl.h"
#include "sherpa-onnx/csrc/offline-speech-denoiser.h"
#include "sherpa-onnx/csrc/resample.h"

namespace sherpa_onnx {

class OfflineSpeechDenoiserDpdfNetImpl : public OfflineSpeechDenoiserImpl {
 public:
  explicit OfflineSpeechDenoiserDpdfNetImpl(
      const OfflineSpeechDenoiserConfig &config)
      : model_(config.model) {}

  template <typename Manager>
  OfflineSpeechDenoiserDpdfNetImpl(Manager *mgr,
                                   const OfflineSpeechDenoiserConfig &config)
      : model_(mgr, config.model) {}

  DenoisedAudio Run(const float *samples, int32_t n,
                    int32_t sample_rate) const override {
    const auto &meta = model_.GetMetaData();

    std::vector<float> tmp;
    auto p = samples;

    if (sample_rate != meta.sample_rate) {
      SHERPA_ONNX_LOGE(
          "Creating a resampler:\n"
          "   in_sample_rate: %d\n"
          "   output_sample_rate: %d\n",
          sample_rate, meta.sample_rate);

      float min_freq = std::min<int32_t>(sample_rate, meta.sample_rate);
      float lowpass_cutoff = 0.99f * 0.5f * min_freq;

      int32_t lowpass_filter_width = 6;
      auto resampler = std::make_unique<LinearResample>(
          sample_rate, meta.sample_rate, lowpass_cutoff, lowpass_filter_width);
      resampler->Resample(samples, n, true, &tmp);
      p = tmp.data();
      n = tmp.size();
    }

    auto stft_config = GetStftConfig();
    knf::Stft stft(stft_config);
    knf::StftResult stft_result = stft.Compute(p, n);

    auto state = model_.GetInitState();
    Ort::Value next_state{nullptr};

    knf::StftResult enhanced_stft_result;
    enhanced_stft_result.num_frames = stft_result.num_frames;
    for (int32_t i = 0; i < stft_result.num_frames; ++i) {
      auto frame = Process(stft_result, i, std::move(state), &next_state);
      state = std::move(next_state);

      enhanced_stft_result.real.insert(enhanced_stft_result.real.end(),
                                       frame.first.begin(), frame.first.end());
      enhanced_stft_result.imag.insert(enhanced_stft_result.imag.end(),
                                       frame.second.begin(),
                                       frame.second.end());
    }

    knf::IStft istft(stft_config);

    DenoisedAudio denoised_audio;
    denoised_audio.sample_rate = meta.sample_rate;
    denoised_audio.samples = ShiftWaveform(istft.Compute(enhanced_stft_result),
                                           meta.window_length * 2);
    return denoised_audio;
  }

  int32_t GetSampleRate() const override {
    return model_.GetMetaData().sample_rate;
  }

 private:
  static std::vector<float> ShiftWaveform(std::vector<float> samples,
                                          int32_t shift) {
    if (samples.size() > static_cast<size_t>(shift)) {
      std::copy(samples.begin() + shift, samples.end(), samples.begin());
      samples.resize(samples.size() - shift);
    } else {
      samples.clear();
    }

    samples.resize(samples.size() + shift, 0.0f);
    return samples;
  }

  knf::StftConfig GetStftConfig() const {
    const auto &meta = model_.GetMetaData();

    knf::StftConfig stft_config;
    stft_config.n_fft = meta.n_fft;
    stft_config.hop_length = meta.hop_length;
    stft_config.win_length = meta.window_length;
    stft_config.normalized = meta.normalized;
    stft_config.center = meta.center;
    stft_config.pad_mode = meta.pad_mode;
    stft_config.window_type = meta.window_type;
    stft_config.window = MakeVorbisWindow(meta.window_length);

    return stft_config;
  }

  std::pair<std::vector<float>, std::vector<float>> Process(
      const knf::StftResult &stft_result, int32_t frame_index, Ort::Value state,
      Ort::Value *next_state) const {
    const auto &meta = model_.GetMetaData();
    const int32_t n_fft = meta.n_fft;

    std::vector<float> x((n_fft / 2 + 1) * 2);

    const float *p_real =
        stft_result.real.data() + frame_index * (n_fft / 2 + 1);
    const float *p_imag =
        stft_result.imag.data() + frame_index * (n_fft / 2 + 1);

    for (int32_t i = 0; i < n_fft / 2 + 1; ++i) {
      x[2 * i] = p_real[i];
      x[2 * i + 1] = p_imag[i];
    }

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::array<int64_t, 4> x_shape{1, 1, n_fft / 2 + 1, 2};
    Ort::Value x_tensor = Ort::Value::CreateTensor<float>(
        memory_info, x.data(), x.size(), x_shape.data(), x_shape.size());

    Ort::Value output{nullptr};
    std::tie(output, *next_state) =
        model_.Run(std::move(x_tensor), std::move(state));

    std::vector<float> real(n_fft / 2 + 1);
    std::vector<float> imag(n_fft / 2 + 1);
    const auto *p = output.GetTensorData<float>();
    for (int32_t i = 0; i < n_fft / 2 + 1; ++i) {
      real[i] = p[2 * i];
      imag[i] = p[2 * i + 1];
    }

    return {std::move(real), std::move(imag)};
  }

 private:
  OfflineSpeechDenoiserDpdfNetModel model_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_SPEECH_DENOISER_DPDFNET_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-speech-denoiser-dpdfnet-model-config.cc
================================================
// sherpa-onnx/csrc/offline-speech-denoiser-dpdfnet-model-config.cc
//
// Copyright (c)  2026  Ceva Inc

#include "sherpa-onnx/csrc/offline-speech-denoiser-dpdfnet-model-config.h"

#include <sstream>
#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OfflineSpeechDenoiserDpdfNetModelConfig::Register(ParseOptions *po) {
  po->Register("speech-denoiser-dpdfnet-model", &model,
               "Path to a DPDFNet ONNX model for speech denoising, e.g. "
               "baseline/dpdfnet2/dpdfnet4/dpdfnet8 (16 kHz) or "
               "dpdfnet2_48khz_hr (48 kHz). Download DPDFNet models from the "
               "sherpa-onnx GitHub release or the official Hugging Face hub: "
               "https://github.com/k2-fsa/sherpa-onnx/releases/tag/"
               "speech-enhancement-models or "
               "https://huggingface.co/Ceva-IP/DPDFNet");
}

bool OfflineSpeechDenoiserDpdfNetModelConfig::Validate() const {
  if (model.empty()) {
    SHERPA_ONNX_LOGE("Please provide --speech-denoiser-dpdfnet-model");
    return false;
  }

  if (!FileExists(model)) {
    SHERPA_ONNX_LOGE("dpdfnet model file '%s' does not exist", model.c_str());
    return false;
  }

  return true;
}

std::string OfflineSpeechDenoiserDpdfNetModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineSpeechDenoiserDpdfNetModelConfig(";
  os << "model=\"" << model << "\")";
  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-speech-denoiser-dpdfnet-model-config.h
================================================
// sherpa-onnx/csrc/offline-speech-denoiser-dpdfnet-model-config.h
//
// Copyright (c)  2026  Ceva Inc
#ifndef SHERPA_ONNX_CSRC_OFFLINE_SPEECH_DENOISER_DPDFNET_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SPEECH_DENOISER_DPDFNET_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineSpeechDenoiserDpdfNetModelConfig {
  std::string model;
  OfflineSpeechDenoiserDpdfNetModelConfig() = default;

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_SPEECH_DENOISER_DPDFNET_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-speech-denoiser-dpdfnet-model-meta-data.h
================================================
// sherpa-onnx/csrc/offline-speech-denoiser-dpdfnet-model-meta-data.h
//
// Copyright (c)  2026  Ceva Inc

#ifndef SHERPA_ONNX_CSRC_OFFLINE_SPEECH_DENOISER_DPDFNET_MODEL_META_DATA_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SPEECH_DENOISER_DPDFNET_MODEL_META_DATA_H_

#include <cstdint>
#include <string>
#include <vector>

namespace sherpa_onnx {

struct OfflineSpeechDenoiserDpdfNetModelMetaData {
  int32_t version = 1;
  int32_t sample_rate = 0;
  int32_t n_fft = 0;
  int32_t hop_length = 0;
  int32_t window_length = 0;
  bool normalized = false;
  bool center = true;
  std::string window_type = "vorbis";
  std::string pad_mode = "reflect";
  int32_t freq_bins = 0;
  int32_t erb_bins = 0;
  int32_t spec_bins = 0;
  int32_t state_size = 0;
  int32_t erb_norm_state_size = 0;
  int32_t spec_norm_state_size = 0;
  std::string profile;
  std::vector<float> erb_norm_init;
  std::vector<float> spec_norm_init;

  std::vector<int64_t> spec_shape;
  std::vector<int64_t> state_shape;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_SPEECH_DENOISER_DPDFNET_MODEL_META_DATA_H_


================================================
FILE: sherpa-onnx/csrc/offline-speech-denoiser-dpdfnet-model.cc
================================================
// sherpa-onnx/csrc/offline-speech-denoiser-dpdfnet-model.cc
//
// Copyright (c)  2026  Ceva Inc

#include "sherpa-onnx/csrc/offline-speech-denoiser-dpdfnet-model.h"

#include <algorithm>
#include <array>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

namespace {

std::vector<int64_t> GetInputShape(Ort::Session *sess, size_t index) {
  return sess->GetInputTypeInfo(index).GetTensorTypeAndShapeInfo().GetShape();
}

std::vector<int64_t> GetOutputShape(Ort::Session *sess, size_t index) {
  return sess->GetOutputTypeInfo(index).GetTensorTypeAndShapeInfo().GetShape();
}

}  // namespace

class OfflineSpeechDenoiserDpdfNetModel::Impl {
 public:
  explicit Impl(const OfflineSpeechDenoiserModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(config.dpdfnet.model);
    Init(buf.data(), buf.size());
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineSpeechDenoiserModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(mgr, config.dpdfnet.model);
    Init(buf.data(), buf.size());
  }

  Ort::Value GetInitState() {
    Ort::Value state = Ort::Value::CreateTensor<float>(
        allocator_, meta_.state_shape.data(), meta_.state_shape.size());

    auto *p = state.GetTensorMutableData<float>();
    std::fill_n(p, meta_.state_size, 0.f);
    std::copy(meta_.erb_norm_init.begin(), meta_.erb_norm_init.end(), p);
    std::copy(meta_.spec_norm_init.begin(), meta_.spec_norm_init.end(),
              p + meta_.erb_norm_state_size);

    return state;
  }

  std::pair<Ort::Value, Ort::Value> Run(Ort::Value x, Ort::Value state) const {
    std::array<Ort::Value, 2> inputs{std::move(x), std::move(state)};

    auto out =
        sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(),
                   output_names_ptr_.data(), output_names_ptr_.size());

    return {std::move(out[0]), std::move(out[1])};
  }

  const OfflineSpeechDenoiserDpdfNetModelMetaData &GetMetaData() const {
    return meta_;
  }

 private:
  void Init(void *model_data, size_t model_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);
    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
    Ort::AllocatorWithDefaultOptions allocator;  // used in the macros below

    std::string model_type;
    SHERPA_ONNX_READ_META_DATA_STR(model_type, "model_type");
    if (model_type != "dpdfnet") {
      SHERPA_ONNX_LOGE("Expect model type 'dpdfnet'. Given: '%s'",
                       model_type.c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_.version, "version", 1);
    SHERPA_ONNX_READ_META_DATA_STR(meta_.profile, "profile");
    SHERPA_ONNX_READ_META_DATA(meta_.sample_rate, "sample_rate");
    SHERPA_ONNX_READ_META_DATA(meta_.n_fft, "n_fft");
    SHERPA_ONNX_READ_META_DATA(meta_.hop_length, "hop_length");
    SHERPA_ONNX_READ_META_DATA(meta_.window_length, "window_length");
    int32_t normalized = 0;
    int32_t center = 1;
    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(normalized, "normalized", 0);
    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(center, "center", 1);
    SHERPA_ONNX_READ_META_DATA_STR(meta_.window_type, "window_type");
    SHERPA_ONNX_READ_META_DATA_STR(meta_.pad_mode, "pad_mode");
    SHERPA_ONNX_READ_META_DATA(meta_.freq_bins, "freq_bins");
    SHERPA_ONNX_READ_META_DATA(meta_.erb_bins, "erb_bins");
    SHERPA_ONNX_READ_META_DATA(meta_.spec_bins, "spec_bins");
    SHERPA_ONNX_READ_META_DATA(meta_.state_size, "state_size");
    SHERPA_ONNX_READ_META_DATA(meta_.erb_norm_state_size,
                               "erb_norm_state_size");
    SHERPA_ONNX_READ_META_DATA(meta_.spec_norm_state_size,
                               "spec_norm_state_size");
    SHERPA_ONNX_READ_META_DATA_VEC_FLOAT(meta_.erb_norm_init, "erb_norm_init");
    SHERPA_ONNX_READ_META_DATA_VEC_FLOAT(meta_.spec_norm_init,
                                         "spec_norm_init");

    if (normalized > 1 || center > 1) {
      SHERPA_ONNX_LOGE(
          "Invalid boolean metadata values. normalized=%d, center=%d.",
          normalized, center);
      SHERPA_ONNX_EXIT(-1);
    }

    meta_.normalized = normalized != 0;
    meta_.center = center != 0;

    if (meta_.sample_rate <= 0 || meta_.n_fft <= 0 || meta_.hop_length <= 0 ||
        meta_.window_length <= 0 || meta_.freq_bins <= 1 ||
        meta_.erb_bins <= 0 || meta_.spec_bins <= 0 || meta_.state_size <= 0) {
      SHERPA_ONNX_LOGE(
          "Invalid DPDFNet metadata. sample_rate=%d, n_fft=%d, "
          "hop_length=%d, window_length=%d, freq_bins=%d, erb_bins=%d, "
          "spec_bins=%d, state_size=%d.",
          meta_.sample_rate, meta_.n_fft, meta_.hop_length, meta_.window_length,
          meta_.freq_bins, meta_.erb_bins, meta_.spec_bins, meta_.state_size);
      SHERPA_ONNX_EXIT(-1);
    }

    if (input_names_.size() != 2 || output_names_.size() != 2) {
      SHERPA_ONNX_LOGE(
          "Expect the dpdfnet model to have 2 inputs and 2 outputs. "
          "Got %zu inputs and %zu outputs.",
          input_names_.size(), output_names_.size());
      SHERPA_ONNX_EXIT(-1);
    }

    auto spec_shape = GetInputShape(sess_.get(), 0);
    auto state_shape = GetInputShape(sess_.get(), 1);
    auto out_spec_shape = GetOutputShape(sess_.get(), 0);
    auto out_state_shape = GetOutputShape(sess_.get(), 1);

    if (spec_shape.size() != 4 || state_shape.size() != 1 ||
        out_spec_shape.size() != 4 || out_state_shape.size() != 1) {
      SHERPA_ONNX_LOGE(
          "Unexpected dpdfnet ONNX signature. Expected "
          "(spec:[B,T,F,2], state:[S]) -> (spec_e:[B,T,F,2], state_out:[S]). "
          "Got spec ndim=%d, state ndim=%d, out_spec ndim=%d, out_state "
          "ndim=%d.",
          static_cast<int32_t>(spec_shape.size()),
          static_cast<int32_t>(state_shape.size()),
          static_cast<int32_t>(out_spec_shape.size()),
          static_cast<int32_t>(out_state_shape.size()));
      SHERPA_ONNX_EXIT(-1);
    }

    const int64_t freq_bins = spec_shape[2];
    const int64_t complex_dim = spec_shape[3];
    const int64_t state_size = state_shape[0];

    if (freq_bins <= 1 || complex_dim != 2 || state_size <= 0) {
      SHERPA_ONNX_LOGE(
          "Unsupported dpdfnet model shapes. spec ndim=%d, state ndim=%d, "
          "freq_bins=%d, complex_dim=%d, state_size=%d.",
          static_cast<int32_t>(spec_shape.size()),
          static_cast<int32_t>(state_shape.size()),
          static_cast<int32_t>(freq_bins), static_cast<int32_t>(complex_dim),
          static_cast<int32_t>(state_size));
      SHERPA_ONNX_EXIT(-1);
    }

    meta_.spec_shape = std::move(spec_shape);
    meta_.state_shape = std::move(state_shape);

    if (meta_.freq_bins != freq_bins) {
      SHERPA_ONNX_LOGE(
          "Mismatch between metadata and ONNX graph for freq_bins. "
          "metadata=%d, graph=%d.",
          meta_.freq_bins, static_cast<int32_t>(freq_bins));
      SHERPA_ONNX_EXIT(-1);
    }

    if (meta_.n_fft != static_cast<int32_t>((freq_bins - 1) * 2)) {
      SHERPA_ONNX_LOGE(
          "Mismatch between metadata and ONNX graph for n_fft. metadata=%d, "
          "graph=%d.",
          meta_.n_fft, static_cast<int32_t>((freq_bins - 1) * 2));
      SHERPA_ONNX_EXIT(-1);
    }

    if (meta_.state_size != state_size) {
      SHERPA_ONNX_LOGE(
          "Mismatch between metadata and ONNX graph for state_size. "
          "metadata=%d, graph=%d.",
          meta_.state_size, static_cast<int32_t>(state_size));
      SHERPA_ONNX_EXIT(-1);
    }

    if (meta_.erb_norm_state_size !=
        static_cast<int32_t>(meta_.erb_norm_init.size())) {
      SHERPA_ONNX_LOGE(
          "Mismatch between erb_norm_state_size (%d) and erb_norm_init size "
          "(%zu).",
          meta_.erb_norm_state_size, meta_.erb_norm_init.size());
      SHERPA_ONNX_EXIT(-1);
    }

    if (meta_.spec_norm_state_size !=
        static_cast<int32_t>(meta_.spec_norm_init.size())) {
      SHERPA_ONNX_LOGE(
          "Mismatch between spec_norm_state_size (%d) and spec_norm_init size "
          "(%zu).",
          meta_.spec_norm_state_size, meta_.spec_norm_init.size());
      SHERPA_ONNX_EXIT(-1);
    }

    const int32_t init_prefix_state_size =
        meta_.erb_norm_state_size + meta_.spec_norm_state_size;
    if (meta_.erb_norm_state_size <= 0 || meta_.spec_norm_state_size <= 0) {
      SHERPA_ONNX_LOGE(
          "Invalid normalization state sizes in the metadata. "
          "erb_norm_state_size=%d, spec_norm_state_size=%d.",
          meta_.erb_norm_state_size, meta_.spec_norm_state_size);
      SHERPA_ONNX_EXIT(-1);
    }

    if (meta_.state_size < init_prefix_state_size) {
      SHERPA_ONNX_LOGE(
          "The dpdfnet state tensor is too small: %d. It must be at least %d.",
          meta_.state_size, init_prefix_state_size);
      SHERPA_ONNX_EXIT(-1);
    }

    if (out_spec_shape[2] != freq_bins || out_spec_shape[3] != 2 ||
        out_state_shape[0] != state_size) {
      SHERPA_ONNX_LOGE(
          "Unexpected dpdfnet output shapes. out_spec[2]=%d, out_spec[3]=%d, "
          "out_state[0]=%d, expected freq_bins=%d, complex_dim=2, "
          "state_size=%d.",
          static_cast<int32_t>(out_spec_shape[2]),
          static_cast<int32_t>(out_spec_shape[3]),
          static_cast<int32_t>(out_state_shape[0]),
          static_cast<int32_t>(freq_bins), static_cast<int32_t>(state_size));
      SHERPA_ONNX_EXIT(-1);
    }

    if (config_.debug) {
      std::ostringstream os;
      os << "---dpdfnet model---\n";
      PrintModelMetadata(os, meta_data);
      os << "input names:\n";
      for (int32_t i = 0; i != static_cast<int32_t>(input_names_.size()); ++i) {
        os << i << " " << input_names_[i] << "\n";
      }

      os << "output names:\n";
      for (int32_t i = 0; i != static_cast<int32_t>(output_names_.size());
           ++i) {
        os << i << " " << output_names_[i] << "\n";
      }

      os << "spec shape: ";
      for (auto d : meta_.spec_shape) {
        os << d << " ";
      }
      os << "\nstate shape: ";
      for (auto d : meta_.state_shape) {
        os << d << " ";
      }
      os << "\nprofile: " << meta_.profile;
      os << "\nsample_rate: " << meta_.sample_rate;
      os << "\nn_fft: " << meta_.n_fft;
      os << "\nfreq_bins: " << meta_.freq_bins;
      os << "\nerb_bins: " << meta_.erb_bins;
      os << "\nspec_bins: " << meta_.spec_bins;
      os << "\nstate_size: " << meta_.state_size;
      os << "\nnormalized: " << static_cast<int32_t>(meta_.normalized);
      os << "\ncenter: " << static_cast<int32_t>(meta_.center);
      os << "\n";

#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }
  }

 private:
  OfflineSpeechDenoiserModelConfig config_;
  OfflineSpeechDenoiserDpdfNetModelMetaData meta_;

  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;
};

OfflineSpeechDenoiserDpdfNetModel::~OfflineSpeechDenoiserDpdfNetModel() =
    default;  // NOLINT

OfflineSpeechDenoiserDpdfNetModel::OfflineSpeechDenoiserDpdfNetModel(
    const OfflineSpeechDenoiserModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineSpeechDenoiserDpdfNetModel::OfflineSpeechDenoiserDpdfNetModel(
    Manager *mgr, const OfflineSpeechDenoiserModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

Ort::Value OfflineSpeechDenoiserDpdfNetModel::GetInitState() const {
  return impl_->GetInitState();
}

std::pair<Ort::Value, Ort::Value> OfflineSpeechDenoiserDpdfNetModel::Run(
    Ort::Value x, Ort::Value state) const {
  return impl_->Run(std::move(x), std::move(state));
}

const OfflineSpeechDenoiserDpdfNetModelMetaData &
OfflineSpeechDenoiserDpdfNetModel::GetMetaData() const {
  return impl_->GetMetaData();
}

#if __ANDROID_API__ >= 9
template OfflineSpeechDenoiserDpdfNetModel::OfflineSpeechDenoiserDpdfNetModel(
    AAssetManager *mgr, const OfflineSpeechDenoiserModelConfig &config);
#endif

#if __OHOS__
template OfflineSpeechDenoiserDpdfNetModel::OfflineSpeechDenoiserDpdfNetModel(
    NativeResourceManager *mgr, const OfflineSpeechDenoiserModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-speech-denoiser-dpdfnet-model.h
================================================
// sherpa-onnx/csrc/offline-speech-denoiser-dpdfnet-model.h
//
// Copyright (c)  2026  Ceva Inc
#ifndef SHERPA_ONNX_CSRC_OFFLINE_SPEECH_DENOISER_DPDFNET_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SPEECH_DENOISER_DPDFNET_MODEL_H_

#include <memory>
#include <utility>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-speech-denoiser-dpdfnet-model-meta-data.h"
#include "sherpa-onnx/csrc/offline-speech-denoiser-model-config.h"
#include "sherpa-onnx/csrc/offline-speech-denoiser.h"

namespace sherpa_onnx {

class OfflineSpeechDenoiserDpdfNetModel {
 public:
  ~OfflineSpeechDenoiserDpdfNetModel();
  explicit OfflineSpeechDenoiserDpdfNetModel(
      const OfflineSpeechDenoiserModelConfig &config);

  template <typename Manager>
  OfflineSpeechDenoiserDpdfNetModel(
      Manager *mgr, const OfflineSpeechDenoiserModelConfig &config);

  Ort::Value GetInitState() const;

  std::pair<Ort::Value, Ort::Value> Run(Ort::Value x, Ort::Value state) const;

  const OfflineSpeechDenoiserDpdfNetModelMetaData &GetMetaData() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_SPEECH_DENOISER_DPDFNET_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/offline-speech-denoiser-gtcrn-impl.h
================================================
// sherpa-onnx/csrc/offline-speech-denoiser-gtcrn-impl.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_SPEECH_DENOISER_GTCRN_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SPEECH_DENOISER_GTCRN_IMPL_H_

#include <algorithm>
#include <cmath>
#include <memory>
#include <utility>
#include <vector>

#include "kaldi-native-fbank/csrc/feature-window.h"
#include "kaldi-native-fbank/csrc/istft.h"
#include "kaldi-native-fbank/csrc/stft.h"
#include "sherpa-onnx/csrc/offline-speech-denoiser-gtcrn-model.h"
#include "sherpa-onnx/csrc/offline-speech-denoiser-impl.h"
#include "sherpa-onnx/csrc/offline-speech-denoiser.h"
#include "sherpa-onnx/csrc/resample.h"

namespace sherpa_onnx {

class OfflineSpeechDenoiserGtcrnImpl : public OfflineSpeechDenoiserImpl {
 public:
  explicit OfflineSpeechDenoiserGtcrnImpl(
      const OfflineSpeechDenoiserConfig &config)
      : model_(config.model) {}

  template <typename Manager>
  OfflineSpeechDenoiserGtcrnImpl(Manager *mgr,
                                 const OfflineSpeechDenoiserConfig &config)
      : model_(mgr, config.model) {}

  DenoisedAudio Run(const float *samples, int32_t n,
                    int32_t sample_rate) const override {
    const auto &meta = model_.GetMetaData();

    std::vector<float> tmp;
    auto p = samples;

    if (sample_rate != meta.sample_rate) {
      SHERPA_ONNX_LOGE(
          "Creating a resampler:\n"
          "   in_sample_rate: %d\n"
          "   output_sample_rate: %d\n",
          sample_rate, meta.sample_rate);

      float min_freq = std::min<int32_t>(sample_rate, meta.sample_rate);
      float lowpass_cutoff = 0.99 * 0.5 * min_freq;

      int32_t lowpass_filter_width = 6;
      auto resampler = std::make_unique<LinearResample>(
          sample_rate, meta.sample_rate, lowpass_cutoff, lowpass_filter_width);
      resampler->Resample(samples, n, true, &tmp);
      p = tmp.data();
      n = tmp.size();
    }

    knf::StftConfig stft_config;
    stft_config.n_fft = meta.n_fft;
    stft_config.hop_length = meta.hop_length;
    stft_config.win_length = meta.window_length;
    stft_config.window_type = meta.window_type;
    if (stft_config.window_type == "hann_sqrt") {
      auto window = knf::GetWindow("hann", stft_config.win_length);
      for (auto &w : window) {
        w = std::sqrt(w);
      }
      stft_config.window = std::move(window);
    }

    knf::Stft stft(stft_config);
    knf::StftResult stft_result = stft.Compute(p, n);

    auto states = model_.GetInitStates();
    OfflineSpeechDenoiserGtcrnModel::States next_states;

    knf::StftResult enhanced_stft_result;
    enhanced_stft_result.num_frames = stft_result.num_frames;
    for (int32_t i = 0; i < stft_result.num_frames; ++i) {
      auto p = Process(stft_result, i, std::move(states), &next_states);
      states = std::move(next_states);

      enhanced_stft_result.real.insert(enhanced_stft_result.real.end(),
                                       p.first.begin(), p.first.end());
      enhanced_stft_result.imag.insert(enhanced_stft_result.imag.end(),
                                       p.second.begin(), p.second.end());
    }

    knf::IStft istft(stft_config);

    DenoisedAudio denoised_audio;
    denoised_audio.sample_rate = meta.sample_rate;
    denoised_audio.samples = istft.Compute(enhanced_stft_result);
    return denoised_audio;
  }

  int32_t GetSampleRate() const override {
    return model_.GetMetaData().sample_rate;
  }

 private:
  std::pair<std::vector<float>, std::vector<float>> Process(
      const knf::StftResult &stft_result, int32_t frame_index,
      OfflineSpeechDenoiserGtcrnModel::States states,
      OfflineSpeechDenoiserGtcrnModel::States *next_states) const {
    const auto &meta = model_.GetMetaData();
    int32_t n_fft = meta.n_fft;
    std::vector<float> x((n_fft / 2 + 1) * 2);

    const float *p_real =
        stft_result.real.data() + frame_index * (n_fft / 2 + 1);
    const float *p_imag =
        stft_result.imag.data() + frame_index * (n_fft / 2 + 1);

    for (int32_t i = 0; i < n_fft / 2 + 1; ++i) {
      x[2 * i] = p_real[i];
      x[2 * i + 1] = p_imag[i];
    }
    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::array<int64_t, 4> x_shape{1, n_fft / 2 + 1, 1, 2};
    Ort::Value x_tensor = Ort::Value::CreateTensor(
        memory_info, x.data(), x.size(), x_shape.data(), x_shape.size());

    Ort::Value output{nullptr};
    std::tie(output, *next_states) =
        model_.Run(std::move(x_tensor), std::move(states));

    std::vector<float> real(n_fft / 2 + 1);
    std::vector<float> imag(n_fft / 2 + 1);
    const auto *p = output.GetTensorData<float>();
    for (int32_t i = 0; i < n_fft / 2 + 1; ++i) {
      real[i] = p[2 * i];
      imag[i] = p[2 * i + 1];
    }

    return {std::move(real), std::move(imag)};
  }

 private:
  OfflineSpeechDenoiserGtcrnModel model_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_SPEECH_DENOISER_GTCRN_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-speech-denoiser-gtcrn-model-config.cc
================================================
// sherpa-onnx/csrc/offline-speech-denoiser-gtcrn-model-config.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-speech-denoiser-gtcrn-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OfflineSpeechDenoiserGtcrnModelConfig::Register(ParseOptions *po) {
  po->Register("speech-denoiser-gtcrn-model", &model,
               "Path to the gtcrn model for speech denoising");
}

bool OfflineSpeechDenoiserGtcrnModelConfig::Validate() const {
  if (model.empty()) {
    SHERPA_ONNX_LOGE("Please provide --speech-denoiser-gtcrn-model");
    return false;
  }

  if (!FileExists(model)) {
    SHERPA_ONNX_LOGE("gtcrn model file '%s' does not exist", model.c_str());
    return false;
  }
  return true;
}

std::string OfflineSpeechDenoiserGtcrnModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineSpeechDenoiserGtcrnModelConfig(";
  os << "model=\"" << model << "\")";
  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-speech-denoiser-gtcrn-model-config.h
================================================
// sherpa-onnx/csrc/offline-speech-denoiser-gtcrn-model-config.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_SPEECH_DENOISER_GTCRN_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SPEECH_DENOISER_GTCRN_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineSpeechDenoiserGtcrnModelConfig {
  std::string model;
  OfflineSpeechDenoiserGtcrnModelConfig() = default;

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_SPEECH_DENOISER_GTCRN_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-speech-denoiser-gtcrn-model-meta-data.h
================================================
// sherpa-onnx/csrc/offline-speech-denoiser-gtcrn-model-meta-data.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_SPEECH_DENOISER_GTCRN_MODEL_META_DATA_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SPEECH_DENOISER_GTCRN_MODEL_META_DATA_H_

#include <cstdint>
#include <string>
#include <vector>

namespace sherpa_onnx {

// please refer to
// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/gtcrn/add_meta_data.py
struct OfflineSpeechDenoiserGtcrnModelMetaData {
  int32_t sample_rate = 0;
  int32_t version = 1;
  int32_t n_fft = 0;
  int32_t hop_length = 0;
  int32_t window_length = 0;
  std::string window_type;

  std::vector<int64_t> conv_cache_shape;
  std::vector<int64_t> tra_cache_shape;
  std::vector<int64_t> inter_cache_shape;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_SPEECH_DENOISER_GTCRN_MODEL_META_DATA_H_


================================================
FILE: sherpa-onnx/csrc/offline-speech-denoiser-gtcrn-model.cc
================================================
// sherpa-onnx/csrc/offline-speech-denoiser-gtcrn-model.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-speech-denoiser-gtcrn-model.h"

#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class OfflineSpeechDenoiserGtcrnModel::Impl {
 public:
  explicit Impl(const OfflineSpeechDenoiserModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    {
      auto buf = ReadFile(config.gtcrn.model);
      Init(buf.data(), buf.size());
    }
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineSpeechDenoiserModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    {
      auto buf = ReadFile(mgr, config.gtcrn.model);
      Init(buf.data(), buf.size());
    }
  }

  const OfflineSpeechDenoiserGtcrnModelMetaData &GetMetaData() const {
    return meta_;
  }

  States GetInitStates() {
    Ort::Value conv_cache = Ort::Value::CreateTensor<float>(
        allocator_, meta_.conv_cache_shape.data(),
        meta_.conv_cache_shape.size());

    Ort::Value tra_cache = Ort::Value::CreateTensor<float>(
        allocator_, meta_.tra_cache_shape.data(), meta_.tra_cache_shape.size());

    Ort::Value inter_cache = Ort::Value::CreateTensor<float>(
        allocator_, meta_.inter_cache_shape.data(),
        meta_.inter_cache_shape.size());

    Fill<float>(&conv_cache, 0);
    Fill<float>(&tra_cache, 0);
    Fill<float>(&inter_cache, 0);

    std::vector<Ort::Value> states;

    states.reserve(3);
    states.push_back(std::move(conv_cache));
    states.push_back(std::move(tra_cache));
    states.push_back(std::move(inter_cache));

    return states;
  }

  std::pair<Ort::Value, States> Run(Ort::Value x, States states) const {
    std::vector<Ort::Value> inputs;
    inputs.reserve(1 + states.size());
    inputs.push_back(std::move(x));
    for (auto &s : states) {
      inputs.push_back(std::move(s));
    }

    auto out =
        sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(),
                   output_names_ptr_.data(), output_names_ptr_.size());

    std::vector<Ort::Value> next_states;
    next_states.reserve(out.size() - 1);
    for (int32_t k = 1; k < out.size(); ++k) {
      next_states.push_back(std::move(out[k]));
    }

    return {std::move(out[0]), std::move(next_states)};
  }

 private:
  void Init(void *model_data, size_t model_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);

    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      os << "---gtcrn model---\n";
      PrintModelMetadata(os, meta_data);

      os << "----------input names----------\n";
      int32_t i = 0;
      for (const auto &s : input_names_) {
        os << i << " " << s << "\n";
        ++i;
      }
      os << "----------output names----------\n";
      i = 0;
      for (const auto &s : output_names_) {
        os << i << " " << s << "\n";
        ++i;
      }

#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below

    std::string model_type;
    SHERPA_ONNX_READ_META_DATA_STR(model_type, "model_type");
    if (model_type != "gtcrn") {
      SHERPA_ONNX_LOGE("Expect model type 'gtcrn'. Given: '%s'",
                       model_type.c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    SHERPA_ONNX_READ_META_DATA(meta_.sample_rate, "sample_rate");
    SHERPA_ONNX_READ_META_DATA(meta_.n_fft, "n_fft");
    SHERPA_ONNX_READ_META_DATA(meta_.hop_length, "hop_length");
    SHERPA_ONNX_READ_META_DATA(meta_.window_length, "window_length");
    SHERPA_ONNX_READ_META_DATA_STR(meta_.window_type, "window_type");
    SHERPA_ONNX_READ_META_DATA(meta_.version, "version");

    SHERPA_ONNX_READ_META_DATA_VEC(meta_.conv_cache_shape, "conv_cache_shape");
    SHERPA_ONNX_READ_META_DATA_VEC(meta_.tra_cache_shape, "tra_cache_shape");
    SHERPA_ONNX_READ_META_DATA_VEC(meta_.inter_cache_shape,
                                   "inter_cache_shape");
  }

 private:
  OfflineSpeechDenoiserModelConfig config_;
  OfflineSpeechDenoiserGtcrnModelMetaData meta_;

  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;
};

OfflineSpeechDenoiserGtcrnModel::~OfflineSpeechDenoiserGtcrnModel() = default;

OfflineSpeechDenoiserGtcrnModel::OfflineSpeechDenoiserGtcrnModel(
    const OfflineSpeechDenoiserModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineSpeechDenoiserGtcrnModel::OfflineSpeechDenoiserGtcrnModel(
    Manager *mgr, const OfflineSpeechDenoiserModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineSpeechDenoiserGtcrnModel::States
OfflineSpeechDenoiserGtcrnModel::GetInitStates() const {
  return impl_->GetInitStates();
}

std::pair<Ort::Value, OfflineSpeechDenoiserGtcrnModel::States>
OfflineSpeechDenoiserGtcrnModel::Run(Ort::Value x, States states) const {
  return impl_->Run(std::move(x), std::move(states));
}

const OfflineSpeechDenoiserGtcrnModelMetaData &
OfflineSpeechDenoiserGtcrnModel::GetMetaData() const {
  return impl_->GetMetaData();
}

#if __ANDROID_API__ >= 9
template OfflineSpeechDenoiserGtcrnModel::OfflineSpeechDenoiserGtcrnModel(
    AAssetManager *mgr, const OfflineSpeechDenoiserModelConfig &config);
#endif

#if __OHOS__
template OfflineSpeechDenoiserGtcrnModel::OfflineSpeechDenoiserGtcrnModel(
    NativeResourceManager *mgr, const OfflineSpeechDenoiserModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-speech-denoiser-gtcrn-model.h
================================================
// sherpa-onnx/csrc/offline-speech-denoiser-gtcrn-model.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_SPEECH_DENOISER_GTCRN_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SPEECH_DENOISER_GTCRN_MODEL_H_
#include <memory>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-speech-denoiser-gtcrn-model-meta-data.h"
#include "sherpa-onnx/csrc/offline-speech-denoiser-model-config.h"
#include "sherpa-onnx/csrc/offline-speech-denoiser.h"

namespace sherpa_onnx {

class OfflineSpeechDenoiserGtcrnModel {
 public:
  ~OfflineSpeechDenoiserGtcrnModel();
  explicit OfflineSpeechDenoiserGtcrnModel(
      const OfflineSpeechDenoiserModelConfig &config);

  template <typename Manager>
  OfflineSpeechDenoiserGtcrnModel(
      Manager *mgr, const OfflineSpeechDenoiserModelConfig &config);

  using States = std::vector<Ort::Value>;

  States GetInitStates() const;

  std::pair<Ort::Value, States> Run(Ort::Value x, States states) const;

  const OfflineSpeechDenoiserGtcrnModelMetaData &GetMetaData() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_SPEECH_DENOISER_GTCRN_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/offline-speech-denoiser-impl.cc
================================================
// sherpa-onnx/csrc/offline-speech-denoiser-impl.cc
//
// Copyright (c)  2025  Xiaomi Corporation
#include "sherpa-onnx/csrc/offline-speech-denoiser-impl.h"

#include <memory>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-speech-denoiser-dpdfnet-impl.h"
#include "sherpa-onnx/csrc/offline-speech-denoiser-gtcrn-impl.h"

namespace sherpa_onnx {

std::unique_ptr<OfflineSpeechDenoiserImpl> OfflineSpeechDenoiserImpl::Create(
    const OfflineSpeechDenoiserConfig &config) {
  const bool has_gtcrn = !config.model.gtcrn.model.empty();
  const bool has_dpdfnet = !config.model.dpdfnet.model.empty();

  if (has_gtcrn) {
    return std::make_unique<OfflineSpeechDenoiserGtcrnImpl>(config);
  } else if (has_dpdfnet) {
    return std::make_unique<OfflineSpeechDenoiserDpdfNetImpl>(config);
  }

  SHERPA_ONNX_LOGE("Please provide one speech denoising model.");
  return nullptr;
}

template <typename Manager>
std::unique_ptr<OfflineSpeechDenoiserImpl> OfflineSpeechDenoiserImpl::Create(
    Manager *mgr, const OfflineSpeechDenoiserConfig &config) {
  const bool has_gtcrn = !config.model.gtcrn.model.empty();
  const bool has_dpdfnet = !config.model.dpdfnet.model.empty();

  if (has_gtcrn) {
    return std::make_unique<OfflineSpeechDenoiserGtcrnImpl>(mgr, config);
  } else if (has_dpdfnet) {
    return std::make_unique<OfflineSpeechDenoiserDpdfNetImpl>(mgr, config);
  }

  SHERPA_ONNX_LOGE("Please provide one speech denoising model.");
  return nullptr;
}

#if __ANDROID_API__ >= 9
template std::unique_ptr<OfflineSpeechDenoiserImpl>
OfflineSpeechDenoiserImpl::Create(AAssetManager *mgr,
                                  const OfflineSpeechDenoiserConfig &config);
#endif

#if __OHOS__
template std::unique_ptr<OfflineSpeechDenoiserImpl>
OfflineSpeechDenoiserImpl::Create(NativeResourceManager *mgr,
                                  const OfflineSpeechDenoiserConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-speech-denoiser-impl.h
================================================
// sherpa-onnx/csrc/offline-speaker-speech-denoiser-impl.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_SPEECH_DENOISER_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SPEECH_DENOISER_IMPL_H_

#include <memory>

#include "sherpa-onnx/csrc/offline-speech-denoiser.h"

namespace sherpa_onnx {

class OfflineSpeechDenoiserImpl {
 public:
  virtual ~OfflineSpeechDenoiserImpl() = default;

  static std::unique_ptr<OfflineSpeechDenoiserImpl> Create(
      const OfflineSpeechDenoiserConfig &config);

  template <typename Manager>
  static std::unique_ptr<OfflineSpeechDenoiserImpl> Create(
      Manager *mgr, const OfflineSpeechDenoiserConfig &config);

  virtual DenoisedAudio Run(const float *samples, int32_t n,
                            int32_t sample_rate) const = 0;

  virtual int32_t GetSampleRate() const = 0;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_SPEECH_DENOISER_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-speech-denoiser-model-config.cc
================================================
// sherpa-onnx/csrc/offline-speech-denoiser-model-config.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-speech-denoiser-model-config.h"

#include <sstream>
#include <string>

#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OfflineSpeechDenoiserModelConfig::Register(ParseOptions *po) {
  gtcrn.Register(po);
  dpdfnet.Register(po);

  po->Register("num-threads", &num_threads,
               "Number of threads to run the neural network");

  po->Register("debug", &debug,
               "true to print model information while loading it.");

  po->Register("provider", &provider,
               "Specify a provider to use: cpu, cuda, coreml");
}

bool OfflineSpeechDenoiserModelConfig::Validate() const {
  if (gtcrn.model.empty() && dpdfnet.model.empty()) {
    SHERPA_ONNX_LOGE("Please provide a speech denoising model.");
    return false;
  }

  if (!gtcrn.model.empty()) {
    return gtcrn.Validate();
  }

  return dpdfnet.Validate();
}

std::string OfflineSpeechDenoiserModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineSpeechDenoiserModelConfig(";
  os << "gtcrn=" << gtcrn.ToString() << ", ";
  os << "dpdfnet=" << dpdfnet.ToString() << ", ";
  os << "num_threads=" << num_threads << ", ";
  os << "debug=" << (debug ? "True" : "False") << ", ";
  os << "provider=\"" << provider << "\")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-speech-denoiser-model-config.h
================================================
// sherpa-onnx/csrc/offline-speech-denoiser-model-config.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_SPEECH_DENOISER_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SPEECH_DENOISER_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/offline-speech-denoiser-dpdfnet-model-config.h"
#include "sherpa-onnx/csrc/offline-speech-denoiser-gtcrn-model-config.h"
#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineSpeechDenoiserModelConfig {
  OfflineSpeechDenoiserGtcrnModelConfig gtcrn;
  OfflineSpeechDenoiserDpdfNetModelConfig dpdfnet;

  int32_t num_threads = 1;
  bool debug = false;
  std::string provider = "cpu";

  OfflineSpeechDenoiserModelConfig() = default;

  OfflineSpeechDenoiserModelConfig(
      const OfflineSpeechDenoiserGtcrnModelConfig &gtcrn,
      const OfflineSpeechDenoiserDpdfNetModelConfig &dpdfnet,
      int32_t num_threads, bool debug, const std::string &provider)
      : gtcrn(gtcrn),
        dpdfnet(dpdfnet),
        num_threads(num_threads),
        debug(debug),
        provider(provider) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_SPEECH_DENOISER_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-speech-denoiser.cc
================================================
// sherpa-onnx/csrc/offline-speech-denoiser.h
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-speech-denoiser.h"

#include <string>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/offline-speech-denoiser-impl.h"

namespace sherpa_onnx {

void OfflineSpeechDenoiserConfig::Register(ParseOptions *po) {
  model.Register(po);
}

bool OfflineSpeechDenoiserConfig::Validate() const { return model.Validate(); }

std::string OfflineSpeechDenoiserConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineSpeechDenoiserConfig(";
  os << "model=" << model.ToString() << ")";
  return os.str();
}

template <typename Manager>
OfflineSpeechDenoiser::OfflineSpeechDenoiser(
    Manager *mgr, const OfflineSpeechDenoiserConfig &config)
    : impl_(OfflineSpeechDenoiserImpl::Create(mgr, config)) {}

OfflineSpeechDenoiser::OfflineSpeechDenoiser(
    const OfflineSpeechDenoiserConfig &config)
    : impl_(OfflineSpeechDenoiserImpl::Create(config)) {}

OfflineSpeechDenoiser::~OfflineSpeechDenoiser() = default;

DenoisedAudio OfflineSpeechDenoiser::Run(const float *samples, int32_t n,
                                         int32_t sample_rate) const {
  return impl_->Run(samples, n, sample_rate);
}

int32_t OfflineSpeechDenoiser::GetSampleRate() const {
  return impl_->GetSampleRate();
}

#if __ANDROID_API__ >= 9
template OfflineSpeechDenoiser::OfflineSpeechDenoiser(
    AAssetManager *mgr, const OfflineSpeechDenoiserConfig &config);
#endif

#if __OHOS__
template OfflineSpeechDenoiser::OfflineSpeechDenoiser(
    NativeResourceManager *mgr, const OfflineSpeechDenoiserConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-speech-denoiser.h
================================================
// sherpa-onnx/csrc/offline-speech-denoiser.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_SPEECH_DENOISER_H_
#define SHERPA_ONNX_CSRC_OFFLINE_SPEECH_DENOISER_H_

#include <memory>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/offline-speech-denoiser-model-config.h"
#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct DenoisedAudio {
  std::vector<float> samples;
  int32_t sample_rate;
};

struct OfflineSpeechDenoiserConfig {
  OfflineSpeechDenoiserModelConfig model;

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

class OfflineSpeechDenoiserImpl;

class OfflineSpeechDenoiser {
 public:
  explicit OfflineSpeechDenoiser(const OfflineSpeechDenoiserConfig &config);
  ~OfflineSpeechDenoiser();

  template <typename Manager>
  OfflineSpeechDenoiser(Manager *mgr,
                        const OfflineSpeechDenoiserConfig &config);

  /*
   * @param samples 1-D array of audio samples. Each sample is in the
   *                range [-1, 1].
   * @param n Number of samples
   * @param sample_rate Sample rate of the input samples
   *
   */
  DenoisedAudio Run(const float *samples, int32_t n, int32_t sample_rate) const;

  /*
   * Return the sample rate of the denoised audio
   */
  int32_t GetSampleRate() const;

 private:
  std::unique_ptr<OfflineSpeechDenoiserImpl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_SPEECH_DENOISER_H_


================================================
FILE: sherpa-onnx/csrc/offline-stream.cc
================================================
// sherpa-onnx/csrc/offline-stream.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-stream.h"

#include <algorithm>
#include <cassert>
#include <cmath>
#include <iomanip>
#include <limits>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "Eigen/Core"
#include "kaldi-native-fbank/csrc/online-feature.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/math.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/resample.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class OfflineStream::Impl {
 public:
  explicit Impl(const FeatureExtractorConfig &config,
                ContextGraphPtr context_graph)
      : config_(config), context_graph_(std::move(context_graph)) {
    if (config.is_mfcc) {
      mfcc_opts_.frame_opts.dither = config_.dither;
      mfcc_opts_.frame_opts.snip_edges = config_.snip_edges;
      mfcc_opts_.frame_opts.samp_freq = config_.sampling_rate;
      mfcc_opts_.frame_opts.frame_shift_ms = config_.frame_shift_ms;
      mfcc_opts_.frame_opts.frame_length_ms = config_.frame_length_ms;
      mfcc_opts_.frame_opts.remove_dc_offset = config_.remove_dc_offset;
      mfcc_opts_.frame_opts.window_type = config_.window_type;

      mfcc_opts_.mel_opts.num_bins = config_.feature_dim;

      mfcc_opts_.mel_opts.high_freq = config_.high_freq;
      mfcc_opts_.mel_opts.low_freq = config_.low_freq;

      mfcc_opts_.mel_opts.is_librosa = config_.is_librosa;

      mfcc_opts_.num_ceps = config_.num_ceps;
      mfcc_opts_.use_energy = config_.use_energy;

      mfcc_ = std::make_unique<knf::OnlineMfcc>(mfcc_opts_);
    } else {
      opts_.frame_opts.dither = config.dither;
      opts_.frame_opts.snip_edges = config.snip_edges;
      opts_.frame_opts.samp_freq = config.sampling_rate;
      opts_.frame_opts.frame_shift_ms = config.frame_shift_ms;
      opts_.frame_opts.frame_length_ms = config.frame_length_ms;
      opts_.frame_opts.remove_dc_offset = config.remove_dc_offset;
      opts_.frame_opts.window_type = config.window_type;

      opts_.mel_opts.num_bins = config.feature_dim;

      opts_.mel_opts.high_freq = config.high_freq;
      opts_.mel_opts.low_freq = config.low_freq;

      opts_.mel_opts.is_librosa = config.is_librosa;

      fbank_ = std::make_unique<knf::OnlineFbank>(opts_);
    }
  }

  explicit Impl(WhisperTag tag) {
    config_.normalize_samples = true;
    opts_.frame_opts.samp_freq = 16000;
    opts_.mel_opts.num_bins = tag.dim;

    knf::WhisperFeatureOptions whisper_opts;
    whisper_opts.frame_opts = opts_.frame_opts;
    whisper_opts.dim = tag.dim;

    whisper_fbank_ = std::make_unique<knf::OnlineWhisperFbank>(whisper_opts);
    config_.sampling_rate = opts_.frame_opts.samp_freq;
  }

  explicit Impl(CEDTag /*tag*/) : is_ced_(true) {
    // see
    // https://github.com/RicherMans/CED/blob/main/onnx_inference_with_kaldi.py

    opts_.frame_opts.frame_length_ms = 32;
    opts_.frame_opts.dither = 0;
    opts_.frame_opts.preemph_coeff = 0;
    opts_.frame_opts.remove_dc_offset = false;
    opts_.frame_opts.window_type = "hann";
    opts_.frame_opts.snip_edges = false;

    opts_.frame_opts.samp_freq = 16000;  // fixed to 16000
    opts_.mel_opts.num_bins = 64;
    opts_.mel_opts.low_freq = 0;
    opts_.mel_opts.high_freq = 8000;
    opts_.use_log_fbank = false;

    config_.sampling_rate = opts_.frame_opts.samp_freq;

    fbank_ = std::make_unique<knf::OnlineFbank>(opts_);
  }

  explicit Impl(MoonshineTag /*tag*/) : is_moonshine_(true) {
    config_.sampling_rate = 16000;
  }

  explicit Impl(OmnilingualAsrTag /*tag*/) : is_omnilingual_asr_(true) {
    config_.sampling_rate = 16000;
  }

  void AcceptWaveform(int32_t sampling_rate, const float *waveform, int32_t n) {
    if (config_.normalize_samples) {
      AcceptWaveformImpl(sampling_rate, waveform, n);
    } else {
      std::vector<float> buf(n);
      for (int32_t i = 0; i != n; ++i) {
        buf[i] = waveform[i] * 32768;
      }
      AcceptWaveformImpl(sampling_rate, buf.data(), n);
    }
  }

  void AcceptWaveformImpl(int32_t sampling_rate, const float *waveform,
                          int32_t n) {
    if (sampling_rate != config_.sampling_rate) {
      SHERPA_ONNX_LOGE(
          "Creating a resampler:\n"
          "   in_sample_rate: %d\n"
          "   output_sample_rate: %d\n",
          sampling_rate, static_cast<int32_t>(config_.sampling_rate));

      float min_freq = std::min<int32_t>(sampling_rate, config_.sampling_rate);
      float lowpass_cutoff = 0.99 * 0.5 * min_freq;

      int32_t lowpass_filter_width = 6;
      auto resampler = std::make_unique<LinearResample>(
          sampling_rate, config_.sampling_rate, lowpass_cutoff,
          lowpass_filter_width);
      std::vector<float> samples;
      resampler->Resample(waveform, n, true, &samples);

      if (is_moonshine_ || is_omnilingual_asr_) {
        samples_.insert(samples_.end(), samples.begin(), samples.end());
      } else if (fbank_) {
        fbank_->AcceptWaveform(config_.sampling_rate, samples.data(),
                               samples.size());
        fbank_->InputFinished();
      } else if (mfcc_) {
        mfcc_->AcceptWaveform(config_.sampling_rate, samples.data(),
                              samples.size());
        mfcc_->InputFinished();
      } else {
        whisper_fbank_->AcceptWaveform(config_.sampling_rate, samples.data(),
                                       samples.size());
        whisper_fbank_->InputFinished();
      }

      return;
    }  // if (sampling_rate != config_.sampling_rate)

    if (is_moonshine_ || is_omnilingual_asr_) {
      samples_.insert(samples_.end(), waveform, waveform + n);
    } else if (fbank_) {
      fbank_->AcceptWaveform(sampling_rate, waveform, n);
      fbank_->InputFinished();
    } else if (mfcc_) {
      mfcc_->AcceptWaveform(sampling_rate, waveform, n);
      mfcc_->InputFinished();
    } else {
      whisper_fbank_->AcceptWaveform(sampling_rate, waveform, n);
      whisper_fbank_->InputFinished();
    }
  }

  int32_t FeatureDim() const {
    if (is_moonshine_ || is_omnilingual_asr_) {
      return samples_.size();
    }

    return mfcc_ ? mfcc_opts_.num_ceps : opts_.mel_opts.num_bins;
  }

  std::vector<float> GetFrames() const {
    if (is_moonshine_ || is_omnilingual_asr_) {
      return samples_;
    }

    int32_t n = fbank_  ? fbank_->NumFramesReady()
                : mfcc_ ? mfcc_->NumFramesReady()
                        : whisper_fbank_->NumFramesReady();
    assert(n > 0 && "Please first call AcceptWaveform()");

    int32_t feature_dim = FeatureDim();

    std::vector<float> features(n * feature_dim);

    float *p = features.data();

    for (int32_t i = 0; i != n; ++i) {
      const float *f = fbank_  ? fbank_->GetFrame(i)
                       : mfcc_ ? mfcc_->GetFrame(i)
                               : whisper_fbank_->GetFrame(i);
      std::copy(f, f + feature_dim, p);
      p += feature_dim;
    }

    NemoNormalizeFeatures(features.data(), n, feature_dim);

    if (is_ced_) {
      AmplitudeToDB(features.data(), features.size());
    }

    return features;
  }

  void SetResult(const OfflineRecognitionResult &r) { r_ = r; }

  const OfflineRecognitionResult &GetResult() const { return r_; }

  const ContextGraphPtr &GetContextGraph() const { return context_graph_; }

  void SetOption(const std::string &key, const std::string &value) {
    options_[key] = value;
  }

  bool HasOption(const std::string &key) const {
    return options_.count(key) != 0;
  }

  const std::string &GetOption(const std::string &key) const {
    auto it = options_.find(key);
    if (it != options_.end()) {
      return it->second;
    }
    static const std::string kEmpty;
    return kEmpty;
  }

  int32_t GetOptionInt(const std::string &key, int32_t default_value) const {
    auto it = options_.find(key);
    if (it != options_.end()) {
      return ToIntOrDefault(it->second, default_value);
    }
    return default_value;
  }

  float GetOptionFloat(const std::string &key, float default_value) const {
    auto it = options_.find(key);
    if (it != options_.end()) {
      return ToFloatOrDefault(it->second, default_value);
    }
    return default_value;
  }

 private:
  // see
  // https://github.com/pytorch/audio/blob/main/src/torchaudio/functional/functional.py#L359
  void AmplitudeToDB(float *p, int32_t n) const {
    float multiplier = 10;
    float top_db = 120;
    float amin = 1e-10;

    float max_x = std::numeric_limits<float>::min();

    for (int32_t i = 0; i != n; ++i) {
      float x = p[i];
      x = (x > amin) ? x : amin;
      x = log10f(x) * multiplier;

      max_x = (x > max_x) ? x : max_x;
      p[i] = x;
    }

    float d = max_x - top_db;
    for (int32_t i = 0; i != n; ++i) {
      float x = p[i];
      x = (x > d) ? x : d;
      p[i] = x;
    }
  }

  void NemoNormalizeFeatures(float *p, int32_t num_frames,
                             int32_t feature_dim) const {
    if (config_.nemo_normalize_type.empty()) {
      return;
    }

    if (config_.nemo_normalize_type != "per_feature") {
      SHERPA_ONNX_LOGE(
          "Only normalize_type=per_feature is implemented. Given: %s",
          config_.nemo_normalize_type.c_str());
      exit(-1);
    }

    NemoNormalizePerFeature(p, num_frames, feature_dim);
  }

  static void NemoNormalizePerFeature(float *p, int32_t num_frames,
                                      int32_t feature_dim) {
    using RowMajorMat =
        Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;

    Eigen::Map<RowMajorMat> x(p, num_frames, feature_dim);

    Eigen::RowVectorXf mean = x.colwise().mean();
    Eigen::RowVectorXf var =
        (x.array().square().colwise().mean() - mean.array().square())
            .max(0.0f);  // avoid negative due to FP error

    Eigen::RowVectorXf inv_std = (var.array().sqrt() + 1e-5f).inverse();

    x.array() =
        (x.array().rowwise() - mean.array()).rowwise() * inv_std.array();
  }

 private:
  FeatureExtractorConfig config_;
  std::unique_ptr<knf::OnlineFbank> fbank_;
  std::unique_ptr<knf::OnlineMfcc> mfcc_;
  std::unique_ptr<knf::OnlineWhisperFbank> whisper_fbank_;
  knf::FbankOptions opts_;
  knf::MfccOptions mfcc_opts_;
  OfflineRecognitionResult r_;
  ContextGraphPtr context_graph_;
  bool is_ced_ = false;
  bool is_moonshine_ = false;
  bool is_omnilingual_asr_ = false;

  // used only when (is_moonshine_ || is_omnilingual_asr_) == true
  std::vector<float> samples_;

  std::unordered_map<std::string, std::string> options_;
};

OfflineStream::OfflineStream(const FeatureExtractorConfig &config /*= {}*/,
                             ContextGraphPtr context_graph /*= nullptr*/)
    : impl_(std::make_unique<Impl>(config, std::move(context_graph))) {}

OfflineStream::OfflineStream(WhisperTag tag)
    : impl_(std::make_unique<Impl>(tag)) {}

OfflineStream::OfflineStream(CEDTag tag) : impl_(std::make_unique<Impl>(tag)) {}

OfflineStream::OfflineStream(MoonshineTag tag)
    : impl_(std::make_unique<Impl>(tag)) {}

OfflineStream::OfflineStream(OmnilingualAsrTag tag)
    : impl_(std::make_unique<Impl>(tag)) {}

OfflineStream::~OfflineStream() = default;

void OfflineStream::AcceptWaveform(int32_t sampling_rate, const float *waveform,
                                   int32_t n) const {
  impl_->AcceptWaveform(sampling_rate, waveform, n);
}

int32_t OfflineStream::FeatureDim() const { return impl_->FeatureDim(); }

std::vector<float> OfflineStream::GetFrames() const {
  return impl_->GetFrames();
}

void OfflineStream::SetResult(const OfflineRecognitionResult &r) {
  impl_->SetResult(r);
}

const ContextGraphPtr &OfflineStream::GetContextGraph() const {
  return impl_->GetContextGraph();
}

const OfflineRecognitionResult &OfflineStream::GetResult() const {
  return impl_->GetResult();
}

void OfflineStream::SetOption(const std::string &key,
                              const std::string &value) {
  impl_->SetOption(key, value);
}

bool OfflineStream::HasOption(const std::string &key) const {
  return impl_->HasOption(key);
}

const std::string &OfflineStream::GetOption(const std::string &key) const {
  return impl_->GetOption(key);
}

int32_t OfflineStream::GetOptionInt(const std::string &key,
                                    int32_t default_value) const {
  return impl_->GetOptionInt(key, default_value);
}

float OfflineStream::GetOptionFloat(const std::string &key,
                                    float default_value) const {
  return impl_->GetOptionFloat(key, default_value);
}

std::string OfflineRecognitionResult::AsJsonString() const {
  std::ostringstream os;
  os << "{";

  os << "\"lang\""
     << ": ";
  os << std::quoted(lang) << ", ";

  os << "\"emotion\""
     << ": ";
  os << std::quoted(emotion) << ", ";

  os << "\"event\""
     << ": ";
  os << std::quoted(event) << ", ";

  os << "\"text\""
     << ": ";
  os << std::quoted(text) << ", ";

  os << "\""
     << "timestamps"
     << "\""
     << ": ";
  os << "[";

  std::string sep = "";
  for (auto t : timestamps) {
    os << sep << std::fixed << std::setprecision(2) << t;
    sep = ", ";
  }
  os << "], ";

  os << "\""
     << "durations"
     << "\""
     << ": ";
  os << "[";
  sep = "";
  for (auto d : durations) {
    os << sep << std::fixed << std::setprecision(2) << d;
    sep = ", ";
  }
  os << "], ";

  os << "\""
     << "tokens"
     << "\""
     << ":";
  os << "[";

  sep = "";
  auto oldFlags = os.flags();
  for (const auto &t : tokens) {
    if (t.size() == 1 && static_cast<uint8_t>(t[0]) > 0x7f) {
      const uint8_t *p = reinterpret_cast<const uint8_t *>(t.c_str());
      os << sep << "\""
         << "<0x" << std::hex << std::uppercase << static_cast<uint32_t>(p[0])
         << ">"
         << "\"";
      os.flags(oldFlags);
    } else {
      os << sep << std::quoted(t);
    }
    sep = ", ";
  }
  os << "], ";

  os << "\""
     << "ys_log_probs"
     << "\""
     << ": ";
  os << "[";
  sep = "";
  for (auto p : ys_log_probs) {
    os << sep << std::fixed << std::setprecision(6) << p;
    sep = ", ";
  }
  os << "], ";

  sep = "";

  os << "\""
     << "words"
     << "\""
     << ": ";
  os << "[";
  for (int32_t w : words) {
    os << sep << w;
    sep = ", ";
  }
  os << "]";

  // Add segment-level data if present (from Whisper timestamp token mode)
  if (!segment_timestamps.empty()) {
    os << ", ";

    os << "\"segment_timestamps\": [";
    sep = "";
    for (auto t : segment_timestamps) {
      os << sep << std::fixed << std::setprecision(2) << t;
      sep = ", ";
    }
    os << "], ";

    os << "\"segment_durations\": [";
    sep = "";
    for (auto d : segment_durations) {
      os << sep << std::fixed << std::setprecision(2) << d;
      sep = ", ";
    }
    os << "], ";

    os << "\"segment_texts\": [";
    sep = "";
    for (const auto &t : segment_texts) {
      os << sep << std::quoted(t);
      sep = ", ";
    }
    os << "]";
  }

  os << "}";

  return os.str();
}
}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-stream.h
================================================
// sherpa-onnx/csrc/offline-stream.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_STREAM_H_
#define SHERPA_ONNX_CSRC_OFFLINE_STREAM_H_
#include <stdint.h>

#include <memory>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/context-graph.h"
#include "sherpa-onnx/csrc/features.h"
#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineRecognitionResult {
  // Recognition results.
  // For English, it consists of space separated words.
  // For Chinese, it consists of Chinese words without spaces.
  std::string text;

  // Decoded results at the token level.
  // For instance, for BPE-based models it consists of a list of BPE tokens.
  std::vector<std::string> tokens;

  std::string lang;

  // emotion target of the audio.
  std::string emotion;

  // event target of the audio.
  std::string event;

  /// timestamps.size() == tokens.size()
  /// timestamps[i] records the time in seconds when tokens[i] is decoded.
  std::vector<float> timestamps;

  /// durations[i] contains the duration (in seconds) for tokens[i] (TDT models
  /// only)
  std::vector<float> durations;

  /// ys_log_probs[i] contains the log probability (confidence) for tokens[i].
  std::vector<float> ys_log_probs;

  // Word IDs from FST decoding (CTC models with FST decoder only).
  std::vector<int32_t> words;

  // Segment-level data (from Whisper with segment timestamps enabled).
  // These are parallel vectors: segment_timestamps.size() ==
  // segment_durations.size() == segment_texts.size()
  std::vector<float> segment_timestamps;   // start time of each segment
  std::vector<float> segment_durations;    // duration of each segment
  std::vector<std::string> segment_texts;  // text of each segment

  std::string AsJsonString() const;
};

struct WhisperTag {
  int32_t dim = 80;
};

struct CEDTag {};

// It uses a neural network model, a preprocessor, to convert
// audio samples to features
struct MoonshineTag {};

// It is based on Wav2Vec, accepting raw audio samples as input
struct OmnilingualAsrTag {};

class OfflineStream {
 public:
  explicit OfflineStream(const FeatureExtractorConfig &config = {},
                         ContextGraphPtr context_graph = {});

  explicit OfflineStream(WhisperTag tag);
  explicit OfflineStream(CEDTag tag);
  explicit OfflineStream(MoonshineTag tag);
  explicit OfflineStream(OmnilingualAsrTag tag);
  ~OfflineStream();

  /**
     @param sampling_rate The sampling_rate of the input waveform. If it does
                          not equal to  config.sampling_rate, we will do
                          resampling inside.
     @param waveform Pointer to a 1-D array of size n. It must be normalized to
                     the range [-1, 1].
     @param n Number of entries in waveform

     Caution: You can only invoke this function once so you have to input
              all the samples at once
   */
  void AcceptWaveform(int32_t sampling_rate, const float *waveform,
                      int32_t n) const;

  /// Return feature dim of this extractor.
  ///
  /// Note: if it is Moonshine, then it returns the number of audio samples
  /// currently received.
  int32_t FeatureDim() const;

  // Get all the feature frames of this stream in a 1-D array, which is
  // flattened from a 2-D array of shape (num_frames, feat_dim).
  std::vector<float> GetFrames() const;

  /** Set the recognition result for this stream. */
  void SetResult(const OfflineRecognitionResult &r);

  /** Get the recognition result of this stream */
  const OfflineRecognitionResult &GetResult() const;

  /** Get the ContextGraph of this stream */
  const ContextGraphPtr &GetContextGraph() const;

  // Generic per-stream option mechanism (key-value string pairs).
  void SetOption(const std::string &key, const std::string &value);
  bool HasOption(const std::string &key) const;

  // Returns the value for the given key, or an empty string if the key
  // does not exist. No exception is thrown for missing keys.
  const std::string &GetOption(const std::string &key) const;
  int32_t GetOptionInt(const std::string &key,
                       int32_t default_value = 0) const;
  float GetOptionFloat(const std::string &key,
                       float default_value = 0.0f) const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_STREAM_H_


================================================
FILE: sherpa-onnx/csrc/offline-tdnn-ctc-model.cc
================================================
// sherpa-onnx/csrc/offline-tdnn-ctc-model.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-tdnn-ctc-model.h"

#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"
#include "sherpa-onnx/csrc/transpose.h"

namespace sherpa_onnx {

class OfflineTdnnCtcModel::Impl {
 public:
  explicit Impl(const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(config_.tdnn.model);
    Init(buf.data(), buf.size());
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(mgr, config_.tdnn.model);
    Init(buf.data(), buf.size());
  }

  std::vector<Ort::Value> Forward(Ort::Value features) {
    auto nnet_out =
        sess_->Run({}, input_names_ptr_.data(), &features, 1,
                   output_names_ptr_.data(), output_names_ptr_.size());

    std::vector<int64_t> nnet_out_shape =
        nnet_out[0].GetTensorTypeAndShapeInfo().GetShape();

    std::vector<int64_t> out_length_vec(nnet_out_shape[0], nnet_out_shape[1]);
    std::vector<int64_t> out_length_shape(1, nnet_out_shape[0]);

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    Ort::Value nnet_out_length = Ort::Value::CreateTensor(
        memory_info, out_length_vec.data(), out_length_vec.size(),
        out_length_shape.data(), out_length_shape.size());

    std::vector<Ort::Value> ans;
    ans.reserve(2);
    ans.push_back(std::move(nnet_out[0]));
    ans.push_back(Clone(Allocator(), &nnet_out_length));
    return ans;
  }

  int32_t VocabSize() const { return vocab_size_; }

  OrtAllocator *Allocator() { return allocator_; }

 private:
  void Init(void *model_data, size_t model_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);

    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      PrintModelMetadata(os, meta_data);
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
    SHERPA_ONNX_READ_META_DATA(vocab_size_, "vocab_size");
  }

 private:
  OfflineModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;

  int32_t vocab_size_ = 0;
};

OfflineTdnnCtcModel::OfflineTdnnCtcModel(const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineTdnnCtcModel::OfflineTdnnCtcModel(Manager *mgr,
                                         const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineTdnnCtcModel::~OfflineTdnnCtcModel() = default;

std::vector<Ort::Value> OfflineTdnnCtcModel::Forward(
    Ort::Value features, Ort::Value /*features_length*/) {
  return impl_->Forward(std::move(features));
}

int32_t OfflineTdnnCtcModel::VocabSize() const { return impl_->VocabSize(); }

OrtAllocator *OfflineTdnnCtcModel::Allocator() const {
  return impl_->Allocator();
}

#if __ANDROID_API__ >= 9
template OfflineTdnnCtcModel::OfflineTdnnCtcModel(
    AAssetManager *mgr, const OfflineModelConfig &config);
#endif

#if __OHOS__
template OfflineTdnnCtcModel::OfflineTdnnCtcModel(
    NativeResourceManager *mgr, const OfflineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-tdnn-ctc-model.h
================================================
// sherpa-onnx/csrc/offline-tdnn-ctc-model.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_TDNN_CTC_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TDNN_CTC_MODEL_H_
#include <memory>
#include <string>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-ctc-model.h"
#include "sherpa-onnx/csrc/offline-model-config.h"

namespace sherpa_onnx {

/** This class implements the tdnn model of the yesno recipe from icefall.
 *
 * See
 * https://github.com/k2-fsa/icefall/tree/master/egs/yesno/ASR/tdnn
 */
class OfflineTdnnCtcModel : public OfflineCtcModel {
 public:
  explicit OfflineTdnnCtcModel(const OfflineModelConfig &config);

  template <typename Manager>
  OfflineTdnnCtcModel(Manager *mgr, const OfflineModelConfig &config);

  ~OfflineTdnnCtcModel() override;

  /** Run the forward method of the model.
   *
   * @param features  A tensor of shape (N, T, C).
   * @param features_length  A 1-D tensor of shape (N,) containing number of
   *                         valid frames in `features` before padding.
   *                         Its dtype is int64_t.
   *
   * @return Return a pair containing:
   *  - log_probs: A 3-D tensor of shape (N, T', vocab_size).
   *  - log_probs_length A 1-D tensor of shape (N,). Its dtype is int64_t
   */
  std::vector<Ort::Value> Forward(Ort::Value features,
                                  Ort::Value /*features_length*/) override;

  /** Return the vocabulary size of the model
   */
  int32_t VocabSize() const override;

  /** Return an allocator for allocating memory
   */
  OrtAllocator *Allocator() const override;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TDNN_CTC_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/offline-tdnn-model-config.cc
================================================
// sherpa-onnx/csrc/offline-tdnn-model-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-tdnn-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OfflineTdnnModelConfig::Register(ParseOptions *po) {
  po->Register("tdnn-model", &model, "Path to onnx model");
}

bool OfflineTdnnModelConfig::Validate() const {
  if (!FileExists(model)) {
    SHERPA_ONNX_LOGE("tdnn model file %s does not exist", model.c_str());
    return false;
  }

  return true;
}

std::string OfflineTdnnModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineTdnnModelConfig(";
  os << "model=\"" << model << "\")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-tdnn-model-config.h
================================================
// sherpa-onnx/csrc/offline-tdnn-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_TDNN_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TDNN_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

// for https://github.com/k2-fsa/icefall/tree/master/egs/yesno/ASR/tdnn
struct OfflineTdnnModelConfig {
  std::string model;

  OfflineTdnnModelConfig() = default;
  explicit OfflineTdnnModelConfig(const std::string &model) : model(model) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TDNN_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-telespeech-ctc-model.cc
================================================
// sherpa-onnx/csrc/offline-telespeech-ctc-model.cc
//
// Copyright (c)  2023-2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-telespeech-ctc-model.h"

#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"
#include "sherpa-onnx/csrc/transpose.h"

namespace sherpa_onnx {

class OfflineTeleSpeechCtcModel::Impl {
 public:
  explicit Impl(const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(config_.telespeech_ctc);
    Init(buf.data(), buf.size());
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(mgr, config_.telespeech_ctc);
    Init(buf.data(), buf.size());
  }

  std::vector<Ort::Value> Forward(Ort::Value features,
                                  Ort::Value /*features_length*/) {
    std::vector<int64_t> shape =
        features.GetTensorTypeAndShapeInfo().GetShape();

    if (static_cast<int32_t>(shape[0]) != 1) {
      SHERPA_ONNX_LOGE("This model supports only batch size 1. Given %d",
                       static_cast<int32_t>(shape[0]));
    }

    auto out = sess_->Run({}, input_names_ptr_.data(), &features, 1,
                          output_names_ptr_.data(), output_names_ptr_.size());

    std::vector<int64_t> logits_shape = {1};
    Ort::Value logits_length = Ort::Value::CreateTensor<int64_t>(
        allocator_, logits_shape.data(), logits_shape.size());

    int64_t *dst = logits_length.GetTensorMutableData<int64_t>();
    dst[0] = out[0].GetTensorTypeAndShapeInfo().GetShape()[0];

    // (T, B, C) -> (B, T, C)
    Ort::Value logits = Transpose01(allocator_, &out[0]);

    std::vector<Ort::Value> ans;
    ans.reserve(2);
    ans.push_back(std::move(logits));
    ans.push_back(std::move(logits_length));

    return ans;
  }

  int32_t VocabSize() const { return vocab_size_; }

  int32_t SubsamplingFactor() const { return subsampling_factor_; }

  OrtAllocator *Allocator() { return allocator_; }

 private:
  void Init(void *model_data, size_t model_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);

    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      PrintModelMetadata(os, meta_data);
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }

    {
      auto shape =
          sess_->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
      vocab_size_ = shape[2];
    }
  }

 private:
  OfflineModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;

  int32_t vocab_size_ = 0;
  int32_t subsampling_factor_ = 4;
};

OfflineTeleSpeechCtcModel::OfflineTeleSpeechCtcModel(
    const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineTeleSpeechCtcModel::OfflineTeleSpeechCtcModel(
    Manager *mgr, const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineTeleSpeechCtcModel::~OfflineTeleSpeechCtcModel() = default;

std::vector<Ort::Value> OfflineTeleSpeechCtcModel::Forward(
    Ort::Value features, Ort::Value features_length) {
  return impl_->Forward(std::move(features), std::move(features_length));
}

int32_t OfflineTeleSpeechCtcModel::VocabSize() const {
  return impl_->VocabSize();
}
int32_t OfflineTeleSpeechCtcModel::SubsamplingFactor() const {
  return impl_->SubsamplingFactor();
}

OrtAllocator *OfflineTeleSpeechCtcModel::Allocator() const {
  return impl_->Allocator();
}

#if __ANDROID_API__ >= 9
template OfflineTeleSpeechCtcModel::OfflineTeleSpeechCtcModel(
    AAssetManager *mgr, const OfflineModelConfig &config);
#endif

#if __OHOS__
template OfflineTeleSpeechCtcModel::OfflineTeleSpeechCtcModel(
    NativeResourceManager *mgr, const OfflineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-telespeech-ctc-model.h
================================================
// sherpa-onnx/csrc/offline-telespeech-ctc-model.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_TELESPEECH_CTC_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TELESPEECH_CTC_MODEL_H_
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-ctc-model.h"
#include "sherpa-onnx/csrc/offline-model-config.h"

namespace sherpa_onnx {

/** This class implements the CTC model from
 * https://github.com/Tele-AI/TeleSpeech-ASR.
 *
 * See
 * https://github.com/lovemefan/telespeech-asr-python/blob/main/telespeechasr/onnx/onnx_infer.py
 * and
 * https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/tele-speech/test.py
 */
class OfflineTeleSpeechCtcModel : public OfflineCtcModel {
 public:
  explicit OfflineTeleSpeechCtcModel(const OfflineModelConfig &config);

  template <typename Manager>
  OfflineTeleSpeechCtcModel(Manager *mgr, const OfflineModelConfig &config);

  ~OfflineTeleSpeechCtcModel() override;

  /** Run the forward method of the model.
   *
   * @param features  A tensor of shape (N, T, C).
   * @param features_length  A 1-D tensor of shape (N,) containing number of
   *                         valid frames in `features` before padding.
   *                         Its dtype is int64_t.
   *
   * @return Return a vector containing:
   *  - log_probs: A 3-D tensor of shape (N, T', vocab_size).
   *  - log_probs_length A 1-D tensor of shape (N,). Its dtype is int64_t
   */
  std::vector<Ort::Value> Forward(Ort::Value features,
                                  Ort::Value features_length) override;

  /** Return the vocabulary size of the model
   */
  int32_t VocabSize() const override;

  /** SubsamplingFactor of the model
   */
  int32_t SubsamplingFactor() const override;

  /** Return an allocator for allocating memory
   */
  OrtAllocator *Allocator() const override;

  // TeleSpeech CTC models do not support batch size > 1
  bool SupportBatchProcessing() const override { return false; }

  std::string FeatureNormalizationMethod() const override {
    return "per_feature";
  }

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TELESPEECH_CTC_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/offline-transducer-decoder.h
================================================
// sherpa-onnx/csrc/offline-transducer-decoder.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_TRANSDUCER_DECODER_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TRANSDUCER_DECODER_H_

#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-stream.h"

namespace sherpa_onnx {

struct OfflineTransducerDecoderResult {
  /// The decoded token IDs
  std::vector<int64_t> tokens;

  /// timestamps[i] contains the output frame index where tokens[i] is decoded.
  /// Note: The index is after subsampling
  std::vector<int32_t> timestamps;

  /// durations[i] contains the duration for tokens[i] in output frames
  /// (post-subsampling). It is converted to seconds by higher layers
  /// (e.g., Convert() in offline-recognizer-transducer-impl.h).
  std::vector<float> durations;

  /// ys_log_probs[i] contains the log probability (confidence) for tokens[i].
  std::vector<float> ys_log_probs;
};

class OfflineTransducerDecoder {
 public:
  virtual ~OfflineTransducerDecoder() = default;

  /** Run transducer beam search given the output from the encoder model.
   *
   * @param encoder_out A 3-D tensor of shape (N, T, joiner_dim)
   * @param encoder_out_length A 1-D tensor of shape (N,) containing number
   *                           of valid frames in encoder_out before padding.
   *
   * @return Return a vector of size `N` containing the decoded results.
   */
  virtual std::vector<OfflineTransducerDecoderResult> Decode(
      Ort::Value encoder_out, Ort::Value encoder_out_length,
      OfflineStream **ss = nullptr, int32_t n = 0) = 0;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TRANSDUCER_DECODER_H_


================================================
FILE: sherpa-onnx/csrc/offline-transducer-greedy-search-decoder.cc
================================================
// sherpa-onnx/csrc/offline-transducer-greedy-search-decoder.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-transducer-greedy-search-decoder.h"

#include <algorithm>
#include <iterator>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/math.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/packed-sequence.h"
#include "sherpa-onnx/csrc/slice.h"

namespace sherpa_onnx {

std::vector<OfflineTransducerDecoderResult>
OfflineTransducerGreedySearchDecoder::Decode(Ort::Value encoder_out,
                                             Ort::Value encoder_out_length,
                                             OfflineStream **ss /*= nullptr*/,
                                             int32_t n /*= 0*/) {
  PackedSequence packed_encoder_out = PackPaddedSequence(
      model_->Allocator(), &encoder_out, &encoder_out_length);

  int32_t batch_size =
      static_cast<int32_t>(packed_encoder_out.sorted_indexes.size());

  int32_t vocab_size = model_->VocabSize();
  int32_t context_size = model_->ContextSize();

  std::vector<OfflineTransducerDecoderResult> ans(batch_size);
  for (auto &r : ans) {
    r.tokens.resize(context_size, -1);
    // 0 is the ID of the blank token
    r.tokens.back() = 0;
  }

  auto decoder_input = model_->BuildDecoderInput(ans, ans.size());
  Ort::Value decoder_out = model_->RunDecoder(std::move(decoder_input));

  int32_t start = 0;
  int32_t t = 0;
  for (auto n : packed_encoder_out.batch_sizes) {
    Ort::Value cur_encoder_out = packed_encoder_out.Get(start, n);
    Ort::Value cur_decoder_out = Slice(model_->Allocator(), &decoder_out, 0, n);
    start += n;
    Ort::Value logit = model_->RunJoiner(std::move(cur_encoder_out),
                                         std::move(cur_decoder_out));
    float *p_logit = logit.GetTensorMutableData<float>();
    bool emitted = false;
    for (int32_t i = 0; i != n; ++i) {
      if (blank_penalty_ > 0.0) {
        p_logit[0] -= blank_penalty_;  // assuming blank id is 0
      }

      LogSoftmax(p_logit, vocab_size);

      auto y = static_cast<int32_t>(std::distance(
          p_logit, std::max_element(p_logit, p_logit + vocab_size)));

      float log_prob = p_logit[y];

      p_logit += vocab_size;
      // blank id is hardcoded to 0
      // also, it treats unk as blank
      if (y != 0 && y != unk_id_) {
        ans[i].tokens.push_back(y);
        ans[i].timestamps.push_back(t);
        ans[i].ys_log_probs.push_back(log_prob);
        emitted = true;
      }
    }
    if (emitted) {
      Ort::Value decoder_input = model_->BuildDecoderInput(ans, n);
      decoder_out = model_->RunDecoder(std::move(decoder_input));
    }
    ++t;
  }

  for (auto &r : ans) {
    r.tokens = {r.tokens.begin() + context_size, r.tokens.end()};
  }

  std::vector<OfflineTransducerDecoderResult> unsorted_ans(batch_size);
  for (int32_t i = 0; i != batch_size; ++i) {
    unsorted_ans[packed_encoder_out.sorted_indexes[i]] = std::move(ans[i]);
  }

  return unsorted_ans;
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-transducer-greedy-search-decoder.h
================================================
// sherpa-onnx/csrc/offline-transducer-greedy-search-decoder.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_TRANSDUCER_GREEDY_SEARCH_DECODER_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TRANSDUCER_GREEDY_SEARCH_DECODER_H_

#include <vector>

#include "sherpa-onnx/csrc/offline-transducer-decoder.h"
#include "sherpa-onnx/csrc/offline-transducer-model.h"

namespace sherpa_onnx {

class OfflineTransducerGreedySearchDecoder : public OfflineTransducerDecoder {
 public:
  OfflineTransducerGreedySearchDecoder(OfflineTransducerModel *model,
                                       int32_t unk_id,
                                       float blank_penalty)
      : model_(model), unk_id_(unk_id), blank_penalty_(blank_penalty) {}

  std::vector<OfflineTransducerDecoderResult> Decode(
      Ort::Value encoder_out, Ort::Value encoder_out_length,
      OfflineStream **ss = nullptr, int32_t n = 0) override;

 private:
  OfflineTransducerModel *model_;  // Not owned
  int32_t unk_id_;
  float blank_penalty_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TRANSDUCER_GREEDY_SEARCH_DECODER_H_


================================================
FILE: sherpa-onnx/csrc/offline-transducer-greedy-search-nemo-decoder.cc
================================================
// sherpa-onnx/csrc/offline-transducer-greedy-search-nemo-decoder.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-transducer-greedy-search-nemo-decoder.h"

#include <algorithm>
#include <iterator>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/math.h"
#include "sherpa-onnx/csrc/onnx-utils.h"

namespace sherpa_onnx {

static std::pair<Ort::Value, Ort::Value> BuildDecoderInput(
    int32_t token, OrtAllocator *allocator) {
  std::array<int64_t, 2> shape{1, 1};

  Ort::Value decoder_input =
      Ort::Value::CreateTensor<int32_t>(allocator, shape.data(), shape.size());

  std::array<int64_t, 1> length_shape{1};
  Ort::Value decoder_input_length = Ort::Value::CreateTensor<int32_t>(
      allocator, length_shape.data(), length_shape.size());

  int32_t *p = decoder_input.GetTensorMutableData<int32_t>();

  int32_t *p_length = decoder_input_length.GetTensorMutableData<int32_t>();

  p[0] = token;

  p_length[0] = 1;

  return {std::move(decoder_input), std::move(decoder_input_length)};
}

static OfflineTransducerDecoderResult DecodeOne(
    const float *p, int32_t num_rows, int32_t num_cols,
    OfflineTransducerNeMoModel *model, float blank_penalty) {
  auto memory_info =
      Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

  OfflineTransducerDecoderResult ans;

  int32_t vocab_size = model->VocabSize();
  int32_t blank_id = vocab_size - 1;
  int32_t max_symbols_per_frame = 10;

  auto decoder_input_pair = BuildDecoderInput(blank_id, model->Allocator());

  std::pair<Ort::Value, std::vector<Ort::Value>> decoder_output_pair =
      model->RunDecoder(std::move(decoder_input_pair.first),
                        std::move(decoder_input_pair.second),
                        model->GetDecoderInitStates(1));

  std::array<int64_t, 3> encoder_shape{1, num_cols, 1};

  for (int32_t t = 0; t != num_rows; ++t) {
    Ort::Value cur_encoder_out = Ort::Value::CreateTensor(
        memory_info, const_cast<float *>(p) + t * num_cols, num_cols,
        encoder_shape.data(), encoder_shape.size());

    for (int32_t q = 0; q != max_symbols_per_frame; ++q) {
      Ort::Value logit = model->RunJoiner(View(&cur_encoder_out),
                                          View(&decoder_output_pair.first));

      float *p_logit = logit.GetTensorMutableData<float>();
      if (blank_penalty > 0) {
        p_logit[blank_id] -= blank_penalty;
      }

      auto y = static_cast<int32_t>(std::distance(
          static_cast<const float *>(p_logit),
          std::max_element(static_cast<const float *>(p_logit),
                           static_cast<const float *>(p_logit) + vocab_size)));

      // Apply LogSoftmax and get log probability for selected token
      LogSoftmax(p_logit, vocab_size);
      float log_prob = p_logit[y];

      if (y != blank_id) {
        ans.tokens.push_back(y);
        ans.timestamps.push_back(t);
        ans.ys_log_probs.push_back(log_prob);

        decoder_input_pair = BuildDecoderInput(y, model->Allocator());

        decoder_output_pair =
            model->RunDecoder(std::move(decoder_input_pair.first),
                              std::move(decoder_input_pair.second),
                              std::move(decoder_output_pair.second));
      } else {
        break;
      }  // if (y != blank_id)
    }
  }  // for (int32_t i = 0; i != num_rows; ++i)

  return ans;
}

static OfflineTransducerDecoderResult DecodeOneTDT(
    const float *p, int32_t num_rows, int32_t num_cols,
    OfflineTransducerNeMoModel *model, float blank_penalty) {
  auto memory_info =
      Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

  OfflineTransducerDecoderResult ans;

  int32_t vocab_size = model->VocabSize();
  int32_t blank_id = vocab_size - 1;

  auto decoder_input_pair = BuildDecoderInput(blank_id, model->Allocator());

  std::pair<Ort::Value, std::vector<Ort::Value>> decoder_output_pair =
      model->RunDecoder(std::move(decoder_input_pair.first),
                        std::move(decoder_input_pair.second),
                        model->GetDecoderInitStates(1));

  std::array<int64_t, 3> encoder_shape{1, num_cols, 1};

  int32_t max_tokens_per_frame = 5;
  int32_t tokens_this_frame = 0;

  int32_t skip = 0;
  std::vector<float> token_logits_copy(
      vocab_size);  // Reusable buffer for LogSoftmax
  for (int32_t t = 0; t < num_rows; t += skip) {
    Ort::Value cur_encoder_out = Ort::Value::CreateTensor(
        memory_info, const_cast<float *>(p) + t * num_cols, num_cols,
        encoder_shape.data(), encoder_shape.size());

    Ort::Value logit = model->RunJoiner(View(&cur_encoder_out),
                                        View(&decoder_output_pair.first));

    auto shape = logit.GetTensorTypeAndShapeInfo().GetShape();

    float *p_logit = logit.GetTensorMutableData<float>();
    if (blank_penalty > 0) {
      p_logit[blank_id] -= blank_penalty;
    }

    int32_t output_size = shape.back();
    int32_t num_durations = output_size - vocab_size;

    // Split logits into token and duration logits
    const float *token_logits = p_logit;
    const float *duration_logits = p_logit + vocab_size;

    auto y = static_cast<int32_t>(std::distance(
        token_logits,
        std::max_element(token_logits, token_logits + vocab_size)));

    // Apply LogSoftmax to token logits and get log probability
    std::copy(token_logits, token_logits + vocab_size,
              token_logits_copy.begin());
    LogSoftmax(token_logits_copy.data(), vocab_size);
    float log_prob = token_logits_copy[y];

    // note that skip can be 0
    skip = static_cast<int32_t>(std::distance(
        duration_logits,
        std::max_element(duration_logits, duration_logits + num_durations)));

    if (y != blank_id) {
      ans.tokens.push_back(y);
      ans.timestamps.push_back(t);
      ans.durations.push_back(skip);
      ans.ys_log_probs.push_back(log_prob);

      decoder_input_pair = BuildDecoderInput(y, model->Allocator());

      decoder_output_pair =
          model->RunDecoder(std::move(decoder_input_pair.first),
                            std::move(decoder_input_pair.second),
                            std::move(decoder_output_pair.second));

      tokens_this_frame += 1;
    }

    if (skip > 0) {
      tokens_this_frame = 0;
    }

    if (tokens_this_frame >= max_tokens_per_frame) {
      tokens_this_frame = 0;
      skip = 1;
    }

    if (y == blank_id && skip == 0) {
      tokens_this_frame = 0;
      skip = 1;
    }
  }  // for (int32_t t = 0; t < num_rows; t += skip)

  return ans;
}

std::vector<OfflineTransducerDecoderResult>
OfflineTransducerGreedySearchNeMoDecoder::Decode(
    Ort::Value encoder_out, Ort::Value encoder_out_length,
    OfflineStream ** /*ss = nullptr*/, int32_t /*n= 0*/) {
  auto shape = encoder_out.GetTensorTypeAndShapeInfo().GetShape();

  int32_t batch_size = static_cast<int32_t>(shape[0]);
  int32_t dim1 = static_cast<int32_t>(shape[1]);
  int32_t dim2 = static_cast<int32_t>(shape[2]);

  auto length_type =
      encoder_out_length.GetTensorTypeAndShapeInfo().GetElementType();
  if ((length_type != ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) &&
      (length_type != ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64)) {
    SHERPA_ONNX_LOGE("Unsupported encoder_out_length data type: %d",
                     static_cast<int32_t>(length_type));
    SHERPA_ONNX_EXIT(-1);
  }

  const float *p = encoder_out.GetTensorData<float>();

  std::vector<OfflineTransducerDecoderResult> ans(batch_size);

  for (int32_t i = 0; i != batch_size; ++i) {
    const float *this_p = p + dim1 * dim2 * i;
    int32_t this_len = length_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32
                           ? encoder_out_length.GetTensorData<int32_t>()[i]
                           : encoder_out_length.GetTensorData<int64_t>()[i];

    if (is_tdt_) {
      ans[i] = DecodeOneTDT(this_p, this_len, dim2, model_, blank_penalty_);
    } else {
      ans[i] = DecodeOne(this_p, this_len, dim2, model_, blank_penalty_);
    }
  }

  return ans;
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-transducer-greedy-search-nemo-decoder.h
================================================
// sherpa-onnx/csrc/offline-transducer-greedy-search-nemo-decoder.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_TRANSDUCER_GREEDY_SEARCH_NEMO_DECODER_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TRANSDUCER_GREEDY_SEARCH_NEMO_DECODER_H_

#include <vector>

#include "sherpa-onnx/csrc/offline-transducer-decoder.h"
#include "sherpa-onnx/csrc/offline-transducer-nemo-model.h"

namespace sherpa_onnx {

class OfflineTransducerGreedySearchNeMoDecoder
    : public OfflineTransducerDecoder {
 public:
  OfflineTransducerGreedySearchNeMoDecoder(OfflineTransducerNeMoModel *model,
                                           float blank_penalty, bool is_tdt)
      : model_(model), blank_penalty_(blank_penalty), is_tdt_(is_tdt) {}

  std::vector<OfflineTransducerDecoderResult> Decode(
      Ort::Value encoder_out, Ort::Value encoder_out_length,
      OfflineStream **ss = nullptr, int32_t n = 0) override;

 private:
  OfflineTransducerNeMoModel *model_;  // Not owned
  float blank_penalty_;
  bool is_tdt_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TRANSDUCER_GREEDY_SEARCH_NEMO_DECODER_H_


================================================
FILE: sherpa-onnx/csrc/offline-transducer-model-config.cc
================================================
// sherpa-onnx/csrc/offline-transducer-model-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation
#include "sherpa-onnx/csrc/offline-transducer-model-config.h"

#include <sstream>
#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OfflineTransducerModelConfig::Register(ParseOptions *po) {
  po->Register("encoder", &encoder_filename, "Path to encoder.onnx");
  po->Register("decoder", &decoder_filename, "Path to decoder.onnx");
  po->Register("joiner", &joiner_filename, "Path to joiner.onnx");
}

bool OfflineTransducerModelConfig::Validate() const {
  if (!FileExists(encoder_filename)) {
    SHERPA_ONNX_LOGE("transducer encoder: '%s' does not exist",
                     encoder_filename.c_str());
    return false;
  }

  if (!FileExists(decoder_filename)) {
    SHERPA_ONNX_LOGE("transducer decoder: '%s' does not exist",
                     decoder_filename.c_str());
    return false;
  }

  if (!FileExists(joiner_filename)) {
    SHERPA_ONNX_LOGE("transducer joiner: '%s' does not exist",
                     joiner_filename.c_str());
    return false;
  }

  return true;
}

std::string OfflineTransducerModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineTransducerModelConfig(";
  os << "encoder_filename=\"" << encoder_filename << "\", ";
  os << "decoder_filename=\"" << decoder_filename << "\", ";
  os << "joiner_filename=\"" << joiner_filename << "\")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-transducer-model-config.h
================================================
// sherpa-onnx/csrc/offline-transducer-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_TRANSDUCER_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TRANSDUCER_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineTransducerModelConfig {
  std::string encoder_filename;
  std::string decoder_filename;
  std::string joiner_filename;

  OfflineTransducerModelConfig() = default;
  OfflineTransducerModelConfig(const std::string &encoder_filename,
                               const std::string &decoder_filename,
                               const std::string &joiner_filename)
      : encoder_filename(encoder_filename),
        decoder_filename(decoder_filename),
        joiner_filename(joiner_filename) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TRANSDUCER_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-transducer-model.cc
================================================
// sherpa-onnx/csrc/offline-transducer-model.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-transducer-model.h"

#include <algorithm>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-transducer-decoder.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"

namespace sherpa_onnx {

class OfflineTransducerModel::Impl {
 public:
  explicit Impl(const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    {
      auto buf = ReadFile(config.transducer.encoder_filename);
      InitEncoder(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(config.transducer.decoder_filename);
      InitDecoder(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(config.transducer.joiner_filename);
      InitJoiner(buf.data(), buf.size());
    }
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    {
      auto buf = ReadFile(mgr, config.transducer.encoder_filename);
      InitEncoder(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(mgr, config.transducer.decoder_filename);
      InitDecoder(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(mgr, config.transducer.joiner_filename);
      InitJoiner(buf.data(), buf.size());
    }
  }

  std::pair<Ort::Value, Ort::Value> RunEncoder(Ort::Value features,
                                               Ort::Value features_length) {
    std::array<Ort::Value, 2> encoder_inputs = {std::move(features),
                                                std::move(features_length)};

    auto encoder_out = encoder_sess_->Run(
        {}, encoder_input_names_ptr_.data(), encoder_inputs.data(),
        encoder_inputs.size(), encoder_output_names_ptr_.data(),
        encoder_output_names_ptr_.size());

    return {std::move(encoder_out[0]), std::move(encoder_out[1])};
  }

  Ort::Value RunDecoder(Ort::Value decoder_input) {
    auto decoder_out = decoder_sess_->Run(
        {}, decoder_input_names_ptr_.data(), &decoder_input, 1,
        decoder_output_names_ptr_.data(), decoder_output_names_ptr_.size());
    return std::move(decoder_out[0]);
  }

  Ort::Value RunJoiner(Ort::Value encoder_out, Ort::Value decoder_out) {
    std::array<Ort::Value, 2> joiner_input = {std::move(encoder_out),
                                              std::move(decoder_out)};
    auto logit = joiner_sess_->Run({}, joiner_input_names_ptr_.data(),
                                   joiner_input.data(), joiner_input.size(),
                                   joiner_output_names_ptr_.data(),
                                   joiner_output_names_ptr_.size());

    return std::move(logit[0]);
  }

  int32_t VocabSize() const { return vocab_size_; }
  int32_t ContextSize() const { return context_size_; }
  int32_t SubsamplingFactor() const { return 4; }
  OrtAllocator *Allocator() { return allocator_; }

  Ort::Value BuildDecoderInput(
      const std::vector<OfflineTransducerDecoderResult> &results,
      int32_t end_index) {
    assert(end_index <= results.size());

    int32_t batch_size = end_index;
    int32_t context_size = ContextSize();
    std::array<int64_t, 2> shape{batch_size, context_size};

    Ort::Value decoder_input = Ort::Value::CreateTensor<int64_t>(
        Allocator(), shape.data(), shape.size());
    int64_t *p = decoder_input.GetTensorMutableData<int64_t>();

    for (int32_t i = 0; i != batch_size; ++i) {
      const auto &r = results[i];
      const int64_t *begin = r.tokens.data() + r.tokens.size() - context_size;
      const int64_t *end = r.tokens.data() + r.tokens.size();
      std::copy(begin, end, p);
      p += context_size;
    }

    return decoder_input;
  }

  Ort::Value BuildDecoderInput(const std::vector<Hypothesis> &results,
                               int32_t end_index) {
    assert(end_index <= results.size());

    int32_t batch_size = end_index;
    int32_t context_size = ContextSize();
    std::array<int64_t, 2> shape{batch_size, context_size};

    Ort::Value decoder_input = Ort::Value::CreateTensor<int64_t>(
        Allocator(), shape.data(), shape.size());
    int64_t *p = decoder_input.GetTensorMutableData<int64_t>();

    for (int32_t i = 0; i != batch_size; ++i) {
      const auto &r = results[i];
      const int64_t *begin = r.ys.data() + r.ys.size() - context_size;
      const int64_t *end = r.ys.data() + r.ys.size();
      std::copy(begin, end, p);
      p += context_size;
    }

    return decoder_input;
  }

 private:
  void InitEncoder(void *model_data, size_t model_data_length) {
    encoder_sess_ = std::make_unique<Ort::Session>(
        env_, model_data, model_data_length, sess_opts_);

    GetInputNames(encoder_sess_.get(), &encoder_input_names_,
                  &encoder_input_names_ptr_);

    GetOutputNames(encoder_sess_.get(), &encoder_output_names_,
                   &encoder_output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = encoder_sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      os << "---encoder---\n";
      PrintModelMetadata(os, meta_data);
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }
  }

  void InitDecoder(void *model_data, size_t model_data_length) {
    decoder_sess_ = std::make_unique<Ort::Session>(
        env_, model_data, model_data_length, sess_opts_);

    GetInputNames(decoder_sess_.get(), &decoder_input_names_,
                  &decoder_input_names_ptr_);

    GetOutputNames(decoder_sess_.get(), &decoder_output_names_,
                   &decoder_output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = decoder_sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      os << "---decoder---\n";
      PrintModelMetadata(os, meta_data);
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
    SHERPA_ONNX_READ_META_DATA(vocab_size_, "vocab_size");
    SHERPA_ONNX_READ_META_DATA(context_size_, "context_size");
  }

  void InitJoiner(void *model_data, size_t model_data_length) {
    joiner_sess_ = std::make_unique<Ort::Session>(
        env_, model_data, model_data_length, sess_opts_);

    GetInputNames(joiner_sess_.get(), &joiner_input_names_,
                  &joiner_input_names_ptr_);

    GetOutputNames(joiner_sess_.get(), &joiner_output_names_,
                   &joiner_output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = joiner_sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      os << "---joiner---\n";
      PrintModelMetadata(os, meta_data);
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
    }
  }

 private:
  OfflineModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> encoder_sess_;
  std::unique_ptr<Ort::Session> decoder_sess_;
  std::unique_ptr<Ort::Session> joiner_sess_;

  std::vector<std::string> encoder_input_names_;
  std::vector<const char *> encoder_input_names_ptr_;

  std::vector<std::string> encoder_output_names_;
  std::vector<const char *> encoder_output_names_ptr_;

  std::vector<std::string> decoder_input_names_;
  std::vector<const char *> decoder_input_names_ptr_;

  std::vector<std::string> decoder_output_names_;
  std::vector<const char *> decoder_output_names_ptr_;

  std::vector<std::string> joiner_input_names_;
  std::vector<const char *> joiner_input_names_ptr_;

  std::vector<std::string> joiner_output_names_;
  std::vector<const char *> joiner_output_names_ptr_;

  int32_t vocab_size_ = 0;    // initialized in InitDecoder
  int32_t context_size_ = 0;  // initialized in InitDecoder
};

OfflineTransducerModel::OfflineTransducerModel(const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineTransducerModel::OfflineTransducerModel(Manager *mgr,
                                               const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineTransducerModel::~OfflineTransducerModel() = default;

std::pair<Ort::Value, Ort::Value> OfflineTransducerModel::RunEncoder(
    Ort::Value features, Ort::Value features_length) {
  return impl_->RunEncoder(std::move(features), std::move(features_length));
}

Ort::Value OfflineTransducerModel::RunDecoder(Ort::Value decoder_input) {
  return impl_->RunDecoder(std::move(decoder_input));
}

Ort::Value OfflineTransducerModel::RunJoiner(Ort::Value encoder_out,
                                             Ort::Value decoder_out) {
  return impl_->RunJoiner(std::move(encoder_out), std::move(decoder_out));
}

int32_t OfflineTransducerModel::VocabSize() const { return impl_->VocabSize(); }

int32_t OfflineTransducerModel::ContextSize() const {
  return impl_->ContextSize();
}

int32_t OfflineTransducerModel::SubsamplingFactor() const {
  return impl_->SubsamplingFactor();
}

OrtAllocator *OfflineTransducerModel::Allocator() const {
  return impl_->Allocator();
}

Ort::Value OfflineTransducerModel::BuildDecoderInput(
    const std::vector<OfflineTransducerDecoderResult> &results,
    int32_t end_index) const {
  return impl_->BuildDecoderInput(results, end_index);
}

Ort::Value OfflineTransducerModel::BuildDecoderInput(
    const std::vector<Hypothesis> &results, int32_t end_index) const {
  return impl_->BuildDecoderInput(results, end_index);
}

#if __ANDROID_API__ >= 9
template OfflineTransducerModel::OfflineTransducerModel(
    AAssetManager *mgr, const OfflineModelConfig &config);
#endif

#if __OHOS__
template OfflineTransducerModel::OfflineTransducerModel(
    NativeResourceManager *mgr, const OfflineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-transducer-model.h
================================================
// sherpa-onnx/csrc/offline-transducer-model.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_TRANSDUCER_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TRANSDUCER_MODEL_H_

#include <memory>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/hypothesis.h"
#include "sherpa-onnx/csrc/offline-model-config.h"

namespace sherpa_onnx {

struct OfflineTransducerDecoderResult;

class OfflineTransducerModel {
 public:
  explicit OfflineTransducerModel(const OfflineModelConfig &config);

  template <typename Manager>
  OfflineTransducerModel(Manager *mgr, const OfflineModelConfig &config);

  ~OfflineTransducerModel();

  /** Run the encoder.
   *
   * @param features  A tensor of shape (N, T, C). It is changed in-place.
   * @param features_length  A 1-D tensor of shape (N,) containing number of
   *                         valid frames in `features` before padding.
   *                         Its dtype is int64_t.
   *
   * @return Return a pair containing:
   *  - encoder_out: A 3-D tensor of shape (N, T', encoder_dim)
   *  - encoder_out_length: A 1-D tensor of shape (N,) containing number
   *                        of frames in `encoder_out` before padding.
   */
  std::pair<Ort::Value, Ort::Value> RunEncoder(Ort::Value features,
                                               Ort::Value features_length);

  /** Run the decoder network.
   *
   * Caution: We assume there are no recurrent connections in the decoder and
   *          the decoder is stateless. See
   * https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/pruned_transducer_stateless2/decoder.py
   *          for an example
   *
   * @param decoder_input It is usually of shape (N, context_size)
   * @return Return a tensor of shape (N, decoder_dim).
   */
  Ort::Value RunDecoder(Ort::Value decoder_input);

  /** Run the joint network.
   *
   * @param encoder_out Output of the encoder network. A tensor of shape
   *                    (N, joiner_dim).
   * @param decoder_out Output of the decoder network. A tensor of shape
   *                    (N, joiner_dim).
   * @return Return a tensor of shape (N, vocab_size). In icefall, the last
   *         last layer of the joint network is `nn.Linear`,
   *         not `nn.LogSoftmax`.
   */
  Ort::Value RunJoiner(Ort::Value encoder_out, Ort::Value decoder_out);

  /** Return the vocabulary size of the model
   */
  int32_t VocabSize() const;

  /** Return the context_size of the decoder model.
   */
  int32_t ContextSize() const;

  /** Return the subsampling factor of the model.
   */
  int32_t SubsamplingFactor() const;

  /** Return an allocator for allocating memory
   */
  OrtAllocator *Allocator() const;

  /** Build decoder_input from the current results.
   *
   * @param results Current decoded results.
   * @param end_index We only use results[0:end_index] to build
   *                  the decoder_input. results[end_index] is not used.
   * @return Return a tensor of shape (results.size(), ContextSize())
   */
  Ort::Value BuildDecoderInput(
      const std::vector<OfflineTransducerDecoderResult> &results,
      int32_t end_index) const;

  Ort::Value BuildDecoderInput(const std::vector<Hypothesis> &results,
                               int32_t end_index) const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TRANSDUCER_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/offline-transducer-modified-beam-search-decoder.cc
================================================
// sherpa-onnx/csrc/offline-transducer-modified-beam-search-decoder.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-transducer-modified-beam-search-decoder.h"

#include <deque>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/context-graph.h"
#include "sherpa-onnx/csrc/hypothesis.h"
#include "sherpa-onnx/csrc/log.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/packed-sequence.h"
#include "sherpa-onnx/csrc/slice.h"

namespace sherpa_onnx {

std::vector<OfflineTransducerDecoderResult>
OfflineTransducerModifiedBeamSearchDecoder::Decode(
    Ort::Value encoder_out, Ort::Value encoder_out_length,
    OfflineStream **ss /*=nullptr */, int32_t n /*= 0*/) {
  PackedSequence packed_encoder_out = PackPaddedSequence(
      model_->Allocator(), &encoder_out, &encoder_out_length);

  int32_t batch_size =
      static_cast<int32_t>(packed_encoder_out.sorted_indexes.size());

  if (ss != nullptr) SHERPA_ONNX_CHECK_EQ(batch_size, n);

  int32_t vocab_size = model_->VocabSize();
  int32_t context_size = model_->ContextSize();

  std::vector<int64_t> blanks(context_size, -1);
  blanks.back() = 0;

  std::deque<Hypotheses> finalized;
  std::vector<Hypotheses> cur;
  std::vector<Hypothesis> prev;

  std::vector<ContextGraphPtr> context_graphs(batch_size, nullptr);

  for (int32_t i = 0; i < batch_size; ++i) {
    const ContextState *context_state = nullptr;
    if (ss != nullptr) {
      context_graphs[i] =
          ss[packed_encoder_out.sorted_indexes[i]]->GetContextGraph();
      if (context_graphs[i] != nullptr)
        context_state = context_graphs[i]->Root();
    }
    Hypotheses blank_hyp({{blanks, 0, context_state}});
    cur.emplace_back(std::move(blank_hyp));
  }

  int32_t start = 0;
  int32_t t = 0;
  for (auto n : packed_encoder_out.batch_sizes) {
    Ort::Value cur_encoder_out = packed_encoder_out.Get(start, n);
    start += n;

    if (n < static_cast<int32_t>(cur.size())) {
      for (int32_t k = static_cast<int32_t>(cur.size()) - 1; k >= n; --k) {
        finalized.push_front(std::move(cur[k]));
      }

      cur.erase(cur.begin() + n, cur.end());
    }  // if (n < static_cast<int32_t>(cur.size()))

    // Due to merging paths with identical token sequences,
    // not all utterances have "max_active_paths" paths.
    auto hyps_row_splits = GetHypsRowSplits(cur);
    int32_t num_hyps = hyps_row_splits.back();

    prev.clear();
    prev.reserve(num_hyps);

    for (auto &hyps : cur) {
      for (auto &h : hyps) {
        prev.push_back(std::move(h.second));
      }
    }
    cur.clear();
    cur.reserve(n);

    auto decoder_input = model_->BuildDecoderInput(prev, num_hyps);
    // decoder_input shape: (num_hyps, context_size)

    auto decoder_out = model_->RunDecoder(std::move(decoder_input));
    // decoder_out is (num_hyps, joiner_dim)

    cur_encoder_out =
        Repeat(model_->Allocator(), &cur_encoder_out, hyps_row_splits);
    // now cur_encoder_out is of shape (num_hyps, joiner_dim)

    Ort::Value logit =
        model_->RunJoiner(std::move(cur_encoder_out), View(&decoder_out));

    float *p_logit = logit.GetTensorMutableData<float>();
    if (blank_penalty_ > 0.0) {
      // assuming blank id is 0
      SubtractBlank(p_logit, vocab_size, num_hyps, 0, blank_penalty_);
    }
    LogSoftmax(p_logit, vocab_size, num_hyps);

    // now p_logit contains log_softmax output, we rename it to p_logprob
    // to match what it actually contains
    float *p_logprob = p_logit;

    // add log_prob of each hypothesis to p_logprob before taking top_k
    for (int32_t i = 0; i != num_hyps; ++i) {
      float log_prob = prev[i].log_prob;
      for (int32_t k = 0; k != vocab_size; ++k, ++p_logprob) {
        *p_logprob += log_prob;
      }
    }
    p_logprob = p_logit;  // we changed p_logprob in the above for loop

    // Now compute top_k for each utterance
    for (int32_t i = 0; i != n; ++i) {
      int32_t start = hyps_row_splits[i];
      int32_t end = hyps_row_splits[i + 1];
      auto topk =
          TopkIndex(p_logprob, vocab_size * (end - start), max_active_paths_);

      Hypotheses hyps;
      for (auto k : topk) {
        int32_t hyp_index = k / vocab_size + start;
        int32_t new_token = k % vocab_size;
        Hypothesis new_hyp = prev[hyp_index];

        float context_score = 0;
        auto context_state = new_hyp.context_state;
        // blank is hardcoded to 0
        // also, it treats unk as blank
        if (new_token != 0 && new_token != unk_id_) {
          new_hyp.ys.push_back(new_token);
          new_hyp.timestamps.push_back(t);

          // Store the token log probability (subtract prev log_prob to get
          // original)
          float token_log_prob = p_logprob[k] - prev[hyp_index].log_prob;
          new_hyp.ys_probs.push_back(token_log_prob);

          if (context_graphs[i] != nullptr) {
            auto context_res = context_graphs[i]->ForwardOneStep(
                context_state, new_token, false /* non-strict mode */);
            context_score = std::get<0>(context_res);
            new_hyp.context_state = std::get<1>(context_res);
          }
        }

        new_hyp.log_prob = p_logprob[k] + context_score;
        hyps.Add(std::move(new_hyp));
      }  // for (auto k : topk)
      p_logprob += (end - start) * vocab_size;
      cur.push_back(std::move(hyps));
    }  // for (int32_t i = 0; i != n; ++i)

    ++t;
  }  // for (auto n : packed_encoder_out.batch_sizes)

  for (auto &h : finalized) {
    cur.push_back(std::move(h));
  }

  // Finalize context biasing matching..
  for (int32_t i = 0; i < cur.size(); ++i) {
    for (auto iter = cur[i].begin(); iter != cur[i].end(); ++iter) {
      if (context_graphs[i] != nullptr) {
        auto context_res =
            context_graphs[i]->Finalize(iter->second.context_state);
        iter->second.log_prob += context_res.first;
        iter->second.context_state = context_res.second;
      }
    }
  }

  if (lm_) {
    // use LM for rescoring
    lm_->ComputeLMScore(lm_scale_, context_size, &cur);
  }

  std::vector<OfflineTransducerDecoderResult> unsorted_ans(batch_size);
  for (int32_t i = 0; i != batch_size; ++i) {
    Hypothesis hyp = cur[i].GetMostProbable(true);

    auto &r = unsorted_ans[packed_encoder_out.sorted_indexes[i]];

    // strip leading blanks
    r.tokens = {hyp.ys.begin() + context_size, hyp.ys.end()};
    r.timestamps = std::move(hyp.timestamps);
    r.ys_log_probs = std::move(hyp.ys_probs);
  }

  return unsorted_ans;
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-transducer-modified-beam-search-decoder.h
================================================
// sherpa-onnx/csrc/offline-transducer-modified-beam-search-decoder.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_TRANSDUCER_MODIFIED_BEAM_SEARCH_DECODER_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TRANSDUCER_MODIFIED_BEAM_SEARCH_DECODER_H_

#include <vector>

#include "sherpa-onnx/csrc/offline-lm.h"
#include "sherpa-onnx/csrc/offline-transducer-decoder.h"
#include "sherpa-onnx/csrc/offline-transducer-model.h"

namespace sherpa_onnx {

class OfflineTransducerModifiedBeamSearchDecoder
    : public OfflineTransducerDecoder {
 public:
  OfflineTransducerModifiedBeamSearchDecoder(OfflineTransducerModel *model,
                                             OfflineLM *lm,
                                             int32_t max_active_paths,
                                             float lm_scale, int32_t unk_id,
                                             float blank_penalty)
      : model_(model),
        lm_(lm),
        max_active_paths_(max_active_paths),
        lm_scale_(lm_scale),
        unk_id_(unk_id),
        blank_penalty_(blank_penalty) {}

  std::vector<OfflineTransducerDecoderResult> Decode(
      Ort::Value encoder_out, Ort::Value encoder_out_length,
      OfflineStream **ss = nullptr, int32_t n = 0) override;

 private:
  OfflineTransducerModel *model_;  // Not owned
  OfflineLM *lm_;                  // Not owned; may be nullptr

  int32_t max_active_paths_;
  float lm_scale_;  // used only when lm_ is not nullptr
  int32_t unk_id_;
  float blank_penalty_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TRANSDUCER_MODIFIED_BEAM_SEARCH_DECODER_H_


================================================
FILE: sherpa-onnx/csrc/offline-transducer-modified-beam-search-nemo-decoder.cc
================================================
// sherpa-onnx/csrc/offline-transducer-modified-beam-search-nemo-decoder.cc
//
// Copyright (c)  2026  (authors: github.com/nefastosaturo, github.com/nullbio)

#include "sherpa-onnx/csrc/offline-transducer-modified-beam-search-nemo-decoder.h"

#include <algorithm>
#include <deque>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/context-graph.h"
#include "sherpa-onnx/csrc/hypothesis.h"
#include "sherpa-onnx/csrc/log.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/packed-sequence.h"
#include "sherpa-onnx/csrc/slice.h"

namespace sherpa_onnx {

// Helper structure to track hypothesis with decoder state
struct NeMoHypothesis {
  std::vector<int32_t> ys;          // token sequence (excluding initial blank)
  std::vector<int32_t> timestamps;  // timestamps for each token
  std::vector<int32_t> durations;   // durations for TDT
  std::vector<float> ys_probs;      // log probability for each token
  float log_prob;                   // accumulated log probability
  std::vector<Ort::Value> decoder_states;  // RNN/LSTM states
  const ContextState *context_state;       // context graph state
  OrtAllocator *allocator;                 // allocator for cloning states
  int32_t frame_offset;  // current frame position for this hypothesis

  NeMoHypothesis()
      : log_prob(0.0f),
        context_state(nullptr),
        allocator(nullptr),
        frame_offset(0) {}

  // Copy constructor - needed for hypothesis expansion
  NeMoHypothesis(const NeMoHypothesis &other)
      : ys(other.ys),
        timestamps(other.timestamps),
        durations(other.durations),
        ys_probs(other.ys_probs),
        log_prob(other.log_prob),
        context_state(other.context_state),
        allocator(other.allocator),
        frame_offset(other.frame_offset) {
    // Deep copy of decoder states
    decoder_states.reserve(other.decoder_states.size());
    for (const auto &state : other.decoder_states) {
      decoder_states.push_back(Clone(allocator, &state));
    }
  }

  NeMoHypothesis &operator=(const NeMoHypothesis &other) {
    if (this != &other) {
      ys = other.ys;
      timestamps = other.timestamps;
      durations = other.durations;
      ys_probs = other.ys_probs;
      log_prob = other.log_prob;
      context_state = other.context_state;
      allocator = other.allocator;
      frame_offset = other.frame_offset;

      decoder_states.clear();
      decoder_states.reserve(other.decoder_states.size());
      for (const auto &state : other.decoder_states) {
        decoder_states.push_back(Clone(allocator, &state));
      }
    }
    return *this;
  }

  NeMoHypothesis(NeMoHypothesis &&) = default;
  NeMoHypothesis &operator=(NeMoHypothesis &&) = default;
};

std::vector<OfflineTransducerDecoderResult>
OfflineTransducerModifiedBeamSearchNeMoDecoder::Decode(
    Ort::Value encoder_out, Ort::Value encoder_out_length,
    OfflineStream **ss /*= nullptr*/, int32_t n /*= 0*/) {
  auto encoder_shape = encoder_out.GetTensorTypeAndShapeInfo().GetShape();
  int32_t batch_size = static_cast<int32_t>(encoder_shape[0]);
  int32_t num_frames = static_cast<int32_t>(encoder_shape[1]);
  int32_t encoder_dim = static_cast<int32_t>(encoder_shape[2]);

  if (ss != nullptr) SHERPA_ONNX_CHECK_EQ(batch_size, n);

  int32_t vocab_size = model_->VocabSize();
  int32_t blank_id = vocab_size - 1;  // NeMo models have blank at the end

  // For TDT models, we need to know the number of duration bins
  // We'll detect this from the joiner output size on first run
  int32_t num_durations = 0;

  std::vector<ContextGraphPtr> context_graphs(batch_size, nullptr);

  auto memory_info =
      Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

  OrtAllocator *allocator = model_->Allocator();

  const float *encoder_data = encoder_out.GetTensorData<float>();

  // Get per-utterance lengths
  std::vector<int32_t> utterance_lengths(batch_size);
  auto length_type =
      encoder_out_length.GetTensorTypeAndShapeInfo().GetElementType();
  for (int32_t i = 0; i < batch_size; ++i) {
    utterance_lengths[i] =
        (length_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32)
            ? encoder_out_length.GetTensorData<int32_t>()[i]
            : static_cast<int32_t>(
                  encoder_out_length.GetTensorData<int64_t>()[i]);
  }

  std::vector<OfflineTransducerDecoderResult> results(batch_size);

  // Process each utterance independently (simpler for TDT with variable frame
  // positions)
  for (int32_t b = 0; b < batch_size; ++b) {
    const ContextState *context_state = nullptr;
    if (ss != nullptr) {
      context_graphs[b] = ss[b]->GetContextGraph();
      if (context_graphs[b] != nullptr) {
        context_state = context_graphs[b]->Root();
      }
    }

    int32_t this_num_frames = utterance_lengths[b];
    const float *this_encoder = encoder_data + b * num_frames * encoder_dim;

    // Initialize with single hypothesis
    std::vector<NeMoHypothesis> cur_hyps;
    {
      NeMoHypothesis blank_hyp;
      blank_hyp.log_prob = 0.0f;
      blank_hyp.context_state = context_state;
      blank_hyp.allocator = allocator;
      blank_hyp.frame_offset = 0;
      blank_hyp.decoder_states = model_->GetDecoderInitStates(1);
      cur_hyps.push_back(std::move(blank_hyp));
    }

    // Process until all hypotheses have finished
    while (true) {
      // Find minimum frame offset among active hypotheses
      int32_t min_frame = this_num_frames;
      for (const auto &hyp : cur_hyps) {
        if (hyp.frame_offset < min_frame) {
          min_frame = hyp.frame_offset;
        }
      }

      if (min_frame >= this_num_frames) {
        break;  // All hypotheses have finished
      }

      // Process hypotheses at the minimum frame
      std::vector<std::pair<float, NeMoHypothesis>> all_candidates;

      for (auto &hyp : cur_hyps) {
        if (hyp.frame_offset > min_frame) {
          // This hypothesis is ahead, keep it as-is
          all_candidates.emplace_back(hyp.log_prob, std::move(hyp));
          continue;
        }

        // Get encoder output for this frame
        std::array<int64_t, 3> encoder_3d_shape{1, encoder_dim, 1};
        const float *frame_data = this_encoder + hyp.frame_offset * encoder_dim;

        Ort::Value encoder_out_frame = Ort::Value::CreateTensor(
            memory_info, const_cast<float *>(frame_data), encoder_dim,
            encoder_3d_shape.data(), encoder_3d_shape.size());

        // Prepare decoder input: use blank_id as initial token, then last
        // emitted token
        int32_t last_token = hyp.ys.empty() ? blank_id : hyp.ys.back();
        std::array<int64_t, 2> decoder_input_shape = {1, 1};
        std::vector<int32_t> decoder_input_data = {last_token};

        Ort::Value decoder_input = Ort::Value::CreateTensor(
            memory_info, decoder_input_data.data(), 1,
            decoder_input_shape.data(), decoder_input_shape.size());

        std::array<int64_t, 1> decoder_input_length_shape = {1};
        std::vector<int32_t> decoder_input_length_data = {1};

        Ort::Value decoder_input_length = Ort::Value::CreateTensor(
            memory_info, decoder_input_length_data.data(), 1,
            decoder_input_length_shape.data(),
            decoder_input_length_shape.size());

        // Clone decoder states for this expansion
        std::vector<Ort::Value> decoder_states_copy;
        decoder_states_copy.reserve(hyp.decoder_states.size());
        for (const auto &state : hyp.decoder_states) {
          decoder_states_copy.push_back(Clone(allocator, &state));
        }

        auto decoder_result = model_->RunDecoder(
            std::move(decoder_input), std::move(decoder_input_length),
            std::move(decoder_states_copy));

        Ort::Value decoder_out = std::move(decoder_result.first);
        std::vector<Ort::Value> next_states = std::move(decoder_result.second);

        // Run joiner
        Ort::Value logit =
            model_->RunJoiner(View(&encoder_out_frame), View(&decoder_out));

        auto logit_shape = logit.GetTensorTypeAndShapeInfo().GetShape();
        int32_t output_size = static_cast<int32_t>(logit_shape.back());

        float *p_logit = logit.GetTensorMutableData<float>();

        // Detect TDT mode from joiner output size
        if (is_tdt_ && num_durations == 0 && output_size > vocab_size) {
          num_durations = output_size - vocab_size;
        }

        // Split into token and duration logits for TDT
        int32_t token_vocab_size = is_tdt_ ? vocab_size : output_size;
        float *token_logits = p_logit;
        float *duration_logits = is_tdt_ ? (p_logit + vocab_size) : nullptr;

        // Apply blank penalty
        if (blank_penalty_ > 0.0f) {
          token_logits[blank_id] -= blank_penalty_;
        }

        // Compute log softmax for tokens only
        LogSoftmax(token_logits, token_vocab_size, 1);

        // Apply context boosting BEFORE top-k selection so hotword tokens
        // have a chance to be selected even if their base probability is low
        if (context_graphs[b] != nullptr && hyp.context_state != nullptr) {
          for (const auto &pair : hyp.context_state->next) {
            int32_t token_id = pair.first;
            if (token_id >= 0 && token_id < token_vocab_size) {
              token_logits[token_id] += hotwords_score_;
            }
          }
        }

        auto top_k_tokens =
            TopkIndex(token_logits, token_vocab_size, max_active_paths_);

        // Determine duration/skip for TDT
        int32_t predicted_skip = 1;  // Default: advance by 1 frame
        float duration_log_prob = 0.0f;
        if (is_tdt_ && duration_logits != nullptr && num_durations > 0) {
          // Apply log softmax to duration logits
          LogSoftmax(duration_logits, num_durations, 1);

          // Find best duration
          predicted_skip = static_cast<int32_t>(
              std::distance(duration_logits,
                            std::max_element(duration_logits,
                                             duration_logits + num_durations)));

          // Get the log probability for the selected duration
          duration_log_prob = duration_logits[predicted_skip];
        }

        // Create candidate hypotheses
        for (int32_t idx : top_k_tokens) {
          int32_t token = idx;
          // For TDT: joint probability = P(token) * P(duration)
          // In log space: log P(token, duration) = log P(token) + log
          // P(duration)
          float token_log_prob =
              token_logits[token] + duration_log_prob + hyp.log_prob;

          NeMoHypothesis new_hyp;
          new_hyp.ys = hyp.ys;
          new_hyp.timestamps = hyp.timestamps;
          new_hyp.durations = hyp.durations;
          new_hyp.ys_probs = hyp.ys_probs;
          new_hyp.context_state = hyp.context_state;
          new_hyp.allocator = allocator;
          new_hyp.log_prob = token_log_prob;

          float context_score = 0.0f;

          if (token == blank_id || token == unk_id_) {
            // Blank or unk: keep decoder state, advance frame
            new_hyp.decoder_states.reserve(hyp.decoder_states.size());
            for (const auto &state : hyp.decoder_states) {
              new_hyp.decoder_states.push_back(Clone(allocator, &state));
            }
            // For blank/unk in TDT, always advance by at least 1
            new_hyp.frame_offset =
                hyp.frame_offset + std::max(1, predicted_skip);
          } else {
            // Non-blank: add token, use new decoder state
            new_hyp.ys.push_back(token);
            new_hyp.timestamps.push_back(hyp.frame_offset);
            new_hyp.ys_probs.push_back(token_logits[token]);
            if (is_tdt_) {
              new_hyp.durations.push_back(predicted_skip);
            }

            new_hyp.decoder_states.reserve(next_states.size());
            for (const auto &state : next_states) {
              new_hyp.decoder_states.push_back(Clone(allocator, &state));
            }

            // For non-blank in TDT, advance by predicted duration (can be 0 to
            // emit more tokens) For non-TDT, stay on same frame to allow more
            // tokens
            if (is_tdt_) {
              new_hyp.frame_offset = hyp.frame_offset + predicted_skip;
            } else {
              new_hyp.frame_offset = hyp.frame_offset;
            }

            // Update context graph
            if (context_graphs[b] != nullptr) {
              auto context_res = context_graphs[b]->ForwardOneStep(
                  new_hyp.context_state, token, false);
              context_score = std::get<0>(context_res);
              new_hyp.context_state = std::get<1>(context_res);
            }
            new_hyp.log_prob += context_score;
          }

          all_candidates.emplace_back(new_hyp.log_prob, std::move(new_hyp));
        }
      }

      // Keep top-k hypotheses
      if (all_candidates.empty()) {
        break;
      }

      std::partial_sort(
          all_candidates.begin(),
          all_candidates.begin() +
              std::min(max_active_paths_,
                       static_cast<int32_t>(all_candidates.size())),
          all_candidates.end(),
          [](const auto &a, const auto &b) { return a.first > b.first; });

      int32_t keep = std::min(max_active_paths_,
                              static_cast<int32_t>(all_candidates.size()));
      cur_hyps.clear();
      cur_hyps.reserve(keep);
      for (int32_t k = 0; k < keep; ++k) {
        cur_hyps.push_back(std::move(all_candidates[k].second));
      }
    }

    // Finalize context biasing
    for (auto &hyp : cur_hyps) {
      if (context_graphs[b] != nullptr) {
        auto context_res = context_graphs[b]->Finalize(hyp.context_state);
        hyp.log_prob += context_res.first;
        hyp.context_state = context_res.second;
      }
    }

    // Find best hypothesis
    auto best_it =
        std::max_element(cur_hyps.begin(), cur_hyps.end(),
                         [](const NeMoHypothesis &a, const NeMoHypothesis &b) {
                           return a.log_prob < b.log_prob;
                         });

    if (best_it != cur_hyps.end()) {
      // Convert int32_t to int64_t for tokens
      results[b].tokens.assign(best_it->ys.begin(), best_it->ys.end());
      results[b].timestamps = best_it->timestamps;
      results[b].ys_log_probs = best_it->ys_probs;
      // Convert int32_t durations to float
      results[b].durations.reserve(best_it->durations.size());
      for (int32_t d : best_it->durations) {
        results[b].durations.push_back(static_cast<float>(d));
      }
    }
  }

  return results;
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-transducer-modified-beam-search-nemo-decoder.h
================================================
// sherpa-onnx/csrc/offline-transducer-modified-beam-search-nemo-decoder.h
//
// Copyright (c)  2026  (authors: github.com/nefastosaturo, github.com/nullbio)

#ifndef SHERPA_ONNX_CSRC_OFFLINE_TRANSDUCER_MODIFIED_BEAM_SEARCH_NEMO_DECODER_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TRANSDUCER_MODIFIED_BEAM_SEARCH_NEMO_DECODER_H_

#include <vector>

#include "sherpa-onnx/csrc/offline-transducer-decoder.h"
#include "sherpa-onnx/csrc/offline-transducer-nemo-model.h"

namespace sherpa_onnx {

class OfflineTransducerModifiedBeamSearchNeMoDecoder
    : public OfflineTransducerDecoder {
 public:
  OfflineTransducerModifiedBeamSearchNeMoDecoder(
      OfflineTransducerNeMoModel *model, int32_t max_active_paths,
      int32_t unk_id, float blank_penalty, bool is_tdt,
      float hotwords_score = 0.0f)
      : model_(model),
        max_active_paths_(max_active_paths),
        unk_id_(unk_id),
        blank_penalty_(blank_penalty),
        is_tdt_(is_tdt),
        hotwords_score_(hotwords_score) {}

  std::vector<OfflineTransducerDecoderResult> Decode(
      Ort::Value encoder_out,
      Ort::Value encoder_out_length,
      OfflineStream **ss = nullptr,
      int32_t n = 0) override;

 private:
  OfflineTransducerNeMoModel *model_;  // Not owned

  int32_t max_active_paths_;
  int32_t unk_id_;
  float blank_penalty_;
  bool is_tdt_;  // Token-and-Duration Transducer mode
  float hotwords_score_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TRANSDUCER_MODIFIED_BEAM_SEARCH_NEMO_DECODER_H_


================================================
FILE: sherpa-onnx/csrc/offline-transducer-nemo-model.cc
================================================
// sherpa-onnx/csrc/offline-transducer-nemo-model.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-transducer-nemo-model.h"

#include <algorithm>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-transducer-decoder.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/transpose.h"

namespace sherpa_onnx {

class OfflineTransducerNeMoModel::Impl {
 public:
  explicit Impl(const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    {
      auto buf = ReadFile(config.transducer.encoder_filename);
      InitEncoder(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(config.transducer.decoder_filename);
      InitDecoder(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(config.transducer.joiner_filename);
      InitJoiner(buf.data(), buf.size());
    }
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    {
      auto buf = ReadFile(mgr, config.transducer.encoder_filename);
      InitEncoder(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(mgr, config.transducer.decoder_filename);
      InitDecoder(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(mgr, config.transducer.joiner_filename);
      InitJoiner(buf.data(), buf.size());
    }
  }

  std::vector<Ort::Value> RunEncoder(Ort::Value features,
                                     Ort::Value features_length) {
    // (B, T, C) -> (B, C, T)
    features = Transpose12(allocator_, &features);

    std::array<Ort::Value, 2> encoder_inputs = {std::move(features),
                                                std::move(features_length)};

    auto encoder_out = encoder_sess_->Run(
        {}, encoder_input_names_ptr_.data(), encoder_inputs.data(),
        encoder_inputs.size(), encoder_output_names_ptr_.data(),
        encoder_output_names_ptr_.size());

    return encoder_out;
  }

  std::pair<Ort::Value, std::vector<Ort::Value>> RunDecoder(
      Ort::Value targets, Ort::Value targets_length,
      std::vector<Ort::Value> states) {
    std::vector<Ort::Value> decoder_inputs;
    decoder_inputs.reserve(2 + states.size());

    decoder_inputs.push_back(std::move(targets));
    decoder_inputs.push_back(std::move(targets_length));

    for (auto &s : states) {
      decoder_inputs.push_back(std::move(s));
    }

    auto decoder_out = decoder_sess_->Run(
        {}, decoder_input_names_ptr_.data(), decoder_inputs.data(),
        decoder_inputs.size(), decoder_output_names_ptr_.data(),
        decoder_output_names_ptr_.size());

    std::vector<Ort::Value> states_next;
    states_next.reserve(states.size());

    // decoder_out[0]: decoder_output
    // decoder_out[1]: decoder_output_length
    // decoder_out[2:] states_next

    for (int32_t i = 0; i != states.size(); ++i) {
      states_next.push_back(std::move(decoder_out[i + 2]));
    }

    // we discard decoder_out[1]
    return {std::move(decoder_out[0]), std::move(states_next)};
  }

  Ort::Value RunJoiner(Ort::Value encoder_out, Ort::Value decoder_out) {
    std::array<Ort::Value, 2> joiner_input = {std::move(encoder_out),
                                              std::move(decoder_out)};
    auto logit = joiner_sess_->Run({}, joiner_input_names_ptr_.data(),
                                   joiner_input.data(), joiner_input.size(),
                                   joiner_output_names_ptr_.data(),
                                   joiner_output_names_ptr_.size());

    return std::move(logit[0]);
  }

  std::vector<Ort::Value> GetDecoderInitStates(int32_t batch_size) {
    std::array<int64_t, 3> s0_shape{pred_rnn_layers_, batch_size, pred_hidden_};
    Ort::Value s0 = Ort::Value::CreateTensor<float>(allocator_, s0_shape.data(),
                                                    s0_shape.size());

    Fill<float>(&s0, 0);

    std::array<int64_t, 3> s1_shape{pred_rnn_layers_, batch_size, pred_hidden_};

    Ort::Value s1 = Ort::Value::CreateTensor<float>(allocator_, s1_shape.data(),
                                                    s1_shape.size());

    Fill<float>(&s1, 0);

    std::vector<Ort::Value> states;

    states.reserve(2);
    states.push_back(std::move(s0));
    states.push_back(std::move(s1));

    return states;
  }

  int32_t SubsamplingFactor() const { return subsampling_factor_; }
  int32_t VocabSize() const { return vocab_size_; }

  OrtAllocator *Allocator() { return allocator_; }

  std::string FeatureNormalizationMethod() const { return normalize_type_; }

  bool IsGigaAM() const { return is_giga_am_; }
  bool IsTDT() const { return is_tdt_; }

  int32_t FeatureDim() const { return feat_dim_; }

 private:
  void InitEncoder(void *model_data, size_t model_data_length) {
    encoder_sess_ = std::make_unique<Ort::Session>(
        env_, model_data, model_data_length, sess_opts_);

    GetInputNames(encoder_sess_.get(), &encoder_input_names_,
                  &encoder_input_names_ptr_);

    GetOutputNames(encoder_sess_.get(), &encoder_output_names_,
                   &encoder_output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = encoder_sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      os << "---encoder---\n";
      PrintModelMetadata(os, meta_data);
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
    SHERPA_ONNX_READ_META_DATA(vocab_size_, "vocab_size");

    // need to increase by 1 since the blank token is not included in computing
    // vocab_size in NeMo.
    vocab_size_ += 1;

    SHERPA_ONNX_READ_META_DATA(subsampling_factor_, "subsampling_factor");
    SHERPA_ONNX_READ_META_DATA_STR_ALLOW_EMPTY(normalize_type_,
                                               "normalize_type");
    SHERPA_ONNX_READ_META_DATA(pred_rnn_layers_, "pred_rnn_layers");
    SHERPA_ONNX_READ_META_DATA(pred_hidden_, "pred_hidden");
    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(is_giga_am_, "is_giga_am", 0);
    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(feat_dim_, "feat_dim", -1);

    if (normalize_type_ == "NA") {
      normalize_type_ = "";
    }

    std::string url;
    SHERPA_ONNX_READ_META_DATA_STR_ALLOW_EMPTY(url, "url");
    if (url.find("tdt") != std::string::npos) {
      is_tdt_ = 1;
    }
  }

  void InitDecoder(void *model_data, size_t model_data_length) {
    decoder_sess_ = std::make_unique<Ort::Session>(
        env_, model_data, model_data_length, sess_opts_);

    GetInputNames(decoder_sess_.get(), &decoder_input_names_,
                  &decoder_input_names_ptr_);

    GetOutputNames(decoder_sess_.get(), &decoder_output_names_,
                   &decoder_output_names_ptr_);
  }

  void InitJoiner(void *model_data, size_t model_data_length) {
    joiner_sess_ = std::make_unique<Ort::Session>(
        env_, model_data, model_data_length, sess_opts_);

    GetInputNames(joiner_sess_.get(), &joiner_input_names_,
                  &joiner_input_names_ptr_);

    GetOutputNames(joiner_sess_.get(), &joiner_output_names_,
                   &joiner_output_names_ptr_);

    auto shape = joiner_sess_->GetOutputTypeInfo(0)
                     .GetTensorTypeAndShapeInfo()
                     .GetShape();
    int32_t output_size = shape.back();
    if (is_tdt_) {
      if (vocab_size_ == output_size) {
        SHERPA_ONNX_LOGE("It is not a TDT model!");
        SHERPA_ONNX_EXIT(-1);
      }

      if (config_.debug) {
        SHERPA_ONNX_LOGE("TDT model. vocab_size: %d, num_durations: %d",
                         vocab_size_, output_size - vocab_size_);
      }
    } else if (vocab_size_ != output_size) {
      SHERPA_ONNX_LOGE("vocab_size: %d != output_size: %d", vocab_size_,
                       output_size);
      SHERPA_ONNX_EXIT(-1);
    }
  }

 private:
  OfflineModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> encoder_sess_;
  std::unique_ptr<Ort::Session> decoder_sess_;
  std::unique_ptr<Ort::Session> joiner_sess_;

  std::vector<std::string> encoder_input_names_;
  std::vector<const char *> encoder_input_names_ptr_;

  std::vector<std::string> encoder_output_names_;
  std::vector<const char *> encoder_output_names_ptr_;

  std::vector<std::string> decoder_input_names_;
  std::vector<const char *> decoder_input_names_ptr_;

  std::vector<std::string> decoder_output_names_;
  std::vector<const char *> decoder_output_names_ptr_;

  std::vector<std::string> joiner_input_names_;
  std::vector<const char *> joiner_input_names_ptr_;

  std::vector<std::string> joiner_output_names_;
  std::vector<const char *> joiner_output_names_ptr_;

  int32_t vocab_size_ = 0;
  int32_t subsampling_factor_ = 8;
  std::string normalize_type_;
  int32_t pred_rnn_layers_ = -1;
  int32_t pred_hidden_ = -1;
  int32_t is_giga_am_ = 0;
  int32_t is_tdt_ = 0;

  // giga am uses 64
  // parakeet-tdt-0.6b-v2 uses 128
  // others use 80
  int32_t feat_dim_ = -1;  // -1 means to use default values.
};

OfflineTransducerNeMoModel::OfflineTransducerNeMoModel(
    const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineTransducerNeMoModel::OfflineTransducerNeMoModel(
    Manager *mgr, const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineTransducerNeMoModel::~OfflineTransducerNeMoModel() = default;

std::vector<Ort::Value> OfflineTransducerNeMoModel::RunEncoder(
    Ort::Value features, Ort::Value features_length) const {
  return impl_->RunEncoder(std::move(features), std::move(features_length));
}

std::pair<Ort::Value, std::vector<Ort::Value>>
OfflineTransducerNeMoModel::RunDecoder(Ort::Value targets,
                                       Ort::Value targets_length,
                                       std::vector<Ort::Value> states) const {
  return impl_->RunDecoder(std::move(targets), std::move(targets_length),
                           std::move(states));
}

std::vector<Ort::Value> OfflineTransducerNeMoModel::GetDecoderInitStates(
    int32_t batch_size) const {
  return impl_->GetDecoderInitStates(batch_size);
}

Ort::Value OfflineTransducerNeMoModel::RunJoiner(Ort::Value encoder_out,
                                                 Ort::Value decoder_out) const {
  return impl_->RunJoiner(std::move(encoder_out), std::move(decoder_out));
}

int32_t OfflineTransducerNeMoModel::SubsamplingFactor() const {
  return impl_->SubsamplingFactor();
}

int32_t OfflineTransducerNeMoModel::VocabSize() const {
  return impl_->VocabSize();
}

OrtAllocator *OfflineTransducerNeMoModel::Allocator() const {
  return impl_->Allocator();
}

std::string OfflineTransducerNeMoModel::FeatureNormalizationMethod() const {
  return impl_->FeatureNormalizationMethod();
}

bool OfflineTransducerNeMoModel::IsGigaAM() const { return impl_->IsGigaAM(); }

bool OfflineTransducerNeMoModel::IsTDT() const { return impl_->IsTDT(); }

int32_t OfflineTransducerNeMoModel::FeatureDim() const {
  return impl_->FeatureDim();
}

#if __ANDROID_API__ >= 9
template OfflineTransducerNeMoModel::OfflineTransducerNeMoModel(
    AAssetManager *mgr, const OfflineModelConfig &config);
#endif

#if __OHOS__
template OfflineTransducerNeMoModel::OfflineTransducerNeMoModel(
    NativeResourceManager *mgr, const OfflineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-transducer-nemo-model.h
================================================
// sherpa-onnx/csrc/offline-transducer-nemo-model.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_TRANSDUCER_NEMO_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TRANSDUCER_NEMO_MODEL_H_

#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-model-config.h"

namespace sherpa_onnx {

// see
// https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/asr/models/hybrid_rnnt_ctc_bpe_models.py#L40
// Its decoder is stateful, not stateless.
class OfflineTransducerNeMoModel {
 public:
  explicit OfflineTransducerNeMoModel(const OfflineModelConfig &config);

  template <typename Manager>
  OfflineTransducerNeMoModel(Manager *mgr, const OfflineModelConfig &config);

  ~OfflineTransducerNeMoModel();

  /** Run the encoder.
   *
   * @param features  A tensor of shape (N, T, C). It is changed in-place.
   * @param features_length  A 1-D tensor of shape (N,) containing number of
   *                         valid frames in `features` before padding.
   *                         Its dtype is int64_t.
   *
   * @return Return a vector containing:
   *  - encoder_out: A 3-D tensor of shape (N, T', encoder_dim)
   *  - encoder_out_length: A 1-D tensor of shape (N,) containing number
   *                        of frames in `encoder_out` before padding.
   */
  std::vector<Ort::Value> RunEncoder(Ort::Value features,
                                     Ort::Value features_length) const;

  /** Run the decoder network.
   *
   * @param targets A int32 tensor of shape (batch_size, 1)
   * @param targets_length A int32 tensor of shape (batch_size,)
   * @param states The states for the decoder model.
   * @return Return a vector:
   *           - ans[0] is the decoder_out (a float tensor)
   *           - ans[1] is the decoder_out_length (a int32 tensor)
   *           - ans[2:] is the states_next
   */
  std::pair<Ort::Value, std::vector<Ort::Value>> RunDecoder(
      Ort::Value targets, Ort::Value targets_length,
      std::vector<Ort::Value> states) const;

  std::vector<Ort::Value> GetDecoderInitStates(int32_t batch_size) const;

  /** Run the joint network.
   *
   * @param encoder_out Output of the encoder network.
   * @param decoder_out Output of the decoder network.
   * @return Return a tensor of shape (N, 1, 1, vocab_size) containing logits.
   */
  Ort::Value RunJoiner(Ort::Value encoder_out, Ort::Value decoder_out) const;

  /** Return the subsampling factor of the model.
   */
  int32_t SubsamplingFactor() const;

  int32_t VocabSize() const;

  /** Return an allocator for allocating memory
   */
  OrtAllocator *Allocator() const;

  // Possible values:
  // - per_feature
  // - all_features (not implemented yet)
  // - fixed_mean (not implemented)
  // - fixed_std (not implemented)
  // - or just leave it to empty
  // See
  // https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/asr/parts/preprocessing/features.py#L59
  // for details
  std::string FeatureNormalizationMethod() const;

  bool IsGigaAM() const;

  // true if it is a Token-and-Duration Transducer model
  // false otherwise
  bool IsTDT() const;

  int32_t FeatureDim() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TRANSDUCER_NEMO_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/offline-tts-character-frontend.cc
================================================
// sherpa-onnx/csrc/offline-tts-character-frontend.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include <algorithm>
#include <cctype>
#include <fstream>
#include <locale>
#include <memory>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-tts-character-frontend.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

static std::unordered_map<char32_t, int32_t> ReadTokens(std::istream &is) {
  std::unordered_map<char32_t, int32_t> token2id;

  std::string line;

  std::string sym;
  std::u32string s;
  int32_t id = 0;
  while (std::getline(is, line)) {
    std::istringstream iss(line);
    iss >> sym;
    if (iss.eof()) {
      id = atoi(sym.c_str());
      sym = " ";
    } else {
      iss >> id;
    }

    // eat the trailing \r\n on windows
    iss >> std::ws;
    if (!iss.eof()) {
      SHERPA_ONNX_LOGE("Error when reading tokens: %s", line.c_str());
      exit(-1);
    }

    // Form models from coqui-ai/TTS, we have saved the IDs of the following
    // symbols in OfflineTtsVitsModelMetaData, so it is safe to skip them here.
    if (sym == "<PAD>" || sym == "<EOS>" || sym == "<BOS>" || sym == "<BLNK>") {
      continue;
    }

    s = Utf8ToUtf32(sym);
    if (s.size() != 1) {
      SHERPA_ONNX_LOGE("Error when reading tokens at Line %s. size: %d",
                       line.c_str(), static_cast<int32_t>(s.size()));
      exit(-1);
    }

    char32_t c = s[0];

    if (token2id.count(c)) {
      SHERPA_ONNX_LOGE("Duplicated token %s. Line %s. Existing ID: %d",
                       sym.c_str(), line.c_str(), token2id.at(c));
      exit(-1);
    }

    token2id.insert({c, id});
  }

  return token2id;
}

OfflineTtsCharacterFrontend::OfflineTtsCharacterFrontend(
    const std::string &tokens, const OfflineTtsVitsModelMetaData &meta_data)
    : meta_data_(meta_data) {
  std::ifstream is(tokens);
  token2id_ = ReadTokens(is);
}

template <typename Manager>
OfflineTtsCharacterFrontend::OfflineTtsCharacterFrontend(
    Manager *mgr, const std::string &tokens,
    const OfflineTtsVitsModelMetaData &meta_data)
    : meta_data_(meta_data) {
  auto buf = ReadFile(mgr, tokens);
  std::istringstream is(std::string(buf.data(), buf.size()));
  token2id_ = ReadTokens(is);
}

std::vector<TokenIDs> OfflineTtsCharacterFrontend::ConvertTextToTokenIds(
    const std::string &_text, const std::string & /*voice = ""*/) const {
  // see
  // https://github.com/coqui-ai/TTS/blob/dev/TTS/tts/utils/text/tokenizer.py#L87
  int32_t use_eos_bos = meta_data_.use_eos_bos;
  int32_t bos_id = meta_data_.bos_id;
  int32_t eos_id = meta_data_.eos_id;
  int32_t blank_id = meta_data_.blank_id;
  int32_t add_blank = meta_data_.add_blank;

  std::string text(_text.size(), 0);
  std::transform(_text.begin(), _text.end(), text.begin(),
                 [](auto c) { return std::tolower(c); });

  std::u32string s = Utf8ToUtf32(text);

  std::vector<TokenIDs> ans;

  std::vector<int64_t> this_sentence;
  if (add_blank) {
    if (use_eos_bos) {
      this_sentence.push_back(bos_id);
    }

    this_sentence.push_back(blank_id);

    for (char32_t c : s) {
      if (token2id_.count(c)) {
        this_sentence.push_back(token2id_.at(c));
        this_sentence.push_back(blank_id);
      } else {
        SHERPA_ONNX_LOGE("Skip unknown character. Unicode codepoint: \\U+%04x.",
                         static_cast<uint32_t>(c));
      }

      if (c == '.' || c == ':' || c == '?' || c == '!') {
        // end of a sentence
        if (use_eos_bos) {
          this_sentence.push_back(eos_id);
        }

        ans.emplace_back(std::move(this_sentence));
        this_sentence = {};

        // re-initialize this_sentence
        if (use_eos_bos) {
          this_sentence.push_back(bos_id);
        }
        this_sentence.push_back(blank_id);
      }
    }

    if (use_eos_bos) {
      this_sentence.push_back(eos_id);
    }

    if (static_cast<int32_t>(this_sentence.size()) > 1 + use_eos_bos) {
      ans.emplace_back(std::move(this_sentence));
    }
  } else {
    // not adding blank
    if (use_eos_bos) {
      this_sentence.push_back(bos_id);
    }

    for (char32_t c : s) {
      if (token2id_.count(c)) {
        this_sentence.push_back(token2id_.at(c));
      }

      if (c == '.' || c == ':' || c == '?' || c == '!') {
        // end of a sentence
        if (use_eos_bos) {
          this_sentence.push_back(eos_id);
        }

        ans.emplace_back(std::move(this_sentence));
        this_sentence = {};

        // re-initialize this_sentence
        if (use_eos_bos) {
          this_sentence.push_back(bos_id);
        }
      }
    }

    if (this_sentence.size() > 1) {
      ans.emplace_back(std::move(this_sentence));
    }
  }

  return ans;
}

#if __ANDROID_API__ >= 9
template OfflineTtsCharacterFrontend::OfflineTtsCharacterFrontend(
    AAssetManager *mgr, const std::string &tokens,
    const OfflineTtsVitsModelMetaData &meta_data);

#endif

#if __OHOS__
template OfflineTtsCharacterFrontend::OfflineTtsCharacterFrontend(
    NativeResourceManager *mgr, const std::string &tokens,
    const OfflineTtsVitsModelMetaData &meta_data);

#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-tts-character-frontend.h
================================================
// sherpa-onnx/csrc/offline-tts-character-frontend.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_CHARACTER_FRONTEND_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_CHARACTER_FRONTEND_H_
#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>

#include "sherpa-onnx/csrc/offline-tts-frontend.h"
#include "sherpa-onnx/csrc/offline-tts-vits-model-meta-data.h"

namespace sherpa_onnx {

class OfflineTtsCharacterFrontend : public OfflineTtsFrontend {
 public:
  OfflineTtsCharacterFrontend(const std::string &tokens,
                              const OfflineTtsVitsModelMetaData &meta_data);

  template <typename Manager>
  OfflineTtsCharacterFrontend(Manager *mgr, const std::string &tokens,
                              const OfflineTtsVitsModelMetaData &meta_data);

  /** Convert a string to token IDs.
   *
   * @param text The input text.
   *             Example 1: "This is the first sample sentence; this is the
   *             second one." Example 2: "这是第一句。这是第二句。"
   * @param voice Optional. It is for espeak-ng.
   *
   * @return Return a vector-of-vector of token IDs. Each subvector contains
   *         a sentence that can be processed independently.
   *         If a frontend does not support splitting the text into
   * sentences, the resulting vector contains only one subvector.
   */
  std::vector<TokenIDs> ConvertTextToTokenIds(
      const std::string &text, const std::string &voice = "") const override;

 private:
  OfflineTtsVitsModelMetaData meta_data_;
  std::unordered_map<char32_t, int32_t> token2id_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TTS_CHARACTER_FRONTEND_H_


================================================
FILE: sherpa-onnx/csrc/offline-tts-frontend.cc
================================================
// sherpa-onnx/csrc/offline-tts-frontend.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-tts-frontend.h"

#include <sstream>
#include <string>

namespace sherpa_onnx {

std::string TokenIDs::ToString() const {
  std::ostringstream os;
  os << "TokenIDs(";
  os << "tokens=[";
  std::string sep;
  for (auto i : tokens) {
    os << sep << i;
    sep = ", ";
  }
  os << "], ";

  os << "tones=[";
  sep = {};
  for (auto i : tones) {
    os << sep << i;
    sep = ", ";
  }
  os << "]";
  os << ")";
  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-tts-frontend.h
================================================
// sherpa-onnx/csrc/offline-tts-frontend.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_FRONTEND_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_FRONTEND_H_
#include <cstdint>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

struct TokenIDs {
  TokenIDs() = default;

  /*implicit*/ TokenIDs(std::vector<int64_t> tokens)  // NOLINT
      : tokens{std::move(tokens)} {}

  /*implicit*/ TokenIDs(const std::vector<int32_t> &tokens)  // NOLINT
      : tokens{tokens.begin(), tokens.end()} {}

  TokenIDs(std::vector<int64_t> tokens,  // NOLINT
           std::vector<int64_t> tones)   // NOLINT
      : tokens{std::move(tokens)}, tones{std::move(tones)} {}

  std::string ToString() const;

  std::vector<int64_t> tokens;

  // Used only in MeloTTS
  std::vector<int64_t> tones;
};

class OfflineTtsFrontend {
 public:
  virtual ~OfflineTtsFrontend() = default;

  /** Convert a string to token IDs.
   *
   * @param text The input text.
   *             Example 1: "This is the first sample sentence; this is the
   *             second one." Example 2: "这是第一句。这是第二句。"
   * @param voice Optional. It is for espeak-ng.
   *
   * @return Return a vector-of-vector of token IDs. Each subvector contains
   *         a sentence that can be processed independently.
   *         If a frontend does not support splitting the text into sentences,
   *         the resulting vector contains only one subvector.
   */
  virtual std::vector<TokenIDs> ConvertTextToTokenIds(
      const std::string &text, const std::string &voice = "") const = 0;
};

// implementation is in ./piper-phonemize-lexicon.cc
void InitEspeak(const std::string &data_dir);

// implementation in ./piper-phonemize-lexicon.cc
std::vector<TokenIDs> ConvertTextToTokenIdsKokoroOrKitten(
    const std::unordered_map<char32_t, int32_t> &token2id,
    int32_t max_token_len, const std::string &text,
    const std::string &voice = "");

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TTS_FRONTEND_H_


================================================
FILE: sherpa-onnx/csrc/offline-tts-impl.cc
================================================
// sherpa-onnx/csrc/offline-tts-impl.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-tts-impl.h"

#include <memory>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/offline-tts-kitten-impl.h"
#include "sherpa-onnx/csrc/offline-tts-kokoro-impl.h"
#include "sherpa-onnx/csrc/offline-tts-matcha-impl.h"
#include "sherpa-onnx/csrc/offline-tts-pocket-impl.h"
#include "sherpa-onnx/csrc/offline-tts-supertonic-impl.h"
#include "sherpa-onnx/csrc/offline-tts-vits-impl.h"
#include "sherpa-onnx/csrc/offline-tts-zipvoice-impl.h"

namespace sherpa_onnx {

std::vector<int64_t> OfflineTtsImpl::AddBlank(const std::vector<int64_t> &x,
                                              int32_t blank_id /*= 0*/) const {
  // we assume the blank ID is 0
  std::vector<int64_t> buffer(x.size() * 2 + 1, blank_id);
  int32_t i = 1;
  for (auto k : x) {
    buffer[i] = k;
    i += 2;
  }
  return buffer;
}

std::unique_ptr<OfflineTtsImpl> OfflineTtsImpl::Create(
    const OfflineTtsConfig &config) {
  if (!config.model.vits.model.empty()) {
    return std::make_unique<OfflineTtsVitsImpl>(config);
  } else if (!config.model.matcha.acoustic_model.empty()) {
    return std::make_unique<OfflineTtsMatchaImpl>(config);
  } else if (!config.model.zipvoice.encoder.empty() &&
             !config.model.zipvoice.decoder.empty()) {
    return std::make_unique<OfflineTtsZipvoiceImpl>(config);
  } else if (!config.model.kokoro.model.empty()) {
    return std::make_unique<OfflineTtsKokoroImpl>(config);
  } else if (!config.model.kitten.model.empty()) {
    return std::make_unique<OfflineTtsKittenImpl>(config);
  } else if (!config.model.pocket.lm_flow.empty()) {
    return std::make_unique<OfflineTtsPocketImpl>(config);
  } else if (!config.model.supertonic.tts_json.empty()) {
    return std::make_unique<OfflineTtsSupertonicImpl>(config);
  }

  SHERPA_ONNX_LOGE("Please provide a tts model.");

  return {};
}

template <typename Manager>
std::unique_ptr<OfflineTtsImpl> OfflineTtsImpl::Create(
    Manager *mgr, const OfflineTtsConfig &config) {
  if (!config.model.vits.model.empty()) {
    return std::make_unique<OfflineTtsVitsImpl>(mgr, config);
  } else if (!config.model.matcha.acoustic_model.empty()) {
    return std::make_unique<OfflineTtsMatchaImpl>(mgr, config);
  } else if (!config.model.zipvoice.encoder.empty() &&
             !config.model.zipvoice.decoder.empty()) {
    return std::make_unique<OfflineTtsZipvoiceImpl>(mgr, config);
  } else if (!config.model.kokoro.model.empty()) {
    return std::make_unique<OfflineTtsKokoroImpl>(mgr, config);
  } else if (!config.model.kitten.model.empty()) {
    return std::make_unique<OfflineTtsKittenImpl>(mgr, config);
  } else if (!config.model.pocket.lm_flow.empty()) {
    return std::make_unique<OfflineTtsPocketImpl>(mgr, config);
  } else if (!config.model.supertonic.tts_json.empty()) {
    return std::make_unique<OfflineTtsSupertonicImpl>(mgr, config);
  }

  SHERPA_ONNX_LOGE("Please provide a tts model.");
  return {};
}

#if __ANDROID_API__ >= 9
template std::unique_ptr<OfflineTtsImpl> OfflineTtsImpl::Create(
    AAssetManager *mgr, const OfflineTtsConfig &config);
#endif

#if __OHOS__
template std::unique_ptr<OfflineTtsImpl> OfflineTtsImpl::Create(
    NativeResourceManager *mgr, const OfflineTtsConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-tts-impl.h
================================================
// sherpa-onnx/csrc/offline-tts-impl.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_IMPL_H_

#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-tts.h"

namespace sherpa_onnx {

class OfflineTtsImpl {
 public:
  virtual ~OfflineTtsImpl() = default;

  static std::unique_ptr<OfflineTtsImpl> Create(const OfflineTtsConfig &config);

  template <typename Manager>
  static std::unique_ptr<OfflineTtsImpl> Create(Manager *mgr,
                                                const OfflineTtsConfig &config);

  [[deprecated("Use Generate(text, GenerationConfig, callback) instead")]]
  virtual GeneratedAudio Generate(
      const std::string &text, int64_t sid = 0, float speed = 1.0,
      GeneratedAudioCallback callback = nullptr) const {
    SHERPA_ONNX_LOGE("Not implemented yet. Only some models support this");
    SHERPA_ONNX_LOGE("Please use sherpa-onnx > v1.12.30");
    return {};
  }

  virtual GeneratedAudio Generate(
      const std::string &text, const GenerationConfig &config,
      GeneratedAudioCallback callback = nullptr) const {
    SHERPA_ONNX_LOGE("Not implemented yet. Only some models support this");
    return {};
  }

  virtual GeneratedAudio Generate(
      const std::string &text, const std::string &prompt_text,
      const std::vector<float> &prompt_samples, int32_t sample_rate,
      float speed = 1.0, int32_t num_step = 4,
      GeneratedAudioCallback callback = nullptr) const {
    SHERPA_ONNX_LOGE("Not implemented yet. Only some models support this");
    return {};
  }

  // Return the sample rate of the generated audio
  virtual int32_t SampleRate() const = 0;

  // Number of supported speakers.
  // If it supports only a single speaker, then it return 0 or 1.
  virtual int32_t NumSpeakers() const { return 1; }

  std::vector<int64_t> AddBlank(const std::vector<int64_t> &x,
                                int32_t blank_id = 0) const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TTS_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-tts-kitten-impl.h
================================================
// sherpa-onnx/csrc/offline-tts-kitten-impl.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_KITTEN_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_KITTEN_IMPL_H_

#include <iomanip>
#include <ios>
#include <memory>
#include <string>
#include <sstream>
#include <utility>
#include <vector>

#include "fst/extensions/far/far.h"
#include "kaldifst/csrc/kaldi-fst-io.h"
#include "kaldifst/csrc/text-normalizer.h"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/lexicon.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-tts-frontend.h"
#include "sherpa-onnx/csrc/offline-tts-impl.h"
#include "sherpa-onnx/csrc/offline-tts-kitten-model.h"
#include "sherpa-onnx/csrc/piper-phonemize-lexicon.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class OfflineTtsKittenImpl : public OfflineTtsImpl {
 public:
  explicit OfflineTtsKittenImpl(const OfflineTtsConfig &config)
      : config_(config),
        model_(std::make_unique<OfflineTtsKittenModel>(config.model)) {
    InitFrontend();

    if (!config.rule_fsts.empty()) {
      std::vector<std::string> files;
      SplitStringToVector(config.rule_fsts, ",", false, &files);
      tn_list_.reserve(files.size());
      for (const auto &f : files) {
        if (config.model.debug) {
#if __OHOS__
          SHERPA_ONNX_LOGE("rule fst: %{public}s", f.c_str());
#else
          SHERPA_ONNX_LOGE("rule fst: %s", f.c_str());
#endif
        }
        tn_list_.push_back(std::make_unique<kaldifst::TextNormalizer>(f));
      }
    }

    if (!config.rule_fars.empty()) {
      if (config.model.debug) {
        SHERPA_ONNX_LOGE("Loading FST archives");
      }
      std::vector<std::string> files;
      SplitStringToVector(config.rule_fars, ",", false, &files);

      tn_list_.reserve(files.size() + tn_list_.size());

      for (const auto &f : files) {
        if (config.model.debug) {
#if __OHOS__
          SHERPA_ONNX_LOGE("rule far: %{public}s", f.c_str());
#else
          SHERPA_ONNX_LOGE("rule far: %s", f.c_str());
#endif
        }
        std::unique_ptr<fst::FarReader<fst::StdArc>> reader(
            fst::FarReader<fst::StdArc>::Open(f));
        for (; !reader->Done(); reader->Next()) {
          std::unique_ptr<fst::StdConstFst> r(
              fst::CastOrConvertToConstFst(reader->GetFst()->Copy()));

          tn_list_.push_back(
              std::make_unique<kaldifst::TextNormalizer>(std::move(r)));
        }
      }

      if (config.model.debug) {
        SHERPA_ONNX_LOGE("FST archives loaded!");
      }
    }
  }

  template <typename Manager>
  OfflineTtsKittenImpl(Manager *mgr, const OfflineTtsConfig &config)
      : config_(config),
        model_(std::make_unique<OfflineTtsKittenModel>(mgr, config.model)) {
    InitFrontend(mgr);

    if (!config.rule_fsts.empty()) {
      std::vector<std::string> files;
      SplitStringToVector(config.rule_fsts, ",", false, &files);
      tn_list_.reserve(files.size());
      for (const auto &f : files) {
        if (config.model.debug) {
#if __OHOS__
          SHERPA_ONNX_LOGE("rule fst: %{public}s", f.c_str());
#else
          SHERPA_ONNX_LOGE("rule fst: %s", f.c_str());
#endif
        }
        auto buf = ReadFile(mgr, f);
        std::istringstream is(std::string(buf.data(), buf.size()));
        tn_list_.push_back(std::make_unique<kaldifst::TextNormalizer>(is));
      }
    }

    if (!config.rule_fars.empty()) {
      std::vector<std::string> files;
      SplitStringToVector(config.rule_fars, ",", false, &files);
      tn_list_.reserve(files.size() + tn_list_.size());

      for (const auto &f : files) {
        if (config.model.debug) {
#if __OHOS__
          SHERPA_ONNX_LOGE("rule far: %{public}s", f.c_str());
#else
          SHERPA_ONNX_LOGE("rule far: %s", f.c_str());
#endif
        }

        auto buf = ReadFile(mgr, f);

        std::unique_ptr<std::istream> s(
            new std::istringstream(std::string(buf.data(), buf.size())));

        std::unique_ptr<fst::FarReader<fst::StdArc>> reader(
            fst::FarReader<fst::StdArc>::Open(std::move(s)));

        for (; !reader->Done(); reader->Next()) {
          std::unique_ptr<fst::StdConstFst> r(
              fst::CastOrConvertToConstFst(reader->GetFst()->Copy()));

          tn_list_.push_back(
              std::make_unique<kaldifst::TextNormalizer>(std::move(r)));
        }  // for (; !reader->Done(); reader->Next())
      }    // for (const auto &f : files)
    }      // if (!config.rule_fars.empty())
  }

  int32_t SampleRate() const override {
    return model_->GetMetaData().sample_rate;
  }

  int32_t NumSpeakers() const override {
    return model_->GetMetaData().num_speakers;
  }

  // Supported options in GenerationConfig:
  //   - sid: Speaker ID for multi-speaker models
  //   - speed: Speech speed factor (default: 1.0)
  //   - silence_scale: Scale applied to pauses in the generated audio
  //
  // Supported extra options in config.extra:
  //   - None
  GeneratedAudio Generate(
      const std::string &_text, const GenerationConfig &gen_config,
      GeneratedAudioCallback callback = nullptr) const override {
    if (config_.model.debug) {
      SHERPA_ONNX_LOGE("%s", gen_config.ToString().c_str());
    }

    int64_t sid = gen_config.sid;
    float speed = gen_config.speed;
    if (speed <= 0) {
      SHERPA_ONNX_LOGE("Speed must be > 0. Given: %f", speed);
      return {};
    }

    const auto &meta_data = model_->GetMetaData();
    int32_t num_speakers = meta_data.num_speakers;

    if (num_speakers == 0 && sid != 0) {
#if __OHOS__
      SHERPA_ONNX_LOGE(
          "This is a single-speaker model and supports only sid 0. Given sid: "
          "%{public}d. sid is ignored",
          static_cast<int32_t>(sid));
#else
      SHERPA_ONNX_LOGE(
          "This is a single-speaker model and supports only sid 0. Given sid: "
          "%d. sid is ignored",
          static_cast<int32_t>(sid));
#endif
    }

    if (num_speakers != 0 && (sid >= num_speakers || sid < 0)) {
#if __OHOS__
      SHERPA_ONNX_LOGE(
          "This model contains only %{public}d speakers. sid should be in the "
          "range [%{public}d, %{public}d]. Given: %{public}d. Use sid=0",
          num_speakers, 0, num_speakers - 1, static_cast<int32_t>(sid));
#else
      SHERPA_ONNX_LOGE(
          "This model contains only %d speakers. sid should be in the range "
          "[%d, %d]. Given: %d. Use sid=0",
          num_speakers, 0, num_speakers - 1, static_cast<int32_t>(sid));
#endif
      sid = 0;
    }

    std::string text = _text;
    if (config_.model.debug) {
#if __OHOS__
      SHERPA_ONNX_LOGE("Raw text: %{public}s", text.c_str());
#else
      SHERPA_ONNX_LOGE("Raw text: %s", text.c_str());
#endif
      std::ostringstream os;
      os << "In bytes (hex):\n";
      const auto p = reinterpret_cast<const uint8_t *>(text.c_str());
      for (int32_t i = 0; i != text.size(); ++i) {
        os << std::setw(2) << std::setfill('0') << std::hex
           << static_cast<uint32_t>(p[i]) << " ";
      }
      os << "\n";

#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
    }

    if (!tn_list_.empty()) {
      for (const auto &tn : tn_list_) {
        text = tn->Normalize(text);
        if (config_.model.debug) {
#if __OHOS__
          SHERPA_ONNX_LOGE("After normalizing: %{public}s", text.c_str());
#else
          SHERPA_ONNX_LOGE("After normalizing: %s", text.c_str());
#endif
        }
      }
    }

    std::vector<TokenIDs> token_ids =
        frontend_->ConvertTextToTokenIds(text, meta_data.voice);

    if (token_ids.empty() ||
        (token_ids.size() == 1 && token_ids[0].tokens.empty())) {
#if __OHOS__
      SHERPA_ONNX_LOGE("Failed to convert '%{public}s' to token IDs",
                       text.c_str());
#else
      SHERPA_ONNX_LOGE("Failed to convert '%s' to token IDs", text.c_str());
#endif
      return {};
    }

    std::vector<std::vector<int64_t>> x;

    x.reserve(token_ids.size());

    for (auto &i : token_ids) {
      x.push_back(std::move(i.tokens));
    }

    int32_t x_size = static_cast<int32_t>(x.size());

    if (config_.max_num_sentences != 1) {
#if __OHOS__
      SHERPA_ONNX_LOGE(
          "max_num_sentences (%{public}d) != 1 is ignored for Kitten TTS "
          "models",
          config_.max_num_sentences);
#else
      SHERPA_ONNX_LOGE(
          "max_num_sentences (%d) != 1 is ignored for Kitten TTS models",
          config_.max_num_sentences);
#endif
    }

    // the input text is too long, we process sentences within it in batches
    // to avoid OOM. Batch size is config_.max_num_sentences
    std::vector<std::vector<int64_t>> batch_x;

    int32_t batch_size = 1;
    batch_x.reserve(batch_size);
    int32_t num_batches = x_size / batch_size;

    if (config_.model.debug) {
#if __OHOS__
      SHERPA_ONNX_LOGE(
          "Split it into %{public}d batches. batch size: "
          "%{public}d. Number of sentences: %{public}d",
          num_batches, batch_size, x_size);
#else
      SHERPA_ONNX_LOGE(
          "Split it into %d batches. batch size: %d. Number "
          "of sentences: %d",
          num_batches, batch_size, x_size);
#endif
    }

    GeneratedAudio ans;

    int32_t should_continue = 1;

    int32_t k = 0;

    for (int32_t b = 0; b != num_batches && should_continue; ++b) {
      batch_x.clear();
      for (int32_t i = 0; i != batch_size; ++i, ++k) {
        batch_x.push_back(std::move(x[k]));
      }

      auto audio = Process(batch_x, sid, speed, gen_config.silence_scale);
      ans.sample_rate = audio.sample_rate;
      ans.samples.insert(ans.samples.end(), audio.samples.begin(),
                         audio.samples.end());
      if (callback) {
        should_continue = callback(audio.samples.data(), audio.samples.size(),
                                   (b + 1) * 1.0 / num_batches);
        // Caution(fangjun): audio is freed when the callback returns, so users
        // should copy the data if they want to access the data after
        // the callback returns to avoid segmentation fault.
      }
    }

    batch_x.clear();
    while (k < static_cast<int32_t>(x.size()) && should_continue) {
      batch_x.push_back(std::move(x[k]));

      ++k;
    }

    if (!batch_x.empty()) {
      auto audio = Process(batch_x, sid, speed, gen_config.silence_scale);
      ans.sample_rate = audio.sample_rate;
      ans.samples.insert(ans.samples.end(), audio.samples.begin(),
                         audio.samples.end());
      if (callback) {
        callback(audio.samples.data(), audio.samples.size(), 1.0);
        // Caution(fangjun): audio is freed when the callback returns, so users
        // should copy the data if they want to access the data after
        // the callback returns to avoid segmentation fault.
      }
    }

    return ans;
  }

  [[deprecated("Use Generate(text, GenerationConfig, callback) instead")]]
  GeneratedAudio Generate(
      const std::string &text, int64_t sid = 0, float speed = 1.0,
      GeneratedAudioCallback callback = nullptr) const override {
    GenerationConfig gen_config;
    gen_config.sid = sid;
    gen_config.speed = speed;
    gen_config.silence_scale = config_.silence_scale;
    return Generate(text, gen_config, std::move(callback));
  }

 private:
  template <typename Manager>
  void InitFrontend(Manager *mgr) {
    const auto &meta_data = model_->GetMetaData();
    frontend_ = std::make_unique<PiperPhonemizeLexicon>(
        mgr, config_.model.kitten.tokens, config_.model.kitten.data_dir,
        meta_data);
  }

  void InitFrontend() {
    const auto &meta_data = model_->GetMetaData();
    frontend_ = std::make_unique<PiperPhonemizeLexicon>(
        config_.model.kitten.tokens, config_.model.kitten.data_dir, meta_data);
  }

  GeneratedAudio Process(const std::vector<std::vector<int64_t>> &tokens,
                         int32_t sid, float speed,
                         float silence_scale) const {
    int32_t num_tokens = 0;
    for (const auto &k : tokens) {
      num_tokens += k.size();
    }

    std::vector<int64_t> x;
    x.reserve(num_tokens);
    for (const auto &k : tokens) {
      x.insert(x.end(), k.begin(), k.end());
    }

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::array<int64_t, 2> x_shape = {1, static_cast<int32_t>(x.size())};
    Ort::Value x_tensor = Ort::Value::CreateTensor(
        memory_info, x.data(), x.size(), x_shape.data(), x_shape.size());

    Ort::Value audio = model_->Run(std::move(x_tensor), sid, speed);

    std::vector<int64_t> audio_shape =
        audio.GetTensorTypeAndShapeInfo().GetShape();

    int64_t total = 1;
    // The output shape may be (1, 1, total) or (1, total) or (total,)
    for (auto i : audio_shape) {
      total *= i;
    }

    const float *p = audio.GetTensorData<float>();

    GeneratedAudio ans;
    ans.sample_rate = model_->GetMetaData().sample_rate;
    ans.samples = std::vector<float>(p, p + total);

    if (silence_scale != 1) {
      ans = ans.ScaleSilence(silence_scale);
    }

    return ans;
  }

 private:
  OfflineTtsConfig config_;
  std::unique_ptr<OfflineTtsKittenModel> model_;
  std::vector<std::unique_ptr<kaldifst::TextNormalizer>> tn_list_;
  std::unique_ptr<OfflineTtsFrontend> frontend_;
};

}  // namespace sherpa_onnx
#endif  // SHERPA_ONNX_CSRC_OFFLINE_TTS_KITTEN_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-tts-kitten-model-config.cc
================================================
// sherpa-onnx/csrc/offline-tts-kitten-model-config.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-tts-kitten-model-config.h"

#include <string>
#include <vector>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

void OfflineTtsKittenModelConfig::Register(ParseOptions *po) {
  po->Register("kitten-model", &model, "Path to kitten model");
  po->Register("kitten-voices", &voices,
               "Path to voices.bin for kitten models");
  po->Register("kitten-tokens", &tokens,
               "Path to tokens.txt for kitten models");
  po->Register("kitten-data-dir", &data_dir,
               "Path to the directory containing dict for espeak-ng.");
  po->Register("kitten-length-scale", &length_scale,
               "Inverse of speech speed. Larger->Slower; Smaller->faster.");
}

bool OfflineTtsKittenModelConfig::Validate() const {
  if (model.empty()) {
    SHERPA_ONNX_LOGE("Please provide --kitten-model");
    return false;
  }

  if (!FileExists(model)) {
    SHERPA_ONNX_LOGE("--kitten-model: '%s' does not exist", model.c_str());
    return false;
  }

  if (voices.empty()) {
    SHERPA_ONNX_LOGE("Please provide --kitten-voices");
    return false;
  }

  if (!FileExists(voices)) {
    SHERPA_ONNX_LOGE("--kitten-voices: '%s' does not exist", voices.c_str());
    return false;
  }

  if (tokens.empty()) {
    SHERPA_ONNX_LOGE("Please provide --kitten-tokens");
    return false;
  }

  if (!FileExists(tokens)) {
    SHERPA_ONNX_LOGE("--kitten-tokens: '%s' does not exist", tokens.c_str());
    return false;
  }

  if (data_dir.empty()) {
    SHERPA_ONNX_LOGE("Please provide --kitten-data-dir");
    return false;
  }

  if (!FileExists(data_dir + "/phontab")) {
    SHERPA_ONNX_LOGE(
        "'%s/phontab' does not exist. Please check --kitten-data-dir",
        data_dir.c_str());
    return false;
  }

  if (!FileExists(data_dir + "/phonindex")) {
    SHERPA_ONNX_LOGE(
        "'%s/phonindex' does not exist. Please check --kitten-data-dir",
        data_dir.c_str());
    return false;
  }

  if (!FileExists(data_dir + "/phondata")) {
    SHERPA_ONNX_LOGE(
        "'%s/phondata' does not exist. Please check --kitten-data-dir",
        data_dir.c_str());
    return false;
  }

  if (!FileExists(data_dir + "/intonations")) {
    SHERPA_ONNX_LOGE(
        "'%s/intonations' does not exist. Please check --kitten-data-dir",
        data_dir.c_str());
    return false;
  }

  if (length_scale <= 0) {
    SHERPA_ONNX_LOGE(
        "Please provide a positive length_scale for --kitten-length-scale. "
        "Given: %.3f",
        length_scale);
    return false;
  }

  return true;
}

std::string OfflineTtsKittenModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineTtsKittenModelConfig(";
  os << "model=\"" << model << "\", ";
  os << "voices=\"" << voices << "\", ";
  os << "tokens=\"" << tokens << "\", ";
  os << "data_dir=\"" << data_dir << "\", ";
  os << "length_scale=" << length_scale << ")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-tts-kitten-model-config.h
================================================
// sherpa-onnx/csrc/offline-tts-kitten-model-config.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_KITTEN_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_KITTEN_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineTtsKittenModelConfig {
  std::string model;
  std::string voices;
  std::string tokens;

  std::string data_dir;
  // speed = 1 / length_scale
  float length_scale = 1.0;

  OfflineTtsKittenModelConfig() = default;

  OfflineTtsKittenModelConfig(const std::string &model,
                              const std::string &voices,
                              const std::string &tokens,
                              const std::string &data_dir, float length_scale)
      : model(model),
        voices(voices),
        tokens(tokens),
        data_dir(data_dir),
        length_scale(length_scale) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TTS_KITTEN_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-tts-kitten-model-meta-data.h
================================================
// sherpa-onnx/csrc/offline-tts-kitten-model-meta-data.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_KITTEN_MODEL_META_DATA_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_KITTEN_MODEL_META_DATA_H_

#include <cstdint>
#include <string>

namespace sherpa_onnx {

// please refer to
// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/kitten-tts/nano_v0_1/add_meta_data.py
struct OfflineTtsKittenModelMetaData {
  int32_t sample_rate = 0;
  int32_t num_speakers = 0;
  int32_t version = 1;
  int32_t has_espeak = 1;

  int32_t max_token_len = 256;

  std::string voice;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TTS_KITTEN_MODEL_META_DATA_H_


================================================
FILE: sherpa-onnx/csrc/offline-tts-kitten-model.cc
================================================
// sherpa-onnx/csrc/offline-tts-kitten-model.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-tts-kitten-model.h"

#include <algorithm>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class OfflineTtsKittenModel::Impl {
 public:
  explicit Impl(const OfflineTtsModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto model_buf = ReadFile(config.kitten.model);
    auto voices_buf = ReadFile(config.kitten.voices);
    Init(model_buf.data(), model_buf.size(), voices_buf.data(),
         voices_buf.size());
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineTtsModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto model_buf = ReadFile(mgr, config.kitten.model);
    auto voices_buf = ReadFile(mgr, config.kitten.voices);
    Init(model_buf.data(), model_buf.size(), voices_buf.data(),
         voices_buf.size());
  }

  const OfflineTtsKittenModelMetaData &GetMetaData() const {
    return meta_data_;
  }

  Ort::Value Run(Ort::Value x, int32_t sid, float speed) {
    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::vector<int64_t> x_shape = x.GetTensorTypeAndShapeInfo().GetShape();
    if (x_shape[0] != 1) {
      SHERPA_ONNX_LOGE("Support only batch_size == 1. Given: %d",
                       static_cast<int32_t>(x_shape[0]));
      SHERPA_ONNX_EXIT(-1);
    }

    int32_t num_speakers = meta_data_.num_speakers;
    int32_t dim1 = style_dim_[1];

    /*const*/ float *p = styles_.data() + sid * dim1;

    std::array<int64_t, 2> style_embedding_shape = {1, dim1};
    Ort::Value style_embedding = Ort::Value::CreateTensor(
        memory_info, p, dim1, style_embedding_shape.data(),
        style_embedding_shape.size());

    int64_t speed_shape = 1;
    if (config_.kitten.length_scale != 1 && speed == 1) {
      speed = 1. / config_.kitten.length_scale;
    }

    Ort::Value speed_tensor =
        Ort::Value::CreateTensor(memory_info, &speed, 1, &speed_shape, 1);

    std::array<Ort::Value, 3> inputs = {
        std::move(x), std::move(style_embedding), std::move(speed_tensor)};

    auto out =
        sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(),
                   output_names_ptr_.data(), output_names_ptr_.size());

    return std::move(out[0]);
  }

 private:
  void Init(void *model_data, size_t model_data_length, const char *voices_data,
            size_t voices_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);

    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);
    // get meta data
    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      os << "---kitten model---\n";
      PrintModelMetadata(os, meta_data);

      os << "----------input names----------\n";
      int32_t i = 0;
      for (const auto &s : input_names_) {
        os << i << " " << s << "\n";
        ++i;
      }
      os << "----------output names----------\n";
      i = 0;
      for (const auto &s : output_names_) {
        os << i << " " << s << "\n";
        ++i;
      }

#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below

    std::string model_type;
    SHERPA_ONNX_READ_META_DATA_STR(model_type, "model_type");
    if (model_type != "kitten-tts") {
      SHERPA_ONNX_LOGE(
          "Please download the kitten tts model from us containing meta data");
      SHERPA_ONNX_EXIT(-1);
    }

    SHERPA_ONNX_READ_META_DATA(meta_data_.sample_rate, "sample_rate");
    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.version, "version", 1);
    SHERPA_ONNX_READ_META_DATA(meta_data_.num_speakers, "n_speakers");
    SHERPA_ONNX_READ_META_DATA(meta_data_.has_espeak, "has_espeak");
    SHERPA_ONNX_READ_META_DATA_STR_WITH_DEFAULT(meta_data_.voice, "voice",
                                                "en-us");
    if (meta_data_.has_espeak != 1) {
      SHERPA_ONNX_LOGE("It should require espeak-ng");
      SHERPA_ONNX_EXIT(-1);
    }

    if (config_.debug) {
      std::vector<std::string> speaker_names;
      SHERPA_ONNX_READ_META_DATA_VEC_STRING(speaker_names, "speaker_names");
      std::ostringstream os;
      os << "\n";
      for (int32_t i = 0; i != speaker_names.size(); ++i) {
        os << i << "->" << speaker_names[i] << ", ";
      }
      os << "\n";

#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }

    SHERPA_ONNX_READ_META_DATA_VEC(style_dim_, "style_dim");
    if (style_dim_.size() != 2) {
      SHERPA_ONNX_LOGE("style_dim should be 2-d, given: %d",
                       static_cast<int32_t>(style_dim_.size()));
      SHERPA_ONNX_EXIT(-1);
    }

    if (style_dim_[0] != 1) {
      SHERPA_ONNX_LOGE("style_dim[0] should be 1, given: %d", style_dim_[0]);
      SHERPA_ONNX_EXIT(-1);
    }

    int32_t actual_num_floats = voices_data_length / sizeof(float);
    int32_t expected_num_floats =
        style_dim_[0] * style_dim_[1] * meta_data_.num_speakers;

    if (actual_num_floats != expected_num_floats) {
#if __OHOS__
      SHERPA_ONNX_LOGE(
          "Corrupted --kitten-voices '%{public}s'. Expected #floats: "
          "%{public}d, actual: %{public}d",
          config_.kitten.voices.c_str(), expected_num_floats,
          actual_num_floats);
#else
      SHERPA_ONNX_LOGE(
          "Corrupted --kitten-voices '%s'. Expected #floats: %d, actual: %d",
          config_.kitten.voices.c_str(), expected_num_floats,
          actual_num_floats);
#endif

      SHERPA_ONNX_EXIT(-1);
    }

    styles_ = std::vector<float>(
        reinterpret_cast<const float *>(voices_data),
        reinterpret_cast<const float *>(voices_data) + expected_num_floats);
  }

 private:
  OfflineTtsModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;

  OfflineTtsKittenModelMetaData meta_data_;
  std::vector<int32_t> style_dim_;

  // (num_speakers, style_dim_[1])
  std::vector<float> styles_;
};

OfflineTtsKittenModel::OfflineTtsKittenModel(
    const OfflineTtsModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineTtsKittenModel::OfflineTtsKittenModel(
    Manager *mgr, const OfflineTtsModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineTtsKittenModel::~OfflineTtsKittenModel() = default;

const OfflineTtsKittenModelMetaData &OfflineTtsKittenModel::GetMetaData()
    const {
  return impl_->GetMetaData();
}

Ort::Value OfflineTtsKittenModel::Run(Ort::Value x, int64_t sid /*= 0*/,
                                      float speed /*= 1.0*/) const {
  return impl_->Run(std::move(x), sid, speed);
}

#if __ANDROID_API__ >= 9
template OfflineTtsKittenModel::OfflineTtsKittenModel(
    AAssetManager *mgr, const OfflineTtsModelConfig &config);
#endif

#if __OHOS__
template OfflineTtsKittenModel::OfflineTtsKittenModel(
    NativeResourceManager *mgr, const OfflineTtsModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-tts-kitten-model.h
================================================
// sherpa-onnx/csrc/offline-tts-kitten-model.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_KITTEN_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_KITTEN_MODEL_H_

#include <memory>
#include <string>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-tts-kitten-model-meta-data.h"
#include "sherpa-onnx/csrc/offline-tts-model-config.h"

namespace sherpa_onnx {

class OfflineTtsKittenModel {
 public:
  ~OfflineTtsKittenModel();

  explicit OfflineTtsKittenModel(const OfflineTtsModelConfig &config);

  template <typename Manager>
  OfflineTtsKittenModel(Manager *mgr, const OfflineTtsModelConfig &config);

  // @params x An int64 tensor of shape (1, num_tokens)
  // @return Return a float32 tensor containing the
  //         samples of shape (num_samples,)
  Ort::Value Run(Ort::Value x, int64_t sid = 0, float speed = 1.0) const;

  const OfflineTtsKittenModelMetaData &GetMetaData() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TTS_KITTEN_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/offline-tts-kokoro-impl.h
================================================
// sherpa-onnx/csrc/offline-tts-kokoro-impl.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_KOKORO_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_KOKORO_IMPL_H_

#include <iomanip>
#include <ios>
#include <memory>
#include <string>
#include <sstream>
#include <utility>
#include <vector>

#include "fst/extensions/far/far.h"
#include "kaldifst/csrc/kaldi-fst-io.h"
#include "kaldifst/csrc/text-normalizer.h"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/kokoro-multi-lang-lexicon.h"
#include "sherpa-onnx/csrc/lexicon.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-tts-frontend.h"
#include "sherpa-onnx/csrc/offline-tts-impl.h"
#include "sherpa-onnx/csrc/offline-tts-kokoro-model.h"
#include "sherpa-onnx/csrc/piper-phonemize-lexicon.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class OfflineTtsKokoroImpl : public OfflineTtsImpl {
 public:
  explicit OfflineTtsKokoroImpl(const OfflineTtsConfig &config)
      : config_(config),
        model_(std::make_unique<OfflineTtsKokoroModel>(config.model)) {
    InitFrontend();

    if (!config.rule_fsts.empty()) {
      std::vector<std::string> files;
      SplitStringToVector(config.rule_fsts, ",", false, &files);
      tn_list_.reserve(files.size());
      for (const auto &f : files) {
        if (config.model.debug) {
#if __OHOS__
          SHERPA_ONNX_LOGE("rule fst: %{public}s", f.c_str());
#else
          SHERPA_ONNX_LOGE("rule fst: %s", f.c_str());
#endif
        }
        tn_list_.push_back(std::make_unique<kaldifst::TextNormalizer>(f));
      }
    }

    if (!config.rule_fars.empty()) {
      if (config.model.debug) {
        SHERPA_ONNX_LOGE("Loading FST archives");
      }
      std::vector<std::string> files;
      SplitStringToVector(config.rule_fars, ",", false, &files);

      tn_list_.reserve(files.size() + tn_list_.size());

      for (const auto &f : files) {
        if (config.model.debug) {
#if __OHOS__
          SHERPA_ONNX_LOGE("rule far: %{public}s", f.c_str());
#else
          SHERPA_ONNX_LOGE("rule far: %s", f.c_str());
#endif
        }
        std::unique_ptr<fst::FarReader<fst::StdArc>> reader(
            fst::FarReader<fst::StdArc>::Open(f));
        for (; !reader->Done(); reader->Next()) {
          std::unique_ptr<fst::StdConstFst> r(
              fst::CastOrConvertToConstFst(reader->GetFst()->Copy()));

          tn_list_.push_back(
              std::make_unique<kaldifst::TextNormalizer>(std::move(r)));
        }
      }

      if (config.model.debug) {
        SHERPA_ONNX_LOGE("FST archives loaded!");
      }
    }
  }

  template <typename Manager>
  OfflineTtsKokoroImpl(Manager *mgr, const OfflineTtsConfig &config)
      : config_(config),
        model_(std::make_unique<OfflineTtsKokoroModel>(mgr, config.model)) {
    InitFrontend(mgr);

    if (!config.rule_fsts.empty()) {
      std::vector<std::string> files;
      SplitStringToVector(config.rule_fsts, ",", false, &files);
      tn_list_.reserve(files.size());
      for (const auto &f : files) {
        if (config.model.debug) {
#if __OHOS__
          SHERPA_ONNX_LOGE("rule fst: %{public}s", f.c_str());
#else
          SHERPA_ONNX_LOGE("rule fst: %s", f.c_str());
#endif
        }
        auto buf = ReadFile(mgr, f);
        std::istringstream is(std::string(buf.data(), buf.size()));
        tn_list_.push_back(std::make_unique<kaldifst::TextNormalizer>(is));
      }
    }

    if (!config.rule_fars.empty()) {
      std::vector<std::string> files;
      SplitStringToVector(config.rule_fars, ",", false, &files);
      tn_list_.reserve(files.size() + tn_list_.size());

      for (const auto &f : files) {
        if (config.model.debug) {
#if __OHOS__
          SHERPA_ONNX_LOGE("rule far: %{public}s", f.c_str());
#else
          SHERPA_ONNX_LOGE("rule far: %s", f.c_str());
#endif
        }

        auto buf = ReadFile(mgr, f);

        std::unique_ptr<std::istream> s(
            new std::istringstream(std::string(buf.data(), buf.size())));

        std::unique_ptr<fst::FarReader<fst::StdArc>> reader(
            fst::FarReader<fst::StdArc>::Open(std::move(s)));

        for (; !reader->Done(); reader->Next()) {
          std::unique_ptr<fst::StdConstFst> r(
              fst::CastOrConvertToConstFst(reader->GetFst()->Copy()));

          tn_list_.push_back(
              std::make_unique<kaldifst::TextNormalizer>(std::move(r)));
        }  // for (; !reader->Done(); reader->Next())
      }    // for (const auto &f : files)
    }      // if (!config.rule_fars.empty())
  }

  int32_t SampleRate() const override {
    return model_->GetMetaData().sample_rate;
  }

  int32_t NumSpeakers() const override {
    return model_->GetMetaData().num_speakers;
  }

  // Supported options in GenerationConfig:
  //   - sid: Speaker ID for multi-speaker models
  //   - speed: Speech speed factor. If left at 1.0, it falls back to the
  //            default implied by kokoro.length_scale.
  //   - silence_scale: Scale applied to pauses in the generated audio. If left
  //                    at 0.2, it falls back to OfflineTtsConfig.silence_scale.
  //
  // Supported extra options in config.extra:
  //   - lang: Language override for Kokoro >= 1.0. Defaults to
  //           kokoro.lang if provided, otherwise meta_data.voice.
  GeneratedAudio Generate(
      const std::string &_text, const GenerationConfig &gen_config,
      GeneratedAudioCallback callback = nullptr) const override {
    if (config_.model.debug) {
      SHERPA_ONNX_LOGE("%s", gen_config.ToString().c_str());
    }

    int64_t sid = gen_config.sid;
    float speed = gen_config.speed;
    if (speed <= 0) {
      SHERPA_ONNX_LOGE("Speed must be > 0. Given: %f", speed);
      return {};
    }

    const auto &meta_data = model_->GetMetaData();
    int32_t num_speakers = meta_data.num_speakers;

    if (num_speakers == 0 && sid != 0) {
#if __OHOS__
      SHERPA_ONNX_LOGE(
          "This is a single-speaker model and supports only sid 0. Given sid: "
          "%{public}d. sid is ignored",
          static_cast<int32_t>(sid));
#else
      SHERPA_ONNX_LOGE(
          "This is a single-speaker model and supports only sid 0. Given sid: "
          "%d. sid is ignored",
          static_cast<int32_t>(sid));
#endif
    }

    if (num_speakers != 0 && (sid >= num_speakers || sid < 0)) {
#if __OHOS__
      SHERPA_ONNX_LOGE(
          "This model contains only %{public}d speakers. sid should be in the "
          "range [%{public}d, %{public}d]. Given: %{public}d. Use sid=0",
          num_speakers, 0, num_speakers - 1, static_cast<int32_t>(sid));
#else
      SHERPA_ONNX_LOGE(
          "This model contains only %d speakers. sid should be in the range "
          "[%d, %d]. Given: %d. Use sid=0",
          num_speakers, 0, num_speakers - 1, static_cast<int32_t>(sid));
#endif
      sid = 0;
    }

    std::string text = _text;
    if (config_.model.debug) {
#if __OHOS__
      SHERPA_ONNX_LOGE("Raw text: %{public}s", text.c_str());
#else
      SHERPA_ONNX_LOGE("Raw text: %s", text.c_str());
#endif
      std::ostringstream os;
      os << "In bytes (hex):\n";
      const auto p = reinterpret_cast<const uint8_t *>(text.c_str());
      for (int32_t i = 0; i != text.size(); ++i) {
        os << std::setw(2) << std::setfill('0') << std::hex
           << static_cast<uint32_t>(p[i]) << " ";
      }
      os << "\n";

#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
    }

    if (!tn_list_.empty()) {
      for (const auto &tn : tn_list_) {
        text = tn->Normalize(text);
        if (config_.model.debug) {
#if __OHOS__
          SHERPA_ONNX_LOGE("After normalizing: %{public}s", text.c_str());
#else
          SHERPA_ONNX_LOGE("After normalizing: %s", text.c_str());
#endif
        }
      }
    }

    std::string lang = gen_config.GetExtraString("lang");
    if (lang.empty()) {
      lang = config_.model.kokoro.lang.empty() ? meta_data.voice
                                               : config_.model.kokoro.lang;
    }

    std::vector<TokenIDs> token_ids = frontend_->ConvertTextToTokenIds(
        text, lang);

    if (token_ids.empty() ||
        (token_ids.size() == 1 && token_ids[0].tokens.empty())) {
#if __OHOS__
      SHERPA_ONNX_LOGE("Failed to convert '%{public}s' to token IDs",
                       text.c_str());
#else
      SHERPA_ONNX_LOGE("Failed to convert '%s' to token IDs", text.c_str());
#endif
      return {};
    }

    std::vector<std::vector<int64_t>> x;

    x.reserve(token_ids.size());

    for (auto &i : token_ids) {
      x.push_back(std::move(i.tokens));
    }

    int32_t x_size = static_cast<int32_t>(x.size());

    if (config_.max_num_sentences != 1) {
#if __OHOS__
      SHERPA_ONNX_LOGE(
          "max_num_sentences (%{public}d) != 1 is ignored for Kokoro TTS "
          "models",
          config_.max_num_sentences);
#else
      SHERPA_ONNX_LOGE(
          "max_num_sentences (%d) != 1 is ignored for Kokoro TTS models",
          config_.max_num_sentences);
#endif
    }

    // the input text is too long, we process sentences within it in batches
    // to avoid OOM. Batch size is config_.max_num_sentences
    std::vector<std::vector<int64_t>> batch_x;

    int32_t batch_size = 1;
    batch_x.reserve(config_.max_num_sentences);
    int32_t num_batches = x_size / batch_size;

    if (config_.model.debug) {
#if __OHOS__
      SHERPA_ONNX_LOGE(
          "Split it into %{public}d batches. batch size: "
          "%{public}d. Number of sentences: %{public}d",
          num_batches, batch_size, x_size);
#else
      SHERPA_ONNX_LOGE(
          "Split it into %d batches. batch size: %d. Number "
          "of sentences: %d",
          num_batches, batch_size, x_size);
#endif
    }

    GeneratedAudio ans;

    int32_t should_continue = 1;

    int32_t k = 0;

    for (int32_t b = 0; b != num_batches && should_continue; ++b) {
      batch_x.clear();
      for (int32_t i = 0; i != batch_size; ++i, ++k) {
        batch_x.push_back(std::move(x[k]));
      }

      auto audio =
          Process(batch_x, sid, speed, gen_config.silence_scale);
      ans.sample_rate = audio.sample_rate;
      ans.samples.insert(ans.samples.end(), audio.samples.begin(),
                         audio.samples.end());
      if (callback) {
        should_continue = callback(audio.samples.data(), audio.samples.size(),
                                   (b + 1) * 1.0 / num_batches);
        // Caution(fangjun): audio is freed when the callback returns, so users
        // should copy the data if they want to access the data after
        // the callback returns to avoid segmentation fault.
      }
    }

    batch_x.clear();
    while (k < static_cast<int32_t>(x.size()) && should_continue) {
      batch_x.push_back(std::move(x[k]));

      ++k;
    }

    if (!batch_x.empty()) {
      auto audio =
          Process(batch_x, sid, speed, gen_config.silence_scale);
      ans.sample_rate = audio.sample_rate;
      ans.samples.insert(ans.samples.end(), audio.samples.begin(),
                         audio.samples.end());
      if (callback) {
        callback(audio.samples.data(), audio.samples.size(), 1.0);
        // Caution(fangjun): audio is freed when the callback returns, so users
        // should copy the data if they want to access the data after
        // the callback returns to avoid segmentation fault.
      }
    }

    return ans;
  }

  [[deprecated("Use Generate(text, GenerationConfig, callback) instead")]]
  GeneratedAudio Generate(
      const std::string &text, int64_t sid = 0, float speed = 1.0,
      GeneratedAudioCallback callback = nullptr) const override {
    GenerationConfig gen_config;
    gen_config.sid = sid;
    gen_config.speed = speed;
    gen_config.silence_scale = config_.silence_scale;
    if (!config_.model.kokoro.lang.empty()) {
      gen_config.extra["lang"] = config_.model.kokoro.lang;
    }

    return Generate(text, gen_config, std::move(callback));
  }

 private:
  template <typename Manager>
  void InitFrontend(Manager *mgr) {
    const auto &meta_data = model_->GetMetaData();

    if (meta_data.version >= 2) {
      // this is a multi-lingual model, we require that you pass lexicon
      if (config_.model.kokoro.lexicon.empty() &&
          config_.model.kokoro.lang.empty()) {
        SHERPA_ONNX_LOGE("Current model version: '%d'", meta_data.version);
        SHERPA_ONNX_LOGE(
            "You are using a multi-lingual Kokoro model (e.g., Kokoro >= "
            "v1.0). Please pass --kokoro-lexicon or provide --kokoro-lang");
        SHERPA_ONNX_EXIT(-1);
      }

      frontend_ = std::make_unique<KokoroMultiLangLexicon>(
          mgr, config_.model.kokoro.tokens, config_.model.kokoro.lexicon,
          config_.model.kokoro.data_dir, meta_data, config_.model.debug);

      return;
    }

    frontend_ = std::make_unique<PiperPhonemizeLexicon>(
        mgr, config_.model.kokoro.tokens, config_.model.kokoro.data_dir,
        meta_data);
  }

  void InitFrontend() {
    const auto &meta_data = model_->GetMetaData();
    if (meta_data.version >= 2) {
      // this is a multi-lingual model, we require that you pass lexicon
      if (config_.model.kokoro.lexicon.empty() &&
          config_.model.kokoro.lang.empty()) {
        SHERPA_ONNX_LOGE("Current model version: '%d'", meta_data.version);
        SHERPA_ONNX_LOGE(
            "You are using a multi-lingual Kokoro model (e.g., Kokoro >= "
            "v1.0). please pass --kokoro-lexicon or --kokoro-lang");
        SHERPA_ONNX_EXIT(-1);
      }

      frontend_ = std::make_unique<KokoroMultiLangLexicon>(
          config_.model.kokoro.tokens, config_.model.kokoro.lexicon,
          config_.model.kokoro.data_dir, meta_data, config_.model.debug);

      return;
    }

    // this is for kokoro v0.19, which supports only English
    frontend_ = std::make_unique<PiperPhonemizeLexicon>(
        config_.model.kokoro.tokens, config_.model.kokoro.data_dir, meta_data);
  }

  GeneratedAudio Process(const std::vector<std::vector<int64_t>> &tokens,
                         int32_t sid, float speed,
                         float silence_scale) const {
    int32_t num_tokens = 0;
    for (const auto &k : tokens) {
      num_tokens += k.size();
    }

    std::vector<int64_t> x;
    x.reserve(num_tokens);
    for (const auto &k : tokens) {
      x.insert(x.end(), k.begin(), k.end());
    }

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::array<int64_t, 2> x_shape = {1, static_cast<int32_t>(x.size())};
    Ort::Value x_tensor = Ort::Value::CreateTensor(
        memory_info, x.data(), x.size(), x_shape.data(), x_shape.size());

    Ort::Value audio = model_->Run(std::move(x_tensor), sid, speed);

    std::vector<int64_t> audio_shape =
        audio.GetTensorTypeAndShapeInfo().GetShape();

    int64_t total = 1;
    // The output shape may be (1, 1, total) or (1, total) or (total,)
    for (auto i : audio_shape) {
      total *= i;
    }

    const float *p = audio.GetTensorData<float>();

    GeneratedAudio ans;
    ans.sample_rate = model_->GetMetaData().sample_rate;
    ans.samples = std::vector<float>(p, p + total);

    if (silence_scale == 0.2f) {
      silence_scale = config_.silence_scale;
    }

    if (silence_scale != 1) {
      ans = ans.ScaleSilence(silence_scale);
    }

    return ans;
  }

 private:
  OfflineTtsConfig config_;
  std::unique_ptr<OfflineTtsKokoroModel> model_;
  std::vector<std::unique_ptr<kaldifst::TextNormalizer>> tn_list_;
  std::unique_ptr<OfflineTtsFrontend> frontend_;
};

}  // namespace sherpa_onnx
#endif  // SHERPA_ONNX_CSRC_OFFLINE_TTS_KOKORO_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-tts-kokoro-model-config.cc
================================================
// sherpa-onnx/csrc/offline-tts-kokoro-model-config.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-tts-kokoro-model-config.h"

#include <string>
#include <vector>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

void OfflineTtsKokoroModelConfig::Register(ParseOptions *po) {
  po->Register("kokoro-model", &model, "Path to Kokoro model");
  po->Register("kokoro-voices", &voices,
               "Path to voices.bin for Kokoro models");
  po->Register("kokoro-tokens", &tokens,
               "Path to tokens.txt for Kokoro models");
  po->Register("kokoro-lang", &lang,
               "Used only by kokoro >= 1.0. Example values: "
               "en (English), "
               "es (Spanish), fr (French), hi (hindi), it (Italian), "
               "pt-br (Brazilian Portuguese)."
               "You can leave it empty, in which case you need to provide "
               "--kokoro-lexicon.");
  po->Register(
      "kokoro-lexicon", &lexicon,
      "Path to lexicon.txt for Kokoro models. Used only for Kokoro >= v1.0"
      "You can pass multiple files, separated by ','. Example: "
      "./lexicon-us-en.txt,./lexicon-zh.txt");
  po->Register("kokoro-data-dir", &data_dir,
               "Path to the directory containing dict for espeak-ng.");
  po->Register("kokoro-dict-dir", &dict_dir,
               "Not used. You don't need to provide a value for it");
  po->Register("kokoro-length-scale", &length_scale,
               "Speech speed. Larger->Slower; Smaller->faster.");
}

bool OfflineTtsKokoroModelConfig::Validate() const {
  if (model.empty()) {
    SHERPA_ONNX_LOGE("Please provide --kokoro-model");
    return false;
  }

  if (!FileExists(model)) {
    SHERPA_ONNX_LOGE("--kokoro-model: '%s' does not exist", model.c_str());
    return false;
  }

  if (tokens.empty()) {
    SHERPA_ONNX_LOGE("Please provide --kokoro-tokens");
    return false;
  }

  if (!FileExists(tokens)) {
    SHERPA_ONNX_LOGE("--kokoro-tokens: '%s' does not exist", tokens.c_str());
    return false;
  }

  if (!lexicon.empty()) {
    std::vector<std::string> files;
    SplitStringToVector(lexicon, ",", false, &files);
    for (const auto &f : files) {
      if (!FileExists(f)) {
        SHERPA_ONNX_LOGE(
            "lexicon '%s' does not exist. Please re-check --kokoro-lexicon",
            f.c_str());
        return false;
      }
    }
  }

  if (data_dir.empty()) {
    SHERPA_ONNX_LOGE("Please provide --kokoro-data-dir");
    return false;
  }

  if (!FileExists(data_dir + "/phontab")) {
    SHERPA_ONNX_LOGE(
        "'%s/phontab' does not exist. Please check --kokoro-data-dir",
        data_dir.c_str());
    return false;
  }

  if (!FileExists(data_dir + "/phonindex")) {
    SHERPA_ONNX_LOGE(
        "'%s/phonindex' does not exist. Please check --kokoro-data-dir",
        data_dir.c_str());
    return false;
  }

  if (!FileExists(data_dir + "/phondata")) {
    SHERPA_ONNX_LOGE(
        "'%s/phondata' does not exist. Please check --kokoro-data-dir",
        data_dir.c_str());
    return false;
  }

  if (!FileExists(data_dir + "/intonations")) {
    SHERPA_ONNX_LOGE(
        "'%s/intonations' does not exist. Please check --kokoro-data-dir",
        data_dir.c_str());
    return false;
  }

  if (!dict_dir.empty()) {
    SHERPA_ONNX_LOGE(
        "From sherpa-onnx v1.12.15, you don't need to provide dict_dir or "
        "dictDir for this model. Ignore this value.");
  }

  return true;
}

std::string OfflineTtsKokoroModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineTtsKokoroModelConfig(";
  os << "model=\"" << model << "\", ";
  os << "voices=\"" << voices << "\", ";
  os << "tokens=\"" << tokens << "\", ";
  os << "lexicon=\"" << lexicon << "\", ";
  os << "data_dir=\"" << data_dir << "\", ";
  os << "length_scale=" << length_scale << ", ";
  os << "lang=\"" << lang << "\")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-tts-kokoro-model-config.h
================================================
// sherpa-onnx/csrc/offline-tts-kokoro-model-config.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_KOKORO_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_KOKORO_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineTtsKokoroModelConfig {
  std::string model;
  std::string voices;
  std::string tokens;

  // Note: You can pass multiple files, separated by ",", to lexicon
  // Example: lexicon = "./lexicon-gb-en.txt,./lexicon-zh.txt";
  std::string lexicon;

  std::string data_dir;

  std::string dict_dir;

  // speed = 1 / length_scale
  float length_scale = 1.0;

  // Used only for Kokoro >= 1.0.
  //
  // If it is not empty, meta_data.voice is ignored.
  // Example values: es (Spanish), fr (French), pt (Portuguese)
  // See https://hf-mirror.com/hexgrad/Kokoro-82M/blob/main/VOICES.md
  std::string lang;

  OfflineTtsKokoroModelConfig() = default;

  OfflineTtsKokoroModelConfig(const std::string &model,
                              const std::string &voices,
                              const std::string &tokens,
                              const std::string &lexicon,
                              const std::string &data_dir,
                              const std::string &dict_dir, float length_scale,
                              const std::string &lang)
      : model(model),
        voices(voices),
        tokens(tokens),
        lexicon(lexicon),
        data_dir(data_dir),
        dict_dir(dict_dir),
        length_scale(length_scale),
        lang(lang) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TTS_KOKORO_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-tts-kokoro-model-meta-data.h
================================================
// sherpa-onnx/csrc/offline-tts-kokoro-model-meta-data.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_KOKORO_MODEL_META_DATA_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_KOKORO_MODEL_META_DATA_H_

#include <cstdint>
#include <string>

namespace sherpa_onnx {

// please refer to
// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/kokoro/v0.19/add_meta_data.py
// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/kokoro/v1.0/add_meta_data.py
// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/kokoro/v1.1-zh/add_meta_data.py
struct OfflineTtsKokoroModelMetaData {
  int32_t sample_rate = 0;
  int32_t num_speakers = 0;
  int32_t version = 1;
  int32_t has_espeak = 1;
  int32_t max_token_len = 0;

  std::string voice;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TTS_KOKORO_MODEL_META_DATA_H_


================================================
FILE: sherpa-onnx/csrc/offline-tts-kokoro-model.cc
================================================
// sherpa-onnx/csrc/offline-tts-kokoro-model.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-tts-kokoro-model.h"

#include <algorithm>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class OfflineTtsKokoroModel::Impl {
 public:
  explicit Impl(const OfflineTtsModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto model_buf = ReadFile(config.kokoro.model);
    auto voices_buf = ReadFile(config.kokoro.voices);
    Init(model_buf.data(), model_buf.size(), voices_buf.data(),
         voices_buf.size());
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineTtsModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto model_buf = ReadFile(mgr, config.kokoro.model);
    auto voices_buf = ReadFile(mgr, config.kokoro.voices);
    Init(model_buf.data(), model_buf.size(), voices_buf.data(),
         voices_buf.size());
  }

  const OfflineTtsKokoroModelMetaData &GetMetaData() const {
    return meta_data_;
  }

  Ort::Value Run(Ort::Value x, int32_t sid, float speed) {
    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::vector<int64_t> x_shape = x.GetTensorTypeAndShapeInfo().GetShape();
    if (x_shape[0] != 1) {
      SHERPA_ONNX_LOGE("Support only batch_size == 1. Given: %d",
                       static_cast<int32_t>(x_shape[0]));
      exit(-1);
    }

    // there is a 0 at the front and end of x
    int32_t len = static_cast<int32_t>(x_shape[1]) - 2;
    int32_t num_speakers = meta_data_.num_speakers;
    int32_t dim0 = style_dim_[0];
    int32_t dim1 = style_dim_[2];
    if (len >= dim0) {
      SHERPA_ONNX_LOGE("Bad things happened! %d vs %d", len, dim0);
      SHERPA_ONNX_EXIT(-1);
    }

    /*const*/ float *p = styles_.data() + sid * dim0 * dim1 + len * dim1;

    std::array<int64_t, 2> style_embedding_shape = {1, dim1};
    Ort::Value style_embedding = Ort::Value::CreateTensor(
        memory_info, p, dim1, style_embedding_shape.data(),
        style_embedding_shape.size());

    int64_t speed_shape = 1;
    if (config_.kokoro.length_scale != 1 && speed == 1) {
      speed = 1. / config_.kokoro.length_scale;
    }

    Ort::Value speed_tensor =
        Ort::Value::CreateTensor(memory_info, &speed, 1, &speed_shape, 1);

    std::array<Ort::Value, 3> inputs = {
        std::move(x), std::move(style_embedding), std::move(speed_tensor)};

    auto out =
        sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(),
                   output_names_ptr_.data(), output_names_ptr_.size());

    return std::move(out[0]);
  }

 private:
  void Init(void *model_data, size_t model_data_length, const char *voices_data,
            size_t voices_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);

    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);
    // get meta data
    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      os << "---kokoro model---\n";
      PrintModelMetadata(os, meta_data);

      os << "----------input names----------\n";
      int32_t i = 0;
      for (const auto &s : input_names_) {
        os << i << " " << s << "\n";
        ++i;
      }
      os << "----------output names----------\n";
      i = 0;
      for (const auto &s : output_names_) {
        os << i << " " << s << "\n";
        ++i;
      }

#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
    SHERPA_ONNX_READ_META_DATA(meta_data_.sample_rate, "sample_rate");
    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.version, "version", 1);
    SHERPA_ONNX_READ_META_DATA(meta_data_.num_speakers, "n_speakers");
    SHERPA_ONNX_READ_META_DATA(meta_data_.has_espeak, "has_espeak");
    SHERPA_ONNX_READ_META_DATA_STR_WITH_DEFAULT(meta_data_.voice, "voice",
                                                "en-us");

    if (config_.debug) {
      std::vector<std::string> speaker_names;
      SHERPA_ONNX_READ_META_DATA_VEC_STRING(speaker_names, "speaker_names");
      std::ostringstream os;
      os << "\n";
      for (int32_t i = 0; i != speaker_names.size(); ++i) {
        os << i << "->" << speaker_names[i] << ", ";
      }
      os << "\n";

#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }

    SHERPA_ONNX_READ_META_DATA_VEC(style_dim_, "style_dim");
    if (style_dim_.size() != 3) {
      SHERPA_ONNX_LOGE("style_dim should be 3-d, given: %d",
                       static_cast<int32_t>(style_dim_.size()));
      SHERPA_ONNX_EXIT(-1);
    }

    if (style_dim_[1] != 1) {
      SHERPA_ONNX_LOGE("style_dim[1] should be 1, given: %d", style_dim_[1]);
      SHERPA_ONNX_EXIT(-1);
    }

    int32_t actual_num_floats = voices_data_length / sizeof(float);
    int32_t expected_num_floats =
        style_dim_[0] * style_dim_[2] * meta_data_.num_speakers;

    if (actual_num_floats != expected_num_floats) {
#if __OHOS__
      SHERPA_ONNX_LOGE(
          "Corrupted --kokoro-voices '%{public}s'. Expected #floats: "
          "%{public}d, actual: %{public}d",
          config_.kokoro.voices.c_str(), expected_num_floats,
          actual_num_floats);
#else
      SHERPA_ONNX_LOGE(
          "Corrupted --kokoro-voices '%s'. Expected #floats: %d, actual: %d",
          config_.kokoro.voices.c_str(), expected_num_floats,
          actual_num_floats);
#endif

      SHERPA_ONNX_EXIT(-1);
    }

    styles_ = std::vector<float>(
        reinterpret_cast<const float *>(voices_data),
        reinterpret_cast<const float *>(voices_data) + expected_num_floats);

    meta_data_.max_token_len = style_dim_[0];
  }

 private:
  OfflineTtsModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;

  OfflineTtsKokoroModelMetaData meta_data_;
  std::vector<int32_t> style_dim_;

  // (num_speakers, style_dim_[0], style_dim_[2])
  std::vector<float> styles_;
};

OfflineTtsKokoroModel::OfflineTtsKokoroModel(
    const OfflineTtsModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineTtsKokoroModel::OfflineTtsKokoroModel(
    Manager *mgr, const OfflineTtsModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineTtsKokoroModel::~OfflineTtsKokoroModel() = default;

const OfflineTtsKokoroModelMetaData &OfflineTtsKokoroModel::GetMetaData()
    const {
  return impl_->GetMetaData();
}

Ort::Value OfflineTtsKokoroModel::Run(Ort::Value x, int64_t sid /*= 0*/,
                                      float speed /*= 1.0*/) const {
  return impl_->Run(std::move(x), sid, speed);
}

#if __ANDROID_API__ >= 9
template OfflineTtsKokoroModel::OfflineTtsKokoroModel(
    AAssetManager *mgr, const OfflineTtsModelConfig &config);
#endif

#if __OHOS__
template OfflineTtsKokoroModel::OfflineTtsKokoroModel(
    NativeResourceManager *mgr, const OfflineTtsModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-tts-kokoro-model.h
================================================
// sherpa-onnx/csrc/offline-tts-kokoro-model.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_KOKORO_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_KOKORO_MODEL_H_

#include <memory>
#include <string>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-tts-kokoro-model-meta-data.h"
#include "sherpa-onnx/csrc/offline-tts-model-config.h"

namespace sherpa_onnx {

class OfflineTtsKokoroModel {
 public:
  ~OfflineTtsKokoroModel();

  explicit OfflineTtsKokoroModel(const OfflineTtsModelConfig &config);

  template <typename Manager>
  OfflineTtsKokoroModel(Manager *mgr, const OfflineTtsModelConfig &config);

  // Return a float32 tensor containing the samples
  // of shape (batch_size, num_samples)
  Ort::Value Run(Ort::Value x, int64_t sid = 0, float speed = 1.0) const;

  const OfflineTtsKokoroModelMetaData &GetMetaData() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TTS_KOKORO_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/offline-tts-matcha-impl.h
================================================
// sherpa-onnx/csrc/offline-tts-matcha-impl.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_MATCHA_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_MATCHA_IMPL_H_

#include <algorithm>
#include <memory>
#include <string>
#include <sstream>
#include <utility>
#include <vector>

#include "fst/extensions/far/far.h"
#include "kaldifst/csrc/kaldi-fst-io.h"
#include "kaldifst/csrc/text-normalizer.h"
#include "sherpa-onnx/csrc/character-lexicon.h"
#include "sherpa-onnx/csrc/lexicon.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/matcha-tts-lexicon.h"
#include "sherpa-onnx/csrc/melo-tts-lexicon.h"
#include "sherpa-onnx/csrc/offline-tts-character-frontend.h"
#include "sherpa-onnx/csrc/offline-tts-frontend.h"
#include "sherpa-onnx/csrc/offline-tts-impl.h"
#include "sherpa-onnx/csrc/offline-tts-matcha-model.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/piper-phonemize-lexicon.h"
#include "sherpa-onnx/csrc/text-utils.h"
#include "sherpa-onnx/csrc/vocoder.h"

namespace sherpa_onnx {

class OfflineTtsMatchaImpl : public OfflineTtsImpl {
 public:
  explicit OfflineTtsMatchaImpl(const OfflineTtsConfig &config)
      : config_(config),
        model_(std::make_unique<OfflineTtsMatchaModel>(config.model)) {
    const auto &meta_data = model_->GetMetaData();
    if (meta_data.need_vocoder) {
      if (config.model.matcha.vocoder.empty()) {
        SHERPA_ONNX_LOGE("Please provide vocoder for this model");
        SHERPA_ONNX_EXIT(-1);
      }

      if (!FileExists(config.model.matcha.vocoder)) {
        SHERPA_ONNX_LOGE("Please vocoder '%s' does not exist",
                         config.model.matcha.vocoder.c_str());
        SHERPA_ONNX_EXIT(-1);
      }

      vocoder_ = Vocoder::Create(config.model);
    } else if (!config.model.matcha.vocoder.empty()) {
      SHERPA_ONNX_LOGE(
          "You don't need to provide vocoder for this model. Ignore it");
    }

    InitFrontend();

    if (!config.rule_fsts.empty()) {
      std::vector<std::string> files;
      SplitStringToVector(config.rule_fsts, ",", false, &files);
      tn_list_.reserve(files.size());
      for (const auto &f : files) {
        if (config.model.debug) {
#if __OHOS__
          SHERPA_ONNX_LOGE("rule fst: %{public}s", f.c_str());
#else
          SHERPA_ONNX_LOGE("rule fst: %s", f.c_str());
#endif
        }
        tn_list_.push_back(std::make_unique<kaldifst::TextNormalizer>(f));
      }
    }

    if (!config.rule_fars.empty()) {
      if (config.model.debug) {
        SHERPA_ONNX_LOGE("Loading FST archives");
      }
      std::vector<std::string> files;
      SplitStringToVector(config.rule_fars, ",", false, &files);

      tn_list_.reserve(files.size() + tn_list_.size());

      for (const auto &f : files) {
        if (config.model.debug) {
#if __OHOS__
          SHERPA_ONNX_LOGE("rule far: %{public}s", f.c_str());
#else
          SHERPA_ONNX_LOGE("rule far: %s", f.c_str());
#endif
        }
        std::unique_ptr<fst::FarReader<fst::StdArc>> reader(
            fst::FarReader<fst::StdArc>::Open(f));
        for (; !reader->Done(); reader->Next()) {
          std::unique_ptr<fst::StdConstFst> r(
              fst::CastOrConvertToConstFst(reader->GetFst()->Copy()));

          tn_list_.push_back(
              std::make_unique<kaldifst::TextNormalizer>(std::move(r)));
        }
      }

      if (config.model.debug) {
        SHERPA_ONNX_LOGE("FST archives loaded!");
      }
    }

    if (meta_data.sample_rate == 16000 && meta_data.is_zh_en == 1) {
      if (!Contains(config.model.matcha.vocoder, "16") &&
          Contains(config.model.matcha.vocoder, "2")) {
        SHERPA_ONNX_LOGE(
            "This Chinese+English TTS model requires a 16khz Vocoder.");
        SHERPA_ONNX_LOGE("You should use vocos-16khz-univ.onnx.");
        SHERPA_ONNX_LOGE(
            "Please re-download a vocoder from "
            "https://github.com/k2-fsa/sherpa-onnx/releases/tag/"
            "vocoder-models.");
      }
    }
  }

  template <typename Manager>
  OfflineTtsMatchaImpl(Manager *mgr, const OfflineTtsConfig &config)
      : config_(config),
        model_(std::make_unique<OfflineTtsMatchaModel>(mgr, config.model)) {
    const auto &meta_data = model_->GetMetaData();
    if (meta_data.need_vocoder) {
      if (config.model.matcha.vocoder.empty()) {
        SHERPA_ONNX_LOGE("Please provide vocoder for this model");
        SHERPA_ONNX_EXIT(-1);
      }

      vocoder_ = Vocoder::Create(mgr, config.model);
    } else if (!config.model.matcha.vocoder.empty()) {
      SHERPA_ONNX_LOGE(
          "You don't need to provide vocoder for this model. Ignore it");
    }

    InitFrontend(mgr);

    if (!config.rule_fsts.empty()) {
      std::vector<std::string> files;
      SplitStringToVector(config.rule_fsts, ",", false, &files);
      tn_list_.reserve(files.size());
      for (const auto &f : files) {
        if (config.model.debug) {
#if __OHOS__
          SHERPA_ONNX_LOGE("rule fst: %{public}s", f.c_str());
#else
          SHERPA_ONNX_LOGE("rule fst: %s", f.c_str());
#endif
        }
        auto buf = ReadFile(mgr, f);
        std::istringstream is(std::string(buf.data(), buf.size()));
        tn_list_.push_back(std::make_unique<kaldifst::TextNormalizer>(is));
      }
    }

    if (!config.rule_fars.empty()) {
      std::vector<std::string> files;
      SplitStringToVector(config.rule_fars, ",", false, &files);
      tn_list_.reserve(files.size() + tn_list_.size());

      for (const auto &f : files) {
        if (config.model.debug) {
#if __OHOS__
          SHERPA_ONNX_LOGE("rule far: %{public}s", f.c_str());
#else
          SHERPA_ONNX_LOGE("rule far: %s", f.c_str());
#endif
        }

        auto buf = ReadFile(mgr, f);

        std::unique_ptr<std::istream> s(
            new std::istringstream(std::string(buf.data(), buf.size())));

        std::unique_ptr<fst::FarReader<fst::StdArc>> reader(
            fst::FarReader<fst::StdArc>::Open(std::move(s)));

        for (; !reader->Done(); reader->Next()) {
          std::unique_ptr<fst::StdConstFst> r(
              fst::CastOrConvertToConstFst(reader->GetFst()->Copy()));

          tn_list_.push_back(
              std::make_unique<kaldifst::TextNormalizer>(std::move(r)));
        }  // for (; !reader->Done(); reader->Next())
      }  // for (const auto &f : files)
    }  // if (!config.rule_fars.empty())

    if (meta_data.sample_rate == 16000 && meta_data.is_zh_en == 1) {
      if (!Contains(config.model.matcha.vocoder, "16") &&
          Contains(config.model.matcha.vocoder, "2")) {
        SHERPA_ONNX_LOGE(
            "This Chinese+English TTS model requires a 16khz Vocoder.");
        SHERPA_ONNX_LOGE("You should use vocos-16khz-univ.onnx.");
        SHERPA_ONNX_LOGE(
            "Please re-download a vocoder from "
            "https://github.com/k2-fsa/sherpa-onnx/releases/tag/"
            "vocoder-models.");
      }
    }
  }

  int32_t SampleRate() const override {
    return model_->GetMetaData().sample_rate;
  }

  int32_t NumSpeakers() const override {
    return model_->GetMetaData().num_speakers;
  }

  // Supported options in GenerationConfig:
  //   - sid: Speaker ID for multi-speaker models
  //   - speed: Speech speed factor (default: 1.0)
  //   - silence_scale: Scale applied to pauses in the generated audio
  //
  // Supported extra options in config.extra:
  //   - None
  GeneratedAudio Generate(
      const std::string &_text, const GenerationConfig &gen_config,
      GeneratedAudioCallback callback = nullptr) const override {
    if (config_.model.debug) {
      SHERPA_ONNX_LOGE("%s", gen_config.ToString().c_str());
    }

    int64_t sid = gen_config.sid;
    float speed = gen_config.speed;
    if (speed <= 0) {
      SHERPA_ONNX_LOGE("Speed must be > 0. Given: %f", speed);
      return {};
    }

    const auto &meta_data = model_->GetMetaData();
    int32_t num_speakers = meta_data.num_speakers;

    if (num_speakers == 0 && sid != 0) {
#if __OHOS__
      SHERPA_ONNX_LOGE(
          "This is a single-speaker model and supports only sid 0. Given sid: "
          "%{public}d. sid is ignored",
          static_cast<int32_t>(sid));
#else
      SHERPA_ONNX_LOGE(
          "This is a single-speaker model and supports only sid 0. Given sid: "
          "%d. sid is ignored",
          static_cast<int32_t>(sid));
#endif
    }

    if (num_speakers != 0 && (sid >= num_speakers || sid < 0)) {
#if __OHOS__
      SHERPA_ONNX_LOGE(
          "This model contains only %{public}d speakers. sid should be in the "
          "range [%{public}d, %{public}d]. Given: %{public}d. Use sid=0",
          num_speakers, 0, num_speakers - 1, static_cast<int32_t>(sid));
#else
      SHERPA_ONNX_LOGE(
          "This model contains only %d speakers. sid should be in the range "
          "[%d, %d]. Given: %d. Use sid=0",
          num_speakers, 0, num_speakers - 1, static_cast<int32_t>(sid));
#endif
      sid = 0;
    }

    std::string text = _text;
    if (config_.model.debug) {
#if __OHOS__
      SHERPA_ONNX_LOGE("Raw text: %{public}s", text.c_str());
#else
      SHERPA_ONNX_LOGE("Raw text: %s", text.c_str());
#endif
    }

    if (!tn_list_.empty()) {
      for (const auto &tn : tn_list_) {
        text = tn->Normalize(text);
        if (config_.model.debug) {
#if __OHOS__
          SHERPA_ONNX_LOGE("After normalizing: %{public}s", text.c_str());
#else
          SHERPA_ONNX_LOGE("After normalizing: %s", text.c_str());
#endif
        }
      }
    }

    std::vector<TokenIDs> token_ids =
        frontend_->ConvertTextToTokenIds(text, meta_data.voice);

    if (token_ids.empty() ||
        (token_ids.size() == 1 && token_ids[0].tokens.empty())) {
#if __OHOS__
      SHERPA_ONNX_LOGE("Failed to convert '%{public}s' to token IDs",
                       text.c_str());
#else
      SHERPA_ONNX_LOGE("Failed to convert '%s' to token IDs", text.c_str());
#endif
      return {};
    }

    std::vector<std::vector<int64_t>> x;

    x.reserve(token_ids.size());

    for (auto &i : token_ids) {
      x.push_back(std::move(i.tokens));
    }

    if (meta_data.add_blank) {
      for (auto &k : x) {
        k = AddBlank(k, meta_data.pad_id);
      }
    }

    int32_t x_size = static_cast<int32_t>(x.size());

    if (config_.max_num_sentences <= 0 || x_size <= config_.max_num_sentences) {
      auto ans = Process(x, sid, speed, gen_config.silence_scale);
      if (callback) {
        callback(ans.samples.data(), ans.samples.size(), 1.0);
      }
      return ans;
    }

    // the input text is too long, we process sentences within it in batches
    // to avoid OOM. Batch size is config_.max_num_sentences
    std::vector<std::vector<int64_t>> batch_x;

    int32_t batch_size = config_.max_num_sentences;
    batch_x.reserve(config_.max_num_sentences);
    int32_t num_batches = x_size / batch_size;

    if (config_.model.debug) {
#if __OHOS__
      SHERPA_ONNX_LOGE(
          "Text is too long. Split it into %{public}d batches. batch size: "
          "%{public}d. Number of sentences: %{public}d",
          num_batches, batch_size, x_size);
#else
      SHERPA_ONNX_LOGE(
          "Text is too long. Split it into %d batches. batch size: %d. Number "
          "of sentences: %d",
          num_batches, batch_size, x_size);
#endif
    }

    GeneratedAudio ans;

    int32_t should_continue = 1;

    int32_t k = 0;

    for (int32_t b = 0; b != num_batches && should_continue; ++b) {
      batch_x.clear();
      for (int32_t i = 0; i != batch_size; ++i, ++k) {
        batch_x.push_back(std::move(x[k]));
      }

      auto audio = Process(batch_x, sid, speed, gen_config.silence_scale);
      ans.sample_rate = audio.sample_rate;
      ans.samples.insert(ans.samples.end(), audio.samples.begin(),
                         audio.samples.end());
      if (callback) {
        should_continue = callback(audio.samples.data(), audio.samples.size(),
                                   (b + 1) * 1.0 / num_batches);
        // Caution(fangjun): audio is freed when the callback returns, so users
        // should copy the data if they want to access the data after
        // the callback returns to avoid segmentation fault.
      }
    }

    batch_x.clear();
    while (k < static_cast<int32_t>(x.size()) && should_continue) {
      batch_x.push_back(std::move(x[k]));

      ++k;
    }

    if (!batch_x.empty()) {
      auto audio = Process(batch_x, sid, speed, gen_config.silence_scale);
      ans.sample_rate = audio.sample_rate;
      ans.samples.insert(ans.samples.end(), audio.samples.begin(),
                         audio.samples.end());
      if (callback) {
        callback(audio.samples.data(), audio.samples.size(), 1.0);
        // Caution(fangjun): audio is freed when the callback returns, so users
        // should copy the data if they want to access the data after
        // the callback returns to avoid segmentation fault.
      }
    }

    return ans;
  }

  [[deprecated("Use Generate(text, GenerationConfig, callback) instead")]]
  GeneratedAudio Generate(
      const std::string &text, int64_t sid = 0, float speed = 1.0,
      GeneratedAudioCallback callback = nullptr) const override {
    GenerationConfig gen_config;
    gen_config.sid = sid;
    gen_config.speed = speed;
    gen_config.silence_scale = config_.silence_scale;
    return Generate(text, gen_config, std::move(callback));
  }

 private:
  template <typename Manager>
  void InitFrontend(Manager *mgr) {
    // for piper phonemizer
    // we require that you copy espeak_ng_data
    // from assets to disk
    const auto &meta_data = model_->GetMetaData();

    if (meta_data.is_zh_en) {
      frontend_ = std::make_unique<MatchaTtsLexicon>(
          mgr, config_.model.matcha.lexicon, config_.model.matcha.tokens,
          config_.model.matcha.data_dir, config_.model.debug, false);
    } else if (meta_data.jieba) {
      frontend_ = std::make_unique<CharacterLexicon>(
          mgr, config_.model.matcha.lexicon, config_.model.matcha.tokens,
          config_.model.debug);
    } else if (meta_data.has_espeak) {
      frontend_ = std::make_unique<PiperPhonemizeLexicon>(
          mgr, config_.model.matcha.tokens, config_.model.matcha.data_dir,
          meta_data);
    } else {
      SHERPA_ONNX_LOGE("Unsupported matcha tts model. Please ask for help");
      SHERPA_ONNX_EXIT(-1);
    }
  }

  void InitFrontend() {
    const auto &meta_data = model_->GetMetaData();

    if (meta_data.is_zh_en) {
      frontend_ = std::make_unique<MatchaTtsLexicon>(
          config_.model.matcha.lexicon, config_.model.matcha.tokens,
          config_.model.matcha.data_dir, config_.model.debug, false);
    } else if (meta_data.jieba) {
      frontend_ = std::make_unique<CharacterLexicon>(
          config_.model.matcha.lexicon, config_.model.matcha.tokens,
          config_.model.debug);
    } else if (meta_data.has_espeak) {
      frontend_ = std::make_unique<PiperPhonemizeLexicon>(
          config_.model.matcha.tokens, config_.model.matcha.data_dir,
          meta_data);
    } else {
      SHERPA_ONNX_LOGE("Unsupported matcha tts model. Please ask for help");
      SHERPA_ONNX_EXIT(-1);
    }
  }

  GeneratedAudio Process(const std::vector<std::vector<int64_t>> &tokens,
                         int32_t sid, float speed,
                         float silence_scale) const {
    int32_t num_tokens = 0;
    for (const auto &k : tokens) {
      num_tokens += k.size();
    }

    std::vector<int64_t> x;
    x.reserve(num_tokens);
    for (const auto &k : tokens) {
      x.insert(x.end(), k.begin(), k.end());
    }

    if (config_.model.debug) {
      std::ostringstream oss;
      for (int32_t i : x) {
        oss << i << ", ";
      }
      oss << "\n";
      SHERPA_ONNX_LOGE("%s\n", oss.str().c_str());
    }

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::array<int64_t, 2> x_shape = {1, static_cast<int32_t>(x.size())};
    Ort::Value x_tensor = Ort::Value::CreateTensor(
        memory_info, x.data(), x.size(), x_shape.data(), x_shape.size());

    GeneratedAudio ans;

    Ort::Value mel = model_->Run(std::move(x_tensor), sid, speed);

    const auto &meta_data = model_->GetMetaData();
    if (meta_data.need_vocoder) {
      ans.samples = vocoder_->Run(std::move(mel));
    } else {
      std::vector<int64_t> shape = mel.GetTensorTypeAndShapeInfo().GetShape();
      int64_t num_samples = 1;
      for (auto s : shape) {
        num_samples *= s;
      }
      ans.samples.resize(num_samples);
      auto p = mel.GetTensorData<float>();
      std::copy(p, p + num_samples, ans.samples.data());
    }

    ans.sample_rate = model_->GetMetaData().sample_rate;

    if (silence_scale != 1) {
      ans = ans.ScaleSilence(silence_scale);
    }

    return ans;
  }

 private:
  OfflineTtsConfig config_;
  std::unique_ptr<OfflineTtsMatchaModel> model_;
  std::unique_ptr<Vocoder> vocoder_;
  std::vector<std::unique_ptr<kaldifst::TextNormalizer>> tn_list_;
  std::unique_ptr<OfflineTtsFrontend> frontend_;
};

}  // namespace sherpa_onnx
#endif  // SHERPA_ONNX_CSRC_OFFLINE_TTS_MATCHA_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-tts-matcha-model-config.cc
================================================
// sherpa-onnx/csrc/offline-tts-matcha-model-config.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-tts-matcha-model-config.h"

#include <string>
#include <vector>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

void OfflineTtsMatchaModelConfig::Register(ParseOptions *po) {
  po->Register("matcha-acoustic-model", &acoustic_model,
               "Path to matcha acoustic model");
  po->Register("matcha-vocoder", &vocoder, "Path to matcha vocoder");
  po->Register(
      "matcha-lexicon", &lexicon,
      "Path to lexicon.txt for Matcha models. You can pass multiple "
      "files separated by comma , e.g., lexicon.txt,lexicon2.txt,lexicon3.txt");
  po->Register("matcha-tokens", &tokens,
               "Path to tokens.txt for Matcha models");
  po->Register("matcha-data-dir", &data_dir,
               "Path to the directory containing dict for espeak-ng. If it is "
               "given, --matcha-lexicon is ignored.");
  po->Register("matcha-dict-dir", &dict_dir,
               "Not used. You don't need to provide a value for it");
  po->Register("matcha-noise-scale", &noise_scale,
               "noise_scale for Matcha models");
  po->Register("matcha-length-scale", &length_scale,
               "Speech speed. Larger->Slower; Smaller->faster.");
}

bool OfflineTtsMatchaModelConfig::Validate() const {
  if (acoustic_model.empty()) {
    SHERPA_ONNX_LOGE("Please provide --matcha-acoustic-model");
    return false;
  }

  if (!FileExists(acoustic_model)) {
    SHERPA_ONNX_LOGE("--matcha-acoustic-model: '%s' does not exist",
                     acoustic_model.c_str());
    return false;
  }

  if (tokens.empty()) {
    SHERPA_ONNX_LOGE("Please provide --matcha-tokens");
    return false;
  }

  if (!FileExists(tokens)) {
    SHERPA_ONNX_LOGE("--matcha-tokens: '%s' does not exist", tokens.c_str());
    return false;
  }

  if (!data_dir.empty()) {
    if (!FileExists(data_dir + "/phontab")) {
      SHERPA_ONNX_LOGE(
          "'%s/phontab' does not exist. Please check --matcha-data-dir",
          data_dir.c_str());
      return false;
    }

    if (!FileExists(data_dir + "/phonindex")) {
      SHERPA_ONNX_LOGE(
          "'%s/phonindex' does not exist. Please check --matcha-data-dir",
          data_dir.c_str());
      return false;
    }

    if (!FileExists(data_dir + "/phondata")) {
      SHERPA_ONNX_LOGE(
          "'%s/phondata' does not exist. Please check --matcha-data-dir",
          data_dir.c_str());
      return false;
    }

    if (!FileExists(data_dir + "/intonations")) {
      SHERPA_ONNX_LOGE(
          "'%s/intonations' does not exist. Please check --matcha-data-dir",
          data_dir.c_str());
      return false;
    }
  }

  if (!lexicon.empty()) {
    std::vector<std::string> files;
    SplitStringToVector(lexicon, ",", false, &files);
    for (const auto &f : files) {
      if (!FileExists(f)) {
        SHERPA_ONNX_LOGE(
            "lexicon '%s' does not exist. Please re-check --matcha-lexicon",
            f.c_str());
        return false;
      }
    }
  }

  if (!dict_dir.empty()) {
    SHERPA_ONNX_LOGE(
        "From sherpa-onnx v1.12.15, you don't need to provide dict_dir for "
        "this model. Ignore it");
  }

  return true;
}

std::string OfflineTtsMatchaModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineTtsMatchaModelConfig(";
  os << "acoustic_model=\"" << acoustic_model << "\", ";
  os << "vocoder=\"" << vocoder << "\", ";
  os << "lexicon=\"" << lexicon << "\", ";
  os << "tokens=\"" << tokens << "\", ";
  os << "data_dir=\"" << data_dir << "\", ";
  os << "noise_scale=" << noise_scale << ", ";
  os << "length_scale=" << length_scale << ")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-tts-matcha-model-config.h
================================================
// sherpa-onnx/csrc/offline-tts-matcha-model-config.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_MATCHA_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_MATCHA_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineTtsMatchaModelConfig {
  std::string acoustic_model;
  std::string vocoder;
  std::string lexicon;
  std::string tokens;

  // If data_dir is given, lexicon is ignored
  // data_dir is for piper-phonemizer, which uses espeak-ng
  std::string data_dir;

  // Used for Chinese TTS models using jieba
  std::string dict_dir;

  float noise_scale = 1;
  float length_scale = 1;

  OfflineTtsMatchaModelConfig() = default;

  OfflineTtsMatchaModelConfig(const std::string &acoustic_model,
                              const std::string &vocoder,
                              const std::string &lexicon,
                              const std::string &tokens,
                              const std::string &data_dir,
                              const std::string &dict_dir,
                              float noise_scale = 1.0, float length_scale = 1)
      : acoustic_model(acoustic_model),
        vocoder(vocoder),
        lexicon(lexicon),
        tokens(tokens),
        data_dir(data_dir),
        dict_dir(dict_dir),
        noise_scale(noise_scale),
        length_scale(length_scale) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TTS_MATCHA_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-tts-matcha-model-meta-data.h
================================================
// sherpa-onnx/csrc/offline-tts-matcha-model-meta-data.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_MATCHA_MODEL_META_DATA_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_MATCHA_MODEL_META_DATA_H_

#include <cstdint>
#include <string>

namespace sherpa_onnx {

// If you are not sure what each field means, please
// have a look of the Python file in the model directory that
// you have downloaded.
struct OfflineTtsMatchaModelMetaData {
  int32_t sample_rate = 0;
  int32_t num_speakers = 0;
  int32_t version = 1;
  int32_t jieba = 0;
  int32_t has_espeak = 0;
  int32_t use_eos_bos = 0;
  int32_t pad_id = 0;
  int32_t add_blank = 1;
  int32_t is_zh_en = 0;
  bool need_vocoder = true;

  std::string voice;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TTS_MATCHA_MODEL_META_DATA_H_


================================================
FILE: sherpa-onnx/csrc/offline-tts-matcha-model.cc
================================================
// sherpa-onnx/csrc/offline-tts-matcha-model.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-tts-matcha-model.h"

#include <algorithm>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"

namespace sherpa_onnx {

class OfflineTtsMatchaModel::Impl {
 public:
  explicit Impl(const OfflineTtsModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(config.matcha.acoustic_model);
    Init(buf.data(), buf.size());
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineTtsModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(mgr, config.matcha.acoustic_model);
    Init(buf.data(), buf.size());
  }

  const OfflineTtsMatchaModelMetaData &GetMetaData() const {
    return meta_data_;
  }

  Ort::Value Run(Ort::Value x, int64_t sid, float speed) {
    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::vector<int64_t> x_shape = x.GetTensorTypeAndShapeInfo().GetShape();
    if (x_shape[0] != 1) {
      SHERPA_ONNX_LOGE("Support only batch_size == 1. Given: %d",
                       static_cast<int32_t>(x_shape[0]));
      exit(-1);
    }

    int64_t len = x_shape[1];
    int64_t len_shape = 1;

    Ort::Value x_length =
        Ort::Value::CreateTensor(memory_info, &len, 1, &len_shape, 1);

    int64_t scale_shape = 1;
    float noise_scale = config_.matcha.noise_scale;
    float length_scale = config_.matcha.length_scale;

    if (speed != 1 && speed > 0) {
      length_scale = 1. / speed;
    }

    Ort::Value noise_scale_tensor =
        Ort::Value::CreateTensor(memory_info, &noise_scale, 1, &scale_shape, 1);

    Ort::Value length_scale_tensor = Ort::Value::CreateTensor(
        memory_info, &length_scale, 1, &scale_shape, 1);

    Ort::Value sid_tensor =
        Ort::Value::CreateTensor(memory_info, &sid, 1, &scale_shape, 1);

    std::array<float, 2> scales = {noise_scale, length_scale};
    int64_t scales_shape = 2;

    Ort::Value scales_tensor = Ort::Value::CreateTensor(
        memory_info, scales.data(), scales.size(), &scales_shape, 1);

    std::vector<Ort::Value> inputs;
    inputs.reserve(5);
    inputs.push_back(std::move(x));
    inputs.push_back(std::move(x_length));
    if (input_names_[2] == "scales") {
      // for models from
      // https://github.com/shivammehta25/Matcha-TTS
      inputs.push_back(std::move(scales_tensor));
    } else {
      // for models from icefall
      inputs.push_back(std::move(noise_scale_tensor));
      inputs.push_back(std::move(length_scale_tensor));
    }

    if (input_names_.size() == 5 && input_names_.back() == "sid") {
      // for models from icefall
      inputs.push_back(std::move(sid_tensor));

      // Note that we have not supported multi-speaker tts models from
      // https://github.com/shivammehta25/Matcha-TTS
    }

    auto out =
        sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(),
                   output_names_ptr_.data(), output_names_ptr_.size());

    return std::move(out[0]);
  }

 private:
  void Init(void *model_data, size_t model_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);

    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      os << "---matcha model---\n";
      PrintModelMetadata(os, meta_data);

      os << "----------input names----------\n";
      int32_t i = 0;
      for (const auto &s : input_names_) {
        os << i << " " << s << "\n";
        ++i;
      }
      os << "----------output names----------\n";
      i = 0;
      for (const auto &s : output_names_) {
        os << i << " " << s << "\n";
        ++i;
      }

#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
    SHERPA_ONNX_READ_META_DATA(meta_data_.sample_rate, "sample_rate");
    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.version, "version", 1);
    SHERPA_ONNX_READ_META_DATA(meta_data_.num_speakers, "n_speakers");
    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.jieba, "jieba", 0);
    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.has_espeak, "has_espeak",
                                            0);
    SHERPA_ONNX_READ_META_DATA(meta_data_.use_eos_bos, "use_eos_bos");
    SHERPA_ONNX_READ_META_DATA(meta_data_.pad_id, "pad_id");
    SHERPA_ONNX_READ_META_DATA_STR_WITH_DEFAULT(meta_data_.voice, "voice",
                                                "en-us");

    if (meta_data_.voice == "zh en-us") {
      // for models from
      // https://modelscope.cn/models/dengcunqin/matcha_tts_zh_en_20251010
      meta_data_.add_blank = 0;
      meta_data_.is_zh_en = 1;
    }

    if (output_names_.front() == "audio_output") {
      meta_data_.need_vocoder = false;
    }
  }

 private:
  OfflineTtsModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;

  OfflineTtsMatchaModelMetaData meta_data_;
};

OfflineTtsMatchaModel::OfflineTtsMatchaModel(
    const OfflineTtsModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineTtsMatchaModel::OfflineTtsMatchaModel(
    Manager *mgr, const OfflineTtsModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineTtsMatchaModel::~OfflineTtsMatchaModel() = default;

const OfflineTtsMatchaModelMetaData &OfflineTtsMatchaModel::GetMetaData()
    const {
  return impl_->GetMetaData();
}

Ort::Value OfflineTtsMatchaModel::Run(Ort::Value x, int64_t sid /*= 0*/,
                                      float speed /*= 1.0*/) const {
  return impl_->Run(std::move(x), sid, speed);
}

#if __ANDROID_API__ >= 9
template OfflineTtsMatchaModel::OfflineTtsMatchaModel(
    AAssetManager *mgr, const OfflineTtsModelConfig &config);
#endif

#if __OHOS__
template OfflineTtsMatchaModel::OfflineTtsMatchaModel(
    NativeResourceManager *mgr, const OfflineTtsModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-tts-matcha-model.h
================================================
// sherpa-onnx/csrc/offline-tts-matcha-model.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_MATCHA_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_MATCHA_MODEL_H_

#include <memory>
#include <string>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-tts-matcha-model-meta-data.h"
#include "sherpa-onnx/csrc/offline-tts-model-config.h"

namespace sherpa_onnx {

class OfflineTtsMatchaModel {
 public:
  ~OfflineTtsMatchaModel();

  explicit OfflineTtsMatchaModel(const OfflineTtsModelConfig &config);

  template <typename Manager>
  OfflineTtsMatchaModel(Manager *mgr, const OfflineTtsModelConfig &config);

  // Return a float32 tensor containing the mel
  // of shape (batch_size, mel_dim, num_frames)
  Ort::Value Run(Ort::Value x, int64_t sid = 0, float speed = 1.0) const;

  const OfflineTtsMatchaModelMetaData &GetMetaData() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TTS_MATCHA_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/offline-tts-model-config.cc
================================================
// sherpa-onnx/csrc/offline-tts-model-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-tts-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OfflineTtsModelConfig::Register(ParseOptions *po) {
  vits.Register(po);
  matcha.Register(po);
  kokoro.Register(po);
  zipvoice.Register(po);
  kitten.Register(po);
  pocket.Register(po);
  supertonic.Register(po);

  po->Register("num-threads", &num_threads,
               "Number of threads to run the neural network");

  po->Register("debug", &debug,
               "true to print model information while loading it.");

  po->Register("provider", &provider,
               "Specify a provider to use: cpu, cuda, coreml");
}

bool OfflineTtsModelConfig::Validate() const {
  if (num_threads < 1) {
    SHERPA_ONNX_LOGE("num_threads should be > 0. Given %d", num_threads);
    return false;
  }

  if (!vits.model.empty()) {
    return vits.Validate();
  }

  if (!matcha.acoustic_model.empty()) {
    return matcha.Validate();
  }

  if (!zipvoice.decoder.empty()) {
    return zipvoice.Validate();
  }

  if (!kokoro.model.empty()) {
    return kokoro.Validate();
  }

  if (!kitten.model.empty()) {
    return kitten.Validate();
  }

  if (!pocket.lm_flow.empty()) {
    return pocket.Validate();
  }

  if (!supertonic.tts_json.empty()) {
    return supertonic.Validate();
  }

  SHERPA_ONNX_LOGE("Please provide exactly one tts model.");

  return false;
}

std::string OfflineTtsModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineTtsModelConfig(";
  os << "vits=" << vits.ToString() << ", ";
  os << "matcha=" << matcha.ToString() << ", ";
  os << "kokoro=" << kokoro.ToString() << ", ";
  os << "zipvoice=" << zipvoice.ToString() << ", ";
  os << "kitten=" << kitten.ToString() << ", ";
  os << "pocket=" << pocket.ToString() << ", ";
  os << "supertonic=" << supertonic.ToString() << ", ";
  os << "num_threads=" << num_threads << ", ";
  os << "debug=" << (debug ? "True" : "False") << ", ";
  os << "provider=\"" << provider << "\")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-tts-model-config.h
================================================
// sherpa-onnx/csrc/offline-tts-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/offline-tts-kitten-model-config.h"
#include "sherpa-onnx/csrc/offline-tts-kokoro-model-config.h"
#include "sherpa-onnx/csrc/offline-tts-matcha-model-config.h"
#include "sherpa-onnx/csrc/offline-tts-pocket-model-config.h"
#include "sherpa-onnx/csrc/offline-tts-supertonic-model-config.h"
#include "sherpa-onnx/csrc/offline-tts-vits-model-config.h"
#include "sherpa-onnx/csrc/offline-tts-zipvoice-model-config.h"
#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineTtsModelConfig {
  OfflineTtsVitsModelConfig vits;
  OfflineTtsMatchaModelConfig matcha;
  OfflineTtsKokoroModelConfig kokoro;
  OfflineTtsZipvoiceModelConfig zipvoice;
  OfflineTtsKittenModelConfig kitten;
  OfflineTtsPocketModelConfig pocket;
  OfflineTtsSupertonicModelConfig supertonic;

  int32_t num_threads = 1;
  bool debug = false;
  std::string provider = "cpu";

  OfflineTtsModelConfig() = default;

  OfflineTtsModelConfig(const OfflineTtsVitsModelConfig &vits,
                        const OfflineTtsMatchaModelConfig &matcha,
                        const OfflineTtsKokoroModelConfig &kokoro,
                        const OfflineTtsZipvoiceModelConfig &zipvoice,
                        const OfflineTtsKittenModelConfig &kitten,
                        const OfflineTtsPocketModelConfig &pocket,
                        const OfflineTtsSupertonicModelConfig &supertonic,
                        int32_t num_threads, bool debug,
                        const std::string &provider)
      : vits(vits),
        matcha(matcha),
        kokoro(kokoro),
        zipvoice(zipvoice),
        kitten(kitten),
        pocket(pocket),
        supertonic(supertonic),
        num_threads(num_threads),
        debug(debug),
        provider(provider) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TTS_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-tts-pocket-impl.h
================================================
// sherpa-onnx/csrc/offline-tts-pocket-impl.h
//
// Copyright (c)  2026  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_POCKET_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_POCKET_IMPL_H_

#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <functional>
#include <iomanip>
#include <ios>
#include <limits>
#include <list>
#include <memory>
#include <mutex>
#include <sstream>
#include <string>
#include <tuple>
#include <unordered_map>
#include <utility>
#include <vector>

#include "fst/extensions/far/far.h"
#include "kaldifst/csrc/kaldi-fst-io.h"
#include "kaldifst/csrc/text-normalizer.h"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/math.h"
#include "sherpa-onnx/csrc/normal-data-generator.h"
#include "sherpa-onnx/csrc/offline-tts-impl.h"
#include "sherpa-onnx/csrc/offline-tts-pocket-model.h"
#include "sherpa-onnx/csrc/resample.h"
#include "sherpa-onnx/csrc/sentence-piece-tokenizer.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class OfflineTtsPocketImpl : public OfflineTtsImpl {
 public:
  explicit OfflineTtsPocketImpl(const OfflineTtsConfig &config)
      : config_(config),
        model_(std::make_unique<OfflineTtsPocketModel>(config.model)) {
    InitTokenizer();

    cache_.SetCapacity(config.model.pocket.voice_embedding_cache_capacity);

    if (!config.rule_fsts.empty()) {
      std::vector<std::string> files;
      SplitStringToVector(config.rule_fsts, ",", false, &files);
      tn_list_.reserve(files.size());
      for (const auto &f : files) {
        if (config.model.debug) {
#if __OHOS__
          SHERPA_ONNX_LOGE("rule fst: %{public}s", f.c_str());
#else
          SHERPA_ONNX_LOGE("rule fst: %s", f.c_str());
#endif
        }
        tn_list_.push_back(std::make_unique<kaldifst::TextNormalizer>(f));
      }
    }

    if (!config.rule_fars.empty()) {
      if (config.model.debug) {
        SHERPA_ONNX_LOGE("Loading FST archives");
      }
      std::vector<std::string> files;
      SplitStringToVector(config.rule_fars, ",", false, &files);

      tn_list_.reserve(files.size() + tn_list_.size());

      for (const auto &f : files) {
        if (config.model.debug) {
#if __OHOS__
          SHERPA_ONNX_LOGE("rule far: %{public}s", f.c_str());
#else
          SHERPA_ONNX_LOGE("rule far: %s", f.c_str());
#endif
        }
        std::unique_ptr<fst::FarReader<fst::StdArc>> reader(
            fst::FarReader<fst::StdArc>::Open(f));
        for (; !reader->Done(); reader->Next()) {
          std::unique_ptr<fst::StdConstFst> r(
              fst::CastOrConvertToConstFst(reader->GetFst()->Copy()));

          tn_list_.push_back(
              std::make_unique<kaldifst::TextNormalizer>(std::move(r)));
        }
      }

      if (config.model.debug) {
        SHERPA_ONNX_LOGE("FST archives loaded!");
      }
    }
  }

  template <typename Manager>
  OfflineTtsPocketImpl(Manager *mgr, const OfflineTtsConfig &config)
      : config_(config),
        model_(std::make_unique<OfflineTtsPocketModel>(mgr, config.model)) {
    InitTokenizer(mgr);
    cache_.SetCapacity(config.model.pocket.voice_embedding_cache_capacity);

    if (!config.rule_fsts.empty()) {
      std::vector<std::string> files;
      SplitStringToVector(config.rule_fsts, ",", false, &files);
      tn_list_.reserve(files.size());
      for (const auto &f : files) {
        if (config.model.debug) {
#if __OHOS__
          SHERPA_ONNX_LOGE("rule fst: %{public}s", f.c_str());
#else
          SHERPA_ONNX_LOGE("rule fst: %s", f.c_str());
#endif
        }
        auto buf = ReadFile(mgr, f);
        std::istringstream is(std::string(buf.data(), buf.size()));
        tn_list_.push_back(std::make_unique<kaldifst::TextNormalizer>(is));
      }
    }

    if (!config.rule_fars.empty()) {
      std::vector<std::string> files;
      SplitStringToVector(config.rule_fars, ",", false, &files);
      tn_list_.reserve(files.size() + tn_list_.size());

      for (const auto &f : files) {
        if (config.model.debug) {
#if __OHOS__
          SHERPA_ONNX_LOGE("rule far: %{public}s", f.c_str());
#else
          SHERPA_ONNX_LOGE("rule far: %s", f.c_str());
#endif
        }

        auto buf = ReadFile(mgr, f);

        std::unique_ptr<std::istream> s(
            new std::istringstream(std::string(buf.data(), buf.size())));

        std::unique_ptr<fst::FarReader<fst::StdArc>> reader(
            fst::FarReader<fst::StdArc>::Open(std::move(s)));

        for (; !reader->Done(); reader->Next()) {
          std::unique_ptr<fst::StdConstFst> r(
              fst::CastOrConvertToConstFst(reader->GetFst()->Copy()));

          tn_list_.push_back(
              std::make_unique<kaldifst::TextNormalizer>(std::move(r)));
        }  // for (; !reader->Done(); reader->Next())
      }  // for (const auto &f : files)
    }  // if (!config.rule_fars.empty())
  }

  int32_t SampleRate() const override { return 24000; }

  int32_t NumSpeakers() const override { return 1; }

  /**
   *
   * Supported extra parameters:
   *
   *  - max_frames, int, default 500
   *  - frames_after_eos, int, default 3
   *  - temperature, float, default 0.7
   *  - chunk_size, int, default 15
   *  - max_reference_audio_len, float, default 10, in seconds
   *  - max_char_in_sentence, int, default 200
   *  - min_char_in_sentence, int, default 30
   *  - seed, int, default -1
   */
  GeneratedAudio Generate(
      const std::string &_text, const GenerationConfig &gen_config,
      GeneratedAudioCallback callback = nullptr) const override {
    if (config_.model.debug) {
      SHERPA_ONNX_LOGE("%s", gen_config.ToString().c_str());
    }

    std::string text = _text;
    if (config_.model.debug) {
#if __OHOS__
      SHERPA_ONNX_LOGE("Raw text: %{public}s", text.c_str());
#else
      SHERPA_ONNX_LOGE("Raw text: %s", text.c_str());
#endif
      std::ostringstream os;
      os << "In bytes (hex):\n";
      const auto p = reinterpret_cast<const uint8_t *>(text.c_str());
      for (int32_t i = 0; i != text.size(); ++i) {
        os << std::setw(2) << std::setfill('0') << std::hex
           << static_cast<uint32_t>(p[i]) << " ";
      }
      os << "\n";

#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
    }

    if (!tn_list_.empty()) {
      for (const auto &tn : tn_list_) {
        text = tn->Normalize(text);
        if (config_.model.debug) {
#if __OHOS__
          SHERPA_ONNX_LOGE("After normalizing: %{public}s", text.c_str());
#else
          SHERPA_ONNX_LOGE("After normalizing: %s", text.c_str());
#endif
        }
      }
    }

    auto sentences = SplitByPunctuation(text);

    if (sentences.empty()) {
      return {};
    }

    int32_t max_char_in_sentence =
        gen_config.GetExtraInt("max_char_in_sentence", 200);

    int32_t min_char_in_sentence =
        gen_config.GetExtraInt("min_char_in_sentence", 30);

    sentences = MergeShortSentences(sentences, min_char_in_sentence);

    std::vector<std::string> final_chunks;
    for (const auto &s : sentences) {
      auto pieces = SplitLongSentence(s, max_char_in_sentence);
      final_chunks.insert(final_chunks.end(), pieces.begin(), pieces.end());
    }

    sentences = std::move(final_chunks);

    Ort::Value voice_embedding = GetVoiceEmbedding(gen_config);
    if (!voice_embedding) {
      return {};
    }

    GeneratedAudio result;
    result.sample_rate = SampleRate();

    const int32_t total = sentences.size();

    bool should_continue = true;

    for (int32_t i = 0; i < total && should_continue; ++i) {
      if (config_.model.debug) {
#if __OHOS__
        SHERPA_ONNX_LOGE("Processing %{public}d/%{public}d: %{public}s", i + 1,
                         total, sentences[i].c_str());
#else
        SHERPA_ONNX_LOGE("Processing %d/%d: %s", i + 1, total,
                         sentences[i].c_str());
#endif
      }
      GeneratedAudioCallback wrapped_cb = nullptr;

      if (callback) {
        wrapped_cb = [&, i](const float *samples, int32_t n,
                            float sentence_progress) -> bool {
          float global_progress = (i + sentence_progress) / total;

          return callback(samples, n, global_progress);
        };
      }

      GeneratedAudio cur = GenerateSingleSentence(sentences[i], gen_config,
                                                  View(&voice_embedding),
                                                  should_continue, wrapped_cb);

      if (cur.samples.empty()) {
        continue;
      }

      result.samples.insert(result.samples.end(), cur.samples.begin(),
                            cur.samples.end());
    }

    float silence_scale = gen_config.silence_scale;
    if (silence_scale != 1) {
      result = result.ScaleSilence(silence_scale);
    }

    return result;
  }

  static size_t ComputeHash(const float *p, size_t n) {
    size_t hash = 0;

    auto hash_combine = [](size_t &seed, size_t value) {
      seed ^= value + 0x9e3779b97f4a7c15ull + (seed << 6) + (seed >> 2);
    };

    hash_combine(hash, n);

    for (size_t i = 0; i < n; ++i) {
      uint32_t bits;
      std::memcpy(&bits, &p[i], sizeof(float));
      hash_combine(hash, bits);
    }

    return hash;
  }

  GeneratedAudio GenerateSingleSentence(
      const std::string &text, const GenerationConfig &gen_config,
      Ort::Value voice_embedding, bool &should_continue,
      GeneratedAudioCallback callback = nullptr) const {
    Ort::Value text_embedding = GetTextEmbedding(text);

    auto lm_main_state = model_->GetLmMainInitState();

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    {
      std::array<int64_t, 3> empty_seq_shape = {1, 0, 32};

      Ort::Value empty_seq_tensor = Ort::Value::CreateTensor<float>(
          memory_info, nullptr, 0, empty_seq_shape.data(),
          empty_seq_shape.size());

      // voice conditioning
      // discard the return result
      RunLmMain(View(&empty_seq_tensor), std::move(voice_embedding),
                lm_main_state);

      // text conditioning
      // discard the return result
      RunLmMain(std::move(empty_seq_tensor), std::move(text_embedding),
                lm_main_state);
    }

    std::vector<float> cur(1 * 1 * 32, std::numeric_limits<float>::quiet_NaN());
    std::array<int64_t, 3> cur_shape = {1, 1, 32};

    int32_t num_steps = gen_config.num_steps;
    int32_t max_frames = gen_config.GetExtraInt("max_frames", 500);
    int32_t frames_after_eos = gen_config.GetExtraInt("frames_after_eos", 3);
    float temperature = gen_config.GetExtraFloat("temperature", 0.7f);
    float stddev = std::sqrt(temperature);
    int32_t seed = gen_config.GetExtraInt("seed", -1);

    NormalDataGenerator normal_gen(0, stddev, seed);
    std::vector<float> noise(32, 0);
    std::array<int64_t, 2> noise_shape = {1, 32};

    Ort::Value noise_tensor =
        Ort::Value::CreateTensor(memory_info, noise.data(), noise.size(),
                                 noise_shape.data(), noise_shape.size());

    std::array<int64_t, 3> empty_text_shape = {1, 0, 1024};

    Ort::Value empty_text_tensor = Ort::Value::CreateTensor<float>(
        memory_info, nullptr, 0, empty_text_shape.data(),
        empty_text_shape.size());

    Ort::Value conditioning{nullptr};
    Ort::Value eos_logit{nullptr};

    std::vector<float> latent_list;
    int32_t eos_step = -1;
    int32_t frame_size = -1;
    for (int32_t step = 0; step < max_frames; ++step) {
      Ort::Value cur_tensor =
          Ort::Value::CreateTensor(memory_info, cur.data(), cur.size(),
                                   cur_shape.data(), cur_shape.size());

      std::tie(conditioning, eos_logit) = RunLmMain(
          std::move(cur_tensor), View(&empty_text_tensor), lm_main_state);
      const float *p_logit = eos_logit.GetTensorData<float>();

      if (eos_step < 0 && p_logit[0] > -4) {
        eos_step = step;
      }

      if (eos_step > 0 && (step >= eos_step + frames_after_eos)) {
        break;
      }

      normal_gen.Fill(noise.data(), noise.size());

      Ort::Value latent =
          RunLmFlow(std::move(conditioning), View(&noise_tensor), num_steps);

      auto n = latent.GetTensorTypeAndShapeInfo().GetShape().back();
      if (frame_size == -1) {
        frame_size = n;
      }

      cur = {latent.GetTensorData<float>(), latent.GetTensorData<float>() + n};

      latent_list.insert(latent_list.end(), latent.GetTensorData<float>(),
                         latent.GetTensorData<float>() + n);
    }

    lm_main_state.values.clear();

    auto decoder_state = model_->GetMimiDecoderInitState();

    int32_t chunk_size = gen_config.GetExtraInt("chunk_size", 15);

    int32_t num_chunks = latent_list.size() / frame_size / chunk_size;
    std::array<int64_t, 3> chunk_shape = {1, chunk_size, frame_size};

    std::vector<float> audio_list;

    int32_t remaining_chunks =
        (latent_list.size() - num_chunks * chunk_size * frame_size) /
        frame_size;

    const float *p = latent_list.data();
    for (int32_t i = 0;
         (p < latent_list.data() + latent_list.size()) && should_continue;
         ++i) {
      int32_t this_chunk_size = chunk_size;
      if (i >= num_chunks) {
        this_chunk_size = remaining_chunks;
      }

      chunk_shape[1] = this_chunk_size;

      Ort::Value chunk_tensor = Ort::Value::CreateTensor(
          memory_info, const_cast<float *>(p), this_chunk_size * frame_size,
          chunk_shape.data(), chunk_shape.size());

      p += this_chunk_size * frame_size;

      Ort::Value out = RunMimiDecoder(std::move(chunk_tensor), decoder_state);

      auto n = out.GetTensorTypeAndShapeInfo().GetShape().back();

      if (callback) {
        should_continue =
            callback(out.GetTensorData<float>(), n,
                     (i + 1) * 1.0 / (num_chunks + !!remaining_chunks));
        // Caution(fangjun): out is freed when the callback returns, so users
        // should copy the data if they want to access the data after
        // the callback returns to avoid segmentation fault.
      }

      audio_list.insert(audio_list.end(), out.GetTensorData<float>(),
                        out.GetTensorData<float>() + n);
    }

    GeneratedAudio ans;
    ans.sample_rate = SampleRate();
    ans.samples = std::move(audio_list);

    return ans;
  }

 private:
  template <typename Manager>
  void InitTokenizer(Manager *mgr) {
    tokenizer_ = std::make_unique<SentencePieceTokenizer>(
        mgr, config_.model.pocket.vocab_json,
        config_.model.pocket.token_scores_json);
  }

  void InitTokenizer() {
    tokenizer_ = std::make_unique<SentencePieceTokenizer>(
        config_.model.pocket.vocab_json,
        config_.model.pocket.token_scores_json);
  }

  Ort::Value GetVoiceEmbedding(const GenerationConfig &gen_config) const {
    if (gen_config.reference_sample_rate <= 0) {
      SHERPA_ONNX_LOGE("reference_sample_rate %d is invalid.",
                       gen_config.reference_sample_rate);
      return Ort::Value{nullptr};
    }

    if (gen_config.reference_audio.empty()) {
      SHERPA_ONNX_LOGE("reference audio is empty");
      return Ort::Value{nullptr};
    }

    std::vector<float> reference_audio;

    const float *p_audio;
    int32_t num_samples;
    if (gen_config.reference_sample_rate != SampleRate()) {
      SHERPA_ONNX_LOGE(
          "Creating a resampler:\n"
          "   in_sample_rate: %d\n"
          "   output_sample_rate: %d",
          gen_config.reference_sample_rate, SampleRate());

      float min_freq =
          std::min<int32_t>(gen_config.reference_sample_rate, SampleRate());
      float lowpass_cutoff = 0.99 * 0.5 * min_freq;

      int32_t lowpass_filter_width = 6;
      auto resampler = std::make_unique<sherpa_onnx::LinearResample>(
          gen_config.reference_sample_rate, SampleRate(), lowpass_cutoff,
          lowpass_filter_width);

      resampler->Resample(gen_config.reference_audio.data(),
                          gen_config.reference_audio.size(), true,
                          &reference_audio);
      p_audio = reference_audio.data();
      num_samples = reference_audio.size();
    } else {
      p_audio = gen_config.reference_audio.data();
      num_samples = gen_config.reference_audio.size();
    }

    float max_reference_audio_len =
        gen_config.GetExtraFloat("max_reference_audio_len", 10);

    // in seconds

    int32_t max_len =
        static_cast<int32_t>(max_reference_audio_len * SampleRate());

    if (num_samples > max_len) {
      if (config_.model.debug) {
        SHERPA_ONNX_LOGE(
            "max_reference_audio_len is %.3f seconds. Given reference audio of "
            "%.3f seconds. Only the first %.3f seconds are used",
            max_reference_audio_len, num_samples * 1.0f / SampleRate(),
            max_reference_audio_len);
      }
      num_samples = max_len;
    }

    // Compute hash of reference audio for cache lookup
    size_t audio_hash = ComputeHash(p_audio, num_samples);

    auto cached_embedding = cache_.Get(audio_hash);
    if (cached_embedding) {
      if (config_.model.debug) {
        SHERPA_ONNX_LOGE("CACHE HIT: voice embedding (hash=%zu)", audio_hash);
      }
      // Create an owned tensor and copy data to avoid use-after-free
      auto result = Ort::Value::CreateTensor<float>(
          model_->Allocator(), cached_embedding->second.data(),
          cached_embedding->second.size());
      std::copy(cached_embedding->first.begin(), cached_embedding->first.end(),
                result.GetTensorMutableData<float>());
      return result;
    }

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::array<int64_t, 3> shape = {1, 1, num_samples};
    Ort::Value x =
        Ort::Value::CreateTensor(memory_info, const_cast<float *>(p_audio),
                                 num_samples, shape.data(), shape.size());

    Ort::Value result = model_->RunMimiEncoder(std::move(x));

    auto info = result.GetTensorTypeAndShapeInfo();
    auto result_shape = info.GetShape();
    size_t total = info.GetElementCount();
    const float *result_data = result.GetTensorData<float>();

    cache_.Put(audio_hash, std::vector<float>(result_data, result_data + total),
               std::move(result_shape));

    if (config_.model.debug) {
      SHERPA_ONNX_LOGE("CACHE MISS: cached embedding (hash=%zu, %zu floats)",
                       audio_hash, total);
    }

    return result;
  }

  Ort::Value GetTextEmbedding(const std::string &text) const {
    std::vector<int32_t> token_ids = tokenizer_->EncodeIds(text);
    if (config_.model.debug) {
      std::ostringstream os;
      os << "\ntoken_ids (len=" << token_ids.size() << "): ";
      for (auto i : token_ids) {
        os << i << " ";
      }
      os << "\n";

      auto tokens = tokenizer_->EncodeTokens(text);
      os << "tokens (len=" << tokens.size() << "):";
      for (const auto &t : tokens) {
        os << t << " ";
      }

      SHERPA_ONNX_LOGE("%s", os.str().c_str());
    }

    std::vector<int64_t> token_ids_i64 = {token_ids.begin(), token_ids.end()};

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::array<int64_t, 2> shape = {1,
                                    static_cast<int64_t>(token_ids_i64.size())};

    Ort::Value x = Ort::Value::CreateTensor(memory_info, token_ids_i64.data(),
                                            token_ids_i64.size(), shape.data(),
                                            shape.size());
    return model_->RunTextConditioner(std::move(x));
  }

  // state is changed in-place
  std::pair<Ort::Value, Ort::Value> RunLmMain(Ort::Value seq,
                                              Ort::Value embedding,
                                              PocketLmMainState &state) const {
    std::tuple<Ort::Value, Ort::Value, PocketLmMainState> output =
        model_->RunLmMain(std::move(seq), std::move(embedding),
                          std::move(state));

    state = std::move(std::get<2>(output));

    return {std::move(std::get<0>(output)), std::move(std::get<1>(output))};
  }

  Ort::Value RunLmFlow(Ort::Value conditioning, Ort::Value noise,
                       int32_t num_steps) const {
    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    Ort::Value x = Clone(model_->Allocator(), &noise);

    std::array<int64_t, 2> shape = {1, 1};

    float dt = 1.0f / static_cast<float>(num_steps);

    float s = 0;
    float t = 0;

    Ort::Value s_tensor = Ort::Value::CreateTensor(memory_info, &s, 1,
                                                   shape.data(), shape.size());

    Ort::Value t_tensor = Ort::Value::CreateTensor(memory_info, &t, 1,
                                                   shape.data(), shape.size());

    for (int32_t i = 0; i < num_steps; ++i) {
      s = static_cast<float>(i) / static_cast<float>(num_steps);
      t = s + dt;

      Ort::Value out = model_->RunLmFlow(View(&conditioning), View(&s_tensor),
                                         View(&t_tensor), View(&x));

      auto n = out.GetTensorTypeAndShapeInfo().GetShape().back();

      ScaleAdd(out.GetTensorData<float>(), dt, n,
               x.GetTensorMutableData<float>());
    }

    return std::move(x);
  }

  // state is changed in-place
  Ort::Value RunMimiDecoder(Ort::Value latent,
                            PocketMimiDecoderState &state) const {
    std::pair<Ort::Value, PocketMimiDecoderState> output =
        model_->RunMimiDecoder(std::move(latent), std::move(state));

    state = std::move(output.second);

    return std::move(output.first);
  }

 private:
  OfflineTtsConfig config_;
  std::unique_ptr<OfflineTtsPocketModel> model_;
  std::vector<std::unique_ptr<kaldifst::TextNormalizer>> tn_list_;
  std::unique_ptr<SentencePieceTokenizer> tokenizer_;

  // Shared Thread-Safe LRU Cache for Voice Embeddings
  struct VoiceEmbeddingCache {
    using Embedding = std::pair<std::vector<float>, std::vector<int64_t>>;
    using EmbeddingPtr = std::shared_ptr<Embedding>;

   private:
    using ListNode = std::pair<size_t, EmbeddingPtr>;
    using ListIt = std::list<ListNode>::iterator;

    mutable std::mutex mutex_;
    size_t capacity_;

    // Front = most recently used
    std::list<ListNode> lru_list_;

    // Key -> iterator into lru_list_
    std::unordered_map<size_t, ListIt> map_;

   public:
    static constexpr size_t kDefaultCapacity = 50;

    explicit VoiceEmbeddingCache(size_t cap = kDefaultCapacity)
        : capacity_(cap) {}

    EmbeddingPtr Get(size_t key) {
      std::lock_guard<std::mutex> lock(mutex_);

      auto it = map_.find(key);
      if (it == map_.end()) {
        return nullptr;  // cache miss
      }

      // Move to front (most recently used)
      if (it->second != lru_list_.begin()) {
        lru_list_.splice(lru_list_.begin(), lru_list_, it->second);
      }

      return it->second->second;  // copy shared_ptr
    }

    void Put(size_t key, std::vector<float> data, std::vector<int64_t> shape) {
      std::lock_guard<std::mutex> lock(mutex_);

      if (capacity_ == 0) {
        return;
      }

      auto it = map_.find(key);

      // If exists, update and move to front
      if (it != map_.end()) {
        it->second->second =
            std::make_shared<Embedding>(std::move(data), std::move(shape));

        if (it->second != lru_list_.begin()) {
          lru_list_.splice(lru_list_.begin(), lru_list_, it->second);
        }
        return;
      }

      // Evict if full
      if (lru_list_.size() >= capacity_) {
        auto &last = lru_list_.back();
        size_t last_key = last.first;

        map_.erase(last_key);
        lru_list_.pop_back();  // shared_ptr released here
      }

      // Insert new at front
      lru_list_.emplace_front(
          key, std::make_shared<Embedding>(std::move(data), std::move(shape)));

      map_[key] = lru_list_.begin();
    }

    void SetCapacity(int32_t cap) {
      if (cap < 0) {
        SHERPA_ONNX_LOGE(
            "voice_embedding_cache_capacity must be >= 0. Given: %d", cap);
        SHERPA_ONNX_EXIT(-1);
      }

      std::lock_guard<std::mutex> lock(mutex_);
      capacity_ = cap;

      while (lru_list_.size() > capacity_) {
        auto &last = lru_list_.back();
        size_t last_key = last.first;

        map_.erase(last_key);
        lru_list_.pop_back();
      }
    }

    size_t Size() const {
      std::lock_guard<std::mutex> lock(mutex_);
      return lru_list_.size();
    }

    void Clear() {
      std::lock_guard<std::mutex> lock(mutex_);
      map_.clear();
      lru_list_.clear();
    }
  };

  mutable VoiceEmbeddingCache cache_;
};

}  // namespace sherpa_onnx
#endif  // SHERPA_ONNX_CSRC_OFFLINE_TTS_POCKET_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-tts-pocket-model-config.cc
================================================
// sherpa-onnx/csrc/offline-tts-pocket-model-config.cc
//
// Copyright (c)  2026  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-tts-pocket-model-config.h"

#include <sstream>
#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OfflineTtsPocketModelConfig::Register(ParseOptions *po) {
  po->Register("pocket-lm-flow", &lm_flow, "Path to PocketTTS lm flow model");
  po->Register("pocket-lm-main", &lm_main, "Path to PocketTTS lm main model");
  po->Register("pocket-encoder", &encoder, "Path to PocketTTS encoder model");
  po->Register("pocket-decoder", &decoder, "Path to PocketTTS decoder model");
  po->Register("pocket-text-conditioner", &text_conditioner,
               "Path to PocketTTS text conditioner model");
  po->Register("pocket-vocab-json", &vocab_json,
               "Path to PocketTTS vocab.json");
  po->Register("pocket-token-scores-json", &token_scores_json,
               "Path to PocketTTS token_scores.json");
  po->Register("pocket-voice-embedding-cache-capacity",
               &voice_embedding_cache_capacity,
               "Capacity of the voice embedding cache (number of items). "
               "Default: 50. 0 disables caching.");
}

bool OfflineTtsPocketModelConfig::Validate() const {
  if (lm_flow.empty()) {
    SHERPA_ONNX_LOGE("Please provide --pocket-lm-flow");
    return false;
  }

  if (!FileExists(lm_flow)) {
    SHERPA_ONNX_LOGE("--pocket-lm-flow '%s' does not exist", lm_flow.c_str());
    return false;
  }

  if (lm_main.empty()) {
    SHERPA_ONNX_LOGE("Please provide --pocket-lm-main");
    return false;
  }

  if (!FileExists(lm_main)) {
    SHERPA_ONNX_LOGE("--pocket-lm-main '%s' does not exist", lm_main.c_str());
    return false;
  }

  if (encoder.empty()) {
    SHERPA_ONNX_LOGE("Please provide --pocket-encoder");
    return false;
  }

  if (!FileExists(encoder)) {
    SHERPA_ONNX_LOGE("--pocket-encoder '%s' does not exist", encoder.c_str());
    return false;
  }

  if (decoder.empty()) {
    SHERPA_ONNX_LOGE("Please provide --pocket-decoder");
    return false;
  }

  if (!FileExists(decoder)) {
    SHERPA_ONNX_LOGE("--pocket-decoder '%s' does not exist", decoder.c_str());
    return false;
  }

  if (text_conditioner.empty()) {
    SHERPA_ONNX_LOGE("Please provide --pocket-text-conditioner");
    return false;
  }

  if (!FileExists(text_conditioner)) {
    SHERPA_ONNX_LOGE("--pocket-text-conditioner '%s' does not exist",
                     text_conditioner.c_str());
    return false;
  }

  if (vocab_json.empty()) {
    SHERPA_ONNX_LOGE("Please provide --pocket-vocab-json");
    return false;
  }

  if (!FileExists(vocab_json)) {
    SHERPA_ONNX_LOGE("--pocket-vocab-json '%s' does not exist",
                     vocab_json.c_str());
    return false;
  }

  if (token_scores_json.empty()) {
    SHERPA_ONNX_LOGE("Please provide --pocket-token-scores-json");
    return false;
  }

  if (!FileExists(token_scores_json)) {
    SHERPA_ONNX_LOGE("--pocket-token-scores-json '%s' does not exist",
                     token_scores_json.c_str());
    return false;
  }

  if (voice_embedding_cache_capacity < 0) {
    SHERPA_ONNX_LOGE(
        "voice_embedding_cache_capacity must be non-negative. Given: %d",
        voice_embedding_cache_capacity);
    return false;
  }

  return true;
}

std::string OfflineTtsPocketModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineTtsPocketModelConfig(";
  os << "lm_flow=\"" << lm_flow << "\", ";
  os << "lm_main=\"" << lm_main << "\", ";
  os << "encoder=\"" << encoder << "\", ";
  os << "decoder=\"" << decoder << "\", ";
  os << "text_conditioner=\"" << text_conditioner << "\", ";
  os << "vocab_json=\"" << vocab_json << "\", ";
  os << "token_scores_json=\"" << token_scores_json << "\", ";
  os << "voice_embedding_cache_capacity=" << voice_embedding_cache_capacity
     << ")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-tts-pocket-model-config.h
================================================
// sherpa-onnx/csrc/offline-tts-pocket-model-config.h
//
// Copyright (c)  2026  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_POCKET_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_POCKET_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineTtsPocketModelConfig {
  std::string lm_flow;
  std::string lm_main;
  std::string encoder;
  std::string decoder;
  std::string text_conditioner;

  std::string vocab_json;
  std::string token_scores_json;

  OfflineTtsPocketModelConfig() = default;
  int32_t voice_embedding_cache_capacity = 50;

  OfflineTtsPocketModelConfig(const std::string &lm_flow,
                              const std::string &lm_main,
                              const std::string &encoder,
                              const std::string &decoder,
                              const std::string &text_conditioner,
                              const std::string &vocab_json,
                              const std::string &token_scores_json,
                              int32_t voice_embedding_cache_capacity = 50)
      : lm_flow(lm_flow),
        lm_main(lm_main),
        encoder(encoder),
        decoder(decoder),
        text_conditioner(text_conditioner),
        vocab_json(vocab_json),
        token_scores_json(token_scores_json),
        voice_embedding_cache_capacity(voice_embedding_cache_capacity) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TTS_POCKET_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-tts-pocket-model.cc
================================================
// sherpa-onnx/csrc/offline-tts-pocket-model.cc
//
// Copyright (c)  2026  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-tts-pocket-model.h"

#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"
#include "sherpa-onnx/csrc/file-utils.h"

namespace sherpa_onnx {

static Ort::Value CreateZeroTensorLike(Ort::Session &sess, int32_t input_index,
                                       OrtAllocator *allocator) {
  auto type_info = sess.GetInputTypeInfo(input_index);
  auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
  ONNXTensorElementDataType elem_type = tensor_info.GetElementType();
  std::vector<int64_t> shape = tensor_info.GetShape();

  // 3. Replace dynamic dims (-1) with 1
  for (auto &d : shape) {
    if (d < 0) {
      d = 1;
    }
  }

  Ort::Value v{nullptr};
  switch (elem_type) {
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT:
      v = Ort::Value::CreateTensor<float>(allocator, shape.data(),
                                          shape.size());
      Fill<float>(&v, 0);
      break;
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL:
      v = Ort::Value::CreateTensor<bool>(allocator, shape.data(), shape.size());
      Fill<bool>(&v, 0);
      break;
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64:
      v = Ort::Value::CreateTensor<int64_t>(allocator, shape.data(),
                                            shape.size());
      Fill<int64_t>(&v, 0);
      break;
    default:
      SHERPA_ONNX_LOGE("Unsupported tensor element type: %d", elem_type);
      SHERPA_ONNX_EXIT(-1);
  }

  return v;
}

class OfflineTtsPocketModel::Impl {
 public:
  explicit Impl(const OfflineTtsModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)) {
    lm_flow_sess_ = std::make_unique<Ort::Session>(
        env_, SHERPA_ONNX_TO_ORT_PATH(config.pocket.lm_flow), sess_opts_);
    InitLmFlow(nullptr, 0);

    lm_main_sess_ = std::make_unique<Ort::Session>(
        env_, SHERPA_ONNX_TO_ORT_PATH(config.pocket.lm_main), sess_opts_);
    InitLmMain(nullptr, 0);

    mimi_encoder_sess_ = std::make_unique<Ort::Session>(
        env_, SHERPA_ONNX_TO_ORT_PATH(config.pocket.encoder), sess_opts_);
    InitMimiEncoder(nullptr, 0);

    mimi_decoder_sess_ = std::make_unique<Ort::Session>(
        env_, SHERPA_ONNX_TO_ORT_PATH(config.pocket.decoder), sess_opts_);
    InitMimiDecoder(nullptr, 0);

    text_conditioner_sess_ = std::make_unique<Ort::Session>(
        env_, SHERPA_ONNX_TO_ORT_PATH(config.pocket.text_conditioner),
        sess_opts_);
    InitTextConditioner(nullptr, 0);
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineTtsModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)) {
    {
      auto buf = ReadFile(mgr, config.pocket.lm_flow);
      InitLmFlow(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(mgr, config.pocket.lm_main);
      InitLmMain(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(mgr, config.pocket.encoder);
      InitMimiEncoder(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(mgr, config.pocket.decoder);
      InitMimiDecoder(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(mgr, config.pocket.text_conditioner);
      InitTextConditioner(buf.data(), buf.size());
    }
  }

  PocketLmMainState GetLmMainInitState() {
    PocketLmMainState s;
    s.values.reserve(lm_main_init_states_.values.size());
    for (auto &v : lm_main_init_states_.values) {
      s.values.push_back(View(&v));
    }
    return s;
  }

  PocketMimiDecoderState GetMimiDecoderInitState() {
    PocketMimiDecoderState s;
    s.values.reserve(mimi_decoder_init_states_.values.size());
    for (auto &v : mimi_decoder_init_states_.values) {
      s.values.push_back(View(&v));
    }

    return s;
  }

  Ort::Value RunMimiEncoder(Ort::Value audio) const {
    std::vector<Ort::Value> inputs;
    inputs.push_back(std::move(audio));

    auto outputs = mimi_encoder_sess_->Run(
        {}, mimi_encoder_input_names_ptr_.data(), inputs.data(), inputs.size(),
        mimi_encoder_output_names_ptr_.data(),
        mimi_encoder_output_names_ptr_.size());

    return std::move(outputs[0]);
  }

  Ort::Value RunTextConditioner(Ort::Value text_tokens) const {
    std::vector<Ort::Value> inputs;
    inputs.push_back(std::move(text_tokens));

    auto outputs = text_conditioner_sess_->Run(
        Ort::RunOptions{nullptr}, text_conditioner_input_names_ptr_.data(),
        inputs.data(), inputs.size(), text_conditioner_output_names_ptr_.data(),
        text_conditioner_output_names_ptr_.size());

    return std::move(outputs[0]);
  }

  std::tuple<Ort::Value, Ort::Value, PocketLmMainState> RunLmMain(
      Ort::Value seq, Ort::Value embeddings, PocketLmMainState state) const {
    std::vector<Ort::Value> inputs;
    inputs.reserve(2 + state.values.size());

    inputs.push_back(std::move(seq));
    inputs.push_back(std::move(embeddings));

    for (auto &v : state.values) {
      inputs.push_back(std::move(v));
    }

    auto outputs = lm_main_sess_->Run(
        Ort::RunOptions{nullptr}, lm_main_input_names_ptr_.data(),
        inputs.data(), inputs.size(), lm_main_output_names_ptr_.data(),
        lm_main_output_names_ptr_.size());

    PocketLmMainState new_state;
    new_state.values.reserve(outputs.size() - 2);
    for (size_t i = 2; i < outputs.size(); ++i) {
      new_state.values.push_back(std::move(outputs[i]));
    }

    return {std::move(outputs[0]), std::move(outputs[1]), std::move(new_state)};
  }

  Ort::Value RunLmFlow(Ort::Value c, Ort::Value s, Ort::Value t,
                       Ort::Value x) const {
    std::vector<Ort::Value> inputs;
    inputs.reserve(4);
    inputs.push_back(std::move(c));
    inputs.push_back(std::move(s));
    inputs.push_back(std::move(t));
    inputs.push_back(std::move(x));

    auto outputs = lm_flow_sess_->Run(
        {}, lm_flow_input_names_ptr_.data(), inputs.data(), inputs.size(),
        lm_flow_output_names_ptr_.data(), lm_flow_output_names_ptr_.size());

    return std::move(outputs[0]);
  }

  std::pair<Ort::Value, PocketMimiDecoderState> RunMimiDecoder(
      Ort::Value latent, PocketMimiDecoderState state) const {
    std::vector<Ort::Value> inputs;
    inputs.reserve(1 + state.values.size());

    inputs.push_back(std::move(latent));
    for (auto &v : state.values) {
      inputs.push_back(std::move(v));
    }

    auto outputs = mimi_decoder_sess_->Run(
        {}, mimi_decoder_input_names_ptr_.data(), inputs.data(), inputs.size(),
        mimi_decoder_output_names_ptr_.data(),
        mimi_decoder_output_names_ptr_.size());

    PocketMimiDecoderState new_state;
    new_state.values.reserve(outputs.size() - 1);
    for (size_t i = 1; i < outputs.size(); ++i) {
      new_state.values.push_back(std::move(outputs[i]));
    }

    return {std::move(outputs[0]), std::move(new_state)};
  }

  OrtAllocator *Allocator() { return allocator_; }

 private:
  void InitLmFlow(void *model_data, size_t model_data_length) {
    if (model_data) {
      lm_flow_sess_ = std::make_unique<Ort::Session>(
          env_, model_data, model_data_length, sess_opts_);
    } else if (!lm_flow_sess_) {
      SHERPA_ONNX_LOGE(
          "Please pass buffer data or initialize lm flow session outside of "
          "this function");
      SHERPA_ONNX_EXIT(-1);
    }

    GetInputNames(lm_flow_sess_.get(), &lm_flow_input_names_,
                  &lm_flow_input_names_ptr_);

    GetOutputNames(lm_flow_sess_.get(), &lm_flow_output_names_,
                   &lm_flow_output_names_ptr_);
  }

  void InitLmMain(void *model_data, size_t model_data_length) {
    if (model_data) {
      lm_main_sess_ = std::make_unique<Ort::Session>(
          env_, model_data, model_data_length, sess_opts_);
    } else if (!lm_main_sess_) {
      SHERPA_ONNX_LOGE(
          "Please pass buffer data or initialize lm main session outside of "
          "this function");
      SHERPA_ONNX_EXIT(-1);
    }

    GetInputNames(lm_main_sess_.get(), &lm_main_input_names_,
                  &lm_main_input_names_ptr_);

    GetOutputNames(lm_main_sess_.get(), &lm_main_output_names_,
                   &lm_main_output_names_ptr_);

    lm_main_init_states_.values.reserve(lm_main_input_names_.size() - 2);
    for (size_t i = 2; i < lm_main_input_names_.size(); ++i) {
      lm_main_init_states_.values.push_back(
          CreateZeroTensorLike(*lm_main_sess_, i, allocator_));
    }
  }

  void InitMimiEncoder(void *model_data, size_t model_data_length) {
    if (model_data) {
      mimi_encoder_sess_ = std::make_unique<Ort::Session>(
          env_, model_data, model_data_length, sess_opts_);
    } else if (!mimi_encoder_sess_) {
      SHERPA_ONNX_LOGE(
          "Please pass buffer data or initialize mimi encoder session outside "
          "of this function");
      SHERPA_ONNX_EXIT(-1);
    }

    GetInputNames(mimi_encoder_sess_.get(), &mimi_encoder_input_names_,
                  &mimi_encoder_input_names_ptr_);

    GetOutputNames(mimi_encoder_sess_.get(), &mimi_encoder_output_names_,
                   &mimi_encoder_output_names_ptr_);
  }

  void InitMimiDecoder(void *model_data, size_t model_data_length) {
    if (model_data) {
      mimi_decoder_sess_ = std::make_unique<Ort::Session>(
          env_, model_data, model_data_length, sess_opts_);
    } else if (!mimi_decoder_sess_) {
      SHERPA_ONNX_LOGE(
          "Please pass buffer data or initialize mimi decoder session outside "
          "of this function");
      SHERPA_ONNX_EXIT(-1);
    }

    GetInputNames(mimi_decoder_sess_.get(), &mimi_decoder_input_names_,
                  &mimi_decoder_input_names_ptr_);

    GetOutputNames(mimi_decoder_sess_.get(), &mimi_decoder_output_names_,
                   &mimi_decoder_output_names_ptr_);

    // init mimi_decoder_init_states_
    mimi_decoder_init_states_.values.reserve(mimi_decoder_input_names_.size() -
                                             1);
    for (size_t i = 1; i < mimi_decoder_input_names_.size(); ++i) {
      mimi_decoder_init_states_.values.push_back(
          CreateZeroTensorLike(*mimi_decoder_sess_, i, allocator_));
    }
  }

  void InitTextConditioner(void *model_data, size_t model_data_length) {
    if (model_data) {
      text_conditioner_sess_ = std::make_unique<Ort::Session>(
          env_, model_data, model_data_length, sess_opts_);
    } else if (!text_conditioner_sess_) {
      SHERPA_ONNX_LOGE(
          "Please pass buffer data or initialize text conditioner session "
          "outside of this function");
      SHERPA_ONNX_EXIT(-1);
    }

    GetInputNames(text_conditioner_sess_.get(), &text_conditioner_input_names_,
                  &text_conditioner_input_names_ptr_);

    GetOutputNames(text_conditioner_sess_.get(),
                   &text_conditioner_output_names_,
                   &text_conditioner_output_names_ptr_);
  }

 private:
  OfflineTtsModelConfig config_;

  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> lm_main_sess_;
  std::unique_ptr<Ort::Session> lm_flow_sess_;
  std::unique_ptr<Ort::Session> mimi_decoder_sess_;
  std::unique_ptr<Ort::Session> mimi_encoder_sess_;
  std::unique_ptr<Ort::Session> text_conditioner_sess_;

  std::vector<std::string> lm_flow_input_names_;
  std::vector<const char *> lm_flow_input_names_ptr_;

  std::vector<std::string> lm_flow_output_names_;
  std::vector<const char *> lm_flow_output_names_ptr_;

  std::vector<std::string> lm_main_input_names_;
  std::vector<const char *> lm_main_input_names_ptr_;

  std::vector<std::string> lm_main_output_names_;
  std::vector<const char *> lm_main_output_names_ptr_;

  std::vector<std::string> mimi_encoder_input_names_;
  std::vector<const char *> mimi_encoder_input_names_ptr_;

  std::vector<std::string> mimi_encoder_output_names_;
  std::vector<const char *> mimi_encoder_output_names_ptr_;

  std::vector<std::string> mimi_decoder_input_names_;
  std::vector<const char *> mimi_decoder_input_names_ptr_;

  std::vector<std::string> mimi_decoder_output_names_;
  std::vector<const char *> mimi_decoder_output_names_ptr_;

  std::vector<std::string> text_conditioner_input_names_;
  std::vector<const char *> text_conditioner_input_names_ptr_;

  std::vector<std::string> text_conditioner_output_names_;
  std::vector<const char *> text_conditioner_output_names_ptr_;

  PocketLmMainState lm_main_init_states_;
  PocketMimiDecoderState mimi_decoder_init_states_;
};

OfflineTtsPocketModel::OfflineTtsPocketModel(
    const OfflineTtsModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineTtsPocketModel::OfflineTtsPocketModel(
    Manager *mgr, const OfflineTtsModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineTtsPocketModel::~OfflineTtsPocketModel() = default;

PocketLmMainState OfflineTtsPocketModel::GetLmMainInitState() const {
  return impl_->GetLmMainInitState();
}

PocketMimiDecoderState OfflineTtsPocketModel::GetMimiDecoderInitState() const {
  return impl_->GetMimiDecoderInitState();
}

Ort::Value OfflineTtsPocketModel::RunMimiEncoder(Ort::Value audio) const {
  return impl_->RunMimiEncoder(std::move(audio));
}

Ort::Value OfflineTtsPocketModel::RunTextConditioner(
    Ort::Value text_tokens) const {
  return impl_->RunTextConditioner(std::move(text_tokens));
}

std::tuple<Ort::Value, Ort::Value, PocketLmMainState>
OfflineTtsPocketModel::RunLmMain(Ort::Value seq, Ort::Value embeddings,
                                 PocketLmMainState state) const {
  return impl_->RunLmMain(std::move(seq), std::move(embeddings),
                          std::move(state));
}

Ort::Value OfflineTtsPocketModel::RunLmFlow(Ort::Value c, Ort::Value s,
                                            Ort::Value t, Ort::Value x) const {
  return impl_->RunLmFlow(std::move(c), std::move(s), std::move(t),
                          std::move(x));
}

std::pair<Ort::Value, PocketMimiDecoderState>
OfflineTtsPocketModel::RunMimiDecoder(Ort::Value latent,
                                      PocketMimiDecoderState state) const {
  return impl_->RunMimiDecoder(std::move(latent), std::move(state));
}

OrtAllocator *OfflineTtsPocketModel::Allocator() const {
  return impl_->Allocator();
}

#if __ANDROID_API__ >= 9
template OfflineTtsPocketModel::OfflineTtsPocketModel(
    AAssetManager *mgr, const OfflineTtsModelConfig &config);
#endif

#if __OHOS__
template OfflineTtsPocketModel::OfflineTtsPocketModel(
    NativeResourceManager *mgr, const OfflineTtsModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-tts-pocket-model.h
================================================
// sherpa-onnx/csrc/offline-tts-pocket-model.h
//
// Copyright (c)  2026  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_POCKET_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_POCKET_MODEL_H_

#include <memory>
#include <tuple>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-tts-model-config.h"

namespace sherpa_onnx {

struct PocketLmMainState {
  std::vector<Ort::Value> values;
};

struct PocketMimiDecoderState {
  std::vector<Ort::Value> values;
};

// Please refer to
// https://huggingface.co/KevinAHM/pocket-tts-onnx/blob/main/pocket_tts_onnx.py
class OfflineTtsPocketModel {
 public:
  explicit OfflineTtsPocketModel(const OfflineTtsModelConfig &config);

  template <typename Manager>
  OfflineTtsPocketModel(Manager *mgr, const OfflineTtsModelConfig &config);

  ~OfflineTtsPocketModel();

  PocketLmMainState GetLmMainInitState() const;
  PocketMimiDecoderState GetMimiDecoderInitState() const;

  /**
   * @param audio should be of 24000Hz. Its shape is (1, 1, num_samples)
   * @returns a float32 tensor of shape (1, num_frames, 1024)
   */
  Ort::Value RunMimiEncoder(Ort::Value audio) const;

  /**
   * @param text_tokens (1, num_tokens) of shape int64
   * @return float32 tensor of shape (1, num_tokens, 1024)
   */
  Ort::Value RunTextConditioner(Ort::Value text_tokens) const;

  Ort::Value RunLmFlow(Ort::Value c, Ort::Value s, Ort::Value t,
                       Ort::Value x) const;

  std::tuple<Ort::Value, Ort::Value, PocketLmMainState> RunLmMain(
      Ort::Value seq, Ort::Value embeddings, PocketLmMainState state) const;

  std::pair<Ort::Value, PocketMimiDecoderState> RunMimiDecoder(
      Ort::Value latent, PocketMimiDecoderState state) const;

  OrtAllocator *Allocator() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TTS_POCKET_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/offline-tts-supertonic-impl.cc
================================================
// sherpa-onnx/csrc/offline-tts-supertonic-impl.cc
//
// Copyright (c)  2026 zengyw
//
// This file is based on Supertonic TTS
// (https://github.com/Supertone-Inc/supertonic) which is licensed under MIT
// License (Copyright (c) 2025 Supertone Inc.)

#include "sherpa-onnx/csrc/offline-tts-supertonic-impl.h"

#include <algorithm>
#include <array>
#include <cinttypes>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <limits>
#include <numeric>
#include <random>
#include <sstream>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/normal-data-generator.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {
namespace {

// Minimum duration (in seconds) to prevent zero-length audio
constexpr float kMinDuration = 0.1f;

// Maximum latent length to prevent excessive memory allocation and OOM.
constexpr int32_t kMaxLatentLen = 10000;

constexpr std::array<std::string_view, 5> kSupertonicAvailableLangs = {
    "en", "ko", "es", "pt", "fr",
};

void GetLatentMaskFlat(const std::vector<int64_t> &wav_lengths,
                       int32_t base_chunk_size, int32_t chunk_compress_factor,
                       std::vector<float> *mask_flat,
                       std::vector<int64_t> *mask_shape) {
  const int32_t bsz = static_cast<int32_t>(wav_lengths.size());
  int32_t wav_chunk_size = base_chunk_size * chunk_compress_factor;
  std::vector<int64_t> latent_lengths;
  latent_lengths.reserve(bsz);
  for (auto len : wav_lengths) {
    latent_lengths.push_back((len + wav_chunk_size - 1) / wav_chunk_size);
  }
  LengthsToMask(latent_lengths, mask_flat, mask_shape);
}

SupertonicStyle ParseVoiceStyleFromBinary(const std::vector<char> &buf) {
  constexpr size_t kHeaderSize = 6 * sizeof(int64_t);
  constexpr size_t kMaxPayloadBytes = 64 * 1024 * 1024;

  if (buf.size() < kHeaderSize) {
    SHERPA_ONNX_LOGE(
        "Invalid voice style .bin: file too small (got %zu bytes, need %zu "
        "header)",
        buf.size(), kHeaderSize);
    SHERPA_ONNX_EXIT(-1);
  }
  int64_t dims[6];
  std::memcpy(dims, buf.data(), kHeaderSize);
  for (int i = 0; i < 6; ++i) {
    if (dims[i] <= 0) {
      SHERPA_ONNX_LOGE("Invalid voice style .bin: dims[%d]=%" PRId64 " <= 0", i,
                       dims[i]);
      SHERPA_ONNX_EXIT(-1);
    }
  }

  auto mul3 = [](int64_t a, int64_t b, int64_t c, const char *name) -> size_t {
    constexpr int64_t kMax = std::numeric_limits<int64_t>::max();
    if (a <= 0 || b <= 0 || c <= 0 || a > kMax / b) {
      SHERPA_ONNX_LOGE("Invalid voice style .bin: %s dims overflow", name);
      SHERPA_ONNX_EXIT(-1);
    }
    int64_t ab = a * b;
    if (ab > kMax / c) {
      SHERPA_ONNX_LOGE("Invalid voice style .bin: %s dims overflow", name);
      SHERPA_ONNX_EXIT(-1);
    }
    return static_cast<size_t>(ab * c);
  };
  size_t ttl_elems = mul3(dims[0], dims[1], dims[2], "ttl");
  size_t dp_elems = mul3(dims[3], dims[4], dims[5], "dp");

  size_t ttl_bytes = ttl_elems * sizeof(float);
  size_t dp_bytes = dp_elems * sizeof(float);
  if (ttl_bytes / sizeof(float) != ttl_elems ||
      dp_bytes / sizeof(float) != dp_elems) {
    SHERPA_ONNX_LOGE("Invalid voice style .bin: byte size overflow");
    SHERPA_ONNX_EXIT(-1);
  }
  size_t payload_bytes = ttl_bytes + dp_bytes;
  if (payload_bytes < ttl_bytes || payload_bytes < dp_bytes) {
    SHERPA_ONNX_LOGE("Invalid voice style .bin: payload size overflow");
    SHERPA_ONNX_EXIT(-1);
  }
  if (payload_bytes > kMaxPayloadBytes) {
    SHERPA_ONNX_LOGE(
        "Invalid voice style .bin: payload too large (%zu bytes, max %zu)",
        payload_bytes, kMaxPayloadBytes);
    SHERPA_ONNX_EXIT(-1);
  }
  size_t expected_total = kHeaderSize + payload_bytes;
  if (expected_total < kHeaderSize) {
    SHERPA_ONNX_LOGE("Invalid voice style .bin: total size overflow");
    SHERPA_ONNX_EXIT(-1);
  }
  if (buf.size() != expected_total) {
    SHERPA_ONNX_LOGE(
        "Invalid voice style .bin: size mismatch (got %zu bytes, expected "
        "exactly %zu)",
        buf.size(), expected_total);
    SHERPA_ONNX_EXIT(-1);
  }

  std::vector<int64_t> ttl_shape = {dims[0], dims[1], dims[2]};
  std::vector<int64_t> dp_shape = {dims[3], dims[4], dims[5]};
  std::vector<float> ttl_data(ttl_elems);
  std::memcpy(ttl_data.data(), buf.data() + kHeaderSize, ttl_bytes);
  std::vector<float> dp_data(dp_elems);
  std::memcpy(dp_data.data(), buf.data() + kHeaderSize + ttl_bytes, dp_bytes);

  SupertonicStyle style;
  style.ttl_data = std::move(ttl_data);
  style.dp_data = std::move(dp_data);
  style.ttl_shape = std::move(ttl_shape);
  style.dp_shape = std::move(dp_shape);
  return style;
}
}  // namespace

OfflineTtsSupertonicImpl::OfflineTtsSupertonicImpl(
    const OfflineTtsConfig &config)
    : config_(config),
      model_(std::make_unique<OfflineTtsSupertonicModel>(config.model)),
      text_processor_(std::make_unique<SupertonicUnicodeProcessor>(
          config.model.supertonic.unicode_indexer)),
      memory_info_(
          Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault)) {
  std::vector<char> buf = ReadFile(config.model.supertonic.voice_style);
  if (buf.empty()) {
    SHERPA_ONNX_LOGE("Failed to read voice style file: %s",
                     config.model.supertonic.voice_style.c_str());
    SHERPA_ONNX_EXIT(-1);
  }
  InitVoiceStyle(buf);
}

template <typename Manager>
OfflineTtsSupertonicImpl::OfflineTtsSupertonicImpl(
    Manager *mgr, const OfflineTtsConfig &config)
    : config_(config),
      model_(std::make_unique<OfflineTtsSupertonicModel>(mgr, config.model)),
      text_processor_(std::make_unique<SupertonicUnicodeProcessor>(
          mgr, config.model.supertonic.unicode_indexer)),
      memory_info_(
          Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault)) {
  std::vector<char> buf = ReadFile(mgr, config.model.supertonic.voice_style);
  if (buf.empty()) {
    SHERPA_ONNX_LOGE("Failed to read voice style file: %s",
                     config.model.supertonic.voice_style.c_str());
    SHERPA_ONNX_EXIT(-1);
  }
  InitVoiceStyle(buf);
}

int32_t OfflineTtsSupertonicImpl::SampleRate() const {
  return model_->GetSampleRate();
}

GeneratedAudio OfflineTtsSupertonicImpl::Generate(
    const std::string &text, int64_t sid, float speed,
    GeneratedAudioCallback callback) const {
  GenerationConfig config;
  config.sid = sid;
  config.speed = speed;
  return Generate(text, config, callback);
}

GeneratedAudio OfflineTtsSupertonicImpl::Generate(
    const std::string &text, const GenerationConfig &config,
    GeneratedAudioCallback callback) const {
  // Supported extra options in config.extra:
  //   - "speed" (float): Speech speed factor (default: 1.05)
  //   - "num_steps" (int): Number of denoising steps (default: 5)
  //   - "lang" (string): Language code, e.g. "en", "ko" (default: "en")
  //   - sid selects speaker from voice.bin (0 .. NumSpeakers()-1).
  //   - "max_len" (int): Max chunk length. Default: 300 (non-Korean), 120 (ko).
  //   - "silence_duration" (float): Silence in seconds between chunks (default:
  //   0.3)
  //   - "seed" (int): RNG seed for reproducibility. -1 = random (default).

  if (config_.model.debug) {
    SHERPA_ONNX_LOGE("%s", config.ToString().c_str());
  }
  int32_t seed = config.GetExtraInt("seed", -1);
  float speed =
      config.GetExtraFloat("speed", config.speed > 0 ? config.speed : 1.05f);
  int32_t num_steps = config.GetExtraInt(
      "num_steps", config.num_steps > 0 ? config.num_steps : 5);
  if (speed <= 0) {
    SHERPA_ONNX_LOGE("Speed must be > 0. Given: %f", speed);
    return {};
  }
  if (num_steps <= 0) {
    SHERPA_ONNX_LOGE("Num steps must be > 0. Given: %d", num_steps);
    return {};
  }
  std::string text_single = Trim(text);
  if (text_single.empty()) {
    return {};
  }

  int64_t sid = config.sid;
  if (sid >= num_speakers_ || sid < 0) {
    SHERPA_ONNX_LOGE(
        "Model has %d speaker(s). sid must be in [0, %d]. Given sid=%d, "
        "using 0",
        num_speakers_, num_speakers_ - 1, static_cast<int32_t>(sid));
    sid = 0;
  }

  std::string lang = config.GetExtraString("lang", "en");
  bool lang_ok = std::any_of(kSupertonicAvailableLangs.begin(),
                             kSupertonicAvailableLangs.end(),
                             [&](std::string_view s) { return s == lang; });
  if (!lang_ok) {
    SHERPA_ONNX_LOGE("Invalid language: %s. Available: en, ko, es, pt, fr",
                     lang.c_str());
    return {};
  }

  float silence_duration = config.GetExtraFloat("silence_duration", 0.3f);
  size_t max_len =
      (lang == "ko") ? static_cast<size_t>(config.GetExtraInt("max_len", 120))
                     : static_cast<size_t>(config.GetExtraInt("max_len", 300));
  if (max_len == 0) {
    SHERPA_ONNX_LOGE("Max length must be > 0. Given: %zu", max_len);
    return {};
  }
  auto text_chunks = ChunkText(text_single, max_len);
  return ProcessChunksAndConcatenate(text_chunks, lang, sid, num_steps, speed,
                                     silence_duration, seed, callback);
}

GeneratedAudio OfflineTtsSupertonicImpl::Process(
    const std::string &text, const std::string &lang, int64_t sid,
    int32_t num_steps, float speed, NormalDataGenerator &gen) const {
  const auto &cfg = model_->GetConfig();
  StyleSliceView slice = GetStyleSliceForSid(sid);
  const int32_t bsz = 1;

  std::vector<int64_t> text_ids;
  std::vector<float> text_mask_flat;
  std::vector<int64_t> text_mask_shape;
  text_processor_->Process(text, lang, &text_ids, &text_mask_flat,
                           &text_mask_shape);
  if (text_ids.empty() || text_mask_flat.empty()) {
    SHERPA_ONNX_LOGE(
        "Text processing failed: empty text_ids or text_mask. Text: \"%s\"",
        text.c_str());
    return {};
  }
  if (text_mask_shape.size() != 3) {
    SHERPA_ONNX_LOGE(
        "Invalid text_mask_shape size: %zu (expected 3). Text: \"%s\"",
        text_mask_shape.size(), text.c_str());
    return {};
  }
  int64_t text_seq_len = static_cast<int64_t>(text_ids.size());
  int64_t text_mask_len = text_mask_shape[2];
  if (text_seq_len != text_mask_len) {
    SHERPA_ONNX_LOGE("Text sequence length mismatch: text_ids=%" PRId64
                     ", text_mask=%" PRId64 ". Text: \"%s\"",
                     text_seq_len, text_mask_len, text.c_str());
    return {};
  }

  std::vector<int64_t> text_ids_shape = {1, text_seq_len};

  Ort::Value text_ids_tensor = Ort::Value::CreateTensor<int64_t>(
      memory_info_, text_ids.data(), text_ids.size(), text_ids_shape.data(),
      text_ids_shape.size());
  Ort::Value style_dp_tensor = Ort::Value::CreateTensor<float>(
      memory_info_, const_cast<float *>(slice.dp_data), slice.dp_size,
      slice.dp_shape.data(), slice.dp_shape.size());
  Ort::Value text_mask_tensor = Ort::Value::CreateTensor<float>(
      memory_info_, text_mask_flat.data(), text_mask_flat.size(),
      text_mask_shape.data(), text_mask_shape.size());
  Ort::Value dp_output = model_->RunDurationPredictor(
      std::move(text_ids_tensor), std::move(style_dp_tensor),
      std::move(text_mask_tensor));
  auto dp_output_info = dp_output.GetTensorTypeAndShapeInfo();
  size_t dp_element_count = dp_output_info.GetElementCount();
  if (dp_element_count != 1) {
    SHERPA_ONNX_LOGE(
        "Duration predictor output size mismatch: expected 1, got %zu. Text: "
        "\"%s\"",
        dp_element_count, text.c_str());
    return {};
  }
  auto *dur_data = dp_output.GetTensorMutableData<float>();
  std::vector<float> duration(dur_data, dur_data + 1);
  if (speed != 1.0f) {
    for (auto &dur : duration) {
      dur /= speed;
      if (dur < kMinDuration) {
        dur = kMinDuration;
      }
    }
  }

  Ort::Value text_enc_output = model_->RunTextEncoder(
      Ort::Value::CreateTensor<int64_t>(memory_info_, text_ids.data(),
                                        text_ids.size(), text_ids_shape.data(),
                                        text_ids_shape.size()),
      Ort::Value::CreateTensor<float>(
          memory_info_, const_cast<float *>(slice.ttl_data), slice.ttl_size,
          slice.ttl_shape.data(), slice.ttl_shape.size()),
      Ort::Value::CreateTensor<float>(
          memory_info_, text_mask_flat.data(), text_mask_flat.size(),
          text_mask_shape.data(), text_mask_shape.size()));
  auto text_emb_info = text_enc_output.GetTensorTypeAndShapeInfo();
  size_t text_emb_size = text_emb_info.GetElementCount();
  if (text_emb_size == 0) {
    SHERPA_ONNX_LOGE("Text encoder output is empty. Text: \"%s\"",
                     text.c_str());
    return {};
  }
  auto *text_emb_data = text_enc_output.GetTensorMutableData<float>();
  auto text_emb_shape = text_emb_info.GetShape();

  float wav_len_max =
      *std::max_element(duration.begin(), duration.end()) * cfg.ae.sample_rate;
  std::vector<int64_t> wav_lengths;
  wav_lengths.reserve(bsz);
  for (float d : duration) {
    int64_t wav_len = static_cast<int64_t>(d * cfg.ae.sample_rate);
    if (wav_len < 1) {
      wav_len = 1;
    }
    wav_lengths.push_back(wav_len);
  }
  int32_t chunk_size = cfg.ae.base_chunk_size * cfg.ttl.chunk_compress_factor;
  int32_t latent_len =
      static_cast<int32_t>((wav_len_max + chunk_size - 1) / chunk_size);
  if (latent_len > kMaxLatentLen) {
    SHERPA_ONNX_LOGE(
        "Latent length (%d) exceeds maximum (%d), capping to prevent OOM",
        latent_len, kMaxLatentLen);
    latent_len = kMaxLatentLen;
  }

  int32_t latent_dim = cfg.ttl.latent_dim * cfg.ttl.chunk_compress_factor;
  size_t latent_total_size = static_cast<size_t>(bsz) *
                             static_cast<size_t>(latent_dim) *
                             static_cast<size_t>(latent_len);
  if (latent_total_size / static_cast<size_t>(bsz) /
          static_cast<size_t>(latent_dim) !=
      static_cast<size_t>(latent_len)) {
    SHERPA_ONNX_LOGE(
        "Latent total size overflow: bsz=%d, latent_dim=%d, latent_len=%d. "
        "Text: \"%s\"",
        bsz, latent_dim, latent_len, text.c_str());
    return {};
  }

  std::vector<float> xt_flat(latent_total_size);

  gen.Fill(xt_flat.data(), xt_flat.size());

  std::vector<float> latent_mask_flat;
  std::vector<int64_t> latent_mask_shape;
  GetLatentMaskFlat(wav_lengths, cfg.ae.base_chunk_size,
                    cfg.ttl.chunk_compress_factor, &latent_mask_flat,
                    &latent_mask_shape);
  int64_t latent_mask_len = latent_mask_shape[2];
  if (latent_mask_len != latent_len) {
    SHERPA_ONNX_LOGE("Latent mask length mismatch: expected %d, got %" PRId64
                     ". Text: \"%s\"",
                     latent_len, latent_mask_len, text.c_str());
    return {};
  }
  for (int32_t b = 0; b < bsz; ++b) {
    const float *mask_batch = latent_mask_flat.data() + b * latent_mask_len;
    float *xt_batch = xt_flat.data() + b * latent_dim * latent_len;
    for (int32_t d = 0; d < latent_dim; ++d) {
      float *xt_dim = xt_batch + d * latent_len;
      for (int32_t t = 0; t < latent_len; ++t) {
        xt_dim[t] *= mask_batch[t];
      }
    }
  }

  std::vector<int64_t> latent_shape = {bsz, latent_dim, latent_len};
  std::vector<float> total_step_vec(bsz, static_cast<float>(num_steps));
  std::array<int64_t, 1> step_shape = {bsz};

  // Constant inputs: create once outside loop, keep text_enc_output alive.
  Ort::Value text_emb_const = Ort::Value::CreateTensor<float>(
      memory_info_, text_emb_data, text_emb_size, text_emb_shape.data(),
      text_emb_shape.size());
  Ort::Value style_ttl_const = Ort::Value::CreateTensor<float>(
      memory_info_, const_cast<float *>(slice.ttl_data), slice.ttl_size,
      slice.ttl_shape.data(), slice.ttl_shape.size());
  Ort::Value text_mask_const = Ort::Value::CreateTensor<float>(
      memory_info_, text_mask_flat.data(), text_mask_flat.size(),
      text_mask_shape.data(), text_mask_shape.size());
  Ort::Value latent_mask_const = Ort::Value::CreateTensor<float>(
      memory_info_, latent_mask_flat.data(), latent_mask_flat.size(),
      latent_mask_shape.data(), latent_mask_shape.size());
  Ort::Value total_step_const = Ort::Value::CreateTensor<float>(
      memory_info_, total_step_vec.data(), total_step_vec.size(),
      step_shape.data(), step_shape.size());

  float current_step = 0.f;
  for (int32_t step = 0; step < num_steps; step++) {
    current_step = static_cast<float>(step);
    Ort::Value noisy_latent_tensor = Ort::Value::CreateTensor<float>(
        memory_info_, xt_flat.data(), xt_flat.size(), latent_shape.data(),
        latent_shape.size());
    Ort::Value current_step_tensor = Ort::Value::CreateTensor<float>(
        memory_info_, &current_step, 1, step_shape.data(), step_shape.size());

    Ort::Value vector_est_output = model_->RunVectorEstimator(
        std::move(noisy_latent_tensor), std::move(current_step_tensor),
        text_emb_const, style_ttl_const, latent_mask_const, text_mask_const,
        total_step_const);
    auto vector_est_output_info = vector_est_output.GetTensorTypeAndShapeInfo();
    size_t denoised_size = vector_est_output_info.GetElementCount();
    if (denoised_size != latent_total_size) {
      SHERPA_ONNX_LOGE(
          "Denoised latent size mismatch at step %d: expected %zu, got %zu. "
          "Text: \"%s\"",
          step, latent_total_size, denoised_size, text.c_str());
      return {};
    }
    auto *denoised_data = vector_est_output.GetTensorMutableData<float>();
    std::memcpy(xt_flat.data(), denoised_data,
                latent_total_size * sizeof(float));
  }

  Ort::Value latent_tensor = Ort::Value::CreateTensor<float>(
      memory_info_, xt_flat.data(), xt_flat.size(), latent_shape.data(),
      latent_shape.size());
  Ort::Value vocoder_output = model_->RunVocoder(std::move(latent_tensor));
  auto wav_info = vocoder_output.GetTensorTypeAndShapeInfo();
  auto wav_shape = wav_info.GetShape();
  size_t wav_size = wav_info.GetElementCount();
  if (wav_size == 0) {
    SHERPA_ONNX_LOGE("Vocoder output is empty. Text: \"%s\"", text.c_str());
    return {};
  }

  auto *wav_data = vocoder_output.GetTensorMutableData<float>();
  if (config_.model.debug) {
    std::ostringstream os;
    os << "Vocoder output shape: [";
    for (size_t i = 0; i < wav_shape.size(); ++i) {
      if (i > 0) os << ", ";
      os << wav_shape[i];
    }
    os << "], total elements: " << wav_size << ", bsz: " << bsz;
    SHERPA_ONNX_LOGE("%s", os.str().c_str());
  }

  GeneratedAudio result;
  if ((wav_shape.size() == 2 && wav_shape[0] == bsz) ||
      (wav_shape.size() == 3 && wav_shape[0] == bsz && wav_shape[1] == 1)) {
    int64_t samples_per_batch =
        (wav_shape.size() == 2) ? wav_shape[1] : wav_shape[2];
    result.samples.reserve(static_cast<size_t>(std::accumulate(
        wav_lengths.begin(), wav_lengths.end(), static_cast<int64_t>(0))));
    for (int32_t b = 0; b < bsz; ++b) {
      int64_t actual_len = wav_lengths[b];
      if (actual_len > samples_per_batch) {
        actual_len = samples_per_batch;
      }
      const float *batch_wav = wav_data + b * samples_per_batch;
      result.samples.insert(result.samples.end(), batch_wav,
                            batch_wav + actual_len);
    }
  } else if (wav_shape.size() == 1 ||
             (wav_shape.size() == 2 && wav_shape[0] == 1)) {
    result.samples.assign(wav_data, wav_data + wav_size);
  } else {
    std::ostringstream os;
    os << "Unexpected vocoder output shape: [";
    for (size_t i = 0; i < wav_shape.size(); ++i) {
      if (i > 0) os << ", ";
      os << wav_shape[i];
    }
    os << "], bsz=" << bsz << ", using all samples";
    SHERPA_ONNX_LOGE("%s", os.str().c_str());
    result.samples.assign(wav_data, wav_data + wav_size);
  }
  if (config_.model.debug && !result.samples.empty()) {
    float max_abs = 0.f;
    float min_abs = std::abs(result.samples[0]);
    for (float x : result.samples) {
      float ax = std::abs(x);
      max_abs = std::max(max_abs, ax);
      min_abs = std::min(min_abs, ax);
    }
    SHERPA_ONNX_LOGE("Audio samples: %zu, min_abs=%.6f, max_abs=%.6f",
                     result.samples.size(), min_abs, max_abs);
  }
  result.sample_rate = cfg.ae.sample_rate;
  return result;
}

GeneratedAudio OfflineTtsSupertonicImpl::ProcessChunksAndConcatenate(
    const std::vector<std::string> &text_chunks, const std::string &lang,
    int64_t sid, int32_t num_steps, float speed, float silence_duration,
    int32_t seed, GeneratedAudioCallback callback) const {
  NormalDataGenerator gen(0, 1, seed);
  GeneratedAudio result;
  std::vector<std::vector<float>> chunk_samples;
  chunk_samples.reserve(text_chunks.size());
  int32_t num_chunks = static_cast<int32_t>(text_chunks.size());
  for (int32_t i = 0; i < num_chunks; ++i) {
    auto chunk_result =
        Process(text_chunks[i], lang, sid, num_steps, speed, gen);
    if (chunk_result.samples.empty()) {
      continue;
    }
    if (callback) {
      float progress =
          static_cast<float>(i + 1) / static_cast<float>(num_chunks);
      callback(chunk_result.samples.data(), chunk_result.samples.size(),
               progress);
    }
    chunk_samples.push_back(std::move(chunk_result.samples));
  }

  if (chunk_samples.empty()) {
    result.sample_rate = model_->GetSampleRate();
    return result;
  }

  int32_t sample_rate = model_->GetSampleRate();
  size_t silence_len =
      static_cast<size_t>(silence_duration * static_cast<float>(sample_rate));
  size_t total = 0;
  for (const auto &s : chunk_samples) {
    total += s.size();
  }
  if (chunk_samples.size() > 1) {
    total += (chunk_samples.size() - 1) * silence_len;
  }

  std::vector<float> wav_cat;
  wav_cat.reserve(total);
  for (size_t i = 0; i < chunk_samples.size(); ++i) {
    if (i > 0) {
      wav_cat.insert(wav_cat.end(), silence_len, 0.f);
    }
    wav_cat.insert(wav_cat.end(), chunk_samples[i].begin(),
                   chunk_samples[i].end());
  }
  result.samples = std::move(wav_cat);
  result.sample_rate = sample_rate;
  return result;
}

void OfflineTtsSupertonicImpl::InitVoiceStyle(const std::vector<char> &buf) {
  SupertonicStyle style = ParseVoiceStyleFromBinary(buf);
  if (style.ttl_shape.size() != 3 || style.dp_shape.size() != 3) {
    SHERPA_ONNX_LOGE(
        "Invalid voice style: ttl_shape or dp_shape must have 3 dimensions");
    SHERPA_ONNX_EXIT(-1);
  }
  int32_t num_speakers = static_cast<int32_t>(style.ttl_shape[0]);
  if (num_speakers <= 0) {
    SHERPA_ONNX_LOGE(
        "Invalid voice style: num_speakers must be >= 1. Given: %d",
        num_speakers);
    SHERPA_ONNX_EXIT(-1);
  }
  if (style.ttl_shape[0] != style.dp_shape[0]) {
    SHERPA_ONNX_LOGE(
        "Invalid voice style: ttl_shape[0] != dp_shape[0]. Given: %d != %d",
        static_cast<int32_t>(style.ttl_shape[0]),
        static_cast<int32_t>(style.dp_shape[0]));
    SHERPA_ONNX_EXIT(-1);
  }
  num_speakers_ = num_speakers;
  full_style_ = std::move(style);

  if (config_.model.debug) {
    SHERPA_ONNX_LOGE("Number of speakers: %d", num_speakers_);
  }
}

OfflineTtsSupertonicImpl::StyleSliceView
OfflineTtsSupertonicImpl::GetStyleSliceForSid(int64_t sid) const {
  StyleSliceView out;
  int32_t s = 0;
  if (num_speakers_ != 1) {
    int64_t hi = static_cast<int64_t>(num_speakers_ - 1);
    int64_t clamped = std::clamp<int64_t>(sid, 0, hi);
    s = static_cast<int32_t>(clamped);
  }
  const SupertonicStyle &full = full_style_;
  out.ttl_shape = {1, full.ttl_shape[1], full.ttl_shape[2]};
  out.dp_shape = {1, full.dp_shape[1], full.dp_shape[2]};
  size_t ttl_slice = static_cast<size_t>(out.ttl_shape[1] * out.ttl_shape[2]);
  size_t dp_slice = static_cast<size_t>(out.dp_shape[1] * out.dp_shape[2]);
  out.ttl_size = ttl_slice;
  out.dp_size = dp_slice;
  out.ttl_data = full.ttl_data.data() + static_cast<size_t>(s) * ttl_slice;
  out.dp_data = full.dp_data.data() + static_cast<size_t>(s) * dp_slice;
  return out;
}

#if __ANDROID_API__ >= 9
template OfflineTtsSupertonicImpl::OfflineTtsSupertonicImpl(
    AAssetManager *mgr, const OfflineTtsConfig &config);
#endif

#if __OHOS__
template OfflineTtsSupertonicImpl::OfflineTtsSupertonicImpl(
    NativeResourceManager *mgr, const OfflineTtsConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-tts-supertonic-impl.h
================================================
// sherpa-onnx/csrc/offline-tts-supertonic-impl.h
//
// Copyright (c)  2026 zengyw
//
// This file is based on Supertonic TTS
// (https://github.com/Supertone-Inc/supertonic) which is licensed under MIT
// License (Copyright (c) 2025 Supertone Inc.)

#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_SUPERTONIC_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_SUPERTONIC_IMPL_H_

#include <array>
#include <memory>
#include <random>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/normal-data-generator.h"
#include "sherpa-onnx/csrc/offline-tts-impl.h"
#include "sherpa-onnx/csrc/offline-tts-supertonic-model.h"
#include "sherpa-onnx/csrc/offline-tts-supertonic-unicode-processor.h"

namespace sherpa_onnx {

class OfflineTtsSupertonicImpl : public OfflineTtsImpl {
 public:
  explicit OfflineTtsSupertonicImpl(const OfflineTtsConfig &config);

  template <typename Manager>
  OfflineTtsSupertonicImpl(Manager *mgr, const OfflineTtsConfig &config);

  int32_t SampleRate() const override;

  int32_t NumSpeakers() const override { return num_speakers_; }

  [[deprecated("Use Generate(text, GenerationConfig, callback) instead")]]
  GeneratedAudio Generate(
      const std::string &text, int64_t sid = 0, float speed = 1.0,
      GeneratedAudioCallback callback = nullptr) const override;

  GeneratedAudio Generate(
      const std::string &text, const GenerationConfig &config,
      GeneratedAudioCallback callback = nullptr) const override;

 private:
  GeneratedAudio Process(const std::string &text, const std::string &lang,
                         int64_t sid, int32_t num_steps, float speed,
                         NormalDataGenerator &gen) const;

  GeneratedAudio ProcessChunksAndConcatenate(
      const std::vector<std::string> &text_chunks, const std::string &lang,
      int64_t sid, int32_t num_steps, float speed, float silence_duration,
      int32_t seed, GeneratedAudioCallback callback) const;

  void InitVoiceStyle(const std::vector<char> &buf);

  struct StyleSliceView {
    const float *ttl_data;
    size_t ttl_size;
    std::array<int64_t, 3> ttl_shape;
    const float *dp_data;
    size_t dp_size;
    std::array<int64_t, 3> dp_shape;
  };
  StyleSliceView GetStyleSliceForSid(int64_t sid) const;

  OfflineTtsConfig config_;
  std::unique_ptr<OfflineTtsSupertonicModel> model_;
  std::unique_ptr<SupertonicUnicodeProcessor> text_processor_;
  int32_t num_speakers_ = 0;
  SupertonicStyle full_style_;  // shape [num_speakers_, ...]
  Ort::MemoryInfo memory_info_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TTS_SUPERTONIC_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-tts-supertonic-model-config.cc
================================================
// sherpa-onnx/csrc/offline-tts-supertonic-model-config.cc
//
// Copyright (c)  2026 zengyw

#include "sherpa-onnx/csrc/offline-tts-supertonic-model-config.h"

#include <sstream>
#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OfflineTtsSupertonicModelConfig::Register(ParseOptions *po) {
  po->Register("supertonic-duration-predictor", &duration_predictor,
               "Path to duration_predictor.onnx for Supertonic TTS");
  po->Register("supertonic-text-encoder", &text_encoder,
               "Path to text_encoder.onnx for Supertonic TTS");
  po->Register("supertonic-vector-estimator", &vector_estimator,
               "Path to vector_estimator.onnx for Supertonic TTS");
  po->Register("supertonic-vocoder", &vocoder,
               "Path to vocoder.onnx for Supertonic TTS");
  po->Register("supertonic-tts-json", &tts_json,
               "Path to tts.json for Supertonic TTS");
  po->Register("supertonic-unicode-indexer", &unicode_indexer,
               "Path to unicode_indexer.bin for Supertonic TTS");
  po->Register("supertonic-voice-style", &voice_style,
               "Path to Supertonic voice.bin (use sid 0..NumSpeakers()-1 to "
               "select)");
}

bool OfflineTtsSupertonicModelConfig::Validate() const {
  if (duration_predictor.empty()) {
    SHERPA_ONNX_LOGE("Please provide --supertonic-duration-predictor");
    return false;
  }
  if (!FileExists(duration_predictor)) {
    SHERPA_ONNX_LOGE("--supertonic-duration-predictor '%s' does not exist",
                     duration_predictor.c_str());
    return false;
  }

  if (text_encoder.empty()) {
    SHERPA_ONNX_LOGE("Please provide --supertonic-text-encoder");
    return false;
  }
  if (!FileExists(text_encoder)) {
    SHERPA_ONNX_LOGE("--supertonic-text-encoder '%s' does not exist",
                     text_encoder.c_str());
    return false;
  }

  if (vector_estimator.empty()) {
    SHERPA_ONNX_LOGE("Please provide --supertonic-vector-estimator");
    return false;
  }
  if (!FileExists(vector_estimator)) {
    SHERPA_ONNX_LOGE("--supertonic-vector-estimator '%s' does not exist",
                     vector_estimator.c_str());
    return false;
  }

  if (vocoder.empty()) {
    SHERPA_ONNX_LOGE("Please provide --supertonic-vocoder");
    return false;
  }
  if (!FileExists(vocoder)) {
    SHERPA_ONNX_LOGE("--supertonic-vocoder '%s' does not exist",
                     vocoder.c_str());
    return false;
  }

  if (tts_json.empty()) {
    SHERPA_ONNX_LOGE("Please provide --supertonic-tts-json");
    return false;
  }
  if (!FileExists(tts_json)) {
    SHERPA_ONNX_LOGE("--supertonic-tts-json '%s' does not exist",
                     tts_json.c_str());
    return false;
  }

  if (unicode_indexer.empty()) {
    SHERPA_ONNX_LOGE("Please provide --supertonic-unicode-indexer");
    return false;
  }
  if (!FileExists(unicode_indexer)) {
    SHERPA_ONNX_LOGE("--supertonic-unicode-indexer '%s' does not exist",
                     unicode_indexer.c_str());
    return false;
  }

  if (voice_style.empty()) {
    SHERPA_ONNX_LOGE("Please provide --supertonic-voice-style");
    return false;
  }
  if (!FileExists(voice_style)) {
    SHERPA_ONNX_LOGE("--supertonic-voice-style '%s' does not exist",
                     voice_style.c_str());
    return false;
  }
  return true;
}

std::string OfflineTtsSupertonicModelConfig::ToString() const {
  std::ostringstream os;
  os << "OfflineTtsSupertonicModelConfig(";
  os << "duration_predictor=\"" << duration_predictor << "\", ";
  os << "text_encoder=\"" << text_encoder << "\", ";
  os << "vector_estimator=\"" << vector_estimator << "\", ";
  os << "vocoder=\"" << vocoder << "\", ";
  os << "tts_json=\"" << tts_json << "\", ";
  os << "unicode_indexer=\"" << unicode_indexer << "\", ";
  os << "voice_style=\"" << voice_style << "\")";
  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-tts-supertonic-model-config.h
================================================
// sherpa-onnx/csrc/offline-tts-supertonic-model-config.h
//
// Copyright (c)  2026 zengyw

#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_SUPERTONIC_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_SUPERTONIC_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineTtsSupertonicModelConfig {
  // Individual model file paths
  std::string duration_predictor;
  std::string text_encoder;
  std::string vector_estimator;
  std::string vocoder;

  // Path to tts.json (TTS config: ae.sample_rate, ae.base_chunk_size, etc.)
  std::string tts_json;

  // Path to unicode_indexer.bin (raw int32 array)
  std::string unicode_indexer;

  // Path to voice.bin
  std::string voice_style;

  OfflineTtsSupertonicModelConfig() = default;

  OfflineTtsSupertonicModelConfig(const std::string &duration_predictor,
                                  const std::string &text_encoder,
                                  const std::string &vector_estimator,
                                  const std::string &vocoder,
                                  const std::string &tts_json,
                                  const std::string &unicode_indexer,
                                  const std::string &voice_style)
      : duration_predictor(duration_predictor),
        text_encoder(text_encoder),
        vector_estimator(vector_estimator),
        vocoder(vocoder),
        tts_json(tts_json),
        unicode_indexer(unicode_indexer),
        voice_style(voice_style) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TTS_SUPERTONIC_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-tts-supertonic-model.cc
================================================
// sherpa-onnx/csrc/offline-tts-supertonic-model.cc
//
// Copyright (c)  2026 zengyw
//
// This file is based on Supertonic TTS
// (https://github.com/Supertone-Inc/supertonic) which is licensed under MIT
// License (Copyright (c) 2025 Supertone Inc.)

#include "sherpa-onnx/csrc/offline-tts-supertonic-model.h"

#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "nlohmann/json.hpp"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

using json = nlohmann::json;

class OfflineTtsSupertonicModel::Impl {
 public:
  explicit Impl(const OfflineTtsModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)) {
    Init();
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineTtsModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)) {
    Init(mgr);
  }

  const SupertonicConfig &GetConfig() const { return cfg_; }
  int32_t GetSampleRate() const { return cfg_.ae.sample_rate; }

  Ort::Value RunDurationPredictor(Ort::Value text_ids, Ort::Value style_dp,
                                  Ort::Value text_mask) const {
    std::vector<Ort::Value> inputs;
    inputs.push_back(std::move(text_ids));
    inputs.push_back(std::move(style_dp));
    inputs.push_back(std::move(text_mask));
    auto outputs =
        dp_sess_->Run(Ort::RunOptions{nullptr}, dp_input_names_ptr_.data(),
                      inputs.data(), inputs.size(), dp_output_names_ptr_.data(),
                      dp_output_names_ptr_.size());
    return std::move(outputs[0]);
  }

  Ort::Value RunTextEncoder(Ort::Value text_ids, Ort::Value style_ttl,
                            Ort::Value text_mask) const {
    std::vector<Ort::Value> inputs;
    inputs.push_back(std::move(text_ids));
    inputs.push_back(std::move(style_ttl));
    inputs.push_back(std::move(text_mask));
    auto outputs = text_enc_sess_->Run(
        Ort::RunOptions{nullptr}, text_enc_input_names_ptr_.data(),
        inputs.data(), inputs.size(), text_enc_output_names_ptr_.data(),
        text_enc_output_names_ptr_.size());
    return std::move(outputs[0]);
  }

  Ort::Value RunVectorEstimator(Ort::Value noisy_latent,
                                Ort::Value current_step, Ort::Value &text_emb,
                                Ort::Value &style_ttl, Ort::Value &latent_mask,
                                Ort::Value &text_mask,
                                Ort::Value &total_step) const {
    std::vector<Ort::Value> inputs;
    inputs.push_back(std::move(noisy_latent));
    inputs.push_back(View(&text_emb));
    inputs.push_back(View(&style_ttl));
    inputs.push_back(View(&latent_mask));
    inputs.push_back(View(&text_mask));
    inputs.push_back(std::move(current_step));
    inputs.push_back(View(&total_step));
    auto outputs = vector_est_sess_->Run(
        Ort::RunOptions{nullptr}, vector_est_input_names_ptr_.data(),
        inputs.data(), inputs.size(), vector_est_output_names_ptr_.data(),
        vector_est_output_names_ptr_.size());
    return std::move(outputs[0]);
  }

  Ort::Value RunVocoder(Ort::Value latent) const {
    std::vector<Ort::Value> inputs;
    inputs.push_back(std::move(latent));
    auto outputs = vocoder_sess_->Run(
        Ort::RunOptions{nullptr}, vocoder_input_names_ptr_.data(),
        inputs.data(), inputs.size(), vocoder_output_names_ptr_.data(),
        vocoder_output_names_ptr_.size());
    return std::move(outputs[0]);
  }

 private:
  void PrintModelInfo(Ort::Session *sess, const std::string &name) const {
    if (!config_.debug) {
      return;
    }
    std::vector<std::string> input_names, output_names;
    std::vector<const char *> input_names_ptr, output_names_ptr;
    GetInputNames(sess, &input_names, &input_names_ptr);
    GetOutputNames(sess, &output_names, &output_names_ptr);
    std::ostringstream os;
    os << "----------" << name << "----------\n";
    os << "Input names: ";
    for (const auto &n : input_names) os << n << " ";
    os << "\nOutput names: ";
    for (const auto &n : output_names) os << n << " ";
    os << "\n";
#if __OHOS__
    SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
    SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
  }

  void PrintDebugInfo(const std::string &tts_config_path) const {
    if (!config_.debug) {
      return;
    }
    std::ostringstream os;
    os << "---supertonic model---\n";
    os << "tts_config: " << tts_config_path << "\n";
    os << "sample_rate: " << cfg_.ae.sample_rate << "\n";
    os << "base_chunk_size: " << cfg_.ae.base_chunk_size << "\n";
    os << "chunk_compress_factor: " << cfg_.ttl.chunk_compress_factor << "\n";
    os << "latent_dim: " << cfg_.ttl.latent_dim << "\n";
#if __OHOS__
    SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
    SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
  }

  void PrintModelInfos() const {
    if (!config_.debug) {
      return;
    }
    PrintModelInfo(dp_sess_.get(), "duration_predictor");
    PrintModelInfo(text_enc_sess_.get(), "text_encoder");
    PrintModelInfo(vector_est_sess_.get(), "vector_estimator");
    PrintModelInfo(vocoder_sess_.get(), "vocoder");
  }

  void InitDurationPredictor(void *model_data, size_t model_data_length) {
    if (model_data) {
      dp_sess_ = std::make_unique<Ort::Session>(env_, model_data,
                                                model_data_length, sess_opts_);
    } else if (!dp_sess_) {
      SHERPA_ONNX_LOGE(
          "Please pass buffer data or initialize duration predictor session "
          "outside of this function");
      SHERPA_ONNX_EXIT(-1);
    }
    GetInputNames(dp_sess_.get(), &dp_input_names_, &dp_input_names_ptr_);
    GetOutputNames(dp_sess_.get(), &dp_output_names_, &dp_output_names_ptr_);
  }

  void InitTextEncoder(void *model_data, size_t model_data_length) {
    if (model_data) {
      text_enc_sess_ = std::make_unique<Ort::Session>(
          env_, model_data, model_data_length, sess_opts_);
    } else if (!text_enc_sess_) {
      SHERPA_ONNX_LOGE(
          "Please pass buffer data or initialize text encoder session outside "
          "of this function");
      SHERPA_ONNX_EXIT(-1);
    }
    GetInputNames(text_enc_sess_.get(), &text_enc_input_names_,
                  &text_enc_input_names_ptr_);
    GetOutputNames(text_enc_sess_.get(), &text_enc_output_names_,
                   &text_enc_output_names_ptr_);
  }

  void InitVectorEstimator(void *model_data, size_t model_data_length) {
    if (model_data) {
      vector_est_sess_ = std::make_unique<Ort::Session>(
          env_, model_data, model_data_length, sess_opts_);
    } else if (!vector_est_sess_) {
      SHERPA_ONNX_LOGE(
          "Please pass buffer data or initialize vector estimator session "
          "outside of this function");
      SHERPA_ONNX_EXIT(-1);
    }
    GetInputNames(vector_est_sess_.get(), &vector_est_input_names_,
                  &vector_est_input_names_ptr_);
    GetOutputNames(vector_est_sess_.get(), &vector_est_output_names_,
                   &vector_est_output_names_ptr_);
  }

  void InitVocoder(void *model_data, size_t model_data_length) {
    if (model_data) {
      vocoder_sess_ = std::make_unique<Ort::Session>(
          env_, model_data, model_data_length, sess_opts_);
    } else if (!vocoder_sess_) {
      SHERPA_ONNX_LOGE(
          "Please pass buffer data or initialize vocoder session outside of "
          "this function");
      SHERPA_ONNX_EXIT(-1);
    }
    GetInputNames(vocoder_sess_.get(), &vocoder_input_names_,
                  &vocoder_input_names_ptr_);
    GetOutputNames(vocoder_sess_.get(), &vocoder_output_names_,
                   &vocoder_output_names_ptr_);
  }

  void LoadModels() {
    dp_sess_ = std::make_unique<Ort::Session>(
        env_, SHERPA_ONNX_TO_ORT_PATH(config_.supertonic.duration_predictor),
        sess_opts_);
    InitDurationPredictor(nullptr, 0);

    text_enc_sess_ = std::make_unique<Ort::Session>(
        env_, SHERPA_ONNX_TO_ORT_PATH(config_.supertonic.text_encoder),
        sess_opts_);
    InitTextEncoder(nullptr, 0);

    vector_est_sess_ = std::make_unique<Ort::Session>(
        env_, SHERPA_ONNX_TO_ORT_PATH(config_.supertonic.vector_estimator),
        sess_opts_);
    InitVectorEstimator(nullptr, 0);

    vocoder_sess_ = std::make_unique<Ort::Session>(
        env_, SHERPA_ONNX_TO_ORT_PATH(config_.supertonic.vocoder), sess_opts_);
    InitVocoder(nullptr, 0);
  }

  template <typename Manager>
  void LoadOneModel(Manager *mgr, const std::string &path,
                    const char *model_name,
                    const std::function<void(void *, size_t)> &init) {
    auto buf = ReadFile(mgr, path);
    if (buf.empty()) {
      SHERPA_ONNX_LOGE("Failed to read %s model: %s", model_name, path.c_str());
      SHERPA_ONNX_EXIT(-1);
    }
    init(buf.data(), buf.size());
  }

  template <typename Manager>
  void LoadModels(Manager *mgr) {
    LoadOneModel(
        mgr, config_.supertonic.duration_predictor, "duration_predictor",
        [this](void *p, size_t len) { InitDurationPredictor(p, len); });
    LoadOneModel(mgr, config_.supertonic.text_encoder, "text_encoder",
                 [this](void *p, size_t len) { InitTextEncoder(p, len); });
    LoadOneModel(mgr, config_.supertonic.vector_estimator, "vector_estimator",
                 [this](void *p, size_t len) { InitVectorEstimator(p, len); });
    LoadOneModel(mgr, config_.supertonic.vocoder, "vocoder",
                 [this](void *p, size_t len) { InitVocoder(p, len); });
  }

  void Init() {
    std::string tts_config_path =
        ResolveAbsolutePath(config_.supertonic.tts_json);
    LoadConfig(tts_config_path);
    PrintDebugInfo(tts_config_path);
    LoadModels();
    PrintModelInfos();
  }

  template <typename Manager>
  void Init(Manager *mgr) {
    std::string tts_config_path =
        ResolveAbsolutePath(config_.supertonic.tts_json);
    LoadConfig(mgr, tts_config_path);
    PrintDebugInfo(tts_config_path);
    LoadModels(mgr);
    PrintModelInfos();
  }

  void ParseConfig(const json &j) {
    if (j.find("ae") == j.end() || j.find("ttl") == j.end()) {
      SHERPA_ONNX_LOGE("Invalid config file: missing 'ae' or 'ttl' section");
      SHERPA_ONNX_EXIT(-1);
    }
    const auto &ae = j["ae"];
    const auto &ttl = j["ttl"];
    auto get_int = [](const json &obj, const char *key,
                      const char *section) -> int32_t {
      if (obj.find(key) == obj.end()) {
        SHERPA_ONNX_LOGE("Invalid config: %s.%s missing", section, key);
        SHERPA_ONNX_EXIT(-1);
      }
      if (!obj[key].is_number_integer()) {
        SHERPA_ONNX_LOGE("Invalid config: %s.%s must be integer", section, key);
        SHERPA_ONNX_EXIT(-1);
      }
      return obj[key].get<int32_t>();
    };
    cfg_.ae.sample_rate = get_int(ae, "sample_rate", "ae");
    cfg_.ae.base_chunk_size = get_int(ae, "base_chunk_size", "ae");
    cfg_.ttl.chunk_compress_factor =
        get_int(ttl, "chunk_compress_factor", "ttl");
    cfg_.ttl.latent_dim = get_int(ttl, "latent_dim", "ttl");
    if (cfg_.ae.sample_rate <= 0) {
      SHERPA_ONNX_LOGE("Invalid sample_rate: %d", cfg_.ae.sample_rate);
      SHERPA_ONNX_EXIT(-1);
    }
    if (cfg_.ae.base_chunk_size <= 0) {
      SHERPA_ONNX_LOGE("Invalid base_chunk_size: %d", cfg_.ae.base_chunk_size);
      SHERPA_ONNX_EXIT(-1);
    }
    if (cfg_.ttl.chunk_compress_factor <= 0) {
      SHERPA_ONNX_LOGE("Invalid chunk_compress_factor: %d",
                       cfg_.ttl.chunk_compress_factor);
      SHERPA_ONNX_EXIT(-1);
    }
    if (cfg_.ttl.latent_dim <= 0) {
      SHERPA_ONNX_LOGE("Invalid latent_dim: %d", cfg_.ttl.latent_dim);
      SHERPA_ONNX_EXIT(-1);
    }
  }

  static json LoadJsonFromBuffer(const std::vector<char> &buf) {
    if (buf.empty()) {
      SHERPA_ONNX_LOGE("Empty json buffer");
      SHERPA_ONNX_EXIT(-1);
    }
    try {
      return json::parse(buf.begin(), buf.end());
    } catch (const std::exception &e) {
      SHERPA_ONNX_LOGE("Failed to parse JSON buffer: %s", e.what());
      SHERPA_ONNX_EXIT(-1);
    }
    return json{};
  }

  void LoadConfig(const std::string &config_path) {
    auto buf = ReadFile(config_path);
    if (buf.empty()) {
      SHERPA_ONNX_LOGE("Failed to read config: %s", config_path.c_str());
      SHERPA_ONNX_EXIT(-1);
    }
    json j = LoadJsonFromBuffer(buf);
    ParseConfig(j);
  }

  template <typename Manager>
  void LoadConfig(Manager *mgr, const std::string &config_path) {
    auto buf = ReadFile(mgr, config_path);
    if (buf.empty()) {
      SHERPA_ONNX_LOGE("Failed to read config: %s", config_path.c_str());
      SHERPA_ONNX_EXIT(-1);
    }
    json j = LoadJsonFromBuffer(buf);
    ParseConfig(j);
  }

  OfflineTtsModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  SupertonicConfig cfg_;

  std::unique_ptr<Ort::Session> dp_sess_;
  std::unique_ptr<Ort::Session> text_enc_sess_;
  std::unique_ptr<Ort::Session> vector_est_sess_;
  std::unique_ptr<Ort::Session> vocoder_sess_;

  std::vector<std::string> dp_input_names_;
  std::vector<const char *> dp_input_names_ptr_;
  std::vector<std::string> dp_output_names_;
  std::vector<const char *> dp_output_names_ptr_;

  std::vector<std::string> text_enc_input_names_;
  std::vector<const char *> text_enc_input_names_ptr_;
  std::vector<std::string> text_enc_output_names_;
  std::vector<const char *> text_enc_output_names_ptr_;

  std::vector<std::string> vector_est_input_names_;
  std::vector<const char *> vector_est_input_names_ptr_;
  std::vector<std::string> vector_est_output_names_;
  std::vector<const char *> vector_est_output_names_ptr_;

  std::vector<std::string> vocoder_input_names_;
  std::vector<const char *> vocoder_input_names_ptr_;
  std::vector<std::string> vocoder_output_names_;
  std::vector<const char *> vocoder_output_names_ptr_;
};

const SupertonicConfig &OfflineTtsSupertonicModel::GetConfig() const {
  return impl_->GetConfig();
}

int32_t OfflineTtsSupertonicModel::GetSampleRate() const {
  return impl_->GetSampleRate();
}

Ort::Value OfflineTtsSupertonicModel::RunDurationPredictor(
    Ort::Value text_ids, Ort::Value style_dp, Ort::Value text_mask) const {
  return impl_->RunDurationPredictor(std::move(text_ids), std::move(style_dp),
                                     std::move(text_mask));
}

Ort::Value OfflineTtsSupertonicModel::RunTextEncoder(
    Ort::Value text_ids, Ort::Value style_ttl, Ort::Value text_mask) const {
  return impl_->RunTextEncoder(std::move(text_ids), std::move(style_ttl),
                               std::move(text_mask));
}

Ort::Value OfflineTtsSupertonicModel::RunVectorEstimator(
    Ort::Value noisy_latent, Ort::Value current_step, Ort::Value &text_emb,
    Ort::Value &style_ttl, Ort::Value &latent_mask, Ort::Value &text_mask,
    Ort::Value &total_step) const {
  return impl_->RunVectorEstimator(std::move(noisy_latent),
                                   std::move(current_step), text_emb, style_ttl,
                                   latent_mask, text_mask, total_step);
}

Ort::Value OfflineTtsSupertonicModel::RunVocoder(Ort::Value latent) const {
  return impl_->RunVocoder(std::move(latent));
}

OfflineTtsSupertonicModel::OfflineTtsSupertonicModel(
    const OfflineTtsModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineTtsSupertonicModel::OfflineTtsSupertonicModel(
    Manager *mgr, const OfflineTtsModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineTtsSupertonicModel::~OfflineTtsSupertonicModel() = default;

#if __ANDROID_API__ >= 9
template OfflineTtsSupertonicModel::OfflineTtsSupertonicModel(
    AAssetManager *mgr, const OfflineTtsModelConfig &config);
#endif

#if __OHOS__
template OfflineTtsSupertonicModel::OfflineTtsSupertonicModel(
    NativeResourceManager *mgr, const OfflineTtsModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-tts-supertonic-model.h
================================================
// sherpa-onnx/csrc/offline-tts-supertonic-model.h
//
// Copyright (c)  2026 zengyw
//
// This file is based on Supertonic TTS
// (https://github.com/Supertone-Inc/supertonic) which is licensed under MIT
// License (Copyright (c) 2025 Supertone Inc.)

#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_SUPERTONIC_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_SUPERTONIC_MODEL_H_

#include <memory>
#include <string>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-tts-model-config.h"

namespace sherpa_onnx {

struct SupertonicConfig {
  struct AEConfig {
    int32_t sample_rate;
    int32_t base_chunk_size;
  } ae;

  struct TTLConfig {
    int32_t chunk_compress_factor;
    int32_t latent_dim;
  } ttl;
};

struct SupertonicStyle {
  std::vector<float> ttl_data;
  std::vector<float> dp_data;
  std::vector<int64_t> ttl_shape;
  std::vector<int64_t> dp_shape;
};

class OfflineTtsSupertonicModel {
 public:
  ~OfflineTtsSupertonicModel();

  explicit OfflineTtsSupertonicModel(const OfflineTtsModelConfig &config);

  template <typename Manager>
  OfflineTtsSupertonicModel(Manager *mgr, const OfflineTtsModelConfig &config);

  const SupertonicConfig &GetConfig() const;
  int32_t GetSampleRate() const;

  Ort::Value RunDurationPredictor(Ort::Value text_ids, Ort::Value style_dp,
                                  Ort::Value text_mask) const;
  Ort::Value RunTextEncoder(Ort::Value text_ids, Ort::Value style_ttl,
                            Ort::Value text_mask) const;

  Ort::Value RunVectorEstimator(Ort::Value noisy_latent,
                                Ort::Value current_step, Ort::Value &text_emb,
                                Ort::Value &style_ttl, Ort::Value &latent_mask,
                                Ort::Value &text_mask,
                                Ort::Value &total_step) const;
  Ort::Value RunVocoder(Ort::Value latent) const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TTS_SUPERTONIC_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/offline-tts-supertonic-unicode-processor.cc
================================================
// sherpa-onnx/csrc/offline-tts-supertonic-unicode-processor.cc
//
// Copyright (c)  2026 zengyw
//
// This file is based on Supertonic TTS
// (https://github.com/Supertone-Inc/supertonic) which is licensed under MIT
// License (Copyright (c) 2025 Supertone Inc.)

#include "sherpa-onnx/csrc/offline-tts-supertonic-unicode-processor.h"

#include <array>
#include <cctype>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {
namespace {

// Hangul syllable decomposition constants (Unicode Standard Annex #15)
static constexpr uint32_t kHangulSbase = 0xAC00;  // Start of Hangul syllables
static constexpr uint32_t kHangulLbase = 0x1100;  // Start of Hangul Jamo
static constexpr uint32_t kHangulVbase = 0x1161;  // Start of Hangul vowels
static constexpr uint32_t kHangulTbase = 0x11A7;  // Start of Hangul trailing
static constexpr int32_t kHangulLcount = 19;
static constexpr int32_t kHangulVcount = 21;
static constexpr int32_t kHangulTcount = 28;
static constexpr int32_t kHangulNcount = kHangulVcount * kHangulTcount;  // 588
static constexpr int32_t kHangulScount =
    kHangulLcount * kHangulNcount;  // 11172  // NOLINT

// Latin NFKD decompositions via switch (no static map allocation).
// Returns true if codepoint was decomposed, false otherwise.
static bool DecomposeLatin(uint32_t codepoint, std::vector<uint16_t> *out) {
  auto push2 = [&](uint16_t a, uint16_t b) {
    out->push_back(a);
    out->push_back(b);
  };
  switch (codepoint) {
    case 0x00C1:
      push2(0x0041, 0x0301);
      return true;
    case 0x00C9:
      push2(0x0045, 0x0301);
      return true;
    case 0x00CD:
      push2(0x0049, 0x0301);
      return true;
    case 0x00D3:
      push2(0x004F, 0x0301);
      return true;
    case 0x00DA:
      push2(0x0055, 0x0301);
      return true;
    case 0x00E1:
      push2(0x0061, 0x0301);
      return true;
    case 0x00E9:
      push2(0x0065, 0x0301);
      return true;
    case 0x00ED:
      push2(0x0069, 0x0301);
      return true;
    case 0x00F3:
      push2(0x006F, 0x0301);
      return true;
    case 0x00FA:
      push2(0x0075, 0x0301);
      return true;
    case 0x00C0:
      push2(0x0041, 0x0300);
      return true;
    case 0x00C8:
      push2(0x0045, 0x0300);
      return true;
    case 0x00CC:
      push2(0x0049, 0x0300);
      return true;
    case 0x00D2:
      push2(0x004F, 0x0300);
      return true;
    case 0x00D9:
      push2(0x0055, 0x0300);
      return true;
    case 0x00E0:
      push2(0x0061, 0x0300);
      return true;
    case 0x00E8:
      push2(0x0065, 0x0300);
      return true;
    case 0x00EC:
      push2(0x0069, 0x0300);
      return true;
    case 0x00F2:
      push2(0x006F, 0x0300);
      return true;
    case 0x00F9:
      push2(0x0075, 0x0300);
      return true;
    case 0x00C2:
      push2(0x0041, 0x0302);
      return true;
    case 0x00CA:
      push2(0x0045, 0x0302);
      return true;
    case 0x00CE:
      push2(0x0049, 0x0302);
      return true;
    case 0x00D4:
      push2(0x004F, 0x0302);
      return true;
    case 0x00DB:
      push2(0x0055, 0x0302);
      return true;
    case 0x00E2:
      push2(0x0061, 0x0302);
      return true;
    case 0x00EA:
      push2(0x0065, 0x0302);
      return true;
    case 0x00EE:
      push2(0x0069, 0x0302);
      return true;
    case 0x00F4:
      push2(0x006F, 0x0302);
      return true;
    case 0x00FB:
      push2(0x0075, 0x0302);
      return true;
    case 0x00C3:
      push2(0x0041, 0x0303);
      return true;
    case 0x00D1:
      push2(0x004E, 0x0303);
      return true;
    case 0x00D5:
      push2(0x004F, 0x0303);
      return true;
    case 0x00E3:
      push2(0x0061, 0x0303);
      return true;
    case 0x00F1:
      push2(0x006E, 0x0303);
      return true;
    case 0x00F5:
      push2(0x006F, 0x0303);
      return true;
    case 0x00C4:
      push2(0x0041, 0x0308);
      return true;
    case 0x00CB:
      push2(0x0045, 0x0308);
      return true;
    case 0x00CF:
      push2(0x0049, 0x0308);
      return true;
    case 0x00D6:
      push2(0x004F, 0x0308);
      return true;
    case 0x00DC:
      push2(0x0055, 0x0308);
      return true;
    case 0x00E4:
      push2(0x0061, 0x0308);
      return true;
    case 0x00EB:
      push2(0x0065, 0x0308);
      return true;
    case 0x00EF:
      push2(0x0069, 0x0308);
      return true;
    case 0x00F6:
      push2(0x006F, 0x0308);
      return true;
    case 0x00FC:
      push2(0x0075, 0x0308);
      return true;
    case 0x00C7:
      push2(0x0043, 0x0327);
      return true;
    case 0x00E7:
      push2(0x0063, 0x0327);
      return true;
    default:
      return false;
  }
}

static void DecomposeCharacter(uint32_t codepoint,
                               std::vector<uint16_t> *output) {
  if (codepoint >= kHangulSbase && codepoint < kHangulSbase + kHangulScount) {
    uint32_t s_index = codepoint - kHangulSbase;
    uint32_t l_index = s_index / kHangulNcount;
    uint32_t v_index = (s_index % kHangulNcount) / kHangulTcount;
    uint32_t t_index = s_index % kHangulTcount;

    output->push_back(static_cast<uint16_t>(kHangulLbase + l_index));
    output->push_back(static_cast<uint16_t>(kHangulVbase + v_index));
    if (t_index > 0) {
      output->push_back(static_cast<uint16_t>(kHangulTbase + t_index));
    }
    return;
  }

  if (DecomposeLatin(codepoint, output)) return;

  if (codepoint > 0xFFFF) return;
  output->push_back(static_cast<uint16_t>(codepoint));
}

// Decode the last UTF-8 codepoint in s. Returns 0 if s is empty or invalid.
static uint32_t LastCodepointUtf8(const std::string &s) {
  if (s.empty()) return 0;

  size_t start = s.size() - 1;
  while (start > 0 && (static_cast<unsigned char>(s[start]) & 0xC0) == 0x80) {
    --start;
  }

  unsigned char c = static_cast<unsigned char>(s[start]);

  if ((c & 0x80) == 0) return c;

  if ((c & 0xE0) == 0xC0 && start + 1 < s.size()) {
    return ((c & 0x1F) << 6) |
           (static_cast<unsigned char>(s[start + 1]) & 0x3F);
  }

  if ((c & 0xF0) == 0xE0 && start + 2 < s.size()) {
    return ((c & 0x0F) << 12) |
           ((static_cast<unsigned char>(s[start + 1]) & 0x3F) << 6) |
           (static_cast<unsigned char>(s[start + 2]) & 0x3F);
  }

  if ((c & 0xF8) == 0xF0 && start + 3 < s.size()) {
    return ((c & 0x07) << 18) |
           ((static_cast<unsigned char>(s[start + 1]) & 0x3F) << 12) |
           ((static_cast<unsigned char>(s[start + 2]) & 0x3F) << 6) |
           (static_cast<unsigned char>(s[start + 3]) & 0x3F);
  }

  return 0;
}

static bool IsEndingPunctuationCodepoint(uint32_t cp) {
  switch (cp) {
    case 0x2026:  // …
    case 0x3002:  // 。
    case 0x300D:  // 」
    case 0x300F:  // 』
    case 0x3011:  // 】
    case 0x3009:  // 〉
    case 0x300B:  // 》
    case 0x203A:  // ›
    case 0x00BB:  // »
    case 0x201C:  // "
    case 0x201D:  // "
    case 0x2018:  // '
    case 0x2019:  // '
      return true;
    default:
      return false;
  }
}

static void ReplaceString(std::string *text, const std::string &from,
                          const std::string &to) {
  size_t pos = 0;
  while ((pos = text->find(from, pos)) != std::string::npos) {
    text->replace(pos, from.length(), to);
    pos += to.length();
  }
}

// Load indexer from raw int32_t binary (from generate_indexer_bin.py).
static std::vector<int32_t> LoadIndexerFromBinary(const char *data,
                                                  size_t size) {
  if (size == 0 || (size % sizeof(int32_t) != 0)) {
    SHERPA_ONNX_LOGE(
        "Invalid unicode indexer .bin size: %zu (must be multiple of %zu)",
        size, sizeof(int32_t));
    SHERPA_ONNX_EXIT(-1);
  }
  size_t count = size / sizeof(int32_t);
  std::vector<int32_t> out(count);
  std::memcpy(out.data(), data, size);
  return out;
}

static std::vector<int32_t> LoadIndexerFromPathImpl(
    const std::vector<char> &buf, const std::string &path) {
  if (buf.empty()) {
    SHERPA_ONNX_LOGE("Failed to read unicode indexer: %s", path.c_str());
    SHERPA_ONNX_EXIT(-1);
  }
  return LoadIndexerFromBinary(buf.data(), buf.size());
}

}  // namespace

SupertonicUnicodeProcessor::SupertonicUnicodeProcessor(
    const std::string &unicode_indexer_path) {
  if (!EndsWith(unicode_indexer_path, ".bin")) {
    SHERPA_ONNX_LOGE("Unicode indexer path must be end with .bin. Given: '%s'",
                     unicode_indexer_path.c_str());
    SHERPA_ONNX_EXIT(-1);
  }
  std::vector<char> buf = ReadFile(unicode_indexer_path);
  indexer_ = LoadIndexerFromPathImpl(buf, unicode_indexer_path);
}

template <typename Manager>
SupertonicUnicodeProcessor::SupertonicUnicodeProcessor(
    Manager *mgr, const std::string &unicode_indexer_path) {
  if (!EndsWith(unicode_indexer_path, ".bin")) {
    SHERPA_ONNX_LOGE("Unicode indexer path must be end with .bin. Given: '%s'",
                     unicode_indexer_path.c_str());
    SHERPA_ONNX_EXIT(-1);
  }
  std::vector<char> buf = ReadFile(mgr, unicode_indexer_path);
  indexer_ = LoadIndexerFromPathImpl(buf, unicode_indexer_path);
}

std::string SupertonicUnicodeProcessor::PreprocessText(
    const std::string &text, const std::string &lang) const {
  std::string result = text;

  static constexpr std::array<std::pair<const char *, const char *>, 25>
      replacements = {{
          {"–", "-"},
          {"‑", "-"},
          {"—", "-"},
          {"_", " "},
          {u8"\u201C", "\""},
          {u8"\u201D", "\""},
          {u8"\u2018", "'"},
          {u8"\u2019", "'"},
          {"´", "'"},
          {"`", "'"},
          {"[", " "},
          {"]", " "},
          {"|", " "},
          {"/", " "},
          {"#", " "},
          {"→", " "},
          {"←", " "},
          {"♥", ""},
          {"☆", ""},
          {"♡", ""},
          {"©", ""},
          {"\\", ""},
          {"@", " at "},
          {"e.g.,", "for example, "},
          {"i.e.,", "that is, "},
      }};

  for (const auto &repl : replacements) {
    ReplaceString(&result, repl.first, repl.second);
  }

  // Remove some U+1Fxxx emoji/symbols (4-byte UTF-8 sequences: F0 9F 80-BF
  // 80-BF). Note: This only removes a subset of emoji (U+1F000-U+1FFFF), not
  // all emoji. Optimized: manual scanning instead of regex.
  std::string emoji_removed;
  emoji_removed.reserve(result.size());
  for (size_t i = 0; i < result.size();) {
    if (i + 3 < result.size() &&
        static_cast<unsigned char>(result[i]) == 0xF0 &&
        static_cast<unsigned char>(result[i + 1]) == 0x9F &&
        (static_cast<unsigned char>(result[i + 2]) & 0xC0) == 0x80 &&
        (static_cast<unsigned char>(result[i + 3]) & 0xC0) == 0x80) {
      i += 4;  // Skip emoji
    } else {
      emoji_removed += result[i];
      ++i;
    }
  }
  result = std::move(emoji_removed);

  // Fix spacing around punctuation (optimized: single pass)
  std::string punct_fixed;
  punct_fixed.reserve(result.size());
  for (size_t i = 0; i < result.size(); ++i) {
    if (result[i] == ' ' && i + 1 < result.size()) {
      char next = result[i + 1];
      if (next == ',' || next == '.' || next == '!' || next == '?' ||
          next == ';' || next == ':' || next == '\'') {
        punct_fixed += next;
        ++i;  // Skip space and punctuation
        continue;
      }
    }
    punct_fixed += result[i];
  }
  result = std::move(punct_fixed);

  // Collapse adjacent duplicate quotes ("" -> ", '' -> ') while preserving
  // normal paired quotes. Discard backticks. Single-pass O(n) algorithm.
  std::string quotes_fixed;
  quotes_fixed.reserve(result.size());
  for (size_t i = 0; i < result.size(); ++i) {
    if (result[i] == '`') {
      // Skip backticks
      continue;
    }
    if (result[i] == '"' && i + 1 < result.size() && result[i + 1] == '"') {
      // Collapse adjacent double quotes: "" -> "
      quotes_fixed += '"';
      ++i;  // Skip the second quote
    } else if (result[i] == '\'' && i + 1 < result.size() &&
               result[i + 1] == '\'') {
      // Collapse adjacent single quotes: '' -> '
      quotes_fixed += '\'';
      ++i;  // Skip the second quote
    } else {
      quotes_fixed += result[i];
    }
  }
  result = std::move(quotes_fixed);

  // Remove extra spaces (optimized: single pass)
  std::string spaces_fixed;
  spaces_fixed.reserve(result.size());
  bool last_was_space = false;
  for (char c : result) {
    if (std::isspace(static_cast<unsigned char>(c))) {
      if (!last_was_space) {
        spaces_fixed += ' ';
        last_was_space = true;
      }
    } else {
      spaces_fixed += c;
      last_was_space = false;
    }
  }
  result = Trim(spaces_fixed);

  if (!result.empty()) {
    char last_char = result.back();
    bool ends_with_punct =
        (last_char == '.' || last_char == '!' || last_char == '?' ||
         last_char == ';' || last_char == ':' || last_char == ',' ||
         last_char == '\'' || last_char == '"' || last_char == ')' ||
         last_char == ']' || last_char == '}' || last_char == '>');
    if (!ends_with_punct) {
      ends_with_punct = IsEndingPunctuationCodepoint(LastCodepointUtf8(result));
    }
    if (!ends_with_punct) {
      result += ".";
    }
  }

  // Wrap text with language tags
  result = "<" + lang + ">" + result + "</" + lang + ">";

  return result;
}

std::vector<uint16_t> SupertonicUnicodeProcessor::TextToUnicodeValues(
    const std::string &text) const {
  std::vector<uint16_t> unicode_values;
  size_t i = 0;

  while (i < text.size()) {
    uint32_t codepoint = 0;
    unsigned char c = static_cast<unsigned char>(text[i]);

    if ((c & 0x80) == 0) {
      codepoint = c;
      i += 1;
    } else if ((c & 0xE0) == 0xC0 && i + 1 < text.size()) {
      codepoint = (c & 0x1F) << 6;
      codepoint |= (static_cast<unsigned char>(text[i + 1]) & 0x3F);
      i += 2;
    } else if ((c & 0xF0) == 0xE0 && i + 2 < text.size()) {
      codepoint = (c & 0x0F) << 12;
      codepoint |= (static_cast<unsigned char>(text[i + 1]) & 0x3F) << 6;
      codepoint |= (static_cast<unsigned char>(text[i + 2]) & 0x3F);
      i += 3;
    } else if ((c & 0xF8) == 0xF0 && i + 3 < text.size()) {
      codepoint = (c & 0x07) << 18;
      codepoint |= (static_cast<unsigned char>(text[i + 1]) & 0x3F) << 12;
      codepoint |= (static_cast<unsigned char>(text[i + 2]) & 0x3F) << 6;
      codepoint |= (static_cast<unsigned char>(text[i + 3]) & 0x3F);
      i += 4;
    } else {
      i += 1;
      continue;
    }

    DecomposeCharacter(codepoint, &unicode_values);
  }

  return unicode_values;
}

void SupertonicUnicodeProcessor::Process(
    const std::string &text, const std::string &lang,
    std::vector<int64_t> *text_ids, std::vector<float> *text_mask_flat,
    std::vector<int64_t> *text_mask_shape) const {
  const std::string processed = PreprocessText(text, lang);
  const std::vector<uint16_t> unicode_vals = TextToUnicodeValues(processed);
  const size_t seq_len = unicode_vals.size();

  constexpr int64_t kUnknownId = 0;
  text_ids->assign(seq_len, kUnknownId);
  for (size_t i = 0; i < seq_len; ++i) {
    const size_t u = unicode_vals[i];
    (*text_ids)[i] = (u < indexer_.size()) ? indexer_[u] : kUnknownId;
  }

  // Batch size is always 1: mask is all ones, shape [1, 1, seq_len].
  text_mask_flat->assign(seq_len, 1.0f);
  text_mask_shape->assign({1, 1, static_cast<int64_t>(seq_len)});
}

#if __ANDROID_API__ >= 9
template SupertonicUnicodeProcessor::SupertonicUnicodeProcessor(
    AAssetManager *mgr, const std::string &unicode_indexer_path);
#endif

#if __OHOS__
template SupertonicUnicodeProcessor::SupertonicUnicodeProcessor(
    NativeResourceManager *mgr, const std::string &unicode_indexer_path);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-tts-supertonic-unicode-processor.h
================================================
// sherpa-onnx/csrc/offline-tts-supertonic-unicode-processor.h
//
// Copyright (c)  2026 zengyw
//
// This file is based on Supertonic TTS
// (https://github.com/Supertone-Inc/supertonic) which is licensed under MIT
// License (Copyright (c) 2025 Supertone Inc.)

#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_SUPERTONIC_UNICODE_PROCESSOR_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_SUPERTONIC_UNICODE_PROCESSOR_H_

#include <cstdint>
#include <string>
#include <vector>

namespace sherpa_onnx {

// Unicode text processor for Supertonic TTS
class SupertonicUnicodeProcessor {
 public:
  explicit SupertonicUnicodeProcessor(const std::string &unicode_indexer_path);

  template <typename Manager>
  SupertonicUnicodeProcessor(Manager *mgr,
                             const std::string &unicode_indexer_path);

  void Process(const std::string &text, const std::string &lang,
               std::vector<int64_t> *text_ids,
               std::vector<float> *text_mask_flat,
               std::vector<int64_t> *text_mask_shape) const;

 private:
  std::string PreprocessText(const std::string &text,
                             const std::string &lang) const;
  std::vector<uint16_t> TextToUnicodeValues(const std::string &text) const;

  std::vector<int32_t> indexer_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TTS_SUPERTONIC_UNICODE_PROCESSOR_H_


================================================
FILE: sherpa-onnx/csrc/offline-tts-vits-impl.h
================================================
// sherpa-onnx/csrc/offline-tts-vits-impl.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_VITS_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_VITS_IMPL_H_

#include <memory>
#include <string>
#include <sstream>
#include <utility>
#include <vector>

#include "fst/extensions/far/far.h"
#include "kaldifst/csrc/kaldi-fst-io.h"
#include "kaldifst/csrc/text-normalizer.h"
#include "sherpa-onnx/csrc/character-lexicon.h"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/lexicon.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/melo-tts-lexicon.h"
#include "sherpa-onnx/csrc/offline-tts-character-frontend.h"
#include "sherpa-onnx/csrc/offline-tts-frontend.h"
#include "sherpa-onnx/csrc/offline-tts-impl.h"
#include "sherpa-onnx/csrc/offline-tts-vits-model.h"
#include "sherpa-onnx/csrc/piper-phonemize-lexicon.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class OfflineTtsVitsImpl : public OfflineTtsImpl {
 public:
  explicit OfflineTtsVitsImpl(const OfflineTtsConfig &config)
      : config_(config),
        model_(std::make_unique<OfflineTtsVitsModel>(config.model)) {
    InitFrontend();

    if (!config.rule_fsts.empty()) {
      std::vector<std::string> files;
      SplitStringToVector(config.rule_fsts, ",", false, &files);
      tn_list_.reserve(files.size());
      for (const auto &f : files) {
        if (config.model.debug) {
#if __OHOS__
          SHERPA_ONNX_LOGE("rule fst: %{public}s", f.c_str());
#else
          SHERPA_ONNX_LOGE("rule fst: %s", f.c_str());
#endif
        }
        tn_list_.push_back(std::make_unique<kaldifst::TextNormalizer>(f));
      }
    }

    if (!config.rule_fars.empty()) {
      if (config.model.debug) {
        SHERPA_ONNX_LOGE("Loading FST archives");
      }
      std::vector<std::string> files;
      SplitStringToVector(config.rule_fars, ",", false, &files);

      tn_list_.reserve(files.size() + tn_list_.size());

      for (const auto &f : files) {
        if (config.model.debug) {
#if __OHOS__
          SHERPA_ONNX_LOGE("rule far: %{public}s", f.c_str());
#else
          SHERPA_ONNX_LOGE("rule far: %s", f.c_str());
#endif
        }
        std::unique_ptr<fst::FarReader<fst::StdArc>> reader(
            fst::FarReader<fst::StdArc>::Open(f));
        for (; !reader->Done(); reader->Next()) {
          std::unique_ptr<fst::StdConstFst> r(
              fst::CastOrConvertToConstFst(reader->GetFst()->Copy()));

          tn_list_.push_back(
              std::make_unique<kaldifst::TextNormalizer>(std::move(r)));
        }
      }

      if (config.model.debug) {
        SHERPA_ONNX_LOGE("FST archives loaded!");
      }
    }
  }

  template <typename Manager>
  OfflineTtsVitsImpl(Manager *mgr, const OfflineTtsConfig &config)
      : config_(config),
        model_(std::make_unique<OfflineTtsVitsModel>(mgr, config.model)) {
    InitFrontend(mgr);

    if (!config.rule_fsts.empty()) {
      std::vector<std::string> files;
      SplitStringToVector(config.rule_fsts, ",", false, &files);
      tn_list_.reserve(files.size());
      for (const auto &f : files) {
        if (config.model.debug) {
#if __OHOS__
          SHERPA_ONNX_LOGE("rule fst: %{public}s", f.c_str());
#else
          SHERPA_ONNX_LOGE("rule fst: %s", f.c_str());
#endif
        }
        auto buf = ReadFile(mgr, f);
        std::istringstream is(std::string(buf.data(), buf.size()));
        tn_list_.push_back(std::make_unique<kaldifst::TextNormalizer>(is));
      }
    }

    if (!config.rule_fars.empty()) {
      std::vector<std::string> files;
      SplitStringToVector(config.rule_fars, ",", false, &files);
      tn_list_.reserve(files.size() + tn_list_.size());

      for (const auto &f : files) {
        if (config.model.debug) {
#if __OHOS__
          SHERPA_ONNX_LOGE("rule far: %{public}s", f.c_str());
#else
          SHERPA_ONNX_LOGE("rule far: %s", f.c_str());
#endif
        }

        auto buf = ReadFile(mgr, f);

        std::unique_ptr<std::istream> s(
            new std::istringstream(std::string(buf.data(), buf.size())));

        std::unique_ptr<fst::FarReader<fst::StdArc>> reader(
            fst::FarReader<fst::StdArc>::Open(std::move(s)));

        for (; !reader->Done(); reader->Next()) {
          std::unique_ptr<fst::StdConstFst> r(
              fst::CastOrConvertToConstFst(reader->GetFst()->Copy()));

          tn_list_.push_back(
              std::make_unique<kaldifst::TextNormalizer>(std::move(r)));
        }  // for (; !reader->Done(); reader->Next())
      }    // for (const auto &f : files)
    }      // if (!config.rule_fars.empty())
  }

  int32_t SampleRate() const override {
    return model_->GetMetaData().sample_rate;
  }

  int32_t NumSpeakers() const override {
    return model_->GetMetaData().num_speakers;
  }

  // Supported options in GenerationConfig:
  //   - sid: Speaker ID for multi-speaker models
  //   - speed: Speech speed factor (default: 1.0)
  //   - silence_scale: Scale applied to pauses in the generated audio
  //
  // Supported extra options in config.extra:
  //   - None
  GeneratedAudio Generate(
      const std::string &_text, const GenerationConfig &gen_config,
      GeneratedAudioCallback callback = nullptr) const override {
    if (config_.model.debug) {
      SHERPA_ONNX_LOGE("%s", gen_config.ToString().c_str());
    }

    int64_t sid = gen_config.sid;
    float speed = gen_config.speed;
    if (speed <= 0) {
      SHERPA_ONNX_LOGE("Speed must be > 0. Given: %f", speed);
      return {};
    }

    const auto &meta_data = model_->GetMetaData();
    int32_t num_speakers = meta_data.num_speakers;

    if (num_speakers == 0 && sid != 0) {
#if __OHOS__
      SHERPA_ONNX_LOGE(
          "This is a single-speaker model and supports only sid 0. Given sid: "
          "%{public}d. sid is ignored",
          static_cast<int32_t>(sid));
#else
      SHERPA_ONNX_LOGE(
          "This is a single-speaker model and supports only sid 0. Given sid: "
          "%d. sid is ignored",
          static_cast<int32_t>(sid));
#endif
    }

    if (num_speakers != 0 && (sid >= num_speakers || sid < 0)) {
#if __OHOS__
      SHERPA_ONNX_LOGE(
          "This model contains only %{public}d speakers. sid should be in the "
          "range [%{public}d, %{public}d]. Given: %{public}d. Use sid=0",
          num_speakers, 0, num_speakers - 1, static_cast<int32_t>(sid));
#else
      SHERPA_ONNX_LOGE(
          "This model contains only %d speakers. sid should be in the range "
          "[%d, %d]. Given: %d. Use sid=0",
          num_speakers, 0, num_speakers - 1, static_cast<int32_t>(sid));
#endif
      sid = 0;
    }

    std::string text = _text;
    if (config_.model.debug) {
#if __OHOS__
      SHERPA_ONNX_LOGE("Raw text: %{public}s", text.c_str());
#else
      SHERPA_ONNX_LOGE("Raw text: %s", text.c_str());
#endif
    }

    if (!tn_list_.empty()) {
      for (const auto &tn : tn_list_) {
        text = tn->Normalize(text);
        if (config_.model.debug) {
#if __OHOS__
          SHERPA_ONNX_LOGE("After normalizing: %{public}s", text.c_str());
#else
          SHERPA_ONNX_LOGE("After normalizing: %s", text.c_str());
#endif
        }
      }
    }

    std::vector<TokenIDs> token_ids =
        frontend_->ConvertTextToTokenIds(text, meta_data.voice);

    if (token_ids.empty() ||
        (token_ids.size() == 1 && token_ids[0].tokens.empty())) {
      SHERPA_ONNX_LOGE("Failed to convert %s to token IDs", text.c_str());
      return {};
    }

    std::vector<std::vector<int64_t>> x;
    std::vector<std::vector<int64_t>> tones;

    x.reserve(token_ids.size());

    for (auto &i : token_ids) {
      x.push_back(std::move(i.tokens));
    }

    if (!token_ids[0].tones.empty()) {
      tones.reserve(token_ids.size());
      for (auto &i : token_ids) {
        tones.push_back(std::move(i.tones));
      }
    }

    // TODO(fangjun): add blank inside the frontend, not here
    if (meta_data.add_blank && config_.model.vits.data_dir.empty() &&
        meta_data.frontend != "characters") {
      for (auto &k : x) {
        k = AddBlank(k);
      }

      for (auto &k : tones) {
        k = AddBlank(k);
      }
    }

    int32_t x_size = static_cast<int32_t>(x.size());

    if (config_.max_num_sentences <= 0 || x_size <= config_.max_num_sentences) {
      auto ans = Process(x, tones, sid, speed, gen_config.silence_scale);
      if (callback) {
        callback(ans.samples.data(), ans.samples.size(), 1.0);
      }
      return ans;
    }

    // the input text is too long, we process sentences within it in batches
    // to avoid OOM. Batch size is config_.max_num_sentences
    std::vector<std::vector<int64_t>> batch_x;
    std::vector<std::vector<int64_t>> batch_tones;

    int32_t batch_size = config_.max_num_sentences;
    batch_x.reserve(config_.max_num_sentences);
    batch_tones.reserve(config_.max_num_sentences);
    int32_t num_batches = x_size / batch_size;

    if (config_.model.debug) {
#if __OHOS__
      SHERPA_ONNX_LOGE(
          "Text is too long. Split it into %{public}d batches. batch size: "
          "%{public}d. Number of sentences: %{public}d",
          num_batches, batch_size, x_size);
#else
      SHERPA_ONNX_LOGE(
          "Text is too long. Split it into %d batches. batch size: %d. Number "
          "of sentences: %d",
          num_batches, batch_size, x_size);
#endif
    }

    GeneratedAudio ans;

    int32_t should_continue = 1;

    int32_t k = 0;

    for (int32_t b = 0; b != num_batches && should_continue; ++b) {
      batch_x.clear();
      batch_tones.clear();
      for (int32_t i = 0; i != batch_size; ++i, ++k) {
        batch_x.push_back(std::move(x[k]));

        if (!tones.empty()) {
          batch_tones.push_back(std::move(tones[k]));
        }
      }

      auto audio = Process(batch_x, batch_tones, sid, speed,
                           gen_config.silence_scale);
      ans.sample_rate = audio.sample_rate;
      ans.samples.insert(ans.samples.end(), audio.samples.begin(),
                         audio.samples.end());
      if (callback) {
        should_continue = callback(audio.samples.data(), audio.samples.size(),
                                   (b + 1) * 1.0 / num_batches);
        // Caution(fangjun): audio is freed when the callback returns, so users
        // should copy the data if they want to access the data after
        // the callback returns to avoid segmentation fault.
      }
    }

    batch_x.clear();
    batch_tones.clear();
    while (k < static_cast<int32_t>(x.size()) && should_continue) {
      batch_x.push_back(std::move(x[k]));
      if (!tones.empty()) {
        batch_tones.push_back(std::move(tones[k]));
      }

      ++k;
    }

    if (!batch_x.empty()) {
      auto audio =
          Process(batch_x, batch_tones, sid, speed, gen_config.silence_scale);
      ans.sample_rate = audio.sample_rate;
      ans.samples.insert(ans.samples.end(), audio.samples.begin(),
                         audio.samples.end());
      if (callback) {
        callback(audio.samples.data(), audio.samples.size(), 1.0);
        // Caution(fangjun): audio is freed when the callback returns, so users
        // should copy the data if they want to access the data after
        // the callback returns to avoid segmentation fault.
      }
    }

    return ans;
  }

  [[deprecated("Use Generate(text, GenerationConfig, callback) instead")]]
  GeneratedAudio Generate(
      const std::string &text, int64_t sid = 0, float speed = 1.0,
      GeneratedAudioCallback callback = nullptr) const override {
    GenerationConfig gen_config;
    gen_config.sid = sid;
    gen_config.speed = speed;
    gen_config.silence_scale = config_.silence_scale;
    return Generate(text, gen_config, std::move(callback));
  }

 private:
  template <typename Manager>
  void InitFrontend(Manager *mgr) {
    const auto &meta_data = model_->GetMetaData();

    if (meta_data.frontend == "characters") {
      frontend_ = std::make_unique<OfflineTtsCharacterFrontend>(
          mgr, config_.model.vits.tokens, meta_data);
    } else if (meta_data.jieba && meta_data.is_melo_tts) {
      frontend_ = std::make_unique<MeloTtsLexicon>(
          mgr, config_.model.vits.lexicon, config_.model.vits.tokens,
          model_->GetMetaData(), config_.model.debug);
    } else if (meta_data.jieba) {
      frontend_ = std::make_unique<CharacterLexicon>(
          mgr, config_.model.vits.lexicon, config_.model.vits.tokens,
          config_.model.debug);
    } else if (meta_data.is_melo_tts && meta_data.language == "English") {
      frontend_ = std::make_unique<MeloTtsLexicon>(
          mgr, config_.model.vits.lexicon, config_.model.vits.tokens,
          model_->GetMetaData(), config_.model.debug);
    } else if ((meta_data.is_piper || meta_data.is_coqui ||
                meta_data.is_icefall) &&
               !config_.model.vits.data_dir.empty()) {
      frontend_ = std::make_unique<PiperPhonemizeLexicon>(
          mgr, config_.model.vits.tokens, config_.model.vits.data_dir,
          meta_data);
    } else {
      if (config_.model.vits.lexicon.empty()) {
        SHERPA_ONNX_LOGE(
            "Not a model using characters as modeling unit. Please provide "
            "--vits-lexicon if you leave --vits-data-dir empty");
        SHERPA_ONNX_EXIT(-1);
      }

      frontend_ = std::make_unique<Lexicon>(
          mgr, config_.model.vits.lexicon, config_.model.vits.tokens,
          meta_data.punctuations, meta_data.language, config_.model.debug);
    }
  }

  void InitFrontend() {
    const auto &meta_data = model_->GetMetaData();

    if (meta_data.frontend == "characters") {
      frontend_ = std::make_unique<OfflineTtsCharacterFrontend>(
          config_.model.vits.tokens, meta_data);
    } else if (meta_data.jieba && meta_data.is_melo_tts) {
      frontend_ = std::make_unique<MeloTtsLexicon>(
          config_.model.vits.lexicon, config_.model.vits.tokens,
          model_->GetMetaData(), config_.model.debug);
    } else if (meta_data.is_melo_tts && meta_data.language == "English") {
      frontend_ = std::make_unique<MeloTtsLexicon>(
          config_.model.vits.lexicon, config_.model.vits.tokens,
          model_->GetMetaData(), config_.model.debug);
    } else if (meta_data.jieba) {
      frontend_ = std::make_unique<CharacterLexicon>(config_.model.vits.lexicon,
                                                     config_.model.vits.tokens,
                                                     config_.model.debug);
    } else if ((meta_data.is_piper || meta_data.is_coqui ||
                meta_data.is_icefall) &&
               !config_.model.vits.data_dir.empty()) {
      frontend_ = std::make_unique<PiperPhonemizeLexicon>(
          config_.model.vits.tokens, config_.model.vits.data_dir,
          model_->GetMetaData());
    } else {
      if (config_.model.vits.lexicon.empty()) {
        SHERPA_ONNX_LOGE(
            "Not a model using characters as modeling unit. Please provide "
            "--vits-lexicon if you leave --vits-data-dir empty");
        SHERPA_ONNX_EXIT(-1);
      }
      frontend_ = std::make_unique<Lexicon>(
          config_.model.vits.lexicon, config_.model.vits.tokens,
          meta_data.punctuations, meta_data.language, config_.model.debug);
    }
  }

  GeneratedAudio Process(const std::vector<std::vector<int64_t>> &tokens,
                         const std::vector<std::vector<int64_t>> &tones,
                         int32_t sid, float speed,
                         float silence_scale) const {
    int32_t num_tokens = 0;
    for (const auto &k : tokens) {
      num_tokens += k.size();
    }

    std::vector<int64_t> x;
    x.reserve(num_tokens);
    for (const auto &k : tokens) {
      x.insert(x.end(), k.begin(), k.end());
    }

    std::vector<int64_t> tone_list;
    if (!tones.empty()) {
      tone_list.reserve(num_tokens);
      for (const auto &k : tones) {
        tone_list.insert(tone_list.end(), k.begin(), k.end());
      }
    }

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::array<int64_t, 2> x_shape = {1, static_cast<int32_t>(x.size())};
    Ort::Value x_tensor = Ort::Value::CreateTensor(
        memory_info, x.data(), x.size(), x_shape.data(), x_shape.size());

    Ort::Value tones_tensor{nullptr};
    if (!tones.empty()) {
      tones_tensor = Ort::Value::CreateTensor(memory_info, tone_list.data(),
                                              tone_list.size(), x_shape.data(),
                                              x_shape.size());
    }

    Ort::Value audio{nullptr};
    if (tones.empty()) {
      audio = model_->Run(std::move(x_tensor), sid, speed);
    } else {
      audio =
          model_->Run(std::move(x_tensor), std::move(tones_tensor), sid, speed);
    }

    std::vector<int64_t> audio_shape =
        audio.GetTensorTypeAndShapeInfo().GetShape();

    int64_t total = 1;
    // The output shape may be (1, 1, total) or (1, total) or (total,)
    for (auto i : audio_shape) {
      total *= i;
    }

    const float *p = audio.GetTensorData<float>();

    GeneratedAudio ans;
    ans.sample_rate = model_->GetMetaData().sample_rate;
    ans.samples = std::vector<float>(p, p + total);

    if (silence_scale != 1) {
      ans = ans.ScaleSilence(silence_scale);
    }

    return ans;
  }

 private:
  OfflineTtsConfig config_;
  std::unique_ptr<OfflineTtsVitsModel> model_;
  std::vector<std::unique_ptr<kaldifst::TextNormalizer>> tn_list_;
  std::unique_ptr<OfflineTtsFrontend> frontend_;
};

}  // namespace sherpa_onnx
#endif  // SHERPA_ONNX_CSRC_OFFLINE_TTS_VITS_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-tts-vits-model-config.cc
================================================
// sherpa-onnx/csrc/offline-tts-vits-model-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-tts-vits-model-config.h"

#include <string>
#include <vector>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OfflineTtsVitsModelConfig::Register(ParseOptions *po) {
  po->Register("vits-model", &model, "Path to VITS model");
  po->Register("vits-lexicon", &lexicon, "Path to lexicon.txt for VITS models");
  po->Register("vits-tokens", &tokens, "Path to tokens.txt for VITS models");
  po->Register("vits-data-dir", &data_dir,
               "Path to the directory containing dict for espeak-ng. If it is "
               "given, --vits-lexicon is ignored.");
  po->Register("vits-dict-dir", &dict_dir,
               "Not used. You don't need to provide a value for it");
  po->Register("vits-noise-scale", &noise_scale, "noise_scale for VITS models");
  po->Register("vits-noise-scale-w", &noise_scale_w,
               "noise_scale_w for VITS models");
  po->Register("vits-length-scale", &length_scale,
               "Speech speed. Larger->Slower; Smaller->faster.");
}

bool OfflineTtsVitsModelConfig::Validate() const {
  if (model.empty()) {
    SHERPA_ONNX_LOGE("Please provide --vits-model");
    return false;
  }

  if (!FileExists(model)) {
    SHERPA_ONNX_LOGE("--vits-model: '%s' does not exist", model.c_str());
    return false;
  }

  if (tokens.empty()) {
    SHERPA_ONNX_LOGE("Please provide --vits-tokens");
    return false;
  }

  if (!FileExists(tokens)) {
    SHERPA_ONNX_LOGE("--vits-tokens: '%s' does not exist", tokens.c_str());
    return false;
  }

  if (!data_dir.empty()) {
    if (!FileExists(data_dir + "/phontab")) {
      SHERPA_ONNX_LOGE(
          "'%s/phontab' does not exist. Please check --vits-data-dir",
          data_dir.c_str());
      return false;
    }

    if (!FileExists(data_dir + "/phonindex")) {
      SHERPA_ONNX_LOGE(
          "'%s/phonindex' does not exist. Please check --vits-data-dir",
          data_dir.c_str());
      return false;
    }

    if (!FileExists(data_dir + "/phondata")) {
      SHERPA_ONNX_LOGE(
          "'%s/phondata' does not exist. Please check --vits-data-dir",
          data_dir.c_str());
      return false;
    }

    if (!FileExists(data_dir + "/intonations")) {
      SHERPA_ONNX_LOGE(
          "'%s/intonations' does not exist. Please check --vits-data-dir",
          data_dir.c_str());
      return false;
    }
  }

  if (!dict_dir.empty()) {
    SHERPA_ONNX_LOGE(
        "From sherpa-onnx v1.12.15, you don't need to provide dict_dir for "
        "this model. Ignore it");
  }

  return true;
}

std::string OfflineTtsVitsModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineTtsVitsModelConfig(";
  os << "model=\"" << model << "\", ";
  os << "lexicon=\"" << lexicon << "\", ";
  os << "tokens=\"" << tokens << "\", ";
  os << "data_dir=\"" << data_dir << "\", ";
  os << "noise_scale=" << noise_scale << ", ";
  os << "noise_scale_w=" << noise_scale_w << ", ";
  os << "length_scale=" << length_scale << ")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-tts-vits-model-config.h
================================================
// sherpa-onnx/csrc/offline-tts-vits-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_VITS_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_VITS_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineTtsVitsModelConfig {
  std::string model;
  std::string lexicon;
  std::string tokens;

  // If data_dir is given, lexicon is ignored
  // data_dir is for piper-phonemize, which uses espeak-ng
  std::string data_dir;

  // Used for Chinese TTS models using jieba
  std::string dict_dir;

  float noise_scale = 0.667;
  float noise_scale_w = 0.8;
  float length_scale = 1;

  // used only for multi-speaker models, e.g, vctk speech dataset.
  // Not applicable for single-speaker models, e.g., ljspeech dataset

  OfflineTtsVitsModelConfig() = default;

  OfflineTtsVitsModelConfig(const std::string &model,
                            const std::string &lexicon,
                            const std::string &tokens,
                            const std::string &data_dir,
                            const std::string &dict_dir,
                            float noise_scale = 0.667,
                            float noise_scale_w = 0.8, float length_scale = 1)
      : model(model),
        lexicon(lexicon),
        tokens(tokens),
        data_dir(data_dir),
        dict_dir(dict_dir),
        noise_scale(noise_scale),
        noise_scale_w(noise_scale_w),
        length_scale(length_scale) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TTS_VITS_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-tts-vits-model-meta-data.h
================================================
// sherpa-onnx/csrc/offline-tts-vits-model-meta-data.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_VITS_MODEL_META_DATA_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_VITS_MODEL_META_DATA_H_

#include <cstdint>
#include <string>

namespace sherpa_onnx {

// If you are not sure what each field means, please
// have a look of the Python file in the model directory that
// you have downloaded.
struct OfflineTtsVitsModelMetaData {
  int32_t sample_rate = 0;
  int32_t add_blank = 0;
  int32_t num_speakers = 0;

  bool is_piper = false;
  bool is_coqui = false;
  bool is_icefall = false;
  bool is_melo_tts = false;

  // for Chinese TTS models from
  // https://github.com/Plachtaa/VITS-fast-fine-tuning
  int32_t jieba = 0;

  // the following options are for models from coqui-ai/TTS
  int32_t blank_id = 0;
  int32_t bos_id = 0;
  int32_t eos_id = 0;
  int32_t use_eos_bos = 0;
  int32_t pad_id = 0;

  // for melo tts
  int32_t speaker_id = 0;
  int32_t version = 0;

  std::string punctuations;
  std::string language;
  std::string voice;
  std::string frontend;  // characters
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TTS_VITS_MODEL_META_DATA_H_


================================================
FILE: sherpa-onnx/csrc/offline-tts-vits-model.cc
================================================
// sherpa-onnx/csrc/offline-tts-vits-model.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-tts-vits-model.h"

#include <algorithm>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"

namespace sherpa_onnx {

class OfflineTtsVitsModel::Impl {
 public:
  explicit Impl(const OfflineTtsModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(config.vits.model);
    Init(buf.data(), buf.size());
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineTtsModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(mgr, config.vits.model);
    Init(buf.data(), buf.size());
  }

  Ort::Value Run(Ort::Value x, int64_t sid, float speed) {
    if (meta_data_.is_piper || meta_data_.is_coqui) {
      return RunVitsPiperOrCoqui(std::move(x), sid, speed);
    }

    return RunVits(std::move(x), sid, speed);
  }

  Ort::Value Run(Ort::Value x, Ort::Value tones, int64_t sid, float speed) {
    if (meta_data_.num_speakers == 1) {
      // For MeloTTS, we hardcode sid to the one contained in the meta data
      sid = meta_data_.speaker_id;
    }

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::vector<int64_t> x_shape = x.GetTensorTypeAndShapeInfo().GetShape();
    if (x_shape[0] != 1) {
      SHERPA_ONNX_LOGE("Support only batch_size == 1. Given: %d",
                       static_cast<int32_t>(x_shape[0]));
      exit(-1);
    }

    int64_t len = x_shape[1];
    int64_t len_shape = 1;

    Ort::Value x_length =
        Ort::Value::CreateTensor(memory_info, &len, 1, &len_shape, 1);

    int64_t scale_shape = 1;
    float noise_scale = config_.vits.noise_scale;
    float length_scale = config_.vits.length_scale;
    float noise_scale_w = config_.vits.noise_scale_w;

    if (speed != 1 && speed > 0) {
      length_scale = 1. / speed;
    }

    Ort::Value noise_scale_tensor =
        Ort::Value::CreateTensor(memory_info, &noise_scale, 1, &scale_shape, 1);

    Ort::Value length_scale_tensor = Ort::Value::CreateTensor(
        memory_info, &length_scale, 1, &scale_shape, 1);

    Ort::Value noise_scale_w_tensor = Ort::Value::CreateTensor(
        memory_info, &noise_scale_w, 1, &scale_shape, 1);

    Ort::Value sid_tensor =
        Ort::Value::CreateTensor(memory_info, &sid, 1, &scale_shape, 1);

    std::vector<Ort::Value> inputs;
    inputs.reserve(7);
    inputs.push_back(std::move(x));
    inputs.push_back(std::move(x_length));
    inputs.push_back(std::move(tones));
    inputs.push_back(std::move(sid_tensor));
    inputs.push_back(std::move(noise_scale_tensor));
    inputs.push_back(std::move(length_scale_tensor));
    inputs.push_back(std::move(noise_scale_w_tensor));

    auto out =
        sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(),
                   output_names_ptr_.data(), output_names_ptr_.size());

    return std::move(out[0]);
  }

  const OfflineTtsVitsModelMetaData &GetMetaData() const { return meta_data_; }

 private:
  void Init(void *model_data, size_t model_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);

    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      os << "---vits model---\n";
      PrintModelMetadata(os, meta_data);

      os << "----------input names----------\n";
      int32_t i = 0;
      for (const auto &s : input_names_) {
        os << i << " " << s << "\n";
        ++i;
      }
      os << "----------output names----------\n";
      i = 0;
      for (const auto &s : output_names_) {
        os << i << " " << s << "\n";
        ++i;
      }

#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
    SHERPA_ONNX_READ_META_DATA(meta_data_.sample_rate, "sample_rate");
    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.add_blank, "add_blank",
                                            0);

    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.speaker_id, "speaker_id",
                                            0);
    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.version, "version", 0);
    SHERPA_ONNX_READ_META_DATA(meta_data_.num_speakers, "n_speakers");
    SHERPA_ONNX_READ_META_DATA_STR_WITH_DEFAULT(meta_data_.punctuations,
                                                "punctuation", "");
    SHERPA_ONNX_READ_META_DATA_STR(meta_data_.language, "language");

    SHERPA_ONNX_READ_META_DATA_STR_WITH_DEFAULT(meta_data_.voice, "voice", "");

    SHERPA_ONNX_READ_META_DATA_STR_WITH_DEFAULT(meta_data_.frontend, "frontend",
                                                "");

    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.jieba, "jieba", 0);
    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.blank_id, "blank_id", 0);
    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.bos_id, "bos_id", 0);
    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.eos_id, "eos_id", 0);
    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.use_eos_bos,
                                            "use_eos_bos", 1);
    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.pad_id, "pad_id", 0);

    std::string comment;
    SHERPA_ONNX_READ_META_DATA_STR(comment, "comment");

    if (comment.find("piper") != std::string::npos) {
      meta_data_.is_piper = true;
    }

    if (comment.find("coqui") != std::string::npos) {
      meta_data_.is_coqui = true;
    }

    if (comment.find("icefall") != std::string::npos) {
      meta_data_.is_icefall = true;
    }

    if (comment.find("melo") != std::string::npos) {
      meta_data_.is_melo_tts = true;
      int32_t expected_version = 2;
      if (meta_data_.version < expected_version) {
        SHERPA_ONNX_LOGE(
            "Please download the latest MeloTTS model and retry. Current "
            "version: %d. Expected version: %d",
            meta_data_.version, expected_version);
        exit(-1);
      }

      // NOTE(fangjun):
      // version 0 is the first version
      // version 2: add jieba=1 to the metadata
    }
  }

  Ort::Value RunVitsPiperOrCoqui(Ort::Value x, int64_t sid, float speed) {
    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::vector<int64_t> x_shape = x.GetTensorTypeAndShapeInfo().GetShape();
    if (x_shape[0] != 1) {
      SHERPA_ONNX_LOGE("Support only batch_size == 1. Given: %d",
                       static_cast<int32_t>(x_shape[0]));
      exit(-1);
    }

    int64_t len = x_shape[1];
    int64_t len_shape = 1;

    Ort::Value x_length =
        Ort::Value::CreateTensor(memory_info, &len, 1, &len_shape, 1);

    float noise_scale = config_.vits.noise_scale;
    float length_scale = config_.vits.length_scale;
    float noise_scale_w = config_.vits.noise_scale_w;

    if (speed != 1 && speed > 0) {
      length_scale = 1. / speed;
    }
    std::array<float, 3> scales = {noise_scale, length_scale, noise_scale_w};

    int64_t scale_shape = 3;

    Ort::Value scales_tensor = Ort::Value::CreateTensor(
        memory_info, scales.data(), scales.size(), &scale_shape, 1);

    int64_t sid_shape = 1;
    Ort::Value sid_tensor =
        Ort::Value::CreateTensor(memory_info, &sid, 1, &sid_shape, 1);

    int64_t lang_id_shape = 1;
    int64_t lang_id = 0;
    Ort::Value lang_id_tensor =
        Ort::Value::CreateTensor(memory_info, &lang_id, 1, &lang_id_shape, 1);

    std::vector<Ort::Value> inputs;
    inputs.reserve(5);
    inputs.push_back(std::move(x));
    inputs.push_back(std::move(x_length));
    inputs.push_back(std::move(scales_tensor));

    if (input_names_.size() >= 4 && input_names_[3] == "sid") {
      inputs.push_back(std::move(sid_tensor));
    }

    if (input_names_.size() >= 5 && input_names_[4] == "langid") {
      inputs.push_back(std::move(lang_id_tensor));
    }

    auto out =
        sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(),
                   output_names_ptr_.data(), output_names_ptr_.size());

    return std::move(out[0]);
  }

  Ort::Value RunVits(Ort::Value x, int64_t sid, float speed) {
    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::vector<int64_t> x_shape = x.GetTensorTypeAndShapeInfo().GetShape();
    if (x_shape[0] != 1) {
      SHERPA_ONNX_LOGE("Support only batch_size == 1. Given: %d",
                       static_cast<int32_t>(x_shape[0]));
      exit(-1);
    }

    int64_t len = x_shape[1];
    int64_t len_shape = 1;

    Ort::Value x_length =
        Ort::Value::CreateTensor(memory_info, &len, 1, &len_shape, 1);

    int64_t scale_shape = 1;
    float noise_scale = config_.vits.noise_scale;
    float length_scale = config_.vits.length_scale;
    float noise_scale_w = config_.vits.noise_scale_w;

    if (speed != 1 && speed > 0) {
      length_scale = 1. / speed;
    }

    Ort::Value noise_scale_tensor =
        Ort::Value::CreateTensor(memory_info, &noise_scale, 1, &scale_shape, 1);

    Ort::Value length_scale_tensor = Ort::Value::CreateTensor(
        memory_info, &length_scale, 1, &scale_shape, 1);

    Ort::Value noise_scale_w_tensor = Ort::Value::CreateTensor(
        memory_info, &noise_scale_w, 1, &scale_shape, 1);

    Ort::Value sid_tensor =
        Ort::Value::CreateTensor(memory_info, &sid, 1, &scale_shape, 1);

    std::vector<Ort::Value> inputs;
    inputs.reserve(6);
    inputs.push_back(std::move(x));
    inputs.push_back(std::move(x_length));
    inputs.push_back(std::move(noise_scale_tensor));
    inputs.push_back(std::move(length_scale_tensor));
    inputs.push_back(std::move(noise_scale_w_tensor));

    if (input_names_.size() == 6 &&
        (input_names_.back() == "sid" || input_names_.back() == "speaker")) {
      inputs.push_back(std::move(sid_tensor));
    }

    auto out =
        sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(),
                   output_names_ptr_.data(), output_names_ptr_.size());

    return std::move(out[0]);
  }

 private:
  OfflineTtsModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;

  OfflineTtsVitsModelMetaData meta_data_;
};

OfflineTtsVitsModel::OfflineTtsVitsModel(const OfflineTtsModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineTtsVitsModel::OfflineTtsVitsModel(Manager *mgr,
                                         const OfflineTtsModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineTtsVitsModel::~OfflineTtsVitsModel() = default;

Ort::Value OfflineTtsVitsModel::Run(Ort::Value x, int64_t sid /*=0*/,
                                    float speed /*= 1.0*/) {
  return impl_->Run(std::move(x), sid, speed);
}

Ort::Value OfflineTtsVitsModel::Run(Ort::Value x, Ort::Value tones,
                                    int64_t sid /*= 0*/,
                                    float speed /*= 1.0*/) const {
  return impl_->Run(std::move(x), std::move(tones), sid, speed);
}

const OfflineTtsVitsModelMetaData &OfflineTtsVitsModel::GetMetaData() const {
  return impl_->GetMetaData();
}

#if __ANDROID_API__ >= 9
template OfflineTtsVitsModel::OfflineTtsVitsModel(
    AAssetManager *mgr, const OfflineTtsModelConfig &config);
#endif

#if __OHOS__
template OfflineTtsVitsModel::OfflineTtsVitsModel(
    NativeResourceManager *mgr, const OfflineTtsModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-tts-vits-model.h
================================================
// sherpa-onnx/csrc/offline-tts-vits-model.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_VITS_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_VITS_MODEL_H_

#include <memory>
#include <string>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-tts-model-config.h"
#include "sherpa-onnx/csrc/offline-tts-vits-model-meta-data.h"

namespace sherpa_onnx {

class OfflineTtsVitsModel {
 public:
  ~OfflineTtsVitsModel();

  explicit OfflineTtsVitsModel(const OfflineTtsModelConfig &config);

  template <typename Manager>
  OfflineTtsVitsModel(Manager *mgr, const OfflineTtsModelConfig &config);

  /** Run the model.
   *
   * @param x A int64 tensor of shape (1, num_tokens)
  // @param sid Speaker ID. Used only for multi-speaker models, e.g., models
  //            trained using the VCTK dataset. It is not used for
  //            single-speaker models, e.g., models trained using the ljspeech
  //            dataset.
   * @return Return a float32 tensor containing audio samples. You can flatten
   *         it to a 1-D tensor.
   */
  Ort::Value Run(Ort::Value x, int64_t sid = 0, float speed = 1.0);

  // This is for MeloTTS
  Ort::Value Run(Ort::Value x, Ort::Value tones, int64_t sid = 0,
                 float speed = 1.0) const;

  const OfflineTtsVitsModelMetaData &GetMetaData() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TTS_VITS_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/offline-tts-zipvoice-impl.h
================================================
// sherpa-onnx/csrc/offline-tts-zipvoice-impl.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_ZIPVOICE_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_ZIPVOICE_IMPL_H_

#include <algorithm>
#include <cmath>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "kaldi-native-fbank/csrc/mel-computations.h"
#include "kaldi-native-fbank/csrc/stft.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/matcha-tts-lexicon.h"
#include "sherpa-onnx/csrc/math.h"
#include "sherpa-onnx/csrc/offline-tts-frontend.h"
#include "sherpa-onnx/csrc/offline-tts-impl.h"
#include "sherpa-onnx/csrc/offline-tts-zipvoice-model-config.h"
#include "sherpa-onnx/csrc/offline-tts-zipvoice-model.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/resample.h"
#include "sherpa-onnx/csrc/text-utils.h"
#include "sherpa-onnx/csrc/vocoder.h"

namespace sherpa_onnx {

class OfflineTtsZipvoiceImpl : public OfflineTtsImpl {
 public:
  explicit OfflineTtsZipvoiceImpl(const OfflineTtsConfig &config)
      : config_(config),
        model_(std::make_unique<OfflineTtsZipvoiceModel>(config.model)),
        vocoder_(Vocoder::Create(config.model)) {
    InitFrontend();

    PostInit();
  }

  template <typename Manager>
  OfflineTtsZipvoiceImpl(Manager *mgr, const OfflineTtsConfig &config)
      : config_(config),
        model_(std::make_unique<OfflineTtsZipvoiceModel>(mgr, config.model)),
        vocoder_(Vocoder::Create(mgr, config.model)) {
    InitFrontend(mgr);

    PostInit();
  }

  int32_t SampleRate() const override {
    return model_->GetMetaData().sample_rate;
  }

  GeneratedAudio Generate(
      const std::string &text, const GenerationConfig &config,
      GeneratedAudioCallback callback = nullptr) const override {
    // Supported extra options in config.extra:
    //   - "speed" (float): Speech speed factor (default: 1.0)
    //   - "num_steps" (int): Number of flow-matching steps (default: 4)
    //   - "max_char_in_sentence" (int): Max characters per chunk (default: 200)
    //   - "min_char_in_sentence" (int): Merge shorter chunks until this size
    //     (default: 30)
    //   - "feat_scale" (float): Prompt mel log scaling factor (default:
    //     config.model.zipvoice.feat_scale)
    //   - "t_shift" (float): Timestep shift used by the decoder schedule
    //     (default: config.model.zipvoice.t_shift)
    //   - "target_rms" (float): Prompt RMS normalization target (default:
    //     config.model.zipvoice.target_rms)
    //   - "guidance_scale" (float): Classifier-free guidance scale for the
    //     decoder (default: config.model.zipvoice.guidance_scale)
    if (config_.model.debug) {
      SHERPA_ONNX_LOGE("%s", config.ToString().c_str());
    }

    if (config.reference_sample_rate <= 0) {
      SHERPA_ONNX_LOGE("reference_sample_rate %d is invalid.",
                       config.reference_sample_rate);
      return {};
    }

    if (config.reference_audio.empty()) {
      SHERPA_ONNX_LOGE("reference_audio is empty.");
      return {};
    }

    if (config.reference_text.empty()) {
      SHERPA_ONNX_LOGE("reference_text is empty.");
      return {};
    }

    float speed =
        config.GetExtraFloat("speed", config.speed > 0 ? config.speed : 1.0f);
    if (speed <= 0) {
      SHERPA_ONNX_LOGE("Speed must be > 0. Given: %f", speed);
      return {};
    }

    int32_t num_steps = config.GetExtraInt(
        "num_steps", config.num_steps > 0 ? config.num_steps : 4);
    if (num_steps <= 0) {
      SHERPA_ONNX_LOGE("Num steps must be > 0. Given: %d", num_steps);
      return {};
    }

    float feat_scale =
        config.GetExtraFloat("feat_scale", config_.model.zipvoice.feat_scale);
    if (feat_scale <= 0) {
      SHERPA_ONNX_LOGE("feat_scale must be > 0. Given: %f", feat_scale);
      return {};
    }

    float t_shift =
        config.GetExtraFloat("t_shift", config_.model.zipvoice.t_shift);
    if (t_shift < 0) {
      SHERPA_ONNX_LOGE("t_shift must be >= 0. Given: %f", t_shift);
      return {};
    }

    float target_rms =
        config.GetExtraFloat("target_rms", config_.model.zipvoice.target_rms);
    if (target_rms <= 0) {
      SHERPA_ONNX_LOGE("target_rms must be > 0. Given: %f", target_rms);
      return {};
    }

    float guidance_scale = config.GetExtraFloat(
        "guidance_scale", config_.model.zipvoice.guidance_scale);
    if (guidance_scale <= 0) {
      SHERPA_ONNX_LOGE("guidance_scale must be > 0. Given: %f", guidance_scale);
      return {};
    }

    std::vector<TokenIDs> prompt_token_ids =
        frontend_->ConvertTextToTokenIds(config.reference_text);
    if (prompt_token_ids.empty() ||
        (prompt_token_ids.size() == 1 && prompt_token_ids[0].tokens.empty())) {
#if __OHOS__
      SHERPA_ONNX_LOGE(
          "Failed to convert prompt text '%{public}s' to token IDs",
          config.reference_text.c_str());
#else
      SHERPA_ONNX_LOGE("Failed to convert prompt text '%s' to token IDs",
                       config.reference_text.c_str());
#endif
      return {};
    }

    std::vector<int64_t> prompt_tokens;
    for (const auto &t : prompt_token_ids) {
      prompt_tokens.insert(prompt_tokens.end(), t.tokens.begin(),
                           t.tokens.end());
    }

    std::vector<float> prompt_features = ComputePromptFeatures(
        config.reference_audio, config.reference_sample_rate, feat_scale,
        target_rms);
    if (prompt_features.empty()) {
      SHERPA_ONNX_LOGE("No frames extracted from the prompt audio");
      return {};
    }

    auto sentences = SplitByPunctuation(text);
    if (sentences.empty()) {
      return {};
    }

    int32_t max_char_in_sentence =
        config.GetExtraInt("max_char_in_sentence", 200);
    int32_t min_char_in_sentence =
        config.GetExtraInt("min_char_in_sentence", 30);

    if (max_char_in_sentence <= 0) {
      SHERPA_ONNX_LOGE("max_char_in_sentence must be > 0. Given: %d",
                       max_char_in_sentence);
      return {};
    }

    if (min_char_in_sentence <= 0) {
      SHERPA_ONNX_LOGE("min_char_in_sentence must be > 0. Given: %d",
                       min_char_in_sentence);
      return {};
    }

    sentences = MergeShortSentences(sentences, min_char_in_sentence);

    std::vector<std::string> final_chunks;
    for (const auto &s : sentences) {
      auto pieces = SplitLongSentence(s, max_char_in_sentence);
      final_chunks.insert(final_chunks.end(), pieces.begin(), pieces.end());
    }

    sentences = std::move(final_chunks);
    if (sentences.empty()) {
      return {};
    }

    GeneratedAudio result;
    result.sample_rate = SampleRate();

    const int32_t total = static_cast<int32_t>(sentences.size());

    for (int32_t i = 0; i < total; ++i) {
      if (config_.model.debug) {
#if __OHOS__
        SHERPA_ONNX_LOGE("Processing %{public}d/%{public}d: %{public}s", i + 1,
                         total, sentences[i].c_str());
#else
        SHERPA_ONNX_LOGE("Processing %d/%d: %s", i + 1, total,
                         sentences[i].c_str());
#endif
      }

      GeneratedAudio cur = GenerateChunk(
          sentences[i], prompt_tokens, prompt_features, speed, num_steps,
          feat_scale, t_shift, guidance_scale);

      if (cur.samples.empty()) {
        continue;
      }

      result.samples.insert(result.samples.end(), cur.samples.begin(),
                            cur.samples.end());

      if (callback) {
        if (!callback(cur.samples.data(),
                      static_cast<int32_t>(cur.samples.size()),
                      (i + 1) * 1.0f / total)) {
          break;
        }
      }
    }

    if (config.silence_scale != 1) {
      result = result.ScaleSilence(config.silence_scale);
    }

    return result;
  }

  GeneratedAudio Generate(
      const std::string &text, const std::string &prompt_text,
      const std::vector<float> &prompt_samples, int32_t sample_rate,
      float speed, int32_t num_steps,
      GeneratedAudioCallback callback = nullptr) const override {
    GenerationConfig config;
    config.speed = speed;
    config.num_steps = num_steps;
    config.reference_text = prompt_text;
    config.reference_audio = prompt_samples;
    config.reference_sample_rate = sample_rate;
    return Generate(text, config, std::move(callback));
  }

 private:
  void PostInit() { InitMelBanks(); }

  void InitMelBanks() {
    const auto &meta = model_->GetMetaData();
    int32_t sample_rate = meta.sample_rate;
    int32_t n_fft = meta.n_fft;
    int32_t hop_length = meta.hop_length;
    int32_t win_length = meta.window_length;
    int32_t num_mels = meta.num_mels;

    knf::FrameExtractionOptions frame_opts;
    frame_opts.samp_freq = sample_rate;
    frame_opts.frame_length_ms = win_length * 1000 / sample_rate;
    frame_opts.frame_shift_ms = hop_length * 1000 / sample_rate;
    frame_opts.window_type = "hanning";

    knf::MelBanksOptions mel_opts;
    mel_opts.num_bins = num_mels;
    mel_opts.low_freq = 0;
    mel_opts.high_freq = sample_rate / 2;
    mel_opts.is_librosa = true;
    mel_opts.use_slaney_mel_scale = false;
    mel_opts.norm = "";

    mel_banks_ = std::make_unique<knf::MelBanks>(mel_opts, frame_opts, 1.0f);
  }

  template <typename Manager>
  void InitFrontend(Manager *mgr) {
    frontend_ = std::make_unique<MatchaTtsLexicon>(
        mgr, config_.model.zipvoice.lexicon, config_.model.zipvoice.tokens,
        config_.model.zipvoice.data_dir, config_.model.debug, true);
  }

  void InitFrontend() {
    frontend_ = std::make_unique<MatchaTtsLexicon>(
        config_.model.zipvoice.lexicon, config_.model.zipvoice.tokens,
        config_.model.zipvoice.data_dir, config_.model.debug, true);
  }

  void ComputeMelSpectrogram(const std::vector<float> &_samples,
                             int32_t sample_rate, float feat_scale,
                             std::vector<float> *prompt_features) const {
    const auto &meta = model_->GetMetaData();
    if (sample_rate != meta.sample_rate) {
      SHERPA_ONNX_LOGE(
          "Creating a resampler:\n"
          "   in_sample_rate: %d\n"
          "   output_sample_rate: %d\n",
          sample_rate, static_cast<int32_t>(meta.sample_rate));

      float min_freq = std::min<int32_t>(sample_rate, meta.sample_rate);
      float lowpass_cutoff = 0.99 * 0.5 * min_freq;

      int32_t lowpass_filter_width = 6;
      auto resampler = std::make_unique<LinearResample>(
          sample_rate, meta.sample_rate, lowpass_cutoff, lowpass_filter_width);
      std::vector<float> samples;
      resampler->Resample(_samples.data(), _samples.size(), true, &samples);
      ComputeMelSpectrogram(samples, feat_scale, prompt_features);
      return;
    }

    ComputeMelSpectrogram(_samples, feat_scale, prompt_features);
  }

  void ComputeMelSpectrogram(const std::vector<float> &samples,
                             float feat_scale,
                             std::vector<float> *prompt_features) const {
    const auto &meta = model_->GetMetaData();

    int32_t n_fft = meta.n_fft;
    int32_t hop_length = meta.hop_length;
    int32_t win_length = meta.window_length;
    int32_t num_mels = meta.num_mels;

    knf::StftConfig stft_config;
    stft_config.n_fft = n_fft;
    stft_config.hop_length = hop_length;
    stft_config.win_length = win_length;
    stft_config.window_type = "hann";
    stft_config.center = true;

    knf::Stft stft(stft_config);
    auto stft_result = stft.Compute(samples.data(), samples.size());
    int32_t num_frames = stft_result.num_frames;
    int32_t fft_bins = n_fft / 2 + 1;

    prompt_features->resize(num_frames * num_mels);
    float *p = prompt_features->data();

    std::vector<float> magnitude_spectrum(fft_bins);

    for (int32_t i = 0; i < num_frames; ++i, p += num_mels) {
      for (int32_t k = 0; k < fft_bins; ++k) {
        float real = stft_result.real[i * fft_bins + k];
        float imag = stft_result.imag[i * fft_bins + k];
        magnitude_spectrum[k] = std::sqrt(real * real + imag * imag);
      }

      mel_banks_->Compute(magnitude_spectrum.data(), p);

      for (int32_t j = 0; j < num_mels; ++j) {
        p[j] = std::log(p[j] + 1e-10f) * feat_scale;
      }
    }
  }

  GeneratedAudio GenerateChunk(const std::string &text,
                               const std::vector<int64_t> &prompt_tokens,
                               const std::vector<float> &prompt_features,
                               float speed, int32_t num_steps, float feat_scale,
                               float t_shift, float guidance_scale) const {
    std::vector<TokenIDs> text_token_ids =
        frontend_->ConvertTextToTokenIds(text);

    if (text_token_ids.empty() ||
        (text_token_ids.size() == 1 && text_token_ids[0].tokens.empty())) {
#if __OHOS__
      SHERPA_ONNX_LOGE("Failed to convert '%{public}s' to token IDs",
                       text.c_str());
#else
      SHERPA_ONNX_LOGE("Failed to convert '%s' to token IDs", text.c_str());
#endif
      return {};
    }

    std::vector<int64_t> tokens;
    for (const auto &t : text_token_ids) {
      tokens.insert(tokens.end(), t.tokens.begin(), t.tokens.end());
    }

    return Process(tokens, prompt_tokens, prompt_features, speed, num_steps,
                   feat_scale, t_shift, guidance_scale);
  }

  std::vector<float> ComputePromptFeatures(
      const std::vector<float> &prompt_samples, int32_t sample_rate,
      float feat_scale, float target_rms) const {
    std::vector<float> prompt_samples_scaled = prompt_samples;
    double prompt_rms = 0.0;
    double sum_sq = 0.0;
    for (float s : prompt_samples_scaled) {
      sum_sq += s * s;
    }
    prompt_rms = std::sqrt(sum_sq / prompt_samples_scaled.size());
    if (prompt_rms < target_rms && prompt_rms > 0.0f) {
      float scale = target_rms / prompt_rms;
      for (auto &s : prompt_samples_scaled) {
        s *= scale;
      }
    }

    std::vector<float> prompt_features;
    ComputeMelSpectrogram(prompt_samples_scaled, sample_rate, feat_scale,
                          &prompt_features);

    return prompt_features;
  }

  GeneratedAudio Process(const std::vector<int64_t> &tokens,
                         const std::vector<int64_t> &prompt_tokens,
                         const std::vector<float> &prompt_features, float speed,
                         int32_t num_steps, float feat_scale, float t_shift,
                         float guidance_scale) const {
    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::array<int64_t, 2> tokens_shape = {1,
                                           static_cast<int64_t>(tokens.size())};

    Ort::Value tokens_tensor = Ort::Value::CreateTensor(
        memory_info, const_cast<int64_t *>(tokens.data()), tokens.size(),
        tokens_shape.data(), tokens_shape.size());

    std::array<int64_t, 2> prompt_tokens_shape = {
        1, static_cast<int64_t>(prompt_tokens.size())};

    Ort::Value prompt_tokens_tensor = Ort::Value::CreateTensor(
        memory_info, const_cast<int64_t *>(prompt_tokens.data()),
        prompt_tokens.size(), prompt_tokens_shape.data(),
        prompt_tokens_shape.size());

    int32_t mel_dim = model_->GetMetaData().num_mels;

    int32_t num_frames = prompt_features.size() / mel_dim;

    std::array<int64_t, 3> shape = {1, num_frames, mel_dim};
    auto prompt_features_tensor = Ort::Value::CreateTensor(
        memory_info, const_cast<float *>(prompt_features.data()),
        prompt_features.size(), shape.data(), shape.size());

    Ort::Value mel =
        model_->Run(std::move(tokens_tensor), std::move(prompt_tokens_tensor),
                    std::move(prompt_features_tensor), speed, num_steps,
                    t_shift, guidance_scale);

    // Assume mel_shape = {1, T, C}
    std::vector<int64_t> mel_shape = mel.GetTensorTypeAndShapeInfo().GetShape();
    int64_t T = mel_shape[1];
    int64_t C = mel_shape[2];

    const float *mel_data = mel.GetTensorData<float>();

    float inv_feat_scale = 1 / feat_scale;

    // mel_permuted is (C, T)
    std::vector<float> mel_permuted = Transpose(mel_data, T, C);

    Scale(mel_permuted.data(), inv_feat_scale, mel_permuted.size(),
          mel_permuted.data());

    std::array<int64_t, 3> new_shape = {1, C, T};
    Ort::Value mel_new = Ort::Value::CreateTensor<float>(
        memory_info, mel_permuted.data(), mel_permuted.size(), new_shape.data(),
        new_shape.size());

    GeneratedAudio ans;
    ans.samples = vocoder_->Run(std::move(mel_new));
    ans.sample_rate = model_->GetMetaData().sample_rate;
    return ans;
  }

 private:
  OfflineTtsConfig config_;
  std::unique_ptr<OfflineTtsZipvoiceModel> model_;
  std::unique_ptr<Vocoder> vocoder_;
  std::unique_ptr<OfflineTtsFrontend> frontend_;

  std::unique_ptr<knf::MelBanks> mel_banks_;
};

}  // namespace sherpa_onnx
#endif  // SHERPA_ONNX_CSRC_OFFLINE_TTS_ZIPVOICE_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-tts-zipvoice-model-config.cc
================================================
// sherpa-onnx/csrc/offline-tts-zipvoice-model-config.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-tts-zipvoice-model-config.h"

#include <string>
#include <vector>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OfflineTtsZipvoiceModelConfig::Register(ParseOptions *po) {
  po->Register("zipvoice-tokens", &tokens,
               "Path to tokens.txt for ZipVoice models");
  po->Register("zipvoice-data-dir", &data_dir,
               "Path to the directory containing dict for espeak-ng.");
  po->Register("zipvoice-lexicon", &lexicon, "Path to lexicon.txt for Chinese");
  po->Register("zipvoice-encoder", &encoder, "Path to zipvoice text model");
  po->Register("zipvoice-decoder", &decoder,
               "Path to zipvoice flow-matching decoder model");
  po->Register("zipvoice-vocoder", &vocoder, "Path to zipvoice vocoder");
  po->Register("zipvoice-feat-scale", &feat_scale,
               "Feature scale for ZipVoice (default: 0.1)");
  po->Register("zipvoice-t-shift", &t_shift,
               "Shift t to smaller ones if t_shift < 1.0 (default: 0.5)");
  po->Register(
      "zipvoice-target-rms", &target_rms,
      "Target speech normalization rms value for ZipVoice (default: 0.1)");
  po->Register(
      "zipvoice-guidance-scale", &guidance_scale,
      "The scale of classifier-free guidance during inference for ZipVoice "
      "(default: 1.0)");
}

bool OfflineTtsZipvoiceModelConfig::Validate() const {
  if (tokens.empty()) {
    SHERPA_ONNX_LOGE("Please provide --zipvoice-tokens");
    return false;
  }
  if (!FileExists(tokens)) {
    SHERPA_ONNX_LOGE("--zipvoice-tokens: '%s' does not exist", tokens.c_str());
    return false;
  }

  if (encoder.empty()) {
    SHERPA_ONNX_LOGE("Please provide --zipvoice-encoder");
    return false;
  }
  if (!FileExists(encoder)) {
    SHERPA_ONNX_LOGE("--zipvoice-encoder: '%s' does not exist",
                     encoder.c_str());
    return false;
  }

  if (decoder.empty()) {
    SHERPA_ONNX_LOGE("Please provide --zipvoice-decoder");
    return false;
  }
  if (!FileExists(decoder)) {
    SHERPA_ONNX_LOGE("--zipvoice-decoder: '%s' does not exist",
                     decoder.c_str());
    return false;
  }

  if (vocoder.empty()) {
    SHERPA_ONNX_LOGE("Please provide --zipvoice-vocoder");
    return false;
  }

  if (!FileExists(vocoder)) {
    SHERPA_ONNX_LOGE("--zipvoice-vocoder: '%s' does not exist",
                     vocoder.c_str());
    return false;
  }

  if (!data_dir.empty()) {
    std::vector<std::string> required_files = {
        "phontab",
        "phonindex",
        "phondata",
        "intonations",
    };
    for (const auto &f : required_files) {
      if (!FileExists(data_dir + "/" + f)) {
        SHERPA_ONNX_LOGE(
            "'%s/%s' does not exist. Please check zipvoice-data-dir",
            data_dir.c_str(), f.c_str());
        return false;
      }
    }
  }

  if (feat_scale <= 0) {
    SHERPA_ONNX_LOGE("--zipvoice-feat-scale must be positive. Given: %f",
                     feat_scale);
    return false;
  }

  if (t_shift < 0) {
    SHERPA_ONNX_LOGE("--zipvoice-t-shift must be non-negative. Given: %f",
                     t_shift);
    return false;
  }

  if (target_rms <= 0) {
    SHERPA_ONNX_LOGE("--zipvoice-target-rms must be positive. Given: %f",
                     target_rms);
    return false;
  }

  if (guidance_scale <= 0) {
    SHERPA_ONNX_LOGE("--zipvoice-guidance-scale must be positive. Given: %f",
                     guidance_scale);
    return false;
  }

  return true;
}

std::string OfflineTtsZipvoiceModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineTtsZipvoiceModelConfig(";
  os << "tokens=\"" << tokens << "\", ";
  os << "encoder=\"" << encoder << "\", ";
  os << "decoder=\"" << decoder << "\", ";
  os << "vocoder=\"" << vocoder << "\", ";
  os << "data_dir=\"" << data_dir << "\", ";
  os << "lexicon=\"" << lexicon << "\", ";
  os << "feat_scale=" << feat_scale << ", ";
  os << "t_shift=" << t_shift << ", ";
  os << "target_rms=" << target_rms << ", ";
  os << "guidance_scale=" << guidance_scale << ")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-tts-zipvoice-model-config.h
================================================
// sherpa-onnx/csrc/offline-tts-zipvoice-model-config.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_ZIPVOICE_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_ZIPVOICE_MODEL_CONFIG_H_

#include <cstdint>
#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineTtsZipvoiceModelConfig {
  std::string tokens;
  std::string encoder;
  std::string decoder;
  std::string vocoder;

  std::string data_dir;
  std::string lexicon;

  float feat_scale = 0.1;
  float t_shift = 0.5;
  float target_rms = 0.1;
  float guidance_scale = 1.0;

  OfflineTtsZipvoiceModelConfig() = default;

  OfflineTtsZipvoiceModelConfig(
      const std::string &tokens, const std::string &encoder,
      const std::string &decoder, const std::string &vocoder,
      const std::string &data_dir, const std::string &lexicon,
      float feat_scale = 0.1, float t_shift = 0.5, float target_rms = 0.1,
      float guidance_scale = 1.0)
      : tokens(tokens),
        encoder(encoder),
        decoder(decoder),
        vocoder(vocoder),
        data_dir(data_dir),
        lexicon(lexicon),
        feat_scale(feat_scale),
        t_shift(t_shift),
        target_rms(target_rms),
        guidance_scale(guidance_scale) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TTS_ZIPVOICE_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-tts-zipvoice-model-meta-data.h
================================================
// sherpa-onnx/csrc/offline-tts-zipvoice-model-meta-data.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_ZIPVOICE_MODEL_META_DATA_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_ZIPVOICE_MODEL_META_DATA_H_

#include <cstdint>
#include <string>

namespace sherpa_onnx {

// If you are not sure what each field means, please
// have a look of the Python file in the model directory that
// you have downloaded.
struct OfflineTtsZipvoiceModelMetaData {
  int32_t version = 1;
  int32_t feat_dim = 100;
  int32_t sample_rate = 24000;
  int32_t n_fft = 1024;
  int32_t hop_length = 256;
  int32_t window_length = 1024;
  int32_t num_mels = 100;
  int32_t use_espeak = 1;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TTS_ZIPVOICE_MODEL_META_DATA_H_


================================================
FILE: sherpa-onnx/csrc/offline-tts-zipvoice-model.cc
================================================
// sherpa-onnx/csrc/offline-tts-zipvoice-model.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-tts-zipvoice-model.h"

#include <algorithm>
#include <cstring>
#include <iostream>
#include <memory>
#include <random>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/normal-data-generator.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class OfflineTtsZipvoiceModel::Impl {
 public:
  explicit Impl(const OfflineTtsModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(config.zipvoice.encoder);
    InitEncoder(buf.data(), buf.size());

    buf = ReadFile(config.zipvoice.decoder);
    InitDecoder(buf.data(), buf.size());
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineTtsModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(mgr, config.zipvoice.encoder);
    InitEncoder(buf.data(), buf.size());

    buf = ReadFile(mgr, config.zipvoice.decoder);
    InitDecoder(buf.data(), buf.size());
  }

  const OfflineTtsZipvoiceModelMetaData &GetMetaData() const {
    return meta_data_;
  }

  Ort::Value Run(Ort::Value tokens, Ort::Value prompt_tokens,
                 Ort::Value prompt_features, float speed, int32_t num_steps,
                 float t_shift, float guidance_scale) {
    std::vector<int64_t> tokens_shape =
        tokens.GetTensorTypeAndShapeInfo().GetShape();

    int64_t batch_size = tokens_shape[0];

    std::vector<int64_t> prompt_feat_shape =
        prompt_features.GetTensorTypeAndShapeInfo().GetShape();

    int64_t prompt_feat_len = prompt_feat_shape[1];

    Ort::Value text_condition =
        RunEncoder(std::move(tokens), std::move(prompt_tokens),
                   View(&prompt_features), speed);

    std::vector<int64_t> text_cond_shape =
        text_condition.GetTensorTypeAndShapeInfo().GetShape();
    int64_t num_frames = text_cond_shape[1];

    int64_t feat_dim = meta_data_.feat_dim;

    std::vector<float> x_data(batch_size * num_frames * feat_dim);

    normal_gen_.Fill(x_data.data(), x_data.size());

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::vector<int64_t> x_shape = {batch_size, num_frames, feat_dim};
    Ort::Value x = Ort::Value::CreateTensor<float>(
        memory_info, x_data.data(), x_data.size(), x_shape.data(),
        x_shape.size());

    std::vector<float> speech_cond_data(batch_size * num_frames * feat_dim);
    const float *src = prompt_features.GetTensorData<float>();
    float *dst = speech_cond_data.data();
    std::copy(src, src + batch_size * prompt_feat_len * feat_dim, dst);
    prompt_features = Ort::Value{nullptr};

    std::vector<int64_t> speech_cond_shape = {batch_size, num_frames, feat_dim};

    Ort::Value speech_condition = Ort::Value::CreateTensor<float>(
        memory_info, speech_cond_data.data(), speech_cond_data.size(),
        speech_cond_shape.data(), speech_cond_shape.size());

    std::vector<float> timesteps(num_steps + 1);
    for (int32_t i = 0; i <= num_steps; ++i) {
      float t = static_cast<float>(i) / num_steps;
      timesteps[i] = t_shift * t / (1.0f + (t_shift - 1.0f) * t);
    }

    int64_t guidance_scale_shape = 1;
    Ort::Value guidance_scale_tensor = Ort::Value::CreateTensor<float>(
        memory_info, &guidance_scale, 1, &guidance_scale_shape, 1);

    float *x_ptr = x.GetTensorMutableData<float>();

    int64_t N = batch_size * num_frames * feat_dim;

    for (int32_t step = 0; step < num_steps; ++step) {
      float t = timesteps[step];

      Ort::Value v =
          RunDecoder(t, View(&x), View(&text_condition),
                     View(&speech_condition), View(&guidance_scale_tensor));

      float delta_t = timesteps[step + 1] - timesteps[step];

      const float *v_ptr = v.GetTensorData<float>();
      for (int64_t i = 0; i < N; ++i) {
        x_ptr[i] += v_ptr[i] * delta_t;
      }
    }

    int64_t kept_frames = num_frames - prompt_feat_len;

    std::vector<int64_t> out_shape = {batch_size, kept_frames, feat_dim};

    Ort::Value ans = Ort::Value::CreateTensor<float>(
        allocator_, out_shape.data(), out_shape.size());

    float *p_out = ans.GetTensorMutableData<float>();

    for (int64_t b = 0; b < batch_size; ++b) {
      auto begin = x_ptr + (b * num_frames + prompt_feat_len) * feat_dim;
      auto end = begin + kept_frames * feat_dim;
      std::copy(begin, end, p_out);
      p_out += kept_frames * feat_dim;
    }

    return ans;
  }

 private:
  void InitEncoder(void *encoder_data, size_t encoder_data_length) {
    encoder_sess_ = std::make_unique<Ort::Session>(
        env_, encoder_data, encoder_data_length, sess_opts_);
    GetInputNames(encoder_sess_.get(), &encoder_input_names_,
                  &encoder_names_ptr_);
    GetOutputNames(encoder_sess_.get(), &encoder_output_names_,
                   &encoder_output_names_ptr_);

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
    Ort::ModelMetadata meta_data = encoder_sess_->GetModelMetadata();
    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.use_espeak, "use_espeak",
                                            1);

    if (config_.debug) {
      std::ostringstream os;

      os << "---encoder---\n";
      Ort::ModelMetadata text_meta_data = encoder_sess_->GetModelMetadata();
      PrintModelMetadata(os, text_meta_data);

      os << "----------input names----------\n";
      int32_t i = 0;
      for (const auto &s : encoder_input_names_) {
        os << i << " " << s << "\n";
        ++i;
      }
      os << "----------output names----------\n";
      i = 0;
      for (const auto &s : encoder_output_names_) {
        os << i << " " << s << "\n";
        ++i;
      }

#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }
  }

  void InitDecoder(void *decoder_data, size_t decoder_data_length) {
    decoder_sess_ = std::make_unique<Ort::Session>(
        env_, decoder_data, decoder_data_length, sess_opts_);
    GetInputNames(decoder_sess_.get(), &decoder_input_names_,
                  &decoder_input_names_ptr_);
    GetOutputNames(decoder_sess_.get(), &decoder_output_names_,
                   &decoder_output_names_ptr_);

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
    auto meta_data = decoder_sess_->GetModelMetadata();

    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.version, "version", 1);
    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.feat_dim, "feat_dim",
                                            100);
    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.sample_rate,
                                            "sample_rate", 24000);
    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.n_fft, "n_fft", 1024);
    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.hop_length, "hop_length",
                                            256);
    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.window_length,
                                            "window_length", 1024);
    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.num_mels, "num_mels",
                                            100);

    if (config_.debug) {
      std::ostringstream os;

      os << "---decoder---\n";
      PrintModelMetadata(os, meta_data);

      os << "----------input names----------\n";
      int32_t i = 0;
      for (const auto &s : decoder_input_names_) {
        os << i << " " << s << "\n";
        ++i;
      }
      os << "----------output names----------\n";
      i = 0;
      for (const auto &s : decoder_output_names_) {
        os << i << " " << s << "\n";
        ++i;
      }

#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }
  }

  Ort::Value RunEncoder(Ort::Value tokens, Ort::Value prompt_tokens,
                        Ort::Value prompt_features, float speed) {
    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::vector<int64_t> tokens_shape =
        tokens.GetTensorTypeAndShapeInfo().GetShape();

    int64_t batch_size = tokens_shape[0];
    if (batch_size != 1) {
      SHERPA_ONNX_LOGE("Support only batch_size == 1. Given: %d",
                       static_cast<int32_t>(batch_size));
      SHERPA_ONNX_EXIT(-1);
    }

    std::vector<int64_t> prompt_feat_shape =
        prompt_features.GetTensorTypeAndShapeInfo().GetShape();

    int64_t prompt_feat_len = prompt_feat_shape[1];
    int64_t prompt_feat_len_shape = 1;
    Ort::Value prompt_feat_len_tensor = Ort::Value::CreateTensor<int64_t>(
        memory_info, &prompt_feat_len, 1, &prompt_feat_len_shape, 1);

    int64_t speed_shape = 1;
    Ort::Value speed_tensor = Ort::Value::CreateTensor<float>(
        memory_info, &speed, 1, &speed_shape, 1);

    std::vector<Ort::Value> encoder_inputs;
    encoder_inputs.reserve(4);
    encoder_inputs.push_back(std::move(tokens));
    encoder_inputs.push_back(std::move(prompt_tokens));
    encoder_inputs.push_back(std::move(prompt_feat_len_tensor));
    encoder_inputs.push_back(std::move(speed_tensor));

    auto encoder_out = encoder_sess_->Run(
        {}, encoder_names_ptr_.data(), encoder_inputs.data(),
        encoder_inputs.size(), encoder_output_names_ptr_.data(),
        encoder_output_names_ptr_.size());

    return std::move(encoder_out[0]);
  }

  Ort::Value RunDecoder(float t, Ort::Value x, Ort::Value text_condition,
                        Ort::Value speech_condition,
                        Ort::Value guidance_scale_tensor) {
    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    int64_t t_shape = 1;
    Ort::Value t_tensor =
        Ort::Value::CreateTensor<float>(memory_info, &t, 1, &t_shape, 1);

    std::vector<Ort::Value> decoder_inputs;
    decoder_inputs.reserve(5);
    decoder_inputs.emplace_back(std::move(t_tensor));
    decoder_inputs.push_back(std::move(x));
    decoder_inputs.push_back(std::move(text_condition));
    decoder_inputs.push_back(std::move(speech_condition));
    decoder_inputs.push_back(std::move(guidance_scale_tensor));

    auto decoder_out = decoder_sess_->Run(
        {}, decoder_input_names_ptr_.data(), decoder_inputs.data(),
        decoder_inputs.size(), decoder_output_names_ptr_.data(),
        decoder_output_names_ptr_.size());

    return std::move(decoder_out[0]);
  }

 private:
  OfflineTtsModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> encoder_sess_;
  std::unique_ptr<Ort::Session> decoder_sess_;

  std::vector<std::string> encoder_input_names_;
  std::vector<const char *> encoder_names_ptr_;

  std::vector<std::string> encoder_output_names_;
  std::vector<const char *> encoder_output_names_ptr_;

  std::vector<std::string> decoder_input_names_;
  std::vector<const char *> decoder_input_names_ptr_;

  std::vector<std::string> decoder_output_names_;
  std::vector<const char *> decoder_output_names_ptr_;

  OfflineTtsZipvoiceModelMetaData meta_data_;
  NormalDataGenerator normal_gen_;
};

OfflineTtsZipvoiceModel::OfflineTtsZipvoiceModel(
    const OfflineTtsModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineTtsZipvoiceModel::OfflineTtsZipvoiceModel(
    Manager *mgr, const OfflineTtsModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineTtsZipvoiceModel::~OfflineTtsZipvoiceModel() = default;

const OfflineTtsZipvoiceModelMetaData &OfflineTtsZipvoiceModel::GetMetaData()
    const {
  return impl_->GetMetaData();
}

Ort::Value OfflineTtsZipvoiceModel::Run(Ort::Value tokens,
                                        Ort::Value prompt_tokens,
                                        Ort::Value prompt_features,
                                        float speed /*= 1.0*/,
                                        int32_t num_steps /*= 16*/,
                                        float t_shift /*= 0.5f*/,
                                        float guidance_scale /*= 1.0f*/) const {
  return impl_->Run(std::move(tokens), std::move(prompt_tokens),
                    std::move(prompt_features), speed, num_steps, t_shift,
                    guidance_scale);
}

#if __ANDROID_API__ >= 9
template OfflineTtsZipvoiceModel::OfflineTtsZipvoiceModel(
    AAssetManager *mgr, const OfflineTtsModelConfig &config);
#endif

#if __OHOS__
template OfflineTtsZipvoiceModel::OfflineTtsZipvoiceModel(
    NativeResourceManager *mgr, const OfflineTtsModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-tts-zipvoice-model.h
================================================
// sherpa-onnx/csrc/offline-tts-zipvoice-model.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_ZIPVOICE_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_ZIPVOICE_MODEL_H_

#include <memory>
#include <string>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-tts-model-config.h"
#include "sherpa-onnx/csrc/offline-tts-zipvoice-model-meta-data.h"

namespace sherpa_onnx {

class OfflineTtsZipvoiceModel {
 public:
  ~OfflineTtsZipvoiceModel();

  explicit OfflineTtsZipvoiceModel(const OfflineTtsModelConfig &config);

  template <typename Manager>
  OfflineTtsZipvoiceModel(Manager *mgr, const OfflineTtsModelConfig &config);

  // Return a float32 tensor containing the mel
  // of shape (batch_size, mel_dim, num_frames)
  Ort::Value Run(Ort::Value tokens, Ort::Value prompt_tokens,
                 Ort::Value prompt_features, float speed, int32_t num_steps,
                 float t_shift = 0.5f,
                 float guidance_scale = 1.0f) const;

  const OfflineTtsZipvoiceModelMetaData &GetMetaData() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TTS_ZIPVOICE_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/offline-tts.cc
================================================
// sherpa-onnx/csrc/offline-tts.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-tts.h"

#include <cmath>
#include <map>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-tts-impl.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

struct SilenceInterval {
  int32_t start;
  int32_t end;
};

GeneratedAudio GeneratedAudio::ScaleSilence(float scale) const {
  if (scale == 1) {
    return *this;
  }
  // if the interval is larger than 0.2 second, then we assume it is a pause
  int32_t threshold = static_cast<int32_t>(sample_rate * 0.2);

  std::vector<SilenceInterval> intervals;
  int32_t num_samples = static_cast<int32_t>(samples.size());

  int32_t last = -1;
  int32_t i;
  for (i = 0; i != num_samples; ++i) {
    if (fabs(samples[i]) <= 0.01) {
      if (last == -1) {
        last = i;
      }
      continue;
    }

    if (last != -1 && i - last < threshold) {
      last = -1;
      continue;
    }

    if (last != -1) {
      intervals.push_back({last, i});
      last = -1;
    }
  }

  if (last != -1 && num_samples - last > threshold) {
    intervals.push_back({last, num_samples});
  }

  if (intervals.empty()) {
    return *this;
  }

  GeneratedAudio ans;
  ans.sample_rate = sample_rate;
  ans.samples.reserve(samples.size());

  i = 0;
  for (const auto &interval : intervals) {
    ans.samples.insert(ans.samples.end(), samples.begin() + i,
                       samples.begin() + interval.start);
    i = interval.end;
    int32_t n = static_cast<int32_t>((interval.end - interval.start) * scale);

    ans.samples.insert(ans.samples.end(), samples.begin() + interval.start,
                       samples.begin() + interval.start + n);
  }

  if (i < num_samples) {
    ans.samples.insert(ans.samples.end(), samples.begin() + i, samples.end());
  }

  return ans;
}

std::string GenerationConfig::GetExtraString(
    const std::string &key, const std::string &def /*= ""*/) const {
  auto it = extra.find(key);
  return it == extra.end() ? def : it->second;
}

int32_t GenerationConfig::GetExtraInt(const std::string &key,
                                      int32_t def) const {
  auto it = extra.find(key);
  if (it == extra.end()) {
    return def;
  }

  return ToIntOrDefault(it->second, def);
}

float GenerationConfig::GetExtraFloat(const std::string &key, float def) const {
  auto it = extra.find(key);
  if (it == extra.end()) {
    return def;
  }

  return ToFloatOrDefault(it->second, def);
}

std::string GenerationConfig::ToString() const {
  std::ostringstream os;

  os << "GenerationConfig(";
  os << "silence_scale=" << silence_scale;
  os << ", speed=" << speed;
  os << ", sid=" << sid;
  os << ", num_steps=" << num_steps;
  os << ", reference_audio_len=" << reference_audio.size();
  os << ", reference_sample_rate=" << reference_sample_rate;

  if (!reference_text.empty()) {
    os << ", reference_text=\"" << reference_text << "\"";
  }

  if (!extra.empty()) {
    os << ", extra={";
    std::string sep;

    std::map<std::string, std::string> sorted(extra.begin(), extra.end());

    for (const auto &kv : sorted) {
      os << sep << kv.first << ": \"" << kv.second << "\"";
      sep = ", ";
    }
    os << "}";
  }

  os << ")";
  return os.str();
}

void OfflineTtsConfig::Register(ParseOptions *po) {
  model.Register(po);

  po->Register("tts-rule-fsts", &rule_fsts,
               "It not empty, it contains a list of rule FST filenames."
               "Multiple filenames are separated by a comma and they are "
               "applied from left to right. An example value: "
               "rule1.fst,rule2.fst,rule3.fst");

  po->Register("tts-rule-fars", &rule_fars,
               "It not empty, it contains a list of rule FST archive filenames."
               "Multiple filenames are separated by a comma and they are "
               "applied from left to right. An example value: "
               "rule1.far,rule2.far,rule3.far. Note that an *.far can contain "
               "multiple *.fst files");

  po->Register(
      "tts-max-num-sentences", &max_num_sentences,
      "Maximum number of sentences that we process at a time. "
      "This is to avoid OOM for very long input text. "
      "If you set it to -1, then we process all sentences in a single batch.");

  po->Register("tts-silence-scale", &silence_scale,
               "Duration of the pause is scaled by this number. So a smaller "
               "value leads to a shorter pause.");
}

bool OfflineTtsConfig::Validate() const {
  if (!rule_fsts.empty()) {
    std::vector<std::string> files;
    SplitStringToVector(rule_fsts, ",", false, &files);
    for (const auto &f : files) {
      if (!FileExists(f)) {
        SHERPA_ONNX_LOGE("Rule fst '%s' does not exist. ", f.c_str());
        return false;
      }
    }
  }

  if (!rule_fars.empty()) {
    std::vector<std::string> files;
    SplitStringToVector(rule_fars, ",", false, &files);
    for (const auto &f : files) {
      if (!FileExists(f)) {
        SHERPA_ONNX_LOGE("Rule far '%s' does not exist. ", f.c_str());
        return false;
      }
    }
  }

  if (silence_scale < 0.001) {
    SHERPA_ONNX_LOGE("--tts-silence-scale '%.3f' is too small", silence_scale);
    return false;
  }

  return model.Validate();
}

std::string OfflineTtsConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineTtsConfig(";
  os << "model=" << model.ToString() << ", ";
  os << "rule_fsts=\"" << rule_fsts << "\", ";
  os << "rule_fars=\"" << rule_fars << "\", ";
  os << "max_num_sentences=" << max_num_sentences << ", ";
  os << "silence_scale=" << silence_scale << ")";

  return os.str();
}

OfflineTts::OfflineTts(const OfflineTtsConfig &config)
    : impl_(OfflineTtsImpl::Create(config)) {}

template <typename Manager>
OfflineTts::OfflineTts(Manager *mgr, const OfflineTtsConfig &config)
    : impl_(OfflineTtsImpl::Create(mgr, config)) {}

OfflineTts::~OfflineTts() = default;

GeneratedAudio OfflineTts::Generate(
    const std::string &text, int64_t sid /*=0*/, float speed /*= 1.0*/,
    GeneratedAudioCallback callback /*= nullptr*/) const {
#if !defined(_WIN32)
  return impl_->Generate(text, sid, speed, std::move(callback));
#else
  if (IsUtf8(text)) {
    return impl_->Generate(text, sid, speed, std::move(callback));
  } else if (IsGB2312(text)) {
    auto utf8_text = Gb2312ToUtf8(text);
    static bool printed = false;
    if (!printed) {
      SHERPA_ONNX_LOGE(
          "Detected GB2312 encoded string! Converting it to UTF8.");
      printed = true;
    }
    return impl_->Generate(utf8_text, sid, speed, std::move(callback));
  } else {
    SHERPA_ONNX_LOGE(
        "Non UTF8 encoded string is received. You would not get expected "
        "results!");
    return impl_->Generate(text, sid, speed, std::move(callback));
  }
#endif
}

GeneratedAudio OfflineTts::Generate(
    const std::string &text, const std::string &prompt_text,
    const std::vector<float> &prompt_samples, int32_t sample_rate,
    float speed /*=1.0*/, int32_t num_steps /*=4*/,
    GeneratedAudioCallback callback /*=nullptr*/) const {
#if !defined(_WIN32)
  return impl_->Generate(text, prompt_text, prompt_samples, sample_rate, speed,
                         num_steps, std::move(callback));
#else
  static bool printed = false;
  auto utf8_text = text;
  if (IsGB2312(text)) {
    utf8_text = Gb2312ToUtf8(text);
    if (!printed) {
      SHERPA_ONNX_LOGE("Detected GB2312 encoded text! Converting it to UTF8.");
      printed = true;
    }
  }
  auto utf8_prompt_text = prompt_text;
  if (IsGB2312(prompt_text)) {
    utf8_prompt_text = Gb2312ToUtf8(prompt_text);
    if (!printed) {
      SHERPA_ONNX_LOGE(
          "Detected GB2312 encoded prompt text! Converting it to UTF8.");
      printed = true;
    }
  }
  if (IsUtf8(utf8_text) && IsUtf8(utf8_prompt_text)) {
    return impl_->Generate(utf8_text, utf8_prompt_text, prompt_samples,
                           sample_rate, speed, num_steps, std::move(callback));
  } else {
    SHERPA_ONNX_LOGE(
        "Non UTF8 encoded string is received. You would not get expected "
        "results!");
    return impl_->Generate(utf8_text, utf8_prompt_text, prompt_samples,
                           sample_rate, speed, num_steps, std::move(callback));
  }
#endif
}

GeneratedAudio OfflineTts::Generate(
    const std::string &text, const GenerationConfig &config,
    GeneratedAudioCallback callback /*= nullptr*/) const {
#if !defined(_WIN32)
  return impl_->Generate(text, config, std::move(callback));
#else
  if (IsUtf8(text)) {
    return impl_->Generate(text, config, std::move(callback));
  } else if (IsGB2312(text)) {
    auto utf8_text = Gb2312ToUtf8(text);
    static bool printed = false;
    if (!printed) {
      SHERPA_ONNX_LOGE(
          "Detected GB2312 encoded string! Converting it to UTF8.");
      printed = true;
    }
    return impl_->Generate(utf8_text, config, std::move(callback));
  } else {
    SHERPA_ONNX_LOGE(
        "Non UTF8 encoded string is received. You would not get expected "
        "results!");
    return impl_->Generate(text, config, std::move(callback));
  }
#endif
}

int32_t OfflineTts::SampleRate() const { return impl_->SampleRate(); }

int32_t OfflineTts::NumSpeakers() const { return impl_->NumSpeakers(); }

#if __ANDROID_API__ >= 9
template OfflineTts::OfflineTts(AAssetManager *mgr,
                                const OfflineTtsConfig &config);
#endif

#if __OHOS__
template OfflineTts::OfflineTts(NativeResourceManager *mgr,
                                const OfflineTtsConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-tts.h
================================================
// sherpa-onnx/csrc/offline-tts.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_H_

#include <cstdint>
#include <functional>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include "sherpa-onnx/csrc/offline-tts-model-config.h"
#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineTtsConfig {
  OfflineTtsModelConfig model;
  // If not empty, it contains a list of rule FST filenames.
  // Filenames are separated by a comma.
  // Example value: rule1.fst,rule2,fst,rule3.fst
  //
  // If there are multiple rules, they are applied from left to right.
  std::string rule_fsts;

  // If there are multiple FST archives, they are applied from left to right.
  std::string rule_fars;

  // Maximum number of sentences that we process at a time.
  // This is to avoid OOM for very long input text.
  // If you set it to -1, then we process all sentences in a single batch.
  int32_t max_num_sentences = 1;

  // A silence interval contains audio samples with value close to 0.
  //
  // the duration of the new interval is old_duration * silence_scale.
  float silence_scale = 0.2;

  OfflineTtsConfig() = default;
  OfflineTtsConfig(const OfflineTtsModelConfig &model,
                   const std::string &rule_fsts, const std::string &rule_fars,
                   int32_t max_num_sentences, float silence_scale)
      : model(model),
        rule_fsts(rule_fsts),
        rule_fars(rule_fars),
        max_num_sentences(max_num_sentences),
        silence_scale(silence_scale) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

struct GeneratedAudio {
  std::vector<float> samples;
  int32_t sample_rate;

  // Silence means pause here.
  // If scale > 1, then it increases the duration of a pause
  // If scale < 1, then it reduces the duration of a pause
  GeneratedAudio ScaleSilence(float scale) const;
};

struct GenerationConfig {
  float silence_scale = 0.2;

  float speed = 1.0f;  // used only by some models.
  int32_t sid = 0;     // used only by models support multi-speakers

  std::vector<float> reference_audio;  // mono, [-1, 1]
  int32_t reference_sample_rate = 0;   // sample rate of reference_audio
  std::string reference_text;          // not all models require this
  int32_t num_steps = 5;               // number of steps in flow matching

  // model specific
  // Please see the Generate method of each model in ./offline-tts-xx-impl.h
  // e.g., in ./offline-tts-pocket-impl.h
  std::unordered_map<std::string, std::string> extra;

  std::string GetExtraString(const std::string &key,
                             const std::string &def = "") const;

  int32_t GetExtraInt(const std::string &key, int32_t def) const;

  float GetExtraFloat(const std::string &key, float def) const;

  std::string ToString() const;
};

class OfflineTtsImpl;

// If the callback returns 0, then it stops generating
// if the callback returns 1, then it keeps generating
using GeneratedAudioCallback = std::function<int32_t(
    const float * /*samples*/, int32_t /*n*/, float /*progress*/)>;

class OfflineTts {
 public:
  ~OfflineTts();
  explicit OfflineTts(const OfflineTtsConfig &config);

  template <typename Manager>
  OfflineTts(Manager *mgr, const OfflineTtsConfig &config);

  // @param text A string containing words separated by spaces
  // @param sid Speaker ID. Used only for multi-speaker models, e.g., models
  //            trained using the VCTK dataset. It is not used for
  //            single-speaker models, e.g., models trained using the ljspeech
  //            dataset.
  // @param speed The speed for the generated speech. E.g., 2 means 2x faster.
  // @param callback If not NULL, it is called whenever config.max_num_sentences
  //                 sentences have been processed. Note that the passed
  //                 pointer `samples` for the callback might be invalidated
  //                 after the callback is returned, so the caller should not
  //                 keep a reference to it. The caller can copy the data if
  //                 he/she wants to access the samples after the callback
  //                 returns. The callback is called in the current thread.
  [[deprecated("Use Generate(text, GenerationConfig, callback) instead")]]
  GeneratedAudio Generate(const std::string &text, int64_t sid = 0,
                          float speed = 1.0,
                          GeneratedAudioCallback callback = nullptr) const;

  // @param text The string to be synthesized.
  // @param prompt_text The transcribe of `prompt_sampes`.
  // @param prompt_samples The prompt audio samples (mono PCM floats in [-1,1]).
  // @param sample_rate The sample rate of `prompt_audio` in Hz.
  // @param speed The speed for the generated speech. E.g., 2 means 2x faster.
  // @param num_steps The number of flow steps to generate the audio.
  // @param callback If not NULL, it is called whenever config.max_num_sentences
  //                 sentences have been processed. Note that the passed
  //                 pointer `samples` for the callback might be invalidated
  //                 after the callback is returned, so the caller should not
  //                 keep a reference to it. The caller can copy the data if
  //                 he/she wants to access the samples after the callback
  //                 returns. The callback is called in the current thread.
  [[deprecated("Use Generate(text, GenerationConfig, callback) instead")]]
  GeneratedAudio Generate(const std::string &text,
                          const std::string &prompt_text,
                          const std::vector<float> &prompt_samples,
                          int32_t sample_rate, float speed = 1.0,
                          int32_t num_steps = 4,
                          GeneratedAudioCallback callback = nullptr) const;

  GeneratedAudio Generate(const std::string &text,
                          const GenerationConfig &config,
                          GeneratedAudioCallback callback = nullptr) const;

  // Return the sample rate of the generated audio
  int32_t SampleRate() const;

  // Number of supported speakers.
  // If it supports only a single speaker, then it return 0 or 1.
  int32_t NumSpeakers() const;

 private:
  std::unique_ptr<OfflineTtsImpl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_TTS_H_


================================================
FILE: sherpa-onnx/csrc/offline-websocket-server-impl.cc
================================================
// sherpa-onnx/csrc/offline-websocket-server-impl.cc
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-websocket-server-impl.h"

#include <algorithm>
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OfflineWebsocketDecoderConfig::Register(ParseOptions *po) {
  recognizer_config.Register(po);

  po->Register("max-batch-size", &max_batch_size,
               "Max batch size for decoding.");

  po->Register(
      "max-utterance-length", &max_utterance_length,
      "Max utterance length in seconds. If we receive an utterance "
      "longer than this value, we will reject the connection. "
      "If you have enough memory, you can select a large value for it.");
}

void OfflineWebsocketDecoderConfig::Validate() const {
  if (!recognizer_config.Validate()) {
    SHERPA_ONNX_LOGE("Error in recognizer config");
    exit(-1);
  }

  if (max_batch_size <= 0) {
    SHERPA_ONNX_LOGE("Expect --max-batch-size > 0. Given: %d", max_batch_size);
    exit(-1);
  }

  if (max_utterance_length <= 0) {
    SHERPA_ONNX_LOGE("Expect --max-utterance-length > 0. Given: %f",
                     max_utterance_length);
    exit(-1);
  }
}

OfflineWebsocketDecoder::OfflineWebsocketDecoder(OfflineWebsocketServer *server)
    : config_(server->GetConfig().decoder_config),
      server_(server),
      recognizer_(config_.recognizer_config) {}  // NOLINT

void OfflineWebsocketDecoder::Push(connection_hdl hdl, ConnectionDataPtr d) {
  std::lock_guard<std::mutex> lock(mutex_);
  streams_.push_back({hdl, d});
}

void OfflineWebsocketDecoder::Decode() {
  std::unique_lock<std::mutex> lock(mutex_);
  if (streams_.empty()) {
    return;
  }

  int32_t size =
      std::min(static_cast<int32_t>(streams_.size()), config_.max_batch_size);
  SHERPA_ONNX_LOGE("size: %d", size);

  // We first lock the mutex for streams_, take items from it, and then
  // unlock the mutex; in doing so we don't need to lock the mutex to
  // access hdl and connection_data later.
  std::vector<connection_hdl> handles(size);

  // Store connection_data here to prevent the data from being freed
  // while we are still using it.
  std::vector<ConnectionDataPtr> connection_data(size);

  std::vector<const float *> samples(size);
  std::vector<int32_t> samples_length(size);
  std::vector<std::unique_ptr<OfflineStream>> ss(size);
  std::vector<OfflineStream *> p_ss(size);

  for (int32_t i = 0; i != size; ++i) {
    auto &p = streams_.front();
    handles[i] = p.first;
    connection_data[i] = p.second;
    streams_.pop_front();

    auto sample_rate = connection_data[i]->sample_rate;
    auto samples =
        reinterpret_cast<const float *>(&connection_data[i]->data[0]);
    auto num_samples = connection_data[i]->expected_byte_size / sizeof(float);
    auto s = recognizer_.CreateStream();
    s->AcceptWaveform(sample_rate, samples, num_samples);

    ss[i] = std::move(s);
    p_ss[i] = ss[i].get();
  }

  lock.unlock();

  // Note: DecodeStreams is thread-safe
  recognizer_.DecodeStreams(p_ss.data(), size);

  for (int32_t i = 0; i != size; ++i) {
    connection_hdl hdl = handles[i];
    asio::post(server_->GetConnectionContext(),
               [this, hdl, result = ss[i]->GetResult()]() {
                 websocketpp::lib::error_code ec;
                 server_->GetServer().send(hdl, result.AsJsonString(),
                                           websocketpp::frame::opcode::text,
                                           ec);
                 if (ec) {
                   server_->GetServer().get_alog().write(
                       websocketpp::log::alevel::app, ec.message());
                 }
               });
  }
}

void OfflineWebsocketServerConfig::Register(ParseOptions *po) {
  decoder_config.Register(po);
  po->Register("log-file", &log_file,
               "Path to the log file. Logs are "
               "appended to this file");
}

void OfflineWebsocketServerConfig::Validate() const {
  decoder_config.Validate();
}

OfflineWebsocketServer::OfflineWebsocketServer(
    asio::io_context &io_conn,  // NOLINT
    asio::io_context &io_work,  // NOLINT
    const OfflineWebsocketServerConfig &config)
    : io_conn_(io_conn),
      io_work_(io_work),
      config_(config),
      log_(config.log_file, std::ios::app),
      tee_(std::cout, log_),
      decoder_(this) {
  SetupLog();

  server_.init_asio(&io_conn_);

  server_.set_open_handler([this](connection_hdl hdl) { OnOpen(hdl); });

  server_.set_close_handler([this](connection_hdl hdl) { OnClose(hdl); });

  server_.set_message_handler(
      [this](connection_hdl hdl, server::message_ptr msg) {
        OnMessage(hdl, msg);
      });
}

void OfflineWebsocketServer::SetupLog() {
  server_.clear_access_channels(websocketpp::log::alevel::all);
  server_.set_access_channels(websocketpp::log::alevel::connect);
  server_.set_access_channels(websocketpp::log::alevel::disconnect);

  // So that it also prints to std::cout and std::cerr
  server_.get_alog().set_ostream(&tee_);
  server_.get_elog().set_ostream(&tee_);
}

void OfflineWebsocketServer::OnOpen(connection_hdl hdl) {
  std::lock_guard<std::mutex> lock(mutex_);
  connections_.emplace(hdl, std::make_shared<ConnectionData>());

  SHERPA_ONNX_LOGE("Number of active connections: %d",
                   static_cast<int32_t>(connections_.size()));
}

void OfflineWebsocketServer::OnClose(connection_hdl hdl) {
  std::lock_guard<std::mutex> lock(mutex_);
  connections_.erase(hdl);

  SHERPA_ONNX_LOGE("Number of active connections: %d",
                   static_cast<int32_t>(connections_.size()));
}

void OfflineWebsocketServer::OnMessage(connection_hdl hdl,
                                       server::message_ptr msg) {
  std::unique_lock<std::mutex> lock(mutex_);
  auto connection_data = connections_.find(hdl)->second;
  lock.unlock();
  const std::string &payload = msg->get_payload();

  switch (msg->get_opcode()) {
    case websocketpp::frame::opcode::text:
      if (payload == "Done") {
        // The client will not send any more data. We can close the
        // connection now.
        Close(hdl, websocketpp::close::status::normal, "Done");
      } else {
        Close(hdl, websocketpp::close::status::normal,
              std::string("Invalid payload: ") + payload);
      }
      break;

    case websocketpp::frame::opcode::binary: {
      auto p = reinterpret_cast<const int8_t *>(payload.data());

      if (connection_data->expected_byte_size == 0) {
        if (payload.size() < 8) {
          Close(hdl, websocketpp::close::status::normal,
                "Payload is too short");
          break;
        }

        connection_data->sample_rate = *reinterpret_cast<const int32_t *>(p);

        connection_data->expected_byte_size =
            *reinterpret_cast<const int32_t *>(p + 4);

        int32_t max_byte_size_ = decoder_.GetConfig().max_utterance_length *
                                 connection_data->sample_rate * sizeof(float);
        if (connection_data->expected_byte_size > max_byte_size_) {
          float num_samples =
              connection_data->expected_byte_size / sizeof(float);

          float duration = num_samples / connection_data->sample_rate;

          std::ostringstream os;
          os << "Max utterance length is configured to "
             << decoder_.GetConfig().max_utterance_length
             << " seconds, received length is " << duration << " seconds. "
             << "Payload is too large!";
          Close(hdl, websocketpp::close::status::message_too_big, os.str());
          break;
        }

        connection_data->data.resize(connection_data->expected_byte_size);
        std::copy(payload.begin() + 8, payload.end(),
                  connection_data->data.data());
        connection_data->cur = payload.size() - 8;
      } else {
        std::copy(payload.begin(), payload.end(),
                  connection_data->data.data() + connection_data->cur);
        connection_data->cur += payload.size();
      }

      if (connection_data->expected_byte_size == connection_data->cur) {
        auto d = std::make_shared<ConnectionData>(std::move(*connection_data));
        // Clear it so that we can handle the next audio file from the client.
        // The client can send multiple audio files for recognition without
        // the need to create another connection.
        connection_data->sample_rate = 0;
        connection_data->expected_byte_size = 0;
        connection_data->cur = 0;

        decoder_.Push(hdl, d);

        connection_data->Clear();

        asio::post(io_work_, [this]() { decoder_.Decode(); });
      }
      break;
    }

    default:
      // Unexpected message, ignore it
      break;
  }
}

void OfflineWebsocketServer::Close(connection_hdl hdl,
                                   websocketpp::close::status::value code,
                                   const std::string &reason) {
  auto con = server_.get_con_from_hdl(hdl);

  std::ostringstream os;
  os << "Closing " << con->get_remote_endpoint() << " with reason: " << reason
     << "\n";

  websocketpp::lib::error_code ec;
  server_.close(hdl, code, reason, ec);
  if (ec) {
    os << "Failed to close" << con->get_remote_endpoint() << ". "
       << ec.message() << "\n";
  }
  server_.get_alog().write(websocketpp::log::alevel::app, os.str());
}

void OfflineWebsocketServer::Run(uint16_t port) {
  server_.set_reuse_addr(true);
  server_.listen(asio::ip::tcp::v4(), port);
  server_.start_accept();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-websocket-server-impl.h
================================================
// sherpa-onnx/csrc/offline-websocket-server-impl.h
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_WEBSOCKET_SERVER_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_WEBSOCKET_SERVER_IMPL_H_

#include <deque>
#include <fstream>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/parse-options.h"
#include "sherpa-onnx/csrc/tee-stream.h"
#include "websocketpp/config/asio_no_tls.hpp"  // TODO(fangjun): support TLS
#include "websocketpp/server.hpp"

using server = websocketpp::server<websocketpp::config::asio>;
using connection_hdl = websocketpp::connection_hdl;

namespace sherpa_onnx {

/** Communication protocol
 *
 * The client sends a byte stream to the server. The first 4 bytes in little
 * endian indicates the sample rate of the audio data that the client will send.
 * The next 4 bytes in little endian indicates the total samples in bytes the
 * client will send. The remaining bytes represent audio samples. Each audio
 * sample is a float occupying 4 bytes and is normalized into the range
 * [-1, 1].
 *
 * The byte stream can be broken into arbitrary number of messages.
 * We require that the first message has to be at least 8 bytes so that
 * we can get `sample_rate` and `expected_byte_size` from the first message.
 */
struct ConnectionData {
  // Sample rate of the audio samples the client
  int32_t sample_rate;

  // Number of expected bytes sent from the client
  int32_t expected_byte_size = 0;

  // Number of bytes received so far
  int32_t cur = 0;

  // It saves the received samples from the client.
  // We will **reinterpret_cast** it to float.
  // We expect that data.size() == expected_byte_size
  std::vector<int8_t> data;

  void Clear() {
    sample_rate = 0;
    expected_byte_size = 0;
    cur = 0;
    data.clear();
  }
};

using ConnectionDataPtr = std::shared_ptr<ConnectionData>;

struct OfflineWebsocketDecoderConfig {
  OfflineRecognizerConfig recognizer_config;

  int32_t max_batch_size = 5;

  float max_utterance_length = 300;  // seconds

  void Register(ParseOptions *po);
  void Validate() const;
};

class OfflineWebsocketServer;

class OfflineWebsocketDecoder {
 public:
  /**
   * @param config Configuration for the decoder.
   * @param server **Borrowed** from outside.
   */
  explicit OfflineWebsocketDecoder(OfflineWebsocketServer *server);

  /** Insert received data to the queue for decoding.
   *
   * @param hdl A handle to the connection. We can use it to send the result
   *            back to the client once it finishes decoding.
   * @param d  The received data
   */
  void Push(connection_hdl hdl, ConnectionDataPtr d);

  /** It is called by one of the work thread.
   */
  void Decode();

  const OfflineWebsocketDecoderConfig &GetConfig() const { return config_; }

 private:
  OfflineWebsocketDecoderConfig config_;

  /** When we have received all the data from the client, we put it into
   * this queue; the worker threads will get items from this queue for
   * decoding.
   *
   * Number of items to take from this queue is determined by
   * `--max-batch-size`. If there are not enough items in the queue, we won't
   * wait and take whatever we have for decoding.
   */
  std::mutex mutex_;
  std::deque<std::pair<connection_hdl, ConnectionDataPtr>> streams_;

  OfflineWebsocketServer *server_;  // Not owned
  OfflineRecognizer recognizer_;
};

struct OfflineWebsocketServerConfig {
  OfflineWebsocketDecoderConfig decoder_config;
  std::string log_file = "./log.txt";

  void Register(ParseOptions *po);
  void Validate() const;
};

class OfflineWebsocketServer {
 public:
  OfflineWebsocketServer(asio::io_context &io_conn,  // NOLINT
                         asio::io_context &io_work,  // NOLINT
                         const OfflineWebsocketServerConfig &config);

  asio::io_context &GetConnectionContext() { return io_conn_; }
  server &GetServer() { return server_; }

  void Run(uint16_t port);

  const OfflineWebsocketServerConfig &GetConfig() const { return config_; }

 private:
  void SetupLog();

  // When a websocket client is connected, it will invoke this method
  // (Not for HTTP)
  void OnOpen(connection_hdl hdl);

  // When a websocket client is disconnected, it will invoke this method
  void OnClose(connection_hdl hdl);

  // When a message is received from a websocket client, this method will
  // be invoked.
  //
  // The protocol between the client and the server is as follows:
  //
  // (1) The client connects to the server
  // (2) The client starts to send binary byte stream to the server.
  //     The byte stream can be broken into multiple messages or it can
  //     be put into a single message.
  //     The first message has to contain at least 8 bytes. The first
  //     4 bytes in little endian contains a int32_t indicating the
  //     sampling rate. The next 4 bytes in little endian contains a int32_t
  //     indicating total number of bytes of samples the client will send.
  //     We assume each sample is a float containing 4 bytes and has been
  //     normalized to the range [-1, 1].
  // (4) When the server receives all the samples from the client, it will
  //     start to decode them. Once decoded, the server sends a text message
  //     to the client containing the decoded results
  // (5) After receiving the decoded results from the server, if the client has
  //     another audio file to send, it repeats (2), (3), (4)
  // (6) If the client has no more audio files to decode, the client sends a
  //     text message containing "Done" to the server and closes the connection
  // (7) The server receives a text message "Done" and closes the connection
  //
  // Note:
  //  (a) All models in icefall use features extracted from audio samples
  //      normalized to the range [-1, 1]. Please send normalized audio samples
  //      if you use models from icefall.
  //  (b) Only sound files with a single channel is supported
  //  (c) Only audio samples are sent. For instance, if we want to decode
  //      a WAVE file, the RIFF header of the WAVE is not sent.
  void OnMessage(connection_hdl hdl, server::message_ptr msg);

  // Close a websocket connection with given code and reason
  void Close(connection_hdl hdl, websocketpp::close::status::value code,
             const std::string &reason);

 private:
  asio::io_context &io_conn_;
  asio::io_context &io_work_;
  server server_;

  std::map<connection_hdl, ConnectionDataPtr, std::owner_less<connection_hdl>>
      connections_;
  std::mutex mutex_;

  OfflineWebsocketServerConfig config_;

  std::ofstream log_;
  TeeStream tee_;

  OfflineWebsocketDecoder decoder_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_WEBSOCKET_SERVER_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/offline-websocket-server.cc
================================================
// sherpa-onnx/csrc/offline-websocket-server.cc
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#include <vector>

#include "asio.hpp"  // NOLINT
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-websocket-server-impl.h"
#include "sherpa-onnx/csrc/parse-options.h"

static constexpr const char *kUsageMessage = R"(
Automatic speech recognition with sherpa-onnx using websocket.

Usage:

./bin/sherpa-onnx-offline-websocket-server --help

(1) For transducer models

./bin/sherpa-onnx-offline-websocket-server \
  --port=6006 \
  --num-work-threads=5 \
  --tokens=/path/to/tokens.txt \
  --encoder=/path/to/encoder.onnx \
  --decoder=/path/to/decoder.onnx \
  --joiner=/path/to/joiner.onnx \
  --log-file=./log.txt \
  --max-batch-size=5

(2) For Paraformer

./bin/sherpa-onnx-offline-websocket-server \
  --port=6006 \
  --num-work-threads=5 \
  --tokens=/path/to/tokens.txt \
  --paraformer=/path/to/model.onnx \
  --log-file=./log.txt \
  --max-batch-size=5

Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
for a list of pre-trained models to download.
)";

int32_t main(int32_t argc, char *argv[]) {
  sherpa_onnx::ParseOptions po(kUsageMessage);

  sherpa_onnx::OfflineWebsocketServerConfig config;

  // the server will listen on this port
  int32_t port = 6006;

  // size of the thread pool for handling network connections
  int32_t num_io_threads = 1;

  // size of the thread pool for neural network computation and decoding
  int32_t num_work_threads = 3;

  po.Register("num-io-threads", &num_io_threads,
              "Thread pool size for network connections.");

  po.Register("num-work-threads", &num_work_threads,
              "Thread pool size for for neural network "
              "computation and decoding.");

  po.Register("port", &port, "The port on which the server will listen.");

  config.Register(&po);
  po.DisableOption("sample-rate");

  if (argc == 1) {
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  po.Read(argc, argv);

  if (po.NumArgs() != 0) {
    SHERPA_ONNX_LOGE("Unrecognized positional arguments!");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  config.Validate();

  asio::io_context io_conn;  // for network connections
  asio::io_context io_work;  // for neural network and decoding

  sherpa_onnx::OfflineWebsocketServer server(io_conn, io_work, config);
  server.Run(port);

  SHERPA_ONNX_LOGE("Started!");
  SHERPA_ONNX_LOGE("Listening on: %d", port);
  SHERPA_ONNX_LOGE("Number of work threads: %d", num_work_threads);

  // give some work to do for the io_work pool
  auto work_guard = asio::make_work_guard(io_work);

  std::vector<std::thread> io_threads;

  // decrement since the main thread is also used for network communications
  for (int32_t i = 0; i < num_io_threads - 1; ++i) {
    io_threads.emplace_back([&io_conn]() { io_conn.run(); });
  }

  std::vector<std::thread> work_threads;
  for (int32_t i = 0; i < num_work_threads; ++i) {
    work_threads.emplace_back([&io_work]() { io_work.run(); });
  }

  io_conn.run();

  for (auto &t : io_threads) {
    t.join();
  }

  for (auto &t : work_threads) {
    t.join();
  }

  return 0;
}


================================================
FILE: sherpa-onnx/csrc/offline-wenet-ctc-model-config.cc
================================================
// sherpa-onnx/csrc/offline-wenet-ctc-model-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-wenet-ctc-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OfflineWenetCtcModelConfig::Register(ParseOptions *po) {
  po->Register(
      "wenet-ctc-model", &model,
      "Path to model.onnx from WeNet. Please see "
      "https://github.com/k2-fsa/sherpa-onnx/pull/425 for available models");
}

bool OfflineWenetCtcModelConfig::Validate() const {
  if (!FileExists(model)) {
    SHERPA_ONNX_LOGE("WeNet model: '%s' does not exist", model.c_str());
    return false;
  }

  return true;
}

std::string OfflineWenetCtcModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineWenetCtcModelConfig(";
  os << "model=\"" << model << "\")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-wenet-ctc-model-config.h
================================================
// sherpa-onnx/csrc/offline-wenet-ctc-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_WENET_CTC_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_WENET_CTC_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineWenetCtcModelConfig {
  std::string model;

  OfflineWenetCtcModelConfig() = default;
  explicit OfflineWenetCtcModelConfig(const std::string &model)
      : model(model) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_WENET_CTC_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-wenet-ctc-model.cc
================================================
// sherpa-onnx/csrc/offline-wenet-ctc-model.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-wenet-ctc-model.h"

#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"
#include "sherpa-onnx/csrc/transpose.h"

namespace sherpa_onnx {

class OfflineWenetCtcModel::Impl {
 public:
  explicit Impl(const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(config_.wenet_ctc.model);
    Init(buf.data(), buf.size());
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(mgr, config_.wenet_ctc.model);
    Init(buf.data(), buf.size());
  }

  std::vector<Ort::Value> Forward(Ort::Value features,
                                  Ort::Value features_length) {
    std::array<Ort::Value, 2> inputs = {std::move(features),
                                        std::move(features_length)};

    return sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(),
                      output_names_ptr_.data(), output_names_ptr_.size());
  }

  int32_t VocabSize() const { return vocab_size_; }

  int32_t SubsamplingFactor() const { return subsampling_factor_; }

  OrtAllocator *Allocator() { return allocator_; }

 private:
  void Init(void *model_data, size_t model_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);

    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      PrintModelMetadata(os, meta_data);
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
    SHERPA_ONNX_READ_META_DATA(vocab_size_, "vocab_size");
    SHERPA_ONNX_READ_META_DATA(subsampling_factor_, "subsampling_factor");
  }

 private:
  OfflineModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;

  int32_t vocab_size_ = 0;
  int32_t subsampling_factor_ = 0;
};

OfflineWenetCtcModel::OfflineWenetCtcModel(const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineWenetCtcModel::OfflineWenetCtcModel(Manager *mgr,
                                           const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineWenetCtcModel::~OfflineWenetCtcModel() = default;

std::vector<Ort::Value> OfflineWenetCtcModel::Forward(
    Ort::Value features, Ort::Value features_length) {
  return impl_->Forward(std::move(features), std::move(features_length));
}

int32_t OfflineWenetCtcModel::VocabSize() const { return impl_->VocabSize(); }

int32_t OfflineWenetCtcModel::SubsamplingFactor() const {
  return impl_->SubsamplingFactor();
}

OrtAllocator *OfflineWenetCtcModel::Allocator() const {
  return impl_->Allocator();
}

#if __ANDROID_API__ >= 9
template OfflineWenetCtcModel::OfflineWenetCtcModel(
    AAssetManager *mgr, const OfflineModelConfig &config);
#endif

#if __OHOS__
template OfflineWenetCtcModel::OfflineWenetCtcModel(
    NativeResourceManager *mgr, const OfflineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-wenet-ctc-model.h
================================================
// sherpa-onnx/csrc/offline-wenet-ctc-model.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_WENET_CTC_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_WENET_CTC_MODEL_H_
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-ctc-model.h"
#include "sherpa-onnx/csrc/offline-model-config.h"

namespace sherpa_onnx {

/** This class implements the CTC model from WeNet.
 *
 * See
 * https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/wenet/export-onnx.py
 * https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/wenet/test-onnx.py
 * https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/wenet/run.sh
 *
 */
class OfflineWenetCtcModel : public OfflineCtcModel {
 public:
  explicit OfflineWenetCtcModel(const OfflineModelConfig &config);

  template <typename Manager>
  OfflineWenetCtcModel(Manager *mgr, const OfflineModelConfig &config);

  ~OfflineWenetCtcModel() override;

  /** Run the forward method of the model.
   *
   * @param features  A tensor of shape (N, T, C).
   * @param features_length  A 1-D tensor of shape (N,) containing number of
   *                         valid frames in `features` before padding.
   *                         Its dtype is int64_t.
   *
   * @return Return a vector containing:
   *  - log_probs: A 3-D tensor of shape (N, T', vocab_size).
   *  - log_probs_length A 1-D tensor of shape (N,). Its dtype is int64_t
   */
  std::vector<Ort::Value> Forward(Ort::Value features,
                                  Ort::Value features_length) override;

  /** Return the vocabulary size of the model
   */
  int32_t VocabSize() const override;

  /** SubsamplingFactor of the model
   *
   * For Citrinet, the subsampling factor is usually 4.
   * For Conformer CTC, the subsampling factor is usually 8.
   */
  int32_t SubsamplingFactor() const override;

  /** Return an allocator for allocating memory
   */
  OrtAllocator *Allocator() const override;

  // WeNet CTC models do not support batch size > 1
  bool SupportBatchProcessing() const override { return false; }

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_WENET_CTC_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/offline-whisper-decoder.h
================================================
// sherpa-onnx/csrc/offline-whisper-decoder.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_WHISPER_DECODER_H_
#define SHERPA_ONNX_CSRC_OFFLINE_WHISPER_DECODER_H_

#include <string>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-whisper-model-config.h"

namespace sherpa_onnx {

class OfflineWhisperDecoder {
 public:
  virtual ~OfflineWhisperDecoder() = default;

  /** Run beam search given the output from the whisper encoder model.
   *
   * @param n_layer_cross_k       A 4-D tensor of shape
   *                              (n_text_layer, N, n_audio_ctx, n_text_state).
   * @param n_layer_cross_v       A 4-D tensor of shape
   *                              (n_text_layer, N, n_audio_ctx, n_text_state).
   *
   * @return Return a vector of size `N` containing the decoded results.
   */
  virtual std::vector<OfflineWhisperDecoderResult> Decode(
      Ort::Value n_layer_cross_k, Ort::Value n_layer_cross_v,
      int32_t num_feature_frames) = 0;

  virtual void SetConfig(const OfflineWhisperModelConfig &config) = 0;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_WHISPER_DECODER_H_


================================================
FILE: sherpa-onnx/csrc/offline-whisper-dtw.cc
================================================
// sherpa-onnx/csrc/offline-whisper-dtw.cc
//
// Copyright (c)  2026  Posit Software, PBC

#include "sherpa-onnx/csrc/offline-whisper-dtw.h"

#include <algorithm>
#include <cmath>
#include <cstdio>  // For debug output
#include <limits>
#include <numeric>
#include <vector>

// Set to 1 to enable debug output
#define DTW_DEBUG 0

namespace sherpa_onnx {

TokenTimingResult WhisperDTW::ComputeTokenTimings(
    const float *attention, int32_t n_heads, int32_t n_tokens, int32_t n_frames,
    int32_t num_audio_frames, int32_t sot_sequence_length,
    int32_t num_text_tokens,
    const std::vector<int32_t> &timestamp_token_indices) {
  TokenTimingResult result;

  if (n_heads <= 0 || n_tokens <= 0 || n_frames <= 0 || num_text_tokens <= 0) {
    return result;
  }

#if DTW_DEBUG
  fprintf(stderr, "\n========== DTW TIMING DEBUG ==========\n");
  fprintf(stderr, "Input: n_heads=%d, n_tokens=%d, n_frames=%d\n", n_heads,
          n_tokens, n_frames);
  fprintf(stderr,
          "num_audio_frames=%d, sot_sequence_length=%d, num_text_tokens=%d\n",
          num_audio_frames, sot_sequence_length, num_text_tokens);
  fprintf(stderr, "timestamp_token_indices count: %zu\n",
          timestamp_token_indices.size());
#endif

  // Clip to actual audio frames (like OpenAI: weights[:, :, :num_frames//2])
  int32_t clipped_frames = std::min(n_frames, num_audio_frames);
  if (clipped_frames <= 0) {
    clipped_frames = n_frames;
  }

  // Process attention weights per-head, then average (like OpenAI)
  std::vector<float> processed(n_tokens * clipped_frames, 0.0f);
  std::vector<float> head_data(n_tokens * clipped_frames);

  for (int32_t h = 0; h < n_heads; ++h) {
    const float *src = attention + h * n_tokens * n_frames;
    for (int32_t t = 0; t < n_tokens; ++t) {
      for (int32_t f = 0; f < clipped_frames; ++f) {
        head_data[t * clipped_frames + f] = src[t * n_frames + f];
      }
    }

    ApplySoftmax(head_data.data(), n_tokens, clipped_frames);
    ApplyZScoreNormalization(head_data.data(), n_tokens, clipped_frames);
    ApplyMedianFilter(head_data.data(), n_tokens, clipped_frames, 7);

    for (int32_t i = 0; i < n_tokens * clipped_frames; ++i) {
      processed[i] += head_data[i];
    }
  }

  float inv_n_heads = 1.0f / static_cast<float>(n_heads);
  for (int32_t i = 0; i < n_tokens * clipped_frames; ++i) {
    processed[i] *= inv_n_heads;
  }

  // Build a set of timestamp token indices for quick lookup.
  // The DTW algorithm needs an "anchor" token at position sot_sequence_length
  // to establish the time=0 reference point (like OpenAI's timing.py).
  //
  // Two modes, same anchor position:
  // - enable_segment_timestamps=true: the first timestamp token (e.g. <|0.00|>)
  //   is at index sot_sequence_length. We keep it as the anchor and filter
  //   out subsequent timestamp tokens to avoid alignment drift.
  // - enable_segment_timestamps=false: timestamp_token_indices is empty,
  //   no filtering occurs. But the implementation of enable_segment_timestamps
  //   being false happens to insert a <no_timestamps> token at index
  //   sot_sequence_length, so that will serve as the anchor in this case.
  std::vector<bool> is_timestamp_token(n_tokens, false);
  bool found_first_timestamp = false;
  for (int32_t idx : timestamp_token_indices) {
    if (idx >= 0 && idx < n_tokens) {
      // Keep the first timestamp token (it's the anchor), filter the rest
      if (!found_first_timestamp && idx >= sot_sequence_length) {
        found_first_timestamp = true;
        // Don't mark as timestamp - keep it in the DTW matrix
      } else {
        is_timestamp_token[idx] = true;
      }
    }
  }

  // Skip SOT sequence and filter out timestamp tokens (except first one)
  // Like OpenAI: we skip sot_sequence_length tokens at the start
  // Additionally, we now filter out timestamp tokens from the middle
  int32_t start_token = sot_sequence_length;

  // Build filtered token list (indices into original processed array)
  // and mapping from filtered index back to original index
  std::vector<int32_t> filtered_to_original;
  for (int32_t i = start_token; i < n_tokens; ++i) {
    if (!is_timestamp_token[i]) {
      filtered_to_original.push_back(i);
    }
  }

  int32_t dtw_tokens = static_cast<int32_t>(filtered_to_original.size());

#if DTW_DEBUG
  fprintf(
      stderr,
      "DTW tokens after filtering: %d (filtered out %zu timestamp tokens)\n",
      dtw_tokens, timestamp_token_indices.size());
#endif

  if (dtw_tokens <= 1) {
    return result;
  }

  // Extract the filtered portion for DTW and negate
  std::vector<float> cost_matrix(dtw_tokens * clipped_frames);
  for (int32_t i = 0; i < dtw_tokens; ++i) {
    int32_t orig_idx = filtered_to_original[i];
    for (int32_t j = 0; j < clipped_frames; ++j) {
      cost_matrix[i * clipped_frames + j] =
          -processed[orig_idx * clipped_frames + j];
    }
  }

  // Run DTW
  DTWResult dtw_result = RunDTW(cost_matrix.data(), dtw_tokens, clipped_frames);

  if (dtw_result.text_indices.empty()) {
    return result;
  }

  // Extract jump times (where text_idx changes)
  // Like OpenAI: jumps = np.pad(np.diff(text_indices), (1, 0),
  // constant_values=1)
  //              jump_times = time_indices[jumps] / TOKENS_PER_SECOND
  std::vector<int32_t> jump_frame_indices;
  jump_frame_indices.push_back(
      dtw_result.time_indices[0]);  // First is always a jump

  for (size_t i = 1; i < dtw_result.text_indices.size(); ++i) {
    if (dtw_result.text_indices[i] != dtw_result.text_indices[i - 1]) {
      jump_frame_indices.push_back(dtw_result.time_indices[i]);
    }
  }

#if DTW_DEBUG
  fprintf(stderr, "jump_frame_indices count: %zu\n", jump_frame_indices.size());
  fprintf(stderr, "jump_times (first 10): ");
  for (size_t i = 0; i < std::min(size_t(10), jump_frame_indices.size()); ++i) {
    fprintf(stderr, "%.2f ", jump_frame_indices[i] * kWhisperSecondsPerToken);
  }
  fprintf(stderr, "\n");
#endif

  // Now extract start_times and durations for text tokens only (not EOT)
  // Like OpenAI: start_times = jump_times[word_boundaries[:-1]]
  //              end_times = jump_times[word_boundaries[1:]]
  // For tokens (each token is one "word"): boundaries = [0, 1, 2, ..., N]
  // So: start_times[i] = jump_times[i], end_times[i] = jump_times[i+1]
  result.start_times.reserve(num_text_tokens);
  result.durations.reserve(num_text_tokens);

  for (int32_t i = 0; i < num_text_tokens; ++i) {
    if (i < static_cast<int32_t>(jump_frame_indices.size())) {
      float start =
          static_cast<float>(jump_frame_indices[i]) * kWhisperSecondsPerToken;
      result.start_times.push_back(start);

      // Duration = end_time - start_time = jump_times[i+1] - jump_times[i]
      if (i + 1 < static_cast<int32_t>(jump_frame_indices.size())) {
        float end = static_cast<float>(jump_frame_indices[i + 1]) *
                    kWhisperSecondsPerToken;
        result.durations.push_back(end - start);
      } else {
        // Last token: duration to end of audio
        float audio_end =
            static_cast<float>(clipped_frames) * kWhisperSecondsPerToken;
        result.durations.push_back(std::max(0.0f, audio_end - start));
      }
    } else {
      // Fallback: use last known time
      float last_time =
          result.start_times.empty() ? 0.0f : result.start_times.back();
      result.start_times.push_back(last_time);
      result.durations.push_back(0.0f);
    }
  }

#if DTW_DEBUG
  fprintf(stderr, "Result: %zu start_times, %zu durations\n",
          result.start_times.size(), result.durations.size());
  fprintf(stderr, "========== END DTW TIMING DEBUG ==========\n\n");
#endif

  return result;
}

void WhisperDTW::ApplySoftmax(float *data, int32_t n_tokens, int32_t n_frames) {
  for (int32_t t = 0; t < n_tokens; ++t) {
    float *row = data + t * n_frames;

    // Find max for numerical stability
    float max_val = *std::max_element(row, row + n_frames);

    // Compute exp and sum
    float sum = 0.0f;
    for (int32_t f = 0; f < n_frames; ++f) {
      row[f] = std::exp(row[f] - max_val);
      sum += row[f];
    }

    // Normalize
    if (sum > 0.0f) {
      float inv_sum = 1.0f / sum;
      for (int32_t f = 0; f < n_frames; ++f) {
        row[f] *= inv_sum;
      }
    }
  }
}

void WhisperDTW::ApplyZScoreNormalization(float *data, int32_t n_tokens,
                                          int32_t n_frames) {
  // Normalize across tokens (dim=-2) for each frame
  for (int32_t f = 0; f < n_frames; ++f) {
    // Compute mean
    float sum = 0.0f;
    for (int32_t t = 0; t < n_tokens; ++t) {
      sum += data[t * n_frames + f];
    }
    float mean = sum / static_cast<float>(n_tokens);

    // Compute std
    float sq_sum = 0.0f;
    for (int32_t t = 0; t < n_tokens; ++t) {
      float diff = data[t * n_frames + f] - mean;
      sq_sum += diff * diff;
    }
    float std_dev = std::sqrt(sq_sum / static_cast<float>(n_tokens) + 1e-9f);

    // Normalize
    float inv_std = 1.0f / std_dev;
    for (int32_t t = 0; t < n_tokens; ++t) {
      data[t * n_frames + f] = (data[t * n_frames + f] - mean) * inv_std;
    }
  }
}

void WhisperDTW::ApplyMedianFilter(float *data, int32_t n_tokens,
                                   int32_t n_frames, int32_t width) {
  if (width <= 1 || n_frames <= 1) {
    return;
  }

  int32_t half_width = width / 2;
  std::vector<float> temp(n_frames);
  std::vector<float> window(width);

  for (int32_t t = 0; t < n_tokens; ++t) {
    float *row = data + t * n_frames;

    // Copy original row
    std::copy(row, row + n_frames, temp.begin());

    for (int32_t f = 0; f < n_frames; ++f) {
      // Gather window values with reflection padding
      int32_t w_idx = 0;
      for (int32_t k = -half_width; k <= half_width && w_idx < width; ++k) {
        int32_t src_idx = f + k;
        // Reflect at boundaries
        if (src_idx < 0) {
          src_idx = -src_idx;
        } else if (src_idx >= n_frames) {
          src_idx = 2 * n_frames - 2 - src_idx;
        }
        src_idx = std::max(0, std::min(src_idx, n_frames - 1));
        window[w_idx++] = temp[src_idx];
      }

      // Sort and take median
      std::sort(window.begin(), window.begin() + w_idx);
      row[f] = window[w_idx / 2];
    }
  }
}

DTWResult WhisperDTW::RunDTW(const float *cost_matrix, int32_t n_tokens,
                             int32_t n_frames) {
  // DTW algorithm based on whisper.cpp and OpenAI Whisper
  // O(N*M) time and space complexity

  DTWResult result;

  if (n_tokens <= 0 || n_frames <= 0) {
    return result;
  }

  constexpr float kInf = std::numeric_limits<float>::infinity();

  int32_t N = n_tokens;
  int32_t M = n_frames;

  // Cost and trace matrices (N+1 x M+1)
  std::vector<float> cost((N + 1) * (M + 1), kInf);
  std::vector<int32_t> trace((N + 1) * (M + 1), -1);

  auto cost_at = [&](int32_t i, int32_t j) -> float & {
    return cost[i * (M + 1) + j];
  };
  auto trace_at = [&](int32_t i, int32_t j) -> int32_t & {
    return trace[i * (M + 1) + j];
  };

  // Initialize
  cost_at(0, 0) = 0.0f;

  // Fill cost matrix
  for (int32_t j = 1; j <= M; ++j) {
    for (int32_t i = 1; i <= N; ++i) {
      float c0 = cost_at(i - 1, j - 1);  // diagonal
      float c1 = cost_at(i - 1, j);      // up
      float c2 = cost_at(i, j - 1);      // left

      float min_cost;
      int32_t trace_dir;

      if (c0 <= c1 && c0 <= c2) {
        min_cost = c0;
        trace_dir = 0;  // diagonal
      } else if (c1 <= c0 && c1 <= c2) {
        min_cost = c1;
        trace_dir = 1;  // up
      } else {
        min_cost = c2;
        trace_dir = 2;  // left
      }

      // Add current cost
      cost_at(i, j) = cost_matrix[(i - 1) * M + (j - 1)] + min_cost;
      trace_at(i, j) = trace_dir;
    }
  }

  // Backtrace
  int32_t i = N;
  int32_t j = M;

  // Force horizontal movement at row 0 and vertical at column 0
  for (int32_t jj = 0; jj <= M; ++jj) {
    trace_at(0, jj) = 2;  // left
  }
  for (int32_t ii = 0; ii <= N; ++ii) {
    trace_at(ii, 0) = 1;  // up
  }

  std::vector<std::pair<int32_t, int32_t>> path;
  path.reserve(N + M);

  while (i > 0 || j > 0) {
    path.push_back({i - 1, j - 1});

    int32_t dir = trace_at(i, j);
    if (dir == 0) {  // diagonal
      --i;
      --j;
    } else if (dir == 1) {  // up
      --i;
    } else {  // left
      --j;
    }
  }

  // Reverse path (we built it backwards)
  std::reverse(path.begin(), path.end());

  // Extract result
  result.text_indices.reserve(path.size());
  result.time_indices.reserve(path.size());

  for (const auto &p : path) {
    if (p.first >= 0 && p.second >= 0) {
      result.text_indices.push_back(p.first);
      result.time_indices.push_back(p.second);
    }
  }

  return result;
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-whisper-dtw.h
================================================
// sherpa-onnx/csrc/offline-whisper-dtw.h
//
// Copyright (c)  2026  Posit Software, PBC

#ifndef SHERPA_ONNX_CSRC_OFFLINE_WHISPER_DTW_H_
#define SHERPA_ONNX_CSRC_OFFLINE_WHISPER_DTW_H_

#include <cstdint>
#include <utility>
#include <vector>

namespace sherpa_onnx {

// Result of DTW alignment
struct DTWResult {
  std::vector<int32_t> text_indices;  // Token index at each alignment point
  std::vector<int32_t> time_indices;  // Frame index at each alignment point
};

// Token timing result from DTW
struct TokenTimingResult {
  std::vector<float> start_times;  // Start time in seconds for each token
  std::vector<float> durations;    // Duration in seconds for each token
};

// Class for processing cross-attention weights and computing DTW alignment
// for token-level timestamps in Whisper.
//
// Based on OpenAI Whisper (whisper/timing.py) and whisper.cpp implementations.
class WhisperDTW {
 public:
  // Compute token timings (start times and durations) from raw cross-attention.
  // This follows OpenAI's approach of extracting both start and end times
  // directly from DTW jump_times, where:
  //   start_times[i] = jump_times[i]
  //   end_times[i] = jump_times[i+1]
  //   durations[i] = end_times[i] - start_times[i]
  //
  // @param attention Raw attention weights from decoder.
  //                  Shape: (n_heads, n_tokens, n_audio_frames)
  // @param n_heads Number of alignment heads
  // @param n_tokens Number of text tokens (including SOT sequence and EOT)
  // @param n_frames Number of audio frames (full context, e.g., 1500)
  // @param num_audio_frames Actual audio frames to use (for clipping)
  // @param sot_sequence_length Number of special tokens at start (to skip)
  // @param num_text_tokens Number of actual text tokens to return timings for
  //                        (excluding SOT sequence and EOT)
  // @param timestamp_token_indices Indices of timestamp tokens to filter out
  //                                (0-based, relative to attention sequence)
  //
  // @return TokenTimingResult with start_times and durations for each token
  TokenTimingResult ComputeTokenTimings(
      const float *attention, int32_t n_heads, int32_t n_tokens,
      int32_t n_frames, int32_t num_audio_frames, int32_t sot_sequence_length,
      int32_t num_text_tokens,
      const std::vector<int32_t> &timestamp_token_indices = {});

 private:
  // Apply softmax normalization across the last dimension (frames)
  void ApplySoftmax(float *data, int32_t n_tokens, int32_t n_frames);

  // Apply z-score normalization across tokens (dim=-2)
  void ApplyZScoreNormalization(float *data, int32_t n_tokens,
                                int32_t n_frames);

  // Apply median filter across frames with given width
  void ApplyMedianFilter(float *data, int32_t n_tokens, int32_t n_frames,
                         int32_t width = 7);

  // Run DTW algorithm on cost matrix
  //
  // @param cost_matrix Negated alignment matrix (n_tokens, n_frames)
  //                    Lower values = better alignment
  // @param n_tokens Number of rows (text tokens)
  // @param n_frames Number of columns (audio frames)
  //
  // @return DTW alignment path
  DTWResult RunDTW(const float *cost_matrix, int32_t n_tokens,
                   int32_t n_frames);
};

// Time conversion constant: 50 tokens per second (20ms per token/frame)
constexpr float kWhisperTokensPerSecond = 50.0f;
constexpr float kWhisperSecondsPerToken = 0.02f;

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_WHISPER_DTW_H_


================================================
FILE: sherpa-onnx/csrc/offline-whisper-greedy-search-decoder.cc
================================================
// sherpa-onnx/csrc/offline-whisper-greedy-search-decoder.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-whisper-greedy-search-decoder.h"

#include <algorithm>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/math.h"
#include "sherpa-onnx/csrc/offline-whisper-timestamp-rules.h"
#include "sherpa-onnx/csrc/onnx-utils.h"

namespace sherpa_onnx {

void OfflineWhisperGreedySearchDecoder::SetConfig(
    const OfflineWhisperModelConfig &config) {
  config_ = config;
}

std::vector<OfflineWhisperDecoderResult>
OfflineWhisperGreedySearchDecoder::Decode(Ort::Value cross_k,
                                          Ort::Value cross_v,
                                          int32_t num_feature_frames) {
  auto memory_info =
      Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

  // Check if we should collect attention weights for DTW timestamp computation
  bool collect_attention =
      config_.enable_token_timestamps && model_->HasAttentionOutput();

  // Warn once if timestamps requested but model doesn't support it
  static bool warned_no_attention = false;
  if (config_.enable_token_timestamps && !model_->HasAttentionOutput() &&
      !warned_no_attention) {
    warned_no_attention = true;
    SHERPA_ONNX_LOGE(
        "Warning: enable_token_timestamps=true but the decoder model does not "
        "have cross-attention outputs. Timestamps will not be available. "
        "To enable timestamps, export the model with attention outputs using: "
        "python scripts/whisper/export-onnx-with-attention.py");
  }

  // For multilingual models, initial_tokens contains [sot, language, task]
  //   - language is English by default
  //   - task is transcribe by default
  //
  // For non-multilingual models, initial_tokens contains [sot]
  std::vector<int64_t> initial_tokens = model_->GetInitialTokens();

  if (model_->IsMultiLingual()) {
    if (!config_.language.empty()) {
      const auto &lang2id = model_->GetLang2ID();

      if (!lang2id.count(config_.language)) {
        SHERPA_ONNX_LOGE("Invalid language: %s", config_.language.c_str());
        exit(-1);
      }

      int32_t lang_id = lang2id.at(config_.language);

      // 0: sot, 1: lang_id, 2: task, 3: no_timestamps
      initial_tokens[1] = lang_id;
    } else {
      int32_t lang_id = model_->DetectLanguage(cross_k, cross_v);

      // 0: sot, 1: lang_id, 2: task, 3: no_timestamps
      initial_tokens[1] = lang_id;
    }

    if (config_.task == "translate") {
      initial_tokens[2] = model_->Translate();
    } else if (config_.task != "transcribe") {
      // initial_tokens[2] is transcribe by default
      SHERPA_ONNX_LOGE(
          "Unsupported task: %s. Valid values are: transcribe, translate.",
          config_.task.c_str());
    }
  }

  // Add no_timestamps token when NOT using segment timestamp mode.
  // When enable_segment_timestamps=true, we let the decoder output timestamp
  // tokens (like <|0.00|>) which serve as alignment anchors.
  // When enable_token_timestamps=true (DTW mode), we MUST include no_timestamps
  // because OpenAI's alignment (timing.py) uses it as an anchor token at the
  // start of the DTW matrix. Without it, the first text token is misaligned.
  if (!config_.enable_segment_timestamps) {
    initial_tokens.push_back(model_->NoTimeStampsToken());
  }

  // Track if we're using segment timestamp mode
  bool enable_segment_timestamps = config_.enable_segment_timestamps;

  // Get token IDs for timestamp rules
  int32_t timestamp_begin = model_->TimestampBegin();
  int32_t no_timestamps = model_->NoTimeStampsToken();
  int32_t eot = model_->EOT();

  // Max initial timestamp: 50 = 1.0 second (each timestamp is 0.02s)
  constexpr int32_t kMaxInitialTimestampIndex = 50;

  // Maintain running list of all tokens for timestamp rules
  std::vector<int64_t> all_tokens = initial_tokens;
  int32_t sample_begin = static_cast<int32_t>(initial_tokens.size());

  int32_t batch_size = 1;
  std::array<int64_t, 2> token_shape{
      batch_size, static_cast<int64_t>(initial_tokens.size())};

  Ort::Value tokens = Ort::Value::CreateTensor(
      memory_info, initial_tokens.data(), initial_tokens.size(),
      token_shape.data(), token_shape.size());

  std::array<int64_t, 1> offset_shape{1};
  Ort::Value offset = Ort::Value::CreateTensor<int64_t>(
      model_->Allocator(), offset_shape.data(), offset_shape.size());
  *(offset.GetTensorMutableData<int64_t>()) = 0;

  auto self_kv_cache = model_->GetInitialSelfKVCache();

  auto decoder_out = model_->ForwardDecoder(
      std::move(tokens), std::move(self_kv_cache.first),
      std::move(self_kv_cache.second), std::move(cross_k), std::move(cross_v),
      std::move(offset));

  // Note: decoder_out is now a 7-tuple with attention weights as 7th element
  // Indices: 0=logits, 1=self_k, 2=self_v, 3=cross_k, 4=cross_v, 5=offset,
  // 6=attention
  *(std::get<5>(decoder_out).GetTensorMutableData<int64_t>()) =
      initial_tokens.size();

  auto logits_shape =
      std::get<0>(decoder_out).GetTensorTypeAndShapeInfo().GetShape();
  int32_t vocab_size = logits_shape[2];

  int32_t n_text_ctx = model_->TextCtx();
  int32_t max_token_id = 0;

  // Get initial logits
  {
    const float *p_logits = std::get<0>(decoder_out).GetTensorData<float>();
    const float *p_start = p_logits + (logits_shape[1] - 1) * vocab_size;

    if (enable_segment_timestamps) {
      // Make a copy of logits for applying timestamp rules
      std::vector<float> logits_copy(p_start, p_start + vocab_size);
      ApplyTimestampRules(logits_copy.data(), vocab_size, all_tokens,
                          sample_begin, timestamp_begin, no_timestamps, eot,
                          kMaxInitialTimestampIndex);
      max_token_id = MaxElementIndex(logits_copy.data(), vocab_size);
    } else {
      max_token_id = MaxElementIndex(p_start, vocab_size);
    }
  }

  std::vector<int32_t> predicted_tokens;

  // Storage for accumulated attention weights
  std::vector<std::vector<float>> all_attention_weights;
  int32_t attention_n_heads = 0;
  int32_t attention_n_frames = 0;

  // Track indices of timestamp tokens in the attention sequence
  // (0-based, relative to the start of all_attention_weights)
  std::vector<int32_t> timestamp_token_indices;

  // Collect attention from initial tokens if enabled
  if (collect_attention) {
    auto &attn = std::get<6>(decoder_out);
    auto attn_shape = attn.GetTensorTypeAndShapeInfo().GetShape();
    // Shape: (batch, n_heads, n_tokens, n_audio_ctx)
    if (attn_shape.size() >= 4 && attn_shape[1] > 0) {
      attention_n_heads = static_cast<int32_t>(attn_shape[1]);
      attention_n_frames = static_cast<int32_t>(attn_shape[3]);
      int32_t n_initial_tokens = static_cast<int32_t>(attn_shape[2]);

      const float *p_attn = attn.GetTensorData<float>();
      int32_t stride = attention_n_frames;

      // Store attention for each initial token
      for (int32_t t = 0; t < n_initial_tokens; ++t) {
        std::vector<float> token_attn(attention_n_heads * attention_n_frames);
        for (int32_t h = 0; h < attention_n_heads; ++h) {
          const float *src =
              p_attn + h * n_initial_tokens * stride + t * stride;
          std::copy(src, src + attention_n_frames,
                    token_attn.begin() + h * attention_n_frames);
        }
        all_attention_weights.push_back(std::move(token_attn));
      }
    }
  }

  // assume at most 6 tokens per second
  int32_t num_possible_tokens = num_feature_frames / 100.0 * 6;
  num_possible_tokens = std::min<int32_t>(num_possible_tokens, n_text_ctx / 2);

  for (int32_t i = 0; i < num_possible_tokens; ++i) {
    if (max_token_id == eot) {
      break;
    }

    predicted_tokens.push_back(max_token_id);
    all_tokens.push_back(max_token_id);

    // Track if this is a timestamp token (for filtering in DTW)
    if (max_token_id >= timestamp_begin) {
      // The attention index is: initial_tokens.size() + current predicted index
      int32_t attn_idx = static_cast<int32_t>(initial_tokens.size()) +
                         static_cast<int32_t>(predicted_tokens.size()) - 1;
      timestamp_token_indices.push_back(attn_idx);
    }

    std::array<int64_t, 2> token_shape{1, 1};
    Ort::Value tokens = Ort::Value::CreateTensor<int64_t>(
        model_->Allocator(), token_shape.data(), token_shape.size());

    int64_t *p_tokens = tokens.GetTensorMutableData<int64_t>();
    p_tokens[0] = max_token_id;

    decoder_out = model_->ForwardDecoder(std::move(tokens),
                                         std::move(std::get<1>(decoder_out)),
                                         std::move(std::get<2>(decoder_out)),
                                         std::move(std::get<3>(decoder_out)),
                                         std::move(std::get<4>(decoder_out)),
                                         std::move(std::get<5>(decoder_out)));

    // Collect attention for this token
    if (collect_attention) {
      auto &attn = std::get<6>(decoder_out);
      auto attn_shape = attn.GetTensorTypeAndShapeInfo().GetShape();
      if (attn_shape.size() >= 4 && attn_shape[1] == attention_n_heads) {
        const float *p_attn = attn.GetTensorData<float>();
        // Shape: (batch, n_heads, 1, n_audio_ctx) - single token
        std::vector<float> token_attn(attention_n_heads * attention_n_frames);
        for (int32_t h = 0; h < attention_n_heads; ++h) {
          const float *src = p_attn + h * attention_n_frames;
          std::copy(src, src + attention_n_frames,
                    token_attn.begin() + h * attention_n_frames);
        }
        all_attention_weights.push_back(std::move(token_attn));
      }
    }

    int64_t *p_offset =
        std::get<5>(decoder_out).GetTensorMutableData<int64_t>();

    *p_offset += 1;
    if (*p_offset >= n_text_ctx - 1) {
      break;
    }

    const float *p_logits = std::get<0>(decoder_out).GetTensorData<float>();

    if (enable_segment_timestamps) {
      // Make a copy of logits for applying timestamp rules
      std::vector<float> logits_copy(p_logits, p_logits + vocab_size);
      // After first token, don't apply max_initial_timestamp constraint
      ApplyTimestampRules(logits_copy.data(), vocab_size, all_tokens,
                          sample_begin, timestamp_begin, no_timestamps, eot,
                          -1);  // -1 = no max_initial constraint
      max_token_id = MaxElementIndex(logits_copy.data(), vocab_size);
    } else {
      max_token_id = MaxElementIndex(p_logits, vocab_size);
    }
  }

  std::vector<OfflineWhisperDecoderResult> ans(1);

  const auto &id2lang = model_->GetID2Lang();
  if (id2lang.count(initial_tokens[1])) {
    ans[0].lang = id2lang.at(initial_tokens[1]);
  } else {
    ans[0].lang = "";
  }

  ans[0].tokens = std::move(predicted_tokens);

  // Parse timestamp tokens into segments if using segment timestamp mode
  if (enable_segment_timestamps) {
    ans[0].segments = ParseTimestampTokens(ans[0].tokens, timestamp_begin, eot);
  }

  // Add accumulated attention weights if available
  if (collect_attention && !all_attention_weights.empty()) {
    int32_t n_tokens = static_cast<int32_t>(all_attention_weights.size());
    ans[0].attention_n_heads = attention_n_heads;
    ans[0].attention_n_tokens = n_tokens;
    ans[0].attention_n_frames = attention_n_frames;
    // Actual audio frames for clipping (encoder downsamples by factor of 2)
    ans[0].num_audio_frames = num_feature_frames / 2;

    // Flatten to (n_heads, n_tokens, n_frames)
    ans[0].attention_weights.resize(attention_n_heads * n_tokens *
                                    attention_n_frames);
    for (int32_t h = 0; h < attention_n_heads; ++h) {
      for (int32_t t = 0; t < n_tokens; ++t) {
        const float *src =
            all_attention_weights[t].data() + h * attention_n_frames;
        float *dst = ans[0].attention_weights.data() +
                     h * n_tokens * attention_n_frames + t * attention_n_frames;
        std::copy(src, src + attention_n_frames, dst);
      }
    }

    // Add timestamp token indices for DTW filtering
    ans[0].timestamp_token_indices = std::move(timestamp_token_indices);
  }

  return ans;
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-whisper-greedy-search-decoder.h
================================================
// sherpa-onnx/csrc/offline-whisper-greedy-search-decoder.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_OFFLINE_WHISPER_GREEDY_SEARCH_DECODER_H_
#define SHERPA_ONNX_CSRC_OFFLINE_WHISPER_GREEDY_SEARCH_DECODER_H_

#include <vector>

#include "sherpa-onnx/csrc/offline-whisper-decoder.h"
#include "sherpa-onnx/csrc/offline-whisper-model.h"

namespace sherpa_onnx {

class OfflineWhisperGreedySearchDecoder : public OfflineWhisperDecoder {
 public:
  OfflineWhisperGreedySearchDecoder(const OfflineWhisperModelConfig &config,
                                    OfflineWhisperModel *model)
      : config_(config), model_(model) {}

  std::vector<OfflineWhisperDecoderResult> Decode(
      Ort::Value cross_k, Ort::Value cross_v,
      int32_t num_feature_frames) override;

  void SetConfig(const OfflineWhisperModelConfig &config) override;

 private:
  OfflineWhisperModelConfig config_;
  OfflineWhisperModel *model_;  // not owned
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_WHISPER_GREEDY_SEARCH_DECODER_H_


================================================
FILE: sherpa-onnx/csrc/offline-whisper-model-config.cc
================================================
// sherpa-onnx/csrc/offline-whisper-model-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-whisper-model-config.h"

#include <string>
#include <unordered_map>
#include <vector>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OfflineWhisperModelConfig::Register(ParseOptions *po) {
  po->Register("whisper-encoder", &encoder,
               "Path to onnx encoder of whisper, e.g., tiny-encoder.onnx, "
               "medium.en-encoder.onnx.");

  po->Register("whisper-decoder", &decoder,
               "Path to onnx decoder of whisper, e.g., tiny-decoder.onnx, "
               "medium.en-decoder.onnx.");

  po->Register(
      "whisper-language", &language,
      "The spoken language in the input audio file. Example values: "
      "en, de, fr, zh, jp. If it is not given for a multilingual model, we will"
      " infer the language from the input audio file. "
      "Please refer to "
      "https://github.com/openai/whisper/blob/main/whisper/tokenizer.py#L10"
      " for valid values. Note that for non-multilingual models, it supports "
      "only 'en'");

  po->Register("whisper-task", &task,
               "Valid values: transcribe, translate. "
               "Note that for non-multilingual models, it supports "
               "only 'transcribe'");

  po->Register(
      "whisper-tail-paddings", &tail_paddings,
      "Suggested value: 50 for English models. 300 for multilingual models. "
      "Since we have removed the 30-second constraint, we need to add some "
      "tail padding frames "
      "so that whisper can detect the eot token. Leave it to -1 to use 1000.");

  po->Register(
      "whisper-enable-token-timestamps", &enable_token_timestamps,
      "If true, use cross-attention weights and DTW to compute token-level "
      "timestamps. Requires ONNX models exported with attention outputs. "
      "Default: false.");

  po->Register(
      "whisper-enable-segment-timestamps", &enable_segment_timestamps,
      "If true, use Whisper's native timestamp token mode to produce "
      "segment-level timestamps. The decoder outputs timestamp tokens like "
      "<|0.00|> interleaved with text, creating segments with start/end times. "
      "Does not require attention outputs. Can be combined with "
      "--whisper-enable-token-timestamps for both segment-level and "
      "token-level "
      "timestamps. Default: false.");
}

bool OfflineWhisperModelConfig::Validate() const {
  if (encoder.empty()) {
    SHERPA_ONNX_LOGE("Please provide --whisper-encoder");
    return false;
  }

  if (!FileExists(encoder)) {
    SHERPA_ONNX_LOGE("whisper encoder file '%s' does not exist",
                     encoder.c_str());
    return false;
  }

  if (decoder.empty()) {
    SHERPA_ONNX_LOGE("Please provide --whisper-decoder");
    return false;
  }

  if (!FileExists(decoder)) {
    SHERPA_ONNX_LOGE("whisper decoder file '%s' does not exist",
                     decoder.c_str());
    return false;
  }

  if (task != "translate" && task != "transcribe") {
    SHERPA_ONNX_LOGE(
        "--whisper-task supports only translate and transcribe. Given: %s",
        task.c_str());

    return false;
  }

  return true;
}

std::string OfflineWhisperModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineWhisperModelConfig(";
  os << "encoder=\"" << encoder << "\", ";
  os << "decoder=\"" << decoder << "\", ";
  os << "language=\"" << language << "\", ";
  os << "task=\"" << task << "\", ";
  os << "tail_paddings=" << tail_paddings << ", ";
  os << "enable_token_timestamps="
     << (enable_token_timestamps ? "True" : "False") << ", ";
  os << "enable_segment_timestamps="
     << (enable_segment_timestamps ? "True" : "False") << ")";

  return os.str();
}

bool IsMultilingual(WhisperModelType model_type) {
  switch (model_type) {
    case WhisperModelType::TinyEn:
    case WhisperModelType::BaseEn:
    case WhisperModelType::SmallEn:
    case WhisperModelType::MediumEn:
      return false;  // English-only models

    case WhisperModelType::Tiny:
    case WhisperModelType::Base:
    case WhisperModelType::Small:
    case WhisperModelType::Medium:
    case WhisperModelType::Large:
      return true;  // Multilingual models
  }

  SHERPA_ONNX_LOGE("Unsupported model: %s", ToString(model_type).c_str());
  SHERPA_ONNX_EXIT(-1);
  // Safety fallback (should never be hit)
  return false;
}

std::string ToString(WhisperModelType model) {
  switch (model) {
    case WhisperModelType::Tiny:
      return "tiny";
    case WhisperModelType::TinyEn:
      return "tiny.en";
    case WhisperModelType::Base:
      return "base";
    case WhisperModelType::BaseEn:
      return "base.en";
    case WhisperModelType::Small:
      return "small";
    case WhisperModelType::SmallEn:
      return "small.en";
    case WhisperModelType::Medium:
      return "medium";
    case WhisperModelType::MediumEn:
      return "medium.en";
    case WhisperModelType::Large:
      return "large";
  }
  return "unknown";
}

WhisperModelType ParseWhisperModelType(const std::string &name) {
  if (name == "tiny") return WhisperModelType::Tiny;
  if (name == "tiny.en") return WhisperModelType::TinyEn;
  if (name == "base") return WhisperModelType::Base;
  if (name == "base.en") return WhisperModelType::BaseEn;
  if (name == "small") return WhisperModelType::Small;
  if (name == "small.en") return WhisperModelType::SmallEn;
  if (name == "medium") return WhisperModelType::Medium;
  if (name == "medium.en") return WhisperModelType::MediumEn;
  if (name == "large") return WhisperModelType::Large;

  SHERPA_ONNX_LOGE("Unknown Whisper model: '%s'", name.c_str());
  SHERPA_ONNX_EXIT(-1);

  // Unreachable code
  return WhisperModelType::Tiny;
}

int32_t GetWhisperLanguageTokenId(const std::string &lang) {
  static const std::unordered_map<std::string, int32_t> kLangToToken = {
      {"hi", 50276},  {"cy", 50297}, {"oc", 50328}, {"so", 50326},
      {"fr", 50265},  {"az", 50304}, {"eu", 50310}, {"ba", 50355},
      {"no", 50288},  {"as", 50350}, {"nl", 50271}, {"bn", 50302},
      {"es", 50262},  {"ml", 50296}, {"km", 50323}, {"mk", 50308},
      {"sq", 50317},  {"mt", 50343}, {"et", 50307}, {"ms", 50282},
      {"tr", 50268},  {"bg", 50292}, {"ps", 50340}, {"br", 50309},
      {"ht", 50339},  {"tt", 50351}, {"tk", 50341}, {"la", 50294},
      {"de", 50261},  {"ur", 50290}, {"ro", 50284}, {"fa", 50300},
      {"uk", 50280},  {"mg", 50349}, {"lo", 50336}, {"sr", 50303},
      {"yo", 50325},  {"id", 50275}, {"da", 50285}, {"pt", 50267},
      {"nn", 50342},  {"sn", 50324}, {"sa", 50344}, {"sd", 50332},
      {"gl", 50319},  {"ja", 50266}, {"pl", 50269}, {"ru", 50263},
      {"ko", 50264},  {"ne", 50313}, {"kn", 50306}, {"zh", 50260},
      {"be", 50330},  {"ca", 50270}, {"el", 50281}, {"it", 50274},
      {"hu", 50286},  {"lt", 50293}, {"ta", 50287}, {"is", 50311},
      {"jw", 50356},  {"fi", 50277}, {"bo", 50347}, {"sv", 50273},
      {"mi", 50295},  {"hr", 50291}, {"bs", 50315}, {"yi", 50335},
      {"sk", 50298},  {"lv", 50301}, {"af", 50327}, {"vi", 50278},
      {"ha", 50354},  {"mn", 50314}, {"cs", 50283}, {"sl", 50305},
      {"pa", 50321},  {"su", 50357}, {"ka", 50329}, {"ln", 50353},
      {"lb", 50345},  {"sw", 50318}, {"en", 50259}, {"tl", 50348},
      {"hy", 50312},  {"te", 50299}, {"he", 50279}, {"my", 50346},
      {"haw", 50352}, {"fo", 50338}, {"kk", 50316}, {"si", 50322},
      {"tg", 50331},  {"th", 50289}, {"ar", 50272}, {"am", 50334},
      {"mr", 50320},  {"uz", 50337}, {"gu", 50333}};

  auto it = kLangToToken.find(lang);

  return (it != kLangToToken.end()) ? it->second : -1;
}

std::string GetWhisperLanguageCode(int32_t token_id) {
  static const std::unordered_map<int32_t, std::string> kTokenToLang = {
      {50276, "hi"},  {50297, "cy"}, {50328, "oc"}, {50326, "so"},
      {50265, "fr"},  {50304, "az"}, {50310, "eu"}, {50355, "ba"},
      {50288, "no"},  {50350, "as"}, {50271, "nl"}, {50302, "bn"},
      {50262, "es"},  {50296, "ml"}, {50323, "km"}, {50308, "mk"},
      {50317, "sq"},  {50343, "mt"}, {50307, "et"}, {50282, "ms"},
      {50268, "tr"},  {50292, "bg"}, {50340, "ps"}, {50309, "br"},
      {50339, "ht"},  {50351, "tt"}, {50341, "tk"}, {50294, "la"},
      {50261, "de"},  {50290, "ur"}, {50284, "ro"}, {50300, "fa"},
      {50280, "uk"},  {50349, "mg"}, {50336, "lo"}, {50303, "sr"},
      {50325, "yo"},  {50275, "id"}, {50285, "da"}, {50267, "pt"},
      {50342, "nn"},  {50324, "sn"}, {50344, "sa"}, {50332, "sd"},
      {50319, "gl"},  {50266, "ja"}, {50269, "pl"}, {50263, "ru"},
      {50264, "ko"},  {50313, "ne"}, {50306, "kn"}, {50260, "zh"},
      {50330, "be"},  {50270, "ca"}, {50281, "el"}, {50274, "it"},
      {50286, "hu"},  {50293, "lt"}, {50287, "ta"}, {50311, "is"},
      {50356, "jw"},  {50277, "fi"}, {50347, "bo"}, {50273, "sv"},
      {50295, "mi"},  {50291, "hr"}, {50315, "bs"}, {50335, "yi"},
      {50298, "sk"},  {50301, "lv"}, {50327, "af"}, {50278, "vi"},
      {50354, "ha"},  {50314, "mn"}, {50283, "cs"}, {50305, "sl"},
      {50321, "pa"},  {50357, "su"}, {50329, "ka"}, {50353, "ln"},
      {50345, "lb"},  {50318, "sw"}, {50259, "en"}, {50348, "tl"},
      {50312, "hy"},  {50299, "te"}, {50279, "he"}, {50346, "my"},
      {50352, "haw"}, {50338, "fo"}, {50316, "kk"}, {50322, "si"},
      {50331, "tg"},  {50289, "th"}, {50272, "ar"}, {50334, "am"},
      {50320, "mr"},  {50337, "uz"}, {50333, "gu"}};

  auto it = kTokenToLang.find(token_id);
  return (it != kTokenToLang.end()) ? it->second : std::string{};
}

const std::vector<int32_t> &GetAllWhisperLanguageTokenIds() {
  static const std::vector<int32_t> kLanguageTokenIds = {
      50276, 50297, 50328, 50326, 50265, 50304, 50310, 50355, 50288, 50350,
      50271, 50302, 50262, 50296, 50323, 50308, 50317, 50343, 50307, 50282,
      50268, 50292, 50340, 50309, 50339, 50351, 50341, 50294, 50261, 50290,
      50284, 50300, 50280, 50349, 50336, 50303, 50325, 50275, 50285, 50267,
      50342, 50324, 50344, 50332, 50319, 50266, 50269, 50263, 50264, 50313,
      50306, 50260, 50330, 50270, 50281, 50274, 50286, 50293, 50287, 50311,
      50356, 50277, 50347, 50273, 50295, 50291, 50315, 50335, 50298, 50301,
      50327, 50278, 50354, 50314, 50283, 50305, 50321, 50357, 50329, 50353,
      50345, 50318, 50259, 50348, 50312, 50299, 50279, 50346, 50352, 50338,
      50316, 50322, 50331, 50289, 50272, 50334, 50320, 50337, 50333};

  return kLanguageTokenIds;
}

const std::vector<std::string> &GetAllWhisperLanguageCodes() {
  static const std::vector<std::string> kLanguageCodes = {
      "hi",  "cy", "oc", "so", "fr", "az", "eu", "ba", "no", "as", "nl",
      "bn",  "es", "ml", "km", "mk", "sq", "mt", "et", "ms", "tr", "bg",
      "ps",  "br", "ht", "tt", "tk", "la", "de", "ur", "ro", "fa", "uk",
      "mg",  "lo", "sr", "yo", "id", "da", "pt", "nn", "sn", "sa", "sd",
      "gl",  "ja", "pl", "ru", "ko", "ne", "kn", "zh", "be", "ca", "el",
      "it",  "hu", "lt", "ta", "is", "jw", "fi", "bo", "sv", "mi", "hr",
      "bs",  "yi", "sk", "lv", "af", "vi", "ha", "mn", "cs", "sl", "pa",
      "su",  "ka", "ln", "lb", "sw", "en", "tl", "hy", "te", "he", "my",
      "haw", "fo", "kk", "si", "tg", "th", "ar", "am", "mr", "uz", "gu"};

  return kLanguageCodes;
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-whisper-model-config.h
================================================
// sherpa-onnx/csrc/offline-whisper-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_WHISPER_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_WHISPER_MODEL_CONFIG_H_

#include <string>
#include <vector>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineWhisperModelConfig {
  std::string encoder;
  std::string decoder;

  // Available languages can be found at
  // https://github.com/openai/whisper/blob/main/whisper/tokenizer.py#L10
  //
  // Note: For non-multilingual models, it supports only "en"
  //
  // If empty, we will infer it from the input audio file when
  // the model is multilingual.
  std::string language;

  // Valid values are transcribe and translate
  //
  // Note: For non-multilingual models, it supports only "transcribe"
  std::string task = "transcribe";

  // Number of tail padding frames.
  //
  // Since we remove the 30-second constraint, we need to add some paddings
  // at the end.
  //
  // Recommended values:
  //   - 50 for English models
  //   - 300 for multilingual models
  int32_t tail_paddings = -1;

  // If true, use cross-attention weights and DTW to compute token-level
  // timestamps. This requires ONNX models exported with attention outputs.
  bool enable_token_timestamps = false;

  // If true, use Whisper's native timestamp token mode to produce segment-level
  // timestamps. The decoder outputs timestamp tokens like <|0.00|> interleaved
  // with text, creating segments with start/end times. Does not require
  // attention outputs. Can be combined with enable_token_timestamps for both
  // segment-level and token-level timestamps.
  bool enable_segment_timestamps = false;

  OfflineWhisperModelConfig() = default;
  OfflineWhisperModelConfig(const std::string &encoder,
                            const std::string &decoder,
                            const std::string &language,
                            const std::string &task, int32_t tail_paddings,
                            bool enable_token_timestamps = false,
                            bool enable_segment_timestamps = false)
      : encoder(encoder),
        decoder(decoder),
        language(language),
        task(task),
        tail_paddings(tail_paddings),
        enable_token_timestamps(enable_token_timestamps),
        enable_segment_timestamps(enable_segment_timestamps) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

// Represents a segment with start/end timestamps from timestamp tokens
struct OfflineWhisperSegment {
  float start_time = 0.0f;
  float end_time = 0.0f;
  std::vector<int32_t> token_ids;  // Text token IDs in this segment
};

struct OfflineWhisperDecoderResult {
  /// The decoded token IDs
  std::vector<int32_t> tokens;
  std::string lang;

  /// Cross-attention weights for token-level timestamps (if enabled)
  /// Shape: (n_heads, n_tokens, n_audio_frames), flattened to 1D
  /// Empty if timestamps are not enabled or model doesn't support it
  std::vector<float> attention_weights;

  /// Dimensions of attention weights
  int32_t attention_n_heads = 0;
  int32_t attention_n_tokens = 0;
  int32_t attention_n_frames = 0;

  /// Number of actual audio feature frames (for clipping attention)
  /// This is num_feature_frames / 2 (due to encoder downsampling)
  int32_t num_audio_frames = 0;

  /// Indices of timestamp tokens in the attention weights (0-based, relative
  /// to the start of the attention sequence which includes initial tokens).
  /// Used to filter out timestamp tokens before DTW alignment.
  std::vector<int32_t> timestamp_token_indices;

  /// Segments with timestamps (when using timestamp token mode)
  std::vector<OfflineWhisperSegment> segments;
};

// used by ascend/rknn/qnn/axera, etc.
enum class WhisperModelType {
  Tiny,
  TinyEn,
  Base,
  BaseEn,
  Small,
  SmallEn,
  Medium,
  MediumEn,
  Large
};

std::string ToString(WhisperModelType model);
bool IsMultilingual(WhisperModelType model_type);

WhisperModelType ParseWhisperModelType(const std::string &name);
int32_t GetWhisperLanguageTokenId(const std::string &lang);
std::string GetWhisperLanguageCode(int32_t token_id);
const std::vector<int32_t> &GetAllWhisperLanguageTokenIds();
const std::vector<std::string> &GetAllWhisperLanguageCodes();

struct WhisperModelMultilingualTokens {
  int32_t sot = 50258;
  int32_t eot = 50257;
  int32_t transcribe = 50359;
  int32_t translate = 50358;
  int32_t no_timestamps = 50363;
};

struct WhisperModelEnglishTokens {
  int32_t sot = 50257;
  int32_t eot = 50256;
  int32_t no_timestamps = 50362;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_WHISPER_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-whisper-model.cc
================================================
// sherpa-onnx/csrc/offline-whisper-model.cc
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-whisper-model.h"

#include <algorithm>
#include <cmath>
#include <memory>
#include <string>
#include <tuple>
#include <unordered_map>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/math.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

namespace {

static inline bool IsCudaProvider(const std::string &provider) {
  return provider == "cuda";
}

}  // namespace

class OfflineWhisperModel::Impl {
 public:
  explicit Impl(const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{},
        cpu_mem_info_(
            Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault)),
        is_cpu_provider_(config.provider == "cpu" || config.provider.empty()) {
    encoder_sess_ = std::make_unique<Ort::Session>(
        env_, SHERPA_ONNX_TO_ORT_PATH(config.whisper.encoder), sess_opts_);
    InitEncoder(nullptr, 0);

    decoder_sess_ = std::make_unique<Ort::Session>(
        env_, SHERPA_ONNX_TO_ORT_PATH(config.whisper.decoder), sess_opts_);
    InitDecoder(nullptr, 0);

    InitCudaIOBinding();
  }

  explicit Impl(const SpokenLanguageIdentificationConfig &config)
      : lid_config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{},
        cpu_mem_info_(
            Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault)),
        is_cpu_provider_(config.provider == "cpu" || config.provider.empty()) {
    encoder_sess_ = std::make_unique<Ort::Session>(
        env_, SHERPA_ONNX_TO_ORT_PATH(config.whisper.encoder), sess_opts_);
    InitEncoder(nullptr, 0);

    decoder_sess_ = std::make_unique<Ort::Session>(
        env_, SHERPA_ONNX_TO_ORT_PATH(config.whisper.decoder), sess_opts_);
    InitDecoder(nullptr, 0);

    InitCudaIOBinding();
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{},
        cpu_mem_info_(
            Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault)),
        is_cpu_provider_(config.provider == "cpu" || config.provider.empty()) {
    {
      auto buf = ReadFile(mgr, config.whisper.encoder);
      InitEncoder(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(mgr, config.whisper.decoder);
      InitDecoder(buf.data(), buf.size());
    }

    InitCudaIOBinding();
  }

  template <typename Manager>
  Impl(Manager *mgr, const SpokenLanguageIdentificationConfig &config)
      : lid_config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{},
        cpu_mem_info_(
            Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault)),
        is_cpu_provider_(config.provider == "cpu" || config.provider.empty()) {
    {
      auto buf = ReadFile(mgr, config.whisper.encoder);
      InitEncoder(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(mgr, config.whisper.decoder);
      InitDecoder(buf.data(), buf.size());
    }

    InitCudaIOBinding();
  }

  std::pair<Ort::Value, Ort::Value> ForwardEncoder(Ort::Value features) {
    std::vector<Ort::Value> encoder_out;

    if (use_cuda_iobinding_) {
      // Encoder outputs are n_layer_cross_k and n_layer_cross_v, which are used
      // multiple times in decoder steps. Keep them on GPU to avoid
      // device<->host copies.
      Ort::IoBinding binding(*encoder_sess_);
      binding.BindInput(encoder_input_names_ptr_[0], features);

      binding.BindOutput(encoder_output_names_ptr_[0], *cuda_mem_info_);
      binding.BindOutput(encoder_output_names_ptr_[1], *cuda_mem_info_);

      binding.SynchronizeInputs();
      encoder_sess_->Run(Ort::RunOptions{nullptr}, binding);
      binding.SynchronizeOutputs();
      encoder_out = binding.GetOutputValues();
    } else {
      encoder_out = encoder_sess_->Run(
          {}, encoder_input_names_ptr_.data(), &features, 1,
          encoder_output_names_ptr_.data(), encoder_output_names_ptr_.size());
    }

    return {std::move(encoder_out[0]), std::move(encoder_out[1])};
  }

  std::tuple<Ort::Value, Ort::Value, Ort::Value, Ort::Value, Ort::Value,
             Ort::Value, Ort::Value>
  ForwardDecoder(Ort::Value tokens, Ort::Value n_layer_self_k_cache,
                 Ort::Value n_layer_self_v_cache, Ort::Value n_layer_cross_k,
                 Ort::Value n_layer_cross_v, Ort::Value offset) {
    std::array<Ort::Value, 6> decoder_input = {std::move(tokens),
                                               std::move(n_layer_self_k_cache),
                                               std::move(n_layer_self_v_cache),
                                               std::move(n_layer_cross_k),
                                               std::move(n_layer_cross_v),
                                               std::move(offset)};

    std::vector<Ort::Value> decoder_out;

    if (use_cuda_iobinding_) {
      // CPU-side sampling needs logits on CPU, while self KV cache should
      // remain on GPU to avoid large device<->host copies between decode steps.
      Ort::IoBinding binding(*decoder_sess_);
      for (size_t i = 0; i < decoder_input.size(); ++i) {
        binding.BindInput(decoder_input_names_ptr_[i], decoder_input[i]);
      }

      binding.BindOutput(decoder_output_names_ptr_[0], cpu_mem_info_);
      binding.BindOutput(decoder_output_names_ptr_[1], *cuda_mem_info_);
      binding.BindOutput(decoder_output_names_ptr_[2], *cuda_mem_info_);
      if (has_attention_output_ && decoder_output_names_ptr_.size() > 3) {
        binding.BindOutput(decoder_output_names_ptr_[3], cpu_mem_info_);
      }

      binding.SynchronizeInputs();
      decoder_sess_->Run(Ort::RunOptions{nullptr}, binding);
      binding.SynchronizeOutputs();
      decoder_out = binding.GetOutputValues();
    } else {
      decoder_out = decoder_sess_->Run(
          {}, decoder_input_names_ptr_.data(), decoder_input.data(),
          decoder_input.size(), decoder_output_names_ptr_.data(),
          decoder_output_names_ptr_.size());
    }

    // Handle attention output (4th output) if present
    // For models without attention output, this remains nullptr
    Ort::Value attention_weights{nullptr};
    if (has_attention_output_ && decoder_out.size() > 3) {
      attention_weights = std::move(decoder_out[3]);
    }

    return std::tuple<Ort::Value, Ort::Value, Ort::Value, Ort::Value,
                      Ort::Value, Ort::Value, Ort::Value>{
        std::move(decoder_out[0]),   std::move(decoder_out[1]),
        std::move(decoder_out[2]),   std::move(decoder_input[3]),
        std::move(decoder_input[4]), std::move(decoder_input[5]),
        std::move(attention_weights)};
  }

  bool HasAttentionOutput() const { return has_attention_output_; }

  int32_t NumAlignmentHeads() const { return n_alignment_heads_; }

  int32_t DetectLanguage(Ort::Value &cross_k,    // NOLINT
                         Ort::Value &cross_v) {  // NOLINT
    int64_t token_val = SOT();
    std::array<int64_t, 2> token_shape{1, 1};

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    Ort::Value tokens = Ort::Value::CreateTensor(
        memory_info, &token_val, 1, token_shape.data(), token_shape.size());

    auto self_kv_cache = GetInitialSelfKVCache();

    std::array<int64_t, 1> offset_shape{1};
    Ort::Value offset = Ort::Value::CreateTensor<int64_t>(
        Allocator(), offset_shape.data(), offset_shape.size());
    *(offset.GetTensorMutableData<int64_t>()) = 0;

    auto decoder_out =
        ForwardDecoder(std::move(tokens), std::move(self_kv_cache.first),
                       std::move(self_kv_cache.second), std::move(cross_k),
                       std::move(cross_v), std::move(offset));

    cross_k = std::move(std::get<3>(decoder_out));
    cross_v = std::move(std::get<4>(decoder_out));

    const float *p_logits = std::get<0>(decoder_out).GetTensorData<float>();
    const auto &all_language_ids = GetAllLanguageIDs();

    int32_t lang_id = all_language_ids[0];
    float this_logit = p_logits[lang_id];

    for (int32_t i = 1; i != all_language_ids.size(); ++i) {
      int32_t id = all_language_ids[i];
      float p = p_logits[id];

      if (p > this_logit) {
        this_logit = p;
        lang_id = id;
      }
    }

    if (config_.debug) {
      SHERPA_ONNX_LOGE("Detected language: %s",
                       GetID2Lang().at(lang_id).c_str());
    }

    return lang_id;
  }

  std::pair<Ort::Value, Ort::Value> GetInitialSelfKVCache() {
    std::array<int64_t, 4> shape{n_text_layer_, 1, n_text_ctx_, n_text_state_};

    Ort::Value n_layer_self_k_cache = Ort::Value::CreateTensor<float>(
        Allocator(), shape.data(), shape.size());

    Ort::Value n_layer_self_v_cache = Ort::Value::CreateTensor<float>(
        Allocator(), shape.data(), shape.size());

    auto n = shape[0] * shape[1] * shape[2] * shape[3];

    float *p_k = n_layer_self_k_cache.GetTensorMutableData<float>();
    float *p_v = n_layer_self_v_cache.GetTensorMutableData<float>();

    memset(p_k, 0, sizeof(float) * n);
    memset(p_v, 0, sizeof(float) * n);

    return {std::move(n_layer_self_k_cache), std::move(n_layer_self_v_cache)};
  }

  OrtAllocator *Allocator() { return allocator_; }

  const std::vector<int64_t> &GetInitialTokens() const { return sot_sequence_; }

  const std::vector<int32_t> &GetAllLanguageIDs() const {
    return all_language_tokens_;
  }

  const std::unordered_map<std::string, int32_t> &GetLang2ID() const {
    return lang2id_;
  }

  const std::unordered_map<int32_t, std::string> &GetID2Lang() const {
    return id2lang_;
  }

  int32_t NoTimeStampsToken() const { return no_timestamps_; }

  // First timestamp token (represents 0.00s)
  // Timestamp tokens are: timestamp_begin, timestamp_begin+1, ...,
  // timestamp_end Each token represents 0.02s (20ms) intervals from 0.00s
  // to 30.00s
  int32_t TimestampBegin() const { return timestamp_begin_; }

  // Last timestamp token (represents 30.00s)
  // There are 1501 timestamp tokens total (0.00s to 30.00s at 0.02s intervals)
  int32_t TimestampEnd() const { return timestamp_begin_ + 1500; }

  int32_t EOT() const { return eot_; }

  int32_t SOT() const { return sot_; }

  int32_t TextCtx() const { return n_text_ctx_; }

  int32_t VocabSize() const { return n_vocab_; }

  int32_t FeatureDim() const { return n_mels_; }

  int32_t Translate() const { return translate_; }

  bool IsMultiLingual() const { return is_multilingual_; }

 private:
  void InitEncoder(void *model_data, size_t model_data_length) {
    if (model_data) {
      encoder_sess_ = std::make_unique<Ort::Session>(
          env_, model_data, model_data_length, sess_opts_);
    } else if (!encoder_sess_) {
      SHERPA_ONNX_LOGE(
          "Please pass buffer data or initialize encoder session outside of "
          "this function");
      SHERPA_ONNX_EXIT(-1);
    }

    GetInputNames(encoder_sess_.get(), &encoder_input_names_,
                  &encoder_input_names_ptr_);

    GetOutputNames(encoder_sess_.get(), &encoder_output_names_,
                   &encoder_output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = encoder_sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      os << "---encoder---\n";
      PrintModelMetadata(os, meta_data);
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
    SHERPA_ONNX_READ_META_DATA(n_mels_, "n_mels");
    SHERPA_ONNX_READ_META_DATA(n_text_layer_, "n_text_layer");
    SHERPA_ONNX_READ_META_DATA(n_text_ctx_, "n_text_ctx");
    SHERPA_ONNX_READ_META_DATA(n_text_state_, "n_text_state");
    SHERPA_ONNX_READ_META_DATA(n_vocab_, "n_vocab");
    SHERPA_ONNX_READ_META_DATA(sot_, "sot");
    SHERPA_ONNX_READ_META_DATA(eot_, "eot");
    SHERPA_ONNX_READ_META_DATA(blank_, "blank_id");
    SHERPA_ONNX_READ_META_DATA(translate_, "translate");
    SHERPA_ONNX_READ_META_DATA(transcribe_, "transcribe");
    SHERPA_ONNX_READ_META_DATA(is_multilingual_, "is_multilingual");
    SHERPA_ONNX_READ_META_DATA(no_timestamps_, "no_timestamps");
    // timestamp_begin is the first timestamp token (0.00s)
    // It's typically no_timestamps + 1 in OpenAI Whisper tokenizer
    timestamp_begin_ = no_timestamps_ + 1;
    SHERPA_ONNX_READ_META_DATA(no_speech_, "no_speech");
    SHERPA_ONNX_READ_META_DATA_VEC(sot_sequence_, "sot_sequence");

    if (is_multilingual_) {
      SHERPA_ONNX_READ_META_DATA_VEC(all_language_tokens_,
                                     "all_language_tokens");
      SHERPA_ONNX_READ_META_DATA_VEC_STRING(all_language_codes_,
                                            "all_language_codes");
      if (all_language_tokens_.size() != all_language_codes_.size()) {
        SHERPA_ONNX_LOGE("# lang_id: %d != # lang_code: %d",
                         static_cast<int32_t>(all_language_tokens_.size()),
                         static_cast<int32_t>(all_language_codes_.size()));
        exit(-1);
      }

      for (int32_t i = 0;
           i != static_cast<int32_t>(all_language_tokens_.size()); ++i) {
        lang2id_[all_language_codes_[i]] = all_language_tokens_[i];
        id2lang_[all_language_tokens_[i]] = all_language_codes_[i];
      }
    }
  }

  void InitDecoder(void *model_data, size_t model_data_length) {
    if (model_data) {
      decoder_sess_ = std::make_unique<Ort::Session>(
          env_, model_data, model_data_length, sess_opts_);
    } else if (!decoder_sess_) {
      SHERPA_ONNX_LOGE(
          "Please pass buffer data or initialize decoder session outside of "
          "this function");
      SHERPA_ONNX_EXIT(-1);
    }

    GetInputNames(decoder_sess_.get(), &decoder_input_names_,
                  &decoder_input_names_ptr_);

    GetOutputNames(decoder_sess_.get(), &decoder_output_names_,
                   &decoder_output_names_ptr_);

    // Check if decoder has attention output (4 outputs instead of 3)
    // Outputs are: logits, self_k_cache, self_v_cache,
    // [cross_attention_weights]
    has_attention_output_ = (decoder_output_names_.size() >= 4);

    if (has_attention_output_) {
      // Try to read n_alignment_heads from encoder metadata
      Ort::AllocatorWithDefaultOptions allocator;
      Ort::ModelMetadata meta_data = encoder_sess_->GetModelMetadata();
      SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(n_alignment_heads_,
                                              "n_alignment_heads", 0);

      if (config_.debug) {
        SHERPA_ONNX_LOGE("Decoder has attention output with %d alignment heads",
                         n_alignment_heads_);
      }
    }
  }

  void InitCudaIOBinding() {
    use_cuda_iobinding_ = (!is_cpu_provider_ && IsCudaProvider(GetProvider()));
    if (use_cuda_iobinding_) {
      // Use device 0 by default. SessionOptions() in sherpa-onnx usually
      // configures the CUDA EP device; binding here only affects output memory.
      cuda_mem_info_ = std::make_unique<Ort::MemoryInfo>(
          "Cuda", OrtDeviceAllocator, 0, OrtMemTypeDefault);
    }
  }

  std::string GetProvider() const {
    if (!config_.provider.empty()) {
      return config_.provider;
    }
    return lid_config_.provider;
  }

  OfflineModelConfig config_;
  SpokenLanguageIdentificationConfig lid_config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  Ort::MemoryInfo cpu_mem_info_;
  std::unique_ptr<Ort::MemoryInfo> cuda_mem_info_;
  bool use_cuda_iobinding_ = false;
  bool is_cpu_provider_ = false;

  std::unique_ptr<Ort::Session> encoder_sess_;
  std::unique_ptr<Ort::Session> decoder_sess_;

  std::vector<std::string> encoder_input_names_;
  std::vector<const char *> encoder_input_names_ptr_;

  std::vector<std::string> encoder_output_names_;
  std::vector<const char *> encoder_output_names_ptr_;

  std::vector<std::string> decoder_input_names_;
  std::vector<const char *> decoder_input_names_ptr_;

  std::vector<std::string> decoder_output_names_;
  std::vector<const char *> decoder_output_names_ptr_;

  std::vector<int32_t> all_language_tokens_;
  std::vector<std::string> all_language_codes_;
  std::unordered_map<std::string, int32_t> lang2id_;
  std::unordered_map<int32_t, std::string> id2lang_;

  // model meta data
  int32_t n_mels_ = 80;
  int32_t n_text_layer_ = 0;
  int32_t n_text_ctx_ = 0;
  int32_t n_text_state_ = 0;
  int32_t n_vocab_ = 0;
  int32_t sot_ = 0;
  int32_t eot_ = 0;
  int32_t blank_ = 0;
  int32_t translate_ = 0;
  int32_t transcribe_ = 0;
  int32_t no_timestamps_ = 0;
  int32_t timestamp_begin_ =
      0;  // First timestamp token, typically no_timestamps_ + 1
  int32_t no_speech_ = 0;
  int32_t is_multilingual_ = 0;
  std::vector<int64_t> sot_sequence_;

  // For cross-attention token-level timestamps
  bool has_attention_output_ = false;
  int32_t n_alignment_heads_ = 0;
};

OfflineWhisperModel::OfflineWhisperModel(const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

OfflineWhisperModel::OfflineWhisperModel(
    const SpokenLanguageIdentificationConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineWhisperModel::OfflineWhisperModel(Manager *mgr,
                                         const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

template <typename Manager>
OfflineWhisperModel::OfflineWhisperModel(
    Manager *mgr, const SpokenLanguageIdentificationConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineWhisperModel::~OfflineWhisperModel() = default;

std::pair<Ort::Value, Ort::Value> OfflineWhisperModel::ForwardEncoder(
    Ort::Value features) const {
  return impl_->ForwardEncoder(std::move(features));
}

std::tuple<Ort::Value, Ort::Value, Ort::Value, Ort::Value, Ort::Value,
           Ort::Value, Ort::Value>
OfflineWhisperModel::ForwardDecoder(Ort::Value tokens,
                                    Ort::Value n_layer_self_k_cache,
                                    Ort::Value n_layer_self_v_cache,
                                    Ort::Value n_layer_cross_k,
                                    Ort::Value n_layer_cross_v,
                                    Ort::Value offset) const {
  return impl_->ForwardDecoder(
      std::move(tokens), std::move(n_layer_self_k_cache),
      std::move(n_layer_self_v_cache), std::move(n_layer_cross_k),
      std::move(n_layer_cross_v), std::move(offset));
}

int32_t OfflineWhisperModel::DetectLanguage(Ort::Value &cross_k,    // NOLINT
                                            Ort::Value &cross_v) {  // NOLINT
  return impl_->DetectLanguage(cross_k, cross_v);
}

std::pair<Ort::Value, Ort::Value> OfflineWhisperModel::GetInitialSelfKVCache()
    const {
  return impl_->GetInitialSelfKVCache();
}

OrtAllocator *OfflineWhisperModel::Allocator() const {
  return impl_->Allocator();
}

const std::vector<int64_t> &OfflineWhisperModel::GetInitialTokens() const {
  return impl_->GetInitialTokens();
}

const std::vector<int32_t> &OfflineWhisperModel::GetAllLanguageIDs() const {
  return impl_->GetAllLanguageIDs();
}

const std::unordered_map<std::string, int32_t> &
OfflineWhisperModel::GetLang2ID() const {
  return impl_->GetLang2ID();
}

const std::unordered_map<int32_t, std::string> &
OfflineWhisperModel::GetID2Lang() const {
  return impl_->GetID2Lang();
}

int32_t OfflineWhisperModel::NoTimeStampsToken() const {
  return impl_->NoTimeStampsToken();
}

int32_t OfflineWhisperModel::TimestampBegin() const {
  return impl_->TimestampBegin();
}

int32_t OfflineWhisperModel::TimestampEnd() const {
  return impl_->TimestampEnd();
}

int32_t OfflineWhisperModel::EOT() const { return impl_->EOT(); }

int32_t OfflineWhisperModel::SOT() const { return impl_->SOT(); }

int32_t OfflineWhisperModel::TextCtx() const { return impl_->TextCtx(); }

int32_t OfflineWhisperModel::VocabSize() const { return impl_->VocabSize(); }

int32_t OfflineWhisperModel::FeatureDim() const { return impl_->FeatureDim(); }

int32_t OfflineWhisperModel::Translate() const { return impl_->Translate(); }

bool OfflineWhisperModel::IsMultiLingual() const {
  return impl_->IsMultiLingual();
}

bool OfflineWhisperModel::HasAttentionOutput() const {
  return impl_->HasAttentionOutput();
}

int32_t OfflineWhisperModel::NumAlignmentHeads() const {
  return impl_->NumAlignmentHeads();
}

void OfflineWhisperModel::NormalizeFeatures(float *features, int32_t num_frames,
                                            int32_t feat_dim) {
  NormalizeWhisperFeatures(features, num_frames, feat_dim);
}

#if __ANDROID_API__ >= 9
template OfflineWhisperModel::OfflineWhisperModel(
    AAssetManager *mgr, const OfflineModelConfig &config);

template OfflineWhisperModel::OfflineWhisperModel(
    AAssetManager *mgr, const SpokenLanguageIdentificationConfig &config);
#endif

#if __OHOS__
template OfflineWhisperModel::OfflineWhisperModel(
    NativeResourceManager *mgr, const OfflineModelConfig &config);

template OfflineWhisperModel::OfflineWhisperModel(
    NativeResourceManager *mgr,
    const SpokenLanguageIdentificationConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-whisper-model.h
================================================
// sherpa-onnx/csrc/offline-whisper-model.h
//
// Copyright (c)  2022-2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_WHISPER_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_WHISPER_MODEL_H_

#include <memory>
#include <string>
#include <tuple>
#include <unordered_map>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-model-config.h"
#include "sherpa-onnx/csrc/spoken-language-identification.h"

namespace sherpa_onnx {

class OfflineWhisperModel {
 public:
  explicit OfflineWhisperModel(const OfflineModelConfig &config);

  explicit OfflineWhisperModel(
      const SpokenLanguageIdentificationConfig &config);

  template <typename Manager>
  OfflineWhisperModel(Manager *mgr, const OfflineModelConfig &config);

  template <typename Manager>
  OfflineWhisperModel(Manager *mgr,
                      const SpokenLanguageIdentificationConfig &config);

  ~OfflineWhisperModel();

  /** Run the encoder model.
   *
   * @param features  A tensor of shape (N, C, T). It is changed in-place.
   *                  C is 80 and T is 3000.
   *
   * @return Return a pair containing:
   *  - n_layer_cross_k: A 4-D tensor of shape
   *                     (n_text_layer, N, n_audio_ctx, n_text_state)
   *  - n_layer_cross_v: A 4-D tensor of shape
   *                     (n_text_layer, N, n_audio_ctx, n_text_state)
   */
  std::pair<Ort::Value, Ort::Value> ForwardEncoder(Ort::Value features) const;

  /** Run the decoder model.
   *
   * @param tokens A int64 tensor of shape (N, num_words)
   * @param n_layer_self_k_cache  A 4-D tensor of shape
   *                              (n_text_layer, N, n_text_ctx, n_text_state).
   * @param n_layer_self_v_cache  A 4-D tensor of shape
   *                              (n_text_layer, N, n_text_ctx, n_text_state).
   * @param n_layer_cross_k       A 4-D tensor of shape
   *                              (n_text_layer, N, n_audio_ctx, n_text_state).
   * @param n_layer_cross_v       A 4-D tensor of shape
   *                              (n_text_layer, N, n_audio_ctx, n_text_state).
   * @param offset A int64 tensor of shape (N,)
   *
   * @return Return a tuple containing 7 tensors:
   *
   *  - logits A 3-D tensor of shape (N, num_words, vocab_size)
   *  - out_n_layer_self_k_cache Same shape as n_layer_self_k_cache
   *  - out_n_layer_self_v_cache Same shape as n_layer_self_v_cache
   *  - out_n_layer_cross_k Same as n_layer_cross_k
   *  - out_n_layer_cross_v Same as n_layer_cross_v
   *  - out_offset Same as offset
   *  - cross_attention_weights (if HasAttentionOutput()) A 4-D tensor of shape
   *                            (N, n_alignment_heads, n_tokens, n_audio_ctx)
   *                            Empty tensor if model doesn't have attention output
   */
  std::tuple<Ort::Value, Ort::Value, Ort::Value, Ort::Value, Ort::Value,
             Ort::Value, Ort::Value>
  ForwardDecoder(Ort::Value tokens, Ort::Value n_layer_self_k_cache,
                 Ort::Value n_layer_self_v_cache, Ort::Value n_layer_cross_k,
                 Ort::Value n_layer_cross_v, Ort::Value offset) const;

  int32_t DetectLanguage(Ort::Value &cross_k,   // NOLINT
                         Ort::Value &cross_v);  // NOLINT

  /** Return the initial self kv cache in a pair
   *  - n_layer_self_k_cache A 4-D tensor of shape
   *                         (n_text_layer, N, n_audio_ctx, n_text_state).
   *  - n_layer_self_v_cache A 4-D tensor of shape
   *                         (n_text_layer, N, n_audio_ctx, n_text_state).
   */
  std::pair<Ort::Value, Ort::Value> GetInitialSelfKVCache() const;
  const std::vector<int64_t> &GetInitialTokens() const;
  const std::vector<int32_t> &GetAllLanguageIDs() const;
  const std::unordered_map<std::string, int32_t> &GetLang2ID() const;
  const std::unordered_map<int32_t, std::string> &GetID2Lang() const;

  /** Return an allocator for allocating memory
   */
  OrtAllocator *Allocator() const;

  int32_t NoTimeStampsToken() const;
  int32_t TimestampBegin() const;  // First timestamp token (0.00s)
  int32_t TimestampEnd() const;    // Last timestamp token (30.00s)
  int32_t EOT() const;
  int32_t SOT() const;
  int32_t TextCtx() const;
  int32_t VocabSize() const;
  int32_t FeatureDim() const;
  int32_t Translate() const;
  bool IsMultiLingual() const;

  // Check if the decoder model has cross-attention weight outputs
  bool HasAttentionOutput() const;

  // Get number of alignment heads (0 if no attention output)
  int32_t NumAlignmentHeads() const;

  static void NormalizeFeatures(float *features, int32_t num_frames,
                                int32_t feat_dim);

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_WHISPER_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/offline-whisper-timestamp-rules-test.cc
================================================
// sherpa-onnx/csrc/offline-whisper-timestamp-rules-test.cc
//
// Copyright (c)  2026  Posit Software, PBC

#include "sherpa-onnx/csrc/offline-whisper-timestamp-rules.h"

#include <cmath>
#include <limits>
#include <vector>

#include "gtest/gtest.h"

namespace sherpa_onnx {

// Realistic Whisper token IDs (from multilingual model)
constexpr int32_t kTimestampBegin = 50364;  // <|0.00|>
constexpr int32_t kEot = 50257;             // <|endoftranscript|>
constexpr int32_t kNoTimestamps = 50363;    // <|notimestamps|>
constexpr int32_t kVocabSize = 51865;
constexpr int32_t kSampleBegin = 3;  // After [sot, language, task]

constexpr float kNegInf = -std::numeric_limits<float>::infinity();

// Helper to check if a logit is suppressed (is -inf)
bool IsSuppressed(float logit) { return std::isinf(logit) && logit < 0; }

// Helper to count non-suppressed logits in a range
int32_t CountNonSuppressed(const float *logits, int32_t start, int32_t end) {
  int32_t count = 0;
  for (int32_t i = start; i < end; ++i) {
    if (!IsSuppressed(logits[i])) {
      ++count;
    }
  }
  return count;
}

// Helper to initialize logits with uniform values
void InitLogits(std::vector<float> *logits, float value = 0.0f) {
  logits->assign(kVocabSize, value);
}

class ApplyTimestampRulesTest : public ::testing::Test {
 protected:
  std::vector<float> logits_;

  void SetUp() override { InitLogits(&logits_); }
};

// =============================================================================
// Rule 1: Always suppress no_timestamps token
// =============================================================================

TEST_F(ApplyTimestampRulesTest, AlwaysSuppressNoTimestamps) {
  std::vector<int64_t> tokens = {1, 2, 3};  // SOT sequence only
  logits_[kNoTimestamps] = 5.0f;            // Give it a high value

  ApplyTimestampRules(logits_.data(), kVocabSize, tokens, kSampleBegin,
                      kTimestampBegin, kNoTimestamps, kEot, 50);

  EXPECT_TRUE(IsSuppressed(logits_[kNoTimestamps]));
}

// =============================================================================
// Rule 5: First sampled token must be a timestamp
// =============================================================================

TEST_F(ApplyTimestampRulesTest, FirstTokenMustBeTimestamp) {
  // Only SOT sequence, no sampled tokens yet
  std::vector<int64_t> tokens = {1, 2, 3};

  ApplyTimestampRules(logits_.data(), kVocabSize, tokens, kSampleBegin,
                      kTimestampBegin, kNoTimestamps, kEot, 50);

  // All text tokens should be suppressed
  for (int32_t i = 0; i < kTimestampBegin; ++i) {
    if (i != kNoTimestamps) {  // no_timestamps is already suppressed
      EXPECT_TRUE(IsSuppressed(logits_[i]))
          << "Text token " << i << " should be suppressed on first sample";
    }
  }

  // Timestamps within max_initial_timestamp_index should NOT be suppressed
  for (int32_t i = kTimestampBegin; i <= kTimestampBegin + 50; ++i) {
    EXPECT_FALSE(IsSuppressed(logits_[i]))
        << "Timestamp " << i << " should be allowed on first sample";
  }

  // Timestamps beyond max_initial_timestamp_index should be suppressed
  for (int32_t i = kTimestampBegin + 51; i < kVocabSize; ++i) {
    EXPECT_TRUE(IsSuppressed(logits_[i]))
        << "Timestamp " << i << " should be suppressed (beyond max_initial)";
  }
}

TEST_F(ApplyTimestampRulesTest, FirstTokenNoMaxInitialConstraint) {
  std::vector<int64_t> tokens = {1, 2, 3};

  // Pass -1 for max_initial_timestamp_index to disable the constraint
  ApplyTimestampRules(logits_.data(), kVocabSize, tokens, kSampleBegin,
                      kTimestampBegin, kNoTimestamps, kEot, -1);

  // All timestamps should be allowed
  for (int32_t i = kTimestampBegin; i < kVocabSize; ++i) {
    EXPECT_FALSE(IsSuppressed(logits_[i]))
        << "All timestamps should be allowed when max_initial is -1";
  }
}

// =============================================================================
// Rule 3: Timestamp pairing - after opening timestamp, force text
// =============================================================================

TEST_F(ApplyTimestampRulesTest, AfterFirstTimestampForceText) {
  // SOT sequence + first timestamp <|0.00|>
  std::vector<int64_t> tokens = {1, 2, 3, kTimestampBegin};

  ApplyTimestampRules(logits_.data(), kVocabSize, tokens, kSampleBegin,
                      kTimestampBegin, kNoTimestamps, kEot, 50);

  // All timestamps should be suppressed (force text)
  for (int32_t i = kTimestampBegin; i < kVocabSize; ++i) {
    EXPECT_TRUE(IsSuppressed(logits_[i]))
        << "Timestamp " << i << " should be suppressed after opening timestamp";
  }

  // Text tokens should NOT be suppressed (except no_timestamps)
  // Note: EOT is also a "text" token in this context
  int32_t text_allowed = CountNonSuppressed(logits_.data(), 0, kTimestampBegin);
  EXPECT_GT(text_allowed, 0) << "Some text tokens should be allowed";
}

TEST_F(ApplyTimestampRulesTest, AfterTwoConsecutiveTimestampsForceText) {
  // Pattern: <|0.00|><|0.00|> - two consecutive timestamps
  std::vector<int64_t> tokens = {1, 2, 3, kTimestampBegin, kTimestampBegin};

  ApplyTimestampRules(logits_.data(), kVocabSize, tokens, kSampleBegin,
                      kTimestampBegin, kNoTimestamps, kEot, 50);

  // All timestamps should be suppressed (force text)
  for (int32_t i = kTimestampBegin; i < kVocabSize; ++i) {
    EXPECT_TRUE(IsSuppressed(logits_[i]))
        << "Timestamp " << i << " should be suppressed after double timestamp";
  }
}

// =============================================================================
// Rule 3: After text+timestamp, force timestamp/EOT (suppress text)
// =============================================================================

TEST_F(ApplyTimestampRulesTest, AfterTextThenTimestampForceTimestampOrEot) {
  // Pattern: <|0.00|> "hello" <|2.00|> - segment just closed
  int32_t ts_0_00 = kTimestampBegin;
  int32_t ts_2_00 = kTimestampBegin + 100;  // 2.00 seconds = 100 * 0.02
  int32_t text_token = 500;                 // some text token

  std::vector<int64_t> tokens = {1, 2, 3, ts_0_00, text_token, ts_2_00};

  ApplyTimestampRules(logits_.data(), kVocabSize, tokens, kSampleBegin,
                      kTimestampBegin, kNoTimestamps, kEot, 50);

  // Text tokens before EOT should be suppressed
  for (int32_t i = 0; i < kEot; ++i) {
    EXPECT_TRUE(IsSuppressed(logits_[i]))
        << "Text token " << i << " should be suppressed after segment closed";
  }

  // EOT should be allowed
  EXPECT_FALSE(IsSuppressed(logits_[kEot])) << "EOT should be allowed";

  // Text tokens after EOT but before timestamp_begin should be suppressed
  for (int32_t i = kEot + 1; i < kTimestampBegin; ++i) {
    EXPECT_TRUE(IsSuppressed(logits_[i]))
        << "Token " << i << " should be suppressed after segment closed";
  }

  // Timestamps >= last_ts should be allowed (monotonicity allows same ts)
  EXPECT_FALSE(IsSuppressed(logits_[ts_2_00]))
      << "Same timestamp should be allowed for next segment opening";
}

// =============================================================================
// Rule 4: Monotonicity - timestamps must not decrease
// =============================================================================

TEST_F(ApplyTimestampRulesTest, MonotonicityPreventsEarlierTimestamps) {
  // After <|0.00|> "text" - we're in text, last timestamp was 0.00
  int32_t ts_0_00 = kTimestampBegin;
  int32_t text_token = 500;

  std::vector<int64_t> tokens = {1, 2, 3, ts_0_00, text_token};

  ApplyTimestampRules(logits_.data(), kVocabSize, tokens, kSampleBegin,
                      kTimestampBegin, kNoTimestamps, kEot, 50);

  // Timestamps before ts_0_00 + 1 should be suppressed (strictly increasing)
  // Since last token was text, we require strictly increasing
  EXPECT_TRUE(IsSuppressed(logits_[ts_0_00]))
      << "Same timestamp should be suppressed when not closing segment";

  // Timestamps after should be allowed
  EXPECT_FALSE(IsSuppressed(logits_[ts_0_00 + 1]))
      << "Next timestamp should be allowed";
}

TEST_F(ApplyTimestampRulesTest, MonotonicityAllowsSameTimestampAfterClose) {
  // After <|0.00|> "text" <|2.00|> - segment just closed
  int32_t ts_0_00 = kTimestampBegin;
  int32_t ts_2_00 = kTimestampBegin + 100;
  int32_t text_token = 500;

  std::vector<int64_t> tokens = {1, 2, 3, ts_0_00, text_token, ts_2_00};

  ApplyTimestampRules(logits_.data(), kVocabSize, tokens, kSampleBegin,
                      kTimestampBegin, kNoTimestamps, kEot, 50);

  // Same timestamp should be allowed (for opening next segment)
  EXPECT_FALSE(IsSuppressed(logits_[ts_2_00]))
      << "Same timestamp allowed when segment just closed";

  // Earlier timestamps should still be suppressed
  EXPECT_TRUE(IsSuppressed(logits_[ts_2_00 - 1]))
      << "Earlier timestamps should be suppressed";
}

// =============================================================================
// Rule 6: Probability rule - force timestamp when sum > max text
// =============================================================================

TEST_F(ApplyTimestampRulesTest, ProbabilityRuleForcesTimestamp) {
  // Set up: we're in text (last token was not timestamp)
  int32_t ts_0_00 = kTimestampBegin;
  int32_t text_token = 500;

  std::vector<int64_t> tokens = {1, 2, 3, ts_0_00, text_token};

  // Give timestamps high logits, text tokens low logits
  for (int32_t i = 0; i < kTimestampBegin; ++i) {
    logits_[i] = -10.0f;
  }
  for (int32_t i = kTimestampBegin; i < kVocabSize; ++i) {
    logits_[i] = 0.0f;  // After logsumexp, this will dominate
  }

  ApplyTimestampRules(logits_.data(), kVocabSize, tokens, kSampleBegin,
                      kTimestampBegin, kNoTimestamps, kEot, 50);

  // Text tokens should be suppressed due to probability rule
  for (int32_t i = 0; i < kTimestampBegin; ++i) {
    EXPECT_TRUE(IsSuppressed(logits_[i]))
        << "Text token " << i << " should be suppressed by probability rule";
  }
}

TEST_F(ApplyTimestampRulesTest, ProbabilityRuleDoesNotApplyWhenTextDominates) {
  // Set up: we're in text, but text logits are higher
  int32_t ts_0_00 = kTimestampBegin;
  int32_t text_token = 500;

  std::vector<int64_t> tokens = {1, 2, 3, ts_0_00, text_token};

  // Give text tokens high logits, timestamps low
  for (int32_t i = 0; i < kTimestampBegin; ++i) {
    logits_[i] = 0.0f;
  }
  for (int32_t i = kTimestampBegin; i < kVocabSize; ++i) {
    logits_[i] = -100.0f;
  }

  ApplyTimestampRules(logits_.data(), kVocabSize, tokens, kSampleBegin,
                      kTimestampBegin, kNoTimestamps, kEot, 50);

  // Text tokens should NOT all be suppressed
  int32_t text_allowed = CountNonSuppressed(logits_.data(), 0, kTimestampBegin);
  EXPECT_GT(text_allowed, 0)
      << "Text tokens should be allowed when they dominate";
}

TEST_F(ApplyTimestampRulesTest, ProbabilityRuleSkippedAfterTimestamp) {
  // After timestamp, probability rule doesn't apply
  std::vector<int64_t> tokens = {1, 2, 3, kTimestampBegin};

  // Even with high timestamp logits, the pairing rule takes precedence
  for (int32_t i = kTimestampBegin; i < kVocabSize; ++i) {
    logits_[i] = 100.0f;
  }

  ApplyTimestampRules(logits_.data(), kVocabSize, tokens, kSampleBegin,
                      kTimestampBegin, kNoTimestamps, kEot, 50);

  // Timestamps should be suppressed (pairing rule), not text
  for (int32_t i = kTimestampBegin; i < kVocabSize; ++i) {
    EXPECT_TRUE(IsSuppressed(logits_[i]));
  }
}

// =============================================================================
// ParseTimestampTokens tests
// =============================================================================

class ParseTimestampTokensTest : public ::testing::Test {};

TEST_F(ParseTimestampTokensTest, BasicSingleSegment) {
  // <|0.00|> "hello" <|2.00|> EOT
  int32_t ts_0_00 = kTimestampBegin;
  int32_t ts_2_00 = kTimestampBegin + 100;
  std::vector<int32_t> tokens = {ts_0_00, 100, 200, 300, ts_2_00, kEot};

  auto segments = ParseTimestampTokens(tokens, kTimestampBegin, kEot);

  ASSERT_EQ(segments.size(), 1);
  EXPECT_FLOAT_EQ(segments[0].start_time, 0.0f);
  EXPECT_FLOAT_EQ(segments[0].end_time, 2.0f);
  ASSERT_EQ(segments[0].token_ids.size(), 3);
  EXPECT_EQ(segments[0].token_ids[0], 100);
  EXPECT_EQ(segments[0].token_ids[1], 200);
  EXPECT_EQ(segments[0].token_ids[2], 300);
}

TEST_F(ParseTimestampTokensTest, MultipleSegments) {
  // <|0.00|> "hi" <|1.00|><|1.00|> "bye" <|2.00|> EOT
  int32_t ts_0_00 = kTimestampBegin;
  int32_t ts_1_00 = kTimestampBegin + 50;
  int32_t ts_2_00 = kTimestampBegin + 100;
  std::vector<int32_t> tokens = {ts_0_00, 100,     ts_1_00, ts_1_00,
                                 200,     ts_2_00, kEot};

  auto segments = ParseTimestampTokens(tokens, kTimestampBegin, kEot);

  ASSERT_EQ(segments.size(), 2);

  EXPECT_FLOAT_EQ(segments[0].start_time, 0.0f);
  EXPECT_FLOAT_EQ(segments[0].end_time, 1.0f);
  ASSERT_EQ(segments[0].token_ids.size(), 1);
  EXPECT_EQ(segments[0].token_ids[0], 100);

  EXPECT_FLOAT_EQ(segments[1].start_time, 1.0f);
  EXPECT_FLOAT_EQ(segments[1].end_time, 2.0f);
  ASSERT_EQ(segments[1].token_ids.size(), 1);
  EXPECT_EQ(segments[1].token_ids[0], 200);
}

TEST_F(ParseTimestampTokensTest, EotClosesOpenSegment) {
  // <|0.00|> "hello" EOT (no closing timestamp)
  int32_t ts_0_00 = kTimestampBegin;
  std::vector<int32_t> tokens = {ts_0_00, 100, 200, kEot};

  auto segments = ParseTimestampTokens(tokens, kTimestampBegin, kEot);

  ASSERT_EQ(segments.size(), 1);
  EXPECT_FLOAT_EQ(segments[0].start_time, 0.0f);
  // EOT closes the segment without a closing timestamp, so end_time is sentinel
  EXPECT_FLOAT_EQ(segments[0].end_time, -1.0f);
  ASSERT_EQ(segments[0].token_ids.size(), 2);
  EXPECT_EQ(segments[0].token_ids[0], 100);
  EXPECT_EQ(segments[0].token_ids[1], 200);
}

TEST_F(ParseTimestampTokensTest, EmptySegmentSkipped) {
  // <|0.00|><|1.00|><|1.00|> "text" <|2.00|> EOT
  // The first "segment" between 0.00 and 1.00 has no text, should be skipped
  int32_t ts_0_00 = kTimestampBegin;
  int32_t ts_1_00 = kTimestampBegin + 50;
  int32_t ts_2_00 = kTimestampBegin + 100;
  std::vector<int32_t> tokens = {ts_0_00, ts_1_00, ts_1_00, 100, ts_2_00, kEot};

  auto segments = ParseTimestampTokens(tokens, kTimestampBegin, kEot);

  ASSERT_EQ(segments.size(), 1);
  EXPECT_FLOAT_EQ(segments[0].start_time, 1.0f);
  EXPECT_FLOAT_EQ(segments[0].end_time, 2.0f);
}

TEST_F(ParseTimestampTokensTest, IncompleteSegmentGetsSentinel) {
  // <|0.00|> "hello" (no closing timestamp, no EOT)
  int32_t ts_0_00 = kTimestampBegin;
  std::vector<int32_t> tokens = {ts_0_00, 100, 200};

  auto segments = ParseTimestampTokens(tokens, kTimestampBegin, kEot);

  ASSERT_EQ(segments.size(), 1);
  EXPECT_FLOAT_EQ(segments[0].start_time, 0.0f);
  EXPECT_FLOAT_EQ(segments[0].end_time, -1.0f);  // Sentinel for incomplete
  ASSERT_EQ(segments[0].token_ids.size(), 2);
}

TEST_F(ParseTimestampTokensTest, SentinelConsistencyBetweenEotAndIncomplete) {
  // Verify that both EOT-closed and incomplete segments use the same sentinel
  // This ensures consistent handling by downstream code

  // Case 1: EOT-closed segment (no closing timestamp before EOT)
  int32_t ts_1_00 = kTimestampBegin + 50;
  std::vector<int32_t> tokens_eot = {ts_1_00, 100, kEot};
  auto segments_eot = ParseTimestampTokens(tokens_eot, kTimestampBegin, kEot);

  // Case 2: Incomplete segment (tokens end without closing timestamp or EOT)
  std::vector<int32_t> tokens_incomplete = {ts_1_00, 100};
  auto segments_incomplete =
      ParseTimestampTokens(tokens_incomplete, kTimestampBegin, kEot);

  ASSERT_EQ(segments_eot.size(), 1);
  ASSERT_EQ(segments_incomplete.size(), 1);

  // Both should have the same start_time
  EXPECT_FLOAT_EQ(segments_eot[0].start_time, 1.0f);
  EXPECT_FLOAT_EQ(segments_incomplete[0].start_time, 1.0f);

  // Both should use the same sentinel value for end_time
  EXPECT_FLOAT_EQ(segments_eot[0].end_time, -1.0f);
  EXPECT_FLOAT_EQ(segments_incomplete[0].end_time, -1.0f);
  EXPECT_FLOAT_EQ(segments_eot[0].end_time, segments_incomplete[0].end_time)
      << "EOT-closed and incomplete segments must use the same sentinel";
}

TEST_F(ParseTimestampTokensTest, NoSegmentsFromEmptyInput) {
  std::vector<int32_t> tokens = {};

  auto segments = ParseTimestampTokens(tokens, kTimestampBegin, kEot);

  EXPECT_EQ(segments.size(), 0);
}

TEST_F(ParseTimestampTokensTest, OnlyEot) {
  std::vector<int32_t> tokens = {kEot};

  auto segments = ParseTimestampTokens(tokens, kTimestampBegin, kEot);

  EXPECT_EQ(segments.size(), 0);
}

TEST_F(ParseTimestampTokensTest, TextBeforeFirstTimestampIgnored) {
  // Text tokens before any timestamp should be ignored
  int32_t ts_1_00 = kTimestampBegin + 50;
  int32_t ts_2_00 = kTimestampBegin + 100;
  std::vector<int32_t> tokens = {100, 200, ts_1_00, 300, ts_2_00, kEot};

  auto segments = ParseTimestampTokens(tokens, kTimestampBegin, kEot);

  ASSERT_EQ(segments.size(), 1);
  EXPECT_FLOAT_EQ(segments[0].start_time, 1.0f);
  EXPECT_FLOAT_EQ(segments[0].end_time, 2.0f);
  ASSERT_EQ(segments[0].token_ids.size(), 1);
  EXPECT_EQ(segments[0].token_ids[0], 300);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-whisper-timestamp-rules.cc
================================================
// sherpa-onnx/csrc/offline-whisper-timestamp-rules.cc
//
// Copyright (c)  2026  Posit Software, PBC

#include "sherpa-onnx/csrc/offline-whisper-timestamp-rules.h"

#include <algorithm>
#include <cassert>
#include <cmath>
#include <limits>
#include <utility>
#include <vector>

namespace sherpa_onnx {

namespace {

constexpr float kNegInf = -std::numeric_limits<float>::infinity();

// =============================================================================
// Step 1: State Determination
// =============================================================================

// Mutually exclusive decoding states
// The expected token pattern is:
//   <|0.00|> text text <|6.60|><|6.60|> text text <|12.00|> EOT
enum class TimestampDecodingState {
  kStart,           // num_sampled == 0: first token must be timestamp
  kAfterOpeningTs,  // last=TS, penult=TS: after opening or double TS, force
                    // text
  kSegmentClosing,  // last=TS, penult=text: segment just closed, force TS/EOT
  kInText           // last=text: in text, probability rule may apply
};

// Raw information extracted from the token sequence
struct TokenSequenceInfo {
  int32_t num_sampled;      // tokens sampled so far (excluding SOT sequence)
  bool last_was_timestamp;  // was the last token a timestamp?
  bool penultimate_was_timestamp;  // was the second-to-last token a timestamp?
  int32_t last_ts;                 // last timestamp token ID (-1 if none)
};

// Extract information from the token sequence
TokenSequenceInfo ExtractTokenSequenceInfo(const std::vector<int64_t> &tokens,
                                           int32_t sample_begin,
                                           int32_t timestamp_begin) {
  TokenSequenceInfo info;
  info.num_sampled = static_cast<int32_t>(tokens.size()) - sample_begin;
  info.last_was_timestamp =
      info.num_sampled >= 1 && tokens.back() >= timestamp_begin;
  // IMPORTANT: penultimate defaults to TRUE when len < 2
  // This matches OpenAI's behavior and ensures text follows the first timestamp
  info.penultimate_was_timestamp =
      info.num_sampled < 2 || tokens[tokens.size() - 2] >= timestamp_begin;

  info.last_ts = -1;
  // Find the last timestamp in the sequence (for monotonicity)
  for (int32_t i = sample_begin; i < static_cast<int32_t>(tokens.size()); ++i) {
    if (tokens[i] >= timestamp_begin) {
      info.last_ts = static_cast<int32_t>(tokens[i]);
    }
  }

  return info;
}

// Map raw token info to a mutually exclusive state
TimestampDecodingState DetermineDecodingState(const TokenSequenceInfo &info) {
  if (info.num_sampled == 0) {
    return TimestampDecodingState::kStart;
  }
  if (info.last_was_timestamp && info.penultimate_was_timestamp) {
    return TimestampDecodingState::kAfterOpeningTs;
  }
  if (info.last_was_timestamp && !info.penultimate_was_timestamp) {
    return TimestampDecodingState::kSegmentClosing;
  }
  return TimestampDecodingState::kInText;
}

// =============================================================================
// Step 2: Decision Making
// =============================================================================

// What actions to take based on the current state
struct TimestampDecision {
  bool suppress_text;        // suppress text tokens
  bool suppress_timestamps;  // suppress timestamp tokens
  bool suppress_eot;         // suppress EOT token
  int32_t min_timestamp;     // minimum allowed timestamp (-1 = no constraint)
  int32_t max_timestamp;     // maximum allowed timestamp (-1 = no constraint)
  bool check_probability_rule;  // apply probability rule after other
                                // suppressions
};

// Map state to actions - each case must set ALL variables
TimestampDecision DecideTimestampAction(TimestampDecodingState state,
                                        const TokenSequenceInfo &info,
                                        int32_t timestamp_begin,
                                        int32_t max_initial_timestamp_index) {
  // Declare all decision variables - must be set by every case
  bool suppress_text;
  bool suppress_timestamps;
  bool suppress_eot;
  int32_t max_timestamp;
  bool check_probability_rule;

  // Compute monotonicity constraint (cross-cutting concern, used by all cases)
  int32_t min_timestamp = -1;
  if (info.last_ts >= 0) {
    if (state == TimestampDecodingState::kSegmentClosing) {
      // Same timestamp allowed for next segment opening
      min_timestamp = info.last_ts;
    } else {
      // Strictly increasing timestamps
      min_timestamp = info.last_ts + 1;
    }
  }

  switch (state) {
    case TimestampDecodingState::kStart:
      // First token must be a timestamp
      suppress_text = true;
      suppress_timestamps = false;
      suppress_eot = true;
      max_timestamp = (max_initial_timestamp_index >= 0)
                          ? timestamp_begin + max_initial_timestamp_index
                          : -1;
      check_probability_rule = false;
      break;

    case TimestampDecodingState::kAfterOpeningTs:
      // After opening timestamp (or double timestamp), force text
      suppress_text = false;
      suppress_timestamps = true;
      suppress_eot = false;
      max_timestamp = -1;
      check_probability_rule = false;
      break;

    case TimestampDecodingState::kSegmentClosing:
      // Segment just closed, force timestamp or EOT
      suppress_text = true;
      suppress_timestamps = false;
      suppress_eot = false;  // EOT allowed to end transcript
      max_timestamp = -1;
      check_probability_rule = false;
      break;

    case TimestampDecodingState::kInText:
      // In text, probability rule may force timestamp
      suppress_text = false;
      suppress_timestamps = false;
      suppress_eot = false;
      max_timestamp = -1;
      check_probability_rule = true;
      break;
  }

  return TimestampDecision{suppress_text, suppress_timestamps,
                           suppress_eot,  min_timestamp,
                           max_timestamp, check_probability_rule};
}

// =============================================================================
// Step 3: Execution
// =============================================================================

// Apply the suppression decisions to the logits
void ApplyTimestampDecision(float *logits, int32_t vocab_size,
                            const TimestampDecision &decision,
                            int32_t timestamp_begin, int32_t eot) {
  // Suppress text tokens if needed
  if (decision.suppress_text) {
    if (decision.suppress_eot) {
      // Suppress all text tokens including EOT
      std::fill(logits, logits + timestamp_begin, kNegInf);
    } else {
      // Suppress text tokens but preserve EOT
      std::fill(logits, logits + eot, kNegInf);
      std::fill(logits + eot + 1, logits + timestamp_begin, kNegInf);
    }
  }

  // Suppress timestamp tokens if needed
  if (decision.suppress_timestamps) {
    std::fill(logits + timestamp_begin, logits + vocab_size, kNegInf);
  }

  // Apply monotonicity constraint (suppress timestamps below minimum)
  if (decision.min_timestamp >= 0) {
    std::fill(logits + timestamp_begin, logits + decision.min_timestamp,
              kNegInf);
  }

  // Apply max_initial constraint (suppress timestamps above maximum)
  if (decision.max_timestamp >= 0) {
    // Clamp to valid range to avoid out-of-bounds access
    int32_t safe_max = std::min(decision.max_timestamp, vocab_size - 1);
    if (safe_max + 1 < vocab_size) {
      std::fill(logits + safe_max + 1, logits + vocab_size, kNegInf);
    }
  }
}

// Apply the probability rule: if timestamp probability > max text probability,
// force timestamp. This is the "sum rule" from OpenAI's implementation.
void ApplyProbabilityRule(float *logits, int32_t vocab_size,
                          int32_t timestamp_begin) {
  // Compute logsumexp of timestamp logits
  float max_ts_logit =
      *std::max_element(logits + timestamp_begin, logits + vocab_size);
  if (max_ts_logit == kNegInf) {
    return;  // All timestamps suppressed, nothing to do
  }

  float ts_logsum = 0.0f;
  for (int32_t i = timestamp_begin; i < vocab_size; ++i) {
    if (logits[i] > kNegInf) {
      ts_logsum += std::exp(logits[i] - max_ts_logit);
    }
  }
  ts_logsum = max_ts_logit + std::log(ts_logsum);

  // Find max text logit (including EOT - matches OpenAI behavior)
  float max_text_logit = *std::max_element(logits, logits + timestamp_begin);

  // If timestamp logsumexp > max text logit, force timestamp
  if (ts_logsum > max_text_logit) {
    std::fill(logits, logits + timestamp_begin, kNegInf);
  }
}

}  // namespace

// =============================================================================
// Public API
// =============================================================================

void ApplyTimestampRules(float *logits, int32_t vocab_size,
                         const std::vector<int64_t> &tokens,
                         int32_t sample_begin, int32_t timestamp_begin,
                         int32_t no_timestamps, int32_t eot,
                         int32_t max_initial_timestamp_index) {
  // Validate parameters
  assert(logits != nullptr && "logits must not be null");
  assert(vocab_size > 0 && "vocab_size must be positive");
  assert(sample_begin >= 0 && "sample_begin must be non-negative");
  assert(sample_begin <= static_cast<int32_t>(tokens.size()) &&
         "sample_begin must not exceed tokens size");
  assert(timestamp_begin > 0 && "timestamp_begin must be positive");
  assert(timestamp_begin < vocab_size &&
         "timestamp_begin must be less than vocab_size");
  assert(eot >= 0 && eot < timestamp_begin &&
         "eot must be in range [0, timestamp_begin)");
  assert(no_timestamps >= 0 && no_timestamps < vocab_size &&
         "no_timestamps must be in range [0, vocab_size)");

  // Always suppress no_timestamps token
  logits[no_timestamps] = kNegInf;

  // Step 1: Extract token info and determine state
  TokenSequenceInfo info =
      ExtractTokenSequenceInfo(tokens, sample_begin, timestamp_begin);
  TimestampDecodingState state = DetermineDecodingState(info);

  // Step 2: Map state to actions
  TimestampDecision decision = DecideTimestampAction(
      state, info, timestamp_begin, max_initial_timestamp_index);

  // Step 3: Execute the decisions
  ApplyTimestampDecision(logits, vocab_size, decision, timestamp_begin, eot);

  if (decision.check_probability_rule) {
    ApplyProbabilityRule(logits, vocab_size, timestamp_begin);
  }
}

std::vector<OfflineWhisperSegment> ParseTimestampTokens(
    const std::vector<int32_t> &tokens, int32_t timestamp_begin, int32_t eot) {
  // Validate parameters
  assert(timestamp_begin > 0 && "timestamp_begin must be positive");
  assert(eot >= 0 && eot < timestamp_begin &&
         "eot must be in range [0, timestamp_begin)");

  std::vector<OfflineWhisperSegment> segments;

  // Each timestamp token represents 0.02 seconds (20ms)
  constexpr float kSecondsPerTimestamp = 0.02f;

  OfflineWhisperSegment current_segment;
  bool in_segment = false;

  for (size_t i = 0; i < tokens.size(); ++i) {
    int32_t token = tokens[i];

    if (token == eot) {
      // End of transcript - close any open segment
      if (in_segment && !current_segment.token_ids.empty()) {
        current_segment.end_time =
            -1.0f;  // Use sentinel for EOT-closed segment
        segments.push_back(std::move(current_segment));
        current_segment = OfflineWhisperSegment();
      }
      break;
    }

    if (token >= timestamp_begin) {
      // This is a timestamp token
      float time = (token - timestamp_begin) * kSecondsPerTimestamp;

      if (!in_segment) {
        // Start of a new segment
        current_segment.start_time = time;
        in_segment = true;
      } else {
        // End of current segment
        current_segment.end_time = time;
        if (!current_segment.token_ids.empty()) {
          segments.push_back(std::move(current_segment));
        }
        // Start new segment at same timestamp
        current_segment = OfflineWhisperSegment();
        current_segment.start_time = time;
      }
    } else {
      // Text token - add to current segment
      if (in_segment) {
        current_segment.token_ids.push_back(token);
      }
    }
  }

  // Handle any remaining segment without closing timestamp
  if (in_segment && !current_segment.token_ids.empty()) {
    // Use a sentinel value to indicate incomplete segment
    current_segment.end_time = -1.0f;
    segments.push_back(std::move(current_segment));
  }

  return segments;
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-whisper-timestamp-rules.h
================================================
// sherpa-onnx/csrc/offline-whisper-timestamp-rules.h
//
// Copyright (c)  2026  Posit Software, PBC

#ifndef SHERPA_ONNX_CSRC_OFFLINE_WHISPER_TIMESTAMP_RULES_H_
#define SHERPA_ONNX_CSRC_OFFLINE_WHISPER_TIMESTAMP_RULES_H_

#include <cstdint>
#include <vector>

#include "sherpa-onnx/csrc/offline-whisper-decoder.h"

namespace sherpa_onnx {

// Apply OpenAI Whisper's timestamp token rules to logits
// Reference: whisper/decoding.py ApplyTimestampRules
//
// Parameters:
//   logits: pointer to logits array of size vocab_size (modified in-place)
//   vocab_size: size of vocabulary
//   tokens: all tokens decoded so far (including initial SOT sequence)
//   sample_begin: index in tokens where actual sampling began (after SOT seq)
//   timestamp_begin: token ID of first timestamp (<|0.00|>)
//   no_timestamps: token ID of no_timestamps token
//   eot: token ID of end-of-transcript
//   max_initial_timestamp_index: limit for first timestamp (e.g., 50 = 1.0s)
void ApplyTimestampRules(float *logits, int32_t vocab_size,
                         const std::vector<int64_t> &tokens,
                         int32_t sample_begin, int32_t timestamp_begin,
                         int32_t no_timestamps, int32_t eot,
                         int32_t max_initial_timestamp_index);

// Parse timestamp tokens from decoded sequence and create segments
// Pattern: <|start_time|> text tokens... <|end_time|>
//
// Parameters:
//   tokens: decoded tokens (text + timestamp tokens interleaved)
//   timestamp_begin: token ID of first timestamp (<|0.00|>)
//   eot: token ID of end-of-transcript
//
// Returns: vector of segments with start/end times and token IDs
std::vector<OfflineWhisperSegment> ParseTimestampTokens(
    const std::vector<int32_t> &tokens, int32_t timestamp_begin, int32_t eot);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_WHISPER_TIMESTAMP_RULES_H_


================================================
FILE: sherpa-onnx/csrc/offline-zipformer-audio-tagging-model-config.cc
================================================
// sherpa-onnx/csrc/offline-zipformer-audio-tagging-model-config.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-zipformer-audio-tagging-model-config.h"

#include <memory>
#include <string>
#include <utility>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OfflineZipformerAudioTaggingModelConfig::Register(ParseOptions *po) {
  po->Register("zipformer-model", &model,
               "Path to zipformer model for audio tagging");
}

bool OfflineZipformerAudioTaggingModelConfig::Validate() const {
  if (model.empty()) {
    SHERPA_ONNX_LOGE("Please provide --zipformer-model");
    return false;
  }

  if (!FileExists(model)) {
    SHERPA_ONNX_LOGE("--zipformer-model: '%s' does not exist", model.c_str());
    return false;
  }

  return true;
}

std::string OfflineZipformerAudioTaggingModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineZipformerAudioTaggingModelConfig(";
  os << "model=\"" << model << "\")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-zipformer-audio-tagging-model-config.h
================================================
// sherpa-onnx/csrc/offline-zipformer-audio-tagging-model-config.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_ZIPFORMER_AUDIO_TAGGING_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_ZIPFORMER_AUDIO_TAGGING_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineZipformerAudioTaggingModelConfig {
  std::string model;

  OfflineZipformerAudioTaggingModelConfig() = default;

  explicit OfflineZipformerAudioTaggingModelConfig(const std::string &model)
      : model(model) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_ZIPFORMER_AUDIO_TAGGING_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-zipformer-audio-tagging-model.cc
================================================
// sherpa-onnx/csrc/offline-zipformer-audio-tagging-model.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-zipformer-audio-tagging-model.h"

#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class OfflineZipformerAudioTaggingModel::Impl {
 public:
  explicit Impl(const AudioTaggingModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(config_.zipformer.model);
    Init(buf.data(), buf.size());
  }

#if __ANDROID_API__ >= 9
  Impl(AAssetManager *mgr, const AudioTaggingModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(mgr, config_.zipformer.model);
    Init(buf.data(), buf.size());
  }
#endif

  Ort::Value Forward(Ort::Value features, Ort::Value features_length) {
    std::array<Ort::Value, 2> inputs = {std::move(features),
                                        std::move(features_length)};

    auto ans =
        sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(),
                   output_names_ptr_.data(), output_names_ptr_.size());
    return std::move(ans[0]);
  }

  int32_t NumEventClasses() const { return num_event_classes_; }

  OrtAllocator *Allocator() { return allocator_; }

 private:
  void Init(void *model_data, size_t model_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);

    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      PrintModelMetadata(os, meta_data);
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
    }

    // get num_event_classes from the output[0].shape,
    // which is (N, num_event_classes)
    num_event_classes_ =
        sess_->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape()[1];
  }

 private:
  AudioTaggingModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;

  int32_t num_event_classes_ = 0;
};

OfflineZipformerAudioTaggingModel::OfflineZipformerAudioTaggingModel(
    const AudioTaggingModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

#if __ANDROID_API__ >= 9
OfflineZipformerAudioTaggingModel::OfflineZipformerAudioTaggingModel(
    AAssetManager *mgr, const AudioTaggingModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}
#endif

OfflineZipformerAudioTaggingModel::~OfflineZipformerAudioTaggingModel() =
    default;

Ort::Value OfflineZipformerAudioTaggingModel::Forward(
    Ort::Value features, Ort::Value features_length) const {
  return impl_->Forward(std::move(features), std::move(features_length));
}

int32_t OfflineZipformerAudioTaggingModel::NumEventClasses() const {
  return impl_->NumEventClasses();
}

OrtAllocator *OfflineZipformerAudioTaggingModel::Allocator() const {
  return impl_->Allocator();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-zipformer-audio-tagging-model.h
================================================
// sherpa-onnx/csrc/offline-zipformer-audio-tagging-model.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_ZIPFORMER_AUDIO_TAGGING_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_ZIPFORMER_AUDIO_TAGGING_MODEL_H_
#include <memory>
#include <utility>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/audio-tagging-model-config.h"

namespace sherpa_onnx {

/** This class implements the zipformer CTC model of the librispeech recipe
 * from icefall.
 *
 * See
 * https://github.com/k2-fsa/icefall/blob/master/egs/audioset/AT/zipformer/export-onnx.py
 */
class OfflineZipformerAudioTaggingModel {
 public:
  explicit OfflineZipformerAudioTaggingModel(
      const AudioTaggingModelConfig &config);

#if __ANDROID_API__ >= 9
  OfflineZipformerAudioTaggingModel(AAssetManager *mgr,
                                    const AudioTaggingModelConfig &config);
#endif

  ~OfflineZipformerAudioTaggingModel();

  /** Run the forward method of the model.
   *
   * @param features  A tensor of shape (N, T, C).
   * @param features_length  A 1-D tensor of shape (N,) containing number of
   *                         valid frames in `features` before padding.
   *                         Its dtype is int64_t.
   *
   * @return Return a tensor
   *  - probs: A 2-D tensor of shape (N, num_event_classes).
   */
  Ort::Value Forward(Ort::Value features, Ort::Value features_length) const;

  /** Return the number of event classes of the model
   */
  int32_t NumEventClasses() const;

  /** Return an allocator for allocating memory
   */
  OrtAllocator *Allocator() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_ZIPFORMER_AUDIO_TAGGING_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/offline-zipformer-ctc-model-config.cc
================================================
// sherpa-onnx/csrc/offline-zipformer-ctc-model-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-zipformer-ctc-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

void OfflineZipformerCtcModelConfig::Register(ParseOptions *po) {
  po->Register("zipformer-ctc-model", &model, "Path to zipformer CTC model");

  std::string prefix = "zipformer-ctc";
  ParseOptions p(prefix, po);

  qnn_config.Register(&p);
}

bool OfflineZipformerCtcModelConfig::Validate() const {
  if (qnn_config.context_binary.empty()) {
    if (model.empty()) {
      SHERPA_ONNX_LOGE("Please provide a Zipformer CTC model");
      return false;
    }

    if (!FileExists(model)) {
      SHERPA_ONNX_LOGE("Zipformer CTC model '%s' does not exist",
                       model.c_str());
      return false;
    }
  }

  if (model.empty() && !qnn_config.context_binary.empty()) {
    // we require that the context_binary exists
    if (!FileExists(qnn_config.context_binary)) {
      SHERPA_ONNX_LOGE(
          "Model is empty, but you provide a context binary that does not "
          "exist");
      return false;
    }
  }

  if (EndsWith(model, ".so") || EndsWith(model, ".bin") ||
      (model.empty() && !qnn_config.context_binary.empty())) {
    return qnn_config.Validate();
  }

  return true;
}

std::string OfflineZipformerCtcModelConfig::ToString() const {
  std::ostringstream os;

  os << "OfflineZipformerCtcModelConfig(";
  os << "model=\"" << model << "\"";

  if (!qnn_config.backend_lib.empty()) {
    os << ", qnn_config=" << qnn_config.ToString() << ", ";
  }

  os << ")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-zipformer-ctc-model-config.h
================================================
// sherpa-onnx/csrc/offline-zipformer-ctc-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_ZIPFORMER_CTC_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_ZIPFORMER_CTC_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"
#include "sherpa-onnx/csrc/qnn-config.h"

namespace sherpa_onnx {

// for
// https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/zipformer/export-onnx-ctc.py
struct OfflineZipformerCtcModelConfig {
  std::string model;
  QnnConfig qnn_config;

  OfflineZipformerCtcModelConfig() = default;

  explicit OfflineZipformerCtcModelConfig(const std::string &model)
      : model(model) {}

  void Register(ParseOptions *po);

  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_ZIPFORMER_CTC_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/offline-zipformer-ctc-model.cc
================================================
// sherpa-onnx/csrc/offline-zipformer-ctc-model.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-zipformer-ctc-model.h"

#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"
#include "sherpa-onnx/csrc/transpose.h"

namespace sherpa_onnx {

class OfflineZipformerCtcModel::Impl {
 public:
  explicit Impl(const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(config_.zipformer_ctc.model);
    Init(buf.data(), buf.size());
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(mgr, config_.zipformer_ctc.model);
    Init(buf.data(), buf.size());
  }

  std::vector<Ort::Value> Forward(Ort::Value features,
                                  Ort::Value features_length) {
    std::array<Ort::Value, 2> inputs = {std::move(features),
                                        std::move(features_length)};

    return sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(),
                      output_names_ptr_.data(), output_names_ptr_.size());
  }

  int32_t VocabSize() const { return vocab_size_; }
  int32_t SubsamplingFactor() const { return 4; }

  OrtAllocator *Allocator() { return allocator_; }

 private:
  void Init(void *model_data, size_t model_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);

    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      PrintModelMetadata(os, meta_data);
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }

    // get vocab size from the output[0].shape, which is (N, T, vocab_size)
    vocab_size_ =
        sess_->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape()[2];
  }

 private:
  OfflineModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;

  int32_t vocab_size_ = 0;
};

OfflineZipformerCtcModel::OfflineZipformerCtcModel(
    const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineZipformerCtcModel::OfflineZipformerCtcModel(
    Manager *mgr, const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineZipformerCtcModel::~OfflineZipformerCtcModel() = default;

std::vector<Ort::Value> OfflineZipformerCtcModel::Forward(
    Ort::Value features, Ort::Value features_length) {
  return impl_->Forward(std::move(features), std::move(features_length));
}

int32_t OfflineZipformerCtcModel::VocabSize() const {
  return impl_->VocabSize();
}

OrtAllocator *OfflineZipformerCtcModel::Allocator() const {
  return impl_->Allocator();
}

int32_t OfflineZipformerCtcModel::SubsamplingFactor() const {
  return impl_->SubsamplingFactor();
}

#if __ANDROID_API__ >= 9
template OfflineZipformerCtcModel::OfflineZipformerCtcModel(
    AAssetManager *mgr, const OfflineModelConfig &config);
#endif

#if __OHOS__
template OfflineZipformerCtcModel::OfflineZipformerCtcModel(
    NativeResourceManager *mgr, const OfflineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/offline-zipformer-ctc-model.h
================================================
// sherpa-onnx/csrc/offline-zipformer-ctc-model.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_ZIPFORMER_CTC_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_ZIPFORMER_CTC_MODEL_H_
#include <memory>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-ctc-model.h"
#include "sherpa-onnx/csrc/offline-model-config.h"

namespace sherpa_onnx {

/** This class implements the zipformer CTC model of the librispeech recipe
 * from icefall.
 *
 * See
 * https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/zipformer/export-onnx-ctc.py
 */
class OfflineZipformerCtcModel : public OfflineCtcModel {
 public:
  explicit OfflineZipformerCtcModel(const OfflineModelConfig &config);

  template <typename Manager>
  OfflineZipformerCtcModel(Manager *mgr, const OfflineModelConfig &config);

  ~OfflineZipformerCtcModel() override;

  /** Run the forward method of the model.
   *
   * @param features  A tensor of shape (N, T, C).
   * @param features_length  A 1-D tensor of shape (N,) containing number of
   *                         valid frames in `features` before padding.
   *                         Its dtype is int64_t.
   *
   * @return Return a vector containing:
   *  - log_probs: A 3-D tensor of shape (N, T', vocab_size).
   *  - log_probs_length A 1-D tensor of shape (N,). Its dtype is int64_t
   */
  std::vector<Ort::Value> Forward(Ort::Value features,
                                  Ort::Value features_length) override;

  /** Return the vocabulary size of the model
   */
  int32_t VocabSize() const override;

  /** Return an allocator for allocating memory
   */
  OrtAllocator *Allocator() const override;

  int32_t SubsamplingFactor() const override;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_OFFLINE_ZIPFORMER_CTC_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/online-cnn-bilstm-model-meta-data.h
================================================
// sherpa-onnx/csrc/online-cnn-bilstm-model-meta-data.h
//
// Copyright (c) 2024 Jian You (jianyou@cisco.com, Cisco Systems)

#ifndef SHERPA_ONNX_CSRC_ONLINE_CNN_BILSTM_MODEL_META_DATA_H_
#define SHERPA_ONNX_CSRC_ONLINE_CNN_BILSTM_MODEL_META_DATA_H_

namespace sherpa_onnx {

struct OnlineCNNBiLSTMModelMetaData {
  int32_t comma_id = -1;
  int32_t period_id = -1;
  int32_t quest_id = -1;

  int32_t upper_id = -1;
  int32_t cap_id = -1;
  int32_t mix_case_id = -1;

  int32_t num_cases = -1;
  int32_t num_punctuations = -1;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_CNN_BILSTM_MODEL_META_DATA_H_


================================================
FILE: sherpa-onnx/csrc/online-cnn-bilstm-model.cc
================================================
// sherpa-onnx/csrc/online-cnn-bilstm-model.cc
//
// Copyright (c) 2024 Jian You (jianyou@cisco.com, Cisco Systems)

#include "sherpa-onnx/csrc/online-cnn-bilstm-model.h"

#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class OnlineCNNBiLSTMModel::Impl {
 public:
  explicit Impl(const OnlinePunctuationModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(config_.cnn_bilstm);
    Init(buf.data(), buf.size());
  }

  template <typename Manager>
  Impl(Manager *mgr, const OnlinePunctuationModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(mgr, config_.cnn_bilstm);
    Init(buf.data(), buf.size());
  }

  std::pair<Ort::Value, Ort::Value> Forward(Ort::Value token_ids,
                                            Ort::Value valid_ids,
                                            Ort::Value label_lens) {
    std::array<Ort::Value, 3> inputs = {
        std::move(token_ids), std::move(valid_ids), std::move(label_lens)};

    auto ans =
        sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(),
                   output_names_ptr_.data(), output_names_ptr_.size());
    return {std::move(ans[0]), std::move(ans[1])};
  }

  OrtAllocator *Allocator() { return allocator_; }

  const OnlineCNNBiLSTMModelMetaData &GetModelMetadata() const {
    return meta_data_;
  }

 private:
  void Init(void *model_data, size_t model_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);

    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below

    SHERPA_ONNX_READ_META_DATA(meta_data_.comma_id, "COMMA");
    SHERPA_ONNX_READ_META_DATA(meta_data_.period_id, "PERIOD");
    SHERPA_ONNX_READ_META_DATA(meta_data_.quest_id, "QUESTION");

    // assert here, because we will use the constant value
    assert(meta_data_.comma_id == 1);
    assert(meta_data_.period_id == 2);
    assert(meta_data_.quest_id == 3);

    SHERPA_ONNX_READ_META_DATA(meta_data_.upper_id, "UPPER");
    SHERPA_ONNX_READ_META_DATA(meta_data_.cap_id, "CAP");
    SHERPA_ONNX_READ_META_DATA(meta_data_.mix_case_id, "MIX_CASE");

    assert(meta_data_.upper_id == 1);
    assert(meta_data_.cap_id == 2);
    assert(meta_data_.mix_case_id == 3);

    // output shape is (T', num_cases)
    meta_data_.num_cases =
        sess_->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape()[1];
    meta_data_.num_punctuations =
        sess_->GetOutputTypeInfo(1).GetTensorTypeAndShapeInfo().GetShape()[1];
  }

 private:
  OnlinePunctuationModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;

  OnlineCNNBiLSTMModelMetaData meta_data_;
};

OnlineCNNBiLSTMModel::OnlineCNNBiLSTMModel(
    const OnlinePunctuationModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OnlineCNNBiLSTMModel::OnlineCNNBiLSTMModel(
    Manager *mgr, const OnlinePunctuationModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OnlineCNNBiLSTMModel::~OnlineCNNBiLSTMModel() = default;

std::pair<Ort::Value, Ort::Value> OnlineCNNBiLSTMModel::Forward(
    Ort::Value token_ids, Ort::Value valid_ids, Ort::Value label_lens) const {
  return impl_->Forward(std::move(token_ids), std::move(valid_ids),
                        std::move(label_lens));
}

OrtAllocator *OnlineCNNBiLSTMModel::Allocator() const {
  return impl_->Allocator();
}

const OnlineCNNBiLSTMModelMetaData &OnlineCNNBiLSTMModel::GetModelMetadata()
    const {
  return impl_->GetModelMetadata();
}

#if __ANDROID_API__ >= 9
template OnlineCNNBiLSTMModel::OnlineCNNBiLSTMModel(
    AAssetManager *mgr, const OnlinePunctuationModelConfig &config);
#endif

#if __OHOS__
template OnlineCNNBiLSTMModel::OnlineCNNBiLSTMModel(
    NativeResourceManager *mgr, const OnlinePunctuationModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-cnn-bilstm-model.h
================================================
// sherpa-onnx/csrc/online-cnn-bilstm-model.h
//
// Copyright (c) 2024 Jian You (jianyou@cisco.com, Cisco Systems)

#ifndef SHERPA_ONNX_CSRC_ONLINE_CNN_BILSTM_MODEL_H_
#define SHERPA_ONNX_CSRC_ONLINE_CNN_BILSTM_MODEL_H_
#include <memory>
#include <utility>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/online-cnn-bilstm-model-meta-data.h"
#include "sherpa-onnx/csrc/online-punctuation-model-config.h"

namespace sherpa_onnx {

/** This class implements
 *  https://github.com/frankyoujian/Edge-Punct-Casing/blob/main/onnx_decode_sentence.py
 */
class OnlineCNNBiLSTMModel {
 public:
  explicit OnlineCNNBiLSTMModel(const OnlinePunctuationModelConfig &config);

  template <typename Manager>
  OnlineCNNBiLSTMModel(Manager *mgr,
                       const OnlinePunctuationModelConfig &config);

  ~OnlineCNNBiLSTMModel();

  /** Run the forward method of the model.
   *
   * @param token_ids  A tensor of shape (N, T) of dtype int32.
   * @param valid_ids  A tensor of shape (N, T) of dtype int32.
   * @param label_lens A tensor of shape (N) of dtype int32.
   *
   * @return Return a pair of tensors
   *  - case_logits:  A 2-D tensor of shape (T', num_cases).
   *  - punct_logits: A 2-D tensor of shape (T', num_puncts).
   */
  std::pair<Ort::Value, Ort::Value> Forward(Ort::Value token_ids,
                                            Ort::Value valid_ids,
                                            Ort::Value label_lens) const;

  /** Return an allocator for allocating memory
   */
  OrtAllocator *Allocator() const;

  const OnlineCNNBiLSTMModelMetaData &GetModelMetadata() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_CNN_BILSTM_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/online-conformer-transducer-model.cc
================================================
// sherpa-onnx/csrc/online-conformer-transducer-model.cc
//
// Copyright (c)  2023 Jingzhao Ou (jingzhao.ou@gmail.com)

#include "sherpa-onnx/csrc/online-conformer-transducer-model.h"

#include <algorithm>
#include <cassert>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/cat.h"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/online-transducer-decoder.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"
#include "sherpa-onnx/csrc/unbind.h"

namespace sherpa_onnx {

OnlineConformerTransducerModel::OnlineConformerTransducerModel(
    const OnlineModelConfig &config)
    : env_(ORT_LOGGING_LEVEL_ERROR),
      config_(config),
      sess_opts_(GetSessionOptions(config)),
      allocator_{} {
  {
    auto buf = ReadFile(config.transducer.encoder);
    InitEncoder(buf.data(), buf.size());
  }

  {
    auto buf = ReadFile(config.transducer.decoder);
    InitDecoder(buf.data(), buf.size());
  }

  {
    auto buf = ReadFile(config.transducer.joiner);
    InitJoiner(buf.data(), buf.size());
  }
}

template <typename Manager>
OnlineConformerTransducerModel::OnlineConformerTransducerModel(
    Manager *mgr, const OnlineModelConfig &config)
    : env_(ORT_LOGGING_LEVEL_ERROR),
      config_(config),
      sess_opts_(GetSessionOptions(config)),
      allocator_{} {
  {
    auto buf = ReadFile(mgr, config.transducer.encoder);
    InitEncoder(buf.data(), buf.size());
  }

  {
    auto buf = ReadFile(mgr, config.transducer.decoder);
    InitDecoder(buf.data(), buf.size());
  }

  {
    auto buf = ReadFile(mgr, config.transducer.joiner);
    InitJoiner(buf.data(), buf.size());
  }
}

void OnlineConformerTransducerModel::InitEncoder(void *model_data,
                                                 size_t model_data_length) {
  encoder_sess_ = std::make_unique<Ort::Session>(env_, model_data,
                                                 model_data_length, sess_opts_);

  GetInputNames(encoder_sess_.get(), &encoder_input_names_,
                &encoder_input_names_ptr_);

  GetOutputNames(encoder_sess_.get(), &encoder_output_names_,
                 &encoder_output_names_ptr_);

  // get meta data
  Ort::ModelMetadata meta_data = encoder_sess_->GetModelMetadata();
  if (config_.debug) {
    std::ostringstream os;
    os << "---encoder---\n";
    PrintModelMetadata(os, meta_data);
#if __OHOS__
    SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
    SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
  }

  Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
  SHERPA_ONNX_READ_META_DATA(num_encoder_layers_, "num_encoder_layers");
  SHERPA_ONNX_READ_META_DATA(T_, "T");
  SHERPA_ONNX_READ_META_DATA(decode_chunk_len_, "decode_chunk_len");
  SHERPA_ONNX_READ_META_DATA(left_context_, "left_context");
  SHERPA_ONNX_READ_META_DATA(encoder_dim_, "encoder_dim");
  SHERPA_ONNX_READ_META_DATA(pad_length_, "pad_length");
  SHERPA_ONNX_READ_META_DATA(cnn_module_kernel_, "cnn_module_kernel");
}

void OnlineConformerTransducerModel::InitDecoder(void *model_data,
                                                 size_t model_data_length) {
  decoder_sess_ = std::make_unique<Ort::Session>(env_, model_data,
                                                 model_data_length, sess_opts_);

  GetInputNames(decoder_sess_.get(), &decoder_input_names_,
                &decoder_input_names_ptr_);

  GetOutputNames(decoder_sess_.get(), &decoder_output_names_,
                 &decoder_output_names_ptr_);

  // get meta data
  Ort::ModelMetadata meta_data = decoder_sess_->GetModelMetadata();
  if (config_.debug) {
    std::ostringstream os;
    os << "---decoder---\n";
    PrintModelMetadata(os, meta_data);
#if __OHOS__
    SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
    SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
  }

  Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
  SHERPA_ONNX_READ_META_DATA(vocab_size_, "vocab_size");
  SHERPA_ONNX_READ_META_DATA(context_size_, "context_size");
}

void OnlineConformerTransducerModel::InitJoiner(void *model_data,
                                                size_t model_data_length) {
  joiner_sess_ = std::make_unique<Ort::Session>(env_, model_data,
                                                model_data_length, sess_opts_);

  GetInputNames(joiner_sess_.get(), &joiner_input_names_,
                &joiner_input_names_ptr_);

  GetOutputNames(joiner_sess_.get(), &joiner_output_names_,
                 &joiner_output_names_ptr_);

  // get meta data
  Ort::ModelMetadata meta_data = joiner_sess_->GetModelMetadata();
  if (config_.debug) {
    std::ostringstream os;
    os << "---joiner---\n";
    PrintModelMetadata(os, meta_data);
    SHERPA_ONNX_LOGE("%s", os.str().c_str());
  }
}

std::vector<Ort::Value> OnlineConformerTransducerModel::StackStates(
    const std::vector<std::vector<Ort::Value>> &states) const {
  int32_t batch_size = static_cast<int32_t>(states.size());

  std::vector<const Ort::Value *> attn_vec(batch_size);
  std::vector<const Ort::Value *> conv_vec(batch_size);

  for (int32_t i = 0; i != batch_size; ++i) {
    assert(states[i].size() == 2);
    attn_vec[i] = &states[i][0];
    conv_vec[i] = &states[i][1];
  }

  auto allocator =
      const_cast<OnlineConformerTransducerModel *>(this)->allocator_;

  Ort::Value attn = Cat(allocator, attn_vec, 2);
  Ort::Value conv = Cat(allocator, conv_vec, 2);

  std::vector<Ort::Value> ans;
  ans.reserve(2);
  ans.push_back(std::move(attn));
  ans.push_back(std::move(conv));

  return ans;
}

std::vector<std::vector<Ort::Value>>
OnlineConformerTransducerModel::UnStackStates(
    const std::vector<Ort::Value> &states) const {
  const int32_t batch_size =
      states[0].GetTensorTypeAndShapeInfo().GetShape()[2];
  assert(states.size() == 2);

  std::vector<std::vector<Ort::Value>> ans(batch_size);

  auto allocator =
      const_cast<OnlineConformerTransducerModel *>(this)->allocator_;

  std::vector<Ort::Value> attn_vec = Unbind(allocator, &states[0], 2);
  std::vector<Ort::Value> conv_vec = Unbind(allocator, &states[1], 2);

  assert(attn_vec.size() == batch_size);
  assert(conv_vec.size() == batch_size);

  for (int32_t i = 0; i != batch_size; ++i) {
    ans[i].push_back(std::move(attn_vec[i]));
    ans[i].push_back(std::move(conv_vec[i]));
  }

  return ans;
}

std::vector<Ort::Value> OnlineConformerTransducerModel::GetEncoderInitStates() {
  // Please see
  // https://github.com/k2-fsa/icefall/blob/86b0db6eb9c84d9bc90a71d92774fe2a7f73e6ab/egs/librispeech/ASR/pruned_transducer_stateless5/conformer.py#L203
  // for details
  constexpr int32_t kBatchSize = 1;
  std::array<int64_t, 4> h_shape{num_encoder_layers_, left_context_, kBatchSize,
                                 encoder_dim_};
  Ort::Value h = Ort::Value::CreateTensor<float>(allocator_, h_shape.data(),
                                                 h_shape.size());

  Fill<float>(&h, 0);

  std::array<int64_t, 4> c_shape{num_encoder_layers_, cnn_module_kernel_ - 1,
                                 kBatchSize, encoder_dim_};

  Ort::Value c = Ort::Value::CreateTensor<float>(allocator_, c_shape.data(),
                                                 c_shape.size());

  Fill<float>(&c, 0);

  std::vector<Ort::Value> states;

  states.reserve(2);
  states.push_back(std::move(h));
  states.push_back(std::move(c));

  return states;
}

std::pair<Ort::Value, std::vector<Ort::Value>>
OnlineConformerTransducerModel::RunEncoder(Ort::Value features,
                                           std::vector<Ort::Value> states,
                                           Ort::Value processed_frames) {
  std::array<Ort::Value, 4> encoder_inputs = {
      std::move(features), std::move(states[0]), std::move(states[1]),
      std::move(processed_frames)};

  auto encoder_out = encoder_sess_->Run(
      {}, encoder_input_names_ptr_.data(), encoder_inputs.data(),
      encoder_inputs.size(), encoder_output_names_ptr_.data(),
      encoder_output_names_ptr_.size());

  std::vector<Ort::Value> next_states;
  next_states.reserve(2);
  next_states.push_back(std::move(encoder_out[1]));
  next_states.push_back(std::move(encoder_out[2]));

  return {std::move(encoder_out[0]), std::move(next_states)};
}

Ort::Value OnlineConformerTransducerModel::RunDecoder(
    Ort::Value decoder_input) {
  auto decoder_out = decoder_sess_->Run(
      {}, decoder_input_names_ptr_.data(), &decoder_input, 1,
      decoder_output_names_ptr_.data(), decoder_output_names_ptr_.size());
  return std::move(decoder_out[0]);
}

Ort::Value OnlineConformerTransducerModel::RunJoiner(Ort::Value encoder_out,
                                                     Ort::Value decoder_out) {
  std::array<Ort::Value, 2> joiner_input = {std::move(encoder_out),
                                            std::move(decoder_out)};
  auto logit =
      joiner_sess_->Run({}, joiner_input_names_ptr_.data(), joiner_input.data(),
                        joiner_input.size(), joiner_output_names_ptr_.data(),
                        joiner_output_names_ptr_.size());

  return std::move(logit[0]);
}

#if __ANDROID_API__ >= 9
template OnlineConformerTransducerModel::OnlineConformerTransducerModel(
    AAssetManager *mgr, const OnlineModelConfig &config);
#endif

#if __OHOS__
template OnlineConformerTransducerModel::OnlineConformerTransducerModel(
    NativeResourceManager *mgr, const OnlineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-conformer-transducer-model.h
================================================
// sherpa-onnx/csrc/online-conformer-transducer-model.h
//
// Copyright (c) 2023 Jingzhao Ou (jingzhao.ou@gmail.com)

#ifndef SHERPA_ONNX_CSRC_ONLINE_CONFORMER_TRANSDUCER_MODEL_H_
#define SHERPA_ONNX_CSRC_ONLINE_CONFORMER_TRANSDUCER_MODEL_H_

#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/online-model-config.h"
#include "sherpa-onnx/csrc/online-transducer-model.h"

namespace sherpa_onnx {

class OnlineConformerTransducerModel : public OnlineTransducerModel {
 public:
  explicit OnlineConformerTransducerModel(const OnlineModelConfig &config);

  template <typename Manager>
  OnlineConformerTransducerModel(Manager *mgr, const OnlineModelConfig &config);

  std::vector<Ort::Value> StackStates(
      const std::vector<std::vector<Ort::Value>> &states) const override;

  std::vector<std::vector<Ort::Value>> UnStackStates(
      const std::vector<Ort::Value> &states) const override;

  std::vector<Ort::Value> GetEncoderInitStates() override;

  std::pair<Ort::Value, std::vector<Ort::Value>> RunEncoder(
      Ort::Value features, std::vector<Ort::Value> states,
      Ort::Value processed_frames) override;

  Ort::Value RunDecoder(Ort::Value decoder_input) override;

  Ort::Value RunJoiner(Ort::Value encoder_out, Ort::Value decoder_out) override;

  int32_t ContextSize() const override { return context_size_; }

  int32_t ChunkSize() const override { return T_; }

  int32_t ChunkShift() const override { return decode_chunk_len_; }

  int32_t VocabSize() const override { return vocab_size_; }
  OrtAllocator *Allocator() override { return allocator_; }

 private:
  void InitEncoder(void *model_data, size_t model_data_length);
  void InitDecoder(void *model_data, size_t model_data_length);
  void InitJoiner(void *model_data, size_t model_data_length);

 private:
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> encoder_sess_;
  std::unique_ptr<Ort::Session> decoder_sess_;
  std::unique_ptr<Ort::Session> joiner_sess_;

  std::vector<std::string> encoder_input_names_;
  std::vector<const char *> encoder_input_names_ptr_;

  std::vector<std::string> encoder_output_names_;
  std::vector<const char *> encoder_output_names_ptr_;

  std::vector<std::string> decoder_input_names_;
  std::vector<const char *> decoder_input_names_ptr_;

  std::vector<std::string> decoder_output_names_;
  std::vector<const char *> decoder_output_names_ptr_;

  std::vector<std::string> joiner_input_names_;
  std::vector<const char *> joiner_input_names_ptr_;

  std::vector<std::string> joiner_output_names_;
  std::vector<const char *> joiner_output_names_ptr_;

  OnlineModelConfig config_;

  int32_t num_encoder_layers_ = 0;
  int32_t T_ = 0;
  int32_t decode_chunk_len_ = 0;
  int32_t cnn_module_kernel_ = 0;
  int32_t context_size_ = 0;
  int32_t left_context_ = 0;
  // TODO(jingzhaoou): to retrieve from model metadata
  int32_t right_context_ = 4;
  int32_t encoder_dim_ = 0;
  int32_t pad_length_ = 0;
  int32_t vocab_size_ = 0;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_CONFORMER_TRANSDUCER_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/online-ctc-decoder.h
================================================
// sherpa-onnx/csrc/online-ctc-decoder.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_ONLINE_CTC_DECODER_H_
#define SHERPA_ONNX_CSRC_ONLINE_CTC_DECODER_H_

#include <memory>
#include <vector>

#include "kaldi-decoder/csrc/faster-decoder.h"
#include "onnxruntime_cxx_api.h"  // NOLINT

namespace sherpa_onnx {

class OnlineStream;

struct OnlineCtcDecoderResult {
  /// Number of frames after subsampling we have decoded so far
  int32_t frame_offset = 0;

  /// The decoded token IDs
  std::vector<int64_t> tokens;

  /// The decoded word IDs
  /// Note: tokens.size() is usually not equal to words.size()
  /// words is empty for greedy search decoding.
  /// it is not empty when an HLG graph or an HLG graph is used.
  std::vector<int32_t> words;

  /// timestamps[i] contains the output frame index where tokens[i] is decoded.
  /// Note: The index is after subsampling
  ///
  /// tokens.size() == timestamps.size()
  std::vector<int32_t> timestamps;

  int32_t num_trailing_blanks = 0;
};

class OnlineCtcDecoder {
 public:
  virtual ~OnlineCtcDecoder() = default;

  /** Run streaming CTC decoding given the output from the encoder model.
   *
   * @param log_probs A 3-D tensor of shape
   *                  (batch_size, num_frames, vocab_size) containing
   *                  lob_probs in row major.
   *
   * @param  results Input & Output parameters..
   */
  virtual void Decode(const float *log_probs, int32_t batch_size,
                      int32_t num_frames, int32_t vocab_size,
                      std::vector<OnlineCtcDecoderResult> *results,
                      OnlineStream **ss = nullptr, int32_t n = 0) = 0;

  virtual std::unique_ptr<kaldi_decoder::FasterDecoder> CreateFasterDecoder()
      const {
    return nullptr;
  }
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_CTC_DECODER_H_


================================================
FILE: sherpa-onnx/csrc/online-ctc-fst-decoder-config.cc
================================================
// sherpa-onnx/csrc/online-ctc-fst-decoder-config.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/online-ctc-fst-decoder-config.h"

#include <sstream>
#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

std::string OnlineCtcFstDecoderConfig::ToString() const {
  std::ostringstream os;

  os << "OnlineCtcFstDecoderConfig(";
  os << "graph=\"" << graph << "\", ";
  os << "max_active=" << max_active << ")";

  return os.str();
}

void OnlineCtcFstDecoderConfig::Register(ParseOptions *po) {
  po->Register("ctc-graph", &graph, "Path to H.fst, HL.fst, or HLG.fst");

  po->Register("ctc-max-active", &max_active,
               "Decoder max active states.  Larger->slower; more accurate");
}

bool OnlineCtcFstDecoderConfig::Validate() const {
  if (!graph.empty() && !FileExists(graph)) {
    SHERPA_ONNX_LOGE("graph: '%s' does not exist", graph.c_str());
    return false;
  }
  return true;
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-ctc-fst-decoder-config.h
================================================
// sherpa-onnx/csrc/online-ctc-fst-decoder-config.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_ONLINE_CTC_FST_DECODER_CONFIG_H_
#define SHERPA_ONNX_CSRC_ONLINE_CTC_FST_DECODER_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OnlineCtcFstDecoderConfig {
  // Path to H.fst, HL.fst or HLG.fst
  std::string graph;
  int32_t max_active = 3000;

  OnlineCtcFstDecoderConfig() = default;

  OnlineCtcFstDecoderConfig(const std::string &graph, int32_t max_active)
      : graph(graph), max_active(max_active) {}

  std::string ToString() const;

  void Register(ParseOptions *po);
  bool Validate() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_CTC_FST_DECODER_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/online-ctc-fst-decoder.cc
================================================
// sherpa-onnx/csrc/online-ctc-fst-decoder.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/online-ctc-fst-decoder.h"

#include <algorithm>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "fst/fstlib.h"
#include "kaldi-decoder/csrc/decodable-ctc.h"
#include "kaldifst/csrc/fstext-utils.h"
#include "sherpa-onnx/csrc/fst-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/online-stream.h"

namespace sherpa_onnx {

OnlineCtcFstDecoder::OnlineCtcFstDecoder(
    const OnlineCtcFstDecoderConfig &config, int32_t blank_id)
    : config_(config), fst_(ReadGraph(config.graph)), blank_id_(blank_id) {
  options_.max_active = config_.max_active;
}

std::unique_ptr<kaldi_decoder::FasterDecoder>
OnlineCtcFstDecoder::CreateFasterDecoder() const {
  return std::make_unique<kaldi_decoder::FasterDecoder>(*fst_, options_);
}

static void DecodeOne(const float *log_probs, int32_t num_rows,
                      int32_t num_cols, OnlineCtcDecoderResult *result,
                      OnlineStream *s, int32_t blank_id) {
  int32_t &processed_frames = s->GetFasterDecoderProcessedFrames();
  kaldi_decoder::DecodableCtc decodable(log_probs, num_rows, num_cols,
                                        processed_frames);

  kaldi_decoder::FasterDecoder *decoder = s->GetFasterDecoder();
  if (processed_frames == 0) {
    decoder->InitDecoding();
  }

  decoder->AdvanceDecoding(&decodable);

  if (decoder->ReachedFinal()) {
    fst::VectorFst<fst::LatticeArc> fst_out;
    bool ok = decoder->GetBestPath(&fst_out);
    if (ok) {
      std::vector<int32_t> isymbols_out;
      std::vector<int32_t> osymbols_out;
      /*ok =*/fst::GetLinearSymbolSequence(fst_out, &isymbols_out,
                                           &osymbols_out, nullptr);
      // TODO(fangjun): handle ok is false
      std::vector<int64_t> tokens;
      tokens.reserve(isymbols_out.size());

      std::vector<int32_t> timestamps;
      timestamps.reserve(isymbols_out.size());

      std::ostringstream os;
      int32_t prev_id = -1;
      int32_t &num_trailing_blanks = result->num_trailing_blanks;
      int32_t f = 0;  // frame number

      for (auto i : isymbols_out) {
        i -= 1;

        if (i == blank_id) {
          num_trailing_blanks += 1;
        } else {
          num_trailing_blanks = 0;
        }

        if (i != blank_id && i != prev_id) {
          tokens.push_back(i);
          timestamps.push_back(f);
        }
        prev_id = i;
        f += 1;
      }

      result->tokens = std::move(tokens);
      result->words = std::move(osymbols_out);
      result->timestamps = std::move(timestamps);
      // no need to set frame_offset
    }
  }

  processed_frames += num_rows;
}

void OnlineCtcFstDecoder::Decode(const float *log_probs, int32_t batch_size,
                                 int32_t num_frames, int32_t vocab_size,
                                 std::vector<OnlineCtcDecoderResult> *results,
                                 OnlineStream **ss, int32_t n) {
  if (batch_size != results->size()) {
    SHERPA_ONNX_LOGE("Size mismatch! log_probs.size(0) %d, results.size(0): %d",
                     batch_size, static_cast<int32_t>(results->size()));
    exit(-1);
  }

  if (batch_size != n) {
    SHERPA_ONNX_LOGE("Size mismatch! log_probs.size(0) %d, n: %d", batch_size,
                     n);
    exit(-1);
  }

  const float *p = log_probs;

  for (int32_t i = 0; i != batch_size; ++i) {
    DecodeOne(p + i * num_frames * vocab_size, num_frames, vocab_size,
              &(*results)[i], ss[i], blank_id_);
  }
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-ctc-fst-decoder.h
================================================
// sherpa-onnx/csrc/online-ctc-fst-decoder.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_ONLINE_CTC_FST_DECODER_H_
#define SHERPA_ONNX_CSRC_ONLINE_CTC_FST_DECODER_H_

#include <memory>
#include <vector>

#include "fst/fst.h"
#include "sherpa-onnx/csrc/online-ctc-decoder.h"
#include "sherpa-onnx/csrc/online-ctc-fst-decoder-config.h"

namespace sherpa_onnx {

class OnlineCtcFstDecoder : public OnlineCtcDecoder {
 public:
  OnlineCtcFstDecoder(const OnlineCtcFstDecoderConfig &config,
                      int32_t blank_id);

  void Decode(const float *log_probs, int32_t batch_size, int32_t num_frames,
              int32_t vocab_size, std::vector<OnlineCtcDecoderResult> *results,
              OnlineStream **ss = nullptr, int32_t n = 0) override;

  std::unique_ptr<kaldi_decoder::FasterDecoder> CreateFasterDecoder()
      const override;

 private:
  OnlineCtcFstDecoderConfig config_;
  kaldi_decoder::FasterDecoderOptions options_;

  std::unique_ptr<fst::Fst<fst::StdArc>> fst_;
  int32_t blank_id_ = 0;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_CTC_FST_DECODER_H_


================================================
FILE: sherpa-onnx/csrc/online-ctc-greedy-search-decoder.cc
================================================
// sherpa-onnx/csrc/online-ctc-greedy-search-decoder.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/online-ctc-greedy-search-decoder.h"

#include <algorithm>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OnlineCtcGreedySearchDecoder::Decode(
    const float *log_probs, int32_t batch_size, int32_t num_frames,
    int32_t vocab_size, std::vector<OnlineCtcDecoderResult> *results,
    OnlineStream ** /*ss=nullptr*/, int32_t /*n = 0*/) {
  if (batch_size != results->size()) {
    SHERPA_ONNX_LOGE("Size mismatch! log_probs.size(0) %d, results.size(0): %d",
                     batch_size, static_cast<int32_t>(results->size()));
    exit(-1);
  }

  const float *p = log_probs;

  for (int32_t b = 0; b != batch_size; ++b) {
    auto &r = (*results)[b];

    int32_t prev_id = -1;
    if (!r.tokens.empty()) {
      if (r.num_trailing_blanks > 0) {
        prev_id = blank_id_;
      } else {
        prev_id = r.tokens.back();
      }
    }

    for (int32_t t = 0; t != num_frames; ++t, p += vocab_size) {
      int32_t y = static_cast<int32_t>(std::distance(
          static_cast<const float *>(p),
          std::max_element(static_cast<const float *>(p),
                           static_cast<const float *>(p) + vocab_size)));

      if (y == blank_id_) {
        r.num_trailing_blanks += 1;
      } else {
        r.num_trailing_blanks = 0;
      }

      if (y != blank_id_ && y != prev_id) {
        r.tokens.push_back(y);
        r.timestamps.push_back(t + r.frame_offset);
      }

      prev_id = y;
    }  // for (int32_t t = 0; t != num_frames; ++t) {
  }    // for (int32_t b = 0; b != batch_size; ++b)

  // Update frame_offset
  for (auto &r : *results) {
    r.frame_offset += num_frames;
  }
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-ctc-greedy-search-decoder.h
================================================
// sherpa-onnx/csrc/online-ctc-greedy-search-decoder.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_ONLINE_CTC_GREEDY_SEARCH_DECODER_H_
#define SHERPA_ONNX_CSRC_ONLINE_CTC_GREEDY_SEARCH_DECODER_H_

#include <vector>

#include "sherpa-onnx/csrc/online-ctc-decoder.h"

namespace sherpa_onnx {

class OnlineCtcGreedySearchDecoder : public OnlineCtcDecoder {
 public:
  explicit OnlineCtcGreedySearchDecoder(int32_t blank_id)
      : blank_id_(blank_id) {}

  void Decode(const float *log_probs, int32_t batch_size, int32_t num_frames,
              int32_t vocab_size, std::vector<OnlineCtcDecoderResult> *results,
              OnlineStream **ss = nullptr, int32_t n = 0) override;

 private:
  int32_t blank_id_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_CTC_GREEDY_SEARCH_DECODER_H_


================================================
FILE: sherpa-onnx/csrc/online-ctc-model.cc
================================================
// sherpa-onnx/csrc/online-ctc-model.cc
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/online-ctc-model.h"

#include <algorithm>
#include <memory>
#include <sstream>
#include <string>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/online-nemo-ctc-model.h"
#include "sherpa-onnx/csrc/online-t-one-ctc-model.h"
#include "sherpa-onnx/csrc/online-wenet-ctc-model.h"
#include "sherpa-onnx/csrc/online-zipformer2-ctc-model.h"
#include "sherpa-onnx/csrc/onnx-utils.h"

namespace sherpa_onnx {

std::unique_ptr<OnlineCtcModel> OnlineCtcModel::Create(
    const OnlineModelConfig &config) {
  if (!config.wenet_ctc.model.empty()) {
    return std::make_unique<OnlineWenetCtcModel>(config);
  } else if (!config.zipformer2_ctc.model.empty()) {
    return std::make_unique<OnlineZipformer2CtcModel>(config);
  } else if (!config.nemo_ctc.model.empty()) {
    return std::make_unique<OnlineNeMoCtcModel>(config);
  } else if (!config.t_one_ctc.model.empty()) {
    return std::make_unique<OnlineToneCtcModel>(config);
  } else {
    SHERPA_ONNX_LOGE("Please specify a CTC model");
    SHERPA_ONNX_EXIT(-1);
  }
}

template <typename Manager>
std::unique_ptr<OnlineCtcModel> OnlineCtcModel::Create(
    Manager *mgr, const OnlineModelConfig &config) {
  if (!config.wenet_ctc.model.empty()) {
    return std::make_unique<OnlineWenetCtcModel>(mgr, config);
  } else if (!config.zipformer2_ctc.model.empty()) {
    return std::make_unique<OnlineZipformer2CtcModel>(mgr, config);
  } else if (!config.nemo_ctc.model.empty()) {
    return std::make_unique<OnlineNeMoCtcModel>(mgr, config);
  } else if (!config.t_one_ctc.model.empty()) {
    return std::make_unique<OnlineToneCtcModel>(mgr, config);
  } else {
    SHERPA_ONNX_LOGE("Please specify a CTC model");
    SHERPA_ONNX_EXIT(-1);
  }
}

#if __ANDROID_API__ >= 9
template std::unique_ptr<OnlineCtcModel> OnlineCtcModel::Create(
    AAssetManager *mgr, const OnlineModelConfig &config);
#endif

#if __OHOS__
template std::unique_ptr<OnlineCtcModel> OnlineCtcModel::Create(
    NativeResourceManager *mgr, const OnlineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-ctc-model.h
================================================
// sherpa-onnx/csrc/online-ctc-model.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_ONLINE_CTC_MODEL_H_
#define SHERPA_ONNX_CSRC_ONLINE_CTC_MODEL_H_

#include <memory>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/online-model-config.h"

namespace sherpa_onnx {

class OnlineCtcModel {
 public:
  virtual ~OnlineCtcModel() = default;

  static std::unique_ptr<OnlineCtcModel> Create(
      const OnlineModelConfig &config);

  template <typename Manager>
  static std::unique_ptr<OnlineCtcModel> Create(
      Manager *mgr, const OnlineModelConfig &config);

  // Return a list of tensors containing the initial states
  virtual std::vector<Ort::Value> GetInitStates() const = 0;

  /** Stack a list of individual states into a batch.
   *
   * It is the inverse operation of `UnStackStates`.
   *
   * @param states states[i] contains the state for the i-th utterance.
   * @return Return a single value representing the batched state.
   */
  virtual std::vector<Ort::Value> StackStates(
      std::vector<std::vector<Ort::Value>> states) const = 0;

  /** Unstack a batch state into a list of individual states.
   *
   * It is the inverse operation of `StackStates`.
   *
   * @param states A batched state.
   * @return ans[i] contains the state for the i-th utterance.
   */
  virtual std::vector<std::vector<Ort::Value>> UnStackStates(
      std::vector<Ort::Value> states) const = 0;

  /**
   *
   * @param x A 3-D tensor of shape (N, T, C). N has to be 1.
   * @param states  It is from GetInitStates() or returned from this method.
   *
   * @return Return a list of tensors
   *    - ans[0] contains log_probs, of shape (N, T, C)
   *    - ans[1:] contains next_states
   */
  virtual std::vector<Ort::Value> Forward(
      Ort::Value x, std::vector<Ort::Value> states) const = 0;

  /** Return the vocabulary size of the model
   */
  virtual int32_t VocabSize() const = 0;

  /** Return an allocator for allocating memory
   */
  virtual OrtAllocator *Allocator() const = 0;

  // The model accepts this number of frames before subsampling as input
  virtual int32_t ChunkLength() const = 0;

  // Similar to frame_shift in feature extractor, after processing
  // ChunkLength() frames, we advance by ChunkShift() frames
  // before we process the next chunk.
  virtual int32_t ChunkShift() const = 0;

  // Return true if the model supports batch size > 1
  virtual bool SupportBatchProcessing() const { return true; }

  virtual bool UseWhisperFeature() const { return false; }
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_CTC_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/online-ebranchformer-transducer-model.cc
================================================
// sherpa-onnx/csrc/online-ebranchformer-transducer-model.cc
//
// Copyright (c)  2023  Xiaomi Corporation
//                2025  Brno University of Technology (author: Karel Vesely)

#include "sherpa-onnx/csrc/online-ebranchformer-transducer-model.h"

#include <algorithm>
#include <cassert>
#include <cmath>
#include <memory>
#include <numeric>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/cat.h"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/online-transducer-decoder.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"
#include "sherpa-onnx/csrc/unbind.h"

namespace sherpa_onnx {

OnlineEbranchformerTransducerModel::OnlineEbranchformerTransducerModel(
    const OnlineModelConfig &config)
    : env_(ORT_LOGGING_LEVEL_ERROR),
      encoder_sess_opts_(GetSessionOptions(config)),
      decoder_sess_opts_(GetSessionOptions(config, "decoder")),
      joiner_sess_opts_(GetSessionOptions(config, "joiner")),
      config_(config),
      allocator_{} {
  {
    auto buf = ReadFile(config.transducer.encoder);
    InitEncoder(buf.data(), buf.size());
  }

  {
    auto buf = ReadFile(config.transducer.decoder);
    InitDecoder(buf.data(), buf.size());
  }

  {
    auto buf = ReadFile(config.transducer.joiner);
    InitJoiner(buf.data(), buf.size());
  }
}

template <typename Manager>
OnlineEbranchformerTransducerModel::OnlineEbranchformerTransducerModel(
    Manager *mgr, const OnlineModelConfig &config)
    : env_(ORT_LOGGING_LEVEL_ERROR),
      config_(config),
      encoder_sess_opts_(GetSessionOptions(config)),
      decoder_sess_opts_(GetSessionOptions(config, "decoder")),
      joiner_sess_opts_(GetSessionOptions(config, "joiner")),
      allocator_{} {
  {
    auto buf = ReadFile(mgr, config.transducer.encoder);
    InitEncoder(buf.data(), buf.size());
  }

  {
    auto buf = ReadFile(mgr, config.transducer.decoder);
    InitDecoder(buf.data(), buf.size());
  }

  {
    auto buf = ReadFile(mgr, config.transducer.joiner);
    InitJoiner(buf.data(), buf.size());
  }
}

void OnlineEbranchformerTransducerModel::InitEncoder(void *model_data,
                                                     size_t model_data_length) {
  encoder_sess_ = std::make_unique<Ort::Session>(
      env_, model_data, model_data_length, encoder_sess_opts_);

  GetInputNames(encoder_sess_.get(), &encoder_input_names_,
                &encoder_input_names_ptr_);

  GetOutputNames(encoder_sess_.get(), &encoder_output_names_,
                 &encoder_output_names_ptr_);

  // get meta data
  Ort::ModelMetadata meta_data = encoder_sess_->GetModelMetadata();
  if (config_.debug) {
    std::ostringstream os;
    os << "---encoder---\n";
    PrintModelMetadata(os, meta_data);
#if __OHOS__
    SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
    SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
  }

  Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below

  SHERPA_ONNX_READ_META_DATA(decode_chunk_len_, "decode_chunk_len");
  SHERPA_ONNX_READ_META_DATA(T_, "T");

  SHERPA_ONNX_READ_META_DATA(num_hidden_layers_, "num_hidden_layers");
  SHERPA_ONNX_READ_META_DATA(hidden_size_, "hidden_size");
  SHERPA_ONNX_READ_META_DATA(intermediate_size_, "intermediate_size");
  SHERPA_ONNX_READ_META_DATA(csgu_kernel_size_, "csgu_kernel_size");
  SHERPA_ONNX_READ_META_DATA(merge_conv_kernel_, "merge_conv_kernel");
  SHERPA_ONNX_READ_META_DATA(left_context_len_, "left_context_len");
  SHERPA_ONNX_READ_META_DATA(num_heads_, "num_heads");
  SHERPA_ONNX_READ_META_DATA(head_dim_, "head_dim");

  if (config_.debug) {
#if __OHOS__
    SHERPA_ONNX_LOGE("T: %{public}d", T_);
    SHERPA_ONNX_LOGE("decode_chunk_len_: %{public}d", decode_chunk_len_);

    SHERPA_ONNX_LOGE("num_hidden_layers_: %{public}d", num_hidden_layers_);
    SHERPA_ONNX_LOGE("hidden_size_: %{public}d", hidden_size_);
    SHERPA_ONNX_LOGE("intermediate_size_: %{public}d", intermediate_size_);
    SHERPA_ONNX_LOGE("csgu_kernel_size_: %{public}d", csgu_kernel_size_);
    SHERPA_ONNX_LOGE("merge_conv_kernel_: %{public}d", merge_conv_kernel_);
    SHERPA_ONNX_LOGE("left_context_len_: %{public}d", left_context_len_);
    SHERPA_ONNX_LOGE("num_heads_: %{public}d", num_heads_);
    SHERPA_ONNX_LOGE("head_dim_: %{public}d", head_dim_);
#else
    SHERPA_ONNX_LOGE("T: %d", T_);
    SHERPA_ONNX_LOGE("decode_chunk_len_: %d", decode_chunk_len_);

    SHERPA_ONNX_LOGE("num_hidden_layers_: %d", num_hidden_layers_);
    SHERPA_ONNX_LOGE("hidden_size_: %d", hidden_size_);
    SHERPA_ONNX_LOGE("intermediate_size_: %d", intermediate_size_);
    SHERPA_ONNX_LOGE("csgu_kernel_size_: %d", csgu_kernel_size_);
    SHERPA_ONNX_LOGE("merge_conv_kernel_: %d", merge_conv_kernel_);
    SHERPA_ONNX_LOGE("left_context_len_: %d", left_context_len_);
    SHERPA_ONNX_LOGE("num_heads_: %d", num_heads_);
    SHERPA_ONNX_LOGE("head_dim_: %d", head_dim_);
#endif
  }
}

void OnlineEbranchformerTransducerModel::InitDecoder(void *model_data,
                                                     size_t model_data_length) {
  decoder_sess_ = std::make_unique<Ort::Session>(
      env_, model_data, model_data_length, decoder_sess_opts_);

  GetInputNames(decoder_sess_.get(), &decoder_input_names_,
                &decoder_input_names_ptr_);

  GetOutputNames(decoder_sess_.get(), &decoder_output_names_,
                 &decoder_output_names_ptr_);

  // get meta data
  Ort::ModelMetadata meta_data = decoder_sess_->GetModelMetadata();
  if (config_.debug) {
    std::ostringstream os;
    os << "---decoder---\n";
    PrintModelMetadata(os, meta_data);
    SHERPA_ONNX_LOGE("%s", os.str().c_str());
  }

  Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
  SHERPA_ONNX_READ_META_DATA(vocab_size_, "vocab_size");
  SHERPA_ONNX_READ_META_DATA(context_size_, "context_size");
}

void OnlineEbranchformerTransducerModel::InitJoiner(void *model_data,
                                                    size_t model_data_length) {
  joiner_sess_ = std::make_unique<Ort::Session>(
      env_, model_data, model_data_length, joiner_sess_opts_);

  GetInputNames(joiner_sess_.get(), &joiner_input_names_,
                &joiner_input_names_ptr_);

  GetOutputNames(joiner_sess_.get(), &joiner_output_names_,
                 &joiner_output_names_ptr_);

  // get meta data
  Ort::ModelMetadata meta_data = joiner_sess_->GetModelMetadata();
  if (config_.debug) {
    std::ostringstream os;
    os << "---joiner---\n";
    PrintModelMetadata(os, meta_data);
    SHERPA_ONNX_LOGE("%s", os.str().c_str());
  }
}

std::vector<Ort::Value> OnlineEbranchformerTransducerModel::StackStates(
    const std::vector<std::vector<Ort::Value>> &states) const {
  int32_t batch_size = static_cast<int32_t>(states.size());

  std::vector<const Ort::Value *> buf(batch_size);

  auto allocator =
      const_cast<OnlineEbranchformerTransducerModel *>(this)->allocator_;

  std::vector<Ort::Value> ans;
  int32_t num_states = static_cast<int32_t>(states[0].size());
  ans.reserve(num_states);

  for (int32_t i = 0; i != num_hidden_layers_; ++i) {
    {  // cached_key
      for (int32_t n = 0; n != batch_size; ++n) {
        buf[n] = &states[n][4 * i];
      }
      auto v = Cat(allocator, buf, /* axis */ 0);
      ans.push_back(std::move(v));
    }
    {  // cached_value
      for (int32_t n = 0; n != batch_size; ++n) {
        buf[n] = &states[n][4 * i + 1];
      }
      auto v = Cat(allocator, buf, 0);
      ans.push_back(std::move(v));
    }
    {  // cached_conv
      for (int32_t n = 0; n != batch_size; ++n) {
        buf[n] = &states[n][4 * i + 2];
      }
      auto v = Cat(allocator, buf, 0);
      ans.push_back(std::move(v));
    }
    {  // cached_conv_fusion
      for (int32_t n = 0; n != batch_size; ++n) {
        buf[n] = &states[n][4 * i + 3];
      }
      auto v = Cat(allocator, buf, 0);
      ans.push_back(std::move(v));
    }
  }

  {  // processed_lens
    for (int32_t n = 0; n != batch_size; ++n) {
      buf[n] = &states[n][num_states - 1];
    }
    auto v = Cat<int64_t>(allocator, buf, 0);
    ans.push_back(std::move(v));
  }

  return ans;
}

std::vector<std::vector<Ort::Value>>
OnlineEbranchformerTransducerModel::UnStackStates(
    const std::vector<Ort::Value> &states) const {
  assert(static_cast<int32_t>(states.size()) == num_hidden_layers_ * 4 + 1);

  int32_t batch_size = states[0].GetTensorTypeAndShapeInfo().GetShape()[0];

  auto allocator =
      const_cast<OnlineEbranchformerTransducerModel *>(this)->allocator_;

  std::vector<std::vector<Ort::Value>> ans;
  ans.resize(batch_size);

  for (int32_t i = 0; i != num_hidden_layers_; ++i) {
    {  // cached_key
      auto v = Unbind(allocator, &states[i * 4], /* axis */ 0);
      assert(static_cast<int32_t>(v.size()) == batch_size);

      for (int32_t n = 0; n != batch_size; ++n) {
        ans[n].push_back(std::move(v[n]));
      }
    }
    {  // cached_value
      auto v = Unbind(allocator, &states[i * 4 + 1], 0);
      assert(static_cast<int32_t>(v.size()) == batch_size);

      for (int32_t n = 0; n != batch_size; ++n) {
        ans[n].push_back(std::move(v[n]));
      }
    }
    {  // cached_conv
      auto v = Unbind(allocator, &states[i * 4 + 2], 0);
      assert(static_cast<int32_t>(v.size()) == batch_size);

      for (int32_t n = 0; n != batch_size; ++n) {
        ans[n].push_back(std::move(v[n]));
      }
    }
    {  // cached_conv_fusion
      auto v = Unbind(allocator, &states[i * 4 + 3], 0);
      assert(static_cast<int32_t>(v.size()) == batch_size);

      for (int32_t n = 0; n != batch_size; ++n) {
        ans[n].push_back(std::move(v[n]));
      }
    }
  }

  {  // processed_lens
    auto v = Unbind<int64_t>(allocator, &states.back(), 0);
    assert(static_cast<int32_t>(v.size()) == batch_size);

    for (int32_t n = 0; n != batch_size; ++n) {
      ans[n].push_back(std::move(v[n]));
    }
  }

  return ans;
}

std::vector<Ort::Value>
OnlineEbranchformerTransducerModel::GetEncoderInitStates() {
  std::vector<Ort::Value> ans;

  ans.reserve(num_hidden_layers_ * 4 + 1);

  int32_t left_context_conv = csgu_kernel_size_ - 1;
  int32_t channels_conv = intermediate_size_ / 2;

  int32_t left_context_conv_fusion = merge_conv_kernel_ - 1;
  int32_t channels_conv_fusion = 2 * hidden_size_;

  for (int32_t i = 0; i != num_hidden_layers_; ++i) {
    {  // cached_key_{i}
      std::array<int64_t, 4> s{1, num_heads_, left_context_len_, head_dim_};
      auto v = Ort::Value::CreateTensor<float>(allocator_, s.data(), s.size());
      Fill(&v, 0);
      ans.push_back(std::move(v));
    }

    {  // cahced_value_{i}
      std::array<int64_t, 4> s{1, num_heads_, left_context_len_, head_dim_};
      auto v = Ort::Value::CreateTensor<float>(allocator_, s.data(), s.size());
      Fill(&v, 0);
      ans.push_back(std::move(v));
    }

    {  // cached_conv_{i}
      std::array<int64_t, 3> s{1, channels_conv, left_context_conv};
      auto v = Ort::Value::CreateTensor<float>(allocator_, s.data(), s.size());
      Fill(&v, 0);
      ans.push_back(std::move(v));
    }

    {  // cached_conv_fusion_{i}
      std::array<int64_t, 3> s{1, channels_conv_fusion,
                               left_context_conv_fusion};
      auto v = Ort::Value::CreateTensor<float>(allocator_, s.data(), s.size());
      Fill(&v, 0);
      ans.push_back(std::move(v));
    }
  }  // num_hidden_layers_

  {  // processed_lens
    std::array<int64_t, 1> s{1};
    auto v = Ort::Value::CreateTensor<int64_t>(allocator_, s.data(), s.size());
    Fill<int64_t>(&v, 0);
    ans.push_back(std::move(v));
  }

  return ans;
}

std::pair<Ort::Value, std::vector<Ort::Value>>
OnlineEbranchformerTransducerModel::RunEncoder(
    Ort::Value features, std::vector<Ort::Value> states,
    Ort::Value /* processed_frames */) {
  std::vector<Ort::Value> encoder_inputs;
  encoder_inputs.reserve(1 + states.size());

  encoder_inputs.push_back(std::move(features));
  for (auto &v : states) {
    encoder_inputs.push_back(std::move(v));
  }

  auto encoder_out = encoder_sess_->Run(
      {}, encoder_input_names_ptr_.data(), encoder_inputs.data(),
      encoder_inputs.size(), encoder_output_names_ptr_.data(),
      encoder_output_names_ptr_.size());

  std::vector<Ort::Value> next_states;
  next_states.reserve(states.size());

  for (int32_t i = 1; i != static_cast<int32_t>(encoder_out.size()); ++i) {
    next_states.push_back(std::move(encoder_out[i]));
  }
  return {std::move(encoder_out[0]), std::move(next_states)};
}

Ort::Value OnlineEbranchformerTransducerModel::RunDecoder(
    Ort::Value decoder_input) {
  auto decoder_out = decoder_sess_->Run(
      {}, decoder_input_names_ptr_.data(), &decoder_input, 1,
      decoder_output_names_ptr_.data(), decoder_output_names_ptr_.size());
  return std::move(decoder_out[0]);
}

Ort::Value OnlineEbranchformerTransducerModel::RunJoiner(
    Ort::Value encoder_out, Ort::Value decoder_out) {
  std::array<Ort::Value, 2> joiner_input = {std::move(encoder_out),
                                            std::move(decoder_out)};
  auto logit =
      joiner_sess_->Run({}, joiner_input_names_ptr_.data(), joiner_input.data(),
                        joiner_input.size(), joiner_output_names_ptr_.data(),
                        joiner_output_names_ptr_.size());

  return std::move(logit[0]);
}

#if __ANDROID_API__ >= 9
template OnlineEbranchformerTransducerModel::OnlineEbranchformerTransducerModel(
    AAssetManager *mgr, const OnlineModelConfig &config);
#endif

#if __OHOS__
template OnlineEbranchformerTransducerModel::OnlineEbranchformerTransducerModel(
    NativeResourceManager *mgr, const OnlineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-ebranchformer-transducer-model.h
================================================
// sherpa-onnx/csrc/online-ebranchformer-transducer-model.h
//
// Copyright (c)  2023  Xiaomi Corporation
//                2025  Brno University of Technology (author: Karel Vesely)
#ifndef SHERPA_ONNX_CSRC_ONLINE_EBRANCHFORMER_TRANSDUCER_MODEL_H_
#define SHERPA_ONNX_CSRC_ONLINE_EBRANCHFORMER_TRANSDUCER_MODEL_H_

#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/online-model-config.h"
#include "sherpa-onnx/csrc/online-transducer-model.h"

namespace sherpa_onnx {

class OnlineEbranchformerTransducerModel : public OnlineTransducerModel {
 public:
  explicit OnlineEbranchformerTransducerModel(const OnlineModelConfig &config);

  template <typename Manager>
  OnlineEbranchformerTransducerModel(Manager *mgr,
                                     const OnlineModelConfig &config);

  std::vector<Ort::Value> StackStates(
      const std::vector<std::vector<Ort::Value>> &states) const override;

  std::vector<std::vector<Ort::Value>> UnStackStates(
      const std::vector<Ort::Value> &states) const override;

  std::vector<Ort::Value> GetEncoderInitStates() override;

  void SetFeatureDim(int32_t feature_dim) override {
    feature_dim_ = feature_dim;
  }

  std::pair<Ort::Value, std::vector<Ort::Value>> RunEncoder(
      Ort::Value features, std::vector<Ort::Value> states,
      Ort::Value processed_frames) override;

  Ort::Value RunDecoder(Ort::Value decoder_input) override;

  Ort::Value RunJoiner(Ort::Value encoder_out, Ort::Value decoder_out) override;

  int32_t ContextSize() const override { return context_size_; }

  int32_t ChunkSize() const override { return T_; }

  int32_t ChunkShift() const override { return decode_chunk_len_; }

  int32_t VocabSize() const override { return vocab_size_; }
  OrtAllocator *Allocator() override { return allocator_; }

 private:
  void InitEncoder(void *model_data, size_t model_data_length);
  void InitDecoder(void *model_data, size_t model_data_length);
  void InitJoiner(void *model_data, size_t model_data_length);

 private:
  Ort::Env env_;
  Ort::SessionOptions encoder_sess_opts_;
  Ort::SessionOptions decoder_sess_opts_;
  Ort::SessionOptions joiner_sess_opts_;

  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> encoder_sess_;
  std::unique_ptr<Ort::Session> decoder_sess_;
  std::unique_ptr<Ort::Session> joiner_sess_;

  std::vector<std::string> encoder_input_names_;
  std::vector<const char *> encoder_input_names_ptr_;

  std::vector<std::string> encoder_output_names_;
  std::vector<const char *> encoder_output_names_ptr_;

  std::vector<std::string> decoder_input_names_;
  std::vector<const char *> decoder_input_names_ptr_;

  std::vector<std::string> decoder_output_names_;
  std::vector<const char *> decoder_output_names_ptr_;

  std::vector<std::string> joiner_input_names_;
  std::vector<const char *> joiner_input_names_ptr_;

  std::vector<std::string> joiner_output_names_;
  std::vector<const char *> joiner_output_names_ptr_;

  OnlineModelConfig config_;

  int32_t decode_chunk_len_ = 0;
  int32_t T_ = 0;

  int32_t num_hidden_layers_ = 0;
  int32_t hidden_size_ = 0;
  int32_t intermediate_size_ = 0;
  int32_t csgu_kernel_size_ = 0;
  int32_t merge_conv_kernel_ = 0;
  int32_t left_context_len_ = 0;
  int32_t num_heads_ = 0;
  int32_t head_dim_ = 0;

  int32_t context_size_ = 0;
  int32_t vocab_size_ = 0;
  int32_t feature_dim_ = 80;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_EBRANCHFORMER_TRANSDUCER_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/online-lm-config.cc
================================================
// sherpa-onnx/csrc/online-lm-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/online-lm-config.h"

#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OnlineLMConfig::Register(ParseOptions *po) {
  po->Register("lm", &model, "Path to LM model.");
  po->Register("lm-scale", &scale, "LM scale.");
  po->Register("lm-num-threads", &lm_num_threads,
               "Number of threads to run the neural network of LM model");
  po->Register("lm-provider", &lm_provider,
               "Specify a provider to LM model use: cpu, cuda, coreml");
  po->Register("lm-shallow-fusion", &shallow_fusion,
               "Boolean whether to use shallow fusion or rescore.");
  po->Register("lodr-fst", &lodr_fst, "Path to LODR FST model.");
  po->Register("lodr-scale", &lodr_scale, "LODR scale.");
  po->Register("lodr-backoff-id", &lodr_backoff_id,
               "ID of the backoff in the LODR FST. -1 means autodetect");
}

bool OnlineLMConfig::Validate() const {
  if (!FileExists(model)) {
    SHERPA_ONNX_LOGE("'%s' does not exist", model.c_str());
    return false;
  }

  if (!lodr_fst.empty() && !FileExists(lodr_fst)) {
    SHERPA_ONNX_LOGE("'%s' does not exist", lodr_fst.c_str());
    return false;
  }

  return true;
}

std::string OnlineLMConfig::ToString() const {
  std::ostringstream os;

  os << "OnlineLMConfig(";
  os << "model=\"" << model << "\", ";
  os << "scale=" << scale << ", ";
  os << "lodr_scale=" << lodr_scale << ", ";
  os << "lodr_fst=\"" << lodr_fst << "\", ";
  os << "lodr_backoff_id=" << lodr_backoff_id << ", ";
  os << "shallow_fusion=" << (shallow_fusion ? "True" : "False") << ")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-lm-config.h
================================================
// sherpa-onnx/csrc/online-lm-config.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_ONLINE_LM_CONFIG_H_
#define SHERPA_ONNX_CSRC_ONLINE_LM_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OnlineLMConfig {
  // path to the onnx model
  std::string model;

  // LM scale
  float scale = 0.5;
  int32_t lm_num_threads = 1;
  std::string lm_provider = "cpu";
  std::string lodr_fst;
  float lodr_scale = 0.01;
  int32_t lodr_backoff_id = -1;  // -1 means not set
  // enable shallow fusion
  bool shallow_fusion = true;

  OnlineLMConfig() = default;

  OnlineLMConfig(const std::string &model, float scale, int32_t lm_num_threads,
                 const std::string &lm_provider, bool shallow_fusion,
                 const std::string &lodr_fst, float lodr_scale,
                 int32_t lodr_backoff_id)
      : model(model),
        scale(scale),
        lm_num_threads(lm_num_threads),
        lm_provider(lm_provider),
        shallow_fusion(shallow_fusion),
        lodr_fst(lodr_fst),
        lodr_scale(lodr_scale),
        lodr_backoff_id(lodr_backoff_id) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_LM_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/online-lm.cc
================================================
// sherpa-onnx/csrc/online-lm.cc
//
// Copyright (c)  2023  Pingfeng Luo
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/online-lm.h"

#include <algorithm>
#include <memory>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/online-rnn-lm.h"

namespace sherpa_onnx {

std::unique_ptr<OnlineLM> OnlineLM::Create(const OnlineLMConfig &config) {
  return std::make_unique<OnlineRnnLM>(config);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-lm.h
================================================
// sherpa-onnx/csrc/online-lm.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_ONLINE_LM_H_
#define SHERPA_ONNX_CSRC_ONLINE_LM_H_

#include <memory>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/hypothesis.h"
#include "sherpa-onnx/csrc/online-lm-config.h"

namespace sherpa_onnx {

class OnlineLM {
 public:
  virtual ~OnlineLM() = default;

  static std::unique_ptr<OnlineLM> Create(const OnlineLMConfig &config);

  // init states for classic rescore
  virtual std::vector<Ort::Value> GetInitStates() = 0;

  // init states for shallow fusion
  virtual std::pair<Ort::Value, std::vector<Ort::Value>> GetInitStatesSF() = 0;

   /** ScoreToken a batch of sentences (shallow fusion).
   *
   * @param x A 2-D tensor of shape (N, 1) with data type int64.
   * @param states It contains the states for the LM model
   * @return Return a pair containing
   *          - log_prob of NN LM
   *          - updated states
   *
   */
  virtual std::pair<Ort::Value, std::vector<Ort::Value>> ScoreToken(
      Ort::Value x, std::vector<Ort::Value> states) = 0;

  /** This function updates hyp.lm_log_prob of hyps (classic rescore).
   *
   * @param scale LM score
   * @param context_size Context size of the transducer decoder model
   * @param hyps It is changed in-place.
   *
   */
  virtual void ComputeLMScore(float scale, int32_t context_size,
                      std::vector<Hypotheses> *hyps) = 0;

  /** This function updates lm_log_prob and nn_lm_scores of hyp (shallow fusion).
   *
   * @param scale LM score
   * @param hyps It is changed in-place.
   *
   */
  virtual void ComputeLMScoreSF(float scale, Hypothesis *hyp) = 0;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_LM_H_


================================================
FILE: sherpa-onnx/csrc/online-lstm-transducer-model.cc
================================================
// sherpa-onnx/csrc/online-lstm-transducer-model.cc
//
// Copyright (c)  2023  Xiaomi Corporation
#include "sherpa-onnx/csrc/online-lstm-transducer-model.h"

#include <algorithm>
#include <cassert>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/cat.h"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/online-transducer-decoder.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/unbind.h"

namespace sherpa_onnx {

OnlineLstmTransducerModel::OnlineLstmTransducerModel(
    const OnlineModelConfig &config)
    : env_(ORT_LOGGING_LEVEL_ERROR),
      config_(config),
      sess_opts_(GetSessionOptions(config)),
      allocator_{} {
  {
    auto buf = ReadFile(config.transducer.encoder);
    InitEncoder(buf.data(), buf.size());
  }

  {
    auto buf = ReadFile(config.transducer.decoder);
    InitDecoder(buf.data(), buf.size());
  }

  {
    auto buf = ReadFile(config.transducer.joiner);
    InitJoiner(buf.data(), buf.size());
  }
}

template <typename Manager>
OnlineLstmTransducerModel::OnlineLstmTransducerModel(
    Manager *mgr, const OnlineModelConfig &config)
    : env_(ORT_LOGGING_LEVEL_ERROR),
      config_(config),
      sess_opts_(GetSessionOptions(config)),
      allocator_{} {
  {
    auto buf = ReadFile(mgr, config.transducer.encoder);
    InitEncoder(buf.data(), buf.size());
  }

  {
    auto buf = ReadFile(mgr, config.transducer.decoder);
    InitDecoder(buf.data(), buf.size());
  }

  {
    auto buf = ReadFile(mgr, config.transducer.joiner);
    InitJoiner(buf.data(), buf.size());
  }
}

void OnlineLstmTransducerModel::InitEncoder(void *model_data,
                                            size_t model_data_length) {
  encoder_sess_ = std::make_unique<Ort::Session>(env_, model_data,
                                                 model_data_length, sess_opts_);

  GetInputNames(encoder_sess_.get(), &encoder_input_names_,
                &encoder_input_names_ptr_);

  GetOutputNames(encoder_sess_.get(), &encoder_output_names_,
                 &encoder_output_names_ptr_);

  // get meta data
  Ort::ModelMetadata meta_data = encoder_sess_->GetModelMetadata();
  if (config_.debug) {
    std::ostringstream os;
    os << "---encoder---\n";
    PrintModelMetadata(os, meta_data);
#if __OHOS__
    SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
    SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
  }

  Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
  SHERPA_ONNX_READ_META_DATA(num_encoder_layers_, "num_encoder_layers");
  SHERPA_ONNX_READ_META_DATA(T_, "T");
  SHERPA_ONNX_READ_META_DATA(decode_chunk_len_, "decode_chunk_len");
  SHERPA_ONNX_READ_META_DATA(rnn_hidden_size_, "rnn_hidden_size");
  SHERPA_ONNX_READ_META_DATA(d_model_, "d_model");
}

void OnlineLstmTransducerModel::InitDecoder(void *model_data,
                                            size_t model_data_length) {
  decoder_sess_ = std::make_unique<Ort::Session>(env_, model_data,
                                                 model_data_length, sess_opts_);

  GetInputNames(decoder_sess_.get(), &decoder_input_names_,
                &decoder_input_names_ptr_);

  GetOutputNames(decoder_sess_.get(), &decoder_output_names_,
                 &decoder_output_names_ptr_);

  // get meta data
  Ort::ModelMetadata meta_data = decoder_sess_->GetModelMetadata();
  if (config_.debug) {
    std::ostringstream os;
    os << "---decoder---\n";
    PrintModelMetadata(os, meta_data);
    SHERPA_ONNX_LOGE("%s", os.str().c_str());
  }

  Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
  SHERPA_ONNX_READ_META_DATA(vocab_size_, "vocab_size");
  SHERPA_ONNX_READ_META_DATA(context_size_, "context_size");
}

void OnlineLstmTransducerModel::InitJoiner(void *model_data,
                                           size_t model_data_length) {
  joiner_sess_ = std::make_unique<Ort::Session>(env_, model_data,
                                                model_data_length, sess_opts_);

  GetInputNames(joiner_sess_.get(), &joiner_input_names_,
                &joiner_input_names_ptr_);

  GetOutputNames(joiner_sess_.get(), &joiner_output_names_,
                 &joiner_output_names_ptr_);

  // get meta data
  Ort::ModelMetadata meta_data = joiner_sess_->GetModelMetadata();
  if (config_.debug) {
    std::ostringstream os;
    os << "---joiner---\n";
    PrintModelMetadata(os, meta_data);
    SHERPA_ONNX_LOGE("%s", os.str().c_str());
  }
}

std::vector<Ort::Value> OnlineLstmTransducerModel::StackStates(
    const std::vector<std::vector<Ort::Value>> &states) const {
  int32_t batch_size = static_cast<int32_t>(states.size());

  std::vector<const Ort::Value *> h_buf(batch_size);
  std::vector<const Ort::Value *> c_buf(batch_size);

  for (int32_t i = 0; i != batch_size; ++i) {
    assert(states[i].size() == 2);
    h_buf[i] = &states[i][0];
    c_buf[i] = &states[i][1];
  }
  auto allocator = const_cast<OnlineLstmTransducerModel *>(this)->allocator_;

  Ort::Value h = Cat(allocator, h_buf, 1);
  Ort::Value c = Cat(allocator, c_buf, 1);

  std::vector<Ort::Value> ans;
  ans.reserve(2);
  ans.push_back(std::move(h));
  ans.push_back(std::move(c));

  return ans;
}

std::vector<std::vector<Ort::Value>> OnlineLstmTransducerModel::UnStackStates(
    const std::vector<Ort::Value> &states) const {
  int32_t batch_size = states[0].GetTensorTypeAndShapeInfo().GetShape()[1];
  assert(states.size() == 2);

  std::vector<std::vector<Ort::Value>> ans(batch_size);

  auto allocator = const_cast<OnlineLstmTransducerModel *>(this)->allocator_;

  std::vector<Ort::Value> h_vec = Unbind(allocator, &states[0], 1);
  std::vector<Ort::Value> c_vec = Unbind(allocator, &states[1], 1);

  assert(h_vec.size() == batch_size);
  assert(c_vec.size() == batch_size);

  for (int32_t i = 0; i != batch_size; ++i) {
    ans[i].push_back(std::move(h_vec[i]));
    ans[i].push_back(std::move(c_vec[i]));
  }

  return ans;
}

std::vector<Ort::Value> OnlineLstmTransducerModel::GetEncoderInitStates() {
  // Please see
  // https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/lstm_transducer_stateless2/export-onnx.py#L185
  // for details
  constexpr int32_t kBatchSize = 1;
  std::array<int64_t, 3> h_shape{num_encoder_layers_, kBatchSize, d_model_};
  Ort::Value h = Ort::Value::CreateTensor<float>(allocator_, h_shape.data(),
                                                 h_shape.size());

  Fill<float>(&h, 0);

  std::array<int64_t, 3> c_shape{num_encoder_layers_, kBatchSize,
                                 rnn_hidden_size_};

  Ort::Value c = Ort::Value::CreateTensor<float>(allocator_, c_shape.data(),
                                                 c_shape.size());

  Fill<float>(&c, 0);

  std::vector<Ort::Value> states;

  states.reserve(2);
  states.push_back(std::move(h));
  states.push_back(std::move(c));

  return states;
}

std::pair<Ort::Value, std::vector<Ort::Value>>
OnlineLstmTransducerModel::RunEncoder(Ort::Value features,
                                      std::vector<Ort::Value> states,
                                      Ort::Value /* processed_frames */) {
  std::array<Ort::Value, 3> encoder_inputs = {
      std::move(features), std::move(states[0]), std::move(states[1])};

  auto encoder_out = encoder_sess_->Run(
      {}, encoder_input_names_ptr_.data(), encoder_inputs.data(),
      encoder_inputs.size(), encoder_output_names_ptr_.data(),
      encoder_output_names_ptr_.size());

  std::vector<Ort::Value> next_states;
  next_states.reserve(2);
  next_states.push_back(std::move(encoder_out[1]));
  next_states.push_back(std::move(encoder_out[2]));

  return {std::move(encoder_out[0]), std::move(next_states)};
}

Ort::Value OnlineLstmTransducerModel::RunDecoder(Ort::Value decoder_input) {
  auto decoder_out = decoder_sess_->Run(
      {}, decoder_input_names_ptr_.data(), &decoder_input, 1,
      decoder_output_names_ptr_.data(), decoder_output_names_ptr_.size());
  return std::move(decoder_out[0]);
}

Ort::Value OnlineLstmTransducerModel::RunJoiner(Ort::Value encoder_out,
                                                Ort::Value decoder_out) {
  std::array<Ort::Value, 2> joiner_input = {std::move(encoder_out),
                                            std::move(decoder_out)};
  auto logit =
      joiner_sess_->Run({}, joiner_input_names_ptr_.data(), joiner_input.data(),
                        joiner_input.size(), joiner_output_names_ptr_.data(),
                        joiner_output_names_ptr_.size());

  return std::move(logit[0]);
}

#if __ANDROID_API__ >= 9
template OnlineLstmTransducerModel::OnlineLstmTransducerModel(
    AAssetManager *mgr, const OnlineModelConfig &config);
#endif

#if __OHOS__
template OnlineLstmTransducerModel::OnlineLstmTransducerModel(
    NativeResourceManager *mgr, const OnlineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-lstm-transducer-model.h
================================================
// sherpa-onnx/csrc/online-lstm-transducer-model.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_ONLINE_LSTM_TRANSDUCER_MODEL_H_
#define SHERPA_ONNX_CSRC_ONLINE_LSTM_TRANSDUCER_MODEL_H_

#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/online-model-config.h"
#include "sherpa-onnx/csrc/online-transducer-model.h"

namespace sherpa_onnx {

class OnlineLstmTransducerModel : public OnlineTransducerModel {
 public:
  explicit OnlineLstmTransducerModel(const OnlineModelConfig &config);

  template <typename Manager>
  OnlineLstmTransducerModel(Manager *mgr, const OnlineModelConfig &config);

  std::vector<Ort::Value> StackStates(
      const std::vector<std::vector<Ort::Value>> &states) const override;

  std::vector<std::vector<Ort::Value>> UnStackStates(
      const std::vector<Ort::Value> &states) const override;

  std::vector<Ort::Value> GetEncoderInitStates() override;

  std::pair<Ort::Value, std::vector<Ort::Value>> RunEncoder(
      Ort::Value features, std::vector<Ort::Value> states,
      Ort::Value processed_frames) override;

  Ort::Value RunDecoder(Ort::Value decoder_input) override;

  Ort::Value RunJoiner(Ort::Value encoder_out, Ort::Value decoder_out) override;

  int32_t ContextSize() const override { return context_size_; }

  int32_t ChunkSize() const override { return T_; }

  int32_t ChunkShift() const override { return decode_chunk_len_; }

  int32_t VocabSize() const override { return vocab_size_; }
  OrtAllocator *Allocator() override { return allocator_; }

 private:
  void InitEncoder(void *model_data, size_t model_data_length);
  void InitDecoder(void *model_data, size_t model_data_length);
  void InitJoiner(void *model_data, size_t model_data_length);

 private:
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> encoder_sess_;
  std::unique_ptr<Ort::Session> decoder_sess_;
  std::unique_ptr<Ort::Session> joiner_sess_;

  std::vector<std::string> encoder_input_names_;
  std::vector<const char *> encoder_input_names_ptr_;

  std::vector<std::string> encoder_output_names_;
  std::vector<const char *> encoder_output_names_ptr_;

  std::vector<std::string> decoder_input_names_;
  std::vector<const char *> decoder_input_names_ptr_;

  std::vector<std::string> decoder_output_names_;
  std::vector<const char *> decoder_output_names_ptr_;

  std::vector<std::string> joiner_input_names_;
  std::vector<const char *> joiner_input_names_ptr_;

  std::vector<std::string> joiner_output_names_;
  std::vector<const char *> joiner_output_names_ptr_;

  OnlineModelConfig config_;

  int32_t num_encoder_layers_ = 0;
  int32_t T_ = 0;
  int32_t decode_chunk_len_ = 0;
  int32_t rnn_hidden_size_ = 0;
  int32_t d_model_ = 0;
  int32_t context_size_ = 0;
  int32_t vocab_size_ = 0;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_LSTM_TRANSDUCER_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/online-model-config.cc
================================================
// sherpa-onnx/csrc/online-model-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation
#include "sherpa-onnx/csrc/online-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

void OnlineModelConfig::Register(ParseOptions *po) {
  transducer.Register(po);
  paraformer.Register(po);
  wenet_ctc.Register(po);
  zipformer2_ctc.Register(po);
  nemo_ctc.Register(po);
  t_one_ctc.Register(po);
  provider_config.Register(po);

  po->Register("tokens", &tokens, "Path to tokens.txt");

  po->Register("num-threads", &num_threads,
               "Number of threads to run the neural network");

  po->Register("warm-up", &warm_up,
               "Number of warm-up to run the onnxruntime"
               "Valid vales are: zipformer2");

  po->Register("debug", &debug,
               "true to print model information while loading it.");

  po->Register("modeling-unit", &modeling_unit,
               "The modeling unit of the model, commonly used units are bpe, "
               "cjkchar, cjkchar+bpe, etc. Currently, it is needed only when "
               "hotwords are provided, we need it to encode the hotwords into "
               "token sequence.");

  po->Register("bpe-vocab", &bpe_vocab,
               "The vocabulary generated by google's sentencepiece program. "
               "It is a file has two columns, one is the token, the other is "
               "the log probability, you can get it from the directory where "
               "your bpe model is generated. Only used when hotwords provided "
               "and the modeling unit is bpe or cjkchar+bpe");

  po->Register("model-type", &model_type,
               "Specify it to reduce model initialization time. "
               "Valid values are: conformer, lstm, zipformer, zipformer2, "
               "wenet_ctc, nemo_ctc. "
               "All other values lead to loading the model twice.");
}

bool OnlineModelConfig::Validate() const {
  // For RK NPU, we reinterpret num_threads:
  //
  // For RK3588 only
  // num_threads == 1 -> Select a core randomly
  // num_threads == 0 -> Use NPU core 0
  // num_threads == -1 -> Use NPU core 1
  // num_threads == -2 -> Use NPU core 2
  // num_threads == -3 -> Use NPU core 0 and core 1
  // num_threads == -4 -> Use NPU core 0, core 1, and core 2
  if (provider_config.provider != "rknn") {
    if (num_threads < 1) {
      SHERPA_ONNX_LOGE("num_threads should be > 0. Given %d", num_threads);
      return false;
    }
    if (!transducer.encoder.empty() && (EndsWith(transducer.encoder, ".rknn") ||
                                        EndsWith(transducer.decoder, ".rknn") ||
                                        EndsWith(transducer.joiner, ".rknn"))) {
      SHERPA_ONNX_LOGE(
          "--provider is %s, which is not rknn, but you pass rknn model "
          "filenames. encoder: '%s', decoder: '%s', joiner: '%s'",
          provider_config.provider.c_str(), transducer.encoder.c_str(),
          transducer.decoder.c_str(), transducer.joiner.c_str());
      return false;
    }

    if (!zipformer2_ctc.model.empty() &&
        EndsWith(zipformer2_ctc.model, ".rknn")) {
      SHERPA_ONNX_LOGE(
          "--provider is %s, which is not rknn, but you pass rknn model "
          "filename for zipformer2_ctc: '%s'",
          provider_config.provider.c_str(), zipformer2_ctc.model.c_str());
      return false;
    }
  }

  if (provider_config.provider == "rknn") {
    if (!transducer.encoder.empty() && (EndsWith(transducer.encoder, ".onnx") ||
                                        EndsWith(transducer.decoder, ".onnx") ||
                                        EndsWith(transducer.joiner, ".onnx"))) {
      SHERPA_ONNX_LOGE(
          "--provider is rknn, but you pass onnx model "
          "filenames. encoder: '%s', decoder: '%s', joiner: '%s'",
          transducer.encoder.c_str(), transducer.decoder.c_str(),
          transducer.joiner.c_str());
      return false;
    }

    if (!zipformer2_ctc.model.empty() &&
        EndsWith(zipformer2_ctc.model, ".onnx")) {
      SHERPA_ONNX_LOGE(
          "--provider rknn, but you pass onnx model filename for "
          "zipformer2_ctc: '%s'",
          zipformer2_ctc.model.c_str());
      return false;
    }
  }

  if (!tokens_buf.empty() && FileExists(tokens)) {
    SHERPA_ONNX_LOGE(
        "you can not provide a tokens_buf and a tokens file: '%s', "
        "at the same time, which is confusing",
        tokens.c_str());
    return false;
  }

  if (tokens_buf.empty() && !FileExists(tokens)) {
    SHERPA_ONNX_LOGE(
        "tokens: '%s' does not exist, you should provide "
        "either a tokens buffer or a tokens file",
        tokens.c_str());
    return false;
  }

  if (!modeling_unit.empty() &&
      (modeling_unit == "bpe" || modeling_unit == "cjkchar+bpe")) {
    if (!FileExists(bpe_vocab)) {
      SHERPA_ONNX_LOGE("bpe_vocab: '%s' does not exist", bpe_vocab.c_str());
      return false;
    }
  }

  if (!provider_config.Validate()) {
    return false;
  }

  if (!paraformer.encoder.empty()) {
    return paraformer.Validate();
  }

  if (!wenet_ctc.model.empty()) {
    return wenet_ctc.Validate();
  }

  if (!zipformer2_ctc.model.empty()) {
    return zipformer2_ctc.Validate();
  }

  if (!nemo_ctc.model.empty()) {
    return nemo_ctc.Validate();
  }

  if (!t_one_ctc.model.empty()) {
    return t_one_ctc.Validate();
  }

  return transducer.Validate();
}

std::string OnlineModelConfig::ToString() const {
  std::ostringstream os;

  os << "OnlineModelConfig(";
  os << "transducer=" << transducer.ToString() << ", ";
  os << "paraformer=" << paraformer.ToString() << ", ";
  os << "wenet_ctc=" << wenet_ctc.ToString() << ", ";
  os << "zipformer2_ctc=" << zipformer2_ctc.ToString() << ", ";
  os << "nemo_ctc=" << nemo_ctc.ToString() << ", ";
  os << "t_one_ctc=" << t_one_ctc.ToString() << ", ";
  os << "provider_config=" << provider_config.ToString() << ", ";
  os << "tokens=\"" << tokens << "\", ";
  os << "num_threads=" << num_threads << ", ";
  os << "warm_up=" << warm_up << ", ";
  os << "debug=" << (debug ? "True" : "False") << ", ";
  os << "model_type=\"" << model_type << "\", ";
  os << "modeling_unit=\"" << modeling_unit << "\", ";
  os << "bpe_vocab=\"" << bpe_vocab << "\")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-model-config.h
================================================
// sherpa-onnx/csrc/online-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_ONLINE_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_ONLINE_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/online-nemo-ctc-model-config.h"
#include "sherpa-onnx/csrc/online-paraformer-model-config.h"
#include "sherpa-onnx/csrc/online-t-one-ctc-model-config.h"
#include "sherpa-onnx/csrc/online-transducer-model-config.h"
#include "sherpa-onnx/csrc/online-wenet-ctc-model-config.h"
#include "sherpa-onnx/csrc/online-zipformer2-ctc-model-config.h"
#include "sherpa-onnx/csrc/provider-config.h"

namespace sherpa_onnx {

struct OnlineModelConfig {
  OnlineTransducerModelConfig transducer;
  OnlineParaformerModelConfig paraformer;
  OnlineWenetCtcModelConfig wenet_ctc;
  OnlineZipformer2CtcModelConfig zipformer2_ctc;
  OnlineNeMoCtcModelConfig nemo_ctc;
  OnlineToneCtcModelConfig t_one_ctc;
  ProviderConfig provider_config;
  std::string tokens;
  int32_t num_threads = 1;
  int32_t warm_up = 0;
  bool debug = false;

  // Valid values:
  //  - conformer, conformer transducer from icefall
  //  - lstm, lstm transducer from icefall
  //  - zipformer, zipformer transducer from icefall
  //  - zipformer2, zipformer2 transducer or CTC from icefall
  //  - wenet_ctc, wenet CTC model
  //  - nemo_ctc, NeMo CTC model
  //
  // All other values are invalid and lead to loading the model twice.
  std::string model_type;

  // Valid values:
  //  - cjkchar
  //  - bpe
  //  - cjkchar+bpe
  std::string modeling_unit = "cjkchar";
  std::string bpe_vocab;

  /// if tokens_buf is non-empty,
  /// the tokens will be loaded from the buffer instead of from the
  /// "tokens" file
  std::string tokens_buf;

  OnlineModelConfig() = default;
  OnlineModelConfig(const OnlineTransducerModelConfig &transducer,
                    const OnlineParaformerModelConfig &paraformer,
                    const OnlineWenetCtcModelConfig &wenet_ctc,
                    const OnlineZipformer2CtcModelConfig &zipformer2_ctc,
                    const OnlineNeMoCtcModelConfig &nemo_ctc,
                    const OnlineToneCtcModelConfig &t_one_ctc,
                    const ProviderConfig &provider_config,
                    const std::string &tokens, int32_t num_threads,
                    int32_t warm_up, bool debug, const std::string &model_type,
                    const std::string &modeling_unit,
                    const std::string &bpe_vocab)
      : transducer(transducer),
        paraformer(paraformer),
        wenet_ctc(wenet_ctc),
        zipformer2_ctc(zipformer2_ctc),
        nemo_ctc(nemo_ctc),
        t_one_ctc(t_one_ctc),
        provider_config(provider_config),
        tokens(tokens),
        num_threads(num_threads),
        warm_up(warm_up),
        debug(debug),
        model_type(model_type),
        modeling_unit(modeling_unit),
        bpe_vocab(bpe_vocab) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/online-nemo-ctc-model-config.cc
================================================
// sherpa-onnx/csrc/online-nemo-ctc-model-config.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/online-nemo-ctc-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OnlineNeMoCtcModelConfig::Register(ParseOptions *po) {
  po->Register("nemo-ctc-model", &model,
               "Path to CTC model.onnx from NeMo. Please see "
               "https://github.com/k2-fsa/sherpa-onnx/pull/843");
}

bool OnlineNeMoCtcModelConfig::Validate() const {
  if (!FileExists(model)) {
    SHERPA_ONNX_LOGE("NeMo CTC model '%s' does not exist", model.c_str());
    return false;
  }

  return true;
}

std::string OnlineNeMoCtcModelConfig::ToString() const {
  std::ostringstream os;

  os << "OnlineNeMoCtcModelConfig(";
  os << "model=\"" << model << "\")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-nemo-ctc-model-config.h
================================================
// sherpa-onnx/csrc/online-nemo-ctc-model-config.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_ONLINE_NEMO_CTC_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_ONLINE_NEMO_CTC_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OnlineNeMoCtcModelConfig {
  std::string model;

  OnlineNeMoCtcModelConfig() = default;

  explicit OnlineNeMoCtcModelConfig(const std::string &model) : model(model) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_NEMO_CTC_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/online-nemo-ctc-model.cc
================================================
// sherpa-onnx/csrc/online-nemo-ctc-model.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/online-nemo-ctc-model.h"

#include <algorithm>
#include <cmath>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/cat.h"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"
#include "sherpa-onnx/csrc/transpose.h"
#include "sherpa-onnx/csrc/unbind.h"

namespace sherpa_onnx {

class OnlineNeMoCtcModel::Impl {
 public:
  explicit Impl(const OnlineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    {
      auto buf = ReadFile(config.nemo_ctc.model);
      Init(buf.data(), buf.size());
    }
  }

  template <typename Manager>
  Impl(Manager *mgr, const OnlineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    {
      auto buf = ReadFile(mgr, config.nemo_ctc.model);
      Init(buf.data(), buf.size());
    }
  }

  std::vector<Ort::Value> Forward(Ort::Value x,
                                  std::vector<Ort::Value> states) {
    Ort::Value &cache_last_channel = states[0];
    Ort::Value &cache_last_time = states[1];
    Ort::Value &cache_last_channel_len = states[2];

    int32_t batch_size = x.GetTensorTypeAndShapeInfo().GetShape()[0];

    std::array<int64_t, 1> length_shape{batch_size};

    Ort::Value length = Ort::Value::CreateTensor<int64_t>(
        allocator_, length_shape.data(), length_shape.size());

    int64_t *p_length = length.GetTensorMutableData<int64_t>();

    std::fill(p_length, p_length + batch_size, ChunkLength());

    // (B, T, C) -> (B, C, T)
    x = Transpose12(allocator_, &x);

    std::array<Ort::Value, 5> inputs = {
        std::move(x), View(&length), std::move(cache_last_channel),
        std::move(cache_last_time), std::move(cache_last_channel_len)};

    auto out =
        sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(),
                   output_names_ptr_.data(), output_names_ptr_.size());
    // out[0]: logit
    // out[1] logit_length
    // out[2:] states_next
    //
    // we need to remove out[1]

    std::vector<Ort::Value> ans;
    ans.reserve(out.size() - 1);

    for (int32_t i = 0; i != out.size(); ++i) {
      if (i == 1) {
        continue;
      }

      ans.push_back(std::move(out[i]));
    }

    return ans;
  }

  int32_t VocabSize() const { return vocab_size_; }

  int32_t ChunkLength() const { return window_size_; }

  int32_t ChunkShift() const { return chunk_shift_; }

  OrtAllocator *Allocator() { return allocator_; }

  // Return a vector containing 3 tensors
  // - cache_last_channel
  // - cache_last_time_
  // - cache_last_channel_len
  std::vector<Ort::Value> GetInitStates() {
    std::vector<Ort::Value> ans;
    ans.reserve(3);
    ans.push_back(View(&cache_last_channel_));
    ans.push_back(View(&cache_last_time_));
    ans.push_back(View(&cache_last_channel_len_));

    return ans;
  }

  std::vector<Ort::Value> StackStates(
      std::vector<std::vector<Ort::Value>> states) {
    int32_t batch_size = static_cast<int32_t>(states.size());
    if (batch_size == 1) {
      return std::move(states[0]);
    }

    std::vector<Ort::Value> ans;

    // stack cache_last_channel
    std::vector<const Ort::Value *> buf(batch_size);

    // there are 3 states to be stacked
    for (int32_t i = 0; i != 3; ++i) {
      buf.clear();
      buf.reserve(batch_size);

      for (int32_t b = 0; b != batch_size; ++b) {
        assert(states[b].size() == 3);
        buf.push_back(&states[b][i]);
      }

      Ort::Value c{nullptr};
      if (i == 2) {
        c = Cat<int64_t>(allocator_, buf, 0);
      } else {
        c = Cat(allocator_, buf, 0);
      }

      ans.push_back(std::move(c));
    }

    return ans;
  }

  std::vector<std::vector<Ort::Value>> UnStackStates(
      std::vector<Ort::Value> states) const {
    assert(states.size() == 3);

    auto allocator = const_cast<Impl *>(this)->allocator_;

    std::vector<std::vector<Ort::Value>> ans;

    auto shape = states[0].GetTensorTypeAndShapeInfo().GetShape();
    int32_t batch_size = shape[0];
    ans.resize(batch_size);

    if (batch_size == 1) {
      ans[0] = std::move(states);
      return ans;
    }

    for (int32_t i = 0; i != 3; ++i) {
      std::vector<Ort::Value> v;
      if (i == 2) {
        v = Unbind<int64_t>(allocator, &states[i], 0);
      } else {
        v = Unbind(allocator, &states[i], 0);
      }

      assert(v.size() == batch_size);

      for (int32_t b = 0; b != batch_size; ++b) {
        ans[b].push_back(std::move(v[b]));
      }
    }

    return ans;
  }

 private:
  void Init(void *model_data, size_t model_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);

    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      PrintModelMetadata(os, meta_data);
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
    SHERPA_ONNX_READ_META_DATA(window_size_, "window_size");
    SHERPA_ONNX_READ_META_DATA(chunk_shift_, "chunk_shift");
    SHERPA_ONNX_READ_META_DATA(subsampling_factor_, "subsampling_factor");
    SHERPA_ONNX_READ_META_DATA(vocab_size_, "vocab_size");
    SHERPA_ONNX_READ_META_DATA(cache_last_channel_dim1_,
                               "cache_last_channel_dim1");
    SHERPA_ONNX_READ_META_DATA(cache_last_channel_dim2_,
                               "cache_last_channel_dim2");
    SHERPA_ONNX_READ_META_DATA(cache_last_channel_dim3_,
                               "cache_last_channel_dim3");
    SHERPA_ONNX_READ_META_DATA(cache_last_time_dim1_, "cache_last_time_dim1");
    SHERPA_ONNX_READ_META_DATA(cache_last_time_dim2_, "cache_last_time_dim2");
    SHERPA_ONNX_READ_META_DATA(cache_last_time_dim3_, "cache_last_time_dim3");

    // need to increase by 1 since the blank token is not included in computing
    // vocab_size in NeMo.
    vocab_size_ += 1;

    InitStates();
  }

  void InitStates() {
    std::array<int64_t, 4> cache_last_channel_shape{1, cache_last_channel_dim1_,
                                                    cache_last_channel_dim2_,
                                                    cache_last_channel_dim3_};

    cache_last_channel_ = Ort::Value::CreateTensor<float>(
        allocator_, cache_last_channel_shape.data(),
        cache_last_channel_shape.size());

    Fill<float>(&cache_last_channel_, 0);

    std::array<int64_t, 4> cache_last_time_shape{
        1, cache_last_time_dim1_, cache_last_time_dim2_, cache_last_time_dim3_};

    cache_last_time_ = Ort::Value::CreateTensor<float>(
        allocator_, cache_last_time_shape.data(), cache_last_time_shape.size());

    Fill<float>(&cache_last_time_, 0);

    int64_t shape = 1;
    cache_last_channel_len_ =
        Ort::Value::CreateTensor<int64_t>(allocator_, &shape, 1);

    cache_last_channel_len_.GetTensorMutableData<int64_t>()[0] = 0;
  }

 private:
  OnlineModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;

  int32_t window_size_ = 0;
  int32_t chunk_shift_ = 0;
  int32_t subsampling_factor_ = 0;
  int32_t vocab_size_ = 0;
  int32_t cache_last_channel_dim1_ = 0;
  int32_t cache_last_channel_dim2_ = 0;
  int32_t cache_last_channel_dim3_ = 0;
  int32_t cache_last_time_dim1_ = 0;
  int32_t cache_last_time_dim2_ = 0;
  int32_t cache_last_time_dim3_ = 0;

  Ort::Value cache_last_channel_{nullptr};
  Ort::Value cache_last_time_{nullptr};
  Ort::Value cache_last_channel_len_{nullptr};
};

OnlineNeMoCtcModel::OnlineNeMoCtcModel(const OnlineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OnlineNeMoCtcModel::OnlineNeMoCtcModel(Manager *mgr,
                                       const OnlineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OnlineNeMoCtcModel::~OnlineNeMoCtcModel() = default;

std::vector<Ort::Value> OnlineNeMoCtcModel::Forward(
    Ort::Value x, std::vector<Ort::Value> states) const {
  return impl_->Forward(std::move(x), std::move(states));
}

int32_t OnlineNeMoCtcModel::VocabSize() const { return impl_->VocabSize(); }

int32_t OnlineNeMoCtcModel::ChunkLength() const { return impl_->ChunkLength(); }

int32_t OnlineNeMoCtcModel::ChunkShift() const { return impl_->ChunkShift(); }

OrtAllocator *OnlineNeMoCtcModel::Allocator() const {
  return impl_->Allocator();
}

std::vector<Ort::Value> OnlineNeMoCtcModel::GetInitStates() const {
  return impl_->GetInitStates();
}

std::vector<Ort::Value> OnlineNeMoCtcModel::StackStates(
    std::vector<std::vector<Ort::Value>> states) const {
  return impl_->StackStates(std::move(states));
}

std::vector<std::vector<Ort::Value>> OnlineNeMoCtcModel::UnStackStates(
    std::vector<Ort::Value> states) const {
  return impl_->UnStackStates(std::move(states));
}

#if __ANDROID_API__ >= 9
template OnlineNeMoCtcModel::OnlineNeMoCtcModel(
    AAssetManager *mgr, const OnlineModelConfig &config);
#endif

#if __OHOS__
template OnlineNeMoCtcModel::OnlineNeMoCtcModel(
    NativeResourceManager *mgr, const OnlineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-nemo-ctc-model.h
================================================
// sherpa-onnx/csrc/online-nemo-ctc-model.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_ONLINE_NEMO_CTC_MODEL_H_
#define SHERPA_ONNX_CSRC_ONLINE_NEMO_CTC_MODEL_H_

#include <memory>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/online-ctc-model.h"
#include "sherpa-onnx/csrc/online-model-config.h"

namespace sherpa_onnx {

class OnlineNeMoCtcModel : public OnlineCtcModel {
 public:
  explicit OnlineNeMoCtcModel(const OnlineModelConfig &config);

  template <typename Manager>
  OnlineNeMoCtcModel(Manager *mgr, const OnlineModelConfig &config);

  ~OnlineNeMoCtcModel() override;

  // A list of 3 tensors:
  //  - cache_last_channel
  //  - cache_last_time
  //  - cache_last_channel_len
  std::vector<Ort::Value> GetInitStates() const override;

  std::vector<Ort::Value> StackStates(
      std::vector<std::vector<Ort::Value>> states) const override;

  std::vector<std::vector<Ort::Value>> UnStackStates(
      std::vector<Ort::Value> states) const override;

  /**
   *
   * @param x A 3-D tensor of shape (N, T, C). N has to be 1.
   * @param states  It is from GetInitStates() or returned from this method.
   *
   * @return Return a list of tensors
   *    - ans[0] contains log_probs, of shape (N, T, C)
   *    - ans[1:] contains next_states
   */
  std::vector<Ort::Value> Forward(
      Ort::Value x, std::vector<Ort::Value> states) const override;

  /** Return the vocabulary size of the model
   */
  int32_t VocabSize() const override;

  /** Return an allocator for allocating memory
   */
  OrtAllocator *Allocator() const override;

  // The model accepts this number of frames before subsampling as input
  int32_t ChunkLength() const override;

  // Similar to frame_shift in feature extractor, after processing
  // ChunkLength() frames, we advance by ChunkShift() frames
  // before we process the next chunk.
  int32_t ChunkShift() const override;

  bool SupportBatchProcessing() const override { return true; }

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_NEMO_CTC_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/online-paraformer-decoder.h
================================================
// sherpa-onnx/csrc/online-paraformer-decoder.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_ONLINE_PARAFORMER_DECODER_H_
#define SHERPA_ONNX_CSRC_ONLINE_PARAFORMER_DECODER_H_

#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT

namespace sherpa_onnx {

struct OnlineParaformerDecoderResult {
  /// The decoded token IDs
  std::vector<int32_t> tokens;

  int32_t last_non_blank_frame_index = 0;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_PARAFORMER_DECODER_H_


================================================
FILE: sherpa-onnx/csrc/online-paraformer-model-config.cc
================================================
// sherpa-onnx/csrc/online-paraformer-model-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/online-paraformer-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OnlineParaformerModelConfig::Register(ParseOptions *po) {
  po->Register("paraformer-encoder", &encoder,
               "Path to encoder.onnx of paraformer.");
  po->Register("paraformer-decoder", &decoder,
               "Path to decoder.onnx of paraformer.");
}

bool OnlineParaformerModelConfig::Validate() const {
  if (!FileExists(encoder)) {
    SHERPA_ONNX_LOGE("Paraformer encoder '%s' does not exist", encoder.c_str());
    return false;
  }

  if (!FileExists(decoder)) {
    SHERPA_ONNX_LOGE("Paraformer decoder '%s' does not exist", decoder.c_str());
    return false;
  }

  return true;
}

std::string OnlineParaformerModelConfig::ToString() const {
  std::ostringstream os;

  os << "OnlineParaformerModelConfig(";
  os << "encoder=\"" << encoder << "\", ";
  os << "decoder=\"" << decoder << "\")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-paraformer-model-config.h
================================================
// sherpa-onnx/csrc/online-paraformer-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_ONLINE_PARAFORMER_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_ONLINE_PARAFORMER_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OnlineParaformerModelConfig {
  std::string encoder;
  std::string decoder;

  OnlineParaformerModelConfig() = default;

  OnlineParaformerModelConfig(const std::string &encoder,
                              const std::string &decoder)
      : encoder(encoder), decoder(decoder) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_PARAFORMER_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/online-paraformer-model.cc
================================================
// sherpa-onnx/csrc/online-paraformer-model.cc
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/online-paraformer-model.h"

#include <algorithm>
#include <cmath>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class OnlineParaformerModel::Impl {
 public:
  explicit Impl(const OnlineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    {
      auto buf = ReadFile(config.paraformer.encoder);
      InitEncoder(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(config.paraformer.decoder);
      InitDecoder(buf.data(), buf.size());
    }
  }

  template <typename Manager>
  Impl(Manager *mgr, const OnlineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    {
      auto buf = ReadFile(mgr, config.paraformer.encoder);
      InitEncoder(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(mgr, config.paraformer.decoder);
      InitDecoder(buf.data(), buf.size());
    }
  }

  std::vector<Ort::Value> ForwardEncoder(Ort::Value features,
                                         Ort::Value features_length) {
    std::array<Ort::Value, 2> inputs = {std::move(features),
                                        std::move(features_length)};

    return encoder_sess_->Run(
        {}, encoder_input_names_ptr_.data(), inputs.data(), inputs.size(),
        encoder_output_names_ptr_.data(), encoder_output_names_ptr_.size());
  }

  std::vector<Ort::Value> ForwardDecoder(Ort::Value encoder_out,
                                         Ort::Value encoder_out_length,
                                         Ort::Value acoustic_embedding,
                                         Ort::Value acoustic_embedding_length,
                                         std::vector<Ort::Value> states) {
    std::vector<Ort::Value> decoder_inputs;
    decoder_inputs.reserve(4 + states.size());

    decoder_inputs.push_back(std::move(encoder_out));
    decoder_inputs.push_back(std::move(encoder_out_length));
    decoder_inputs.push_back(std::move(acoustic_embedding));
    decoder_inputs.push_back(std::move(acoustic_embedding_length));

    for (auto &v : states) {
      decoder_inputs.push_back(std::move(v));
    }

    return decoder_sess_->Run({}, decoder_input_names_ptr_.data(),
                              decoder_inputs.data(), decoder_inputs.size(),
                              decoder_output_names_ptr_.data(),
                              decoder_output_names_ptr_.size());
  }

  int32_t VocabSize() const { return vocab_size_; }

  int32_t LfrWindowSize() const { return lfr_window_size_; }

  int32_t LfrWindowShift() const { return lfr_window_shift_; }

  int32_t EncoderOutputSize() const { return encoder_output_size_; }

  int32_t DecoderKernelSize() const { return decoder_kernel_size_; }

  int32_t DecoderNumBlocks() const { return decoder_num_blocks_; }

  const std::vector<float> &NegativeMean() const { return neg_mean_; }

  const std::vector<float> &InverseStdDev() const { return inv_stddev_; }

  OrtAllocator *Allocator() { return allocator_; }

 private:
  void InitEncoder(void *model_data, size_t model_data_length) {
    encoder_sess_ = std::make_unique<Ort::Session>(
        env_, model_data, model_data_length, sess_opts_);

    GetInputNames(encoder_sess_.get(), &encoder_input_names_,
                  &encoder_input_names_ptr_);

    GetOutputNames(encoder_sess_.get(), &encoder_output_names_,
                   &encoder_output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = encoder_sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      PrintModelMetadata(os, meta_data);
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
    SHERPA_ONNX_READ_META_DATA(vocab_size_, "vocab_size");
    SHERPA_ONNX_READ_META_DATA(lfr_window_size_, "lfr_window_size");
    SHERPA_ONNX_READ_META_DATA(lfr_window_shift_, "lfr_window_shift");
    SHERPA_ONNX_READ_META_DATA(encoder_output_size_, "encoder_output_size");
    SHERPA_ONNX_READ_META_DATA(decoder_num_blocks_, "decoder_num_blocks");
    SHERPA_ONNX_READ_META_DATA(decoder_kernel_size_, "decoder_kernel_size");

    SHERPA_ONNX_READ_META_DATA_VEC_FLOAT(neg_mean_, "neg_mean");
    SHERPA_ONNX_READ_META_DATA_VEC_FLOAT(inv_stddev_, "inv_stddev");

    float scale = std::sqrt(encoder_output_size_);
    for (auto &f : inv_stddev_) {
      f *= scale;
    }
  }

  void InitDecoder(void *model_data, size_t model_data_length) {
    decoder_sess_ = std::make_unique<Ort::Session>(
        env_, model_data, model_data_length, sess_opts_);

    GetInputNames(decoder_sess_.get(), &decoder_input_names_,
                  &decoder_input_names_ptr_);

    GetOutputNames(decoder_sess_.get(), &decoder_output_names_,
                   &decoder_output_names_ptr_);
  }

 private:
  OnlineModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> encoder_sess_;

  std::vector<std::string> encoder_input_names_;
  std::vector<const char *> encoder_input_names_ptr_;

  std::vector<std::string> encoder_output_names_;
  std::vector<const char *> encoder_output_names_ptr_;

  std::unique_ptr<Ort::Session> decoder_sess_;

  std::vector<std::string> decoder_input_names_;
  std::vector<const char *> decoder_input_names_ptr_;

  std::vector<std::string> decoder_output_names_;
  std::vector<const char *> decoder_output_names_ptr_;

  std::vector<float> neg_mean_;
  std::vector<float> inv_stddev_;

  int32_t vocab_size_ = 0;  // initialized in Init
  int32_t lfr_window_size_ = 0;
  int32_t lfr_window_shift_ = 0;

  int32_t encoder_output_size_ = 0;
  int32_t decoder_num_blocks_ = 0;
  int32_t decoder_kernel_size_ = 0;
};

OnlineParaformerModel::OnlineParaformerModel(const OnlineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OnlineParaformerModel::OnlineParaformerModel(Manager *mgr,
                                             const OnlineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OnlineParaformerModel::~OnlineParaformerModel() = default;

std::vector<Ort::Value> OnlineParaformerModel::ForwardEncoder(
    Ort::Value features, Ort::Value features_length) const {
  return impl_->ForwardEncoder(std::move(features), std::move(features_length));
}

std::vector<Ort::Value> OnlineParaformerModel::ForwardDecoder(
    Ort::Value encoder_out, Ort::Value encoder_out_length,
    Ort::Value acoustic_embedding, Ort::Value acoustic_embedding_length,
    std::vector<Ort::Value> states) const {
  return impl_->ForwardDecoder(
      std::move(encoder_out), std::move(encoder_out_length),
      std::move(acoustic_embedding), std::move(acoustic_embedding_length),
      std::move(states));
}

int32_t OnlineParaformerModel::VocabSize() const { return impl_->VocabSize(); }

int32_t OnlineParaformerModel::LfrWindowSize() const {
  return impl_->LfrWindowSize();
}
int32_t OnlineParaformerModel::LfrWindowShift() const {
  return impl_->LfrWindowShift();
}

int32_t OnlineParaformerModel::EncoderOutputSize() const {
  return impl_->EncoderOutputSize();
}

int32_t OnlineParaformerModel::DecoderKernelSize() const {
  return impl_->DecoderKernelSize();
}

int32_t OnlineParaformerModel::DecoderNumBlocks() const {
  return impl_->DecoderNumBlocks();
}

const std::vector<float> &OnlineParaformerModel::NegativeMean() const {
  return impl_->NegativeMean();
}
const std::vector<float> &OnlineParaformerModel::InverseStdDev() const {
  return impl_->InverseStdDev();
}

OrtAllocator *OnlineParaformerModel::Allocator() const {
  return impl_->Allocator();
}

#if __ANDROID_API__ >= 9
template OnlineParaformerModel::OnlineParaformerModel(
    AAssetManager *mgr, const OnlineModelConfig &config);
#endif

#if __OHOS__
template OnlineParaformerModel::OnlineParaformerModel(
    NativeResourceManager *mgr, const OnlineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-paraformer-model.h
================================================
// sherpa-onnx/csrc/online-paraformer-model.h
//
// Copyright (c)  2022-2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_ONLINE_PARAFORMER_MODEL_H_
#define SHERPA_ONNX_CSRC_ONLINE_PARAFORMER_MODEL_H_

#include <memory>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/online-model-config.h"

namespace sherpa_onnx {

class OnlineParaformerModel {
 public:
  explicit OnlineParaformerModel(const OnlineModelConfig &config);

  template <typename Manager>
  OnlineParaformerModel(Manager *mgr, const OnlineModelConfig &config);

  ~OnlineParaformerModel();

  std::vector<Ort::Value> ForwardEncoder(Ort::Value features,
                                         Ort::Value features_length) const;

  std::vector<Ort::Value> ForwardDecoder(Ort::Value encoder_out,
                                         Ort::Value encoder_out_length,
                                         Ort::Value acoustic_embedding,
                                         Ort::Value acoustic_embedding_length,
                                         std::vector<Ort::Value> states) const;

  /** Return the vocabulary size of the model
   */
  int32_t VocabSize() const;

  /** It is lfr_m in config.yaml
   */
  int32_t LfrWindowSize() const;

  /** It is lfr_n in config.yaml
   */
  int32_t LfrWindowShift() const;

  int32_t EncoderOutputSize() const;

  int32_t DecoderKernelSize() const;
  int32_t DecoderNumBlocks() const;

  /** Return negative mean for CMVN
   */
  const std::vector<float> &NegativeMean() const;

  /** Return inverse stddev for CMVN
   */
  const std::vector<float> &InverseStdDev() const;

  /** Return an allocator for allocating memory
   */
  OrtAllocator *Allocator() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_PARAFORMER_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/online-punctuation-cnn-bilstm-impl.h
================================================
// sherpa-onnx/csrc/online-punctuation-cnn-bilstm-impl.h
//
// Copyright (c) 2024 Jian You (jianyou@cisco.com, Cisco Systems)

#ifndef SHERPA_ONNX_CSRC_ONLINE_PUNCTUATION_CNN_BILSTM_IMPL_H_
#define SHERPA_ONNX_CSRC_ONLINE_PUNCTUATION_CNN_BILSTM_IMPL_H_

#include <math.h>

#include <algorithm>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include <chrono>  // NOLINT

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/math.h"
#include "sherpa-onnx/csrc/online-cnn-bilstm-model-meta-data.h"
#include "sherpa-onnx/csrc/online-cnn-bilstm-model.h"
#include "sherpa-onnx/csrc/online-punctuation-impl.h"
#include "sherpa-onnx/csrc/online-punctuation.h"
#include "sherpa-onnx/csrc/text-utils.h"
#include "ssentencepiece/csrc/ssentencepiece.h"

namespace sherpa_onnx {

static const int32_t kMaxSeqLen = 200;

class OnlinePunctuationCNNBiLSTMImpl : public OnlinePunctuationImpl {
 public:
  explicit OnlinePunctuationCNNBiLSTMImpl(const OnlinePunctuationConfig &config)
      : config_(config), model_(config.model) {
    if (!config_.model.bpe_vocab.empty()) {
      bpe_encoder_ = std::make_unique<ssentencepiece::Ssentencepiece>(
          config_.model.bpe_vocab);
    }
  }

  template <typename Manager>
  OnlinePunctuationCNNBiLSTMImpl(Manager *mgr,
                                 const OnlinePunctuationConfig &config)
      : config_(config), model_(mgr, config.model) {
    if (!config_.model.bpe_vocab.empty()) {
      auto buf = ReadFile(mgr, config_.model.bpe_vocab);
      std::istringstream iss(std::string(buf.begin(), buf.end()));
      bpe_encoder_ = std::make_unique<ssentencepiece::Ssentencepiece>(iss);
    }
  }

  std::string AddPunctuationWithCase(const std::string &text) const override {
    if (text.empty()) {
      return {};
    }

    std::vector<int32_t> tokens_list;     // N * kMaxSeqLen
    std::vector<int32_t> valids_list;     // N * kMaxSeqLen
    std::vector<int32_t> label_len_list;  // N

    EncodeSentences(text, tokens_list, valids_list, label_len_list);

    const auto &meta_data = model_.GetModelMetadata();

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    int32_t n = label_len_list.size();

    std::array<int64_t, 2> token_ids_shape = {n, kMaxSeqLen};
    Ort::Value token_ids = Ort::Value::CreateTensor(
        memory_info, tokens_list.data(), tokens_list.size(),
        token_ids_shape.data(), token_ids_shape.size());

    std::array<int64_t, 2> valid_ids_shape = {n, kMaxSeqLen};
    Ort::Value valid_ids = Ort::Value::CreateTensor(
        memory_info, valids_list.data(), valids_list.size(),
        valid_ids_shape.data(), valid_ids_shape.size());

    std::array<int64_t, 1> label_len_shape = {n};
    Ort::Value label_len = Ort::Value::CreateTensor(
        memory_info, label_len_list.data(), label_len_list.size(),
        label_len_shape.data(), label_len_shape.size());

    auto pair = model_.Forward(std::move(token_ids), std::move(valid_ids),
                               std::move(label_len));

    std::vector<int32_t> case_pred;
    std::vector<int32_t> punct_pred;
    const float *active_case_logits = pair.first.GetTensorData<float>();
    const float *active_punct_logits = pair.second.GetTensorData<float>();
    std::vector<int64_t> case_logits_shape =
        pair.first.GetTensorTypeAndShapeInfo().GetShape();

    for (int32_t i = 0; i < case_logits_shape[0]; ++i) {
      const float *p_cur_case = active_case_logits + i * meta_data.num_cases;
      auto index_case = static_cast<int32_t>(std::distance(
          p_cur_case,
          std::max_element(p_cur_case, p_cur_case + meta_data.num_cases)));
      case_pred.push_back(index_case);

      const float *p_cur_punct =
          active_punct_logits + i * meta_data.num_punctuations;
      auto index_punct = static_cast<int32_t>(std::distance(
          p_cur_punct,
          std::max_element(p_cur_punct,
                           p_cur_punct + meta_data.num_punctuations)));
      punct_pred.push_back(index_punct);
    }

    std::string ans = DecodeSentences(text, case_pred, punct_pred);

    return ans;
  }

 private:
  void EncodeSentences(const std::string &text,
                       std::vector<int32_t> &tokens_list,             // NOLINT
                       std::vector<int32_t> &valids_list,             // NOLINT
                       std::vector<int32_t> &label_len_list) const {  // NOLINT
    std::vector<int32_t> tokens;
    std::vector<int32_t> valids;
    int32_t label_len = 0;

    tokens.push_back(1);  // hardcode 1 now, 1 - <s>
    valids.push_back(1);

    std::stringstream ss(text);
    std::string word;
    while (ss >> word) {
      std::vector<int32_t> word_tokens;
      bpe_encoder_->Encode(word, &word_tokens);

      int32_t seq_len = tokens.size() + word_tokens.size();
      if (seq_len > kMaxSeqLen - 1) {
        tokens.push_back(2);  // hardcode 2 now, 2 - </s>
        valids.push_back(1);

        label_len = std::count(valids.begin(), valids.end(), 1);

        if (tokens.size() < kMaxSeqLen) {
          tokens.resize(kMaxSeqLen, 0);
          valids.resize(kMaxSeqLen, 0);
        }

        assert(tokens.size() == kMaxSeqLen);
        assert(valids.size() == kMaxSeqLen);

        tokens_list.insert(tokens_list.end(), tokens.begin(), tokens.end());
        valids_list.insert(valids_list.end(), valids.begin(), valids.end());
        label_len_list.push_back(label_len);

        std::vector<int32_t>().swap(tokens);
        std::vector<int32_t>().swap(valids);
        label_len = 0;
        tokens.push_back(1);  // hardcode 1 now, 1 - <s>
        valids.push_back(1);
      }

      tokens.insert(tokens.end(), word_tokens.begin(), word_tokens.end());
      valids.push_back(1);  // only the first sub word is valid
      int32_t remaining_size = static_cast<int32_t>(word_tokens.size()) - 1;
      if (remaining_size > 0) {
        int32_t valids_cur_size = static_cast<int32_t>(valids.size());
        valids.resize(valids_cur_size + remaining_size, 0);
      }
    }

    if (tokens.size() > 0) {
      tokens.push_back(2);  // hardcode 2 now, 2 - </s>
      valids.push_back(1);

      label_len = std::count(valids.begin(), valids.end(), 1);

      if (tokens.size() < kMaxSeqLen) {
        tokens.resize(kMaxSeqLen, 0);
        valids.resize(kMaxSeqLen, 0);
      }

      assert(tokens.size() == kMaxSeqLen);
      assert(valids.size() == kMaxSeqLen);

      tokens_list.insert(tokens_list.end(), tokens.begin(), tokens.end());
      valids_list.insert(valids_list.end(), valids.begin(), valids.end());
      label_len_list.push_back(label_len);
    }
  }

  std::string DecodeSentences(const std::string &raw_text,
                              const std::vector<int32_t> &case_pred,
                              const std::vector<int32_t> &punct_pred) const {
    std::string result_text;
    std::istringstream iss(raw_text);
    std::vector<std::string> words;
    std::string word;

    while (iss >> word) {
      words.emplace_back(word);
    }

    assert(words.size() == case_pred.size());
    assert(words.size() == punct_pred.size());

    for (int32_t i = 0; i < words.size(); ++i) {
      std::string prefix = ((i != 0) ? " " : "");
      result_text += prefix;
      switch (case_pred[i]) {
        case 1:  // upper
        {
          std::transform(words[i].begin(), words[i].end(), words[i].begin(),
                         [](auto c) { return std::toupper(c); });
          result_text += words[i];
          break;
        }
        case 2:  // cap
        {
          words[i][0] = std::toupper(words[i][0]);
          result_text += words[i];
          break;
        }
        case 3:  // mix case
        {
          // TODO(frankyoujian):
          // Need to add a map containing supported mix case words so that we
          // can fetch the predicted word from the map e.g. mcdonald's ->
          // McDonald's
          result_text += words[i];
          break;
        }
        default: {
          result_text += words[i];
          break;
        }
      }

      std::string suffix;
      switch (punct_pred[i]) {
        case 1:  // comma
        {
          suffix = ",";
          break;
        }
        case 2:  // period
        {
          suffix = ".";
          break;
        }
        case 3:  // question
        {
          suffix = "?";
          break;
        }
        default:
          break;
      }

      result_text += suffix;
    }

    return result_text;
  }

 private:
  OnlinePunctuationConfig config_;
  OnlineCNNBiLSTMModel model_;
  std::unique_ptr<ssentencepiece::Ssentencepiece> bpe_encoder_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_PUNCTUATION_CNN_BILSTM_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/online-punctuation-impl.cc
================================================
// sherpa-onnx/csrc/online-punctuation-impl.cc
//
// Copyright (c) 2024 Jian You (jianyou@cisco.com, Cisco Systems)

#include "sherpa-onnx/csrc/online-punctuation-impl.h"

#include <memory>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/online-punctuation-cnn-bilstm-impl.h"

namespace sherpa_onnx {

std::unique_ptr<OnlinePunctuationImpl> OnlinePunctuationImpl::Create(
    const OnlinePunctuationConfig &config) {
  if (!config.model.cnn_bilstm.empty() && !config.model.bpe_vocab.empty()) {
    return std::make_unique<OnlinePunctuationCNNBiLSTMImpl>(config);
  }

  SHERPA_ONNX_LOGE(
      "Please specify a punctuation model and bpe vocab! Return a null "
      "pointer");
  return nullptr;
}

template <typename Manager>
std::unique_ptr<OnlinePunctuationImpl> OnlinePunctuationImpl::Create(
    Manager *mgr, const OnlinePunctuationConfig &config) {
  if (!config.model.cnn_bilstm.empty() && !config.model.bpe_vocab.empty()) {
    return std::make_unique<OnlinePunctuationCNNBiLSTMImpl>(mgr, config);
  }

  SHERPA_ONNX_LOGE(
      "Please specify a punctuation model and bpe vocab! Return a null "
      "pointer");
  return nullptr;
}

#if __ANDROID_API__ >= 9
template std::unique_ptr<OnlinePunctuationImpl> OnlinePunctuationImpl::Create(
    AAssetManager *mgr, const OnlinePunctuationConfig &config);
#endif

#if __OHOS__
template std::unique_ptr<OnlinePunctuationImpl> OnlinePunctuationImpl::Create(
    NativeResourceManager *mgr, const OnlinePunctuationConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-punctuation-impl.h
================================================
// sherpa-onnx/csrc/online-punctuation-impl.h
//
// Copyright (c) 2024 Jian You (jianyou@cisco.com, Cisco Systems)

#ifndef SHERPA_ONNX_CSRC_ONLINE_PUNCTUATION_IMPL_H_
#define SHERPA_ONNX_CSRC_ONLINE_PUNCTUATION_IMPL_H_

#include <memory>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/online-punctuation.h"

namespace sherpa_onnx {

class OnlinePunctuationImpl {
 public:
  virtual ~OnlinePunctuationImpl() = default;

  static std::unique_ptr<OnlinePunctuationImpl> Create(
      const OnlinePunctuationConfig &config);

  template <typename Manager>
  static std::unique_ptr<OnlinePunctuationImpl> Create(
      Manager *mgr, const OnlinePunctuationConfig &config);

  virtual std::string AddPunctuationWithCase(const std::string &text) const = 0;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_PUNCTUATION_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/online-punctuation-model-config.cc
================================================
// sherpa-onnx/csrc/online-punctuation-model-config.cc
//
// Copyright (c) 2024 Jian You (jianyou@cisco.com, Cisco Systems)

#include "sherpa-onnx/csrc/online-punctuation-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OnlinePunctuationModelConfig::Register(ParseOptions *po) {
  po->Register("cnn-bilstm", &cnn_bilstm,
               "Path to the light-weight CNN-BiLSTM model");

  po->Register("bpe-vocab", &bpe_vocab, "Path to the bpe vocab file");

  po->Register("num-threads", &num_threads,
               "Number of threads to run the neural network");

  po->Register("debug", &debug,
               "true to print model information while loading it.");

  po->Register("provider", &provider,
               "Specify a provider to use: cpu, cuda, coreml");
}

bool OnlinePunctuationModelConfig::Validate() const {
  if (cnn_bilstm.empty()) {
    SHERPA_ONNX_LOGE("Please provide --cnn-bilstm");
    return false;
  }

  if (!FileExists(cnn_bilstm)) {
    SHERPA_ONNX_LOGE("--cnn-bilstm '%s' does not exist", cnn_bilstm.c_str());
    return false;
  }

  if (bpe_vocab.empty()) {
    SHERPA_ONNX_LOGE("Please provide --bpe-vocab");
    return false;
  }

  if (!FileExists(bpe_vocab)) {
    SHERPA_ONNX_LOGE("--bpe-vocab '%s' does not exist", bpe_vocab.c_str());
    return false;
  }

  return true;
}

std::string OnlinePunctuationModelConfig::ToString() const {
  std::ostringstream os;

  os << "OnlinePunctuationModelConfig(";
  os << "cnn_bilstm=\"" << cnn_bilstm << "\", ";
  os << "bpe_vocab=\"" << bpe_vocab << "\", ";
  os << "num_threads=" << num_threads << ", ";
  os << "debug=" << (debug ? "True" : "False") << ", ";
  os << "provider=\"" << provider << "\")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-punctuation-model-config.h
================================================
// sherpa-onnx/csrc/online-punctuation-model-config.h
//
// Copyright (c) 2024 Jian You (jianyou@cisco.com, Cisco Systems)

#ifndef SHERPA_ONNX_CSRC_ONLINE_PUNCTUATION_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_ONLINE_PUNCTUATION_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OnlinePunctuationModelConfig {
  std::string cnn_bilstm;
  std::string bpe_vocab;

  int32_t num_threads = 1;
  bool debug = false;
  std::string provider = "cpu";

  OnlinePunctuationModelConfig() = default;

  OnlinePunctuationModelConfig(const std::string &cnn_bilstm,
                               const std::string &bpe_vocab,
                               int32_t num_threads, bool debug,
                               const std::string &provider)
      : cnn_bilstm(cnn_bilstm),
        bpe_vocab(bpe_vocab),
        num_threads(num_threads),
        debug(debug),
        provider(provider) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_PUNCTUATION_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/online-punctuation.cc
================================================
// sherpa-onnx/csrc/online-punctuation.cc
//
// Copyright (c) 2024 Jian You (jianyou@cisco.com, Cisco Systems)

#include "sherpa-onnx/csrc/online-punctuation.h"

#include <string>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/online-punctuation-impl.h"

namespace sherpa_onnx {

void OnlinePunctuationConfig::Register(ParseOptions *po) { model.Register(po); }

bool OnlinePunctuationConfig::Validate() const {
  if (!model.Validate()) {
    return false;
  }

  return true;
}

std::string OnlinePunctuationConfig::ToString() const {
  std::ostringstream os;

  os << "OnlinePunctuationConfig(";
  os << "model=" << model.ToString() << ")";

  return os.str();
}

OnlinePunctuation::OnlinePunctuation(const OnlinePunctuationConfig &config)
    : impl_(OnlinePunctuationImpl::Create(config)) {}

template <typename Manager>
OnlinePunctuation::OnlinePunctuation(Manager *mgr,
                                     const OnlinePunctuationConfig &config)
    : impl_(OnlinePunctuationImpl::Create(mgr, config)) {}

OnlinePunctuation::~OnlinePunctuation() = default;

std::string OnlinePunctuation::AddPunctuationWithCase(
    const std::string &text) const {
  return impl_->AddPunctuationWithCase(text);
}

#if __ANDROID_API__ >= 9
template OnlinePunctuation::OnlinePunctuation(
    AAssetManager *mgr, const OnlinePunctuationConfig &config);
#endif

#if __OHOS__
template OnlinePunctuation::OnlinePunctuation(
    NativeResourceManager *mgr, const OnlinePunctuationConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-punctuation.h
================================================
// sherpa-onnx/csrc/online-punctuation.h
//
// Copyright (c) 2024 Jian You (jianyou@cisco.com, Cisco Systems)

#ifndef SHERPA_ONNX_CSRC_ONLINE_PUNCTUATION_H_
#define SHERPA_ONNX_CSRC_ONLINE_PUNCTUATION_H_

#include <memory>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/online-punctuation-model-config.h"
#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OnlinePunctuationConfig {
  OnlinePunctuationModelConfig model;

  OnlinePunctuationConfig() = default;

  explicit OnlinePunctuationConfig(const OnlinePunctuationModelConfig &model)
      : model(model) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

class OnlinePunctuationImpl;

class OnlinePunctuation {
 public:
  explicit OnlinePunctuation(const OnlinePunctuationConfig &config);

  template <typename Manager>
  OnlinePunctuation(Manager *mgr, const OnlinePunctuationConfig &config);

  ~OnlinePunctuation();

  // Add punctuation and casing to the input text and return it.
  std::string AddPunctuationWithCase(const std::string &text) const;

 private:
  std::unique_ptr<OnlinePunctuationImpl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_PUNCTUATION_H_


================================================
FILE: sherpa-onnx/csrc/online-recognizer-ctc-impl.h
================================================
// sherpa-onnx/csrc/online-recognizer-ctc-impl.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_ONLINE_RECOGNIZER_CTC_IMPL_H_
#define SHERPA_ONNX_CSRC_ONLINE_RECOGNIZER_CTC_IMPL_H_

#include <algorithm>
#include <cassert>
#include <ios>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-whisper-model.h"
#include "sherpa-onnx/csrc/online-ctc-decoder.h"
#include "sherpa-onnx/csrc/online-ctc-fst-decoder.h"
#include "sherpa-onnx/csrc/online-ctc-greedy-search-decoder.h"
#include "sherpa-onnx/csrc/online-ctc-model.h"
#include "sherpa-onnx/csrc/online-recognizer-impl.h"
#include "sherpa-onnx/csrc/symbol-table.h"

namespace sherpa_onnx {

static OnlineRecognizerResult ConvertCtc(const OnlineCtcDecoderResult &src,
                                  const SymbolTable &sym_table,
                                  float frame_shift_ms,
                                  int32_t subsampling_factor, int32_t segment,
                                  int32_t frames_since_start) {
  OnlineRecognizerResult r;
  r.tokens.reserve(src.tokens.size());
  r.timestamps.reserve(src.tokens.size());

  std::string text;
  for (auto i : src.tokens) {
    auto sym = sym_table[i];

    text.append(sym);

    if (sym.size() == 1 && (sym[0] < 0x20 || sym[0] > 0x7e)) {
      // for bpe models with byte_fallback
      // (but don't rewrite printable characters 0x20..0x7e,
      //  which collide with standard BPE units)
      std::ostringstream os;
      os << "<0x" << std::hex << std::uppercase
         << (static_cast<int32_t>(sym[0]) & 0xff) << ">";
      sym = os.str();
    }

    r.tokens.push_back(std::move(sym));
  }

  if (sym_table.IsByteBpe()) {
    text = sym_table.DecodeByteBpe(text);
  }

  r.text = std::move(text);

  float frame_shift_s = frame_shift_ms / 1000. * subsampling_factor;
  for (auto t : src.timestamps) {
    float time = frame_shift_s * t;
    r.timestamps.push_back(time);
  }

  r.segment = segment;
  r.words = std::move(src.words);
  r.start_time = frames_since_start * frame_shift_ms / 1000.;

  return r;
}

class OnlineRecognizerCtcImpl : public OnlineRecognizerImpl {
 public:
  explicit OnlineRecognizerCtcImpl(const OnlineRecognizerConfig &config)
      : OnlineRecognizerImpl(config),
        config_(config),
        model_(OnlineCtcModel::Create(config.model_config)),
        endpoint_(config_.endpoint_config) {
    if (!config.model_config.tokens_buf.empty()) {
      sym_ = SymbolTable(config.model_config.tokens_buf, false);
    } else {
      sym_ = SymbolTable(config.model_config.tokens, true);
    }
    PostInit();
  }

  template <typename Manager>
  explicit OnlineRecognizerCtcImpl(Manager *mgr,
                                   const OnlineRecognizerConfig &config)
      : OnlineRecognizerImpl(mgr, config),
        config_(config),
        model_(OnlineCtcModel::Create(mgr, config.model_config)),
        endpoint_(config_.endpoint_config) {
    if (!config.model_config.tokens_buf.empty()) {
      sym_ = SymbolTable(config.model_config.tokens_buf, false);
    } else {
      sym_ = SymbolTable(mgr, config.model_config.tokens);
    }
    PostInit();
  }

  std::unique_ptr<OnlineStream> CreateStream() const override {
    auto stream = std::make_unique<OnlineStream>(config_.feat_config);
    stream->SetStates(model_->GetInitStates());
    stream->SetFasterDecoder(decoder_->CreateFasterDecoder());

    return stream;
  }

  bool IsReady(OnlineStream *s) const override {
    return s->GetNumProcessedFrames() + model_->ChunkLength() <
           s->NumFramesReady();
  }

  void DecodeStreams(OnlineStream **ss, int32_t n) const override {
    if (n == 1 || !model_->SupportBatchProcessing()) {
      for (int32_t i = 0; i != n; ++i) {
        DecodeStream(ss[i]);
      }
      return;
    }

    // batch processing
    int32_t chunk_length = model_->ChunkLength();
    int32_t chunk_shift = model_->ChunkShift();

    int32_t feat_dim = ss[0]->FeatureDim();

    std::vector<OnlineCtcDecoderResult> results(n);
    std::vector<float> features_vec(n * chunk_length * feat_dim);
    std::vector<std::vector<Ort::Value>> states_vec(n);
    std::vector<int64_t> all_processed_frames(n);

    for (int32_t i = 0; i != n; ++i) {
      const auto num_processed_frames = ss[i]->GetNumProcessedFrames();
      std::vector<float> features =
          ss[i]->GetFrames(num_processed_frames, chunk_length);
      if (config_.feat_config.is_whisper) {
        OfflineWhisperModel::NormalizeFeatures(features.data(), chunk_length,
                                               feat_dim);
      }

      // Question: should num_processed_frames include chunk_shift?
      ss[i]->GetNumProcessedFrames() += chunk_shift;

      std::copy(features.begin(), features.end(),
                features_vec.data() + i * chunk_length * feat_dim);

      results[i] = std::move(ss[i]->GetCtcResult());
      states_vec[i] = std::move(ss[i]->GetStates());
      all_processed_frames[i] = num_processed_frames;
    }

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::array<int64_t, 3> x_shape{n, chunk_length, feat_dim};

    Ort::Value x = Ort::Value::CreateTensor(memory_info, features_vec.data(),
                                            features_vec.size(), x_shape.data(),
                                            x_shape.size());

    auto states = model_->StackStates(std::move(states_vec));
    int32_t num_states = states.size();
    auto out = model_->Forward(std::move(x), std::move(states));
    std::vector<Ort::Value> out_states;
    out_states.reserve(num_states);

    for (int32_t k = 1; k != num_states + 1; ++k) {
      out_states.push_back(std::move(out[k]));
    }

    std::vector<std::vector<Ort::Value>> next_states =
        model_->UnStackStates(std::move(out_states));

    std::vector<int64_t> log_probs_shape =
        out[0].GetTensorTypeAndShapeInfo().GetShape();
    decoder_->Decode(out[0].GetTensorData<float>(), log_probs_shape[0],
                     log_probs_shape[1], log_probs_shape[2], &results, ss, n);

    for (int32_t k = 0; k != n; ++k) {
      ss[k]->SetCtcResult(results[k]);
      ss[k]->SetStates(std::move(next_states[k]));
    }
  }

  OnlineRecognizerResult GetResult(OnlineStream *s) const override {
    OnlineCtcDecoderResult decoder_result = s->GetCtcResult();

    // TODO(fangjun): Remember to change these constants if needed
    int32_t frame_shift_ms = 10;
    int32_t subsampling_factor = 4;
    if (!config_.model_config.t_one_ctc.model.empty()) {
      // each input frame is of 300ms long, which produces 10 output frames.
      // so frame_shift_ms is 300/10 = 30ms
      //
      frame_shift_ms = 30;
      subsampling_factor = 1;
    }

    auto r =
        ConvertCtc(decoder_result, sym_, frame_shift_ms, subsampling_factor,
                   s->GetCurrentSegment(), s->GetNumFramesSinceStart());
    r.text = ApplyInverseTextNormalization(std::move(r.text));
    r.text = ApplyHomophoneReplacer(std::move(r.text));
    return r;
  }

  bool IsEndpoint(OnlineStream *s) const override {
    if (!config_.enable_endpoint) {
      return false;
    }

    int32_t num_processed_frames = s->GetNumProcessedFrames();

    float frame_shift_in_seconds = 0.01;
    int32_t subsampling_factor = 4;
    if (!config_.model_config.t_one_ctc.model.empty()) {
      frame_shift_in_seconds = 0.03;
      subsampling_factor = 1;
    }

    int32_t trailing_silence_frames =
        s->GetCtcResult().num_trailing_blanks * subsampling_factor;

    return endpoint_.IsEndpoint(num_processed_frames, trailing_silence_frames,
                                frame_shift_in_seconds);
  }

  void Reset(OnlineStream *s) const override {
    // segment is incremented only when the last
    // result is not empty
    const auto &r = s->GetCtcResult();
    if (!r.tokens.empty()) {
      s->GetCurrentSegment() += 1;
    }

    // clear result
    s->SetCtcResult({});

    // clear states
    s->SetStates(model_->GetInitStates());

    s->GetFasterDecoderProcessedFrames() = 0;

    // Note: We only update counters. The underlying audio samples
    // are not discarded.
    s->Reset();
  }

 private:
  void PostInit() {
    if (!config_.model_config.wenet_ctc.model.empty()) {
      // WeNet CTC models assume input samples are in the range
      // [-32768, 32767], so we set normalize_samples to false
      config_.feat_config.normalize_samples = false;
    }

    if (!config_.model_config.t_one_ctc.model.empty()) {
      config_.feat_config.is_t_one = true;
      config_.feat_config.frame_length_ms = 300;
      config_.feat_config.frame_shift_ms = 300;
      config_.feat_config.sampling_rate = 8000;
    }

    if (model_->UseWhisperFeature()) {
      config_.feat_config.is_whisper = true;
    }

    InitDecoder();
  }
  void InitDecoder() {
    if (!sym_.Contains("<blk>") && !sym_.Contains("<eps>") &&
        !sym_.Contains("<blank>")) {
      SHERPA_ONNX_LOGE(
          "We expect that tokens.txt contains "
          "the symbol <blk> or <eps> or <blank> and its ID.");
      exit(-1);
    }

    int32_t blank_id = 0;
    if (sym_.Contains("<blk>")) {
      blank_id = sym_["<blk>"];
    } else if (sym_.Contains("<eps>")) {
      // for tdnn models of the yesno recipe from icefall
      blank_id = sym_["<eps>"];
    } else if (sym_.Contains("<blank>")) {
      // for WeNet CTC models
      blank_id = sym_["<blank>"];
    }

    if (!config_.ctc_fst_decoder_config.graph.empty()) {
      decoder_ = std::make_unique<OnlineCtcFstDecoder>(
          config_.ctc_fst_decoder_config, blank_id);
    } else if (config_.decoding_method == "greedy_search") {
      decoder_ = std::make_unique<OnlineCtcGreedySearchDecoder>(blank_id);
    } else {
      SHERPA_ONNX_LOGE(
          "Unsupported decoding method: %s for streaming CTC models",
          config_.decoding_method.c_str());
      exit(-1);
    }
  }

  void DecodeStream(OnlineStream *s) const {
    int32_t chunk_length = model_->ChunkLength();
    int32_t chunk_shift = model_->ChunkShift();

    int32_t feat_dim = s->FeatureDim();

    const auto num_processed_frames = s->GetNumProcessedFrames();
    std::vector<float> frames =
        s->GetFrames(num_processed_frames, chunk_length);

    if (config_.feat_config.is_whisper) {
      OfflineWhisperModel::NormalizeFeatures(frames.data(), chunk_length,
                                             feat_dim);
    }

    s->GetNumProcessedFrames() += chunk_shift;

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::array<int64_t, 3> x_shape{1, chunk_length, feat_dim};
    Ort::Value x =
        Ort::Value::CreateTensor(memory_info, frames.data(), frames.size(),
                                 x_shape.data(), x_shape.size());
    auto out = model_->Forward(std::move(x), std::move(s->GetStates()));
    int32_t num_states = static_cast<int32_t>(out.size()) - 1;

    std::vector<Ort::Value> states;
    states.reserve(num_states);

    for (int32_t i = 0; i != num_states; ++i) {
      states.push_back(std::move(out[i + 1]));
    }
    s->SetStates(std::move(states));

    std::vector<OnlineCtcDecoderResult> results(1);
    results[0] = std::move(s->GetCtcResult());

    std::vector<int64_t> log_probs_shape =
        out[0].GetTensorTypeAndShapeInfo().GetShape();
    decoder_->Decode(out[0].GetTensorData<float>(), log_probs_shape[0],
                     log_probs_shape[1], log_probs_shape[2], &results, &s, 1);
    s->SetCtcResult(results[0]);
  }

 private:
  OnlineRecognizerConfig config_;
  std::unique_ptr<OnlineCtcModel> model_;
  std::unique_ptr<OnlineCtcDecoder> decoder_;
  SymbolTable sym_;
  Endpoint endpoint_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_RECOGNIZER_CTC_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/online-recognizer-impl.cc
================================================
// sherpa-onnx/csrc/online-recognizer-impl.cc
//
// Copyright (c)  2023-2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/online-recognizer-impl.h"

#include <memory>
#include <string>
#include <sstream>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "fst/extensions/far/far.h"
#include "kaldifst/csrc/kaldi-fst-io.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/online-recognizer-ctc-impl.h"
#include "sherpa-onnx/csrc/online-recognizer-paraformer-impl.h"
#include "sherpa-onnx/csrc/online-recognizer-transducer-impl.h"
#include "sherpa-onnx/csrc/online-recognizer-transducer-nemo-impl.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/text-utils.h"

#if SHERPA_ONNX_ENABLE_RKNN
#include "sherpa-onnx/csrc/rknn/online-recognizer-ctc-rknn-impl.h"
#include "sherpa-onnx/csrc/rknn/online-recognizer-transducer-rknn-impl.h"
#endif

namespace sherpa_onnx {

std::unique_ptr<OnlineRecognizerImpl> OnlineRecognizerImpl::Create(
    const OnlineRecognizerConfig &config) {
  if (config.model_config.provider_config.provider == "rknn") {
#if SHERPA_ONNX_ENABLE_RKNN
    if (config.model_config.transducer.encoder.empty() &&
        config.model_config.zipformer2_ctc.model.empty()) {
      SHERPA_ONNX_LOGE(
          "Only Zipformer transducers and CTC models are currently supported "
          "by rknn. Fallback to CPU. Make sure you pass an onnx model");
    } else if (!config.model_config.transducer.encoder.empty()) {
      return std::make_unique<OnlineRecognizerTransducerRknnImpl>(config);
    } else if (!config.model_config.zipformer2_ctc.model.empty()) {
      return std::make_unique<OnlineRecognizerCtcRknnImpl>(config);
    }
#else
    SHERPA_ONNX_LOGE(
        "Please rebuild sherpa-onnx with -DSHERPA_ONNX_ENABLE_RKNN=ON if you "
        "want to use rknn.");
    SHERPA_ONNX_EXIT(-1);
    return nullptr;
#endif
  }

  if (!config.model_config.transducer.encoder.empty()) {
    Ort::Env env(ORT_LOGGING_LEVEL_ERROR);

    Ort::SessionOptions sess_opts;
    sess_opts.SetIntraOpNumThreads(1);
    sess_opts.SetInterOpNumThreads(1);

    auto decoder_model = ReadFile(config.model_config.transducer.decoder);
    auto sess = std::make_unique<Ort::Session>(env, decoder_model.data(),
                                               decoder_model.size(), sess_opts);

    size_t node_count = sess->GetOutputCount();

    if (node_count == 1) {
      return std::make_unique<OnlineRecognizerTransducerImpl>(config);
    } else {
      return std::make_unique<OnlineRecognizerTransducerNeMoImpl>(config);
    }
  }

  if (!config.model_config.paraformer.encoder.empty()) {
    return std::make_unique<OnlineRecognizerParaformerImpl>(config);
  }

  if (!config.model_config.wenet_ctc.model.empty() ||
      !config.model_config.zipformer2_ctc.model.empty() ||
      !config.model_config.nemo_ctc.model.empty() ||
      !config.model_config.t_one_ctc.model.empty()) {
    return std::make_unique<OnlineRecognizerCtcImpl>(config);
  }

  SHERPA_ONNX_LOGE("Please specify a model");
  SHERPA_ONNX_EXIT(-1);
  return nullptr;
}

template <typename Manager>
std::unique_ptr<OnlineRecognizerImpl> OnlineRecognizerImpl::Create(
    Manager *mgr, const OnlineRecognizerConfig &config) {
  if (config.model_config.provider_config.provider == "rknn") {
#if SHERPA_ONNX_ENABLE_RKNN
    // Currently, only zipformer v1 is supported for rknn
    if (config.model_config.transducer.encoder.empty() &&
        config.model_config.zipformer2_ctc.model.empty()) {
      SHERPA_ONNX_LOGE(
          "Only Zipformer transducers and CTC models are currently supported "
          "by rknn. Fallback to CPU");
    } else if (!config.model_config.transducer.encoder.empty()) {
      return std::make_unique<OnlineRecognizerTransducerRknnImpl>(mgr, config);
    } else if (!config.model_config.zipformer2_ctc.model.empty()) {
      return std::make_unique<OnlineRecognizerCtcRknnImpl>(mgr, config);
    }
#else
    SHERPA_ONNX_LOGE(
        "Please rebuild sherpa-onnx with -DSHERPA_ONNX_ENABLE_RKNN=ON if you "
        "want to use rknn.");
    SHERPA_ONNX_EXIT(-1);
    return nullptr;
#endif
  }

  if (!config.model_config.transducer.encoder.empty()) {
    Ort::Env env(ORT_LOGGING_LEVEL_ERROR);

    Ort::SessionOptions sess_opts;
    sess_opts.SetIntraOpNumThreads(1);
    sess_opts.SetInterOpNumThreads(1);

    auto decoder_model = ReadFile(mgr, config.model_config.transducer.decoder);
    auto sess = std::make_unique<Ort::Session>(env, decoder_model.data(),
                                               decoder_model.size(), sess_opts);

    size_t node_count = sess->GetOutputCount();

    if (node_count == 1) {
      return std::make_unique<OnlineRecognizerTransducerImpl>(mgr, config);
    } else {
      return std::make_unique<OnlineRecognizerTransducerNeMoImpl>(mgr, config);
    }
  }

  if (!config.model_config.paraformer.encoder.empty()) {
    return std::make_unique<OnlineRecognizerParaformerImpl>(mgr, config);
  }

  if (!config.model_config.wenet_ctc.model.empty() ||
      !config.model_config.zipformer2_ctc.model.empty() ||
      !config.model_config.nemo_ctc.model.empty() ||
      !config.model_config.t_one_ctc.model.empty()) {
    return std::make_unique<OnlineRecognizerCtcImpl>(mgr, config);
  }

  SHERPA_ONNX_LOGE("Please specify a model");
  SHERPA_ONNX_EXIT(-1);
  return nullptr;
}

OnlineRecognizerImpl::OnlineRecognizerImpl(const OnlineRecognizerConfig &config)
    : config_(config) {
  if (!config.rule_fsts.empty()) {
    std::vector<std::string> files;
    SplitStringToVector(config.rule_fsts, ",", false, &files);
    itn_list_.reserve(files.size());
    for (const auto &f : files) {
      if (config.model_config.debug) {
        SHERPA_ONNX_LOGE("rule fst: %s", f.c_str());
      }
      itn_list_.push_back(std::make_unique<kaldifst::TextNormalizer>(f));
    }
  }

  if (!config.rule_fars.empty()) {
    if (config.model_config.debug) {
      SHERPA_ONNX_LOGE("Loading FST archives");
    }
    std::vector<std::string> files;
    SplitStringToVector(config.rule_fars, ",", false, &files);

    itn_list_.reserve(files.size() + itn_list_.size());

    for (const auto &f : files) {
      if (config.model_config.debug) {
        SHERPA_ONNX_LOGE("rule far: %s", f.c_str());
      }
      std::unique_ptr<fst::FarReader<fst::StdArc>> reader(
          fst::FarReader<fst::StdArc>::Open(f));
      for (; !reader->Done(); reader->Next()) {
        std::unique_ptr<fst::StdConstFst> r(
            fst::CastOrConvertToConstFst(reader->GetFst()->Copy()));

        itn_list_.push_back(
            std::make_unique<kaldifst::TextNormalizer>(std::move(r)));
      }
    }

    if (config.model_config.debug) {
      SHERPA_ONNX_LOGE("FST archives loaded!");
    }
  }

  if (!config.hr.lexicon.empty() && !config.hr.rule_fsts.empty()) {
    auto hr_config = config.hr;
    hr_config.debug = config.model_config.debug;
    hr_ = std::make_unique<HomophoneReplacer>(hr_config);
  }
}

template <typename Manager>
OnlineRecognizerImpl::OnlineRecognizerImpl(Manager *mgr,
                                           const OnlineRecognizerConfig &config)
    : config_(config) {
  if (!config.rule_fsts.empty()) {
    std::vector<std::string> files;
    SplitStringToVector(config.rule_fsts, ",", false, &files);
    itn_list_.reserve(files.size());
    for (const auto &f : files) {
      if (config.model_config.debug) {
        SHERPA_ONNX_LOGE("rule fst: %s", f.c_str());
      }
      auto buf = ReadFile(mgr, f);
      std::istringstream is(std::string(buf.data(), buf.size()));
      itn_list_.push_back(std::make_unique<kaldifst::TextNormalizer>(is));
    }
  }

  if (!config.rule_fars.empty()) {
    std::vector<std::string> files;
    SplitStringToVector(config.rule_fars, ",", false, &files);
    itn_list_.reserve(files.size() + itn_list_.size());

    for (const auto &f : files) {
      if (config.model_config.debug) {
        SHERPA_ONNX_LOGE("rule far: %s", f.c_str());
      }

      auto buf = ReadFile(mgr, f);

      std::unique_ptr<std::istream> s(
          new std::istringstream(std::string(buf.data(), buf.size())));

      std::unique_ptr<fst::FarReader<fst::StdArc>> reader(
          fst::FarReader<fst::StdArc>::Open(std::move(s)));

      for (; !reader->Done(); reader->Next()) {
        std::unique_ptr<fst::StdConstFst> r(
            fst::CastOrConvertToConstFst(reader->GetFst()->Copy()));

        itn_list_.push_back(
            std::make_unique<kaldifst::TextNormalizer>(std::move(r)));
      }  // for (; !reader->Done(); reader->Next())
    }  // for (const auto &f : files)
  }  // if (!config.rule_fars.empty())
  if (!config.hr.lexicon.empty() && !config.hr.rule_fsts.empty()) {
    auto hr_config = config.hr;
    hr_config.debug = config.model_config.debug;
    hr_ = std::make_unique<HomophoneReplacer>(mgr, hr_config);
  }
}

std::string OnlineRecognizerImpl::ApplyInverseTextNormalization(
    std::string text) const {
  text = RemoveInvalidUtf8Sequences(text);

  if (!itn_list_.empty()) {
    for (const auto &tn : itn_list_) {
      text = tn->Normalize(text);
    }
  }

  return text;
}

std::string OnlineRecognizerImpl::ApplyHomophoneReplacer(
    std::string text) const {
  if (hr_) {
    text = hr_->Apply(text);
  }

  return text;
}

#if __ANDROID_API__ >= 9
template OnlineRecognizerImpl::OnlineRecognizerImpl(
    AAssetManager *mgr, const OnlineRecognizerConfig &config);

template std::unique_ptr<OnlineRecognizerImpl> OnlineRecognizerImpl::Create(
    AAssetManager *mgr, const OnlineRecognizerConfig &config);
#endif

#if __OHOS__
template OnlineRecognizerImpl::OnlineRecognizerImpl(
    NativeResourceManager *mgr, const OnlineRecognizerConfig &config);

template std::unique_ptr<OnlineRecognizerImpl> OnlineRecognizerImpl::Create(
    NativeResourceManager *mgr, const OnlineRecognizerConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-recognizer-impl.h
================================================
// sherpa-onnx/csrc/online-recognizer-impl.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_ONLINE_RECOGNIZER_IMPL_H_
#define SHERPA_ONNX_CSRC_ONLINE_RECOGNIZER_IMPL_H_

#include <memory>
#include <string>
#include <vector>

#include "kaldifst/csrc/text-normalizer.h"
#include "sherpa-onnx/csrc/homophone-replacer.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/online-recognizer.h"
#include "sherpa-onnx/csrc/online-stream.h"

namespace sherpa_onnx {

class OnlineRecognizerImpl {
 public:
  explicit OnlineRecognizerImpl(const OnlineRecognizerConfig &config);

  static std::unique_ptr<OnlineRecognizerImpl> Create(
      const OnlineRecognizerConfig &config);

  template <typename Manager>
  OnlineRecognizerImpl(Manager *mgr, const OnlineRecognizerConfig &config);

  template <typename Manager>
  static std::unique_ptr<OnlineRecognizerImpl> Create(
      Manager *mgr, const OnlineRecognizerConfig &config);

  virtual ~OnlineRecognizerImpl() = default;

  virtual std::unique_ptr<OnlineStream> CreateStream() const = 0;

  virtual std::unique_ptr<OnlineStream> CreateStream(
      const std::string &hotwords) const {
    SHERPA_ONNX_LOGE("Only transducer models support contextual biasing.");
    exit(-1);
  }

  virtual bool IsReady(OnlineStream *s) const = 0;

  virtual void WarmpUpRecognizer(int32_t warmup, int32_t mbs) const {
    // ToDo extending to other  models
    SHERPA_ONNX_LOGE("Only zipformer2 model supports Warm up for now.");
    exit(-1);
  }

  virtual void DecodeStreams(OnlineStream **ss, int32_t n) const = 0;

  virtual OnlineRecognizerResult GetResult(OnlineStream *s) const = 0;

  virtual bool IsEndpoint(OnlineStream *s) const = 0;

  virtual void Reset(OnlineStream *s) const = 0;

  std::string ApplyInverseTextNormalization(std::string text) const;
  std::string ApplyHomophoneReplacer(std::string text) const;

 private:
  OnlineRecognizerConfig config_;
  // for inverse text normalization. Used only if
  // config.rule_fsts is not empty or
  // config.rule_fars is not empty
  std::vector<std::unique_ptr<kaldifst::TextNormalizer>> itn_list_;
  std::unique_ptr<HomophoneReplacer> hr_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_RECOGNIZER_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/online-recognizer-paraformer-impl.h
================================================
// sherpa-onnx/csrc/online-recognizer-paraformer-impl.h
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_ONLINE_RECOGNIZER_PARAFORMER_IMPL_H_
#define SHERPA_ONNX_CSRC_ONLINE_RECOGNIZER_PARAFORMER_IMPL_H_

#include <algorithm>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "Eigen/Dense"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/online-lm.h"
#include "sherpa-onnx/csrc/online-paraformer-decoder.h"
#include "sherpa-onnx/csrc/online-paraformer-model.h"
#include "sherpa-onnx/csrc/online-recognizer-impl.h"
#include "sherpa-onnx/csrc/online-recognizer.h"
#include "sherpa-onnx/csrc/symbol-table.h"

namespace sherpa_onnx {

static OnlineRecognizerResult Convert(const OnlineParaformerDecoderResult &src,
                                      const SymbolTable &sym_table) {
  OnlineRecognizerResult r;
  r.tokens.reserve(src.tokens.size());

  std::string text;

  // When the current token ends with "@@" we set mergeable to true
  bool mergeable = false;

  for (int32_t i = 0; i != src.tokens.size(); ++i) {
    auto sym = sym_table[src.tokens[i]];
    r.tokens.push_back(sym);

    if ((sym.back() != '@') || (sym.size() > 2 && sym[sym.size() - 2] != '@')) {
      // sym does not end with "@@"
      const uint8_t *p = reinterpret_cast<const uint8_t *>(sym.c_str());
      if (p[0] < 0x80) {
        // an ascii
        if (mergeable) {
          mergeable = false;
          text.append(sym);
        } else {
          text.append(" ");
          text.append(sym);
        }
      } else {
        // not an ascii
        mergeable = false;

        if (i > 0) {
          const uint8_t p = reinterpret_cast<const uint8_t *>(
              sym_table[src.tokens[i - 1]].c_str())[0];
          if (p < 0x80) {
            // put a space between ascii and non-ascii
            text.append(" ");
          }
        }
        text.append(sym);
      }
    } else {
      // this sym ends with @@
      sym = std::string(sym.data(), sym.size() - 2);
      if (mergeable) {
        text.append(sym);
      } else {
        text.append(" ");
        text.append(sym);
        mergeable = true;
      }
    }
  }
  r.text = std::move(text);

  return r;
}

// y[i] += x[i] * scale
static void ScaleAddInPlace(const float *x, int32_t n, float scale, float *y) {
  for (int32_t i = 0; i != n; ++i) {
    y[i] += x[i] * scale;
  }
}

// y[i] = x[i] * scale
static void Scale(const float *x, int32_t n, float scale, float *y) {
  for (int32_t i = 0; i != n; ++i) {
    y[i] = x[i] * scale;
  }
}

class OnlineRecognizerParaformerImpl : public OnlineRecognizerImpl {
 public:
  explicit OnlineRecognizerParaformerImpl(const OnlineRecognizerConfig &config)
      : OnlineRecognizerImpl(config),
        config_(config),
        model_(config.model_config),
        endpoint_(config_.endpoint_config) {
    if (!config.model_config.tokens_buf.empty()) {
      sym_ = SymbolTable(config.model_config.tokens_buf, false);
    } else {
      /// assuming tokens_buf and tokens are guaranteed not being both empty
      sym_ = SymbolTable(config.model_config.tokens, true);
    }

    if (config.decoding_method != "greedy_search") {
      SHERPA_ONNX_LOGE(
          "Unsupported decoding method: %s. Support only greedy_search at "
          "present",
          config.decoding_method.c_str());
      exit(-1);
    }

    // Paraformer models assume input samples are in the range
    // [-32768, 32767], so we set normalize_samples to false
    config_.feat_config.normalize_samples = false;
  }

  template <typename Manager>
  explicit OnlineRecognizerParaformerImpl(Manager *mgr,
                                          const OnlineRecognizerConfig &config)
      : OnlineRecognizerImpl(mgr, config),
        config_(config),
        model_(mgr, config.model_config),
        endpoint_(config_.endpoint_config) {
    if (!config.model_config.tokens_buf.empty()) {
      sym_ = SymbolTable(config.model_config.tokens_buf, false);
    } else {
      sym_ = SymbolTable(mgr, config.model_config.tokens);
    }
    if (config.decoding_method != "greedy_search") {
      SHERPA_ONNX_LOGE("Unsupported decoding method: %s",
                       config.decoding_method.c_str());
      exit(-1);
    }

    // Paraformer models assume input samples are in the range
    // [-32768, 32767], so we set normalize_samples to false
    config_.feat_config.normalize_samples = false;
  }

  OnlineRecognizerParaformerImpl(const OnlineRecognizerParaformerImpl &) =
      delete;

  OnlineRecognizerParaformerImpl operator=(
      const OnlineRecognizerParaformerImpl &) = delete;

  std::unique_ptr<OnlineStream> CreateStream() const override {
    auto stream = std::make_unique<OnlineStream>(config_.feat_config);

    OnlineParaformerDecoderResult r;
    stream->SetParaformerResult(r);

    return stream;
  }

  bool IsReady(OnlineStream *s) const override {
    if (s->GetNumProcessedFrames() + chunk_size_ < s->NumFramesReady()) {
      return true;
    }
    // is_final: accept short chunks (less than chunk_size_ frames)
    // Users should call SetOption("is_final", "1") before the last decode.
    if (s->GetOptionInt("is_final", 0) &&
        s->GetNumProcessedFrames() < s->NumFramesReady()) {
      return true;
    }
    return false;
  }

  void DecodeStreams(OnlineStream **ss, int32_t n) const override {
    // TODO(fangjun): Support batch size > 1
    for (int32_t i = 0; i != n; ++i) {
      DecodeStream(ss[i]);
    }
  }

  OnlineRecognizerResult GetResult(OnlineStream *s) const override {
    auto decoder_result = s->GetParaformerResult();

    auto r = Convert(decoder_result, sym_);
    r.text = ApplyInverseTextNormalization(std::move(r.text));
    r.text = ApplyHomophoneReplacer(std::move(r.text));
    return r;
  }

  bool IsEndpoint(OnlineStream *s) const override {
    if (!config_.enable_endpoint) {
      return false;
    }

    const auto &result = s->GetParaformerResult();

    int32_t num_processed_frames = s->GetNumProcessedFrames();

    // frame shift is 10 milliseconds
    float frame_shift_in_seconds = 0.01;

    int32_t trailing_silence_frames =
        num_processed_frames - result.last_non_blank_frame_index;

    return endpoint_.IsEndpoint(num_processed_frames, trailing_silence_frames,
                                frame_shift_in_seconds);
  }

  void Reset(OnlineStream *s) const override {
    // segment is incremented only when the last result is not empty
    const auto &r = s->GetParaformerResult();
    if (!r.tokens.empty()) {
      s->GetCurrentSegment() += 1;
    }

    OnlineParaformerDecoderResult empty;
    s->SetParaformerResult(empty);

    s->GetStates().clear();
    s->GetParaformerEncoderOutCache().clear();
    s->GetParaformerAlphaCache().clear();

    // s->GetParaformerFeatCache().clear();

    // Note: We only update counters. The underlying audio samples
    // are not discarded.
    s->Reset();
  }

 private:
  void DecodeStream(OnlineStream *s) const {
    const auto num_processed_frames = s->GetNumProcessedFrames();
    int32_t available_frames = s->NumFramesReady() - num_processed_frames;
    bool is_final = s->GetOptionInt("is_final", 0);

    // For the final short chunk (fewer frames than chunk_size_):
    // read the remaining frames and pad with zeros to chunk_size_.
    bool is_short_final = is_final && available_frames < chunk_size_;

    std::vector<float> frames =
        s->GetFrames(num_processed_frames,
                     is_short_final ? available_frames : chunk_size_);

    if (is_short_final) {
      int32_t feat_dim_raw = config_.feat_config.feature_dim;
      frames.resize(chunk_size_ * feat_dim_raw, 0.0f);
      // Consume all remaining frames (no overlap needed).
      s->GetNumProcessedFrames() += available_frames;
    } else {
      // Normal: advance by chunk_size_ - 1 to keep 1-frame overlap.
      s->GetNumProcessedFrames() += chunk_size_ - 1;
    }

    frames = ApplyLFR(frames);
    ApplyCMVN(&frames);
    PositionalEncoding(&frames, num_processed_frames / model_.LfrWindowShift());

    int32_t feat_dim = model_.NegativeMean().size();

    // We have scaled inv_stddev by sqrt(encoder_output_size)
    // so the following line can be commented out
    // frames *= encoder_output_size ** 0.5

    // add overlap chunk
    std::vector<float> &feat_cache = s->GetParaformerFeatCache();
    if (feat_cache.empty()) {
      int32_t n = (left_chunk_size_ + right_chunk_size_) * feat_dim;
      feat_cache.resize(n, 0);
    }

    frames.insert(frames.begin(), feat_cache.begin(), feat_cache.end());
    std::copy(frames.end() - feat_cache.size(), frames.end(),
              feat_cache.begin());

    int32_t num_frames = frames.size() / feat_dim;

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::array<int64_t, 3> x_shape{1, num_frames, feat_dim};
    Ort::Value x =
        Ort::Value::CreateTensor(memory_info, frames.data(), frames.size(),
                                 x_shape.data(), x_shape.size());

    int64_t x_len_shape = 1;
    int32_t x_len_val = num_frames;

    Ort::Value x_length =
        Ort::Value::CreateTensor(memory_info, &x_len_val, 1, &x_len_shape, 1);

    auto encoder_out_vec =
        model_.ForwardEncoder(std::move(x), std::move(x_length));

    // CIF search
    auto &encoder_out = encoder_out_vec[0];
    auto &encoder_out_len = encoder_out_vec[1];
    auto &alpha = encoder_out_vec[2];

    float *p_alpha = alpha.GetTensorMutableData<float>();

    std::vector<int64_t> alpha_shape =
        alpha.GetTensorTypeAndShapeInfo().GetShape();

    std::fill(p_alpha, p_alpha + left_chunk_size_, 0);
    std::fill(p_alpha + alpha_shape[1] - right_chunk_size_,
              p_alpha + alpha_shape[1], 0);

    const float *p_encoder_out = encoder_out.GetTensorData<float>();

    std::vector<int64_t> encoder_out_shape =
        encoder_out.GetTensorTypeAndShapeInfo().GetShape();

    std::vector<float> &initial_hidden = s->GetParaformerEncoderOutCache();
    if (initial_hidden.empty()) {
      initial_hidden.resize(encoder_out_shape[2]);
    }

    std::vector<float> &alpha_cache = s->GetParaformerAlphaCache();
    if (alpha_cache.empty()) {
      alpha_cache.resize(1);
    }

    std::vector<float> acoustic_embedding;
    acoustic_embedding.reserve(encoder_out_shape[1] * encoder_out_shape[2]);

    float threshold = 1.0;

    float integrate = alpha_cache[0];

    for (int32_t i = 0; i != encoder_out_shape[1]; ++i) {
      float this_alpha = p_alpha[i];
      if (integrate + this_alpha < threshold) {
        integrate += this_alpha;
        ScaleAddInPlace(p_encoder_out + i * encoder_out_shape[2],
                        encoder_out_shape[2], this_alpha,
                        initial_hidden.data());
        continue;
      }

      // fire
      ScaleAddInPlace(p_encoder_out + i * encoder_out_shape[2],
                      encoder_out_shape[2], threshold - integrate,
                      initial_hidden.data());
      acoustic_embedding.insert(acoustic_embedding.end(),
                                initial_hidden.begin(), initial_hidden.end());
      integrate += this_alpha - threshold;

      Scale(p_encoder_out + i * encoder_out_shape[2], encoder_out_shape[2],
            integrate, initial_hidden.data());
    }

    alpha_cache[0] = integrate;

    if (acoustic_embedding.empty()) {
      return;
    }

    auto &states = s->GetStates();
    if (states.empty()) {
      states.reserve(model_.DecoderNumBlocks());

      std::array<int64_t, 3> shape{1, model_.EncoderOutputSize(),
                                   model_.DecoderKernelSize() - 1};

      int32_t num_bytes = sizeof(float) * shape[0] * shape[1] * shape[2];

      for (int32_t i = 0; i != model_.DecoderNumBlocks(); ++i) {
        Ort::Value this_state = Ort::Value::CreateTensor<float>(
            model_.Allocator(), shape.data(), shape.size());

        memset(this_state.GetTensorMutableData<float>(), 0, num_bytes);

        states.push_back(std::move(this_state));
      }
    }

    int32_t num_tokens = acoustic_embedding.size() / initial_hidden.size();
    std::array<int64_t, 3> acoustic_embedding_shape{
        1, num_tokens, static_cast<int32_t>(initial_hidden.size())};

    Ort::Value acoustic_embedding_tensor = Ort::Value::CreateTensor(
        memory_info, acoustic_embedding.data(), acoustic_embedding.size(),
        acoustic_embedding_shape.data(), acoustic_embedding_shape.size());

    std::array<int64_t, 1> acoustic_embedding_length_shape{1};
    Ort::Value acoustic_embedding_length_tensor = Ort::Value::CreateTensor(
        memory_info, &num_tokens, 1, acoustic_embedding_length_shape.data(),
        acoustic_embedding_length_shape.size());

    auto decoder_out_vec = model_.ForwardDecoder(
        std::move(encoder_out), std::move(encoder_out_len),
        std::move(acoustic_embedding_tensor),
        std::move(acoustic_embedding_length_tensor), std::move(states));

    states.reserve(model_.DecoderNumBlocks());
    for (int32_t i = 2; i != decoder_out_vec.size(); ++i) {
      // TODO(fangjun): When we change chunk_size_, we need to
      // slice decoder_out_vec[i] accordingly.
      states.push_back(std::move(decoder_out_vec[i]));
    }

    const auto &sample_ids = decoder_out_vec[1];
    const int64_t *p_sample_ids = sample_ids.GetTensorData<int64_t>();

    bool non_blank_detected = false;

    auto &result = s->GetParaformerResult();

    for (int32_t i = 0; i != num_tokens; ++i) {
      int32_t t = p_sample_ids[i];
      if (t == 0) {
        continue;
      }

      non_blank_detected = true;
      result.tokens.push_back(t);
    }

    if (non_blank_detected) {
      result.last_non_blank_frame_index = num_processed_frames;
    }
  }

  std::vector<float> ApplyLFR(const std::vector<float> &in) const {
    int32_t lfr_window_size = model_.LfrWindowSize();
    int32_t lfr_window_shift = model_.LfrWindowShift();
    int32_t in_feat_dim = config_.feat_config.feature_dim;

    int32_t in_num_frames = in.size() / in_feat_dim;
    int32_t out_num_frames =
        (in_num_frames - lfr_window_size) / lfr_window_shift + 1;
    int32_t out_feat_dim = in_feat_dim * lfr_window_size;

    std::vector<float> out(out_num_frames * out_feat_dim);

    const float *p_in = in.data();
    float *p_out = out.data();

    for (int32_t i = 0; i != out_num_frames; ++i) {
      std::copy(p_in, p_in + out_feat_dim, p_out);

      p_out += out_feat_dim;
      p_in += lfr_window_shift * in_feat_dim;
    }

    return out;
  }

  void ApplyCMVN(std::vector<float> *v) const {
    const std::vector<float> &neg_mean = model_.NegativeMean();
    const std::vector<float> &inv_stddev = model_.InverseStdDev();
    int dim = static_cast<int>(neg_mean.size());
    int num_frames = static_cast<int>(v->size()) / dim;

    Eigen::Map<
        Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
        mat(v->data(), num_frames, dim);

    Eigen::Map<const Eigen::RowVectorXf> neg_mean_vec(neg_mean.data(), dim);
    Eigen::Map<const Eigen::RowVectorXf> inv_stddev_vec(inv_stddev.data(), dim);

    mat.array() = (mat.array().rowwise() + neg_mean_vec.array()).rowwise() *
                  inv_stddev_vec.array();
  }

  void PositionalEncoding(std::vector<float> *v, int32_t t_offset) const {
    int32_t lfr_window_size = model_.LfrWindowSize();
    int32_t in_feat_dim = config_.feat_config.feature_dim;

    int32_t feat_dim = in_feat_dim * lfr_window_size;
    int32_t T = v->size() / feat_dim;

    // log(10000)/(7*80/2-1) == 0.03301197265941284
    // 7 is lfr_window_size
    // 80 is in_feat_dim
    // 7*80 is feat_dim
    constexpr float kScale = -0.03301197265941284;

    for (int32_t t = 0; t != T; ++t) {
      float *p = v->data() + t * feat_dim;

      int32_t offset = t + 1 + t_offset;

      for (int32_t d = 0; d < feat_dim / 2; ++d) {
        float inv_timescale = offset * std::exp(d * kScale);

        float sin_d = std::sin(inv_timescale);
        float cos_d = std::cos(inv_timescale);

        p[d] += sin_d;
        p[d + feat_dim / 2] += cos_d;
      }
    }
  }

 private:
  OnlineRecognizerConfig config_;
  OnlineParaformerModel model_;
  SymbolTable sym_;
  Endpoint endpoint_;

  // 0.61 seconds
  int32_t chunk_size_ = 61;
  // (61 - 7) / 6 + 1 = 10

  int32_t left_chunk_size_ = 5;
  int32_t right_chunk_size_ = 3;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_RECOGNIZER_PARAFORMER_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/online-recognizer-transducer-impl.h
================================================
// sherpa-onnx/csrc/online-recognizer-transducer-impl.h
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_ONLINE_RECOGNIZER_TRANSDUCER_IMPL_H_
#define SHERPA_ONNX_CSRC_ONLINE_RECOGNIZER_TRANSDUCER_IMPL_H_

#include <algorithm>
#include <ios>
#include <memory>
#include <regex>  // NOLINT
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-whisper-model.h"
#include "sherpa-onnx/csrc/online-lm.h"
#include "sherpa-onnx/csrc/online-recognizer-impl.h"
#include "sherpa-onnx/csrc/online-recognizer.h"
#include "sherpa-onnx/csrc/online-transducer-decoder.h"
#include "sherpa-onnx/csrc/online-transducer-greedy-search-decoder.h"
#include "sherpa-onnx/csrc/online-transducer-model.h"
#include "sherpa-onnx/csrc/online-transducer-modified-beam-search-decoder.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/symbol-table.h"
#include "sherpa-onnx/csrc/utils.h"
#include "ssentencepiece/csrc/ssentencepiece.h"

namespace sherpa_onnx {

OnlineRecognizerResult Convert(const OnlineTransducerDecoderResult &src,
                               const SymbolTable &sym_table,
                               float frame_shift_ms, int32_t subsampling_factor,
                               int32_t segment, int32_t frames_since_start) {
  OnlineRecognizerResult r;
  r.tokens.reserve(src.tokens.size());
  r.timestamps.reserve(src.tokens.size());

  std::string text;
  for (auto i : src.tokens) {
    auto sym = sym_table[i];
    if (sym == "<unk>") {
      continue;
    }

    text.append(sym);

    if (sym.size() == 1 && (sym[0] < 0x20 || sym[0] > 0x7e)) {
      // for bpe models with byte_fallback
      // (but don't rewrite printable characters 0x20..0x7e,
      //  which collide with standard BPE units)
      std::ostringstream os;
      os << "<0x" << std::hex << std::uppercase
         << (static_cast<int32_t>(sym[0]) & 0xff) << ">";
      sym = os.str();
    }

    r.tokens.push_back(std::move(sym));
  }

  if (sym_table.IsByteBpe()) {
    text = sym_table.DecodeByteBpe(text);
  }

  r.text = std::move(text);

  float frame_shift_s = frame_shift_ms / 1000. * subsampling_factor;
  for (auto t : src.timestamps) {
    float time = frame_shift_s * t;
    r.timestamps.push_back(time);
  }

  r.ys_probs = std::move(src.ys_probs);
  r.lm_probs = std::move(src.lm_probs);
  r.context_scores = std::move(src.context_scores);

  r.segment = segment;
  r.start_time = frames_since_start * frame_shift_ms / 1000.;

  return r;
}

class OnlineRecognizerTransducerImpl : public OnlineRecognizerImpl {
 public:
  explicit OnlineRecognizerTransducerImpl(const OnlineRecognizerConfig &config)
      : OnlineRecognizerImpl(config),
        config_(config),
        model_(OnlineTransducerModel::Create(config.model_config)),
        endpoint_(config_.endpoint_config) {
    if (!config.model_config.tokens_buf.empty()) {
      sym_ = SymbolTable(config.model_config.tokens_buf, false);
    } else {
      /// assuming tokens_buf and tokens are guaranteed not being both empty
      sym_ = SymbolTable(config.model_config.tokens, true);
    }

    if (sym_.Contains("<unk>")) {
      unk_id_ = sym_["<unk>"];
    }

    model_->SetFeatureDim(config.feat_config.feature_dim);

    if (config.decoding_method == "modified_beam_search") {
      if (!config_.model_config.bpe_vocab.empty()) {
        bpe_encoder_ = std::make_unique<ssentencepiece::Ssentencepiece>(
            config_.model_config.bpe_vocab);
      }

      if (!config_.hotwords_buf.empty()) {
        InitHotwordsFromBufStr();
      } else if (!config_.hotwords_file.empty()) {
        InitHotwords();
      }

      if (!config_.lm_config.model.empty()) {
        lm_ = OnlineLM::Create(config.lm_config);
      }

      decoder_ = std::make_unique<OnlineTransducerModifiedBeamSearchDecoder>(
          model_.get(), lm_.get(), config_.max_active_paths,
          config_.lm_config.scale, config_.lm_config.shallow_fusion, unk_id_,
          config_.blank_penalty, config_.temperature_scale);

    } else if (config.decoding_method == "greedy_search") {
      decoder_ = std::make_unique<OnlineTransducerGreedySearchDecoder>(
          model_.get(), unk_id_, config_.blank_penalty,
          config_.temperature_scale);

    } else {
      SHERPA_ONNX_LOGE("Unsupported decoding method: %s",
                       config.decoding_method.c_str());
      exit(-1);
    }

    if (model_->UseWhisperFeature()) {
      config_.feat_config.is_whisper = true;
    }
  }

  template <typename Manager>
  explicit OnlineRecognizerTransducerImpl(Manager *mgr,
                                          const OnlineRecognizerConfig &config)
      : OnlineRecognizerImpl(mgr, config),
        config_(config),
        model_(OnlineTransducerModel::Create(mgr, config.model_config)),
        sym_(mgr, config.model_config.tokens),
        endpoint_(config_.endpoint_config) {
    if (sym_.Contains("<unk>")) {
      unk_id_ = sym_["<unk>"];
    }

    model_->SetFeatureDim(config.feat_config.feature_dim);

    if (config.decoding_method == "modified_beam_search") {
#if 0
      // TODO(fangjun): Implement it
      if (!config_.lm_config.model.empty()) {
        lm_ = OnlineLM::Create(mgr, config.lm_config);
      }
#endif

      if (!config_.model_config.bpe_vocab.empty()) {
        auto buf = ReadFile(mgr, config_.model_config.bpe_vocab);
        std::istringstream iss(std::string(buf.begin(), buf.end()));
        bpe_encoder_ = std::make_unique<ssentencepiece::Ssentencepiece>(iss);
      }

      if (!config_.hotwords_buf.empty()) {
        InitHotwordsFromBufStr();
      } else if (!config_.hotwords_file.empty()) {
        InitHotwords(mgr);
      }

      decoder_ = std::make_unique<OnlineTransducerModifiedBeamSearchDecoder>(
          model_.get(), lm_.get(), config_.max_active_paths,
          config_.lm_config.scale, config_.lm_config.shallow_fusion, unk_id_,
          config_.blank_penalty, config_.temperature_scale);

    } else if (config.decoding_method == "greedy_search") {
      decoder_ = std::make_unique<OnlineTransducerGreedySearchDecoder>(
          model_.get(), unk_id_, config_.blank_penalty,
          config_.temperature_scale);

    } else {
      SHERPA_ONNX_LOGE("Unsupported decoding method: %s",
                       config.decoding_method.c_str());
      exit(-1);
    }

    if (model_->UseWhisperFeature()) {
      config_.feat_config.is_whisper = true;
    }
  }

  std::unique_ptr<OnlineStream> CreateStream() const override {
    auto stream =
        std::make_unique<OnlineStream>(config_.feat_config, hotwords_graph_);
    InitOnlineStream(stream.get());
    return stream;
  }

  std::unique_ptr<OnlineStream> CreateStream(
      const std::string &hotwords) const override {
    auto hws = std::regex_replace(hotwords, std::regex("/"), "\n");
    std::istringstream is(hws);
    std::vector<std::vector<int32_t>> current;
    std::vector<float> current_scores;
    if (!EncodeHotwords(is, config_.model_config.modeling_unit, sym_,
                        bpe_encoder_.get(), &current, &current_scores)) {
      SHERPA_ONNX_LOGE("Encode hotwords failed, skipping, hotwords are : %s",
                       hotwords.c_str());
    }

    int32_t num_default_hws = hotwords_.size();
    int32_t num_hws = current.size();

    current.insert(current.end(), hotwords_.begin(), hotwords_.end());

    if (!current_scores.empty() && !boost_scores_.empty()) {
      current_scores.insert(current_scores.end(), boost_scores_.begin(),
                            boost_scores_.end());
    } else if (!current_scores.empty() && boost_scores_.empty()) {
      current_scores.insert(current_scores.end(), num_default_hws,
                            config_.hotwords_score);
    } else if (current_scores.empty() && !boost_scores_.empty()) {
      current_scores.insert(current_scores.end(), num_hws,
                            config_.hotwords_score);
      current_scores.insert(current_scores.end(), boost_scores_.begin(),
                            boost_scores_.end());
    } else {
      // Do nothing.
    }

    auto context_graph = std::make_shared<ContextGraph>(
        current, config_.hotwords_score, current_scores);
    auto stream =
        std::make_unique<OnlineStream>(config_.feat_config, context_graph);
    InitOnlineStream(stream.get());
    return stream;
  }

  bool IsReady(OnlineStream *s) const override {
    return s->GetNumProcessedFrames() + model_->ChunkSize() <
           s->NumFramesReady();
  }

  // Warmping up engine with wp: warm_up count and max-batch-size
  void WarmpUpRecognizer(int32_t warmup, int32_t mbs) const override {
    auto max_batch_size = mbs;
    if (warmup <= 0 || warmup > 100) {
      return;
    }
    int32_t chunk_size = model_->ChunkSize();
    int32_t feature_dim = config_.feat_config.feature_dim;
    std::vector<OnlineTransducerDecoderResult> results(max_batch_size);
    std::vector<float> features_vec(max_batch_size * chunk_size * feature_dim);
    std::vector<std::vector<Ort::Value>> states_vec(max_batch_size);

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::array<int64_t, 3> x_shape{max_batch_size, chunk_size, feature_dim};

    for (int32_t i = 0; i != max_batch_size; ++i) {
      states_vec[i] = model_->GetEncoderInitStates();
      results[i] = decoder_->GetEmptyResult();
    }

    for (int32_t i = 0; i != warmup; ++i) {
      auto states = model_->StackStates(states_vec);
      Ort::Value x = Ort::Value::CreateTensor(memory_info, features_vec.data(),
                                              features_vec.size(),
                                              x_shape.data(), x_shape.size());
      auto x_copy = Clone(model_->Allocator(), &x);
      auto pair = model_->RunEncoder(std::move(x), std::move(states),
                                     std::move(x_copy));
      decoder_->Decode(std::move(pair.first), &results);
    }
  }

  void DecodeStreams(OnlineStream **ss, int32_t n) const override {
    int32_t chunk_size = model_->ChunkSize();
    int32_t chunk_shift = model_->ChunkShift();

    int32_t feature_dim = ss[0]->FeatureDim();

    std::vector<OnlineTransducerDecoderResult> results(n);
    std::vector<float> features_vec(n * chunk_size * feature_dim);
    std::vector<std::vector<Ort::Value>> states_vec(n);
    std::vector<int64_t> all_processed_frames(n);
    bool has_context_graph = false;

    for (int32_t i = 0; i != n; ++i) {
      if (!has_context_graph && ss[i]->GetContextGraph()) {
        has_context_graph = true;
      }

      const auto num_processed_frames = ss[i]->GetNumProcessedFrames();
      std::vector<float> features =
          ss[i]->GetFrames(num_processed_frames, chunk_size);

      if (config_.feat_config.is_whisper) {
        OfflineWhisperModel::NormalizeFeatures(features.data(), chunk_size,
                                               feature_dim);
      }

      // Question: should num_processed_frames include chunk_shift?
      ss[i]->GetNumProcessedFrames() += chunk_shift;

      std::copy(features.begin(), features.end(),
                features_vec.data() + i * chunk_size * feature_dim);

      results[i] = std::move(ss[i]->GetResult());
      states_vec[i] = std::move(ss[i]->GetStates());
      all_processed_frames[i] = num_processed_frames;
    }

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::array<int64_t, 3> x_shape{n, chunk_size, feature_dim};

    Ort::Value x = Ort::Value::CreateTensor(memory_info, features_vec.data(),
                                            features_vec.size(), x_shape.data(),
                                            x_shape.size());

    std::array<int64_t, 1> processed_frames_shape{
        static_cast<int64_t>(all_processed_frames.size())};

    Ort::Value processed_frames = Ort::Value::CreateTensor(
        memory_info, all_processed_frames.data(), all_processed_frames.size(),
        processed_frames_shape.data(), processed_frames_shape.size());

    auto states = model_->StackStates(states_vec);

    auto pair = model_->RunEncoder(std::move(x), std::move(states),
                                   std::move(processed_frames));

    if (has_context_graph) {
      decoder_->Decode(std::move(pair.first), ss, &results);
    } else {
      decoder_->Decode(std::move(pair.first), &results);
    }

    std::vector<std::vector<Ort::Value>> next_states =
        model_->UnStackStates(pair.second);

    for (int32_t i = 0; i != n; ++i) {
      ss[i]->SetResult(results[i]);
      ss[i]->SetStates(std::move(next_states[i]));
    }
  }

  OnlineRecognizerResult GetResult(OnlineStream *s) const override {
    OnlineTransducerDecoderResult decoder_result = s->GetResult();
    decoder_->StripLeadingBlanks(&decoder_result);

    // TODO(fangjun): Remember to change these constants if needed
    int32_t frame_shift_ms = 10;
    int32_t subsampling_factor = 4;
    auto r = Convert(decoder_result, sym_, frame_shift_ms, subsampling_factor,
                     s->GetCurrentSegment(), s->GetNumFramesSinceStart());
    r.text = ApplyInverseTextNormalization(std::move(r.text));
    r.text = ApplyHomophoneReplacer(std::move(r.text));
    return r;
  }

  bool IsEndpoint(OnlineStream *s) const override {
    if (!config_.enable_endpoint) {
      return false;
    }

    int32_t num_processed_frames = s->GetNumProcessedFrames();

    // frame shift is 10 milliseconds
    float frame_shift_in_seconds = 0.01;

    // subsampling factor is 4
    int32_t trailing_silence_frames = s->GetResult().num_trailing_blanks * 4;

    return endpoint_.IsEndpoint(num_processed_frames, trailing_silence_frames,
                                frame_shift_in_seconds);
  }

  void Reset(OnlineStream *s) const override {
    int32_t context_size = model_->ContextSize();

    {
      // segment is incremented only when the last
      // result is not empty, contains non-blanks and longer than context_size)
      const auto &r = s->GetResult();
      if (!r.tokens.empty() && r.tokens.back() != 0 &&
          r.tokens.size() > context_size) {
        s->GetCurrentSegment() += 1;
      }
    }

    auto r = decoder_->GetEmptyResult();
    auto last_result = s->GetResult();

    if (static_cast<int32_t>(last_result.tokens.size()) > context_size) {
      // if last result is not empty, then
      // truncate all last hyps and save as the 'ys' context for next result
      // (the encoder state buffers are kept)
      for (const auto &it : last_result.hyps) {
        auto h = it.second;
        r.hyps.Add({std::vector<int64_t>(h.ys.end() - context_size, h.ys.end()),
                    h.log_prob});
      }

      r.tokens = std::vector<int64_t>(last_result.tokens.end() - context_size,
                                      last_result.tokens.end());
    } else {
      if (config_.reset_encoder) {
        // reset encoder states, use blanks as 'ys' context
        s->SetStates(model_->GetEncoderInitStates());
      }
    }

    // but reset all contextual biasing graph states to root
    if (config_.decoding_method == "modified_beam_search" &&
        nullptr != s->GetContextGraph()) {
      for (auto it = r.hyps.begin(); it != r.hyps.end(); ++it) {
        it->second.context_state = s->GetContextGraph()->Root();
      }
    }

    s->SetResult(r);

    // Note: We only update counters. The underlying audio samples
    // are not discarded.
    s->Reset();
  }

 private:
  void InitHotwords() {
    // each line in hotwords_file contains space-separated words

    std::ifstream is(config_.hotwords_file);
    if (!is) {
      SHERPA_ONNX_LOGE("Open hotwords file failed: %s",
                       config_.hotwords_file.c_str());
      exit(-1);
    }

    if (!EncodeHotwords(is, config_.model_config.modeling_unit, sym_,
                        bpe_encoder_.get(), &hotwords_, &boost_scores_)) {
      SHERPA_ONNX_LOGE(
          "Failed to encode some hotwords, skip them already, see logs above "
          "for details.");
    }
    hotwords_graph_ = std::make_shared<ContextGraph>(
        hotwords_, config_.hotwords_score, boost_scores_);
  }

  template <typename Manager>
  void InitHotwords(Manager *mgr) {
    // each line in hotwords_file contains space-separated words

    auto buf = ReadFile(mgr, config_.hotwords_file);

    std::istringstream is(std::string(buf.begin(), buf.end()));

    if (!is) {
      SHERPA_ONNX_LOGE("Open hotwords file failed: %s",
                       config_.hotwords_file.c_str());
      exit(-1);
    }

    if (!EncodeHotwords(is, config_.model_config.modeling_unit, sym_,
                        bpe_encoder_.get(), &hotwords_, &boost_scores_)) {
      SHERPA_ONNX_LOGE(
          "Failed to encode some hotwords, skip them already, see logs above "
          "for details.");
    }
    hotwords_graph_ = std::make_shared<ContextGraph>(
        hotwords_, config_.hotwords_score, boost_scores_);
  }

  void InitHotwordsFromBufStr() {
    // each line in hotwords_file contains space-separated words

    std::istringstream iss(config_.hotwords_buf);
    if (!EncodeHotwords(iss, config_.model_config.modeling_unit, sym_,
                        bpe_encoder_.get(), &hotwords_, &boost_scores_)) {
      SHERPA_ONNX_LOGE(
          "Failed to encode some hotwords, skip them already, see logs above "
          "for details.");
    }
    hotwords_graph_ = std::make_shared<ContextGraph>(
        hotwords_, config_.hotwords_score, boost_scores_);
  }

  void InitOnlineStream(OnlineStream *stream) const {
    auto r = decoder_->GetEmptyResult();

    if (config_.decoding_method == "modified_beam_search" &&
        nullptr != stream->GetContextGraph()) {
      // r.hyps has only one element.
      for (auto it = r.hyps.begin(); it != r.hyps.end(); ++it) {
        it->second.context_state = stream->GetContextGraph()->Root();
      }
    }

    stream->SetResult(r);
    stream->SetStates(model_->GetEncoderInitStates());
  }

 private:
  OnlineRecognizerConfig config_;
  std::vector<std::vector<int32_t>> hotwords_;
  std::vector<float> boost_scores_;
  ContextGraphPtr hotwords_graph_;
  std::unique_ptr<ssentencepiece::Ssentencepiece> bpe_encoder_;
  std::unique_ptr<OnlineTransducerModel> model_;
  std::unique_ptr<OnlineLM> lm_;
  std::unique_ptr<OnlineTransducerDecoder> decoder_;
  SymbolTable sym_;
  Endpoint endpoint_;
  int32_t unk_id_ = -1;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_RECOGNIZER_TRANSDUCER_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/online-recognizer-transducer-nemo-impl.h
================================================
// sherpa-onnx/csrc/online-recognizer-transducer-nemo-impl.h
//
// Copyright (c)  2022-2024  Xiaomi Corporation
// Copyright (c)  2024  Sangeet Sagar

#ifndef SHERPA_ONNX_CSRC_ONLINE_RECOGNIZER_TRANSDUCER_NEMO_IMPL_H_
#define SHERPA_ONNX_CSRC_ONLINE_RECOGNIZER_TRANSDUCER_NEMO_IMPL_H_

#include <algorithm>
#include <fstream>
#include <ios>
#include <memory>
#include <regex>  // NOLINT
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/online-recognizer-impl.h"
#include "sherpa-onnx/csrc/online-recognizer.h"
#include "sherpa-onnx/csrc/online-transducer-greedy-search-nemo-decoder.h"
#include "sherpa-onnx/csrc/online-transducer-nemo-model.h"
#include "sherpa-onnx/csrc/symbol-table.h"
#include "sherpa-onnx/csrc/transpose.h"
#include "sherpa-onnx/csrc/utils.h"

namespace sherpa_onnx {

// defined in ./online-recognizer-transducer-impl.h
OnlineRecognizerResult Convert(const OnlineTransducerDecoderResult &src,
                               const SymbolTable &sym_table,
                               float frame_shift_ms, int32_t subsampling_factor,
                               int32_t segment, int32_t frames_since_start);

class OnlineRecognizerTransducerNeMoImpl : public OnlineRecognizerImpl {
 public:
  explicit OnlineRecognizerTransducerNeMoImpl(
      const OnlineRecognizerConfig &config)
      : OnlineRecognizerImpl(config),
        config_(config),
        endpoint_(config_.endpoint_config),
        model_(
            std::make_unique<OnlineTransducerNeMoModel>(config.model_config)) {
    if (!config.model_config.tokens_buf.empty()) {
      symbol_table_ = SymbolTable(config.model_config.tokens_buf, false);
    } else {
      /// assuming tokens_buf and tokens are guaranteed not being both empty
      symbol_table_ = SymbolTable(config.model_config.tokens, true);
    }

    if (config.decoding_method == "greedy_search") {
      decoder_ = std::make_unique<OnlineTransducerGreedySearchNeMoDecoder>(
          model_.get(), config_.blank_penalty);
    } else {
      SHERPA_ONNX_LOGE("Unsupported decoding method: %s",
                       config.decoding_method.c_str());
      exit(-1);
    }
    PostInit();
  }

  template <typename Manager>
  explicit OnlineRecognizerTransducerNeMoImpl(
      Manager *mgr, const OnlineRecognizerConfig &config)
      : OnlineRecognizerImpl(mgr, config),
        config_(config),
        endpoint_(config_.endpoint_config),
        model_(std::make_unique<OnlineTransducerNeMoModel>(
            mgr, config.model_config)) {
    if (!config.model_config.tokens_buf.empty()) {
      symbol_table_ = SymbolTable(config.model_config.tokens_buf, false);
    } else {
      symbol_table_ = SymbolTable(mgr, config.model_config.tokens);
    }
    if (config.decoding_method == "greedy_search") {
      decoder_ = std::make_unique<OnlineTransducerGreedySearchNeMoDecoder>(
          model_.get(), config_.blank_penalty);
    } else {
      SHERPA_ONNX_LOGE("Unsupported decoding method: %s",
                       config.decoding_method.c_str());
      exit(-1);
    }

    PostInit();
  }

  std::unique_ptr<OnlineStream> CreateStream() const override {
    auto stream = std::make_unique<OnlineStream>(config_.feat_config);
    InitOnlineStream(stream.get());
    return stream;
  }

  bool IsReady(OnlineStream *s) const override {
    return s->GetNumProcessedFrames() + model_->ChunkSize() <
           s->NumFramesReady();
  }

  OnlineRecognizerResult GetResult(OnlineStream *s) const override {
    // TODO(fangjun): Remember to change these constants if needed
    int32_t frame_shift_ms = 10;
    int32_t subsampling_factor = model_->SubsamplingFactor();
    auto r = Convert(s->GetResult(), symbol_table_, frame_shift_ms,
                     subsampling_factor, s->GetCurrentSegment(),
                     s->GetNumFramesSinceStart());
    r.text = ApplyInverseTextNormalization(std::move(r.text));
    r.text = ApplyHomophoneReplacer(std::move(r.text));
    return r;
  }

  bool IsEndpoint(OnlineStream *s) const override {
    if (!config_.enable_endpoint) {
      return false;
    }

    int32_t num_processed_frames = s->GetNumProcessedFrames();

    // frame shift is 10 milliseconds
    float frame_shift_in_seconds = 0.01;

    int32_t trailing_silence_frames =
        s->GetResult().num_trailing_blanks * model_->SubsamplingFactor();

    return endpoint_.IsEndpoint(num_processed_frames, trailing_silence_frames,
                                frame_shift_in_seconds);
  }

  void Reset(OnlineStream *s) const override {
    {
      // segment is incremented only when the last
      // result is not empty
      const auto &r = s->GetResult();
      if (!r.tokens.empty()) {
        s->GetCurrentSegment() += 1;
      }
    }

    s->SetResult({});

    s->SetStates(model_->GetEncoderInitStates());

    s->SetNeMoDecoderStates(model_->GetDecoderInitStates());

    // Note: We only update counters. The underlying audio samples
    // are not discarded.
    s->Reset();
  }

  void DecodeStreams(OnlineStream **ss, int32_t n) const override {
    int32_t chunk_size = model_->ChunkSize();
    int32_t chunk_shift = model_->ChunkShift();

    int32_t feature_dim = ss[0]->FeatureDim();

    std::vector<float> features_vec(n * chunk_size * feature_dim);
    std::vector<std::vector<Ort::Value>> encoder_states(n);

    for (int32_t i = 0; i != n; ++i) {
      const auto num_processed_frames = ss[i]->GetNumProcessedFrames();
      std::vector<float> features =
          ss[i]->GetFrames(num_processed_frames, chunk_size);

      // Question: should num_processed_frames include chunk_shift?
      ss[i]->GetNumProcessedFrames() += chunk_shift;

      std::copy(features.begin(), features.end(),
                features_vec.data() + i * chunk_size * feature_dim);

      encoder_states[i] = std::move(ss[i]->GetStates());
    }

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::array<int64_t, 3> x_shape{n, chunk_size, feature_dim};

    Ort::Value x = Ort::Value::CreateTensor(memory_info, features_vec.data(),
                                            features_vec.size(), x_shape.data(),
                                            x_shape.size());

    auto states = model_->StackStates(std::move(encoder_states));
    int32_t num_states = states.size();  // num_states = 3
    auto t = model_->RunEncoder(std::move(x), std::move(states));
    // t[0] encoder_out, float tensor, (batch_size, dim, T)
    // t[1] next states

    std::vector<Ort::Value> out_states;
    out_states.reserve(num_states);

    for (int32_t k = 1; k != num_states + 1; ++k) {
      out_states.push_back(std::move(t[k]));
    }

    auto unstacked_states = model_->UnStackStates(std::move(out_states));
    for (int32_t i = 0; i != n; ++i) {
      ss[i]->SetStates(std::move(unstacked_states[i]));
    }

    Ort::Value encoder_out = Transpose12(model_->Allocator(), &t[0]);

    decoder_->Decode(std::move(encoder_out), ss, n);
  }

  void InitOnlineStream(OnlineStream *stream) const {
    // set encoder states
    stream->SetStates(model_->GetEncoderInitStates());

    // set decoder states
    stream->SetNeMoDecoderStates(model_->GetDecoderInitStates());
  }

 private:
  void PostInit() {
    config_.feat_config.feature_dim = model_->FeatureDim();

    config_.feat_config.low_freq = 0;
    config_.feat_config.high_freq = 8000;
    config_.feat_config.is_librosa = true;
    config_.feat_config.remove_dc_offset = false;
    config_.feat_config.window_type = "hann";
    config_.feat_config.dither = 0;
    config_.feat_config.nemo_normalize_type =
        model_->FeatureNormalizationMethod();

    int32_t vocab_size = model_->VocabSize();

    // check the blank ID
    if (!symbol_table_.Contains("<blk>")) {
      SHERPA_ONNX_LOGE("tokens.txt does not include the blank token <blk>");
      exit(-1);
    }

    if (symbol_table_["<blk>"] != vocab_size - 1) {
      SHERPA_ONNX_LOGE("<blk> is not the last token!");
      exit(-1);
    }

    if (symbol_table_.NumSymbols() != vocab_size) {
      SHERPA_ONNX_LOGE("number of lines in tokens.txt %d != %d (vocab_size)",
                       symbol_table_.NumSymbols(), vocab_size);
      exit(-1);
    }
  }

 private:
  OnlineRecognizerConfig config_;
  SymbolTable symbol_table_;
  std::unique_ptr<OnlineTransducerNeMoModel> model_;
  std::unique_ptr<OnlineTransducerGreedySearchNeMoDecoder> decoder_;
  Endpoint endpoint_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_RECOGNIZER_TRANSDUCER_NEMO_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/online-recognizer.cc
================================================
// sherpa-onnx/csrc/online-recognizer.cc
//
// Copyright (c)  2023  Xiaomi Corporation
// Copyright (c)  2023  Pingfeng Luo

#include "sherpa-onnx/csrc/online-recognizer.h"

#include <algorithm>
#include <cassert>
#include <iomanip>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/online-recognizer-impl.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

namespace {

/// Helper for `OnlineRecognizerResult::AsJsonString()`
template <typename T>
std::string VecToString(const std::vector<T> &vec, int32_t precision = 6) {
  std::ostringstream oss;
  if (precision != 0) {
    oss << std::fixed << std::setprecision(precision);
  }
  oss << "[";
  std::string sep = "";
  for (const auto &item : vec) {
    oss << sep << item;
    sep = ", ";
  }
  oss << "]";
  return oss.str();
}

/// Helper for `OnlineRecognizerResult::AsJsonString()`
template <>  // explicit specialization for T = std::string
std::string VecToString<std::string>(const std::vector<std::string> &vec,
                                     int32_t) {  // ignore 2nd arg
  std::ostringstream oss;
  oss << "[";
  std::string sep = "";
  for (const auto &item : vec) {
    oss << sep << std::quoted(item);
    sep = ", ";
  }
  oss << "]";
  return oss.str();
}

}  // namespace

std::string OnlineRecognizerResult::AsJsonString() const {
  std::ostringstream os;
  os << "{ ";
  os << "\"text\": " << std::quoted(text) << ", ";
  os << "\"tokens\": " << VecToString(tokens) << ", ";
  os << "\"timestamps\": " << VecToString(timestamps, 2) << ", ";
  os << "\"ys_probs\": " << VecToString(ys_probs, 6) << ", ";
  os << "\"lm_probs\": " << VecToString(lm_probs, 6) << ", ";
  os << "\"context_scores\": " << VecToString(context_scores, 6) << ", ";
  os << "\"segment\": " << segment << ", ";
  os << "\"words\": " << VecToString(words, 0) << ", ";
  os << "\"start_time\": " << std::fixed << std::setprecision(2) << start_time
     << ", ";
  os << "\"is_final\": " << (is_final ? "true" : "false") << ", ";
  os << "\"is_eof\": " << (is_eof ? "true" : "false");
  os << "}";
  return os.str();
}

void OnlineRecognizerConfig::Register(ParseOptions *po) {
  feat_config.Register(po);
  model_config.Register(po);
  endpoint_config.Register(po);
  lm_config.Register(po);
  ctc_fst_decoder_config.Register(po);
  hr.Register(po);

  po->Register("enable-endpoint", &enable_endpoint,
               "True to enable endpoint detection. False to disable it.");
  po->Register("max-active-paths", &max_active_paths,
               "beam size used in modified beam search.");
  po->Register("blank-penalty", &blank_penalty,
               "The penalty applied on blank symbol during decoding. "
               "Note: It is a positive value. "
               "Increasing value will lead to lower deletion at the cost"
               "of higher insertions. "
               "Currently only applicable for transducer models.");
  po->Register("hotwords-score", &hotwords_score,
               "The bonus score for each token in context word/phrase. "
               "Used only when decoding_method is modified_beam_search");
  po->Register(
      "hotwords-file", &hotwords_file,
      "The file containing hotwords, one words/phrases per line, For example: "
      "HELLO WORLD"
      "你好世界");
  po->Register("decoding-method", &decoding_method,
               "decoding method,"
               "now support greedy_search and modified_beam_search.");
  po->Register("temperature-scale", &temperature_scale,
               "Temperature scale for confidence computation in decoding.");
  po->Register(
      "rule-fsts", &rule_fsts,
      "If not empty, it specifies fsts for inverse text normalization. "
      "If there are multiple fsts, they are separated by a comma.");

  po->Register(
      "rule-fars", &rule_fars,
      "If not empty, it specifies fst archives for inverse text normalization. "
      "If there are multiple archives, they are separated by a comma.");

  po->Register("reset-encoder", &reset_encoder,
               "True to reset encoder_state on an endpoint after empty segment."
               "Done in `Reset()` method, after an endpoint was detected.");
}

bool OnlineRecognizerConfig::Validate() const {
  if (decoding_method == "modified_beam_search") {
    if (max_active_paths <= 0) {
      SHERPA_ONNX_LOGE("max_active_paths must be > 0. Given: %d",
                       max_active_paths);
      return false;
    }
  }

  if (decoding_method == "modified_beam_search" && !lm_config.model.empty()) {
    if (!lm_config.Validate()) {
      return false;
    }
  }

  if (!hotwords_file.empty() && decoding_method != "modified_beam_search") {
    SHERPA_ONNX_LOGE(
        "Please use --decoding-method=modified_beam_search if you"
        " provide --hotwords-file. Given --decoding-method=%s",
        decoding_method.c_str());
    return false;
  }

  if (!ctc_fst_decoder_config.graph.empty() &&
      !ctc_fst_decoder_config.Validate()) {
    SHERPA_ONNX_LOGE("Errors in ctc_fst_decoder_config");
    return false;
  }

  if (!hotwords_file.empty() && !FileExists(hotwords_file)) {
    SHERPA_ONNX_LOGE("--hotwords-file: '%s' does not exist",
                     hotwords_file.c_str());
    return false;
  }

  if (!rule_fsts.empty()) {
    std::vector<std::string> files;
    SplitStringToVector(rule_fsts, ",", false, &files);
    for (const auto &f : files) {
      if (!FileExists(f)) {
        SHERPA_ONNX_LOGE("Rule fst '%s' does not exist. ", f.c_str());
        return false;
      }
    }
  }

  if (!rule_fars.empty()) {
    std::vector<std::string> files;
    SplitStringToVector(rule_fars, ",", false, &files);
    for (const auto &f : files) {
      if (!FileExists(f)) {
        SHERPA_ONNX_LOGE("Rule far '%s' does not exist. ", f.c_str());
        return false;
      }
    }
  }

  if (!hr.lexicon.empty() && !hr.rule_fsts.empty() && !hr.Validate()) {
    return false;
  }

  return model_config.Validate();
}

std::string OnlineRecognizerConfig::ToString() const {
  std::ostringstream os;

  os << "OnlineRecognizerConfig(";
  os << "feat_config=" << feat_config.ToString() << ", ";
  os << "model_config=" << model_config.ToString() << ", ";
  os << "lm_config=" << lm_config.ToString() << ", ";
  os << "endpoint_config=" << endpoint_config.ToString() << ", ";
  os << "ctc_fst_decoder_config=" << ctc_fst_decoder_config.ToString() << ", ";
  os << "enable_endpoint=" << (enable_endpoint ? "True" : "False") << ", ";
  os << "max_active_paths=" << max_active_paths << ", ";
  os << "hotwords_score=" << hotwords_score << ", ";
  os << "hotwords_file=\"" << hotwords_file << "\", ";
  os << "decoding_method=\"" << decoding_method << "\", ";
  os << "blank_penalty=" << blank_penalty << ", ";
  os << "temperature_scale=" << temperature_scale << ", ";
  os << "rule_fsts=\"" << rule_fsts << "\", ";
  os << "rule_fars=\"" << rule_fars << "\", ";
  os << "reset_encoder=" << (reset_encoder ? "True" : "False") << ", ";
  os << "hr=" << hr.ToString() << ")";

  return os.str();
}

OnlineRecognizer::OnlineRecognizer(const OnlineRecognizerConfig &config)
    : impl_(OnlineRecognizerImpl::Create(config)) {}

template <typename Manager>
OnlineRecognizer::OnlineRecognizer(Manager *mgr,
                                   const OnlineRecognizerConfig &config)
    : impl_(OnlineRecognizerImpl::Create(mgr, config)) {}

OnlineRecognizer::~OnlineRecognizer() = default;

std::unique_ptr<OnlineStream> OnlineRecognizer::CreateStream() const {
  return impl_->CreateStream();
}

std::unique_ptr<OnlineStream> OnlineRecognizer::CreateStream(
    const std::string &hotwords) const {
  return impl_->CreateStream(hotwords);
}

bool OnlineRecognizer::IsReady(OnlineStream *s) const {
  return impl_->IsReady(s);
}

void OnlineRecognizer::WarmpUpRecognizer(int32_t warmup, int32_t mbs) const {
  if (warmup > 0) {
    impl_->WarmpUpRecognizer(warmup, mbs);
  }
}

void OnlineRecognizer::DecodeStreams(OnlineStream **ss, int32_t n) const {
  impl_->DecodeStreams(ss, n);
}

OnlineRecognizerResult OnlineRecognizer::GetResult(OnlineStream *s) const {
  return impl_->GetResult(s);
}

bool OnlineRecognizer::IsEndpoint(OnlineStream *s) const {
  return impl_->IsEndpoint(s);
}

void OnlineRecognizer::Reset(OnlineStream *s) const { impl_->Reset(s); }

#if __ANDROID_API__ >= 9
template OnlineRecognizer::OnlineRecognizer(
    AAssetManager *mgr, const OnlineRecognizerConfig &config);
#endif

#if __OHOS__
template OnlineRecognizer::OnlineRecognizer(
    NativeResourceManager *mgr, const OnlineRecognizerConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-recognizer.h
================================================
// sherpa-onnx/csrc/online-recognizer.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_ONLINE_RECOGNIZER_H_
#define SHERPA_ONNX_CSRC_ONLINE_RECOGNIZER_H_

#include <memory>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/endpoint.h"
#include "sherpa-onnx/csrc/features.h"
#include "sherpa-onnx/csrc/homophone-replacer.h"
#include "sherpa-onnx/csrc/online-ctc-fst-decoder-config.h"
#include "sherpa-onnx/csrc/online-lm-config.h"
#include "sherpa-onnx/csrc/online-model-config.h"
#include "sherpa-onnx/csrc/online-stream.h"
#include "sherpa-onnx/csrc/online-transducer-model-config.h"
#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OnlineRecognizerResult {
  /// Recognition results.
  /// For English, it consists of space separated words.
  /// For Chinese, it consists of Chinese words without spaces.
  /// Example 1: "hello world"
  /// Example 2: "你好世界"
  std::string text;

  /// Decoded results at the token level.
  /// For instance, for BPE-based models it consists of a list of BPE tokens.
  std::vector<std::string> tokens;

  /// timestamps.size() == tokens.size()
  /// timestamps[i] records the time in seconds when tokens[i] is decoded.
  std::vector<float> timestamps;

  std::vector<float> ys_probs;  //< log-prob scores from ASR model
  std::vector<float> lm_probs;  //< log-prob scores from language model
                                //
  /// log-domain scores from "hot-phrase" contextual boosting
  std::vector<float> context_scores;

  std::vector<int32_t> words;

  /// ID of this segment
  /// When an endpoint is detected, it is incremented
  int32_t segment = 0;

  /// Starting time of this segment.
  /// When an endpoint is detected, it will change
  float start_time = 0;

  /// True if the end of this segment is reached, i.e., an endpoint is detected
  /// used only in ./online-websocket-server-impl.cc
  bool is_final = false;

  /// used only in ./online-websocket-server-impl.cc
  /// If it is true, it means the server has processed all received samples
  bool is_eof = false;

  /** Return a json string.
   *
   * The returned string contains:
   *   {
   *     "text": "The recognition result",
   *     "tokens": [x, x, x],
   *     "timestamps": [x, x, x],
   *     "ys_probs": [x, x, x],
   *     "lm_probs": [x, x, x],
   *     "context_scores": [x, x, x],
   *     "segment": x,
   *     "start_time": x,
   *     "is_final": true|false
   *     "is_eof": true|false
   *   }
   */
  std::string AsJsonString() const;
};

struct OnlineRecognizerConfig {
  FeatureExtractorConfig feat_config;
  OnlineModelConfig model_config;
  OnlineLMConfig lm_config;
  EndpointConfig endpoint_config;
  OnlineCtcFstDecoderConfig ctc_fst_decoder_config;

  bool enable_endpoint = true;

  std::string decoding_method = "greedy_search";
  // now support modified_beam_search and greedy_search

  // used only for modified_beam_search
  int32_t max_active_paths = 4;

  /// used only for modified_beam_search
  std::string hotwords_file;
  float hotwords_score = 1.5;

  float blank_penalty = 0.0;

  float temperature_scale = 2.0;

  // If there are multiple rules, they are applied from left to right.
  std::string rule_fsts;

  // If there are multiple FST archives, they are applied from left to right.
  std::string rule_fars;

  // True to reset encoder_state on an endpoint after empty segment.
  // Done in `Reset()` method, after an endpoint was detected,
  // currently only in `OnlineRecognizerTransducerImpl`.
  bool reset_encoder = false;

  HomophoneReplacerConfig hr;

  /// used only for modified_beam_search, if hotwords_buf is non-empty,
  /// the hotwords will be loaded from the buffered string instead of from the
  /// "hotwords_file"
  std::string hotwords_buf;

  OnlineRecognizerConfig() = default;

  OnlineRecognizerConfig(
      const FeatureExtractorConfig &feat_config,
      const OnlineModelConfig &model_config, const OnlineLMConfig &lm_config,
      const EndpointConfig &endpoint_config,
      const OnlineCtcFstDecoderConfig &ctc_fst_decoder_config,
      bool enable_endpoint, const std::string &decoding_method,
      int32_t max_active_paths, const std::string &hotwords_file,
      float hotwords_score, float blank_penalty, float temperature_scale,
      const std::string &rule_fsts, const std::string &rule_fars,
      bool reset_encoder, const HomophoneReplacerConfig &hr,
      const std::string &hotwords_buf = {})
      : feat_config(feat_config),
        model_config(model_config),
        lm_config(lm_config),
        endpoint_config(endpoint_config),
        ctc_fst_decoder_config(ctc_fst_decoder_config),
        enable_endpoint(enable_endpoint),
        decoding_method(decoding_method),
        max_active_paths(max_active_paths),
        hotwords_file(hotwords_file),
        hotwords_score(hotwords_score),
        blank_penalty(blank_penalty),
        temperature_scale(temperature_scale),
        rule_fsts(rule_fsts),
        rule_fars(rule_fars),
        reset_encoder(reset_encoder),
        hr(hr),
        hotwords_buf(hotwords_buf) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

class OnlineRecognizerImpl;

class OnlineRecognizer {
 public:
  explicit OnlineRecognizer(const OnlineRecognizerConfig &config);

  template <typename Manager>
  OnlineRecognizer(Manager *mgr, const OnlineRecognizerConfig &config);

  ~OnlineRecognizer();

  /// Create a stream for decoding.
  std::unique_ptr<OnlineStream> CreateStream() const;

  /** Create a stream for decoding.
   *
   *  @param The hotwords for this string, it might contain several hotwords,
   *         the hotwords are separated by "/". In each of the hotwords, there
   *         are cjkchars or bpes, the bpe/cjkchar are separated by space (" ").
   *         For example, hotwords I LOVE YOU and HELLO WORLD, looks like:
   *
   *         "▁I ▁LOVE ▁YOU/▁HE LL O ▁WORLD"
   */
  std::unique_ptr<OnlineStream> CreateStream(const std::string &hotwords) const;

  /**
   * Return true if the given stream has enough frames for decoding.
   * Return false otherwise
   */
  bool IsReady(OnlineStream *s) const;

  /** Decode a single stream. */
  void DecodeStream(OnlineStream *s) const {
    OnlineStream *ss[1] = {s};
    DecodeStreams(ss, 1);
  }

  /**
   * Warmups up onnxruntime sessions by apply optimization and
   * allocating memory prior
   *
   * @param warmup Number of warmups.
   * @param mbs : max-batch-size Max batch size for the models
   */
  void WarmpUpRecognizer(int32_t warmup, int32_t mbs) const;

  /** Decode multiple streams in parallel
   *
   * @param ss Pointer array containing streams to be decoded.
   * @param n Number of streams in `ss`.
   */
  void DecodeStreams(OnlineStream **ss, int32_t n) const;

  OnlineRecognizerResult GetResult(OnlineStream *s) const;

  // Return true if we detect an endpoint for this stream.
  // Note: If this function returns true, you usually want to
  // invoke Reset(s).
  bool IsEndpoint(OnlineStream *s) const;

  // Clear the state of this stream. If IsEndpoint(s) returns true,
  // after calling this function, IsEndpoint(s) will return false
  void Reset(OnlineStream *s) const;

 private:
  std::unique_ptr<OnlineRecognizerImpl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_RECOGNIZER_H_


================================================
FILE: sherpa-onnx/csrc/online-rnn-lm.cc
================================================
// sherpa-onnx/csrc/on-rnn-lm.cc
//
// Copyright (c)  2023  Pingfeng Luo
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/online-rnn-lm.h"

#include <algorithm>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/lodr-fst.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class OnlineRnnLM::Impl {
 public:
  explicit Impl(const OnlineLMConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_{GetSessionOptions(config)},
        allocator_{} {
    Init(config);
  }

  // shallow fusion scoring function
  void ComputeLMScoreSF(float scale, Hypothesis *hyp) {
    if (hyp->nn_lm_states.empty()) {
      auto init_states = GetInitStatesSF();
      hyp->nn_lm_scores.value = std::move(init_states.first);
      hyp->nn_lm_states = Convert(std::move(init_states.second));
      // if LODR enabled, we need to initialize the LODR state
      if (lodr_fst_ != nullptr) {
        hyp->lodr_state = std::make_unique<LodrStateCost>(lodr_fst_.get());
      }
    }

    // get lm score for cur token given the hyp->ys[:-1] and save to lm_log_prob
    const float *nn_lm_scores = hyp->nn_lm_scores.value.GetTensorData<float>();
    hyp->lm_log_prob += nn_lm_scores[hyp->ys.back()] * scale;

    // if LODR enabled, we need to update the LODR state
    if (lodr_fst_ != nullptr) {
      auto next_lodr_state = std::make_unique<LodrStateCost>(
          hyp->lodr_state->ForwardOneStep(hyp->ys.back()));
      // calculate the score of the latest token
      auto score = next_lodr_state->Score() - hyp->lodr_state->Score();
      hyp->lodr_state = std::move(next_lodr_state);
      // apply LODR to hyp score
      hyp->lm_log_prob += score * config_.lodr_scale;
    }

    // get lm scores for next tokens given the hyp->ys[:] and save to
    // nn_lm_scores
    std::array<int64_t, 2> x_shape{1, 1};
    Ort::Value x = Ort::Value::CreateTensor<int64_t>(allocator_, x_shape.data(),
                                                     x_shape.size());
    *x.GetTensorMutableData<int64_t>() = hyp->ys.back();
    auto lm_out = ScoreToken(std::move(x), Convert(hyp->nn_lm_states));
    hyp->nn_lm_scores.value = std::move(lm_out.first);
    hyp->nn_lm_states = Convert(std::move(lm_out.second));
  }

  // classic rescore function
  void ComputeLMScore(float scale, int32_t context_size,
                      std::vector<Hypotheses> *hyps) {
    Ort::AllocatorWithDefaultOptions allocator;

    for (auto &hyp : *hyps) {
      for (auto &h_m : hyp) {
        auto &h = h_m.second;
        auto &ys = h.ys;
        const int32_t token_num_in_chunk =
            ys.size() - context_size - h.cur_scored_pos - 1;

        if (token_num_in_chunk < 1) {
          continue;
        }

        if (h.nn_lm_states.empty()) {
          h.nn_lm_states = Convert(GetInitStates());
        }

        if (token_num_in_chunk >= h.lm_rescore_min_chunk) {
          std::array<int64_t, 2> x_shape{1, token_num_in_chunk};

          Ort::Value x = Ort::Value::CreateTensor<int64_t>(
              allocator, x_shape.data(), x_shape.size());
          int64_t *p_x = x.GetTensorMutableData<int64_t>();
          std::copy(ys.begin() + context_size + h.cur_scored_pos, ys.end() - 1,
                    p_x);

          // streaming forward by NN LM
          auto out =
              ScoreToken(std::move(x), Convert(std::move(h.nn_lm_states)));

          // update NN LM score in hyp
          const float *p_nll = out.first.GetTensorData<float>();
          h.lm_log_prob = -scale * (*p_nll);

          // apply LODR to hyp score
          if (lodr_fst_ != nullptr) {
            // We scale LODR scale with LM scale to replicate Icefall code
            lodr_fst_->ComputeScore(config_.lodr_scale * scale, &h,
                                    context_size);
          }

          // update NN LM states in hyp
          h.nn_lm_states = Convert(std::move(out.second));

          h.cur_scored_pos += token_num_in_chunk;
        }
      }
    }
  }

  std::pair<Ort::Value, std::vector<Ort::Value>> ScoreToken(
      Ort::Value x, std::vector<Ort::Value> states) {
    std::array<Ort::Value, 3> inputs = {std::move(x), std::move(states[0]),
                                        std::move(states[1])};

    auto out =
        sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(),
                   output_names_ptr_.data(), output_names_ptr_.size());

    std::vector<Ort::Value> next_states;
    next_states.reserve(2);
    next_states.push_back(std::move(out[1]));
    next_states.push_back(std::move(out[2]));

    return {std::move(out[0]), std::move(next_states)};
  }

  // get init states for shallow fusion
  std::pair<Ort::Value, std::vector<Ort::Value>> GetInitStatesSF() {
    std::vector<Ort::Value> ans;
    ans.reserve(init_states_.size());
    for (auto &s : init_states_) {
      ans.emplace_back(View(&s));
    }
    return {View(&init_scores_.value), std::move(ans)};
  }

  // get init states for classic rescore
  std::vector<Ort::Value> GetInitStates() {
    std::vector<Ort::Value> ans;
    ans.reserve(init_states_.size());

    for (const auto &s : init_states_) {
      ans.emplace_back(Clone(allocator_, &s));
    }

    return ans;
  }

 private:
  void Init(const OnlineLMConfig &config) {
    auto buf = ReadFile(config_.model);

    sess_ = std::make_unique<Ort::Session>(env_, buf.data(), buf.size(),
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);
    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
    SHERPA_ONNX_READ_META_DATA(rnn_num_layers_, "num_layers");
    SHERPA_ONNX_READ_META_DATA(rnn_hidden_size_, "hidden_size");
    SHERPA_ONNX_READ_META_DATA(sos_id_, "sos_id");

    ComputeInitStates();

    if (!config_.lodr_fst.empty()) {
      lodr_fst_ = std::make_unique<LodrFst>(
          LodrFst(config_.lodr_fst, config_.lodr_backoff_id));
    }
  }

  void ComputeInitStates() {
    constexpr int32_t kBatchSize = 1;
    std::array<int64_t, 3> h_shape{rnn_num_layers_, kBatchSize,
                                   rnn_hidden_size_};
    std::array<int64_t, 3> c_shape{rnn_num_layers_, kBatchSize,
                                   rnn_hidden_size_};
    Ort::Value h = Ort::Value::CreateTensor<float>(allocator_, h_shape.data(),
                                                   h_shape.size());
    Ort::Value c = Ort::Value::CreateTensor<float>(allocator_, c_shape.data(),
                                                   c_shape.size());
    Fill<float>(&h, 0);
    Fill<float>(&c, 0);
    std::array<int64_t, 2> x_shape{1, 1};
    Ort::Value x = Ort::Value::CreateTensor<int64_t>(allocator_, x_shape.data(),
                                                     x_shape.size());
    *x.GetTensorMutableData<int64_t>() = sos_id_;

    std::vector<Ort::Value> states;
    states.push_back(std::move(h));
    states.push_back(std::move(c));
    auto pair = ScoreToken(std::move(x), std::move(states));

    init_scores_.value = std::move(pair.first);  // only used during
                                                 // shallow fusion
    init_states_ = std::move(pair.second);
  }

 private:
  OnlineLMConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;

  CopyableOrtValue init_scores_;
  std::vector<Ort::Value> init_states_;

  int32_t rnn_num_layers_ = 2;
  int32_t rnn_hidden_size_ = 512;
  int32_t sos_id_ = 1;

  std::unique_ptr<LodrFst> lodr_fst_;
};

OnlineRnnLM::OnlineRnnLM(const OnlineLMConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

OnlineRnnLM::~OnlineRnnLM() = default;

// classic rescore state init
std::vector<Ort::Value> OnlineRnnLM::GetInitStates() {
  return impl_->GetInitStates();
}

// shallow fusion state init
std::pair<Ort::Value, std::vector<Ort::Value>> OnlineRnnLM::GetInitStatesSF() {
  return impl_->GetInitStatesSF();
}

std::pair<Ort::Value, std::vector<Ort::Value>> OnlineRnnLM::ScoreToken(
    Ort::Value x, std::vector<Ort::Value> states) {
  return impl_->ScoreToken(std::move(x), std::move(states));
}

// classic rescore scores
void OnlineRnnLM::ComputeLMScore(float scale, int32_t context_size,
                                 std::vector<Hypotheses> *hyps) {
  return impl_->ComputeLMScore(scale, context_size, hyps);
}

// shallow fusion scores
void OnlineRnnLM::ComputeLMScoreSF(float scale, Hypothesis *hyp) {
  return impl_->ComputeLMScoreSF(scale, hyp);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-rnn-lm.h
================================================
// sherpa-onnx/csrc/online-rnn-lm.h
//
// Copyright (c)  2023  Pingfeng Luo
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_ONLINE_RNN_LM_H_
#define SHERPA_ONNX_CSRC_ONLINE_RNN_LM_H_

#include <memory>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/online-lm-config.h"
#include "sherpa-onnx/csrc/online-lm.h"

namespace sherpa_onnx {

class OnlineRnnLM : public OnlineLM {
 public:
  ~OnlineRnnLM() override;

  explicit OnlineRnnLM(const OnlineLMConfig &config);

  // init scores for classic rescore
  std::vector<Ort::Value> GetInitStates() override;

  // init scores for shallow fusion
  std::pair<Ort::Value, std::vector<Ort::Value>> GetInitStatesSF() override;

   /** ScoreToken a batch of sentences (shallow fusion).
   *
   * @param x A 2-D tensor of shape (N, L) with data type int64.
   * @param states It contains the states for the LM model
   * @return Return a pair containing
   *          - log_prob of NN LM
   *          - updated states
   *
   */
  std::pair<Ort::Value, std::vector<Ort::Value>> ScoreToken(
      Ort::Value x, std::vector<Ort::Value> states) override;

   /** This function updates hyp.lm_lob_prob of hyps (classic rescore).
   *
   * @param scale LM score
   * @param context_size Context size of the transducer decoder model
   * @param hyps It is changed in-place.
   *
   */
  void ComputeLMScore(float scale, int32_t context_size,
                              std::vector<Hypotheses> *hyps) override;

   /** This function updates lm_lob_prob and nn_lm_scores of hyp (shallow fusion).
   *
   * @param scale LM score
   * @param hyps It is changed in-place.
   *
   */
  void ComputeLMScoreSF(float scale, Hypothesis *hyp) override;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_RNN_LM_H_


================================================
FILE: sherpa-onnx/csrc/online-speech-denoiser-dpdfnet-impl.h
================================================
// sherpa-onnx/csrc/online-speech-denoiser-dpdfnet-impl.h
//
// Copyright (c)  2026  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_ONLINE_SPEECH_DENOISER_DPDFNET_IMPL_H_
#define SHERPA_ONNX_CSRC_ONLINE_SPEECH_DENOISER_DPDFNET_IMPL_H_

#include <algorithm>
#include <cstdint>
#include <memory>
#include <utility>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-speech-denoiser-dpdfnet-model.h"
#include "sherpa-onnx/csrc/online-speech-denoiser-impl.h"
#include "sherpa-onnx/csrc/online-speech-denoiser-stft-impl.h"

namespace sherpa_onnx {

class OnlineSpeechDenoiserDpdfNetImpl : public OnlineSpeechDenoiserImpl {
 public:
  explicit OnlineSpeechDenoiserDpdfNetImpl(
      const OnlineSpeechDenoiserConfig &config)
      : model_(config.model),
        stream_(GetStftConfig(model_.GetMetaData())),
        state_(model_.GetInitState()) {
    Init();
  }

  template <typename Manager>
  OnlineSpeechDenoiserDpdfNetImpl(Manager *mgr,
                                  const OnlineSpeechDenoiserConfig &config)
      : model_(mgr, config.model),
        stream_(GetStftConfig(model_.GetMetaData())),
        state_(model_.GetInitState()) {
    Init();
  }

  DenoisedAudio Run(const float *samples, int32_t n,
                    int32_t sample_rate) override {
    return stream_.Run(samples, n, sample_rate,
                       [this](float *spec, size_t spec_size, float *enhanced) {
                         ProcessFrame(spec, spec_size, enhanced);
                       });
  }

  DenoisedAudio Flush() override {
    return stream_.Flush(
        [this](float *spec, size_t spec_size, float *enhanced) {
          ProcessFrame(spec, spec_size, enhanced);
        },
        [this]() { state_ = model_.GetInitState(); });
  }

  void Reset() override {
    stream_.Reset();
    state_ = model_.GetInitState();
  }

  int32_t GetSampleRate() const override { return stream_.GetSampleRate(); }

  int32_t GetFrameShiftInSamples() const override {
    return stream_.GetFrameShiftInSamples();
  }

 private:
  void Init() {
    const auto &meta = model_.GetMetaData();
    if (meta.profile != "dpdfnet_16khz" &&
        meta.profile != "dpdfnet2_48khz_hr") {
      SHERPA_ONNX_LOGE(
          "Online speech denoiser currently supports only DPDFNet streaming "
          "exports. Given profile: %s",
          meta.profile.c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    if (meta.spec_shape.size() != 4 || meta.spec_shape[0] != 1 ||
        meta.spec_shape[1] != 1 || meta.spec_shape[3] != 2) {
      SHERPA_ONNX_LOGE(
          "Online speech denoiser expects a single-frame DPDFNet ONNX "
          "signature shaped like [1, 1, F, 2].");
      SHERPA_ONNX_EXIT(-1);
    }
  }

  static OnlineSpeechDenoiserStftConfig GetStftConfig(
      const OfflineSpeechDenoiserDpdfNetModelMetaData &meta) {
    OnlineSpeechDenoiserStftConfig config;
    config.sample_rate = meta.sample_rate;
    config.n_fft = meta.n_fft;
    config.hop_length = meta.hop_length;
    config.window_length = meta.window_length;
    config.window_type = meta.window_type;
    return config;
  }

  void ProcessFrame(float *spec, size_t spec_size, float *enhanced) {
    const auto &meta = model_.GetMetaData();
    const int32_t expected_size = meta.spec_shape[2] * meta.spec_shape[3];
    if (spec_size != static_cast<size_t>(expected_size)) {
      SHERPA_ONNX_LOGE("Unexpected DPDFNet spec size. Expected: %d. Given: %d",
                       expected_size, static_cast<int32_t>(spec_size));
      SHERPA_ONNX_EXIT(-1);
    }

    Ort::Value spec_tensor = Ort::Value::CreateTensor<float>(
        stream_.GetMemoryInfo(), spec, spec_size, meta.spec_shape.data(),
        meta.spec_shape.size());

    auto out = model_.Run(std::move(spec_tensor), std::move(state_));
    state_ = std::move(out.second);

    const float *enhanced_spec = out.first.GetTensorData<float>();
    std::copy(enhanced_spec, enhanced_spec + spec_size, enhanced);
  }

 private:
  OfflineSpeechDenoiserDpdfNetModel model_;
  OnlineSpeechDenoiserStftImpl stream_;
  Ort::Value state_{nullptr};
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_SPEECH_DENOISER_DPDFNET_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/online-speech-denoiser-gtcrn-impl.h
================================================
// sherpa-onnx/csrc/online-speech-denoiser-gtcrn-impl.h
//
// Copyright (c)  2026  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_ONLINE_SPEECH_DENOISER_GTCRN_IMPL_H_
#define SHERPA_ONNX_CSRC_ONLINE_SPEECH_DENOISER_GTCRN_IMPL_H_

#include <algorithm>
#include <array>
#include <cstdint>
#include <memory>
#include <utility>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-speech-denoiser-gtcrn-model.h"
#include "sherpa-onnx/csrc/online-speech-denoiser-impl.h"
#include "sherpa-onnx/csrc/online-speech-denoiser-stft-impl.h"

namespace sherpa_onnx {

class OnlineSpeechDenoiserGtcrnImpl : public OnlineSpeechDenoiserImpl {
 public:
  explicit OnlineSpeechDenoiserGtcrnImpl(
      const OnlineSpeechDenoiserConfig &config)
      : model_(config.model),
        stream_(GetStftConfig(model_.GetMetaData())),
        states_(model_.GetInitStates()) {}

  template <typename Manager>
  OnlineSpeechDenoiserGtcrnImpl(Manager *mgr,
                                const OnlineSpeechDenoiserConfig &config)
      : model_(mgr, config.model),
        stream_(GetStftConfig(model_.GetMetaData())),
        states_(model_.GetInitStates()) {}

  DenoisedAudio Run(const float *samples, int32_t n,
                    int32_t sample_rate) override {
    return stream_.Run(samples, n, sample_rate,
                       [this](float *spec, size_t spec_size, float *enhanced) {
                         ProcessFrame(spec, spec_size, enhanced);
                       });
  }

  DenoisedAudio Flush() override {
    return stream_.Flush(
        [this](float *spec, size_t spec_size, float *enhanced) {
          ProcessFrame(spec, spec_size, enhanced);
        },
        [this]() { states_ = model_.GetInitStates(); });
  }

  void Reset() override {
    stream_.Reset();
    states_ = model_.GetInitStates();
  }

  int32_t GetSampleRate() const override { return stream_.GetSampleRate(); }

  int32_t GetFrameShiftInSamples() const override {
    return stream_.GetFrameShiftInSamples();
  }

 private:
  static OnlineSpeechDenoiserStftConfig GetStftConfig(
      const OfflineSpeechDenoiserGtcrnModelMetaData &meta) {
    OnlineSpeechDenoiserStftConfig config;
    config.sample_rate = meta.sample_rate;
    config.n_fft = meta.n_fft;
    config.hop_length = meta.hop_length;
    config.window_length = meta.window_length;
    config.window_type = meta.window_type;
    return config;
  }

  void ProcessFrame(float *spec, size_t spec_size, float *enhanced) {
    const int32_t num_bins = stream_.GetNumBins();
    const size_t expected_size = static_cast<size_t>(num_bins * 2);
    if (spec_size != expected_size) {
      SHERPA_ONNX_LOGE("Unexpected GTCRN spec size. Expected: %d. Given: %d",
                       num_bins * 2, static_cast<int32_t>(spec_size));
      SHERPA_ONNX_EXIT(-1);
    }

    std::array<int64_t, 4> x_shape{1, num_bins, 1, 2};
    Ort::Value x_tensor = Ort::Value::CreateTensor<float>(
        stream_.GetMemoryInfo(), spec, spec_size, x_shape.data(),
        x_shape.size());

    Ort::Value output{nullptr};
    std::tie(output, states_) =
        model_.Run(std::move(x_tensor), std::move(states_));

    const float *enhanced_spec = output.GetTensorData<float>();
    std::copy(enhanced_spec, enhanced_spec + spec_size, enhanced);
  }

 private:
  OfflineSpeechDenoiserGtcrnModel model_;
  OnlineSpeechDenoiserStftImpl stream_;
  OfflineSpeechDenoiserGtcrnModel::States states_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_SPEECH_DENOISER_GTCRN_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/online-speech-denoiser-impl.cc
================================================
// sherpa-onnx/csrc/online-speech-denoiser-impl.cc
//
// Copyright (c)  2026  Xiaomi Corporation

#include "sherpa-onnx/csrc/online-speech-denoiser-impl.h"

#include <memory>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/online-speech-denoiser-dpdfnet-impl.h"
#include "sherpa-onnx/csrc/online-speech-denoiser-gtcrn-impl.h"

namespace sherpa_onnx {

std::unique_ptr<OnlineSpeechDenoiserImpl> OnlineSpeechDenoiserImpl::Create(
    const OnlineSpeechDenoiserConfig &config) {
  const bool has_gtcrn = !config.model.gtcrn.model.empty();
  const bool has_dpdfnet = !config.model.dpdfnet.model.empty();

  if (has_gtcrn) {
    return std::make_unique<OnlineSpeechDenoiserGtcrnImpl>(config);
  } else if (has_dpdfnet) {
    return std::make_unique<OnlineSpeechDenoiserDpdfNetImpl>(config);
  }

  SHERPA_ONNX_LOGE("Please provide one speech denoising model.");
  return nullptr;
}

template <typename Manager>
std::unique_ptr<OnlineSpeechDenoiserImpl> OnlineSpeechDenoiserImpl::Create(
    Manager *mgr, const OnlineSpeechDenoiserConfig &config) {
  const bool has_gtcrn = !config.model.gtcrn.model.empty();
  const bool has_dpdfnet = !config.model.dpdfnet.model.empty();

  if (has_gtcrn) {
    return std::make_unique<OnlineSpeechDenoiserGtcrnImpl>(mgr, config);
  } else if (has_dpdfnet) {
    return std::make_unique<OnlineSpeechDenoiserDpdfNetImpl>(mgr, config);
  }

  SHERPA_ONNX_LOGE("Please provide one speech denoising model.");
  return nullptr;
}

#if __ANDROID_API__ >= 9
template std::unique_ptr<OnlineSpeechDenoiserImpl>
OnlineSpeechDenoiserImpl::Create(AAssetManager *mgr,
                                 const OnlineSpeechDenoiserConfig &config);
#endif

#if __OHOS__
template std::unique_ptr<OnlineSpeechDenoiserImpl>
OnlineSpeechDenoiserImpl::Create(NativeResourceManager *mgr,
                                 const OnlineSpeechDenoiserConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-speech-denoiser-impl.h
================================================
// sherpa-onnx/csrc/online-speech-denoiser-impl.h
//
// Copyright (c)  2026  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_ONLINE_SPEECH_DENOISER_IMPL_H_
#define SHERPA_ONNX_CSRC_ONLINE_SPEECH_DENOISER_IMPL_H_

#include <memory>

#include "sherpa-onnx/csrc/online-speech-denoiser.h"

namespace sherpa_onnx {

class OnlineSpeechDenoiserImpl {
 public:
  virtual ~OnlineSpeechDenoiserImpl() = default;

  static std::unique_ptr<OnlineSpeechDenoiserImpl> Create(
      const OnlineSpeechDenoiserConfig &config);

  template <typename Manager>
  static std::unique_ptr<OnlineSpeechDenoiserImpl> Create(
      Manager *mgr, const OnlineSpeechDenoiserConfig &config);

  virtual DenoisedAudio Run(const float *samples, int32_t n,
                            int32_t sample_rate) = 0;
  virtual DenoisedAudio Flush() = 0;
  virtual void Reset() = 0;
  virtual int32_t GetSampleRate() const = 0;
  virtual int32_t GetFrameShiftInSamples() const = 0;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_SPEECH_DENOISER_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/online-speech-denoiser-stft-impl.h
================================================
// sherpa-onnx/csrc/online-speech-denoiser-stft-impl.h
//
// Copyright (c)  2026  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_ONLINE_SPEECH_DENOISER_STFT_IMPL_H_
#define SHERPA_ONNX_CSRC_ONLINE_SPEECH_DENOISER_STFT_IMPL_H_

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "kaldi-native-fbank/csrc/feature-window.h"
#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/math.h"
#include "sherpa-onnx/csrc/offline-speech-denoiser.h"
#include "sherpa-onnx/csrc/resample.h"

namespace sherpa_onnx {

struct OnlineSpeechDenoiserStftConfig {
  int32_t sample_rate = 0;
  int32_t n_fft = 0;
  int32_t hop_length = 0;
  int32_t window_length = 0;
  std::string window_type;
};

inline std::vector<float> MakeOnlineSpeechDenoiserWindow(
    const std::string &window_type, int32_t window_length) {
  if (window_type == "vorbis") {
    return MakeVorbisWindow(window_length);
  }

  if (window_type == "hann_sqrt") {
    auto window = knf::GetWindow("hann", window_length);
    for (auto &w : window) {
      w = std::sqrt(w);
    }
    return window;
  }

  return knf::GetWindow(window_type, window_length);
}

class StreamingDft {
 public:
  explicit StreamingDft(int32_t n_fft)
      : n_fft_(n_fft),
        num_bins_(n_fft / 2 + 1),
        cos_f_(num_bins_ * n_fft_),
        sin_f_(num_bins_ * n_fft_),
        cos_i_(n_fft_ * num_bins_),
        sin_i_(n_fft_ * num_bins_) {
    constexpr double kPi = 3.14159265358979323846;
    for (int32_t k = 0; k < num_bins_; ++k) {
      for (int32_t n = 0; n < n_fft_; ++n) {
        double angle = 2.0 * kPi * k * n / n_fft_;
        double c = std::cos(angle);
        double s = std::sin(angle);

        cos_f_[k * n_fft_ + n] = c;
        sin_f_[k * n_fft_ + n] = s;

        cos_i_[n * num_bins_ + k] = c;
        sin_i_[n * num_bins_ + k] = s;
      }
    }
  }

  void Forward(const float *input, float *output) const {
    for (int32_t k = 0; k != num_bins_; ++k) {
      double real = 0;
      double imag = 0;
      const double *p_cos = cos_f_.data() + k * n_fft_;
      const double *p_sin = sin_f_.data() + k * n_fft_;
      for (int32_t n = 0; n != n_fft_; ++n) {
        double v = input[n];
        real += v * p_cos[n];
        imag -= v * p_sin[n];
      }
      output[2 * k] = static_cast<float>(real);
      output[2 * k + 1] = static_cast<float>(imag);
    }
  }

  void Inverse(const float *input, float *output) const {
    for (int32_t n = 0; n != n_fft_; ++n) {
      double sum = input[0];
      if (n_fft_ % 2 == 0) {
        sum += input[2 * (num_bins_ - 1)] * ((n & 1) ? -1.0 : 1.0);
      }

      const double *p_cos = cos_i_.data() + n * num_bins_;
      const double *p_sin = sin_i_.data() + n * num_bins_;
      for (int32_t k = 1; k != num_bins_ - 1; ++k) {
        double real = input[2 * k];
        double imag = input[2 * k + 1];
        sum += 2.0 * (real * p_cos[k] - imag * p_sin[k]);
      }

      output[n] = static_cast<float>(sum / n_fft_);
    }
  }

 private:
  int32_t n_fft_ = 0;
  int32_t num_bins_ = 0;
  std::vector<double> cos_f_;
  std::vector<double> sin_f_;
  std::vector<double> cos_i_;
  std::vector<double> sin_i_;
};

class OnlineSpeechDenoiserStftImpl {
 public:
  explicit OnlineSpeechDenoiserStftImpl(OnlineSpeechDenoiserStftConfig config)
      : config_(std::move(config)),
        fft_(config_.n_fft),
        memory_info_(
            Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault)),
        window_(
            MakeOnlineSpeechDenoiserWindow(config_.window_type,
                                           config_.window_length)),
        analysis_buffer_(config_.window_length),
        overlap_add_buffer_(config_.window_length),
        fft_input_(config_.window_length),
        fft_output_(2 * (config_.n_fft / 2 + 1)),
        enhanced_fft_output_(2 * (config_.n_fft / 2 + 1)),
        ifft_output_(config_.window_length),
        zero_hop_(config_.hop_length) {}

  template <typename ProcessFrame>
  DenoisedAudio Run(const float *samples, int32_t n, int32_t sample_rate,
                    ProcessFrame process_frame) {
    if (sample_rate <= 0) {
      SHERPA_ONNX_LOGE("Expected sample_rate > 0. Given: %d", sample_rate);
      SHERPA_ONNX_EXIT(-1);
    }

    if (n < 0) {
      SHERPA_ONNX_LOGE("Expected n >= 0. Given: %d", n);
      SHERPA_ONNX_EXIT(-1);
    }

    if (n == 0) {
      return {{}, config_.sample_rate};
    }

    if (input_sample_rate_ == -1) {
      input_sample_rate_ = sample_rate;
      CreateResamplerIfNeeded();
    } else if (sample_rate != input_sample_rate_) {
      SHERPA_ONNX_LOGE(
          "Streaming denoiser expects a fixed input sample rate. Previous: %d. "
          "Current: %d.",
          input_sample_rate_, sample_rate);
      SHERPA_ONNX_EXIT(-1);
    }

    std::vector<float> resampled;
    if (resampler_) {
      resampler_->Resample(samples, n, false, &resampled);
    } else {
      resampled.assign(samples, samples + n);
    }

    total_input_samples_ += resampled.size();
    pending_input_.insert(pending_input_.end(), resampled.begin(),
                          resampled.end());

    DenoisedAudio ans;
    ans.sample_rate = config_.sample_rate;
    ans.samples = ProcessPending(process_frame);
    total_output_samples_ += ans.samples.size();
    return ans;
  }

  template <typename ProcessFrame, typename ResetModelState>
  DenoisedAudio Flush(ProcessFrame process_frame,
                      ResetModelState reset_model_state) {
    DenoisedAudio ans;
    ans.sample_rate = config_.sample_rate;

    std::vector<float> tail;
    if (resampler_) {
      float dummy = 0;
      resampler_->Resample(&dummy, 0, true, &tail);
      total_input_samples_ += tail.size();
      pending_input_.insert(pending_input_.end(), tail.begin(), tail.end());
    }

    ans.samples = ProcessPending(process_frame);

    if (!pending_input_.empty()) {
      std::vector<float> padded(config_.hop_length, 0.0f);
      std::copy(pending_input_.begin(), pending_input_.end(), padded.begin());
      ProcessHop(padded.data(), &ans.samples, process_frame);
      pending_input_.clear();
    }

    if (started_) {
      ProcessHop(zero_hop_.data(), &ans.samples, process_frame);
    }

    int64_t remaining = total_input_samples_ - total_output_samples_;
    if (remaining < 0) {
      remaining = 0;
    }

    if (ans.samples.size() > static_cast<size_t>(remaining)) {
      ans.samples.resize(static_cast<size_t>(remaining));
    }

    total_output_samples_ += ans.samples.size();
    Reset();
    reset_model_state();
    return ans;
  }

  void Reset() {
    std::fill(analysis_buffer_.begin(), analysis_buffer_.end(), 0.0f);
    std::fill(overlap_add_buffer_.begin(), overlap_add_buffer_.end(), 0.0f);
    pending_input_.clear();
    resampler_.reset();
    input_sample_rate_ = -1;
    started_ = false;
    total_input_samples_ = 0;
    total_output_samples_ = 0;
  }

  int32_t GetSampleRate() const { return config_.sample_rate; }

  int32_t GetFrameShiftInSamples() const { return config_.hop_length; }

  const Ort::MemoryInfo &GetMemoryInfo() const { return memory_info_; }

  int32_t GetNumBins() const { return config_.n_fft / 2 + 1; }

 private:
  void CreateResamplerIfNeeded() {
    if (input_sample_rate_ == config_.sample_rate) {
      return;
    }

    SHERPA_ONNX_LOGE(
        "Creating a streaming resampler:\n"
        "   in_sample_rate: %d\n"
        "   output_sample_rate: %d\n",
        input_sample_rate_, config_.sample_rate);

    float min_freq = std::min<int32_t>(input_sample_rate_, config_.sample_rate);
    float lowpass_cutoff = 0.99f * 0.5f * min_freq;
    int32_t lowpass_filter_width = 6;
    resampler_ = std::make_unique<LinearResample>(
        input_sample_rate_, config_.sample_rate, lowpass_cutoff,
        lowpass_filter_width);
  }

  template <typename ProcessFrame>
  std::vector<float> ProcessPending(ProcessFrame process_frame) {
    std::vector<float> ans;

    int32_t consumed = 0;
    while (static_cast<int32_t>(pending_input_.size()) - consumed >=
           config_.hop_length) {
      ProcessHop(pending_input_.data() + consumed, &ans, process_frame);
      consumed += config_.hop_length;
    }

    if (consumed != 0) {
      pending_input_.erase(pending_input_.begin(),
                           pending_input_.begin() + consumed);
    }

    return ans;
  }

  template <typename ProcessFrame>
  void ProcessHop(const float *hop, std::vector<float> *output,
                  ProcessFrame process_frame) {
    std::move(analysis_buffer_.begin() + config_.hop_length,
              analysis_buffer_.end(), analysis_buffer_.begin());
    std::copy(hop, hop + config_.hop_length,
              analysis_buffer_.end() - config_.hop_length);

    for (int32_t i = 0; i != config_.window_length; ++i) {
      fft_input_[i] = analysis_buffer_[i] * window_[i];
    }

    fft_.Forward(fft_input_.data(), fft_output_.data());
    process_frame(fft_output_.data(), fft_output_.size(),
                  enhanced_fft_output_.data());
    fft_.Inverse(enhanced_fft_output_.data(), ifft_output_.data());

    std::move(overlap_add_buffer_.begin() + config_.hop_length,
              overlap_add_buffer_.end(), overlap_add_buffer_.begin());
    std::fill(overlap_add_buffer_.end() - config_.hop_length,
              overlap_add_buffer_.end(), 0.0f);

    for (int32_t i = 0; i != config_.window_length; ++i) {
      overlap_add_buffer_[i] += ifft_output_[i] * window_[i];
    }

    if (!started_) {
      started_ = true;
      return;
    }

    output->insert(output->end(), overlap_add_buffer_.begin(),
                   overlap_add_buffer_.begin() + config_.hop_length);
  }

 private:
  OnlineSpeechDenoiserStftConfig config_;
  StreamingDft fft_;
  Ort::MemoryInfo memory_info_;

  std::vector<float> window_;
  std::vector<float> analysis_buffer_;
  std::vector<float> overlap_add_buffer_;
  std::vector<float> pending_input_;
  std::vector<float> fft_input_;
  std::vector<float> fft_output_;
  std::vector<float> enhanced_fft_output_;
  std::vector<float> ifft_output_;
  std::vector<float> zero_hop_;
  std::unique_ptr<LinearResample> resampler_;

  int32_t input_sample_rate_ = -1;
  bool started_ = false;
  int64_t total_input_samples_ = 0;
  int64_t total_output_samples_ = 0;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_SPEECH_DENOISER_STFT_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/online-speech-denoiser.cc
================================================
// sherpa-onnx/csrc/online-speech-denoiser.cc
//
// Copyright (c)  2026  Xiaomi Corporation

#include "sherpa-onnx/csrc/online-speech-denoiser.h"

#include <memory>
#include <sstream>
#include <string>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/online-speech-denoiser-impl.h"

namespace sherpa_onnx {

void OnlineSpeechDenoiserConfig::Register(ParseOptions *po) {
  model.Register(po);
}

bool OnlineSpeechDenoiserConfig::Validate() const { return model.Validate(); }

std::string OnlineSpeechDenoiserConfig::ToString() const {
  std::ostringstream os;
  os << "OnlineSpeechDenoiserConfig(";
  os << "model=" << model.ToString() << ")";
  return os.str();
}

template <typename Manager>
OnlineSpeechDenoiser::OnlineSpeechDenoiser(
    Manager *mgr, const OnlineSpeechDenoiserConfig &config)
    : impl_(OnlineSpeechDenoiserImpl::Create(mgr, config)) {}

OnlineSpeechDenoiser::OnlineSpeechDenoiser(
    const OnlineSpeechDenoiserConfig &config)
    : impl_(OnlineSpeechDenoiserImpl::Create(config)) {}

OnlineSpeechDenoiser::~OnlineSpeechDenoiser() = default;

DenoisedAudio OnlineSpeechDenoiser::Run(const float *samples, int32_t n,
                                        int32_t sample_rate) {
  return impl_->Run(samples, n, sample_rate);
}

DenoisedAudio OnlineSpeechDenoiser::Flush() { return impl_->Flush(); }

void OnlineSpeechDenoiser::Reset() { impl_->Reset(); }

int32_t OnlineSpeechDenoiser::GetSampleRate() const {
  return impl_->GetSampleRate();
}

int32_t OnlineSpeechDenoiser::GetFrameShiftInSamples() const {
  return impl_->GetFrameShiftInSamples();
}

#if __ANDROID_API__ >= 9
template OnlineSpeechDenoiser::OnlineSpeechDenoiser(
    AAssetManager *mgr, const OnlineSpeechDenoiserConfig &config);
#endif

#if __OHOS__
template OnlineSpeechDenoiser::OnlineSpeechDenoiser(
    NativeResourceManager *mgr, const OnlineSpeechDenoiserConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-speech-denoiser.h
================================================
// sherpa-onnx/csrc/online-speech-denoiser.h
//
// Copyright (c)  2026  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_ONLINE_SPEECH_DENOISER_H_
#define SHERPA_ONNX_CSRC_ONLINE_SPEECH_DENOISER_H_

#include <memory>
#include <string>

#include "sherpa-onnx/csrc/offline-speech-denoiser.h"
#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

class OnlineSpeechDenoiserImpl;

struct OnlineSpeechDenoiserConfig {
  OfflineSpeechDenoiserModelConfig model;

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

class OnlineSpeechDenoiser {
 public:
  explicit OnlineSpeechDenoiser(const OnlineSpeechDenoiserConfig &config);
  ~OnlineSpeechDenoiser();

  template <typename Manager>
  OnlineSpeechDenoiser(Manager *mgr, const OnlineSpeechDenoiserConfig &config);

  /*
   * Process one chunk of streaming audio and return the enhanced samples
   * currently available. Internally this keeps model and overlap-add state
   * across calls.
   */
  DenoisedAudio Run(const float *samples, int32_t n, int32_t sample_rate);

  /*
   * Flush any buffered audio and reset the denoiser to an empty state so it
   * can be reused for a new stream.
   */
  DenoisedAudio Flush();

  void Reset();

  int32_t GetSampleRate() const;
  int32_t GetFrameShiftInSamples() const;

 private:
  std::unique_ptr<OnlineSpeechDenoiserImpl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_SPEECH_DENOISER_H_


================================================
FILE: sherpa-onnx/csrc/online-stream.cc
================================================
// sherpa-onnx/csrc/online-stream.cc
//
// Copyright (c)  2023  Xiaomi Corporation
#include "sherpa-onnx/csrc/online-stream.h"

#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/features.h"
#include "sherpa-onnx/csrc/text-utils.h"
#include "sherpa-onnx/csrc/transducer-keyword-decoder.h"

namespace sherpa_onnx {

class OnlineStream::Impl {
 public:
  explicit Impl(const FeatureExtractorConfig &config,
                ContextGraphPtr context_graph)
      : feat_extractor_(config), context_graph_(std::move(context_graph)) {}

  void AcceptWaveform(int32_t sampling_rate, const float *waveform, int32_t n) {
    std::lock_guard<std::mutex> lock(mutex_);
    feat_extractor_.AcceptWaveform(sampling_rate, waveform, n);
  }

  void InputFinished() const {
    std::lock_guard<std::mutex> lock(mutex_);
    feat_extractor_.InputFinished();
  }

  int32_t NumFramesReady() const {
    std::lock_guard<std::mutex> lock(mutex_);
    return feat_extractor_.NumFramesReady() - start_frame_index_;
  }

  bool IsLastFrame(int32_t frame) const {
    std::lock_guard<std::mutex> lock(mutex_);
    return feat_extractor_.IsLastFrame(frame);
  }

  std::vector<float> GetFrames(int32_t frame_index, int32_t n) const {
    std::lock_guard<std::mutex> lock(mutex_);
    return feat_extractor_.GetFrames(frame_index + start_frame_index_, n);
  }

  void Reset() {
    std::lock_guard<std::mutex> lock(mutex_);
    // we don't reset the feature extractor
    start_frame_index_ += num_processed_frames_;
    num_processed_frames_ = 0;
  }

  int32_t &GetNumProcessedFrames() {
    std::lock_guard<std::mutex> lock(mutex_);
    return num_processed_frames_;
  }

  int32_t GetNumFramesSinceStart() const {
    std::lock_guard<std::mutex> lock(mutex_);
    return start_frame_index_;
  }

  int32_t &GetCurrentSegment() {
    std::lock_guard<std::mutex> lock(mutex_);
    return segment_;
  }

  void SetResult(const OnlineTransducerDecoderResult &r) { result_ = r; }

  OnlineTransducerDecoderResult &GetResult() { return result_; }

  void SetKeywordResult(const TransducerKeywordResult &r) {
    keyword_result_ = r;
  }
  TransducerKeywordResult &GetKeywordResult(bool remove_duplicates) {
    if (remove_duplicates) {
      if (!prev_keyword_result_.timestamps.empty() &&
          !keyword_result_.timestamps.empty() &&
          keyword_result_.timestamps[0] <=
              prev_keyword_result_.timestamps.back()) {
        return empty_keyword_result_;
      } else {
        prev_keyword_result_ = keyword_result_;
      }
      return keyword_result_;
    } else {
      return keyword_result_;
    }
  }

  OnlineCtcDecoderResult &GetCtcResult() { return ctc_result_; }

  void SetCtcResult(const OnlineCtcDecoderResult &r) { ctc_result_ = r; }

  void SetParaformerResult(const OnlineParaformerDecoderResult &r) {
    paraformer_result_ = r;
  }

  OnlineParaformerDecoderResult &GetParaformerResult() {
    return paraformer_result_;
  }

  int32_t FeatureDim() const { return feat_extractor_.FeatureDim(); }

  void SetStates(std::vector<Ort::Value> states) {
    states_ = std::move(states);
  }

  std::vector<Ort::Value> &GetStates() { return states_; }

  void SetNeMoDecoderStates(std::vector<Ort::Value> decoder_states) {
    decoder_states_ = std::move(decoder_states);
  }

  std::vector<Ort::Value> &GetNeMoDecoderStates() { return decoder_states_; }

  const ContextGraphPtr &GetContextGraph() const { return context_graph_; }

  std::vector<float> &GetParaformerFeatCache() {
    return paraformer_feat_cache_;
  }

  std::vector<float> &GetParaformerEncoderOutCache() {
    return paraformer_encoder_out_cache_;
  }

  std::vector<float> &GetParaformerAlphaCache() {
    return paraformer_alpha_cache_;
  }

  void SetOption(const std::string &key, const std::string &value) {
    options_[key] = value;
  }

  bool HasOption(const std::string &key) const {
    return options_.count(key) != 0;
  }

  const std::string &GetOption(const std::string &key) const {
    auto it = options_.find(key);
    if (it != options_.end()) {
      return it->second;
    }
    static const std::string kEmpty;
    return kEmpty;
  }

  int32_t GetOptionInt(const std::string &key, int32_t default_value) const {
    auto it = options_.find(key);
    if (it != options_.end()) {
      return ToIntOrDefault(it->second, default_value);
    }
    return default_value;
  }

  float GetOptionFloat(const std::string &key, float default_value) const {
    auto it = options_.find(key);
    if (it != options_.end()) {
      return ToFloatOrDefault(it->second, default_value);
    }
    return default_value;
  }

  void SetFasterDecoder(std::unique_ptr<kaldi_decoder::FasterDecoder> decoder) {
    faster_decoder_ = std::move(decoder);
  }

  kaldi_decoder::FasterDecoder *GetFasterDecoder() const {
    return faster_decoder_.get();
  }

  int32_t &GetFasterDecoderProcessedFrames() {
    return faster_decoder_processed_frames_;
  }

 private:
  FeatureExtractor feat_extractor_;
  mutable std::mutex mutex_;
  /// For contextual-biasing
  ContextGraphPtr context_graph_;
  int32_t num_processed_frames_ = 0;  // before subsampling
  int32_t start_frame_index_ = 0;     // never reset
  int32_t segment_ = 0;
  OnlineTransducerDecoderResult result_;
  TransducerKeywordResult prev_keyword_result_;
  TransducerKeywordResult keyword_result_;
  TransducerKeywordResult empty_keyword_result_;
  OnlineCtcDecoderResult ctc_result_;
  std::vector<Ort::Value> states_;  // states for transducer or ctc models
  std::vector<Ort::Value> decoder_states_;  // states for nemo transducer models
  std::vector<float> paraformer_feat_cache_;
  std::vector<float> paraformer_encoder_out_cache_;
  std::vector<float> paraformer_alpha_cache_;
  OnlineParaformerDecoderResult paraformer_result_;
  std::unordered_map<std::string, std::string> options_;
  std::unique_ptr<kaldi_decoder::FasterDecoder> faster_decoder_;
  int32_t faster_decoder_processed_frames_ = 0;
};

OnlineStream::OnlineStream(const FeatureExtractorConfig &config /*= {}*/,
                           ContextGraphPtr context_graph /*= nullptr */)
    : impl_(std::make_unique<Impl>(config, std::move(context_graph))) {}

OnlineStream::~OnlineStream() = default;

void OnlineStream::AcceptWaveform(int32_t sampling_rate, const float *waveform,
                                  int32_t n) const {
  impl_->AcceptWaveform(sampling_rate, waveform, n);
}

void OnlineStream::InputFinished() const { impl_->InputFinished(); }

int32_t OnlineStream::NumFramesReady() const { return impl_->NumFramesReady(); }

bool OnlineStream::IsLastFrame(int32_t frame) const {
  return impl_->IsLastFrame(frame);
}

std::vector<float> OnlineStream::GetFrames(int32_t frame_index,
                                           int32_t n) const {
  return impl_->GetFrames(frame_index, n);
}

void OnlineStream::Reset() { impl_->Reset(); }

int32_t OnlineStream::FeatureDim() const { return impl_->FeatureDim(); }

int32_t &OnlineStream::GetNumProcessedFrames() {
  return impl_->GetNumProcessedFrames();
}

int32_t OnlineStream::GetNumFramesSinceStart() const {
  return impl_->GetNumFramesSinceStart();
}

int32_t &OnlineStream::GetCurrentSegment() {
  return impl_->GetCurrentSegment();
}

void OnlineStream::SetResult(const OnlineTransducerDecoderResult &r) {
  impl_->SetResult(r);
}

OnlineTransducerDecoderResult &OnlineStream::GetResult() {
  return impl_->GetResult();
}

void OnlineStream::SetKeywordResult(const TransducerKeywordResult &r) {
  impl_->SetKeywordResult(r);
}

TransducerKeywordResult &OnlineStream::GetKeywordResult(
    bool remove_duplicates /*=false*/) {
  return impl_->GetKeywordResult(remove_duplicates);
}

OnlineCtcDecoderResult &OnlineStream::GetCtcResult() {
  return impl_->GetCtcResult();
}

void OnlineStream::SetCtcResult(const OnlineCtcDecoderResult &r) {
  impl_->SetCtcResult(r);
}

void OnlineStream::SetParaformerResult(const OnlineParaformerDecoderResult &r) {
  impl_->SetParaformerResult(r);
}

OnlineParaformerDecoderResult &OnlineStream::GetParaformerResult() {
  return impl_->GetParaformerResult();
}

void OnlineStream::SetStates(std::vector<Ort::Value> states) {
  impl_->SetStates(std::move(states));
}

std::vector<Ort::Value> &OnlineStream::GetStates() {
  return impl_->GetStates();
}

void OnlineStream::SetNeMoDecoderStates(
    std::vector<Ort::Value> decoder_states) {
  return impl_->SetNeMoDecoderStates(std::move(decoder_states));
}

std::vector<Ort::Value> &OnlineStream::GetNeMoDecoderStates() {
  return impl_->GetNeMoDecoderStates();
}

const ContextGraphPtr &OnlineStream::GetContextGraph() const {
  return impl_->GetContextGraph();
}

void OnlineStream::SetFasterDecoder(
    std::unique_ptr<kaldi_decoder::FasterDecoder> decoder) {
  impl_->SetFasterDecoder(std::move(decoder));
}

kaldi_decoder::FasterDecoder *OnlineStream::GetFasterDecoder() const {
  return impl_->GetFasterDecoder();
}

int32_t &OnlineStream::GetFasterDecoderProcessedFrames() {
  return impl_->GetFasterDecoderProcessedFrames();
}

std::vector<float> &OnlineStream::GetParaformerFeatCache() {
  return impl_->GetParaformerFeatCache();
}

std::vector<float> &OnlineStream::GetParaformerEncoderOutCache() {
  return impl_->GetParaformerEncoderOutCache();
}

std::vector<float> &OnlineStream::GetParaformerAlphaCache() {
  return impl_->GetParaformerAlphaCache();
}

void OnlineStream::SetOption(const std::string &key,
                             const std::string &value) {
  impl_->SetOption(key, value);
}

bool OnlineStream::HasOption(const std::string &key) const {
  return impl_->HasOption(key);
}

const std::string &OnlineStream::GetOption(const std::string &key) const {
  return impl_->GetOption(key);
}

int32_t OnlineStream::GetOptionInt(const std::string &key,
                                   int32_t default_value) const {
  return impl_->GetOptionInt(key, default_value);
}

float OnlineStream::GetOptionFloat(const std::string &key,
                                   float default_value) const {
  return impl_->GetOptionFloat(key, default_value);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-stream.h
================================================
// sherpa-onnx/csrc/online-stream.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_ONLINE_STREAM_H_
#define SHERPA_ONNX_CSRC_ONLINE_STREAM_H_

#include <memory>
#include <string>
#include <vector>

#include "kaldi-decoder/csrc/faster-decoder.h"
#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/context-graph.h"
#include "sherpa-onnx/csrc/features.h"
#include "sherpa-onnx/csrc/online-ctc-decoder.h"
#include "sherpa-onnx/csrc/online-paraformer-decoder.h"
#include "sherpa-onnx/csrc/online-transducer-decoder.h"

namespace sherpa_onnx {

struct TransducerKeywordResult;
class OnlineStream {
 public:
  explicit OnlineStream(const FeatureExtractorConfig &config = {},
                        ContextGraphPtr context_graph = nullptr);

  virtual ~OnlineStream();

  /**
     @param sampling_rate The sampling_rate of the input waveform. If it does
                          not equal to  config.sampling_rate, we will do
                          resampling inside.
     @param waveform Pointer to a 1-D array of size n. It must be normalized to
                     the range [-1, 1].
     @param n Number of entries in waveform
   */
  void AcceptWaveform(int32_t sampling_rate, const float *waveform,
                      int32_t n) const;

  /**
   * InputFinished() tells the class you won't be providing any
   * more waveform.  This will help flush out the last frame or two
   * of features, in the case where snip-edges == false; it also
   * affects the return value of IsLastFrame().
   */
  void InputFinished() const;

  int32_t NumFramesReady() const;

  /** Note: IsLastFrame() will only ever return true if you have called
   * InputFinished() (and this frame is the last frame).
   */
  bool IsLastFrame(int32_t frame) const;

  /** Get n frames starting from the given frame index.
   *
   * @param frame_index  The starting frame index
   * @param n  Number of frames to get.
   * @return Return a 2-D tensor of shape (n, feature_dim).
   *         which is flattened into a 1-D vector (flattened in row major)
   */
  std::vector<float> GetFrames(int32_t frame_index, int32_t n) const;

  void Reset();

  int32_t FeatureDim() const;

  // Return a reference to the number of processed frames so far
  // before subsampling..
  // Initially, it is 0. It is always less than NumFramesReady().
  //
  // The returned reference is valid as long as this object is alive.
  int32_t &GetNumProcessedFrames();  // It's reset after calling Reset()

  int32_t GetNumFramesSinceStart() const;

  int32_t &GetCurrentSegment();

  void SetResult(const OnlineTransducerDecoderResult &r);
  OnlineTransducerDecoderResult &GetResult();

  void SetKeywordResult(const TransducerKeywordResult &r);
  TransducerKeywordResult &GetKeywordResult(bool remove_duplicates = false);

  void SetCtcResult(const OnlineCtcDecoderResult &r);
  OnlineCtcDecoderResult &GetCtcResult();

  void SetParaformerResult(const OnlineParaformerDecoderResult &r);
  OnlineParaformerDecoderResult &GetParaformerResult();

  void SetStates(std::vector<Ort::Value> states);
  std::vector<Ort::Value> &GetStates();

  void SetNeMoDecoderStates(std::vector<Ort::Value> decoder_states);
  std::vector<Ort::Value> &GetNeMoDecoderStates();

  /**
   * Get the context graph corresponding to this stream.
   *
   * @return Return the context graph for this stream.
   */
  const ContextGraphPtr &GetContextGraph() const;

  // for online ctc decoder
  void SetFasterDecoder(std::unique_ptr<kaldi_decoder::FasterDecoder> decoder);
  kaldi_decoder::FasterDecoder *GetFasterDecoder() const;
  int32_t &GetFasterDecoderProcessedFrames();

  // for streaming paraformer
  std::vector<float> &GetParaformerFeatCache();
  std::vector<float> &GetParaformerEncoderOutCache();
  std::vector<float> &GetParaformerAlphaCache();

  // Generic per-stream option mechanism (key-value string pairs).
  void SetOption(const std::string &key, const std::string &value);
  bool HasOption(const std::string &key) const;

  // Returns the value for the given key, or an empty string if the key
  // does not exist. No exception is thrown for missing keys.
  const std::string &GetOption(const std::string &key) const;
  int32_t GetOptionInt(const std::string &key,
                       int32_t default_value = 0) const;
  float GetOptionFloat(const std::string &key,
                       float default_value = 0.0f) const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_STREAM_H_


================================================
FILE: sherpa-onnx/csrc/online-t-one-ctc-model-config.cc
================================================
// sherpa-onnx/csrc/online-t-one-ctc-model-config.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/online-t-one-ctc-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OnlineToneCtcModelConfig::Register(ParseOptions *po) {
  po->Register("t-one-ctc-model", &model,
               "Path to CTC model.onnx from T-one. Please see "
               "https://github.com/k2-fsa/sherpa-onnx/pull/2571");
}

bool OnlineToneCtcModelConfig::Validate() const {
  if (!FileExists(model)) {
    SHERPA_ONNX_LOGE("T-one CTC model '%s' does not exist", model.c_str());
    return false;
  }

  return true;
}

std::string OnlineToneCtcModelConfig::ToString() const {
  std::ostringstream os;

  os << "OnlineToneCtcModelConfig(";
  os << "model=\"" << model << "\")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-t-one-ctc-model-config.h
================================================
// sherpa-onnx/csrc/online-t-one-ctc-model-config.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_ONLINE_T_ONE_CTC_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_ONLINE_T_ONE_CTC_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OnlineToneCtcModelConfig {
  std::string model;

  OnlineToneCtcModelConfig() = default;

  explicit OnlineToneCtcModelConfig(const std::string &model) : model(model) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_T_ONE_CTC_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/online-t-one-ctc-model.cc
================================================
// sherpa-onnx/csrc/online-t-one-ctc-model.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/online-t-one-ctc-model.h"

#include <algorithm>
#include <cmath>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/cat.h"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"
#include "sherpa-onnx/csrc/unbind.h"

namespace sherpa_onnx {

class OnlineToneCtcModel::Impl {
 public:
  explicit Impl(const OnlineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    {
      auto buf = ReadFile(config.t_one_ctc.model);
      Init(buf.data(), buf.size());
    }
  }

  template <typename Manager>
  Impl(Manager *mgr, const OnlineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    {
      auto buf = ReadFile(mgr, config.t_one_ctc.model);
      Init(buf.data(), buf.size());
    }
  }

  std::vector<Ort::Value> Forward(Ort::Value x,
                                  std::vector<Ort::Value> states) {
    // shape0 is (batch_size, 1, num_samples)
    auto shape0 = x.GetTensorTypeAndShapeInfo().GetShape();
    std::array<int64_t, 3> shape = {shape0[0], shape0[2], shape0[1]};
    std::vector<int32_t> samples(shape[0] * shape[1] * shape[2]);
    const float *px = x.GetTensorData<float>();

    for (int32_t i = 0; i < samples.size(); ++i) {
      float f = px[i];
      f = f > 1 ? 1 : f;
      f = f < -1 ? -1 : f;
      samples[i] = static_cast<int32_t>(f * 32767);
    }

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    Ort::Value xx =
        Ort::Value::CreateTensor(memory_info, samples.data(), samples.size(),
                                 shape.data(), shape.size());

    std::array<Ort::Value, 2> inputs = {std::move(xx), std::move(states[0])};

    auto out =
        sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(),
                   output_names_ptr_.data(), output_names_ptr_.size());
    // out[0]: log_probs
    // out[1] next_states

    return out;
  }

  int32_t VocabSize() const { return vocab_size_; }

  int32_t ChunkLength() const { return 1; }

  int32_t ChunkShift() const { return 1; }

  OrtAllocator *Allocator() { return allocator_; }

  // Return a vector containing 1 tensor
  // - state_
  std::vector<Ort::Value> GetInitStates() {
    std::vector<Ort::Value> ans;
    ans.push_back(View(&state_));

    return ans;
  }

  std::vector<Ort::Value> StackStates(
      std::vector<std::vector<Ort::Value>> states) {
    int32_t batch_size = static_cast<int32_t>(states.size());
    if (batch_size == 1) {
      return std::move(states[0]);
    }

    std::vector<Ort::Value> ans;
    ans.reserve(1);

    std::vector<const Ort::Value *> buf;
    buf.reserve(batch_size);

    for (int32_t b = 0; b != batch_size; ++b) {
      buf.push_back(&states[b][0]);
    }

    Ort::Value c{nullptr};
    c = CatFloat16(allocator_, buf, 0);

    ans.push_back(std::move(c));

    return ans;
  }

  std::vector<std::vector<Ort::Value>> UnStackStates(
      std::vector<Ort::Value> states) const {
    auto allocator = const_cast<Impl *>(this)->allocator_;

    std::vector<std::vector<Ort::Value>> ans;

    auto shape = states[0].GetTensorTypeAndShapeInfo().GetShape();
    int32_t batch_size = shape[0];
    ans.resize(batch_size);

    if (batch_size == 1) {
      ans[0] = std::move(states);
      return ans;
    }

    std::vector<Ort::Value> v;
    v = UnbindFloat16(allocator, &states[0], 0);

    for (int32_t b = 0; b != batch_size; ++b) {
      ans[b].push_back(std::move(v[b]));
    }

    return ans;
  }

 private:
  void Init(void *model_data, size_t model_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);

    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      PrintModelMetadata(os, meta_data);
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
    SHERPA_ONNX_READ_META_DATA(frame_length_ms_, "frame_length_ms");
    SHERPA_ONNX_READ_META_DATA(state_dim_, "state_dim");
    SHERPA_ONNX_READ_META_DATA(sample_rate_, "sample_rate");

    InitStates();

    vocab_size_ = sess_->GetOutputTypeInfo(0)
                      .GetTensorTypeAndShapeInfo()
                      .GetShape()
                      .back();
  }

  void InitStates() {
    std::array<int64_t, 2> state_shape{1, state_dim_};

    state_ = Ort::Value::CreateTensor(allocator_, state_shape.data(),
                                      state_shape.size(),
                                      ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16);

    auto p = state_.GetTensorMutableData<uint16_t>();
    std::fill(p, p + state_dim_, 0);
  }

 private:
  OnlineModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;

  // One input frame is of  length is 300ms
  // For each input frame, there are 10 output frames,
  // so each output frame is 30ms
  int32_t frame_length_ms_ = 0;
  int32_t state_dim_ = 0;
  int32_t sample_rate_ = 0;
  int32_t vocab_size_ = 0;

  Ort::Value state_{nullptr};
};

OnlineToneCtcModel::OnlineToneCtcModel(const OnlineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OnlineToneCtcModel::OnlineToneCtcModel(Manager *mgr,
                                       const OnlineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OnlineToneCtcModel::~OnlineToneCtcModel() = default;

std::vector<Ort::Value> OnlineToneCtcModel::Forward(
    Ort::Value x, std::vector<Ort::Value> states) const {
  return impl_->Forward(std::move(x), std::move(states));
}

int32_t OnlineToneCtcModel::VocabSize() const { return impl_->VocabSize(); }

int32_t OnlineToneCtcModel::ChunkLength() const { return impl_->ChunkLength(); }

int32_t OnlineToneCtcModel::ChunkShift() const { return impl_->ChunkShift(); }

OrtAllocator *OnlineToneCtcModel::Allocator() const {
  return impl_->Allocator();
}

std::vector<Ort::Value> OnlineToneCtcModel::GetInitStates() const {
  return impl_->GetInitStates();
}

std::vector<Ort::Value> OnlineToneCtcModel::StackStates(
    std::vector<std::vector<Ort::Value>> states) const {
  return impl_->StackStates(std::move(states));
}

std::vector<std::vector<Ort::Value>> OnlineToneCtcModel::UnStackStates(
    std::vector<Ort::Value> states) const {
  return impl_->UnStackStates(std::move(states));
}

#if __ANDROID_API__ >= 9
template OnlineToneCtcModel::OnlineToneCtcModel(
    AAssetManager *mgr, const OnlineModelConfig &config);
#endif

#if __OHOS__
template OnlineToneCtcModel::OnlineToneCtcModel(
    NativeResourceManager *mgr, const OnlineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-t-one-ctc-model.h
================================================
// sherpa-onnx/csrc/online-t-one-ctc-model.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_ONLINE_T_ONE_CTC_MODEL_H_
#define SHERPA_ONNX_CSRC_ONLINE_T_ONE_CTC_MODEL_H_

#include <memory>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/online-ctc-model.h"
#include "sherpa-onnx/csrc/online-model-config.h"

namespace sherpa_onnx {

class OnlineToneCtcModel : public OnlineCtcModel {
 public:
  explicit OnlineToneCtcModel(const OnlineModelConfig &config);

  template <typename Manager>
  OnlineToneCtcModel(Manager *mgr, const OnlineModelConfig &config);

  ~OnlineToneCtcModel() override;

  // A list of 1 tensor:
  //   - (batch_size, state_dim)
  std::vector<Ort::Value> GetInitStates() const override;

  std::vector<Ort::Value> StackStates(
      std::vector<std::vector<Ort::Value>> states) const override;

  std::vector<std::vector<Ort::Value>> UnStackStates(
      std::vector<Ort::Value> states) const override;

  /**
   *
   * @param x A 3-D tensor of shape (batch_size, num_samples).
   * @param states  It is from GetInitStates() or returned from this method.
   *
   * @return Return a list of tensors
   *    - ans[0] contains log_probs, of shape (N, T, C)
   *    - ans[1:] contains next_states
   */
  std::vector<Ort::Value> Forward(
      Ort::Value x, std::vector<Ort::Value> states) const override;

  /** Return the vocabulary size of the model
   */
  int32_t VocabSize() const override;

  /** Return an allocator for allocating memory
   */
  OrtAllocator *Allocator() const override;

  // The model accepts this number of frames before subsampling as input
  int32_t ChunkLength() const override;

  // Similar to frame_shift in feature extractor, after processing
  // ChunkLength() frames, we advance by ChunkShift() frames
  // before we process the next chunk.
  int32_t ChunkShift() const override;

  bool SupportBatchProcessing() const override { return true; }

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_T_ONE_CTC_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/online-transducer-decoder.cc
================================================
// sherpa-onnx/csrc/online-transducer-decoder.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/online-transducer-decoder.h"

#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/onnx-utils.h"

namespace sherpa_onnx {

OnlineTransducerDecoderResult::OnlineTransducerDecoderResult(
    const OnlineTransducerDecoderResult &other)
    : OnlineTransducerDecoderResult() {
  *this = other;
}

OnlineTransducerDecoderResult &OnlineTransducerDecoderResult::operator=(
    const OnlineTransducerDecoderResult &other) {
  if (this == &other) {
    return *this;
  }

  tokens = other.tokens;
  num_trailing_blanks = other.num_trailing_blanks;

  Ort::AllocatorWithDefaultOptions allocator;
  if (other.decoder_out) {
    decoder_out = Clone(allocator, &other.decoder_out);
  }

  hyps = other.hyps;

  frame_offset = other.frame_offset;
  timestamps = other.timestamps;

  ys_probs = other.ys_probs;
  lm_probs = other.lm_probs;
  context_scores = other.context_scores;

  return *this;
}

OnlineTransducerDecoderResult::OnlineTransducerDecoderResult(
    OnlineTransducerDecoderResult &&other) noexcept
    : OnlineTransducerDecoderResult() {
  *this = std::move(other);
}

OnlineTransducerDecoderResult &OnlineTransducerDecoderResult::operator=(
    OnlineTransducerDecoderResult &&other) noexcept {
  if (this == &other) {
    return *this;
  }

  tokens = std::move(other.tokens);
  num_trailing_blanks = other.num_trailing_blanks;
  decoder_out = std::move(other.decoder_out);
  hyps = std::move(other.hyps);

  frame_offset = other.frame_offset;
  timestamps = std::move(other.timestamps);

  ys_probs = std::move(other.ys_probs);
  lm_probs = std::move(other.lm_probs);
  context_scores = std::move(other.context_scores);

  return *this;
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-transducer-decoder.h
================================================
// sherpa-onnx/csrc/online-transducer-decoder.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_DECODER_H_
#define SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_DECODER_H_

#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/hypothesis.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

struct OnlineTransducerDecoderResult {
  /// Number of frames after subsampling we have decoded so far
  int32_t frame_offset = 0;

  /// The decoded token IDs so far
  std::vector<int64_t> tokens;

  /// number of trailing blank frames decoded so far
  int32_t num_trailing_blanks = 0;

  /// timestamps[i] contains the output frame index where tokens[i] is decoded.
  std::vector<int32_t> timestamps;

  std::vector<float> ys_probs;
  std::vector<float> lm_probs;
  std::vector<float> context_scores;

  // Cache decoder_out for endpointing
  Ort::Value decoder_out;

  // used only in modified beam_search
  Hypotheses hyps;

  OnlineTransducerDecoderResult()
      : tokens{}, num_trailing_blanks(0), decoder_out{nullptr}, hyps{} {}

  OnlineTransducerDecoderResult(const OnlineTransducerDecoderResult &other);

  OnlineTransducerDecoderResult &operator=(
      const OnlineTransducerDecoderResult &other);

  OnlineTransducerDecoderResult(OnlineTransducerDecoderResult &&other) noexcept;

  OnlineTransducerDecoderResult &operator=(
      OnlineTransducerDecoderResult &&other) noexcept;
};

class OnlineStream;
class OnlineTransducerDecoder {
 public:
  virtual ~OnlineTransducerDecoder() = default;

  /* Return an empty result.
   *
   * To simplify the decoding code, we add `context_size` blanks
   * to the beginning of the decoding result, which will be
   * stripped by calling `StripPrecedingBlanks()`.
   */
  virtual OnlineTransducerDecoderResult GetEmptyResult() const = 0;

  /** Strip blanks added by `GetEmptyResult()`.
   *
   * @param r It is changed in-place.
   */
  virtual void StripLeadingBlanks(OnlineTransducerDecoderResult * /*r*/) const {
  }

  /** Run transducer beam search given the output from the encoder model.
   *
   * @param encoder_out A 3-D tensor of shape (N, T, joiner_dim)
   * @param result  It is modified in-place.
   *
   * @note There is no need to pass encoder_out_length here since for the
   * online decoding case, each utterance has the same number of frames
   * and there are no paddings.
   */
  virtual void Decode(Ort::Value encoder_out,
                      std::vector<OnlineTransducerDecoderResult> *result) = 0;

  /** Run transducer beam search given the output from the encoder model.
   *
   * Note: Currently this interface is for contextual-biasing feature which
   *       needs a ContextGraph owned by the OnlineStream.
   *
   * @param encoder_out A 3-D tensor of shape (N, T, joiner_dim)
   * @param ss  A list of OnlineStreams.
   * @param result  It is modified in-place.
   *
   * @note There is no need to pass encoder_out_length here since for the
   * online decoding case, each utterance has the same number of frames
   * and there are no paddings.
   */
  virtual void Decode(Ort::Value /*encoder_out*/, OnlineStream ** /*ss*/,
                      std::vector<OnlineTransducerDecoderResult> * /*result*/) {
    SHERPA_ONNX_LOGE(
        "This interface is for OnlineTransducerModifiedBeamSearchDecoder.");
    exit(-1);
  }

  // used for endpointing. We need to keep decoder_out after reset
  virtual void UpdateDecoderOut(OnlineTransducerDecoderResult * /*result*/) {}
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_DECODER_H_


================================================
FILE: sherpa-onnx/csrc/online-transducer-greedy-search-decoder.cc
================================================
// sherpa-onnx/csrc/online-transducer-greedy-search-decoder.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/online-transducer-greedy-search-decoder.h"

#include <algorithm>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"

namespace sherpa_onnx {

static void UseCachedDecoderOut(
    const std::vector<OnlineTransducerDecoderResult> &results,
    Ort::Value *decoder_out) {
  std::vector<int64_t> shape =
      decoder_out->GetTensorTypeAndShapeInfo().GetShape();
  float *dst = decoder_out->GetTensorMutableData<float>();
  for (const auto &r : results) {
    if (r.decoder_out) {
      const float *src = r.decoder_out.GetTensorData<float>();
      std::copy(src, src + shape[1], dst);
    }
    dst += shape[1];
  }
}

static void UpdateCachedDecoderOut(
    OrtAllocator *allocator, const Ort::Value *decoder_out,
    std::vector<OnlineTransducerDecoderResult> *results) {
  std::vector<int64_t> shape =
      decoder_out->GetTensorTypeAndShapeInfo().GetShape();
  auto memory_info =
      Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);
  std::array<int64_t, 2> v_shape{1, shape[1]};

  const float *src = decoder_out->GetTensorData<float>();
  for (auto &r : *results) {
    if (!r.decoder_out) {
      r.decoder_out = Ort::Value::CreateTensor<float>(allocator, v_shape.data(),
                                                      v_shape.size());
    }

    float *dst = r.decoder_out.GetTensorMutableData<float>();
    std::copy(src, src + shape[1], dst);
    src += shape[1];
  }
}

OnlineTransducerDecoderResult
OnlineTransducerGreedySearchDecoder::GetEmptyResult() const {
  int32_t context_size = model_->ContextSize();
  int32_t blank_id = 0;  // always 0
  OnlineTransducerDecoderResult r;
  r.tokens.resize(context_size, -1);
  r.tokens.back() = blank_id;

  return r;
}

void OnlineTransducerGreedySearchDecoder::StripLeadingBlanks(
    OnlineTransducerDecoderResult *r) const {
  int32_t context_size = model_->ContextSize();

  auto start = r->tokens.begin() + context_size;
  auto end = r->tokens.end();

  r->tokens = std::vector<int64_t>(start, end);
}

void OnlineTransducerGreedySearchDecoder::Decode(
    Ort::Value encoder_out,
    std::vector<OnlineTransducerDecoderResult> *result) {
  std::vector<int64_t> encoder_out_shape =
      encoder_out.GetTensorTypeAndShapeInfo().GetShape();

  if (encoder_out_shape[0] != static_cast<int32_t>(result->size())) {
    SHERPA_ONNX_LOGE(
        "Size mismatch! encoder_out.size(0) %d, result.size(0): %d",
        static_cast<int32_t>(encoder_out_shape[0]),
        static_cast<int32_t>(result->size()));
    exit(-1);
  }

  int32_t batch_size = static_cast<int32_t>(encoder_out_shape[0]);
  int32_t num_frames = static_cast<int32_t>(encoder_out_shape[1]);
  int32_t vocab_size = model_->VocabSize();

  Ort::Value decoder_out{nullptr};
  bool is_batch_decoder_out_cached = true;
  for (const auto &r : *result) {
    if (!r.decoder_out) {
      is_batch_decoder_out_cached = false;
      break;
    }
  }

  if (is_batch_decoder_out_cached) {
    auto &r = result->front();
    std::vector<int64_t> decoder_out_shape =
        r.decoder_out.GetTensorTypeAndShapeInfo().GetShape();
    decoder_out_shape[0] = batch_size;
    decoder_out = Ort::Value::CreateTensor<float>(model_->Allocator(),
                                                  decoder_out_shape.data(),
                                                  decoder_out_shape.size());
    UseCachedDecoderOut(*result, &decoder_out);
  } else {
    Ort::Value decoder_input = model_->BuildDecoderInput(*result);
    decoder_out = model_->RunDecoder(std::move(decoder_input));
  }

  for (int32_t t = 0; t != num_frames; ++t) {
    Ort::Value cur_encoder_out =
        GetEncoderOutFrame(model_->Allocator(), &encoder_out, t);
    Ort::Value logit =
        model_->RunJoiner(std::move(cur_encoder_out), View(&decoder_out));

    float *p_logit = logit.GetTensorMutableData<float>();

    bool emitted = false;
    for (int32_t i = 0; i < batch_size; ++i, p_logit += vocab_size) {
      auto &r = (*result)[i];
      if (blank_penalty_ > 0.0) {
        p_logit[0] -= blank_penalty_;  // assuming blank id is 0
      }

      auto y = static_cast<int32_t>(std::distance(
          static_cast<const float *>(p_logit),
          std::max_element(static_cast<const float *>(p_logit),
                           static_cast<const float *>(p_logit) + vocab_size)));
      // blank id is hardcoded to 0
      // also, it treats unk as blank
      if (y != 0 && y != unk_id_) {
        emitted = true;
        r.tokens.push_back(y);
        r.timestamps.push_back(t + r.frame_offset);
        r.num_trailing_blanks = 0;
      } else {
        ++r.num_trailing_blanks;
      }

      // export the per-token log scores
      if (y != 0 && y != unk_id_) {
        // apply temperature-scaling
        for (int32_t n = 0; n < vocab_size; ++n) {
          p_logit[n] /= temperature_scale_;
        }
        LogSoftmax(p_logit, vocab_size);   // renormalize probabilities,
                                           // save time by doing it only for
                                           // emitted symbols
        const float *p_logprob = p_logit;  // rename p_logit as p_logprob,
                                           // now it contains normalized
                                           // probability
        r.ys_probs.push_back(p_logprob[y]);
      }
    }
    if (emitted) {
      Ort::Value decoder_input = model_->BuildDecoderInput(*result);
      decoder_out = model_->RunDecoder(std::move(decoder_input));
    }
  }

  UpdateCachedDecoderOut(model_->Allocator(), &decoder_out, result);

  // Update frame_offset
  for (auto &r : *result) {
    r.frame_offset += num_frames;
  }
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-transducer-greedy-search-decoder.h
================================================
// sherpa-onnx/csrc/online-transducer-greedy-search-decoder.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_GREEDY_SEARCH_DECODER_H_
#define SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_GREEDY_SEARCH_DECODER_H_

#include <vector>

#include "sherpa-onnx/csrc/online-transducer-decoder.h"
#include "sherpa-onnx/csrc/online-transducer-model.h"

namespace sherpa_onnx {

class OnlineTransducerGreedySearchDecoder : public OnlineTransducerDecoder {
 public:
  OnlineTransducerGreedySearchDecoder(OnlineTransducerModel *model,
                                      int32_t unk_id,
                                      float blank_penalty,
                                      float temperature_scale)
      : model_(model),
      unk_id_(unk_id),
      blank_penalty_(blank_penalty),
      temperature_scale_(temperature_scale) {}

  OnlineTransducerDecoderResult GetEmptyResult() const override;

  void StripLeadingBlanks(OnlineTransducerDecoderResult *r) const override;

  void Decode(Ort::Value encoder_out,
              std::vector<OnlineTransducerDecoderResult> *result) override;

 private:
  OnlineTransducerModel *model_;  // Not owned
  int32_t unk_id_;
  float blank_penalty_;
  float temperature_scale_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_GREEDY_SEARCH_DECODER_H_


================================================
FILE: sherpa-onnx/csrc/online-transducer-greedy-search-nemo-decoder.cc
================================================
// sherpa-onnx/csrc/online-transducer-greedy-search-nemo-decoder.cc
//
// Copyright (c)  2024  Xiaomi Corporation
// Copyright (c)  2024  Sangeet Sagar

#include "sherpa-onnx/csrc/online-transducer-greedy-search-nemo-decoder.h"

#include <algorithm>
#include <iterator>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/online-stream.h"
#include "sherpa-onnx/csrc/onnx-utils.h"

namespace sherpa_onnx {

static Ort::Value BuildDecoderInput(int32_t token, OrtAllocator *allocator) {
  std::array<int64_t, 2> shape{1, 1};

  Ort::Value decoder_input =
      Ort::Value::CreateTensor<int32_t>(allocator, shape.data(), shape.size());

  int32_t *p = decoder_input.GetTensorMutableData<int32_t>();

  p[0] = token;

  return decoder_input;
}

static void DecodeOne(const float *encoder_out, int32_t num_rows,
                      int32_t num_cols, OnlineTransducerNeMoModel *model,
                      float blank_penalty, OnlineStream *s) {
  auto memory_info =
      Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

  int32_t vocab_size = model->VocabSize();
  int32_t blank_id = vocab_size - 1;

  auto &r = s->GetResult();

  Ort::Value decoder_out{nullptr};

  auto decoder_input = BuildDecoderInput(
      r.tokens.empty() ? blank_id : r.tokens.back(), model->Allocator());

  std::vector<Ort::Value> &last_decoder_states = s->GetNeMoDecoderStates();

  std::vector<Ort::Value> tmp_decoder_states;
  tmp_decoder_states.reserve(last_decoder_states.size());
  for (auto &v : last_decoder_states) {
    tmp_decoder_states.push_back(View(&v));
  }

  // decoder_output_pair.second returns the next decoder state
  std::pair<Ort::Value, std::vector<Ort::Value>> decoder_output_pair =
      model->RunDecoder(std::move(decoder_input),
                        std::move(tmp_decoder_states));

  std::array<int64_t, 3> encoder_shape{1, num_cols, 1};

  bool emitted = false;

  for (int32_t t = 0; t != num_rows; ++t) {
    Ort::Value cur_encoder_out = Ort::Value::CreateTensor(
        memory_info, const_cast<float *>(encoder_out) + t * num_cols, num_cols,
        encoder_shape.data(), encoder_shape.size());

    Ort::Value logit = model->RunJoiner(std::move(cur_encoder_out),
                                        View(&decoder_output_pair.first));

    float *p_logit = logit.GetTensorMutableData<float>();
    if (blank_penalty > 0) {
      p_logit[blank_id] -= blank_penalty;
    }

    auto y = static_cast<int32_t>(std::distance(
        static_cast<const float *>(p_logit),
        std::max_element(static_cast<const float *>(p_logit),
                         static_cast<const float *>(p_logit) + vocab_size)));

    if (y != blank_id) {
      emitted = true;
      r.tokens.push_back(y);
      r.timestamps.push_back(t + r.frame_offset);
      r.num_trailing_blanks = 0;

      decoder_input = BuildDecoderInput(y, model->Allocator());

      // last decoder state becomes the current state for the first chunk
      decoder_output_pair = model->RunDecoder(
          std::move(decoder_input), std::move(decoder_output_pair.second));
    } else {
      ++r.num_trailing_blanks;
    }
  }

  if (emitted) {
    s->SetNeMoDecoderStates(std::move(decoder_output_pair.second));
  }

  r.frame_offset += num_rows;
}

void OnlineTransducerGreedySearchNeMoDecoder::Decode(Ort::Value encoder_out,
                                                     OnlineStream **ss,
                                                     int32_t n) const {
  auto shape = encoder_out.GetTensorTypeAndShapeInfo().GetShape();
  int32_t batch_size = static_cast<int32_t>(shape[0]);  // bs = 1

  if (batch_size != n) {
    SHERPA_ONNX_LOGE("Size mismatch! encoder_out.size(0) %d, n: %d",
                     static_cast<int32_t>(shape[0]), n);
    exit(-1);
  }

  int32_t dim1 = static_cast<int32_t>(shape[1]);  // T
  int32_t dim2 = static_cast<int32_t>(shape[2]);  // encoder_out_dim

  const float *p = encoder_out.GetTensorData<float>();

  for (int32_t i = 0; i != batch_size; ++i) {
    const float *this_p = p + dim1 * dim2 * i;

    DecodeOne(this_p, dim1, dim2, model_, blank_penalty_, ss[i]);
  }
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-transducer-greedy-search-nemo-decoder.h
================================================
// sherpa-onnx/csrc/online-transducer-greedy-search-nemo-decoder.h
//
// Copyright (c)  2024  Xiaomi Corporation
// Copyright (c)  2024  Sangeet Sagar

#ifndef SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_GREEDY_SEARCH_NEMO_DECODER_H_
#define SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_GREEDY_SEARCH_NEMO_DECODER_H_

#include <vector>

#include "sherpa-onnx/csrc/online-transducer-decoder.h"
#include "sherpa-onnx/csrc/online-transducer-nemo-model.h"

namespace sherpa_onnx {

class OnlineStream;

class OnlineTransducerGreedySearchNeMoDecoder {
 public:
  OnlineTransducerGreedySearchNeMoDecoder(OnlineTransducerNeMoModel *model,
                                          float blank_penalty)
      : model_(model), blank_penalty_(blank_penalty) {}

  // @param n number of elements in ss
  void Decode(Ort::Value encoder_out, OnlineStream **ss, int32_t n) const;

 private:
  OnlineTransducerNeMoModel *model_;  // Not owned
  float blank_penalty_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_GREEDY_SEARCH_NEMO_DECODER_H_


================================================
FILE: sherpa-onnx/csrc/online-transducer-model-config.cc
================================================
// sherpa-onnx/csrc/online-transducer-model-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation
#include "sherpa-onnx/csrc/online-transducer-model-config.h"

#include <sstream>
#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OnlineTransducerModelConfig::Register(ParseOptions *po) {
  po->Register("encoder", &encoder, "Path to encoder.onnx");
  po->Register("decoder", &decoder, "Path to decoder.onnx");
  po->Register("joiner", &joiner, "Path to joiner.onnx");
}

bool OnlineTransducerModelConfig::Validate() const {
  if (!FileExists(encoder)) {
    SHERPA_ONNX_LOGE("transducer encoder: '%s' does not exist",
                     encoder.c_str());
    return false;
  }

  if (!FileExists(decoder)) {
    SHERPA_ONNX_LOGE("transducer decoder: '%s' does not exist",
                     decoder.c_str());
    return false;
  }

  if (!FileExists(joiner)) {
    SHERPA_ONNX_LOGE("joiner: '%s' does not exist", joiner.c_str());
    return false;
  }

  return true;
}

std::string OnlineTransducerModelConfig::ToString() const {
  std::ostringstream os;

  os << "OnlineTransducerModelConfig(";
  os << "encoder=\"" << encoder << "\", ";
  os << "decoder=\"" << decoder << "\", ";
  os << "joiner=\"" << joiner << "\")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-transducer-model-config.h
================================================
// sherpa-onnx/csrc/online-transducer-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OnlineTransducerModelConfig {
  std::string encoder;
  std::string decoder;
  std::string joiner;

  OnlineTransducerModelConfig() = default;
  OnlineTransducerModelConfig(const std::string &encoder,
                              const std::string &decoder,
                              const std::string &joiner)
      : encoder(encoder), decoder(decoder), joiner(joiner) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/online-transducer-model.cc
================================================
// sherpa-onnx/csrc/online-transducer-model.cc
//
// Copyright (c)  2023  Xiaomi Corporation
// Copyright (c)  2023  Pingfeng Luo
#include "sherpa-onnx/csrc/online-transducer-model.h"

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include <algorithm>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/online-conformer-transducer-model.h"
#include "sherpa-onnx/csrc/online-ebranchformer-transducer-model.h"
#include "sherpa-onnx/csrc/online-lstm-transducer-model.h"
#include "sherpa-onnx/csrc/online-zipformer-transducer-model.h"
#include "sherpa-onnx/csrc/online-zipformer2-transducer-model.h"
#include "sherpa-onnx/csrc/onnx-utils.h"

namespace {

enum class ModelType : std::uint8_t {
  kConformer,
  kEbranchformer,
  kLstm,
  kZipformer,
  kZipformer2,
  kUnknown,
};

}  // namespace

namespace sherpa_onnx {

static ModelType GetModelType(char *model_data, size_t model_data_length,
                              bool debug) {
  Ort::Env env(ORT_LOGGING_LEVEL_ERROR);
  Ort::SessionOptions sess_opts;
  sess_opts.SetIntraOpNumThreads(1);
  sess_opts.SetInterOpNumThreads(1);

  auto sess = std::make_unique<Ort::Session>(env, model_data, model_data_length,
                                             sess_opts);

  Ort::ModelMetadata meta_data = sess->GetModelMetadata();
  if (debug) {
    std::ostringstream os;
    PrintModelMetadata(os, meta_data);
#if __OHOS__
    SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
    SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
  }

  Ort::AllocatorWithDefaultOptions allocator;
  auto model_type =
      LookupCustomModelMetaData(meta_data, "model_type", allocator);
  if (model_type.empty()) {
    SHERPA_ONNX_LOGE(
        "No model_type in the metadata!\n"
        "Please make sure you are using the latest export-onnx.py from icefall "
        "to export your transducer models");
    return ModelType::kUnknown;
  }

  if (model_type == "conformer") {
    return ModelType::kConformer;
  } else if (model_type == "ebranchformer") {
    return ModelType::kEbranchformer;
  } else if (model_type == "lstm") {
    return ModelType::kLstm;
  } else if (model_type == "zipformer") {
    return ModelType::kZipformer;
  } else if (model_type == "zipformer2") {
    return ModelType::kZipformer2;
  } else {
    SHERPA_ONNX_LOGE("Unsupported model_type: %s", model_type.c_str());
    return ModelType::kUnknown;
  }
}

std::unique_ptr<OnlineTransducerModel> OnlineTransducerModel::Create(
    const OnlineModelConfig &config) {
  if (!config.model_type.empty()) {
    const auto &model_type = config.model_type;
    if (model_type == "conformer") {
      return std::make_unique<OnlineConformerTransducerModel>(config);
    } else if (model_type == "ebranchformer") {
      return std::make_unique<OnlineEbranchformerTransducerModel>(config);
    } else if (model_type == "lstm") {
      return std::make_unique<OnlineLstmTransducerModel>(config);
    } else if (model_type == "zipformer") {
      return std::make_unique<OnlineZipformerTransducerModel>(config);
    } else if (model_type == "zipformer2") {
      return std::make_unique<OnlineZipformer2TransducerModel>(config);
    } else {
      SHERPA_ONNX_LOGE(
          "Invalid model_type: %s. Trying to load the model to get its type",
          model_type.c_str());
    }
  }
  ModelType model_type = ModelType::kUnknown;

  {
    auto buffer = ReadFile(config.transducer.encoder);

    model_type = GetModelType(buffer.data(), buffer.size(), config.debug);
  }

  switch (model_type) {
    case ModelType::kConformer:
      return std::make_unique<OnlineConformerTransducerModel>(config);
    case ModelType::kEbranchformer:
      return std::make_unique<OnlineEbranchformerTransducerModel>(config);
    case ModelType::kLstm:
      return std::make_unique<OnlineLstmTransducerModel>(config);
    case ModelType::kZipformer:
      return std::make_unique<OnlineZipformerTransducerModel>(config);
    case ModelType::kZipformer2:
      return std::make_unique<OnlineZipformer2TransducerModel>(config);
    case ModelType::kUnknown:
      SHERPA_ONNX_LOGE("Unknown model type in online transducer!");
      return nullptr;
  }

  // unreachable code
  return nullptr;
}

Ort::Value OnlineTransducerModel::BuildDecoderInput(
    const std::vector<OnlineTransducerDecoderResult> &results) {
  int32_t batch_size = static_cast<int32_t>(results.size());
  int32_t context_size = ContextSize();
  std::array<int64_t, 2> shape{batch_size, context_size};
  Ort::Value decoder_input = Ort::Value::CreateTensor<int64_t>(
      Allocator(), shape.data(), shape.size());
  int64_t *p = decoder_input.GetTensorMutableData<int64_t>();

  for (const auto &r : results) {
    const int64_t *begin = r.tokens.data() + r.tokens.size() - context_size;
    const int64_t *end = r.tokens.data() + r.tokens.size();
    std::copy(begin, end, p);
    p += context_size;
  }
  return decoder_input;
}

Ort::Value OnlineTransducerModel::BuildDecoderInput(
    const std::vector<Hypothesis> &hyps) {
  int32_t batch_size = static_cast<int32_t>(hyps.size());
  int32_t context_size = ContextSize();
  std::array<int64_t, 2> shape{batch_size, context_size};
  Ort::Value decoder_input = Ort::Value::CreateTensor<int64_t>(
      Allocator(), shape.data(), shape.size());
  int64_t *p = decoder_input.GetTensorMutableData<int64_t>();

  for (const auto &h : hyps) {
    std::copy(h.ys.end() - context_size, h.ys.end(), p);
    p += context_size;
  }
  return decoder_input;
}

template <typename Manager>
std::unique_ptr<OnlineTransducerModel> OnlineTransducerModel::Create(
    Manager *mgr, const OnlineModelConfig &config) {
  if (!config.model_type.empty()) {
    const auto &model_type = config.model_type;
    if (model_type == "conformer") {
      return std::make_unique<OnlineConformerTransducerModel>(mgr, config);
    } else if (model_type == "ebranchformer") {
      return std::make_unique<OnlineEbranchformerTransducerModel>(mgr, config);
    } else if (model_type == "lstm") {
      return std::make_unique<OnlineLstmTransducerModel>(mgr, config);
    } else if (model_type == "zipformer") {
      return std::make_unique<OnlineZipformerTransducerModel>(mgr, config);
    } else if (model_type == "zipformer2") {
      return std::make_unique<OnlineZipformer2TransducerModel>(mgr, config);
    } else {
      SHERPA_ONNX_LOGE(
          "Invalid model_type: %s. Trying to load the model to get its type",
          model_type.c_str());
    }
  }

  auto buffer = ReadFile(mgr, config.transducer.encoder);
  auto model_type = GetModelType(buffer.data(), buffer.size(), config.debug);

  switch (model_type) {
    case ModelType::kConformer:
      return std::make_unique<OnlineConformerTransducerModel>(mgr, config);
    case ModelType::kEbranchformer:
      return std::make_unique<OnlineEbranchformerTransducerModel>(mgr, config);
    case ModelType::kLstm:
      return std::make_unique<OnlineLstmTransducerModel>(mgr, config);
    case ModelType::kZipformer:
      return std::make_unique<OnlineZipformerTransducerModel>(mgr, config);
    case ModelType::kZipformer2:
      return std::make_unique<OnlineZipformer2TransducerModel>(mgr, config);
    case ModelType::kUnknown:
      SHERPA_ONNX_LOGE("Unknown model type in online transducer!");
      return nullptr;
  }

  // unreachable code
  return nullptr;
}

#if __ANDROID_API__ >= 9
template std::unique_ptr<OnlineTransducerModel> OnlineTransducerModel::Create(
    AAssetManager *mgr, const OnlineModelConfig &config);
#endif

#if __OHOS__
template std::unique_ptr<OnlineTransducerModel> OnlineTransducerModel::Create(
    NativeResourceManager *mgr, const OnlineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-transducer-model.h
================================================
// sherpa-onnx/csrc/online-transducer-model.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_MODEL_H_
#define SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_MODEL_H_

#include <memory>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/hypothesis.h"
#include "sherpa-onnx/csrc/online-model-config.h"
#include "sherpa-onnx/csrc/online-transducer-decoder.h"
#include "sherpa-onnx/csrc/online-transducer-model-config.h"

namespace sherpa_onnx {

struct OnlineTransducerDecoderResult;

class OnlineTransducerModel {
 public:
  virtual ~OnlineTransducerModel() = default;

  static std::unique_ptr<OnlineTransducerModel> Create(
      const OnlineModelConfig &config);

  template <typename Manager>
  static std::unique_ptr<OnlineTransducerModel> Create(
      Manager *mgr, const OnlineModelConfig &config);

  /** Stack a list of individual states into a batch.
   *
   * It is the inverse operation of `UnStackStates`.
   *
   * @param states states[i] contains the state for the i-th utterance.
   * @return Return a single value representing the batched state.
   */
  virtual std::vector<Ort::Value> StackStates(
      const std::vector<std::vector<Ort::Value>> &states) const = 0;

  /** Unstack a batch state into a list of individual states.
   *
   * It is the inverse operation of `StackStates`.
   *
   * @param states A batched state.
   * @return ans[i] contains the state for the i-th utterance.
   */
  virtual std::vector<std::vector<Ort::Value>> UnStackStates(
      const std::vector<Ort::Value> &states) const = 0;

  /** Get the initial encoder states.
   *
   * @return Return the initial encoder state.
   */
  virtual std::vector<Ort::Value> GetEncoderInitStates() = 0;

  /** Set feature dim.
   *
   * This is used in `OnlineZipformer2TransducerModel`,
   * to pass `feature_dim` for `GetEncoderInitStates()`.
   *
   * This has to be called before GetEncoderInitStates(), so the `encoder_embed`
   * init state has the correct `embed_dim` of its output.
   */
  virtual void SetFeatureDim(int32_t /*feature_dim*/) {}

  /** Run the encoder.
   *
   * @param features  A tensor of shape (N, T, C). It is changed in-place.
   * @param states  Encoder state of the previous chunk. It is changed in-place.
   * @param processed_frames  Processed frames before subsampling. It is a 1-D
   * tensor with data type int64_t.
   *
   * @return Return a tuple containing:
   *           - encoder_out, a tensor of shape (N, T', encoder_out_dim)
   *           - next_states  Encoder state for the next chunk.
   */
  virtual std::pair<Ort::Value, std::vector<Ort::Value>> RunEncoder(
      Ort::Value features, std::vector<Ort::Value> states,
      Ort::Value processed_frames) = 0;  // NOLINT

  /** Run the decoder network.
   *
   * Caution: We assume there are no recurrent connections in the decoder and
   *          the decoder is stateless. See
   * https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/pruned_transducer_stateless2/decoder.py
   *          for an example
   *
   * @param decoder_input It is usually of shape (N, context_size)
   * @return Return a tensor of shape (N, decoder_dim).
   */
  virtual Ort::Value RunDecoder(Ort::Value decoder_input) = 0;

  /** Run the joint network.
   *
   * @param encoder_out Output of the encoder network. A tensor of shape
   *                    (N, joiner_dim).
   * @param decoder_out Output of the decoder network. A tensor of shape
   *                    (N, joiner_dim).
   * @return Return a tensor of shape (N, vocab_size). In icefall, the last
   *         last layer of the joint network is `nn.Linear`,
   *         not `nn.LogSoftmax`.
   */
  virtual Ort::Value RunJoiner(Ort::Value encoder_out,
                               Ort::Value decoder_out) = 0;

  /** If we are using a stateless decoder and if it contains a
   *  Conv1D, this function returns the kernel size of the convolution layer.
   */
  virtual int32_t ContextSize() const = 0;

  /** We send this number of feature frames to the encoder at a time. */
  virtual int32_t ChunkSize() const = 0;

  /** Number of input frames to discard after each call to RunEncoder.
   *
   * For instance, if we have 30 frames, chunk_size=8, chunk_shift=6.
   *
   * In the first call of RunEncoder, we use frames 0~7 since chunk_size is 8.
   * Then we discard frame 0~5 since chunk_shift is 6.
   * In the second call of RunEncoder, we use frames 6~13; and then we discard
   * frames 6~11.
   * In the third call of RunEncoder, we use frames 12~19; and then we discard
   * frames 12~16.
   *
   * Note: ChunkSize() - ChunkShift() == right context size
   */
  virtual int32_t ChunkShift() const = 0;

  virtual int32_t VocabSize() const = 0;

  virtual int32_t SubsamplingFactor() const { return 4; }

  virtual bool UseWhisperFeature() const { return false; }

  virtual OrtAllocator *Allocator() = 0;

  Ort::Value BuildDecoderInput(
      const std::vector<OnlineTransducerDecoderResult> &results);

  Ort::Value BuildDecoderInput(const std::vector<Hypothesis> &hyps);
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/online-transducer-modified-beam-search-decoder.cc
================================================
// sherpa-onnx/csrc/online-transducer-modified-beam-search-decoder.cc
//
// Copyright (c)  2023  Pingfeng Luo
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/online-transducer-modified-beam-search-decoder.h"

#include <algorithm>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/log.h"
#include "sherpa-onnx/csrc/onnx-utils.h"

namespace sherpa_onnx {

static void UseCachedDecoderOut(
    const std::vector<int32_t> &hyps_row_splits,
    const std::vector<OnlineTransducerDecoderResult> &results,
    Ort::Value *decoder_out) {
  std::vector<int64_t> shape =
      decoder_out->GetTensorTypeAndShapeInfo().GetShape();

  float *dst = decoder_out->GetTensorMutableData<float>();

  int32_t batch_size = static_cast<int32_t>(results.size());
  for (int32_t i = 0; i != batch_size; ++i) {
    int32_t num_hyps = hyps_row_splits[i + 1] - hyps_row_splits[i];
    if (num_hyps > 1 || !results[i].decoder_out) {
      dst += num_hyps * shape[1];
      continue;
    }

    const float *src = results[i].decoder_out.GetTensorData<float>();
    std::copy(src, src + shape[1], dst);
    dst += shape[1];
  }
}

OnlineTransducerDecoderResult
OnlineTransducerModifiedBeamSearchDecoder::GetEmptyResult() const {
  int32_t context_size = model_->ContextSize();
  int32_t blank_id = 0;  // always 0
  OnlineTransducerDecoderResult r;
  std::vector<int64_t> blanks(context_size, -1);
  blanks.back() = blank_id;

  Hypotheses blank_hyp({{blanks, 0}});
  r.hyps = std::move(blank_hyp);
  r.tokens = std::move(blanks);
  return r;
}

void OnlineTransducerModifiedBeamSearchDecoder::StripLeadingBlanks(
    OnlineTransducerDecoderResult *r) const {
  int32_t context_size = model_->ContextSize();
  auto hyp = r->hyps.GetMostProbable(true);

  std::vector<int64_t> tokens(hyp.ys.begin() + context_size, hyp.ys.end());
  r->tokens = std::move(tokens);
  r->timestamps = std::move(hyp.timestamps);

  // export per-token scores
  r->ys_probs = std::move(hyp.ys_probs);
  r->lm_probs = std::move(hyp.lm_probs);
  r->context_scores = std::move(hyp.context_scores);

  r->num_trailing_blanks = hyp.num_trailing_blanks;
}

void OnlineTransducerModifiedBeamSearchDecoder::Decode(
    Ort::Value encoder_out,
    std::vector<OnlineTransducerDecoderResult> *result) {
  Decode(std::move(encoder_out), nullptr, result);
}

void OnlineTransducerModifiedBeamSearchDecoder::Decode(
    Ort::Value encoder_out, OnlineStream **ss,
    std::vector<OnlineTransducerDecoderResult> *result) {
  std::vector<int64_t> encoder_out_shape =
      encoder_out.GetTensorTypeAndShapeInfo().GetShape();

  if (static_cast<int32_t>(encoder_out_shape[0]) !=
      static_cast<int32_t>(result->size())) {
    SHERPA_ONNX_LOGE(
        "Size mismatch! encoder_out.size(0) %d, result.size(0): %d\n",
        static_cast<int32_t>(encoder_out_shape[0]),
        static_cast<int32_t>(result->size()));
    exit(-1);
  }

  int32_t batch_size = static_cast<int32_t>(encoder_out_shape[0]);

  int32_t num_frames = static_cast<int32_t>(encoder_out_shape[1]);
  int32_t vocab_size = model_->VocabSize();

  std::vector<Hypotheses> cur;
  for (auto &r : *result) {
    cur.push_back(std::move(r.hyps));
  }
  std::vector<Hypothesis> prev;

  for (int32_t t = 0; t != num_frames; ++t) {
    // Due to merging paths with identical token sequences,
    // not all utterances have "num_active_paths" paths.
    auto hyps_row_splits = GetHypsRowSplits(cur);
    int32_t num_hyps =
        hyps_row_splits.back();  // total num hyps for all utterance
    prev.clear();
    for (auto &hyps : cur) {
      for (auto &h : hyps) {
        prev.push_back(std::move(h.second));
      }
    }
    cur.clear();
    cur.reserve(batch_size);

    Ort::Value decoder_input = model_->BuildDecoderInput(prev);
    Ort::Value decoder_out = model_->RunDecoder(std::move(decoder_input));
    if (t == 0) {
      UseCachedDecoderOut(hyps_row_splits, *result, &decoder_out);
    }

    Ort::Value cur_encoder_out =
        GetEncoderOutFrame(model_->Allocator(), &encoder_out, t);
    cur_encoder_out =
        Repeat(model_->Allocator(), &cur_encoder_out, hyps_row_splits);
    Ort::Value logit =
        model_->RunJoiner(std::move(cur_encoder_out), View(&decoder_out));

    float *p_logit = logit.GetTensorMutableData<float>();

    // copy raw logits, apply temperature-scaling  (for confidences)
    // Note: temperature scaling is used only for the confidences,
    //       the decoding algorithm uses the original logits
    int32_t p_logit_items = vocab_size * num_hyps;
    std::vector<float> logit_with_temperature(p_logit_items);
    {
      std::copy(p_logit, p_logit + p_logit_items,
                logit_with_temperature.begin());
      for (float &elem : logit_with_temperature) {
        elem /= temperature_scale_;
      }
      LogSoftmax(logit_with_temperature.data(), vocab_size, num_hyps);
    }

    if (blank_penalty_ > 0.0) {
      // assuming blank id is 0
      SubtractBlank(p_logit, vocab_size, num_hyps, 0, blank_penalty_);
    }
    LogSoftmax(p_logit, vocab_size, num_hyps);

    // now p_logit contains log_softmax output, we rename it to p_logprob
    // to match what it actually contains
    float *p_logprob = p_logit;

    // add log_prob of each hypothesis to p_logprob before taking top_k
    for (int32_t i = 0; i != num_hyps; ++i) {
      float log_prob = prev[i].log_prob;
      if (lm_ && shallow_fusion_) {
         log_prob += prev[i].lm_log_prob;
      }

      for (int32_t k = 0; k != vocab_size; ++k, ++p_logprob) {
        *p_logprob += log_prob;
      }
    }
    p_logprob = p_logit;  // we changed p_logprob in the above for loop

    for (int32_t b = 0; b != batch_size; ++b) {
      int32_t frame_offset = (*result)[b].frame_offset;
      int32_t start = hyps_row_splits[b];
      int32_t end = hyps_row_splits[b + 1];
      auto topk =
          TopkIndex(p_logprob, vocab_size * (end - start), max_active_paths_);

      Hypotheses hyps;
      for (auto k : topk) {
        int32_t hyp_index = k / vocab_size + start;
        int32_t new_token = k % vocab_size;

        Hypothesis new_hyp = prev[hyp_index];
        const float prev_lm_log_prob = new_hyp.lm_log_prob;
        float context_score = 0;
        auto context_state = new_hyp.context_state;

        // blank is hardcoded to 0
        // also, it treats unk as blank
        if (new_token != 0 && new_token != unk_id_) {
          new_hyp.ys.push_back(new_token);
          new_hyp.timestamps.push_back(t + frame_offset);
          new_hyp.num_trailing_blanks = 0;
          if (ss != nullptr && ss[b]->GetContextGraph() != nullptr) {
            auto context_res = ss[b]->GetContextGraph()->ForwardOneStep(
                context_state, new_token, false /*strict mode*/);
            context_score = std::get<0>(context_res);
            new_hyp.context_state = std::get<1>(context_res);
          }
          if (lm_ && shallow_fusion_) {
            lm_->ComputeLMScoreSF(lm_scale_, &new_hyp);
          }
        } else {
          ++new_hyp.num_trailing_blanks;
        }
        if (lm_ && shallow_fusion_) {
           new_hyp.log_prob = p_logprob[k] + context_score -
                           prev_lm_log_prob;  // log_prob only includes the
                                              // score of the transducer
        } else {
           new_hyp.log_prob = p_logprob[k] + context_score;  // rescore or no LM
                                                             // previous token
                                                             // score is ignored
        }

        // export the per-token log scores
        if (new_token != 0 && new_token != unk_id_) {
          float y_prob = logit_with_temperature[start * vocab_size + k];
          new_hyp.ys_probs.push_back(y_prob);

          if (lm_ && shallow_fusion_) {  // export only if
                                         // LM shallow fusion is used
            float lm_prob = new_hyp.lm_log_prob - prev_lm_log_prob;

            if (lm_scale_ != 0.0) {
              lm_prob /= lm_scale_;  // remove lm-scale
            }
            new_hyp.lm_probs.push_back(lm_prob);
          }

          // export only when `ContextGraph` is used
          if (ss != nullptr && ss[b]->GetContextGraph() != nullptr) {
            new_hyp.context_scores.push_back(context_score);
          }
        }

        hyps.Add(std::move(new_hyp));
      }  // for (auto k : topk)
      cur.push_back(std::move(hyps));
      p_logprob += (end - start) * vocab_size;
    }  // for (int32_t b = 0; b != batch_size; ++b)
  }    // for (int32_t t = 0; t != num_frames; ++t)

  // classic lm rescore
  if (lm_ && !shallow_fusion_) {
    lm_->ComputeLMScore(lm_scale_, model_->ContextSize(), &cur);
  }

  for (int32_t b = 0; b != batch_size; ++b) {
    auto &hyps = cur[b];
    auto best_hyp = hyps.GetMostProbable(true);
    auto &r = (*result)[b];

    r.hyps = std::move(hyps);
    r.tokens = std::move(best_hyp.ys);
    r.num_trailing_blanks = best_hyp.num_trailing_blanks;
    r.frame_offset += num_frames;
  }
}

void OnlineTransducerModifiedBeamSearchDecoder::UpdateDecoderOut(
    OnlineTransducerDecoderResult *result) {
  if (static_cast<int32_t>(result->tokens.size()) == model_->ContextSize()) {
    result->decoder_out = Ort::Value{nullptr};
    return;
  }
  Ort::Value decoder_input = model_->BuildDecoderInput({*result});
  result->decoder_out = model_->RunDecoder(std::move(decoder_input));
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-transducer-modified-beam-search-decoder.h
================================================
// sherpa-onnx/csrc/online-transducer-modified_beam-search-decoder.h
//
// Copyright (c)  2023  Pingfeng Luo
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_MODIFIED_BEAM_SEARCH_DECODER_H_
#define SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_MODIFIED_BEAM_SEARCH_DECODER_H_

#include <vector>

#include "sherpa-onnx/csrc/online-lm.h"
#include "sherpa-onnx/csrc/online-stream.h"
#include "sherpa-onnx/csrc/online-transducer-decoder.h"
#include "sherpa-onnx/csrc/online-transducer-model.h"

namespace sherpa_onnx {

class OnlineTransducerModifiedBeamSearchDecoder
    : public OnlineTransducerDecoder {
 public:
  OnlineTransducerModifiedBeamSearchDecoder(OnlineTransducerModel *model,
                                            OnlineLM *lm,
                                            int32_t max_active_paths,
                                            float lm_scale,
                                            bool shallow_fusion,
                                            int32_t unk_id,
                                            float blank_penalty,
                                            float temperature_scale)
      : model_(model),
        lm_(lm),
        max_active_paths_(max_active_paths),
        lm_scale_(lm_scale),
        shallow_fusion_(shallow_fusion),
        unk_id_(unk_id),
        blank_penalty_(blank_penalty),
        temperature_scale_(temperature_scale) {}

  OnlineTransducerDecoderResult GetEmptyResult() const override;

  void StripLeadingBlanks(OnlineTransducerDecoderResult *r) const override;

  void Decode(Ort::Value encoder_out,
              std::vector<OnlineTransducerDecoderResult> *result) override;

  void Decode(Ort::Value encoder_out, OnlineStream **ss,
              std::vector<OnlineTransducerDecoderResult> *result) override;

  void UpdateDecoderOut(OnlineTransducerDecoderResult *result) override;

 private:
  OnlineTransducerModel *model_;  // Not owned
  OnlineLM *lm_;                  // Not owned

  int32_t max_active_paths_;
  float lm_scale_;  // used only when lm_ is not nullptr
  bool shallow_fusion_;  // used only when lm_ is not nullptr
  int32_t unk_id_;
  float blank_penalty_;
  float temperature_scale_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_MODIFIED_BEAM_SEARCH_DECODER_H_


================================================
FILE: sherpa-onnx/csrc/online-transducer-nemo-model.cc
================================================
// sherpa-onnx/csrc/online-transducer-nemo-model.cc
//
// Copyright (c)  2024  Xiaomi Corporation
// Copyright (c)  2024  Sangeet Sagar

#include "sherpa-onnx/csrc/online-transducer-nemo-model.h"

#include <algorithm>
#include <cassert>
#include <cmath>
#include <memory>
#include <numeric>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/cat.h"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/online-transducer-decoder.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"
#include "sherpa-onnx/csrc/transpose.h"
#include "sherpa-onnx/csrc/unbind.h"

namespace sherpa_onnx {

class OnlineTransducerNeMoModel::Impl {
 public:
  explicit Impl(const OnlineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    encoder_sess_ = std::make_unique<Ort::Session>(
        env_, SHERPA_ONNX_TO_ORT_PATH(config.transducer.encoder), sess_opts_);
    InitEncoder(nullptr, 0);

    decoder_sess_ = std::make_unique<Ort::Session>(
        env_, SHERPA_ONNX_TO_ORT_PATH(config.transducer.decoder), sess_opts_);
    InitDecoder(nullptr, 0);

    joiner_sess_ = std::make_unique<Ort::Session>(
        env_, SHERPA_ONNX_TO_ORT_PATH(config.transducer.joiner), sess_opts_);
    InitJoiner(nullptr, 0);
  }

  template <typename Manager>
  Impl(Manager *mgr, const OnlineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    {
      auto buf = ReadFile(mgr, config.transducer.encoder);
      InitEncoder(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(mgr, config.transducer.decoder);
      InitDecoder(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(mgr, config.transducer.joiner);
      InitJoiner(buf.data(), buf.size());
    }
  }

  std::vector<Ort::Value> RunEncoder(Ort::Value features,
                                     std::vector<Ort::Value> states) {
    Ort::Value &cache_last_channel = states[0];
    Ort::Value &cache_last_time = states[1];
    Ort::Value &cache_last_channel_len = states[2];

    int32_t batch_size = features.GetTensorTypeAndShapeInfo().GetShape()[0];

    std::array<int64_t, 1> length_shape{batch_size};

    Ort::Value length = Ort::Value::CreateTensor<int64_t>(
        allocator_, length_shape.data(), length_shape.size());

    int64_t *p_length = length.GetTensorMutableData<int64_t>();

    std::fill(p_length, p_length + batch_size, ChunkSize());

    // (B, T, C) -> (B, C, T)
    features = Transpose12(allocator_, &features);

    std::array<Ort::Value, 5> inputs = {
        std::move(features), View(&length), std::move(cache_last_channel),
        std::move(cache_last_time), std::move(cache_last_channel_len)};

    auto out = encoder_sess_->Run(
        {}, encoder_input_names_ptr_.data(), inputs.data(), inputs.size(),
        encoder_output_names_ptr_.data(), encoder_output_names_ptr_.size());
    // out[0]: logit
    // out[1] logit_length
    // out[2:] states_next
    //
    // we need to remove out[1]

    std::vector<Ort::Value> ans;
    ans.reserve(out.size() - 1);

    for (int32_t i = 0; i != out.size(); ++i) {
      if (i == 1) {
        continue;
      }

      ans.push_back(std::move(out[i]));
    }

    return ans;
  }

  std::pair<Ort::Value, std::vector<Ort::Value>> RunDecoder(
      Ort::Value targets, std::vector<Ort::Value> states) {
    Ort::MemoryInfo memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);

    auto shape = targets.GetTensorTypeAndShapeInfo().GetShape();
    int32_t batch_size = static_cast<int32_t>(shape[0]);

    std::vector<int64_t> length_shape = {batch_size};
    std::vector<int32_t> length_value(batch_size, 1);

    Ort::Value targets_length = Ort::Value::CreateTensor<int32_t>(
        memory_info, length_value.data(), batch_size, length_shape.data(),
        length_shape.size());

    std::vector<Ort::Value> decoder_inputs;
    decoder_inputs.reserve(2 + states.size());

    decoder_inputs.push_back(std::move(targets));
    decoder_inputs.push_back(std::move(targets_length));

    for (auto &s : states) {
      decoder_inputs.push_back(std::move(s));
    }

    auto decoder_out = decoder_sess_->Run(
        {}, decoder_input_names_ptr_.data(), decoder_inputs.data(),
        decoder_inputs.size(), decoder_output_names_ptr_.data(),
        decoder_output_names_ptr_.size());

    std::vector<Ort::Value> states_next;
    states_next.reserve(states.size());

    // decoder_out[0]: decoder_output
    // decoder_out[1]: decoder_output_length (discarded)
    // decoder_out[2:] states_next

    for (int32_t i = 0; i != states.size(); ++i) {
      states_next.push_back(std::move(decoder_out[i + 2]));
    }

    // we discard decoder_out[1]
    return {std::move(decoder_out[0]), std::move(states_next)};
  }

  Ort::Value RunJoiner(Ort::Value encoder_out, Ort::Value decoder_out) {
    std::array<Ort::Value, 2> joiner_input = {std::move(encoder_out),
                                              std::move(decoder_out)};
    auto logit = joiner_sess_->Run({}, joiner_input_names_ptr_.data(),
                                   joiner_input.data(), joiner_input.size(),
                                   joiner_output_names_ptr_.data(),
                                   joiner_output_names_ptr_.size());

    return std::move(logit[0]);
  }

  std::vector<Ort::Value> GetDecoderInitStates() {
    std::vector<Ort::Value> ans;
    ans.reserve(2);
    ans.push_back(View(&lstm0_));
    ans.push_back(View(&lstm1_));

    return ans;
  }

  int32_t ChunkSize() const { return window_size_; }

  int32_t ChunkShift() const { return chunk_shift_; }

  int32_t SubsamplingFactor() const { return subsampling_factor_; }

  int32_t FeatureDim() const { return feat_dim_; }

  int32_t VocabSize() const { return vocab_size_; }

  OrtAllocator *Allocator() { return allocator_; }

  std::string FeatureNormalizationMethod() const { return normalize_type_; }

  // Return a vector containing 3 tensors
  // - cache_last_channel
  // - cache_last_time_
  // - cache_last_channel_len
  std::vector<Ort::Value> GetEncoderInitStates() {
    std::vector<Ort::Value> ans;
    ans.reserve(3);
    ans.push_back(View(&cache_last_channel_));
    ans.push_back(View(&cache_last_time_));
    ans.push_back(View(&cache_last_channel_len_));

    return ans;
  }

  std::vector<Ort::Value> StackStates(
      std::vector<std::vector<Ort::Value>> states) const {
    int32_t batch_size = static_cast<int32_t>(states.size());
    if (batch_size == 1) {
      return std::move(states[0]);
    }

    std::vector<Ort::Value> ans;

    auto allocator = const_cast<Impl *>(this)->allocator_;

    // stack cache_last_channel
    std::vector<const Ort::Value *> buf(batch_size);

    // there are 3 states to be stacked
    for (int32_t i = 0; i != 3; ++i) {
      buf.clear();
      buf.reserve(batch_size);

      for (int32_t b = 0; b != batch_size; ++b) {
        assert(states[b].size() == 3);
        buf.push_back(&states[b][i]);
      }

      Ort::Value c{nullptr};
      if (i == 2) {
        c = Cat<int64_t>(allocator, buf, 0);
      } else {
        c = Cat(allocator, buf, 0);
      }

      ans.push_back(std::move(c));
    }

    return ans;
  }

  std::vector<std::vector<Ort::Value>> UnStackStates(
      std::vector<Ort::Value> states) {
    assert(states.size() == 3);

    std::vector<std::vector<Ort::Value>> ans;

    auto shape = states[0].GetTensorTypeAndShapeInfo().GetShape();
    int32_t batch_size = shape[0];
    ans.resize(batch_size);

    if (batch_size == 1) {
      ans[0] = std::move(states);
      return ans;
    }

    for (int32_t i = 0; i != 3; ++i) {
      std::vector<Ort::Value> v;
      if (i == 2) {
        v = Unbind<int64_t>(allocator_, &states[i], 0);
      } else {
        v = Unbind(allocator_, &states[i], 0);
      }

      assert(v.size() == batch_size);

      for (int32_t b = 0; b != batch_size; ++b) {
        ans[b].push_back(std::move(v[b]));
      }
    }

    return ans;
  }

 private:
  void InitEncoder(void *model_data, size_t model_data_length) {
    if (model_data) {
      encoder_sess_ = std::make_unique<Ort::Session>(
          env_, model_data, model_data_length, sess_opts_);
    } else if (!encoder_sess_) {
      SHERPA_ONNX_LOGE(
          "Please pass buffer data or initialize encoder session outside of "
          "this function");
      SHERPA_ONNX_EXIT(-1);
    }

    GetInputNames(encoder_sess_.get(), &encoder_input_names_,
                  &encoder_input_names_ptr_);

    GetOutputNames(encoder_sess_.get(), &encoder_output_names_,
                   &encoder_output_names_ptr_);

    feat_dim_ = encoder_sess_->GetInputTypeInfo(0)
                    .GetTensorTypeAndShapeInfo()
                    .GetShape()[1];

    // get meta data
    Ort::ModelMetadata meta_data = encoder_sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      os << "---encoder---\n";
      PrintModelMetadata(os, meta_data);
      os << "feat_dim: " << feat_dim_ << "\n";
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
    SHERPA_ONNX_READ_META_DATA(vocab_size_, "vocab_size");

    // need to increase by 1 since the blank token is not included in computing
    // vocab_size in NeMo.
    vocab_size_ += 1;

    SHERPA_ONNX_READ_META_DATA(window_size_, "window_size");
    SHERPA_ONNX_READ_META_DATA(chunk_shift_, "chunk_shift");
    SHERPA_ONNX_READ_META_DATA(subsampling_factor_, "subsampling_factor");

    SHERPA_ONNX_READ_META_DATA_STR_ALLOW_EMPTY(normalize_type_,
                                               "normalize_type");
    SHERPA_ONNX_READ_META_DATA(pred_rnn_layers_, "pred_rnn_layers");
    SHERPA_ONNX_READ_META_DATA(pred_hidden_, "pred_hidden");

    SHERPA_ONNX_READ_META_DATA(cache_last_channel_dim1_,
                               "cache_last_channel_dim1");
    SHERPA_ONNX_READ_META_DATA(cache_last_channel_dim2_,
                               "cache_last_channel_dim2");
    SHERPA_ONNX_READ_META_DATA(cache_last_channel_dim3_,
                               "cache_last_channel_dim3");
    SHERPA_ONNX_READ_META_DATA(cache_last_time_dim1_, "cache_last_time_dim1");
    SHERPA_ONNX_READ_META_DATA(cache_last_time_dim2_, "cache_last_time_dim2");
    SHERPA_ONNX_READ_META_DATA(cache_last_time_dim3_, "cache_last_time_dim3");

    if (normalize_type_ == "NA") {
      normalize_type_ = "";
    }

    InitEncoderStates();
  }

  void InitEncoderStates() {
    std::array<int64_t, 4> cache_last_channel_shape{1, cache_last_channel_dim1_,
                                                    cache_last_channel_dim2_,
                                                    cache_last_channel_dim3_};

    cache_last_channel_ = Ort::Value::CreateTensor<float>(
        allocator_, cache_last_channel_shape.data(),
        cache_last_channel_shape.size());

    Fill<float>(&cache_last_channel_, 0);

    std::array<int64_t, 4> cache_last_time_shape{
        1, cache_last_time_dim1_, cache_last_time_dim2_, cache_last_time_dim3_};

    cache_last_time_ = Ort::Value::CreateTensor<float>(
        allocator_, cache_last_time_shape.data(), cache_last_time_shape.size());

    Fill<float>(&cache_last_time_, 0);

    int64_t shape = 1;
    cache_last_channel_len_ =
        Ort::Value::CreateTensor<int64_t>(allocator_, &shape, 1);

    cache_last_channel_len_.GetTensorMutableData<int64_t>()[0] = 0;
  }

  void InitDecoder(void *model_data, size_t model_data_length) {
    if (model_data) {
      decoder_sess_ = std::make_unique<Ort::Session>(
          env_, model_data, model_data_length, sess_opts_);
    } else if (!decoder_sess_) {
      SHERPA_ONNX_LOGE(
          "Please pass buffer data or initialize decoder session outside of "
          "this function");
      SHERPA_ONNX_EXIT(-1);
    }

    GetInputNames(decoder_sess_.get(), &decoder_input_names_,
                  &decoder_input_names_ptr_);

    GetOutputNames(decoder_sess_.get(), &decoder_output_names_,
                   &decoder_output_names_ptr_);

    InitDecoderStates();
  }

  void InitDecoderStates() {
    int32_t batch_size = 1;
    std::array<int64_t, 3> s0_shape{pred_rnn_layers_, batch_size, pred_hidden_};
    lstm0_ = Ort::Value::CreateTensor<float>(allocator_, s0_shape.data(),
                                             s0_shape.size());

    Fill<float>(&lstm0_, 0);

    std::array<int64_t, 3> s1_shape{pred_rnn_layers_, batch_size, pred_hidden_};

    lstm1_ = Ort::Value::CreateTensor<float>(allocator_, s1_shape.data(),
                                             s1_shape.size());

    Fill<float>(&lstm1_, 0);
  }

  void InitJoiner(void *model_data, size_t model_data_length) {
    if (model_data) {
      joiner_sess_ = std::make_unique<Ort::Session>(
          env_, model_data, model_data_length, sess_opts_);
    } else if (!joiner_sess_) {
      SHERPA_ONNX_LOGE(
          "Please pass buffer data or initialize joiner session outside of "
          "this function");
      SHERPA_ONNX_EXIT(-1);
    }

    GetInputNames(joiner_sess_.get(), &joiner_input_names_,
                  &joiner_input_names_ptr_);

    GetOutputNames(joiner_sess_.get(), &joiner_output_names_,
                   &joiner_output_names_ptr_);
  }

 private:
  OnlineModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> encoder_sess_;
  std::unique_ptr<Ort::Session> decoder_sess_;
  std::unique_ptr<Ort::Session> joiner_sess_;

  std::vector<std::string> encoder_input_names_;
  std::vector<const char *> encoder_input_names_ptr_;

  std::vector<std::string> encoder_output_names_;
  std::vector<const char *> encoder_output_names_ptr_;

  std::vector<std::string> decoder_input_names_;
  std::vector<const char *> decoder_input_names_ptr_;

  std::vector<std::string> decoder_output_names_;
  std::vector<const char *> decoder_output_names_ptr_;

  std::vector<std::string> joiner_input_names_;
  std::vector<const char *> joiner_input_names_ptr_;

  std::vector<std::string> joiner_output_names_;
  std::vector<const char *> joiner_output_names_ptr_;

  int32_t window_size_ = 0;
  int32_t chunk_shift_ = 0;
  int32_t vocab_size_ = 0;
  int32_t subsampling_factor_ = 8;
  int32_t feat_dim_ = 80;
  std::string normalize_type_;
  int32_t pred_rnn_layers_ = -1;
  int32_t pred_hidden_ = -1;

  // encoder states
  int32_t cache_last_channel_dim1_ = 0;
  int32_t cache_last_channel_dim2_ = 0;
  int32_t cache_last_channel_dim3_ = 0;
  int32_t cache_last_time_dim1_ = 0;
  int32_t cache_last_time_dim2_ = 0;
  int32_t cache_last_time_dim3_ = 0;

  // init encoder states
  Ort::Value cache_last_channel_{nullptr};
  Ort::Value cache_last_time_{nullptr};
  Ort::Value cache_last_channel_len_{nullptr};

  // init decoder states
  Ort::Value lstm0_{nullptr};
  Ort::Value lstm1_{nullptr};
};

OnlineTransducerNeMoModel::OnlineTransducerNeMoModel(
    const OnlineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OnlineTransducerNeMoModel::OnlineTransducerNeMoModel(
    Manager *mgr, const OnlineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OnlineTransducerNeMoModel::~OnlineTransducerNeMoModel() = default;

std::vector<Ort::Value> OnlineTransducerNeMoModel::RunEncoder(
    Ort::Value features, std::vector<Ort::Value> states) const {
  return impl_->RunEncoder(std::move(features), std::move(states));
}

std::pair<Ort::Value, std::vector<Ort::Value>>
OnlineTransducerNeMoModel::RunDecoder(Ort::Value targets,
                                      std::vector<Ort::Value> states) const {
  return impl_->RunDecoder(std::move(targets), std::move(states));
}

std::vector<Ort::Value> OnlineTransducerNeMoModel::GetDecoderInitStates()
    const {
  return impl_->GetDecoderInitStates();
}

Ort::Value OnlineTransducerNeMoModel::RunJoiner(Ort::Value encoder_out,
                                                Ort::Value decoder_out) const {
  return impl_->RunJoiner(std::move(encoder_out), std::move(decoder_out));
}

int32_t OnlineTransducerNeMoModel::ChunkSize() const {
  return impl_->ChunkSize();
}

int32_t OnlineTransducerNeMoModel::ChunkShift() const {
  return impl_->ChunkShift();
}

int32_t OnlineTransducerNeMoModel::SubsamplingFactor() const {
  return impl_->SubsamplingFactor();
}

int32_t OnlineTransducerNeMoModel::VocabSize() const {
  return impl_->VocabSize();
}

int32_t OnlineTransducerNeMoModel::FeatureDim() const {
  return impl_->FeatureDim();
}

OrtAllocator *OnlineTransducerNeMoModel::Allocator() const {
  return impl_->Allocator();
}

std::string OnlineTransducerNeMoModel::FeatureNormalizationMethod() const {
  return impl_->FeatureNormalizationMethod();
}

std::vector<Ort::Value> OnlineTransducerNeMoModel::GetEncoderInitStates()
    const {
  return impl_->GetEncoderInitStates();
}

std::vector<Ort::Value> OnlineTransducerNeMoModel::StackStates(
    std::vector<std::vector<Ort::Value>> states) const {
  return impl_->StackStates(std::move(states));
}

std::vector<std::vector<Ort::Value>> OnlineTransducerNeMoModel::UnStackStates(
    std::vector<Ort::Value> states) const {
  return impl_->UnStackStates(std::move(states));
}

#if __ANDROID_API__ >= 9
template OnlineTransducerNeMoModel::OnlineTransducerNeMoModel(
    AAssetManager *mgr, const OnlineModelConfig &config);
#endif

#if __OHOS__
template OnlineTransducerNeMoModel::OnlineTransducerNeMoModel(
    NativeResourceManager *mgr, const OnlineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-transducer-nemo-model.h
================================================
// sherpa-onnx/csrc/online-transducer-nemo-model.h
//
// Copyright (c)  2024  Xiaomi Corporation
// Copyright (c)  2024  Sangeet Sagar

#ifndef SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_NEMO_MODEL_H_
#define SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_NEMO_MODEL_H_

#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/online-model-config.h"

namespace sherpa_onnx {

// see
// https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/asr/models/hybrid_rnnt_ctc_bpe_models.py#L40
// Its decoder is stateful, not stateless.
class OnlineTransducerNeMoModel {
 public:
  explicit OnlineTransducerNeMoModel(const OnlineModelConfig &config);

  template <typename Manager>
  OnlineTransducerNeMoModel(Manager *mgr, const OnlineModelConfig &config);

  ~OnlineTransducerNeMoModel();
  // A list of 3 tensors:
  //  - cache_last_channel
  //  - cache_last_time
  //  - cache_last_channel_len
  std::vector<Ort::Value> GetEncoderInitStates() const;

  // stack encoder states
  std::vector<Ort::Value> StackStates(
      std::vector<std::vector<Ort::Value>> states) const;

  // unstack encoder states
  std::vector<std::vector<Ort::Value>> UnStackStates(
      std::vector<Ort::Value> states) const;

  /** Run the encoder.
   *
   * @param features  A tensor of shape (N, T, C). It is changed in-place.
   * @param states  It is from GetEncoderInitStates() or returned from this
   *                method.
   *
   * @return Return a tuple containing:
   *           - ans[0]: encoder_out, a tensor of shape (N, encoder_out_dim, T')
   *           - ans[1:]: contains next states
   */
  std::vector<Ort::Value> RunEncoder(
      Ort::Value features, std::vector<Ort::Value> states) const;  // NOLINT

  /** Run the decoder network.
   *
   * @param targets A int32 tensor of shape (batch_size, 1)
   * @param states The states for the decoder model.
   * @return Return a vector:
   *           - ans[0] is the decoder_out (a float tensor)
   *           - ans[1:] is the next states
   */
  std::pair<Ort::Value, std::vector<Ort::Value>> RunDecoder(
      Ort::Value targets, std::vector<Ort::Value> states) const;

  std::vector<Ort::Value> GetDecoderInitStates() const;

  /** Run the joint network.
   *
   * @param encoder_out Output of the encoder network.
   * @param decoder_out Output of the decoder network.
   * @return Return a tensor of shape (N, 1, 1, vocab_size) containing logits.
   */
  Ort::Value RunJoiner(Ort::Value encoder_out, Ort::Value decoder_out) const;

  /** We send this number of feature frames to the encoder at a time. */
  int32_t ChunkSize() const;

  /** Number of input frames to discard after each call to RunEncoder.
   *
   * For instance, if we have 30 frames, chunk_size=8, chunk_shift=6.
   *
   * In the first call of RunEncoder, we use frames 0~7 since chunk_size is 8.
   * Then we discard frame 0~5 since chunk_shift is 6.
   * In the second call of RunEncoder, we use frames 6~13; and then we discard
   * frames 6~11.
   * In the third call of RunEncoder, we use frames 12~19; and then we discard
   * frames 12~16.
   *
   * Note: ChunkSize() - ChunkShift() == right context size
   */
  int32_t ChunkShift() const;

  /** Return the subsampling factor of the model.
   */
  int32_t SubsamplingFactor() const;

  int32_t VocabSize() const;

  int32_t FeatureDim() const;

  /** Return an allocator for allocating memory
   */
  OrtAllocator *Allocator() const;

  // Possible values:
  // - per_feature
  // - all_features (not implemented yet)
  // - fixed_mean (not implemented)
  // - fixed_std (not implemented)
  // - or just leave it to empty
  // See
  // https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/asr/parts/preprocessing/features.py#L59
  // for details
  std::string FeatureNormalizationMethod() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_NEMO_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/online-websocket-client.cc
================================================
// sherpa/cpp_api/websocket/online-websocket-client.cc
//
// Copyright (c)  2022  Xiaomi Corporation
#include <chrono>  // NOLINT
#include <fstream>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/parse-options.h"
#include "sherpa-onnx/csrc/wave-reader.h"
#include "websocketpp/client.hpp"
#include "websocketpp/config/asio_no_tls_client.hpp"
#include "websocketpp/uri.hpp"

using client = websocketpp::client<websocketpp::config::asio_client>;

using message_ptr = client::message_ptr;
using websocketpp::connection_hdl;

static constexpr const char *kUsageMessage = R"(
Automatic speech recognition with sherpa-onnx using websocket.

Usage:

./bin/sherpa-onnx-online-websocket-client --help

./bin/sherpa-onnx-online-websocket-client \
  --server-ip=127.0.0.1 \
  --server-port=6006 \
  --samples-per-message=8000 \
  --seconds-per-message=0.2 \
  /path/to/foo.wav

It support only wave of with a single channel, 16kHz, 16-bit samples.
)";

class Client {
 public:
  Client(asio::io_context &io,  // NOLINT
         const std::string &ip, int16_t port, const std::vector<float> &samples,
         int32_t samples_per_message, float seconds_per_message)
      : io_(io),
        uri_(/*secure*/ false, ip, port, /*resource*/ "/"),
        samples_(samples),
        samples_per_message_(samples_per_message),
        seconds_per_message_(seconds_per_message) {
    c_.clear_access_channels(websocketpp::log::alevel::all);
    // c_.set_access_channels(websocketpp::log::alevel::connect);
    // c_.set_access_channels(websocketpp::log::alevel::disconnect);

    c_.init_asio(&io_);
    c_.set_open_handler([this](connection_hdl hdl) { OnOpen(hdl); });
    c_.set_close_handler(
        [](connection_hdl /*hdl*/) { SHERPA_ONNX_LOGE("Disconnected"); });
    c_.set_message_handler(
        [this](connection_hdl hdl, message_ptr msg) { OnMessage(hdl, msg); });

    Run();
  }

 private:
  void Run() {
    websocketpp::lib::error_code ec;
    client::connection_ptr con = c_.get_connection(uri_.str(), ec);
    if (ec) {
      SHERPA_ONNX_LOGE("Could not create connection to %s because %s",
                       uri_.str().c_str(), ec.message().c_str());
      exit(EXIT_FAILURE);
    }

    c_.connect(con);
  }

  void OnOpen(connection_hdl hdl) {
    auto start_time = std::chrono::steady_clock::now();
    asio::post(
        io_, [this, hdl, start_time]() { this->SendMessage(hdl, start_time); });
  }

  void OnMessage(connection_hdl hdl, message_ptr msg) {
    const std::string &payload = msg->get_payload();

    if (payload == "Done!") {
      websocketpp::lib::error_code ec;
      c_.close(hdl, websocketpp::close::status::normal, "I'm exiting now", ec);
      if (ec) {
        SHERPA_ONNX_LOGE("Failed to close because %s", ec.message().c_str());
        exit(EXIT_FAILURE);
      }
    } else {
      SHERPA_ONNX_LOGE("%s", payload.c_str());
    }
  }

  void SendMessage(
      connection_hdl hdl,
      std::chrono::time_point<std::chrono::steady_clock> start_time) {
    int32_t num_samples = samples_.size();
    int32_t num_messages = num_samples / samples_per_message_;

    websocketpp::lib::error_code ec;
    auto time = std::chrono::steady_clock::now();
    int elapsed_time_ms =
        std::chrono::duration_cast<std::chrono::milliseconds>(time - start_time)
            .count();

    if (elapsed_time_ms <
        static_cast<int>(seconds_per_message_ * num_sent_messages_ * 1000)) {
      std::this_thread::sleep_for(std::chrono::milliseconds(int(
          seconds_per_message_ * num_sent_messages_ * 1000 - elapsed_time_ms)));
    }

    if (num_sent_messages_ < 1) {
      SHERPA_ONNX_LOGE("Starting to send audio");
    }

    if (num_sent_messages_ < num_messages) {
      c_.send(hdl, samples_.data() + num_sent_messages_ * samples_per_message_,
              samples_per_message_ * sizeof(float),
              websocketpp::frame::opcode::binary, ec);

      if (ec) {
        SHERPA_ONNX_LOGE("Failed to send audio samples because %s",
                         ec.message().c_str());
        exit(EXIT_FAILURE);
      }

      ec.clear();

      ++num_sent_messages_;
    }

    if (num_sent_messages_ == num_messages) {
      int32_t remaining_samples = num_samples % samples_per_message_;
      if (remaining_samples) {
        c_.send(hdl,
                samples_.data() + num_sent_messages_ * samples_per_message_,
                remaining_samples * sizeof(float),
                websocketpp::frame::opcode::binary, ec);

        if (ec) {
          SHERPA_ONNX_LOGE("Failed to send audio samples because %s",
                           ec.message().c_str());
          exit(EXIT_FAILURE);
        }
        ec.clear();
      }

      // To signal that we have send all the messages
      c_.send(hdl, "Done", websocketpp::frame::opcode::text, ec);
      SHERPA_ONNX_LOGE("Sent Done Signal");

      if (ec) {
        SHERPA_ONNX_LOGE("Failed to send audio samples because %s",
                         ec.message().c_str());
        exit(EXIT_FAILURE);
      }
    } else {
      asio::post(io_, [this, hdl, start_time]() {
        this->SendMessage(hdl, start_time);
      });
    }
  }

 private:
  client c_;
  asio::io_context &io_;
  websocketpp::uri uri_;
  std::vector<float> samples_;
  int32_t samples_per_message_ = 8000;  // 0.5 seconds
  float seconds_per_message_ = 0.2;
  int32_t num_sent_messages_ = 0;
};

int32_t main(int32_t argc, char *argv[]) {
  std::string server_ip = "127.0.0.1";
  int32_t server_port = 6006;

  // Sample rate of the input wave. No resampling is made.
  int32_t sample_rate = 16000;
  int32_t samples_per_message = 8000;
  float seconds_per_message = 0.2;

  sherpa_onnx::ParseOptions po(kUsageMessage);

  po.Register("server-ip", &server_ip, "IP address of the websocket server");
  po.Register("server-port", &server_port, "Port of the websocket server");
  po.Register("sample-rate", &sample_rate,
              "Sample rate of the input wave. Should be the one expected by "
              "the server");

  po.Register("samples-per-message", &samples_per_message,
              "Send this number of samples per message.");

  po.Register("seconds-per-message", &seconds_per_message,
              "We will simulate that each message takes this number of seconds "
              "to send. If you select a very large value, it will take a long "
              "time to send all the samples");

  po.Read(argc, argv);

  if (!websocketpp::uri_helper::ipv4_literal(server_ip.begin(),
                                             server_ip.end())) {
    SHERPA_ONNX_LOGE("Invalid server IP: %s", server_ip.c_str());
    return -1;
  }

  if (server_port <= 0 || server_port > 65535) {
    SHERPA_ONNX_LOGE("Invalid server port: %d", server_port);
    return -1;
  }

  // 0.01 is an arbitrary value. You can change it.
  if (samples_per_message <= 0.01 * sample_rate) {
    SHERPA_ONNX_LOGE("--samples-per-message is too small: %d",
                     samples_per_message);
    return -1;
  }

  // 100 is an arbitrary value. You can change it.
  if (samples_per_message >= sample_rate * 100) {
    SHERPA_ONNX_LOGE("--samples-per-message is too small: %d",
                     samples_per_message);
    return -1;
  }

  if (seconds_per_message < 0) {
    SHERPA_ONNX_LOGE("--seconds-per-message is too small: %.3f",
                     seconds_per_message);
    return -1;
  }

  // 1 is an arbitrary value.
  if (seconds_per_message > 1) {
    SHERPA_ONNX_LOGE(
        "--seconds-per-message is too large: %.3f. You will wait a long time "
        "to "
        "send all the samples",
        seconds_per_message);
    return -1;
  }

  if (po.NumArgs() != 1) {
    po.PrintUsage();
    return -1;
  }

  std::string wave_filename = po.GetArg(1);

  bool is_ok = false;
  int32_t actual_sample_rate = -1;
  std::vector<float> samples =
      sherpa_onnx::ReadWave(wave_filename, &actual_sample_rate, &is_ok);

  if (!is_ok) {
    SHERPA_ONNX_LOGE("Failed to read '%s'", wave_filename.c_str());
    return -1;
  }

  if (actual_sample_rate != sample_rate) {
    SHERPA_ONNX_LOGE("Expected sample rate: %d, given %d", sample_rate,
                     actual_sample_rate);
    return -1;
  }

  asio::io_context io_conn;  // for network connections
  Client c(io_conn, server_ip, server_port, samples, samples_per_message,
           seconds_per_message);

  io_conn.run();  // will exit when the above connection is closed

  SHERPA_ONNX_LOGE("Done!");
  return 0;
}


================================================
FILE: sherpa-onnx/csrc/online-websocket-server-impl.cc
================================================
// sherpa-onnx/csrc/online-websocket-server-impl.cc
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/online-websocket-server-impl.h"

#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/log.h"

namespace sherpa_onnx {

void OnlineWebsocketDecoderConfig::Register(ParseOptions *po) {
  recognizer_config.Register(po);

  po->Register("loop-interval-ms", &loop_interval_ms,
               "It determines how often the decoder loop runs. ");

  po->Register("max-batch-size", &max_batch_size,
               "Max batch size for recognition.");

  po->Register("end-tail-padding", &end_tail_padding,
               "It determines the length of tail_padding at the end of audio.");
}

void OnlineWebsocketDecoderConfig::Validate() const {
  recognizer_config.Validate();
  SHERPA_ONNX_CHECK_GT(loop_interval_ms, 0);
  SHERPA_ONNX_CHECK_GT(max_batch_size, 0);
  SHERPA_ONNX_CHECK_GT(end_tail_padding, 0);
}

void OnlineWebsocketServerConfig::Register(sherpa_onnx::ParseOptions *po) {
  decoder_config.Register(po);

  po->Register("log-file", &log_file,
               "Path to the log file. Logs are "
               "appended to this file");
}

void OnlineWebsocketServerConfig::Validate() const {
  decoder_config.Validate();
}

OnlineWebsocketDecoder::OnlineWebsocketDecoder(OnlineWebsocketServer *server)
    : server_(server),
      config_(server->GetConfig().decoder_config),
      timer_(server->GetWorkContext()) {
  recognizer_ = std::make_unique<OnlineRecognizer>(config_.recognizer_config);
}

std::shared_ptr<Connection> OnlineWebsocketDecoder::GetOrCreateConnection(
    connection_hdl hdl) {
  std::lock_guard<std::mutex> lock(mutex_);
  auto it = connections_.find(hdl);
  if (it != connections_.end()) {
    return it->second;
  } else {
    // create a new connection
    std::shared_ptr<OnlineStream> s = recognizer_->CreateStream();
    auto c = std::make_shared<Connection>(hdl, s);
    connections_.insert({hdl, c});
    return c;
  }
}

void OnlineWebsocketDecoder::AcceptWaveform(std::shared_ptr<Connection> c) {
  std::lock_guard<std::mutex> lock(c->mutex);
  float sample_rate = config_.recognizer_config.feat_config.sampling_rate;
  while (!c->samples.empty()) {
    const auto &s = c->samples.front();
    c->s->AcceptWaveform(sample_rate, s.data(), s.size());
    c->samples.pop_front();
  }
}

void OnlineWebsocketDecoder::InputFinished(std::shared_ptr<Connection> c) {
  std::lock_guard<std::mutex> lock(c->mutex);

  float sample_rate = config_.recognizer_config.feat_config.sampling_rate;

  while (!c->samples.empty()) {
    const auto &s = c->samples.front();
    c->s->AcceptWaveform(sample_rate, s.data(), s.size());
    c->samples.pop_front();
  }

  std::vector<float> tail_padding(
      static_cast<int64_t>(config_.end_tail_padding * sample_rate));

  c->s->AcceptWaveform(sample_rate, tail_padding.data(), tail_padding.size());

  c->s->InputFinished();
  c->eof = true;
}

void OnlineWebsocketDecoder::Warmup() const {
  recognizer_->WarmpUpRecognizer(config_.recognizer_config.model_config.warm_up,
                                 config_.max_batch_size);
}

void OnlineWebsocketDecoder::Run() {
  timer_.expires_after(std::chrono::milliseconds(config_.loop_interval_ms));

  timer_.async_wait(
      [this](const asio::error_code &ec) { ProcessConnections(ec); });
}

void OnlineWebsocketDecoder::ProcessConnections(const asio::error_code &ec) {
  if (ec) {
    SHERPA_ONNX_LOG(FATAL) << "The decoder loop is aborted!";
  }

  std::lock_guard<std::mutex> lock(mutex_);
  std::vector<connection_hdl> to_remove;
  for (auto &p : connections_) {
    auto hdl = p.first;
    auto c = p.second;

    // The order of `if` below matters!
    if (!server_->Contains(hdl)) {
      // If the connection is disconnected, we stop processing it
      to_remove.push_back(hdl);
      continue;
    }

    if (active_.count(hdl)) {
      // Another thread is decoding this stream, so skip it
      continue;
    }

    if (!recognizer_->IsReady(c->s.get()) && !c->eof) {
      // this stream has not enough frames to decode, so skip it
      continue;
    }

    if (!recognizer_->IsReady(c->s.get()) && c->eof) {
      // We won't receive samples from the client, so send a Done! to client

      asio::post(server_->GetWorkContext(),
                 [this, hdl = c->hdl]() { server_->Send(hdl, "Done!"); });

      to_remove.push_back(hdl);
      continue;
    }

    // TODO(fangun): If the connection is timed out, we need to also
    // add it to `to_remove`

    // this stream has enough frames and is currently not processed by any
    // threads, so put it into the ready queue
    ready_connections_.push_back(c);

    // In `Decode()`, it will remove hdl from `active_`
    active_.insert(c->hdl);
  }

  for (auto hdl : to_remove) {
    connections_.erase(hdl);
  }

  if (!ready_connections_.empty()) {
    asio::post(server_->GetWorkContext(), [this]() { Decode(); });
  }

  // Schedule another call
  timer_.expires_after(std::chrono::milliseconds(config_.loop_interval_ms));

  timer_.async_wait(
      [this](const asio::error_code &ec) { ProcessConnections(ec); });
}

void OnlineWebsocketDecoder::Decode() {
  std::unique_lock<std::mutex> lock(mutex_);
  if (ready_connections_.empty()) {
    // There are no connections that are ready for decoding,
    // so we return directly
    return;
  }

  std::vector<std::shared_ptr<Connection>> c_vec;
  std::vector<OnlineStream *> s_vec;
  while (!ready_connections_.empty() &&
         static_cast<int32_t>(s_vec.size()) < config_.max_batch_size) {
    auto c = ready_connections_.front();
    ready_connections_.pop_front();

    c_vec.push_back(c);
    s_vec.push_back(c->s.get());
  }

  if (!ready_connections_.empty()) {
    // there are too many ready connections but this thread can only handle
    // max_batch_size connections at a time, so we schedule another call
    // to Decode() and let other threads to process the ready connections
    asio::post(server_->GetWorkContext(), [this]() { Decode(); });
  }

  lock.unlock();
  recognizer_->DecodeStreams(s_vec.data(), s_vec.size());
  lock.lock();

  for (auto c : c_vec) {
    auto result = recognizer_->GetResult(c->s.get());
    if (recognizer_->IsEndpoint(c->s.get())) {
      result.is_final = true;
      recognizer_->Reset(c->s.get());
    }

    if (!recognizer_->IsReady(c->s.get()) && c->eof) {
      result.is_final = true;
      result.is_eof = true;
    }

    asio::post(server_->GetConnectionContext(),
               [this, hdl = c->hdl, str = result.AsJsonString()]() {
                 server_->Send(hdl, str);
               });
    active_.erase(c->hdl);
  }
}

OnlineWebsocketServer::OnlineWebsocketServer(
    asio::io_context &io_conn, asio::io_context &io_work,
    const OnlineWebsocketServerConfig &config)
    : config_(config),
      io_conn_(io_conn),
      io_work_(io_work),
      log_(config.log_file, std::ios::app),
      tee_(std::cout, log_),
      decoder_(this) {
  SetupLog();

  server_.init_asio(&io_conn_);

  server_.set_open_handler([this](connection_hdl hdl) { OnOpen(hdl); });

  server_.set_close_handler([this](connection_hdl hdl) { OnClose(hdl); });

  server_.set_message_handler(
      [this](connection_hdl hdl, server::message_ptr msg) {
        OnMessage(hdl, msg);
      });
}

void OnlineWebsocketServer::Run(uint16_t port) {
  server_.set_reuse_addr(true);
  server_.listen(asio::ip::tcp::v4(), port);
  server_.start_accept();
  auto recognizer_config = config_.decoder_config.recognizer_config;
  int32_t warm_up = recognizer_config.model_config.warm_up;
  const std::string &model_type = recognizer_config.model_config.model_type;
  if (0 < warm_up && warm_up < 100) {
    if (model_type == "zipformer2") {
      decoder_.Warmup();
      SHERPA_ONNX_LOGE("Warm up completed : %d times.", warm_up);
    } else {
      SHERPA_ONNX_LOGE("Only Zipformer2 has warmup support for now.");
      SHERPA_ONNX_LOGE("Given: %s", model_type.c_str());
      exit(0);
    }
  } else if (warm_up == 0) {
    SHERPA_ONNX_LOGE("Starting without warmup!");
  } else {
    SHERPA_ONNX_LOGE("Invalid Warm up Value!. Expected 0 < warm_up < 100");
    exit(0);
  }
  decoder_.Run();
}

void OnlineWebsocketServer::SetupLog() {
  server_.clear_access_channels(websocketpp::log::alevel::all);
  // server_.set_access_channels(websocketpp::log::alevel::connect);
  // server_.set_access_channels(websocketpp::log::alevel::disconnect);

  // So that it also prints to std::cout and std::cerr
  server_.get_alog().set_ostream(&tee_);
  server_.get_elog().set_ostream(&tee_);
}

void OnlineWebsocketServer::Send(connection_hdl hdl, const std::string &text) {
  websocketpp::lib::error_code ec;
  if (!Contains(hdl)) {
    return;
  }

  server_.send(hdl, text, websocketpp::frame::opcode::text, ec);
  if (ec) {
    server_.get_alog().write(websocketpp::log::alevel::app, ec.message());
  }
}

void OnlineWebsocketServer::OnOpen(connection_hdl hdl) {
  std::lock_guard<std::mutex> lock(mutex_);
  connections_.insert(hdl);

  std::ostringstream os;
  os << "New connection: "
     << server_.get_con_from_hdl(hdl)->get_remote_endpoint() << ". "
     << "Number of active connections: " << connections_.size() << ".\n";
  SHERPA_ONNX_LOG(INFO) << os.str();
}

void OnlineWebsocketServer::OnClose(connection_hdl hdl) {
  std::lock_guard<std::mutex> lock(mutex_);
  connections_.erase(hdl);

  SHERPA_ONNX_LOG(INFO) << "Number of active connections: "
                        << connections_.size() << "\n";
}

bool OnlineWebsocketServer::Contains(connection_hdl hdl) const {
  std::lock_guard<std::mutex> lock(mutex_);
  return connections_.count(hdl);
}

void OnlineWebsocketServer::OnMessage(connection_hdl hdl,
                                      server::message_ptr msg) {
  auto c = decoder_.GetOrCreateConnection(hdl);

  const std::string &payload = msg->get_payload();

  switch (msg->get_opcode()) {
    case websocketpp::frame::opcode::text:
      if (payload == "Done") {
        asio::post(io_work_, [this, c]() { decoder_.InputFinished(c); });
      }
      break;
    case websocketpp::frame::opcode::binary: {
      auto p = reinterpret_cast<const float *>(payload.data());
      int32_t num_samples = payload.size() / sizeof(float);
      std::vector<float> samples(p, p + num_samples);

      {
        std::lock_guard<std::mutex> lock(c->mutex);
        c->samples.push_back(std::move(samples));
      }

      asio::post(io_work_, [this, c]() { decoder_.AcceptWaveform(c); });
      break;
    }
    default:
      break;
  }
}

void OnlineWebsocketServer::Close(connection_hdl hdl,
                                  websocketpp::close::status::value code,
                                  const std::string &reason) {
  auto con = server_.get_con_from_hdl(hdl);

  std::ostringstream os;
  os << "Closing " << con->get_remote_endpoint() << " with reason: " << reason
     << "\n";

  websocketpp::lib::error_code ec;
  server_.close(hdl, code, reason, ec);
  if (ec) {
    os << "Failed to close" << con->get_remote_endpoint() << ". "
       << ec.message() << "\n";
  }
  server_.get_alog().write(websocketpp::log::alevel::app, os.str());
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-websocket-server-impl.h
================================================
// sherpa-onnx/csrc/online-websocket-server-impl.h
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_ONLINE_WEBSOCKET_SERVER_IMPL_H_
#define SHERPA_ONNX_CSRC_ONLINE_WEBSOCKET_SERVER_IMPL_H_

#include <deque>
#include <fstream>
#include <map>
#include <memory>
#include <mutex>
#include <set>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>

#include "asio.hpp"  // NOLINT
#include "sherpa-onnx/csrc/online-recognizer.h"
#include "sherpa-onnx/csrc/online-stream.h"
#include "sherpa-onnx/csrc/parse-options.h"
#include "sherpa-onnx/csrc/tee-stream.h"
#include "websocketpp/config/asio_no_tls.hpp"  // TODO(fangjun): support TLS
#include "websocketpp/server.hpp"
using server = websocketpp::server<websocketpp::config::asio>;
using connection_hdl = websocketpp::connection_hdl;

namespace sherpa_onnx {

struct Connection {
  // handle to the connection. We can use it to send messages to the client
  connection_hdl hdl;
  std::shared_ptr<OnlineStream> s;

  // set it to true when InputFinished() is called
  bool eof = false;

  // The last time we received a message from the client
  // TODO(fangjun): Use it to disconnect from a client if it is inactive
  // for a specified time.
  std::chrono::steady_clock::time_point last_active;

  std::mutex mutex;  // protect samples

  // Audio samples received from the client.
  //
  // The I/O threads receive audio samples into this queue
  // and invoke work threads to compute features
  std::deque<std::vector<float>> samples;

  Connection() = default;
  Connection(connection_hdl hdl, std::shared_ptr<OnlineStream> s)
      : hdl(hdl), s(s), last_active(std::chrono::steady_clock::now()) {}
};

struct OnlineWebsocketDecoderConfig {
  OnlineRecognizerConfig recognizer_config;

  // It determines how often the decoder loop runs.
  int32_t loop_interval_ms = 10;

  int32_t max_batch_size = 5;

  float end_tail_padding = 0.8;

  void Register(ParseOptions *po);
  void Validate() const;
};

class OnlineWebsocketServer;

class OnlineWebsocketDecoder {
 public:
  /**
   * @param server  Not owned.
   */
  explicit OnlineWebsocketDecoder(OnlineWebsocketServer *server);

  std::shared_ptr<Connection> GetOrCreateConnection(connection_hdl hdl);

  // Compute features for a stream given audio samples
  void AcceptWaveform(std::shared_ptr<Connection> c);

  // signal that there will be no more audio samples for a stream
  void InputFinished(std::shared_ptr<Connection> c);

  void Warmup() const;

  void Run();

 private:
  void ProcessConnections(const asio::error_code &ec);

  /** It is called by one of the worker thread.
   */
  void Decode();

 private:
  OnlineWebsocketServer *server_;  // not owned
  std::unique_ptr<OnlineRecognizer> recognizer_;
  OnlineWebsocketDecoderConfig config_;
  asio::steady_timer timer_;

  // It protects `connections_`, `ready_connections_`, and `active_`
  std::mutex mutex_;

  std::map<connection_hdl, std::shared_ptr<Connection>,
           std::owner_less<connection_hdl>>
      connections_;

  // Whenever a connection has enough feature frames for decoding, we put
  // it in this queue
  std::deque<std::shared_ptr<Connection>> ready_connections_;

  // If we are decoding a stream, we put it in the active_ set so that
  // only one thread can decode a stream at a time.
  std::set<connection_hdl, std::owner_less<connection_hdl>> active_;
};

struct OnlineWebsocketServerConfig {
  OnlineWebsocketDecoderConfig decoder_config;

  std::string log_file = "./log.txt";

  void Register(sherpa_onnx::ParseOptions *po);
  void Validate() const;
};

class OnlineWebsocketServer {
 public:
  explicit OnlineWebsocketServer(asio::io_context &io_conn,  // NOLINT
                                 asio::io_context &io_work,  // NOLINT
                                 const OnlineWebsocketServerConfig &config);

  void Run(uint16_t port);

  const OnlineWebsocketServerConfig &GetConfig() const { return config_; }
  asio::io_context &GetConnectionContext() { return io_conn_; }
  asio::io_context &GetWorkContext() { return io_work_; }
  server &GetServer() { return server_; }

  void Send(connection_hdl hdl, const std::string &text);

  bool Contains(connection_hdl hdl) const;

 private:
  void SetupLog();

  // When a websocket client is connected, it will invoke this method
  // (Not for HTTP)
  void OnOpen(connection_hdl hdl);

  // When a websocket client is disconnected, it will invoke this method
  void OnClose(connection_hdl hdl);

  void OnMessage(connection_hdl hdl, server::message_ptr msg);

  // Close a websocket connection with given code and reason
  void Close(connection_hdl hdl, websocketpp::close::status::value code,
             const std::string &reason);

 private:
  OnlineWebsocketServerConfig config_;
  asio::io_context &io_conn_;
  asio::io_context &io_work_;
  server server_;

  std::ofstream log_;
  sherpa_onnx::TeeStream tee_;

  OnlineWebsocketDecoder decoder_;

  mutable std::mutex mutex_;

  std::set<connection_hdl, std::owner_less<connection_hdl>> connections_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_WEBSOCKET_SERVER_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/online-websocket-server.cc
================================================
// sherpa-onnx/csrc/online-websocket-server.cc
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#include <vector>

#include "asio.hpp"  // NOLINT
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/online-websocket-server-impl.h"
#include "sherpa-onnx/csrc/parse-options.h"

static constexpr const char *kUsageMessage = R"(
Automatic speech recognition with sherpa-onnx using websocket.

Usage:

./bin/sherpa-onnx-online-websocket-server --help

./bin/sherpa-onnx-online-websocket-server \
  --port=6006 \
  --num-work-threads=5 \
  --tokens=/path/to/tokens.txt \
  --encoder=/path/to/encoder.onnx \
  --decoder=/path/to/decoder.onnx \
  --joiner=/path/to/joiner.onnx \
  --log-file=./log.txt \
  --max-batch-size=5 \
  --loop-interval-ms=10

Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
for a list of pre-trained models to download.
)";

int32_t main(int32_t argc, char *argv[]) {
  sherpa_onnx::ParseOptions po(kUsageMessage);

  sherpa_onnx::OnlineWebsocketServerConfig config;

  // the server will listen on this port
  int32_t port = 6006;

  // size of the thread pool for handling network connections
  int32_t num_io_threads = 1;

  // size of the thread pool for neural network computation and decoding
  int32_t num_work_threads = 3;

  po.Register("num-io-threads", &num_io_threads,
              "Thread pool size for network connections.");

  po.Register("num-work-threads", &num_work_threads,
              "Thread pool size for for neural network "
              "computation and decoding.");

  po.Register("port", &port, "The port on which the server will listen.");

  config.Register(&po);

  if (argc == 1) {
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  po.Read(argc, argv);

  if (po.NumArgs() != 0) {
    SHERPA_ONNX_LOGE("Unrecognized positional arguments!");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  config.Validate();

  asio::io_context io_conn;  // for network connections
  asio::io_context io_work;  // for neural network and decoding

  sherpa_onnx::OnlineWebsocketServer server(io_conn, io_work, config);
  server.Run(port);

  SHERPA_ONNX_LOGE("Started!");
  SHERPA_ONNX_LOGE("Listening on: %d", port);
  SHERPA_ONNX_LOGE("Number of work threads: %d", num_work_threads);

  // give some work to do for the io_work pool
  auto work_guard = asio::make_work_guard(io_work);

  std::vector<std::thread> io_threads;

  // decrement since the main thread is also used for network communications
  for (int32_t i = 0; i < num_io_threads - 1; ++i) {
    io_threads.emplace_back([&io_conn]() { io_conn.run(); });
  }

  std::vector<std::thread> work_threads;
  for (int32_t i = 0; i < num_work_threads; ++i) {
    work_threads.emplace_back([&io_work]() { io_work.run(); });
  }

  io_conn.run();

  for (auto &t : io_threads) {
    t.join();
  }

  for (auto &t : work_threads) {
    t.join();
  }

  return 0;
}


================================================
FILE: sherpa-onnx/csrc/online-wenet-ctc-model-config.cc
================================================
// sherpa-onnx/csrc/online-wenet-ctc-model-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/online-wenet-ctc-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OnlineWenetCtcModelConfig::Register(ParseOptions *po) {
  po->Register("wenet-ctc-model", &model,
               "Path to CTC model.onnx from WeNet. Please see "
               "https://github.com/k2-fsa/sherpa-onnx/pull/425");
  po->Register("wenet-ctc-chunk-size", &chunk_size,
               "Chunk size after subsampling used for decoding.");
  po->Register("wenet-ctc-num-left-chunks", &num_left_chunks,
               "Number of left chunks after subsampling used for decoding.");
}

bool OnlineWenetCtcModelConfig::Validate() const {
  if (!FileExists(model)) {
    SHERPA_ONNX_LOGE("WeNet CTC model '%s' does not exist", model.c_str());
    return false;
  }

  if (chunk_size <= 0) {
    SHERPA_ONNX_LOGE(
        "Please specify a positive value for --wenet-ctc-chunk-size. Currently "
        "given: %d",
        chunk_size);
    return false;
  }

  if (num_left_chunks <= 0) {
    SHERPA_ONNX_LOGE(
        "Please specify a positive value for --wenet-ctc-num-left-chunks. "
        "Currently given: %d. Note that if you want to use -1, please consider "
        "using a non-streaming model.",
        num_left_chunks);
    return false;
  }

  return true;
}

std::string OnlineWenetCtcModelConfig::ToString() const {
  std::ostringstream os;

  os << "OnlineWenetCtcModelConfig(";
  os << "model=\"" << model << "\", ";
  os << "chunk_size=" << chunk_size << ", ";
  os << "num_left_chunks=" << num_left_chunks << ")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-wenet-ctc-model-config.h
================================================
// sherpa-onnx/csrc/online-wenet-ctc-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_ONLINE_WENET_CTC_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_ONLINE_WENET_CTC_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OnlineWenetCtcModelConfig {
  std::string model;

  // --chunk_size from wenet
  int32_t chunk_size = 16;

  // --num_left_chunks from wenet
  int32_t num_left_chunks = 4;

  OnlineWenetCtcModelConfig() = default;

  OnlineWenetCtcModelConfig(const std::string &model, int32_t chunk_size,
                            int32_t num_left_chunks)
      : model(model),
        chunk_size(chunk_size),
        num_left_chunks(num_left_chunks) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_WENET_CTC_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/online-wenet-ctc-model.cc
================================================
// sherpa-onnx/csrc/online-wenet-ctc-model.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/online-wenet-ctc-model.h"

#include <algorithm>
#include <cmath>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class OnlineWenetCtcModel::Impl {
 public:
  explicit Impl(const OnlineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    {
      auto buf = ReadFile(config.wenet_ctc.model);
      Init(buf.data(), buf.size());
    }
  }

  template <typename Manager>
  Impl(Manager *mgr, const OnlineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    {
      auto buf = ReadFile(mgr, config.wenet_ctc.model);
      Init(buf.data(), buf.size());
    }
  }

  std::vector<Ort::Value> Forward(Ort::Value x,
                                  std::vector<Ort::Value> states) {
    Ort::Value &attn_cache = states[0];
    Ort::Value &conv_cache = states[1];
    Ort::Value &offset = states[2];

    int32_t chunk_size = config_.wenet_ctc.chunk_size;
    int32_t left_chunks = config_.wenet_ctc.num_left_chunks;
    // build attn_mask
    std::array<int64_t, 3> attn_mask_shape{1, 1,
                                           required_cache_size_ + chunk_size};
    Ort::Value attn_mask = Ort::Value::CreateTensor<bool>(
        allocator_, attn_mask_shape.data(), attn_mask_shape.size());
    bool *p = attn_mask.GetTensorMutableData<bool>();
    int32_t chunk_idx =
        offset.GetTensorData<int64_t>()[0] / chunk_size - left_chunks;
    if (chunk_idx < left_chunks) {
      std::fill(p, p + required_cache_size_ - chunk_idx * chunk_size, 0);
      std::fill(p + required_cache_size_ - chunk_idx * chunk_size,
                p + attn_mask_shape[2], 1);
    } else {
      std::fill(p, p + attn_mask_shape[2], 1);
    }

    std::array<Ort::Value, 6> inputs = {std::move(x),
                                        View(&offset),
                                        View(&required_cache_size_tensor_),
                                        std::move(attn_cache),
                                        std::move(conv_cache),
                                        std::move(attn_mask)};

    auto out =
        sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(),
                   output_names_ptr_.data(), output_names_ptr_.size());

    offset.GetTensorMutableData<int64_t>()[0] +=
        out[0].GetTensorTypeAndShapeInfo().GetShape()[1];
    out.push_back(std::move(offset));

    return out;
  }

  int32_t VocabSize() const { return vocab_size_; }

  int32_t ChunkLength() const {
    // When chunk_size is 16, subsampling_factor_ is 4, right_context_ is 6,
    // the returned value is (16 - 1)*4 + 6 + 1 = 67
    return (config_.wenet_ctc.chunk_size - 1) * subsampling_factor_ +
           right_context_ + 1;
  }

  int32_t ChunkShift() const {
    return config_.wenet_ctc.chunk_size * subsampling_factor_;
  }

  OrtAllocator *Allocator() { return allocator_; }

  // Return a vector containing 3 tensors
  // - attn_cache
  // - conv_cache
  // - offset
  std::vector<Ort::Value> GetInitStates() {
    std::vector<Ort::Value> ans;
    ans.reserve(3);
    ans.push_back(View(&attn_cache_));
    ans.push_back(View(&conv_cache_));

    int64_t offset_shape = 1;

    Ort::Value offset =
        Ort::Value::CreateTensor<int64_t>(allocator_, &offset_shape, 1);

    offset.GetTensorMutableData<int64_t>()[0] = required_cache_size_;

    ans.push_back(std::move(offset));

    return ans;
  }

 private:
  void Init(void *model_data, size_t model_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);

    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      PrintModelMetadata(os, meta_data);
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
    SHERPA_ONNX_READ_META_DATA(head_, "head");
    SHERPA_ONNX_READ_META_DATA(num_blocks_, "num_blocks");
    SHERPA_ONNX_READ_META_DATA(output_size_, "output_size");
    SHERPA_ONNX_READ_META_DATA(cnn_module_kernel_, "cnn_module_kernel");
    SHERPA_ONNX_READ_META_DATA(right_context_, "right_context");
    SHERPA_ONNX_READ_META_DATA(subsampling_factor_, "subsampling_factor");
    SHERPA_ONNX_READ_META_DATA(vocab_size_, "vocab_size");

    required_cache_size_ =
        config_.wenet_ctc.chunk_size * config_.wenet_ctc.num_left_chunks;

    InitStates();
  }

  void InitStates() {
    std::array<int64_t, 4> attn_cache_shape{
        num_blocks_, head_, required_cache_size_, output_size_ / head_ * 2};
    attn_cache_ = Ort::Value::CreateTensor<float>(
        allocator_, attn_cache_shape.data(), attn_cache_shape.size());

    Fill<float>(&attn_cache_, 0);

    std::array<int64_t, 4> conv_cache_shape{num_blocks_, 1, output_size_,
                                            cnn_module_kernel_ - 1};
    conv_cache_ = Ort::Value::CreateTensor<float>(
        allocator_, conv_cache_shape.data(), conv_cache_shape.size());

    Fill<float>(&conv_cache_, 0);

    int64_t shape = 1;
    required_cache_size_tensor_ =
        Ort::Value::CreateTensor<int64_t>(allocator_, &shape, 1);

    required_cache_size_tensor_.GetTensorMutableData<int64_t>()[0] =
        required_cache_size_;
  }

 private:
  OnlineModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;

  int32_t head_ = 0;
  int32_t num_blocks_ = 0;
  int32_t output_size_ = 0;
  int32_t cnn_module_kernel_ = 0;
  int32_t right_context_ = 0;
  int32_t subsampling_factor_ = 0;
  int32_t vocab_size_ = 0;

  int32_t required_cache_size_ = 0;

  Ort::Value attn_cache_{nullptr};
  Ort::Value conv_cache_{nullptr};
  Ort::Value required_cache_size_tensor_{nullptr};
};

OnlineWenetCtcModel::OnlineWenetCtcModel(const OnlineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OnlineWenetCtcModel::OnlineWenetCtcModel(Manager *mgr,
                                         const OnlineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OnlineWenetCtcModel::~OnlineWenetCtcModel() = default;

std::vector<Ort::Value> OnlineWenetCtcModel::Forward(
    Ort::Value x, std::vector<Ort::Value> states) const {
  return impl_->Forward(std::move(x), std::move(states));
}

int32_t OnlineWenetCtcModel::VocabSize() const { return impl_->VocabSize(); }

int32_t OnlineWenetCtcModel::ChunkLength() const {
  return impl_->ChunkLength();
}

int32_t OnlineWenetCtcModel::ChunkShift() const { return impl_->ChunkShift(); }

OrtAllocator *OnlineWenetCtcModel::Allocator() const {
  return impl_->Allocator();
}

std::vector<Ort::Value> OnlineWenetCtcModel::GetInitStates() const {
  return impl_->GetInitStates();
}

std::vector<Ort::Value> OnlineWenetCtcModel::StackStates(
    std::vector<std::vector<Ort::Value>> states) const {
  if (states.size() != 1) {
    SHERPA_ONNX_LOGE("wenet CTC model supports only batch_size==1. Given: %d",
                     static_cast<int32_t>(states.size()));
    SHERPA_ONNX_EXIT(-1);
  }

  return std::move(states[0]);
}

std::vector<std::vector<Ort::Value>> OnlineWenetCtcModel::UnStackStates(
    std::vector<Ort::Value> states) const {
  std::vector<std::vector<Ort::Value>> ans(1);
  ans[0] = std::move(states);
  return ans;
}

#if __ANDROID_API__ >= 9
template OnlineWenetCtcModel::OnlineWenetCtcModel(
    AAssetManager *mgr, const OnlineModelConfig &config);
#endif

#if __OHOS__
template OnlineWenetCtcModel::OnlineWenetCtcModel(
    NativeResourceManager *mgr, const OnlineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-wenet-ctc-model.h
================================================
// sherpa-onnx/csrc/online-wenet-ctc-model.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_ONLINE_WENET_CTC_MODEL_H_
#define SHERPA_ONNX_CSRC_ONLINE_WENET_CTC_MODEL_H_

#include <memory>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/online-ctc-model.h"
#include "sherpa-onnx/csrc/online-model-config.h"

namespace sherpa_onnx {

class OnlineWenetCtcModel : public OnlineCtcModel {
 public:
  explicit OnlineWenetCtcModel(const OnlineModelConfig &config);

  template <typename Manager>
  OnlineWenetCtcModel(Manager *mgr, const OnlineModelConfig &config);

  ~OnlineWenetCtcModel() override;

  // A list of 3 tensors:
  //  - attn_cache
  //  - conv_cache
  //  - offset
  std::vector<Ort::Value> GetInitStates() const override;

  std::vector<Ort::Value> StackStates(
      std::vector<std::vector<Ort::Value>> states) const override;

  std::vector<std::vector<Ort::Value>> UnStackStates(
      std::vector<Ort::Value> states) const override;

  /**
   *
   * @param x A 3-D tensor of shape (N, T, C). N has to be 1.
   * @param states  It is from GetInitStates() or returned from this method.
   *
   * @return Return a list of tensors
   *    - ans[0] contains log_probs, of shape (N, T, C)
   *    - ans[1:] contains next_states
   */
  std::vector<Ort::Value> Forward(
      Ort::Value x, std::vector<Ort::Value> states) const override;

  /** Return the vocabulary size of the model
   */
  int32_t VocabSize() const override;

  /** Return an allocator for allocating memory
   */
  OrtAllocator *Allocator() const override;

  // The model accepts this number of frames before subsampling as input
  int32_t ChunkLength() const override;

  // Similar to frame_shift in feature extractor, after processing
  // ChunkLength() frames, we advance by ChunkShift() frames
  // before we process the next chunk.
  int32_t ChunkShift() const override;

  bool SupportBatchProcessing() const override { return false; }

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_WENET_CTC_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/online-zipformer-transducer-model.cc
================================================
// sherpa-onnx/csrc/online-zipformer-transducer-model.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/online-zipformer-transducer-model.h"

#include <algorithm>
#include <cassert>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/cat.h"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/online-transducer-decoder.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"
#include "sherpa-onnx/csrc/unbind.h"

namespace sherpa_onnx {

OnlineZipformerTransducerModel::OnlineZipformerTransducerModel(
    const OnlineModelConfig &config)
    : env_(ORT_LOGGING_LEVEL_ERROR),
      config_(config),
      sess_opts_(GetSessionOptions(config)),
      allocator_{} {
  {
    auto buf = ReadFile(config.transducer.encoder);
    InitEncoder(buf.data(), buf.size());
  }

  {
    auto buf = ReadFile(config.transducer.decoder);
    InitDecoder(buf.data(), buf.size());
  }

  {
    auto buf = ReadFile(config.transducer.joiner);
    InitJoiner(buf.data(), buf.size());
  }
}

template <typename Manager>
OnlineZipformerTransducerModel::OnlineZipformerTransducerModel(
    Manager *mgr, const OnlineModelConfig &config)
    : env_(ORT_LOGGING_LEVEL_ERROR),
      config_(config),
      sess_opts_(GetSessionOptions(config)),
      allocator_{} {
  {
    auto buf = ReadFile(mgr, config.transducer.encoder);
    InitEncoder(buf.data(), buf.size());
  }

  {
    auto buf = ReadFile(mgr, config.transducer.decoder);
    InitDecoder(buf.data(), buf.size());
  }

  {
    auto buf = ReadFile(mgr, config.transducer.joiner);
    InitJoiner(buf.data(), buf.size());
  }
}

void OnlineZipformerTransducerModel::InitEncoder(void *model_data,
                                                 size_t model_data_length) {
  encoder_sess_ = std::make_unique<Ort::Session>(env_, model_data,
                                                 model_data_length, sess_opts_);

  GetInputNames(encoder_sess_.get(), &encoder_input_names_,
                &encoder_input_names_ptr_);

  GetOutputNames(encoder_sess_.get(), &encoder_output_names_,
                 &encoder_output_names_ptr_);

  // get meta data
  Ort::ModelMetadata meta_data = encoder_sess_->GetModelMetadata();
  if (config_.debug) {
    std::ostringstream os;
    os << "---encoder---\n";
    PrintModelMetadata(os, meta_data);
#if __OHOS__
    SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
    SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
  }

  Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
  SHERPA_ONNX_READ_META_DATA_VEC(encoder_dims_, "encoder_dims");
  SHERPA_ONNX_READ_META_DATA_VEC(attention_dims_, "attention_dims");
  SHERPA_ONNX_READ_META_DATA_VEC(num_encoder_layers_, "num_encoder_layers");
  SHERPA_ONNX_READ_META_DATA_VEC(cnn_module_kernels_, "cnn_module_kernels");
  SHERPA_ONNX_READ_META_DATA_VEC(left_context_len_, "left_context_len");

  SHERPA_ONNX_READ_META_DATA(T_, "T");
  SHERPA_ONNX_READ_META_DATA(decode_chunk_len_, "decode_chunk_len");

  if (config_.debug) {
    auto print = [](const std::vector<int32_t> &v, const char *name) {
      std::ostringstream os;
      os << name << ": ";
      for (auto i : v) {
        os << i << " ";
      }
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    };
    print(encoder_dims_, "encoder_dims");
    print(attention_dims_, "attention_dims");
    print(num_encoder_layers_, "num_encoder_layers");
    print(cnn_module_kernels_, "cnn_module_kernels");
    print(left_context_len_, "left_context_len");
#if __OHOS__
    SHERPA_ONNX_LOGE("T: %{public}d", T_);
    SHERPA_ONNX_LOGE("decode_chunk_len_: %{public}d", decode_chunk_len_);
#else
    SHERPA_ONNX_LOGE("T: %d", T_);
    SHERPA_ONNX_LOGE("decode_chunk_len_: %d", decode_chunk_len_);
#endif
  }
}

void OnlineZipformerTransducerModel::InitDecoder(void *model_data,
                                                 size_t model_data_length) {
  decoder_sess_ = std::make_unique<Ort::Session>(env_, model_data,
                                                 model_data_length, sess_opts_);

  GetInputNames(decoder_sess_.get(), &decoder_input_names_,
                &decoder_input_names_ptr_);

  GetOutputNames(decoder_sess_.get(), &decoder_output_names_,
                 &decoder_output_names_ptr_);

  // get meta data
  Ort::ModelMetadata meta_data = decoder_sess_->GetModelMetadata();
  if (config_.debug) {
    std::ostringstream os;
    os << "---decoder---\n";
    PrintModelMetadata(os, meta_data);
#if __OHOS__
    SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
    SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
  }

  Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
  SHERPA_ONNX_READ_META_DATA(vocab_size_, "vocab_size");
  SHERPA_ONNX_READ_META_DATA(context_size_, "context_size");
}

void OnlineZipformerTransducerModel::InitJoiner(void *model_data,
                                                size_t model_data_length) {
  joiner_sess_ = std::make_unique<Ort::Session>(env_, model_data,
                                                model_data_length, sess_opts_);

  GetInputNames(joiner_sess_.get(), &joiner_input_names_,
                &joiner_input_names_ptr_);

  GetOutputNames(joiner_sess_.get(), &joiner_output_names_,
                 &joiner_output_names_ptr_);

  // get meta data
  Ort::ModelMetadata meta_data = joiner_sess_->GetModelMetadata();
  if (config_.debug) {
    std::ostringstream os;
    os << "---joiner---\n";
    PrintModelMetadata(os, meta_data);
#if __OHOS__
    SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
    SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
  }
}

std::vector<Ort::Value> OnlineZipformerTransducerModel::StackStates(
    const std::vector<std::vector<Ort::Value>> &states) const {
  int32_t batch_size = static_cast<int32_t>(states.size());
  int32_t num_encoders = static_cast<int32_t>(num_encoder_layers_.size());

  std::vector<const Ort::Value *> buf(batch_size);

  std::vector<Ort::Value> ans;
  ans.reserve(states[0].size());

  auto allocator =
      const_cast<OnlineZipformerTransducerModel *>(this)->allocator_;

  // cached_len
  for (int32_t i = 0; i != num_encoders; ++i) {
    for (int32_t n = 0; n != batch_size; ++n) {
      buf[n] = &states[n][i];
    }
    auto v = Cat<int64_t>(allocator, buf, 1);  // (num_layers, 1)
    ans.push_back(std::move(v));
  }

  // cached_avg
  for (int32_t i = 0; i != num_encoders; ++i) {
    for (int32_t n = 0; n != batch_size; ++n) {
      buf[n] = &states[n][num_encoders + i];
    }
    auto v = Cat(allocator, buf, 1);  // (num_layers, 1, encoder_dims)
    ans.push_back(std::move(v));
  }

  // cached_key
  for (int32_t i = 0; i != num_encoders; ++i) {
    for (int32_t n = 0; n != batch_size; ++n) {
      buf[n] = &states[n][num_encoders * 2 + i];
    }
    // (num_layers, left_context_len, 1, attention_dims)
    auto v = Cat(allocator, buf, 2);
    ans.push_back(std::move(v));
  }

  // cached_val
  for (int32_t i = 0; i != num_encoders; ++i) {
    for (int32_t n = 0; n != batch_size; ++n) {
      buf[n] = &states[n][num_encoders * 3 + i];
    }
    // (num_layers, left_context_len, 1, attention_dims/2)
    auto v = Cat(allocator, buf, 2);
    ans.push_back(std::move(v));
  }

  // cached_val2
  for (int32_t i = 0; i != num_encoders; ++i) {
    for (int32_t n = 0; n != batch_size; ++n) {
      buf[n] = &states[n][num_encoders * 4 + i];
    }
    // (num_layers, left_context_len, 1, attention_dims/2)
    auto v = Cat(allocator, buf, 2);
    ans.push_back(std::move(v));
  }

  // cached_conv1
  for (int32_t i = 0; i != num_encoders; ++i) {
    for (int32_t n = 0; n != batch_size; ++n) {
      buf[n] = &states[n][num_encoders * 5 + i];
    }
    // (num_layers, 1, encoder_dims, cnn_module_kernels-1)
    auto v = Cat(allocator, buf, 1);
    ans.push_back(std::move(v));
  }

  // cached_conv2
  for (int32_t i = 0; i != num_encoders; ++i) {
    for (int32_t n = 0; n != batch_size; ++n) {
      buf[n] = &states[n][num_encoders * 6 + i];
    }
    // (num_layers, 1, encoder_dims, cnn_module_kernels-1)
    auto v = Cat(allocator, buf, 1);
    ans.push_back(std::move(v));
  }

  return ans;
}

std::vector<std::vector<Ort::Value>>
OnlineZipformerTransducerModel::UnStackStates(
    const std::vector<Ort::Value> &states) const {
  assert(states.size() == num_encoder_layers_.size() * 7);

  int32_t batch_size = states[0].GetTensorTypeAndShapeInfo().GetShape()[1];
  int32_t num_encoders = num_encoder_layers_.size();

  auto allocator =
      const_cast<OnlineZipformerTransducerModel *>(this)->allocator_;

  std::vector<std::vector<Ort::Value>> ans;
  ans.resize(batch_size);

  // cached_len
  for (int32_t i = 0; i != num_encoders; ++i) {
    auto v = Unbind<int64_t>(allocator, &states[i], 1);
    assert(v.size() == batch_size);

    for (int32_t n = 0; n != batch_size; ++n) {
      ans[n].push_back(std::move(v[n]));
    }
  }

  // cached_avg
  for (int32_t i = num_encoders; i != 2 * num_encoders; ++i) {
    auto v = Unbind(allocator, &states[i], 1);
    assert(v.size() == batch_size);

    for (int32_t n = 0; n != batch_size; ++n) {
      ans[n].push_back(std::move(v[n]));
    }
  }

  // cached_key
  for (int32_t i = 2 * num_encoders; i != 3 * num_encoders; ++i) {
    auto v = Unbind(allocator, &states[i], 2);
    assert(v.size() == batch_size);

    for (int32_t n = 0; n != batch_size; ++n) {
      ans[n].push_back(std::move(v[n]));
    }
  }

  // cached_val
  for (int32_t i = 3 * num_encoders; i != 4 * num_encoders; ++i) {
    auto v = Unbind(allocator, &states[i], 2);
    assert(v.size() == batch_size);

    for (int32_t n = 0; n != batch_size; ++n) {
      ans[n].push_back(std::move(v[n]));
    }
  }

  // cached_val2
  for (int32_t i = 4 * num_encoders; i != 5 * num_encoders; ++i) {
    auto v = Unbind(allocator, &states[i], 2);
    assert(v.size() == batch_size);

    for (int32_t n = 0; n != batch_size; ++n) {
      ans[n].push_back(std::move(v[n]));
    }
  }

  // cached_conv1
  for (int32_t i = 5 * num_encoders; i != 6 * num_encoders; ++i) {
    auto v = Unbind(allocator, &states[i], 1);
    assert(v.size() == batch_size);

    for (int32_t n = 0; n != batch_size; ++n) {
      ans[n].push_back(std::move(v[n]));
    }
  }

  // cached_conv2
  for (int32_t i = 6 * num_encoders; i != 7 * num_encoders; ++i) {
    auto v = Unbind(allocator, &states[i], 1);
    assert(v.size() == batch_size);

    for (int32_t n = 0; n != batch_size; ++n) {
      ans[n].push_back(std::move(v[n]));
    }
  }

  return ans;
}

std::vector<Ort::Value> OnlineZipformerTransducerModel::GetEncoderInitStates() {
  // Please see
  // https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer.py#L673
  // for details

  int32_t n = static_cast<int32_t>(encoder_dims_.size());
  std::vector<Ort::Value> cached_len_vec;
  std::vector<Ort::Value> cached_avg_vec;
  std::vector<Ort::Value> cached_key_vec;
  std::vector<Ort::Value> cached_val_vec;
  std::vector<Ort::Value> cached_val2_vec;
  std::vector<Ort::Value> cached_conv1_vec;
  std::vector<Ort::Value> cached_conv2_vec;

  cached_len_vec.reserve(n);
  cached_avg_vec.reserve(n);
  cached_key_vec.reserve(n);
  cached_val_vec.reserve(n);
  cached_val2_vec.reserve(n);
  cached_conv1_vec.reserve(n);
  cached_conv2_vec.reserve(n);

  for (int32_t i = 0; i != n; ++i) {
    {
      std::array<int64_t, 2> s{num_encoder_layers_[i], 1};
      auto v =
          Ort::Value::CreateTensor<int64_t>(allocator_, s.data(), s.size());
      Fill<int64_t>(&v, 0);
      cached_len_vec.push_back(std::move(v));
    }

    {
      std::array<int64_t, 3> s{num_encoder_layers_[i], 1, encoder_dims_[i]};
      auto v = Ort::Value::CreateTensor<float>(allocator_, s.data(), s.size());
      Fill(&v, 0);
      cached_avg_vec.push_back(std::move(v));
    }

    {
      std::array<int64_t, 4> s{num_encoder_layers_[i], left_context_len_[i], 1,
                               attention_dims_[i]};
      auto v = Ort::Value::CreateTensor<float>(allocator_, s.data(), s.size());
      Fill(&v, 0);
      cached_key_vec.push_back(std::move(v));
    }

    {
      std::array<int64_t, 4> s{num_encoder_layers_[i], left_context_len_[i], 1,
                               attention_dims_[i] / 2};
      auto v = Ort::Value::CreateTensor<float>(allocator_, s.data(), s.size());
      Fill(&v, 0);
      cached_val_vec.push_back(std::move(v));
    }

    {
      std::array<int64_t, 4> s{num_encoder_layers_[i], left_context_len_[i], 1,
                               attention_dims_[i] / 2};
      auto v = Ort::Value::CreateTensor<float>(allocator_, s.data(), s.size());
      Fill(&v, 0);
      cached_val2_vec.push_back(std::move(v));
    }

    {
      std::array<int64_t, 4> s{num_encoder_layers_[i], 1, encoder_dims_[i],
                               cnn_module_kernels_[i] - 1};
      auto v = Ort::Value::CreateTensor<float>(allocator_, s.data(), s.size());
      Fill(&v, 0);
      cached_conv1_vec.push_back(std::move(v));
    }

    {
      std::array<int64_t, 4> s{num_encoder_layers_[i], 1, encoder_dims_[i],
                               cnn_module_kernels_[i] - 1};
      auto v = Ort::Value::CreateTensor<float>(allocator_, s.data(), s.size());
      Fill(&v, 0);
      cached_conv2_vec.push_back(std::move(v));
    }
  }

  std::vector<Ort::Value> ans;
  ans.reserve(n * 7);

  for (auto &v : cached_len_vec) ans.push_back(std::move(v));
  for (auto &v : cached_avg_vec) ans.push_back(std::move(v));
  for (auto &v : cached_key_vec) ans.push_back(std::move(v));
  for (auto &v : cached_val_vec) ans.push_back(std::move(v));
  for (auto &v : cached_val2_vec) ans.push_back(std::move(v));
  for (auto &v : cached_conv1_vec) ans.push_back(std::move(v));
  for (auto &v : cached_conv2_vec) ans.push_back(std::move(v));

  return ans;
}

std::pair<Ort::Value, std::vector<Ort::Value>>
OnlineZipformerTransducerModel::RunEncoder(Ort::Value features,
                                           std::vector<Ort::Value> states,
                                           Ort::Value /* processed_frames */) {
  std::vector<Ort::Value> encoder_inputs;
  encoder_inputs.reserve(1 + states.size());

  encoder_inputs.push_back(std::move(features));
  for (auto &v : states) {
    encoder_inputs.push_back(std::move(v));
  }

  auto encoder_out = encoder_sess_->Run(
      {}, encoder_input_names_ptr_.data(), encoder_inputs.data(),
      encoder_inputs.size(), encoder_output_names_ptr_.data(),
      encoder_output_names_ptr_.size());

  std::vector<Ort::Value> next_states;
  next_states.reserve(states.size());

  for (int32_t i = 1; i != static_cast<int32_t>(encoder_out.size()); ++i) {
    next_states.push_back(std::move(encoder_out[i]));
  }

  return {std::move(encoder_out[0]), std::move(next_states)};
}

Ort::Value OnlineZipformerTransducerModel::RunDecoder(
    Ort::Value decoder_input) {
  auto decoder_out = decoder_sess_->Run(
      {}, decoder_input_names_ptr_.data(), &decoder_input, 1,
      decoder_output_names_ptr_.data(), decoder_output_names_ptr_.size());
  return std::move(decoder_out[0]);
}

Ort::Value OnlineZipformerTransducerModel::RunJoiner(Ort::Value encoder_out,
                                                     Ort::Value decoder_out) {
  std::array<Ort::Value, 2> joiner_input = {std::move(encoder_out),
                                            std::move(decoder_out)};
  auto logit =
      joiner_sess_->Run({}, joiner_input_names_ptr_.data(), joiner_input.data(),
                        joiner_input.size(), joiner_output_names_ptr_.data(),
                        joiner_output_names_ptr_.size());

  return std::move(logit[0]);
}

#if __ANDROID_API__ >= 9
template OnlineZipformerTransducerModel::OnlineZipformerTransducerModel(
    AAssetManager *mgr, const OnlineModelConfig &config);
#endif

#if __OHOS__
template OnlineZipformerTransducerModel::OnlineZipformerTransducerModel(
    NativeResourceManager *mgr, const OnlineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-zipformer-transducer-model.h
================================================
// sherpa-onnx/csrc/online-zipformer-transducer-model.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_ONLINE_ZIPFORMER_TRANSDUCER_MODEL_H_
#define SHERPA_ONNX_CSRC_ONLINE_ZIPFORMER_TRANSDUCER_MODEL_H_

#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/online-model-config.h"
#include "sherpa-onnx/csrc/online-transducer-model.h"

namespace sherpa_onnx {

class OnlineZipformerTransducerModel : public OnlineTransducerModel {
 public:
  explicit OnlineZipformerTransducerModel(const OnlineModelConfig &config);

  template <typename Manager>
  OnlineZipformerTransducerModel(Manager *mgr, const OnlineModelConfig &config);

  std::vector<Ort::Value> StackStates(
      const std::vector<std::vector<Ort::Value>> &states) const override;

  std::vector<std::vector<Ort::Value>> UnStackStates(
      const std::vector<Ort::Value> &states) const override;

  std::vector<Ort::Value> GetEncoderInitStates() override;

  std::pair<Ort::Value, std::vector<Ort::Value>> RunEncoder(
      Ort::Value features, std::vector<Ort::Value> states,
      Ort::Value processed_frames) override;

  Ort::Value RunDecoder(Ort::Value decoder_input) override;

  Ort::Value RunJoiner(Ort::Value encoder_out, Ort::Value decoder_out) override;

  int32_t ContextSize() const override { return context_size_; }

  int32_t ChunkSize() const override { return T_; }

  int32_t ChunkShift() const override { return decode_chunk_len_; }

  int32_t VocabSize() const override { return vocab_size_; }
  OrtAllocator *Allocator() override { return allocator_; }

 private:
  void InitEncoder(void *model_data, size_t model_data_length);
  void InitDecoder(void *model_data, size_t model_data_length);
  void InitJoiner(void *model_data, size_t model_data_length);

 private:
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> encoder_sess_;
  std::unique_ptr<Ort::Session> decoder_sess_;
  std::unique_ptr<Ort::Session> joiner_sess_;

  std::vector<std::string> encoder_input_names_;
  std::vector<const char *> encoder_input_names_ptr_;

  std::vector<std::string> encoder_output_names_;
  std::vector<const char *> encoder_output_names_ptr_;

  std::vector<std::string> decoder_input_names_;
  std::vector<const char *> decoder_input_names_ptr_;

  std::vector<std::string> decoder_output_names_;
  std::vector<const char *> decoder_output_names_ptr_;

  std::vector<std::string> joiner_input_names_;
  std::vector<const char *> joiner_input_names_ptr_;

  std::vector<std::string> joiner_output_names_;
  std::vector<const char *> joiner_output_names_ptr_;

  OnlineModelConfig config_;

  std::vector<int32_t> encoder_dims_;
  std::vector<int32_t> attention_dims_;
  std::vector<int32_t> num_encoder_layers_;
  std::vector<int32_t> cnn_module_kernels_;
  std::vector<int32_t> left_context_len_;

  int32_t T_ = 0;
  int32_t decode_chunk_len_ = 0;

  int32_t context_size_ = 0;
  int32_t vocab_size_ = 0;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_ZIPFORMER_TRANSDUCER_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/online-zipformer2-ctc-model-config.cc
================================================
// sherpa-onnx/csrc/online-zipformer2-ctc-model-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/online-zipformer2-ctc-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OnlineZipformer2CtcModelConfig::Register(ParseOptions *po) {
  po->Register("zipformer2-ctc-model", &model,
               "Path to CTC model.onnx. See also "
               "https://github.com/k2-fsa/icefall/pull/1413");
}

bool OnlineZipformer2CtcModelConfig::Validate() const {
  if (model.empty()) {
    SHERPA_ONNX_LOGE("--zipformer2-ctc-model is empty!");
    return false;
  }

  if (!FileExists(model)) {
    SHERPA_ONNX_LOGE("--zipformer2-ctc-model '%s' does not exist",
                     model.c_str());
    return false;
  }

  return true;
}

std::string OnlineZipformer2CtcModelConfig::ToString() const {
  std::ostringstream os;

  os << "OnlineZipformer2CtcModelConfig(";
  os << "model=\"" << model << "\")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-zipformer2-ctc-model-config.h
================================================
// sherpa-onnx/csrc/online-zipformer2-ctc-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_ONLINE_ZIPFORMER2_CTC_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_ONLINE_ZIPFORMER2_CTC_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OnlineZipformer2CtcModelConfig {
  std::string model;

  OnlineZipformer2CtcModelConfig() = default;

  explicit OnlineZipformer2CtcModelConfig(const std::string &model)
      : model(model) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_ZIPFORMER2_CTC_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/online-zipformer2-ctc-model.cc
================================================
// sherpa-onnx/csrc/online-zipformer2-ctc-model.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/online-zipformer2-ctc-model.h"

#include <algorithm>
#include <cassert>
#include <cmath>
#include <memory>
#include <numeric>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/cat.h"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"
#include "sherpa-onnx/csrc/unbind.h"

namespace sherpa_onnx {

class OnlineZipformer2CtcModel::Impl {
 public:
  explicit Impl(const OnlineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    {
      auto buf = ReadFile(config.zipformer2_ctc.model);
      Init(buf.data(), buf.size());
    }
  }

  template <typename Manager>
  Impl(Manager *mgr, const OnlineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    {
      auto buf = ReadFile(mgr, config.zipformer2_ctc.model);
      Init(buf.data(), buf.size());
    }
  }

  std::vector<Ort::Value> Forward(Ort::Value features,
                                  std::vector<Ort::Value> states) {
    std::vector<Ort::Value> inputs;
    inputs.reserve(1 + states.size());

    inputs.push_back(std::move(features));
    for (auto &v : states) {
      inputs.push_back(std::move(v));
    }

    return sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(),
                      output_names_ptr_.data(), output_names_ptr_.size());
  }

  int32_t VocabSize() const { return vocab_size_; }

  int32_t ChunkLength() const { return T_; }

  int32_t ChunkShift() const { return decode_chunk_len_; }

  bool UseWhisperFeature() const { return use_whisper_feature_; }

  OrtAllocator *Allocator() { return allocator_; }

  // Return a vector containing 3 tensors
  // - attn_cache
  // - conv_cache
  // - offset
  std::vector<Ort::Value> GetInitStates() {
    std::vector<Ort::Value> ans;
    ans.reserve(initial_states_.size());
    for (auto &s : initial_states_) {
      ans.push_back(View(&s));
    }
    return ans;
  }

  std::vector<Ort::Value> StackStates(
      std::vector<std::vector<Ort::Value>> states) {
    int32_t batch_size = static_cast<int32_t>(states.size());

    std::vector<const Ort::Value *> buf(batch_size);

    std::vector<Ort::Value> ans;
    int32_t num_states = static_cast<int32_t>(states[0].size());
    ans.reserve(num_states);

    for (int32_t i = 0; i != (num_states - 2) / 6; ++i) {
      {
        for (int32_t n = 0; n != batch_size; ++n) {
          buf[n] = &states[n][6 * i];
        }
        auto v = Cat(allocator_, buf, 1);
        ans.push_back(std::move(v));
      }
      {
        for (int32_t n = 0; n != batch_size; ++n) {
          buf[n] = &states[n][6 * i + 1];
        }
        auto v = Cat(allocator_, buf, 1);
        ans.push_back(std::move(v));
      }
      {
        for (int32_t n = 0; n != batch_size; ++n) {
          buf[n] = &states[n][6 * i + 2];
        }
        auto v = Cat(allocator_, buf, 1);
        ans.push_back(std::move(v));
      }
      {
        for (int32_t n = 0; n != batch_size; ++n) {
          buf[n] = &states[n][6 * i + 3];
        }
        auto v = Cat(allocator_, buf, 1);
        ans.push_back(std::move(v));
      }
      {
        for (int32_t n = 0; n != batch_size; ++n) {
          buf[n] = &states[n][6 * i + 4];
        }
        auto v = Cat(allocator_, buf, 0);
        ans.push_back(std::move(v));
      }
      {
        for (int32_t n = 0; n != batch_size; ++n) {
          buf[n] = &states[n][6 * i + 5];
        }
        auto v = Cat(allocator_, buf, 0);
        ans.push_back(std::move(v));
      }
    }

    {
      for (int32_t n = 0; n != batch_size; ++n) {
        buf[n] = &states[n][num_states - 2];
      }
      auto v = Cat(allocator_, buf, 0);
      ans.push_back(std::move(v));
    }

    {
      for (int32_t n = 0; n != batch_size; ++n) {
        buf[n] = &states[n][num_states - 1];
      }
      auto v = Cat<int64_t>(allocator_, buf, 0);
      ans.push_back(std::move(v));
    }
    return ans;
  }

  std::vector<std::vector<Ort::Value>> UnStackStates(
      std::vector<Ort::Value> states) {
    int32_t m = std::accumulate(num_encoder_layers_.begin(),
                                num_encoder_layers_.end(), 0);
    assert(states.size() == m * 6 + 2);

    int32_t batch_size = states[0].GetTensorTypeAndShapeInfo().GetShape()[1];

    std::vector<std::vector<Ort::Value>> ans;
    ans.resize(batch_size);

    for (int32_t i = 0; i != m; ++i) {
      {
        auto v = Unbind(allocator_, &states[i * 6], 1);
        assert(v.size() == batch_size);

        for (int32_t n = 0; n != batch_size; ++n) {
          ans[n].push_back(std::move(v[n]));
        }
      }
      {
        auto v = Unbind(allocator_, &states[i * 6 + 1], 1);
        assert(v.size() == batch_size);

        for (int32_t n = 0; n != batch_size; ++n) {
          ans[n].push_back(std::move(v[n]));
        }
      }
      {
        auto v = Unbind(allocator_, &states[i * 6 + 2], 1);
        assert(v.size() == batch_size);

        for (int32_t n = 0; n != batch_size; ++n) {
          ans[n].push_back(std::move(v[n]));
        }
      }
      {
        auto v = Unbind(allocator_, &states[i * 6 + 3], 1);
        assert(v.size() == batch_size);

        for (int32_t n = 0; n != batch_size; ++n) {
          ans[n].push_back(std::move(v[n]));
        }
      }
      {
        auto v = Unbind(allocator_, &states[i * 6 + 4], 0);
        assert(v.size() == batch_size);

        for (int32_t n = 0; n != batch_size; ++n) {
          ans[n].push_back(std::move(v[n]));
        }
      }
      {
        auto v = Unbind(allocator_, &states[i * 6 + 5], 0);
        assert(v.size() == batch_size);

        for (int32_t n = 0; n != batch_size; ++n) {
          ans[n].push_back(std::move(v[n]));
        }
      }
    }

    {
      auto v = Unbind(allocator_, &states[m * 6], 0);
      assert(v.size() == batch_size);

      for (int32_t n = 0; n != batch_size; ++n) {
        ans[n].push_back(std::move(v[n]));
      }
    }
    {
      auto v = Unbind<int64_t>(allocator_, &states[m * 6 + 1], 0);
      assert(v.size() == batch_size);

      for (int32_t n = 0; n != batch_size; ++n) {
        ans[n].push_back(std::move(v[n]));
      }
    }

    return ans;
  }

 private:
  void Init(void *model_data, size_t model_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);

    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      os << "---zipformer2_ctc---\n";
      PrintModelMetadata(os, meta_data);
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
    SHERPA_ONNX_READ_META_DATA_VEC(encoder_dims_, "encoder_dims");
    SHERPA_ONNX_READ_META_DATA_VEC(query_head_dims_, "query_head_dims");
    SHERPA_ONNX_READ_META_DATA_VEC(value_head_dims_, "value_head_dims");
    SHERPA_ONNX_READ_META_DATA_VEC(num_heads_, "num_heads");
    SHERPA_ONNX_READ_META_DATA_VEC(num_encoder_layers_, "num_encoder_layers");
    SHERPA_ONNX_READ_META_DATA_VEC(cnn_module_kernels_, "cnn_module_kernels");
    SHERPA_ONNX_READ_META_DATA_VEC(left_context_len_, "left_context_len");

    SHERPA_ONNX_READ_META_DATA(T_, "T");
    SHERPA_ONNX_READ_META_DATA(decode_chunk_len_, "decode_chunk_len");

    std::string feature_type;
    SHERPA_ONNX_READ_META_DATA_STR_WITH_DEFAULT(feature_type, "feature", "");
    if (feature_type == "whisper") {
      use_whisper_feature_ = true;
    }

    {
      auto shape =
          sess_->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
      vocab_size_ = shape[2];
    }

    if (config_.debug) {
      auto print = [](const std::vector<int32_t> &v, const char *name) {
        std::ostringstream os;
        os << name << ": ";
        for (auto i : v) {
          os << i << " ";
        }
        SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
      };
      print(encoder_dims_, "encoder_dims");
      print(query_head_dims_, "query_head_dims");
      print(value_head_dims_, "value_head_dims");
      print(num_heads_, "num_heads");
      print(num_encoder_layers_, "num_encoder_layers");
      print(cnn_module_kernels_, "cnn_module_kernels");
      print(left_context_len_, "left_context_len");
      SHERPA_ONNX_LOGE("T: %d", T_);
      SHERPA_ONNX_LOGE("decode_chunk_len_: %d", decode_chunk_len_);
      SHERPA_ONNX_LOGE("vocab_size_: %d", vocab_size_);
    }

    InitStates();
  }

  void InitStates() {
    int32_t n = static_cast<int32_t>(encoder_dims_.size());
    int32_t m = std::accumulate(num_encoder_layers_.begin(),
                                num_encoder_layers_.end(), 0);
    initial_states_.reserve(m * 6 + 2);

    for (int32_t i = 0; i != n; ++i) {
      int32_t num_layers = num_encoder_layers_[i];
      int32_t key_dim = query_head_dims_[i] * num_heads_[i];
      int32_t value_dim = value_head_dims_[i] * num_heads_[i];
      int32_t nonlin_attn_head_dim = 3 * encoder_dims_[i] / 4;

      for (int32_t j = 0; j != num_layers; ++j) {
        {
          std::array<int64_t, 3> s{left_context_len_[i], 1, key_dim};
          auto v =
              Ort::Value::CreateTensor<float>(allocator_, s.data(), s.size());
          Fill(&v, 0);
          initial_states_.push_back(std::move(v));
        }

        {
          std::array<int64_t, 4> s{1, 1, left_context_len_[i],
                                   nonlin_attn_head_dim};
          auto v =
              Ort::Value::CreateTensor<float>(allocator_, s.data(), s.size());
          Fill(&v, 0);
          initial_states_.push_back(std::move(v));
        }

        {
          std::array<int64_t, 3> s{left_context_len_[i], 1, value_dim};
          auto v =
              Ort::Value::CreateTensor<float>(allocator_, s.data(), s.size());
          Fill(&v, 0);
          initial_states_.push_back(std::move(v));
        }

        {
          std::array<int64_t, 3> s{left_context_len_[i], 1, value_dim};
          auto v =
              Ort::Value::CreateTensor<float>(allocator_, s.data(), s.size());
          Fill(&v, 0);
          initial_states_.push_back(std::move(v));
        }

        {
          std::array<int64_t, 3> s{1, encoder_dims_[i],
                                   cnn_module_kernels_[i] / 2};
          auto v =
              Ort::Value::CreateTensor<float>(allocator_, s.data(), s.size());
          Fill(&v, 0);
          initial_states_.push_back(std::move(v));
        }

        {
          std::array<int64_t, 3> s{1, encoder_dims_[i],
                                   cnn_module_kernels_[i] / 2};
          auto v =
              Ort::Value::CreateTensor<float>(allocator_, s.data(), s.size());
          Fill(&v, 0);
          initial_states_.push_back(std::move(v));
        }
      }
    }

    {
      std::array<int64_t, 4> s{1, 128, 3, 19};
      auto v = Ort::Value::CreateTensor<float>(allocator_, s.data(), s.size());
      Fill(&v, 0);
      initial_states_.push_back(std::move(v));
    }

    {
      std::array<int64_t, 1> s{1};
      auto v =
          Ort::Value::CreateTensor<int64_t>(allocator_, s.data(), s.size());
      Fill<int64_t>(&v, 0);
      initial_states_.push_back(std::move(v));
    }
  }

 private:
  OnlineModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;

  std::vector<Ort::Value> initial_states_;

  std::vector<int32_t> encoder_dims_;
  std::vector<int32_t> query_head_dims_;
  std::vector<int32_t> value_head_dims_;
  std::vector<int32_t> num_heads_;
  std::vector<int32_t> num_encoder_layers_;
  std::vector<int32_t> cnn_module_kernels_;
  std::vector<int32_t> left_context_len_;

  int32_t T_ = 0;
  int32_t decode_chunk_len_ = 0;
  int32_t vocab_size_ = 0;

  // for models from
  // https://github.com/k2-fsa/icefall/blob/master/egs/multi_zh-hans/ASR/RESULTS.md#streaming-with-ctc-head
  bool use_whisper_feature_ = false;
};

OnlineZipformer2CtcModel::OnlineZipformer2CtcModel(
    const OnlineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OnlineZipformer2CtcModel::OnlineZipformer2CtcModel(
    Manager *mgr, const OnlineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OnlineZipformer2CtcModel::~OnlineZipformer2CtcModel() = default;

std::vector<Ort::Value> OnlineZipformer2CtcModel::Forward(
    Ort::Value x, std::vector<Ort::Value> states) const {
  return impl_->Forward(std::move(x), std::move(states));
}

int32_t OnlineZipformer2CtcModel::VocabSize() const {
  return impl_->VocabSize();
}

int32_t OnlineZipformer2CtcModel::ChunkLength() const {
  return impl_->ChunkLength();
}

int32_t OnlineZipformer2CtcModel::ChunkShift() const {
  return impl_->ChunkShift();
}

bool OnlineZipformer2CtcModel::UseWhisperFeature() const {
  return impl_->UseWhisperFeature();
}

OrtAllocator *OnlineZipformer2CtcModel::Allocator() const {
  return impl_->Allocator();
}

std::vector<Ort::Value> OnlineZipformer2CtcModel::GetInitStates() const {
  return impl_->GetInitStates();
}

std::vector<Ort::Value> OnlineZipformer2CtcModel::StackStates(
    std::vector<std::vector<Ort::Value>> states) const {
  return impl_->StackStates(std::move(states));
}

std::vector<std::vector<Ort::Value>> OnlineZipformer2CtcModel::UnStackStates(
    std::vector<Ort::Value> states) const {
  return impl_->UnStackStates(std::move(states));
}

#if __ANDROID_API__ >= 9
template OnlineZipformer2CtcModel::OnlineZipformer2CtcModel(
    AAssetManager *mgr, const OnlineModelConfig &config);
#endif

#if __OHOS__
template OnlineZipformer2CtcModel::OnlineZipformer2CtcModel(
    NativeResourceManager *mgr, const OnlineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-zipformer2-ctc-model.h
================================================
// sherpa-onnx/csrc/online-zipformer2-ctc-model.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_ONLINE_ZIPFORMER2_CTC_MODEL_H_
#define SHERPA_ONNX_CSRC_ONLINE_ZIPFORMER2_CTC_MODEL_H_

#include <memory>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/online-ctc-model.h"
#include "sherpa-onnx/csrc/online-model-config.h"

namespace sherpa_onnx {

class OnlineZipformer2CtcModel : public OnlineCtcModel {
 public:
  explicit OnlineZipformer2CtcModel(const OnlineModelConfig &config);

  template <typename Manager>
  OnlineZipformer2CtcModel(Manager *mgr, const OnlineModelConfig &config);

  ~OnlineZipformer2CtcModel() override;

  // A list of tensors.
  // See also
  // https://github.com/k2-fsa/icefall/pull/1413
  // and
  // https://github.com/k2-fsa/icefall/pull/1415
  std::vector<Ort::Value> GetInitStates() const override;

  std::vector<Ort::Value> StackStates(
      std::vector<std::vector<Ort::Value>> states) const override;

  std::vector<std::vector<Ort::Value>> UnStackStates(
      std::vector<Ort::Value> states) const override;

  /**
   *
   * @param x A 3-D tensor of shape (N, T, C). N has to be 1.
   * @param states  It is from GetInitStates() or returned from this method.
   *
   * @return Return a list of tensors
   *    - ans[0] contains log_probs, of shape (N, T, C)
   *    - ans[1:] contains next_states
   */
  std::vector<Ort::Value> Forward(
      Ort::Value x, std::vector<Ort::Value> states) const override;

  /** Return the vocabulary size of the model
   */
  int32_t VocabSize() const override;

  /** Return an allocator for allocating memory
   */
  OrtAllocator *Allocator() const override;

  // The model accepts this number of frames before subsampling as input
  int32_t ChunkLength() const override;

  // Similar to frame_shift in feature extractor, after processing
  // ChunkLength() frames, we advance by ChunkShift() frames
  // before we process the next chunk.
  int32_t ChunkShift() const override;

  bool UseWhisperFeature() const override;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_ZIPFORMER2_CTC_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/online-zipformer2-transducer-model.cc
================================================
// sherpa-onnx/csrc/online-zipformer2-transducer-model.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/online-zipformer2-transducer-model.h"

#include <algorithm>
#include <cassert>
#include <cmath>
#include <memory>
#include <numeric>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/cat.h"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/online-transducer-decoder.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"
#include "sherpa-onnx/csrc/unbind.h"

namespace sherpa_onnx {

OnlineZipformer2TransducerModel::OnlineZipformer2TransducerModel(
    const OnlineModelConfig &config)
    : env_(ORT_LOGGING_LEVEL_ERROR),
      encoder_sess_opts_(GetSessionOptions(config)),
      decoder_sess_opts_(GetSessionOptions(config, "decoder")),
      joiner_sess_opts_(GetSessionOptions(config, "joiner")),
      config_(config),
      allocator_{} {
  {
    auto buf = ReadFile(config.transducer.encoder);
    InitEncoder(buf.data(), buf.size());
  }

  {
    auto buf = ReadFile(config.transducer.decoder);
    InitDecoder(buf.data(), buf.size());
  }

  {
    auto buf = ReadFile(config.transducer.joiner);
    InitJoiner(buf.data(), buf.size());
  }
}

template <typename Manager>
OnlineZipformer2TransducerModel::OnlineZipformer2TransducerModel(
    Manager *mgr, const OnlineModelConfig &config)
    : env_(ORT_LOGGING_LEVEL_ERROR),
      config_(config),
      encoder_sess_opts_(GetSessionOptions(config)),
      decoder_sess_opts_(GetSessionOptions(config, "decoder")),
      joiner_sess_opts_(GetSessionOptions(config, "joiner")),
      allocator_{} {
  {
    auto buf = ReadFile(mgr, config.transducer.encoder);
    InitEncoder(buf.data(), buf.size());
  }

  {
    auto buf = ReadFile(mgr, config.transducer.decoder);
    InitDecoder(buf.data(), buf.size());
  }

  {
    auto buf = ReadFile(mgr, config.transducer.joiner);
    InitJoiner(buf.data(), buf.size());
  }
}

void OnlineZipformer2TransducerModel::InitEncoder(void *model_data,
                                                  size_t model_data_length) {
  encoder_sess_ = std::make_unique<Ort::Session>(
      env_, model_data, model_data_length, encoder_sess_opts_);

  GetInputNames(encoder_sess_.get(), &encoder_input_names_,
                &encoder_input_names_ptr_);

  GetOutputNames(encoder_sess_.get(), &encoder_output_names_,
                 &encoder_output_names_ptr_);

  // get meta data
  Ort::ModelMetadata meta_data = encoder_sess_->GetModelMetadata();
  if (config_.debug) {
    std::ostringstream os;
    os << "---encoder---\n";
    PrintModelMetadata(os, meta_data);
#if __OHOS__
    SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
    SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
  }

  Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
  SHERPA_ONNX_READ_META_DATA_VEC(encoder_dims_, "encoder_dims");
  SHERPA_ONNX_READ_META_DATA_VEC(query_head_dims_, "query_head_dims");
  SHERPA_ONNX_READ_META_DATA_VEC(value_head_dims_, "value_head_dims");
  SHERPA_ONNX_READ_META_DATA_VEC(num_heads_, "num_heads");
  SHERPA_ONNX_READ_META_DATA_VEC(num_encoder_layers_, "num_encoder_layers");
  SHERPA_ONNX_READ_META_DATA_VEC(cnn_module_kernels_, "cnn_module_kernels");
  SHERPA_ONNX_READ_META_DATA_VEC(left_context_len_, "left_context_len");

  SHERPA_ONNX_READ_META_DATA(T_, "T");
  SHERPA_ONNX_READ_META_DATA(decode_chunk_len_, "decode_chunk_len");

  std::string feature_type;
  SHERPA_ONNX_READ_META_DATA_STR_WITH_DEFAULT(feature_type, "feature", "");
  if (feature_type == "whisper") {
    use_whisper_feature_ = true;
  }

  if (config_.debug) {
    auto print = [](const std::vector<int32_t> &v, const char *name) {
      std::ostringstream os;
      os << name << ": ";
      for (auto i : v) {
        os << i << " ";
      }
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    };
    print(encoder_dims_, "encoder_dims");
    print(query_head_dims_, "query_head_dims");
    print(value_head_dims_, "value_head_dims");
    print(num_heads_, "num_heads");
    print(num_encoder_layers_, "num_encoder_layers");
    print(cnn_module_kernels_, "cnn_module_kernels");
    print(left_context_len_, "left_context_len");

#if __OHOS__
    SHERPA_ONNX_LOGE("T: %{public}d", T_);
    SHERPA_ONNX_LOGE("decode_chunk_len_: %{public}d", decode_chunk_len_);
#else
    SHERPA_ONNX_LOGE("T: %d", T_);
    SHERPA_ONNX_LOGE("decode_chunk_len_: %d", decode_chunk_len_);
#endif
  }
}

void OnlineZipformer2TransducerModel::InitDecoder(void *model_data,
                                                  size_t model_data_length) {
  decoder_sess_ = std::make_unique<Ort::Session>(
      env_, model_data, model_data_length, decoder_sess_opts_);

  GetInputNames(decoder_sess_.get(), &decoder_input_names_,
                &decoder_input_names_ptr_);

  GetOutputNames(decoder_sess_.get(), &decoder_output_names_,
                 &decoder_output_names_ptr_);

  // get meta data
  Ort::ModelMetadata meta_data = decoder_sess_->GetModelMetadata();
  if (config_.debug) {
    std::ostringstream os;
    os << "---decoder---\n";
    PrintModelMetadata(os, meta_data);
    SHERPA_ONNX_LOGE("%s", os.str().c_str());
  }

  Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
  SHERPA_ONNX_READ_META_DATA(vocab_size_, "vocab_size");
  SHERPA_ONNX_READ_META_DATA(context_size_, "context_size");
}

void OnlineZipformer2TransducerModel::InitJoiner(void *model_data,
                                                 size_t model_data_length) {
  joiner_sess_ = std::make_unique<Ort::Session>(
      env_, model_data, model_data_length, joiner_sess_opts_);

  GetInputNames(joiner_sess_.get(), &joiner_input_names_,
                &joiner_input_names_ptr_);

  GetOutputNames(joiner_sess_.get(), &joiner_output_names_,
                 &joiner_output_names_ptr_);

  // get meta data
  Ort::ModelMetadata meta_data = joiner_sess_->GetModelMetadata();
  if (config_.debug) {
    std::ostringstream os;
    os << "---joiner---\n";
    PrintModelMetadata(os, meta_data);
    SHERPA_ONNX_LOGE("%s", os.str().c_str());
  }
}

std::vector<Ort::Value> OnlineZipformer2TransducerModel::StackStates(
    const std::vector<std::vector<Ort::Value>> &states) const {
  int32_t batch_size = static_cast<int32_t>(states.size());

  std::vector<const Ort::Value *> buf(batch_size);

  auto allocator =
      const_cast<OnlineZipformer2TransducerModel *>(this)->allocator_;

  std::vector<Ort::Value> ans;
  int32_t num_states = static_cast<int32_t>(states[0].size());
  ans.reserve(num_states);

  for (int32_t i = 0; i != (num_states - 2) / 6; ++i) {
    {
      for (int32_t n = 0; n != batch_size; ++n) {
        buf[n] = &states[n][6 * i];
      }
      auto v = Cat(allocator, buf, 1);
      ans.push_back(std::move(v));
    }
    {
      for (int32_t n = 0; n != batch_size; ++n) {
        buf[n] = &states[n][6 * i + 1];
      }
      auto v = Cat(allocator, buf, 1);
      ans.push_back(std::move(v));
    }
    {
      for (int32_t n = 0; n != batch_size; ++n) {
        buf[n] = &states[n][6 * i + 2];
      }
      auto v = Cat(allocator, buf, 1);
      ans.push_back(std::move(v));
    }
    {
      for (int32_t n = 0; n != batch_size; ++n) {
        buf[n] = &states[n][6 * i + 3];
      }
      auto v = Cat(allocator, buf, 1);
      ans.push_back(std::move(v));
    }
    {
      for (int32_t n = 0; n != batch_size; ++n) {
        buf[n] = &states[n][6 * i + 4];
      }
      auto v = Cat(allocator, buf, 0);
      ans.push_back(std::move(v));
    }
    {
      for (int32_t n = 0; n != batch_size; ++n) {
        buf[n] = &states[n][6 * i + 5];
      }
      auto v = Cat(allocator, buf, 0);
      ans.push_back(std::move(v));
    }
  }

  {
    for (int32_t n = 0; n != batch_size; ++n) {
      buf[n] = &states[n][num_states - 2];
    }
    auto v = Cat(allocator, buf, 0);
    ans.push_back(std::move(v));
  }

  {
    for (int32_t n = 0; n != batch_size; ++n) {
      buf[n] = &states[n][num_states - 1];
    }
    auto v = Cat<int64_t>(allocator, buf, 0);
    ans.push_back(std::move(v));
  }
  return ans;
}

std::vector<std::vector<Ort::Value>>
OnlineZipformer2TransducerModel::UnStackStates(
    const std::vector<Ort::Value> &states) const {
  int32_t m = std::accumulate(num_encoder_layers_.begin(),
                              num_encoder_layers_.end(), 0);
  assert(static_cast<int32_t>(states.size()) == m * 6 + 2);

  int32_t batch_size = states[0].GetTensorTypeAndShapeInfo().GetShape()[1];

  auto allocator =
      const_cast<OnlineZipformer2TransducerModel *>(this)->allocator_;

  std::vector<std::vector<Ort::Value>> ans;
  ans.resize(batch_size);

  for (int32_t i = 0; i != m; ++i) {
    {
      auto v = Unbind(allocator, &states[i * 6], 1);
      assert(static_cast<int32_t>(v.size()) == batch_size);

      for (int32_t n = 0; n != batch_size; ++n) {
        ans[n].push_back(std::move(v[n]));
      }
    }
    {
      auto v = Unbind(allocator, &states[i * 6 + 1], 1);
      assert(static_cast<int32_t>(v.size()) == batch_size);

      for (int32_t n = 0; n != batch_size; ++n) {
        ans[n].push_back(std::move(v[n]));
      }
    }
    {
      auto v = Unbind(allocator, &states[i * 6 + 2], 1);
      assert(static_cast<int32_t>(v.size()) == batch_size);

      for (int32_t n = 0; n != batch_size; ++n) {
        ans[n].push_back(std::move(v[n]));
      }
    }
    {
      auto v = Unbind(allocator, &states[i * 6 + 3], 1);
      assert(static_cast<int32_t>(v.size()) == batch_size);

      for (int32_t n = 0; n != batch_size; ++n) {
        ans[n].push_back(std::move(v[n]));
      }
    }
    {
      auto v = Unbind(allocator, &states[i * 6 + 4], 0);
      assert(static_cast<int32_t>(v.size()) == batch_size);

      for (int32_t n = 0; n != batch_size; ++n) {
        ans[n].push_back(std::move(v[n]));
      }
    }
    {
      auto v = Unbind(allocator, &states[i * 6 + 5], 0);
      assert(static_cast<int32_t>(v.size()) == batch_size);

      for (int32_t n = 0; n != batch_size; ++n) {
        ans[n].push_back(std::move(v[n]));
      }
    }
  }

  {
    auto v = Unbind(allocator, &states[m * 6], 0);
    assert(static_cast<int32_t>(v.size()) == batch_size);

    for (int32_t n = 0; n != batch_size; ++n) {
      ans[n].push_back(std::move(v[n]));
    }
  }
  {
    auto v = Unbind<int64_t>(allocator, &states[m * 6 + 1], 0);
    assert(static_cast<int32_t>(v.size()) == batch_size);

    for (int32_t n = 0; n != batch_size; ++n) {
      ans[n].push_back(std::move(v[n]));
    }
  }

  return ans;
}

std::vector<Ort::Value>
OnlineZipformer2TransducerModel::GetEncoderInitStates() {
  std::vector<Ort::Value> ans;
  int32_t n = static_cast<int32_t>(encoder_dims_.size());
  int32_t m = std::accumulate(num_encoder_layers_.begin(),
                              num_encoder_layers_.end(), 0);
  ans.reserve(m * 6 + 2);

  for (int32_t i = 0; i != n; ++i) {
    int32_t num_layers = num_encoder_layers_[i];
    int32_t key_dim = query_head_dims_[i] * num_heads_[i];
    int32_t value_dim = value_head_dims_[i] * num_heads_[i];
    int32_t nonlin_attn_head_dim = 3 * encoder_dims_[i] / 4;

    for (int32_t j = 0; j != num_layers; ++j) {
      {
        std::array<int64_t, 3> s{left_context_len_[i], 1, key_dim};
        auto v =
            Ort::Value::CreateTensor<float>(allocator_, s.data(), s.size());
        Fill(&v, 0);
        ans.push_back(std::move(v));
      }

      {
        std::array<int64_t, 4> s{1, 1, left_context_len_[i],
                                 nonlin_attn_head_dim};
        auto v =
            Ort::Value::CreateTensor<float>(allocator_, s.data(), s.size());
        Fill(&v, 0);
        ans.push_back(std::move(v));
      }

      {
        std::array<int64_t, 3> s{left_context_len_[i], 1, value_dim};
        auto v =
            Ort::Value::CreateTensor<float>(allocator_, s.data(), s.size());
        Fill(&v, 0);
        ans.push_back(std::move(v));
      }

      {
        std::array<int64_t, 3> s{left_context_len_[i], 1, value_dim};
        auto v =
            Ort::Value::CreateTensor<float>(allocator_, s.data(), s.size());
        Fill(&v, 0);
        ans.push_back(std::move(v));
      }

      {
        std::array<int64_t, 3> s{1, encoder_dims_[i],
                                 cnn_module_kernels_[i] / 2};
        auto v =
            Ort::Value::CreateTensor<float>(allocator_, s.data(), s.size());
        Fill(&v, 0);
        ans.push_back(std::move(v));
      }

      {
        std::array<int64_t, 3> s{1, encoder_dims_[i],
                                 cnn_module_kernels_[i] / 2};
        auto v =
            Ort::Value::CreateTensor<float>(allocator_, s.data(), s.size());
        Fill(&v, 0);
        ans.push_back(std::move(v));
      }
    }
  }

  {
    SHERPA_ONNX_CHECK_NE(feature_dim_, 0);
    int32_t embed_dim = (((feature_dim_ - 1) / 2) - 1) / 2;
    std::array<int64_t, 4> s{1, 128, 3, embed_dim};

    auto v = Ort::Value::CreateTensor<float>(allocator_, s.data(), s.size());
    Fill(&v, 0);
    ans.push_back(std::move(v));
  }

  {
    std::array<int64_t, 1> s{1};
    auto v = Ort::Value::CreateTensor<int64_t>(allocator_, s.data(), s.size());
    Fill<int64_t>(&v, 0);
    ans.push_back(std::move(v));
  }
  return ans;
}

std::pair<Ort::Value, std::vector<Ort::Value>>
OnlineZipformer2TransducerModel::RunEncoder(Ort::Value features,
                                            std::vector<Ort::Value> states,
                                            Ort::Value /* processed_frames */) {
  std::vector<Ort::Value> encoder_inputs;
  encoder_inputs.reserve(1 + states.size());

  encoder_inputs.push_back(std::move(features));
  for (auto &v : states) {
    encoder_inputs.push_back(std::move(v));
  }

  auto encoder_out = encoder_sess_->Run(
      {}, encoder_input_names_ptr_.data(), encoder_inputs.data(),
      encoder_inputs.size(), encoder_output_names_ptr_.data(),
      encoder_output_names_ptr_.size());

  std::vector<Ort::Value> next_states;
  next_states.reserve(states.size());

  for (int32_t i = 1; i != static_cast<int32_t>(encoder_out.size()); ++i) {
    next_states.push_back(std::move(encoder_out[i]));
  }
  return {std::move(encoder_out[0]), std::move(next_states)};
}

Ort::Value OnlineZipformer2TransducerModel::RunDecoder(
    Ort::Value decoder_input) {
  auto decoder_out = decoder_sess_->Run(
      {}, decoder_input_names_ptr_.data(), &decoder_input, 1,
      decoder_output_names_ptr_.data(), decoder_output_names_ptr_.size());
  return std::move(decoder_out[0]);
}

Ort::Value OnlineZipformer2TransducerModel::RunJoiner(Ort::Value encoder_out,
                                                      Ort::Value decoder_out) {
  std::array<Ort::Value, 2> joiner_input = {std::move(encoder_out),
                                            std::move(decoder_out)};
  auto logit =
      joiner_sess_->Run({}, joiner_input_names_ptr_.data(), joiner_input.data(),
                        joiner_input.size(), joiner_output_names_ptr_.data(),
                        joiner_output_names_ptr_.size());

  return std::move(logit[0]);
}

#if __ANDROID_API__ >= 9
template OnlineZipformer2TransducerModel::OnlineZipformer2TransducerModel(
    AAssetManager *mgr, const OnlineModelConfig &config);
#endif

#if __OHOS__
template OnlineZipformer2TransducerModel::OnlineZipformer2TransducerModel(
    NativeResourceManager *mgr, const OnlineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/online-zipformer2-transducer-model.h
================================================
// sherpa-onnx/csrc/online-zipformer2-transducer-model.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_ONLINE_ZIPFORMER2_TRANSDUCER_MODEL_H_
#define SHERPA_ONNX_CSRC_ONLINE_ZIPFORMER2_TRANSDUCER_MODEL_H_

#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/online-model-config.h"
#include "sherpa-onnx/csrc/online-transducer-model.h"

namespace sherpa_onnx {

class OnlineZipformer2TransducerModel : public OnlineTransducerModel {
 public:
  explicit OnlineZipformer2TransducerModel(const OnlineModelConfig &config);

  template <typename Manager>
  OnlineZipformer2TransducerModel(Manager *mgr,
                                  const OnlineModelConfig &config);

  std::vector<Ort::Value> StackStates(
      const std::vector<std::vector<Ort::Value>> &states) const override;

  std::vector<std::vector<Ort::Value>> UnStackStates(
      const std::vector<Ort::Value> &states) const override;

  std::vector<Ort::Value> GetEncoderInitStates() override;

  void SetFeatureDim(int32_t feature_dim) override {
    feature_dim_ = feature_dim;
  }

  std::pair<Ort::Value, std::vector<Ort::Value>> RunEncoder(
      Ort::Value features, std::vector<Ort::Value> states,
      Ort::Value processed_frames) override;

  Ort::Value RunDecoder(Ort::Value decoder_input) override;

  Ort::Value RunJoiner(Ort::Value encoder_out, Ort::Value decoder_out) override;

  int32_t ContextSize() const override { return context_size_; }

  int32_t ChunkSize() const override { return T_; }

  int32_t ChunkShift() const override { return decode_chunk_len_; }

  int32_t VocabSize() const override { return vocab_size_; }
  OrtAllocator *Allocator() override { return allocator_; }

  bool UseWhisperFeature() const override { return use_whisper_feature_; }

 private:
  void InitEncoder(void *model_data, size_t model_data_length);
  void InitDecoder(void *model_data, size_t model_data_length);
  void InitJoiner(void *model_data, size_t model_data_length);

 private:
  Ort::Env env_;
  Ort::SessionOptions encoder_sess_opts_;
  Ort::SessionOptions decoder_sess_opts_;
  Ort::SessionOptions joiner_sess_opts_;

  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> encoder_sess_;
  std::unique_ptr<Ort::Session> decoder_sess_;
  std::unique_ptr<Ort::Session> joiner_sess_;

  std::vector<std::string> encoder_input_names_;
  std::vector<const char *> encoder_input_names_ptr_;

  std::vector<std::string> encoder_output_names_;
  std::vector<const char *> encoder_output_names_ptr_;

  std::vector<std::string> decoder_input_names_;
  std::vector<const char *> decoder_input_names_ptr_;

  std::vector<std::string> decoder_output_names_;
  std::vector<const char *> decoder_output_names_ptr_;

  std::vector<std::string> joiner_input_names_;
  std::vector<const char *> joiner_input_names_ptr_;

  std::vector<std::string> joiner_output_names_;
  std::vector<const char *> joiner_output_names_ptr_;

  OnlineModelConfig config_;

  std::vector<int32_t> encoder_dims_;
  std::vector<int32_t> query_head_dims_;
  std::vector<int32_t> value_head_dims_;
  std::vector<int32_t> num_heads_;
  std::vector<int32_t> num_encoder_layers_;
  std::vector<int32_t> cnn_module_kernels_;
  std::vector<int32_t> left_context_len_;

  int32_t T_ = 0;
  int32_t decode_chunk_len_ = 0;

  int32_t context_size_ = 0;
  int32_t vocab_size_ = 0;
  int32_t feature_dim_ = 80;

  // for models from
  // https://github.com/k2-fsa/icefall/blob/master/egs/multi_zh-hans/ASR/RESULTS.md#streaming-with-ctc-head
  bool use_whisper_feature_ = false;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_ZIPFORMER2_TRANSDUCER_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/onnx-utils.cc
================================================
// sherpa-onnx/csrc/onnx-utils.cc
//
// Copyright (c)  2023  Xiaomi Corporation
// Copyright (c)  2023  Pingfeng Luo
#include "sherpa-onnx/csrc/onnx-utils.h"

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <functional>
#include <memory>
#include <numeric>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

static std::string GetInputName(Ort::Session *sess, size_t index,
                                OrtAllocator *allocator) {
// Note(fangjun): We only tested 1.17.1 and 1.11.0
// For other versions, we may need to change it
#if ORT_API_VERSION >= 12
  auto v = sess->GetInputNameAllocated(index, allocator);
  return v.get();
#else
  auto v = sess->GetInputName(index, allocator);
  std::string ans = v;
  allocator->Free(allocator, v);
  return ans;
#endif
}

static std::string GetOutputName(Ort::Session *sess, size_t index,
                                 OrtAllocator *allocator) {
// Note(fangjun): We only tested 1.17.1 and 1.11.0
// For other versions, we may need to change it
#if ORT_API_VERSION >= 12
  auto v = sess->GetOutputNameAllocated(index, allocator);
  return v.get();
#else
  auto v = sess->GetOutputName(index, allocator);
  std::string ans = v;
  allocator->Free(allocator, v);
  return ans;
#endif
}

void GetInputNames(Ort::Session *sess, std::vector<std::string> *input_names,
                   std::vector<const char *> *input_names_ptr) {
  Ort::AllocatorWithDefaultOptions allocator;
  size_t node_count = sess->GetInputCount();
  input_names->resize(node_count);
  input_names_ptr->resize(node_count);
  for (size_t i = 0; i != node_count; ++i) {
    (*input_names)[i] = GetInputName(sess, i, allocator);
    (*input_names_ptr)[i] = (*input_names)[i].c_str();
  }
}

void GetOutputNames(Ort::Session *sess, std::vector<std::string> *output_names,
                    std::vector<const char *> *output_names_ptr) {
  Ort::AllocatorWithDefaultOptions allocator;
  size_t node_count = sess->GetOutputCount();
  output_names->resize(node_count);
  output_names_ptr->resize(node_count);
  for (size_t i = 0; i != node_count; ++i) {
    (*output_names)[i] = GetOutputName(sess, i, allocator);
    (*output_names_ptr)[i] = (*output_names)[i].c_str();
  }
}

Ort::Value GetEncoderOutFrame(OrtAllocator *allocator, Ort::Value *encoder_out,
                              int32_t t) {
  std::vector<int64_t> encoder_out_shape =
      encoder_out->GetTensorTypeAndShapeInfo().GetShape();

  auto batch_size = encoder_out_shape[0];
  auto num_frames = encoder_out_shape[1];
  assert(t < num_frames);

  auto encoder_out_dim = encoder_out_shape[2];

  auto offset = num_frames * encoder_out_dim;

  std::array<int64_t, 2> shape{batch_size, encoder_out_dim};

  Ort::Value ans =
      Ort::Value::CreateTensor<float>(allocator, shape.data(), shape.size());

  float *dst = ans.GetTensorMutableData<float>();
  const float *src = encoder_out->GetTensorData<float>();

  for (int32_t i = 0; i != batch_size; ++i) {
    std::copy(src + t * encoder_out_dim, src + (t + 1) * encoder_out_dim, dst);
    src += offset;
    dst += encoder_out_dim;
  }
  return ans;
}

void PrintModelMetadata(std::ostream &os, const Ort::ModelMetadata &meta_data) {
  Ort::AllocatorWithDefaultOptions allocator;
#if ORT_API_VERSION >= 12
  std::vector<Ort::AllocatedStringPtr> v =
      meta_data.GetCustomMetadataMapKeysAllocated(allocator);
  for (const auto &key : v) {
    auto p = meta_data.LookupCustomMetadataMapAllocated(key.get(), allocator);
    os << key.get() << "=" << p.get() << "\n";
  }
#else
  int64_t num_keys = 0;
  char **keys = meta_data.GetCustomMetadataMapKeys(allocator, num_keys);
  for (int32_t i = 0; i < num_keys; ++i) {
    auto v = LookupCustomModelMetaData(meta_data, keys[i], allocator);
    os << keys[i] << "=" << v << "\n";
    allocator.Free(keys[i]);
  }

  allocator.Free(keys);
#endif
}

Ort::Value Clone(OrtAllocator *allocator, const Ort::Value *v) {
  auto type_and_shape = v->GetTensorTypeAndShapeInfo();
  std::vector<int64_t> shape = type_and_shape.GetShape();

  switch (type_and_shape.GetElementType()) {
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32: {
      Ort::Value ans = Ort::Value::CreateTensor<int32_t>(
          allocator, shape.data(), shape.size());
      const int32_t *start = v->GetTensorData<int32_t>();
      const int32_t *end = start + type_and_shape.GetElementCount();
      int32_t *dst = ans.GetTensorMutableData<int32_t>();
      std::copy(start, end, dst);
      return ans;
    }
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64: {
      Ort::Value ans = Ort::Value::CreateTensor<int64_t>(
          allocator, shape.data(), shape.size());
      const int64_t *start = v->GetTensorData<int64_t>();
      const int64_t *end = start + type_and_shape.GetElementCount();
      int64_t *dst = ans.GetTensorMutableData<int64_t>();
      std::copy(start, end, dst);
      return ans;
    }
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT: {
      Ort::Value ans = Ort::Value::CreateTensor<float>(allocator, shape.data(),
                                                       shape.size());
      const float *start = v->GetTensorData<float>();
      const float *end = start + type_and_shape.GetElementCount();
      float *dst = ans.GetTensorMutableData<float>();
      std::copy(start, end, dst);
      return ans;
    }
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16: {
      Ort::Value ans =
          Ort::Value::CreateTensor(allocator, shape.data(), shape.size(),
                                   ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16);
      const auto *start = v->GetTensorData<uint16_t>();
      const auto *end = start + type_and_shape.GetElementCount();
      auto *dst = ans.GetTensorMutableData<uint16_t>();
      std::copy(start, end, dst);
      return ans;
    }
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16: {
      Ort::Value ans = Ort::Value::CreateTensor<uint16_t>(
          allocator, shape.data(), shape.size());
      const auto *start = v->GetTensorData<uint16_t>();
      const auto *end = start + type_and_shape.GetElementCount();
      auto *dst = ans.GetTensorMutableData<uint16_t>();
      std::copy(start, end, dst);
      return ans;
    }

    default:
      SHERPA_ONNX_LOGE("Unsupported type: %d\n",
                       static_cast<int32_t>(type_and_shape.GetElementType()));
      SHERPA_ONNX_EXIT(-1);
      // unreachable code
      return Ort::Value{nullptr};
  }
}

Ort::Value View(Ort::Value *v) {
  auto type_and_shape = v->GetTensorTypeAndShapeInfo();
  std::vector<int64_t> shape = type_and_shape.GetShape();

#if ORT_API_VERSION >= 14
  auto memory_info = v->GetTensorMemoryInfo();
#else
  const OrtMemoryInfo *memory_info = nullptr;
  OrtStatus *status = Ort::GetApi().GetTensorMemoryInfo(*v, &memory_info);
  if (status != nullptr) {
    const char *msg = Ort::GetApi().GetErrorMessage(status);
    Ort::GetApi().ReleaseStatus(status);
    SHERPA_ONNX_LOGE("Failed to get tensor memory info with error: '%s'", msg);
    SHERPA_ONNX_EXIT(-1);
  }
#endif

  switch (type_and_shape.GetElementType()) {
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32:
      return Ort::Value::CreateTensor(
          memory_info, v->GetTensorMutableData<int32_t>(),
          type_and_shape.GetElementCount(), shape.data(), shape.size());
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64:
      return Ort::Value::CreateTensor(
          memory_info, v->GetTensorMutableData<int64_t>(),
          type_and_shape.GetElementCount(), shape.data(), shape.size());
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT:
      return Ort::Value::CreateTensor(
          memory_info, v->GetTensorMutableData<float>(),
          type_and_shape.GetElementCount(), shape.data(), shape.size());
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16:
      return Ort::Value::CreateTensor(
          memory_info, v->GetTensorMutableData<uint16_t>(),
          type_and_shape.GetElementCount() * sizeof(uint16_t), shape.data(),
          shape.size(), ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16);
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16:
      return Ort::Value::CreateTensor(
          memory_info, v->GetTensorMutableData<uint16_t>(),
          type_and_shape.GetElementCount(), shape.data(), shape.size());
    case ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL:
      return Ort::Value::CreateTensor(
          memory_info, v->GetTensorMutableData<bool>(),
          type_and_shape.GetElementCount(), shape.data(), shape.size());
    default:
      SHERPA_ONNX_LOGE("Unsupported type: %d\n",
                       static_cast<int32_t>(type_and_shape.GetElementType()));
      SHERPA_ONNX_EXIT(-1);
      // unreachable code
      return Ort::Value{nullptr};
  }
}

float ComputeSum(const Ort::Value *v, int32_t n /*= -1*/) {
  std::vector<int64_t> shape = v->GetTensorTypeAndShapeInfo().GetShape();
  auto size = static_cast<int32_t>(
      std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<>()));
  if (n != -1 && n < size && n > 0) {
    size = n;
  }

  const float *p = v->GetTensorData<float>();

  return std::accumulate(p, p + size, 1.0f);
}

float ComputeMean(const Ort::Value *v, int32_t n /*= -1*/) {
  std::vector<int64_t> shape = v->GetTensorTypeAndShapeInfo().GetShape();
  auto size = static_cast<int32_t>(
      std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<>()));

  if (n != -1 && n < size && n > 0) {
    size = n;
  }

  auto sum = ComputeSum(v, n);
  return sum / size;
}

void PrintShape(const Ort::Value *v) {
  std::vector<int64_t> shape = v->GetTensorTypeAndShapeInfo().GetShape();
  std::ostringstream os;
  for (auto i : shape) {
    os << i << ", ";
  }
  os << "\n";
  SHERPA_ONNX_LOGE("%s", os.str().c_str());
}

template <typename T /*= float*/>
void Print1D(const Ort::Value *v) {
  std::vector<int64_t> shape = v->GetTensorTypeAndShapeInfo().GetShape();
  const T *d = v->GetTensorData<T>();
  std::ostringstream os;
  for (int32_t i = 0; i != static_cast<int32_t>(shape[0]); ++i) {
    os << d[i] << " ";
  }
  os << "\n";
  SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
}

template void Print1D<int64_t>(const Ort::Value *v);
template void Print1D<int32_t>(const Ort::Value *v);
template void Print1D<float>(const Ort::Value *v);

template <typename T /*= float*/>
void Print2D(const Ort::Value *v) {
  std::vector<int64_t> shape = v->GetTensorTypeAndShapeInfo().GetShape();
  const T *d = v->GetTensorData<T>();

  std::ostringstream os;
  for (int32_t r = 0; r != static_cast<int32_t>(shape[0]); ++r) {
    for (int32_t c = 0; c != static_cast<int32_t>(shape[1]); ++c, ++d) {
      os << *d << " ";
    }
    os << "\n";
  }
  SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
}

template void Print2D<int64_t>(const Ort::Value *v);
template void Print2D<float>(const Ort::Value *v);

void Print3D(const Ort::Value *v) {
  std::vector<int64_t> shape = v->GetTensorTypeAndShapeInfo().GetShape();
  const float *d = v->GetTensorData<float>();

  for (int32_t p = 0; p != static_cast<int32_t>(shape[0]); ++p) {
    SHERPA_ONNX_LOGE("---plane %d---\n", p);
    for (int32_t r = 0; r != static_cast<int32_t>(shape[1]); ++r) {
      for (int32_t c = 0; c != static_cast<int32_t>(shape[2]); ++c, ++d) {
        SHERPA_ONNX_LOGE("%.3f ", *d);
      }
      SHERPA_ONNX_LOGE("\n");
    }
  }
  SHERPA_ONNX_LOGE("\n");
}

void Print4D(const Ort::Value *v) {
  std::vector<int64_t> shape = v->GetTensorTypeAndShapeInfo().GetShape();
  const float *d = v->GetTensorData<float>();

  for (int32_t p = 0; p != static_cast<int32_t>(shape[0]); ++p) {
    SHERPA_ONNX_LOGE("---plane %d---\n", p);
    for (int32_t q = 0; q != static_cast<int32_t>(shape[1]); ++q) {
      SHERPA_ONNX_LOGE("---subplane %d---\n", q);
      for (int32_t r = 0; r != static_cast<int32_t>(shape[2]); ++r) {
        for (int32_t c = 0; c != static_cast<int32_t>(shape[3]); ++c, ++d) {
          SHERPA_ONNX_LOGE("%.3f ", *d);
        }
        SHERPA_ONNX_LOGE("\n");
      }
      SHERPA_ONNX_LOGE("\n");
    }
  }
  SHERPA_ONNX_LOGE("\n");
}

Ort::Value Repeat(OrtAllocator *allocator, Ort::Value *cur_encoder_out,
                  const std::vector<int32_t> &hyps_num_split) {
  std::vector<int64_t> cur_encoder_out_shape =
      cur_encoder_out->GetTensorTypeAndShapeInfo().GetShape();

  std::array<int64_t, 2> ans_shape{hyps_num_split.back(),
                                   cur_encoder_out_shape[1]};

  Ort::Value ans = Ort::Value::CreateTensor<float>(allocator, ans_shape.data(),
                                                   ans_shape.size());

  const float *src = cur_encoder_out->GetTensorData<float>();
  float *dst = ans.GetTensorMutableData<float>();
  int32_t batch_size = static_cast<int32_t>(hyps_num_split.size()) - 1;
  for (int32_t b = 0; b != batch_size; ++b) {
    int32_t cur_stream_hyps_num = hyps_num_split[b + 1] - hyps_num_split[b];
    for (int32_t i = 0; i != cur_stream_hyps_num; ++i) {
      std::copy(src, src + cur_encoder_out_shape[1], dst);
      dst += cur_encoder_out_shape[1];
    }
    src += cur_encoder_out_shape[1];
  }
  return ans;
}

CopyableOrtValue::CopyableOrtValue(const CopyableOrtValue &other) {
  *this = other;
}

CopyableOrtValue &CopyableOrtValue::operator=(const CopyableOrtValue &other) {
  if (this == &other) {
    return *this;
  }
  if (other.value) {
    Ort::AllocatorWithDefaultOptions allocator;
    value = Clone(allocator, &other.value);
  }
  return *this;
}

CopyableOrtValue::CopyableOrtValue(CopyableOrtValue &&other) noexcept {
  *this = std::move(other);
}

CopyableOrtValue &CopyableOrtValue::operator=(
    CopyableOrtValue &&other) noexcept {
  if (this == &other) {
    return *this;
  }
  value = std::move(other.value);
  return *this;
}

std::vector<CopyableOrtValue> Convert(std::vector<Ort::Value> values) {
  std::vector<CopyableOrtValue> ans;
  ans.reserve(values.size());

  for (auto &v : values) {
    ans.emplace_back(std::move(v));
  }

  return ans;
}

std::vector<Ort::Value> Convert(std::vector<CopyableOrtValue> values) {
  std::vector<Ort::Value> ans;
  ans.reserve(values.size());

  for (auto &v : values) {
    ans.emplace_back(std::move(v.value));
  }

  return ans;
}

std::string LookupCustomModelMetaData(const Ort::ModelMetadata &meta_data,
                                      const char *key,
                                      OrtAllocator *allocator) {
// Note(fangjun): We only tested 1.17.1 and 1.11.0
// For other versions, we may need to change it
#if ORT_API_VERSION >= 12
  auto v = meta_data.LookupCustomMetadataMapAllocated(key, allocator);
  return v ? v.get() : "";
#else
  auto v = meta_data.LookupCustomMetadataMap(key, allocator);
  std::string ans = v ? v : "";
  allocator->Free(allocator, v);
  return ans;
#endif
}

// Convert IEEE 754 half-precision (16-bit) float to single-precision (32-bit)
// float. Handles special cases: zero, subnormal, normal, infinity, and NaN.
float HalfBitsToFloat(uint16_t h) {
  const uint32_t sign = (static_cast<uint32_t>(h & 0x8000u)) << 16;
  const uint32_t exp = (h & 0x7C00u) >> 10;
  const uint32_t mant = (h & 0x03FFu);
  uint32_t fbits = 0;
  if (exp == 0) {
    if (mant == 0) {
      fbits = sign;
    } else {
      uint32_t m = mant;
      uint32_t e = 127 - 15 + 1;
      while ((m & 0x0400u) == 0) {
        m <<= 1;
        --e;
      }
      m &= 0x03FFu;
      fbits = sign | (e << 23) | (m << 13);
    }
  } else if (exp == 31) {
    fbits = sign | 0x7F800000u | (mant << 13);
  } else {
    const uint32_t e = exp + (127 - 15);
    fbits = sign | (e << 23) | (mant << 13);
  }
  float out;
  std::memcpy(&out, &fbits, sizeof(out));
  return out;
}

// Convert IEEE 754 single-precision (32-bit) float to half-precision (16-bit)
// float. Handles overflow (clamped to infinity), underflow (clamped to zero),
// and normal values with proper rounding.
uint16_t FloatToHalfBits(float f) {
  uint32_t x;
  std::memcpy(&x, &f, sizeof(x));
  const uint32_t sign = (x >> 16) & 0x8000u;
  const int32_t exp = static_cast<int32_t>((x >> 23) & 0xFFu);
  const uint32_t mant = x & 0x007FFFFFu;
  if (exp == 255) {
    if (mant == 0) return static_cast<uint16_t>(sign | 0x7C00u);
    return static_cast<uint16_t>(sign | 0x7C00u | (mant ? 0x1u : 0));
  }
  int32_t new_exp = exp - 127 + 15;
  if (new_exp >= 31) {
    return static_cast<uint16_t>(sign | 0x7C00u);
  } else if (new_exp <= 0) {
    if (new_exp < -10) {
      return static_cast<uint16_t>(sign);
    }
    uint32_t m = mant | 0x00800000u;
    int32_t shift = 14 - new_exp;
    uint32_t half_m = m >> shift;
    if ((m >> (shift - 1)) & 1u) {
      half_m += 1;
    }
    return static_cast<uint16_t>(sign | (half_m & 0x03FFu));
  } else {
    uint16_t half_exp = static_cast<uint16_t>(new_exp << 10);
    uint32_t half_m = mant >> 13;
    if (mant & 0x00001000u) {
      half_m += 1;
      if (half_m == 0x0400u) {
        half_m = 0;
        half_exp = static_cast<uint16_t>((new_exp + 1) << 10);
        if ((half_exp >> 10) >= 31) {
          return static_cast<uint16_t>(sign | 0x7C00u);
        }
      }
    }
    return static_cast<uint16_t>(sign | half_exp | (half_m & 0x03FFu));
  }
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/onnx-utils.h
================================================
// sherpa-onnx/csrc/onnx-utils.h
//
// Copyright (c)  2023  Xiaomi Corporation
// Copyright (c)  2023  Pingfeng Luo
#ifndef SHERPA_ONNX_CSRC_ONNX_UTILS_H_
#define SHERPA_ONNX_CSRC_ONNX_UTILS_H_

#ifdef _MSC_VER
// For ToWide() below
#include <codecvt>
#include <locale>
#endif

#include <algorithm>
#include <cassert>
#include <ostream>
#include <string>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT

namespace sherpa_onnx {

/**
 * Get the input names of a model.
 *
 * @param sess An onnxruntime session.
 * @param input_names. On return, it contains the input names of the model.
 * @param input_names_ptr. On return, input_names_ptr[i] contains
 *                         input_names[i].c_str()
 */
void GetInputNames(Ort::Session *sess, std::vector<std::string> *input_names,
                   std::vector<const char *> *input_names_ptr);

/**
 * Get the output names of a model.
 *
 * @param sess An onnxruntime session.
 * @param output_names. On return, it contains the output names of the model.
 * @param output_names_ptr. On return, output_names_ptr[i] contains
 *                         output_names[i].c_str()
 */
void GetOutputNames(Ort::Session *sess, std::vector<std::string> *output_names,
                    std::vector<const char *> *output_names_ptr);

/**
 * Get the output frame of Encoder
 *
 * @param allocator allocator of onnxruntime
 * @param encoder_out encoder out tensor
 * @param t frame_index
 *
 */
Ort::Value GetEncoderOutFrame(OrtAllocator *allocator, Ort::Value *encoder_out,
                              int32_t t);

std::string LookupCustomModelMetaData(const Ort::ModelMetadata &meta_data,
                                      const char *key, OrtAllocator *allocator);

void PrintModelMetadata(std::ostream &os,
                        const Ort::ModelMetadata &meta_data);  // NOLINT

// Return a deep copy of v
Ort::Value Clone(OrtAllocator *allocator, const Ort::Value *v);

// Return a shallow copy
Ort::Value View(Ort::Value *v);

float ComputeSum(const Ort::Value *v, int32_t n = -1);
float ComputeMean(const Ort::Value *v, int32_t n = -1);

// Print a 1-D tensor to stderr
template <typename T = float>
void Print1D(const Ort::Value *v);

// Print a 2-D tensor to stderr
template <typename T = float>
void Print2D(const Ort::Value *v);

// Print a 3-D tensor to stderr
void Print3D(const Ort::Value *v);

// Print a 4-D tensor to stderr
void Print4D(const Ort::Value *v);

void PrintShape(const Ort::Value *v);

template <typename T = float>
void Fill(Ort::Value *tensor, T value) {
  auto n = tensor->GetTypeInfo().GetTensorTypeAndShapeInfo().GetElementCount();
  auto p = tensor->GetTensorMutableData<T>();
  std::fill(p, p + n, value);
}

// TODO(fangjun): Document it
Ort::Value Repeat(OrtAllocator *allocator, Ort::Value *cur_encoder_out,
                  const std::vector<int32_t> &hyps_num_split);

struct CopyableOrtValue {
  Ort::Value value{nullptr};

  CopyableOrtValue() = default;

  /*explicit*/ CopyableOrtValue(Ort::Value v)  // NOLINT
      : value(std::move(v)) {}

  CopyableOrtValue(const CopyableOrtValue &other);

  CopyableOrtValue &operator=(const CopyableOrtValue &other);

  CopyableOrtValue(CopyableOrtValue &&other) noexcept;

  CopyableOrtValue &operator=(CopyableOrtValue &&other) noexcept;
};

std::vector<CopyableOrtValue> Convert(std::vector<Ort::Value> values);

std::vector<Ort::Value> Convert(std::vector<CopyableOrtValue> values);

float HalfBitsToFloat(uint16_t h);

uint16_t FloatToHalfBits(float f);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONNX_UTILS_H_


================================================
FILE: sherpa-onnx/csrc/packed-sequence-test.cc
================================================
// sherpa-onnx/csrc/packed-sequence-test.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/packed-sequence.h"

#include <cstdio>
#include <numeric>

#include "gtest/gtest.h"
#include "sherpa-onnx/csrc/onnx-utils.h"

namespace sherpa_onnx {

TEST(PackedSequence, Case1) {
  Ort::AllocatorWithDefaultOptions allocator;
  std::array<int64_t, 3> shape{5, 5, 4};
  Ort::Value v =
      Ort::Value::CreateTensor<float>(allocator, shape.data(), shape.size());
  float *p = v.GetTensorMutableData<float>();

  std::iota(p, p + shape[0] * shape[1] * shape[2], 0);

  Ort::Value length =
      Ort::Value::CreateTensor<int64_t>(allocator, shape.data(), 1);
  int64_t *p_length = length.GetTensorMutableData<int64_t>();
  p_length[0] = 1;
  p_length[1] = 2;
  p_length[2] = 3;
  p_length[3] = 5;
  p_length[4] = 2;

  auto packed_seq = PackPaddedSequence(allocator, &v, &length);
  fprintf(stderr, "sorted indexes: ");
  for (auto i : packed_seq.sorted_indexes) {
    fprintf(stderr, "%d ", static_cast<int32_t>(i));
  }
  fprintf(stderr, "\n");
  // output index:   0 1 2 3 4
  // sorted indexes: 3 2 1 4 0
  // length:         5 3 2 2 1
  Print3D(&v);
  Print2D(&packed_seq.data);
  fprintf(stderr, "batch sizes per time step: ");
  for (auto i : packed_seq.batch_sizes) {
    fprintf(stderr, "%d ", static_cast<int32_t>(i));
  }
  fprintf(stderr, "\n");

  // TODO(fangjun): Check that the return value is correct
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/packed-sequence.cc
================================================
// sherpa-onnx/csrc/packed-sequence.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/packed-sequence.h"

#include <algorithm>
#include <cassert>
#include <numeric>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/slice.h"
#include "sherpa-onnx/csrc/transpose.h"

namespace sherpa_onnx {

static Ort::Value IndexSelect(OrtAllocator *allocator, const Ort::Value *value,
                              const std::vector<int32_t> &sorted_indexes) {
  auto shape = value->GetTensorTypeAndShapeInfo().GetShape();
  assert(shape.size() == 3);
  std::array<int64_t, 3> ans_shape{static_cast<int64_t>(sorted_indexes.size()),
                                   shape[1], shape[2]};

  Ort::Value ans = Ort::Value::CreateTensor<float>(allocator, ans_shape.data(),
                                                   ans_shape.size());
  float *dst = ans.GetTensorMutableData<float>();
  const float *src = value->GetTensorData<float>();

  for (auto i : sorted_indexes) {
    const float *start = src + i * shape[1] * shape[2];
    std::copy(start, start + shape[1] * shape[2], dst);
    dst += shape[1] * shape[2];
  }
  return ans;
}

PackedSequence PackPaddedSequence(OrtAllocator *allocator,
                                  const Ort::Value *value, Ort::Value *length) {
  std::vector<int64_t> v_shape = value->GetTensorTypeAndShapeInfo().GetShape();
  std::vector<int64_t> l_shape = length->GetTensorTypeAndShapeInfo().GetShape();

  assert(v_shape.size() == 3);
  assert(l_shape.size() == 1);
  assert(v_shape[0] == l_shape[0]);

  std::vector<int32_t> indexes(v_shape[0]);
  std::iota(indexes.begin(), indexes.end(), 0);

  const int64_t *p_length = length->GetTensorData<int64_t>();
  // sort in descending order
  std::sort(indexes.begin(), indexes.end(), [p_length](int32_t i, int32_t j) {
    return p_length[i] > p_length[j];
  });

  int32_t n = static_cast<int32_t>(v_shape[0]);

  int64_t max_T = p_length[indexes[0]];

  auto sum_T = std::accumulate(p_length, p_length + n, static_cast<int64_t>(0));

  std::array<int64_t, 2> data_shape{sum_T, v_shape[2]};

  Ort::Value data = Ort::Value::CreateTensor<float>(
      allocator, data_shape.data(), data_shape.size());
  float *dst = data.GetTensorMutableData<float>();

  Ort::Value tensor = IndexSelect(allocator, value, indexes);
  tensor = Transpose01(allocator, &tensor);

  // batch size at each time step
  std::vector<int32_t> batch_sizes;
  batch_sizes.reserve(max_T);

  int64_t prev_l = 0;
  for (int32_t i = 0; i != n; ++i) {
    auto cur_l = p_length[indexes[n - 1 - i]];
    assert(cur_l >= prev_l);
    if (cur_l == prev_l) {
      continue;
    }

    auto cur_batch_size = n - i;

    Ort::Value cur_batch =
        Slice(allocator, &tensor, prev_l, cur_l, 0, cur_batch_size);
    auto count = cur_batch.GetTensorTypeAndShapeInfo().GetElementCount();
    const float *src = cur_batch.GetTensorData<float>();
    std::copy(src, src + count, dst);
    dst += count;

    for (int32_t j = prev_l; j < cur_l; ++j) {
      batch_sizes.push_back(cur_batch_size);
    }

    prev_l = cur_l;
  }

  PackedSequence packed_seq;
  packed_seq.sorted_indexes = std::move(indexes);
  packed_seq.data = std::move(data);
  packed_seq.batch_sizes = std::move(batch_sizes);

  return packed_seq;
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/packed-sequence.h
================================================
// sherpa-onnx/csrc/packed-sequence.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_PACKED_SEQUENCE_H_
#define SHERPA_ONNX_CSRC_PACKED_SEQUENCE_H_

#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT

namespace sherpa_onnx {

struct PackedSequence {
  std::vector<int32_t> sorted_indexes;
  std::vector<int32_t> batch_sizes;

  // data is a 2-D tensor of shape (sum(batch_sizes), channels)
  Ort::Value data{nullptr};

  // Return a shallow copy of data[start:start+size, :]
  Ort::Value Get(int32_t start, int32_t size) {
    auto shape = data.GetTensorTypeAndShapeInfo().GetShape();

    std::array<int64_t, 2> ans_shape{size, shape[1]};

    float *p = data.GetTensorMutableData<float>();

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    // a shallow copy
    return Ort::Value::CreateTensor(memory_info, p + start * shape[1],
                                    size * shape[1], ans_shape.data(),
                                    ans_shape.size());
  }
};

/** Similar to torch.nn.utils.rnn.pad_sequence but it supports only
 * batch_first=true.
 *
 * @param allocator
 * @param value  A 3-D tensor of shape (B, T, C). Its dtype is float.
 * @param length A 1-D tensor of shape (B,). Its dtype is int64_t. Each
 *               element in it specifies the valid length of the corresponding
 *               entry in value before padding.
 */
PackedSequence PackPaddedSequence(OrtAllocator *allocator,
                                  const Ort::Value *value, Ort::Value *length);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_PACKED_SEQUENCE_H_


================================================
FILE: sherpa-onnx/csrc/pad-sequence-test.cc
================================================
// sherpa-onnx/csrc/pad-sequence-test.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/pad-sequence.h"

#include <numeric>

#include "gtest/gtest.h"
#include "sherpa-onnx/csrc/onnx-utils.h"

namespace sherpa_onnx {

TEST(PadSequence, ThreeTensors) {
  Ort::AllocatorWithDefaultOptions allocator;

  std::array<int64_t, 2> shape1{3, 5};
  Ort::Value v1 =
      Ort::Value::CreateTensor<float>(allocator, shape1.data(), shape1.size());
  float *p1 = v1.GetTensorMutableData<float>();
  std::iota(p1, p1 + shape1[0] * shape1[1], 0);

  std::array<int64_t, 2> shape2{4, 5};
  Ort::Value v2 =
      Ort::Value::CreateTensor<float>(allocator, shape2.data(), shape2.size());
  float *p2 = v2.GetTensorMutableData<float>();
  std::iota(p2, p2 + shape2[0] * shape2[1], 0);

  std::array<int64_t, 2> shape3{2, 5};
  Ort::Value v3 =
      Ort::Value::CreateTensor<float>(allocator, shape3.data(), shape3.size());
  float *p3 = v3.GetTensorMutableData<float>();
  std::iota(p3, p3 + shape3[0] * shape3[1], 0);

  auto ans = PadSequence(allocator, {&v1, &v2, &v3}, -1);

  Print2D(&v1);
  Print2D(&v2);
  Print2D(&v3);
  Print3D(&ans);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/pad-sequence.cc
================================================
// sherpa-onnx/csrc/pad-sequence.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/pad-sequence.h"

#include <algorithm>
#include <cassert>
#include <vector>

namespace sherpa_onnx {

Ort::Value PadSequence(OrtAllocator *allocator,
                       const std::vector<const Ort::Value *> &values,
                       float padding_value) {
  int32_t batch_size = static_cast<int32_t>(values.size());

  std::vector<int64_t> shape0 =
      values[0]->GetTensorTypeAndShapeInfo().GetShape();
  assert(shape0.size() == 2);

  auto feature_dim = shape0[1];
  auto max_T = shape0[0];

  for (int32_t i = 1; i != batch_size; ++i) {
    auto shape = values[i]->GetTensorTypeAndShapeInfo().GetShape();

    assert(shape.size() == 2);
    assert(shape[1] == feature_dim);

    max_T = std::max(max_T, shape[0]);
  }
  std::array<int64_t, 3> ans_shape{batch_size, max_T, feature_dim};

  Ort::Value ans = Ort::Value::CreateTensor<float>(allocator, ans_shape.data(),
                                                   ans_shape.size());
  float *dst = ans.GetTensorMutableData<float>();
  std::fill(dst, dst + batch_size * max_T * feature_dim, padding_value);

  for (const auto *v : values) {
    const float *src = v->GetTensorData<float>();
    auto shape = v->GetTensorTypeAndShapeInfo().GetShape();
    std::copy(src, src + shape[0] * shape[1], dst);
    dst += max_T * feature_dim;
  }

  return ans;

  // TODO(fangjun): Check that the returned value is correct.
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/pad-sequence.h
================================================
// sherpa-onnx/csrc/pad-sequence.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_PAD_SEQUENCE_H_
#define SHERPA_ONNX_CSRC_PAD_SEQUENCE_H_

#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT

namespace sherpa_onnx {

/** Similar to torch.nn.utils.rnn.pad_sequence but it supports only
 * batch_first=true.
 *
 * @param allocator
 * @param values A list of 2-D tensors. Each tensor's second dimension
 *               must be the same and the data type of each tensor should
 *               be float.
 * @param padding_value Value used for padding. For log-fbank, you usually use
 *                      -23.025850929940457f as the padding value.
 *
 * @return Return a 3-D tensor of shape (B, max_T, C).
 */
Ort::Value PadSequence(OrtAllocator *allocator,
                       const std::vector<const Ort::Value *> &values,
                       float padding_value);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_PAD_SEQUENCE_H_


================================================
FILE: sherpa-onnx/csrc/parse-options.cc
================================================
// sherpa-onnx/csrc/parse-options.cc
/**
 * Copyright 2009-2011  Karel Vesely;  Microsoft Corporation;
 *                      Saarland University (Author: Arnab Ghoshal);
 * Copyright 2012-2013  Johns Hopkins University (Author: Daniel Povey);
 *                      Frantisek Skala;  Arnab Ghoshal
 * Copyright 2013       Tanel Alumae
 */

// This file is copied and modified from kaldi/src/util/parse-options.cu

#include "sherpa-onnx/csrc/parse-options.h"

#include <algorithm>
#include <array>
#include <cctype>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <string>

#include "sherpa-onnx/csrc/log.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

ParseOptions::ParseOptions(const std::string &prefix, ParseOptions *po)
    : print_args_(false), help_(false), usage_(""), argc_(0), argv_(nullptr) {
  if (po != nullptr && po->other_parser_ != nullptr) {
    // we get here if this constructor is used twice, recursively.
    other_parser_ = po->other_parser_;
  } else {
    other_parser_ = po;
  }
  if (po != nullptr && !po->prefix_.empty()) {
    prefix_ = po->prefix_ + std::string(".") + prefix;
  } else {
    prefix_ = prefix;
  }
}

void ParseOptions::Register(const std::string &name, bool *ptr,
                            const std::string &doc) {
  RegisterTmpl(name, ptr, doc);
}

void ParseOptions::Register(const std::string &name, int32_t *ptr,
                            const std::string &doc) {
  RegisterTmpl(name, ptr, doc);
}

void ParseOptions::Register(const std::string &name, int64_t *ptr,
                            const std::string &doc) {
  RegisterTmpl(name, ptr, doc);
}

void ParseOptions::Register(const std::string &name, uint32_t *ptr,
                            const std::string &doc) {
  RegisterTmpl(name, ptr, doc);
}

void ParseOptions::Register(const std::string &name, float *ptr,
                            const std::string &doc) {
  RegisterTmpl(name, ptr, doc);
}

void ParseOptions::Register(const std::string &name, double *ptr,
                            const std::string &doc) {
  RegisterTmpl(name, ptr, doc);
}

void ParseOptions::Register(const std::string &name, std::string *ptr,
                            const std::string &doc) {
  RegisterTmpl(name, ptr, doc);
}

// old-style, used for registering application-specific parameters
template <typename T>
void ParseOptions::RegisterTmpl(const std::string &name, T *ptr,
                                const std::string &doc) {
  if (other_parser_ == nullptr) {
    this->RegisterCommon(name, ptr, doc, false);
  } else {
    SHERPA_ONNX_CHECK(prefix_ != "")
        << "prefix: " << prefix_ << "\n"
        << "Cannot use empty prefix when registering with prefix.";
    std::string new_name = prefix_ + '.' + name;  // name becomes prefix.name
    other_parser_->Register(new_name, ptr, doc);
  }
}

// does the common part of the job of registering a parameter
template <typename T>
void ParseOptions::RegisterCommon(const std::string &name, T *ptr,
                                  const std::string &doc, bool is_standard) {
  SHERPA_ONNX_CHECK(ptr != nullptr);
  std::string idx = name;
  NormalizeArgName(&idx);
  if (doc_map_.find(idx) != doc_map_.end()) {
    SHERPA_ONNX_LOGE("Registering option twice, ignoring second time: %s",
                     name.c_str());
  } else {
    this->RegisterSpecific(name, idx, ptr, doc, is_standard);
  }
}

// used to register standard parameters (those that are present in all of the
// applications)
template <typename T>
void ParseOptions::RegisterStandard(const std::string &name, T *ptr,
                                    const std::string &doc) {
  this->RegisterCommon(name, ptr, doc, true);
}

void ParseOptions::RegisterSpecific(const std::string &name,
                                    const std::string &idx, bool *b,
                                    const std::string &doc, bool is_standard) {
  bool_map_[idx] = b;
  doc_map_[idx] =
      DocInfo(name, doc + " (bool, default = " + ((*b) ? "true)" : "false)"),
              is_standard);
}

void ParseOptions::RegisterSpecific(const std::string &name,
                                    const std::string &idx, int32_t *i,
                                    const std::string &doc, bool is_standard) {
  int_map_[idx] = i;
  std::ostringstream ss;
  ss << doc << " (int, default = " << *i << ")";
  doc_map_[idx] = DocInfo(name, ss.str(), is_standard);
}

void ParseOptions::RegisterSpecific(const std::string &name,
                                    const std::string &idx, int64_t *i,
                                    const std::string &doc, bool is_standard) {
  int64_map_[idx] = i;
  std::ostringstream ss;
  ss << doc << " (int64, default = " << *i << ")";
  doc_map_[idx] = DocInfo(name, ss.str(), is_standard);
}

void ParseOptions::RegisterSpecific(const std::string &name,
                                    const std::string &idx, uint32_t *u,
                                    const std::string &doc, bool is_standard) {
  uint_map_[idx] = u;
  std::ostringstream ss;
  ss << doc << " (uint, default = " << *u << ")";
  doc_map_[idx] = DocInfo(name, ss.str(), is_standard);
}

void ParseOptions::RegisterSpecific(const std::string &name,
                                    const std::string &idx, float *f,
                                    const std::string &doc, bool is_standard) {
  float_map_[idx] = f;
  std::ostringstream ss;
  ss << doc << " (float, default = " << *f << ")";
  doc_map_[idx] = DocInfo(name, ss.str(), is_standard);
}

void ParseOptions::RegisterSpecific(const std::string &name,
                                    const std::string &idx, double *f,
                                    const std::string &doc, bool is_standard) {
  double_map_[idx] = f;
  std::ostringstream ss;
  ss << doc << " (double, default = " << *f << ")";
  doc_map_[idx] = DocInfo(name, ss.str(), is_standard);
}

void ParseOptions::RegisterSpecific(const std::string &name,
                                    const std::string &idx, std::string *s,
                                    const std::string &doc, bool is_standard) {
  string_map_[idx] = s;
  doc_map_[idx] =
      DocInfo(name, doc + " (string, default = \"" + *s + "\")", is_standard);
}

void ParseOptions::DisableOption(const std::string &name) {
  if (argv_ != nullptr) {
    SHERPA_ONNX_LOGE("DisableOption must not be called after calling Read().");
    exit(-1);
  }
  if (doc_map_.erase(name) == 0) {
    SHERPA_ONNX_LOGE("Option %s was not registered so cannot be disabled: ",
                     name.c_str());
    exit(-1);
  }
  bool_map_.erase(name);
  int_map_.erase(name);
  int64_map_.erase(name);
  uint_map_.erase(name);
  float_map_.erase(name);
  double_map_.erase(name);
  string_map_.erase(name);
}

int32_t ParseOptions::NumArgs() const { return positional_args_.size(); }

std::string ParseOptions::GetArg(int32_t i) const {
  if (i < 1 || i > static_cast<int32_t>(positional_args_.size())) {
    SHERPA_ONNX_LOGE("ParseOptions::GetArg, invalid index %d", i);
    exit(-1);
  }

  return positional_args_[i - 1];
}

// We currently do not support any other options.
enum ShellType : std::uint8_t { kBash = 0 };

// This can be changed in the code if it ever does need to be changed (as it's
// unlikely that one compilation of this tool-set would use both shells).
static ShellType kShellType = kBash;

// Returns true if we need to escape a string before putting it into
// a shell (mainly thinking of bash shell, but should work for others)
// This is for the convenience of the user so command-lines that are
// printed out by ParseOptions::Read (with --print-args=true) are
// paste-able into the shell and will run. If you use a different type of
// shell, it might be necessary to change this function.
// But it's mostly a cosmetic issue as it basically affects how
// the program echoes its command-line arguments to the screen.
static bool MustBeQuoted(const std::string &str, ShellType st) {
  // Only Bash is supported (for the moment).
  SHERPA_ONNX_CHECK_EQ(st, kBash) << "Invalid shell type.";

  const char *c = str.c_str();
  if (*c == '\0') {
    return true;  // Must quote empty string
  } else {
    std::array<const char *, 2> ok_chars{};

    // These seem not to be interpreted as long as there are no other "bad"
    // characters involved (e.g. "," would be interpreted as part of something
    // like a{b,c}, but not on its own.
    ok_chars[kBash] = "[]~#^_-+=:.,/";

    // Just want to make sure that a space character doesn't get automatically
    // inserted here via an automated style-checking script, like it did before.
    SHERPA_ONNX_CHECK(!strchr(ok_chars[kBash], ' '));

    for (; *c != '\0'; ++c) {
      // For non-alphanumeric characters we have a list of characters which
      // are OK. All others are forbidden (this is easier since the shell
      // interprets most non-alphanumeric characters).
      if (!isalnum(*c)) {
        const char *d = nullptr;
        for (d = ok_chars[st]; *d != '\0'; ++d) {
          if (*c == *d) break;
        }
        // If not alphanumeric or one of the "ok_chars", it must be escaped.
        if (*d == '\0') return true;
      }
    }
    return false;  // The string was OK. No quoting or escaping.
  }
}

// Returns a quoted and escaped version of "str"
// which has previously been determined to need escaping.
// Our aim is to print out the command line in such a way that if it's
// pasted into a shell of ShellType "st" (only bash for now), it
// will get passed to the program in the same way.
static std::string QuoteAndEscape(const std::string &str, ShellType /*st*/) {
  // For now we use the following rules:
  // In the normal case, we quote with single-quote "'", and to escape
  // a single-quote we use the string: '\'' (interpreted as closing the
  // single-quote, putting an escaped single-quote from the shell, and
  // then reopening the single quote).
  char quote_char = '\'';
  const char *escape_str = "'\\''";  // e.g. echo 'a'\''b' returns a'b

  // If the string contains single-quotes that would need escaping this
  // way, and we determine that the string could be safely double-quoted
  // without requiring any escaping, then we double-quote the string.
  // This is the case if the characters "`$\ do not appear in the string.
  // e.g. see http://www.redhat.com/mirrors/LDP/LDP/abs/html/quotingvar.html
  const char *c_str = str.c_str();
  if (strchr(c_str, '\'') && !strpbrk(c_str, "\"`$\\")) {
    quote_char = '"';
    escape_str = "\\\"";  // should never be accessed.
  }

  std::array<char, 2> buf{};
  buf[1] = '\0';

  buf[0] = quote_char;
  std::string ans = buf.data();
  const char *c = str.c_str();
  for (; *c != '\0'; ++c) {
    if (*c == quote_char) {
      ans += escape_str;
    } else {
      buf[0] = *c;
      ans += buf.data();
    }
  }
  buf[0] = quote_char;
  ans += buf.data();
  return ans;
}

// static function
std::string ParseOptions::Escape(const std::string &str) {
  return MustBeQuoted(str, kShellType) ? QuoteAndEscape(str, kShellType) : str;
}

int32_t ParseOptions::Read(int32_t argc, const char *const *argv) {
  argc_ = argc;
  argv_ = argv;
  std::string key, value;
  int32_t i = 0;

  // first pass: look for config parameter, look for priority
  for (i = 1; i < argc; ++i) {
    if (std::strncmp(argv[i], "--", 2) == 0) {
      if (std::strcmp(argv[i], "--") == 0) {
        // a lone "--" marks the end of named options
        break;
      }
      bool has_equal_sign = false;
      SplitLongArg(argv[i], &key, &value, &has_equal_sign);
      NormalizeArgName(&key);
      Trim(&value);
      if (key == "config") {
        ReadConfigFile(value);
      } else if (key == "help") {
        PrintUsage();
        exit(0);
      }
    }
  }

  bool double_dash_seen = false;
  // second pass: add the command line options
  for (i = 1; i < argc; ++i) {
    if (std::strncmp(argv[i], "--", 2) == 0) {
      if (std::strcmp(argv[i], "--") == 0) {
        // A lone "--" marks the end of named options.
        // Skip that option and break the processing of named options
        i += 1;
        double_dash_seen = true;
        break;
      }
      bool has_equal_sign = false;
      SplitLongArg(argv[i], &key, &value, &has_equal_sign);
      NormalizeArgName(&key);
      Trim(&value);
      if (!SetOption(key, value, has_equal_sign)) {
        PrintUsage(true);
        SHERPA_ONNX_LOGE("Invalid option %s", argv[i]);
        exit(-1);
      }
    } else {
      break;
    }
  }

  // process remaining arguments as positional
  for (; i < argc; ++i) {
    if ((std::strcmp(argv[i], "--") == 0) && !double_dash_seen) {
      double_dash_seen = true;
    } else {
      positional_args_.emplace_back(argv[i]);
    }
  }

  // if the user did not suppress this with --print-args = false....
  if (print_args_) {
    std::ostringstream strm;
    for (int32_t j = 0; j < argc; ++j) strm << Escape(argv[j]) << " ";
    strm << '\n';
    SHERPA_ONNX_LOGE("%s", strm.str().c_str());
  }
  return i;
}

void ParseOptions::PrintUsage(bool print_command_line /*=false*/) const {
  std::ostringstream os;
  os << '\n' << usage_ << '\n';
  // first we print application-specific options
  bool app_specific_header_printed = false;
  for (const auto &it : doc_map_) {
    if (it.second.is_standard_ == false) {  // application-specific option
      if (app_specific_header_printed == false) {  // header was not yet printed
        os << "Options:" << '\n';
        app_specific_header_printed = true;
      }
      os << "  --" << std::setw(25) << std::left << it.second.name_ << " : "
         << it.second.use_msg_ << '\n';
    }
  }
  if (app_specific_header_printed == true) {
    os << '\n';
  }

  // then the standard options
  os << "Standard options:" << '\n';
  for (const auto &it : doc_map_) {
    if (it.second.is_standard_ == true) {  // we have standard option
      os << "  --" << std::setw(25) << std::left << it.second.name_ << " : "
         << it.second.use_msg_ << '\n';
    }
  }
  os << '\n';
  if (print_command_line) {
    std::ostringstream strm;
    strm << "Command line was: ";
    for (int32_t j = 0; j < argc_; ++j) strm << Escape(argv_[j]) << " ";
    strm << '\n';
    os << strm.str();
  }

  SHERPA_ONNX_LOGE("%s", os.str().c_str());
}

void ParseOptions::PrintConfig(std::ostream &os) const {
  os << '\n' << "[[ Configuration of UI-Registered options ]]" << '\n';
  std::string key;
  for (const auto &it : doc_map_) {
    key = it.first;
    os << it.second.name_ << " = ";
    if (bool_map_.end() != bool_map_.find(key)) {
      os << (*bool_map_.at(key) ? "true" : "false");
    } else if (int_map_.end() != int_map_.find(key)) {
      os << (*int_map_.at(key));
    } else if (int64_map_.end() != int64_map_.find(key)) {
      os << (*int64_map_.at(key));
    } else if (uint_map_.end() != uint_map_.find(key)) {
      os << (*uint_map_.at(key));
    } else if (float_map_.end() != float_map_.find(key)) {
      os << (*float_map_.at(key));
    } else if (double_map_.end() != double_map_.find(key)) {
      os << (*double_map_.at(key));
    } else if (string_map_.end() != string_map_.find(key)) {
      os << "'" << *string_map_.at(key) << "'";
    } else {
      SHERPA_ONNX_LOGE("PrintConfig: unrecognized option %s [code error]",
                       key.c_str());
      exit(-1);
    }
    os << '\n';
  }
  os << '\n';
}

void ParseOptions::ReadConfigFile(const std::string &filename) {
  std::ifstream is(filename.c_str(), std::ifstream::in);
  if (!is.good()) {
    SHERPA_ONNX_LOGE("Cannot open config file: %s", filename.c_str());
    exit(-1);
  }

  std::string line, key, value;
  int32_t line_number = 0;
  while (std::getline(is, line)) {
    ++line_number;
    // trim out the comments
    size_t pos = line.find_first_of('#');
    if (pos != std::string::npos) {
      line.erase(pos);
    }
    // skip empty lines
    Trim(&line);
    if (line.empty()) continue;

    if (line.substr(0, 2) != "--") {
      SHERPA_ONNX_LOGE(
          "Reading config file %s: line %d does not look like a line "
          "from a sherpa-onnx command-line program's config file: should "
          "be of the form --x=y.  Note: config files intended to "
          "be sourced by shell scripts lack the '--'.",
          filename.c_str(), line_number);
      exit(-1);
    }

    // parse option
    bool has_equal_sign = false;
    SplitLongArg(line, &key, &value, &has_equal_sign);
    NormalizeArgName(&key);
    Trim(&value);
    if (!SetOption(key, value, has_equal_sign)) {
      PrintUsage(true);
      SHERPA_ONNX_LOGE("Invalid option %s in config file %s: line %d",
                       line.c_str(), filename.c_str(), line_number);
      exit(-1);
    }
  }
}

void ParseOptions::SplitLongArg(const std::string &in, std::string *key,
                                std::string *value,
                                bool *has_equal_sign) const {
  SHERPA_ONNX_CHECK(in.substr(0, 2) == "--") << in;  // precondition.
  size_t pos = in.find_first_of('=', 0);
  if (pos == std::string::npos) {  // we allow --option for bools
    // defaults to empty.  We handle this differently in different cases.
    *key = in.substr(2, in.size() - 2);  // 2 because starts with --.
    *value = "";
    *has_equal_sign = false;
  } else if (pos == 2) {  // we also don't allow empty keys: --=value
    PrintUsage(true);
    SHERPA_ONNX_LOGE("Invalid option (no key): %s", in.c_str());
    exit(-1);
  } else {                         // normal case: --option=value
    *key = in.substr(2, pos - 2);  // 2 because starts with --.
    *value = in.substr(pos + 1);
    *has_equal_sign = true;
  }
}

void ParseOptions::NormalizeArgName(std::string *str) const {
  std::string out;
  std::string::iterator it;

  for (it = str->begin(); it != str->end(); ++it) {
    if (*it == '_') {
      out += '-';  // convert _ to -
    } else {
      out += std::tolower(*it);
    }
  }
  *str = out;

  SHERPA_ONNX_CHECK_GT(str->length(), 0);
}

void ParseOptions::Trim(std::string *str) const {
  const char *white_chars = " \t\n\r\f\v";

  std::string::size_type pos = str->find_last_not_of(white_chars);
  if (pos != std::string::npos) {
    str->erase(pos + 1);
    pos = str->find_first_not_of(white_chars);
    if (pos != std::string::npos) str->erase(0, pos);
  } else {
    str->erase(str->begin(), str->end());
  }
}

bool ParseOptions::SetOption(const std::string &key, const std::string &value,
                             bool has_equal_sign) {
  if (bool_map_.end() != bool_map_.find(key)) {
    if (has_equal_sign && value.empty()) {
      SHERPA_ONNX_LOGE("Invalid option --%s=", key.c_str());
      exit(-1);
    }
    *(bool_map_[key]) = ToBool(value);
  } else if (int_map_.end() != int_map_.find(key)) {
    *(int_map_[key]) = ToInt(value);
  } else if (int64_map_.end() != int64_map_.find(key)) {
    *(int64_map_[key]) = ToInt64(value);
  } else if (uint_map_.end() != uint_map_.find(key)) {
    *(uint_map_[key]) = ToUint(value);
  } else if (float_map_.end() != float_map_.find(key)) {
    *(float_map_[key]) = ToFloat(value);
  } else if (double_map_.end() != double_map_.find(key)) {
    *(double_map_[key]) = ToDouble(value);
  } else if (string_map_.end() != string_map_.find(key)) {
    if (!has_equal_sign) {
      SHERPA_ONNX_LOGE("Invalid option --%s (option format is --x=y).",
                       key.c_str());
      exit(-1);
    }
    *(string_map_[key]) = value;
  } else {
    return false;
  }
  return true;
}

bool ParseOptions::ToBool(std::string str) const {
  std::transform(str.begin(), str.end(), str.begin(), ::tolower);

  // allow "" as a valid option for "true", so that --x is the same as --x=true
  if (str == "true" || str == "t" || str == "1" || str.empty()) {
    return true;
  }
  if (str == "false" || str == "f" || str == "0") {
    return false;
  }
  // if it is neither true nor false:
  PrintUsage(true);
  SHERPA_ONNX_LOGE(
      "Invalid format for boolean argument [expected true or false]: %s",
      str.c_str());
  exit(-1);
  return false;  // never reached
}

int32_t ParseOptions::ToInt(const std::string &str) const {
  int32_t ret = 0;
  if (!ConvertStringToInteger(str, &ret)) {
    SHERPA_ONNX_LOGE("Invalid integer option \"%s\"", str.c_str());
    exit(-1);
  }
  return ret;
}

int64_t ParseOptions::ToInt64(const std::string &str) const {
  int64_t ret = 0;
  if (!ConvertStringToInteger(str, &ret)) {
    SHERPA_ONNX_LOGE("Invalid integer int64 option \"%s\"", str.c_str());
    exit(-1);
  }
  return ret;
}

uint32_t ParseOptions::ToUint(const std::string &str) const {
  uint32_t ret = 0;
  if (!ConvertStringToInteger(str, &ret)) {
    SHERPA_ONNX_LOGE("Invalid integer option \"%s\"", str.c_str());
    exit(-1);
  }
  return ret;
}

float ParseOptions::ToFloat(const std::string &str) const {
  float ret = 0;
  if (!ConvertStringToReal(str, &ret)) {
    SHERPA_ONNX_LOGE("Invalid floating-point option \"%s\"", str.c_str());
    exit(-1);
  }
  return ret;
}

double ParseOptions::ToDouble(const std::string &str) const {
  double ret = 0;
  if (!ConvertStringToReal(str, &ret)) {
    SHERPA_ONNX_LOGE("Invalid floating-point option \"%s\"", str.c_str());
    exit(-1);
  }
  return ret;
}

// instantiate templates
template void ParseOptions::RegisterTmpl(const std::string &name, bool *ptr,
                                         const std::string &doc);
template void ParseOptions::RegisterTmpl(const std::string &name, int32_t *ptr,
                                         const std::string &doc);
template void ParseOptions::RegisterTmpl(const std::string &name, int64_t *ptr,
                                         const std::string &doc);
template void ParseOptions::RegisterTmpl(const std::string &name, uint32_t *ptr,
                                         const std::string &doc);
template void ParseOptions::RegisterTmpl(const std::string &name, float *ptr,
                                         const std::string &doc);
template void ParseOptions::RegisterTmpl(const std::string &name, double *ptr,
                                         const std::string &doc);
template void ParseOptions::RegisterTmpl(const std::string &name,
                                         std::string *ptr,
                                         const std::string &doc);

template void ParseOptions::RegisterStandard(const std::string &name, bool *ptr,
                                             const std::string &doc);
template void ParseOptions::RegisterStandard(const std::string &name,
                                             int32_t *ptr,
                                             const std::string &doc);
template void ParseOptions::RegisterStandard(const std::string &name,
                                             int64_t *ptr,
                                             const std::string &doc);
template void ParseOptions::RegisterStandard(const std::string &name,
                                             uint32_t *ptr,
                                             const std::string &doc);
template void ParseOptions::RegisterStandard(const std::string &name,
                                             float *ptr,
                                             const std::string &doc);
template void ParseOptions::RegisterStandard(const std::string &name,
                                             double *ptr,
                                             const std::string &doc);
template void ParseOptions::RegisterStandard(const std::string &name,
                                             std::string *ptr,
                                             const std::string &doc);

template void ParseOptions::RegisterCommon(const std::string &name, bool *ptr,
                                           const std::string &doc,
                                           bool is_standard);
template void ParseOptions::RegisterCommon(const std::string &name,
                                           int32_t *ptr, const std::string &doc,
                                           bool is_standard);
template void ParseOptions::RegisterCommon(const std::string &name,
                                           int64_t *ptr, const std::string &doc,
                                           bool is_standard);
template void ParseOptions::RegisterCommon(const std::string &name,
                                           uint32_t *ptr,
                                           const std::string &doc,
                                           bool is_standard);
template void ParseOptions::RegisterCommon(const std::string &name, float *ptr,
                                           const std::string &doc,
                                           bool is_standard);
template void ParseOptions::RegisterCommon(const std::string &name, double *ptr,
                                           const std::string &doc,
                                           bool is_standard);
template void ParseOptions::RegisterCommon(const std::string &name,
                                           std::string *ptr,
                                           const std::string &doc,
                                           bool is_standard);

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/parse-options.h
================================================
// sherpa-onnx/csrc/parse-options.h
//
// Copyright (c)  2022-2023  Xiaomi Corporation
//
// This file is copied and modified from kaldi/src/util/parse-options.h

#ifndef SHERPA_ONNX_CSRC_PARSE_OPTIONS_H_
#define SHERPA_ONNX_CSRC_PARSE_OPTIONS_H_

#include <cstdint>
#include <sstream>
#include <string>
#include <unordered_map>
#include <vector>

namespace sherpa_onnx {

class ParseOptions {
 public:
  explicit ParseOptions(const char *usage)
      : print_args_(true),
        help_(false),
        usage_(usage),
        argc_(0),
        argv_(nullptr),
        prefix_(""),
        other_parser_(nullptr) {
#if !defined(_MSC_VER) && !defined(__CYGWIN__)
    // This is just a convenient place to set the stderr to line
    // buffering mode, since it's called at program start.
    // This helps ensure different programs' output is not mixed up.
    setlinebuf(stderr);
#endif
    RegisterStandard("config", &config_,
                     "Configuration file to read (this "
                     "option may be repeated)");
    RegisterStandard("print-args", &print_args_,
                     "Print the command line arguments (to stderr)");
    RegisterStandard("help", &help_, "Print out usage message");
  }

  /**
    This is a constructor for the special case where some options are
    registered with a prefix to avoid conflicts.  The object thus created will
    only be used temporarily to register an options class with the original
    options parser (which is passed as the *other pointer) using the given
    prefix.  It should not be used for any other purpose, and the prefix must
    not be the empty string.  It seems to be the least bad way of implementing
    options with prefixes at this point.
    Example of usage is:
     ParseOptions po;  // original ParseOptions object
     ParseOptions po_mfcc("mfcc", &po); // object with prefix.
     MfccOptions mfcc_opts;
     mfcc_opts.Register(&po_mfcc);
    The options will now get registered as, e.g., --mfcc.frame-shift=10.0
    instead of just --frame-shift=10.0
   */
  ParseOptions(const std::string &prefix, ParseOptions *other);

  ParseOptions(const ParseOptions &) = delete;
  ParseOptions &operator=(const ParseOptions &) = delete;
  ~ParseOptions() = default;

  void Register(const std::string &name, bool *ptr, const std::string &doc);
  void Register(const std::string &name, int32_t *ptr, const std::string &doc);
  void Register(const std::string &name, int64_t *ptr, const std::string &doc);
  void Register(const std::string &name, uint32_t *ptr, const std::string &doc);
  void Register(const std::string &name, float *ptr, const std::string &doc);
  void Register(const std::string &name, double *ptr, const std::string &doc);
  void Register(const std::string &name, std::string *ptr,
                const std::string &doc);

  /// If called after registering an option and before calling
  /// Read(), disables that option from being used.  Will crash
  /// at runtime if that option had not been registered.
  void DisableOption(const std::string &name);

  /// This one is used for registering standard parameters of all the programs
  template <typename T>
  void RegisterStandard(const std::string &name, T *ptr,
                        const std::string &doc);

  /**
    Parses the command line options and fills the ParseOptions-registered
    variables. This must be called after all the variables were registered!!!

    Initially the variables have implicit values,
    then the config file values are set-up,
    finally the command line values given.
    Returns the first position in argv that was not used.
    [typically not useful: use NumParams() and GetParam(). ]
   */
  int Read(int argc, const char *const *argv);

  /// Prints the usage documentation [provided in the constructor].
  void PrintUsage(bool print_command_line = false) const;

  /// Prints the actual configuration of all the registered variables
  void PrintConfig(std::ostream &os) const;

  /// Reads the options values from a config file.  Must be called after
  /// registering all options.  This is usually used internally after the
  /// standard --config option is used, but it may also be called from a
  /// program.
  void ReadConfigFile(const std::string &filename);

  /// Number of positional parameters (c.f. argc-1).
  int NumArgs() const;

  /// Returns one of the positional parameters; 1-based indexing for argc/argv
  /// compatibility. Will crash if param is not >=1 and <=NumArgs().
  ///
  /// Note: Index is 1 based.
  std::string GetArg(int param) const;

  std::string GetOptArg(int param) const {
    return (param <= NumArgs() ? GetArg(param) : "");
  }

  /// The following function will return a possibly quoted and escaped
  /// version of "str", according to the current shell.  Currently
  /// this is just hardwired to bash.  It's useful for debug output.
  static std::string Escape(const std::string &str);

 private:
  /// Template to register various variable types,
  /// used for program-specific parameters
  template <typename T>
  void RegisterTmpl(const std::string &name, T *ptr, const std::string &doc);

  // Following functions do just the datatype-specific part of the job
  /// Register boolean variable
  void RegisterSpecific(const std::string &name, const std::string &idx,
                        bool *b, const std::string &doc, bool is_standard);
  /// Register int32_t variable
  void RegisterSpecific(const std::string &name, const std::string &idx,
                        int32_t *i, const std::string &doc, bool is_standard);
  /// Register int64_t variable
  void RegisterSpecific(const std::string &name, const std::string &idx,
                        int64_t *i, const std::string &doc, bool is_standard);
  /// Register unsigned  int32_t variable
  void RegisterSpecific(const std::string &name, const std::string &idx,
                        uint32_t *u, const std::string &doc, bool is_standard);
  /// Register float variable
  void RegisterSpecific(const std::string &name, const std::string &idx,
                        float *f, const std::string &doc, bool is_standard);
  /// Register double variable [useful as we change BaseFloat type].
  void RegisterSpecific(const std::string &name, const std::string &idx,
                        double *f, const std::string &doc, bool is_standard);
  /// Register string variable
  void RegisterSpecific(const std::string &name, const std::string &idx,
                        std::string *s, const std::string &doc,
                        bool is_standard);

  /// Does the actual job for both kinds of parameters
  /// Does the common part of the job for all datatypes,
  /// then calls RegisterSpecific
  template <typename T>
  void RegisterCommon(const std::string &name, T *ptr, const std::string &doc,
                      bool is_standard);

  /// Set option with name "key" to "value"; will crash if can't do it.
  /// "has_equal_sign" is used to allow --x for a boolean option x,
  /// and --y=, for a string option y.
  bool SetOption(const std::string &key, const std::string &value,
                 bool has_equal_sign);

  bool ToBool(std::string str) const;
  int32_t ToInt(const std::string &str) const;
  int64_t ToInt64(const std::string &str) const;
  uint32_t ToUint(const std::string &str) const;
  float ToFloat(const std::string &str) const;
  double ToDouble(const std::string &str) const;

  // maps for option variables
  std::unordered_map<std::string, bool *> bool_map_;
  std::unordered_map<std::string, int32_t *> int_map_;
  std::unordered_map<std::string, int64_t *> int64_map_;
  std::unordered_map<std::string, uint32_t *> uint_map_;
  std::unordered_map<std::string, float *> float_map_;
  std::unordered_map<std::string, double *> double_map_;
  std::unordered_map<std::string, std::string *> string_map_;

  /**
     Structure for options' documentation
   */
  struct DocInfo {
    DocInfo() = default;
    DocInfo(const std::string &name, const std::string &usemsg)
        : name_(name), use_msg_(usemsg), is_standard_(false) {}
    DocInfo(const std::string &name, const std::string &usemsg,
            bool is_standard)
        : name_(name), use_msg_(usemsg), is_standard_(is_standard) {}

    std::string name_;
    std::string use_msg_;
    bool is_standard_;
  };
  using DocMapType = std::unordered_map<std::string, DocInfo>;
  DocMapType doc_map_;  ///< map for the documentation

  bool print_args_;     ///< variable for the implicit --print-args parameter
  bool help_;           ///< variable for the implicit --help parameter
  std::string config_;  ///< variable for the implicit --config parameter
  std::vector<std::string> positional_args_;
  const char *usage_;
  int argc_;
  const char *const *argv_;

  /// These members are not normally used. They are only used when the object
  /// is constructed with a prefix
  std::string prefix_;
  ParseOptions *other_parser_;

 protected:
  /// SplitLongArg parses an argument of the form --a=b, --a=, or --a,
  /// and sets "has_equal_sign" to true if an equals-sign was parsed..
  /// this is needed in order to correctly allow --x for a boolean option
  /// x, and --y= for a string option y, and to disallow --x= and --y.
  void SplitLongArg(const std::string &in, std::string *key, std::string *value,
                    bool *has_equal_sign) const;

  void NormalizeArgName(std::string *str) const;

  /// Removes the beginning and trailing whitespaces from a string
  void Trim(std::string *str) const;
};

/// This template is provided for convenience in reading config classes from
/// files; this is not the standard way to read configuration options, but may
/// occasionally be needed.  This function assumes the config has a function
/// "void Register(ParseOptions *opts)" which it can call to register the
/// ParseOptions object.
template <class C>
void ReadConfigFromFile(const std::string &config_filename, C *c) {
  std::ostringstream usage_str;
  usage_str << "Parsing config from "
            << "from '" << config_filename << "'";
  ParseOptions po(usage_str.str().c_str());
  c->Register(&po);
  po.ReadConfigFile(config_filename);
}

/// This variant of the template ReadConfigFromFile is for if you need to read
/// two config classes from the same file.
template <class C1, class C2>
void ReadConfigsFromFile(const std::string &conf, C1 *c1, C2 *c2) {
  std::ostringstream usage_str;
  usage_str << "Parsing config from "
            << "from '" << conf << "'";
  ParseOptions po(usage_str.str().c_str());
  c1->Register(&po);
  c2->Register(&po);
  po.ReadConfigFile(conf);
}

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_PARSE_OPTIONS_H_


================================================
FILE: sherpa-onnx/csrc/phrase-matcher.cc
================================================
// sherpa-onnx/csrc/phrase-matcher.cc
//
// Copyright (c)  2025  Xiaomi Corporation
#include "sherpa-onnx/csrc/phrase-matcher.h"

#include <algorithm>
#include <sstream>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {
class PhraseMatcher::Impl {
 public:
  Impl(const std::unordered_set<std::string> *lexicon,
       const std::vector<std::string> &words, bool debug,
       int32_t max_search_len)
      : lexicon_(lexicon), max_search_len_(max_search_len), debug_(debug) {
    if (max_search_len_ < 1) {
      max_search_len_ = 1;
    }
    if (debug_) {
#if __OHOS__
      SHERPA_ONNX_LOGE("max_search_len %{public}d", max_search_len_);
#else
      SHERPA_ONNX_LOGE("max_search_len %d", max_search_len_);
#endif
    }

    Build(words);

    if (debug_) {
      std::ostringstream os;
      std::string sep;
      os << "After phrase matching: ";
      for (const auto &p : phrases_) {
        os << sep << p;
        sep = "_";
      }

#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
    }
  }

  auto begin() const { return phrases_.begin(); }

  auto end() const { return phrases_.end(); }

 private:
  void Build(const std::vector<std::string> &words) {
    int32_t num_words = static_cast<int32_t>(words.size());
    for (int32_t i = 0; i < num_words;) {
      int32_t start = i;

      std::string w;

      if (!IsAlphaOrPunct(words[i].front())) {
        int32_t end = std::min(i + max_search_len_ - 1, num_words - 1);

        while (end > start) {
          auto this_word = GetWord(words, start, end);
          if (IsAlphaOrPunct(this_word.back())) {
            --end;
            continue;
          }

          if (debug_) {
#if __OHOS__
            SHERPA_ONNX_LOGE("%{public}d-%{public}d: %{public}s", start, end,
                             this_word.c_str());
#else
            SHERPA_ONNX_LOGE("%d-%d: %s", start, end, this_word.c_str());
#endif
          }
          if (lexicon_->count(this_word)) {
            i = end + 1;
            w = std::move(this_word);
            if (debug_) {
#if __OHOS__
              SHERPA_ONNX_LOGE("matched %{public}d-%{public}d: %{public}s",
                               start, end, w.c_str());
#else
              SHERPA_ONNX_LOGE("matched %d-%d: %s", start, end, w.c_str());
#endif
            }
            break;
          }

          end -= 1;
        }
      }

      if (w.empty()) {
        w = words[i];

        if (debug_) {
#if __OHOS__
          SHERPA_ONNX_LOGE("single word %{public}d-%{public}d: %{public}s", i,
                           i, w.c_str());
#else
          SHERPA_ONNX_LOGE("single word %d-%d: %s", i, i, w.c_str());
#endif
        }

        i += 1;
      }

      phrases_.push_back(std::move(w));
    }
  }

 private:
  std::vector<std::string> phrases_;
  const std::unordered_set<std::string> *lexicon_;
  int32_t max_search_len_;
  bool debug_;
};

PhraseMatcher::PhraseMatcher(const std::unordered_set<std::string> *lexicon,
                             const std::vector<std::string> &words,
                             bool debug /*= false*/,
                             int32_t max_search_len /*= 10*/)
    : impl_(std::make_unique<Impl>(lexicon, words, debug, max_search_len)) {}

PhraseMatcher::~PhraseMatcher() = default;

std::vector<std::string>::const_iterator PhraseMatcher::begin() const {
  return impl_->begin();
}
std::vector<std::string>::const_iterator PhraseMatcher::end() const {
  return impl_->end();
}
}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/phrase-matcher.h
================================================
// sherpa-onnx/csrc/phrase-matcher.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_PHRASE_MATCHER_H_
#define SHERPA_ONNX_CSRC_PHRASE_MATCHER_H_

#include <cstdint>
#include <memory>
#include <string>
#include <unordered_set>
#include <vector>

namespace sherpa_onnx {

class PhraseMatcher {
 public:
  PhraseMatcher(const std::unordered_set<std::string>
                    *lexicon,  // Not owned by this instance. The passed lexicon
                               // should live longer than this instance
                const std::vector<std::string> &words, bool debug = false,
                int32_t max_search_len = 10);
  ~PhraseMatcher();

  std::vector<std::string>::const_iterator begin() const;
  std::vector<std::string>::const_iterator end() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_PHRASE_MATCHER_H_


================================================
FILE: sherpa-onnx/csrc/piper-phonemize-lexicon.cc
================================================
// sherpa-onnx/csrc/piper-phonemize-lexicon.cc
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/piper-phonemize-lexicon.h"

#include <fstream>
#include <locale>
#include <map>
#include <mutex>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "espeak-ng/speak_lib.h"
#include "phoneme_ids.hpp"  // NOLINT
#include "phonemize.hpp"    // NOLINT
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

// Encode a single char32_t to UTF-8 string. For debugging only
static std::string ToString(char32_t cp) {
  std::string result;

  if (cp <= 0x7F) {
    result += static_cast<char>(cp);
  } else if (cp <= 0x7FF) {
    result += static_cast<char>(0xC0 | ((cp >> 6) & 0x1F));
    result += static_cast<char>(0x80 | (cp & 0x3F));
  } else if (cp <= 0xFFFF) {
    result += static_cast<char>(0xE0 | ((cp >> 12) & 0x0F));
    result += static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
    result += static_cast<char>(0x80 | (cp & 0x3F));
  } else if (cp <= 0x10FFFF) {
    result += static_cast<char>(0xF0 | ((cp >> 18) & 0x07));
    result += static_cast<char>(0x80 | ((cp >> 12) & 0x3F));
    result += static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
    result += static_cast<char>(0x80 | (cp & 0x3F));
  } else {
    SHERPA_ONNX_LOGE("Invalid Unicode code point: %d",
                     static_cast<int32_t>(cp));
  }

  return result;
}

void CallPhonemizeEspeak(const std::string &text,
                         piper::eSpeakPhonemeConfig &config,  // NOLINT
                         std::vector<std::vector<piper::Phoneme>> *phonemes) {
  static std::mutex espeak_mutex;

  std::lock_guard<std::mutex> lock(espeak_mutex);

  // keep multi threads from calling into piper::phonemize_eSpeak
  piper::phonemize_eSpeak(text, config, *phonemes);
}

static std::unordered_map<char32_t, int32_t> ReadTokens(std::istream &is) {
  std::unordered_map<char32_t, int32_t> token2id;

  std::string line;

  std::string sym;
  std::u32string s;
  int32_t id = 0;
  while (std::getline(is, line)) {
    std::istringstream iss(line);
    iss >> sym;
    if (iss.eof()) {
      id = atoi(sym.c_str());
      sym = " ";
    } else {
      iss >> id;
    }

    // eat the trailing \r\n on windows
    iss >> std::ws;
    if (!iss.eof()) {
      SHERPA_ONNX_LOGE("Error when reading tokens: %s", line.c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    s = Utf8ToUtf32(sym);
    if (s.size() != 1) {
      // for tokens.txt from coqui-ai/TTS, the last token is <BLNK>
      if (s.size() == 6 && s[0] == '<' && s[1] == 'B' && s[2] == 'L' &&
          s[3] == 'N' && s[4] == 'K' && s[5] == '>') {
        continue;
      }

      SHERPA_ONNX_LOGE("Error when reading tokens at Line %s. size: %d",
                       line.c_str(), static_cast<int32_t>(s.size()));
      SHERPA_ONNX_EXIT(-1);
    }

    char32_t c = s[0];

    if (token2id.count(c)) {
      SHERPA_ONNX_LOGE("Duplicated token %s. Line %s. Existing ID: %d",
                       sym.c_str(), line.c_str(), token2id.at(c));
      SHERPA_ONNX_EXIT(-1);
    }

    token2id.insert({c, id});
  }

  return token2id;
}

// see the function "phonemes_to_ids" from
// https://github.com/rhasspy/piper/blob/master/notebooks/piper_inference_(ONNX).ipynb
static std::vector<int64_t> PiperPhonemesToIdsVits(
    const std::unordered_map<char32_t, int32_t> &token2id,
    const std::vector<piper::Phoneme> &phonemes) {
  // see
  // https://github.com/rhasspy/piper-phonemize/blob/master/src/phoneme_ids.hpp#L17
  int32_t pad = token2id.at(U'_');
  int32_t bos = token2id.at(U'^');
  int32_t eos = token2id.at(U'$');

  std::vector<int64_t> ans;
  ans.reserve(phonemes.size());

  ans.push_back(bos);
  for (auto p : phonemes) {
    if (token2id.count(p)) {
      ans.push_back(token2id.at(p));
      ans.push_back(pad);
    } else {
      SHERPA_ONNX_LOGE("Skip unknown phonemes. Unicode codepoint: \\U+%04x.",
                       static_cast<uint32_t>(p));
    }
  }
  ans.push_back(eos);

  return ans;
}

static std::vector<std::vector<int64_t>> PiperPhonemesToIdsMatcha(
    const std::unordered_map<char32_t, int32_t> &token2id,
    const std::vector<piper::Phoneme> &phonemes, bool use_eos_bos,
    int32_t max_token_len = 400) {
  // We set max_token_len to 400 here to fix
  // https://github.com/k2-fsa/sherpa-onnx/issues/2666
  std::vector<std::vector<int64_t>> ans;
  std::vector<int64_t> current;

  int32_t bos = token2id.at(U'^');
  int32_t eos = token2id.at(U'$');

  if (use_eos_bos) {
    current.push_back(bos);
  }

  for (auto p : phonemes) {
    if (token2id.count(p)) {
      current.push_back(token2id.at(p));
    } else {
      SHERPA_ONNX_LOGE("Skip unknown phonemes. Unicode codepoint: \\U+%04x.",
                       static_cast<uint32_t>(p));
    }

    if (current.size() > max_token_len + 1) {
      if (use_eos_bos) {
        current.push_back(eos);
      }

      ans.push_back(std::move(current));

      if (use_eos_bos) {
        current.push_back(bos);
      }
    }
  }  // for (auto p : phonemes)

  if (!current.empty()) {
    if (use_eos_bos) {
      if (current.size() > 1) {
        current.push_back(eos);

        ans.push_back(std::move(current));
      }
    } else {
      ans.push_back(std::move(current));
    }
  }

  return ans;
}

static std::vector<std::vector<int64_t>> PiperPhonemesToIdsKokoroOrKitten(
    const std::unordered_map<char32_t, int32_t> &token2id,
    const std::vector<piper::Phoneme> &phonemes, int32_t max_len) {
  std::vector<std::vector<int64_t>> ans;

  std::vector<int64_t> current;
  current.reserve(phonemes.size());

  current.push_back(0);

  for (auto p : phonemes) {
    // SHERPA_ONNX_LOGE("%d %s", static_cast<int32_t>(p), ToString(p).c_str());
    if (token2id.count(p)) {
      if (current.size() > max_len - 1) {
        current.push_back(0);
        ans.push_back(std::move(current));

        current.reserve(phonemes.size());
        current.push_back(0);
      }

      current.push_back(token2id.at(p));
      if (p == '.') {
        current.push_back(token2id.at(' '));
      }
    } else {
      SHERPA_ONNX_LOGE("Skip unknown phonemes. Unicode codepoint: \\U+%04x.",
                       static_cast<uint32_t>(p));
    }
  }

  current.push_back(0);
  ans.push_back(std::move(current));
  return ans;
}

static std::vector<int64_t> CoquiPhonemesToIds(
    const std::unordered_map<char32_t, int32_t> &token2id,
    const std::vector<piper::Phoneme> &phonemes,
    const OfflineTtsVitsModelMetaData &vits_meta_data) {
  // see
  // https://github.com/coqui-ai/TTS/blob/dev/TTS/tts/utils/text/tokenizer.py#L87
  int32_t use_eos_bos = vits_meta_data.use_eos_bos;
  int32_t bos_id = vits_meta_data.bos_id;
  int32_t eos_id = vits_meta_data.eos_id;
  int32_t blank_id = vits_meta_data.blank_id;
  int32_t add_blank = vits_meta_data.add_blank;
  int32_t comma_id = token2id.at(',');

  std::vector<int64_t> ans;
  if (add_blank) {
    ans.reserve(phonemes.size() * 2 + 3);
  } else {
    ans.reserve(phonemes.size() + 2);
  }

  if (use_eos_bos) {
    ans.push_back(bos_id);
  }

  if (add_blank) {
    ans.push_back(blank_id);

    for (auto p : phonemes) {
      if (token2id.count(p)) {
        ans.push_back(token2id.at(p));
        ans.push_back(blank_id);
      } else {
        SHERPA_ONNX_LOGE("Skip unknown phonemes. Unicode codepoint: \\U+%04x.",
                         static_cast<uint32_t>(p));
      }
    }
  } else {
    // not adding blank
    for (auto p : phonemes) {
      if (token2id.count(p)) {
        ans.push_back(token2id.at(p));
      } else {
        SHERPA_ONNX_LOGE("Skip unknown phonemes. Unicode codepoint: \\U+%04x.",
                         static_cast<uint32_t>(p));
      }
    }
  }

  // add a comma at the end of a sentence so that we can have a longer pause.
  ans.push_back(comma_id);

  if (use_eos_bos) {
    ans.push_back(eos_id);
  }

  return ans;
}

void InitEspeak(const std::string &data_dir) {
  static std::once_flag init_flag;
  std::call_once(init_flag, [data_dir]() {
#if __ANDROID_API__ >= 9 || defined(__OHOS__)
    if (data_dir[0] != '/') {
      SHERPA_ONNX_LOGE(
          "You need to follow our examples to copy the espeak-ng-data "
          "directory from the assets folder to an external storage directory.");

      SHERPA_ONNX_LOGE(
          "Hint: Please see\n"
          "https://github.com/k2-fsa/sherpa-onnx/blob/master/android/"
          "SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/"
          "engine/TtsEngine.kt#L188\n"
          "The function copyDataDir()\n");
    }
#endif

    int32_t result =
        espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS, 0, data_dir.c_str(), 0);
    if (result != 22050) {
      SHERPA_ONNX_LOGE(
          "Failed to initialize espeak-ng with data dir: %s. Return code is: "
          "%d",
          data_dir.c_str(), result);
      SHERPA_ONNX_EXIT(-1);
    }
  });
}

PiperPhonemizeLexicon::PiperPhonemizeLexicon(
    const std::string &tokens, const std::string &data_dir,
    const OfflineTtsVitsModelMetaData &vits_meta_data)
    : vits_meta_data_(vits_meta_data) {
  {
    std::ifstream is(tokens);
    token2id_ = ReadTokens(is);
  }

  InitEspeak(data_dir);
}

template <typename Manager>
PiperPhonemizeLexicon::PiperPhonemizeLexicon(
    Manager *mgr, const std::string &tokens, const std::string &data_dir,
    const OfflineTtsVitsModelMetaData &vits_meta_data)
    : vits_meta_data_(vits_meta_data) {
  {
    auto buf = ReadFile(mgr, tokens);
    std::istringstream is(std::string(buf.data(), buf.size()));
    token2id_ = ReadTokens(is);
  }

  // We should copy the directory of espeak-ng-data from the asset to
  // some internal or external storage and then pass the directory to
  // data_dir.
  InitEspeak(data_dir);
}

PiperPhonemizeLexicon::PiperPhonemizeLexicon(
    const std::string &tokens, const std::string &data_dir,
    const OfflineTtsMatchaModelMetaData &matcha_meta_data)
    : matcha_meta_data_(matcha_meta_data), is_matcha_(true) {
  {
    std::ifstream is(tokens);
    token2id_ = ReadTokens(is);
  }

  InitEspeak(data_dir);
}

PiperPhonemizeLexicon::PiperPhonemizeLexicon(
    const std::string &tokens, const std::string &data_dir,
    const OfflineTtsKokoroModelMetaData &kokoro_meta_data)
    : kokoro_meta_data_(kokoro_meta_data), is_kokoro_(true) {
  {
    std::ifstream is(tokens);
    token2id_ = ReadTokens(is);
  }

  InitEspeak(data_dir);
}

PiperPhonemizeLexicon::PiperPhonemizeLexicon(
    const std::string &tokens, const std::string &data_dir,
    const OfflineTtsKittenModelMetaData &kitten_meta_data)
    : kitten_meta_data_(kitten_meta_data), is_kitten_(true) {
  {
    std::ifstream is(tokens);
    token2id_ = ReadTokens(is);
  }

  InitEspeak(data_dir);
}

template <typename Manager>
PiperPhonemizeLexicon::PiperPhonemizeLexicon(
    Manager *mgr, const std::string &tokens, const std::string &data_dir,
    const OfflineTtsMatchaModelMetaData &matcha_meta_data)
    : matcha_meta_data_(matcha_meta_data), is_matcha_(true) {
  {
    auto buf = ReadFile(mgr, tokens);
    std::istringstream is(std::string(buf.data(), buf.size()));
    token2id_ = ReadTokens(is);
  }

  // We should copy the directory of espeak-ng-data from the asset to
  // some internal or external storage and then pass the directory to
  // data_dir.
  InitEspeak(data_dir);
}

template <typename Manager>
PiperPhonemizeLexicon::PiperPhonemizeLexicon(
    Manager *mgr, const std::string &tokens, const std::string &data_dir,
    const OfflineTtsKokoroModelMetaData &kokoro_meta_data)
    : kokoro_meta_data_(kokoro_meta_data), is_kokoro_(true) {
  {
    auto buf = ReadFile(mgr, tokens);
    std::istringstream is(std::string(buf.data(), buf.size()));
    token2id_ = ReadTokens(is);
  }

  // We should copy the directory of espeak-ng-data from the asset to
  // some internal or external storage and then pass the directory to
  // data_dir.
  InitEspeak(data_dir);
}

template <typename Manager>
PiperPhonemizeLexicon::PiperPhonemizeLexicon(
    Manager *mgr, const std::string &tokens, const std::string &data_dir,
    const OfflineTtsKittenModelMetaData &kitten_meta_data)
    : kitten_meta_data_(kitten_meta_data), is_kitten_(true) {
  {
    auto buf = ReadFile(mgr, tokens);
    std::istringstream is(std::string(buf.data(), buf.size()));
    token2id_ = ReadTokens(is);
  }

  // We should copy the directory of espeak-ng-data from the asset to
  // some internal or external storage and then pass the directory to
  // data_dir.
  InitEspeak(data_dir);
}

std::vector<TokenIDs> PiperPhonemizeLexicon::ConvertTextToTokenIds(
    const std::string &text, const std::string &voice /*= ""*/) const {
  if (is_matcha_) {
    return ConvertTextToTokenIdsMatcha(text, voice);
  } else if (is_kokoro_) {
    return ConvertTextToTokenIdsKokoroOrKitten(
        token2id_, kokoro_meta_data_.max_token_len, text, voice);
  } else if (is_kitten_) {
    return ConvertTextToTokenIdsKokoroOrKitten(
        token2id_, kitten_meta_data_.max_token_len, text, voice);
  } else {
    return ConvertTextToTokenIdsVits(text, voice);
  }
}

std::vector<TokenIDs> PiperPhonemizeLexicon::ConvertTextToTokenIdsMatcha(
    const std::string &text, const std::string &voice /*= ""*/) const {
  piper::eSpeakPhonemeConfig config;

  // ./bin/espeak-ng-bin --path  ./install/share/espeak-ng-data/ --voices
  // to list available voices
  config.voice = voice;  // e.g., voice is en-us

  std::vector<std::vector<piper::Phoneme>> phonemes;

  CallPhonemizeEspeak(text, config, &phonemes);

  std::vector<TokenIDs> ans;

  for (const auto &p : phonemes) {
    auto phoneme_ids =
        PiperPhonemesToIdsMatcha(token2id_, p, matcha_meta_data_.use_eos_bos);

    for (auto &ids : phoneme_ids) {
      ans.emplace_back(std::move(ids));
    }
  }

  return ans;
}

std::vector<TokenIDs> ConvertTextToTokenIdsKokoroOrKitten(
    const std::unordered_map<char32_t, int32_t> &token2id,
    int32_t max_token_len, const std::string &text,
    const std::string &voice /*= ""*/) {
  piper::eSpeakPhonemeConfig config;

  // ./bin/espeak-ng-bin --path  ./install/share/espeak-ng-data/ --voices
  // to list available voices
  config.voice = voice;  // e.g., voice is en-us

  std::vector<std::vector<piper::Phoneme>> phonemes;

  CallPhonemizeEspeak(text, config, &phonemes);

  std::vector<TokenIDs> ans;

  for (const auto &p : phonemes) {
    auto phoneme_ids =
        PiperPhonemesToIdsKokoroOrKitten(token2id, p, max_token_len);

    for (auto &ids : phoneme_ids) {
      ans.emplace_back(std::move(ids));
    }
  }

  return ans;
}

std::vector<TokenIDs> PiperPhonemizeLexicon::ConvertTextToTokenIdsVits(
    const std::string &text, const std::string &voice /*= ""*/) const {
  piper::eSpeakPhonemeConfig config;

  // ./bin/espeak-ng-bin --path  ./install/share/espeak-ng-data/ --voices
  // to list available voices
  config.voice = voice;  // e.g., voice is en-us

  std::vector<std::vector<piper::Phoneme>> phonemes;

  CallPhonemizeEspeak(text, config, &phonemes);

  std::vector<TokenIDs> ans;

  std::vector<int64_t> phoneme_ids;

  if (vits_meta_data_.is_piper || vits_meta_data_.is_icefall) {
    for (const auto &p : phonemes) {
      phoneme_ids = PiperPhonemesToIdsVits(token2id_, p);
      ans.emplace_back(std::move(phoneme_ids));
    }
  } else if (vits_meta_data_.is_coqui) {
    for (const auto &p : phonemes) {
      phoneme_ids = CoquiPhonemesToIds(token2id_, p, vits_meta_data_);
      ans.emplace_back(std::move(phoneme_ids));
    }

  } else {
    SHERPA_ONNX_LOGE("Unsupported model");
    SHERPA_ONNX_EXIT(-1);
  }

  return ans;
}

#if __ANDROID_API__ >= 9
template PiperPhonemizeLexicon::PiperPhonemizeLexicon(
    AAssetManager *mgr, const std::string &tokens, const std::string &data_dir,
    const OfflineTtsVitsModelMetaData &vits_meta_data);

template PiperPhonemizeLexicon::PiperPhonemizeLexicon(
    AAssetManager *mgr, const std::string &tokens, const std::string &data_dir,
    const OfflineTtsMatchaModelMetaData &matcha_meta_data);

template PiperPhonemizeLexicon::PiperPhonemizeLexicon(
    AAssetManager *mgr, const std::string &tokens, const std::string &data_dir,
    const OfflineTtsKokoroModelMetaData &kokoro_meta_data);

template PiperPhonemizeLexicon::PiperPhonemizeLexicon(
    AAssetManager *mgr, const std::string &tokens, const std::string &data_dir,
    const OfflineTtsKittenModelMetaData &kokoro_meta_data);
#endif

#if __OHOS__
template PiperPhonemizeLexicon::PiperPhonemizeLexicon(
    NativeResourceManager *mgr, const std::string &tokens,
    const std::string &data_dir,
    const OfflineTtsVitsModelMetaData &vits_meta_data);

template PiperPhonemizeLexicon::PiperPhonemizeLexicon(
    NativeResourceManager *mgr, const std::string &tokens,
    const std::string &data_dir,
    const OfflineTtsMatchaModelMetaData &matcha_meta_data);

template PiperPhonemizeLexicon::PiperPhonemizeLexicon(
    NativeResourceManager *mgr, const std::string &tokens,
    const std::string &data_dir,
    const OfflineTtsKokoroModelMetaData &kokoro_meta_data);

template PiperPhonemizeLexicon::PiperPhonemizeLexicon(
    NativeResourceManager *mgr, const std::string &tokens,
    const std::string &data_dir,
    const OfflineTtsKittenModelMetaData &kokoro_meta_data);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/piper-phonemize-lexicon.h
================================================
// sherpa-onnx/csrc/piper-phonemize-lexicon.h
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_PIPER_PHONEMIZE_LEXICON_H_
#define SHERPA_ONNX_CSRC_PIPER_PHONEMIZE_LEXICON_H_

#include <string>
#include <unordered_map>
#include <vector>

#include "sherpa-onnx/csrc/offline-tts-frontend.h"
#include "sherpa-onnx/csrc/offline-tts-kitten-model-meta-data.h"
#include "sherpa-onnx/csrc/offline-tts-kokoro-model-meta-data.h"
#include "sherpa-onnx/csrc/offline-tts-matcha-model-meta-data.h"
#include "sherpa-onnx/csrc/offline-tts-vits-model-meta-data.h"

namespace sherpa_onnx {

class PiperPhonemizeLexicon : public OfflineTtsFrontend {
 public:
  PiperPhonemizeLexicon(const std::string &tokens, const std::string &data_dir,
                        const OfflineTtsVitsModelMetaData &vits_meta_data);

  PiperPhonemizeLexicon(const std::string &tokens, const std::string &data_dir,
                        const OfflineTtsMatchaModelMetaData &matcha_meta_data);

  PiperPhonemizeLexicon(const std::string &tokens, const std::string &data_dir,
                        const OfflineTtsKokoroModelMetaData &kokoro_meta_data);

  PiperPhonemizeLexicon(const std::string &tokens, const std::string &data_dir,
                        const OfflineTtsKittenModelMetaData &kitten_meta_data);

  template <typename Manager>
  PiperPhonemizeLexicon(Manager *mgr, const std::string &tokens,
                        const std::string &data_dir,
                        const OfflineTtsVitsModelMetaData &vits_meta_data);

  template <typename Manager>
  PiperPhonemizeLexicon(Manager *mgr, const std::string &tokens,
                        const std::string &data_dir,
                        const OfflineTtsMatchaModelMetaData &matcha_meta_data);

  template <typename Manager>
  PiperPhonemizeLexicon(Manager *mgr, const std::string &tokens,
                        const std::string &data_dir,
                        const OfflineTtsKokoroModelMetaData &kokoro_meta_data);

  template <typename Manager>
  PiperPhonemizeLexicon(Manager *mgr, const std::string &tokens,
                        const std::string &data_dir,
                        const OfflineTtsKittenModelMetaData &kitten_meta_data);

  std::vector<TokenIDs> ConvertTextToTokenIds(
      const std::string &text, const std::string &voice = "") const override;

 private:
  std::vector<TokenIDs> ConvertTextToTokenIdsVits(
      const std::string &text, const std::string &voice = "") const;

  std::vector<TokenIDs> ConvertTextToTokenIdsMatcha(
      const std::string &text, const std::string &voice = "") const;

 private:
  // map unicode codepoint to an integer ID
  std::unordered_map<char32_t, int32_t> token2id_;
  OfflineTtsVitsModelMetaData vits_meta_data_;
  OfflineTtsMatchaModelMetaData matcha_meta_data_;
  OfflineTtsKokoroModelMetaData kokoro_meta_data_;
  OfflineTtsKittenModelMetaData kitten_meta_data_;
  bool is_matcha_ = false;
  bool is_kokoro_ = false;
  bool is_kitten_ = false;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_PIPER_PHONEMIZE_LEXICON_H_


================================================
FILE: sherpa-onnx/csrc/piper-phonemize-test.cc
================================================
// sherpa-onnx/csrc/piper-phonemize-test.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include <iostream>
#include <map>
#include <string>
#include <vector>

#include "espeak-ng/speak_lib.h"
#include "gtest/gtest.h"
#include "phoneme_ids.hpp"  // NOLINT
#include "phonemize.hpp"    // NOLINT
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

TEST(PiperPhonemize, Case1) {
  std::string data_dir = "./install/share/espeak-ng-data";
  if (!FileExists(data_dir + "/en_dict")) {
    SHERPA_ONNX_LOGE("%s/en_dict does not exist. Skipping test",
                     data_dir.c_str());
    return;
  }

  if (!FileExists(data_dir + "/phontab")) {
    SHERPA_ONNX_LOGE("%s/phontab does not exist. Skipping test",
                     data_dir.c_str());
    return;
  }

  if (!FileExists(data_dir + "/phonindex")) {
    SHERPA_ONNX_LOGE("%s/phonindex does not exist. Skipping test",
                     data_dir.c_str());
    return;
  }

  if (!FileExists(data_dir + "/phondata")) {
    SHERPA_ONNX_LOGE("%s/phondata does not exist. Skipping test",
                     data_dir.c_str());
    return;
  }

  if (!FileExists(data_dir + "/intonations")) {
    SHERPA_ONNX_LOGE("%s/intonations does not exist. Skipping test",
                     data_dir.c_str());
    return;
  }
  int32_t result =
      espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS, 0, data_dir.c_str(), 0);
  EXPECT_EQ(result, 22050);

  piper::eSpeakPhonemeConfig config;

  // ./bin/espeak-ng-bin --path  ./install/share/espeak-ng-data/ --voices
  // to list available voices
  config.voice = "en-us";

  std::vector<std::vector<piper::Phoneme>> phonemes;
  std::string text = "how are you doing?";
  piper::phonemize_eSpeak(text, config, phonemes);

  for (int32_t p : phonemes[0]) {
    std::cout << p << " ";
  }
  std::cout << "\n";

  std::vector<piper::PhonemeId> phoneme_ids;
  std::map<piper::Phoneme, std::size_t> missing_phonemes;

  {
    piper::PhonemeIdConfig config;
    phonemes_to_ids(phonemes[0], config, phoneme_ids, missing_phonemes);
  }

  for (int32_t p : phoneme_ids) {
    std::cout << p << " ";
  }
  std::cout << "\n";
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/provider-config.cc
================================================
// sherpa-onnx/csrc/provider-config.cc
//
// Copyright (c)  2024  Uniphore (Author: Manickavela)

#include "sherpa-onnx/csrc/provider-config.h"

#include <sstream>
#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void CudaConfig::Register(ParseOptions *po) {
  po->Register("cuda-cudnn-conv-algo-search", &cudnn_conv_algo_search,
               "CuDNN convolution algrorithm search");
}

bool CudaConfig::Validate() const {
  if (cudnn_conv_algo_search < 1 || cudnn_conv_algo_search > 3) {
    SHERPA_ONNX_LOGE(
        "cudnn_conv_algo_search: '%d' is not a valid option."
        "Options : [1,3]. Check OnnxRT docs",
        cudnn_conv_algo_search);
    return false;
  }
  return true;
}

std::string CudaConfig::ToString() const {
  std::ostringstream os;

  os << "CudaConfig(";
  os << "cudnn_conv_algo_search=" << cudnn_conv_algo_search << ")";

  return os.str();
}

void TensorrtConfig::Register(ParseOptions *po) {
  po->Register("trt-max-workspace-size", &trt_max_workspace_size,
               "Set TensorRT EP GPU memory usage limit.");
  po->Register("trt-max-partition-iterations", &trt_max_partition_iterations,
               "Limit partitioning iterations for model conversion.");
  po->Register("trt-min-subgraph-size", &trt_min_subgraph_size,
               "Set minimum size for subgraphs in partitioning.");
  po->Register("trt-fp16-enable", &trt_fp16_enable,
               "Enable FP16 precision for faster performance.");
  po->Register("trt-detailed-build-log", &trt_detailed_build_log,
               "Enable detailed logging of build steps.");
  po->Register("trt-engine-cache-enable", &trt_engine_cache_enable,
               "Enable caching of TensorRT engines.");
  po->Register("trt-timing-cache-enable", &trt_timing_cache_enable,
               "Enable use of timing cache to speed up builds.");
  po->Register("trt-engine-cache-path", &trt_engine_cache_path,
               "Set path to store cached TensorRT engines.");
  po->Register("trt-timing-cache-path", &trt_timing_cache_path,
               "Set path for storing timing cache.");
  po->Register("trt-dump-subgraphs", &trt_dump_subgraphs,
               "Dump optimized subgraphs for debugging.");
}

bool TensorrtConfig::Validate() const {
  if (trt_max_workspace_size < 0) {
    std::ostringstream os;
    os << "trt_max_workspace_size: " << trt_max_workspace_size
       << " is not valid.";
    SHERPA_ONNX_LOGE("%s", os.str().c_str());
    return false;
  }
  if (trt_max_partition_iterations < 0) {
    SHERPA_ONNX_LOGE("trt_max_partition_iterations: %d is not valid.",
                     trt_max_partition_iterations);
    return false;
  }
  if (trt_min_subgraph_size < 0) {
    SHERPA_ONNX_LOGE("trt_min_subgraph_size: %d is not valid.",
                     trt_min_subgraph_size);
    return false;
  }

  return true;
}

std::string TensorrtConfig::ToString() const {
  std::ostringstream os;

  os << "TensorrtConfig(";
  os << "trt_max_workspace_size=" << trt_max_workspace_size << ", ";
  os << "trt_max_partition_iterations=" << trt_max_partition_iterations << ", ";
  os << "trt_min_subgraph_size=" << trt_min_subgraph_size << ", ";
  os << "trt_fp16_enable=\"" << (trt_fp16_enable ? "True" : "False") << "\", ";
  os << "trt_detailed_build_log=\""
     << (trt_detailed_build_log ? "True" : "False") << "\", ";
  os << "trt_engine_cache_enable=\""
     << (trt_engine_cache_enable ? "True" : "False") << "\", ";
  os << "trt_engine_cache_path=\"" << trt_engine_cache_path.c_str() << "\", ";
  os << "trt_timing_cache_enable=\""
     << (trt_timing_cache_enable ? "True" : "False") << "\", ";
  os << "trt_timing_cache_path=\"" << trt_timing_cache_path.c_str() << "\",";
  os << "trt_dump_subgraphs=\"" << (trt_dump_subgraphs ? "True" : "False")
     << "\" )";
  return os.str();
}

void ProviderConfig::Register(ParseOptions *po) {
  cuda_config.Register(po);
  trt_config.Register(po);

  po->Register("device", &device, "GPU device index for CUDA and Trt EP");
  po->Register("provider", &provider,
               "Specify a provider to use: cpu, cuda, coreml");
}

bool ProviderConfig::Validate() const {
  if (device < 0) {
    SHERPA_ONNX_LOGE("device: '%d' is invalid.", device);
    return false;
  }

  if (provider == "cuda" && !cuda_config.Validate()) {
    return false;
  }

  if (provider == "trt" && !trt_config.Validate()) {
    return false;
  }

  return true;
}

std::string ProviderConfig::ToString() const {
  std::ostringstream os;

  os << "ProviderConfig(";
  os << "device=" << device << ", ";
  os << "provider=\"" << provider << "\", ";
  os << "cuda_config=" << cuda_config.ToString() << ", ";
  os << "trt_config=" << trt_config.ToString() << ")";
  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/provider-config.h
================================================
// sherpa-onnx/csrc/provider-config.h
//
// Copyright (c)  2024  Uniphore (Author: Manickavela)

#ifndef SHERPA_ONNX_CSRC_PROVIDER_CONFIG_H_
#define SHERPA_ONNX_CSRC_PROVIDER_CONFIG_H_

#include <string>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct CudaConfig {
  int32_t cudnn_conv_algo_search = OrtCudnnConvAlgoSearchHeuristic;

  CudaConfig() = default;
  explicit CudaConfig(int32_t cudnn_conv_algo_search)
      : cudnn_conv_algo_search(cudnn_conv_algo_search) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

struct TensorrtConfig {
  int64_t trt_max_workspace_size = 2147483647;
  int32_t trt_max_partition_iterations = 10;
  int32_t trt_min_subgraph_size = 5;
  bool trt_fp16_enable = true;
  bool trt_detailed_build_log = false;
  bool trt_engine_cache_enable = true;
  bool trt_timing_cache_enable = true;
  std::string trt_engine_cache_path = ".";
  std::string trt_timing_cache_path = ".";
  bool trt_dump_subgraphs = false;

  TensorrtConfig() = default;
  TensorrtConfig(int64_t trt_max_workspace_size,
                 int32_t trt_max_partition_iterations,
                 int32_t trt_min_subgraph_size, bool trt_fp16_enable,
                 bool trt_detailed_build_log, bool trt_engine_cache_enable,
                 bool trt_timing_cache_enable,
                 const std::string &trt_engine_cache_path,
                 const std::string &trt_timing_cache_path,
                 bool trt_dump_subgraphs)
      : trt_max_workspace_size(trt_max_workspace_size),
        trt_max_partition_iterations(trt_max_partition_iterations),
        trt_min_subgraph_size(trt_min_subgraph_size),
        trt_fp16_enable(trt_fp16_enable),
        trt_detailed_build_log(trt_detailed_build_log),
        trt_engine_cache_enable(trt_engine_cache_enable),
        trt_timing_cache_enable(trt_timing_cache_enable),
        trt_engine_cache_path(trt_engine_cache_path),
        trt_timing_cache_path(trt_timing_cache_path),
        trt_dump_subgraphs(trt_dump_subgraphs) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

struct ProviderConfig {
  TensorrtConfig trt_config;
  CudaConfig cuda_config;
  std::string provider = "cpu";
  int32_t device = 0;
  // device only used for cuda and trt

  ProviderConfig() = default;
  ProviderConfig(const std::string &provider, int32_t device)
      : provider(provider), device(device) {}
  ProviderConfig(const TensorrtConfig &trt_config,
                 const CudaConfig &cuda_config, const std::string &provider,
                 int32_t device)
      : trt_config(trt_config),
        cuda_config(cuda_config),
        provider(provider),
        device(device) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_PROVIDER_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/provider.cc
================================================
// sherpa-onnx/csrc/provider.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/provider.h"

#include <algorithm>
#include <cctype>
#include <string>

#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

Provider StringToProvider(std::string s) {
  std::transform(s.cbegin(), s.cend(), s.begin(),
                 [](unsigned char c) { return std::tolower(c); });
  if (s == "cpu") {
    return Provider::kCPU;
  } else if (s == "cuda") {
    return Provider::kCUDA;
  } else if (s == "coreml") {
    return Provider::kCoreML;
  } else if (s == "xnnpack") {
    return Provider::kXnnpack;
  } else if (s == "nnapi") {
    return Provider::kNNAPI;
  } else if (s == "trt") {
    return Provider::kTRT;
  } else if (s == "directml") {
    return Provider::kDirectML;
  } else if (s == "spacemit") {
    return Provider::kSpacemiT;
  } else {
    SHERPA_ONNX_LOGE("Unsupported string: %s. Fallback to cpu", s.c_str());
    return Provider::kCPU;
  }
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/provider.h
================================================
// sherpa-onnx/csrc/provider.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_PROVIDER_H_
#define SHERPA_ONNX_CSRC_PROVIDER_H_

#include <string>

#include "sherpa-onnx/csrc/provider-config.h"
namespace sherpa_onnx {

// Please refer to
// https://github.com/microsoft/onnxruntime/blob/main/java/src/main/java/ai/onnxruntime/OrtProvider.java
// for a list of available providers
enum class Provider {
  kCPU = 0,       // CPUExecutionProvider
  kCUDA = 1,      // CUDAExecutionProvider
  kCoreML = 2,    // CoreMLExecutionProvider
  kXnnpack = 3,   // XnnpackExecutionProvider
  kNNAPI = 4,     // NnapiExecutionProvider
  kTRT = 5,       // TensorRTExecutionProvider
  kDirectML = 6,  // DmlExecutionProvider
  kSpacemiT = 7,  // SpacemiTExecutionProvider
};

/**
 * Convert a string to an enum.
 *
 * @param s We will convert it to lowercase before comparing.
 * @return Return an instance of Provider.
 */
Provider StringToProvider(std::string s);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_PROVIDER_H_


================================================
FILE: sherpa-onnx/csrc/qnn/macros.h
================================================
// sherpa-onnx/csrc/qnn/macros.h
//
// Copyright      2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_QNN_MACROS_H_
#define SHERPA_ONNX_CSRC_QNN_MACROS_H_

#include "sherpa-onnx/csrc/macros.h"

#define SHERPA_ONNX_QNN_CHECK(ret, msg, ...)                             \
  do {                                                                   \
    if (ret != QNN_SUCCESS) {                                            \
      SHERPA_ONNX_LOGE("Return code is: %d", static_cast<int32_t>(ret)); \
      SHERPA_ONNX_LOGE(msg, ##__VA_ARGS__);                              \
      SHERPA_ONNX_EXIT(-1);                                              \
    }                                                                    \
  } while (0)

#endif  // SHERPA_ONNX_CSRC_QNN_MACROS_H_


================================================
FILE: sherpa-onnx/csrc/qnn/offline-paraformer-model-qnn.cc
================================================
// sherpa-onnx/csrc/qnn/offline-paraformer-model-qnn.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/qnn/offline-paraformer-model-qnn.h"

#include <algorithm>
#include <array>
#include <memory>
#include <mutex>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/math.h"
#include "sherpa-onnx/csrc/qnn/macros.h"
#include "sherpa-onnx/csrc/qnn/qnn-backend.h"
#include "sherpa-onnx/csrc/qnn/qnn-model.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class OfflineParaformerModelQnn::Impl {
 public:
  explicit Impl(const OfflineModelConfig &config) : config_(config) {
    std::vector<std::string> filenames;
    SplitStringToVector(config_.paraformer.model, ",", true, &filenames);
    if (!filenames.empty()) {
      if (filenames.size() != 3) {
        SHERPA_ONNX_LOGE("Invalid Paraformer QNN model '%s'",
                         config_.paraformer.model.c_str());
        SHERPA_ONNX_EXIT(-1);
      }
    }

    std::vector<std::string> binary_filenames;
    SplitStringToVector(config_.paraformer.qnn_config.context_binary, ",", true,
                        &binary_filenames);
    if (!binary_filenames.empty()) {
      if (binary_filenames.size() != 3) {
        SHERPA_ONNX_LOGE(
            "There should be 3 files for Paraformer context binary. Actual: "
            "%d. '%s'",
            static_cast<int32_t>(binary_filenames.size()),
            config_.paraformer.qnn_config.context_binary.c_str());
        SHERPA_ONNX_EXIT(-1);
      }
    }

    if (filenames.empty() && binary_filenames.empty()) {
      SHERPA_ONNX_LOGE(
          "You need to provide either a model or a context binary for "
          "Paraformer with QNN");
      SHERPA_ONNX_EXIT(-1);
    }

    bool ok = InitEncoder(filenames.empty() ? "" : filenames[0],
                          binary_filenames.empty() ? "" : binary_filenames[0]);
    if (!ok) {
      SHERPA_ONNX_LOGE(
          "Failed to init encoder with lib file '%s', context binary: '%s'",
          filenames.empty() ? "" : filenames[0].c_str(),
          binary_filenames.empty() ? "" : binary_filenames[0].c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    ok = InitPredictor(filenames.empty() ? "" : filenames[1],
                       binary_filenames.empty() ? "" : binary_filenames[1]);
    if (!ok) {
      SHERPA_ONNX_LOGE(
          "Failed to init predictor with lib file '%s', context binary: '%s'",
          filenames.empty() ? "" : filenames[1].c_str(),
          binary_filenames.empty() ? "" : binary_filenames[1].c_str());
      return;
    }

    ok = InitDecoder(filenames.empty() ? "" : filenames[2],
                     binary_filenames.empty() ? "" : binary_filenames[2]);
    if (!ok) {
      SHERPA_ONNX_LOGE(
          "Failed to init decoder with lib file '%s', context binary: '%s'",
          filenames.empty() ? "" : filenames[2].c_str(),
          binary_filenames.empty() ? "" : binary_filenames[2].c_str());
      SHERPA_ONNX_EXIT(-1);
    }
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineModelConfig &config) {
    SHERPA_ONNX_LOGE(
        "Please copy all files from assets to SD card and set assetManager to "
        "null");
    SHERPA_ONNX_EXIT(-1);
  }

  std::vector<float> Run(std::vector<float> features) {
    std::lock_guard<std::mutex> lock(mutex_);

    std::vector<float> encoder_out = RunEncoder(std::move(features));
    std::vector<float> transposed_encoder_out =
        Transpose(encoder_out.data(), encoder_out_dim1_, encoder_out_dim2_);

    std::vector<float> alphas = RunPredictor(transposed_encoder_out);

    std::vector<float> acoustic_embedding =
        ComputeAcousticEmbedding(encoder_out, alphas, encoder_out_dim2_);

    int32_t num_tokens = acoustic_embedding.size() / encoder_out_dim2_;

    acoustic_embedding.resize(encoder_out.size());

    std::vector<float> transposed_acoustic_embedding = Transpose(
        acoustic_embedding.data(), encoder_out_dim1_, encoder_out_dim2_);

    std::vector<float> decoder_out = RunDecoder(
        transposed_encoder_out, transposed_acoustic_embedding, num_tokens);

    decoder_out = Transpose(decoder_out.data(), vocab_size_, encoder_out_dim1_);
    decoder_out.resize(num_tokens * vocab_size_);
    return decoder_out;
  }

  int32_t VocabSize() const { return vocab_size_; }

 private:
  std::vector<float> RunEncoder(std::vector<float> features) const {
    features = ApplyLFR(std::move(features));
    if (features.empty()) {
      return {};
    }

    encoder_model_->SetInputTensorData("x", features.data(), features.size());
    encoder_model_->Run();
    return encoder_model_->GetOutputTensorData("encoder_out");
  }

  std::vector<float> RunPredictor(
      const std::vector<float> &transposed_encoder_out) const {
    predictor_model_->SetInputTensorData("encoder_out",
                                         transposed_encoder_out.data(),
                                         transposed_encoder_out.size());
    predictor_model_->Run();
    return predictor_model_->GetOutputTensorData("alphas");
  }

  std::vector<float> RunDecoder(
      const std::vector<float> &transposed_encoder_out,
      const std::vector<float> &transposed_acoustic_embedding,
      int32_t num_tokens) const {
    std::vector<int32_t> mask(encoder_out_dim1_, 1);
    std::fill(mask.begin() + num_tokens, mask.end(), 0);

    decoder_model_->SetInputTensorData("encoder_out",
                                       transposed_encoder_out.data(),
                                       transposed_encoder_out.size());

    decoder_model_->SetInputTensorData("acoustic_embedding",
                                       transposed_acoustic_embedding.data(),
                                       transposed_acoustic_embedding.size());

    decoder_model_->SetInputTensorData("mask", mask.data(), mask.size());

    decoder_model_->Run();

    return decoder_model_->GetOutputTensorData("decoder_out");
  }

  std::vector<float> ApplyLFR(std::vector<float> in) const {
    int32_t lfr_window_size = 7;
    int32_t lfr_window_shift = 6;
    int32_t in_feat_dim = 80;

    int32_t in_num_frames = in.size() / in_feat_dim;
    if (in_num_frames < lfr_window_size) {
      return {};
    }

    int32_t out_num_frames =
        (in_num_frames - lfr_window_size) / lfr_window_shift + 1;

    if (out_num_frames > num_input_frames_) {
      SHERPA_ONNX_LOGE(
          "Number of input frames %d is too large. Truncate it to %d frames.",
          out_num_frames, num_input_frames_);

      SHERPA_ONNX_LOGE(
          "Recognition result may be truncated/incomplete. Please select a "
          "model accepting longer audios.");

      out_num_frames = num_input_frames_;
    }

    int32_t out_feat_dim = in_feat_dim * lfr_window_size;

    std::vector<float> out(num_input_frames_ * out_feat_dim);

    const float *p_in = in.data();
    float *p_out = out.data();

    for (int32_t i = 0; i != out_num_frames; ++i) {
      std::copy(p_in, p_in + out_feat_dim, p_out);

      p_out += out_feat_dim;
      p_in += lfr_window_shift * in_feat_dim;
    }

    return out;
  }

  bool InitEncoder(const std::string &lib_filename,
                   const std::string &context_binary) {
    encoder_backend_ = std::make_unique<QnnBackend>(
        config_.paraformer.qnn_config.backend_lib, config_.debug);

    if (context_binary.empty()) {
      if (config_.debug) {
        SHERPA_ONNX_LOGE(
            "Init from encoder model lib '%s' since context binary is not "
            "given.",
            lib_filename.c_str());
      }

      InitEncoderFromModelLib(lib_filename);

      if (config_.debug) {
        SHERPA_ONNX_LOGE(
            "Skip generating encoder context binary since you don't provide a "
            "path to save it");
      }
    } else if (!FileExists(context_binary)) {
      if (config_.debug) {
        SHERPA_ONNX_LOGE(
            "Init encoder from model lib '%s' since context binary '%s' does "
            "not exist",
            lib_filename.c_str(), context_binary.c_str());
      }

      InitEncoderFromModelLib(lib_filename);

      CreateContextBinary(encoder_model_.get(), context_binary);
    } else {
      if (config_.debug) {
        SHERPA_ONNX_LOGE("Init from encoder context binary '%s'",
                         context_binary.c_str());
      }
      InitEncoderFromContextBinary(context_binary);
    }

    PostInitEncoder();

    return true;
  }

  bool InitPredictor(const std::string &lib_filename,
                     const std::string &context_binary) {
    predictor_backend_ = std::make_unique<QnnBackend>(
        config_.paraformer.qnn_config.backend_lib, config_.debug);

    if (context_binary.empty()) {
      if (config_.debug) {
        SHERPA_ONNX_LOGE(
            "Init from predictor model lib '%s' since context binary is not "
            "given.",
            lib_filename.c_str());
      }

      InitPredictorFromModelLib(lib_filename);

      if (config_.debug) {
        SHERPA_ONNX_LOGE(
            "Skip generating predictor context binary since you don't provide "
            "a path to save it");
      }
    } else if (!FileExists(context_binary)) {
      if (config_.debug) {
        SHERPA_ONNX_LOGE(
            "Init predictor from model lib '%s' since context binary '%s' does "
            "not exist",
            lib_filename.c_str(), context_binary.c_str());
      }

      InitPredictorFromModelLib(lib_filename);
      CreateContextBinary(predictor_model_.get(), context_binary);
    } else {
      if (config_.debug) {
        SHERPA_ONNX_LOGE("Init from predictor context binary '%s'",
                         context_binary.c_str());
      }
      InitPredictorFromContextBinary(context_binary);
    }

    PostInitPredictor();

    return true;
  }

  bool InitDecoder(const std::string &lib_filename,
                   const std::string &context_binary) {
    decoder_backend_ = std::make_unique<QnnBackend>(
        config_.paraformer.qnn_config.backend_lib, config_.debug);

    if (context_binary.empty()) {
      if (config_.debug) {
        SHERPA_ONNX_LOGE(
            "Init from decoder model lib since context binary is not given");
      }

      InitDecoderFromModelLib(lib_filename);

      if (config_.debug) {
        SHERPA_ONNX_LOGE(
            "Skip generating decoder context binary since you don't provide "
            "a path to save it");
      }
    } else if (!FileExists(context_binary)) {
      if (config_.debug) {
        SHERPA_ONNX_LOGE(
            "Init decoder from model lib since context binary '%s' does not "
            "exist",
            context_binary.c_str());
      }

      InitDecoderFromModelLib(lib_filename);
      CreateContextBinary(decoder_model_.get(), context_binary);
    } else {
      if (config_.debug) {
        SHERPA_ONNX_LOGE("Init from decoder context binary '%s'",
                         context_binary.c_str());
      }
      InitDecoderFromContextBinary(context_binary);
    }

    PostInitDecoder();

    return true;
  }

  void InitEncoderFromModelLib(const std::string &lib_filename) {
    encoder_backend_->InitContext();
    encoder_model_ = std::make_unique<QnnModel>(
        lib_filename, encoder_backend_.get(), config_.debug);
  }

  void InitPredictorFromModelLib(const std::string &lib_filename) {
    predictor_backend_->InitContext();
    predictor_model_ = std::make_unique<QnnModel>(
        lib_filename, predictor_backend_.get(), config_.debug);
  }

  void InitDecoderFromModelLib(const std::string &lib_filename) {
    decoder_backend_->InitContext();
    decoder_model_ = std::make_unique<QnnModel>(
        lib_filename, decoder_backend_.get(), config_.debug);
  }

  void CreateContextBinary(QnnModel *model, const std::string &context_binary) {
    if (config_.debug) {
      SHERPA_ONNX_LOGE("Creating context binary '%s'.", context_binary.c_str());
    }

    bool ok = model->SaveBinaryContext(context_binary);

    if (!ok) {
      SHERPA_ONNX_LOGE("Failed to save context binary to '%s'",
                       context_binary.c_str());
    }

    if (config_.debug && ok) {
      SHERPA_ONNX_LOGE("Saved context binary to '%s'.", context_binary.c_str());
      SHERPA_ONNX_LOGE(
          "It should be super fast the next time you init the system.");
      SHERPA_ONNX_LOGE("Remember to also provide libQnnSystem.so.");
    }
  }

  void InitEncoderFromContextBinary(const std::string &context_binary) {
    if (config_.paraformer.qnn_config.system_lib.empty()) {
      SHERPA_ONNX_LOGE(
          "You should provide --paraformer.qnn-system-lib if you also provide "
          "context binary");
      SHERPA_ONNX_EXIT(-1);
    }

    encoder_model_ = std::make_unique<QnnModel>(
        context_binary, config_.paraformer.qnn_config.system_lib,
        encoder_backend_.get(), BinaryContextTag{}, config_.debug);
  }

  void InitPredictorFromContextBinary(const std::string &context_binary) {
    if (config_.paraformer.qnn_config.system_lib.empty()) {
      SHERPA_ONNX_LOGE(
          "You should provide --paraformer.qnn-system-lib if you also provide "
          "context binary");
      SHERPA_ONNX_EXIT(-1);
    }

    predictor_model_ = std::make_unique<QnnModel>(
        context_binary, config_.paraformer.qnn_config.system_lib,
        predictor_backend_.get(), BinaryContextTag{}, config_.debug);
  }

  void InitDecoderFromContextBinary(const std::string &context_binary) {
    if (config_.paraformer.qnn_config.system_lib.empty()) {
      SHERPA_ONNX_LOGE(
          "You should provide --paraformer.qnn-system-lib if you also provide "
          "context binary");
      SHERPA_ONNX_EXIT(-1);
    }

    decoder_model_ = std::make_unique<QnnModel>(
        context_binary, config_.paraformer.qnn_config.system_lib,
        decoder_backend_.get(), BinaryContextTag{}, config_.debug);
  }

  void PostInitEncoder() { CheckEncoderModel(); }

  void PostInitPredictor() { CheckPredictorModel(); }

  void PostInitDecoder() { CheckDecoderModel(); }

  void CheckEncoderModel() {
    const auto &input_tensor_names = encoder_model_->InputTensorNames();
    if (input_tensor_names.size() != 1) {
      SHERPA_ONNX_LOGE("Expect 1 input tensor. Actual %d",
                       static_cast<int32_t>(input_tensor_names.size()));
      SHERPA_ONNX_EXIT(-1);
    }

    if (input_tensor_names[0] != "x") {
      SHERPA_ONNX_LOGE("The 1st input should be x, actual '%s'",
                       input_tensor_names[0].c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    std::vector<int32_t> x_shape =
        encoder_model_->TensorShape(input_tensor_names[0]);
    if (x_shape.size() != 3) {
      SHERPA_ONNX_LOGE("The 1st input should be 3-d, actual '%d'",
                       static_cast<int32_t>(x_shape.size()));
      SHERPA_ONNX_EXIT(-1);
    }

    if (x_shape[0] != 1) {
      SHERPA_ONNX_LOGE("The x.shape[0] should be 1, actual '%d'", x_shape[0]);
      SHERPA_ONNX_EXIT(-1);
    }

    num_input_frames_ = x_shape[1];
    feat_dim_ = x_shape[2];

    if (!encoder_model_->HasTensor("encoder_out")) {
      SHERPA_ONNX_LOGE("Model does not have output node 'encoder_out'");
      SHERPA_ONNX_EXIT(-1);
    }

    std::vector<int32_t> encoder_out_shape =
        encoder_model_->TensorShape("encoder_out");

    encoder_out_dim1_ = encoder_out_shape[1];
    encoder_out_dim2_ = encoder_out_shape[2];

    if (config_.debug) {
      SHERPA_ONNX_LOGE("num_input_frames: %d", num_input_frames_);
      SHERPA_ONNX_LOGE("feat_dim: %d", feat_dim_);
      SHERPA_ONNX_LOGE("encoder_out_dim1: %d", encoder_out_dim1_);
      SHERPA_ONNX_LOGE("encoder_out_dim2: %d", encoder_out_dim2_);
    }
  }

  void CheckPredictorModel() {
    const auto &input_tensor_names = predictor_model_->InputTensorNames();
    if (input_tensor_names.size() != 1) {
      SHERPA_ONNX_LOGE("Expect 1 input tensor. Actual %d",
                       static_cast<int32_t>(input_tensor_names.size()));
      SHERPA_ONNX_EXIT(-1);
    }

    if (input_tensor_names[0] != "encoder_out") {
      SHERPA_ONNX_LOGE("The 1st input should be encoder_out, actual '%s'",
                       input_tensor_names[0].c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    std::vector<int32_t> x_shape =
        predictor_model_->TensorShape(input_tensor_names[0]);
    if (x_shape.size() != 3) {
      SHERPA_ONNX_LOGE("The 1st input should be 3-d, actual '%d'",
                       static_cast<int32_t>(x_shape.size()));
      SHERPA_ONNX_EXIT(-1);
    }

    if (x_shape[0] != 1) {
      SHERPA_ONNX_LOGE("The x.shape[0] should be 1, actual '%d'", x_shape[0]);
      SHERPA_ONNX_EXIT(-1);
    }

    if (x_shape[1] != encoder_out_dim2_) {
      SHERPA_ONNX_LOGE(
          "The input dim 1 of the predictor should be %d, given: %d",
          encoder_out_dim2_, x_shape[1]);
      SHERPA_ONNX_EXIT(-1);
    }

    if (x_shape[2] != encoder_out_dim1_) {
      SHERPA_ONNX_LOGE(
          "The input dim 2 of the predictor should be %d, given: %d",
          encoder_out_dim1_, x_shape[2]);
      SHERPA_ONNX_EXIT(-1);
    }

    if (!predictor_model_->HasTensor("alphas")) {
      SHERPA_ONNX_LOGE("Model does not have output node 'alphas'");
      SHERPA_ONNX_EXIT(-1);
    }

    std::vector<int32_t> alphas_shape = predictor_model_->TensorShape("alphas");
    if (alphas_shape.size() != 2) {
      SHERPA_ONNX_LOGE("alphas should be 2-d, given: %d",
                       static_cast<int32_t>(alphas_shape.size()));
      SHERPA_ONNX_EXIT(-1);
    }

    if (alphas_shape[0] != 1) {
      SHERPA_ONNX_LOGE("We support only batch size 1 for alphas. Given: %d",
                       alphas_shape[0]);
      SHERPA_ONNX_EXIT(-1);
    }

    if (alphas_shape[1] != encoder_out_dim1_) {
      SHERPA_ONNX_LOGE("Expected output dim %d for alphas. Given: %d",
                       encoder_out_dim1_, alphas_shape[1]);
      SHERPA_ONNX_EXIT(-1);
    }
  }

  void CheckDecoderModel() {
    const auto &input_tensor_names = decoder_model_->InputTensorNames();
    if (input_tensor_names.size() != 3) {
      SHERPA_ONNX_LOGE("Expect 3 input tensors. Actual %d",
                       static_cast<int32_t>(input_tensor_names.size()));
      SHERPA_ONNX_EXIT(-1);
    }

    if (input_tensor_names[0] != "encoder_out") {
      SHERPA_ONNX_LOGE("The 1st input should be encoder_out, actual '%s'",
                       input_tensor_names[0].c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    if (input_tensor_names[1] != "acoustic_embedding") {
      SHERPA_ONNX_LOGE(
          "The 2nd input should be acoustic_embedding, actual '%s'",
          input_tensor_names[1].c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    if (input_tensor_names[2] != "mask") {
      SHERPA_ONNX_LOGE("The 3rd input should be mask, actual '%s'",
                       input_tensor_names[2].c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    if (!decoder_model_->HasTensor("decoder_out")) {
      SHERPA_ONNX_LOGE("Model does not have output node 'decoder_out'");
      SHERPA_ONNX_EXIT(-1);
    }

    std::vector<int32_t> decoder_out_shape =
        decoder_model_->TensorShape("decoder_out");
    if (decoder_out_shape.size() != 3) {
      SHERPA_ONNX_LOGE("decoder_out should be 3-d, given: %d",
                       static_cast<int32_t>(decoder_out_shape.size()));
      SHERPA_ONNX_EXIT(-1);
    }

    if (decoder_out_shape[0] != 1) {
      SHERPA_ONNX_LOGE("We support only batch size 1 for decoder. Given: %d",
                       decoder_out_shape[0]);
      SHERPA_ONNX_EXIT(-1);
    }

    if (decoder_out_shape[2] != encoder_out_dim1_) {
      SHERPA_ONNX_LOGE("Expected output dim %d for decoder_out. Given: %d",
                       encoder_out_dim1_, decoder_out_shape[2]);
      SHERPA_ONNX_EXIT(-1);
    }

    vocab_size_ = decoder_out_shape[1];

    if (config_.debug) {
      SHERPA_ONNX_LOGE("vocab_size: %d", vocab_size_);
    }
  }

 private:
  std::mutex mutex_;
  OfflineModelConfig config_;

  std::unique_ptr<QnnBackend> encoder_backend_;
  std::unique_ptr<QnnModel> encoder_model_;

  std::unique_ptr<QnnBackend> predictor_backend_;
  std::unique_ptr<QnnModel> predictor_model_;

  std::unique_ptr<QnnBackend> decoder_backend_;
  std::unique_ptr<QnnModel> decoder_model_;

  int32_t num_input_frames_ = 0;
  int32_t feat_dim_ = 0;

  int32_t encoder_out_dim1_ = 0;
  int32_t encoder_out_dim2_ = 0;
  int32_t vocab_size_ = 0;
};

OfflineParaformerModelQnn::~OfflineParaformerModelQnn() = default;

OfflineParaformerModelQnn::OfflineParaformerModelQnn(
    const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineParaformerModelQnn::OfflineParaformerModelQnn(
    Manager *mgr, const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

std::vector<float> OfflineParaformerModelQnn::Run(
    std::vector<float> features) const {
  return impl_->Run(std::move(features));
}

int32_t OfflineParaformerModelQnn::VocabSize() const {
  return impl_->VocabSize();
}

#if __ANDROID_API__ >= 9
template OfflineParaformerModelQnn::OfflineParaformerModelQnn(
    AAssetManager *mgr, const OfflineModelConfig &config);
#endif

#if __OHOS__
template OfflineParaformerModelQnn::OfflineParaformerModelQnn(
    NativeResourceManager *mgr, const OfflineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/qnn/offline-paraformer-model-qnn.h
================================================
// sherpa-onnx/csrc/qnn/offline-paraformer-model-qnn.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_QNN_OFFLINE_PARAFORMER_MODEL_QNN_H_
#define SHERPA_ONNX_CSRC_QNN_OFFLINE_PARAFORMER_MODEL_QNN_H_

#include <memory>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/offline-model-config.h"

namespace sherpa_onnx {

class OfflineParaformerModelQnn {
 public:
  ~OfflineParaformerModelQnn();

  explicit OfflineParaformerModelQnn(const OfflineModelConfig &config);

  template <typename Manager>
  OfflineParaformerModelQnn(Manager *mgr, const OfflineModelConfig &config);

  /**
   * @param features A tensor of shape (num_frames, feature_dim)
   *                 before applying LFR.
   * @returns Return a tensor of shape (num_output_frames, vocab_size)
   */
  std::vector<float> Run(std::vector<float> features) const;

  int32_t VocabSize() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_QNN_OFFLINE_PARAFORMER_MODEL_QNN_H_


================================================
FILE: sherpa-onnx/csrc/qnn/offline-recognizer-zipformer-ctc-qnn-impl.h
================================================
// sherpa-onnx/csrc/qnn/offline-recognizer-zipformer-ctc-qnn-impl.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_QNN_OFFLINE_RECOGNIZER_ZIPFORMER_CTC_QNN_IMPL_H_
#define SHERPA_ONNX_CSRC_QNN_OFFLINE_RECOGNIZER_ZIPFORMER_CTC_QNN_IMPL_H_

#include <ios>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-model-config.h"
#include "sherpa-onnx/csrc/offline-recognizer-impl.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/qnn/offline-zipformer-ctc-model-qnn.h"
#include "sherpa-onnx/csrc/rknn/offline-ctc-greedy-search-decoder-rknn.h"
#include "sherpa-onnx/csrc/symbol-table.h"

namespace sherpa_onnx {

// defined in ../offline-recognizer-ctc-impl.h
OfflineRecognitionResult Convert(const OfflineCtcDecoderResult &src,
                                 const SymbolTable &sym_table,
                                 int32_t frame_shift_ms,
                                 int32_t subsampling_factor);

class OfflineRecognizerZipformerCtcQnnImpl : public OfflineRecognizerImpl {
 public:
  explicit OfflineRecognizerZipformerCtcQnnImpl(
      const OfflineRecognizerConfig &config)
      : OfflineRecognizerImpl(config),
        config_(config),
        symbol_table_(config_.model_config.tokens),
        model_(std::make_unique<OfflineZipformerCtcModelQnn>(
            config.model_config)) {
    Init();
  }

  template <typename Manager>
  OfflineRecognizerZipformerCtcQnnImpl(Manager *mgr,
                                       const OfflineRecognizerConfig &config)
      : OfflineRecognizerImpl(mgr, config),
        config_(config),
        symbol_table_(mgr, config_.model_config.tokens),
        model_(std::make_unique<OfflineZipformerCtcModelQnn>(
            mgr, config.model_config)) {
    Init();
  }

  void Init() {
    if (config_.decoding_method == "greedy_search") {
      if (!symbol_table_.Contains("<blk>") &&
          !symbol_table_.Contains("<eps>") &&
          !symbol_table_.Contains("<blank>") &&
          config_.model_config.omnilingual.model.empty()) {
        // for omnilingual asr, its blank id is 0
        SHERPA_ONNX_LOGE(
            "We expect that tokens.txt contains "
            "the symbol <blk> or <eps> or <blank> and its ID.");
        SHERPA_ONNX_EXIT(-1);
      }

      int32_t blank_id = 0;
      if (symbol_table_.Contains("<blk>")) {
        blank_id = symbol_table_["<blk>"];
      } else if (symbol_table_.Contains("<eps>")) {
        // for tdnn models of the yesno recipe from icefall
        blank_id = symbol_table_["<eps>"];
      } else if (symbol_table_.Contains("<blank>")) {
        // for Wenet CTC models
        blank_id = symbol_table_["<blank>"];
      }

      decoder_ = std::make_unique<OfflineCtcGreedySearchDecoderRknn>(blank_id);
    } else {
      SHERPA_ONNX_LOGE("Only greedy_search is supported at present. Given %s",
                       config_.decoding_method.c_str());
      SHERPA_ONNX_EXIT(-1);
    }
  }

  std::unique_ptr<OfflineStream> CreateStream() const override {
    return std::make_unique<OfflineStream>(config_.feat_config);
  }

  void DecodeStreams(OfflineStream **ss, int32_t n) const override {
    for (int32_t i = 0; i != n; ++i) {
      DecodeStream(ss[i]);
    }
  }

  OfflineRecognizerConfig GetConfig() const override { return config_; }

 private:
  // Decode a single stream.
  // Some models do not support batch size > 1, e.g., WeNet CTC models.
  void DecodeStream(OfflineStream *s) const {
    std::vector<float> f = s->GetFrames();

    int32_t vocab_size = model_->VocabSize();

    std::vector<float> log_probs = model_->Run(std::move(f));
    int32_t num_out_frames = log_probs.size() / vocab_size;

    auto result =
        decoder_->Decode(log_probs.data(), num_out_frames, vocab_size);

    int32_t frame_shift_ms = 10;

    auto r = Convert(result, symbol_table_, frame_shift_ms,
                     model_->SubsamplingFactor());
    r.text = ApplyInverseTextNormalization(std::move(r.text));
    r.text = ApplyHomophoneReplacer(std::move(r.text));
    s->SetResult(r);
  }

 private:
  OfflineRecognizerConfig config_;
  SymbolTable symbol_table_;
  std::unique_ptr<OfflineZipformerCtcModelQnn> model_;
  std::unique_ptr<OfflineCtcGreedySearchDecoderRknn> decoder_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_QNN_OFFLINE_RECOGNIZER_ZIPFORMER_CTC_QNN_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/qnn/offline-sense-voice-model-qnn.cc
================================================
// sherpa-onnx/csrc/qnn/offline-sense-voice-model-qnn.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/qnn/offline-sense-voice-model-qnn.h"

#include <algorithm>
#include <array>
#include <memory>
#include <mutex>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/qnn/macros.h"
#include "sherpa-onnx/csrc/qnn/qnn-backend.h"
#include "sherpa-onnx/csrc/qnn/qnn-model.h"

namespace sherpa_onnx {

class OfflineSenseVoiceModelQnn::Impl {
 public:
  explicit Impl(const OfflineModelConfig &config) : config_(config) {
    backend_ = std::make_unique<QnnBackend>(
        config.sense_voice.qnn_config.backend_lib, config_.debug);

    const auto &context_binary = config_.sense_voice.qnn_config.context_binary;

    if (context_binary.empty()) {
      if (config_.debug) {
        SHERPA_ONNX_LOGE(
            "Init from model lib since context binary is not given");
      }

      InitFromModelLib();

      if (config_.debug) {
        SHERPA_ONNX_LOGE(
            "Skip generating context binary since you don't provide a path to "
            "save it");
      }

    } else if (!FileExists(context_binary)) {
      if (config_.debug) {
        SHERPA_ONNX_LOGE(
            "Init from model lib since context binary '%s' does not exist",
            context_binary.c_str());
      }

      InitFromModelLib();

      CreateContextBinary();
    } else {
      if (config_.debug) {
        SHERPA_ONNX_LOGE("Init from context binary '%s'",
                         context_binary.c_str());
      }
      InitFromContextBinary();
    }

    PostInit();
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineModelConfig &config) : config_(config) {
    SHERPA_ONNX_LOGE(
        "Please copy all files from assets to SD card and set assetManager to "
        "null");
    SHERPA_ONNX_EXIT(-1);
  }

  const OfflineSenseVoiceModelMetaData &GetModelMetadata() const {
    return meta_data_;
  }

  std::vector<float> Run(std::vector<float> features, int32_t language,
                         int32_t text_norm) {
    std::lock_guard<std::mutex> lock(mutex_);

    features = ApplyLFR(std::move(features));
    if (features.empty()) {
      return {};
    }

    int32_t num_frames = features.size() / feat_dim_;

    model_->SetInputTensorData("x", features.data(), features.size());

    std::array<int32_t, 4> prompt = {language, 1, 2, text_norm};
    model_->SetInputTensorData("prompt", prompt.data(), prompt.size());

    model_->Run();

    return model_->GetOutputTensorData("logits");
  }

 private:
  void InitFromModelLib() {
    backend_->InitContext();

    model_ = std::make_unique<QnnModel>(config_.sense_voice.model,
                                        backend_.get(), config_.debug);
  }

  void InitFromContextBinary() {
    model_ = std::make_unique<QnnModel>(
        config_.sense_voice.qnn_config.context_binary,
        config_.sense_voice.qnn_config.system_lib, backend_.get(),
        BinaryContextTag{}, config_.debug);
  }

  void CreateContextBinary() {
    const auto &context_binary = config_.sense_voice.qnn_config.context_binary;

    if (config_.debug) {
      SHERPA_ONNX_LOGE("Creating context binary '%s'.", context_binary.c_str());
    }

    bool ok = model_->SaveBinaryContext(context_binary);

    if (!ok) {
      SHERPA_ONNX_LOGE("Failed to save context binary to '%s'",
                       context_binary.c_str());
    }

    if (config_.debug && ok) {
      SHERPA_ONNX_LOGE("Saved context binary to '%s'.", context_binary.c_str());
      SHERPA_ONNX_LOGE(
          "It should be super fast the next time you init the system.");
      SHERPA_ONNX_LOGE("Remember to also provide libQnnSystem.so.");
    }
  }

  void PostInit() { CheckModel(); }

  void CheckModel() {
    const auto &input_tensor_names = model_->InputTensorNames();
    if (input_tensor_names.size() != 2) {
      SHERPA_ONNX_LOGE("Expect two input tensors. Actual %d",
                       static_cast<int32_t>(input_tensor_names.size()));
      SHERPA_ONNX_EXIT(-1);
    }

    if (input_tensor_names[0] != "x") {
      SHERPA_ONNX_LOGE("The 1st input should be x, actual '%s'",
                       input_tensor_names[0].c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    if (input_tensor_names[1] != "prompt") {
      SHERPA_ONNX_LOGE("The 2nd input should be prompt, actual '%s'",
                       input_tensor_names[1].c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    std::vector<int32_t> x_shape = model_->TensorShape(input_tensor_names[0]);
    if (x_shape.size() != 3) {
      SHERPA_ONNX_LOGE("The 1st input should be 3-d, actual '%d'",
                       static_cast<int32_t>(x_shape.size()));
      SHERPA_ONNX_EXIT(-1);
    }

    if (x_shape[0] != 1) {
      SHERPA_ONNX_LOGE("The x.shape[0] should be 1, actual '%d'", x_shape[0]);
      SHERPA_ONNX_EXIT(-1);
    }

    if (x_shape[2] != feat_dim_) {
      SHERPA_ONNX_LOGE("The x.shape[2] should be %d, actual '%d'", feat_dim_,
                       x_shape[2]);
      SHERPA_ONNX_EXIT(-1);
    }

    std::vector<int32_t> prompt_shape =
        model_->TensorShape(input_tensor_names[1]);

    if (prompt_shape.size() != 1) {
      SHERPA_ONNX_LOGE("The 2nd input should be 1-d, actual '%d'",
                       static_cast<int32_t>(prompt_shape.size()));
      SHERPA_ONNX_EXIT(-1);
    }

    if (prompt_shape[0] != 4) {
      SHERPA_ONNX_LOGE("The prompt.shape[0] should be 4, actual '%d'",
                       prompt_shape[0]);
      SHERPA_ONNX_EXIT(-1);
    }

    if (!model_->HasTensor("logits")) {
      SHERPA_ONNX_LOGE("Model does not have output node 'logits'");
      SHERPA_ONNX_EXIT(-1);
    }

    expected_num_frames_ = x_shape[1];
  }

  std::vector<float> ApplyLFR(std::vector<float> in) const {
    int32_t lfr_window_size = meta_data_.window_size;
    int32_t lfr_window_shift = meta_data_.window_shift;
    int32_t in_feat_dim = 80;

    int32_t in_num_frames = in.size() / in_feat_dim;

    if (in_num_frames < lfr_window_size) {
      return {};
    }

    int32_t out_num_frames =
        (in_num_frames - lfr_window_size) / lfr_window_shift + 1;

    if (out_num_frames > expected_num_frames_) {
      SHERPA_ONNX_LOGE(
          "Number of input frames %d is too large. Truncate it to %d frames.",
          out_num_frames, expected_num_frames_);

      SHERPA_ONNX_LOGE(
          "Recognition result may be truncated/incomplete. Please select a "
          "model accepting longer audios.");

      out_num_frames = expected_num_frames_;
    }

    int32_t out_feat_dim = in_feat_dim * lfr_window_size;

    // if out_num_frames < expected_num_frames_, it uses 0 padding
    std::vector<float> out(expected_num_frames_ * out_feat_dim, 0);

    const float *p_in = in.data();
    float *p_out = out.data();

    for (int32_t i = 0; i != out_num_frames; ++i) {
      std::copy(p_in, p_in + out_feat_dim, p_out);

      p_out += out_feat_dim;
      p_in += lfr_window_shift * in_feat_dim;
    }

    return out;
  }

 private:
  std::mutex mutex_;

  OfflineModelConfig config_;
  OfflineSenseVoiceModelMetaData meta_data_;

  std::unique_ptr<QnnBackend> backend_;
  std::unique_ptr<QnnModel> model_;

  int32_t expected_num_frames_ = 0;
  int32_t feat_dim_ = 560;
};

OfflineSenseVoiceModelQnn::OfflineSenseVoiceModelQnn(
    const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineSenseVoiceModelQnn::OfflineSenseVoiceModelQnn(
    Manager *mgr, const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineSenseVoiceModelQnn::~OfflineSenseVoiceModelQnn() = default;

std::vector<float> OfflineSenseVoiceModelQnn::Run(std::vector<float> features,
                                                  int32_t language,
                                                  int32_t text_norm) const {
  return impl_->Run(std::move(features), language, text_norm);
}

const OfflineSenseVoiceModelMetaData &
OfflineSenseVoiceModelQnn::GetModelMetadata() const {
  return impl_->GetModelMetadata();
}

#if __ANDROID_API__ >= 9
template OfflineSenseVoiceModelQnn::OfflineSenseVoiceModelQnn(
    AAssetManager *mgr, const OfflineModelConfig &config);
#endif

#if __OHOS__
template OfflineSenseVoiceModelQnn::OfflineSenseVoiceModelQnn(
    NativeResourceManager *mgr, const OfflineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/qnn/offline-sense-voice-model-qnn.h
================================================
// sherpa-onnx/csrc/qnn/offline-sense-voice-model-qnn.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_QNN_OFFLINE_SENSE_VOICE_MODEL_QNN_H_
#define SHERPA_ONNX_CSRC_QNN_OFFLINE_SENSE_VOICE_MODEL_QNN_H_

#include <memory>
#include <vector>

#include "sherpa-onnx/csrc/offline-model-config.h"
#include "sherpa-onnx/csrc/offline-sense-voice-model-meta-data.h"

namespace sherpa_onnx {

class OfflineSenseVoiceModelQnn {
 public:
  ~OfflineSenseVoiceModelQnn();

  explicit OfflineSenseVoiceModelQnn(const OfflineModelConfig &config);

  template <typename Manager>
  OfflineSenseVoiceModelQnn(Manager *mgr, const OfflineModelConfig &config);

  /**
   * @param features A tensor of shape (num_frames, feature_dim)
   *                 before applying LFR.
   * @param language
   * @param text_norm
   * @returns Return a tensor of shape (num_output_frames, vocab_size)
   */
  std::vector<float> Run(std::vector<float> features, int32_t language,
                         int32_t text_norm) const;

  const OfflineSenseVoiceModelMetaData &GetModelMetadata() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_QNN_OFFLINE_SENSE_VOICE_MODEL_QNN_H_


================================================
FILE: sherpa-onnx/csrc/qnn/offline-zipformer-ctc-model-qnn.cc
================================================
// sherpa-onnx/csrc/qnn/offline-zipformer-ctc-model-qnn.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/qnn/offline-zipformer-ctc-model-qnn.h"

#include <algorithm>
#include <array>
#include <memory>
#include <mutex>  // NOLINT
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/qnn/macros.h"
#include "sherpa-onnx/csrc/qnn/qnn-backend.h"
#include "sherpa-onnx/csrc/qnn/qnn-model.h"

namespace sherpa_onnx {

class OfflineZipformerCtcModelQnn::Impl {
 public:
  explicit Impl(const OfflineModelConfig &config) : config_(config) {
    backend_ = std::make_unique<QnnBackend>(
        config.zipformer_ctc.qnn_config.backend_lib, config_.debug);

    const auto &context_binary =
        config_.zipformer_ctc.qnn_config.context_binary;

    if (context_binary.empty()) {
      if (config_.debug) {
        SHERPA_ONNX_LOGE(
            "Init from model lib since context binary is not given");
      }

      InitFromModelLib();

      if (config_.debug) {
        SHERPA_ONNX_LOGE(
            "Skip generating context binary since you don't provide a path to "
            "save it");
      }
    } else if (!FileExists(context_binary)) {
      if (config_.debug) {
        SHERPA_ONNX_LOGE(
            "Init from model lib since context binary '%s' does not exist",
            context_binary.c_str());
      }

      InitFromModelLib();

      CreateContextBinary();
    } else {
      if (config_.debug) {
        SHERPA_ONNX_LOGE("Init from context binary '%s'",
                         context_binary.c_str());
      }
      InitFromContextBinary();
    }

    PostInit();
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineModelConfig &config) : config_(config) {
    SHERPA_ONNX_LOGE(
        "Please copy all files from assets to SD card and set assetManager to "
        "null");
    SHERPA_ONNX_EXIT(-1);
  }

  std::vector<float> Run(std::vector<float> features) {
    int32_t num_frames = features.size() / feat_dim_;

    if (num_frames != max_num_frames_) {
      if (num_frames > max_num_frames_) {
        SHERPA_ONNX_LOGE(
            "Number of input frames %d is too large. Truncate it to %d frames.",
            num_frames, max_num_frames_);

        SHERPA_ONNX_LOGE(
            "Recognition result may be truncated/incomplete. Please select a "
            "model accepting longer audios.");
      }

      features.resize(max_num_frames_ * feat_dim_);

      num_frames = max_num_frames_;
    }

    std::lock_guard<std::mutex> lock(mutex_);

    model_->SetInputTensorData("x", features.data(), features.size());

    model_->Run();

    return model_->GetOutputTensorData("log_probs");
  }

  int32_t VocabSize() const { return vocab_size_; }
  int32_t SubsamplingFactor() const { return subsampling_factor_; }

 private:
  void InitFromModelLib() {
    backend_->InitContext();

    model_ = std::make_unique<QnnModel>(config_.zipformer_ctc.model,
                                        backend_.get(), config_.debug);
  }

  void InitFromContextBinary() {
    model_ = std::make_unique<QnnModel>(
        config_.zipformer_ctc.qnn_config.context_binary,
        config_.zipformer_ctc.qnn_config.system_lib, backend_.get(),
        BinaryContextTag{}, config_.debug);
  }

  void CreateContextBinary() {
    const auto &context_binary =
        config_.zipformer_ctc.qnn_config.context_binary;

    if (config_.debug) {
      SHERPA_ONNX_LOGE("Creating context binary '%s'.", context_binary.c_str());
    }

    bool ok = model_->SaveBinaryContext(context_binary);

    if (!ok) {
      SHERPA_ONNX_LOGE("Failed to save context binary to '%s'",
                       context_binary.c_str());
    }

    if (config_.debug && ok) {
      SHERPA_ONNX_LOGE("Saved context binary to '%s'.", context_binary.c_str());
      SHERPA_ONNX_LOGE(
          "It should be super fast the next time you init the system.");
      SHERPA_ONNX_LOGE("Remember to also provide libQnnSystem.so.");
    }
  }

  void PostInit() { CheckModel(); }

  void CheckModel() {
    const auto &input_tensor_names = model_->InputTensorNames();
    if (input_tensor_names.size() != 1) {
      SHERPA_ONNX_LOGE("Expect 1 input tensor. Actual %d",
                       static_cast<int32_t>(input_tensor_names.size()));
      SHERPA_ONNX_EXIT(-1);
    }

    if (input_tensor_names[0] != "x") {
      SHERPA_ONNX_LOGE("The 1st input should be x, actual '%s'",
                       input_tensor_names[0].c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    std::vector<int32_t> x_shape = model_->TensorShape(input_tensor_names[0]);
    if (x_shape.size() != 3) {
      SHERPA_ONNX_LOGE("The 1st input should be 3-d, actual '%d'",
                       static_cast<int32_t>(x_shape.size()));
      SHERPA_ONNX_EXIT(-1);
    }

    if (x_shape[0] != 1) {
      SHERPA_ONNX_LOGE("The x.shape[0] should be 1, actual '%d'", x_shape[0]);
      SHERPA_ONNX_EXIT(-1);
    }

    max_num_frames_ = x_shape[1];
    feat_dim_ = x_shape[2];

    if (!model_->HasTensor("log_probs")) {
      SHERPA_ONNX_LOGE("Model does not have output node 'log_probs'");
      SHERPA_ONNX_EXIT(-1);
    }

    auto out_shape = model_->TensorShape("log_probs");
    vocab_size_ = out_shape[2];

    subsampling_factor_ = max_num_frames_ / out_shape[1];
    if (config_.debug) {
      SHERPA_ONNX_LOGE("max_num_frames: %d", max_num_frames_);
      SHERPA_ONNX_LOGE("feat_dim: %d", feat_dim_);
      SHERPA_ONNX_LOGE("vocab_size: %d", vocab_size_);
      SHERPA_ONNX_LOGE("subsampling_factor: %d", subsampling_factor_);
    }
  }

 private:
  std::mutex mutex_;

  OfflineModelConfig config_;

  std::unique_ptr<QnnBackend> backend_;
  std::unique_ptr<QnnModel> model_;

  int32_t max_num_frames_ = 0;
  int32_t feat_dim_ = 0;
  int32_t vocab_size_ = 0;
  int32_t subsampling_factor_ = 1;
};

OfflineZipformerCtcModelQnn::OfflineZipformerCtcModelQnn(
    const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineZipformerCtcModelQnn::OfflineZipformerCtcModelQnn(
    Manager *mgr, const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineZipformerCtcModelQnn::~OfflineZipformerCtcModelQnn() = default;

std::vector<float> OfflineZipformerCtcModelQnn::Run(
    std::vector<float> features) const {
  return impl_->Run(std::move(features));
}

int32_t OfflineZipformerCtcModelQnn::VocabSize() const {
  return impl_->VocabSize();
}

int32_t OfflineZipformerCtcModelQnn::SubsamplingFactor() const {
  return impl_->SubsamplingFactor();
}

#if __ANDROID_API__ >= 9
template OfflineZipformerCtcModelQnn::OfflineZipformerCtcModelQnn(
    AAssetManager *mgr, const OfflineModelConfig &config);
#endif

#if __OHOS__
template OfflineZipformerCtcModelQnn::OfflineZipformerCtcModelQnn(
    NativeResourceManager *mgr, const OfflineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/qnn/offline-zipformer-ctc-model-qnn.h
================================================
// sherpa-onnx/csrc/qnn/offline-zipformer-ctc-model-qnn.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_QNN_OFFLINE_ZIPFORMER_CTC_MODEL_QNN_H_
#define SHERPA_ONNX_CSRC_QNN_OFFLINE_ZIPFORMER_CTC_MODEL_QNN_H_

#include <memory>
#include <vector>

#include "sherpa-onnx/csrc/offline-model-config.h"

namespace sherpa_onnx {

class OfflineZipformerCtcModelQnn {
 public:
  ~OfflineZipformerCtcModelQnn();

  explicit OfflineZipformerCtcModelQnn(const OfflineModelConfig &config);

  template <typename Manager>
  OfflineZipformerCtcModelQnn(Manager *mgr, const OfflineModelConfig &config);

  /**
   * @param features A tensor of shape (num_frames, feature_dim)
   * @returns Return a tensor of shape (num_output_frames, vocab_size)
   */
  std::vector<float> Run(std::vector<float> features) const;

  int32_t VocabSize() const;
  int32_t SubsamplingFactor() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_QNN_OFFLINE_ZIPFORMER_CTC_MODEL_QNN_H_


================================================
FILE: sherpa-onnx/csrc/qnn/qnn-backend.cc
================================================
// sherpa-onnx/csrc/qnn/qnn-backend.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/qnn/qnn-backend.h"

#include <dlfcn.h>
#include <stdio.h>

#include <cstdint>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

#include "QnnInterface.h"
#include "System/QnnSystemInterface.h"
#include "sherpa-onnx/csrc/qnn/macros.h"
#include "sherpa-onnx/csrc/qnn/utils.h"

namespace sherpa_onnx {

class QnnBackend::Impl {
 public:
  explicit Impl(const std::string &backend_lib, bool debug) : debug_(debug) {
    bool ok = InitQnnInterface(backend_lib);
    if (!ok) {
      SHERPA_ONNX_LOGE("Failed to init qnn interface from '%s'",
                       backend_lib.c_str());
      return;
    }

    InitLog();
    InitBackend();
    InitDevice();

    is_initialized_ = true;
  }

  ~Impl() {
    if (context_handle_) {
      auto ret = qnn_interface_.contextFree(context_handle_, nullptr);
      SHERPA_ONNX_QNN_CHECK(ret, "Failed to call contextFree");
    }

    if (device_handle_) {
      auto ret = qnn_interface_.deviceFree(device_handle_);
      SHERPA_ONNX_QNN_CHECK(ret, "Failed to call deviceFree");
    }

    if (backend_handle_) {
      auto ret = qnn_interface_.backendFree(backend_handle_);
      SHERPA_ONNX_QNN_CHECK(ret, "Failed to call backendFree");
    }

    if (log_handle_) {
      auto ret = qnn_interface_.logFree(log_handle_);
      SHERPA_ONNX_QNN_CHECK(ret, "Failed to call logFree");
    }
  }

  void InitContext() {
    if (context_handle_) {
      SHERPA_ONNX_LOGE("context handle is already initialized");
      return;
    }

    auto ret = qnn_interface_.contextCreate(backend_handle_, device_handle_,
                                            context_config_, &context_handle_);
    SHERPA_ONNX_QNN_CHECK(ret, "Failed to call contextCreate");
  }

  void InitContext(Qnn_ContextHandle_t t) { context_handle_ = t; }

  Qnn_LogHandle_t LogHandle() const { return log_handle_; }

  Qnn_BackendHandle_t BackendHandle() const { return backend_handle_; }

  Qnn_DeviceHandle_t DeviceHandle() const { return device_handle_; }

  Qnn_ContextHandle_t ContextHandle() const { return context_handle_; }

  QNN_INTERFACE_VER_TYPE QnnInterface() const { return qnn_interface_; }

  QnnLog_Level_t LogLevel() const { return log_level_; }

  bool IsInitialized() const { return is_initialized_; }

 private:
  bool InitQnnInterface(const std::string &backend_lib) {
    backend_lib_handle_ = std::unique_ptr<void, decltype(&dlclose)>(
        dlopen(backend_lib.c_str(), RTLD_NOW | RTLD_LOCAL), &dlclose);
    if (!backend_lib_handle_) {
      SHERPA_ONNX_LOGE("Failed to dlopen '%s'. Error is: '%s'",
                       backend_lib.c_str(), dlerror());
      return false;
    }

    if (debug_) {
      SHERPA_ONNX_LOGE("loaded %s", backend_lib.c_str());
    }

    const char *symbol = "QnnInterface_getProviders";
    auto get_interface_providers =
        reinterpret_cast<QnnInterfaceGetProvidersFnType>(
            dlsym(backend_lib_handle_.get(), symbol));
    if (!get_interface_providers) {
      SHERPA_ONNX_LOGE("Failed to dlsym for '%s'. Error is: '%s'", symbol,
                       dlerror());
      return false;
    }

    if (debug_) {
      SHERPA_ONNX_LOGE("Got %s", symbol);
    }

    const QnnInterface_t **interface_providers = nullptr;
    uint32_t num_providers = 0;

    auto ret = get_interface_providers(&interface_providers, &num_providers);
    SHERPA_ONNX_QNN_CHECK(ret, "Failed to call get_interface_providers");

    if (!interface_providers) {
      SHERPA_ONNX_LOGE("interface_providers is nullptr");
      return false;
    }

    if (num_providers == 0) {
      SHERPA_ONNX_LOGE("Number of providers is 0");
      return false;
    }

    bool found_valid_interface = false;

    if (debug_) {
      SHERPA_ONNX_LOGE("QNN_API_VERSION_MAJOR: %d", QNN_API_VERSION_MAJOR);
      SHERPA_ONNX_LOGE("QNN_API_VERSION_MINOR: %d", QNN_API_VERSION_MINOR);
      SHERPA_ONNX_LOGE("QNN_API_VERSION_PATCH: %d", QNN_API_VERSION_PATCH);
    }

    for (size_t idx = 0; idx < num_providers; ++idx) {
      auto p = interface_providers[idx];

      if (debug_) {
        std::ostringstream os;
        os << "---" << idx << "----\n";
        os << "backendId: " << p->backendId << "\n";
        os << "coreApiVersion.major: " << p->apiVersion.coreApiVersion.major
           << "\n";
        os << "coreApiVersion.minor: " << p->apiVersion.coreApiVersion.minor
           << "\n";
        os << "coreApiVersion.patch: " << p->apiVersion.coreApiVersion.patch
           << "\n";

        os << "backendApiVersion.major: "
           << p->apiVersion.backendApiVersion.major << "\n";
        os << "backendApiVersion.minor: "
           << p->apiVersion.backendApiVersion.minor << "\n";
        os << "backendApiVersion.patch: "
           << p->apiVersion.backendApiVersion.patch << "\n";
        SHERPA_ONNX_LOGE("%s", os.str().c_str());
      }

      qnn_interface_ = p->QNN_INTERFACE_VER_NAME;
      found_valid_interface = true;
      break;
    }

    if (!found_valid_interface) {
      SHERPA_ONNX_LOGE("Failed to find valid interface");
      return false;
    }

    if (debug_) {
      const char *build_id = nullptr;
      ret = qnn_interface_.backendGetBuildId(&build_id);
      SHERPA_ONNX_QNN_CHECK(ret, "Failed to call backendGetBuildId()");

      SHERPA_ONNX_LOGE("backend build ID: %s", build_id);
    }

    return true;
  }

  void InitLog() {
    auto ret = qnn_interface_.logCreate(LogCallback, log_level_, &log_handle_);
    SHERPA_ONNX_QNN_CHECK(ret, "Failed to call logCreate");
  }

  void InitBackend() {
    auto ret = qnn_interface_.backendCreate(log_handle_, backend_config_,
                                            &backend_handle_);
    SHERPA_ONNX_QNN_CHECK(ret, "Failed to call backendCreate");
  }

  void InitDevice() {
    auto ret =
        qnn_interface_.deviceCreate(log_handle_, nullptr, &device_handle_);
    SHERPA_ONNX_QNN_CHECK(ret, "Failed to call deviceCreate");
  }

 private:
  bool debug_ = true;
  std::unique_ptr<void, decltype(&dlclose)> backend_lib_handle_{nullptr,
                                                                &dlclose};

  QNN_INTERFACE_VER_TYPE qnn_interface_;

  QnnLog_Level_t log_level_ = QNN_LOG_LEVEL_WARN;
  // QnnLog_Level_t log_level_ = QNN_LOG_LEVEL_INFO;
  // QnnLog_Level_t log_level_ = QNN_LOG_LEVEL_VERBOSE;

  Qnn_LogHandle_t log_handle_ = nullptr;

  const QnnBackend_Config_t **backend_config_ = nullptr;
  Qnn_BackendHandle_t backend_handle_ = nullptr;

  Qnn_DeviceHandle_t device_handle_ = nullptr;

  Qnn_ContextHandle_t context_handle_ = nullptr;
  const QnnContext_Config_t **context_config_ = nullptr;
  bool is_initialized_ = false;
};

QnnBackend::~QnnBackend() = default;

QnnBackend::QnnBackend(const std::string &backend_lib, bool debug)
    : impl_(std::make_unique<Impl>(backend_lib, debug)) {}

void QnnBackend::InitContext() const { impl_->InitContext(); }

void QnnBackend::InitContext(Qnn_ContextHandle_t context_handle) const {
  impl_->InitContext(context_handle);
}

Qnn_LogHandle_t QnnBackend::LogHandle() const { return impl_->LogHandle(); }

Qnn_BackendHandle_t QnnBackend::BackendHandle() const {
  return impl_->BackendHandle();
}

Qnn_DeviceHandle_t QnnBackend::DeviceHandle() const {
  return impl_->DeviceHandle();
}

Qnn_ContextHandle_t QnnBackend::ContextHandle() const {
  return impl_->ContextHandle();
}

QNN_INTERFACE_VER_TYPE QnnBackend::QnnInterface() const {
  return impl_->QnnInterface();
}

QnnLog_Level_t QnnBackend::LogLevel() const { return impl_->LogLevel(); }

bool QnnBackend::IsInitialized() const { return impl_->IsInitialized(); }

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/qnn/qnn-backend.h
================================================
// sherpa-onnx/csrc/qnn/qnn-backend.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_QNN_QNN_BACKEND_H_
#define SHERPA_ONNX_CSRC_QNN_QNN_BACKEND_H_

#include <memory>
#include <string>

#include "QnnInterface.h"

namespace sherpa_onnx {

class QnnBackend {
 public:
  explicit QnnBackend(const std::string &backend_lib, bool debug);
  ~QnnBackend();

  void InitContext() const;
  void InitContext(Qnn_ContextHandle_t context_handle) const;
  Qnn_LogHandle_t LogHandle() const;
  Qnn_BackendHandle_t BackendHandle() const;
  Qnn_DeviceHandle_t DeviceHandle() const;
  Qnn_ContextHandle_t ContextHandle() const;
  QNN_INTERFACE_VER_TYPE QnnInterface() const;
  QnnLog_Level_t LogLevel() const;
  bool IsInitialized() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_QNN_QNN_BACKEND_H_


================================================
FILE: sherpa-onnx/csrc/qnn/qnn-model.cc
================================================
// sherpa-onnx/csrc/qnn/qnn-model.h
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/qnn/qnn-model.h"

#include <dlfcn.h>

#include <fstream>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/qnn/macros.h"
#include "sherpa-onnx/csrc/qnn/qnn-backend.h"
#include "sherpa-onnx/csrc/qnn/utils.h"

namespace sherpa_onnx {

class QnnModel::Impl {
 public:
  Impl(const std::string &model_so, const QnnBackend *backend, bool debug)
      : debug_(debug), backend_(backend) {
    bool ok = InitModel(model_so);
    if (!ok) {
      SHERPA_ONNX_LOGE("Failed to load '%s'", model_so.c_str());
      return;
    }

    ok = InitSymbols();
    if (!ok) {
      SHERPA_ONNX_LOGE("Failed to get model symbols from '%s'",
                       model_so.c_str());
      return;
    }

    InitGraph();

    PostInit();
  }

  Impl(const std::string &binary_context_file, const std::string &system_lib,
       const QnnBackend *backend, BinaryContextTag, bool debug)
      : debug_(debug), backend_(backend) {
    bool ok = LoadSystemLib(binary_context_file, system_lib);
    if (!ok) {
      return;
    }

    PostInit();
  }

  bool LoadSystemLib(const std::string &binary_context_file,
                     const std::string &system_lib) {
    system_lib_handle_ = std::unique_ptr<void, decltype(&dlclose)>(
        dlopen(system_lib.c_str(), RTLD_NOW | RTLD_LOCAL), &dlclose);
    if (!system_lib_handle_) {
      SHERPA_ONNX_LOGE("Failed to dlopen '%s'. Error is: '%s'",
                       system_lib.c_str(), dlerror());
      return false;
    }
    if (debug_) {
      SHERPA_ONNX_LOGE("loaded %s", system_lib.c_str());
    }

    auto get_system_interface_providers =
        reinterpret_cast<QnnSystemInterfaceGetProvidersFnType>(
            dlsym(system_lib_handle_.get(), "QnnSystemInterface_getProviders"));

    if (!get_system_interface_providers) {
      SHERPA_ONNX_LOGE("Failed to get QnnSystemInterface_getProviders");
      return false;
    }

    const QnnSystemInterface_t **system_interface_providers = nullptr;
    uint32_t num_providers = 0;
    if (get_system_interface_providers(&system_interface_providers,
                                       &num_providers) != QNN_SUCCESS) {
      SHERPA_ONNX_LOGE("Failed to get system interface providers.");
      return false;
    }

    if (!system_interface_providers) {
      SHERPA_ONNX_LOGE(
          "Failed to get system interface providers: null "
          "interface providers received.");
      return false;
    }

    if (!num_providers) {
      SHERPA_ONNX_LOGE(
          "Failed to get interface providers: 0 interface providers.");
      return false;
    }

    for (uint32_t i = 0; i < num_providers; ++i) {
      if (debug_) {
        SHERPA_ONNX_LOGE("QNN_SYSTEM_API_VERSION_MAJOR: %d",
                         static_cast<int32_t>(QNN_SYSTEM_API_VERSION_MAJOR));
        SHERPA_ONNX_LOGE("QNN_SYSTEM_API_VERSION_MINOR: %d",
                         static_cast<int32_t>(QNN_SYSTEM_API_VERSION_MINOR));
        SHERPA_ONNX_LOGE(
            "systemApiVersion.major: %d",
            static_cast<int32_t>(
                system_interface_providers[i]->systemApiVersion.major));
        SHERPA_ONNX_LOGE(
            "systemApiVersion.minor: %d",
            static_cast<int32_t>(
                system_interface_providers[i]->systemApiVersion.minor));
      }

      qnn_system_interface_ =
          system_interface_providers[i]->QNN_SYSTEM_INTERFACE_VER_NAME;
    }

    // read file into a buffer
    std::vector<uint8_t> buffer = ReadFile<uint8_t>(binary_context_file);

    QnnSystemContext_Handle_t sys_ctx_handle = nullptr;
    if (qnn_system_interface_.systemContextCreate(&sys_ctx_handle) !=
        QNN_SUCCESS) {
      SHERPA_ONNX_LOGE("Could not create system handle.");
      return false;
    }

    const QnnSystemContext_BinaryInfo_t *binary_info = nullptr;
    Qnn_ContextBinarySize_t binary_info_size = 0;

    auto ret = qnn_system_interface_.systemContextGetBinaryInfo(
        sys_ctx_handle, static_cast<void *>(buffer.data()), buffer.size(),
        &binary_info, &binary_info_size);
    if (ret != QNN_SUCCESS) {
      SHERPA_ONNX_LOGE(
          "Failed to get context binary info from '%s'. ret code is %d",
          binary_context_file.c_str(), static_cast<int32_t>(ret));

      qnn_system_interface_.systemContextFree(sys_ctx_handle);
      return false;
    }

    const GraphConfigInfo **graph_configs_info = nullptr;

    uint32_t graph_configs_info_count = 0;
    GraphInfo **graphs_info = nullptr;
    uint32_t graphs_count = 0;

    if (!CopyMetadataToGraphsInfo(binary_info, graphs_info, graphs_count)) {
      SHERPA_ONNX_LOGE("Failed to call CopyMetadataToGraphsInfo");

      qnn_system_interface_.systemContextFree(sys_ctx_handle);
      return false;
    }

    qnn_system_interface_.systemContextFree(sys_ctx_handle);

    auto free_graphs_info = [&graphs_info, &graphs_count] {
      for (uint32_t i = 0; i < graphs_count; ++i) {
        for (uint32_t k = 0; k < graphs_info[i]->num_input_tensors; ++k) {
          FreeTensor(&graphs_info[i]->input_tensors[k]);
        }

        for (uint32_t k = 0; k < graphs_info[i]->num_output_tensors; ++k) {
          FreeTensor(&graphs_info[i]->output_tensors[k]);
        }

        free(graphs_info[i]->input_tensors);
        free(graphs_info[i]->output_tensors);

        free(graphs_info[i]->graph_name);
      }

      free(graphs_info[0]);
      free(graphs_info);
    };

    if (graphs_count > 1) {
      SHERPA_ONNX_LOGE("Only the first graph is used");
    }

    Qnn_ContextHandle_t context_handle = nullptr;

    if (backend_->QnnInterface().contextCreateFromBinary(
            backend_->BackendHandle(), backend_->DeviceHandle(),
            context_config_, static_cast<void *>(buffer.data()), buffer.size(),
            &context_handle, nullptr) != QNN_SUCCESS) {
      free_graphs_info();
      SHERPA_ONNX_LOGE("Could not create context from binary.");
      return false;
    }

    backend_->InitContext(context_handle);

    if (backend_->QnnInterface().graphRetrieve(
            context_handle, (*graphs_info)[0].graph_name,
            &((*graphs_info)[0].graph)) != QNN_SUCCESS) {
      free_graphs_info();
      SHERPA_ONNX_LOGE("Unable to retrieve graph handle for graph %d", 0);
      return false;
    }

    graph_handle_ = (*graphs_info)[0].graph;

    InitInputTensors((*graphs_info)[0]);
    InitOutputTensors((*graphs_info)[0]);

    free_graphs_info();

    return true;
  }

  ~Impl() = default;

  bool SaveBinaryContext(const std::string &filename) {
    auto qnn_interface = backend_->QnnInterface();

    if (!qnn_interface.contextGetBinarySize ||
        !qnn_interface.contextGetBinary) {
      SHERPA_ONNX_LOGE(
          "contextGetBinarySizeFnHandle or "
          "contextGetBinaryFnHandle is nullptr.");
      return false;
    }

    uint64_t required_buffer_size{0};
    auto ret = qnn_interface.contextGetBinarySize(backend_->ContextHandle(),
                                                  &required_buffer_size);
    SHERPA_ONNX_QNN_CHECK(ret, "Failed to call contextGetBinarySize");

    if (debug_) {
      SHERPA_ONNX_LOGE("context binary size: %.3f MB",
                       static_cast<float>(required_buffer_size) / 1024 / 1024);
    }
    std::vector<uint8_t> saveBuffer(required_buffer_size);
    uint64_t writtenBufferSize{0};

    ret = qnn_interface.contextGetBinary(
        backend_->ContextHandle(), reinterpret_cast<void *>(saveBuffer.data()),
        required_buffer_size, &writtenBufferSize);

    SHERPA_ONNX_QNN_CHECK(ret, "Failed to call contextGetBinary");

    if (required_buffer_size < writtenBufferSize) {
      SHERPA_ONNX_LOGE(
          "Illegal written buffer size %d bytes. Cannot exceed "
          "allocated memory of %d bytes",
          static_cast<int32_t>(writtenBufferSize),
          static_cast<int32_t>(required_buffer_size));
      return false;
    }
    std::ofstream ofs(filename, std::ios::binary | std::ios::trunc);
    if (!ofs) {
      SHERPA_ONNX_LOGE("Failed to create '%s'", filename.c_str());
      return false;
    }

    ofs.write(reinterpret_cast<const char *>(saveBuffer.data()),
              saveBuffer.size());

    if (!ofs) {
      SHERPA_ONNX_LOGE("Failed to write '%s'", filename.c_str());
      return false;
    }

    return true;
  }

  const std::vector<std::string> &InputTensorNames() const {
    return input_tensor_names_;
  }

  const std::vector<std::string> &OutputTensorNames() const {
    return output_tensor_names_;
  }

  std::vector<int32_t> TensorShape(const std::string &name) const {
    std::vector<int32_t> shape;

    if (!HasTensor(name)) {
      SHERPA_ONNX_LOGE("No such tensor '%s'", name.c_str());
      return shape;
    }

    auto t = name2tensor_.at(name);

    shape = {t->v1.dimensions, t->v1.dimensions + t->v1.rank};

    return shape;
  }

  int32_t TensorSizeInBytes(const std::string &name) const {
    if (!HasTensor(name)) {
      return 0;
    }

    return name2tensor_.at(name)->v1.clientBuf.dataSize;
  }

  bool HasTensor(const std::string &name) const {
    return name2tensor_.count(name);
  }

  bool SetInputTensorData(const std::string &name, const float *p, int32_t n) {
    if (!HasTensor(name)) {
      SHERPA_ONNX_LOGE("No such tensor '%s'", name.c_str());
      return false;
    }

    auto t = name2tensor_.at(name);
    if (t->v1.dataType != QNN_DATATYPE_UFIXED_POINT_16) {
      SHERPA_ONNX_LOGE(
          "tensor '%s' should be of type "
          "QNN_DATATYPE_UFIXED_POINT_16, but it is %s",
          name.c_str(), TensorDataTypeToString(t->v1.dataType).c_str());
      return false;
    }

    if (t->v1.quantizeParams.quantizationEncoding !=
        QNN_QUANTIZATION_ENCODING_SCALE_OFFSET) {
      SHERPA_ONNX_LOGE(
          "tensor '%s' should be quantized with "
          "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET, but it is %s",
          name.c_str(),
          QuantizationEncodingToString(
              t->v1.quantizeParams.quantizationEncoding)
              .c_str());
      return false;
    }

    if (n * sizeof(uint16_t) != t->v1.clientBuf.dataSize) {
      SHERPA_ONNX_LOGE("tensor '%s' expects %d bytes, but you provide %d bytes",
                       name.c_str(),
                       static_cast<int32_t>(t->v1.clientBuf.dataSize),
                       static_cast<int32_t>(n * sizeof(uint16_t)));
      return false;
    }

    FillData(t, p, n);

    return true;
  }

  bool SetInputTensorData(const std::string &name, const int32_t *p,
                          int32_t n) {
    if (!HasTensor(name)) {
      SHERPA_ONNX_LOGE("No such tensor '%s'", name.c_str());
      return false;
    }

    auto t = name2tensor_.at(name);
    if (t->v1.dataType != QNN_DATATYPE_INT_32) {
      SHERPA_ONNX_LOGE(
          "tensor '%s' should be of type "
          "QNN_DATATYPE_INT_32, but it is %s",
          name.c_str(), TensorDataTypeToString(t->v1.dataType).c_str());
      return false;
    }

    if (n * sizeof(int32_t) != t->v1.clientBuf.dataSize) {
      SHERPA_ONNX_LOGE("tensor '%s' expects %d bytes, but you provide %d bytes",
                       name.c_str(),
                       static_cast<int32_t>(t->v1.clientBuf.dataSize),
                       static_cast<int32_t>(n * sizeof(int32_t)));
      return false;
    }

    FillData(t, p, n);

    return true;
  }

  std::vector<float> GetOutputTensorData(const std::string &name) {
    if (!HasTensor(name)) {
      SHERPA_ONNX_LOGE("No such tensor '%s'", name.c_str());
      return {};
    }

    auto t = name2tensor_.at(name);
    if (t->v1.dataType != QNN_DATATYPE_UFIXED_POINT_16) {
      SHERPA_ONNX_LOGE(
          "tensor '%s' should be of type "
          "QNN_DATATYPE_UFIXED_POINT_16, but it is %s",
          name.c_str(), TensorDataTypeToString(t->v1.dataType).c_str());
      return {};
    }

    if (t->v1.quantizeParams.quantizationEncoding !=
        QNN_QUANTIZATION_ENCODING_SCALE_OFFSET) {
      SHERPA_ONNX_LOGE(
          "tensor '%s' should be quantized with "
          "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET, but it is %s",
          name.c_str(),
          QuantizationEncodingToString(
              t->v1.quantizeParams.quantizationEncoding)
              .c_str());
      return {};
    }

    int32_t n = t->v1.clientBuf.dataSize / sizeof(uint16_t);
    std::vector<float> ans(n);

    GetData(t, ans.data(), n);

    return ans;
  }

  bool Run() {
    std::vector<Qnn_Tensor_t> input_tensors_raw;
    std::vector<Qnn_Tensor_t> output_tensors_raw;

    input_tensors_raw.reserve(input_tensors_.size());
    output_tensors_raw.reserve(output_tensors_.size());

    for (const auto &p : input_tensors_) {
      input_tensors_raw.push_back(*p);
    }

    for (const auto &p : output_tensors_) {
      output_tensors_raw.push_back(*p);
    }

    auto ret = backend_->QnnInterface().graphExecute(
        graph_handle_, input_tensors_raw.data(), input_tensors_raw.size(),
        output_tensors_raw.data(), output_tensors_raw.size(), nullptr, nullptr);
    SHERPA_ONNX_QNN_CHECK(ret, "Failed to run graphExecute");

    return true;
  }

  bool IsInitialized() const { return is_initialized_; }

 private:
  void PostInit() {
    AllocateBuffer();
    SetupPointers();

    is_initialized_ = true;
  }

  bool InitModel(const std::string &model_so) {
    model_lib_handle_ = std::unique_ptr<void, decltype(&dlclose)>(
        dlopen(model_so.c_str(), RTLD_NOW | RTLD_LOCAL), &dlclose);
    if (!model_lib_handle_) {
      SHERPA_ONNX_LOGE("Failed to dlopen '%s'. Error is: '%s'",
                       model_so.c_str(), dlerror());
      return false;
    }

    if (debug_) {
      SHERPA_ONNX_LOGE("loaded %s", model_so.c_str());
    }

    return true;
  }

  bool InitSymbols() {
    const char *symbol = "QnnModel_composeGraphs";

    compose_graphs_fn_handle_ = reinterpret_cast<ComposeGraphsFnHandleType>(
        dlsym(model_lib_handle_.get(), symbol));
    if (!compose_graphs_fn_handle_) {
      SHERPA_ONNX_LOGE("Failed to dlsym for '%s'. Error is: '%s'", symbol,
                       dlerror());
      return false;
    }

    symbol = "QnnModel_freeGraphsInfo";
    free_graph_info_fn_handle_ = reinterpret_cast<FreeGraphInfoFnHandleType>(
        dlsym(model_lib_handle_.get(), symbol));
    if (!free_graph_info_fn_handle_) {
      SHERPA_ONNX_LOGE("Failed to dlsym for '%s'. Error is: '%s'", symbol,
                       dlerror());
      return false;
    }
    return true;
  }

  void InitGraph() {
    const GraphConfigInfo **graph_configs_info = nullptr;

    uint32_t graph_configs_info_count = 0;
    GraphInfo **graphs_info = nullptr;
    uint32_t graphs_count = 0;

    auto ret = compose_graphs_fn_handle_(
        backend_->BackendHandle(), backend_->QnnInterface(),
        backend_->ContextHandle(), graph_configs_info, graph_configs_info_count,
        &graphs_info, &graphs_count, debug_, LogCallback, backend_->LogLevel());
    SHERPA_ONNX_QNN_CHECK(ret, "Failed to call compose_graphs_fn_handle_");

    if (debug_) {
      SHERPA_ONNX_LOGE("graphs_count: %d", (int32_t)graphs_count);
    }

    for (uint32_t i = 0; i < graphs_count; ++i) {
      if (debug_) {
        SHERPA_ONNX_LOGE(
            "Finalizing graph %d/%d: '%s'", static_cast<int32_t>(i),
            static_cast<int32_t>(graphs_count), (*graphs_info)[i].graph_name);
      }
      ret = backend_->QnnInterface().graphFinalize((*graphs_info)[i].graph,
                                                   nullptr, nullptr);
      SHERPA_ONNX_QNN_CHECK(ret, "Failed to call graph_finalize");
    }

    if (graphs_count > 1) {
      SHERPA_ONNX_LOGE("We only use the first graph: %s",
                       (*graphs_info)[0].graph_name);
    }

    InitInputTensors((*graphs_info)[0]);
    InitOutputTensors((*graphs_info)[0]);

    graph_handle_ = (*graphs_info)[0].graph;
  }

  void InitInputTensors(GraphInfo graph) {
    input_tensors_.reserve(graph.num_input_tensors);
    input_tensor_names_.reserve(graph.num_input_tensors);

    for (uint32_t i = 0; i < graph.num_input_tensors; ++i) {
      auto p = TensorPtr(new Qnn_Tensor_t(QNN_TENSOR_INIT), &FreeTensor);

      CopyTensorInfo(graph.input_tensors[i], *p);

      if (debug_) {
        SHERPA_ONNX_LOGE("input %d", (int)i);
        PrintTensor(p->v2);
      }

      std::string name = p->v1.name;
      name2tensor_[name] = p.get();
      input_tensor_names_.push_back(std::move(name));

      input_tensors_.push_back(std::move(p));
    }
  }

  void InitOutputTensors(GraphInfo graph) {
    output_tensors_.reserve(graph.num_output_tensors);
    output_tensor_names_.reserve(graph.num_output_tensors);
    for (uint32_t i = 0; i < graph.num_output_tensors; ++i) {
      auto p = TensorPtr(new Qnn_Tensor_t(QNN_TENSOR_INIT), &FreeTensor);

      CopyTensorInfo(graph.output_tensors[i], *p);

      if (debug_ && (i + 3 > graph.num_output_tensors)) {
        SHERPA_ONNX_LOGE("output %d", (int)i);

        PrintTensor(p->v2);
      }

      std::string name = p->v1.name;
      name2tensor_[name] = p.get();
      output_tensor_names_.push_back(std::move(name));

      output_tensors_.push_back(std::move(p));
    }
  }

  void AllocateBuffer() {
    uint32_t n = 0;
    for (const auto &p : name2tensor_) {
      n += p.second->v1.clientBuf.dataSize;
    }

    if (debug_) {
      SHERPA_ONNX_LOGE("Allocate %d bytes, or %.3f MB", static_cast<int32_t>(n),
                       static_cast<float>(n) / 1024 / 1024);
    }

    buffer_.resize(n);
  }

  void SetupPointers() {
    uint8_t *p = buffer_.data();
    uint32_t n = 0;
    for (auto &t : input_tensors_) {
      t->v1.clientBuf.data = p;
      p += t->v1.clientBuf.dataSize;
    }

    for (auto &t : output_tensors_) {
      t->v1.clientBuf.data = p;
      p += t->v1.clientBuf.dataSize;
    }

    if (debug_) {
      if (p == buffer_.data() + buffer_.size()) {
        SHERPA_ONNX_LOGE("Setup pointers successfully.");
      } else {
        SHERPA_ONNX_LOGE("Bad things happened in setting up pointers.");
      }
    }
  }

 private:
  bool debug_ = true;
  std::unique_ptr<void, decltype(&dlclose)> model_lib_handle_{nullptr,
                                                              &dlclose};

  std::unique_ptr<void, decltype(&dlclose)> system_lib_handle_{nullptr,
                                                               &dlclose};

  QNN_SYSTEM_INTERFACE_VER_TYPE qnn_system_interface_;

  ComposeGraphsFnHandleType compose_graphs_fn_handle_ = nullptr;
  FreeGraphInfoFnHandleType free_graph_info_fn_handle_ = nullptr;

  std::vector<TensorPtr> input_tensors_;
  std::vector<TensorPtr> output_tensors_;

  std::vector<std::string> input_tensor_names_;
  std::vector<std::string> output_tensor_names_;

  std::unordered_map<std::string, Qnn_Tensor_t *> name2tensor_;

  std::vector<uint8_t> buffer_;
  const QnnBackend *backend_ = nullptr;

  Qnn_GraphHandle_t graph_handle_ = nullptr;

  const QnnContext_Config_t **context_config_ = nullptr;
  bool is_initialized_ = false;
};

QnnModel::~QnnModel() = default;

QnnModel::QnnModel(const std::string &model_so, const QnnBackend *backend,
                   bool debug)
    : impl_(std::make_unique<Impl>(model_so, backend, debug)) {}

QnnModel::QnnModel(const std::string &binary_context_file,
                   const std::string &system_lib, const QnnBackend *backend,
                   BinaryContextTag tag, bool debug)
    : impl_(std::make_unique<Impl>(binary_context_file, system_lib, backend,
                                   tag, debug)) {}  // NOLINT

bool QnnModel::SaveBinaryContext(const std::string &filename) const {
  return impl_->SaveBinaryContext(filename);
}

const std::vector<std::string> &QnnModel::InputTensorNames() const {
  return impl_->InputTensorNames();
}

const std::vector<std::string> &QnnModel::OutputTensorNames() const {
  return impl_->OutputTensorNames();
}

std::vector<int32_t> QnnModel::TensorShape(const std::string &name) const {
  return impl_->TensorShape(name);
}

int32_t QnnModel::TensorSizeInBytes(const std::string &name) const {
  return impl_->TensorSizeInBytes(name);
}

bool QnnModel::HasTensor(const std::string &name) const {
  return impl_->HasTensor(name);
}

bool QnnModel::SetInputTensorData(const std::string &name, const float *p,
                                  int32_t n) const {
  return impl_->SetInputTensorData(name, p, n);
}

bool QnnModel::SetInputTensorData(const std::string &name, const int32_t *p,
                                  int32_t n) const {
  return impl_->SetInputTensorData(name, p, n);
}

std::vector<float> QnnModel::GetOutputTensorData(
    const std::string &name) const {
  return impl_->GetOutputTensorData(name);
}

bool QnnModel::Run() const { return impl_->Run(); }

bool QnnModel::IsInitialized() const { return impl_->IsInitialized(); }

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/qnn/qnn-model.h
================================================
// sherpa-onnx/csrc/qnn/qnn-model.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_QNN_QNN_MODEL_H_
#define SHERPA_ONNX_CSRC_QNN_QNN_MODEL_H_

#include <memory>
#include <string>
#include <vector>

#include "QnnInterface.h"

namespace sherpa_onnx {

class QnnBackend;

struct BinaryContextTag {};

class QnnModel {
 public:
  QnnModel(const std::string &model_so, const QnnBackend *backend, bool debug);
  QnnModel(const std::string &binary_context_file,
           const std::string &system_lib, const QnnBackend *backend,
           BinaryContextTag tag, bool debug);
  ~QnnModel();

  bool SaveBinaryContext(const std::string &filename) const;

  const std::vector<std::string> &InputTensorNames() const;
  const std::vector<std::string> &OutputTensorNames() const;

  std::vector<int32_t> TensorShape(const std::string &name) const;
  int32_t TensorSizeInBytes(const std::string &name) const;

  bool HasTensor(const std::string &name) const;

  bool SetInputTensorData(const std::string &name, const float *p,
                          int32_t n) const;

  bool SetInputTensorData(const std::string &name, const int32_t *p,
                          int32_t n) const;

  std::vector<float> GetOutputTensorData(const std::string &name) const;

  bool Run() const;
  bool IsInitialized() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_QNN_QNN_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/qnn/utils.cc
================================================
// sherpa-onnx/csrc/qnn/utils.h
//
// Copyright (c)  2025  Xiaomi Corporation
#include "sherpa-onnx/csrc/qnn/utils.h"

#include <math.h>
#include <stdio.h>

#include <algorithm>
#include <functional>
#include <numeric>
#include <sstream>
#include <string>

#include "sherpa-onnx/csrc/qnn/macros.h"

#define SHERPA_ONNX_TO_STRING(s) \
  case s:                        \
    return #s

std::string TensorTypeToString(Qnn_TensorType_t t) {
  switch (t) {
    SHERPA_ONNX_TO_STRING(QNN_TENSOR_TYPE_APP_WRITE);
    SHERPA_ONNX_TO_STRING(QNN_TENSOR_TYPE_APP_READ);
    SHERPA_ONNX_TO_STRING(QNN_TENSOR_TYPE_APP_READWRITE);
    SHERPA_ONNX_TO_STRING(QNN_TENSOR_TYPE_NATIVE);
    SHERPA_ONNX_TO_STRING(QNN_TENSOR_TYPE_STATIC);
    SHERPA_ONNX_TO_STRING(QNN_TENSOR_TYPE_NULL);
    SHERPA_ONNX_TO_STRING(QNN_TENSOR_TYPE_UPDATEABLE_STATIC);
    SHERPA_ONNX_TO_STRING(QNN_TENSOR_TYPE_UPDATEABLE_NATIVE);
    SHERPA_ONNX_TO_STRING(QNN_TENSOR_TYPE_UPDATEABLE_APP_WRITE);
    SHERPA_ONNX_TO_STRING(QNN_TENSOR_TYPE_UPDATEABLE_APP_READ);
    SHERPA_ONNX_TO_STRING(QNN_TENSOR_TYPE_UPDATEABLE_APP_READWRITE);
    SHERPA_ONNX_TO_STRING(QNN_TENSOR_TYPE_OPTIONAL_APP_WRITE);
    SHERPA_ONNX_TO_STRING(QNN_TENSOR_TYPE_OPTIONAL_APP_READ);
    SHERPA_ONNX_TO_STRING(QNN_TENSOR_TYPE_OPTIONAL_APP_READWRITE);
    SHERPA_ONNX_TO_STRING(QNN_TENSOR_TYPE_UNDEFINED);
  }
  return "Unknown";
}

std::string QuantizationEncodingToString(Qnn_QuantizationEncoding_t q) {
  switch (q) {
    SHERPA_ONNX_TO_STRING(QNN_QUANTIZATION_ENCODING_SCALE_OFFSET);
    SHERPA_ONNX_TO_STRING(QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET);
    SHERPA_ONNX_TO_STRING(QNN_QUANTIZATION_ENCODING_BW_SCALE_OFFSET);
    SHERPA_ONNX_TO_STRING(QNN_QUANTIZATION_ENCODING_BW_AXIS_SCALE_OFFSET);
    SHERPA_ONNX_TO_STRING(QNN_QUANTIZATION_ENCODING_BLOCK);
    SHERPA_ONNX_TO_STRING(QNN_QUANTIZATION_ENCODING_BLOCKWISE_EXPANSION);
    SHERPA_ONNX_TO_STRING(QNN_QUANTIZATION_ENCODING_VECTOR);
    SHERPA_ONNX_TO_STRING(QNN_QUANTIZATION_ENCODING_UNDEFINED);
  }
  return "Unknown";
}

std::string TensorDataTypeToString(Qnn_DataType_t t) {
  switch (t) {
    SHERPA_ONNX_TO_STRING(QNN_DATATYPE_INT_8);
    SHERPA_ONNX_TO_STRING(QNN_DATATYPE_INT_16);
    SHERPA_ONNX_TO_STRING(QNN_DATATYPE_INT_32);
    SHERPA_ONNX_TO_STRING(QNN_DATATYPE_INT_64);
    SHERPA_ONNX_TO_STRING(QNN_DATATYPE_UINT_8);
    SHERPA_ONNX_TO_STRING(QNN_DATATYPE_UINT_16);
    SHERPA_ONNX_TO_STRING(QNN_DATATYPE_UINT_32);
    SHERPA_ONNX_TO_STRING(QNN_DATATYPE_UINT_64);
    SHERPA_ONNX_TO_STRING(QNN_DATATYPE_FLOAT_16);
    SHERPA_ONNX_TO_STRING(QNN_DATATYPE_FLOAT_32);
    SHERPA_ONNX_TO_STRING(QNN_DATATYPE_FLOAT_64);
    SHERPA_ONNX_TO_STRING(QNN_DATATYPE_SFIXED_POINT_4);
    SHERPA_ONNX_TO_STRING(QNN_DATATYPE_SFIXED_POINT_8);
    SHERPA_ONNX_TO_STRING(QNN_DATATYPE_SFIXED_POINT_16);
    SHERPA_ONNX_TO_STRING(QNN_DATATYPE_SFIXED_POINT_32);
    SHERPA_ONNX_TO_STRING(QNN_DATATYPE_UFIXED_POINT_4);
    SHERPA_ONNX_TO_STRING(QNN_DATATYPE_UFIXED_POINT_8);
    SHERPA_ONNX_TO_STRING(QNN_DATATYPE_UFIXED_POINT_16);
    SHERPA_ONNX_TO_STRING(QNN_DATATYPE_UFIXED_POINT_32);
    SHERPA_ONNX_TO_STRING(QNN_DATATYPE_BOOL_8);
    SHERPA_ONNX_TO_STRING(QNN_DATATYPE_STRING);
    SHERPA_ONNX_TO_STRING(QNN_DATATYPE_UNDEFINED);
  }
  return "unknown";
}

std::string TensorMemTypeToString(Qnn_TensorMemType_t t) {
  switch (t) {
    SHERPA_ONNX_TO_STRING(QNN_TENSORMEMTYPE_RAW);
    SHERPA_ONNX_TO_STRING(QNN_TENSORMEMTYPE_MEMHANDLE);
    SHERPA_ONNX_TO_STRING(QNN_TENSORMEMTYPE_RETRIEVE_RAW);
    SHERPA_ONNX_TO_STRING(QNN_TENSORMEMTYPE_UNDEFINED);
  }
  return "Unknown";
}

#undef SHERPA_ONNX_TO_STRING

// quantized = float / scale - offset;
void FillData(Qnn_Tensor_t *t, const float *data, int32_t n) {
  float scale = t->v1.quantizeParams.scaleOffsetEncoding.scale;
  int32_t offset = t->v1.quantizeParams.scaleOffsetEncoding.offset;

  size_t bit_width = 16;
  double true_bit_width_max = pow(2, bit_width) - 1;
  double encoding_min = offset * scale;
  double encoding_max = (true_bit_width_max + offset) * scale;
  double encoding_range = encoding_max - encoding_min;

  uint16_t *out = reinterpret_cast<uint16_t *>(t->v1.clientBuf.data);

  for (size_t i = 0; i < n; ++i) {
    int32_t quantized_value =
        round(true_bit_width_max * (data[i] - encoding_min) / encoding_range);

    if (quantized_value < 0) {
      quantized_value = 0;
    } else if (quantized_value > static_cast<int32_t>(true_bit_width_max)) {
      quantized_value = static_cast<int32_t>(true_bit_width_max);
    }
    out[i] = static_cast<uint16_t>(quantized_value);
  }
}

void FillData(Qnn_Tensor_t *t, const int32_t *data, int32_t n) {
  int32_t *out = reinterpret_cast<int32_t *>(t->v1.clientBuf.data);
  std::copy(data, data + n, out);
}

void GetData(const Qnn_Tensor_t *t, float *data, int32_t n) {
  double scale = t->v1.quantizeParams.scaleOffsetEncoding.scale;
  double offset = t->v1.quantizeParams.scaleOffsetEncoding.offset;

  const uint16_t *p = reinterpret_cast<const uint16_t *>(t->v1.clientBuf.data);
  for (int32_t i = 0; i < n; ++i) {
    double quantizedValue = static_cast<double>(p[i]);
    data[i] = (quantizedValue + offset) * scale;
  }
}

static void FreeTensorV1(Qnn_Tensor_t *t) {
  free(const_cast<char *>(t->v1.name));

  delete[] t->v1.dimensions;
}

static void FreeTensorV2(Qnn_Tensor_t *t) {
  free(const_cast<char *>(t->v2.name));

  delete[] t->v2.dimensions;
  delete[] t->v2.isDynamicDimensions;
}

void FreeTensor(Qnn_Tensor_t *t) {
  if (t->version == QNN_TENSOR_VERSION_1) {
    FreeTensorV1(t);
  } else if (t->version == QNN_TENSOR_VERSION_2) {
    FreeTensorV2(t);
  } else {
    SHERPA_ONNX_LOGE("Unknown tensor version: %d", t->version);
  }
}

uint32_t GetSizeInBytes(const uint32_t *dimensions, uint32_t n,
                        Qnn_DataType_t type) {
  if (n == 0) {
    return 0;
  }

  auto count = std::accumulate(dimensions, dimensions + n, 1,
                               std::multiplies<uint32_t>());

  uint32_t b = 1;
  switch (type) {
    case QNN_DATATYPE_INT_8:
      b = 1;
      break;
    case QNN_DATATYPE_INT_16:
      b = 2;
      break;
    case QNN_DATATYPE_INT_32:
      b = 4;
      break;
    case QNN_DATATYPE_INT_64:
      b = 8;
      break;
    case QNN_DATATYPE_UINT_8:
      b = 1;
      break;
    case QNN_DATATYPE_UINT_16:
      b = 2;
      break;
    case QNN_DATATYPE_UINT_32:
      b = 4;
      break;
    case QNN_DATATYPE_UINT_64:
      b = 8;
      break;
    case QNN_DATATYPE_FLOAT_16:
      b = 2;
      break;
    case QNN_DATATYPE_FLOAT_32:
      b = 4;
      break;
    case QNN_DATATYPE_FLOAT_64:
      b = 8;
      break;
    case QNN_DATATYPE_SFIXED_POINT_8:
      b = 1;
      break;
    case QNN_DATATYPE_SFIXED_POINT_16:
      b = 2;
      break;
    case QNN_DATATYPE_SFIXED_POINT_32:
      b = 4;
      break;
    case QNN_DATATYPE_UFIXED_POINT_8:
      b = 1;
      break;
    case QNN_DATATYPE_UFIXED_POINT_16:
      b = 2;
      break;
    case QNN_DATATYPE_UFIXED_POINT_32:
      b = 4;
      break;
    case QNN_DATATYPE_BOOL_8:
      b = 1;
      break;
    default:
      SHERPA_ONNX_LOGE("Unsupported data type: %s",
                       TensorDataTypeToString(type).c_str());
      break;
  }

  return count * b;
}

template <typename T>
void CopyDimensions(const T *src, uint32_t n, T **dst) {
  if (!src || n == 0) {
    *dst = nullptr;
    return;
  }

  *dst = new T[n];
  std::copy(src, src + n, *dst);
}

static void CopyQuantizeParams(const Qnn_QuantizeParams_t &src,
                               Qnn_QuantizeParams_t &dst) {  // NOLINT
  dst.encodingDefinition = src.encodingDefinition;
  dst.quantizationEncoding = src.quantizationEncoding;

  switch (src.quantizationEncoding) {
    case QNN_QUANTIZATION_ENCODING_SCALE_OFFSET:
      dst.scaleOffsetEncoding = src.scaleOffsetEncoding;
      break;
    case QNN_QUANTIZATION_ENCODING_UNDEFINED:
      // do nothing in this case
      break;
    default:
      SHERPA_ONNX_LOGE(
          "Unsupported quantizationEncoding: %s",
          QuantizationEncodingToString(src.quantizationEncoding).c_str());
  }
}

static void CopyTensorInfoV1(const Qnn_Tensor_t &src,
                             Qnn_Tensor_t &dst) {  // NOLINT
  dst.version = src.version;
  dst.v1.id = src.v1.id;
  if (src.v1.name) {
    dst.v1.name = strdup(src.v1.name);
  } else {
    dst.v1.name = strdup("");
  }

  dst.v1.type = src.v1.type;
  dst.v1.dataFormat = src.v1.dataFormat;
  dst.v1.dataType = src.v1.dataType;

  CopyQuantizeParams(src.v1.quantizeParams, dst.v1.quantizeParams);

  dst.v1.rank = src.v1.rank;

  CopyDimensions(src.v1.dimensions, src.v1.rank, &dst.v1.dimensions);

  dst.v1.memType = src.v1.memType;
  if (dst.v1.memType != QNN_TENSORMEMTYPE_RAW) {
    SHERPA_ONNX_LOGE("Unsupported mem type: %s",
                     TensorMemTypeToString(dst.v1.memType).c_str());
  } else {
    dst.v1.clientBuf.data = nullptr;
    dst.v1.clientBuf.dataSize =
        GetSizeInBytes(dst.v1.dimensions, dst.v1.rank, dst.v1.dataType);
  }
}

static void CopyTensorInfoV2(const Qnn_Tensor_t &src,
                             Qnn_Tensor_t &dst) {  // NOLINT
  dst.version = src.version;
  dst.v2.id = src.v2.id;
  if (src.v2.name) {
    dst.v2.name = strdup(src.v2.name);
  } else {
    dst.v2.name = strdup("");
  }

  dst.v2.type = src.v2.type;
  dst.v2.dataFormat = src.v2.dataFormat;
  dst.v2.dataType = src.v2.dataType;

  CopyQuantizeParams(src.v2.quantizeParams, dst.v2.quantizeParams);

  dst.v2.rank = src.v2.rank;

  CopyDimensions(src.v2.dimensions, src.v2.rank, &dst.v2.dimensions);

  dst.v2.memType = src.v2.memType;
  if (dst.v2.memType != QNN_TENSORMEMTYPE_RAW) {
    SHERPA_ONNX_LOGE("Unsupported mem type: %s",
                     TensorMemTypeToString(dst.v2.memType).c_str());
  } else {
    dst.v2.clientBuf.data = nullptr;
    dst.v2.clientBuf.dataSize =
        GetSizeInBytes(dst.v2.dimensions, dst.v2.rank, dst.v2.dataType);
  }

  CopyDimensions(src.v2.isDynamicDimensions, src.v2.rank,
                 &dst.v2.isDynamicDimensions);

  dst.v2.sparseParams.type = src.v2.sparseParams.type;
  dst.v2.sparseParams.hybridCoo.numSpecifiedElements =
      src.v2.sparseParams.hybridCoo.numSpecifiedElements;
  dst.v2.sparseParams.hybridCoo.numSparseDimensions =
      src.v2.sparseParams.hybridCoo.numSparseDimensions;
  dst.v2.isProduced = src.v2.isProduced;
}

void CopyTensorInfo(const Qnn_Tensor_t &src, Qnn_Tensor_t &dst) {  // NOLINT
  if (src.version == QNN_TENSOR_VERSION_1) {
    CopyTensorInfoV1(src, dst);
  } else if (src.version == QNN_TENSOR_VERSION_2) {
    CopyTensorInfoV2(src, dst);
  } else {
    SHERPA_ONNX_LOGE("Unknown tensor version: %d", dst.version);
  }
}

void LogCallback(const char *fmt, QnnLog_Level_t level, uint64_t timestamp,
                 va_list args) {
  std::string s;
  switch (level) {
    case QNN_LOG_LEVEL_ERROR:
      s = "ERROR";
      break;
    case QNN_LOG_LEVEL_WARN:
      s = "WARN";
      break;
    case QNN_LOG_LEVEL_INFO:
      s = "INFO";
      break;
    case QNN_LOG_LEVEL_DEBUG:
      s = "DEBUG";
      break;
    case QNN_LOG_LEVEL_VERBOSE:
      s = "VERBOSE";
      break;
    case QNN_LOG_LEVEL_MAX:
      s = "UNKNOWN";
      break;
  }

  double ms = timestamp / 1000000.0;
  fprintf(stdout, "%8.1fms [%-7s] ", ms, s.c_str());
  vfprintf(stdout, fmt, args);
}

void PrintTensor(Qnn_TensorV2_t t) {
  std::ostringstream os;
  os << "  id: " << t.id << "\n";
  os << "  name: " << t.name << "\n";
  os << "  type: " << TensorTypeToString(t.type) << "\n";
  os << "  data format: " << t.dataFormat << "\n";
  os << "  data type: " << TensorDataTypeToString(t.dataType) << "\n";
  os << "  quantize info: \n";
  auto qp = t.quantizeParams;
  os << "    encodingDefinition: " << std::hex << "0x" << qp.encodingDefinition
     << std::dec << "\n";
  os << "    quantizationEncoding: "
     << QuantizationEncodingToString(qp.quantizationEncoding) << "\n";
  if (qp.quantizationEncoding == QNN_QUANTIZATION_ENCODING_SCALE_OFFSET) {
    Qnn_ScaleOffset_t s = qp.scaleOffsetEncoding;
    os << "     scale: " << s.scale << "\n";
    os << "     offset: " << s.offset << "\n";
  }
  os << "  rank: " << t.rank << "\n";
  os << "  dimensions: ";
  for (int32_t i = 0; i < t.rank; ++i) {
    os << t.dimensions[i] << ", ";
    if (i + 1 == t.rank) {
      os << "\n";
    }
  }
  os << "  memType: " << TensorMemTypeToString(t.memType) << "\n";
  if (t.memType == QNN_TENSORMEMTYPE_RAW) {
    os << " memType raw data size: " << t.clientBuf.dataSize << "\n";
  }
  os << "  isDynamicDimensions: "
     << ((t.isDynamicDimensions != nullptr) ? "True" : "False") << "\n";
  os << "  isProduced: " << static_cast<int32_t>(t.isProduced) << "\n";

  SHERPA_ONNX_LOGE("%s", os.str().c_str());
}

static bool CopyGraphsInfoV3(const QnnSystemContext_GraphInfoV3_t *src,
                             GraphInfo *dst) {
  if (src->graphName) {
    dst->graph_name = strdup(src->graphName);
  } else {
    dst->graph_name = strdup("");
  }

  dst->input_tensors = nullptr;
  dst->num_input_tensors = 0;

  if (src->graphInputs) {
    dst->input_tensors = reinterpret_cast<Qnn_Tensor_t *>(
        calloc(src->numGraphInputs, sizeof(Qnn_Tensor_t)));

    for (uint32_t i = 0; i < src->numGraphInputs; ++i) {
      dst->input_tensors[i] = QNN_TENSOR_INIT;

      CopyTensorInfo(src->graphInputs[i], dst->input_tensors[i]);
    }

    dst->num_input_tensors = src->numGraphInputs;
  }

  dst->output_tensors = nullptr;
  dst->num_output_tensors = 0;

  if (src->graphOutputs) {
    dst->output_tensors = reinterpret_cast<Qnn_Tensor_t *>(
        calloc(src->numGraphOutputs, sizeof(Qnn_Tensor_t)));

    for (uint32_t i = 0; i < src->numGraphOutputs; ++i) {
      dst->output_tensors[i] = QNN_TENSOR_INIT;

      CopyTensorInfo(src->graphOutputs[i], dst->output_tensors[i]);
    }

    dst->num_output_tensors = src->numGraphOutputs;
  }

  return true;
}

static bool CopyGraphsInfo(const QnnSystemContext_GraphInfo_t *graphs_input,
                           uint32_t num_graphs,
                           GraphInfo **&graphs_info) {  // NOLINT
  if (num_graphs == 0) {
    SHERPA_ONNX_LOGE("empty graphs");
    graphs_info = nullptr;
    return false;
  }

  SHERPA_ONNX_LOGE("version: %d", (int)graphs_input[0].version);

  // remember to free graphs_info
  graphs_info =
      reinterpret_cast<GraphInfo **>(calloc(num_graphs, sizeof(GraphInfo *)));

  GraphInfo *graph_info_arr =
      reinterpret_cast<GraphInfo *>(calloc(num_graphs, sizeof(GraphInfo)));

  if (!graphs_info || !graph_info_arr) {
    SHERPA_ONNX_LOGE("Failure to allocate memory for *graphInfo");
    return false;
  }

  for (uint32_t i = 0; i < num_graphs; ++i) {
    switch (graphs_input[i].version) {
      case QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_1:
        SHERPA_ONNX_LOGE("Unsupported version: %d",
                         static_cast<int32_t>(graphs_input[i].version));
        return false;

      case QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_2:
        SHERPA_ONNX_LOGE("Unsupported version: %d",
                         static_cast<int32_t>(graphs_input[i].version));
        return false;

      case QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_3: {
        bool ok =
            CopyGraphsInfoV3(&graphs_input[i].graphInfoV3, &graph_info_arr[i]);
        if (!ok) {
          SHERPA_ONNX_LOGE("Failed to copy graphs info v3");
        }
        graphs_info[i] = graph_info_arr + i;

        break;
      }

      default:
        SHERPA_ONNX_LOGE("Unsupported version: %d",
                         static_cast<int32_t>(graphs_input[i].version));
        return false;
    }
  }

  return true;
}

bool CopyMetadataToGraphsInfo(const QnnSystemContext_BinaryInfo_t *binary_info,
                              GraphInfo **&graphs_info,  // NOLINT
                              uint32_t &graphs_count) {  // NOLINT
  graphs_count = 0;

  switch (binary_info->version) {
    case QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_1: {
      SHERPA_ONNX_LOGE("Unsupported binary context version: %d",
                       binary_info->version);
      return false;
    }
    case QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_2: {
      SHERPA_ONNX_LOGE("Unsupported binary context version: %d",
                       binary_info->version);
      return false;
    }
    case QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_3: {
      bool ok = CopyGraphsInfo(binary_info->contextBinaryInfoV3.graphs,
                               binary_info->contextBinaryInfoV3.numGraphs,
                               graphs_info);

      if (!ok) {
        SHERPA_ONNX_LOGE("Failed while copying graphs Info v3.");
        return false;
      }
      graphs_count = binary_info->contextBinaryInfoV3.numGraphs;
      return true;
    }
    default: {
      SHERPA_ONNX_LOGE("Unsupported binary context version: %d",
                       binary_info->version);
      return false;
    }
  }
}


================================================
FILE: sherpa-onnx/csrc/qnn/utils.h
================================================
// sherpa-onnx/csrc/qnn/utils.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_QNN_UTILS_H_
#define SHERPA_ONNX_CSRC_QNN_UTILS_H_
#include <stdio.h>

#include <cstdint>
#include <memory>
#include <string>
#include <vector>

#include "QnnInterface.h"
#include "System/QnnSystemInterface.h"
#include "sherpa-onnx/csrc/macros.h"

template <typename T>
std::vector<T> ReadFile(const std::string &filename) {
  FILE *fp = fopen(filename.c_str(), "rb");
  if (!fp) {
    SHERPA_ONNX_LOGE("Failed to open '%s'", filename.c_str());
    return {};
  }

  fseek(fp, 0, SEEK_END);
  int32_t n = ftell(fp);
  fseek(fp, 0, SEEK_SET);

  std::vector<T> ans(n / sizeof(T));
  fread(ans.data(), sizeof(T), ans.size(), fp);
  fclose(fp);

  return ans;
}

void PrintTensor(Qnn_TensorV2_t t);

// float -> uint16_t
void FillData(Qnn_Tensor_t *t, const float *data, int32_t n);

// int32_t -> int32_t
void FillData(Qnn_Tensor_t *t, const int32_t *data, int32_t n);

// uint16_t -> float
void GetData(const Qnn_Tensor_t *t, float *data, int32_t n);

void FreeTensor(Qnn_Tensor_t *t);

using TensorPtr = std::unique_ptr<Qnn_Tensor_t, decltype(&FreeTensor)>;

void CopyTensorInfo(const Qnn_Tensor_t &src, Qnn_Tensor_t &dst);  // NOLINT

std::string QuantizationEncodingToString(Qnn_QuantizationEncoding_t q);

std::string TensorDataTypeToString(Qnn_DataType_t t);

using QnnInterfaceGetProvidersFnType = Qnn_ErrorHandle_t (*)(
    const QnnInterface_t ***provider_list, uint32_t *num_providers);

using QnnSystemInterfaceGetProvidersFnType = Qnn_ErrorHandle_t (*)(
    const QnnSystemInterface_t ***provider_list, uint32_t *num_providers);

struct GraphInfo {
  Qnn_GraphHandle_t graph;
  char *graph_name;
  Qnn_Tensor_t *input_tensors;
  uint32_t num_input_tensors;
  Qnn_Tensor_t *output_tensors;
  uint32_t num_output_tensors;
};

struct GraphConfigInfo {
  char *graph_name;
  const QnnGraph_Config_t **graph_configs;
};

using ComposeGraphsFnHandleType = Qnn_ErrorHandle_t (*)(
    Qnn_BackendHandle_t backend_handle, QNN_INTERFACE_VER_TYPE interface,
    Qnn_ContextHandle_t context_handle,
    const GraphConfigInfo **graphs_config_info,
    const uint32_t num_graphs_config_info, GraphInfo ***graphs_info,
    uint32_t *num_graphs_info, bool debug, QnnLog_Callback_t logCallback,
    QnnLog_Level_t max_log_level);

using FreeGraphInfoFnHandleType =
    Qnn_ErrorHandle_t (*)(GraphInfo ***, uint32_t num_graphs_info);

void LogCallback(const char *fmt, QnnLog_Level_t level, uint64_t timestamp,
                 va_list args);

bool CopyMetadataToGraphsInfo(const QnnSystemContext_BinaryInfo_t *binary_info,
                              GraphInfo **&graphs_info,  // NOLINT
                              uint32_t &graphs_count);   // NOLINT
#endif  // SHERPA_ONNX_CSRC_QNN_UTILS_H_


================================================
FILE: sherpa-onnx/csrc/qnn-config.cc
================================================
// sherpa-onnx/csrc/qnn-config.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/qnn-config.h"

#include <sstream>
#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void QnnConfig::Register(ParseOptions *po) {
  po->Register("qnn-backend-lib", &backend_lib,
               "Path to libQnnHtp.so "
               "Used only when provider is qnn."
               "Leave it empty if you don't use qnn");

  po->Register(
      "qnn-context-binary", &context_binary,
      "Path to model.bin. Used only when provider is qnn."
      "If it exists, libmodel.so is ignored."
      "If it does not exist, Context binary is saved to this path so that "
      "it is loaded the next time you run it. You can leave it empty if you "
      "don't use qnn");

  po->Register("qnn-system-lib", &system_lib,
               "Required and used only when --qnn-context-binary is not empty "
               "and exists. You can leave it empty if you don't use qnn.");
}

bool QnnConfig::Validate() const {
  if (backend_lib.empty()) {
    SHERPA_ONNX_LOGE("Please provide path to libQnnHtp.so if you use qnn");
    return false;
  }

  // we don't check whether backend_lib and system_lib exist or not since
  // dlopen() will find them by searching predefined paths

  if (!context_binary.empty() && FileExists(context_binary)) {
    if (system_lib.empty()) {
      SHERPA_ONNX_LOGE(
          "Please provide --qnn-system-lib when you provide "
          "--qnn-context-binary");
      return false;
    }
  }

  return true;
}

std::string QnnConfig::ToString() const {
  std::ostringstream os;

  os << "QnnConfig(";
  os << "backend_lib=\"" << backend_lib << "\", ";
  os << "context_binary=\"" << context_binary << "\", ";
  os << "system_lib=\"" << system_lib << "\")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/qnn-config.h
================================================
// sherpa-onnx/csrc/qnn-config.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_QNN_CONFIG_H_
#define SHERPA_ONNX_CSRC_QNN_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct QnnConfig {
  // Path to the backend library, e.g.,
  // /some/path/to/libQnnHtp.so
  std::string backend_lib;

  // If it exists, you need to also provide system_lib.
  // In this case, the model lib, i.e., libmodel.so, is ignored
  //
  // If it does not exist and if the user want to save the context binary,
  // it will save it to this path.
  std::string context_binary;

  // Required and used only when context_binary exists
  // Example value: /some/path/to/libQnnSystem.so
  std::string system_lib;

  std::string ToString() const;

  void Register(ParseOptions *po);

  bool Validate() const;
};

}  // namespace sherpa_onnx
#endif  // SHERPA_ONNX_CSRC_QNN_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/regex-lang-test.cc
================================================
// sherpa-onnx/csrc/regex-lang-test.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include <iostream>
#include <regex>  // NOLINT
#include <string>
#include <vector>

#include "gtest/gtest.h"
#include "sherpa-onnx/csrc/text-utils.cc"

namespace sherpa_onnx {

static void TestLang(const std::string &expr, const std::string &text,
                     const std::vector<std::string> &expected) {
  auto ws = ToWideString(text);
  std::wstring wexpr = ToWideString(expr);
  std::wregex we(wexpr);

  auto begin = std::wsregex_iterator(ws.begin(), ws.end(), we);
  auto end = std::wsregex_iterator();
  int32_t k = 0;
  for (std::wsregex_iterator i = begin; i != end; ++i) {
    std::wsmatch match = *i;
    std::wstring match_str = match.str();
    auto ms = ToString(match_str);
    std::cout << ms << "\n";
    EXPECT_EQ(ms, expected[k]);
    k++;
  }
  EXPECT_EQ(k, expected.size());
}

TEST(German, Case1) {
  std::cout << "----------Test German----------";
  // see https://character-table.netlify.app/german/
  std::string expr =
      "([\\u0020-\\u005f\\u0061-"
      "\\u007d\\u00a0\\u00a7\\u00a9\\u00ab\\u00bb\\u00c4\\u00d6\\u00dc\\u00df\\"
      "u00e4\\u00f6\\u00fc\\u2010-\\u2011\\u2013-"
      "\\u2014\\u2018\\u201a\\u201c\\u201e\\u2026\\u2030\\u20ac]+)";

  std::string text =
      "开始Übeltäter übergibt Ärzten 中间öfters äußerst ätzende Öle结束3€";

  std::vector<std::string> expected = {"Übeltäter übergibt Ärzten ",
                                       "öfters äußerst ätzende Öle", "3€"};

  TestLang(expr, text, expected);
}

TEST(French, Case1) {
  std::string expr =
      "([\\u0020-\\u005f\\u0061-"
      "\\u007a\\u007c\\u00a0\\u00a7\\u00a9\\u00ab\\u00b2-"
      "\\u00b3\\u00bb\\u00c0\\u00c2\\u00c6-\\u00cb\\u00ce-"
      "\\u00cf\\u00d4\\u00d9\\u00db-\\u00dc\\u00e0\\u00e2\\u00e6-"
      "\\u00eb\\u00ee-\\u00ef\\u00f4\\u00f9\\u00fb-\\u00fc\\u00ff\\u0152-"
      "\\u0153\\u0178\\u02b3\\u02e2\\u1d48-\\u1d49\\u2010-\\u2011\\u2013-"
      "\\u2014\\u2019\\u201c-\\u201d\\u2020-\\u2021\\u2026\\u202f-"
      "\\u2030\\u20ac\\u2212]+)";
  std::string text =
      "L'été, 一avec son ciel bleuâtre, 二est un moment où, 三Noël, maçon";
  std::vector<std::string> expected = {
      "L'été, ",
      "avec son ciel bleuâtre, ",
      "est un moment où, ",
      "Noël, maçon",
  };
  TestLang(expr, text, expected);
}

TEST(English, Case1) {
  // https://character-table.netlify.app/english/
  std::string expr =
      "([\\u0020-\\u005f\\u0061-\\u007a\\u007c\\u00a0\\u00a7\\u00a9\\u2010-"
      "\\u2011\\u2013-\\u2014\\u2018-\\u2019\\u201c-\\u201d\\u2020-"
      "\\u2021\\u2026\\u2030\\u2032-\\u2033\\u20ac]+)";
  std::string text = "一how are you doing? 二Thank you!";

  std::vector<std::string> expected = {
      "how are you doing? ",
      "Thank you!",
  };
  TestLang(expr, text, expected);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/resample.cc
================================================
/**
 * Copyright     2013  Pegah Ghahremani
 *               2014  IMSL, PKU-HKUST (author: Wei Shi)
 *               2014  Yanqing Sun, Junjie Wang
 *               2014  Johns Hopkins University (author: Daniel Povey)
 * Copyright     2023  Xiaomi Corporation (authors: Fangjun Kuang)
 *
 * See LICENSE for clarification regarding multiple authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// this file is copied and modified from
// kaldi/src/feat/resample.cc

#include "sherpa-onnx/csrc/resample.h"

#include <cassert>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <type_traits>
#include <vector>

#ifndef M_2PI
#define M_2PI 6.283185307179586476925286766559005
#endif

#ifndef M_PI
#define M_PI 3.1415926535897932384626433832795
#endif

namespace sherpa_onnx {

template <class I>
static I Gcd(I m, I n) {
  // this function is copied from kaldi/src/base/kaldi-math.h
  if (m == 0 || n == 0) {
    if (m == 0 && n == 0) {  // gcd not defined, as all integers are divisors.
      fprintf(stderr, "Undefined GCD since m = 0, n = 0.\n");
      exit(-1);
    }
    return (m == 0 ? (n > 0 ? n : -n) : (m > 0 ? m : -m));
    // return absolute value of whichever is nonzero
  }
  // could use compile-time assertion
  // but involves messing with complex template stuff.
  static_assert(std::is_integral_v<I>);
  while (true) {
    m %= n;
    if (m == 0) return (n > 0 ? n : -n);
    n %= m;
    if (n == 0) return (m > 0 ? m : -m);
  }
}

/// Returns the least common multiple of two integers.  Will
/// crash unless the inputs are positive.
template <class I>
static I Lcm(I m, I n) {
  // This function is copied from kaldi/src/base/kaldi-math.h
  assert(m > 0 && n > 0);
  I gcd = Gcd(m, n);
  return gcd * (m / gcd) * (n / gcd);
}

static float DotProduct(const float *a, const float *b, int32_t n) {
  float sum = 0;
  for (int32_t i = 0; i != n; ++i) {
    sum += a[i] * b[i];
  }
  return sum;
}

LinearResample::LinearResample(int32_t samp_rate_in_hz,
                               int32_t samp_rate_out_hz, float filter_cutoff_hz,
                               int32_t num_zeros)
    : samp_rate_in_(samp_rate_in_hz),
      samp_rate_out_(samp_rate_out_hz),
      filter_cutoff_(filter_cutoff_hz),
      num_zeros_(num_zeros) {
  assert(samp_rate_in_hz > 0.0 && samp_rate_out_hz > 0.0 &&
         filter_cutoff_hz > 0.0 && filter_cutoff_hz * 2 <= samp_rate_in_hz &&
         filter_cutoff_hz * 2 <= samp_rate_out_hz && num_zeros > 0);

  // base_freq is the frequency of the repeating unit, which is the gcd
  // of the input frequencies.
  int32_t base_freq = Gcd(samp_rate_in_, samp_rate_out_);
  input_samples_in_unit_ = samp_rate_in_ / base_freq;
  output_samples_in_unit_ = samp_rate_out_ / base_freq;

  SetIndexesAndWeights();
  Reset();
}

void LinearResample::SetIndexesAndWeights() {
  first_index_.resize(output_samples_in_unit_);
  weights_.resize(output_samples_in_unit_);

  double window_width = num_zeros_ / (2.0 * filter_cutoff_);

  for (int32_t i = 0; i < output_samples_in_unit_; i++) {
    double output_t = i / static_cast<double>(samp_rate_out_);
    double min_t = output_t - window_width, max_t = output_t + window_width;
    // we do ceil on the min and floor on the max, because if we did it
    // the other way around we would unnecessarily include indexes just
    // outside the window, with zero coefficients.  It's possible
    // if the arguments to the ceil and floor expressions are integers
    // (e.g. if filter_cutoff_ has an exact ratio with the sample rates),
    // that we unnecessarily include something with a zero coefficient,
    // but this is only a slight efficiency issue.
    int32_t min_input_index = ceil(min_t * samp_rate_in_),
            max_input_index = floor(max_t * samp_rate_in_),
            num_indices = max_input_index - min_input_index + 1;
    first_index_[i] = min_input_index;
    weights_[i].resize(num_indices);
    for (int32_t j = 0; j < num_indices; j++) {
      int32_t input_index = min_input_index + j;
      double input_t = input_index / static_cast<double>(samp_rate_in_),
             delta_t = input_t - output_t;
      // sign of delta_t doesn't matter.
      weights_[i][j] = FilterFunc(delta_t) / samp_rate_in_;
    }
  }
}

/** Here, t is a time in seconds representing an offset from
    the center of the windowed filter function, and FilterFunction(t)
    returns the windowed filter function, described
    in the header as h(t) = f(t)g(t), evaluated at t.
*/
float LinearResample::FilterFunc(float t) const {
  float window = 0,  // raised-cosine (Hanning) window of width
                     // num_zeros_/2*filter_cutoff_
      filter = 0;    // sinc filter function
  if (std::fabs(t) < num_zeros_ / (2.0 * filter_cutoff_))
    window = 0.5 * (1 + cos(M_2PI * filter_cutoff_ / num_zeros_ * t));
  else
    window = 0.0;  // outside support of window function
  if (t != 0)
    filter = sin(M_2PI * filter_cutoff_ * t) / (M_PI * t);
  else
    filter = 2 * filter_cutoff_;  // limit of the function at t = 0
  return filter * window;
}

void LinearResample::Reset() {
  input_sample_offset_ = 0;
  output_sample_offset_ = 0;
  input_remainder_.resize(0);
}

void LinearResample::Resample(const float *input, int32_t input_dim, bool flush,
                              std::vector<float> *output) {
  int64_t tot_input_samp = input_sample_offset_ + input_dim,
          tot_output_samp = GetNumOutputSamples(tot_input_samp, flush);

  assert(tot_output_samp >= output_sample_offset_);

  output->resize(tot_output_samp - output_sample_offset_);

  // samp_out is the index into the total output signal, not just the part
  // of it we are producing here.
  for (int64_t samp_out = output_sample_offset_; samp_out < tot_output_samp;
       samp_out++) {
    int64_t first_samp_in = 0;
    int32_t samp_out_wrapped = 0;
    GetIndexes(samp_out, &first_samp_in, &samp_out_wrapped);
    const std::vector<float> &weights = weights_[samp_out_wrapped];
    // first_input_index is the first index into "input" that we have a weight
    // for.
    int32_t first_input_index =
        static_cast<int32_t>(first_samp_in - input_sample_offset_);
    float this_output = 0;
    if (first_input_index >= 0 &&
        first_input_index + static_cast<int32_t>(weights.size()) <= input_dim) {
      this_output =
          DotProduct(input + first_input_index, weights.data(), weights.size());
    } else {  // Handle edge cases.
      this_output = 0.0;
      for (int32_t i = 0; i < static_cast<int32_t>(weights.size()); i++) {
        float weight = weights[i];
        int32_t input_index = first_input_index + i;
        if (input_index < 0 &&
            static_cast<int32_t>(input_remainder_.size()) + input_index >= 0) {
          this_output +=
              weight * input_remainder_[input_remainder_.size() + input_index];
        } else if (input_index >= 0 && input_index < input_dim) {
          this_output += weight * input[input_index];
        } else if (input_index >= input_dim) {
          // We're past the end of the input and are adding zero; should only
          // happen if the user specified flush == true, or else we would not
          // be trying to output this sample.
          assert(flush);
        }
      }
    }
    int32_t output_index =
        static_cast<int32_t>(samp_out - output_sample_offset_);
    (*output)[output_index] = this_output;
  }

  if (flush) {
    Reset();  // Reset the internal state.
  } else {
    SetRemainder(input, input_dim);
    input_sample_offset_ = tot_input_samp;
    output_sample_offset_ = tot_output_samp;
  }
}

int64_t LinearResample::GetNumOutputSamples(int64_t input_num_samp,
                                            bool flush) const {
  // For exact computation, we measure time in "ticks" of 1.0 / tick_freq,
  // where tick_freq is the least common multiple of samp_rate_in_ and
  // samp_rate_out_.
  int32_t tick_freq = Lcm(samp_rate_in_, samp_rate_out_);
  int32_t ticks_per_input_period = tick_freq / samp_rate_in_;

  // work out the number of ticks in the time interval
  // [ 0, input_num_samp/samp_rate_in_ ).
  int64_t interval_length_in_ticks = input_num_samp * ticks_per_input_period;
  if (!flush) {
    float window_width = num_zeros_ / (2.0 * filter_cutoff_);
    // To count the window-width in ticks we take the floor.  This
    // is because since we're looking for the largest integer num-out-samp
    // that fits in the interval, which is open on the right, a reduction
    // in interval length of less than a tick will never make a difference.
    // For example, the largest integer in the interval [ 0, 2 ) and the
    // largest integer in the interval [ 0, 2 - 0.9 ) are the same (both one).
    // So when we're subtracting the window-width we can ignore the fractional
    // part.
    int32_t window_width_ticks = std::floor(window_width * tick_freq);
    // The time-period of the output that we can sample gets reduced
    // by the window-width (which is actually the distance from the
    // center to the edge of the windowing function) if we're not
    // "flushing the output".
    interval_length_in_ticks -= window_width_ticks;
  }
  if (interval_length_in_ticks <= 0) return 0;

  int32_t ticks_per_output_period = tick_freq / samp_rate_out_;
  // Get the last output-sample in the closed interval, i.e. replacing [ ) with
  // [ ].  Note: integer division rounds down.  See
  // http://en.wikipedia.org/wiki/Interval_(mathematics) for an explanation of
  // the notation.
  int64_t last_output_samp = interval_length_in_ticks / ticks_per_output_period;
  // We need the last output-sample in the open interval, so if it takes us to
  // the end of the interval exactly, subtract one.
  if (last_output_samp * ticks_per_output_period == interval_length_in_ticks)
    last_output_samp--;

  // First output-sample index is zero, so the number of output samples
  // is the last output-sample plus one.
  int64_t num_output_samp = last_output_samp + 1;
  return num_output_samp;
}

// inline
void LinearResample::GetIndexes(int64_t samp_out, int64_t *first_samp_in,
                                int32_t *samp_out_wrapped) const {
  // A unit is the smallest nonzero amount of time that is an exact
  // multiple of the input and output sample periods.  The unit index
  // is the answer to "which numbered unit we are in".
  int64_t unit_index = samp_out / output_samples_in_unit_;
  // samp_out_wrapped is equal to samp_out % output_samples_in_unit_
  *samp_out_wrapped =
      static_cast<int32_t>(samp_out - unit_index * output_samples_in_unit_);
  *first_samp_in =
      first_index_[*samp_out_wrapped] + unit_index * input_samples_in_unit_;
}

void LinearResample::SetRemainder(const float *input, int32_t input_dim) {
  std::vector<float> old_remainder(input_remainder_);
  // max_remainder_needed is the width of the filter from side to side,
  // measured in input samples.  you might think it should be half that,
  // but you have to consider that you might be wanting to output samples
  // that are "in the past" relative to the beginning of the latest
  // input... anyway, storing more remainder than needed is not harmful.
  int32_t max_remainder_needed =
      std::ceil(samp_rate_in_ * num_zeros_ / filter_cutoff_);
  input_remainder_.resize(max_remainder_needed);
  for (int32_t index = -static_cast<int32_t>(input_remainder_.size());
       index < 0; index++) {
    // we interpret "index" as an offset from the end of "input" and
    // from the end of input_remainder_.
    int32_t input_index = index + input_dim;
    if (input_index >= 0) {
      input_remainder_[index + static_cast<int32_t>(input_remainder_.size())] =
          input[input_index];
    } else if (input_index + static_cast<int32_t>(old_remainder.size()) >= 0) {
      input_remainder_[index + static_cast<int32_t>(input_remainder_.size())] =
          old_remainder[input_index +
                        static_cast<int32_t>(old_remainder.size())];
      // else leave it at zero.
    }
  }
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/resample.h
================================================
/**
 * Copyright     2013  Pegah Ghahremani
 *               2014  IMSL, PKU-HKUST (author: Wei Shi)
 *               2014  Yanqing Sun, Junjie Wang
 *               2014  Johns Hopkins University (author: Daniel Povey)
 * Copyright     2023  Xiaomi Corporation (authors: Fangjun Kuang)
 *
 * See LICENSE for clarification regarding multiple authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// this file is copied and modified from
// kaldi/src/feat/resample.h
#ifndef SHERPA_ONNX_CSRC_RESAMPLE_H_
#define SHERPA_ONNX_CSRC_RESAMPLE_H_

#include <cstdint>
#include <vector>

namespace sherpa_onnx {

/*
   We require that the input and output sampling rate be specified as
   integers, as this is an easy way to specify that their ratio be rational.
*/

class LinearResample {
 public:
  /// Constructor.  We make the input and output sample rates integers, because
  /// we are going to need to find a common divisor.  This should just remind
  /// you that they need to be integers.  The filter cutoff needs to be less
  /// than samp_rate_in_hz/2 and less than samp_rate_out_hz/2.  num_zeros
  /// controls the sharpness of the filter, more == sharper but less efficient.
  /// We suggest around 4 to 10 for normal use.
  LinearResample(int32_t samp_rate_in_hz, int32_t samp_rate_out_hz,
                 float filter_cutoff_hz, int32_t num_zeros);

  /// Calling the function Reset() resets the state of the object prior to
  /// processing a new signal; it is only necessary if you have called
  /// Resample(x, x_size, false, y) for some signal, leading to a remainder of
  /// the signal being called, but then abandon processing the signal before
  /// calling Resample(x, x_size, true, y) for the last piece.  Call it
  /// unnecessarily between signals will not do any harm.
  void Reset();

  /// This function does the resampling.  If you call it with flush == true and
  /// you have never called it with flush == false, it just resamples the input
  /// signal (it resizes the output to a suitable number of samples).
  ///
  /// You can also use this function to process a signal a piece at a time.
  /// suppose you break it into piece1, piece2, ... pieceN.  You can call
  /// \code{.cc}
  /// Resample(piece1, piece1_size, false, &output1);
  /// Resample(piece2, piece2_size, false, &output2);
  /// Resample(piece3, piece3_size, true, &output3);
  /// \endcode
  /// If you call it with flush == false, it won't output the last few samples
  /// but will remember them, so that if you later give it a second piece of
  /// the input signal it can process it correctly.
  /// If your most recent call to the object was with flush == false, it will
  /// have internal state; you can remove this by calling Reset().
  /// Empty input is acceptable.
  void Resample(const float *input, int32_t input_dim, bool flush,
                std::vector<float> *output);

  //// Return the input and output sampling rates (for checks, for example)
  int32_t GetInputSamplingRate() const { return samp_rate_in_; }
  int32_t GetOutputSamplingRate() const { return samp_rate_out_; }

 private:
  void SetIndexesAndWeights();

  float FilterFunc(float) const;

  /// This function outputs the number of output samples we will output
  /// for a signal with "input_num_samp" input samples.  If flush == true,
  /// we return the largest n such that
  /// (n/samp_rate_out_) is in the interval [ 0, input_num_samp/samp_rate_in_ ),
  /// and note that the interval is half-open.  If flush == false,
  /// define window_width as num_zeros / (2.0 * filter_cutoff_);
  /// we return the largest n such that (n/samp_rate_out_) is in the interval
  /// [ 0, input_num_samp/samp_rate_in_ - window_width ).
  int64_t GetNumOutputSamples(int64_t input_num_samp, bool flush) const;

  /// Given an output-sample index, this function outputs to *first_samp_in the
  /// first input-sample index that we have a weight on (may be negative),
  /// and to *samp_out_wrapped the index into weights_ where we can get the
  /// corresponding weights on the input.
  inline void GetIndexes(int64_t samp_out, int64_t *first_samp_in,
                         int32_t *samp_out_wrapped) const;

  void SetRemainder(const float *input, int32_t input_dim);

 private:
  // The following variables are provided by the user.
  int32_t samp_rate_in_;
  int32_t samp_rate_out_;
  float filter_cutoff_;
  int32_t num_zeros_;

  int32_t input_samples_in_unit_;  ///< The number of input samples in the
                                   ///< smallest repeating unit: num_samp_in_ =
                                   ///< samp_rate_in_hz / Gcd(samp_rate_in_hz,
                                   ///< samp_rate_out_hz)

  int32_t output_samples_in_unit_;  ///< The number of output samples in the
                                    ///< smallest repeating unit: num_samp_out_
                                    ///< = samp_rate_out_hz /
                                    ///< Gcd(samp_rate_in_hz, samp_rate_out_hz)

  /// The first input-sample index that we sum over, for this output-sample
  /// index.  May be negative; any truncation at the beginning is handled
  /// separately.  This is just for the first few output samples, but we can
  /// extrapolate the correct input-sample index for arbitrary output samples.
  std::vector<int32_t> first_index_;

  /// Weights on the input samples, for this output-sample index.
  std::vector<std::vector<float>> weights_;

  // the following variables keep track of where we are in a particular signal,
  // if it is being provided over multiple calls to Resample().

  int64_t input_sample_offset_ = 0;   ///< The number of input samples we have
                                      ///< already received for this signal
                                      ///< (including anything in remainder_)
  int64_t output_sample_offset_ = 0;  ///< The number of samples we have already
                                      ///< output for this signal.
  std::vector<float> input_remainder_;  ///< A small trailing part of the
                                        ///< previously seen input signal.
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_RESAMPLE_H_


================================================
FILE: sherpa-onnx/csrc/rknn/context-blocking-queue-rknn.cc
================================================
// sherpa-onnx/csrc/rknn/context-blocking-queue-rknn.cc
//
// Copyright      2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/rknn/context-blocking-queue-rknn.h"

#include <condition_variable>
#include <mutex>
#include <queue>

#include "sherpa-onnx/csrc/rknn/macros.h"
#include "sherpa-onnx/csrc/rknn/utils.h"

namespace sherpa_onnx {

class ContextBlockingQueueRknn::Impl {
 public:
  Impl(rknn_context context, int32_t num_threads, int32_t capacity) {
    for (int32_t i = 0; i < capacity; ++i) {
      rknn_context bak = 0;
      auto ret = rknn_dup_context(&context, &bak);
      SHERPA_ONNX_RKNN_CHECK(ret, "Failed to duplicate context");

      SetCoreMask(bak, num_threads);
      queue_.push(bak);
    }
  }
  rknn_context Take() {
    std::unique_lock<std::mutex> lock(mutex_);

    cv_.wait(lock, [&] { return stopped_ || !queue_.empty(); });

    if (stopped_ && queue_.empty()) {
      return 0;
    }

    rknn_context ctx = queue_.front();
    queue_.pop();
    return ctx;
  }

  void Put(rknn_context ctx) {
    {
      std::lock_guard<std::mutex> lock(mutex_);
      if (stopped_) {
        rknn_destroy(ctx);
        return;
      }
      queue_.push(ctx);
    }
    cv_.notify_one();
  }

  ~Impl() {
    {
      std::lock_guard<std::mutex> lock(mutex_);
      stopped_ = true;
    }
    cv_.notify_all();
    Cleanup();
  }

 private:
  void Cleanup() {
    while (!queue_.empty()) {
      rknn_destroy(queue_.front());
      queue_.pop();
    }
  }

  std::queue<rknn_context> queue_;
  std::mutex mutex_;
  std::condition_variable cv_;
  bool stopped_ = false;
};

ContextBlockingQueueRknn::ContextBlockingQueueRknn(rknn_context context,
                                                   int32_t num_threads,
                                                   int32_t capacity /*= 10*/)
    : impl_(std::make_unique<Impl>(context, num_threads, capacity)) {}

ContextBlockingQueueRknn::~ContextBlockingQueueRknn() = default;

rknn_context ContextBlockingQueueRknn::Take() { return impl_->Take(); }

void ContextBlockingQueueRknn::Put(rknn_context context) {
  impl_->Put(context);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/rknn/context-blocking-queue-rknn.h
================================================
// sherpa-onnx/csrc/rknn/context-blocking-queue-rknn.h
//
// Copyright      2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_RKNN_CONTEXT_BLOCKING_QUEUE_RKNN_H_
#define SHERPA_ONNX_CSRC_RKNN_CONTEXT_BLOCKING_QUEUE_RKNN_H_

#include <memory>

#include "rknn_api.h"  // NOLINT

namespace sherpa_onnx {

class ContextBlockingQueueRknn {
 public:
  ContextBlockingQueueRknn(rknn_context context, int32_t num_threads,
                           int32_t capacity = 10);
  ~ContextBlockingQueueRknn();

  rknn_context Take();
  void Put(rknn_context context);

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_RKNN_CONTEXT_BLOCKING_QUEUE_RKNN_H_


================================================
FILE: sherpa-onnx/csrc/rknn/keyword-spotter-transducer-rknn-impl.h
================================================
// sherpa-onnx/csrc/rknn/keyword-spotter-transducer-rknn-impl.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_RKNN_KEYWORD_SPOTTER_TRANSDUCER_RKNN_IMPL_H_
#define SHERPA_ONNX_CSRC_RKNN_KEYWORD_SPOTTER_TRANSDUCER_RKNN_IMPL_H_

#include <algorithm>
#include <memory>
#include <regex>  // NOLINT
#include <string>
#include <sstream>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/keyword-spotter-impl.h"
#include "sherpa-onnx/csrc/keyword-spotter.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/rknn/online-stream-rknn.h"
#include "sherpa-onnx/csrc/rknn/online-zipformer-transducer-model-rknn.h"
#include "sherpa-onnx/csrc/rknn/transducer-keyword-decoder-rknn.h"
#include "sherpa-onnx/csrc/symbol-table.h"
#include "sherpa-onnx/csrc/utils.h"

namespace sherpa_onnx {

KeywordResult Convert(const TransducerKeywordResult &src,
                      const SymbolTable &sym_table, float frame_shift_ms,
                      int32_t subsampling_factor, int32_t frames_since_start);

class KeywordSpotterTransducerRknnImpl : public KeywordSpotterImpl {
 public:
  explicit KeywordSpotterTransducerRknnImpl(const KeywordSpotterConfig &config)
      : config_(config),
        model_(std::make_unique<OnlineZipformerTransducerModelRknn>(
            config.model_config)) {
    if (!config.model_config.tokens_buf.empty()) {
      sym_ = SymbolTable(config.model_config.tokens_buf, false);
    } else {
      /// assuming tokens_buf and tokens are guaranteed not being both empty
      sym_ = SymbolTable(config.model_config.tokens, true);
    }

    if (sym_.Contains("<unk>")) {
      unk_id_ = sym_["<unk>"];
    }

    if (config.keywords_buf.empty()) {
      InitKeywords();
    } else {
      InitKeywordsFromBufStr();
    }

    decoder_ = std::make_unique<TransducerKeywordDecoderRknn>(
        model_.get(), config_.max_active_paths, config_.num_trailing_blanks,
        unk_id_);
  }

  template <typename Manager>
  KeywordSpotterTransducerRknnImpl(Manager *mgr,
                                   const KeywordSpotterConfig &config)
      : config_(config),
        model_(std::make_unique<OnlineZipformerTransducerModelRknn>(
            mgr, config.model_config)),
        sym_(mgr, config.model_config.tokens) {
    if (sym_.Contains("<unk>")) {
      unk_id_ = sym_["<unk>"];
    }

    InitKeywords(mgr);

    decoder_ = std::make_unique<TransducerKeywordDecoderRknn>(
        model_.get(), config_.max_active_paths, config_.num_trailing_blanks,
        unk_id_);
  }

  std::unique_ptr<OnlineStream> CreateStream() const override {
    auto stream = std::make_unique<OnlineStreamRknn>(config_.feat_config,
                                                     keywords_graph_);

    InitOnlineStream(stream.get());
    return stream;
  }

  std::unique_ptr<OnlineStream> CreateStream(
      const std::string &keywords) const override {
    auto kws = std::regex_replace(keywords, std::regex("/"), "\n");
    std::istringstream is(kws);

    std::vector<std::vector<int32_t>> current_ids;
    std::vector<std::string> current_kws;
    std::vector<float> current_scores;
    std::vector<float> current_thresholds;

    if (!EncodeKeywords(is, sym_, &current_ids, &current_kws, &current_scores,
                        &current_thresholds)) {
#if __OHOS__
      SHERPA_ONNX_LOGE("Encode keywords %{public}s failed.", keywords.c_str());
#else
      SHERPA_ONNX_LOGE("Encode keywords %s failed.", keywords.c_str());
#endif
      return nullptr;
    }

    int32_t num_kws = current_ids.size();
    int32_t num_default_kws = keywords_id_.size();

    current_ids.insert(current_ids.end(), keywords_id_.begin(),
                       keywords_id_.end());

    if (!current_kws.empty() && !keywords_.empty()) {
      current_kws.insert(current_kws.end(), keywords_.begin(), keywords_.end());
    } else if (!current_kws.empty() && keywords_.empty()) {
      current_kws.insert(current_kws.end(), num_default_kws, std::string());
    } else if (current_kws.empty() && !keywords_.empty()) {
      current_kws.insert(current_kws.end(), num_kws, std::string());
      current_kws.insert(current_kws.end(), keywords_.begin(), keywords_.end());
    } else {
      // Do nothing.
    }

    if (!current_scores.empty() && !boost_scores_.empty()) {
      current_scores.insert(current_scores.end(), boost_scores_.begin(),
                            boost_scores_.end());
    } else if (!current_scores.empty() && boost_scores_.empty()) {
      current_scores.insert(current_scores.end(), num_default_kws,
                            config_.keywords_score);
    } else if (current_scores.empty() && !boost_scores_.empty()) {
      current_scores.insert(current_scores.end(), num_kws,
                            config_.keywords_score);
      current_scores.insert(current_scores.end(), boost_scores_.begin(),
                            boost_scores_.end());
    } else {
      // Do nothing.
    }

    if (!current_thresholds.empty() && !thresholds_.empty()) {
      current_thresholds.insert(current_thresholds.end(), thresholds_.begin(),
                                thresholds_.end());
    } else if (!current_thresholds.empty() && thresholds_.empty()) {
      current_thresholds.insert(current_thresholds.end(), num_default_kws,
                                config_.keywords_threshold);
    } else if (current_thresholds.empty() && !thresholds_.empty()) {
      current_thresholds.insert(current_thresholds.end(), num_kws,
                                config_.keywords_threshold);
      current_thresholds.insert(current_thresholds.end(), thresholds_.begin(),
                                thresholds_.end());
    } else {
      // Do nothing.
    }

    auto keywords_graph = std::make_shared<ContextGraph>(
        current_ids, config_.keywords_score, config_.keywords_threshold,
        current_scores, current_kws, current_thresholds);

    auto stream =
        std::make_unique<OnlineStreamRknn>(config_.feat_config, keywords_graph);
    InitOnlineStream(stream.get());
    return stream;
  }

  bool IsReady(OnlineStream *s) const override {
    return s->GetNumProcessedFrames() + model_->ChunkSize() <
           s->NumFramesReady();
  }

  void Reset(OnlineStream *s) const override {
    InitOnlineStream(reinterpret_cast<OnlineStreamRknn *>(s));
  }

  void DecodeStream(OnlineStreamRknn *s) const {
    auto r = s->GetKeywordResult(true);
    int32_t num_trailing_blanks = r.num_trailing_blanks;
    // assume subsampling_factor is 4
    // assume frameshift is 0.01 second
    float trailing_slience = num_trailing_blanks * 4 * 0.01;

    // it resets automatically after detecting 1.5 seconds of silence
    float threshold = 1.5;
    if (trailing_slience > threshold) {
      Reset(s);
    }

    int32_t chunk_size = model_->ChunkSize();
    int32_t chunk_shift = model_->ChunkShift();

    int32_t feature_dim = s->FeatureDim();

    const auto num_processed_frames = s->GetNumProcessedFrames();

    std::vector<float> features =
        s->GetFrames(num_processed_frames, chunk_size);
    s->GetNumProcessedFrames() += chunk_shift;

    auto &states = s->GetZipformerEncoderStates();

    auto p = model_->RunEncoder(features, std::move(states));

    states = std::move(p.second);

    decoder_->Decode(std::move(p.first), s);
  }

  void DecodeStreams(OnlineStream **ss, int32_t n) const override {
    for (int32_t i = 0; i < n; ++i) {
      DecodeStream(reinterpret_cast<OnlineStreamRknn *>(ss[i]));
    }
  }

  KeywordResult GetResult(OnlineStream *s) const override {
    TransducerKeywordResult decoder_result = s->GetKeywordResult(true);

    // TODO(fangjun): Remember to change these constants if needed
    int32_t frame_shift_ms = 10;
    int32_t subsampling_factor = 4;
    return Convert(decoder_result, sym_, frame_shift_ms, subsampling_factor,
                   s->GetNumFramesSinceStart());
  }

 private:
  void InitKeywords(std::istream &is) {
    if (!EncodeKeywords(is, sym_, &keywords_id_, &keywords_, &boost_scores_,
                        &thresholds_)) {
      SHERPA_ONNX_LOGE("Encode keywords failed.");
      exit(-1);
    }
    keywords_graph_ = std::make_shared<ContextGraph>(
        keywords_id_, config_.keywords_score, config_.keywords_threshold,
        boost_scores_, keywords_, thresholds_);
  }

  void InitKeywords() {
#ifdef SHERPA_ONNX_ENABLE_WASM_KWS
    // Due to the limitations of the wasm file system,
    // the keyword_file variable is directly parsed as a string of keywords
    // if WASM KWS on
    std::istringstream is(config_.keywords_file);
    InitKeywords(is);
#else
    // each line in keywords_file contains space-separated words
    std::ifstream is(config_.keywords_file);
    if (!is) {
#if __OHOS__
      SHERPA_ONNX_LOGE("Open keywords file failed: %{public}s",
                       config_.keywords_file.c_str());
#else
      SHERPA_ONNX_LOGE("Open keywords file failed: %s",
                       config_.keywords_file.c_str());
#endif
      exit(-1);
    }
    InitKeywords(is);
#endif
  }

  template <typename Manager>
  void InitKeywords(Manager *mgr) {
    // each line in keywords_file contains space-separated words

    auto buf = ReadFile(mgr, config_.keywords_file);

    std::istringstream is(std::string(buf.data(), buf.size()));

    if (!is) {
#if __OHOS__
      SHERPA_ONNX_LOGE("Open keywords file failed: %{public}s",
                       config_.keywords_file.c_str());
#else
      SHERPA_ONNX_LOGE("Open keywords file failed: %s",
                       config_.keywords_file.c_str());
#endif
      exit(-1);
    }
    InitKeywords(is);
  }

  void InitKeywordsFromBufStr() {
    // keywords_buf's content is supposed to be same as the keywords_file's
    std::istringstream is(config_.keywords_buf);
    InitKeywords(is);
  }

  void InitOnlineStream(OnlineStreamRknn *stream) const {
    auto r = decoder_->GetEmptyResult();
    SHERPA_ONNX_CHECK_EQ(r.hyps.Size(), 1);

    SHERPA_ONNX_CHECK(stream->GetContextGraph() != nullptr);
    r.hyps.begin()->second.context_state = stream->GetContextGraph()->Root();

    stream->SetKeywordResult(r);
    stream->SetZipformerEncoderStates(model_->GetEncoderInitStates());
  }

 private:
  KeywordSpotterConfig config_;
  std::vector<std::vector<int32_t>> keywords_id_;
  std::vector<float> boost_scores_;
  std::vector<float> thresholds_;
  std::vector<std::string> keywords_;
  ContextGraphPtr keywords_graph_;
  std::unique_ptr<OnlineZipformerTransducerModelRknn> model_;

  std::unique_ptr<TransducerKeywordDecoderRknn> decoder_;
  SymbolTable sym_;
  int32_t unk_id_ = -1;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_RKNN_KEYWORD_SPOTTER_TRANSDUCER_RKNN_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/rknn/macros.h
================================================
// sherpa-onnx/csrc/rknn/macros.h
//
// Copyright      2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_RKNN_MACROS_H_
#define SHERPA_ONNX_CSRC_RKNN_MACROS_H_

#include "sherpa-onnx/csrc/macros.h"

#define SHERPA_ONNX_RKNN_CHECK(ret, msg, ...)      \
  do {                                             \
    if (ret != RKNN_SUCC) {                        \
      SHERPA_ONNX_LOGE("Return code is: %d", ret); \
      SHERPA_ONNX_LOGE(msg, ##__VA_ARGS__);        \
      SHERPA_ONNX_EXIT(-1);                        \
    }                                              \
  } while (0)

#endif  // SHERPA_ONNX_CSRC_RKNN_MACROS_H_


================================================
FILE: sherpa-onnx/csrc/rknn/offline-ctc-greedy-search-decoder-rknn.cc
================================================
// sherpa-onnx/csrc/rknn/offline-ctc-greedy-search-decoder-rknn.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/rknn/offline-ctc-greedy-search-decoder-rknn.h"

#include <algorithm>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

OfflineCtcDecoderResult OfflineCtcGreedySearchDecoderRknn::Decode(
    const float *logits, int32_t num_frames, int32_t vocab_size) {
  OfflineCtcDecoderResult ans;

  int64_t prev_id = -1;

  for (int32_t t = 0; t != num_frames; ++t) {
    auto y = static_cast<int64_t>(std::distance(
        static_cast<const float *>(logits),
        std::max_element(static_cast<const float *>(logits),
                         static_cast<const float *>(logits) + vocab_size)));
    logits += vocab_size;

    if (y != blank_id_ && y != prev_id) {
      ans.tokens.push_back(y);
      ans.timestamps.push_back(t);
    }
    prev_id = y;
  }  // for (int32_t t = 0; ...)

  return ans;
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/rknn/offline-ctc-greedy-search-decoder-rknn.h
================================================
// sherpa-onnx/csrc/rknn/offline-ctc-greedy-search-decoder-rknn.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_RKNN_OFFLINE_CTC_GREEDY_SEARCH_DECODER_RKNN_H_
#define SHERPA_ONNX_CSRC_RKNN_OFFLINE_CTC_GREEDY_SEARCH_DECODER_RKNN_H_

#include <vector>

#include "sherpa-onnx/csrc/offline-ctc-decoder.h"

namespace sherpa_onnx {

class OfflineCtcGreedySearchDecoderRknn {
 public:
  explicit OfflineCtcGreedySearchDecoderRknn(int32_t blank_id)
      : blank_id_(blank_id) {}

  OfflineCtcDecoderResult Decode(const float *logits, int32_t num_frames,
                                 int32_t vocab_size);

 private:
  int32_t blank_id_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_RKNN_OFFLINE_CTC_GREEDY_SEARCH_DECODER_RKNN_H_


================================================
FILE: sherpa-onnx/csrc/rknn/offline-paraformer-model-rknn.cc
================================================
// sherpa-onnx/csrc/rknn/offline-paraformer-model-rknn.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/rknn/offline-paraformer-model-rknn.h"

#include <algorithm>
#include <array>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/math.h"
#include "sherpa-onnx/csrc/rknn/context-blocking-queue-rknn.h"
#include "sherpa-onnx/csrc/rknn/macros.h"
#include "sherpa-onnx/csrc/rknn/utils.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class OfflineParaformerModelRknn::Impl {
 public:
  ~Impl() {
    auto ret = rknn_destroy(encoder_ctx_);
    if (ret != RKNN_SUCC) {
      SHERPA_ONNX_LOGE("Failed to destroy the encoder context");
    }

    ret = rknn_destroy(predictor_ctx_);
    if (ret != RKNN_SUCC) {
      SHERPA_ONNX_LOGE("Failed to destroy the predictor context");
    }

    ret = rknn_destroy(decoder_ctx_);
    if (ret != RKNN_SUCC) {
      SHERPA_ONNX_LOGE("Failed to destroy the decoder context");
    }
  }

  explicit Impl(const OfflineModelConfig &config) : config_(config) {
    std::vector<std::string> filenames;
    SplitStringToVector(config_.paraformer.model, ",", false, &filenames);
    if (filenames.size() != 3) {
      SHERPA_ONNX_LOGE("Invalid Paraformer RK NPU model '%s'",
                       config_.paraformer.model.c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    {
      auto buf = ReadFile(filenames[0]);
      InitEncoder(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(filenames[1]);
      InitPredictor(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(filenames[2]);
      InitDecoder(buf.data(), buf.size());
    }

    PostInit();
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineModelConfig &config) : config_(config) {
    std::vector<std::string> filenames;
    SplitStringToVector(config_.paraformer.model, ",", false, &filenames);
    if (filenames.size() != 3) {
      SHERPA_ONNX_LOGE("Invalid Paraformer RK NPU model '%s'",
                       config_.paraformer.model.c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    {
      auto buf = ReadFile(mgr, filenames[0]);
      InitEncoder(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(mgr, filenames[1]);
      InitPredictor(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(mgr, filenames[2]);
      InitDecoder(buf.data(), buf.size());
    }

    PostInit();
  }

  std::vector<float> Run(std::vector<float> features) {
    std::vector<float> encoder_out = RunEncoder(features);
    if (encoder_out.empty()) {
      return {};
    }

    std::vector<float> alphas = RunPredictor(encoder_out);

    std::vector<float> acoustic_embedding =
        ComputeAcousticEmbedding(encoder_out, alphas, encoder_out_dim_);
    if (acoustic_embedding.empty()) {
      if (config_.debug) {
        SHERPA_ONNX_LOGE("No speech found in the input audio");
      }

      return {};
    }

    int32_t num_tokens = acoustic_embedding.size() / encoder_out_dim_;

    acoustic_embedding.resize(encoder_out.size());

    return RunDecoder(std::move(encoder_out), std::move(acoustic_embedding),
                      num_tokens);
  }

  int32_t VocabSize() const { return vocab_size_; }

 private:
  std::vector<float> RunEncoder(std::vector<float> features) {
    features = ApplyLFR(std::move(features));
    if (features.empty()) {
      return {};
    }

    std::vector<rknn_input> inputs(encoder_input_attrs_.size());

    inputs[0].index = encoder_input_attrs_[0].index;
    inputs[0].type = RKNN_TENSOR_FLOAT32;
    inputs[0].fmt = encoder_input_attrs_[0].fmt;
    inputs[0].buf = reinterpret_cast<void *>(features.data());
    inputs[0].size = features.size() * sizeof(float);

    std::vector<float> out(encoder_output_attrs_[0].n_elems);

    std::vector<rknn_output> outputs(encoder_output_attrs_.size());
    outputs[0].index = encoder_output_attrs_[0].index;
    outputs[0].is_prealloc = 1;
    outputs[0].want_float = 1;
    outputs[0].size = out.size() * sizeof(float);
    outputs[0].buf = reinterpret_cast<void *>(out.data());

    rknn_context ctx = encoder_ctx_queue_->Take();

    auto ret = rknn_inputs_set(ctx, inputs.size(), inputs.data());
    SHERPA_ONNX_RKNN_CHECK(ret, "Failed to set encoder inputs");

    ret = rknn_run(ctx, nullptr);
    SHERPA_ONNX_RKNN_CHECK(ret, "Failed to run the encoder model");

    ret = rknn_outputs_get(ctx, outputs.size(), outputs.data(), nullptr);
    SHERPA_ONNX_RKNN_CHECK(ret, "Failed to get encoder output");

    encoder_ctx_queue_->Put(ctx);

    return out;
  }

  std::vector<float> RunPredictor(const std::vector<float> &encoder_out) {
    std::vector<rknn_input> inputs(predictor_input_attrs_.size());

    inputs[0].index = predictor_input_attrs_[0].index;
    inputs[0].type = RKNN_TENSOR_FLOAT32;
    inputs[0].fmt = predictor_input_attrs_[0].fmt;
    inputs[0].buf =
        reinterpret_cast<void *>(const_cast<float *>(encoder_out.data()));
    inputs[0].size = encoder_out.size() * sizeof(float);

    std::vector<float> out(predictor_output_attrs_[0].n_elems);

    std::vector<rknn_output> outputs(predictor_output_attrs_.size());
    outputs[0].index = predictor_output_attrs_[0].index;
    outputs[0].is_prealloc = 1;
    outputs[0].want_float = 1;
    outputs[0].size = out.size() * sizeof(float);
    outputs[0].buf = reinterpret_cast<void *>(out.data());

    rknn_context ctx = predictor_ctx_queue_->Take();

    auto ret = rknn_inputs_set(ctx, inputs.size(), inputs.data());
    SHERPA_ONNX_RKNN_CHECK(ret, "Failed to set predictor inputs");

    ret = rknn_run(ctx, nullptr);
    SHERPA_ONNX_RKNN_CHECK(ret, "Failed to run the predictor model");

    ret = rknn_outputs_get(ctx, outputs.size(), outputs.data(), nullptr);
    SHERPA_ONNX_RKNN_CHECK(ret, "Failed to get predictor output");

    predictor_ctx_queue_->Put(ctx);

    return out;
  }

  std::vector<float> RunDecoder(std::vector<float> encoder_out,
                                std::vector<float> acoustic_embedding,
                                int32_t num_tokens) {
    int32_t num_frames = encoder_out.size() / encoder_out_dim_;

    std::vector<rknn_input> inputs(decoder_input_attrs_.size());

    inputs[0].index = decoder_input_attrs_[0].index;
    inputs[0].type = RKNN_TENSOR_FLOAT32;
    inputs[0].fmt = decoder_input_attrs_[0].fmt;
    inputs[0].buf = reinterpret_cast<void *>(encoder_out.data());
    inputs[0].size = encoder_out.size() * sizeof(float);

    inputs[1].index = decoder_input_attrs_[1].index;
    inputs[1].type = RKNN_TENSOR_FLOAT32;
    inputs[1].fmt = decoder_input_attrs_[1].fmt;
    inputs[1].buf = reinterpret_cast<void *>(acoustic_embedding.data());
    inputs[1].size = acoustic_embedding.size() * sizeof(float);

    std::vector<float> mask(num_frames, 1);
    std::fill(mask.begin() + num_tokens, mask.end(), 0);

    inputs[2].index = decoder_input_attrs_[2].index;
    inputs[2].type = RKNN_TENSOR_FLOAT32;
    inputs[2].fmt = decoder_input_attrs_[2].fmt;
    inputs[2].buf = reinterpret_cast<void *>(mask.data());
    inputs[2].size = mask.size() * sizeof(float);

    std::vector<float> out(decoder_output_attrs_[0].n_elems);

    std::vector<rknn_output> outputs(decoder_output_attrs_.size());
    outputs[0].index = decoder_output_attrs_[0].index;
    outputs[0].is_prealloc = 1;
    outputs[0].want_float = 1;
    outputs[0].size = out.size() * sizeof(float);
    outputs[0].buf = reinterpret_cast<void *>(out.data());

    rknn_context ctx = decoder_ctx_queue_->Take();

    auto ret = rknn_inputs_set(ctx, inputs.size(), inputs.data());
    SHERPA_ONNX_RKNN_CHECK(ret, "Failed to set decoder inputs");

    ret = rknn_run(ctx, nullptr);
    SHERPA_ONNX_RKNN_CHECK(ret, "Failed to run the decoder model");

    ret = rknn_outputs_get(ctx, outputs.size(), outputs.data(), nullptr);
    SHERPA_ONNX_RKNN_CHECK(ret, "Failed to get decoder output");

    decoder_ctx_queue_->Put(ctx);

    return out;
  }

  void InitEncoder(void *model_data, size_t model_data_length) {
    InitContext(model_data, model_data_length, config_.debug, &encoder_ctx_);

    InitInputOutputAttrs(encoder_ctx_, config_.debug, &encoder_input_attrs_,
                         &encoder_output_attrs_);

    num_input_frames_ = encoder_input_attrs_[0].dims[1];
    encoder_out_dim_ = encoder_output_attrs_[0].dims[2];
    if (config_.debug) {
      SHERPA_ONNX_LOGE("num_input_frames_: %d", num_input_frames_);
      SHERPA_ONNX_LOGE("encoder_out_dim:: %d", encoder_out_dim_);
    }
  }

  void InitPredictor(void *model_data, size_t model_data_length) {
    InitContext(model_data, model_data_length, config_.debug, &predictor_ctx_);

    InitInputOutputAttrs(predictor_ctx_, config_.debug, &predictor_input_attrs_,
                         &predictor_output_attrs_);
  }

  void InitDecoder(void *model_data, size_t model_data_length) {
    InitContext(model_data, model_data_length, config_.debug, &decoder_ctx_);

    InitInputOutputAttrs(decoder_ctx_, config_.debug, &decoder_input_attrs_,
                         &decoder_output_attrs_);
    vocab_size_ = decoder_output_attrs_[0].dims[2];
    if (config_.debug) {
      SHERPA_ONNX_LOGE("vocab_size: %d", vocab_size_);
    }
  }

  std::vector<float> ApplyLFR(std::vector<float> in) const {
    int32_t lfr_window_size = 7;
    int32_t lfr_window_shift = 6;
    int32_t in_feat_dim = 80;

    int32_t in_num_frames = in.size() / in_feat_dim;
    if (in_num_frames < lfr_window_size) {
      return {};
    }

    int32_t out_num_frames =
        (in_num_frames - lfr_window_size) / lfr_window_shift + 1;

    if (out_num_frames > num_input_frames_) {
      SHERPA_ONNX_LOGE(
          "Number of input frames %d is too large. Truncate it to %d frames.",
          out_num_frames, num_input_frames_);

      SHERPA_ONNX_LOGE(
          "Recognition result may be truncated/incomplete. Please select a "
          "model accepting longer audios.");

      out_num_frames = num_input_frames_;
    }

    int32_t out_feat_dim = in_feat_dim * lfr_window_size;

    std::vector<float> out(num_input_frames_ * out_feat_dim);

    const float *p_in = in.data();
    float *p_out = out.data();

    for (int32_t i = 0; i != out_num_frames; ++i) {
      std::copy(p_in, p_in + out_feat_dim, p_out);

      p_out += out_feat_dim;
      p_in += lfr_window_shift * in_feat_dim;
    }

    return out;
  }

  void PostInit() {
    if (config_.num_threads > 1) {
      config_.num_threads = 1;
    }

    encoder_ctx_queue_ = std::make_unique<ContextBlockingQueueRknn>(
        encoder_ctx_, config_.num_threads);

    predictor_ctx_queue_ = std::make_unique<ContextBlockingQueueRknn>(
        predictor_ctx_, config_.num_threads);

    decoder_ctx_queue_ = std::make_unique<ContextBlockingQueueRknn>(
        decoder_ctx_, config_.num_threads);
  }

 private:
  OfflineModelConfig config_;

  rknn_context encoder_ctx_ = 0;
  rknn_context predictor_ctx_ = 0;
  rknn_context decoder_ctx_ = 0;

  std::unique_ptr<ContextBlockingQueueRknn> encoder_ctx_queue_;
  std::unique_ptr<ContextBlockingQueueRknn> predictor_ctx_queue_;
  std::unique_ptr<ContextBlockingQueueRknn> decoder_ctx_queue_;

  std::vector<rknn_tensor_attr> encoder_input_attrs_;
  std::vector<rknn_tensor_attr> encoder_output_attrs_;

  std::vector<rknn_tensor_attr> predictor_input_attrs_;
  std::vector<rknn_tensor_attr> predictor_output_attrs_;

  std::vector<rknn_tensor_attr> decoder_input_attrs_;
  std::vector<rknn_tensor_attr> decoder_output_attrs_;

  int32_t vocab_size_ = 0;
  int32_t num_input_frames_ = -1;
  int32_t encoder_out_dim_ = -1;
};

OfflineParaformerModelRknn::~OfflineParaformerModelRknn() = default;

OfflineParaformerModelRknn::OfflineParaformerModelRknn(
    const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineParaformerModelRknn::OfflineParaformerModelRknn(
    Manager *mgr, const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

std::vector<float> OfflineParaformerModelRknn::Run(
    std::vector<float> features) const {
  return impl_->Run(std::move(features));
}

int32_t OfflineParaformerModelRknn::VocabSize() const {
  return impl_->VocabSize();
}

#if __ANDROID_API__ >= 9
template OfflineParaformerModelRknn::OfflineParaformerModelRknn(
    AAssetManager *mgr, const OfflineModelConfig &config);
#endif

#if __OHOS__
template OfflineParaformerModelRknn::OfflineParaformerModelRknn(
    NativeResourceManager *mgr, const OfflineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/rknn/offline-paraformer-model-rknn.h
================================================
// sherpa-onnx/csrc/rknn/offline-paraformer-model-rknn.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_RKNN_OFFLINE_PARAFORMER_MODEL_RKNN_H_
#define SHERPA_ONNX_CSRC_RKNN_OFFLINE_PARAFORMER_MODEL_RKNN_H_

#include <memory>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/offline-model-config.h"

namespace sherpa_onnx {

class OfflineParaformerModelRknn {
 public:
  ~OfflineParaformerModelRknn();

  explicit OfflineParaformerModelRknn(const OfflineModelConfig &config);

  template <typename Manager>
  OfflineParaformerModelRknn(Manager *mgr, const OfflineModelConfig &config);

  /**
   * @param features A tensor of shape (num_frames, feature_dim)
   *                 before applying LFR.
   * @returns Return a tensor of shape (num_output_frames, vocab_size)
   */
  std::vector<float> Run(std::vector<float> features) const;

  int32_t VocabSize() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_RKNN_OFFLINE_PARAFORMER_MODEL_RKNN_H_


================================================
FILE: sherpa-onnx/csrc/rknn/offline-sense-voice-model-rknn.cc
================================================
// sherpa-onnx/csrc/rknn/offline-sense-voice-model-rknn.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/rknn/offline-sense-voice-model-rknn.h"

#include <algorithm>
#include <array>
#include <memory>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/rknn/context-blocking-queue-rknn.h"
#include "sherpa-onnx/csrc/rknn/macros.h"
#include "sherpa-onnx/csrc/rknn/utils.h"

namespace sherpa_onnx {

class OfflineSenseVoiceModelRknn::Impl {
 public:
  ~Impl() {
    auto ret = rknn_destroy(ctx_);
    if (ret != RKNN_SUCC) {
      SHERPA_ONNX_LOGE("Failed to destroy the context");
    }
  }

  explicit Impl(const OfflineModelConfig &config) : config_(config) {
    auto buf = ReadFile(config_.sense_voice.model);
    Init(buf.data(), buf.size());

    PostInit();
  }

  template <typename Manager>
  Impl(Manager *mgr, const OfflineModelConfig &config) : config_(config) {
    auto buf = ReadFile(mgr, config_.sense_voice.model);
    Init(buf.data(), buf.size());

    PostInit();
  }

  const OfflineSenseVoiceModelMetaData &GetModelMetadata() const {
    return meta_data_;
  }

  std::vector<float> Run(std::vector<float> features, int32_t language,
                         int32_t text_norm) {
    features = ApplyLFR(std::move(features));
    if (features.empty()) {
      return {};
    }

    std::vector<rknn_input> inputs(input_attrs_.size());

    std::array<int32_t, 4> prompt{language, 1, 2, text_norm};

    inputs[0].index = input_attrs_[0].index;
    inputs[0].type = RKNN_TENSOR_FLOAT32;
    inputs[0].fmt = input_attrs_[0].fmt;
    inputs[0].buf = reinterpret_cast<void *>(features.data());
    inputs[0].size = features.size() * sizeof(float);

    inputs[1].index = input_attrs_[1].index;
    inputs[1].type = RKNN_TENSOR_INT32;
    inputs[1].fmt = input_attrs_[1].fmt;
    inputs[1].buf = reinterpret_cast<void *>(prompt.data());
    inputs[1].size = prompt.size() * sizeof(int32_t);

    std::vector<float> out(output_attrs_[0].n_elems);

    std::vector<rknn_output> outputs(output_attrs_.size());
    outputs[0].index = output_attrs_[0].index;
    outputs[0].is_prealloc = 1;
    outputs[0].want_float = 1;
    outputs[0].size = out.size() * sizeof(float);
    outputs[0].buf = reinterpret_cast<void *>(out.data());

    rknn_context ctx = ctx_queue_->Take();

    auto ret = rknn_inputs_set(ctx, inputs.size(), inputs.data());
    SHERPA_ONNX_RKNN_CHECK(ret, "Failed to set inputs");

    ret = rknn_run(ctx, nullptr);
    SHERPA_ONNX_RKNN_CHECK(ret, "Failed to run the model");

    ret = rknn_outputs_get(ctx, outputs.size(), outputs.data(), nullptr);
    SHERPA_ONNX_RKNN_CHECK(ret, "Failed to get model output");

    ctx_queue_->Put(ctx);

    return out;
  }

 private:
  void Init(void *model_data, size_t model_data_length) {
    InitContext(model_data, model_data_length, config_.debug, &ctx_);

    InitInputOutputAttrs(ctx_, config_.debug, &input_attrs_, &output_attrs_);

    rknn_custom_string custom_string = GetCustomString(ctx_, config_.debug);

    auto meta = Parse(custom_string, config_.debug);

#define SHERPA_ONNX_RKNN_READ_META_DATA_INT(dst, src_key)                     \
  do {                                                                        \
    if (!meta.count(#src_key)) {                                              \
      SHERPA_ONNX_LOGE("'%s' does not exist in the custom_string", #src_key); \
      SHERPA_ONNX_EXIT(-1);                                                   \
    }                                                                         \
                                                                              \
    dst = atoi(meta.at(#src_key).c_str());                                    \
  } while (0)

    SHERPA_ONNX_RKNN_READ_META_DATA_INT(meta_data_.with_itn_id, with_itn);
    SHERPA_ONNX_RKNN_READ_META_DATA_INT(meta_data_.without_itn_id, without_itn);
    SHERPA_ONNX_RKNN_READ_META_DATA_INT(meta_data_.window_size,
                                        lfr_window_size);
    SHERPA_ONNX_RKNN_READ_META_DATA_INT(meta_data_.window_shift,
                                        lfr_window_shift);
    SHERPA_ONNX_RKNN_READ_META_DATA_INT(meta_data_.vocab_size, vocab_size);
    SHERPA_ONNX_RKNN_READ_META_DATA_INT(meta_data_.normalize_samples,
                                        normalize_samples);

    int32_t lang_auto = 0;
    int32_t lang_zh = 0;
    int32_t lang_en = 0;
    int32_t lang_ja = 0;
    int32_t lang_ko = 0;
    int32_t lang_yue = 0;

    SHERPA_ONNX_RKNN_READ_META_DATA_INT(lang_auto, lang_auto);
    SHERPA_ONNX_RKNN_READ_META_DATA_INT(lang_zh, lang_zh);
    SHERPA_ONNX_RKNN_READ_META_DATA_INT(lang_en, lang_en);
    SHERPA_ONNX_RKNN_READ_META_DATA_INT(lang_ja, lang_ja);
    SHERPA_ONNX_RKNN_READ_META_DATA_INT(lang_ko, lang_ko);
    SHERPA_ONNX_RKNN_READ_META_DATA_INT(lang_yue, lang_yue);

    meta_data_.lang2id = {
        {"auto", lang_auto}, {"zh", lang_zh}, {"en", lang_en},
        {"ja", lang_ja},     {"ko", lang_ko}, {"yue", lang_yue},
    };

    // for rknn models, neg_mean and inv_stddev are stored inside the model

#undef SHERPA_ONNX_RKNN_READ_META_DATA_INT

    num_input_frames_ = input_attrs_[0].dims[1];
  }

  std::vector<float> ApplyLFR(std::vector<float> in) const {
    int32_t lfr_window_size = meta_data_.window_size;
    int32_t lfr_window_shift = meta_data_.window_shift;
    int32_t in_feat_dim = 80;

    int32_t in_num_frames = in.size() / in_feat_dim;

    if (in_num_frames < lfr_window_size) {
      return {};
    }

    int32_t out_num_frames =
        (in_num_frames - lfr_window_size) / lfr_window_shift + 1;

    if (out_num_frames > num_input_frames_) {
      SHERPA_ONNX_LOGE(
          "Number of input frames %d is too large. Truncate it to %d frames.",
          out_num_frames, num_input_frames_);

      SHERPA_ONNX_LOGE(
          "Recognition result may be truncated/incomplete. Please select a "
          "model accepting longer audios.");

      out_num_frames = num_input_frames_;
    }

    int32_t out_feat_dim = in_feat_dim * lfr_window_size;

    std::vector<float> out(num_input_frames_ * out_feat_dim);

    const float *p_in = in.data();
    float *p_out = out.data();

    for (int32_t i = 0; i != out_num_frames; ++i) {
      std::copy(p_in, p_in + out_feat_dim, p_out);

      p_out += out_feat_dim;
      p_in += lfr_window_shift * in_feat_dim;
    }

    return out;
  }

  void PostInit() {
    ctx_queue_ =
        std::make_unique<ContextBlockingQueueRknn>(ctx_, config_.num_threads);
  }

 private:
  OfflineModelConfig config_;

  rknn_context ctx_ = 0;
  std::unique_ptr<ContextBlockingQueueRknn> ctx_queue_;

  std::vector<rknn_tensor_attr> input_attrs_;
  std::vector<rknn_tensor_attr> output_attrs_;

  OfflineSenseVoiceModelMetaData meta_data_;
  int32_t num_input_frames_ = -1;
};

OfflineSenseVoiceModelRknn::~OfflineSenseVoiceModelRknn() = default;

OfflineSenseVoiceModelRknn::OfflineSenseVoiceModelRknn(
    const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineSenseVoiceModelRknn::OfflineSenseVoiceModelRknn(
    Manager *mgr, const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

std::vector<float> OfflineSenseVoiceModelRknn::Run(std::vector<float> features,
                                                   int32_t language,
                                                   int32_t text_norm) const {
  return impl_->Run(std::move(features), language, text_norm);
}

const OfflineSenseVoiceModelMetaData &
OfflineSenseVoiceModelRknn::GetModelMetadata() const {
  return impl_->GetModelMetadata();
}

#if __ANDROID_API__ >= 9
template OfflineSenseVoiceModelRknn::OfflineSenseVoiceModelRknn(
    AAssetManager *mgr, const OfflineModelConfig &config);
#endif

#if __OHOS__
template OfflineSenseVoiceModelRknn::OfflineSenseVoiceModelRknn(
    NativeResourceManager *mgr, const OfflineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/rknn/offline-sense-voice-model-rknn.h
================================================
// sherpa-onnx/csrc/rknn/offline-sense-voice-model-rknn.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_RKNN_OFFLINE_SENSE_VOICE_MODEL_RKNN_H_
#define SHERPA_ONNX_CSRC_RKNN_OFFLINE_SENSE_VOICE_MODEL_RKNN_H_

#include <memory>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/offline-model-config.h"
#include "sherpa-onnx/csrc/offline-sense-voice-model-meta-data.h"

namespace sherpa_onnx {

class OfflineSenseVoiceModelRknn {
 public:
  ~OfflineSenseVoiceModelRknn();

  explicit OfflineSenseVoiceModelRknn(const OfflineModelConfig &config);

  template <typename Manager>
  OfflineSenseVoiceModelRknn(Manager *mgr, const OfflineModelConfig &config);

  /**
   * @param features A tensor of shape (num_frames, feature_dim)
   *                 before applying LFR.
   * @param language
   * @param text_norm
   * @returns Return a tensor of shape (num_output_frames, vocab_size)
   */
  std::vector<float> Run(std::vector<float> features, int32_t language,
                         int32_t text_norm) const;

  const OfflineSenseVoiceModelMetaData &GetModelMetadata() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_RKNN_OFFLINE_SENSE_VOICE_MODEL_RKNN_H_


================================================
FILE: sherpa-onnx/csrc/rknn/online-recognizer-ctc-rknn-impl.h
================================================
// sherpa-onnx/csrc/rknn/online-recognizer-ctc-rknn-impl.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_RKNN_ONLINE_RECOGNIZER_CTC_RKNN_IMPL_H_
#define SHERPA_ONNX_CSRC_RKNN_ONLINE_RECOGNIZER_CTC_RKNN_IMPL_H_

#include <algorithm>
#include <ios>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/online-ctc-decoder.h"
#include "sherpa-onnx/csrc/online-ctc-fst-decoder.h"
#include "sherpa-onnx/csrc/online-ctc-greedy-search-decoder.h"
#include "sherpa-onnx/csrc/online-recognizer-impl.h"
#include "sherpa-onnx/csrc/rknn/online-stream-rknn.h"
#include "sherpa-onnx/csrc/rknn/online-zipformer-ctc-model-rknn.h"
#include "sherpa-onnx/csrc/symbol-table.h"

namespace sherpa_onnx {

// defined in ../online-recognizer-ctc-impl.h
OnlineRecognizerResult ConvertCtc(const OnlineCtcDecoderResult &src,
                                  const SymbolTable &sym_table,
                                  float frame_shift_ms,
                                  int32_t subsampling_factor, int32_t segment,
                                  int32_t frames_since_start);

class OnlineRecognizerCtcRknnImpl : public OnlineRecognizerImpl {
 public:
  explicit OnlineRecognizerCtcRknnImpl(const OnlineRecognizerConfig &config)
      : OnlineRecognizerImpl(config),
        config_(config),
        model_(
            std::make_unique<OnlineZipformerCtcModelRknn>(config.model_config)),
        endpoint_(config_.endpoint_config) {
    if (!config.model_config.tokens_buf.empty()) {
      sym_ = SymbolTable(config.model_config.tokens_buf, false);
    } else {
      /// assuming tokens_buf and tokens are guaranteed not being both empty
      sym_ = SymbolTable(config.model_config.tokens, true);
    }

    InitDecoder();
  }

  template <typename Manager>
  explicit OnlineRecognizerCtcRknnImpl(Manager *mgr,
                                       const OnlineRecognizerConfig &config)
      : OnlineRecognizerImpl(mgr, config),
        config_(config),
        model_(std::make_unique<OnlineZipformerCtcModelRknn>(
            mgr, config_.model_config)),
        sym_(mgr, config_.model_config.tokens),
        endpoint_(config_.endpoint_config) {
    InitDecoder();
  }

  std::unique_ptr<OnlineStream> CreateStream() const override {
    auto stream = std::make_unique<OnlineStreamRknn>(config_.feat_config);
    stream->SetZipformerEncoderStates(model_->GetInitStates());
    stream->SetFasterDecoder(decoder_->CreateFasterDecoder());
    return stream;
  }

  bool IsReady(OnlineStream *s) const override {
    return s->GetNumProcessedFrames() + model_->ChunkSize() <
           s->NumFramesReady();
  }

  void DecodeStreams(OnlineStream **ss, int32_t n) const override {
    for (int32_t i = 0; i != n; ++i) {
      DecodeStream(reinterpret_cast<OnlineStreamRknn *>(ss[i]));
    }
  }

  OnlineRecognizerResult GetResult(OnlineStream *s) const override {
    OnlineCtcDecoderResult decoder_result = s->GetCtcResult();

    // TODO(fangjun): Remember to change these constants if needed
    int32_t frame_shift_ms = 10;
    int32_t subsampling_factor = 4;
    auto r =
        ConvertCtc(decoder_result, sym_, frame_shift_ms, subsampling_factor,
                   s->GetCurrentSegment(), s->GetNumFramesSinceStart());
    r.text = ApplyInverseTextNormalization(std::move(r.text));
    r.text = ApplyHomophoneReplacer(std::move(r.text));
    return r;
  }

  bool IsEndpoint(OnlineStream *s) const override {
    if (!config_.enable_endpoint) {
      return false;
    }

    int32_t num_processed_frames = s->GetNumProcessedFrames();

    // frame shift is 10 milliseconds
    float frame_shift_in_seconds = 0.01;

    // subsampling factor is 4
    int32_t trailing_silence_frames = s->GetCtcResult().num_trailing_blanks * 4;

    return endpoint_.IsEndpoint(num_processed_frames, trailing_silence_frames,
                                frame_shift_in_seconds);
  }

  void Reset(OnlineStream *s) const override {
    // segment is incremented only when the last
    // result is not empty
    const auto &r = s->GetCtcResult();
    if (!r.tokens.empty()) {
      s->GetCurrentSegment() += 1;
    }

    // clear result
    s->SetCtcResult({});

    // clear states
    reinterpret_cast<OnlineStreamRknn *>(s)->SetZipformerEncoderStates(
        model_->GetInitStates());

    s->GetFasterDecoderProcessedFrames() = 0;

    // Note: We only update counters. The underlying audio samples
    // are not discarded.
    s->Reset();
  }

 private:
  void InitDecoder() {
    if (!sym_.Contains("<blk>") && !sym_.Contains("<eps>") &&
        !sym_.Contains("<blank>")) {
      SHERPA_ONNX_LOGE(
          "We expect that tokens.txt contains "
          "the symbol <blk> or <eps> or <blank> and its ID.");
      exit(-1);
    }

    int32_t blank_id = 0;
    if (sym_.Contains("<blk>")) {
      blank_id = sym_["<blk>"];
    } else if (sym_.Contains("<eps>")) {
      // for tdnn models of the yesno recipe from icefall
      blank_id = sym_["<eps>"];
    } else if (sym_.Contains("<blank>")) {
      // for WeNet CTC models
      blank_id = sym_["<blank>"];
    }

    if (!config_.ctc_fst_decoder_config.graph.empty()) {
      decoder_ = std::make_unique<OnlineCtcFstDecoder>(
          config_.ctc_fst_decoder_config, blank_id);
    } else if (config_.decoding_method == "greedy_search") {
      decoder_ = std::make_unique<OnlineCtcGreedySearchDecoder>(blank_id);
    } else {
      SHERPA_ONNX_LOGE(
          "Unsupported decoding method: %s for streaming CTC models",
          config_.decoding_method.c_str());
      exit(-1);
    }
  }

  void DecodeStream(OnlineStreamRknn *s) const {
    int32_t chunk_size = model_->ChunkSize();
    int32_t chunk_shift = model_->ChunkShift();

    int32_t feat_dim = s->FeatureDim();

    const auto num_processed_frames = s->GetNumProcessedFrames();
    std::vector<float> features =
        s->GetFrames(num_processed_frames, chunk_size);
    s->GetNumProcessedFrames() += chunk_shift;

    auto &states = s->GetZipformerEncoderStates();
    auto p = model_->Run(features, std::move(states));
    states = std::move(p.second);

    std::vector<OnlineCtcDecoderResult> results(1);
    results[0] = std::move(s->GetCtcResult());

    auto attr = model_->GetOutAttr();

    decoder_->Decode(p.first.data(), attr.dims[0], attr.dims[1], attr.dims[2],
                     &results, reinterpret_cast<OnlineStream **>(&s), 1);
    s->SetCtcResult(results[0]);
  }

 private:
  OnlineRecognizerConfig config_;
  std::unique_ptr<OnlineZipformerCtcModelRknn> model_;
  std::unique_ptr<OnlineCtcDecoder> decoder_;
  SymbolTable sym_;
  Endpoint endpoint_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_RKNN_ONLINE_RECOGNIZER_CTC_RKNN_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/rknn/online-recognizer-transducer-rknn-impl.h
================================================
// sherpa-onnx/csrc/rknn/online-recognizer-transducer-rknn-impl.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_RKNN_ONLINE_RECOGNIZER_TRANSDUCER_RKNN_IMPL_H_
#define SHERPA_ONNX_CSRC_RKNN_ONLINE_RECOGNIZER_TRANSDUCER_RKNN_IMPL_H_

#include <algorithm>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/online-recognizer-impl.h"
#include "sherpa-onnx/csrc/online-recognizer.h"
#include "sherpa-onnx/csrc/rknn/online-stream-rknn.h"
#include "sherpa-onnx/csrc/rknn/online-transducer-decoder-rknn.h"
#include "sherpa-onnx/csrc/rknn/online-transducer-greedy-search-decoder-rknn.h"
#include "sherpa-onnx/csrc/rknn/online-transducer-modified-beam-search-decoder-rknn.h"
#include "sherpa-onnx/csrc/rknn/online-zipformer-transducer-model-rknn.h"
#include "sherpa-onnx/csrc/symbol-table.h"

namespace sherpa_onnx {

OnlineRecognizerResult Convert(const OnlineTransducerDecoderResultRknn &src,
                               const SymbolTable &sym_table,
                               float frame_shift_ms, int32_t subsampling_factor,
                               int32_t segment, int32_t frames_since_start) {
  OnlineRecognizerResult r;
  r.tokens.reserve(src.tokens.size());
  r.timestamps.reserve(src.tokens.size());

  std::string text;
  for (auto i : src.tokens) {
    auto sym = sym_table[i];

    text.append(sym);

    if (sym.size() == 1 && (sym[0] < 0x20 || sym[0] > 0x7e)) {
      // for bpe models with byte_fallback
      // (but don't rewrite printable characters 0x20..0x7e,
      //  which collide with standard BPE units)
      std::ostringstream os;
      os << "<0x" << std::hex << std::uppercase
         << (static_cast<int32_t>(sym[0]) & 0xff) << ">";
      sym = os.str();
    }

    r.tokens.push_back(std::move(sym));
  }

  if (sym_table.IsByteBpe()) {
    text = sym_table.DecodeByteBpe(text);
  }

  r.text = std::move(text);

  float frame_shift_s = frame_shift_ms / 1000. * subsampling_factor;
  for (auto t : src.timestamps) {
    float time = frame_shift_s * t;
    r.timestamps.push_back(time);
  }

  r.segment = segment;
  r.start_time = frames_since_start * frame_shift_ms / 1000.;

  return r;
}

class OnlineRecognizerTransducerRknnImpl : public OnlineRecognizerImpl {
 public:
  explicit OnlineRecognizerTransducerRknnImpl(
      const OnlineRecognizerConfig &config)
      : OnlineRecognizerImpl(config),
        config_(config),
        endpoint_(config_.endpoint_config),
        model_(std::make_unique<OnlineZipformerTransducerModelRknn>(
            config.model_config)) {
    if (!config.model_config.tokens_buf.empty()) {
      sym_ = SymbolTable(config.model_config.tokens_buf, false);
    } else {
      /// assuming tokens_buf and tokens are guaranteed not being both empty
      sym_ = SymbolTable(config.model_config.tokens, true);
    }

    if (sym_.Contains("<unk>")) {
      unk_id_ = sym_["<unk>"];
    }

    if (config.decoding_method == "greedy_search") {
      decoder_ = std::make_unique<OnlineTransducerGreedySearchDecoderRknn>(
          model_.get(), unk_id_);
    } else if (config.decoding_method == "modified_beam_search") {
      decoder_ =
          std::make_unique<OnlineTransducerModifiedBeamSearchDecoderRknn>(
              model_.get(), config.max_active_paths, unk_id_);
    } else {
      SHERPA_ONNX_LOGE(
          "Invalid decoding method: '%s'. Support only greedy_search and "
          "modified_beam_search.",
          config.decoding_method.c_str());
      SHERPA_ONNX_EXIT(-1);
    }
  }

  template <typename Manager>
  explicit OnlineRecognizerTransducerRknnImpl(
      Manager *mgr, const OnlineRecognizerConfig &config)
      : OnlineRecognizerImpl(mgr, config),
        config_(config),
        endpoint_(config_.endpoint_config),
        model_(std::make_unique<OnlineZipformerTransducerModelRknn>(
            mgr, config_.model_config)) {
    if (!config.model_config.tokens_buf.empty()) {
      sym_ = SymbolTable(config.model_config.tokens_buf, false);
    } else {
      /// assuming tokens_buf and tokens are guaranteed not being both empty
      sym_ = SymbolTable(mgr, config.model_config.tokens);
    }

    if (sym_.Contains("<unk>")) {
      unk_id_ = sym_["<unk>"];
    }

    if (config.decoding_method == "greedy_search") {
      decoder_ = std::make_unique<OnlineTransducerGreedySearchDecoderRknn>(
          model_.get(), unk_id_);
    } else if (config.decoding_method == "modified_beam_search") {
      decoder_ =
          std::make_unique<OnlineTransducerModifiedBeamSearchDecoderRknn>(
              model_.get(), config.max_active_paths, unk_id_);
    } else {
      SHERPA_ONNX_LOGE(
          "Invalid decoding method: '%s'. Support only greedy_search and "
          "modified_beam_search.",
          config.decoding_method.c_str());
      SHERPA_ONNX_EXIT(-1);
    }
  }

  std::unique_ptr<OnlineStream> CreateStream() const override {
    auto stream = std::make_unique<OnlineStreamRknn>(config_.feat_config);
    auto r = decoder_->GetEmptyResult();
    stream->SetZipformerResult(std::move(r));
    stream->SetZipformerEncoderStates(model_->GetEncoderInitStates());
    return stream;
  }

  std::unique_ptr<OnlineStream> CreateStream(
      const std::string &hotwords) const override {
    SHERPA_ONNX_LOGE("Hotwords for RKNN is not supported now.");
    return CreateStream();
  }

  bool IsReady(OnlineStream *s) const override {
    return s->GetNumProcessedFrames() + model_->ChunkSize() <
           s->NumFramesReady();
  }

  // Warmping up engine with wp: warm_up count and max-batch-size

  void DecodeStreams(OnlineStream **ss, int32_t n) const override {
    for (int32_t i = 0; i < n; ++i) {
      DecodeStream(reinterpret_cast<OnlineStreamRknn *>(ss[i]));
    }
  }

  OnlineRecognizerResult GetResult(OnlineStream *s) const override {
    OnlineTransducerDecoderResultRknn decoder_result =
        reinterpret_cast<OnlineStreamRknn *>(s)->GetZipformerResult();
    decoder_->StripLeadingBlanks(&decoder_result);
    // TODO(fangjun): Remember to change these constants if needed
    int32_t frame_shift_ms = 10;
    int32_t subsampling_factor = 4;
    auto r = Convert(decoder_result, sym_, frame_shift_ms, subsampling_factor,
                     s->GetCurrentSegment(), s->GetNumFramesSinceStart());
    r.text = ApplyInverseTextNormalization(std::move(r.text));
    r.text = ApplyHomophoneReplacer(std::move(r.text));
    return r;
  }

  bool IsEndpoint(OnlineStream *s) const override {
    if (!config_.enable_endpoint) {
      return false;
    }

    int32_t num_processed_frames = s->GetNumProcessedFrames();

    // frame shift is 10 milliseconds
    float frame_shift_in_seconds = 0.01;

    // subsampling factor is 4
    int32_t trailing_silence_frames = reinterpret_cast<OnlineStreamRknn *>(s)
                                          ->GetZipformerResult()
                                          .num_trailing_blanks *
                                      4;

    return endpoint_.IsEndpoint(num_processed_frames, trailing_silence_frames,
                                frame_shift_in_seconds);
  }

  void Reset(OnlineStream *s) const override {
    int32_t context_size = model_->ContextSize();

    {
      // segment is incremented only when the last
      // result is not empty, contains non-blanks and longer than context_size)
      const auto &r =
          reinterpret_cast<OnlineStreamRknn *>(s)->GetZipformerResult();
      if (!r.tokens.empty() && r.tokens.back() != 0 &&
          r.tokens.size() > context_size) {
        s->GetCurrentSegment() += 1;
      }
    }

    // reset encoder states
    // reinterpret_cast<OnlineStreamRknn*>(s)->SetZipformerEncoderStates(model_->GetEncoderInitStates());
    auto r = decoder_->GetEmptyResult();
    auto last_result =
        reinterpret_cast<OnlineStreamRknn *>(s)->GetZipformerResult();

    // if last result is not empty, then
    // preserve last tokens as the context for next result
    if (static_cast<int32_t>(last_result.tokens.size()) > context_size) {
      r.tokens = {last_result.tokens.end() - context_size,
                  last_result.tokens.end()};
    }
    reinterpret_cast<OnlineStreamRknn *>(s)->SetZipformerResult(std::move(r));

    // Note: We only update counters. The underlying audio samples
    // are not discarded.
    s->Reset();
  }

 private:
  void DecodeStream(OnlineStreamRknn *s) const {
    int32_t chunk_size = model_->ChunkSize();
    int32_t chunk_shift = model_->ChunkShift();

    int32_t feature_dim = s->FeatureDim();

    const auto num_processed_frames = s->GetNumProcessedFrames();

    std::vector<float> features =
        s->GetFrames(num_processed_frames, chunk_size);
    s->GetNumProcessedFrames() += chunk_shift;

    auto &states = s->GetZipformerEncoderStates();

    auto p = model_->RunEncoder(features, std::move(states));
    states = std::move(p.second);

    auto &r = s->GetZipformerResult();
    decoder_->Decode(std::move(p.first), &r);
  }

 private:
  OnlineRecognizerConfig config_;
  SymbolTable sym_;
  Endpoint endpoint_;
  int32_t unk_id_ = -1;
  std::unique_ptr<OnlineZipformerTransducerModelRknn> model_;
  std::unique_ptr<OnlineTransducerDecoderRknn> decoder_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_RKNN_ONLINE_RECOGNIZER_TRANSDUCER_RKNN_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/rknn/online-stream-rknn.cc
================================================
// sherpa-onnx/csrc/rknn/online-stream-rknn.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/rknn/online-stream-rknn.h"

#include <utility>
#include <vector>

namespace sherpa_onnx {

class OnlineStreamRknn::Impl {
 public:
  void SetZipformerEncoderStates(std::vector<std::vector<uint8_t>> states) {
    states_ = std::move(states);
  }

  std::vector<std::vector<uint8_t>> &GetZipformerEncoderStates() {
    return states_;
  }

  void SetZipformerResult(OnlineTransducerDecoderResultRknn r) {
    result_ = std::move(r);
  }

  OnlineTransducerDecoderResultRknn &GetZipformerResult() { return result_; }

 private:
  std::vector<std::vector<uint8_t>> states_;
  OnlineTransducerDecoderResultRknn result_;
};

OnlineStreamRknn::OnlineStreamRknn(
    const FeatureExtractorConfig &config /*= {}*/,
    ContextGraphPtr context_graph /*= nullptr*/)
    : OnlineStream(config, context_graph), impl_(std::make_unique<Impl>()) {}

OnlineStreamRknn::~OnlineStreamRknn() = default;

void OnlineStreamRknn::SetZipformerEncoderStates(
    std::vector<std::vector<uint8_t>> states) const {
  impl_->SetZipformerEncoderStates(std::move(states));
}

std::vector<std::vector<uint8_t>> &OnlineStreamRknn::GetZipformerEncoderStates()
    const {
  return impl_->GetZipformerEncoderStates();
}

void OnlineStreamRknn::SetZipformerResult(
    OnlineTransducerDecoderResultRknn r) const {
  impl_->SetZipformerResult(std::move(r));
}

OnlineTransducerDecoderResultRknn &OnlineStreamRknn::GetZipformerResult()
    const {
  return impl_->GetZipformerResult();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/rknn/online-stream-rknn.h
================================================
// sherpa-onnx/csrc/rknn/online-stream-rknn.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_RKNN_ONLINE_STREAM_RKNN_H_
#define SHERPA_ONNX_CSRC_RKNN_ONLINE_STREAM_RKNN_H_
#include <memory>
#include <vector>

#include "rknn_api.h"  // NOLINT
#include "sherpa-onnx/csrc/online-stream.h"
#include "sherpa-onnx/csrc/rknn/online-transducer-decoder-rknn.h"

namespace sherpa_onnx {

class OnlineStreamRknn : public OnlineStream {
 public:
  explicit OnlineStreamRknn(const FeatureExtractorConfig &config = {},
                            ContextGraphPtr context_graph = nullptr);

  ~OnlineStreamRknn();

  void SetZipformerEncoderStates(
      std::vector<std::vector<uint8_t>> states) const;

  std::vector<std::vector<uint8_t>> &GetZipformerEncoderStates() const;

  void SetZipformerResult(OnlineTransducerDecoderResultRknn r) const;

  OnlineTransducerDecoderResultRknn &GetZipformerResult() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_RKNN_ONLINE_STREAM_RKNN_H_


================================================
FILE: sherpa-onnx/csrc/rknn/online-transducer-decoder-rknn.h
================================================
// sherpa-onnx/csrc/rknn/online-transducer-decoder-rknn.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_RKNN_ONLINE_TRANSDUCER_DECODER_RKNN_H_
#define SHERPA_ONNX_CSRC_RKNN_ONLINE_TRANSDUCER_DECODER_RKNN_H_

#include <vector>

#include "sherpa-onnx/csrc/hypothesis.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

struct OnlineTransducerDecoderResultRknn {
  /// Number of frames after subsampling we have decoded so far
  int32_t frame_offset = 0;

  /// The decoded token IDs so far
  std::vector<int64_t> tokens;

  /// number of trailing blank frames decoded so far
  int32_t num_trailing_blanks = 0;

  /// timestamps[i] contains the output frame index where tokens[i] is decoded.
  std::vector<int32_t> timestamps;

  // used only by greedy_search
  std::vector<float> previous_decoder_out;

  // used only in modified beam_search
  Hypotheses hyps;

  // used only by modified_beam_search
  std::vector<std::vector<float>> previous_decoder_out2;
};

class OnlineTransducerDecoderRknn {
 public:
  virtual ~OnlineTransducerDecoderRknn() = default;

  /* Return an empty result.
   *
   * To simplify the decoding code, we add `context_size` blanks
   * to the beginning of the decoding result, which will be
   * stripped by calling `StripPrecedingBlanks()`.
   */
  virtual OnlineTransducerDecoderResultRknn GetEmptyResult() const = 0;

  /** Strip blanks added by `GetEmptyResult()`.
   *
   * @param r It is changed in-place.
   */
  virtual void StripLeadingBlanks(
      OnlineTransducerDecoderResultRknn * /*r*/) const {}

  virtual void Decode(std::vector<float> encoder_out,
                      OnlineTransducerDecoderResultRknn *result) const = 0;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_RKNN_ONLINE_TRANSDUCER_DECODER_RKNN_H_


================================================
FILE: sherpa-onnx/csrc/rknn/online-transducer-greedy-search-decoder-rknn.cc
================================================
// sherpa-onnx/csrc/rknn/online-transducer-greedy-search-decoder-rknn.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/rknn/online-transducer-greedy-search-decoder-rknn.h"

#include <algorithm>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

OnlineTransducerDecoderResultRknn
OnlineTransducerGreedySearchDecoderRknn::GetEmptyResult() const {
  int32_t context_size = model_->ContextSize();
  int32_t blank_id = 0;  // always 0
  OnlineTransducerDecoderResultRknn r;
  r.tokens.resize(context_size, -1);
  r.tokens.back() = blank_id;

  return r;
}

void OnlineTransducerGreedySearchDecoderRknn::StripLeadingBlanks(
    OnlineTransducerDecoderResultRknn *r) const {
  int32_t context_size = model_->ContextSize();

  auto start = r->tokens.begin() + context_size;
  auto end = r->tokens.end();

  r->tokens = std::vector<int64_t>(start, end);
}

void OnlineTransducerGreedySearchDecoderRknn::Decode(
    std::vector<float> encoder_out,
    OnlineTransducerDecoderResultRknn *result) const {
  auto &r = result[0];
  auto attr = model_->GetEncoderOutAttr();
  int32_t num_frames = attr.dims[1];
  int32_t encoder_out_dim = attr.dims[2];

  int32_t vocab_size = model_->VocabSize();
  int32_t context_size = model_->ContextSize();

  std::vector<int64_t> decoder_input;
  std::vector<float> decoder_out;

  if (r.previous_decoder_out.empty()) {
    decoder_input = {r.tokens.begin() + (r.tokens.size() - context_size),
                     r.tokens.end()};
    decoder_out = model_->RunDecoder(std::move(decoder_input));

  } else {
    decoder_out = std::move(r.previous_decoder_out);
  }

  const float *p_encoder_out = encoder_out.data();
  for (int32_t t = 0; t != num_frames; ++t) {
    auto logit = model_->RunJoiner(p_encoder_out, decoder_out.data());
    p_encoder_out += encoder_out_dim;

    bool emitted = false;
    if (blank_penalty_ > 0.0) {
      logit[0] -= blank_penalty_;  // assuming blank id is 0
    }

    auto y = static_cast<int32_t>(std::distance(
        logit.data(),
        std::max_element(logit.data(), logit.data() + vocab_size)));
    // blank id is hardcoded to 0
    // also, it treats unk as blank
    if (y != 0 && y != unk_id_) {
      emitted = true;
      r.tokens.push_back(y);
      r.timestamps.push_back(t + r.frame_offset);
      r.num_trailing_blanks = 0;
    } else {
      ++r.num_trailing_blanks;
    }

    if (emitted) {
      decoder_input = {r.tokens.begin() + (r.tokens.size() - context_size),
                       r.tokens.end()};
      decoder_out = model_->RunDecoder(std::move(decoder_input));
    }
  }

  r.frame_offset += num_frames;
  r.previous_decoder_out = std::move(decoder_out);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/rknn/online-transducer-greedy-search-decoder-rknn.h
================================================
// sherpa-onnx/csrc/rknn/online-transducer-greedy-search-decoder-rknn.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_RKNN_ONLINE_TRANSDUCER_GREEDY_SEARCH_DECODER_RKNN_H_
#define SHERPA_ONNX_CSRC_RKNN_ONLINE_TRANSDUCER_GREEDY_SEARCH_DECODER_RKNN_H_

#include <vector>

#include "sherpa-onnx/csrc/rknn/online-transducer-decoder-rknn.h"
#include "sherpa-onnx/csrc/rknn/online-transducer-greedy-search-decoder-rknn.h"
#include "sherpa-onnx/csrc/rknn/online-zipformer-transducer-model-rknn.h"

namespace sherpa_onnx {

class OnlineTransducerGreedySearchDecoderRknn
    : public OnlineTransducerDecoderRknn {
 public:
  explicit OnlineTransducerGreedySearchDecoderRknn(
      OnlineZipformerTransducerModelRknn *model, int32_t unk_id = 2,
      float blank_penalty = 0.0)
      : model_(model), unk_id_(unk_id), blank_penalty_(blank_penalty) {}

  OnlineTransducerDecoderResultRknn GetEmptyResult() const override;

  void StripLeadingBlanks(OnlineTransducerDecoderResultRknn *r) const override;

  void Decode(std::vector<float> encoder_out,
              OnlineTransducerDecoderResultRknn *result) const override;

 private:
  OnlineZipformerTransducerModelRknn *model_;  // Not owned
  int32_t unk_id_;
  float blank_penalty_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_RKNN_ONLINE_TRANSDUCER_GREEDY_SEARCH_DECODER_RKNN_H_


================================================
FILE: sherpa-onnx/csrc/rknn/online-transducer-modified-beam-search-decoder-rknn.cc
================================================
// sherpa-onnx/csrc/rknn/online-transducer-modified-beam-search-decoder-rknn.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/rknn/online-transducer-modified-beam-search-decoder-rknn.h"

#include <algorithm>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/hypothesis.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/math.h"

namespace sherpa_onnx {

OnlineTransducerDecoderResultRknn
OnlineTransducerModifiedBeamSearchDecoderRknn::GetEmptyResult() const {
  int32_t context_size = model_->ContextSize();
  int32_t blank_id = 0;  // always 0
  OnlineTransducerDecoderResultRknn r;

  std::vector<int64_t> blanks(context_size, -1);
  blanks.back() = blank_id;

  Hypotheses blank_hyp({{blanks, 0}});
  r.hyps = std::move(blank_hyp);
  r.tokens = std::move(blanks);

  return r;
}

void OnlineTransducerModifiedBeamSearchDecoderRknn::StripLeadingBlanks(
    OnlineTransducerDecoderResultRknn *r) const {
  int32_t context_size = model_->ContextSize();
  auto hyp = r->hyps.GetMostProbable(true);

  std::vector<int64_t> tokens(hyp.ys.begin() + context_size, hyp.ys.end());
  r->tokens = std::move(tokens);
  r->timestamps = std::move(hyp.timestamps);

  r->num_trailing_blanks = hyp.num_trailing_blanks;
}

std::vector<std::vector<float>> GetDecoderOut(
    OnlineZipformerTransducerModelRknn *model, const Hypotheses &hyp_vec) {
  std::vector<std::vector<float>> ans;
  ans.reserve(hyp_vec.Size());

  int32_t context_size = model->ContextSize();
  for (const auto &p : hyp_vec) {
    const auto &hyp = p.second;
    auto start = hyp.ys.begin() + (hyp.ys.size() - context_size);
    auto end = hyp.ys.end();
    auto tokens = std::vector<int64_t>(start, end);
    auto decoder_out = model->RunDecoder(std::move(tokens));

    ans.push_back(std::move(decoder_out));
  }

  return ans;
}

std::vector<std::vector<float>> GetJoinerOutLogSoftmax(
    OnlineZipformerTransducerModelRknn *model, const float *p_encoder_out,
    const std::vector<std::vector<float>> &decoder_out) {
  std::vector<std::vector<float>> ans;
  ans.reserve(decoder_out.size());

  for (const auto &d : decoder_out) {
    auto joiner_out = model->RunJoiner(p_encoder_out, d.data());

    LogSoftmax(joiner_out.data(), joiner_out.size());

    ans.push_back(std::move(joiner_out));
  }
  return ans;
}

void OnlineTransducerModifiedBeamSearchDecoderRknn::Decode(
    std::vector<float> encoder_out,
    OnlineTransducerDecoderResultRknn *result) const {
  auto &r = result[0];
  auto attr = model_->GetEncoderOutAttr();
  int32_t num_frames = attr.dims[1];
  int32_t encoder_out_dim = attr.dims[2];

  int32_t vocab_size = model_->VocabSize();
  int32_t context_size = model_->ContextSize();

  Hypotheses cur = std::move(result->hyps);
  std::vector<Hypothesis> prev;

  auto decoder_out = std::move(result->previous_decoder_out2);
  if (decoder_out.empty()) {
    decoder_out = GetDecoderOut(model_, cur);
  }

  const float *p_encoder_out = encoder_out.data();

  int32_t frame_offset = result->frame_offset;

  for (int32_t t = 0; t != num_frames; ++t) {
    prev = cur.Vec();
    cur.Clear();

    auto log_probs = GetJoinerOutLogSoftmax(model_, p_encoder_out, decoder_out);
    p_encoder_out += encoder_out_dim;

    for (int32_t i = 0; i != prev.size(); ++i) {
      auto log_prob = prev[i].log_prob;
      for (auto &p : log_probs[i]) {
        p += log_prob;
      }
    }

    auto topk = TopkIndex(log_probs, max_active_paths_);
    for (auto k : topk) {
      int32_t hyp_index = k / vocab_size;
      int32_t new_token = k % vocab_size;

      Hypothesis new_hyp = prev[hyp_index];
      new_hyp.log_prob = log_probs[hyp_index][new_token];

      // blank is hardcoded to 0
      // also, it treats unk as blank
      if (new_token != 0 && new_token != unk_id_) {
        new_hyp.ys.push_back(new_token);
        new_hyp.timestamps.push_back(t + frame_offset);
        new_hyp.num_trailing_blanks = 0;

      } else {
        ++new_hyp.num_trailing_blanks;
      }
      cur.Add(std::move(new_hyp));
    }

    decoder_out = GetDecoderOut(model_, cur);
  }

  result->hyps = std::move(cur);
  result->frame_offset += num_frames;
  result->previous_decoder_out2 = std::move(decoder_out);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/rknn/online-transducer-modified-beam-search-decoder-rknn.h
================================================
// sherpa-onnx/csrc/rknn/online-transducer-modified-beam-search-decoder-rknn.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_RKNN_ONLINE_TRANSDUCER_MODIFIED_BEAM_SEARCH_DECODER_RKNN_H_
#define SHERPA_ONNX_CSRC_RKNN_ONLINE_TRANSDUCER_MODIFIED_BEAM_SEARCH_DECODER_RKNN_H_

#include <vector>

#include "sherpa-onnx/csrc/rknn/online-transducer-decoder-rknn.h"
#include "sherpa-onnx/csrc/rknn/online-zipformer-transducer-model-rknn.h"

namespace sherpa_onnx {

class OnlineTransducerModifiedBeamSearchDecoderRknn
    : public OnlineTransducerDecoderRknn {
 public:
  explicit OnlineTransducerModifiedBeamSearchDecoderRknn(
      OnlineZipformerTransducerModelRknn *model, int32_t max_active_paths,
      int32_t unk_id = 2, float blank_penalty = 0.0)
      : model_(model),
        max_active_paths_(max_active_paths),
        unk_id_(unk_id),
        blank_penalty_(blank_penalty) {}

  OnlineTransducerDecoderResultRknn GetEmptyResult() const override;

  void StripLeadingBlanks(OnlineTransducerDecoderResultRknn *r) const override;

  void Decode(std::vector<float> encoder_out,
              OnlineTransducerDecoderResultRknn *result) const override;

 private:
  OnlineZipformerTransducerModelRknn *model_;  // Not owned
  int32_t max_active_paths_;
  int32_t unk_id_;
  float blank_penalty_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_RKNN_ONLINE_TRANSDUCER_MODIFIED_BEAM_SEARCH_DECODER_RKNN_H_


================================================
FILE: sherpa-onnx/csrc/rknn/online-zipformer-ctc-model-rknn.cc
================================================
// sherpa-onnx/csrc/rknn/online-zipformer-ctc-model-rknn.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/rknn/online-zipformer-ctc-model-rknn.h"

#include <memory>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/rknn/context-blocking-queue-rknn.h"
#include "sherpa-onnx/csrc/rknn/macros.h"
#include "sherpa-onnx/csrc/rknn/utils.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class OnlineZipformerCtcModelRknn::Impl {
 public:
  ~Impl() {
    auto ret = rknn_destroy(ctx_);
    if (ret != RKNN_SUCC) {
      SHERPA_ONNX_LOGE("Failed to destroy the context");
    }
  }

  explicit Impl(const OnlineModelConfig &config) : config_(config) {
    auto buf = ReadFile(config.zipformer2_ctc.model);
    Init(buf.data(), buf.size());

    PostInit();
  }

  template <typename Manager>
  Impl(Manager *mgr, const OnlineModelConfig &config) : config_(config) {
    auto buf = ReadFile(mgr, config.zipformer2_ctc.model);
    Init(buf.data(), buf.size());

    PostInit();
  }

  std::vector<std::vector<uint8_t>> GetInitStates() const {
    // input_attrs_[0] is for the feature
    // input_attrs_[1:] is for states
    // so we use -1 here
    std::vector<std::vector<uint8_t>> states(input_attrs_.size() - 1);

    int32_t i = -1;
    for (auto &attr : input_attrs_) {
      i += 1;
      if (i == 0) {
        // skip processing the attr for features.
        continue;
      }

      if (attr.type == RKNN_TENSOR_FLOAT16) {
        states[i - 1].resize(attr.n_elems * sizeof(float));
      } else if (attr.type == RKNN_TENSOR_INT64) {
        states[i - 1].resize(attr.n_elems * sizeof(int64_t));
      } else {
        SHERPA_ONNX_LOGE("Unsupported tensor type: %d, %s", attr.type,
                         get_type_string(attr.type));
        SHERPA_ONNX_EXIT(-1);
      }
    }

    return states;
  }

  std::pair<std::vector<float>, std::vector<std::vector<uint8_t>>> Run(
      std::vector<float> features, std::vector<std::vector<uint8_t>> states) {
    std::vector<rknn_input> inputs(input_attrs_.size());

    for (int32_t i = 0; i < static_cast<int32_t>(inputs.size()); ++i) {
      auto &input = inputs[i];
      auto &attr = input_attrs_[i];
      input.index = attr.index;

      if (attr.type == RKNN_TENSOR_FLOAT16) {
        input.type = RKNN_TENSOR_FLOAT32;
      } else if (attr.type == RKNN_TENSOR_INT64) {
        input.type = RKNN_TENSOR_INT64;
      } else {
        SHERPA_ONNX_LOGE("Unsupported tensor type %d, %s", attr.type,
                         get_type_string(attr.type));
        SHERPA_ONNX_EXIT(-1);
      }

      input.fmt = attr.fmt;
      if (i == 0) {
        input.buf = reinterpret_cast<void *>(features.data());
        input.size = features.size() * sizeof(float);
      } else {
        input.buf = reinterpret_cast<void *>(states[i - 1].data());
        input.size = states[i - 1].size();
      }
    }

    std::vector<float> out(output_attrs_[0].n_elems);

    // Note(fangjun): We can reuse the memory from input argument `states`
    // auto next_states = GetInitStates();
    auto &next_states = states;

    std::vector<rknn_output> outputs(output_attrs_.size());
    for (int32_t i = 0; i < outputs.size(); ++i) {
      auto &output = outputs[i];
      auto &attr = output_attrs_[i];
      output.index = attr.index;
      output.is_prealloc = 1;

      if (attr.type == RKNN_TENSOR_FLOAT16) {
        output.want_float = 1;
      } else if (attr.type == RKNN_TENSOR_INT64) {
        output.want_float = 0;
      } else {
        SHERPA_ONNX_LOGE("Unsupported tensor type %d, %s", attr.type,
                         get_type_string(attr.type));
        SHERPA_ONNX_EXIT(-1);
      }

      if (i == 0) {
        output.size = out.size() * sizeof(float);
        output.buf = reinterpret_cast<void *>(out.data());
      } else {
        output.size = next_states[i - 1].size();
        output.buf = reinterpret_cast<void *>(next_states[i - 1].data());
      }
    }

    rknn_context ctx = ctx_queue_->Take();

    auto ret = rknn_inputs_set(ctx, inputs.size(), inputs.data());
    SHERPA_ONNX_RKNN_CHECK(ret, "Failed to set inputs");

    ret = rknn_run(ctx, nullptr);
    SHERPA_ONNX_RKNN_CHECK(ret, "Failed to run the model");

    ret = rknn_outputs_get(ctx, outputs.size(), outputs.data(), nullptr);
    SHERPA_ONNX_RKNN_CHECK(ret, "Failed to get model output");

    for (int32_t i = 0; i < next_states.size(); ++i) {
      const auto &attr = input_attrs_[i + 1];
      if (attr.n_dims == 4) {
        // TODO(fangjun): The transpose is copied from
        // https://github.com/airockchip/rknn_model_zoo/blob/main/examples/zipformer/cpp/process.cc#L22
        // I don't understand why we need to do that.
        std::vector<uint8_t> dst(next_states[i].size());
        int32_t n = attr.dims[0];
        int32_t h = attr.dims[1];
        int32_t w = attr.dims[2];
        int32_t c = attr.dims[3];
        ConvertNCHWtoNHWC(
            reinterpret_cast<const float *>(next_states[i].data()), n, c, h, w,
            reinterpret_cast<float *>(dst.data()));
        next_states[i] = std::move(dst);
      }
    }

    ctx_queue_->Put(ctx);

    return {std::move(out), std::move(next_states)};
  }

  int32_t ChunkSize() const { return T_; }

  int32_t ChunkShift() const { return decode_chunk_len_; }

  int32_t VocabSize() const { return vocab_size_; }

  rknn_tensor_attr GetOutAttr() const { return output_attrs_[0]; }

 private:
  void Init(void *model_data, size_t model_data_length) {
    InitContext(model_data, model_data_length, config_.debug, &ctx_);

    InitInputOutputAttrs(ctx_, config_.debug, &input_attrs_, &output_attrs_);

    rknn_custom_string custom_string = GetCustomString(ctx_, config_.debug);

    auto meta = Parse(custom_string, config_.debug);

    if (meta.count("T")) {
      T_ = atoi(meta.at("T").c_str());
    }

    if (meta.count("decode_chunk_len")) {
      decode_chunk_len_ = atoi(meta.at("decode_chunk_len").c_str());
    }

    vocab_size_ = output_attrs_[0].dims[2];

    if (config_.debug) {
#if __OHOS__
      SHERPA_ONNX_LOGE("T: %{public}d", T_);
      SHERPA_ONNX_LOGE("decode_chunk_len_: %{public}d", decode_chunk_len_);
      SHERPA_ONNX_LOGE("vocab_size: %{public}d", vocab_size);
#else
      SHERPA_ONNX_LOGE("T: %d", T_);
      SHERPA_ONNX_LOGE("decode_chunk_len_: %d", decode_chunk_len_);
      SHERPA_ONNX_LOGE("vocab_size: %d", vocab_size_);
#endif
    }

    if (T_ == 0) {
      SHERPA_ONNX_LOGE(
          "Invalid T. Please use the script from icefall to export your model");
      SHERPA_ONNX_EXIT(-1);
    }

    if (decode_chunk_len_ == 0) {
      SHERPA_ONNX_LOGE(
          "Invalid decode_chunk_len. Please use the script from icefall to "
          "export your model");
      SHERPA_ONNX_EXIT(-1);
    }
  }

  void PostInit() {
    ctx_queue_ =
        std::make_unique<ContextBlockingQueueRknn>(ctx_, config_.num_threads);
  }

 private:
  OnlineModelConfig config_;
  rknn_context ctx_ = 0;
  std::unique_ptr<ContextBlockingQueueRknn> ctx_queue_;

  std::vector<rknn_tensor_attr> input_attrs_;
  std::vector<rknn_tensor_attr> output_attrs_;

  int32_t T_ = 0;
  int32_t decode_chunk_len_ = 0;
  int32_t vocab_size_ = 0;
};

OnlineZipformerCtcModelRknn::~OnlineZipformerCtcModelRknn() = default;

OnlineZipformerCtcModelRknn::OnlineZipformerCtcModelRknn(
    const OnlineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OnlineZipformerCtcModelRknn::OnlineZipformerCtcModelRknn(
    Manager *mgr, const OnlineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

std::vector<std::vector<uint8_t>> OnlineZipformerCtcModelRknn::GetInitStates()
    const {
  return impl_->GetInitStates();
}

std::pair<std::vector<float>, std::vector<std::vector<uint8_t>>>
OnlineZipformerCtcModelRknn::Run(
    std::vector<float> features,
    std::vector<std::vector<uint8_t>> states) const {
  return impl_->Run(std::move(features), std::move(states));
}

int32_t OnlineZipformerCtcModelRknn::ChunkSize() const {
  return impl_->ChunkSize();
}

int32_t OnlineZipformerCtcModelRknn::ChunkShift() const {
  return impl_->ChunkShift();
}

int32_t OnlineZipformerCtcModelRknn::VocabSize() const {
  return impl_->VocabSize();
}

rknn_tensor_attr OnlineZipformerCtcModelRknn::GetOutAttr() const {
  return impl_->GetOutAttr();
}

#if __ANDROID_API__ >= 9
template OnlineZipformerCtcModelRknn::OnlineZipformerCtcModelRknn(
    AAssetManager *mgr, const OnlineModelConfig &config);
#endif

#if __OHOS__
template OnlineZipformerCtcModelRknn::OnlineZipformerCtcModelRknn(
    NativeResourceManager *mgr, const OnlineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/rknn/online-zipformer-ctc-model-rknn.h
================================================
// sherpa-onnx/csrc/rknn/online-zipformer-ctc-model-rknn.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_RKNN_ONLINE_ZIPFORMER_CTC_MODEL_RKNN_H_
#define SHERPA_ONNX_CSRC_RKNN_ONLINE_ZIPFORMER_CTC_MODEL_RKNN_H_

#include <memory>
#include <utility>
#include <vector>

#include "rknn_api.h"  // NOLINT
#include "sherpa-onnx/csrc/online-model-config.h"

namespace sherpa_onnx {

class OnlineZipformerCtcModelRknn {
 public:
  ~OnlineZipformerCtcModelRknn();

  explicit OnlineZipformerCtcModelRknn(const OnlineModelConfig &config);

  template <typename Manager>
  OnlineZipformerCtcModelRknn(Manager *mgr, const OnlineModelConfig &config);

  std::vector<std::vector<uint8_t>> GetInitStates() const;

  std::pair<std::vector<float>, std::vector<std::vector<uint8_t>>> Run(
      std::vector<float> features,
      std::vector<std::vector<uint8_t>> states) const;

  int32_t ChunkSize() const;

  int32_t ChunkShift() const;

  int32_t VocabSize() const;

  rknn_tensor_attr GetOutAttr() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_RKNN_ONLINE_ZIPFORMER_CTC_MODEL_RKNN_H_


================================================
FILE: sherpa-onnx/csrc/rknn/online-zipformer-transducer-model-rknn.cc
================================================
// sherpa-onnx/csrc/rknn/online-zipformer-transducer-model-rknn.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/rknn/online-zipformer-transducer-model-rknn.h"

#include <memory>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/rknn/context-blocking-queue-rknn.h"
#include "sherpa-onnx/csrc/rknn/macros.h"
#include "sherpa-onnx/csrc/rknn/utils.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class OnlineZipformerTransducerModelRknn::Impl {
 public:
  ~Impl() {
    auto ret = rknn_destroy(encoder_ctx_);
    if (ret != RKNN_SUCC) {
      SHERPA_ONNX_LOGE("Failed to destroy the encoder context");
    }

    ret = rknn_destroy(decoder_ctx_);
    if (ret != RKNN_SUCC) {
      SHERPA_ONNX_LOGE("Failed to destroy the decoder context");
    }

    ret = rknn_destroy(joiner_ctx_);
    if (ret != RKNN_SUCC) {
      SHERPA_ONNX_LOGE("Failed to destroy the joiner context");
    }
  }

  explicit Impl(const OnlineModelConfig &config) : config_(config) {
    {
      auto buf = ReadFile(config.transducer.encoder);
      InitEncoder(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(config.transducer.decoder);
      InitDecoder(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(config.transducer.joiner);
      InitJoiner(buf.data(), buf.size());
    }

    PostInit();
  }

  template <typename Manager>
  Impl(Manager *mgr, const OnlineModelConfig &config) : config_(config) {
    {
      auto buf = ReadFile(mgr, config.transducer.encoder);
      InitEncoder(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(mgr, config.transducer.decoder);
      InitDecoder(buf.data(), buf.size());
    }

    {
      auto buf = ReadFile(mgr, config.transducer.joiner);
      InitJoiner(buf.data(), buf.size());
    }

    PostInit();
  }

  std::vector<std::vector<uint8_t>> GetEncoderInitStates() const {
    // encoder_input_attrs_[0] is for the feature
    // encoder_input_attrs_[1:] is for states
    // so we use -1 here
    std::vector<std::vector<uint8_t>> states(encoder_input_attrs_.size() - 1);

    int32_t i = -1;
    for (auto &attr : encoder_input_attrs_) {
      i += 1;
      if (i == 0) {
        // skip processing the attr for features.
        continue;
      }

      if (attr.type == RKNN_TENSOR_FLOAT16) {
        states[i - 1].resize(attr.n_elems * sizeof(float));
      } else if (attr.type == RKNN_TENSOR_INT64) {
        states[i - 1].resize(attr.n_elems * sizeof(int64_t));
      } else {
        SHERPA_ONNX_LOGE("Unsupported tensor type: %d, %s", attr.type,
                         get_type_string(attr.type));
        SHERPA_ONNX_EXIT(-1);
      }
    }

    return states;
  }

  std::pair<std::vector<float>, std::vector<std::vector<uint8_t>>> RunEncoder(
      std::vector<float> features, std::vector<std::vector<uint8_t>> states) {
    std::vector<rknn_input> inputs(encoder_input_attrs_.size());

    for (int32_t i = 0; i < static_cast<int32_t>(inputs.size()); ++i) {
      auto &input = inputs[i];
      auto &attr = encoder_input_attrs_[i];
      input.index = attr.index;

      if (attr.type == RKNN_TENSOR_FLOAT16) {
        input.type = RKNN_TENSOR_FLOAT32;
      } else if (attr.type == RKNN_TENSOR_INT64) {
        input.type = RKNN_TENSOR_INT64;
      } else {
        SHERPA_ONNX_LOGE("Unsupported tensor type %d, %s", attr.type,
                         get_type_string(attr.type));
        SHERPA_ONNX_EXIT(-1);
      }

      input.fmt = attr.fmt;
      if (i == 0) {
        input.buf = reinterpret_cast<void *>(features.data());
        input.size = features.size() * sizeof(float);
      } else {
        input.buf = reinterpret_cast<void *>(states[i - 1].data());
        input.size = states[i - 1].size();
      }
    }

    std::vector<float> encoder_out(encoder_output_attrs_[0].n_elems);

    // Note(fangjun): We can reuse the memory from input argument `states`
    // auto next_states = GetEncoderInitStates();
    auto &next_states = states;

    std::vector<rknn_output> outputs(encoder_output_attrs_.size());
    for (int32_t i = 0; i < outputs.size(); ++i) {
      auto &output = outputs[i];
      auto &attr = encoder_output_attrs_[i];
      output.index = attr.index;
      output.is_prealloc = 1;

      if (attr.type == RKNN_TENSOR_FLOAT16) {
        output.want_float = 1;
      } else if (attr.type == RKNN_TENSOR_INT64) {
        output.want_float = 0;
      } else {
        SHERPA_ONNX_LOGE("Unsupported tensor type %d, %s", attr.type,
                         get_type_string(attr.type));
        SHERPA_ONNX_EXIT(-1);
      }

      if (i == 0) {
        output.size = encoder_out.size() * sizeof(float);
        output.buf = reinterpret_cast<void *>(encoder_out.data());
      } else {
        output.size = next_states[i - 1].size();
        output.buf = reinterpret_cast<void *>(next_states[i - 1].data());
      }
    }

    rknn_context encoder_ctx = encoder_ctx_queue_->Take();

    auto ret = rknn_inputs_set(encoder_ctx, inputs.size(), inputs.data());
    SHERPA_ONNX_RKNN_CHECK(ret, "Failed to set encoder inputs");

    ret = rknn_run(encoder_ctx, nullptr);

    SHERPA_ONNX_RKNN_CHECK(ret, "Failed to run encoder");

    ret =
        rknn_outputs_get(encoder_ctx, outputs.size(), outputs.data(), nullptr);
    SHERPA_ONNX_RKNN_CHECK(ret, "Failed to get encoder output");

    for (int32_t i = 0; i < next_states.size(); ++i) {
      const auto &attr = encoder_input_attrs_[i + 1];
      if (attr.n_dims == 4) {
        // TODO(fangjun): The ConvertNCHWtoNHWC is copied from
        // https://github.com/airockchip/rknn_model_zoo/blob/main/examples/zipformer/cpp/process.cc#L22
        // I don't understand why we need to do that.
        std::vector<uint8_t> dst(next_states[i].size());
        int32_t n = attr.dims[0];
        int32_t h = attr.dims[1];
        int32_t w = attr.dims[2];
        int32_t c = attr.dims[3];
        ConvertNCHWtoNHWC(
            reinterpret_cast<const float *>(next_states[i].data()), n, c, h, w,
            reinterpret_cast<float *>(dst.data()));
        next_states[i] = std::move(dst);
      }
    }

    encoder_ctx_queue_->Put(encoder_ctx);

    return {std::move(encoder_out), std::move(next_states)};
  }

  std::vector<float> RunDecoder(std::vector<int64_t> decoder_input) {
    auto &attr = decoder_input_attrs_[0];
    rknn_input input;

    input.index = 0;
    input.type = RKNN_TENSOR_INT64;
    input.fmt = attr.fmt;
    input.buf = decoder_input.data();
    input.size = decoder_input.size() * sizeof(int64_t);

    std::vector<float> decoder_out(decoder_output_attrs_[0].n_elems);
    rknn_output output;
    output.index = decoder_output_attrs_[0].index;
    output.is_prealloc = 1;
    output.want_float = 1;
    output.size = decoder_out.size() * sizeof(float);
    output.buf = decoder_out.data();

    rknn_context decoder_ctx = decoder_ctx_queue_->Take();

    auto ret = rknn_inputs_set(decoder_ctx, 1, &input);
    SHERPA_ONNX_RKNN_CHECK(ret, "Failed to set decoder inputs");

    ret = rknn_run(decoder_ctx, nullptr);
    SHERPA_ONNX_RKNN_CHECK(ret, "Failed to run decoder");

    ret = rknn_outputs_get(decoder_ctx, 1, &output, nullptr);
    SHERPA_ONNX_RKNN_CHECK(ret, "Failed to get decoder output");

    decoder_ctx_queue_->Put(decoder_ctx);

    return decoder_out;
  }

  std::vector<float> RunJoiner(const float *encoder_out,
                               const float *decoder_out) {
    std::vector<rknn_input> inputs(2);
    inputs[0].index = 0;
    inputs[0].type = RKNN_TENSOR_FLOAT32;
    inputs[0].fmt = joiner_input_attrs_[0].fmt;
    inputs[0].buf = const_cast<float *>(encoder_out);
    inputs[0].size = joiner_input_attrs_[0].n_elems * sizeof(float);

    inputs[1].index = 1;
    inputs[1].type = RKNN_TENSOR_FLOAT32;
    inputs[1].fmt = joiner_input_attrs_[1].fmt;
    inputs[1].buf = const_cast<float *>(decoder_out);
    inputs[1].size = joiner_input_attrs_[1].n_elems * sizeof(float);

    std::vector<float> joiner_out(joiner_output_attrs_[0].n_elems);
    rknn_output output;
    output.index = joiner_output_attrs_[0].index;
    output.is_prealloc = 1;
    output.want_float = 1;
    output.size = joiner_out.size() * sizeof(float);
    output.buf = joiner_out.data();

    rknn_context joiner_ctx = joiner_ctx_queue_->Take();

    auto ret = rknn_inputs_set(joiner_ctx, inputs.size(), inputs.data());
    SHERPA_ONNX_RKNN_CHECK(ret, "Failed to set joiner inputs");

    ret = rknn_run(joiner_ctx, nullptr);
    SHERPA_ONNX_RKNN_CHECK(ret, "Failed to run joiner");

    ret = rknn_outputs_get(joiner_ctx, 1, &output, nullptr);
    SHERPA_ONNX_RKNN_CHECK(ret, "Failed to get joiner output");

    joiner_ctx_queue_->Put(joiner_ctx);

    return joiner_out;
  }

  int32_t ContextSize() const { return context_size_; }

  int32_t ChunkSize() const { return T_; }

  int32_t ChunkShift() const { return decode_chunk_len_; }

  int32_t VocabSize() const { return vocab_size_; }

  rknn_tensor_attr GetEncoderOutAttr() const {
    return encoder_output_attrs_[0];
  }

 private:
  void InitEncoder(void *model_data, size_t model_data_length) {
    InitContext(model_data, model_data_length, config_.debug, &encoder_ctx_);

    InitInputOutputAttrs(encoder_ctx_, config_.debug, &encoder_input_attrs_,
                         &encoder_output_attrs_);

    rknn_custom_string custom_string =
        GetCustomString(encoder_ctx_, config_.debug);

    auto meta = Parse(custom_string, config_.debug);

    if (meta.count("encoder_dims")) {
      SplitStringToIntegers(meta.at("encoder_dims"), ",", false,
                            &encoder_dims_);
    }

    if (meta.count("attention_dims")) {
      SplitStringToIntegers(meta.at("attention_dims"), ",", false,
                            &attention_dims_);
    }

    if (meta.count("num_encoder_layers")) {
      SplitStringToIntegers(meta.at("num_encoder_layers"), ",", false,
                            &num_encoder_layers_);
    }

    if (meta.count("cnn_module_kernels")) {
      SplitStringToIntegers(meta.at("cnn_module_kernels"), ",", false,
                            &cnn_module_kernels_);
    }

    if (meta.count("left_context_len")) {
      SplitStringToIntegers(meta.at("left_context_len"), ",", false,
                            &left_context_len_);
    }

    if (meta.count("T")) {
      T_ = atoi(meta.at("T").c_str());
    }

    if (meta.count("decode_chunk_len")) {
      decode_chunk_len_ = atoi(meta.at("decode_chunk_len").c_str());
    }

    if (meta.count("context_size")) {
      context_size_ = atoi(meta.at("context_size").c_str());
    }

    if (config_.debug) {
      auto print = [](const std::vector<int32_t> &v, const char *name) {
        std::ostringstream os;
        os << name << ": ";
        for (auto i : v) {
          os << i << " ";
        }
#if __OHOS__
        SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
        SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
      };
      print(encoder_dims_, "encoder_dims");
      print(attention_dims_, "attention_dims");
      print(num_encoder_layers_, "num_encoder_layers");
      print(cnn_module_kernels_, "cnn_module_kernels");
      print(left_context_len_, "left_context_len");
#if __OHOS__
      SHERPA_ONNX_LOGE("T: %{public}d", T_);
      SHERPA_ONNX_LOGE("decode_chunk_len_: %{public}d", decode_chunk_len_);
#else
      SHERPA_ONNX_LOGE("T: %d", T_);
      SHERPA_ONNX_LOGE("decode_chunk_len_: %d", decode_chunk_len_);
#endif
    }
  }

  void InitDecoder(void *model_data, size_t model_data_length) {
    InitContext(model_data, model_data_length, config_.debug, &decoder_ctx_);

    InitInputOutputAttrs(decoder_ctx_, config_.debug, &decoder_input_attrs_,
                         &decoder_output_attrs_);

    if (decoder_input_attrs_[0].type != RKNN_TENSOR_INT64) {
      SHERPA_ONNX_LOGE("Expect int64 for decoder input. Given: %d, %s",
                       decoder_input_attrs_[0].type,
                       get_type_string(decoder_input_attrs_[0].type));
      SHERPA_ONNX_EXIT(-1);
    }

    context_size_ = decoder_input_attrs_[0].dims[1];
    if (config_.debug) {
      SHERPA_ONNX_LOGE("context_size: %d", context_size_);
    }
  }

  void InitJoiner(void *model_data, size_t model_data_length) {
    InitContext(model_data, model_data_length, config_.debug, &joiner_ctx_);

    InitInputOutputAttrs(joiner_ctx_, config_.debug, &joiner_input_attrs_,
                         &joiner_output_attrs_);

    vocab_size_ = joiner_output_attrs_[0].dims[1];
    if (config_.debug) {
      SHERPA_ONNX_LOGE("vocab_size: %d", vocab_size_);
    }
  }

  void PostInit() {
    encoder_ctx_queue_ = std::make_unique<ContextBlockingQueueRknn>(
        encoder_ctx_, config_.num_threads);
    decoder_ctx_queue_ = std::make_unique<ContextBlockingQueueRknn>(
        decoder_ctx_, config_.num_threads);
    joiner_ctx_queue_ = std::make_unique<ContextBlockingQueueRknn>(
        joiner_ctx_, config_.num_threads);
  }

 private:
  OnlineModelConfig config_;
  rknn_context encoder_ctx_ = 0;
  rknn_context decoder_ctx_ = 0;
  rknn_context joiner_ctx_ = 0;

  std::unique_ptr<ContextBlockingQueueRknn> encoder_ctx_queue_;
  std::unique_ptr<ContextBlockingQueueRknn> decoder_ctx_queue_;
  std::unique_ptr<ContextBlockingQueueRknn> joiner_ctx_queue_;

  std::vector<rknn_tensor_attr> encoder_input_attrs_;
  std::vector<rknn_tensor_attr> encoder_output_attrs_;

  std::vector<rknn_tensor_attr> decoder_input_attrs_;
  std::vector<rknn_tensor_attr> decoder_output_attrs_;

  std::vector<rknn_tensor_attr> joiner_input_attrs_;
  std::vector<rknn_tensor_attr> joiner_output_attrs_;

  std::vector<int32_t> encoder_dims_;
  std::vector<int32_t> attention_dims_;
  std::vector<int32_t> num_encoder_layers_;
  std::vector<int32_t> cnn_module_kernels_;
  std::vector<int32_t> left_context_len_;

  int32_t T_ = 0;
  int32_t decode_chunk_len_ = 0;

  int32_t context_size_ = 2;
  int32_t vocab_size_ = 0;
};

OnlineZipformerTransducerModelRknn::~OnlineZipformerTransducerModelRknn() =
    default;  // NOLINT

OnlineZipformerTransducerModelRknn::OnlineZipformerTransducerModelRknn(
    const OnlineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OnlineZipformerTransducerModelRknn::OnlineZipformerTransducerModelRknn(
    Manager *mgr, const OnlineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

std::vector<std::vector<uint8_t>>
OnlineZipformerTransducerModelRknn::GetEncoderInitStates() const {
  return impl_->GetEncoderInitStates();
}

std::pair<std::vector<float>, std::vector<std::vector<uint8_t>>>
OnlineZipformerTransducerModelRknn::RunEncoder(
    std::vector<float> features,
    std::vector<std::vector<uint8_t>> states) const {
  return impl_->RunEncoder(std::move(features), std::move(states));
}

std::vector<float> OnlineZipformerTransducerModelRknn::RunDecoder(
    std::vector<int64_t> decoder_input) const {
  return impl_->RunDecoder(std::move(decoder_input));
}

std::vector<float> OnlineZipformerTransducerModelRknn::RunJoiner(
    const float *encoder_out, const float *decoder_out) const {
  return impl_->RunJoiner(encoder_out, decoder_out);
}

int32_t OnlineZipformerTransducerModelRknn::ContextSize() const {
  return impl_->ContextSize();
}

int32_t OnlineZipformerTransducerModelRknn::ChunkSize() const {
  return impl_->ChunkSize();
}

int32_t OnlineZipformerTransducerModelRknn::ChunkShift() const {
  return impl_->ChunkShift();
}

int32_t OnlineZipformerTransducerModelRknn::VocabSize() const {
  return impl_->VocabSize();
}

rknn_tensor_attr OnlineZipformerTransducerModelRknn::GetEncoderOutAttr() const {
  return impl_->GetEncoderOutAttr();
}

#if __ANDROID_API__ >= 9
template OnlineZipformerTransducerModelRknn::OnlineZipformerTransducerModelRknn(
    AAssetManager *mgr, const OnlineModelConfig &config);
#endif

#if __OHOS__
template OnlineZipformerTransducerModelRknn::OnlineZipformerTransducerModelRknn(
    NativeResourceManager *mgr, const OnlineModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/rknn/online-zipformer-transducer-model-rknn.h
================================================
// sherpa-onnx/csrc/rknn/online-zipformer-transducer-model-rknn.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_RKNN_ONLINE_ZIPFORMER_TRANSDUCER_MODEL_RKNN_H_
#define SHERPA_ONNX_CSRC_RKNN_ONLINE_ZIPFORMER_TRANSDUCER_MODEL_RKNN_H_

#include <memory>
#include <utility>
#include <vector>

#include "rknn_api.h"  // NOLINT
#include "sherpa-onnx/csrc/online-model-config.h"
#include "sherpa-onnx/csrc/online-transducer-model.h"

namespace sherpa_onnx {

// this is for zipformer v1 and v2, i.e., the folder
// pruned_transducer_statelss7_streaming
// and
// zipformer
// from icefall
class OnlineZipformerTransducerModelRknn {
 public:
  ~OnlineZipformerTransducerModelRknn();

  explicit OnlineZipformerTransducerModelRknn(const OnlineModelConfig &config);

  template <typename Manager>
  OnlineZipformerTransducerModelRknn(Manager *mgr,
                                     const OnlineModelConfig &config);

  std::vector<std::vector<uint8_t>> GetEncoderInitStates() const;

  std::pair<std::vector<float>, std::vector<std::vector<uint8_t>>> RunEncoder(
      std::vector<float> features,
      std::vector<std::vector<uint8_t>> states) const;

  std::vector<float> RunDecoder(std::vector<int64_t> decoder_input) const;

  std::vector<float> RunJoiner(const float *encoder_out,
                               const float *decoder_out) const;

  int32_t ContextSize() const;

  int32_t ChunkSize() const;

  int32_t ChunkShift() const;

  int32_t VocabSize() const;

  rknn_tensor_attr GetEncoderOutAttr() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_RKNN_ONLINE_ZIPFORMER_TRANSDUCER_MODEL_RKNN_H_


================================================
FILE: sherpa-onnx/csrc/rknn/silero-vad-model-rknn.cc
================================================
// sherpa-onnx/csrc/rknn/silero-vad-model-rknn.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/rknn/silero-vad-model-rknn.h"

#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/rknn/macros.h"
#include "sherpa-onnx/csrc/rknn/utils.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class SileroVadModelRknn::Impl {
 public:
  ~Impl() {
    auto ret = rknn_destroy(ctx_);
    if (ret != RKNN_SUCC) {
      SHERPA_ONNX_LOGE("Failed to destroy the context");
    }
  }

  explicit Impl(const VadModelConfig &config)
      : config_(config), sample_rate_(config.sample_rate) {
    auto buf = ReadFile(config.silero_vad.model);
    Init(buf.data(), buf.size());

    SetCoreMask(ctx_, config_.num_threads);

    if (sample_rate_ != 16000) {
      SHERPA_ONNX_LOGE("Expected sample rate 16000. Given: %d",
                       config.sample_rate);
      SHERPA_ONNX_EXIT(-1);
    }

    min_silence_samples_ =
        sample_rate_ * config_.silero_vad.min_silence_duration;

    min_speech_samples_ = sample_rate_ * config_.silero_vad.min_speech_duration;
  }

  template <typename Manager>
  Impl(Manager *mgr, const VadModelConfig &config)
      : config_(config), sample_rate_(config.sample_rate) {
    auto buf = ReadFile(mgr, config.silero_vad.model);
    Init(buf.data(), buf.size());

    SetCoreMask(ctx_, config_.num_threads);

    if (sample_rate_ != 16000) {
      SHERPA_ONNX_LOGE("Expected sample rate 16000. Given: %d",
                       config.sample_rate);
      exit(-1);
    }

    min_silence_samples_ =
        sample_rate_ * config_.silero_vad.min_silence_duration;

    min_speech_samples_ = sample_rate_ * config_.silero_vad.min_speech_duration;
  }

  void Reset() {
    for (auto &s : states_) {
      std::fill(s.begin(), s.end(), 0);
    }

    triggered_ = false;
    current_sample_ = 0;
    temp_start_ = 0;
    temp_end_ = 0;
  }

  bool IsSpeech(const float *samples, int32_t n) {
    if (n != WindowSize()) {
      SHERPA_ONNX_LOGE("n: %d != window_size: %d", n, WindowSize());
      SHERPA_ONNX_EXIT(-1);
    }

    float prob = Run(samples, n);

    float threshold = config_.silero_vad.threshold;

    current_sample_ += config_.silero_vad.window_size;

    if (prob > threshold && temp_end_ != 0) {
      temp_end_ = 0;
    }

    if (prob > threshold && temp_start_ == 0) {
      // start speaking, but we require that it must satisfy
      // min_speech_duration
      temp_start_ = current_sample_;
      return false;
    }

    if (prob > threshold && temp_start_ != 0 && !triggered_) {
      if (current_sample_ - temp_start_ < min_speech_samples_) {
        return false;
      }

      triggered_ = true;

      return true;
    }

    if ((prob < threshold) && !triggered_) {
      // silence
      temp_start_ = 0;
      temp_end_ = 0;
      return false;
    }

    if ((prob > threshold - 0.15) && triggered_) {
      // speaking
      return true;
    }

    if ((prob > threshold) && !triggered_) {
      // start speaking
      triggered_ = true;

      return true;
    }

    if ((prob < threshold) && triggered_) {
      // stop to speak
      if (temp_end_ == 0) {
        temp_end_ = current_sample_;
      }

      if (current_sample_ - temp_end_ < min_silence_samples_) {
        // continue speaking
        return true;
      }
      // stopped speaking
      temp_start_ = 0;
      temp_end_ = 0;
      triggered_ = false;
      return false;
    }

    return false;
  }

  int32_t WindowShift() const { return config_.silero_vad.window_size; }

  int32_t WindowSize() const {
    return config_.silero_vad.window_size + window_overlap_;
  }

  int32_t MinSilenceDurationSamples() const { return min_silence_samples_; }

  int32_t MinSpeechDurationSamples() const { return min_speech_samples_; }

  void SetMinSilenceDuration(float s) {
    min_silence_samples_ = sample_rate_ * s;
  }

  void SetThreshold(float threshold) {
    config_.silero_vad.threshold = threshold;
  }

  float Run(const float *samples, int32_t n) {
    std::vector<rknn_input> inputs(input_attrs_.size());

    for (int32_t i = 0; i < static_cast<int32_t>(inputs.size()); ++i) {
      auto &input = inputs[i];
      auto &attr = input_attrs_[i];
      input.index = attr.index;

      if (attr.type == RKNN_TENSOR_FLOAT16) {
        input.type = RKNN_TENSOR_FLOAT32;
      } else if (attr.type == RKNN_TENSOR_INT64) {
        input.type = RKNN_TENSOR_INT64;
      } else {
        SHERPA_ONNX_LOGE("Unsupported tensor type %d, %s", attr.type,
                         get_type_string(attr.type));
        SHERPA_ONNX_EXIT(-1);
      }

      input.fmt = attr.fmt;
      if (i == 0) {
        input.buf = reinterpret_cast<void *>(const_cast<float *>(samples));
        input.size = n * sizeof(float);
      } else {
        input.buf = reinterpret_cast<void *>(states_[i - 1].data());
        input.size = states_[i - 1].size() * sizeof(float);
      }
    }

    std::vector<float> out(output_attrs_[0].n_elems);

    auto &next_states = states_;

    std::vector<rknn_output> outputs(output_attrs_.size());

    for (int32_t i = 0; i < outputs.size(); ++i) {
      auto &output = outputs[i];
      auto &attr = output_attrs_[i];
      output.index = attr.index;
      output.is_prealloc = 1;

      if (attr.type == RKNN_TENSOR_FLOAT16) {
        output.want_float = 1;
      } else if (attr.type == RKNN_TENSOR_INT64) {
        output.want_float = 0;
      } else {
        SHERPA_ONNX_LOGE("Unsupported tensor type %d, %s", attr.type,
                         get_type_string(attr.type));
        SHERPA_ONNX_EXIT(-1);
      }

      if (i == 0) {
        output.size = out.size() * sizeof(float);
        output.buf = reinterpret_cast<void *>(out.data());
      } else {
        output.size = next_states[i - 1].size() * sizeof(float);
        output.buf = reinterpret_cast<void *>(next_states[i - 1].data());
      }
    }

    auto ret = rknn_inputs_set(ctx_, inputs.size(), inputs.data());
    SHERPA_ONNX_RKNN_CHECK(ret, "Failed to set inputs");

    ret = rknn_run(ctx_, nullptr);
    SHERPA_ONNX_RKNN_CHECK(ret, "Failed to run the model");

    ret = rknn_outputs_get(ctx_, outputs.size(), outputs.data(), nullptr);
    SHERPA_ONNX_RKNN_CHECK(ret, "Failed to get model output");

    return out[0];
  }

 private:
  void Init(void *model_data, size_t model_data_length) {
    InitContext(model_data, model_data_length, config_.debug, &ctx_);

    InitInputOutputAttrs(ctx_, config_.debug, &input_attrs_, &output_attrs_);

    rknn_custom_string custom_string = GetCustomString(ctx_, config_.debug);

    auto meta = Parse(custom_string, config_.debug);

    if (config_.silero_vad.window_size != 512) {
      SHERPA_ONNX_LOGE("we require window_size to be 512. Given: %d",
                       config_.silero_vad.window_size);
      SHERPA_ONNX_EXIT(-1);
    }

    if (config_.debug) {
      for (const auto &p : meta) {
        SHERPA_ONNX_LOGE("%s: %s", p.first.c_str(), p.second.c_str());
      }
    }

    if (meta.count("model_type") == 0) {
      SHERPA_ONNX_LOGE("No model type found in '%s'",
                       config_.silero_vad.model.c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    if (meta.at("model_type") != "silero-vad-v4") {
      SHERPA_ONNX_LOGE("Expect model type silero-vad-v4 in '%s', given: '%s'",
                       config_.silero_vad.model.c_str(),
                       meta.at("model_type").c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    if (meta.count("sample_rate") == 0) {
      SHERPA_ONNX_LOGE("No sample_rate found in '%s'",
                       config_.silero_vad.model.c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    if (meta.at("sample_rate") != "16000") {
      SHERPA_ONNX_LOGE("Expect sample rate 16000 in '%s', given: '%s'",
                       config_.silero_vad.model.c_str(),
                       meta.at("sample_rate").c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    if (meta.count("version") == 0) {
      SHERPA_ONNX_LOGE("No version found in '%s'",
                       config_.silero_vad.model.c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    if (meta.at("version") != "4") {
      SHERPA_ONNX_LOGE("Expect version 4 in '%s', given: '%s'",
                       config_.silero_vad.model.c_str(),
                       meta.at("version").c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    if (meta.count("h_shape") == 0) {
      SHERPA_ONNX_LOGE("No h_shape found in '%s'",
                       config_.silero_vad.model.c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    if (meta.count("c_shape") == 0) {
      SHERPA_ONNX_LOGE("No c_shape found in '%s'",
                       config_.silero_vad.model.c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    std::vector<int64_t> h_shape;
    std::vector<int64_t> c_shape;

    SplitStringToIntegers(meta.at("h_shape"), ",", false, &h_shape);
    SplitStringToIntegers(meta.at("c_shape"), ",", false, &c_shape);
    if (h_shape.size() != 3 || c_shape.size() != 3) {
      SHERPA_ONNX_LOGE("Incorrect shape for h (%d) or c (%d)",
                       static_cast<int32_t>(h_shape.size()),
                       static_cast<int32_t>(c_shape.size()));
      SHERPA_ONNX_EXIT(-1);
    }

    states_.resize(2);
    states_[0].resize(h_shape[0] * h_shape[1] * h_shape[2]);
    states_[1].resize(c_shape[0] * c_shape[1] * c_shape[2]);

    Reset();
  }

 private:
  VadModelConfig config_;
  rknn_context ctx_ = 0;

  std::vector<rknn_tensor_attr> input_attrs_;
  std::vector<rknn_tensor_attr> output_attrs_;

  std::vector<std::vector<float>> states_;

  int64_t sample_rate_;
  int32_t min_silence_samples_;
  int32_t min_speech_samples_;

  bool triggered_ = false;
  int32_t current_sample_ = 0;
  int32_t temp_start_ = 0;
  int32_t temp_end_ = 0;

  int32_t window_overlap_ = 0;
};

SileroVadModelRknn::SileroVadModelRknn(const VadModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
SileroVadModelRknn::SileroVadModelRknn(Manager *mgr,
                                       const VadModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

SileroVadModelRknn::~SileroVadModelRknn() = default;

void SileroVadModelRknn::Reset() { return impl_->Reset(); }

bool SileroVadModelRknn::IsSpeech(const float *samples, int32_t n) {
  return impl_->IsSpeech(samples, n);
}

int32_t SileroVadModelRknn::WindowSize() const { return impl_->WindowSize(); }

int32_t SileroVadModelRknn::WindowShift() const { return impl_->WindowShift(); }

int32_t SileroVadModelRknn::MinSilenceDurationSamples() const {
  return impl_->MinSilenceDurationSamples();
}

int32_t SileroVadModelRknn::MinSpeechDurationSamples() const {
  return impl_->MinSpeechDurationSamples();
}

void SileroVadModelRknn::SetMinSilenceDuration(float s) {
  impl_->SetMinSilenceDuration(s);
}

void SileroVadModelRknn::SetThreshold(float threshold) {
  impl_->SetThreshold(threshold);
}

float SileroVadModelRknn::Compute(const float *samples, int32_t n) {
  return impl_->Run(samples, n);
}

#if __ANDROID_API__ >= 9
template SileroVadModelRknn::SileroVadModelRknn(AAssetManager *mgr,
                                                const VadModelConfig &config);
#endif

#if __OHOS__
template SileroVadModelRknn::SileroVadModelRknn(NativeResourceManager *mgr,
                                                const VadModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/rknn/silero-vad-model-rknn.h
================================================
// sherpa-onnx/csrc/rknn/silero-vad-model-rknn.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_RKNN_SILERO_VAD_MODEL_RKNN_H_
#define SHERPA_ONNX_CSRC_RKNN_SILERO_VAD_MODEL_RKNN_H_

#include <memory>

#include "rknn_api.h"  // NOLINT
#include "sherpa-onnx/csrc/online-model-config.h"
#include "sherpa-onnx/csrc/vad-model.h"

namespace sherpa_onnx {

class SileroVadModelRknn : public VadModel {
 public:
  explicit SileroVadModelRknn(const VadModelConfig &config);

  template <typename Manager>
  SileroVadModelRknn(Manager *mgr, const VadModelConfig &config);

  ~SileroVadModelRknn() override;

  // reset the internal model states
  void Reset() override;

  /**
   * @param samples Pointer to a 1-d array containing audio samples.
   *                Each sample should be normalized to the range [-1, 1].
   * @param n Number of samples.
   *
   * @return Return true if speech is detected. Return false otherwise.
   */
  bool IsSpeech(const float *samples, int32_t n) override;
  float Compute(const float *samples, int32_t n) override;

  // For silero vad V4, it is WindowShift().
  int32_t WindowSize() const override;

  // 512
  int32_t WindowShift() const override;

  int32_t MinSilenceDurationSamples() const override;
  int32_t MinSpeechDurationSamples() const override;

  void SetMinSilenceDuration(float s) override;
  void SetThreshold(float threshold) override;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_RKNN_SILERO_VAD_MODEL_RKNN_H_


================================================
FILE: sherpa-onnx/csrc/rknn/transducer-keyword-decoder-rknn.cc
================================================
// sherpa-onnx/csrc/rknn/transducer-keywords-decoder-rknn.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/rknn/transducer-keyword-decoder-rknn.h"

#include <algorithm>
#include <cmath>
#include <cstring>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/log.h"

namespace sherpa_onnx {

TransducerKeywordResult TransducerKeywordDecoderRknn::GetEmptyResult() const {
  int32_t context_size = model_->ContextSize();
  int32_t blank_id = 0;  // always 0
  TransducerKeywordResult r;
  std::vector<int64_t> blanks(context_size, -1);
  blanks.back() = blank_id;

  Hypotheses blank_hyp({{blanks, 0}});
  r.hyps = std::move(blank_hyp);
  return r;
}

std::vector<std::vector<float>> GetDecoderOut(
    OnlineZipformerTransducerModelRknn *model, const Hypotheses &hyp_vec);

std::vector<std::vector<float>> GetJoinerOutLogSoftmax(
    OnlineZipformerTransducerModelRknn *model, const float *p_encoder_out,
    const std::vector<std::vector<float>> &decoder_out);

void TransducerKeywordDecoderRknn::Decode(std::vector<float> encoder_out,
                                          OnlineStreamRknn *s) {
  auto attr = model_->GetEncoderOutAttr();
  int32_t num_frames = attr.dims[1];
  int32_t encoder_out_dim = attr.dims[2];

  int32_t vocab_size = model_->VocabSize();
  int32_t context_size = model_->ContextSize();

  std::vector<int64_t> blanks(context_size, -1);
  blanks.back() = 0;  // blank_id is hardcoded to 0

  auto r = s->GetKeywordResult();

  Hypotheses cur = std::move(r.hyps);
  std::vector<Hypothesis> prev;

  auto decoder_out = GetDecoderOut(model_, cur);

  const float *p_encoder_out = encoder_out.data();

  int32_t frame_offset = r.frame_offset;

  for (int32_t t = 0; t != num_frames; ++t) {
    prev = cur.Vec();
    cur.Clear();

    auto log_probs = GetJoinerOutLogSoftmax(model_, p_encoder_out, decoder_out);

    auto log_probs_old = log_probs;

    p_encoder_out += encoder_out_dim;

    for (int32_t i = 0; i != prev.size(); ++i) {
      auto log_prob = prev[i].log_prob;
      for (auto &p : log_probs[i]) {
        p += log_prob;
      }
    }

    auto topk = TopkIndex(log_probs, max_active_paths_);

    Hypotheses hyps;

    for (auto k : topk) {
      int32_t hyp_index = k / vocab_size;
      int32_t new_token = k % vocab_size;

      Hypothesis new_hyp = prev[hyp_index];
      float context_score = 0;
      auto context_state = new_hyp.context_state;

      // blank is hardcoded to 0
      // also, it treats unk as blank
      if (new_token != 0 && new_token != unk_id_) {
        new_hyp.ys.push_back(new_token);
        new_hyp.timestamps.push_back(t + frame_offset);
        new_hyp.ys_probs.push_back(exp(log_probs_old[hyp_index][new_token]));

        new_hyp.num_trailing_blanks = 0;
        auto context_res =
            s->GetContextGraph()->ForwardOneStep(context_state, new_token);
        context_score = std::get<0>(context_res);
        new_hyp.context_state = std::get<1>(context_res);
        // Start matching from the start state, forget the decoder history.
        if (new_hyp.context_state->token == -1) {
          new_hyp.ys = blanks;
          new_hyp.timestamps.clear();
          new_hyp.ys_probs.clear();
        }
      } else {
        ++new_hyp.num_trailing_blanks;
      }
      new_hyp.log_prob = log_probs[hyp_index][new_token] + context_score;
      hyps.Add(std::move(new_hyp));
    }  // for (auto k : topk)

    auto best_hyp = hyps.GetMostProbable(false);

    auto status = s->GetContextGraph()->IsMatched(best_hyp.context_state);
    bool matched = std::get<0>(status);
    const ContextState *matched_state = std::get<1>(status);

    if (matched) {
      float ys_prob = 0.0;
      for (int32_t i = 0; i < matched_state->level; ++i) {
        ys_prob += best_hyp.ys_probs[i];
      }
      ys_prob /= matched_state->level;
      if (best_hyp.num_trailing_blanks > num_trailing_blanks_ &&
          ys_prob >= matched_state->ac_threshold) {
        r.tokens = {best_hyp.ys.end() - matched_state->level,
                    best_hyp.ys.end()};
        r.timestamps = {best_hyp.timestamps.end() - matched_state->level,
                        best_hyp.timestamps.end()};
        r.keyword = matched_state->phrase;

        hyps = Hypotheses({{blanks, 0, s->GetContextGraph()->Root()}});
      }
    }

    cur = std::move(hyps);
    decoder_out = GetDecoderOut(model_, cur);
  }

  auto best_hyp = cur.GetMostProbable(false);
  r.hyps = std::move(cur);
  r.frame_offset += num_frames;
  r.num_trailing_blanks = best_hyp.num_trailing_blanks;

  s->SetKeywordResult(r);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/rknn/transducer-keyword-decoder-rknn.h
================================================
// sherpa-onnx/csrc/rknn/transducer-keywords-decoder-rknn.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_RKNN_TRANSDUCER_KEYWORD_DECODER_RKNN_H_
#define SHERPA_ONNX_CSRC_RKNN_TRANSDUCER_KEYWORD_DECODER_RKNN_H_

#include <string>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/rknn/online-stream-rknn.h"
#include "sherpa-onnx/csrc/rknn/online-zipformer-transducer-model-rknn.h"
#include "sherpa-onnx/csrc/transducer-keyword-decoder.h"

namespace sherpa_onnx {

class TransducerKeywordDecoderRknn {
 public:
  TransducerKeywordDecoderRknn(OnlineZipformerTransducerModelRknn *model,
                               int32_t max_active_paths,
                               int32_t num_trailing_blanks, int32_t unk_id)
      : model_(model),
        max_active_paths_(max_active_paths),
        num_trailing_blanks_(num_trailing_blanks),
        unk_id_(unk_id) {}

  TransducerKeywordResult GetEmptyResult() const;

  void Decode(std::vector<float> encoder_out, OnlineStreamRknn *s);

 private:
  OnlineZipformerTransducerModelRknn *model_;  // Not owned

  int32_t max_active_paths_;
  int32_t num_trailing_blanks_;
  int32_t unk_id_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_RKNN_TRANSDUCER_KEYWORD_DECODER_RKNN_H_


================================================
FILE: sherpa-onnx/csrc/rknn/utils.cc
================================================
// sherpa-onnx/csrc/utils.cc
//
// Copyright      2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/rknn/utils.h"

#include <string.h>

#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/rknn/macros.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

void ConvertNCHWtoNHWC(const float *src, int32_t n, int32_t channel,
                       int32_t height, int32_t width, float *dst) {
  for (int32_t i = 0; i < n; ++i) {
    for (int32_t h = 0; h < height; ++h) {
      for (int32_t w = 0; w < width; ++w) {
        for (int32_t c = 0; c < channel; ++c) {
          // dst[h, w, c] = src[c, h, w]
          dst[i * height * width * channel + h * width * channel + w * channel +
              c] = src[i * height * width * channel + c * height * width +
                       h * width + w];
        }
      }
    }
  }
}

std::string ToString(const rknn_tensor_attr &attr) {
  std::ostringstream os;
  os << "{";
  os << attr.index;
  os << ", name: " << attr.name;
  os << ", shape: (";
  std::string sep;
  for (int32_t i = 0; i < static_cast<int32_t>(attr.n_dims); ++i) {
    os << sep << attr.dims[i];
    sep = ",";
  }
  os << ")";
  os << ", n_elems: " << attr.n_elems;
  os << ", size: " << attr.size;
  os << ", fmt: " << get_format_string(attr.fmt);
  os << ", type: " << get_type_string(attr.type);
  os << ", pass_through: " << (attr.pass_through ? "true" : "false");
  os << "}";
  return os.str();
}

std::unordered_map<std::string, std::string> Parse(
    const rknn_custom_string &custom_string, bool debug /*= false*/) {
  std::unordered_map<std::string, std::string> ans;
  std::vector<std::string> fields;
  SplitStringToVector(custom_string.string, ";", false, &fields);

  std::vector<std::string> tmp;
  for (const auto &f : fields) {
    SplitStringToVector(f, "=", false, &tmp);
    if (tmp.size() != 2) {
      SHERPA_ONNX_LOGE("Invalid custom string %s for %s", custom_string.string,
                       f.c_str());
      SHERPA_ONNX_EXIT(-1);
    }
    ans[std::move(tmp[0])] = std::move(tmp[1]);
  }

  if (debug) {
    for (const auto &p : ans) {
      SHERPA_ONNX_LOGE("%s: %s", p.first.c_str(), p.second.c_str());
    }
  }

  return ans;
}

void InitContext(void *model_data, size_t model_data_length, bool debug,
                 rknn_context *ctx) {
  auto ret = rknn_init(ctx, model_data, model_data_length, 0, nullptr);
  SHERPA_ONNX_RKNN_CHECK(ret, "Failed to init rknn");

  if (debug) {
    rknn_sdk_version v;
    ret = rknn_query(*ctx, RKNN_QUERY_SDK_VERSION, &v, sizeof(v));
    SHERPA_ONNX_RKNN_CHECK(ret, "Failed to get rknn sdk version");

    SHERPA_ONNX_LOGE("sdk api version: %s, driver version: %s", v.api_version,
                     v.drv_version);
  }
}

void InitInputOutputAttrs(rknn_context ctx, bool debug,
                          std::vector<rknn_tensor_attr> *input_attrs,
                          std::vector<rknn_tensor_attr> *output_attrs) {
  rknn_input_output_num io_num;
  auto ret = rknn_query(ctx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));
  SHERPA_ONNX_RKNN_CHECK(ret, "Failed to get I/O information for the model");

  if (debug) {
    SHERPA_ONNX_LOGE("model: %d inputs, %d outputs",
                     static_cast<int32_t>(io_num.n_input),
                     static_cast<int32_t>(io_num.n_output));
  }

  input_attrs->resize(io_num.n_input);
  output_attrs->resize(io_num.n_output);

  int32_t i = 0;
  for (auto &attr : *input_attrs) {
    memset(&attr, 0, sizeof(attr));
    attr.index = i;
    ret = rknn_query(ctx, RKNN_QUERY_INPUT_ATTR, &attr, sizeof(attr));
    SHERPA_ONNX_RKNN_CHECK(ret, "Failed to get attr for model input %d", i);
    i += 1;
  }

  if (debug) {
    std::ostringstream os;
    std::string sep;
    for (auto &attr : *input_attrs) {
      os << sep << ToString(attr);
      sep = "\n";
    }
    SHERPA_ONNX_LOGE("\n----------Model inputs info----------\n%s",
                     os.str().c_str());
  }

  i = 0;
  for (auto &attr : *output_attrs) {
    memset(&attr, 0, sizeof(attr));
    attr.index = i;
    ret = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &attr, sizeof(attr));
    SHERPA_ONNX_RKNN_CHECK(ret, "Failed to get attr for model output %d", i);
    i += 1;
  }

  if (debug) {
    std::ostringstream os;
    std::string sep;
    for (auto &attr : *output_attrs) {
      os << sep << ToString(attr);
      sep = "\n";
    }
    SHERPA_ONNX_LOGE("\n----------Model outputs info----------\n%s",
                     os.str().c_str());
  }
}

rknn_custom_string GetCustomString(rknn_context ctx, bool debug) {
  rknn_custom_string custom_string;
  auto ret = rknn_query(ctx, RKNN_QUERY_CUSTOM_STRING, &custom_string,
                        sizeof(custom_string));
  SHERPA_ONNX_RKNN_CHECK(ret, "Failed to read custom string from the model");
  if (debug) {
    SHERPA_ONNX_LOGE("customs string: %s", custom_string.string);
  }
  return custom_string;
}

void SetCoreMask(rknn_context ctx, int32_t num_threads) {
  int32_t ret = RKNN_SUCC;
  switch (num_threads) {
    case 1:
      ret = rknn_set_core_mask(ctx, RKNN_NPU_CORE_AUTO);
      break;
    case 0:
      ret = rknn_set_core_mask(ctx, RKNN_NPU_CORE_0);
      break;
    case -1:
      ret = rknn_set_core_mask(ctx, RKNN_NPU_CORE_1);
      break;
    case -2:
      ret = rknn_set_core_mask(ctx, RKNN_NPU_CORE_2);
      break;
    case -3:
      ret = rknn_set_core_mask(ctx, RKNN_NPU_CORE_0_1);
      break;
    case -4:
      ret = rknn_set_core_mask(ctx, RKNN_NPU_CORE_0_1_2);
      break;
    default:
      SHERPA_ONNX_LOGE(
          "Valid num_threads for rk npu is 1 (auto), 0 (core 0), -1 (core "
          "1), -2 (core 2), -3 (core 0_1), -4 (core 0_1_2). Given: %d",
          num_threads);
      break;
  }
  if (ret != RKNN_SUCC) {
    SHERPA_ONNX_LOGE(
        "Failed to select npu core to run the model (You can ignore it if "
        "you are not using RK3588.");
  }
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/rknn/utils.h
================================================
// sherpa-onnx/csrc/utils.h
//
// Copyright      2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_RKNN_UTILS_H_
#define SHERPA_ONNX_CSRC_RKNN_UTILS_H_

#include <string>
#include <unordered_map>
#include <vector>

#include "rknn_api.h"  // NOLINT

namespace sherpa_onnx {

void ConvertNCHWtoNHWC(const float *src, int32_t n, int32_t channel,
                       int32_t height, int32_t width, float *dst);

std::string ToString(const rknn_tensor_attr &attr);

std::unordered_map<std::string, std::string> Parse(
    const rknn_custom_string &custom_string, bool debug = false);

void InitContext(void *model_data, size_t model_data_length, bool debug,
                 rknn_context *ctx);

void InitInputOutputAttrs(rknn_context ctx, bool debug,
                          std::vector<rknn_tensor_attr> *input_attrs,
                          std::vector<rknn_tensor_attr> *output_attrs);

rknn_custom_string GetCustomString(rknn_context ctx, bool debug);

void SetCoreMask(rknn_context ctx, int32_t num_threads);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_RKNN_UTILS_H_


================================================
FILE: sherpa-onnx/csrc/sentence-piece-tokenizer-test.cc
================================================
// sherpa-onnx/csrc/sentence-piece-tokenizer-test.cc
//
// Copyright (c)  2026  Xiaomi Corporation
#include "sherpa-onnx/csrc/sentence-piece-tokenizer.h"

#include <fstream>
#include <string>
#include <vector>

#include "gtest/gtest.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

static const char dir[] = "/tmp/sherpa-onnx-test-data";

TEST(SpTokenizer, TestEncode) {
  auto vocab_json = std::string(dir) + "/vocab.json";
  auto token_scores_json = std::string(dir) + "/token_scores.json";

  if (!std::ifstream(vocab_json).good() ||
      !std::ifstream(token_scores_json).good()) {
    SHERPA_ONNX_LOGE(
        "No test data found, skipping TestEncode()."
        "You can download the test data from: "
        "https://huggingface.co/csukuangfj/sherpa-onnx-test-data/tree/main"
        "and put it inside "
        "/tmp/sherpa-onnx-test-data");
    return;
  }

  auto sp = SentencePieceTokenizer(vocab_json, token_scores_json);
  std::string text =
      "How are you doing today? Fantastic! How about you? I am OK.";
  std::vector<std::string> expected_tokens = {
      "▁How", "▁are", "▁you",   "▁doing", "▁today", "?",
      "▁F",   "an",   "tastic", "!",      "▁How",   "▁about",
      "▁you", "?",    "▁I",     "▁am",    "▁OK",    "."};

  std::vector<std::string> tokens = sp.EncodeTokens(text);
  EXPECT_EQ(tokens, expected_tokens);

  std::vector<int32_t> expected_ids = {668, 304, 270,  473, 630,  292,
                                       496, 456, 2264, 682, 668,  315,
                                       270, 292, 268,  686, 1183, 263};

  std::vector<int32_t> token_ids = sp.EncodeIds(text);
  EXPECT_EQ(token_ids, expected_ids);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/sentence-piece-tokenizer.cc
================================================
// sherpa-onnx/csrc/sentence-piece-tokenizer.cc
//
// Copyright (c)  2026  Xiaomi Corporation

#include "sherpa-onnx/csrc/sentence-piece-tokenizer.h"

#include <cstdio>
#include <fstream>
#include <limits>
#include <string>
#include <unordered_map>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "nlohmann/json.hpp"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

using json = nlohmann::json;
static constexpr float kNegInf = -1e30f;

static json LoadJson(const std::string &filename) {
  if (filename.empty()) {
    SHERPA_ONNX_LOGE("Empty json filename");
    SHERPA_ONNX_EXIT(-1);
  }
  AssertFileExists(filename);

  std::ifstream is(filename);
  json j;
  is >> j;
  return j;
}

static json LoadJson(const std::vector<char> &buf) {
  if (buf.empty()) {
    SHERPA_ONNX_LOGE("Empty json buffer");
    SHERPA_ONNX_EXIT(-1);
  }
  return json::parse(buf.begin(), buf.end());
}

class SentencePieceTokenizer::Impl {
 public:
  Impl(const std::string &vocab_json, const std::string &token_scores_json) {
    Init(LoadJson(vocab_json), LoadJson(token_scores_json));
  }

  template <typename Manager>
  Impl(Manager *mgr, const std::string &vocab_json,
       const std::string &token_scores_json) {
    Init(LoadJson(ReadFile(mgr, vocab_json)),
         LoadJson(ReadFile(mgr, token_scores_json)));
  }

  std::vector<int32_t> EncodeIds(const std::string &text) const {
    std::vector<int32_t> ids;
    EncodeInternal(text, &ids, nullptr);
    return ids;
  }

  std::vector<std::string> EncodeTokens(const std::string &text) const {
    std::vector<std::string> tokens;
    EncodeInternal(text, nullptr, &tokens);
    return tokens;
  }

 private:
  void Init(const json &vocab, const json &scores) {
    InitVocabJson(vocab);
    InitTokenScores(scores);

    for (int i = 0; i < 256; ++i) {
      byte_token_id_[i] = -1;
      byte_token_score_[i] = kNegInf;
    }

    InitTrie();
  }

  void InitVocabJson(const std::string &filename) {
    InitVocabJson(LoadJson(filename));
  }

  void InitVocabJson(const std::vector<char> &buf) {
    InitVocabJson(LoadJson(buf));
  }

  void InitVocabJson(const json &j) {
    token2id_.reserve(j.size());
    id2token_.resize(j.size());

    for (const auto &item : j.items()) {
      token2id_[item.key()] = item.value();
      id2token_[item.value()] = item.key();
    }
  }

  void InitTokenScores(const std::string &filename) {
    InitTokenScores(LoadJson(filename));
  }

  void InitTokenScores(const std::vector<char> &buf) {
    InitTokenScores(LoadJson(buf));
  }

  void InitTokenScores(const json &j) {
    token2score_.reserve(j.size());

    for (const auto &item : j.items()) {
      token2score_[item.key()] = item.value();
    }
  }

  void InitTrie() {
    trie_.reserve(token2id_.size() * 2);
    trie_.push_back(TrieNode());  // root

    for (const auto &kv : token2id_) {
      const std::string &tok = kv.first;
      int32_t id = kv.second;

      int32_t node = 0;
      for (unsigned char c : tok) {
        auto it = trie_[node].next.find(c);
        if (it == trie_[node].next.end()) {
          int32_t new_node = trie_.size();
          trie_[node].next[c] = new_node;
          trie_.push_back(TrieNode());
          node = new_node;
        } else {
          node = it->second;
        }
      }

      trie_[node].token_id = id;
      trie_[node].score = token2score_[tok];
    }

    // -------------------------
    // Byte fallback
    // -------------------------
    for (int32_t i = 0; i < 256; ++i) {
      char buf[8];
      std::snprintf(buf, sizeof(buf), "<0x%02X>", i);
      std::string tok(buf);

      auto it = token2id_.find(tok);
      if (it == token2id_.end()) {
        SHERPA_ONNX_LOGE("Missing byte token: '%s'", tok.c_str());
        continue;
      }

      byte_token_id_[i] = it->second;
      byte_token_score_[i] = token2score_[tok];
    }
  }

  void EncodeInternal(const std::string &input, std::vector<int32_t> *ids,
                      std::vector<std::string> *tokens) const {
    // SentencePiece whitespace handling
    std::string text;
    text.reserve(input.size() + 8);

    for (char c : input) {
      if (c == ' ')
        text.append("\xE2\x96\x81");  // ▁
      else
        text.push_back(c);
    }

    if (text.rfind("\xE2\x96\x81", 0) == std::string::npos) {
      text.insert(0, "\xE2\x96\x81");
    }

    const int32_t n = static_cast<int32_t>(text.size());
    std::vector<float> dp(n + 1, kNegInf);
    std::vector<int32_t> back(n + 1, -1);
    std::vector<int32_t> back_id(n + 1, -1);

    dp[n] = 0.0f;

    // DP
    for (int32_t i = n - 1; i >= 0; --i) {
      int32_t node = 0;
      for (int32_t j = i; j < n; ++j) {
        unsigned char c = static_cast<unsigned char>(text[j]);
        auto it = trie_[node].next.find(c);
        if (it == trie_[node].next.end()) break;
        node = it->second;

        if (trie_[node].token_id >= 0) {
          float score = trie_[node].score + dp[j + 1];
          if (score > dp[i]) {
            dp[i] = score;
            back[i] = j + 1;
            back_id[i] = trie_[node].token_id;
          }
        }
      }

      // byte fallback
      if (back[i] < 0) {
        unsigned char b = static_cast<unsigned char>(text[i]);
        dp[i] = byte_token_score_[b] + dp[i + 1];
        back[i] = i + 1;
        back_id[i] = byte_token_id_[b];
      }
    }

    // reconstruct
    for (int32_t i = 0; i < n;) {
      int32_t j = back[i];
      int32_t id = back_id[i];
      if (j <= i || id < 0) break;

      if (ids != nullptr) {
        ids->push_back(id);
      }

      if (tokens != nullptr) {
        tokens->push_back(id2token_[id]);
      }

      i = j;
    }
  }

 private:
  struct TrieNode {
    std::unordered_map<unsigned char, int32_t> next;
    int32_t token_id = -1;
    float score = 0.0f;
  };

  std::vector<TrieNode> trie_;  // immutable after build
  std::vector<std::string> id2token_;
  std::unordered_map<std::string, int32_t> token2id_;
  std::unordered_map<std::string, float> token2score_;

  // <0xNN> byte fallback
  int32_t byte_token_id_[256];
  float byte_token_score_[256];
};

SentencePieceTokenizer::SentencePieceTokenizer(
    const std::string &vocab_json, const std::string &token_scores_json)
    : impl_(std::make_unique<Impl>(vocab_json, token_scores_json)) {}

template <typename Manager>
SentencePieceTokenizer::SentencePieceTokenizer(
    Manager *mgr, const std::string &vocab_json,
    const std::string &token_scores_json)
    : impl_(std::make_unique<Impl>(mgr, vocab_json, token_scores_json)) {}

SentencePieceTokenizer::~SentencePieceTokenizer() = default;

std::vector<int32_t> SentencePieceTokenizer::EncodeIds(
    const std::string &text) const {
  return impl_->EncodeIds(text);
}

std::vector<std::string> SentencePieceTokenizer::EncodeTokens(
    const std::string &text) const {
  return impl_->EncodeTokens(text);
}

#if __ANDROID_API__ >= 9
template SentencePieceTokenizer::SentencePieceTokenizer(
    AAssetManager *mgr, const std::string &vocab_json,
    const std::string &token_scores_json);
#endif

#if __OHOS__
template SentencePieceTokenizer::SentencePieceTokenizer(
    NativeResourceManager *mgr, const std::string &vocab_json,
    const std::string &token_scores_json);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/sentence-piece-tokenizer.h
================================================
// sherpa-onnx/csrc/sentence-piece-tokenizer.h
//
// Copyright (c)  2026  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_SENTENCE_PIECE_TOKENIZER_H_
#define SHERPA_ONNX_CSRC_SENTENCE_PIECE_TOKENIZER_H_

#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

namespace sherpa_onnx {

class SentencePieceTokenizer {
 public:
  SentencePieceTokenizer(const std::string &vocab_json,
                         const std::string &token_scores_json);

  template <typename Manager>
  SentencePieceTokenizer(Manager *mgr, const std::string &vocab_json,
                         const std::string &token_scores_json);

  ~SentencePieceTokenizer();

  std::vector<int32_t> EncodeIds(const std::string &text) const;
  std::vector<std::string> EncodeTokens(const std::string &text) const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_SENTENCE_PIECE_TOKENIZER_H_


================================================
FILE: sherpa-onnx/csrc/session.cc
================================================
// sherpa-onnx/csrc/session.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/session.h"

#include <algorithm>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/provider.h"
#if defined(__APPLE__) && (ORT_API_VERSION >= 15) && \
    !defined(SHERPA_ONNX_DISABLE_COREML)
#include "coreml_provider_factory.h"  // NOLINT
#endif

#if __ANDROID_API__ >= 27
#include "nnapi_provider_factory.h"  // NOLINT
#endif

#if defined(_WIN32) && SHERPA_ONNX_ENABLE_DIRECTML == 1
#include "dml_provider_factory.h"  // NOLINT
#endif

#if defined(SHERPA_ONNX_ENABLE_SPACEMIT)
#include "spacemit_ort_env.h"  // NOLINT
#endif

namespace sherpa_onnx {

static void OrtStatusFailure(OrtStatus *status, const char *s) {
  const auto &api = Ort::GetApi();
  const char *msg = api.GetErrorMessage(status);
  SHERPA_ONNX_LOGE(
      "Failed to enable TensorRT : %s."
      "Available providers: %s. Fallback to cuda",
      msg, s);
  api.ReleaseStatus(status);
}

Ort::SessionOptions GetSessionOptionsImpl(
    int32_t num_threads, const std::string &provider_str,
    const ProviderConfig *provider_config /*= nullptr*/) {
  Provider p = StringToProvider(provider_str);

  Ort::SessionOptions sess_opts;
  sess_opts.SetIntraOpNumThreads(num_threads);

  sess_opts.SetInterOpNumThreads(num_threads);

  std::vector<std::string> available_providers = Ort::GetAvailableProviders();
  std::ostringstream os;
  for (const auto &ep : available_providers) {
    os << ep << ", ";
  }

  // Other possible options
  // sess_opts.SetGraphOptimizationLevel(ORT_ENABLE_EXTENDED);
  // sess_opts.SetLogSeverityLevel(ORT_LOGGING_LEVEL_VERBOSE);
  // sess_opts.EnableProfiling("profile");

  // If you want to speed up initialization, please uncomment the following line
  // sess_opts.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_DISABLE_ALL);

  switch (p) {
    case Provider::kCPU:
      break;  // nothing to do for the CPU provider
    case Provider::kXnnpack: {
#if ORT_API_VERSION >= 12
      if (std::find(available_providers.begin(), available_providers.end(),
                    "XnnpackExecutionProvider") != available_providers.end()) {
        sess_opts.AppendExecutionProvider("XNNPACK");
      } else {
        SHERPA_ONNX_LOGE("Available providers: %s. Fallback to cpu!",
                         os.str().c_str());
      }
#else
      SHERPA_ONNX_LOGE(
          "Does not support xnnpack for onnxruntime: %d. Fallback to cpu!",
          static_cast<int32_t>(ORT_API_VERSION));
#endif
      break;
    }
    case Provider::kTRT: {
      if (provider_config == nullptr) {
        SHERPA_ONNX_LOGE(
            "Tensorrt support for Online models only,"
            "Must be extended for offline and others");
        exit(1);
      }
      auto trt_config = provider_config->trt_config;
      struct TrtPairs {
        const char *op_keys;
        const char *op_values;
      };

      auto device_id = std::to_string(provider_config->device);
      auto trt_max_workspace_size =
          std::to_string(trt_config.trt_max_workspace_size);
      auto trt_max_partition_iterations =
          std::to_string(trt_config.trt_max_partition_iterations);
      auto trt_min_subgraph_size =
          std::to_string(trt_config.trt_min_subgraph_size);
      auto trt_fp16_enable = std::to_string(trt_config.trt_fp16_enable);
      auto trt_detailed_build_log =
          std::to_string(trt_config.trt_detailed_build_log);
      auto trt_engine_cache_enable =
          std::to_string(trt_config.trt_engine_cache_enable);
      auto trt_timing_cache_enable =
          std::to_string(trt_config.trt_timing_cache_enable);
      auto trt_dump_subgraphs = std::to_string(trt_config.trt_dump_subgraphs);
      std::vector<TrtPairs> trt_options = {
          {"device_id", device_id.c_str()},
          {"trt_max_workspace_size", trt_max_workspace_size.c_str()},
          {"trt_max_partition_iterations",
           trt_max_partition_iterations.c_str()},
          {"trt_min_subgraph_size", trt_min_subgraph_size.c_str()},
          {"trt_fp16_enable", trt_fp16_enable.c_str()},
          {"trt_detailed_build_log", trt_detailed_build_log.c_str()},
          {"trt_engine_cache_enable", trt_engine_cache_enable.c_str()},
          {"trt_engine_cache_path", trt_config.trt_engine_cache_path.c_str()},
          {"trt_timing_cache_enable", trt_timing_cache_enable.c_str()},
          {"trt_timing_cache_path", trt_config.trt_timing_cache_path.c_str()},
          {"trt_dump_subgraphs", trt_dump_subgraphs.c_str()}};
      // ToDo : Trt configs
      // "trt_int8_enable"
      // "trt_int8_use_native_calibration_table"

      std::vector<const char *> option_keys, option_values;
      for (const TrtPairs &pair : trt_options) {
        option_keys.emplace_back(pair.op_keys);
        option_values.emplace_back(pair.op_values);
      }

      std::vector<std::string> available_providers =
          Ort::GetAvailableProviders();
      if (std::find(available_providers.begin(), available_providers.end(),
                    "TensorrtExecutionProvider") != available_providers.end()) {
        const auto &api = Ort::GetApi();

        OrtTensorRTProviderOptionsV2 *tensorrt_options = nullptr;
        OrtStatus *statusC =
            api.CreateTensorRTProviderOptions(&tensorrt_options);
        OrtStatus *statusU = api.UpdateTensorRTProviderOptions(
            tensorrt_options, option_keys.data(), option_values.data(),
            option_keys.size());
        sess_opts.AppendExecutionProvider_TensorRT_V2(*tensorrt_options);

        if (statusC) {
          OrtStatusFailure(statusC, os.str().c_str());
        }
        if (statusU) {
          OrtStatusFailure(statusU, os.str().c_str());
        }

        api.ReleaseTensorRTProviderOptions(tensorrt_options);
      }
      // break; is omitted here intentionally so that
      // if TRT not available, CUDA will be used
    }
    case Provider::kCUDA: {
      if (std::find(available_providers.begin(), available_providers.end(),
                    "CUDAExecutionProvider") != available_providers.end()) {
        // The CUDA provider is available, proceed with setting the options
        OrtCUDAProviderOptions options;

        if (provider_config != nullptr) {
          options.device_id = provider_config->device;
          options.cudnn_conv_algo_search = OrtCudnnConvAlgoSearch(
              provider_config->cuda_config.cudnn_conv_algo_search);
        } else {
          options.device_id = 0;
          // Default OrtCudnnConvAlgoSearchExhaustive is extremely slow
          options.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchHeuristic;
          // set more options on need
        }
        sess_opts.AppendExecutionProvider_CUDA(options);
      } else {
        SHERPA_ONNX_LOGE(
            "Please compile with -DSHERPA_ONNX_ENABLE_GPU=ON. Available "
            "providers: %s. Fallback to cpu!",
            os.str().c_str());
      }
      break;
    }
    case Provider::kDirectML: {
#if defined(_WIN32) && SHERPA_ONNX_ENABLE_DIRECTML == 1
      sess_opts.DisableMemPattern();
      sess_opts.SetExecutionMode(ORT_SEQUENTIAL);
      int32_t device_id = 0;
      OrtStatus *status =
          OrtSessionOptionsAppendExecutionProvider_DML(sess_opts, device_id);
      if (status) {
        const auto &api = Ort::GetApi();
        const char *msg = api.GetErrorMessage(status);
        SHERPA_ONNX_LOGE("Failed to enable DirectML: %s. Fallback to cpu", msg);
        api.ReleaseStatus(status);
      }
#else
      SHERPA_ONNX_LOGE("DirectML is for Windows only. Fallback to cpu!");
#endif
      break;
    }
    case Provider::kCoreML: {
#if defined(__APPLE__) && (ORT_API_VERSION >= 15) && \
    !defined(SHERPA_ONNX_DISABLE_COREML)
      uint32_t coreml_flags = 0;
      (void)OrtSessionOptionsAppendExecutionProvider_CoreML(sess_opts,
                                                            coreml_flags);
#else
      SHERPA_ONNX_LOGE(
          "CoreML is for Apple only since onnxruntime>=1.15. Fallback to cpu!");
#endif
      break;
    }
    case Provider::kNNAPI: {
#if __ANDROID_API__ >= 27
      SHERPA_ONNX_LOGE("Current API level %d ", (int32_t)__ANDROID_API__);

      // Please see
      // https://onnxruntime.ai/docs/execution-providers/NNAPI-ExecutionProvider.html#usage
      // to enable different flags
      uint32_t nnapi_flags = 0;
      // nnapi_flags |= NNAPI_FLAG_USE_FP16;
      // nnapi_flags |= NNAPI_FLAG_CPU_DISABLED;
      OrtStatus *status = OrtSessionOptionsAppendExecutionProvider_Nnapi(
          sess_opts, nnapi_flags);

      if (status) {
        const auto &api = Ort::GetApi();
        const char *msg = api.GetErrorMessage(status);
        SHERPA_ONNX_LOGE(
            "Failed to enable NNAPI: %s. Available providers: %s. Fallback to "
            "cpu",
            msg, os.str().c_str());
        api.ReleaseStatus(status);
      } else {
        SHERPA_ONNX_LOGE("Use nnapi");
      }
#elif defined(__ANDROID_API__)
      SHERPA_ONNX_LOGE(
          "Android NNAPI requires API level >= 27. Current API level %d "
          "Fallback to cpu!",
          (int32_t)__ANDROID_API__);
#else
      SHERPA_ONNX_LOGE("NNAPI is for Android only. Fallback to cpu");
#endif
      break;
    }
    case Provider::kSpacemiT: {
#if defined(SHERPA_ONNX_ENABLE_SPACEMIT)
      SHERPA_ONNX_LOGE("Use SpacemiT Execution Provider");
      // when using SpacemiT Execution Provider, set intra_op_num_threads and
      // inter_op_num_threads to 1 can improve performance.
      // all ops run on ep, no need to create multiple threads in onnxruntime.
      // ep will create SPACEMIT_EP_INTRA_THREAD_NUM threads as intra threads.
      std::unordered_map<std::string, std::string> provider_options;
      SHERPA_ONNX_LOGE("Set IntraOpNumThreads to 1");
      sess_opts.SetIntraOpNumThreads(1);
      SHERPA_ONNX_LOGE("Set InterOpNumThreads to 1");
      sess_opts.SetInterOpNumThreads(1);
      SHERPA_ONNX_LOGE("Set SPACEMIT_EP_INTRA_THREAD_NUM to %d", num_threads);
      provider_options.insert(std::make_pair("SPACEMIT_EP_INTRA_THREAD_NUM",
                                             std::to_string(num_threads)));
      OrtStatus *sts =
          Ort::SessionOptionsSpaceMITEnvInit(sess_opts, provider_options);
      if (sts) {
        const auto &api = Ort::GetApi();
        const char *msg = api.GetErrorMessage(sts);
        SHERPA_ONNX_LOGE(
            "Failed to enable SpacemiT Execution Provider: %s. Fallback to cpu",
            msg);
        api.ReleaseStatus(sts);
      }
#else
      SHERPA_ONNX_LOGE(
          "SpacemiT Execution Provider is for SpacemiT AI-CPUs only. Fallback "
          "to cpu!");
#endif
      break;
    }
  }
  return sess_opts;
}

Ort::SessionOptions GetSessionOptions(const OnlineModelConfig &config) {
  return GetSessionOptionsImpl(config.num_threads,
                               config.provider_config.provider,
                               &config.provider_config);
}

Ort::SessionOptions GetSessionOptions(const OnlineModelConfig &config,
                                      const std::string &model_type) {
  /*
    Transducer models : Only encoder will run with tensorrt,
                        decoder and joiner will run with cuda
  */
  if (config.provider_config.provider == "trt" &&
      (model_type == "decoder" || model_type == "joiner")) {
    return GetSessionOptionsImpl(config.num_threads, "cuda",
                                 &config.provider_config);
  }
  return GetSessionOptionsImpl(config.num_threads,
                               config.provider_config.provider,
                               &config.provider_config);
}

Ort::SessionOptions GetSessionOptions(const OfflineLMConfig &config) {
  return GetSessionOptionsImpl(config.lm_num_threads, config.lm_provider);
}

Ort::SessionOptions GetSessionOptions(const OnlineLMConfig &config) {
  return GetSessionOptionsImpl(config.lm_num_threads, config.lm_provider);
}

Ort::SessionOptions GetSessionOptions(int32_t num_threads,
                                      const std::string &provider_str) {
  return GetSessionOptionsImpl(num_threads, provider_str);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/session.h
================================================
// sherpa-onnx/csrc/session.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_SESSION_H_
#define SHERPA_ONNX_CSRC_SESSION_H_

#include <string>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-lm-config.h"
#include "sherpa-onnx/csrc/online-lm-config.h"
#include "sherpa-onnx/csrc/online-model-config.h"

namespace sherpa_onnx {

Ort::SessionOptions GetSessionOptionsImpl(
    int32_t num_threads, const std::string &provider_str,
    const ProviderConfig *provider_config = nullptr);

Ort::SessionOptions GetSessionOptions(const OfflineLMConfig &config);
Ort::SessionOptions GetSessionOptions(const OnlineLMConfig &config);

Ort::SessionOptions GetSessionOptions(const OnlineModelConfig &config);

Ort::SessionOptions GetSessionOptions(const OnlineModelConfig &config,
                                      const std::string &model_type);

Ort::SessionOptions GetSessionOptions(int32_t num_threads,
                                      const std::string &provider_str);

template <typename T>
Ort::SessionOptions GetSessionOptions(const T &config) {
  return GetSessionOptionsImpl(config.num_threads, config.provider);
}

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_SESSION_H_


================================================
FILE: sherpa-onnx/csrc/sherpa-display.h
================================================
// sherpa-onnx/csrc/sherpa-display.h
//
// Copyright (c)  2025  Xiaomi Corporation
#pragma once

#include <stdlib.h>

#include <cstdio>
#include <ctime>
#include <iomanip>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

namespace sherpa_onnx {

class SherpaDisplay {
 public:
  void UpdateText(const std::string &text) { current_text_ = text; }

  void FinalizeCurrentSentence() {
    if (!current_text_.empty() &&
        (current_text_[0] != ' ' || current_text_.size() > 1)) {
      sentences_.push_back({GetCurrentDateTime(), std::move(current_text_)});
    }
  }

  void Display() const {
    if (!sentences_.empty() || !current_text_.empty()) {
      ClearScreen();
    }

    printf("=== Speech Recognition with Next-gen Kaldi ===\n");
    printf("------------------------------\n");
    if (!sentences_.empty()) {
      int32_t i = 1;
      for (const auto &p : sentences_) {
        printf("[%s] %d. %s\n", p.first.c_str(), i, p.second.c_str());
        i += 1;
      }

      printf("------------------------------\n");
    }

    if (!current_text_.empty()) {
      printf("Recognizing: %s\n", current_text_.c_str());
    }
  }

 private:
  static void ClearScreen() {
#ifdef _MSC_VER
    auto ret = system("cls");
#else
    auto ret = system("clear");
#endif
    (void)ret;
  }

  static std::string GetCurrentDateTime() {
    std::ostringstream os;
    auto t = std::time(nullptr);
    auto tm = std::localtime(&t);
    os << std::put_time(tm, "%Y-%m-%d %H:%M:%S");
    return os.str();
  }

 private:
  std::vector<std::pair<std::string, std::string>> sentences_;
  std::string current_text_;
};

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx-alsa-offline-audio-tagging.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx-alsa-offline-audio-tagging.cc
//
// Copyright (c)  2022-2024  Xiaomi Corporation

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <mutex>  // NOLINT
#include <string>
#include <thread>  // NOLINT
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/alsa.h"
#include "sherpa-onnx/csrc/audio-tagging.h"
#include "sherpa-onnx/csrc/macros.h"

enum class State {
  kIdle,
  kRecording,
  kDecoding,
};

State state = State::kIdle;

// true to stop the program and exit
bool stop = false;

std::vector<float> samples;
std::mutex samples_mutex;

static void DetectKeyPress() {
  SHERPA_ONNX_LOGE("Press Enter to start");
  int32_t key;
  while (!stop && (key = getchar())) {
    if (key != 0x0a) {
      continue;
    }

    switch (state) {
      case State::kIdle:
        SHERPA_ONNX_LOGE("Start recording. Press Enter to stop recording");
        state = State::kRecording;
        {
          std::lock_guard<std::mutex> lock(samples_mutex);
          samples.clear();
        }
        break;
      case State::kRecording:
        SHERPA_ONNX_LOGE("Stop recording. Decoding ...");
        state = State::kDecoding;
        break;
      case State::kDecoding:
        break;
    }
  }
}

static void Record(const char *device_name, int32_t expected_sample_rate) {
  sherpa_onnx::Alsa alsa(device_name);

  if (alsa.GetExpectedSampleRate() != expected_sample_rate) {
    fprintf(stderr, "sample rate: %d != %d\n", alsa.GetExpectedSampleRate(),
            expected_sample_rate);
    exit(-1);
  }

  int32_t chunk = 0.1 * alsa.GetActualSampleRate();
  while (!stop) {
    const std::vector<float> &s = alsa.Read(chunk);
    std::lock_guard<std::mutex> lock(samples_mutex);
    samples.insert(samples.end(), s.begin(), s.end());
  }
}

static void Handler(int32_t sig) {
  stop = true;
  fprintf(stderr, "\nCaught Ctrl + C. Press Enter to exit\n");
}

int32_t main(int32_t argc, char *argv[]) {
  signal(SIGINT, Handler);

  const char *kUsageMessage = R"usage(
Audio tagging from microphone (Linux only).
Usage:

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
tar xvf sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
rm sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2

./bin/sherpa-onnx-alsa-offline-audio-tagging \
  --zipformer-model=./sherpa-onnx-zipformer-audio-tagging-2024-04-09/model.onnx \
  --labels=./sherpa-onnx-zipformer-audio-tagging-2024-04-09/class_labels_indices.csv \
    device_name

Please refer to
https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models
for a list of pre-trained models to download.

The device name specifies which microphone to use in case there are several
on your system. You can use

  arecord -l

to find all available microphones on your computer. For instance, if it outputs

**** List of CAPTURE Hardware Devices ****
card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
  Subdevices: 1/1
  Subdevice #0: subdevice #0

and if you want to select card 3 and device 0 on that card, please use:

  plughw:3,0

as the device_name.
)usage";

  sherpa_onnx::ParseOptions po(kUsageMessage);
  sherpa_onnx::AudioTaggingConfig config;
  config.Register(&po);

  po.Read(argc, argv);
  if (po.NumArgs() != 1) {
    fprintf(stderr, "Please provide only 1 argument: the device name\n");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "%s\n", config.ToString().c_str());

  if (!config.Validate()) {
    fprintf(stderr, "Errors in config!\n");
    return -1;
  }

  SHERPA_ONNX_LOGE("Creating audio tagger ...");
  sherpa_onnx::AudioTagging tagger(config);
  SHERPA_ONNX_LOGE("Audio tagger created created!");

  std::string device_name = po.GetArg(1);
  fprintf(stderr, "Use recording device: %s\n", device_name.c_str());

  int32_t sample_rate = 16000;  // fixed to 16000Hz for all models from icefall

  std::thread t2(Record, device_name.c_str(), sample_rate);
  using namespace std::chrono_literals;  // NOLINT
  std::this_thread::sleep_for(100ms);    // sleep for 100ms
  std::thread t(DetectKeyPress);

  while (!stop) {
    switch (state) {
      case State::kIdle:
        break;
      case State::kRecording:
        break;
      case State::kDecoding: {
        std::vector<float> buf;
        {
          std::lock_guard<std::mutex> lock(samples_mutex);
          buf = std::move(samples);
        }
        SHERPA_ONNX_LOGE("Computing...");
        auto s = tagger.CreateStream();
        s->AcceptWaveform(sample_rate, buf.data(), buf.size());
        auto results = tagger.Compute(s.get());
        SHERPA_ONNX_LOGE("Result is:");

        int32_t i = 0;
        std::ostringstream os;
        for (const auto &event : results) {
          os << i << ": " << event.ToString() << "\n";
          i += 1;
        }

        SHERPA_ONNX_LOGE("\n%s\n", os.str().c_str());

        state = State::kIdle;
        SHERPA_ONNX_LOGE("Press Enter to start");
        break;
      }
    }

    std::this_thread::sleep_for(20ms);  // sleep for 20ms
  }
  t.join();
  t2.join();

  return 0;
}


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx-alsa-offline-speaker-identification.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx-alsa-offline-speaker-identification.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <fstream>
#include <mutex>  // NOLINT
#include <sstream>
#include <string>
#include <thread>  // NOLINT
#include <unordered_map>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/alsa.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/speaker-embedding-extractor.h"
#include "sherpa-onnx/csrc/speaker-embedding-manager.h"
#include "sherpa-onnx/csrc/wave-reader.h"

enum class State {
  kIdle,
  kRecording,
  kComputing,
};

State state = State::kIdle;

// true to stop the program and exit
bool stop = false;

std::vector<float> samples;
std::mutex samples_mutex;

static void DetectKeyPress() {
  SHERPA_ONNX_LOGE("\nPress Enter to start");
  int32_t key;
  while (!stop && (key = getchar())) {
    if (key != 0x0a) {
      continue;
    }

    switch (state) {
      case State::kIdle:
        SHERPA_ONNX_LOGE("\nStart recording. Press Enter to stop recording");
        state = State::kRecording;
        {
          std::lock_guard<std::mutex> lock(samples_mutex);
          samples.clear();
        }
        break;
      case State::kRecording:
        SHERPA_ONNX_LOGE("\nStop recording. Computing ...");
        state = State::kComputing;
        break;
      case State::kComputing:
        break;
    }
  }
}

static void Record(const char *device_name, int32_t expected_sample_rate) {
  sherpa_onnx::Alsa alsa(device_name);

  if (alsa.GetExpectedSampleRate() != expected_sample_rate) {
    fprintf(stderr, "sample rate: %d != %d\n", alsa.GetExpectedSampleRate(),
            expected_sample_rate);
    exit(-1);
  }

  int32_t chunk = 0.1 * alsa.GetActualSampleRate();
  while (!stop) {
    const std::vector<float> &s = alsa.Read(chunk);
    std::lock_guard<std::mutex> lock(samples_mutex);
    samples.insert(samples.end(), s.begin(), s.end());
  }
}

static void Handler(int32_t sig) {
  stop = true;
  fprintf(stderr, "\nCaught Ctrl + C. Press Enter to exit\n");
}

static std::vector<std::vector<float>> ComputeEmbeddings(
    const std::vector<std::string> &filenames,
    sherpa_onnx::SpeakerEmbeddingExtractor *extractor) {
  std::vector<std::vector<float>> embedding_list;
  embedding_list.reserve(filenames.size());

  for (const auto &f : filenames) {
    int32_t sampling_rate = -1;

    bool is_ok = false;
    const std::vector<float> samples =
        sherpa_onnx::ReadWave(f, &sampling_rate, &is_ok);

    if (!is_ok) {
      fprintf(stderr, "Failed to read '%s'\n", f.c_str());
      exit(-1);
    }

    auto s = extractor->CreateStream();
    s->AcceptWaveform(sampling_rate, samples.data(), samples.size());
    s->InputFinished();
    auto embedding = extractor->Compute(s.get());
    embedding_list.push_back(embedding);
  }
  return embedding_list;
}

static std::unordered_map<std::string, std::vector<std::string>>
ReadSpeakerFile(const std::string &filename) {
  std::unordered_map<std::string, std::vector<std::string>> ans;

  std::ifstream is(filename);
  if (!is) {
    fprintf(stderr, "Failed to open %s", filename.c_str());
    exit(0);
  }

  std::string line;
  std::string name;
  std::string path;

  while (std::getline(is, line)) {
    std::istringstream iss(line);
    name.clear();
    path.clear();

    iss >> name >> path;
    if (!iss || !iss.eof() || name.empty() || path.empty()) {
      fprintf(stderr, "Invalid line: %s\n", line.c_str());
      exit(-1);
    }
    ans[name].push_back(path);
  }

  return ans;
}

int32_t main(int32_t argc, char *argv[]) {
  signal(SIGINT, Handler);

  const char *kUsageMessage = R"usage(
This program shows how to use non-streaming speaker identification.
Usage:

(1) Prepare a text file containing speaker related files.

Each line in the text file contains two columns. The first column is the
speaker name, while the second column contains the wave file of the speaker.

If the text file contains multiple wave files for the same speaker, then the
embeddings of these files are averaged.

An example text file is given below:

    foo /path/to/a.wav
    bar /path/to/b.wav
    foo /path/to/c.wav
    foobar /path/to/d.wav

Each wave file should contain only a single channel; the sample format
should be int16_t; the sample rate can be arbitrary.

(2) Download a model for computing speaker embeddings

Please visit
https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
to download a model. An example is given below:

    wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/wespeaker_zh_cnceleb_resnet34.onnx

Note that `zh` means Chinese, while `en` means English.

(3) Run it !

  ./bin/sherpa-onnx-alsa-offline-speaker-identification \
    --model=/path/to/your-model.onnx \
    --speaker-file=/path/to/speaker.txt \
    device_name

The device name specifies which microphone to use in case there are several
on your system. You can use

  arecord -l

to find all available microphones on your computer. For instance, if it outputs

**** List of CAPTURE Hardware Devices ****
card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
  Subdevices: 1/1
  Subdevice #0: subdevice #0

and if you want to select card 3 and device 0 on that card, please use:
  plughw:3,0
as the device_name.

)usage";

  sherpa_onnx::ParseOptions po(kUsageMessage);
  float threshold = 0.5;
  std::string speaker_file;

  po.Register("threshold", &threshold,
              "Threshold for comparing embedding scores.");

  po.Register("speaker-file", &speaker_file, "Path to speaker.txt");

  sherpa_onnx::SpeakerEmbeddingExtractorConfig config;
  config.Register(&po);

  po.Read(argc, argv);
  if (po.NumArgs() != 1) {
    fprintf(stderr, "Please provide only 1 argument: the device name\n");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "%s\n", config.ToString().c_str());

  if (!config.Validate()) {
    fprintf(stderr, "Errors in config! Please use --help to view the usage.\n");
    return -1;
  }

  SHERPA_ONNX_LOGE("\nCreating extractor ...");
  sherpa_onnx::SpeakerEmbeddingExtractor extractor(config);
  SHERPA_ONNX_LOGE("\nextractor created!");

  sherpa_onnx::SpeakerEmbeddingManager manager(extractor.Dim());

  auto name2files = ReadSpeakerFile(speaker_file);
  for (const auto &p : name2files) {
    SHERPA_ONNX_LOGE("\nProcessing speaker %s", p.first.c_str());
    auto embedding_list = ComputeEmbeddings(p.second, &extractor);
    manager.Add(p.first, embedding_list);
  }

  std::string device_name = po.GetArg(1);
  fprintf(stderr, "Use recording device: %s\n", device_name.c_str());
  int32_t sample_rate = 16000;

  std::thread t(DetectKeyPress);
  std::thread t2(Record, device_name.c_str(), sample_rate);

  while (!stop) {
    switch (state) {
      case State::kIdle:
        break;
      case State::kRecording:
        break;
      case State::kComputing: {
        std::vector<float> buf;
        {
          std::lock_guard<std::mutex> lock(samples_mutex);
          buf = std::move(samples);
        }

        auto s = extractor.CreateStream();
        s->AcceptWaveform(sample_rate, buf.data(), buf.size());
        s->InputFinished();
        auto embedding = extractor.Compute(s.get());
        auto name = manager.Search(embedding.data(), threshold);

        if (name.empty()) {
          name = "--Unknown--";
        }

        SHERPA_ONNX_LOGE("\nDone!\nDetected speaker is: %s", name.c_str());

        state = State::kIdle;
        SHERPA_ONNX_LOGE("\nPress Enter to start");
        break;
      }
    }

    using namespace std::chrono_literals;  // NOLINT
    std::this_thread::sleep_for(20ms);     // sleep for 20ms
  }

  t.join();
  t2.join();

  return 0;
}


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx-alsa-offline.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx-alsa-offline.cc
//
// Copyright (c)  2022-2024  Xiaomi Corporation

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <cctype>  // std::tolower
#include <chrono>
#include <mutex>
#include <string>
#include <thread>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/alsa.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"

enum class State {
  kIdle,
  kRecording,
  kDecoding,
};

State state = State::kIdle;

// true to stop the program and exit
bool stop = false;

std::vector<float> samples;
std::mutex samples_mutex;

static void DetectKeyPress() {
  SHERPA_ONNX_LOGE("Press Enter to start");
  int32_t key;
  while (!stop && (key = getchar())) {
    if (key != 0x0a) {
      continue;
    }

    switch (state) {
      case State::kIdle:
        SHERPA_ONNX_LOGE("Start recording. Press Enter to stop recording");
        state = State::kRecording;
        {
          std::lock_guard<std::mutex> lock(samples_mutex);
          samples.clear();
        }
        break;
      case State::kRecording:
        SHERPA_ONNX_LOGE("Stop recording. Decoding ...");
        state = State::kDecoding;
        break;
      case State::kDecoding:
        break;
    }
  }
}

static void Record(const char *device_name, int32_t expected_sample_rate) {
  sherpa_onnx::Alsa alsa(device_name);

  if (alsa.GetExpectedSampleRate() != expected_sample_rate) {
    fprintf(stderr, "sample rate: %d != %d\n", alsa.GetExpectedSampleRate(),
            expected_sample_rate);
    exit(-1);
  }

  int32_t chunk = 0.1 * alsa.GetActualSampleRate();
  while (!stop) {
    const std::vector<float> &s = alsa.Read(chunk);
    std::lock_guard<std::mutex> lock(samples_mutex);
    samples.insert(samples.end(), s.begin(), s.end());
  }
}

static void Handler(int32_t sig) {
  stop = true;
  fprintf(stderr, "\nCaught Ctrl + C. Press Enter to exit\n");
}

int32_t main(int32_t argc, char *argv[]) {
  signal(SIGINT, Handler);

  const char *kUsageMessage = R"usage(
This program uses non-streaming models with microphone for speech recognition.
Usage:

(1) Transducer from icefall

  ./bin/sherpa-onnx-alsa-offline \
    --tokens=/path/to/tokens.txt \
    --encoder=/path/to/encoder.onnx \
    --decoder=/path/to/decoder.onnx \
    --joiner=/path/to/joiner.onnx \
    --num-threads=2 \
    --decoding-method=greedy_search \
    device_name

(2) Paraformer from FunASR

  ./bin/sherpa-onnx-alsa-offline \
    --tokens=/path/to/tokens.txt \
    --paraformer=/path/to/model.onnx \
    --num-threads=1 \
    device_name

(3) Whisper models

  ./bin/sherpa-onnx-alsa-offline \
    --whisper-encoder=./sherpa-onnx-whisper-base.en/base.en-encoder.int8.onnx \
    --whisper-decoder=./sherpa-onnx-whisper-base.en/base.en-decoder.int8.onnx \
    --tokens=./sherpa-onnx-whisper-base.en/base.en-tokens.txt \
    --num-threads=1 \
    device_name

Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
for a list of pre-trained models to download.

The device name specifies which microphone to use in case there are several
on your system. You can use

  arecord -l

to find all available microphones on your computer. For instance, if it outputs

**** List of CAPTURE Hardware Devices ****
card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
  Subdevices: 1/1
  Subdevice #0: subdevice #0

and if you want to select card 3 and device 0 on that card, please use:

  plughw:3,0

as the device_name.
)usage";

  sherpa_onnx::ParseOptions po(kUsageMessage);
  sherpa_onnx::OfflineRecognizerConfig config;
  config.Register(&po);

  po.Read(argc, argv);
  if (po.NumArgs() != 1) {
    fprintf(stderr, "Please provide only 1 argument: the device name\n");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "%s\n", config.ToString().c_str());

  if (!config.Validate()) {
    fprintf(stderr, "Errors in config!\n");
    return -1;
  }

  SHERPA_ONNX_LOGE("Creating recognizer ...");
  sherpa_onnx::OfflineRecognizer recognizer(config);
  SHERPA_ONNX_LOGE("Recognizer created!");

  std::string device_name = po.GetArg(1);
  fprintf(stderr, "Use recording device: %s\n", device_name.c_str());

  int32_t sample_rate = config.feat_config.sampling_rate;

  std::thread t(DetectKeyPress);
  std::thread t2(Record, device_name.c_str(), sample_rate);

  while (!stop) {
    switch (state) {
      case State::kIdle:
        break;
      case State::kRecording:
        break;
      case State::kDecoding: {
        std::vector<float> buf;
        {
          std::lock_guard<std::mutex> lock(samples_mutex);
          buf = std::move(samples);
        }

        auto s = recognizer.CreateStream();
        s->AcceptWaveform(sample_rate, buf.data(), buf.size());
        recognizer.DecodeStream(s.get());
        SHERPA_ONNX_LOGE("Decoding Done! Result is:");
        SHERPA_ONNX_LOGE("%s", s->GetResult().text.c_str());

        state = State::kIdle;
        SHERPA_ONNX_LOGE("Press Enter to start");
        break;
      }
    }

    using namespace std::chrono_literals;  // NOLINT
    std::this_thread::sleep_for(20ms);     // sleep for 20ms
  }
  t.join();
  t2.join();

  return 0;
}


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx-alsa.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx-alsa.cc
//
// Copyright (c)  2022-2023  Xiaomi Corporation
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <cctype>  // std::tolower
#include <cstdint>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/alsa.h"
#include "sherpa-onnx/csrc/display.h"
#include "sherpa-onnx/csrc/online-recognizer.h"
#include "sherpa-onnx/csrc/parse-options.h"

bool stop = false;

static void Handler(int sig) {
  stop = true;
  fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n");
}

int main(int32_t argc, char *argv[]) {
  signal(SIGINT, Handler);

  const char *kUsageMessage = R"usage(
Usage:
  ./bin/sherpa-onnx-alsa \
    --tokens=/path/to/tokens.txt \
    --encoder=/path/to/encoder.onnx \
    --decoder=/path/to/decoder.onnx \
    --joiner=/path/to/joiner.onnx \
    --provider=cpu \
    --num-threads=2 \
    --decoding-method=greedy_search \
    device_name

Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
for a list of pre-trained models to download.

The device name specifies which microphone to use in case there are several
on your system. You can use

  arecord -l

to find all available microphones on your computer. For instance, if it outputs

**** List of CAPTURE Hardware Devices ****
card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
  Subdevices: 1/1
  Subdevice #0: subdevice #0

and if you want to select card 3 and device 0 on that card, please use:

  plughw:3,0

as the device_name.
)usage";
  sherpa_onnx::ParseOptions po(kUsageMessage);
  sherpa_onnx::OnlineRecognizerConfig config;

  config.Register(&po);

  po.Read(argc, argv);
  if (po.NumArgs() != 1) {
    fprintf(stderr, "Please provide only 1 argument: the device name\n");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "%s\n", config.ToString().c_str());

  if (!config.Validate()) {
    fprintf(stderr, "Errors in config!\n");
    return -1;
  }
  sherpa_onnx::OnlineRecognizer recognizer(config);

  int32_t expected_sample_rate = config.feat_config.sampling_rate;

  std::string device_name = po.GetArg(1);
  sherpa_onnx::Alsa alsa(device_name.c_str());
  fprintf(stderr, "Use recording device: %s\n", device_name.c_str());

  if (alsa.GetExpectedSampleRate() != expected_sample_rate) {
    fprintf(stderr, "sample rate: %d != %d\n", alsa.GetExpectedSampleRate(),
            expected_sample_rate);
    exit(-1);
  }

  fprintf(stderr, "Started! Please speak\n");

  int32_t chunk = 0.1 * alsa.GetActualSampleRate();

  std::string last_text;

  auto stream = recognizer.CreateStream();

  sherpa_onnx::Display display;

  int32_t segment_index = 0;
  while (!stop) {
    const std::vector<float> &samples = alsa.Read(chunk);

    stream->AcceptWaveform(expected_sample_rate, samples.data(),
                           samples.size());

    while (recognizer.IsReady(stream.get())) {
      recognizer.DecodeStream(stream.get());
    }

    auto text = recognizer.GetResult(stream.get()).text;

    bool is_endpoint = recognizer.IsEndpoint(stream.get());

    if (is_endpoint && !config.model_config.paraformer.encoder.empty()) {
      // For streaming paraformer models, since it has a large right chunk size
      // we need to pad it on endpointing so that the last character
      // can be recognized
      std::vector<float> tail_paddings(
          static_cast<int>(1.0 * expected_sample_rate));
      stream->AcceptWaveform(expected_sample_rate, tail_paddings.data(),
                             tail_paddings.size());
      while (recognizer.IsReady(stream.get())) {
        recognizer.DecodeStream(stream.get());
      }
      text = recognizer.GetResult(stream.get()).text;
    }

    if (!text.empty() && last_text != text) {
      last_text = text;

      std::transform(text.begin(), text.end(), text.begin(),
                     [](auto c) { return std::tolower(c); });

      display.Print(segment_index, text);
      fflush(stderr);
    }

    if (is_endpoint) {
      if (!text.empty()) {
        ++segment_index;
      }

      recognizer.Reset(stream.get());
    }
  }

  return 0;
}


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx-keyword-spotter-alsa.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx-keyword-spotter-alsa.cc
//
// Copyright (c)  2024  Xiaomi Corporation
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <cstdint>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/alsa.h"
#include "sherpa-onnx/csrc/display.h"
#include "sherpa-onnx/csrc/keyword-spotter.h"
#include "sherpa-onnx/csrc/parse-options.h"

bool stop = false;

static void Handler(int sig) {
  stop = true;
  fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n");
}

int main(int32_t argc, char *argv[]) {
  signal(SIGINT, Handler);

  const char *kUsageMessage = R"usage(
Usage:
  ./bin/sherpa-onnx-keyword-spotter-alsa \
    --tokens=/path/to/tokens.txt \
    --encoder=/path/to/encoder.onnx \
    --decoder=/path/to/decoder.onnx \
    --joiner=/path/to/joiner.onnx \
    --provider=cpu \
    --num-threads=2 \
    --keywords-file=keywords.txt \
    device_name

Please refer to
https://k2-fsa.github.io/sherpa/onnx/kws/pretrained_models/index.html
for a list of pre-trained models to download.

The device name specifies which microphone to use in case there are several
on your system. You can use

  arecord -l

to find all available microphones on your computer. For instance, if it outputs

**** List of CAPTURE Hardware Devices ****
card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
  Subdevices: 1/1
  Subdevice #0: subdevice #0

and if you want to select card 3 and device 0 on that card, please use:

  plughw:3,0

as the device_name.
)usage";
  sherpa_onnx::ParseOptions po(kUsageMessage);
  sherpa_onnx::KeywordSpotterConfig config;

  config.Register(&po);

  po.Read(argc, argv);
  if (po.NumArgs() != 1) {
    fprintf(stderr, "Please provide only 1 argument: the device name\n");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "%s\n", config.ToString().c_str());

  if (!config.Validate()) {
    fprintf(stderr, "Errors in config!\n");
    return -1;
  }
  sherpa_onnx::KeywordSpotter spotter(config);

  int32_t expected_sample_rate = config.feat_config.sampling_rate;

  std::string device_name = po.GetArg(1);
  sherpa_onnx::Alsa alsa(device_name.c_str());
  fprintf(stderr, "Use recording device: %s\n", device_name.c_str());

  if (alsa.GetExpectedSampleRate() != expected_sample_rate) {
    fprintf(stderr, "sample rate: %d != %d\n", alsa.GetExpectedSampleRate(),
            expected_sample_rate);
    exit(-1);
  }

  int32_t chunk = 0.1 * alsa.GetActualSampleRate();

  std::string last_text;

  auto stream = spotter.CreateStream();

  sherpa_onnx::Display display;

  int32_t keyword_index = 0;
  while (!stop) {
    const std::vector<float> &samples = alsa.Read(chunk);

    stream->AcceptWaveform(expected_sample_rate, samples.data(),
                           samples.size());

    while (spotter.IsReady(stream.get())) {
      spotter.DecodeStream(stream.get());

      const auto r = spotter.GetResult(stream.get());
      if (!r.keyword.empty()) {
        display.Print(keyword_index, r.AsJsonString());
        fflush(stderr);
        keyword_index++;

        spotter.Reset(stream.get());
      }
    }
  }

  return 0;
}


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx-keyword-spotter-microphone.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx-keyword-spotter-microphone.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>

#include "portaudio.h"  // NOLINT
#include "sherpa-onnx/csrc/display.h"
#include "sherpa-onnx/csrc/keyword-spotter.h"
#include "sherpa-onnx/csrc/microphone.h"

bool stop = false;
float mic_sample_rate = 16000;

static int32_t RecordCallback(const void *input_buffer,
                              void * /*output_buffer*/,
                              unsigned long frames_per_buffer,  // NOLINT
                              const PaStreamCallbackTimeInfo * /*time_info*/,
                              PaStreamCallbackFlags /*status_flags*/,
                              void *user_data) {
  auto stream = reinterpret_cast<sherpa_onnx::OnlineStream *>(user_data);

  stream->AcceptWaveform(mic_sample_rate,
                         reinterpret_cast<const float *>(input_buffer),
                         frames_per_buffer);

  return stop ? paComplete : paContinue;
}

static void Handler(int32_t /*sig*/) {
  stop = true;
  fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n");
}

int32_t main(int32_t argc, char *argv[]) {
  signal(SIGINT, Handler);

  const char *kUsageMessage = R"usage(
This program uses streaming models with microphone for keyword spotting.
Usage:

  ./bin/sherpa-onnx-keyword-spotter-microphone \
    --tokens=/path/to/tokens.txt \
    --encoder=/path/to/encoder.onnx \
    --decoder=/path/to/decoder.onnx \
    --joiner=/path/to/joiner.onnx \
    --provider=cpu \
    --num-threads=1 \
    --keywords-file=keywords.txt

Please refer to
https://k2-fsa.github.io/sherpa/onnx/kws/pretrained_models/index.html
for a list of pre-trained models to download.
)usage";

  sherpa_onnx::ParseOptions po(kUsageMessage);
  sherpa_onnx::KeywordSpotterConfig config;

  config.Register(&po);
  po.Read(argc, argv);
  if (po.NumArgs() != 0) {
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "%s\n", config.ToString().c_str());

  if (!config.Validate()) {
    fprintf(stderr, "Errors in config!\n");
    return -1;
  }

  sherpa_onnx::KeywordSpotter spotter(config);
  auto s = spotter.CreateStream();

  sherpa_onnx::Microphone mic;

  int32_t device_index = Pa_GetDefaultInputDevice();
  if (device_index == paNoDevice) {
    fprintf(stderr, "No default input device found\n");
    fprintf(stderr, "If you are using Linux, please switch to \n");
    fprintf(stderr, " ./bin/sherpa-onnx-keyword-spotter-alsa \n");
    exit(EXIT_FAILURE);
  }

  const char *pDeviceIndex = std::getenv("SHERPA_ONNX_MIC_DEVICE");
  if (pDeviceIndex) {
    fprintf(stderr, "Use specified device: %s\n", pDeviceIndex);
    device_index = atoi(pDeviceIndex);
  }

  mic.PrintDevices(device_index);

  const char *pSampleRateStr = std::getenv("SHERPA_ONNX_MIC_SAMPLE_RATE");
  if (pSampleRateStr) {
    fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
    mic_sample_rate = atof(pSampleRateStr);
  }

  if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
                      s.get())) {
    fprintf(stderr, "portaudio error: %d\n", device_index);
    exit(EXIT_FAILURE);
  }

  int32_t keyword_index = 0;
  sherpa_onnx::Display display;
  while (!stop) {
    while (spotter.IsReady(s.get())) {
      spotter.DecodeStream(s.get());

      const auto r = spotter.GetResult(s.get());
      if (!r.keyword.empty()) {
        display.Print(keyword_index, r.AsJsonString());
        fflush(stderr);
        keyword_index++;

        spotter.Reset(s.get());
      }
    }

    Pa_Sleep(20);  // sleep for 20ms
  }

  return 0;
}


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx-keyword-spotter.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx-keyword-spotter.cc
//
// Copyright (c)  2023-2024  Xiaomi Corporation

#include <stdio.h>

#include <chrono>
#include <iomanip>
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/keyword-spotter.h"
#include "sherpa-onnx/csrc/online-stream.h"
#include "sherpa-onnx/csrc/parse-options.h"
#include "sherpa-onnx/csrc/wave-reader.h"

typedef struct {
  std::unique_ptr<sherpa_onnx::OnlineStream> online_stream;
  std::string filename;
} Stream;

int main(int32_t argc, char *argv[]) {
  const char *kUsageMessage = R"usage(
Usage:

(1) Streaming transducer

  ./bin/sherpa-onnx-keyword-spotter \
    --tokens=/path/to/tokens.txt \
    --encoder=/path/to/encoder.onnx \
    --decoder=/path/to/decoder.onnx \
    --joiner=/path/to/joiner.onnx \
    --provider=cpu \
    --num-threads=2 \
    --keywords-file=keywords.txt \
    /path/to/foo.wav [bar.wav foobar.wav ...]

Note: It supports decoding multiple files in batches

Default value for num_threads is 2.
Valid values for provider: cpu (default), cuda, coreml.
foo.wav should be of single channel, 16-bit PCM encoded wave file; its
sampling rate can be arbitrary and does not need to be 16kHz.

Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
for a list of pre-trained models to download.
)usage";

  sherpa_onnx::ParseOptions po(kUsageMessage);
  sherpa_onnx::KeywordSpotterConfig config;

  config.Register(&po);

  po.Read(argc, argv);
  if (po.NumArgs() < 1) {
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "%s\n", config.ToString().c_str());

  if (!config.Validate()) {
    fprintf(stderr, "Errors in config!\n");
    return -1;
  }

  sherpa_onnx::KeywordSpotter keyword_spotter(config);

  if (po.NumArgs() == 1) {
    const std::string wav_filename = po.GetArg(1);

    int32_t sampling_rate = -1;

    bool is_ok = false;
    const std::vector<float> samples =
        sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok);

    if (!is_ok) {
      fprintf(stderr, "Failed to read '%s'\n", wav_filename.c_str());
      return -1;
    }

    auto begin = std::chrono::steady_clock::now();

    auto s = keyword_spotter.CreateStream();
    s->AcceptWaveform(sampling_rate, samples.data(), samples.size());

    std::vector<float> tail_paddings(static_cast<int>(0.8 * sampling_rate));
    // Note: We can call AcceptWaveform() multiple times.
    s->AcceptWaveform(sampling_rate, tail_paddings.data(),
                      tail_paddings.size());

    s->InputFinished();

    while (keyword_spotter.IsReady(s.get())) {
      keyword_spotter.DecodeStream(s.get());

      auto r = keyword_spotter.GetResult(s.get());
      if (!r.keyword.empty()) {
        keyword_spotter.Reset(s.get());

        fprintf(stderr, "%s\n", wav_filename.c_str());
        fprintf(stdout, "%s\n", r.AsJsonString().c_str());
        fprintf(stderr, "\n");
      }
    }

    auto end = std::chrono::steady_clock::now();

    float duration = samples.size() / static_cast<float>(sampling_rate);

    float elapsed_seconds =
        std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
            .count() /
        1000.;
    float rtf = elapsed_seconds / duration;
    fprintf(stderr, "Number of threads: %d\n", config.model_config.num_threads);
    fprintf(stderr, "Audio duration: %.3f s\n", duration);
    fprintf(stderr, "Elapsed seconds: %.3f\n", elapsed_seconds);
    fprintf(stderr, "RTF = %.3f/%.3f = %.3f\n", elapsed_seconds, duration, rtf);

  } else {
    std::vector<Stream> ss;

    for (int32_t i = 1; i <= po.NumArgs(); ++i) {
      const std::string wav_filename = po.GetArg(i);
      int32_t sampling_rate = -1;

      bool is_ok = false;
      const std::vector<float> samples =
          sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok);

      if (!is_ok) {
        fprintf(stderr, "Failed to read '%s'\n", wav_filename.c_str());
        return -1;
      }

      auto s = keyword_spotter.CreateStream();
      s->AcceptWaveform(sampling_rate, samples.data(), samples.size());

      std::vector<float> tail_paddings(static_cast<int>(0.8 * sampling_rate));
      // Note: We can call AcceptWaveform() multiple times.
      s->AcceptWaveform(sampling_rate, tail_paddings.data(),
                        tail_paddings.size());

      // Call InputFinished() to indicate that no audio samples are available
      s->InputFinished();
      ss.push_back({std::move(s), wav_filename});
    }

    std::vector<sherpa_onnx::OnlineStream *> ready_streams;
    for (;;) {
      ready_streams.clear();
      for (auto &s : ss) {
        const auto p_ss = s.online_stream.get();
        if (keyword_spotter.IsReady(p_ss)) {
          ready_streams.push_back(p_ss);
        }
        std::ostringstream os;
        const auto r = keyword_spotter.GetResult(p_ss);
        if (!r.keyword.empty()) {
          os << s.filename << "\n";
          fprintf(stderr, "%s", os.str().c_str());
          fprintf(stdout, "%s\n", r.AsJsonString().c_str());
          fprintf(stderr, "\n");
        }
      }

      if (ready_streams.empty()) {
        break;
      }
      keyword_spotter.DecodeStreams(ready_streams.data(), ready_streams.size());
    }
  }
  return 0;
}


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx-microphone-offline-audio-tagging.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx-microphone-offline-audio-tagging.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <cctype>  // std::tolower
#include <mutex>
#include <thread>
#include <utility>
#include <vector>

#include "portaudio.h"  // NOLINT
#include "sherpa-onnx/csrc/audio-tagging.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/microphone.h"

enum class State {
  kIdle,
  kRecording,
  kDecoding,
};

State state = State::kIdle;

// true to stop the program and exit
bool stop = false;

std::vector<float> samples;
std::mutex samples_mutex;

static void DetectKeyPress() {
  SHERPA_ONNX_LOGE("Press Enter to start");
  int32_t key;
  while (!stop && (key = getchar())) {
    if (key != 0x0a) {
      continue;
    }

    switch (state) {
      case State::kIdle:
        SHERPA_ONNX_LOGE("Start recording. Press Enter to stop recording");
        state = State::kRecording;
        {
          std::lock_guard<std::mutex> lock(samples_mutex);
          samples.clear();
        }
        break;
      case State::kRecording:
        SHERPA_ONNX_LOGE("Stop recording. Decoding ...");
        state = State::kDecoding;
        break;
      case State::kDecoding:
        break;
    }
  }
}

static int32_t RecordCallback(const void *input_buffer,
                              void * /*output_buffer*/,
                              unsigned long frames_per_buffer,  // NOLINT
                              const PaStreamCallbackTimeInfo * /*time_info*/,
                              PaStreamCallbackFlags /*status_flags*/,
                              void * /*user_data*/) {
  std::lock_guard<std::mutex> lock(samples_mutex);

  auto p = reinterpret_cast<const float *>(input_buffer);
  samples.insert(samples.end(), p, p + frames_per_buffer);

  return stop ? paComplete : paContinue;
}

static void Handler(int32_t /*sig*/) {
  stop = true;
  fprintf(stderr, "\nCaught Ctrl + C. Press Enter to exit\n");
}

int32_t main(int32_t argc, char *argv[]) {
  signal(SIGINT, Handler);

  const char *kUsageMessage = R"usage(
Audio tagging from microphone.
Usage:

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
tar xvf sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
rm sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2

./bin/sherpa-onnx-microphone-offline-audio-tagging \
  --zipformer-model=./sherpa-onnx-zipformer-audio-tagging-2024-04-09/model.onnx \
  --labels=./sherpa-onnx-zipformer-audio-tagging-2024-04-09/class_labels_indices.csv

Please see
https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models
for more models.
)usage";

  sherpa_onnx::ParseOptions po(kUsageMessage);
  sherpa_onnx::AudioTaggingConfig config;
  config.Register(&po);

  po.Read(argc, argv);
  if (po.NumArgs() != 0) {
    fprintf(stderr, "\nThis program does not support positional arguments\n\n");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "%s\n", config.ToString().c_str());

  if (!config.Validate()) {
    fprintf(stderr, "Errors in config!\n");
    return -1;
  }

  SHERPA_ONNX_LOGE("Creating audio tagger ...");
  sherpa_onnx::AudioTagging tagger(config);
  SHERPA_ONNX_LOGE("Audio tagger created created!");

  sherpa_onnx::Microphone mic;

  int32_t device_index = Pa_GetDefaultInputDevice();
  if (device_index == paNoDevice) {
    fprintf(stderr, "No default input device found\n");
    fprintf(stderr, "If you are using Linux, please switch to \n");
    fprintf(stderr, " ./bin/sherpa-onnx-alsa-offline-audio-tagging \n");
    exit(EXIT_FAILURE);
  }

  const char *pDeviceIndex = std::getenv("SHERPA_ONNX_MIC_DEVICE");
  if (pDeviceIndex) {
    fprintf(stderr, "Use specified device: %s\n", pDeviceIndex);
    device_index = atoi(pDeviceIndex);
  }

  mic.PrintDevices(device_index);
  float mic_sample_rate = 16000;
  const char *pSampleRateStr = std::getenv("SHERPA_ONNX_MIC_SAMPLE_RATE");
  if (pSampleRateStr) {
    fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
    mic_sample_rate = atof(pSampleRateStr);
  }

  if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
                      nullptr /* user_data */)) {
    fprintf(stderr, "portaudio error: %d\n", device_index);
    exit(EXIT_FAILURE);
  }

  std::thread t(DetectKeyPress);
  while (!stop) {
    switch (state) {
      case State::kIdle:
        break;
      case State::kRecording:
        break;
      case State::kDecoding: {
        std::vector<float> buf;
        {
          std::lock_guard<std::mutex> lock(samples_mutex);
          buf = std::move(samples);
        }

        SHERPA_ONNX_LOGE("Computing...");
        auto s = tagger.CreateStream();
        s->AcceptWaveform(mic_sample_rate, buf.data(), buf.size());
        auto results = tagger.Compute(s.get());

        SHERPA_ONNX_LOGE("Result is:");

        int32_t i = 0;
        std::ostringstream os;
        for (const auto &event : results) {
          os << i << ": " << event.ToString() << "\n";
          i += 1;
        }

        SHERPA_ONNX_LOGE("\n%s\n", os.str().c_str());

        state = State::kIdle;
        SHERPA_ONNX_LOGE("Press Enter to start");
        break;
      }
    }

    Pa_Sleep(20);  // sleep for 20ms
  }
  t.join();

  return 0;
}


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx-microphone-offline-speaker-identification.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx-microphone-offline-speaker-identification.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <fstream>
#include <mutex>
#include <sstream>
#include <string>
#include <thread>
#include <unordered_map>
#include <utility>
#include <vector>

#include "portaudio.h"  // NOLINT
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/microphone.h"
#include "sherpa-onnx/csrc/speaker-embedding-extractor.h"
#include "sherpa-onnx/csrc/speaker-embedding-manager.h"
#include "sherpa-onnx/csrc/wave-reader.h"

enum class State {
  kIdle,
  kRecording,
  kComputing,
};

State state = State::kIdle;

// true to stop the program and exit
bool stop = false;

std::vector<float> samples;
std::mutex samples_mutex;

static void DetectKeyPress() {
  SHERPA_ONNX_LOGE("\nPress Enter to start");
  int32_t key;
  while (!stop && (key = getchar())) {
    if (key != 0x0a) {
      continue;
    }

    switch (state) {
      case State::kIdle:
        SHERPA_ONNX_LOGE("\nStart recording. Press Enter to stop recording");
        state = State::kRecording;
        {
          std::lock_guard<std::mutex> lock(samples_mutex);
          samples.clear();
        }
        break;
      case State::kRecording:
        SHERPA_ONNX_LOGE("\nStop recording. Computing ...");
        state = State::kComputing;
        break;
      case State::kComputing:
        break;
    }
  }
}

static int32_t RecordCallback(const void *input_buffer,
                              void * /*output_buffer*/,
                              unsigned long frames_per_buffer,  // NOLINT
                              const PaStreamCallbackTimeInfo * /*time_info*/,
                              PaStreamCallbackFlags /*status_flags*/,
                              void *user_data) {
  std::lock_guard<std::mutex> lock(samples_mutex);

  auto p = reinterpret_cast<const float *>(input_buffer);
  samples.insert(samples.end(), p, p + frames_per_buffer);

  return stop ? paComplete : paContinue;
}

static void Handler(int32_t sig) {
  stop = true;
  fprintf(stderr, "\nCaught Ctrl + C. Press Enter to exit\n");
}

static std::vector<std::vector<float>> ComputeEmbeddings(
    const std::vector<std::string> &filenames,
    sherpa_onnx::SpeakerEmbeddingExtractor *extractor) {
  std::vector<std::vector<float>> embedding_list;
  embedding_list.reserve(filenames.size());

  for (const auto &f : filenames) {
    int32_t sampling_rate = -1;

    bool is_ok = false;
    const std::vector<float> samples =
        sherpa_onnx::ReadWave(f, &sampling_rate, &is_ok);

    if (!is_ok) {
      fprintf(stderr, "Failed to read '%s'\n", f.c_str());
      exit(-1);
    }

    auto s = extractor->CreateStream();
    s->AcceptWaveform(sampling_rate, samples.data(), samples.size());
    s->InputFinished();
    auto embedding = extractor->Compute(s.get());
    embedding_list.push_back(embedding);
  }
  return embedding_list;
}

static std::unordered_map<std::string, std::vector<std::string>>
ReadSpeakerFile(const std::string &filename) {
  std::unordered_map<std::string, std::vector<std::string>> ans;

  std::ifstream is(filename);
  if (!is) {
    fprintf(stderr, "Failed to open %s", filename.c_str());
    exit(0);
  }

  std::string line;
  std::string name;
  std::string path;

  while (std::getline(is, line)) {
    std::istringstream iss(line);
    name.clear();
    path.clear();

    iss >> name >> path;
    if (!iss || !iss.eof() || name.empty() || path.empty()) {
      fprintf(stderr, "Invalid line: %s\n", line.c_str());
      exit(-1);
    }
    ans[name].push_back(path);
  }

  return ans;
}

int32_t main(int32_t argc, char *argv[]) {
  signal(SIGINT, Handler);

  const char *kUsageMessage = R"usage(
This program shows how to use non-streaming speaker identification.
Usage:

(1) Prepare a text file containing speaker related files.

Each line in the text file contains two columns. The first column is the
speaker name, while the second column contains the wave file of the speaker.

If the text file contains multiple wave files for the same speaker, then the
embeddings of these files are averaged.

An example text file is given below:

    foo /path/to/a.wav
    bar /path/to/b.wav
    foo /path/to/c.wav
    foobar /path/to/d.wav

Each wave file should contain only a single channel; the sample format
should be int16_t; the sample rate can be arbitrary.

(2) Download a model for computing speaker embeddings

Please visit
https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
to download a model. An example is given below:

    wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/wespeaker_zh_cnceleb_resnet34.onnx

Note that `zh` means Chinese, while `en` means English.

(3) Run it !

  ./bin/sherpa-onnx-microphone-offline-speaker-identification \
    --model=/path/to/your-model.onnx \
    --speaker-file=/path/to/speaker.txt
)usage";

  sherpa_onnx::ParseOptions po(kUsageMessage);
  float threshold = 0.5;
  std::string speaker_file;

  po.Register("threshold", &threshold,
              "Threshold for comparing embedding scores.");

  po.Register("speaker-file", &speaker_file, "Path to speaker.txt");

  sherpa_onnx::SpeakerEmbeddingExtractorConfig config;
  config.Register(&po);

  po.Read(argc, argv);
  if (po.NumArgs() != 0) {
    fprintf(stderr,
            "This program does not support any positional arguments.\n");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "%s\n", config.ToString().c_str());

  if (!config.Validate()) {
    fprintf(stderr, "Errors in config! Please use --help to view the usage.\n");
    return -1;
  }

  SHERPA_ONNX_LOGE("\nCreating extractor ...");
  sherpa_onnx::SpeakerEmbeddingExtractor extractor(config);
  SHERPA_ONNX_LOGE("\nextractor created!");

  sherpa_onnx::SpeakerEmbeddingManager manager(extractor.Dim());

  auto name2files = ReadSpeakerFile(speaker_file);
  for (const auto &p : name2files) {
    SHERPA_ONNX_LOGE("\nProcessing speaker %s", p.first.c_str());
    auto embedding_list = ComputeEmbeddings(p.second, &extractor);
    manager.Add(p.first, embedding_list);
  }

  sherpa_onnx::Microphone mic;

  int32_t device_index = Pa_GetDefaultInputDevice();
  if (device_index == paNoDevice) {
    fprintf(stderr, "No default input device found\n");
    fprintf(stderr, "If you are using Linux, please switch to \n");
    fprintf(stderr,
            " ./bin/sherpa-onnx-alsa-offline-speaker-identification \n");
    exit(EXIT_FAILURE);
  }

  const char *pDeviceIndex = std::getenv("SHERPA_ONNX_MIC_DEVICE");
  if (pDeviceIndex) {
    fprintf(stderr, "Use specified device: %s\n", pDeviceIndex);
    device_index = atoi(pDeviceIndex);
  }

  mic.PrintDevices(device_index);

  float mic_sample_rate = 16000;
  const char *pSampleRateStr = std::getenv("SHERPA_ONNX_MIC_SAMPLE_RATE");
  if (pSampleRateStr) {
    fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
    mic_sample_rate = atof(pSampleRateStr);
  }

  if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
                      nullptr /* user_data */)) {
    fprintf(stderr, "portaudio error: %d\n", device_index);
    exit(EXIT_FAILURE);
  }

  std::thread t(DetectKeyPress);
  while (!stop) {
    switch (state) {
      case State::kIdle:
        break;
      case State::kRecording:
        break;
      case State::kComputing: {
        std::vector<float> buf;
        {
          std::lock_guard<std::mutex> lock(samples_mutex);
          buf = std::move(samples);
        }

        auto s = extractor.CreateStream();
        s->AcceptWaveform(mic_sample_rate, buf.data(), buf.size());
        s->InputFinished();
        auto embedding = extractor.Compute(s.get());
        auto name = manager.Search(embedding.data(), threshold);

        if (name.empty()) {
          name = "--Unknown--";
        }

        SHERPA_ONNX_LOGE("\nDone!\nDetected speaker is: %s", name.c_str());

        state = State::kIdle;
        SHERPA_ONNX_LOGE("\nPress Enter to start");
        break;
      }
    }

    Pa_Sleep(20);  // sleep for 20ms
  }
  t.join();

  return 0;
}


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx-microphone-offline.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx-microphone-offline.cc
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <cctype>  // std::tolower
#include <mutex>
#include <thread>
#include <utility>
#include <vector>

#include "portaudio.h"  // NOLINT
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/microphone.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"

enum class State {
  kIdle,
  kRecording,
  kDecoding,
};

State state = State::kIdle;

// true to stop the program and exit
bool stop = false;

std::vector<float> samples;
std::mutex samples_mutex;

static void DetectKeyPress() {
  SHERPA_ONNX_LOGE("Press Enter to start");
  int32_t key;
  while (!stop && (key = getchar())) {
    if (key != 0x0a) {
      continue;
    }

    switch (state) {
      case State::kIdle:
        SHERPA_ONNX_LOGE("Start recording. Press Enter to stop recording");
        state = State::kRecording;
        {
          std::lock_guard<std::mutex> lock(samples_mutex);
          samples.clear();
        }
        break;
      case State::kRecording:
        SHERPA_ONNX_LOGE("Stop recording. Decoding ...");
        state = State::kDecoding;
        break;
      case State::kDecoding:
        break;
    }
  }
}

static int32_t RecordCallback(const void *input_buffer,
                              void * /*output_buffer*/,
                              unsigned long frames_per_buffer,  // NOLINT
                              const PaStreamCallbackTimeInfo * /*time_info*/,
                              PaStreamCallbackFlags /*status_flags*/,
                              void * /*user_data*/) {
  std::lock_guard<std::mutex> lock(samples_mutex);

  auto p = reinterpret_cast<const float *>(input_buffer);
  samples.insert(samples.end(), p, p + frames_per_buffer);

  return stop ? paComplete : paContinue;
}

static void Handler(int32_t /*sig*/) {
  stop = true;
  fprintf(stderr, "\nCaught Ctrl + C. Press Enter to exit\n");
}

int32_t main(int32_t argc, char *argv[]) {
  signal(SIGINT, Handler);

  const char *kUsageMessage = R"usage(
This program uses non-streaming models with microphone for speech recognition.
Usage:

(1) Transducer from icefall

  ./bin/sherpa-onnx-microphone-offline \
    --tokens=/path/to/tokens.txt \
    --encoder=/path/to/encoder.onnx \
    --decoder=/path/to/decoder.onnx \
    --joiner=/path/to/joiner.onnx \
    --num-threads=2 \
    --decoding-method=greedy_search

(2) Paraformer from FunASR

  ./bin/sherpa-onnx-microphone-offline \
    --tokens=/path/to/tokens.txt \
    --paraformer=/path/to/model.onnx \
    --num-threads=1

(3) Whisper models

  ./bin/sherpa-onnx-microphone-offline \
    --whisper-encoder=./sherpa-onnx-whisper-base.en/base.en-encoder.int8.onnx \
    --whisper-decoder=./sherpa-onnx-whisper-base.en/base.en-decoder.int8.onnx \
    --tokens=./sherpa-onnx-whisper-base.en/base.en-tokens.txt \
    --num-threads=1

Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
for a list of pre-trained models to download.
)usage";

  sherpa_onnx::ParseOptions po(kUsageMessage);
  sherpa_onnx::OfflineRecognizerConfig config;
  config.Register(&po);

  po.Read(argc, argv);
  if (po.NumArgs() != 0) {
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "%s\n", config.ToString().c_str());

  if (!config.Validate()) {
    fprintf(stderr, "Errors in config!\n");
    return -1;
  }

  SHERPA_ONNX_LOGE("Creating recognizer ...");
  sherpa_onnx::OfflineRecognizer recognizer(config);
  SHERPA_ONNX_LOGE("Recognizer created!");

  sherpa_onnx::Microphone mic;

  int32_t device_index = Pa_GetDefaultInputDevice();
  if (device_index == paNoDevice) {
    fprintf(stderr, "No default input device found\n");
    fprintf(stderr, "If you are using Linux, please switch to \n");
    fprintf(stderr, " ./bin/sherpa-onnx-alsa-offline \n");
    exit(EXIT_FAILURE);
  }

  const char *pDeviceIndex = std::getenv("SHERPA_ONNX_MIC_DEVICE");
  if (pDeviceIndex) {
    fprintf(stderr, "Use specified device: %s\n", pDeviceIndex);
    device_index = atoi(pDeviceIndex);
  }

  mic.PrintDevices(device_index);

  float mic_sample_rate = 16000;
  const char *pSampleRateStr = std::getenv("SHERPA_ONNX_MIC_SAMPLE_RATE");
  if (pSampleRateStr) {
    fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
    mic_sample_rate = atof(pSampleRateStr);
  }

  if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
                      nullptr /* user_data */)) {
    fprintf(stderr, "portaudio error: %d\n", device_index);
    exit(EXIT_FAILURE);
  }

  std::thread t(DetectKeyPress);
  while (!stop) {
    switch (state) {
      case State::kIdle:
        break;
      case State::kRecording:
        break;
      case State::kDecoding: {
        std::vector<float> buf;
        {
          std::lock_guard<std::mutex> lock(samples_mutex);
          buf = std::move(samples);
        }

        auto s = recognizer.CreateStream();
        s->AcceptWaveform(mic_sample_rate, buf.data(), buf.size());
        recognizer.DecodeStream(s.get());
        SHERPA_ONNX_LOGE("Decoding Done! Result is:");
        SHERPA_ONNX_LOGE("%s", s->GetResult().text.c_str());

        state = State::kIdle;
        SHERPA_ONNX_LOGE("Press Enter to start");
        break;
      }
    }

    Pa_Sleep(20);  // sleep for 20ms
  }
  t.join();

  return 0;
}


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx-microphone.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx-microphone.cc
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <clocale>
#include <cwctype>
#include <string>
#include <vector>

#include "portaudio.h"  // NOLINT
#include "sherpa-onnx/csrc/display.h"
#include "sherpa-onnx/csrc/microphone.h"
#include "sherpa-onnx/csrc/online-recognizer.h"

bool stop = false;
float mic_sample_rate = 16000;

static int32_t RecordCallback(const void *input_buffer,
                              void * /*output_buffer*/,
                              unsigned long frames_per_buffer,  // NOLINT
                              const PaStreamCallbackTimeInfo * /*time_info*/,
                              PaStreamCallbackFlags /*status_flags*/,
                              void *user_data) {
  auto stream = reinterpret_cast<sherpa_onnx::OnlineStream *>(user_data);

  stream->AcceptWaveform(mic_sample_rate,
                         reinterpret_cast<const float *>(input_buffer),
                         frames_per_buffer);

  return stop ? paComplete : paContinue;
}

static void Handler(int32_t /*sig*/) {
  stop = true;
  fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n");
}

static std::string tolowerUnicode(const std::string &input_str) {
  // Use system locale
  std::setlocale(LC_ALL, "");

  // From char string to wchar string
  std::wstring input_wstr(input_str.size() + 1, '\0');
  std::mbstowcs(&input_wstr[0], input_str.c_str(), input_str.size());
  std::wstring lowercase_wstr;

  for (wchar_t wc : input_wstr) {
    if (std::iswupper(wc)) {
      lowercase_wstr += std::towlower(wc);
    } else {
      lowercase_wstr += wc;
    }
  }

  // Back to char string
  std::string lowercase_str(input_str.size() + 1, '\0');
  std::wcstombs(&lowercase_str[0], lowercase_wstr.c_str(),
                lowercase_wstr.size());

  return lowercase_str;
}

int32_t main(int32_t argc, char *argv[]) {
  signal(SIGINT, Handler);

  const char *kUsageMessage = R"usage(
This program uses streaming models with microphone for speech recognition.
Usage:

  ./bin/sherpa-onnx-microphone \
    --tokens=/path/to/tokens.txt \
    --encoder=/path/to/encoder.onnx \
    --decoder=/path/to/decoder.onnx \
    --joiner=/path/to/joiner.onnx \
    --provider=cpu \
    --num-threads=1 \
    --decoding-method=greedy_search

Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
for a list of pre-trained models to download.
)usage";

  sherpa_onnx::ParseOptions po(kUsageMessage);
  sherpa_onnx::OnlineRecognizerConfig config;

  config.Register(&po);
  po.Read(argc, argv);
  if (po.NumArgs() != 0) {
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "%s\n", config.ToString().c_str());

  if (!config.Validate()) {
    fprintf(stderr, "Errors in config!\n");
    return -1;
  }

  sherpa_onnx::OnlineRecognizer recognizer(config);
  auto s = recognizer.CreateStream();

  sherpa_onnx::Microphone mic;

  int32_t device_index = Pa_GetDefaultInputDevice();
  if (device_index == paNoDevice) {
    fprintf(stderr, "No default input device found\n");
    fprintf(stderr, "If you are using Linux, please switch to \n");
    fprintf(stderr, " ./bin/sherpa-onnx-alsa \n");
    exit(EXIT_FAILURE);
  }

  const char *pDeviceIndex = std::getenv("SHERPA_ONNX_MIC_DEVICE");
  if (pDeviceIndex) {
    fprintf(stderr, "Use specified device: %s\n", pDeviceIndex);
    device_index = atoi(pDeviceIndex);
  }

  mic.PrintDevices(device_index);

  float mic_sample_rate = 16000;
  const char *pSampleRateStr = std::getenv("SHERPA_ONNX_MIC_SAMPLE_RATE");
  if (pSampleRateStr) {
    mic_sample_rate = atof(pSampleRateStr);
    fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
  }

  if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
                      s.get())) {
    fprintf(stderr, "portaudio error: %d\n", device_index);
    exit(EXIT_FAILURE);
  }

  std::string last_text;
  int32_t segment_index = 0;
  sherpa_onnx::Display display(30);
  while (!stop) {
    while (recognizer.IsReady(s.get())) {
      recognizer.DecodeStream(s.get());
    }

    auto text = recognizer.GetResult(s.get()).text;
    bool is_endpoint = recognizer.IsEndpoint(s.get());

    if (is_endpoint && !config.model_config.paraformer.encoder.empty()) {
      // For streaming paraformer models, since it has a large right chunk size
      // we need to pad it on endpointing so that the last character
      // can be recognized
      std::vector<float> tail_paddings(static_cast<int>(1.0 * mic_sample_rate));
      s->AcceptWaveform(mic_sample_rate, tail_paddings.data(),
                        tail_paddings.size());
      while (recognizer.IsReady(s.get())) {
        recognizer.DecodeStream(s.get());
      }
      text = recognizer.GetResult(s.get()).text;
    }

    if (!text.empty() && last_text != text) {
      last_text = text;
      display.Print(segment_index, tolowerUnicode(text));
      fflush(stderr);
    }

    if (is_endpoint) {
      if (!text.empty()) {
        ++segment_index;
      }

      recognizer.Reset(s.get());
    }

    Pa_Sleep(20);  // sleep for 20ms
  }

  return 0;
}


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx-offline-audio-tagging.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx-offline-audio-tagging.cc
//
// Copyright (c)  2024  Xiaomi Corporation
#include <stdio.h>

#include <string>
#include <vector>

#include "sherpa-onnx/csrc/audio-tagging.h"
#include "sherpa-onnx/csrc/parse-options.h"
#include "sherpa-onnx/csrc/wave-reader.h"

int32_t main(int32_t argc, char *argv[]) {
  const char *kUsageMessage = R"usage(
Audio tagging from a file.

Usage:

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
tar xvf sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
rm sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2

./bin/sherpa-onnx-offline-audio-tagging \
  --zipformer-model=./sherpa-onnx-zipformer-audio-tagging-2024-04-09/model.onnx \
  --labels=./sherpa-onnx-zipformer-audio-tagging-2024-04-09/class_labels_indices.csv \
  sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/0.wav

Input wave files should be of single channel, 16-bit PCM encoded wave file; its
sampling rate can be arbitrary and does not need to be 16kHz.

Please see
https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models
for more models.
)usage";

  sherpa_onnx::ParseOptions po(kUsageMessage);
  sherpa_onnx::AudioTaggingConfig config;
  config.Register(&po);
  po.Read(argc, argv);

  if (po.NumArgs() != 1) {
    fprintf(stderr, "\nError: Please provide 1 wave file\n\n");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "%s\n", config.ToString().c_str());

  if (!config.Validate()) {
    fprintf(stderr, "Errors in config!\n");
    return -1;
  }

  sherpa_onnx::AudioTagging tagger(config);
  std::string wav_filename = po.GetArg(1);

  int32_t sampling_rate = -1;

  bool is_ok = false;
  const std::vector<float> samples =
      sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok);

  if (!is_ok) {
    fprintf(stderr, "Failed to read '%s'\n", wav_filename.c_str());
    return -1;
  }

  const float duration = samples.size() / static_cast<float>(sampling_rate);

  fprintf(stderr, "Start to compute\n");
  const auto begin = std::chrono::steady_clock::now();

  auto stream = tagger.CreateStream();

  stream->AcceptWaveform(sampling_rate, samples.data(), samples.size());

  auto results = tagger.Compute(stream.get());
  const auto end = std::chrono::steady_clock::now();
  fprintf(stderr, "Done\n");

  int32_t i = 0;

  for (const auto &event : results) {
    fprintf(stderr, "%d: ", i);
    fprintf(stdout, "%s\n", event.ToString().c_str());
    i += 1;
  }

  float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;
  float rtf = elapsed_seconds / duration;
  fprintf(stderr, "Num threads: %d\n", config.model.num_threads);
  fprintf(stderr, "Wave duration: %.3f\n", duration);
  fprintf(stderr, "Elapsed seconds: %.3f s\n", elapsed_seconds);
  fprintf(stderr, "Real time factor (RTF): %.3f / %.3f = %.3f\n",
          elapsed_seconds, duration, rtf);

  return 0;
}


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx-offline-denoiser.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx-offline-denoiser.cc
//
// Copyright (c)  2025  Xiaomi Corporation
#include <stdio.h>

#include <chrono>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/offline-speech-denoiser.h"
#include "sherpa-onnx/csrc/wave-reader.h"
#include "sherpa-onnx/csrc/wave-writer.h"

int main(int32_t argc, char *argv[]) {
  const char *kUsageMessage = R"usage(
Non-streaming speech denoising with sherpa-onnx.

Please visit
https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
to download models.

Usage:

(1) Use gtcrn models

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
./bin/sherpa-onnx-offline-denoiser \
  --speech-denoiser-gtcrn-model=gtcrn_simple.onnx \
  --input-wav=input.wav \
  --output-wav=output_16k.wav

(2) Use DPDFNet models at 16 kHz or 48 kHz

# Download DPDFNet models from either:
#   https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
#   https://huggingface.co/Ceva-IP/DPDFNet

./bin/sherpa-onnx-offline-denoiser \
  --speech-denoiser-dpdfnet-model=dpdfnet4.onnx \
  --input-wav=input.wav \
  --output-wav=output_16k.wav

# You can also use other 16 kHz DPDFNet models such as:
#   dpdfnet_baseline.onnx
#   dpdfnet2.onnx
#   dpdfnet8.onnx

./bin/sherpa-onnx-offline-denoiser \
  --speech-denoiser-dpdfnet-model=dpdfnet2_48khz_hr.onnx \
  --input-wav=input.wav \
  --output-wav=output_48k.wav
)usage";

  sherpa_onnx::ParseOptions po(kUsageMessage);
  sherpa_onnx::OfflineSpeechDenoiserConfig config;
  std::string input_wave;
  std::string output_wave;

  config.Register(&po);
  po.Register("input-wav", &input_wave, "Path to input wav.");
  po.Register("output-wav", &output_wave, "Path to output wav");

  po.Read(argc, argv);
  if (po.NumArgs() != 0) {
    fprintf(stderr, "Please don't give positional arguments\n");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }
  fprintf(stderr, "%s\n", config.ToString().c_str());

  if (input_wave.empty()) {
    fprintf(stderr, "Please provide --input-wav\n");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  if (output_wave.empty()) {
    fprintf(stderr, "Please provide --output-wav\n");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  sherpa_onnx::OfflineSpeechDenoiser denoiser(config);
  int32_t sampling_rate = -1;
  bool is_ok = false;
  std::vector<float> samples =
      sherpa_onnx::ReadWave(input_wave, &sampling_rate, &is_ok);
  if (!is_ok) {
    fprintf(stderr, "Failed to read '%s'\n", input_wave.c_str());
    return -1;
  }

  fprintf(stderr, "Started\n");
  const auto begin = std::chrono::steady_clock::now();
  auto result = denoiser.Run(samples.data(), samples.size(), sampling_rate);
  const auto end = std::chrono::steady_clock::now();

  float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;

  fprintf(stderr, "Done\n");
  is_ok = sherpa_onnx::WriteWave(output_wave, result.sample_rate,
                                 result.samples.data(), result.samples.size());
  if (is_ok) {
    fprintf(stderr, "Saved to %s\n", output_wave.c_str());
  } else {
    fprintf(stderr, "Failed to save to %s\n", output_wave.c_str());
  }

  float duration = samples.size() / static_cast<float>(sampling_rate);
  fprintf(stderr, "num threads: %d\n", config.model.num_threads);
  fprintf(stderr, "Elapsed seconds: %.3f s\n", elapsed_seconds);
  float rtf = elapsed_seconds / duration;
  fprintf(stderr, "Real time factor (RTF): %.3f / %.3f = %.3f\n",
          elapsed_seconds, duration, rtf);
}


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx-offline-language-identification.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx-offline-language-identification.cc
//
// Copyright (c)  2022-2024  Xiaomi Corporation

#include <stdio.h>

#include <chrono>  // NOLINT
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/parse-options.h"
#include "sherpa-onnx/csrc/spoken-language-identification.h"
#include "sherpa-onnx/csrc/wave-reader.h"

int main(int32_t argc, char *argv[]) {
  const char *kUsageMessage = R"usage(
Spoken language identification with sherpa-onnx.

Usage:

(1) Use a whisper multilingual model

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.tar.bz2
rm sherpa-onnx-whisper-tiny.tar.bz2

We only use the int8.onnx models below.

./bin/sherpa-onnx-offline-spoken-language-identification \
  --whisper-encoder=sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx \
  --whisper-decoder=sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx \
  --num-threads=1 \
  /path/to/foo.wav

foo.wav should be of single channel, 16-bit PCM encoded wave file; its
sampling rate can be arbitrary and does not need to be 16kHz.
You can find test waves for different languages at
https://hf-mirror.com/spaces/k2-fsa/spoken-language-identification/tree/main/test_wavs

Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/index.html
Note that only whisper multilingual models are supported. For instance,
"tiny" is supported but "tiny.en" is not.
for a list of pre-trained models to download.
)usage";

  sherpa_onnx::ParseOptions po(kUsageMessage);
  sherpa_onnx::SpokenLanguageIdentificationConfig config;
  config.Register(&po);

  po.Read(argc, argv);
  if (po.NumArgs() != 1) {
    fprintf(stderr, "Error: Please provide 1 wave file.\n\n");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "%s\n", config.ToString().c_str());

  if (!config.Validate()) {
    fprintf(stderr, "Errors in config!\n");
    return -1;
  }

  fprintf(stderr, "Creating spoken language identifier ...\n");
  sherpa_onnx::SpokenLanguageIdentification slid(config);

  fprintf(stderr, "Started\n");
  const std::string wav_filename = po.GetArg(1);

  int32_t sampling_rate = -1;
  bool is_ok = false;
  const std::vector<float> samples =
      sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok);
  if (!is_ok) {
    fprintf(stderr, "Failed to read '%s'\n", wav_filename.c_str());
    return -1;
  }
  float duration = samples.size() / static_cast<float>(sampling_rate);

  const auto begin = std::chrono::steady_clock::now();

  auto s = slid.CreateStream();
  s->AcceptWaveform(sampling_rate, samples.data(), samples.size());

  auto language = slid.Compute(s.get());

  const auto end = std::chrono::steady_clock::now();

  fprintf(stderr, "Done!\n\n");
  fprintf(stderr, "%s\n", wav_filename.c_str());
  fprintf(stderr, "Detected language: ");
  fprintf(stdout, "%s\n", language.c_str());

  float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;

  fprintf(stderr, "num threads: %d\n", config.num_threads);

  fprintf(stderr, "Elapsed seconds: %.3f s\n", elapsed_seconds);
  float rtf = elapsed_seconds / duration;
  fprintf(stderr, "Real time factor (RTF): %.3f / %.3f = %.3f\n",
          elapsed_seconds, duration, rtf);

  return 0;
}


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx-offline-parallel.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx-offline-parallel.cc
//
// Copyright (c)  2022-2023  cuidc

#include <stdio.h>

#include <atomic>
#include <chrono>
#include <fstream>
#include <mutex>
#include <string>
#include <thread>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/parse-options.h"
#include "sherpa-onnx/csrc/wave-reader.h"

std::atomic<int> wav_index(0);
std::mutex mtx;

std::vector<std::vector<std::string>> SplitToBatches(
    const std::vector<std::string> &input, int32_t batch_size) {
  std::vector<std::vector<std::string>> outputs;
  auto itr = input.cbegin();
  int32_t process_num = 0;

  while (process_num + batch_size <= static_cast<int32_t>(input.size())) {
    auto chunk_end = itr + batch_size;
    outputs.emplace_back(itr, chunk_end);
    itr = chunk_end;
    process_num += batch_size;
  }
  if (itr != input.cend()) {
    outputs.emplace_back(itr, input.cend());
  }
  return outputs;
}

std::vector<std::string> LoadScpFile(const std::string &wav_scp_path) {
  std::vector<std::string> wav_paths;
  std::ifstream in(wav_scp_path);
  if (!in.is_open()) {
    fprintf(stderr, "Failed to open file: %s.\n", wav_scp_path.c_str());
    return wav_paths;
  }
  std::string line, column1, column2;
  while (std::getline(in, line)) {
    std::istringstream iss(line);
    iss >> column1 >> column2;
    wav_paths.emplace_back(std::move(column2));
  }

  return wav_paths;
}

void AsrInference(const std::vector<std::vector<std::string>> &chunk_wav_paths,
                  sherpa_onnx::OfflineRecognizer *recognizer,
                  float *total_length, float *total_time) {
  std::vector<std::unique_ptr<sherpa_onnx::OfflineStream>> ss;
  std::vector<sherpa_onnx::OfflineStream *> ss_pointers;
  float duration = 0.0f;
  float elapsed_seconds_batch = 0.0f;

  // warm up
  for (const auto &wav_filename : chunk_wav_paths[0]) {
    int32_t sampling_rate = -1;
    bool is_ok = false;
    const std::vector<float> samples =
        sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok);
    if (!is_ok) {
      fprintf(stderr, "Failed to read '%s'\n", wav_filename.c_str());
      continue;
    }
    duration += samples.size() / static_cast<float>(sampling_rate);
    auto s = recognizer->CreateStream();
    s->AcceptWaveform(sampling_rate, samples.data(), samples.size());

    ss.push_back(std::move(s));
    ss_pointers.push_back(ss.back().get());
  }
  recognizer->DecodeStreams(ss_pointers.data(), ss_pointers.size());
  ss_pointers.clear();
  ss.clear();

  while (true) {
    int chunk = wav_index.fetch_add(1);
    if (chunk >= static_cast<int32_t>(chunk_wav_paths.size())) {
      break;
    }
    const auto &wav_paths = chunk_wav_paths[chunk];
    const auto begin = std::chrono::steady_clock::now();
    for (const auto &wav_filename : wav_paths) {
      int32_t sampling_rate = -1;
      bool is_ok = false;
      const std::vector<float> samples =
          sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok);
      if (!is_ok) {
        fprintf(stderr, "Failed to read '%s'\n", wav_filename.c_str());
        continue;
      }
      duration += samples.size() / static_cast<float>(sampling_rate);
      auto s = recognizer->CreateStream();
      s->AcceptWaveform(sampling_rate, samples.data(), samples.size());

      ss.push_back(std::move(s));
      ss_pointers.push_back(ss.back().get());
    }
    recognizer->DecodeStreams(ss_pointers.data(), ss_pointers.size());
    const auto end = std::chrono::steady_clock::now();
    float elapsed_seconds =
        std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
            .count() /
        1000.;
    elapsed_seconds_batch += elapsed_seconds;
    int i = 0;
    for (const auto &wav_filename : wav_paths) {
      fprintf(stderr, "%s\n", wav_filename.c_str());
      fprintf(stdout, "%s\n", ss[i]->GetResult().AsJsonString().c_str());
      fprintf(stderr, "----\n");
      i = i + 1;
    }
    ss_pointers.clear();
    ss.clear();
  }

  {
    std::lock_guard<std::mutex> guard(mtx);
    *total_length += duration;
    if (*total_time < elapsed_seconds_batch) {
      *total_time = elapsed_seconds_batch;
    }
  }
}

int main(int32_t argc, char *argv[]) {
  const char *kUsageMessage = R"usage(
Speech recognition using non-streaming models with sherpa-onnx.

Usage:

(1) Transducer from icefall

See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/index.html

  ./bin/sherpa-onnx-offline-parallel \
    --tokens=/path/to/tokens.txt \
    --encoder=/path/to/encoder.onnx \
    --decoder=/path/to/decoder.onnx \
    --joiner=/path/to/joiner.onnx \
    --num-threads=1 \
    --decoding-method=greedy_search \
    --batch-size=8 \
    --nj=1 \
    --wav-scp=wav.scp

  ./bin/sherpa-onnx-offline-parallel \
    --tokens=/path/to/tokens.txt \
    --encoder=/path/to/encoder.onnx \
    --decoder=/path/to/decoder.onnx \
    --joiner=/path/to/joiner.onnx \
    --num-threads=1 \
    --decoding-method=greedy_search \
    --batch-size=1 \
    --nj=8 \
    /path/to/foo.wav [bar.wav foobar.wav ...]

(2) Paraformer from FunASR

See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html

  ./bin/sherpa-onnx-offline-parallel \
    --tokens=/path/to/tokens.txt \
    --paraformer=/path/to/model.onnx \
    --num-threads=1 \
    --decoding-method=greedy_search \
    /path/to/foo.wav [bar.wav foobar.wav ...]

(3) Whisper models

See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html

  ./bin/sherpa-onnx-offline-parallel \
    --whisper-encoder=./sherpa-onnx-whisper-base.en/base.en-encoder.int8.onnx \
    --whisper-decoder=./sherpa-onnx-whisper-base.en/base.en-decoder.int8.onnx \
    --tokens=./sherpa-onnx-whisper-base.en/base.en-tokens.txt \
    --num-threads=1 \
    /path/to/foo.wav [bar.wav foobar.wav ...]

(4) NeMo CTC models

See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/index.html

  ./bin/sherpa-onnx-offline-parallel \
    --tokens=./sherpa-onnx-nemo-ctc-en-conformer-medium/tokens.txt \
    --nemo-ctc-model=./sherpa-onnx-nemo-ctc-en-conformer-medium/model.onnx \
    --num-threads=2 \
    --decoding-method=greedy_search \
    --debug=false \
    ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/0.wav \
    ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/1.wav \
    ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/8k.wav

(5) TDNN CTC model for the yesno recipe from icefall

See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/yesno/index.html
      //
  ./bin/sherpa-onnx-offline-parallel \
    --sample-rate=8000 \
    --feat-dim=23 \
    --tokens=./sherpa-onnx-tdnn-yesno/tokens.txt \
    --tdnn-model=./sherpa-onnx-tdnn-yesno/model-epoch-14-avg-2.onnx \
    ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_0_1_0_0_0_1.wav \
    ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_0_0_1_0.wav

Note: It supports decoding multiple files in batches

foo.wav should be of single channel, 16-bit PCM encoded wave file; its
sampling rate can be arbitrary and does not need to be 16kHz.

Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
for a list of pre-trained models to download.
)usage";
  std::string wav_scp = "";  // file path, kaldi style wav list.
  int32_t nj = 1;            // thread number
  int32_t batch_size = 1;    // number of wav files processed at once.
  sherpa_onnx::ParseOptions po(kUsageMessage);
  sherpa_onnx::OfflineRecognizerConfig config;
  config.Register(&po);
  po.Register("wav-scp", &wav_scp,
              "a file including wav-id and wav-path, kaldi style wav list."
              "default="
              ". when it is not empty, wav files which positional "
              "parameters provide are invalid.");
  po.Register("nj", &nj, "multi-thread num for decoding, default=1");
  po.Register("batch-size", &batch_size,
              "number of wav files processed at once during the decoding"
              "process. default=1");

  po.Read(argc, argv);
  if (po.NumArgs() < 1 && wav_scp.empty()) {
    fprintf(stderr, "Error: Please provide at least 1 wave file.\n\n");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "%s\n", config.ToString().c_str());

  if (!config.Validate()) {
    fprintf(stderr, "Errors in config!\n");
    return -1;
  }
  std::this_thread::sleep_for(std::chrono::seconds(10));  // sleep 10s
  fprintf(stderr, "Creating recognizer ...\n");
  const auto begin = std::chrono::steady_clock::now();
  sherpa_onnx::OfflineRecognizer recognizer(config);
  const auto end = std::chrono::steady_clock::now();
  float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;
  fprintf(stderr,
          "Started nj: %d, batch_size: %d, wav_path: %s. recognizer init time: "
          "%.6f\n",
          nj, batch_size, wav_scp.c_str(), elapsed_seconds);
  std::this_thread::sleep_for(std::chrono::seconds(10));  // sleep 10s
  std::vector<std::string> wav_paths;
  if (!wav_scp.empty()) {
    wav_paths = LoadScpFile(wav_scp);
  } else {
    for (int32_t i = 1; i <= po.NumArgs(); ++i) {
      wav_paths.emplace_back(po.GetArg(i));
    }
  }
  if (wav_paths.empty()) {
    fprintf(stderr, "wav files is empty.\n");
    return -1;
  }
  std::vector<std::thread> threads;
  std::vector<std::vector<std::string>> batch_wav_paths =
      SplitToBatches(wav_paths, batch_size);
  float total_length = 0.0f;
  float total_time = 0.0f;
  for (int i = 0; i < nj; i++) {
    threads.emplace_back(std::thread(AsrInference, batch_wav_paths, &recognizer,
                                     &total_length, &total_time));
  }

  for (auto &thread : threads) {
    thread.join();
  }

  fprintf(stderr, "num threads: %d\n", config.model_config.num_threads);
  fprintf(stderr, "decoding method: %s\n", config.decoding_method.c_str());
  if (config.decoding_method == "modified_beam_search") {
    fprintf(stderr, "max active paths: %d\n", config.max_active_paths);
  }
  fprintf(stderr, "Elapsed seconds: %.3f s\n", total_time);
  float rtf = total_time / total_length;
  fprintf(stderr, "Real time factor (RTF): %.6f / %.6f = %.4f\n", total_time,
          total_length, rtf);
  fprintf(stderr, "SPEEDUP: %.4f\n", 1.0 / rtf);

  return 0;
}


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx-offline-punctuation.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx-offline-punctuation.cc
//
// Copyright (c)  2022-2024  Xiaomi Corporation
#include <stdio.h>

#include <chrono>
#include <string>

#include "sherpa-onnx/csrc/offline-punctuation.h"
#include "sherpa-onnx/csrc/parse-options.h"

int main(int32_t argc, char *argv[]) {
  const char *kUsageMessage = R"usage(
Add punctuations to the input text.

The input text can contain both Chinese and English words.

Usage:

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2

./bin/sherpa-onnx-offline-punctuation \
  --ct-transformer=./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx
  "你好吗how are you Fantasitic 谢谢我很好你怎么样呢"

The output text should look like below:
)usage";

  sherpa_onnx::ParseOptions po(kUsageMessage);
  sherpa_onnx::OfflinePunctuationConfig config;
  config.Register(&po);
  po.Read(argc, argv);
  if (po.NumArgs() != 1) {
    fprintf(stderr,
            "Error: Please provide only 1 position argument containing the "
            "input text.\n\n");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "%s\n", config.ToString().c_str());

  if (!config.Validate()) {
    fprintf(stderr, "Errors in config!\n");
    return -1;
  }

  fprintf(stderr, "Creating OfflinePunctuation ...\n");
  sherpa_onnx::OfflinePunctuation punct(config);
  fprintf(stderr, "Started\n");
  const auto begin = std::chrono::steady_clock::now();

  std::string text = po.GetArg(1);
  std::string text_with_punct = punct.AddPunctuation(text);
  fprintf(stderr, "Done\n");
  const auto end = std::chrono::steady_clock::now();

  float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;

  fprintf(stderr, "Num threads: %d\n", config.model.num_threads);
  fprintf(stderr, "Elapsed seconds: %.3f s\n", elapsed_seconds);
  fprintf(stderr, "Input text: %s\n", text.c_str());
  fprintf(stderr, "Output text: ");
  fprintf(stdout, "%s\n", text_with_punct.c_str());
}


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx-offline-source-separation.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx-offline-source-separation.cc
//
// Copyright (c)  2025  Xiaomi Corporation
#include <stdio.h>

#include <chrono>  // NOLINT
#include <string>

#include "sherpa-onnx/csrc/offline-source-separation.h"
#include "sherpa-onnx/csrc/wave-reader.h"
#include "sherpa-onnx/csrc/wave-writer.h"

int main(int32_t argc, char *argv[]) {
  const char *kUsageMessage = R"usage(
Non-streaming source separation with sherpa-onnx.

Please visit
https://github.com/k2-fsa/sherpa-onnx/releases/tag/source-separation-models
to download models.

Usage:

(1) Use spleeter models

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/sherpa-onnx-spleeter-2stems-fp16.tar.bz2
tar xvf sherpa-onnx-spleeter-2stems-fp16.tar.bz2

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/audio_example.wav

./bin/sherpa-onnx-offline-source-separation \
  --spleeter-vocals=sherpa-onnx-spleeter-2stems-fp16/vocals.fp16.onnx \
  --spleeter-accompaniment=sherpa-onnx-spleeter-2stems-fp16/accompaniment.fp16.onnx \
  --input-wav=audio_example.wav \
  --output-vocals-wav=output_vocals.wav \
  --output-accompaniment-wav=output_accompaniment.wav

(2) Use UVR models

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/UVR_MDXNET_1_9703.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/audio_example.wav

./bin/sherpa-onnx-offline-source-separation \
  --uvr-model=./UVR_MDXNET_1_9703.onnx \
  --input-wav=audio_example.wav \
  --output-vocals-wav=output_vocals.wav \
  --output-accompaniment-wav=output_accompaniment.wav
)usage";

  sherpa_onnx::ParseOptions po(kUsageMessage);
  sherpa_onnx::OfflineSourceSeparationConfig config;

  std::string input_wave;
  std::string output_vocals_wave;
  std::string output_accompaniment_wave;

  config.Register(&po);
  po.Register("input-wav", &input_wave, "Path to input wav.");
  po.Register("output-vocals-wav", &output_vocals_wave,
              "Path to output vocals wav");
  po.Register("output-accompaniment-wav", &output_accompaniment_wave,
              "Path to output accompaniment wav");

  po.Read(argc, argv);
  if (po.NumArgs() != 0) {
    fprintf(stderr, "Please don't give positional arguments\n");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }
  fprintf(stderr, "%s\n", config.ToString().c_str());

  if (input_wave.empty()) {
    fprintf(stderr, "Please provide --input-wav\n");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  if (output_vocals_wave.empty()) {
    fprintf(stderr, "Please provide --output-vocals-wav\n");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  if (output_accompaniment_wave.empty()) {
    fprintf(stderr, "Please provide --output-accompaniment-wav\n");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  if (!config.Validate()) {
    fprintf(stderr, "Errors in config!\n");
    exit(EXIT_FAILURE);
  }

  bool is_ok = false;
  sherpa_onnx::OfflineSourceSeparationInput input;
  input.samples.data =
      sherpa_onnx::ReadWaveMultiChannel(input_wave, &input.sample_rate, &is_ok);
  if (!is_ok) {
    fprintf(stderr, "Failed to read '%s'\n", input_wave.c_str());
    return -1;
  }

  fprintf(stderr, "Started\n");

  sherpa_onnx::OfflineSourceSeparation sp(config);

  const auto begin = std::chrono::steady_clock::now();
  auto output = sp.Process(input);
  const auto end = std::chrono::steady_clock::now();

  float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;

  is_ok = sherpa_onnx::WriteWave(
      output_vocals_wave, output.sample_rate, output.stems[0].data[0].data(),
      output.stems[0].data[1].data(), output.stems[0].data[0].size());

  if (!is_ok) {
    fprintf(stderr, "Failed to write to '%s'\n", output_vocals_wave.c_str());
    exit(EXIT_FAILURE);
  }

  is_ok = sherpa_onnx::WriteWave(output_accompaniment_wave, output.sample_rate,
                                 output.stems[1].data[0].data(),
                                 output.stems[1].data[1].data(),
                                 output.stems[1].data[0].size());

  if (!is_ok) {
    fprintf(stderr, "Failed to write to '%s'\n",
            output_accompaniment_wave.c_str());
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "Done\n");
  fprintf(stderr, "Saved to write to '%s' and '%s'\n",
          output_vocals_wave.c_str(), output_accompaniment_wave.c_str());

  float duration =
      input.samples.data[0].size() / static_cast<float>(input.sample_rate);
  fprintf(stderr, "num threads: %d\n", config.model.num_threads);
  fprintf(stderr, "Elapsed seconds: %.3f s\n", elapsed_seconds);
  float rtf = elapsed_seconds / duration;
  fprintf(stderr, "Real time factor (RTF): %.3f / %.3f = %.3f\n",
          elapsed_seconds, duration, rtf);

  return 0;
}


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx-offline-speaker-diarization.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx-offline-speaker-diarization.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include <cstdio>
#include <iostream>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/offline-speaker-diarization.h"
#include "sherpa-onnx/csrc/parse-options.h"
#include "sherpa-onnx/csrc/wave-reader.h"

static int32_t ProgressCallback(int32_t processed_chunks, int32_t num_chunks,
                                void *) {
  float progress = 100.0 * processed_chunks / num_chunks;
  fprintf(stderr, "progress %.2f%%\n", progress);

  // the return value is currently ignored
  return 0;
}

int main(int32_t argc, char *argv[]) {
  const char *kUsageMessage = R"usage(
Offline/Non-streaming speaker diarization with sherpa-onnx
Usage example:

Step 1: Download a speaker segmentation model

Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
for a list of available models. The following is an example

  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2

Step 2: Download a speaker embedding extractor model

Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
for a list of available models. The following is an example

  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

Step 3. Download test wave files

Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
for a list of available test wave files. The following is an example

  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav

Step 4. Build sherpa-onnx

Step 5. Run it

  ./bin/sherpa-onnx-offline-speaker-diarization \
    --clustering.num-clusters=4 \
    --segmentation.pyannote-model=./sherpa-onnx-pyannote-segmentation-3-0/model.onnx \
    --embedding.model=./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx \
    ./0-four-speakers-zh.wav

Since we know that there are four speakers in the test wave file, we use
--clustering.num-clusters=4 in the above example.

If we don't know number of speakers in the given wave file, we can use
the argument --clustering.cluster-threshold. The following is an example:

  ./bin/sherpa-onnx-offline-speaker-diarization \
    --clustering.cluster-threshold=0.90 \
    --segmentation.pyannote-model=./sherpa-onnx-pyannote-segmentation-3-0/model.onnx \
    --embedding.model=./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx \
    ./0-four-speakers-zh.wav

A larger threshold leads to few clusters, i.e., few speakers;
a smaller threshold leads to more clusters, i.e., more speakers
  )usage";
  sherpa_onnx::OfflineSpeakerDiarizationConfig config;
  sherpa_onnx::ParseOptions po(kUsageMessage);
  config.Register(&po);
  po.Read(argc, argv);

  std::cout << config.ToString() << "\n";

  if (!config.Validate()) {
    po.PrintUsage();
    std::cerr << "Errors in config!\n";
    return -1;
  }

  if (po.NumArgs() != 1) {
    std::cerr << "Error: Please provide exactly 1 wave file.\n\n";
    po.PrintUsage();
    return -1;
  }

  sherpa_onnx::OfflineSpeakerDiarization sd(config);

  std::cout << "Started\n";
  const auto begin = std::chrono::steady_clock::now();
  const std::string wav_filename = po.GetArg(1);
  int32_t sample_rate = -1;
  bool is_ok = false;
  const std::vector<float> samples =
      sherpa_onnx::ReadWave(wav_filename, &sample_rate, &is_ok);
  if (!is_ok) {
    std::cerr << "Failed to read " << wav_filename.c_str() << "\n";
    return -1;
  }

  if (sample_rate != sd.SampleRate()) {
    std::cerr << "Expect sample rate " << sd.SampleRate()
              << ". Given: " << sample_rate << "\n";
    return -1;
  }

  float duration = samples.size() / static_cast<float>(sample_rate);

  auto result =
      sd.Process(samples.data(), samples.size(), ProgressCallback, nullptr)
          .SortByStartTime();

  for (const auto &r : result) {
    std::cout << r.ToString() << "\n";
  }

  const auto end = std::chrono::steady_clock::now();
  float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;

  fprintf(stderr, "Duration : %.3f s\n", duration);
  fprintf(stderr, "Elapsed seconds: %.3f s\n", elapsed_seconds);
  float rtf = elapsed_seconds / duration;
  fprintf(stderr, "Real time factor (RTF): %.3f / %.3f = %.3f\n",
          elapsed_seconds, duration, rtf);

  return 0;
}


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx-offline-tts-play-alsa.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx-tts-play-alsa.cc
//
// Copyright (c)  2022-2023  Xiaomi Corporation

// see https://www.alsa-project.org/alsa-doc/alsa-lib/group___p_c_m.html
// https://www.alsa-project.org/alsa-doc/alsa-lib/group___p_c_m___h_w___params.html
// https://www.alsa-project.org/alsa-doc/alsa-lib/group___p_c_m.html

#include <signal.h>

#include <algorithm>
#include <chrono>              // NOLINT
#include <condition_variable>  // NOLINT
#include <cstdio>
#include <fstream>
#include <mutex>  // NOLINT
#include <queue>
#include <string>
#include <thread>  // NOLINT
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/alsa-play.h"
#include "sherpa-onnx/csrc/offline-tts.h"
#include "sherpa-onnx/csrc/parse-options.h"
#include "sherpa-onnx/csrc/wave-reader.h"
#include "sherpa-onnx/csrc/wave-writer.h"

static std::condition_variable g_cv;
static std::mutex g_cv_m;

struct Buffer {
  std::queue<std::vector<float>> samples;
  std::mutex mutex;
};

static Buffer g_buffer;

static bool g_stopped = false;
static bool g_killed = false;

static void Handler(int32_t /*sig*/) {
  if (g_killed) {
    exit(0);
  }

  g_killed = true;
  fprintf(stderr, "\nCaught Ctrl + C. Exiting\n");
}

static int32_t AudioGeneratedCallback(const float *s, int32_t n,
                                      float /*progress*/) {
  if (n > 0) {
    std::lock_guard<std::mutex> lock(g_buffer.mutex);
    g_buffer.samples.push({s, s + n});
    g_cv.notify_all();
  }

  if (g_killed) {
    return 0;  // stop generating
  }

  // continue generating
  return 1;
}

static void StartPlayback(const std::string &device_name, int32_t sample_rate) {
  sherpa_onnx::AlsaPlay alsa(device_name.c_str(), sample_rate);

  std::unique_lock<std::mutex> lock(g_cv_m);
  while (!g_killed && !g_stopped) {
    while (!g_buffer.samples.empty()) {
      auto &p = g_buffer.samples.front();
      alsa.Play(p);
      g_buffer.samples.pop();
    }

    g_cv.wait(lock);
  }

  if (g_killed) {
    return;
  }

  if (g_stopped) {
    while (!g_buffer.samples.empty()) {
      auto &p = g_buffer.samples.front();
      alsa.Play(p);
      g_buffer.samples.pop();
    }
  }

  alsa.Drain();
}

int main(int32_t argc, char *argv[]) {
  signal(SIGINT, Handler);

  const char *kUsageMessage = R"usage(
Offline text-to-speech with sherpa-onnx.

It plays the generated audio as the model is processing.

Note that it is alsa so it works only on **Linux**. For instance, you can
use it on Raspberry Pi.

Usage examples:

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
tar xf vits-piper-en_US-amy-low.tar.bz2

./bin/sherpa-onnx-offline-tts-play-alsa \
 --vits-model=./vits-piper-en_US-amy-low/en_US-amy-low.onnx \
 --vits-tokens=./vits-piper-en_US-amy-low/tokens.txt \
 --vits-data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
 --output-filename=./generated.wav \
 "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar."

Pocket TTS:

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
tar xf sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2

./bin/sherpa-onnx-offline-tts-play-alsa \
 --pocket-lm-flow=./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_flow.int8.onnx \
 --pocket-lm-main=./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_main.int8.onnx \
 --pocket-encoder=./sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx \
 --pocket-decoder=./sherpa-onnx-pocket-tts-int8-2026-01-26/decoder.int8.onnx \
 --pocket-text-conditioner=./sherpa-onnx-pocket-tts-int8-2026-01-26/text_conditioner.onnx \
 --pocket-vocab-json=./sherpa-onnx-pocket-tts-int8-2026-01-26/vocab.json \
 --pocket-token-scores-json=./sherpa-onnx-pocket-tts-int8-2026-01-26/token_scores.json \
 --reference-audio=./sherpa-onnx-pocket-tts-int8-2026-01-26/test_wavs/bria.wav \
 "Hello from Pocket TTS"

Supertonic TTS:

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
tar xf sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2

./bin/sherpa-onnx-offline-tts-play-alsa \
 --supertonic-duration-predictor=./sherpa-onnx-supertonic-tts-int8-2026-03-06/duration_predictor.int8.onnx \
 --supertonic-text-encoder=./sherpa-onnx-supertonic-tts-int8-2026-03-06/text_encoder.int8.onnx \
 --supertonic-vector-estimator=./sherpa-onnx-supertonic-tts-int8-2026-03-06/vector_estimator.int8.onnx \
 --supertonic-vocoder=./sherpa-onnx-supertonic-tts-int8-2026-03-06/vocoder.int8.onnx \
 --supertonic-tts-json=./sherpa-onnx-supertonic-tts-int8-2026-03-06/tts.json \
 --supertonic-unicode-indexer=./sherpa-onnx-supertonic-tts-int8-2026-03-06/unicode_indexer.bin \
 --supertonic-voice-style=./sherpa-onnx-supertonic-tts-int8-2026-03-06/voice.bin \
 --lang=en \
 "Hello from Supertonic TTS"

ZipVoice TTS:

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
tar xf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos_24khz.onnx

./bin/sherpa-onnx-offline-tts-play-alsa \
 --zipvoice-encoder=./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/encoder.int8.onnx \
 --zipvoice-decoder=./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/decoder.int8.onnx \
 --zipvoice-data-dir=./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/espeak-ng-data \
 --zipvoice-lexicon=./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/lexicon.txt \
 --zipvoice-tokens=./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/tokens.txt \
 --zipvoice-vocoder=./vocos_24khz.onnx \
 --reference-audio=./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/test_wavs/leijun-1.wav \
 --reference-text="那还是三十六年前, 一九八七年. 我呢考上了武汉大学的计算机系." \
 --num-steps=4 \
 "小米的价值观是真诚, 热爱. 真诚，就是不欺人也不自欺. 热爱, 就是全心投入并享受其中."

It will optionally save audio to --output-filename and play it while generating.

You can find more models at
https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models

Please see
https://k2-fsa.github.io/sherpa/onnx/tts/index.html
or details.
)usage";

  sherpa_onnx::ParseOptions po(kUsageMessage);
  std::string device_name = "default";
  std::string output_filename = "./generated.wav";
  int32_t sid = 0;

  std::string reference_audio;
  po.Register(
      "reference-audio", &reference_audio,
      "Path to reference audio. Required by Pocket TTS and ZipVoice TTS.");

  std::string reference_text;
  po.Register(
      "reference-text", &reference_text,
      "Reference text for the reference audio. Required by ZipVoice TTS.");

  sherpa_onnx::GenerationConfig gen_config;
  std::string lang;

  po.Register("output-filename", &output_filename,
              "Path to save the generated audio");

  po.Register(
      "num-steps", &gen_config.num_steps,
      "Used by some models, e.g., Pocket TTS and ZipVoice. Number of flow "
      "matching steps.");

  po.Register("device-name", &device_name,
              "Name of the device to play the generated audio");

  po.Register("lang", &lang,
              "Language for text: en, ko, es, pt, fr. Used only by "
              "Supertonic TTS.");

  po.Register("sid", &sid,
              "Speaker ID. Used only for multi-speaker models, e.g., models "
              "trained using the VCTK dataset. Not used for single-speaker "
              "models, e.g., models trained using the LJSpeech dataset");

  po.Register("speed", &gen_config.speed,
              "Speech speed. Larger=faster. Used by Supertonic, VITS, etc. "
              "(float, default = 1.0)");

  sherpa_onnx::OfflineTtsConfig config;

  config.Register(&po);
  po.Read(argc, argv);

  if (po.NumArgs() == 0) {
    fprintf(stderr, "Error: Please provide the text to generate audio.\n\n");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  if (po.NumArgs() > 1) {
    fprintf(stderr,
            "Error: Accept only one positional argument. Please use single "
            "quotes to wrap your text\n");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  if (!config.Validate()) {
    fprintf(stderr, "Errors in config!\n");
    exit(EXIT_FAILURE);
  }

  if (config.max_num_sentences != 1) {
    fprintf(stderr, "Setting config.max_num_sentences to 1\n");
    config.max_num_sentences = 1;
  }

  fprintf(stderr, "Loading the model\n");
  sherpa_onnx::OfflineTts tts(config);

  fprintf(stderr, "Start the playback thread\n");
  std::thread playback_thread(StartPlayback, device_name, tts.SampleRate());

  fprintf(stderr, "Generating ...\n");
  const auto begin = std::chrono::steady_clock::now();

  sherpa_onnx::GeneratedAudio audio;

  bool is_pocket_tts = !config.model.pocket.lm_flow.empty();
  bool is_supertonic_tts = !config.model.supertonic.tts_json.empty();
  bool is_zipvoice_tts = !config.model.zipvoice.encoder.empty() &&
                         !config.model.zipvoice.decoder.empty();

  gen_config.sid = sid;

  if (is_supertonic_tts && !lang.empty()) {
    gen_config.extra["lang"] = lang;
  }

  if (is_pocket_tts || is_zipvoice_tts) {
    if (reference_audio.empty()) {
      fprintf(stderr,
              "You need to provide --reference-audio for this TTS model");
      exit(EXIT_FAILURE);
    }

    int32_t sample_rate;
    bool is_ok = false;
    auto samples =
        sherpa_onnx::ReadWave(reference_audio, &sample_rate, &is_ok);
    if (!is_ok) {
      fprintf(stderr, "Failed to read '%s'", reference_audio.c_str());
      exit(EXIT_FAILURE);
    }

    gen_config.reference_audio = std::move(samples);
    gen_config.reference_sample_rate = sample_rate;
  }

  if (is_zipvoice_tts) {
    if (reference_text.empty()) {
      fprintf(stderr,
              "You need to provide --reference-text for ZipVoice TTS");
      exit(EXIT_FAILURE);
    }
    gen_config.reference_text = reference_text;
  }

  audio = tts.Generate(po.GetArg(1), gen_config, AudioGeneratedCallback);

  const auto end = std::chrono::steady_clock::now();
  g_stopped = true;
  g_cv.notify_all();
  fprintf(stderr, "Generating done!\n");
  if (audio.samples.empty()) {
    fprintf(
        stderr,
        "Error in generating audio. Please read previous error messages.\n");
    exit(EXIT_FAILURE);
  }

  float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;
  float duration = audio.samples.size() / static_cast<float>(audio.sample_rate);

  float rtf = elapsed_seconds / duration;
  fprintf(stderr, "Elapsed seconds: %.3f s\n", elapsed_seconds);
  fprintf(stderr, "Audio duration: %.3f s\n", duration);
  fprintf(stderr, "Real-time factor (RTF): %.3f/%.3f = %.3f\n", elapsed_seconds,
          duration, rtf);

  bool ok = sherpa_onnx::WriteWave(output_filename, audio.sample_rate,
                                   audio.samples.data(), audio.samples.size());
  if (!ok) {
    fprintf(stderr, "Failed to write wave to %s\n", output_filename.c_str());
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "The text is: %s. Speaker ID: %d\n\n", po.GetArg(1).c_str(),
          sid);
  fprintf(stderr, "\n**** Saved to %s successfully! ****\n",
          output_filename.c_str());

  fprintf(stderr, "\n");
  fprintf(
      stderr,
      "Wait for the playback to finish. You can safely press ctrl + C to stop "
      "the playback.\n");
  playback_thread.join();

  fprintf(stderr, "Done!\n");

  return 0;
}


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx-offline-tts-play.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx-offline-tts-play.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include <signal.h>

#include <algorithm>
#include <chrono>
#include <condition_variable>
#include <cstdio>
#include <fstream>
#include <mutex>
#include <queue>
#include <string>
#include <thread>
#include <utility>
#include <vector>

#include "portaudio.h"  // NOLINT
#include "sherpa-onnx/csrc/microphone.h"
#include "sherpa-onnx/csrc/offline-tts.h"
#include "sherpa-onnx/csrc/parse-options.h"
#include "sherpa-onnx/csrc/wave-reader.h"
#include "sherpa-onnx/csrc/wave-writer.h"

static std::condition_variable g_cv;
static std::mutex g_cv_m;

struct Samples {
  std::vector<float> data;
  int32_t consumed = 0;
};

struct Buffer {
  std::queue<Samples> samples;
  std::mutex mutex;
};

static Buffer g_buffer;

static bool g_started = false;
static bool g_stopped = false;
static bool g_killed = false;

static void Handler(int32_t /*sig*/) {
  if (g_killed) {
    exit(0);
  }

  g_killed = true;
  fprintf(stderr, "\nCaught Ctrl + C. Exiting\n");
}

static int32_t AudioGeneratedCallback(const float *s, int32_t n,
                                      float /*progress*/) {
  if (n > 0) {
    Samples samples;
    samples.data = std::vector<float>{s, s + n};

    std::lock_guard<std::mutex> lock(g_buffer.mutex);
    g_buffer.samples.push(std::move(samples));
    g_started = true;
  }
  if (g_killed) {
    return 0;  // stop generating
  }

  // continue generating
  return 1;
}

static int PlayCallback(const void * /*in*/, void *out,
                        unsigned long n,  // NOLINT
                        const PaStreamCallbackTimeInfo * /*time_info*/,
                        PaStreamCallbackFlags /*status_flags*/,
                        void * /*user_data*/) {
  if (g_killed) {
    return paComplete;
  }

  float *pout = reinterpret_cast<float *>(out);
  std::lock_guard<std::mutex> lock(g_buffer.mutex);

  if (g_buffer.samples.empty()) {
    if (g_stopped) {
      // no more data is available and we have processed all of the samples
      return paComplete;
    }

    // The current sentence is so long, though very unlikely, that
    // the model has not finished processing it yet.
    std::fill_n(pout, n, 0);

    return paContinue;
  }

  int32_t k = 0;
  for (; k < static_cast<int32_t>(n) && !g_buffer.samples.empty();) {
    int32_t this_block = n - k;

    auto &p = g_buffer.samples.front();

    int32_t remaining = p.data.size() - p.consumed;

    if (this_block <= remaining) {
      std::copy(p.data.begin() + p.consumed,
                p.data.begin() + p.consumed + this_block, pout + k);
      p.consumed += this_block;

      k = n;

      if (p.consumed == static_cast<int32_t>(p.data.size())) {
        g_buffer.samples.pop();
      }
      break;
    }

    std::copy(p.data.begin() + p.consumed, p.data.end(), pout + k);
    k += p.data.size() - p.consumed;
    g_buffer.samples.pop();
  }

  if (k < static_cast<int32_t>(n)) {
    std::fill_n(pout + k, n - k, 0);
  }

  if (g_stopped && g_buffer.samples.empty()) {
    return paComplete;
  }

  return paContinue;
}

static void PlayCallbackFinished(void * /*userData*/) { g_cv.notify_all(); }

static void StartPlayback(int32_t sample_rate) {
  int32_t frames_per_buffer = 1024;
  PaStreamParameters outputParameters;
  PaStream *stream;
  PaError err;

  outputParameters.device =
      Pa_GetDefaultOutputDevice(); /* default output device */

  outputParameters.channelCount = 1;         /* stereo output */
  outputParameters.sampleFormat = paFloat32; /* 32 bit floating point output */
  outputParameters.suggestedLatency =
      Pa_GetDeviceInfo(outputParameters.device)->defaultLowOutputLatency;
  outputParameters.hostApiSpecificStreamInfo = nullptr;

  err = Pa_OpenStream(&stream, nullptr, /* no input */
                      &outputParameters, sample_rate, frames_per_buffer,
                      paClipOff,  // we won't output out of range samples so
                                  //   don't bother clipping them
                      PlayCallback, nullptr);
  if (err != paNoError) {
    fprintf(stderr, "%d portaudio error: %s\n", __LINE__, Pa_GetErrorText(err));
    return;
  }

  err = Pa_SetStreamFinishedCallback(stream, &PlayCallbackFinished);
  if (err != paNoError) {
    fprintf(stderr, "%d portaudio error: %s\n", __LINE__, Pa_GetErrorText(err));
    return;
  }

  err = Pa_StartStream(stream);
  if (err != paNoError) {
    fprintf(stderr, "%d portaudio error: %s\n", __LINE__, Pa_GetErrorText(err));
    return;
  }

  std::unique_lock<std::mutex> lock(g_cv_m);
  while (!g_killed && !g_stopped &&
         (!g_started || (g_started && !g_buffer.samples.empty()))) {
    g_cv.wait(lock);
  }

  err = Pa_StopStream(stream);
  if (err != paNoError) {
    return;
  }

  err = Pa_CloseStream(stream);
  if (err != paNoError) {
    return;
  }
}

int main(int32_t argc, char *argv[]) {
  signal(SIGINT, Handler);

  const char *kUsageMessage = R"usage(
Offline text-to-speech with sherpa-onnx.

It plays the generated audio as the model is processing.

Usage examples:

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
tar xf vits-piper-en_US-amy-low.tar.bz2

./bin/sherpa-onnx-offline-tts-play \
 --vits-model=./vits-piper-en_US-amy-low/en_US-amy-low.onnx \
 --vits-tokens=./vits-piper-en_US-amy-low/tokens.txt \
 --vits-data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
 --output-filename=./generated.wav \
  "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar."

Pocket TTS:

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
tar xf sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2

./bin/sherpa-onnx-offline-tts-play \
 --pocket-lm-flow=./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_flow.int8.onnx \
 --pocket-lm-main=./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_main.int8.onnx \
 --pocket-encoder=./sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx \
 --pocket-decoder=./sherpa-onnx-pocket-tts-int8-2026-01-26/decoder.int8.onnx \
 --pocket-text-conditioner=./sherpa-onnx-pocket-tts-int8-2026-01-26/text_conditioner.onnx \
 --pocket-vocab-json=./sherpa-onnx-pocket-tts-int8-2026-01-26/vocab.json \
 --pocket-token-scores-json=./sherpa-onnx-pocket-tts-int8-2026-01-26/token_scores.json \
 --reference-audio=./sherpa-onnx-pocket-tts-int8-2026-01-26/test_wavs/bria.wav \
 "Hello from Pocket TTS"

Supertonic TTS:

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
tar xf sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2

./bin/sherpa-onnx-offline-tts-play \
 --supertonic-duration-predictor=./sherpa-onnx-supertonic-tts-int8-2026-03-06/duration_predictor.int8.onnx \
 --supertonic-text-encoder=./sherpa-onnx-supertonic-tts-int8-2026-03-06/text_encoder.int8.onnx \
 --supertonic-vector-estimator=./sherpa-onnx-supertonic-tts-int8-2026-03-06/vector_estimator.int8.onnx \
 --supertonic-vocoder=./sherpa-onnx-supertonic-tts-int8-2026-03-06/vocoder.int8.onnx \
 --supertonic-tts-json=./sherpa-onnx-supertonic-tts-int8-2026-03-06/tts.json \
 --supertonic-unicode-indexer=./sherpa-onnx-supertonic-tts-int8-2026-03-06/unicode_indexer.bin \
 --supertonic-voice-style=./sherpa-onnx-supertonic-tts-int8-2026-03-06/voice.bin \
 --lang=en \
 "Hello from Supertonic TTS"

ZipVoice TTS:

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
tar xf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos_24khz.onnx

./bin/sherpa-onnx-offline-tts-play \
 --zipvoice-encoder=./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/encoder.int8.onnx \
 --zipvoice-decoder=./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/decoder.int8.onnx \
 --zipvoice-data-dir=./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/espeak-ng-data \
 --zipvoice-lexicon=./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/lexicon.txt \
 --zipvoice-tokens=./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/tokens.txt \
 --zipvoice-vocoder=./vocos_24khz.onnx \
 --reference-audio=./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/test_wavs/leijun-1.wav \
 --reference-text="那还是三十六年前, 一九八七年. 我呢考上了武汉大学的计算机系." \
 --num-steps=4 \
 "小米的价值观是真诚, 热爱. 真诚，就是不欺人也不自欺. 热爱, 就是全心投入并享受其中."

It will optionally save audio to --output-filename and play it while generating.

You can find more models at
https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models

Please see
https://k2-fsa.github.io/sherpa/onnx/tts/index.html
or details.
)usage";

  sherpa_onnx::ParseOptions po(kUsageMessage);
  std::string output_filename = "./generated.wav";
  int32_t sid = 0;

  std::string reference_audio;
  po.Register(
      "reference-audio", &reference_audio,
      "Path to reference audio. Required by Pocket TTS and ZipVoice TTS.");

  std::string reference_text;
  po.Register(
      "reference-text", &reference_text,
      "Reference text for the reference audio. Required by ZipVoice TTS.");

  sherpa_onnx::GenerationConfig gen_config;
  std::string lang;

  po.Register("output-filename", &output_filename,
              "Path to save the generated audio");

  po.Register(
      "num-steps", &gen_config.num_steps,
      "Used by some models, e.g., Pocket TTS and ZipVoice. Number of flow "
      "matching steps.");

  po.Register("lang", &lang,
              "Language for text: en, ko, es, pt, fr. Used only by "
              "Supertonic TTS.");

  po.Register("sid", &sid,
              "Speaker ID. Used only for multi-speaker models, e.g., models "
              "trained using the VCTK dataset. Not used for single-speaker "
              "models, e.g., models trained using the LJSpeech dataset");

  po.Register("speed", &gen_config.speed,
              "Speech speed. Larger=faster. Used by Supertonic, VITS, etc. "
              "(float, default = 1.0)");

  sherpa_onnx::OfflineTtsConfig config;

  config.Register(&po);
  po.Read(argc, argv);

  if (po.NumArgs() == 0) {
    fprintf(stderr, "Error: Please provide the text to generate audio.\n\n");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  if (po.NumArgs() > 1) {
    fprintf(stderr,
            "Error: Accept only one positional argument. Please use single "
            "quotes to wrap your text\n");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  if (!config.Validate()) {
    fprintf(stderr, "Errors in config!\n");
    exit(EXIT_FAILURE);
  }

  sherpa_onnx::Microphone mic;

  PaDeviceIndex num_devices = Pa_GetDeviceCount();
  fprintf(stderr, "Num devices: %d\n", num_devices);

  PaStreamParameters param;

  param.device = Pa_GetDefaultOutputDevice();
  if (param.device == paNoDevice) {
    fprintf(stderr, "No default output device found\n");
    exit(EXIT_FAILURE);
  }
  fprintf(stderr, "Use default device: %d\n", param.device);

  const PaDeviceInfo *info = Pa_GetDeviceInfo(param.device);
  fprintf(stderr, "  Name: %s\n", info->name);
  fprintf(stderr, "  Max output channels: %d\n", info->maxOutputChannels);

  if (config.max_num_sentences != 1) {
    fprintf(stderr, "Setting config.max_num_sentences to 1\n");
    config.max_num_sentences = 1;
  }

  fprintf(stderr, "Loading the model\n");
  sherpa_onnx::OfflineTts tts(config);

  fprintf(stderr, "Start the playback thread\n");
  std::thread playback_thread(StartPlayback, tts.SampleRate());

  fprintf(stderr, "Generating ...\n");
  const auto begin = std::chrono::steady_clock::now();

  sherpa_onnx::GeneratedAudio audio;

  bool is_pocket_tts = !config.model.pocket.lm_flow.empty();
  bool is_supertonic_tts = !config.model.supertonic.tts_json.empty();
  bool is_zipvoice_tts = !config.model.zipvoice.encoder.empty() &&
                         !config.model.zipvoice.decoder.empty();

  gen_config.sid = sid;

  if (is_supertonic_tts && !lang.empty()) {
    gen_config.extra["lang"] = lang;
  }

  if (is_pocket_tts || is_zipvoice_tts) {
    if (reference_audio.empty()) {
      fprintf(stderr,
              "You need to provide --reference-audio for this TTS model");
      exit(EXIT_FAILURE);
    }

    int32_t sample_rate;
    bool is_ok = false;
    auto samples =
        sherpa_onnx::ReadWave(reference_audio, &sample_rate, &is_ok);
    if (!is_ok) {
      fprintf(stderr, "Failed to read '%s'", reference_audio.c_str());
      exit(EXIT_FAILURE);
    }

    gen_config.reference_audio = std::move(samples);
    gen_config.reference_sample_rate = sample_rate;
  }

  if (is_zipvoice_tts) {
    if (reference_text.empty()) {
      fprintf(stderr,
              "You need to provide --reference-text for ZipVoice TTS");
      exit(EXIT_FAILURE);
    }
    gen_config.reference_text = reference_text;
  }

  audio = tts.Generate(po.GetArg(1), gen_config, AudioGeneratedCallback);

  const auto end = std::chrono::steady_clock::now();
  g_stopped = true;
  fprintf(stderr, "Generating done!\n");
  if (audio.samples.empty()) {
    fprintf(
        stderr,
        "Error in generating audio. Please read previous error messages.\n");
    exit(EXIT_FAILURE);
  }

  float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;
  float duration = audio.samples.size() / static_cast<float>(audio.sample_rate);

  float rtf = elapsed_seconds / duration;
  fprintf(stderr, "Elapsed seconds: %.3f s\n", elapsed_seconds);
  fprintf(stderr, "Audio duration: %.3f s\n", duration);
  fprintf(stderr, "Real-time factor (RTF): %.3f/%.3f = %.3f\n", elapsed_seconds,
          duration, rtf);

  bool ok = sherpa_onnx::WriteWave(output_filename, audio.sample_rate,
                                   audio.samples.data(), audio.samples.size());
  if (!ok) {
    fprintf(stderr, "Failed to write wave to %s\n", output_filename.c_str());
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "The text is: %s. Speaker ID: %d\n\n", po.GetArg(1).c_str(),
          sid);
  fprintf(stderr, "\n**** Saved to %s successfully! ****\n",
          output_filename.c_str());

  fprintf(stderr, "\n");
  fprintf(
      stderr,
      "Wait for the playback to finish. You can safely press ctrl + C to stop "
      "the playback.\n");
  playback_thread.join();

  fprintf(stderr, "Done!\n");

  return 0;
}


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx-offline-tts.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx-offline-tts.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include <chrono>  // NOLINT
#include <cstdio>
#include <fstream>
#include <string>
#include <utility>

#include "sherpa-onnx/csrc/offline-tts.h"
#include "sherpa-onnx/csrc/parse-options.h"
#include "sherpa-onnx/csrc/wave-reader.h"
#include "sherpa-onnx/csrc/wave-writer.h"

static int32_t AudioCallback(const float * /*samples*/, int32_t n,
                             float progress) {
  printf("sample=%d, progress=%f\n", n, progress);
  return 1;
}

int main(int32_t argc, char *argv[]) {
  const char *kUsageMessage = R"usage(
Offline/Non-streaming text-to-speech with sherpa-onnx

Usage examples:

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
tar xf vits-piper-en_US-amy-low.tar.bz2

./bin/sherpa-onnx-offline-tts \
 --vits-model=./vits-piper-en_US-amy-low/en_US-amy-low.onnx \
 --vits-tokens=./vits-piper-en_US-amy-low/tokens.txt \
 --vits-data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
 --output-filename=./generated.wav \
  "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar."

Pocket TTS:

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
tar xf sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2

./bin/sherpa-onnx-offline-tts \
 --pocket-lm-flow=./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_flow.int8.onnx \
 --pocket-lm-main=./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_main.int8.onnx \
 --pocket-encoder=./sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx \
 --pocket-decoder=./sherpa-onnx-pocket-tts-int8-2026-01-26/decoder.int8.onnx \
 --pocket-text-conditioner=./sherpa-onnx-pocket-tts-int8-2026-01-26/text_conditioner.onnx \
 --pocket-vocab-json=./sherpa-onnx-pocket-tts-int8-2026-01-26/vocab.json \
 --pocket-token-scores-json=./sherpa-onnx-pocket-tts-int8-2026-01-26/token_scores.json \
 --reference-audio=./sherpa-onnx-pocket-tts-int8-2026-01-26/test_wavs/bria.wav \
 --output-filename=./generated-pocket.wav \
 "Hello from Pocket TTS"

Supertonic TTS:

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
tar xf sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2

./bin/sherpa-onnx-offline-tts \
 --supertonic-duration-predictor=./sherpa-onnx-supertonic-tts-int8-2026-03-06/duration_predictor.int8.onnx \
 --supertonic-text-encoder=./sherpa-onnx-supertonic-tts-int8-2026-03-06/text_encoder.int8.onnx \
 --supertonic-vector-estimator=./sherpa-onnx-supertonic-tts-int8-2026-03-06/vector_estimator.int8.onnx \
 --supertonic-vocoder=./sherpa-onnx-supertonic-tts-int8-2026-03-06/vocoder.int8.onnx \
 --supertonic-tts-json=./sherpa-onnx-supertonic-tts-int8-2026-03-06/tts.json \
 --supertonic-unicode-indexer=./sherpa-onnx-supertonic-tts-int8-2026-03-06/unicode_indexer.bin \
 --supertonic-voice-style=./sherpa-onnx-supertonic-tts-int8-2026-03-06/voice.bin \
 --lang=en \
 --output-filename=./generated-supertonic.wav \
 "Hello from Supertonic TTS"

ZipVoice TTS:

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
tar xf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos_24khz.onnx

./bin/sherpa-onnx-offline-tts \
 --zipvoice-encoder=./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/encoder.int8.onnx \
 --zipvoice-decoder=./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/decoder.int8.onnx \
 --zipvoice-data-dir=./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/espeak-ng-data \
 --zipvoice-lexicon=./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/lexicon.txt \
 --zipvoice-tokens=./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/tokens.txt \
 --zipvoice-vocoder=./vocos_24khz.onnx \
 --reference-audio=./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/test_wavs/leijun-1.wav \
 --reference-text="那还是三十六年前, 一九八七年. 我呢考上了武汉大学的计算机系." \
 --num-steps=4 \
 --output-filename=./generated-zipvoice.wav \
 "小米的价值观是真诚, 热爱. 真诚，就是不欺人也不自欺. 热爱, 就是全心投入并享受其中."

It will generate a file specified by --output-filename.

You can find more models at
https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models

Please see
https://k2-fsa.github.io/sherpa/onnx/tts/index.html
or details.
)usage";

  sherpa_onnx::ParseOptions po(kUsageMessage);
  std::string output_filename = "./generated.wav";
  int32_t sid = 0;

  std::string reference_audio;
  po.Register(
      "reference-audio", &reference_audio,
      "Path to reference audio. Required by Pocket TTS and ZipVoice TTS.");

  std::string reference_text;
  po.Register(
      "reference-text", &reference_text,
      "Reference text for the reference audio. Required by ZipVoice TTS.");

  sherpa_onnx::GenerationConfig gen_config;

  std::string lang;

  po.Register(
      "num-steps", &gen_config.num_steps,
      "Used by some models, e.g., Pocket TTS and ZipVoice. Number of flow "
      "matching steps.");

  po.Register("output-filename", &output_filename,
              "Path to save the generated audio");

  po.Register("lang", &lang,
              "Language for text: en, ko, es, pt, fr. Used only by "
              "Supertonic TTS.");

  po.Register("sid", &sid,
              "Speaker ID. Used only for multi-speaker models, e.g., models "
              "trained using the VCTK dataset. Not used for single-speaker "
              "models, e.g., models trained using the LJSpeech dataset");

  po.Register("speed", &gen_config.speed,
              "Speech speed. Larger=faster. Used by Supertonic, VITS, etc. "
              "(float, default = 1.0)");

  sherpa_onnx::OfflineTtsConfig config;

  config.Register(&po);
  po.Read(argc, argv);

  if (po.NumArgs() == 0) {
    fprintf(stderr, "Error: Please provide the text to generate audio.\n\n");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  if (po.NumArgs() > 1) {
    fprintf(stderr,
            "Error: Accept only one positional argument. Please use single "
            "quotes to wrap your text.\n");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  if (config.model.debug) {
    fprintf(stderr, "%s\n", config.model.ToString().c_str());
  }

  if (!config.Validate()) {
    fprintf(stderr, "Errors in config!\n");
    exit(EXIT_FAILURE);
  }

  sherpa_onnx::OfflineTts tts(config);

  const auto begin = std::chrono::steady_clock::now();
  sherpa_onnx::GeneratedAudio audio;

  bool is_pocket_tts = !config.model.pocket.lm_flow.empty();
  bool is_supertonic_tts = !config.model.supertonic.tts_json.empty();
  bool is_zipvoice_tts = !config.model.zipvoice.encoder.empty() &&
                         !config.model.zipvoice.decoder.empty();

  gen_config.sid = sid;

  if (is_supertonic_tts && !lang.empty()) {
    gen_config.extra["lang"] = lang;
  }

  if (is_pocket_tts || is_zipvoice_tts) {
    if (reference_audio.empty()) {
      fprintf(stderr,
              "You need to provide --reference-audio for this TTS model");
      exit(EXIT_FAILURE);
    }

    int32_t sample_rate;
    bool is_ok = false;
    auto samples =
        sherpa_onnx::ReadWave(reference_audio, &sample_rate, &is_ok);
    if (!is_ok) {
      fprintf(stderr, "Failed to read '%s'", reference_audio.c_str());
      exit(EXIT_FAILURE);
    }

    gen_config.reference_audio = std::move(samples);
    gen_config.reference_sample_rate = sample_rate;
  }

  if (is_zipvoice_tts) {
    if (reference_text.empty()) {
      fprintf(stderr,
              "You need to provide --reference-text for ZipVoice TTS");
      exit(EXIT_FAILURE);
    }
    gen_config.reference_text = reference_text;
  }

  audio = tts.Generate(po.GetArg(1), gen_config, AudioCallback);

  const auto end = std::chrono::steady_clock::now();

  if (audio.samples.empty()) {
    fprintf(
        stderr,
        "Error in generating audio. Please read previous error messages.\n");
    exit(EXIT_FAILURE);
  }

  float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;
  float duration = audio.samples.size() / static_cast<float>(audio.sample_rate);

  float rtf = elapsed_seconds / duration;
  fprintf(stderr, "Number of threads: %d\n", config.model.num_threads);
  fprintf(stderr, "Elapsed seconds: %.3f s\n", elapsed_seconds);
  fprintf(stderr, "Audio duration: %.3f s\n", duration);
  fprintf(stderr, "Real-time factor (RTF): %.3f/%.3f = %.3f\n", elapsed_seconds,
          duration, rtf);

  bool ok = sherpa_onnx::WriteWave(output_filename, audio.sample_rate,
                                   audio.samples.data(), audio.samples.size());
  if (!ok) {
    fprintf(stderr, "Failed to write wave to %s\n", output_filename.c_str());
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "The text is: %s. Speaker ID: %d\n", po.GetArg(1).c_str(),
          sid);
  fprintf(stderr, "Saved to %s successfully!\n", output_filename.c_str());

  return 0;
}


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx-offline.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx-offline.cc
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#include <stdio.h>

#include <chrono>
#include <string>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/parse-options.h"
#include "sherpa-onnx/csrc/wave-reader.h"

int main(int32_t argc, char *argv[]) {
  const char *kUsageMessage = R"usage(
Speech recognition using non-streaming models with sherpa-onnx.

Usage:

(1) Transducer from icefall

See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/index.html

  ./bin/sherpa-onnx-offline \
    --tokens=/path/to/tokens.txt \
    --encoder=/path/to/encoder.onnx \
    --decoder=/path/to/decoder.onnx \
    --joiner=/path/to/joiner.onnx \
    --num-threads=1 \
    --decoding-method=greedy_search \
    /path/to/foo.wav [bar.wav foobar.wav ...]


(2) Paraformer from FunASR

See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html

  ./bin/sherpa-onnx-offline \
    --tokens=/path/to/tokens.txt \
    --paraformer=/path/to/model.onnx \
    --num-threads=1 \
    --decoding-method=greedy_search \
    /path/to/foo.wav [bar.wav foobar.wav ...]

(3) Moonshine models

See https://k2-fsa.github.io/sherpa/onnx/moonshine/index.html

  ./bin/sherpa-onnx-offline \
    --moonshine-preprocessor=/Users/fangjun/open-source/sherpa-onnx/scripts/moonshine/preprocess.onnx \
    --moonshine-encoder=/Users/fangjun/open-source/sherpa-onnx/scripts/moonshine/encode.int8.onnx \
    --moonshine-uncached-decoder=/Users/fangjun/open-source/sherpa-onnx/scripts/moonshine/uncached_decode.int8.onnx \
    --moonshine-cached-decoder=/Users/fangjun/open-source/sherpa-onnx/scripts/moonshine/cached_decode.int8.onnx \
    --tokens=/Users/fangjun/open-source/sherpa-onnx/scripts/moonshine/tokens.txt \
    --num-threads=1 \
    /path/to/foo.wav [bar.wav foobar.wav ...]

(4) Whisper models

See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html

  ./bin/sherpa-onnx-offline \
    --whisper-encoder=./sherpa-onnx-whisper-base.en/base.en-encoder.int8.onnx \
    --whisper-decoder=./sherpa-onnx-whisper-base.en/base.en-decoder.int8.onnx \
    --tokens=./sherpa-onnx-whisper-base.en/base.en-tokens.txt \
    --num-threads=1 \
    /path/to/foo.wav [bar.wav foobar.wav ...]

(5) NeMo CTC models

See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/index.html

  ./bin/sherpa-onnx-offline \
    --tokens=./sherpa-onnx-nemo-ctc-en-conformer-medium/tokens.txt \
    --nemo-ctc-model=./sherpa-onnx-nemo-ctc-en-conformer-medium/model.onnx \
    --num-threads=2 \
    --decoding-method=greedy_search \
    --debug=false \
    ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/0.wav \
    ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/1.wav \
    ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/8k.wav

(6) TDNN CTC model for the yesno recipe from icefall

See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/yesno/index.html
      //
  ./build/bin/sherpa-onnx-offline \
    --sample-rate=8000 \
    --feat-dim=23 \
    --tokens=./sherpa-onnx-tdnn-yesno/tokens.txt \
    --tdnn-model=./sherpa-onnx-tdnn-yesno/model-epoch-14-avg-2.onnx \
    ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_0_1_0_0_0_1.wav \
    ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_0_0_1_0.wav

(7) FunASR-nano models

See https://github.com/FunAudioLLM/Fun-ASR-Nano-2512

  ./bin/sherpa-onnx-offline \
    --funasr-nano-encoder-adaptor=/path/to/encoder_adaptor.onnx \
    --funasr-nano-llm=/path/to/llm.onnx \
    --funasr-nano-tokenizer=/path/to/Qwen3-0.6B \
    --funasr-nano-embedding=/path/to/embedding.onnx \
    /path/to/foo.wav [bar.wav foobar.wav ...]

Note: It supports decoding multiple files in batches

foo.wav should be of single channel, 16-bit PCM encoded wave file; its
sampling rate can be arbitrary and does not need to be 16kHz.

Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
for a list of pre-trained models to download.
)usage";

  sherpa_onnx::ParseOptions po(kUsageMessage);
  sherpa_onnx::OfflineRecognizerConfig config;
  config.Register(&po);

  po.Read(argc, argv);
  if (po.NumArgs() < 1) {
    fprintf(stderr, "Error: Please provide at least 1 wave file.\n\n");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "%s\n", config.ToString().c_str());

  if (!config.Validate()) {
    fprintf(stderr, "Errors in config!\n");
    return -1;
  }

  fprintf(stderr, "Creating recognizer ...\n");
  const auto begin_init = std::chrono::steady_clock::now();

  sherpa_onnx::OfflineRecognizer recognizer(config);

  const auto end_init = std::chrono::steady_clock::now();
  float elapsed_seconds_init =
      std::chrono::duration_cast<std::chrono::milliseconds>(end_init -
                                                            begin_init)
          .count() /
      1000.;
  fprintf(stderr, "recognizer created in %.3f s\n", elapsed_seconds_init);

  fprintf(stderr, "Started\n");
  const auto begin = std::chrono::steady_clock::now();

  std::vector<std::unique_ptr<sherpa_onnx::OfflineStream>> ss;
  std::vector<sherpa_onnx::OfflineStream *> ss_pointers;
  float duration = 0;
  for (int32_t i = 1; i <= po.NumArgs(); ++i) {
    std::string wav_filename = po.GetArg(i);
    int32_t sampling_rate = -1;
    bool is_ok = false;
    std::vector<float> samples =
        sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok);
    if (!is_ok) {
      fprintf(stderr, "Failed to read '%s'\n", wav_filename.c_str());
      return -1;
    }
    duration += samples.size() / static_cast<float>(sampling_rate);

    auto s = recognizer.CreateStream();
    s->AcceptWaveform(sampling_rate, samples.data(), samples.size());

    ss.push_back(std::move(s));
    ss_pointers.push_back(ss.back().get());
  }

  recognizer.DecodeStreams(ss_pointers.data(), ss_pointers.size());

  const auto end = std::chrono::steady_clock::now();

  fprintf(stderr, "Done!\n\n");
  for (int32_t i = 1; i <= po.NumArgs(); ++i) {
    fprintf(stderr, "%s\n", po.GetArg(i).c_str());
    fprintf(stdout, "%s\n", ss[i - 1]->GetResult().AsJsonString().c_str());
    fprintf(stderr, "----\n");
  }

  float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;

  fprintf(stderr, "num threads: %d\n", config.model_config.num_threads);
  fprintf(stderr, "decoding method: %s\n", config.decoding_method.c_str());
  if (config.decoding_method == "modified_beam_search") {
    fprintf(stderr, "max active paths: %d\n", config.max_active_paths);
  }

  fprintf(stderr, "Elapsed seconds: %.3f s\n", elapsed_seconds);
  float rtf = elapsed_seconds / duration;
  fprintf(stderr, "Real time factor (RTF): %.3f / %.3f = %.3f\n",
          elapsed_seconds, duration, rtf);

  return 0;
}


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx-online-denoiser.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx-online-denoiser.cc
//
// Copyright (c)  2026  Xiaomi Corporation

#include <stdio.h>

#include <algorithm>
#include <chrono>
#include <cstdint>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/online-speech-denoiser.h"
#include "sherpa-onnx/csrc/wave-reader.h"
#include "sherpa-onnx/csrc/wave-writer.h"

int main(int32_t argc, char *argv[]) {
  const char *kUsageMessage = R"usage(
Streaming speech denoising with sherpa-onnx.

Please download GTCRN and sample files from:

https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models

DPDFNet models are available from either:

https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
https://huggingface.co/Ceva-IP/DPDFNet

Currently this binary supports:
  gtcrn_simple.onnx
  dpdfnet_baseline.onnx
  dpdfnet2.onnx
  dpdfnet4.onnx
  dpdfnet8.onnx
  dpdfnet2_48khz_hr.onnx

Usage:

./bin/sherpa-onnx-online-denoiser \
  --speech-denoiser-gtcrn-model=gtcrn_simple.onnx \
  --chunk-duration-ms=16 \
  --input-wav=input.wav \
  --output-wav=output_16k.wav

./bin/sherpa-onnx-online-denoiser \
  --speech-denoiser-dpdfnet-model=dpdfnet4.onnx \
  --chunk-duration-ms=10 \
  --input-wav=input.wav \
  --output-wav=output_16k.wav

./bin/sherpa-onnx-online-denoiser \
  --speech-denoiser-dpdfnet-model=dpdfnet2_48khz_hr.onnx \
  --chunk-duration-ms=10 \
  --input-wav=input.wav \
  --output-wav=output_48k.wav
)usage";

  sherpa_onnx::ParseOptions po(kUsageMessage);
  sherpa_onnx::OnlineSpeechDenoiserConfig config;
  std::string input_wave;
  std::string output_wave;
  int32_t chunk_duration_ms = 10;

  config.Register(&po);
  po.Register("input-wav", &input_wave, "Path to input wav.");
  po.Register("output-wav", &output_wave, "Path to output wav.");
  po.Register("chunk-duration-ms", &chunk_duration_ms,
              "Streaming chunk duration in milliseconds.");

  po.Read(argc, argv);
  if (po.NumArgs() != 0) {
    fprintf(stderr, "Please don't give positional arguments\n");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "%s\n", config.ToString().c_str());

  if (!config.Validate()) {
    fprintf(stderr, "Errors in config!\n");
    return -1;
  }

  if (input_wave.empty()) {
    fprintf(stderr, "Please provide --input-wav\n");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  if (output_wave.empty()) {
    fprintf(stderr, "Please provide --output-wav\n");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  if (chunk_duration_ms <= 0) {
    fprintf(stderr, "Please provide --chunk-duration-ms > 0\n");
    return -1;
  }

  int32_t sampling_rate = -1;
  bool is_ok = false;
  std::vector<float> samples =
      sherpa_onnx::ReadWave(input_wave, &sampling_rate, &is_ok);
  if (!is_ok) {
    fprintf(stderr, "Failed to read '%s'\n", input_wave.c_str());
    return -1;
  }

  int32_t chunk_size = sampling_rate * chunk_duration_ms / 1000;
  if (chunk_size <= 0) {
    fprintf(stderr,
            "The selected chunk duration is too small for sample rate %d\n",
            sampling_rate);
    return -1;
  }

  sherpa_onnx::OnlineSpeechDenoiser denoiser(config);

  fprintf(stderr, "Started\n");
  const auto begin = std::chrono::steady_clock::now();

  std::vector<float> enhanced;
  enhanced.reserve(samples.size());

  for (size_t i = 0; i < samples.size(); i += chunk_size) {
    size_t num_samples =
        std::min(static_cast<size_t>(chunk_size), samples.size() - i);
    auto chunk = denoiser.Run(samples.data() + i, num_samples, sampling_rate);
    enhanced.insert(enhanced.end(), chunk.samples.begin(), chunk.samples.end());
  }

  auto tail = denoiser.Flush();
  enhanced.insert(enhanced.end(), tail.samples.begin(), tail.samples.end());

  const auto end = std::chrono::steady_clock::now();

  float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;

  fprintf(stderr, "Done\n");
  is_ok = sherpa_onnx::WriteWave(output_wave, denoiser.GetSampleRate(),
                                 enhanced.data(), enhanced.size());
  if (is_ok) {
    fprintf(stderr, "Saved to %s\n", output_wave.c_str());
  } else {
    fprintf(stderr, "Failed to save to %s\n", output_wave.c_str());
  }

  float duration = samples.size() / static_cast<float>(sampling_rate);
  fprintf(stderr, "num threads: %d\n", config.model.num_threads);
  fprintf(stderr, "chunk duration: %d ms\n", chunk_duration_ms);
  fprintf(stderr, "frame shift: %d samples @ %d Hz\n",
          denoiser.GetFrameShiftInSamples(), denoiser.GetSampleRate());
  fprintf(stderr, "Elapsed seconds: %.3f s\n", elapsed_seconds);
  float rtf = elapsed_seconds / duration;
  fprintf(stderr, "Real time factor (RTF): %.3f / %.3f = %.3f\n",
          elapsed_seconds, duration, rtf);
}


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx-online-punctuation.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx-online-punctuation.cc
//
// Copyright (c) 2024 Jian You (jianyou@cisco.com, Cisco Systems)

#include <stdio.h>

#include <chrono>
#include <iostream>
#include <string>

#include "sherpa-onnx/csrc/online-punctuation.h"
#include "sherpa-onnx/csrc/parse-options.h"

int main(int32_t argc, char *argv[]) {
  const char *kUsageMessage = R"usage(
Add punctuations to the input text.

The input text can contain English words.

Usage:

Please download the model from:
https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-online-punct-en-2024-08-06.tar.bz2

./bin/Release/sherpa-onnx-online-punctuation \
  --cnn-bilstm=/path/to/model.onnx \
  --bpe-vocab=/path/to/bpe.vocab \
  "how are you i am fine thank you"

The output text should look like below:
  "How are you? I am fine. Thank you."
)usage";

  sherpa_onnx::ParseOptions po(kUsageMessage);
  sherpa_onnx::OnlinePunctuationConfig config;
  config.Register(&po);
  po.Read(argc, argv);
  if (po.NumArgs() != 1) {
    fprintf(stderr,
            "Error: Please provide only 1 positional argument containing the "
            "input text.\n\n");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "%s\n", config.ToString().c_str());

  if (!config.Validate()) {
    fprintf(stderr, "Errors in config!\n");
    return -1;
  }

  fprintf(stderr, "Creating OnlinePunctuation ...\n");
  sherpa_onnx::OnlinePunctuation punct(config);
  fprintf(stderr, "Started\n");
  const auto begin = std::chrono::steady_clock::now();

  std::string text = po.GetArg(1);

  std::string text_with_punct_case = punct.AddPunctuationWithCase(text);

  const auto end = std::chrono::steady_clock::now();
  fprintf(stderr, "Done\n");

  float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;

  fprintf(stderr, "Num threads: %d\n", config.model.num_threads);
  fprintf(stderr, "Elapsed seconds: %.3f s\n", elapsed_seconds);
  fprintf(stderr, "Input text: %s\n", text.c_str());
  fprintf(stderr, "Output text: ");
  fprintf(stdout, "%s\n", text_with_punct_case.c_str());
}


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx-vad-alsa-offline-asr.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx-vad-alsa-offline-asr.cc
//
// Copyright (c)  2022-2025  Xiaomi Corporation

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <memory>
#include <mutex>  // NOLINT
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/alsa.h"
#include "sherpa-onnx/csrc/circular-buffer.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/resample.h"
#include "sherpa-onnx/csrc/voice-activity-detector.h"

bool stop = false;
static void Handler(int32_t /*sig*/) {
  stop = true;
  fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n");
}

int32_t main(int32_t argc, char *argv[]) {
  signal(SIGINT, Handler);

  const char *kUsageMessage = R"usage(
This program shows how to use a streaming VAD with non-streaming ASR in
sherpa-onnx.

Please download silero_vad.onnx from
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

For instance, use
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

Please refer to ./sherpa-onnx-microphone-offline.cc
to download models for offline ASR.

(1) Transducer from icefall

  ./bin/sherpa-onnx-vad-microphone-offline-asr \
    --silero-vad-model=/path/to/silero_vad.onnx \
    --tokens=/path/to/tokens.txt \
    --encoder=/path/to/encoder.onnx \
    --decoder=/path/to/decoder.onnx \
    --joiner=/path/to/joiner.onnx \
    device_name

(2) Paraformer from FunASR

  ./bin/sherpa-onnx-vad-microphone-offline-asr \
    --silero-vad-model=/path/to/silero_vad.onnx \
    --tokens=/path/to/tokens.txt \
    --paraformer=/path/to/model.onnx \
    device_name

(3) Whisper models

  ./bin/sherpa-onnx-vad-microphone-offline-asr \
    --silero-vad-model=/path/to/silero_vad.onnx \
    --whisper-encoder=./sherpa-onnx-whisper-base.en/base.en-encoder.int8.onnx \
    --whisper-decoder=./sherpa-onnx-whisper-base.en/base.en-decoder.int8.onnx \
    --tokens=./sherpa-onnx-whisper-base.en/base.en-tokens.txt \
    device_name

The device name specifies which microphone to use in case there are several
on your system. You can use

  arecord -l

to find all available microphones on your computer. For instance, if it outputs

**** List of CAPTURE Hardware Devices ****
card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
  Subdevices: 1/1
  Subdevice #0: subdevice #0

and if you want to select card 3 and device 0 on that card, please use:

  plughw:3,0

as the device_name.
)usage";

  sherpa_onnx::ParseOptions po(kUsageMessage);
  sherpa_onnx::VadModelConfig vad_config;

  sherpa_onnx::OfflineRecognizerConfig asr_config;

  vad_config.Register(&po);
  asr_config.Register(&po);

  po.Read(argc, argv);
  if (po.NumArgs() != 1) {
    fprintf(stderr, "Please provide only 1 argument: the device name\n");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "%s\n", vad_config.ToString().c_str());
  fprintf(stderr, "%s\n", asr_config.ToString().c_str());

  if (!vad_config.Validate()) {
    fprintf(stderr, "Errors in vad_config!\n");
    return -1;
  }

  if (!asr_config.Validate()) {
    fprintf(stderr, "Errors in asr_config!\n");
    return -1;
  }

  fprintf(stderr, "Creating recognizer ...\n");
  sherpa_onnx::OfflineRecognizer recognizer(asr_config);
  fprintf(stderr, "Recognizer created!\n");

  auto vad = std::make_unique<sherpa_onnx::VoiceActivityDetector>(vad_config);

  std::string device_name = po.GetArg(1);
  sherpa_onnx::Alsa alsa(device_name.c_str());
  fprintf(stderr, "Use recording device: %s\n", device_name.c_str());

  int32_t sample_rate = 16000;

  if (alsa.GetExpectedSampleRate() != sample_rate) {
    fprintf(stderr, "sample rate: %d != %d\n", alsa.GetExpectedSampleRate(),
            sample_rate);
    exit(-1);
  }

  fprintf(stderr, "Started. Please speak\n");

  int32_t window_size = vad_config.silero_vad.window_size;
  int32_t index = 0;

  while (!stop) {
    const std::vector<float> &samples = alsa.Read(window_size);
    vad->AcceptWaveform(samples.data(), samples.size());

    while (!vad->Empty()) {
      const auto &segment = vad->Front();
      auto s = recognizer.CreateStream();
      s->AcceptWaveform(sample_rate, segment.samples.data(),
                        segment.samples.size());
      recognizer.DecodeStream(s.get());
      const auto &result = s->GetResult();
      if (!result.text.empty()) {
        fprintf(stderr, "%2d: %s\n", index, result.text.c_str());
        ++index;
      }
      vad->Pop();
    }
  }

  return 0;
}


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx-vad-alsa.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx-vad-alsa.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <iomanip>
#include <memory>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/alsa.h"
#include "sherpa-onnx/csrc/voice-activity-detector.h"
#include "sherpa-onnx/csrc/wave-writer.h"

bool stop = false;
static void Handler(int32_t sig) {
  stop = true;
  fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n");
}

int32_t main(int32_t argc, char *argv[]) {
  signal(SIGINT, Handler);

  const char *kUsageMessage = R"usage(
This program shows how to use VAD in sherpa-onnx.

  ./bin/sherpa-onnx-vad-alsa \
    --silero-vad-model=/path/to/silero_vad.onnx \
    device_name

Please download silero_vad.onnx from
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

For instance, use
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

The device name specifies which microphone to use in case there are several
on your system. You can use

  arecord -l

to find all available microphones on your computer. For instance, if it outputs

**** List of CAPTURE Hardware Devices ****
card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
  Subdevices: 1/1
  Subdevice #0: subdevice #0

and if you want to select card 3 and device 0 on that card, please use:

  plughw:3,0

as the device_name.
)usage";

  sherpa_onnx::ParseOptions po(kUsageMessage);
  sherpa_onnx::VadModelConfig config;

  config.Register(&po);
  po.Read(argc, argv);
  if (po.NumArgs() != 1) {
    fprintf(stderr, "Please provide only 1 argument: the device name\n");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "%s\n", config.ToString().c_str());

  if (!config.Validate()) {
    fprintf(stderr, "Errors in config!\n");
    return -1;
  }

  std::string device_name = po.GetArg(1);
  sherpa_onnx::Alsa alsa(device_name.c_str());
  fprintf(stderr, "Use recording device: %s\n", device_name.c_str());

  int32_t sample_rate = 16000;

  if (alsa.GetExpectedSampleRate() != sample_rate) {
    fprintf(stderr, "sample rate: %d != %d\n", alsa.GetExpectedSampleRate(),
            sample_rate);
    exit(-1);
  }

  auto vad = std::make_unique<sherpa_onnx::VoiceActivityDetector>(config);

  fprintf(stderr, "Started. Please speak\n");

  int32_t window_size = config.silero_vad.window_size;
  bool printed = false;

  int32_t k = 0;
  while (!stop) {
    const std::vector<float> &samples = alsa.Read(window_size);

    vad->AcceptWaveform(samples.data(), samples.size());

    if (vad->IsSpeechDetected() && !printed) {
      printed = true;
      fprintf(stderr, "\nDetected speech!\n");
    }
    if (!vad->IsSpeechDetected()) {
      printed = false;
    }

    while (!vad->Empty()) {
      const auto &segment = vad->Front();
      float duration = segment.samples.size() / static_cast<float>(sample_rate);

      fprintf(stderr, "Duration: %.3f seconds\n", duration);

      std::ostringstream os;
      os << "seg-" << k << "-" << std::fixed << std::setprecision(3) << duration
         << "s.wav";
      k += 1;
      sherpa_onnx::WriteWave(os.str(), 16000, segment.samples.data(),
                             segment.samples.size());
      fprintf(stderr, "Saved to %s\n", os.str().c_str());
      fprintf(stderr, "----------\n");

      vad->Pop();
    }
  }

  return 0;
}


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx-vad-microphone-offline-asr.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx-vad-microphone-offline-asr.cc
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <memory>
#include <mutex>
#include <utility>
#include <vector>

#include "portaudio.h"  // NOLINT
#include "sherpa-onnx/csrc/circular-buffer.h"
#include "sherpa-onnx/csrc/microphone.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/resample.h"
#include "sherpa-onnx/csrc/voice-activity-detector.h"

bool stop = false;
std::mutex mutex;
sherpa_onnx::CircularBuffer buffer(16000 * 60);

static int32_t RecordCallback(const void *input_buffer,
                              void * /*output_buffer*/,
                              unsigned long frames_per_buffer,  // NOLINT
                              const PaStreamCallbackTimeInfo * /*time_info*/,
                              PaStreamCallbackFlags /*status_flags*/,
                              void * /*user_data*/) {
  std::lock_guard<std::mutex> lock(mutex);
  buffer.Push(reinterpret_cast<const float *>(input_buffer), frames_per_buffer);

  return stop ? paComplete : paContinue;
}

static void Handler(int32_t /*sig*/) {
  stop = true;
  fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n");
}

int32_t main(int32_t argc, char *argv[]) {
  signal(SIGINT, Handler);

  const char *kUsageMessage = R"usage(
This program shows how to use a streaming VAD with non-streaming ASR in
sherpa-onnx.

Please download silero_vad.onnx from
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

For instance, use
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

Please refer to ./sherpa-onnx-microphone-offline.cc
to download models for offline ASR.

(1) Transducer from icefall

  ./bin/sherpa-onnx-vad-microphone-offline-asr \
    --silero-vad-model=/path/to/silero_vad.onnx \
    --tokens=/path/to/tokens.txt \
    --encoder=/path/to/encoder.onnx \
    --decoder=/path/to/decoder.onnx \
    --joiner=/path/to/joiner.onnx

(2) Paraformer from FunASR

  ./bin/sherpa-onnx-vad-microphone-offline-asr \
    --silero-vad-model=/path/to/silero_vad.onnx \
    --tokens=/path/to/tokens.txt \
    --paraformer=/path/to/model.onnx \
    --num-threads=1

(3) Whisper models

  ./bin/sherpa-onnx-vad-microphone-offline-asr \
    --silero-vad-model=/path/to/silero_vad.onnx \
    --whisper-encoder=./sherpa-onnx-whisper-base.en/base.en-encoder.int8.onnx \
    --whisper-decoder=./sherpa-onnx-whisper-base.en/base.en-decoder.int8.onnx \
    --tokens=./sherpa-onnx-whisper-base.en/base.en-tokens.txt \
    --num-threads=1
)usage";

  sherpa_onnx::ParseOptions po(kUsageMessage);
  sherpa_onnx::VadModelConfig vad_config;

  sherpa_onnx::OfflineRecognizerConfig asr_config;

  vad_config.Register(&po);
  asr_config.Register(&po);

  po.Read(argc, argv);
  if (po.NumArgs() != 0) {
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "%s\n", vad_config.ToString().c_str());
  fprintf(stderr, "%s\n", asr_config.ToString().c_str());

  if (!vad_config.Validate()) {
    fprintf(stderr, "Errors in vad_config!\n");
    return -1;
  }

  if (!asr_config.Validate()) {
    fprintf(stderr, "Errors in asr_config!\n");
    return -1;
  }

  fprintf(stderr, "Creating recognizer ...\n");
  sherpa_onnx::OfflineRecognizer recognizer(asr_config);
  fprintf(stderr, "Recognizer created!\n");

  sherpa_onnx::Microphone mic;

  int32_t device_index = Pa_GetDefaultInputDevice();
  if (device_index == paNoDevice) {
    fprintf(stderr, "No default input device found\n");
    fprintf(stderr,
            "  If you are using Linux, please try "
            "./build/bin/sherpa-onnx-vad-alsa-offline-asr\n");
    exit(EXIT_FAILURE);
  }

  const char *pDeviceIndex = std::getenv("SHERPA_ONNX_MIC_DEVICE");
  if (pDeviceIndex) {
    fprintf(stderr, "Use specified device: %s\n", pDeviceIndex);
    device_index = atoi(pDeviceIndex);
  }
  mic.PrintDevices(device_index);

  float mic_sample_rate = 16000;
  const char *pSampleRateStr = std::getenv("SHERPA_ONNX_MIC_SAMPLE_RATE");
  if (pSampleRateStr) {
    fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
    mic_sample_rate = atof(pSampleRateStr);
  }

  if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
                      nullptr)) {
    fprintf(stderr, "Failed to open device %d\n", device_index);
    exit(EXIT_FAILURE);
  }

  float sample_rate = 16000;
  std::unique_ptr<sherpa_onnx::LinearResample> resampler;
  if (mic_sample_rate != sample_rate) {
    float min_freq = std::min(mic_sample_rate, sample_rate);
    float lowpass_cutoff = 0.99 * 0.5 * min_freq;

    int32_t lowpass_filter_width = 6;
    resampler = std::make_unique<sherpa_onnx::LinearResample>(
        mic_sample_rate, sample_rate, lowpass_cutoff, lowpass_filter_width);
  }

  auto vad = std::make_unique<sherpa_onnx::VoiceActivityDetector>(vad_config);

  fprintf(stderr, "Started. Please speak\n");

  int32_t window_size = vad_config.silero_vad.window_size;
  int32_t index = 0;

  while (!stop) {
    {
      std::lock_guard<std::mutex> lock(mutex);

      while (buffer.Size() >= window_size) {
        std::vector<float> samples = buffer.Get(buffer.Head(), window_size);
        buffer.Pop(window_size);

        if (resampler) {
          std::vector<float> tmp;
          resampler->Resample(samples.data(), samples.size(), true, &tmp);
          samples = std::move(tmp);
        }

        vad->AcceptWaveform(samples.data(), samples.size());
      }
    }

    while (!vad->Empty()) {
      const auto &segment = vad->Front();
      auto s = recognizer.CreateStream();
      s->AcceptWaveform(sample_rate, segment.samples.data(),
                        segment.samples.size());
      recognizer.DecodeStream(s.get());
      const auto &result = s->GetResult();
      if (!result.text.empty()) {
        fprintf(stderr, "%2d: %s\n", index, result.text.c_str());
        ++index;
      }
      vad->Pop();
    }

    Pa_Sleep(100);  // sleep for 100ms
  }

  return 0;
}


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx-vad-microphone-simulated-streaming-asr.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx-vad-microphone-simulated-streaming-asr.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <chrono>
#include <condition_variable>
#include <memory>
#include <mutex>
#include <queue>
#include <string>
#include <vector>

#include "portaudio.h"  // NOLINT
#include "sherpa-onnx/csrc/circular-buffer.h"
#include "sherpa-onnx/csrc/microphone.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/resample.h"
#include "sherpa-onnx/csrc/sherpa-display.h"
#include "sherpa-onnx/csrc/voice-activity-detector.h"
#include "sherpa-onnx/csrc/wave-writer.h"

std::queue<std::vector<float>> samples_queue;
std::condition_variable condition_variable;
std::mutex mutex;
bool stop = false;

static int32_t RecordCallback(const void *input_buffer,
                              void * /*output_buffer*/,
                              unsigned long frames_per_buffer,  // NOLINT
                              const PaStreamCallbackTimeInfo * /*time_info*/,
                              PaStreamCallbackFlags /*status_flags*/,
                              void * /*user_data*/) {
  std::lock_guard<std::mutex> lock(mutex);
  samples_queue.emplace(
      reinterpret_cast<const float *>(input_buffer),
      reinterpret_cast<const float *>(input_buffer) + frames_per_buffer);
  condition_variable.notify_one();

  return stop ? paComplete : paContinue;
}

static void Handler(int32_t /*sig*/) {
  stop = true;
  condition_variable.notify_one();
  fprintf(stdout, "\nCaught Ctrl + C. Exiting...\n");
}

int32_t main(int32_t argc, char *argv[]) {
  signal(SIGINT, Handler);

  const char *kUsageMessage = R"usage(
This program shows how to use a streaming VAD with non-streaming ASR in
sherpa-onnx for real-time speech recognition.

(1) SenseVoice

cd /path/to/sherpa-onnx/build

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17.tar.bz2
tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17.tar.bz2

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

./bin/sherpa-onnx-vad-microphone-simulated-streaming-asr \
  --silero-vad-model=./silero_vad.onnx \
  --sense-voice-model=./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17/model.int8.onnx \
  --tokens=./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17/tokens.txt

(2) Parakeet TDT 0.6b v2

cd /path/to/sherpa-onnx/build

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2
tar xvf sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

./bin/sherpa-onnx-vad-microphone-simulated-streaming-asr \
  --silero-vad-model=./silero_vad.onnx \
  --encoder=./sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/encoder.int8.onnx \
  --decoder=./sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/decoder.int8.onnx \
  --joiner=./sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/joiner.int8.onnx \
  --tokens=./sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/tokens.txt

(3) Please refer to our doc for more non-streaming ASR models,
e.g., zipformer, paraformer, whisper, etc.

Please first use ./bin/sherpa-onnx-offline to test the RTF of the model.
A model with RTF < 0.2 should work with this program.
)usage";

  sherpa_onnx::ParseOptions po(kUsageMessage);
  sherpa_onnx::VadModelConfig vad_config;

  sherpa_onnx::OfflineRecognizerConfig asr_config;

  vad_config.Register(&po);
  asr_config.Register(&po);

  int32_t user_device_index = -1;  // -1 means to use default value
  int32_t user_sample_rate = -1;   // -1 means to use default value

  po.Register("mic-device-index", &user_device_index,
              "If provided, we use it to replace the default device index."
              "You can use sherpa-onnx-pa-devs to list available devices");

  po.Register("mic-sample-rate", &user_sample_rate,
              "If provided, we use it to replace the default sample rate."
              "You can use sherpa-onnx-pa-devs to list sample rate of "
              "available devices");

  if (argc == 1) {
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  po.Read(argc, argv);
  if (po.NumArgs() != 0) {
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  fprintf(stdout, "%s\n", vad_config.ToString().c_str());
  fprintf(stdout, "%s\n", asr_config.ToString().c_str());

  if (!vad_config.Validate()) {
    fprintf(stdout, "Errors in vad_config!\n");
    return -1;
  }

  if (!asr_config.Validate()) {
    fprintf(stdout, "Errors in asr_config!\n");
    return -1;
  }

  fprintf(stdout, "Creating recognizer ...\n");
  sherpa_onnx::OfflineRecognizer recognizer(asr_config);
  fprintf(stdout, "Recognizer created!\n");

  sherpa_onnx::Microphone mic;

  int32_t device_index = Pa_GetDefaultInputDevice();
  if (device_index == paNoDevice) {
    fprintf(stdout, "No default input device found\n");
    exit(EXIT_FAILURE);
  }

  if (user_device_index >= 0) {
    fprintf(stdout, "Use specified device: %d\n", user_device_index);
    device_index = user_device_index;
  } else {
    fprintf(stdout, "Use default device: %d\n", device_index);
  }

  mic.PrintDevices(device_index);

  float mic_sample_rate = 16000;
  if (user_sample_rate > 0) {
    fprintf(stdout, "Use sample rate %d for mic\n", user_sample_rate);
    mic_sample_rate = user_sample_rate;
  }

  if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
                      nullptr)) {
    fprintf(stdout, "Failed to open device %d\n", device_index);
    exit(EXIT_FAILURE);
  }

  float sample_rate = 16000;
  std::unique_ptr<sherpa_onnx::LinearResample> resampler;
  if (mic_sample_rate != sample_rate) {
    float min_freq = std::min(mic_sample_rate, sample_rate);
    float lowpass_cutoff = 0.99 * 0.5 * min_freq;

    int32_t lowpass_filter_width = 6;
    resampler = std::make_unique<sherpa_onnx::LinearResample>(
        mic_sample_rate, sample_rate, lowpass_cutoff, lowpass_filter_width);
  }

  auto vad = std::make_unique<sherpa_onnx::VoiceActivityDetector>(vad_config);

  int32_t window_size = vad_config.silero_vad.window_size;

  int32_t offset = 0;
  bool speech_started = false;
  std::vector<float> buffer;

  auto started_time = std::chrono::steady_clock::now();
  sherpa_onnx::SherpaDisplay display;

  fprintf(stdout, "Started. Please speak\n");
  std::vector<float> resampled;

  while (!stop) {
    {
      std::unique_lock<std::mutex> lock(mutex);
      while (samples_queue.empty() && !stop) {
        condition_variable.wait(lock);
      }

      if (stop) {
        break;
      }

      const auto &s = samples_queue.front();
      if (!resampler) {
        buffer.insert(buffer.end(), s.begin(), s.end());
      } else {
        resampler->Resample(s.data(), s.size(), false, &resampled);
        buffer.insert(buffer.end(), resampled.begin(), resampled.end());
      }

      samples_queue.pop();
    }

    for (; offset + window_size < buffer.size(); offset += window_size) {
      vad->AcceptWaveform(buffer.data() + offset, window_size);
      if (!speech_started && vad->IsSpeechDetected()) {
        speech_started = true;
        started_time = std::chrono::steady_clock::now();
      }
    }

    if (!speech_started) {
      if (buffer.size() > 10 * window_size) {
        offset -= buffer.size() - 10 * window_size;
        buffer = {buffer.end() - 10 * window_size, buffer.end()};
      }
    }

    auto current_time = std::chrono::steady_clock::now();
    const float elapsed_seconds =
        std::chrono::duration_cast<std::chrono::milliseconds>(current_time -
                                                              started_time)
            .count() /
        1000.;

    if (speech_started && elapsed_seconds > 0.2) {
      auto s = recognizer.CreateStream();
      s->AcceptWaveform(sample_rate, buffer.data(), buffer.size());
      recognizer.DecodeStream(s.get());
      const auto &result = s->GetResult();
      display.UpdateText(result.text);
      display.Display();

      started_time = std::chrono::steady_clock::now();
    }

    while (!vad->Empty()) {
      // when stopping speak, this while loop is executed

      vad->Pop();

      display.FinalizeCurrentSentence();
      display.Display();

      buffer.clear();
      offset = 0;
      speech_started = false;
    }
  }

  return 0;
}


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx-vad-microphone.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx-vad-microphone.cc
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <memory>
#include <mutex>
#include <utility>
#include <vector>

#include "portaudio.h"  // NOLINT
#include "sherpa-onnx/csrc/circular-buffer.h"
#include "sherpa-onnx/csrc/microphone.h"
#include "sherpa-onnx/csrc/resample.h"
#include "sherpa-onnx/csrc/voice-activity-detector.h"
#include "sherpa-onnx/csrc/wave-writer.h"

bool stop = false;
std::mutex mutex;
sherpa_onnx::CircularBuffer buffer(16000 * 60);

static int32_t RecordCallback(const void *input_buffer,
                              void * /*output_buffer*/,
                              unsigned long frames_per_buffer,  // NOLINT
                              const PaStreamCallbackTimeInfo * /*time_info*/,
                              PaStreamCallbackFlags /*status_flags*/,
                              void * /*user_data*/) {
  std::lock_guard<std::mutex> lock(mutex);
  buffer.Push(reinterpret_cast<const float *>(input_buffer), frames_per_buffer);

  return stop ? paComplete : paContinue;
}

static void Handler(int32_t /*sig*/) {
  stop = true;
  fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n");
}

int32_t main(int32_t argc, char *argv[]) {
  signal(SIGINT, Handler);

  const char *kUsageMessage = R"usage(
This program shows how to use VAD in sherpa-onnx.

  ./bin/sherpa-onnx-vad-microphone \
    --silero-vad-model=/path/to/silero_vad.onnx \
    --vad-provider=cpu \
    --vad-num-threads=1

Please download silero_vad.onnx from
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

For instance, use
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
)usage";

  sherpa_onnx::ParseOptions po(kUsageMessage);
  sherpa_onnx::VadModelConfig config;

  config.Register(&po);
  po.Read(argc, argv);
  if (po.NumArgs() != 0) {
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "%s\n", config.ToString().c_str());

  if (!config.Validate()) {
    fprintf(stderr, "Errors in config!\n");
    return -1;
  }

  sherpa_onnx::Microphone mic;

  int32_t device_index = Pa_GetDefaultInputDevice();
  if (device_index == paNoDevice) {
    fprintf(stderr, "No default input device found\n");
    fprintf(stderr, "If you are using Linux, please switch to \n");
    fprintf(stderr, " ./bin/sherpa-onnx-vad-alsa \n");
    exit(EXIT_FAILURE);
  }

  const char *pDeviceIndex = std::getenv("SHERPA_ONNX_MIC_DEVICE");
  if (pDeviceIndex) {
    fprintf(stderr, "Use specified device: %s\n", pDeviceIndex);
    device_index = atoi(pDeviceIndex);
  }
  mic.PrintDevices(device_index);

  float mic_sample_rate = 16000;
  const char *pSampleRateStr = std::getenv("SHERPA_ONNX_MIC_SAMPLE_RATE");
  if (pSampleRateStr) {
    fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
    mic_sample_rate = atof(pSampleRateStr);
  }
  if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
                      nullptr)) {
    fprintf(stderr, "Failed to open microphone device %d\n", device_index);
    exit(EXIT_FAILURE);
  }

  float sample_rate = 16000;
  std::unique_ptr<sherpa_onnx::LinearResample> resampler;
  if (mic_sample_rate != sample_rate) {
    float min_freq = std::min(mic_sample_rate, sample_rate);
    float lowpass_cutoff = 0.99 * 0.5 * min_freq;

    int32_t lowpass_filter_width = 6;
    resampler = std::make_unique<sherpa_onnx::LinearResample>(
        mic_sample_rate, sample_rate, lowpass_cutoff, lowpass_filter_width);
  }

  auto vad = std::make_unique<sherpa_onnx::VoiceActivityDetector>(config);

  int32_t window_size = config.silero_vad.window_size;
  bool printed = false;

  int32_t k = 0;
  while (!stop) {
    {
      std::lock_guard<std::mutex> lock(mutex);

      while (buffer.Size() >= window_size) {
        std::vector<float> samples = buffer.Get(buffer.Head(), window_size);
        buffer.Pop(window_size);

        if (resampler) {
          std::vector<float> tmp;
          resampler->Resample(samples.data(), samples.size(), true, &tmp);
          samples = std::move(tmp);
        }

        vad->AcceptWaveform(samples.data(), samples.size());

        if (vad->IsSpeechDetected() && !printed) {
          printed = true;
          fprintf(stderr, "\nDetected speech!\n");
        }
        if (!vad->IsSpeechDetected()) {
          printed = false;
        }

        while (!vad->Empty()) {
          const auto &segment = vad->Front();
          float duration = segment.samples.size() / sample_rate;
          fprintf(stderr, "Duration: %.3f seconds\n", duration);

          char filename[128];
          snprintf(filename, sizeof(filename), "seg-%d-%.3fs.wav", k, duration);
          k += 1;
          sherpa_onnx::WriteWave(filename, sample_rate, segment.samples.data(),
                                 segment.samples.size());
          fprintf(stderr, "Saved to %s\n", filename);
          fprintf(stderr, "----------\n");

          vad->Pop();
        }
      }
    }
    Pa_Sleep(100);  // sleep for 100ms
  }

  return 0;
}


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx-vad-with-offline-asr.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx-vad-with-offline-asr.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include <stdio.h>

#include <algorithm>
#include <chrono>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/parse-options.h"
#include "sherpa-onnx/csrc/resample.h"
#include "sherpa-onnx/csrc/voice-activity-detector.h"
#include "sherpa-onnx/csrc/wave-reader.h"

int main(int32_t argc, char *argv[]) {
  const char *kUsageMessage = R"usage(
Speech recognition using VAD + non-streaming models with sherpa-onnx.

Usage:

Note you can download silero_vad.onnx using

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

(0) FireRedAsr

See https://k2-fsa.github.io/sherpa/onnx/FireRedAsr/pretrained.html

  ./bin/sherpa-onnx-vad-with-offline-asr \
    --tokens=./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/tokens.txt \
    --fire-red-asr-encoder=./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx \
    --fire-red-asr-decoder=./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/decoder.int8.onnx \
    --num-threads=1 \
    --silero-vad-model=/path/to/silero_vad.onnx \
    /path/to/foo.wav

(1) Transducer from icefall

See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/index.html

  ./bin/sherpa-onnx-vad-with-offline-asr \
    --silero-vad-model=/path/to/silero_vad.onnx \
    --tokens=/path/to/tokens.txt \
    --encoder=/path/to/encoder.onnx \
    --decoder=/path/to/decoder.onnx \
    --joiner=/path/to/joiner.onnx \
    --num-threads=1 \
    --decoding-method=greedy_search \
    /path/to/foo.wav


(2) Paraformer from FunASR

See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html

  ./bin/sherpa-onnx-vad-with-offline-asr \
    --silero-vad-model=/path/to/silero_vad.onnx \
    --tokens=/path/to/tokens.txt \
    --paraformer=/path/to/model.onnx \
    --num-threads=1 \
    --decoding-method=greedy_search \
    /path/to/foo.wav

(3) Moonshine models

See https://k2-fsa.github.io/sherpa/onnx/moonshine/index.html

  ./bin/sherpa-onnx-vad-with-offline-asr \
    --silero-vad-model=/path/to/silero_vad.onnx \
    --moonshine-preprocessor=/Users/fangjun/open-source/sherpa-onnx/scripts/moonshine/preprocess.onnx \
    --moonshine-encoder=/Users/fangjun/open-source/sherpa-onnx/scripts/moonshine/encode.int8.onnx \
    --moonshine-uncached-decoder=/Users/fangjun/open-source/sherpa-onnx/scripts/moonshine/uncached_decode.int8.onnx \
    --moonshine-cached-decoder=/Users/fangjun/open-source/sherpa-onnx/scripts/moonshine/cached_decode.int8.onnx \
    --tokens=/Users/fangjun/open-source/sherpa-onnx/scripts/moonshine/tokens.txt \
    --num-threads=1 \
    /path/to/foo.wav

(4) Whisper models

See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html

  ./bin/sherpa-onnx-vad-with-offline-asr \
    --silero-vad-model=/path/to/silero_vad.onnx \
    --whisper-encoder=./sherpa-onnx-whisper-base.en/base.en-encoder.int8.onnx \
    --whisper-decoder=./sherpa-onnx-whisper-base.en/base.en-decoder.int8.onnx \
    --tokens=./sherpa-onnx-whisper-base.en/base.en-tokens.txt \
    --num-threads=1 \
    /path/to/foo.wav

(5) NeMo CTC models

See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/index.html

  ./bin/sherpa-onnx-vad-with-offline-asr \
    --silero-vad-model=/path/to/silero_vad.onnx \
    --tokens=./sherpa-onnx-nemo-ctc-en-conformer-medium/tokens.txt \
    --nemo-ctc-model=./sherpa-onnx-nemo-ctc-en-conformer-medium/model.onnx \
    --num-threads=2 \
    --decoding-method=greedy_search \
    --debug=false \
    ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/0.wav

(6) TDNN CTC model for the yesno recipe from icefall

See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/yesno/index.html

  ./bin/sherpa-onnx-vad-with-offline-asr \
    --silero-vad-model=/path/to/silero_vad.onnx \
    --sample-rate=8000 \
    --feat-dim=23 \
    --tokens=./sherpa-onnx-tdnn-yesno/tokens.txt \
    --tdnn-model=./sherpa-onnx-tdnn-yesno/model-epoch-14-avg-2.onnx \
    ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_0_1_0_0_0_1.wav

(7) FunASR-nano models

See https://github.com/FunAudioLLM/Fun-ASR-Nano-2512

  ./bin/sherpa-onnx-vad-with-offline-asr \
    --silero-vad-model=/path/to/silero_vad.onnx \
    --funasr-nano-encoder-adaptor=/path/to/encoder_adaptor.onnx \
    --funasr-nano-llm=/path/to/llm.onnx \
    --funasr-nano-tokenizer=/path/to/Qwen3-0.6B \
    --funasr-nano-embedding=/path/to/embedding.onnx \
    [--funasr-nano-user-prompt="Transcription:"] \
    [--funasr-nano-max-new-tokens=512] \
    [--funasr-nano-temperature=1e-6] \
    [--funasr-nano-top-p=0.8] \
    --num-threads=4 \
    /path/to/foo.wav

The input wav should be of single channel, 16-bit PCM encoded wave file; its
sampling rate can be arbitrary and does not need to be 16kHz.

Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
for a list of pre-trained models to download.
)usage";

  sherpa_onnx::ParseOptions po(kUsageMessage);
  sherpa_onnx::OfflineRecognizerConfig asr_config;
  asr_config.Register(&po);

  sherpa_onnx::VadModelConfig vad_config;
  vad_config.Register(&po);

  po.Read(argc, argv);
  if (po.NumArgs() != 1) {
    fprintf(stderr, "Error: Please provide at only 1 wave file. Given: %d\n\n",
            po.NumArgs());
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "%s\n", vad_config.ToString().c_str());
  fprintf(stderr, "%s\n", asr_config.ToString().c_str());

  if (!vad_config.Validate()) {
    fprintf(stderr, "Errors in vad_config!\n");
    return -1;
  }

  if (!asr_config.Validate()) {
    fprintf(stderr, "Errors in ASR config!\n");
    return -1;
  }

  fprintf(stderr, "Creating recognizer ...\n");
  sherpa_onnx::OfflineRecognizer recognizer(asr_config);
  fprintf(stderr, "Recognizer created!\n");

  auto vad = std::make_unique<sherpa_onnx::VoiceActivityDetector>(vad_config);

  fprintf(stderr, "Started\n");
  const auto begin = std::chrono::steady_clock::now();

  std::string wave_filename = po.GetArg(1);
  fprintf(stderr, "Reading: %s\n", wave_filename.c_str());
  int32_t sampling_rate = -1;
  bool is_ok = false;
  auto samples = sherpa_onnx::ReadWave(wave_filename, &sampling_rate, &is_ok);
  if (!is_ok) {
    fprintf(stderr, "Failed to read '%s'\n", wave_filename.c_str());
    return -1;
  }

  if (sampling_rate != 16000) {
    fprintf(stderr, "Resampling from %d Hz to 16000 Hz", sampling_rate);
    float min_freq = std::min<int32_t>(sampling_rate, 16000);
    float lowpass_cutoff = 0.99 * 0.5 * min_freq;

    int32_t lowpass_filter_width = 6;
    auto resampler = std::make_unique<sherpa_onnx::LinearResample>(
        sampling_rate, 16000, lowpass_cutoff, lowpass_filter_width);
    std::vector<float> out_samples;
    resampler->Resample(samples.data(), samples.size(), true, &out_samples);
    samples = std::move(out_samples);
    fprintf(stderr, "Resampling done\n");
  }

  fprintf(stderr, "Started!\n");
  int32_t window_size = vad_config.silero_vad.window_size;
  int32_t i = 0;
  while (i < samples.size()) {
    if (i + window_size <= samples.size()) {
      vad->AcceptWaveform(samples.data() + i, window_size);
    } else {
      vad->Flush();
    }

    i += window_size;

    while (!vad->Empty()) {
      const auto &segment = vad->Front();
      float duration = segment.samples.size() / 16000.;
      float start_time = segment.start / 16000.;
      float end_time = start_time + duration;
      if (duration < 0.1) {
        vad->Pop();
        continue;
      }

      auto s = recognizer.CreateStream();
      s->AcceptWaveform(16000, segment.samples.data(), segment.samples.size());
      recognizer.DecodeStream(s.get());
      const auto &result = s->GetResult();
      if (!result.text.empty()) {
        fprintf(stderr, "%.3f -- %.3f: ", start_time, end_time);
        fprintf(stdout, "%s\n", result.text.c_str());
      }
      vad->Pop();
    }
  }

  const auto end = std::chrono::steady_clock::now();

  float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;

  fprintf(stderr, "num threads: %d\n", asr_config.model_config.num_threads);
  fprintf(stderr, "decoding method: %s\n", asr_config.decoding_method.c_str());
  if (asr_config.decoding_method == "modified_beam_search") {
    fprintf(stderr, "max active paths: %d\n", asr_config.max_active_paths);
  }

  float duration = samples.size() / 16000.;
  fprintf(stderr, "Elapsed seconds: %.3f s\n", elapsed_seconds);
  float rtf = elapsed_seconds / duration;
  fprintf(stderr, "Real time factor (RTF): %.3f / %.3f = %.3f\n",
          elapsed_seconds, duration, rtf);

  return 0;
}


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx-vad-with-online-asr.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx-vad-with-online-asr.cc
//
// Copyright (c)  2025  Xiaomi Corporation
// Copyright (c)  2025  Pingfeng Luo
//
// This file demonstrates how to use vad in streaming speech recognition
//

#include <stdio.h>

#include <algorithm>
#include <chrono>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/online-recognizer.h"
#include "sherpa-onnx/csrc/online-stream.h"
#include "sherpa-onnx/csrc/parse-options.h"
#include "sherpa-onnx/csrc/resample.h"
#include "sherpa-onnx/csrc/symbol-table.h"
#include "sherpa-onnx/csrc/voice-activity-detector.h"
#include "sherpa-onnx/csrc/wave-reader.h"

int32_t main(int32_t argc, char *argv[]) {
  const char *kUsageMessage = R"usage(
Speech recognition using VAD + streaming models with sherpa-onnx-vad-with-online-asr.
This is useful when testing long audio.

Usage:

Note you can download silero_vad.onnx using

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

(1) Streaming transducer

  ./bin/sherpa-onnx-vad-with-online-asr \
    --silero-vad-model=/path/to/silero_vad.onnx \
    --tokens=/path/to/tokens.txt \
    --encoder=/path/to/encoder.onnx \
    --decoder=/path/to/decoder.onnx \
    --joiner=/path/to/joiner.onnx \
    --provider=cpu \
    --num-threads=2 \
    --decoding-method=greedy_search \
    /path/to/long_duration.wav

(2) Streaming zipformer2 CTC

  wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2

  ./bin/sherpa-onnx-vad-with-online-asr \
    --debug=1 \
    --silero-vad-model=/path/to/silero_vad.onnx \
    --zipformer2-ctc-model=./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx \
    --tokens=./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt \
    ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000000.wav

(3) Streaming paraformer

  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
  tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2

  ./bin/sherpa-onnx-vad-with-online-asr \
    --silero-vad-model=/path/to/silero_vad.onnx \
    --tokens=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt \
    --paraformer-encoder=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.onnx \
    --paraformer-decoder=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.onnx \
    /path/to/long_duration.wav


The input wav should be of single channel, 16-bit PCM encoded wave file; its
sampling rate can be arbitrary and does not need to be 16kHz.

Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
for a list of pre-trained models to download.
)usage";

  sherpa_onnx::ParseOptions po(kUsageMessage);
  sherpa_onnx::OnlineRecognizerConfig asr_config;
  asr_config.Register(&po);

  sherpa_onnx::VadModelConfig vad_config;
  vad_config.Register(&po);

  po.Read(argc, argv);
  if (po.NumArgs() != 1) {
    fprintf(stderr, "Error: Please provide exactly 1 wave file. Given: %d\n\n",
            po.NumArgs());
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "%s\n", vad_config.ToString().c_str());
  fprintf(stderr, "%s\n", asr_config.ToString().c_str());

  if (!vad_config.Validate()) {
    fprintf(stderr, "Errors in vad_config!\n");
    return -1;
  }

  if (!asr_config.Validate()) {
    fprintf(stderr, "Errors in ASR config!\n");
    return -1;
  }

  fprintf(stderr, "Creating recognizer ...\n");
  sherpa_onnx::OnlineRecognizer recognizer(asr_config);
  fprintf(stderr, "Recognizer created!\n");

  auto vad = std::make_unique<sherpa_onnx::VoiceActivityDetector>(vad_config);

  fprintf(stderr, "Started\n");
  const auto begin = std::chrono::steady_clock::now();

  std::string wave_filename = po.GetArg(1);
  fprintf(stderr, "Reading: %s\n", wave_filename.c_str());
  int32_t sampling_rate = -1;
  bool is_ok = false;
  auto samples = sherpa_onnx::ReadWave(wave_filename, &sampling_rate, &is_ok);
  if (!is_ok) {
    fprintf(stderr, "Failed to read '%s'\n", wave_filename.c_str());
    return -1;
  }

  if (sampling_rate != 16000) {
    fprintf(stderr, "Resampling from %d Hz to 16000 Hz\n", sampling_rate);
    float min_freq = std::min(sampling_rate, 16000);
    float lowpass_cutoff = 0.99 * 0.5 * min_freq;

    int32_t lowpass_filter_width = 6;
    auto resampler = std::make_unique<sherpa_onnx::LinearResample>(
        sampling_rate, 16000, lowpass_cutoff, lowpass_filter_width);
    std::vector<float> out_samples;
    resampler->Resample(samples.data(), samples.size(), true, &out_samples);
    samples = std::move(out_samples);
    fprintf(stderr, "Resampling done\n");
  }
  const float tail_padding_len = 1.28;  // related to model chunk-size
  std::vector<float> tail_paddings(static_cast<int>(tail_padding_len * 16000));

  fprintf(stderr, "Started!\n");
  int32_t window_size = vad_config.ten_vad.model.empty()
                            ? vad_config.silero_vad.window_size
                            : vad_config.ten_vad.window_size;
  int32_t offset = 0;
  int32_t segment_id = 0;
  bool speech_started = false;
  while (offset < samples.size()) {
    if (offset + window_size <= samples.size()) {
      vad->AcceptWaveform(samples.data() + offset, window_size);
    } else {
      vad->Flush();
    }
    offset += window_size;
    if (vad->IsSpeechDetected() && !speech_started) {
      // new voice activity
      speech_started = true;
      segment_id++;
    } else if (!vad->IsSpeechDetected() && speech_started) {
      // end voice activity
      speech_started = false;
    }

    while (!vad->Empty()) {
      const auto &segment = vad->Front();
      float duration = segment.samples.size() / 16000.;
      float start_time = segment.start / 16000.;
      float end_time = start_time + duration;
      auto s = recognizer.CreateStream();
      s->AcceptWaveform(16000, segment.samples.data(), segment.samples.size());
      s->AcceptWaveform(16000, tail_paddings.data(), tail_paddings.size());
      s->InputFinished();
      while (recognizer.IsReady(s.get())) {
        recognizer.DecodeStream(s.get());
      }
      auto text = recognizer.GetResult(s.get()).text;
      if (!text.empty()) {
        fprintf(stderr, "vad segment(%d:%.3f-%.3f) results: %s\n", segment_id,
                start_time, end_time, text.c_str());
      }
      vad->Pop();
    }
  }

  const auto end = std::chrono::steady_clock::now();

  float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;

  fprintf(stderr, "num threads: %d\n", asr_config.model_config.num_threads);
  fprintf(stderr, "decoding method: %s\n", asr_config.decoding_method.c_str());
  if (asr_config.decoding_method == "modified_beam_search") {
    fprintf(stderr, "max active paths: %d\n", asr_config.max_active_paths);
  }

  float duration = samples.size() / 16000.;
  fprintf(stderr, "Elapsed seconds: %.3f s\n", elapsed_seconds);
  float rtf = elapsed_seconds / duration;
  fprintf(stderr, "Real time factor (RTF): %.3f / %.3f = %.3f\n",
          elapsed_seconds, duration, rtf);

  return 0;
}


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx-vad.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx-vad.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <iomanip>
#include <memory>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/voice-activity-detector.h"
#include "sherpa-onnx/csrc/wave-reader.h"
#include "sherpa-onnx/csrc/wave-writer.h"

int32_t main(int32_t argc, char *argv[]) {
  const char *kUsageMessage = R"usage(
This program shows how to use VAD in sherpa-onnx
to remove silences from a file.

  ./bin/sherpa-onnx-vad \
    --silero-vad-model=/path/to/silero_vad.onnx \
    /path/to/input.wav
    /path/to/output.wav

Please download silero_vad.onnx from
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

For instance, use
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx

input.wav should be 16kHz.
)usage";

  sherpa_onnx::ParseOptions po(kUsageMessage);
  sherpa_onnx::VadModelConfig config;

  config.Register(&po);
  po.Read(argc, argv);
  if (po.NumArgs() != 2) {
    fprintf(
        stderr,
        "Please provide only 2 argument2: the input wav and the output wav\n");
    po.PrintUsage();
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "%s\n", config.ToString().c_str());

  if (!config.Validate()) {
    fprintf(stderr, "Errors in config!\n");
    return -1;
  }

  std::string wav_filename = po.GetArg(1);
  int32_t sampling_rate = -1;

  bool is_ok = false;
  std::vector<float> samples =
      sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok);

  if (!is_ok) {
    fprintf(stderr, "Failed to read '%s'\n", wav_filename.c_str());
    return -1;
  }

  if (sampling_rate != 16000) {
    fprintf(stderr, "Support only 16000Hz. Given: %d\n", sampling_rate);
    return -1;
  }

  auto vad = std::make_unique<sherpa_onnx::VoiceActivityDetector>(config);

  int32_t window_size = config.silero_vad.window_size;

  int32_t i = 0;
  bool is_eof = false;

  std::vector<float> samples_without_silence;

  while (!is_eof) {
    if (i + window_size < samples.size()) {
      vad->AcceptWaveform(samples.data() + i, window_size);
      i += window_size;
    } else {
      vad->Flush();
      is_eof = true;
    }

    while (!vad->Empty()) {
      const auto &segment = vad->Front();
      float start_time = segment.start / static_cast<float>(sampling_rate);
      float end_time = start_time + segment.samples.size() /
                                        static_cast<float>(sampling_rate);

      fprintf(stderr, "%.3f -- %.3f\n", start_time, end_time);
      samples_without_silence.insert(samples_without_silence.end(),
                                     segment.samples.begin(),
                                     segment.samples.end());
      vad->Pop();
    }
  }

  sherpa_onnx::WriteWave(po.GetArg(2), sampling_rate,
                         samples_without_silence.data(),
                         samples_without_silence.size());

  fprintf(stderr, "Saved to %s\n", po.GetArg(2).c_str());

  return 0;
}


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx-version.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx-version.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include <stdio.h>

#include <cstdint>

#include "sherpa-onnx/csrc/version.h"

int32_t main() {
  printf("sherpa-onnx version : %s\n", sherpa_onnx::GetVersionStr());
  printf("sherpa-onnx Git SHA1: %s\n", sherpa_onnx::GetGitSha1());
  printf("sherpa-onnx Git date: %s\n", sherpa_onnx::GetGitDate());

  return 0;
}


================================================
FILE: sherpa-onnx/csrc/sherpa-onnx.cc
================================================
// sherpa-onnx/csrc/sherpa-onnx.cc
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#include <stdio.h>

#include <chrono>
#include <iomanip>
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/online-recognizer.h"
#include "sherpa-onnx/csrc/online-stream.h"
#include "sherpa-onnx/csrc/parse-options.h"
#include "sherpa-onnx/csrc/symbol-table.h"
#include "sherpa-onnx/csrc/timer.h"
#include "sherpa-onnx/csrc/wave-reader.h"

typedef struct {
  std::unique_ptr<sherpa_onnx::OnlineStream> online_stream;
  float duration;
  float elapsed_seconds;
} Stream;

int main(int32_t argc, char *argv[]) {
  const char *kUsageMessage = R"usage(
Usage:

(1) Streaming transducer

  ./bin/sherpa-onnx \
    --tokens=/path/to/tokens.txt \
    --encoder=/path/to/encoder.onnx \
    --decoder=/path/to/decoder.onnx \
    --joiner=/path/to/joiner.onnx \
    --provider=cpu \
    --num-threads=2 \
    --decoding-method=greedy_search \
    /path/to/foo.wav [bar.wav foobar.wav ...]

(2) Streaming zipformer2 CTC

  wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2

  ./bin/sherpa-onnx \
    --debug=1 \
    --zipformer2-ctc-model=./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx \
    --tokens=./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt \
    ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000000.wav \
    ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000001.wav \
    ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000002.wav

(3) Streaming paraformer

  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
  tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2

  ./bin/sherpa-onnx \
    --tokens=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt \
    --paraformer-encoder=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.onnx \
    --paraformer-decoder=./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.onnx \
    ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav

Note: It supports decoding multiple files in batches

Default value for num_threads is 2.
Valid values for decoding_method: greedy_search (default), modified_beam_search.
Valid values for provider: cpu (default), cuda, coreml.
foo.wav should be of single channel, 16-bit PCM encoded wave file; its
sampling rate can be arbitrary and does not need to be 16kHz.

Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
for a list of pre-trained models to download.
)usage";

  sherpa_onnx::ParseOptions po(kUsageMessage);
  sherpa_onnx::OnlineRecognizerConfig config;

  config.Register(&po);

  po.Read(argc, argv);
  if (po.NumArgs() < 1) {
    po.PrintUsage();
    fprintf(stderr, "Error! Please provide at lease 1 wav file\n");
    exit(EXIT_FAILURE);
  }

  fprintf(stderr, "%s\n", config.ToString().c_str());

  if (!config.Validate()) {
    fprintf(stderr, "Errors in config!\n");
    return -1;
  }

  printf("Start to create recognizer\n");
  sherpa_onnx::Timer timer;
  sherpa_onnx::OnlineRecognizer recognizer(config);
  printf("Recognizer created in %.5f s\n", timer.Elapsed());

  std::vector<Stream> ss;

  const auto begin = std::chrono::steady_clock::now();
  std::vector<float> durations;

  for (int32_t i = 1; i <= po.NumArgs(); ++i) {
    const std::string wav_filename = po.GetArg(i);
    int32_t sampling_rate = -1;

    bool is_ok = false;
    const std::vector<float> samples =
        sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok);

    if (!is_ok) {
      fprintf(stderr, "Failed to read '%s'\n", wav_filename.c_str());
      return -1;
    }

    const float duration = samples.size() / static_cast<float>(sampling_rate);

    auto s = recognizer.CreateStream();

    // std::vector<float> left_paddings(static_cast<int>(0.3 * sampling_rate));
    // s->AcceptWaveform(sampling_rate, left_paddings.data(),
    //                   left_paddings.size());

    s->AcceptWaveform(sampling_rate, samples.data(), samples.size());

    std::vector<float> tail_paddings(static_cast<int>(0.8 * sampling_rate));
    // Note: We can call AcceptWaveform() multiple times.
    s->AcceptWaveform(sampling_rate, tail_paddings.data(),
                      tail_paddings.size());

    // Call InputFinished() to indicate that no audio samples are available
    s->InputFinished();
    ss.push_back({std::move(s), duration, 0});
  }

  std::vector<sherpa_onnx::OnlineStream *> ready_streams;
  for (;;) {
    ready_streams.clear();
    for (auto &s : ss) {
      const auto p_ss = s.online_stream.get();
      if (recognizer.IsReady(p_ss)) {
        ready_streams.push_back(p_ss);
      } else if (s.elapsed_seconds == 0) {
        const auto end = std::chrono::steady_clock::now();
        const float elapsed_seconds =
            std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
                .count() /
            1000.;
        s.elapsed_seconds = elapsed_seconds;
      }
    }

    if (ready_streams.empty()) {
      break;
    }

    recognizer.DecodeStreams(ready_streams.data(), ready_streams.size());
  }

  std::ostringstream os;
  for (int32_t i = 1; i <= po.NumArgs(); ++i) {
    const auto &s = ss[i - 1];
    const float rtf = s.elapsed_seconds / s.duration;

    os << po.GetArg(i) << "\n";
    os << "Number of threads: " << config.model_config.num_threads << ", "
       << std::setprecision(2) << "Elapsed seconds: " << s.elapsed_seconds
       << ", Audio duration (s): " << s.duration
       << ", Real time factor (RTF) = " << s.elapsed_seconds << "/"
       << s.duration << " = " << rtf << "\n";
    const auto r = recognizer.GetResult(s.online_stream.get());
    os << r.text << "\n";
    os << r.AsJsonString() << "\n\n";
  }

  std::cerr << os.str();

  return 0;
}


================================================
FILE: sherpa-onnx/csrc/silero-vad-model-config.cc
================================================
// sherpa-onnx/csrc/silero-vad-model-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/silero-vad-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void SileroVadModelConfig::Register(ParseOptions *po) {
  po->Register("silero-vad-model", &model, "Path to silero VAD ONNX model.");

  po->Register("silero-vad-threshold", &threshold,
               "Speech threshold. Silero VAD outputs speech probabilities for "
               "each audio chunk, probabilities ABOVE this value are "
               "considered as SPEECH. It is better to tune this parameter for "
               "each dataset separately, but lazy "
               "0.5 is pretty good for most datasets.");

  po->Register(
      "silero-vad-min-silence-duration", &min_silence_duration,
      "In seconds.  In the end of each speech chunk wait for "
      "--silero-vad-min-silence-duration seconds before separating it");

  po->Register("silero-vad-min-speech-duration", &min_speech_duration,
               "In seconds.  In the end of each silence chunk wait for "
               "--silero-vad-min-speech-duration seconds before separating it");

  po->Register(
      "silero-vad-max-speech-duration", &max_speech_duration,
      "In seconds. If a speech segment is longer than this value, then we "
      "increase the threshold to 0.9. After finishing detecting the segment, "
      "the threshold value is reset to its original value.");

  po->Register(
      "silero-vad-window-size", &window_size,
      "In samples. Audio chunks of --silero-vad-window-size samples are fed "
      "to the silero VAD model. WARNING! Silero VAD models were trained using "
      "512, 1024, 1536 samples for 16000 sample rate and 256, 512, 768 samples "
      "for 8000 sample rate. Values other than these may affect model "
      "performance!");

  po->Register("silero-vad-neg-threshold", &neg_threshold,
               "Negative threshold (noise threshold). If < 0, defaults to "
               "(threshold - 0.15) with lower bound 0.01.");
}

bool SileroVadModelConfig::Validate() const {
  if (model.empty()) {
    SHERPA_ONNX_LOGE("Please provide --silero-vad-model");
    return false;
  }

  if (!FileExists(model)) {
    SHERPA_ONNX_LOGE("Silero vad model file '%s' does not exist",
                     model.c_str());
    return false;
  }

  if (threshold < 0.01) {
    SHERPA_ONNX_LOGE(
        "Please use a larger value for --silero-vad-threshold. Given: %f",
        threshold);
    return false;
  }

  if (threshold >= 1) {
    SHERPA_ONNX_LOGE(
        "Please use a smaller value for --silero-vad-threshold. Given: %f",
        threshold);
    return false;
  }

  if (min_silence_duration <= 0) {
    SHERPA_ONNX_LOGE(
        "Please use a larger value for --silero-vad-min-silence-duration. "
        "Given: "
        "%f",
        min_silence_duration);
    return false;
  }

  if (min_speech_duration <= 0) {
    SHERPA_ONNX_LOGE(
        "Please use a larger value for --silero-vad-min-speech-duration. "
        "Given: "
        "%f",
        min_speech_duration);
    return false;
  }

  if (max_speech_duration <= 0) {
    SHERPA_ONNX_LOGE(
        "Please use a larger value for --silero-vad-max-speech-duration. "
        "Given: "
        "%f",
        max_speech_duration);
    return false;
  }

  return true;
}

std::string SileroVadModelConfig::ToString() const {
  std::ostringstream os;

  os << "SileroVadModelConfig(";
  os << "model=\"" << model << "\", ";
  os << "threshold=" << threshold << ", ";
  os << "min_silence_duration=" << min_silence_duration << ", ";
  os << "min_speech_duration=" << min_speech_duration << ", ";
  os << "max_speech_duration=" << max_speech_duration << ", ";
  os << "window_size=" << window_size << ", ";
  os << "neg_threshold=" << neg_threshold << ")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/silero-vad-model-config.h
================================================
// sherpa-onnx/csrc/silero-vad-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_SILERO_VAD_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_SILERO_VAD_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct SileroVadModelConfig {
  std::string model;

  // threshold to classify a segment as speech
  //
  // If the predicted probability of a segment is larger than this
  // value, then it is classified as speech.
  float threshold = 0.5;

  float min_silence_duration = 0.5;  // in seconds

  float min_speech_duration = 0.25;  // in seconds

  // 512, 1024, 1536 samples for 16000 Hz
  int32_t window_size = 512;  // in samples

  // If a speech segment is longer than this value, then we increase
  // the threshold to 0.9. After finishing detecting the segment,
  // the threshold value is reset to its original value.
  float max_speech_duration = 20;  // in seconds

  // Negative (exit) threshold for transitioning from speech → silence.
  // If left as a negative value, the default Silero rule applies:
  //     neg_threshold = max(threshold - 0.15f, 0.01f)
  // This prevents the exit threshold from becoming negative when
  // threshold < 0.15.
  float neg_threshold = -1;

  SileroVadModelConfig() = default;

  void Register(ParseOptions *po);

  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_SILERO_VAD_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/silero-vad-model.cc
================================================
// sherpa-onnx/csrc/silero-vad-model.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/silero-vad-model.h"

#include <algorithm>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"

namespace sherpa_onnx {

class SileroVadModel::Impl {
 public:
  explicit Impl(const VadModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{},
        sample_rate_(config.sample_rate) {
    auto buf = ReadFile(config.silero_vad.model);
    Init(buf.data(), buf.size());

    if (sample_rate_ != 16000) {
      SHERPA_ONNX_LOGE("Expected sample rate 16000. Given: %d",
                       config.sample_rate);
      exit(-1);
    }

    min_silence_samples_ =
        sample_rate_ * config_.silero_vad.min_silence_duration;

    min_speech_samples_ = sample_rate_ * config_.silero_vad.min_speech_duration;
  }

  template <typename Manager>
  Impl(Manager *mgr, const VadModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{},
        sample_rate_(config.sample_rate) {
    auto buf = ReadFile(mgr, config.silero_vad.model);
    Init(buf.data(), buf.size());

    if (sample_rate_ != 16000) {
      SHERPA_ONNX_LOGE("Expected sample rate 16000. Given: %d",
                       config.sample_rate);
      exit(-1);
    }

    min_silence_samples_ =
        sample_rate_ * config_.silero_vad.min_silence_duration;

    min_speech_samples_ = sample_rate_ * config_.silero_vad.min_speech_duration;
  }

  float Run(const float *samples, int32_t n) {
    if (is_v5_) {
      return RunV5(samples, n);
    } else {
      return RunV4(samples, n);
    }
  }

  void Reset() {
    if (is_v5_) {
      ResetV5();
    } else {
      ResetV4();
    }

    triggered_ = false;
    current_sample_ = 0;
    temp_start_ = 0;
    temp_end_ = 0;
  }

  bool IsSpeech(const float *samples, int32_t n) {
    if (n != WindowSize()) {
      SHERPA_ONNX_LOGE("n: %d != window_size: %d", n, WindowSize());
      exit(-1);
    }

    float prob = Run(samples, n);

    float threshold = config_.silero_vad.threshold;

    current_sample_ += config_.silero_vad.window_size;

    if (prob > threshold && temp_end_ != 0) {
      temp_end_ = 0;
    }

    if (prob > threshold && temp_start_ == 0) {
      // start speaking, but we require that it must satisfy
      // min_speech_duration
      temp_start_ = current_sample_;
      return false;
    }

    if (prob > threshold && temp_start_ != 0 && !triggered_) {
      if (current_sample_ - temp_start_ < min_speech_samples_) {
        return false;
      }

      triggered_ = true;

      return true;
    }

    if ((prob < threshold) && !triggered_) {
      // silence
      temp_start_ = 0;
      temp_end_ = 0;
      return false;
    }

    float neg_threshold;
    if (config_.silero_vad.neg_threshold < 0) {
        neg_threshold = std::max(threshold - 0.15f, 0.01f);
    } else {
        neg_threshold = std::max(config_.silero_vad.neg_threshold, 0.01f);
    }
    if ((prob > neg_threshold) && triggered_) {
      // speaking
      return true;
    }

    if ((prob > threshold) && !triggered_) {
      // start speaking
      triggered_ = true;

      return true;
    }

    if ((prob < threshold) && triggered_) {
      // stop to speak
      if (temp_end_ == 0) {
        temp_end_ = current_sample_;
      }

      if (current_sample_ - temp_end_ < min_silence_samples_) {
        // continue speaking
        return true;
      }
      // stopped speaking
      temp_start_ = 0;
      temp_end_ = 0;
      triggered_ = false;
      return false;
    }

    return false;
  }

  int32_t WindowShift() const { return config_.silero_vad.window_size; }

  int32_t WindowSize() const {
    return config_.silero_vad.window_size + window_overlap_;
  }

  int32_t MinSilenceDurationSamples() const { return min_silence_samples_; }

  int32_t MinSpeechDurationSamples() const { return min_speech_samples_; }

  void SetMinSilenceDuration(float s) {
    min_silence_samples_ = sample_rate_ * s;
  }

  void SetThreshold(float threshold) {
    config_.silero_vad.threshold = threshold;
  }

 private:
  void Init(void *model_data, size_t model_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);
    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    if ((input_names_.size() == 4 && output_names_.size() == 3) ||
        IsExportedByK2Fsa()) {
      is_v5_ = false;
    } else if (input_names_.size() == 3 && output_names_.size() == 2) {
      is_v5_ = true;

      // 64 for 16kHz
      // 32 for 8kHz
      window_overlap_ = 64;

      if (config_.silero_vad.window_size != 512) {
        SHERPA_ONNX_LOGE(
            "For silero_vad  v5, we require window_size to be 512 for 16kHz");
        exit(-1);
      }
    } else {
      SHERPA_ONNX_LOGE("Unsupported silero vad model");
      exit(-1);
    }

    Check();

    Reset();
  }

  void ResetV5() {
    // 2 - number of LSTM layer
    // 1 - batch size
    // 128 - hidden dim
    std::array<int64_t, 3> shape{2, 1, 128};

    Ort::Value s =
        Ort::Value::CreateTensor<float>(allocator_, shape.data(), shape.size());

    Fill<float>(&s, 0);
    states_.clear();
    states_.push_back(std::move(s));
  }

  void ResetV4() {
    // 2 - number of LSTM layer
    // 1 - batch size
    // 64 - hidden dim
    std::array<int64_t, 3> shape{2, 1, 64};

    Ort::Value h =
        Ort::Value::CreateTensor<float>(allocator_, shape.data(), shape.size());

    Ort::Value c =
        Ort::Value::CreateTensor<float>(allocator_, shape.data(), shape.size());

    Fill<float>(&h, 0);
    Fill<float>(&c, 0);

    states_.clear();

    states_.reserve(2);
    states_.push_back(std::move(h));
    states_.push_back(std::move(c));
  }

  void Check() const {
    if (is_v5_) {
      CheckV5();
    } else {
      CheckV4();
    }
  }

  bool IsExportedByK2Fsa() const {
    if (input_names_.size() == 3 && input_names_[0] == "x" &&
        input_names_[1] == "h" && input_names_[2] == "c" &&
        output_names_.size() == 3 && output_names_[0] == "prob" &&
        output_names_[1] == "new_h" && output_names_[2] == "new_c") {
      // this version is exported and maintained by us (k2-fsa)
      return true;
    }

    return false;
  }

  void CheckV4() const {
    if (IsExportedByK2Fsa()) {
      return;
    }

    if (input_names_.size() != 4) {
      SHERPA_ONNX_LOGE("Expect 4 inputs. Given: %d",
                       static_cast<int32_t>(input_names_.size()));
      exit(-1);
    }

    if (input_names_[0] != "input") {
      SHERPA_ONNX_LOGE("Input[0]: %s. Expected: input",
                       input_names_[0].c_str());
      exit(-1);
    }

    if (input_names_[1] != "sr") {
      SHERPA_ONNX_LOGE("Input[1]: %s. Expected: sr", input_names_[1].c_str());
      exit(-1);
    }

    if (input_names_[2] != "h") {
      SHERPA_ONNX_LOGE("Input[2]: %s. Expected: h", input_names_[2].c_str());
      exit(-1);
    }

    if (input_names_[3] != "c") {
      SHERPA_ONNX_LOGE("Input[3]: %s. Expected: c", input_names_[3].c_str());
      exit(-1);
    }

    // Now for outputs
    if (output_names_.size() != 3) {
      SHERPA_ONNX_LOGE("Expect 3 outputs. Given: %d",
                       static_cast<int32_t>(output_names_.size()));
      exit(-1);
    }

    if (output_names_[0] != "output") {
      SHERPA_ONNX_LOGE("Output[0]: %s. Expected: output",
                       output_names_[0].c_str());
      exit(-1);
    }

    if (output_names_[1] != "hn") {
      SHERPA_ONNX_LOGE("Output[1]: %s. Expected: sr", output_names_[1].c_str());
      exit(-1);
    }

    if (output_names_[2] != "cn") {
      SHERPA_ONNX_LOGE("Output[2]: %s. Expected: sr", output_names_[2].c_str());
      exit(-1);
    }
  }

  void CheckV5() const {
    if (input_names_.size() != 3) {
      SHERPA_ONNX_LOGE("Expect 3 inputs. Given: %d",
                       static_cast<int32_t>(input_names_.size()));
      exit(-1);
    }

    if (input_names_[0] != "input") {
      SHERPA_ONNX_LOGE("Input[0]: %s. Expected: input",
                       input_names_[0].c_str());
      exit(-1);
    }

    if (input_names_[1] != "state") {
      SHERPA_ONNX_LOGE("Input[1]: %s. Expected: state",
                       input_names_[1].c_str());
      exit(-1);
    }

    if (input_names_[2] != "sr") {
      SHERPA_ONNX_LOGE("Input[2]: %s. Expected: sr", input_names_[2].c_str());
      exit(-1);
    }

    // Now for outputs
    if (output_names_.size() != 2) {
      SHERPA_ONNX_LOGE("Expect 2 outputs. Given: %d",
                       static_cast<int32_t>(output_names_.size()));
      exit(-1);
    }

    if (output_names_[0] != "output") {
      SHERPA_ONNX_LOGE("Output[0]: %s. Expected: output",
                       output_names_[0].c_str());
      exit(-1);
    }

    if (output_names_[1] != "stateN") {
      SHERPA_ONNX_LOGE("Output[1]: %s. Expected: stateN",
                       output_names_[1].c_str());
      exit(-1);
    }
  }

  float RunV5(const float *samples, int32_t n) {
    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::array<int64_t, 2> x_shape = {1, n};

    Ort::Value x =
        Ort::Value::CreateTensor(memory_info, const_cast<float *>(samples), n,
                                 x_shape.data(), x_shape.size());

    int64_t sr_shape = 1;
    Ort::Value sr =
        Ort::Value::CreateTensor(memory_info, &sample_rate_, 1, &sr_shape, 1);

    std::array<Ort::Value, 3> inputs = {std::move(x), std::move(states_[0]),
                                        std::move(sr)};

    auto out =
        sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(),
                   output_names_ptr_.data(), output_names_ptr_.size());

    states_[0] = std::move(out[1]);

    float prob = out[0].GetTensorData<float>()[0];
    return prob;
  }

  float RunV4(const float *samples, int32_t n) {
    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::array<int64_t, 2> x_shape = {1, n};

    Ort::Value x =
        Ort::Value::CreateTensor(memory_info, const_cast<float *>(samples), n,
                                 x_shape.data(), x_shape.size());

    int64_t sr_shape = 1;
    Ort::Value sr =
        Ort::Value::CreateTensor(memory_info, &sample_rate_, 1, &sr_shape, 1);

    std::vector<Ort::Value> inputs;
    inputs.reserve(input_names_.size());

    inputs.push_back(std::move(x));
    if (input_names_.size() == 4) {
      inputs.push_back(std::move(sr));
    }
    inputs.push_back(std::move(states_[0]));
    inputs.push_back(std::move(states_[1]));

    auto out =
        sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(),
                   output_names_ptr_.data(), output_names_ptr_.size());

    states_[0] = std::move(out[1]);
    states_[1] = std::move(out[2]);

    float prob = out[0].GetTensorData<float>()[0];
    return prob;
  }

 private:
  VadModelConfig config_;

  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;

  std::vector<Ort::Value> states_;
  int64_t sample_rate_;
  int32_t min_silence_samples_;
  int32_t min_speech_samples_;

  bool triggered_ = false;
  int32_t current_sample_ = 0;
  int32_t temp_start_ = 0;
  int32_t temp_end_ = 0;

  int32_t window_overlap_ = 0;

  bool is_v5_ = false;
};

SileroVadModel::SileroVadModel(const VadModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
SileroVadModel::SileroVadModel(Manager *mgr, const VadModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

SileroVadModel::~SileroVadModel() = default;

void SileroVadModel::Reset() { return impl_->Reset(); }

bool SileroVadModel::IsSpeech(const float *samples, int32_t n) {
  return impl_->IsSpeech(samples, n);
}

int32_t SileroVadModel::WindowSize() const { return impl_->WindowSize(); }

int32_t SileroVadModel::WindowShift() const { return impl_->WindowShift(); }

int32_t SileroVadModel::MinSilenceDurationSamples() const {
  return impl_->MinSilenceDurationSamples();
}

int32_t SileroVadModel::MinSpeechDurationSamples() const {
  return impl_->MinSpeechDurationSamples();
}

void SileroVadModel::SetMinSilenceDuration(float s) {
  impl_->SetMinSilenceDuration(s);
}

void SileroVadModel::SetThreshold(float threshold) {
  impl_->SetThreshold(threshold);
}

float SileroVadModel::Compute(const float *samples, int32_t n) {
  return impl_->Run(samples, n);
}

#if __ANDROID_API__ >= 9
template SileroVadModel::SileroVadModel(AAssetManager *mgr,
                                        const VadModelConfig &config);
#endif

#if __OHOS__
template SileroVadModel::SileroVadModel(NativeResourceManager *mgr,
                                        const VadModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/silero-vad-model.h
================================================
// sherpa-onnx/csrc/silero-vad-model.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_SILERO_VAD_MODEL_H_
#define SHERPA_ONNX_CSRC_SILERO_VAD_MODEL_H_

#include <memory>

#include "sherpa-onnx/csrc/vad-model.h"

namespace sherpa_onnx {

class SileroVadModel : public VadModel {
 public:
  explicit SileroVadModel(const VadModelConfig &config);

  template <typename Manager>
  SileroVadModel(Manager *mgr, const VadModelConfig &config);

  ~SileroVadModel() override;

  // reset the internal model states
  void Reset() override;

  /**
   * @param samples Pointer to a 1-d array containing audio samples.
   *                Each sample should be normalized to the range [-1, 1].
   * @param n Number of samples.
   *
   * @return Return true if speech is detected. Return false otherwise.
   */
  bool IsSpeech(const float *samples, int32_t n) override;

  float Compute(const float *samples, int32_t n) override;

  // For silero vad V4, it is WindowShift().
  // For silero vad V5, it is WindowShift()+64 for 16kHz and
  //                          WindowShift()+32 for 8kHz
  int32_t WindowSize() const override;

  // 512
  int32_t WindowShift() const override;

  int32_t MinSilenceDurationSamples() const override;
  int32_t MinSpeechDurationSamples() const override;

  void SetMinSilenceDuration(float s) override;
  void SetThreshold(float threshold) override;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_SILERO_VAD_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/slice-test.cc
================================================
// sherpa-onnx/csrc/slice-test.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/slice.h"

#include <numeric>

#include "gtest/gtest.h"
#include "sherpa-onnx/csrc/onnx-utils.h"

namespace sherpa_onnx {

TEST(Slice, Slice3D) {
  Ort::AllocatorWithDefaultOptions allocator;
  std::array<int64_t, 3> shape{5, 5, 4};
  Ort::Value v =
      Ort::Value::CreateTensor<float>(allocator, shape.data(), shape.size());
  float *p = v.GetTensorMutableData<float>();

  std::iota(p, p + shape[0] * shape[1] * shape[2], 0);

  auto v1 = Slice(allocator, &v, 2, 4, 0, 2);
  auto v2 = Slice(allocator, &v, 1, 3, 1, 3);

  Print3D(&v);
  Print3D(&v1);
  Print3D(&v2);

  // TODO(fangjun): Check that the results are correct
}

TEST(Slice, Slice2D) {
  Ort::AllocatorWithDefaultOptions allocator;
  std::array<int64_t, 2> shape{5, 8};
  Ort::Value v =
      Ort::Value::CreateTensor<float>(allocator, shape.data(), shape.size());
  float *p = v.GetTensorMutableData<float>();

  std::iota(p, p + shape[0] * shape[1], 0);

  auto v1 = Slice(allocator, &v, 1, 3);
  auto v2 = Slice(allocator, &v, 0, 2);

  Print2D(&v);
  Print2D(&v1);
  Print2D(&v2);

  // TODO(fangjun): Check that the results are correct
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/slice.cc
================================================
// sherpa-onnx/csrc/slice.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/slice.h"

#include <algorithm>
#include <cassert>
#include <vector>

namespace sherpa_onnx {

template <typename T /*=float*/>
Ort::Value Slice(OrtAllocator *allocator, const Ort::Value *v,
                 int32_t dim0_start, int32_t dim0_end, int32_t dim1_start,
                 int32_t dim1_end) {
  std::vector<int64_t> shape = v->GetTensorTypeAndShapeInfo().GetShape();
  assert(shape.size() == 3);

  assert(0 <= dim0_start);
  assert(dim0_start < dim0_end);
  assert(dim0_end <= shape[0]);

  assert(0 <= dim1_start);
  assert(dim1_start < dim1_end);
  assert(dim1_end <= shape[1]);

  std::array<int64_t, 3> ans_shape{dim0_end - dim0_start, dim1_end - dim1_start,
                                   shape[2]};

  Ort::Value ans = Ort::Value::CreateTensor<T>(allocator, ans_shape.data(),
                                               ans_shape.size());
  T *dst = ans.GetTensorMutableData<T>();
  for (int32_t i = dim0_start; i != dim0_end; ++i) {
    const T *src = v->GetTensorData<T>() + i * shape[1] * shape[2];
    const T *start = src + dim1_start * shape[2];
    const T *end = src + dim1_end * shape[2];

    std::copy(start, end, dst);
    dst += ans_shape[1] * ans_shape[2];
  }

  return ans;
}

template <typename T /*= float*/>
Ort::Value Slice(OrtAllocator *allocator, const Ort::Value *v,
                 int32_t dim0_start, int32_t dim0_end) {
  std::vector<int64_t> shape = v->GetTensorTypeAndShapeInfo().GetShape();
  assert(shape.size() == 2);

  assert(0 <= dim0_start);
  assert(dim0_start < dim0_end);
  assert(dim0_end <= shape[0]);

  const T *src = v->GetTensorData<T>();

  std::array<int64_t, 2> ans_shape{dim0_end - dim0_start, shape[1]};

  Ort::Value ans = Ort::Value::CreateTensor<T>(allocator, ans_shape.data(),
                                               ans_shape.size());
  const T *start = v->GetTensorData<T>() + dim0_start * shape[1];
  const T *end = v->GetTensorData<T>() + dim0_end * shape[1];
  T *dst = ans.GetTensorMutableData<T>();
  std::copy(start, end, dst);

  return ans;
}

template Ort::Value Slice<float>(OrtAllocator *allocator, const Ort::Value *v,
                                 int32_t dim0_start, int32_t dim0_end,
                                 int32_t dim1_start, int32_t dim1_end);

template Ort::Value Slice<float>(OrtAllocator *allocator, const Ort::Value *v,
                                 int32_t dim0_start, int32_t dim0_end);

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/slice.h
================================================
// sherpa-onnx/csrc/slice.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_SLICE_H_
#define SHERPA_ONNX_CSRC_SLICE_H_

#include "onnxruntime_cxx_api.h"  // NOLINT

namespace sherpa_onnx {

/** Get a deep copy by slicing a 3-D tensor v.
 *
 * It returns v[dim0_start:dim0_end, dim1_start:dim1_end, :]
 *
 * @param allocator
 * @param v A 3-D tensor. Its data type is T.
 * @param dim0_start  Start index of the first dimension..
 * @param dim0_end    End index of the first dimension..
 * @param dim1_start Start index of the second dimension.
 * @param dim1_end  End index of the second dimension.
 *
 * @return Return a 3-D tensor of shape
 *         (dim0_end-dim0_start, dim1_end-dim1_start, v.shape[2])
 */
template <typename T = float>
Ort::Value Slice(OrtAllocator *allocator, const Ort::Value *v,
                 int32_t dim0_start, int32_t dim0_end, int32_t dim1_start,
                 int32_t dim1_end);

/** Get a deep copy by slicing a 2-D tensor v.
 *
 * It returns v[dim0_start:dim0_end, :]
 *
 * @param allocator
 * @param v A 2-D tensor. Its data type is T.
 * @param dim0_start  Start index of the first dimension..
 * @param dim0_end    End index of the first dimension..
 *
 * @return Return a 2-D tensor of shape
 *         (dim0_end-dim0_start, v.shape[1])
 */
template <typename T = float>
Ort::Value Slice(OrtAllocator *allocator, const Ort::Value *v,
                 int32_t dim0_start, int32_t dim0_end);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_SLICE_H_


================================================
FILE: sherpa-onnx/csrc/speaker-embedding-extractor-general-impl.h
================================================
// sherpa-onnx/csrc/speaker-embedding-extractor-general-impl.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_GENERAL_IMPL_H_
#define SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_GENERAL_IMPL_H_
#include <algorithm>
#include <memory>
#include <utility>
#include <vector>

#include "Eigen/Dense"
#include "sherpa-onnx/csrc/speaker-embedding-extractor-impl.h"
#include "sherpa-onnx/csrc/speaker-embedding-extractor-model.h"

namespace sherpa_onnx {

class SpeakerEmbeddingExtractorGeneralImpl
    : public SpeakerEmbeddingExtractorImpl {
 public:
  explicit SpeakerEmbeddingExtractorGeneralImpl(
      const SpeakerEmbeddingExtractorConfig &config)
      : model_(config) {}

  template <typename Manager>
  SpeakerEmbeddingExtractorGeneralImpl(
      Manager *mgr, const SpeakerEmbeddingExtractorConfig &config)
      : model_(mgr, config) {}

  int32_t Dim() const override { return model_.GetMetaData().output_dim; }

  std::unique_ptr<OnlineStream> CreateStream() const override {
    FeatureExtractorConfig feat_config;
    const auto &meta_data = model_.GetMetaData();
    feat_config.sampling_rate = meta_data.sample_rate;
    feat_config.normalize_samples = meta_data.normalize_samples;

    return std::make_unique<OnlineStream>(feat_config);
  }

  bool IsReady(OnlineStream *s) const override {
    return s->GetNumProcessedFrames() < s->NumFramesReady();
  }

  std::vector<float> Compute(OnlineStream *s) const override {
    int32_t num_frames = s->NumFramesReady() - s->GetNumProcessedFrames();
    if (num_frames <= 0) {
#if __OHOS__
      SHERPA_ONNX_LOGE(
          "Please make sure IsReady(s) returns true. num_frames: %{public}d",
          num_frames);
#else
      SHERPA_ONNX_LOGE(
          "Please make sure IsReady(s) returns true. num_frames: %d",
          num_frames);
#endif
      return {};
    }

    std::vector<float> features =
        s->GetFrames(s->GetNumProcessedFrames(), num_frames);

    s->GetNumProcessedFrames() += num_frames;

    int32_t feat_dim = features.size() / num_frames;

    const auto &meta_data = model_.GetMetaData();
    if (!meta_data.feature_normalize_type.empty()) {
      if (meta_data.feature_normalize_type == "global-mean") {
        SubtractGlobalMean(features.data(), num_frames, feat_dim);
      } else {
#if __OHOS__
        SHERPA_ONNX_LOGE("Unsupported feature_normalize_type: %{public}s",
                         meta_data.feature_normalize_type.c_str());
#else
        SHERPA_ONNX_LOGE("Unsupported feature_normalize_type: %s",
                         meta_data.feature_normalize_type.c_str());
#endif
        exit(-1);
      }
    }

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::array<int64_t, 3> x_shape{1, num_frames, feat_dim};
    Ort::Value x =
        Ort::Value::CreateTensor(memory_info, features.data(), features.size(),
                                 x_shape.data(), x_shape.size());
    Ort::Value embedding = model_.Compute(std::move(x));
    std::vector<int64_t> embedding_shape =
        embedding.GetTensorTypeAndShapeInfo().GetShape();

    std::vector<float> ans(embedding_shape[1]);
    std::copy(embedding.GetTensorData<float>(),
              embedding.GetTensorData<float>() + ans.size(), ans.begin());

    return ans;
  }

 private:
  void SubtractGlobalMean(float *p, int32_t num_frames,
                          int32_t feat_dim) const {
    auto m = Eigen::Map<
        Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>(
        p, num_frames, feat_dim);

    m = m.rowwise() - m.colwise().mean();
  }

 private:
  SpeakerEmbeddingExtractorModel model_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_GENERAL_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/speaker-embedding-extractor-impl.cc
================================================
// sherpa-onnx/csrc/speaker-embedding-extractor-impl.cc
//
// Copyright (c)  2024  Xiaomi Corporation
#include "sherpa-onnx/csrc/speaker-embedding-extractor-impl.h"

#include <memory>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/speaker-embedding-extractor-general-impl.h"
#include "sherpa-onnx/csrc/speaker-embedding-extractor-nemo-impl.h"

namespace sherpa_onnx {

namespace {

enum class ModelType : std::uint8_t {
  kWeSpeaker,
  k3dSpeaker,
  kNeMo,
  kUnknown,
};

}  // namespace

static ModelType GetModelType(char *model_data, size_t model_data_length,
                              bool debug) {
  Ort::Env env(ORT_LOGGING_LEVEL_ERROR);
  Ort::SessionOptions sess_opts;
  sess_opts.SetIntraOpNumThreads(1);
  sess_opts.SetInterOpNumThreads(1);

  auto sess = std::make_unique<Ort::Session>(env, model_data, model_data_length,
                                             sess_opts);

  Ort::ModelMetadata meta_data = sess->GetModelMetadata();
  if (debug) {
    std::ostringstream os;
    PrintModelMetadata(os, meta_data);
#if __OHOS__
    SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
    SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
  }

  Ort::AllocatorWithDefaultOptions allocator;
  auto model_type =
      LookupCustomModelMetaData(meta_data, "framework", allocator);
  if (model_type.empty()) {
    SHERPA_ONNX_LOGE(
        "No model_type in the metadata!\n"
        "Please make sure you have added metadata to the model.\n\n"
        "For instance, you can use\n"
        "https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/wespeaker/"
        "add_meta_data.py"
        "to add metadata to models from WeSpeaker\n");
    return ModelType::kUnknown;
  }

  if (model_type == "wespeaker") {
    return ModelType::kWeSpeaker;
  } else if (model_type == "3d-speaker") {
    return ModelType::k3dSpeaker;
  } else if (model_type == "nemo") {
    return ModelType::kNeMo;
  } else {
#if __OHOS__
    SHERPA_ONNX_LOGE("Unsupported model_type: %{public}s", model_type.c_str());
#else
    SHERPA_ONNX_LOGE("Unsupported model_type: %s", model_type.c_str());
#endif
    return ModelType::kUnknown;
  }
}

std::unique_ptr<SpeakerEmbeddingExtractorImpl>
SpeakerEmbeddingExtractorImpl::Create(
    const SpeakerEmbeddingExtractorConfig &config) {
  ModelType model_type = ModelType::kUnknown;

  {
    auto buffer = ReadFile(config.model);

    model_type = GetModelType(buffer.data(), buffer.size(), config.debug);
  }

  switch (model_type) {
    case ModelType::kWeSpeaker:
      // fall through
    case ModelType::k3dSpeaker:
      return std::make_unique<SpeakerEmbeddingExtractorGeneralImpl>(config);
    case ModelType::kNeMo:
      return std::make_unique<SpeakerEmbeddingExtractorNeMoImpl>(config);
    case ModelType::kUnknown:
      SHERPA_ONNX_LOGE("Unknown model type for speaker embedding extractor!");
      return nullptr;
  }

  // unreachable code
  return nullptr;
}

template <typename Manager>
std::unique_ptr<SpeakerEmbeddingExtractorImpl>
SpeakerEmbeddingExtractorImpl::Create(
    Manager *mgr, const SpeakerEmbeddingExtractorConfig &config) {
  ModelType model_type = ModelType::kUnknown;

  {
    auto buffer = ReadFile(mgr, config.model);

    model_type = GetModelType(buffer.data(), buffer.size(), config.debug);
  }

  switch (model_type) {
    case ModelType::kWeSpeaker:
      // fall through
    case ModelType::k3dSpeaker:
      return std::make_unique<SpeakerEmbeddingExtractorGeneralImpl>(mgr,
                                                                    config);
    case ModelType::kNeMo:
      return std::make_unique<SpeakerEmbeddingExtractorNeMoImpl>(mgr, config);
    case ModelType::kUnknown:
      SHERPA_ONNX_LOGE(
          "Unknown model type in for speaker embedding extractor!");
      return nullptr;
  }

  // unreachable code
  return nullptr;
}

#if __ANDROID_API__ >= 9
template std::unique_ptr<SpeakerEmbeddingExtractorImpl>
SpeakerEmbeddingExtractorImpl::Create(
    AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config);
#endif

#if __OHOS__
template std::unique_ptr<SpeakerEmbeddingExtractorImpl>
SpeakerEmbeddingExtractorImpl::Create(
    NativeResourceManager *mgr, const SpeakerEmbeddingExtractorConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/speaker-embedding-extractor-impl.h
================================================
// sherpa-onnx/csrc/speaker-embedding-extractor-impl.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_IMPL_H_
#define SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_IMPL_H_

#include <memory>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/speaker-embedding-extractor.h"

namespace sherpa_onnx {

class SpeakerEmbeddingExtractorImpl {
 public:
  virtual ~SpeakerEmbeddingExtractorImpl() = default;

  static std::unique_ptr<SpeakerEmbeddingExtractorImpl> Create(
      const SpeakerEmbeddingExtractorConfig &config);

  template <typename Manager>
  static std::unique_ptr<SpeakerEmbeddingExtractorImpl> Create(
      Manager *mgr, const SpeakerEmbeddingExtractorConfig &config);

  virtual int32_t Dim() const = 0;

  virtual std::unique_ptr<OnlineStream> CreateStream() const = 0;

  virtual bool IsReady(OnlineStream *s) const = 0;

  virtual std::vector<float> Compute(OnlineStream *s) const = 0;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/speaker-embedding-extractor-model-meta-data.h
================================================
// sherpa-onnx/csrc/speaker-embedding-extractor-model-meta-data.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_MODEL_META_DATA_H_
#define SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_MODEL_META_DATA_H_

#include <cstdint>
#include <string>

namespace sherpa_onnx {

struct SpeakerEmbeddingExtractorModelMetaData {
  int32_t output_dim = 0;
  int32_t sample_rate = 0;

  // for wespeaker models, it is 0;
  // for 3d-speaker models, it is 1
  int32_t normalize_samples = 1;

  // Chinese, English, etc.
  std::string language;

  // for 3d-speaker, it is global-mean
  std::string feature_normalize_type;
};

}  // namespace sherpa_onnx
#endif  // SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_MODEL_META_DATA_H_


================================================
FILE: sherpa-onnx/csrc/speaker-embedding-extractor-model.cc
================================================
// sherpa-onnx/csrc/speaker-embedding-extractor-model.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/speaker-embedding-extractor-model.h"

#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/speaker-embedding-extractor-model-meta-data.h"

namespace sherpa_onnx {

class SpeakerEmbeddingExtractorModel::Impl {
 public:
  explicit Impl(const SpeakerEmbeddingExtractorConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    {
      auto buf = ReadFile(config.model);
      Init(buf.data(), buf.size());
    }
  }

  template <typename Manager>
  Impl(Manager *mgr, const SpeakerEmbeddingExtractorConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    {
      auto buf = ReadFile(mgr, config.model);
      Init(buf.data(), buf.size());
    }
  }

  Ort::Value Compute(Ort::Value x) const {
    std::array<Ort::Value, 1> inputs = {std::move(x)};

    auto outputs =
        sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(),
                   output_names_ptr_.data(), output_names_ptr_.size());
    return std::move(outputs[0]);
  }

  const SpeakerEmbeddingExtractorModelMetaData &GetMetaData() const {
    return meta_data_;
  }

 private:
  void Init(void *model_data, size_t model_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);

    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      PrintModelMetadata(os, meta_data);
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
    SHERPA_ONNX_READ_META_DATA(meta_data_.output_dim, "output_dim");
    SHERPA_ONNX_READ_META_DATA(meta_data_.sample_rate, "sample_rate");
    SHERPA_ONNX_READ_META_DATA(meta_data_.normalize_samples,
                               "normalize_samples");
    SHERPA_ONNX_READ_META_DATA_STR(meta_data_.language, "language");

    SHERPA_ONNX_READ_META_DATA_STR_WITH_DEFAULT(
        meta_data_.feature_normalize_type, "feature_normalize_type", "");

    std::string framework;
    SHERPA_ONNX_READ_META_DATA_STR(framework, "framework");
    if (framework != "wespeaker" && framework != "3d-speaker") {
#if __OHOS__
      SHERPA_ONNX_LOGE(
          "Expect a wespeaker or a 3d-speaker model, given: %{public}s",
          framework.c_str());
#else
      SHERPA_ONNX_LOGE("Expect a wespeaker or a 3d-speaker model, given: %s",
                       framework.c_str());
#endif
      exit(-1);
    }
  }

 private:
  SpeakerEmbeddingExtractorConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;

  SpeakerEmbeddingExtractorModelMetaData meta_data_;
};

SpeakerEmbeddingExtractorModel::SpeakerEmbeddingExtractorModel(
    const SpeakerEmbeddingExtractorConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
SpeakerEmbeddingExtractorModel::SpeakerEmbeddingExtractorModel(
    Manager *mgr, const SpeakerEmbeddingExtractorConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

SpeakerEmbeddingExtractorModel::~SpeakerEmbeddingExtractorModel() = default;

const SpeakerEmbeddingExtractorModelMetaData &
SpeakerEmbeddingExtractorModel::GetMetaData() const {
  return impl_->GetMetaData();
}

Ort::Value SpeakerEmbeddingExtractorModel::Compute(Ort::Value x) const {
  return impl_->Compute(std::move(x));
}

#if __ANDROID_API__ >= 9
template SpeakerEmbeddingExtractorModel::SpeakerEmbeddingExtractorModel(
    AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config);
#endif

#if __OHOS__
template SpeakerEmbeddingExtractorModel::SpeakerEmbeddingExtractorModel(
    NativeResourceManager *mgr, const SpeakerEmbeddingExtractorConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/speaker-embedding-extractor-model.h
================================================
// sherpa-onnx/csrc/speaker-embedding-extractor-model.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_MODEL_H_
#define SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_MODEL_H_

#include <memory>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/speaker-embedding-extractor-model-meta-data.h"
#include "sherpa-onnx/csrc/speaker-embedding-extractor.h"

namespace sherpa_onnx {

class SpeakerEmbeddingExtractorModel {
 public:
  explicit SpeakerEmbeddingExtractorModel(
      const SpeakerEmbeddingExtractorConfig &config);

  template <typename Manager>
  SpeakerEmbeddingExtractorModel(Manager *mgr,
                                 const SpeakerEmbeddingExtractorConfig &config);

  ~SpeakerEmbeddingExtractorModel();

  const SpeakerEmbeddingExtractorModelMetaData &GetMetaData() const;

  /**
   * @param x A float32 tensor of shape (N, T, C)
   * @return A float32 tensor of shape (N, C)
   */
  Ort::Value Compute(Ort::Value x) const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/speaker-embedding-extractor-nemo-impl.h
================================================
// sherpa-onnx/csrc/speaker-embedding-extractor-nemo-impl.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_NEMO_IMPL_H_
#define SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_NEMO_IMPL_H_
#include <algorithm>
#include <memory>
#include <utility>
#include <vector>

#include "Eigen/Dense"
#include "sherpa-onnx/csrc/speaker-embedding-extractor-impl.h"
#include "sherpa-onnx/csrc/speaker-embedding-extractor-nemo-model.h"
#include "sherpa-onnx/csrc/transpose.h"

namespace sherpa_onnx {

class SpeakerEmbeddingExtractorNeMoImpl : public SpeakerEmbeddingExtractorImpl {
 public:
  explicit SpeakerEmbeddingExtractorNeMoImpl(
      const SpeakerEmbeddingExtractorConfig &config)
      : model_(config) {}

  template <typename Manager>
  SpeakerEmbeddingExtractorNeMoImpl(
      Manager *mgr, const SpeakerEmbeddingExtractorConfig &config)
      : model_(mgr, config) {}

  int32_t Dim() const override { return model_.GetMetaData().output_dim; }

  std::unique_ptr<OnlineStream> CreateStream() const override {
    FeatureExtractorConfig feat_config;
    const auto &meta_data = model_.GetMetaData();
    feat_config.sampling_rate = meta_data.sample_rate;
    feat_config.feature_dim = meta_data.feat_dim;
    feat_config.normalize_samples = true;
    feat_config.snip_edges = true;
    feat_config.frame_shift_ms = meta_data.window_stride_ms;
    feat_config.frame_length_ms = meta_data.window_size_ms;
    feat_config.low_freq = 0;
    feat_config.is_librosa = true;
    feat_config.remove_dc_offset = false;
    feat_config.window_type = meta_data.window_type;

    return std::make_unique<OnlineStream>(feat_config);
  }

  bool IsReady(OnlineStream *s) const override {
    return s->GetNumProcessedFrames() < s->NumFramesReady();
  }

  std::vector<float> Compute(OnlineStream *s) const override {
    int32_t num_frames = s->NumFramesReady() - s->GetNumProcessedFrames();
    if (num_frames <= 0) {
#if __OHOS__
      SHERPA_ONNX_LOGE(
          "Please make sure IsReady(s) returns true. num_frames: %{public}d",
          num_frames);
#else
      SHERPA_ONNX_LOGE(
          "Please make sure IsReady(s) returns true. num_frames: %d",
          num_frames);
#endif
      return {};
    }

    std::vector<float> features =
        s->GetFrames(s->GetNumProcessedFrames(), num_frames);

    s->GetNumProcessedFrames() += num_frames;

    int32_t feat_dim = features.size() / num_frames;

    const auto &meta_data = model_.GetMetaData();
    if (!meta_data.feature_normalize_type.empty()) {
      if (meta_data.feature_normalize_type == "per_feature") {
        NormalizePerFeature(features.data(), num_frames, feat_dim);
      } else {
#if __OHOS__
        SHERPA_ONNX_LOGE("Unsupported feature_normalize_type: %{public}s",
                         meta_data.feature_normalize_type.c_str());
#else

        SHERPA_ONNX_LOGE("Unsupported feature_normalize_type: %s",
                         meta_data.feature_normalize_type.c_str());
#endif
        exit(-1);
      }
    }

    if (num_frames % 16 != 0) {
      int32_t pad = 16 - num_frames % 16;
      features.resize((num_frames + pad) * feat_dim);
    }

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::array<int64_t, 3> x_shape{1, num_frames, feat_dim};
    Ort::Value x =
        Ort::Value::CreateTensor(memory_info, features.data(), features.size(),
                                 x_shape.data(), x_shape.size());

    x = Transpose12(model_.Allocator(), &x);

    int64_t x_lens = num_frames;
    std::array<int64_t, 1> x_lens_shape{1};
    Ort::Value x_lens_tensor = Ort::Value::CreateTensor(
        memory_info, &x_lens, 1, x_lens_shape.data(), x_lens_shape.size());

    Ort::Value embedding =
        model_.Compute(std::move(x), std::move(x_lens_tensor));
    std::vector<int64_t> embedding_shape =
        embedding.GetTensorTypeAndShapeInfo().GetShape();

    std::vector<float> ans(embedding_shape[1]);
    std::copy(embedding.GetTensorData<float>(),
              embedding.GetTensorData<float>() + ans.size(), ans.begin());

    return ans;
  }

 private:
  void NormalizePerFeature(float *p, int32_t num_frames,
                           int32_t feat_dim) const {
    auto m = Eigen::Map<
        Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>(
        p, num_frames, feat_dim);

    auto EX = m.colwise().mean();
    auto EX2 = m.array().pow(2).colwise().sum() / num_frames;
    auto variance = (EX2 - EX.array().pow(2)).max(1e-5f);

    auto stddev = variance.array().sqrt();

    m = (m.rowwise() - EX).array().rowwise() / (stddev.array() + 1e-5f);
  }

 private:
  SpeakerEmbeddingExtractorNeMoModel model_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_NEMO_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/speaker-embedding-extractor-nemo-model-meta-data.h
================================================
// sherpa-onnx/csrc/speaker-embedding-extractor-nemo-model-meta-data.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_NEMO_MODEL_META_DATA_H_
#define SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_NEMO_MODEL_META_DATA_H_

#include <cstdint>
#include <string>

namespace sherpa_onnx {

struct SpeakerEmbeddingExtractorNeMoModelMetaData {
  int32_t output_dim = 0;
  int32_t feat_dim = 80;
  int32_t sample_rate = 0;
  int32_t window_size_ms = 25;
  int32_t window_stride_ms = 25;

  // Chinese, English, etc.
  std::string language;

  // for 3d-speaker, it is global-mean
  std::string feature_normalize_type;
  std::string window_type = "hann";
};

}  // namespace sherpa_onnx
#endif  // SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_NEMO_MODEL_META_DATA_H_


================================================
FILE: sherpa-onnx/csrc/speaker-embedding-extractor-nemo-model.cc
================================================
// sherpa-onnx/csrc/speaker-embedding-extractor-nemo-model.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/speaker-embedding-extractor-nemo-model.h"

#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/speaker-embedding-extractor-nemo-model-meta-data.h"

namespace sherpa_onnx {

class SpeakerEmbeddingExtractorNeMoModel::Impl {
 public:
  explicit Impl(const SpeakerEmbeddingExtractorConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    {
      auto buf = ReadFile(config.model);
      Init(buf.data(), buf.size());
    }
  }

  template <typename Manager>
  Impl(Manager *mgr, const SpeakerEmbeddingExtractorConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    {
      auto buf = ReadFile(mgr, config.model);
      Init(buf.data(), buf.size());
    }
  }

  Ort::Value Compute(Ort::Value x, Ort::Value x_lens) const {
    std::array<Ort::Value, 2> inputs = {std::move(x), std::move(x_lens)};

    // output_names_ptr_[0] is logits
    // output_names_ptr_[1] is embeddings
    // so we use output_names_ptr_.data() + 1 here to extract only the
    // embeddings
    auto outputs = sess_->Run({}, input_names_ptr_.data(), inputs.data(),
                              inputs.size(), output_names_ptr_.data() + 1, 1);
    return std::move(outputs[0]);
  }

  OrtAllocator *Allocator() { return allocator_; }

  const SpeakerEmbeddingExtractorNeMoModelMetaData &GetMetaData() const {
    return meta_data_;
  }

 private:
  void Init(void *model_data, size_t model_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);

    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      PrintModelMetadata(os, meta_data);
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
    SHERPA_ONNX_READ_META_DATA(meta_data_.output_dim, "output_dim");
    SHERPA_ONNX_READ_META_DATA(meta_data_.feat_dim, "feat_dim");
    SHERPA_ONNX_READ_META_DATA(meta_data_.sample_rate, "sample_rate");
    SHERPA_ONNX_READ_META_DATA(meta_data_.window_size_ms, "window_size_ms");
    SHERPA_ONNX_READ_META_DATA(meta_data_.window_stride_ms, "window_stride_ms");
    SHERPA_ONNX_READ_META_DATA_STR(meta_data_.language, "language");

    SHERPA_ONNX_READ_META_DATA_STR_WITH_DEFAULT(
        meta_data_.feature_normalize_type, "feature_normalize_type", "");

    SHERPA_ONNX_READ_META_DATA_STR_WITH_DEFAULT(meta_data_.window_type,
                                                "window_type", "povey");

    std::string framework;
    SHERPA_ONNX_READ_META_DATA_STR(framework, "framework");
    if (framework != "nemo") {
#if __OHOS__
      SHERPA_ONNX_LOGE("Expect a NeMo model, given: %{public}s",
                       framework.c_str());
#else
      SHERPA_ONNX_LOGE("Expect a NeMo model, given: %s", framework.c_str());
#endif
      exit(-1);
    }
  }

 private:
  SpeakerEmbeddingExtractorConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;

  SpeakerEmbeddingExtractorNeMoModelMetaData meta_data_;
};

SpeakerEmbeddingExtractorNeMoModel::SpeakerEmbeddingExtractorNeMoModel(
    const SpeakerEmbeddingExtractorConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
SpeakerEmbeddingExtractorNeMoModel::SpeakerEmbeddingExtractorNeMoModel(
    Manager *mgr, const SpeakerEmbeddingExtractorConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

SpeakerEmbeddingExtractorNeMoModel::~SpeakerEmbeddingExtractorNeMoModel() =
    default;

const SpeakerEmbeddingExtractorNeMoModelMetaData &
SpeakerEmbeddingExtractorNeMoModel::GetMetaData() const {
  return impl_->GetMetaData();
}

Ort::Value SpeakerEmbeddingExtractorNeMoModel::Compute(
    Ort::Value x, Ort::Value x_lens) const {
  return impl_->Compute(std::move(x), std::move(x_lens));
}

OrtAllocator *SpeakerEmbeddingExtractorNeMoModel::Allocator() const {
  return impl_->Allocator();
}

#if __ANDROID_API__ >= 9
template SpeakerEmbeddingExtractorNeMoModel::SpeakerEmbeddingExtractorNeMoModel(
    AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config);
#endif

#if __OHOS__
template SpeakerEmbeddingExtractorNeMoModel::SpeakerEmbeddingExtractorNeMoModel(
    NativeResourceManager *mgr, const SpeakerEmbeddingExtractorConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/speaker-embedding-extractor-nemo-model.h
================================================
// sherpa-onnx/csrc/speaker-embedding-extractor-nemo-model.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_NEMO_MODEL_H_
#define SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_NEMO_MODEL_H_

#include <memory>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/speaker-embedding-extractor-nemo-model-meta-data.h"
#include "sherpa-onnx/csrc/speaker-embedding-extractor.h"

namespace sherpa_onnx {

class SpeakerEmbeddingExtractorNeMoModel {
 public:
  explicit SpeakerEmbeddingExtractorNeMoModel(
      const SpeakerEmbeddingExtractorConfig &config);

  template <typename Manager>
  SpeakerEmbeddingExtractorNeMoModel(
      Manager *mgr, const SpeakerEmbeddingExtractorConfig &config);

  ~SpeakerEmbeddingExtractorNeMoModel();

  const SpeakerEmbeddingExtractorNeMoModelMetaData &GetMetaData() const;

  /**
   * @param x A float32 tensor of shape (N, C, T)
   * @param x_len A int64 tensor of shape (N,)
   * @return A float32 tensor of shape (N, C)
   */
  Ort::Value Compute(Ort::Value x, Ort::Value x_len) const;

  OrtAllocator *Allocator() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_NEMO_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/speaker-embedding-extractor.cc
================================================
// sherpa-onnx/csrc/speaker-embedding-extractor.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/speaker-embedding-extractor.h"

#include <memory>
#include <string>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/speaker-embedding-extractor-impl.h"

namespace sherpa_onnx {

void SpeakerEmbeddingExtractorConfig::Register(ParseOptions *po) {
  po->Register("model", &model, "Path to the speaker embedding model.");
  po->Register("num-threads", &num_threads,
               "Number of threads to run the neural network");

  po->Register("debug", &debug,
               "true to print model information while loading it.");

  po->Register("provider", &provider,
               "Specify a provider to use: cpu, cuda, coreml");
}

bool SpeakerEmbeddingExtractorConfig::Validate() const {
  if (model.empty()) {
    SHERPA_ONNX_LOGE("Please provide a speaker embedding extractor model");
    return false;
  }

  if (!FileExists(model)) {
    SHERPA_ONNX_LOGE("speaker embedding extractor model: '%s' does not exist",
                     model.c_str());
    return false;
  }

  return true;
}

std::string SpeakerEmbeddingExtractorConfig::ToString() const {
  std::ostringstream os;

  os << "SpeakerEmbeddingExtractorConfig(";
  os << "model=\"" << model << "\", ";
  os << "num_threads=" << num_threads << ", ";
  os << "debug=" << (debug ? "True" : "False") << ", ";
  os << "provider=\"" << provider << "\")";

  return os.str();
}

SpeakerEmbeddingExtractor::SpeakerEmbeddingExtractor(
    const SpeakerEmbeddingExtractorConfig &config)
    : impl_(SpeakerEmbeddingExtractorImpl::Create(config)) {}

template <typename Manager>
SpeakerEmbeddingExtractor::SpeakerEmbeddingExtractor(
    Manager *mgr, const SpeakerEmbeddingExtractorConfig &config)
    : impl_(SpeakerEmbeddingExtractorImpl::Create(mgr, config)) {}

SpeakerEmbeddingExtractor::~SpeakerEmbeddingExtractor() = default;

int32_t SpeakerEmbeddingExtractor::Dim() const { return impl_->Dim(); }

std::unique_ptr<OnlineStream> SpeakerEmbeddingExtractor::CreateStream() const {
  return impl_->CreateStream();
}

bool SpeakerEmbeddingExtractor::IsReady(OnlineStream *s) const {
  return impl_->IsReady(s);
}

std::vector<float> SpeakerEmbeddingExtractor::Compute(OnlineStream *s) const {
  return impl_->Compute(s);
}

#if __ANDROID_API__ >= 9
template SpeakerEmbeddingExtractor::SpeakerEmbeddingExtractor(
    AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config);
#endif

#if __OHOS__
template SpeakerEmbeddingExtractor::SpeakerEmbeddingExtractor(
    NativeResourceManager *mgr, const SpeakerEmbeddingExtractorConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/speaker-embedding-extractor.h
================================================
// sherpa-onnx/csrc/speaker-embedding-extractor.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_H_
#define SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_H_

#include <memory>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/online-stream.h"
#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct SpeakerEmbeddingExtractorConfig {
  std::string model;
  int32_t num_threads = 1;
  bool debug = false;
  std::string provider = "cpu";

  SpeakerEmbeddingExtractorConfig() = default;
  SpeakerEmbeddingExtractorConfig(const std::string &model, int32_t num_threads,
                                  bool debug, const std::string &provider)
      : model(model),
        num_threads(num_threads),
        debug(debug),
        provider(provider) {}

  void Register(ParseOptions *po);
  bool Validate() const;
  std::string ToString() const;
};

class SpeakerEmbeddingExtractorImpl;

class SpeakerEmbeddingExtractor {
 public:
  explicit SpeakerEmbeddingExtractor(
      const SpeakerEmbeddingExtractorConfig &config);

  template <typename Manager>
  SpeakerEmbeddingExtractor(Manager *mgr,
                            const SpeakerEmbeddingExtractorConfig &config);

  ~SpeakerEmbeddingExtractor();

  // Return the dimension of the embedding
  int32_t Dim() const;

  // Create a stream to accept audio samples and compute features
  std::unique_ptr<OnlineStream> CreateStream() const;

  // Return true if there are feature frames in OnlineStream that
  // can be used to compute embeddings.
  bool IsReady(OnlineStream *s) const;

  // Compute the speaker embedding from the available unprocessed features
  // of the given stream
  //
  // You have to ensure IsReady(s) returns true before you call this method.
  std::vector<float> Compute(OnlineStream *s) const;

 private:
  std::unique_ptr<SpeakerEmbeddingExtractorImpl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_H_


================================================
FILE: sherpa-onnx/csrc/speaker-embedding-manager-test.cc
================================================
// sherpa-onnx/csrc/speaker-embedding-manager-test.cc
//
// Copyright (c) 2024 Jingzhao Ou (jingzhao.ou@gmail.com)

#include "sherpa-onnx/csrc/speaker-embedding-manager.h"

#include <string>
#include <vector>

#include "gtest/gtest.h"

namespace sherpa_onnx {

TEST(SpeakerEmbeddingManager, AddAndRemove) {
  int32_t dim = 2;
  SpeakerEmbeddingManager manager(dim);
  std::vector<float> v = {0.1, 0.1};
  bool status = manager.Add("first", v.data());
  ASSERT_TRUE(status);
  ASSERT_EQ(manager.NumSpeakers(), 1);

  // duplicate
  status = manager.Add("first", v.data());
  ASSERT_FALSE(status);
  ASSERT_EQ(manager.NumSpeakers(), 1);

  // non-duplicate
  v = {0.1, 0.9};
  status = manager.Add("second", v.data());
  ASSERT_TRUE(status);
  ASSERT_EQ(manager.NumSpeakers(), 2);

  // do not exist
  status = manager.Remove("third");
  ASSERT_FALSE(status);

  status = manager.Remove("first");
  ASSERT_TRUE(status);
  ASSERT_EQ(manager.NumSpeakers(), 1);

  v = {0.1, 0.1};
  status = manager.Add("first", v.data());
  ASSERT_TRUE(status);
  ASSERT_EQ(manager.NumSpeakers(), 2);

  status = manager.Remove("first");
  ASSERT_TRUE(status);
  ASSERT_EQ(manager.NumSpeakers(), 1);

  status = manager.Remove("second");
  ASSERT_TRUE(status);
  ASSERT_EQ(manager.NumSpeakers(), 0);
}

TEST(SpeakerEmbeddingManager, Search) {
  int32_t dim = 2;
  SpeakerEmbeddingManager manager(dim);
  std::vector<float> v1 = {0.1, 0.1};
  std::vector<float> v2 = {0.1, 0.9};
  std::vector<float> v3 = {0.9, 0.1};
  bool status = manager.Add("first", v1.data());
  ASSERT_TRUE(status);

  status = manager.Add("second", v2.data());
  ASSERT_TRUE(status);

  status = manager.Add("third", v3.data());
  ASSERT_TRUE(status);

  ASSERT_EQ(manager.NumSpeakers(), 3);

  std::vector<float> v = {15, 16};
  float threshold = 0.9;

  std::string name = manager.Search(v.data(), threshold);
  EXPECT_EQ(name, "first");

  v = {2, 17};
  name = manager.Search(v.data(), threshold);
  EXPECT_EQ(name, "second");

  v = {17, 2};
  name = manager.Search(v.data(), threshold);
  EXPECT_EQ(name, "third");

  threshold = 0.9;
  v = {15, 16};
  status = manager.Remove("first");
  ASSERT_TRUE(status);
  name = manager.Search(v.data(), threshold);
  EXPECT_EQ(name, "");

  v = {17, 2};
  status = manager.Remove("third");
  ASSERT_TRUE(status);
  name = manager.Search(v.data(), threshold);
  EXPECT_EQ(name, "");

  v = {2, 17};
  status = manager.Remove("second");
  ASSERT_TRUE(status);
  name = manager.Search(v.data(), threshold);
  EXPECT_EQ(name, "");

  ASSERT_EQ(manager.NumSpeakers(), 0);
}

TEST(SpeakerEmbeddingManager, Verify) {
  int32_t dim = 2;
  SpeakerEmbeddingManager manager(dim);
  std::vector<float> v1 = {0.1, 0.1};
  std::vector<float> v2 = {0.1, 0.9};
  std::vector<float> v3 = {0.9, 0.1};
  bool status = manager.Add("first", v1.data());
  ASSERT_TRUE(status);

  status = manager.Add("second", v2.data());
  ASSERT_TRUE(status);

  status = manager.Add("third", v3.data());
  ASSERT_TRUE(status);

  std::vector<float> v = {15, 16};
  float threshold = 0.9;

  status = manager.Verify("first", v.data(), threshold);
  ASSERT_TRUE(status);

  v = {2, 17};
  status = manager.Verify("first", v.data(), threshold);
  ASSERT_FALSE(status);

  status = manager.Verify("second", v.data(), threshold);
  ASSERT_TRUE(status);

  v = {17, 2};
  status = manager.Verify("first", v.data(), threshold);
  ASSERT_FALSE(status);

  status = manager.Verify("second", v.data(), threshold);
  ASSERT_FALSE(status);

  status = manager.Verify("third", v.data(), threshold);
  ASSERT_TRUE(status);

  status = manager.Verify("fourth", v.data(), threshold);
  ASSERT_FALSE(status);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/speaker-embedding-manager.cc
================================================
// sherpa-onnx/csrc/speaker-embedding-manager.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/speaker-embedding-manager.h"

#include <algorithm>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "Eigen/Dense"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

using FloatMatrix = Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic,
                                  Eigen::RowMajor>;  // NOLINT

class SpeakerEmbeddingManager::Impl {
 public:
  explicit Impl(int32_t dim) : dim_(dim) {}

  bool Add(const std::string &name, const float *p) {
    if (name2row_.count(name)) {
      // a speaker with the same name already exists
      return false;
    }

    embedding_matrix_.conservativeResize(embedding_matrix_.rows() + 1, dim_);

    std::copy(p, p + dim_, &embedding_matrix_.bottomRows(1)(0, 0));

    embedding_matrix_.bottomRows(1).normalize();  // inplace

    name2row_[name] = embedding_matrix_.rows() - 1;
    row2name_[embedding_matrix_.rows() - 1] = name;

    return true;
  }

  bool Add(const std::string &name,
           const std::vector<std::vector<float>> &embedding_list) {
    if (name2row_.count(name)) {
      // a speaker with the same name already exists
      return false;
    }

    if (embedding_list.empty()) {
      SHERPA_ONNX_LOGE("Empty list of embeddings");
      return false;
    }

    for (const auto &x : embedding_list) {
      if (static_cast<int32_t>(x.size()) != dim_) {
        SHERPA_ONNX_LOGE("Given dim: %d, expected dim: %d",
                         static_cast<int32_t>(x.size()), dim_);
        return false;
      }
    }

    // compute the average
    Eigen::RowVectorXf v = Eigen::Map<Eigen::RowVectorXf>(
        const_cast<float *>(embedding_list[0].data()), dim_);
    int32_t i = -1;
    for (const auto &x : embedding_list) {
      ++i;
      if (i == 0) {
        continue;
      }
      v += Eigen::Map<Eigen::RowVectorXf>(const_cast<float *>(x.data()), dim_);
    }

    // no need to compute the mean since we are going to normalize it anyway
    // v /= embedding_list.size();

    v.normalize();

    embedding_matrix_.conservativeResize(embedding_matrix_.rows() + 1, dim_);
    embedding_matrix_.bottomRows(1) = v;

    name2row_[name] = embedding_matrix_.rows() - 1;
    row2name_[embedding_matrix_.rows() - 1] = name;

    return true;
  }

  bool Remove(const std::string &name) {
    if (!name2row_.count(name)) {
      return false;
    }

    int32_t row_idx = name2row_.at(name);

    int32_t num_rows = embedding_matrix_.rows();

    if (row_idx < num_rows - 1) {
      embedding_matrix_.block(row_idx, 0, num_rows - 1 - row_idx, dim_) =
          embedding_matrix_.bottomRows(num_rows - 1 - row_idx);
    }

    embedding_matrix_.conservativeResize(num_rows - 1, dim_);
    for (auto &p : name2row_) {
      if (p.second > row_idx) {
        p.second -= 1;
        row2name_[p.second] = p.first;
      }
    }

    name2row_.erase(name);
    row2name_.erase(num_rows - 1);

    return true;
  }

  std::string Search(const float *p, float threshold) {
    if (embedding_matrix_.rows() == 0) {
      return {};
    }

    Eigen::VectorXf v =
        Eigen::Map<Eigen::VectorXf>(const_cast<float *>(p), dim_);
    v.normalize();

    Eigen::VectorXf scores = embedding_matrix_ * v;

    Eigen::VectorXf::Index max_index = 0;
    float max_score = scores.maxCoeff(&max_index);
    if (max_score < threshold) {
      return {};
    }

    return row2name_.at(max_index);
  }

  std::vector<SpeakerMatch> GetBestMatches(const float *p, float threshold,
                                           int32_t n) {
    std::vector<SpeakerMatch> matches;

    if (embedding_matrix_.rows() == 0) {
      return matches;
    }

    Eigen::VectorXf v =
        Eigen::Map<Eigen::VectorXf>(const_cast<float *>(p), dim_);
    v.normalize();

    Eigen::VectorXf scores = embedding_matrix_ * v;

    std::vector<std::pair<float, int>> score_indices;
    for (int i = 0; i < scores.size(); ++i) {
      if (scores[i] >= threshold) {
        score_indices.emplace_back(scores[i], i);
      }
    }

    std::sort(score_indices.rbegin(), score_indices.rend(),
              [](const auto &a, const auto &b) { return a.first < b.first; });

    matches.reserve(score_indices.size());
    for (int i = 0; i < std::min(n, static_cast<int32_t>(score_indices.size()));
         ++i) {
      const auto &pair = score_indices[i];
      matches.push_back({row2name_.at(pair.second), pair.first});
    }

    return matches;
  }

  bool Verify(const std::string &name, const float *p, float threshold) {
    if (!name2row_.count(name)) {
      return false;
    }

    int32_t row_idx = name2row_.at(name);

    Eigen::VectorXf v =
        Eigen::Map<Eigen::VectorXf>(const_cast<float *>(p), dim_);
    v.normalize();

    float score = embedding_matrix_.row(row_idx) * v;

    if (score < threshold) {
      return false;
    }

    return true;
  }

  float Score(const std::string &name, const float *p) {
    if (!name2row_.count(name)) {
      // Setting a default value if the name is not found
      return -2.0;
    }

    int32_t row_idx = name2row_.at(name);

    Eigen::VectorXf v =
        Eigen::Map<Eigen::VectorXf>(const_cast<float *>(p), dim_);
    v.normalize();

    float score = embedding_matrix_.row(row_idx) * v;

    return score;
  }

  bool Contains(const std::string &name) const {
    return name2row_.count(name) > 0;
  }

  int32_t NumSpeakers() const { return embedding_matrix_.rows(); }

  int32_t Dim() const { return dim_; }

  std::vector<std::string> GetAllSpeakers() const {
    std::vector<std::string> all_speakers;
    all_speakers.reserve(name2row_.size());
    for (const auto &p : name2row_) {
      all_speakers.push_back(p.first);
    }

    std::sort(all_speakers.begin(), all_speakers.end());
    return all_speakers;
  }

 private:
  int32_t dim_;
  FloatMatrix embedding_matrix_;
  std::unordered_map<std::string, int32_t> name2row_;
  std::unordered_map<int32_t, std::string> row2name_;
};

SpeakerEmbeddingManager::SpeakerEmbeddingManager(int32_t dim)
    : impl_(std::make_unique<Impl>(dim)) {}

SpeakerEmbeddingManager::~SpeakerEmbeddingManager() = default;

bool SpeakerEmbeddingManager::Add(const std::string &name,
                                  const float *p) const {
  return impl_->Add(name, p);
}

bool SpeakerEmbeddingManager::Add(
    const std::string &name,
    const std::vector<std::vector<float>> &embedding_list) const {
  return impl_->Add(name, embedding_list);
}

bool SpeakerEmbeddingManager::Remove(const std::string &name) const {
  return impl_->Remove(name);
}

std::string SpeakerEmbeddingManager::Search(const float *p,
                                            float threshold) const {
  return impl_->Search(p, threshold);
}

std::vector<SpeakerMatch> SpeakerEmbeddingManager::GetBestMatches(
    const float *p, float threshold, int32_t n) const {
  return impl_->GetBestMatches(p, threshold, n);
}

bool SpeakerEmbeddingManager::Verify(const std::string &name, const float *p,
                                     float threshold) const {
  return impl_->Verify(name, p, threshold);
}

float SpeakerEmbeddingManager::Score(const std::string &name,
                                     const float *p) const {
  return impl_->Score(name, p);
}

int32_t SpeakerEmbeddingManager::NumSpeakers() const {
  return impl_->NumSpeakers();
}

int32_t SpeakerEmbeddingManager::Dim() const { return impl_->Dim(); }

bool SpeakerEmbeddingManager::Contains(const std::string &name) const {
  return impl_->Contains(name);
}

std::vector<std::string> SpeakerEmbeddingManager::GetAllSpeakers() const {
  return impl_->GetAllSpeakers();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/speaker-embedding-manager.h
================================================
// sherpa-onnx/csrc/speaker-embedding-manager.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_MANAGER_H_
#define SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_MANAGER_H_

#include <memory>
#include <string>
#include <vector>

struct SpeakerMatch {
  const std::string name;
  float score;
};

namespace sherpa_onnx {

class SpeakerEmbeddingManager {
 public:
  // @param dim Embedding dimension.
  explicit SpeakerEmbeddingManager(int32_t dim);
  ~SpeakerEmbeddingManager();

  /* Add the embedding and name of a speaker to the manager.
   *
   * @param name Name of the speaker
   * @param p Pointer to the embedding. Its length is `dim`.
   * @return Return true if added successfully. Return false if it failed.
   *         At present, the only reason for a failure is that there is already
   *         a speaker with the same `name`.
   */
  bool Add(const std::string &name, const float *p) const;

  /** Add a list of embeddings of a speaker.
   *
   * @param name Name of the speaker
   * @param embedding_list A list of embeddings. Each entry should be of size
   *                       `dim`. The average of the list is the final
   *                       embedding.
   * @return Return true if added successfully. Return false if it failed.
   *         At present, the only reason for a failure is that there is already
   *         a speaker with the same `name`.
   */
  bool Add(const std::string &name,
           const std::vector<std::vector<float>> &embedding_list) const;

  /* Remove a speaker by its name.
   *
   * @param name Name of the speaker to remove.
   * @return Return true if it is removed successfully. Return false
   *         if there is no such a speaker.
   */
  bool Remove(const std::string &name) const;

  /** It is for speaker identification.
   *
   * It computes the cosine similarity between and given embedding and all
   * other embeddings and find the embedding that has the largest score
   * and the score is above or equal to threshold. Return the speaker
   * name for the embedding if found; otherwise, it returns an empty string.
   *
   * @param p The input embedding.
   * @param threshold A value between 0 and 1.
   * @param If found, return the name of the speaker. Otherwise, return an
   *        empty string.
   */
  std::string Search(const float *p, float threshold) const;

  /**
   * It is for speaker identification.
   *
   * It computes the cosine similarity between a given embedding and all
   * other embeddings and finds the embeddings that have the largest scores
   * and the scores are above or equal to the threshold. Returns a vector of
   * SpeakerMatch structures containing the speaker names and scores for the
   * embeddings if found; otherwise, returns an empty vector.
   *
   * @param p A pointer to the input embedding.
   * @param threshold A value between 0 and 1.
   * @param n The number of top matches to return.
   * @return A vector of SpeakerMatch structures. If matches are found, the
   *         vector contains the names and scores of the speakers. Otherwise,
   *         it returns an empty vector.
   */
  std::vector<SpeakerMatch> GetBestMatches(const float *p, float threshold,
                                           int32_t n) const;

  /* Check whether the input embedding matches the embedding of the input
   * speaker.
   *
   * It is for speaker verification.
   *
   * @param name The target speaker name.
   * @param p The input embedding to check.
   * @param threshold A value between 0 and 1.
   * @return Return true if it matches. Otherwise, it returns false.
   */
  bool Verify(const std::string &name, const float *p, float threshold) const;

  float Score(const std::string &name, const float *p) const;

  // Return true if the given speaker already exists; return false otherwise.
  bool Contains(const std::string &name) const;

  int32_t NumSpeakers() const;

  int32_t Dim() const;

  // Return a list of speaker names
  std::vector<std::string> GetAllSpeakers() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_MANAGER_H_


================================================
FILE: sherpa-onnx/csrc/spoken-language-identification-impl.cc
================================================
// sherpa-onnx/csrc/spoken-language-identification-impl.cc
//
// Copyright (c)  2024  Xiaomi Corporation
#include "sherpa-onnx/csrc/spoken-language-identification-impl.h"

#include <memory>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/spoken-language-identification-whisper-impl.h"

namespace sherpa_onnx {

namespace {

enum class ModelType : std::uint8_t {
  kWhisper,
  kUnknown,
};

}

static ModelType GetModelType(char *model_data, size_t model_data_length,
                              bool debug) {
  Ort::Env env(ORT_LOGGING_LEVEL_ERROR);
  Ort::SessionOptions sess_opts;

  auto sess = std::make_unique<Ort::Session>(env, model_data, model_data_length,
                                             sess_opts);

  Ort::ModelMetadata meta_data = sess->GetModelMetadata();
  if (debug) {
    std::ostringstream os;
    PrintModelMetadata(os, meta_data);
    SHERPA_ONNX_LOGE("%s", os.str().c_str());
  }

  Ort::AllocatorWithDefaultOptions allocator;
  auto model_type =
      LookupCustomModelMetaData(meta_data, "model_type", allocator);
  if (model_type.empty()) {
    SHERPA_ONNX_LOGE(
        "No model_type in the metadata!\n"
        "Please make sure you have added metadata to the model.\n\n"
        "For instance, you can use\n"
        "https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/whisper/"
        "export-onnx.py "
        "to add metadata to models from whisper\n");
    return ModelType::kUnknown;
  }

  if (model_type.find("whisper") == 0) {
    return ModelType::kWhisper;
  } else {
    SHERPA_ONNX_LOGE("Unsupported model_type: %s", model_type.c_str());
    return ModelType::kUnknown;
  }
}

std::unique_ptr<SpokenLanguageIdentificationImpl>
SpokenLanguageIdentificationImpl::Create(
    const SpokenLanguageIdentificationConfig &config) {
  ModelType model_type = ModelType::kUnknown;
  {
    if (config.whisper.encoder.empty()) {
      SHERPA_ONNX_LOGE("Only whisper models are supported at present");
      exit(-1);
    }
    auto buffer = ReadFile(config.whisper.encoder);

    model_type = GetModelType(buffer.data(), buffer.size(), config.debug);
  }

  switch (model_type) {
    case ModelType::kWhisper:
      return std::make_unique<SpokenLanguageIdentificationWhisperImpl>(config);
    case ModelType::kUnknown:
      SHERPA_ONNX_LOGE(
          "Unknown model type for spoken language identification!");
      return nullptr;
  }

  // unreachable code
  return nullptr;
}

#if __ANDROID_API__ >= 9
std::unique_ptr<SpokenLanguageIdentificationImpl>
SpokenLanguageIdentificationImpl::Create(
    AAssetManager *mgr, const SpokenLanguageIdentificationConfig &config) {
  ModelType model_type = ModelType::kUnknown;
  {
    if (config.whisper.encoder.empty()) {
      SHERPA_ONNX_LOGE("Only whisper models are supported at present");
      exit(-1);
    }
    auto buffer = ReadFile(mgr, config.whisper.encoder);

    model_type = GetModelType(buffer.data(), buffer.size(), config.debug);
  }

  switch (model_type) {
    case ModelType::kWhisper:
      return std::make_unique<SpokenLanguageIdentificationWhisperImpl>(mgr,
                                                                       config);
    case ModelType::kUnknown:
      SHERPA_ONNX_LOGE(
          "Unknown model type for spoken language identification!");
      return nullptr;
  }

  // unreachable code
  return nullptr;
}
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/spoken-language-identification-impl.h
================================================
// sherpa-onnx/csrc/spoken-language-identification-impl.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_SPOKEN_LANGUAGE_IDENTIFICATION_IMPL_H_
#define SHERPA_ONNX_CSRC_SPOKEN_LANGUAGE_IDENTIFICATION_IMPL_H_

#include <memory>
#include <string>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#include "sherpa-onnx/csrc/spoken-language-identification.h"

namespace sherpa_onnx {

class SpokenLanguageIdentificationImpl {
 public:
  virtual ~SpokenLanguageIdentificationImpl() = default;

  static std::unique_ptr<SpokenLanguageIdentificationImpl> Create(
      const SpokenLanguageIdentificationConfig &config);

#if __ANDROID_API__ >= 9
  static std::unique_ptr<SpokenLanguageIdentificationImpl> Create(
      AAssetManager *mgr, const SpokenLanguageIdentificationConfig &config);
#endif

  virtual std::unique_ptr<OfflineStream> CreateStream() const = 0;

  virtual std::string Compute(OfflineStream *s) const = 0;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_SPOKEN_LANGUAGE_IDENTIFICATION_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/spoken-language-identification-whisper-impl.h
================================================
// sherpa-onnx/csrc/spoken-language-identification-whisper-impl.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_SPOKEN_LANGUAGE_IDENTIFICATION_WHISPER_IMPL_H_
#define SHERPA_ONNX_CSRC_SPOKEN_LANGUAGE_IDENTIFICATION_WHISPER_IMPL_H_

#include <algorithm>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#include "sherpa-onnx/csrc/offline-whisper-model.h"
#include "sherpa-onnx/csrc/spoken-language-identification-impl.h"
#include "sherpa-onnx/csrc/transpose.h"

namespace sherpa_onnx {

class SpokenLanguageIdentificationWhisperImpl
    : public SpokenLanguageIdentificationImpl {
 public:
  explicit SpokenLanguageIdentificationWhisperImpl(
      const SpokenLanguageIdentificationConfig &config)
      : config_(config), model_(std::make_unique<OfflineWhisperModel>(config)) {
    Check();
  }

#if __ANDROID_API__ >= 9
  SpokenLanguageIdentificationWhisperImpl(
      AAssetManager *mgr, const SpokenLanguageIdentificationConfig &config)
      : config_(config),
        model_(std::make_unique<OfflineWhisperModel>(mgr, config)) {
    Check();
  }
#endif

  std::unique_ptr<OfflineStream> CreateStream() const override {
    return std::make_unique<OfflineStream>(WhisperTag{});
  }

  std::string Compute(OfflineStream *s) const override {
    int32_t max_num_frames = 3000;
    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    int32_t feat_dim = s->FeatureDim();
    std::vector<float> f = s->GetFrames();
    int32_t num_frames = f.size() / feat_dim;

    // we use 50 here so that there will be some zero tail paddings
    if (num_frames >= max_num_frames - 50) {
      SHERPA_ONNX_LOGE(
          "Only waves less than 30 seconds are supported. We process only the "
          "first 30 seconds and discard the remaining data");
      num_frames = max_num_frames - 50;
    }

    model_->NormalizeFeatures(f.data(), num_frames, feat_dim);

    // note that 1000 is an experience-value.
    // You can replace 1000 by other values, say, 100.
    //
    // Since we have removed the 30 seconds constraint, we need
    // tail_padding_frames so that whisper is able to detect the eot token.
    int32_t tail_padding_frames = 1000;

    if (config_.whisper.tail_paddings > 0) {
      tail_padding_frames = config_.whisper.tail_paddings;
    }

    int32_t actual_frames =
        std::min(num_frames + tail_padding_frames, max_num_frames);

    std::array<int64_t, 3> shape{1, actual_frames, feat_dim};

    Ort::Value mel = Ort::Value::CreateTensor<float>(
        model_->Allocator(), shape.data(), shape.size());

    float *p_mel = mel.GetTensorMutableData<float>();
    std::copy(f.data(), f.data() + num_frames * feat_dim, p_mel);

    std::fill_n(p_mel + num_frames * feat_dim,
                (actual_frames - num_frames) * feat_dim, 0);

    mel = Transpose12(model_->Allocator(), &mel);

    try {
      auto cross_kv = model_->ForwardEncoder(std::move(mel));
      int32_t lang_id = model_->DetectLanguage(cross_kv.first, cross_kv.second);
      const auto &id2lang = model_->GetID2Lang();
      if (id2lang.count(lang_id)) {
        return id2lang.at(lang_id);
      } else {
        SHERPA_ONNX_LOGE("Unknown language ID: %d. Return an empty string.",
                         lang_id);
        return "";
      }
    } catch (const Ort::Exception &ex) {
      SHERPA_ONNX_LOGE(
          "\n\nCaught exception:\n\n%s\n\nReturn an empty result. Number of "
          "input frames: %d, Current tail "
          "paddings: %d. If you see a lot of such exceptions, please consider "
          "using a larger --whisper-tail-paddings",
          ex.what(), num_frames, tail_padding_frames);
      return "";
    }
  }

 private:
  void Check() const {
    if (!model_->IsMultiLingual()) {
      SHERPA_ONNX_LOGE(
          "Only whisper multilingual models can be used for spoken language "
          "identification. Given: %s,%s",
          config_.whisper.encoder.c_str(), config_.whisper.decoder.c_str());
      exit(-1);
    }
  }

 private:
  SpokenLanguageIdentificationConfig config_;
  std::unique_ptr<OfflineWhisperModel> model_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_SPOKEN_LANGUAGE_IDENTIFICATION_WHISPER_IMPL_H_


================================================
FILE: sherpa-onnx/csrc/spoken-language-identification.cc
================================================
// sherpa-onnx/csrc/spoken-language-identification.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/spoken-language-identification.h"

#include <memory>
#include <string>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/spoken-language-identification-impl.h"

namespace sherpa_onnx {

void SpokenLanguageIdentificationWhisperConfig::Register(ParseOptions *po) {
  po->Register(
      "whisper-encoder", &encoder,
      "Path to then encoder of a whisper multilingual model. Support only "
      "tiny, base, small, medium, large.");

  po->Register(
      "whisper-decoder", &decoder,
      "Path to the decoder of a whisper multilingual model. Support only "
      "tiny, base, small, medium, large.");

  po->Register(
      "whisper-tail-paddings", &tail_paddings,
      "Suggested value: 300 for multilingual models. "
      "Since we have removed the 30-second constraint, we need to add some "
      "tail padding frames "
      "so that whisper can detect the eot token. Leave it to -1 to use 1000");
}

bool SpokenLanguageIdentificationWhisperConfig::Validate() const {
  if (encoder.empty()) {
    SHERPA_ONNX_LOGE("Please provide --whisper-encoder");
    return false;
  }

  if (!FileExists(encoder)) {
    SHERPA_ONNX_LOGE("whisper encoder file '%s' does not exist",
                     encoder.c_str());
    return false;
  }

  if (decoder.empty()) {
    SHERPA_ONNX_LOGE("Please provide --whisper-decoder");
    return false;
  }

  if (!FileExists(decoder)) {
    SHERPA_ONNX_LOGE("whisper decoder file '%s' does not exist",
                     decoder.c_str());
    return false;
  }

  return true;
}

std::string SpokenLanguageIdentificationWhisperConfig::ToString() const {
  std::ostringstream os;

  os << "SpokenLanguageIdentificationWhisperConfig(";
  os << "encoder=\"" << encoder << "\", ";
  os << "decoder=\"" << decoder << "\", ";
  os << "tail_paddings=" << tail_paddings << ")";

  return os.str();
}

void SpokenLanguageIdentificationConfig::Register(ParseOptions *po) {
  whisper.Register(po);

  po->Register("num-threads", &num_threads,
               "Number of threads to run the neural network");

  po->Register("debug", &debug,
               "true to print model information while loading it.");

  po->Register("provider", &provider,
               "Specify a provider to use: cpu, cuda, coreml");
}

bool SpokenLanguageIdentificationConfig::Validate() const {
  if (!whisper.Validate()) {
    return false;
  }

  return true;
}

std::string SpokenLanguageIdentificationConfig::ToString() const {
  std::ostringstream os;

  os << "SpokenLanguageIdentificationConfig(";
  os << "whisper=" << whisper.ToString() << ", ";
  os << "num_threads=" << num_threads << ", ";
  os << "debug=" << (debug ? "True" : "False") << ", ";
  os << "provider=\"" << provider << "\")";

  return os.str();
}

SpokenLanguageIdentification::SpokenLanguageIdentification(
    const SpokenLanguageIdentificationConfig &config)
    : impl_(SpokenLanguageIdentificationImpl::Create(config)) {}

#if __ANDROID_API__ >= 9
SpokenLanguageIdentification::SpokenLanguageIdentification(
    AAssetManager *mgr, const SpokenLanguageIdentificationConfig &config)
    : impl_(SpokenLanguageIdentificationImpl::Create(mgr, config)) {}
#endif

SpokenLanguageIdentification::~SpokenLanguageIdentification() = default;

std::unique_ptr<OfflineStream> SpokenLanguageIdentification::CreateStream()
    const {
  return impl_->CreateStream();
}

std::string SpokenLanguageIdentification::Compute(OfflineStream *s) const {
  return impl_->Compute(s);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/spoken-language-identification.h
================================================
// sherpa-onnx/csrc/spoken-language-identification.h
//
// Copyright (c)  2024  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_SPOKEN_LANGUAGE_IDENTIFICATION_H_
#define SHERPA_ONNX_CSRC_SPOKEN_LANGUAGE_IDENTIFICATION_H_

#include <memory>
#include <string>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#include "sherpa-onnx/csrc/offline-stream.h"
#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct SpokenLanguageIdentificationWhisperConfig {
  // Requires a multi-lingual whisper model.
  // That is, it supports only tiny, base, small, medium, large.
  // Note: It does NOT support tiny.en, base.en, small.en, medium.en
  std::string encoder;
  std::string decoder;

  // Number of tail padding frames.
  //
  // Since we remove the 30-second constraint, we need to add some paddings
  // at the end.
  //
  // Recommended values:
  //   - 50 for English models
  //   - 300 for multilingual models
  int32_t tail_paddings = -1;

  SpokenLanguageIdentificationWhisperConfig() = default;

  SpokenLanguageIdentificationWhisperConfig(const std::string &encoder,
                                            const std::string &decoder,
                                            int32_t tail_paddings)
      : encoder(encoder), decoder(decoder), tail_paddings(tail_paddings) {}

  void Register(ParseOptions *po);
  bool Validate() const;
  std::string ToString() const;
};

struct SpokenLanguageIdentificationConfig {
  SpokenLanguageIdentificationWhisperConfig whisper;

  int32_t num_threads = 1;
  bool debug = false;
  std::string provider = "cpu";

  SpokenLanguageIdentificationConfig() = default;

  SpokenLanguageIdentificationConfig(
      const SpokenLanguageIdentificationWhisperConfig &whisper,
      int32_t num_threads, bool debug, const std::string &provider)
      : whisper(whisper),
        num_threads(num_threads),
        debug(debug),
        provider(provider) {}

  void Register(ParseOptions *po);
  bool Validate() const;
  std::string ToString() const;
};

class SpokenLanguageIdentificationImpl;

class SpokenLanguageIdentification {
 public:
  explicit SpokenLanguageIdentification(
      const SpokenLanguageIdentificationConfig &config);

#if __ANDROID_API__ >= 9
  SpokenLanguageIdentification(
      AAssetManager *mgr, const SpokenLanguageIdentificationConfig &config);
#endif

  ~SpokenLanguageIdentification();

  // Create a stream to accept audio samples and compute features
  std::unique_ptr<OfflineStream> CreateStream() const;

  // Return a string containing the language, e.g., en, zh, de,
  // etc.
  // Note: en is for English, zh is for Chinese, de is for German, etc.
  std::string Compute(OfflineStream *s) const;

 private:
  std::unique_ptr<SpokenLanguageIdentificationImpl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_SPOKEN_LANGUAGE_IDENTIFICATION_H_


================================================
FILE: sherpa-onnx/csrc/stack-test.cc
================================================
// sherpa-onnx/csrc/stack-test.cc
//
// Copyright (c) 2023 Jingzhao Ou (jingzhao.ou@gmail.com)

#include "sherpa-onnx/csrc/stack.h"

#include "gtest/gtest.h"
#include "sherpa-onnx/csrc/onnx-utils.h"

namespace sherpa_onnx {

TEST(Stack, Test1DTensors) {
  Ort::AllocatorWithDefaultOptions allocator;

  std::array<int64_t, 1> a_shape{3};
  std::array<int64_t, 1> b_shape{3};

  Ort::Value a = Ort::Value::CreateTensor<float>(allocator, a_shape.data(),
                                                 a_shape.size());

  Ort::Value b = Ort::Value::CreateTensor<float>(allocator, b_shape.data(),
                                                 b_shape.size());
  float *pa = a.GetTensorMutableData<float>();
  float *pb = b.GetTensorMutableData<float>();
  for (int32_t i = 0; i != static_cast<int32_t>(a_shape[0]); ++i) {
    pa[i] = i;
  }
  for (int32_t i = 0; i != static_cast<int32_t>(b_shape[0]); ++i) {
    pb[i] = i + 10;
  }

  Ort::Value ans = Stack(allocator, {&a, &b}, 0);

  Print1D(&a);
  Print1D(&b);
  Print2D(&ans);

  const float *pans = ans.GetTensorData<float>();
  for (int32_t i = 0; i != static_cast<int32_t>(a_shape[0]); ++i) {
    EXPECT_EQ(pa[i], pans[i]);
  }

  for (int32_t i = 0; i != static_cast<int32_t>(b_shape[0]); ++i) {
    EXPECT_EQ(pb[i], pans[i + a_shape[0]]);
  }
}

TEST(Stack, Test2DTensorsDim0) {
  Ort::AllocatorWithDefaultOptions allocator;

  std::array<int64_t, 2> a_shape{2, 3};
  std::array<int64_t, 2> b_shape{2, 3};

  Ort::Value a = Ort::Value::CreateTensor<float>(allocator, a_shape.data(),
                                                 a_shape.size());

  Ort::Value b = Ort::Value::CreateTensor<float>(allocator, b_shape.data(),
                                                 b_shape.size());

  float *pa = a.GetTensorMutableData<float>();
  float *pb = b.GetTensorMutableData<float>();
  for (int32_t i = 0; i != static_cast<int32_t>(a_shape[0] * a_shape[1]); ++i) {
    pa[i] = i;
  }
  for (int32_t i = 0; i != static_cast<int32_t>(b_shape[0] * b_shape[1]); ++i) {
    pb[i] = i + 10;
  }

  Ort::Value ans = Stack(allocator, {&a, &b}, 0);

  Print2D(&a);
  Print2D(&b);
  Print3D(&ans);

  const float *pans = ans.GetTensorData<float>();
  for (int32_t i = 0; i != static_cast<int32_t>(a_shape[0] * a_shape[1]); ++i) {
    EXPECT_EQ(pa[i], pans[i]);
  }
  for (int32_t i = 0; i != static_cast<int32_t>(b_shape[0] * b_shape[1]); ++i) {
    EXPECT_EQ(pb[i], pans[i + a_shape[0] * a_shape[1]]);
  }
}

TEST(Stack, Test2DTensorsDim1) {
  Ort::AllocatorWithDefaultOptions allocator;

  std::array<int64_t, 2> a_shape{4, 3};
  std::array<int64_t, 2> b_shape{4, 3};

  Ort::Value a = Ort::Value::CreateTensor<float>(allocator, a_shape.data(),
                                                 a_shape.size());

  Ort::Value b = Ort::Value::CreateTensor<float>(allocator, b_shape.data(),
                                                 b_shape.size());

  float *pa = a.GetTensorMutableData<float>();
  float *pb = b.GetTensorMutableData<float>();
  for (int32_t i = 0; i != static_cast<int32_t>(a_shape[0] * a_shape[1]); ++i) {
    pa[i] = i;
  }
  for (int32_t i = 0; i != static_cast<int32_t>(b_shape[0] * b_shape[1]); ++i) {
    pb[i] = i + 10;
  }

  Ort::Value ans = Stack(allocator, {&a, &b}, 1);

  Print2D(&a);
  Print2D(&b);
  Print3D(&ans);

  const float *pans = ans.GetTensorData<float>();

  for (int32_t r = 0; r != static_cast<int32_t>(a_shape[0]); ++r) {
    for (int32_t i = 0; i != static_cast<int32_t>(a_shape[1]);
         ++i, ++pa, ++pans) {
      EXPECT_EQ(*pa, *pans);
    }

    for (int32_t i = 0; i != static_cast<int32_t>(b_shape[1]);
         ++i, ++pb, ++pans) {
      EXPECT_EQ(*pb, *pans);
    }
  }
}

TEST(Stack, Test3DTensorsDim0) {
  Ort::AllocatorWithDefaultOptions allocator;

  std::array<int64_t, 3> a_shape{2, 3, 2};
  std::array<int64_t, 3> b_shape{2, 3, 2};

  Ort::Value a = Ort::Value::CreateTensor<float>(allocator, a_shape.data(),
                                                 a_shape.size());

  Ort::Value b = Ort::Value::CreateTensor<float>(allocator, b_shape.data(),
                                                 b_shape.size());

  float *pa = a.GetTensorMutableData<float>();
  float *pb = b.GetTensorMutableData<float>();
  for (int32_t i = 0;
       i != static_cast<int32_t>(a_shape[0] * a_shape[1] * a_shape[2]); ++i) {
    pa[i] = i;
  }
  for (int32_t i = 0;
       i != static_cast<int32_t>(b_shape[0] * b_shape[1] * b_shape[2]); ++i) {
    pb[i] = i + 10;
  }

  Ort::Value ans = Stack(allocator, {&a, &b}, 0);

  const float *pans = ans.GetTensorData<float>();
  for (int32_t i = 0;
       i != static_cast<int32_t>(a_shape[0] * a_shape[1] * a_shape[2]); ++i) {
    EXPECT_EQ(pa[i], pans[i]);
  }
  for (int32_t i = 0;
       i != static_cast<int32_t>(b_shape[0] * b_shape[1] * b_shape[2]); ++i) {
    EXPECT_EQ(pb[i], pans[i + a_shape[0] * a_shape[1] * a_shape[2]]);
  }

  Print3D(&a);
  Print3D(&b);
  Print4D(&ans);
}

TEST(Stack, Test3DTensorsDim1) {
  Ort::AllocatorWithDefaultOptions allocator;

  std::array<int64_t, 3> a_shape{2, 2, 3};
  std::array<int64_t, 3> b_shape{2, 2, 3};

  Ort::Value a = Ort::Value::CreateTensor<float>(allocator, a_shape.data(),
                                                 a_shape.size());

  Ort::Value b = Ort::Value::CreateTensor<float>(allocator, b_shape.data(),
                                                 b_shape.size());

  float *pa = a.GetTensorMutableData<float>();
  float *pb = b.GetTensorMutableData<float>();
  for (int32_t i = 0;
       i != static_cast<int32_t>(a_shape[0] * a_shape[1] * a_shape[2]); ++i) {
    pa[i] = i;
  }
  for (int32_t i = 0;
       i != static_cast<int32_t>(b_shape[0] * b_shape[1] * b_shape[2]); ++i) {
    pb[i] = i + 10;
  }

  Ort::Value ans = Stack(allocator, {&a, &b}, 1);

  const float *pans = ans.GetTensorData<float>();

  for (int32_t i = 0; i != static_cast<int32_t>(a_shape[0]); ++i) {
    for (int32_t k = 0; k != static_cast<int32_t>(a_shape[1] * a_shape[2]);
         ++k, ++pa, ++pans) {
      EXPECT_EQ(*pa, *pans);
    }

    for (int32_t k = 0; k != static_cast<int32_t>(b_shape[1] * b_shape[2]);
         ++k, ++pb, ++pans) {
      EXPECT_EQ(*pb, *pans);
    }
  }

  Print3D(&a);
  Print3D(&b);
  Print4D(&ans);
}

TEST(Stack, Test3DTensorsDim2) {
  Ort::AllocatorWithDefaultOptions allocator;

  std::array<int64_t, 3> a_shape{2, 3, 4};
  std::array<int64_t, 3> b_shape{2, 3, 4};

  Ort::Value a = Ort::Value::CreateTensor<float>(allocator, a_shape.data(),
                                                 a_shape.size());

  Ort::Value b = Ort::Value::CreateTensor<float>(allocator, b_shape.data(),
                                                 b_shape.size());

  float *pa = a.GetTensorMutableData<float>();
  float *pb = b.GetTensorMutableData<float>();
  for (int32_t i = 0;
       i != static_cast<int32_t>(a_shape[0] * a_shape[1] * a_shape[2]); ++i) {
    pa[i] = i;
  }
  for (int32_t i = 0;
       i != static_cast<int32_t>(b_shape[0] * b_shape[1] * b_shape[2]); ++i) {
    pb[i] = i + 10;
  }

  Ort::Value ans = Stack(allocator, {&a, &b}, 2);

  const float *pans = ans.GetTensorData<float>();

  for (int32_t i = 0; i != static_cast<int32_t>(a_shape[0] * a_shape[1]); ++i) {
    for (int32_t k = 0; k != static_cast<int32_t>(a_shape[2]);
         ++k, ++pa, ++pans) {
      EXPECT_EQ(*pa, *pans);
    }

    for (int32_t k = 0; k != static_cast<int32_t>(b_shape[2]);
         ++k, ++pb, ++pans) {
      EXPECT_EQ(*pb, *pans);
    }
  }

  Print3D(&a);
  Print3D(&b);
  Print4D(&ans);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/stack.cc
================================================
// sherpa-onnx/csrc/stack.cc
//
// Copyright (c) 2023 Jingzhao Ou (jingzhao.ou@gmail.com)

#include "sherpa-onnx/csrc/stack.h"

#include <algorithm>
#include <functional>
#include <numeric>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"

namespace sherpa_onnx {

static bool Compare(const std::vector<int64_t> &a,
                    const std::vector<int64_t> &b) {
  if (a.size() != b.size()) return false;

  for (int32_t i = 0; i != static_cast<int32_t>(a.size()); ++i) {
    if (a[i] != b[i]) return false;
  }

  return true;
}

static void PrintShape(const std::vector<int64_t> &a) {
  for (auto i : a) {
    SHERPA_ONNX_LOGE("%d ", static_cast<int32_t>(i));
  }
  SHERPA_ONNX_LOGE("\n");
}

template <typename T /*=float*/>
Ort::Value Stack(OrtAllocator *allocator,
                 const std::vector<const Ort::Value *> &values, int32_t dim) {
  std::vector<int64_t> v0_shape =
      values[0]->GetTensorTypeAndShapeInfo().GetShape();

  for (int32_t i = 1; i != static_cast<int32_t>(values.size()); ++i) {
    auto s = values[i]->GetTensorTypeAndShapeInfo().GetShape();
    bool ret = Compare(v0_shape, s);
    if (!ret) {
      SHERPA_ONNX_LOGE("Incorrect shape in Stack !\n");

      SHERPA_ONNX_LOGE("Shape for tensor 0: ");
      PrintShape(v0_shape);

      SHERPA_ONNX_LOGE("Shape for tensor %d: ", i);
      PrintShape(s);

      exit(-1);
    }
  }

  std::vector<int64_t> ans_shape;
  ans_shape.reserve(v0_shape.size() + 1);
  ans_shape.insert(ans_shape.end(), v0_shape.data(), v0_shape.data() + dim);
  ans_shape.push_back(values.size());
  ans_shape.insert(ans_shape.end(), v0_shape.data() + dim,
                   v0_shape.data() + v0_shape.size());

  auto leading_size = static_cast<int32_t>(std::accumulate(
      v0_shape.begin(), v0_shape.begin() + dim, 1, std::multiplies<int64_t>()));

  auto trailing_size = static_cast<int32_t>(std::accumulate(
      v0_shape.begin() + dim, v0_shape.end(), 1, std::multiplies<int64_t>()));

  Ort::Value ans = Ort::Value::CreateTensor<T>(allocator, ans_shape.data(),
                                               ans_shape.size());
  T *dst = ans.GetTensorMutableData<T>();

  for (int32_t i = 0; i != leading_size; ++i) {
    for (auto value : values) {
      const T *src = value->GetTensorData<T>();
      src += i * trailing_size;

      std::copy(src, src + trailing_size, dst);
      dst += trailing_size;
    }
  }

  return ans;
}

template Ort::Value Stack<float>(OrtAllocator *allocator,
                                 const std::vector<const Ort::Value *> &values,
                                 int32_t dim);

template Ort::Value Stack<int64_t>(
    OrtAllocator *allocator, const std::vector<const Ort::Value *> &values,
    int32_t dim);

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/stack.h
================================================
// sherpa-onnx/csrc/stack.h
//
// Copyright (c) 2023 Jingzhao Ou (jingzhao.ou@gmail.com)

#ifndef SHERPA_ONNX_CSRC_STACK_H_
#define SHERPA_ONNX_CSRC_STACK_H_

#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT

namespace sherpa_onnx {

/** Stack a list of tensors along the given dim.
 *
 * @param allocator Allocator to allocate space for the returned tensor
 * @param values  Pointer to a list of tensors. The shape of the tensor must
 *                be the same except on the dim to be stacked.
 * @param dim  The dim along which to concatenate the input tensors
 *
 * @return Return the stacked tensor
 */
template <typename T = float>
Ort::Value Stack(OrtAllocator *allocator,
                 const std::vector<const Ort::Value *> &values, int32_t dim);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_STACK_H_


================================================
FILE: sherpa-onnx/csrc/symbol-table.cc
================================================
// sherpa-onnx/csrc/symbol-table.cc
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/symbol-table.h"

#include <algorithm>
#include <cassert>
#include <cctype>
#include <fstream>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/base64-decode.h"
#include "sherpa-onnx/csrc/bbpe.h"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/lexicon.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

namespace {
// copied from
// https://stackoverflow.com/questions/216823/how-to-trim-a-stdstring
const char *ws = " \t\n\r\f\v";

// trim from end of string (right)
inline void TrimRight(std::string *s, const char *t = ws) {
  s->erase(s->find_last_not_of(t) + 1);
}

// trim from beginning of string (left)
inline void TrimLeft(std::string *s, const char *t = ws) {
  s->erase(0, s->find_first_not_of(t));
}

// trim from both ends of string (right then left)
inline void Trim(std::string *s, const char *t = ws) {
  TrimRight(s, t);
  TrimLeft(s, t);
}

bool IsByteBPE(const char *s, int32_t n) {
  const uint8_t *p = reinterpret_cast<const uint8_t *>(s);
  if (n >= 3 && p[0] == 0xe2 && p[1] == 0x96 && p[2] == 0x81) {
    return IsByteBPE(s + 3, n - 3);
  }

  for (int32_t i = 0; i != n; ++i) {
    if (p[i] > 0xc6) {
      return false;
    }
  }

  return true;
}

bool IsByteBPE(const std::unordered_map<std::string, int32_t> &sym2id) {
  uint8_t max_v = 0;
  for (const auto &p : sym2id) {
    const auto &s = p.first;
    if (!IsByteBPE(s.c_str(), s.size())) {
      return false;
    }

    uint8_t m = 0;
    if (s.size() >= 3) {
      const uint8_t *p = reinterpret_cast<const uint8_t *>(s.c_str());

      if (p[0] == 0xe2 && p[1] == 0x96 && p[2] == 0x81) {
        if (s.size() > 3) {
          m = *std::max_element(
              reinterpret_cast<const uint8_t *>(s.data()) + 3,
              reinterpret_cast<const uint8_t *>(s.data()) + s.size());
        } else {
          m = 0;
        }
      } else {
        m = *std::max_element(
            reinterpret_cast<const uint8_t *>(s.data()),
            reinterpret_cast<const uint8_t *>(s.data()) + s.size());
      }
    } else {
      m = *std::max_element(
          reinterpret_cast<const uint8_t *>(s.data()),
          reinterpret_cast<const uint8_t *>(s.data()) + s.size());
    }

    max_v = (m > max_v) ? m : max_v;
  }

  return static_cast<uint8_t>(max_v) == 0xc6;
}

}  // namespace

std::unordered_map<std::string, int32_t> ReadTokens(
    std::istream &is,
    std::unordered_map<int32_t, std::string> *id2token /*= nullptr*/) {
  std::unordered_map<std::string, int32_t> token2id;

  std::string line;

  std::string sym;
  int32_t id = -1;
  while (std::getline(is, line)) {
    Trim(&line);
    std::istringstream iss(line);
    iss >> sym;
    if (iss.eof()) {
      id = atoi(sym.c_str());
      sym = " ";
    } else {
      iss >> id;
    }

    // eat the trailing \r\n on windows
    iss >> std::ws;
    if (!iss.eof()) {
      SHERPA_ONNX_LOGE("Error: %s", line.c_str());
      SHERPA_ONNX_EXIT(-1);
    }

#if 0
    if (token2id.count(sym)) {
      SHERPA_ONNX_LOGE("Duplicated token %s. Line %s. Existing ID: %d",
                       sym.c_str(), line.c_str(), token2id.at(sym));
      SHERPA_ONNX_EXIT(-1);
    }
#endif
    if (id2token) {
      id2token->insert({id, sym});
    }

    token2id.insert({std::move(sym), id});
  }

  return token2id;
}

SymbolTable::SymbolTable(const std::string &filename, bool is_file) {
  if (is_file) {
    std::ifstream is(filename);
    Init(is);
  } else {
    std::istringstream iss(filename);
    Init(iss);
  }
}

template <typename Manager>
SymbolTable::SymbolTable(Manager *mgr, const std::string &filename) {
  auto buf = ReadFile(mgr, filename);

  std::istringstream is(std::string(buf.data(), buf.size()));
  Init(is);
}

void SymbolTable::Init(std::istream &is) {
  sym2id_ = ReadTokens(is, &id2sym_);
  is_bbpe_ = IsByteBPE(sym2id_);

  if (sym2id_.count("<0x00>") && sym2id_.count("<0xFF>") &&
      ((sym2id_.at("<0xFF>") - sym2id_.at("<0x00>")) == 255)) {
    is_bpe_with_byte_fallback_ = true;
    id_for_0x00_ = sym2id_.at("<0x00>");
  }
}

std::string SymbolTable::ToString() const {
  std::ostringstream os;
  char sep = ' ';
  for (const auto &p : sym2id_) {
    os << p.first << sep << p.second << "\n";
  }
  return os.str();
}

const std::string SymbolTable::operator[](int32_t id) const {
  std::string sym = id2sym_.at(id);
  if (sym.size() >= 3 && !is_bbpe_) {
    // For BPE-based models, we replace ▁ with a space
    // Unicode 9601, hex 0x2581, utf8 0xe29681
    const uint8_t *p = reinterpret_cast<const uint8_t *>(sym.c_str());
    if (p[0] == 0xe2 && p[1] == 0x96 && p[2] == 0x81) {
      sym = sym.replace(0, 3, " ");
    }
  }

  // for BPE with byte_fallback
  // id 0 is blank, id 1 is sos/eos, id 2 is unk
  //
  // Note: For moonshine models, 0 is <unk>, 1, is <s>, 2 is</s>
  if (is_bpe_with_byte_fallback_ && sym.size() == 6 && sym[0] == '<' &&
      sym[1] == '0' && sym[2] == 'x' && sym[5] == '>') {
    std::ostringstream os;
    os << std::hex << std::uppercase << (id - id_for_0x00_);

    if (std::string(sym.data() + 3, sym.data() + 5) == os.str()) {
      uint8_t i = id - id_for_0x00_;
      sym = std::string(&i, &i + 1);
    }
  }
  return sym;
}

int32_t SymbolTable::operator[](const std::string &sym) const {
  return sym2id_.at(sym);
}

bool SymbolTable::Contains(int32_t id) const { return id2sym_.count(id) != 0; }

bool SymbolTable::Contains(const std::string &sym) const {
  return sym2id_.count(sym) != 0;
}

std::ostream &operator<<(std::ostream &os, const SymbolTable &symbol_table) {
  return os << symbol_table.ToString();
}

void SymbolTable::ApplyBase64Decode() {
  sym2id_.clear();
  for (auto &p : id2sym_) {
    if (p.second == " ") {
      // for FunASR nano models, there is an empty string in the tokens.txt,
      // which is converted to " " while reading it in sherpa-onnx. We convert
      // it back to "" here
      p.second = "";
    } else {
      p.second = Base64Decode(p.second);
    }
    sym2id_[p.second] = p.first;
  }
}

std::string SymbolTable::DecodeByteBpe(const std::string &text) const {
  if (!is_bbpe_) {
    return text;
  }
  auto v = SplitUtf8(text);

  const auto &bbpe_table = GetByteBpeTable();
  std::string ans;
  for (const auto &s : v) {
    if (s == "▁") {
      if (!ans.empty() && ans.back() != ' ' && std::isprint(ans.back())) {
        ans.push_back(' ');
      }
    } else if (bbpe_table.count(s)) {
      ans.push_back(bbpe_table.at(s));
    } else if (std::isprint(s[0])) {
      ans.append(s);
    } else {
      // Should not happen
      SHERPA_ONNX_LOGE("Skip OOV: %s from %s", s.c_str(), text.c_str());
    }
  }

  // TODO(fangjun): Filter invalid utf-8 sequences
  return ans;
}

#if __ANDROID_API__ >= 9
template SymbolTable::SymbolTable(AAssetManager *mgr,
                                  const std::string &filename);
#endif

#if __OHOS__
template SymbolTable::SymbolTable(NativeResourceManager *mgr,
                                  const std::string &filename);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/symbol-table.h
================================================
// sherpa-onnx/csrc/symbol-table.h
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_SYMBOL_TABLE_H_
#define SHERPA_ONNX_CSRC_SYMBOL_TABLE_H_

#include <istream>
#include <string>
#include <unordered_map>
#include <vector>

namespace sherpa_onnx {

// The same token can be mapped to different integer IDs, so
// we need an id2token argument here.
std::unordered_map<std::string, int32_t> ReadTokens(
    std::istream &is,
    std::unordered_map<int32_t, std::string> *id2token = nullptr);

std::vector<int32_t> ConvertTokensToIds(
    const std::unordered_map<std::string, int32_t> &token2id,
    const std::vector<std::string> &tokens);

/// It manages mapping between symbols and integer IDs.
class SymbolTable {
 public:
  SymbolTable() = default;
  /// Construct a symbol table from a file or from a buffered string.
  /// Each line in the file contains two fields:
  ///
  ///    sym ID
  ///
  /// Fields are separated by space(s).
  explicit SymbolTable(const std::string &filename, bool is_file = true);

  template <typename Manager>
  SymbolTable(Manager *mgr, const std::string &filename);

  /// Return a string representation of this symbol table
  std::string ToString() const;

  /// Return the symbol corresponding to the given ID.
  const std::string operator[](int32_t id) const;
  /// Return the ID corresponding to the given symbol.
  int32_t operator[](const std::string &sym) const;

  /// Return true if there is a symbol with the given ID.
  bool Contains(int32_t id) const;

  /// Return true if there is a given symbol in the symbol table.
  bool Contains(const std::string &sym) const;

  // for tokens.txt from Whisper
  void ApplyBase64Decode();

  int32_t NumSymbols() const { return id2sym_.size(); }

  std::string DecodeByteBpe(const std::string &text) const;

  bool IsByteBpe() const { return is_bbpe_; }

 private:
  void Init(std::istream &is);

 private:
  std::unordered_map<std::string, int32_t> sym2id_;
  std::unordered_map<int32_t, std::string> id2sym_;

  // see https://github.com/k2-fsa/sherpa-onnx/issues/2524
  bool is_bpe_with_byte_fallback_ = false;

  // used only when is_bpe_with_byte_fallback_ is true. It is the ID
  // of <0x00> in tokens.txt
  int32_t id_for_0x00_ = 0;

  // true for byte BPE. false for non byte BPE.
  bool is_bbpe_ = false;
};

std::ostream &operator<<(std::ostream &os, const SymbolTable &symbol_table);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_SYMBOL_TABLE_H_


================================================
FILE: sherpa-onnx/csrc/tee-stream.h
================================================
// Code in this file is copied and modified from
// https://wordaligned.org/articles/cpp-streambufs

#ifndef SHERPA_ONNX_CSRC_TEE_STREAM_H_
#define SHERPA_ONNX_CSRC_TEE_STREAM_H_
#include <ostream>
#include <streambuf>
#include <string>

namespace sherpa_onnx {

template <typename char_type, typename traits = std::char_traits<char_type>>
class basic_teebuf : public std::basic_streambuf<char_type, traits> {
 public:
  using int_type = typename traits::int_type;

  basic_teebuf(std::basic_streambuf<char_type, traits> *sb1,
               std::basic_streambuf<char_type, traits> *sb2)
      : sb1(sb1), sb2(sb2) {}

 private:
  int sync() override {
    int const r1 = sb1->pubsync();
    int const r2 = sb2->pubsync();
    return r1 == 0 && r2 == 0 ? 0 : -1;
  }

  int_type overflow(int_type c) override {
    int_type const eof = traits::eof();

    if (traits::eq_int_type(c, eof)) {
      return traits::not_eof(c);
    } else {
      char_type const ch = traits::to_char_type(c);
      int_type const r1 = sb1->sputc(ch);
      int_type const r2 = sb2->sputc(ch);

      return traits::eq_int_type(r1, eof) || traits::eq_int_type(r2, eof) ? eof
                                                                          : c;
    }
  }

 private:
  std::basic_streambuf<char_type, traits> *sb1;
  std::basic_streambuf<char_type, traits> *sb2;
};

using teebuf = basic_teebuf<char>;

class TeeStream : public std::ostream {
 public:
  TeeStream(std::ostream &o1, std::ostream &o2)
      : std::ostream(&tbuf), tbuf(o1.rdbuf(), o2.rdbuf()) {}

 private:
  teebuf tbuf;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_TEE_STREAM_H_


================================================
FILE: sherpa-onnx/csrc/ten-vad-model-config.cc
================================================
// sherpa-onnx/csrc/ten-vad-model-config.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/ten-vad-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void TenVadModelConfig::Register(ParseOptions *po) {
  po->Register("ten-vad-model", &model, "Path to TEN VAD ONNX model.");

  po->Register("ten-vad-threshold", &threshold,
               "Speech threshold. TEN VAD outputs speech probabilities for "
               "each audio chunk, probabilities ABOVE this value are "
               "considered as SPEECH. It is better to tune this parameter for "
               "each dataset separately, but lazy "
               "0.5 is pretty good for most datasets.");

  po->Register("ten-vad-min-silence-duration", &min_silence_duration,
               "In seconds.  In the end of each speech chunk wait for "
               "--ten-vad-min-silence-duration seconds before separating it");

  po->Register("ten-vad-min-speech-duration", &min_speech_duration,
               "In seconds.  In the end of each silence chunk wait for "
               "--ten-vad-min-speech-duration seconds before separating it");

  po->Register(
      "ten-vad-max-speech-duration", &max_speech_duration,
      "In seconds. If a speech segment is longer than this value, then we "
      "increase the threshold to 0.9. After finishing detecting the segment, "
      "the threshold value is reset to its original value.");

  po->Register(
      "ten-vad-window-size", &window_size,
      "In samples. Audio chunks of --ten-vad-window-size samples are fed "
      "to the ten VAD model. WARNING! Please use 160 or 256 ");
}

bool TenVadModelConfig::Validate() const {
  if (model.empty()) {
    SHERPA_ONNX_LOGE("Please provide --ten-vad-model");
    return false;
  }

  if (!FileExists(model)) {
    SHERPA_ONNX_LOGE("TEN vad model file '%s' does not exist", model.c_str());
    return false;
  }

  if (threshold < 0.01) {
    SHERPA_ONNX_LOGE(
        "Please use a larger value for --ten-vad-threshold. Given: %f",
        threshold);
    return false;
  }

  if (threshold >= 1) {
    SHERPA_ONNX_LOGE(
        "Please use a smaller value for --ten-vad-threshold. Given: %f",
        threshold);
    return false;
  }

  if (min_silence_duration <= 0) {
    SHERPA_ONNX_LOGE(
        "Please use a larger value for --ten-vad-min-silence-duration. "
        "Given: "
        "%f",
        min_silence_duration);
    return false;
  }

  if (min_speech_duration <= 0) {
    SHERPA_ONNX_LOGE(
        "Please use a larger value for --ten-vad-min-speech-duration. "
        "Given: "
        "%f",
        min_speech_duration);
    return false;
  }

  if (max_speech_duration <= 0) {
    SHERPA_ONNX_LOGE(
        "Please use a larger value for --ten-vad-max-speech-duration. "
        "Given: "
        "%f",
        max_speech_duration);
    return false;
  }

  return true;
}

std::string TenVadModelConfig::ToString() const {
  std::ostringstream os;

  os << "TenVadModelConfig(";
  os << "model=\"" << model << "\", ";
  os << "threshold=" << threshold << ", ";
  os << "min_silence_duration=" << min_silence_duration << ", ";
  os << "min_speech_duration=" << min_speech_duration << ", ";
  os << "max_speech_duration=" << max_speech_duration << ", ";
  os << "window_size=" << window_size << ")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/ten-vad-model-config.h
================================================
// sherpa-onnx/csrc/ten-vad-model-config.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_TEN_VAD_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_TEN_VAD_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct TenVadModelConfig {
  std::string model;

  // threshold to classify a segment as speech
  //
  // If the predicted probability of a segment is larger than this
  // value, then it is classified as speech.
  float threshold = 0.5;

  float min_silence_duration = 0.5;  // in seconds

  float min_speech_duration = 0.25;  // in seconds

  // 160 or 256
  int32_t window_size = 256;  // in samples

  // If a speech segment is longer than this value, then we increase
  // the threshold to 0.9. After finishing detecting the segment,
  // the threshold value is reset to its original value.
  float max_speech_duration = 20;  // in seconds

  TenVadModelConfig() = default;

  void Register(ParseOptions *po);

  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_TEN_VAD_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/ten-vad-model.cc
================================================
// sherpa-onnx/csrc/ten-vad-model.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/ten-vad-model.h"

#include <algorithm>
#include <cmath>
#include <cstring>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "Eigen/Dense"
#include "kaldi-native-fbank/csrc/mel-computations.h"
#include "kaldi-native-fbank/csrc/rfft.h"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

class TenVadModel::Impl {
 public:
  explicit Impl(const VadModelConfig &config)
      : config_(config),
        rfft_(1024),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{},
        sample_rate_(config.sample_rate) {
    auto buf = ReadFile(config.ten_vad.model);
    Init(buf.data(), buf.size());
  }

  template <typename Manager>
  Impl(Manager *mgr, const VadModelConfig &config)
      : config_(config),
        rfft_(1024),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{},
        sample_rate_(config.sample_rate) {
    auto buf = ReadFile(mgr, config.ten_vad.model);
    Init(buf.data(), buf.size());
  }

  float Run(const float *samples, int32_t n) {
    ComputeFeatures(samples, n);

    auto memory_info =
        Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

    std::array<int64_t, 3> x_shape = {1, 3, 41};

    Ort::Value x = Ort::Value::CreateTensor(memory_info, last_features_.data(),
                                            last_features_.size(),
                                            x_shape.data(), x_shape.size());

    std::vector<Ort::Value> inputs;
    inputs.reserve(input_names_.size());

    inputs.push_back(std::move(x));
    for (auto &s : states_) {
      inputs.push_back(std::move(s));
    }

    auto out =
        sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(),
                   output_names_ptr_.data(), output_names_ptr_.size());

    for (int32_t i = 1; i != static_cast<int32_t>(output_names_.size()); ++i) {
      states_[i - 1] = std::move(out[i]);
    }

    float prob = out[0].GetTensorData<float>()[0];

    return prob;
  }
  void Reset() {
    triggered_ = false;
    current_sample_ = 0;
    temp_start_ = 0;
    temp_end_ = 0;

    last_sample_ = 0;

    last_features_.resize(3 * 41);
    std::fill(last_features_.begin(), last_features_.end(), 0.0f);
    tmp_samples_.resize(1024);

    ResetStates();
  }

  bool IsSpeech(const float *samples, int32_t n) {
    if (n != WindowSize()) {
      SHERPA_ONNX_LOGE("n: %d != window_size: %d", n, WindowSize());
      SHERPA_ONNX_EXIT(-1);
    }

    float prob = Run(samples, n);

    float threshold = config_.ten_vad.threshold;

    current_sample_ += config_.ten_vad.window_size;

    if (prob > threshold && temp_end_ != 0) {
      temp_end_ = 0;
    }

    if (prob > threshold && temp_start_ == 0) {
      // start speaking, but we require that it must satisfy
      // min_speech_duration
      temp_start_ = current_sample_;
      return false;
    }

    if (prob > threshold && temp_start_ != 0 && !triggered_) {
      if (current_sample_ - temp_start_ < min_speech_samples_) {
        return false;
      }

      triggered_ = true;

      return true;
    }

    if ((prob < threshold) && !triggered_) {
      // silence
      temp_start_ = 0;
      temp_end_ = 0;
      return false;
    }

    if ((prob > threshold - 0.15) && triggered_) {
      // speaking
      return true;
    }

    if ((prob > threshold) && !triggered_) {
      // start speaking
      triggered_ = true;

      return true;
    }

    if ((prob < threshold) && triggered_) {
      // stop to speak
      if (temp_end_ == 0) {
        temp_end_ = current_sample_;
      }

      if (current_sample_ - temp_end_ < min_silence_samples_) {
        // continue speaking
        return true;
      }
      // stopped speaking
      temp_start_ = 0;
      temp_end_ = 0;
      triggered_ = false;
      return false;
    }

    return false;
  }

  int32_t WindowShift() const { return config_.ten_vad.window_size; }

  int32_t WindowSize() const { return config_.ten_vad.window_size; }

  int32_t MinSilenceDurationSamples() const { return min_silence_samples_; }

  int32_t MinSpeechDurationSamples() const { return min_speech_samples_; }

  void SetMinSilenceDuration(float s) {
    min_silence_samples_ = sample_rate_ * s;
  }

  void SetThreshold(float threshold) { config_.ten_vad.threshold = threshold; }

 private:
  void Init(void *model_data, size_t model_data_length) {
    if (sample_rate_ != 16000) {
      SHERPA_ONNX_LOGE("Expected sample rate 16000. Given: %d",
                       config_.sample_rate);
      SHERPA_ONNX_EXIT(-1);
    }

    if (config_.ten_vad.window_size > 768) {
      SHERPA_ONNX_LOGE("Windows size %d for ten-vad is too large",
                       config_.ten_vad.window_size);
      SHERPA_ONNX_EXIT(-1);
    }

    min_silence_samples_ = sample_rate_ * config_.ten_vad.min_silence_duration;

    min_speech_samples_ = sample_rate_ * config_.ten_vad.min_speech_duration;

    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);
    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    InitMelBanks();

    Check();

    Reset();
  }

  void ResetStates() {
    std::array<int64_t, 2> shape{1, 64};

    states_.clear();
    states_.reserve(4);
    for (int32_t i = 0; i != 4; ++i) {
      Ort::Value s = Ort::Value::CreateTensor<float>(allocator_, shape.data(),
                                                     shape.size());

      Fill<float>(&s, 0);
      states_.push_back(std::move(s));
    }
  }

  void InitMelBanks() {
    knf::FrameExtractionOptions frame_opts;

    // 16 kHz, so num_fft is 16000*64/1000 = 1024
    frame_opts.frame_length_ms = 64;

    knf::MelBanksOptions mel_opts;
    mel_opts.is_librosa = true;
    mel_opts.norm = "";
    mel_opts.use_slaney_mel_scale = true;
    mel_opts.floor_to_int_bin = true;
    mel_opts.low_freq = 0;
    mel_opts.high_freq = 8000;
    mel_opts.num_bins = 40;

    mel_banks_ = std::make_unique<knf::MelBanks>(mel_opts, frame_opts, 1.0f);

    features_.resize(41);
  }

  void Check() {
    // get meta data
    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      os << "---ten-vad---\n";
      PrintModelMetadata(os, meta_data);
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }
    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below

    std::string model_type;
    SHERPA_ONNX_READ_META_DATA_STR_ALLOW_EMPTY(model_type, "model_type");

    if (model_type.empty()) {
      SHERPA_ONNX_LOGE(
          "Please download ten-vad.onnx or ten-vad.int8.onnx from\n"
          "https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models"
          "\nWe have added meta data to the original ten-vad.onnx from\n"
          "https://github.com/TEN-framework/ten-vad");
      SHERPA_ONNX_EXIT(-1);
    }

    if (model_type != "ten-vad") {
      SHERPA_ONNX_LOGE("Expect model type 'ten-vad', given '%s'",
                       model_type.c_str());
      SHERPA_ONNX_EXIT(-1);
    }

    SHERPA_ONNX_READ_META_DATA_VEC_FLOAT(mean_, "mean");
    SHERPA_ONNX_READ_META_DATA_VEC_FLOAT(inv_stddev_, "inv_stddev");
    SHERPA_ONNX_READ_META_DATA_VEC_FLOAT(window_, "window");

    if (mean_.size() != 41) {
      SHERPA_ONNX_LOGE(
          "Incorrect size of the mean vector. Given %d, expected 41",
          static_cast<int32_t>(mean_.size()));
      SHERPA_ONNX_EXIT(-1);
    }

    if (inv_stddev_.size() != 41) {
      SHERPA_ONNX_LOGE(
          "Incorrect size of the inv_stddev vector. Given %d, expected 41",
          static_cast<int32_t>(inv_stddev_.size()));
      SHERPA_ONNX_EXIT(-1);
    }

    if (window_.size() != 768) {
      SHERPA_ONNX_LOGE(
          "Incorrect size of the window vector. Given %d, expected 768",
          static_cast<int32_t>(window_.size()));
      SHERPA_ONNX_EXIT(-1);
    }
  }

  static void Scale(const float *samples, int32_t n, float *out) {
    Eigen::Map<const Eigen::ArrayXf> input(samples, n);
    Eigen::Map<Eigen::ArrayXf> output(out, n);
    constexpr float kScale = 32768.0f;
    output = input * kScale;
  }

  void Preemphasis(const float *samples, int32_t n, float *out) {
    float t = samples[n - 1];

    for (int32_t i = n - 1; i > 0; --i) {
      out[i] = samples[i] - 0.97 * samples[i - 1];
    }

    out[0] = samples[0] - 0.97 * last_sample_;

    last_sample_ = t;
  }

  static void ApplyWindow(const float *samples, const float *window, int32_t n,
                          float *out) {
    Eigen::Map<const Eigen::ArrayXf> samp_vec(samples, n);
    Eigen::Map<const Eigen::ArrayXf> win_vec(window, n);
    Eigen::Map<Eigen::ArrayXf> out_vec(out, n);
    out_vec = samp_vec * win_vec;
  }

  static void ComputePowerSpectrum(const float *fft_bins, int32_t n,
                                   float *out) {
    out[0] = fft_bins[0] * fft_bins[0];
    out[n - 1] = fft_bins[1] * fft_bins[1];

    for (int32_t i = 1; i < n / 2; ++i) {
      float real = fft_bins[2 * i];
      float imag = fft_bins[2 * i + 1];
      out[i] = real * real + imag * imag;
    }
  }

  static void LogMel(const float *in, int32_t n, float *out) {
    Eigen::Map<const Eigen::ArrayXf> input(in, n);
    Eigen::Map<Eigen::ArrayXf> output(out, n);
    // 20.79441541679836 is log(32768*32768)
    constexpr float kLogScale = 20.79441541679836f;
    output = (input + 1e-10f).log() - kLogScale;
  }

  void ApplyNormalization(const float *in, float *out) const {
    int32_t dim = static_cast<int32_t>(mean_.size());

    Eigen::Map<const Eigen::ArrayXf> input(in, dim);
    Eigen::Map<Eigen::ArrayXf> output(out, dim);
    Eigen::Map<const Eigen::ArrayXf> mean_vec(mean_.data(), dim);
    Eigen::Map<const Eigen::ArrayXf> inv_stddev_vec(inv_stddev_.data(), dim);
    output = (input - mean_vec) * inv_stddev_vec;
  }

  void ComputeFeatures(const float *samples, int32_t n) {
    std::fill(tmp_samples_.begin() + n, tmp_samples_.end(), 0.0f);

    Scale(samples, n, tmp_samples_.data());

    Preemphasis(tmp_samples_.data(), n, tmp_samples_.data());
    ApplyWindow(tmp_samples_.data(), window_.data(), n, tmp_samples_.data());

    rfft_.Compute(tmp_samples_.data());
    auto &power_spectrum = tmp_samples_;
    ComputePowerSpectrum(tmp_samples_.data(), tmp_samples_.size(),
                         power_spectrum.data());

    // note only the first half of power_spectrum is used inside Compute()
    mel_banks_->Compute(power_spectrum.data(), features_.data());
    LogMel(features_.data(), static_cast<int32_t>(features_.size()) - 1,
           features_.data());

    // Note(fangjun): The ten-vad model expects a pitch feature, but we set it
    // to 0 as a simplification. This may reduce performance as noted
    // in the PR #2377
    features_.back() = 0;

    ApplyNormalization(features_.data(), features_.data());

    std::memmove(last_features_.data(),
                 last_features_.data() + features_.size(),
                 2 * features_.size() * sizeof(float));
    std::copy(features_.begin(), features_.end(),
              last_features_.begin() + 2 * features_.size());
  }

 private:
  VadModelConfig config_;
  knf::Rfft rfft_;
  std::unique_ptr<knf::MelBanks> mel_banks_;

  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;

  std::vector<Ort::Value> states_;
  int64_t sample_rate_;
  int32_t min_silence_samples_;
  int32_t min_speech_samples_;

  bool triggered_ = false;
  int32_t current_sample_ = 0;
  int32_t temp_start_ = 0;
  int32_t temp_end_ = 0;

  float last_sample_ = 0;

  std::vector<float> mean_;
  std::vector<float> inv_stddev_;
  std::vector<float> window_;

  std::vector<float> features_;
  std::vector<float> last_features_;  // (3, 41), row major
  std::vector<float> tmp_samples_;    // (1024,)
};

TenVadModel::TenVadModel(const VadModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
TenVadModel::TenVadModel(Manager *mgr, const VadModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

TenVadModel::~TenVadModel() = default;

void TenVadModel::Reset() { return impl_->Reset(); }

bool TenVadModel::IsSpeech(const float *samples, int32_t n) {
  return impl_->IsSpeech(samples, n);
}

int32_t TenVadModel::WindowSize() const { return impl_->WindowSize(); }

int32_t TenVadModel::WindowShift() const { return impl_->WindowShift(); }

int32_t TenVadModel::MinSilenceDurationSamples() const {
  return impl_->MinSilenceDurationSamples();
}

int32_t TenVadModel::MinSpeechDurationSamples() const {
  return impl_->MinSpeechDurationSamples();
}

void TenVadModel::SetMinSilenceDuration(float s) {
  impl_->SetMinSilenceDuration(s);
}

void TenVadModel::SetThreshold(float threshold) {
  impl_->SetThreshold(threshold);
}

float TenVadModel::Compute(const float *samples, int32_t n) {
  return impl_->Run(samples, n);
}

#if __ANDROID_API__ >= 9
template TenVadModel::TenVadModel(AAssetManager *mgr,
                                  const VadModelConfig &config);
#endif

#if __OHOS__
template TenVadModel::TenVadModel(NativeResourceManager *mgr,
                                  const VadModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/ten-vad-model.h
================================================
// sherpa-onnx/csrc/ten-vad-model.h
//
// Copyright (c)  2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_TEN_VAD_MODEL_H_
#define SHERPA_ONNX_CSRC_TEN_VAD_MODEL_H_

#include <memory>

#include "sherpa-onnx/csrc/vad-model.h"

namespace sherpa_onnx {

class TenVadModel : public VadModel {
 public:
  explicit TenVadModel(const VadModelConfig &config);

  template <typename Manager>
  TenVadModel(Manager *mgr, const VadModelConfig &config);

  ~TenVadModel() override;

  // reset the internal model states
  void Reset() override;

  /**
   * @param samples Pointer to a 1-d array containing audio samples.
   *                Each sample should be normalized to the range [-1, 1].
   * @param n Number of samples.
   *
   * @return Return true if speech is detected. Return false otherwise.
   */
  bool IsSpeech(const float *samples, int32_t n) override;

  float Compute(const float *samples, int32_t n) override;

  // 256 or 160
  int32_t WindowSize() const override;

  // 256 or 128
  int32_t WindowShift() const override;

  int32_t MinSilenceDurationSamples() const override;
  int32_t MinSpeechDurationSamples() const override;

  void SetMinSilenceDuration(float s) override;
  void SetThreshold(float threshold) override;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_TEN_VAD_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/text-utils-test.cc
================================================
// sherpa-onnx/csrc/text-utils-test.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/text-utils.h"

#include <cstdio>
#include <cstring>
#include <iostream>
#include <regex>
#include <sstream>
#include <string>
#include <vector>

#include "gtest/gtest.h"

namespace sherpa_onnx {

TEST(ToLowerCase, WideString) {
  std::string text =
      "Hallo! Übeltäter übergibt Ärzten öfters äußerst ätzende Öle 3€";
  auto t = ToLowerCase(text);
  std::cout << text << "\n";
  std::cout << t << "\n";
}

TEST(RemoveInvalidUtf8Sequences, Case1) {
  std::vector<uint8_t> v = {
      0xe4, 0xbb, 0x8a,                                  // 今
      0xe5, 0xa4, 0xa9,                                  // 天
      'i',  's',  ' ',  'M', 'o', 'd', 'a', 'y',  ',',   // is Monday,
      ' ',  'w',  'i',  'e', ' ', 'h', 'e', 'i',  0xc3,  // wie heißen Size
      0x9f, 'e',  'n',  ' ', 'S', 'i', 'e', 0xf0, 0x9d, 0x84, 0x81};

  std::vector<uint8_t> v0 = v;
  v0[1] = 0xc0;  // make the first 3 bytes an invalid utf8 character
  std::string s0{v0.begin(), v0.end()};
  EXPECT_EQ(s0.size(), v0.size());

  auto s = RemoveInvalidUtf8Sequences(s0);  // should remove 今

  v0 = v;
  // v0[23] == 0xc3
  // v0[24] == 0x9f

  v0[23] = 0xc1;

  s0 = {v0.begin(), v0.end()};
  s = RemoveInvalidUtf8Sequences(s0);  // should remove ß

  EXPECT_EQ(s.size() + 2, v.size());

  v0 = v;
  // v0[31] = 0xf0;
  // v0[32] = 0x9d;
  // v0[33] = 0x84;
  // v0[34] = 0x81;
  v0[31] = 0xf5;

  s0 = {v0.begin(), v0.end()};
  s = RemoveInvalidUtf8Sequences(s0);

  EXPECT_EQ(s.size() + 4, v.size());
}

// Tests for sanitizeUtf8
TEST(RemoveInvalidUtf8Sequences, ValidUtf8StringPassesUnchanged) {
  std::string input = "Valid UTF-8 🌍";
  EXPECT_EQ(RemoveInvalidUtf8Sequences(input), input);
}

TEST(RemoveInvalidUtf8Sequences, SingleInvalidByteReplaced) {
  std::string input = "Invalid \xFF UTF-8";
  std::string expected = "Invalid  UTF-8";
  EXPECT_EQ(RemoveInvalidUtf8Sequences(input), expected);
}

TEST(RemoveInvalidUtf8Sequences, TruncatedUtf8SequenceReplaced) {
  std::string input = "Broken \xE2\x82";  // Incomplete UTF-8 sequence
  std::string expected = "Broken ";
  EXPECT_EQ(RemoveInvalidUtf8Sequences(input), expected);
}

TEST(RemoveInvalidUtf8Sequences, MultipleInvalidBytes) {
  std::string input = "Test \xC0\xC0\xF8\xA0";  // Multiple invalid sequences
  std::string expected = "Test ";
  EXPECT_EQ(RemoveInvalidUtf8Sequences(input), expected);
}

TEST(RemoveInvalidUtf8Sequences, BreakingCase_SpaceFollowedByInvalidByte) {
  std::string input = "\x20\xC4";  // Space followed by an invalid byte
  std::string expected = " ";      // 0xC4 removed
  EXPECT_EQ(RemoveInvalidUtf8Sequences(input), expected);
}

TEST(RemoveInvalidUtf8Sequences, ValidUtf8WithEdgeCaseCharacters) {
  std::string input = "Edge 🏆💯";
  EXPECT_EQ(RemoveInvalidUtf8Sequences(input), input);
}

TEST(RemoveInvalidUtf8Sequences, MixedValidAndInvalidBytes) {
  std::string input = "Mix \xE2\x82\xAC \xF0\x9F\x98\x81 \xFF";
  std::string expected = "Mix € 😁 ";  // Invalid bytes removed
  EXPECT_EQ(RemoveInvalidUtf8Sequences(input), expected);
}

TEST(RemoveInvalidUtf8Sequences, SpaceFollowedByInvalidByte) {
  std::string input = "\x20\xC4";  // Space (0x20) followed by invalid (0xC4)
  std::string expected = " ";      // Space remains, 0xC4 is removed
  EXPECT_EQ(RemoveInvalidUtf8Sequences(input), expected);
}

TEST(RemoveInvalidUtf8Sequences, RemoveTruncatedC4) {
  std::string input = "Hello \xc4 world";  // Invalid `0xC4`
  std::string expected = "Hello  world";   // `0xC4` should be removed
  EXPECT_EQ(RemoveInvalidUtf8Sequences(input), expected);
}

TEST(RemoveInvalidUtf8Sequences, SpaceFollowedByInvalidByte_Breaking) {
  std::string input = "\x20\xc4";  // Space followed by invalid `0xc4`
  std::string expected = " ";      // `0xc4` should be removed, space remains
  EXPECT_EQ(RemoveInvalidUtf8Sequences(input), expected);
}

TEST(RemoveInvalidUtf8Sequences, DebugSpaceFollowedByInvalidByte) {
  std::string input = "\x20\xc4";  // Space followed by invalid `0xc4`
  std::string output = RemoveInvalidUtf8Sequences(input);

  std::cout << "Processed string: ";
  for (unsigned char c : output) {
    printf("\\x%02x ", c);
  }
  std::cout << std::endl;

  EXPECT_EQ(output, " ");  // Expect `0xc4` to be removed, leaving only space
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/text-utils.cc
================================================
// sherpa-onnx/csrc/text-utils.cc
//
// Copyright 2009-2011  Saarland University;  Microsoft Corporation
// Copyright      2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/text-utils.h"

#include <algorithm>
#include <cassert>
#include <cctype>
#include <charconv>
#include <cinttypes>
#include <climits>
#include <cstdint>
#include <cstdlib>
#include <cwctype>
#include <limits>
#include <locale>
#include <sstream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#if defined(_WIN32)
#include <Windows.h>
#endif

#include "sherpa-onnx/csrc/macros.h"

// This file is copied/modified from
// https://github.com/kaldi-asr/kaldi/blob/master/src/util/text-utils.cc

namespace sherpa_onnx {

// copied from kaldi/src/util/text-util.cc
template <class T>
class NumberIstream {
 public:
  explicit NumberIstream(std::istream &i) : in_(i) {}

  NumberIstream &operator>>(T &x) {
    if (!in_.good()) return *this;
    in_ >> x;
    if (!in_.fail() && RemainderIsOnlySpaces()) return *this;
    return ParseOnFail(&x);
  }

 private:
  std::istream &in_;

  bool RemainderIsOnlySpaces() {
    if (in_.tellg() != std::istream::pos_type(-1)) {
      std::string rem;
      in_ >> rem;

      if (rem.find_first_not_of(' ') != std::string::npos) {
        // there is not only spaces
        return false;
      }
    }

    in_.clear();
    return true;
  }

  NumberIstream &ParseOnFail(T *x) {
    std::string str;
    in_.clear();
    in_.seekg(0);
    // If the stream is broken even before trying
    // to read from it or if there are many tokens,
    // it's pointless to try.
    if (!(in_ >> str) || !RemainderIsOnlySpaces()) {
      in_.setstate(std::ios_base::failbit);
      return *this;
    }

    std::unordered_map<std::string, T> inf_nan_map;
    // we'll keep just uppercase values.
    inf_nan_map["INF"] = std::numeric_limits<T>::infinity();
    inf_nan_map["+INF"] = std::numeric_limits<T>::infinity();
    inf_nan_map["-INF"] = -std::numeric_limits<T>::infinity();
    inf_nan_map["INFINITY"] = std::numeric_limits<T>::infinity();
    inf_nan_map["+INFINITY"] = std::numeric_limits<T>::infinity();
    inf_nan_map["-INFINITY"] = -std::numeric_limits<T>::infinity();
    inf_nan_map["NAN"] = std::numeric_limits<T>::quiet_NaN();
    inf_nan_map["+NAN"] = std::numeric_limits<T>::quiet_NaN();
    inf_nan_map["-NAN"] = -std::numeric_limits<T>::quiet_NaN();
    // MSVC
    inf_nan_map["1.#INF"] = std::numeric_limits<T>::infinity();
    inf_nan_map["-1.#INF"] = -std::numeric_limits<T>::infinity();
    inf_nan_map["1.#QNAN"] = std::numeric_limits<T>::quiet_NaN();
    inf_nan_map["-1.#QNAN"] = -std::numeric_limits<T>::quiet_NaN();

    std::transform(str.begin(), str.end(), str.begin(), ::toupper);

    if (inf_nan_map.find(str) != inf_nan_map.end()) {
      *x = inf_nan_map[str];
    } else {
      in_.setstate(std::ios_base::failbit);
    }

    return *this;
  }
};

/// ConvertStringToReal converts a string into either float or double
/// and returns false if there was any kind of problem (i.e. the string
/// was not a floating point number or contained extra non-whitespace junk).
/// Be careful- this function will successfully read inf's or nan's.
template <typename T>
bool ConvertStringToReal(const std::string &str, T *out) {
  std::istringstream iss(str);

  NumberIstream<T> i(iss);

  i >> *out;

  if (iss.fail()) {
    // Number conversion failed.
    return false;
  }

  return true;
}

template bool ConvertStringToReal<float>(const std::string &str, float *out);

template bool ConvertStringToReal<double>(const std::string &str, double *out);

void SplitStringToVector(const std::string &full, const char *delim,
                         bool omit_empty_strings,
                         std::vector<std::string> *out) {
  size_t start = 0, found = 0, end = full.size();
  out->clear();
  while (found != std::string::npos) {
    found = full.find_first_of(delim, start);
    // start != end condition is for when the delimiter is at the end
    if (!omit_empty_strings || (found != start && start != end))
      out->push_back(full.substr(start, found - start));
    start = found + 1;
  }
}

std::string Trim(const std::string &str) {
  size_t start = 0;
  while (start < str.size() &&
         std::isspace(static_cast<unsigned char>(str[start]))) {
    start++;
  }
  size_t end = str.size();
  while (end > start &&
         std::isspace(static_cast<unsigned char>(str[end - 1]))) {
    end--;
  }
  return str.substr(start, end - start);
}

std::vector<std::string> SplitStringAndTrim(const std::string &str,
                                            char delim) {
  std::vector<std::string> result;
  std::string delim_str(1, delim);
  SplitStringToVector(str, delim_str.c_str(), true, &result);
  // Trim whitespace from each part
  for (auto &part : result) {
    part = Trim(part);
  }
  // Remove empty strings after trimming
  result.erase(std::remove_if(result.begin(), result.end(),
                              [](const std::string &s) { return s.empty(); }),
               result.end());
  return result;
}

template <class F>
bool SplitStringToFloats(const std::string &full, const char *delim,
                         bool omit_empty_strings,  // typically false
                         std::vector<F> *out) {
  assert(out != nullptr);
  if (*(full.c_str()) == '\0') {
    out->clear();
    return true;
  }
  std::vector<std::string> split;
  SplitStringToVector(full, delim, omit_empty_strings, &split);
  out->resize(split.size());
  for (size_t i = 0; i < split.size(); ++i) {
    // assume atof never fails
    F f = 0;
    if (!ConvertStringToReal(split[i], &f)) return false;
    (*out)[i] = f;
  }
  return true;
}

// Instantiate the template above for float and double.
template bool SplitStringToFloats(const std::string &full, const char *delim,
                                  bool omit_empty_strings,
                                  std::vector<float> *out);
template bool SplitStringToFloats(const std::string &full, const char *delim,
                                  bool omit_empty_strings,
                                  std::vector<double> *out);

static bool IsPunct(char c) { return c != '\'' && std::ispunct(c); }
static bool IsGermanUmlaut(const std::string &word) {
  // ä 0xC3 0xA4
  // ö 0xC3 0xB6
  // ü 0xC3 0xBC
  // Ä 0xC3 0x84
  // Ö 0xC3 0x96
  // Ü 0xC3 0x9C
  // ß 0xC3 0x9F

  if (word.size() != 2 || static_cast<uint8_t>(word[0]) != 0xc3) {
    return false;
  }

  auto c = static_cast<uint8_t>(word[1]);
  if (c == 0xa4 || c == 0xb6 || c == 0xbc || c == 0x84 || c == 0x96 ||
      c == 0x9c || c == 0x9f) {
    return true;
  }

  return false;
}

// see https://www.tandem.net/blog/spanish-accents
// https://www.compart.com/en/unicode/U+00DC
static bool IsSpanishDiacritic(const std::string &word) {
  // á 0xC3 0xA1
  // é 0xC3 0xA9
  // í 0xC3 0xAD
  // ó 0xC3 0xB3
  // ú 0xC3 0xBA
  // ü 0xC3 0xBC
  // ñ 0xC3 0xB1
  //
  // uppercase
  //
  // Á 0xC3 0x81
  // É 0xC3 0x89
  // Í 0xC3 0x8D
  // Ó 0xC3 0x93
  // Ú 0xC3 0x9A
  // Ü 0xC3 0x9C
  // Ñ 0xC3 0x91

  if (word.size() != 2 || static_cast<uint8_t>(word[0]) != 0xc3) {
    return false;
  }

  auto c = static_cast<uint8_t>(word[1]);
  if (c == 0xa1 || c == 0xa9 || c == 0xad || c == 0xb3 || c == 0xba ||
      c == 0xbc || c == 0xb1 || c == 0x81 || c == 0x89 || c == 0x8d ||
      c == 0x93 || c == 0x9a || c == 0x9c || c == 0x91) {
    return true;
  }

  return false;
}

// see https://www.busuu.com/en/french/accent-marks
static bool IsFrenchDiacritic(const std::string &word) {
  // acute accent
  // é 0xC3 0xA9
  //
  // grave accent
  // à 0xC3 0xA0
  // è 0xC3 0xA8
  // ù 0xC3 0xB9
  //
  // cedilla
  // ç 0xC3 0xA7
  //
  // circumflex
  // â 0xC3 0xA2
  // ê 0xC3 0xAA
  // î 0xC3 0xAE
  // ô 0xC3 0xB4
  // û 0xC3 0xBB
  //
  // trema
  // ë 0xC3 0xAB
  // ï 0xC3 0xAF
  // ü 0xC3 0xBC
  //
  // É 0xC3 0x89
  //
  // À 0xC3 0x80
  // È 0xC3 0x88
  // Ù 0xC3 0x99
  // Ç 0xC3 0x87
  // Â 0xC3 0x82
  // Ê 0xC3 0x8A
  // Î 0xC3 0x8E
  // Ô 0xC3 0x94
  // Û 0xC3 0x9B
  // Ë 0xC3 0x8B
  // Ï 0xC3 0x8F
  // Ü 0xC3 0x9C

  if (word.size() != 2 || static_cast<uint8_t>(word[0]) != 0xc3) {
    return false;
  }

  auto c = static_cast<uint8_t>(word[1]);
  if (c == 0xa9 || c == 0xa0 || c == 0xa8 || c == 0xb9 || c == 0xa7 ||
      c == 0xa2 || c == 0xaa || c == 0xae || c == 0xb4 || c == 0xbb ||
      c == 0xab || c == 0xaf || c == 0xbc || c == 0x89 || c == 0x80 ||
      c == 0x88 || c == 0x99 || c == 0x87 || c == 0x82 || c == 0x8a ||
      c == 0x8e || c == 0x94 || c == 0x9b || c == 0x8b || c == 0x8f ||
      c == 0x9c) {
    return true;
  }
  return false;
}

static bool IsSpecial(const std::string &w) {
  bool ans = IsGermanUmlaut(w) || IsSpanishDiacritic(w) || IsFrenchDiacritic(w);

  // for french d’impossible
  // ’ 0xE2 0x80 0x99
  bool ans2 = false;
  if (w.size() == 3) {
    auto c0 = static_cast<uint8_t>(w[0]);
    auto c1 = static_cast<uint8_t>(w[1]);
    auto c2 = static_cast<uint8_t>(w[2]);
    if (c0 == 0xe2 && c1 == 0x80 && c2 == 0x99) {
      ans2 = true;
    }
  }

  return ans || ans2;
}

static std::vector<std::string> MergeCharactersIntoWords(
    const std::vector<std::string> &words) {
  std::vector<std::string> ans;

  int32_t n = static_cast<int32_t>(words.size());
  int32_t i = 0;
  int32_t prev = -1;

  while (i < n) {
    const auto &w = words[i];
    if (w.size() >= 3 || (w.size() == 2 && !IsSpecial(w)) ||
        (w.size() == 1 &&
         (IsPunct(w[0]) || std::isspace(static_cast<uint8_t>(w[0]))))) {
      if (prev != -1) {
        std::string t;
        for (; prev < i; ++prev) {
          t.append(words[prev]);
        }
        prev = -1;
        ans.push_back(std::move(t));
      }

      if (!std::isspace(static_cast<uint8_t>(w[0]))) {
        ans.push_back(w);
      }
      ++i;
      continue;
    }

    // e.g., öffnen
    if (w.size() == 1 || (w.size() == 2 && IsSpecial(w))) {
      if (prev == -1) {
        prev = i;
      }
      ++i;
      continue;
    }

    SHERPA_ONNX_LOGE("Ignore %s", w.c_str());
    ++i;
  }

  if (prev != -1) {
    std::string t;
    for (; prev < i; ++prev) {
      t.append(words[prev]);
    }
    ans.push_back(std::move(t));
  }

  return ans;
}

std::vector<std::string> SplitUtf8(const std::string &text) {
  const uint8_t *begin = reinterpret_cast<const uint8_t *>(text.c_str());
  const uint8_t *end = begin + text.size();

  // Note that English words are split into single characters.
  // We need to invoke MergeCharactersIntoWords() to merge them
  std::vector<std::string> ans;

  auto start = begin;
  while (start < end) {
    uint8_t c = *start;
    uint8_t i = 0x80;
    int32_t num_bytes = 0;

    // see
    // https://en.wikipedia.org/wiki/UTF-8
    for (; c & i; i >>= 1) {
      ++num_bytes;
    }

    if (num_bytes == 0) {
      // this is an ascii
      ans.emplace_back(reinterpret_cast<const char *>(start), 1);
      ++start;
    } else if (2 <= num_bytes && num_bytes <= 4) {
      ans.emplace_back(reinterpret_cast<const char *>(start), num_bytes);
      start += num_bytes;
    } else {
      SHERPA_ONNX_LOGE("Invalid byte at position: %d",
                       static_cast<int32_t>(start - begin));
      // skip this byte
      ++start;
    }
  }

  return MergeCharactersIntoWords(ans);
}

std::string ToLowerCase(const std::string &s) {
  return ToString(ToLowerCase(ToWideString(s)));
}

void ToLowerCase(std::string *in_out) {
  std::transform(in_out->begin(), in_out->end(), in_out->begin(),
                 [](unsigned char c) { return std::tolower(c); });
}

std::wstring ToLowerCase(const std::wstring &s) {
  std::wstring ans(s.size(), 0);
  std::transform(s.begin(), s.end(), ans.begin(), [](wchar_t c) -> wchar_t {
    switch (c) {
      // French
      case L'À':
        return L'à';
      case L'Â':
        return L'â';
      case L'Æ':
        return L'æ';
      case L'Ç':
        return L'ç';
      case L'È':
        return L'è';
      case L'É':
        return L'é';
      case L'Ë':
        return L'ë';
      case L'Î':
        return L'î';
      case L'Ï':
        return L'ï';
      case L'Ô':
        return L'ô';
      case L'Ù':
        return L'ù';
      case L'Û':
        return L'û';
      case L'Ü':
        return L'ü';

      // others
      case L'Á':
        return L'á';
      case L'Í':
        return L'í';
      case L'Ó':
        return L'ó';
      case L'Ú':
        return L'ú';
      case L'Ñ':
        return L'ñ';
      case L'Ì':
        return L'ì';
      case L'Ò':
        return L'ò';
      case L'Ä':
        return L'ä';
      case L'Ö':
        return L'ö';
        // TODO(fangjun): Add more

      default:
        return std::towlower(c);
    }
  });
  return ans;
}

static inline bool InRange(uint8_t x, uint8_t low, uint8_t high) {
  return low <= x && x <= high;
}

/*
Please see
https://stackoverflow.com/questions/6555015/check-for-invalid-utf8


Table 3-7. Well-Formed UTF-8 Byte Sequences

Code Points        First Byte Second Byte Third Byte Fourth Byte
U+0000..U+007F     00..7F
U+0080..U+07FF     C2..DF     80..BF
U+0800..U+0FFF     E0         A0..BF      80..BF
U+1000..U+CFFF     E1..EC     80..BF      80..BF
U+D000..U+D7FF     ED         80..9F      80..BF
U+E000..U+FFFF     EE..EF     80..BF      80..BF
U+10000..U+3FFFF   F0         90..BF      80..BF     80..BF
U+40000..U+FFFFF   F1..F3     80..BF      80..BF     80..BF
U+100000..U+10FFFF F4         80..8F      80..BF     80..BF
 */
std::string RemoveInvalidUtf8Sequences(const std::string &text,
                                       bool show_debug_msg /*= false*/) {
  int32_t n = static_cast<int32_t>(text.size());

  std::string ans;
  ans.reserve(n);

  int32_t i = 0;
  const uint8_t *p = reinterpret_cast<const uint8_t *>(text.data());
  while (i < n) {
    if (p[i] <= 0x7f) {
      ans.append(text, i, 1);
      i += 1;
      continue;
    }

    if (InRange(p[i], 0xc2, 0xdf) && i + 1 < n &&
        InRange(p[i + 1], 0x80, 0xbf)) {
      ans.append(text, i, 2);
      i += 2;
      continue;
    }

    if (p[i] == 0xe0 && i + 2 < n && InRange(p[i + 1], 0xa0, 0xbf) &&
        InRange(p[i + 2], 0x80, 0xbf)) {
      ans.append(text, i, 3);
      i += 3;
      continue;
    }

    if (InRange(p[i], 0xe1, 0xec) && i + 2 < n &&
        InRange(p[i + 1], 0x80, 0xbf) && InRange(p[i + 2], 0x80, 0xbf)) {
      ans.append(text, i, 3);
      i += 3;
      continue;
    }

    if (p[i] == 0xed && i + 2 < n && InRange(p[i + 1], 0x80, 0x9f) &&
        InRange(p[i + 2], 0x80, 0xbf)) {
      ans.append(text, i, 3);
      i += 3;
      continue;
    }

    if (InRange(p[i], 0xee, 0xef) && i + 2 < n &&
        InRange(p[i + 1], 0x80, 0xbf) && InRange(p[i + 2], 0x80, 0xbf)) {
      ans.append(text, i, 3);
      i += 3;
      continue;
    }

    if (p[i] == 0xf0 && i + 3 < n && InRange(p[i + 1], 0x90, 0xbf) &&
        InRange(p[i + 2], 0x80, 0xbf) && InRange(p[i + 3], 0x80, 0xbf)) {
      ans.append(text, i, 4);
      i += 4;
      continue;
    }

    if (InRange(p[i], 0xf1, 0xf3) && i + 3 < n &&
        InRange(p[i + 1], 0x80, 0xbf) && InRange(p[i + 2], 0x80, 0xbf) &&
        InRange(p[i + 3], 0x80, 0xbf)) {
      ans.append(text, i, 4);
      i += 4;
      continue;
    }

    if (p[i] == 0xf4 && i + 3 < n && InRange(p[i + 1], 0x80, 0x8f) &&
        InRange(p[i + 2], 0x80, 0xbf) && InRange(p[i + 3], 0x80, 0xbf)) {
      ans.append(text, i, 4);
      i += 4;
      continue;
    }

    if (show_debug_msg) {
      SHERPA_ONNX_LOGE("Ignore invalid utf8 sequence at pos: %d, value: %02x",
                       i, p[i]);
    }

    i += 1;
  }

  return ans;
}

bool IsUtf8(const std::string &text) {
  int32_t n = static_cast<int32_t>(text.size());
  int32_t i = 0;
  const uint8_t *p = reinterpret_cast<const uint8_t *>(text.data());
  while (i < n) {
    if (p[i] <= 0x7f) {
      i += 1;
      continue;
    }

    if (InRange(p[i], 0xc2, 0xdf) && i + 1 < n &&
        InRange(p[i + 1], 0x80, 0xbf)) {
      i += 2;
      continue;
    }

    if (p[i] == 0xe0 && i + 2 < n && InRange(p[i + 1], 0xa0, 0xbf) &&
        InRange(p[i + 2], 0x80, 0xbf)) {
      i += 3;
      continue;
    }

    if (InRange(p[i], 0xe1, 0xec) && i + 2 < n &&
        InRange(p[i + 1], 0x80, 0xbf) && InRange(p[i + 2], 0x80, 0xbf)) {
      i += 3;
      continue;
    }

    if (p[i] == 0xed && i + 2 < n && InRange(p[i + 1], 0x80, 0x9f) &&
        InRange(p[i + 2], 0x80, 0xbf)) {
      i += 3;
      continue;
    }

    if (InRange(p[i], 0xee, 0xef) && i + 2 < n &&
        InRange(p[i + 1], 0x80, 0xbf) && InRange(p[i + 2], 0x80, 0xbf)) {
      i += 3;
      continue;
    }

    if (p[i] == 0xf0 && i + 3 < n && InRange(p[i + 1], 0x90, 0xbf) &&
        InRange(p[i + 2], 0x80, 0xbf) && InRange(p[i + 3], 0x80, 0xbf)) {
      i += 4;
      continue;
    }

    if (InRange(p[i], 0xf1, 0xf3) && i + 3 < n &&
        InRange(p[i + 1], 0x80, 0xbf) && InRange(p[i + 2], 0x80, 0xbf) &&
        InRange(p[i + 3], 0x80, 0xbf)) {
      i += 4;
      continue;
    }

    if (p[i] == 0xf4 && i + 3 < n && InRange(p[i + 1], 0x80, 0x8f) &&
        InRange(p[i + 2], 0x80, 0xbf) && InRange(p[i + 3], 0x80, 0xbf)) {
      i += 4;
      continue;
    }

    return false;
  }

  return true;
}

bool IsGB2312(const std::string &text) {
  int32_t n = static_cast<int32_t>(text.size());
  int32_t i = 0;
  const uint8_t *p = reinterpret_cast<const uint8_t *>(text.data());
  while (i < n) {
    if (p[i] <= 0x7f) {
      i += 1;
      continue;
    }

    if (InRange(p[i], 0xa1, 0xf7) && i + 1 < n &&
        InRange(p[i + 1], 0xa1, 0xfe)) {
      i += 2;
      continue;
    }

    return false;
  }

  return true;
}

#if defined(_WIN32)
std::string Gb2312ToUtf8(const std::string &text) {
  // https://learn.microsoft.com/en-us/windows/win32/api/stringapiset/nf-stringapiset-multibytetowidechar
  // 936 is from
  // https://learn.microsoft.com/en-us/windows/win32/intl/code-page-identifiers
  // GB2312 -> 936
  int32_t num_wchars =
      MultiByteToWideChar(936, 0, text.c_str(), text.size(), nullptr, 0);
  SHERPA_ONNX_LOGE("num of wchars: %d", num_wchars);
  if (num_wchars == 0) {
    return {};
  }

  std::wstring wstr;
  wstr.resize(num_wchars);
  MultiByteToWideChar(936, 0, text.c_str(), text.size(), wstr.data(),
                      num_wchars);
  // https://learn.microsoft.com/en-us/windows/win32/api/stringapiset/nf-stringapiset-widechartomultibyte
  int32_t num_chars = WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), -1, nullptr,
                                          0, nullptr, nullptr);
  if (num_chars == 0) {
    return {};
  }

  std::string ans(num_chars, 0);
  WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), -1, ans.data(), num_chars,
                      nullptr, nullptr);

  return ans;
}
#endif

std::wstring ToWideString(const std::string &s) {
  std::u32string u32 = Utf8ToUtf32(s);
  std::wstring result;
  result.reserve(u32.size());

#if WCHAR_MAX > 0xFFFF
  // wchar_t is 32-bit (Linux, macOS) — direct copy
  for (char32_t cp : u32) {
    result.push_back(static_cast<wchar_t>(cp));
  }
#else
  // wchar_t is 16-bit (Windows) — encode surrogate pairs
  for (char32_t cp : u32) {
    if (cp <= 0xFFFF) {
      result.push_back(static_cast<wchar_t>(cp));
    } else {
      cp -= 0x10000;
      result.push_back(static_cast<wchar_t>(0xD800 + (cp >> 10)));
      result.push_back(static_cast<wchar_t>(0xDC00 + (cp & 0x3FF)));
    }
  }
#endif

  return result;
}

std::string ToString(const std::wstring &s) {
  std::u32string u32;
  u32.reserve(s.size());

#if WCHAR_MAX > 0xFFFF
  // wchar_t is 32-bit — direct copy
  for (wchar_t wc : s) {
    u32.push_back(static_cast<char32_t>(wc));
  }
#else
  // wchar_t is 16-bit — decode surrogate pairs
  for (size_t i = 0; i < s.size(); ++i) {
    auto wc = static_cast<uint16_t>(s[i]);
    if (wc >= 0xD800 && wc <= 0xDBFF) {
      // High surrogate — look for matching low surrogate
      if (i + 1 < s.size()) {
        auto wc2 = static_cast<uint16_t>(s[i + 1]);
        if (wc2 >= 0xDC00 && wc2 <= 0xDFFF) {
          char32_t cp = 0x10000 + ((static_cast<char32_t>(wc - 0xD800) << 10) |
                                   (wc2 - 0xDC00));
          u32.push_back(cp);
          ++i;
          continue;
        }
      }
      // Unpaired high surrogate
      u32.push_back(0xFFFD);
    } else if (wc >= 0xDC00 && wc <= 0xDFFF) {
      // Lone low surrogate
      u32.push_back(0xFFFD);
    } else {
      u32.push_back(static_cast<char32_t>(wc));
    }
  }
#endif

  return Utf32ToUtf8(u32);
}

bool EndsWith(const std::string &haystack, const std::string &needle) {
  if (needle.size() > haystack.size()) {
    return false;
  }

  return std::equal(needle.rbegin(), needle.rend(), haystack.rbegin());
}

bool Contains(const std::string &haystack, const std::string &needle) {
  if (needle.size() > haystack.size()) {
    return false;
  }

  return haystack.find(needle) != std::string::npos;
}

std::vector<std::string> SplitString(const std::string &s, int32_t chunk_size) {
  std::vector<std::string> ans;
  if (chunk_size < 1 || chunk_size > s.size()) {
    ans.push_back(s);
  } else {
    int32_t n = static_cast<int32_t>(s.size());
    int32_t i = 0;
    while (i < n) {
      int32_t end = std::min(i + chunk_size, n);
      ans.push_back(s.substr(i, end - i));
      i = end;
    }
  }
  return ans;
}

std::string Join(const std::vector<std::string> &ss, const std::string &delim) {
  std::ostringstream oss;
  if (!ss.empty()) {
    oss << ss[0];
    for (size_t i = 1; i < ss.size(); ++i) {
      oss << delim << ss[i];
    }
  }
  return oss.str();
}

std::string Utf32ToUtf8(char32_t cp) {
  // Clamp surrogates and out-of-range codepoints to U+FFFD
  if (cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF)) {
    cp = 0xFFFD;
  }

  std::string out;

  if (cp <= 0x7F) {
    out.push_back(static_cast<char>(cp));
  } else if (cp <= 0x7FF) {
    out.push_back(static_cast<char>(0xC0 | (cp >> 6)));
    out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
  } else if (cp <= 0xFFFF) {
    out.push_back(static_cast<char>(0xE0 | (cp >> 12)));
    out.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
    out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
  } else {
    out.push_back(static_cast<char>(0xF0 | (cp >> 18)));
    out.push_back(static_cast<char>(0x80 | ((cp >> 12) & 0x3F)));
    out.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
    out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
  }

  return out;
}

std::u32string Utf8ToUtf32(const std::string &str) {
  std::u32string out;
  out.reserve(str.size());

  const auto *p = reinterpret_cast<const uint8_t *>(str.data());
  const auto *end = p + str.size();

  // RFC 3629 / Unicode Table 3-7 validation with U+FFFD replacement
  // for maximal subpart of ill-formed subsequence (Unicode 3.9)
  while (p < end) {
    uint8_t b0 = *p;
    if (b0 <= 0x7F) {
      // ASCII
      out.push_back(static_cast<char32_t>(b0));
      ++p;
    } else if (InRange(b0, 0xC2, 0xDF)) {
      // 2-byte: U+0080..U+07FF (C2..DF starts at C2 to reject overlongs)
      if (p + 1 < end && InRange(p[1], 0x80, 0xBF)) {
        char32_t cp = (static_cast<char32_t>(b0 & 0x1F) << 6) | (p[1] & 0x3F);
        out.push_back(cp);
        p += 2;
      } else {
        out.push_back(0xFFFD);
        ++p;
      }
    } else if (b0 == 0xE0) {
      // 3-byte: U+0800..U+0FFF — second byte must be A0..BF (reject overlongs)
      if (p + 2 < end && InRange(p[1], 0xA0, 0xBF) &&
          InRange(p[2], 0x80, 0xBF)) {
        char32_t cp = (static_cast<char32_t>(b0 & 0x0F) << 12) |
                      (static_cast<char32_t>(p[1] & 0x3F) << 6) | (p[2] & 0x3F);
        out.push_back(cp);
        p += 3;
      } else {
        out.push_back(0xFFFD);
        ++p;
      }
    } else if (InRange(b0, 0xE1, 0xEC)) {
      // 3-byte: U+1000..U+CFFF
      if (p + 2 < end && InRange(p[1], 0x80, 0xBF) &&
          InRange(p[2], 0x80, 0xBF)) {
        char32_t cp = (static_cast<char32_t>(b0 & 0x0F) << 12) |
                      (static_cast<char32_t>(p[1] & 0x3F) << 6) | (p[2] & 0x3F);
        out.push_back(cp);
        p += 3;
      } else {
        out.push_back(0xFFFD);
        ++p;
      }
    } else if (b0 == 0xED) {
      // 3-byte: U+D000..U+D7FF — second byte must be 80..9F (reject surrogates)
      if (p + 2 < end && InRange(p[1], 0x80, 0x9F) &&
          InRange(p[2], 0x80, 0xBF)) {
        char32_t cp = (static_cast<char32_t>(b0 & 0x0F) << 12) |
                      (static_cast<char32_t>(p[1] & 0x3F) << 6) | (p[2] & 0x3F);
        out.push_back(cp);
        p += 3;
      } else {
        out.push_back(0xFFFD);
        ++p;
      }
    } else if (InRange(b0, 0xEE, 0xEF)) {
      // 3-byte: U+E000..U+FFFF
      if (p + 2 < end && InRange(p[1], 0x80, 0xBF) &&
          InRange(p[2], 0x80, 0xBF)) {
        char32_t cp = (static_cast<char32_t>(b0 & 0x0F) << 12) |
                      (static_cast<char32_t>(p[1] & 0x3F) << 6) | (p[2] & 0x3F);
        out.push_back(cp);
        p += 3;
      } else {
        out.push_back(0xFFFD);
        ++p;
      }
    } else if (b0 == 0xF0) {
      // 4-byte: U+10000..U+3FFFF — second byte must be 90..BF (reject
      // overlongs)
      if (p + 3 < end && InRange(p[1], 0x90, 0xBF) &&
          InRange(p[2], 0x80, 0xBF) && InRange(p[3], 0x80, 0xBF)) {
        char32_t cp = (static_cast<char32_t>(b0 & 0x07) << 18) |
                      (static_cast<char32_t>(p[1] & 0x3F) << 12) |
                      (static_cast<char32_t>(p[2] & 0x3F) << 6) | (p[3] & 0x3F);
        out.push_back(cp);
        p += 4;
      } else {
        out.push_back(0xFFFD);
        ++p;
      }
    } else if (InRange(b0, 0xF1, 0xF3)) {
      // 4-byte: U+40000..U+FFFFF
      if (p + 3 < end && InRange(p[1], 0x80, 0xBF) &&
          InRange(p[2], 0x80, 0xBF) && InRange(p[3], 0x80, 0xBF)) {
        char32_t cp = (static_cast<char32_t>(b0 & 0x07) << 18) |
                      (static_cast<char32_t>(p[1] & 0x3F) << 12) |
                      (static_cast<char32_t>(p[2] & 0x3F) << 6) | (p[3] & 0x3F);
        out.push_back(cp);
        p += 4;
      } else {
        out.push_back(0xFFFD);
        ++p;
      }
    } else if (b0 == 0xF4) {
      // 4-byte: U+100000..U+10FFFF — second byte must be 80..8F (reject >
      // U+10FFFF)
      if (p + 3 < end && InRange(p[1], 0x80, 0x8F) &&
          InRange(p[2], 0x80, 0xBF) && InRange(p[3], 0x80, 0xBF)) {
        char32_t cp = (static_cast<char32_t>(b0 & 0x07) << 18) |
                      (static_cast<char32_t>(p[1] & 0x3F) << 12) |
                      (static_cast<char32_t>(p[2] & 0x3F) << 6) | (p[3] & 0x3F);
        out.push_back(cp);
        p += 4;
      } else {
        out.push_back(0xFFFD);
        ++p;
      }
    } else {
      // Invalid lead byte (C0, C1, F5..FF, or bare continuation 80..BF)
      out.push_back(0xFFFD);
      ++p;
    }
  }

  return out;
}

std::string Utf32ToUtf8(const std::u32string &str) {
  std::string out;
  out.reserve(str.size() * 2);  // rough estimate

  for (char32_t cp : str) {
    // Clamp surrogates and out-of-range codepoints to U+FFFD
    if (cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF)) {
      cp = 0xFFFD;
    }

    if (cp <= 0x7F) {
      out.push_back(static_cast<char>(cp));
    } else if (cp <= 0x7FF) {
      out.push_back(static_cast<char>(0xC0 | (cp >> 6)));
      out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
    } else if (cp <= 0xFFFF) {
      out.push_back(static_cast<char>(0xE0 | (cp >> 12)));
      out.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
      out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
    } else {
      out.push_back(static_cast<char>(0xF0 | (cp >> 18)));
      out.push_back(static_cast<char>(0x80 | ((cp >> 12) & 0x3F)));
      out.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
      out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
    }
  }

  return out;
}

// Helper: Convert ASCII chars in a std::string to uppercase (leaves non-ASCII
// unchanged)
std::string ToUpperAscii(const std::string &str) {
  std::string out = str;
  for (char &c : out) {
    unsigned char uc = static_cast<unsigned char>(c);
    if (uc >= 'a' && uc <= 'z') {
      c = static_cast<char>(uc - 'a' + 'A');
    }
  }
  return out;
}

// Helper: Convert ASCII chars in a std::string to lowercase (leaves non-ASCII
// unchanged)
std::string ToLowerAscii(const std::string &str) {
  std::string out = str;
  for (char &c : out) {
    unsigned char uc = static_cast<unsigned char>(c);
    if (uc >= 'A' && uc <= 'Z') {
      c = static_cast<char>(uc - 'A' + 'a');
    }
  }
  return out;
}

// Detect if a codepoint is a CJK character
bool IsCJK(char32_t cp) {
  return (cp >= 0x1100 && cp <= 0x11FF) || (cp >= 0x2E80 && cp <= 0xA4CF) ||
         (cp >= 0xA840 && cp <= 0xD7AF) || (cp >= 0xF900 && cp <= 0xFAFF) ||
         (cp >= 0xFE30 && cp <= 0xFE4F) || (cp >= 0xFF65 && cp <= 0xFFDC) ||
         (cp >= 0x20000 && cp <= 0x2FFFF);
}

bool ContainsCJK(const std::string &text) {
  std::u32string utf32_text = Utf8ToUtf32(text);
  return ContainsCJK(utf32_text);
}

bool ContainsCJK(const std::u32string &text) {
  for (char32_t cp : text) {
    if (IsCJK(cp)) {
      return true;
    }
  }
  return false;
}

std::string GetWord(const std::vector<std::string> &words, int32_t start,
                    int32_t end) {
  std::string ans;

  int32_t ws = words.size();

  if (start >= ws || end >= ws || start < 0 || end < 0) {
    return ans;
  }

  for (int32_t i = start; i <= end; ++i) {
    ans += words[i];
  }

  return ans;
}

bool IsAlphaOrPunct(int ch) { return std::isalpha(ch) || std::ispunct(ch); }

bool IsPunct(const std::string &s) {
  static const std::unordered_set<std::string> puncts = {
      ",",  ".",  "!",  "?", ":", "\"", "'", "，",
      "。", "！", "？", "“", "”", "‘",  "’",
  };
  return puncts.count(s);
}

int32_t ToIntOrDefault(const std::string &s, int32_t default_value) {
  if (s.empty()) return default_value;

  std::string str = s;

  // Remove surrounding quotes if present
  if (str.size() >= 2 && str.front() == '"' && str.back() == '"') {
    str = str.substr(1, str.size() - 2);
  }

  int32_t value = default_value;
  auto [ptr, ec] = std::from_chars(str.data(), str.data() + str.size(), value);

  // Check for conversion errors or trailing characters
  if (ec != std::errc() || ptr != str.data() + str.size()) {
    return default_value;
  }

  return value;
}

float ToFloatOrDefault(const std::string &s, float default_value) {
  if (s.empty()) return default_value;

  std::string str = s;

  // Remove surrounding quotes if present
  if (str.size() >= 2 && str.front() == '"' && str.back() == '"') {
    str = str.substr(1, str.size() - 2);
  }

  char *end = nullptr;
  errno = 0;
  float value = std::strtof(str.c_str(), &end);

  // No conversion or out of range
  if (end == str.c_str() || errno == ERANGE) {
    return default_value;
  }

  // Reject trailing garbage
  if (*end != '\0') {
    return default_value;
  }

  return value;
}

void LengthsToMask(const std::vector<int64_t> &lengths,
                   std::vector<float> *mask_flat,
                   std::vector<int64_t> *mask_shape) {
  if (lengths.empty()) {
    mask_flat->clear();
    mask_shape->assign({0, 1, 0});
    return;
  }

  const int bsz = static_cast<int>(lengths.size());
  const int64_t max_len = *std::max_element(lengths.begin(), lengths.end());
  if (max_len < 0) {
    SHERPA_ONNX_LOGE("LengthsToMask: max_len (%" PRId64 ") < 0", max_len);
    SHERPA_ONNX_EXIT(-1);
  }

  mask_shape->assign({static_cast<int64_t>(lengths.size()), 1, max_len});

  size_t total_size = static_cast<size_t>(bsz) * static_cast<size_t>(max_len);
  mask_flat->assign(total_size, 0.0f);
  for (int b = 0; b < bsz; ++b) {
    int64_t len = lengths[b];
    float *batch_mask = mask_flat->data() + b * max_len;
    std::fill_n(batch_mask, len, 1.0f);
  }
}

std::vector<std::string> SplitByBlankLines(const std::string &text) {
  std::vector<std::string> paragraphs;
  std::string cur;

  auto flush = [&]() {
    std::string s = Trim(cur);
    if (!s.empty()) {
      paragraphs.emplace_back(std::move(s));
    }
    cur.clear();
  };

  size_t start = 0;
  const size_t n = text.size();

  while (start <= n) {
    size_t end = text.find('\n', start);
    if (end == std::string::npos) end = n;

    std::string line = text.substr(start, end - start);
    line = Trim(line);
    if (line.empty()) {
      flush();
    } else {
      if (!cur.empty()) cur.push_back(' ');
      cur += line;
    }

    if (end == n) break;
    start = end + 1;
  }
  flush();
  if (paragraphs.empty()) {
    std::string s = Trim(text);
    if (!s.empty()) paragraphs.emplace_back(std::move(s));
  }
  return paragraphs;
}

namespace {

bool IsSentenceBoundary(char32_t c) {
  return c == U'.' || c == U'!' || c == U'?' || c == U'。' || c == U'！' ||
         c == U'？';
}

bool IsChunkBoundary(char32_t c) {
  return IsSentenceBoundary(c) || c == U',' || c == U';' || c == U':' ||
         c == U'，' || c == U'；' || c == U'：';
}

bool IsSpace(char32_t c) {
  return c == U' ' || c == U'\t' || c == U'\n' || c == U'\r' || c == U'\f' ||
         c == U'\v';
}

size_t CountCodepoints(const std::string &s) { return Utf8ToUtf32(s).size(); }

bool NeedSpaceBetween(const std::string &left, const std::string &right) {
  if (left.empty() || right.empty()) {
    return false;
  }

  auto left_u32 = Utf8ToUtf32(left);
  auto right_u32 = Utf8ToUtf32(right);
  if (left_u32.empty() || right_u32.empty()) {
    return false;
  }

  char32_t last = left_u32.back();
  char32_t first = right_u32.front();

  if (IsSpace(last) || IsSpace(first)) {
    return false;
  }

  if (IsCJK(last) || IsCJK(first) || IsChunkBoundary(last) ||
      IsChunkBoundary(first)) {
    return false;
  }

  return true;
}

}  // namespace

std::vector<std::string> SplitByPunctuation(const std::string &text) {
  std::vector<std::string> sentences;
  std::u32string cur;
  auto flush = [&]() {
    std::string s = Trim(Utf32ToUtf8(cur));
    if (!s.empty()) sentences.emplace_back(std::move(s));
    cur.clear();
  };
  for (char32_t c : Utf8ToUtf32(text)) {
    cur.push_back(c);
    if (IsSentenceBoundary(c)) {
      flush();
    }
  }
  flush();
  return sentences;
}

std::vector<std::string> MergeShortSentences(
    const std::vector<std::string> &sentences, size_t min_chars) {
  std::vector<std::string> merged;
  std::string buffer;

  for (const auto &s : sentences) {
    std::string piece = Trim(s);
    if (piece.empty()) {
      continue;
    }

    if (!buffer.empty() && NeedSpaceBetween(buffer, piece)) {
      buffer += " ";
    }
    buffer += piece;

    if (CountCodepoints(buffer) >= min_chars) {
      merged.push_back(Trim(buffer));
      buffer.clear();
    }
  }

  if (!buffer.empty()) {
    merged.push_back(Trim(buffer));
  }

  return merged;
}

std::vector<std::string> SplitLongSentence(const std::string &sentence,
                                           size_t max_chars) {
  std::vector<std::string> chunks;
  if (max_chars == 0) return chunks;
  std::string s = Trim(sentence);
  if (s.empty()) return chunks;

  std::u32string u32 = Utf8ToUtf32(s);
  size_t start = 0;
  const size_t len = u32.size();
  while (start < len) {
    size_t end = std::min(start + max_chars, len);
    if (end >= len) {
      std::string piece = Trim(Utf32ToUtf8(u32.substr(start)));
      if (!piece.empty()) {
        chunks.emplace_back(std::move(piece));
      }
      break;
    }

    size_t split_pos = end;
    bool found = false;
    for (size_t i = end; i > start; --i) {
      char32_t c = u32[i - 1];
      if (IsSpace(c)) {
        split_pos = i - 1;
        found = true;
        break;
      }

      if (IsChunkBoundary(c)) {
        split_pos = i;
        found = true;
        break;
      }
    }

    if (!found || split_pos <= start) {
      split_pos = end;
    }

    std::string piece =
        Trim(Utf32ToUtf8(u32.substr(start, split_pos - start)));
    if (!piece.empty()) {
      chunks.emplace_back(std::move(piece));
    }

    start = split_pos;
    while (start < len && IsSpace(u32[start])) {
      ++start;
    }
  }
  return chunks;
}

std::vector<std::string> ChunkText(const std::string &text, size_t max_len) {
  std::vector<std::string> chunks;
  if (max_len == 0) return chunks;

  std::string text_single = Trim(text);
  if (text_single.empty()) return chunks;

  std::string cur;

  auto flush = [&]() {
    std::string s = Trim(cur);
    if (!s.empty()) chunks.emplace_back(std::move(s));
    cur.clear();
  };

  auto paragraphs = SplitByBlankLines(text_single);
  for (const auto &para : paragraphs) {
    auto sentences = SplitByPunctuation(para);
    for (const auto &sent : sentences) {
      auto pieces = SplitLongSentence(sent, max_len);
      for (auto &p : pieces) {
        if (p.empty()) continue;

        if (cur.empty()) {
          cur = std::move(p);
          continue;
        }

        if (cur.size() + 1 + p.size() <= max_len) {
          cur.push_back(' ');
          cur += p;
        } else {
          flush();
          cur = std::move(p);
        }
      }
    }
  }

  flush();
  if (chunks.empty()) chunks.emplace_back(std::move(text_single));
  return chunks;
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/text-utils.h
================================================
// sherpa-onnx/csrc/text-utils.h
//
// Copyright 2009-2011  Saarland University;  Microsoft Corporation
// Copyright      2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_TEXT_UTILS_H_
#define SHERPA_ONNX_CSRC_TEXT_UTILS_H_
#include <errno.h>
#include <stdlib.h>

#include <limits>
#include <string>
#include <type_traits>
#include <vector>

#ifdef _MSC_VER
#define SHERPA_ONNX_STRTOLL(cur_cstr, end_cstr) \
  _strtoi64(cur_cstr, end_cstr, 10);
#else
#define SHERPA_ONNX_STRTOLL(cur_cstr, end_cstr) strtoll(cur_cstr, end_cstr, 10);
#endif

// This file is copied/modified from
// https://github.com/kaldi-asr/kaldi/blob/master/src/util/text-utils.h

namespace sherpa_onnx {

/// Converts a string into an integer via strtoll and returns false if there was
/// any kind of problem (i.e. the string was not an integer or contained extra
/// non-whitespace junk, or the integer was too large to fit into the type it is
/// being converted into).  Only sets *out if everything was OK and it returns
/// true.
template <class Int>
bool ConvertStringToInteger(const std::string &str, Int *out) {
  // copied from kaldi/src/util/text-util.h
  static_assert(std::is_integral<Int>::value, "");
  const char *this_str = str.c_str();
  char *end = nullptr;
  errno = 0;
  int64_t i = SHERPA_ONNX_STRTOLL(this_str, &end);
  if (end != this_str) {
    while (isspace(*end)) ++end;
  }
  if (end == this_str || *end != '\0' || errno != 0) return false;
  Int iInt = static_cast<Int>(i);
  if (static_cast<int64_t>(iInt) != i ||
      (i < 0 && !std::numeric_limits<Int>::is_signed)) {
    return false;
  }
  *out = iInt;
  return true;
}

/// Split a string using any of the single character delimiters.
/// If omit_empty_strings == true, the output will contain any
/// nonempty strings after splitting on any of the
/// characters in the delimiter.  If omit_empty_strings == false,
/// the output will contain n+1 strings if there are n characters
/// in the set "delim" within the input string.  In this case
/// the empty string is split to a single empty string.
void SplitStringToVector(const std::string &full, const char *delim,
                         bool omit_empty_strings,
                         std::vector<std::string> *out);

/// Trim leading and trailing whitespace from a string.
std::string Trim(const std::string &str);

/// Split a string by a single character delimiter, trim whitespace from each
/// part, and remove empty strings. This is a convenience wrapper around
/// SplitStringToVector with trimming and filtering.
std::vector<std::string> SplitStringAndTrim(const std::string &str, char delim);

/**
  \brief Split a string (e.g. 1:2:3) into a vector of integers.

  \param [in]  delim  String containing a list of characters, any of which
                      is allowed as a delimiter.
  \param [in] omit_empty_strings If true, empty strings between delimiters are
                      allowed and will not produce an output integer; if false,
                      instances of characters in 'delim' that are consecutive or
                      at the start or end of the string would be an error.
                      You'll normally want this to be true if 'delim' consists
                      of spaces, and false otherwise.
  \param [out] out   The output list of integers.
*/
template <class I>
bool SplitStringToIntegers(const std::string &full, const char *delim,
                           bool omit_empty_strings,  // typically false [but
                                                     // should probably be true
                                                     // if "delim" is spaces].
                           std::vector<I> *out) {
  static_assert(std::is_integral<I>::value, "");
  if (*(full.c_str()) == '\0') {
    out->clear();
    return true;
  }
  std::vector<std::string> split;
  SplitStringToVector(full, delim, omit_empty_strings, &split);
  out->resize(split.size());
  for (size_t i = 0; i < split.size(); i++) {
    const char *this_str = split[i].c_str();
    char *end = NULL;
    int64_t j = 0;
    j = SHERPA_ONNX_STRTOLL(this_str, &end);
    if (end == this_str || *end != '\0') {
      out->clear();
      return false;
    } else {
      I jI = static_cast<I>(j);
      if (static_cast<int64_t>(jI) != j) {
        // output type cannot fit this integer.
        out->clear();
        return false;
      }
      (*out)[i] = jI;
    }
  }
  return true;
}

// This is defined for F = float and double.
template <class F>
bool SplitStringToFloats(const std::string &full, const char *delim,
                         bool omit_empty_strings,  // typically false
                         std::vector<F> *out);

// This is defined for F = float and double.
template <typename T>
bool ConvertStringToReal(const std::string &str, T *out);

std::vector<std::string> SplitUtf8(const std::string &text);

std::string ToLowerCase(const std::string &s);
void ToLowerCase(std::string *in_out);

std::wstring ToLowerCase(const std::wstring &s);

std::string RemoveInvalidUtf8Sequences(const std::string &text,
                                       bool show_debug_msg = false);

// Return true if text contains valid utf8 sequence.
// Return false otherwise
bool IsUtf8(const std::string &text);

// Return true if text contains valid gb2312 encoded sequence
// Return false otherwise
bool IsGB2312(const std::string &text);

#if defined(_WIN32)
std::string Gb2312ToUtf8(const std::string &text);
#endif

std::wstring ToWideString(const std::string &s);

std::string ToString(const std::wstring &s);

bool EndsWith(const std::string &haystack, const std::string &needle);

bool Contains(const std::string &haystack, const std::string &needle);

std::vector<std::string> SplitString(const std::string &s, int32_t chunk_size);

std::string Join(const std::vector<std::string> &ss,
                 const std::string &delim = "");

// Converts a UTF-8 std::string to a UTF-32 std::u32string
std::u32string Utf8ToUtf32(const std::string &str);

// Converts a UTF-32 std::u32string to a UTF-8 std::string
std::string Utf32ToUtf8(const std::u32string &str);

// Converts a single UTF-32 codepoint to a UTF-8 std::string
std::string Utf32ToUtf8(char32_t cp);

// Helper: Convert ASCII chars in a std::string to uppercase (leaves non-ASCII
// unchanged)
std::string ToUpperAscii(const std::string &str);

// Helper: Convert ASCII chars in a std::string to lowercase (leaves non-ASCII
// unchanged)
std::string ToLowerAscii(const std::string &str);

bool IsAlphaOrPunct(int ch);

// Detect if a codepoint is a CJK character
bool IsCJK(char32_t cp);

bool ContainsCJK(const std::string &text);

bool ContainsCJK(const std::u32string &text);

bool StringToBool(const std::string &s);

// end is inclusive
std::string GetWord(const std::vector<std::string> &words, int32_t start,
                    int32_t end);

bool IsPunct(const std::string &s);

#if defined(_WIN32)
#define SHERPA_ONNX_TO_ORT_PATH(s) (ToWideString(s).c_str())
#else
#define SHERPA_ONNX_TO_ORT_PATH(s) ((s).c_str())
#endif

int32_t ToIntOrDefault(const std::string &s, int32_t default_value);

float ToFloatOrDefault(const std::string &s, float default_value);

// Convert lengths to flat mask + shape. Outputs [batch, 1, max_len] format
// where mask[b][0][i] = 1.0 if i < lengths[b], else 0.0.
void LengthsToMask(const std::vector<int64_t> &lengths,
                   std::vector<float> *mask_flat,
                   std::vector<int64_t> *mask_shape);

// TTS text chunking helpers.
std::vector<std::string> SplitByBlankLines(const std::string &text);
std::vector<std::string> SplitByPunctuation(const std::string &text);
std::vector<std::string> MergeShortSentences(
    const std::vector<std::string> &sentences, size_t min_chars);
std::vector<std::string> SplitLongSentence(const std::string &sentence,
                                           size_t max_chars);
std::vector<std::string> ChunkText(const std::string &text, size_t max_len);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_TEXT_UTILS_H_


================================================
FILE: sherpa-onnx/csrc/text2token-test.cc
================================================
// sherpa-onnx/csrc/text2token-test.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include <fstream>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

#include "gtest/gtest.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/utils.h"
#include "ssentencepiece/csrc/ssentencepiece.h"

namespace sherpa_onnx {

// Please refer to
// https://github.com/pkufool/sherpa-test-data
// to download test data for testing
static const char dir[] = "/tmp/sherpa-test-data";

TEST(TEXT2TOKEN, TEST_cjkchar) {
  std::ostringstream oss;
  oss << dir << "/text2token/tokens_cn.txt";

  std::string tokens = oss.str();

  if (!std::ifstream(tokens).good()) {
    SHERPA_ONNX_LOGE(
        "No test data found, skipping TEST_cjkchar()."
        "You can download the test data by: "
        "git clone https://github.com/pkufool/sherpa-test-data.git "
        "/tmp/sherpa-test-data");
    return;
  }

  auto sym_table = SymbolTable(tokens);

  std::string text =
      "世界人民大团结\n中国 V S 美国\n\n";  // Test blank lines also

  std::istringstream iss(text);

  std::vector<std::vector<int32_t>> ids;
  std::vector<float> scores;

  auto r = EncodeHotwords(iss, "cjkchar", sym_table, nullptr, &ids, &scores);

  std::vector<std::vector<int32_t>> expected_ids(
      {{379, 380, 72, 874, 93, 1251, 489}, {262, 147, 3423, 2476, 21, 147}});
  EXPECT_EQ(ids, expected_ids);

  EXPECT_EQ(scores.size(), 0);
}

TEST(TEXT2TOKEN, TEST_bpe) {
  std::ostringstream oss;
  oss << dir << "/text2token/tokens_en.txt";
  std::string tokens = oss.str();
  oss.clear();
  oss.str("");
  oss << dir << "/text2token/bpe_en.vocab";
  std::string bpe = oss.str();
  if (!std::ifstream(tokens).good() || !std::ifstream(bpe).good()) {
    SHERPA_ONNX_LOGE(
        "No test data found, skipping TEST_bpe()."
        "You can download the test data by: "
        "git clone https://github.com/pkufool/sherpa-test-data.git "
        "/tmp/sherpa-test-data");
    return;
  }

  auto sym_table = SymbolTable(tokens);
  auto bpe_processor = std::make_unique<ssentencepiece::Ssentencepiece>(bpe);

  std::string text = "HELLO WORLD\nI LOVE YOU :2.0";

  std::istringstream iss(text);

  std::vector<std::vector<int32_t>> ids;
  std::vector<float> scores;

  auto r =
      EncodeHotwords(iss, "bpe", sym_table, bpe_processor.get(), &ids, &scores);

  std::vector<std::vector<int32_t>> expected_ids(
      {{22, 58, 24, 425}, {19, 370, 47}});
  EXPECT_EQ(ids, expected_ids);

  std::vector<float> expected_scores({0, 2.0});
  EXPECT_EQ(scores, expected_scores);
}

TEST(TEXT2TOKEN, TEST_cjkchar_bpe) {
  std::ostringstream oss;
  oss << dir << "/text2token/tokens_mix.txt";
  std::string tokens = oss.str();
  oss.clear();
  oss.str("");
  oss << dir << "/text2token/bpe_mix.vocab";
  std::string bpe = oss.str();
  if (!std::ifstream(tokens).good() || !std::ifstream(bpe).good()) {
    SHERPA_ONNX_LOGE(
        "No test data found, skipping TEST_cjkchar_bpe()."
        "You can download the test data by: "
        "git clone https://github.com/pkufool/sherpa-test-data.git "
        "/tmp/sherpa-test-data");
    return;
  }

  auto sym_table = SymbolTable(tokens);
  auto bpe_processor = std::make_unique<ssentencepiece::Ssentencepiece>(bpe);

  std::string text = "世界人民 GOES TOGETHER :1.5\n中国 GOES WITH 美国 :0.5";

  std::istringstream iss(text);

  std::vector<std::vector<int32_t>> ids;
  std::vector<float> scores;

  auto r = EncodeHotwords(iss, "cjkchar+bpe", sym_table, bpe_processor.get(),
                          &ids, &scores);

  std::vector<std::vector<int32_t>> expected_ids(
      {{1368, 1392, 557, 680, 275, 178, 475},
       {685, 736, 275, 178, 179, 921, 736}});
  EXPECT_EQ(ids, expected_ids);

  std::vector<float> expected_scores({1.5, 0.5});
  EXPECT_EQ(scores, expected_scores);
}

TEST(TEXT2TOKEN, TEST_bbpe) {
  std::ostringstream oss;
  oss << dir << "/text2token/tokens_bbpe.txt";
  std::string tokens = oss.str();
  oss.clear();
  oss.str("");
  oss << dir << "/text2token/bbpe.vocab";
  std::string bpe = oss.str();
  if (!std::ifstream(tokens).good() || !std::ifstream(bpe).good()) {
    SHERPA_ONNX_LOGE(
        "No test data found, skipping TEST_bbpe()."
        "You can download the test data by: "
        "git clone https://github.com/pkufool/sherpa-test-data.git "
        "/tmp/sherpa-test-data");
    return;
  }

  auto sym_table = SymbolTable(tokens);
  auto bpe_processor = std::make_unique<ssentencepiece::Ssentencepiece>(bpe);

  std::string text = "频繁 :1.0\n李鞑靼";

  std::istringstream iss(text);

  std::vector<std::vector<int32_t>> ids;
  std::vector<float> scores;

  auto r =
      EncodeHotwords(iss, "bpe", sym_table, bpe_processor.get(), &ids, &scores);

  std::vector<std::vector<int32_t>> expected_ids(
      {{259, 1118, 234, 188, 132}, {259, 1585, 236, 161, 148, 236, 160, 191}});
  EXPECT_EQ(ids, expected_ids);

  std::vector<float> expected_scores({1.0, 0});
  EXPECT_EQ(scores, expected_scores);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/timer.cc
================================================
// sherpa-onnx/csrc/timer.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/timer.h"

#include <chrono>
#include <memory>

namespace sherpa_onnx {

// modified from https://github.com/kaldi-asr/kaldi/blob/master/src/base/timer.h
class Timer::Impl {
 public:
  Impl() { Reset(); }

  using high_resolution_clock = std::chrono::high_resolution_clock;

  void Reset() { begin_ = high_resolution_clock::now(); }

  // Return time in seconds
  double Elapsed() {
    auto end = high_resolution_clock::now();
    auto diff =
        std::chrono::duration_cast<std::chrono::microseconds>(end - begin_);
    return diff.count() / 1000000.0;
  }

 private:
  high_resolution_clock::time_point begin_;
};

Timer::Timer() : impl_(std::make_unique<Impl>()) {}

Timer::~Timer() = default;

void Timer::Reset() const { impl_->Reset(); }

// Return time in seconds
double Timer::Elapsed() const { return impl_->Elapsed(); }

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/timer.h
================================================
// sherpa-onnx/csrc/timer.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_TIMER_H_
#define SHERPA_ONNX_CSRC_TIMER_H_

#include <memory>

namespace sherpa_onnx {

class Timer {
 public:
  Timer();
  ~Timer();

  void Reset() const;

  // Return time in seconds
  double Elapsed() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_TIMER_H_


================================================
FILE: sherpa-onnx/csrc/transducer-keyword-decoder.cc
================================================
// sherpa-onnx/csrc/transducer-keywords-decoder.cc
//
// Copyright (c)  2023-2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/transducer-keyword-decoder.h"

#include <algorithm>
#include <cmath>
#include <cstring>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/log.h"
#include "sherpa-onnx/csrc/onnx-utils.h"

namespace sherpa_onnx {

TransducerKeywordResult TransducerKeywordDecoder::GetEmptyResult() const {
  int32_t context_size = model_->ContextSize();
  int32_t blank_id = 0;  // always 0
  TransducerKeywordResult r;
  std::vector<int64_t> blanks(context_size, -1);
  blanks.back() = blank_id;

  Hypotheses blank_hyp({{blanks, 0}});
  r.hyps = std::move(blank_hyp);
  return r;
}

void TransducerKeywordDecoder::Decode(
    Ort::Value encoder_out, OnlineStream **ss,
    std::vector<TransducerKeywordResult> *result) {
  std::vector<int64_t> encoder_out_shape =
      encoder_out.GetTensorTypeAndShapeInfo().GetShape();

  if (encoder_out_shape[0] != result->size()) {
    SHERPA_ONNX_LOGE(
        "Size mismatch! encoder_out.size(0) %d, result.size(0): %d\n",
        static_cast<int32_t>(encoder_out_shape[0]),
        static_cast<int32_t>(result->size()));
    exit(-1);
  }

  int32_t batch_size = static_cast<int32_t>(encoder_out_shape[0]);

  int32_t num_frames = static_cast<int32_t>(encoder_out_shape[1]);
  int32_t vocab_size = model_->VocabSize();
  int32_t context_size = model_->ContextSize();
  std::vector<int64_t> blanks(context_size, -1);
  blanks.back() = 0;  // blank_id is hardcoded to 0

  std::vector<Hypotheses> cur;
  for (auto &r : *result) {
    cur.push_back(std::move(r.hyps));
  }
  std::vector<Hypothesis> prev;

  for (int32_t t = 0; t != num_frames; ++t) {
    // Due to merging paths with identical token sequences,
    // not all utterances have "num_active_paths" paths.
    auto hyps_row_splits = GetHypsRowSplits(cur);
    int32_t num_hyps =
        hyps_row_splits.back();  // total num hyps for all utterance
    prev.clear();
    for (auto &hyps : cur) {
      for (auto &h : hyps) {
        prev.push_back(std::move(h.second));
      }
    }
    cur.clear();
    cur.reserve(batch_size);

    Ort::Value decoder_input = model_->BuildDecoderInput(prev);
    Ort::Value decoder_out = model_->RunDecoder(std::move(decoder_input));

    Ort::Value cur_encoder_out =
        GetEncoderOutFrame(model_->Allocator(), &encoder_out, t);
    cur_encoder_out =
        Repeat(model_->Allocator(), &cur_encoder_out, hyps_row_splits);
    Ort::Value logit =
        model_->RunJoiner(std::move(cur_encoder_out), View(&decoder_out));

    float *p_logit = logit.GetTensorMutableData<float>();
    LogSoftmax(p_logit, vocab_size, num_hyps);

    // The acoustic logprobs for current frame
    std::vector<float> logprobs(vocab_size * num_hyps);
    std::memcpy(logprobs.data(), p_logit,
                sizeof(float) * vocab_size * num_hyps);

    // now p_logit contains log_softmax output, we rename it to p_logprob
    // to match what it actually contains
    float *p_logprob = p_logit;

    // add log_prob of each hypothesis to p_logprob before taking top_k
    for (int32_t i = 0; i != num_hyps; ++i) {
      float log_prob = prev[i].log_prob;
      for (int32_t k = 0; k != vocab_size; ++k, ++p_logprob) {
        *p_logprob += log_prob;
      }
    }
    p_logprob = p_logit;  // we changed p_logprob in the above for loop

    for (int32_t b = 0; b != batch_size; ++b) {
      int32_t frame_offset = (*result)[b].frame_offset;
      int32_t start = hyps_row_splits[b];
      int32_t end = hyps_row_splits[b + 1];
      auto topk =
          TopkIndex(p_logprob, vocab_size * (end - start), max_active_paths_);

      Hypotheses hyps;
      for (auto k : topk) {
        int32_t hyp_index = k / vocab_size + start;
        int32_t new_token = k % vocab_size;

        Hypothesis new_hyp = prev[hyp_index];
        float context_score = 0;
        auto context_state = new_hyp.context_state;

        // blank is hardcoded to 0
        // also, it treats unk as blank
        if (new_token != 0 && new_token != unk_id_) {
          new_hyp.ys.push_back(new_token);
          new_hyp.timestamps.push_back(t + frame_offset);
          new_hyp.ys_probs.push_back(
              exp(logprobs[hyp_index * vocab_size + new_token]));

          new_hyp.num_trailing_blanks = 0;
          auto context_res = ss[b]->GetContextGraph()->ForwardOneStep(
              context_state, new_token);
          context_score = std::get<0>(context_res);
          new_hyp.context_state = std::get<1>(context_res);
          // Start matching from the start state, forget the decoder history.
          if (new_hyp.context_state->token == -1) {
            new_hyp.ys = blanks;
            new_hyp.timestamps.clear();
            new_hyp.ys_probs.clear();
          }
        } else {
          ++new_hyp.num_trailing_blanks;
        }
        new_hyp.log_prob = p_logprob[k] + context_score;
        hyps.Add(std::move(new_hyp));
      }  // for (auto k : topk)

      auto best_hyp = hyps.GetMostProbable(false);

      auto status = ss[b]->GetContextGraph()->IsMatched(best_hyp.context_state);
      bool matched = std::get<0>(status);
      const ContextState *matched_state = std::get<1>(status);

      if (matched) {
        float ys_prob = 0.0;
        for (int32_t i = 0; i < matched_state->level; ++i) {
          ys_prob += best_hyp.ys_probs[i];
        }
        ys_prob /= matched_state->level;
        if (best_hyp.num_trailing_blanks > num_trailing_blanks_ &&
            ys_prob >= matched_state->ac_threshold) {
          auto &r = (*result)[b];
          r.tokens = {best_hyp.ys.end() - matched_state->level,
                      best_hyp.ys.end()};
          r.timestamps = {best_hyp.timestamps.end() - matched_state->level,
                          best_hyp.timestamps.end()};
          r.keyword = matched_state->phrase;

          hyps = Hypotheses({{blanks, 0, ss[b]->GetContextGraph()->Root()}});
        }
      }
      cur.push_back(std::move(hyps));
      p_logprob += (end - start) * vocab_size;
    }  // for (int32_t b = 0; b != batch_size; ++b)
  }

  for (int32_t b = 0; b != batch_size; ++b) {
    auto &hyps = cur[b];
    auto best_hyp = hyps.GetMostProbable(false);
    auto &r = (*result)[b];
    r.hyps = std::move(hyps);
    r.num_trailing_blanks = best_hyp.num_trailing_blanks;
    r.frame_offset += num_frames;
  }
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/transducer-keyword-decoder.h
================================================
// sherpa-onnx/csrc/transducer-keywords-decoder.h
//
// Copyright (c)  2023-2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_TRANSDUCER_KEYWORD_DECODER_H_
#define SHERPA_ONNX_CSRC_TRANSDUCER_KEYWORD_DECODER_H_

#include <string>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/online-stream.h"
#include "sherpa-onnx/csrc/online-transducer-model.h"

namespace sherpa_onnx {

struct TransducerKeywordResult {
  /// Number of frames after subsampling we have decoded so far
  int32_t frame_offset = 0;

  /// The decoded token IDs for keywords
  std::vector<int64_t> tokens;

  /// The triggered keyword
  std::string keyword;

  /// number of trailing blank frames decoded so far
  int32_t num_trailing_blanks = 0;

  /// timestamps[i] contains the output frame index where tokens[i] is decoded.
  std::vector<int32_t> timestamps;

  // used only in modified beam_search
  Hypotheses hyps;
};

class TransducerKeywordDecoder {
 public:
  TransducerKeywordDecoder(OnlineTransducerModel *model,
                           int32_t max_active_paths,
                           int32_t num_trailing_blanks, int32_t unk_id)
      : model_(model),
        max_active_paths_(max_active_paths),
        num_trailing_blanks_(num_trailing_blanks),
        unk_id_(unk_id) {}

  TransducerKeywordResult GetEmptyResult() const;

  void Decode(Ort::Value encoder_out, OnlineStream **ss,
              std::vector<TransducerKeywordResult> *result);

 private:
  OnlineTransducerModel *model_;  // Not owned

  int32_t max_active_paths_;
  int32_t num_trailing_blanks_;
  int32_t unk_id_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_TRANSDUCER_KEYWORD_DECODER_H_


================================================
FILE: sherpa-onnx/csrc/transpose-test.cc
================================================
// sherpa-onnx/csrc/transpose-test.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/transpose.h"

#include <numeric>

#include "gtest/gtest.h"
#include "sherpa-onnx/csrc/onnx-utils.h"

namespace sherpa_onnx {

TEST(Transpose, Tranpose01) {
  Ort::AllocatorWithDefaultOptions allocator;
  std::array<int64_t, 3> shape{3, 2, 5};
  Ort::Value v =
      Ort::Value::CreateTensor<float>(allocator, shape.data(), shape.size());
  float *p = v.GetTensorMutableData<float>();

  std::iota(p, p + shape[0] * shape[1] * shape[2], 0);

  auto ans = Transpose01(allocator, &v);
  auto v2 = Transpose01(allocator, &ans);

  Print3D(&v);
  Print3D(&ans);
  Print3D(&v2);

  const float *q = v2.GetTensorData<float>();

  for (int32_t i = 0; i != static_cast<int32_t>(shape[0] * shape[1] * shape[2]);
       ++i) {
    EXPECT_EQ(p[i], q[i]);
  }
}

TEST(Transpose, Tranpose12) {
  Ort::AllocatorWithDefaultOptions allocator;
  std::array<int64_t, 3> shape{3, 2, 5};
  Ort::Value v =
      Ort::Value::CreateTensor<float>(allocator, shape.data(), shape.size());
  float *p = v.GetTensorMutableData<float>();

  std::iota(p, p + shape[0] * shape[1] * shape[2], 0);

  auto ans = Transpose12(allocator, &v);
  auto v2 = Transpose12(allocator, &ans);

  Print3D(&v);
  Print3D(&ans);
  Print3D(&v2);

  const float *q = v2.GetTensorData<float>();

  for (int32_t i = 0; i != static_cast<int32_t>(shape[0] * shape[1] * shape[2]);
       ++i) {
    EXPECT_EQ(p[i], q[i]);
  }
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/transpose.cc
================================================
// sherpa-onnx/csrc/transpose.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/transpose.h"

#include <algorithm>
#include <cassert>
#include <vector>

namespace sherpa_onnx {

template <typename T /*=float*/>
Ort::Value Transpose01(OrtAllocator *allocator, const Ort::Value *v) {
  std::vector<int64_t> shape = v->GetTensorTypeAndShapeInfo().GetShape();
  assert(shape.size() == 3);

  std::array<int64_t, 3> ans_shape{shape[1], shape[0], shape[2]};
  Ort::Value ans = Ort::Value::CreateTensor<T>(allocator, ans_shape.data(),
                                               ans_shape.size());

  T *dst = ans.GetTensorMutableData<T>();
  auto plane_offset = shape[1] * shape[2];

  for (int64_t i = 0; i != ans_shape[0]; ++i) {
    const T *src = v->GetTensorData<T>() + i * shape[2];
    for (int64_t k = 0; k != ans_shape[1]; ++k) {
      std::copy(src, src + shape[2], dst);
      src += plane_offset;
      dst += shape[2];
    }
  }

  return ans;
}

template <typename T /*= float*/>
Ort::Value Transpose12(OrtAllocator *allocator, const Ort::Value *v) {
  std::vector<int64_t> shape = v->GetTensorTypeAndShapeInfo().GetShape();
  assert(shape.size() == 3);

  std::array<int64_t, 3> ans_shape{shape[0], shape[2], shape[1]};
  Ort::Value ans = Ort::Value::CreateTensor<T>(allocator, ans_shape.data(),
                                               ans_shape.size());
  T *dst = ans.GetTensorMutableData<T>();
  auto row_stride = shape[2];
  for (int64_t b = 0; b != ans_shape[0]; ++b) {
    const T *src = v->GetTensorData<T>() + b * shape[1] * shape[2];
    for (int64_t i = 0; i != ans_shape[1]; ++i) {
      for (int64_t k = 0; k != ans_shape[2]; ++k, ++dst) {
        *dst = (src + k * row_stride)[i];
      }
    }
  }

  return ans;
}

template Ort::Value Transpose01<float>(OrtAllocator *allocator,
                                       const Ort::Value *v);

template Ort::Value Transpose12<float>(OrtAllocator *allocator,
                                       const Ort::Value *v);

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/transpose.h
================================================
// sherpa-onnx/csrc/transpose.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_TRANSPOSE_H_
#define SHERPA_ONNX_CSRC_TRANSPOSE_H_

#include "onnxruntime_cxx_api.h"  // NOLINT

namespace sherpa_onnx {
/** Transpose a 3-D tensor from shape (B, T, C) to (T, B, C).
 *
 * @param allocator
 * @param v A 3-D tensor of shape (B, T, C). Its data type is type.
 *
 * @return Return a 3-D tensor of shape (T, B, C). Its data type is type.
 */
template <typename type = float>
Ort::Value Transpose01(OrtAllocator *allocator, const Ort::Value *v);

/** Transpose a 3-D tensor from shape (B, T, C) to (B, C, T).
 *
 * @param allocator
 * @param v A 3-D tensor of shape (B, T, C). Its data type is type.
 *
 * @return Return a 3-D tensor of shape (B, C, T). Its data type is type.
 */
template <typename type = float>
Ort::Value Transpose12(OrtAllocator *allocator, const Ort::Value *v);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_TRANSPOSE_H_


================================================
FILE: sherpa-onnx/csrc/unbind-test.cc
================================================
// sherpa-onnx/csrc/unbind-test.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/unbind.h"

#include <vector>

#include "gtest/gtest.h"
#include "sherpa-onnx/csrc/cat.h"
#include "sherpa-onnx/csrc/onnx-utils.h"

namespace sherpa_onnx {

TEST(Ubind, Test1DTensors) {
  Ort::AllocatorWithDefaultOptions allocator;
  std::array<int64_t, 1> shape{3};
  Ort::Value v =
      Ort::Value::CreateTensor<float>(allocator, shape.data(), shape.size());
  float *p = v.GetTensorMutableData<float>();

  for (int32_t i = 0; i != static_cast<int32_t>(shape[0]); ++i) {
    p[i] = i;
  }
  auto ans = Unbind(allocator, &v, 0);
  EXPECT_EQ(ans.size(), shape[0]);
  for (int32_t i = 0; i != static_cast<int32_t>(shape[0]); ++i) {
    EXPECT_EQ(ans[i].GetTensorData<float>()[0], p[i]);
  }
  Print1D(&v);
  for (int32_t i = 0; i != static_cast<int32_t>(shape[0]); ++i) {
    Print1D(&ans[i]);
  }

  // For Cat
  std::vector<const Ort::Value *> vec(ans.size());
  for (int32_t i = 0; i != static_cast<int32_t>(vec.size()); ++i) {
    vec[i] = &ans[i];
  }
  Ort::Value v2 = Cat(allocator, vec, 0);
  const float *p2 = v2.GetTensorData<float>();
  for (int32_t i = 0; i != shape[0]; ++i) {
    EXPECT_EQ(p[i], p2[i]);
  }
}

TEST(Ubind, Test2DTensorsDim0) {
  Ort::AllocatorWithDefaultOptions allocator;
  std::array<int64_t, 2> shape{3, 2};
  Ort::Value v =
      Ort::Value::CreateTensor<float>(allocator, shape.data(), shape.size());
  float *p = v.GetTensorMutableData<float>();

  for (int32_t i = 0; i != static_cast<int32_t>(shape[0] * shape[1]); ++i) {
    p[i] = i;
  }
  auto ans = Unbind(allocator, &v, 0);

  Print2D(&v);
  for (int32_t i = 0; i != static_cast<int32_t>(shape[0]); ++i) {
    Print2D(&ans[i]);
  }

  for (int32_t i = 0; i != static_cast<int32_t>(shape[0]); ++i) {
    const float *pans = ans[i].GetTensorData<float>();
    for (int32_t k = 0; k != static_cast<int32_t>(shape[1]); ++k, ++p) {
      EXPECT_EQ(*p, pans[k]);
    }
  }

  // For Cat
  std::vector<const Ort::Value *> vec(ans.size());
  for (int32_t i = 0; i != static_cast<int32_t>(vec.size()); ++i) {
    vec[i] = &ans[i];
  }
  Ort::Value v2 = Cat(allocator, vec, 0);
  Print2D(&v2);

  p = v.GetTensorMutableData<float>();
  const float *p2 = v2.GetTensorData<float>();
  for (int32_t i = 0; i != shape[0] * shape[1]; ++i) {
    EXPECT_EQ(p[i], p2[i]);
  }
}

TEST(Ubind, Test2DTensorsDim1) {
  Ort::AllocatorWithDefaultOptions allocator;
  std::array<int64_t, 2> shape{3, 2};
  Ort::Value v =
      Ort::Value::CreateTensor<float>(allocator, shape.data(), shape.size());
  float *p = v.GetTensorMutableData<float>();

  for (int32_t i = 0; i != static_cast<int32_t>(shape[0] * shape[1]); ++i) {
    p[i] = i;
  }
  auto ans = Unbind(allocator, &v, 1);

  Print2D(&v);
  for (int32_t i = 0; i != static_cast<int32_t>(shape[1]); ++i) {
    Print2D(&ans[i]);
  }

  // For Cat
  std::vector<const Ort::Value *> vec(ans.size());
  for (int32_t i = 0; i != static_cast<int32_t>(vec.size()); ++i) {
    vec[i] = &ans[i];
  }
  Ort::Value v2 = Cat(allocator, vec, 1);
  Print2D(&v2);

  p = v.GetTensorMutableData<float>();
  const float *p2 = v2.GetTensorData<float>();
  for (int32_t i = 0; i != shape[0] * shape[1]; ++i) {
    EXPECT_EQ(p[i], p2[i]);
  }
}

TEST(Ubind, Test3DTensorsDim0) {
  Ort::AllocatorWithDefaultOptions allocator;
  std::array<int64_t, 3> shape{3, 2, 5};
  Ort::Value v =
      Ort::Value::CreateTensor<float>(allocator, shape.data(), shape.size());
  float *p = v.GetTensorMutableData<float>();

  for (int32_t i = 0; i != static_cast<int32_t>(shape[0] * shape[1] * shape[2]);
       ++i) {
    p[i] = i;
  }
  auto ans = Unbind(allocator, &v, 0);

  Print3D(&v);
  for (int32_t i = 0; i != static_cast<int32_t>(shape[0]); ++i) {
    Print3D(&ans[i]);
  }

  for (int32_t i = 0; i != static_cast<int32_t>(shape[0]); ++i) {
    const float *pans = ans[i].GetTensorData<float>();
    for (int32_t k = 0; k != static_cast<int32_t>(shape[1] * shape[2]);
         ++k, ++p) {
      EXPECT_EQ(*p, pans[k]);
    }
  }

  // For Cat
  std::vector<const Ort::Value *> vec(ans.size());
  for (int32_t i = 0; i != static_cast<int32_t>(vec.size()); ++i) {
    vec[i] = &ans[i];
  }
  Ort::Value v2 = Cat(allocator, vec, 0);
  Print3D(&v2);

  p = v.GetTensorMutableData<float>();
  const float *p2 = v2.GetTensorData<float>();
  for (int32_t i = 0; i != shape[0] * shape[1] * shape[2]; ++i) {
    EXPECT_EQ(p[i], p2[i]);
  }
}

TEST(Ubind, Test3DTensorsDim1) {
  Ort::AllocatorWithDefaultOptions allocator;
  std::array<int64_t, 3> shape{3, 2, 5};
  Ort::Value v =
      Ort::Value::CreateTensor<float>(allocator, shape.data(), shape.size());
  float *p = v.GetTensorMutableData<float>();

  for (int32_t i = 0; i != static_cast<int32_t>(shape[0] * shape[1] * shape[2]);
       ++i) {
    p[i] = i;
  }
  auto ans = Unbind(allocator, &v, 1);

  Print3D(&v);
  for (int32_t i = 0; i != static_cast<int32_t>(shape[1]); ++i) {
    Print3D(&ans[i]);
  }

  // For Cat
  std::vector<const Ort::Value *> vec(ans.size());
  for (int32_t i = 0; i != static_cast<int32_t>(vec.size()); ++i) {
    vec[i] = &ans[i];
  }
  Ort::Value v2 = Cat(allocator, vec, 1);
  Print3D(&v2);

  p = v.GetTensorMutableData<float>();
  const float *p2 = v2.GetTensorData<float>();
  for (int32_t i = 0; i != shape[0] * shape[1] * shape[2]; ++i) {
    EXPECT_EQ(p[i], p2[i]);
  }
}

TEST(Ubind, Test3DTensorsDim2) {
  Ort::AllocatorWithDefaultOptions allocator;
  std::array<int64_t, 3> shape{3, 2, 5};
  Ort::Value v =
      Ort::Value::CreateTensor<float>(allocator, shape.data(), shape.size());
  float *p = v.GetTensorMutableData<float>();

  for (int32_t i = 0; i != static_cast<int32_t>(shape[0] * shape[1] * shape[2]);
       ++i) {
    p[i] = i;
  }
  auto ans = Unbind(allocator, &v, 2);

  Print3D(&v);
  for (int32_t i = 0; i != static_cast<int32_t>(shape[2]); ++i) {
    Print3D(&ans[i]);
  }

  // For Cat
  std::vector<const Ort::Value *> vec(ans.size());
  for (int32_t i = 0; i != static_cast<int32_t>(vec.size()); ++i) {
    vec[i] = &ans[i];
  }
  Ort::Value v2 = Cat(allocator, vec, 2);
  Print3D(&v2);

  p = v.GetTensorMutableData<float>();
  const float *p2 = v2.GetTensorData<float>();
  for (int32_t i = 0; i != shape[0] * shape[1] * shape[2]; ++i) {
    EXPECT_EQ(p[i], p2[i]);
  }
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/unbind.cc
================================================
// sherpa-onnx/csrc/unbind.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/unbind.h"

#include <algorithm>
#include <cassert>
#include <functional>
#include <numeric>
#include <utility>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/onnx-utils.h"

namespace sherpa_onnx {

template <typename T /*= float*/>
std::vector<Ort::Value> Unbind(OrtAllocator *allocator, const Ort::Value *value,
                               int32_t dim) {
  std::vector<int64_t> shape = value->GetTensorTypeAndShapeInfo().GetShape();
  assert(dim >= 0);
  assert(dim < static_cast<int32_t>(shape.size()));
  int32_t n = static_cast<int32_t>(shape[dim]);
  if (n == 1) {
    std::vector<Ort::Value> ans;
    ans.push_back(Clone(allocator, value));
    return ans;
  }

  std::vector<int64_t> ans_shape = shape;
  ans_shape[dim] = 1;  // // Unlike torch, we keep the dim to 1

  // allocator tensors
  std::vector<Ort::Value> ans;
  ans.reserve(n);
  for (int32_t i = 0; i != n; ++i) {
    Ort::Value t = Ort::Value::CreateTensor<T>(allocator, ans_shape.data(),
                                               ans_shape.size());
    ans.push_back(std::move(t));
  }

  auto leading_size = static_cast<int32_t>(std::accumulate(
      shape.begin(), shape.begin() + dim, 1, std::multiplies<int64_t>()));

  auto trailing_size = static_cast<int32_t>(std::accumulate(
      shape.begin() + dim + 1, shape.end(), 1, std::multiplies<int64_t>()));

  const T *src = value->GetTensorData<T>();

  for (int32_t i = 0; i != leading_size; ++i) {
    for (int32_t k = 0; k != n; ++k) {
      T *dst = ans[k].GetTensorMutableData<T>() + i * trailing_size;
      std::copy(src, src + trailing_size, dst);
      src += trailing_size;
    }
  }

  return ans;
}

template std::vector<Ort::Value> Unbind<float>(OrtAllocator *allocator,
                                               const Ort::Value *value,
                                               int32_t dim);

template std::vector<Ort::Value> Unbind<int64_t>(OrtAllocator *allocator,
                                                 const Ort::Value *value,
                                                 int32_t dim);

std::vector<Ort::Value> UnbindFloat16(OrtAllocator *allocator,
                                      const Ort::Value *value, int32_t dim) {
  std::vector<int64_t> shape = value->GetTensorTypeAndShapeInfo().GetShape();
  assert(dim >= 0);
  assert(dim < static_cast<int32_t>(shape.size()));
  int32_t n = static_cast<int32_t>(shape[dim]);
  if (n == 1) {
    std::vector<Ort::Value> ans;
    ans.push_back(Clone(allocator, value));
    return ans;
  }

  std::vector<int64_t> ans_shape = shape;
  ans_shape[dim] = 1;  // // Unlike torch, we keep the dim to 1

  // allocator tensors
  std::vector<Ort::Value> ans;
  ans.reserve(n);
  for (int32_t i = 0; i != n; ++i) {
    Ort::Value t =
        Ort::Value::CreateTensor(allocator, ans_shape.data(), ans_shape.size(),
                                 ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16);
    ans.push_back(std::move(t));
  }

  auto leading_size = static_cast<int32_t>(std::accumulate(
      shape.begin(), shape.begin() + dim, 1, std::multiplies<int64_t>()));

  auto trailing_size = static_cast<int32_t>(std::accumulate(
      shape.begin() + dim + 1, shape.end(), 1, std::multiplies<int64_t>()));

  using T = uint16_t;
  const T *src = value->GetTensorData<T>();

  for (int32_t i = 0; i != leading_size; ++i) {
    for (int32_t k = 0; k != n; ++k) {
      T *dst = ans[k].GetTensorMutableData<T>() + i * trailing_size;
      std::copy(src, src + trailing_size, dst);
      src += trailing_size;
    }
  }

  return ans;
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/unbind.h
================================================
// sherpa-onnx/csrc/unbind.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_UNBIND_H_
#define SHERPA_ONNX_CSRC_UNBIND_H_

#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT

namespace sherpa_onnx {

/** It is similar to torch.unbind() but we keep the unbind dim to 1 in
 * the output
 *
 * @param allocator Allocator to allocate space for the returned tensor
 * @param value  The tensor to unbind
 * @param dim  The dim along which to unbind the tensor
 *
 * @return Return a list of tensors
 */
template <typename T = float>
std::vector<Ort::Value> Unbind(OrtAllocator *allocator, const Ort::Value *value,
                               int32_t dim);

std::vector<Ort::Value> UnbindFloat16(OrtAllocator *allocator,
                                      const Ort::Value *value, int32_t dim);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_UNBIND_H_


================================================
FILE: sherpa-onnx/csrc/utfcpp-test.cc
================================================
// sherpa-onnx/csrc/utfcpp-test.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include <cctype>
#include <iostream>
#include <string>
#include <vector>

#include "gtest/gtest.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

TEST(UTF8, Case1) {
  std::string hello = "你好, 早上好！世界.  hello!。Hallo! how are you?";
  std::vector<std::string> ss = SplitUtf8(hello);
  for (const auto &s : ss) {
    std::cout << s << "\n";
  }
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/utils.cc
================================================
// sherpa-onnx/csrc/utils.cc
//
// Copyright      2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/utils.h"

#include <cassert>
#include <iostream>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/bbpe.h"
#include "sherpa-onnx/csrc/log.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

static bool EncodeBase(const std::vector<std::string> &lines,
                       const SymbolTable &symbol_table,
                       std::vector<std::vector<int32_t>> *ids,
                       std::vector<std::string> *phrases,
                       std::vector<float> *scores,
                       std::vector<float> *thresholds) {
  ids->clear();

  std::vector<int32_t> tmp_ids;
  std::vector<float> tmp_scores;
  std::vector<float> tmp_thresholds;
  std::vector<std::string> tmp_phrases;

  std::string word;
  bool has_scores = false;
  bool has_thresholds = false;
  bool has_phrases = false;
  bool has_oov = false;

  for (const auto &line : lines) {
    float score = 0;
    float threshold = 0;
    std::string phrase = "";

    std::istringstream iss(line);
    while (iss >> word) {
      if (symbol_table.Contains(word)) {
        int32_t id = symbol_table[word];
        tmp_ids.push_back(id);
      } else {
        switch (word[0]) {
          case ':':  // boosting score for current keyword
            score = std::stof(word.substr(1));
            has_scores = true;
            break;
          case '#':  // triggering threshold (probability) for current keyword
            threshold = std::stof(word.substr(1));
            has_thresholds = true;
            break;
          case '@':  // the original keyword string
            phrase = word.substr(1);
            has_phrases = true;
            break;
          default:
            SHERPA_ONNX_LOGE(
                "Cannot find ID for token %s at line: %s. (Hint: Check the "
                "tokens.txt see if %s in it)",
                word.c_str(), line.c_str(), word.c_str());
            has_oov = true;
            break;
        }
      }
    }
    ids->push_back(std::move(tmp_ids));
    tmp_ids = {};
    tmp_scores.push_back(score);
    tmp_phrases.push_back(phrase);
    tmp_thresholds.push_back(threshold);
  }
  if (scores != nullptr) {
    if (has_scores) {
      scores->swap(tmp_scores);
    } else {
      scores->clear();
    }
  }
  if (phrases != nullptr) {
    if (has_phrases) {
      *phrases = std::move(tmp_phrases);
    } else {
      phrases->clear();
    }
  }
  if (thresholds != nullptr) {
    if (has_thresholds) {
      thresholds->swap(tmp_thresholds);
    } else {
      thresholds->clear();
    }
  }
  return !has_oov;
}

bool EncodeHotwords(std::istream &is, const std::string &modeling_unit,
                    const SymbolTable &symbol_table,
                    const ssentencepiece::Ssentencepiece *bpe_encoder,
                    std::vector<std::vector<int32_t>> *hotwords,
                    std::vector<float> *boost_scores) {
  std::vector<std::string> lines;
  std::string line;
  std::string word;

  while (std::getline(is, line)) {
    std::string score;
    std::string phrase;

    std::ostringstream oss;
    std::istringstream iss(line);
    while (iss >> word) {
      switch (word[0]) {
        case ':':  // boosting score for current keyword
          score = word;
          break;
        default:
          if (!score.empty()) {
            SHERPA_ONNX_LOGE(
                "Boosting score should be put after the words/phrase, given "
                "%s.",
                line.c_str());
            return false;
          }
          oss << " " << word;
          break;
      }
    }
    phrase = oss.str();
    if (phrase.empty()) {
      continue;
    } else {
      phrase = phrase.substr(1);
    }
    std::istringstream piss(phrase);
    oss.clear();
    oss.str("");
    while (piss >> word) {
      if (modeling_unit == "cjkchar") {
        for (const auto &w : SplitUtf8(word)) {
          oss << " " << w;
        }
      } else if (modeling_unit == "bpe") {
        std::vector<std::string> bpes;
        bpe_encoder->Encode(word, &bpes);
        for (const auto &bpe : bpes) {
          oss << " " << bpe;
        }
      } else if (modeling_unit == "bbpe") {
        std::vector<std::string> bpes;

        const auto &id2token = GetByteBpeTableId2Token();
        std::string tokens;
        for (size_t i = 0; i < word.length(); ++i) {
          uint8_t byte = static_cast<uint8_t>(word[i]);
          tokens += id2token.at(byte);
          if ((i + 1) % 3 == 0 && (i + 1) < word.length()) {
            tokens += " ";
          }
        }

        bpe_encoder->Encode(tokens, &bpes);
        for (const auto &bpe : bpes) {
          oss << " " << bpe;
        }
      } else {
        if (modeling_unit != "cjkchar+bpe") {
          SHERPA_ONNX_LOGE(
              "modeling_unit should be one of bpe, cjkchar or cjkchar+bpe, "
              "given "
              "%s",
              modeling_unit.c_str());
          exit(-1);
        }
        for (const auto &w : SplitUtf8(word)) {
          if (isalpha(w[0])) {
            std::vector<std::string> bpes;
            bpe_encoder->Encode(w, &bpes);
            for (const auto &bpe : bpes) {
              oss << " " << bpe;
            }
          } else {
            oss << " " << w;
          }
        }
      }
    }
    std::string encoded_phrase = oss.str().substr(1);
    oss.clear();
    oss.str("");
    oss << encoded_phrase;
    if (!score.empty()) {
      oss << " " << score;
    }
    lines.push_back(oss.str());
  }
  return EncodeBase(lines, symbol_table, hotwords, nullptr, boost_scores,
                    nullptr);
}

bool EncodeKeywords(std::istream &is, const SymbolTable &symbol_table,
                    std::vector<std::vector<int32_t>> *keywords_id,
                    std::vector<std::string> *keywords,
                    std::vector<float> *boost_scores,
                    std::vector<float> *threshold) {
  std::vector<std::string> lines;
  std::string line;
  while (std::getline(is, line)) {
    lines.push_back(line);
  }
  return EncodeBase(lines, symbol_table, keywords_id, keywords, boost_scores,
                    threshold);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/utils.h
================================================
// sherpa-onnx/csrc/utils.h
//
// Copyright      2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_UTILS_H_
#define SHERPA_ONNX_CSRC_UTILS_H_

#include <string>
#include <vector>

#include "sherpa-onnx/csrc/symbol-table.h"
#include "ssentencepiece/csrc/ssentencepiece.h"

namespace sherpa_onnx {

/* Encode the hotwords in an input stream to be tokens ids.
 *
 * @param is The input stream, it contains several lines, one hotword for each
 *           line. For each hotword, the tokens (cjkchar or bpe) are separated
 *           by spaces.
 * @param symbol_table  The tokens table mapping symbols to ids. All the symbols
 *                      in the stream should be in the symbol_table, if not this
 *                      function returns false.
 *
 * @@param hotwords  The encoded ids to be written to.
 *
 * @return  If all the symbols from ``is`` are in the symbol_table, returns true
 *          otherwise returns false.
 */
bool EncodeHotwords(std::istream &is, const std::string &modeling_unit,
                    const SymbolTable &symbol_table,
                    const ssentencepiece::Ssentencepiece *bpe_encoder,
                    std::vector<std::vector<int32_t>> *hotwords_id,
                    std::vector<float> *boost_scores);

/* Encode the keywords in an input stream to be tokens ids.
 *
 * @param is The input stream, it contains several lines, one hotword for each
 *           line. For each hotword, the tokens (cjkchar or bpe) are separated
 *           by spaces, it might contain boosting score (starting with :),
 *           triggering threshold (starting with #) and keyword string (starting
 *           with @) too.
 * @param symbol_table  The tokens table mapping symbols to ids. All the symbols
 *                      in the stream should be in the symbol_table, if not this
 *                      function returns false.
 *
 * @param keywords_id The encoded ids to be written to.
 * @param keywords The original keyword string to be written to.
 * @param boost_scores  The boosting score for each keyword to be written to.
 * @param threshold  The triggering threshold for each keyword to be written to.
 *
 * @return  If all the symbols from ``is`` are in the symbol_table, returns true
 *          otherwise returns false.
 */
bool EncodeKeywords(std::istream &is, const SymbolTable &symbol_table,
                    std::vector<std::vector<int32_t>> *keywords_id,
                    std::vector<std::string> *keywords,
                    std::vector<float> *boost_scores,
                    std::vector<float> *threshold);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_UTILS_H_


================================================
FILE: sherpa-onnx/csrc/vad-model-config.cc
================================================
// sherpa-onnx/csrc/vad-model-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/vad-model-config.h"

#include <sstream>
#include <string>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

void VadModelConfig::Register(ParseOptions *po) {
  silero_vad.Register(po);
  ten_vad.Register(po);

  po->Register("vad-sample-rate", &sample_rate,
               "Sample rate expected by the VAD model");

  po->Register("vad-num-threads", &num_threads,
               "Number of threads to run the VAD model");

  po->Register("vad-provider", &provider,
               "Specify a provider to run the VAD model. Supported values: "
               "cpu, cuda, coreml");

  po->Register("vad-debug", &debug,
               "true to display debug information when loading vad models");
}

bool VadModelConfig::Validate() const {
  if (provider != "rknn") {
    if (!silero_vad.model.empty() && EndsWith(silero_vad.model, ".rknn")) {
      SHERPA_ONNX_LOGE(
          "--provider is %s, which is not rknn, but you pass an rknn model "
          "'%s'",
          provider.c_str(), silero_vad.model.c_str());
      return false;
    }
  }

  if (provider == "rknn") {
    if (!silero_vad.model.empty() && EndsWith(silero_vad.model, ".onnx")) {
      SHERPA_ONNX_LOGE("--provider is rknn, but you pass an onnx model '%s'",
                       silero_vad.model.c_str());
      return false;
    }
  }

  if (!silero_vad.model.empty()) {
    return silero_vad.Validate();
  }

  if (!ten_vad.model.empty()) {
    return ten_vad.Validate();
  }

  SHERPA_ONNX_LOGE("Please provide one VAD model.");

  return false;
}

std::string VadModelConfig::ToString() const {
  std::ostringstream os;

  os << "VadModelConfig(";
  os << "silero_vad=" << silero_vad.ToString() << ", ";
  os << "ten_vad=" << ten_vad.ToString() << ", ";
  os << "sample_rate=" << sample_rate << ", ";
  os << "num_threads=" << num_threads << ", ";
  os << "provider=\"" << provider << "\", ";
  os << "debug=" << (debug ? "True" : "False") << ")";

  return os.str();
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/vad-model-config.h
================================================
// sherpa-onnx/csrc/vad-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_VAD_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_VAD_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"
#include "sherpa-onnx/csrc/silero-vad-model-config.h"
#include "sherpa-onnx/csrc/ten-vad-model-config.h"

namespace sherpa_onnx {

struct VadModelConfig {
  SileroVadModelConfig silero_vad;
  TenVadModelConfig ten_vad;

  int32_t sample_rate = 16000;
  int32_t num_threads = 1;
  std::string provider = "cpu";

  // true to show debug information when loading models
  bool debug = false;

  VadModelConfig() = default;

  VadModelConfig(const SileroVadModelConfig &silero_vad,
                 const TenVadModelConfig &ten_vad, int32_t sample_rate,
                 int32_t num_threads, const std::string &provider, bool debug)
      : silero_vad(silero_vad),
        ten_vad(ten_vad),
        sample_rate(sample_rate),
        num_threads(num_threads),
        provider(provider),
        debug(debug) {}

  void Register(ParseOptions *po);
  bool Validate() const;

  std::string ToString() const;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_VAD_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/csrc/vad-model.cc
================================================
// sherpa-onnx/csrc/vad-model.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/vad-model.h"

#include <memory>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#if SHERPA_ONNX_ENABLE_RKNN
#include "sherpa-onnx/csrc/rknn/silero-vad-model-rknn.h"
#endif

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/silero-vad-model.h"
#include "sherpa-onnx/csrc/ten-vad-model.h"

namespace sherpa_onnx {

std::unique_ptr<VadModel> VadModel::Create(const VadModelConfig &config) {
  if (config.provider == "rknn") {
#if SHERPA_ONNX_ENABLE_RKNN
    if (!config.silero_vad.model.empty()) {
      return std::make_unique<SileroVadModelRknn>(config);
    } else {
      SHERPA_ONNX_LOGE("Only silero-vad is supported for RKNN at present");
      SHERPA_ONNX_EXIT(-1);
    }
#else
    SHERPA_ONNX_LOGE(
        "Please rebuild sherpa-onnx with -DSHERPA_ONNX_ENABLE_RKNN=ON if you "
        "want to use rknn.");
    SHERPA_ONNX_EXIT(-1);
    return nullptr;
#endif
  }

  if (!config.silero_vad.model.empty()) {
    return std::make_unique<SileroVadModel>(config);
  }

  if (!config.ten_vad.model.empty()) {
    return std::make_unique<TenVadModel>(config);
  }

  SHERPA_ONNX_LOGE("Please provide a vad model");
  return nullptr;
}

template <typename Manager>
std::unique_ptr<VadModel> VadModel::Create(Manager *mgr,
                                           const VadModelConfig &config) {
  if (config.provider == "rknn") {
#if SHERPA_ONNX_ENABLE_RKNN
    if (!config.silero_vad.model.empty()) {
      return std::make_unique<SileroVadModelRknn>(mgr, config);
    } else {
      SHERPA_ONNX_LOGE("Only silero-vad is supported for RKNN at present");
      SHERPA_ONNX_EXIT(-1);
    }
#else
    SHERPA_ONNX_LOGE(
        "Please rebuild sherpa-onnx with -DSHERPA_ONNX_ENABLE_RKNN=ON if you "
        "want to use rknn.");
    SHERPA_ONNX_EXIT(-1);
    return nullptr;
#endif
  }
  if (!config.silero_vad.model.empty()) {
    return std::make_unique<SileroVadModel>(mgr, config);
  }

  if (!config.ten_vad.model.empty()) {
    return std::make_unique<TenVadModel>(mgr, config);
  }

  SHERPA_ONNX_LOGE("Please provide a vad model");
  return nullptr;
}

#if __ANDROID_API__ >= 9
template std::unique_ptr<VadModel> VadModel::Create(
    AAssetManager *mgr, const VadModelConfig &config);
#endif

#if __OHOS__
template std::unique_ptr<VadModel> VadModel::Create(
    NativeResourceManager *mgr, const VadModelConfig &config);
#endif
}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/vad-model.h
================================================
// sherpa-onnx/csrc/vad-model.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_VAD_MODEL_H_
#define SHERPA_ONNX_CSRC_VAD_MODEL_H_

#include <memory>

#include "sherpa-onnx/csrc/vad-model-config.h"

namespace sherpa_onnx {

class VadModel {
 public:
  virtual ~VadModel() = default;

  static std::unique_ptr<VadModel> Create(const VadModelConfig &config);

  template <typename Manager>
  static std::unique_ptr<VadModel> Create(Manager *mgr,
                                          const VadModelConfig &config);

  // reset the internal model states
  virtual void Reset() = 0;

  /**
   * @param samples Pointer to a 1-d array containing audio samples.
   *                Each sample should be normalized to the range [-1, 1].
   * @param n Number of samples. Should be equal to WindowSize()
   *
   * @return Return true if speech is detected. Return false otherwise.
   */
  virtual bool IsSpeech(const float *samples, int32_t n) = 0;

  virtual float Compute(const float *samples, int32_t n) = 0;

  virtual int32_t WindowSize() const = 0;

  virtual int32_t WindowShift() const = 0;

  virtual int32_t MinSilenceDurationSamples() const = 0;
  virtual int32_t MinSpeechDurationSamples() const = 0;
  virtual void SetMinSilenceDuration(float s) = 0;
  virtual void SetThreshold(float threshold) = 0;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_VAD_MODEL_H_


================================================
FILE: sherpa-onnx/csrc/version.cc
================================================
// sherpa-onnx/csrc/version.h
//
// Copyright      2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/version.h"

namespace sherpa_onnx {

const char *GetGitDate() {
  static const char *date = "Fri Mar 20 19:09:44 2026";
  return date;
}

const char *GetGitSha1() {
  static const char *sha1 = "6ff3ce76";
  return sha1;
}

const char *GetVersionStr() {
  static const char *version = "1.12.31";
  return version;
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/version.h
================================================
// sherpa-onnx/csrc/version.h
//
// Copyright      2025  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_VERSION_H_
#define SHERPA_ONNX_CSRC_VERSION_H_

namespace sherpa_onnx {

// Please don't free the returned pointer.
// Please don't modify the memory pointed by the returned pointer.
//
// The memory pointed by the returned pointer is statically allocated.
const char *GetVersionStr();

// Please don't free the returned pointer.
// Please don't modify the memory pointed by the returned pointer.
//
// The memory pointed by the returned pointer is statically allocated.
const char *GetGitSha1();

// Please don't free the returned pointer.
// Please don't modify the memory pointed by the returned pointer.
//
// The memory pointed by the returned pointer is statically allocated.
const char *GetGitDate();

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_VERSION_H_


================================================
FILE: sherpa-onnx/csrc/vocoder.cc
================================================
// sherpa-onnx/csrc/vocoder.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/vocoder.h"

#include <memory>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/hifigan-vocoder.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/vocos-vocoder.h"

namespace sherpa_onnx {

namespace {

enum class ModelType : std::uint8_t {
  kHifigan,
  kVocoos,
  kUnknown,
};

}  // namespace

static ModelType GetModelType(char *model_data, size_t model_data_length,
                              bool debug) {
  Ort::Env env(ORT_LOGGING_LEVEL_ERROR);
  Ort::SessionOptions sess_opts;
  sess_opts.SetIntraOpNumThreads(1);
  sess_opts.SetInterOpNumThreads(1);

  auto sess = std::make_unique<Ort::Session>(env, model_data, model_data_length,
                                             sess_opts);

  Ort::ModelMetadata meta_data = sess->GetModelMetadata();
  if (debug) {
    std::ostringstream os;
    PrintModelMetadata(os, meta_data);
#if __OHOS__
    SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
    SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
  }

  Ort::AllocatorWithDefaultOptions allocator;
  auto model_type =
      LookupCustomModelMetaData(meta_data, "model_type", allocator);
  if (model_type.empty()) {
    SHERPA_ONNX_LOGE(
        "No model_type in the metadata!\n"
        "Please make sure you are using the vocoder from "
        "https://github.com/k2-fsa/sherpa-onnx/releases/tag/vocoder-models");
    return ModelType::kUnknown;
  }

  if (model_type == "hifigan") {
    return ModelType::kHifigan;
  } else if (model_type == "vocos" || model_type == "matcha-tts vocos") {
    return ModelType::kVocoos;
  } else {
    SHERPA_ONNX_LOGE("Unsupported model_type: %s", model_type.c_str());
    return ModelType::kUnknown;
  }
}

std::unique_ptr<Vocoder> Vocoder::Create(const OfflineTtsModelConfig &config) {
  std::vector<char> buffer;
  if (!config.matcha.vocoder.empty()) {
    buffer = ReadFile(config.matcha.vocoder);
  } else if (!config.zipvoice.vocoder.empty()) {
    buffer = ReadFile(config.zipvoice.vocoder);
  } else {
    SHERPA_ONNX_LOGE("No vocoder model provided in the config!");
    SHERPA_ONNX_EXIT(-1);
  }
  auto model_type = GetModelType(buffer.data(), buffer.size(), config.debug);

  switch (model_type) {
    case ModelType::kHifigan:
      return std::make_unique<HifiganVocoder>(
          config.num_threads, config.provider, config.matcha.vocoder);
    case ModelType::kVocoos:
      return std::make_unique<VocosVocoder>(config);
    case ModelType::kUnknown:
      SHERPA_ONNX_LOGE("Unknown model type in vocoder!");
      return nullptr;
  }

  return nullptr;
}

template <typename Manager>
std::unique_ptr<Vocoder> Vocoder::Create(Manager *mgr,
                                         const OfflineTtsModelConfig &config) {
  std::vector<char> buffer;
  if (!config.matcha.vocoder.empty()) {
    SHERPA_ONNX_LOGE("Using matcha vocoder: %s", config.matcha.vocoder.c_str());
    buffer = ReadFile(mgr, config.matcha.vocoder);
  } else if (!config.zipvoice.vocoder.empty()) {
    SHERPA_ONNX_LOGE("Using zipvoice vocoder: %s",
                     config.zipvoice.vocoder.c_str());
    buffer = ReadFile(mgr, config.zipvoice.vocoder);
  } else {
    SHERPA_ONNX_LOGE("No vocoder model provided in the config!");
    return nullptr;
  }

  auto model_type = GetModelType(buffer.data(), buffer.size(), config.debug);

  switch (model_type) {
    case ModelType::kHifigan:
      return std::make_unique<HifiganVocoder>(
          mgr, config.num_threads, config.provider, config.matcha.vocoder);
    case ModelType::kVocoos:
      return std::make_unique<VocosVocoder>(mgr, config);
    case ModelType::kUnknown:
      SHERPA_ONNX_LOGE("Unknown model type in vocoder!");
      return nullptr;
  }
}

#if __ANDROID_API__ >= 9
template std::unique_ptr<Vocoder> Vocoder::Create(
    AAssetManager *mgr, const OfflineTtsModelConfig &config);
#endif

#if __OHOS__
template std::unique_ptr<Vocoder> Vocoder::Create(
    NativeResourceManager *mgr, const OfflineTtsModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/vocoder.h
================================================
// sherpa-onnx/csrc/vocoder.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_VOCODER_H_
#define SHERPA_ONNX_CSRC_VOCODER_H_

#include <memory>
#include <string>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-tts-model-config.h"

namespace sherpa_onnx {

class Vocoder {
 public:
  virtual ~Vocoder() = default;

  static std::unique_ptr<Vocoder> Create(const OfflineTtsModelConfig &config);

  template <typename Manager>
  static std::unique_ptr<Vocoder> Create(Manager *mgr,
                                         const OfflineTtsModelConfig &config);

  /** @param mel A float32 tensor of shape (batch_size, feat_dim, num_frames).
   *  @return Return a float32 vector containing audio samples..
   */
  virtual std::vector<float> Run(Ort::Value mel) const = 0;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_VOCODER_H_


================================================
FILE: sherpa-onnx/csrc/vocos-vocoder.cc
================================================
// sherpa-onnx/csrc/vocos-vocoder.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/vocos-vocoder.h"

#include <memory>
#include <string>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "kaldi-native-fbank/csrc/istft.h"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"

namespace sherpa_onnx {

struct VocosModelMetaData {
  int32_t n_fft;
  int32_t hop_length;
  int32_t win_length;
  int32_t center;
  int32_t normalized;
  std::string window_type;
  std::string pad_mode;
};

class VocosVocoder::Impl {
 public:
  explicit Impl(const OfflineTtsModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config.num_threads, config.provider)),
        allocator_{} {
    std::vector<char> buffer;
    if (!config.matcha.vocoder.empty()) {
      buffer = ReadFile(config.matcha.vocoder);
    } else if (!config.zipvoice.vocoder.empty()) {
      buffer = ReadFile(config.zipvoice.vocoder);
    } else {
      SHERPA_ONNX_LOGE("No vocoder model provided in the config!");
      SHERPA_ONNX_EXIT(-1);
    }
    Init(buffer.data(), buffer.size());
  }

  template <typename Manager>
  explicit Impl(Manager *mgr, const OfflineTtsModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config.num_threads, config.provider)),
        allocator_{} {
    std::vector<char> buffer;
    if (!config.matcha.vocoder.empty()) {
      buffer = ReadFile(mgr, config.matcha.vocoder);
    } else if (!config.zipvoice.vocoder.empty()) {
      buffer = ReadFile(mgr, config.zipvoice.vocoder);
    } else {
      SHERPA_ONNX_LOGE("No vocoder model provided in the config!");
      SHERPA_ONNX_EXIT(-1);
    }
    Init(buffer.data(), buffer.size());
  }

  std::vector<float> Run(Ort::Value mel) const {
    auto out = sess_->Run({}, input_names_ptr_.data(), &mel, 1,
                          output_names_ptr_.data(), output_names_ptr_.size());

    std::vector<int64_t> shape = out[0].GetTensorTypeAndShapeInfo().GetShape();

    if (shape[0] != 1) {
      SHERPA_ONNX_LOGE("Support only batch size 1, given: %d",
                       static_cast<int32_t>(shape[0]));
      SHERPA_ONNX_EXIT(-1);
    }

    knf::StftResult stft_result;
    stft_result.num_frames = shape[2];
    stft_result.real.resize(shape[1] * shape[2]);
    stft_result.imag.resize(shape[1] * shape[2]);

    // stft_result.real: (num_frames, n_fft/2+1), flattened in row major

    // mag.shape: (batch_size, n_fft/2+1, num_frames)
    const float *p_mag = out[0].GetTensorData<float>();
    const float *p_x = out[1].GetTensorData<float>();
    const float *p_y = out[2].GetTensorData<float>();

    for (int32_t frame_index = 0; frame_index < static_cast<int32_t>(shape[2]);
         ++frame_index) {
      for (int32_t bin = 0; bin < static_cast<int32_t>(shape[1]); ++bin) {
        stft_result.real[frame_index * shape[1] + bin] =
            p_mag[bin * shape[2] + frame_index] *
            p_x[bin * shape[2] + frame_index];
        stft_result.imag[frame_index * shape[1] + bin] =
            p_mag[bin * shape[2] + frame_index] *
            p_y[bin * shape[2] + frame_index];
      }
    }

    knf::StftConfig stft_config;
    stft_config.n_fft = meta_.n_fft;
    stft_config.hop_length = meta_.hop_length;
    stft_config.win_length = meta_.win_length;
    stft_config.normalized = meta_.normalized;
    stft_config.center = meta_.center;
    stft_config.window_type = meta_.window_type;
    stft_config.pad_mode = meta_.pad_mode;

    knf::IStft istft(stft_config);
    return istft.Compute(stft_result);
  }

 private:
  void Init(void *model_data, size_t model_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);

    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      os << "---Vocos model---\n";
      PrintModelMetadata(os, meta_data);

      os << "----------input names----------\n";
      int32_t i = 0;
      for (const auto &s : input_names_) {
        os << i << " " << s << "\n";
        ++i;
      }
      os << "----------output names----------\n";
      i = 0;
      for (const auto &s : output_names_) {
        os << i << " " << s << "\n";
        ++i;
      }

#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_.n_fft, "n_fft", 1024);
    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_.hop_length, "hop_length",
                                            256);
    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_.win_length, "win_length",
                                            1024);
    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_.center, "center", 1);
    SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_.normalized, "normalized", 0);
    SHERPA_ONNX_READ_META_DATA_STR_WITH_DEFAULT(meta_.window_type,
                                                "window_type", "hann");
    SHERPA_ONNX_READ_META_DATA_STR_WITH_DEFAULT(meta_.pad_mode, "pad_mode",
                                                "reflect");
  }

 private:
  OfflineTtsModelConfig config_;
  VocosModelMetaData meta_;

  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;
};

VocosVocoder::VocosVocoder(const OfflineTtsModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
VocosVocoder::VocosVocoder(Manager *mgr, const OfflineTtsModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

VocosVocoder::~VocosVocoder() = default;

std::vector<float> VocosVocoder::Run(Ort::Value mel) const {
  return impl_->Run(std::move(mel));
}

#if __ANDROID_API__ >= 9
template VocosVocoder::VocosVocoder(AAssetManager *mgr,
                                    const OfflineTtsModelConfig &config);
#endif

#if __OHOS__
template VocosVocoder::VocosVocoder(NativeResourceManager *mgr,
                                    const OfflineTtsModelConfig &config);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/vocos-vocoder.h
================================================
// sherpa-onnx/csrc/vocos-vocoder.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_VOCOS_VOCODER_H_
#define SHERPA_ONNX_CSRC_VOCOS_VOCODER_H_

#include <memory>
#include <string>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/offline-tts-model-config.h"
#include "sherpa-onnx/csrc/vocoder.h"

namespace sherpa_onnx {

class VocosVocoder : public Vocoder {
 public:
  ~VocosVocoder() override;

  explicit VocosVocoder(const OfflineTtsModelConfig &config);

  template <typename Manager>
  VocosVocoder(Manager *mgr, const OfflineTtsModelConfig &config);

  /** @param mel A float32 tensor of shape (batch_size, feat_dim, num_frames).
   *  @return Return a float32 tensor of shape (batch_size, num_samples).
   */
  std::vector<float> Run(Ort::Value mel) const override;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_VOCOS_VOCODER_H_


================================================
FILE: sherpa-onnx/csrc/voice-activity-detector.cc
================================================
// sherpa-onnx/csrc/voice-activity-detector.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/voice-activity-detector.h"

#include <algorithm>
#include <memory>
#include <queue>
#include <utility>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif

#include "sherpa-onnx/csrc/circular-buffer.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/vad-model.h"

namespace sherpa_onnx {

class VoiceActivityDetector::Impl {
 public:
  explicit Impl(const VadModelConfig &config, float buffer_size_in_seconds = 60)
      : model_(VadModel::Create(config)),
        config_(config),
        buffer_(buffer_size_in_seconds * config.sample_rate) {
    Init();
  }

  template <typename Manager>
  Impl(Manager *mgr, const VadModelConfig &config,
       float buffer_size_in_seconds = 60)
      : model_(VadModel::Create(mgr, config)),
        config_(config),
        buffer_(buffer_size_in_seconds * config.sample_rate) {
    Init();
  }

  float Compute(const float *samples, int32_t n) {
    return model_->Compute(samples, n);
  }

  void AcceptWaveform(const float *samples, int32_t n) {
    if (buffer_.Size() > max_utterance_length_) {
      model_->SetMinSilenceDuration(new_min_silence_duration_s_);
      model_->SetThreshold(new_threshold_);
    } else {
      if (!config_.silero_vad.model.empty()) {
        model_->SetMinSilenceDuration(config_.silero_vad.min_silence_duration);
        model_->SetThreshold(config_.silero_vad.threshold);
      } else if (!config_.ten_vad.model.empty()) {
        model_->SetMinSilenceDuration(config_.ten_vad.min_silence_duration);
        model_->SetThreshold(config_.ten_vad.threshold);
      } else {
        SHERPA_ONNX_LOGE("Unknown vad model");
        SHERPA_ONNX_EXIT(-1);
      }
    }

    int32_t window_size = model_->WindowSize();
    int32_t window_shift = model_->WindowShift();

    // note n is usually window_size and there is no need to use
    // an extra buffer here
    last_.insert(last_.end(), samples, samples + n);

    if (last_.size() < window_size) {
      return;
    }

    // Note: For v4, window_shift == window_size
    int32_t k =
        (static_cast<int32_t>(last_.size()) - window_size) / window_shift + 1;
    const float *p = last_.data();
    bool is_speech = false;

    for (int32_t i = 0; i < k; ++i, p += window_shift) {
      buffer_.Push(p, window_shift);
      // NOTE(fangjun): Please don't use a very large n.
      bool this_window_is_speech = model_->IsSpeech(p, window_size);
      is_speech = is_speech || this_window_is_speech;
    }

    last_ = std::vector<float>(
        p, static_cast<const float *>(last_.data()) + last_.size());

    if (is_speech) {
      if (start_ == -1) {
        // beginning of speech
        start_ = std::max(buffer_.Tail() - 2 * model_->WindowSize() -
                              model_->MinSpeechDurationSamples(),
                          buffer_.Head());
        cur_segment_.start = start_;
      }
      int32_t num_samples = buffer_.Tail() - start_ - 1;
      cur_segment_.samples = buffer_.Get(start_, num_samples);
    } else {
      // non-speech

      cur_segment_.start = -1;
      cur_segment_.samples.clear();

      if (start_ != -1 && buffer_.Size()) {
        // end of speech, save the speech segment
        int32_t end = buffer_.Tail() - model_->MinSilenceDurationSamples();

        std::vector<float> s = buffer_.Get(start_, end - start_);
        SpeechSegment segment;

        segment.start = start_;
        segment.samples = std::move(s);

        segments_.push(std::move(segment));

        buffer_.Pop(end - buffer_.Head());
      }

      if (start_ == -1) {
        int32_t end = buffer_.Tail() - 2 * model_->WindowSize() -
                      model_->MinSpeechDurationSamples();
        int32_t n = std::max(0, end - buffer_.Head());
        if (n > 0) {
          buffer_.Pop(n);
        }
      }

      start_ = -1;
    }
  }

  bool Empty() const { return segments_.empty(); }

  void Pop() { segments_.pop(); }

  void Clear() { std::queue<SpeechSegment>().swap(segments_); }

  const SpeechSegment &Front() const {
    static SpeechSegment tmp;

    if (Empty()) {
      SHERPA_ONNX_LOGE(
          "Make sure you call this method only when Empty() returns false; "
          "Return an empty segment");
      return tmp;
    }

    return segments_.front();
  }

  void Reset() {
    std::queue<SpeechSegment>().swap(segments_);

    model_->Reset();
    buffer_.Reset();
    last_.clear();

    start_ = -1;

    cur_segment_.start = -1;
    cur_segment_.samples.clear();
  }

  void Flush() {
    if (start_ == -1 || buffer_.Size() == 0) {
      return;
    }

    int32_t end = buffer_.Tail();
    if (end <= start_) {
      return;
    }

    std::vector<float> s = buffer_.Get(start_, end - start_);

    SpeechSegment segment;

    segment.start = start_;
    segment.samples = std::move(s);

    segments_.push(std::move(segment));

    buffer_.Pop(end - buffer_.Head());
    start_ = -1;

    cur_segment_.start = -1;
    cur_segment_.samples.clear();
  }

  bool IsSpeechDetected() const { return start_ != -1; }

  SpeechSegment CurrentSpeechSegment() const { return cur_segment_; }

  const VadModelConfig &GetConfig() const { return config_; }

 private:
  void Init() {
    if (!config_.silero_vad.model.empty()) {
      max_utterance_length_ =
          config_.sample_rate * config_.silero_vad.max_speech_duration;
    } else if (!config_.ten_vad.model.empty()) {
      max_utterance_length_ =
          config_.sample_rate * config_.ten_vad.max_speech_duration;
    } else {
      SHERPA_ONNX_LOGE("Unsupported VAD model");
      SHERPA_ONNX_EXIT(-1);
    }
  }

 private:
  std::queue<SpeechSegment> segments_;

  // it is empty if no speech is detected
  SpeechSegment cur_segment_;

  std::unique_ptr<VadModel> model_;
  VadModelConfig config_;
  CircularBuffer buffer_;
  std::vector<float> last_;

  int max_utterance_length_ = -1;  // in samples
  float new_min_silence_duration_s_ = 0.1;
  float new_threshold_ = 0.90;

  int32_t start_ = -1;
};

VoiceActivityDetector::VoiceActivityDetector(
    const VadModelConfig &config, float buffer_size_in_seconds /*= 60*/)
    : impl_(std::make_unique<Impl>(config, buffer_size_in_seconds)) {}

template <typename Manager>
VoiceActivityDetector::VoiceActivityDetector(
    Manager *mgr, const VadModelConfig &config,
    float buffer_size_in_seconds /*= 60*/)
    : impl_(std::make_unique<Impl>(mgr, config, buffer_size_in_seconds)) {}

VoiceActivityDetector::~VoiceActivityDetector() = default;

void VoiceActivityDetector::AcceptWaveform(const float *samples, int32_t n) {
  impl_->AcceptWaveform(samples, n);
}

bool VoiceActivityDetector::Empty() const { return impl_->Empty(); }

void VoiceActivityDetector::Pop() { impl_->Pop(); }

void VoiceActivityDetector::Clear() { impl_->Clear(); }

const SpeechSegment &VoiceActivityDetector::Front() const {
  return impl_->Front();
}

void VoiceActivityDetector::Reset() const { impl_->Reset(); }

void VoiceActivityDetector::Flush() const { impl_->Flush(); }

bool VoiceActivityDetector::IsSpeechDetected() const {
  return impl_->IsSpeechDetected();
}

SpeechSegment VoiceActivityDetector::CurrentSpeechSegment() const {
  return impl_->CurrentSpeechSegment();
}

const VadModelConfig &VoiceActivityDetector::GetConfig() const {
  return impl_->GetConfig();
}

float VoiceActivityDetector::Compute(const float *samples, int32_t n) {
  return impl_->Compute(samples, n);
}

#if __ANDROID_API__ >= 9
template VoiceActivityDetector::VoiceActivityDetector(
    AAssetManager *mgr, const VadModelConfig &config,
    float buffer_size_in_seconds = 60);
#endif

#if __OHOS__
template VoiceActivityDetector::VoiceActivityDetector(
    NativeResourceManager *mgr, const VadModelConfig &config,
    float buffer_size_in_seconds = 60);
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/voice-activity-detector.h
================================================
// sherpa-onnx/csrc/voice-activity-detector.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_VOICE_ACTIVITY_DETECTOR_H_
#define SHERPA_ONNX_CSRC_VOICE_ACTIVITY_DETECTOR_H_

#include <memory>
#include <vector>

#include "sherpa-onnx/csrc/vad-model-config.h"

namespace sherpa_onnx {

struct SpeechSegment {
  int32_t start;  // in samples
  std::vector<float> samples;
};

class VoiceActivityDetector {
 public:
  explicit VoiceActivityDetector(const VadModelConfig &config,
                                 float buffer_size_in_seconds = 60);

  template <typename Manager>
  VoiceActivityDetector(Manager *mgr, const VadModelConfig &config,
                        float buffer_size_in_seconds = 60);

  ~VoiceActivityDetector();

  void AcceptWaveform(const float *samples, int32_t n);
  float Compute(const float *samples, int32_t n);

  bool Empty() const;
  void Pop();
  void Clear();

  // It is an error to call Front() if Empty() returns true.
  //
  // The returned reference is valid until the next call to any
  // methods of VoiceActivityDetector.
  const SpeechSegment &Front() const;

  bool IsSpeechDetected() const;

  // It is empty if IsSpeechDetected() returns false
  SpeechSegment CurrentSpeechSegment() const;

  void Reset() const;

  // At the end of the utterance, you can invoke this method so that
  // the last speech segment can be detected.
  void Flush() const;

  const VadModelConfig &GetConfig() const;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_VOICE_ACTIVITY_DETECTOR_H_


================================================
FILE: sherpa-onnx/csrc/wave-reader-test.cc
================================================
// sherpa-onnx/csrc/wave-reader-test.cc
//
// Copyright (c)  2025  Posit Software, PBC

#include "sherpa-onnx/csrc/wave-reader.h"

#include <cstdio>
#include <fstream>
#include <string>
#include <vector>

#if defined(_WIN32)
#include <windows.h>
#else
#include <unistd.h>
#endif

#include "gtest/gtest.h"

namespace sherpa_onnx {

// RAII helper class for managing temporary test files
class TempFile {
 public:
  TempFile() : TempFile("") {}

  explicit TempFile(const std::string &suffix) {
#if defined(_WIN32)
    char temp_path[MAX_PATH];
    char temp_file[MAX_PATH];
    GetTempPathA(MAX_PATH, temp_path);
    GetTempFileNameA(temp_path, "sot", 0, temp_file);
    path_ = temp_file;
    if (!suffix.empty()) {
      path_ += suffix;
      std::remove(temp_file);  // Remove the file without suffix
    }
#else
    char temp_template[] = "/tmp/sherpa_onnx_test_XXXXXX";
    int fd = mkstemp(temp_template);
    if (fd != -1) {
      close(fd);
      path_ = temp_template;
      if (!suffix.empty()) {
        path_ += suffix;
        std::remove(temp_template);  // Remove the file without suffix
      }
    }
#endif
  }

  ~TempFile() {
    if (!path_.empty()) {
      std::remove(path_.c_str());
    }
  }

  const char *path() const { return path_.c_str(); }

 private:
  std::string path_;
};

TEST(WaveReader, TestNonWavFile) {
  // Create a temporary file with non-WAV content (e.g., webm-like header)
  TempFile temp_file(".webm");

  {
    std::ofstream out(temp_file.path(), std::ios::binary);
    // Write some content that doesn't start with RIFF
    // (webm files typically start with EBML header: 0x1a45dfa3)
    const unsigned char webm_header[] = {
        0x1a, 0x45, 0xdf, 0xa3,  // EBML header signature (NOT RIFF)
        0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x42, 0x86, 0x81, 0x01,
        // Add some more bytes to make it look like a real file
        0x42, 0xf7, 0x81, 0x01, 0x42, 0xf2, 0x81, 0x04, 'w', 'e', 'b', 'm'};
    out.write(reinterpret_cast<const char *>(webm_header), sizeof(webm_header));
  }

  // Test C++ API - should not segfault
  int32_t sample_rate = -1;
  bool is_ok = false;
  std::vector<float> samples = ReadWave(temp_file.path(), &sample_rate, &is_ok);

  EXPECT_FALSE(is_ok);
  EXPECT_TRUE(samples.empty());
  EXPECT_EQ(sample_rate, -1);
}

TEST(WaveReader, TestNonExistentFile) {
  // Generate a unique path but don't create the file
  TempFile temp_file(".wav");

  // Test C++ API - should not segfault
  int32_t sample_rate = -1;
  bool is_ok = false;
  std::vector<float> samples = ReadWave(temp_file.path(), &sample_rate, &is_ok);

  EXPECT_FALSE(is_ok);
  EXPECT_TRUE(samples.empty());
  EXPECT_EQ(sample_rate, -1);
}

TEST(WaveReader, TestTruncatedWaveFile) {
  // Create a temporary file with truncated WAV header
  TempFile temp_file(".wav");

  {
    std::ofstream out(temp_file.path(), std::ios::binary);
    // Write only partial WAV header (less than 44 bytes required)
    const unsigned char partial_wav[] = {
        'R',  'I',  'F',
        'F',  // chunk_id
        0x00, 0x00, 0x00,
        0x00,  // chunk_size
        'W',  'A',  'V',
        'E'  // format
             // Missing the rest of the header
    };
    out.write(reinterpret_cast<const char *>(partial_wav), sizeof(partial_wav));
  }

  // Test C++ API - should not segfault
  int32_t sample_rate = -1;
  bool is_ok = false;
  std::vector<float> samples = ReadWave(temp_file.path(), &sample_rate, &is_ok);

  EXPECT_FALSE(is_ok);
  EXPECT_TRUE(samples.empty());
  EXPECT_EQ(sample_rate, -1);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/wave-reader.cc
================================================
// sherpa-onnx/csrc/wave-reader.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/wave-reader.h"

#include <cassert>
#include <cstdint>
#include <fstream>
#include <string>
#include <utility>
#include <vector>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {
namespace {
// see http://soundfile.sapp.org/doc/WaveFormat/
//
// Note: We assume little endian here
// TODO(fangjun): Support big endian
struct WaveHeader {
  // See
  // https://en.wikipedia.org/wiki/WAV#Metadata
  // and
  // https://www.robotplanet.dk/audio/wav_meta_data/riff_mci.pdf
  void SeekToDataChunk(std::istream &is) {
    //                              a t a d
    while (is && subchunk2_id != 0x61746164) {
      // const char *p = reinterpret_cast<const char *>(&subchunk2_id);
      // printf("Skip chunk (%x): %c%c%c%c of size: %d\n", subchunk2_id, p[0],
      //        p[1], p[2], p[3], subchunk2_size);
      is.seekg(subchunk2_size, std::istream::cur);
      is.read(reinterpret_cast<char *>(&subchunk2_id), sizeof(int32_t));
      is.read(reinterpret_cast<char *>(&subchunk2_size), sizeof(int32_t));
    }
  }

  int32_t chunk_id;
  int32_t chunk_size;
  int32_t format;
  int32_t subchunk1_id;
  int32_t subchunk1_size;
  int16_t audio_format;
  int16_t num_channels;
  int32_t sample_rate;
  int32_t byte_rate;
  int16_t block_align;
  int16_t bits_per_sample;
  int32_t subchunk2_id;    // a tag of this chunk
  int32_t subchunk2_size;  // size of subchunk2
};
static_assert(sizeof(WaveHeader) == 44);

/*
sox int16-1-channel-zh.wav -b 8 int8-1-channel-zh.wav

sox int16-1-channel-zh.wav -c 2 int16-2-channel-zh.wav

we use audacity to generate int32-1-channel-zh.wav and float32-1-channel-zh.wav
because sox uses WAVE_FORMAT_EXTENSIBLE, which is not easy to support
in sherpa-onnx.
 */

// Read a wave file of mono-channel.
// Return its samples normalized to the range [-1, 1).
std::vector<std::vector<float>> ReadWaveImpl(std::istream &is,
                                             int32_t *sampling_rate,
                                             bool *is_ok) {
  WaveHeader header{};
  is.read(reinterpret_cast<char *>(&header.chunk_id), sizeof(header.chunk_id));

  //                        F F I R
  if (header.chunk_id != 0x46464952) {
    SHERPA_ONNX_LOGE("Expected chunk_id RIFF. Given: 0x%08x\n",
                     header.chunk_id);
    *is_ok = false;
    return {};
  }

  is.read(reinterpret_cast<char *>(&header.chunk_size),
          sizeof(header.chunk_size));

  is.read(reinterpret_cast<char *>(&header.format), sizeof(header.format));

  //                      E V A W
  if (header.format != 0x45564157) {
    SHERPA_ONNX_LOGE("Expected format WAVE. Given: 0x%08x\n", header.format);
    *is_ok = false;
    return {};
  }

  is.read(reinterpret_cast<char *>(&header.subchunk1_id),
          sizeof(header.subchunk1_id));

  is.read(reinterpret_cast<char *>(&header.subchunk1_size),
          sizeof(header.subchunk1_size));

  if (header.subchunk1_id == 0x4b4e554a) {
    // skip junk padding
    is.seekg(header.subchunk1_size, std::istream::cur);

    is.read(reinterpret_cast<char *>(&header.subchunk1_id),
            sizeof(header.subchunk1_id));

    is.read(reinterpret_cast<char *>(&header.subchunk1_size),
            sizeof(header.subchunk1_size));
  }

  if (header.subchunk1_id != 0x20746d66) {
    SHERPA_ONNX_LOGE("Expected subchunk1_id 0x20746d66. Given: 0x%08x\n",
                     header.subchunk1_id);
    *is_ok = false;
    return {};
  }

  // NAudio uses 18
  // See https://github.com/naudio/NAudio/issues/1132
  if (header.subchunk1_size != 16 &&
      header.subchunk1_size != 18) {  // 16 for PCM
    SHERPA_ONNX_LOGE("Expected subchunk1_size 16. Given: %d\n",
                     header.subchunk1_size);
    *is_ok = false;
    return {};
  }

  is.read(reinterpret_cast<char *>(&header.audio_format),
          sizeof(header.audio_format));

  if (header.audio_format != 1 && header.audio_format != 3) {
    // 1 for integer PCM
    // 3 for floating point PCM
    // see https://www.mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html
    // and https://github.com/microsoft/DirectXTK/wiki/Wave-Formats
    SHERPA_ONNX_LOGE("Expected audio_format 1. Given: %d\n",
                     header.audio_format);

    if (header.audio_format == static_cast<int16_t>(0xfffe)) {
      SHERPA_ONNX_LOGE("We don't support WAVE_FORMAT_EXTENSIBLE files.");
    }

    *is_ok = false;
    return {};
  }

  is.read(reinterpret_cast<char *>(&header.num_channels),
          sizeof(header.num_channels));

  is.read(reinterpret_cast<char *>(&header.sample_rate),
          sizeof(header.sample_rate));

  is.read(reinterpret_cast<char *>(&header.byte_rate),
          sizeof(header.byte_rate));

  is.read(reinterpret_cast<char *>(&header.block_align),
          sizeof(header.block_align));

  is.read(reinterpret_cast<char *>(&header.bits_per_sample),
          sizeof(header.bits_per_sample));

  if (header.byte_rate !=
      (header.sample_rate * header.num_channels * header.bits_per_sample / 8)) {
    SHERPA_ONNX_LOGE("Incorrect byte rate: %d. Expected: %d", header.byte_rate,
                     (header.sample_rate * header.num_channels *
                      header.bits_per_sample / 8));
    *is_ok = false;
    return {};
  }

  if (header.block_align !=
      (header.num_channels * header.bits_per_sample / 8)) {
    SHERPA_ONNX_LOGE("Incorrect block align: %d. Expected: %d\n",
                     header.block_align,
                     (header.num_channels * header.bits_per_sample / 8));
    *is_ok = false;
    return {};
  }

  if (header.bits_per_sample != 8 && header.bits_per_sample != 16 &&
      header.bits_per_sample != 32) {
    SHERPA_ONNX_LOGE("Expected bits_per_sample 8, 16 or 32. Given: %d\n",
                     header.bits_per_sample);
    *is_ok = false;
    return {};
  }

  if (header.subchunk1_size == 18) {
    // this is for NAudio. It puts extra bytes after bits_per_sample
    // See
    // https://github.com/naudio/NAudio/blob/master/NAudio.Core/Wave/WaveFormats/WaveFormat.cs#L223

    int16_t extra_size = -1;
    is.read(reinterpret_cast<char *>(&extra_size), sizeof(int16_t));
    if (extra_size != 0) {
      SHERPA_ONNX_LOGE(
          "Extra size should be 0 for wave from NAudio. Current extra size "
          "%d\n",
          extra_size);
      *is_ok = false;
      return {};
    }
  }

  is.read(reinterpret_cast<char *>(&header.subchunk2_id),
          sizeof(header.subchunk2_id));

  is.read(reinterpret_cast<char *>(&header.subchunk2_size),
          sizeof(header.subchunk2_size));

  header.SeekToDataChunk(is);
  if (!is) {
    *is_ok = false;
    return {};
  }

  *sampling_rate = header.sample_rate;

  std::vector<std::vector<float>> ans(header.num_channels);

  if (header.bits_per_sample == 16 && header.audio_format == 1) {
    // header.subchunk2_size contains the number of bytes in the data.
    // As we assume each sample contains two bytes, so it is divided by 2 here
    std::vector<int16_t> samples(header.subchunk2_size / 2);

    is.read(reinterpret_cast<char *>(samples.data()),
            samples.size() * sizeof(int16_t));
    if (!is) {
      SHERPA_ONNX_LOGE("Failed to read %d bytes", header.subchunk2_size);
      *is_ok = false;
      return {};
    }

    for (auto &v : ans) {
      v.resize(samples.size() / header.num_channels);
    }

    // samples are interleaved
    for (int32_t i = 0, k = 0; i < static_cast<int32_t>(samples.size());
         i += header.num_channels, ++k) {
      for (int32_t c = 0; c != header.num_channels; ++c) {
        ans[c][k] = samples[i + c] / 32768.;
      }
    }
  } else if (header.bits_per_sample == 8 && header.audio_format == 1) {
    // number of samples == number of bytes for 8-bit encoded samples
    //
    // For 8-bit encoded samples, they are unsigned!
    std::vector<uint8_t> samples(header.subchunk2_size);

    is.read(reinterpret_cast<char *>(samples.data()), header.subchunk2_size);
    if (!is) {
      SHERPA_ONNX_LOGE("Failed to read %d bytes", header.subchunk2_size);
      *is_ok = false;
      return {};
    }

    for (auto &v : ans) {
      v.resize(samples.size() / header.num_channels);
    }

    // samples are interleaved
    for (int32_t i = 0, k = 0; i < static_cast<int32_t>(samples.size());
         i += header.num_channels, ++k) {
      for (int32_t c = 0; c != header.num_channels; ++c) {
        // Note(fangjun): We want to normalize each sample into the range [-1,
        // 1] Since each original sample is in the range [0, 256], dividing them
        // by 128 converts them to the range [0, 2]; so after subtracting 1, we
        // get the range [-1, 1]
        //
        ans[c][k] = samples[i + c] / 128. - 1;
      }
    }
  } else if (header.bits_per_sample == 32 && header.audio_format == 1) {
    // 32 here is for int32
    //
    // header.subchunk2_size contains the number of bytes in the data.
    // As we assume each sample contains 4 bytes, so it is divided by 4 here
    std::vector<int32_t> samples(header.subchunk2_size / 4);

    is.read(reinterpret_cast<char *>(samples.data()), header.subchunk2_size);
    if (!is) {
      SHERPA_ONNX_LOGE("Failed to read %d bytes", header.subchunk2_size);
      *is_ok = false;
      return {};
    }

    for (auto &v : ans) {
      v.resize(samples.size() / header.num_channels);
    }

    // samples are interleaved
    for (int32_t i = 0, k = 0; i < static_cast<int32_t>(samples.size());
         i += header.num_channels, ++k) {
      for (int32_t c = 0; c != header.num_channels; ++c) {
        ans[c][k] = static_cast<float>(samples[i + c]) / (1 << 31);
      }
    }
  } else if (header.bits_per_sample == 32 && header.audio_format == 3) {
    // 32 here is for float32
    //
    // header.subchunk2_size contains the number of bytes in the data.
    // As we assume each sample contains 4 bytes, so it is divided by 4 here
    std::vector<float> samples(header.subchunk2_size / 4);

    is.read(reinterpret_cast<char *>(samples.data()), header.subchunk2_size);
    if (!is) {
      SHERPA_ONNX_LOGE("Failed to read %d bytes", header.subchunk2_size);
      *is_ok = false;
      return {};
    }

    for (auto &v : ans) {
      v.resize(samples.size() / header.num_channels);
    }

    // samples are interleaved
    for (int32_t i = 0, k = 0; i < static_cast<int32_t>(samples.size());
         i += header.num_channels, ++k) {
      for (int32_t c = 0; c != header.num_channels; ++c) {
        ans[c][k] = samples[i + c];
      }
    }
  } else {
    SHERPA_ONNX_LOGE(
        "Unsupported %d bits per sample and audio format: %d. Supported values "
        "are: 8, 16, 32.",
        header.bits_per_sample, header.audio_format);
    *is_ok = false;
    return {};
  }

  *is_ok = true;
  return ans;
}

}  // namespace

std::vector<float> ReadWave(const std::string &filename, int32_t *sampling_rate,
                            bool *is_ok) {
  *is_ok = false;
  if (filename.empty()) {
    SHERPA_ONNX_LOGE("Filename is empty");
    return {};
  }

  if (!FileExists(filename)) {
    SHERPA_ONNX_LOGE("Filename '%s' does not exist", filename.c_str());
    return {};
  }

  std::ifstream is(filename, std::ifstream::binary);
  return ReadWave(is, sampling_rate, is_ok);
}

std::vector<float> ReadWave(std::istream &is, int32_t *sampling_rate,
                            bool *is_ok) {
  auto samples = ReadWaveImpl(is, sampling_rate, is_ok);

  if (!*is_ok || samples.empty()) {
    return {};
  }

  if (samples.size() > 1) {
    SHERPA_ONNX_LOGE(
        "Warning: %d channels are found. We only use the first channel.\n",
        static_cast<int32_t>(samples.size()));
  }

  return samples[0];
}

std::vector<std::vector<float>> ReadWaveMultiChannel(std::istream &is,
                                                     int32_t *sampling_rate,
                                                     bool *is_ok) {
  auto samples = ReadWaveImpl(is, sampling_rate, is_ok);
  return samples;
}

std::vector<std::vector<float>> ReadWaveMultiChannel(
    const std::string &filename, int32_t *sampling_rate, bool *is_ok) {
  std::ifstream is(filename, std::ifstream::binary);
  return ReadWaveMultiChannel(is, sampling_rate, is_ok);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/wave-reader.h
================================================
// sherpa-onnx/csrc/wave-reader.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_WAVE_READER_H_
#define SHERPA_ONNX_CSRC_WAVE_READER_H_

#include <istream>
#include <string>
#include <vector>

namespace sherpa_onnx {

/** Read a wave file with expected sample rate.

    @param filename Path to a wave file. It MUST be single channel, 16-bit
                    PCM encoded.
    @param sampling_rate  On return, it contains the sampling rate of the file.
    @param is_ok On return it is true if the reading succeeded; false otherwise.

    @return Return wave samples normalized to the range [-1, 1).
 */
std::vector<float> ReadWave(const std::string &filename, int32_t *sampling_rate,
                            bool *is_ok);

std::vector<float> ReadWave(std::istream &is, int32_t *sampling_rate,
                            bool *is_ok);

std::vector<std::vector<float>> ReadWaveMultiChannel(std::istream &is,
                                                     int32_t *sampling_rate,
                                                     bool *is_ok);

std::vector<std::vector<float>> ReadWaveMultiChannel(
    const std::string &filename, int32_t *sampling_rate, bool *is_ok);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_WAVE_READER_H_


================================================
FILE: sherpa-onnx/csrc/wave-writer.cc
================================================
// sherpa-onnx/csrc/wave-writer.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/wave-writer.h"

#include <algorithm>
#include <cstring>
#include <fstream>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {
namespace {

// see http://soundfile.sapp.org/doc/WaveFormat/
//
// Note: We assume little endian here
// TODO(fangjun): Support big endian
struct WaveHeader {
  int32_t chunk_id;
  int32_t chunk_size;
  int32_t format;
  int32_t subchunk1_id;
  int32_t subchunk1_size;
  int16_t audio_format;
  int16_t num_channels;
  int32_t sample_rate;
  int32_t byte_rate;
  int16_t block_align;
  int16_t bits_per_sample;
  int32_t subchunk2_id;    // a tag of this chunk
  int32_t subchunk2_size;  // size of subchunk2
};

}  // namespace

int64_t WaveFileSize(int32_t n_samples, int32_t num_channels /*= 1*/) {
  return sizeof(WaveHeader) + n_samples * sizeof(int16_t) * num_channels;
}

void WriteWave(char *buffer, int32_t sampling_rate, const float *samples,
               int32_t n) {
  WriteWave(buffer, sampling_rate, samples, nullptr, n);
}

bool WriteWave(const std::string &filename, int32_t sampling_rate,
               const float *samples, int32_t n) {
  return WriteWave(filename, sampling_rate, samples, nullptr, n);
}

bool WriteWave(const std::string &filename, int32_t sampling_rate,
               const float *samples_ch0, const float *samples_ch1, int32_t n) {
  std::string buffer;
  buffer.resize(WaveFileSize(n, samples_ch1 == nullptr ? 1 : 2));

  WriteWave(buffer.data(), sampling_rate, samples_ch0, samples_ch1, n);

  std::ofstream os(filename, std::ios::binary);
  if (!os) {
    SHERPA_ONNX_LOGE("Failed to create '%s'", filename.c_str());
    return false;
  }

  os << buffer;
  if (!os) {
    SHERPA_ONNX_LOGE("Write '%s' failed", filename.c_str());
    return false;
  }

  return true;
}

void WriteWave(char *buffer, int32_t sampling_rate, const float *samples_ch0,
               const float *samples_ch1, int32_t n) {
  WaveHeader header{};
  header.chunk_id = 0x46464952;      // FFIR
  header.format = 0x45564157;        // EVAW
  header.subchunk1_id = 0x20746d66;  // "fmt "
  header.subchunk1_size = 16;        // 16 for PCM
  header.audio_format = 1;           // PCM =1

  int32_t num_channels = samples_ch1 == nullptr ? 1 : 2;
  int32_t bits_per_sample = 16;  // int16_t

  header.num_channels = num_channels;
  header.sample_rate = sampling_rate;
  header.byte_rate = sampling_rate * num_channels * bits_per_sample / 8;
  header.block_align = num_channels * bits_per_sample / 8;
  header.bits_per_sample = bits_per_sample;
  header.subchunk2_id = 0x61746164;  // atad
  header.subchunk2_size = n * num_channels * bits_per_sample / 8;

  header.chunk_size = 36 + header.subchunk2_size;

  std::vector<int16_t> samples_int16_ch0(n);
  for (int32_t i = 0; i != n; ++i) {
    samples_int16_ch0[i] = std::min<int32_t>(samples_ch0[i] * 32767, 32767);
  }

  std::vector<int16_t> samples_int16_ch1;
  if (samples_ch1) {
    samples_int16_ch1.resize(n);
    for (int32_t i = 0; i != n; ++i) {
      samples_int16_ch1[i] = std::min<int32_t>(samples_ch1[i] * 32767, 32767);
    }
  }

  memcpy(buffer, &header, sizeof(WaveHeader));

  if (samples_ch1 == nullptr) {
    memcpy(buffer + sizeof(WaveHeader), samples_int16_ch0.data(),
           n * sizeof(int16_t));
  } else {
    auto p = reinterpret_cast<int16_t *>(buffer + sizeof(WaveHeader));

    for (int32_t i = 0; i != n; ++i) {
      p[2 * i] = samples_int16_ch0[i];
      p[2 * i + 1] = samples_int16_ch1[i];
    }
  }
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/csrc/wave-writer.h
================================================
// sherpa-onnx/csrc/wave-writer.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_WAVE_WRITER_H_
#define SHERPA_ONNX_CSRC_WAVE_WRITER_H_

#include <cstdint>
#include <string>

namespace sherpa_onnx {

// Write a single channel wave file.
// Note that the input samples are in the range [-1, 1]. It will be multiplied
// by 32767 and saved in int16_t format in the wave file.
//
// @param filename Path to save the samples.
// @param sampling_rate Sample rate of the samples.
// @param samples Pointer to the samples
// @param n Number of samples
// @return Return true if the write succeeds; return false otherwise.
bool WriteWave(const std::string &filename, int32_t sampling_rate,
               const float *samples, int32_t n);

void WriteWave(char *buffer, int32_t sampling_rate, const float *samples,
               int32_t n);

bool WriteWave(const std::string &filename, int32_t sampling_rate,
               const float *samples_ch0, const float *samples_ch1, int32_t n);

void WriteWave(char *buffer, int32_t sampling_rate, const float *samples_ch0,
               const float *samples_ch1, int32_t n);

int64_t WaveFileSize(int32_t n_samples, int32_t num_channels = 1);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_WAVE_WRITER_H_


================================================
FILE: sherpa-onnx/java-api/.build.txt
================================================
[win.env]
set JAVA_HOME=D:\java\jdk1.8.0_121

[win.build]
mvn clean install -DskipTests -Dgpg.skip=true

[linux.env]
export JAVA_HOME=/usr/java/jdk1.8.0_121

[linux.build]
mvn clean install -DskipTests -Dgpg.skip=true

[mac.env]
export JAVA_HOME=~/java/jdk1.8.0_121

[mac.build]
mvn clean install -DskipTests -Dgpg.skip=true

================================================
FILE: sherpa-onnx/java-api/.gitignore
================================================
### Eclipse template
*.pydevproject
.metadata
.gradle*
classes/
bin/
tmp/
*.tmp
*.bak
*.swp
*~.nib
local.properties
.settings/
.loadpath
rebel.xml

# Eclipse Core
.project

generatedsources

# External tool builders
.externalToolBuilders/

# Locally stored "Eclipse launch configurations"
*.launch

# CDT-specific
.cproject

# JDT-specific (Eclipse Java Development Tools)
.classpath

# PDT-specific
.buildpath

# sbteclipse plugin
.target

# TeXlipse plugin
.texlipse


### JetBrains template
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm

*.iml
.flattened-pom.xml
## Directory-based project format:
.idea/
# if you remove the above rule, at least ignore the following:

# User-specific stuff:
# .idea/workspace.xml
# .idea/tasks.xml
# .idea/dictionaries

# Sensitive or high-churn files:
# .idea/dataSources.ids
# .idea/dataSources.xml
# .idea/sqlDataSources.xml
# .idea/dynamic.xml
# .idea/uiDesigner.xml

# Gradle:
# .idea/gradle.xml
# .idea/libraries

# Mongo Explorer plugin:
# .idea/mongoSettings.xml

## File-based project format:
*.ipr
*.iws

## Plugin-specific files:

# IntelliJ
/out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties

build/

# Ignore Gradle GUI config
gradle-app.setting

# Avoid ignoring Gradle wrapper jar file (.jar files are usually ignored)
!gradle-wrapper.jar

db

### Java template
*.class

# Mobile Tools for Java (J2ME)
.mtj.tmp/

# Package Files #
#*.jar

# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*


### Leiningen template
classes/
target/
logs/
checkouts/
.lein-deps-sum
.lein-repl-history
.lein-plugins/
.lein-failures
.nrepl-port

querydsl/

.DS_Store

*.exe
*.out

*.log
node_modules/
dist/
dist.zip
package-lock.json
*.lock
local.properties
.cxx
.externalNativeBuild
/captures
/build
__pycache__/
*.pyc


cmake-build-debug/
cmake-build-debug-mingw/
venv/
.vs/
Debug/
vcpkg_installed/
.env
.next/
app.zip
secrets.txt
src.zip

================================================
FILE: sherpa-onnx/java-api/MANIFEST.MF
================================================
Manifest-Version: 1.0


================================================
FILE: sherpa-onnx/java-api/Makefile
================================================
# Copyright 2024 Xiaomi Corporation

# all .class and .jar files are put inside out_dir
out_dir := build
out_jar := $(out_dir)/sherpa-onnx.jar

package_dir := com/k2fsa/sherpa/onnx

java_files := LibraryUtils.java
java_files += LibraryLoader.java

java_files += VersionInfo.java
java_files += WaveData.java
java_files += WaveReader.java
java_files += WaveWriter.java
java_files += EndpointRule.java
java_files += EndpointConfig.java
java_files += FeatureConfig.java
java_files += QnnConfig.java
java_files += HomophoneReplacerConfig.java
java_files += OnlineLMConfig.java
java_files += OnlineParaformerModelConfig.java
java_files += OnlineZipformer2CtcModelConfig.java
java_files += OnlineToneCtcModelConfig.java
java_files += OnlineNeMoCtcModelConfig.java
java_files += OnlineTransducerModelConfig.java
java_files += OnlineModelConfig.java
java_files += OnlineCtcFstDecoderConfig.java
java_files += OnlineStream.java
java_files += OnlineRecognizerConfig.java
java_files += OnlineRecognizerResult.java
java_files += OnlineRecognizer.java

java_files += OfflineTransducerModelConfig.java
java_files += OfflineParaformerModelConfig.java
java_files += OfflineWhisperModelConfig.java
java_files += OfflineFireRedAsrModelConfig.java
java_files += OfflineMoonshineModelConfig.java
java_files += OfflineNemoEncDecCtcModelConfig.java
java_files += OfflineZipformerCtcModelConfig.java
java_files += OfflineWenetCtcModelConfig.java
java_files += OfflineOmnilingualAsrCtcModelConfig.java
java_files += OfflineMedAsrCtcModelConfig.java
java_files += OfflineFireRedAsrCtcModelConfig.java
java_files += OfflineFunAsrNanoModelConfig.java
java_files += OfflineCanaryModelConfig.java
java_files += OfflineSenseVoiceModelConfig.java
java_files += OfflineDolphinModelConfig.java
java_files += OfflineModelConfig.java
java_files += OfflineRecognizerConfig.java
java_files += OfflineRecognizerResult.java
java_files += OfflineStream.java
java_files += OfflineRecognizer.java

java_files += GenerationConfig.java
java_files += OfflineTtsKittenModelConfig.java
java_files += OfflineTtsPocketModelConfig.java
java_files += OfflineTtsSupertonicModelConfig.java
java_files += OfflineTtsKokoroModelConfig.java
java_files += OfflineTtsZipVoiceModelConfig.java
java_files += OfflineTtsMatchaModelConfig.java
java_files += OfflineTtsVitsModelConfig.java
java_files += OfflineTtsModelConfig.java
java_files += OfflineTtsConfig.java
java_files += GeneratedAudio.java
java_files += OfflineTtsCallback.java
java_files += OfflineTts.java

java_files += SpokenLanguageIdentificationWhisperConfig.java
java_files += SpokenLanguageIdentificationConfig.java
java_files += SpokenLanguageIdentification.java

java_files += OfflinePunctuationModelConfig.java
java_files += OfflinePunctuationConfig.java
java_files += OfflinePunctuation.java

java_files += OnlinePunctuationModelConfig.java
java_files += OnlinePunctuationConfig.java
java_files += OnlinePunctuation.java

java_files += OfflineZipformerAudioTaggingModelConfig.java
java_files += AudioTaggingModelConfig.java
java_files += AudioTaggingConfig.java
java_files += AudioEvent.java
java_files += AudioTagging.java

java_files += SpeakerEmbeddingExtractorConfig.java
java_files += SpeakerEmbeddingExtractor.java
java_files += SpeakerEmbeddingManager.java

java_files += TenVadModelConfig.java
java_files += SileroVadModelConfig.java
java_files += VadModelConfig.java
java_files += SpeechSegment.java
java_files += Vad.java

java_files += KeywordSpotterConfig.java
java_files += KeywordSpotterResult.java
java_files += KeywordSpotter.java

java_files += OfflineSpeakerSegmentationPyannoteModelConfig.java
java_files += OfflineSpeakerSegmentationModelConfig.java
java_files += FastClusteringConfig.java
java_files += OfflineSpeakerDiarizationConfig.java
java_files += OfflineSpeakerDiarizationSegment.java
java_files += OfflineSpeakerDiarizationCallback.java
java_files += OfflineSpeakerDiarization.java

java_files += OfflineSpeechDenoiserGtcrnModelConfig.java
java_files += OfflineSpeechDenoiserDpdfNetModelConfig.java
java_files += OfflineSpeechDenoiserModelConfig.java
java_files += OfflineSpeechDenoiserConfig.java
java_files += DenoisedAudio.java
java_files += OfflineSpeechDenoiser.java
java_files += OnlineSpeechDenoiserConfig.java
java_files += OnlineSpeechDenoiser.java

class_files := $(java_files:%.java=%.class)

java_files := $(addprefix src/main/java/$(package_dir)/,$(java_files))
class_files := $(addprefix $(out_dir)/$(package_dir)/,$(class_files))

$(info -- java files $(java_files))
$(info --)
$(info -- class files $(class_files))

.PHONY: all clean native

all: $(out_jar)

# macos x86_x64 -> osx-x64
# macos arm64 -> osx-aarch64
# linux x86_x64 -> linux-x64
# linux arm64 -> linux-aarch64
# windows x86_x64 -> win-x64
# windows arm64 -> win-aarch64
# windows x86 -> win-x86
native:
	jar cfvm ./sherpa-onnx-native.jar MANIFEST.MF -C ./resources .

$(out_jar): $(class_files)
	# jar --create --verbose --file $(out_jar) -C $(out_dir) ./
	# jar cvf $(out_jar) -C $(out_dir) ./
	jar cfvm $@ MANIFEST.MF -C $(out_dir) .

clean:
	$(RM) -rfv $(out_dir)

$(class_files): $(out_dir)/$(package_dir)/%.class: src/main/java/$(package_dir)/%.java
	mkdir -p build
	javac --release 8 -Xlint:-options -d $(out_dir) -cp $(out_dir) $<


================================================
FILE: sherpa-onnx/java-api/pom.xml
================================================
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.litongjava</groupId>
  <artifactId>sherpa-onnx-java-api</artifactId>
  <version>1.0.1</version>
  <packaging>jar</packaging>
  <name>sherpa-onnx-java-api</name>
  <description>sherpa-onnx-java-api</description>
  <url>https://github.com/k2-fsa/sherpa-onnx/tree/master/sherpa-onnx/java-api</url>
  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <java.version>1.8</java.version>
    <maven.compiler.source>${java.version}</maven.compiler.source>
    <maven.compiler.target>${java.version}</maven.compiler.target>
  </properties>

  <licenses>
    <license>
      <name>The Apache Software License, Version 2.0</name>
      <url>http://apache.org/licenses/LICENSE-2.0.txt</url>
    </license>
  </licenses>

  <developers>
    <developer>
      <id>litongjava</id>
      <name>Tong Li</name>
      <email>litongjava001@gmail.com</email>
      <url>https://github.com/litongjava</url>
    </developer>
  </developers>

  <scm>
    <connection>scm:git:git@github.com:k2-fsa/sherpa-onnx.git</connection>
    <developerConnection>scm:git:git@github.com:k2-fsa/sherpa-onnx.git</developerConnection>
    <url>git@github.com:k2-fsa/sherpa-onnx.git</url>
  </scm>

  <build>
    <plugins>
      <!-- Source -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-source-plugin</artifactId>
        <version>2.2.1</version>
        <executions>
          <execution>
            <phase>package</phase>
            <goals>
              <goal>jar-no-fork</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
      <!-- Javadoc -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-javadoc-plugin</artifactId>
        <version>2.9.1</version>
        <configuration>
          <!-- 添加这个压制JavaDoc检查 -->
          <additionalparam>-Xdoclint:none</additionalparam>
        </configuration>
        <executions>
          <execution>
            <phase>package</phase>
            <goals>
              <goal>jar</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
      <!-- GPG mvn clean deploy -Dgpg.passphrase=YourPassphase -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-gpg-plugin</artifactId>
        <version>1.5</version>
        <executions>
          <execution>
            <id>sign-artifacts</id>
            <phase>verify</phase>
            <goals>
              <goal>sign</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <groupId>org.sonatype.central</groupId>
        <artifactId>central-publishing-maven-plugin</artifactId>
        <version>0.7.0</version>
        <extensions>true</extensions>
        <configuration>
          <publishingServerId>central</publishingServerId>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>

================================================
FILE: sherpa-onnx/java-api/readme.md
================================================
# User Guide

*Applicable to Windows / macOS / Linux (using Windows as an example for dynamic library loading)*

## 1. Prerequisites

* Java 1.8+ environment
* Download and prepare the following:

  * Sherpa-ONNX Java API (Maven dependency)
  * Kokoro TTS model files (including `model.onnx`, etc.)

---

## 2. Add Maven Dependency

In your `pom.xml`, add:

```xml
<dependency>
  <groupId>com.litongjava</groupId>
  <artifactId>sherpa-onnx-java-api</artifactId>
  <version>1.0.1</version>
</dependency>
```

---

## 3. Obtain and Configure Native Dynamic Libraries (JNI)

### 3.1 Install ONNX Runtime

#### Windows 10

Starting from Windows 10 v1809 and all versions of Windows 11, the system comes with built-in ONNX Runtime as part of Windows ML (WinRT API), exposed through Windows.AI.MachineLearning.dll. You can directly use WinML to load and run ONNX models without additional downloads or installations.
[run-onnx-models](https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/run-onnx-models)

#### Linux

Sherpa-ONNX does **not** bundle ONNX Runtime. To install it manually:

1. Download the Linux x64 binary from Microsoft’s GitHub Releases:

   ```bash
   wget https://github.com/microsoft/onnxruntime/releases/download/v1.23.2/onnxruntime-linux-x64-1.23.2.tgz
   tar -xzf onnxruntime-linux-x64-1.23.2.tgz
   ```

2. Copy and symlink the library into a system directory:

   ```bash
   sudo cp onnxruntime-linux-x64-1.23.2/lib/libonnxruntime.so* /usr/local/lib/
   sudo ln -sf /usr/local/lib/libonnxruntime.so.1.23.2 /usr/local/lib/libonnxruntime.so
   ```

3. Update the shared-library cache and verify:

   ```bash
   sudo ldconfig
   ldconfig -p | grep onnxruntime
   ```

#### macOS

Sherpa-ONNX also requires you to install ONNX Runtime on macOS:

1. Download the macOS ARM64 binary:

   ```bash
   wget https://github.com/microsoft/onnxruntime/releases/download/v1.23.2/onnxruntime-osx-arm64-1.23.2.tgz
   tar -xzf onnxruntime-osx-arm64-1.23.2.tgz
   ```

2. Copy the dylib into `/usr/local/lib`:

   ```bash
   sudo cp onnxruntime-osx-arm64-1.23.2/lib/libonnxruntime.1.23.2.dylib /usr/local/lib/
   ```

3. Add `/usr/local/lib` to `dyld`’s search path:

   ```bash
   export DYLD_LIBRARY_PATH=/usr/local/lib:$DYLD_LIBRARY_PATH
   ```

4. Verify with `otool`:

   ```bash
   otool -L /Users/ping/lib/darwin_arm64/libsherpa-onnx-jni.dylib
   ```

---

### 3.2 Common Errors & Troubleshooting

**Error Example:**

```text
Exception in thread "main" java.lang.UnsatisfiedLinkError: no sherpa-onnx-jni in java.library.path: ...
```

This means the JVM couldn’t locate the native library in `java.library.path`.

**Troubleshooting steps:**

1. Ensure you downloaded the build matching your OS and architecture (e.g. win-x64 vs. arm64).

2. Test with an absolute path:

   ```bash
   java -Djava.library.path=C:\full\path\to\jni -jar your-app.jar
   ```

3. Print or inspect `java.library.path` at runtime (e.g. `System.out.println(System.getProperty("java.library.path"));`).

4. **Do not** hack the internal `sys_paths` via reflection (it may throw `NoSuchFieldException`). Use `-Djava.library.path` instead.

---

## 4. Download & Prepare the Kokoro Model

Fetch the model package from the official release (example: Kokoro v0.19 English):

```
https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
```

```bash
# Download (manually or via script)
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2

# Extract
tar -xjf kokoro-en-v0_19.tar.bz2

# Inspect
ls -lh kokoro-en-v0_19/
```

You should see:

```
LICENSE
README.md
espeak-ng-data/    # speech data directory
model.onnx         # TTS model
tokens.txt         # token mapping
voices.bin         # voice embeddings
```

Make sure your Java code points to these files (using either relative or absolute paths).

---

## 5. Test Code (Java Example)

```java
package com.litongjava.linux.tts;

import com.k2fsa.sherpa.onnx.GeneratedAudio;
import com.k2fsa.sherpa.onnx.OfflineTts;
import com.k2fsa.sherpa.onnx.OfflineTtsConfig;
import com.k2fsa.sherpa.onnx.OfflineTtsKokoroModelConfig;
import com.k2fsa.sherpa.onnx.OfflineTtsModelConfig;

public class NonStreamingTtsKokoroEn {
  public static void main(String[] args) {
    String model   = "./kokoro-en-v0_19/model.onnx";
    String voices  = "./kokoro-en-v0_19/voices.bin";
    String tokens  = "./kokoro-en-v0_19/tokens.txt";
    String dataDir = "./kokoro-en-v0_19/espeak-ng-data";
    String text    = "Today as always, men fall into two groups: slaves and free men. Whoever does not have"
                   + " two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a"
                   + " businessman, an official, or a scholar.";

    OfflineTtsKokoroModelConfig kokoroConfig = OfflineTtsKokoroModelConfig.builder()
        .setModel(model)
        .setVoices(voices)
        .setTokens(tokens)
        .setDataDir(dataDir)
        .build();

    OfflineTtsModelConfig modelConfig = OfflineTtsModelConfig.builder()
        .setKokoro(kokoroConfig)
        .setNumThreads(2)
        .setDebug(true)
        .build();

    OfflineTtsConfig config = OfflineTtsConfig.builder()
        .setModel(modelConfig)
        .build();

    OfflineTts tts = new OfflineTts(config);

    int sid   = 0;
    float speed = 1.0f;
    long start = System.currentTimeMillis();
    GeneratedAudio audio = tts.generate(text, sid, speed);
    long stop  = System.currentTimeMillis();

    float elapsed   = (stop - start) / 1000.0f;
    float duration  = audio.getSamples().length / (float) audio.getSampleRate();
    float rtf       = elapsed / duration;

    String outFile = "tts-kokoro-en.wav";
    audio.save(outFile);

    System.out.printf("-- elapsed           : %.3f seconds%n", elapsed);
    System.out.printf("-- audio duration    : %.3f seconds%n", duration);
    System.out.printf("-- real-time factor  : %.3f%n", rtf);
    System.out.printf("-- text              : %s%n", text);
    System.out.printf("-- Saved to          : %s%n", outFile);

    tts.release();
  }
}
```

### Output Explanation

After successful execution, you should see something like:

```
-- elapsed           : 6.739 seconds
-- audio duration    : 6.739 seconds
-- real-time factor  : 0.563
-- text              : ...
-- Saved to          : tts-kokoro-en.wav
```

A file named `tts-kokoro-en.wav` will appear in the current directory—play it with any audio player to verify.


================================================
FILE: sherpa-onnx/java-api/readme.zh.md
================================================
# 使用指南

*适用于 Windows / macOS / Linux（以 Windows 为例说明动态库加载）*

## 1. 前提条件

* Java 1.8+ 环境
* 下载并准备好以下内容：
  * Sherpa-ONNX Java API（Maven 依赖）
  * Kokoro TTS 模型文件（包含 `model.onnx` 等）

---

## 2. 添加 Maven 依赖

在你的 `pom.xml` 中添加如下依赖：

```xml
<dependency>
  <groupId>com.litongjava</groupId>
  <artifactId>sherpa-onnx-java-api</artifactId>
  <version>1.0.1</version>
</dependency>
```

---

## 3. 获取并配置本地动态链接库（JNI）

### 3.1 安装 ONNX Runtime

#### 1. Windows 11

Starting from Windows 10 v1809 and all versions of Windows 11, the system comes with built-in ONNX Runtime as part of Windows ML (WinRT API), exposed through Windows.AI.MachineLearning.dll. You can directly use WinML to load and run ONNX models without additional downloads or installations.
(run-onnx-models)[https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/run-onnx-models]

#### 2. Linux

Sherpa-ONNX 并不包含 ONNX Runtime，需要手动下载并配置：

1. 从微软官方 GitHub Releases 下载 Linux 64 位二进制包：

   ```bash
   wget https://github.com/microsoft/onnxruntime/releases/download/v1.23.2/onnxruntime-linux-x64-1.23.2.tgz
   tar -xzf onnxruntime-linux-x64-1.23.2.tgz
   ```
2. 将解压后的 `libonnxruntime.so` 文件复制到系统库目录，并创建软链接：

   ```bash
   sudo cp onnxruntime-linux-x64-1.23.2/lib/libonnxruntime.so* /usr/local/lib/
   sudo ln -sf /usr/local/lib/libonnxruntime.so.1.23.2 /usr/local/lib/libonnxruntime.so
   ```
3. 更新共享库缓存并验证安装：

   ```bash
   sudo ldconfig
   ldconfig -p | grep onnxruntime
   ```

#### 3. macOS

Sherpa-ONNX 同样不包含 ONNX Runtime，需要从官方获取并配置：

1. 下载 macOS ARM64 版本二进制包：

   ```bash
   wget https://github.com/microsoft/onnxruntime/releases/download/v1.23.2/onnxruntime-osx-arm64-1.23.2.tgz
   tar -xzf onnxruntime-osx-arm64-1.23.2.tgz
   ```
2. 将 `libonnxruntime.1.23.2.dylib` 复制到 `/usr/local/lib`：

   ```bash
   sudo cp onnxruntime-osx-arm64-1.23.2/lib/libonnxruntime.1.23.2.dylib /usr/local/lib/
   ```
3. 将 `/usr/local/lib` 添加到 `dyld` 的搜索路径：

   ```bash
   export DYLD_LIBRARY_PATH=/usr/local/lib:$DYLD_LIBRARY_PATH
   ```
4. 使用 `otool` 验证：

   ```bash
   otool -L /Users/ping/lib/darwin_arm64/libsherpa-onnx-jni.dylib
   ```
---


### 3.2 常见错误与排查

**错误示例：**

```text
Exception in thread "main" java.lang.UnsatisfiedLinkError: no sherpa-onnx-jni in java.library.path: ...
```

说明 JVM 没有在 `java.library.path` 中找到本地库。

排查步骤：

1. 确认下载的是与你操作系统与架构匹配的版本（如 win-x64 vs arm64 等）。
2. 用绝对路径测试：将 `.dll` 放在某个目录并运行：

   ```sh
   java -Djava.library.path=C:\full\path\to\jni -jar your-app.jar
   ```
3. 打印或检查 `java.library.path` 内容（示例代码里可输出 `System.getProperty("java.library.path")`）。
4. 避免通过反射修改 `sys_paths`（不要尝试 hack `java.library.path` 的内部字段，容易引发 `NoSuchFieldException: sys_paths`，建议直接用 `-Djava.library.path`）。

---

## 4. 下载并准备 Kokoro 模型

从官方 release 获取模型包（以英文 Kokoro v0.19 为例）：
```
https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
```

```sh
# 下载（手工或脚本）
# 例如从 GitHub releases:
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2

# 解压
tar -xjf kokoro-en-v0_19.tar.bz2

# 查看结构
ls -lh kokoro-en-v0_19/
```

该目录结构示例（解压后应包含）：

```
LICENSE
README.md
espeak-ng-data/        # 语音数据目录
model.onnx            # TTS 模型
tokens.txt           # token 映射
voices.bin           # voice embedding
```

确保这些路径在你的 Java 程序中指向正确的位置（相对或绝对皆可）。

---

## 5. 测试代码（Java 示例）

```java
package com.litongjava.linux.tts;

import com.k2fsa.sherpa.onnx.GeneratedAudio;
import com.k2fsa.sherpa.onnx.OfflineTts;
import com.k2fsa.sherpa.onnx.OfflineTtsConfig;
import com.k2fsa.sherpa.onnx.OfflineTtsKokoroModelConfig;
import com.k2fsa.sherpa.onnx.OfflineTtsModelConfig;

public class NonStreamingTtsKokoroEn {
  public static void main(String[] args) {
    String model = "./kokoro-en-v0_19/model.onnx";
    String voices = "./kokoro-en-v0_19/voices.bin";
    String tokens = "./kokoro-en-v0_19/tokens.txt";
    String dataDir = "./kokoro-en-v0_19/espeak-ng-data";
    String text = "Today as always, men fall into two groups: slaves and free men. Whoever does not have"
        + " two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a"
        + " businessman, an official, or a scholar.";

    OfflineTtsKokoroModelConfig kokoroModelConfig = OfflineTtsKokoroModelConfig.builder()
        .setModel(model)
        .setVoices(voices)
        .setTokens(tokens)
        .setDataDir(dataDir)
        .build();

    OfflineTtsModelConfig modelConfig = OfflineTtsModelConfig.builder()
        .setKokoro(kokoroModelConfig)
        .setNumThreads(2)
        .setDebug(true)
        .build();

    OfflineTtsConfig config = OfflineTtsConfig.builder()
        .setModel(modelConfig)
        .build();

    OfflineTts tts = new OfflineTts(config);

    int sid = 0;
    float speed = 1.0f;
    long start = System.currentTimeMillis();
    GeneratedAudio audio = tts.generate(text, sid, speed);
    long stop = System.currentTimeMillis();

    float timeElapsedSeconds = (stop - start) / 1000.0f;
    float audioDuration = audio.getSamples().length / (float) audio.getSampleRate();
    float real_time_factor = timeElapsedSeconds / audioDuration;

    String waveFilename = "tts-kokoro-en.wav";
    audio.save(waveFilename);
    System.out.printf("-- elapsed : %.3f seconds\n", timeElapsedSeconds);
    System.out.printf("-- audio duration: %.3f seconds\n", audioDuration);
    System.out.printf("-- real-time factor (RTF): %.3f\n", real_time_factor);
    System.out.printf("-- text: %s\n", text);
    System.out.printf("-- Saved to %s\n", waveFilename);

    tts.release();
  }
}
```

### 输出说明

成功执行后会输出类似：

```
-- elapsed : 6.739 seconds
-- audio duration: 6.739 seconds
-- real-time factor (RTF): 0.563
-- text: ...
-- Saved to tts-kokoro-en.wav
```

并在当前目录生成 `tts-kokoro-en.wav`，可以用任意音频播放器播放验证。

---


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/AudioEvent.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class AudioEvent {
    private String name = "";
    private int index = 0;
    private float prob = 0;

    public AudioEvent(String name, int index, float prob) {
        this.name = name;
        this.index = index;
        this.prob = prob;
    }

    public String getName() {
        return name;
    }

    public int getIndex() {
        return index;
    }

    public float getProb() {
        return prob;
    }

    @Override
    public String toString() {
        return String.format("AudioEven(name=%s, index=%d, prob=%.3f)\n", name, index, prob);
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/AudioTagging.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class AudioTagging {
    private long ptr = 0;

    public AudioTagging(AudioTaggingConfig config) {
        LibraryLoader.maybeLoad();
        ptr = newFromFile(config);
        if (ptr == 0) {
            throw new IllegalArgumentException("Invalid AudioTaggingConfig: failed to create native AudioTagging");
        }
    }

    public OfflineStream createStream() {
        long p = createStream(ptr);
        return new OfflineStream(p);
    }

    public AudioEvent[] compute(OfflineStream stream) {
        return compute(stream, -1);

    }

    public AudioEvent[] compute(OfflineStream stream, int topK) {
        return compute(ptr, stream.getPtr(), topK);
    }


    @Override
    protected void finalize() throws Throwable {
        release();
    }

    // You'd better call it manually if it is not used anymore
    public void release() {
        if (this.ptr == 0) {
            return;
        }
        delete(this.ptr);
        this.ptr = 0;
    }

    private native void delete(long ptr);

    private native long newFromFile(AudioTaggingConfig config);

    private native long createStream(long ptr);

    private native AudioEvent[] compute(long ptr, long streamPtr, int topK);
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/AudioTaggingConfig.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class AudioTaggingConfig {
    private final AudioTaggingModelConfig model;
    private final String labels;
    private final int topK;

    private AudioTaggingConfig(Builder builder) {
        this.model = builder.model;
        this.labels = builder.labels;
        this.topK = builder.topK;
    }

    public static Builder builder() {
        return new Builder();
    }

    public static class Builder {
        private AudioTaggingModelConfig model = AudioTaggingModelConfig.builder().build();
        private String labels = "";
        private int topK = 5;

        public AudioTaggingConfig build() {
            return new AudioTaggingConfig(this);
        }

        public Builder setModel(AudioTaggingModelConfig model) {
            this.model = model;
            return this;
        }

        public Builder setLabels(String labels) {
            this.labels = labels;
            return this;
        }

        public Builder setTopK(int topK) {
            this.topK = topK;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/AudioTaggingModelConfig.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class AudioTaggingModelConfig {
    private final OfflineZipformerAudioTaggingModelConfig zipformer;
    private final String ced;
    private final int numThreads;
    private final boolean debug;
    private final String provider;

    private AudioTaggingModelConfig(Builder builder) {
        this.zipformer = builder.zipformer;
        this.ced = builder.ced;
        this.numThreads = builder.numThreads;
        this.debug = builder.debug;
        this.provider = builder.provider;
    }

    public static Builder builder() {
        return new Builder();
    }

    public static class Builder {
        private OfflineZipformerAudioTaggingModelConfig zipformer = OfflineZipformerAudioTaggingModelConfig.builder().build();
        private String ced = "";
        private int numThreads = 1;
        private boolean debug = true;
        private String provider = "cpu";

        public AudioTaggingModelConfig build() {
            return new AudioTaggingModelConfig(this);
        }

        public Builder setZipformer(OfflineZipformerAudioTaggingModelConfig zipformer) {
            this.zipformer = zipformer;
            return this;
        }

        public Builder setCED(String ced) {
            this.ced = ced;
            return this;
        }

        public Builder setNumThreads(int numThreads) {
            this.numThreads = numThreads;
            return this;
        }

        public Builder setDebug(boolean debug) {
            this.debug = debug;
            return this;
        }

        public Builder setProvider(String provider) {
            this.provider = provider;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/DenoisedAudio.java
================================================
// Copyright 2025 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class DenoisedAudio {
    private final float[] samples;
    private final int sampleRate;

    public DenoisedAudio(float[] samples, int sampleRate) {
        LibraryLoader.maybeLoad();
        this.samples = samples;
        this.sampleRate = sampleRate;
    }

    public int getSampleRate() {
        return sampleRate;
    }

    public float[] getSamples() {
        return samples;
    }

    // return true if saved successfully.
    public boolean save(String filename) {
        return saveImpl(filename, samples, sampleRate);
    }

    private native boolean saveImpl(String filename, float[] samples, int sampleRate);
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/EndpointConfig.java
================================================
// Copyright 2022-2023 by zhaoming
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class EndpointConfig {

    private final EndpointRule rule1;
    private final EndpointRule rule2;
    private final EndpointRule rule3;

    private EndpointConfig(Builder builder) {
        this.rule1 = builder.rule1;
        this.rule2 = builder.rule2;
        this.rule3 = builder.rule3;
    }

    public static Builder builder() {
        return new Builder();
    }

    public EndpointRule getRule1() {
        return rule1;
    }

    public EndpointRule getRule2() {
        return rule2;
    }

    public EndpointRule getRule3() {
        return rule3;
    }

    public static class Builder {

        private EndpointRule rule1 = EndpointRule.builder().
                setMustContainNonSilence(false).
                setMinTrailingSilence(2.4f).
                setMinUtteranceLength(0).
                build();
        private EndpointRule rule2 = EndpointRule.builder().
                setMustContainNonSilence(true).
                setMinTrailingSilence(1.4f).
                setMinUtteranceLength(0).
                build();
        private EndpointRule rule3 = EndpointRule.builder().
                setMustContainNonSilence(false).
                setMinTrailingSilence(0.0f).
                setMinUtteranceLength(20.0f).
                build();

        public EndpointConfig build() {
            return new EndpointConfig(this);
        }

        public Builder setRule1(EndpointRule rule) {
            this.rule1 = rule;
            return this;
        }

        public Builder setRule2(EndpointRule rule) {
            this.rule2 = rule;
            return this;
        }

        public Builder setRule3(EndpointRule rule) {
            this.rule3 = rule;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/EndpointRule.java
================================================
// Copyright 2022-2023 by zhaoming
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class EndpointRule {

    private final boolean mustContainNonSilence;
    private final float minTrailingSilence;
    private final float minUtteranceLength;

    private EndpointRule(Builder builder) {
        this.mustContainNonSilence = builder.mustContainNonSilence;
        this.minTrailingSilence = builder.minTrailingSilence;
        this.minUtteranceLength = builder.minUtteranceLength;
    }

    public static Builder builder() {
        return new Builder();
    }

    public float getMinTrailingSilence() {
        return minTrailingSilence;
    }

    public float getMinUtteranceLength() {
        return minUtteranceLength;
    }

    public boolean getMustContainNonSilence() {
        return mustContainNonSilence;
    }

    public static class Builder {
        private boolean mustContainNonSilence = false;
        private float minTrailingSilence = 0;
        private float minUtteranceLength = 0;

        public EndpointRule build() {
            return new EndpointRule(this);
        }

        public Builder setMustContainNonSilence(boolean mustContainNonSilence) {
            this.mustContainNonSilence = mustContainNonSilence;
            return this;
        }

        public Builder setMinTrailingSilence(float minTrailingSilence) {
            this.minTrailingSilence = minTrailingSilence;
            return this;
        }

        public Builder setMinUtteranceLength(float minUtteranceLength) {
            this.minUtteranceLength = minUtteranceLength;
            return this;
        }
    }
}

================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/FastClusteringConfig.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class FastClusteringConfig {
    private final int numClusters;
    private final float threshold;

    private FastClusteringConfig(Builder builder) {
        this.numClusters = builder.numClusters;
        this.threshold = builder.threshold;
    }

    public static Builder builder() {
        return new Builder();
    }

    public int getNumClusters() {
        return numClusters;
    }

    public float getThreshold() {
        return threshold;
    }

    public static class Builder {
        private int numClusters = -1;
        private float threshold = 0.5f;

        public FastClusteringConfig build() {
            return new FastClusteringConfig(this);
        }

        public Builder setNumClusters(int numClusters) {
            this.numClusters = numClusters;
            return this;
        }

        public Builder setThreshold(float threshold) {
            this.threshold = threshold;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/FeatureConfig.java
================================================
// Copyright 2022-2023 by zhaoming
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class FeatureConfig {
    private final int sampleRate;
    private final int featureDim;
    private final float dither;

    private FeatureConfig(Builder builder) {
        this.sampleRate = builder.sampleRate;
        this.featureDim = builder.featureDim;
        this.dither = builder.dither;
    }

    public static Builder builder() {
        return new Builder();
    }

    public int getSampleRate() {
        return sampleRate;
    }

    public int getFeatureDim() {
        return featureDim;
    }

   public float getDither() {
        return dither;
   }

    public static class Builder {
        private int sampleRate = 16000;
        private int featureDim = 80;
        private float dither = 0.0f;

        public FeatureConfig build() {
          return new FeatureConfig(this);
        }

        public Builder setSampleRate(int sampleRate) {
            this.sampleRate = sampleRate;
            return this;
        }

        public Builder setFeatureDim(int featureDim) {
            this.featureDim = featureDim;
            return this;
        }
        public Builder setDither(float dither) {
            this.dither = dither;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/GeneratedAudio.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class GeneratedAudio {
    private final float[] samples;
    private final int sampleRate;

    public GeneratedAudio(float[] samples, int sampleRate) {
        LibraryLoader.maybeLoad();
        this.samples = samples;
        this.sampleRate = sampleRate;
    }

    public int getSampleRate() {
        return sampleRate;
    }

    public float[] getSamples() {
        return samples;
    }

    // return true if saved successfully.
    public boolean save(String filename) {
        return saveImpl(filename, samples, sampleRate);
    }

    private native boolean saveImpl(String filename, float[] samples, int sampleRate);
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/GenerationConfig.java
================================================
// Copyright 2026 Xiaomi Corporation
package com.k2fsa.sherpa.onnx;

import java.util.Map;

/**
 * Configuration for generating audio.
 * Mirrors Kotlin GenerationConfig.
 */
public class GenerationConfig {

    private float silenceScale = 0.2f;
    private float speed = 1.0f;
    private int sid = 0;

    /** Reference audio samples (mono, [-1, 1]). */
    private float[] referenceAudio = null;

    /** Sample rate of reference audio */
    private int referenceSampleRate = 0;

    /** Optional reference text */
    private String referenceText = null;

    /** Number of steps in flow matching */
    private int numSteps = 5;

    /** Extra model-specific key-value pairs. Can be null. */
    private Map<String, String> extra = null;

    /** Default constructor */
    public GenerationConfig() {
    }

    /** Getters */
    public float getSilenceScale() {
        return silenceScale;
    }

    public float getSpeed() {
        return speed;
    }

    public int getSid() {
        return sid;
    }

    public float[] getReferenceAudio() {
        return referenceAudio;
    }

    public int getReferenceSampleRate() {
        return referenceSampleRate;
    }

    public String getReferenceText() {
        return referenceText;
    }

    public int getNumSteps() {
        return numSteps;
    }

    public Map<String, String> getExtra() {
        return extra;
    }

    /** Setters */
    public void setSilenceScale(float silenceScale) {
        this.silenceScale = silenceScale;
    }

    public void setSpeed(float speed) {
        this.speed = speed;
    }

    public void setSid(int sid) {
        this.sid = sid;
    }

    public void setReferenceAudio(float[] referenceAudio) {
        this.referenceAudio = referenceAudio;
    }

    public void setReferenceSampleRate(int referenceSampleRate) {
        this.referenceSampleRate = referenceSampleRate;
    }

    public void setReferenceText(String referenceText) {
        this.referenceText = referenceText;
    }

    public void setNumSteps(int numSteps) {
        this.numSteps = numSteps;
    }

    public void setExtra(Map<String, String> extra) {
        this.extra = extra;
    }

    @Override
    public String toString() {
        return "GenerationConfig{" +
                "silenceScale=" + silenceScale +
                ", speed=" + speed +
                ", sid=" + sid +
                ", referenceAudioLength=" + (referenceAudio != null ? referenceAudio.length : 0) +
                ", referenceSampleRate=" + referenceSampleRate +
                ", referenceText='" + referenceText + '\'' +
                ", numSteps=" + numSteps +
                ", extra=" + extra +
                '}';
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/HomophoneReplacerConfig.java
================================================
// Copyright 2025 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class HomophoneReplacerConfig {
    private final String dictDir;  // unused
    private final String lexicon;
    private final String ruleFsts;

    private HomophoneReplacerConfig(Builder builder) {
        this.dictDir = builder.dictDir;
        this.lexicon = builder.lexicon;
        this.ruleFsts = builder.ruleFsts;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getDictDir() {
        return dictDir;
    }

    public String getLexicon() {
        return lexicon;
    }

    public String getRuleFsts() {
        return ruleFsts;
    }

    public static class Builder {
        private String dictDir = "";
        private String lexicon = "";
        private String ruleFsts = "";

        public HomophoneReplacerConfig build() {
            return new HomophoneReplacerConfig(this);
        }

        public Builder setDictDir(String dictDir) {
            this.dictDir = dictDir;
            return this;
        }

        public Builder setLexicon(String lexicon) {
            this.lexicon = lexicon;
            return this;
        }

        public Builder setRuleFsts(String ruleFsts) {
            this.ruleFsts = ruleFsts;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/KeywordSpotter.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class KeywordSpotter {
    private long ptr = 0;

    public KeywordSpotter(KeywordSpotterConfig config) {
        LibraryLoader.maybeLoad();
        ptr = newFromFile(config);
        if (ptr == 0) {
            throw new IllegalArgumentException("Invalid KeywordSpotterConfig: failed to create native KeywordSpotter");
        }
    }

    public OnlineStream createStream(String keywords) {
        long p = createStream(ptr, keywords);
        return new OnlineStream(p);
    }

    public OnlineStream createStream() {
        long p = createStream(ptr, "");
        return new OnlineStream(p);
    }

    public void decode(OnlineStream s) {
        decode(ptr, s.getPtr());
    }

    public void reset(OnlineStream s) {
        reset(ptr, s.getPtr());
    }

    public boolean isReady(OnlineStream s) {
        return isReady(ptr, s.getPtr());
    }

    public KeywordSpotterResult getResult(OnlineStream s) {
        return getResult(ptr, s.getPtr());
    }

    protected void finalize() throws Throwable {
        release();
    }

    // You'd better call it manually if it is not used anymore
    public void release() {
        if (this.ptr == 0) {
            return;
        }
        delete(this.ptr);
        this.ptr = 0;
    }

    private native long newFromFile(KeywordSpotterConfig config);

    private native void delete(long ptr);

    private native long createStream(long ptr, String keywords);

    private native void decode(long ptr, long streamPtr);

    private native void reset(long ptr, long streamPtr);

    private native boolean isReady(long ptr, long streamPtr);

    private native KeywordSpotterResult getResult(long ptr, long streamPtr);
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/KeywordSpotterConfig.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class KeywordSpotterConfig {
    private final FeatureConfig featConfig;
    private final OnlineModelConfig modelConfig;

    private final int maxActivePaths;
    private final String keywordsFile;
    private final float keywordsScore;
    private final float keywordsThreshold;
    private final int numTrailingBlanks;

    private KeywordSpotterConfig(Builder builder) {
        this.featConfig = builder.featConfig;
        this.modelConfig = builder.modelConfig;
        this.maxActivePaths = builder.maxActivePaths;
        this.keywordsFile = builder.keywordsFile;
        this.keywordsScore = builder.keywordsScore;
        this.keywordsThreshold = builder.keywordsThreshold;
        this.numTrailingBlanks = builder.numTrailingBlanks;
    }

    public static Builder builder() {
        return new Builder();
    }

    public static class Builder {
        private FeatureConfig featConfig = FeatureConfig.builder().build();
        private OnlineModelConfig modelConfig = OnlineModelConfig.builder().build();
        private int maxActivePaths = 4;
        private String keywordsFile = "keywords.txt";
        private float keywordsScore = 1.5f;
        private float keywordsThreshold = 0.25f;
        private int numTrailingBlanks = 2;

        public KeywordSpotterConfig build() {
            return new KeywordSpotterConfig(this);
        }

        public Builder setFeatureConfig(FeatureConfig featConfig) {
            this.featConfig = featConfig;
            return this;
        }

        public Builder setOnlineModelConfig(OnlineModelConfig modelConfig) {
            this.modelConfig = modelConfig;
            return this;
        }

        public Builder setMaxActivePaths(int maxActivePaths) {
            this.maxActivePaths = maxActivePaths;
            return this;
        }

        public Builder setKeywordsFile(String keywordsFile) {
            this.keywordsFile = keywordsFile;
            return this;
        }

        public Builder setKeywordsScore(float keywordsScore) {
            this.keywordsScore = keywordsScore;
            return this;
        }

        public Builder setKeywordsThreshold(float keywordsThreshold) {
            this.keywordsThreshold = keywordsThreshold;
            return this;
        }

        public Builder setNumTrailingBlanks(int numTrailingBlanks) {
            this.numTrailingBlanks = numTrailingBlanks;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/KeywordSpotterResult.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

import java.util.Arrays;

public class KeywordSpotterResult {
    private final String keyword;
    private final String[] tokens;
    private final float[] timestamps;

    public KeywordSpotterResult(String keyword, String[] tokens, float[] timestamps) {
        this.keyword = keyword;
        this.tokens = tokens;
        this.timestamps = timestamps;
    }

    public String getKeyword() {
        return keyword;
    }

    public String[] getTokens() {
        return tokens;
    }

    public float[] getTimestamps() {
        return timestamps;
    }

    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append("Keyword: ").append(keyword).append("\n");
        sb.append("Tokens: ").append(Arrays.toString(tokens)).append("\n");
        sb.append("Timestamps: ").append(Arrays.toString(timestamps));
        return sb.toString();
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/LibraryLoader.java
================================================
package com.k2fsa.sherpa.onnx;

public class LibraryLoader {
    private static volatile boolean autoLoadEnabled = true;
    private static volatile boolean isLoaded = false;

    static synchronized void loadLibrary() {
        if (!isLoaded) {
            LibraryUtils.load();
            isLoaded = true;
        }
    }

    public static void setAutoLoadEnabled(boolean enabled) {
        autoLoadEnabled = enabled;
    }

    static void maybeLoad() {
        if (autoLoadEnabled) {
            loadLibrary();
        }
    }
}

================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/LibraryUtils.java
================================================
package com.k2fsa.sherpa.onnx;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.Locale;
import java.util.Objects;

/*
# We support the following loading methods

## Method 1 Specify the property sherpa_onnx.native.path

We assume the path contains the libraries sherpa-onnx-jni and onnxruntime.

java \
 -Dsherpa_onnx.native.path=/Users/fangjun/sherpa-onnx/build/install/lib \
 -cp /Users/fangjun/sherpa-onnx/sherpa-onnx/java-api/build/sherpa-onnx.jar
 xxx.java

## Method 2 Specify the native jar library

java \
 -cp /Users/fangjun/sherpa-onnx/sherpa-onnx/java-api/build/sherpa-onnx.jar:/path/to/sherpa-onnx-osx-x64.jar
 xxx.java

Note that you need to replace  : in -cp with ; on windows.

## Method 3 Specify the property java.library.path

We assume the path contains the libraries sherpa-onnx-jni and onnxruntime.

java \
 -Djava.library.path=/Users/fangjun/sherpa-onnx/build/install/lib \
 -cp /Users/fangjun/sherpa-onnx/sherpa-onnx/java-api/build/sherpa-onnx.jar
 xxx.java

 */

public class LibraryUtils {
    // System property to override native library path
    private static final String NATIVE_PATH_PROP = "sherpa_onnx.native.path";
    private static final String LIB_NAME = "sherpa-onnx-jni";

    private static boolean debug = false;

    private static String detectedOS;

    public static void enableDebug() {
        debug = true;
    }

    public static void disableDebug() {
        debug = false;
    }

    public static void load() {
        // 1. Try to load from external directory specified by -Dsherpa_onnx.native.path if provided
        if (loadFromSherpaOnnxNativePath()) {
            return;
        }

        // 2. Load from resources contains in some jar file
        if (!isAndroid()) {
            try {
                if (loadFromResourceInJar()) {
                    return;
                }
            } catch (IOException e) {
                // pass
            }
        }

        // 3. fallback to -Djava.library.path
        // java -Djava.library.path=C:\mylibs;D:\otherlibs -cp sherpa-onnx.jar xxx.java
        //
        // It throws if it cannot load the lib sherpa-onnx-jni
        System.loadLibrary(LIB_NAME);
    }

    // You specify -Dsherpa_onnx.native.path=/path/to/some/dir
    // where /path/to/some/dir contains the sherpa-onnx-jni and onnxruntime libs
    private static boolean loadFromSherpaOnnxNativePath() {
        String libFileName = System.mapLibraryName(LIB_NAME);
        String nativePath = System.getProperty(NATIVE_PATH_PROP);

        if (nativePath != null) {
            File nativeDir = new File(nativePath);
            File libInDir = new File(nativeDir, libFileName);
            if (nativeDir.isDirectory() && libInDir.exists()) {
                if (debug) {
                    System.out.printf("Loading from: %s\n", libInDir.getAbsolutePath());
                }

                System.load(libInDir.getAbsolutePath());
                return true;
            }
        }

        if (debug) {
            System.out.println("nativePath is null");
        }

        return false;
    }

    private static boolean loadFromResourceInJar() throws IOException {
        String libFileName = System.mapLibraryName(LIB_NAME);
        String sherpaOnnxJniPath = "sherpa-onnx/native/" + getOsArch() + '/' + libFileName;

        Path tempDirectory = null;
        try {
            if (!resourceExists(sherpaOnnxJniPath)) {
                if (debug) {
                    System.out.printf("%s does not exist\n", sherpaOnnxJniPath);
                }

                return false;
            }

            tempDirectory = Files.createTempDirectory("sherpa-onnx-java");

            if (Objects.equals(detectedOS, "osx")) {
                // for macos, we need to first load libonnxruntime.1.23.2.dylib
                String onnxruntimePath = "sherpa-onnx/native/" + getOsArch() + '/' + "libonnxruntime.1.23.2.dylib";
                if (!resourceExists(onnxruntimePath)) {
                    if (debug) {
                        System.out.printf("%s does not exist\n", onnxruntimePath);
                    }

                    return false;
                }

                File tempFile = tempDirectory.resolve("libonnxruntime.1.23.2.dylib").toFile();
                extractResource(onnxruntimePath, tempFile);
                System.load(tempFile.getAbsolutePath());
            } else {
                String onnxLibFileName = System.mapLibraryName("onnxruntime");
                String onnxruntimePath = "sherpa-onnx/native/" + getOsArch() + '/' + onnxLibFileName;
                if (!resourceExists(onnxruntimePath)) {
                    if (debug) {
                        System.out.printf("%s does not exist\n", onnxruntimePath);
                    }

                    return false;
                }

                File tempFile = tempDirectory.resolve(onnxLibFileName).toFile();
                extractResource(onnxruntimePath, tempFile);
                System.load(tempFile.getAbsolutePath());
            }

            File tempFile = tempDirectory.resolve(libFileName).toFile();
            extractResource(sherpaOnnxJniPath, tempFile);
            System.load(tempFile.getAbsolutePath());
        } finally {
            if (tempDirectory != null) {
                cleanUpTempDir(tempDirectory.toFile());
            }
        }

        return true;
    }

    // this method is copied and modified from
    // https://github.com/microsoft/onnxruntime/blob/main/java/src/main/java/ai/onnxruntime/OnnxRuntime.java#L118
    private static String getOsArch() {
        String os = System.getProperty("os.name", "generic").toLowerCase(Locale.ENGLISH);
        if (os.contains("mac") || os.contains("darwin")) {
            detectedOS = "osx";
        } else if (os.contains("win")) {
            detectedOS = "win";
        } else if (os.contains("nux")) {
            detectedOS = "linux";
        } else {
            throw new IllegalStateException("Unsupported os:" + os);
        }

        String detectedArch;
        String arch = System.getProperty("os.arch", "generic")
                .toLowerCase(Locale.ENGLISH);
        if (arch.startsWith("amd64") || arch.startsWith("x86_64")) {
            detectedArch = "x64";
        } else if (arch.startsWith("x86")) {
            // 32-bit x86 is not supported by the Java API
            detectedArch = "x86";
        } else if (arch.startsWith("aarch64") || arch.startsWith("arm64")) {
            detectedArch = "aarch64";
        } else if (arch.startsWith("arm")) {
            detectedArch = "arm"; //armv8l架构
        } else {
            throw new IllegalStateException("Unsupported arch:" + arch);
        }

        return detectedOS + '-' + detectedArch;
    }

    private static void extractResource(String resourcePath, File destination) {
        if (debug) {
            System.out.printf("Copying from resource path %s to %s\n", resourcePath, destination.toPath());
        }

        try (InputStream in = LibraryUtils.class.getClassLoader().getResourceAsStream(resourcePath)) {
            if (in == null) {
                throw new RuntimeException("Resource not found: " + resourcePath);
            }
            Files.copy(in, destination.toPath(), StandardCopyOption.REPLACE_EXISTING);
        } catch (IOException e) {
            throw new RuntimeException("Failed to extract resource " + resourcePath + " to " + destination.getAbsolutePath(), e);
        }
    }

    // From ChatGPT:
    // Class.getResourceAsStream(String path) behaves differently than ClassLoader
    //  - No leading slash → relative to the package of LibraryUtils
    //  - Leading slash → absolute path relative to classpath root
    //
    // ClassLoader.getResourceAsStream always uses absolute paths relative to classpath root,
    // no leading slash needed

    private static boolean resourceExists(String path) {
        return LibraryUtils.class.getClassLoader().getResource(path) != null;
    }

    private static void cleanUpTempDir(File dir) {
        if (!dir.exists()) return;

        File[] files = dir.listFiles();
        if (files != null) {
            for (File f : files) {
                f.deleteOnExit(); // schedule each .so for deletion
            }
        }
        dir.deleteOnExit(); // schedule the directory itself
    }

    static boolean isAndroid() {
        String vmName = System.getProperty("java.vm.name", "").toLowerCase(Locale.ROOT);
        String specVendor = System.getProperty("java.specification.vendor", "");
        return vmName.contains("dalvik") || vmName.contains("art") ||
               specVendor.equals("The Android Project");
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineCanaryModelConfig.java
================================================
// Copyright 2025 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OfflineCanaryModelConfig {
    private final String encoder;
    private final String decoder;
    private final String srcLang;
    private final String tgtLang;
    private final boolean usePnc;

    private OfflineCanaryModelConfig(Builder builder) {
        this.encoder = builder.encoder;
        this.decoder = builder.decoder;
        this.srcLang = builder.srcLang;
        this.tgtLang = builder.tgtLang;
        this.usePnc = builder.usePnc;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getEncoder() {
        return encoder;
    }

    public String getDecoder() {
        return decoder;
    }

    public String getSrcLang() {
        return srcLang;
    }

    public String getTgtLang() {
        return tgtLang;
    }

    public boolean isUsePnc() {
        return usePnc;
    }

    public static class Builder {
        private String encoder = "";
        private String decoder = "";
        private String srcLang = "en";
        private String tgtLang = "en";
        private boolean usePnc = true;

        public OfflineCanaryModelConfig build() {
            return new OfflineCanaryModelConfig(this);
        }

        public Builder setEncoder(String encoder) {
            this.encoder = encoder;
            return this;
        }

        public Builder setDecoder(String decoder) {
            this.decoder = decoder;
            return this;
        }

        public Builder setSrcLang(String srcLang) {
            this.srcLang = srcLang;
            return this;
        }

        public Builder setTgtLang(String tgtLang) {
            this.tgtLang = tgtLang;
            return this;
        }

        public Builder setUsePnc(boolean usePnc) {
            this.usePnc = usePnc;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineDolphinModelConfig.java
================================================
// Copyright 2025 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OfflineDolphinModelConfig {
    private final String model;

    private OfflineDolphinModelConfig(Builder builder) {
        this.model = builder.model;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getModel() {
        return model;
    }

    public static class Builder {
        private String model = "";

        public OfflineDolphinModelConfig build() {
            return new OfflineDolphinModelConfig(this);
        }

        public Builder setModel(String model) {
            this.model = model;
            return this;
        }
    }
}

================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineFireRedAsrCtcModelConfig.java
================================================
package com.k2fsa.sherpa.onnx;

public class OfflineFireRedAsrCtcModelConfig {
    private final String model;

    private OfflineFireRedAsrCtcModelConfig(Builder builder) {
        this.model = builder.model;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getModel() {
        return model;
    }

    public static class Builder {
        private String model = "";

        public OfflineFireRedAsrCtcModelConfig build() {
            return new OfflineFireRedAsrCtcModelConfig(this);
        }

        public Builder setModel(String model) {
            this.model = model;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineFireRedAsrModelConfig.java
================================================
package com.k2fsa.sherpa.onnx;

public class OfflineFireRedAsrModelConfig {
    private final String encoder;
    private final String decoder;

    private OfflineFireRedAsrModelConfig(Builder builder) {
        this.encoder = builder.encoder;
        this.decoder = builder.decoder;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getEncoder() {
        return encoder;
    }

    public String getDecoder() {
        return decoder;
    }

    public static class Builder {
        private String encoder = "";
        private String decoder = "";

        public OfflineFireRedAsrModelConfig build() {
            return new OfflineFireRedAsrModelConfig(this);
        }

        public Builder setEncoder(String encoder) {
            this.encoder = encoder;
            return this;
        }

        public Builder setDecoder(String decoder) {
            this.decoder = decoder;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineFunAsrNanoModelConfig.java
================================================
package com.k2fsa.sherpa.onnx;

public class OfflineFunAsrNanoModelConfig {
    private final String encoderAdaptor;
    private final String llm;
    private final String embedding;
    private final String tokenizer;
    private final String systemPrompt;
    private final String userPrompt;
    private final int maxNewTokens;
    private final float temperature;
    private final float topP;
    private final int seed;
    private final String language;
    private final boolean itn;
    private final String hotwords;

    private OfflineFunAsrNanoModelConfig(Builder builder) {
        this.encoderAdaptor = builder.encoderAdaptor;
        this.llm = builder.llm;
        this.embedding = builder.embedding;
        this.tokenizer = builder.tokenizer;
        this.systemPrompt = builder.systemPrompt;
        this.userPrompt = builder.userPrompt;
        this.maxNewTokens = builder.maxNewTokens;
        this.temperature = builder.temperature;
        this.topP = builder.topP;
        this.seed = builder.seed;
        this.language = builder.language;
        this.itn = builder.itn;
        this.hotwords = builder.hotwords;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getEncoderAdaptor() {
        return encoderAdaptor;
    }

    public String getLLM() {
        return llm;
    }

    public String getEmbedding() {
        return embedding;
    }

    public String getTokenizer() {
        return tokenizer;
    }

    public String getSystemPrompt() {
        return systemPrompt;
    }

    public String getUserPrompt() {
        return userPrompt;
    }

    public String getLanguage() {
        return language;
    }

    public boolean getItn() {
        return itn;
    }

    public String getHotwords() {
        return hotwords;
    }

    public int getMaxNewTokens() {
        return maxNewTokens;
    }

    public float getTemperature() {
        return temperature;
    }

    public float getTopP() {
        return topP;
    }

    public int getSeed() {
        return seed;
    }

    public static class Builder {
        private String encoderAdaptor = "";
        private String llm = "";
        private String embedding = "";
        private String tokenizer = "";
        private String systemPrompt = "You are a helpful assistant.";
        private String userPrompt = "语音转写：";
        private int maxNewTokens = 512;
        private float temperature = 1e-6f;
        private float topP = 0.8f;
        private int seed = 42;
        private String language = "";
        private boolean itn = true;
        private String hotwords = "";

        public OfflineFunAsrNanoModelConfig build() {
            return new OfflineFunAsrNanoModelConfig(this);
        }

        public Builder setEncoderAdaptor(String encoderAdaptor) {
            this.encoderAdaptor = encoderAdaptor;
            return this;
        }

        public Builder setLLM(String llm) {
            this.llm = llm;
            return this;
        }

        public Builder setEmbedding(String embedding) {
            this.embedding = embedding;
            return this;
        }

        public Builder setTokenizer(String tokenizer) {
            this.tokenizer = tokenizer;
            return this;
        }

        public Builder setSystemPrompt(String systemPrompt) {
            this.systemPrompt = systemPrompt;
            return this;
        }

        public Builder setUserPrompt(String userPrompt) {
            this.userPrompt = userPrompt;
            return this;
        }

        public Builder setLanguage(String language) {
            this.language = language;
            return this;
        }

        public Builder setItn(boolean itn) {
            this.itn = itn;
            return this;
        }

        public Builder setHotwords(String hotwords) {
            this.hotwords = hotwords;
            return this;
        }

        public Builder setMaxNewTokens(int maxNewTokens) {
            this.maxNewTokens = maxNewTokens;
            return this;
        }

        public Builder setTemperature(float temperature) {
            this.temperature = temperature;
            return this;
        }

        public Builder setTopP(float topP) {
            this.topP = topP;
            return this;
        }

        public Builder setSeed(int seed) {
            this.seed = seed;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineMedAsrCtcModelConfig.java
================================================
package com.k2fsa.sherpa.onnx;

public class OfflineMedAsrCtcModelConfig {
    private final String model;

    private OfflineMedAsrCtcModelConfig(Builder builder) {
        this.model = builder.model;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getModel() {
        return model;
    }

    public static class Builder {
        private String model = "";

        public OfflineMedAsrCtcModelConfig build() {
            return new OfflineMedAsrCtcModelConfig(this);
        }

        public Builder setModel(String model) {
            this.model = model;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineModelConfig.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OfflineModelConfig {
    private final OfflineTransducerModelConfig transducer;
    private final OfflineParaformerModelConfig paraformer;
    private final OfflineWhisperModelConfig whisper;
    private final OfflineFireRedAsrModelConfig fireRedAsr;
    private final OfflineMoonshineModelConfig moonshine;
    private final OfflineNemoEncDecCtcModelConfig nemo;
    private final OfflineSenseVoiceModelConfig senseVoice;
    private final OfflineDolphinModelConfig dolphin;
    private final OfflineZipformerCtcModelConfig zipformerCtc;
    private final OfflineWenetCtcModelConfig wenetCtc;
    private final OfflineOmnilingualAsrCtcModelConfig omnilingual;
    private final OfflineMedAsrCtcModelConfig medasr;
    private final OfflineFireRedAsrCtcModelConfig fireRedAsrCtc;
    private final OfflineFunAsrNanoModelConfig funasrNano;
    private final OfflineCanaryModelConfig canary;
    private final String teleSpeech;
    private final String tokens;
    private final int numThreads;
    private final boolean debug;
    private final String provider;

    private final String modelType;
    private final String modelingUnit;
    private final String bpeVocab;

    private OfflineModelConfig(Builder builder) {
        this.transducer = builder.transducer;
        this.paraformer = builder.paraformer;
        this.whisper = builder.whisper;
        this.fireRedAsr = builder.fireRedAsr;
        this.moonshine = builder.moonshine;
        this.nemo = builder.nemo;
        this.zipformerCtc = builder.zipformerCtc;
        this.canary = builder.canary;
        this.wenetCtc = builder.wenetCtc;
        this.omnilingual = builder.omnilingual;
        this.medasr = builder.medasr;
        this.fireRedAsrCtc = builder.fireRedAsrCtc;
        this.funasrNano = builder.funasrNano;
        this.senseVoice = builder.senseVoice;
        this.dolphin = builder.dolphin;
        this.teleSpeech = builder.teleSpeech;
        this.tokens = builder.tokens;
        this.numThreads = builder.numThreads;
        this.debug = builder.debug;
        this.provider = builder.provider;
        this.modelType = builder.modelType;
        this.modelingUnit = builder.modelingUnit;
        this.bpeVocab = builder.bpeVocab;
    }

    public static Builder builder() {
        return new Builder();
    }

    public OfflineParaformerModelConfig getParaformer() {
        return paraformer;
    }

    public OfflineTransducerModelConfig getTransducer() {
        return transducer;
    }

    public OfflineWhisperModelConfig getWhisper() {
        return whisper;
    }

    public OfflineMoonshineModelConfig getMoonshine() {
        return moonshine;
    }

    public OfflineSenseVoiceModelConfig getSenseVoice() {
        return senseVoice;
    }

    public OfflineDolphinModelConfig getDolphin() {
        return dolphin;
    }

    public OfflineNemoEncDecCtcModelConfig getNemo() {
        return nemo;
    }

    public OfflineZipformerCtcModelConfig getZipformerCtc() {
        return zipformerCtc;
    }

    public OfflineWenetCtcModelConfig getWenetCtc() {
        return wenetCtc;
    }

    public OfflineOmnilingualAsrCtcModelConfig getOmnilingual() {
        return omnilingual;
    }

    public OfflineMedAsrCtcModelConfig getMedAsr() {
        return medasr;
    }

    public OfflineFireRedAsrCtcModelConfig getFireRedAsrCtc() {
        return fireRedAsrCtc;
    }

    public OfflineFireRedAsrModelConfig getFireRedAsr() {
        return fireRedAsr;
    }

    public OfflineFunAsrNanoModelConfig getFunAsrNano() {
        return funasrNano;
    }

    public OfflineCanaryModelConfig getCanary() {
        return canary;
    }

    public String getTokens() {
        return tokens;
    }

    public int getNumThreads() {
        return numThreads;
    }

    public boolean getDebug() {
        return debug;
    }

    public String getProvider() {
        return provider;
    }

    public String getModelType() {
        return modelType;
    }

    public String getModelingUnit() {
        return modelingUnit;
    }

    public String getBpeVocab() {
        return bpeVocab;
    }

    public String getTeleSpeech() {
        return teleSpeech;
    }

    public static class Builder {
        private OfflineParaformerModelConfig paraformer = OfflineParaformerModelConfig.builder().build();
        private OfflineTransducerModelConfig transducer = OfflineTransducerModelConfig.builder().build();
        private OfflineWhisperModelConfig whisper = OfflineWhisperModelConfig.builder().build();
        private OfflineFireRedAsrModelConfig fireRedAsr = OfflineFireRedAsrModelConfig.builder().build();
        private OfflineMoonshineModelConfig moonshine = OfflineMoonshineModelConfig.builder().build();
        private OfflineNemoEncDecCtcModelConfig nemo = OfflineNemoEncDecCtcModelConfig.builder().build();
        private OfflineSenseVoiceModelConfig senseVoice = OfflineSenseVoiceModelConfig.builder().build();
        private OfflineDolphinModelConfig dolphin = OfflineDolphinModelConfig.builder().build();
        private OfflineZipformerCtcModelConfig zipformerCtc = OfflineZipformerCtcModelConfig.builder().build();
        private OfflineWenetCtcModelConfig wenetCtc = OfflineWenetCtcModelConfig.builder().build();
        private OfflineOmnilingualAsrCtcModelConfig omnilingual = OfflineOmnilingualAsrCtcModelConfig.builder().build();
        private OfflineMedAsrCtcModelConfig medasr = OfflineMedAsrCtcModelConfig.builder().build();
        private OfflineFireRedAsrCtcModelConfig fireRedAsrCtc = OfflineFireRedAsrCtcModelConfig.builder().build();
        private OfflineFunAsrNanoModelConfig funasrNano = OfflineFunAsrNanoModelConfig.builder().build();
        private OfflineCanaryModelConfig canary = OfflineCanaryModelConfig.builder().build();
        private String teleSpeech = "";
        private String tokens = "";
        private int numThreads = 1;
        private boolean debug = true;
        private String provider = "cpu";
        private String modelType = "";
        private String modelingUnit = "cjkchar";
        private String bpeVocab = "";

        public OfflineModelConfig build() {
            return new OfflineModelConfig(this);
        }

        public Builder setTransducer(OfflineTransducerModelConfig transducer) {
            this.transducer = transducer;
            return this;
        }

        public Builder setDolphin(OfflineDolphinModelConfig dolphin) {
            this.dolphin = dolphin;
            return this;
        }

        public Builder setParaformer(OfflineParaformerModelConfig paraformer) {
            this.paraformer = paraformer;
            return this;
        }

        public Builder setNemo(OfflineNemoEncDecCtcModelConfig nemo) {
            this.nemo = nemo;
            return this;
        }

        public Builder setZipformerCtc(OfflineZipformerCtcModelConfig zipformerCtc) {
            this.zipformerCtc = zipformerCtc;
            return this;
        }

        public Builder setWenetCtc(OfflineWenetCtcModelConfig wenetCtc) {
            this.wenetCtc = wenetCtc;
            return this;
        }

        public Builder setOmnilingual(OfflineOmnilingualAsrCtcModelConfig omnilingual) {
            this.omnilingual = omnilingual;
            return this;
        }

        public Builder setMedAsr(OfflineMedAsrCtcModelConfig medasr) {
            this.medasr = medasr;
            return this;
        }

        public Builder setFireRedAsrCtc(OfflineFireRedAsrCtcModelConfig fireRedAsrCtc) {
            this.fireRedAsrCtc = fireRedAsrCtc;
            return this;
        }

        public Builder setFunAsrNano(OfflineFunAsrNanoModelConfig funasrNano) {
            this.funasrNano = funasrNano;
            return this;
        }

        public Builder setCanary(OfflineCanaryModelConfig canary) {
            this.canary = canary;
            return this;
        }

        public Builder setTeleSpeech(String teleSpeech) {
            this.teleSpeech = teleSpeech;
            return this;
        }

        public Builder setWhisper(OfflineWhisperModelConfig whisper) {
            this.whisper = whisper;
            return this;
        }

        public Builder setFireRedAsr(OfflineFireRedAsrModelConfig fireRedAsr) {
            this.fireRedAsr = fireRedAsr;
            return this;
        }

        public Builder setSenseVoice(OfflineSenseVoiceModelConfig senseVoice) {
            this.senseVoice = senseVoice;
            return this;
        }

        public Builder setMoonshine(OfflineMoonshineModelConfig moonshine) {
            this.moonshine = moonshine;
            return this;
        }

        public Builder setTokens(String tokens) {
            this.tokens = tokens;
            return this;
        }

        public Builder setNumThreads(int numThreads) {
            this.numThreads = numThreads;
            return this;
        }

        public Builder setDebug(boolean debug) {
            this.debug = debug;
            return this;
        }

        public Builder setProvider(String provider) {
            this.provider = provider;
            return this;
        }

        public Builder setModelType(String modelType) {
            this.modelType = modelType;
            return this;
        }

        public Builder setModelingUnit(String modelingUnit) {
            this.modelingUnit = modelingUnit;
            return this;
        }

        public Builder setBpeVocab(String bpeVocab) {
            this.bpeVocab = bpeVocab;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineMoonshineModelConfig.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OfflineMoonshineModelConfig {
    private final String preprocessor;
    private final String encoder;
    private final String uncachedDecoder;
    private final String cachedDecoder;
    private final String mergedDecoder;

    private OfflineMoonshineModelConfig(Builder builder) {
        this.preprocessor = builder.preprocessor;
        this.encoder = builder.encoder;
        this.uncachedDecoder = builder.uncachedDecoder;
        this.cachedDecoder = builder.cachedDecoder;
        this.mergedDecoder = builder.mergedDecoder;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getPreprocessor() {
        return preprocessor;
    }

    public String getEncoder() {
        return encoder;
    }

    public String getUncachedDecoder() {
        return uncachedDecoder;
    }

    public String getCachedDecoder() {
        return cachedDecoder;
    }

    public String getMergedDecoder() {
        return mergedDecoder;
    }

    public static class Builder {
        private String preprocessor = "";
        private String encoder = "";
        private String uncachedDecoder = "";
        private String cachedDecoder = "";
        private String mergedDecoder = "";

        public OfflineMoonshineModelConfig build() {
            return new OfflineMoonshineModelConfig(this);
        }

        public Builder setPreprocessor(String preprocessor) {
            this.preprocessor = preprocessor;
            return this;
        }

        public Builder setEncoder(String encoder) {
            this.encoder = encoder;
            return this;
        }

        public Builder setUncachedDecoder(String uncachedDecoder) {
            this.uncachedDecoder = uncachedDecoder;
            return this;
        }

        public Builder setCachedDecoder(String cachedDecoder) {
            this.cachedDecoder = cachedDecoder;
            return this;
        }

        public Builder setMergedDecoder(String mergedDecoder) {
            this.mergedDecoder = mergedDecoder;
            return this;
        }
    }


}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineNemoEncDecCtcModelConfig.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OfflineNemoEncDecCtcModelConfig {
    private final String model;

    private OfflineNemoEncDecCtcModelConfig(Builder builder) {
        this.model = builder.model;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getModel() {
        return model;
    }

    public static class Builder {
        private String model = "";

        public OfflineNemoEncDecCtcModelConfig build() {
            return new OfflineNemoEncDecCtcModelConfig(this);
        }

        public Builder setModel(String model) {
            this.model = model;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineOmnilingualAsrCtcModelConfig.java
================================================
package com.k2fsa.sherpa.onnx;

public class OfflineOmnilingualAsrCtcModelConfig {
    private final String model;

    private OfflineOmnilingualAsrCtcModelConfig(Builder builder) {
        this.model = builder.model;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getModel() {
        return model;
    }

    public static class Builder {
        private String model = "";

        public OfflineOmnilingualAsrCtcModelConfig build() {
            return new OfflineOmnilingualAsrCtcModelConfig(this);
        }

        public Builder setModel(String model) {
            this.model = model;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineParaformerModelConfig.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OfflineParaformerModelConfig {
    private final String model;
    private final QnnConfig qnnConfig;

    private OfflineParaformerModelConfig(Builder builder) {
        this.model = builder.model;
        this.qnnConfig = builder.qnnConfig;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getModel() {
        return model;
    }

    public QnnConfig getQnnConfig() {
        return qnnConfig;
    }

    public static class Builder {
        private String model = "";
        private QnnConfig qnnConfig = QnnConfig.builder().build();

        public OfflineParaformerModelConfig build() {
            return new OfflineParaformerModelConfig(this);
        }

        public Builder setModel(String model) {
            this.model = model;
            return this;
        }

        public Builder setQnnConfig(QnnConfig qnnConfig) {
            this.qnnConfig = qnnConfig;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflinePunctuation.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OfflinePunctuation {
    private long ptr = 0;

    public OfflinePunctuation(OfflinePunctuationConfig config) {
        LibraryLoader.maybeLoad();
        ptr = newFromFile(config);
        if (ptr == 0) {
            throw new IllegalArgumentException("Invalid OfflinePunctuationConfig: failed to create native OfflinePunctuation");
        }
    }

    public String addPunctuation(String text) {
        return addPunctuation(ptr, text);
    }

    @Override
    protected void finalize() throws Throwable {
        release();
    }

    // You'd better call it manually if it is not used anymore
    public void release() {
        if (this.ptr == 0) {
            return;
        }
        delete(this.ptr);
        this.ptr = 0;
    }

    private native void delete(long ptr);

    private native long newFromFile(OfflinePunctuationConfig config);

    private native String addPunctuation(long ptr, String text);
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflinePunctuationConfig.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OfflinePunctuationConfig {
    private final OfflinePunctuationModelConfig model;

    private OfflinePunctuationConfig(Builder builder) {
        this.model = builder.model;
    }

    public static Builder builder() {
        return new Builder();
    }

    public OfflinePunctuationModelConfig getModel() {
        return model;
    }


    public static class Builder {
        private OfflinePunctuationModelConfig model = OfflinePunctuationModelConfig.builder().build();

        public OfflinePunctuationConfig build() {
            return new OfflinePunctuationConfig(this);
        }

        public Builder setModel(OfflinePunctuationModelConfig model) {
            this.model = model;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflinePunctuationModelConfig.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OfflinePunctuationModelConfig {
    private final String ctTransformer;
    private final int numThreads;
    private final boolean debug;
    private final String provider;

    private OfflinePunctuationModelConfig(Builder builder) {
        this.ctTransformer = builder.ctTransformer;
        this.numThreads = builder.numThreads;
        this.debug = builder.debug;
        this.provider = builder.provider;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getCtTransformer() {
        return ctTransformer;
    }

    public static class Builder {
        private String ctTransformer = "";
        private int numThreads = 1;
        private boolean debug = true;
        private String provider = "cpu";

        public OfflinePunctuationModelConfig build() {
            return new OfflinePunctuationModelConfig(this);
        }

        public Builder setCtTransformer(String ctTransformer) {
            this.ctTransformer = ctTransformer;
            return this;
        }

        public Builder setNumThreads(int numThreads) {
            this.numThreads = numThreads;
            return this;
        }

        public Builder setDebug(boolean debug) {
            this.debug = debug;
            return this;
        }

        public Builder setProvider(String provider) {
            this.provider = provider;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineRecognizer.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OfflineRecognizer {
    private long ptr = 0;
    private final OfflineRecognizerConfig config;

    public OfflineRecognizer(OfflineRecognizerConfig config) {
        LibraryLoader.maybeLoad();
        ptr = newFromFile(config);
        if (ptr == 0) {
            throw new IllegalArgumentException("Invalid OfflineRecognizerConfig: failed to create native OfflineRecognizer");
        }

        this.config = config;
    }

    public void setConfig(OfflineRecognizerConfig config) {
        setConfig(ptr, config);
        // we don't update this.config
    }

    public OfflineRecognizerConfig getConfig() {
        return config;
    }

    public void decode(OfflineStream s) {
        decode(ptr, s.getPtr());
    }

    public void decode(OfflineStream[] ss) {
        long[] streamPtrs = new long[ss.length];
        for (int i = 0; i < ss.length; ++i) {
            streamPtrs[i] = ss[i].getPtr();
        }
        decodeStreams(ptr, streamPtrs);
    }

    public OfflineStream createStream() {
        long p = createStream(ptr);
        return new OfflineStream(p);
    }

    @Override
    protected void finalize() throws Throwable {
        release();
    }

    // You'd better call it manually if it is not used anymore
    public void release() {
        if (this.ptr == 0) {
            return;
        }
        delete(this.ptr);
        this.ptr = 0;
    }

    public OfflineRecognizerResult getResult(OfflineStream s) {
        return getResult(s.getPtr());
    }

    private native void delete(long ptr);

    private native long newFromFile(OfflineRecognizerConfig config);

    private native long createStream(long ptr);

    private native void decode(long ptr, long streamPtr);

    private native void setConfig(long ptr, OfflineRecognizerConfig config);

    private native void decodeStreams(long ptr, long[] streamPtrs);

    private native OfflineRecognizerResult getResult(long streamPtr);
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineRecognizerConfig.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OfflineRecognizerConfig {
    private final FeatureConfig featConfig;
    private final OfflineModelConfig modelConfig;
    private final HomophoneReplacerConfig hr;
    private final String decodingMethod;
    private final int maxActivePaths;
    private final String hotwordsFile;
    private final float hotwordsScore;
    private final String ruleFsts;
    private final String ruleFars;
    private final float blankPenalty;

    private OfflineRecognizerConfig(Builder builder) {
        this.featConfig = builder.featConfig;
        this.modelConfig = builder.modelConfig;
        this.hr = builder.hr;
        this.decodingMethod = builder.decodingMethod;
        this.maxActivePaths = builder.maxActivePaths;
        this.hotwordsFile = builder.hotwordsFile;
        this.hotwordsScore = builder.hotwordsScore;
        this.ruleFsts = builder.ruleFsts;
        this.ruleFars = builder.ruleFars;
        this.blankPenalty = builder.blankPenalty;
    }

    public static Builder builder() {
        return new Builder();
    }

    public OfflineModelConfig getModelConfig() {
        return modelConfig;
    }

    public static class Builder {
        private FeatureConfig featConfig = FeatureConfig.builder().build();
        private OfflineModelConfig modelConfig = OfflineModelConfig.builder().build();
        private HomophoneReplacerConfig hr = HomophoneReplacerConfig.builder().build();
        private String decodingMethod = "greedy_search";
        private int maxActivePaths = 4;
        private String hotwordsFile = "";
        private float hotwordsScore = 1.5f;
        private String ruleFsts = "";
        private String ruleFars = "";
        private float blankPenalty = 0.0f;

        public OfflineRecognizerConfig build() {
            return new OfflineRecognizerConfig(this);
        }

        public Builder setFeatureConfig(FeatureConfig featConfig) {
            this.featConfig = featConfig;
            return this;
        }

        public Builder setOfflineModelConfig(OfflineModelConfig modelConfig) {
            this.modelConfig = modelConfig;
            return this;
        }

        public Builder setHr(HomophoneReplacerConfig hr) {
            this.hr = hr;
            return this;
        }

        public Builder setDecodingMethod(String decodingMethod) {
            this.decodingMethod = decodingMethod;
            return this;
        }

        public Builder setMaxActivePaths(int maxActivePaths) {
            this.maxActivePaths = maxActivePaths;
            return this;
        }

        public Builder setHotwordsFile(String hotwordsFile) {
            this.hotwordsFile = hotwordsFile;
            return this;
        }

        public Builder setHotwordsScore(float hotwordsScore) {
            this.hotwordsScore = hotwordsScore;
            return this;
        }

        public Builder setRuleFsts(String ruleFsts) {
            this.ruleFsts = ruleFsts;
            return this;
        }

        public Builder setRuleFars(String ruleFars) {
            this.ruleFars = ruleFars;
            return this;
        }

        public Builder setBlankPenalty(float blankPenalty) {
            this.blankPenalty = blankPenalty;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineRecognizerResult.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OfflineRecognizerResult {
    private final String text;
    private final String[] tokens;
    private final float[] timestamps;
    private final String lang;
    private final String emotion;
    private final String event;
    private final float[] durations;

    public OfflineRecognizerResult(String text, String[] tokens, float[] timestamps, String lang, String emotion, String event, float[] durations) {
        this.text = text;
        this.tokens = tokens;
        this.timestamps = timestamps;
        this.lang = lang;
        this.emotion = emotion;
        this.event = event;
        this.durations = durations;
    }

    public String getText() {
        return text;
    }

    public String[] getTokens() {
        return tokens;
    }

    public float[] getTimestamps() {
        return timestamps;
    }

    public String getLang() {
        return lang;
    }

    public String getEmotion() {
        return emotion;
    }

    public String getEvent() {
        return event;
    }

    public float[] getDurations() {
        return durations;
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineSenseVoiceModelConfig.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OfflineSenseVoiceModelConfig {
    private final String model;
    private final String language;
    private final boolean useInverseTextNormalization;
    private final QnnConfig qnnConfig;

    private OfflineSenseVoiceModelConfig(Builder builder) {
        this.model = builder.model;
        this.language = builder.language;
        this.useInverseTextNormalization = builder.useInverseTextNormalization;
        this.qnnConfig = builder.qnnConfig;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getModel() {
        return model;
    }

    public String getLanguage() {
        return language;
    }

    public boolean getUseInverseTextNormalization() {
        return useInverseTextNormalization;
    }

    public QnnConfig getQnnConfig() {
        return qnnConfig;
    }

    public static class Builder {
        private String model = "";
        private String language = "";
        private boolean useInverseTextNormalization = true;
        private QnnConfig qnnConfig = QnnConfig.builder().build();

        public OfflineSenseVoiceModelConfig build() {
            return new OfflineSenseVoiceModelConfig(this);
        }

        public Builder setModel(String model) {
            this.model = model;
            return this;
        }

        public Builder setLanguage(String language) {
            this.language = language;
            return this;
        }

        public Builder setInverseTextNormalization(boolean useInverseTextNormalization) {
            this.useInverseTextNormalization = useInverseTextNormalization;
            return this;
        }

        public Builder setQnnConfig(QnnConfig qnnConfig) {
            this.qnnConfig = qnnConfig;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineSpeakerDiarization.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OfflineSpeakerDiarization {
    private long ptr = 0;

    public OfflineSpeakerDiarization(OfflineSpeakerDiarizationConfig config) {
        LibraryLoader.maybeLoad();
        ptr = newFromFile(config);
        if (ptr == 0) {
            throw new IllegalArgumentException("Invalid OfflineSpeakerDiarizationConfig: failed to create native OfflineSpeakerDiarization");
        }
    }

    public int getSampleRate() {
        return getSampleRate(ptr);
    }

    // Only config.clustering is used. All other fields are ignored
    public void setConfig(OfflineSpeakerDiarizationConfig config) {
        setConfig(ptr, config);
    }

    public OfflineSpeakerDiarizationSegment[] process(float[] samples) {
        return process(ptr, samples);
    }

    public OfflineSpeakerDiarizationSegment[] processWithCallback(float[] samples, OfflineSpeakerDiarizationCallback callback) {
        return processWithCallback(ptr, samples, callback, 0);
    }

    public OfflineSpeakerDiarizationSegment[] processWithCallback(float[] samples, OfflineSpeakerDiarizationCallback callback, long arg) {
        return processWithCallback(ptr, samples, callback, arg);
    }

    protected void finalize() throws Throwable {
        release();
    }

    // You'd better call it manually if it is not used anymore
    public void release() {
        if (this.ptr == 0) {
            return;
        }
        delete(this.ptr);
        this.ptr = 0;
    }

    private native int getSampleRate(long ptr);

    private native void delete(long ptr);

    private native long newFromFile(OfflineSpeakerDiarizationConfig config);

    private native void setConfig(long ptr, OfflineSpeakerDiarizationConfig config);

    private native OfflineSpeakerDiarizationSegment[] process(long ptr, float[] samples);

    private native OfflineSpeakerDiarizationSegment[] processWithCallback(long ptr, float[] samples, OfflineSpeakerDiarizationCallback callback, long arg);
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineSpeakerDiarizationCallback.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

@FunctionalInterface
public interface OfflineSpeakerDiarizationCallback {
    Integer invoke(int numProcessedChunks, int numTotalCunks, long arg);
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineSpeakerDiarizationConfig.java
================================================
package com.k2fsa.sherpa.onnx;

public class OfflineSpeakerDiarizationConfig {
    private final OfflineSpeakerSegmentationModelConfig segmentation;
    private final SpeakerEmbeddingExtractorConfig embedding;
    private final FastClusteringConfig clustering;
    private final float minDurationOn;
    private final float minDurationOff;

    private OfflineSpeakerDiarizationConfig(Builder builder) {
        this.segmentation = builder.segmentation;
        this.embedding = builder.embedding;
        this.clustering = builder.clustering;
        this.minDurationOff = builder.minDurationOff;
        this.minDurationOn = builder.minDurationOn;
    }

    public static Builder builder() {
        return new Builder();
    }

    public OfflineSpeakerSegmentationModelConfig getSegmentation() {
        return segmentation;
    }

    public SpeakerEmbeddingExtractorConfig getEmbedding() {
        return embedding;
    }

    public FastClusteringConfig getClustering() {
        return clustering;
    }

    public float getMinDurationOff() {
        return minDurationOff;
    }

    public float getMinDurationOn() {
        return minDurationOn;
    }

    public static class Builder {
        private OfflineSpeakerSegmentationModelConfig segmentation = OfflineSpeakerSegmentationModelConfig.builder().build();
        private SpeakerEmbeddingExtractorConfig embedding = SpeakerEmbeddingExtractorConfig.builder().build();
        private FastClusteringConfig clustering = FastClusteringConfig.builder().build();
        private float minDurationOn = 0.2f;
        private float minDurationOff = 0.5f;

        public OfflineSpeakerDiarizationConfig build() {
            return new OfflineSpeakerDiarizationConfig(this);
        }

        public Builder setSegmentation(OfflineSpeakerSegmentationModelConfig segmentation) {
            this.segmentation = segmentation;
            return this;
        }

        public Builder setEmbedding(SpeakerEmbeddingExtractorConfig embedding) {
            this.embedding = embedding;
            return this;
        }

        public Builder setClustering(FastClusteringConfig clustering) {
            this.clustering = clustering;
            return this;
        }

        public Builder setMinDurationOff(float minDurationOff) {
            this.minDurationOff = minDurationOff;
            return this;
        }

        public Builder setMinDurationOn(float minDurationOn) {
            this.minDurationOn = minDurationOn;
            return this;
        }
    }

}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineSpeakerDiarizationSegment.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OfflineSpeakerDiarizationSegment {
    private final float start;
    private final float end;
    private final int speaker;

    public OfflineSpeakerDiarizationSegment(float start, float end, int speaker) {
        this.start = start;
        this.end = end;
        this.speaker = speaker;
    }

    public float getStart() {
        return start;
    }

    public float getEnd() {
        return end;
    }

    public int getSpeaker() {
        return speaker;
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineSpeakerSegmentationModelConfig.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OfflineSpeakerSegmentationModelConfig {
    private final OfflineSpeakerSegmentationPyannoteModelConfig pyannote;
    private final int numThreads;
    private final boolean debug;
    private final String provider;

    private OfflineSpeakerSegmentationModelConfig(Builder builder) {
        this.pyannote = builder.pyannote;
        this.numThreads = builder.numThreads;
        this.debug = builder.debug;
        this.provider = builder.provider;
    }

    public static Builder builder() {
        return new Builder();
    }

    public static class Builder {
        private OfflineSpeakerSegmentationPyannoteModelConfig pyannote = OfflineSpeakerSegmentationPyannoteModelConfig.builder().build();
        private int numThreads = 1;
        private boolean debug = true;
        private String provider = "cpu";

        public OfflineSpeakerSegmentationModelConfig build() {
            return new OfflineSpeakerSegmentationModelConfig(this);
        }

        public Builder setPyannote(OfflineSpeakerSegmentationPyannoteModelConfig pyannote) {
            this.pyannote = pyannote;
            return this;
        }

        public Builder setNumThreads(int numThreads) {
            this.numThreads = numThreads;
            return this;
        }

        public Builder setDebug(boolean debug) {
            this.debug = debug;
            return this;
        }

        public Builder setProvider(String provider) {
            this.provider = provider;
            return this;
        }
    }
}

================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineSpeakerSegmentationPyannoteModelConfig.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OfflineSpeakerSegmentationPyannoteModelConfig {
    private final String model;

    private OfflineSpeakerSegmentationPyannoteModelConfig(Builder builder) {
        this.model = builder.model;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getModel() {
        return model;
    }

    public static class Builder {
        private String model = "";

        public OfflineSpeakerSegmentationPyannoteModelConfig build() {
            return new OfflineSpeakerSegmentationPyannoteModelConfig(this);
        }

        public Builder setModel(String model) {
            this.model = model;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineSpeechDenoiser.java
================================================
// Copyright 2025 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OfflineSpeechDenoiser {
    private long ptr = 0;

    public OfflineSpeechDenoiser(OfflineSpeechDenoiserConfig config) {
        LibraryLoader.maybeLoad();
        ptr = newFromFile(config);
        if (ptr == 0) {
            throw new IllegalArgumentException("Invalid OfflineSpeechDenoiserConfig: failed to create native OfflineSpeechDenoiser");
        }
    }

    public int getSampleRate() {
        return getSampleRate(ptr);
    }

    public DenoisedAudio run(float[] samples, int sampleRate) {
        return run(ptr, samples, sampleRate);
    }

    protected void finalize() throws Throwable {
        release();
    }

    public void release() {
        if (this.ptr == 0) {
            return;
        }
        delete(this.ptr);
        this.ptr = 0;
    }

    private native void delete(long ptr);

    private native int getSampleRate(long ptr);

    private native DenoisedAudio run(long ptr, float[] samples, int sampleRate);

    private native long newFromFile(OfflineSpeechDenoiserConfig config);
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineSpeechDenoiserConfig.java
================================================
// Copyright 2025 Xiaomi Corporation
package com.k2fsa.sherpa.onnx;

public class OfflineSpeechDenoiserConfig {
    private final OfflineSpeechDenoiserModelConfig model;

    private OfflineSpeechDenoiserConfig(OfflineSpeechDenoiserConfig.Builder builder) {
        this.model = builder.model;
    }

    public static Builder builder() {
        return new Builder();
    }

    public static class Builder {
        private OfflineSpeechDenoiserModelConfig model = OfflineSpeechDenoiserModelConfig.builder().build();

        public OfflineSpeechDenoiserConfig build() {
            return new OfflineSpeechDenoiserConfig(this);
        }

        public Builder setModel(OfflineSpeechDenoiserModelConfig model) {
            this.model = model;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineSpeechDenoiserDpdfNetModelConfig.java
================================================
// Copyright 2025 Xiaomi Corporation
package com.k2fsa.sherpa.onnx;

public class OfflineSpeechDenoiserDpdfNetModelConfig {
    private final String model;

    private OfflineSpeechDenoiserDpdfNetModelConfig(Builder builder) {
        this.model = builder.model;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getModel() {
        return model;
    }

    public static class Builder {
        private String model = "";

        public OfflineSpeechDenoiserDpdfNetModelConfig build() {
            return new OfflineSpeechDenoiserDpdfNetModelConfig(this);
        }

        public Builder setModel(String model) {
            this.model = model;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineSpeechDenoiserGtcrnModelConfig.java
================================================
// Copyright 2025 Xiaomi Corporation
package com.k2fsa.sherpa.onnx;

public class OfflineSpeechDenoiserGtcrnModelConfig {
    private final String model;

    private OfflineSpeechDenoiserGtcrnModelConfig(Builder builder) {
        this.model = builder.model;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getModel() {
        return model;
    }

    public static class Builder {
        private String model = "";

        public OfflineSpeechDenoiserGtcrnModelConfig build() {
            return new OfflineSpeechDenoiserGtcrnModelConfig(this);
        }

        public Builder setModel(String model) {
            this.model = model;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineSpeechDenoiserModelConfig.java
================================================
// Copyright 2025 Xiaomi Corporation
package com.k2fsa.sherpa.onnx;

public class OfflineSpeechDenoiserModelConfig {
    private final OfflineSpeechDenoiserGtcrnModelConfig gtcrn;
    private final OfflineSpeechDenoiserDpdfNetModelConfig dpdfnet;
    private final int numThreads;
    private final boolean debug;
    private final String provider;

    private OfflineSpeechDenoiserModelConfig(Builder builder) {
        this.gtcrn = builder.gtcrn;
        this.dpdfnet = builder.dpdfnet;
        this.numThreads = builder.numThreads;
        this.debug = builder.debug;
        this.provider = builder.provider;
    }

    public static Builder builder() {
        return new Builder();
    }

    public static class Builder {
        private OfflineSpeechDenoiserGtcrnModelConfig gtcrn = OfflineSpeechDenoiserGtcrnModelConfig.builder().build();
        private OfflineSpeechDenoiserDpdfNetModelConfig dpdfnet = OfflineSpeechDenoiserDpdfNetModelConfig.builder().build();
        private int numThreads = 1;
        private boolean debug = true;
        private String provider = "cpu";

        public OfflineSpeechDenoiserModelConfig build() {
            return new OfflineSpeechDenoiserModelConfig(this);
        }

        public Builder setGtcrn(OfflineSpeechDenoiserGtcrnModelConfig gtcrn) {
            this.gtcrn = gtcrn;
            return this;
        }

        public Builder setDpdfnet(OfflineSpeechDenoiserDpdfNetModelConfig dpdfnet) {
            this.dpdfnet = dpdfnet;
            return this;
        }

        public Builder setNumThreads(int numThreads) {
            this.numThreads = numThreads;
            return this;
        }

        public Builder setDebug(boolean debug) {
            this.debug = debug;
            return this;
        }

        public Builder setProvider(String provider) {
            this.provider = provider;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineStream.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OfflineStream {
    private long ptr = 0;

    public OfflineStream() {
        LibraryLoader.maybeLoad();
        this.ptr = 0;
    }

    public OfflineStream(long ptr) {
        this.ptr = ptr;
    }

    public long getPtr() {
        return ptr;
    }

    public void setPtr(long ptr) {
        this.ptr = ptr;
    }

    public void acceptWaveform(float[] samples, int sampleRate) {
        acceptWaveform(this.ptr, samples, sampleRate);
    }

    public void setOption(String key, String value) {
        setOption(this.ptr, key, value);
    }

    public String getOption(String key) {
        return getOption(this.ptr, key);
    }

    public boolean hasOption(String key) {
        return hasOption(this.ptr, key);
    }

    public void release() {
        // stream object must be release after used
        if (this.ptr == 0) {
            return;
        }
        delete(this.ptr);
        this.ptr = 0;
    }

    @Override
    protected void finalize() throws Throwable {
        release();
        super.finalize();
    }

    private native void acceptWaveform(long ptr, float[] samples, int sampleRate);

    private native void setOption(long ptr, String key, String value);

    private native String getOption(long ptr, String key);

    private native boolean hasOption(long ptr, String key);

    private native void delete(long ptr);
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineTransducerModelConfig.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OfflineTransducerModelConfig {
    private final String encoder;
    private final String decoder;
    private final String joiner;

    private OfflineTransducerModelConfig(Builder builder) {
        this.encoder = builder.encoder;
        this.decoder = builder.decoder;
        this.joiner = builder.joiner;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getEncoder() {
        return encoder;
    }

    public String getDecoder() {
        return decoder;
    }

    public String getJoiner() {
        return joiner;
    }

    public static class Builder {
        private String encoder = "";
        private String decoder = "";
        private String joiner = "";

        public OfflineTransducerModelConfig build() {
            return new OfflineTransducerModelConfig(this);
        }

        public Builder setEncoder(String encoder) {
            this.encoder = encoder;
            return this;
        }

        public Builder setDecoder(String decoder) {
            this.decoder = decoder;
            return this;
        }

        public Builder setJoiner(String joiner) {
            this.joiner = joiner;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineTts.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

import java.util.function.Consumer;

public class OfflineTts {
    private long ptr = 0;

    public OfflineTts(OfflineTtsConfig config) {
        LibraryLoader.maybeLoad();
        ptr = newFromFile(config);
        if (ptr == 0) {
            throw new IllegalArgumentException("Invalid OfflineTtsConfig: failed to create native OfflineTts");
        }
    }

    /** Returns the sample rate of the TTS engine. */
    public int getSampleRate() {
        return getSampleRate(ptr);
    }

    public int getNumSpeakers() {
        return getNumSpeakers(ptr);
    }

    /** Generates audio for the given text using the default speaker (sid=0) and speed=1.0. */
    public GeneratedAudio generate(String text) {
        return generate(text, 0, 1.0f);
    }

    /** Generates audio for the given text using a specific speaker ID. */
    public GeneratedAudio generate(String text, int sid) {
        return generate(text, sid, 1.0f);
    }

    /** Generates audio for the given text using a specific speaker ID and speed multiplier. */
    public GeneratedAudio generate(String text, int sid, float speed) {
        return generateImpl(ptr, text, sid, speed);
    }

    public GeneratedAudio generateWithCallback(String text, OfflineTtsCallback callback) {
        return generateWithCallback(text, 0, 1.0f, callback);
    }

    public GeneratedAudio generateWithCallback(
        String text,
        Consumer<float[]> consumer
    ) {
        return generateWithCallback(text, 0, 1.0f, consumer);
    }

    public GeneratedAudio generateWithCallback(String text, int sid, OfflineTtsCallback callback) {
        return generateWithCallback(text, sid, 1.0f, callback);
    }

    public GeneratedAudio generateWithCallback(
        String text,
        int sid,
        Consumer<float[]> consumer
    ) {

        return generateWithCallback(text, sid, 1.0f, consumer);
    }

    public GeneratedAudio generateWithCallback(String text, int sid, float speed, OfflineTtsCallback callback) {
        return generateWithCallbackImpl(ptr, text, sid, speed, callback);
    }

    public GeneratedAudio generateWithCallback(
            String text,
            int sid,
            float speed,
            Consumer<float[]> consumer
    ) {
        OfflineTtsCallback cb = samples -> {
            consumer.accept(samples);
            return 1;
        };
        return generateWithCallback(text, sid, speed, cb);
    }

    /**
     * Generate audio using a GenerationConfig and a callback.
     *
     * @param text The text to synthesize.
     * @param config The generation configuration.
     * @param callback Callback to receive intermediate audio chunks.
     * @return GeneratedAudio with samples and sample rate.
     */
    public GeneratedAudio generateWithConfigAndCallback(
            String text,
            GenerationConfig config,
            OfflineTtsCallback callback
    ) {
        return generateWithConfigImpl(ptr, text, config, callback);
    }


    public GeneratedAudio generateWithConfigAndCallback(
            String text,
            GenerationConfig config,
            Consumer<float[]> consumer
    ) {
        OfflineTtsCallback cb = samples -> {
            consumer.accept(samples);
            return 1;
        };
        return generateWithConfigAndCallback(text, config, cb);
    }

    @Override
    protected void finalize() throws Throwable {
        release();
    }

    public void release() {
        if (this.ptr == 0) {
            return;
        }
        delete(this.ptr);
        this.ptr = 0;
    }

    private native void delete(long ptr);

    private native int getSampleRate(long ptr);

    private native int getNumSpeakers(long ptr);

    private native GeneratedAudio generateImpl(long ptr, String text, int sid, float speed);

    private native GeneratedAudio generateWithCallbackImpl(long ptr, String text, int sid, float speed, OfflineTtsCallback callback);

    private native GeneratedAudio generateWithConfigImpl(
            long ptr,
            String text,
            GenerationConfig config,
            OfflineTtsCallback callback
    );

    private native long newFromFile(OfflineTtsConfig config);
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineTtsCallback.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

@FunctionalInterface
public interface OfflineTtsCallback {
    /**
     * @param samples audio chunk
     * @return 1 to continue, 0 to stop
     */
    Integer invoke(float[] samples);
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineTtsConfig.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OfflineTtsConfig {
    private final OfflineTtsModelConfig model;
    private final String ruleFsts;
    private final String ruleFars;
    private final int maxNumSentences;
    private final float silenceScale;

    private OfflineTtsConfig(Builder builder) {
        this.model = builder.model;
        this.ruleFsts = builder.ruleFsts;
        this.ruleFars = builder.ruleFars;
        this.maxNumSentences = builder.maxNumSentences;
        this.silenceScale = builder.silenceScale;
    }

    public static Builder builder() {
        return new Builder();
    }

    public OfflineTtsModelConfig getModel() {
        return model;
    }

    public String getRuleFsts() {
        return ruleFsts;
    }

    public String getRuleFars() {
        return ruleFars;
    }

    public int getMaxNumSentences() {
        return maxNumSentences;
    }

    public float getSilenceScale() {
        return silenceScale;
    }

    public static class Builder {
        private OfflineTtsModelConfig model = OfflineTtsModelConfig.builder().build();
        private String ruleFsts = "";
        private String ruleFars = "";
        private int maxNumSentences = 1;
        private float silenceScale = 0.2f;

        public OfflineTtsConfig build() {
            return new OfflineTtsConfig(this);
        }

        public Builder setModel(OfflineTtsModelConfig model) {
            this.model = model;
            return this;
        }

        public Builder setRuleFsts(String ruleFsts) {
            this.ruleFsts = ruleFsts;
            return this;
        }

        public Builder setRuleFars(String ruleFars) {
            this.ruleFars = ruleFars;
            return this;
        }

        public Builder setMaxNumSentences(int maxNumSentences) {
            this.maxNumSentences = maxNumSentences;
            return this;
        }

        public Builder setSilenceScale(float silenceScale) {
            this.silenceScale = silenceScale;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineTtsKittenModelConfig.java
================================================
// Copyright 2025 Xiaomi Corporation
package com.k2fsa.sherpa.onnx;

public class OfflineTtsKittenModelConfig {
    private final String model;
    private final String voices;
    private final String tokens;
    private final String dataDir;
    private final float lengthScale;

    private OfflineTtsKittenModelConfig(Builder builder) {
        this.model = builder.model;
        this.voices = builder.voices;
        this.tokens = builder.tokens;
        this.dataDir = builder.dataDir;
        this.lengthScale = builder.lengthScale;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getModel() {
        return model;
    }

    public String getVoices() {
        return voices;
    }

    public String getTokens() {
        return tokens;
    }

    public String getDataDir() {
        return dataDir;
    }

    public float getLengthScale() {
        return lengthScale;
    }


    public static class Builder {
        private String model = "";
        private String voices = "";
        private String tokens = "";
        private String dataDir = "";
        private float lengthScale = 1.0f;

        public OfflineTtsKittenModelConfig build() {
            return new OfflineTtsKittenModelConfig(this);
        }

        public Builder setModel(String model) {
            this.model = model;
            return this;
        }

        public Builder setVoices(String voices) {
            this.voices = voices;
            return this;
        }

        public Builder setTokens(String tokens) {
            this.tokens = tokens;
            return this;
        }

        public Builder setDataDir(String dataDir) {
            this.dataDir = dataDir;
            return this;
        }

        public Builder setLengthScale(float lengthScale) {
            this.lengthScale = lengthScale;
            return this;
        }
    }
}

================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineTtsKokoroModelConfig.java
================================================
// Copyright 2025 Xiaomi Corporation
package com.k2fsa.sherpa.onnx;

public class OfflineTtsKokoroModelConfig {
    private final String model;
    private final String voices;
    private final String tokens;
    private final String lexicon;
    private final String lang;
    private final String dataDir;
    private final String dictDir;  // unused
    private final float lengthScale;

    private OfflineTtsKokoroModelConfig(Builder builder) {
        this.model = builder.model;
        this.voices = builder.voices;
        this.tokens = builder.tokens;
        this.lexicon = builder.lexicon;
        this.lang = builder.lang;
        this.dataDir = builder.dataDir;
        this.dictDir = builder.dictDir;
        this.lengthScale = builder.lengthScale;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getModel() {
        return model;
    }

    public String getVoices() {
        return voices;
    }

    public String getTokens() {
        return tokens;
    }

    public String getDataDir() {
        return dataDir;
    }

    public float getLengthScale() {
        return lengthScale;
    }


    public static class Builder {
        private String model = "";
        private String voices = "";
        private String tokens = "";
        private String lexicon = "";
        private String lang = "";
        private String dataDir = "";
        private String dictDir = "";
        private float lengthScale = 1.0f;

        public OfflineTtsKokoroModelConfig build() {
            return new OfflineTtsKokoroModelConfig(this);
        }

        public Builder setModel(String model) {
            this.model = model;
            return this;
        }

        public Builder setVoices(String voices) {
            this.voices = voices;
            return this;
        }

        public Builder setTokens(String tokens) {
            this.tokens = tokens;
            return this;
        }

        public Builder setLexicon(String lexicon) {
            this.lexicon = lexicon;
            return this;
        }

        public Builder setLang(String lang) {
            this.lang = lang;
            return this;
        }

        public Builder setDataDir(String dataDir) {
            this.dataDir = dataDir;
            return this;
        }

        public Builder setDictDir(String dictDir) {
            this.dictDir = dictDir;
            return this;
        }

        public Builder setLengthScale(float lengthScale) {
            this.lengthScale = lengthScale;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineTtsMatchaModelConfig.java
================================================
// Copyright 2025 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OfflineTtsMatchaModelConfig {
    private final String acousticModel;
    private final String vocoder;
    private final String lexicon;
    private final String tokens;
    private final String dataDir;
    private final String dictDir;  // unused
    private final float noiseScale;
    private final float lengthScale;

    private OfflineTtsMatchaModelConfig(Builder builder) {
        this.acousticModel = builder.acousticModel;
        this.vocoder = builder.vocoder;
        this.lexicon = builder.lexicon;
        this.tokens = builder.tokens;
        this.dataDir = builder.dataDir;
        this.dictDir = builder.dictDir;
        this.noiseScale = builder.noiseScale;
        this.lengthScale = builder.lengthScale;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getAcousticModel() {
        return acousticModel;
    }

    public String getVocoder() {
        return vocoder;
    }

    public String getLexicon() {
        return lexicon;
    }

    public String getTokens() {
        return tokens;
    }

    public String getDataDir() {
        return dataDir;
    }

    public String getDictDir() {
        return dictDir;
    }

    public float getLengthScale() {
        return lengthScale;
    }

    public float getNoiseScale() {
        return noiseScale;
    }

    public static class Builder {
        private String acousticModel = "";
        private String vocoder = "";
        private String lexicon = "";
        private String tokens = "";
        private String dataDir = "";
        private String dictDir = "";
        private float noiseScale = 1.0f;
        private float lengthScale = 1.0f;

        public OfflineTtsMatchaModelConfig build() {
            return new OfflineTtsMatchaModelConfig(this);
        }

        public Builder setAcousticModel(String acousticModel) {
            this.acousticModel = acousticModel;
            return this;
        }

        public Builder setVocoder(String vocoder) {
            this.vocoder = vocoder;
            return this;
        }

        public Builder setTokens(String tokens) {
            this.tokens = tokens;
            return this;
        }

        public Builder setLexicon(String lexicon) {
            this.lexicon = lexicon;
            return this;
        }

        public Builder setDataDir(String dataDir) {
            this.dataDir = dataDir;
            return this;
        }

        public Builder setDictDir(String dictDir) {
            this.dictDir = dictDir;
            return this;
        }

        public Builder setNoiseScale(float noiseScale) {
            this.noiseScale = noiseScale;
            return this;
        }

        public Builder setLengthScale(float lengthScale) {
            this.lengthScale = lengthScale;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineTtsModelConfig.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OfflineTtsModelConfig {
    private final OfflineTtsVitsModelConfig vits;
    private final OfflineTtsMatchaModelConfig matcha;
    private final OfflineTtsKokoroModelConfig kokoro;
    private final OfflineTtsZipVoiceModelConfig zipvoice;
    private final OfflineTtsKittenModelConfig kitten;
    private final OfflineTtsPocketModelConfig pocket;
    private final OfflineTtsSupertonicModelConfig supertonic;
    private final int numThreads;
    private final boolean debug;
    private final String provider;

    private OfflineTtsModelConfig(Builder builder) {
        this.vits = builder.vits;
        this.matcha = builder.matcha;
        this.kokoro = builder.kokoro;
        this.zipvoice = builder.zipvoice;
        this.kitten = builder.kitten;
        this.pocket = builder.pocket;
        this.supertonic = builder.supertonic;
        this.numThreads = builder.numThreads;
        this.debug = builder.debug;
        this.provider = builder.provider;
    }

    public static Builder builder() {
        return new Builder();
    }

    public OfflineTtsVitsModelConfig getVits() {
        return vits;
    }

    public OfflineTtsMatchaModelConfig getMatcha() {
        return matcha;
    }

    public OfflineTtsKokoroModelConfig getKokoro() {
        return kokoro;
    }

    public OfflineTtsZipVoiceModelConfig getZipvoice() {
        return zipvoice;
    }

    public OfflineTtsKittenModelConfig getKitten() {
        return kitten;
    }

    public OfflineTtsPocketModelConfig getPocket() {
        return pocket;
    }

    public OfflineTtsSupertonicModelConfig getSupertonic() {
        return supertonic;
    }

    public static class Builder {
        private OfflineTtsVitsModelConfig vits = OfflineTtsVitsModelConfig.builder().build();
        private OfflineTtsMatchaModelConfig matcha = OfflineTtsMatchaModelConfig.builder().build();
        private OfflineTtsKokoroModelConfig kokoro = OfflineTtsKokoroModelConfig.builder().build();
        private OfflineTtsZipVoiceModelConfig zipvoice = OfflineTtsZipVoiceModelConfig.builder().build();
        private OfflineTtsKittenModelConfig kitten = OfflineTtsKittenModelConfig.builder().build();
        private OfflineTtsPocketModelConfig pocket = OfflineTtsPocketModelConfig.builder().build();
        private OfflineTtsSupertonicModelConfig supertonic = OfflineTtsSupertonicModelConfig.builder().build();
        private int numThreads = 1;
        private boolean debug = true;
        private String provider = "cpu";

        public OfflineTtsModelConfig build() {
            return new OfflineTtsModelConfig(this);
        }

        public Builder setVits(OfflineTtsVitsModelConfig vits) {
            this.vits = vits;
            return this;
        }

        public Builder setMatcha(OfflineTtsMatchaModelConfig matcha) {
            this.matcha = matcha;
            return this;
        }

        public Builder setKokoro(OfflineTtsKokoroModelConfig kokoro) {
            this.kokoro = kokoro;
            return this;
        }

        public Builder setZipvoice(OfflineTtsZipVoiceModelConfig zipvoice) {
            this.zipvoice = zipvoice;
            return this;
        }

        public Builder setKitten(OfflineTtsKittenModelConfig kitten) {
            this.kitten = kitten;
            return this;
        }

        public Builder setPocket(OfflineTtsPocketModelConfig pocket) {
            this.pocket = pocket;
            return this;
        }

        public Builder setSupertonic(OfflineTtsSupertonicModelConfig supertonic) {
            this.supertonic = supertonic;
            return this;
        }

        public Builder setNumThreads(int numThreads) {
            this.numThreads = numThreads;
            return this;
        }

        public Builder setDebug(boolean debug) {
            this.debug = debug;
            return this;
        }

        public Builder setProvider(String provider) {
            this.provider = provider;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineTtsPocketModelConfig.java
================================================
// Copyright 2026 Xiaomi Corporation
package com.k2fsa.sherpa.onnx;

public class OfflineTtsPocketModelConfig {
    private final String lmFlow;
    private final String lmMain;
    private final String encoder;
    private final String decoder;
    private final String textConditioner;
    private final String vocabJson;
    private final String tokenScoresJson;
    private final int voiceEmbeddingCacheCapacity;

    private OfflineTtsPocketModelConfig(Builder builder) {
        this.lmFlow = builder.lmFlow;
        this.lmMain = builder.lmMain;
        this.encoder = builder.encoder;
        this.decoder = builder.decoder;
        this.textConditioner = builder.textConditioner;
        this.vocabJson = builder.vocabJson;
        this.tokenScoresJson = builder.tokenScoresJson;
        this.voiceEmbeddingCacheCapacity = builder.voiceEmbeddingCacheCapacity;
    }

    public String getLmFlow() {
        return lmFlow;
    }

    public String getLmMain() {
        return lmMain;
    }

    public String getEncoder() {
        return encoder;
    }

    public String getDecoder() {
        return decoder;
    }

    public String getTextConditioner() {
        return textConditioner;
    }

    public String getVocabJson() {
        return vocabJson;
    }

    public String getTokenScoresJson() {
        return tokenScoresJson;
    }

    public int getVoiceEmbeddingCacheCapacity() {
        return voiceEmbeddingCacheCapacity;
    }

    public static Builder builder() {
        return new Builder();
    }

    public static class Builder {
        private String lmFlow = "";
        private String lmMain = "";
        private String encoder = "";
        private String decoder = "";
        private String textConditioner = "";
        private String vocabJson = "";
        private String tokenScoresJson = "";
        private int voiceEmbeddingCacheCapacity = 50;

        public OfflineTtsPocketModelConfig build() {
            return new OfflineTtsPocketModelConfig(this);
        }

        public Builder setLmFlow(String lmFlow) {
            this.lmFlow = lmFlow;
            return this;
        }

        public Builder setLmMain(String lmMain) {
            this.lmMain = lmMain;
            return this;
        }

        public Builder setEncoder(String encoder) {
            this.encoder = encoder;
            return this;
        }

        public Builder setDecoder(String decoder) {
            this.decoder = decoder;
            return this;
        }

        public Builder setTextConditioner(String textConditioner) {
            this.textConditioner = textConditioner;
            return this;
        }

        public Builder setVocabJson(String vocabJson) {
            this.vocabJson = vocabJson;
            return this;
        }

        public Builder setTokenScoresJson(String tokenScoresJson) {
            this.tokenScoresJson = tokenScoresJson;
            return this;
        }

        public Builder setVoiceEmbeddingCacheCapacity(int voiceEmbeddingCacheCapacity) {
            this.voiceEmbeddingCacheCapacity = voiceEmbeddingCacheCapacity;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineTtsSupertonicModelConfig.java
================================================
// Copyright 2026 Xiaomi Corporation
package com.k2fsa.sherpa.onnx;

public class OfflineTtsSupertonicModelConfig {
    private final String durationPredictor;
    private final String textEncoder;
    private final String vectorEstimator;
    private final String vocoder;
    private final String ttsJson;
    private final String unicodeIndexer;
    private final String voiceStyle;

    private OfflineTtsSupertonicModelConfig(Builder builder) {
        this.durationPredictor = builder.durationPredictor;
        this.textEncoder = builder.textEncoder;
        this.vectorEstimator = builder.vectorEstimator;
        this.vocoder = builder.vocoder;
        this.ttsJson = builder.ttsJson;
        this.unicodeIndexer = builder.unicodeIndexer;
        this.voiceStyle = builder.voiceStyle;
    }

    public String getDurationPredictor() {
        return durationPredictor;
    }

    public String getTextEncoder() {
        return textEncoder;
    }

    public String getVectorEstimator() {
        return vectorEstimator;
    }

    public String getVocoder() {
        return vocoder;
    }

    public String getTtsJson() {
        return ttsJson;
    }

    public String getUnicodeIndexer() {
        return unicodeIndexer;
    }

    public String getVoiceStyle() {
        return voiceStyle;
    }

    public static Builder builder() {
        return new Builder();
    }

    public static class Builder {
        private String durationPredictor = "";
        private String textEncoder = "";
        private String vectorEstimator = "";
        private String vocoder = "";
        private String ttsJson = "";
        private String unicodeIndexer = "";
        private String voiceStyle = "";

        public OfflineTtsSupertonicModelConfig build() {
            return new OfflineTtsSupertonicModelConfig(this);
        }

        public Builder setDurationPredictor(String durationPredictor) {
            this.durationPredictor = durationPredictor;
            return this;
        }

        public Builder setTextEncoder(String textEncoder) {
            this.textEncoder = textEncoder;
            return this;
        }

        public Builder setVectorEstimator(String vectorEstimator) {
            this.vectorEstimator = vectorEstimator;
            return this;
        }

        public Builder setVocoder(String vocoder) {
            this.vocoder = vocoder;
            return this;
        }

        public Builder setTtsJson(String ttsJson) {
            this.ttsJson = ttsJson;
            return this;
        }

        public Builder setUnicodeIndexer(String unicodeIndexer) {
            this.unicodeIndexer = unicodeIndexer;
            return this;
        }

        public Builder setVoiceStyle(String voiceStyle) {
            this.voiceStyle = voiceStyle;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineTtsVitsModelConfig.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OfflineTtsVitsModelConfig {
    private final String model;
    private final String lexicon;
    private final String tokens;
    private final String dataDir;
    private final String dictDir;  // unused
    private final float noiseScale;
    private final float noiseScaleW;
    private final float lengthScale;

    private OfflineTtsVitsModelConfig(Builder builder) {
        this.model = builder.model;
        this.lexicon = builder.lexicon;
        this.tokens = builder.tokens;
        this.dataDir = builder.dataDir;
        this.dictDir = builder.dictDir;
        this.noiseScale = builder.noiseScale;
        this.noiseScaleW = builder.noiseScaleW;
        this.lengthScale = builder.lengthScale;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getModel() {
        return model;
    }

    public String getLexicon() {
        return lexicon;
    }

    public String getTokens() {
        return tokens;
    }

    public String getDataDir() {
        return dataDir;
    }

    public String getDictDir() {
        return dictDir;
    }

    public float getLengthScale() {
        return lengthScale;
    }

    public float getNoiseScale() {
        return noiseScale;
    }

    public float getNoiseScaleW() {
        return noiseScaleW;
    }

    public static class Builder {
        private String model = "";
        private String lexicon = "";
        private String tokens = "";
        private String dataDir = "";
        private String dictDir = "";
        private float noiseScale = 0.667f;
        private float noiseScaleW = 0.8f;
        private float lengthScale = 1.0f;

        public OfflineTtsVitsModelConfig build() {
            return new OfflineTtsVitsModelConfig(this);
        }

        public Builder setModel(String model) {
            this.model = model;
            return this;
        }

        public Builder setTokens(String tokens) {
            this.tokens = tokens;
            return this;
        }

        public Builder setLexicon(String lexicon) {
            this.lexicon = lexicon;
            return this;
        }

        public Builder setDataDir(String dataDir) {
            this.dataDir = dataDir;
            return this;
        }

        public Builder setDictDir(String dictDir) {
            this.dictDir = dictDir;
            return this;
        }

        public Builder setNoiseScale(float noiseScale) {
            this.noiseScale = noiseScale;
            return this;
        }

        public Builder setNoiseScaleW(float noiseScaleW) {
            this.noiseScaleW = noiseScaleW;
            return this;
        }

        public Builder setLengthScale(float lengthScale) {
            this.lengthScale = lengthScale;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineTtsZipVoiceModelConfig.java
================================================
// Copyright 2026 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OfflineTtsZipVoiceModelConfig {
    private final String tokens;
    private final String encoder;
    private final String decoder;
    private final String vocoder;
    private final String dataDir;
    private final String lexicon;
    private final float featScale;
    private final float tShift;
    private final float targetRms;
    private final float guidanceScale;

    private OfflineTtsZipVoiceModelConfig(Builder builder) {
        this.tokens = builder.tokens;
        this.encoder = builder.encoder;
        this.decoder = builder.decoder;
        this.vocoder = builder.vocoder;
        this.dataDir = builder.dataDir;
        this.lexicon = builder.lexicon;
        this.featScale = builder.featScale;
        this.tShift = builder.tShift;
        this.targetRms = builder.targetRms;
        this.guidanceScale = builder.guidanceScale;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getTokens() {
        return tokens;
    }

    public String getEncoder() {
        return encoder;
    }

    public String getDecoder() {
        return decoder;
    }

    public String getVocoder() {
        return vocoder;
    }

    public String getDataDir() {
        return dataDir;
    }

    public String getLexicon() {
        return lexicon;
    }

    public float getFeatScale() {
        return featScale;
    }

    public float getTShift() {
        return tShift;
    }

    public float getTargetRms() {
        return targetRms;
    }

    public float getGuidanceScale() {
        return guidanceScale;
    }

    public static class Builder {
        private String tokens = "";
        private String encoder = "";
        private String decoder = "";
        private String vocoder = "";
        private String dataDir = "";
        private String lexicon = "";
        private float featScale = 0.1f;
        private float tShift = 0.5f;
        private float targetRms = 0.1f;
        private float guidanceScale = 1.0f;

        public OfflineTtsZipVoiceModelConfig build() {
            return new OfflineTtsZipVoiceModelConfig(this);
        }

        public Builder setTokens(String tokens) {
            this.tokens = tokens;
            return this;
        }

        public Builder setEncoder(String encoder) {
            this.encoder = encoder;
            return this;
        }

        public Builder setDecoder(String decoder) {
            this.decoder = decoder;
            return this;
        }

        public Builder setVocoder(String vocoder) {
            this.vocoder = vocoder;
            return this;
        }

        public Builder setDataDir(String dataDir) {
            this.dataDir = dataDir;
            return this;
        }

        public Builder setLexicon(String lexicon) {
            this.lexicon = lexicon;
            return this;
        }

        public Builder setFeatScale(float featScale) {
            this.featScale = featScale;
            return this;
        }

        public Builder setTShift(float tShift) {
            this.tShift = tShift;
            return this;
        }

        public Builder setTargetRms(float targetRms) {
            this.targetRms = targetRms;
            return this;
        }

        public Builder setGuidanceScale(float guidanceScale) {
            this.guidanceScale = guidanceScale;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineWenetCtcModelConfig.java
================================================
package com.k2fsa.sherpa.onnx;

public class OfflineWenetCtcModelConfig {
    private final String model;

    private OfflineWenetCtcModelConfig(Builder builder) {
        this.model = builder.model;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getModel() {
        return model;
    }

    public static class Builder {
        private String model = "";

        public OfflineWenetCtcModelConfig build() {
            return new OfflineWenetCtcModelConfig(this);
        }

        public Builder setModel(String model) {
            this.model = model;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineWhisperModelConfig.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OfflineWhisperModelConfig {
    private final String encoder;
    private final String decoder;
    private final String language;
    private final String task;
    private final int tailPaddings;
    private final boolean enableTokenTimestamps;
    private final boolean enableSegmentTimestamps;

    private OfflineWhisperModelConfig(Builder builder) {
        this.encoder = builder.encoder;
        this.decoder = builder.decoder;
        this.language = builder.language;
        this.task = builder.task;
        this.tailPaddings = builder.tailPaddings;
        this.enableTokenTimestamps = builder.enableTokenTimestamps;
        this.enableSegmentTimestamps = builder.enableSegmentTimestamps;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getEncoder() {
        return encoder;
    }

    public String getDecoder() {
        return decoder;
    }

    public String getLanguage() {
        return language;
    }

    public String getTask() {
        return task;
    }

    public int getTailPaddings() {
        return tailPaddings;
    }

    public boolean getEnableTokenTimestamps() {
        return enableTokenTimestamps;
    }

    public boolean getEnableSegmentTimestamps() {
        return enableSegmentTimestamps;
    }

    public static class Builder {
        private String encoder = "";
        private String decoder = "";
        private String language = "en"; // used only with multilingual models
        private String task = "transcribe"; // used only with multilingual models

        private int tailPaddings = 1000; // number of frames to pad
        private boolean enableTokenTimestamps = false;
        private boolean enableSegmentTimestamps = false;

        public OfflineWhisperModelConfig build() {
            return new OfflineWhisperModelConfig(this);
        }

        public Builder setEncoder(String encoder) {
            this.encoder = encoder;
            return this;
        }

        public Builder setDecoder(String decoder) {
            this.decoder = decoder;
            return this;
        }

        public Builder setLanguage(String language) {
            this.language = language;
            return this;
        }

        public Builder setTask(String task) {
            this.task = task;
            return this;
        }

        public Builder setTailPaddings(int tailPaddings) {
            this.tailPaddings = tailPaddings;
            return this;
        }

        public Builder setEnableTokenTimestamps(boolean enableTokenTimestamps) {
            this.enableTokenTimestamps = enableTokenTimestamps;
            return this;
        }

        public Builder setEnableSegmentTimestamps(boolean enableSegmentTimestamps) {
            this.enableSegmentTimestamps = enableSegmentTimestamps;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineZipformerAudioTaggingModelConfig.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OfflineZipformerAudioTaggingModelConfig {
    private final String model;

    private OfflineZipformerAudioTaggingModelConfig(Builder builder) {
        this.model = builder.model;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getModel() {
        return model;
    }

    public static class Builder {
        private String model = "";

        public OfflineZipformerAudioTaggingModelConfig build() {
            return new OfflineZipformerAudioTaggingModelConfig(this);
        }

        public Builder setModel(String model) {
            this.model = model;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OfflineZipformerCtcModelConfig.java
================================================
// Copyright 2025 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OfflineZipformerCtcModelConfig {
    private final String model;
    private final QnnConfig qnnConfig;

    private OfflineZipformerCtcModelConfig(Builder builder) {
        this.model = builder.model;
        this.qnnConfig = builder.qnnConfig;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getModel() {
        return model;
    }

    public QnnConfig getQnnConfig() {
        return qnnConfig;
    }

    public static class Builder {
        private String model = "";
        private QnnConfig qnnConfig = QnnConfig.builder().build();

        public OfflineZipformerCtcModelConfig build() {
            return new OfflineZipformerCtcModelConfig(this);
        }

        public Builder setModel(String model) {
            this.model = model;
            return this;
        }

        public Builder setQnnConfig(QnnConfig qnnConfig) {
            this.qnnConfig = qnnConfig;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OnlineCtcFstDecoderConfig.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OnlineCtcFstDecoderConfig {
    private final String graph;
    private final int maxActive;

    private OnlineCtcFstDecoderConfig(Builder builder) {
        this.graph = builder.graph;
        this.maxActive = builder.maxActive;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getGraph() {
        return graph;
    }

    public float getMaxActive() {
        return maxActive;
    }

    public static class Builder {
        private String graph = "";
        private int maxActive = 3000;

        public OnlineCtcFstDecoderConfig build() {
            return new OnlineCtcFstDecoderConfig(this);
        }

        public Builder setGraph(String graph) {
            this.graph = graph;
            return this;
        }

        public Builder setMaxActive(int maxActive) {
            this.maxActive = maxActive;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OnlineLMConfig.java
================================================
// Copyright 2022-2023 by zhaoming
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OnlineLMConfig {

    private final String model;
    private final float scale;

    private OnlineLMConfig(Builder builder) {
        this.model = builder.model;
        this.scale = builder.scale;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getModel() {
        return model;
    }

    public float getScale() {
        return scale;
    }

    public static class Builder {
        private String model = "";
        private float scale = 1.0f;

        public OnlineLMConfig build() {
            return new OnlineLMConfig(this);
        }

        public Builder setModel(String model) {
            this.model = model;
            return this;
        }

        public Builder setScale(float scale) {
            this.scale = scale;
            return this;
        }
    }
}

================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OnlineModelConfig.java
================================================
// Copyright 2022-2023 by zhaoming
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OnlineModelConfig {
    private final OnlineTransducerModelConfig transducer;
    private final OnlineParaformerModelConfig paraformer;
    private final OnlineZipformer2CtcModelConfig zipformer2Ctc;
    private final OnlineNeMoCtcModelConfig neMoCtc;
    private final OnlineToneCtcModelConfig toneCtc;
    private final String tokens;
    private final int numThreads;
    private final boolean debug;
    private final String provider;
    private final String modelType;
    private final String modelingUnit;
    private final String bpeVocab;

    private OnlineModelConfig(Builder builder) {
        this.transducer = builder.transducer;
        this.paraformer = builder.paraformer;
        this.zipformer2Ctc = builder.zipformer2Ctc;
        this.neMoCtc = builder.neMoCtc;
        this.toneCtc = builder.toneCtc;
        this.tokens = builder.tokens;
        this.numThreads = builder.numThreads;
        this.debug = builder.debug;
        this.provider = builder.provider;
        this.modelType = builder.modelType;
        this.modelingUnit = builder.modelingUnit;
        this.bpeVocab = builder.bpeVocab;
    }

    public static Builder builder() {
        return new Builder();
    }

    public OnlineParaformerModelConfig getParaformer() {
        return paraformer;
    }

    public OnlineTransducerModelConfig getTransducer() {
        return transducer;
    }

    public OnlineZipformer2CtcModelConfig getZipformer2Ctc() {
        return zipformer2Ctc;
    }

    public OnlineNeMoCtcModelConfig getNeMoCtc() {
        return neMoCtc;
    }

    public OnlineToneCtcModelConfig getToneCtc() {
        return toneCtc;
    }

    public String getTokens() {
        return tokens;
    }

    public int getNumThreads() {
        return numThreads;
    }

    public boolean getDebug() {
        return debug;
    }

    public String getProvider() {
        return provider;
    }

    public String getModelType() {
        return modelType;
    }

    public String getModelingUnit() {
        return modelingUnit;
    }

    public String getBpeVocab() {
        return bpeVocab;
    }

    public static class Builder {
        private OnlineParaformerModelConfig paraformer = OnlineParaformerModelConfig.builder().build();
        private OnlineTransducerModelConfig transducer = OnlineTransducerModelConfig.builder().build();
        private OnlineZipformer2CtcModelConfig zipformer2Ctc = OnlineZipformer2CtcModelConfig.builder().build();
        private OnlineNeMoCtcModelConfig neMoCtc = OnlineNeMoCtcModelConfig.builder().build();
        private OnlineToneCtcModelConfig toneCtc = OnlineToneCtcModelConfig.builder().build();
        private String tokens = "";
        private int numThreads = 1;
        private boolean debug = true;
        private String provider = "cpu";
        private String modelType = "";
        private String modelingUnit = "cjkchar";
        private String bpeVocab = "";

        public OnlineModelConfig build() {
            return new OnlineModelConfig(this);
        }

        public Builder setTransducer(OnlineTransducerModelConfig transducer) {
            this.transducer = transducer;
            return this;
        }

        public Builder setParaformer(OnlineParaformerModelConfig paraformer) {
            this.paraformer = paraformer;
            return this;
        }

        public Builder setZipformer2Ctc(OnlineZipformer2CtcModelConfig zipformer2Ctc) {
            this.zipformer2Ctc = zipformer2Ctc;
            return this;
        }

        public Builder setNeMoCtc(OnlineNeMoCtcModelConfig neMoCtc) {
            this.neMoCtc = neMoCtc;
            return this;
        }

        public Builder setToneCtc(OnlineToneCtcModelConfig toneCtc) {
            this.toneCtc = toneCtc;
            return this;
        }

        public Builder setTokens(String tokens) {
            this.tokens = tokens;
            return this;
        }

        public Builder setNumThreads(int numThreads) {
            this.numThreads = numThreads;
            return this;
        }

        public Builder setDebug(boolean debug) {
            this.debug = debug;
            return this;
        }

        public Builder setProvider(String provider) {
            this.provider = provider;
            return this;
        }

        public Builder setModelType(String modelType) {
            this.modelType = modelType;
            return this;
        }

        public Builder setModelingUnit(String modelingUnit) {
            this.modelingUnit = modelingUnit;
            return this;
        }

        public Builder setBpeVocab(String bpeVocab) {
            this.bpeVocab = bpeVocab;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OnlineNeMoCtcModelConfig.java
================================================
// Copyright 2024 Xiaomi Corporation
package com.k2fsa.sherpa.onnx;

public class OnlineNeMoCtcModelConfig {
    private final String model;

    private OnlineNeMoCtcModelConfig(Builder builder) {
        this.model = builder.model;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getModel() {
        return model;
    }

    public static class Builder {
        private String model = "";

        public OnlineNeMoCtcModelConfig build() {
            return new OnlineNeMoCtcModelConfig(this);
        }

        public Builder setModel(String model) {
            this.model = model;
            return this;
        }
    }
}

================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OnlineParaformerModelConfig.java
================================================
// Copyright 2022-2023 by zhaoming
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OnlineParaformerModelConfig {
    private final String encoder;
    private final String decoder;

    private OnlineParaformerModelConfig(Builder builder) {
      this.encoder = builder.encoder;
      this.decoder = builder.decoder;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getEncoder() {
        return encoder;
    }

    public String getDecoder() {
        return decoder;
    }

    public static class Builder {
        private String encoder = "";
        private String decoder = "";

        public OnlineParaformerModelConfig build() {
            return new OnlineParaformerModelConfig(this);
        }

        public Builder setEncoder(String encoder) {
            this.encoder = encoder;
            return this;
        }

        public Builder setDecoder(String decoder) {
            this.decoder = decoder;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OnlinePunctuation.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OnlinePunctuation {
    private long ptr = 0;

    public OnlinePunctuation(OnlinePunctuationConfig config) {
        LibraryLoader.maybeLoad();
        ptr = newFromFile(config);
    }

    public String addPunctuation(String text) {
        return addPunctuation(ptr, text);
    }

    @Override
    protected void finalize() throws Throwable {
        release();
    }

    // You'd better call it manually if it is not used anymore
    public void release() {
        if (this.ptr == 0) {
            return;
        }
        delete(this.ptr);
        this.ptr = 0;
    }

    private native void delete(long ptr);

    private native long newFromFile(OnlinePunctuationConfig config);

    private native String addPunctuation(long ptr, String text);
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OnlinePunctuationConfig.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OnlinePunctuationConfig {
    private final OnlinePunctuationModelConfig model;

    private OnlinePunctuationConfig(Builder builder) {
        this.model = builder.model;
    }

    public static Builder builder() {
        return new Builder();
    }

    public OnlinePunctuationModelConfig getModel() {
        return model;
    }


    public static class Builder {
        private OnlinePunctuationModelConfig model = OnlinePunctuationModelConfig.builder().build();

        public OnlinePunctuationConfig build() {
            return new OnlinePunctuationConfig(this);
        }

        public Builder setModel(OnlinePunctuationModelConfig model) {
            this.model = model;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OnlinePunctuationModelConfig.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OnlinePunctuationModelConfig {
    private final String cnnBilstm;
    private final String bpeVocab;
    private final int numThreads;
    private final boolean debug;
    private final String provider;

    private OnlinePunctuationModelConfig(Builder builder) {
        this.cnnBilstm = builder.cnnBilstm;
        this.bpeVocab = builder.bpeVocab;
        this.numThreads = builder.numThreads;
        this.debug = builder.debug;
        this.provider = builder.provider;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getCnnBilstm() {
        return cnnBilstm;
    }

    public String getBpeVocab() {
        return bpeVocab;
    }

    public static class Builder {
        private String cnnBilstm = "";
        private String bpeVocab = "";
        private int numThreads = 1;
        private boolean debug = true;
        private String provider = "cpu";

        public OnlinePunctuationModelConfig build() {
            return new OnlinePunctuationModelConfig(this);
        }

        public Builder setCnnBilstm(String cnnBilstm) {
            this.cnnBilstm = cnnBilstm;
            return this;
        }

        public Builder setBpeVocab(String bpeVocab) {
            this.bpeVocab = bpeVocab;
            return this;
        }

        public Builder setNumThreads(int numThreads) {
            this.numThreads = numThreads;
            return this;
        }

        public Builder setDebug(boolean debug) {
            this.debug = debug;
            return this;
        }

        public Builder setProvider(String provider) {
            this.provider = provider;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OnlineRecognizer.java
================================================
// Copyright 2022-2023 by zhaoming
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OnlineRecognizer {
    private long ptr = 0;

    public OnlineRecognizer(OnlineRecognizerConfig config) {
        LibraryLoader.maybeLoad();
        ptr = newFromFile(config);
        if (ptr == 0) {
            throw new IllegalArgumentException("Invalid OnlineRecognizerConfig: failed to create native OnlineRecognizer");
        }
    }

    public void decode(OnlineStream s) {
        decode(ptr, s.getPtr());
    }

    public void decode(OnlineStream[] ss) {
        if (ss == null || ss.length == 0) {
          throw new IllegalArgumentException("Stream array must be non-empty");
        }
        long[] streamPtrs = new long[ss.length];
        for (int i = 0; i < ss.length; ++i) {
            streamPtrs[i] = ss[i].getPtr();
        }
        decodeStreams(ptr, streamPtrs);
    }

    public boolean isReady(OnlineStream s) {
        return isReady(ptr, s.getPtr());
    }

    public boolean isEndpoint(OnlineStream s) {
        return isEndpoint(ptr, s.getPtr());
    }

    public void reset(OnlineStream s) {
        reset(ptr, s.getPtr());
    }

    public OnlineStream createStream() {
        long p = createStream(ptr, "");
        return new OnlineStream(p);
    }

    @Override
    protected void finalize() throws Throwable {
        release();
    }

    // You'd better call it manually if it is not used anymore
    protected void close()  {
      if (this.ptr == 0) {
        return;
      }
      delete(this.ptr);
      this.ptr = 0;
    }
    
    public void release() {
      this.close();
    }

    public OnlineRecognizerResult getResult(OnlineStream s) {
        return getResult(ptr, s.getPtr());
    }

    private native void delete(long ptr);

    private native long newFromFile(OnlineRecognizerConfig config);

    private native long createStream(long ptr, String hotwords);

    private native void reset(long ptr, long streamPtr);

    private native void decode(long ptr, long streamPtr);

    private native void decodeStreams(long ptr, long[] streamPtrs);

    private native boolean isEndpoint(long ptr, long streamPtr);

    private native boolean isReady(long ptr, long streamPtr);

    private native OnlineRecognizerResult getResult(long ptr, long streamPtr);
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OnlineRecognizerConfig.java
================================================
// Copyright 2022-2023 by zhaoming
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OnlineRecognizerConfig {
    private final FeatureConfig featConfig;
    private final OnlineModelConfig modelConfig;
    private final OnlineLMConfig lmConfig;

    private final OnlineCtcFstDecoderConfig ctcFstDecoderConfig;
    private final EndpointConfig endpointConfig;
    private final HomophoneReplacerConfig hr;
    private final boolean enableEndpoint;
    private final String decodingMethod;
    private final int maxActivePaths;
    private final String hotwordsFile;
    private final float hotwordsScore;
    private final String ruleFsts;
    private final String ruleFars;
    private final float blankPenalty;

    private OnlineRecognizerConfig(Builder builder) {
        this.featConfig = builder.featConfig;
        this.modelConfig = builder.modelConfig;
        this.lmConfig = builder.lmConfig;
        this.ctcFstDecoderConfig = builder.ctcFstDecoderConfig;
        this.endpointConfig = builder.endpointConfig;
        this.hr = builder.hr;
        this.enableEndpoint = builder.enableEndpoint;
        this.decodingMethod = builder.decodingMethod;
        this.maxActivePaths = builder.maxActivePaths;
        this.hotwordsFile = builder.hotwordsFile;
        this.hotwordsScore = builder.hotwordsScore;
        this.ruleFsts = builder.ruleFsts;
        this.ruleFars = builder.ruleFars;
        this.blankPenalty = builder.blankPenalty;
    }

    public static Builder builder() {
        return new Builder();
    }

    public OnlineModelConfig getModelConfig() {
        return modelConfig;
    }

    public static class Builder {
        private FeatureConfig featConfig = FeatureConfig.builder().build();
        private OnlineModelConfig modelConfig = OnlineModelConfig.builder().build();
        private OnlineLMConfig lmConfig = OnlineLMConfig.builder().build();
        private OnlineCtcFstDecoderConfig ctcFstDecoderConfig = OnlineCtcFstDecoderConfig.builder().build();
        private EndpointConfig endpointConfig = EndpointConfig.builder().build();
        private HomophoneReplacerConfig hr = HomophoneReplacerConfig.builder().build();
        private boolean enableEndpoint = true;
        private String decodingMethod = "greedy_search";
        private int maxActivePaths = 4;
        private String hotwordsFile = "";
        private float hotwordsScore = 1.5f;
        private String ruleFsts = "";
        private String ruleFars = "";
        private float blankPenalty = 0.0f;

        public OnlineRecognizerConfig build() {
          return new OnlineRecognizerConfig(this);
        }

        public Builder setFeatureConfig(FeatureConfig featConfig) {
            this.featConfig = featConfig;
            return this;
        }

        public Builder setOnlineModelConfig(OnlineModelConfig modelConfig) {
            this.modelConfig = modelConfig;
            return this;
        }

        public Builder setOnlineLMConfig(OnlineLMConfig lmConfig) {
            this.lmConfig = lmConfig;
            return this;
        }

        public Builder setCtcFstDecoderConfig(OnlineCtcFstDecoderConfig ctcFstDecoderConfig) {
            this.ctcFstDecoderConfig = ctcFstDecoderConfig;
            return this;
        }

        public Builder setEndpointConfig(EndpointConfig endpointConfig) {
            this.endpointConfig = endpointConfig;
            return this;
        }

        public Builder setHr(HomophoneReplacerConfig hr) {
            this.hr = hr;
            return this;
        }

        public Builder setEnableEndpoint(boolean enableEndpoint) {
            this.enableEndpoint = enableEndpoint;
            return this;
        }

        public Builder setDecodingMethod(String decodingMethod) {
            this.decodingMethod = decodingMethod;
            return this;
        }

        public Builder setMaxActivePaths(int maxActivePaths) {
            this.maxActivePaths = maxActivePaths;
            return this;
        }

        public Builder setHotwordsFile(String hotwordsFile) {
            this.hotwordsFile = hotwordsFile;
            return this;
        }

        public Builder setHotwordsScore(float hotwordsScore) {
            this.hotwordsScore = hotwordsScore;
            return this;
        }

        public Builder setRuleFsts(String ruleFsts) {
            this.ruleFsts = ruleFsts;
            return this;
        }

        public Builder setRuleFars(String ruleFars) {
            this.ruleFars = ruleFars;
            return this;
        }

        public Builder setBlankPenalty(float blankPenalty) {
            this.blankPenalty = blankPenalty;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OnlineRecognizerResult.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OnlineRecognizerResult {
    private final String text;
    private final String[] tokens;
    private final float[] timestamps;
    private final float[] ysProbs;

    public OnlineRecognizerResult(String text, String[] tokens, float[] timestamps, float[] ysProbs) {
        this.text = text;
        this.tokens = tokens;
        this.timestamps = timestamps;
        this.ysProbs = ysProbs;
    }

    public String getText() {
        return text;
    }

    public String[] getTokens() {
        return tokens;
    }

    public float[] getTimestamps() {
        return timestamps;
    }

    public float[] getYsProbs() {
        return ysProbs;
    }

}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OnlineSpeechDenoiser.java
================================================
// Copyright 2026 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OnlineSpeechDenoiser {
    private long ptr = 0;

    public OnlineSpeechDenoiser(OnlineSpeechDenoiserConfig config) {
        LibraryLoader.maybeLoad();
        ptr = newFromFile(config);
        if (ptr == 0) {
            throw new IllegalArgumentException("Invalid OnlineSpeechDenoiserConfig: failed to create native OnlineSpeechDenoiser");
        }
    }

    public int getSampleRate() {
        return getSampleRate(ptr);
    }

    public int getFrameShiftInSamples() {
        return getFrameShiftInSamples(ptr);
    }

    public DenoisedAudio run(float[] samples, int sampleRate) {
        return run(ptr, samples, sampleRate);
    }

    public DenoisedAudio flush() {
        return flush(ptr);
    }

    public void reset() {
        reset(ptr);
    }

    @Override
    protected void finalize() throws Throwable {
        release();
    }

    public void release() {
        if (this.ptr == 0) {
            return;
        }
        delete(this.ptr);
        this.ptr = 0;
    }

    private native void delete(long ptr);

    private native int getSampleRate(long ptr);

    private native int getFrameShiftInSamples(long ptr);

    private native DenoisedAudio run(long ptr, float[] samples, int sampleRate);

    private native DenoisedAudio flush(long ptr);

    private native void reset(long ptr);

    private native long newFromFile(OnlineSpeechDenoiserConfig config);
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OnlineSpeechDenoiserConfig.java
================================================
// Copyright 2026 Xiaomi Corporation
package com.k2fsa.sherpa.onnx;

public class OnlineSpeechDenoiserConfig {
    private final OfflineSpeechDenoiserModelConfig model;

    private OnlineSpeechDenoiserConfig(Builder builder) {
        this.model = builder.model;
    }

    public static Builder builder() {
        return new Builder();
    }

    public static class Builder {
        private OfflineSpeechDenoiserModelConfig model = OfflineSpeechDenoiserModelConfig.builder().build();

        public OnlineSpeechDenoiserConfig build() {
            return new OnlineSpeechDenoiserConfig(this);
        }

        public Builder setModel(OfflineSpeechDenoiserModelConfig model) {
            this.model = model;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OnlineStream.java
================================================
// Copyright 2022-2023 by zhaoming
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OnlineStream {
    private long ptr = 0;

    public OnlineStream() {
        LibraryLoader.maybeLoad();
        this.ptr = 0;
    }

    public OnlineStream(long ptr) {
        this.ptr = ptr;
    }

    public long getPtr() {
        return ptr;
    }

    public void setPtr(long ptr) {
        this.ptr = ptr;
    }

    public void acceptWaveform(float[] samples, int sampleRate) {
        acceptWaveform(this.ptr, samples, sampleRate);
    }

    public void inputFinished() {
        inputFinished(this.ptr);
    }

    public void setOption(String key, String value) {
        setOption(this.ptr, key, value);
    }

    public String getOption(String key) {
        return getOption(this.ptr, key);
    }

    public boolean hasOption(String key) {
        return hasOption(this.ptr, key);
    }

    public void release() {
        close();
    }
    
    public void close() {
      // stream object must be release after used
      if (this.ptr == 0) {
          return;
      }
      delete(this.ptr);
      this.ptr = 0;
    }
    
    @Override
    protected void finalize() throws Throwable {
        close();
        super.finalize();
    }

    private native void acceptWaveform(long ptr, float[] samples, int sampleRate);

    private native void inputFinished(long ptr);

    private native void setOption(long ptr, String key, String value);

    private native String getOption(long ptr, String key);

    private native boolean hasOption(long ptr, String key);

    private native void delete(long ptr);
}

================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OnlineToneCtcModelConfig.java
================================================
package com.k2fsa.sherpa.onnx;

public class OnlineToneCtcModelConfig {
    private final String model;

    private OnlineToneCtcModelConfig(Builder builder) {
        this.model = builder.model;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getModel() {
        return model;
    }

    public static class Builder {
        private String model = "";

        public OnlineToneCtcModelConfig build() {
            return new OnlineToneCtcModelConfig(this);
        }

        public Builder setModel(String model) {
            this.model = model;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OnlineTransducerModelConfig.java
================================================
// Copyright 2022-2023 by zhaoming
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OnlineTransducerModelConfig {
    private final String encoder;
    private final String decoder;
    private final String joiner;

    private OnlineTransducerModelConfig(Builder builder) {
        this.encoder = builder.encoder;
        this.decoder = builder.decoder;
        this.joiner = builder.joiner;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getEncoder() {
        return encoder;
    }

    public String getDecoder() {
        return decoder;
    }

    public String getJoiner() {
        return joiner;
    }

    public static class Builder {
        private String encoder = "";
        private String decoder = "";
        private String joiner = "";

        public OnlineTransducerModelConfig build() {
          return new OnlineTransducerModelConfig(this);
        }

        public Builder setEncoder(String encoder) {
            this.encoder = encoder;
            return this;
        }

        public Builder setDecoder(String decoder) {
            this.decoder = decoder;
            return this;
        }

        public Builder setJoiner(String joiner) {
            this.joiner = joiner;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/OnlineZipformer2CtcModelConfig.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class OnlineZipformer2CtcModelConfig {
    private final String model;

    private OnlineZipformer2CtcModelConfig(Builder builder) {
        this.model = builder.model;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getModel() {
        return model;
    }

    public static class Builder {
        private String model = "";

        public OnlineZipformer2CtcModelConfig build() {
            return new OnlineZipformer2CtcModelConfig(this);
        }

        public Builder setModel(String model) {
            this.model = model;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/QnnConfig.java
================================================
// Copyright 2025 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class QnnConfig {
    private final String backendLib;
    private final String contextBinary;
    private final String systemLib;

    private QnnConfig(Builder builder) {
        this.backendLib = builder.backendLib;
        this.contextBinary = builder.contextBinary;
        this.systemLib = builder.systemLib;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getBackendLib() {
        return backendLib;
    }

    public String getContextBinary() {
        return contextBinary;
    }

    public String getSystemLib() {
        return systemLib;
    }

    public static class Builder {
        private String backendLib = "";
        private String contextBinary = "";
        private String systemLib = "";

        public QnnConfig build() {
            return new QnnConfig(this);
        }

        public Builder setBackendLib(String backendLib) {
            this.backendLib = backendLib;
            return this;
        }

        public Builder setContextBinary(String contextBinary) {
            this.contextBinary = contextBinary;
            return this;
        }

        public Builder setSystemLib(String systemLib) {
            this.systemLib = systemLib;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/SileroVadModelConfig.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class SileroVadModelConfig {
    private final String model;
    private final float threshold;
    private final float minSilenceDuration;
    private final float minSpeechDuration;
    private final int windowSize;
    private final float maxSpeechDuration;

    private SileroVadModelConfig(Builder builder) {
        this.model = builder.model;
        this.threshold = builder.threshold;
        this.minSilenceDuration = builder.minSilenceDuration;
        this.minSpeechDuration = builder.minSpeechDuration;
        this.windowSize = builder.windowSize;
        this.maxSpeechDuration = builder.maxSpeechDuration;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getModel() {
        return model;
    }

    public float getThreshold() {
        return threshold;
    }

    public float getMinSilenceDuration() {
        return minSilenceDuration;
    }

    public float getMinSpeechDuration() {
        return minSpeechDuration;
    }

    public int getWindowSize() {
        return windowSize;
    }

    public float getMaxSpeechDuration() {
        return maxSpeechDuration;
    }

    public static class Builder {
        private String model = "";
        private float threshold = 0.5f;
        private float minSilenceDuration = 0.25f;
        private float minSpeechDuration = 0.5f;
        private int windowSize = 512;
        private float maxSpeechDuration = 5.0f;

        public SileroVadModelConfig build() {
            return new SileroVadModelConfig(this);
        }


        public Builder setModel(String model) {
            this.model = model;
            return this;
        }

        public Builder setThreshold(float threshold) {
            this.threshold = threshold;
            return this;
        }

        public Builder setMinSilenceDuration(float minSilenceDuration) {
            this.minSilenceDuration = minSilenceDuration;
            return this;
        }

        public Builder setMinSpeechDuration(float minSpeechDuration) {
            this.minSpeechDuration = minSpeechDuration;
            return this;
        }

        public Builder setWindowSize(int windowSize) {
            this.windowSize = windowSize;
            return this;
        }

        public Builder setMaxSpeechDuration(float maxSpeechDuration) {
            this.maxSpeechDuration = maxSpeechDuration;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/SpeakerEmbeddingExtractor.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class SpeakerEmbeddingExtractor {
    private long ptr = 0;

    public SpeakerEmbeddingExtractor(SpeakerEmbeddingExtractorConfig config) {
        LibraryLoader.maybeLoad();
        ptr = newFromFile(config);
        if (ptr == 0) {
            throw new IllegalArgumentException("Invalid SpeakerEmbeddingExtractorConfig: failed to create native SpeakerEmbeddingExtractor");
        }
    }

    @Override
    protected void finalize() throws Throwable {
        release();
    }

    public void release() {
        if (this.ptr == 0) {
            return;
        }
        delete(this.ptr);
        this.ptr = 0;
    }

    public OnlineStream createStream() {
        long p = createStream(ptr);
        return new OnlineStream(p);
    }

    public boolean isReady(OnlineStream s) {
        return isReady(ptr, s.getPtr());
    }

    public float[] compute(OnlineStream s) {
        return compute(ptr, s.getPtr());
    }

    public int getDim() {
        return dim(ptr);
    }

    private native void delete(long ptr);

    private native long newFromFile(SpeakerEmbeddingExtractorConfig config);

    private native long createStream(long ptr);

    private native boolean isReady(long ptr, long streamPtr);

    private native float[] compute(long ptr, long streamPtr);

    private native int dim(long ptr);
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/SpeakerEmbeddingExtractorConfig.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class SpeakerEmbeddingExtractorConfig {
    private final String model;
    private final int numThreads;
    private final boolean debug;
    private final String provider;

    private SpeakerEmbeddingExtractorConfig(Builder builder) {
        this.model = builder.model;
        this.numThreads = builder.numThreads;
        this.debug = builder.debug;
        this.provider = builder.provider;
    }

    public static Builder builder() {
        return new Builder();
    }

    public static class Builder {
        private String model = "";
        private int numThreads = 1;
        private boolean debug = true;
        private String provider = "cpu";

        public SpeakerEmbeddingExtractorConfig build() {
            return new SpeakerEmbeddingExtractorConfig(this);
        }


        public Builder setModel(String model) {
            this.model = model;
            return this;
        }

        public Builder setNumThreads(int numThreads) {
            this.numThreads = numThreads;
            return this;
        }

        public Builder setDebug(boolean debug) {
            this.debug = debug;
            return this;
        }

        public Builder setProvider(String provider) {
            this.provider = provider;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/SpeakerEmbeddingManager.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class SpeakerEmbeddingManager {
    private long ptr = 0;

    public SpeakerEmbeddingManager(int dim) {
        LibraryLoader.maybeLoad();
        ptr = create(dim);
    }

    @Override
    protected void finalize() throws Throwable {
        release();
    }

    public void release() {
        if (this.ptr == 0) {
            return;
        }
        delete(this.ptr);
        this.ptr = 0;
    }

    public boolean add(String name, float[] embedding) {
        return add(ptr, name, embedding);
    }

    public boolean add(String name, float[][] embedding) {
        return addList(ptr, name, embedding);
    }

    public boolean remove(String name) {
        return remove(ptr, name);
    }

    public String search(float[] embedding, float threshold) {
        return search(ptr, embedding, threshold);
    }

    public boolean verify(String name, float[] embedding, float threshold) {
        return verify(ptr, name, embedding, threshold);
    }

    public boolean contains(String name) {
        return contains(ptr, name);
    }

    public int getNumSpeakers() {
        return numSpeakers(ptr);
    }

    public String[] getAllSpeakerNames() {
        return allSpeakerNames(ptr);
    }

    private native long create(int dim);

    private native void delete(long ptr);

    private native boolean add(long ptr, String name, float[] embedding);

    private native boolean addList(long ptr, String name, float[][] embedding);

    private native boolean remove(long ptr, String name);

    private native String search(long ptr, float[] embedding, float threshold);

    private native boolean verify(long ptr, String name, float[] embedding, float threshold);

    private native boolean contains(long ptr, String name);

    private native int numSpeakers(long ptr);

    private native String[] allSpeakerNames(long ptr);
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/SpeechSegment.java
================================================
package com.k2fsa.sherpa.onnx;

public class SpeechSegment {

    private final int start;
    private final float[] samples;

    public SpeechSegment(int start, float[] samples) {
        this.start = start;
        this.samples = samples;
    }

    public int getStart() {
        return start;
    }

    public float[] getSamples() {
        return samples;
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/SpokenLanguageIdentification.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

public class SpokenLanguageIdentification {
    private final Map<String, String> localeMap;
    private long ptr = 0;

    public SpokenLanguageIdentification(SpokenLanguageIdentificationConfig config) {
        LibraryLoader.maybeLoad();
        ptr = newFromFile(config);
        if (ptr == 0) {
            throw new IllegalArgumentException("Invalid SpokenLanguageIdentificationConfig: failed to create native SpokenLanguageIdentification");
        }

        String[] languages = Locale.getISOLanguages();
        localeMap = new HashMap<String, String>(languages.length);
        for (String language : languages) {
            Locale locale = new Locale(language);
            localeMap.put(language, locale.getDisplayName());
        }
    }

    public String compute(OfflineStream stream) {
        String lang = compute(ptr, stream.getPtr());
        return localeMap.getOrDefault(lang, lang);
    }

    public OfflineStream createStream() {
        long p = createStream(ptr);
        return new OfflineStream(p);
    }

    @Override
    protected void finalize() throws Throwable {
        release();
    }

    // You'd better call it manually if it is not used anymore
    public void release() {
        if (this.ptr == 0) {
            return;
        }
        delete(this.ptr);
        this.ptr = 0;
    }

    private native void delete(long ptr);

    private native long newFromFile(SpokenLanguageIdentificationConfig config);

    private native long createStream(long ptr);

    private native String compute(long ptr, long streamPtr);
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/SpokenLanguageIdentificationConfig.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class SpokenLanguageIdentificationConfig {
    private final SpokenLanguageIdentificationWhisperConfig whisper;
    private final int numThreads;
    private final boolean debug;
    private final String provider;

    private SpokenLanguageIdentificationConfig(Builder builder) {
        this.whisper = builder.whisper;
        this.numThreads = builder.numThreads;
        this.debug = builder.debug;
        this.provider = builder.provider;
    }

    public static Builder builder() {
        return new Builder();
    }

    public SpokenLanguageIdentificationWhisperConfig getWhisper() {
        return whisper;
    }

    public static class Builder {
        private SpokenLanguageIdentificationWhisperConfig whisper = SpokenLanguageIdentificationWhisperConfig.builder().build();
        private int numThreads = 1;
        private boolean debug = true;
        private String provider = "cpu";

        public SpokenLanguageIdentificationConfig build() {
            return new SpokenLanguageIdentificationConfig(this);
        }

        public Builder setWhisper(SpokenLanguageIdentificationWhisperConfig whisper) {
            this.whisper = whisper;
            return this;
        }

        public Builder setNumThreads(int numThreads) {
            this.numThreads = numThreads;
            return this;
        }

        public Builder setDebug(boolean debug) {
            this.debug = debug;
            return this;
        }

        public Builder setProvider(String provider) {
            this.provider = provider;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/SpokenLanguageIdentificationWhisperConfig.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class SpokenLanguageIdentificationWhisperConfig {
    private final String encoder;
    private final String decoder;
    private final int tailPaddings;

    private SpokenLanguageIdentificationWhisperConfig(Builder builder) {
        this.encoder = builder.encoder;
        this.decoder = builder.decoder;
        this.tailPaddings = builder.tailPaddings;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getEncoder() {
        return encoder;
    }

    public String getDecoder() {
        return decoder;
    }

    public int getTailPaddings() {
        return tailPaddings;
    }

    public static class Builder {
        private String encoder = "";
        private String decoder = "";
        private int tailPaddings = 1000; // number of frames to pad

        public SpokenLanguageIdentificationWhisperConfig build() {
            return new SpokenLanguageIdentificationWhisperConfig(this);
        }

        public Builder setEncoder(String encoder) {
            this.encoder = encoder;
            return this;
        }

        public Builder setDecoder(String decoder) {
            this.decoder = decoder;
            return this;
        }

        public Builder setTailPaddings(int tailPaddings) {
            this.tailPaddings = tailPaddings;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/TenVadModelConfig.java
================================================
// Copyright 2025 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class TenVadModelConfig {
    private final String model;
    private final float threshold;
    private final float minSilenceDuration;
    private final float minSpeechDuration;
    private final int windowSize;
    private final float maxSpeechDuration;

    private TenVadModelConfig(Builder builder) {
        this.model = builder.model;
        this.threshold = builder.threshold;
        this.minSilenceDuration = builder.minSilenceDuration;
        this.minSpeechDuration = builder.minSpeechDuration;
        this.windowSize = builder.windowSize;
        this.maxSpeechDuration = builder.maxSpeechDuration;
    }

    public static Builder builder() {
        return new Builder();
    }

    public String getModel() {
        return model;
    }

    public float getThreshold() {
        return threshold;
    }

    public float getMinSilenceDuration() {
        return minSilenceDuration;
    }

    public float getMinSpeechDuration() {
        return minSpeechDuration;
    }

    public int getWindowSize() {
        return windowSize;
    }

    public float getMaxSpeechDuration() {
        return maxSpeechDuration;
    }

    public static class Builder {
        private String model = "";
        private float threshold = 0.5f;
        private float minSilenceDuration = 0.25f;
        private float minSpeechDuration = 0.25f;
        private int windowSize = 256;
        private float maxSpeechDuration = 5.0f;

        public TenVadModelConfig build() {
            return new TenVadModelConfig(this);
        }


        public Builder setModel(String model) {
            this.model = model;
            return this;
        }

        public Builder setThreshold(float threshold) {
            this.threshold = threshold;
            return this;
        }

        public Builder setMinSilenceDuration(float minSilenceDuration) {
            this.minSilenceDuration = minSilenceDuration;
            return this;
        }

        public Builder setMinSpeechDuration(float minSpeechDuration) {
            this.minSpeechDuration = minSpeechDuration;
            return this;
        }

        public Builder setWindowSize(int windowSize) {
            this.windowSize = windowSize;
            return this;
        }

        public Builder setMaxSpeechDuration(float maxSpeechDuration) {
            this.maxSpeechDuration = maxSpeechDuration;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/Vad.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class Vad {
    private long ptr = 0;

    public Vad(VadModelConfig config) {
        LibraryLoader.maybeLoad();
        ptr = newFromFile(config);
        if (ptr == 0) {
            throw new IllegalArgumentException("Invalid VadModelConfig: failed to create native Vad");
        }
    }

    @Override
    protected void finalize() throws Throwable {
        release();
    }

    public void release() {
        if (this.ptr == 0) {
            return;
        }
        delete(this.ptr);
        this.ptr = 0;
    }

    public void acceptWaveform(float[] samples) {
        acceptWaveform(this.ptr, samples);
    }

    public float compute(float[] samples) {
        return compute(this.ptr, samples);
    }

    public boolean empty() {
        return empty(this.ptr);
    }

    public void pop() {
        pop(this.ptr);
    }

    public void clear() {
        clear(this.ptr);
    }

    public void reset() {
        reset(this.ptr);
    }

    public void flush() {
        flush(this.ptr);
    }

    public SpeechSegment front() {
        return front(this.ptr);
    }

    public boolean isSpeechDetected() {
        return isSpeechDetected(this.ptr);
    }

    private native void delete(long ptr);

    private native long newFromFile(VadModelConfig config);

    private native void acceptWaveform(long ptr, float[] samples);

    private native float compute(long ptr, float[] samples);

    private native boolean empty(long ptr);

    private native void pop(long ptr);

    private native void clear(long ptr);

    private native SpeechSegment front(long ptr);

    private native boolean isSpeechDetected(long ptr);

    private native void reset(long ptr);

    private native void flush(long ptr);
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/VadModelConfig.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class VadModelConfig {
    private final SileroVadModelConfig sileroVadModelConfig;
    private final TenVadModelConfig tenVadModelConfig;
    private final int sampleRate;
    private final int numThreads;
    private final boolean debug;
    private final String provider;

    private VadModelConfig(Builder builder) {
        this.sileroVadModelConfig = builder.sileroVadModelConfig;
        this.tenVadModelConfig = builder.tenVadModelConfig;
        this.sampleRate = builder.sampleRate;
        this.numThreads = builder.numThreads;
        this.debug = builder.debug;
        this.provider = builder.provider;
    }

    public static Builder builder() {
        return new Builder();
    }

    public SileroVadModelConfig getSileroVadModelConfig() {
        return sileroVadModelConfig;
    }

    public TenVadModelConfig getTenVadModelConfig() {
        return tenVadModelConfig;
    }

    public int getSampleRate() {
        return sampleRate;
    }

    public int getNumThreads() {
        return numThreads;
    }

    public String getProvider() {
        return provider;
    }

    public boolean getDebug() {
        return debug;
    }

    public static class Builder {
        private SileroVadModelConfig sileroVadModelConfig = new SileroVadModelConfig.Builder().build();
        private TenVadModelConfig tenVadModelConfig = new TenVadModelConfig.Builder().build();
        private int sampleRate = 16000;
        private int numThreads = 1;
        private boolean debug = true;
        private String provider = "cpu";

        public VadModelConfig build() {
            return new VadModelConfig(this);
        }

        public Builder setSileroVadModelConfig(SileroVadModelConfig sileroVadModelConfig) {
            this.sileroVadModelConfig = sileroVadModelConfig;
            return this;
        }

        public Builder setTenVadModelConfig(TenVadModelConfig tenVadModelConfig) {
            this.tenVadModelConfig = tenVadModelConfig;
            return this;
        }

        public Builder setSampleRate(int sampleRate) {
            this.sampleRate = sampleRate;
            return this;
        }

        public Builder setNumThreads(int numThreads) {
            this.numThreads = numThreads;
            return this;
        }

        public Builder setDebug(boolean debug) {
            this.debug = debug;
            return this;
        }

        public Builder setProvider(String provider) {
            this.provider = provider;
            return this;
        }
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/VersionInfo.java
================================================
package com.k2fsa.sherpa.onnx;

public class VersionInfo {

    public static String getVersion() {
        LibraryLoader.maybeLoad();
        return getVersionStr2();
    }

    public static String getGitSha1() {
        LibraryLoader.maybeLoad();
        return getGitSha12();
    }

    public static String getGitDate() {
        LibraryLoader.maybeLoad();
        return getGitDate2();
    }

    private static native String getVersionStr2();
    private static native String getGitSha12();
    private static native String getGitDate2();
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/WaveData.java
================================================
// Copyright (c) 2026 Xiaomi Corporation
package com.k2fsa.sherpa.onnx;

import java.util.Arrays;

public class WaveData {
    private final float[] samples;
    private final int sampleRate;

    public WaveData(float[] samples, int sampleRate) {
        this.samples = samples;
        this.sampleRate = sampleRate;
    }

    public float[] getSamples() {
        return samples;
    }

    public int getSampleRate() {
        return sampleRate;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) return true;
        if (obj == null || getClass() != obj.getClass()) return false;
        WaveData other = (WaveData) obj;
        return sampleRate == other.sampleRate && Arrays.equals(samples, other.samples);
    }

    @Override
    public int hashCode() {
        int result = Arrays.hashCode(samples);
        result = 31 * result + sampleRate;
        return result;
    }
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/WaveReader.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class WaveReader {
    private WaveData data;

    // It supports only single channel, 16-bit wave file.
    // It will exit the program if the given file has a wrong format
    public WaveReader(String filename) {
        LibraryLoader.maybeLoad();
        this.data = readWaveFromFile(filename);
    }

    public int getSampleRate() {
        return this.data.getSampleRate();
    }

    public float[] getSamples() {
        return this.data.getSamples();
    }

    private native WaveData readWaveFromFile(String filename);
}


================================================
FILE: sherpa-onnx/java-api/src/main/java/com/k2fsa/sherpa/onnx/WaveWriter.java
================================================
// Copyright 2024 Xiaomi Corporation

package com.k2fsa.sherpa.onnx;

public class WaveWriter {
    public WaveWriter() {
    }

    public static boolean write(String filename, float[] samples, int sampleRate) {
        WaveWriter w = new WaveWriter();
        return w.writeWaveToFile(filename, samples, sampleRate);
    }

    private native boolean writeWaveToFile(String filename, float[] samples, int sampleRate);
}


================================================
FILE: sherpa-onnx/java-api/src/main/resources/.gitignore
================================================
lib/
native/

================================================
FILE: sherpa-onnx/java-api/src/main/resources/readme.md
================================================
please downlaod file and put in folder
[donwload link](https://huggingface.co/csukuangfj2/sherpa-onnx-libs/tree/main/jni)

- sherpa-onnx-v1.12.7-linux-aarch64-jni.tar.bz2
- sherpa-onnx-v1.12.7-linux-x64-jni.tar.bz2
- sherpa-onnx-v1.12.7-osx-arm64-jni.tar.bz2
- sherpa-onnx-v1.12.7-osx-x86_64-jni.tar.bz2
- sherpa-onnx-v1.12.7-win-x64-jni.tar.bz2


- linux_arm64
- linux_x64
- darwin_arm64
- darwin_x64
- windows_x64


add to src/main/resources

```
.
├── native
│   ├── linux-aarch64
│   │   ├── libsherpa-onnx-jni.so
│   ├── linux-x64
│   │   ├── libsherpa-onnx-jni.so
│   ├── osx-aarch64
│   │   ├── libsherpa-onnx-jni.dylib
│   ├── osx-x64
│   │   ├── libsherpa-onnx-jni.dylib
│   ├── win-x64
│   │   ├── sherpa-onnx-jni.dll
```


================================================
FILE: sherpa-onnx/jni/CMakeLists.txt
================================================
include_directories(${PROJECT_SOURCE_DIR})

if(NOT DEFINED ANDROID_ABI)
  if(NOT DEFINED ENV{JAVA_HOME})
    message(FATAL_ERROR "Please set the environment variable JAVA_HOME")
  endif()
  include_directories($ENV{JAVA_HOME}/include)
  include_directories($ENV{JAVA_HOME}/include/linux)
  include_directories($ENV{JAVA_HOME}/include/darwin)
  include_directories($ENV{JAVA_HOME}/include/win32)
endif()

set(sources
  audio-tagging.cc
  common.cc
  jni.cc
  keyword-spotter.cc
  offline-punctuation.cc
  offline-recognizer.cc
  offline-speech-denoiser.cc
  offline-stream.cc
  online-speech-denoiser.cc
  online-punctuation.cc
  online-recognizer.cc
  online-stream.cc
  speaker-embedding-extractor.cc
  speaker-embedding-manager.cc
  speech-denoiser.cc
  spoken-language-identification.cc
  version.cc
  voice-activity-detector.cc
  wave-reader.cc
  wave-writer.cc
)

if(SHERPA_ONNX_ENABLE_TTS)
  list(APPEND sources
    offline-tts.cc
  )
endif()

if(SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION)
  list(APPEND sources
    offline-speaker-diarization.cc
  )
endif()

add_library(sherpa-onnx-jni SHARED ${sources})

target_compile_definitions(sherpa-onnx-jni PRIVATE SHERPA_ONNX_BUILD_SHARED_LIBS=1)
target_compile_definitions(sherpa-onnx-jni PRIVATE SHERPA_ONNX_BUILD_MAIN_LIB=1)

if(ANDROID OR (UNIX AND NOT APPLE))
  set_target_properties(sherpa-onnx-jni PROPERTIES
    LINK_FLAGS "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/sherpa-onnx-symbols.lds"
  )
elseif(APPLE)
  set_target_properties(sherpa-onnx-jni PROPERTIES
    LINK_FLAGS "-Wl,-exported_symbols_list,${CMAKE_CURRENT_SOURCE_DIR}/sherpa-onnx-symbols.exp"
  )
endif()

target_link_libraries(sherpa-onnx-jni sherpa-onnx-core)
install(TARGETS sherpa-onnx-jni DESTINATION lib)


================================================
FILE: sherpa-onnx/jni/audio-tagging.cc
================================================
// sherpa-onnx/jni/audio-tagging.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/audio-tagging.h"

#include <memory>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/jni/common.h"

namespace sherpa_onnx {

static AudioTaggingConfig GetAudioTaggingConfig(JNIEnv *env, jobject config,
                                                bool *ok) {
  AudioTaggingConfig ans;

  jclass cls = env->GetObjectClass(config);

  jfieldID fid = env->GetFieldID(
      cls, "model", "Lcom/k2fsa/sherpa/onnx/AudioTaggingModelConfig;");
  jobject model = env->GetObjectField(config, fid);
  jclass model_cls = env->GetObjectClass(model);

  fid = env->GetFieldID(
      model_cls, "zipformer",
      "Lcom/k2fsa/sherpa/onnx/OfflineZipformerAudioTaggingModelConfig;");
  jobject zipformer = env->GetObjectField(model, fid);
  jclass zipformer_cls = env->GetObjectClass(zipformer);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.zipformer.model, model, zipformer_cls,
                              zipformer);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.ced, ced, model_cls, model);

  SHERPA_ONNX_JNI_READ_INT(ans.model.num_threads, numThreads, model_cls, model);

  SHERPA_ONNX_JNI_READ_BOOL(ans.model.debug, debug, model_cls, model);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.provider, provider, model_cls, model);

  SHERPA_ONNX_JNI_READ_STRING(ans.labels, labels, cls, config);

  SHERPA_ONNX_JNI_READ_INT(ans.top_k, topK, cls, config);

  *ok = true;
  return ans;
}

}  // namespace sherpa_onnx

SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_AudioTagging_newFromAsset(
    JNIEnv *env, jobject /*obj*/, jobject asset_manager, jobject _config) {
#if __ANDROID_API__ >= 9
  AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager);
  if (!mgr) {
    SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr);
    return 0;
  }
#endif

  bool ok = false;
  auto config = sherpa_onnx::GetAudioTaggingConfig(env, _config, &ok);

  if (!ok) {
    SHERPA_ONNX_LOGE("Please read the error message carefully");
    return 0;
  }

  SHERPA_ONNX_LOGE("audio tagging newFromAsset config:\n%s",
                   config.ToString().c_str());

  auto tagger = new sherpa_onnx::AudioTagging(
#if __ANDROID_API__ >= 9
      mgr,
#endif
      config);

  return (jlong)tagger;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_AudioTagging_newFromFile(
    JNIEnv *env, jobject /*obj*/, jobject _config) {
  bool ok = false;

  auto config = sherpa_onnx::GetAudioTaggingConfig(env, _config, &ok);

  if (!ok) {
    SHERPA_ONNX_LOGE("Please read the error message carefully");
    return 0;
  }

  SHERPA_ONNX_LOGE("audio tagging newFromFile config:\n%s",
                   config.ToString().c_str());

  if (!config.Validate()) {
    SHERPA_ONNX_LOGE("Errors found in config!");
    return 0;
  }

  auto tagger = new sherpa_onnx::AudioTagging(config);

  return (jlong)tagger;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_AudioTagging_delete(
    JNIEnv *env, jobject /*obj*/, jlong ptr) {
  delete reinterpret_cast<sherpa_onnx::AudioTagging *>(ptr);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_AudioTagging_createStream(
    JNIEnv *env, jobject /*obj*/, jlong ptr) {
  auto tagger = reinterpret_cast<sherpa_onnx::AudioTagging *>(ptr);
  std::unique_ptr<sherpa_onnx::OfflineStream> s = tagger->CreateStream();

  // The user is responsible to free the returned pointer.
  //
  // See Java_com_k2fsa_sherpa_onnx_OfflineStream_delete() from
  // ./offline-stream.cc
  sherpa_onnx::OfflineStream *p = s.release();
  return (jlong)p;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jobjectArray JNICALL Java_com_k2fsa_sherpa_onnx_AudioTagging_compute(
    JNIEnv *env, jobject /*obj*/, jlong ptr, jlong streamPtr, jint top_k) {
  auto tagger = reinterpret_cast<sherpa_onnx::AudioTagging *>(ptr);
  auto stream = reinterpret_cast<sherpa_onnx::OfflineStream *>(streamPtr);
  std::vector<sherpa_onnx::AudioEvent> events = tagger->Compute(stream, top_k);

  // Find the AudioEvent class
  jclass cls = env->FindClass("com/k2fsa/sherpa/onnx/AudioEvent");
  if (cls == nullptr) {
    SHERPA_ONNX_LOGE("Failed to find class com/k2fsa/sherpa/onnx/AudioEvent");
    return nullptr;
  }

  // Get the constructor: AudioEvent(String name, int index, float prob)
  jmethodID ctor = env->GetMethodID(cls, "<init>", "(Ljava/lang/String;IF)V");
  if (ctor == nullptr) {
    SHERPA_ONNX_LOGE("Failed to get AudioEvent constructor");
    env->DeleteLocalRef(cls);
    return nullptr;
  }

  // Create a jobjectArray of AudioEvent
  jobjectArray obj_arr = env->NewObjectArray(events.size(), cls, nullptr);

  for (size_t i = 0; i < events.size(); ++i) {
    const auto &e = events[i];

    jstring name = env->NewStringUTF(e.name.c_str());
    jobject event_obj = env->NewObject(cls, ctor, name, e.index, e.prob);

    env->SetObjectArrayElement(obj_arr, i, event_obj);

    env->DeleteLocalRef(name);
    env->DeleteLocalRef(event_obj);
  }

  env->DeleteLocalRef(cls);

  return obj_arr;
}


================================================
FILE: sherpa-onnx/jni/common.cc
================================================
// sherpa-onnx/jni/common.cc
//
// Copyright (c)  2025  Xiaomi Corporation
#include "sherpa-onnx/jni/common.h"

#include <stdlib.h>

#include <string>

#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

/* For qnn to load libQnnHtpVxxSkel.so, e.g., libQnnHtpV81Skel.so file

https://workbench.aihub.qualcomm.com/docs/hub/faq.html#why-am-i-seeing-error-1008-when-trying-to-use-htp
 */
#if defined(_WIN32)
void PrependAdspLibraryPath(const std::string &new_path) {
  SHERPA_ONNX_LOGE("This function is not for Windows. Ignore it");
}
#else
void PrependAdspLibraryPath(const std::string &new_path) {
  const char *old_path = getenv("ADSP_LIBRARY_PATH");
  std::string updated_path;

  if (old_path && !std::string(old_path).empty()) {
    // Caution(fangjun):
    // 1. Must use ; here, not :
    // 2. Must use prepend, not append
    updated_path = new_path + ";" + std::string(old_path);
  } else {
    updated_path = new_path;  // no old path
  }

  if (setenv("ADSP_LIBRARY_PATH", updated_path.c_str(), 1) != 0) {
    SHERPA_ONNX_LOGE("Failed to set ADSP_LIBRARY_PATH to '%s'",
                     updated_path.c_str());
  } else {
    SHERPA_ONNX_LOGE("Successfully set ADSP_LIBRARY_PATH to '%s'",
                     updated_path.c_str());
  }
  /*
You will see something like the following:

Successfully set ADSP_LIBRARY_PATH to
'/data/app/~~pHS2-9SwVjl9ma3cIKtj-g==/com.k2fsa.sherpa.onnx.simulate.streaming.asr-ejCDb8LodsnyK5cr3SvGjA==/lib/arm64;/odm/lib/rfsa/adsp;/vendor/lib/rfsa/adsp/;/system/lib/rfsa/adsp;/system/vendor/lib/rfsa/adsp;/dsp'

   */
}
#endif

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/jni/common.h
================================================
// sherpa-onnx/jni/common.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_JNI_COMMON_H_
#define SHERPA_ONNX_JNI_COMMON_H_

#include <string>

#if __ANDROID_API__ >= 9
#include <sstream>

#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#if defined(_WIN32)
#if defined(SHERPA_ONNX_BUILD_SHARED_LIBS)
#define SHERPA_ONNX_EXPORT __declspec(dllexport)
#define SHERPA_ONNX_IMPORT __declspec(dllimport)
#else
#define SHERPA_ONNX_EXPORT
#define SHERPA_ONNX_IMPORT
#endif
#else  // WIN32
#define SHERPA_ONNX_EXPORT __attribute__((visibility("default")))

#define SHERPA_ONNX_IMPORT SHERPA_ONNX_EXPORT
#endif  // WIN32

#if defined(SHERPA_ONNX_BUILD_MAIN_LIB)
#define SHERPA_ONNX_API SHERPA_ONNX_EXPORT
#else
#define SHERPA_ONNX_API SHERPA_ONNX_IMPORT
#endif

// If you use ndk, you can find "jni.h" inside
// android-ndk/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include
#include "jni.h"  // NOLINT

#define SHERPA_ONNX_EXTERN_C extern "C" SHERPA_ONNX_API

#define SHERPA_ONNX_JNI_READ_STRING(cpp_field, kotlin_field, cls, config)     \
  do {                                                                        \
    jfieldID fid = env->GetFieldID(cls, #kotlin_field, "Ljava/lang/String;"); \
    if (fid == nullptr || env->ExceptionCheck()) {                            \
      SHERPA_ONNX_LOGE("Failed to get field ID for '%s'", #kotlin_field);     \
      SHERPA_ONNX_LOGE(                                                       \
          "Please check that your kotlin code matches the library file "      \
          "libsherpa-onnx-jni.so . If you are not sure, always use the "      \
          "LATEST code and the latest library");                              \
      if (env->ExceptionCheck()) {                                            \
        env->ExceptionDescribe();                                             \
        env->ExceptionClear();                                                \
      }                                                                       \
      jclass exClass = env->FindClass("java/lang/RuntimeException");          \
      if (exClass) {                                                          \
        env->ThrowNew(exClass, "Failed to get field ID for " #kotlin_field);  \
        env->DeleteLocalRef(exClass);                                         \
      }                                                                       \
      return ans;                                                             \
    }                                                                         \
    jstring s = (jstring)env->GetObjectField(config, fid);                    \
    if (s != nullptr) {                                                       \
      const char *p = env->GetStringUTFChars(s, nullptr);                     \
      cpp_field = p;                                                          \
      env->ReleaseStringUTFChars(s, p);                                       \
      env->DeleteLocalRef(s);                                                 \
    }                                                                         \
  } while (0)

#define SHERPA_ONNX_JNI_READ_FLOAT(cpp_field, kotlin_field, cls, config)     \
  do {                                                                       \
    jfieldID fid = env->GetFieldID(cls, #kotlin_field, "F");                 \
    if (fid == nullptr || env->ExceptionCheck()) {                           \
      SHERPA_ONNX_LOGE("Failed to get field ID for '%s'", #kotlin_field);    \
      SHERPA_ONNX_LOGE(                                                      \
          "Please check that your kotlin code matches the library file "     \
          "libsherpa-onnx-jni.so . If you are not sure, always use the "     \
          "LATEST code and the latest library");                             \
      if (env->ExceptionCheck()) {                                           \
        env->ExceptionDescribe();                                            \
        env->ExceptionClear();                                               \
      }                                                                      \
      jclass exClass = env->FindClass("java/lang/RuntimeException");         \
      if (exClass) {                                                         \
        env->ThrowNew(exClass, "Failed to get field ID for " #kotlin_field); \
        env->DeleteLocalRef(exClass);                                        \
      }                                                                      \
      return ans;                                                            \
    }                                                                        \
    cpp_field = env->GetFloatField(config, fid);                             \
  } while (0)

#define SHERPA_ONNX_JNI_READ_INT(cpp_field, kotlin_field, cls, config)       \
  do {                                                                       \
    jfieldID fid = env->GetFieldID(cls, #kotlin_field, "I");                 \
    if (fid == nullptr || env->ExceptionCheck()) {                           \
      SHERPA_ONNX_LOGE("Failed to get field ID for '%s'", #kotlin_field);    \
      SHERPA_ONNX_LOGE(                                                      \
          "Please check that your kotlin code matches the library file "     \
          "libsherpa-onnx-jni.so . If you are not sure, always use the "     \
          "LATEST code and the latest library");                             \
      if (env->ExceptionCheck()) {                                           \
        env->ExceptionDescribe();                                            \
        env->ExceptionClear();                                               \
      }                                                                      \
      jclass exClass = env->FindClass("java/lang/RuntimeException");         \
      if (exClass) {                                                         \
        env->ThrowNew(exClass, "Failed to get field ID for " #kotlin_field); \
        env->DeleteLocalRef(exClass);                                        \
      }                                                                      \
      return ans;                                                            \
    }                                                                        \
    cpp_field = env->GetIntField(config, fid);                               \
  } while (0)

#define SHERPA_ONNX_JNI_READ_BOOL(cpp_field, kotlin_field, cls, config)      \
  do {                                                                       \
    jfieldID fid = env->GetFieldID(cls, #kotlin_field, "Z");                 \
    if (fid == nullptr || env->ExceptionCheck()) {                           \
      SHERPA_ONNX_LOGE("Failed to get field ID for '%s'", #kotlin_field);    \
      SHERPA_ONNX_LOGE(                                                      \
          "Please check that your kotlin code matches the library file "     \
          "libsherpa-onnx-jni.so . If you are not sure, always use the "     \
          "LATEST code and the latest library");                             \
      if (env->ExceptionCheck()) {                                           \
        env->ExceptionDescribe();                                            \
        env->ExceptionClear();                                               \
      }                                                                      \
      jclass exClass = env->FindClass("java/lang/RuntimeException");         \
      if (exClass) {                                                         \
        env->ThrowNew(exClass, "Failed to get field ID for " #kotlin_field); \
        env->DeleteLocalRef(exClass);                                        \
      }                                                                      \
      return ans;                                                            \
    }                                                                        \
    cpp_field = env->GetBooleanField(config, fid);                           \
  } while (0)

// defined in jni.cc
jobject NewInteger(JNIEnv *env, int32_t value);
jobject NewFloat(JNIEnv *env, float value);

// Template function for non-void return types
template <typename Func, typename ReturnType>
ReturnType SafeJNI(JNIEnv *env, const char *functionName, Func func,
                   ReturnType defaultValue) {
  try {
    return func();
  } catch (const std::exception &e) {
    jclass exClass = env->FindClass("java/lang/RuntimeException");
    if (exClass != nullptr) {
      std::string errorMessage = std::string(functionName) + ": " + e.what();
      env->ThrowNew(exClass, errorMessage.c_str());
      env->DeleteLocalRef(exClass);
    }
  } catch (...) {
    jclass exClass = env->FindClass("java/lang/RuntimeException");
    if (exClass != nullptr) {
      std::string errorMessage = std::string(functionName) +
                                 ": Native exception: caught unknown exception";
      env->ThrowNew(exClass, errorMessage.c_str());
      env->DeleteLocalRef(exClass);
    }
  }
  return defaultValue;
}

// Specialization for void return type
template <typename Func>
void SafeJNI(JNIEnv *env, const char *functionName, Func func) {
  try {
    func();
  } catch (const std::exception &e) {
    jclass exClass = env->FindClass("java/lang/RuntimeException");
    if (exClass != nullptr) {
      std::string errorMessage = std::string(functionName) + ": " + e.what();
      env->ThrowNew(exClass, errorMessage.c_str());
      env->DeleteLocalRef(exClass);
    }
  } catch (...) {
    jclass exClass = env->FindClass("java/lang/RuntimeException");
    if (exClass != nullptr) {
      std::string errorMessage = std::string(functionName) +
                                 ": Native exception: caught unknown exception";
      env->ThrowNew(exClass, errorMessage.c_str());
      env->DeleteLocalRef(exClass);
    }
  }
}

// Helper function to validate JNI pointers
inline bool ValidatePointer(JNIEnv *env, jlong ptr, const char *functionName,
                            const char *message) {
  if (ptr == 0) {
    jclass exClass = env->FindClass("java/lang/NullPointerException");
    if (exClass != nullptr) {
      std::string errorMessage = std::string(functionName) + ": " + message;
      env->ThrowNew(exClass, errorMessage.c_str());
      env->DeleteLocalRef(exClass);
    }
    return false;
  }
  return true;
}

namespace sherpa_onnx {
void PrependAdspLibraryPath(const std::string &new_path);
}

#endif  // SHERPA_ONNX_JNI_COMMON_H_


================================================
FILE: sherpa-onnx/jni/generate.sh
================================================
#!/usr/bin/env bash
set -ex

nm -g ../../build/lib/libsherpa-onnx-jni.dylib | awk '$2=="T" && $3 ~ /^_Java_com_k2fsa/ {print $3}' | sort  > ./sherpa-onnx-symbols.exp


================================================
FILE: sherpa-onnx/jni/jni.cc
================================================
// sherpa-onnx/jni/jni.cc
//
// Copyright (c)  2022-2023  Xiaomi Corporation
//                2022       Pingfeng Luo
//                2023       Zhaoming

#include <fstream>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/wave-writer.h"
#include "sherpa-onnx/jni/common.h"

// see
// https://stackoverflow.com/questions/29043872/android-jni-return-multiple-variables
jobject NewInteger(JNIEnv *env, int32_t value) {
  jclass cls = env->FindClass("java/lang/Integer");
  jmethodID constructor = env->GetMethodID(cls, "<init>", "(I)V");
  jobject obj = env->NewObject(cls, constructor, value);
  env->DeleteLocalRef(cls);
  return obj;
}

jobject NewFloat(JNIEnv *env, float value) {
  jclass cls = env->FindClass("java/lang/Float");
  jmethodID constructor = env->GetMethodID(cls, "<init>", "(F)V");
  jobject obj = env->NewObject(cls, constructor, value);
  env->DeleteLocalRef(cls);
  return obj;
}


================================================
FILE: sherpa-onnx/jni/keyword-spotter.cc
================================================
// sherpa-onnx/jni/keyword-spotter.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/keyword-spotter.h"

#include <memory>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/jni/common.h"

namespace sherpa_onnx {

OnlineModelConfig GetOnlineModelConfig(JNIEnv *env, jclass model_config_cls,
                                       jobject model_config, bool *ok);

static KeywordSpotterConfig GetKwsConfig(JNIEnv *env, jobject config,
                                         bool *ok) {
  KeywordSpotterConfig ans;

  jclass cls = env->GetObjectClass(config);
  jfieldID fid;

  // https://docs.oracle.com/javase/7/docs/technotes/guides/jni/spec/types.html
  // https://courses.cs.washington.edu/courses/cse341/99wi/java/tutorial/native1.1/implementing/field.html

  //---------- decoding ----------
  SHERPA_ONNX_JNI_READ_INT(ans.max_active_paths, maxActivePaths, cls, config);

  SHERPA_ONNX_JNI_READ_STRING(ans.keywords_file, keywordsFile, cls, config);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.keywords_score, keywordsScore, cls, config);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.keywords_threshold, keywordsThreshold, cls,
                             config);

  SHERPA_ONNX_JNI_READ_INT(ans.num_trailing_blanks, numTrailingBlanks, cls,
                           config);

  //---------- feat config ----------
  fid = env->GetFieldID(cls, "featConfig",
                        "Lcom/k2fsa/sherpa/onnx/FeatureConfig;");
  jobject feat_config = env->GetObjectField(config, fid);
  jclass feat_config_cls = env->GetObjectClass(feat_config);

  SHERPA_ONNX_JNI_READ_INT(ans.feat_config.sampling_rate, sampleRate,
                           feat_config_cls, feat_config);

  SHERPA_ONNX_JNI_READ_INT(ans.feat_config.feature_dim, featureDim,
                           feat_config_cls, feat_config);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.feat_config.dither, dither, feat_config_cls,
                             feat_config);

  //---------- model config ----------
  fid = env->GetFieldID(cls, "modelConfig",
                        "Lcom/k2fsa/sherpa/onnx/OnlineModelConfig;");
  jobject model_config = env->GetObjectField(config, fid);
  jclass model_config_cls = env->GetObjectClass(model_config);
  ans.model_config =
      GetOnlineModelConfig(env, model_config_cls, model_config, ok);

  if (!*ok) {
    return ans;
  }

  // *ok = false;
  // If there are more fields, remember to set *ok to false

  return ans;
}

}  // namespace sherpa_onnx

SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_KeywordSpotter_newFromAsset(
    JNIEnv *env, jobject /*obj*/, jobject asset_manager, jobject _config) {
#if __ANDROID_API__ >= 9
  AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager);
  if (!mgr) {
    SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr);
    return 0;
  }
#endif
  bool ok = false;
  auto config = sherpa_onnx::GetKwsConfig(env, _config, &ok);

  if (!ok) {
    SHERPA_ONNX_LOGE("Please read the error message carefully");
    return 0;
  }

  SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());

  auto kws = new sherpa_onnx::KeywordSpotter(
#if __ANDROID_API__ >= 9
      mgr,
#endif
      config);

  return (jlong)kws;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_KeywordSpotter_newFromFile(
    JNIEnv *env, jobject /*obj*/, jobject _config) {
  bool ok = false;
  auto config = sherpa_onnx::GetKwsConfig(env, _config, &ok);

  if (!ok) {
    SHERPA_ONNX_LOGE("Please read the error message carefully");
    return 0;
  }

  SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());

  if (!config.Validate()) {
    SHERPA_ONNX_LOGE("Errors found in config!");
    return 0;
  }

  auto kws = new sherpa_onnx::KeywordSpotter(config);

  return (jlong)kws;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_KeywordSpotter_delete(
    JNIEnv * /*env*/, jobject /*obj*/, jlong ptr) {
  delete reinterpret_cast<sherpa_onnx::KeywordSpotter *>(ptr);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_KeywordSpotter_decode(
    JNIEnv * /*env*/, jobject /*obj*/, jlong ptr, jlong stream_ptr) {
  auto kws = reinterpret_cast<sherpa_onnx::KeywordSpotter *>(ptr);
  auto stream = reinterpret_cast<sherpa_onnx::OnlineStream *>(stream_ptr);

  kws->DecodeStream(stream);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_KeywordSpotter_reset(
    JNIEnv * /*env*/, jobject /*obj*/, jlong ptr, jlong stream_ptr) {
  auto kws = reinterpret_cast<sherpa_onnx::KeywordSpotter *>(ptr);
  auto stream = reinterpret_cast<sherpa_onnx::OnlineStream *>(stream_ptr);

  kws->Reset(stream);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_KeywordSpotter_createStream(
    JNIEnv *env, jobject /*obj*/, jlong ptr, jstring keywords) {
  auto kws = reinterpret_cast<sherpa_onnx::KeywordSpotter *>(ptr);

  const char *p = env->GetStringUTFChars(keywords, nullptr);
  std::unique_ptr<sherpa_onnx::OnlineStream> stream;

  if (strlen(p) == 0) {
    stream = kws->CreateStream();
  } else {
    stream = kws->CreateStream(p);
  }

  env->ReleaseStringUTFChars(keywords, p);

  // The user is responsible to free the returned pointer.
  //
  // See Java_com_k2fsa_sherpa_onnx_OfflineStream_delete() from
  // ./offline-stream.cc
  sherpa_onnx::OnlineStream *ans = stream.release();
  return (jlong)ans;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jboolean JNICALL Java_com_k2fsa_sherpa_onnx_KeywordSpotter_isReady(
    JNIEnv * /*env*/, jobject /*obj*/, jlong ptr, jlong stream_ptr) {
  auto kws = reinterpret_cast<sherpa_onnx::KeywordSpotter *>(ptr);
  auto stream = reinterpret_cast<sherpa_onnx::OnlineStream *>(stream_ptr);

  return kws->IsReady(stream);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jobject JNICALL Java_com_k2fsa_sherpa_onnx_KeywordSpotter_getResult(
    JNIEnv *env, jobject /*obj*/, jlong ptr, jlong stream_ptr) {
  auto kws = reinterpret_cast<sherpa_onnx::KeywordSpotter *>(ptr);
  auto stream = reinterpret_cast<sherpa_onnx::OnlineStream *>(stream_ptr);

  sherpa_onnx::KeywordResult result = kws->GetResult(stream);

  jstring j_keyword = env->NewStringUTF(result.keyword.c_str());

  // Convert tokens (std::vector<std::string> -> String[])
  jclass string_cls = env->FindClass("java/lang/String");
  if (string_cls == nullptr) {
    SHERPA_ONNX_LOGE("Failed to find class java/lang/String");
    env->DeleteLocalRef(j_keyword);
    return nullptr;
  }

  jobjectArray j_tokens =
      env->NewObjectArray(result.tokens.size(), string_cls, nullptr);

  for (size_t i = 0; i < result.tokens.size(); ++i) {
    jstring t = env->NewStringUTF(result.tokens[i].c_str());
    env->SetObjectArrayElement(j_tokens, i, t);
    env->DeleteLocalRef(t);
  }

  // Convert timestamps (std::vector<float> -> float[])
  jfloatArray j_timestamps = env->NewFloatArray(result.timestamps.size());
  env->SetFloatArrayRegion(j_timestamps, 0, result.timestamps.size(),
                           result.timestamps.data());

  // Find KeywordSpotterResult class
  jclass result_cls =
      env->FindClass("com/k2fsa/sherpa/onnx/KeywordSpotterResult");

  if (result_cls == nullptr) {
    SHERPA_ONNX_LOGE(
        "Failed to find class com/k2fsa/sherpa/onnx/KeywordSpotterResult");
    env->DeleteLocalRef(j_keyword);
    env->DeleteLocalRef(j_tokens);
    env->DeleteLocalRef(j_timestamps);
    env->DeleteLocalRef(string_cls);
    return nullptr;
  }

  jmethodID ctor = env->GetMethodID(
      result_cls, "<init>", "(Ljava/lang/String;[Ljava/lang/String;[F)V");

  if (ctor == nullptr) {
    SHERPA_ONNX_LOGE("Failed to get KeywordSpotterResult constructor");
    env->DeleteLocalRef(j_keyword);
    env->DeleteLocalRef(j_tokens);
    env->DeleteLocalRef(j_timestamps);
    env->DeleteLocalRef(result_cls);
    env->DeleteLocalRef(string_cls);
    return nullptr;
  }

  // Create the KeywordSpotterResult object
  jobject result_obj =
      env->NewObject(result_cls, ctor, j_keyword, j_tokens, j_timestamps);

  env->DeleteLocalRef(j_keyword);
  env->DeleteLocalRef(j_tokens);
  env->DeleteLocalRef(j_timestamps);
  env->DeleteLocalRef(result_cls);
  env->DeleteLocalRef(string_cls);

  return result_obj;
}


================================================
FILE: sherpa-onnx/jni/offline-punctuation.cc
================================================
// sherpa-onnx/jni/offline-punctuation.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-punctuation.h"

#include <string>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/jni/common.h"

namespace sherpa_onnx {

static OfflinePunctuationConfig GetOfflinePunctuationConfig(JNIEnv *env,
                                                            jobject config,
                                                            bool *ok) {
  OfflinePunctuationConfig ans;

  jclass cls = env->GetObjectClass(config);
  jfieldID fid;

  fid = env->GetFieldID(
      cls, "model", "Lcom/k2fsa/sherpa/onnx/OfflinePunctuationModelConfig;");
  jobject model_config = env->GetObjectField(config, fid);
  jclass model_config_cls = env->GetObjectClass(model_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.ct_transformer, ctTransformer,
                              model_config_cls, model_config);

  SHERPA_ONNX_JNI_READ_INT(ans.model.num_threads, numThreads, model_config_cls,
                           model_config);

  SHERPA_ONNX_JNI_READ_BOOL(ans.model.debug, debug, model_config_cls,
                            model_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.provider, provider, model_config_cls,
                              model_config);

  *ok = true;
  return ans;
}

}  // namespace sherpa_onnx

SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL
Java_com_k2fsa_sherpa_onnx_OfflinePunctuation_newFromAsset(
    JNIEnv *env, jobject /*obj*/, jobject asset_manager, jobject _config) {
#if __ANDROID_API__ >= 9
  AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager);
  if (!mgr) {
    SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr);
    return 0;
  }
#endif
  bool ok = false;
  auto config = sherpa_onnx::GetOfflinePunctuationConfig(env, _config, &ok);

  if (!ok) {
    SHERPA_ONNX_LOGE("Please read the error message carefully");
    return 0;
  }

  SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());

  auto model = new sherpa_onnx::OfflinePunctuation(
#if __ANDROID_API__ >= 9
      mgr,
#endif
      config);

  return (jlong)model;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL
Java_com_k2fsa_sherpa_onnx_OfflinePunctuation_newFromFile(JNIEnv *env,
                                                          jobject /*obj*/,
                                                          jobject _config) {
  bool ok = false;
  auto config = sherpa_onnx::GetOfflinePunctuationConfig(env, _config, &ok);

  if (!ok) {
    SHERPA_ONNX_LOGE("Please read the error message carefully");
    return 0;
  }

  SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());

  if (!config.Validate()) {
    SHERPA_ONNX_LOGE("Errors found in config!");
    return 0;
  }

  auto model = new sherpa_onnx::OfflinePunctuation(config);

  return (jlong)model;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_OfflinePunctuation_delete(
    JNIEnv * /*env*/, jobject /*obj*/, jlong ptr) {
  delete reinterpret_cast<sherpa_onnx::OfflinePunctuation *>(ptr);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jstring JNICALL
Java_com_k2fsa_sherpa_onnx_OfflinePunctuation_addPunctuation(JNIEnv *env,
                                                             jobject /*obj*/,
                                                             jlong ptr,
                                                             jstring text) {
  auto punct = reinterpret_cast<const sherpa_onnx::OfflinePunctuation *>(ptr);

  const char *ptext = env->GetStringUTFChars(text, nullptr);

  std::string result = punct->AddPunctuation(ptext);

  env->ReleaseStringUTFChars(text, ptext);

  return env->NewStringUTF(result.c_str());
}


================================================
FILE: sherpa-onnx/jni/offline-recognizer.cc
================================================
// sherpa-onnx/jni/offline-recognizer.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-recognizer.h"

#include <stdlib.h>

#include <memory>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/text-utils.h"
#include "sherpa-onnx/jni/common.h"

namespace sherpa_onnx {

static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config,
                                                bool *ok) {
  OfflineRecognizerConfig ans;

  jclass cls = env->GetObjectClass(config);
  jfieldID fid;

  SHERPA_ONNX_JNI_READ_STRING(ans.decoding_method, decodingMethod, cls, config);

  SHERPA_ONNX_JNI_READ_INT(ans.max_active_paths, maxActivePaths, cls, config);

  SHERPA_ONNX_JNI_READ_STRING(ans.hotwords_file, hotwordsFile, cls, config);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.hotwords_score, hotwordsScore, cls, config);

  SHERPA_ONNX_JNI_READ_STRING(ans.rule_fsts, ruleFsts, cls, config);

  SHERPA_ONNX_JNI_READ_STRING(ans.rule_fars, ruleFars, cls, config);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.blank_penalty, blankPenalty, cls, config);

  fid = env->GetFieldID(cls, "featConfig",
                        "Lcom/k2fsa/sherpa/onnx/FeatureConfig;");
  jobject feat_config = env->GetObjectField(config, fid);
  jclass feat_config_cls = env->GetObjectClass(feat_config);

  SHERPA_ONNX_JNI_READ_INT(ans.feat_config.sampling_rate, sampleRate,
                           feat_config_cls, feat_config);

  SHERPA_ONNX_JNI_READ_INT(ans.feat_config.feature_dim, featureDim,
                           feat_config_cls, feat_config);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.feat_config.dither, dither, feat_config_cls,
                             feat_config);

  fid = env->GetFieldID(cls, "modelConfig",
                        "Lcom/k2fsa/sherpa/onnx/OfflineModelConfig;");
  jobject model_config = env->GetObjectField(config, fid);
  jclass model_config_cls = env->GetObjectClass(model_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.tokens, tokens, model_config_cls,
                              model_config);

  SHERPA_ONNX_JNI_READ_INT(ans.model_config.num_threads, numThreads,
                           model_config_cls, model_config);

  SHERPA_ONNX_JNI_READ_BOOL(ans.model_config.debug, debug, model_config_cls,
                            model_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.provider, provider,
                              model_config_cls, model_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.model_type, modelType,
                              model_config_cls, model_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.modeling_unit, modelingUnit,
                              model_config_cls, model_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.bpe_vocab, bpeVocab,
                              model_config_cls, model_config);

  fid = env->GetFieldID(model_config_cls, "transducer",
                        "Lcom/k2fsa/sherpa/onnx/OfflineTransducerModelConfig;");
  jobject transducer_config = env->GetObjectField(model_config, fid);
  jclass transducer_config_cls = env->GetObjectClass(transducer_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.transducer.encoder_filename,
                              encoder, transducer_config_cls,
                              transducer_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.transducer.decoder_filename,
                              decoder, transducer_config_cls,
                              transducer_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.transducer.joiner_filename,
                              joiner, transducer_config_cls, transducer_config);

  fid = env->GetFieldID(model_config_cls, "paraformer",
                        "Lcom/k2fsa/sherpa/onnx/OfflineParaformerModelConfig;");
  jobject paraformer_config = env->GetObjectField(model_config, fid);
  jclass paraformer_config_cls = env->GetObjectClass(paraformer_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.paraformer.model, model,
                              paraformer_config_cls, paraformer_config);

  fid = env->GetFieldID(paraformer_config_cls, "qnnConfig",
                        "Lcom/k2fsa/sherpa/onnx/QnnConfig;");
  jobject qnn_config = env->GetObjectField(paraformer_config, fid);
  jclass qnn_config_cls = env->GetObjectClass(qnn_config);

  SHERPA_ONNX_JNI_READ_STRING(
      ans.model_config.paraformer.qnn_config.backend_lib, backendLib,
      qnn_config_cls, qnn_config);

  SHERPA_ONNX_JNI_READ_STRING(
      ans.model_config.paraformer.qnn_config.context_binary, contextBinary,
      qnn_config_cls, qnn_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.paraformer.qnn_config.system_lib,
                              systemLib, qnn_config_cls, qnn_config);

  fid = env->GetFieldID(model_config_cls, "whisper",
                        "Lcom/k2fsa/sherpa/onnx/OfflineWhisperModelConfig;");
  jobject whisper_config = env->GetObjectField(model_config, fid);
  jclass whisper_config_cls = env->GetObjectClass(whisper_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.whisper.encoder, encoder,
                              whisper_config_cls, whisper_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.whisper.decoder, decoder,
                              whisper_config_cls, whisper_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.whisper.language, language,
                              whisper_config_cls, whisper_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.whisper.task, task,
                              whisper_config_cls, whisper_config);

  SHERPA_ONNX_JNI_READ_INT(ans.model_config.whisper.tail_paddings, tailPaddings,
                           whisper_config_cls, whisper_config);

  SHERPA_ONNX_JNI_READ_BOOL(ans.model_config.whisper.enable_token_timestamps,
                            enableTokenTimestamps, whisper_config_cls,
                            whisper_config);

  SHERPA_ONNX_JNI_READ_BOOL(ans.model_config.whisper.enable_segment_timestamps,
                            enableSegmentTimestamps, whisper_config_cls,
                            whisper_config);

  fid = env->GetFieldID(model_config_cls, "fireRedAsr",
                        "Lcom/k2fsa/sherpa/onnx/OfflineFireRedAsrModelConfig;");
  jobject fire_red_asr_config = env->GetObjectField(model_config, fid);
  jclass fire_red_asr_config_cls = env->GetObjectClass(fire_red_asr_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.fire_red_asr.encoder, encoder,
                              fire_red_asr_config_cls, fire_red_asr_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.fire_red_asr.decoder, decoder,
                              fire_red_asr_config_cls, fire_red_asr_config);

  // moonshine
  fid = env->GetFieldID(model_config_cls, "moonshine",
                        "Lcom/k2fsa/sherpa/onnx/OfflineMoonshineModelConfig;");
  jobject moonshine_config = env->GetObjectField(model_config, fid);
  jclass moonshine_config_cls = env->GetObjectClass(moonshine_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.moonshine.preprocessor,
                              preprocessor, moonshine_config_cls,
                              moonshine_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.moonshine.encoder, encoder,
                              moonshine_config_cls, moonshine_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.moonshine.uncached_decoder,
                              uncachedDecoder, moonshine_config_cls,
                              moonshine_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.moonshine.cached_decoder,
                              cachedDecoder, moonshine_config_cls,
                              moonshine_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.moonshine.merged_decoder,
                              mergedDecoder, moonshine_config_cls,
                              moonshine_config);

  fid = env->GetFieldID(model_config_cls, "senseVoice",
                        "Lcom/k2fsa/sherpa/onnx/OfflineSenseVoiceModelConfig;");
  jobject sense_voice_config = env->GetObjectField(model_config, fid);
  jclass sense_voice_config_cls = env->GetObjectClass(sense_voice_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.sense_voice.model, model,
                              sense_voice_config_cls, sense_voice_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.sense_voice.language, language,
                              sense_voice_config_cls, sense_voice_config);

  SHERPA_ONNX_JNI_READ_BOOL(ans.model_config.sense_voice.use_itn,
                            useInverseTextNormalization, sense_voice_config_cls,
                            sense_voice_config);

  fid = env->GetFieldID(sense_voice_config_cls, "qnnConfig",
                        "Lcom/k2fsa/sherpa/onnx/QnnConfig;");
  qnn_config = env->GetObjectField(sense_voice_config, fid);
  qnn_config_cls = env->GetObjectClass(qnn_config);

  SHERPA_ONNX_JNI_READ_STRING(
      ans.model_config.sense_voice.qnn_config.backend_lib, backendLib,
      qnn_config_cls, qnn_config);

  SHERPA_ONNX_JNI_READ_STRING(
      ans.model_config.sense_voice.qnn_config.context_binary, contextBinary,
      qnn_config_cls, qnn_config);

  SHERPA_ONNX_JNI_READ_STRING(
      ans.model_config.sense_voice.qnn_config.system_lib, systemLib,
      qnn_config_cls, qnn_config);

  // nemo
  fid = env->GetFieldID(
      model_config_cls, "nemo",
      "Lcom/k2fsa/sherpa/onnx/OfflineNemoEncDecCtcModelConfig;");
  jobject nemo_config = env->GetObjectField(model_config, fid);
  jclass nemo_config_cls = env->GetObjectClass(nemo_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.nemo_ctc.model, model,
                              nemo_config_cls, nemo_config);

  // zipformer ctc
  fid =
      env->GetFieldID(model_config_cls, "zipformerCtc",
                      "Lcom/k2fsa/sherpa/onnx/OfflineZipformerCtcModelConfig;");
  jobject zipformer_ctc_config = env->GetObjectField(model_config, fid);
  jclass zipformer_ctc_config_cls = env->GetObjectClass(zipformer_ctc_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.zipformer_ctc.model, model,
                              zipformer_ctc_config_cls, zipformer_ctc_config);

  fid = env->GetFieldID(zipformer_ctc_config_cls, "qnnConfig",
                        "Lcom/k2fsa/sherpa/onnx/QnnConfig;");

  qnn_config = env->GetObjectField(zipformer_ctc_config, fid);
  qnn_config_cls = env->GetObjectClass(qnn_config);

  SHERPA_ONNX_JNI_READ_STRING(
      ans.model_config.zipformer_ctc.qnn_config.backend_lib, backendLib,
      qnn_config_cls, qnn_config);

  SHERPA_ONNX_JNI_READ_STRING(
      ans.model_config.zipformer_ctc.qnn_config.context_binary, contextBinary,
      qnn_config_cls, qnn_config);

  SHERPA_ONNX_JNI_READ_STRING(
      ans.model_config.zipformer_ctc.qnn_config.system_lib, systemLib,
      qnn_config_cls, qnn_config);

  // wenet ctc
  fid = env->GetFieldID(model_config_cls, "wenetCtc",
                        "Lcom/k2fsa/sherpa/onnx/OfflineWenetCtcModelConfig;");
  jobject wenet_ctc_config = env->GetObjectField(model_config, fid);
  jclass wenet_ctc_config_cls = env->GetObjectClass(wenet_ctc_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.wenet_ctc.model, model,
                              wenet_ctc_config_cls, wenet_ctc_config);

  // omnilingual asr ctc
  fid = env->GetFieldID(
      model_config_cls, "omnilingual",
      "Lcom/k2fsa/sherpa/onnx/OfflineOmnilingualAsrCtcModelConfig;");
  jobject omnilingual_ctc_config = env->GetObjectField(model_config, fid);
  jclass omnilingual_ctc_config_cls =
      env->GetObjectClass(omnilingual_ctc_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.omnilingual.model, model,
                              omnilingual_ctc_config_cls,
                              omnilingual_ctc_config);

  // medasr ctc
  fid = env->GetFieldID(model_config_cls, "medasr",
                        "Lcom/k2fsa/sherpa/onnx/OfflineMedAsrCtcModelConfig;");
  jobject medasr_ctc_config = env->GetObjectField(model_config, fid);
  jclass medasr_ctc_config_cls = env->GetObjectClass(medasr_ctc_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.medasr.model, model,
                              medasr_ctc_config_cls, medasr_ctc_config);

  // FunASR Nano
  fid = env->GetFieldID(model_config_cls, "funasrNano",
                        "Lcom/k2fsa/sherpa/onnx/OfflineFunAsrNanoModelConfig;");
  jobject funasr_nano_config = env->GetObjectField(model_config, fid);
  jclass funasr_nano_config_cls = env->GetObjectClass(funasr_nano_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.funasr_nano.encoder_adaptor,
                              encoderAdaptor, funasr_nano_config_cls,
                              funasr_nano_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.funasr_nano.llm, llm,
                              funasr_nano_config_cls, funasr_nano_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.funasr_nano.embedding, embedding,
                              funasr_nano_config_cls, funasr_nano_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.funasr_nano.tokenizer, tokenizer,
                              funasr_nano_config_cls, funasr_nano_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.funasr_nano.system_prompt,
                              systemPrompt, funasr_nano_config_cls,
                              funasr_nano_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.funasr_nano.user_prompt,
                              userPrompt, funasr_nano_config_cls,
                              funasr_nano_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.funasr_nano.language, language,
                              funasr_nano_config_cls, funasr_nano_config);

  SHERPA_ONNX_JNI_READ_BOOL(ans.model_config.funasr_nano.itn, itn,
                            funasr_nano_config_cls, funasr_nano_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.funasr_nano.hotwords, hotwords,
                              funasr_nano_config_cls, funasr_nano_config);

  SHERPA_ONNX_JNI_READ_INT(ans.model_config.funasr_nano.max_new_tokens,
                           maxNewTokens, funasr_nano_config_cls,
                           funasr_nano_config);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.model_config.funasr_nano.temperature,
                             temperature, funasr_nano_config_cls,
                             funasr_nano_config);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.model_config.funasr_nano.top_p, topP,
                             funasr_nano_config_cls, funasr_nano_config);

  SHERPA_ONNX_JNI_READ_INT(ans.model_config.funasr_nano.seed, seed,
                           funasr_nano_config_cls, funasr_nano_config);

  // fire red asr ctc
  fid = env->GetFieldID(
      model_config_cls, "fireRedAsrCtc",
      "Lcom/k2fsa/sherpa/onnx/OfflineFireRedAsrCtcModelConfig;");
  jobject fire_red_asr_ctc_config = env->GetObjectField(model_config, fid);
  jclass fire_red_asr_ctc_config_cls =
      env->GetObjectClass(fire_red_asr_ctc_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.fire_red_asr_ctc.model, model,
                              fire_red_asr_ctc_config_cls,
                              fire_red_asr_ctc_config);

  // canary
  fid = env->GetFieldID(model_config_cls, "canary",
                        "Lcom/k2fsa/sherpa/onnx/OfflineCanaryModelConfig;");
  jobject canary_config = env->GetObjectField(model_config, fid);
  jclass canary_config_cls = env->GetObjectClass(canary_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.canary.encoder, encoder,
                              canary_config_cls, canary_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.canary.decoder, decoder,
                              canary_config_cls, canary_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.canary.src_lang, srcLang,
                              canary_config_cls, canary_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.canary.tgt_lang, tgtLang,
                              canary_config_cls, canary_config);

  SHERPA_ONNX_JNI_READ_BOOL(ans.model_config.canary.use_pnc, usePnc,
                            canary_config_cls, canary_config);

  fid = env->GetFieldID(model_config_cls, "dolphin",
                        "Lcom/k2fsa/sherpa/onnx/OfflineDolphinModelConfig;");
  jobject dolphin_config = env->GetObjectField(model_config, fid);
  jclass dolphin_config_cls = env->GetObjectClass(dolphin_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.dolphin.model, model,
                              dolphin_config_cls, dolphin_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_config.telespeech_ctc, teleSpeech,
                              model_config_cls, model_config);

  // homophone replacer config
  fid = env->GetFieldID(cls, "hr",
                        "Lcom/k2fsa/sherpa/onnx/HomophoneReplacerConfig;");
  jobject hr_config = env->GetObjectField(config, fid);
  jclass hr_config_cls = env->GetObjectClass(hr_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.hr.lexicon, lexicon, hr_config_cls,
                              hr_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.hr.rule_fsts, ruleFsts, hr_config_cls,
                              hr_config);

  *ok = true;
  return ans;
}

}  // namespace sherpa_onnx

SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL
Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_newFromAsset(JNIEnv *env,
                                                          jobject /*obj*/,
                                                          jobject asset_manager,
                                                          jobject _config) {
#if __ANDROID_API__ >= 9
  AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager);
  if (!mgr) {
    SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr);
    return 0;
  }
#endif
  bool ok = false;
  auto config = sherpa_onnx::GetOfflineConfig(env, _config, &ok);

  if (!ok) {
    SHERPA_ONNX_LOGE("Please read the error message carefully");
    return 0;
  }

  if (config.model_config.debug) {
#if __ANDROID_API__
    // logcat truncates long strings, so we split the string into chunks
    auto str_vec = sherpa_onnx::SplitString(config.ToString(), 128);
    for (const auto &s : str_vec) {
      SHERPA_ONNX_LOGE("%s", s.c_str());
    }
#else
    SHERPA_ONNX_LOGE("%s", config.ToString().c_str());
#endif
  }

  auto model = new sherpa_onnx::OfflineRecognizer(
#if __ANDROID_API__ >= 9
      mgr,
#endif
      config);

  return (jlong)model;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL
Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_newFromFile(JNIEnv *env,
                                                         jobject /*obj*/,
                                                         jobject _config) {
  bool ok = false;
  auto config = sherpa_onnx::GetOfflineConfig(env, _config, &ok);

  if (!ok) {
    SHERPA_ONNX_LOGE("Please read the error message carefully");
    return 0;
  }

  if (config.model_config.debug) {
#if __ANDROID_API__
    auto str_vec = sherpa_onnx::SplitString(config.ToString(), 128);
    for (const auto &s : str_vec) {
      SHERPA_ONNX_LOGE("%s", s.c_str());
    }
#else
    SHERPA_ONNX_LOGE("%s", config.ToString().c_str());
#endif
  }

  if (!config.Validate()) {
    SHERPA_ONNX_LOGE("Errors found in config!");
    return 0;
  }

  auto model = new sherpa_onnx::OfflineRecognizer(config);

  return (jlong)model;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_setConfig(
    JNIEnv *env, jobject /*obj*/, jlong ptr, jobject _config) {
  bool ok = false;
  auto config = sherpa_onnx::GetOfflineConfig(env, _config, &ok);

  if (!ok) {
    SHERPA_ONNX_LOGE("Please read the error message carefully");
    return;
  }

  if (config.model_config.debug) {
    SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());
  }

  auto recognizer = reinterpret_cast<sherpa_onnx::OfflineRecognizer *>(ptr);
  recognizer->SetConfig(config);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_delete(
    JNIEnv * /*env*/, jobject /*obj*/, jlong ptr) {
  delete reinterpret_cast<sherpa_onnx::OfflineRecognizer *>(ptr);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL
Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_createStream(JNIEnv * /*env*/,
                                                          jobject /*obj*/,
                                                          jlong ptr) {
  auto recognizer = reinterpret_cast<sherpa_onnx::OfflineRecognizer *>(ptr);
  std::unique_ptr<sherpa_onnx::OfflineStream> s = recognizer->CreateStream();

  // The user is responsible to free the returned pointer.
  //
  // See Java_com_k2fsa_sherpa_onnx_OfflineStream_delete() from
  // ./offline-stream.cc
  sherpa_onnx::OfflineStream *p = s.release();
  return (jlong)p;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_decode(
    JNIEnv *env, jobject /*obj*/, jlong ptr, jlong stream_ptr) {
  SafeJNI(env, "OfflineRecognizer_decode", [&] {
    if (!ValidatePointer(env, ptr, "OfflineRecognizer_decode",
                         "OfflineRecognizer pointer is null.") ||
        !ValidatePointer(env, stream_ptr, "OfflineRecognizer_decode",
                         "OfflineStream pointer is null.")) {
      return;
    }

    auto recognizer = reinterpret_cast<sherpa_onnx::OfflineRecognizer *>(ptr);
    auto stream = reinterpret_cast<sherpa_onnx::OfflineStream *>(stream_ptr);
    recognizer->DecodeStream(stream);
  });
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL
Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_decodeStreams(
    JNIEnv *env, jobject /*obj*/, jlong ptr, jlongArray stream_ptrs) {
  SafeJNI(env, "OfflineRecognizer_decode_streams", [&] {
    if (!ValidatePointer(env, ptr, "OfflineRecognizer_decode_streams",
                         "OfflineRecognizer pointer is null.")) {
      return;
    }

    auto recognizer = reinterpret_cast<sherpa_onnx::OfflineRecognizer *>(ptr);

    jlong *p = env->GetLongArrayElements(stream_ptrs, nullptr);
    jsize n = env->GetArrayLength(stream_ptrs);

    auto ss = reinterpret_cast<sherpa_onnx::OfflineStream **>(p);
    recognizer->DecodeStreams(ss, n);

    env->ReleaseLongArrayElements(stream_ptrs, p, JNI_ABORT);
  });
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jobject JNICALL
Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_getResult(JNIEnv *env,
                                                       jobject /*obj*/,
                                                       jlong streamPtr) {
  auto stream = reinterpret_cast<sherpa_onnx::OfflineStream *>(streamPtr);
  sherpa_onnx::OfflineRecognitionResult result = stream->GetResult();

  // 2. Find the Java class and constructor
  jclass cls = env->FindClass("com/k2fsa/sherpa/onnx/OfflineRecognizerResult");
  if (cls == nullptr) {
    SHERPA_ONNX_LOGE("Failed to find class OfflineRecognizerResult");
    return nullptr;
  }
  jmethodID ctor =
      env->GetMethodID(cls, "<init>",
                       "(Ljava/lang/String;[Ljava/lang/String;[FLjava/lang/"
                       "String;Ljava/lang/String;Ljava/lang/String;[F)V");
  jstring jtext = env->NewStringUTF(result.text.c_str());

  jclass string_cls = env->FindClass("java/lang/String");
  jobjectArray jtokens = env->NewObjectArray(
      result.tokens.size(), string_cls, nullptr);
  env->DeleteLocalRef(string_cls);

  for (size_t i = 0; i < result.tokens.size(); ++i) {
    jstring token_str = env->NewStringUTF(result.tokens[i].c_str());
    env->SetObjectArrayElement(jtokens, i, token_str);
    env->DeleteLocalRef(token_str);
  }

  jfloatArray jtimestamps = env->NewFloatArray(result.timestamps.size());
  env->SetFloatArrayRegion(jtimestamps, 0, result.timestamps.size(),
                           result.timestamps.data());

  jstring jlang = env->NewStringUTF(result.lang.c_str());
  jstring jemotion = env->NewStringUTF(result.emotion.c_str());
  jstring jevent = env->NewStringUTF(result.event.c_str());

  jfloatArray jdurations = env->NewFloatArray(result.durations.size());
  env->SetFloatArrayRegion(jdurations, 0, result.durations.size(),
                           result.durations.data());

  jobject jresult = env->NewObject(cls, ctor, jtext, jtokens, jtimestamps,
                                   jlang, jemotion, jevent, jdurations);

  env->DeleteLocalRef(jtext);
  env->DeleteLocalRef(jtokens);
  env->DeleteLocalRef(jtimestamps);
  env->DeleteLocalRef(jlang);
  env->DeleteLocalRef(jemotion);
  env->DeleteLocalRef(jevent);
  env->DeleteLocalRef(jdurations);
  env->DeleteLocalRef(cls);

  return jresult;  // returned object is safe
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL
Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_prependAdspLibraryPath(
    JNIEnv *env, jclass /*cls*/, jstring new_path) {
  const char *p = env->GetStringUTFChars(new_path, nullptr);
  sherpa_onnx::PrependAdspLibraryPath(p);

  env->ReleaseStringUTFChars(new_path, p);
}


================================================
FILE: sherpa-onnx/jni/offline-speaker-diarization.cc
================================================
// sherpa-onnx/jni/offline-speaker-diarization.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-speaker-diarization.h"

#include <vector>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/jni/common.h"

namespace sherpa_onnx {

static OfflineSpeakerDiarizationConfig GetOfflineSpeakerDiarizationConfig(
    JNIEnv *env, jobject config, bool *ok) {
  OfflineSpeakerDiarizationConfig ans;

  jclass cls = env->GetObjectClass(config);
  jfieldID fid;

  //---------- segmentation ----------
  fid = env->GetFieldID(
      cls, "segmentation",
      "Lcom/k2fsa/sherpa/onnx/OfflineSpeakerSegmentationModelConfig;");
  jobject segmentation_config = env->GetObjectField(config, fid);
  jclass segmentation_config_cls = env->GetObjectClass(segmentation_config);

  fid = env->GetFieldID(
      segmentation_config_cls, "pyannote",
      "Lcom/k2fsa/sherpa/onnx/OfflineSpeakerSegmentationPyannoteModelConfig;");
  jobject pyannote_config = env->GetObjectField(segmentation_config, fid);
  jclass pyannote_config_cls = env->GetObjectClass(pyannote_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.segmentation.pyannote.model, model,
                              pyannote_config_cls, pyannote_config);

  SHERPA_ONNX_JNI_READ_INT(ans.segmentation.num_threads, numThreads,
                           segmentation_config_cls, segmentation_config);

  SHERPA_ONNX_JNI_READ_BOOL(ans.segmentation.debug, debug,
                            segmentation_config_cls, segmentation_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.segmentation.provider, provider,
                              segmentation_config_cls, segmentation_config);

  //---------- embedding ----------
  fid = env->GetFieldID(
      cls, "embedding",
      "Lcom/k2fsa/sherpa/onnx/SpeakerEmbeddingExtractorConfig;");
  jobject embedding_config = env->GetObjectField(config, fid);
  jclass embedding_config_cls = env->GetObjectClass(embedding_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.embedding.model, model, embedding_config_cls,
                              embedding_config);

  SHERPA_ONNX_JNI_READ_INT(ans.embedding.num_threads, numThreads,
                           embedding_config_cls, embedding_config);

  SHERPA_ONNX_JNI_READ_BOOL(ans.embedding.debug, debug, embedding_config_cls,
                            embedding_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.embedding.provider, provider,
                              embedding_config_cls, embedding_config);

  fid = env->GetFieldID(cls, "clustering",
                        "Lcom/k2fsa/sherpa/onnx/FastClusteringConfig;");
  jobject clustering_config = env->GetObjectField(config, fid);
  jclass clustering_config_cls = env->GetObjectClass(clustering_config);

  SHERPA_ONNX_JNI_READ_INT(ans.clustering.num_clusters, numClusters,
                           clustering_config_cls, clustering_config);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.clustering.threshold, threshold,
                             clustering_config_cls, clustering_config);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.min_duration_on, minDurationOn, cls, config);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.min_duration_off, minDurationOff, cls, config);

  *ok = true;
  return ans;
}

}  // namespace sherpa_onnx

SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL
Java_com_k2fsa_sherpa_onnx_OfflineSpeakerDiarization_newFromAsset(
    JNIEnv *env, jobject /*obj*/, jobject asset_manager, jobject _config) {
#if __ANDROID_API__ >= 9
  AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager);
  if (!mgr) {
    SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr);
    return 0;
  }
#endif

  bool ok = false;
  auto config =
      sherpa_onnx::GetOfflineSpeakerDiarizationConfig(env, _config, &ok);

  if (!ok) {
    SHERPA_ONNX_LOGE("Please read the error message carefully");
    return 0;
  }

  SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());

  auto sd = new sherpa_onnx::OfflineSpeakerDiarization(
#if __ANDROID_API__ >= 9
      mgr,
#endif
      config);

  return (jlong)sd;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL
Java_com_k2fsa_sherpa_onnx_OfflineSpeakerDiarization_newFromFile(
    JNIEnv *env, jobject /*obj*/, jobject _config) {
  bool ok = false;
  auto config =
      sherpa_onnx::GetOfflineSpeakerDiarizationConfig(env, _config, &ok);

  if (!ok) {
    SHERPA_ONNX_LOGE("Please read the error message carefully");
    return 0;
  }

  SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());

  if (!config.Validate()) {
    SHERPA_ONNX_LOGE("Errors found in config!");
    return 0;
  }

  auto sd = new sherpa_onnx::OfflineSpeakerDiarization(config);

  return (jlong)sd;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL
Java_com_k2fsa_sherpa_onnx_OfflineSpeakerDiarization_setConfig(
    JNIEnv *env, jobject /*obj*/, jlong ptr, jobject _config) {
  bool ok = false;
  auto config =
      sherpa_onnx::GetOfflineSpeakerDiarizationConfig(env, _config, &ok);

  if (!ok) {
    SHERPA_ONNX_LOGE("Please read the error message carefully");
    return;
  }

  SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());

  auto sd = reinterpret_cast<sherpa_onnx::OfflineSpeakerDiarization *>(ptr);
  sd->SetConfig(config);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL
Java_com_k2fsa_sherpa_onnx_OfflineSpeakerDiarization_delete(JNIEnv * /*env*/,
                                                            jobject /*obj*/,
                                                            jlong ptr) {
  delete reinterpret_cast<sherpa_onnx::OfflineSpeakerDiarization *>(ptr);
}

static jobjectArray ProcessImpl(
    JNIEnv *env,
    const std::vector<sherpa_onnx::OfflineSpeakerDiarizationSegment>
        &segments) {
  jclass cls =
      env->FindClass("com/k2fsa/sherpa/onnx/OfflineSpeakerDiarizationSegment");
  if (cls == nullptr) {
    SHERPA_ONNX_LOGE(
        "Failed to find class OfflineSpeakerDiarizationSegment");
    return nullptr;
  }

  jobjectArray obj_arr =
      (jobjectArray)env->NewObjectArray(segments.size(), cls, nullptr);

  jmethodID constructor = env->GetMethodID(cls, "<init>", "(FFI)V");
  if (constructor == nullptr) {
    SHERPA_ONNX_LOGE(
        "Failed to get OfflineSpeakerDiarizationSegment constructor");
    env->DeleteLocalRef(cls);
    return nullptr;
  }

  for (int32_t i = 0; i != static_cast<int32_t>(segments.size()); ++i) {
    const auto &s = segments[i];
    jobject segment =
        env->NewObject(cls, constructor, s.Start(), s.End(), s.Speaker());
    env->SetObjectArrayElement(obj_arr, i, segment);
    env->DeleteLocalRef(segment);
  }

  env->DeleteLocalRef(cls);
  return obj_arr;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jobjectArray JNICALL
Java_com_k2fsa_sherpa_onnx_OfflineSpeakerDiarization_process(
    JNIEnv *env, jobject /*obj*/, jlong ptr, jfloatArray samples) {
  auto sd = reinterpret_cast<sherpa_onnx::OfflineSpeakerDiarization *>(ptr);

  jfloat *p = env->GetFloatArrayElements(samples, nullptr);
  jsize n = env->GetArrayLength(samples);
  auto segments = sd->Process(p, n).SortByStartTime();
  env->ReleaseFloatArrayElements(samples, p, JNI_ABORT);

  return ProcessImpl(env, segments);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jobjectArray JNICALL
Java_com_k2fsa_sherpa_onnx_OfflineSpeakerDiarization_processWithCallback(
    JNIEnv *env, jobject /*obj*/, jlong ptr, jfloatArray samples,
    jobject callback, jlong arg) {
  std::function<int32_t(int32_t, int32_t, void *)> callback_wrapper =
      [env, callback](int32_t num_processed_chunks, int32_t num_total_chunks,
                      void *data) -> int {
    jclass cls = env->GetObjectClass(callback);

    jmethodID mid = env->GetMethodID(cls, "invoke", "(IIJ)Ljava/lang/Integer;");
    if (mid == nullptr) {
      SHERPA_ONNX_LOGE("Failed to get the callback. Ignore it.");
      env->DeleteLocalRef(cls);
      return 0;
    }
    env->DeleteLocalRef(cls);

    jobject ret = env->CallObjectMethod(callback, mid, num_processed_chunks,
                                        num_total_chunks, (jlong)data);
    if (ret == nullptr) {
      return 0;
    }

    jclass jklass = env->GetObjectClass(ret);
    jmethodID int_value_mid = env->GetMethodID(jklass, "intValue", "()I");
    int32_t result = env->CallIntMethod(ret, int_value_mid);
    env->DeleteLocalRef(jklass);
    env->DeleteLocalRef(ret);
    return result;
  };

  auto sd = reinterpret_cast<sherpa_onnx::OfflineSpeakerDiarization *>(ptr);

  jfloat *p = env->GetFloatArrayElements(samples, nullptr);
  jsize n = env->GetArrayLength(samples);
  auto segments =
      sd->Process(p, n, callback_wrapper, reinterpret_cast<void *>(arg))
          .SortByStartTime();
  env->ReleaseFloatArrayElements(samples, p, JNI_ABORT);

  return ProcessImpl(env, segments);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jint JNICALL
Java_com_k2fsa_sherpa_onnx_OfflineSpeakerDiarization_getSampleRate(
    JNIEnv * /*env*/, jobject /*obj*/, jlong ptr) {
  return reinterpret_cast<sherpa_onnx::OfflineSpeakerDiarization *>(ptr)
      ->SampleRate();
}


================================================
FILE: sherpa-onnx/jni/offline-speech-denoiser.cc
================================================
// sherpa-onnx/jni/offline-speech-denoiser.cc
//
// Copyright (c)  2025  Xiaomi Corporation
#include "sherpa-onnx/csrc/offline-speech-denoiser.h"

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/wave-writer.h"
#include "sherpa-onnx/jni/common.h"
#include "sherpa-onnx/jni/speech-denoiser.h"

SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL
Java_com_k2fsa_sherpa_onnx_OfflineSpeechDenoiser_newFromAsset(
    JNIEnv *env, jobject /*obj*/, jobject asset_manager, jobject _config) {
#if __ANDROID_API__ >= 9
  AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager);
  if (!mgr) {
    SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr);
    return 0;
  }
#endif

  bool ok = false;
  auto config = sherpa_onnx::GetOfflineSpeechDenoiserConfig(env, _config, &ok);
  if (!ok) {
    SHERPA_ONNX_LOGE("Please read the error message carefully");
    return 0;
  }

  SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());

  auto speech_denoiser = new sherpa_onnx::OfflineSpeechDenoiser(
#if __ANDROID_API__ >= 9
      mgr,
#endif
      config);

  return (jlong)speech_denoiser;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL
Java_com_k2fsa_sherpa_onnx_OfflineSpeechDenoiser_newFromFile(JNIEnv *env,
                                                             jobject /*obj*/,
                                                             jobject _config) {
  return SafeJNI(
      env, "OfflineSpeechDenoiser_newFromFile",
      [&]() -> jlong {
        bool ok = false;
        auto config =
            sherpa_onnx::GetOfflineSpeechDenoiserConfig(env, _config, &ok);

        if (!ok) {
          SHERPA_ONNX_LOGE("Please read the error message carefully");
          return 0;
        }

        SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());

        if (!config.Validate()) {
          SHERPA_ONNX_LOGE("Errors found in config!");
          return 0;
        }

        auto speech_denoiser = new sherpa_onnx::OfflineSpeechDenoiser(config);
        return reinterpret_cast<jlong>(speech_denoiser);
      },
      (jlong)0);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_OfflineSpeechDenoiser_delete(
    JNIEnv * /*env*/, jobject /*obj*/, jlong ptr) {
  delete reinterpret_cast<sherpa_onnx::OfflineSpeechDenoiser *>(ptr);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jint JNICALL
Java_com_k2fsa_sherpa_onnx_OfflineSpeechDenoiser_getSampleRate(JNIEnv * /*env*/,
                                                               jobject /*obj*/,
                                                               jlong ptr) {
  return reinterpret_cast<sherpa_onnx::OfflineSpeechDenoiser *>(ptr)
      ->GetSampleRate();
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jobject JNICALL Java_com_k2fsa_sherpa_onnx_OfflineSpeechDenoiser_run(
    JNIEnv *env, jobject /*obj*/, jlong ptr, jfloatArray samples,
    jint sample_rate) {
  auto speech_denoiser =
      reinterpret_cast<sherpa_onnx::OfflineSpeechDenoiser *>(ptr);

  jfloat *p = env->GetFloatArrayElements(samples, nullptr);
  jsize n = env->GetArrayLength(samples);
  auto denoised = speech_denoiser->Run(p, n, sample_rate);
  env->ReleaseFloatArrayElements(samples, p, JNI_ABORT);

  return sherpa_onnx::NewDenoisedAudio(env, denoised);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jboolean JNICALL Java_com_k2fsa_sherpa_onnx_DenoisedAudio_saveImpl(
    JNIEnv *env, jobject /*obj*/, jstring filename, jfloatArray samples,
    jint sample_rate) {
  const char *p_filename = env->GetStringUTFChars(filename, nullptr);

  jfloat *p = env->GetFloatArrayElements(samples, nullptr);
  jsize n = env->GetArrayLength(samples);

  bool ok = sherpa_onnx::WriteWave(p_filename, sample_rate, p, n);

  env->ReleaseStringUTFChars(filename, p_filename);
  env->ReleaseFloatArrayElements(samples, p, JNI_ABORT);

  return ok;
}


================================================
FILE: sherpa-onnx/jni/offline-stream.cc
================================================
// sherpa-onnx/jni/offline-stream.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-stream.h"

#include "sherpa-onnx/jni/common.h"

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_OfflineStream_delete(
    JNIEnv * /*env*/, jobject /*obj*/, jlong ptr) {
  delete reinterpret_cast<sherpa_onnx::OfflineStream *>(ptr);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_OfflineStream_acceptWaveform(
    JNIEnv *env, jobject /*obj*/, jlong ptr, jfloatArray samples,
    jint sample_rate) {
  auto stream = reinterpret_cast<sherpa_onnx::OfflineStream *>(ptr);

  jfloat *p = env->GetFloatArrayElements(samples, nullptr);
  jsize n = env->GetArrayLength(samples);
  stream->AcceptWaveform(sample_rate, p, n);
  env->ReleaseFloatArrayElements(samples, p, JNI_ABORT);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_OfflineStream_setOption(
    JNIEnv *env, jobject /*obj*/, jlong ptr, jstring key, jstring value) {
  auto stream = reinterpret_cast<sherpa_onnx::OfflineStream *>(ptr);
  const char *p_key = env->GetStringUTFChars(key, nullptr);
  const char *p_value = env->GetStringUTFChars(value, nullptr);
  stream->SetOption(p_key, p_value);
  env->ReleaseStringUTFChars(key, p_key);
  env->ReleaseStringUTFChars(value, p_value);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jstring JNICALL Java_com_k2fsa_sherpa_onnx_OfflineStream_getOption(
    JNIEnv *env, jobject /*obj*/, jlong ptr, jstring key) {
  auto stream = reinterpret_cast<sherpa_onnx::OfflineStream *>(ptr);
  const char *p_key = env->GetStringUTFChars(key, nullptr);
  const std::string &value = stream->GetOption(p_key);
  env->ReleaseStringUTFChars(key, p_key);
  return env->NewStringUTF(value.c_str());
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jboolean JNICALL Java_com_k2fsa_sherpa_onnx_OfflineStream_hasOption(
    JNIEnv *env, jobject /*obj*/, jlong ptr, jstring key) {
  auto stream = reinterpret_cast<sherpa_onnx::OfflineStream *>(ptr);
  const char *p_key = env->GetStringUTFChars(key, nullptr);
  jboolean result = stream->HasOption(p_key);
  env->ReleaseStringUTFChars(key, p_key);
  return result;
}


================================================
FILE: sherpa-onnx/jni/offline-tts.cc
================================================
// sherpa-onnx/jni/offline-tts.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-tts.h"

#include <string>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/text-utils.h"
#include "sherpa-onnx/csrc/wave-writer.h"
#include "sherpa-onnx/jni/common.h"

namespace sherpa_onnx {

// ------------------ JNI Config Helpers ------------------

static GenerationConfig GetGenerationConfig(JNIEnv *env, jobject config_obj) {
  GenerationConfig ans;

  if (!config_obj) {
    SHERPA_ONNX_LOGE("GenerationConfig is null");
    return ans;
  }

  jclass cls = env->GetObjectClass(config_obj);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.silence_scale, silenceScale, cls, config_obj);
  SHERPA_ONNX_JNI_READ_FLOAT(ans.speed, speed, cls, config_obj);
  SHERPA_ONNX_JNI_READ_INT(ans.sid, sid, cls, config_obj);

  // referenceAudio
  jfieldID fid = env->GetFieldID(cls, "referenceAudio", "[F");
  if (fid != nullptr) {
    jfloatArray arr = (jfloatArray)env->GetObjectField(config_obj, fid);
    if (arr != nullptr) {
      jsize len = env->GetArrayLength(arr);
      jfloat *elems = env->GetFloatArrayElements(arr, nullptr);
      ans.reference_audio.assign(elems, elems + len);
      env->ReleaseFloatArrayElements(arr, elems, JNI_ABORT);
      env->DeleteLocalRef(arr);
    }
  }

  SHERPA_ONNX_JNI_READ_INT(ans.reference_sample_rate, referenceSampleRate, cls,
                           config_obj);

  // referenceText
  SHERPA_ONNX_JNI_READ_STRING(ans.reference_text, referenceText, cls,
                              config_obj);

  SHERPA_ONNX_JNI_READ_INT(ans.num_steps, numSteps, cls, config_obj);

  // extra Map<String, String>
  fid = env->GetFieldID(cls, "extra", "Ljava/util/Map;");
  if (fid != nullptr) {
    jobject map_obj = env->GetObjectField(config_obj, fid);
    if (map_obj != nullptr) {
      jclass map_cls = env->GetObjectClass(map_obj);
      jmethodID entrySet =
          env->GetMethodID(map_cls, "entrySet", "()Ljava/util/Set;");
      jobject entry_set = env->CallObjectMethod(map_obj, entrySet);

      jclass set_cls = env->GetObjectClass(entry_set);
      jmethodID iteratorMid =
          env->GetMethodID(set_cls, "iterator", "()Ljava/util/Iterator;");
      jobject iterator = env->CallObjectMethod(entry_set, iteratorMid);

      jclass iter_cls = env->GetObjectClass(iterator);
      jmethodID hasNextMid = env->GetMethodID(iter_cls, "hasNext", "()Z");
      jmethodID nextMid =
          env->GetMethodID(iter_cls, "next", "()Ljava/lang/Object;");

      jclass entry_cls = env->FindClass("java/util/Map$Entry");
      jmethodID getKeyMid =
          env->GetMethodID(entry_cls, "getKey", "()Ljava/lang/Object;");
      jmethodID getValueMid =
          env->GetMethodID(entry_cls, "getValue", "()Ljava/lang/Object;");

      while (env->CallBooleanMethod(iterator, hasNextMid)) {
        jobject entry = env->CallObjectMethod(iterator, nextMid);
        if (!entry) {
          continue;
        }

        jstring key = (jstring)env->CallObjectMethod(entry, getKeyMid);
        jstring value = (jstring)env->CallObjectMethod(entry, getValueMid);

        if (key != nullptr && value != nullptr) {
          const char *keyChars = env->GetStringUTFChars(key, nullptr);
          const char *valueChars = env->GetStringUTFChars(value, nullptr);
          ans.extra[std::string(keyChars)] = std::string(valueChars);

          env->ReleaseStringUTFChars(key, keyChars);
          env->ReleaseStringUTFChars(value, valueChars);
        }

        env->DeleteLocalRef(key);
        env->DeleteLocalRef(value);
        env->DeleteLocalRef(entry);
      }

      env->DeleteLocalRef(entry_set);
      env->DeleteLocalRef(iterator);
      env->DeleteLocalRef(entry_cls);
      env->DeleteLocalRef(iter_cls);
      env->DeleteLocalRef(set_cls);
      env->DeleteLocalRef(map_cls);
      env->DeleteLocalRef(map_obj);
    }
  }

  env->DeleteLocalRef(cls);
  return ans;
}

static OfflineTtsConfig GetOfflineTtsConfig(JNIEnv *env, jobject config,
                                            bool *ok) {
  OfflineTtsConfig ans;

  jclass cls = env->GetObjectClass(config);
  jfieldID fid;

  fid = env->GetFieldID(cls, "model",
                        "Lcom/k2fsa/sherpa/onnx/OfflineTtsModelConfig;");
  jobject model = env->GetObjectField(config, fid);
  jclass model_config_cls = env->GetObjectClass(model);

  fid = env->GetFieldID(model_config_cls, "vits",
                        "Lcom/k2fsa/sherpa/onnx/OfflineTtsVitsModelConfig;");
  jobject vits = env->GetObjectField(model, fid);
  jclass vits_cls = env->GetObjectClass(vits);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.vits.model, model, vits_cls, vits);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.vits.lexicon, lexicon, vits_cls, vits);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.vits.tokens, tokens, vits_cls, vits);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.vits.data_dir, dataDir, vits_cls, vits);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.model.vits.noise_scale, noiseScale, vits_cls,
                             vits);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.model.vits.noise_scale_w, noiseScaleW,
                             vits_cls, vits);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.model.vits.length_scale, lengthScale, vits_cls,
                             vits);

  // matcha
  fid = env->GetFieldID(model_config_cls, "matcha",
                        "Lcom/k2fsa/sherpa/onnx/OfflineTtsMatchaModelConfig;");
  jobject matcha = env->GetObjectField(model, fid);
  jclass matcha_cls = env->GetObjectClass(matcha);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.matcha.acoustic_model, acousticModel,
                              matcha_cls, matcha);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.matcha.vocoder, vocoder, matcha_cls,
                              matcha);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.matcha.lexicon, lexicon, matcha_cls,
                              matcha);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.matcha.tokens, tokens, matcha_cls,
                              matcha);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.matcha.data_dir, dataDir, matcha_cls,
                              matcha);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.model.matcha.noise_scale, noiseScale,
                             matcha_cls, matcha);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.model.matcha.length_scale, lengthScale,
                             matcha_cls, matcha);

  fid = env->GetFieldID(model_config_cls, "kokoro",
                        "Lcom/k2fsa/sherpa/onnx/OfflineTtsKokoroModelConfig;");
  jobject kokoro = env->GetObjectField(model, fid);
  jclass kokoro_cls = env->GetObjectClass(kokoro);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.kokoro.model, model, kokoro_cls,
                              kokoro);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.kokoro.voices, voices, kokoro_cls,
                              kokoro);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.kokoro.tokens, tokens, kokoro_cls,
                              kokoro);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.kokoro.lexicon, lexicon, kokoro_cls,
                              kokoro);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.kokoro.lang, lang, kokoro_cls, kokoro);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.kokoro.data_dir, dataDir, kokoro_cls,
                              kokoro);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.model.kokoro.length_scale, lengthScale,
                             kokoro_cls, kokoro);

  // zipvoice
  fid = env->GetFieldID(
      model_config_cls, "zipvoice",
      "Lcom/k2fsa/sherpa/onnx/OfflineTtsZipVoiceModelConfig;");
  jobject zipvoice = env->GetObjectField(model, fid);
  jclass zipvoice_cls = env->GetObjectClass(zipvoice);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.zipvoice.tokens, tokens, zipvoice_cls,
                              zipvoice);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.zipvoice.encoder, encoder, zipvoice_cls,
                              zipvoice);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.zipvoice.decoder, decoder, zipvoice_cls,
                              zipvoice);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.zipvoice.vocoder, vocoder, zipvoice_cls,
                              zipvoice);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.zipvoice.data_dir, dataDir, zipvoice_cls,
                              zipvoice);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.zipvoice.lexicon, lexicon, zipvoice_cls,
                              zipvoice);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.model.zipvoice.feat_scale, featScale,
                             zipvoice_cls, zipvoice);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.model.zipvoice.t_shift, tShift, zipvoice_cls,
                             zipvoice);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.model.zipvoice.target_rms, targetRms,
                             zipvoice_cls, zipvoice);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.model.zipvoice.guidance_scale, guidanceScale,
                             zipvoice_cls, zipvoice);

  // kitten
  fid = env->GetFieldID(model_config_cls, "kitten",
                        "Lcom/k2fsa/sherpa/onnx/OfflineTtsKittenModelConfig;");
  jobject kitten = env->GetObjectField(model, fid);
  jclass kitten_cls = env->GetObjectClass(kitten);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.kitten.model, model, kitten_cls,
                              kitten);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.kitten.voices, voices, kitten_cls,
                              kitten);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.kitten.tokens, tokens, kitten_cls,
                              kitten);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.kitten.data_dir, dataDir, kitten_cls,
                              kitten);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.model.kitten.length_scale, lengthScale,
                             kitten_cls, kitten);

  // pocket
  fid = env->GetFieldID(model_config_cls, "pocket",
                        "Lcom/k2fsa/sherpa/onnx/OfflineTtsPocketModelConfig;");
  jobject pocket = env->GetObjectField(model, fid);
  jclass pocket_cls = env->GetObjectClass(pocket);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.pocket.lm_flow, lmFlow, pocket_cls,
                              pocket);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.pocket.lm_main, lmMain, pocket_cls,
                              pocket);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.pocket.encoder, encoder, pocket_cls,
                              pocket);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.pocket.decoder, decoder, pocket_cls,
                              pocket);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.pocket.text_conditioner,
                              textConditioner, pocket_cls, pocket);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.pocket.vocab_json, vocabJson,
                              pocket_cls, pocket);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.pocket.token_scores_json,
                              tokenScoresJson, pocket_cls, pocket);

  SHERPA_ONNX_JNI_READ_INT(ans.model.pocket.voice_embedding_cache_capacity,
                           voiceEmbeddingCacheCapacity, pocket_cls, pocket);

  // supertonic
  fid = env->GetFieldID(
      model_config_cls, "supertonic",
      "Lcom/k2fsa/sherpa/onnx/OfflineTtsSupertonicModelConfig;");
  jobject supertonic = env->GetObjectField(model, fid);
  jclass supertonic_cls = env->GetObjectClass(supertonic);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.supertonic.duration_predictor,
                              durationPredictor, supertonic_cls, supertonic);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.supertonic.text_encoder, textEncoder,
                              supertonic_cls, supertonic);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.supertonic.vector_estimator,
                              vectorEstimator, supertonic_cls, supertonic);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.supertonic.vocoder, vocoder,
                              supertonic_cls, supertonic);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.supertonic.tts_json, ttsJson,
                              supertonic_cls, supertonic);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.supertonic.unicode_indexer,
                              unicodeIndexer, supertonic_cls, supertonic);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.supertonic.voice_style, voiceStyle,
                              supertonic_cls, supertonic);

  SHERPA_ONNX_JNI_READ_INT(ans.model.num_threads, numThreads, model_config_cls,
                           model);

  SHERPA_ONNX_JNI_READ_BOOL(ans.model.debug, debug, model_config_cls, model);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.provider, provider, model_config_cls,
                              model);

  SHERPA_ONNX_JNI_READ_STRING(ans.rule_fsts, ruleFsts, cls, config);

  SHERPA_ONNX_JNI_READ_STRING(ans.rule_fars, ruleFars, cls, config);

  SHERPA_ONNX_JNI_READ_INT(ans.max_num_sentences, maxNumSentences, cls, config);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.silence_scale, silenceScale, cls, config);

  env->DeleteLocalRef(model);
  env->DeleteLocalRef(vits);
  env->DeleteLocalRef(vits_cls);
  env->DeleteLocalRef(matcha);
  env->DeleteLocalRef(matcha_cls);
  env->DeleteLocalRef(kokoro);
  env->DeleteLocalRef(kokoro_cls);
  env->DeleteLocalRef(zipvoice);
  env->DeleteLocalRef(zipvoice_cls);
  env->DeleteLocalRef(kitten);
  env->DeleteLocalRef(kitten_cls);
  env->DeleteLocalRef(pocket);
  env->DeleteLocalRef(pocket_cls);
  env->DeleteLocalRef(supertonic);
  env->DeleteLocalRef(supertonic_cls);
  env->DeleteLocalRef(model_config_cls);
  env->DeleteLocalRef(cls);

  *ok = true;
  return ans;
}

}  // namespace sherpa_onnx

// Convert audio samples and sample rate to a Java GeneratedAudio object
static jobject CreateAudioObject(JNIEnv *env, const std::vector<float> &samples,
                                 int32_t sample_rate) {
  // Step 1: Create a jfloatArray for samples
  jfloatArray samples_arr = env->NewFloatArray(samples.size());
  env->SetFloatArrayRegion(samples_arr, 0, samples.size(), samples.data());

  // Step 2: Find the GeneratedAudio class
  jclass gen_audio_cls = env->FindClass("com/k2fsa/sherpa/onnx/GeneratedAudio");
  if (!gen_audio_cls) {
    env->DeleteLocalRef(samples_arr);
    return nullptr;
  }

  // Step 3: Get the constructor: GeneratedAudio(float[] samples, int
  // sampleRate)
  jmethodID ctor = env->GetMethodID(gen_audio_cls, "<init>", "([FI)V");
  if (!ctor) {
    env->DeleteLocalRef(samples_arr);
    env->DeleteLocalRef(gen_audio_cls);
    return nullptr;
  }

  // Step 4: Create the object
  jobject gen_audio_obj =
      env->NewObject(gen_audio_cls, ctor, samples_arr, sample_rate);

  // Step 5: Clean up local refs
  env->DeleteLocalRef(samples_arr);
  env->DeleteLocalRef(gen_audio_cls);

  return gen_audio_obj;
}

static int32_t CallCallback(JNIEnv *env, jobject callback,
                            jfloatArray samples_arr) {
  if (!callback) return 1;

  jclass cls = env->GetObjectClass(callback);
  if (env->ExceptionCheck()) {
    env->DeleteLocalRef(cls);
    return 1;
  }

  jmethodID invoke_mid =
      env->GetMethodID(cls, "invoke", "([F)Ljava/lang/Integer;");
  if (env->ExceptionCheck() || !invoke_mid) {
    env->DeleteLocalRef(cls);
    return 1;
  }

  jobject result = env->CallObjectMethod(callback, invoke_mid, samples_arr);
  if (env->ExceptionCheck() || !result) {
    env->DeleteLocalRef(cls);
    return 1;
  }

  jclass integer_cls = env->GetObjectClass(result);
  jmethodID int_val_mid = env->GetMethodID(integer_cls, "intValue", "()I");
  jint ret = env->CallIntMethod(result, int_val_mid);

  env->DeleteLocalRef(integer_cls);
  env->DeleteLocalRef(result);
  env->DeleteLocalRef(cls);

  return ret;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_newFromAsset(
    JNIEnv *env, jobject /*obj*/, jobject asset_manager, jobject _config) {
#if __ANDROID_API__ >= 9
  AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager);
  if (!mgr) {
    SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr);
    return 0;
  }
#endif

  bool ok = false;
  auto config = sherpa_onnx::GetOfflineTtsConfig(env, _config, &ok);

  if (!ok) {
    SHERPA_ONNX_LOGE("Please read the error message carefully");
    return 0;
  }

  if (config.model.debug) {
#if __ANDROID_API__
    auto str_vec = sherpa_onnx::SplitString(config.ToString(), 128);
    for (const auto &s : str_vec) {
      SHERPA_ONNX_LOGE("%s", s.c_str());
    }
#else
    SHERPA_ONNX_LOGE("%s", config.ToString().c_str());
#endif
  }

  auto tts = new sherpa_onnx::OfflineTts(
#if __ANDROID_API__ >= 9
      mgr,
#endif
      config);

  return reinterpret_cast<jlong>(tts);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_newFromFile(
    JNIEnv *env, jobject /*obj*/, jobject _config) {
  return SafeJNI(
      env, "OfflineTts_newFromFile",
      [&]() -> jlong {
        bool ok = false;
        auto config = sherpa_onnx::GetOfflineTtsConfig(env, _config, &ok);

        if (!ok) {
          SHERPA_ONNX_LOGE("Please read the error message carefully");
          return 0;
        }

        SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());

        if (!config.Validate()) {
          SHERPA_ONNX_LOGE("Errors found in config!");
          return 0;
        }

        auto tts = new sherpa_onnx::OfflineTts(config);
        return reinterpret_cast<jlong>(tts);
      },
      (jlong)0);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_delete(
    JNIEnv * /*env*/, jobject /*obj*/, jlong ptr) {
  delete reinterpret_cast<sherpa_onnx::OfflineTts *>(ptr);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jint JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_getSampleRate(
    JNIEnv * /*env*/, jobject /*obj*/, jlong ptr) {
  return reinterpret_cast<sherpa_onnx::OfflineTts *>(ptr)->SampleRate();
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jint JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_getNumSpeakers(
    JNIEnv * /*env*/, jobject /*obj*/, jlong ptr) {
  return reinterpret_cast<sherpa_onnx::OfflineTts *>(ptr)->NumSpeakers();
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jobject JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_generateImpl(
    JNIEnv *env, jobject /*obj*/, jlong ptr, jstring text, jint sid,
    jfloat speed) {
  const char *p_text = env->GetStringUTFChars(text, nullptr);

  auto audio = reinterpret_cast<sherpa_onnx::OfflineTts *>(ptr)->Generate(
      p_text, sid, speed);

  env->ReleaseStringUTFChars(text, p_text);

  return CreateAudioObject(env, audio.samples, audio.sample_rate);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jobject JNICALL
Java_com_k2fsa_sherpa_onnx_OfflineTts_generateWithCallbackImpl(
    JNIEnv *env, jobject /*obj*/, jlong ptr, jstring text, jint sid,
    jfloat speed, jobject callback) {
  const char *p_text = env->GetStringUTFChars(text, nullptr);

  auto tts = reinterpret_cast<sherpa_onnx::OfflineTts *>(ptr);

  sherpa_onnx::GeneratedAudio audio;

  if (callback) {
    std::function<int32_t(const float *, int32_t, float)> callback_wrapper =
        [env, callback](const float *samples, int32_t n, float) -> int32_t {
      jfloatArray samples_arr = env->NewFloatArray(n);
      env->SetFloatArrayRegion(samples_arr, 0, n, samples);
      int32_t ret = CallCallback(env, callback, samples_arr);
      env->DeleteLocalRef(samples_arr);
      return ret;
    };

    audio = tts->Generate(p_text, sid, speed, callback_wrapper);
  } else {
    audio = tts->Generate(p_text, sid, speed, nullptr);
  }

  env->ReleaseStringUTFChars(text, p_text);

  return CreateAudioObject(env, audio.samples, audio.sample_rate);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jobject JNICALL
Java_com_k2fsa_sherpa_onnx_OfflineTts_generateWithConfigImpl(
    JNIEnv *env, jobject /*obj*/, jlong ptr, jstring text, jobject _gen_config,
    jobject callback) {
  const char *p_text = env->GetStringUTFChars(text, nullptr);
  auto gen_config = sherpa_onnx::GetGenerationConfig(env, _gen_config);
  auto tts = reinterpret_cast<sherpa_onnx::OfflineTts *>(ptr);

  sherpa_onnx::GeneratedAudio audio;

  if (callback) {
    std::function<int32_t(const float *, int32_t, float)> callback_wrapper =
        [env, callback](const float *samples, int32_t n, float) -> int32_t {
      jfloatArray samples_arr = env->NewFloatArray(n);
      env->SetFloatArrayRegion(samples_arr, 0, n, samples);
      int32_t ret = CallCallback(env, callback, samples_arr);
      env->DeleteLocalRef(samples_arr);
      return ret;
    };

    audio = tts->Generate(p_text, gen_config, callback_wrapper);
  } else {
    audio = tts->Generate(p_text, gen_config, nullptr);
  }

  env->ReleaseStringUTFChars(text, p_text);

  return CreateAudioObject(env, audio.samples, audio.sample_rate);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jboolean JNICALL Java_com_k2fsa_sherpa_onnx_GeneratedAudio_saveImpl(
    JNIEnv *env, jobject /*obj*/, jstring filename, jfloatArray samples,
    jint sample_rate) {
  const char *p_filename = env->GetStringUTFChars(filename, nullptr);

  jfloat *p = env->GetFloatArrayElements(samples, nullptr);
  jsize n = env->GetArrayLength(samples);

  bool ok = sherpa_onnx::WriteWave(p_filename, sample_rate, p, n);

  env->ReleaseStringUTFChars(filename, p_filename);
  env->ReleaseFloatArrayElements(samples, p, JNI_ABORT);

  return ok;
}


================================================
FILE: sherpa-onnx/jni/online-punctuation.cc
================================================
// sherpa-onnx/jni/online-punctuation.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/online-punctuation.h"

#include <string>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/jni/common.h"

namespace sherpa_onnx {

static OnlinePunctuationConfig GetOnlinePunctuationConfig(JNIEnv *env,
                                                          jobject config,
                                                          bool *ok) {
  OnlinePunctuationConfig ans;

  jclass cls = env->GetObjectClass(config);
  jfieldID fid;

  fid = env->GetFieldID(cls, "model",
                        "Lcom/k2fsa/sherpa/onnx/OnlinePunctuationModelConfig;");
  jobject model_config = env->GetObjectField(config, fid);
  jclass model_config_cls = env->GetObjectClass(model_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.cnn_bilstm, cnnBilstm, model_config_cls,
                              model_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.bpe_vocab, bpeVocab, model_config_cls,
                              model_config);

  SHERPA_ONNX_JNI_READ_INT(ans.model.num_threads, numThreads, model_config_cls,
                           model_config);

  SHERPA_ONNX_JNI_READ_BOOL(ans.model.debug, debug, model_config_cls,
                            model_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model.provider, provider, model_config_cls,
                              model_config);

  *ok = true;
  return ans;
}

}  // namespace sherpa_onnx

SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL
Java_com_k2fsa_sherpa_onnx_OnlinePunctuation_newFromAsset(JNIEnv *env,
                                                          jobject /*obj*/,
                                                          jobject asset_manager,
                                                          jobject _config) {
#if __ANDROID_API__ >= 9
  AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager);
  if (!mgr) {
    SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr);
    return 0;
  }
#endif
  bool ok = false;
  auto config = sherpa_onnx::GetOnlinePunctuationConfig(env, _config, &ok);

  if (!ok) {
    SHERPA_ONNX_LOGE("Please read the error message carefully");
    return 0;
  }

  SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());

  auto model = new sherpa_onnx::OnlinePunctuation(
#if __ANDROID_API__ >= 9
      mgr,
#endif
      config);

  return (jlong)model;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL
Java_com_k2fsa_sherpa_onnx_OnlinePunctuation_newFromFile(JNIEnv *env,
                                                         jobject /*obj*/,
                                                         jobject _config) {
  bool ok = false;
  auto config = sherpa_onnx::GetOnlinePunctuationConfig(env, _config, &ok);

  if (!ok) {
    SHERPA_ONNX_LOGE("Please read the error message carefully");
    return 0;
  }

  SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());

  if (!config.Validate()) {
    SHERPA_ONNX_LOGE("Errors found in config!");
    return 0;
  }

  auto model = new sherpa_onnx::OnlinePunctuation(config);

  return (jlong)model;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_OnlinePunctuation_delete(
    JNIEnv * /*env*/, jobject /*obj*/, jlong ptr) {
  delete reinterpret_cast<sherpa_onnx::OnlinePunctuation *>(ptr);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jstring JNICALL
Java_com_k2fsa_sherpa_onnx_OnlinePunctuation_addPunctuation(JNIEnv *env,
                                                            jobject /*obj*/,
                                                            jlong ptr,
                                                            jstring text) {
  auto punct = reinterpret_cast<const sherpa_onnx::OnlinePunctuation *>(ptr);

  const char *ptext = env->GetStringUTFChars(text, nullptr);

  std::string result = punct->AddPunctuationWithCase(ptext);

  env->ReleaseStringUTFChars(text, ptext);

  return env->NewStringUTF(result.c_str());
}


================================================
FILE: sherpa-onnx/jni/online-recognizer.cc
================================================
// sherpa-onnx/jni/online-recognizer.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/online-recognizer.h"

#include <memory>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/text-utils.h"
#include "sherpa-onnx/jni/common.h"

namespace sherpa_onnx {

OnlineModelConfig GetOnlineModelConfig(JNIEnv *env, jclass model_config_cls,
                                       jobject model_config, bool *ok) {
  OnlineModelConfig ans;

  auto fid =
      env->GetFieldID(model_config_cls, "transducer",
                      "Lcom/k2fsa/sherpa/onnx/OnlineTransducerModelConfig;");
  jobject transducer_config = env->GetObjectField(model_config, fid);
  jclass transducer_config_cls = env->GetObjectClass(transducer_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.transducer.encoder, encoder,
                              transducer_config_cls, transducer_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.transducer.decoder, decoder,
                              transducer_config_cls, transducer_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.transducer.joiner, joiner,
                              transducer_config_cls, transducer_config);

  fid = env->GetFieldID(model_config_cls, "paraformer",
                        "Lcom/k2fsa/sherpa/onnx/OnlineParaformerModelConfig;");
  jobject paraformer_config = env->GetObjectField(model_config, fid);
  jclass paraformer_config_cls = env->GetObjectClass(paraformer_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.paraformer.encoder, encoder,
                              paraformer_config_cls, paraformer_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.paraformer.decoder, decoder,
                              paraformer_config_cls, paraformer_config);

  fid =
      env->GetFieldID(model_config_cls, "zipformer2Ctc",
                      "Lcom/k2fsa/sherpa/onnx/OnlineZipformer2CtcModelConfig;");
  jobject zipformer2_ctc_config = env->GetObjectField(model_config, fid);
  jclass zipformer2_ctc_config_cls = env->GetObjectClass(zipformer2_ctc_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.zipformer2_ctc.model, model,
                              zipformer2_ctc_config_cls, zipformer2_ctc_config);

  fid = env->GetFieldID(model_config_cls, "neMoCtc",
                        "Lcom/k2fsa/sherpa/onnx/OnlineNeMoCtcModelConfig;");
  jobject nemo_ctc_config = env->GetObjectField(model_config, fid);
  jclass nemo_ctc_config_cls = env->GetObjectClass(nemo_ctc_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.nemo_ctc.model, model, nemo_ctc_config_cls,
                              nemo_ctc_config);

  fid = env->GetFieldID(model_config_cls, "toneCtc",
                        "Lcom/k2fsa/sherpa/onnx/OnlineToneCtcModelConfig;");
  jobject t_one_ctc_config = env->GetObjectField(model_config, fid);
  jclass t_one_ctc_config_cls = env->GetObjectClass(t_one_ctc_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.t_one_ctc.model, model, t_one_ctc_config_cls,
                              t_one_ctc_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.tokens, tokens, model_config_cls,
                              model_config);

  SHERPA_ONNX_JNI_READ_INT(ans.num_threads, numThreads, model_config_cls,
                           model_config);

  SHERPA_ONNX_JNI_READ_BOOL(ans.debug, debug, model_config_cls, model_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.provider_config.provider, provider,
                              model_config_cls, model_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model_type, modelType, model_config_cls,
                              model_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.modeling_unit, modelingUnit, model_config_cls,
                              model_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.bpe_vocab, bpeVocab, model_config_cls,
                              model_config);

  *ok = true;
  return ans;
}

static OnlineRecognizerConfig GetConfig(JNIEnv *env, jobject config, bool *ok) {
  OnlineRecognizerConfig ans;

  jclass cls = env->GetObjectClass(config);
  jfieldID fid;

  // https://docs.oracle.com/javase/7/docs/technotes/guides/jni/spec/types.html
  // https://courses.cs.washington.edu/courses/cse341/99wi/java/tutorial/native1.1/implementing/field.html

  SHERPA_ONNX_JNI_READ_STRING(ans.decoding_method, decodingMethod, cls, config);

  SHERPA_ONNX_JNI_READ_INT(ans.max_active_paths, maxActivePaths, cls, config);

  SHERPA_ONNX_JNI_READ_STRING(ans.hotwords_file, hotwordsFile, cls, config);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.hotwords_score, hotwordsScore, cls, config);

  SHERPA_ONNX_JNI_READ_STRING(ans.rule_fsts, ruleFsts, cls, config);

  SHERPA_ONNX_JNI_READ_STRING(ans.rule_fars, ruleFars, cls, config);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.blank_penalty, blankPenalty, cls, config);

  fid = env->GetFieldID(cls, "featConfig",
                        "Lcom/k2fsa/sherpa/onnx/FeatureConfig;");
  jobject feat_config = env->GetObjectField(config, fid);
  jclass feat_config_cls = env->GetObjectClass(feat_config);

  SHERPA_ONNX_JNI_READ_INT(ans.feat_config.sampling_rate, sampleRate,
                           feat_config_cls, feat_config);

  SHERPA_ONNX_JNI_READ_INT(ans.feat_config.feature_dim, featureDim,
                           feat_config_cls, feat_config);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.feat_config.dither, dither, feat_config_cls,
                             feat_config);

  SHERPA_ONNX_JNI_READ_BOOL(ans.enable_endpoint, enableEndpoint, cls, config);

  fid = env->GetFieldID(cls, "endpointConfig",
                        "Lcom/k2fsa/sherpa/onnx/EndpointConfig;");
  jobject endpoint_config = env->GetObjectField(config, fid);
  jclass endpoint_config_cls = env->GetObjectClass(endpoint_config);

  fid = env->GetFieldID(endpoint_config_cls, "rule1",
                        "Lcom/k2fsa/sherpa/onnx/EndpointRule;");
  jobject rule1 = env->GetObjectField(endpoint_config, fid);
  jclass rule_class = env->GetObjectClass(rule1);

  fid = env->GetFieldID(endpoint_config_cls, "rule2",
                        "Lcom/k2fsa/sherpa/onnx/EndpointRule;");
  jobject rule2 = env->GetObjectField(endpoint_config, fid);

  fid = env->GetFieldID(endpoint_config_cls, "rule3",
                        "Lcom/k2fsa/sherpa/onnx/EndpointRule;");
  jobject rule3 = env->GetObjectField(endpoint_config, fid);

  fid = env->GetFieldID(rule_class, "mustContainNonSilence", "Z");
  ans.endpoint_config.rule1.must_contain_nonsilence =
      env->GetBooleanField(rule1, fid);
  ans.endpoint_config.rule2.must_contain_nonsilence =
      env->GetBooleanField(rule2, fid);
  ans.endpoint_config.rule3.must_contain_nonsilence =
      env->GetBooleanField(rule3, fid);

  fid = env->GetFieldID(rule_class, "minTrailingSilence", "F");
  ans.endpoint_config.rule1.min_trailing_silence =
      env->GetFloatField(rule1, fid);
  ans.endpoint_config.rule2.min_trailing_silence =
      env->GetFloatField(rule2, fid);
  ans.endpoint_config.rule3.min_trailing_silence =
      env->GetFloatField(rule3, fid);

  fid = env->GetFieldID(rule_class, "minUtteranceLength", "F");
  ans.endpoint_config.rule1.min_utterance_length =
      env->GetFloatField(rule1, fid);
  ans.endpoint_config.rule2.min_utterance_length =
      env->GetFloatField(rule2, fid);
  ans.endpoint_config.rule3.min_utterance_length =
      env->GetFloatField(rule3, fid);

  //---------- model config ----------
  fid = env->GetFieldID(cls, "modelConfig",
                        "Lcom/k2fsa/sherpa/onnx/OnlineModelConfig;");
  jobject model_config = env->GetObjectField(config, fid);
  jclass model_config_cls = env->GetObjectClass(model_config);

  ans.model_config =
      GetOnlineModelConfig(env, model_config_cls, model_config, ok);

  if (!*ok) {
    return ans;
  }

  *ok = false;

  //---------- rnn lm model config ----------
  fid = env->GetFieldID(cls, "lmConfig",
                        "Lcom/k2fsa/sherpa/onnx/OnlineLMConfig;");
  jobject lm_model_config = env->GetObjectField(config, fid);
  jclass lm_model_config_cls = env->GetObjectClass(lm_model_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.lm_config.model, model, lm_model_config_cls,
                              lm_model_config);
  SHERPA_ONNX_JNI_READ_FLOAT(ans.lm_config.scale, scale, lm_model_config_cls,
                             lm_model_config);

  fid = env->GetFieldID(cls, "ctcFstDecoderConfig",
                        "Lcom/k2fsa/sherpa/onnx/OnlineCtcFstDecoderConfig;");

  jobject fst_decoder_config = env->GetObjectField(config, fid);
  jclass fst_decoder_config_cls = env->GetObjectClass(fst_decoder_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.ctc_fst_decoder_config.graph, graph,
                              fst_decoder_config_cls, fst_decoder_config);

  SHERPA_ONNX_JNI_READ_INT(ans.ctc_fst_decoder_config.max_active, maxActive,
                           fst_decoder_config_cls, fst_decoder_config);

  fid = env->GetFieldID(cls, "hr",
                        "Lcom/k2fsa/sherpa/onnx/HomophoneReplacerConfig;");
  jobject hr_config = env->GetObjectField(config, fid);
  jclass hr_config_cls = env->GetObjectClass(hr_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.hr.lexicon, lexicon, hr_config_cls,
                              hr_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.hr.rule_fsts, ruleFsts, hr_config_cls,
                              hr_config);

  *ok = true;
  return ans;
}

}  // namespace sherpa_onnx

SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL
Java_com_k2fsa_sherpa_onnx_OnlineRecognizer_newFromAsset(JNIEnv *env,
                                                         jobject /*obj*/,
                                                         jobject asset_manager,
                                                         jobject _config) {
#if __ANDROID_API__ >= 9
  AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager);
  if (!mgr) {
    SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr);
    return 0;
  }
#endif
  bool ok = false;
  auto config = sherpa_onnx::GetConfig(env, _config, &ok);

  if (!ok) {
    SHERPA_ONNX_LOGE("Please read the error message carefully");
    return 0;
  }

  if (config.model_config.debug) {
#if __ANDROID_API__
    auto str_vec = sherpa_onnx::SplitString(config.ToString(), 128);
    for (const auto &s : str_vec) {
      SHERPA_ONNX_LOGE("%s", s.c_str());
    }
#else
    SHERPA_ONNX_LOGE("%s", config.ToString().c_str());
#endif
  }

  auto recognizer = new sherpa_onnx::OnlineRecognizer(
#if __ANDROID_API__ >= 9
      mgr,
#endif
      config);

  return (jlong)recognizer;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OnlineRecognizer_newFromFile(
    JNIEnv *env, jobject /*obj*/, jobject _config) {
  bool ok = false;
  auto config = sherpa_onnx::GetConfig(env, _config, &ok);

  if (!ok) {
    SHERPA_ONNX_LOGE("Please read the error message carefully");
    return 0;
  }

  if (config.model_config.debug) {
#if __ANDROID_API__
    auto str_vec = sherpa_onnx::SplitString(config.ToString(), 128);
    for (const auto &s : str_vec) {
      SHERPA_ONNX_LOGE("%s", s.c_str());
    }
#else
    SHERPA_ONNX_LOGE("%s", config.ToString().c_str());
#endif
  }

  if (!config.Validate()) {
    SHERPA_ONNX_LOGE("Errors found in config!");
    return 0;
  }

  auto recognizer = new sherpa_onnx::OnlineRecognizer(config);

  return (jlong)recognizer;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_OnlineRecognizer_delete(
    JNIEnv * /*env*/, jobject /*obj*/, jlong ptr) {
  delete reinterpret_cast<sherpa_onnx::OnlineRecognizer *>(ptr);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_OnlineRecognizer_reset(
    JNIEnv * /*env*/, jobject /*obj*/, jlong ptr, jlong stream_ptr) {
  auto recognizer = reinterpret_cast<sherpa_onnx::OnlineRecognizer *>(ptr);
  auto stream = reinterpret_cast<sherpa_onnx::OnlineStream *>(stream_ptr);
  recognizer->Reset(stream);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jboolean JNICALL Java_com_k2fsa_sherpa_onnx_OnlineRecognizer_isReady(
    JNIEnv * /*env*/, jobject /*obj*/, jlong ptr, jlong stream_ptr) {
  auto recognizer = reinterpret_cast<sherpa_onnx::OnlineRecognizer *>(ptr);
  auto stream = reinterpret_cast<sherpa_onnx::OnlineStream *>(stream_ptr);

  return recognizer->IsReady(stream);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jboolean JNICALL
Java_com_k2fsa_sherpa_onnx_OnlineRecognizer_isEndpoint(JNIEnv * /*env*/,
                                                       jobject /*obj*/,
                                                       jlong ptr,
                                                       jlong stream_ptr) {
  auto recognizer = reinterpret_cast<sherpa_onnx::OnlineRecognizer *>(ptr);
  auto stream = reinterpret_cast<sherpa_onnx::OnlineStream *>(stream_ptr);

  return recognizer->IsEndpoint(stream);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_OnlineRecognizer_decode(
    JNIEnv * /*env*/, jobject /*obj*/, jlong ptr, jlong stream_ptr) {
  auto recognizer = reinterpret_cast<sherpa_onnx::OnlineRecognizer *>(ptr);
  auto stream = reinterpret_cast<sherpa_onnx::OnlineStream *>(stream_ptr);

  recognizer->DecodeStream(stream);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL
Java_com_k2fsa_sherpa_onnx_OnlineRecognizer_decodeStreams(
    JNIEnv *env, jobject /*obj*/, jlong ptr, jlongArray stream_ptrs) {
  auto recognizer = reinterpret_cast<sherpa_onnx::OnlineRecognizer *>(ptr);

  jlong *p = env->GetLongArrayElements(stream_ptrs, nullptr);
  jsize n = env->GetArrayLength(stream_ptrs);

  auto ss = reinterpret_cast<sherpa_onnx::OnlineStream **>(p);

  recognizer->DecodeStreams(ss, n);

  env->ReleaseLongArrayElements(stream_ptrs, p, JNI_ABORT);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL
Java_com_k2fsa_sherpa_onnx_OnlineRecognizer_createStream(JNIEnv *env,
                                                         jobject /*obj*/,
                                                         jlong ptr,
                                                         jstring hotwords) {
  auto recognizer = reinterpret_cast<sherpa_onnx::OnlineRecognizer *>(ptr);

  const char *p = env->GetStringUTFChars(hotwords, nullptr);
  std::unique_ptr<sherpa_onnx::OnlineStream> stream;

  if (strlen(p) == 0) {
    stream = recognizer->CreateStream();
  } else {
    stream = recognizer->CreateStream(p);
  }

  env->ReleaseStringUTFChars(hotwords, p);

  // The user is responsible to free the returned pointer.
  //
  // See Java_com_k2fsa_sherpa_onnx_OfflineStream_delete() from
  // ./offline-stream.cc
  sherpa_onnx::OnlineStream *ans = stream.release();
  return (jlong)ans;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jobject JNICALL Java_com_k2fsa_sherpa_onnx_OnlineRecognizer_getResult(
    JNIEnv *env, jobject /*obj*/, jlong ptr, jlong stream_ptr) {
  auto recognizer = reinterpret_cast<sherpa_onnx::OnlineRecognizer *>(ptr);
  auto stream = reinterpret_cast<sherpa_onnx::OnlineStream *>(stream_ptr);

  sherpa_onnx::OnlineRecognizerResult result = recognizer->GetResult(stream);

  // Find the OnlineRecognizerResult class
  jclass cls = env->FindClass("com/k2fsa/sherpa/onnx/OnlineRecognizerResult");
  if (cls == nullptr) {
    SHERPA_ONNX_LOGE("Failed to find class OnlineRecognizerResult");
    return nullptr;
  }

  // Find the constructor: (String, String[], float[], float[])V
  jmethodID ctor = env->GetMethodID(
      cls, "<init>", "(Ljava/lang/String;[Ljava/lang/String;[F[F)V");

  // text
  jstring text = env->NewStringUTF(result.text.c_str());

  // tokens
  jclass string_cls = env->FindClass("java/lang/String");
  jobjectArray tokens =
      env->NewObjectArray(result.tokens.size(), string_cls, nullptr);
  env->DeleteLocalRef(string_cls);
  for (size_t i = 0; i < result.tokens.size(); ++i) {
    jstring token_str = env->NewStringUTF(result.tokens[i].c_str());
    env->SetObjectArrayElement(tokens, i, token_str);
    env->DeleteLocalRef(token_str);
  }

  // timestamps
  jfloatArray timestamps = env->NewFloatArray(result.timestamps.size());
  env->SetFloatArrayRegion(timestamps, 0, result.timestamps.size(),
                           result.timestamps.data());

  // ys_probs
  jfloatArray ys_probs = env->NewFloatArray(result.ys_probs.size());
  env->SetFloatArrayRegion(ys_probs, 0, result.ys_probs.size(),
                           result.ys_probs.data());

  // Construct and return OnlineRecognizerResult
  jobject obj = env->NewObject(cls, ctor, text, tokens, timestamps, ys_probs);

  // Delete local references
  env->DeleteLocalRef(text);
  env->DeleteLocalRef(tokens);
  env->DeleteLocalRef(timestamps);
  env->DeleteLocalRef(ys_probs);
  env->DeleteLocalRef(cls);

  return obj;
}


================================================
FILE: sherpa-onnx/jni/online-speech-denoiser.cc
================================================
// sherpa-onnx/jni/online-speech-denoiser.cc
//
// Copyright (c)  2026  Xiaomi Corporation
#include "sherpa-onnx/csrc/online-speech-denoiser.h"

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/jni/common.h"
#include "sherpa-onnx/jni/speech-denoiser.h"

SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL
Java_com_k2fsa_sherpa_onnx_OnlineSpeechDenoiser_newFromAsset(
    JNIEnv *env, jobject /*obj*/, jobject asset_manager, jobject _config) {
#if __ANDROID_API__ >= 9
  AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager);
  if (!mgr) {
    SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr);
    return 0;
  }
#endif

  bool ok = false;
  auto config = sherpa_onnx::GetOnlineSpeechDenoiserConfig(env, _config, &ok);
  if (!ok) {
    SHERPA_ONNX_LOGE("Please read the error message carefully");
    return 0;
  }

  SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());

  auto speech_denoiser = new sherpa_onnx::OnlineSpeechDenoiser(
#if __ANDROID_API__ >= 9
      mgr,
#endif
      config);

  return reinterpret_cast<jlong>(speech_denoiser);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL
Java_com_k2fsa_sherpa_onnx_OnlineSpeechDenoiser_newFromFile(JNIEnv *env,
                                                            jobject /*obj*/,
                                                            jobject _config) {
  return SafeJNI(
      env, "OnlineSpeechDenoiser_newFromFile",
      [&]() -> jlong {
        bool ok = false;
        auto config =
            sherpa_onnx::GetOnlineSpeechDenoiserConfig(env, _config, &ok);

        if (!ok) {
          SHERPA_ONNX_LOGE("Please read the error message carefully");
          return 0;
        }

        SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());

        if (!config.Validate()) {
          SHERPA_ONNX_LOGE("Errors found in config!");
          return 0;
        }

        auto speech_denoiser = new sherpa_onnx::OnlineSpeechDenoiser(config);
        return reinterpret_cast<jlong>(speech_denoiser);
      },
      static_cast<jlong>(0));
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_OnlineSpeechDenoiser_delete(
    JNIEnv * /*env*/, jobject /*obj*/, jlong ptr) {
  delete reinterpret_cast<sherpa_onnx::OnlineSpeechDenoiser *>(ptr);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jint JNICALL
Java_com_k2fsa_sherpa_onnx_OnlineSpeechDenoiser_getSampleRate(JNIEnv * /*env*/,
                                                              jobject /*obj*/,
                                                              jlong ptr) {
  return reinterpret_cast<sherpa_onnx::OnlineSpeechDenoiser *>(ptr)
      ->GetSampleRate();
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jint JNICALL
Java_com_k2fsa_sherpa_onnx_OnlineSpeechDenoiser_getFrameShiftInSamples(
    JNIEnv * /*env*/, jobject /*obj*/, jlong ptr) {
  return reinterpret_cast<sherpa_onnx::OnlineSpeechDenoiser *>(ptr)
      ->GetFrameShiftInSamples();
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jobject JNICALL Java_com_k2fsa_sherpa_onnx_OnlineSpeechDenoiser_run(
    JNIEnv *env, jobject /*obj*/, jlong ptr, jfloatArray samples,
    jint sample_rate) {
  auto speech_denoiser =
      reinterpret_cast<sherpa_onnx::OnlineSpeechDenoiser *>(ptr);

  jfloat *p = env->GetFloatArrayElements(samples, nullptr);
  jsize n = env->GetArrayLength(samples);
  auto denoised = speech_denoiser->Run(p, n, sample_rate);
  env->ReleaseFloatArrayElements(samples, p, JNI_ABORT);

  return sherpa_onnx::NewDenoisedAudio(env, denoised);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jobject JNICALL Java_com_k2fsa_sherpa_onnx_OnlineSpeechDenoiser_flush(
    JNIEnv *env, jobject /*obj*/, jlong ptr) {
  auto speech_denoiser =
      reinterpret_cast<sherpa_onnx::OnlineSpeechDenoiser *>(ptr);
  auto denoised = speech_denoiser->Flush();
  return sherpa_onnx::NewDenoisedAudio(env, denoised);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_OnlineSpeechDenoiser_reset(
    JNIEnv * /*env*/, jobject /*obj*/, jlong ptr) {
  reinterpret_cast<sherpa_onnx::OnlineSpeechDenoiser *>(ptr)->Reset();
}


================================================
FILE: sherpa-onnx/jni/online-stream.cc
================================================
// sherpa-onnx/jni/online-stream.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/online-stream.h"

#include "sherpa-onnx/jni/common.h"

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_OnlineStream_delete(
    JNIEnv * /*env*/, jobject /*obj*/, jlong ptr) {
  delete reinterpret_cast<sherpa_onnx::OnlineStream *>(ptr);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_OnlineStream_acceptWaveform(
    JNIEnv *env, jobject /*obj*/, jlong ptr, jfloatArray samples,
    jint sample_rate) {
  auto stream = reinterpret_cast<sherpa_onnx::OnlineStream *>(ptr);

  jfloat *p = env->GetFloatArrayElements(samples, nullptr);
  jsize n = env->GetArrayLength(samples);
  stream->AcceptWaveform(sample_rate, p, n);
  env->ReleaseFloatArrayElements(samples, p, JNI_ABORT);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_OnlineStream_inputFinished(
    JNIEnv * /*env*/, jobject /*obj*/, jlong ptr) {
  auto stream = reinterpret_cast<sherpa_onnx::OnlineStream *>(ptr);
  stream->InputFinished();
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_OnlineStream_setOption(
    JNIEnv *env, jobject /*obj*/, jlong ptr, jstring key, jstring value) {
  auto stream = reinterpret_cast<sherpa_onnx::OnlineStream *>(ptr);
  const char *p_key = env->GetStringUTFChars(key, nullptr);
  const char *p_value = env->GetStringUTFChars(value, nullptr);
  stream->SetOption(p_key, p_value);
  env->ReleaseStringUTFChars(key, p_key);
  env->ReleaseStringUTFChars(value, p_value);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jstring JNICALL Java_com_k2fsa_sherpa_onnx_OnlineStream_getOption(
    JNIEnv *env, jobject /*obj*/, jlong ptr, jstring key) {
  auto stream = reinterpret_cast<sherpa_onnx::OnlineStream *>(ptr);
  const char *p_key = env->GetStringUTFChars(key, nullptr);
  const std::string &value = stream->GetOption(p_key);
  env->ReleaseStringUTFChars(key, p_key);
  return env->NewStringUTF(value.c_str());
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jboolean JNICALL Java_com_k2fsa_sherpa_onnx_OnlineStream_hasOption(
    JNIEnv *env, jobject /*obj*/, jlong ptr, jstring key) {
  auto stream = reinterpret_cast<sherpa_onnx::OnlineStream *>(ptr);
  const char *p_key = env->GetStringUTFChars(key, nullptr);
  jboolean result = stream->HasOption(p_key);
  env->ReleaseStringUTFChars(key, p_key);
  return result;
}


================================================
FILE: sherpa-onnx/jni/sherpa-onnx-symbols.exp
================================================
_Java_com_k2fsa_sherpa_onnx_AudioTagging_compute
_Java_com_k2fsa_sherpa_onnx_AudioTagging_createStream
_Java_com_k2fsa_sherpa_onnx_AudioTagging_delete
_Java_com_k2fsa_sherpa_onnx_AudioTagging_newFromAsset
_Java_com_k2fsa_sherpa_onnx_AudioTagging_newFromFile
_Java_com_k2fsa_sherpa_onnx_DenoisedAudio_saveImpl
_Java_com_k2fsa_sherpa_onnx_GeneratedAudio_saveImpl
_Java_com_k2fsa_sherpa_onnx_KeywordSpotter_createStream
_Java_com_k2fsa_sherpa_onnx_KeywordSpotter_decode
_Java_com_k2fsa_sherpa_onnx_KeywordSpotter_delete
_Java_com_k2fsa_sherpa_onnx_KeywordSpotter_getResult
_Java_com_k2fsa_sherpa_onnx_KeywordSpotter_isReady
_Java_com_k2fsa_sherpa_onnx_KeywordSpotter_newFromAsset
_Java_com_k2fsa_sherpa_onnx_KeywordSpotter_newFromFile
_Java_com_k2fsa_sherpa_onnx_KeywordSpotter_reset
_Java_com_k2fsa_sherpa_onnx_OfflinePunctuation_addPunctuation
_Java_com_k2fsa_sherpa_onnx_OfflinePunctuation_delete
_Java_com_k2fsa_sherpa_onnx_OfflinePunctuation_newFromAsset
_Java_com_k2fsa_sherpa_onnx_OfflinePunctuation_newFromFile
_Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_createStream
_Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_decode
_Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_decodeStreams
_Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_delete
_Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_getResult
_Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_newFromAsset
_Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_newFromFile
_Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_prependAdspLibraryPath
_Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_setConfig
_Java_com_k2fsa_sherpa_onnx_OfflineSpeakerDiarization_delete
_Java_com_k2fsa_sherpa_onnx_OfflineSpeakerDiarization_getSampleRate
_Java_com_k2fsa_sherpa_onnx_OfflineSpeakerDiarization_newFromAsset
_Java_com_k2fsa_sherpa_onnx_OfflineSpeakerDiarization_newFromFile
_Java_com_k2fsa_sherpa_onnx_OfflineSpeakerDiarization_process
_Java_com_k2fsa_sherpa_onnx_OfflineSpeakerDiarization_processWithCallback
_Java_com_k2fsa_sherpa_onnx_OfflineSpeakerDiarization_setConfig
_Java_com_k2fsa_sherpa_onnx_OfflineSpeechDenoiser_delete
_Java_com_k2fsa_sherpa_onnx_OfflineSpeechDenoiser_getSampleRate
_Java_com_k2fsa_sherpa_onnx_OfflineSpeechDenoiser_newFromAsset
_Java_com_k2fsa_sherpa_onnx_OfflineSpeechDenoiser_newFromFile
_Java_com_k2fsa_sherpa_onnx_OfflineSpeechDenoiser_run
_Java_com_k2fsa_sherpa_onnx_OfflineStream_acceptWaveform
_Java_com_k2fsa_sherpa_onnx_OfflineStream_delete
_Java_com_k2fsa_sherpa_onnx_OfflineTts_delete
_Java_com_k2fsa_sherpa_onnx_OfflineTts_generateImpl
_Java_com_k2fsa_sherpa_onnx_OfflineTts_generateWithCallbackImpl
_Java_com_k2fsa_sherpa_onnx_OfflineTts_generateWithConfigImpl
_Java_com_k2fsa_sherpa_onnx_OfflineTts_getNumSpeakers
_Java_com_k2fsa_sherpa_onnx_OfflineTts_getSampleRate
_Java_com_k2fsa_sherpa_onnx_OfflineTts_newFromAsset
_Java_com_k2fsa_sherpa_onnx_OfflineTts_newFromFile
_Java_com_k2fsa_sherpa_onnx_OnlinePunctuation_addPunctuation
_Java_com_k2fsa_sherpa_onnx_OnlinePunctuation_delete
_Java_com_k2fsa_sherpa_onnx_OnlinePunctuation_newFromAsset
_Java_com_k2fsa_sherpa_onnx_OnlinePunctuation_newFromFile
_Java_com_k2fsa_sherpa_onnx_OnlineSpeechDenoiser_delete
_Java_com_k2fsa_sherpa_onnx_OnlineSpeechDenoiser_flush
_Java_com_k2fsa_sherpa_onnx_OnlineSpeechDenoiser_getFrameShiftInSamples
_Java_com_k2fsa_sherpa_onnx_OnlineSpeechDenoiser_getSampleRate
_Java_com_k2fsa_sherpa_onnx_OnlineSpeechDenoiser_newFromAsset
_Java_com_k2fsa_sherpa_onnx_OnlineSpeechDenoiser_newFromFile
_Java_com_k2fsa_sherpa_onnx_OnlineSpeechDenoiser_reset
_Java_com_k2fsa_sherpa_onnx_OnlineSpeechDenoiser_run
_Java_com_k2fsa_sherpa_onnx_OnlineRecognizer_createStream
_Java_com_k2fsa_sherpa_onnx_OnlineRecognizer_decode
_Java_com_k2fsa_sherpa_onnx_OnlineRecognizer_decodeStreams
_Java_com_k2fsa_sherpa_onnx_OnlineRecognizer_delete
_Java_com_k2fsa_sherpa_onnx_OnlineRecognizer_getResult
_Java_com_k2fsa_sherpa_onnx_OnlineRecognizer_isEndpoint
_Java_com_k2fsa_sherpa_onnx_OnlineRecognizer_isReady
_Java_com_k2fsa_sherpa_onnx_OnlineRecognizer_newFromAsset
_Java_com_k2fsa_sherpa_onnx_OnlineRecognizer_newFromFile
_Java_com_k2fsa_sherpa_onnx_OnlineRecognizer_reset
_Java_com_k2fsa_sherpa_onnx_OnlineStream_acceptWaveform
_Java_com_k2fsa_sherpa_onnx_OnlineStream_delete
_Java_com_k2fsa_sherpa_onnx_OnlineStream_inputFinished
_Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_compute
_Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_createStream
_Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_delete
_Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_dim
_Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_isReady
_Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_newFromAsset
_Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_newFromFile
_Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_add
_Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_addList
_Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_allSpeakerNames
_Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_contains
_Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_create
_Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_delete
_Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_numSpeakers
_Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_remove
_Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_search
_Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_verify
_Java_com_k2fsa_sherpa_onnx_SpokenLanguageIdentification_compute
_Java_com_k2fsa_sherpa_onnx_SpokenLanguageIdentification_createStream
_Java_com_k2fsa_sherpa_onnx_SpokenLanguageIdentification_delete
_Java_com_k2fsa_sherpa_onnx_SpokenLanguageIdentification_newFromAsset
_Java_com_k2fsa_sherpa_onnx_SpokenLanguageIdentification_newFromFile
_Java_com_k2fsa_sherpa_onnx_Vad_acceptWaveform
_Java_com_k2fsa_sherpa_onnx_Vad_clear
_Java_com_k2fsa_sherpa_onnx_Vad_compute
_Java_com_k2fsa_sherpa_onnx_Vad_delete
_Java_com_k2fsa_sherpa_onnx_Vad_empty
_Java_com_k2fsa_sherpa_onnx_Vad_flush
_Java_com_k2fsa_sherpa_onnx_Vad_front
_Java_com_k2fsa_sherpa_onnx_Vad_isSpeechDetected
_Java_com_k2fsa_sherpa_onnx_Vad_newFromAsset
_Java_com_k2fsa_sherpa_onnx_Vad_newFromFile
_Java_com_k2fsa_sherpa_onnx_Vad_pop
_Java_com_k2fsa_sherpa_onnx_Vad_reset
_Java_com_k2fsa_sherpa_onnx_VersionInfo_00024Companion_getGitDate2
_Java_com_k2fsa_sherpa_onnx_VersionInfo_00024Companion_getGitSha12
_Java_com_k2fsa_sherpa_onnx_VersionInfo_00024Companion_getVersionStr2
_Java_com_k2fsa_sherpa_onnx_VersionInfo_getGitDate2
_Java_com_k2fsa_sherpa_onnx_VersionInfo_getGitSha12
_Java_com_k2fsa_sherpa_onnx_VersionInfo_getVersionStr2
_Java_com_k2fsa_sherpa_onnx_WaveReader_00024Companion_readWaveFromAsset
_Java_com_k2fsa_sherpa_onnx_WaveReader_00024Companion_readWaveFromFile
_Java_com_k2fsa_sherpa_onnx_WaveReader_readWaveFromFile
_Java_com_k2fsa_sherpa_onnx_WaveWriter_writeWaveToFile


================================================
FILE: sherpa-onnx/jni/sherpa-onnx-symbols.lds
================================================
{
  global:
    Java_com_k2fsa_sherpa_onnx*;
  local:
    *;
};


================================================
FILE: sherpa-onnx/jni/speaker-embedding-extractor.cc
================================================
// sherpa-onnx/jni/speaker-embedding-extractor.cc
//
// Copyright (c)  2024  Xiaomi Corporation
#include "sherpa-onnx/csrc/speaker-embedding-extractor.h"

#include <memory>
#include <vector>

#include "sherpa-onnx/jni/common.h"

namespace sherpa_onnx {

static SpeakerEmbeddingExtractorConfig GetSpeakerEmbeddingExtractorConfig(
    JNIEnv *env, jobject config, bool *ok) {
  SpeakerEmbeddingExtractorConfig ans;

  jclass cls = env->GetObjectClass(config);

  SHERPA_ONNX_JNI_READ_STRING(ans.model, model, cls, config);

  SHERPA_ONNX_JNI_READ_INT(ans.num_threads, numThreads, cls, config);

  SHERPA_ONNX_JNI_READ_BOOL(ans.debug, debug, cls, config);

  SHERPA_ONNX_JNI_READ_STRING(ans.provider, provider, cls, config);

  *ok = true;

  return ans;
}

}  // namespace sherpa_onnx

SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_newFromAsset(
    JNIEnv *env, jobject /*obj*/, jobject asset_manager, jobject _config) {
#if __ANDROID_API__ >= 9
  AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager);
  if (!mgr) {
    SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr);
    return 0;
  }
#endif
  bool ok = false;
  auto config =
      sherpa_onnx::GetSpeakerEmbeddingExtractorConfig(env, _config, &ok);

  if (!ok) {
    SHERPA_ONNX_LOGE("Please read the error message carefully");
    return 0;
  }

  SHERPA_ONNX_LOGE("new config:\n%s", config.ToString().c_str());

  auto extractor = new sherpa_onnx::SpeakerEmbeddingExtractor(
#if __ANDROID_API__ >= 9
      mgr,
#endif
      config);

  return (jlong)extractor;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_newFromFile(
    JNIEnv *env, jobject /*obj*/, jobject _config) {
  bool ok = false;
  auto config =
      sherpa_onnx::GetSpeakerEmbeddingExtractorConfig(env, _config, &ok);

  if (!ok) {
    SHERPA_ONNX_LOGE("Please read the error message carefully");
    return 0;
  }

  SHERPA_ONNX_LOGE("newFromFile config:\n%s", config.ToString().c_str());

  if (!config.Validate()) {
    SHERPA_ONNX_LOGE("Errors found in config!");
    return 0;
  }

  auto extractor = new sherpa_onnx::SpeakerEmbeddingExtractor(config);

  return (jlong)extractor;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_delete(JNIEnv * /*env*/,
                                                            jobject /*obj*/,
                                                            jlong ptr) {
  delete reinterpret_cast<sherpa_onnx::SpeakerEmbeddingExtractor *>(ptr);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_createStream(
    JNIEnv * /*env*/, jobject /*obj*/, jlong ptr) {
  std::unique_ptr<sherpa_onnx::OnlineStream> s =
      reinterpret_cast<sherpa_onnx::SpeakerEmbeddingExtractor *>(ptr)
          ->CreateStream();

  // The user is responsible to free the returned pointer.
  //
  // See Java_com_k2fsa_sherpa_onnx_OnlineStream_delete() from
  // ./online-stream.cc
  sherpa_onnx::OnlineStream *p = s.release();
  return (jlong)p;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jboolean JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_isReady(JNIEnv * /*env*/,
                                                             jobject /*obj*/,
                                                             jlong ptr,
                                                             jlong stream_ptr) {
  auto extractor =
      reinterpret_cast<sherpa_onnx::SpeakerEmbeddingExtractor *>(ptr);
  auto stream = reinterpret_cast<sherpa_onnx::OnlineStream *>(stream_ptr);
  return extractor->IsReady(stream);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jfloatArray JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_compute(JNIEnv *env,
                                                             jobject /*obj*/,
                                                             jlong ptr,
                                                             jlong stream_ptr) {
  auto extractor =
      reinterpret_cast<sherpa_onnx::SpeakerEmbeddingExtractor *>(ptr);
  auto stream = reinterpret_cast<sherpa_onnx::OnlineStream *>(stream_ptr);

  std::vector<float> embedding = extractor->Compute(stream);
  jfloatArray embedding_arr = env->NewFloatArray(embedding.size());
  env->SetFloatArrayRegion(embedding_arr, 0, embedding.size(),
                           embedding.data());
  return embedding_arr;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jint JNICALL Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_dim(
    JNIEnv * /*env*/, jobject /*obj*/, jlong ptr) {
  auto extractor =
      reinterpret_cast<sherpa_onnx::SpeakerEmbeddingExtractor *>(ptr);
  return extractor->Dim();
}


================================================
FILE: sherpa-onnx/jni/speaker-embedding-manager.cc
================================================
// sherpa-onnx/jni/speaker-embedding-manager.cc
//
// Copyright (c)  2024  Xiaomi Corporation
#include "sherpa-onnx/csrc/speaker-embedding-manager.h"

#include <string>
#include <vector>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/jni/common.h"

SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_create(JNIEnv *env,
                                                          jobject /*obj*/,
                                                          jint dim) {
  auto p = new sherpa_onnx::SpeakerEmbeddingManager(dim);
  return (jlong)p;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_delete(JNIEnv * /*env*/,
                                                          jobject /*obj*/,
                                                          jlong ptr) {
  auto manager = reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr);
  delete manager;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jboolean JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_add(JNIEnv *env,
                                                       jobject /*obj*/,
                                                       jlong ptr, jstring name,
                                                       jfloatArray embedding) {
  auto manager = reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr);

  jfloat *p = env->GetFloatArrayElements(embedding, nullptr);
  jsize n = env->GetArrayLength(embedding);

  if (n != manager->Dim()) {
    SHERPA_ONNX_LOGE("Expected dim %d, given %d", manager->Dim(),
                     static_cast<int32_t>(n));
    env->ReleaseFloatArrayElements(embedding, p, JNI_ABORT);
    jclass iae = env->FindClass("java/lang/IllegalArgumentException");
    env->ThrowNew(iae, "Embedding dimension mismatch");
    env->DeleteLocalRef(iae);
    return false;
  }

  const char *p_name = env->GetStringUTFChars(name, nullptr);

  jboolean ok = manager->Add(p_name, p);
  env->ReleaseStringUTFChars(name, p_name);
  env->ReleaseFloatArrayElements(embedding, p, JNI_ABORT);

  return ok;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jboolean JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_addList(
    JNIEnv *env, jobject /*obj*/, jlong ptr, jstring name,
    jobjectArray embedding_arr) {
  auto manager = reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr);

  int num_embeddings = env->GetArrayLength(embedding_arr);
  if (num_embeddings == 0) {
    return false;
  }

  std::vector<std::vector<float>> embedding_list;
  embedding_list.reserve(num_embeddings);
  for (int32_t i = 0; i != num_embeddings; ++i) {
    jfloatArray embedding =
        (jfloatArray)env->GetObjectArrayElement(embedding_arr, i);

    jfloat *p = env->GetFloatArrayElements(embedding, nullptr);
    jsize n = env->GetArrayLength(embedding);

    if (n != manager->Dim()) {
      SHERPA_ONNX_LOGE("i: %d. Expected dim %d, given %d", i, manager->Dim(),
                       static_cast<int32_t>(n));
      env->ReleaseFloatArrayElements(embedding, p, JNI_ABORT);
      env->DeleteLocalRef(embedding);
      jclass iae = env->FindClass("java/lang/IllegalArgumentException");
      env->ThrowNew(iae, "Embedding dimension mismatch");
      env->DeleteLocalRef(iae);
      return false;
    }

    embedding_list.push_back({p, p + n});
    env->ReleaseFloatArrayElements(embedding, p, JNI_ABORT);
    env->DeleteLocalRef(embedding);
  }

  const char *p_name = env->GetStringUTFChars(name, nullptr);

  jboolean ok = manager->Add(p_name, embedding_list);

  env->ReleaseStringUTFChars(name, p_name);

  return ok;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jboolean JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_remove(JNIEnv *env,
                                                          jobject /*obj*/,
                                                          jlong ptr,
                                                          jstring name) {
  auto manager = reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr);

  const char *p_name = env->GetStringUTFChars(name, nullptr);

  jboolean ok = manager->Remove(p_name);

  env->ReleaseStringUTFChars(name, p_name);

  return ok;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jstring JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_search(JNIEnv *env,
                                                          jobject /*obj*/,
                                                          jlong ptr,
                                                          jfloatArray embedding,
                                                          jfloat threshold) {
  auto manager = reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr);

  jfloat *p = env->GetFloatArrayElements(embedding, nullptr);
  jsize n = env->GetArrayLength(embedding);

  if (n != manager->Dim()) {
    SHERPA_ONNX_LOGE("Expected dim %d, given %d", manager->Dim(),
                     static_cast<int32_t>(n));
    env->ReleaseFloatArrayElements(embedding, p, JNI_ABORT);
    jclass iae = env->FindClass("java/lang/IllegalArgumentException");
    env->ThrowNew(iae, "Embedding dimension mismatch");
    env->DeleteLocalRef(iae);
    return env->NewStringUTF("");
  }

  std::string name = manager->Search(p, threshold);

  env->ReleaseFloatArrayElements(embedding, p, JNI_ABORT);

  return env->NewStringUTF(name.c_str());
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jboolean JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_verify(
    JNIEnv *env, jobject /*obj*/, jlong ptr, jstring name,
    jfloatArray embedding, jfloat threshold) {
  auto manager = reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr);

  jfloat *p = env->GetFloatArrayElements(embedding, nullptr);
  jsize n = env->GetArrayLength(embedding);

  if (n != manager->Dim()) {
    SHERPA_ONNX_LOGE("Expected dim %d, given %d", manager->Dim(),
                     static_cast<int32_t>(n));
    env->ReleaseFloatArrayElements(embedding, p, JNI_ABORT);
    jclass iae = env->FindClass("java/lang/IllegalArgumentException");
    env->ThrowNew(iae, "Embedding dimension mismatch");
    env->DeleteLocalRef(iae);
    return false;
  }

  const char *p_name = env->GetStringUTFChars(name, nullptr);

  jboolean ok = manager->Verify(p_name, p, threshold);

  env->ReleaseFloatArrayElements(embedding, p, JNI_ABORT);

  env->ReleaseStringUTFChars(name, p_name);

  return ok;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jboolean JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_contains(JNIEnv *env,
                                                            jobject /*obj*/,
                                                            jlong ptr,
                                                            jstring name) {
  auto manager = reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr);

  const char *p_name = env->GetStringUTFChars(name, nullptr);

  jboolean ok = manager->Contains(p_name);

  env->ReleaseStringUTFChars(name, p_name);

  return ok;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jint JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_numSpeakers(JNIEnv * /*env*/,
                                                               jobject /*obj*/,
                                                               jlong ptr) {
  auto manager = reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr);
  return manager->NumSpeakers();
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jobjectArray JNICALL
Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_allSpeakerNames(
    JNIEnv *env, jobject /*obj*/, jlong ptr) {
  auto manager = reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr);
  std::vector<std::string> all_speakers = manager->GetAllSpeakers();

  jclass string_cls = env->FindClass("java/lang/String");
  jobjectArray obj_arr = (jobjectArray)env->NewObjectArray(
      all_speakers.size(), string_cls, nullptr);
  env->DeleteLocalRef(string_cls);

  int32_t i = 0;
  for (auto &s : all_speakers) {
    jstring js = env->NewStringUTF(s.c_str());
    env->SetObjectArrayElement(obj_arr, i, js);
    env->DeleteLocalRef(js);
    ++i;
  }

  return obj_arr;
}


================================================
FILE: sherpa-onnx/jni/speech-denoiser.cc
================================================
// sherpa-onnx/jni/speech-denoiser.cc
//
// Copyright (c)  2026  Xiaomi Corporation

#include "sherpa-onnx/jni/speech-denoiser.h"

#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

OfflineSpeechDenoiserModelConfig GetOfflineSpeechDenoiserModelConfig(
    JNIEnv *env, jobject model, bool *ok) {
  OfflineSpeechDenoiserModelConfig ans;

  jclass model_config_cls = env->GetObjectClass(model);
  jfieldID fid;

  fid = env->GetFieldID(
      model_config_cls, "gtcrn",
      "Lcom/k2fsa/sherpa/onnx/OfflineSpeechDenoiserGtcrnModelConfig;");
  jobject gtcrn = env->GetObjectField(model, fid);
  jclass gtcrn_cls = env->GetObjectClass(gtcrn);

  SHERPA_ONNX_JNI_READ_STRING(ans.gtcrn.model, model, gtcrn_cls, gtcrn);

  fid = env->GetFieldID(
      model_config_cls, "dpdfnet",
      "Lcom/k2fsa/sherpa/onnx/OfflineSpeechDenoiserDpdfNetModelConfig;");
  jobject dpdfnet = env->GetObjectField(model, fid);
  jclass dpdfnet_cls = env->GetObjectClass(dpdfnet);

  SHERPA_ONNX_JNI_READ_STRING(ans.dpdfnet.model, model, dpdfnet_cls, dpdfnet);

  SHERPA_ONNX_JNI_READ_INT(ans.num_threads, numThreads, model_config_cls,
                           model);
  SHERPA_ONNX_JNI_READ_BOOL(ans.debug, debug, model_config_cls, model);
  SHERPA_ONNX_JNI_READ_STRING(ans.provider, provider, model_config_cls, model);

  *ok = true;
  return ans;
}

OfflineSpeechDenoiserConfig GetOfflineSpeechDenoiserConfig(JNIEnv *env,
                                                           jobject config,
                                                           bool *ok) {
  OfflineSpeechDenoiserConfig ans;

  jclass cls = env->GetObjectClass(config);
  jfieldID fid = env->GetFieldID(
      cls, "model", "Lcom/k2fsa/sherpa/onnx/OfflineSpeechDenoiserModelConfig;");
  jobject model = env->GetObjectField(config, fid);

  ans.model = GetOfflineSpeechDenoiserModelConfig(env, model, ok);
  return ans;
}

OnlineSpeechDenoiserConfig GetOnlineSpeechDenoiserConfig(JNIEnv *env,
                                                         jobject config,
                                                         bool *ok) {
  OnlineSpeechDenoiserConfig ans;

  jclass cls = env->GetObjectClass(config);
  jfieldID fid = env->GetFieldID(
      cls, "model", "Lcom/k2fsa/sherpa/onnx/OfflineSpeechDenoiserModelConfig;");
  jobject model = env->GetObjectField(config, fid);

  ans.model = GetOfflineSpeechDenoiserModelConfig(env, model, ok);
  return ans;
}

jobject NewDenoisedAudio(JNIEnv *env, const DenoisedAudio &denoised) {
  jclass cls = env->FindClass("com/k2fsa/sherpa/onnx/DenoisedAudio");
  if (cls == nullptr) {
    SHERPA_ONNX_LOGE("Failed to get class for DenoisedAudio");
    return nullptr;
  }

  jmethodID constructor = env->GetMethodID(cls, "<init>", "([FI)V");
  if (constructor == nullptr) {
    SHERPA_ONNX_LOGE("Failed to get constructor for DenoisedAudio");
    env->DeleteLocalRef(cls);
    return nullptr;
  }

  jfloatArray samples_arr = env->NewFloatArray(denoised.samples.size());
  env->SetFloatArrayRegion(samples_arr, 0, denoised.samples.size(),
                           denoised.samples.data());

  jobject obj =
      env->NewObject(cls, constructor, samples_arr, denoised.sample_rate);
  env->DeleteLocalRef(cls);
  env->DeleteLocalRef(samples_arr);
  return obj;
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/jni/speech-denoiser.h
================================================
// sherpa-onnx/jni/speech-denoiser.h
//
// Copyright (c)  2026  Xiaomi Corporation

#ifndef SHERPA_ONNX_JNI_SPEECH_DENOISER_H_
#define SHERPA_ONNX_JNI_SPEECH_DENOISER_H_

#include "sherpa-onnx/csrc/offline-speech-denoiser.h"
#include "sherpa-onnx/csrc/online-speech-denoiser.h"
#include "sherpa-onnx/jni/common.h"

namespace sherpa_onnx {

OfflineSpeechDenoiserModelConfig GetOfflineSpeechDenoiserModelConfig(
    JNIEnv *env, jobject model, bool *ok);

OfflineSpeechDenoiserConfig GetOfflineSpeechDenoiserConfig(
    JNIEnv *env, jobject config, bool *ok);

OnlineSpeechDenoiserConfig GetOnlineSpeechDenoiserConfig(
    JNIEnv *env, jobject config, bool *ok);

jobject NewDenoisedAudio(JNIEnv *env, const DenoisedAudio &denoised);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_JNI_SPEECH_DENOISER_H_


================================================
FILE: sherpa-onnx/jni/spoken-language-identification.cc
================================================
// sherpa-onnx/jni/spoken-language-identification.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/spoken-language-identification.h"

#include <memory>
#include <string>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/jni/common.h"

namespace sherpa_onnx {

static SpokenLanguageIdentificationConfig GetSpokenLanguageIdentificationConfig(
    JNIEnv *env, jobject config, bool *ok) {
  SpokenLanguageIdentificationConfig ans;

  jclass cls = env->GetObjectClass(config);
  jfieldID fid = env->GetFieldID(
      cls, "whisper",
      "Lcom/k2fsa/sherpa/onnx/SpokenLanguageIdentificationWhisperConfig;");

  jobject whisper = env->GetObjectField(config, fid);
  jclass whisper_cls = env->GetObjectClass(whisper);

  SHERPA_ONNX_JNI_READ_STRING(ans.whisper.encoder, encoder, whisper_cls,
                              whisper);

  SHERPA_ONNX_JNI_READ_STRING(ans.whisper.decoder, decoder, whisper_cls,
                              whisper);

  SHERPA_ONNX_JNI_READ_INT(ans.whisper.tail_paddings, tailPaddings, whisper_cls,
                           whisper);

  SHERPA_ONNX_JNI_READ_INT(ans.num_threads, numThreads, cls, config);

  SHERPA_ONNX_JNI_READ_BOOL(ans.debug, debug, cls, config);

  SHERPA_ONNX_JNI_READ_STRING(ans.provider, provider, cls, config);

  *ok = true;
  return ans;
}

}  // namespace sherpa_onnx

SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL
Java_com_k2fsa_sherpa_onnx_SpokenLanguageIdentification_newFromAsset(
    JNIEnv *env, jobject /*obj*/, jobject asset_manager, jobject _config) {
#if __ANDROID_API__ >= 9
  AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager);
  if (!mgr) {
    SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr);
    return 0;
  }
#endif

  bool ok = false;
  auto config =
      sherpa_onnx::GetSpokenLanguageIdentificationConfig(env, _config, &ok);

  if (!ok) {
    SHERPA_ONNX_LOGE("Please read the error message carefully");
    return 0;
  }

  SHERPA_ONNX_LOGE("spoken language identification newFromAsset config:\n%s",
                   config.ToString().c_str());

  auto slid = new sherpa_onnx::SpokenLanguageIdentification(
#if __ANDROID_API__ >= 9
      mgr,
#endif
      config);
  SHERPA_ONNX_LOGE("slid %p", slid);

  return (jlong)slid;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL
Java_com_k2fsa_sherpa_onnx_SpokenLanguageIdentification_newFromFile(
    JNIEnv *env, jobject /*obj*/, jobject _config) {
  bool ok = false;
  auto config =
      sherpa_onnx::GetSpokenLanguageIdentificationConfig(env, _config, &ok);

  if (!ok) {
    SHERPA_ONNX_LOGE("Please read the error message carefully");
    return 0;
  }

  SHERPA_ONNX_LOGE("SpokenLanguageIdentification newFromFile config:\n%s",
                   config.ToString().c_str());

  if (!config.Validate()) {
    SHERPA_ONNX_LOGE("Errors found in config!");
    return 0;
  }

  auto tagger = new sherpa_onnx::SpokenLanguageIdentification(config);

  return (jlong)tagger;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL
Java_com_k2fsa_sherpa_onnx_SpokenLanguageIdentification_delete(JNIEnv * /*env*/,
                                                               jobject /*obj*/,
                                                               jlong ptr) {
  delete reinterpret_cast<sherpa_onnx::SpokenLanguageIdentification *>(ptr);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL
Java_com_k2fsa_sherpa_onnx_SpokenLanguageIdentification_createStream(
    JNIEnv * /*env*/, jobject /*obj*/, jlong ptr) {
  auto slid =
      reinterpret_cast<sherpa_onnx::SpokenLanguageIdentification *>(ptr);
  std::unique_ptr<sherpa_onnx::OfflineStream> s = slid->CreateStream();

  // The user is responsible to free the returned pointer.
  //
  // See Java_com_k2fsa_sherpa_onnx_OfflineStream_delete() from
  // ./offline-stream.cc
  sherpa_onnx::OfflineStream *p = s.release();
  return (jlong)p;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jstring JNICALL
Java_com_k2fsa_sherpa_onnx_SpokenLanguageIdentification_compute(JNIEnv *env,
                                                                jobject /*obj*/,
                                                                jlong ptr,
                                                                jlong s_ptr) {
  sherpa_onnx::SpokenLanguageIdentification *slid =
      reinterpret_cast<sherpa_onnx::SpokenLanguageIdentification *>(ptr);
  sherpa_onnx::OfflineStream *s =
      reinterpret_cast<sherpa_onnx::OfflineStream *>(s_ptr);
  std::string lang = slid->Compute(s);
  return env->NewStringUTF(lang.c_str());
}


================================================
FILE: sherpa-onnx/jni/version.cc
================================================
// sherpa-onnx/jni/version.cc
//
// Copyright (c)  2025  Xiaomi Corporation
#include "sherpa-onnx/csrc/version.h"

#include "sherpa-onnx/jni/common.h"

namespace sherpa_onnx {

SHERPA_ONNX_EXTERN_C
JNIEXPORT jstring JNICALL
Java_com_k2fsa_sherpa_onnx_VersionInfo_00024Companion_getVersionStr2(
    JNIEnv *env, jclass /*cls*/) {
  return env->NewStringUTF(GetVersionStr());
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jstring JNICALL
Java_com_k2fsa_sherpa_onnx_VersionInfo_00024Companion_getGitSha12(
    JNIEnv *env, jclass /*cls*/) {
  return env->NewStringUTF(GetGitSha1());
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jstring JNICALL
Java_com_k2fsa_sherpa_onnx_VersionInfo_00024Companion_getGitDate2(
    JNIEnv *env, jclass /*cls*/) {
  return env->NewStringUTF(GetGitDate());
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jstring JNICALL Java_com_k2fsa_sherpa_onnx_VersionInfo_getVersionStr2(
    JNIEnv *env, jclass /*cls*/) {
  return env->NewStringUTF(GetVersionStr());
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jstring JNICALL Java_com_k2fsa_sherpa_onnx_VersionInfo_getGitSha12(
    JNIEnv *env, jclass /*cls*/) {
  return env->NewStringUTF(GetGitSha1());
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jstring JNICALL Java_com_k2fsa_sherpa_onnx_VersionInfo_getGitDate2(
    JNIEnv *env, jclass /*cls*/) {
  return env->NewStringUTF(GetGitDate());
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/jni/voice-activity-detector.cc
================================================
// sherpa-onnx/csrc/voice-activity-detector.cc
//
// Copyright (c)  2024  Xiaomi Corporation
#include "sherpa-onnx/csrc/voice-activity-detector.h"

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/jni/common.h"

namespace sherpa_onnx {

static VadModelConfig GetVadModelConfig(JNIEnv *env, jobject config, bool *ok) {
  VadModelConfig ans;

  jclass cls = env->GetObjectClass(config);
  jfieldID fid;

  // silero_vad
  fid = env->GetFieldID(cls, "sileroVadModelConfig",
                        "Lcom/k2fsa/sherpa/onnx/SileroVadModelConfig;");
  jobject silero_vad_config = env->GetObjectField(config, fid);
  jclass silero_vad_config_cls = env->GetObjectClass(silero_vad_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.silero_vad.model, model,
                              silero_vad_config_cls, silero_vad_config);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.silero_vad.threshold, threshold,
                             silero_vad_config_cls, silero_vad_config);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.silero_vad.min_silence_duration,
                             minSilenceDuration, silero_vad_config_cls,
                             silero_vad_config);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.silero_vad.min_speech_duration,
                             minSpeechDuration, silero_vad_config_cls,
                             silero_vad_config);

  SHERPA_ONNX_JNI_READ_INT(ans.silero_vad.window_size, windowSize,
                           silero_vad_config_cls, silero_vad_config);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.silero_vad.max_speech_duration,
                             maxSpeechDuration, silero_vad_config_cls,
                             silero_vad_config);

  fid = env->GetFieldID(cls, "tenVadModelConfig",
                        "Lcom/k2fsa/sherpa/onnx/TenVadModelConfig;");
  jobject ten_vad_config = env->GetObjectField(config, fid);
  jclass ten_vad_config_cls = env->GetObjectClass(ten_vad_config);

  SHERPA_ONNX_JNI_READ_STRING(ans.ten_vad.model, model, ten_vad_config_cls,
                              ten_vad_config);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.ten_vad.threshold, threshold,
                             ten_vad_config_cls, ten_vad_config);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.ten_vad.min_silence_duration,
                             minSilenceDuration, ten_vad_config_cls,
                             ten_vad_config);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.ten_vad.min_speech_duration, minSpeechDuration,
                             ten_vad_config_cls, ten_vad_config);

  SHERPA_ONNX_JNI_READ_INT(ans.ten_vad.window_size, windowSize,
                           ten_vad_config_cls, ten_vad_config);

  SHERPA_ONNX_JNI_READ_FLOAT(ans.ten_vad.max_speech_duration, maxSpeechDuration,
                             ten_vad_config_cls, ten_vad_config);

  SHERPA_ONNX_JNI_READ_INT(ans.sample_rate, sampleRate, cls, config);

  SHERPA_ONNX_JNI_READ_INT(ans.num_threads, numThreads, cls, config);

  SHERPA_ONNX_JNI_READ_STRING(ans.provider, provider, cls, config);

  SHERPA_ONNX_JNI_READ_BOOL(ans.debug, debug, cls, config);

  *ok = true;
  return ans;
}

}  // namespace sherpa_onnx

SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_Vad_newFromAsset(
    JNIEnv *env, jobject /*obj*/, jobject asset_manager, jobject _config) {
#if __ANDROID_API__ >= 9
  AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager);
  if (!mgr) {
    SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr);
    return 0;
  }
#endif

  bool ok = false;
  auto config = sherpa_onnx::GetVadModelConfig(env, _config, &ok);

  if (!ok) {
    SHERPA_ONNX_LOGE("Please read the error message carefully");
    return 0;
  }

  SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());

  auto model = new sherpa_onnx::VoiceActivityDetector(
#if __ANDROID_API__ >= 9
      mgr,
#endif
      config);

  return (jlong)model;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_Vad_newFromFile(
    JNIEnv *env, jobject /*obj*/, jobject _config) {
  bool ok = false;
  auto config = sherpa_onnx::GetVadModelConfig(env, _config, &ok);

  if (!ok) {
    SHERPA_ONNX_LOGE("Please read the error message carefully");
    return 0;
  }

  SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());

  if (!config.Validate()) {
    SHERPA_ONNX_LOGE("Errors found in config!");
    return 0;
  }

  auto model = new sherpa_onnx::VoiceActivityDetector(config);

  return (jlong)model;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_Vad_delete(JNIEnv * /*env*/,
                                                             jobject /*obj*/,
                                                             jlong ptr) {
  delete reinterpret_cast<sherpa_onnx::VoiceActivityDetector *>(ptr);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_Vad_acceptWaveform(
    JNIEnv *env, jobject /*obj*/, jlong ptr, jfloatArray samples) {
  SafeJNI(env, "Vad_acceptWaveform", [&] {
    if (!ValidatePointer(env, ptr, "Vad_acceptWaveform",
                         "VoiceActivityDetector pointer is null.")) {
      return;
    }

    auto model = reinterpret_cast<sherpa_onnx::VoiceActivityDetector *>(ptr);
    jfloat *p = env->GetFloatArrayElements(samples, nullptr);
    jsize n = env->GetArrayLength(samples);

    model->AcceptWaveform(p, n);

    env->ReleaseFloatArrayElements(samples, p, JNI_ABORT);
  });
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jboolean JNICALL Java_com_k2fsa_sherpa_onnx_Vad_empty(
    JNIEnv * /*env*/, jobject /*obj*/, jlong ptr) {
  auto model = reinterpret_cast<sherpa_onnx::VoiceActivityDetector *>(ptr);
  return model->Empty();
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_Vad_pop(JNIEnv * /*env*/,
                                                          jobject /*obj*/,
                                                          jlong ptr) {
  auto model = reinterpret_cast<sherpa_onnx::VoiceActivityDetector *>(ptr);
  model->Pop();
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_Vad_clear(JNIEnv * /*env*/,
                                                            jobject /*obj*/,
                                                            jlong ptr) {
  auto model = reinterpret_cast<sherpa_onnx::VoiceActivityDetector *>(ptr);
  model->Clear();
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jobject JNICALL Java_com_k2fsa_sherpa_onnx_Vad_front(JNIEnv *env,
                                                               jobject /*obj*/,
                                                               jlong ptr) {
  auto vad = reinterpret_cast<sherpa_onnx::VoiceActivityDetector *>(ptr);
  if (!vad) {
    return nullptr;
  }

  const auto &front = vad->Front();

  jfloatArray samples_arr =
      env->NewFloatArray(static_cast<jsize>(front.samples.size()));

  if (!samples_arr) {
    SHERPA_ONNX_LOGE("Failed to allocate");
    return nullptr;
  }

  env->SetFloatArrayRegion(samples_arr, 0,
                           static_cast<jsize>(front.samples.size()),
                           front.samples.data());

  jclass cls = env->FindClass("com/k2fsa/sherpa/onnx/SpeechSegment");
  if (!cls) {
    SHERPA_ONNX_LOGE("Failed to find com/k2fsa/sherpa/onnx/SpeechSegment");

    env->DeleteLocalRef(samples_arr);
    return nullptr;
  }

  jmethodID ctor = env->GetMethodID(cls, "<init>", "(I[F)V");
  if (!ctor) {
    SHERPA_ONNX_LOGE("failed to get constructor");

    env->DeleteLocalRef(samples_arr);
    env->DeleteLocalRef(cls);
    return nullptr;
  }

  jobject speechSegment =
      env->NewObject(cls, ctor, static_cast<jint>(front.start), samples_arr);

  env->DeleteLocalRef(samples_arr);
  env->DeleteLocalRef(cls);

  return speechSegment;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jboolean JNICALL Java_com_k2fsa_sherpa_onnx_Vad_isSpeechDetected(
    JNIEnv * /*env*/, jobject /*obj*/, jlong ptr) {
  auto model = reinterpret_cast<sherpa_onnx::VoiceActivityDetector *>(ptr);
  return model->IsSpeechDetected();
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_Vad_reset(JNIEnv *env,
                                                            jobject /*obj*/,
                                                            jlong ptr) {
  SafeJNI(env, "Vad_reset", [&] {
    if (!ValidatePointer(env, ptr, "Vad_reset",
                         "VoiceActivityDetector pointer is null.")) {
      return;
    }

    auto model = reinterpret_cast<sherpa_onnx::VoiceActivityDetector *>(ptr);
    model->Reset();
  });
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_Vad_flush(JNIEnv * /*env*/,
                                                            jobject /*obj*/,
                                                            jlong ptr) {
  auto model = reinterpret_cast<sherpa_onnx::VoiceActivityDetector *>(ptr);
  model->Flush();
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jfloat JNICALL Java_com_k2fsa_sherpa_onnx_Vad_compute(
    JNIEnv *env, jobject /*obj*/, jlong ptr, jfloatArray samples) {
  return SafeJNI(
      env, "Vad_compute",
      [&]() -> jfloat {
        if (!ValidatePointer(env, ptr, "Vad_compute",
                             "VoiceActivityDetector pointer is null.")) {
          return -1.0f;
        }
        auto vad = reinterpret_cast<sherpa_onnx::VoiceActivityDetector *>(ptr);
        jfloat *p = env->GetFloatArrayElements(samples, nullptr);
        jsize n = env->GetArrayLength(samples);

        float score = vad->Compute(p, n);

        env->ReleaseFloatArrayElements(samples, p, JNI_ABORT);

        return static_cast<jfloat>(score);
      },
      -1.0f);
}


================================================
FILE: sherpa-onnx/jni/wave-reader.cc
================================================
// sherpa-onnx/jni/wave-reader.cc
//
// Copyright (c)  2024  Xiaomi Corporation
#include "sherpa-onnx/csrc/wave-reader.h"

#include <fstream>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/jni/common.h"

static jobject ReadWaveImpl(JNIEnv *env, std::istream &is,
                            const char *p_filename) {
  bool is_ok = false;
  int32_t sampling_rate = -1;
  std::vector<float> samples =
      sherpa_onnx::ReadWave(is, &sampling_rate, &is_ok);

  if (!is_ok) {
    SHERPA_ONNX_LOGE("Failed to read '%s'", p_filename);
    jclass exception_class = env->FindClass("java/lang/Exception");
    env->ThrowNew(exception_class, "Failed to read wave file.");
    env->DeleteLocalRef(exception_class);
    return nullptr;
  }

  jfloatArray samples_arr = env->NewFloatArray(samples.size());
  if (samples_arr == nullptr) {
    SHERPA_ONNX_LOGE("Failed to allocate samples array");
    return nullptr;
  }

  env->SetFloatArrayRegion(samples_arr, 0, samples.size(), samples.data());

  // Find WaveData class
  jclass cls = env->FindClass("com/k2fsa/sherpa/onnx/WaveData");
  if (cls == nullptr) {
    env->DeleteLocalRef(samples_arr);
    SHERPA_ONNX_LOGE("Failed to find class com/k2fsa/sherpa/onnx/WaveData");
    return nullptr;
  }

  // Get constructor: WaveData(float[] samples, int sampleRate)
  jmethodID ctor = env->GetMethodID(cls, "<init>", "([FI)V");
  if (ctor == nullptr) {
    SHERPA_ONNX_LOGE("Failed to get WaveData constructor");

    env->DeleteLocalRef(samples_arr);
    env->DeleteLocalRef(cls);
    return nullptr;
  }

  // Create WaveData object
  jobject obj = env->NewObject(cls, ctor, samples_arr, sampling_rate);
  if (obj == nullptr) {
    env->DeleteLocalRef(samples_arr);
    env->DeleteLocalRef(cls);
    return nullptr;
  }

  // Clean up local refs
  env->DeleteLocalRef(samples_arr);
  env->DeleteLocalRef(cls);

  return obj;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jobject JNICALL
Java_com_k2fsa_sherpa_onnx_WaveReader_00024Companion_readWaveFromFile(
    JNIEnv *env, jclass /*cls*/, jstring filename) {
  const char *p_filename = env->GetStringUTFChars(filename, nullptr);
  std::ifstream is(p_filename, std::ios::binary);

  auto obj = ReadWaveImpl(env, is, p_filename);

  env->ReleaseStringUTFChars(filename, p_filename);

  return obj;
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jobject JNICALL
Java_com_k2fsa_sherpa_onnx_WaveReader_readWaveFromFile(JNIEnv *env,
                                                       jclass /*obj*/,
                                                       jstring filename) {
  return Java_com_k2fsa_sherpa_onnx_WaveReader_00024Companion_readWaveFromFile(
      env, nullptr, filename);
}

SHERPA_ONNX_EXTERN_C
JNIEXPORT jobject JNICALL
Java_com_k2fsa_sherpa_onnx_WaveReader_00024Companion_readWaveFromAsset(
    JNIEnv *env, jclass /*cls*/, jobject asset_manager, jstring filename) {
  const char *p_filename = env->GetStringUTFChars(filename, nullptr);
#if __ANDROID_API__ >= 9
  AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager);
  if (!mgr) {
    SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr);
    env->ReleaseStringUTFChars(filename, p_filename);
    jclass re = env->FindClass("java/lang/RuntimeException");
    env->ThrowNew(re, "Failed to get asset manager");
    env->DeleteLocalRef(re);
    return nullptr;
  }
  std::vector<char> buffer = sherpa_onnx::ReadFile(mgr, p_filename);

  std::istringstream is(std::string(buffer.data(), buffer.size()));
#else
  std::ifstream is(p_filename, std::ios::binary);
#endif

  auto obj = ReadWaveImpl(env, is, p_filename);

  env->ReleaseStringUTFChars(filename, p_filename);

  return obj;
}


================================================
FILE: sherpa-onnx/jni/wave-writer.cc
================================================
// sherpa-onnx/jni/wave-writer.cc
//
// Copyright (c)  2024  Xiaomi Corporation
#include "sherpa-onnx/csrc/wave-writer.h"

#include "sherpa-onnx/jni/common.h"

SHERPA_ONNX_EXTERN_C
JNIEXPORT bool JNICALL Java_com_k2fsa_sherpa_onnx_WaveWriter_writeWaveToFile(
    JNIEnv *env, jclass /*obj*/, jstring filename, jfloatArray samples,
    jint sample_rate) {
  jfloat *p = env->GetFloatArrayElements(samples, nullptr);
  jsize n = env->GetArrayLength(samples);

  const char *p_filename = env->GetStringUTFChars(filename, nullptr);

  bool ok = sherpa_onnx::WriteWave(p_filename, sample_rate, p, n);

  env->ReleaseFloatArrayElements(samples, p, JNI_ABORT);
  env->ReleaseStringUTFChars(filename, p_filename);

  return ok;
}


================================================
FILE: sherpa-onnx/kotlin-api/AudioTagging.kt
================================================
package com.k2fsa.sherpa.onnx

import android.content.res.AssetManager

data class OfflineZipformerAudioTaggingModelConfig(
    var model: String = "",
)

data class AudioTaggingModelConfig(
    var zipformer: OfflineZipformerAudioTaggingModelConfig = OfflineZipformerAudioTaggingModelConfig(),
    var ced: String = "",
    var numThreads: Int = 1,
    var debug: Boolean = false,
    var provider: String = "cpu",
)

data class AudioTaggingConfig(
    var model: AudioTaggingModelConfig = AudioTaggingModelConfig(),
    var labels: String = "",
    var topK: Int = 5,
)

data class AudioEvent(
    val name: String,
    val index: Int,
    val prob: Float,
)

class AudioTagging(
    assetManager: AssetManager? = null,
    config: AudioTaggingConfig,
) {
    private var ptr: Long

    init {
        ptr = if (assetManager != null) {
            newFromAsset(assetManager, config)
        } else {
            newFromFile(config)
        }
    }

    protected fun finalize() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0
        }
    }

    fun release() = finalize()

    fun createStream(): OfflineStream {
        val p = createStream(ptr)
        return OfflineStream(p)
    }

    @Suppress("UNCHECKED_CAST")
    fun compute(stream: OfflineStream, topK: Int = -1): Array<AudioEvent> {
        return compute(ptr, stream.ptr, topK)
    }

    private external fun newFromAsset(
        assetManager: AssetManager,
        config: AudioTaggingConfig,
    ): Long

    private external fun newFromFile(
        config: AudioTaggingConfig,
    ): Long

    private external fun delete(ptr: Long)

    private external fun createStream(ptr: Long): Long

    private external fun compute(ptr: Long, streamPtr: Long, topK: Int): Array<AudioEvent>

    companion object {
        init {
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}

// please refer to
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models
// to download more models
//
// See also
// https://k2-fsa.github.io/sherpa/onnx/audio-tagging/
fun getAudioTaggingConfig(type: Int, numThreads: Int = 1): AudioTaggingConfig? {
    when (type) {
        0 -> {
            val modelDir = "sherpa-onnx-zipformer-small-audio-tagging-2024-04-15"
            return AudioTaggingConfig(
                model = AudioTaggingModelConfig(
                    zipformer = OfflineZipformerAudioTaggingModelConfig(model = "$modelDir/model.int8.onnx"),
                    numThreads = numThreads,
                    debug = true,
                ),
                labels = "$modelDir/class_labels_indices.csv",
                topK = 3,
            )
        }

        1 -> {
            val modelDir = "sherpa-onnx-zipformer-audio-tagging-2024-04-09"
            return AudioTaggingConfig(
                model = AudioTaggingModelConfig(
                    zipformer = OfflineZipformerAudioTaggingModelConfig(model = "$modelDir/model.int8.onnx"),
                    numThreads = numThreads,
                    debug = true,
                ),
                labels = "$modelDir/class_labels_indices.csv",
                topK = 3,
            )
        }

        2 -> {
            val modelDir = "sherpa-onnx-ced-tiny-audio-tagging-2024-04-19"
            return AudioTaggingConfig(
                model = AudioTaggingModelConfig(
                    ced = "$modelDir/model.int8.onnx",
                    numThreads = numThreads,
                    debug = true,
                ),
                labels = "$modelDir/class_labels_indices.csv",
                topK = 3,
            )
        }

        3 -> {
            val modelDir = "sherpa-onnx-ced-mini-audio-tagging-2024-04-19"
            return AudioTaggingConfig(
                model = AudioTaggingModelConfig(
                    ced = "$modelDir/model.int8.onnx",
                    numThreads = numThreads,
                    debug = true,
                ),
                labels = "$modelDir/class_labels_indices.csv",
                topK = 3,
            )
        }

        4 -> {
            val modelDir = "sherpa-onnx-ced-small-audio-tagging-2024-04-19"
            return AudioTaggingConfig(
                model = AudioTaggingModelConfig(
                    ced = "$modelDir/model.int8.onnx",
                    numThreads = numThreads,
                    debug = true,
                ),
                labels = "$modelDir/class_labels_indices.csv",
                topK = 3,
            )
        }

        5 -> {
            val modelDir = "sherpa-onnx-ced-base-audio-tagging-2024-04-19"
            return AudioTaggingConfig(
                model = AudioTaggingModelConfig(
                    ced = "$modelDir/model.int8.onnx",
                    numThreads = numThreads,
                    debug = true,
                ),
                labels = "$modelDir/class_labels_indices.csv",
                topK = 3,
            )
        }
    }

    return null
}


================================================
FILE: sherpa-onnx/kotlin-api/DenoisedAudio.kt
================================================
package com.k2fsa.sherpa.onnx

class DenoisedAudio(
    val samples: FloatArray,
    val sampleRate: Int,
) {
    fun save(filename: String) =
        saveImpl(filename = filename, samples = samples, sampleRate = sampleRate)

    private external fun saveImpl(
        filename: String,
        samples: FloatArray,
        sampleRate: Int
    ): Boolean
}


================================================
FILE: sherpa-onnx/kotlin-api/FeatureConfig.kt
================================================
package com.k2fsa.sherpa.onnx

data class FeatureConfig(
    var sampleRate: Int = 16000,
    var featureDim: Int = 80,
    var dither: Float = 0.0f
)

fun getFeatureConfig(sampleRate: Int, featureDim: Int): FeatureConfig {
    return FeatureConfig(sampleRate = sampleRate, featureDim = featureDim)
}


================================================
FILE: sherpa-onnx/kotlin-api/HomophoneReplacerConfig.kt
================================================
package com.k2fsa.sherpa.onnx

data class HomophoneReplacerConfig(
    var dictDir: String = "", // unused
    var lexicon: String = "",
    var ruleFsts: String = "",
)


================================================
FILE: sherpa-onnx/kotlin-api/KeywordSpotter.kt
================================================
// Copyright (c)  2024  Xiaomi Corporation
package com.k2fsa.sherpa.onnx

import android.content.res.AssetManager

data class KeywordSpotterConfig(
    var featConfig: FeatureConfig = FeatureConfig(),
    var modelConfig: OnlineModelConfig = OnlineModelConfig(),
    var maxActivePaths: Int = 4,
    var keywordsFile: String = "keywords.txt",
    var keywordsScore: Float = 1.5f,
    var keywordsThreshold: Float = 0.25f,
    var numTrailingBlanks: Int = 2,
)

data class KeywordSpotterResult(
    val keyword: String,
    val tokens: Array<String>,
    val timestamps: FloatArray,
    // TODO(fangjun): Add more fields
) {
    override fun toString(): String {
        val tokensStr = tokens.joinToString(", ")
        val timestampsStr = timestamps.joinToString(", ") { "%.2f".format(it) }
        return "Keyword: $keyword\nTokens: [$tokensStr]\nTimestamps: [$timestampsStr]"
    }
}

class KeywordSpotter(
    assetManager: AssetManager? = null,
    val config: KeywordSpotterConfig,
) {
    private var ptr: Long

    init {
        ptr = if (assetManager != null) {
            newFromAsset(assetManager, config)
        } else {
            newFromFile(config)
        }
    }

    protected fun finalize() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0
        }
    }

    fun release() = finalize()

    fun createStream(keywords: String = ""): OnlineStream {
        val p = createStream(ptr, keywords)
        return OnlineStream(p)
    }

    fun decode(stream: OnlineStream) = decode(ptr, stream.ptr)
    fun reset(stream: OnlineStream) = reset(ptr, stream.ptr)
    fun isReady(stream: OnlineStream) = isReady(ptr, stream.ptr)
    fun getResult(stream: OnlineStream): KeywordSpotterResult {
        return getResult(ptr, stream.ptr)
    }

    private external fun delete(ptr: Long)

    private external fun newFromAsset(
        assetManager: AssetManager,
        config: KeywordSpotterConfig,
    ): Long

    private external fun newFromFile(
        config: KeywordSpotterConfig,
    ): Long

    private external fun createStream(ptr: Long, keywords: String): Long
    private external fun isReady(ptr: Long, streamPtr: Long): Boolean
    private external fun decode(ptr: Long, streamPtr: Long)
    private external fun reset(ptr: Long, streamPtr: Long)
    private external fun getResult(ptr: Long, streamPtr: Long): KeywordSpotterResult

    companion object {
        init {
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}

/*
Please see
https://k2-fsa.github.io/sherpa/onnx/kws/pretrained_models/index.html
for a list of pre-trained models.

We only add a few here. Please change the following code
to add your own. (It should be straightforward to add a new model
by following the code)

@param type
0 - sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01 (Chinese)
    https://www.modelscope.cn/models/pkufool/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/summary

1 - sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01 (English)
    https://www.modelscope.cn/models/pkufool/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/summary

 */
fun getKwsModelConfig(type: Int): OnlineModelConfig? {
    when (type) {
        0 -> {
            val modelDir = "sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01"
            return OnlineModelConfig(
                transducer = OnlineTransducerModelConfig(
                    encoder = "$modelDir/encoder-epoch-12-avg-2-chunk-16-left-64.onnx",
                    decoder = "$modelDir/decoder-epoch-12-avg-2-chunk-16-left-64.onnx",
                    joiner = "$modelDir/joiner-epoch-12-avg-2-chunk-16-left-64.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "zipformer2",
            )
        }

        1 -> {
            val modelDir = "sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01"
            return OnlineModelConfig(
                transducer = OnlineTransducerModelConfig(
                    encoder = "$modelDir/encoder-epoch-12-avg-2-chunk-16-left-64.onnx",
                    decoder = "$modelDir/decoder-epoch-12-avg-2-chunk-16-left-64.onnx",
                    joiner = "$modelDir/joiner-epoch-12-avg-2-chunk-16-left-64.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "zipformer2",
            )
        }

    }
    return null
}

/*
 * Get the default keywords for each model.
 * Caution: The types and modelDir should be the same as those in getModelConfig
 * function above.
 */
fun getKeywordsFile(type: Int): String {
    when (type) {
        0 -> {
            val modelDir = "sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01"
            return "$modelDir/keywords.txt"
        }

        1 -> {
            val modelDir = "sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01"
            return "$modelDir/keywords.txt"
        }

    }
    return ""
}


================================================
FILE: sherpa-onnx/kotlin-api/OfflinePunctuation.kt
================================================
package com.k2fsa.sherpa.onnx

import android.content.res.AssetManager

data class OfflinePunctuationModelConfig(
    var ctTransformer: String = "",
    var numThreads: Int = 1,
    var debug: Boolean = false,
    var provider: String = "cpu",
)


data class OfflinePunctuationConfig(
    var model: OfflinePunctuationModelConfig,
)

class OfflinePunctuation(
    assetManager: AssetManager? = null,
    config: OfflinePunctuationConfig,
) {
    private var ptr: Long

    init {
        ptr = if (assetManager != null) {
            newFromAsset(assetManager, config)
        } else {
            newFromFile(config)
        }
    }

    protected fun finalize() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0
        }
    }

    fun release() = finalize()

    fun addPunctuation(text: String) = addPunctuation(ptr, text)

    private external fun delete(ptr: Long)

    private external fun addPunctuation(ptr: Long, text: String): String

    private external fun newFromAsset(
        assetManager: AssetManager,
        config: OfflinePunctuationConfig,
    ): Long

    private external fun newFromFile(
        config: OfflinePunctuationConfig,
    ): Long

    companion object {
        init {
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}


================================================
FILE: sherpa-onnx/kotlin-api/OfflineRecognizer.kt
================================================
package com.k2fsa.sherpa.onnx

import android.content.res.AssetManager

data class OfflineRecognizerResult(
    val text: String,
    val tokens: Array<String>,
    val timestamps: FloatArray,
    val lang: String,
    val emotion: String,
    val event: String,

    // valid only for TDT models
    val durations: FloatArray,
)

data class OfflineTransducerModelConfig(
    var encoder: String = "",
    var decoder: String = "",
    var joiner: String = "",
)

data class OfflineParaformerModelConfig(
    var model: String = "",
    var qnnConfig: QnnConfig = QnnConfig(),
)

data class OfflineNemoEncDecCtcModelConfig(
    var model: String = "",
)

data class OfflineDolphinModelConfig(
    var model: String = "",
)

data class OfflineZipformerCtcModelConfig(
    var model: String = "",
    var qnnConfig: QnnConfig = QnnConfig(),
)

data class OfflineWenetCtcModelConfig(
    var model: String = "",
)

data class OfflineOmnilingualAsrCtcModelConfig(
    var model: String = "",
)

data class OfflineMedAsrCtcModelConfig(
    var model: String = "",
)

data class OfflineFireRedAsrCtcModelConfig(
    var model: String = "",
)

data class OfflineFunAsrNanoModelConfig(
    var encoderAdaptor: String = "",
    var llm: String = "",
    var embedding: String = "",
    var tokenizer: String = "",
    var systemPrompt: String = "You are a helpful assistant.",
    var userPrompt: String = "语音转写：",
    var maxNewTokens: Int = 512,
    var temperature: Float = 1e-6f,
    var topP: Float = 0.8f,
    var seed: Int = 42,
    var language: String = "",
    var itn: Boolean = true,
    var hotwords: String = "",
)

data class OfflineWhisperModelConfig(
    var encoder: String = "",
    var decoder: String = "",
    var language: String = "en", // Used with multilingual model
    var task: String = "transcribe", // transcribe or translate
    var tailPaddings: Int = 1000, // Padding added at the end of the samples
    var enableTokenTimestamps: Boolean = false,
    var enableSegmentTimestamps: Boolean = false,
)

data class OfflineCanaryModelConfig(
    var encoder: String = "",
    var decoder: String = "",
    var srcLang: String = "en",
    var tgtLang: String = "en",
    var usePnc: Boolean = true,
)

data class OfflineFireRedAsrModelConfig(
    var encoder: String = "",
    var decoder: String = "",
)

// For moonshine v1, you need four models.
// For moonshine v2, you need two models.
// - v1: preprocessor, encoder, uncachedDecoder, cachedDecoder
// - v2: encoder, mergedDecoder
data class OfflineMoonshineModelConfig(
    var preprocessor: String = "",
    var encoder: String = "",
    var uncachedDecoder: String = "",
    var cachedDecoder: String = "",
    var mergedDecoder: String = "",
)

data class OfflineSenseVoiceModelConfig(
    var model: String = "",
    var language: String = "",
    var useInverseTextNormalization: Boolean = true,
    var qnnConfig: QnnConfig = QnnConfig(),
)

data class OfflineModelConfig(
    var transducer: OfflineTransducerModelConfig = OfflineTransducerModelConfig(),
    var paraformer: OfflineParaformerModelConfig = OfflineParaformerModelConfig(),
    var whisper: OfflineWhisperModelConfig = OfflineWhisperModelConfig(),
    var fireRedAsr: OfflineFireRedAsrModelConfig = OfflineFireRedAsrModelConfig(),
    var moonshine: OfflineMoonshineModelConfig = OfflineMoonshineModelConfig(),
    var nemo: OfflineNemoEncDecCtcModelConfig = OfflineNemoEncDecCtcModelConfig(),
    var senseVoice: OfflineSenseVoiceModelConfig = OfflineSenseVoiceModelConfig(),
    var dolphin: OfflineDolphinModelConfig = OfflineDolphinModelConfig(),
    var zipformerCtc: OfflineZipformerCtcModelConfig = OfflineZipformerCtcModelConfig(),
    var wenetCtc: OfflineWenetCtcModelConfig = OfflineWenetCtcModelConfig(),
    var omnilingual: OfflineOmnilingualAsrCtcModelConfig = OfflineOmnilingualAsrCtcModelConfig(),
    var medasr: OfflineMedAsrCtcModelConfig = OfflineMedAsrCtcModelConfig(),
    var funasrNano: OfflineFunAsrNanoModelConfig = OfflineFunAsrNanoModelConfig(),
    var fireRedAsrCtc: OfflineFireRedAsrCtcModelConfig = OfflineFireRedAsrCtcModelConfig(),
    var canary: OfflineCanaryModelConfig = OfflineCanaryModelConfig(),
    var teleSpeech: String = "",
    var numThreads: Int = 1,
    var debug: Boolean = false,
    var provider: String = "cpu",
    var modelType: String = "",
    var tokens: String = "",
    var modelingUnit: String = "",
    var bpeVocab: String = "",
)

data class OfflineRecognizerConfig(
    var featConfig: FeatureConfig = FeatureConfig(),
    var modelConfig: OfflineModelConfig = OfflineModelConfig(),
    // var lmConfig: OfflineLMConfig(), // TODO(fangjun): enable it
    var hr: HomophoneReplacerConfig = HomophoneReplacerConfig(),
    var decodingMethod: String = "greedy_search",
    var maxActivePaths: Int = 4,
    var hotwordsFile: String = "",
    var hotwordsScore: Float = 1.5f,
    var ruleFsts: String = "",
    var ruleFars: String = "",
    var blankPenalty: Float = 0.0f,
)

class OfflineRecognizer(
    assetManager: AssetManager? = null,
    val config: OfflineRecognizerConfig,
) {
    private var ptr: Long

    init {
        ptr = if (assetManager != null) {
            newFromAsset(assetManager, config)
        } else {
            newFromFile(config)
        }
    }

    protected fun finalize() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0
        }
    }

    fun release() = finalize()

    fun createStream(): OfflineStream {
        val p = createStream(ptr)
        return OfflineStream(p)
    }

    fun getResult(stream: OfflineStream): OfflineRecognizerResult {
        return getResult(stream.ptr)
    }

    fun decode(stream: OfflineStream) = decode(ptr, stream.ptr)

    fun setConfig(config: OfflineRecognizerConfig) = setConfig(ptr, config)

    private external fun delete(ptr: Long)

    private external fun createStream(ptr: Long): Long

    private external fun setConfig(ptr: Long, config: OfflineRecognizerConfig)

    private external fun newFromAsset(
        assetManager: AssetManager,
        config: OfflineRecognizerConfig,
    ): Long

    private external fun newFromFile(
        config: OfflineRecognizerConfig,
    ): Long

    private external fun decode(ptr: Long, streamPtr: Long)

    private external fun getResult(streamPtr: Long): OfflineRecognizerResult

    companion object {
        init {
            System.loadLibrary("sherpa-onnx-jni")
        }

        @JvmStatic
        external fun prependAdspLibraryPath(newPath: String) // for qnn
    }
}

/*
Please see
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
for a list of pre-trained models.

We only add a few here. Please change the following code
to add your own. (It should be straightforward to add a new model
by following the code)

@param type

0 - csukuangfj/sherpa-onnx-paraformer-zh-2023-09-14 (Chinese)
    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2023-09-14-chinese
    int8

1 - icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04 (English)
    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#icefall-asr-multidataset-pruned-transducer-stateless7-2023-05-04-english
    encoder int8, decoder/joiner float32

2 - sherpa-onnx-whisper-tiny.en
    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html#tiny-en
    encoder int8, decoder int8

3 - sherpa-onnx-whisper-base.en
    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html#tiny-en
    encoder int8, decoder int8

4 - pkufool/icefall-asr-zipformer-wenetspeech-20230615 (Chinese)
    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#pkufool-icefall-asr-zipformer-wenetspeech-20230615-chinese
    encoder/joiner int8, decoder fp32

 */
fun getOfflineModelConfig(type: Int): OfflineModelConfig? {
    when (type) {
        0 -> {
            val modelDir = "sherpa-onnx-paraformer-zh-2023-09-14"
            return OfflineModelConfig(
                paraformer = OfflineParaformerModelConfig(
                    model = "$modelDir/model.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "paraformer",
            )
        }

        1 -> {
            val modelDir = "icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04"
            return OfflineModelConfig(
                transducer = OfflineTransducerModelConfig(
                    encoder = "$modelDir/encoder-epoch-30-avg-4.int8.onnx",
                    decoder = "$modelDir/decoder-epoch-30-avg-4.onnx",
                    joiner = "$modelDir/joiner-epoch-30-avg-4.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "transducer",
            )
        }

        2 -> {
            val modelDir = "sherpa-onnx-whisper-tiny.en"
            return OfflineModelConfig(
                whisper = OfflineWhisperModelConfig(
                    encoder = "$modelDir/tiny.en-encoder.int8.onnx",
                    decoder = "$modelDir/tiny.en-decoder.int8.onnx",
                ),
                tokens = "$modelDir/tiny.en-tokens.txt",
                modelType = "whisper",
            )
        }

        3 -> {
            val modelDir = "sherpa-onnx-whisper-base.en"
            return OfflineModelConfig(
                whisper = OfflineWhisperModelConfig(
                    encoder = "$modelDir/base.en-encoder.int8.onnx",
                    decoder = "$modelDir/base.en-decoder.int8.onnx",
                ),
                tokens = "$modelDir/base.en-tokens.txt",
                modelType = "whisper",
            )
        }


        4 -> {
            val modelDir = "icefall-asr-zipformer-wenetspeech-20230615"
            return OfflineModelConfig(
                transducer = OfflineTransducerModelConfig(
                    encoder = "$modelDir/encoder-epoch-12-avg-4.int8.onnx",
                    decoder = "$modelDir/decoder-epoch-12-avg-4.onnx",
                    joiner = "$modelDir/joiner-epoch-12-avg-4.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "transducer",
            )
        }

        5 -> {
            val modelDir = "sherpa-onnx-zipformer-multi-zh-hans-2023-9-2"
            return OfflineModelConfig(
                transducer = OfflineTransducerModelConfig(
                    encoder = "$modelDir/encoder-epoch-20-avg-1.int8.onnx",
                    decoder = "$modelDir/decoder-epoch-20-avg-1.onnx",
                    joiner = "$modelDir/joiner-epoch-20-avg-1.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "transducer",
            )
        }

        6 -> {
            val modelDir = "sherpa-onnx-nemo-ctc-en-citrinet-512"
            return OfflineModelConfig(
                nemo = OfflineNemoEncDecCtcModelConfig(
                    model = "$modelDir/model.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        7 -> {
            val modelDir = "sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k"
            return OfflineModelConfig(
                nemo = OfflineNemoEncDecCtcModelConfig(
                    model = "$modelDir/model.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        8 -> {
            val modelDir = "sherpa-onnx-nemo-fast-conformer-ctc-en-24500"
            return OfflineModelConfig(
                nemo = OfflineNemoEncDecCtcModelConfig(
                    model = "$modelDir/model.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        9 -> {
            val modelDir = "sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288"
            return OfflineModelConfig(
                nemo = OfflineNemoEncDecCtcModelConfig(
                    model = "$modelDir/model.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        10 -> {
            val modelDir = "sherpa-onnx-nemo-fast-conformer-ctc-es-1424"
            return OfflineModelConfig(
                nemo = OfflineNemoEncDecCtcModelConfig(
                    model = "$modelDir/model.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        11 -> {
            val modelDir = "sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04"
            return OfflineModelConfig(
                teleSpeech = "$modelDir/model.int8.onnx",
                tokens = "$modelDir/tokens.txt",
                modelType = "telespeech_ctc",
            )
        }

        12 -> {
            val modelDir = "sherpa-onnx-zipformer-thai-2024-06-20"
            return OfflineModelConfig(
                transducer = OfflineTransducerModelConfig(
                    encoder = "$modelDir/encoder-epoch-12-avg-5.int8.onnx",
                    decoder = "$modelDir/decoder-epoch-12-avg-5.onnx",
                    joiner = "$modelDir/joiner-epoch-12-avg-5.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "transducer",
            )
        }

        13 -> {
            val modelDir = "sherpa-onnx-zipformer-korean-2024-06-24"
            return OfflineModelConfig(
                transducer = OfflineTransducerModelConfig(
                    encoder = "$modelDir/encoder-epoch-99-avg-1.int8.onnx",
                    decoder = "$modelDir/decoder-epoch-99-avg-1.onnx",
                    joiner = "$modelDir/joiner-epoch-99-avg-1.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "transducer",
            )
        }

        14 -> {
            val modelDir = "sherpa-onnx-paraformer-zh-small-2024-03-09"
            return OfflineModelConfig(
                paraformer = OfflineParaformerModelConfig(
                    model = "$modelDir/model.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "paraformer",
            )
        }

        15 -> {
            val modelDir = "sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17"
            return OfflineModelConfig(
                senseVoice = OfflineSenseVoiceModelConfig(
                    model = "$modelDir/model.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        16 -> {
            val modelDir = "sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01"
            return OfflineModelConfig(
                transducer = OfflineTransducerModelConfig(
                    encoder = "$modelDir/encoder-epoch-99-avg-1.int8.onnx",
                    decoder = "$modelDir/decoder-epoch-99-avg-1.onnx",
                    joiner = "$modelDir/joiner-epoch-99-avg-1.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "transducer",
            )
        }

        17 -> {
            val modelDir = "sherpa-onnx-zipformer-ru-2024-09-18"
            return OfflineModelConfig(
                transducer = OfflineTransducerModelConfig(
                    encoder = "$modelDir/encoder.int8.onnx",
                    decoder = "$modelDir/decoder.onnx",
                    joiner = "$modelDir/joiner.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "transducer",
            )
        }

        18 -> {
            val modelDir = "sherpa-onnx-small-zipformer-ru-2024-09-18"
            return OfflineModelConfig(
                transducer = OfflineTransducerModelConfig(
                    encoder = "$modelDir/encoder.int8.onnx",
                    decoder = "$modelDir/decoder.onnx",
                    joiner = "$modelDir/joiner.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "transducer",
            )
        }

        19 -> {
            val modelDir = "sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24"
            return OfflineModelConfig(
                nemo = OfflineNemoEncDecCtcModelConfig(
                    model = "$modelDir/model.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        20 -> {
            val modelDir = "sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24"
            return OfflineModelConfig(
                transducer = OfflineTransducerModelConfig(
                    encoder = "$modelDir/encoder.int8.onnx",
                    decoder = "$modelDir/decoder.onnx",
                    joiner = "$modelDir/joiner.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "nemo_transducer",
            )
        }

        21 -> {
            val modelDir = "sherpa-onnx-moonshine-tiny-en-int8"
            return OfflineModelConfig(
                moonshine = OfflineMoonshineModelConfig(
                    preprocessor = "$modelDir/preprocess.onnx",
                    encoder = "$modelDir/encode.int8.onnx",
                    uncachedDecoder = "$modelDir/uncached_decode.int8.onnx",
                    cachedDecoder = "$modelDir/cached_decode.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        22 -> {
            val modelDir = "sherpa-onnx-moonshine-base-en-int8"
            return OfflineModelConfig(
                moonshine = OfflineMoonshineModelConfig(
                    preprocessor = "$modelDir/preprocess.onnx",
                    encoder = "$modelDir/encode.int8.onnx",
                    uncachedDecoder = "$modelDir/uncached_decode.int8.onnx",
                    cachedDecoder = "$modelDir/cached_decode.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        23 -> {
            val modelDir = "sherpa-onnx-zipformer-zh-en-2023-11-22"
            return OfflineModelConfig(
                transducer = OfflineTransducerModelConfig(
                    encoder = "$modelDir/encoder-epoch-34-avg-19.int8.onnx",
                    decoder = "$modelDir/decoder-epoch-34-avg-19.onnx",
                    joiner = "$modelDir/joiner-epoch-34-avg-19.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "transducer",
            )
        }

        24 -> {
            val modelDir = "sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16"
            return OfflineModelConfig(
                fireRedAsr = OfflineFireRedAsrModelConfig(
                    encoder = "$modelDir/encoder.int8.onnx",
                    decoder = "$modelDir/decoder.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        25 -> {
            val modelDir = "sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02"
            return OfflineModelConfig(
                dolphin = OfflineDolphinModelConfig(
                    model = "$modelDir/model.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        26 -> {
            val modelDir = "sherpa-onnx-zipformer-vi-int8-2025-04-20"
            return OfflineModelConfig(
                transducer = OfflineTransducerModelConfig(
                    encoder = "$modelDir/encoder-epoch-12-avg-8.int8.onnx",
                    decoder = "$modelDir/decoder-epoch-12-avg-8.onnx",
                    joiner = "$modelDir/joiner-epoch-12-avg-8.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "transducer",
            )
        }

        27 -> {
            val modelDir = "sherpa-onnx-nemo-ctc-giga-am-v2-russian-2025-04-19"
            return OfflineModelConfig(
                nemo = OfflineNemoEncDecCtcModelConfig(
                    model = "$modelDir/model.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        28 -> {
            val modelDir = "sherpa-onnx-nemo-transducer-giga-am-v2-russian-2025-04-19"
            return OfflineModelConfig(
                transducer = OfflineTransducerModelConfig(
                    encoder = "$modelDir/encoder.int8.onnx",
                    decoder = "$modelDir/decoder.onnx",
                    joiner = "$modelDir/joiner.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "nemo_transducer",
            )
        }

        29 -> {
            val modelDir = "sherpa-onnx-zipformer-ru-int8-2025-04-20"
            return OfflineModelConfig(
                transducer = OfflineTransducerModelConfig(
                    encoder = "$modelDir/encoder.int8.onnx",
                    decoder = "$modelDir/decoder.onnx",
                    joiner = "$modelDir/joiner.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "transducer",
            )
        }

        30 -> {
            val modelDir = "sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8"
            return OfflineModelConfig(
                transducer = OfflineTransducerModelConfig(
                    encoder = "$modelDir/encoder.int8.onnx",
                    decoder = "$modelDir/decoder.int8.onnx",
                    joiner = "$modelDir/joiner.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "nemo_transducer",
            )
        }

        31 -> {
            val modelDir = "sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03"
            return OfflineModelConfig(
                zipformerCtc = OfflineZipformerCtcModelConfig(
                    model = "$modelDir/model.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        32 -> {
            val modelDir = "sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8"
            return OfflineModelConfig(
                canary = OfflineCanaryModelConfig(
                    encoder = "$modelDir/encoder.int8.onnx",
                    decoder = "$modelDir/decoder.int8.onnx",
                    srcLang = "en",
                    tgtLang = "en",
                    usePnc = true,
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        33 -> {
            val modelDir = "sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8"
            return OfflineModelConfig(
                nemo = OfflineNemoEncDecCtcModelConfig(
                    model = "$modelDir/model.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        34 -> {
            val modelDir = "sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8"
            return OfflineModelConfig(
                nemo = OfflineNemoEncDecCtcModelConfig(
                    model = "$modelDir/model.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        35 -> {
            val modelDir = "sherpa-onnx-nemo-transducer-stt_pt_fastconformer_hybrid_large_pc-int8"
            return OfflineModelConfig(
                transducer = OfflineTransducerModelConfig(
                    encoder = "$modelDir/encoder.int8.onnx",
                    decoder = "$modelDir/decoder.int8.onnx",
                    joiner = "$modelDir/joiner.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "nemo_transducer",
            )
        }

        36 -> {
            val modelDir = "sherpa-onnx-nemo-stt_pt_fastconformer_hybrid_large_pc-int8"
            return OfflineModelConfig(
                nemo = OfflineNemoEncDecCtcModelConfig(
                    model = "$modelDir/model.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        37 -> {
            val modelDir = "sherpa-onnx-nemo-transducer-stt_de_fastconformer_hybrid_large_pc-int8"
            return OfflineModelConfig(
                transducer = OfflineTransducerModelConfig(
                    encoder = "$modelDir/encoder.int8.onnx",
                    decoder = "$modelDir/decoder.int8.onnx",
                    joiner = "$modelDir/joiner.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "nemo_transducer",
            )
        }

        38 -> {
            val modelDir = "sherpa-onnx-nemo-stt_de_fastconformer_hybrid_large_pc-int8"
            return OfflineModelConfig(
                nemo = OfflineNemoEncDecCtcModelConfig(
                    model = "$modelDir/model.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        39 -> {
            val modelDir = "sherpa-onnx-zipformer-ctc-small-zh-int8-2025-07-16"
            return OfflineModelConfig(
                zipformerCtc = OfflineZipformerCtcModelConfig(
                    model = "$modelDir/model.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        40 -> {
            val modelDir = "sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8"
            return OfflineModelConfig(
                transducer = OfflineTransducerModelConfig(
                    encoder = "$modelDir/encoder.int8.onnx",
                    decoder = "$modelDir/decoder.int8.onnx",
                    joiner = "$modelDir/joiner.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "nemo_transducer",
            )
        }

        41 -> {
            val modelDir = "sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09"
            return OfflineModelConfig(
                senseVoice = OfflineSenseVoiceModelConfig(
                    model = "$modelDir/model.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        42 -> {
            val modelDir =
                "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10"
            return OfflineModelConfig(
                wenetCtc = OfflineWenetCtcModelConfig(
                    model = "$modelDir/model.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        43 -> {
            val modelDir = "sherpa-onnx-paraformer-zh-int8-2025-10-07"
            return OfflineModelConfig(
                paraformer = OfflineParaformerModelConfig(
                    model = "$modelDir/model.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "paraformer",
            )
        }

        44 -> {
            val modelDir = "sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12"
            return OfflineModelConfig(
                omnilingual = OfflineOmnilingualAsrCtcModelConfig(
                    model = "$modelDir/model.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        45 -> {
            val modelDir = "sherpa-onnx-medasr-ctc-en-int8-2025-12-25"
            return OfflineModelConfig(
                medasr = OfflineMedAsrCtcModelConfig(
                    model = "$modelDir/model.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        46 -> {
            val modelDir = "sherpa-onnx-funasr-nano-int8-2025-12-30"
            return OfflineModelConfig(
                funasrNano = OfflineFunAsrNanoModelConfig(
                    encoderAdaptor = "$modelDir/encoder_adaptor.int8.onnx",
                    llm = "$modelDir/llm.int8.onnx",
                    embedding = "$modelDir/embedding.int8.onnx",
                    tokenizer = "$modelDir/Qwen3-0.6B",
                ),
                tokens = "",
            )
        }

        47 -> {
            val modelDir = "sherpa-onnx-wenetspeech-wu-u2pp-conformer-ctc-zh-int8-2026-02-03"
            return OfflineModelConfig(
                wenetCtc = OfflineWenetCtcModelConfig(
                    model = "$modelDir/model.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        48 -> {
            val modelDir = "sherpa-onnx-wenetspeech-wu-u2pp-conformer-ctc-zh-2026-02-03"
            return OfflineModelConfig(
                wenetCtc = OfflineWenetCtcModelConfig(
                    model = "$modelDir/model.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        49 -> {
            val modelDir = "sherpa-onnx-zipformer-vi-30M-int8-2026-02-09"
            return OfflineModelConfig(
                transducer = OfflineTransducerModelConfig(
                    encoder = "$modelDir/encoder.int8.onnx",
                    decoder = "$modelDir/decoder.onnx",
                    joiner = "$modelDir/joiner.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "transducer",
            )
        }

        50 -> {
            val modelDir = "sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25"
            return OfflineModelConfig(
                fireRedAsrCtc = OfflineFireRedAsrCtcModelConfig(
                    model = "$modelDir/model.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        51 -> {
            val modelDir = "sherpa-onnx-moonshine-tiny-ko-quantized-2026-02-27"
            return OfflineModelConfig(
                moonshine = OfflineMoonshineModelConfig(
                    encoder = "$modelDir/encoder_model.ort",
                    mergedDecoder = "$modelDir/decoder_model_merged.ort",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        52 -> {
            val modelDir = "sherpa-onnx-moonshine-tiny-ja-quantized-2026-02-27"
            return OfflineModelConfig(
                moonshine = OfflineMoonshineModelConfig(
                    encoder = "$modelDir/encoder_model.ort",
                    mergedDecoder = "$modelDir/decoder_model_merged.ort",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        53 -> {
            val modelDir = "sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27"
            return OfflineModelConfig(
                moonshine = OfflineMoonshineModelConfig(
                    encoder = "$modelDir/encoder_model.ort",
                    mergedDecoder = "$modelDir/decoder_model_merged.ort",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        54 -> {
            val modelDir = "sherpa-onnx-moonshine-base-zh-quantized-2026-02-27"
            return OfflineModelConfig(
                moonshine = OfflineMoonshineModelConfig(
                    encoder = "$modelDir/encoder_model.ort",
                    mergedDecoder = "$modelDir/decoder_model_merged.ort",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        55 -> {
            val modelDir = "sherpa-onnx-moonshine-base-vi-quantized-2026-02-27"
            return OfflineModelConfig(
                moonshine = OfflineMoonshineModelConfig(
                    encoder = "$modelDir/encoder_model.ort",
                    mergedDecoder = "$modelDir/decoder_model_merged.ort",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        56 -> {
            val modelDir = "sherpa-onnx-moonshine-base-uk-quantized-2026-02-27"
            return OfflineModelConfig(
                moonshine = OfflineMoonshineModelConfig(
                    encoder = "$modelDir/encoder_model.ort",
                    mergedDecoder = "$modelDir/decoder_model_merged.ort",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        57 -> {
            val modelDir = "sherpa-onnx-moonshine-base-ja-quantized-2026-02-27"
            return OfflineModelConfig(
                moonshine = OfflineMoonshineModelConfig(
                    encoder = "$modelDir/encoder_model.ort",
                    mergedDecoder = "$modelDir/decoder_model_merged.ort",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        58 -> {
            val modelDir = "sherpa-onnx-moonshine-base-es-quantized-2026-02-27"
            return OfflineModelConfig(
                moonshine = OfflineMoonshineModelConfig(
                    encoder = "$modelDir/encoder_model.ort",
                    mergedDecoder = "$modelDir/decoder_model_merged.ort",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        59 -> {
            val modelDir = "sherpa-onnx-moonshine-base-en-quantized-2026-02-27"
            return OfflineModelConfig(
                moonshine = OfflineMoonshineModelConfig(
                    encoder = "$modelDir/encoder_model.ort",
                    mergedDecoder = "$modelDir/decoder_model_merged.ort",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        60 -> {
            val modelDir = "sherpa-onnx-moonshine-base-ar-quantized-2026-02-27"
            return OfflineModelConfig(
                moonshine = OfflineMoonshineModelConfig(
                    encoder = "$modelDir/encoder_model.ort",
                    mergedDecoder = "$modelDir/decoder_model_merged.ort",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        9000 -> {
            val modelDir =
                "sherpa-onnx-qnn-5-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8-android-aarch64"
            return OfflineModelConfig(
                provider = "qnn",
                senseVoice = OfflineSenseVoiceModelConfig(
                    model = "$modelDir/libmodel.so",
                    qnnConfig = QnnConfig(
                        // Please copy libQnnHtp.so and libQnnSystem.so to jniLibs/arm64-v8a by yourself
                        //
                        // model.bin is created in the first run and is used from the second run
                        // to speed up the initialization
                        backendLib = "libQnnHtp.so",
                        systemLib = "libQnnSystem.so",
                        contextBinary = "$modelDir/model.bin",
                    ),
                ),
                tokens = "$modelDir/tokens.txt",
                debug = true,
            )
        }

        9001 -> {
            val modelDir =
                "sherpa-onnx-qnn-8-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8-android-aarch64"
            return OfflineModelConfig(
                provider = "qnn",
                senseVoice = OfflineSenseVoiceModelConfig(
                    model = "$modelDir/libmodel.so",
                    qnnConfig = QnnConfig(
                        backendLib = "libQnnHtp.so",
                        systemLib = "libQnnSystem.so",
                        contextBinary = "$modelDir/model.bin",
                    ),
                ),
                tokens = "$modelDir/tokens.txt",
                debug = true,
            )
        }

        9002 -> {
            val modelDir =
                "sherpa-onnx-qnn-10-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8-android-aarch64"
            return OfflineModelConfig(
                provider = "qnn",
                senseVoice = OfflineSenseVoiceModelConfig(
                    model = "$modelDir/libmodel.so",
                    qnnConfig = QnnConfig(
                        backendLib = "libQnnHtp.so",
                        systemLib = "libQnnSystem.so",
                        contextBinary = "$modelDir/model.bin",
                    ),
                ),
                tokens = "$modelDir/tokens.txt",
                debug = true,
            )
        }

        9003 -> {
            val modelDir =
                "sherpa-onnx-qnn-13-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8-android-aarch64"
            return OfflineModelConfig(
                provider = "qnn",
                senseVoice = OfflineSenseVoiceModelConfig(
                    model = "$modelDir/libmodel.so",
                    qnnConfig = QnnConfig(
                        backendLib = "libQnnHtp.so",
                        systemLib = "libQnnSystem.so",
                        contextBinary = "$modelDir/model.bin",
                    ),
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        9004 -> {
            val modelDir =
                "sherpa-onnx-qnn-15-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8-android-aarch64"
            return OfflineModelConfig(
                provider = "qnn",
                senseVoice = OfflineSenseVoiceModelConfig(
                    model = "$modelDir/libmodel.so",
                    qnnConfig = QnnConfig(
                        backendLib = "libQnnHtp.so",
                        systemLib = "libQnnSystem.so",
                        contextBinary = "$modelDir/model.bin",
                    ),
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        9005 -> {
            val modelDir =
                "sherpa-onnx-qnn-18-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8-android-aarch64"
            return OfflineModelConfig(
                provider = "qnn",
                senseVoice = OfflineSenseVoiceModelConfig(
                    model = "$modelDir/libmodel.so",
                    qnnConfig = QnnConfig(
                        backendLib = "libQnnHtp.so",
                        systemLib = "libQnnSystem.so",
                        contextBinary = "$modelDir/model.bin",
                    ),
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        9006 -> {
            val modelDir =
                "sherpa-onnx-qnn-20-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8-android-aarch64"
            return OfflineModelConfig(
                provider = "qnn",
                senseVoice = OfflineSenseVoiceModelConfig(
                    model = "$modelDir/libmodel.so",
                    qnnConfig = QnnConfig(
                        backendLib = "libQnnHtp.so",
                        systemLib = "libQnnSystem.so",
                        contextBinary = "$modelDir/model.bin",
                    ),
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        9007 -> {
            val modelDir =
                "sherpa-onnx-qnn-23-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8-android-aarch64"
            return OfflineModelConfig(
                provider = "qnn",
                senseVoice = OfflineSenseVoiceModelConfig(
                    model = "$modelDir/libmodel.so",
                    qnnConfig = QnnConfig(
                        backendLib = "libQnnHtp.so",
                        systemLib = "libQnnSystem.so",
                        contextBinary = "$modelDir/model.bin",
                    ),
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        9008 -> {
            val modelDir =
                "sherpa-onnx-qnn-25-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8-android-aarch64"
            return OfflineModelConfig(
                provider = "qnn",
                senseVoice = OfflineSenseVoiceModelConfig(
                    model = "$modelDir/libmodel.so",
                    qnnConfig = QnnConfig(
                        backendLib = "libQnnHtp.so",
                        systemLib = "libQnnSystem.so",
                        contextBinary = "$modelDir/model.bin",
                    ),
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        9009 -> {
            val modelDir =
                "sherpa-onnx-qnn-28-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8-android-aarch64"
            return OfflineModelConfig(
                provider = "qnn",
                senseVoice = OfflineSenseVoiceModelConfig(
                    model = "$modelDir/libmodel.so",
                    qnnConfig = QnnConfig(
                        backendLib = "libQnnHtp.so",
                        systemLib = "libQnnSystem.so",
                        contextBinary = "$modelDir/model.bin",
                    ),
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        9010 -> {
            val modelDir =
                "sherpa-onnx-qnn-30-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8-android-aarch64"
            return OfflineModelConfig(
                provider = "qnn",
                senseVoice = OfflineSenseVoiceModelConfig(
                    model = "$modelDir/libmodel.so",
                    qnnConfig = QnnConfig(
                        backendLib = "libQnnHtp.so",
                        systemLib = "libQnnSystem.so",
                        contextBinary = "$modelDir/model.bin",
                    ),
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        9011 -> {
            val modelDir =
                "sherpa-onnx-qnn-5-seconds-zipformer-ctc-zh-2025-07-03-int8-android-aarch64"
            return OfflineModelConfig(
                provider = "qnn",
                zipformerCtc = OfflineZipformerCtcModelConfig(
                    model = "$modelDir/libmodel.so",
                    qnnConfig = QnnConfig(
                        backendLib = "libQnnHtp.so",
                        systemLib = "libQnnSystem.so",
                        contextBinary = "$modelDir/model.bin",
                    ),
                ),
                tokens = "$modelDir/tokens.txt",
                debug = true,
            )
        }

        9012 -> {
            val modelDir =
                "sherpa-onnx-qnn-8-seconds-zipformer-ctc-zh-2025-07-03-int8-android-aarch64"
            return OfflineModelConfig(
                provider = "qnn",
                zipformerCtc = OfflineZipformerCtcModelConfig(
                    model = "$modelDir/libmodel.so",
                    qnnConfig = QnnConfig(
                        backendLib = "libQnnHtp.so",
                        systemLib = "libQnnSystem.so",
                        contextBinary = "$modelDir/model.bin",
                    ),
                ),
                tokens = "$modelDir/tokens.txt",
                debug = true,
            )
        }

        9013 -> {
            val modelDir =
                "sherpa-onnx-qnn-10-seconds-zipformer-ctc-zh-2025-07-03-int8-android-aarch64"
            return OfflineModelConfig(
                provider = "qnn",
                zipformerCtc = OfflineZipformerCtcModelConfig(
                    model = "$modelDir/libmodel.so",
                    qnnConfig = QnnConfig(
                        backendLib = "libQnnHtp.so",
                        systemLib = "libQnnSystem.so",
                        contextBinary = "$modelDir/model.bin",
                    ),
                ),
                tokens = "$modelDir/tokens.txt",
                debug = true,
            )
        }

        9014 -> {
            val modelDir =
                "sherpa-onnx-qnn-13-seconds-zipformer-ctc-zh-2025-07-03-int8-android-aarch64"
            return OfflineModelConfig(
                provider = "qnn",
                zipformerCtc = OfflineZipformerCtcModelConfig(
                    model = "$modelDir/libmodel.so",
                    qnnConfig = QnnConfig(
                        backendLib = "libQnnHtp.so",
                        systemLib = "libQnnSystem.so",
                        contextBinary = "$modelDir/model.bin",
                    ),
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        9015 -> {
            val modelDir =
                "sherpa-onnx-qnn-15-seconds-zipformer-ctc-zh-2025-07-03-int8-android-aarch64"
            return OfflineModelConfig(
                provider = "qnn",
                zipformerCtc = OfflineZipformerCtcModelConfig(
                    model = "$modelDir/libmodel.so",
                    qnnConfig = QnnConfig(
                        backendLib = "libQnnHtp.so",
                        systemLib = "libQnnSystem.so",
                        contextBinary = "$modelDir/model.bin",
                    ),
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        9016 -> {
            val modelDir =
                "sherpa-onnx-qnn-18-seconds-zipformer-ctc-zh-2025-07-03-int8-android-aarch64"
            return OfflineModelConfig(
                provider = "qnn",
                zipformerCtc = OfflineZipformerCtcModelConfig(
                    model = "$modelDir/libmodel.so",
                    qnnConfig = QnnConfig(
                        backendLib = "libQnnHtp.so",
                        systemLib = "libQnnSystem.so",
                        contextBinary = "$modelDir/model.bin",
                    ),
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        9017 -> {
            val modelDir =
                "sherpa-onnx-qnn-20-seconds-zipformer-ctc-zh-2025-07-03-int8-android-aarch64"
            return OfflineModelConfig(
                provider = "qnn",
                zipformerCtc = OfflineZipformerCtcModelConfig(
                    model = "$modelDir/libmodel.so",
                    qnnConfig = QnnConfig(
                        backendLib = "libQnnHtp.so",
                        systemLib = "libQnnSystem.so",
                        contextBinary = "$modelDir/model.bin",
                    ),
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        9018 -> {
            val modelDir =
                "sherpa-onnx-qnn-23-seconds-zipformer-ctc-zh-2025-07-03-int8-android-aarch64"
            return OfflineModelConfig(
                provider = "qnn",
                zipformerCtc = OfflineZipformerCtcModelConfig(
                    model = "$modelDir/libmodel.so",
                    qnnConfig = QnnConfig(
                        backendLib = "libQnnHtp.so",
                        systemLib = "libQnnSystem.so",
                        contextBinary = "$modelDir/model.bin",
                    ),
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        9019 -> {
            val modelDir =
                "sherpa-onnx-qnn-25-seconds-zipformer-ctc-zh-2025-07-03-int8-android-aarch64"
            return OfflineModelConfig(
                provider = "qnn",
                zipformerCtc = OfflineZipformerCtcModelConfig(
                    model = "$modelDir/libmodel.so",
                    qnnConfig = QnnConfig(
                        backendLib = "libQnnHtp.so",
                        systemLib = "libQnnSystem.so",
                        contextBinary = "$modelDir/model.bin",
                    ),
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        9020 -> {
            val modelDir =
                "sherpa-onnx-qnn-28-seconds-zipformer-ctc-zh-2025-07-03-int8-android-aarch64"
            return OfflineModelConfig(
                provider = "qnn",
                zipformerCtc = OfflineZipformerCtcModelConfig(
                    model = "$modelDir/libmodel.so",
                    qnnConfig = QnnConfig(
                        backendLib = "libQnnHtp.so",
                        systemLib = "libQnnSystem.so",
                        contextBinary = "$modelDir/model.bin",
                    ),
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        9021 -> {
            val modelDir =
                "sherpa-onnx-qnn-30-seconds-zipformer-ctc-zh-2025-07-03-int8-android-aarch64"
            return OfflineModelConfig(
                provider = "qnn",
                zipformerCtc = OfflineZipformerCtcModelConfig(
                    model = "$modelDir/libmodel.so",
                    qnnConfig = QnnConfig(
                        backendLib = "libQnnHtp.so",
                        systemLib = "libQnnSystem.so",
                        contextBinary = "$modelDir/model.bin",
                    ),
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        9022 -> {
            // for Xiaomi 17 Pro
            val modelDir =
                "sherpa-onnx-qnn-SM8850-binary-10-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8"
            return OfflineModelConfig(
                provider = "qnn",
                senseVoice = OfflineSenseVoiceModelConfig(
                    qnnConfig = QnnConfig(
                        // Please copy libQnnHtp.so and libQnnSystem.so to jniLibs/arm64-v8a by yourself
                        backendLib = "libQnnHtp.so",
                        systemLib = "libQnnSystem.so",
                        contextBinary = "$modelDir/model.bin",
                    ),
                ),
                tokens = "$modelDir/tokens.txt",
                debug = true,
            )
        }

        9023 -> {
            val modelDir = "sherpa-onnx-qnn-5-seconds-paraformer-zh-2023-03-28-int8-android-aarch64"
            return OfflineModelConfig(
                provider = "qnn",
                paraformer = OfflineParaformerModelConfig(
                    model = "$modelDir/libencoder.so,$modelDir/libpredictor.so,$modelDir/libdecoder.so",
                    qnnConfig = QnnConfig(
                        backendLib = "libQnnHtp.so",
                        systemLib = "libQnnSystem.so",
                        // The following three *.bin files are generated during the first run
                        // and are used to replace the corresponding *.so files in later runs
                        contextBinary = "$modelDir/encoder.bin,$modelDir/predictor.bin,$modelDir/decoder.bin",
                    ),
                ),
                tokens = "$modelDir/tokens.txt",
                debug = true,
            )
        }

        9024 -> {
            val modelDir = "sherpa-onnx-qnn-5-seconds-paraformer-zh-2025-10-07-int8-android-aarch64"
            return OfflineModelConfig(
                provider = "qnn",
                paraformer = OfflineParaformerModelConfig(
                    model = "$modelDir/libencoder.so,$modelDir/libpredictor.so,$modelDir/libdecoder.so",
                    qnnConfig = QnnConfig(
                        backendLib = "libQnnHtp.so",
                        systemLib = "libQnnSystem.so",
                        // The following three *.bin files are generated during the first run
                        // and are used to replace the corresponding *.so files in later runs
                        contextBinary = "$modelDir/encoder.bin,$modelDir/predictor.bin,$modelDir/decoder.bin",
                    ),
                ),
                tokens = "$modelDir/tokens.txt",
                debug = true,
            )
        }

        9025 -> {
            // for Xiaomi 17 Pro
            val modelDir = "sherpa-onnx-qnn-SM8850-binary-5-seconds-paraformer-zh-2023-03-28-int8"
            return OfflineModelConfig(
                provider = "qnn",
                paraformer = OfflineParaformerModelConfig(
                    qnnConfig = QnnConfig(
                        backendLib = "libQnnHtp.so",
                        systemLib = "libQnnSystem.so",
                        contextBinary = "$modelDir/encoder.bin,$modelDir/predictor.bin,$modelDir/decoder.bin",
                    ),
                ),
                tokens = "$modelDir/tokens.txt",
                debug = true,
            )
        }
    }
    return null
}


================================================
FILE: sherpa-onnx/kotlin-api/OfflineSpeakerDiarization.kt
================================================
package com.k2fsa.sherpa.onnx

import android.content.res.AssetManager

data class OfflineSpeakerSegmentationPyannoteModelConfig(
    var model: String = "",
)

data class OfflineSpeakerSegmentationModelConfig(
    var pyannote: OfflineSpeakerSegmentationPyannoteModelConfig = OfflineSpeakerSegmentationPyannoteModelConfig(),
    var numThreads: Int = 1,
    var debug: Boolean = false,
    var provider: String = "cpu",
)

data class FastClusteringConfig(
    var numClusters: Int = -1,
    var threshold: Float = 0.5f,
)

data class OfflineSpeakerDiarizationConfig(
    var segmentation: OfflineSpeakerSegmentationModelConfig = OfflineSpeakerSegmentationModelConfig(),
    var embedding: SpeakerEmbeddingExtractorConfig = SpeakerEmbeddingExtractorConfig(),
    var clustering: FastClusteringConfig = FastClusteringConfig(),
    var minDurationOn: Float = 0.2f,
    var minDurationOff: Float = 0.5f,
)

data class OfflineSpeakerDiarizationSegment(
    val start: Float, // in seconds
    val end: Float, // in seconds
    val speaker: Int, // ID of the speaker; count from 0
)

class OfflineSpeakerDiarization(
    assetManager: AssetManager? = null,
    val config: OfflineSpeakerDiarizationConfig,
) {
    private var ptr: Long

    init {
        ptr = if (assetManager != null) {
            newFromAsset(assetManager, config)
        } else {
            newFromFile(config)
        }
    }

    protected fun finalize() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0
        }
    }

    fun release() = finalize()

    // Only config.clustering is used. All other fields in config
    // are ignored
    fun setConfig(config: OfflineSpeakerDiarizationConfig) = setConfig(ptr, config)

    fun sampleRate() = getSampleRate(ptr)

    fun process(samples: FloatArray) = process(ptr, samples)

    fun processWithCallback(
        samples: FloatArray,
        callback: (numProcessedChunks: Int, numTotalChunks: Int, arg: Long) -> Int,
        arg: Long = 0,
    ) = processWithCallback(ptr, samples, callback, arg)

    private external fun delete(ptr: Long)

    private external fun newFromAsset(
        assetManager: AssetManager,
        config: OfflineSpeakerDiarizationConfig,
    ): Long

    private external fun newFromFile(
        config: OfflineSpeakerDiarizationConfig,
    ): Long

    private external fun setConfig(ptr: Long, config: OfflineSpeakerDiarizationConfig)

    private external fun getSampleRate(ptr: Long): Int

    private external fun process(
        ptr: Long,
        samples: FloatArray
    ): Array<OfflineSpeakerDiarizationSegment>

    private external fun processWithCallback(
        ptr: Long,
        samples: FloatArray,
        callback: (numProcessedChunks: Int, numTotalChunks: Int, arg: Long) -> Int,
        arg: Long,
    ): Array<OfflineSpeakerDiarizationSegment>

    companion object {
        init {
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}


================================================
FILE: sherpa-onnx/kotlin-api/OfflineSpeechDenoiser.kt
================================================
package com.k2fsa.sherpa.onnx

import android.content.res.AssetManager

data class OfflineSpeechDenoiserGtcrnModelConfig(
    var model: String = "",
)

data class OfflineSpeechDenoiserDpdfNetModelConfig(
    var model: String = "",
)

data class OfflineSpeechDenoiserModelConfig(
    var gtcrn: OfflineSpeechDenoiserGtcrnModelConfig = OfflineSpeechDenoiserGtcrnModelConfig(),
    var dpdfnet: OfflineSpeechDenoiserDpdfNetModelConfig = OfflineSpeechDenoiserDpdfNetModelConfig(),
    var numThreads: Int = 1,
    var debug: Boolean = false,
    var provider: String = "cpu",
)

data class OfflineSpeechDenoiserConfig(
    var model: OfflineSpeechDenoiserModelConfig = OfflineSpeechDenoiserModelConfig(),
)

class OfflineSpeechDenoiser(
    assetManager: AssetManager? = null,
    config: OfflineSpeechDenoiserConfig,
) {
    private var ptr: Long

    init {
        ptr = if (assetManager != null) {
            newFromAsset(assetManager, config)
        } else {
            newFromFile(config)
        }
    }

    protected fun finalize() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0
        }
    }

    fun release() = finalize()

    fun run(samples: FloatArray, sampleRate: Int) = run(ptr, samples, sampleRate)

    val sampleRate
      get() = getSampleRate(ptr)

    private external fun newFromAsset(
        assetManager: AssetManager,
        config: OfflineSpeechDenoiserConfig,
    ): Long

    private external fun newFromFile(
        config: OfflineSpeechDenoiserConfig,
    ): Long

    private external fun delete(ptr: Long)

    private external fun run(ptr: Long, samples: FloatArray, sampleRate: Int): DenoisedAudio

    private external fun getSampleRate(ptr: Long): Int

    companion object {
        init {
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}


================================================
FILE: sherpa-onnx/kotlin-api/OfflineStream.kt
================================================
package com.k2fsa.sherpa.onnx

class OfflineStream(var ptr: Long) {
    fun acceptWaveform(samples: FloatArray, sampleRate: Int) =
        acceptWaveform(ptr, samples, sampleRate)

    fun setOption(key: String, value: String) = setOption(ptr, key, value)

    fun getOption(key: String): String = getOption(ptr, key)

    protected fun finalize() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0
        }
    }

    fun release() = finalize()

    fun use(block: (OfflineStream) -> Unit) {
        try {
            block(this)
        } finally {
            release()
        }
    }

    private external fun acceptWaveform(ptr: Long, samples: FloatArray, sampleRate: Int)
    private external fun setOption(ptr: Long, key: String, value: String)
    private external fun getOption(ptr: Long, key: String): String
    private external fun delete(ptr: Long)

    companion object {
        init {
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}


================================================
FILE: sherpa-onnx/kotlin-api/OnlinePunctuation.kt
================================================
package com.k2fsa.sherpa.onnx

import android.content.res.AssetManager

data class OnlinePunctuationModelConfig(
    var cnnBilstm: String = "",
    var bpeVocab: String = "",
    var numThreads: Int = 1,
    var debug: Boolean = false,
    var provider: String = "cpu",
)


data class OnlinePunctuationConfig(
    var model: OnlinePunctuationModelConfig,
)

class OnlinePunctuation(
    assetManager: AssetManager? = null,
    config: OnlinePunctuationConfig,
) {
    private var ptr: Long

    init {
        ptr = if (assetManager != null) {
            newFromAsset(assetManager, config)
        } else {
            newFromFile(config)
        }
    }

    protected fun finalize() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0
        }
    }

    fun release() = finalize()

    fun addPunctuation(text: String) = addPunctuation(ptr, text)

    private external fun delete(ptr: Long)

    private external fun addPunctuation(ptr: Long, text: String): String

    private external fun newFromAsset(
        assetManager: AssetManager,
        config: OnlinePunctuationConfig,
    ): Long

    private external fun newFromFile(
        config: OnlinePunctuationConfig,
    ): Long

    companion object {
        init {
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}


================================================
FILE: sherpa-onnx/kotlin-api/OnlineRecognizer.kt
================================================
package com.k2fsa.sherpa.onnx

import android.content.res.AssetManager

data class EndpointRule(
    var mustContainNonSilence: Boolean,
    var minTrailingSilence: Float,
    var minUtteranceLength: Float,
)

data class EndpointConfig(
    var rule1: EndpointRule = EndpointRule(false, 2.4f, 0.0f),
    var rule2: EndpointRule = EndpointRule(true, 1.4f, 0.0f),
    var rule3: EndpointRule = EndpointRule(false, 0.0f, 20.0f)
)

data class OnlineTransducerModelConfig(
    var encoder: String = "",
    var decoder: String = "",
    var joiner: String = "",
)

data class OnlineParaformerModelConfig(
    var encoder: String = "",
    var decoder: String = "",
)

data class OnlineZipformer2CtcModelConfig(
    var model: String = "",
)

data class OnlineNeMoCtcModelConfig(
    var model: String = "",
)

data class OnlineToneCtcModelConfig(
    var model: String = "",
)

data class OnlineModelConfig(
    var transducer: OnlineTransducerModelConfig = OnlineTransducerModelConfig(),
    var paraformer: OnlineParaformerModelConfig = OnlineParaformerModelConfig(),
    var zipformer2Ctc: OnlineZipformer2CtcModelConfig = OnlineZipformer2CtcModelConfig(),
    var neMoCtc: OnlineNeMoCtcModelConfig = OnlineNeMoCtcModelConfig(),
    var toneCtc: OnlineToneCtcModelConfig = OnlineToneCtcModelConfig(),
    var tokens: String = "",
    var numThreads: Int = 1,
    var debug: Boolean = false,
    var provider: String = "cpu",
    var modelType: String = "",
    var modelingUnit: String = "",
    var bpeVocab: String = "",
)

data class OnlineLMConfig(
    var model: String = "",
    var scale: Float = 0.5f,
)

data class OnlineCtcFstDecoderConfig(
    var graph: String = "",
    var maxActive: Int = 3000,
)

data class OnlineRecognizerConfig(
    var featConfig: FeatureConfig = FeatureConfig(),
    var modelConfig: OnlineModelConfig = OnlineModelConfig(),
    var lmConfig: OnlineLMConfig = OnlineLMConfig(),
    var ctcFstDecoderConfig: OnlineCtcFstDecoderConfig = OnlineCtcFstDecoderConfig(),
    var hr: HomophoneReplacerConfig = HomophoneReplacerConfig(),
    var endpointConfig: EndpointConfig = EndpointConfig(),
    var enableEndpoint: Boolean = true,
    var decodingMethod: String = "greedy_search",
    var maxActivePaths: Int = 4,
    var hotwordsFile: String = "",
    var hotwordsScore: Float = 1.5f,
    var ruleFsts: String = "",
    var ruleFars: String = "",
    var blankPenalty: Float = 0.0f,
)

data class OnlineRecognizerResult(
    val text: String,
    val tokens: Array<String>,
    val timestamps: FloatArray,
    val ysProbs: FloatArray,
    // TODO(fangjun): Add more fields
)

class OnlineRecognizer(
    assetManager: AssetManager? = null,
    val config: OnlineRecognizerConfig,
) {
    private var ptr: Long

    init {
        ptr = if (assetManager != null) {
            newFromAsset(assetManager, config)
        } else {
            newFromFile(config)
        }
    }

    protected fun finalize() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0
        }
    }

    fun release() = finalize()

    fun createStream(hotwords: String = ""): OnlineStream {
        val p = createStream(ptr, hotwords)
        return OnlineStream(p)
    }

    fun reset(stream: OnlineStream) = reset(ptr, stream.ptr)
    fun decode(stream: OnlineStream) = decode(ptr, stream.ptr)
    fun isEndpoint(stream: OnlineStream) = isEndpoint(ptr, stream.ptr)
    fun isReady(stream: OnlineStream) = isReady(ptr, stream.ptr)
    fun getResult(stream: OnlineStream): OnlineRecognizerResult {
        return getResult(ptr, stream.ptr)
    }

    private external fun delete(ptr: Long)

    private external fun newFromAsset(
        assetManager: AssetManager,
        config: OnlineRecognizerConfig,
    ): Long

    private external fun newFromFile(
        config: OnlineRecognizerConfig,
    ): Long

    private external fun createStream(ptr: Long, hotwords: String): Long
    private external fun reset(ptr: Long, streamPtr: Long)
    private external fun decode(ptr: Long, streamPtr: Long)
    private external fun isEndpoint(ptr: Long, streamPtr: Long): Boolean
    private external fun isReady(ptr: Long, streamPtr: Long): Boolean
    private external fun getResult(ptr: Long, streamPtr: Long): OnlineRecognizerResult

    companion object {
        init {
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}


/*
Please see
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
for a list of pre-trained models.

We only add a few here. Please change the following code
to add your own. (It should be straightforward to add a new model
by following the code)

@param type
0 - sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 (Bilingual, Chinese + English)
    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english

1 - csukuangfj/sherpa-onnx-lstm-zh-2023-02-20 (Chinese)

    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/lstm-transducer-models.html#csukuangfj-sherpa-onnx-lstm-zh-2023-02-20-chinese

2 - csukuangfj/sherpa-onnx-lstm-en-2023-02-17 (English)
    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/lstm-transducer-models.html#csukuangfj-sherpa-onnx-lstm-en-2023-02-17-english

3,4 - pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615
    https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615
    3 - int8 encoder
    4 - float32 encoder

5 - csukuangfj/sherpa-onnx-streaming-paraformer-bilingual-zh-en
    https://huggingface.co/csukuangfj/sherpa-onnx-streaming-paraformer-bilingual-zh-en

6 - sherpa-onnx-streaming-zipformer-en-2023-06-26
    https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26

7 - shaojieli/sherpa-onnx-streaming-zipformer-fr-2023-04-14 (French)
    https://huggingface.co/shaojieli/sherpa-onnx-streaming-zipformer-fr-2023-04-14

8 - csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 (Bilingual, Chinese + English)
    https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
    encoder int8, decoder/joiner float32

 */
fun getModelConfig(type: Int): OnlineModelConfig? {
    when (type) {
        0 -> {
            val modelDir = "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20"
            return OnlineModelConfig(
                transducer = OnlineTransducerModelConfig(
                    encoder = "$modelDir/encoder-epoch-99-avg-1.onnx",
                    decoder = "$modelDir/decoder-epoch-99-avg-1.onnx",
                    joiner = "$modelDir/joiner-epoch-99-avg-1.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "zipformer",
            )
        }

        1 -> {
            val modelDir = "sherpa-onnx-lstm-zh-2023-02-20"
            return OnlineModelConfig(
                transducer = OnlineTransducerModelConfig(
                    encoder = "$modelDir/encoder-epoch-11-avg-1.onnx",
                    decoder = "$modelDir/decoder-epoch-11-avg-1.onnx",
                    joiner = "$modelDir/joiner-epoch-11-avg-1.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "lstm",
            )
        }

        2 -> {
            val modelDir = "sherpa-onnx-lstm-en-2023-02-17"
            return OnlineModelConfig(
                transducer = OnlineTransducerModelConfig(
                    encoder = "$modelDir/encoder-epoch-99-avg-1.onnx",
                    decoder = "$modelDir/decoder-epoch-99-avg-1.onnx",
                    joiner = "$modelDir/joiner-epoch-99-avg-1.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "lstm",
            )
        }

        3 -> {
            val modelDir = "icefall-asr-zipformer-streaming-wenetspeech-20230615"
            return OnlineModelConfig(
                transducer = OnlineTransducerModelConfig(
                    encoder = "$modelDir/exp/encoder-epoch-12-avg-4-chunk-16-left-128.int8.onnx",
                    decoder = "$modelDir/exp/decoder-epoch-12-avg-4-chunk-16-left-128.onnx",
                    joiner = "$modelDir/exp/joiner-epoch-12-avg-4-chunk-16-left-128.onnx",
                ),
                tokens = "$modelDir/data/lang_char/tokens.txt",
                modelType = "zipformer2",
            )
        }

        4 -> {
            val modelDir = "icefall-asr-zipformer-streaming-wenetspeech-20230615"
            return OnlineModelConfig(
                transducer = OnlineTransducerModelConfig(
                    encoder = "$modelDir/exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx",
                    decoder = "$modelDir/exp/decoder-epoch-12-avg-4-chunk-16-left-128.onnx",
                    joiner = "$modelDir/exp/joiner-epoch-12-avg-4-chunk-16-left-128.onnx",
                ),
                tokens = "$modelDir/data/lang_char/tokens.txt",
                modelType = "zipformer2",
            )
        }

        5 -> {
            val modelDir = "sherpa-onnx-streaming-paraformer-bilingual-zh-en"
            return OnlineModelConfig(
                paraformer = OnlineParaformerModelConfig(
                    encoder = "$modelDir/encoder.int8.onnx",
                    decoder = "$modelDir/decoder.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "paraformer",
            )
        }

        6 -> {
            val modelDir = "sherpa-onnx-streaming-zipformer-en-2023-06-26"
            return OnlineModelConfig(
                transducer = OnlineTransducerModelConfig(
                    encoder = "$modelDir/encoder-epoch-99-avg-1-chunk-16-left-128.int8.onnx",
                    decoder = "$modelDir/decoder-epoch-99-avg-1-chunk-16-left-128.onnx",
                    joiner = "$modelDir/joiner-epoch-99-avg-1-chunk-16-left-128.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "zipformer2",
            )
        }

        7 -> {
            val modelDir = "sherpa-onnx-streaming-zipformer-fr-2023-04-14"
            return OnlineModelConfig(
                transducer = OnlineTransducerModelConfig(
                    encoder = "$modelDir/encoder-epoch-29-avg-9-with-averaged-model.int8.onnx",
                    decoder = "$modelDir/decoder-epoch-29-avg-9-with-averaged-model.onnx",
                    joiner = "$modelDir/joiner-epoch-29-avg-9-with-averaged-model.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "zipformer",
            )
        }

        8 -> {
            val modelDir = "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20"
            return OnlineModelConfig(
                transducer = OnlineTransducerModelConfig(
                    encoder = "$modelDir/encoder-epoch-99-avg-1.int8.onnx",
                    decoder = "$modelDir/decoder-epoch-99-avg-1.onnx",
                    joiner = "$modelDir/joiner-epoch-99-avg-1.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "zipformer",
            )
        }

        9 -> {
            val modelDir = "sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23"
            return OnlineModelConfig(
                transducer = OnlineTransducerModelConfig(
                    encoder = "$modelDir/encoder-epoch-99-avg-1.int8.onnx",
                    decoder = "$modelDir/decoder-epoch-99-avg-1.onnx",
                    joiner = "$modelDir/joiner-epoch-99-avg-1.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "zipformer",
            )
        }

        10 -> {
            val modelDir = "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17"
            return OnlineModelConfig(
                transducer = OnlineTransducerModelConfig(
                    encoder = "$modelDir/encoder-epoch-99-avg-1.int8.onnx",
                    decoder = "$modelDir/decoder-epoch-99-avg-1.onnx",
                    joiner = "$modelDir/joiner-epoch-99-avg-1.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "zipformer",
            )
        }

        11 -> {
            val modelDir = "sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms"
            return OnlineModelConfig(
                neMoCtc = OnlineNeMoCtcModelConfig(
                    model = "$modelDir/model.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        12 -> {
            val modelDir = "sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms"
            return OnlineModelConfig(
                neMoCtc = OnlineNeMoCtcModelConfig(
                    model = "$modelDir/model.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        13 -> {
            val modelDir = "sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms"
            return OnlineModelConfig(
                neMoCtc = OnlineNeMoCtcModelConfig(
                    model = "$modelDir/model.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        14 -> {
            val modelDir = "sherpa-onnx-streaming-zipformer-korean-2024-06-16"
            return OnlineModelConfig(
                transducer = OnlineTransducerModelConfig(
                    encoder = "$modelDir/encoder-epoch-99-avg-1.int8.onnx",
                    decoder = "$modelDir/decoder-epoch-99-avg-1.onnx",
                    joiner = "$modelDir/joiner-epoch-99-avg-1.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "zipformer",
            )
        }

        15 -> {
            val modelDir = "sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01"
            return OnlineModelConfig(
                zipformer2Ctc = OnlineZipformer2CtcModelConfig(
                    model = "$modelDir/model.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        16 -> {
            val modelDir = "sherpa-onnx-streaming-zipformer-small-ctc-zh-2025-04-01"
            return OnlineModelConfig(
                zipformer2Ctc = OnlineZipformer2CtcModelConfig(
                    model = "$modelDir/model.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        17 -> {
            val modelDir = "sherpa-onnx-streaming-zipformer-ctc-zh-int8-2025-06-30"
            return OnlineModelConfig(
                zipformer2Ctc = OnlineZipformer2CtcModelConfig(
                    model = "$modelDir/model.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        18 -> {
            val modelDir = "sherpa-onnx-streaming-zipformer-ctc-zh-2025-06-30"
            return OnlineModelConfig(
                zipformer2Ctc = OnlineZipformer2CtcModelConfig(
                    model = "$modelDir/model.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "zipformer2",
            )
        }

        19 -> {
            val modelDir = "sherpa-onnx-streaming-zipformer-ctc-zh-fp16-2025-06-30"
            return OnlineModelConfig(
                zipformer2Ctc = OnlineZipformer2CtcModelConfig(
                    model = "$modelDir/model.fp16.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "zipformer2",
            )
        }

        20 -> {
            val modelDir = "sherpa-onnx-streaming-zipformer-zh-int8-2025-06-30"
            return OnlineModelConfig(
                transducer = OnlineTransducerModelConfig(
                    encoder = "$modelDir/encoder.int8.onnx",
                    decoder = "$modelDir/decoder.onnx",
                    joiner = "$modelDir/joiner.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "zipformer2",
            )
        }

        21 -> {
            val modelDir = "sherpa-onnx-streaming-zipformer-en-kroko-2025-08-06"
            return OnlineModelConfig(
                transducer = OnlineTransducerModelConfig(
                    encoder = "$modelDir/encoder.onnx",
                    decoder = "$modelDir/decoder.onnx",
                    joiner = "$modelDir/joiner.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "zipformer2",
            )
        }

        22 -> {
            val modelDir = "sherpa-onnx-streaming-zipformer-es-kroko-2025-08-06"
            return OnlineModelConfig(
                transducer = OnlineTransducerModelConfig(
                    encoder = "$modelDir/encoder.onnx",
                    decoder = "$modelDir/decoder.onnx",
                    joiner = "$modelDir/joiner.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "zipformer2",
            )
        }

        23 -> {
            val modelDir = "sherpa-onnx-streaming-zipformer-fr-kroko-2025-08-06"
            return OnlineModelConfig(
                transducer = OnlineTransducerModelConfig(
                    encoder = "$modelDir/encoder.onnx",
                    decoder = "$modelDir/decoder.onnx",
                    joiner = "$modelDir/joiner.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "zipformer2",
            )
        }

        24 -> {
            val modelDir = "sherpa-onnx-streaming-zipformer-de-kroko-2025-08-06"
            return OnlineModelConfig(
                transducer = OnlineTransducerModelConfig(
                    encoder = "$modelDir/encoder.onnx",
                    decoder = "$modelDir/decoder.onnx",
                    joiner = "$modelDir/joiner.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "zipformer2",
            )
        }

        25 -> {
            val modelDir = "sherpa-onnx-streaming-zipformer-small-ru-vosk-int8-2025-08-16"
            return OnlineModelConfig(
                transducer = OnlineTransducerModelConfig(
                    encoder = "$modelDir/encoder.int8.onnx",
                    decoder = "$modelDir/decoder.onnx",
                    joiner = "$modelDir/joiner.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "zipformer2",
            )
        }

        26 -> {
            val modelDir = "sherpa-onnx-streaming-zipformer-small-ru-vosk-2025-08-16"
            return OnlineModelConfig(
                transducer = OnlineTransducerModelConfig(
                    encoder = "$modelDir/encoder.onnx",
                    decoder = "$modelDir/decoder.onnx",
                    joiner = "$modelDir/joiner.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "zipformer2",
            )
        }

        27 -> {
            val modelDir = "sherpa-onnx-streaming-t-one-russian-2025-09-08"
            return OnlineModelConfig(
                toneCtc = OnlineToneCtcModelConfig(
                    model = "$modelDir/model.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        28 -> {
            val modelDir = "sherpa-onnx-nemotron-speech-streaming-en-0.6b-int8-2026-01-14"
            return OnlineModelConfig(
                transducer = OnlineTransducerModelConfig(
                    encoder = "$modelDir/encoder.int8.onnx",
                    decoder = "$modelDir/decoder.int8.onnx",
                    joiner = "$modelDir/joiner.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
            )
        }

        29 -> {
            val modelDir = "sherpa-onnx-streaming-zipformer-bn-vosk-2026-02-09"
            return OnlineModelConfig(
                transducer = OnlineTransducerModelConfig(
                    encoder = "$modelDir/encoder.onnx",
                    decoder = "$modelDir/decoder.onnx",
                    joiner = "$modelDir/joiner.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "zipformer2",
            )
        }

        1000 -> {
            val modelDir = "sherpa-onnx-rk3588-streaming-zipformer-bilingual-zh-en-2023-02-20"
            return OnlineModelConfig(
                transducer = OnlineTransducerModelConfig(
                    encoder = "$modelDir/encoder.rknn",
                    decoder = "$modelDir/decoder.rknn",
                    joiner = "$modelDir/joiner.rknn",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "zipformer",
                provider = "rknn",
            )
        }

        1001 -> {
            val modelDir = "sherpa-onnx-rk3588-streaming-zipformer-small-bilingual-zh-en-2023-02-16"
            return OnlineModelConfig(
                transducer = OnlineTransducerModelConfig(
                    encoder = "$modelDir/encoder.rknn",
                    decoder = "$modelDir/decoder.rknn",
                    joiner = "$modelDir/joiner.rknn",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "zipformer",
                provider = "rknn",
            )
        }

    }
    return null
}

/*
Please see
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
for a list of pre-trained models.

We only add a few here. Please change the following code
to add your own LM model. (It should be straightforward to train a new NN LM model
by following the code, https://github.com/k2-fsa/icefall/blob/master/icefall/rnn_lm/train.py)

@param type
0 - sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 (Bilingual, Chinese + English)
    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
 */
fun getOnlineLMConfig(type: Int): OnlineLMConfig {
    when (type) {
        0 -> {
            val modelDir = "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20"
            return OnlineLMConfig(
                model = "$modelDir/with-state-epoch-99-avg-1.int8.onnx",
                scale = 0.5f,
            )
        }
    }
    return OnlineLMConfig()
}

fun getEndpointConfig(): EndpointConfig {
    return EndpointConfig(
        rule1 = EndpointRule(false, 2.4f, 0.0f),
        rule2 = EndpointRule(true, 1.4f, 0.0f),
        rule3 = EndpointRule(false, 0.0f, 20.0f)
    )
}


================================================
FILE: sherpa-onnx/kotlin-api/OnlineSpeechDenoiser.kt
================================================
package com.k2fsa.sherpa.onnx

import android.content.res.AssetManager

data class OnlineSpeechDenoiserConfig(
    var model: OfflineSpeechDenoiserModelConfig = OfflineSpeechDenoiserModelConfig(),
)

class OnlineSpeechDenoiser(
    assetManager: AssetManager? = null,
    config: OnlineSpeechDenoiserConfig,
) {
    private var ptr: Long

    init {
        ptr = if (assetManager != null) {
            newFromAsset(assetManager, config)
        } else {
            newFromFile(config)
        }
    }

    protected fun finalize() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0
        }
    }

    fun release() = finalize()

    fun run(samples: FloatArray, sampleRate: Int) = run(ptr, samples, sampleRate)

    fun flush() = flush(ptr)

    fun reset() = reset(ptr)

    val sampleRate
      get() = getSampleRate(ptr)

    val frameShiftInSamples
      get() = getFrameShiftInSamples(ptr)

    private external fun newFromAsset(
        assetManager: AssetManager,
        config: OnlineSpeechDenoiserConfig,
    ): Long

    private external fun newFromFile(
        config: OnlineSpeechDenoiserConfig,
    ): Long

    private external fun delete(ptr: Long)

    private external fun run(ptr: Long, samples: FloatArray, sampleRate: Int): DenoisedAudio

    private external fun flush(ptr: Long): DenoisedAudio

    private external fun reset(ptr: Long)

    private external fun getSampleRate(ptr: Long): Int

    private external fun getFrameShiftInSamples(ptr: Long): Int

    companion object {
        init {
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}


================================================
FILE: sherpa-onnx/kotlin-api/OnlineStream.kt
================================================
package com.k2fsa.sherpa.onnx

class OnlineStream(var ptr: Long = 0) {
    fun acceptWaveform(samples: FloatArray, sampleRate: Int) =
        acceptWaveform(ptr, samples, sampleRate)

    fun inputFinished() = inputFinished(ptr)

    fun setOption(key: String, value: String) = setOption(ptr, key, value)

    fun getOption(key: String): String = getOption(ptr, key)

    protected fun finalize() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0
        }
    }

    fun release() = finalize()

    fun use(block: (OnlineStream) -> Unit) {
        try {
            block(this)
        } finally {
            release()
        }
    }

    private external fun acceptWaveform(ptr: Long, samples: FloatArray, sampleRate: Int)
    private external fun inputFinished(ptr: Long)
    private external fun setOption(ptr: Long, key: String, value: String)
    private external fun getOption(ptr: Long, key: String): String
    private external fun delete(ptr: Long)


    companion object {
        init {
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}


================================================
FILE: sherpa-onnx/kotlin-api/QnnConfig.kt
================================================
package com.k2fsa.sherpa.onnx

data class QnnConfig(
    var backendLib: String = "",
    var contextBinary: String = "",
    var systemLib: String = "",
)


================================================
FILE: sherpa-onnx/kotlin-api/Speaker.kt
================================================
package com.k2fsa.sherpa.onnx

import android.content.res.AssetManager
import android.util.Log

class SpeakerEmbeddingExtractor(
    assetManager: AssetManager? = null,
    config: SpeakerEmbeddingExtractorConfig,
) {
    private var ptr: Long

    init {
        ptr = if (assetManager != null) {
            newFromAsset(assetManager, config)
        } else {
            newFromFile(config)
        }
    }

    protected fun finalize() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0
        }
    }

    fun release() = finalize()

    fun createStream(): OnlineStream {
        val p = createStream(ptr)
        return OnlineStream(p)
    }

    fun isReady(stream: OnlineStream) = isReady(ptr, stream.ptr)
    fun compute(stream: OnlineStream) = compute(ptr, stream.ptr)
    fun dim() = dim(ptr)

    private external fun newFromAsset(
        assetManager: AssetManager,
        config: SpeakerEmbeddingExtractorConfig,
    ): Long

    private external fun newFromFile(
        config: SpeakerEmbeddingExtractorConfig,
    ): Long

    private external fun delete(ptr: Long)

    private external fun createStream(ptr: Long): Long

    private external fun isReady(ptr: Long, streamPtr: Long): Boolean

    private external fun compute(ptr: Long, streamPtr: Long): FloatArray

    private external fun dim(ptr: Long): Int

    companion object {
        init {
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}

class SpeakerEmbeddingManager(val dim: Int) {
    private var ptr: Long

    init {
        ptr = create(dim)
    }

    protected fun finalize() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0
        }
    }

    fun release() = finalize()
    fun add(name: String, embedding: FloatArray) = add(ptr, name, embedding)
    fun add(name: String, embedding: Array<FloatArray>) = addList(ptr, name, embedding)
    fun remove(name: String) = remove(ptr, name)
    fun search(embedding: FloatArray, threshold: Float) = search(ptr, embedding, threshold)
    fun verify(name: String, embedding: FloatArray, threshold: Float) =
        verify(ptr, name, embedding, threshold)

    fun contains(name: String) = contains(ptr, name)
    fun numSpeakers() = numSpeakers(ptr)

    fun allSpeakerNames() = allSpeakerNames(ptr)

    private external fun create(dim: Int): Long
    private external fun delete(ptr: Long): Unit
    private external fun add(ptr: Long, name: String, embedding: FloatArray): Boolean
    private external fun addList(ptr: Long, name: String, embedding: Array<FloatArray>): Boolean
    private external fun remove(ptr: Long, name: String): Boolean
    private external fun search(ptr: Long, embedding: FloatArray, threshold: Float): String
    private external fun verify(
        ptr: Long,
        name: String,
        embedding: FloatArray,
        threshold: Float
    ): Boolean

    private external fun contains(ptr: Long, name: String): Boolean
    private external fun numSpeakers(ptr: Long): Int

    private external fun allSpeakerNames(ptr: Long): Array<String>

    companion object {
        init {
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}

// Please download the model file from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
// and put it inside the assets directory.
//
// Please don't put it in a subdirectory of assets
private val modelName = "3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx"

object SpeakerRecognition {
    var _extractor: SpeakerEmbeddingExtractor? = null
    var _manager: SpeakerEmbeddingManager? = null

    val extractor: SpeakerEmbeddingExtractor
        get() {
            return _extractor!!
        }

    val manager: SpeakerEmbeddingManager
        get() {
            return _manager!!
        }

    fun initExtractor(assetManager: AssetManager? = null) {
        synchronized(this) {
            if (_extractor != null) {
                return
            }
            Log.i("sherpa-onnx", "Initializing speaker embedding extractor")

            _extractor = SpeakerEmbeddingExtractor(
                assetManager = assetManager,
                config = SpeakerEmbeddingExtractorConfig(
                    model = modelName,
                    numThreads = 2,
                    debug = false,
                    provider = "cpu",
                )
            )

            _manager = SpeakerEmbeddingManager(dim = _extractor!!.dim())
        }
    }
}


================================================
FILE: sherpa-onnx/kotlin-api/SpeakerEmbeddingExtractorConfig.kt
================================================
package com.k2fsa.sherpa.onnx

data class SpeakerEmbeddingExtractorConfig(
    val model: String = "",
    var numThreads: Int = 1,
    var debug: Boolean = false,
    var provider: String = "cpu",
)


================================================
FILE: sherpa-onnx/kotlin-api/SpokenLanguageIdentification.kt
================================================
package com.k2fsa.sherpa.onnx

import android.content.res.AssetManager

data class SpokenLanguageIdentificationWhisperConfig(
    var encoder: String = "",
    var decoder: String = "",
    var tailPaddings: Int = -1,
)

data class SpokenLanguageIdentificationConfig(
    var whisper: SpokenLanguageIdentificationWhisperConfig = SpokenLanguageIdentificationWhisperConfig(),
    var numThreads: Int = 1,
    var debug: Boolean = false,
    var provider: String = "cpu",
)

class SpokenLanguageIdentification(
    assetManager: AssetManager? = null,
    config: SpokenLanguageIdentificationConfig,
) {
    private var ptr: Long

    init {
        ptr = if (assetManager != null) {
            newFromAsset(assetManager, config)
        } else {
            newFromFile(config)
        }
    }

    protected fun finalize() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0
        }
    }

    fun release() = finalize()

    fun createStream(): OfflineStream {
        val p = createStream(ptr)
        return OfflineStream(p)
    }

    fun compute(stream: OfflineStream) = compute(ptr, stream.ptr)

    private external fun newFromAsset(
        assetManager: AssetManager,
        config: SpokenLanguageIdentificationConfig,
    ): Long

    private external fun newFromFile(
        config: SpokenLanguageIdentificationConfig,
    ): Long

    private external fun delete(ptr: Long)

    private external fun createStream(ptr: Long): Long

    private external fun compute(ptr: Long, streamPtr: Long): String

    companion object {
        init {
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}

// please refer to
// https://k2-fsa.github.io/sherpa/onnx/spolken-language-identification/pretrained_models.html#whisper
// to download more models
fun getSpokenLanguageIdentificationConfig(
    type: Int,
    numThreads: Int = 1
): SpokenLanguageIdentificationConfig? {
    when (type) {
        0 -> {
            val modelDir = "sherpa-onnx-whisper-tiny"
            return SpokenLanguageIdentificationConfig(
                whisper = SpokenLanguageIdentificationWhisperConfig(
                    encoder = "$modelDir/tiny-encoder.int8.onnx",
                    decoder = "$modelDir/tiny-decoder.int8.onnx",
                ),
                numThreads = numThreads,
                debug = true,
            )
        }

        1 -> {
            val modelDir = "sherpa-onnx-whisper-base"
            return SpokenLanguageIdentificationConfig(
                whisper = SpokenLanguageIdentificationWhisperConfig(
                    encoder = "$modelDir/base-encoder.int8.onnx",
                    decoder = "$modelDir/base-decoder.int8.onnx",
                ),
                numThreads = 1,
                debug = true,
            )
        }
    }
    return null
}


================================================
FILE: sherpa-onnx/kotlin-api/Tts.kt
================================================
// Copyright (c)  2023  Xiaomi Corporation
package com.k2fsa.sherpa.onnx

import android.content.res.AssetManager

data class OfflineTtsVitsModelConfig(
    var model: String = "",
    var lexicon: String = "",
    var tokens: String = "",
    var dataDir: String = "",
    var dictDir: String = "", // unused
    var noiseScale: Float = 0.667f,
    var noiseScaleW: Float = 0.8f,
    var lengthScale: Float = 1.0f,
)

data class OfflineTtsMatchaModelConfig(
    var acousticModel: String = "",
    var vocoder: String = "",
    var lexicon: String = "",
    var tokens: String = "",
    var dataDir: String = "",
    var dictDir: String = "", // unused
    var noiseScale: Float = 1.0f,
    var lengthScale: Float = 1.0f,
)

data class OfflineTtsKokoroModelConfig(
    var model: String = "",
    var voices: String = "",
    var tokens: String = "",
    var dataDir: String = "",
    var lexicon: String = "",
    var lang: String = "",
    var dictDir: String = "", // unused
    var lengthScale: Float = 1.0f,
)

data class OfflineTtsZipVoiceModelConfig(
    var tokens: String = "",
    var encoder: String = "",
    var decoder: String = "",
    var vocoder: String = "",
    var dataDir: String = "",
    var lexicon: String = "",
    var featScale: Float = 0.1f,
    var tShift: Float = 0.5f,
    var targetRms: Float = 0.1f,
    var guidanceScale: Float = 1.0f,
)

data class OfflineTtsKittenModelConfig(
    var model: String = "",
    var voices: String = "",
    var tokens: String = "",
    var dataDir: String = "",
    var lengthScale: Float = 1.0f,
)

/**
 * Configuration for Pocket TTS models.
 *
 * See https://k2-fsa.github.io/sherpa/onnx/tts/pocket/index.html for details.
 *
 * @property lmFlow Path to the LM flow model (.onnx)
 * @property lmMain Path to the LM main model (.onnx)
 * @property encoder Path to the encoder model (.onnx)
 * @property decoder Path to the decoder model (.onnx)
 * @property textConditioner Path to the text conditioner model (.onnx)
 * @property vocabJson Path to vocabulary JSON file
 * @property tokenScoresJson Path to token scores JSON file
 */
data class OfflineTtsPocketModelConfig(
  var lmFlow: String = "",
  var lmMain: String = "",
  var encoder: String = "",
  var decoder: String = "",
  var textConditioner: String = "",
  var vocabJson: String = "",
  var tokenScoresJson: String = "",
  var voiceEmbeddingCacheCapacity: Int = 50,
)

data class OfflineTtsSupertonicModelConfig(
  var durationPredictor: String = "",
  var textEncoder: String = "",
  var vectorEstimator: String = "",
  var vocoder: String = "",
  var ttsJson: String = "",
  var unicodeIndexer: String = "",
  var voiceStyle: String = "",
)

data class OfflineTtsModelConfig(
    var vits: OfflineTtsVitsModelConfig = OfflineTtsVitsModelConfig(),
    var matcha: OfflineTtsMatchaModelConfig = OfflineTtsMatchaModelConfig(),
    var kokoro: OfflineTtsKokoroModelConfig = OfflineTtsKokoroModelConfig(),
    var zipvoice: OfflineTtsZipVoiceModelConfig = OfflineTtsZipVoiceModelConfig(),
    var kitten: OfflineTtsKittenModelConfig = OfflineTtsKittenModelConfig(),
    var pocket: OfflineTtsPocketModelConfig = OfflineTtsPocketModelConfig(),
    var supertonic: OfflineTtsSupertonicModelConfig = OfflineTtsSupertonicModelConfig(),

    var numThreads: Int = 1,
    var debug: Boolean = false,
    var provider: String = "cpu",
)

data class OfflineTtsConfig(
    var model: OfflineTtsModelConfig = OfflineTtsModelConfig(),
    var ruleFsts: String = "",
    var ruleFars: String = "",
    var maxNumSentences: Int = 1,
    var silenceScale: Float = 0.2f,
)

class GeneratedAudio(
    val samples: FloatArray,
    val sampleRate: Int,
) {
    fun save(filename: String) =
        saveImpl(filename = filename, samples = samples, sampleRate = sampleRate)

    private external fun saveImpl(
        filename: String,
        samples: FloatArray,
        sampleRate: Int
    ): Boolean
}

data class GenerationConfig(
    var silenceScale: Float = 0.2f,
    var speed: Float = 1.0f,
    var sid: Int = 0,
    var referenceAudio: FloatArray? = null,
    var referenceSampleRate: Int = 0,
    var referenceText: String? = null,
    var numSteps: Int = 5,
    var extra: Map<String, String>? = null
)

class OfflineTts(
    assetManager: AssetManager? = null,
    var config: OfflineTtsConfig,
) {
    private var ptr: Long

    init {
        ptr = if (assetManager != null) {
            newFromAsset(assetManager, config)
        } else {
            newFromFile(config)
        }
    }

    fun sampleRate() = getSampleRate(ptr)

    fun numSpeakers() = getNumSpeakers(ptr)

    fun generate(
        text: String,
        sid: Int = 0,
        speed: Float = 1.0f
    ): GeneratedAudio {
        return generateImpl(ptr, text = text, sid = sid, speed = speed)
    }

    fun generateWithCallback(
        text: String,
        sid: Int = 0,
        speed: Float = 1.0f,
        callback: (samples: FloatArray) -> Int
    ): GeneratedAudio {
        return generateWithCallbackImpl(
            ptr,
            text = text,
            sid = sid,
            speed = speed,
            callback = callback
        )
    }

    fun generateWithConfig(
      text: String,
      config: GenerationConfig
    ): GeneratedAudio {
        return generateWithConfigImpl(ptr, text, config, null)
    }

    fun generateWithConfigAndCallback(
        text: String,
        config: GenerationConfig,
        callback: (samples: FloatArray) -> Int
    ): GeneratedAudio {
        return generateWithConfigImpl(ptr, text, config, callback)
    }

    fun allocate(assetManager: AssetManager? = null) {
        if (ptr == 0L) {
            ptr = if (assetManager != null) {
                newFromAsset(assetManager, config)
            } else {
                newFromFile(config)
            }
        }
    }

    fun free() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0
        }
    }

    protected fun finalize() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0
        }
    }

    fun release() = finalize()

    private external fun newFromAsset(
        assetManager: AssetManager,
        config: OfflineTtsConfig,
    ): Long

    private external fun newFromFile(
        config: OfflineTtsConfig,
    ): Long

    private external fun delete(ptr: Long)
    private external fun getSampleRate(ptr: Long): Int
    private external fun getNumSpeakers(ptr: Long): Int

    // The returned array has two entries:
    //  - the first entry is an 1-D float array containing audio samples.
    //    Each sample is normalized to the range [-1, 1]
    //  - the second entry is the sample rate
    private external fun generateImpl(
        ptr: Long,
        text: String,
        sid: Int = 0,
        speed: Float = 1.0f
    ): GeneratedAudio

    private external fun generateWithCallbackImpl(
        ptr: Long,
        text: String,
        sid: Int = 0,
        speed: Float = 1.0f,
        callback: (samples: FloatArray) -> Int
    ): GeneratedAudio


    private external fun generateWithConfigImpl(
        ptr: Long,
        text: String,
        config: GenerationConfig,
        callback: ((samples: FloatArray) -> Int)?
    ): GeneratedAudio

    companion object {
        init {
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}

// please refer to
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/index.html
// to download models
fun getOfflineTtsConfig(
    modelDir: String,
    modelName: String, // for VITS
    acousticModelName: String, // for Matcha
    vocoder: String, // for Matcha
    voices: String, // for Kokoro or kitten
    lexicon: String,
    dataDir: String,
    dictDir: String, // unused
    ruleFsts: String,
    ruleFars: String,
    numThreads: Int? = null,
    isKitten: Boolean = false
): OfflineTtsConfig {
    // For Matcha TTS, please set
    // acousticModelName, vocoder

    // For Kokoro TTS, please set
    // modelName, voices

    // For Kitten TTS, please set
    // modelName, voices, isKitten

    // For VITS, please set
    // modelName

    val numberOfThreads = if (numThreads != null) {
        numThreads
    } else if (voices.isNotEmpty()) {
        // for Kokoro and Kitten TTS models, we use more threads
        4
    } else {
        2
    }

    if (modelName.isEmpty() && acousticModelName.isEmpty()) {
        throw IllegalArgumentException("Please specify a TTS model")
    }

    if (modelName.isNotEmpty() && acousticModelName.isNotEmpty()) {
        throw IllegalArgumentException("Please specify either a VITS or a Matcha model, but not both")
    }

    if (acousticModelName.isNotEmpty() && vocoder.isEmpty()) {
        throw IllegalArgumentException("Please provide vocoder for Matcha TTS")
    }

    val vits = if (modelName.isNotEmpty() && voices.isEmpty()) {
        OfflineTtsVitsModelConfig(
            model = "$modelDir/$modelName",
            lexicon = "$modelDir/$lexicon",
            tokens = "$modelDir/tokens.txt",
            dataDir = dataDir,
        )
    } else {
        OfflineTtsVitsModelConfig()
    }

    val matcha = if (acousticModelName.isNotEmpty()) {
        OfflineTtsMatchaModelConfig(
            acousticModel = "$modelDir/$acousticModelName",
            vocoder = vocoder,
            lexicon = "$modelDir/$lexicon",
            tokens = "$modelDir/tokens.txt",
            dataDir = dataDir,
        )
    } else {
        OfflineTtsMatchaModelConfig()
    }

    val kokoro = if (voices.isNotEmpty() && !isKitten) {
        OfflineTtsKokoroModelConfig(
            model = "$modelDir/$modelName",
            voices = "$modelDir/$voices",
            tokens = "$modelDir/tokens.txt",
            dataDir = dataDir,
            lexicon = when {
                lexicon == "" -> lexicon
                "," in lexicon -> lexicon
                else -> "$modelDir/$lexicon"
            },
        )
    } else {
        OfflineTtsKokoroModelConfig()
    }

    val kitten = if (isKitten) {
        OfflineTtsKittenModelConfig(
            model = "$modelDir/$modelName",
            voices = "$modelDir/$voices",
            tokens = "$modelDir/tokens.txt",
            dataDir = dataDir,
        )
    } else {
        OfflineTtsKittenModelConfig()
    }

    return OfflineTtsConfig(
        model = OfflineTtsModelConfig(
            vits = vits,
            matcha = matcha,
            kokoro = kokoro,
            kitten = kitten,
            numThreads = numberOfThreads,
            debug = true,
            provider = "cpu",
        ),
        ruleFsts = ruleFsts,
        ruleFars = ruleFars,
    )
}


================================================
FILE: sherpa-onnx/kotlin-api/Vad.kt
================================================
// Copyright (c)  2023  Xiaomi Corporation
package com.k2fsa.sherpa.onnx

import android.content.res.AssetManager

data class SileroVadModelConfig(
    var model: String = "",
    var threshold: Float = 0.5F,
    var minSilenceDuration: Float = 0.25F,
    var minSpeechDuration: Float = 0.25F,
    var windowSize: Int = 512,
    var maxSpeechDuration: Float = 5.0F,
)

data class TenVadModelConfig(
    var model: String = "",
    var threshold: Float = 0.5F,
    var minSilenceDuration: Float = 0.25F,
    var minSpeechDuration: Float = 0.25F,
    var windowSize: Int = 256,
    var maxSpeechDuration: Float = 5.0F,
)

data class VadModelConfig(
    var sileroVadModelConfig: SileroVadModelConfig = SileroVadModelConfig(),
    var tenVadModelConfig: TenVadModelConfig = TenVadModelConfig(),
    var sampleRate: Int = 16000,
    var numThreads: Int = 1,
    var provider: String = "cpu",
    var debug: Boolean = false,
)

class SpeechSegment(val start: Int, val samples: FloatArray)

class Vad(
    assetManager: AssetManager? = null,
    var config: VadModelConfig,
) {
    private var ptr: Long

    init {
        if (assetManager != null) {
            ptr = newFromAsset(assetManager, config)
        } else {
            ptr = newFromFile(config)
        }
    }

    protected fun finalize() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0
        }
    }

    fun release() = finalize()

    fun compute(samples: FloatArray): Float = compute(ptr, samples)


    fun acceptWaveform(samples: FloatArray) = acceptWaveform(ptr, samples)

    fun empty(): Boolean = empty(ptr)
    fun pop() = pop(ptr)

    fun front(): SpeechSegment {
        return front(ptr)
    }

    fun clear() = clear(ptr)

    fun isSpeechDetected(): Boolean = isSpeechDetected(ptr)

    fun reset() = reset(ptr)

    fun flush() = flush(ptr)

    private external fun delete(ptr: Long)

    private external fun newFromAsset(
        assetManager: AssetManager,
        config: VadModelConfig,
    ): Long

    private external fun newFromFile(
        config: VadModelConfig,
    ): Long

    private external fun acceptWaveform(ptr: Long, samples: FloatArray)
    private external fun compute(ptr: Long, samples: FloatArray): Float

    private external fun empty(ptr: Long): Boolean
    private external fun pop(ptr: Long)
    private external fun clear(ptr: Long)
    private external fun front(ptr: Long): SpeechSegment
    private external fun isSpeechDetected(ptr: Long): Boolean
    private external fun reset(ptr: Long)
    private external fun flush(ptr: Long)

    companion object {
        init {
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}

// Please visit
// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
// to download silero_vad.onnx
// and put it inside the assets/
// directory
//
// For ten-vad, please use
// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
//
fun getVadModelConfig(type: Int): VadModelConfig? {
    when (type) {
        0 -> {
            return VadModelConfig(
                sileroVadModelConfig = SileroVadModelConfig(
                    model = "silero_vad.onnx",
                    threshold = 0.5F,
                    minSilenceDuration = 0.25F,
                    minSpeechDuration = 0.25F,
                    windowSize = 512,
                ),
                sampleRate = 16000,
                numThreads = 1,
                provider = "cpu",
            )
        }

        1 -> {
            return VadModelConfig(
                tenVadModelConfig = TenVadModelConfig(
                    model = "ten-vad.onnx",
                    threshold = 0.5F,
                    minSilenceDuration = 0.25F,
                    minSpeechDuration = 0.25F,
                    windowSize = 256,
                ),
                sampleRate = 16000,
                numThreads = 1,
                provider = "cpu",
            )
        }
    }
    return null
}


================================================
FILE: sherpa-onnx/kotlin-api/VersionInfo.kt
================================================
package com.k2fsa.sherpa.onnx

class VersionInfo {
    companion object {
        init {
            System.loadLibrary("sherpa-onnx-jni")
        }

        val version: String
            get() = getVersionStr2()

        val gitSha1: String
            get() = getGitSha12()

        val gitDate: String
            get() = getGitDate2()

        external fun getVersionStr2(): String
        external fun getGitSha12(): String
        external fun getGitDate2(): String
    }
}


================================================
FILE: sherpa-onnx/kotlin-api/WaveReader.kt
================================================
// Copyright (c)  2023  Xiaomi Corporation
package com.k2fsa.sherpa.onnx

import android.content.res.AssetManager

data class WaveData(
    val samples: FloatArray,
    val sampleRate: Int,
) {
    override fun equals(other: Any?): Boolean {
        if (this === other) return true
        if (javaClass != other?.javaClass) return false

        other as WaveData

        if (!samples.contentEquals(other.samples)) return false
        if (sampleRate != other.sampleRate) return false

        return true
    }

    override fun hashCode(): Int {
        var result = samples.contentHashCode()
        result = 31 * result + sampleRate
        return result
    }
}

class WaveReader {
    companion object {

        fun readWave(
            assetManager: AssetManager,
            filename: String,
        ): WaveData {
            return readWaveFromAsset(assetManager, filename)
        }

        fun readWave(
            filename: String,
        ): WaveData {
            return readWaveFromFile(filename)
        }

        // Read a mono wave file asset
        external fun readWaveFromAsset(
            assetManager: AssetManager,
            filename: String,
        ): WaveData

        // Read a mono wave file from disk
        external fun readWaveFromFile(
            filename: String,
        ): WaveData

        init {
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}


================================================
FILE: sherpa-onnx/pascal-api/README.md
================================================
# Introduction

This directory contains APIs for [Object Pascal](https://en.wikipedia.org/wiki/Object_Pascal).

Please see
https://github.com/k2-fsa/sherpa-onnx/tree/master/pascal-api-examples
for usages.

[portaudio.pas](./portaudio.pas)
is copied from
https://github.com/UltraStar-Deluxe/USDX/blob/master/src/lib/portaudio/portaudio.pas


================================================
FILE: sherpa-onnx/pascal-api/portaudio.pas
================================================
{
This file is copied from
https://github.com/UltraStar-Deluxe/USDX/blob/master/src/lib/portaudio/portaudio.pas
}
{*
 * $Id: portaudio.h,v 1.7 2007/08/16 20:45:34 richardash1981 Exp $
 * PortAudio Portable Real-Time Audio Library
 * PortAudio API Header File
 * Latest version available at: http://www.portaudio.com/
 *
 * Copyright (c) 1999-2002 Ross Bencina and Phil Burk
 *                                                 
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files
 * (the "Software"), to deal in the Software without restriction,
 * including without limitation the rights to use, copy, modify, merge,
 * publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *}

{*
 * The text above constitutes the entire PortAudio license; however, 
 * the PortAudio community also makes the following non-binding requests:
 *
 * Any person wishing to distribute modifications to the Software is
 * requested to send the modifications to the original developer so that
 * they can be incorporated into the canonical version. It is also 
 * requested that these non-binding requests be included along with the 
 * license above.
 *}

{** @file
 @brief The PortAudio API.
*}

unit portaudio;

{$IFDEF FPC}
  {$PACKENUM 4}    (* use 4-byte enums *)
  {$PACKRECORDS C} (* C/C++-compatible record packing *)
  {$MODE DELPHI }
{$ELSE}
  {$MINENUMSIZE 4} (* use 4-byte enums *)
{$ENDIF}

interface

uses
  ctypes;

const
{$IF Defined(MSWINDOWS)}
  LibName = 'portaudio_x86.dll';
{$ELSEIF Defined(UNIX)}
  LibName = 'portaudio';
  {$LINKLIB portaudio}
{$IFEND}

{** Retrieve the release number of the currently running PortAudio build,
 eg 1900.
*}
function Pa_GetVersion(): cint; cdecl; external LibName;


{** Retrieve a textual description of the current PortAudio build,
 eg "PortAudio V19-devel 13 October 2002".
*}
function Pa_GetVersionText(): PChar; cdecl; external LibName;


{** Error codes returned by PortAudio functions.
 Note that with the exception of paNoError, all PaErrorCodes are negative.
*}

type TPaError = cint;
type TPaErrorCode = {enum}cint; const
{enum_begin PaErrorCode}
    paNoError = 0;

    paNotInitialized = -10000;
    paUnanticipatedHostError                = (paNotInitialized+ 1);
    paInvalidChannelCount                   = (paNotInitialized+ 2);
    paInvalidSampleRate                     = (paNotInitialized+ 3);
    paInvalidDevice                         = (paNotInitialized+ 4);
    paInvalidFlag                           = (paNotInitialized+ 5);
    paSampleFormatNotSupported              = (paNotInitialized+ 6);
    paBadIODeviceCombination                = (paNotInitialized+ 7);
    paInsufficientMemory                    = (paNotInitialized+ 8);
    paBufferTooBig                          = (paNotInitialized+ 9);
    paBufferTooSmall                        = (paNotInitialized+10);
    paNullCallback                          = (paNotInitialized+11);
    paBadStreamPtr                          = (paNotInitialized+12);
    paTimedOut                              = (paNotInitialized+13);
    paInternalError                         = (paNotInitialized+14);
    paDeviceUnavailable                     = (paNotInitialized+15);
    paIncompatibleHostApiSpecificStreamInfo = (paNotInitialized+16);
    paStreamIsStopped                       = (paNotInitialized+17);
    paStreamIsNotStopped                    = (paNotInitialized+18);
    paInputOverflowed                       = (paNotInitialized+19);
    paOutputUnderflowed                     = (paNotInitialized+20);
    paHostApiNotFound                       = (paNotInitialized+21); // The notes below are from the 
    paInvalidHostApi                        = (paNotInitialized+22); // original file portaudio.h
    paCanNotReadFromACallbackStream         = (paNotInitialized+23); {**< @todo review error code name *}
    paCanNotWriteToACallbackStream          = (paNotInitialized+24); {**< @todo review error code name *}
    paCanNotReadFromAnOutputOnlyStream      = (paNotInitialized+25); {**< @todo review error code name *}
    paCanNotWriteToAnInputOnlyStream        = (paNotInitialized+26); {**< @todo review error code name *}
    paIncompatibleStreamHostApi             = (paNotInitialized+27);
    paBadBufferPtr                          = (paNotInitialized+28);
{enum_end PaErrorCode}


{** Translate the supplied PortAudio error code into a human readable
 message.
*}
function Pa_GetErrorText( errorCode: TPaError ): PChar; cdecl; external LibName;


{** Library initialization function - call this before using PortAudio.
 This function initialises internal data structures and prepares underlying
 host APIs for use.  With the exception of Pa_GetVersion(), Pa_GetVersionText(),
 and Pa_GetErrorText(), this function MUST be called before using any other
 PortAudio API functions.

 If Pa_Initialize() is called multiple times, each successful
 call must be matched with a corresponding call to Pa_Terminate(). 
 Pairs of calls to Pa_Initialize()/Pa_Terminate() may overlap, and are not 
 required to be fully nested.

 Note that if Pa_Initialize() returns an error code, Pa_Terminate() should
 NOT be called.

 @return paNoError if successful, otherwise an error code indicating the cause
 of failure.

 @see Pa_Terminate
*}
function Pa_Initialize(): TPaError; cdecl; external LibName;


{** Library termination function - call this when finished using PortAudio.
 This function deallocates all resources allocated by PortAudio since it was
 initializied by a call to Pa_Initialize(). In cases where Pa_Initialise() has
 been called multiple times, each call must be matched with a corresponding call
 to Pa_Terminate(). The final matching call to Pa_Terminate() will automatically
 close any PortAudio streams that are still open.

 Pa_Terminate() MUST be called before exiting a program which uses PortAudio.
 Failure to do so may result in serious resource leaks, such as audio devices
 not being available until the next reboot.

 @return paNoError if successful, otherwise an error code indicating the cause
 of failure.
 
 @see Pa_Initialize
*}
function Pa_Terminate(): TPaError; cdecl; external LibName;


{** The type used to refer to audio devices. Values of this type usually
 range from 0 to (Pa_GetDeviceCount()-1), and may also take on the PaNoDevice
 and paUseHostApiSpecificDeviceSpecification values.

 @see Pa_GetDeviceCount, paNoDevice, paUseHostApiSpecificDeviceSpecification
*}
type TPaDeviceIndex = cint;


{** A special PaDeviceIndex value indicating that no device is available,
 or should be used.

 @see PaDeviceIndex
*}
const paNoDevice = TPaDeviceIndex(-1);


{** A special PaDeviceIndex value indicating that the device(s) to be used
 are specified in the host api specific stream info structure.

 @see PaDeviceIndex
*}
const paUseHostApiSpecificDeviceSpecification = TPaDeviceIndex(-2);


{* Host API enumeration mechanism *}

{** The type used to enumerate to host APIs at runtime. Values of this type
 range from 0 to (Pa_GetHostApiCount()-1).

 @see Pa_GetHostApiCount
*}
type TPaHostApiIndex = cint;

{** Retrieve the number of available host APIs. Even if a host API is
 available it may have no devices available.

 @return A non-negative value indicating the number of available host APIs
 or, a PaErrorCode (which are always negative) if PortAudio is not initialized
 or an error is encountered.

 @see PaHostApiIndex
*}
function Pa_GetHostApiCount(): TPaHostApiIndex; cdecl; external LibName;


{** Retrieve the index of the default host API. The default host API will be
 the lowest common denominator host API on the current platform and is
 unlikely to provide the best performance.

 @return A non-negative value ranging from 0 to (Pa_GetHostApiCount()-1)
 indicating the default host API index or, a PaErrorCode (which are always
 negative) if PortAudio is not initialized or an error is encountered.
*}
function Pa_GetDefaultHostApi(): TPaHostApiIndex; cdecl; external LibName;


{** Unchanging unique identifiers for each supported host API. This type
 is used in the PaHostApiInfo structure. The values are guaranteed to be
 unique and to never change, thus allowing code to be written that
 conditionally uses host API specific extensions.

 New type ids will be allocated when support for a host API reaches
 "public alpha" status, prior to that developers should use the
 paInDevelopment type id.

 @see PaHostApiInfo
*}
type TPaHostApiTypeId = {enum}cint; const
{enum_begin PaHostApiTypeId}
    paInDevelopment=0; {* use while developing support for a new host API *}
    paDirectSound=1;
    paMME=2;
    paASIO=3;
    paSoundManager=4;
    paCoreAudio=5;
    paOSS=7;
    paALSA=8;
    paAL=9;
    paBeOS=10;
    paWDMKS=11;
    paJACK=12;
    paWASAPI=13;
    paAudioScienceHPI=14;
{enum_end PaHostApiTypeId}

{** A structure containing information about a particular host API. *}

type
  PPaHostApiInfo = ^TPaHostApiInfo;
  TPaHostApiInfo = record
      {** this is struct version 1 *}
      structVersion: cint;
      {** The well known unique identifier of this host API @see PaHostApiTypeId *}
      _type: TPaHostApiTypeId;
      {** A textual description of the host API for display on user interfaces. *}
      name: PChar;

      {**  The number of devices belonging to this host API. This field may be
       used in conjunction with Pa_HostApiDeviceIndexToDeviceIndex() to enumerate
       all devices for this host API.
       @see Pa_HostApiDeviceIndexToDeviceIndex
      *}
      deviceCount: cint;

      {** The default input device for this host API. The value will be a
       device index ranging from 0 to (Pa_GetDeviceCount()-1), or paNoDevice
       if no default input device is available.
      *}
      defaultInputDevice: TPaDeviceIndex;

      {** The default output device for this host API. The value will be a
       device index ranging from 0 to (Pa_GetDeviceCount()-1), or paNoDevice
       if no default output device is available.
      *}
      defaultOutputDevice: TPaDeviceIndex;
  end;


{** Retrieve a pointer to a structure containing information about a specific
 host Api.

 @param hostApi A valid host API index ranging from 0 to (Pa_GetHostApiCount()-1)

 @return A pointer to an immutable PaHostApiInfo structure describing
 a specific host API. If the hostApi parameter is out of range or an error
 is encountered, the function returns NULL.

 The returned structure is owned by the PortAudio implementation and must not
 be manipulated or freed. The pointer is only guaranteed to be valid between
 calls to Pa_Initialize() and Pa_Terminate().
*}
function Pa_GetHostApiInfo( hostApi: TPaHostApiIndex ): PPaHostApiInfo; cdecl; external LibName;


{** Convert a static host API unique identifier, into a runtime
 host API index.

 @param type A unique host API identifier belonging to the PaHostApiTypeId
 enumeration.

 @return A valid PaHostApiIndex ranging from 0 to (Pa_GetHostApiCount()-1) or,
 a PaErrorCode (which are always negative) if PortAudio is not initialized
 or an error is encountered.
 
 The paHostApiNotFound error code indicates that the host API specified by the
 type parameter is not available.

 @see PaHostApiTypeId
*}
function Pa_HostApiTypeIdToHostApiIndex( _type: TPaHostApiTypeId ): TPaHostApiIndex; cdecl; external LibName;


{** Convert a host-API-specific device index to standard PortAudio device index.
 This function may be used in conjunction with the deviceCount field of
 PaHostApiInfo to enumerate all devices for the specified host API.

 @param hostApi A valid host API index ranging from 0 to (Pa_GetHostApiCount()-1)

 @param hostApiDeviceIndex A valid per-host device index in the range
 0 to (Pa_GetHostApiInfo(hostApi)->deviceCount-1)

 @return A non-negative PaDeviceIndex ranging from 0 to (Pa_GetDeviceCount()-1)
 or, a PaErrorCode (which are always negative) if PortAudio is not initialized
 or an error is encountered.

 A paInvalidHostApi error code indicates that the host API index specified by
 the hostApi parameter is out of range.

 A paInvalidDevice error code indicates that the hostApiDeviceIndex parameter
 is out of range.
 
 @see PaHostApiInfo
*}
function Pa_HostApiDeviceIndexToDeviceIndex( hostApi: TPaHostApiIndex;
        hostApiDeviceIndex: cint ): TPaDeviceIndex; cdecl; external LibName;


{** Structure used to return information about a host error condition.
*}
type
  PPaHostErrorInfo = ^TPaHostErrorInfo;
  TPaHostErrorInfo = record
      hostApiType: TPaHostApiTypeId;    {**< the host API which returned the error code *}
      errorCode: clong;                 {**< the error code returned *}
      errorText: PChar;                 {**< a textual description of the error if available, otherwise a zero-length string *}
  end;


{** Return information about the last host error encountered. The error
 information returned by Pa_GetLastHostErrorInfo() will never be modified
 asynchronously by errors occurring in other PortAudio owned threads
 (such as the thread that manages the stream callback.)

 This function is provided as a last resort, primarily to enhance debugging
 by providing clients with access to all available error information.

 @return A pointer to an immutable structure containing information about
 the host error. The values in this structure will only be valid if a
 PortAudio function has previously returned the paUnanticipatedHostError
 error code.
*}
function Pa_GetLastHostErrorInfo(): PPaHostErrorInfo; cdecl; external LibName;


{* Device enumeration and capabilities *}

{** Retrieve the number of available devices. The number of available devices
 may be zero.

 @return A non-negative value indicating the number of available devices or,
 a PaErrorCode (which are always negative) if PortAudio is not initialized
 or an error is encountered.
*}
function Pa_GetDeviceCount(): TPaDeviceIndex; cdecl; external LibName;


{** Retrieve the index of the default input device. The result can be
 used in the inputDevice parameter to Pa_OpenStream().

 @return The default input device index for the default host API, or paNoDevice
 if no default input device is available or an error was encountered.
*}
function Pa_GetDefaultInputDevice(): TPaDeviceIndex; cdecl; external LibName;


{** Retrieve the index of the default output device. The result can be
 used in the outputDevice parameter to Pa_OpenStream().

 @return The default output device index for the default host API, or paNoDevice
 if no default output device is available or an error was encountered.

 @note
 On the PC, the user can specify a default device by
 setting an environment variable. For example, to use device #1.
<pre>
 set PA_RECOMMENDED_OUTPUT_DEVICE=1
</pre>
 The user should first determine the available device ids by using
 the supplied application "pa_devs".
*}
function Pa_GetDefaultOutputDevice(): TPaDeviceIndex; cdecl; external LibName;


{** The type used to represent monotonic time in seconds that can be used
 for synchronisation. The type is used for the outTime argument to the
 PaStreamCallback and as the result of Pa_GetStreamTime().
     
 @see PaStreamCallback, Pa_GetStreamTime
*}
type TPaTime = cdouble;


{** A type used to specify one or more sample formats. Each value indicates
 a possible format for sound data passed to and from the stream callback,
 Pa_ReadStream and Pa_WriteStream.

 The standard formats paFloat32, paInt16, paInt32, paInt24, paInt8
 and aUInt8 are usually implemented by all implementations.

 The floating point representation (paFloat32) uses +1.0 and -1.0 as the
 maximum and minimum respectively.

 paUInt8 is an unsigned 8 bit format where 128 is considered "ground"

 The paNonInterleaved flag indicates that a multichannel buffer is passed
 as a set of non-interleaved pointers.

 @see Pa_OpenStream, Pa_OpenDefaultStream, PaDeviceInfo
 @see paFloat32, paInt16, paInt32, paInt24, paInt8
 @see paUInt8, paCustomFormat, paNonInterleaved
*}
type TPaSampleFormat = culong;
const
  paFloat32        = TPaSampleFormat($00000001); {**< @see PaSampleFormat *}
  paInt32          = TPaSampleFormat($00000002); {**< @see PaSampleFormat *}
  paInt24          = TPaSampleFormat($00000004); {**< Packed 24 bit format. @see PaSampleFormat *}
  paInt16          = TPaSampleFormat($00000008); {**< @see PaSampleFormat *}
  paInt8           = TPaSampleFormat($00000010); {**< @see PaSampleFormat *}
  paUInt8          = TPaSampleFormat($00000020); {**< @see PaSampleFormat *}
  paCustomFormat   = TPaSampleFormat($00010000); {**< @see PaSampleFormat *}
  paNonInterleaved = TPaSampleFormat($80000000);

{** A structure providing information and capabilities of PortAudio devices.
 Devices may support input, output or both input and output.
*}
type
  PPaDeviceInfo = ^TPaDeviceInfo;
  TPaDeviceInfo = record
      structVersion: cint;  {* this is struct version 2 *}
      name: PChar;
      hostApi: TPaHostApiIndex; {* note this is a host API index, not a type id*}

      maxInputChannels: cint;
      maxOutputChannels: cint;

      {* Default latency values for interactive performance. *}
      defaultLowInputLatency: TPaTime;
      defaultLowOutputLatency: TPaTime;
      {* Default latency values for robust non-interactive applications (eg. playing sound files). *}
      defaultHighInputLatency: TPaTime;
      defaultHighOutputLatency: TPaTime;

      defaultSampleRate: cdouble;
  end;


{** Retrieve a pointer to a PaDeviceInfo structure containing information
 about the specified device.
 @return A pointer to an immutable PaDeviceInfo structure. If the device
 parameter is out of range the function returns NULL.

 @param device A valid device index in the range 0 to (Pa_GetDeviceCount()-1)

 @note PortAudio manages the memory referenced by the returned pointer,
 the client must not manipulate or free the memory. The pointer is only
 guaranteed to be valid between calls to Pa_Initialize() and Pa_Terminate().

 @see PaDeviceInfo, PaDeviceIndex
*}
function Pa_GetDeviceInfo( device: TPaDeviceIndex ): PPaDeviceInfo; cdecl; external LibName;


{** Parameters for one direction (input or output) of a stream.
*}
type
  PPaStreamParameters = ^TPaStreamParameters;
  TPaStreamParameters = record
      {** A valid device index in the range 0 to (Pa_GetDeviceCount()-1)
       specifying the device to be used or the special constant
       paUseHostApiSpecificDeviceSpecification which indicates that the actual
       device(s) to use are specified in hostApiSpecificStreamInfo.
       This field must not be set to paNoDevice.
      *}
      device: TPaDeviceIndex;

      {** The number of channels of sound to be delivered to the
       stream callback or accessed by Pa_ReadStream() or Pa_WriteStream().
       It can range from 1 to the value of maxInputChannels in the
       PaDeviceInfo record for the device specified by the device parameter.
      *}
      channelCount: cint;

      {** The sample format of the buffer provided to the stream callback,
       a_ReadStream() or Pa_WriteStream(). It may be any of the formats described
       by the PaSampleFormat enumeration.
      *}
      sampleFormat: TPaSampleFormat;

      {** The desired latency in seconds. Where practical, implementations should
       configure their latency based on these parameters, otherwise they may
       choose the closest viable latency instead. Unless the suggested latency
       is greater than the absolute upper limit for the device implementations
       should round the suggestedLatency up to the next practical value - ie to
       provide an equal or higher latency than suggestedLatency wherever possible.
       Actual latency values for an open stream may be retrieved using the
       inputLatency and outputLatency fields of the PaStreamInfo structure
       returned by Pa_GetStreamInfo().
       @see default*Latency in PaDeviceInfo, *Latency in PaStreamInfo
      *}
      suggestedLatency: TPaTime;

      {** An optional pointer to a host api specific data structure
       containing additional information for device setup and/or stream processing.
       hostApiSpecificStreamInfo is never required for correct operation,
       if not used it should be set to NULL.
      *}
      hostApiSpecificStreamInfo: Pointer;
  end;


{** Return code for Pa_IsFormatSupported indicating success. *}
const paFormatIsSupported = (0);

{** Determine whether it would be possible to open a stream with the specified
 parameters.

 @param inputParameters A structure that describes the input parameters used to
 open a stream. The suggestedLatency field is ignored. See PaStreamParameters
 for a description of these parameters. inputParameters must be NULL for
 output-only streams.

 @param outputParameters A structure that describes the output parameters used
 to open a stream. The suggestedLatency field is ignored. See PaStreamParameters
 for a description of these parameters. outputParameters must be NULL for
 input-only streams.

 @param sampleRate The required sampleRate. For full-duplex streams it is the
 sample rate for both input and output

 @return Returns 0 if the format is supported, and an error code indicating why
 the format is not supported otherwise. The constant paFormatIsSupported is
 provided to compare with the return value for success.

 @see paFormatIsSupported, PaStreamParameters
*}
function Pa_IsFormatSupported( inputParameters: PPaStreamParameters;
                              outputParameters: PPaStreamParameters;
                              sampleRate: cdouble ): TPaError; cdecl; external LibName;


{* Streaming types and functions *}


{**
 A single PaStream can provide multiple channels of real-time
 streaming audio input and output to a client application. A stream
 provides access to audio hardware represented by one or more
 PaDevices. Depending on the underlying Host API, it may be possible 
 to open multiple streams using the same device, however this behavior 
 is implementation defined. Portable applications should assume that 
 a PaDevice may be simultaneously used by at most one PaStream.

 Pointers to PaStream objects are passed between PortAudio functions that
 operate on streams.

 @see Pa_OpenStream, Pa_OpenDefaultStream, Pa_OpenDefaultStream, Pa_CloseStream,
 Pa_StartStream, Pa_StopStream, Pa_AbortStream, Pa_IsStreamActive,
 Pa_GetStreamTime, Pa_GetStreamCpuLoad

*}
type
  PPaStream = Pointer;

{** Can be passed as the framesPerBuffer parameter to Pa_OpenStream()
 or Pa_OpenDefaultStream() to indicate that the stream callback will
 accept buffers of any size.
*}
const paFramesPerBufferUnspecified = (0);


{** Flags used to control the behavior of a stream. They are passed as
 parameters to Pa_OpenStream or Pa_OpenDefaultStream. Multiple flags may be
 ORed together.

 @see Pa_OpenStream, Pa_OpenDefaultStream
 @see paNoFlag, paClipOff, paDitherOff, paNeverDropInput,
  paPrimeOutputBuffersUsingStreamCallback, paPlatformSpecificFlags
*}
type TPaStreamFlags = culong;

{** @see PaStreamFlags *}
const   paNoFlag          = TPaStreamFlags(0);

{** Disable default clipping of out of range samples.
 @see PaStreamFlags
*}
const   paClipOff         = TPaStreamFlags($00000001);

{** Disable default dithering.
 @see PaStreamFlags
*}
const   paDitherOff       = TPaStreamFlags($00000002);

{** Flag requests that where possible a full duplex stream will not discard
 overflowed input samples without calling the stream callback. This flag is
 only valid for full duplex callback streams and only when used in combination
 with the paFramesPerBufferUnspecified (0) framesPerBuffer parameter. Using
 this flag incorrectly results in a paInvalidFlag error being returned from
 Pa_OpenStream and Pa_OpenDefaultStream.

 @see PaStreamFlags, paFramesPerBufferUnspecified
*}
const   paNeverDropInput  = TPaStreamFlags($00000004);

{** Call the stream callback to fill initial output buffers, rather than the
 default behavior of priming the buffers with zeros (silence). This flag has
 no effect for input-only and blocking read/write streams.
 
 @see PaStreamFlags
*}
const   paPrimeOutputBuffersUsingStreamCallback = TPaStreamFlags($00000008);

{** A mask specifying the platform specific bits.
 @see PaStreamFlags
*}
const   paPlatformSpecificFlags = TPaStreamFlags($FFFF0000);

{**
 Timing information for the buffers passed to the stream callback.
*}
type
  PPaStreamCallbackTimeInfo = ^TPaStreamCallbackTimeInfo;
  TPaStreamCallbackTimeInfo = record
      inputBufferAdcTime: TPaTime;
      currentTime: TPaTime;
      outputBufferDacTime: TPaTime;
  end;


{**
 Flag bit constants for the statusFlags to PaStreamCallback.

 @see paInputUnderflow, paInputOverflow, paOutputUnderflow, paOutputOverflow,
 paPrimingOutput
*}
type TPaStreamCallbackFlags = culong;

{** In a stream opened with paFramesPerBufferUnspecified, indicates that
 input data is all silence (zeros) because no real data is available. In a
 stream opened without paFramesPerBufferUnspecified, it indicates that one or
 more zero samples have been inserted into the input buffer to compensate
 for an input underflow.
 @see PaStreamCallbackFlags
*}
const paInputUnderflow   = TPaStreamCallbackFlags($00000001);

{** In a stream opened with paFramesPerBufferUnspecified, indicates that data
 prior to the first sample of the input buffer was discarded due to an
 overflow, possibly because the stream callback is using too much CPU time.
 Otherwise indicates that data prior to one or more samples in the
 input buffer was discarded.
 @see PaStreamCallbackFlags
*}
const paInputOverflow    = TPaStreamCallbackFlags($00000002);

{** Indicates that output data (or a gap) was inserted, possibly because the
 stream callback is using too much CPU time.
 @see PaStreamCallbackFlags
*}
const paOutputUnderflow  = TPaStreamCallbackFlags($00000004);

{** Indicates that output data will be discarded because no room is available.
 @see PaStreamCallbackFlags
*}
const paOutputOverflow   = TPaStreamCallbackFlags($00000008);

{** Some of all of the output data will be used to prime the stream, input
 data may be zero.
 @see PaStreamCallbackFlags
*}
const paPrimingOutput    = TPaStreamCallbackFlags($00000010);

{**
 Allowable return values for the PaStreamCallback.
 @see PaStreamCallback
*}
type TPaStreamCallbackResult = {enum}cint; const
{enum_begin PaStreamCallbackResult}
    paContinue=0;
    paComplete=1;
    paAbort=2;
{enum_end PaStreamCallbackResult}

{**
 Functions of type PaStreamCallback are implemented by PortAudio clients.
 They consume, process or generate audio in response to requests from an
 active PortAudio stream.
     
 @param input and @param output are arrays of interleaved samples,
 the format, packing and number of channels used by the buffers are
 determined by parameters to Pa_OpenStream().
     
 @param frameCount The number of sample frames to be processed by
 the stream callback.

 @param timeInfo The time in seconds when the first sample of the input
 buffer was received at the audio input, the time in seconds when the first
 sample of the output buffer will begin being played at the audio output, and
 the time in seconds when the stream callback was called.
 See also Pa_GetStreamTime()

 @param statusFlags Flags indicating whether input and/or output buffers
 have been inserted or will be dropped to overcome underflow or overflow
 conditions.

 @param userData The value of a user supplied pointer passed to
 Pa_OpenStream() intended for storing synthesis data etc.

 @return
 The stream callback should return one of the values in the
 PaStreamCallbackResult enumeration. To ensure that the callback continues
 to be called, it should return paContinue (0). Either paComplete or paAbort
 can be returned to finish stream processing, after either of these values is
 returned the callback will not be called again. If paAbort is returned the
 stream will finish as soon as possible. If paComplete is returned, the stream
 will continue until all buffers generated by the callback have been played.
 This may be useful in applications such as soundfile players where a specific
 duration of output is required. However, it is not necessary to utilise this
 mechanism as Pa_StopStream(), Pa_AbortStream() or Pa_CloseStream() can also
 be used to stop the stream. The callback must always fill the entire output
 buffer irrespective of its return value.

 @see Pa_OpenStream, Pa_OpenDefaultStream

 @note With the exception of Pa_GetStreamCpuLoad() it is not permissible to call
 PortAudio API functions from within the stream callback.
*}
type
  PPaStreamCallback = ^TPaStreamCallback;
  TPaStreamCallback = function(
      input: Pointer; output: Pointer;
      frameCount: culong;
      timeInfo: PPaStreamCallbackTimeInfo;
      statusFlags: TPaStreamCallbackFlags;
      userData: Pointer ): cint; cdecl;


{** Opens a stream for either input, output or both.
     
 @param stream The address of a PaStream pointer which will receive
 a pointer to the newly opened stream.
     
 @param inputParameters A structure that describes the input parameters used by
 the opened stream. See PaStreamParameters for a description of these parameters.
 inputParameters must be NULL for output-only streams.

 @param outputParameters A structure that describes the output parameters used by
 the opened stream. See PaStreamParameters for a description of these parameters.
 outputParameters must be NULL for input-only streams.
 
 @param sampleRate The desired sampleRate. For full-duplex streams it is the
 sample rate for both input and output

 @param framesPerBuffer The number of frames passed to the stream callback
 function, or the preferred block granularity for a blocking read/write stream.
 The special value paFramesPerBufferUnspecified (0) may be used to request that
 the stream callback will receive an optimal (and possibly varying) number of
 frames based on host requirements and the requested latency settings.
 Note: With some host APIs, the use of non-zero framesPerBuffer for a callback
 stream may introduce an additional layer of buffering which could introduce
 additional latency. PortAudio guarantees that the additional latency
 will be kept to the theoretical minimum however, it is strongly recommended
 that a non-zero framesPerBuffer value only be used when your algorithm
 requires a fixed number of frames per stream callback.
 
 @param streamFlags Flags which modify the behaviour of the streaming process.
 This parameter may contain a combination of flags ORed together. Some flags may
 only be relevant to certain buffer formats.
     
 @param streamCallback A pointer to a client supplied function that is responsible
 for processing and filling input and output buffers. If this parameter is NULL
 the stream will be opened in 'blocking read/write' mode. In blocking mode,
 the client can receive sample data using Pa_ReadStream and write sample data
 using Pa_WriteStream, the number of samples that may be read or written
 without blocking is returned by Pa_GetStreamReadAvailable and
 Pa_GetStreamWriteAvailable respectively.

 @param userData A client supplied pointer which is passed to the stream callback
 function. It could for example, contain a pointer to instance data necessary
 for processing the audio buffers. This parameter is ignored if streamCallback
 is NULL.
     
 @return
 Upon success Pa_OpenStream() returns paNoError and places a pointer to a
 valid PaStream in the stream argument. The stream is inactive (stopped).
 If a call to Pa_OpenStream() fails, a non-zero error code is returned (see
 PaError for possible error codes) and the value of stream is invalid.

 @see PaStreamParameters, PaStreamCallback, Pa_ReadStream, Pa_WriteStream,
 Pa_GetStreamReadAvailable, Pa_GetStreamWriteAvailable
*}
function Pa_OpenStream( var stream: PPaStream;
                       inputParameters: PPaStreamParameters;
                       outputParameters: PPaStreamParameters;
                       sampleRate: cdouble;
                       framesPerBuffer: culong;
                       streamFlags: TPaStreamFlags;
                       streamCallback: PPaStreamCallback;
                       userData: Pointer ): TPaError; cdecl; external LibName;


{** A simplified version of Pa_OpenStream() that opens the default input
 and/or output devices.

 @param stream The address of a PaStream pointer which will receive
 a pointer to the newly opened stream.
 
 @param numInputChannels  The number of channels of sound that will be supplied
 to the stream callback or returned by Pa_ReadStream. It can range from 1 to
 the value of maxInputChannels in the PaDeviceInfo record for the default input
 device. If 0 the stream is opened as an output-only stream.

 @param numOutputChannels The number of channels of sound to be delivered to the
 stream callback or passed to Pa_WriteStream. It can range from 1 to the value
 of maxOutputChannels in the PaDeviceInfo record for the default output dvice.
 If 0 the stream is opened as an output-only stream.

 @param sampleFormat The sample format of both the input and output buffers
 provided to the callback or passed to and from Pa_ReadStream and Pa_WriteStream.
 sampleFormat may be any of the formats described by the PaSampleFormat
 enumeration.
 
 @param sampleRate Same as Pa_OpenStream parameter of the same name.
 @param framesPerBuffer Same as Pa_OpenStream parameter of the same name.
 @param streamCallback Same as Pa_OpenStream parameter of the same name.
 @param userData Same as Pa_OpenStream parameter of the same name.

 @return As for Pa_OpenStream

 @see Pa_OpenStream, PaStreamCallback
*}
function Pa_OpenDefaultStream( var stream: PPaStream;
                              numInputChannels: cint;
                              numOutputChannels: cint;
                              sampleFormat: TPaSampleFormat;
                              sampleRate: cdouble;
                              framesPerBuffer: culong;
                              streamCallback: PPaStreamCallback;
                              userData: Pointer ): TPaError; cdecl; external LibName;


{** Closes an audio stream. If the audio stream is active it
 discards any pending buffers as if Pa_AbortStream() had been called.
*}
function Pa_CloseStream( stream: PPaStream ): TPaError; cdecl; external LibName;


{** Functions of type PaStreamFinishedCallback are implemented by PortAudio 
 clients. They can be registered with a stream using the Pa_SetStreamFinishedCallback
 function. Once registered they are called when the stream becomes inactive
 (ie once a call to Pa_StopStream() will not block).
 A stream will become inactive after the stream callback returns non-zero,
 or when Pa_StopStream or Pa_AbortStream is called. For a stream providing audio
 output, if the stream callback returns paComplete, or Pa_StopStream is called,
 the stream finished callback will not be called until all generated sample data
 has been played.
 
 @param userData The userData parameter supplied to Pa_OpenStream()

 @see Pa_SetStreamFinishedCallback
*}
type
  PPaStreamFinishedCallback = ^TPaStreamFinishedCallback;
  TPaStreamFinishedCallback = procedure( userData: Pointer ); cdecl;


{** Register a stream finished callback function which will be called when the 
 stream becomes inactive. See the description of PaStreamFinishedCallback for 
 further details about when the callback will be called.

 @param stream a pointer to a PaStream that is in the stopped state - if the
 stream is not stopped, the stream's finished callback will remain unchanged 
 and an error code will be returned.

 @param streamFinishedCallback a pointer to a function with the same signature
 as PaStreamFinishedCallback, that will be called when the stream becomes
 inactive. Passing NULL for this parameter will un-register a previously
 registered stream finished callback function.

 @return on success returns paNoError, otherwise an error code indicating the cause
 of the error.

 @see PaStreamFinishedCallback
*}
function Pa_SetStreamFinishedCallback( stream: PPaStream;
                streamFinishedCallback: PPaStreamFinishedCallback ): TPaError; cdecl; external LibName;


{** Commences audio processing.
*}
function Pa_StartStream( stream: PPaStream ): TPaError; cdecl; external LibName;


{** Terminates audio processing. It waits until all pending
 audio buffers have been played before it returns.
*}
function Pa_StopStream( stream: PPaStream ): TPaError; cdecl; external LibName;


{** Terminates audio processing immediately without waiting for pending
 buffers to complete.
*}
function Pa_AbortStream( stream: PPaStream ): TPaError; cdecl; external LibName;


{** Determine whether the stream is stopped.
 A stream is considered to be stopped prior to a successful call to
 Pa_StartStream and after a successful call to Pa_StopStream or Pa_AbortStream.
 If a stream callback returns a value other than paContinue the stream is NOT
 considered to be stopped.

 @return Returns one (1) when the stream is stopped, zero (0) when
 the stream is running or, a PaErrorCode (which are always negative) if
 PortAudio is not initialized or an error is encountered.

 @see Pa_StopStream, Pa_AbortStream, Pa_IsStreamActive
*}
function Pa_IsStreamStopped( stream: PPaStream ): TPaError; cdecl; external LibName;


{** Determine whether the stream is active.
 A stream is active after a successful call to Pa_StartStream(), until it
 becomes inactive either as a result of a call to Pa_StopStream() or
 Pa_AbortStream(), or as a result of a return value other than paContinue from
 the stream callback. In the latter case, the stream is considered inactive
 after the last buffer has finished playing.

 @return Returns one (1) when the stream is active (ie playing or recording
 audio), zero (0) when not playing or, a PaErrorCode (which are always negative)
 if PortAudio is not initialized or an error is encountered.

 @see Pa_StopStream, Pa_AbortStream, Pa_IsStreamStopped
*}
function Pa_IsStreamActive( stream: PPaStream ): TPaError; cdecl; external LibName;


{** A structure containing unchanging information about an open stream.
 @see Pa_GetStreamInfo
*}
type
  PPaStreamInfo = ^TPaStreamInfo;
  TPaStreamInfo = record
      {** this is struct version 1 *}
      structVersion: cint;

      {** The input latency of the stream in seconds. This value provides the most
       accurate estimate of input latency available to the implementation. It may
       differ significantly from the suggestedLatency value passed to Pa_OpenStream().
       The value of this field will be zero (0.) for output-only streams.
       @see PaTime
      *}
      inputLatency: TPaTime;

      {** The output latency of the stream in seconds. This value provides the most
       accurate estimate of output latency available to the implementation. It may
       differ significantly from the suggestedLatency value passed to Pa_OpenStream().
       The value of this field will be zero (0.) for input-only streams.
       @see PaTime
      *}
      outputLatency: TPaTime;

      {** The sample rate of the stream in Hertz (samples per second). In cases
       where the hardware sample rate is inaccurate and PortAudio is aware of it,
       the value of this field may be different from the sampleRate parameter
       passed to Pa_OpenStream(). If information about the actual hardware sample
       rate is not available, this field will have the same value as the sampleRate
       parameter passed to Pa_OpenStream().
      *}
      sampleRate: cdouble;
  end;


{** Retrieve a pointer to a PaStreamInfo structure containing information
 about the specified stream.
 @return A pointer to an immutable PaStreamInfo structure. If the stream
 parameter invalid, or an error is encountered, the function returns NULL.

 @param stream A pointer to an open stream previously created with Pa_OpenStream.

 @note PortAudio manages the memory referenced by the returned pointer,
 the client must not manipulate or free the memory. The pointer is only
 guaranteed to be valid until the specified stream is closed.

 @see PaStreamInfo
*}
function Pa_GetStreamInfo( stream: PPaStream ): PPaStreamInfo; cdecl; external LibName;


{** Determine the current time for the stream according to the same clock used
 to generate buffer timestamps. This time may be used for synchronising other
 events to the audio stream, for example synchronizing audio to MIDI.
                                        
 @return The stream's current time in seconds, or 0 if an error occurred.

 @see PaTime, PaStreamCallback
*}
function Pa_GetStreamTime( stream: PPaStream ): TPaTime; cdecl; external LibName;


{** Retrieve CPU usage information for the specified stream.
 The "CPU Load" is a fraction of total CPU time consumed by a callback stream's
 audio processing routines including, but not limited to the client supplied
 stream callback. This function does not work with blocking read/write streams.

 This function may be called from the stream callback function or the
 application.
     
 @return
 A floating point value, typically between 0.0 and 1.0, where 1.0 indicates
 that the stream callback is consuming the maximum number of CPU cycles possible
 to maintain real-time operation. A value of 0.5 would imply that PortAudio and
 the stream callback was consuming roughly 50% of the available CPU time. The
 return value may exceed 1.0. A value of 0.0 will always be returned for a
 blocking read/write stream, or if an error occurs.
*}
function Pa_GetStreamCpuLoad( stream: PPaStream ): cdouble; cdecl; external LibName;


{** Read samples from an input stream. The function doesn't return until
 the entire buffer has been filled - this may involve waiting for the operating
 system to supply the data.

 @param stream A pointer to an open stream previously created with Pa_OpenStream.
 
 @param buffer A pointer to a buffer of sample frames. The buffer contains
 samples in the format specified by the inputParameters->sampleFormat field
 used to open the stream, and the number of channels specified by
 inputParameters->numChannels. If non-interleaved samples were requested,
 buffer is a pointer to the first element of an array of non-interleaved
 buffer pointers, one for each channel.

 @param frames The number of frames to be read into buffer. This parameter
 is not constrained to a specific range, however high performance applications
 will want to match this parameter to the framesPerBuffer parameter used
 when opening the stream.

 @return On success PaNoError will be returned, or PaInputOverflowed if input
 data was discarded by PortAudio after the previous call and before this call.
*}
function Pa_ReadStream( stream: PPaStream;
                       buffer: Pointer;
                       frames: culong ): TPaError; cdecl; external LibName;


{** Write samples to an output stream. This function doesn't return until the
 entire buffer has been consumed - this may involve waiting for the operating
 system to consume the data.

 @param stream A pointer to an open stream previously created with Pa_OpenStream.

 @param buffer A pointer to a buffer of sample frames. The buffer contains
 samples in the format specified by the outputParameters->sampleFormat field
 used to open the stream, and the number of channels specified by
 outputParameters->numChannels. If non-interleaved samples were requested,
 buffer is a pointer to the first element of an array of non-interleaved
 buffer pointers, one for each channel.

 @param frames The number of frames to be written from buffer. This parameter
 is not constrained to a specific range, however high performance applications
 will want to match this parameter to the framesPerBuffer parameter used
 when opening the stream.

 @return On success PaNoError will be returned, or paOutputUnderflowed if
 additional output data was inserted after the previous call and before this
 call.
*}
function Pa_WriteStream( stream: PPaStream;
                        buffer: Pointer;
                        frames: culong ): TPaError; cdecl; external LibName;


{** Retrieve the number of frames that can be read from the stream without
 waiting.

 @return Returns a non-negative value representing the maximum number of frames
 that can be read from the stream without blocking or busy waiting or, a
 PaErrorCode (which are always negative) if PortAudio is not initialized or an
 error is encountered.
*}
function Pa_GetStreamReadAvailable( stream: PPaStream ): cslong; cdecl; external LibName;


{** Retrieve the number of frames that can be written to the stream without
 waiting.

 @return Returns a non-negative value representing the maximum number of frames
 that can be written to the stream without blocking or busy waiting or, a
 PaErrorCode (which are always negative) if PortAudio is not initialized or an
 error is encountered.
*}
function Pa_GetStreamWriteAvailable( stream: PPaStream ): cslong; cdecl; external LibName;


{** Retrieve the host type handling an open stream.

 @return Returns a non-negative value representing the host API type
 handling an open stream or, a PaErrorCode (which are always negative)
 if PortAudio is not initialized or an error is encountered.
*}
function Pa_GetStreamHostApiType( stream: PPaStream ): TPaHostApiTypeId; cdecl; external LibName;


{* Miscellaneous utilities *}


{** Retrieve the size of a given sample format in bytes.

 @return The size in bytes of a single sample in the specified format,
 or paSampleFormatNotSupported if the format is not supported.
*}
function Pa_GetSampleSize( format: TPaSampleFormat ): TPaError; cdecl; external LibName;


{** Put the caller to sleep for at least 'msec' milliseconds. This function is
 provided only as a convenience for authors of portable code (such as the tests
 and examples in the PortAudio distribution.)

 The function may sleep longer than requested so don't rely on this for accurate
 musical timing.
*}
procedure Pa_Sleep( msec: clong ); cdecl; external LibName;

implementation

end.


================================================
FILE: sherpa-onnx/pascal-api/sherpa_onnx.pas
================================================
{ Copyright (c)  2024  Xiaomi Corporation

Please see
https://github.com/k2-fsa/sherpa-onnx/tree/master/pascal-api-examples
for how to use APIs in this file.
}

unit sherpa_onnx;

{$IFDEF FPC}
  {$mode objfpc}
  {$modeSwitch advancedRecords} { to support records with methods }
{$ENDIF}

{$LongStrings ON}

interface
uses
  ctypes;

type
  TSherpaOnnxSamplesArray = array of Single;

  TSherpaOnnxLinearResampler = class
  private
    Handle: Pointer;
    InputSampleRate: Integer;
    OutputSampleRate: Integer;
  public
    constructor Create(SampleRateIn: Integer; SampleRateOut: Integer);
    destructor Destroy; override;

    function Resample(Samples: pcfloat;
      N: Integer; Flush: Boolean): TSherpaOnnxSamplesArray; overload;

    function Resample(const Samples: array of Single;
      Flush: Boolean): TSherpaOnnxSamplesArray; overload;

    procedure Reset;

    property GetInputSampleRate: Integer Read InputSampleRate;
    property GetOutputSampleRate: Integer Read OutputSampleRate;
  end;

  TSherpaOnnxGeneratedAudioCallbackWithArg = function(
      Samples: pcfloat; N: cint32;
      Arg: Pointer): cint32; cdecl;

  TSherpaOnnxGeneratedAudioProgressCallbackWithArg = function(
      Samples: pcfloat; N: cint32; P: cfloat;
      Arg: Pointer): cint32; cdecl;

  TSherpaOnnxOfflineTtsVitsModelConfig = record
    Model: AnsiString;
    Lexicon: AnsiString;
    Tokens: AnsiString;
    DataDir: AnsiString;
    NoiseScale: Single;
    NoiseScaleW: Single;
    LengthScale: Single;
    DictDir: AnsiString;

    function ToString: AnsiString;
    class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsVitsModelConfig);
  end;

  TSherpaOnnxGenerationConfig = record
    SilenceScale: Single;
    Speed: Single;
    Sid: Integer;
    ReferenceAudio: array of Single;
    ReferenceAudioLen: Integer;
    ReferenceSampleRate: Integer;
    ReferenceText: AnsiString;
    NumSteps: Integer;
    Extra: AnsiString;
    class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxGenerationConfig);
  end;

  TSherpaOnnxOfflineTtsMatchaModelConfig = record
    AcousticModel: AnsiString;
    Vocoder: AnsiString;
    Lexicon: AnsiString;
    Tokens: AnsiString;
    DataDir: AnsiString;
    NoiseScale: Single;
    LengthScale: Single;
    DictDir: AnsiString;

    function ToString: AnsiString;
    class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsMatchaModelConfig);
  end;

  TSherpaOnnxOfflineTtsKokoroModelConfig = record
    Model: AnsiString;
    Voices: AnsiString;
    Tokens: AnsiString;
    DataDir: AnsiString;
    LengthScale: Single;
    DictDir: AnsiString;
    Lexicon: AnsiString;
    Lang: AnsiString;

    function ToString: AnsiString;
    class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsKokoroModelConfig);
  end;

  TSherpaOnnxOfflineTtsKittenModelConfig = record
    Model: AnsiString;
    Voices: AnsiString;
    Tokens: AnsiString;
    DataDir: AnsiString;
    LengthScale: Single;

    function ToString: AnsiString;
    class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsKittenModelConfig);
  end;

  TSherpaOnnxOfflineTtsZipVoiceModelConfig = record
    Tokens: AnsiString;
    Encoder: AnsiString;
    Decoder: AnsiString;
    Vocoder: AnsiString;
    DataDir: AnsiString;
    Lexicon: AnsiString;
    FeatScale: Single;
    Tshift: Single;
    TargetRms: Single;
    GuidanceScale: Single;

    function ToString: AnsiString;
    class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsZipVoiceModelConfig);
  end;

  TSherpaOnnxOfflineTtsPocketModelConfig = record
    LmFlow: AnsiString;
    LmMain: AnsiString;
    Encoder: AnsiString;
    Decoder: AnsiString;
    TextConditioner: AnsiString;
    VocabJson: AnsiString;
    TokenScoresJson: AnsiString;
    VoiceEmbeddingCacheCapacity: Integer;

    function ToString: AnsiString;
    class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsPocketModelConfig);
  end;

  TSherpaOnnxOfflineTtsSupertonicModelConfig = record
    DurationPredictor: AnsiString;
    TextEncoder: AnsiString;
    VectorEstimator: AnsiString;
    Vocoder: AnsiString;
    TtsJson: AnsiString;
    UnicodeIndexer: AnsiString;
    VoiceStyle: AnsiString;

    function ToString: AnsiString;
  end;

  TSherpaOnnxOfflineTtsModelConfig = record
    Vits: TSherpaOnnxOfflineTtsVitsModelConfig;
    NumThreads: Integer;
    Debug: Boolean;
    Provider: AnsiString;
    Matcha: TSherpaOnnxOfflineTtsMatchaModelConfig;
    Kokoro: TSherpaOnnxOfflineTtsKokoroModelConfig;
    Kitten: TSherpaOnnxOfflineTtsKittenModelConfig;
    ZipVoice: TSherpaOnnxOfflineTtsZipVoiceModelConfig;
    Pocket: TSherpaOnnxOfflineTtsPocketModelConfig;
    Supertonic: TSherpaOnnxOfflineTtsSupertonicModelConfig;

    function ToString: AnsiString;
    class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsModelConfig);
  end;

  TSherpaOnnxOfflineTtsConfig = record
    Model: TSherpaOnnxOfflineTtsModelConfig;
    RuleFsts: AnsiString;
    MaxNumSentences: Integer;
    RuleFars: AnsiString;
    SilenceScale: Single;

    function ToString: AnsiString;
    class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsConfig);
  end;

  TSherpaOnnxGeneratedAudio = record
    Samples: array of Single;
    SampleRate: Integer;
  end;

  TSherpaOnnxOfflineTts = class
  private
   Handle: Pointer;
   SampleRate: Integer;
   NumSpeakers: Integer;
   _Config: TSherpaOnnxOfflineTtsConfig;
  public
    constructor Create(Config: TSherpaOnnxOfflineTtsConfig);
    destructor Destroy; override;

    function Generate(Text: AnsiString; SpeakerId: Integer;
      Speed: Single): TSherpaOnnxGeneratedAudio; overload;

    function Generate(Text: AnsiString; SpeakerId: Integer;
      Speed: Single;
      Callback: TSherpaOnnxGeneratedAudioCallbackWithArg;
      Arg: Pointer
      ): TSherpaOnnxGeneratedAudio; overload;

    function Generate(Text: AnsiString;
      GenerationConfig: TSherpaOnnxGenerationConfig;
      Callback: TSherpaOnnxGeneratedAudioProgressCallbackWithArg;
      Arg: Pointer
      ): TSherpaOnnxGeneratedAudio; overload;

    property GetHandle: Pointer Read Handle;
    property GetSampleRate: Integer Read SampleRate;
    property GetNumSpeakers: Integer Read NumSpeakers;
  end;

  TSherpaOnnxWave = record
    Samples: array of Single; { normalized to the range [-1, 1] }
    SampleRate: Integer;
  end;

  TSherpaOnnxOnlineTransducerModelConfig = record
    Encoder: AnsiString;
    Decoder: AnsiString;
    Joiner: AnsiString;
    function ToString: AnsiString;
  end;

  TSherpaOnnxOnlineParaformerModelConfig = record
    Encoder: AnsiString;
    Decoder: AnsiString;
    function ToString: AnsiString;
  end;

  TSherpaOnnxOnlineZipformer2CtcModelConfig = record
    Model: AnsiString;
    function ToString: AnsiString;
  end;

  TSherpaOnnxOnlineNemoCtcModelConfig = record
    Model: AnsiString;
    function ToString: AnsiString;
  end;

  TSherpaOnnxOnlineToneCtcModelConfig = record
    Model: AnsiString;
    function ToString: AnsiString;
  end;

  TSherpaOnnxOnlineModelConfig = record
    Transducer: TSherpaOnnxOnlineTransducerModelConfig;
    Paraformer: TSherpaOnnxOnlineParaformerModelConfig;
    Zipformer2Ctc: TSherpaOnnxOnlineZipformer2CtcModelConfig;
    Tokens: AnsiString;
    NumThreads: Integer;
    Provider: AnsiString;
    Debug: Boolean;
    ModelType: AnsiString;
    ModelingUnit: AnsiString;
    BpeVocab: AnsiString;
    TokensBuf: AnsiString;
    TokensBufSize: Integer;
    NemoCtc: TSherpaOnnxOnlineNemoCtcModelConfig;
    ToneCtc: TSherpaOnnxOnlineToneCtcModelConfig;
    function ToString: AnsiString;
    class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOnlineModelConfig);
  end;

  TSherpaOnnxFeatureConfig = record
    SampleRate: Integer;
    FeatureDim: Integer;
    function ToString: AnsiString;
    class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxFeatureConfig);
  end;

  TSherpaOnnxOnlineCtcFstDecoderConfig = record
    Graph: AnsiString;
    MaxActive: Integer;
    function ToString: AnsiString;
    class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOnlineCtcFstDecoderConfig);
  end;

  TSherpaOnnxHomophoneReplacerConfig = record
    DictDir: AnsiString;
    Lexicon: AnsiString;
    RuleFsts: AnsiString;
    function ToString: AnsiString;
  end;

  TSherpaOnnxOnlineRecognizerConfig = record
    FeatConfig: TSherpaOnnxFeatureConfig;
    ModelConfig: TSherpaOnnxOnlineModelConfig;
    DecodingMethod: AnsiString;
    MaxActivePaths: Integer;
    EnableEndpoint: Boolean;
    Rule1MinTrailingSilence: Single;
    Rule2MinTrailingSilence: Single;
    Rule3MinUtteranceLength: Single;
    HotwordsFile: AnsiString;
    HotwordsScore: Single;
    CtcFstDecoderConfig: TSherpaOnnxOnlineCtcFstDecoderConfig;
    RuleFsts: AnsiString;
    RuleFars: AnsiString;
    BlankPenalty: Single;
    HotwordsBuf: AnsiString;
    HotwordsBufSize: Integer;
    Hr: TSherpaOnnxHomophoneReplacerConfig;
    function ToString: AnsiString;
    class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOnlineRecognizerConfig);
  end;

  TSherpaOnnxOnlineRecognizerResult = record
    Text: AnsiString;
    Tokens: array of AnsiString;
    Timestamps: array of Single;
    function ToString: AnsiString;
  end;

  TSherpaOnnxOnlineStream = class
  private
   Handle: Pointer;
  public
    constructor Create(P: Pointer);
    destructor Destroy; override;
    procedure AcceptWaveform(const Samples: array of Single; SampleRate: Integer);
    procedure InputFinished;
    property GetHandle: Pointer Read Handle;
  end;

  TSherpaOnnxOnlineRecognizer = class
  private
   Handle: Pointer;
   _Config: TSherpaOnnxOnlineRecognizerConfig;
  public
    constructor Create(Config: TSherpaOnnxOnlineRecognizerConfig);
    destructor Destroy; override;

    function CreateStream: TSherpaOnnxOnlineStream; overload;
    function CreateStream(Hotwords: AnsiString): TSherpaOnnxOnlineStream; overload;
    function IsReady(Stream: TSherpaOnnxOnlineStream): Boolean;
    procedure Decode(Stream: TSherpaOnnxOnlineStream);
    procedure Reset(Stream: TSherpaOnnxOnlineStream);
    function IsEndpoint(Stream: TSherpaOnnxOnlineStream): Boolean;
    function GetResult(Stream: TSherpaOnnxOnlineStream): TSherpaOnnxOnlineRecognizerResult;
    property Config: TSherpaOnnxOnlineRecognizerConfig Read _Config;
    property GetHandle: Pointer Read Handle;
  end;

  TSherpaOnnxOfflineTransducerModelConfig = record
    Encoder: AnsiString;
    Decoder: AnsiString;
    Joiner: AnsiString;
    function ToString: AnsiString;
  end;

  TSherpaOnnxOfflineParaformerModelConfig = record
    Model: AnsiString;
    function ToString: AnsiString;
  end;

  TSherpaOnnxOfflineNemoEncDecCtcModelConfig = record
    Model: AnsiString;
    function ToString: AnsiString;
  end;

  TSherpaOnnxOfflineDolphinModelConfig = record
    Model: AnsiString;
    function ToString: AnsiString;
  end;

  TSherpaOnnxOfflineZipformerCtcModelConfig = record
    Model: AnsiString;
    function ToString: AnsiString;
  end;

  TSherpaOnnxOfflineWenetCtcModelConfig = record
    Model: AnsiString;
    function ToString: AnsiString;
  end;

  TSherpaOnnxOfflineOmnilingualAsrCtcModelConfig = record
    Model: AnsiString;
    function ToString: AnsiString;
  end;

  TSherpaOnnxOfflineMedAsrCtcModelConfig = record
    Model: AnsiString;
    function ToString: AnsiString;
  end;

  TSherpaOnnxOfflineFireRedAsrCtcModelConfig = record
    Model: AnsiString;
    function ToString: AnsiString;
  end;

  TSherpaOnnxOfflineFunAsrNanoModelConfig = record
    EncoderAdaptor: AnsiString;
    LLM: AnsiString;
    Embedding: AnsiString;
    Tokenizer: AnsiString;
    SystemPrompt: AnsiString;
    UserPrompt: AnsiString;
    MaxNewTokens: Integer;
    Temperature: Single;
    TopP: Single;
    Seed: Integer;
    Language: AnsiString;
    UseItn: Boolean;
    Hotwords: AnsiString;
    function ToString: AnsiString;
    class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineFunAsrNanoModelConfig);
  end;

  TSherpaOnnxOfflineWhisperModelConfig = record
    Encoder: AnsiString;
    Decoder: AnsiString;
    Language: AnsiString;
    Task: AnsiString;
    TailPaddings: Integer;
    EnableTokenTimestamps: Boolean;
    EnableSegmentTimestamps: Boolean;
    function ToString: AnsiString;
    class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineWhisperModelConfig);
  end;

  TSherpaOnnxOfflineCanaryModelConfig = record
    Encoder: AnsiString;
    Decoder: AnsiString;
    SrcLang: AnsiString;
    TgtLang: AnsiString;
    UsePnc: Boolean;
    function ToString: AnsiString;
    class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineCanaryModelConfig);
  end;

  TSherpaOnnxOfflineMoonshineModelConfig = record
    Preprocessor: AnsiString;
    Encoder: AnsiString;
    UncachedDecoder: AnsiString;
    CachedDecoder: AnsiString;
    MergedDecoder: AnsiString;
    function ToString: AnsiString;
  end;

  TSherpaOnnxOfflineFireRedAsrModelConfig = record
    Encoder: AnsiString;
    Decoder: AnsiString;
    function ToString: AnsiString;
  end;

  TSherpaOnnxOfflineTdnnModelConfig = record
    Model: AnsiString;
    function ToString: AnsiString;
  end;

  TSherpaOnnxOfflineLMConfig = record
    Model: AnsiString;
    Scale: Single;
    function ToString: AnsiString;
    class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineLMConfig);
  end;

  TSherpaOnnxOfflineSenseVoiceModelConfig = record
    Model: AnsiString;
    Language: AnsiString;
    UseItn: Boolean;
    class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineSenseVoiceModelConfig);
    function ToString: AnsiString;
  end;

  TSherpaOnnxOfflineModelConfig = record
    Transducer: TSherpaOnnxOfflineTransducerModelConfig;
    Paraformer: TSherpaOnnxOfflineParaformerModelConfig;
    NeMoCtc: TSherpaOnnxOfflineNemoEncDecCtcModelConfig;
    Whisper: TSherpaOnnxOfflineWhisperModelConfig;
    Tdnn: TSherpaOnnxOfflineTdnnModelConfig;
    Tokens: AnsiString;
    NumThreads: Integer;
    Debug: Boolean;
    Provider: AnsiString;
    ModelType: AnsiString;
    ModelingUnit: AnsiString;
    BpeVocab: AnsiString;
    TeleSpeechCtc: AnsiString;
    SenseVoice: TSherpaOnnxOfflineSenseVoiceModelConfig;
    Moonshine: TSherpaOnnxOfflineMoonshineModelConfig;
    FireRedAsr: TSherpaOnnxOfflineFireRedAsrModelConfig;
    Dolphin: TSherpaOnnxOfflineDolphinModelConfig;
    ZipformerCtc: TSherpaOnnxOfflineZipformerCtcModelConfig;
    Canary: TSherpaOnnxOfflineCanaryModelConfig;
    WenetCtc: TSherpaOnnxOfflineWenetCtcModelConfig;
    Omnilingual: TSherpaOnnxOfflineOmnilingualAsrCtcModelConfig;
    MedAsr: TSherpaOnnxOfflineMedAsrCtcModelConfig;
    FunAsrNano: TSherpaOnnxOfflineFunAsrNanoModelConfig;
    FireRedAsrCtc: TSherpaOnnxOfflineFireRedAsrCtcModelConfig;
    class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineModelConfig);
    function ToString: AnsiString;
  end;

  TSherpaOnnxOfflineRecognizerConfig = record
    FeatConfig: TSherpaOnnxFeatureConfig;
    ModelConfig: TSherpaOnnxOfflineModelConfig;
    LMConfig: TSherpaOnnxOfflineLMConfig;
    DecodingMethod: AnsiString;
    MaxActivePaths: Integer;
    HotwordsFile: AnsiString;
    HotwordsScore: Single;
    RuleFsts: AnsiString;
    RuleFars: AnsiString;
    BlankPenalty: Single;
    Hr: TSherpaOnnxHomophoneReplacerConfig;
    class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineRecognizerConfig);
    function ToString: AnsiString;
  end;

  TSherpaOnnxOfflineRecognizerResult = record
    Text: AnsiString;
    Tokens: array of AnsiString;
    Timestamps: array of Single;
    function ToString: AnsiString;
  end;

  TSherpaOnnxOfflineStream = class
  private
   Handle: Pointer;
  public
    constructor Create(P: Pointer);
    destructor Destroy; override;
    procedure AcceptWaveform(const Samples: array of Single; SampleRate: Integer);
    property GetHandle: Pointer Read Handle;
  end;

  TSherpaOnnxOfflineRecognizer = class
  private
   Handle: Pointer;
   _Config: TSherpaOnnxOfflineRecognizerConfig;
  public
    constructor Create(Config: TSherpaOnnxOfflineRecognizerConfig);
    destructor Destroy; override;
    function CreateStream: TSherpaOnnxOfflineStream;
    procedure Decode(Stream: TSherpaOnnxOfflineStream);
    procedure SetConfig(Config: TSherpaOnnxOfflineRecognizerConfig);
    function GetResult(Stream: TSherpaOnnxOfflineStream): TSherpaOnnxOfflineRecognizerResult;
    property Config: TSherpaOnnxOfflineRecognizerConfig Read _Config;
    property GetHandle: Pointer Read Handle;
  end;

  TSherpaOnnxSileroVadModelConfig = record
    Model: AnsiString;
    Threshold: Single;
    MinSilenceDuration: Single;
    MinSpeechDuration: Single;
    WindowSize: Integer;
    MaxSpeechDuration: Single;
    function ToString: AnsiString;
    class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxSileroVadModelConfig);
  end;

  TSherpaOnnxTenVadModelConfig = record
    Model: AnsiString;
    Threshold: Single;
    MinSilenceDuration: Single;
    MinSpeechDuration: Single;
    WindowSize: Integer;
    MaxSpeechDuration: Single;
    function ToString: AnsiString;
    class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxTenVadModelConfig);
  end;

  TSherpaOnnxVadModelConfig = record
    SileroVad: TSherpaOnnxSileroVadModelConfig;
    SampleRate: Integer;
    NumThreads: Integer;
    Provider: AnsiString;
    Debug: Boolean;
    TenVad: TSherpaOnnxTenVadModelConfig;
    function ToString: AnsiString;
    class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxVadModelConfig);
  end;


  TSherpaOnnxCircularBuffer = class
  private
    Handle: Pointer;
  public
    constructor Create(Capacity: Integer);
    destructor Destroy; override;
    procedure Push(Samples: array of Single); overload;
    procedure Push(Samples: pcfloat; N: Integer); overload;
    function Get(StartIndex: Integer; N: Integer): TSherpaOnnxSamplesArray;
    procedure Pop(N: Integer);
    procedure Reset;
    function Size: Integer;
    function Head: Integer;
    property GetHandle: Pointer Read Handle;
  end;

  TSherpaOnnxSpeechSegment = record
    Samples: array of Single;
    Start: Integer;
  end;

  TSherpaOnnxVoiceActivityDetector = class
  private
    Handle: Pointer;
    _Config: TSherpaOnnxVadModelConfig;
  public
    constructor Create(Config: TSherpaOnnxVadModelConfig; BufferSizeInSeconds: Single);
    destructor Destroy; override;
    procedure AcceptWaveform(const Samples: array of Single); overload;
    procedure AcceptWaveform(const Samples: array of Single; Offset: Integer; N: Integer); overload;
    function IsEmpty: Boolean;
    function IsDetected: Boolean;
    procedure Pop;
    procedure Clear;
    function Front: TSherpaOnnxSpeechSegment;
    procedure Reset;
    procedure Flush;
    property Config: TSherpaOnnxVadModelConfig Read _Config;
    property GetHandle: Pointer Read Handle;
  end;


  TSherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig = record
    Model: AnsiString;
    function ToString: AnsiString;
  end;

  TSherpaOnnxOfflineSpeakerSegmentationModelConfig = record
    Pyannote: TSherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig;
    NumThreads: Integer;
    Debug: Boolean;
    Provider: AnsiString;
    function ToString: AnsiString;
    class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineSpeakerSegmentationModelConfig);
  end;

  TSherpaOnnxFastClusteringConfig = record
    NumClusters: Integer;
    Threshold: Single;
    function ToString: AnsiString;
    class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxFastClusteringConfig);
  end;

  TSherpaOnnxSpeakerEmbeddingExtractorConfig = record
    Model: AnsiString;
    NumThreads: Integer;
    Debug: Boolean;
    Provider: AnsiString;
    function ToString: AnsiString;
    class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxSpeakerEmbeddingExtractorConfig);
  end;

  TSherpaOnnxOfflineSpeakerDiarizationConfig = record
    Segmentation: TSherpaOnnxOfflineSpeakerSegmentationModelConfig;
    Embedding: TSherpaOnnxSpeakerEmbeddingExtractorConfig;
    Clustering: TSherpaOnnxFastClusteringConfig;
    MinDurationOn: Single;
    MinDurationOff: Single;
    function ToString: AnsiString;
    class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineSpeakerDiarizationConfig);
  end;

  TSherpaOnnxOfflineSpeakerDiarizationSegment = record
    Start: Single;
    Stop: Single;
    Speaker: Integer;
    function ToString: AnsiString;
  end;

  TSherpaOnnxOfflineSpeakerDiarizationSegmentArray = array of TSherpaOnnxOfflineSpeakerDiarizationSegment;

  PSherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg = ^TSherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg;

  TSherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg = function(
      NumProcessChunks: cint32;
      NumTotalChunks: cint32): cint32; cdecl;

  TSherpaOnnxOfflineSpeakerDiarization = class
  private
    Handle: Pointer;
    SampleRate: Integer;
    _Config: TSherpaOnnxOfflineSpeakerDiarizationConfig;
  public
    constructor Create(Config: TSherpaOnnxOfflineSpeakerDiarizationConfig);
    destructor Destroy; override;
    procedure SetConfig(Config: TSherpaOnnxOfflineSpeakerDiarizationConfig);
    function Process(const Samples: array of Single): TSherpaOnnxOfflineSpeakerDiarizationSegmentArray; overload;
    function Process(const Samples: array of Single; Callback: PSherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg): TSherpaOnnxOfflineSpeakerDiarizationSegmentArray; overload;
    property GetHandle: Pointer Read Handle;
    property GetSampleRate: Integer Read SampleRate;
  end;

  TSherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig = record
    Model: AnsiString;
    function ToString: AnsiString;
  end;

  TSherpaOnnxOfflineSpeechDenoiserDpdfNetModelConfig = record
    Model: AnsiString;
    function ToString: AnsiString;
  end;

  TSherpaOnnxOfflineSpeechDenoiserModelConfig = record
    Gtcrn: TSherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig;
    DpdfNet: TSherpaOnnxOfflineSpeechDenoiserDpdfNetModelConfig;
    NumThreads: Integer;
    Debug: Boolean;
    Provider: AnsiString;
    function ToString: AnsiString;
    class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineSpeechDenoiserModelConfig);
  end;

  TSherpaOnnxOfflineSpeechDenoiserConfig = record
    Model: TSherpaOnnxOfflineSpeechDenoiserModelConfig;
    function ToString: AnsiString;
  end;

  TSherpaOnnxOnlineSpeechDenoiserConfig = record
    Model: TSherpaOnnxOfflineSpeechDenoiserModelConfig;
    function ToString: AnsiString;
  end;

  TSherpaOnnxDenoisedAudio = record
    Samples: array of Single;
    SampleRate: Integer;
  end;

  TSherpaOnnxOfflineSpeechDenoiser = class
  private
   Handle: Pointer;
   SampleRate: Integer;
   _Config: TSherpaOnnxOfflineSpeechDenoiserConfig;
  public
    constructor Create(Config: TSherpaOnnxOfflineSpeechDenoiserConfig);
    destructor Destroy; override;

    function Run(const Samples: array of Single; InputSampleRate: Integer): TSherpaOnnxDenoisedAudio;

    property GetHandle: Pointer Read Handle;
    property GetSampleRate: Integer Read SampleRate;
  end;

  TSherpaOnnxOnlineSpeechDenoiser = class
  private
   Handle: Pointer;
   SampleRate: Integer;
   FrameShiftInSamples: Integer;
   _Config: TSherpaOnnxOnlineSpeechDenoiserConfig;
  public
    constructor Create(Config: TSherpaOnnxOnlineSpeechDenoiserConfig);
    destructor Destroy; override;

    function Run(const Samples: array of Single; InputSampleRate: Integer): TSherpaOnnxDenoisedAudio;
    function Flush: TSherpaOnnxDenoisedAudio;
    procedure Reset;

    property GetHandle: Pointer Read Handle;
    property GetSampleRate: Integer Read SampleRate;
    property GetFrameShiftInSamples: Integer Read FrameShiftInSamples;
  end;

  { It supports reading a single channel wave with 16-bit encoded samples.
    Samples are normalized to the range [-1, 1].
  }
  function SherpaOnnxReadWave(Filename: AnsiString): TSherpaOnnxWave;

  function SherpaOnnxWriteWave(Filename: AnsiString;
    const Samples: array of Single; SampleRate: Integer): Boolean;

  function SherpaOnnxGetVersionStr(): AnsiString;
  function SherpaOnnxGetGitSha1(): AnsiString;
  function SherpaOnnxGetGitDate(): AnsiString;

implementation

uses
  Math,
  fpjson,
    { See
      - https://wiki.freepascal.org/fcl-json
      - https://www.freepascal.org/daily/doc/fcl/fpjson/getjson.html
    }
  jsonparser,
  SysUtils;

const
  {
  See
   - https://www.freepascal.org/docs-html/prog/progap7.html
   - https://downloads.freepascal.org/fpc/docs-pdf/
   - https://downloads.freepascal.org/fpc/docs-pdf/CinFreePascal.pdf
  }

  {$if defined(WINDOWS)}
   { For windows, we always use dynamic link. See
     https://forum.lazarus.freepascal.org/index.php/topic,15712.msg84781.html#msg84781
     We need to rebuild the static lib for windows using Mingw or cygwin
   }
     SherpaOnnxLibName = 'sherpa-onnx-c-api.dll';
  {$elseif not defined(SHERPA_ONNX_USE_SHARED_LIBS)}
     {static link for linux and macos}
     {$linklib sherpa-onnx-c-api}
     {$linklib sherpa-onnx-core}
     {$linklib kaldi-decoder-core}
     {$linklib sherpa-onnx-kaldifst-core}
     {$linklib sherpa-onnx-fstfar}
     {$linklib sherpa-onnx-fst}
     {$linklib kissfft-float}
     {$linklib kaldi-native-fbank-core}
     {$linklib piper_phonemize}
     {$linklib espeak-ng}
     {$linklib ucd}
     {$linklib onnxruntime}
     {$linklib ssentencepiece_core}

     {$ifdef LINUX}
       {$linklib m}
       {$LINKLIB stdc++}
       {$LINKLIB gcc_s}
     {$endif}

     {$ifdef DARWIN}
       {$linklib c++}
     {$endif}
     SherpaOnnxLibName = '';
  {$else}
     {dynamic link for linux and macos}
     SherpaOnnxLibName = 'sherpa-onnx-c-api';
     {$linklib sherpa-onnx-c-api}
  {$endif}

type
  SherpaOnnxWave = record
    Samples: pcfloat;
    SampleRate: cint32;
    NumSamples: cint32;
  end;

  PSherpaOnnxWave = ^SherpaOnnxWave;

  SherpaOnnxOnlineTransducerModelConfig = record
    Encoder: PAnsiChar;
    Decoder: PAnsiChar;
    Joiner: PAnsiChar;
  end;
  SherpaOnnxOnlineParaformerModelConfig = record
    Encoder: PAnsiChar;
    Decoder: PAnsiChar;
  end;
  SherpaOnnxOnlineZipformer2CtcModelConfig = record
    Model: PAnsiChar;
  end;

  SherpaOnnxOnlineNemoCtcModelConfig = record
    Model: PAnsiChar;
  end;

  SherpaOnnxOnlineToneCtcModelConfig = record
    Model: PAnsiChar;
  end;

  SherpaOnnxOnlineModelConfig= record
    Transducer: SherpaOnnxOnlineTransducerModelConfig;
    Paraformer: SherpaOnnxOnlineParaformerModelConfig;
    Zipformer2Ctc: SherpaOnnxOnlineZipformer2CtcModelConfig;
    Tokens: PAnsiChar;
    NumThreads: cint32;
    Provider: PAnsiChar;
    Debug: cint32;
    ModelType: PAnsiChar;
    ModelingUnit: PAnsiChar;
    BpeVocab: PAnsiChar;
    TokensBuf: PAnsiChar;
    TokensBufSize: cint32;
    NemoCtc: SherpaOnnxOnlineNemoCtcModelConfig;
    ToneCtc: SherpaOnnxOnlineToneCtcModelConfig;
  end;
  SherpaOnnxFeatureConfig = record
    SampleRate: cint32;
    FeatureDim: cint32;
  end;
  SherpaOnnxOnlineCtcFstDecoderConfig = record
    Graph: PAnsiChar;
    MaxActive: cint32;
  end;

  SherpaOnnxHomophoneReplacerConfig = record
    DictDir: PAnsiChar;
    Lexicon: PAnsiChar;
    RuleFsts: PAnsiChar;
  end;

  SherpaOnnxOnlineRecognizerConfig = record
    FeatConfig: SherpaOnnxFeatureConfig;
    ModelConfig: SherpaOnnxOnlineModelConfig;
    DecodingMethod: PAnsiChar;
    MaxActivePaths: cint32;
    EnableEndpoint: cint32;
    Rule1MinTrailingSilence: cfloat;
    Rule2MinTrailingSilence: cfloat;
    Rule3MinUtteranceLength: cfloat;
    HotwordsFile: PAnsiChar;
    HotwordsScore: cfloat;
    CtcFstDecoderConfig: SherpaOnnxOnlineCtcFstDecoderConfig;
    RuleFsts: PAnsiChar;
    RuleFars: PAnsiChar;
    BlankPenalty: cfloat;
    HotwordsBuf: PAnsiChar;
    HotwordsBufSize: cint32;
    Hr: SherpaOnnxHomophoneReplacerConfig;
  end;

  PSherpaOnnxOnlineRecognizerConfig = ^SherpaOnnxOnlineRecognizerConfig;

  SherpaOnnxOfflineTransducerModelConfig = record
    Encoder: PAnsiChar;
    Decoder: PAnsiChar;
    Joiner: PAnsiChar;
  end;
  SherpaOnnxOfflineParaformerModelConfig = record
    Model: PAnsiChar;
  end;
  SherpaOnnxOfflineNemoEncDecCtcModelConfig = record
    Model: PAnsiChar;
  end;
  SherpaOnnxOfflineDolphinModelConfig = record
    Model: PAnsiChar;
  end;
  SherpaOnnxOfflineZipformerCtcModelConfig = record
    Model: PAnsiChar;
  end;
  SherpaOnnxOfflineWenetCtcModelConfig = record
    Model: PAnsiChar;
  end;
  SherpaOnnxOfflineOmnilingualAsrCtcModelConfig = record
    Model: PAnsiChar;
  end;
  SherpaOnnxOfflineMedAsrCtcModelConfig = record
    Model: PAnsiChar;
  end;
  SherpaOnnxOfflineFunAsrNanoModelConfig = record
    EncoderAdaptor: PAnsiChar;
    LLM: PAnsiChar;
    Embedding: PAnsiChar;
    Tokenizer: PAnsiChar;
    SystemPrompt: PAnsiChar;
    UserPrompt: PAnsiChar;
    MaxNewTokens: cint32;
    Temperature: cfloat;
    TopP: cfloat;
    Seed: cint32;
    Language: PAnsiChar;
    UseItn: cint32;
    Hotwords: PAnsiChar;
  end;
  SherpaOnnxOfflineFireRedAsrCtcModelConfig = record
    Model: PAnsiChar;
  end;
  SherpaOnnxOfflineWhisperModelConfig = record
    Encoder: PAnsiChar;
    Decoder: PAnsiChar;
    Language: PAnsiChar;
    Task: PAnsiChar;
    TailPaddings: cint32;
    EnableTokenTimestamps: cint32;
    EnableSegmentTimestamps: cint32;
  end;
  SherpaOnnxOfflineCanaryModelConfig = record
    Encoder: PAnsiChar;
    Decoder: PAnsiChar;
    SrcLang: PAnsiChar;
    TgtLang: PAnsiChar;
    UsePnc: cint32;
  end;
  SherpaOnnxOfflineFireRedAsrModelConfig = record
    Encoder: PAnsiChar;
    Decoder: PAnsiChar;
  end;
  SherpaOnnxOfflineMoonshineModelConfig = record
    Preprocessor: PAnsiChar;
    Encoder: PAnsiChar;
    UncachedDecoder: PAnsiChar;
    CachedDecoder: PAnsiChar;
    MergedDecoder: PAnsiChar;
  end;
  SherpaOnnxOfflineTdnnModelConfig = record
    Model: PAnsiChar;
  end;
  SherpaOnnxOfflineLMConfig = record
    Model: PAnsiChar;
    Scale: cfloat;
  end;
  SherpaOnnxOfflineSenseVoiceModelConfig = record
    Model: PAnsiChar;
    Language: PAnsiChar;
    UseItn: cint32;
  end;
  SherpaOnnxOfflineModelConfig = record
    Transducer: SherpaOnnxOfflineTransducerModelConfig;
    Paraformer: SherpaOnnxOfflineParaformerModelConfig;
    NeMoCtc: SherpaOnnxOfflineNemoEncDecCtcModelConfig;
    Whisper: SherpaOnnxOfflineWhisperModelConfig;
    Tdnn: SherpaOnnxOfflineTdnnModelConfig;
    Tokens: PAnsiChar;
    NumThreads: cint32;
    Debug: cint32;
    Provider: PAnsiChar;
    ModelType: PAnsiChar;
    ModelingUnit: PAnsiChar;
    BpeVocab: PAnsiChar;
    TeleSpeechCtc: PAnsiChar;
    SenseVoice:  SherpaOnnxOfflineSenseVoiceModelConfig;
    Moonshine: SherpaOnnxOfflineMoonshineModelConfig;
    FireRedAsr: SherpaOnnxOfflineFireRedAsrModelConfig;
    Dolphin: SherpaOnnxOfflineDolphinModelConfig;
    ZipformerCtc: SherpaOnnxOfflineZipformerCtcModelConfig;
    Canary: SherpaOnnxOfflineCanaryModelConfig;
    WenetCtc: SherpaOnnxOfflineWenetCtcModelConfig;
    Omnilingual: SherpaOnnxOfflineOmnilingualAsrCtcModelConfig;
    MedAsr: SherpaOnnxOfflineMedAsrCtcModelConfig;
    FunAsrNano: SherpaOnnxOfflineFunAsrNanoModelConfig;
    FireRedAsrCtc: SherpaOnnxOfflineFireRedAsrCtcModelConfig;
  end;

  SherpaOnnxOfflineRecognizerConfig = record
    FeatConfig: SherpaOnnxFeatureConfig;
    ModelConfig: SherpaOnnxOfflineModelConfig;
    LMConfig: SherpaOnnxOfflineLMConfig;
    DecodingMethod: PAnsiChar;
    MaxActivePaths: cint32;
    HotwordsFile: PAnsiChar;
    HotwordsScore: cfloat;
    RuleFsts: PAnsiChar;
    RuleFars: PAnsiChar;
    BlankPenalty: cfloat;
    Hr: SherpaOnnxHomophoneReplacerConfig;
  end;

  PSherpaOnnxOfflineRecognizerConfig = ^SherpaOnnxOfflineRecognizerConfig;

  SherpaOnnxSileroVadModelConfig = record
    Model: PAnsiChar;
    Threshold: cfloat;
    MinSilenceDuration: cfloat;
    MinSpeechDuration: cfloat;
    WindowSize: cint32;
    MaxSpeechDuration: cfloat;
  end;

  SherpaOnnxTenVadModelConfig = record
    Model: PAnsiChar;
    Threshold: cfloat;
    MinSilenceDuration: cfloat;
    MinSpeechDuration: cfloat;
    WindowSize: cint32;
    MaxSpeechDuration: cfloat;
  end;

  SherpaOnnxVadModelConfig = record
    SileroVad: SherpaOnnxSileroVadModelConfig;
    SampleRate: cint32;
    NumThreads: cint32;
    Provider: PAnsiChar;
    Debug: cint32;
    TenVad: SherpaOnnxTenVadModelConfig;
  end;
  PSherpaOnnxVadModelConfig = ^SherpaOnnxVadModelConfig;

  SherpaOnnxSpeechSegment = record
    Start: cint32;
    Samples: pcfloat;
    N: cint32;
  end;

  PSherpaOnnxSpeechSegment = ^SherpaOnnxSpeechSegment;

  SherpaOnnxOfflineTtsVitsModelConfig = record
    Model: PAnsiChar;
    Lexicon: PAnsiChar;
    Tokens: PAnsiChar;
    DataDir: PAnsiChar;
    NoiseScale: cfloat;
    NoiseScaleW: cfloat;
    LengthScale: cfloat;
    DictDir: PAnsiChar;
  end;

  PSherpaOnnxGenerationConfig = ^SherpaOnnxGenerationConfig;

  SherpaOnnxGenerationConfig = record
    SilenceScale: cfloat;
    Speed: cfloat;
    Sid: cint32;
    ReferenceAudio: pcfloat;
    ReferenceAudioLen: cint32;
    ReferenceSampleRate: cint32;
    ReferenceText: PAnsiChar;
    NumSteps: cint32;
    Extra: PAnsiChar;
  end;

  SherpaOnnxOfflineTtsMatchaModelConfig = record
    AcousticModel: PAnsiChar;
    Vocoder: PAnsiChar;
    Lexicon: PAnsiChar;
    Tokens: PAnsiChar;
    DataDir: PAnsiChar;
    NoiseScale: cfloat;
    LengthScale: cfloat;
    DictDir: PAnsiChar;
  end;

  SherpaOnnxOfflineTtsKokoroModelConfig = record
    Model: PAnsiChar;
    Voices: PAnsiChar;
    Tokens: PAnsiChar;
    DataDir: PAnsiChar;
    LengthScale: cfloat;
    DictDir: PAnsiChar;
    Lexicon: PAnsiChar;
    Lang: PAnsiChar;
  end;

  SherpaOnnxOfflineTtsKittenModelConfig = record
    Model: PAnsiChar;
    Voices: PAnsiChar;
    Tokens: PAnsiChar;
    DataDir: PAnsiChar;
    LengthScale: cfloat;
  end;

  SherpaOnnxOfflineTtsZipVoiceModelConfig = record
    Tokens: PAnsiChar;
    Encoder: PAnsiChar;
    Decoder: PAnsiChar;
    Vocoder: PAnsiChar;
    DataDir: PAnsiChar;
    Lexicon: PAnsiChar;
    FeatScale: cfloat;
    Tshift: cfloat;
    TargetRms: cfloat;
    GuidanceScale: cfloat;
  end;

  SherpaOnnxOfflineTtsPocketModelConfig = record
    LmFlow: PAnsiChar;
    LmMain: PAnsiChar;
    Encoder: PAnsiChar;
    Decoder: PAnsiChar;
    TextConditioner: PAnsiChar;
    VocabJson: PAnsiChar;
    TokenScoresJson: PAnsiChar;
    VoiceEmbeddingCacheCapacity: cint32;
  end;

  SherpaOnnxOfflineTtsSupertonicModelConfig = record
    DurationPredictor: PAnsiChar;
    TextEncoder: PAnsiChar;
    VectorEstimator: PAnsiChar;
    Vocoder: PAnsiChar;
    TtsJson: PAnsiChar;
    UnicodeIndexer: PAnsiChar;
    VoiceStyle: PAnsiChar;
  end;

  SherpaOnnxOfflineTtsModelConfig = record
    Vits: SherpaOnnxOfflineTtsVitsModelConfig;
    NumThreads: cint32;
    Debug: cint32;
    Provider: PAnsiChar;
    Matcha: SherpaOnnxOfflineTtsMatchaModelConfig;
    Kokoro: SherpaOnnxOfflineTtsKokoroModelConfig;
    Kitten: SherpaOnnxOfflineTtsKittenModelConfig;
    ZipVoice: SherpaOnnxOfflineTtsZipVoiceModelConfig;
    Pocket: SherpaOnnxOfflineTtsPocketModelConfig;
    Supertonic: SherpaOnnxOfflineTtsSupertonicModelConfig;
  end;

  SherpaOnnxOfflineTtsConfig = record
    Model: SherpaOnnxOfflineTtsModelConfig;
    RuleFsts: PAnsiChar;
    MaxNumSentences: cint32;
    RuleFars: PAnsiChar;
    SilenceScale: cfloat;
  end;

  PSherpaOnnxOfflineTtsConfig = ^SherpaOnnxOfflineTtsConfig;

  SherpaOnnxGeneratedAudio = record
    Samples: pcfloat;
    N: cint32;
    SampleRate: cint32;
  end;

  PSherpaOnnxGeneratedAudio = ^SherpaOnnxGeneratedAudio;

  SherpaOnnxResampleOut = record
    Samples: pcfloat;
    N: cint32;
  end;

  PSherpaOnnxResampleOut = ^SherpaOnnxResampleOut;

  SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig = record
    Model: PAnsiChar;
  end;

  SherpaOnnxOfflineSpeakerSegmentationModelConfig = record
    Pyannote: SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig;
    NumThreads: cint32;
    Debug: cint32;
    Provider: PAnsiChar;
  end;

  SherpaOnnxFastClusteringConfig = record
    NumClusters: cint32;
    Threshold: cfloat;
  end;

  SherpaOnnxSpeakerEmbeddingExtractorConfig = record
    Model: PAnsiChar;
    NumThreads: cint32;
    Debug: cint32;
    Provider: PAnsiChar;
  end;

  SherpaOnnxOfflineSpeakerDiarizationConfig = record
    Segmentation: SherpaOnnxOfflineSpeakerSegmentationModelConfig;
    Embedding: SherpaOnnxSpeakerEmbeddingExtractorConfig;
    Clustering: SherpaOnnxFastClusteringConfig;
    MinDurationOn: cfloat;
    MinDurationOff: cfloat;
  end;

  SherpaOnnxOfflineSpeakerDiarizationSegment = record
    Start: cfloat;
    Stop: cfloat;
    Speaker: cint32;
  end;

  PSherpaOnnxOfflineSpeakerDiarizationSegment = ^SherpaOnnxOfflineSpeakerDiarizationSegment;

  PSherpaOnnxOfflineSpeakerDiarizationConfig = ^SherpaOnnxOfflineSpeakerDiarizationConfig;

  SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig = record
    Model: PAnsiChar;
  end;

  SherpaOnnxOfflineSpeechDenoiserDpdfNetModelConfig = record
    Model: PAnsiChar;
  end;

  SherpaOnnxOfflineSpeechDenoiserModelConfig = record
    Gtcrn: SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig;
    NumThreads: cint32;
    Debug: cint32;
    Provider: PAnsiChar;
    DpdfNet: SherpaOnnxOfflineSpeechDenoiserDpdfNetModelConfig;
  end;

  SherpaOnnxOfflineSpeechDenoiserConfig = record
    Model: SherpaOnnxOfflineSpeechDenoiserModelConfig;
  end;

  PSherpaOnnxOfflineSpeechDenoiserConfig = ^SherpaOnnxOfflineSpeechDenoiserConfig;

  SherpaOnnxOnlineSpeechDenoiserConfig = record
    Model: SherpaOnnxOfflineSpeechDenoiserModelConfig;
  end;

  PSherpaOnnxOnlineSpeechDenoiserConfig = ^SherpaOnnxOnlineSpeechDenoiserConfig;

  SherpaOnnxDenoisedAudio = record
    Samples: pcfloat;
    N: cint32;
    SampleRate: cint32;
  end;

  PSherpaOnnxDenoisedAudio = ^SherpaOnnxDenoisedAudio;

function SherpaOnnxCreateLinearResampler(SampleRateInHz: cint32;
  SampleRateOutHz: cint32;
  FilterCutoffHz: cfloat;
  NumZeros: cint32): Pointer; cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxGetVersionStrWrapper(): PAnsiChar; cdecl;
  external SherpaOnnxLibName name 'SherpaOnnxGetVersionStr';

function SherpaOnnxGetGitSha1Wrapper(): PAnsiChar; cdecl;
  external SherpaOnnxLibName name 'SherpaOnnxGetGitSha1';

function SherpaOnnxGetGitDateWrapper(): PAnsiChar; cdecl;
  external SherpaOnnxLibName name 'SherpaOnnxGetGitDate';

function SherpaOnnxGetVersionStr(): AnsiString;
begin
  Result := SherpaOnnxGetVersionStrWrapper();
end;

function SherpaOnnxGetGitSha1(): AnsiString;
begin
  Result := SherpaOnnxGetGitSha1Wrapper();
end;

function SherpaOnnxGetGitDate(): AnsiString;
begin
  Result := SherpaOnnxGetGitDateWrapper();
end;

procedure SherpaOnnxDestroyLinearResampler(P: Pointer); cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxLinearResamplerResample(P: Pointer;
  Samples: pcfloat;
  N: Integer;
  Flush: Integer): PSherpaOnnxResampleOut; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxLinearResamplerResampleFree(P: PSherpaOnnxResampleOut); cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxLinearResamplerReset(P: Pointer); cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxCreateOfflineSpeechDenoiser(Config: PSherpaOnnxOfflineSpeechDenoiserConfig): Pointer; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxDestroyOfflineSpeechDenoiser(P: Pointer); cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxOfflineSpeechDenoiserGetSampleRate(P: Pointer): cint32; cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxOfflineSpeechDenoiserRun(P: Pointer;
  Samples: pcfloat; N: cint32;SampleRate: cint32):PSherpaOnnxDenoisedAudio; cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxCreateOnlineSpeechDenoiser(Config: PSherpaOnnxOnlineSpeechDenoiserConfig): Pointer; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxDestroyOnlineSpeechDenoiser(P: Pointer); cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxOnlineSpeechDenoiserGetSampleRate(P: Pointer): cint32; cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxOnlineSpeechDenoiserGetFrameShiftInSamples(P: Pointer): cint32; cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxOnlineSpeechDenoiserRun(P: Pointer;
  Samples: pcfloat; N: cint32; SampleRate: cint32): PSherpaOnnxDenoisedAudio; cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxOnlineSpeechDenoiserFlush(P: Pointer): PSherpaOnnxDenoisedAudio; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxOnlineSpeechDenoiserReset(P: Pointer); cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxDestroyDenoisedAudio(Audio: Pointer); cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxCreateOfflineSpeakerDiarization(Config: PSherpaOnnxOfflineSpeakerDiarizationConfig): Pointer; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxDestroyOfflineSpeakerDiarization(P: Pointer); cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(P: Pointer): cint32; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxOfflineSpeakerDiarizationSetConfig(P: Pointer; Config: PSherpaOnnxOfflineSpeakerDiarizationConfig); cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(P: Pointer): cint32; cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(P: Pointer): PSherpaOnnxOfflineSpeakerDiarizationSegment; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxOfflineSpeakerDiarizationDestroySegment(P: Pointer); cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxOfflineSpeakerDiarizationProcess(P: Pointer; Samples: pcfloat; N: cint32): Pointer; cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg(P: Pointer;
  Samples: pcfloat; N: cint32;  Callback: PSherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg): Pointer; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxOfflineSpeakerDiarizationDestroyResult(P: Pointer); cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxCreateOfflineTts(Config: PSherpaOnnxOfflineTtsConfig): Pointer; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxDestroyOfflineTts(Tts: Pointer); cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxOfflineTtsSampleRate(Tts: Pointer): cint32; cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxOfflineTtsNumSpeakers(Tts: Pointer): cint32; cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxOfflineTtsGenerate(Tts: Pointer;
  Text: PAnsiChar; Sid: cint32; Speed: cfloat): PSherpaOnnxGeneratedAudio; cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxOfflineTtsGenerateWithCallbackWithArg(Tts: Pointer;
  Text: PAnsiChar; Sid: cint32; Speed: cfloat;
  Callback: TSherpaOnnxGeneratedAudioCallbackWithArg;
  Arg: Pointer): PSherpaOnnxGeneratedAudio; cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxOfflineTtsGenerateWithConfig(Tts: Pointer;
  Text: PAnsiChar; config: PSherpaOnnxGenerationConfig;
  Callback: TSherpaOnnxGeneratedAudioProgressCallbackWithArg;
  Arg: Pointer): PSherpaOnnxGeneratedAudio; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxDestroyOfflineTtsGeneratedAudio(Audio: Pointer); cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxCreateVoiceActivityDetector(Config: PSherpaOnnxVadModelConfig;
  BufferSizeInSeconds: cfloat): Pointer; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxDestroyVoiceActivityDetector(Vad: Pointer); cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxVoiceActivityDetectorAcceptWaveform(Vad: Pointer;
  Samples: pcfloat; N: cint32); cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxVoiceActivityDetectorEmpty(Vad: Pointer): cint32; cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxVoiceActivityDetectorDetected(Vad: Pointer): cint32; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxVoiceActivityDetectorPop(Vad: Pointer); cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxVoiceActivityDetectorClear(Vad: Pointer); cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxVoiceActivityDetectorFront(Vad: Pointer): PSherpaOnnxSpeechSegment; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxDestroySpeechSegment(P: PSherpaOnnxSpeechSegment); cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxVoiceActivityDetectorReset(P: PSherpaOnnxSpeechSegment); cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxVoiceActivityDetectorFlush(P: PSherpaOnnxSpeechSegment); cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxCreateCircularBuffer(Capacity: cint32): Pointer; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxDestroyCircularBuffer(Buffer: Pointer) ; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxCircularBufferPush(Buffer: Pointer; Samples: pcfloat; N: cint32); cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxCircularBufferGet(Buffer: Pointer; StartIndex: cint32; N: cint32): pcfloat ; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxCircularBufferFree(P: pcfloat); cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxCircularBufferPop(Buffer: Pointer; N: cint32); cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxCircularBufferSize(Buffer: Pointer): cint32; cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxCircularBufferHead(Buffer: Pointer): cint32; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxCircularBufferReset(Buffer: Pointer); cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxCreateOnlineRecognizer(Config: PSherpaOnnxOnlineRecognizerConfig): Pointer; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxDestroyOnlineRecognizer(Recognizer: Pointer); cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxCreateOnlineStream(Recognizer: Pointer): Pointer; cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxCreateOnlineStreamWithHotwords(Recognizer: Pointer; Hotwords: PAnsiChar): Pointer; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxDestroyOnlineStream(Recognizer: Pointer); cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxOnlineStreamAcceptWaveform(Stream: Pointer;
  SampleRate: cint32; Samples: pcfloat; N: cint32 ); cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxOnlineStreamInputFinished(Stream: Pointer); cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxIsOnlineStreamReady(Recognizer: Pointer; Stream: Pointer): cint32; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxDecodeOnlineStream(Recognizer: Pointer; Stream: Pointer); cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxOnlineStreamReset(Recognizer: Pointer; Stream: Pointer); cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxOnlineStreamIsEndpoint(Recognizer: Pointer; Stream: Pointer): cint32; cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxGetOnlineStreamResultAsJson(Recognizer: Pointer; Stream: Pointer): PAnsiChar; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxDestroyOnlineStreamResultJson(PJson: PAnsiChar); cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxCreateOfflineRecognizer(Config: PSherpaOnnxOfflineRecognizerConfig): Pointer; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxDestroyOfflineRecognizer(Recognizer: Pointer); cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxCreateOfflineStream(Recognizer: Pointer): Pointer; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxDestroyOfflineStream(Stream: Pointer); cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxAcceptWaveformOffline(Stream: Pointer;
  SampleRate: cint32; Samples: pcfloat; N: cint32); cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxDecodeOfflineStream(Recognizer: Pointer; Stream: Pointer); cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxOfflineRecognizerSetConfig(Recognizer: Pointer; Config: PSherpaOnnxOfflineRecognizerConfig); cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxGetOfflineStreamResultAsJson(Stream: Pointer): PAnsiChar; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxDestroyOfflineStreamResultJson(Json: PAnsiChar); cdecl;
  external SherpaOnnxLibName;

function SherpaOnnxReadWaveWrapper(Filename: PAnsiChar): PSherpaOnnxWave; cdecl;
  external SherpaOnnxLibName name 'SherpaOnnxReadWave';

function SherpaOnnxWriteWaveWrapper(Samples: pcfloat; N: cint32;
  SampleRate: cint32; Filename: PAnsiChar): cint32; cdecl;
  external SherpaOnnxLibName name 'SherpaOnnxWriteWave';

procedure SherpaOnnxFreeWaveWrapper(P: PSherpaOnnxWave); cdecl;
  external SherpaOnnxLibName name 'SherpaOnnxFreeWave';

function SherpaOnnxWriteWave(Filename: AnsiString;
    const Samples: array of Single; SampleRate: Integer): Boolean;
begin
  Result := SherpaOnnxWriteWaveWrapper(pcfloat(Samples), Length(Samples),
    SampleRate, PAnsiChar(Filename)) = 1;
end;

function SherpaOnnxReadWave(Filename: AnsiString): TSherpaOnnxWave;
var
  PWave: PSherpaOnnxWave;
begin
  Result.Samples := nil;
  Result.SampleRate := 0;

  PWave := SherpaOnnxReadWaveWrapper(PAnsiChar(Filename));

  if PWave = nil then
    Exit;

  Result.SampleRate := PWave^.SampleRate;
  SetLength(Result.Samples, PWave^.NumSamples);

  if PWave^.NumSamples > 0 then
    Move(PWave^.Samples[0], Result.Samples[0], PWave^.NumSamples * SizeOf(Single));

  SherpaOnnxFreeWaveWrapper(PWave);
end;

function TSherpaOnnxOnlineTransducerModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOnlineTransducerModelConfig(Encoder := %s, Decoder := %s, Joiner := %s)',
  [Self.Encoder, Self.Decoder, Self.Joiner]);
end;

function TSherpaOnnxOnlineParaformerModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOnlineParaformerModelConfig(Encoder := %s, Decoder := %s)',
  [Self.Encoder, Self.Decoder]);
end;

function TSherpaOnnxOnlineZipformer2CtcModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOnlineZipformer2CtcModelConfig(Model := %s)',
  [Self.Model]);
end;

function TSherpaOnnxOnlineNemoCtcModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOnlineNemoCtcModelConfig(Model := %s)',
  [Self.Model]);
end;

function TSherpaOnnxOnlineToneCtcModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOnlineToneCtcModelConfig(Model := %s)',
  [Self.Model]);
end;

function TSherpaOnnxOnlineModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOnlineModelConfig(Transducer := %s, ' +
    'Paraformer := %s,' +
    'Zipformer2Ctc := %s, ' +
    'Tokens := %s, ' +
    'NumThreads := %d, ' +
    'Provider := %s, ' +
    'Debug := %s, ' +
    'ModelType := %s, ' +
    'ModelingUnit := %s, ' +
    'BpeVocab := %s, ' +
    'NemoCtc := %s, ' +
    'ToneCtc := %s)',
  [Self.Transducer.ToString, Self.Paraformer.ToString,
   Self.Zipformer2Ctc.ToString, Self.Tokens,
   Self.NumThreads, Self.Provider, Self.Debug.ToString,
   Self.ModelType, Self.ModelingUnit, Self.BpeVocab,
   Self.NemoCtc.ToString, Self.ToneCtc.ToString
  ]);
end;

function TSherpaOnnxFeatureConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxFeatureConfig(SampleRate := %d, FeatureDim := %d)',
    [Self.SampleRate, Self.FeatureDim]);
end;

function TSherpaOnnxOnlineCtcFstDecoderConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOnlineCtcFstDecoderConfig(Graph := %s, MaxActive := %d)',
  [Self.Graph, Self.MaxActive]);
end;

function TSherpaOnnxHomophoneReplacerConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxHomophoneReplacerConfig(Lexicon := %s, RuleFsts := %s)',
  [Self.Lexicon, Self.RuleFsts]);
end;

function TSherpaOnnxOnlineRecognizerConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOnlineRecognizerConfig(FeatConfig := %s, ' +
    'ModelConfig := %s, ' +
    'DecodingMethod := %s, ' +
    'MaxActivePaths := %d, ' +
    'EnableEndpoint := %s, ' +
    'Rule1MinTrailingSilence := %.1f, ' +
    'Rule2MinTrailingSilence := %.1f, ' +
    'Rule3MinUtteranceLength := %.1f, ' +
    'HotwordsFile := %s, ' +
    'HotwordsScore := %.1f, ' +
    'CtcFstDecoderConfig := %s, ' +
    'RuleFsts := %s, ' +
    'RuleFars := %s, ' +
    'BlankPenalty := %.1f, ' +
    'Hr := %s' +
    ')'
    ,
    [Self.FeatConfig.ToString, Self.ModelConfig.ToString,
     Self.DecodingMethod, Self.MaxActivePaths, Self.EnableEndpoint.ToString,
     Self.Rule1MinTrailingSilence, Self.Rule2MinTrailingSilence,
     Self.Rule3MinUtteranceLength, Self.HotwordsFile, Self.HotwordsScore,
     Self.CtcFstDecoderConfig.ToString, Self.RuleFsts, Self.RuleFars,
     Self.BlankPenalty, Self.Hr.ToString
    ]);
end;

function TSherpaOnnxOnlineRecognizerResult.ToString: AnsiString;
var
  TokensStr: AnsiString;
  S: AnsiString;
  TimestampStr: AnsiString;
  T: Single;
  Sep: AnsiString;
begin
  TokensStr := '[';
  Sep := '';
  for S in Self.Tokens do
  begin
    TokensStr := TokensStr + Sep + S;
    Sep := ', ';
  end;
  TokensStr := TokensStr + ']';

  TimestampStr := '[';
  Sep := '';
  for T in Self.Timestamps do
  begin
    TimestampStr := TimestampStr + Sep + Format('%.2f', [T]);
    Sep := ', ';
  end;
  TimestampStr := TimestampStr + ']';

  Result := Format('TSherpaOnnxOnlineRecognizerResult(Text := %s, ' +
    'Tokens := %s, ' +
    'Timestamps := %s' +
    ')',
    [Self.Text, TokensStr, TimestampStr]);
end;

constructor TSherpaOnnxOnlineRecognizer.Create(Config: TSherpaOnnxOnlineRecognizerConfig);
var
  C: SherpaOnnxOnlineRecognizerConfig;
begin
  C := Default(SherpaOnnxOnlineRecognizerConfig);
  C.FeatConfig.SampleRate := Config.FeatConfig.SampleRate;
  C.FeatConfig.FeatureDim := Config.FeatConfig.FeatureDim;

  C.ModelConfig.Transducer.Encoder := PAnsiChar(Config.ModelConfig.Transducer.Encoder);
  C.ModelConfig.Transducer.Decoder := PAnsiChar(Config.ModelConfig.Transducer.Decoder);
  C.ModelConfig.Transducer.Joiner := PAnsiChar(Config.ModelConfig.Transducer.Joiner);

  C.ModelConfig.Paraformer.Encoder := PAnsiChar(Config.ModelConfig.Paraformer.Encoder);
  C.ModelConfig.Paraformer.Decoder := PAnsiChar(Config.ModelConfig.Paraformer.Decoder);

  C.ModelConfig.Zipformer2Ctc.Model := PAnsiChar(Config.ModelConfig.Zipformer2Ctc.Model);
  C.ModelConfig.NemoCtc.Model := PAnsiChar(Config.ModelConfig.NemoCtc.Model);
  C.ModelConfig.ToneCtc.Model := PAnsiChar(Config.ModelConfig.ToneCtc.Model);

  C.ModelConfig.Tokens := PAnsiChar(Config.ModelConfig.Tokens);
  C.ModelConfig.NumThreads := Config.ModelConfig.NumThreads;
  C.ModelConfig.Provider := PAnsiChar(Config.ModelConfig.Provider);
  C.ModelConfig.Debug := Ord(Config.ModelConfig.Debug);
  C.ModelConfig.ModelType := PAnsiChar(Config.ModelConfig.ModelType);
  C.ModelConfig.ModelingUnit := PAnsiChar(Config.ModelConfig.ModelingUnit);
  C.ModelConfig.BpeVocab := PAnsiChar(Config.ModelConfig.BpeVocab);

  C.DecodingMethod := PAnsiChar(Config.DecodingMethod);
  C.MaxActivePaths := Config.MaxActivePaths;
  C.EnableEndpoint := Ord(Config.EnableEndpoint);
  C.Rule1MinTrailingSilence := Config.Rule1MinTrailingSilence;
  C.Rule2MinTrailingSilence := Config.Rule2MinTrailingSilence;
  C.Rule3MinUtteranceLength := Config.Rule3MinUtteranceLength;
  C.HotwordsFile := PAnsiChar(Config.HotwordsFile);
  C.HotwordsScore := Config.HotwordsScore;
  C.CtcFstDecoderConfig.Graph := PAnsiChar(Config.CtcFstDecoderConfig.Graph);
  C.CtcFstDecoderConfig.MaxActive := Config.CtcFstDecoderConfig.MaxActive;
  C.RuleFsts := PAnsiChar(Config.RuleFsts);
  C.RuleFars := PAnsiChar(Config.RuleFars);
  C.BlankPenalty := Config.BlankPenalty;
  C.Hr.Lexicon := PAnsiChar(Config.Hr.Lexicon);
  C.Hr.RuleFsts := PAnsiChar(Config.Hr.RuleFsts);

  Self.Handle := SherpaOnnxCreateOnlineRecognizer(@C);
  Self._Config := Config;
end;

destructor TSherpaOnnxOnlineRecognizer.Destroy;
begin
  SherpaOnnxDestroyOnlineRecognizer(Self.Handle);
  Self.Handle := nil;
end;

function TSherpaOnnxOnlineRecognizer.CreateStream: TSherpaOnnxOnlineStream;
var
  Stream: Pointer;
begin
  Stream := SherpaOnnxCreateOnlineStream(Self.Handle);
  Result := TSherpaOnnxOnlineStream.Create(Stream);
end;

function TSherpaOnnxOnlineRecognizer.CreateStream(Hotwords: AnsiString): TSherpaOnnxOnlineStream;
var
  Stream: Pointer;
begin
  Stream := SherpaOnnxCreateOnlineStreamWithHotwords(Self.Handle, PAnsiChar(Hotwords));
  Result := TSherpaOnnxOnlineStream.Create(Stream);
end;

function TSherpaOnnxOnlineRecognizer.IsReady(Stream: TSherpaOnnxOnlineStream): Boolean;
begin
  Result := SherpaOnnxIsOnlineStreamReady(Self.Handle, Stream.Handle) = 1;
end;

procedure TSherpaOnnxOnlineRecognizer.Decode(Stream: TSherpaOnnxOnlineStream);
begin
  SherpaOnnxDecodeOnlineStream(Self.Handle, Stream.Handle);
end;

procedure TSherpaOnnxOnlineRecognizer.Reset(Stream: TSherpaOnnxOnlineStream);
begin
  SherpaOnnxOnlineStreamReset(Self.Handle, Stream.Handle);
end;

function TSherpaOnnxOnlineRecognizer.IsEndpoint(Stream: TSherpaOnnxOnlineStream): Boolean;
begin
  Result := SherpaOnnxOnlineStreamIsEndpoint(Self.Handle, Stream.Handle) = 1;
end;

function TSherpaOnnxOnlineRecognizer.GetResult(Stream: TSherpaOnnxOnlineStream): TSherpaOnnxOnlineRecognizerResult;
var
  pJson: PAnsiChar;
  JsonData: TJSONData;
  JsonObject : TJSONObject;
  JsonEnum: TJSONEnum;
  I: Integer;
begin
  pJson := SherpaOnnxGetOnlineStreamResultAsJson(Self.Handle, Stream.Handle);

  {
   - https://www.freepascal.org/daily/doc/fcl/fpjson/getjson.html
   - https://www.freepascal.org/daily/doc/fcl/fpjson/tjsondata.html
   - https://www.freepascal.org/daily/doc/fcl/fpjson/tjsonobject.html
   - https://www.freepascal.org/daily/doc/fcl/fpjson/tjsonenum.html
  }

  JsonData := GetJSON(AnsiString(pJson), False);

  JsonObject := JsonData as TJSONObject;

  Result.Text := JsonObject.Strings['text'];

  SetLength(Result.Tokens, JsonObject.Arrays['tokens'].Count);

  I := 0;
  for JsonEnum in JsonObject.Arrays['tokens'] do
  begin
    Result.Tokens[I] := JsonEnum.Value.AsString;
    Inc(I);
  end;

  SetLength(Result.Timestamps, JsonObject.Arrays['timestamps'].Count);
  I := 0;
  for JsonEnum in JsonObject.Arrays['timestamps'] do
  begin
    Result.Timestamps[I] := JsonEnum.Value.AsFloat;
    Inc(I);
  end;

  SherpaOnnxDestroyOnlineStreamResultJson(pJson);
end;


constructor TSherpaOnnxOnlineStream.Create(P: Pointer);
begin
  Self.Handle := P;
end;

destructor TSherpaOnnxOnlineStream.Destroy;
begin
  SherpaOnnxDestroyOnlineStream(Self.Handle);
  Self.Handle := nil;
end;

procedure TSherpaOnnxOnlineStream.AcceptWaveform(const Samples: array of Single; SampleRate: Integer);
begin
  SherpaOnnxOnlineStreamAcceptWaveform(Self.Handle, SampleRate,
    pcfloat(Samples), Length(Samples));
end;

procedure TSherpaOnnxOnlineStream.InputFinished;
begin
  SherpaOnnxOnlineStreamInputFinished(Self.Handle);
end;

function TSherpaOnnxOfflineTransducerModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineTransducerModelConfig(' +
    'Encoder := %s, ' +
    'Decoder := %s, ' +
    'Joiner := %s' +
    ')',
    [Self.Encoder, Self.Decoder, Self.Joiner]);
end;

function TSherpaOnnxOfflineParaformerModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineParaformerModelConfig(Model := %s)',
    [Self.Model]);
end;

function TSherpaOnnxOfflineNemoEncDecCtcModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineNemoEncDecCtcModelConfig(Model := %s)',
    [Self.Model]);
end;

function TSherpaOnnxOfflineDolphinModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineDolphinModelConfig(Model := %s)',
    [Self.Model]);
end;

function TSherpaOnnxOfflineZipformerCtcModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineZipformerCtcModelConfig(Model := %s)',
    [Self.Model]);
end;

function TSherpaOnnxOfflineWenetCtcModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineWenetCtcModelConfig(Model := %s)',
    [Self.Model]);
end;

function TSherpaOnnxOfflineOmnilingualAsrCtcModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineOmnilingualAsrCtcModelConfig(Model := %s)',
    [Self.Model]);
end;

function TSherpaOnnxOfflineMedAsrCtcModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineMedAsrCtcModelConfig(Model := %s)',
    [Self.Model]);
end;

function TSherpaOnnxOfflineFireRedAsrCtcModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineFireRedAsrCtcModelConfig(Model := %s)',
    [Self.Model]);
end;

function TSherpaOnnxOfflineFunAsrNanoModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineFunAsrNanoModelConfig(' +
    'EncoderAdaptor := %s' +
    ', LLM := %s' +
    ', Embedding := %s' +
    ', Tokenizer := %s' +
    ', SystemPrompt := %s' +
    ', UserPrompt := %s' +
    ', MaxNewTokens := %d' +
    ', Temperature := %.3f' +
    ', TopP := %.3f' +
    ', Seed := %d' +
    ', Language := %s' +
    ', UseItn := %s' +
    ', Hotwords := %s' +
    ')',
    [Self.EncoderAdaptor, Self.LLM, Self.Embedding, Self.Tokenizer,
     Self.SystemPrompt, Self.UserPrompt, Self.MaxNewTokens, Self.Temperature,
     Self.TopP, Self.Seed, Self.Language, Self.UseItn.ToString, Self.Hotwords]);
end;

function TSherpaOnnxOfflineWhisperModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineWhisperModelConfig(' +
    'Encoder := %s, ' +
    'Decoder := %s, ' +
    'Language := %s, ' +
    'Task := %s, ' +
    'TailPaddings := %d, ' +
    'EnableTokenTimestamps := %s, ' +
    'EnableSegmentTimestamps := %s' +
    ')',
    [Self.Encoder, Self.Decoder, Self.Language, Self.Task, Self.TailPaddings,
     Self.EnableTokenTimestamps.ToString,
     Self.EnableSegmentTimestamps.ToString]);
end;

function TSherpaOnnxOfflineCanaryModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineCanaryModelConfig(' +
    'Encoder := %s, ' +
    'Decoder := %s, ' +
    'SrcLang := %s, ' +
    'TgtLang := %s, ' +
    'UsePnc := %s' +
    ')',
    [Self.Encoder, Self.Decoder, Self.SrcLang,
     Self.TgtLang, Self.UsePnc.ToString]);
end;

function TSherpaOnnxOfflineFireRedAsrModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineFireRedAsrModelConfig(' +
    'Encoder := %s, ' +
    'Decoder := %s)',
    [Self.Encoder, Self.Decoder]);
end;

function TSherpaOnnxOfflineMoonshineModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineMoonshineModelConfig(' +
    'Preprocessor := %s, ' +
    'Encoder := %s, ' +
    'UncachedDecoder := %s, ' +
    'CachedDecoder := %s, ' +
    'MergedDecoder := %s)',
    [Self.Preprocessor, Self.Encoder, Self.UncachedDecoder, Self.CachedDecoder,
     Self.MergedDecoder]);
end;

function TSherpaOnnxOfflineTdnnModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineTdnnModelConfig(Model := %s)',
    [Self.Model]);
end;

function TSherpaOnnxOfflineLMConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineLMConfig(' +
    'Model := %s, ' +
    'Scale := %.1f' +
    ')',
    [Self.Model, Self.Scale]);
end;

function TSherpaOnnxOfflineSenseVoiceModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineSenseVoiceModelConfig(' +
    'Model := %s, ' +
    'Language := %s, ' +
    'UseItn := %s' +
    ')',
    [Self.Model, Self.Language, Self.UseItn.ToString]);
end;

function TSherpaOnnxOfflineModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineModelConfig(' +
    'Transducer := %s, ' +
    'Paraformer := %s, ' +
    'NeMoCtc := %s, ' +
    'Whisper := %s, ' +
    'Tdnn := %s, ' +
    'Tokens := %s, ' +
    'NumThreads := %d, ' +
    'Debug := %s, ' +
    'Provider := %s, ' +
    'ModelType := %s, ' +
    'ModelingUnit := %s, ' +
    'BpeVocab := %s, ' +
    'TeleSpeechCtc := %s, ' +
    'SenseVoice := %s, ' +
    'Moonshine := %s, ' +
    'FireRedAsr := %s, ' +
    'Dolphin := %s, ' +
    'ZipformerCtc := %s, ' +
    'Canary := %s, ' +
    'WenetCtc := %s, ' +
    'Omnilingual := %s' +
    ', MedAsr := %s' +
    ', FunAsrNano := %s' +
    ', FireRedAsrCtc := %s' +
    ')',
    [Self.Transducer.ToString, Self.Paraformer.ToString,
     Self.NeMoCtc.ToString, Self.Whisper.ToString, Self.Tdnn.ToString,
     Self.Tokens, Self.NumThreads, Self.Debug.ToString, Self.Provider,
     Self.ModelType, Self.ModelingUnit, Self.BpeVocab,
     Self.TeleSpeechCtc, Self.SenseVoice.ToString, Self.Moonshine.ToString,
     Self.FireRedAsr.ToString, Self.Dolphin.ToString,
     Self.ZipformerCtc.ToString, Self.Canary.ToString, Self.WenetCtc.ToString,
     Self.Omnilingual.ToString, Self.MedAsr.ToString,
     Self.FunAsrNano.ToString, Self.FireRedAsrCtc.ToString
     ]);
end;

function TSherpaOnnxOfflineRecognizerConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineRecognizerConfig(' +
    'FeatConfig := %s, ' +
    'ModelConfig := %s, ' +
    'LMConfig := %s, ' +
    'DecodingMethod := %s, ' +
    'MaxActivePaths := %d, ' +
    'HotwordsFile := %s, ' +
    'HotwordsScore := %.1f, ' +
    'RuleFsts := %s, ' +
    'RuleFars := %s, ' +
    'BlankPenalty := %1.f, ' +
    'Hr := %s' +
    ')',
    [Self.FeatConfig.ToString, Self.ModelConfig.ToString,
     Self.LMConfig.ToString, Self.DecodingMethod, Self.MaxActivePaths,
     Self.HotwordsFile, Self.HotwordsScore, Self.RuleFsts, Self.RuleFars,
     Self.BlankPenalty, Self.Hr.ToString
     ]);
end;

function ConvertOfflineRecognizerConfig(Config: TSherpaOnnxOfflineRecognizerConfig): SherpaOnnxOfflineRecognizerConfig;
var
  C: SherpaOnnxOfflineRecognizerConfig;
begin
  C := Default(SherpaOnnxOfflineRecognizerConfig);
  C.FeatConfig.SampleRate := Config.FeatConfig.SampleRate;
  C.FeatConfig.FeatureDim := Config.FeatConfig.FeatureDim;

  C.ModelConfig.Transducer.Encoder := PAnsiChar(Config.ModelConfig.Transducer.Encoder);
  C.ModelConfig.Transducer.Decoder := PAnsiChar(Config.ModelConfig.Transducer.Decoder);
  C.ModelConfig.Transducer.Joiner := PAnsiChar(Config.ModelConfig.Transducer.Joiner);

  C.ModelConfig.Paraformer.Model := PAnsiChar(Config.ModelConfig.Paraformer.Model);
  C.ModelConfig.NeMoCtc.Model := PAnsiChar(Config.ModelConfig.NeMoCtc.Model);

  C.ModelConfig.Whisper.Encoder := PAnsiChar(Config.ModelConfig.Whisper.Encoder);
  C.ModelConfig.Whisper.Decoder := PAnsiChar(Config.ModelConfig.Whisper.Decoder);
  C.ModelConfig.Whisper.Language := PAnsiChar(Config.ModelConfig.Whisper.Language);
  C.ModelConfig.Whisper.Task := PAnsiChar(Config.ModelConfig.Whisper.Task);
  C.ModelConfig.Whisper.TailPaddings := Config.ModelConfig.Whisper.TailPaddings;
  C.ModelConfig.Whisper.EnableTokenTimestamps := Ord(Config.ModelConfig.Whisper.EnableTokenTimestamps);
  C.ModelConfig.Whisper.EnableSegmentTimestamps := Ord(Config.ModelConfig.Whisper.EnableSegmentTimestamps);

  C.ModelConfig.Tdnn.Model := PAnsiChar(Config.ModelConfig.Tdnn.Model);

  C.ModelConfig.Tokens := PAnsiChar(Config.ModelConfig.Tokens);
  C.ModelConfig.NumThreads := Config.ModelConfig.NumThreads;
  C.ModelConfig.Debug := Ord(Config.ModelConfig.Debug);
  C.ModelConfig.Provider := PAnsiChar(Config.ModelConfig.Provider);
  C.ModelConfig.ModelType := PAnsiChar(Config.ModelConfig.ModelType);
  C.ModelConfig.ModelingUnit := PAnsiChar(Config.ModelConfig.ModelingUnit);
  C.ModelConfig.BpeVocab := PAnsiChar(Config.ModelConfig.BpeVocab);
  C.ModelConfig.TeleSpeechCtc := PAnsiChar(Config.ModelConfig.TeleSpeechCtc);

  C.ModelConfig.SenseVoice.Model := PAnsiChar(Config.ModelConfig.SenseVoice.Model);
  C.ModelConfig.SenseVoice.Language := PAnsiChar(Config.ModelConfig.SenseVoice.Language);
  C.ModelConfig.SenseVoice.UseItn := Ord(Config.ModelConfig.SenseVoice.UseItn);

  C.ModelConfig.Moonshine.Preprocessor := PAnsiChar(Config.ModelConfig.Moonshine.Preprocessor);
  C.ModelConfig.Moonshine.Encoder := PAnsiChar(Config.ModelConfig.Moonshine.Encoder);
  C.ModelConfig.Moonshine.UncachedDecoder := PAnsiChar(Config.ModelConfig.Moonshine.UncachedDecoder);
  C.ModelConfig.Moonshine.CachedDecoder := PAnsiChar(Config.ModelConfig.Moonshine.CachedDecoder);
  C.ModelConfig.Moonshine.MergedDecoder := PAnsiChar(Config.ModelConfig.Moonshine.MergedDecoder);

  C.ModelConfig.FireRedAsr.Encoder := PAnsiChar(Config.ModelConfig.FireRedAsr.Encoder);
  C.ModelConfig.FireRedAsr.Decoder := PAnsiChar(Config.ModelConfig.FireRedAsr.Decoder);

  C.ModelConfig.Dolphin.Model := PAnsiChar(Config.ModelConfig.Dolphin.Model);
  C.ModelConfig.ZipformerCtc.Model := PAnsiChar(Config.ModelConfig.ZipformerCtc.Model);

  C.ModelConfig.Canary.Encoder := PAnsiChar(Config.ModelConfig.Canary.Encoder);
  C.ModelConfig.Canary.Decoder := PAnsiChar(Config.ModelConfig.Canary.Decoder);
  C.ModelConfig.Canary.SrcLang := PAnsiChar(Config.ModelConfig.Canary.SrcLang);
  C.ModelConfig.Canary.TgtLang := PAnsiChar(Config.ModelConfig.Canary.TgtLang);
  C.ModelConfig.Canary.UsePnc := Ord(Config.ModelConfig.Canary.UsePnc);

  C.ModelConfig.WenetCtc.Model := PAnsiChar(Config.ModelConfig.WenetCtc.Model);
  C.ModelConfig.Omnilingual.Model := PAnsiChar(Config.ModelConfig.Omnilingual.Model);
  C.ModelConfig.MedAsr.Model := PAnsiChar(Config.ModelConfig.MedAsr.Model);

  C.ModelConfig.FunAsrNano.EncoderAdaptor := PAnsiChar(Config.ModelConfig.FunAsrNano.EncoderAdaptor);
  C.ModelConfig.FunAsrNano.LLM := PAnsiChar(Config.ModelConfig.FunAsrNano.LLM);
  C.ModelConfig.FunAsrNano.Embedding := PAnsiChar(Config.ModelConfig.FunAsrNano.Embedding);
  C.ModelConfig.FunAsrNano.Tokenizer := PAnsiChar(Config.ModelConfig.FunAsrNano.Tokenizer);
  C.ModelConfig.FunAsrNano.SystemPrompt := PAnsiChar(Config.ModelConfig.FunAsrNano.SystemPrompt);
  C.ModelConfig.FunAsrNano.UserPrompt := PAnsiChar(Config.ModelConfig.FunAsrNano.UserPrompt);
  C.ModelConfig.FunAsrNano.MaxNewTokens := Config.ModelConfig.FunAsrNano.MaxNewTokens;
  C.ModelConfig.FunAsrNano.Temperature := Config.ModelConfig.FunAsrNano.Temperature;
  C.ModelConfig.FunAsrNano.TopP := Config.ModelConfig.FunAsrNano.TopP;
  C.ModelConfig.FunAsrNano.Seed := Config.ModelConfig.FunAsrNano.Seed;
  C.ModelConfig.FunAsrNano.Language := PAnsiChar(Config.ModelConfig.FunAsrNano.Language);
  C.ModelConfig.FunAsrNano.UseItn := Ord(Config.ModelConfig.FunAsrNano.UseItn);
  C.ModelConfig.FunAsrNano.Hotwords := PAnsiChar(Config.ModelConfig.FunAsrNano.Hotwords);

  C.ModelConfig.FireRedAsrCtc.Model := PAnsiChar(Config.ModelConfig.FireRedAsrCtc.Model);

  C.LMConfig.Model := PAnsiChar(Config.LMConfig.Model);
  C.LMConfig.Scale := Config.LMConfig.Scale;

  C.DecodingMethod := PAnsiChar(Config.DecodingMethod);
  C.MaxActivePaths := Config.MaxActivePaths;
  C.HotwordsFile := PAnsiChar(Config.HotwordsFile);
  C.HotwordsScore := Config.HotwordsScore;
  C.RuleFsts := PAnsiChar(Config.RuleFsts);
  C.RuleFars := PAnsiChar(Config.RuleFars);
  C.BlankPenalty := Config.BlankPenalty;

  C.Hr.Lexicon := PAnsiChar(Config.Hr.Lexicon);
  C.Hr.RuleFsts := PAnsiChar(Config.Hr.RuleFsts);

  Result := C;
end;

constructor TSherpaOnnxOfflineRecognizer.Create(Config: TSherpaOnnxOfflineRecognizerConfig);
var
  C: SherpaOnnxOfflineRecognizerConfig;
begin
  C := ConvertOfflineRecognizerConfig(Config);
  Self.Handle := SherpaOnnxCreateOfflineRecognizer(@C);
  Self._Config := Config;
end;

procedure TSherpaOnnxOfflineRecognizer.SetConfig(Config: TSherpaOnnxOfflineRecognizerConfig);
var
  C: SherpaOnnxOfflineRecognizerConfig;
begin
  C := ConvertOfflineRecognizerConfig(Config);
  SherpaOnnxOfflineRecognizerSetConfig(Self.Handle, @C);
  { We don't update Self._Config }
end;

destructor TSherpaOnnxOfflineRecognizer.Destroy;
begin
  SherpaOnnxDestroyOfflineRecognizer(Self.Handle);
  Self.Handle := nil;
end;

function TSherpaOnnxOfflineRecognizer.CreateStream: TSherpaOnnxOfflineStream;
var
  Stream: Pointer;
begin
  Stream := SherpaOnnxCreateOfflineStream(Self.Handle);
  Result := TSherpaOnnxOfflineStream.Create(Stream);
end;

procedure TSherpaOnnxOfflineRecognizer.Decode(Stream: TSherpaOnnxOfflineStream);
begin
  SherpaOnnxDecodeOfflineStream(Self.Handle, Stream.Handle);
end;

function TSherpaOnnxOfflineRecognizer.GetResult(Stream: TSherpaOnnxOfflineStream): TSherpaOnnxOfflineRecognizerResult;
var
  pJson: PAnsiChar;
  JsonData: TJSONData;
  JsonObject : TJSONObject;
  JsonEnum: TJSONEnum;
  I: Integer;
begin
  pJson := SherpaOnnxGetOfflineStreamResultAsJson(Stream.Handle);

  JsonData := GetJSON(AnsiString(pJson), False);

  JsonObject := JsonData as TJSONObject;

  Result.Text := JsonObject.Strings['text'];

  SetLength(Result.Tokens, JsonObject.Arrays['tokens'].Count);

  I := 0;
  for JsonEnum in JsonObject.Arrays['tokens'] do
  begin
    Result.Tokens[I] := JsonEnum.Value.AsString;
    Inc(I);
  end;

  SetLength(Result.Timestamps, JsonObject.Arrays['timestamps'].Count);
  I := 0;
  for JsonEnum in JsonObject.Arrays['timestamps'] do
  begin
    Result.Timestamps[I] := JsonEnum.Value.AsFloat;
    Inc(I);
  end;

  SherpaOnnxDestroyOfflineStreamResultJson(pJson);
end;

constructor TSherpaOnnxOfflineStream.Create(P: Pointer);
begin
  Self.Handle := P;
end;

destructor TSherpaOnnxOfflineStream.Destroy;
begin
  SherpaOnnxDestroyOfflineStream(Self.Handle);
  Self.Handle := nil;
end;

procedure TSherpaOnnxOfflineStream.AcceptWaveform(const Samples: array of Single; SampleRate: Integer);
begin
  SherpaOnnxAcceptWaveformOffline(Self.Handle, SampleRate, pcfloat(Samples),
    Length(Samples));
end;

function TSherpaOnnxOfflineRecognizerResult.ToString: AnsiString;
var
  TokensStr: AnsiString;
  S: AnsiString;
  TimestampStr: AnsiString;
  T: Single;
  Sep: AnsiString;
begin
  TokensStr := '[';
  Sep := '';
  for S in Self.Tokens do
  begin
    TokensStr := TokensStr + Sep + S;
    Sep := ', ';
  end;
  TokensStr := TokensStr + ']';

  TimestampStr := '[';
  Sep := '';
  for T in Self.Timestamps do
  begin
    TimestampStr := TimestampStr + Sep + Format('%.2f', [T]);
    Sep := ', ';
  end;
  TimestampStr := TimestampStr + ']';

  Result := Format('TSherpaOnnxOfflineRecognizerResult(Text := %s, ' +
    'Tokens := %s, ' +
    'Timestamps := %s' +
    ')',
    [Self.Text, TokensStr, TimestampStr]);
end;

function TSherpaOnnxSileroVadModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxSileroVadModelConfig(' +
    'Model := %s, ' +
    'Threshold := %.2f, ' +
    'MinSilenceDuration := %.2f, ' +
    'MinSpeechDuration := %.2f, ' +
    'WindowSize := %d, ' +
    'MaxSpeechDuration := %.2f' +
    ')',
    [Self.Model, Self.Threshold, Self.MinSilenceDuration,
     Self.MinSpeechDuration, Self.WindowSize, Self.MaxSpeechDuration
    ]);
end;

function TSherpaOnnxTenVadModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxTenVadModelConfig(' +
    'Model := %s, ' +
    'Threshold := %.2f, ' +
    'MinSilenceDuration := %.2f, ' +
    'MinSpeechDuration := %.2f, ' +
    'WindowSize := %d, ' +
    'MaxSpeechDuration := %.2f' +
    ')',
    [Self.Model, Self.Threshold, Self.MinSilenceDuration,
     Self.MinSpeechDuration, Self.WindowSize, Self.MaxSpeechDuration
    ]);
end;

class operator TSherpaOnnxOfflineFunAsrNanoModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineFunAsrNanoModelConfig);
begin
  Dest.MaxNewTokens := 512;
  Dest.Temperature := 1e-6;
  Dest.TopP := 0.8;
  Dest.Seed := 42;
  Dest.UseItn := False;
end;

class operator TSherpaOnnxSileroVadModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxSileroVadModelConfig);
begin
  Dest.Threshold := 0.5;
  Dest.MinSilenceDuration := 0.5;
  Dest.MinSpeechDuration := 0.25;
  Dest.WindowSize := 512;
  Dest.MaxSpeechDuration := 5.0;
end;

class operator TSherpaOnnxTenVadModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxTenVadModelConfig);
begin
  Dest.Threshold := 0.5;
  Dest.MinSilenceDuration := 0.5;
  Dest.MinSpeechDuration := 0.25;
  Dest.WindowSize := 256;
  Dest.MaxSpeechDuration := 5.0;
end;

function TSherpaOnnxVadModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxVadModelConfig(' +
    'SileroVad := %s, ' +
    'SampleRate := %d, ' +
    'NumThreads := %d, ' +
    'Provider := %s, ' +
    'Debug := %s, ' +
    'TenVad := %s' +
    ')',
    [Self.SileroVad.ToString, Self.SampleRate, Self.NumThreads, Self.Provider,
     Self.Debug.ToString, Self.TenVad.ToString
    ]);
end;

class operator TSherpaOnnxVadModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxVadModelConfig);
begin
  Dest.SampleRate := 16000;
  Dest.NumThreads := 1;
  Dest.Provider := 'cpu';
  Dest.Debug := False;
end;

class operator TSherpaOnnxFeatureConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxFeatureConfig);
begin
  Dest.SampleRate := 16000;
  Dest.FeatureDim := 80;
end;

class operator TSherpaOnnxOnlineCtcFstDecoderConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOnlineCtcFstDecoderConfig);
begin
  Dest.MaxActive := 3000;
end;

class operator TSherpaOnnxOnlineRecognizerConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOnlineRecognizerConfig);
begin
  Dest.DecodingMethod := 'greedy_search';
  Dest.EnableEndpoint := False;
  Dest.Rule1MinTrailingSilence := 2.4;
  Dest.Rule2MinTrailingSilence := 1.2;
  Dest.Rule3MinUtteranceLength := 20;
  Dest.HotwordsScore := 1.5;
  Dest.BlankPenalty := 0;
end;

class operator TSherpaOnnxOnlineModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOnlineModelConfig);
begin
  Dest.NumThreads := 1;
  Dest.Provider := 'cpu';
  Dest.Debug := False;
end;

class operator TSherpaOnnxOfflineWhisperModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineWhisperModelConfig);
begin
  Dest.Task := 'transcribe';
  Dest.TailPaddings := -1;
  Dest.EnableTokenTimestamps := False;
  Dest.EnableSegmentTimestamps := False;
end;

class operator TSherpaOnnxOfflineCanaryModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineCanaryModelConfig);
begin
  Dest.SrcLang := 'en';
  Dest.TgtLang := 'en';
  Dest.UsePnc := True;
end;

class operator TSherpaOnnxOfflineLMConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineLMConfig);
begin
  Dest.Scale := 1.0;
end;

class operator TSherpaOnnxOfflineSenseVoiceModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineSenseVoiceModelConfig);
begin
  Dest.UseItn := True;
end;

class operator TSherpaOnnxOfflineModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineModelConfig);
begin
  Dest.NumThreads := 1;
  Dest.Debug := False;
  Dest.Provider := 'cpu';
end;

class operator TSherpaOnnxOfflineRecognizerConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineRecognizerConfig);
begin
  Dest.DecodingMethod := 'greedy_search';
  Dest.MaxActivePaths := 4;
  Dest.HotwordsScore := 1.5;
  Dest.BlankPenalty := 0;
end;

constructor TSherpaOnnxCircularBuffer.Create(Capacity: Integer);
begin
  Self.Handle := SherpaOnnxCreateCircularBuffer(Capacity);
end;

destructor TSherpaOnnxCircularBuffer.Destroy;
begin
  SherpaOnnxDestroyCircularBuffer(Self.Handle);
  Self.Handle := nil;
end;

procedure TSherpaOnnxCircularBuffer.Push(Samples: array of Single);
begin
  SherpaOnnxCircularBufferPush(Self.Handle, pcfloat(Samples), Length(Samples));
end;

procedure TSherpaOnnxCircularBuffer.Push(Samples: pcfloat; N: Integer);
begin
  SherpaOnnxCircularBufferPush(Self.Handle, Samples, N);
end;

function TSherpaOnnxCircularBuffer.Get(StartIndex: Integer; N: Integer): TSherpaOnnxSamplesArray;
var
  P: pcfloat;
begin
  Result := nil;

  if N <= 0 then
    Exit;

  P := SherpaOnnxCircularBufferGet(Self.Handle, StartIndex, N);
  if P = nil then
    Exit;

  SetLength(Result, N);

  Move(P[0], Result[0], N * SizeOf(Single));

  SherpaOnnxCircularBufferFree(P);
end;

procedure TSherpaOnnxCircularBuffer.Pop(N: Integer);
begin
  SherpaOnnxCircularBufferPop(Self.Handle, N);
end;

procedure TSherpaOnnxCircularBuffer.Reset;
begin
  SherpaOnnxCircularBufferReset(Self.Handle);
end;

function TSherpaOnnxCircularBuffer.Size: Integer;
begin
  Result := SherpaOnnxCircularBufferSize(Self.Handle);
end;

function TSherpaOnnxCircularBuffer.Head: Integer;
begin
  Result := SherpaOnnxCircularBufferHead(Self.Handle);
end;

constructor TSherpaOnnxVoiceActivityDetector.Create(Config: TSherpaOnnxVadModelConfig; BufferSizeInSeconds: Single);
var
  C: SherpaOnnxVadModelConfig ;
begin
  C := Default(SherpaOnnxVadModelConfig);
  Self._Config := Config;

  C.SileroVad.Model := PAnsiChar(Config.SileroVad.Model);
  C.SileroVad.Threshold := Config.SileroVad.Threshold;
  C.SileroVad.MinSilenceDuration := Config.SileroVad.MinSilenceDuration;
  C.SileroVad.MinSpeechDuration := Config.SileroVad.MinSpeechDuration;
  C.SileroVad.WindowSize := Config.SileroVad.WindowSize;
  C.SileroVad.MaxSpeechDuration := Config.SileroVad.MaxSpeechDuration;

  C.TenVad.Model := PAnsiChar(Config.TenVad.Model);
  C.TenVad.Threshold := Config.TenVad.Threshold;
  C.TenVad.MinSilenceDuration := Config.TenVad.MinSilenceDuration;
  C.TenVad.MinSpeechDuration := Config.TenVad.MinSpeechDuration;
  C.TenVad.WindowSize := Config.TenVad.WindowSize;
  C.TenVad.MaxSpeechDuration := Config.TenVad.MaxSpeechDuration;

  C.SampleRate := Config.SampleRate;
  C.NumThreads := Config.NumThreads;
  C.Provider := PAnsiChar(Config.Provider);
  C.Debug := Ord(Config.Debug);

  Self.Handle := SherpaOnnxCreateVoiceActivityDetector(@C, BufferSizeInSeconds);
end;

destructor TSherpaOnnxVoiceActivityDetector.Destroy;
begin
  SherpaOnnxDestroyVoiceActivityDetector(Self.Handle);
  Self.Handle := nil;
end;

procedure TSherpaOnnxVoiceActivityDetector.AcceptWaveform(const Samples: array of Single);
begin
  SherpaOnnxVoiceActivityDetectorAcceptWaveform(Self.Handle, pcfloat(Samples), Length(Samples));
end;

procedure TSherpaOnnxVoiceActivityDetector.AcceptWaveform(const Samples: array of Single; Offset: Integer; N: Integer);
begin
  if Offset + N > Length(Samples) then
    begin
      WriteLn(Format('Invalid arguments!. Array length: %d, Offset: %d, N: %d',
        [Length(Samples), Offset, N]
      ));
      Exit;
    end;

  SherpaOnnxVoiceActivityDetectorAcceptWaveform(Self.Handle,
    pcfloat(Samples) + Offset, N);
end;

function TSherpaOnnxVoiceActivityDetector.IsEmpty: Boolean;
begin
  Result := SherpaOnnxVoiceActivityDetectorEmpty(Self.Handle) = 1;
end;

function TSherpaOnnxVoiceActivityDetector.IsDetected: Boolean;
begin
  Result := SherpaOnnxVoiceActivityDetectorDetected(Self.Handle) = 1;
end;

procedure TSherpaOnnxVoiceActivityDetector.Pop;
begin
  SherpaOnnxVoiceActivityDetectorPop(Self.Handle);
end;

procedure TSherpaOnnxVoiceActivityDetector.Clear;
begin
  SherpaOnnxVoiceActivityDetectorClear(Self.Handle);
end;

function TSherpaOnnxVoiceActivityDetector.Front: TSherpaOnnxSpeechSegment;
var
  P: PSherpaOnnxSpeechSegment;
begin
  Result := Default(TSherpaOnnxSpeechSegment);

  P := SherpaOnnxVoiceActivityDetectorFront(Self.Handle);
  if P = nil then
    Exit;

  Result.Start := P^.Start;
  Result.Samples := nil;
  SetLength(Result.Samples, P^.N);

  if P^.N > 0 then
    Move(P^.Samples[0], Result.Samples[0], P^.N * SizeOf(Single));

  SherpaOnnxDestroySpeechSegment(P);
end;

procedure TSherpaOnnxVoiceActivityDetector.Reset;
begin
  SherpaOnnxVoiceActivityDetectorReset(Self.Handle);
end;

procedure TSherpaOnnxVoiceActivityDetector.Flush;
begin
  SherpaOnnxVoiceActivityDetectorFlush(Self.Handle);
end;

function TSherpaOnnxOfflineTtsVitsModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineTtsVitsModelConfig(' +
    'Model := %s, ' +
    'Lexicon := %s, ' +
    'Tokens := %s, ' +
    'DataDir := %s, ' +
    'NoiseScale := %.2f, ' +
    'NoiseScaleW := %.2f, ' +
    'LengthScale := %.2f' +
    ')',
    [Self.Model, Self.Lexicon, Self.Tokens, Self.DataDir, Self.NoiseScale,
     Self.NoiseScaleW, Self.LengthScale
    ]);
end;

class operator TSherpaOnnxOfflineTtsVitsModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsVitsModelConfig);
begin
  Dest.NoiseScale := 0.667;
  Dest.NoiseScaleW := 0.8;
  Dest.LengthScale := 1.0;
end;

class operator TSherpaOnnxGenerationConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxGenerationConfig);
begin
  Dest.SilenceScale := 0.2;
  Dest.Speed := 1.0;
  Dest.Sid := 0;
  Dest.ReferenceAudioLen := 0;
  Dest.ReferenceSampleRate := 0;
  Dest.NumSteps := 5;
end;

function TSherpaOnnxOfflineTtsMatchaModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineTtsMatchaModelConfig(' +
    'AcousticModel := %s, ' +
    'Vocoder := %s, ' +
    'Lexicon := %s, ' +
    'Tokens := %s, ' +
    'DataDir := %s, ' +
    'NoiseScale := %.2f, ' +
    'LengthScale := %.2f' +
    ')',
    [Self.AcousticModel, Self.Vocoder, Self.Lexicon, Self.Tokens,
     Self.DataDir, Self.NoiseScale, Self.LengthScale
    ]);
end;

class operator TSherpaOnnxOfflineTtsMatchaModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsMatchaModelConfig);
begin
  Dest.NoiseScale := 0.667;
  Dest.LengthScale := 1.0;
end;

function TSherpaOnnxOfflineTtsKokoroModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineTtsKokoroModelConfig(' +
    'Model := %s, ' +
    'Voices := %s, ' +
    'Tokens := %s, ' +
    'DataDir := %s, ' +
    'LengthScale := %.2f, ' +
    'Lexicon := %s, ' +
    'Lang := %s' +
    ')',
    [Self.Model, Self.Voices, Self.Tokens, Self.DataDir, Self.LengthScale,
     Self.Lexicon, Self.Lang]);
end;

class operator TSherpaOnnxOfflineTtsKokoroModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsKokoroModelConfig);
begin
  Dest.LengthScale := 1.0;
end;

function TSherpaOnnxOfflineTtsKittenModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineTtsKittenModelConfig(' +
    'Model := %s, ' +
    'Voices := %s, ' +
    'Tokens := %s, ' +
    'DataDir := %s, ' +
    'LengthScale := %.2f' +
    ')',
    [Self.Model, Self.Voices, Self.Tokens, Self.DataDir, Self.LengthScale]);
end;

class operator TSherpaOnnxOfflineTtsKittenModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsKittenModelConfig);
begin
  Dest.LengthScale := 1.0;
end;

function TSherpaOnnxOfflineTtsZipVoiceModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineTtsZipVoiceModelConfig(' +
    'Tokens := %s, ' +
    'Encoder := %s, ' +
    'Decoder := %s, ' +
    'Vocoder := %s, ' +
    'DataDir := %s, ' +
    'Lexicon := %s, ' +
    'FeatScale := %.2f, ' +
    'Tshift := %.2f, ' +
    'TargetRms := %.2f, ' +
    'GuidanceScale := %.2f' +
    ')',
    [Self.Tokens, Self.Encoder, Self.Decoder, Self.Vocoder,
     Self.DataDir, Self.Lexicon, Self.FeatScale, Self.Tshift,
     Self.TargetRms, Self.GuidanceScale]);
end;

class operator TSherpaOnnxOfflineTtsZipVoiceModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsZipVoiceModelConfig);
begin
  Dest.FeatScale := 0.1;
  Dest.Tshift := 0.5;
  Dest.TargetRms := 0.1;
  Dest.GuidanceScale := 1.0;
end;

class operator TSherpaOnnxOfflineTtsPocketModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsPocketModelConfig);
begin
  Dest.VoiceEmbeddingCacheCapacity := 50;
end;

function TSherpaOnnxOfflineTtsPocketModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineTtsPocketModelConfig(' +
    'LmFlow := %s, ' +
    'LmMain := %s, ' +
    'Encoder := %s, ' +
    'Decoder := %s, ' +
    'TextConditioner := %s, ' +
    'VocabJson := %s, ' +
    'TokenScoresJson := %s, ' +
    'VoiceEmbeddingCacheCapacity := %d' +
    ')',
    [Self.LmFlow, Self.LmMain, Self.Encoder, Self.Decoder, Self.TextConditioner,
     Self.VocabJson, Self.TokenScoresJson, Self.VoiceEmbeddingCacheCapacity]);
end;

function TSherpaOnnxOfflineTtsSupertonicModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineTtsSupertonicModelConfig(' +
    'DurationPredictor := %s, ' +
    'TextEncoder := %s, ' +
    'VectorEstimator := %s, ' +
    'Vocoder := %s, ' +
    'TtsJson := %s, ' +
    'UnicodeIndexer := %s, ' +
    'VoiceStyle := %s' +
    ')',
    [Self.DurationPredictor, Self.TextEncoder, Self.VectorEstimator, Self.Vocoder,
     Self.TtsJson, Self.UnicodeIndexer, Self.VoiceStyle]);
end;

function TSherpaOnnxOfflineTtsModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineTtsModelConfig(' +
    'Vits := %s, ' +
    'NumThreads := %d, ' +
    'Debug := %s, ' +
    'Provider := %s, ' +
    'Matcha := %s, ' +
    'Kokoro := %s, ' +
    'Kitten := %s, ' +
    'ZipVoice := %s, ' +
    'Pocket := %s, ' +
    'Supertonic := %s' +
    ')',
    [Self.Vits.ToString, Self.NumThreads, Self.Debug.ToString, Self.Provider,
     Self.Matcha.ToString, Self.Kokoro.ToString, Self.Kitten.ToString,
     Self.ZipVoice.ToString, Self.Pocket.ToString, Self.Supertonic.ToString
    ]);
end;

class operator TSherpaOnnxOfflineTtsModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsModelConfig);
begin
  Dest.NumThreads := 1;
  Dest.Debug := False;
  Dest.Provider := 'cpu';
end;

function TSherpaOnnxOfflineTtsConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineTtsConfig(' +
    'Model := %s, ' +
    'RuleFsts := %s, ' +
    'MaxNumSentences := %d, ' +
    'RuleFars := %s, ' +
    'SilenceScale := %f' +
    ')',
    [Self.Model.ToString, Self.RuleFsts, Self.MaxNumSentences, Self.RuleFars,
     Self.SilenceScale]);
end;

class operator TSherpaOnnxOfflineTtsConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsConfig);
begin
  Dest.MaxNumSentences := 1;
  Dest.SilenceScale := 0.2;
end;

constructor TSherpaOnnxOfflineTts.Create(Config: TSherpaOnnxOfflineTtsConfig);
var
  C: SherpaOnnxOfflineTtsConfig;
begin
  C := Default(SherpaOnnxOfflineTtsConfig);
  Self._Config := Config;

  C.Model.Vits.Model := PAnsiChar(Config.Model.Vits.Model);
  C.Model.Vits.Lexicon := PAnsiChar(Config.Model.Vits.Lexicon);
  C.Model.Vits.Tokens := PAnsiChar(Config.Model.Vits.Tokens);
  C.Model.Vits.DataDir := PAnsiChar(Config.Model.Vits.DataDir);
  C.Model.Vits.NoiseScale := Config.Model.Vits.NoiseScale;
  C.Model.Vits.NoiseScaleW := Config.Model.Vits.NoiseScaleW;
  C.Model.Vits.LengthScale := Config.Model.Vits.LengthScale;

  C.Model.Matcha.AcousticModel := PAnsiChar(Config.Model.Matcha.AcousticModel);
  C.Model.Matcha.Vocoder := PAnsiChar(Config.Model.Matcha.Vocoder);
  C.Model.Matcha.Lexicon := PAnsiChar(Config.Model.Matcha.Lexicon);
  C.Model.Matcha.Tokens := PAnsiChar(Config.Model.Matcha.Tokens);
  C.Model.Matcha.DataDir := PAnsiChar(Config.Model.Matcha.DataDir);
  C.Model.Matcha.NoiseScale := Config.Model.Matcha.NoiseScale;
  C.Model.Matcha.LengthScale := Config.Model.Matcha.LengthScale;

  C.Model.Kokoro.Model := PAnsiChar(Config.Model.Kokoro.Model);
  C.Model.Kokoro.Voices := PAnsiChar(Config.Model.Kokoro.Voices);
  C.Model.Kokoro.Tokens := PAnsiChar(Config.Model.Kokoro.Tokens);
  C.Model.Kokoro.DataDir := PAnsiChar(Config.Model.Kokoro.DataDir);
  C.Model.Kokoro.LengthScale := Config.Model.Kokoro.LengthScale;
  C.Model.Kokoro.Lexicon := PAnsiChar(Config.Model.Kokoro.Lexicon);
  C.Model.Kokoro.Lang := PAnsiChar(Config.Model.Kokoro.Lang);

  C.Model.Kitten.Model := PAnsiChar(Config.Model.Kitten.Model);
  C.Model.Kitten.Voices := PAnsiChar(Config.Model.Kitten.Voices);
  C.Model.Kitten.Tokens := PAnsiChar(Config.Model.Kitten.Tokens);
  C.Model.Kitten.DataDir := PAnsiChar(Config.Model.Kitten.DataDir);
  C.Model.Kitten.LengthScale := Config.Model.Kitten.LengthScale;

  C.Model.ZipVoice.Tokens := PAnsiChar(Config.Model.ZipVoice.Tokens);
  C.Model.ZipVoice.Encoder := PAnsiChar(Config.Model.ZipVoice.Encoder);
  C.Model.ZipVoice.Decoder := PAnsiChar(Config.Model.ZipVoice.Decoder);
  C.Model.ZipVoice.Vocoder := PAnsiChar(Config.Model.ZipVoice.Vocoder);
  C.Model.ZipVoice.DataDir := PAnsiChar(Config.Model.ZipVoice.DataDir);
  C.Model.ZipVoice.Lexicon := PAnsiChar(Config.Model.ZipVoice.Lexicon);
  C.Model.ZipVoice.FeatScale := Config.Model.ZipVoice.FeatScale;
  C.Model.ZipVoice.Tshift := Config.Model.ZipVoice.Tshift;
  C.Model.ZipVoice.TargetRms := Config.Model.ZipVoice.TargetRms;
  C.Model.ZipVoice.GuidanceScale := Config.Model.ZipVoice.GuidanceScale;

  C.Model.Pocket.LmFlow := PAnsiChar(Config.Model.Pocket.LmFlow);
  C.Model.Pocket.LmMain := PAnsiChar(Config.Model.Pocket.LmMain);
  C.Model.Pocket.Encoder := PAnsiChar(Config.Model.Pocket.Encoder);
  C.Model.Pocket.Decoder := PAnsiChar(Config.Model.Pocket.Decoder);
  C.Model.Pocket.TextConditioner := PAnsiChar(Config.Model.Pocket.TextConditioner);
  C.Model.Pocket.VocabJson := PAnsiChar(Config.Model.Pocket.VocabJson);
  C.Model.Pocket.TokenScoresJson := PAnsiChar(Config.Model.Pocket.TokenScoresJson);
  C.Model.Pocket.VoiceEmbeddingCacheCapacity := Config.Model.Pocket.VoiceEmbeddingCacheCapacity;

  C.Model.Supertonic.DurationPredictor := PAnsiChar(Config.Model.Supertonic.DurationPredictor);
  C.Model.Supertonic.TextEncoder := PAnsiChar(Config.Model.Supertonic.TextEncoder);
  C.Model.Supertonic.VectorEstimator := PAnsiChar(Config.Model.Supertonic.VectorEstimator);
  C.Model.Supertonic.Vocoder := PAnsiChar(Config.Model.Supertonic.Vocoder);
  C.Model.Supertonic.TtsJson := PAnsiChar(Config.Model.Supertonic.TtsJson);
  C.Model.Supertonic.UnicodeIndexer := PAnsiChar(Config.Model.Supertonic.UnicodeIndexer);
  C.Model.Supertonic.VoiceStyle := PAnsiChar(Config.Model.Supertonic.VoiceStyle);

  C.Model.NumThreads := Config.Model.NumThreads;
  C.Model.Provider := PAnsiChar(Config.Model.Provider);
  C.Model.Debug := Ord(Config.Model.Debug);

  C.RuleFsts := PAnsiChar(Config.RuleFsts);
  C.MaxNumSentences := Config.MaxNumSentences;
  C.RuleFars := PAnsiChar(Config.RuleFars);
  C.SilenceScale := Config.SilenceScale;

  Self.Handle := SherpaOnnxCreateOfflineTts(@C);

  Self.SampleRate := SherpaOnnxOfflineTtsSampleRate(Self.Handle);
  Self.NumSpeakers := SherpaOnnxOfflineTtsNumSpeakers(Self.Handle);
end;

destructor TSherpaOnnxOfflineTts.Destroy;
begin
  SherpaOnnxDestroyOfflineTts(Self.Handle);
  Self.Handle := nil;
end;

function ExtractGeneratedAudio(Audio: PSherpaOnnxGeneratedAudio): TSherpaOnnxGeneratedAudio;
begin
  Result := Default(TSherpaOnnxGeneratedAudio);

  if Audio = nil then
    Exit;

  SetLength(Result.Samples, Audio^.N);
  Result.SampleRate := Audio^.SampleRate;

  if Audio^.N > 0 then
    Move(Audio^.Samples[0], Result.Samples[0], Audio^.N * SizeOf(Single));

  SherpaOnnxDestroyOfflineTtsGeneratedAudio(Audio);
end;

function TSherpaOnnxOfflineTts.Generate(Text: AnsiString; SpeakerId: Integer;
  Speed: Single): TSherpaOnnxGeneratedAudio;
var
  Audio: PSherpaOnnxGeneratedAudio;
begin
  Audio := SherpaOnnxOfflineTtsGenerate(Self.Handle, PAnsiChar(Text), SpeakerId, Speed);
  Result := ExtractGeneratedAudio(Audio);
end;

function TSherpaOnnxOfflineTts.Generate(Text: AnsiString; SpeakerId: Integer;
  Speed: Single;
  Callback: TSherpaOnnxGeneratedAudioCallbackWithArg;
  Arg: Pointer
  ): TSherpaOnnxGeneratedAudio;
var
  Audio: PSherpaOnnxGeneratedAudio;
begin
  Audio := SherpaOnnxOfflineTtsGenerateWithCallbackWithArg(Self.Handle, PAnsiChar(Text),
    SpeakerId, Speed, Callback, Arg);
  Result := ExtractGeneratedAudio(Audio);
end;

function TSherpaOnnxOfflineTts.Generate(Text: AnsiString;
  GenerationConfig: TSherpaOnnxGenerationConfig;
  Callback: TSherpaOnnxGeneratedAudioProgressCallbackWithArg;
  Arg: Pointer
  ): TSherpaOnnxGeneratedAudio;
var
  Audio: PSherpaOnnxGeneratedAudio;
  C: SherpaOnnxGenerationConfig;
  ReferenceAudio: TSherpaOnnxSamplesArray;
  CReferenceAudio: pcfloat;
  ReferenceText: AnsiString;
  Extra: AnsiString;
begin
  C := Default(SherpaOnnxGenerationConfig);
  C.SilenceScale := GenerationConfig.SilenceScale;
  C.Speed := GenerationConfig.Speed;
  C.Sid := GenerationConfig.Sid;
  ReferenceAudio := GenerationConfig.ReferenceAudio;
  CReferenceAudio := nil;
  C.ReferenceAudio := nil;
  C.ReferenceAudioLen := Length(ReferenceAudio);
  if C.ReferenceAudioLen > 0 then
    begin
      GetMem(CReferenceAudio, C.ReferenceAudioLen * SizeOf(Single));
      Move(ReferenceAudio[0], CReferenceAudio[0], C.ReferenceAudioLen * SizeOf(Single));
      C.ReferenceAudio := CReferenceAudio;
    end;
  C.ReferenceSampleRate:= GenerationConfig.ReferenceSampleRate;
  ReferenceText := GenerationConfig.ReferenceText;
  C.ReferenceText := PAnsiChar(ReferenceText);
  C.NumSteps := GenerationConfig.NumSteps;
  Extra := GenerationConfig.Extra;
  C.Extra := PAnsiChar(Extra);

  Audio := nil;
  try
    Audio := SherpaOnnxOfflineTtsGenerateWithConfig(Self.Handle, PAnsiChar(Text),
      @C, Callback, Arg);
  finally
    if CReferenceAudio <> nil then
      FreeMem(CReferenceAudio);
  end;

  Result := ExtractGeneratedAudio(Audio);
end;

constructor TSherpaOnnxLinearResampler.Create(SampleRateIn: Integer; SampleRateOut: Integer);
var
  MinFreq: Single;
  LowpassCutoff: Single;
  LowpassFilterWidth: Integer = 6;
begin
  if SampleRateIn > SampleRateOut then
    MinFreq := SampleRateOut
  else
    MinFreq := SampleRateIn;

  LowpassCutoff := 0.99 * 0.5 * MinFreq;

  Self.Handle := SherpaOnnxCreateLinearResampler(SampleRateIn,
    SampleRateOut, LowpassCutoff, LowpassFilterWidth);
  Self.InputSampleRate := SampleRateIn;
  Self.OutputSampleRate := SampleRateOut;
end;

destructor TSherpaOnnxLinearResampler.Destroy;
begin
  SherpaOnnxDestroyLinearResampler(Self.Handle);
  Self.Handle := nil;
end;

function TSherpaOnnxLinearResampler.Resample(Samples: pcfloat;
  N: Integer; Flush: Boolean): TSherpaOnnxSamplesArray;
var
  P: PSherpaOnnxResampleOut;
begin
  Result := Default(TSherpaOnnxSamplesArray);
  P := SherpaOnnxLinearResamplerResample(Self.Handle, Samples, N, Ord(Flush));
  if P = nil then
    Exit;

  SetLength(Result, P^.N);

  if P^.N > 0 then
    Move(P^.Samples[0], Result[0], P^.N * SizeOf(Single));

  SherpaOnnxLinearResamplerResampleFree(P);
end;

function TSherpaOnnxLinearResampler.Resample(const Samples: array of Single; Flush: Boolean): TSherpaOnnxSamplesArray;
begin
  Result := Self.Resample(pcfloat(Samples), Length(Samples), Flush);
end;

procedure TSherpaOnnxLinearResampler.Reset;
begin
  SherpaOnnxLinearResamplerReset(Self.Handle);
end;

function TSherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig(' +
    'Model := %s)',[Self.Model]);
end;

function TSherpaOnnxOfflineSpeakerSegmentationModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig(' +
    'Pyannote := %s, ' +
    'NumThreads := %d, ' +
    'Debug := %s, ' +
    'Provider := %s)',
    [Self.Pyannote.ToString, Self.NumThreads,
     Self.Debug.ToString, Self.Provider]);
end;

class operator TSherpaOnnxOfflineSpeakerSegmentationModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineSpeakerSegmentationModelConfig);
begin
  Dest.NumThreads := 1;
  Dest.Debug := False;
  Dest.Provider := 'cpu';
end;

function TSherpaOnnxFastClusteringConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxFastClusteringConfig(' +
    'NumClusters := %d, Threshold := %.3f)',
    [Self.NumClusters, Self.Threshold]);
end;

class operator TSherpaOnnxFastClusteringConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxFastClusteringConfig);
begin
  Dest.NumClusters := -1;
  Dest.Threshold := 0.5;
end;

function TSherpaOnnxSpeakerEmbeddingExtractorConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxSpeakerEmbeddingExtractorConfig(' +
    'Model := %s, '+
    'NumThreads := %d, '+
    'Debug := %s, '+
    'Provider := %s)',
    [Self.Model, Self.NumThreads, Self.Debug.ToString, Self.Provider]);
end;

class operator TSherpaOnnxSpeakerEmbeddingExtractorConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxSpeakerEmbeddingExtractorConfig);
begin
  Dest.NumThreads := 1;
  Dest.Debug := False;
  Dest.Provider := 'cpu';
end;

function TSherpaOnnxOfflineSpeakerDiarizationConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineSpeakerDiarizationConfig(' +
    'Segmentation := %s, '+
    'Embedding := %s, '+
    'Clustering := %s, '+
    'MinDurationOn := %.3f, '+
    'MinDurationOff := %.3f)',
    [Self.Segmentation.ToString, Self.Embedding.ToString,
     Self.Clustering.ToString, Self.MinDurationOn, Self.MinDurationOff]);
end;

class operator TSherpaOnnxOfflineSpeakerDiarizationConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineSpeakerDiarizationConfig);
begin
  Dest.MinDurationOn := 0.2;
  Dest.MinDurationOff := 0.5;
end;

function TSherpaOnnxOfflineSpeakerDiarizationSegment.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineSpeakerDiarizationSegment(' +
    'Start := %.3f, '+
    'Stop := %.3f, '+
    'Speaker := %d)',
    [Self.Start, Self.Stop, Self.Speaker]);
end;

constructor TSherpaOnnxOfflineSpeakerDiarization.Create(Config: TSherpaOnnxOfflineSpeakerDiarizationConfig);
var
  C: SherpaOnnxOfflineSpeakerDiarizationConfig;
begin
  C := Default(SherpaOnnxOfflineSpeakerDiarizationConfig);
  C.Segmentation.Pyannote.Model := PAnsiChar(Config.Segmentation.Pyannote.Model);
  C.Segmentation.NumThreads := Config.Segmentation.NumThreads;
  C.Segmentation.Debug := Ord(Config.Segmentation.Debug);
  C.Segmentation.Provider := PAnsiChar(Config.Segmentation.Provider);

  C.Embedding.Model := PAnsiChar(Config.Embedding.Model);
  C.Embedding.NumThreads := Config.Embedding.NumThreads;
  C.Embedding.Debug := Ord(Config.Embedding.Debug);
  C.Embedding.Provider := PAnsiChar(Config.Embedding.Provider);

  C.Clustering.NumClusters := Config.Clustering.NumClusters;
  C.Clustering.Threshold := Config.Clustering.Threshold;

  C.MinDurationOn := Config.MinDurationOn;
  C.MinDurationOff := Config.MinDurationOff;

  Self.Handle := SherpaOnnxCreateOfflineSpeakerDiarization(@C);
  Self._Config := Config;
  Self.SampleRate :=  0;

  if Self.Handle <> nil then
    begin
      Self.SampleRate := SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(Self.Handle);
    end;
end;

destructor TSherpaOnnxOfflineSpeakerDiarization.Destroy;
begin
  SherpaOnnxDestroyOfflineSpeakerDiarization(Self.Handle);
  Self.Handle := nil;
end;

procedure TSherpaOnnxOfflineSpeakerDiarization.SetConfig(Config: TSherpaOnnxOfflineSpeakerDiarizationConfig);
var
  C: SherpaOnnxOfflineSpeakerDiarizationConfig;
begin
  C := Default(SherpaOnnxOfflineSpeakerDiarizationConfig);

  C.Clustering.NumClusters := Config.Clustering.NumClusters;
  C.Clustering.Threshold := Config.Clustering.Threshold;

  SherpaOnnxOfflineSpeakerDiarizationSetConfig(Self.Handle, @C);
end;

function TSherpaOnnxOfflineSpeakerDiarization.Process(const Samples: array of Single): TSherpaOnnxOfflineSpeakerDiarizationSegmentArray;
var
  R: Pointer;
  NumSegments: Integer;
  I: Integer;
  Segments: PSherpaOnnxOfflineSpeakerDiarizationSegment;
begin
  Result := nil;

  R := SherpaOnnxOfflineSpeakerDiarizationProcess(Self.Handle, pcfloat(Samples), Length(Samples));
  if R = nil then
    begin
      Exit
    end;
  NumSegments := SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(R);

  Segments := SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(R);

  SetLength(Result, NumSegments);
  for I := Low(Result) to High(Result) do
    begin
      Result[I].Start := Segments[I].Start;
      Result[I].Stop := Segments[I].Stop;
      Result[I].Speaker := Segments[I].Speaker;
    end;

  SherpaOnnxOfflineSpeakerDiarizationDestroySegment(Segments);
  SherpaOnnxOfflineSpeakerDiarizationDestroyResult(R);
end;

function TSherpaOnnxOfflineSpeakerDiarization.Process(const Samples: array of Single;
  callback: PSherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg): TSherpaOnnxOfflineSpeakerDiarizationSegmentArray;
var
  R: Pointer;
  NumSegments: Integer;
  I: Integer;
  Segments: PSherpaOnnxOfflineSpeakerDiarizationSegment;
begin
  Result := nil;

  R := SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg(Self.Handle, pcfloat(Samples), Length(Samples), callback);
  if R = nil then
    begin
      Exit
    end;
  NumSegments := SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(R);

  Segments := SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(R);

  SetLength(Result, NumSegments);
  for I := Low(Result) to High(Result) do
    begin
      Result[I].Start := Segments[I].Start;
      Result[I].Stop := Segments[I].Stop;
      Result[I].Speaker := Segments[I].Speaker;
    end;

  SherpaOnnxOfflineSpeakerDiarizationDestroySegment(Segments);
  SherpaOnnxOfflineSpeakerDiarizationDestroyResult(R);
end;

function TSherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig(' +
    'Model := %s)', [Self.Model]);
end;

function TSherpaOnnxOfflineSpeechDenoiserDpdfNetModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineSpeechDenoiserDpdfNetModelConfig(' +
    'Model := %s)', [Self.Model]);
end;

function TSherpaOnnxOfflineSpeechDenoiserModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineSpeechDenoiserModelConfig(' +
    'Gtcrn := %s, '+
    'DpdfNet := %s, '+
    'NumThreads := %d, '+
    'Debug := %s, '+
    'Provider := %s)',
    [Self.Gtcrn.ToString, Self.DpdfNet.ToString, Self.NumThreads, Self.Debug.ToString, Self.Provider]);
end;

class operator TSherpaOnnxOfflineSpeechDenoiserModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineSpeechDenoiserModelConfig);
begin
  Dest.NumThreads := 1;
  Dest.Debug := False;
  Dest.Provider := 'cpu';
end;

function TSherpaOnnxOfflineSpeechDenoiserConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineSpeechDenoiserConfig(' +
    'Model := %s)', [Self.Model.ToString]);
end;

function TSherpaOnnxOnlineSpeechDenoiserConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOnlineSpeechDenoiserConfig(' +
    'Model := %s)', [Self.Model.ToString]);
end;

function ExtractDenoisedAudio(Audio: PSherpaOnnxDenoisedAudio): TSherpaOnnxDenoisedAudio;
begin
  Result := Default(TSherpaOnnxDenoisedAudio);

  if Audio = nil then
    Exit;

  SetLength(Result.Samples, Audio^.N);
  Result.SampleRate := Audio^.SampleRate;

  if Audio^.N > 0 then
    Move(Audio^.Samples[0], Result.Samples[0], Audio^.N * SizeOf(Single));

  SherpaOnnxDestroyDenoisedAudio(Audio);
end;

constructor TSherpaOnnxOfflineSpeechDenoiser.Create(Config: TSherpaOnnxOfflineSpeechDenoiserConfig);
var
  C: SherpaOnnxOfflineSpeechDenoiserConfig;
begin
  C := Default(SherpaOnnxOfflineSpeechDenoiserConfig);
  C.Model.Gtcrn.Model := PAnsiChar(Config.Model.Gtcrn.Model);
  C.Model.DpdfNet.Model := PAnsiChar(Config.Model.DpdfNet.Model);
  C.Model.NumThreads := Config.Model.NumThreads;
  C.Model.Debug := Ord(Config.Model.Debug);
  C.Model.Provider := PAnsiChar(Config.Model.Provider);

  Self.Handle := SherpaOnnxCreateOfflineSpeechDenoiser(@C);
  Self._Config := Config;
  Self.SampleRate :=  0;

  if Self.Handle <> nil then
    begin
      Self.SampleRate := SherpaOnnxOfflineSpeechDenoiserGetSampleRate(Self.Handle);
    end;
end;

destructor TSherpaOnnxOfflineSpeechDenoiser.Destroy;
begin
  SherpaOnnxDestroyOfflineSpeechDenoiser(Self.Handle);
  Self.Handle := nil;
end;

function TSherpaOnnxOfflineSpeechDenoiser.Run(const Samples: array of Single; InputSampleRate: Integer): TSherpaOnnxDenoisedAudio;
var
  Audio: PSherpaOnnxDenoisedAudio;
begin
  Audio := SherpaOnnxOfflineSpeechDenoiserRun(Self.Handle, pcfloat(Samples), Length(Samples), InputSampleRate);
  Result := ExtractDenoisedAudio(Audio);
end;

constructor TSherpaOnnxOnlineSpeechDenoiser.Create(Config: TSherpaOnnxOnlineSpeechDenoiserConfig);
var
  C: SherpaOnnxOnlineSpeechDenoiserConfig;
begin
  C := Default(SherpaOnnxOnlineSpeechDenoiserConfig);
  C.Model.Gtcrn.Model := PAnsiChar(Config.Model.Gtcrn.Model);
  C.Model.DpdfNet.Model := PAnsiChar(Config.Model.DpdfNet.Model);
  C.Model.NumThreads := Config.Model.NumThreads;
  C.Model.Debug := Ord(Config.Model.Debug);
  C.Model.Provider := PAnsiChar(Config.Model.Provider);

  Self.Handle := SherpaOnnxCreateOnlineSpeechDenoiser(@C);
  Self._Config := Config;
  Self.SampleRate := 0;
  Self.FrameShiftInSamples := 0;

  if Self.Handle <> nil then
    begin
      Self.SampleRate := SherpaOnnxOnlineSpeechDenoiserGetSampleRate(Self.Handle);
      Self.FrameShiftInSamples := SherpaOnnxOnlineSpeechDenoiserGetFrameShiftInSamples(Self.Handle);
    end;
end;

destructor TSherpaOnnxOnlineSpeechDenoiser.Destroy;
begin
  SherpaOnnxDestroyOnlineSpeechDenoiser(Self.Handle);
  Self.Handle := nil;
end;

function TSherpaOnnxOnlineSpeechDenoiser.Run(const Samples: array of Single; InputSampleRate: Integer): TSherpaOnnxDenoisedAudio;
var
  Audio: PSherpaOnnxDenoisedAudio;
begin
  Audio := SherpaOnnxOnlineSpeechDenoiserRun(Self.Handle, pcfloat(Samples), Length(Samples), InputSampleRate);
  Result := ExtractDenoisedAudio(Audio);
end;

function TSherpaOnnxOnlineSpeechDenoiser.Flush: TSherpaOnnxDenoisedAudio;
var
  Audio: PSherpaOnnxDenoisedAudio;
begin
  Audio := SherpaOnnxOnlineSpeechDenoiserFlush(Self.Handle);
  Result := ExtractDenoisedAudio(Audio);
end;

procedure TSherpaOnnxOnlineSpeechDenoiser.Reset;
begin
  SherpaOnnxOnlineSpeechDenoiserReset(Self.Handle);
end;

initialization
  { Match the C API's default behavior. PocketTTS can raise FP overflow flags
    during native inference on some platforms, and Free Pascal would otherwise
    surface them as EOverflow.
    See also https://github.com/k2-fsa/sherpa-onnx/pull/3351
  }
  SetExceptionMask([exInvalidOp, exDenormalized, exZeroDivide, exOverflow,
    exUnderflow, exPrecision]);

end.


================================================
FILE: sherpa-onnx/python/CMakeLists.txt
================================================
add_subdirectory(csrc)

if(SHERPA_ONNX_ENABLE_TESTS)
  add_subdirectory(tests)
endif()


================================================
FILE: sherpa-onnx/python/csrc/CMakeLists.txt
================================================
include_directories(${PROJECT_SOURCE_DIR})

set(srcs
  audio-tagging.cc
  circular-buffer.cc
  cuda-config.cc
  display.cc
  endpoint.cc
  features.cc
  homophone-replacer.cc
  keyword-spotter.cc
  offline-canary-model-config.cc
  offline-ctc-fst-decoder-config.cc
  offline-dolphin-model-config.cc
  offline-fire-red-asr-ctc-model-config.cc
  offline-fire-red-asr-model-config.cc
  offline-funasr-nano-model-config.cc
  offline-lm-config.cc
  offline-medasr-ctc-model-config.cc
  offline-model-config.cc
  offline-moonshine-model-config.cc
  offline-nemo-enc-dec-ctc-model-config.cc
  offline-omnilingual-asr-ctc-model-config.cc
  offline-paraformer-model-config.cc
  offline-punctuation.cc
  offline-recognizer.cc
  offline-sense-voice-model-config.cc
  offline-source-separation-model-config.cc
  offline-source-separation-spleeter-model-config.cc
  offline-source-separation-uvr-model-config.cc
  offline-source-separation.cc
  offline-speech-denoiser-dpdfnet-model-config.cc
  offline-speech-denoiser-gtcrn-model-config.cc
  offline-speech-denoiser-model-config.cc
  offline-speech-denoiser.cc
  offline-stream.cc
  offline-tdnn-model-config.cc
  offline-transducer-model-config.cc
  offline-wenet-ctc-model-config.cc
  offline-whisper-model-config.cc
  offline-zipformer-ctc-model-config.cc
  online-ctc-fst-decoder-config.cc
  online-lm-config.cc
  online-model-config.cc
  online-nemo-ctc-model-config.cc
  online-paraformer-model-config.cc
  online-punctuation.cc
  online-recognizer.cc
  online-speech-denoiser.cc
  online-stream.cc
  online-t-one-ctc-model-config.cc
  online-transducer-model-config.cc
  online-wenet-ctc-model-config.cc
  online-zipformer2-ctc-model-config.cc
  provider-config.cc
  sherpa-onnx.cc
  silero-vad-model-config.cc
  speaker-embedding-extractor.cc
  speaker-embedding-manager.cc
  spoken-language-identification.cc
  ten-vad-model-config.cc
  tensorrt-config.cc
  vad-model-config.cc
  vad-model.cc
  version.cc
  voice-activity-detector.cc
  wave-writer.cc
)
if(SHERPA_ONNX_HAS_ALSA)
  list(APPEND srcs ${PROJECT_SOURCE_DIR}/sherpa-onnx/csrc/alsa.cc alsa.cc)
else()
  list(APPEND srcs faked-alsa.cc)
endif()

if(SHERPA_ONNX_ENABLE_TTS)
  list(APPEND srcs
    offline-tts-kitten-model-config.cc
    offline-tts-kokoro-model-config.cc
    offline-tts-matcha-model-config.cc
    offline-tts-model-config.cc
    offline-tts-pocket-model-config.cc
    offline-tts-supertonic-model-config.cc
    offline-tts-vits-model-config.cc
    offline-tts-zipvoice-model-config.cc
    offline-tts.cc
    sentence-piece-tokenizer.cc
  )
endif()

if(SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION)
  list(APPEND srcs
    fast-clustering.cc
    offline-speaker-diarization-result.cc
    offline-speaker-diarization.cc
  )
endif()

pybind11_add_module(_sherpa_onnx ${srcs})

if(APPLE)
  execute_process(
    COMMAND "${PYTHON_EXECUTABLE}" -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())"
    OUTPUT_STRIP_TRAILING_WHITESPACE
    OUTPUT_VARIABLE PYTHON_SITE_PACKAGE_DIR
  )
  message(STATUS "PYTHON_SITE_PACKAGE_DIR: ${PYTHON_SITE_PACKAGE_DIR}")
  if(PYTHON_SITE_PACKAGE_DIR STREQUAL "")
    message(WARNING "PYTHON_SITE_PACKAGE_DIR is empty!")
  else()
    target_link_libraries(_sherpa_onnx PRIVATE "-Wl,-rpath,${PYTHON_SITE_PACKAGE_DIR}")
  endif()
endif()

if(NOT WIN32)
  target_link_libraries(_sherpa_onnx PRIVATE "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/sherpa_onnx/lib")
endif()

target_link_libraries(_sherpa_onnx PRIVATE sherpa-onnx-core)

if(SHERPA_ONNX_HAS_ALSA)
  if(DEFINED ENV{SHERPA_ONNX_ALSA_LIB_DIR})
    target_link_libraries(_sherpa_onnx PRIVATE -L$ENV{SHERPA_ONNX_ALSA_LIB_DIR} -lasound)
  else()
    target_link_libraries(_sherpa_onnx PRIVATE asound)
  endif()
endif()

install(TARGETS _sherpa_onnx DESTINATION lib)


================================================
FILE: sherpa-onnx/python/csrc/alsa.cc
================================================
// sherpa-onnx/python/csrc/alsa.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/alsa.h"

#include <vector>

#include "sherpa-onnx/csrc/alsa.h"

namespace sherpa_onnx {

void PybindAlsa(py::module *m) {
  using PyClass = Alsa;
  py::class_<PyClass>(*m, "Alsa")
      .def(py::init<const char *>(), py::arg("device_name"),
           py::call_guard<py::gil_scoped_release>())
      .def(
          "read",
          [](PyClass &self, int32_t num_samples) -> std::vector<float> {
            return self.Read(num_samples);
          },
          py::arg("num_samples"), py::call_guard<py::gil_scoped_release>())
      .def_property_readonly("expected_sample_rate",
                             &PyClass::GetExpectedSampleRate)
      .def_property_readonly("actual_sample_rate",
                             &PyClass::GetActualSampleRate);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/alsa.h
================================================
// sherpa-onnx/python/csrc/alsa.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_ALSA_H_
#define SHERPA_ONNX_PYTHON_CSRC_ALSA_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindAlsa(py::module *m);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_PYTHON_CSRC_ALSA_H_


================================================
FILE: sherpa-onnx/python/csrc/audio-tagging.cc
================================================
// sherpa-onnx/python/csrc/audio-tagging.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/audio-tagging.h"

#include <string>

#include "sherpa-onnx/csrc/audio-tagging.h"

namespace sherpa_onnx {

static void PybindOfflineZipformerAudioTaggingModelConfig(py::module *m) {
  using PyClass = OfflineZipformerAudioTaggingModelConfig;
  py::class_<PyClass>(*m, "OfflineZipformerAudioTaggingModelConfig")
      .def(py::init<>())
      .def(py::init<const std::string &>(), py::arg("model"))
      .def_readwrite("model", &PyClass::model)
      .def("validate", &PyClass::Validate)
      .def("__str__", &PyClass::ToString);
}

static void PybindAudioTaggingModelConfig(py::module *m) {
  PybindOfflineZipformerAudioTaggingModelConfig(m);

  using PyClass = AudioTaggingModelConfig;

  py::class_<PyClass>(*m, "AudioTaggingModelConfig")
      .def(py::init<>())
      .def(py::init<const OfflineZipformerAudioTaggingModelConfig &,
                    const std::string &, int32_t, bool, const std::string &>(),
           py::arg("zipformer") = OfflineZipformerAudioTaggingModelConfig{},
           py::arg("ced") = "", py::arg("num_threads") = 1,
           py::arg("debug") = false, py::arg("provider") = "cpu")
      .def_readwrite("zipformer", &PyClass::zipformer)
      .def_readwrite("num_threads", &PyClass::num_threads)
      .def_readwrite("debug", &PyClass::debug)
      .def_readwrite("provider", &PyClass::provider)
      .def("validate", &PyClass::Validate)
      .def("__str__", &PyClass::ToString);
}

static void PybindAudioTaggingConfig(py::module *m) {
  PybindAudioTaggingModelConfig(m);

  using PyClass = AudioTaggingConfig;

  py::class_<PyClass>(*m, "AudioTaggingConfig")
      .def(py::init<>())
      .def(py::init<const AudioTaggingModelConfig &, const std::string &,
                    int32_t>(),
           py::arg("model"), py::arg("labels"), py::arg("top_k") = 5)
      .def_readwrite("model", &PyClass::model)
      .def_readwrite("labels", &PyClass::labels)
      .def_readwrite("top_k", &PyClass::top_k)
      .def("validate", &PyClass::Validate)
      .def("__str__", &PyClass::ToString);
}

static void PybindAudioEvent(py::module *m) {
  using PyClass = AudioEvent;

  py::class_<PyClass>(*m, "AudioEvent")
      .def_property_readonly(
          "name", [](const PyClass &self) -> std::string { return self.name; })
      .def_property_readonly(
          "index", [](const PyClass &self) -> int32_t { return self.index; })
      .def_property_readonly(
          "prob", [](const PyClass &self) -> float { return self.prob; })
      .def("__str__", &PyClass::ToString);
}

void PybindAudioTagging(py::module *m) {
  PybindAudioTaggingConfig(m);
  PybindAudioEvent(m);

  using PyClass = AudioTagging;

  py::class_<PyClass>(*m, "AudioTagging")
      .def(py::init<const AudioTaggingConfig &>(), py::arg("config"),
           py::call_guard<py::gil_scoped_release>())
      .def("create_stream", &PyClass::CreateStream,
           py::call_guard<py::gil_scoped_release>())
      .def("compute", &PyClass::Compute, py::arg("s"), py::arg("top_k") = -1,
           py::call_guard<py::gil_scoped_release>());
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/audio-tagging.h
================================================
// sherpa-onnx/python/csrc/audio-tagging.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_AUDIO_TAGGING_H_
#define SHERPA_ONNX_PYTHON_CSRC_AUDIO_TAGGING_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindAudioTagging(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_AUDIO_TAGGING_H_


================================================
FILE: sherpa-onnx/python/csrc/circular-buffer.cc
================================================
// sherpa-onnx/python/csrc/circular-buffer.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/circular-buffer.h"

#include <vector>

#include "sherpa-onnx/csrc/circular-buffer.h"

namespace sherpa_onnx {

void PybindCircularBuffer(py::module *m) {
  using PyClass = CircularBuffer;
  py::class_<PyClass>(*m, "CircularBuffer")
      .def(py::init<int32_t>(), py::arg("capacity"))
      .def(
          "push",
          [](PyClass &self, const std::vector<float> &samples) {
            self.Push(samples.data(), samples.size());
          },
          py::arg("samples"), py::call_guard<py::gil_scoped_release>())
      .def("get", &PyClass::Get, py::arg("start_index"), py::arg("n"),
           py::call_guard<py::gil_scoped_release>())
      .def("pop", &PyClass::Pop, py::arg("n"),
           py::call_guard<py::gil_scoped_release>())
      .def("reset", &PyClass::Reset, py::call_guard<py::gil_scoped_release>())
      .def_property_readonly("size", &PyClass::Size)
      .def_property_readonly("head", &PyClass::Head)
      .def_property_readonly("tail", &PyClass::Tail);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/circular-buffer.h
================================================
// sherpa-onnx/python/csrc/circular-buffer.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_CIRCULAR_BUFFER_H_
#define SHERPA_ONNX_PYTHON_CSRC_CIRCULAR_BUFFER_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindCircularBuffer(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_CIRCULAR_BUFFER_H_


================================================
FILE: sherpa-onnx/python/csrc/cuda-config.cc
================================================
// sherpa-onnx/python/csrc/cuda-config.cc
//
// Copyright (c)  2024  Uniphore (Author: Manickavela A)

#include "sherpa-onnx/python/csrc/cuda-config.h"

#include <memory>
#include <string>

#include "sherpa-onnx/csrc/provider-config.h"

namespace sherpa_onnx {

void PybindCudaConfig(py::module *m) {
  using PyClass = CudaConfig;
  py::class_<PyClass>(*m, "CudaConfig")
      .def(py::init<>())
      .def(py::init<int32_t>(),
           py::arg("cudnn_conv_algo_search") = 1)
      .def_readwrite("cudnn_conv_algo_search", &PyClass::cudnn_conv_algo_search)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/cuda-config.h
================================================
// sherpa-onnx/python/csrc/cuda-config.h
//
// Copyright (c)  2024  Uniphore (Author: Manickavela A)

#ifndef SHERPA_ONNX_PYTHON_CSRC_CUDA_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_CUDA_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindCudaConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_CUDA_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/display.cc
================================================
// sherpa-onnx/python/csrc/display.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/display.h"

#include "sherpa-onnx/csrc/display.h"

namespace sherpa_onnx {

void PybindDisplay(py::module *m) {
  using PyClass = Display;
  py::class_<PyClass>(*m, "Display")
      .def(py::init<int32_t>(), py::arg("max_word_per_line") = 60)
      .def("print", &PyClass::Print, py::arg("idx"), py::arg("s"));
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/display.h
================================================
// sherpa-onnx/python/csrc/display.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_DISPLAY_H_
#define SHERPA_ONNX_PYTHON_CSRC_DISPLAY_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindDisplay(py::module *m);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_PYTHON_CSRC_DISPLAY_H_


================================================
FILE: sherpa-onnx/python/csrc/endpoint.cc
================================================
// sherpa-onnx/csrc/endpoint.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/endpoint.h"

#include <memory>
#include <string>

#include "sherpa-onnx/csrc/endpoint.h"

namespace sherpa_onnx {

static constexpr const char *kEndpointRuleInitDoc = R"doc(
Constructor for EndpointRule.

Args:
  must_contain_nonsilence:
    If True, for this endpointing rule to apply there must be nonsilence in the
    best-path traceback. For decoding, a non-blank token is considered as
    non-silence.
  min_trailing_silence:
    This endpointing rule requires duration of trailing silence (in seconds)
    to be ``>=`` this value.
  min_utterance_length:
    This endpointing rule requires utterance-length (in seconds) to
    be ``>=`` this value.
)doc";

static constexpr const char *kEndpointConfigInitDoc = R"doc(
If any rule in EndpointConfig is activated, it is said that an endpointing
is detected.

Args:
  rule1:
    By default, it times out after 2.4 seconds of silence, even if
    we decoded nothing.
  rule2:
    By default, it times out after 1.2 seconds of silence after decoding
    something.
  rule3:
    By default, it times out after the utterance is 20 seconds long, regardless of
    anything else.
)doc";

static void PybindEndpointRule(py::module *m) {
  using PyClass = EndpointRule;
  py::class_<PyClass>(*m, "EndpointRule")
      .def(py::init<bool, float, float>(), py::arg("must_contain_nonsilence"),
           py::arg("min_trailing_silence"), py::arg("min_utterance_length"),
           kEndpointRuleInitDoc)
      .def("__str__", &PyClass::ToString)
      .def_readwrite("must_contain_nonsilence",
                     &PyClass::must_contain_nonsilence)
      .def_readwrite("min_trailing_silence", &PyClass::min_trailing_silence)
      .def_readwrite("min_utterance_length", &PyClass::min_utterance_length);
}

static void PybindEndpointConfig(py::module *m) {
  using PyClass = EndpointConfig;
  py::class_<PyClass>(*m, "EndpointConfig")
      .def(
          py::init(
              [](float rule1_min_trailing_silence,
                 float rule2_min_trailing_silence,
                 float rule3_min_utterance_length) -> std::unique_ptr<PyClass> {
                EndpointRule rule1(false, rule1_min_trailing_silence, 0);
                EndpointRule rule2(true, rule2_min_trailing_silence, 0);
                EndpointRule rule3(false, 0, rule3_min_utterance_length);

                return std::make_unique<EndpointConfig>(rule1, rule2, rule3);
              }),
          py::arg("rule1_min_trailing_silence"),
          py::arg("rule2_min_trailing_silence"),
          py::arg("rule3_min_utterance_length"))
      .def(py::init([](const EndpointRule &rule1, const EndpointRule &rule2,
                       const EndpointRule &rule3) -> std::unique_ptr<PyClass> {
             auto ans = std::make_unique<PyClass>();
             ans->rule1 = rule1;
             ans->rule2 = rule2;
             ans->rule3 = rule3;
             return ans;
           }),
           py::arg("rule1") = EndpointRule(false, 2.4, 0),
           py::arg("rule2") = EndpointRule(true, 1.2, 0),
           py::arg("rule3") = EndpointRule(false, 0, 20),
           kEndpointConfigInitDoc)
      .def("__str__",
           [](const PyClass &self) -> std::string { return self.ToString(); })
      .def_readwrite("rule1", &PyClass::rule1)
      .def_readwrite("rule2", &PyClass::rule2)
      .def_readwrite("rule3", &PyClass::rule3);
}

void PybindEndpoint(py::module *m) {
  PybindEndpointRule(m);
  PybindEndpointConfig(m);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/endpoint.h
================================================
// sherpa-onnx/csrc/endpoint.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_ENDPOINT_H_
#define SHERPA_ONNX_PYTHON_CSRC_ENDPOINT_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindEndpoint(py::module *m);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_PYTHON_CSRC_ENDPOINT_H_


================================================
FILE: sherpa-onnx/python/csrc/faked-alsa.cc
================================================
// sherpa-onnx/python/csrc/faked-alsa.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include <vector>

#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/python/csrc/alsa.h"

namespace sherpa_onnx {

class FakedAlsa {
 public:
  explicit FakedAlsa(const char *) {
    SHERPA_ONNX_LOGE("This function is for Linux only.");
#if (SHERPA_ONNX_ENABLE_ALSA == 0) && (defined(__unix__) || defined(__unix))
    SHERPA_ONNX_LOGE(R"doc(
sherpa-onnx is compiled without alsa support. To enable that, please run
  (1) sudo apt-get install alsa-utils libasound2-dev
  (2) rebuild sherpa-onnx
)doc");
#endif
    exit(-1);
  }

  std::vector<float> Read(int32_t) const { return {}; }
  int32_t GetExpectedSampleRate() const { return -1; }
  int32_t GetActualSampleRate() const { return -1; }
};

void PybindAlsa(py::module *m) {
  using PyClass = FakedAlsa;
  py::class_<PyClass>(*m, "Alsa")
      .def(py::init<const char *>(), py::arg("device_name"))
      .def(
          "read",
          [](PyClass &self, int32_t num_samples) -> std::vector<float> {
            return self.Read(num_samples);
          },
          py::arg("num_samples"), py::call_guard<py::gil_scoped_release>())
      .def_property_readonly("expected_sample_rate",
                             &PyClass::GetExpectedSampleRate)
      .def_property_readonly("actual_sample_rate",
                             &PyClass::GetActualSampleRate);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/fast-clustering.cc
================================================
// sherpa-onnx/python/csrc/fast-clustering.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/fast-clustering.h"

#include <sstream>
#include <vector>

#include "sherpa-onnx/csrc/fast-clustering.h"

namespace sherpa_onnx {

static void PybindFastClusteringConfig(py::module *m) {
  using PyClass = FastClusteringConfig;
  py::class_<PyClass>(*m, "FastClusteringConfig")
      .def(py::init<int32_t, float>(), py::arg("num_clusters") = -1,
           py::arg("threshold") = 0.5)
      .def_readwrite("num_clusters", &PyClass::num_clusters)
      .def_readwrite("threshold", &PyClass::threshold)
      .def("__str__", &PyClass::ToString)
      .def("validate", &PyClass::Validate);
}

void PybindFastClustering(py::module *m) {
  PybindFastClusteringConfig(m);

  using PyClass = FastClustering;
  py::class_<PyClass>(*m, "FastClustering")
      .def(py::init<const FastClusteringConfig &>(), py::arg("config"))
      .def(
          "__call__",
          [](const PyClass &self,
             py::array_t<float> features) -> std::vector<int32_t> {
            if (!(features.flags() & py::array::c_style)) {
              throw py::value_error(
                  "input features should be contiguous. Please use "
                  "np.ascontiguousarray(features)");
            }

            int num_dim = features.ndim();
            if (num_dim != 2) {
              std::ostringstream os;
              os << "Expect an array of 2 dimensions. Given dim: " << num_dim
                 << "\n";
              throw py::value_error(os.str());
            }

            int32_t num_rows = features.shape(0);
            int32_t num_cols = features.shape(1);
            float *p = features.mutable_data();
            py::gil_scoped_release release;
            return self.Cluster(p, num_rows, num_cols);
          },
          py::arg("features"));
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/fast-clustering.h
================================================
// sherpa-onnx/python/csrc/fast-clustering.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_FAST_CLUSTERING_H_
#define SHERPA_ONNX_PYTHON_CSRC_FAST_CLUSTERING_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindFastClustering(py::module *m);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_PYTHON_CSRC_FAST_CLUSTERING_H_


================================================
FILE: sherpa-onnx/python/csrc/features.cc
================================================
// sherpa-onnx/python/csrc/features.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/features.h"

#include "sherpa-onnx/csrc/features.h"

namespace sherpa_onnx {

static void PybindFeatureExtractorConfig(py::module *m) {
  using PyClass = FeatureExtractorConfig;
  py::class_<PyClass>(*m, "FeatureExtractorConfig")
      .def(py::init<int32_t, int32_t, float, float, float, bool, bool>(),
           py::arg("sampling_rate") = 16000,
           py::arg("feature_dim") = 80,
           py::arg("low_freq") = 20.0f,
           py::arg("high_freq") = -400.0f,
           py::arg("dither") = 0.0f,
           py::arg("normalize_samples") = true,
           py::arg("snip_edges") = false)
      .def_readwrite("sampling_rate", &PyClass::sampling_rate)
      .def_readwrite("feature_dim", &PyClass::feature_dim)
      .def_readwrite("low_freq", &PyClass::low_freq)
      .def_readwrite("high_freq", &PyClass::high_freq)
      .def_readwrite("dither", &PyClass::dither)
      .def_readwrite("normalize_samples", &PyClass::normalize_samples)
      .def_readwrite("snip_edges", &PyClass::snip_edges)
      .def("__str__", &PyClass::ToString);
}

void PybindFeatures(py::module *m) { PybindFeatureExtractorConfig(m); }

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/features.h
================================================
// sherpa-onnx/python/csrc/features.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_FEATURES_H_
#define SHERPA_ONNX_PYTHON_CSRC_FEATURES_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindFeatures(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_FEATURES_H_


================================================
FILE: sherpa-onnx/python/csrc/homophone-replacer.cc
================================================
// sherpa-onnx/python/csrc/homophone-replacer.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/homophone-replacer.h"

#include <string>

#include "sherpa-onnx/csrc/homophone-replacer.h"

namespace sherpa_onnx {

void PybindHomophoneReplacer(py::module *m) {
  using PyClass = HomophoneReplacerConfig;
  py::class_<PyClass>(*m, "HomophoneReplacerConfig")
      .def(py::init<>())
      .def(py::init<const std::string &, const std::string &,
                    const std::string &, bool>(),
           py::arg("dict_dir") = "", py::arg("lexicon") = "",
           py::arg("rule_fsts") = "", py::arg("debug") = false)
      .def_readwrite("dict_dir", &PyClass::dict_dir)
      .def_readwrite("lexicon", &PyClass::lexicon)
      .def_readwrite("rule_fsts", &PyClass::rule_fsts)
      .def_readwrite("debug", &PyClass::debug)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/homophone-replacer.h
================================================
// sherpa-onnx/python/csrc/homophone-replacer.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_HOMOPHONE_REPLACER_H_
#define SHERPA_ONNX_PYTHON_CSRC_HOMOPHONE_REPLACER_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindHomophoneReplacer(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_HOMOPHONE_REPLACER_H_


================================================
FILE: sherpa-onnx/python/csrc/keyword-spotter.cc
================================================
// sherpa-onnx/python/csrc/keyword-spotter.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/keyword-spotter.h"

#include <string>
#include <vector>

#include "sherpa-onnx/csrc/keyword-spotter.h"

namespace sherpa_onnx {

static void PybindKeywordResult(py::module *m) {
  using PyClass = KeywordResult;
  py::class_<PyClass>(*m, "KeywordResult")
      .def_property_readonly(
          "keyword",
          [](PyClass &self) -> py::str {
            return py::str(PyUnicode_DecodeUTF8(self.keyword.c_str(),
                                                self.keyword.size(), "ignore"));
          })
      .def_property_readonly(
          "tokens",
          [](PyClass &self) -> std::vector<std::string> { return self.tokens; })
      .def_property_readonly(
          "timestamps",
          [](PyClass &self) -> std::vector<float> { return self.timestamps; });
}

static void PybindKeywordSpotterConfig(py::module *m) {
  using PyClass = KeywordSpotterConfig;
  py::class_<PyClass>(*m, "KeywordSpotterConfig")
      .def(py::init<const FeatureExtractorConfig &, const OnlineModelConfig &,
                    int32_t, int32_t, float, float, const std::string &>(),
           py::arg("feat_config"), py::arg("model_config"),
           py::arg("max_active_paths") = 4, py::arg("num_trailing_blanks") = 1,
           py::arg("keywords_score") = 1.0,
           py::arg("keywords_threshold") = 0.25, py::arg("keywords_file") = "")
      .def_readwrite("feat_config", &PyClass::feat_config)
      .def_readwrite("model_config", &PyClass::model_config)
      .def_readwrite("max_active_paths", &PyClass::max_active_paths)
      .def_readwrite("num_trailing_blanks", &PyClass::num_trailing_blanks)
      .def_readwrite("keywords_score", &PyClass::keywords_score)
      .def_readwrite("keywords_threshold", &PyClass::keywords_threshold)
      .def_readwrite("keywords_file", &PyClass::keywords_file)
      .def("__str__", &PyClass::ToString);
}

void PybindKeywordSpotter(py::module *m) {
  PybindKeywordResult(m);
  PybindKeywordSpotterConfig(m);

  using PyClass = KeywordSpotter;
  py::class_<PyClass>(*m, "KeywordSpotter")
      .def(py::init<const KeywordSpotterConfig &>(), py::arg("config"),
           py::call_guard<py::gil_scoped_release>())
      .def(
          "create_stream",
          [](const PyClass &self) { return self.CreateStream(); },
          py::call_guard<py::gil_scoped_release>())
      .def(
          "create_stream",
          [](PyClass &self, const std::string &keywords) {
            return self.CreateStream(keywords);
          },
          py::arg("keywords"), py::call_guard<py::gil_scoped_release>())
      .def("is_ready", &PyClass::IsReady,
           py::call_guard<py::gil_scoped_release>())
      .def("reset", &PyClass::Reset, py::call_guard<py::gil_scoped_release>())
      .def("decode_stream", &PyClass::DecodeStream,
           py::call_guard<py::gil_scoped_release>())
      .def(
          "decode_streams",
          [](PyClass &self, std::vector<OnlineStream *> ss) {
            self.DecodeStreams(ss.data(), ss.size());
          },
          py::call_guard<py::gil_scoped_release>())
      .def("get_result", &PyClass::GetResult,
           py::call_guard<py::gil_scoped_release>());
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/keyword-spotter.h
================================================
// sherpa-onnx/python/csrc/keyword-spotter.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_KEYWORD_SPOTTER_H_
#define SHERPA_ONNX_PYTHON_CSRC_KEYWORD_SPOTTER_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindKeywordSpotter(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_KEYWORD_SPOTTER_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-canary-model-config.cc
================================================
// sherpa-onnx/python/csrc/offline-canary-model-config.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-canary-model-config.h"

#include <string>
#include <vector>

#include "sherpa-onnx/python/csrc/offline-canary-model-config.h"

namespace sherpa_onnx {

void PybindOfflineCanaryModelConfig(py::module *m) {
  using PyClass = OfflineCanaryModelConfig;
  py::class_<PyClass>(*m, "OfflineCanaryModelConfig")
      .def(py::init<const std::string &, const std::string &,
                    const std::string &, const std::string &, bool>(),
           py::arg("encoder") = "", py::arg("decoder") = "",
           py::arg("src_lang") = "", py::arg("tgt_lang") = "",
           py::arg("use_pnc") = true)
      .def_readwrite("encoder", &PyClass::encoder)
      .def_readwrite("decoder", &PyClass::decoder)
      .def_readwrite("src_lang", &PyClass::src_lang)
      .def_readwrite("tgt_lang", &PyClass::tgt_lang)
      .def_readwrite("use_pnc", &PyClass::use_pnc)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-canary-model-config.h
================================================
// sherpa-onnx/python/csrc/offline-canary-model-config.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_CANARY_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_CANARY_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineCanaryModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_CANARY_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-ctc-fst-decoder-config.cc
================================================
// sherpa-onnx/python/csrc/offline-ctc-fst-decoder-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/offline-ctc-fst-decoder-config.h"

#include <string>

#include "sherpa-onnx/csrc/offline-ctc-fst-decoder-config.h"

namespace sherpa_onnx {

void PybindOfflineCtcFstDecoderConfig(py::module *m) {
  using PyClass = OfflineCtcFstDecoderConfig;
  py::class_<PyClass>(*m, "OfflineCtcFstDecoderConfig")
      .def(py::init<const std::string &, int32_t>(), py::arg("graph") = "",
           py::arg("max_active") = 3000)
      .def_readwrite("graph", &PyClass::graph)
      .def_readwrite("max_active", &PyClass::max_active)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-ctc-fst-decoder-config.h
================================================
// sherpa-onnx/python/csrc/offline-ctc-fst-decoder-config.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_CTC_FST_DECODER_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_CTC_FST_DECODER_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineCtcFstDecoderConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_CTC_FST_DECODER_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-dolphin-model-config.cc
================================================
// sherpa-onnx/python/csrc/offline-dolphin-model-config.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-dolphin-model-config.h"

#include <string>
#include <vector>

#include "sherpa-onnx/python/csrc/offline-dolphin-model-config.h"

namespace sherpa_onnx {

void PybindOfflineDolphinModelConfig(py::module *m) {
  using PyClass = OfflineDolphinModelConfig;
  py::class_<PyClass>(*m, "OfflineDolphinModelConfig")
      .def(py::init<>())
      .def(py::init<const std::string &>(), py::arg("model"))
      .def_readwrite("model", &PyClass::model)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-dolphin-model-config.h
================================================
// sherpa-onnx/python/csrc/offline-dolphin-model-config.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_DOLPHIN_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_DOLPHIN_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineDolphinModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_DOLPHIN_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-fire-red-asr-ctc-model-config.cc
================================================
// sherpa-onnx/python/csrc/offline-fire-red-asr-ctc-model-config.cc
//
// Copyright (c)  2026  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-fire-red-asr-ctc-model-config.h"

#include <string>
#include <vector>

#include "sherpa-onnx/python/csrc/offline-fire-red-asr-ctc-model-config.h"

namespace sherpa_onnx {

void PybindOfflineFireRedAsrCtcModelConfig(py::module *m) {
  using PyClass = OfflineFireRedAsrCtcModelConfig;
  py::class_<PyClass>(*m, "OfflineFireRedAsrCtcModelConfig")
      .def(py::init<const std::string &>(), py::arg("model"))
      .def_readwrite("model", &PyClass::model)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-fire-red-asr-ctc-model-config.h
================================================
// sherpa-onnx/python/csrc/offline-fire-red-asr-ctc-model-config.h
//
// Copyright (c)  2026  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_FIRE_RED_ASR_CTC_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_FIRE_RED_ASR_CTC_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineFireRedAsrCtcModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_FIRE_RED_ASR_CTC_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-fire-red-asr-model-config.cc
================================================
// sherpa-onnx/python/csrc/offline-fire-red-asr-model-config.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-fire-red-asr-model-config.h"

#include <string>
#include <vector>

#include "sherpa-onnx/python/csrc/offline-fire-red-asr-model-config.h"

namespace sherpa_onnx {

void PybindOfflineFireRedAsrModelConfig(py::module *m) {
  using PyClass = OfflineFireRedAsrModelConfig;
  py::class_<PyClass>(*m, "OfflineFireRedAsrModelConfig")
      .def(py::init<const std::string &, const std::string &>(),
           py::arg("encoder"), py::arg("decoder"))
      .def_readwrite("encoder", &PyClass::encoder)
      .def_readwrite("decoder", &PyClass::decoder)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-fire-red-asr-model-config.h
================================================
// sherpa-onnx/python/csrc/offline-fire-red-asr-model-config.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_FIRE_RED_ASR_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_FIRE_RED_ASR_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineFireRedAsrModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_FIRE_RED_ASR_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-funasr-nano-model-config.cc
================================================
// sherpa-onnx/python/csrc/offline-funasr-nano-model-config.cc
//
// Copyright (c)  2025  zengyw

#include "sherpa-onnx/csrc/offline-funasr-nano-model-config.h"

#include <string>

#include "sherpa-onnx/python/csrc/offline-funasr-nano-model-config.h"

namespace sherpa_onnx {

void PybindOfflineFunASRNanoModelConfig(py::module *m) {
  using PyClass = OfflineFunASRNanoModelConfig;
  py::class_<PyClass>(*m, "OfflineFunASRNanoModelConfig")
      .def(py::init<>())
      .def_readwrite("encoder_adaptor", &PyClass::encoder_adaptor)
      .def_readwrite("llm", &PyClass::llm)
      .def_readwrite("embedding", &PyClass::embedding)
      .def_readwrite("tokenizer", &PyClass::tokenizer)
      .def_readwrite("system_prompt", &PyClass::system_prompt)
      .def_readwrite("user_prompt", &PyClass::user_prompt)
      .def_readwrite("max_new_tokens", &PyClass::max_new_tokens)
      .def_readwrite("temperature", &PyClass::temperature)
      .def_readwrite("top_p", &PyClass::top_p)
      .def_readwrite("seed", &PyClass::seed)
      .def_readwrite("language", &PyClass::language)
      .def_readwrite("itn", &PyClass::itn)
      .def_readwrite("hotwords", &PyClass::hotwords)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-funasr-nano-model-config.h
================================================
// sherpa-onnx/python/csrc/offline-funasr-nano-model-config.h
//
// Copyright (c)  2025  zengyw

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_FUNASR_NANO_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_FUNASR_NANO_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineFunASRNanoModelConfig(py::module *m);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_FUNASR_NANO_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-lm-config.cc
================================================
// sherpa-onnx/python/csrc/offline-lm-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/offline-lm-config.h"

#include <string>

#include "sherpa-onnx//csrc/offline-lm-config.h"

namespace sherpa_onnx {

void PybindOfflineLMConfig(py::module *m) {
  using PyClass = OfflineLMConfig;
  py::class_<PyClass>(*m, "OfflineLMConfig")
      .def(py::init<const std::string &, float, int32_t, const std::string &,
           const std::string &, float, int32_t>(),
           py::arg("model"), py::arg("scale") = 0.5f,
           py::arg("lm_num_threads") = 1, py::arg("lm_provider") = "cpu",
           py::arg("lodr_fst") = "", py::arg("lodr_scale") = 0.0f,
           py::arg("lodr_backoff_id") = -1)
      .def_readwrite("model", &PyClass::model)
      .def_readwrite("scale", &PyClass::scale)
      .def_readwrite("lm_provider", &PyClass::lm_provider)
      .def_readwrite("lm_num_threads", &PyClass::lm_num_threads)
      .def_readwrite("lodr_fst", &PyClass::lodr_fst)
      .def_readwrite("lodr_scale", &PyClass::lodr_scale)
      .def_readwrite("lodr_backoff_id", &PyClass::lodr_backoff_id)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-lm-config.h
================================================
// sherpa-onnx/python/csrc/offline-lm-config.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_LM_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_LM_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineLMConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_LM_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-medasr-ctc-model-config.cc
================================================
// sherpa-onnx/python/csrc/offline-medasr-ctc-model-config.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-medasr-ctc-model-config.h"

#include <string>
#include <vector>

#include "sherpa-onnx/python/csrc/offline-medasr-ctc-model-config.h"

namespace sherpa_onnx {

void PybindOfflineMedAsrCtcModelConfig(py::module *m) {
  using PyClass = OfflineMedAsrCtcModelConfig;
  py::class_<PyClass>(*m, "OfflineMedAsrCtcModelConfig")
      .def(py::init<const std::string &>(), py::arg("model"))
      .def_readwrite("model", &PyClass::model)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-medasr-ctc-model-config.h
================================================
// sherpa-onnx/python/csrc/offline-medasr-ctc-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_MEDASR_CTC_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_MEDASR_CTC_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineMedAsrCtcModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_MEDASR_CTC_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-model-config.cc
================================================
// sherpa-onnx/python/csrc/offline-model-config.cc
//
// Copyright (c)  2023 by manyeyes

#include "sherpa-onnx/python/csrc/offline-model-config.h"

#include <string>
#include <vector>

#include "sherpa-onnx/csrc/offline-model-config.h"
#include "sherpa-onnx/python/csrc/offline-canary-model-config.h"
#include "sherpa-onnx/python/csrc/offline-dolphin-model-config.h"
#include "sherpa-onnx/python/csrc/offline-fire-red-asr-ctc-model-config.h"
#include "sherpa-onnx/python/csrc/offline-fire-red-asr-model-config.h"
#include "sherpa-onnx/python/csrc/offline-funasr-nano-model-config.h"
#include "sherpa-onnx/python/csrc/offline-medasr-ctc-model-config.h"
#include "sherpa-onnx/python/csrc/offline-moonshine-model-config.h"
#include "sherpa-onnx/python/csrc/offline-nemo-enc-dec-ctc-model-config.h"
#include "sherpa-onnx/python/csrc/offline-omnilingual-asr-ctc-model-config.h"
#include "sherpa-onnx/python/csrc/offline-paraformer-model-config.h"
#include "sherpa-onnx/python/csrc/offline-sense-voice-model-config.h"
#include "sherpa-onnx/python/csrc/offline-tdnn-model-config.h"
#include "sherpa-onnx/python/csrc/offline-transducer-model-config.h"
#include "sherpa-onnx/python/csrc/offline-wenet-ctc-model-config.h"
#include "sherpa-onnx/python/csrc/offline-whisper-model-config.h"
#include "sherpa-onnx/python/csrc/offline-zipformer-ctc-model-config.h"

namespace sherpa_onnx {

void PybindOfflineModelConfig(py::module *m) {
  PybindOfflineTransducerModelConfig(m);
  PybindOfflineParaformerModelConfig(m);
  PybindOfflineNemoEncDecCtcModelConfig(m);
  PybindOfflineWhisperModelConfig(m);
  PybindOfflineFireRedAsrModelConfig(m);
  PybindOfflineTdnnModelConfig(m);
  PybindOfflineZipformerCtcModelConfig(m);
  PybindOfflineWenetCtcModelConfig(m);
  PybindOfflineSenseVoiceModelConfig(m);
  PybindOfflineMoonshineModelConfig(m);
  PybindOfflineDolphinModelConfig(m);
  PybindOfflineCanaryModelConfig(m);
  PybindOfflineOmnilingualAsrCtcModelConfig(m);
  PybindOfflineFunASRNanoModelConfig(m);
  PybindOfflineMedAsrCtcModelConfig(m);
  PybindOfflineFireRedAsrCtcModelConfig(m);

  using PyClass = OfflineModelConfig;
  py::class_<PyClass>(*m, "OfflineModelConfig")
      .def(py::init<const OfflineTransducerModelConfig &,
                    const OfflineParaformerModelConfig &,
                    const OfflineNemoEncDecCtcModelConfig &,
                    const OfflineWhisperModelConfig &,
                    const OfflineFireRedAsrModelConfig &,
                    const OfflineTdnnModelConfig &,
                    const OfflineZipformerCtcModelConfig &,
                    const OfflineWenetCtcModelConfig &,
                    const OfflineSenseVoiceModelConfig &,
                    const OfflineMoonshineModelConfig &,
                    const OfflineDolphinModelConfig &,
                    const OfflineCanaryModelConfig &,
                    const OfflineOmnilingualAsrCtcModelConfig &,
                    const OfflineFunASRNanoModelConfig &,
                    const OfflineMedAsrCtcModelConfig &,
                    const OfflineFireRedAsrCtcModelConfig &,
                    const std::string &, const std::string &, int32_t, bool,
                    const std::string &, const std::string &,
                    const std::string &, const std::string &>(),
           py::arg("transducer") = OfflineTransducerModelConfig(),
           py::arg("paraformer") = OfflineParaformerModelConfig(),
           py::arg("nemo_ctc") = OfflineNemoEncDecCtcModelConfig(),
           py::arg("whisper") = OfflineWhisperModelConfig(),
           py::arg("fire_red_asr") = OfflineFireRedAsrModelConfig(),
           py::arg("tdnn") = OfflineTdnnModelConfig(),
           py::arg("zipformer_ctc") = OfflineZipformerCtcModelConfig(),
           py::arg("wenet_ctc") = OfflineWenetCtcModelConfig(),
           py::arg("sense_voice") = OfflineSenseVoiceModelConfig(),
           py::arg("moonshine") = OfflineMoonshineModelConfig(),
           py::arg("dolphin") = OfflineDolphinModelConfig(),
           py::arg("canary") = OfflineCanaryModelConfig(),
           py::arg("omnilingual") = OfflineOmnilingualAsrCtcModelConfig(),
           py::arg("funasr_nano") = OfflineFunASRNanoModelConfig(),
           py::arg("medasr") = OfflineMedAsrCtcModelConfig(),
           py::arg("fire_red_asr_ctc") = OfflineFireRedAsrCtcModelConfig(),
           py::arg("telespeech_ctc") = "", py::arg("tokens") = "",
           py::arg("num_threads") = 1, py::arg("debug") = false,
           py::arg("provider") = "cpu", py::arg("model_type") = "",
           py::arg("modeling_unit") = "cjkchar", py::arg("bpe_vocab") = "")
      .def_readwrite("transducer", &PyClass::transducer)
      .def_readwrite("paraformer", &PyClass::paraformer)
      .def_readwrite("nemo_ctc", &PyClass::nemo_ctc)
      .def_readwrite("whisper", &PyClass::whisper)
      .def_readwrite("fire_red_asr", &PyClass::fire_red_asr)
      .def_readwrite("tdnn", &PyClass::tdnn)
      .def_readwrite("zipformer_ctc", &PyClass::zipformer_ctc)
      .def_readwrite("wenet_ctc", &PyClass::wenet_ctc)
      .def_readwrite("sense_voice", &PyClass::sense_voice)
      .def_readwrite("moonshine", &PyClass::moonshine)
      .def_readwrite("dolphin", &PyClass::dolphin)
      .def_readwrite("canary", &PyClass::canary)
      .def_readwrite("omnilingual", &PyClass::omnilingual)
      .def_readwrite("funasr_nano", &PyClass::funasr_nano)
      .def_readwrite("medasr", &PyClass::medasr)
      .def_readwrite("fire_red_asr_ctc", &PyClass::fire_red_asr_ctc)
      .def_readwrite("telespeech_ctc", &PyClass::telespeech_ctc)
      .def_readwrite("tokens", &PyClass::tokens)
      .def_readwrite("num_threads", &PyClass::num_threads)
      .def_readwrite("debug", &PyClass::debug)
      .def_readwrite("provider", &PyClass::provider)
      .def_readwrite("model_type", &PyClass::model_type)
      .def_readwrite("modeling_unit", &PyClass::modeling_unit)
      .def_readwrite("bpe_vocab", &PyClass::bpe_vocab)
      .def("validate", &PyClass::Validate)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-model-config.h
================================================
// sherpa-onnx/python/csrc/offline-model-config.h
//
// Copyright (c)  2023 by manyeyes

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-moonshine-model-config.cc
================================================
// sherpa-onnx/python/csrc/offline-moonshine-model-config.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-moonshine-model-config.h"

#include <string>
#include <vector>

#include "sherpa-onnx/python/csrc/offline-moonshine-model-config.h"

namespace sherpa_onnx {

void PybindOfflineMoonshineModelConfig(py::module *m) {
  using PyClass = OfflineMoonshineModelConfig;
  py::class_<PyClass>(*m, "OfflineMoonshineModelConfig")
      .def(py::init<const std::string &, const std::string &,
                    const std::string &, const std::string &,
                    const std::string &>(),
           py::arg("preprocessor") = "", py::arg("encoder") = "",
           py::arg("uncached_decoder") = "", py::arg("cached_decoder") = "",
           py::arg("merged_decoder") = "")
      .def_readwrite("preprocessor", &PyClass::preprocessor)
      .def_readwrite("encoder", &PyClass::encoder)
      .def_readwrite("uncached_decoder", &PyClass::uncached_decoder)
      .def_readwrite("cached_decoder", &PyClass::cached_decoder)
      .def_readwrite("merged_decoder", &PyClass::merged_decoder)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-moonshine-model-config.h
================================================
// sherpa-onnx/python/csrc/offline-moonshine-model-config.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_MOONSHINE_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_MOONSHINE_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineMoonshineModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_MOONSHINE_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-nemo-enc-dec-ctc-model-config.cc
================================================
// sherpa-onnx/python/csrc/offline-nemo-enc-dec-ctc-model-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/offline-nemo-enc-dec-ctc-model-config.h"

#include <string>
#include <vector>

#include "sherpa-onnx/csrc/offline-nemo-enc-dec-ctc-model-config.h"

namespace sherpa_onnx {

void PybindOfflineNemoEncDecCtcModelConfig(py::module *m) {
  using PyClass = OfflineNemoEncDecCtcModelConfig;
  py::class_<PyClass>(*m, "OfflineNemoEncDecCtcModelConfig")
      .def(py::init<const std::string &>(), py::arg("model"))
      .def_readwrite("model", &PyClass::model)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-nemo-enc-dec-ctc-model-config.h
================================================
// sherpa-onnx/python/csrc/offline-nemo-enc-dec-ctc-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_NEMO_ENC_DEC_CTC_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_NEMO_ENC_DEC_CTC_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineNemoEncDecCtcModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_NEMO_ENC_DEC_CTC_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-omnilingual-asr-ctc-model-config.cc
================================================
// sherpa-onnx/python/csrc/offline-omnilingual-asr-ctc-model-config.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/offline-omnilingual-asr-ctc-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/offline-omnilingual-asr-ctc-model-config.h"

namespace sherpa_onnx {

void PybindOfflineOmnilingualAsrCtcModelConfig(py::module *m) {
  using PyClass = OfflineOmnilingualAsrCtcModelConfig;
  py::class_<PyClass>(*m, "OfflineOmnilingualAsrCtcModelConfig")
      .def(py::init<>())
      .def(py::init<const std::string &>(), py::arg("model"))
      .def_readwrite("model", &PyClass::model)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-omnilingual-asr-ctc-model-config.h
================================================
// sherpa-onnx/python/csrc/offline-omnilingual-asr-ctc-model-config.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_OMNILINGUAL_ASR_CTC_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_OMNILINGUAL_ASR_CTC_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineOmnilingualAsrCtcModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_OMNILINGUAL_ASR_CTC_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-paraformer-model-config.cc
================================================
// sherpa-onnx/python/csrc/offline-paraformer-model-config.cc
//
// Copyright (c)  2023 by manyeyes

#include "sherpa-onnx/python/csrc/offline-paraformer-model-config.h"

#include <string>
#include <vector>

#include "sherpa-onnx/csrc/offline-paraformer-model-config.h"

namespace sherpa_onnx {

void PybindOfflineParaformerModelConfig(py::module *m) {
  using PyClass = OfflineParaformerModelConfig;
  py::class_<PyClass>(*m, "OfflineParaformerModelConfig")
      .def(py::init<>())
      .def(py::init<const std::string &>(), py::arg("model"))
      .def_readwrite("model", &PyClass::model)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-paraformer-model-config.h
================================================
// sherpa-onnx/python/csrc/offline-paraformer-model-config.h
//
// Copyright (c)  2023 by manyeyes

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_PARAFORMER_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_PARAFORMER_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineParaformerModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_PARAFORMER_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-punctuation.cc
================================================
// sherpa-onnx/python/csrc/offline-punctuation.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/offline-punctuation.h"

#include <string>

#include "sherpa-onnx/csrc/offline-punctuation.h"

namespace sherpa_onnx {

static void PybindOfflinePunctuationModelConfig(py::module *m) {
  using PyClass = OfflinePunctuationModelConfig;
  py::class_<PyClass>(*m, "OfflinePunctuationModelConfig")
      .def(py::init<>())
      .def(py::init<const std::string &, int32_t, bool, const std::string &>(),
           py::arg("ct_transformer"), py::arg("num_threads") = 1,
           py::arg("debug") = false, py::arg("provider") = "cpu")
      .def_readwrite("ct_transformer", &PyClass::ct_transformer)
      .def_readwrite("num_threads", &PyClass::num_threads)
      .def_readwrite("debug", &PyClass::debug)
      .def_readwrite("provider", &PyClass::provider)
      .def("validate", &PyClass::Validate)
      .def("__str__", &PyClass::ToString);
}

static void PybindOfflinePunctuationConfig(py::module *m) {
  PybindOfflinePunctuationModelConfig(m);
  using PyClass = OfflinePunctuationConfig;

  py::class_<PyClass>(*m, "OfflinePunctuationConfig")
      .def(py::init<>())
      .def(py::init<const OfflinePunctuationModelConfig &>(), py::arg("model"))
      .def_readwrite("model", &PyClass::model)
      .def("validate", &PyClass::Validate)
      .def("__str__", &PyClass::ToString);
}

void PybindOfflinePunctuation(py::module *m) {
  PybindOfflinePunctuationConfig(m);
  using PyClass = OfflinePunctuation;

  py::class_<PyClass>(*m, "OfflinePunctuation")
      .def(py::init<const OfflinePunctuationConfig &>(), py::arg("config"),
           py::call_guard<py::gil_scoped_release>())
      .def("add_punctuation", &PyClass::AddPunctuation, py::arg("text"),
           py::call_guard<py::gil_scoped_release>());
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-punctuation.h
================================================
// sherpa-onnx/python/csrc/offline-punctuation.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_PUNCTUATION_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_PUNCTUATION_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflinePunctuation(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_PUNCTUATION_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-recognizer.cc
================================================
// sherpa-onnx/python/csrc/offline-recognizer.cc
//
// Copyright (c)  2023 by manyeyes

#include "sherpa-onnx/python/csrc/offline-recognizer.h"

#include <string>
#include <vector>

#include "sherpa-onnx/csrc/offline-recognizer.h"

namespace sherpa_onnx {

static void PybindOfflineRecognizerConfig(py::module *m) {
  using PyClass = OfflineRecognizerConfig;
  py::class_<PyClass>(*m, "OfflineRecognizerConfig")
      .def(py::init<const FeatureExtractorConfig &, const OfflineModelConfig &,
                    const OfflineLMConfig &, const OfflineCtcFstDecoderConfig &,
                    const std::string &, int32_t, const std::string &, float,
                    float, const std::string &, const std::string &,
                    const HomophoneReplacerConfig &>(),
           py::arg("feat_config") = FeatureExtractorConfig(),
           py::arg("model_config") = OfflineModelConfig(),
           py::arg("lm_config") = OfflineLMConfig(),
           py::arg("ctc_fst_decoder_config") = OfflineCtcFstDecoderConfig(),
           py::arg("decoding_method") = "greedy_search",
           py::arg("max_active_paths") = 4, py::arg("hotwords_file") = "",
           py::arg("hotwords_score") = 1.5, py::arg("blank_penalty") = 0.0,
           py::arg("rule_fsts") = "", py::arg("rule_fars") = "",
           py::arg("hr") = HomophoneReplacerConfig{})
      .def_readwrite("feat_config", &PyClass::feat_config)
      .def_readwrite("model_config", &PyClass::model_config)
      .def_readwrite("lm_config", &PyClass::lm_config)
      .def_readwrite("ctc_fst_decoder_config", &PyClass::ctc_fst_decoder_config)
      .def_readwrite("decoding_method", &PyClass::decoding_method)
      .def_readwrite("max_active_paths", &PyClass::max_active_paths)
      .def_readwrite("hotwords_file", &PyClass::hotwords_file)
      .def_readwrite("hotwords_score", &PyClass::hotwords_score)
      .def_readwrite("blank_penalty", &PyClass::blank_penalty)
      .def_readwrite("rule_fsts", &PyClass::rule_fsts)
      .def_readwrite("rule_fars", &PyClass::rule_fars)
      .def_readwrite("hr", &PyClass::hr)
      .def("__str__", &PyClass::ToString);
}

void PybindOfflineRecognizer(py::module *m) {
  PybindOfflineRecognizerConfig(m);

  using PyClass = OfflineRecognizer;
  py::class_<PyClass>(*m, "OfflineRecognizer")
      .def(py::init<const OfflineRecognizerConfig &>(), py::arg("config"),
           py::call_guard<py::gil_scoped_release>())
      .def(
          "create_stream",
          [](const PyClass &self) { return self.CreateStream(); },
          py::call_guard<py::gil_scoped_release>())
      .def(
          "create_stream",
          [](PyClass &self, const std::string &hotwords) {
            return self.CreateStream(hotwords);
          },
          py::arg("hotwords"), py::call_guard<py::gil_scoped_release>())
      .def("decode_stream", &PyClass::DecodeStream, py::arg("s"),
           py::call_guard<py::gil_scoped_release>())
      .def("set_config", &PyClass::SetConfig, py::arg("config"),
           py::call_guard<py::gil_scoped_release>())
      .def(
          "decode_streams",
          [](const PyClass &self, std::vector<OfflineStream *> ss) {
            self.DecodeStreams(ss.data(), ss.size());
          },
          py::arg("ss"), py::call_guard<py::gil_scoped_release>());
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-recognizer.h
================================================
// sherpa-onnx/python/csrc/offline-recognizer.h
//
// Copyright (c)  2023 by manyeyes

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_RECOGNIZER_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_RECOGNIZER_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineRecognizer(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_RECOGNIZER_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-sense-voice-model-config.cc
================================================
// sherpa-onnx/python/csrc/offline-sense-voice-model-config.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-sense-voice-model-config.h"

#include <string>
#include <vector>

#include "sherpa-onnx/python/csrc/offline-sense-voice-model-config.h"

namespace sherpa_onnx {

void PybindOfflineSenseVoiceModelConfig(py::module *m) {
  using PyClass = OfflineSenseVoiceModelConfig;
  py::class_<PyClass>(*m, "OfflineSenseVoiceModelConfig")
      .def(py::init<>())
      .def(py::init<const std::string &, const std::string &, bool>(),
           py::arg("model"), py::arg("language"), py::arg("use_itn"))
      .def_readwrite("model", &PyClass::model)
      .def_readwrite("language", &PyClass::language)
      .def_readwrite("use_itn", &PyClass::use_itn)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-sense-voice-model-config.h
================================================
// sherpa-onnx/python/csrc/offline-sense-voice-model-config.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SENSE_VOICE_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SENSE_VOICE_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineSenseVoiceModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SENSE_VOICE_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-source-separation-model-config.cc
================================================
// sherpa-onnx/python/csrc/offline-source-separation-model-config.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/offline-source-separation-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/offline-source-separation-model-config.h"
#include "sherpa-onnx/python/csrc/offline-source-separation-spleeter-model-config.h"
#include "sherpa-onnx/python/csrc/offline-source-separation-uvr-model-config.h"

namespace sherpa_onnx {

void PybindOfflineSourceSeparationModelConfig(py::module *m) {
  PybindOfflineSourceSeparationSpleeterModelConfig(m);
  PybindOfflineSourceSeparationUvrModelConfig(m);

  using PyClass = OfflineSourceSeparationModelConfig;
  py::class_<PyClass>(*m, "OfflineSourceSeparationModelConfig")
      .def(py::init<const OfflineSourceSeparationSpleeterModelConfig &,
                    const OfflineSourceSeparationUvrModelConfig &, int32_t,
                    bool, const std::string &>(),
           py::arg("spleeter") = OfflineSourceSeparationSpleeterModelConfig{},
           py::arg("uvr") = OfflineSourceSeparationUvrModelConfig{},
           py::arg("num_threads") = 1, py::arg("debug") = false,
           py::arg("provider") = "cpu")
      .def_readwrite("spleeter", &PyClass::spleeter)
      .def_readwrite("uvr", &PyClass::uvr)
      .def_readwrite("num_threads", &PyClass::num_threads)
      .def_readwrite("debug", &PyClass::debug)
      .def_readwrite("provider", &PyClass::provider)
      .def("validate", &PyClass::Validate)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-source-separation-model-config.h
================================================
// sherpa-onnx/python/csrc/offline-source-separation-model-config.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SOURCE_SEPARATION_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SOURCE_SEPARATION_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineSourceSeparationModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SOURCE_SEPARATION_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-source-separation-spleeter-model-config.cc
================================================
// sherpa-onnx/python/csrc/offline-source-separation-spleeter-model-config.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/offline-source-separation-spleeter-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/offline-source-separation-spleeter-model-config.h"

namespace sherpa_onnx {

void PybindOfflineSourceSeparationSpleeterModelConfig(py::module *m) {
  using PyClass = OfflineSourceSeparationSpleeterModelConfig;
  py::class_<PyClass>(*m, "OfflineSourceSeparationSpleeterModelConfig")
      .def(py::init<const std::string &, const std::string &>(),
           py::arg("vocals") = "", py::arg("accompaniment") = "")
      .def_readwrite("vocals", &PyClass::vocals)
      .def_readwrite("accompaniment", &PyClass::accompaniment)
      .def("validate", &PyClass::Validate)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-source-separation-spleeter-model-config.h
================================================
// sherpa-onnx/python/csrc/offline-source-separation-spleeter-model-config.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineSourceSeparationSpleeterModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SOURCE_SEPARATION_SPLEETER_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-source-separation-uvr-model-config.cc
================================================
// sherpa-onnx/python/csrc/offline-source-separation-uvr-model-config.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/offline-source-separation-uvr-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/offline-source-separation-uvr-model-config.h"

namespace sherpa_onnx {

void PybindOfflineSourceSeparationUvrModelConfig(py::module *m) {
  using PyClass = OfflineSourceSeparationUvrModelConfig;
  py::class_<PyClass>(*m, "OfflineSourceSeparationUvrModelConfig")
      .def(py::init<const std::string &>(), py::arg("model") = "")
      .def_readwrite("model", &PyClass::model)
      .def("validate", &PyClass::Validate)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-source-separation-uvr-model-config.h
================================================
// sherpa-onnx/python/csrc/offline-source-separation-uvr-model-config.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineSourceSeparationUvrModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SOURCE_SEPARATION_UVR_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-source-separation.cc
================================================
// sherpa-onnx/python/csrc/offline-source-separation-config.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-source-separation.h"

#include <algorithm>
#include <string>

#include "sherpa-onnx/python/csrc/offline-source-separation-model-config.h"
#include "sherpa-onnx/python/csrc/offline-source-separation.h"

#define C_CONTIGUOUS py::detail::npy_api::constants::NPY_ARRAY_C_CONTIGUOUS_

namespace sherpa_onnx {

static void PybindOfflineSourceSeparationConfig(py::module *m) {
  PybindOfflineSourceSeparationModelConfig(m);

  using PyClass = OfflineSourceSeparationConfig;
  py::class_<PyClass>(*m, "OfflineSourceSeparationConfig")
      .def(py::init<const OfflineSourceSeparationModelConfig &>(),
           py::arg("model") = OfflineSourceSeparationModelConfig{})
      .def_readwrite("model", &PyClass::model)
      .def("validate", &PyClass::Validate)
      .def("__str__", &PyClass::ToString);
}

static void PybindMultiChannelSamples(py::module *m) {
  using PyClass = MultiChannelSamples;

  py::class_<PyClass>(*m, "MultiChannelSamples")
      .def_property_readonly("data", [](PyClass &self) -> py::object {
        // if data is not empty, return a float array of
        // shape (num_channels, num_samples)
        int32_t num_channels = self.data.size();
        if (num_channels == 0) {
          return py::none();
        }

        int32_t num_samples = self.data[0].size();
        if (num_samples == 0) {
          return py::none();
        }

        py::array_t<float> ans({num_channels, num_samples});

        py::buffer_info buf = ans.request();
        auto p = static_cast<float *>(buf.ptr);

        for (int32_t i = 0; i != num_channels; ++i) {
          std::copy(self.data[i].begin(), self.data[i].end(),
                    p + i * num_samples);
        }

        return ans;
      });
}

static void PybindOfflineSourceSeparationOutput(py::module *m) {
  using PyClass = OfflineSourceSeparationOutput;
  py::class_<PyClass>(*m, "OfflineSourceSeparationOutput")
      .def_property_readonly(
          "sample_rate", [](const PyClass &self) { return self.sample_rate; })
      .def_property_readonly("stems",
                             [](const PyClass &self) { return self.stems; });
}

void PybindOfflineSourceSeparation(py::module *m) {
  PybindOfflineSourceSeparationConfig(m);
  PybindOfflineSourceSeparationOutput(m);

  PybindMultiChannelSamples(m);

  using PyClass = OfflineSourceSeparation;
  py::class_<PyClass>(*m, "OfflineSourceSeparation")
      .def(py::init<const OfflineSourceSeparationConfig &>(),
           py::arg("config") = OfflineSourceSeparationConfig{})
      .def(
          "process",
          [](const PyClass &self, int32_t sample_rate,
             const py::array_t<float> &samples) {
            if (!(samples.flags() & py::array::c_style)) {
              throw py::value_error(
                  "input samples should be contiguous. Please use "
                  "np.ascontiguousarray(samples)");
            }

            int num_dim = samples.ndim();
            if (samples.ndim() != 2) {
              std::ostringstream os;
              os << "Expect an array of 2 dimensions [num_channels x "
                    "num_samples]. "
                    "Given dim: "
                 << num_dim << "\n";
              throw py::value_error(os.str());
            }

            // if num_samples is less than 10, it is very likely the user
            // has swapped num_channels and num_samples.
            if (samples.shape(1) < 10) {
              std::ostringstream os;
              os << "Expect an array of 2 dimensions [num_channels x "
                    "num_samples]. "
                    "Given ["
                 << samples.shape(0) << " x " << samples.shape(1) << "]"
                 << "\n";
              throw py::value_error(os.str());
            }

            int32_t num_channels = samples.shape(0);
            int32_t num_samples = samples.shape(1);
            const float *p = samples.data();

            OfflineSourceSeparationInput input;

            input.samples.data.resize(num_channels);
            input.sample_rate = sample_rate;

            for (int32_t i = 0; i != num_channels; ++i) {
              input.samples.data[i] = {p + i * num_samples,
                                       p + (i + 1) * num_samples};
            }

            pybind11::gil_scoped_release release;

            return self.Process(input);
          },
          py::arg("sample_rate"), py::arg("samples"),
          "samples is of shape (num_channels, num-samples) with dtype "
          "np.float32");
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-source-separation.h
================================================
// sherpa-onnx/python/csrc/offline-source-separation.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SOURCE_SEPARATION_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SOURCE_SEPARATION_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineSourceSeparation(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SOURCE_SEPARATION_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-speaker-diarization-result.cc
================================================
// sherpa-onnx/python/csrc/offline-speaker-diarization-result.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/offline-speaker-diarization-result.h"

#include "sherpa-onnx/csrc/offline-speaker-diarization-result.h"

namespace sherpa_onnx {

static void PybindOfflineSpeakerDiarizationSegment(py::module *m) {
  using PyClass = OfflineSpeakerDiarizationSegment;
  py::class_<PyClass>(*m, "OfflineSpeakerDiarizationSegment")
      .def_property_readonly("start", &PyClass::Start)
      .def_property_readonly("end", &PyClass::End)
      .def_property_readonly("duration", &PyClass::Duration)
      .def_property_readonly("speaker", &PyClass::Speaker)
      .def_property("text", &PyClass::Text, &PyClass::SetText)
      .def("__str__", &PyClass::ToString);
}

void PybindOfflineSpeakerDiarizationResult(py::module *m) {
  PybindOfflineSpeakerDiarizationSegment(m);
  using PyClass = OfflineSpeakerDiarizationResult;
  py::class_<PyClass>(*m, "OfflineSpeakerDiarizationResult")
      .def_property_readonly("num_speakers", &PyClass::NumSpeakers)
      .def_property_readonly("num_segments", &PyClass::NumSegments)
      .def("sort_by_start_time", &PyClass::SortByStartTime)
      .def("sort_by_speaker", &PyClass::SortBySpeaker);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-speaker-diarization-result.h
================================================
// sherpa-onnx/python/csrc/offline-speaker-diarization-result.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SPEAKER_DIARIZATION_RESULT_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SPEAKER_DIARIZATION_RESULT_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineSpeakerDiarizationResult(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SPEAKER_DIARIZATION_RESULT_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-speaker-diarization.cc
================================================
// sherpa-onnx/python/csrc/offline-speaker-diarization.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/offline-speaker-diarization.h"

#include <string>
#include <vector>

#include "sherpa-onnx/csrc/offline-speaker-diarization.h"
#include "sherpa-onnx/csrc/offline-speaker-segmentation-model-config.h"
#include "sherpa-onnx/csrc/offline-speaker-segmentation-pyannote-model-config.h"

namespace sherpa_onnx {

static void PybindOfflineSpeakerSegmentationPyannoteModelConfig(py::module *m) {
  using PyClass = OfflineSpeakerSegmentationPyannoteModelConfig;
  py::class_<PyClass>(*m, "OfflineSpeakerSegmentationPyannoteModelConfig")
      .def(py::init<>())
      .def(py::init<const std::string &>(), py::arg("model"))
      .def_readwrite("model", &PyClass::model)
      .def("__str__", &PyClass::ToString)
      .def("validate", &PyClass::Validate);
}

static void PybindOfflineSpeakerSegmentationModelConfig(py::module *m) {
  PybindOfflineSpeakerSegmentationPyannoteModelConfig(m);

  using PyClass = OfflineSpeakerSegmentationModelConfig;
  py::class_<PyClass>(*m, "OfflineSpeakerSegmentationModelConfig")
      .def(py::init<>())
      .def(py::init<const OfflineSpeakerSegmentationPyannoteModelConfig &,
                    int32_t, bool, const std::string &>(),
           py::arg("pyannote"), py::arg("num_threads") = 1,
           py::arg("debug") = false, py::arg("provider") = "cpu")
      .def_readwrite("pyannote", &PyClass::pyannote)
      .def_readwrite("num_threads", &PyClass::num_threads)
      .def_readwrite("debug", &PyClass::debug)
      .def_readwrite("provider", &PyClass::provider)
      .def("__str__", &PyClass::ToString)
      .def("validate", &PyClass::Validate);
}

static void PybindOfflineSpeakerDiarizationConfig(py::module *m) {
  PybindOfflineSpeakerSegmentationModelConfig(m);

  using PyClass = OfflineSpeakerDiarizationConfig;
  py::class_<PyClass>(*m, "OfflineSpeakerDiarizationConfig")
      .def(py::init<const OfflineSpeakerSegmentationModelConfig &,
                    const SpeakerEmbeddingExtractorConfig &,
                    const FastClusteringConfig &, float, float>(),
           py::arg("segmentation"), py::arg("embedding"), py::arg("clustering"),
           py::arg("min_duration_on") = 0.3, py::arg("min_duration_off") = 0.5)
      .def_readwrite("segmentation", &PyClass::segmentation)
      .def_readwrite("embedding", &PyClass::embedding)
      .def_readwrite("clustering", &PyClass::clustering)
      .def_readwrite("min_duration_on", &PyClass::min_duration_on)
      .def_readwrite("min_duration_off", &PyClass::min_duration_off)
      .def("__str__", &PyClass::ToString)
      .def("validate", &PyClass::Validate);
}

void PybindOfflineSpeakerDiarization(py::module *m) {
  PybindOfflineSpeakerDiarizationConfig(m);

  using PyClass = OfflineSpeakerDiarization;
  py::class_<PyClass>(*m, "OfflineSpeakerDiarization")
      .def(py::init<const OfflineSpeakerDiarizationConfig &>(),
           py::arg("config"))
      .def_property_readonly("sample_rate", &PyClass::SampleRate)
      .def("set_config", &PyClass::SetConfig, py::arg("config"))
      .def(
          "process",
          [](const PyClass &self, const std::vector<float> samples,
             std::function<int32_t(int32_t, int32_t)> callback) {
            if (!callback) {
              return self.Process(samples.data(), samples.size());
            }

            std::function<int32_t(int32_t, int32_t, void *)> callback_wrapper =
                [callback](int32_t processed_chunks, int32_t num_chunks,
                           void *) -> int32_t {
              callback(processed_chunks, num_chunks);
              return 0;
            };

            return self.Process(samples.data(), samples.size(),
                                callback_wrapper);
          },
          py::arg("samples"), py::arg("callback") = py::none());
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-speaker-diarization.h
================================================
// sherpa-onnx/python/csrc/offline-speaker-diarization.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SPEAKER_DIARIZATION_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SPEAKER_DIARIZATION_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineSpeakerDiarization(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SPEAKER_DIARIZATION_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-speech-denoiser-dpdfnet-model-config.cc
================================================
// sherpa-onnx/python/csrc/offline-speech-denoiser-dpdfnet-model-config.cc
//
// Copyright (c)  2026  Ceva Inc

#include "sherpa-onnx/python/csrc/offline-speech-denoiser-dpdfnet-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/offline-speech-denoiser-dpdfnet-model-config.h"

namespace sherpa_onnx {

void PybindOfflineSpeechDenoiserDpdfNetModelConfig(py::module *m) {
  using PyClass = OfflineSpeechDenoiserDpdfNetModelConfig;
  py::class_<PyClass>(*m, "OfflineSpeechDenoiserDpdfNetModelConfig")
      .def(py::init<>())
      .def(py::init<const std::string &>(), py::arg("model") = "")
      .def_readwrite("model", &PyClass::model)
      .def("validate", &PyClass::Validate)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-speech-denoiser-dpdfnet-model-config.h
================================================
// sherpa-onnx/python/csrc/offline-speech-denoiser-dpdfnet-model-config.h
//
// Copyright (c)  2026  Ceva Inc

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SPEECH_DENOISER_DPDFNET_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SPEECH_DENOISER_DPDFNET_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineSpeechDenoiserDpdfNetModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SPEECH_DENOISER_DPDFNET_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-speech-denoiser-gtcrn-model-config.cc
================================================
// sherpa-onnx/python/csrc/offline-speech-denoiser-gtcrn-model-config.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/offline-speech-denoiser-gtcrn-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/offline-speech-denoiser-gtcrn-model-config.h"

namespace sherpa_onnx {

void PybindOfflineSpeechDenoiserGtcrnModelConfig(py::module *m) {
  using PyClass = OfflineSpeechDenoiserGtcrnModelConfig;
  py::class_<PyClass>(*m, "OfflineSpeechDenoiserGtcrnModelConfig")
      .def(py::init<const std::string &>(), py::arg("model") = "")
      .def_readwrite("model", &PyClass::model)
      .def("validate", &PyClass::Validate)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-speech-denoiser-gtcrn-model-config.h
================================================
// sherpa-onnx/python/csrc/offline-speech-denoiser-gtcrn-model-config.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SPEECH_DENOISER_GTCRN_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SPEECH_DENOISER_GTCRN_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineSpeechDenoiserGtcrnModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SPEECH_DENOISER_GTCRN_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-speech-denoiser-model-config.cc
================================================
// sherpa-onnx/python/csrc/offline-speech-denoiser-model-config.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/offline-speech-denoiser-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/offline-speech-denoiser-model-config.h"
#include "sherpa-onnx/python/csrc/offline-speech-denoiser-dpdfnet-model-config.h"
#include "sherpa-onnx/python/csrc/offline-speech-denoiser-gtcrn-model-config.h"

namespace sherpa_onnx {

void PybindOfflineSpeechDenoiserModelConfig(py::module *m) {
  PybindOfflineSpeechDenoiserDpdfNetModelConfig(m);
  PybindOfflineSpeechDenoiserGtcrnModelConfig(m);

  using PyClass = OfflineSpeechDenoiserModelConfig;
  py::class_<PyClass>(*m, "OfflineSpeechDenoiserModelConfig")
      .def(py::init<>())
      .def(py::init<const OfflineSpeechDenoiserGtcrnModelConfig &,
                    const OfflineSpeechDenoiserDpdfNetModelConfig &, int32_t,
                    bool, const std::string &>(),
           py::arg("gtcrn") = OfflineSpeechDenoiserGtcrnModelConfig{},
           py::arg("dpdfnet") = OfflineSpeechDenoiserDpdfNetModelConfig{},
           py::arg("num_threads") = 1, py::arg("debug") = false,
           py::arg("provider") = "cpu")
      .def_readwrite("gtcrn", &PyClass::gtcrn)
      .def_readwrite("dpdfnet", &PyClass::dpdfnet)
      .def_readwrite("num_threads", &PyClass::num_threads)
      .def_readwrite("debug", &PyClass::debug)
      .def_readwrite("provider", &PyClass::provider)
      .def("validate", &PyClass::Validate)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-speech-denoiser-model-config.h
================================================
// sherpa-onnx/python/csrc/offline-speech-denoiser-model-config.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SPEECH_DENOISER_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SPEECH_DENOISER_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineSpeechDenoiserModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SPEECH_DENOISER_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-speech-denoiser.cc
================================================
// sherpa-onnx/python/csrc/offline-speech-denoiser.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/offline-speech-denoiser.h"

#include <vector>

#include "sherpa-onnx/csrc/offline-speech-denoiser.h"
#include "sherpa-onnx/python/csrc/offline-speech-denoiser-model-config.h"

namespace sherpa_onnx {

void PybindOfflineSpeechDenoiserConfig(py::module *m) {
  PybindOfflineSpeechDenoiserModelConfig(m);

  using PyClass = OfflineSpeechDenoiserConfig;

  py::class_<PyClass>(*m, "OfflineSpeechDenoiserConfig")
      .def(py::init<>())
      .def(py::init<const OfflineSpeechDenoiserModelConfig &>(),
           py::arg("model") = OfflineSpeechDenoiserModelConfig{})
      .def_readwrite("model", &PyClass::model)
      .def("validate", &PyClass::Validate)
      .def("__str__", &PyClass::ToString);
}

void PybindDenoisedAudio(py::module *m) {
  using PyClass = DenoisedAudio;
  py::class_<PyClass>(*m, "DenoisedAudio")
      .def_property_readonly(
          "sample_rate", [](const PyClass &self) { return self.sample_rate; })
      .def_property_readonly("samples",
                             [](const PyClass &self) { return self.samples; });
}

void PybindOfflineSpeechDenoiser(py::module *m) {
  PybindOfflineSpeechDenoiserConfig(m);
  PybindDenoisedAudio(m);
  using PyClass = OfflineSpeechDenoiser;
  py::class_<PyClass>(*m, "OfflineSpeechDenoiser")
      .def(py::init<const OfflineSpeechDenoiserConfig &>(), py::arg("config"),
           py::call_guard<py::gil_scoped_release>())
      .def(
          "__call__",
          [](const PyClass &self, const std::vector<float> &samples,
             int32_t sample_rate) {
            return self.Run(samples.data(), samples.size(), sample_rate);
          },
          py::arg("samples"), py::arg("sample_rate"),
          py::call_guard<py::gil_scoped_release>())
      .def(
          "run",
          [](const PyClass &self, const std::vector<float> &samples,
             int32_t sample_rate) {
            return self.Run(samples.data(), samples.size(), sample_rate);
          },
          py::arg("samples"), py::arg("sample_rate"),
          py::call_guard<py::gil_scoped_release>())
      .def_property_readonly("sample_rate", &PyClass::GetSampleRate);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-speech-denoiser.h
================================================
// sherpa-onnx/python/csrc/offline-speech-denoiser.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SPEECH_DENOISER_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SPEECH_DENOISER_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindDenoisedAudio(py::module *m);

void PybindOfflineSpeechDenoiser(py::module *m);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SPEECH_DENOISER_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-stream.cc
================================================
// sherpa-onnx/python/csrc/offline-stream.cc
//
// Copyright (c)  2023 by manyeyes

#include "sherpa-onnx/python/csrc/offline-stream.h"

#include <vector>

#include "sherpa-onnx/csrc/offline-stream.h"

namespace sherpa_onnx {

constexpr const char *kAcceptWaveformUsage = R"(
Process audio samples.

Args:
  sample_rate:
    Sample rate of the input samples. If it is different from the one
    expected by the model, we will do resampling inside.
  waveform:
    A 1-D float32 tensor containing audio samples. It must be normalized
    to the range [-1, 1].
)";

static void PybindOfflineRecognitionResult(py::module *m) {  // NOLINT
  using PyClass = OfflineRecognitionResult;
  py::class_<PyClass>(*m, "OfflineRecognitionResult")
      .def("__str__", &PyClass::AsJsonString)
      .def_property_readonly(
          "text",
          [](const PyClass &self) -> py::str {
            return py::str(PyUnicode_DecodeUTF8(self.text.c_str(),
                                                self.text.size(), "ignore"));
          })
      .def_property_readonly("lang",
         [](const PyClass &self) { return self.lang; })
      .def_property_readonly("emotion",
        [](const PyClass &self) { return self.emotion; })
      .def_property_readonly("event",
        [](const PyClass &self) { return self.event; })
      .def_property_readonly("tokens",
        [](const PyClass &self) { return self.tokens; })
      .def_property_readonly("words",
        [](const PyClass &self) { return self.words; })
      .def_property_readonly("timestamps",
        [](const PyClass &self) { return self.timestamps; })
      .def_property_readonly("durations",
        [](const PyClass &self) { return self.durations; })
      .def_property_readonly("ys_log_probs",
        [](const PyClass &self) { return self.ys_log_probs; })
      .def_property_readonly("segment_timestamps",
        [](const PyClass &self) { return self.segment_timestamps; })
      .def_property_readonly("segment_durations",
        [](const PyClass &self) { return self.segment_durations; })
      .def_property_readonly("segment_texts",
        [](const PyClass &self) { return self.segment_texts; });
}

void PybindOfflineStream(py::module *m) {
  PybindOfflineRecognitionResult(m);

  using PyClass = OfflineStream;
  py::class_<PyClass>(*m, "OfflineStream")
      .def(
          "accept_waveform",
          [](PyClass &self, float sample_rate,
             const std::vector<float> &waveform) {
            self.AcceptWaveform(sample_rate, waveform.data(), waveform.size());
          },
          py::arg("sample_rate"), py::arg("waveform"), kAcceptWaveformUsage,
          py::call_guard<py::gil_scoped_release>())
      .def("set_option", &PyClass::SetOption, py::arg("key"),
           py::arg("value"), py::call_guard<py::gil_scoped_release>())
      .def("has_option", &PyClass::HasOption, py::arg("key"),
           py::call_guard<py::gil_scoped_release>())
      .def("get_option", &PyClass::GetOption, py::arg("key"),
           py::call_guard<py::gil_scoped_release>())
      .def_property_readonly("result", &PyClass::GetResult);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-stream.h
================================================
// sherpa-onnx/python/csrc/offline-stream.h
//
// Copyright (c)  2023 by manyeyes

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_STREAM_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_STREAM_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineStream(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_STREAM_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-tdnn-model-config.cc
================================================
// sherpa-onnx/python/csrc/offline-tdnn-model-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-tdnn-model-config.h"

#include <string>
#include <vector>

#include "sherpa-onnx/python/csrc/offline-tdnn-model-config.h"

namespace sherpa_onnx {

void PybindOfflineTdnnModelConfig(py::module *m) {
  using PyClass = OfflineTdnnModelConfig;
  py::class_<PyClass>(*m, "OfflineTdnnModelConfig")
      .def(py::init<const std::string &>(), py::arg("model"))
      .def_readwrite("model", &PyClass::model)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-tdnn-model-config.h
================================================
// sherpa-onnx/python/csrc/offline-tdnn-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_TDNN_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_TDNN_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineTdnnModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_TDNN_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-transducer-model-config.cc
================================================
// sherpa-onnx/python/csrc/offline-transducer-model-config.cc
//
// Copyright (c)  2023 by manyeyes

#include "sherpa-onnx/python/csrc/offline-transducer-model-config.h"

#include <string>
#include <vector>

#include "sherpa-onnx/csrc/offline-transducer-model-config.h"

namespace sherpa_onnx {

void PybindOfflineTransducerModelConfig(py::module *m) {
  using PyClass = OfflineTransducerModelConfig;
  py::class_<PyClass>(*m, "OfflineTransducerModelConfig")
      .def(py::init<const std::string &, const std::string &,
                    const std::string &>(),
           py::arg("encoder_filename"), py::arg("decoder_filename"),
           py::arg("joiner_filename"))
      .def_readwrite("encoder_filename", &PyClass::encoder_filename)
      .def_readwrite("decoder_filename", &PyClass::decoder_filename)
      .def_readwrite("joiner_filename", &PyClass::joiner_filename)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-transducer-model-config.h
================================================
// sherpa-onnx/python/csrc/offline-transducer-model-config.h
//
// Copyright (c)  2023 by manyeyes

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_TRANSDUCER_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_TRANSDUCER_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineTransducerModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_TRANSDUCER_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-tts-kitten-model-config.cc
================================================
// sherpa-onnx/python/csrc/offline-tts-kitten-model-config.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/offline-tts-kitten-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/offline-tts-kitten-model-config.h"

namespace sherpa_onnx {

void PybindOfflineTtsKittenModelConfig(py::module *m) {
  using PyClass = OfflineTtsKittenModelConfig;

  py::class_<PyClass>(*m, "OfflineTtsKittenModelConfig")
      .def(py::init<>())
      .def(py::init<const std::string &, const std::string &,
                    const std::string &, const std::string &, float>(),
           py::arg("model"), py::arg("voices"), py::arg("tokens"),
           py::arg("data_dir"), py::arg("length_scale") = 1.0)
      .def_readwrite("model", &PyClass::model)
      .def_readwrite("voices", &PyClass::voices)
      .def_readwrite("tokens", &PyClass::tokens)
      .def_readwrite("data_dir", &PyClass::data_dir)
      .def_readwrite("length_scale", &PyClass::length_scale)
      .def("__str__", &PyClass::ToString)
      .def("validate", &PyClass::Validate);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-tts-kitten-model-config.h
================================================
// sherpa-onnx/python/csrc/offline-tts-kitten-model-config.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_TTS_KITTEN_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_TTS_KITTEN_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineTtsKittenModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_TTS_KITTEN_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-tts-kokoro-model-config.cc
================================================
// sherpa-onnx/python/csrc/offline-tts-kokoro-model-config.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/offline-tts-kokoro-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/offline-tts-kokoro-model-config.h"

namespace sherpa_onnx {

void PybindOfflineTtsKokoroModelConfig(py::module *m) {
  using PyClass = OfflineTtsKokoroModelConfig;

  py::class_<PyClass>(*m, "OfflineTtsKokoroModelConfig")
      .def(py::init<>())
      .def(py::init<const std::string &, const std::string &,
                    const std::string &, const std::string &,
                    const std::string &, const std::string &, float,
                    const std::string &>(),
           py::arg("model"), py::arg("voices"), py::arg("tokens"),
           py::arg("lexicon") = "", py::arg("data_dir"),
           py::arg("dict_dir") = "", py::arg("length_scale") = 1.0,
           py::arg("lang") = "")
      .def_readwrite("model", &PyClass::model)
      .def_readwrite("voices", &PyClass::voices)
      .def_readwrite("tokens", &PyClass::tokens)
      .def_readwrite("lexicon", &PyClass::lexicon)
      .def_readwrite("data_dir", &PyClass::data_dir)
      .def_readwrite("dict_dir", &PyClass::dict_dir)
      .def_readwrite("length_scale", &PyClass::length_scale)
      .def_readwrite("lang", &PyClass::lang)
      .def("__str__", &PyClass::ToString)
      .def("validate", &PyClass::Validate);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-tts-kokoro-model-config.h
================================================
// sherpa-onnx/python/csrc/offline-tts-kokoro-model-config.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_TTS_KOKORO_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_TTS_KOKORO_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineTtsKokoroModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_TTS_KOKORO_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-tts-matcha-model-config.cc
================================================
// sherpa-onnx/python/csrc/offline-tts-matcha-model-config.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/offline-tts-matcha-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/offline-tts-matcha-model-config.h"

namespace sherpa_onnx {

void PybindOfflineTtsMatchaModelConfig(py::module *m) {
  using PyClass = OfflineTtsMatchaModelConfig;

  py::class_<PyClass>(*m, "OfflineTtsMatchaModelConfig")
      .def(py::init<>())
      .def(py::init<const std::string &, const std::string &,
                    const std::string &, const std::string &,
                    const std::string &, const std::string &, float, float>(),
           py::arg("acoustic_model"), py::arg("vocoder"),
           py::arg("lexicon") = "", py::arg("tokens"), py::arg("data_dir") = "",
           py::arg("dict_dir") = "", py::arg("noise_scale") = 1.0,
           py::arg("length_scale") = 1.0)
      .def_readwrite("acoustic_model", &PyClass::acoustic_model)
      .def_readwrite("vocoder", &PyClass::vocoder)
      .def_readwrite("lexicon", &PyClass::lexicon)
      .def_readwrite("tokens", &PyClass::tokens)
      .def_readwrite("data_dir", &PyClass::data_dir)
      .def_readwrite("dict_dir", &PyClass::dict_dir)
      .def_readwrite("noise_scale", &PyClass::noise_scale)
      .def_readwrite("length_scale", &PyClass::length_scale)
      .def("__str__", &PyClass::ToString)
      .def("validate", &PyClass::Validate);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-tts-matcha-model-config.h
================================================
// sherpa-onnx/python/csrc/offline-tts-matcha-model-config.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_TTS_MATCHA_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_TTS_MATCHA_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineTtsMatchaModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_TTS_MATCHA_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-tts-model-config.cc
================================================
// sherpa-onnx/python/csrc/offline-tts-model-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/offline-tts-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/offline-tts-model-config.h"
#include "sherpa-onnx/python/csrc/offline-tts-kitten-model-config.h"
#include "sherpa-onnx/python/csrc/offline-tts-kokoro-model-config.h"
#include "sherpa-onnx/python/csrc/offline-tts-matcha-model-config.h"
#include "sherpa-onnx/python/csrc/offline-tts-pocket-model-config.h"
#include "sherpa-onnx/python/csrc/offline-tts-supertonic-model-config.h"
#include "sherpa-onnx/python/csrc/offline-tts-vits-model-config.h"
#include "sherpa-onnx/python/csrc/offline-tts-zipvoice-model-config.h"

namespace sherpa_onnx {

void PybindOfflineTtsModelConfig(py::module *m) {
  PybindOfflineTtsVitsModelConfig(m);
  PybindOfflineTtsMatchaModelConfig(m);
  PybindOfflineTtsKokoroModelConfig(m);
  PybindOfflineTtsZipvoiceModelConfig(m);
  PybindOfflineTtsKittenModelConfig(m);
  PybindOfflineTtsPocketModelConfig(m);
  PybindOfflineTtsSupertonicModelConfig(m);

  using PyClass = OfflineTtsModelConfig;

  py::class_<PyClass>(*m, "OfflineTtsModelConfig")
      .def(py::init<>())
      .def(py::init<const OfflineTtsVitsModelConfig &,
                    const OfflineTtsMatchaModelConfig &,
                    const OfflineTtsKokoroModelConfig &,
                    const OfflineTtsZipvoiceModelConfig &,
                    const OfflineTtsKittenModelConfig &,
                    const OfflineTtsPocketModelConfig &,
                    const OfflineTtsSupertonicModelConfig &, int32_t, bool,
                    const std::string &>(),
           py::arg("vits") = OfflineTtsVitsModelConfig{},
           py::arg("matcha") = OfflineTtsMatchaModelConfig{},
           py::arg("kokoro") = OfflineTtsKokoroModelConfig{},
           py::arg("zipvoice") = OfflineTtsZipvoiceModelConfig{},
           py::arg("kitten") = OfflineTtsKittenModelConfig{},
           py::arg("pocket") = OfflineTtsPocketModelConfig{},
           py::arg("supertonic") = OfflineTtsSupertonicModelConfig{},
           py::arg("num_threads") = 1, py::arg("debug") = false,
           py::arg("provider") = "cpu")
      .def_readwrite("vits", &PyClass::vits)
      .def_readwrite("matcha", &PyClass::matcha)
      .def_readwrite("kokoro", &PyClass::kokoro)
      .def_readwrite("zipvoice", &PyClass::zipvoice)
      .def_readwrite("kitten", &PyClass::kitten)
      .def_readwrite("pocket", &PyClass::pocket)
      .def_readwrite("supertonic", &PyClass::supertonic)
      .def_readwrite("num_threads", &PyClass::num_threads)
      .def_readwrite("debug", &PyClass::debug)
      .def_readwrite("provider", &PyClass::provider)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-tts-model-config.h
================================================
// sherpa-onnx/python/csrc/offline-tts-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_TTS_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_TTS_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineTtsModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_TTS_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-tts-pocket-model-config.cc
================================================
// sherpa-onnx/python/csrc/offline-tts-pocket-model-config.cc
//
// Copyright (c)  2026  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/offline-tts-pocket-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/offline-tts-pocket-model-config.h"

namespace sherpa_onnx {

void PybindOfflineTtsPocketModelConfig(py::module *m) {
  using PyClass = OfflineTtsPocketModelConfig;

  py::class_<PyClass>(*m, "OfflineTtsPocketModelConfig")
      .def(py::init<>())
      .def(py::init<const std::string &, const std::string &,
                    const std::string &, const std::string &,
                    const std::string &, const std::string &,
                    const std::string &, int32_t>(),
           py::arg("lm_flow"), py::arg("lm_main"), py::arg("encoder"),
           py::arg("decoder"), py::arg("text_conditioner"),
           py::arg("vocab_json"), py::arg("token_scores_json"),
           py::arg("voice_embedding_cache_capacity") = 50)
      .def_readwrite("lm_flow", &PyClass::lm_flow)
      .def_readwrite("lm_main", &PyClass::lm_main)
      .def_readwrite("encoder", &PyClass::encoder)
      .def_readwrite("decoder", &PyClass::decoder)
      .def_readwrite("text_conditioner", &PyClass::text_conditioner)
      .def_readwrite("vocab_json", &PyClass::vocab_json)
      .def_readwrite("token_scores_json", &PyClass::token_scores_json)
      .def_readwrite("voice_embedding_cache_capacity",
                     &PyClass::voice_embedding_cache_capacity)
      .def("validate", &PyClass::Validate)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-tts-pocket-model-config.h
================================================
// sherpa-onnx/python/csrc/offline-tts-pocket-model-config.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_TTS_POCKET_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_TTS_POCKET_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineTtsPocketModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_TTS_POCKET_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-tts-supertonic-model-config.cc
================================================
// sherpa-onnx/python/csrc/offline-tts-supertonic-model-config.cc
//
// Copyright (c)  2026 zengyw

#include "sherpa-onnx/python/csrc/offline-tts-supertonic-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/offline-tts-supertonic-model-config.h"

namespace sherpa_onnx {

void PybindOfflineTtsSupertonicModelConfig(py::module *m) {
  using PyClass = OfflineTtsSupertonicModelConfig;

  py::class_<PyClass>(*m, "OfflineTtsSupertonicModelConfig")
      .def(py::init<>())
      .def(py::init<const std::string &, const std::string &,
                    const std::string &, const std::string &,
                    const std::string &, const std::string &,
                    const std::string &>(),
           py::arg("duration_predictor"), py::arg("text_encoder"),
           py::arg("vector_estimator"), py::arg("vocoder"), py::arg("tts_json"),
           py::arg("unicode_indexer"), py::arg("voice_style"))
      .def_readwrite("duration_predictor", &PyClass::duration_predictor)
      .def_readwrite("text_encoder", &PyClass::text_encoder)
      .def_readwrite("vector_estimator", &PyClass::vector_estimator)
      .def_readwrite("vocoder", &PyClass::vocoder)
      .def_readwrite("tts_json", &PyClass::tts_json)
      .def_readwrite("unicode_indexer", &PyClass::unicode_indexer)
      .def_readwrite("voice_style", &PyClass::voice_style)
      .def("__str__", &PyClass::ToString)
      .def("validate", &PyClass::Validate);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-tts-supertonic-model-config.h
================================================
// sherpa-onnx/python/csrc/offline-tts-supertonic-model-config.h
//
// Copyright (c)  2026 zengyw

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_TTS_SUPERTONIC_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_TTS_SUPERTONIC_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineTtsSupertonicModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_TTS_SUPERTONIC_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-tts-vits-model-config.cc
================================================
// sherpa-onnx/python/csrc/offline-tts-vits-model-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/offline-tts-vits-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/offline-tts-vits-model-config.h"

namespace sherpa_onnx {

void PybindOfflineTtsVitsModelConfig(py::module *m) {
  using PyClass = OfflineTtsVitsModelConfig;

  py::class_<PyClass>(*m, "OfflineTtsVitsModelConfig")
      .def(py::init<>())
      .def(py::init<const std::string &, const std::string &,
                    const std::string &, const std::string &,
                    const std::string &, float, float, float>(),
           py::arg("model"), py::arg("lexicon") = "", py::arg("tokens"),
           py::arg("data_dir") = "", py::arg("dict_dir") = "",
           py::arg("noise_scale") = 0.667, py::arg("noise_scale_w") = 0.8,
           py::arg("length_scale") = 1.0)
      .def_readwrite("model", &PyClass::model)
      .def_readwrite("lexicon", &PyClass::lexicon)
      .def_readwrite("tokens", &PyClass::tokens)
      .def_readwrite("data_dir", &PyClass::data_dir)
      .def_readwrite("dict_dir", &PyClass::dict_dir)
      .def_readwrite("noise_scale", &PyClass::noise_scale)
      .def_readwrite("noise_scale_w", &PyClass::noise_scale_w)
      .def_readwrite("length_scale", &PyClass::length_scale)
      .def("__str__", &PyClass::ToString)
      .def("validate", &PyClass::Validate);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-tts-vits-model-config.h
================================================
// sherpa-onnx/python/csrc/offline-tts-vits-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_TTS_VITS_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_TTS_VITS_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineTtsVitsModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_TTS_VITS_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-tts-zipvoice-model-config.cc
================================================
// sherpa-onnx/python/csrc/offline-tts-zipvoice-model-config.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/offline-tts-zipvoice-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/offline-tts-zipvoice-model-config.h"

namespace sherpa_onnx {

void PybindOfflineTtsZipvoiceModelConfig(py::module *m) {
  using PyClass = OfflineTtsZipvoiceModelConfig;

  py::class_<PyClass>(*m, "OfflineTtsZipvoiceModelConfig")
      .def(py::init<>())
      .def(py::init<const std::string &, const std::string &,
                    const std::string &, const std::string &,
                    const std::string &, const std::string &, float, float,
                    float, float>(),
           py::arg("tokens"), py::arg("encoder"), py::arg("decoder"),
           py::arg("vocoder"), py::arg("data_dir") = "",
           py::arg("lexicon") = "", py::arg("feat_scale") = 0.1,
           py::arg("t_shift") = 0.5, py::arg("target_rms") = 0.1,
           py::arg("guidance_scale") = 1.0)
      .def_readwrite("tokens", &PyClass::tokens)
      .def_readwrite("encoder", &PyClass::encoder)
      .def_readwrite("decoder", &PyClass::decoder)
      .def_readwrite("vocoder", &PyClass::vocoder)
      .def_readwrite("data_dir", &PyClass::data_dir)
      .def_readwrite("lexicon", &PyClass::lexicon)
      .def_readwrite("feat_scale", &PyClass::feat_scale)
      .def_readwrite("t_shift", &PyClass::t_shift)
      .def_readwrite("target_rms", &PyClass::target_rms)
      .def_readwrite("guidance_scale", &PyClass::guidance_scale)
      .def("__str__", &PyClass::ToString)
      .def("validate", &PyClass::Validate);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-tts-zipvoice-model-config.h
================================================
// sherpa-onnx/python/csrc/offline-tts-zipvoice-model-config.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_TTS_ZIPVOICE_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_TTS_ZIPVOICE_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineTtsZipvoiceModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_TTS_ZIPVOICE_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-tts.cc
================================================
// sherpa-onnx/python/csrc/offline-tts.cc
//
// Copyright (c)  2023  Xiaomi Corporation
#include "sherpa-onnx/python/csrc/offline-tts.h"

#include <algorithm>
#include <string>
#include <vector>

#include "sherpa-onnx/csrc/offline-tts.h"
#include "sherpa-onnx/python/csrc/offline-tts-model-config.h"

namespace sherpa_onnx {

static void PybindGeneratedAudio(py::module *m) {
  using PyClass = GeneratedAudio;
  py::class_<PyClass>(*m, "GeneratedAudio")
      .def(py::init<>())
      .def_readwrite("samples", &PyClass::samples)
      .def_readwrite("sample_rate", &PyClass::sample_rate)
      .def("__str__", [](PyClass &self) {
        std::ostringstream os;
        os << "GeneratedAudio(sample_rate=" << self.sample_rate << ", ";
        os << "num_samples=" << self.samples.size() << ")";
        return os.str();
      });
}

static void PybindGenerationConfig(py::module *m) {
  using PyClass = GenerationConfig;

  py::class_<PyClass>(*m, "GenerationConfig")
      .def(py::init<>())
      .def_readwrite("silence_scale", &PyClass::silence_scale)
      .def_readwrite("speed", &PyClass::speed)
      .def_readwrite("sid", &PyClass::sid)
      .def_readwrite("reference_audio", &PyClass::reference_audio)
      .def_readwrite("reference_sample_rate", &PyClass::reference_sample_rate)
      .def_readwrite("reference_text", &PyClass::reference_text)
      .def_readwrite("num_steps", &PyClass::num_steps)
      .def_readwrite("extra", &PyClass::extra)
      .def("__str__", &PyClass::ToString);
}

static void PybindOfflineTtsConfig(py::module *m) {
  PybindOfflineTtsModelConfig(m);

  using PyClass = OfflineTtsConfig;
  py::class_<PyClass>(*m, "OfflineTtsConfig")
      .def(py::init<>())
      .def(py::init<const OfflineTtsModelConfig &, const std::string &,
                    const std::string &, int32_t, float>(),
           py::arg("model"), py::arg("rule_fsts") = "",
           py::arg("rule_fars") = "", py::arg("max_num_sentences") = 1,
           py::arg("silence_scale") = 0.2)
      .def_readwrite("model", &PyClass::model)
      .def_readwrite("rule_fsts", &PyClass::rule_fsts)
      .def_readwrite("rule_fars", &PyClass::rule_fars)
      .def_readwrite("max_num_sentences", &PyClass::max_num_sentences)
      .def_readwrite("silence_scale", &PyClass::silence_scale)
      .def("validate", &PyClass::Validate)
      .def("__str__", &PyClass::ToString);
}

void PybindOfflineTts(py::module *m) {
  PybindOfflineTtsConfig(m);
  PybindGeneratedAudio(m);
  PybindGenerationConfig(m);

  using PyClass = OfflineTts;
  py::class_<PyClass>(*m, "OfflineTts")
      .def(py::init<const OfflineTtsConfig &>(), py::arg("config"),
           py::call_guard<py::gil_scoped_release>())
      .def_property_readonly("sample_rate", &PyClass::SampleRate)
      .def_property_readonly("num_speakers", &PyClass::NumSpeakers)
      .def(
          "generate",
          [](const PyClass &self, const std::string &text, int64_t sid,
             float speed,
             std::function<int32_t(py::array_t<float>, float)> callback)
              -> GeneratedAudio {
            if (!callback) {
              return self.Generate(text, sid, speed);
            }

            std::function<int32_t(const float *, int32_t, float)>
                callback_wrapper = [callback](const float *samples, int32_t n,
                                              float progress) {
                  // CAUTION(fangjun): we have to copy samples since it is
                  // freed once the call back returns.

                  pybind11::gil_scoped_acquire acquire;

                  pybind11::array_t<float> array(n);
                  py::buffer_info buf = array.request();
                  auto p = static_cast<float *>(buf.ptr);
                  std::copy(samples, samples + n, p);
                  return callback(array, progress);
                };

            return self.Generate(text, sid, speed, callback_wrapper);
          },
          py::arg("text"), py::arg("sid") = 0, py::arg("speed") = 1.0,
          py::arg("callback") = py::none(),
          py::call_guard<py::gil_scoped_release>())
      .def(
          "generate",
          [](const PyClass &self, const std::string &text,
             const GenerationConfig &config,
             std::function<int32_t(py::array_t<float>, float)> callback)
              -> GeneratedAudio {
            if (!callback) {
              return self.Generate(text, config);
            }

            std::function<int32_t(const float *, int32_t, float)>
                callback_wrapper = [callback](const float *samples, int32_t n,
                                              float progress) {
                  py::gil_scoped_acquire acquire;

                  py::array_t<float> array(n);
                  auto buf = array.request();
                  auto *p = static_cast<float *>(buf.ptr);
                  std::copy(samples, samples + n, p);

                  return callback(array, progress);
                };

            return self.Generate(text, config, callback_wrapper);
          },
          py::arg("text"), py::arg("config"), py::arg("callback") = py::none(),
          py::call_guard<py::gil_scoped_release>())
      .def(
          "generate",
          [](const PyClass &self, const std::string &text,
             const std::string &prompt_text,
             const std::vector<float> &prompt_samples, int32_t sample_rate,
             float speed, int32_t num_steps,
             std::function<int32_t(py::array_t<float>, float)> callback)
              -> GeneratedAudio {
            GenerationConfig config;
            config.reference_audio = prompt_samples;
            config.reference_sample_rate = sample_rate;
            config.reference_text = prompt_text;
            config.speed = speed;
            config.num_steps = num_steps;

            if (!callback) {
              return self.Generate(text, config);
            }

            std::function<int32_t(const float *, int32_t, float)>
                callback_wrapper = [callback](const float *samples, int32_t n,
                                              float progress) {
                  // CAUTION(fangjun): we have to copy samples since it is
                  // freed once the call back returns.

                  pybind11::gil_scoped_acquire acquire;

                  pybind11::array_t<float> array(n);
                  py::buffer_info buf = array.request();
                  auto p = static_cast<float *>(buf.ptr);
                  std::copy(samples, samples + n, p);
                  return callback(array, progress);
                };

            return self.Generate(text, config, callback_wrapper);
          },
          py::arg("text"), py::arg("prompt_text"), py::arg("prompt_samples"),
          py::arg("sample_rate"), py::arg("speed") = 1.0,
          py::arg("num_steps") = 4, py::arg("callback") = py::none(),
          py::call_guard<py::gil_scoped_release>());
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-tts.h
================================================
// sherpa-onnx/python/csrc/offline-tts.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_TTS_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_TTS_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineTts(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_TTS_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-wenet-ctc-model-config.cc
================================================
// sherpa-onnx/python/csrc/offline-wenet-ctc-model-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-wenet-ctc-model-config.h"

#include <string>
#include <vector>

#include "sherpa-onnx/python/csrc/offline-wenet-ctc-model-config.h"

namespace sherpa_onnx {

void PybindOfflineWenetCtcModelConfig(py::module *m) {
  using PyClass = OfflineWenetCtcModelConfig;
  py::class_<PyClass>(*m, "OfflineWenetCtcModelConfig")
      .def(py::init<const std::string &>(), py::arg("model"))
      .def_readwrite("model", &PyClass::model)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-wenet-ctc-model-config.h
================================================
// sherpa-onnx/python/csrc/offline-wenet-ctc-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_WENET_CTC_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_WENET_CTC_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineWenetCtcModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_WENET_CTC_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-whisper-model-config.cc
================================================
// sherpa-onnx/python/csrc/offline-whisper-model-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-whisper-model-config.h"

#include <string>
#include <vector>

#include "sherpa-onnx/python/csrc/offline-whisper-model-config.h"

namespace sherpa_onnx {

void PybindOfflineWhisperModelConfig(py::module *m) {
  using PyClass = OfflineWhisperModelConfig;
  py::class_<PyClass>(*m, "OfflineWhisperModelConfig")
      .def(py::init<const std::string &, const std::string &,
                    const std::string &, const std::string &, int32_t, bool,
                    bool>(),
           py::arg("encoder"), py::arg("decoder"), py::arg("language"),
           py::arg("task"), py::arg("tail_paddings") = -1,
           py::arg("enable_token_timestamps") = false,
           py::arg("enable_segment_timestamps") = false)
      .def_readwrite("encoder", &PyClass::encoder)
      .def_readwrite("decoder", &PyClass::decoder)
      .def_readwrite("language", &PyClass::language)
      .def_readwrite("task", &PyClass::task)
      .def_readwrite("tail_paddings", &PyClass::tail_paddings)
      .def_readwrite("enable_token_timestamps",
                     &PyClass::enable_token_timestamps)
      .def_readwrite("enable_segment_timestamps",
                     &PyClass::enable_segment_timestamps)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-whisper-model-config.h
================================================
// sherpa-onnx/python/csrc/offline-whisper-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_WHISPER_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_WHISPER_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineWhisperModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_WHISPER_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/offline-zipformer-ctc-model-config.cc
================================================
// sherpa-onnx/python/csrc/offline-zipformer-ctc-model-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/offline-zipformer-ctc-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/offline-zipformer-ctc-model-config.h"

namespace sherpa_onnx {

void PybindOfflineZipformerCtcModelConfig(py::module *m) {
  using PyClass = OfflineZipformerCtcModelConfig;
  py::class_<PyClass>(*m, "OfflineZipformerCtcModelConfig")
      .def(py::init<>())
      .def(py::init<const std::string &>(), py::arg("model"))
      .def_readwrite("model", &PyClass::model)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/offline-zipformer-ctc-model-config.h
================================================
// sherpa-onnx/python/csrc/offline-zipformer-ctc-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_ZIPFORMER_CTC_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_ZIPFORMER_CTC_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOfflineZipformerCtcModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_ZIPFORMER_CTC_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/online-ctc-fst-decoder-config.cc
================================================
// sherpa-onnx/python/csrc/online-ctc-fst-decoder-config.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/online-ctc-fst-decoder-config.h"

#include <string>

#include "sherpa-onnx/csrc/online-ctc-fst-decoder-config.h"

namespace sherpa_onnx {

void PybindOnlineCtcFstDecoderConfig(py::module *m) {
  using PyClass = OnlineCtcFstDecoderConfig;
  py::class_<PyClass>(*m, "OnlineCtcFstDecoderConfig")
      .def(py::init<const std::string &, int32_t>(), py::arg("graph") = "",
           py::arg("max_active") = 3000)
      .def_readwrite("graph", &PyClass::graph)
      .def_readwrite("max_active", &PyClass::max_active)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/online-ctc-fst-decoder-config.h
================================================
// sherpa-onnx/python/csrc/online-ctc-fst-decoder-config.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_ONLINE_CTC_FST_DECODER_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_ONLINE_CTC_FST_DECODER_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOnlineCtcFstDecoderConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_ONLINE_CTC_FST_DECODER_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/online-lm-config.cc
================================================
// sherpa-onnx/python/csrc/online-lm-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/online-lm-config.h"

#include <string>

#include "sherpa-onnx//csrc/online-lm-config.h"

namespace sherpa_onnx {

void PybindOnlineLMConfig(py::module *m) {
  using PyClass = OnlineLMConfig;
  py::class_<PyClass>(*m, "OnlineLMConfig")
      .def(py::init<const std::string &, float, int32_t,
           const std::string &, bool, const std::string &,
           float, int>(),
           py::arg("model") = "", py::arg("scale") = 0.5f,
           py::arg("lm_num_threads") = 1, py::arg("lm_provider") = "cpu",
           py::arg("shallow_fusion") = true, py::arg("lodr_fst") = "",
           py::arg("lodr_scale") = 0.0f, py::arg("lodr_backoff_id") = -1)
      .def_readwrite("model", &PyClass::model)
      .def_readwrite("scale", &PyClass::scale)
      .def_readwrite("lm_provider", &PyClass::lm_provider)
      .def_readwrite("lm_num_threads", &PyClass::lm_num_threads)
      .def_readwrite("shallow_fusion", &PyClass::shallow_fusion)
      .def_readwrite("lodr_fst", &PyClass::lodr_fst)
      .def_readwrite("lodr_scale", &PyClass::lodr_scale)
      .def_readwrite("lodr_backoff_id", &PyClass::lodr_backoff_id)

      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/online-lm-config.h
================================================
// sherpa-onnx/python/csrc/online-lm-config.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_ONLINE_LM_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_ONLINE_LM_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOnlineLMConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_ONLINE_LM_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/online-model-config.cc
================================================
// sherpa-onnx/python/csrc/online-model-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/online-model-config.h"

#include <string>
#include <vector>

#include "sherpa-onnx/csrc/online-model-config.h"
#include "sherpa-onnx/csrc/online-transducer-model-config.h"
#include "sherpa-onnx/csrc/provider-config.h"
#include "sherpa-onnx/python/csrc/online-nemo-ctc-model-config.h"
#include "sherpa-onnx/python/csrc/online-paraformer-model-config.h"
#include "sherpa-onnx/python/csrc/online-t-one-ctc-model-config.h"
#include "sherpa-onnx/python/csrc/online-transducer-model-config.h"
#include "sherpa-onnx/python/csrc/online-wenet-ctc-model-config.h"
#include "sherpa-onnx/python/csrc/online-zipformer2-ctc-model-config.h"
#include "sherpa-onnx/python/csrc/provider-config.h"

namespace sherpa_onnx {

void PybindOnlineModelConfig(py::module *m) {
  PybindOnlineTransducerModelConfig(m);
  PybindOnlineParaformerModelConfig(m);
  PybindOnlineWenetCtcModelConfig(m);
  PybindOnlineZipformer2CtcModelConfig(m);
  PybindOnlineNeMoCtcModelConfig(m);
  PybindOnlineToneCtcModelConfig(m);
  PybindProviderConfig(m);

  using PyClass = OnlineModelConfig;
  py::class_<PyClass>(*m, "OnlineModelConfig")
      .def(py::init<const OnlineTransducerModelConfig &,
                    const OnlineParaformerModelConfig &,
                    const OnlineWenetCtcModelConfig &,
                    const OnlineZipformer2CtcModelConfig &,
                    const OnlineNeMoCtcModelConfig &,
                    const OnlineToneCtcModelConfig &, const ProviderConfig &,
                    const std::string &, int32_t, int32_t, bool,
                    const std::string &, const std::string &,
                    const std::string &>(),
           py::arg("transducer") = OnlineTransducerModelConfig(),
           py::arg("paraformer") = OnlineParaformerModelConfig(),
           py::arg("wenet_ctc") = OnlineWenetCtcModelConfig(),
           py::arg("zipformer2_ctc") = OnlineZipformer2CtcModelConfig(),
           py::arg("nemo_ctc") = OnlineNeMoCtcModelConfig(),
           py::arg("t_one_ctc") = OnlineToneCtcModelConfig(),
           py::arg("provider_config") = ProviderConfig(), py::arg("tokens"),
           py::arg("num_threads"), py::arg("warm_up") = 0,
           py::arg("debug") = false, py::arg("model_type") = "",
           py::arg("modeling_unit") = "", py::arg("bpe_vocab") = "")
      .def_readwrite("transducer", &PyClass::transducer)
      .def_readwrite("paraformer", &PyClass::paraformer)
      .def_readwrite("wenet_ctc", &PyClass::wenet_ctc)
      .def_readwrite("zipformer2_ctc", &PyClass::zipformer2_ctc)
      .def_readwrite("nemo_ctc", &PyClass::nemo_ctc)
      .def_readwrite("t_one_ctc", &PyClass::t_one_ctc)
      .def_readwrite("provider_config", &PyClass::provider_config)
      .def_readwrite("tokens", &PyClass::tokens)
      .def_readwrite("num_threads", &PyClass::num_threads)
      .def_readwrite("warm_up", &PyClass::warm_up)
      .def_readwrite("debug", &PyClass::debug)
      .def_readwrite("model_type", &PyClass::model_type)
      .def_readwrite("modeling_unit", &PyClass::modeling_unit)
      .def_readwrite("bpe_vocab", &PyClass::bpe_vocab)
      .def("validate", &PyClass::Validate)
      .def("__str__", &PyClass::ToString);
}
}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/online-model-config.h
================================================
// sherpa-onnx/python/csrc/online-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_ONLINE_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_ONLINE_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOnlineModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_ONLINE_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/online-nemo-ctc-model-config.cc
================================================
// sherpa-onnx/python/csrc/online-nemo-ctc-model-config.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/online-nemo-ctc-model-config.h"

#include <string>
#include <vector>

#include "sherpa-onnx/csrc/online-nemo-ctc-model-config.h"

namespace sherpa_onnx {

void PybindOnlineNeMoCtcModelConfig(py::module *m) {
  using PyClass = OnlineNeMoCtcModelConfig;
  py::class_<PyClass>(*m, "OnlineNeMoCtcModelConfig")
      .def(py::init<const std::string &>(), py::arg("model"))
      .def_readwrite("model", &PyClass::model)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/online-nemo-ctc-model-config.h
================================================
// sherpa-onnx/python/csrc/online-nemo-ctc-model-config.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_ONLINE_NEMO_CTC_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_ONLINE_NEMO_CTC_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOnlineNeMoCtcModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_ONLINE_NEMO_CTC_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/online-paraformer-model-config.cc
================================================
// sherpa-onnx/python/csrc/online-paraformer-model-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/online-paraformer-model-config.h"

#include <string>
#include <vector>

#include "sherpa-onnx/csrc/online-paraformer-model-config.h"

namespace sherpa_onnx {

void PybindOnlineParaformerModelConfig(py::module *m) {
  using PyClass = OnlineParaformerModelConfig;
  py::class_<PyClass>(*m, "OnlineParaformerModelConfig")
      .def(py::init<const std::string &, const std::string &>(),
           py::arg("encoder"), py::arg("decoder"))
      .def_readwrite("encoder", &PyClass::encoder)
      .def_readwrite("decoder", &PyClass::decoder)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/online-paraformer-model-config.h
================================================
// sherpa-onnx/python/csrc/online-paraformer-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_ONLINE_PARAFORMER_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_ONLINE_PARAFORMER_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOnlineParaformerModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_ONLINE_PARAFORMER_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/online-punctuation.cc
================================================
// sherpa-onnx/python/csrc/online-punctuation.cc
//
// Copyright (c) 2024

#include "sherpa-onnx/python/csrc/online-punctuation.h"

#include <string>

#include "sherpa-onnx/csrc/online-punctuation.h"

namespace sherpa_onnx {

static void PybindOnlinePunctuationModelConfig(py::module *m) {
  using PyClass = OnlinePunctuationModelConfig;
  py::class_<PyClass>(*m, "OnlinePunctuationModelConfig")
      .def(py::init<>())
      .def(py::init<const std::string &, const std::string &, int32_t, bool,
                    const std::string &>(),
           py::arg("cnn_bilstm"), py::arg("bpe_vocab"),
           py::arg("num_threads") = 1, py::arg("debug") = false,
           py::arg("provider") = "cpu")
      .def_readwrite("cnn_bilstm", &PyClass::cnn_bilstm)
      .def_readwrite("bpe_vocab", &PyClass::bpe_vocab)
      .def_readwrite("num_threads", &PyClass::num_threads)
      .def_readwrite("debug", &PyClass::debug)
      .def_readwrite("provider", &PyClass::provider)
      .def("validate", &PyClass::Validate)
      .def("__str__", &PyClass::ToString);
}

static void PybindOnlinePunctuationConfig(py::module *m) {
  PybindOnlinePunctuationModelConfig(m);
  using PyClass = OnlinePunctuationConfig;

  py::class_<PyClass>(*m, "OnlinePunctuationConfig")
      .def(py::init<>())
      .def(py::init<const OnlinePunctuationModelConfig &>(),
           py::arg("model_config"))
      .def_readwrite("model_config", &PyClass::model)
      .def("validate", &PyClass::Validate)
      .def("__str__", &PyClass::ToString);
}

void PybindOnlinePunctuation(py::module *m) {
  PybindOnlinePunctuationConfig(m);
  using PyClass = OnlinePunctuation;

  py::class_<PyClass>(*m, "OnlinePunctuation")
      .def(py::init<const OnlinePunctuationConfig &>(), py::arg("config"),
           py::call_guard<py::gil_scoped_release>())
      .def("add_punctuation_with_case", &PyClass::AddPunctuationWithCase,
           py::arg("text"), py::call_guard<py::gil_scoped_release>());
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/online-punctuation.h
================================================
// sherpa-onnx/python/csrc/online-punctuation.h
//
// Copyright (c) 2024

#ifndef SHERPA_ONNX_PYTHON_CSRC_ONLINE_PUNCTUATION_H_
#define SHERPA_ONNX_PYTHON_CSRC_ONLINE_PUNCTUATION_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOnlinePunctuation(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_ONLINE_PUNCTUATION_H_


================================================
FILE: sherpa-onnx/python/csrc/online-recognizer.cc
================================================
// sherpa-onnx/python/csrc/online-recongizer.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/online-recognizer.h"

#include <string>
#include <vector>

#include "sherpa-onnx/csrc/online-recognizer.h"

namespace sherpa_onnx {

static void PybindOnlineRecognizerResult(py::module *m) {
  using PyClass = OnlineRecognizerResult;
  py::class_<PyClass>(*m, "OnlineRecognizerResult")
      .def_property_readonly(
          "text",
          [](PyClass &self) -> py::str {
            return py::str(PyUnicode_DecodeUTF8(self.text.c_str(),
                                                self.text.size(), "ignore"));
          })
      .def_property_readonly(
          "tokens",
          [](PyClass &self) -> std::vector<std::string> { return self.tokens; })
      .def_property_readonly(
          "start_time", [](PyClass &self) -> float { return self.start_time; })
      .def_property_readonly(
          "timestamps",
          [](PyClass &self) -> std::vector<float> { return self.timestamps; })
      .def_property_readonly(
          "ys_probs",
          [](PyClass &self) -> std::vector<float> { return self.ys_probs; })
      .def_property_readonly(
          "lm_probs",
          [](PyClass &self) -> std::vector<float> { return self.lm_probs; })
      .def_property_readonly("context_scores",
                             [](PyClass &self) -> std::vector<float> {
                               return self.context_scores;
                             })
      .def_property_readonly(
          "segment", [](PyClass &self) -> int32_t { return self.segment; })
      .def_property_readonly(
          "words",
          [](PyClass &self) -> std::vector<int32_t> { return self.words; })
      .def_property_readonly(
          "is_final", [](PyClass &self) -> bool { return self.is_final; })
      .def("__str__", &PyClass::AsJsonString,
           py::call_guard<py::gil_scoped_release>())
      .def("as_json_string", &PyClass::AsJsonString,
           py::call_guard<py::gil_scoped_release>());
}

static void PybindOnlineRecognizerConfig(py::module *m) {
  using PyClass = OnlineRecognizerConfig;
  py::class_<PyClass>(*m, "OnlineRecognizerConfig")
      .def(py::init<const FeatureExtractorConfig &, const OnlineModelConfig &,
                    const OnlineLMConfig &, const EndpointConfig &,
                    const OnlineCtcFstDecoderConfig &, bool,
                    const std::string &, int32_t, const std::string &, float,
                    float, float, const std::string &, const std::string &,
                    bool, const HomophoneReplacerConfig &>(),
           py::arg("feat_config"), py::arg("model_config"),
           py::arg("lm_config") = OnlineLMConfig(),
           py::arg("endpoint_config") = EndpointConfig(),
           py::arg("ctc_fst_decoder_config") = OnlineCtcFstDecoderConfig(),
           py::arg("enable_endpoint"), py::arg("decoding_method"),
           py::arg("max_active_paths") = 4, py::arg("hotwords_file") = "",
           py::arg("hotwords_score") = 0, py::arg("blank_penalty") = 0.0,
           py::arg("temperature_scale") = 2.0, py::arg("rule_fsts") = "",
           py::arg("rule_fars") = "", py::arg("reset_encoder") = false,
           py::arg("hr") = HomophoneReplacerConfig{})
      .def_readwrite("feat_config", &PyClass::feat_config)
      .def_readwrite("model_config", &PyClass::model_config)
      .def_readwrite("lm_config", &PyClass::lm_config)
      .def_readwrite("endpoint_config", &PyClass::endpoint_config)
      .def_readwrite("ctc_fst_decoder_config", &PyClass::ctc_fst_decoder_config)
      .def_readwrite("enable_endpoint", &PyClass::enable_endpoint)
      .def_readwrite("decoding_method", &PyClass::decoding_method)
      .def_readwrite("max_active_paths", &PyClass::max_active_paths)
      .def_readwrite("hotwords_file", &PyClass::hotwords_file)
      .def_readwrite("hotwords_score", &PyClass::hotwords_score)
      .def_readwrite("blank_penalty", &PyClass::blank_penalty)
      .def_readwrite("temperature_scale", &PyClass::temperature_scale)
      .def_readwrite("rule_fsts", &PyClass::rule_fsts)
      .def_readwrite("rule_fars", &PyClass::rule_fars)
      .def_readwrite("reset_encoder", &PyClass::reset_encoder)
      .def_readwrite("hr", &PyClass::hr)
      .def("__str__", &PyClass::ToString);
}

void PybindOnlineRecognizer(py::module *m) {
  PybindOnlineRecognizerResult(m);
  PybindOnlineRecognizerConfig(m);

  using PyClass = OnlineRecognizer;
  py::class_<PyClass>(*m, "OnlineRecognizer")
      .def(py::init<const OnlineRecognizerConfig &>(), py::arg("config"),
           py::call_guard<py::gil_scoped_release>())
      .def(
          "create_stream",
          [](const PyClass &self) { return self.CreateStream(); },
          py::call_guard<py::gil_scoped_release>())
      .def(
          "create_stream",
          [](PyClass &self, const std::string &hotwords) {
            return self.CreateStream(hotwords);
          },
          py::arg("hotwords"), py::call_guard<py::gil_scoped_release>())
      .def("is_ready", &PyClass::IsReady,
           py::call_guard<py::gil_scoped_release>())
      .def("decode_stream", &PyClass::DecodeStream, py::arg("s"),
           py::call_guard<py::gil_scoped_release>())
      .def(
          "decode_streams",
          [](PyClass &self, std::vector<OnlineStream *> ss) {
            self.DecodeStreams(ss.data(), ss.size());
          },
          py::arg("ss"), py::call_guard<py::gil_scoped_release>())
      .def("get_result", &PyClass::GetResult, py::arg("s"),
           py::call_guard<py::gil_scoped_release>())
      .def("is_endpoint", &PyClass::IsEndpoint, py::arg("s"),
           py::call_guard<py::gil_scoped_release>())
      .def("reset", &PyClass::Reset, py::arg("s"),
           py::call_guard<py::gil_scoped_release>());
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/online-recognizer.h
================================================
// sherpa-onnx/python/csrc/online-recognizer.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_ONLINE_RECOGNIZER_H_
#define SHERPA_ONNX_PYTHON_CSRC_ONLINE_RECOGNIZER_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOnlineRecognizer(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_ONLINE_RECOGNIZER_H_


================================================
FILE: sherpa-onnx/python/csrc/online-speech-denoiser.cc
================================================
// sherpa-onnx/python/csrc/online-speech-denoiser.cc
//
// Copyright (c)  2026  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/online-speech-denoiser.h"

#include <vector>

#include "sherpa-onnx/csrc/online-speech-denoiser.h"
#include "sherpa-onnx/python/csrc/offline-speech-denoiser.h"
#include "sherpa-onnx/python/csrc/offline-speech-denoiser-model-config.h"

namespace sherpa_onnx {

static void PybindOnlineSpeechDenoiserConfig(py::module *m) {
  using PyClass = OnlineSpeechDenoiserConfig;

  py::class_<PyClass>(*m, "OnlineSpeechDenoiserConfig")
      .def(py::init<>())
      .def(py::init<const OfflineSpeechDenoiserModelConfig &>(),
           py::arg("model") = OfflineSpeechDenoiserModelConfig{})
      .def_readwrite("model", &PyClass::model)
      .def("validate", &PyClass::Validate)
      .def("__str__", &PyClass::ToString);
}

void PybindOnlineSpeechDenoiser(py::module *m) {
  PybindOnlineSpeechDenoiserConfig(m);

  using PyClass = OnlineSpeechDenoiser;
  py::class_<PyClass>(*m, "OnlineSpeechDenoiser")
      .def(py::init<const OnlineSpeechDenoiserConfig &>(), py::arg("config"),
           py::call_guard<py::gil_scoped_release>())
      .def(
          "__call__",
          [](PyClass &self, const std::vector<float> &samples,
             int32_t sample_rate) {
            return self.Run(samples.data(), samples.size(), sample_rate);
          },
          py::arg("samples"), py::arg("sample_rate"),
          py::call_guard<py::gil_scoped_release>())
      .def(
          "run",
          [](PyClass &self, const std::vector<float> &samples,
             int32_t sample_rate) {
            return self.Run(samples.data(), samples.size(), sample_rate);
          },
          py::arg("samples"), py::arg("sample_rate"),
          py::call_guard<py::gil_scoped_release>())
      .def("flush", &PyClass::Flush, py::call_guard<py::gil_scoped_release>())
      .def("reset", &PyClass::Reset, py::call_guard<py::gil_scoped_release>())
      .def_property_readonly("sample_rate", &PyClass::GetSampleRate)
      .def_property_readonly("frame_shift_in_samples",
                             &PyClass::GetFrameShiftInSamples);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/online-speech-denoiser.h
================================================
// sherpa-onnx/python/csrc/online-speech-denoiser.h
//
// Copyright (c)  2026  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_ONLINE_SPEECH_DENOISER_H_
#define SHERPA_ONNX_PYTHON_CSRC_ONLINE_SPEECH_DENOISER_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOnlineSpeechDenoiser(py::module *m);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_PYTHON_CSRC_ONLINE_SPEECH_DENOISER_H_


================================================
FILE: sherpa-onnx/python/csrc/online-stream.cc
================================================
// sherpa-onnx/python/csrc/online-stream.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/online-stream.h"

#include <vector>

#include "sherpa-onnx/csrc/online-stream.h"

namespace sherpa_onnx {

constexpr const char *kAcceptWaveformUsage = R"(
Process audio samples.

Args:
  sample_rate:
    Sample rate of the input samples. If it is different from the one
    expected by the model, we will do resampling inside.
  waveform:
    A 1-D float32 tensor containing audio samples. It must be normalized
    to the range [-1, 1].
)";


constexpr const char *kGetFramesUsage = R"(
Get n frames starting from the given frame index.
(hint: intended for debugging, for comparing FBANK features across pipelines)

Args:
  frame_index:
    The starting frame index
  n:
    Number of frames to get.
Return:
  Return a 2-D tensor of shape (n, feature_dim).
  which is flattened into a 1-D vector (flattened in row major).
  Unflatten in python with:
    `features = np.reshape(arr, (n, feature_dim))`
)";

void PybindOnlineStream(py::module *m) {
  using PyClass = OnlineStream;
  py::class_<PyClass>(*m, "OnlineStream")
      .def(
          "accept_waveform",
          [](PyClass &self, float sample_rate,
             const std::vector<float> &waveform) {
            self.AcceptWaveform(sample_rate, waveform.data(), waveform.size());
          },
          py::arg("sample_rate"), py::arg("waveform"), kAcceptWaveformUsage,
          py::call_guard<py::gil_scoped_release>())
      .def("input_finished", &PyClass::InputFinished,
           py::call_guard<py::gil_scoped_release>())
      .def("set_option", &PyClass::SetOption, py::arg("key"),
           py::arg("value"), py::call_guard<py::gil_scoped_release>())
      .def("has_option", &PyClass::HasOption, py::arg("key"),
           py::call_guard<py::gil_scoped_release>())
      .def("get_option", &PyClass::GetOption, py::arg("key"),
           py::call_guard<py::gil_scoped_release>())
      .def("get_frames", &PyClass::GetFrames,
           py::arg("frame_index"), py::arg("n"), kGetFramesUsage,
           py::call_guard<py::gil_scoped_release>());
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/online-stream.h
================================================
// sherpa-onnx/python/csrc/online-stream.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_ONLINE_STREAM_H_
#define SHERPA_ONNX_PYTHON_CSRC_ONLINE_STREAM_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOnlineStream(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_ONLINE_STREAM_H_


================================================
FILE: sherpa-onnx/python/csrc/online-t-one-ctc-model-config.cc
================================================
// sherpa-onnx/python/csrc/online-t-one-ctc-model-config.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/online-t-one-ctc-model-config.h"

#include <string>
#include <vector>

#include "sherpa-onnx/csrc/online-t-one-ctc-model-config.h"

namespace sherpa_onnx {

void PybindOnlineToneCtcModelConfig(py::module *m) {
  using PyClass = OnlineToneCtcModelConfig;
  py::class_<PyClass>(*m, "OnlineToneCtcModelConfig")
      .def(py::init<const std::string &>(), py::arg("model"))
      .def_readwrite("model", &PyClass::model)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/online-t-one-ctc-model-config.h
================================================
// sherpa-onnx/python/csrc/online-t-one-ctc-model-config.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_ONLINE_T_ONE_CTC_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_ONLINE_T_ONE_CTC_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOnlineToneCtcModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_ONLINE_T_ONE_CTC_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/online-transducer-model-config.cc
================================================
// sherpa-onnx/python/csrc/online-transducer-model-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/csrc/online-transducer-model-config.h"

#include <string>

#include "sherpa-onnx/python/csrc/online-transducer-model-config.h"

namespace sherpa_onnx {

void PybindOnlineTransducerModelConfig(py::module *m) {
  using PyClass = OnlineTransducerModelConfig;
  py::class_<PyClass>(*m, "OnlineTransducerModelConfig")
      .def(py::init<const std::string &, const std::string &,
                    const std::string &>(),
           py::arg("encoder"), py::arg("decoder"), py::arg("joiner"))
      .def_readwrite("encoder", &PyClass::encoder)
      .def_readwrite("decoder", &PyClass::decoder)
      .def_readwrite("joiner", &PyClass::joiner)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/online-transducer-model-config.h
================================================
// sherpa-onnx/python/csrc/online-transducer-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_ONLINE_TRANSDUCER_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_ONLINE_TRANSDUCER_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOnlineTransducerModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_ONLINE_TRANSDUCER_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/online-wenet-ctc-model-config.cc
================================================
// sherpa-onnx/python/csrc/online-wenet-ctc-model-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/online-wenet-ctc-model-config.h"

#include <string>
#include <vector>

#include "sherpa-onnx/csrc/online-wenet-ctc-model-config.h"

namespace sherpa_onnx {

void PybindOnlineWenetCtcModelConfig(py::module *m) {
  using PyClass = OnlineWenetCtcModelConfig;
  py::class_<PyClass>(*m, "OnlineWenetCtcModelConfig")
      .def(py::init<const std::string &, int32_t, int32_t>(), py::arg("model"),
           py::arg("chunk_size") = 16, py::arg("num_left_chunks") = 4)
      .def_readwrite("model", &PyClass::model)
      .def_readwrite("chunk_size", &PyClass::chunk_size)
      .def_readwrite("num_left_chunks", &PyClass::num_left_chunks)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/online-wenet-ctc-model-config.h
================================================
// sherpa-onnx/python/csrc/online-wenet-ctc-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_ONLINE_WENET_CTC_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_ONLINE_WENET_CTC_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOnlineWenetCtcModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_ONLINE_WENET_CTC_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/online-zipformer2-ctc-model-config.cc
================================================
// sherpa-onnx/python/csrc/online-zipformer2-ctc-model-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/online-zipformer2-ctc-model-config.h"

#include <string>
#include <vector>

#include "sherpa-onnx/csrc/online-zipformer2-ctc-model-config.h"

namespace sherpa_onnx {

void PybindOnlineZipformer2CtcModelConfig(py::module *m) {
  using PyClass = OnlineZipformer2CtcModelConfig;
  py::class_<PyClass>(*m, "OnlineZipformer2CtcModelConfig")
      .def(py::init<const std::string &>(), py::arg("model"))
      .def_readwrite("model", &PyClass::model)
      .def("__str__", &PyClass::ToString);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/online-zipformer2-ctc-model-config.h
================================================
// sherpa-onnx/python/csrc/online-zipformer2-ctc-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_ONLINE_ZIPFORMER2_CTC_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_ONLINE_ZIPFORMER2_CTC_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindOnlineZipformer2CtcModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_ONLINE_ZIPFORMER2_CTC_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/provider-config.cc
================================================
// sherpa-onnx/python/csrc/provider-config.cc
//
// Copyright (c)  2024  Uniphore (Author: Manickavela A)


#include "sherpa-onnx/python/csrc/provider-config.h"

#include <string>

#include "sherpa-onnx/csrc/provider-config.h"
#include "sherpa-onnx/python/csrc/cuda-config.h"
#include "sherpa-onnx/python/csrc/tensorrt-config.h"

namespace sherpa_onnx {

void PybindProviderConfig(py::module *m) {
  PybindCudaConfig(m);
  PybindTensorrtConfig(m);

  using PyClass = ProviderConfig;
  py::class_<PyClass>(*m, "ProviderConfig")
      .def(py::init<>())
      .def(py::init<const std::string &, int32_t>(),
           py::arg("provider") = "cpu",
           py::arg("device") = 0)
      .def(py::init<const TensorrtConfig &, const CudaConfig &,
          const std::string &, int32_t>(),
           py::arg("trt_config") = TensorrtConfig{},
           py::arg("cuda_config") = CudaConfig{},
           py::arg("provider") = "cpu",
           py::arg("device") = 0)
      .def_readwrite("trt_config", &PyClass::trt_config)
      .def_readwrite("cuda_config", &PyClass::cuda_config)
      .def_readwrite("provider", &PyClass::provider)
      .def_readwrite("device", &PyClass::device)
      .def("__str__", &PyClass::ToString)
      .def("validate", &PyClass::Validate);
}
}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/provider-config.h
================================================
// sherpa-onnx/python/csrc/provider-config.h
//
// Copyright (c)  2024  Uniphore (Author: Manickavela A)

#ifndef SHERPA_ONNX_PYTHON_CSRC_PROVIDER_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_PROVIDER_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindProviderConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_PROVIDER_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/sentence-piece-tokenizer.cc
================================================
// sherpa-onnx/python/csrc/sentence-piece-tokenizer.cc
//
// Copyright (c)  2026  Xiaomi Corporation
#include "sherpa-onnx/python/csrc/sentence-piece-tokenizer.h"

#include <string>
#include <vector>

#include "sherpa-onnx/csrc/sentence-piece-tokenizer.h"

namespace sherpa_onnx {
void PybindSentencePieceTokenizer(py::module *m) {
  using PyClass = SentencePieceTokenizer;
  py::class_<PyClass>(*m, "SentencePieceTokenizer")
      .def(py::init<const std::string &, const std::string &>(),
           py::arg("vocab_json"), py::arg("token_scores_json"),
           py::call_guard<py::gil_scoped_release>())
      .def(
          "encode",
          [](const PyClass &self, const std::string &text,
             py::object out_type) -> py::object {
            auto builtins = py::module::import("builtins");
            py::object int_type = builtins.attr("int");
            py::object str_type = builtins.attr("str");

            if (out_type.is_none() || out_type.equal(str_type)) {
              std::vector<std::string> tokens;
              {
                py::gil_scoped_release release;
                tokens = self.EncodeTokens(text);
              }
              return py::cast(tokens);
            } else if (out_type.equal(int_type)) {
              std::vector<int32_t> ids;
              {
                py::gil_scoped_release release;
                ids = self.EncodeIds(text);
              }
              return py::cast(ids);
            } else {
              throw std::runtime_error(
                  "Invalid out_type. Must be int, str, or None.");
            }
          },
          py::arg("text"), py::arg("out_type") = py::none(),
          "Encode text. out_type can be int, str, or None. Default to str");
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/sentence-piece-tokenizer.h
================================================
// sherpa-onnx/python/csrc/sentence-piece-tokenizer.h
//
// Copyright (c)  2026  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_SENTENCE_PIECE_TOKENIZER_H_
#define SHERPA_ONNX_PYTHON_CSRC_SENTENCE_PIECE_TOKENIZER_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindSentencePieceTokenizer(py::module *m);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_PYTHON_CSRC_SENTENCE_PIECE_TOKENIZER_H_


================================================
FILE: sherpa-onnx/python/csrc/sherpa-onnx.cc
================================================
// sherpa-onnx/python/csrc/sherpa-onnx.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

#include "sherpa-onnx/python/csrc/alsa.h"
#include "sherpa-onnx/python/csrc/audio-tagging.h"
#include "sherpa-onnx/python/csrc/circular-buffer.h"
#include "sherpa-onnx/python/csrc/display.h"
#include "sherpa-onnx/python/csrc/endpoint.h"
#include "sherpa-onnx/python/csrc/features.h"
#include "sherpa-onnx/python/csrc/homophone-replacer.h"
#include "sherpa-onnx/python/csrc/keyword-spotter.h"
#include "sherpa-onnx/python/csrc/offline-ctc-fst-decoder-config.h"
#include "sherpa-onnx/python/csrc/offline-lm-config.h"
#include "sherpa-onnx/python/csrc/offline-model-config.h"
#include "sherpa-onnx/python/csrc/offline-punctuation.h"
#include "sherpa-onnx/python/csrc/offline-recognizer.h"
#include "sherpa-onnx/python/csrc/offline-source-separation.h"
#include "sherpa-onnx/python/csrc/offline-speech-denoiser.h"
#include "sherpa-onnx/python/csrc/offline-stream.h"
#include "sherpa-onnx/python/csrc/online-ctc-fst-decoder-config.h"
#include "sherpa-onnx/python/csrc/online-lm-config.h"
#include "sherpa-onnx/python/csrc/online-model-config.h"
#include "sherpa-onnx/python/csrc/online-punctuation.h"
#include "sherpa-onnx/python/csrc/online-recognizer.h"
#include "sherpa-onnx/python/csrc/online-speech-denoiser.h"
#include "sherpa-onnx/python/csrc/online-stream.h"
#include "sherpa-onnx/python/csrc/speaker-embedding-extractor.h"
#include "sherpa-onnx/python/csrc/speaker-embedding-manager.h"
#include "sherpa-onnx/python/csrc/spoken-language-identification.h"
#include "sherpa-onnx/python/csrc/vad-model-config.h"
#include "sherpa-onnx/python/csrc/vad-model.h"
#include "sherpa-onnx/python/csrc/version.h"
#include "sherpa-onnx/python/csrc/voice-activity-detector.h"
#include "sherpa-onnx/python/csrc/wave-writer.h"

#if SHERPA_ONNX_ENABLE_TTS == 1
#include "sherpa-onnx/python/csrc/offline-tts.h"
#include "sherpa-onnx/python/csrc/sentence-piece-tokenizer.h"
#endif

#if SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION == 1
#include "sherpa-onnx/python/csrc/fast-clustering.h"
#include "sherpa-onnx/python/csrc/offline-speaker-diarization-result.h"
#include "sherpa-onnx/python/csrc/offline-speaker-diarization.h"
#endif

namespace sherpa_onnx {

PYBIND11_MODULE(_sherpa_onnx, m) {
  m.doc() = "pybind11 binding of sherpa-onnx";

  PybindWaveWriter(&m);
  PybindAudioTagging(&m);
  PybindOfflinePunctuation(&m);
  PybindOnlinePunctuation(&m);
  PybindHomophoneReplacer(&m);

  PybindFeatures(&m);
  PybindOnlineCtcFstDecoderConfig(&m);
  PybindOnlineModelConfig(&m);
  PybindOnlineLMConfig(&m);
  PybindOnlineStream(&m);
  PybindEndpoint(&m);
  PybindOnlineRecognizer(&m);
  PybindKeywordSpotter(&m);
  PybindDisplay(&m);

  PybindOfflineStream(&m);
  PybindOfflineLMConfig(&m);
  PybindOfflineModelConfig(&m);
  PybindOfflineCtcFstDecoderConfig(&m);
  PybindOfflineRecognizer(&m);

  PybindVadModelConfig(&m);
  PybindVadModel(&m);
  PybindCircularBuffer(&m);
  PybindVoiceActivityDetector(&m);

#if SHERPA_ONNX_ENABLE_TTS == 1
  PybindOfflineTts(&m);
  PybindSentencePieceTokenizer(&m);
#else
  /* Define "empty" TTS symbols */
  m.attr("OfflineTtsKittenModelConfig") = py::none();
  m.attr("OfflineTtsPocketModelConfig") = py::none();
  m.attr("OfflineTtsKokoroModelConfig") = py::none();
  m.attr("OfflineTtsMatchaModelConfig") = py::none();
  m.attr("OfflineTtsModelConfig") = py::none();
  m.attr("OfflineTtsVitsModelConfig") = py::none();
  m.attr("OfflineTtsZipvoiceModelConfig") = py::none();
  m.attr("GeneratedAudio") = py::none();
  m.attr("OfflineTtsConfig") = py::none();
  m.attr("OfflineTts") = py::none();
  m.attr("SentencePieceTokenizer") = py::none();
#endif

  PybindSpeakerEmbeddingExtractor(&m);
  PybindSpeakerEmbeddingManager(&m);
  PybindSpokenLanguageIdentification(&m);

#if SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION == 1
  PybindFastClustering(&m);
  PybindOfflineSpeakerDiarizationResult(&m);
  PybindOfflineSpeakerDiarization(&m);
#else
  /* Define "empty" diarization symbols */
  m.attr("FastClusteringConfig") = py::none();
  m.attr("FastClustering") = py::none();
  m.attr("OfflineSpeakerDiarizationSegment") = py::none();
  m.attr("OfflineSpeakerDiarizationResult") = py::none();
  m.attr("OfflineSpeakerSegmentationPyannoteModelConfig") = py::none();
  m.attr("OfflineSpeakerSegmentationModelConfig") = py::none();
  m.attr("OfflineSpeakerDiarizationConfig") = py::none();
  m.attr("OfflineSpeakerDiarization") = py::none();
#endif

  PybindAlsa(&m);
  PybindOfflineSpeechDenoiser(&m);
  PybindOnlineSpeechDenoiser(&m);
  PybindOfflineSourceSeparation(&m);
  PybindVersion(&m);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/sherpa-onnx.h
================================================
// sherpa-onnx/python/csrc/sherpa-onnx.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_SHERPA_ONNX_H_
#define SHERPA_ONNX_PYTHON_CSRC_SHERPA_ONNX_H_

#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION

#include "pybind11/functional.h"
#include "pybind11/numpy.h"
#include "pybind11/pybind11.h"
#include "pybind11/stl.h"

namespace py = pybind11;

#endif  // SHERPA_ONNX_PYTHON_CSRC_SHERPA_ONNX_H_


================================================
FILE: sherpa-onnx/python/csrc/silero-vad-model-config.cc
================================================
// sherpa-onnx/python/csrc/silero-vad-model-config.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/silero-vad-model-config.h"

#include <memory>
#include <string>

#include "sherpa-onnx/csrc/silero-vad-model-config.h"

namespace sherpa_onnx {

void PybindSileroVadModelConfig(py::module *m) {
  using PyClass = SileroVadModelConfig;
  py::class_<PyClass>(*m, "SileroVadModelConfig")
      .def(py::init<>())
      .def(py::init([](const std::string &model, float threshold,
                       float min_silence_duration, float min_speech_duration,
                       int32_t window_size,
                       float max_speech_duration) -> std::unique_ptr<PyClass> {
             auto ans = std::make_unique<PyClass>();

             ans->model = model;
             ans->threshold = threshold;
             ans->min_silence_duration = min_silence_duration;
             ans->min_speech_duration = min_speech_duration;
             ans->window_size = window_size;
             ans->max_speech_duration = max_speech_duration;

             return ans;
           }),
           py::arg("model"), py::arg("threshold") = 0.5,
           py::arg("min_silence_duration") = 0.5,
           py::arg("min_speech_duration") = 0.25, py::arg("window_size") = 512,
           py::arg("max_speech_duration") = 20)
      .def_readwrite("model", &PyClass::model)
      .def_readwrite("threshold", &PyClass::threshold)
      .def_readwrite("min_silence_duration", &PyClass::min_silence_duration)
      .def_readwrite("min_speech_duration", &PyClass::min_speech_duration)
      .def_readwrite("window_size", &PyClass::window_size)
      .def_readwrite("max_speech_duration", &PyClass::max_speech_duration)
      .def("__str__", &PyClass::ToString)
      .def("validate", &PyClass::Validate);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/silero-vad-model-config.h
================================================
// sherpa-onnx/python/csrc/silero-vad-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_SILERO_VAD_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_SILERO_VAD_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindSileroVadModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_SILERO_VAD_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/speaker-embedding-extractor.cc
================================================
// sherpa-onnx/python/csrc/speaker-embedding-extractor.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/speaker-embedding-extractor.h"

#include <string>

#include "sherpa-onnx/csrc/speaker-embedding-extractor.h"

namespace sherpa_onnx {

static void PybindSpeakerEmbeddingExtractorConfig(py::module *m) {
  using PyClass = SpeakerEmbeddingExtractorConfig;
  py::class_<PyClass>(*m, "SpeakerEmbeddingExtractorConfig")
      .def(py::init<>())
      .def(py::init<const std::string &, int32_t, bool, const std::string &>(),
           py::arg("model"), py::arg("num_threads") = 1,
           py::arg("debug") = false, py::arg("provider") = "cpu")
      .def_readwrite("model", &PyClass::model)
      .def_readwrite("num_threads", &PyClass::num_threads)
      .def_readwrite("debug", &PyClass::debug)
      .def_readwrite("provider", &PyClass::provider)
      .def("validate", &PyClass::Validate)
      .def("__str__", &PyClass::ToString);
}

void PybindSpeakerEmbeddingExtractor(py::module *m) {
  PybindSpeakerEmbeddingExtractorConfig(m);

  using PyClass = SpeakerEmbeddingExtractor;
  py::class_<PyClass>(*m, "SpeakerEmbeddingExtractor")
      .def(py::init<const SpeakerEmbeddingExtractorConfig &>(),
           py::arg("config"), py::call_guard<py::gil_scoped_release>())
      .def_property_readonly("dim", &PyClass::Dim)
      .def("create_stream", &PyClass::CreateStream,
           py::call_guard<py::gil_scoped_release>())
      .def("compute", &PyClass::Compute,
           py::call_guard<py::gil_scoped_release>())
      .def("is_ready", &PyClass::IsReady,
           py::call_guard<py::gil_scoped_release>());
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/speaker-embedding-extractor.h
================================================
// sherpa-onnx/python/csrc/speaker-embedding-extractor.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_H_
#define SHERPA_ONNX_PYTHON_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindSpeakerEmbeddingExtractor(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_H_


================================================
FILE: sherpa-onnx/python/csrc/speaker-embedding-manager.cc
================================================
// sherpa-onnx/python/csrc/speaker-embedding-manager.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/speaker-embedding-manager.h"

#include <string>
#include <vector>

#include "sherpa-onnx/csrc/speaker-embedding-manager.h"

namespace sherpa_onnx {

void PybindSpeakerEmbeddingManager(py::module *m) {
  using PyClass = SpeakerEmbeddingManager;
  py::class_<PyClass>(*m, "SpeakerEmbeddingManager")
      .def(py::init<int32_t>(), py::arg("dim"),
           py::call_guard<py::gil_scoped_release>())
      .def_property_readonly("num_speakers", &PyClass::NumSpeakers)
      .def_property_readonly("dim", &PyClass::Dim)
      .def_property_readonly("all_speakers", &PyClass::GetAllSpeakers)
      .def(
          "__contains__",
          [](const PyClass &self, const std::string &name) -> bool {
            return self.Contains(name);
          },
          py::arg("name"), py::call_guard<py::gil_scoped_release>())
      .def(
          "add",
          [](const PyClass &self, const std::string &name,
             const std::vector<float> &v) -> bool {
            return self.Add(name, v.data());
          },
          py::arg("name"), py::arg("v"),
          py::call_guard<py::gil_scoped_release>())
      .def(
          "add",
          [](const PyClass &self, const std::string &name,
             const std::vector<std::vector<float>> &embedding_list) -> bool {
            return self.Add(name, embedding_list);
          },
          py::arg("name"), py::arg("embedding_list"),
          py::call_guard<py::gil_scoped_release>())
      .def(
          "remove",
          [](const PyClass &self, const std::string &name) -> bool {
            return self.Remove(name);
          },
          py::arg("name"), py::call_guard<py::gil_scoped_release>())
      .def(
          "search",
          [](const PyClass &self, const std::vector<float> &v, float threshold)
              -> std::string { return self.Search(v.data(), threshold); },
          py::arg("v"), py::arg("threshold"),
          py::call_guard<py::gil_scoped_release>())
      .def(
          "verify",
          [](const PyClass &self, const std::string &name,
             const std::vector<float> &v, float threshold) -> bool {
            return self.Verify(name, v.data(), threshold);
          },
          py::arg("name"), py::arg("v"), py::arg("threshold"),
          py::call_guard<py::gil_scoped_release>())
      .def(
          "score",
          [](const PyClass &self, const std::string &name,
             const std::vector<float> &v) -> float {
            return self.Score(name, v.data());
          },
          py::arg("name"), py::arg("v"),
          py::call_guard<py::gil_scoped_release>());
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/speaker-embedding-manager.h
================================================
// sherpa-onnx/python/csrc/speaker-embedding-manager.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_SPEAKER_EMBEDDING_MANAGER_H_
#define SHERPA_ONNX_PYTHON_CSRC_SPEAKER_EMBEDDING_MANAGER_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindSpeakerEmbeddingManager(py::module *m);

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_PYTHON_CSRC_SPEAKER_EMBEDDING_MANAGER_H_


================================================
FILE: sherpa-onnx/python/csrc/spoken-language-identification.cc
================================================
// sherpa-onnx/python/csrc/spoken-language-identification.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/spoken-language-identification.h"

#include <string>

#include "sherpa-onnx/csrc/spoken-language-identification.h"

namespace sherpa_onnx {

static void PybindSpokenLanguageIdentificationWhisperConfig(py::module *m) {
  using PyClass = SpokenLanguageIdentificationWhisperConfig;

  py::class_<PyClass>(*m, "SpokenLanguageIdentificationWhisperConfig")
      .def(py::init<>())
      .def(py::init<const std::string &, const std::string &, int32_t>(),
           py::arg("encoder"), py::arg("decoder"),
           py::arg("tail_paddings") = -1)
      .def_readwrite("encoder", &PyClass::encoder)
      .def_readwrite("decoder", &PyClass::decoder)
      .def_readwrite("tail_paddings", &PyClass::tail_paddings)
      .def("validate", &PyClass::Validate)
      .def("__str__", &PyClass::ToString);
}

static void PybindSpokenLanguageIdentificationConfig(py::module *m) {
  PybindSpokenLanguageIdentificationWhisperConfig(m);

  using PyClass = SpokenLanguageIdentificationConfig;

  py::class_<PyClass>(*m, "SpokenLanguageIdentificationConfig")
      .def(py::init<>())
      .def(py::init<const SpokenLanguageIdentificationWhisperConfig &, int32_t,
                    bool, const std::string &>(),
           py::arg("whisper"), py::arg("num_threads") = 1,
           py::arg("debug") = false, py::arg("provider") = "cpu")
      .def_readwrite("whisper", &PyClass::whisper)
      .def_readwrite("num_threads", &PyClass::num_threads)
      .def_readwrite("debug", &PyClass::debug)
      .def_readwrite("provider", &PyClass::provider)
      .def("validate", &PyClass::Validate)
      .def("__str__", &PyClass::ToString);
}

void PybindSpokenLanguageIdentification(py::module *m) {
  PybindSpokenLanguageIdentificationConfig(m);

  using PyClass = SpokenLanguageIdentification;
  py::class_<PyClass>(*m, "SpokenLanguageIdentification")
      .def(py::init<const SpokenLanguageIdentificationConfig &>(),
           py::arg("config"), py::call_guard<py::gil_scoped_release>())
      .def("create_stream", &PyClass::CreateStream,
           py::call_guard<py::gil_scoped_release>())
      .def("compute", &PyClass::Compute, py::arg("s"),
           py::call_guard<py::gil_scoped_release>());
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/spoken-language-identification.h
================================================
// sherpa-onnx/python/csrc/spoken-language-identification.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_SPOKEN_LANGUAGE_IDENTIFICATION_H_
#define SHERPA_ONNX_PYTHON_CSRC_SPOKEN_LANGUAGE_IDENTIFICATION_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindSpokenLanguageIdentification(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_SPOKEN_LANGUAGE_IDENTIFICATION_H_


================================================
FILE: sherpa-onnx/python/csrc/ten-vad-model-config.cc
================================================
// sherpa-onnx/python/csrc/ten-vad-model-config.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/ten-vad-model-config.h"

#include <memory>
#include <string>

#include "sherpa-onnx/csrc/ten-vad-model-config.h"

namespace sherpa_onnx {

void PybindTenVadModelConfig(py::module *m) {
  using PyClass = TenVadModelConfig;
  py::class_<PyClass>(*m, "TenVadModelConfig")
      .def(py::init<>())
      .def(py::init([](const std::string &model, float threshold,
                       float min_silence_duration, float min_speech_duration,
                       int32_t window_size,
                       float max_speech_duration) -> std::unique_ptr<PyClass> {
             auto ans = std::make_unique<PyClass>();

             ans->model = model;
             ans->threshold = threshold;
             ans->min_silence_duration = min_silence_duration;
             ans->min_speech_duration = min_speech_duration;
             ans->window_size = window_size;
             ans->max_speech_duration = max_speech_duration;

             return ans;
           }),
           py::arg("model"), py::arg("threshold") = 0.5,
           py::arg("min_silence_duration") = 0.5,
           py::arg("min_speech_duration") = 0.25, py::arg("window_size") = 256,
           py::arg("max_speech_duration") = 20)
      .def_readwrite("model", &PyClass::model)
      .def_readwrite("threshold", &PyClass::threshold)
      .def_readwrite("min_silence_duration", &PyClass::min_silence_duration)
      .def_readwrite("min_speech_duration", &PyClass::min_speech_duration)
      .def_readwrite("window_size", &PyClass::window_size)
      .def_readwrite("max_speech_duration", &PyClass::max_speech_duration)
      .def("__str__", &PyClass::ToString)
      .def("validate", &PyClass::Validate);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/ten-vad-model-config.h
================================================
// sherpa-onnx/python/csrc/ten-vad-model-config.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_TEN_VAD_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_TEN_VAD_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindTenVadModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_TEN_VAD_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/tensorrt-config.cc
================================================
// sherpa-onnx/python/csrc/tensorrt-config.cc
//
// Copyright (c)  2024  Uniphore (Author: Manickavela A)

#include "sherpa-onnx/python/csrc/tensorrt-config.h"

#include <string>
#include <memory>
#include "sherpa-onnx/csrc/provider-config.h"

namespace sherpa_onnx {

void PybindTensorrtConfig(py::module *m) {
  using PyClass = TensorrtConfig;
  py::class_<PyClass>(*m, "TensorrtConfig")
        .def(py::init<>())
        .def(py::init([](int64_t trt_max_workspace_size,
                      int32_t trt_max_partition_iterations,
                      int32_t trt_min_subgraph_size,
                      bool trt_fp16_enable,
                      bool trt_detailed_build_log,
                      bool trt_engine_cache_enable,
                      bool trt_timing_cache_enable,
                      const std::string &trt_engine_cache_path,
                      const std::string &trt_timing_cache_path,
                      bool trt_dump_subgraphs) -> std::unique_ptr<PyClass> {
            auto ans = std::make_unique<PyClass>();

            ans->trt_max_workspace_size = trt_max_workspace_size;
            ans->trt_max_partition_iterations = trt_max_partition_iterations;
            ans->trt_min_subgraph_size = trt_min_subgraph_size;
            ans->trt_fp16_enable = trt_fp16_enable;
            ans->trt_detailed_build_log = trt_detailed_build_log;
            ans->trt_engine_cache_enable = trt_engine_cache_enable;
            ans->trt_timing_cache_enable = trt_timing_cache_enable;
            ans->trt_engine_cache_path = trt_engine_cache_path;
            ans->trt_timing_cache_path = trt_timing_cache_path;
            ans->trt_dump_subgraphs = trt_dump_subgraphs;

            return ans;
          }),
           py::arg("trt_max_workspace_size") = 2147483647,
           py::arg("trt_max_partition_iterations") = 10,
           py::arg("trt_min_subgraph_size") = 5,
           py::arg("trt_fp16_enable") = true,
           py::arg("trt_detailed_build_log") = false,
           py::arg("trt_engine_cache_enable") = true,
           py::arg("trt_timing_cache_enable") = true,
           py::arg("trt_engine_cache_path") = ".",
           py::arg("trt_timing_cache_path") = ".",
           py::arg("trt_dump_subgraphs") = false)

      .def_readwrite("trt_max_workspace_size",
          &PyClass::trt_max_workspace_size)
      .def_readwrite("trt_max_partition_iterations",
          &PyClass::trt_max_partition_iterations)
      .def_readwrite("trt_min_subgraph_size", &PyClass::trt_min_subgraph_size)
      .def_readwrite("trt_fp16_enable", &PyClass::trt_fp16_enable)
      .def_readwrite("trt_detailed_build_log",
          &PyClass::trt_detailed_build_log)
      .def_readwrite("trt_engine_cache_enable",
          &PyClass::trt_engine_cache_enable)
      .def_readwrite("trt_timing_cache_enable",
          &PyClass::trt_timing_cache_enable)
      .def_readwrite("trt_engine_cache_path", &PyClass::trt_engine_cache_path)
      .def_readwrite("trt_timing_cache_path", &PyClass::trt_timing_cache_path)
      .def_readwrite("trt_dump_subgraphs", &PyClass::trt_dump_subgraphs)
      .def("__str__", &PyClass::ToString)
      .def("validate", &PyClass::Validate);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/tensorrt-config.h
================================================
// sherpa-onnx/python/csrc/tensorrt-config.h
//
// Copyright (c)  2024  Uniphore (Author: Manickavela A)

#ifndef SHERPA_ONNX_PYTHON_CSRC_TENSORRT_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_TENSORRT_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindTensorrtConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_TENSORRT_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/vad-model-config.cc
================================================
// sherpa-onnx/python/csrc/vad-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/vad-model-config.h"

#include <string>

#include "sherpa-onnx/csrc/vad-model-config.h"
#include "sherpa-onnx/python/csrc/silero-vad-model-config.h"
#include "sherpa-onnx/python/csrc/ten-vad-model-config.h"

namespace sherpa_onnx {

void PybindVadModelConfig(py::module *m) {
  PybindSileroVadModelConfig(m);
  PybindTenVadModelConfig(m);

  using PyClass = VadModelConfig;
  py::class_<PyClass>(*m, "VadModelConfig")
      .def(py::init<>())
      .def(py::init<const SileroVadModelConfig &, const TenVadModelConfig &,
                    int32_t, int32_t, const std::string &, bool>(),
           py::arg("silero_vad") = SileroVadModelConfig{},
           py::arg("ten_vad") = TenVadModelConfig{},
           py::arg("sample_rate") = 16000, py::arg("num_threads") = 1,
           py::arg("provider") = "cpu", py::arg("debug") = false)
      .def_readwrite("silero_vad", &PyClass::silero_vad)
      .def_readwrite("ten_vad", &PyClass::ten_vad)
      .def_readwrite("sample_rate", &PyClass::sample_rate)
      .def_readwrite("num_threads", &PyClass::num_threads)
      .def_readwrite("provider", &PyClass::provider)
      .def_readwrite("debug", &PyClass::debug)
      .def("__str__", &PyClass::ToString)
      .def("validate", &PyClass::Validate);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/vad-model-config.h
================================================
// sherpa-onnx/python/csrc/vad-model-config.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_VAD_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_VAD_MODEL_CONFIG_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindVadModelConfig(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_VAD_MODEL_CONFIG_H_


================================================
FILE: sherpa-onnx/python/csrc/vad-model.cc
================================================
// sherpa-onnx/python/csrc/vad-model.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/vad-model.h"

#include <memory>
#include <vector>

#include "sherpa-onnx/csrc/vad-model.h"

namespace sherpa_onnx {

void PybindVadModel(py::module *m) {
  using PyClass = VadModel;
  py::class_<PyClass>(*m, "VadModel")
      .def_static("create",
                  (std::unique_ptr<VadModel>(*)(const VadModelConfig &))(
                      &PyClass::Create),
                  py::arg("config"), py::call_guard<py::gil_scoped_release>())
      .def("reset", &PyClass::Reset, py::call_guard<py::gil_scoped_release>())
      .def(
          "is_speech",
          [](PyClass &self, const std::vector<float> &samples) -> bool {
            return self.IsSpeech(samples.data(), samples.size());
          },
          py::arg("samples"), py::call_guard<py::gil_scoped_release>())
      .def("window_size", &PyClass::WindowSize,
           py::call_guard<py::gil_scoped_release>())
      .def("min_silence_duration_samples", &PyClass::MinSilenceDurationSamples,
           py::call_guard<py::gil_scoped_release>())
      .def("min_speech_duration_samples", &PyClass::MinSpeechDurationSamples,
           py::call_guard<py::gil_scoped_release>());
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/vad-model.h
================================================
// sherpa-onnx/python/csrc/vad-model.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_VAD_MODEL_H_
#define SHERPA_ONNX_PYTHON_CSRC_VAD_MODEL_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindVadModel(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_VAD_MODEL_H_


================================================
FILE: sherpa-onnx/python/csrc/version.cc
================================================
// sherpa-onnx/python/csrc/version.cc
//
// Copyright (c)  2025  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/version.h"

#include <string>

#include "sherpa-onnx/csrc/version.h"

namespace sherpa_onnx {

void PybindVersion(py::module *m) {
  m->attr("version") = std::string(GetVersionStr());

  m->attr("git_sha1") = std::string(GetGitSha1());

  m->attr("git_date") = std::string(GetGitDate());
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/version.h
================================================
// sherpa-onnx/python/csrc/version.h
//
// Copyright (c)  2025  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_VERSION_H_
#define SHERPA_ONNX_PYTHON_CSRC_VERSION_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindVersion(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_VERSION_H_


================================================
FILE: sherpa-onnx/python/csrc/voice-activity-detector.cc
================================================
// sherpa-onnx/python/csrc/voice-activity-detector.cc
//
// Copyright (c)  2023  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/voice-activity-detector.h"

#include <vector>

#include "sherpa-onnx/csrc/voice-activity-detector.h"

namespace sherpa_onnx {

void PybindSpeechSegment(py::module *m) {
  using PyClass = SpeechSegment;
  py::class_<PyClass>(*m, "SpeechSegment")
      .def_property_readonly("start",
                             [](const PyClass &self) { return self.start; })
      .def_property_readonly("samples",
                             [](const PyClass &self) { return self.samples; });
}

void PybindVoiceActivityDetector(py::module *m) {
  PybindSpeechSegment(m);
  using PyClass = VoiceActivityDetector;
  py::class_<PyClass>(*m, "VoiceActivityDetector",
                      R"(
1. It is an error to call the front property when the method empty() returns True
2. The property front returns a reference, which is valid until the next call of any
   methods of this class
3. When speech is detected, the method is_speech_detected() return True, you can
   use the property current_segment to get the speech samples since
   is_speech_detected() returns true
4. When is_speech_detected() is changed from True to False, the method
   empty() returns False.
      )")
      .def(py::init<const VadModelConfig &, float>(), py::arg("config"),
           py::arg("buffer_size_in_seconds") = 60,
           py::call_guard<py::gil_scoped_release>())
      .def(
          "accept_waveform",
          [](PyClass &self, const std::vector<float> &samples) {
            self.AcceptWaveform(samples.data(), samples.size());
          },
          py::arg("samples"), py::call_guard<py::gil_scoped_release>())
      .def_property_readonly("config", &PyClass::GetConfig)
      .def("empty", &PyClass::Empty, py::call_guard<py::gil_scoped_release>())
      .def("pop", &PyClass::Pop, py::call_guard<py::gil_scoped_release>())
      .def("is_speech_detected", &PyClass::IsSpeechDetected,
           py::call_guard<py::gil_scoped_release>())
      .def("reset", &PyClass::Reset, py::call_guard<py::gil_scoped_release>())
      .def("flush", &PyClass::Flush, py::call_guard<py::gil_scoped_release>())
      .def_property_readonly("front", &PyClass::Front)
      .def_property_readonly("current_segment", &PyClass::CurrentSpeechSegment);
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/voice-activity-detector.h
================================================
// sherpa-onnx/python/csrc/voice-activity-detector.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_VOICE_ACTIVITY_DETECTOR_H_
#define SHERPA_ONNX_PYTHON_CSRC_VOICE_ACTIVITY_DETECTOR_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindVoiceActivityDetector(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_VOICE_ACTIVITY_DETECTOR_H_


================================================
FILE: sherpa-onnx/python/csrc/wave-writer.cc
================================================
// sherpa-onnx/python/csrc/wave-writer.cc
//
// Copyright (c)  2024  Xiaomi Corporation

#include "sherpa-onnx/python/csrc/wave-writer.h"

#include <string>
#include <vector>

#include "sherpa-onnx/csrc/wave-writer.h"

namespace sherpa_onnx {

void PybindWaveWriter(py::module *m) {
  m->def(
      "write_wave",
      [](const std::string &filename, const std::vector<float> &samples,
         int32_t sample_rate) -> bool {
        bool ok =
            WriteWave(filename, sample_rate, samples.data(), samples.size());

        return ok;
      },
      py::arg("filename"), py::arg("samples"), py::arg("sample_rate"));
}

}  // namespace sherpa_onnx


================================================
FILE: sherpa-onnx/python/csrc/wave-writer.h
================================================
// sherpa-onnx/python/csrc/wave-writer.h
//
// Copyright (c)  2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_PYTHON_CSRC_WAVE_WRITER_H_
#define SHERPA_ONNX_PYTHON_CSRC_WAVE_WRITER_H_

#include "sherpa-onnx/python/csrc/sherpa-onnx.h"

namespace sherpa_onnx {

void PybindWaveWriter(py::module *m);

}

#endif  // SHERPA_ONNX_PYTHON_CSRC_WAVE_WRITER_H_


================================================
FILE: sherpa-onnx/python/sherpa_onnx/__init__.py
================================================
from sherpa_onnx.lib._sherpa_onnx import (
    Alsa,
    AudioEvent,
    AudioTagging,
    AudioTaggingConfig,
    AudioTaggingModelConfig,
    CircularBuffer,
    DenoisedAudio,
    FastClustering,
    FastClusteringConfig,
    FeatureExtractorConfig,
    GenerationConfig,
    HomophoneReplacerConfig,
    OfflineCanaryModelConfig,
    OfflineCtcFstDecoderConfig,
    OfflineDolphinModelConfig,
    OfflineFireRedAsrModelConfig,
    OfflineFunASRNanoModelConfig,
    OfflineLMConfig,
    OfflineModelConfig,
    OfflineMoonshineModelConfig,
    OfflineNemoEncDecCtcModelConfig,
    OfflineParaformerModelConfig,
    OfflinePunctuation,
    OfflinePunctuationConfig,
    OfflinePunctuationModelConfig,
    OfflineRecognizerConfig,
    OfflineSenseVoiceModelConfig,
    OfflineSourceSeparation,
    OfflineSourceSeparationConfig,
    OfflineSourceSeparationModelConfig,
    OfflineSourceSeparationSpleeterModelConfig,
    OfflineSourceSeparationUvrModelConfig,
    OfflineSpeakerDiarization,
    OfflineSpeakerDiarizationConfig,
    OfflineSpeakerDiarizationResult,
    OfflineSpeakerDiarizationSegment,
    OfflineSpeakerSegmentationModelConfig,
    OfflineSpeakerSegmentationPyannoteModelConfig,
    OfflineSpeechDenoiser,
    OfflineSpeechDenoiserConfig,
    OfflineSpeechDenoiserDpdfNetModelConfig,
    OfflineSpeechDenoiserGtcrnModelConfig,
    OfflineSpeechDenoiserModelConfig,
    OfflineStream,
    OfflineTdnnModelConfig,
    OfflineTransducerModelConfig,
    OfflineTts,
    OfflineTtsConfig,
    OfflineTtsKittenModelConfig,
    OfflineTtsKokoroModelConfig,
    OfflineTtsMatchaModelConfig,
    OfflineTtsModelConfig,
    OfflineTtsPocketModelConfig,
    OfflineTtsSupertonicModelConfig,
    OfflineTtsVitsModelConfig,
    OfflineTtsZipvoiceModelConfig,
    OfflineWenetCtcModelConfig,
    OfflineWhisperModelConfig,
    OfflineZipformerAudioTaggingModelConfig,
    OfflineZipformerCtcModelConfig,
    OnlinePunctuation,
    OnlinePunctuationConfig,
    OnlinePunctuationModelConfig,
    OnlineSpeechDenoiser,
    OnlineSpeechDenoiserConfig,
    OnlineStream,
    SentencePieceTokenizer,
    SileroVadModelConfig,
    SpeakerEmbeddingExtractor,
    SpeakerEmbeddingExtractorConfig,
    SpeakerEmbeddingManager,
    SpeechSegment,
    SpokenLanguageIdentification,
    SpokenLanguageIdentificationConfig,
    SpokenLanguageIdentificationWhisperConfig,
    TenVadModelConfig,
    VadModel,
    VadModelConfig,
    VoiceActivityDetector,
    git_date,
    git_sha1,
    version,
    write_wave,
)

from .display import Display
from .keyword_spotter import KeywordSpotter
from .offline_recognizer import OfflineRecognizer
from .online_recognizer import OnlineRecognizer
from .utils import text2token


================================================
FILE: sherpa-onnx/python/sherpa_onnx/cli.py
================================================
# Copyright (c)  2023  Xiaomi Corporation

import logging

try:
    import click
except ImportError:
    print("Please run")
    print("  pip install click")
    print("before you continue")
    raise

from pathlib import Path
from sherpa_onnx import text2token


@click.group()
def cli():
    """
    The shell entry point to sherpa-onnx.
    """
    logging.basicConfig(
        format="%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s",
        level=logging.INFO,
    )


@cli.command(name="text2token")
@click.argument("input", type=click.Path(exists=True, dir_okay=False))
@click.argument("output", type=click.Path())
@click.option(
    "--tokens",
    type=str,
    required=True,
    help="The path to tokens.txt.",
)
@click.option(
    "--tokens-type",
    type=click.Choice(
        [
            "cjkchar",
            "bpe",
            "cjkchar+bpe",
            "fpinyin",
            "ppinyin",
            "phone+ppinyin",
        ],
        case_sensitive=True,
    ),
    required=True,
    help="""The type of modeling units, should be cjkchar, bpe, cjkchar+bpe, fpinyin, ppinyin or phone+ppinyin.
    fpinyin means full pinyin, each cjkchar has a pinyin(with tone).
    ppinyin means partial pinyin, it splits pinyin into initial and final,
    phone means English phonemes in CMU dictionary format.
    """,
)
@click.option(
    "--bpe-model",
    type=str,
    help="The path to bpe.model. Only required when tokens-type is bpe or cjkchar+bpe.",
)
@click.option(
    "--lexicon",
    type=str,
    help="The path to lexicon.txt. Only required when tokens-type is phone+ppinyin.",
)
def encode_text(
    input: Path,
    output: Path,
    tokens: Path,
    tokens_type: str,
    bpe_model: Path,
    lexicon: Path,
):
    """
    Encode the texts given by the INPUT to tokens and write the results to the OUTPUT.
    Each line in the texts contains the original phrase, it might also contain some
    extra items, for example, the boosting score (starting with :), the triggering
    threshold (starting with #, only used in keyword spotting task) and the original
    phrase (starting with @). Note: the extra items will be kept same in the output.

    example input 1 (tokens_type = ppinyin):

    小爱同学 :2.0 #0.6 @小爱同学
    你好问问 :3.5 @你好问问
    小艺小艺 #0.6 @小艺小艺

    example output 1:

    x iǎo ài t óng x ué :2.0 #0.6 @小爱同学
    n ǐ h ǎo w èn w èn :3.5 @你好问问
    x iǎo y ì x iǎo y ì #0.6 @小艺小艺

    example input 2 (tokens_type = bpe):

    HELLO WORLD :1.5 #0.4
    HI GOOGLE :2.0 #0.8
    HEY SIRI #0.35

    example output 2:

    ▁HE LL O ▁WORLD :1.5 #0.4
    ▁HI ▁GO O G LE :2.0 #0.8
    ▁HE Y ▁S I RI #0.35
    """
    texts = []
    # extra information like boosting score (start with :), triggering threshold (start with #)
    # original keyword (start with @)
    extra_info = []
    with open(input, "r", encoding="utf8") as f:
        for line in f:
            extra = []
            text = []
            toks = line.strip().split()
            for tok in toks:
                if tok[0] == ":" or tok[0] == "#" or tok[0] == "@":
                    extra.append(tok)
                else:
                    text.append(tok)
            texts.append(" ".join(text))
            extra_info.append(extra)

    encoded_texts = text2token(
        texts,
        tokens=tokens,
        tokens_type=tokens_type,
        bpe_model=bpe_model,
        lexicon=lexicon,
    )
    with open(output, "w", encoding="utf8") as f:
        for i, txt in enumerate(encoded_texts):
            txt += extra_info[i]
            f.write(" ".join(txt) + "\n")


================================================
FILE: sherpa-onnx/python/sherpa_onnx/display.py
================================================
# Copyright (c)  2025  Xiaomi Corporation
import os
from time import localtime, strftime


def get_current_time():
    return strftime("%Y-%m-%d %H:%M:%S", localtime())


def clear_console():
    os.system("cls" if os.name == "nt" else "clear")


class Display:
    def __init__(self):
        self.sentences = []
        self.currentText = ""

    def update_text(self, text):
        self.currentText = text

    def finalize_current_sentence(self):
        if self.currentText.strip():
            self.sentences.append((get_current_time(), self.currentText))

        self.currentText = ""

    def display(self):
        clear_console()
        print("=== Speech Recognition with Next-gen Kaldi ===")
        print("Time:", get_current_time())
        print("-" * 30)

        # display history sentences
        if self.sentences:
            for i, (when, text) in enumerate(self.sentences):
                print(f"[{when}] {i + 1}. {text}")
            print("-" * 30)

        if self.currentText.strip():
            print("Recognizing:", self.currentText)


================================================
FILE: sherpa-onnx/python/sherpa_onnx/keyword_spotter.py
================================================
# Copyright (c)  2023  Xiaomi Corporation

from pathlib import Path
from typing import List, Optional

from sherpa_onnx.lib._sherpa_onnx import (
    FeatureExtractorConfig,
    KeywordSpotterConfig,
    OnlineModelConfig,
    OnlineTransducerModelConfig,
    OnlineStream,
    ProviderConfig,
)

from sherpa_onnx.lib._sherpa_onnx import KeywordSpotter as _KeywordSpotter


def _assert_file_exists(f: str):
    assert Path(f).is_file(), f"{f} does not exist"


class KeywordSpotter(object):
    """A class for keyword spotting.

    Please refer to the following files for usages
     - https://github.com/k2-fsa/sherpa-onnx/blob/master/python-api-examples/keyword-spotter.py
     - https://github.com/k2-fsa/sherpa-onnx/blob/master/python-api-examples/keyword-spotter-from-microphone.py
    """

    def __init__(
        self,
        tokens: str,
        encoder: str,
        decoder: str,
        joiner: str,
        keywords_file: str,
        num_threads: int = 2,
        sample_rate: float = 16000,
        feature_dim: int = 80,
        max_active_paths: int = 4,
        keywords_score: float = 1.0,
        keywords_threshold: float = 0.25,
        num_trailing_blanks: int = 1,
        provider: str = "cpu",
        device: int = 0,
    ):
        """
        Please refer to
        `<https://k2-fsa.github.io/sherpa/onnx/kws/pretrained_models/index.html>`_
        to download pre-trained models for different languages, e.g., Chinese,
        English, etc.

        Args:
          tokens:
            Path to ``tokens.txt``. Each line in ``tokens.txt`` contains two
            columns::

                symbol integer_id

          encoder:
            Path to ``encoder.onnx``.
          decoder:
            Path to ``decoder.onnx``.
          joiner:
            Path to ``joiner.onnx``.
          keywords_file:
            The file containing keywords, one word/phrase per line, and for each
            phrase the bpe/cjkchar/pinyin are separated by a space.
          num_threads:
            Number of threads for neural network computation.
          sample_rate:
            Sample rate of the training data used to train the model.
          feature_dim:
            Dimension of the feature used to train the model.
          max_active_paths:
            Use only when decoding_method is modified_beam_search. It specifies
            the maximum number of active paths during beam search.
          keywords_score:
            The boosting score of each token for keywords. The larger the easier to
            survive beam search.
          keywords_threshold:
            The trigger threshold (i.e. probability) of the keyword. The larger the
            harder to trigger.
          num_trailing_blanks:
            The number of trailing blanks a keyword should be followed. Setting
            to a larger value (e.g. 8) when your keywords has overlapping tokens
            between each other.
          provider:
            onnxruntime execution providers. Valid values are: cpu, cuda, coreml.
          device:
            onnxruntime cuda device index.
        """
        _assert_file_exists(tokens)
        _assert_file_exists(encoder)
        _assert_file_exists(decoder)
        _assert_file_exists(joiner)

        assert num_threads > 0, num_threads

        transducer_config = OnlineTransducerModelConfig(
            encoder=encoder,
            decoder=decoder,
            joiner=joiner,
        )

        provider_config = ProviderConfig(
            provider=provider,
            device=device,
        )

        model_config = OnlineModelConfig(
            transducer=transducer_config,
            tokens=tokens,
            num_threads=num_threads,
            provider_config=provider_config,
        )

        feat_config = FeatureExtractorConfig(
            sampling_rate=sample_rate,
            feature_dim=feature_dim,
        )

        keywords_spotter_config = KeywordSpotterConfig(
            feat_config=feat_config,
            model_config=model_config,
            max_active_paths=max_active_paths,
            num_trailing_blanks=num_trailing_blanks,
            keywords_score=keywords_score,
            keywords_threshold=keywords_threshold,
            keywords_file=keywords_file,
        )
        self.keyword_spotter = _KeywordSpotter(keywords_spotter_config)

    def reset_stream(self, s: OnlineStream):
        self.keyword_spotter.reset(s)

    def create_stream(self, keywords: Optional[str] = None):
        if keywords is None:
            return self.keyword_spotter.create_stream()
        else:
            return self.keyword_spotter.create_stream(keywords)

    def decode_stream(self, s: OnlineStream):
        self.keyword_spotter.decode_stream(s)

    def decode_streams(self, ss: List[OnlineStream]):
        self.keyword_spotter.decode_streams(ss)

    def is_ready(self, s: OnlineStream) -> bool:
        return self.keyword_spotter.is_ready(s)

    def get_result(self, s: OnlineStream) -> str:
        return self.keyword_spotter.get_result(s).keyword.strip()

    def tokens(self, s: OnlineStream) -> List[str]:
        return self.keyword_spotter.get_result(s).tokens

    def timestamps(self, s: OnlineStream) -> List[float]:
        return self.keyword_spotter.get_result(s).timestamps


================================================
FILE: sherpa-onnx/python/sherpa_onnx/offline_recognizer.py
================================================
# Copyright (c)  2023 by manyeyes
# Copyright (c)  2023  Xiaomi Corporation
from pathlib import Path
from typing import List, Optional

from sherpa_onnx.lib._sherpa_onnx import (
    FeatureExtractorConfig,
    HomophoneReplacerConfig,
    OfflineCanaryModelConfig,
    OfflineFunASRNanoModelConfig,
    OfflineOmnilingualAsrCtcModelConfig,
    OfflineMedAsrCtcModelConfig,
    OfflineFireRedAsrCtcModelConfig,
    OfflineCtcFstDecoderConfig,
    OfflineDolphinModelConfig,
    OfflineFireRedAsrModelConfig,
    OfflineLMConfig,
    OfflineModelConfig,
    OfflineMoonshineModelConfig,
    OfflineNemoEncDecCtcModelConfig,
    OfflineParaformerModelConfig,
)
from sherpa_onnx.lib._sherpa_onnx import OfflineRecognizer as _Recognizer
from sherpa_onnx.lib._sherpa_onnx import (
    OfflineRecognizerConfig,
    OfflineSenseVoiceModelConfig,
    OfflineStream,
    OfflineTdnnModelConfig,
    OfflineTransducerModelConfig,
    OfflineWenetCtcModelConfig,
    OfflineWhisperModelConfig,
    OfflineZipformerCtcModelConfig,
)


def _assert_file_exists(f: str):
    assert Path(f).is_file(), f"{f} does not exist"


class OfflineRecognizer(object):
    """A class for offline speech recognition.

    Please refer to the following files for usages
     - https://github.com/k2-fsa/sherpa-onnx/blob/master/sherpa-onnx/python/tests/test_offline_recognizer.py
     - https://github.com/k2-fsa/sherpa-onnx/blob/master/python-api-examples/offline-decode-files.py
    """

    @classmethod
    def from_transducer(
        cls,
        encoder: str,
        decoder: str,
        joiner: str,
        tokens: str,
        num_threads: int = 1,
        sample_rate: int = 16000,
        feature_dim: int = 80,
        dither: float = 0.0,
        decoding_method: str = "greedy_search",
        max_active_paths: int = 4,
        hotwords_file: str = "",
        hotwords_score: float = 1.5,
        blank_penalty: float = 0.0,
        modeling_unit: str = "cjkchar",
        bpe_vocab: str = "",
        debug: bool = False,
        provider: str = "cpu",
        model_type: str = "transducer",
        rule_fsts: str = "",
        rule_fars: str = "",
        lm: str = "",
        lm_scale: float = 0.1,
        hr_dict_dir: str = "",
        hr_rule_fsts: str = "",
        hr_lexicon: str = "",
        lodr_fst: str = "",
        lodr_scale: float = 0.0,
    ):
        """
        Please refer to
        `<https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/index.html>`_
        to download pre-trained models for different languages, e.g., Chinese,
        English, etc.

        Args:
          tokens:
            Path to ``tokens.txt``. Each line in ``tokens.txt`` contains two
            columns::

                symbol integer_id

          encoder:
            Path to ``encoder.onnx``.
          decoder:
            Path to ``decoder.onnx``.
          joiner:
            Path to ``joiner.onnx``.
          num_threads:
            Number of threads for neural network computation.
          sample_rate:
            Sample rate of the training data used to train the model.
          feature_dim:
            Dimension of the feature used to train the model.
          dither:
            Dithering constant (0.0 means no dither).
            By default the audio samples are in range [-1,+1],
            so dithering constant 0.00003 is a good value,
            equivalent to the default 1.0 from kaldi
          decoding_method:
            Valid values: greedy_search, modified_beam_search.
          max_active_paths:
            Maximum number of active paths to keep. Used only when
            decoding_method is modified_beam_search.
          hotwords_file:
            The file containing hotwords, one words/phrases per line, and for each
            phrase the bpe/cjkchar are separated by a space.
          hotwords_score:
            The hotword score of each token for biasing word/phrase. Used only if
            hotwords_file is given with modified_beam_search as decoding method.
          blank_penalty:
            The penalty applied on blank symbol during decoding.
          modeling_unit:
            The modeling unit of the model, commonly used units are bpe, cjkchar,
            cjkchar+bpe, etc. Currently, it is needed only when hotwords are
            provided, we need it to encode the hotwords into token sequence.
            and the modeling unit is bpe or cjkchar+bpe.
          bpe_vocab:
            The vocabulary generated by google's sentencepiece program.
            It is a file has two columns, one is the token, the other is
            the log probability, you can get it from the directory where
            your bpe model is generated. Only used when hotwords provided
          debug:
            True to show debug messages.
          provider:
            onnxruntime execution providers. Valid values are: cpu, cuda, coreml.
          rule_fsts:
            If not empty, it specifies fsts for inverse text normalization.
            If there are multiple fsts, they are separated by a comma.
          rule_fars:
            If not empty, it specifies fst archives for inverse text normalization.
            If there are multiple archives, they are separated by a comma.
          lodr_fst:
            Path to the LODR FST file in binary format. If empty, LODR is disabled.
          lodr_scale:
            Scale factor for LODR rescoring. Only used when lodr_fst is provided.
        """
        self = cls.__new__(cls)
        model_config = OfflineModelConfig(
            transducer=OfflineTransducerModelConfig(
                encoder_filename=encoder,
                decoder_filename=decoder,
                joiner_filename=joiner,
            ),
            tokens=tokens,
            num_threads=num_threads,
            debug=debug,
            provider=provider,
            modeling_unit=modeling_unit,
            bpe_vocab=bpe_vocab,
            model_type=model_type,
        )

        feat_config = FeatureExtractorConfig(
            sampling_rate=sample_rate,
            feature_dim=feature_dim,
            dither=dither,
        )

        if len(hotwords_file) > 0 and decoding_method != "modified_beam_search":
            raise ValueError(
                "Please use --decoding-method=modified_beam_search when using "
                f"--hotwords-file. Currently given: {decoding_method}"
            )

        if lm and decoding_method != "modified_beam_search":
            raise ValueError(
                "Please use --decoding-method=modified_beam_search when using "
                f"--lm. Currently given: {decoding_method}"
            )

        lm_config = OfflineLMConfig(
            model=lm,
            scale=lm_scale,
            lm_num_threads=num_threads,
            lm_provider=provider,
            lodr_fst=lodr_fst,
            lodr_scale=lodr_scale,
        )

        recognizer_config = OfflineRecognizerConfig(
            feat_config=feat_config,
            model_config=model_config,
            lm_config=lm_config,
            decoding_method=decoding_method,
            max_active_paths=max_active_paths,
            hotwords_file=hotwords_file,
            hotwords_score=hotwords_score,
            blank_penalty=blank_penalty,
            rule_fsts=rule_fsts,
            rule_fars=rule_fars,
            hr=HomophoneReplacerConfig(
                dict_dir=hr_dict_dir,
                lexicon=hr_lexicon,
                rule_fsts=hr_rule_fsts,
            ),
        )
        self.recognizer = _Recognizer(recognizer_config)
        self.config = recognizer_config
        return self

    @classmethod
    def from_sense_voice(
        cls,
        model: str,
        tokens: str,
        num_threads: int = 1,
        sample_rate: int = 16000,
        feature_dim: int = 80,
        decoding_method: str = "greedy_search",
        debug: bool = False,
        provider: str = "cpu",
        language: str = "",
        use_itn: bool = False,
        rule_fsts: str = "",
        rule_fars: str = "",
        hr_dict_dir: str = "",
        hr_rule_fsts: str = "",
        hr_lexicon: str = "",
    ):
        """
        Please refer to
        `<https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models>`_
        to download pre-trained models.

        Args:
          tokens:
            Path to ``tokens.txt``. Each line in ``tokens.txt`` contains two
            columns::

                symbol integer_id

          model:
            Path to ``model.onnx``.
          num_threads:
            Number of threads for neural network computation.
          sample_rate:
            Sample rate of the training data used to train the model.
          feature_dim:
            Dimension of the feature used to train the model.
          decoding_method:
            Valid values are greedy_search.
          debug:
            True to show debug messages.
          provider:
            onnxruntime execution providers. Valid values are: cpu, cuda, coreml.
          language:
            If not empty, then valid values are: auto, zh, en, ja, ko, yue
          use_itn:
            True to enable inverse text normalization; False to disable it.
          rule_fsts:
            If not empty, it specifies fsts for inverse text normalization.
            If there are multiple fsts, they are separated by a comma.
          rule_fars:
            If not empty, it specifies fst archives for inverse text normalization.
            If there are multiple archives, they are separated by a comma.
        """
        self = cls.__new__(cls)
        model_config = OfflineModelConfig(
            sense_voice=OfflineSenseVoiceModelConfig(
                model=model,
                language=language,
                use_itn=use_itn,
            ),
            tokens=tokens,
            num_threads=num_threads,
            debug=debug,
            provider=provider,
        )

        feat_config = FeatureExtractorConfig(
            sampling_rate=sample_rate,
            feature_dim=feature_dim,
        )

        recognizer_config = OfflineRecognizerConfig(
            feat_config=feat_config,
            model_config=model_config,
            decoding_method=decoding_method,
            rule_fsts=rule_fsts,
            rule_fars=rule_fars,
            hr=HomophoneReplacerConfig(
                dict_dir=hr_dict_dir,
                lexicon=hr_lexicon,
                rule_fsts=hr_rule_fsts,
            ),
        )
        self.recognizer = _Recognizer(recognizer_config)
        self.config = recognizer_config
        return self

    @classmethod
    def from_funasr_nano(
        cls,
        encoder_adaptor: str,
        llm: str,
        embedding: str,
        tokenizer: str,
        num_threads: int = 1,
        sample_rate: int = 16000,
        feature_dim: int = 80,
        decoding_method: str = "greedy_search",
        debug: bool = False,
        provider: str = "cpu",
        system_prompt: str = "You are a helpful assistant.",
        user_prompt: str = "语音转写:",
        max_new_tokens: int = 512,
        temperature: float = 1e-6,
        top_p: float = 0.8,
        seed: int = 42,
        language: str = "",
        itn: bool = True,
        hotwords: str = "",
    ):
        """
        Create an offline recognizer for FunASR-nano models.

        Args:
          encoder_adaptor:
            Path to ``encoder_adaptor.onnx``.
          llm:
            Path to ``llm.onnx`` (KV cache model).
          embedding:
            Path to ``embedding.onnx``.
          tokenizer:
            Path to tokenizer directory (e.g., Qwen3-0.6B).
          num_threads:
            Number of threads for neural network computation.
          sample_rate:
            Sample rate of the training data used to train the model.
          feature_dim:
            Dimension of the feature used to train the model.
          decoding_method:
            Valid values are greedy_search.
          debug:
            True to show debug messages.
          provider:
            onnxruntime execution providers. Valid values are: cpu, cuda.
          system_prompt:
            System prompt for FunASR-nano.
          user_prompt:
            User prompt template for FunASR-nano.
          max_new_tokens:
            Maximum number of new tokens to generate.
          temperature:
            Sampling temperature.
          top_p:
            Top-p (nucleus) sampling threshold.
          seed:
            Random seed.
          language:
            Language for transcription (empty string means None).
          itn:
            Whether to apply inverse text normalization (default: True).
          hotwords:
            Hotwords (comma-separated, e.g., "Sherpa,FunASR").
        """
        self = cls.__new__(cls)
        # Create OfflineFunASRNanoModelConfig and set attributes
        funasr_nano_config = OfflineFunASRNanoModelConfig()
        funasr_nano_config.encoder_adaptor = encoder_adaptor
        funasr_nano_config.llm = llm
        funasr_nano_config.embedding = embedding
        funasr_nano_config.tokenizer = tokenizer
        funasr_nano_config.system_prompt = system_prompt
        funasr_nano_config.user_prompt = user_prompt
        funasr_nano_config.max_new_tokens = max_new_tokens
        funasr_nano_config.temperature = temperature
        funasr_nano_config.top_p = top_p
        funasr_nano_config.seed = seed
        funasr_nano_config.language = language
        funasr_nano_config.itn = itn
        funasr_nano_config.hotwords = hotwords

        model_config = OfflineModelConfig(
            funasr_nano=funasr_nano_config,
            num_threads=num_threads,
            debug=debug,
            provider=provider,
        )

        feat_config = FeatureExtractorConfig(
            sampling_rate=sample_rate,
            feature_dim=feature_dim,
        )

        recognizer_config = OfflineRecognizerConfig(
            feat_config=feat_config,
            model_config=model_config,
            decoding_method=decoding_method,
        )
        self.recognizer = _Recognizer(recognizer_config)
        self.config = recognizer_config
        return self

    @classmethod
    def from_paraformer(
        cls,
        paraformer: str,
        tokens: str,
        num_threads: int = 1,
        sample_rate: int = 16000,
        feature_dim: int = 80,
        decoding_method: str = "greedy_search",
        debug: bool = False,
        provider: str = "cpu",
        rule_fsts: str = "",
        rule_fars: str = "",
        hr_dict_dir: str = "",
        hr_rule_fsts: str = "",
        hr_lexicon: str = "",
    ):
        """
        Please refer to
        `<https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html>`_
        to download pre-trained models.

        Args:
          tokens:
            Path to ``tokens.txt``. Each line in ``tokens.txt`` contains two
            columns::

                symbol integer_id

          paraformer:
            Path to ``model.onnx``.
          num_threads:
            Number of threads for neural network computation.
          sample_rate:
            Sample rate of the training data used to train the model.
          feature_dim:
            Dimension of the feature used to train the model.
          decoding_method:
            Valid values are greedy_search.
          debug:
            True to show debug messages.
          provider:
            onnxruntime execution providers. Valid values are: cpu, cuda, coreml.
          rule_fsts:
            If not empty, it specifies fsts for inverse text normalization.
            If there are multiple fsts, they are separated by a comma.
          rule_fars:
            If not empty, it specifies fst archives for inverse text normalization.
            If there are multiple archives, they are separated by a comma.
        """
        self = cls.__new__(cls)
        model_config = OfflineModelConfig(
            paraformer=OfflineParaformerModelConfig(model=paraformer),
            tokens=tokens,
            num_threads=num_threads,
            debug=debug,
            provider=provider,
            model_type="paraformer",
        )

        feat_config = FeatureExtractorConfig(
            sampling_rate=sample_rate,
            feature_dim=feature_dim,
        )

        recognizer_config = OfflineRecognizerConfig(
            feat_config=feat_config,
            model_config=model_config,
            decoding_method=decoding_method,
            rule_fsts=rule_fsts,
            rule_fars=rule_fars,
            hr=HomophoneReplacerConfig(
                dict_dir=hr_dict_dir,
                lexicon=hr_lexicon,
                rule_fsts=hr_rule_fsts,
            ),
        )
        self.recognizer = _Recognizer(recognizer_config)
        self.config = recognizer_config
        return self

    @classmethod
    def from_telespeech_ctc(
        cls,
        model: str,
        tokens: str,
        num_threads: int = 1,
        sample_rate: int = 16000,
        feature_dim: int = 40,
        decoding_method: str = "greedy_search",
        debug: bool = False,
        provider: str = "cpu",
        rule_fsts: str = "",
        rule_fars: str = "",
        hr_dict_dir: str = "",
        hr_rule_fsts: str = "",
        hr_lexicon: str = "",
    ):
        """
        Please refer to
        `<https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models>`_
        to download pre-trained models.

        Args:
          model:
            Path to ``model.onnx``.
          tokens:
            Path to ``tokens.txt``. Each line in ``tokens.txt`` contains two
            columns::

                symbol integer_id

          num_threads:
            Number of threads for neural network computation.
          sample_rate:
            Sample rate of the training data used to train the model. It is
            ignored and is hard-coded in C++ to 40.
          feature_dim:
            Dimension of the feature used to train the model. It is ignored
            and is hard-coded in C++ to 40.
          decoding_method:
            Valid values are greedy_search.
          debug:
            True to show debug messages.
          provider:
            onnxruntime execution providers. Valid values are: cpu, cuda, coreml.
          rule_fsts:
            If not empty, it specifies fsts for inverse text normalization.
            If there are multiple fsts, they are separated by a comma.
          rule_fars:
            If not empty, it specifies fst archives for inverse text normalization.
            If there are multiple archives, they are separated by a comma.
        """
        self = cls.__new__(cls)
        model_config = OfflineModelConfig(
            telespeech_ctc=model,
            tokens=tokens,
            num_threads=num_threads,
            debug=debug,
            provider=provider,
        )

        feat_config = FeatureExtractorConfig(
            sampling_rate=sample_rate,
            feature_dim=feature_dim,
        )

        recognizer_config = OfflineRecognizerConfig(
            feat_config=feat_config,
            model_config=model_config,
            decoding_method=decoding_method,
            rule_fsts=rule_fsts,
            rule_fars=rule_fars,
            hr=HomophoneReplacerConfig(
                dict_dir=hr_dict_dir, lexicon=hr_lexicon, rule_fsts=hr_rule_fsts
            ),
        )
        self.recognizer = _Recognizer(recognizer_config)
        self.config = recognizer_config
        return self

    @classmethod
    def from_dolphin_ctc(
        cls,
        model: str,
        tokens: str,
        num_threads: int = 1,
        sample_rate: int = 16000,
        feature_dim: int = 80,
        decoding_method: str = "greedy_search",
        debug: bool = False,
        provider: str = "cpu",
        rule_fsts: str = "",
        rule_fars: str = "",
        hr_dict_dir: str = "",
        hr_rule_fsts: str = "",
        hr_lexicon: str = "",
    ):
        """
        Please refer to
        `<https://k2-fsa.github.io/sherpa/onnx/dolphin/index.html>`_
        to download pre-trained models.

        Args:
          model:
            Path to ``model.onnx`` or ``model.int8.onnx``.
          tokens:
            Path to ``tokens.txt``. Each line in ``tokens.txt`` contains two
            columns::

                symbol integer_id

          num_threads:
            Number of threads for neural network computation.
          sample_rate:
            Sample rate of the training data used to train the model.
          feature_dim:
            Dimension of the feature used to train the model.
          decoding_method:
            Valid values are greedy_search.
          debug:
            True to show debug messages.
          provider:
            onnxruntime execution providers. Valid values are: cpu, cuda, coreml.
          rule_fsts:
            If not empty, it specifies fsts for inverse text normalization.
            If there are multiple fsts, they are separated by a comma.
          rule_fars:
            If not empty, it specifies fst archives for inverse text normalization.
            If there are multiple archives, they are separated by a comma.
        """
        self = cls.__new__(cls)
        model_config = OfflineModelConfig(
            dolphin=OfflineDolphinModelConfig(model=model),
            tokens=tokens,
            num_threads=num_threads,
            debug=debug,
            provider=provider,
        )

        feat_config = FeatureExtractorConfig(
            sampling_rate=sample_rate,
            feature_dim=feature_dim,
        )

        recognizer_config = OfflineRecognizerConfig(
            feat_config=feat_config,
            model_config=model_config,
            decoding_method=decoding_method,
            rule_fsts=rule_fsts,
            rule_fars=rule_fars,
            hr=HomophoneReplacerConfig(
                dict_dir=hr_dict_dir,
                lexicon=hr_lexicon,
                rule_fsts=hr_rule_fsts,
            ),
        )
        self.recognizer = _Recognizer(recognizer_config)
        self.config = recognizer_config
        return self

    @classmethod
    def from_fire_red_asr_ctc(
        cls,
        model: str,
        tokens: str,
        num_threads: int = 1,
        decoding_method: str = "greedy_search",
        debug: bool = False,
        provider: str = "cpu",
    ):
        """
        Please refer to
        `<https://k2-fsa.github.io/sherpa/onnx/FireRedAsr/index.html>`_
        to download pre-trained models.

        Args:
          model:
            Path to ``model.onnx``.
          tokens:
            Path to ``tokens.txt``. Each line in ``tokens.txt`` contains two
            columns::

                symbol integer_id

          num_threads:
            Number of threads for neural network computation.
          decoding_method:
            The only supported decoding method is greedy_search.
          debug:
            True to show debug messages.
          provider:
            onnxruntime execution providers. Valid values are: cpu, cuda, coreml.
        """
        self = cls.__new__(cls)
        model_config = OfflineModelConfig(
            fire_red_asr_ctc=OfflineFireRedAsrCtcModelConfig(model=model),
            tokens=tokens,
            num_threads=num_threads,
            debug=debug,
            provider=provider,
        )

        recognizer_config = OfflineRecognizerConfig(
            model_config=model_config,
            decoding_method=decoding_method,
        )
        self.recognizer = _Recognizer(recognizer_config)
        self.config = recognizer_config
        return self

    @classmethod
    def from_medasr_ctc(
        cls,
        model: str,
        tokens: str,
        num_threads: int = 1,
        decoding_method: str = "greedy_search",
        debug: bool = False,
        provider: str = "cpu",
    ):
        """
        Please refer to
        `<https://k2-fsa.github.io/sherpa/onnx/medasr/index.html>`_
        to download pre-trained models.

        Args:
          model:
            Path to ``model.onnx``.
          tokens:
            Path to ``tokens.txt``. Each line in ``tokens.txt`` contains two
            columns::

                symbol integer_id

          num_threads:
            Number of threads for neural network computation.
          decoding_method:
            The only supported decoding method is greedy_search.
          debug:
            True to show debug messages.
          provider:
            onnxruntime execution providers. Valid values are: cpu, cuda, coreml.
        """
        self = cls.__new__(cls)
        model_config = OfflineModelConfig(
            medasr=OfflineMedAsrCtcModelConfig(model=model),
            tokens=tokens,
            num_threads=num_threads,
            debug=debug,
            provider=provider,
        )

        recognizer_config = OfflineRecognizerConfig(
            model_config=model_config,
            decoding_method=decoding_method,
        )
        self.recognizer = _Recognizer(recognizer_config)
        self.config = recognizer_config
        return self

    @classmethod
    def from_omnilingual_asr_ctc(
        cls,
        model: str,
        tokens: str,
        num_threads: int = 1,
        decoding_method: str = "greedy_search",
        debug: bool = False,
        provider: str = "cpu",
    ):
        """
        Please refer to
        `<https://k2-fsa.github.io/sherpa/onnx/omnilingual-asr/index.html>`_
        to download pre-trained models.

        Args:
          model:
            Path to ``model.onnx``.
          tokens:
            Path to ``tokens.txt``. Each line in ``tokens.txt`` contains two
            columns::

                symbol integer_id

          num_threads:
            Number of threads for neural network computation.
          decoding_method:
            The only supported decoding method is greedy_search.
          debug:
            True to show debug messages.
          provider:
            onnxruntime execution providers. Valid values are: cpu, cuda, coreml.
        """
        self = cls.__new__(cls)
        model_config = OfflineModelConfig(
            omnilingual=OfflineOmnilingualAsrCtcModelConfig(model=model),
            tokens=tokens,
            num_threads=num_threads,
            debug=debug,
            provider=provider,
        )

        recognizer_config = OfflineRecognizerConfig(
            model_config=model_config,
            decoding_method=decoding_method,
        )
        self.recognizer = _Recognizer(recognizer_config)
        self.config = recognizer_config
        return self

    @classmethod
    def from_zipformer_ctc(
        cls,
        model: str,
        tokens: str,
        num_threads: int = 1,
        sample_rate: int = 16000,
        feature_dim: int = 80,
        decoding_method: str = "greedy_search",
        debug: bool = False,
        provider: str = "cpu",
        rule_fsts: str = "",
        rule_fars: str = "",
        hr_dict_dir: str = "",
        hr_rule_fsts: str = "",
        hr_lexicon: str = "",
    ):
        """
        Please refer to
        `<https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/icefall/index.html>`_
        to download pre-trained models for different languages, e.g., Chinese,
        English, etc.

        Args:
          model:
            Path to ``model.onnx``.
          tokens:
            Path to ``tokens.txt``. Each line in ``tokens.txt`` contains two
            columns::

                symbol integer_id

          num_threads:
            Number of threads for neural network computation.
          sample_rate:
            Sample rate of the training data used to train the model.
          feature_dim:
            Dimension of the feature used to train the model.
          decoding_method:
            Valid values are greedy_search.
          debug:
            True to show debug messages.
          provider:
            onnxruntime execution providers. Valid values are: cpu, cuda, coreml.
          rule_fsts:
            If not empty, it specifies fsts for inverse text normalization.
            If there are multiple fsts, they are separated by a comma.
          rule_fars:
            If not empty, it specifies fst archives for inverse text normalization.
            If there are multiple archives, they are separated by a comma.
        """
        self = cls.__new__(cls)
        model_config = OfflineModelConfig(
            zipformer_ctc=OfflineZipformerCtcModelConfig(model=model),
            tokens=tokens,
            num_threads=num_threads,
            debug=debug,
            provider=provider,
        )

        feat_config = FeatureExtractorConfig(
            sampling_rate=sample_rate,
            feature_dim=feature_dim,
        )

        recognizer_config = OfflineRecognizerConfig(
            feat_config=feat_config,
            model_config=model_config,
            decoding_method=decoding_method,
            rule_fsts=rule_fsts,
            rule_fars=rule_fars,
            hr=HomophoneReplacerConfig(
                dict_dir=hr_dict_dir,
                lexicon=hr_lexicon,
                rule_fsts=hr_rule_fsts,
            ),
        )
        self.recognizer = _Recognizer(recognizer_config)
        self.config = recognizer_config
        return self

    @classmethod
    def from_nemo_ctc(
        cls,
        model: str,
        tokens: str,
        num_threads: int = 1,
        sample_rate: int = 16000,
        feature_dim: int = 80,
        decoding_method: str = "greedy_search",
        debug: bool = False,
        provider: str = "cpu",
        rule_fsts: str = "",
        rule_fars: str = "",
        hr_dict_dir: str = "",
        hr_rule_fsts: str = "",
        hr_lexicon: str = "",
    ):
        """
        Please refer to
        `<https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/nemo/index.html>`_
        to download pre-trained models for different languages, e.g., Chinese,
        English, etc.

        Args:
          model:
            Path to ``model.onnx``.
          tokens:
            Path to ``tokens.txt``. Each line in ``tokens.txt`` contains two
            columns::

                symbol integer_id

          num_threads:
            Number of threads for neural network computation.
          sample_rate:
            Sample rate of the training data used to train the model.
          feature_dim:
            Dimension of the feature used to train the model.
          decoding_method:
            Valid values are greedy_search.
          debug:
            True to show debug messages.
          provider:
            onnxruntime execution providers. Valid values are: cpu, cuda, coreml.
          rule_fsts:
            If not empty, it specifies fsts for inverse text normalization.
            If there are multiple fsts, they are separated by a comma.
          rule_fars:
            If not empty, it specifies fst archives for inverse text normalization.
            If there are multiple archives, they are separated by a comma.
        """
        self = cls.__new__(cls)
        model_config = OfflineModelConfig(
            nemo_ctc=OfflineNemoEncDecCtcModelConfig(model=model),
            tokens=tokens,
            num_threads=num_threads,
            debug=debug,
            provider=provider,
            model_type="nemo_ctc",
        )

        feat_config = FeatureExtractorConfig(
            sampling_rate=sample_rate,
            feature_dim=feature_dim,
        )

        recognizer_config = OfflineRecognizerConfig(
            feat_config=feat_config,
            model_config=model_config,
            decoding_method=decoding_method,
            rule_fsts=rule_fsts,
            rule_fars=rule_fars,
            hr=HomophoneReplacerConfig(
                dict_dir=hr_dict_dir,
                lexicon=hr_lexicon,
                rule_fsts=hr_rule_fsts,
            ),
        )
        self.recognizer = _Recognizer(recognizer_config)
        self.config = recognizer_config
        return self

    @classmethod
    def from_nemo_canary(
        cls,
        encoder: str,
        decoder: str,
        tokens: str,
        src_lang: str = "en",
        tgt_lang: str = "en",
        num_threads: int = 1,
        sample_rate: int = 16000,
        feature_dim: int = 128,  # not used
        decoding_method: str = "greedy_search",  # not used
        debug: bool = False,
        provider: str = "cpu",
        rule_fsts: str = "",
        rule_fars: str = "",
        hr_dict_dir: str = "",
        hr_rule_fsts: str = "",
        hr_lexicon: str = "",
    ):
        """
        Please refer to
        `<https://k2-fsa.github.io/sherpa/onnx/nemo/index.html>`_
        to download pre-trained models for different languages.

        Args:
          encoder:
            Path to ``encoder.onnx`` or ``encoder.int8.onnx``.
          decoder:
            Path to ``decoder.onnx`` or ``decoder.int8.onnx``.
          tokens:
            Path to ``tokens.txt``. Each line in ``tokens.txt`` contains two
            columns::

                symbol integer_id

          src_lang:
            The language of the input audio. Valid values are: en, es, de, fr.
            If you leave it empty, it uses en internally.
          tgt_lang:
            The language of the output text. Valid values are: en, es, de, fr.
            If you leave it empty, it uses en internally.
          num_threads:
            Number of threads for neural network computation.
          sample_rate:
            Sample rate of the training data used to train the model. Not used
          feature_dim:
            Dimension of the feature used to train the model. Not used
          decoding_method:
            Valid values are greedy_search. Not used
          debug:
            True to show debug messages.
          provider:
            onnxruntime execution providers. Valid values are: cpu, cuda, coreml.
          rule_fsts:
            If not empty, it specifies fsts for inverse text normalization.
            If there are multiple fsts, they are separated by a comma.
          rule_fars:
            If not empty, it specifies fst archives for inverse text normalization.
            If there are multiple archives, they are separated by a comma.
        """
        self = cls.__new__(cls)
        model_config = OfflineModelConfig(
            canary=OfflineCanaryModelConfig(
                encoder=encoder,
                decoder=decoder,
                src_lang=src_lang,
                tgt_lang=tgt_lang,
            ),
            tokens=tokens,
            num_threads=num_threads,
            debug=debug,
            provider=provider,
        )

        feat_config = FeatureExtractorConfig(
            sampling_rate=sample_rate,
            feature_dim=feature_dim,
        )

        recognizer_config = OfflineRecognizerConfig(
            feat_config=feat_config,
            model_config=model_config,
            decoding_method=decoding_method,
            rule_fsts=rule_fsts,
            rule_fars=rule_fars,
            hr=HomophoneReplacerConfig(
                dict_dir=hr_dict_dir,
                lexicon=hr_lexicon,
                rule_fsts=hr_rule_fsts,
            ),
        )
        self.recognizer = _Recognizer(recognizer_config)
        self.config = recognizer_config
        return self

    @classmethod
    def from_whisper(
        cls,
        encoder: str,
        decoder: str,
        tokens: str,
        language: str = "en",
        task: str = "transcribe",
        num_threads: int = 1,
        decoding_method: str = "greedy_search",
        debug: bool = False,
        provider: str = "cpu",
        tail_paddings: int = -1,
        enable_token_timestamps: bool = False,
        enable_segment_timestamps: bool = False,
        rule_fsts: str = "",
        rule_fars: str = "",
        hr_dict_dir: str = "",
        hr_rule_fsts: str = "",
        hr_lexicon: str = "",
    ):
        """
        Please refer to
        `<https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/index.html>`_
        to download pre-trained models for different kinds of whisper models,
        e.g., tiny, tiny.en, base, base.en, etc.

        Args:
          encoder:
            Path to the encoder model, e.g., tiny-encoder.onnx,
            tiny-encoder.int8.onnx, tiny-encoder.ort, etc.
          decoder:
            Path to the decoder model, e.g., tiny-decoder.onnx,
            tiny-decoder.int8.onnx, tiny-decoder.ort, etc.
          tokens:
            Path to ``tokens.txt``. Each line in ``tokens.txt`` contains two
            columns::

                symbol integer_id

          language:
            The spoken language in the audio file. Example values: en, de, zh,
            jp, fr. See https://github.com/openai/whisper/blob/main/whisper/tokenizer.py#L10
            for all possible values. Note that for non-multilingual models, the
            only valid value is 'en'.
          task:
            Valid values are: transcribe, translate. Note that for
            non-multilingual models, the only valid value is 'transcribe'.
          num_threads:
            Number of threads for neural network computation.
          decoding_method:
            Valid values: greedy_search.
          debug:
            True to show debug messages.
          provider:
            onnxruntime execution providers. Valid values are: cpu, cuda, coreml.
          enable_token_timestamps:
            True to enable token-level timestamps using cross-attention alignment
            and DTW. Requires ONNX models exported with attention outputs.
            When enabled, result.timestamps will contain token-level start times.
            Defaults to False.
          enable_segment_timestamps:
            True to enable segment-level timestamps using Whisper's native
            timestamp token mode. The decoder outputs timestamp tokens like
            <|0.00|> to mark segment boundaries. Does not require attention
            outputs. Can be combined with enable_token_timestamps for both
            segment and token-level timestamps. Defaults to False.
          rule_fsts:
            If not empty, it specifies fsts for inverse text normalization.
            If there are multiple fsts, they are separated by a comma.
          rule_fars:
            If not empty, it specifies fst archives for inverse text normalization.
            If there are multiple archives, they are separated by a comma.
        """
        self = cls.__new__(cls)
        model_config = OfflineModelConfig(
            whisper=OfflineWhisperModelConfig(
                encoder=encoder,
                decoder=decoder,
                language=language,
                task=task,
                tail_paddings=tail_paddings,
                enable_token_timestamps=enable_token_timestamps,
                enable_segment_timestamps=enable_segment_timestamps,
            ),
            tokens=tokens,
            num_threads=num_threads,
            debug=debug,
            provider=provider,
            model_type="whisper",
        )

        feat_config = FeatureExtractorConfig(
            sampling_rate=16000,
            feature_dim=80,
        )

        recognizer_config = OfflineRecognizerConfig(
            feat_config=feat_config,
            model_config=model_config,
            decoding_method=decoding_method,
            rule_fsts=rule_fsts,
            rule_fars=rule_fars,
            hr=HomophoneReplacerConfig(
                dict_dir=hr_dict_dir,
                lexicon=hr_lexicon,
                rule_fsts=hr_rule_fsts,
            ),
        )
        self.recognizer = _Recognizer(recognizer_config)
        self.config = recognizer_config
        return self

    @classmethod
    def from_fire_red_asr(
        cls,
        encoder: str,
        decoder: str,
        tokens: str,
        num_threads: int = 1,
        decoding_method: str = "greedy_search",
        debug: bool = False,
        provider: str = "cpu",
        rule_fsts: str = "",
        rule_fars: str = "",
        hr_dict_dir: str = "",
        hr_rule_fsts: str = "",
        hr_lexicon: str = "",
    ):
        """
        Please refer to
        `<https://k2-fsa.github.io/sherpa/onnx/fire_red_asr/index.html>`_
        to download pre-trained models for different kinds of FireRedAsr models,
        e.g., xs, large, etc.

        Args:
          encoder:
            Path to the encoder model.
          decoder:
            Path to the decoder model.
          tokens:
            Path to ``tokens.txt``. Each line in ``tokens.txt`` contains two
            columns::

                symbol integer_id
          num_threads:
            Number of threads for neural network computation.
          decoding_method:
            Valid values: greedy_search.
          debug:
            True to show debug messages.
          provider:
            onnxruntime execution providers. Valid values are: cpu, cuda, coreml.
          rule_fsts:
            If not empty, it specifies fsts for inverse text normalization.
            If there are multiple fsts, they are separated by a comma.
          rule_fars:
            If not empty, it specifies fst archives for inverse text normalization.
            If there are multiple archives, they are separated by a comma.
        """
        self = cls.__new__(cls)
        model_config = OfflineModelConfig(
            fire_red_asr=OfflineFireRedAsrModelConfig(
                encoder=encoder,
                decoder=decoder,
            ),
            tokens=tokens,
            num_threads=num_threads,
            debug=debug,
            provider=provider,
        )

        feat_config = FeatureExtractorConfig(
            sampling_rate=16000,
            feature_dim=80,
        )

        recognizer_config = OfflineRecognizerConfig(
            feat_config=feat_config,
            model_config=model_config,
            decoding_method=decoding_method,
            rule_fsts=rule_fsts,
            rule_fars=rule_fars,
            hr=HomophoneReplacerConfig(
                dict_dir=hr_dict_dir,
                lexicon=hr_lexicon,
                rule_fsts=hr_rule_fsts,
            ),
        )
        self.recognizer = _Recognizer(recognizer_config)
        self.config = recognizer_config
        return self

    @classmethod
    def from_moonshine(
        cls,
        preprocessor: str,
        encoder: str,
        uncached_decoder: str,
        cached_decoder: str,
        tokens: str,
        num_threads: int = 1,
        decoding_method: str = "greedy_search",
        debug: bool = False,
        provider: str = "cpu",
        rule_fsts: str = "",
        rule_fars: str = "",
        hr_dict_dir: str = "",
        hr_rule_fsts: str = "",
        hr_lexicon: str = "",
    ):
        """
        Please refer to
        `<https://k2-fsa.github.io/sherpa/onnx/moonshine/index.html>`_
        to download pre-trained models for different kinds of moonshine models,
        e.g., tiny, base, etc.

        Args:
          preprocessor:
            Path to the preprocessor model, e.g., preprocess.onnx
          encoder:
            Path to the encoder model, e.g., encode.int8.onnx
          uncached_decoder:
            Path to the uncached decoder model, e.g., uncached_decode.int8.onnx,
          cached_decoder:
            Path to the cached decoder model, e.g., cached_decode.int8.onnx,
          tokens:
            Path to ``tokens.txt``. Each line in ``tokens.txt`` contains two
            columns::

                symbol integer_id

          num_threads:
            Number of threads for neural network computation.
          decoding_method:
            Valid values: greedy_search.
          debug:
            True to show debug messages.
          provider:
            onnxruntime execution providers. Valid values are: cpu, cuda, coreml.
          rule_fsts:
            If not empty, it specifies fsts for inverse text normalization.
            If there are multiple fsts, they are separated by a comma.
          rule_fars:
            If not empty, it specifies fst archives for inverse text normalization.
            If there are multiple archives, they are separated by a comma.
        """
        self = cls.__new__(cls)
        model_config = OfflineModelConfig(
            moonshine=OfflineMoonshineModelConfig(
                preprocessor=preprocessor,
                encoder=encoder,
                uncached_decoder=uncached_decoder,
                cached_decoder=cached_decoder,
            ),
            tokens=tokens,
            num_threads=num_threads,
            debug=debug,
            provider=provider,
        )

        unused_feat_config = FeatureExtractorConfig(
            sampling_rate=16000,
            feature_dim=80,
        )

        recognizer_config = OfflineRecognizerConfig(
            model_config=model_config,
            feat_config=unused_feat_config,
            decoding_method=decoding_method,
            rule_fsts=rule_fsts,
            rule_fars=rule_fars,
            hr=HomophoneReplacerConfig(
                dict_dir=hr_dict_dir,
                lexicon=hr_lexicon,
                rule_fsts=hr_rule_fsts,
            ),
        )
        self.recognizer = _Recognizer(recognizer_config)
        self.config = recognizer_config
        return self

    @classmethod
    def from_moonshine_v2(
        cls,
        encoder: str,
        decoder: str,
        tokens: str,
        num_threads: int = 1,
        decoding_method: str = "greedy_search",
        debug: bool = False,
        provider: str = "cpu",
        rule_fsts: str = "",
        rule_fars: str = "",
        hr_dict_dir: str = "",
        hr_rule_fsts: str = "",
        hr_lexicon: str = "",
    ):
        """
        Please refer to
        `<https://k2-fsa.github.io/sherpa/onnx/moonshine/index.html>`_
        to download pre-trained models for different kinds of moonshine v2 models,
        e.g., tiny-en, base-zh, etc.

        Args:
          encoder:
            Path to the encoder model, e.g., encoder_model.ort
          decoder:
            Path to the merged decoder model, e.g., decoder_model_merged.ort,
          tokens:
            Path to ``tokens.txt``. Each line in ``tokens.txt`` contains two
            columns::

                symbol integer_id

          num_threads:
            Number of threads for neural network computation.
          decoding_method:
            Valid values: greedy_search.
          debug:
            True to show debug messages.
          provider:
            onnxruntime execution providers. Valid values are: cpu, cuda, coreml.
          rule_fsts:
            If not empty, it specifies fsts for inverse text normalization.
            If there are multiple fsts, they are separated by a comma.
          rule_fars:
            If not empty, it specifies fst archives for inverse text normalization.
            If there are multiple archives, they are separated by a comma.
        """
        self = cls.__new__(cls)
        model_config = OfflineModelConfig(
            moonshine=OfflineMoonshineModelConfig(
                encoder=encoder,
                merged_decoder=decoder,
            ),
            tokens=tokens,
            num_threads=num_threads,
            debug=debug,
            provider=provider,
        )

        unused_feat_config = FeatureExtractorConfig(
            sampling_rate=16000,
            feature_dim=80,
        )

        recognizer_config = OfflineRecognizerConfig(
            model_config=model_config,
            feat_config=unused_feat_config,
            decoding_method=decoding_method,
            rule_fsts=rule_fsts,
            rule_fars=rule_fars,
            hr=HomophoneReplacerConfig(
                dict_dir=hr_dict_dir,
                lexicon=hr_lexicon,
                rule_fsts=hr_rule_fsts,
            ),
        )
        self.recognizer = _Recognizer(recognizer_config)
        self.config = recognizer_config
        return self

    @classmethod
    def from_tdnn_ctc(
        cls,
        model: str,
        tokens: str,
        num_threads: int = 1,
        sample_rate: int = 8000,
        feature_dim: int = 23,
        decoding_method: str = "greedy_search",
        debug: bool = False,
        provider: str = "cpu",
        rule_fsts: str = "",
        rule_fars: str = "",
        hr_dict_dir: str = "",
        hr_rule_fsts: str = "",
        hr_lexicon: str = "",
    ):
        """
        Please refer to
        `<https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/yesno/index.html>`_
        to download pre-trained models.

        Args:
          model:
            Path to ``model.onnx``.
          tokens:
            Path to ``tokens.txt``. Each line in ``tokens.txt`` contains two
            columns::

                symbol integer_id

          num_threads:
            Number of threads for neural network computation.
          sample_rate:
            Sample rate of the training data used to train the model.
          feature_dim:
            Dimension of the feature used to train the model.
          decoding_method:
            Valid values are greedy_search.
          debug:
            True to show debug messages.
          provider:
            onnxruntime execution providers. Valid values are: cpu, cuda, coreml.
          rule_fsts:
            If not empty, it specifies fsts for inverse text normalization.
            If there are multiple fsts, they are separated by a comma.
          rule_fars:
            If not empty, it specifies fst archives for inverse text normalization.
            If there are multiple archives, they are separated by a comma.
        """
        self = cls.__new__(cls)
        model_config = OfflineModelConfig(
            tdnn=OfflineTdnnModelConfig(model=model),
            tokens=tokens,
            num_threads=num_threads,
            debug=debug,
            provider=provider,
            model_type="tdnn",
        )

        feat_config = FeatureExtractorConfig(
            sampling_rate=sample_rate,
            feature_dim=feature_dim,
        )

        recognizer_config = OfflineRecognizerConfig(
            feat_config=feat_config,
            model_config=model_config,
            decoding_method=decoding_method,
            rule_fsts=rule_fsts,
            rule_fars=rule_fars,
            hr=HomophoneReplacerConfig(
                dict_dir=hr_dict_dir,
                lexicon=hr_lexicon,
                rule_fsts=hr_rule_fsts,
            ),
        )
        self.recognizer = _Recognizer(recognizer_config)
        self.config = recognizer_config
        return self

    @classmethod
    def from_wenet_ctc(
        cls,
        model: str,
        tokens: str,
        num_threads: int = 1,
        sample_rate: int = 16000,
        feature_dim: int = 80,
        decoding_method: str = "greedy_search",
        debug: bool = False,
        provider: str = "cpu",
        rule_fsts: str = "",
        rule_fars: str = "",
        hr_dict_dir: str = "",
        hr_rule_fsts: str = "",
        hr_lexicon: str = "",
    ):
        """
        Please refer to
        `<https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/index.html>`_
        to download pre-trained models for different languages, e.g., Chinese,
        English, etc.

        Args:
          model:
            Path to ``model.onnx``.
          tokens:
            Path to ``tokens.txt``. Each line in ``tokens.txt`` contains two
            columns::

                symbol integer_id

          num_threads:
            Number of threads for neural network computation.
          sample_rate:
            Sample rate of the training data used to train the model.
          feature_dim:
            Dimension of the feature used to train the model.
          decoding_method:
            Valid values are greedy_search.
          debug:
            True to show debug messages.
          provider:
            onnxruntime execution providers. Valid values are: cpu, cuda, coreml.
          rule_fsts:
            If not empty, it specifies fsts for inverse text normalization.
            If there are multiple fsts, they are separated by a comma.
          rule_fars:
            If not empty, it specifies fst archives for inverse text normalization.
            If there are multiple archives, they are separated by a comma.
        """
        self = cls.__new__(cls)
        model_config = OfflineModelConfig(
            wenet_ctc=OfflineWenetCtcModelConfig(model=model),
            tokens=tokens,
            num_threads=num_threads,
            debug=debug,
            provider=provider,
            model_type="wenet_ctc",
        )

        feat_config = FeatureExtractorConfig(
            sampling_rate=sample_rate,
            feature_dim=feature_dim,
        )

        recognizer_config = OfflineRecognizerConfig(
            feat_config=feat_config,
            model_config=model_config,
            decoding_method=decoding_method,
            rule_fsts=rule_fsts,
            rule_fars=rule_fars,
            hr=HomophoneReplacerConfig(
                dict_dir=hr_dict_dir,
                lexicon=hr_lexicon,
                rule_fsts=hr_rule_fsts,
            ),
        )
        self.recognizer = _Recognizer(recognizer_config)
        self.config = recognizer_config
        return self

    def create_stream(self, hotwords: Optional[str] = None):
        if hotwords is None:
            return self.recognizer.create_stream()
        else:
            return self.recognizer.create_stream(hotwords)

    def decode_stream(self, s: OfflineStream):
        self.recognizer.decode_stream(s)

    def decode_streams(self, ss: List[OfflineStream]):
        self.recognizer.decode_streams(ss)


================================================
FILE: sherpa-onnx/python/sherpa_onnx/online_recognizer.py
================================================
# Copyright (c)  2023  Xiaomi Corporation
from pathlib import Path
from typing import List, Optional

from sherpa_onnx.lib._sherpa_onnx import (
    CudaConfig,
    EndpointConfig,
    FeatureExtractorConfig,
    HomophoneReplacerConfig,
    OnlineCtcFstDecoderConfig,
    OnlineLMConfig,
    OnlineModelConfig,
    OnlineNeMoCtcModelConfig,
    OnlineParaformerModelConfig,
)
from sherpa_onnx.lib._sherpa_onnx import OnlineRecognizer as _Recognizer
from sherpa_onnx.lib._sherpa_onnx import (
    OnlineRecognizerConfig,
    OnlineRecognizerResult,
    OnlineStream,
    OnlineToneCtcModelConfig,
    OnlineTransducerModelConfig,
    OnlineWenetCtcModelConfig,
    OnlineZipformer2CtcModelConfig,
    ProviderConfig,
    TensorrtConfig,
)


def _assert_file_exists(f: str):
    assert Path(f).is_file(), f"{f} does not exist"


class OnlineRecognizer(object):
    """A class for streaming speech recognition.

    Please refer to the following files for usages
     - https://github.com/k2-fsa/sherpa-onnx/blob/master/sherpa-onnx/python/tests/test_online_recognizer.py
     - https://github.com/k2-fsa/sherpa-onnx/blob/master/python-api-examples/online-decode-files.py
    """

    @classmethod
    def from_transducer(
        cls,
        tokens: str,
        encoder: str,
        decoder: str,
        joiner: str,
        num_threads: int = 2,
        sample_rate: float = 16000,
        feature_dim: int = 80,
        low_freq: float = 20.0,
        high_freq: float = -400.0,
        dither: float = 0.0,
        normalize_samples: bool = True,
        snip_edges: bool = False,
        enable_endpoint_detection: bool = False,
        rule1_min_trailing_silence: float = 2.4,
        rule2_min_trailing_silence: float = 1.2,
        rule3_min_utterance_length: float = 20.0,
        decoding_method: str = "greedy_search",
        max_active_paths: int = 4,
        hotwords_score: float = 1.5,
        blank_penalty: float = 0.0,
        hotwords_file: str = "",
        model_type: str = "",
        modeling_unit: str = "cjkchar",
        bpe_vocab: str = "",
        lm: str = "",
        lm_scale: float = 0.1,
        lm_shallow_fusion: bool = True,
        temperature_scale: float = 2.0,
        reset_encoder: bool = False,
        debug: bool = False,
        rule_fsts: str = "",
        rule_fars: str = "",
        provider: str = "cpu",
        device: int = 0,
        cudnn_conv_algo_search: int = 1,
        trt_max_workspace_size: int = 2147483647,
        trt_max_partition_iterations: int = 10,
        trt_min_subgraph_size: int = 5,
        trt_fp16_enable: bool = True,
        trt_detailed_build_log: bool = False,
        trt_engine_cache_enable: bool = True,
        trt_timing_cache_enable: bool = True,
        trt_engine_cache_path: str = "",
        trt_timing_cache_path: str = "",
        trt_dump_subgraphs: bool = False,
        hr_dict_dir: str = "",
        hr_rule_fsts: str = "",
        hr_lexicon: str = "",
        lodr_fst: str = "",
        lodr_scale: float = 0.0,
    ):
        """
        Please refer to
        `<https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html>`_
        to download pre-trained models for different languages, e.g., Chinese,
        English, etc.

        Args:
          tokens:
            Path to ``tokens.txt``. Each line in ``tokens.txt`` contains two
            columns::

                symbol integer_id

          encoder:
            Path to ``encoder.onnx``.
          decoder:
            Path to ``decoder.onnx``.
          joiner:
            Path to ``joiner.onnx``.
          num_threads:
            Number of threads for neural network computation.
          sample_rate:
            Sample rate of the training data used to train the model.
          feature_dim:
            Dimension of the feature used to train the model.
          low_freq:
            Low cutoff frequency for mel bins in feature extraction.
          high_freq:
            High cutoff frequency for mel bins in feature extraction
            (if <= 0, offset from Nyquist)
          dither:
            Dithering constant (0.0 means no dither).
            By default the audio samples are in range [-1,+1],
            so dithering constant 0.00003 is a good value,
            equivalent to the default 1.0 from kaldi
          normalize_samples:
            True for +/- 1.0 range of audio samples (default, zipformer feats),
            False for +/- 32k samples (ebranchformer features).
          snip_edges:
            handling of end of audio signal in kaldi feature extraction.
            If true, end effects will be handled by outputting only frames that
            completely fit in the file, and the number of frames depends on the
            frame-length.  If false, the number of frames depends only on the
            frame-shift, and we reflect the data at the ends.
          enable_endpoint_detection:
            True to enable endpoint detection. False to disable endpoint
            detection.
          rule1_min_trailing_silence:
            Used only when enable_endpoint_detection is True. If the duration
            of trailing silence in seconds is larger than this value, we assume
            an endpoint is detected.
          rule2_min_trailing_silence:
            Used only when enable_endpoint_detection is True. If we have decoded
            something that is nonsilence and if the duration of trailing silence
            in seconds is larger than this value, we assume an endpoint is
            detected.
          rule3_min_utterance_length:
            Used only when enable_endpoint_detection is True. If the utterance
            length in seconds is larger than this value, we assume an endpoint
            is detected.
          decoding_method:
            Valid values are greedy_search, modified_beam_search.
          max_active_paths:
            Use only when decoding_method is modified_beam_search. It specifies
            the maximum number of active paths during beam search.
          blank_penalty:
            The penalty applied on blank symbol during decoding.
          hotwords_file:
            The file containing hotwords, one words/phrases per line, and for each
            phrase the bpe/cjkchar are separated by a space.
          hotwords_score:
            The hotword score of each token for biasing word/phrase. Used only if
            hotwords_file is given with modified_beam_search as decoding method.
          temperature_scale:
            Temperature scaling for output symbol confidence estimation.
            It affects only confidence values, the decoding uses the original
            logits without temperature.
          reset_encoder:
            True to reset `encoder_state` on an endpoint after empty segment.
            Done in `Reset()` method, after an endpoint was detected,
            currently only in `OnlineRecognizerTransducerImpl`.
          model_type:
            Online transducer model type. Valid values are: conformer, lstm,
            zipformer, zipformer2. All other values lead to loading the model twice.
          modeling_unit:
            The modeling unit of the model, commonly used units are bpe, cjkchar,
            cjkchar+bpe, etc. Currently, it is needed only when hotwords are
            provided, we need it to encode the hotwords into token sequence.
          bpe_vocab:
            The vocabulary generated by google's sentencepiece program.
            It is a file has two columns, one is the token, the other is
            the log probability, you can get it from the directory where
            your bpe model is generated. Only used when hotwords provided
            and the modeling unit is bpe or cjkchar+bpe.
          rule_fsts:
            If not empty, it specifies fsts for inverse text normalization.
            If there are multiple fsts, they are separated by a comma.
          rule_fars:
            If not empty, it specifies fst archives for inverse text normalization.
            If there are multiple archives, they are separated by a comma.
          provider:
            onnxruntime execution providers. Valid values are: cpu, cuda, coreml.
          device:
            onnxruntime cuda device index.
          cudnn_conv_algo_search:
            onxrt CuDNN convolution search algorithm selection. CUDA EP
          trt_max_workspace_size:
            Set TensorRT EP GPU memory usage limit. TensorRT EP
          trt_max_partition_iterations:
            Limit partitioning iterations for model conversion. TensorRT EP
          trt_min_subgraph_size:
            Set minimum size for subgraphs in partitioning. TensorRT EP
          trt_fp16_enable: bool = True,
            Enable FP16 precision for faster performance. TensorRT EP
          trt_detailed_build_log: bool = False,
            Enable detailed logging of build steps. TensorRT EP
          trt_engine_cache_enable: bool = True,
            Enable caching of TensorRT engines. TensorRT EP
          trt_timing_cache_enable: bool = True,
            "Enable use of timing cache to speed up builds." TensorRT EP
          trt_engine_cache_path: str ="",
            "Set path to store cached TensorRT engines." TensorRT EP
          trt_timing_cache_path: str ="",
            "Set path for storing timing cache." TensorRT EP
          trt_dump_subgraphs: bool = False,
            "Dump optimized subgraphs for debugging." TensorRT EP
          lodr_fst:
            Path to the LODR FST file in binary format. If empty, LODR is disabled.
          lodr_scale:
            Scale factor for LODR rescoring. Only used when lodr_fst is provided.
        """
        self = cls.__new__(cls)
        _assert_file_exists(tokens)
        _assert_file_exists(encoder)
        _assert_file_exists(decoder)
        _assert_file_exists(joiner)

        assert num_threads > 0, num_threads

        transducer_config = OnlineTransducerModelConfig(
            encoder=encoder,
            decoder=decoder,
            joiner=joiner,
        )

        cuda_config = CudaConfig(
            cudnn_conv_algo_search=cudnn_conv_algo_search,
        )

        trt_config = TensorrtConfig(
            trt_max_workspace_size=trt_max_workspace_size,
            trt_max_partition_iterations=trt_max_partition_iterations,
            trt_min_subgraph_size=trt_min_subgraph_size,
            trt_fp16_enable=trt_fp16_enable,
            trt_detailed_build_log=trt_detailed_build_log,
            trt_engine_cache_enable=trt_engine_cache_enable,
            trt_timing_cache_enable=trt_timing_cache_enable,
            trt_engine_cache_path=trt_engine_cache_path,
            trt_timing_cache_path=trt_timing_cache_path,
            trt_dump_subgraphs=trt_dump_subgraphs,
        )

        provider_config = ProviderConfig(
            trt_config=trt_config,
            cuda_config=cuda_config,
            provider=provider,
            device=device,
        )

        model_config = OnlineModelConfig(
            transducer=transducer_config,
            tokens=tokens,
            num_threads=num_threads,
            provider_config=provider_config,
            model_type=model_type,
            modeling_unit=modeling_unit,
            bpe_vocab=bpe_vocab,
            debug=debug,
        )

        feat_config = FeatureExtractorConfig(
            sampling_rate=sample_rate,
            normalize_samples=normalize_samples,
            snip_edges=snip_edges,
            feature_dim=feature_dim,
            low_freq=low_freq,
            high_freq=high_freq,
            dither=dither,
        )

        endpoint_config = EndpointConfig(
            rule1_min_trailing_silence=rule1_min_trailing_silence,
            rule2_min_trailing_silence=rule2_min_trailing_silence,
            rule3_min_utterance_length=rule3_min_utterance_length,
        )

        if len(hotwords_file) > 0 and decoding_method != "modified_beam_search":
            raise ValueError(
                "Please use --decoding-method=modified_beam_search when using "
                f"--hotwords-file. Currently given: {decoding_method}"
            )

        if lm and decoding_method != "modified_beam_search":
            raise ValueError(
                "Please use --decoding-method=modified_beam_search when using "
                f"--lm. Currently given: {decoding_method}"
            )

        lm_config = OnlineLMConfig(
            model=lm,
            scale=lm_scale,
            shallow_fusion=lm_shallow_fusion,
            lodr_fst=lodr_fst,
            lodr_scale=lodr_scale,
        )

        recognizer_config = OnlineRecognizerConfig(
            feat_config=feat_config,
            model_config=model_config,
            lm_config=lm_config,
            endpoint_config=endpoint_config,
            enable_endpoint=enable_endpoint_detection,
            decoding_method=decoding_method,
            max_active_paths=max_active_paths,
            hotwords_score=hotwords_score,
            hotwords_file=hotwords_file,
            blank_penalty=blank_penalty,
            temperature_scale=temperature_scale,
            rule_fsts=rule_fsts,
            rule_fars=rule_fars,
            reset_encoder=reset_encoder,
            hr=HomophoneReplacerConfig(
                dict_dir=hr_dict_dir,
                lexicon=hr_lexicon,
                rule_fsts=hr_rule_fsts,
            ),
        )

        self.recognizer = _Recognizer(recognizer_config)
        self.config = recognizer_config
        return self

    @classmethod
    def from_paraformer(
        cls,
        tokens: str,
        encoder: str,
        decoder: str,
        num_threads: int = 2,
        sample_rate: float = 16000,
        feature_dim: int = 80,
        enable_endpoint_detection: bool = False,
        rule1_min_trailing_silence: float = 2.4,
        rule2_min_trailing_silence: float = 1.2,
        rule3_min_utterance_length: float = 20.0,
        decoding_method: str = "greedy_search",
        provider: str = "cpu",
        debug: bool = False,
        rule_fsts: str = "",
        rule_fars: str = "",
        device: int = 0,
        hr_dict_dir: str = "",
        hr_rule_fsts: str = "",
        hr_lexicon: str = "",
    ):
        """
        Please refer to
        `<https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html>`_
        to download pre-trained models for different languages, e.g., Chinese,
        English, etc.

        Args:
          tokens:
            Path to ``tokens.txt``. Each line in ``tokens.txt`` contains two
            columns::

                symbol integer_id

          encoder:
            Path to ``encoder.onnx``.
          decoder:
            Path to ``decoder.onnx``.
          num_threads:
            Number of threads for neural network computation.
          sample_rate:
            Sample rate of the training data used to train the model.
          feature_dim:
            Dimension of the feature used to train the model.
          enable_endpoint_detection:
            True to enable endpoint detection. False to disable endpoint
            detection.
          rule1_min_trailing_silence:
            Used only when enable_endpoint_detection is True. If the duration
            of trailing silence in seconds is larger than this value, we assume
            an endpoint is detected.
          rule2_min_trailing_silence:
            Used only when enable_endpoint_detection is True. If we have decoded
            something that is nonsilence and if the duration of trailing silence
            in seconds is larger than this value, we assume an endpoint is
            detected.
          rule3_min_utterance_length:
            Used only when enable_endpoint_detection is True. If the utterance
            length in seconds is larger than this value, we assume an endpoint
            is detected.
          decoding_method:
            The only valid value is greedy_search.
          provider:
            onnxruntime execution providers. Valid values are: cpu, cuda, coreml.
          rule_fsts:
            If not empty, it specifies fsts for inverse text normalization.
            If there are multiple fsts, they are separated by a comma.
          rule_fars:
            If not empty, it specifies fst archives for inverse text normalization.
            If there are multiple archives, they are separated by a comma.
          device:
            onnxruntime cuda device index.
        """
        self = cls.__new__(cls)
        _assert_file_exists(tokens)
        _assert_file_exists(encoder)
        _assert_file_exists(decoder)

        assert num_threads > 0, num_threads

        paraformer_config = OnlineParaformerModelConfig(
            encoder=encoder,
            decoder=decoder,
        )

        provider_config = ProviderConfig(
            provider=provider,
            device=device,
        )

        model_config = OnlineModelConfig(
            paraformer=paraformer_config,
            tokens=tokens,
            num_threads=num_threads,
            provider_config=provider_config,
            model_type="paraformer",
            debug=debug,
        )

        feat_config = FeatureExtractorConfig(
            sampling_rate=sample_rate,
            feature_dim=feature_dim,
        )

        endpoint_config = EndpointConfig(
            rule1_min_trailing_silence=rule1_min_trailing_silence,
            rule2_min_trailing_silence=rule2_min_trailing_silence,
            rule3_min_utterance_length=rule3_min_utterance_length,
        )

        recognizer_config = OnlineRecognizerConfig(
            feat_config=feat_config,
            model_config=model_config,
            endpoint_config=endpoint_config,
            enable_endpoint=enable_endpoint_detection,
            decoding_method=decoding_method,
            rule_fsts=rule_fsts,
            rule_fars=rule_fars,
            hr=HomophoneReplacerConfig(
                dict_dir=hr_dict_dir,
                lexicon=hr_lexicon,
                rule_fsts=hr_rule_fsts,
            ),
        )

        self.recognizer = _Recognizer(recognizer_config)
        self.config = recognizer_config
        return self

    @classmethod
    def from_zipformer2_ctc(
        cls,
        tokens: str,
        model: str,
        num_threads: int = 2,
        sample_rate: float = 16000,
        feature_dim: int = 80,
        enable_endpoint_detection: bool = False,
        rule1_min_trailing_silence: float = 2.4,
        rule2_min_trailing_silence: float = 1.2,
        rule3_min_utterance_length: float = 20.0,
        decoding_method: str = "greedy_search",
        ctc_graph: str = "",
        ctc_max_active: int = 3000,
        provider: str = "cpu",
        debug: bool = False,
        rule_fsts: str = "",
        rule_fars: str = "",
        device: int = 0,
        hr_dict_dir: str = "",
        hr_rule_fsts: str = "",
        hr_lexicon: str = "",
    ):
        """
        Please refer to
        `<https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-ctc/index.html>`_
        to download pre-trained models for different languages, e.g., Chinese,
        English, etc.

        Args:
          tokens:
            Path to ``tokens.txt``. Each line in ``tokens.txt`` contains two
            columns::

                symbol integer_id

          model:
            Path to ``model.onnx``.
          num_threads:
            Number of threads for neural network computation.
          sample_rate:
            Sample rate of the training data used to train the model.
          feature_dim:
            Dimension of the feature used to train the model.
          enable_endpoint_detection:
            True to enable endpoint detection. False to disable endpoint
            detection.
          rule1_min_trailing_silence:
            Used only when enable_endpoint_detection is True. If the duration
            of trailing silence in seconds is larger than this value, we assume
            an endpoint is detected.
          rule2_min_trailing_silence:
            Used only when enable_endpoint_detection is True. If we have decoded
            something that is nonsilence and if the duration of trailing silence
            in seconds is larger than this value, we assume an endpoint is
            detected.
          rule3_min_utterance_length:
            Used only when enable_endpoint_detection is True. If the utterance
            length in seconds is larger than this value, we assume an endpoint
            is detected.
          decoding_method:
            The only valid value is greedy_search.
          ctc_graph:
            If not empty, decoding_method is ignored. It contains the path to
            H.fst, HL.fst, or HLG.fst
          ctc_max_active:
            Used only when ctc_graph is not empty. It specifies the maximum
            active paths at a time.
          provider:
            onnxruntime execution providers. Valid values are: cpu, cuda, coreml.
          rule_fsts:
            If not empty, it specifies fsts for inverse text normalization.
            If there are multiple fsts, they are separated by a comma.
          rule_fars:
            If not empty, it specifies fst archives for inverse text normalization.
            If there are multiple archives, they are separated by a comma.
          device:
            onnxruntime cuda device index.
        """
        self = cls.__new__(cls)
        _assert_file_exists(tokens)
        _assert_file_exists(model)

        assert num_threads > 0, num_threads

        zipformer2_ctc_config = OnlineZipformer2CtcModelConfig(model=model)

        provider_config = ProviderConfig(
            provider=provider,
            device=device,
        )

        model_config = OnlineModelConfig(
            zipformer2_ctc=zipformer2_ctc_config,
            tokens=tokens,
            num_threads=num_threads,
            provider_config=provider_config,
            debug=debug,
        )

        feat_config = FeatureExtractorConfig(
            sampling_rate=sample_rate,
            feature_dim=feature_dim,
        )

        endpoint_config = EndpointConfig(
            rule1_min_trailing_silence=rule1_min_trailing_silence,
            rule2_min_trailing_silence=rule2_min_trailing_silence,
            rule3_min_utterance_length=rule3_min_utterance_length,
        )

        ctc_fst_decoder_config = OnlineCtcFstDecoderConfig(
            graph=ctc_graph,
            max_active=ctc_max_active,
        )

        recognizer_config = OnlineRecognizerConfig(
            feat_config=feat_config,
            model_config=model_config,
            endpoint_config=endpoint_config,
            ctc_fst_decoder_config=ctc_fst_decoder_config,
            enable_endpoint=enable_endpoint_detection,
            decoding_method=decoding_method,
            rule_fsts=rule_fsts,
            rule_fars=rule_fars,
            hr=HomophoneReplacerConfig(
                dict_dir=hr_dict_dir,
                lexicon=hr_lexicon,
                rule_fsts=hr_rule_fsts,
            ),
        )

        self.recognizer = _Recognizer(recognizer_config)
        self.config = recognizer_config
        return self

    @classmethod
    def from_t_one_ctc(
        cls,
        tokens: str,
        model: str,
        num_threads: int = 2,
        sample_rate: float = 8000,
        feature_dim: int = 80,
        enable_endpoint_detection: bool = False,
        rule1_min_trailing_silence: float = 2.4,
        rule2_min_trailing_silence: float = 1.2,
        rule3_min_utterance_length: float = 20.0,
        decoding_method: str = "greedy_search",
        provider: str = "cpu",
        debug: bool = False,
        rule_fsts: str = "",
        rule_fars: str = "",
        device: int = 0,
        hr_dict_dir: str = "",
        hr_rule_fsts: str = "",
        hr_lexicon: str = "",
    ):
        """
        Please refer to
        `<https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models>`_
        to download pre-trained models.

        Args:
          tokens:
            Path to ``tokens.txt``. Each line in ``tokens.txt`` contains two
            columns::

                symbol integer_id

          model:
            Path to ``model.onnx``.
          num_threads:
            Number of threads for neural network computation.
          sample_rate:
            Sample rate of the training data used to train the model.
          feature_dim:
            Dimension of the feature used to train the model.
          enable_endpoint_detection:
            True to enable endpoint detection. False to disable endpoint
            detection.
          rule1_min_trailing_silence:
            Used only when enable_endpoint_detection is True. If the duration
            of trailing silence in seconds is larger than this value, we assume
            an endpoint is detected.
          rule2_min_trailing_silence:
            Used only when enable_endpoint_detection is True. If we have decoded
            something that is nonsilence and if the duration of trailing silence
            in seconds is larger than this value, we assume an endpoint is
            detected.
          rule3_min_utterance_length:
            Used only when enable_endpoint_detection is True. If the utterance
            length in seconds is larger than this value, we assume an endpoint
            is detected.
          decoding_method:
            The only valid value is greedy_search.
          provider:
            onnxruntime execution providers. Valid values are: cpu, cuda, coreml.
          debug:
            True to show meta data in the model.
          rule_fsts:
            If not empty, it specifies fsts for inverse text normalization.
            If there are multiple fsts, they are separated by a comma.
          rule_fars:
            If not empty, it specifies fst archives for inverse text normalization.
            If there are multiple archives, they are separated by a comma.
          device:
            onnxruntime cuda device index.
        """
        self = cls.__new__(cls)
        _assert_file_exists(tokens)
        _assert_file_exists(model)

        assert num_threads > 0, num_threads

        t_one_ctc_config = OnlineToneCtcModelConfig(
            model=model,
        )

        provider_config = ProviderConfig(
            provider=provider,
            device=device,
        )

        model_config = OnlineModelConfig(
            t_one_ctc=t_one_ctc_config,
            tokens=tokens,
            num_threads=num_threads,
            provider_config=provider_config,
            debug=debug,
        )

        feat_config = FeatureExtractorConfig(
            sampling_rate=sample_rate,
            feature_dim=feature_dim,
        )

        endpoint_config = EndpointConfig(
            rule1_min_trailing_silence=rule1_min_trailing_silence,
            rule2_min_trailing_silence=rule2_min_trailing_silence,
            rule3_min_utterance_length=rule3_min_utterance_length,
        )

        recognizer_config = OnlineRecognizerConfig(
            feat_config=feat_config,
            model_config=model_config,
            endpoint_config=endpoint_config,
            enable_endpoint=enable_endpoint_detection,
            decoding_method=decoding_method,
            rule_fsts=rule_fsts,
            rule_fars=rule_fars,
            hr=HomophoneReplacerConfig(
                dict_dir=hr_dict_dir,
                lexicon=hr_lexicon,
                rule_fsts=hr_rule_fsts,
            ),
        )

        self.recognizer = _Recognizer(recognizer_config)
        self.config = recognizer_config
        return self

    @classmethod
    def from_nemo_ctc(
        cls,
        tokens: str,
        model: str,
        num_threads: int = 2,
        sample_rate: float = 16000,
        feature_dim: int = 80,
        enable_endpoint_detection: bool = False,
        rule1_min_trailing_silence: float = 2.4,
        rule2_min_trailing_silence: float = 1.2,
        rule3_min_utterance_length: float = 20.0,
        decoding_method: str = "greedy_search",
        provider: str = "cpu",
        debug: bool = False,
        rule_fsts: str = "",
        rule_fars: str = "",
        device: int = 0,
        hr_dict_dir: str = "",
        hr_rule_fsts: str = "",
        hr_lexicon: str = "",
    ):
        """
        Please refer to
        `<https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models>`_
        to download pre-trained models.

        Args:
          tokens:
            Path to ``tokens.txt``. Each line in ``tokens.txt`` contains two
            columns::

                symbol integer_id

          model:
            Path to ``model.onnx``.
          num_threads:
            Number of threads for neural network computation.
          sample_rate:
            Sample rate of the training data used to train the model.
          feature_dim:
            Dimension of the feature used to train the model.
          enable_endpoint_detection:
            True to enable endpoint detection. False to disable endpoint
            detection.
          rule1_min_trailing_silence:
            Used only when enable_endpoint_detection is True. If the duration
            of trailing silence in seconds is larger than this value, we assume
            an endpoint is detected.
          rule2_min_trailing_silence:
            Used only when enable_endpoint_detection is True. If we have decoded
            something that is nonsilence and if the duration of trailing silence
            in seconds is larger than this value, we assume an endpoint is
            detected.
          rule3_min_utterance_length:
            Used only when enable_endpoint_detection is True. If the utterance
            length in seconds is larger than this value, we assume an endpoint
            is detected.
          decoding_method:
            The only valid value is greedy_search.
          provider:
            onnxruntime execution providers. Valid values are: cpu, cuda, coreml.
          debug:
            True to show meta data in the model.
          rule_fsts:
            If not empty, it specifies fsts for inverse text normalization.
            If there are multiple fsts, they are separated by a comma.
          rule_fars:
            If not empty, it specifies fst archives for inverse text normalization.
            If there are multiple archives, they are separated by a comma.
          device:
            onnxruntime cuda device index.
        """
        self = cls.__new__(cls)
        _assert_file_exists(tokens)
        _assert_file_exists(model)

        assert num_threads > 0, num_threads

        nemo_ctc_config = OnlineNeMoCtcModelConfig(
            model=model,
        )

        provider_config = ProviderConfig(
            provider=provider,
            device=device,
        )

        model_config = OnlineModelConfig(
            nemo_ctc=nemo_ctc_config,
            tokens=tokens,
            num_threads=num_threads,
            provider_config=provider_config,
            debug=debug,
        )

        feat_config = FeatureExtractorConfig(
            sampling_rate=sample_rate,
            feature_dim=feature_dim,
        )

        endpoint_config = EndpointConfig(
            rule1_min_trailing_silence=rule1_min_trailing_silence,
            rule2_min_trailing_silence=rule2_min_trailing_silence,
            rule3_min_utterance_length=rule3_min_utterance_length,
        )

        recognizer_config = OnlineRecognizerConfig(
            feat_config=feat_config,
            model_config=model_config,
            endpoint_config=endpoint_config,
            enable_endpoint=enable_endpoint_detection,
            decoding_method=decoding_method,
            rule_fsts=rule_fsts,
            rule_fars=rule_fars,
            hr=HomophoneReplacerConfig(
                dict_dir=hr_dict_dir,
                lexicon=hr_lexicon,
                rule_fsts=hr_rule_fsts,
            ),
        )

        self.recognizer = _Recognizer(recognizer_config)
        self.config = recognizer_config
        return self

    @classmethod
    def from_wenet_ctc(
        cls,
        tokens: str,
        model: str,
        chunk_size: int = 16,
        num_left_chunks: int = 4,
        num_threads: int = 2,
        sample_rate: float = 16000,
        feature_dim: int = 80,
        enable_endpoint_detection: bool = False,
        rule1_min_trailing_silence: float = 2.4,
        rule2_min_trailing_silence: float = 1.2,
        rule3_min_utterance_length: float = 20.0,
        decoding_method: str = "greedy_search",
        provider: str = "cpu",
        debug: bool = False,
        rule_fsts: str = "",
        rule_fars: str = "",
        device: int = 0,
        hr_dict_dir: str = "",
        hr_rule_fsts: str = "",
        hr_lexicon: str = "",
    ):
        """
        Please refer to
        `<https://k2-fsa.github.io/sherpa/onnx/pretrained_models/wenet/index.html>`_
        to download pre-trained models for different languages, e.g., Chinese,
        English, etc.

        Args:
          tokens:
            Path to ``tokens.txt``. Each line in ``tokens.txt`` contains two
            columns::

                symbol integer_id

          model:
            Path to ``model.onnx``.
          chunk_size:
            The --chunk-size parameter from WeNet.
          num_left_chunks:
            The --num-left-chunks parameter from WeNet.
          num_threads:
            Number of threads for neural network computation.
          sample_rate:
            Sample rate of the training data used to train the model.
          feature_dim:
            Dimension of the feature used to train the model.
          enable_endpoint_detection:
            True to enable endpoint detection. False to disable endpoint
            detection.
          rule1_min_trailing_silence:
            Used only when enable_endpoint_detection is True. If the duration
            of trailing silence in seconds is larger than this value, we assume
            an endpoint is detected.
          rule2_min_trailing_silence:
            Used only when enable_endpoint_detection is True. If we have decoded
            something that is nonsilence and if the duration of trailing silence
            in seconds is larger than this value, we assume an endpoint is
            detected.
          rule3_min_utterance_length:
            Used only when enable_endpoint_detection is True. If the utterance
            length in seconds is larger than this value, we assume an endpoint
            is detected.
          decoding_method:
            The only valid value is greedy_search.
          provider:
            onnxruntime execution providers. Valid values are: cpu, cuda, coreml.
          rule_fsts:
            If not empty, it specifies fsts for inverse text normalization.
            If there are multiple fsts, they are separated by a comma.
          rule_fars:
            If not empty, it specifies fst archives for inverse text normalization.
            If there are multiple archives, they are separated by a comma.
          device:
            onnxruntime cuda device index.
        """
        self = cls.__new__(cls)
        _assert_file_exists(tokens)
        _assert_file_exists(model)

        assert num_threads > 0, num_threads

        wenet_ctc_config = OnlineWenetCtcModelConfig(
            model=model,
            chunk_size=chunk_size,
            num_left_chunks=num_left_chunks,
        )

        provider_config = ProviderConfig(
            provider=provider,
            device=device,
        )

        model_config = OnlineModelConfig(
            wenet_ctc=wenet_ctc_config,
            tokens=tokens,
            num_threads=num_threads,
            provider_config=provider_config,
            debug=debug,
        )

        feat_config = FeatureExtractorConfig(
            sampling_rate=sample_rate,
            feature_dim=feature_dim,
        )

        endpoint_config = EndpointConfig(
            rule1_min_trailing_silence=rule1_min_trailing_silence,
            rule2_min_trailing_silence=rule2_min_trailing_silence,
            rule3_min_utterance_length=rule3_min_utterance_length,
        )

        recognizer_config = OnlineRecognizerConfig(
            feat_config=feat_config,
            model_config=model_config,
            endpoint_config=endpoint_config,
            enable_endpoint=enable_endpoint_detection,
            decoding_method=decoding_method,
            rule_fsts=rule_fsts,
            rule_fars=rule_fars,
            hr=HomophoneReplacerConfig(
                dict_dir=hr_dict_dir,
                lexicon=hr_lexicon,
                rule_fsts=hr_rule_fsts,
            ),
        )

        self.recognizer = _Recognizer(recognizer_config)
        self.config = recognizer_config
        return self

    def create_stream(self, hotwords: Optional[str] = None):
        if hotwords is None:
            return self.recognizer.create_stream()
        else:
            return self.recognizer.create_stream(hotwords)

    def decode_stream(self, s: OnlineStream):
        self.recognizer.decode_stream(s)

    def decode_streams(self, ss: List[OnlineStream]):
        self.recognizer.decode_streams(ss)

    def is_ready(self, s: OnlineStream) -> bool:
        return self.recognizer.is_ready(s)

    def get_result_all(self, s: OnlineStream) -> OnlineRecognizerResult:
        return self.recognizer.get_result(s)

    def get_result(self, s: OnlineStream) -> str:
        return self.recognizer.get_result(s).text.strip()

    def get_result_as_json_string(self, s: OnlineStream) -> str:
        return self.recognizer.get_result(s).as_json_string()

    def tokens(self, s: OnlineStream) -> List[str]:
        return self.recognizer.get_result(s).tokens

    def timestamps(self, s: OnlineStream) -> List[float]:
        return self.recognizer.get_result(s).timestamps

    def start_time(self, s: OnlineStream) -> float:
        return self.recognizer.get_result(s).start_time

    def ys_probs(self, s: OnlineStream) -> List[float]:
        return self.recognizer.get_result(s).ys_probs

    def lm_probs(self, s: OnlineStream) -> List[float]:
        return self.recognizer.get_result(s).lm_probs

    def context_scores(self, s: OnlineStream) -> List[float]:
        return self.recognizer.get_result(s).context_scores

    def is_endpoint(self, s: OnlineStream) -> bool:
        return self.recognizer.is_endpoint(s)

    def reset(self, s: OnlineStream) -> bool:
        return self.recognizer.reset(s)


================================================
FILE: sherpa-onnx/python/sherpa_onnx/utils.py
================================================
# Copyright (c)  2023  Xiaomi Corporation
import re

from pathlib import Path
from typing import List, Optional, Union


def text2token(
    texts: List[str],
    tokens: str,
    tokens_type: str = "cjkchar",
    bpe_model: Optional[str] = None,
    lexicon: Optional[str] = None,
    output_ids: bool = False,
) -> List[List[Union[str, int]]]:
    """
    Encode the given texts (a list of string) to a list of a list of tokens.

    Args:
      texts:
        The given contexts list (a list of string).
      tokens:
        The path of the tokens.txt.
      tokens_type:
        The valid values are cjkchar, bpe, cjkchar+bpe, fpinyin, ppinyin, phone+ppinyin.
        fpinyin means full pinyin, each cjkchar has a pinyin(with tone).
        ppinyin means partial pinyin, it splits pinyin into initial and final,
        phone means English phonemes in CMU dictionary format.
      bpe_model:
        The path of the bpe model. Only required when tokens_type is bpe or
        cjkchar+bpe.
      lexicon:
        The path of the lexicon.txt. Only required when tokens_type is phone+ppinyin.
      output_ids:
        True to output token ids otherwise tokens.
    Returns:
      Return the encoded texts, it is a list of a list of token ids if output_ids
      is True, or it is a list of list of tokens.
    """
    try:
        import sentencepiece as spm
    except ImportError:
        print("Please run")
        print("  pip install sentencepiece")
        print("before you continue")
        raise

    try:
        from pypinyin import pinyin
        from pypinyin.contrib.tone_convert import to_initials, to_finals_tone
    except ImportError:
        print("Please run")
        print("  pip install pypinyin")
        print("before you continue")
        raise

    assert Path(tokens).is_file(), f"File not exists, {tokens}"
    tokens_table = {}
    with open(tokens, "r", encoding="utf-8") as f:
        for line in f:
            toks = line.strip().split()
            assert len(toks) == 2, len(toks)
            assert toks[0] not in tokens_table, f"Duplicate token: {toks} "
            tokens_table[toks[0]] = int(toks[1])

    if "bpe" in tokens_type:
        assert Path(bpe_model).is_file(), f"File not exists, {bpe_model}"
        sp = spm.SentencePieceProcessor()
        sp.load(bpe_model)

    phone_table = {}
    if tokens_type == "phone+ppinyin":
        assert (
            lexicon and Path(lexicon).is_file()
        ), f"File not exists, {lexicon}"
        with open(lexicon, "r", encoding="utf-8") as f:
            for line in f:
                toks = line.strip().split()
                assert len(toks) >= 2, len(toks)
                word = toks[0]
                phones = toks[1:]
                phone_table[word] = phones

    texts_list: List[List[str]] = []

    def to_pinyin(txt: str, out_type: str) -> List[str]:
        assert out_type in ["ppinyin", "fpinyin"], f"given {out_type}"
        py = [x[0] for x in pinyin(txt)]
        if "ppinyin" == out_type:
            res = []
            for x in py:
                initial = to_initials(x, strict=False)
                final = to_finals_tone(x, strict=False)
                if initial == "" and final == "":
                    res.append(x)
                else:
                    if initial:
                        res.append(initial)
                    if final:
                        res.append(final)
            return res
        else:
            return py

    if tokens_type == "cjkchar":
        texts_list = [list("".join(text.split())) for text in texts]
    elif tokens_type == "bpe":
        texts_list = sp.encode(texts, out_type=str)
    elif tokens_type == "ppinyin" or tokens_type == "fpinyin":
        for txt in texts:
            texts_list.append(to_pinyin(txt, tokens_type))
    elif tokens_type == "phone+ppinyin":
        # CJK(China Japan Korea) unicode range is [U+4E00, U+9FFF], ref:
        # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
        pattern = re.compile(r"^[\u4e00-\u9fff]+$")
        for text in texts:
            words = text.strip().split()
            text_list = []
            skip_text = False
            for w in words:
                if w in phone_table:
                    text_list += phone_table[w]
                else:
                    if pattern.fullmatch(w) is None:
                        print(
                            f"Word {w} not in lexicon and it is not a CJK character, "
                            f"skipping text: {text}."
                        )
                        skip_text = True
                        break
                    else:
                        text_list += to_pinyin(w, "ppinyin")
            if not skip_text:
                texts_list.append(text_list)
    else:
        assert (
            tokens_type == "cjkchar+bpe"
        ), f"Supported tokens_type are cjkchar, bpe, cjkchar+bpe, ppinyin, fpinyin, phone+ppinyin given {tokens_type}"

        # CJK(China Japan Korea) unicode range is [U+4E00, U+9FFF], ref:
        # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
        pattern = re.compile(r"([\u4e00-\u9fff])")
        for text in texts:
            # Example:
            #   txt   = "你好 ITS'S OKAY 的"
            #   chars = ["你", "好", " ITS'S OKAY ", "的"]
            chars = pattern.split(text)
            mix_chars = [w for w in chars if len(w.strip()) > 0]
            text_list = []
            for ch_or_w in mix_chars:
                # ch_or_w is a single CJK character(i.e., "你"), do nothing.
                if pattern.fullmatch(ch_or_w) is not None:
                    text_list.append(ch_or_w)
                # ch_or_w contains non-CJK characters(i.e., " IT'S OKAY "),
                # encode ch_or_w using bpe_model.
                else:
                    text_list += sp.encode_as_pieces(ch_or_w)
            texts_list.append(text_list)

    result: List[List[Union[int, str]]] = []
    for text in texts_list:
        text_list = []
        contain_oov = False
        for txt in text:
            if txt in tokens_table:
                text_list.append(tokens_table[txt] if output_ids else txt)
            else:
                print(
                    f"Can't find token {txt} in token table, check your "
                    f"tokens.txt see if {txt} in it. skipping text : {text}."
                )
                contain_oov = True
                break
        if contain_oov:
            continue
        else:
            result.append(text_list)
    return result


================================================
FILE: sherpa-onnx/python/tests/CMakeLists.txt
================================================
function(sherpa_onnx_add_py_test source)
  get_filename_component(name ${source} NAME_WE)
  set(name "${name}_py")

  add_test(NAME ${name}
    COMMAND
      "${PYTHON_EXECUTABLE}"
      "${CMAKE_CURRENT_SOURCE_DIR}/${source}"
    WORKING_DIRECTORY
      ${CMAKE_CURRENT_SOURCE_DIR}
  )

  get_filename_component(sherpa_onnx_path ${CMAKE_CURRENT_LIST_DIR} DIRECTORY)

  set_property(TEST ${name}
    PROPERTY ENVIRONMENT "PYTHONPATH=${sherpa_onnx_path}:$<TARGET_FILE_DIR:_sherpa_onnx>:$ENV{PYTHONPATH}"
  )
endfunction()

# please sort the files in alphabetic order
set(py_test_files
  test_fast_clustering.py
  test_feature_extractor_config.py
  test_keyword_spotter.py
  test_offline_recognizer.py
  test_online_recognizer.py
  test_online_transducer_model_config.py
  test_speaker_recognition.py
  test_text2token.py
)

foreach(source IN LISTS py_test_files)
  sherpa_onnx_add_py_test(${source})
endforeach()


================================================
FILE: sherpa-onnx/python/tests/test_fast_clustering.py
================================================
# sherpa-onnx/python/tests/test_fast_clustering.py
#
# Copyright (c)  2024  Xiaomi Corporation
#
# To run this single test, use
#
#  ctest --verbose -R  test_fast_clustering_py
import unittest

import sherpa_onnx
import numpy as np
from pathlib import Path
from typing import Tuple

import soundfile as sf


def load_audio(filename: str) -> np.ndarray:
    data, sample_rate = sf.read(
        filename,
        always_2d=True,
        dtype="float32",
    )
    data = data[:, 0]  # use only the first channel
    samples = np.ascontiguousarray(data)
    assert sample_rate == 16000, f"Expect sample_rate 16000. Given: {sample_rate}"
    return samples


class TestFastClustering(unittest.TestCase):
    def test_construct_by_num_clusters(self):
        config = sherpa_onnx.FastClusteringConfig(num_clusters=4)
        assert config.validate() is True

        print(config)

        clustering = sherpa_onnx.FastClustering(config)
        features = np.array(
            [
                [0.2, 0.3],  # cluster 0
                [0.3, -0.4],  # cluster 1
                [-0.1, -0.2],  # cluster 2
                [-0.3, -0.5],  # cluster 2
                [0.1, -0.2],  # cluster 1
                [0.1, 0.2],  # cluster 0
                [-0.8, 1.9],  # cluster 3
                [-0.4, -0.6],  # cluster 2
                [-0.7, 0.9],  # cluster 3
            ]
        )
        labels = clustering(features)
        assert isinstance(labels, list)
        assert len(labels) == features.shape[0]

        expected = [0, 1, 2, 2, 1, 0, 3, 2, 3]
        assert labels == expected, (labels, expected)

    def test_construct_by_threshold(self):
        config = sherpa_onnx.FastClusteringConfig(threshold=0.2)
        assert config.validate() is True

        print(config)

        clustering = sherpa_onnx.FastClustering(config)
        features = np.array(
            [
                [0.2, 0.3],  # cluster 0
                [0.3, -0.4],  # cluster 1
                [-0.1, -0.2],  # cluster 2
                [-0.3, -0.5],  # cluster 2
                [0.1, -0.2],  # cluster 1
                [0.1, 0.2],  # cluster 0
                [-0.8, 1.9],  # cluster 3
                [-0.4, -0.6],  # cluster 2
                [-0.7, 0.9],  # cluster 3
            ]
        )
        labels = clustering(features)
        assert isinstance(labels, list)
        assert len(labels) == features.shape[0]

        expected = [0, 1, 2, 2, 1, 0, 3, 2, 3]
        assert labels == expected, (labels, expected)

    def test_cluster_speaker_embeddings(self):
        d = Path("/tmp/test-cluster")

        # Please download the onnx file from
        # https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
        model_file = d / "3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx"

        if not model_file.exists():
            print(f"skip test since {model_file} does not exist")
            return

        # Please download the test wave files from
        # https://github.com/csukuangfj/sr-data
        wave_dir = d / "sr-data"
        if not wave_dir.is_dir():
            print(f"skip test since {wave_dir} does not exist")
            return

        wave_files = [
            "enroll/fangjun-sr-1.wav",  # cluster 0
            "enroll/fangjun-sr-2.wav",  # cluster 0
            "enroll/fangjun-sr-3.wav",  # cluster 0
            "enroll/leijun-sr-1.wav",  # cluster 1
            "enroll/leijun-sr-2.wav",  # cluster 1
            "enroll/liudehua-sr-1.wav",  # cluster 2
            "enroll/liudehua-sr-2.wav",  # cluster 2
            "test/fangjun-test-sr-1.wav",  # cluster 0
            "test/fangjun-test-sr-2.wav",  # cluster 0
            "test/leijun-test-sr-1.wav",  # cluster 1
            "test/leijun-test-sr-2.wav",  # cluster 1
            "test/leijun-test-sr-3.wav",  # cluster 1
            "test/liudehua-test-sr-1.wav",  # cluster 2
            "test/liudehua-test-sr-2.wav",  # cluster 2
        ]
        for w in wave_files:
            f = d / "sr-data" / w
            if not f.is_file():
                print(f"skip testing since {f} does not exist")
                return

        extractor_config = sherpa_onnx.SpeakerEmbeddingExtractorConfig(
            model=str(model_file),
            num_threads=1,
            debug=0,
        )
        if not extractor_config.validate():
            raise ValueError(f"Invalid extractor config. {config}")

        extractor = sherpa_onnx.SpeakerEmbeddingExtractor(extractor_config)

        features = []

        for w in wave_files:
            f = d / "sr-data" / w
            audio = load_audio(str(f))
            stream = extractor.create_stream()
            stream.accept_waveform(sample_rate=16000, waveform=audio)
            stream.input_finished()

            assert extractor.is_ready(stream)
            embedding = extractor.compute(stream)
            embedding = np.array(embedding)
            features.append(embedding)
        features = np.array(features)

        config = sherpa_onnx.FastClusteringConfig(num_clusters=3)
        #  config = sherpa_onnx.FastClusteringConfig(threshold=0.5)
        clustering = sherpa_onnx.FastClustering(config)
        labels = clustering(features)

        expected = [0, 0, 0, 1, 1, 2, 2]
        expected += [0, 0, 1, 1, 1, 2, 2]

        assert labels == expected, (labels, expected)


if __name__ == "__main__":
    unittest.main()


================================================
FILE: sherpa-onnx/python/tests/test_feature_extractor_config.py
================================================
# sherpa-onnx/python/tests/test_feature_extractor_config.py
#
# Copyright (c)  2023  Xiaomi Corporation
#
# To run this single test, use
#
#  ctest --verbose -R  test_feature_extractor_config_py

import unittest

import _sherpa_onnx


class TestFeatureExtractorConfig(unittest.TestCase):
    def test_default_constructor(self):
        config = _sherpa_onnx.FeatureExtractorConfig()
        assert config.sampling_rate == 16000, config.sampling_rate
        assert config.feature_dim == 80, config.feature_dim
        print(config)

    def test_constructor(self):
        config = _sherpa_onnx.FeatureExtractorConfig(sampling_rate=8000, feature_dim=40)
        assert config.sampling_rate == 8000, config.sampling_rate
        assert config.feature_dim == 40, config.feature_dim
        print(config)


if __name__ == "__main__":
    unittest.main()


================================================
FILE: sherpa-onnx/python/tests/test_keyword_spotter.py
================================================
# sherpa-onnx/python/tests/test_keyword_spotter.py
#
# Copyright (c)  2024  Xiaomi Corporation
#
# To run this single test, use
#
#  ctest --verbose -R  test_keyword_spotter_py

import unittest
import wave
from pathlib import Path
from typing import Tuple

import numpy as np
import sherpa_onnx

d = "/tmp/onnx-models"
# Please refer to
# https://k2-fsa.github.io/sherpa/onnx/kws/pretrained_models/index.html
# to download pre-trained models for testing


def read_wave(wave_filename: str) -> Tuple[np.ndarray, int]:
    """
    Args:
      wave_filename:
        Path to a wave file. It should be single channel and each sample should
        be 16-bit. Its sample rate does not need to be 16kHz.
    Returns:
      Return a tuple containing:
       - A 1-D array of dtype np.float32 containing the samples, which are
       normalized to the range [-1, 1].
       - sample rate of the wave file
    """

    with wave.open(wave_filename) as f:
        assert f.getnchannels() == 1, f.getnchannels()
        assert f.getsampwidth() == 2, f.getsampwidth()  # it is in bytes
        num_samples = f.getnframes()
        samples = f.readframes(num_samples)
        samples_int16 = np.frombuffer(samples, dtype=np.int16)
        samples_float32 = samples_int16.astype(np.float32)

        samples_float32 = samples_float32 / 32768
        return samples_float32, f.getframerate()


class TestKeywordSpotter(unittest.TestCase):
    def test_zipformer_transducer_en(self):
        for use_int8 in [True, False]:
            if use_int8:
                encoder = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx"
                decoder = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx"
                joiner = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.int8.onnx"
            else:
                encoder = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx"
                decoder = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx"
                joiner = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx"

            tokens = (
                f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/tokens.txt"
            )
            keywords_file = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/test_wavs/test_keywords.txt"
            wave0 = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/test_wavs/0.wav"
            wave1 = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/test_wavs/1.wav"

            if not Path(encoder).is_file():
                print("skipping test_zipformer_transducer_en()")
                return
            keyword_spotter = sherpa_onnx.KeywordSpotter(
                encoder=encoder,
                decoder=decoder,
                joiner=joiner,
                tokens=tokens,
                num_threads=1,
                keywords_file=keywords_file,
                provider="cpu",
            )
            streams = []
            waves = [wave0, wave1]
            for wave in waves:
                s = keyword_spotter.create_stream()
                samples, sample_rate = read_wave(wave)
                s.accept_waveform(sample_rate, samples)

                tail_paddings = np.zeros(int(0.2 * sample_rate), dtype=np.float32)
                s.accept_waveform(sample_rate, tail_paddings)
                s.input_finished()
                streams.append(s)

            results = [""] * len(streams)
            while True:
                ready_list = []
                for i, s in enumerate(streams):
                    if keyword_spotter.is_ready(s):
                        ready_list.append(s)
                    r = keyword_spotter.get_result(s)
                    if r:
                        print(f"{r} is detected.")
                        results[i] += f"{r}/"

                        keyword_spotter.reset_stream(s)

                if len(ready_list) == 0:
                    break
                keyword_spotter.decode_streams(ready_list)
            for wave_filename, result in zip(waves, results):
                print(f"{wave_filename}\n{result[0:-1]}")
                print("-" * 10)

    def test_zipformer_transducer_cn(self):
        for use_int8 in [True, False]:
            if use_int8:
                encoder = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx"
                decoder = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx"
                joiner = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.int8.onnx"
            else:
                encoder = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx"
                decoder = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx"
                joiner = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx"

            tokens = (
                f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt"
            )
            keywords_file = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/test_keywords.txt"
            wave0 = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav"
            wave1 = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/4.wav"
            wave2 = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/5.wav"

            if not Path(encoder).is_file():
                print("skipping test_zipformer_transducer_cn()")
                return
            keyword_spotter = sherpa_onnx.KeywordSpotter(
                encoder=encoder,
                decoder=decoder,
                joiner=joiner,
                tokens=tokens,
                num_threads=1,
                keywords_file=keywords_file,
                provider="cpu",
            )
            streams = []
            waves = [wave0, wave1, wave2]
            for wave in waves:
                s = keyword_spotter.create_stream()
                samples, sample_rate = read_wave(wave)
                s.accept_waveform(sample_rate, samples)

                tail_paddings = np.zeros(int(0.2 * sample_rate), dtype=np.float32)
                s.accept_waveform(sample_rate, tail_paddings)
                s.input_finished()
                streams.append(s)

            results = [""] * len(streams)
            while True:
                ready_list = []
                for i, s in enumerate(streams):
                    if keyword_spotter.is_ready(s):
                        ready_list.append(s)
                    r = keyword_spotter.get_result(s)
                    if r:
                        print(f"{r} is detected.")
                        results[i] += f"{r}/"

                        keyword_spotter.reset_stream(s)

                if len(ready_list) == 0:
                    break
                keyword_spotter.decode_streams(ready_list)
            for wave_filename, result in zip(waves, results):
                print(f"{wave_filename}\n{result[0:-1]}")
                print("-" * 10)


if __name__ == "__main__":
    unittest.main()


================================================
FILE: sherpa-onnx/python/tests/test_offline_recognizer.py
================================================
# sherpa-onnx/python/tests/test_offline_recognizer.py
#
# Copyright (c)  2023  Xiaomi Corporation
#
# To run this single test, use
#
#  ctest --verbose -R  test_offline_recognizer_py

import unittest
import wave
from pathlib import Path
from typing import Tuple

import numpy as np
import sherpa_onnx

d = "/tmp/icefall-models"
# Please refer to
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/index.html
# and
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html
# to download pre-trained models for testing


def read_wave(wave_filename: str) -> Tuple[np.ndarray, int]:
    """
    Args:
      wave_filename:
        Path to a wave file. It should be single channel and each sample should
        be 16-bit. Its sample rate does not need to be 16kHz.
    Returns:
      Return a tuple containing:
       - A 1-D array of dtype np.float32 containing the samples, which are
       normalized to the range [-1, 1].
       - sample rate of the wave file
    """

    with wave.open(wave_filename) as f:
        assert f.getnchannels() == 1, f.getnchannels()
        assert f.getsampwidth() == 2, f.getsampwidth()  # it is in bytes
        num_samples = f.getnframes()
        samples = f.readframes(num_samples)
        samples_int16 = np.frombuffer(samples, dtype=np.int16)
        samples_float32 = samples_int16.astype(np.float32)

        samples_float32 = samples_float32 / 32768
        return samples_float32, f.getframerate()


class TestOfflineRecognizer(unittest.TestCase):
    def test_transducer_single_file(self):
        for use_int8 in [True, False]:
            if use_int8:
                encoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.int8.onnx"
                decoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/decoder-epoch-99-avg-1.onnx"
                joiner = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/joiner-epoch-99-avg-1.int8.onnx"
            else:
                encoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.onnx"
                decoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/decoder-epoch-99-avg-1.onnx"
                joiner = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/joiner-epoch-99-avg-1.onnx"

            tokens = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/tokens.txt"
            wave0 = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/test_wavs/0.wav"

            if not Path(encoder).is_file():
                print("skipping test_transducer_single_file()")
                return

            recognizer = sherpa_onnx.OfflineRecognizer.from_transducer(
                encoder=encoder,
                decoder=decoder,
                joiner=joiner,
                tokens=tokens,
                num_threads=1,
                provider="cpu",
            )

            s = recognizer.create_stream()
            samples, sample_rate = read_wave(wave0)
            s.accept_waveform(sample_rate, samples)
            recognizer.decode_stream(s)
            print(s.result.text)

    def test_transducer_multiple_files(self):
        for use_int8 in [True, False]:
            if use_int8:
                encoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.int8.onnx"
                decoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/decoder-epoch-99-avg-1.onnx"
                joiner = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/joiner-epoch-99-avg-1.int8.onnx"
            else:
                encoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.onnx"
                decoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/decoder-epoch-99-avg-1.onnx"
                joiner = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/joiner-epoch-99-avg-1.onnx"

            tokens = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/tokens.txt"
            wave0 = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/test_wavs/0.wav"
            wave1 = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/test_wavs/1.wav"
            wave2 = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/test_wavs/8k.wav"

            if not Path(encoder).is_file():
                print("skipping test_transducer_multiple_files()")
                return

            recognizer = sherpa_onnx.OfflineRecognizer.from_transducer(
                encoder=encoder,
                decoder=decoder,
                joiner=joiner,
                tokens=tokens,
                num_threads=1,
                provider="cpu",
            )

            s0 = recognizer.create_stream()
            samples0, sample_rate0 = read_wave(wave0)
            s0.accept_waveform(sample_rate0, samples0)

            s1 = recognizer.create_stream()
            samples1, sample_rate1 = read_wave(wave1)
            s1.accept_waveform(sample_rate1, samples1)

            s2 = recognizer.create_stream()
            samples2, sample_rate2 = read_wave(wave2)
            s2.accept_waveform(sample_rate2, samples2)

            recognizer.decode_streams([s0, s1, s2])
            print(s0.result.text)
            print(s1.result.text)
            print(s2.result.text)

    def test_paraformer_single_file(self):
        model = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx"

        tokens = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt"
        wave0 = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/0.wav"

        if not Path(model).is_file():
            print("skipping test_paraformer_single_file()")
            return

        recognizer = sherpa_onnx.OfflineRecognizer.from_paraformer(
            paraformer=model,
            tokens=tokens,
            num_threads=1,
            provider="cpu",
        )

        s = recognizer.create_stream()
        samples, sample_rate = read_wave(wave0)
        s.accept_waveform(sample_rate, samples)
        recognizer.decode_stream(s)
        print(s.result.text)

    def test_paraformer_multiple_files(self):
        model = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx"

        tokens = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt"
        wave0 = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/0.wav"
        wave1 = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/1.wav"
        wave2 = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/2.wav"
        wave3 = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/8k.wav"

        if not Path(model).is_file():
            print("skipping test_paraformer_multiple_files()")
            return

        recognizer = sherpa_onnx.OfflineRecognizer.from_paraformer(
            paraformer=model,
            tokens=tokens,
            num_threads=1,
            provider="cpu",
        )

        s0 = recognizer.create_stream()
        samples0, sample_rate0 = read_wave(wave0)
        s0.accept_waveform(sample_rate0, samples0)

        s1 = recognizer.create_stream()
        samples1, sample_rate1 = read_wave(wave1)
        s1.accept_waveform(sample_rate1, samples1)

        s2 = recognizer.create_stream()
        samples2, sample_rate2 = read_wave(wave2)
        s2.accept_waveform(sample_rate2, samples2)

        s3 = recognizer.create_stream()
        samples3, sample_rate3 = read_wave(wave3)
        s3.accept_waveform(sample_rate3, samples3)

        recognizer.decode_streams([s0, s1, s2, s3])
        print(s0.result.text)
        print(s1.result.text)
        print(s2.result.text)
        print(s3.result.text)

    def test_nemo_ctc_single_file(self):
        for use_int8 in [True, False]:
            if use_int8:
                model = f"{d}/sherpa-onnx-nemo-ctc-en-citrinet-512/model.int8.onnx"
            else:
                model = f"{d}/sherpa-onnx-nemo-ctc-en-citrinet-512/model.onnx"

            tokens = f"{d}/sherpa-onnx-nemo-ctc-en-citrinet-512/tokens.txt"
            wave0 = f"{d}/sherpa-onnx-nemo-ctc-en-citrinet-512/test_wavs/0.wav"

            if not Path(model).is_file():
                print("skipping test_nemo_ctc_single_file()")
                return

            recognizer = sherpa_onnx.OfflineRecognizer.from_nemo_ctc(
                model=model,
                tokens=tokens,
                num_threads=1,
                provider="cpu",
            )

            s = recognizer.create_stream()
            samples, sample_rate = read_wave(wave0)
            s.accept_waveform(sample_rate, samples)
            recognizer.decode_stream(s)
            print(s.result.text)

    def test_nemo_ctc_multiple_files(self):
        for use_int8 in [True, False]:
            if use_int8:
                model = f"{d}/sherpa-onnx-nemo-ctc-en-citrinet-512/model.int8.onnx"
            else:
                model = f"{d}/sherpa-onnx-nemo-ctc-en-citrinet-512/model.onnx"

            tokens = f"{d}/sherpa-onnx-nemo-ctc-en-citrinet-512/tokens.txt"
            wave0 = f"{d}/sherpa-onnx-nemo-ctc-en-citrinet-512/test_wavs/0.wav"
            wave1 = f"{d}/sherpa-onnx-nemo-ctc-en-citrinet-512/test_wavs/1.wav"
            wave2 = f"{d}/sherpa-onnx-nemo-ctc-en-citrinet-512/test_wavs/8k.wav"

            if not Path(model).is_file():
                print("skipping test_nemo_ctc_multiple_files()")
                return

            recognizer = sherpa_onnx.OfflineRecognizer.from_nemo_ctc(
                model=model,
                tokens=tokens,
                num_threads=1,
                provider="cpu",
            )

            s0 = recognizer.create_stream()
            samples0, sample_rate0 = read_wave(wave0)
            s0.accept_waveform(sample_rate0, samples0)

            s1 = recognizer.create_stream()
            samples1, sample_rate1 = read_wave(wave1)
            s1.accept_waveform(sample_rate1, samples1)

            s2 = recognizer.create_stream()
            samples2, sample_rate2 = read_wave(wave2)
            s2.accept_waveform(sample_rate2, samples2)

            recognizer.decode_streams([s0, s1, s2])
            print(s0.result.text)
            print(s1.result.text)
            print(s2.result.text)

    def _test_wenet_ctc(self):
        models = [
            "sherpa-onnx-zh-wenet-aishell",
            "sherpa-onnx-zh-wenet-aishell2",
            "sherpa-onnx-zh-wenet-wenetspeech",
            "sherpa-onnx-zh-wenet-multi-cn",
            "sherpa-onnx-en-wenet-librispeech",
            "sherpa-onnx-en-wenet-gigaspeech",
        ]
        for m in models:
            for use_int8 in [True, False]:
                name = "model.int8.onnx" if use_int8 else "model.onnx"
                model = f"{d}/{m}/{name}"
                tokens = f"{d}/{m}/tokens.txt"

                wave0 = f"{d}/{m}/test_wavs/0.wav"
                wave1 = f"{d}/{m}/test_wavs/1.wav"
                wave2 = f"{d}/{m}/test_wavs/8k.wav"

                if not Path(model).is_file():
                    print("skipping test_wenet_ctc()")
                    return

                recognizer = sherpa_onnx.OfflineRecognizer.from_wenet_ctc(
                    model=model,
                    tokens=tokens,
                    num_threads=1,
                    provider="cpu",
                )

                s0 = recognizer.create_stream()
                samples0, sample_rate0 = read_wave(wave0)
                s0.accept_waveform(sample_rate0, samples0)

                s1 = recognizer.create_stream()
                samples1, sample_rate1 = read_wave(wave1)
                s1.accept_waveform(sample_rate1, samples1)

                s2 = recognizer.create_stream()
                samples2, sample_rate2 = read_wave(wave2)
                s2.accept_waveform(sample_rate2, samples2)

                recognizer.decode_streams([s0, s1, s2])
                print(s0.result.text)
                print(s1.result.text)
                print(s2.result.text)


if __name__ == "__main__":
    unittest.main()


================================================
FILE: sherpa-onnx/python/tests/test_online_recognizer.py
================================================
# sherpa-onnx/python/tests/test_online_recognizer.py
#
# Copyright (c)  2023  Xiaomi Corporation
#
# To run this single test, use
#
#  ctest --verbose -R  test_online_recognizer_py

import unittest
import wave
from pathlib import Path
from typing import Tuple

import numpy as np
import sherpa_onnx

d = "/tmp/icefall-models"
# Please refer to
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html
# to download pre-trained models for testing


def read_wave(wave_filename: str) -> Tuple[np.ndarray, int]:
    """
    Args:
      wave_filename:
        Path to a wave file. It should be single channel and each sample should
        be 16-bit. Its sample rate does not need to be 16kHz.
    Returns:
      Return a tuple containing:
       - A 1-D array of dtype np.float32 containing the samples, which are
       normalized to the range [-1, 1].
       - sample rate of the wave file
    """

    with wave.open(wave_filename) as f:
        assert f.getnchannels() == 1, f.getnchannels()
        assert f.getsampwidth() == 2, f.getsampwidth()  # it is in bytes
        num_samples = f.getnframes()
        samples = f.readframes(num_samples)
        samples_int16 = np.frombuffer(samples, dtype=np.int16)
        samples_float32 = samples_int16.astype(np.float32)

        samples_float32 = samples_float32 / 32768
        return samples_float32, f.getframerate()


class TestOnlineRecognizer(unittest.TestCase):
    def test_transducer_single_file(self):
        for use_int8 in [True, False]:
            if use_int8:
                encoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx"
                decoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx"
                joiner = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx"
            else:
                encoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx"
                decoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx"
                joiner = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx"

            tokens = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt"
            wave0 = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav"

            if not Path(encoder).is_file():
                print("skipping test_transducer_single_file()")
                return

            for decoding_method in ["greedy_search", "modified_beam_search"]:
                recognizer = sherpa_onnx.OnlineRecognizer.from_transducer(
                    encoder=encoder,
                    decoder=decoder,
                    joiner=joiner,
                    tokens=tokens,
                    num_threads=1,
                    decoding_method=decoding_method,
                    provider="cpu",
                )
                s = recognizer.create_stream()
                samples, sample_rate = read_wave(wave0)
                s.accept_waveform(sample_rate, samples)

                tail_paddings = np.zeros(int(0.2 * sample_rate), dtype=np.float32)
                s.accept_waveform(sample_rate, tail_paddings)

                s.input_finished()
                while recognizer.is_ready(s):
                    recognizer.decode_stream(s)
                print(recognizer.get_result(s))

    def test_transducer_multiple_files(self):
        for use_int8 in [True, False]:
            if use_int8:
                encoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx"
                decoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx"
                joiner = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx"
            else:
                encoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx"
                decoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx"
                joiner = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx"

            tokens = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt"
            wave0 = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav"
            wave1 = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav"
            wave2 = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/2.wav"
            wave3 = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/3.wav"
            wave4 = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/8k.wav"

            if not Path(encoder).is_file():
                print("skipping test_transducer_multiple_files()")
                return

            for decoding_method in ["greedy_search", "modified_beam_search"]:
                recognizer = sherpa_onnx.OnlineRecognizer.from_transducer(
                    encoder=encoder,
                    decoder=decoder,
                    joiner=joiner,
                    tokens=tokens,
                    num_threads=1,
                    decoding_method=decoding_method,
                    provider="cpu",
                )
                streams = []
                waves = [wave0, wave1, wave2, wave3, wave4]
                for wave in waves:
                    s = recognizer.create_stream()
                    samples, sample_rate = read_wave(wave)
                    s.accept_waveform(sample_rate, samples)

                    tail_paddings = np.zeros(int(0.2 * sample_rate), dtype=np.float32)
                    s.accept_waveform(sample_rate, tail_paddings)
                    s.input_finished()
                    streams.append(s)

                while True:
                    ready_list = []
                    for s in streams:
                        if recognizer.is_ready(s):
                            ready_list.append(s)
                    if len(ready_list) == 0:
                        break
                    recognizer.decode_streams(ready_list)
                results = [recognizer.get_result(s) for s in streams]
                for wave_filename, result in zip(waves, results):
                    print(f"{wave_filename}\n{result}")
                    print("-" * 10)

    def test_zipformer2_ctc(self):
        m = "sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13"
        for use_int8 in [True, False]:
            name = (
                "ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx"
                if use_int8
                else "ctc-epoch-20-avg-1-chunk-16-left-128.onnx"
            )
            model = f"{d}/{m}/{name}"
            tokens = f"{d}/{m}/tokens.txt"
            wave0 = f"{d}/{m}/test_wavs/DEV_T0000000000.wav"
            wave1 = f"{d}/{m}/test_wavs/DEV_T0000000001.wav"
            wave2 = f"{d}/{m}/test_wavs/DEV_T0000000002.wav"
            if not Path(model).is_file():
                print("skipping test_zipformer2_ctc()")
                return
            print(f"testing {model}")

            recognizer = sherpa_onnx.OnlineRecognizer.from_zipformer2_ctc(
                model=model,
                tokens=tokens,
                num_threads=1,
                provider="cpu",
            )

            streams = []
            waves = [wave0, wave1, wave2]
            for wave in waves:
                s = recognizer.create_stream()
                samples, sample_rate = read_wave(wave)
                s.accept_waveform(sample_rate, samples)

                tail_paddings = np.zeros(int(0.2 * sample_rate), dtype=np.float32)
                s.accept_waveform(sample_rate, tail_paddings)
                s.input_finished()
                streams.append(s)

            while True:
                ready_list = []
                for s in streams:
                    if recognizer.is_ready(s):
                        ready_list.append(s)
                if len(ready_list) == 0:
                    break
                recognizer.decode_streams(ready_list)

            results = [recognizer.get_result(s) for s in streams]
            for wave_filename, result in zip(waves, results):
                print(f"{wave_filename}\n{result}")
                print("-" * 10)

    def test_wenet_ctc(self):
        models = [
            "sherpa-onnx-zh-wenet-aishell",
            "sherpa-onnx-zh-wenet-aishell2",
            "sherpa-onnx-zh-wenet-wenetspeech",
            "sherpa-onnx-zh-wenet-multi-cn",
            "sherpa-onnx-en-wenet-librispeech",
            "sherpa-onnx-en-wenet-gigaspeech",
        ]
        for m in models:
            for use_int8 in [True, False]:
                name = (
                    "model-streaming.int8.onnx" if use_int8 else "model-streaming.onnx"
                )
                model = f"{d}/{m}/{name}"
                tokens = f"{d}/{m}/tokens.txt"

                wave0 = f"{d}/{m}/test_wavs/0.wav"
                wave1 = f"{d}/{m}/test_wavs/1.wav"
                wave2 = f"{d}/{m}/test_wavs/8k.wav"

                if not Path(model).is_file():
                    print("skipping test_wenet_ctc()")
                    return

                recognizer = sherpa_onnx.OnlineRecognizer.from_wenet_ctc(
                    model=model,
                    tokens=tokens,
                    num_threads=1,
                    provider="cpu",
                )

                streams = []
                waves = [wave0, wave1, wave2]
                for wave in waves:
                    s = recognizer.create_stream()
                    samples, sample_rate = read_wave(wave)
                    s.accept_waveform(sample_rate, samples)

                    tail_paddings = np.zeros(int(0.2 * sample_rate), dtype=np.float32)
                    s.accept_waveform(sample_rate, tail_paddings)
                    s.input_finished()
                    streams.append(s)

                while True:
                    ready_list = []
                    for s in streams:
                        if recognizer.is_ready(s):
                            ready_list.append(s)
                    if len(ready_list) == 0:
                        break
                    recognizer.decode_streams(ready_list)

                results = [recognizer.get_result(s) for s in streams]
                for wave_filename, result in zip(waves, results):
                    print(f"{wave_filename}\n{result}")
                    print("-" * 10)


if __name__ == "__main__":
    unittest.main()


================================================
FILE: sherpa-onnx/python/tests/test_online_transducer_model_config.py
================================================
# sherpa-onnx/python/tests/test_online_transducer_model_config.py
#
# Copyright (c)  2023  Xiaomi Corporation
#
# To run this single test, use
#
#  ctest --verbose -R  test_online_transducer_model_config_py

import unittest

import _sherpa_onnx


class TestOnlineTransducerModelConfig(unittest.TestCase):
    def test_constructor(self):
        config = _sherpa_onnx.OnlineTransducerModelConfig(
            encoder="encoder.onnx",
            decoder="decoder.onnx",
            joiner="joiner.onnx",
        )
        assert config.encoder == "encoder.onnx", config.encoder
        assert config.decoder == "decoder.onnx", config.decoder
        assert config.joiner == "joiner.onnx", config.joiner
        print(config)


if __name__ == "__main__":
    unittest.main()


================================================
FILE: sherpa-onnx/python/tests/test_speaker_recognition.py
================================================
# sherpa-onnx/python/tests/test_speaker_recognition.py
#
# Copyright (c)  2024  Xiaomi Corporation
#
# To run this single test, use
#
#  ctest --verbose -R  test_speaker_recognition_py

import unittest
import wave
from collections import defaultdict
from pathlib import Path
from typing import Tuple

import numpy as np
import sherpa_onnx

d = "/tmp/sr-models"


def read_wave(wave_filename: str) -> Tuple[np.ndarray, int]:
    """
    Args:
      wave_filename:
        Path to a wave file. It should be single channel and each sample should
        be 16-bit. Its sample rate does not need to be 16kHz.
    Returns:
      Return a tuple containing:
       - A 1-D array of dtype np.float32 containing the samples, which are
       normalized to the range [-1, 1].
       - sample rate of the wave file
    """

    with wave.open(wave_filename) as f:
        assert f.getnchannels() == 1, f.getnchannels()
        assert f.getsampwidth() == 2, f.getsampwidth()  # it is in bytes
        num_samples = f.getnframes()
        samples = f.readframes(num_samples)
        samples_int16 = np.frombuffer(samples, dtype=np.int16)
        samples_float32 = samples_int16.astype(np.float32)

        samples_float32 = samples_float32 / 32768
        return samples_float32, f.getframerate()


def load_speaker_embedding_model(model_filename):
    config = sherpa_onnx.SpeakerEmbeddingExtractorConfig(
        model=model_filename,
        num_threads=1,
        debug=True,
        provider="cpu",
    )
    if not config.validate():
        raise ValueError(f"Invalid config. {config}")
    extractor = sherpa_onnx.SpeakerEmbeddingExtractor(config)
    return extractor


def test_zh_models(model_filename: str, threshold: float = 0.5):
    model_filename = str(model_filename)
    if "en" in model_filename:
        print(f"skip {model_filename}")
        return
    extractor = load_speaker_embedding_model(model_filename)
    filenames = [
        "leijun-sr-1",
        "leijun-sr-2",
        "fangjun-sr-1",
        "fangjun-sr-2",
        "fangjun-sr-3",
    ]
    tmp = defaultdict(list)
    for filename in filenames:
        print(filename)
        name = filename.split("-", maxsplit=1)[0]
        data, sample_rate = read_wave(f"/tmp/sr-models/sr-data/enroll/{filename}.wav")
        stream = extractor.create_stream()
        stream.accept_waveform(sample_rate=sample_rate, waveform=data)
        stream.input_finished()
        assert extractor.is_ready(stream)
        embedding = extractor.compute(stream)
        embedding = np.array(embedding)
        tmp[name].append(embedding)

    manager = sherpa_onnx.SpeakerEmbeddingManager(extractor.dim)
    for name, embedding_list in tmp.items():
        print(name, len(embedding_list))
        embedding = sum(embedding_list) / len(embedding_list)
        status = manager.add(name, embedding)
        if not status:
            raise RuntimeError(f"Failed to register speaker {name}")

    filenames = [
        "leijun-test-sr-1",
        "leijun-test-sr-2",
        "leijun-test-sr-3",
        "fangjun-test-sr-1",
        "fangjun-test-sr-2",
    ]
    for filename in filenames:
        name = filename.split("-", maxsplit=1)[0]
        data, sample_rate = read_wave(f"/tmp/sr-models/sr-data/test/{filename}.wav")
        stream = extractor.create_stream()
        stream.accept_waveform(sample_rate=sample_rate, waveform=data)
        stream.input_finished()
        assert extractor.is_ready(stream)
        embedding = extractor.compute(stream)
        embedding = np.array(embedding)
        status = manager.verify(name, embedding, threshold=threshold)
        if not status:
            raise RuntimeError(f"Failed to verify {name} with wave {filename}.wav")

        ans = manager.search(embedding, threshold=threshold)
        assert ans == name, (name, ans)


def test_en_and_zh_models(model_filename: str, threshold: float = 0.5):
    model_filename = str(model_filename)
    extractor = load_speaker_embedding_model(model_filename)
    manager = sherpa_onnx.SpeakerEmbeddingManager(extractor.dim)

    filenames = [
        "speaker1_a_cn_16k",
        "speaker2_a_cn_16k",
        "speaker1_a_en_16k",
        "speaker2_a_en_16k",
    ]
    is_en = "en" in model_filename
    for filename in filenames:
        if is_en and "cn" in filename:
            continue

        if not is_en and "en" in filename:
            continue

        name = filename.rsplit("_", maxsplit=1)[0]
        data, sample_rate = read_wave(
            f"/tmp/sr-models/sr-data/test/3d-speaker/{filename}.wav"
        )
        stream = extractor.create_stream()
        stream.accept_waveform(sample_rate=sample_rate, waveform=data)
        stream.input_finished()
        assert extractor.is_ready(stream)
        embedding = extractor.compute(stream)
        embedding = np.array(embedding)

        status = manager.add(name, embedding)
        if not status:
            raise RuntimeError(f"Failed to register speaker {name}")

    filenames = [
        "speaker1_b_cn_16k",
        "speaker1_b_en_16k",
    ]
    for filename in filenames:
        if is_en and "cn" in filename:
            continue

        if not is_en and "en" in filename:
            continue
        print(filename)
        name = filename.rsplit("_", maxsplit=1)[0]
        name = name.replace("b_cn", "a_cn")
        name = name.replace("b_en", "a_en")
        print(name)

        data, sample_rate = read_wave(
            f"/tmp/sr-models/sr-data/test/3d-speaker/{filename}.wav"
        )
        stream = extractor.create_stream()
        stream.accept_waveform(sample_rate=sample_rate, waveform=data)
        stream.input_finished()
        assert extractor.is_ready(stream)
        embedding = extractor.compute(stream)
        embedding = np.array(embedding)
        status = manager.verify(name, embedding, threshold=threshold)
        if not status:
            raise RuntimeError(
                f"Failed to verify {name} with wave {filename}.wav. model: {model_filename}"
            )

        ans = manager.search(embedding, threshold=threshold)
        assert ans == name, (name, ans)


class TestSpeakerRecognition(unittest.TestCase):
    def test_wespeaker_models(self):
        model_dir = Path(d) / "wespeaker"
        if not model_dir.is_dir():
            print(f"{model_dir} does not exist - skip it")
            return
        for filename in model_dir.glob("*.onnx"):
            print(filename)
            threshold = 0.5

            test_zh_models(filename, threshold)

            if "wespeaker_en_voxceleb_CAM++_LM.onnx" in str(filename):
                threshold = 0.3
            test_en_and_zh_models(filename, threshold)

    def _test_3dpeaker_models(self):
        model_dir = Path(d) / "3dspeaker"
        if not model_dir.is_dir():
            print(f"{model_dir} does not exist - skip it")
            return
        for filename in model_dir.glob("*.onnx"):
            print(filename)
            test_en_and_zh_models(filename)

    def test_nemo_models(self):
        model_dir = Path(d) / "nemo"
        if not model_dir.is_dir():
            print(f"{model_dir} does not exist - skip it")
            return
        for filename in model_dir.glob("*.onnx"):
            print(filename)
            test_en_and_zh_models(filename)


if __name__ == "__main__":
    unittest.main()


================================================
FILE: sherpa-onnx/python/tests/test_text2token.py
================================================
# sherpa-onnx/python/tests/test_text2token.py
#
# Copyright (c)  2023  Xiaomi Corporation
#
# To run this single test, use
#
#  ctest --verbose -R  test_text2token_py

import unittest
from pathlib import Path

import sherpa_onnx

d = "/tmp/sherpa-test-data"
# Please refer to
# https://github.com/pkufool/sherpa-test-data
# to download test data for testing


class TestText2Token(unittest.TestCase):
    def test_bpe(self):
        tokens = f"{d}/text2token/tokens_en.txt"
        bpe_model = f"{d}/text2token/bpe_en.model"

        if not Path(tokens).is_file() or not Path(bpe_model).is_file():
            print(
                f"No test data found, skipping test_bpe().\n"
                f"You can download the test data by: \n"
                f"git clone https://github.com/pkufool/sherpa-test-data.git /tmp/sherpa-test-data"
            )
            return

        texts = ["HELLO WORLD", "I LOVE YOU"]
        encoded_texts = sherpa_onnx.text2token(
            texts,
            tokens=tokens,
            tokens_type="bpe",
            bpe_model=bpe_model,
        )
        assert encoded_texts == [
            ["▁HE", "LL", "O", "▁WORLD"],
            ["▁I", "▁LOVE", "▁YOU"],
        ], encoded_texts

        encoded_ids = sherpa_onnx.text2token(
            texts,
            tokens=tokens,
            tokens_type="bpe",
            bpe_model=bpe_model,
            output_ids=True,
        )
        assert encoded_ids == [[22, 58, 24, 425], [19, 370, 47]], encoded_ids

    def test_cjkchar(self):
        tokens = f"{d}/text2token/tokens_cn.txt"

        if not Path(tokens).is_file():
            print(
                f"No test data found, skipping test_cjkchar().\n"
                f"You can download the test data by: \n"
                f"git clone https://github.com/pkufool/sherpa-test-data.git /tmp/sherpa-test-data"
            )
            return

        texts = ["世界人民大团结", "中国 VS 美国"]
        encoded_texts = sherpa_onnx.text2token(
            texts, tokens=tokens, tokens_type="cjkchar"
        )
        assert encoded_texts == [
            ["世", "界", "人", "民", "大", "团", "结"],
            ["中", "国", "V", "S", "美", "国"],
        ], encoded_texts
        encoded_ids = sherpa_onnx.text2token(
            texts,
            tokens=tokens,
            tokens_type="cjkchar",
            output_ids=True,
        )
        assert encoded_ids == [
            [379, 380, 72, 874, 93, 1251, 489],
            [262, 147, 3423, 2476, 21, 147],
        ], encoded_ids

    def test_cjkchar_bpe(self):
        tokens = f"{d}/text2token/tokens_mix.txt"
        bpe_model = f"{d}/text2token/bpe_mix.model"

        if not Path(tokens).is_file() or not Path(bpe_model).is_file():
            print(
                f"No test data found, skipping test_cjkchar_bpe().\n"
                f"You can download the test data by: \n"
                f"git clone https://github.com/pkufool/sherpa-test-data.git /tmp/sherpa-test-data"
            )
            return

        texts = ["世界人民 GOES TOGETHER", "中国 GOES WITH 美国"]
        encoded_texts = sherpa_onnx.text2token(
            texts,
            tokens=tokens,
            tokens_type="cjkchar+bpe",
            bpe_model=bpe_model,
        )
        assert encoded_texts == [
            ["世", "界", "人", "民", "▁GO", "ES", "▁TOGETHER"],
            ["中", "国", "▁GO", "ES", "▁WITH", "美", "国"],
        ], encoded_texts
        encoded_ids = sherpa_onnx.text2token(
            texts,
            tokens=tokens,
            tokens_type="cjkchar+bpe",
            bpe_model=bpe_model,
            output_ids=True,
        )
        assert encoded_ids == [
            [1368, 1392, 557, 680, 275, 178, 475],
            [685, 736, 275, 178, 179, 921, 736],
        ], encoded_ids

    def test_phone_ppinyin(self):
        tokens = f"{d}/text2token/tokens_phone_ppinyin.txt"
        lexicon = f"{d}/text2token/en.phone"

        if not Path(tokens).is_file() or not Path(lexicon).is_file():
            print(
                f"No test data found, skipping test_phone_ppinyin().\n"
                f"You can download the test data by: \n"
                f"git clone https://github.com/pkufool/sherpa-test-data.git /tmp/sherpa-test-data"
            )
            return

        texts = ["世界人民 GOES TOGETHER", "中国 GOES WITH 美国"]
        encoded_texts = sherpa_onnx.text2token(
            texts,
            tokens=tokens,
            tokens_type="phone+ppinyin",
            lexicon=lexicon,
        )
        assert encoded_texts == [
            [
                "sh",
                "ì",
                "j",
                "iè",
                "r",
                "én",
                "m",
                "ín",
                "G",
                "OW1",
                "Z",
                "T",
                "AH0",
                "G",
                "EH1",
                "DH",
                "ER0",
            ],
            [
                "zh",
                "ōng",
                "g",
                "uó",
                "G",
                "OW1",
                "Z",
                "W",
                "IH1",
                "DH",
                "m",
                "ěi",
                "g",
                "uó",
            ],
        ], encoded_texts

        encoded_ids = sherpa_onnx.text2token(
            texts,
            tokens=tokens,
            tokens_type="phone+ppinyin",
            lexicon=lexicon,
            output_ids=True,
        )
        assert encoded_ids == [
            [
                139,
                203,
                127,
                107,
                137,
                200,
                130,
                207,
                35,
                50,
                70,
                59,
                9,
                35,
                26,
                24,
                28,
            ],
            [182, 241, 87, 163, 35, 50, 70, 68, 38, 24, 130, 231, 87, 163],
        ], encoded_ids


if __name__ == "__main__":
    unittest.main()


================================================
FILE: sherpa-onnx/rust/.gitignore
================================================
notes.md
target


================================================
FILE: sherpa-onnx/rust/.rustfmt.toml
================================================
# Put each method in a chain on its own line
chain_width = 0

# Optional: make sure calls break vertically
fn_call_width = 60

# Optional: control general line width
max_width = 100


================================================
FILE: sherpa-onnx/rust/Cargo.toml
================================================
[workspace]
resolver = "2"
members = ["sherpa-onnx","sherpa-onnx-sys",]


================================================
FILE: sherpa-onnx/rust/check.sh
================================================
#!/usr/bin/env bash
set -euo pipefail

echo "=== Building sherpa-onnx ==="
cargo build -p sherpa-onnx

echo "=== Checking code with cargo check ==="
cargo check -p sherpa-onnx

echo "=== Running clippy for lints ==="
cargo clippy -p sherpa-onnx -- -D warnings

echo "=== Running tests ==="
cargo test -p sherpa-onnx

echo "All checks passed for sherpa-onnx ✅"


================================================
FILE: sherpa-onnx/rust/publish.sh
================================================
#!/usr/bin/env bash

pushd sherpa-onnx-sys

cp -v ../../../README.md ./
cp -v ../../../LICENSE ./

popd

pushd sherpa-onnx

cp -v ../../../README.md ./
cp -v ../../../LICENSE ./

popd


================================================
FILE: sherpa-onnx/rust/sherpa-onnx/Cargo.toml
================================================
[package]
name = "sherpa-onnx"
version = "1.12.31"
edition = "2021"
description = "Safe Rust wrapper for sherpa-onnx speech recognition toolkit"
license = "Apache-2.0"
repository = "https://github.com/k2-fsa/sherpa-onnx"
documentation = "https://docs.rs/sherpa-onnx"
readme = "README.md"  # make sure this is inside the crate folder

keywords = ["speech", "speech-to-text", "stt", "onnx", "asr"]
categories = ["api-bindings", "multimedia::audio"]

# Explicitly list files to include in crates.io
include = [
    "src/**",
    "Cargo.toml",
    "README.md",
    "LICENSE*",
]

[dependencies]
sherpa-onnx-sys = { path = "../sherpa-onnx-sys", version = "1.12.31" }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"


================================================
FILE: sherpa-onnx/rust/sherpa-onnx/src/audio_tagging.rs
================================================
//! Offline audio tagging.
//!
//! This API classifies complete audio clips and returns the most likely events.
//! See:
//!
//! - `rust-api-examples/examples/audio_tagging_zipformer.rs`
//! - `rust-api-examples/examples/audio_tagging_ced.rs`

use crate::utils::to_c_ptr;
use sherpa_onnx_sys as sys;
use std::ffi::{CStr, CString};

#[derive(Clone, Debug, Default)]
/// Zipformer audio tagging model path.
pub struct OfflineZipformerAudioTaggingModelConfig {
    pub model: Option<String>,
}

impl OfflineZipformerAudioTaggingModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflineZipformerAudioTaggingModelConfig {
        sys::OfflineZipformerAudioTaggingModelConfig {
            model: to_c_ptr(&self.model, cstrings),
        }
    }
}

#[derive(Clone, Debug)]
/// Model-level configuration for audio tagging.
///
/// Configure either `zipformer` or `ced` for a concrete model package.
pub struct AudioTaggingModelConfig {
    pub zipformer: OfflineZipformerAudioTaggingModelConfig,
    pub ced: Option<String>,
    pub num_threads: i32,
    pub debug: bool,
    pub provider: Option<String>,
}

impl Default for AudioTaggingModelConfig {
    fn default() -> Self {
        Self {
            zipformer: Default::default(),
            ced: None,
            num_threads: 1,
            debug: false,
            provider: Some("cpu".to_string()),
        }
    }
}

impl AudioTaggingModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::AudioTaggingModelConfig {
        sys::AudioTaggingModelConfig {
            zipformer: self
                .zipformer
                .to_sys(cstrings),
            ced: to_c_ptr(&self.ced, cstrings),
            num_threads: self.num_threads,
            debug: self.debug as i32,
            provider: to_c_ptr(&self.provider, cstrings),
        }
    }
}

#[derive(Clone, Debug)]
/// Top-level configuration for [`AudioTagging`].
pub struct AudioTaggingConfig {
    pub model: AudioTaggingModelConfig,
    pub labels: Option<String>,
    pub top_k: i32,
}

impl Default for AudioTaggingConfig {
    fn default() -> Self {
        Self {
            model: Default::default(),
            labels: None,
            top_k: 5,
        }
    }
}

impl AudioTaggingConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::AudioTaggingConfig {
        sys::AudioTaggingConfig {
            model: self
                .model
                .to_sys(cstrings),
            labels: to_c_ptr(&self.labels, cstrings),
            top_k: self.top_k,
        }
    }
}

#[derive(Clone, Debug)]
/// One predicted audio event.
pub struct AudioEvent {
    pub name: String,
    pub index: i32,
    pub prob: f32,
}

/// Offline audio tagger.
pub struct AudioTagging {
    ptr: *const sys::AudioTagging,
}

unsafe impl Send for AudioTagging {}

impl AudioTagging {
    /// Create a tagger from `config`.
    pub fn create(config: &AudioTaggingConfig) -> Option<Self> {
        let mut cstrings = Vec::new();
        let sys_config = config.to_sys(&mut cstrings);
        let ptr = unsafe { sys::SherpaOnnxCreateAudioTagging(&sys_config) };
        if ptr.is_null() {
            None
        } else {
            Some(Self { ptr })
        }
    }

    /// Create a stream that accepts one complete clip.
    pub fn create_stream(&self) -> AudioTaggingOfflineStream {
        let ptr = unsafe { sys::SherpaOnnxAudioTaggingCreateOfflineStream(self.ptr) };
        AudioTaggingOfflineStream { ptr }
    }

    /// Compute the top `top_k` events for the provided stream.
    pub fn compute(&self, stream: &AudioTaggingOfflineStream, top_k: i32) -> Vec<AudioEvent> {
        unsafe {
            let p = sys::SherpaOnnxAudioTaggingCompute(self.ptr, stream.ptr, top_k);
            if p.is_null() {
                return Vec::new();
            }

            let mut ans = Vec::new();
            let mut cur = p;
            while !(*cur).is_null() {
                let event = &*(*cur);
                let name = if event
                    .name
                    .is_null()
                {
                    String::new()
                } else {
                    CStr::from_ptr(event.name)
                        .to_string_lossy()
                        .into_owned()
                };
                ans.push(AudioEvent {
                    name,
                    index: event.index,
                    prob: event.prob,
                });
                cur = cur.add(1);
            }

            sys::SherpaOnnxAudioTaggingFreeResults(p);
            ans
        }
    }
}

impl Drop for AudioTagging {
    fn drop(&mut self) {
        unsafe {
            if !self
                .ptr
                .is_null()
            {
                sys::SherpaOnnxDestroyAudioTagging(self.ptr);
            }
        }
    }
}

/// Input stream for offline audio tagging.
pub struct AudioTaggingOfflineStream {
    ptr: *const sys::OfflineStream,
}

impl AudioTaggingOfflineStream {
    /// Append waveform samples to the clip.
    pub fn accept_waveform(&self, sample_rate: i32, samples: &[f32]) {
        unsafe {
            sys::SherpaOnnxAcceptWaveformOffline(
                self.ptr,
                sample_rate,
                samples.as_ptr(),
                samples.len() as i32,
            )
        }
    }
}

impl Drop for AudioTaggingOfflineStream {
    fn drop(&mut self) {
        unsafe { sys::SherpaOnnxDestroyOfflineStream(self.ptr) }
    }
}


================================================
FILE: sherpa-onnx/rust/sherpa-onnx/src/display.rs
================================================
//! Small terminal display helper for streaming ASR demos.

use std::time::{Duration, Instant};

/// Stores finalized sentences and the current partial hypothesis for terminal UIs.
#[derive(Debug)]
pub struct DisplayManager {
    sentences: Vec<String>,
    current_text: String,
    last_render: Instant,
}

impl DisplayManager {
    /// Create an empty display manager.
    pub fn new() -> Self {
        Self {
            sentences: Vec::new(),
            current_text: String::new(),
            last_render: Instant::now(),
        }
    }

    /// Replace the current partial text shown in the display.
    pub fn update_text(&mut self, text: &str) {
        self.current_text = text.to_string();
    }

    /// Move the current partial text into the finalized sentence list.
    pub fn finalize_sentence(&mut self) {
        let trimmed = self
            .current_text
            .trim();
        if !trimmed.is_empty() {
            self.sentences
                .push(trimmed.to_string());
        }
        self.current_text
            .clear();
    }

    /// Render the current state to stdout.
    ///
    /// Rendering is throttled slightly to reduce terminal flicker.
    pub fn render(&mut self) {
        // Throttle rendering to reduce flicker (200ms)
        if self
            .last_render
            .elapsed()
            < Duration::from_millis(200)
        {
            return;
        }
        self.last_render = Instant::now();

        // Clear screen (ANSI escape)
        print!("\x1B[2J\x1B[1;1H");
        println!("=== Speech Recognition with Next-gen Kaldi ===");
        println!("-----------------------------------------------");

        for (i, s) in self
            .sentences
            .iter()
            .enumerate()
        {
            println!("{}: {}", i + 1, s);
        }

        if !self
            .current_text
            .is_empty()
        {
            println!("-----------------------------------------------");
            println!("Recognizing: {}", self.current_text);
        }
    }

    /// Return `true` if at least one sentence has been finalized.
    pub fn has_sentences(&self) -> bool {
        !self
            .sentences
            .is_empty()
    }

    /// Borrow the current partial text.
    pub fn current_text(&self) -> &str {
        &self.current_text
    }
}

impl Default for DisplayManager {
    fn default() -> Self {
        Self::new()
    }
}


================================================
FILE: sherpa-onnx/rust/sherpa-onnx/src/kws.rs
================================================
//! Streaming keyword spotting.
//!
//! This module detects predefined or per-stream override keywords from an
//! online ASR model. See
//! [`rust-api-examples/examples/keyword_spotter.rs`](https://github.com/k2-fsa/sherpa-onnx/blob/master/rust-api-examples/examples/keyword_spotter.rs)
//! for a complete example.
//!
//! # Example
//!
//! ```no_run
//! use sherpa_onnx::{KeywordSpotter, KeywordSpotterConfig, Wave};
//!
//! let wave = Wave::read("./test.wav").expect("read wave");
//! let mut config = KeywordSpotterConfig::default();
//! config.model_config.transducer.encoder = Some("./kws/encoder.onnx".into());
//! config.model_config.transducer.decoder = Some("./kws/decoder.onnx".into());
//! config.model_config.transducer.joiner = Some("./kws/joiner.onnx".into());
//! config.model_config.tokens = Some("./kws/tokens.txt".into());
//! config.keywords_file = Some("./keywords.txt".into());
//!
//! let kws = KeywordSpotter::create(&config).expect("create keyword spotter");
//! let stream = kws.create_stream();
//! stream.accept_waveform(wave.sample_rate(), wave.samples());
//! stream.input_finished();
//!
//! while kws.is_ready(&stream) {
//!     kws.decode(&stream);
//! }
//!
//! if let Some(result) = kws.get_result(&stream) {
//!     println!("{}", result.keyword);
//! }
//! ```

use crate::online_asr::{OnlineModelConfig, OnlineStream};
use crate::utils::to_c_ptr;
use sherpa_onnx_sys as sys;
use std::ffi::{c_char, CStr, CString};
use std::slice;

fn c_ptr_to_string(ptr: *const c_char) -> String {
    if ptr.is_null() {
        String::new()
    } else {
        unsafe {
            CStr::from_ptr(ptr)
                .to_string_lossy()
                .into_owned()
        }
    }
}

#[derive(Clone, Debug)]
/// Configuration for [`KeywordSpotter`].
pub struct KeywordSpotterConfig {
    pub feat_config: sys::FeatureConfig,
    pub model_config: OnlineModelConfig,
    pub max_active_paths: i32,
    pub num_trailing_blanks: i32,
    pub keywords_score: f32,
    pub keywords_threshold: f32,
    pub keywords_file: Option<String>,
    pub keywords_buf: Option<String>,
}

impl Default for KeywordSpotterConfig {
    fn default() -> Self {
        Self {
            feat_config: sys::FeatureConfig {
                sample_rate: 16000,
                feature_dim: 80,
            },
            model_config: Default::default(),
            max_active_paths: 4,
            num_trailing_blanks: 1,
            keywords_score: 1.0,
            keywords_threshold: 0.25,
            keywords_file: None,
            keywords_buf: None,
        }
    }
}

impl KeywordSpotterConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::KeywordSpotterConfig {
        sys::KeywordSpotterConfig {
            feat_config: self.feat_config,
            model_config: self
                .model_config
                .to_sys(cstrings),
            max_active_paths: self.max_active_paths,
            num_trailing_blanks: self.num_trailing_blanks,
            keywords_score: self.keywords_score,
            keywords_threshold: self.keywords_threshold,
            keywords_file: to_c_ptr(&self.keywords_file, cstrings),
            keywords_buf: to_c_ptr(&self.keywords_buf, cstrings),
            keywords_buf_size: self
                .keywords_buf
                .as_ref()
                .map_or(0, |s| s.len() as i32),
        }
    }
}

#[derive(Clone, Debug)]
/// Decoded keyword spotting result for one stream.
pub struct KeywordResult {
    pub keyword: String,
    pub tokens: String,
    pub tokens_arr: Vec<String>,
    pub timestamps: Vec<f32>,
    pub start_time: f32,
    pub json: String,
}

/// Streaming keyword spotter.
pub struct KeywordSpotter {
    ptr: *const sys::KeywordSpotter,
}

unsafe impl Send for KeywordSpotter {}

impl KeywordSpotter {
    /// Create a keyword spotter from [`KeywordSpotterConfig`].
    pub fn create(config: &KeywordSpotterConfig) -> Option<Self> {
        let mut cstrings = Vec::new();
        let sys_config = config.to_sys(&mut cstrings);
        let ptr = unsafe { sys::SherpaOnnxCreateKeywordSpotter(&sys_config) };
        if ptr.is_null() {
            None
        } else {
            Some(Self { ptr })
        }
    }

    /// Create a stream that uses the keywords configured in [`KeywordSpotterConfig`].
    pub fn create_stream(&self) -> OnlineStream {
        let ptr = unsafe { sys::SherpaOnnxCreateKeywordStream(self.ptr) };
        OnlineStream { ptr }
    }

    /// Create a stream that uses `keywords` instead of the configured keyword list.
    pub fn create_stream_with_keywords(&self, keywords: &str) -> OnlineStream {
        let keywords = CString::new(keywords).unwrap();
        let ptr =
            unsafe { sys::SherpaOnnxCreateKeywordStreamWithKeywords(self.ptr, keywords.as_ptr()) };
        OnlineStream { ptr }
    }

    /// Return `true` if `stream` has enough audio for another decode step.
    pub fn is_ready(&self, stream: &OnlineStream) -> bool {
        unsafe { sys::SherpaOnnxIsKeywordStreamReady(self.ptr, stream.ptr) != 0 }
    }

    /// Decode one incremental step for `stream`.
    pub fn decode(&self, stream: &OnlineStream) {
        unsafe { sys::SherpaOnnxDecodeKeywordStream(self.ptr, stream.ptr) }
    }

    /// Decode multiple streams in one batch.
    pub fn decode_multiple_streams(&self, streams: &[&OnlineStream]) {
        let ptrs: Vec<*const sys::OnlineStream> = streams
            .iter()
            .map(|s| s.ptr)
            .collect();
        unsafe {
            sys::SherpaOnnxDecodeMultipleKeywordStreams(self.ptr, ptrs.as_ptr(), ptrs.len() as i32)
        }
    }

    /// Reset the detector state for `stream`.
    pub fn reset(&self, stream: &OnlineStream) {
        unsafe { sys::SherpaOnnxResetKeywordStream(self.ptr, stream.ptr) }
    }

    /// Get the structured keyword spotting result for `stream`.
    pub fn get_result(&self, stream: &OnlineStream) -> Option<KeywordResult> {
        unsafe {
            let p = sys::SherpaOnnxGetKeywordResult(self.ptr, stream.ptr);
            if p.is_null() {
                return None;
            }

            let result = &*p;
            let tokens_arr = if result
                .tokens_arr
                .is_null()
                || result.count <= 0
            {
                Vec::new()
            } else {
                slice::from_raw_parts(result.tokens_arr, result.count as usize)
                    .iter()
                    .map(|item| c_ptr_to_string(*item))
                    .collect()
            };

            let timestamps = if result
                .timestamps
                .is_null()
                || result.count <= 0
            {
                Vec::new()
            } else {
                slice::from_raw_parts(result.timestamps, result.count as usize).to_vec()
            };

            let ans = KeywordResult {
                keyword: c_ptr_to_string(result.keyword),
                tokens: c_ptr_to_string(result.tokens),
                tokens_arr,
                timestamps,
                start_time: result.start_time,
                json: c_ptr_to_string(result.json),
            };

            sys::SherpaOnnxDestroyKeywordResult(p);
            Some(ans)
        }
    }

    /// Get the result for `stream` as a JSON string.
    pub fn get_result_as_json(&self, stream: &OnlineStream) -> Option<String> {
        unsafe {
            let p = sys::SherpaOnnxGetKeywordResultAsJson(self.ptr, stream.ptr);
            if p.is_null() {
                return None;
            }

            let ans = CStr::from_ptr(p)
                .to_string_lossy()
                .into_owned();
            sys::SherpaOnnxFreeKeywordResultJson(p);
            Some(ans)
        }
    }
}

impl Drop for KeywordSpotter {
    fn drop(&mut self) {
        unsafe {
            if !self
                .ptr
                .is_null()
            {
                sys::SherpaOnnxDestroyKeywordSpotter(self.ptr);
            }
        }
    }
}


================================================
FILE: sherpa-onnx/rust/sherpa-onnx/src/lib.rs
================================================
//! Safe Rust bindings for the public sherpa-onnx inference APIs.
//!
//! This crate wraps the sherpa-onnx C API with RAII-owned Rust types and
//! idiomatic configuration structs. The main feature families are:
//!
//! - offline ASR through [`OfflineRecognizer`]
//! - streaming ASR through [`OnlineRecognizer`]
//! - offline text-to-speech through [`OfflineTts`]
//! - voice activity detection through [`VoiceActivityDetector`]
//! - speaker embeddings and diarization
//! - online punctuation
//! - offline and streaming speech denoising
//! - audio tagging
//! - WAV I/O helpers through [`Wave`] and [`write()`]
//!
//! # How the Rust API is organized
//!
//! Most APIs follow the same pattern:
//!
//! 1. Start with a `*Config` value and fill the fields for exactly one model
//!    family.
//! 2. Call `create()` to construct the runtime object.
//! 3. Create a stream if the API is stream-based.
//! 4. Feed audio or text, then fetch results with the provided accessor methods.
//!
//! All runtime wrappers automatically free their underlying C resources on drop.
//!
//! # Examples
//!
//! The repository contains end-to-end Rust examples under
//! [`rust-api-examples/examples/`](https://github.com/k2-fsa/sherpa-onnx/tree/master/rust-api-examples/examples).
//! Good entry points are:
//!
//! - [`sense_voice.rs`](https://github.com/k2-fsa/sherpa-onnx/blob/master/rust-api-examples/examples/sense_voice.rs)
//! - [`nemo_parakeet.rs`](https://github.com/k2-fsa/sherpa-onnx/blob/master/rust-api-examples/examples/nemo_parakeet.rs)
//! - [`streaming_zipformer.rs`](https://github.com/k2-fsa/sherpa-onnx/blob/master/rust-api-examples/examples/streaming_zipformer.rs)
//! - [`pocket_tts.rs`](https://github.com/k2-fsa/sherpa-onnx/blob/master/rust-api-examples/examples/pocket_tts.rs)
//! - [`silero_vad_remove_silence.rs`](https://github.com/k2-fsa/sherpa-onnx/blob/master/rust-api-examples/examples/silero_vad_remove_silence.rs)
//! - [`online_punctuation.rs`](https://github.com/k2-fsa/sherpa-onnx/blob/master/rust-api-examples/examples/online_punctuation.rs)
//! - [`offline_punctuation.rs`](https://github.com/k2-fsa/sherpa-onnx/blob/master/rust-api-examples/examples/offline_punctuation.rs)
//! - [`keyword_spotter.rs`](https://github.com/k2-fsa/sherpa-onnx/blob/master/rust-api-examples/examples/keyword_spotter.rs)
//! - [`spoken_language_identification.rs`](https://github.com/k2-fsa/sherpa-onnx/blob/master/rust-api-examples/examples/spoken_language_identification.rs)
//! - [`offline_speaker_diarization.rs`](https://github.com/k2-fsa/sherpa-onnx/blob/master/rust-api-examples/examples/offline_speaker_diarization.rs)
//! - [`speaker_embedding_manager.rs`](https://github.com/k2-fsa/sherpa-onnx/blob/master/rust-api-examples/examples/speaker_embedding_manager.rs)
//!
//! # Offline recognition example
//!
//! ```no_run
//! use sherpa_onnx::{
//!     OfflineRecognizer, OfflineRecognizerConfig, OfflineSenseVoiceModelConfig, Wave,
//! };
//!
//! let wave = Wave::read("./test.wav").expect("read wave");
//!
//! let mut config = OfflineRecognizerConfig::default();
//! config.model_config.sense_voice = OfflineSenseVoiceModelConfig {
//!     model: Some(
//!         "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8/model.int8.onnx".into(),
//!     ),
//!     language: Some("auto".into()),
//!     use_itn: true,
//! };
//! config.model_config.tokens = Some(
//!     "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8/tokens.txt".into(),
//! );
//!
//! let recognizer = OfflineRecognizer::create(&config).expect("create recognizer");
//! let stream = recognizer.create_stream();
//! stream.accept_waveform(wave.sample_rate(), wave.samples());
//! recognizer.decode(&stream);
//!
//! let result = stream.get_result().expect("result");
//! println!("{}", result.text);
//! ```
//!
//! # Streaming recognition example
//!
//! ```no_run
//! use sherpa_onnx::{OnlineRecognizer, OnlineRecognizerConfig, Wave};
//!
//! let wave = Wave::read("./test.wav").expect("read wave");
//!
//! let mut config = OnlineRecognizerConfig::default();
//! config.model_config.transducer.encoder = Some(
//!     "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx".into(),
//! );
//! config.model_config.transducer.decoder = Some(
//!     "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx".into(),
//! );
//! config.model_config.transducer.joiner = Some(
//!     "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx".into(),
//! );
//! config.model_config.tokens = Some(
//!     "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt".into(),
//! );
//! config.enable_endpoint = true;
//! config.decoding_method = Some("greedy_search".into());
//!
//! let recognizer = OnlineRecognizer::create(&config).expect("create recognizer");
//! let stream = recognizer.create_stream();
//! stream.accept_waveform(wave.sample_rate(), wave.samples());
//! stream.input_finished();
//! while recognizer.is_ready(&stream) {
//!     recognizer.decode(&stream);
//! }
//! ```
//!
//! # TTS example
//!
//! ```no_run
//! use sherpa_onnx::{OfflineTts, OfflineTtsConfig, OfflineTtsModelConfig, OfflineTtsPocketModelConfig};
//!
//! let config = OfflineTtsConfig {
//!     model: OfflineTtsModelConfig {
//!         pocket: OfflineTtsPocketModelConfig {
//!             lm_flow: Some("./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_flow.int8.onnx".into()),
//!             lm_main: Some("./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_main.int8.onnx".into()),
//!             encoder: Some("./sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx".into()),
//!             decoder: Some("./sherpa-onnx-pocket-tts-int8-2026-01-26/decoder.int8.onnx".into()),
//!             text_conditioner: Some(
//!                 "./sherpa-onnx-pocket-tts-int8-2026-01-26/text_conditioner.onnx".into(),
//!             ),
//!             vocab_json: Some("./sherpa-onnx-pocket-tts-int8-2026-01-26/vocab.json".into()),
//!             token_scores_json: Some(
//!                 "./sherpa-onnx-pocket-tts-int8-2026-01-26/token_scores.json".into(),
//!             ),
//!             ..Default::default()
//!         },
//!         ..Default::default()
//!     },
//!     ..Default::default()
//! };
//!
//! let tts = OfflineTts::create(&config).expect("create tts");
//! println!("{}", tts.sample_rate());
//! ```
mod audio_tagging;
mod display;
mod kws;
mod offline_asr;
mod offline_punctuation;
mod offline_speaker_diarization;
mod offline_speech_denoiser;
mod online_asr;
mod online_punctuation;
mod online_speech_denoiser;
mod speaker_embedding;
mod spoken_language_identification;
mod tts;
mod utils;
mod vad;
mod wave;

pub use audio_tagging::*;
pub use display::*;
pub use kws::*;
pub use offline_asr::*;
pub use offline_punctuation::*;
pub use offline_speaker_diarization::*;
pub use offline_speech_denoiser::*;
pub use online_asr::*;
pub use online_punctuation::*;
pub use online_speech_denoiser::*;
pub use speaker_embedding::*;
pub use spoken_language_identification::*;
pub use tts::*;
pub use utils::*;
pub use vad::*;
pub use wave::*;


================================================
FILE: sherpa-onnx/rust/sherpa-onnx/src/offline_asr.rs
================================================
//! Offline speech recognition.
//!
//! The Rust wrapper exposes the same model families as the native C API. In
//! typical use, configure exactly one model family inside [`OfflineModelConfig`]
//! and then create an [`OfflineRecognizer`].
//!
//! Repository examples:
//!
//! - `rust-api-examples/examples/sense_voice.rs`
//! - `rust-api-examples/examples/nemo_parakeet.rs`
//! - `rust-api-examples/examples/moonshine_v2.rs`
//! - `rust-api-examples/examples/fire_red_asr_ctc.rs`
//!
//! ```no_run
//! use sherpa_onnx::{
//!     OfflineRecognizer, OfflineRecognizerConfig, OfflineSenseVoiceModelConfig, Wave,
//! };
//!
//! let wave = Wave::read("./test.wav").expect("read wave");
//! let mut config = OfflineRecognizerConfig::default();
//! config.model_config.sense_voice = OfflineSenseVoiceModelConfig {
//!     model: Some(
//!         "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8/model.int8.onnx".into(),
//!     ),
//!     language: Some("auto".into()),
//!     use_itn: true,
//! };
//! config.model_config.tokens = Some(
//!     "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17-int8/tokens.txt".into(),
//! );
//!
//! let recognizer = OfflineRecognizer::create(&config).expect("create recognizer");
//! let stream = recognizer.create_stream();
//! stream.accept_waveform(wave.sample_rate(), wave.samples());
//! recognizer.decode(&stream);
//! println!("{}", stream.get_result().expect("result").text);
//! ```

use crate::utils::to_c_ptr;
use serde::Deserialize;
use sherpa_onnx_sys as sys;
use std::ffi::{CStr, CString};

#[derive(Clone, Debug, Default)]
/// Offline transducer model configuration.
///
/// This is used for transducer-style models such as the Parakeet example in
/// `rust-api-examples/examples/nemo_parakeet.rs`.
pub struct OfflineTransducerModelConfig {
    pub encoder: Option<String>,
    pub decoder: Option<String>,
    pub joiner: Option<String>,
}

impl OfflineTransducerModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflineTransducerModelConfig {
        sys::OfflineTransducerModelConfig {
            encoder: to_c_ptr(&self.encoder, cstrings),
            decoder: to_c_ptr(&self.decoder, cstrings),
            joiner: to_c_ptr(&self.joiner, cstrings),
        }
    }
}

#[derive(Clone, Debug, Default)]
/// Offline Paraformer model configuration.
pub struct OfflineParaformerModelConfig {
    pub model: Option<String>,
}

impl OfflineParaformerModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflineParaformerModelConfig {
        sys::OfflineParaformerModelConfig {
            model: to_c_ptr(&self.model, cstrings),
        }
    }
}

#[derive(Clone, Debug, Default)]
/// Offline NeMo CTC model configuration.
pub struct OfflineNemoEncDecCtcModelConfig {
    pub model: Option<String>,
}

impl OfflineNemoEncDecCtcModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflineNemoEncDecCtcModelConfig {
        sys::OfflineNemoEncDecCtcModelConfig {
            model: to_c_ptr(&self.model, cstrings),
        }
    }
}

#[derive(Clone, Debug, Default)]
/// Offline Whisper model configuration.
pub struct OfflineWhisperModelConfig {
    pub encoder: Option<String>,
    pub decoder: Option<String>,
    pub language: Option<String>,
    pub task: Option<String>,
    pub tail_paddings: i32,
    pub enable_token_timestamps: bool,
    pub enable_segment_timestamps: bool,
}

impl OfflineWhisperModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflineWhisperModelConfig {
        sys::OfflineWhisperModelConfig {
            encoder: to_c_ptr(&self.encoder, cstrings),
            decoder: to_c_ptr(&self.decoder, cstrings),
            language: to_c_ptr(&self.language, cstrings),
            task: to_c_ptr(&self.task, cstrings),
            tail_paddings: self.tail_paddings,
            enable_token_timestamps: self.enable_token_timestamps as i32,
            enable_segment_timestamps: self.enable_segment_timestamps as i32,
        }
    }
}

#[derive(Clone, Debug, Default)]
/// Offline Canary model configuration.
pub struct OfflineCanaryModelConfig {
    pub encoder: Option<String>,
    pub decoder: Option<String>,
    pub src_lang: Option<String>,
    pub tgt_lang: Option<String>,
    pub use_pnc: bool,
}

impl OfflineCanaryModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflineCanaryModelConfig {
        sys::OfflineCanaryModelConfig {
            encoder: to_c_ptr(&self.encoder, cstrings),
            decoder: to_c_ptr(&self.decoder, cstrings),
            src_lang: to_c_ptr(&self.src_lang, cstrings),
            tgt_lang: to_c_ptr(&self.tgt_lang, cstrings),
            use_pnc: self.use_pnc as i32,
        }
    }
}

#[derive(Clone, Debug, Default)]
/// Offline FireRed ASR transducer configuration.
pub struct OfflineFireRedAsrModelConfig {
    pub encoder: Option<String>,
    pub decoder: Option<String>,
}

impl OfflineFireRedAsrModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflineFireRedAsrModelConfig {
        sys::OfflineFireRedAsrModelConfig {
            encoder: to_c_ptr(&self.encoder, cstrings),
            decoder: to_c_ptr(&self.decoder, cstrings),
        }
    }
}

/// For Moonshine v1, you need 4 models:
///  - preprocessor, encoder, uncached_decoder, cached_decoder
///
/// For Moonshine v2, you need 2 models:
///  - encoder, merged_decoder
#[derive(Clone, Debug, Default)]
/// Offline Moonshine model configuration.
pub struct OfflineMoonshineModelConfig {
    pub preprocessor: Option<String>,
    pub encoder: Option<String>,
    pub uncached_decoder: Option<String>,
    pub cached_decoder: Option<String>,
    pub merged_decoder: Option<String>,
}

impl OfflineMoonshineModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflineMoonshineModelConfig {
        sys::OfflineMoonshineModelConfig {
            preprocessor: to_c_ptr(&self.preprocessor, cstrings),
            encoder: to_c_ptr(&self.encoder, cstrings),
            uncached_decoder: to_c_ptr(&self.uncached_decoder, cstrings),
            cached_decoder: to_c_ptr(&self.cached_decoder, cstrings),
            merged_decoder: to_c_ptr(&self.merged_decoder, cstrings),
        }
    }
}

#[derive(Clone, Debug, Default)]
/// Offline TDNN model configuration.
pub struct OfflineTdnnModelConfig {
    pub model: Option<String>,
}

impl OfflineTdnnModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflineTdnnModelConfig {
        sys::OfflineTdnnModelConfig {
            model: to_c_ptr(&self.model, cstrings),
        }
    }
}

#[derive(Clone, Debug)]
/// Optional external language model configuration for offline ASR.
pub struct OfflineLMConfig {
    pub model: Option<String>,
    pub scale: f32,
}
impl Default for OfflineLMConfig {
    fn default() -> Self {
        Self {
            model: None,
            scale: 1.0,
        }
    }
}
impl OfflineLMConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflineLMConfig {
        sys::OfflineLMConfig {
            model: to_c_ptr(&self.model, cstrings),
            scale: self.scale,
        }
    }
}

#[derive(Clone, Debug, Default)]
/// Offline SenseVoice model configuration.
pub struct OfflineSenseVoiceModelConfig {
    pub model: Option<String>,
    pub language: Option<String>,
    pub use_itn: bool,
}

impl OfflineSenseVoiceModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflineSenseVoiceModelConfig {
        sys::OfflineSenseVoiceModelConfig {
            model: to_c_ptr(&self.model, cstrings),
            language: to_c_ptr(&self.language, cstrings),
            use_itn: self.use_itn as i32,
        }
    }
}

#[derive(Clone, Debug, Default)]
/// Offline Dolphin model configuration.
pub struct OfflineDolphinModelConfig {
    pub model: Option<String>,
}

impl OfflineDolphinModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflineDolphinModelConfig {
        sys::OfflineDolphinModelConfig {
            model: to_c_ptr(&self.model, cstrings),
        }
    }
}

#[derive(Clone, Debug, Default)]
/// Offline Zipformer CTC model configuration.
pub struct OfflineZipformerCtcModelConfig {
    pub model: Option<String>,
}

impl OfflineZipformerCtcModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflineZipformerCtcModelConfig {
        sys::OfflineZipformerCtcModelConfig {
            model: to_c_ptr(&self.model, cstrings),
        }
    }
}

#[derive(Clone, Debug, Default)]
/// Offline WeNet CTC model configuration.
pub struct OfflineWenetCtcModelConfig {
    pub model: Option<String>,
}

impl OfflineWenetCtcModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflineWenetCtcModelConfig {
        sys::OfflineWenetCtcModelConfig {
            model: to_c_ptr(&self.model, cstrings),
        }
    }
}

#[derive(Clone, Debug, Default)]
/// Offline omnilingual CTC model configuration.
pub struct OfflineOmnilingualAsrCtcModelConfig {
    pub model: Option<String>,
}

impl OfflineOmnilingualAsrCtcModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflineOmnilingualAsrCtcModelConfig {
        sys::OfflineOmnilingualAsrCtcModelConfig {
            model: to_c_ptr(&self.model, cstrings),
        }
    }
}

#[derive(Clone, Debug, Default)]
/// Offline MedASR CTC model configuration.
pub struct OfflineMedAsrCtcModelConfig {
    pub model: Option<String>,
}

impl OfflineMedAsrCtcModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflineMedAsrCtcModelConfig {
        sys::OfflineMedAsrCtcModelConfig {
            model: to_c_ptr(&self.model, cstrings),
        }
    }
}

#[derive(Clone, Debug, Default)]
/// Offline FireRed ASR CTC model configuration.
pub struct OfflineFireRedAsrCtcModelConfig {
    pub model: Option<String>,
}

impl OfflineFireRedAsrCtcModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflineFireRedAsrCtcModelConfig {
        sys::OfflineFireRedAsrCtcModelConfig {
            model: to_c_ptr(&self.model, cstrings),
        }
    }
}

#[derive(Clone, Debug)]
/// Offline FunASR Nano model configuration.
pub struct OfflineFunASRNanoModelConfig {
    pub encoder_adaptor: Option<String>,
    pub llm: Option<String>,
    pub embedding: Option<String>,
    pub tokenizer: Option<String>,
    pub system_prompt: Option<String>,
    pub user_prompt: Option<String>,
    pub max_new_tokens: i32,
    pub temperature: f32,
    pub top_p: f32,
    pub seed: i32,
    pub language: Option<String>,
    pub itn: i32,
    pub hotwords: Option<String>,
}
impl Default for OfflineFunASRNanoModelConfig {
    fn default() -> Self {
        Self {
            encoder_adaptor: None,
            llm: None,
            embedding: None,
            tokenizer: None,
            system_prompt: None,
            user_prompt: None,
            max_new_tokens: 0,
            temperature: 1.0,
            top_p: 1.0,
            seed: 0,
            language: None,
            itn: 0,
            hotwords: None,
        }
    }
}
impl OfflineFunASRNanoModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflineFunASRNanoModelConfig {
        sys::OfflineFunASRNanoModelConfig {
            encoder_adaptor: to_c_ptr(&self.encoder_adaptor, cstrings),
            llm: to_c_ptr(&self.llm, cstrings),
            embedding: to_c_ptr(&self.embedding, cstrings),
            tokenizer: to_c_ptr(&self.tokenizer, cstrings),
            system_prompt: to_c_ptr(&self.system_prompt, cstrings),
            user_prompt: to_c_ptr(&self.user_prompt, cstrings),
            max_new_tokens: self.max_new_tokens,
            temperature: self.temperature,
            top_p: self.top_p,
            seed: self.seed,
            language: to_c_ptr(&self.language, cstrings),
            itn: self.itn,
            hotwords: to_c_ptr(&self.hotwords, cstrings),
        }
    }
}

#[derive(Clone, Debug, Default)]
/// Aggregate model configuration for offline recognition.
///
/// Configure exactly one model family for typical use. Shared options such as
/// `tokens`, `provider`, and `num_threads` live here as well.
pub struct OfflineModelConfig {
    pub transducer: OfflineTransducerModelConfig,
    pub paraformer: OfflineParaformerModelConfig,
    pub nemo_ctc: OfflineNemoEncDecCtcModelConfig,
    pub whisper: OfflineWhisperModelConfig,
    pub tdnn: OfflineTdnnModelConfig,
    pub sense_voice: OfflineSenseVoiceModelConfig,
    pub moonshine: OfflineMoonshineModelConfig,
    pub fire_red_asr: OfflineFireRedAsrModelConfig,
    pub dolphin: OfflineDolphinModelConfig,
    pub zipformer_ctc: OfflineZipformerCtcModelConfig,
    pub canary: OfflineCanaryModelConfig,
    pub wenet_ctc: OfflineWenetCtcModelConfig,
    pub omnilingual: OfflineOmnilingualAsrCtcModelConfig,
    pub medasr: OfflineMedAsrCtcModelConfig,
    pub funasr_nano: OfflineFunASRNanoModelConfig,
    pub fire_red_asr_ctc: OfflineFireRedAsrCtcModelConfig,

    pub tokens: Option<String>,
    pub num_threads: i32,
    pub debug: bool,
    pub provider: Option<String>,
    pub model_type: Option<String>,
    pub modeling_unit: Option<String>,
    pub bpe_vocab: Option<String>,
    pub telespeech_ctc: Option<String>,
}

impl OfflineModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflineModelConfig {
        sys::OfflineModelConfig {
            transducer: self
                .transducer
                .to_sys(cstrings),
            paraformer: self
                .paraformer
                .to_sys(cstrings),
            nemo_ctc: self
                .nemo_ctc
                .to_sys(cstrings),
            whisper: self
                .whisper
                .to_sys(cstrings),
            tdnn: self
                .tdnn
                .to_sys(cstrings),
            sense_voice: self
                .sense_voice
                .to_sys(cstrings),
            canary: self
                .canary
                .to_sys(cstrings),
            fire_red_asr: self
                .fire_red_asr
                .to_sys(cstrings),
            dolphin: self
                .dolphin
                .to_sys(cstrings),
            moonshine: self
                .moonshine
                .to_sys(cstrings),
            zipformer_ctc: self
                .zipformer_ctc
                .to_sys(cstrings),
            wenet_ctc: self
                .wenet_ctc
                .to_sys(cstrings),
            omnilingual: self
                .omnilingual
                .to_sys(cstrings),
            medasr: self
                .medasr
                .to_sys(cstrings),
            funasr_nano: self
                .funasr_nano
                .to_sys(cstrings),
            fire_red_asr_ctc: self
                .fire_red_asr_ctc
                .to_sys(cstrings),

            tokens: to_c_ptr(&self.tokens, cstrings),
            num_threads: self.num_threads,
            debug: self.debug as i32,
            provider: to_c_ptr(&self.provider, cstrings),
            model_type: to_c_ptr(&self.model_type, cstrings),
            modeling_unit: to_c_ptr(&self.modeling_unit, cstrings),
            bpe_vocab: to_c_ptr(&self.bpe_vocab, cstrings),
            telespeech_ctc: to_c_ptr(&self.telespeech_ctc, cstrings),
        }
    }
}

#[derive(Clone, Debug)]
/// Top-level configuration for [`OfflineRecognizer`].
///
/// Use [`Default`] as a starting point, then fill the fields for the model you
/// want to run.
pub struct OfflineRecognizerConfig {
    pub feat_config: sys::FeatureConfig,
    pub model_config: OfflineModelConfig,
    pub lm_config: OfflineLMConfig,
    pub decoding_method: Option<String>,
    pub max_active_paths: i32,
    pub hotwords_file: Option<String>,
    pub hotwords_score: f32,
    pub rule_fsts: Option<String>,
    pub rule_fars: Option<String>,
    pub blank_penalty: f32,
    pub hr: super::online_asr::HomophoneReplacerConfig,
}

impl OfflineRecognizerConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflineRecognizerConfig {
        sys::OfflineRecognizerConfig {
            feat_config: self.feat_config,
            model_config: self
                .model_config
                .to_sys(cstrings),
            lm_config: self
                .lm_config
                .to_sys(cstrings),
            decoding_method: to_c_ptr(&self.decoding_method, cstrings),
            max_active_paths: self.max_active_paths,
            hotwords_file: to_c_ptr(&self.hotwords_file, cstrings),
            hotwords_score: self.hotwords_score,
            rule_fsts: to_c_ptr(&self.rule_fsts, cstrings),
            rule_fars: to_c_ptr(&self.rule_fars, cstrings),
            blank_penalty: self.blank_penalty,
            hr: self
                .hr
                .to_sys(cstrings),
        }
    }
}

impl Default for OfflineRecognizerConfig {
    fn default() -> Self {
        Self {
            feat_config: sys::FeatureConfig {
                sample_rate: 16000,
                feature_dim: 80,
            },

            model_config: OfflineModelConfig::default(),
            lm_config: OfflineLMConfig::default(),
            decoding_method: None,
            max_active_paths: 4, // a reasonable default
            hotwords_file: None,
            hotwords_score: 0.0,
            rule_fsts: None,
            rule_fars: None,
            blank_penalty: 0.0,
            hr: super::online_asr::HomophoneReplacerConfig::default(),
        }
    }
}

#[derive(Clone, Debug, Deserialize)]
/// Recognition result returned by [`OfflineStream::get_result`].
pub struct OfflineRecognizerResult {
    pub text: String,
    pub tokens: Vec<String>,
    pub timestamps: Option<Vec<f32>>,
    pub durations: Option<Vec<f32>>,
}

/// Offline speech recognizer.
///
/// ```no_run
/// use sherpa_onnx::{
///     OfflineRecognizer, OfflineRecognizerConfig, OfflineTransducerModelConfig, Wave,
/// };
///
/// let wave = Wave::read("./test.wav").expect("read wave");
/// let mut config = OfflineRecognizerConfig::default();
/// config.model_config.transducer = OfflineTransducerModelConfig {
///     encoder: Some("./sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8/encoder.int8.onnx".into()),
///     decoder: Some("./sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8/decoder.int8.onnx".into()),
///     joiner: Some("./sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8/joiner.int8.onnx".into()),
/// };
/// config.model_config.tokens =
///     Some("./sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8/tokens.txt".into());
/// config.model_config.model_type = Some("nemo_transducer".into());
///
/// let recognizer = OfflineRecognizer::create(&config).expect("create recognizer");
/// let stream = recognizer.create_stream();
/// stream.accept_waveform(wave.sample_rate(), wave.samples());
/// recognizer.decode(&stream);
/// let result = stream.get_result().expect("result");
/// println!("{}", result.text);
/// ```
pub struct OfflineRecognizer {
    ptr: *const sys::OfflineRecognizer,
}

impl OfflineRecognizer {
    /// Create a recognizer from `config`.
    pub fn create(config: &OfflineRecognizerConfig) -> Option<Self> {
        let mut cstrings = Vec::new();
        let sys_config = config.to_sys(&mut cstrings);
        let ptr = unsafe { sys::SherpaOnnxCreateOfflineRecognizer(&sys_config) };
        if ptr.is_null() {
            None
        } else {
            Some(Self { ptr })
        }
    }

    /// Create an empty offline stream.
    pub fn create_stream(&self) -> OfflineStream {
        let ptr = unsafe { sys::SherpaOnnxCreateOfflineStream(self.ptr) };
        OfflineStream { ptr }
    }

    /// Create a stream with per-stream hotwords.
    pub fn create_stream_with_hotwords(&self, hotwords: &str) -> OfflineStream {
        let c = CString::new(hotwords).unwrap();
        let ptr = unsafe { sys::SherpaOnnxCreateOfflineStreamWithHotwords(self.ptr, c.as_ptr()) };
        OfflineStream { ptr }
    }

    /// Decode one stream.
    pub fn decode(&self, stream: &OfflineStream) {
        unsafe { sys::SherpaOnnxDecodeOfflineStream(self.ptr, stream.ptr) }
    }

    /// Decode multiple streams in one batch call.
    pub fn decode_multiple_streams(&self, streams: &[&OfflineStream]) {
        let ptrs: Vec<*const sys::OfflineStream> = streams
            .iter()
            .map(|s| s.ptr)
            .collect();
        unsafe {
            sys::SherpaOnnxDecodeMultipleOfflineStreams(self.ptr, ptrs.as_ptr(), ptrs.len() as i32)
        }
    }
}

impl Drop for OfflineRecognizer {
    fn drop(&mut self) {
        unsafe {
            sys::SherpaOnnxDestroyOfflineRecognizer(self.ptr);
        }
    }
}

/// Input stream used by [`OfflineRecognizer`].
pub struct OfflineStream {
    pub(crate) ptr: *const sys::OfflineStream,
}

impl OfflineStream {
    /// Append samples to the stream.
    pub fn accept_waveform(&self, sample_rate: i32, samples: &[f32]) {
        unsafe {
            sys::SherpaOnnxAcceptWaveformOffline(
                self.ptr,
                sample_rate,
                samples.as_ptr(),
                samples.len() as i32,
            )
        }
    }

    /// Fetch the current recognition result.
    pub fn get_result(&self) -> Option<OfflineRecognizerResult> {
        unsafe {
            let cstr = sys::SherpaOnnxGetOfflineStreamResultAsJson(self.ptr);
            if cstr.is_null() {
                return None;
            }
            let s = CStr::from_ptr(cstr)
                .to_string_lossy()
                .into_owned();
            sys::SherpaOnnxDestroyOfflineStreamResultJson(cstr);
            serde_json::from_str(&s).ok()
        }
    }

    pub fn set_option(&self, key: &str, value: &str) {
        let key = CString::new(key).unwrap();
        let value = CString::new(value).unwrap();
        unsafe { sys::SherpaOnnxOfflineStreamSetOption(self.ptr, key.as_ptr(), value.as_ptr()) }
    }

    pub fn get_option(&self, key: &str) -> String {
        let key = CString::new(key).unwrap();
        unsafe {
            let p = sys::SherpaOnnxOfflineStreamGetOption(self.ptr, key.as_ptr());
            if p.is_null() {
                String::new()
            } else {
                CStr::from_ptr(p)
                    .to_string_lossy()
                    .into_owned()
            }
        }
    }

    pub fn has_option(&self, key: &str) -> bool {
        let key = CString::new(key).unwrap();
        unsafe { sys::SherpaOnnxOfflineStreamHasOption(self.ptr, key.as_ptr()) != 0 }
    }
}

impl Drop for OfflineStream {
    fn drop(&mut self) {
        unsafe { sys::SherpaOnnxDestroyOfflineStream(self.ptr) }
    }
}


================================================
FILE: sherpa-onnx/rust/sherpa-onnx/src/offline_punctuation.rs
================================================
//! Offline punctuation restoration.
//!
//! Use this module when you already have a complete text string and want a
//! one-shot punctuation pass. See
//! [`rust-api-examples/examples/offline_punctuation.rs`](https://github.com/k2-fsa/sherpa-onnx/blob/master/rust-api-examples/examples/offline_punctuation.rs)
//! for a complete example.
//!
//! # Example
//!
//! ```no_run
//! use sherpa_onnx::{OfflinePunctuation, OfflinePunctuationConfig};
//!
//! let mut config = OfflinePunctuationConfig::default();
//! config.model.ct_transformer = Some("./sherpa-onnx-offline-punctuation/model.onnx".into());
//!
//! let punct = OfflinePunctuation::create(&config).expect("create punctuator");
//! let text = punct
//!     .add_punctuation("today is a good day how are you")
//!     .expect("punctuate");
//! println!("{text}");
//! ```

use crate::utils::to_c_ptr;
use sherpa_onnx_sys as sys;
use std::ffi::{CStr, CString};

#[derive(Clone, Debug)]
/// Model configuration for offline punctuation restoration.
pub struct OfflinePunctuationModelConfig {
    pub ct_transformer: Option<String>,
    pub num_threads: i32,
    pub debug: bool,
    pub provider: Option<String>,
}

impl Default for OfflinePunctuationModelConfig {
    fn default() -> Self {
        Self {
            ct_transformer: None,
            num_threads: 1,
            debug: false,
            provider: Some("cpu".to_string()),
        }
    }
}

impl OfflinePunctuationModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflinePunctuationModelConfig {
        sys::OfflinePunctuationModelConfig {
            ct_transformer: to_c_ptr(&self.ct_transformer, cstrings),
            num_threads: self.num_threads,
            debug: self.debug as i32,
            provider: to_c_ptr(&self.provider, cstrings),
        }
    }
}

#[derive(Clone, Debug, Default)]
/// Top-level configuration for [`OfflinePunctuation`].
pub struct OfflinePunctuationConfig {
    pub model: OfflinePunctuationModelConfig,
}

impl OfflinePunctuationConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflinePunctuationConfig {
        sys::OfflinePunctuationConfig {
            model: self
                .model
                .to_sys(cstrings),
        }
    }
}

/// Offline punctuation restorer.
pub struct OfflinePunctuation {
    ptr: *const sys::OfflinePunctuation,
}

unsafe impl Send for OfflinePunctuation {}

impl OfflinePunctuation {
    /// Create an offline punctuator from [`OfflinePunctuationConfig`].
    pub fn create(config: &OfflinePunctuationConfig) -> Option<Self> {
        let mut cstrings = Vec::new();
        let sys_config = config.to_sys(&mut cstrings);
        let ptr = unsafe { sys::SherpaOnnxCreateOfflinePunctuation(&sys_config) };
        if ptr.is_null() {
            None
        } else {
            Some(Self { ptr })
        }
    }

    /// Add punctuation to `text`.
    pub fn add_punctuation(&self, text: &str) -> Option<String> {
        let text = CString::new(text).ok()?;

        unsafe {
            let p = sys::SherpaOfflinePunctuationAddPunct(self.ptr, text.as_ptr());
            if p.is_null() {
                return None;
            }

            let ans = CStr::from_ptr(p)
                .to_string_lossy()
                .into_owned();
            sys::SherpaOfflinePunctuationFreeText(p);
            Some(ans)
        }
    }
}

impl Drop for OfflinePunctuation {
    fn drop(&mut self) {
        unsafe {
            if !self
                .ptr
                .is_null()
            {
                sys::SherpaOnnxDestroyOfflinePunctuation(self.ptr);
            }
        }
    }
}


================================================
FILE: sherpa-onnx/rust/sherpa-onnx/src/offline_speaker_diarization.rs
================================================
//! Offline speaker diarization.
//!
//! This combines segmentation, speaker embedding extraction, and clustering.
//! See `rust-api-examples/examples/offline_speaker_diarization.rs`.

use crate::{speaker_embedding::SpeakerEmbeddingExtractorConfig, utils::to_c_ptr};
use sherpa_onnx_sys as sys;
use std::ffi::CString;
use std::slice;

#[derive(Clone, Debug, Default)]
/// Pyannote segmentation model path.
pub struct OfflineSpeakerSegmentationPyannoteModelConfig {
    pub model: Option<String>,
}

impl OfflineSpeakerSegmentationPyannoteModelConfig {
    fn to_sys(
        &self,
        cstrings: &mut Vec<CString>,
    ) -> sys::OfflineSpeakerSegmentationPyannoteModelConfig {
        sys::OfflineSpeakerSegmentationPyannoteModelConfig {
            model: to_c_ptr(&self.model, cstrings),
        }
    }
}

#[derive(Clone, Debug)]
/// Segmentation model configuration for diarization.
pub struct OfflineSpeakerSegmentationModelConfig {
    pub pyannote: OfflineSpeakerSegmentationPyannoteModelConfig,
    pub num_threads: i32,
    pub debug: bool,
    pub provider: Option<String>,
}

impl Default for OfflineSpeakerSegmentationModelConfig {
    fn default() -> Self {
        Self {
            pyannote: Default::default(),
            num_threads: 1,
            debug: false,
            provider: Some("cpu".to_string()),
        }
    }
}

impl OfflineSpeakerSegmentationModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflineSpeakerSegmentationModelConfig {
        sys::OfflineSpeakerSegmentationModelConfig {
            pyannote: self
                .pyannote
                .to_sys(cstrings),
            num_threads: self.num_threads,
            debug: self.debug as i32,
            provider: to_c_ptr(&self.provider, cstrings),
        }
    }
}

#[derive(Clone, Debug)]
/// Fast clustering options used after segmentation and embedding extraction.
pub struct FastClusteringConfig {
    pub num_clusters: i32,
    pub threshold: f32,
}

impl Default for FastClusteringConfig {
    fn default() -> Self {
        Self {
            num_clusters: -1,
            threshold: 0.5,
        }
    }
}

impl FastClusteringConfig {
    fn to_sys(&self) -> sys::FastClusteringConfig {
        sys::FastClusteringConfig {
            num_clusters: self.num_clusters,
            threshold: self.threshold,
        }
    }
}

#[derive(Clone, Debug)]
/// Top-level configuration for [`OfflineSpeakerDiarization`].
pub struct OfflineSpeakerDiarizationConfig {
    pub segmentation: OfflineSpeakerSegmentationModelConfig,
    pub embedding: SpeakerEmbeddingExtractorConfig,
    pub clustering: FastClusteringConfig,
    pub min_duration_on: f32,
    pub min_duration_off: f32,
}

impl Default for OfflineSpeakerDiarizationConfig {
    fn default() -> Self {
        Self {
            segmentation: Default::default(),
            embedding: Default::default(),
            clustering: Default::default(),
            min_duration_on: 0.3,
            min_duration_off: 0.5,
        }
    }
}

impl OfflineSpeakerDiarizationConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflineSpeakerDiarizationConfig {
        sys::OfflineSpeakerDiarizationConfig {
            segmentation: self
                .segmentation
                .to_sys(cstrings),
            embedding: self
                .embedding
                .to_sys(cstrings),
            clustering: self
                .clustering
                .to_sys(),
            min_duration_on: self.min_duration_on,
            min_duration_off: self.min_duration_off,
        }
    }
}

#[derive(Clone, Debug)]
/// One diarization segment labeled with a speaker index.
pub struct OfflineSpeakerDiarizationSegment {
    pub start: f32,
    pub end: f32,
    pub speaker: i32,
}

/// Offline speaker diarizer.
pub struct OfflineSpeakerDiarization {
    ptr: *const sys::OfflineSpeakerDiarization,
}

unsafe impl Send for OfflineSpeakerDiarization {}

impl OfflineSpeakerDiarization {
    /// Create a diarizer from `config`.
    pub fn create(config: &OfflineSpeakerDiarizationConfig) -> Option<Self> {
        let mut cstrings = Vec::new();
        let sys_config = config.to_sys(&mut cstrings);
        let ptr = unsafe { sys::SherpaOnnxCreateOfflineSpeakerDiarization(&sys_config) };
        if ptr.is_null() {
            None
        } else {
            Some(Self { ptr })
        }
    }

    /// Return the sample rate expected by the segmentation model.
    pub fn sample_rate(&self) -> i32 {
        unsafe { sys::SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(self.ptr) }
    }

    /// Replace the current configuration.
    pub fn set_config(&self, config: &OfflineSpeakerDiarizationConfig) {
        let mut cstrings = Vec::new();
        let sys_config = config.to_sys(&mut cstrings);
        unsafe { sys::SherpaOnnxOfflineSpeakerDiarizationSetConfig(self.ptr, &sys_config) }
    }

    /// Process a complete waveform and return a diarization result.
    pub fn process(&self, samples: &[f32]) -> Option<OfflineSpeakerDiarizationResult> {
        let ptr = unsafe {
            sys::SherpaOnnxOfflineSpeakerDiarizationProcess(
                self.ptr,
                samples.as_ptr(),
                samples.len() as i32,
            )
        };
        if ptr.is_null() {
            None
        } else {
            Some(OfflineSpeakerDiarizationResult { ptr })
        }
    }
}

impl Drop for OfflineSpeakerDiarization {
    fn drop(&mut self) {
        unsafe {
            if !self
                .ptr
                .is_null()
            {
                sys::SherpaOnnxDestroyOfflineSpeakerDiarization(self.ptr);
            }
        }
    }
}

/// Result object returned by [`OfflineSpeakerDiarization::process`].
pub struct OfflineSpeakerDiarizationResult {
    ptr: *const sys::OfflineSpeakerDiarizationResult,
}

impl OfflineSpeakerDiarizationResult {
    /// Return the number of speakers estimated for the recording.
    pub fn num_speakers(&self) -> i32 {
        unsafe { sys::SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers(self.ptr) }
    }

    /// Return the number of diarization segments.
    pub fn num_segments(&self) -> i32 {
        unsafe { sys::SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(self.ptr) }
    }

    /// Return all segments sorted by start time.
    pub fn sort_by_start_time(&self) -> Vec<OfflineSpeakerDiarizationSegment> {
        let n = self.num_segments();
        if n <= 0 {
            return Vec::new();
        }

        unsafe {
            let p = sys::SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(self.ptr);
            if p.is_null() {
                return Vec::new();
            }

            let segments = slice::from_raw_parts(p, n as usize)
                .iter()
                .map(|s| OfflineSpeakerDiarizationSegment {
                    start: s.start,
                    end: s.end,
                    speaker: s.speaker,
                })
                .collect::<Vec<_>>();
            sys::SherpaOnnxOfflineSpeakerDiarizationDestroySegment(p);
            segments
        }
    }
}

impl Drop for OfflineSpeakerDiarizationResult {
    fn drop(&mut self) {
        unsafe {
            if !self
                .ptr
                .is_null()
            {
                sys::SherpaOnnxOfflineSpeakerDiarizationDestroyResult(self.ptr);
            }
        }
    }
}


================================================
FILE: sherpa-onnx/rust/sherpa-onnx/src/offline_speech_denoiser.rs
================================================
//! Offline speech denoising.
//!
//! Supported model families mirror the native API and currently include GTCRN
//! and DPDFNet. See the repository examples:
//!
//! - `rust-api-examples/examples/offline_speech_enhancement_gtcrn.rs`
//! - `rust-api-examples/examples/offline_speech_enhancement_dpdfnet.rs`

use crate::utils::to_c_ptr;
use sherpa_onnx_sys as sys;
use std::ffi::CString;
use std::ptr;
use std::slice;

#[derive(Clone, Debug, Default)]
/// GTCRN model path for offline denoising.
pub struct OfflineSpeechDenoiserGtcrnModelConfig {
    pub model: Option<String>,
}

impl OfflineSpeechDenoiserGtcrnModelConfig {
    pub(crate) fn to_sys(
        &self,
        cstrings: &mut Vec<CString>,
    ) -> sys::OfflineSpeechDenoiserGtcrnModelConfig {
        sys::OfflineSpeechDenoiserGtcrnModelConfig {
            model: to_c_ptr(&self.model, cstrings),
        }
    }
}

#[derive(Clone, Debug, Default)]
/// DPDFNet model path for offline denoising.
pub struct OfflineSpeechDenoiserDpdfNetModelConfig {
    pub model: Option<String>,
}

impl OfflineSpeechDenoiserDpdfNetModelConfig {
    pub(crate) fn to_sys(
        &self,
        cstrings: &mut Vec<CString>,
    ) -> sys::OfflineSpeechDenoiserDpdfNetModelConfig {
        sys::OfflineSpeechDenoiserDpdfNetModelConfig {
            model: to_c_ptr(&self.model, cstrings),
        }
    }
}

#[derive(Clone, Debug)]
/// Aggregate model configuration for [`OfflineSpeechDenoiser`].
///
/// Configure exactly one model family in normal use.
pub struct OfflineSpeechDenoiserModelConfig {
    pub gtcrn: OfflineSpeechDenoiserGtcrnModelConfig,
    pub dpdfnet: OfflineSpeechDenoiserDpdfNetModelConfig,
    pub num_threads: i32,
    pub debug: bool,
    pub provider: Option<String>,
}

impl Default for OfflineSpeechDenoiserModelConfig {
    fn default() -> Self {
        Self {
            gtcrn: Default::default(),
            dpdfnet: Default::default(),
            num_threads: 1,
            debug: false,
            provider: Some("cpu".to_string()),
        }
    }
}

impl OfflineSpeechDenoiserModelConfig {
    pub(crate) fn to_sys(
        &self,
        cstrings: &mut Vec<CString>,
    ) -> sys::OfflineSpeechDenoiserModelConfig {
        sys::OfflineSpeechDenoiserModelConfig {
            gtcrn: self
                .gtcrn
                .to_sys(cstrings),
            num_threads: self.num_threads,
            debug: self.debug as i32,
            provider: to_c_ptr(&self.provider, cstrings),
            dpdfnet: self
                .dpdfnet
                .to_sys(cstrings),
        }
    }
}

#[derive(Clone, Debug, Default)]
/// Denoised samples returned from an offline or online denoiser.
pub struct DenoisedAudio {
    pub samples: Vec<f32>,
    pub sample_rate: i32,
}

impl DenoisedAudio {
    pub(crate) fn from_ptr(ptr: *const sys::DenoisedAudio) -> Self {
        if ptr.is_null() {
            return Self::default();
        }

        unsafe {
            let n = (*ptr)
                .n
                .max(0) as usize;
            let samples = if (*ptr)
                .samples
                .is_null()
                || n == 0
            {
                vec![]
            } else {
                slice::from_raw_parts((*ptr).samples, n).to_vec()
            };
            let sample_rate = (*ptr).sample_rate;
            sys::SherpaOnnxDestroyDenoisedAudio(ptr);
            Self {
                samples,
                sample_rate,
            }
        }
    }
}

#[derive(Clone, Debug, Default)]
/// Top-level configuration for [`OfflineSpeechDenoiser`].
pub struct OfflineSpeechDenoiserConfig {
    pub model: OfflineSpeechDenoiserModelConfig,
}

impl OfflineSpeechDenoiserConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflineSpeechDenoiserConfig {
        sys::OfflineSpeechDenoiserConfig {
            model: self
                .model
                .to_sys(cstrings),
        }
    }
}

/// Offline speech denoiser.
pub struct OfflineSpeechDenoiser {
    ptr: *const sys::OfflineSpeechDenoiser,
}

impl OfflineSpeechDenoiser {
    /// Create a denoiser from `config`.
    pub fn create(config: &OfflineSpeechDenoiserConfig) -> Option<Self> {
        let mut cstrings = Vec::new();
        let sys_config = config.to_sys(&mut cstrings);
        let ptr = unsafe { sys::SherpaOnnxCreateOfflineSpeechDenoiser(&sys_config) };
        if ptr.is_null() {
            None
        } else {
            Some(Self { ptr })
        }
    }

    /// Denoise one chunk or a complete waveform.
    pub fn run(&self, samples: &[f32], sample_rate: i32) -> DenoisedAudio {
        let samples_ptr = if samples.is_empty() {
            ptr::null()
        } else {
            samples.as_ptr()
        };
        let ptr = unsafe {
            sys::SherpaOnnxOfflineSpeechDenoiserRun(
                self.ptr,
                samples_ptr,
                samples.len() as i32,
                sample_rate,
            )
        };
        DenoisedAudio::from_ptr(ptr)
    }

    /// Return the model sample rate expected by this denoiser.
    pub fn sample_rate(&self) -> i32 {
        unsafe { sys::SherpaOnnxOfflineSpeechDenoiserGetSampleRate(self.ptr) }
    }
}

impl Drop for OfflineSpeechDenoiser {
    fn drop(&mut self) {
        unsafe { sys::SherpaOnnxDestroyOfflineSpeechDenoiser(self.ptr) }
    }
}


================================================
FILE: sherpa-onnx/rust/sherpa-onnx/src/online_asr.rs
================================================
//! Streaming speech recognition.
//!
//! Configure exactly one model family inside [`OnlineModelConfig`], create an
//! [`OnlineRecognizer`], then feed waveform chunks into an [`OnlineStream`].
//!
//! See:
//!
//! - `rust-api-examples/examples/streaming_zipformer.rs`
//! - `rust-api-examples/examples/streaming_zipformer_microphone.rs`
//!
//! ```no_run
//! use sherpa_onnx::{OnlineRecognizer, OnlineRecognizerConfig, Wave};
//!
//! let wave = Wave::read("./test.wav").expect("read wave");
//! let mut config = OnlineRecognizerConfig::default();
//! config.model_config.transducer.encoder = Some(
//!     "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx".into(),
//! );
//! config.model_config.transducer.decoder = Some(
//!     "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx".into(),
//! );
//! config.model_config.transducer.joiner = Some(
//!     "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx".into(),
//! );
//! config.model_config.tokens = Some(
//!     "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt".into(),
//! );
//! config.enable_endpoint = true;
//! config.decoding_method = Some("greedy_search".into());
//!
//! let recognizer = OnlineRecognizer::create(&config).expect("create recognizer");
//! let stream = recognizer.create_stream();
//! stream.accept_waveform(wave.sample_rate(), wave.samples());
//! stream.input_finished();
//!
//! while recognizer.is_ready(&stream) {
//!     recognizer.decode(&stream);
//! }
//! ```

use crate::utils::to_c_ptr;
use serde::Deserialize;
use std::ffi::{CStr, CString};
use std::ptr;

use sherpa_onnx_sys as sys;

#[derive(Clone, Debug, Default)]
/// Online transducer model configuration.
pub struct OnlineTransducerModelConfig {
    pub encoder: Option<String>,
    pub decoder: Option<String>,
    pub joiner: Option<String>,
}

impl OnlineTransducerModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OnlineTransducerModelConfig {
        sys::OnlineTransducerModelConfig {
            encoder: to_c_ptr(&self.encoder, cstrings),
            decoder: to_c_ptr(&self.decoder, cstrings),
            joiner: to_c_ptr(&self.joiner, cstrings),
        }
    }
}

#[derive(Clone, Debug, Default)]
/// Online Paraformer model configuration.
pub struct OnlineParaformerModelConfig {
    pub encoder: Option<String>,
    pub decoder: Option<String>,
}

impl OnlineParaformerModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OnlineParaformerModelConfig {
        sys::OnlineParaformerModelConfig {
            encoder: to_c_ptr(&self.encoder, cstrings),
            decoder: to_c_ptr(&self.decoder, cstrings),
        }
    }
}

#[derive(Clone, Debug, Default)]
/// Online Zipformer2 CTC model configuration.
pub struct OnlineZipformer2CtcModelConfig {
    pub model: Option<String>,
}

impl OnlineZipformer2CtcModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OnlineZipformer2CtcModelConfig {
        sys::OnlineZipformer2CtcModelConfig {
            model: to_c_ptr(&self.model, cstrings),
        }
    }
}

#[derive(Clone, Debug, Default)]
/// Online NeMo CTC model configuration.
pub struct OnlineNemoCtcModelConfig {
    pub model: Option<String>,
}

impl OnlineNemoCtcModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OnlineNemoCtcModelConfig {
        sys::OnlineNemoCtcModelConfig {
            model: to_c_ptr(&self.model, cstrings),
        }
    }
}

#[derive(Clone, Debug, Default)]
/// Online Tone CTC model configuration.
pub struct OnlineToneCtcModelConfig {
    pub model: Option<String>,
}

impl OnlineToneCtcModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OnlineToneCtcModelConfig {
        sys::OnlineToneCtcModelConfig {
            model: to_c_ptr(&self.model, cstrings),
        }
    }
}

#[derive(Clone, Debug)]
/// Aggregate model configuration for streaming recognition.
///
/// Configure exactly one model family for typical use.
pub struct OnlineModelConfig {
    pub transducer: OnlineTransducerModelConfig,
    pub paraformer: OnlineParaformerModelConfig,
    pub zipformer2_ctc: OnlineZipformer2CtcModelConfig,
    pub nemo_ctc: OnlineNemoCtcModelConfig,
    pub t_one_ctc: OnlineToneCtcModelConfig,

    pub tokens: Option<String>,
    pub num_threads: i32,
    pub provider: Option<String>,
    pub debug: bool,

    pub model_type: Option<String>,
    pub modeling_unit: Option<String>, // cjkchar | bpe | cjkchar+bpe
    pub bpe_vocab: Option<String>,

    /// Optional in-memory tokens
    pub tokens_buf: Option<Vec<u8>>,
}

impl Default for OnlineModelConfig {
    fn default() -> Self {
        Self {
            transducer: Default::default(),
            paraformer: Default::default(),
            zipformer2_ctc: Default::default(),
            nemo_ctc: Default::default(),
            t_one_ctc: Default::default(),

            tokens: None,
            num_threads: 1,
            provider: Some("cpu".to_string()),
            debug: false,

            model_type: None,
            modeling_unit: None,
            bpe_vocab: None,
            tokens_buf: None,
        }
    }
}

impl OnlineModelConfig {
    pub(crate) fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OnlineModelConfig {
        sys::OnlineModelConfig {
            transducer: self
                .transducer
                .to_sys(cstrings),
            paraformer: self
                .paraformer
                .to_sys(cstrings),
            zipformer2_ctc: self
                .zipformer2_ctc
                .to_sys(cstrings),
            nemo_ctc: self
                .nemo_ctc
                .to_sys(cstrings),
            t_one_ctc: self
                .t_one_ctc
                .to_sys(cstrings),

            tokens: to_c_ptr(&self.tokens, cstrings),
            num_threads: self.num_threads,
            provider: to_c_ptr(&self.provider, cstrings),
            debug: self.debug as i32,

            model_type: to_c_ptr(&self.model_type, cstrings),
            modeling_unit: to_c_ptr(&self.modeling_unit, cstrings),
            bpe_vocab: to_c_ptr(&self.bpe_vocab, cstrings),

            tokens_buf: self
                .tokens_buf
                .as_ref()
                .map_or(ptr::null(), |buf| buf.as_ptr() as *const _),
            tokens_buf_size: self
                .tokens_buf
                .as_ref()
                .map_or(0, |buf| buf.len() as i32),
        }
    }
}

#[derive(Clone, Debug)]
/// FST decoder options for CTC models.
pub struct OnlineCtcFstDecoderConfig {
    pub graph: Option<String>,
    pub max_active: i32,
}

impl Default for OnlineCtcFstDecoderConfig {
    fn default() -> Self {
        Self {
            graph: None,
            max_active: 4,
        }
    }
}

impl OnlineCtcFstDecoderConfig {
    /// Convert to sys struct using `to_c_ptr()`
    pub(crate) fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OnlineCtcFstDecoderConfig {
        sys::OnlineCtcFstDecoderConfig {
            graph: to_c_ptr(&self.graph, cstrings),
            max_active: self.max_active,
        }
    }
}

#[derive(Clone, Debug, Default)]
/// Optional homophone replacement resources.
pub struct HomophoneReplacerConfig {
    pub lexicon: Option<String>,
    pub rule_fsts: Option<String>,
}

impl HomophoneReplacerConfig {
    pub(crate) fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::HomophoneReplacerConfig {
        sys::HomophoneReplacerConfig {
            dict_dir: ptr::null(), // not used any more internally
            lexicon: to_c_ptr(&self.lexicon, cstrings),
            rule_fsts: to_c_ptr(&self.rule_fsts, cstrings),
        }
    }
}

#[derive(Clone, Debug)]
/// Top-level configuration for [`OnlineRecognizer`].
pub struct OnlineRecognizerConfig {
    pub feat_config: sys::FeatureConfig,
    pub model_config: OnlineModelConfig,

    /// Decoding method: greedy_search | modified_beam_search
    pub decoding_method: Option<String>,

    /// Used only when decoding_method is modified_beam_search
    pub max_active_paths: i32,

    /// Endpoint detection
    pub enable_endpoint: bool,

    pub rule1_min_trailing_silence: f32,
    pub rule2_min_trailing_silence: f32,
    pub rule3_min_utterance_length: f32,

    pub hotwords_file: Option<String>,
    pub hotwords_score: f32,

    pub ctc_fst_decoder_config: OnlineCtcFstDecoderConfig,

    pub rule_fsts: Option<String>,
    pub rule_fars: Option<String>,

    pub blank_penalty: f32,

    pub hotwords_buf: Option<Vec<u8>>,

    pub hr: HomophoneReplacerConfig,
}

impl Default for OnlineRecognizerConfig {
    fn default() -> Self {
        Self {
            feat_config: sys::FeatureConfig {
                sample_rate: 16000,
                feature_dim: 80,
            },
            model_config: Default::default(),
            decoding_method: None,
            max_active_paths: 0,
            enable_endpoint: false,
            rule1_min_trailing_silence: 0.0,
            rule2_min_trailing_silence: 0.0,
            rule3_min_utterance_length: 0.0,
            hotwords_file: None,
            hotwords_score: 0.0,
            ctc_fst_decoder_config: Default::default(),
            rule_fsts: None,
            rule_fars: None,
            blank_penalty: 0.0,
            hotwords_buf: None,
            hr: Default::default(),
        }
    }
}

impl OnlineRecognizerConfig {
    /// Convert to sys struct for FFI call
    pub(crate) fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OnlineRecognizerConfig {
        sys::OnlineRecognizerConfig {
            feat_config: self.feat_config,
            model_config: self
                .model_config
                .to_sys(cstrings),
            decoding_method: to_c_ptr(&self.decoding_method, cstrings),
            max_active_paths: self.max_active_paths,
            enable_endpoint: self.enable_endpoint as i32,
            rule1_min_trailing_silence: self.rule1_min_trailing_silence,
            rule2_min_trailing_silence: self.rule2_min_trailing_silence,
            rule3_min_utterance_length: self.rule3_min_utterance_length,
            hotwords_file: to_c_ptr(&self.hotwords_file, cstrings),
            hotwords_score: self.hotwords_score,
            ctc_fst_decoder_config: self
                .ctc_fst_decoder_config
                .to_sys(cstrings),
            rule_fsts: to_c_ptr(&self.rule_fsts, cstrings),
            rule_fars: to_c_ptr(&self.rule_fars, cstrings),
            blank_penalty: self.blank_penalty,
            hotwords_buf: self
                .hotwords_buf
                .as_ref()
                .map_or(ptr::null(), |buf| buf.as_ptr() as *const _),
            hotwords_buf_size: self
                .hotwords_buf
                .as_ref()
                .map_or(0, |buf| buf.len() as i32),
            hr: self
                .hr
                .to_sys(cstrings),
        }
    }
}

/// Streaming speech recognizer.
pub struct OnlineRecognizer {
    ptr: *const sys::OnlineRecognizer,
}

impl OnlineRecognizer {
    /// Create a recognizer from `config`.
    pub fn create(config: &OnlineRecognizerConfig) -> Option<Self> {
        let mut cstrings = Vec::new();

        let sys_config = config.to_sys(&mut cstrings);

        let ptr = unsafe { sys::SherpaOnnxCreateOnlineRecognizer(&sys_config) };

        if ptr.is_null() {
            None
        } else {
            Some(Self { ptr })
        }
    }

    /// Create an empty online stream.
    pub fn create_stream(&self) -> OnlineStream {
        let ptr = unsafe { sys::SherpaOnnxCreateOnlineStream(self.ptr) };
        OnlineStream { ptr }
    }

    /// Create a stream with per-stream hotwords.
    pub fn create_stream_with_hotwords(&self, hotwords: &str) -> OnlineStream {
        let c = CString::new(hotwords).unwrap();
        let ptr = unsafe { sys::SherpaOnnxCreateOnlineStreamWithHotwords(self.ptr, c.as_ptr()) };
        OnlineStream { ptr }
    }

    /// Decode one step for `stream`.
    pub fn decode(&self, stream: &OnlineStream) {
        unsafe { sys::SherpaOnnxDecodeOnlineStream(self.ptr, stream.ptr) }
    }

    /// Decode multiple streams in one batch call.
    pub fn decode_multiple_streams(&self, streams: &[&OnlineStream]) {
        let ptrs: Vec<*const sys::OnlineStream> = streams
            .iter()
            .map(|s| s.ptr)
            .collect();
        unsafe {
            sys::SherpaOnnxDecodeMultipleOnlineStreams(self.ptr, ptrs.as_ptr(), ptrs.len() as i32)
        }
    }

    /// Reset stream state after an endpoint or utterance boundary.
    pub fn reset(&self, stream: &OnlineStream) {
        unsafe { sys::SherpaOnnxOnlineStreamReset(self.ptr, stream.ptr) }
    }

    /// Return `true` if endpointing rules say the current utterance has ended.
    pub fn is_endpoint(&self, stream: &OnlineStream) -> bool {
        unsafe { sys::SherpaOnnxOnlineStreamIsEndpoint(self.ptr, stream.ptr) != 0 }
    }

    /// Return `true` if the recognizer has enough audio to run another step.
    pub fn is_ready(&self, stream: &OnlineStream) -> bool {
        unsafe { sys::SherpaOnnxIsOnlineStreamReady(self.ptr, stream.ptr) != 0 }
    }

    /// Fetch the current recognition hypothesis.
    pub fn get_result(&self, stream: &OnlineStream) -> Option<RecognizerResult> {
        unsafe {
            let cstr = sys::SherpaOnnxGetOnlineStreamResultAsJson(self.ptr, stream.ptr);
            if cstr.is_null() {
                return None;
            }
            let s = CStr::from_ptr(cstr)
                .to_string_lossy()
                .into_owned();
            sys::SherpaOnnxDestroyOnlineStreamResultJson(cstr);
            serde_json::from_str(&s).ok()
        }
    }
}

#[derive(Clone, Debug, Deserialize)]
/// Streaming ASR result returned by [`OnlineRecognizer::get_result`].
pub struct RecognizerResult {
    pub text: String,
    pub tokens: Vec<String>,
    pub timestamps: Option<Vec<f32>>,
    pub segment: Option<i32>,
    pub start_time: Option<f32>,
    pub is_final: bool,
}

impl Drop for OnlineRecognizer {
    fn drop(&mut self) {
        unsafe {
            sys::SherpaOnnxDestroyOnlineRecognizer(self.ptr);
        }
    }
}

/// Input stream used by [`OnlineRecognizer`].
pub struct OnlineStream {
    pub(crate) ptr: *const sys::OnlineStream,
}

impl OnlineStream {
    /// Append one chunk of waveform samples.
    pub fn accept_waveform(&self, sample_rate: i32, samples: &[f32]) {
        unsafe {
            sys::SherpaOnnxOnlineStreamAcceptWaveform(
                self.ptr,
                sample_rate,
                samples.as_ptr(),
                samples.len() as i32,
            )
        }
    }

    /// Mark the end of input so the recognizer can flush trailing context.
    pub fn input_finished(&self) {
        unsafe { sys::SherpaOnnxOnlineStreamInputFinished(self.ptr) }
    }

    pub fn set_option(&self, key: &str, value: &str) {
        let key = CString::new(key).unwrap();
        let value = CString::new(value).unwrap();
        unsafe { sys::SherpaOnnxOnlineStreamSetOption(self.ptr, key.as_ptr(), value.as_ptr()) }
    }

    pub fn get_option(&self, key: &str) -> String {
        let key = CString::new(key).unwrap();
        unsafe {
            let p = sys::SherpaOnnxOnlineStreamGetOption(self.ptr, key.as_ptr());
            if p.is_null() {
                String::new()
            } else {
                CStr::from_ptr(p)
                    .to_string_lossy()
                    .into_owned()
            }
        }
    }

    pub fn has_option(&self, key: &str) -> bool {
        let key = CString::new(key).unwrap();
        unsafe { sys::SherpaOnnxOnlineStreamHasOption(self.ptr, key.as_ptr()) != 0 }
    }
}

impl Drop for OnlineStream {
    fn drop(&mut self) {
        unsafe { sys::SherpaOnnxDestroyOnlineStream(self.ptr) }
    }
}


================================================
FILE: sherpa-onnx/rust/sherpa-onnx/src/online_punctuation.rs
================================================
//! Online punctuation restoration.
//!
//! This module wraps the punctuation model used in
//! `rust-api-examples/examples/online_punctuation.rs`.
//!
//! ```no_run
//! use sherpa_onnx::{OnlinePunctuation, OnlinePunctuationConfig, OnlinePunctuationModelConfig};
//!
//! let config = OnlinePunctuationConfig {
//!     model: OnlinePunctuationModelConfig {
//!         cnn_bilstm: Some("./sherpa-onnx-online-punct-en/cnn_bilstm.onnx".into()),
//!         bpe_vocab: Some("./sherpa-onnx-online-punct-en/bpe.vocab".into()),
//!         ..Default::default()
//!     },
//! };
//!
//! let punct = OnlinePunctuation::create(&config).expect("create punctuation");
//! let text = punct
//!     .add_punctuation("how are you i am fine thank you")
//!     .expect("punctuate");
//! println!("{text}");
//! ```

use crate::utils::to_c_ptr;
use sherpa_onnx_sys as sys;
use std::ffi::{CStr, CString};

#[derive(Clone, Debug)]
/// Model-level options for online punctuation restoration.
pub struct OnlinePunctuationModelConfig {
    pub cnn_bilstm: Option<String>,
    pub bpe_vocab: Option<String>,
    pub num_threads: i32,
    pub debug: bool,
    pub provider: Option<String>,
}

impl Default for OnlinePunctuationModelConfig {
    fn default() -> Self {
        Self {
            cnn_bilstm: None,
            bpe_vocab: None,
            num_threads: 1,
            debug: false,
            provider: Some("cpu".to_string()),
        }
    }
}

impl OnlinePunctuationModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OnlinePunctuationModelConfig {
        sys::OnlinePunctuationModelConfig {
            cnn_bilstm: to_c_ptr(&self.cnn_bilstm, cstrings),
            bpe_vocab: to_c_ptr(&self.bpe_vocab, cstrings),
            num_threads: self.num_threads,
            debug: self.debug as i32,
            provider: to_c_ptr(&self.provider, cstrings),
        }
    }
}

#[derive(Clone, Debug, Default)]
/// Top-level configuration for [`OnlinePunctuation`].
pub struct OnlinePunctuationConfig {
    pub model: OnlinePunctuationModelConfig,
}

impl OnlinePunctuationConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OnlinePunctuationConfig {
        sys::OnlinePunctuationConfig {
            model: self
                .model
                .to_sys(cstrings),
        }
    }
}

/// Online punctuation restorer.
///
/// Feed plain text fragments to [`OnlinePunctuation::add_punctuation`] and get
/// punctuated text back.
pub struct OnlinePunctuation {
    ptr: *const sys::OnlinePunctuation,
}

unsafe impl Send for OnlinePunctuation {}

impl OnlinePunctuation {
    pub fn create(config: &OnlinePunctuationConfig) -> Option<Self> {
        let mut cstrings = Vec::new();
        let sys_config = config.to_sys(&mut cstrings);

        let ptr = unsafe { sys::SherpaOnnxCreateOnlinePunctuation(&sys_config) };
        if ptr.is_null() {
            None
        } else {
            Some(Self { ptr })
        }
    }

    /// Add punctuation to a text fragment.
    ///
    /// Returns `None` if the input cannot be converted to a C string or the
    /// native punctuator fails.
    pub fn add_punctuation(&self, text: &str) -> Option<String> {
        let text = CString::new(text).ok()?;

        unsafe {
            let p = sys::SherpaOnnxOnlinePunctuationAddPunct(self.ptr, text.as_ptr());
            if p.is_null() {
                return None;
            }

            let ans = CStr::from_ptr(p)
                .to_string_lossy()
                .into_owned();
            sys::SherpaOnnxOnlinePunctuationFreeText(p);
            Some(ans)
        }
    }
}

impl Drop for OnlinePunctuation {
    fn drop(&mut self) {
        unsafe {
            if !self
                .ptr
                .is_null()
            {
                sys::SherpaOnnxDestroyOnlinePunctuation(self.ptr);
            }
        }
    }
}


================================================
FILE: sherpa-onnx/rust/sherpa-onnx/src/online_speech_denoiser.rs
================================================
//! Streaming speech denoising.
//!
//! This API is intended for chunked audio. Call [`OnlineSpeechDenoiser::run`]
//! on consecutive chunks, then [`OnlineSpeechDenoiser::flush`] after the final
//! chunk to drain any buffered state.

use crate::offline_speech_denoiser::{DenoisedAudio, OfflineSpeechDenoiserModelConfig};
use sherpa_onnx_sys as sys;
use std::ffi::CString;
use std::ptr;

#[derive(Clone, Debug, Default)]
/// Top-level configuration for [`OnlineSpeechDenoiser`].
pub struct OnlineSpeechDenoiserConfig {
    pub model: OfflineSpeechDenoiserModelConfig,
}

impl OnlineSpeechDenoiserConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OnlineSpeechDenoiserConfig {
        sys::OnlineSpeechDenoiserConfig {
            model: self
                .model
                .to_sys(cstrings),
        }
    }
}

/// Streaming speech denoiser.
pub struct OnlineSpeechDenoiser {
    ptr: *const sys::OnlineSpeechDenoiser,
}

impl OnlineSpeechDenoiser {
    /// Create a denoiser from `config`.
    pub fn create(config: &OnlineSpeechDenoiserConfig) -> Option<Self> {
        let mut cstrings = Vec::new();
        let sys_config = config.to_sys(&mut cstrings);
        let ptr = unsafe { sys::SherpaOnnxCreateOnlineSpeechDenoiser(&sys_config) };
        if ptr.is_null() {
            None
        } else {
            Some(Self { ptr })
        }
    }

    /// Denoise one input chunk.
    pub fn run(&self, samples: &[f32], sample_rate: i32) -> DenoisedAudio {
        let samples_ptr = if samples.is_empty() {
            ptr::null()
        } else {
            samples.as_ptr()
        };
        let ptr = unsafe {
            sys::SherpaOnnxOnlineSpeechDenoiserRun(
                self.ptr,
                samples_ptr,
                samples.len() as i32,
                sample_rate,
            )
        };
        DenoisedAudio::from_ptr(ptr)
    }

    /// Flush any internally buffered samples after the final chunk.
    pub fn flush(&self) -> DenoisedAudio {
        let ptr = unsafe { sys::SherpaOnnxOnlineSpeechDenoiserFlush(self.ptr) };
        DenoisedAudio::from_ptr(ptr)
    }

    /// Reset the streaming state.
    pub fn reset(&self) {
        unsafe { sys::SherpaOnnxOnlineSpeechDenoiserReset(self.ptr) }
    }

    /// Return the model sample rate expected by this denoiser.
    pub fn sample_rate(&self) -> i32 {
        unsafe { sys::SherpaOnnxOnlineSpeechDenoiserGetSampleRate(self.ptr) }
    }

    /// Return the preferred input frame shift, in samples.
    pub fn frame_shift_in_samples(&self) -> i32 {
        unsafe { sys::SherpaOnnxOnlineSpeechDenoiserGetFrameShiftInSamples(self.ptr) }
    }
}

impl Drop for OnlineSpeechDenoiser {
    fn drop(&mut self) {
        unsafe { sys::SherpaOnnxDestroyOnlineSpeechDenoiser(self.ptr) }
    }
}


================================================
FILE: sherpa-onnx/rust/sherpa-onnx/src/speaker_embedding.rs
================================================
//! Speaker embedding extraction and speaker search utilities.
//!
//! See:
//!
//! - `rust-api-examples/examples/speaker_embedding_extractor.rs`
//! - `rust-api-examples/examples/speaker_embedding_manager.rs`
//! - `rust-api-examples/examples/speaker_embedding_cosine_similarity.rs`

use crate::{online_asr::OnlineStream, utils::to_c_ptr};
use sherpa_onnx_sys as sys;
use std::ffi::{CStr, CString};
use std::ptr;
use std::slice;

#[derive(Clone, Debug)]
/// Configuration for [`SpeakerEmbeddingExtractor`].
pub struct SpeakerEmbeddingExtractorConfig {
    pub model: Option<String>,
    pub num_threads: i32,
    pub debug: bool,
    pub provider: Option<String>,
}

impl Default for SpeakerEmbeddingExtractorConfig {
    fn default() -> Self {
        Self {
            model: None,
            num_threads: 1,
            debug: false,
            provider: Some("cpu".to_string()),
        }
    }
}

impl SpeakerEmbeddingExtractorConfig {
    pub(crate) fn to_sys(
        &self,
        cstrings: &mut Vec<CString>,
    ) -> sys::SpeakerEmbeddingExtractorConfig {
        sys::SpeakerEmbeddingExtractorConfig {
            model: to_c_ptr(&self.model, cstrings),
            num_threads: self.num_threads,
            debug: self.debug as i32,
            provider: to_c_ptr(&self.provider, cstrings),
        }
    }
}

#[derive(Clone, Debug)]
/// One speaker search result returned by [`SpeakerEmbeddingManager::get_best_matches`].
pub struct SpeakerEmbeddingMatch {
    pub score: f32,
    pub name: String,
}

/// Embedding extractor that consumes audio through an [`OnlineStream`].
pub struct SpeakerEmbeddingExtractor {
    ptr: *const sys::SpeakerEmbeddingExtractor,
    dim: i32,
}

unsafe impl Send for SpeakerEmbeddingExtractor {}

impl SpeakerEmbeddingExtractor {
    /// Create an extractor from `config`.
    pub fn create(config: &SpeakerEmbeddingExtractorConfig) -> Option<Self> {
        let mut cstrings = Vec::new();
        let sys_config = config.to_sys(&mut cstrings);
        let ptr = unsafe { sys::SherpaOnnxCreateSpeakerEmbeddingExtractor(&sys_config) };
        if ptr.is_null() {
            None
        } else {
            let dim = unsafe { sys::SherpaOnnxSpeakerEmbeddingExtractorDim(ptr) };
            Some(Self { ptr, dim })
        }
    }

    /// Return the embedding dimension.
    pub fn dim(&self) -> i32 {
        self.dim
    }

    /// Create an audio stream that can be filled with waveform chunks.
    pub fn create_stream(&self) -> Option<OnlineStream> {
        let ptr = unsafe { sys::SherpaOnnxSpeakerEmbeddingExtractorCreateStream(self.ptr) };
        if ptr.is_null() {
            None
        } else {
            Some(OnlineStream { ptr })
        }
    }

    /// Return `true` if enough audio has been accumulated to compute an embedding.
    pub fn is_ready(&self, stream: &OnlineStream) -> bool {
        unsafe { sys::SherpaOnnxSpeakerEmbeddingExtractorIsReady(self.ptr, stream.ptr) == 1 }
    }

    /// Compute the embedding for `stream`.
    pub fn compute(&self, stream: &OnlineStream) -> Option<Vec<f32>> {
        let p = unsafe {
            sys::SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding(self.ptr, stream.ptr)
        };
        if p.is_null() {
            None
        } else {
            let ans = unsafe { slice::from_raw_parts(p, self.dim as usize) }.to_vec();
            unsafe { sys::SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(p) };
            Some(ans)
        }
    }
}

impl Drop for SpeakerEmbeddingExtractor {
    fn drop(&mut self) {
        unsafe {
            if !self
                .ptr
                .is_null()
            {
                sys::SherpaOnnxDestroySpeakerEmbeddingExtractor(self.ptr);
            }
        }
    }
}

/// In-memory index of named speaker embeddings.
pub struct SpeakerEmbeddingManager {
    ptr: *const sys::SpeakerEmbeddingManager,
    dim: i32,
}

unsafe impl Send for SpeakerEmbeddingManager {}

impl SpeakerEmbeddingManager {
    /// Create a manager for embeddings with the given dimension.
    pub fn create(dim: i32) -> Option<Self> {
        let ptr = unsafe { sys::SherpaOnnxCreateSpeakerEmbeddingManager(dim) };
        if ptr.is_null() {
            None
        } else {
            Some(Self { ptr, dim })
        }
    }

    /// Return the embedding dimension expected by the manager.
    pub fn dim(&self) -> i32 {
        self.dim
    }

    /// Add one embedding for `name`.
    pub fn add(&self, name: &str, embedding: &[f32]) -> bool {
        if embedding.len() != self.dim as usize {
            return false;
        }

        let c_name = match CString::new(name) {
            Ok(v) => v,
            Err(_) => return false,
        };

        unsafe {
            sys::SherpaOnnxSpeakerEmbeddingManagerAdd(self.ptr, c_name.as_ptr(), embedding.as_ptr())
                == 1
        }
    }

    /// Add multiple embeddings for `name`.
    pub fn add_list(&self, name: &str, embeddings: &[Vec<f32>]) -> bool {
        if embeddings.is_empty()
            || embeddings
                .iter()
                .any(|v| v.len() != self.dim as usize)
        {
            return false;
        }

        let c_name = match CString::new(name) {
            Ok(v) => v,
            Err(_) => return false,
        };

        let mut ptrs: Vec<*const f32> = embeddings
            .iter()
            .map(|v| v.as_ptr())
            .collect();
        ptrs.push(ptr::null());

        unsafe {
            sys::SherpaOnnxSpeakerEmbeddingManagerAddList(self.ptr, c_name.as_ptr(), ptrs.as_ptr())
                == 1
        }
    }

    /// Add multiple embeddings laid out as a flattened slice.
    pub fn add_list_flattened(&self, name: &str, embeddings: &[f32]) -> bool {
        if embeddings.is_empty() || embeddings.len() % self.dim as usize != 0 {
            return false;
        }

        let c_name = match CString::new(name) {
            Ok(v) => v,
            Err(_) => return false,
        };

        let n = (embeddings.len() / self.dim as usize) as i32;
        unsafe {
            sys::SherpaOnnxSpeakerEmbeddingManagerAddListFlattened(
                self.ptr,
                c_name.as_ptr(),
                embeddings.as_ptr(),
                n,
            ) == 1
        }
    }

    /// Remove all embeddings stored under `name`.
    pub fn remove(&self, name: &str) -> bool {
        let c_name = match CString::new(name) {
            Ok(v) => v,
            Err(_) => return false,
        };

        unsafe { sys::SherpaOnnxSpeakerEmbeddingManagerRemove(self.ptr, c_name.as_ptr()) == 1 }
    }

    /// Search for the best matching speaker name above `threshold`.
    pub fn search(&self, embedding: &[f32], threshold: f32) -> Option<String> {
        if embedding.len() != self.dim as usize {
            return None;
        }

        unsafe {
            let p = sys::SherpaOnnxSpeakerEmbeddingManagerSearch(
                self.ptr,
                embedding.as_ptr(),
                threshold,
            );
            if p.is_null() {
                None
            } else {
                let ans = CStr::from_ptr(p)
                    .to_string_lossy()
                    .into_owned();
                sys::SherpaOnnxSpeakerEmbeddingManagerFreeSearch(p);
                Some(ans)
            }
        }
    }

    /// Return up to `n` best matches above `threshold`.
    pub fn get_best_matches(
        &self,
        embedding: &[f32],
        threshold: f32,
        n: i32,
    ) -> Vec<SpeakerEmbeddingMatch> {
        if embedding.len() != self.dim as usize {
            return Vec::new();
        }

        unsafe {
            let r = sys::SherpaOnnxSpeakerEmbeddingManagerGetBestMatches(
                self.ptr,
                embedding.as_ptr(),
                threshold,
                n,
            );
            if r.is_null() {
                return Vec::new();
            }

            let result = &*r;
            let matches = slice::from_raw_parts(result.matches, result.count as usize)
                .iter()
                .map(|m| SpeakerEmbeddingMatch {
                    score: m.score,
                    name: if m
                        .name
                        .is_null()
                    {
                        String::new()
                    } else {
                        CStr::from_ptr(m.name)
                            .to_string_lossy()
                            .into_owned()
                    },
                })
                .collect::<Vec<_>>();
            sys::SherpaOnnxSpeakerEmbeddingManagerFreeBestMatches(r);
            matches
        }
    }

    pub fn verify(&self, name: &str, embedding: &[f32], threshold: f32) -> bool {
        if embedding.len() != self.dim as usize {
            return false;
        }

        let c_name = match CString::new(name) {
            Ok(v) => v,
            Err(_) => return false,
        };

        unsafe {
            sys::SherpaOnnxSpeakerEmbeddingManagerVerify(
                self.ptr,
                c_name.as_ptr(),
                embedding.as_ptr(),
                threshold,
            ) == 1
        }
    }

    pub fn contains(&self, name: &str) -> bool {
        let c_name = match CString::new(name) {
            Ok(v) => v,
            Err(_) => return false,
        };

        unsafe { sys::SherpaOnnxSpeakerEmbeddingManagerContains(self.ptr, c_name.as_ptr()) == 1 }
    }

    pub fn num_speakers(&self) -> i32 {
        unsafe { sys::SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(self.ptr) }
    }

    pub fn get_all_speakers(&self) -> Vec<String> {
        unsafe {
            let names = sys::SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(self.ptr);
            if names.is_null() {
                return Vec::new();
            }

            let mut ans = Vec::new();
            let mut p = names;
            while !(*p).is_null() {
                ans.push(
                    CStr::from_ptr(*p)
                        .to_string_lossy()
                        .into_owned(),
                );
                p = p.add(1);
            }
            sys::SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(names);
            ans
        }
    }
}

impl Drop for SpeakerEmbeddingManager {
    fn drop(&mut self) {
        unsafe {
            if !self
                .ptr
                .is_null()
            {
                sys::SherpaOnnxDestroySpeakerEmbeddingManager(self.ptr);
            }
        }
    }
}


================================================
FILE: sherpa-onnx/rust/sherpa-onnx/src/spoken_language_identification.rs
================================================
//! Spoken language identification.
//!
//! This module identifies the language spoken in an audio clip using the
//! Whisper-based language ID API. See
//! [`rust-api-examples/examples/spoken_language_identification.rs`](https://github.com/k2-fsa/sherpa-onnx/blob/master/rust-api-examples/examples/spoken_language_identification.rs)
//! for a complete example.
//!
//! # Example
//!
//! ```no_run
//! use sherpa_onnx::{
//!     SpokenLanguageIdentification, SpokenLanguageIdentificationConfig,
//!     SpokenLanguageIdentificationWhisperConfig, Wave,
//! };
//!
//! let wave = Wave::read("./test.wav").expect("read wave");
//! let config = SpokenLanguageIdentificationConfig {
//!     whisper: SpokenLanguageIdentificationWhisperConfig {
//!         encoder: Some("./sherpa-onnx-whisper-tiny/encoder.int8.onnx".into()),
//!         decoder: Some("./sherpa-onnx-whisper-tiny/decoder.int8.onnx".into()),
//!         tail_paddings: 0,
//!     },
//!     ..Default::default()
//! };
//!
//! let slid = SpokenLanguageIdentification::create(&config).expect("create");
//! let stream = slid.create_stream();
//! stream.accept_waveform(wave.sample_rate(), wave.samples());
//! let result = slid.compute(&stream).expect("compute");
//! println!("{}", result.lang);
//! ```

use crate::offline_asr::OfflineStream;
use crate::utils::to_c_ptr;
use sherpa_onnx_sys as sys;
use std::ffi::{CStr, CString};

#[derive(Clone, Debug, Default)]
/// Whisper model configuration for spoken language identification.
pub struct SpokenLanguageIdentificationWhisperConfig {
    pub encoder: Option<String>,
    pub decoder: Option<String>,
    pub tail_paddings: i32,
}

impl SpokenLanguageIdentificationWhisperConfig {
    fn to_sys(
        &self,
        cstrings: &mut Vec<CString>,
    ) -> sys::SpokenLanguageIdentificationWhisperConfig {
        sys::SpokenLanguageIdentificationWhisperConfig {
            encoder: to_c_ptr(&self.encoder, cstrings),
            decoder: to_c_ptr(&self.decoder, cstrings),
            tail_paddings: self.tail_paddings,
        }
    }
}

#[derive(Clone, Debug)]
/// Top-level configuration for [`SpokenLanguageIdentification`].
pub struct SpokenLanguageIdentificationConfig {
    pub whisper: SpokenLanguageIdentificationWhisperConfig,
    pub num_threads: i32,
    pub debug: bool,
    pub provider: Option<String>,
}

impl Default for SpokenLanguageIdentificationConfig {
    fn default() -> Self {
        Self {
            whisper: Default::default(),
            num_threads: 1,
            debug: false,
            provider: Some("cpu".to_string()),
        }
    }
}

impl SpokenLanguageIdentificationConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::SpokenLanguageIdentificationConfig {
        sys::SpokenLanguageIdentificationConfig {
            whisper: self
                .whisper
                .to_sys(cstrings),
            num_threads: self.num_threads,
            debug: self.debug as i32,
            provider: to_c_ptr(&self.provider, cstrings),
        }
    }
}

#[derive(Clone, Debug)]
/// Result returned by [`SpokenLanguageIdentification::compute`].
pub struct SpokenLanguageIdentificationResult {
    pub lang: String,
}

/// Spoken language identifier.
pub struct SpokenLanguageIdentification {
    ptr: *const sys::SpokenLanguageIdentification,
}

unsafe impl Send for SpokenLanguageIdentification {}

impl SpokenLanguageIdentification {
    /// Create a language identifier from [`SpokenLanguageIdentificationConfig`].
    pub fn create(config: &SpokenLanguageIdentificationConfig) -> Option<Self> {
        let mut cstrings = Vec::new();
        let sys_config = config.to_sys(&mut cstrings);
        let ptr = unsafe { sys::SherpaOnnxCreateSpokenLanguageIdentification(&sys_config) };
        if ptr.is_null() {
            None
        } else {
            Some(Self { ptr })
        }
    }

    /// Create an offline stream for one audio clip.
    pub fn create_stream(&self) -> OfflineStream {
        let ptr =
            unsafe { sys::SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(self.ptr) };
        OfflineStream { ptr }
    }

    /// Compute the spoken language for `stream`.
    pub fn compute(&self, stream: &OfflineStream) -> Option<SpokenLanguageIdentificationResult> {
        unsafe {
            let p = sys::SherpaOnnxSpokenLanguageIdentificationCompute(self.ptr, stream.ptr);
            if p.is_null() {
                return None;
            }

            let ans = SpokenLanguageIdentificationResult {
                lang: if (*p)
                    .lang
                    .is_null()
                {
                    String::new()
                } else {
                    CStr::from_ptr((*p).lang)
                        .to_string_lossy()
                        .into_owned()
                },
            };

            sys::SherpaOnnxDestroySpokenLanguageIdentificationResult(p);
            Some(ans)
        }
    }
}

impl Drop for SpokenLanguageIdentification {
    fn drop(&mut self) {
        unsafe {
            if !self
                .ptr
                .is_null()
            {
                sys::SherpaOnnxDestroySpokenLanguageIdentification(self.ptr);
            }
        }
    }
}


================================================
FILE: sherpa-onnx/rust/sherpa-onnx/src/tts.rs
================================================
//! Offline text-to-speech.
//!
//! Supported model families include VITS, Matcha, Kokoro, Kitten, ZipVoice,
//! Pocket TTS, and Supertonic. See the repository examples:
//!
//! - `rust-api-examples/examples/pocket_tts.rs`
//! - `rust-api-examples/examples/kokoro_tts_en.rs`
//! - `rust-api-examples/examples/kokoro_tts_zh_en.rs`
//! - `rust-api-examples/examples/matcha_tts_en.rs`
//! - `rust-api-examples/examples/matcha_tts_zh.rs`
//! - `rust-api-examples/examples/zipvoice_tts.rs`
//! - `rust-api-examples/examples/supertonic_tts.rs`
//!
//! # Example
//!
//! ```no_run
//! use sherpa_onnx::{
//!     GenerationConfig, OfflineTts, OfflineTtsConfig, OfflineTtsModelConfig,
//!     OfflineTtsPocketModelConfig, Wave,
//! };
//!
//! let config = OfflineTtsConfig {
//!     model: OfflineTtsModelConfig {
//!         pocket: OfflineTtsPocketModelConfig {
//!             lm_flow: Some("./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_flow.int8.onnx".into()),
//!             lm_main: Some("./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_main.int8.onnx".into()),
//!             encoder: Some("./sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx".into()),
//!             decoder: Some("./sherpa-onnx-pocket-tts-int8-2026-01-26/decoder.int8.onnx".into()),
//!             text_conditioner: Some(
//!                 "./sherpa-onnx-pocket-tts-int8-2026-01-26/text_conditioner.onnx".into(),
//!             ),
//!             vocab_json: Some("./sherpa-onnx-pocket-tts-int8-2026-01-26/vocab.json".into()),
//!             token_scores_json: Some(
//!                 "./sherpa-onnx-pocket-tts-int8-2026-01-26/token_scores.json".into(),
//!             ),
//!             ..Default::default()
//!         },
//!         ..Default::default()
//!     },
//!     ..Default::default()
//! };
//!
//! let tts = OfflineTts::create(&config).expect("create tts");
//! let reference = Wave::read("./sherpa-onnx-pocket-tts-int8-2026-01-26/test_wavs/bria.wav")
//!     .expect("read reference");
//! let generation_config = GenerationConfig {
//!     reference_audio: Some(reference.samples().to_vec()),
//!     reference_sample_rate: reference.sample_rate(),
//!     ..Default::default()
//! };
//! let audio = tts
//!     .generate_with_config("Hello from sherpa-onnx", &generation_config, None)
//!     .expect("generate");
//! println!("{}", audio.sample_rate());
//! ```

use crate::utils::to_c_ptr;
use sherpa_onnx_sys as sys;
use std::collections::HashMap;
use std::ffi::CString;
use std::os::raw::c_void;
use std::ptr;
use std::slice;

type ProgressCallback = dyn FnMut(&[f32], f32) -> bool;
type BoxedProgressCallback = Box<ProgressCallback>;

// --- Model config structs ---

#[derive(Clone, Debug)]
/// VITS model configuration.
pub struct OfflineTtsVitsModelConfig {
    pub model: Option<String>,
    pub lexicon: Option<String>,
    pub tokens: Option<String>,
    pub data_dir: Option<String>,
    pub noise_scale: f32,
    pub noise_scale_w: f32,
    pub length_scale: f32,
    pub dict_dir: Option<String>,
}

impl Default for OfflineTtsVitsModelConfig {
    fn default() -> Self {
        Self {
            model: None,
            lexicon: None,
            tokens: None,
            data_dir: None,
            noise_scale: 0.667,
            noise_scale_w: 0.8,
            length_scale: 1.0,
            dict_dir: None,
        }
    }
}

impl OfflineTtsVitsModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflineTtsVitsModelConfig {
        sys::OfflineTtsVitsModelConfig {
            model: to_c_ptr(&self.model, cstrings),
            lexicon: to_c_ptr(&self.lexicon, cstrings),
            tokens: to_c_ptr(&self.tokens, cstrings),
            data_dir: to_c_ptr(&self.data_dir, cstrings),
            noise_scale: self.noise_scale,
            noise_scale_w: self.noise_scale_w,
            length_scale: self.length_scale,
            dict_dir: to_c_ptr(&self.dict_dir, cstrings),
        }
    }
}

#[derive(Clone, Debug)]
/// Matcha model configuration.
pub struct OfflineTtsMatchaModelConfig {
    pub acoustic_model: Option<String>,
    pub vocoder: Option<String>,
    pub lexicon: Option<String>,
    pub tokens: Option<String>,
    pub data_dir: Option<String>,
    pub noise_scale: f32,
    pub length_scale: f32,
    pub dict_dir: Option<String>,
}

impl Default for OfflineTtsMatchaModelConfig {
    fn default() -> Self {
        Self {
            acoustic_model: None,
            vocoder: None,
            lexicon: None,
            tokens: None,
            data_dir: None,
            noise_scale: 0.667,
            length_scale: 1.0,
            dict_dir: None,
        }
    }
}

impl OfflineTtsMatchaModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflineTtsMatchaModelConfig {
        sys::OfflineTtsMatchaModelConfig {
            acoustic_model: to_c_ptr(&self.acoustic_model, cstrings),
            vocoder: to_c_ptr(&self.vocoder, cstrings),
            lexicon: to_c_ptr(&self.lexicon, cstrings),
            tokens: to_c_ptr(&self.tokens, cstrings),
            data_dir: to_c_ptr(&self.data_dir, cstrings),
            noise_scale: self.noise_scale,
            length_scale: self.length_scale,
            dict_dir: to_c_ptr(&self.dict_dir, cstrings),
        }
    }
}

#[derive(Clone, Debug)]
/// Kokoro model configuration.
pub struct OfflineTtsKokoroModelConfig {
    pub model: Option<String>,
    pub voices: Option<String>,
    pub tokens: Option<String>,
    pub data_dir: Option<String>,
    pub length_scale: f32,
    pub dict_dir: Option<String>,
    pub lexicon: Option<String>,
    pub lang: Option<String>,
}

impl Default for OfflineTtsKokoroModelConfig {
    fn default() -> Self {
        Self {
            model: None,
            voices: None,
            tokens: None,
            data_dir: None,
            length_scale: 1.0,
            dict_dir: None,
            lexicon: None,
            lang: None,
        }
    }
}

impl OfflineTtsKokoroModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflineTtsKokoroModelConfig {
        sys::OfflineTtsKokoroModelConfig {
            model: to_c_ptr(&self.model, cstrings),
            voices: to_c_ptr(&self.voices, cstrings),
            tokens: to_c_ptr(&self.tokens, cstrings),
            data_dir: to_c_ptr(&self.data_dir, cstrings),
            length_scale: self.length_scale,
            dict_dir: to_c_ptr(&self.dict_dir, cstrings),
            lexicon: to_c_ptr(&self.lexicon, cstrings),
            lang: to_c_ptr(&self.lang, cstrings),
        }
    }
}

#[derive(Clone, Debug)]
/// Kitten model configuration.
pub struct OfflineTtsKittenModelConfig {
    pub model: Option<String>,
    pub voices: Option<String>,
    pub tokens: Option<String>,
    pub data_dir: Option<String>,
    pub length_scale: f32,
}

impl Default for OfflineTtsKittenModelConfig {
    fn default() -> Self {
        Self {
            model: None,
            voices: None,
            tokens: None,
            data_dir: None,
            length_scale: 1.0,
        }
    }
}

impl OfflineTtsKittenModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflineTtsKittenModelConfig {
        sys::OfflineTtsKittenModelConfig {
            model: to_c_ptr(&self.model, cstrings),
            voices: to_c_ptr(&self.voices, cstrings),
            tokens: to_c_ptr(&self.tokens, cstrings),
            data_dir: to_c_ptr(&self.data_dir, cstrings),
            length_scale: self.length_scale,
        }
    }
}

#[derive(Clone, Debug)]
/// ZipVoice model configuration.
pub struct OfflineTtsZipvoiceModelConfig {
    pub tokens: Option<String>,
    pub encoder: Option<String>,
    pub decoder: Option<String>,
    pub vocoder: Option<String>,
    pub data_dir: Option<String>,
    pub lexicon: Option<String>,
    pub feat_scale: f32,
    pub t_shift: f32,
    pub target_rms: f32,
    pub guidance_scale: f32,
}

impl Default for OfflineTtsZipvoiceModelConfig {
    fn default() -> Self {
        Self {
            tokens: None,
            encoder: None,
            decoder: None,
            vocoder: None,
            data_dir: None,
            lexicon: None,
            feat_scale: 0.0,
            t_shift: 0.0,
            target_rms: 0.0,
            guidance_scale: 0.0,
        }
    }
}

impl OfflineTtsZipvoiceModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflineTtsZipvoiceModelConfig {
        sys::OfflineTtsZipvoiceModelConfig {
            tokens: to_c_ptr(&self.tokens, cstrings),
            encoder: to_c_ptr(&self.encoder, cstrings),
            decoder: to_c_ptr(&self.decoder, cstrings),
            vocoder: to_c_ptr(&self.vocoder, cstrings),
            data_dir: to_c_ptr(&self.data_dir, cstrings),
            lexicon: to_c_ptr(&self.lexicon, cstrings),
            feat_scale: self.feat_scale,
            t_shift: self.t_shift,
            target_rms: self.target_rms,
            guidance_scale: self.guidance_scale,
        }
    }
}

#[derive(Clone, Debug, Default)]
/// Pocket TTS model configuration.
pub struct OfflineTtsPocketModelConfig {
    pub lm_flow: Option<String>,
    pub lm_main: Option<String>,
    pub encoder: Option<String>,
    pub decoder: Option<String>,
    pub text_conditioner: Option<String>,
    pub vocab_json: Option<String>,
    pub token_scores_json: Option<String>,
    pub voice_embedding_cache_capacity: i32,
}

impl OfflineTtsPocketModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflineTtsPocketModelConfig {
        sys::OfflineTtsPocketModelConfig {
            lm_flow: to_c_ptr(&self.lm_flow, cstrings),
            lm_main: to_c_ptr(&self.lm_main, cstrings),
            encoder: to_c_ptr(&self.encoder, cstrings),
            decoder: to_c_ptr(&self.decoder, cstrings),
            text_conditioner: to_c_ptr(&self.text_conditioner, cstrings),
            vocab_json: to_c_ptr(&self.vocab_json, cstrings),
            token_scores_json: to_c_ptr(&self.token_scores_json, cstrings),
            voice_embedding_cache_capacity: self.voice_embedding_cache_capacity,
        }
    }
}

#[derive(Clone, Debug, Default)]
/// Supertonic model configuration.
pub struct OfflineTtsSupertonicModelConfig {
    pub duration_predictor: Option<String>,
    pub text_encoder: Option<String>,
    pub vector_estimator: Option<String>,
    pub vocoder: Option<String>,
    pub tts_json: Option<String>,
    pub unicode_indexer: Option<String>,
    pub voice_style: Option<String>,
}

impl OfflineTtsSupertonicModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflineTtsSupertonicModelConfig {
        sys::OfflineTtsSupertonicModelConfig {
            duration_predictor: to_c_ptr(&self.duration_predictor, cstrings),
            text_encoder: to_c_ptr(&self.text_encoder, cstrings),
            vector_estimator: to_c_ptr(&self.vector_estimator, cstrings),
            vocoder: to_c_ptr(&self.vocoder, cstrings),
            tts_json: to_c_ptr(&self.tts_json, cstrings),
            unicode_indexer: to_c_ptr(&self.unicode_indexer, cstrings),
            voice_style: to_c_ptr(&self.voice_style, cstrings),
        }
    }
}

// --- Aggregate config structs ---

#[derive(Clone, Debug, Default)]
/// Aggregate model configuration for [`OfflineTts`].
///
/// Configure exactly one model family for typical use.
pub struct OfflineTtsModelConfig {
    pub vits: OfflineTtsVitsModelConfig,
    pub matcha: OfflineTtsMatchaModelConfig,
    pub kokoro: OfflineTtsKokoroModelConfig,
    pub kitten: OfflineTtsKittenModelConfig,
    pub zipvoice: OfflineTtsZipvoiceModelConfig,
    pub pocket: OfflineTtsPocketModelConfig,
    pub supertonic: OfflineTtsSupertonicModelConfig,
    pub num_threads: i32,
    pub debug: bool,
    pub provider: Option<String>,
}

impl OfflineTtsModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflineTtsModelConfig {
        sys::OfflineTtsModelConfig {
            vits: self
                .vits
                .to_sys(cstrings),
            num_threads: self.num_threads,
            debug: self.debug as i32,
            provider: to_c_ptr(&self.provider, cstrings),
            matcha: self
                .matcha
                .to_sys(cstrings),
            kokoro: self
                .kokoro
                .to_sys(cstrings),
            kitten: self
                .kitten
                .to_sys(cstrings),
            zipvoice: self
                .zipvoice
                .to_sys(cstrings),
            pocket: self
                .pocket
                .to_sys(cstrings),
            supertonic: self
                .supertonic
                .to_sys(cstrings),
        }
    }
}

#[derive(Clone, Debug, Default)]
/// Top-level configuration for [`OfflineTts`].
pub struct OfflineTtsConfig {
    pub model: OfflineTtsModelConfig,
    pub rule_fsts: Option<String>,
    pub max_num_sentences: i32,
    pub rule_fars: Option<String>,
    pub silence_scale: f32,
}

impl OfflineTtsConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::OfflineTtsConfig {
        sys::OfflineTtsConfig {
            model: self
                .model
                .to_sys(cstrings),
            rule_fsts: to_c_ptr(&self.rule_fsts, cstrings),
            max_num_sentences: self.max_num_sentences,
            rule_fars: to_c_ptr(&self.rule_fars, cstrings),
            silence_scale: self.silence_scale,
        }
    }
}

// --- Generation config ---

#[derive(Clone, Debug)]
/// Per-request generation options for [`OfflineTts::generate_with_config`].
pub struct GenerationConfig {
    pub silence_scale: f32,
    pub speed: f32,
    pub sid: i32,
    pub reference_audio: Option<Vec<f32>>,
    pub reference_sample_rate: i32,
    pub reference_text: Option<String>,
    pub num_steps: i32,
    pub extra: Option<HashMap<String, serde_json::Value>>,
}

impl Default for GenerationConfig {
    fn default() -> Self {
        Self {
            silence_scale: 0.2,
            speed: 1.0,
            sid: 0,
            reference_audio: None,
            reference_sample_rate: 0,
            reference_text: None,
            num_steps: 5,
            extra: None,
        }
    }
}

// --- Generated audio ---

/// Generated audio returned by [`OfflineTts::generate_with_config`].
pub struct GeneratedAudio {
    ptr: *const sys::SherpaOnnxGeneratedAudio,
}

impl GeneratedAudio {
    /// Borrow generated samples.
    pub fn samples(&self) -> &[f32] {
        unsafe {
            let p = &*self.ptr;
            if p.samples
                .is_null()
                || p.n <= 0
            {
                &[]
            } else {
                slice::from_raw_parts(p.samples, p.n as usize)
            }
        }
    }

    /// Return the output sample rate in Hz.
    pub fn sample_rate(&self) -> i32 {
        unsafe { (*self.ptr).sample_rate }
    }

    /// Save generated audio to a WAV file.
    pub fn save(&self, filename: &str) -> bool {
        crate::wave::write(filename, self.samples(), self.sample_rate())
    }
}

impl Drop for GeneratedAudio {
    fn drop(&mut self) {
        unsafe {
            if !self
                .ptr
                .is_null()
            {
                sys::SherpaOnnxDestroyOfflineTtsGeneratedAudio(self.ptr);
            }
        }
    }
}

// --- Offline TTS ---

/// Offline TTS engine.
///
/// ```no_run
/// use sherpa_onnx::{
///     OfflineTts, OfflineTtsConfig, OfflineTtsModelConfig, OfflineTtsPocketModelConfig,
/// };
///
/// let config = OfflineTtsConfig {
///     model: OfflineTtsModelConfig {
///         pocket: OfflineTtsPocketModelConfig {
///             lm_flow: Some("./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_flow.int8.onnx".into()),
///             lm_main: Some("./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_main.int8.onnx".into()),
///             encoder: Some("./sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx".into()),
///             decoder: Some("./sherpa-onnx-pocket-tts-int8-2026-01-26/decoder.int8.onnx".into()),
///             text_conditioner: Some(
///                 "./sherpa-onnx-pocket-tts-int8-2026-01-26/text_conditioner.onnx".into(),
///             ),
///             vocab_json: Some("./sherpa-onnx-pocket-tts-int8-2026-01-26/vocab.json".into()),
///             token_scores_json: Some(
///                 "./sherpa-onnx-pocket-tts-int8-2026-01-26/token_scores.json".into(),
///             ),
///             ..Default::default()
///         },
///         ..Default::default()
///     },
///     ..Default::default()
/// };
///
/// let tts = OfflineTts::create(&config).expect("create tts");
/// println!("{}", tts.sample_rate());
/// ```
pub struct OfflineTts {
    ptr: *const sys::SherpaOnnxOfflineTts,
}

unsafe impl Send for OfflineTts {}

impl OfflineTts {
    /// Create a TTS engine from `config`.
    pub fn create(config: &OfflineTtsConfig) -> Option<Self> {
        let mut cstrings = Vec::new();
        let sys_config = config.to_sys(&mut cstrings);
        let ptr = unsafe { sys::SherpaOnnxCreateOfflineTts(&sys_config) };
        if ptr.is_null() {
            None
        } else {
            Some(Self { ptr })
        }
    }

    /// Return the output sample rate in Hz.
    pub fn sample_rate(&self) -> i32 {
        unsafe { sys::SherpaOnnxOfflineTtsSampleRate(self.ptr) }
    }

    /// Return the number of built-in speakers reported by the model.
    pub fn num_speakers(&self) -> i32 {
        unsafe { sys::SherpaOnnxOfflineTtsNumSpeakers(self.ptr) }
    }

    /// Generate audio for `text`.
    ///
    /// The optional callback receives the samples generated so far together
    /// with a progress value in `[0, 1]`. Return `true` to continue and
    /// `false` to stop early.
    pub fn generate_with_config<F>(
        &self,
        text: &str,
        config: &GenerationConfig,
        callback: Option<F>,
    ) -> Option<GeneratedAudio>
    where
        F: FnMut(&[f32], f32) -> bool + 'static,
    {
        let mut cstrings = Vec::new();

        let c_text = CString::new(text).unwrap();

        // Build extra JSON string
        let extra_json = match &config.extra {
            Some(map) => serde_json::to_string(map).unwrap_or_else(|_| "{}".to_string()),
            None => "{}".to_string(),
        };
        let c_extra = CString::new(extra_json).unwrap();
        let c_ref_text = to_c_ptr(&config.reference_text, &mut cstrings);

        let (ref_ptr, ref_len) = match &config.reference_audio {
            Some(samples) => (samples.as_ptr(), samples.len() as i32),
            None => (ptr::null(), 0),
        };

        let sys_gen_config = sys::SherpaOnnxGenerationConfig {
            silence_scale: config.silence_scale,
            speed: config.speed,
            sid: config.sid,
            reference_audio: ref_ptr,
            reference_audio_len: ref_len,
            reference_sample_rate: config.reference_sample_rate,
            reference_text: c_ref_text,
            num_steps: config.num_steps,
            extra: c_extra.as_ptr(),
        };

        let (c_callback, c_arg): (
            sys::SherpaOnnxGeneratedAudioProgressCallbackWithArg,
            *mut c_void,
        ) = if let Some(cb) = callback {
            let boxed: Box<BoxedProgressCallback> = Box::new(Box::new(cb));
            let raw = Box::into_raw(boxed);
            (Some(progress_callback_trampoline), raw as *mut c_void)
        } else {
            (None, ptr::null_mut())
        };

        let audio_ptr = unsafe {
            sys::SherpaOnnxOfflineTtsGenerateWithConfig(
                self.ptr,
                c_text.as_ptr(),
                &sys_gen_config,
                c_callback,
                c_arg,
            )
        };

        // Clean up the boxed callback if we allocated one
        if !c_arg.is_null() {
            unsafe {
                let _ = Box::from_raw(c_arg as *mut BoxedProgressCallback);
            }
        }

        if audio_ptr.is_null() {
            None
        } else {
            Some(GeneratedAudio { ptr: audio_ptr })
        }
    }
}

impl Drop for OfflineTts {
    fn drop(&mut self) {
        unsafe {
            sys::SherpaOnnxDestroyOfflineTts(self.ptr);
        }
    }
}

unsafe extern "C" fn progress_callback_trampoline(
    samples: *const f32,
    n: i32,
    progress: f32,
    arg: *mut c_void,
) -> i32 {
    let cb = &mut *(arg as *mut BoxedProgressCallback);
    let data = if samples.is_null() || n <= 0 {
        &[]
    } else {
        slice::from_raw_parts(samples, n as usize)
    };
    if cb(data, progress) {
        1
    } else {
        0
    }
}


================================================
FILE: sherpa-onnx/rust/sherpa-onnx/src/utils.rs
================================================
use sherpa_onnx_sys as sys;
use std::ffi::{CStr, CString};
use std::os::raw::c_char;
use std::ptr;

/// Safely convert a C string pointer to a `'static` Rust string slice.
///
/// If the pointer is null, an empty string is returned.
/// If the C string is not valid UTF-8, a lossy UTF-8 conversion is used
/// and the resulting string is leaked to obtain a `'static` lifetime.
fn c_str_to_static_str(ptr: *const c_char) -> &'static str {
    assert!(!ptr.is_null(), "C string pointer is null");

    unsafe {
        CStr::from_ptr(ptr)
            .to_str()
            .unwrap()
    }
}

/// Return the sherpa-onnx version string compiled into the native library.
pub fn version() -> &'static str {
    let ptr = unsafe { sys::SherpaOnnxGetVersionStr() };
    c_str_to_static_str(ptr)
}

/// Return the Git SHA1 of the native library build.
pub fn git_sha1() -> &'static str {
    let ptr = unsafe { sys::SherpaOnnxGetGitSha1() };
    c_str_to_static_str(ptr)
}

/// Return the Git date of the native library build.
pub fn git_date() -> &'static str {
    let ptr = unsafe { sys::SherpaOnnxGetGitDate() };
    c_str_to_static_str(ptr)
}

/// Return `true` if `filename` exists according to the native helper.
pub fn file_exists(filename: &str) -> bool {
    let cstr = match CString::new(filename) {
        Ok(cstr) => cstr,
        Err(_) => {
            // Invalid input (e.g., contains interior NUL); treat as non-existent.
            return false;
        }
    };

    unsafe { sys::SherpaOnnxFileExists(cstr.as_ptr()) != 0 }
}

pub(crate) fn to_c_ptr(opt: &Option<String>, storage: &mut Vec<CString>) -> *const c_char {
    if let Some(s) = opt {
        let c = CString::new(s.as_str()).unwrap();
        let ptr = c.as_ptr();
        storage.push(c);
        ptr
    } else {
        ptr::null()
    }
}


================================================
FILE: sherpa-onnx/rust/sherpa-onnx/src/vad.rs
================================================
//! Voice activity detection and buffering helpers.
//!
//! See `rust-api-examples/examples/silero_vad_remove_silence.rs` for a complete
//! example that removes non-speech segments from a WAV file.

use crate::utils::to_c_ptr;
use std::ffi::CString;
use std::slice;

use sherpa_onnx_sys as sys;

#[derive(Clone, Debug, Default)]
/// Silero VAD configuration.
pub struct SileroVadModelConfig {
    pub model: Option<String>,
    pub threshold: f32,
    pub min_silence_duration: f32,
    pub min_speech_duration: f32,
    pub window_size: i32,
    pub max_speech_duration: f32,
}

impl SileroVadModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::SileroVadModelConfig {
        sys::SileroVadModelConfig {
            model: to_c_ptr(&self.model, cstrings),
            threshold: self.threshold,
            min_silence_duration: self.min_silence_duration,
            min_speech_duration: self.min_speech_duration,
            window_size: self.window_size,
            max_speech_duration: self.max_speech_duration,
        }
    }
}

#[derive(Clone, Debug, Default)]
/// Ten VAD configuration.
pub struct TenVadModelConfig {
    pub model: Option<String>,
    pub threshold: f32,
    pub min_silence_duration: f32,
    pub min_speech_duration: f32,
    pub window_size: i32,
    pub max_speech_duration: f32,
}

impl TenVadModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::TenVadModelConfig {
        sys::TenVadModelConfig {
            model: to_c_ptr(&self.model, cstrings),
            threshold: self.threshold,
            min_silence_duration: self.min_silence_duration,
            min_speech_duration: self.min_speech_duration,
            window_size: self.window_size,
            max_speech_duration: self.max_speech_duration,
        }
    }
}

#[derive(Clone, Debug, Default)]
/// Top-level model configuration for [`VoiceActivityDetector`].
///
/// Configure exactly one model family for typical use.
pub struct VadModelConfig {
    pub silero_vad: SileroVadModelConfig,
    pub ten_vad: TenVadModelConfig,
    pub sample_rate: i32,
    pub num_threads: i32,
    pub provider: Option<String>,
    pub debug: bool,
}

impl VadModelConfig {
    fn to_sys(&self, cstrings: &mut Vec<CString>) -> sys::VadModelConfig {
        sys::VadModelConfig {
            silero_vad: self
                .silero_vad
                .to_sys(cstrings),
            ten_vad: self
                .ten_vad
                .to_sys(cstrings),
            sample_rate: self.sample_rate,
            num_threads: self.num_threads,
            provider: to_c_ptr(&self.provider, cstrings),
            debug: self.debug as i32,
        }
    }
}

/// Circular sample buffer used by some VAD workflows.
pub struct CircularBuffer {
    ptr: *const sys::CircularBuffer,
}

impl CircularBuffer {
    /// Create a new buffer with capacity measured in samples.
    pub fn new(capacity: i32) -> Option<Self> {
        let ptr = unsafe { sys::SherpaOnnxCreateCircularBuffer(capacity) };
        if ptr.is_null() {
            None
        } else {
            Some(Self { ptr })
        }
    }

    /// Append samples to the tail of the buffer.
    pub fn push(&self, samples: &[f32]) {
        unsafe {
            sys::SherpaOnnxCircularBufferPush(self.ptr, samples.as_ptr(), samples.len() as i32)
        }
    }

    /// Copy `n` samples starting at `start_index`.
    pub fn get(&self, start_index: i32, n: i32) -> Vec<f32> {
        unsafe {
            let p = sys::SherpaOnnxCircularBufferGet(self.ptr, start_index, n);
            if p.is_null() {
                return vec![];
            }
            let slice = slice::from_raw_parts(p, n as usize);
            let result = slice.to_vec();
            sys::SherpaOnnxCircularBufferFree(p);
            result
        }
    }

    /// Drop `n` samples from the head of the buffer.
    pub fn pop(&self, n: i32) {
        unsafe { sys::SherpaOnnxCircularBufferPop(self.ptr, n) }
    }

    /// Return the number of samples currently stored.
    pub fn size(&self) -> i32 {
        unsafe { sys::SherpaOnnxCircularBufferSize(self.ptr) }
    }

    /// Return the logical head position.
    pub fn head(&self) -> i32 {
        unsafe { sys::SherpaOnnxCircularBufferHead(self.ptr) }
    }

    /// Clear the buffer.
    pub fn reset(&self) {
        unsafe { sys::SherpaOnnxCircularBufferReset(self.ptr) }
    }
}

impl Drop for CircularBuffer {
    fn drop(&mut self) {
        unsafe { sys::SherpaOnnxDestroyCircularBuffer(self.ptr) }
    }
}

/// One detected speech segment.
pub struct SpeechSegment {
    ptr: *const sys::SpeechSegment,
}

impl SpeechSegment {
    /// Start index, in samples, relative to the input seen so far.
    pub fn start(&self) -> i32 {
        unsafe { (*self.ptr).start }
    }

    /// Borrow the segment samples.
    pub fn samples(&self) -> &[f32] {
        unsafe { slice::from_raw_parts((*self.ptr).samples, (*self.ptr).n as usize) }
    }

    /// Return the number of samples in the segment.
    pub fn n(&self) -> i32 {
        unsafe { (*self.ptr).n }
    }
}

impl Drop for SpeechSegment {
    fn drop(&mut self) {
        unsafe { sys::SherpaOnnxDestroySpeechSegment(self.ptr) }
    }
}

/// Voice activity detector that emits speech segments.
pub struct VoiceActivityDetector {
    ptr: *const sys::VoiceActivityDetector,
}

impl VoiceActivityDetector {
    /// Create a detector and an internal result buffer.
    pub fn create(config: &VadModelConfig, buffer_size_in_seconds: f32) -> Option<Self> {
        let mut cstrings = Vec::new();
        let sys_config = config.to_sys(&mut cstrings);

        let ptr = unsafe {
            sys::SherpaOnnxCreateVoiceActivityDetector(&sys_config, buffer_size_in_seconds)
        };

        if ptr.is_null() {
            None
        } else {
            Some(Self { ptr })
        }
    }

    /// Feed waveform samples to the detector.
    pub fn accept_waveform(&self, samples: &[f32]) {
        unsafe {
            sys::SherpaOnnxVoiceActivityDetectorAcceptWaveform(
                self.ptr,
                samples.as_ptr(),
                samples.len() as i32,
            )
        }
    }

    /// Return `true` if there are no queued speech segments.
    pub fn is_empty(&self) -> bool {
        unsafe { sys::SherpaOnnxVoiceActivityDetectorEmpty(self.ptr) != 0 }
    }

    /// Return `true` if speech is currently being detected.
    pub fn detected(&self) -> bool {
        unsafe { sys::SherpaOnnxVoiceActivityDetectorDetected(self.ptr) != 0 }
    }

    /// Drop the front speech segment, if any.
    pub fn pop(&self) {
        unsafe { sys::SherpaOnnxVoiceActivityDetectorPop(self.ptr) }
    }

    /// Remove all queued segments.
    pub fn clear(&self) {
        unsafe { sys::SherpaOnnxVoiceActivityDetectorClear(self.ptr) }
    }

    /// Borrow the front speech segment, if available.
    pub fn front(&self) -> Option<SpeechSegment> {
        if self.is_empty() {
            return None;
        }

        unsafe {
            let ptr = sys::SherpaOnnxVoiceActivityDetectorFront(self.ptr);
            if ptr.is_null() {
                None
            } else {
                Some(SpeechSegment { ptr })
            }
        }
    }

    /// Reset the detector state.
    pub fn reset(&self) {
        unsafe { sys::SherpaOnnxVoiceActivityDetectorReset(self.ptr) }
    }

    /// Flush any buffered trailing speech into the output queue.
    pub fn flush(&self) {
        unsafe { sys::SherpaOnnxVoiceActivityDetectorFlush(self.ptr) }
    }
}

impl Drop for VoiceActivityDetector {
    fn drop(&mut self) {
        unsafe { sys::SherpaOnnxDestroyVoiceActivityDetector(self.ptr) }
    }
}


================================================
FILE: sherpa-onnx/rust/sherpa-onnx/src/wave.rs
================================================
//! WAV file helpers used by the Rust wrappers and examples.

use std::ffi::CString;
use std::slice;

use sherpa_onnx_sys as sys;

#[derive(Debug)]
/// A WAV file loaded through sherpa-onnx.
///
/// Samples are exposed as normalized `f32` PCM values. Use [`Wave::read`] to
/// load a file and [`Wave::write`] or [`write()`] to save audio.
pub struct Wave {
    inner: *const sys::SherpaOnnxWave,
}

impl Wave {
    /// Read a mono WAV file from disk.
    ///
    /// Returns `None` if the file cannot be opened or decoded.
    pub fn read(filename: &str) -> Option<Self> {
        let c_filename = CString::new(filename).unwrap();
        let wave_ptr = unsafe { sys::SherpaOnnxReadWave(c_filename.as_ptr()) };
        if wave_ptr.is_null() {
            None
        } else {
            Some(Self { inner: wave_ptr })
        }
    }

    /// Write this waveform to a WAV file.
    pub fn write(&self, filename: &str) -> bool {
        let c_filename = CString::new(filename).unwrap();
        unsafe {
            sys::SherpaOnnxWriteWave(
                (*self.inner).samples,
                (*self.inner).num_samples,
                (*self.inner).sample_rate,
                c_filename.as_ptr(),
            ) == 1
        }
    }

    /// Return the sample rate in Hz.
    pub fn sample_rate(&self) -> i32 {
        unsafe { (*self.inner).sample_rate }
    }

    /// Return the number of samples in the waveform.
    pub fn num_samples(&self) -> i32 {
        unsafe { (*self.inner).num_samples }
    }

    /// Return the normalized PCM samples.
    pub fn samples(&self) -> &[f32] {
        unsafe {
            let ptr = (*self.inner).samples;
            let len = (*self.inner).num_samples as usize;

            if ptr.is_null() || len == 0 {
                &[]
            } else {
                slice::from_raw_parts(ptr, len)
            }
        }
    }
}

impl Drop for Wave {
    fn drop(&mut self) {
        unsafe {
            if !self
                .inner
                .is_null()
            {
                sys::SherpaOnnxFreeWave(self.inner);
            }
        }
    }
}

/// Write normalized PCM samples to a WAV file.
///
/// This is convenient when an API returns a plain `Vec<f32>` and you do not
/// need to build a [`Wave`] first.
pub fn write(filename: &str, samples: &[f32], sample_rate: i32) -> bool {
    let c_filename = CString::new(filename).unwrap();
    unsafe {
        sys::SherpaOnnxWriteWave(
            samples.as_ptr(),
            samples.len() as i32,
            sample_rate,
            c_filename.as_ptr(),
        ) == 1
    }
}


================================================
FILE: sherpa-onnx/rust/sherpa-onnx-sys/Cargo.toml
================================================
[package]
name = "sherpa-onnx-sys"
version = "1.12.31"
edition = "2021"
description = "Raw FFI bindings to the sherpa-onnx C API"
license = "Apache-2.0"
repository = "https://github.com/k2-fsa/sherpa-onnx"
readme = "README.md"
links = "sherpa-onnx"

keywords = ["ffi", "speech", "sherpa-onnx", "bindings"]
categories = ["external-ffi-bindings"]

include = [
    "src/**",
    "build.rs",
    "Cargo.toml",
    "README.md",
    "LICENSE*",
]


================================================
FILE: sherpa-onnx/rust/sherpa-onnx-sys/build.rs
================================================
use std::env;

fn main() {
    // Try to get library directory from environment variable
    let lib_dir = env::var("SHERPA_ONNX_LIB_DIR").ok();

    match &lib_dir {
        Some(path) => {
            println!("cargo:warning=SHERPA_ONNX_LIB_DIR={}", path);

            // Tell Rust/Cargo where to find the libraries at build time
            println!("cargo:rustc-link-search=native={}", path);

            // Add rpath for Linux/macOS
            if cfg!(any(target_os = "linux", target_os = "macos")) {
                println!("cargo:rustc-link-arg=-Wl,-rpath,{}", path);
            }
        }
        None => {
            println!("cargo:warning=SHERPA_ONNX_LIB_DIR not set. You may need to set it to the folder containing libsherpa-onnx-c-api and libonnxruntime.");
        }
    }

    // Link the dynamic libraries regardless (cargo will fail later if not found)
    println!("cargo:rustc-link-lib=dylib=sherpa-onnx-c-api");
    println!("cargo:rustc-link-lib=dylib=onnxruntime");

    // Rebuild if the env variable changes
    println!("cargo:rerun-if-env-changed=SHERPA_ONNX_LIB_DIR");
}


================================================
FILE: sherpa-onnx/rust/sherpa-onnx-sys/src/audio_tagging.rs
================================================
#![allow(non_camel_case_types)]
#![allow(non_snake_case)]
#![allow(non_upper_case_globals)]

use std::os::raw::{c_char, c_float};

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineZipformerAudioTaggingModelConfig {
    pub model: *const c_char,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct AudioTaggingModelConfig {
    pub zipformer: OfflineZipformerAudioTaggingModelConfig,
    pub ced: *const c_char,
    pub num_threads: i32,
    pub debug: i32,
    pub provider: *const c_char,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct AudioTaggingConfig {
    pub model: AudioTaggingModelConfig,
    pub labels: *const c_char,
    pub top_k: i32,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct AudioEvent {
    pub name: *const c_char,
    pub index: i32,
    pub prob: c_float,
}

#[repr(C)]
pub struct AudioTagging {
    _private: [u8; 0],
}

extern "C" {
    pub fn SherpaOnnxCreateAudioTagging(config: *const AudioTaggingConfig) -> *const AudioTagging;

    pub fn SherpaOnnxDestroyAudioTagging(tagger: *const AudioTagging);

    pub fn SherpaOnnxAudioTaggingCreateOfflineStream(
        tagger: *const AudioTagging,
    ) -> *const crate::offline_asr::OfflineStream;

    pub fn SherpaOnnxAudioTaggingCompute(
        tagger: *const AudioTagging,
        s: *const crate::offline_asr::OfflineStream,
        top_k: i32,
    ) -> *const *const AudioEvent;

    pub fn SherpaOnnxAudioTaggingFreeResults(p: *const *const AudioEvent);
}


================================================
FILE: sherpa-onnx/rust/sherpa-onnx-sys/src/kws.rs
================================================
#![allow(non_camel_case_types)]
#![allow(non_snake_case)]
#![allow(non_upper_case_globals)]

use std::os::raw::{c_char, c_float};

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct KeywordResult {
    pub keyword: *const c_char,
    pub tokens: *const c_char,
    pub tokens_arr: *const *const c_char,
    pub count: i32,
    pub timestamps: *mut c_float,
    pub start_time: c_float,
    pub json: *const c_char,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct KeywordSpotterConfig {
    pub feat_config: super::online_asr::FeatureConfig,
    pub model_config: super::online_asr::OnlineModelConfig,
    pub max_active_paths: i32,
    pub num_trailing_blanks: i32,
    pub keywords_score: c_float,
    pub keywords_threshold: c_float,
    pub keywords_file: *const c_char,
    pub keywords_buf: *const c_char,
    pub keywords_buf_size: i32,
}

#[repr(C)]
pub struct KeywordSpotter {
    _private: [u8; 0],
}

extern "C" {
    pub fn SherpaOnnxCreateKeywordSpotter(
        config: *const KeywordSpotterConfig,
    ) -> *const KeywordSpotter;

    pub fn SherpaOnnxDestroyKeywordSpotter(spotter: *const KeywordSpotter);

    pub fn SherpaOnnxCreateKeywordStream(
        spotter: *const KeywordSpotter,
    ) -> *const super::online_asr::OnlineStream;

    pub fn SherpaOnnxCreateKeywordStreamWithKeywords(
        spotter: *const KeywordSpotter,
        keywords: *const c_char,
    ) -> *const super::online_asr::OnlineStream;

    pub fn SherpaOnnxIsKeywordStreamReady(
        spotter: *const KeywordSpotter,
        stream: *const super::online_asr::OnlineStream,
    ) -> i32;

    pub fn SherpaOnnxDecodeKeywordStream(
        spotter: *const KeywordSpotter,
        stream: *const super::online_asr::OnlineStream,
    );

    pub fn SherpaOnnxResetKeywordStream(
        spotter: *const KeywordSpotter,
        stream: *const super::online_asr::OnlineStream,
    );

    pub fn SherpaOnnxDecodeMultipleKeywordStreams(
        spotter: *const KeywordSpotter,
        streams: *const *const super::online_asr::OnlineStream,
        n: i32,
    );

    pub fn SherpaOnnxGetKeywordResult(
        spotter: *const KeywordSpotter,
        stream: *const super::online_asr::OnlineStream,
    ) -> *const KeywordResult;

    pub fn SherpaOnnxDestroyKeywordResult(r: *const KeywordResult);

    pub fn SherpaOnnxGetKeywordResultAsJson(
        spotter: *const KeywordSpotter,
        stream: *const super::online_asr::OnlineStream,
    ) -> *const c_char;

    pub fn SherpaOnnxFreeKeywordResultJson(s: *const c_char);
}


================================================
FILE: sherpa-onnx/rust/sherpa-onnx-sys/src/lib.rs
================================================
#![allow(non_camel_case_types)]
#![allow(non_snake_case)]
#![allow(non_upper_case_globals)]

use std::os::raw::c_char;

extern "C" {
    pub fn SherpaOnnxGetVersionStr() -> *const c_char;
    pub fn SherpaOnnxGetGitSha1() -> *const c_char;
    pub fn SherpaOnnxGetGitDate() -> *const c_char;
    pub fn SherpaOnnxFileExists(filename: *const c_char) -> i32;
}

pub mod audio_tagging;
pub mod kws;
pub mod offline_asr;
pub mod offline_punctuation;
pub mod offline_speaker_diarization;
pub mod online_asr;
pub mod online_punctuation;
pub mod speaker_embedding;
pub mod speech_denoiser;
pub mod spoken_language_identification;
pub mod tts;
pub mod vad;
pub mod wave;

pub use audio_tagging::*;
pub use kws::*;
pub use offline_asr::*;
pub use offline_punctuation::*;
pub use offline_speaker_diarization::*;
pub use online_asr::*;
pub use online_punctuation::*;
pub use speaker_embedding::*;
pub use speech_denoiser::*;
pub use spoken_language_identification::*;
pub use tts::*;
pub use vad::*;
pub use wave::*;


================================================
FILE: sherpa-onnx/rust/sherpa-onnx-sys/src/offline_asr.rs
================================================
#![allow(non_camel_case_types)]
#![allow(non_snake_case)]
#![allow(non_upper_case_globals)]

use std::os::raw::{c_char, c_float};

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineTransducerModelConfig {
    pub encoder: *const c_char,
    pub decoder: *const c_char,
    pub joiner: *const c_char,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineParaformerModelConfig {
    pub model: *const c_char,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineNemoEncDecCtcModelConfig {
    pub model: *const c_char,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineWhisperModelConfig {
    pub encoder: *const c_char,
    pub decoder: *const c_char,
    pub language: *const c_char,
    pub task: *const c_char,
    pub tail_paddings: i32,
    pub enable_token_timestamps: i32,
    pub enable_segment_timestamps: i32,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineCanaryModelConfig {
    pub encoder: *const c_char,
    pub decoder: *const c_char,
    pub src_lang: *const c_char,
    pub tgt_lang: *const c_char,
    pub use_pnc: i32,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineFireRedAsrModelConfig {
    pub encoder: *const c_char,
    pub decoder: *const c_char,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineMoonshineModelConfig {
    pub preprocessor: *const c_char,
    pub encoder: *const c_char,
    pub uncached_decoder: *const c_char,
    pub cached_decoder: *const c_char,
    pub merged_decoder: *const c_char,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineTdnnModelConfig {
    pub model: *const c_char,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineLMConfig {
    pub model: *const c_char,
    pub scale: c_float,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineSenseVoiceModelConfig {
    pub model: *const c_char,
    pub language: *const c_char,
    pub use_itn: i32,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineDolphinModelConfig {
    pub model: *const c_char,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineZipformerCtcModelConfig {
    pub model: *const c_char,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineWenetCtcModelConfig {
    pub model: *const c_char,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineOmnilingualAsrCtcModelConfig {
    pub model: *const c_char,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineFunASRNanoModelConfig {
    pub encoder_adaptor: *const c_char,
    pub llm: *const c_char,
    pub embedding: *const c_char,
    pub tokenizer: *const c_char,
    pub system_prompt: *const c_char,
    pub user_prompt: *const c_char,
    pub max_new_tokens: i32,
    pub temperature: c_float,
    pub top_p: c_float,
    pub seed: i32,
    pub language: *const c_char,
    pub itn: i32,
    pub hotwords: *const c_char,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineMedAsrCtcModelConfig {
    pub model: *const c_char,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineFireRedAsrCtcModelConfig {
    pub model: *const c_char,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineModelConfig {
    pub transducer: OfflineTransducerModelConfig,
    pub paraformer: OfflineParaformerModelConfig,
    pub nemo_ctc: OfflineNemoEncDecCtcModelConfig,
    pub whisper: OfflineWhisperModelConfig,
    pub tdnn: OfflineTdnnModelConfig,

    pub tokens: *const c_char,
    pub num_threads: i32,
    pub debug: i32,
    pub provider: *const c_char,
    pub model_type: *const c_char,
    pub modeling_unit: *const c_char,
    pub bpe_vocab: *const c_char,
    pub telespeech_ctc: *const c_char,

    pub sense_voice: OfflineSenseVoiceModelConfig,
    pub moonshine: OfflineMoonshineModelConfig,
    pub fire_red_asr: OfflineFireRedAsrModelConfig,
    pub dolphin: OfflineDolphinModelConfig,
    pub zipformer_ctc: OfflineZipformerCtcModelConfig,
    pub canary: OfflineCanaryModelConfig,
    pub wenet_ctc: OfflineWenetCtcModelConfig,
    pub omnilingual: OfflineOmnilingualAsrCtcModelConfig,
    pub medasr: OfflineMedAsrCtcModelConfig,
    pub funasr_nano: OfflineFunASRNanoModelConfig,
    pub fire_red_asr_ctc: OfflineFireRedAsrCtcModelConfig,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineRecognizerConfig {
    pub feat_config: super::online_asr::FeatureConfig,
    pub model_config: OfflineModelConfig,
    pub lm_config: OfflineLMConfig,

    pub decoding_method: *const c_char,
    pub max_active_paths: i32,
    pub hotwords_file: *const c_char,
    pub hotwords_score: c_float,
    pub rule_fsts: *const c_char,
    pub rule_fars: *const c_char,
    pub blank_penalty: c_float,
    pub hr: super::online_asr::HomophoneReplacerConfig,
}

#[repr(C)]
pub struct OfflineRecognizer {
    _private: [u8; 0],
}

#[repr(C)]
pub struct OfflineStream {
    _private: [u8; 0],
}

extern "C" {
    pub fn SherpaOnnxCreateOfflineRecognizer(
        config: *const OfflineRecognizerConfig,
    ) -> *const OfflineRecognizer;

    pub fn SherpaOnnxDestroyOfflineRecognizer(recognizer: *const OfflineRecognizer);

    pub fn SherpaOnnxCreateOfflineStream(
        recognizer: *const OfflineRecognizer,
    ) -> *const OfflineStream;

    pub fn SherpaOnnxCreateOfflineStreamWithHotwords(
        recognizer: *const OfflineRecognizer,
        hotwords: *const c_char,
    ) -> *const OfflineStream;

    pub fn SherpaOnnxDestroyOfflineStream(stream: *const OfflineStream);

    pub fn SherpaOnnxAcceptWaveformOffline(
        stream: *const OfflineStream,
        sample_rate: i32,
        samples: *const f32,
        n: i32,
    );

    pub fn SherpaOnnxOfflineStreamSetOption(
        stream: *const OfflineStream,
        key: *const c_char,
        value: *const c_char,
    );

    pub fn SherpaOnnxOfflineStreamGetOption(
        stream: *const OfflineStream,
        key: *const c_char,
    ) -> *const c_char;

    pub fn SherpaOnnxOfflineStreamHasOption(
        stream: *const OfflineStream,
        key: *const c_char,
    ) -> i32;

    pub fn SherpaOnnxDecodeOfflineStream(
        recognizer: *const OfflineRecognizer,
        stream: *const OfflineStream,
    );

    pub fn SherpaOnnxDecodeMultipleOfflineStreams(
        recognizer: *const OfflineRecognizer,
        streams: *const *const OfflineStream,
        n: i32,
    );

    pub fn SherpaOnnxGetOfflineStreamResultAsJson(stream: *const OfflineStream) -> *const c_char;

    pub fn SherpaOnnxDestroyOfflineStreamResultJson(s: *const c_char);
}


================================================
FILE: sherpa-onnx/rust/sherpa-onnx-sys/src/offline_punctuation.rs
================================================
#![allow(non_camel_case_types)]
#![allow(non_snake_case)]
#![allow(non_upper_case_globals)]

use std::os::raw::c_char;

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflinePunctuationModelConfig {
    pub ct_transformer: *const c_char,
    pub num_threads: i32,
    pub debug: i32,
    pub provider: *const c_char,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflinePunctuationConfig {
    pub model: OfflinePunctuationModelConfig,
}

#[repr(C)]
pub struct OfflinePunctuation {
    _private: [u8; 0],
}

extern "C" {
    pub fn SherpaOnnxCreateOfflinePunctuation(
        config: *const OfflinePunctuationConfig,
    ) -> *const OfflinePunctuation;

    pub fn SherpaOnnxDestroyOfflinePunctuation(punct: *const OfflinePunctuation);

    pub fn SherpaOfflinePunctuationAddPunct(
        punct: *const OfflinePunctuation,
        text: *const c_char,
    ) -> *const c_char;

    pub fn SherpaOfflinePunctuationFreeText(text: *const c_char);
}


================================================
FILE: sherpa-onnx/rust/sherpa-onnx-sys/src/offline_speaker_diarization.rs
================================================
#![allow(non_camel_case_types)]
#![allow(non_snake_case)]
#![allow(non_upper_case_globals)]

use std::os::raw::{c_char, c_float};

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineSpeakerSegmentationPyannoteModelConfig {
    pub model: *const c_char,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineSpeakerSegmentationModelConfig {
    pub pyannote: OfflineSpeakerSegmentationPyannoteModelConfig,
    pub num_threads: i32,
    pub debug: i32,
    pub provider: *const c_char,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct FastClusteringConfig {
    pub num_clusters: i32,
    pub threshold: c_float,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineSpeakerDiarizationConfig {
    pub segmentation: OfflineSpeakerSegmentationModelConfig,
    pub embedding: crate::speaker_embedding::SpeakerEmbeddingExtractorConfig,
    pub clustering: FastClusteringConfig,
    pub min_duration_on: c_float,
    pub min_duration_off: c_float,
}

#[repr(C)]
pub struct OfflineSpeakerDiarization {
    _private: [u8; 0],
}

#[repr(C)]
pub struct OfflineSpeakerDiarizationResult {
    _private: [u8; 0],
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineSpeakerDiarizationSegment {
    pub start: c_float,
    pub end: c_float,
    pub speaker: i32,
}

extern "C" {
    pub fn SherpaOnnxCreateOfflineSpeakerDiarization(
        config: *const OfflineSpeakerDiarizationConfig,
    ) -> *const OfflineSpeakerDiarization;

    pub fn SherpaOnnxDestroyOfflineSpeakerDiarization(sd: *const OfflineSpeakerDiarization);

    pub fn SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(
        sd: *const OfflineSpeakerDiarization,
    ) -> i32;

    pub fn SherpaOnnxOfflineSpeakerDiarizationSetConfig(
        sd: *const OfflineSpeakerDiarization,
        config: *const OfflineSpeakerDiarizationConfig,
    );

    pub fn SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers(
        r: *const OfflineSpeakerDiarizationResult,
    ) -> i32;

    pub fn SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(
        r: *const OfflineSpeakerDiarizationResult,
    ) -> i32;

    pub fn SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(
        r: *const OfflineSpeakerDiarizationResult,
    ) -> *const OfflineSpeakerDiarizationSegment;

    pub fn SherpaOnnxOfflineSpeakerDiarizationDestroySegment(
        s: *const OfflineSpeakerDiarizationSegment,
    );

    pub fn SherpaOnnxOfflineSpeakerDiarizationProcess(
        sd: *const OfflineSpeakerDiarization,
        samples: *const c_float,
        n: i32,
    ) -> *const OfflineSpeakerDiarizationResult;

    pub fn SherpaOnnxOfflineSpeakerDiarizationDestroyResult(
        r: *const OfflineSpeakerDiarizationResult,
    );
}


================================================
FILE: sherpa-onnx/rust/sherpa-onnx-sys/src/online_asr.rs
================================================
#![allow(non_camel_case_types)]
#![allow(non_snake_case)]
#![allow(non_upper_case_globals)]

use std::os::raw::{c_char, c_float};

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OnlineTransducerModelConfig {
    pub encoder: *const c_char,
    pub decoder: *const c_char,
    pub joiner: *const c_char,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OnlineParaformerModelConfig {
    pub encoder: *const c_char,
    pub decoder: *const c_char,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OnlineZipformer2CtcModelConfig {
    pub model: *const c_char,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OnlineNemoCtcModelConfig {
    pub model: *const c_char,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OnlineToneCtcModelConfig {
    pub model: *const c_char,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OnlineModelConfig {
    pub transducer: OnlineTransducerModelConfig,
    pub paraformer: OnlineParaformerModelConfig,
    pub zipformer2_ctc: OnlineZipformer2CtcModelConfig,

    pub tokens: *const c_char,
    pub num_threads: i32,
    pub provider: *const c_char,
    pub debug: i32,

    pub model_type: *const c_char,

    // cjkchar | bpe | cjkchar+bpe
    pub modeling_unit: *const c_char,

    pub bpe_vocab: *const c_char,

    pub tokens_buf: *const u8,
    pub tokens_buf_size: i32,

    pub nemo_ctc: OnlineNemoCtcModelConfig,
    pub t_one_ctc: OnlineToneCtcModelConfig,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct FeatureConfig {
    pub sample_rate: i32,
    pub feature_dim: i32,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OnlineCtcFstDecoderConfig {
    pub graph: *const c_char,
    pub max_active: i32,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct HomophoneReplacerConfig {
    pub dict_dir: *const c_char,
    pub lexicon: *const c_char,
    pub rule_fsts: *const c_char,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OnlineRecognizerConfig {
    pub feat_config: FeatureConfig,
    pub model_config: OnlineModelConfig,

    // greedy_search | modified_beam_search
    pub decoding_method: *const c_char,

    pub max_active_paths: i32,

    pub enable_endpoint: i32,

    pub rule1_min_trailing_silence: c_float,
    pub rule2_min_trailing_silence: c_float,
    pub rule3_min_utterance_length: c_float,

    pub hotwords_file: *const c_char,
    pub hotwords_score: c_float,

    pub ctc_fst_decoder_config: OnlineCtcFstDecoderConfig,

    pub rule_fsts: *const c_char,
    pub rule_fars: *const c_char,

    pub blank_penalty: c_float,

    pub hotwords_buf: *const u8,
    pub hotwords_buf_size: i32,

    pub hr: HomophoneReplacerConfig,
}

#[repr(C)]
pub struct OnlineRecognizer {
    _private: [u8; 0],
}

#[repr(C)]
pub struct OnlineStream {
    _private: [u8; 0],
}

extern "C" {
    pub fn SherpaOnnxCreateOnlineRecognizer(
        config: *const OnlineRecognizerConfig,
    ) -> *const OnlineRecognizer;

    pub fn SherpaOnnxDestroyOnlineRecognizer(recognizer: *const OnlineRecognizer);

    pub fn SherpaOnnxCreateOnlineStream(recognizer: *const OnlineRecognizer)
        -> *const OnlineStream;

    pub fn SherpaOnnxCreateOnlineStreamWithHotwords(
        recognizer: *const OnlineRecognizer,
        hotwords: *const c_char,
    ) -> *const OnlineStream;

    pub fn SherpaOnnxDestroyOnlineStream(stream: *const OnlineStream);

    pub fn SherpaOnnxOnlineStreamAcceptWaveform(
        stream: *const OnlineStream,
        sample_rate: i32,
        samples: *const f32,
        n: i32,
    );

    pub fn SherpaOnnxIsOnlineStreamReady(
        recognizer: *const OnlineRecognizer,
        stream: *const OnlineStream,
    ) -> i32;

    pub fn SherpaOnnxDecodeOnlineStream(
        recognizer: *const OnlineRecognizer,
        stream: *const OnlineStream,
    );

    pub fn SherpaOnnxDecodeMultipleOnlineStreams(
        recognizer: *const OnlineRecognizer,
        streams: *const *const OnlineStream,
        n: i32,
    );

    pub fn SherpaOnnxGetOnlineStreamResultAsJson(
        recognizer: *const OnlineRecognizer,
        stream: *const OnlineStream,
    ) -> *const c_char;

    pub fn SherpaOnnxDestroyOnlineStreamResultJson(s: *const c_char);

    pub fn SherpaOnnxOnlineStreamReset(
        recognizer: *const OnlineRecognizer,
        stream: *const OnlineStream,
    );

    pub fn SherpaOnnxOnlineStreamInputFinished(stream: *const OnlineStream);

    pub fn SherpaOnnxOnlineStreamSetOption(
        stream: *const OnlineStream,
        key: *const c_char,
        value: *const c_char,
    );

    pub fn SherpaOnnxOnlineStreamGetOption(
        stream: *const OnlineStream,
        key: *const c_char,
    ) -> *const c_char;

    pub fn SherpaOnnxOnlineStreamHasOption(
        stream: *const OnlineStream,
        key: *const c_char,
    ) -> i32;

    pub fn SherpaOnnxOnlineStreamIsEndpoint(
        recognizer: *const OnlineRecognizer,
        stream: *const OnlineStream,
    ) -> i32;
}


================================================
FILE: sherpa-onnx/rust/sherpa-onnx-sys/src/online_punctuation.rs
================================================
#![allow(non_camel_case_types)]
#![allow(non_snake_case)]
#![allow(non_upper_case_globals)]

use std::os::raw::c_char;

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OnlinePunctuationModelConfig {
    pub cnn_bilstm: *const c_char,
    pub bpe_vocab: *const c_char,
    pub num_threads: i32,
    pub debug: i32,
    pub provider: *const c_char,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OnlinePunctuationConfig {
    pub model: OnlinePunctuationModelConfig,
}

#[repr(C)]
pub struct OnlinePunctuation {
    _private: [u8; 0],
}

extern "C" {
    pub fn SherpaOnnxCreateOnlinePunctuation(
        config: *const OnlinePunctuationConfig,
    ) -> *const OnlinePunctuation;

    pub fn SherpaOnnxDestroyOnlinePunctuation(punctuation: *const OnlinePunctuation);

    pub fn SherpaOnnxOnlinePunctuationAddPunct(
        punctuation: *const OnlinePunctuation,
        text: *const c_char,
    ) -> *const c_char;

    pub fn SherpaOnnxOnlinePunctuationFreeText(text: *const c_char);
}


================================================
FILE: sherpa-onnx/rust/sherpa-onnx-sys/src/speaker_embedding.rs
================================================
#![allow(non_camel_case_types)]
#![allow(non_snake_case)]
#![allow(non_upper_case_globals)]

use std::os::raw::{c_char, c_float};

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct SpeakerEmbeddingExtractorConfig {
    pub model: *const c_char,
    pub num_threads: i32,
    pub debug: i32,
    pub provider: *const c_char,
}

#[repr(C)]
pub struct SpeakerEmbeddingExtractor {
    _private: [u8; 0],
}

#[repr(C)]
pub struct SpeakerEmbeddingManager {
    _private: [u8; 0],
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct SpeakerEmbeddingManagerSpeakerMatch {
    pub score: c_float,
    pub name: *const c_char,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct SpeakerEmbeddingManagerBestMatchesResult {
    pub matches: *const SpeakerEmbeddingManagerSpeakerMatch,
    pub count: i32,
}

extern "C" {
    pub fn SherpaOnnxCreateSpeakerEmbeddingExtractor(
        config: *const SpeakerEmbeddingExtractorConfig,
    ) -> *const SpeakerEmbeddingExtractor;

    pub fn SherpaOnnxDestroySpeakerEmbeddingExtractor(p: *const SpeakerEmbeddingExtractor);

    pub fn SherpaOnnxSpeakerEmbeddingExtractorDim(p: *const SpeakerEmbeddingExtractor) -> i32;

    pub fn SherpaOnnxSpeakerEmbeddingExtractorCreateStream(
        p: *const SpeakerEmbeddingExtractor,
    ) -> *const crate::online_asr::OnlineStream;

    pub fn SherpaOnnxSpeakerEmbeddingExtractorIsReady(
        p: *const SpeakerEmbeddingExtractor,
        s: *const crate::online_asr::OnlineStream,
    ) -> i32;

    pub fn SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding(
        p: *const SpeakerEmbeddingExtractor,
        s: *const crate::online_asr::OnlineStream,
    ) -> *const c_float;

    pub fn SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(v: *const c_float);

    pub fn SherpaOnnxCreateSpeakerEmbeddingManager(dim: i32) -> *const SpeakerEmbeddingManager;

    pub fn SherpaOnnxDestroySpeakerEmbeddingManager(p: *const SpeakerEmbeddingManager);

    pub fn SherpaOnnxSpeakerEmbeddingManagerAdd(
        p: *const SpeakerEmbeddingManager,
        name: *const c_char,
        v: *const c_float,
    ) -> i32;

    pub fn SherpaOnnxSpeakerEmbeddingManagerAddList(
        p: *const SpeakerEmbeddingManager,
        name: *const c_char,
        v: *const *const c_float,
    ) -> i32;

    pub fn SherpaOnnxSpeakerEmbeddingManagerAddListFlattened(
        p: *const SpeakerEmbeddingManager,
        name: *const c_char,
        v: *const c_float,
        n: i32,
    ) -> i32;

    pub fn SherpaOnnxSpeakerEmbeddingManagerRemove(
        p: *const SpeakerEmbeddingManager,
        name: *const c_char,
    ) -> i32;

    pub fn SherpaOnnxSpeakerEmbeddingManagerSearch(
        p: *const SpeakerEmbeddingManager,
        v: *const c_float,
        threshold: c_float,
    ) -> *const c_char;

    pub fn SherpaOnnxSpeakerEmbeddingManagerFreeSearch(name: *const c_char);

    pub fn SherpaOnnxSpeakerEmbeddingManagerGetBestMatches(
        p: *const SpeakerEmbeddingManager,
        v: *const c_float,
        threshold: c_float,
        n: i32,
    ) -> *const SpeakerEmbeddingManagerBestMatchesResult;

    pub fn SherpaOnnxSpeakerEmbeddingManagerFreeBestMatches(
        r: *const SpeakerEmbeddingManagerBestMatchesResult,
    );

    pub fn SherpaOnnxSpeakerEmbeddingManagerVerify(
        p: *const SpeakerEmbeddingManager,
        name: *const c_char,
        v: *const c_float,
        threshold: c_float,
    ) -> i32;

    pub fn SherpaOnnxSpeakerEmbeddingManagerContains(
        p: *const SpeakerEmbeddingManager,
        name: *const c_char,
    ) -> i32;

    pub fn SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(
        p: *const SpeakerEmbeddingManager,
    ) -> i32;

    pub fn SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(
        p: *const SpeakerEmbeddingManager,
    ) -> *const *const c_char;

    pub fn SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(names: *const *const c_char);
}


================================================
FILE: sherpa-onnx/rust/sherpa-onnx-sys/src/speech_denoiser.rs
================================================
use std::os::raw::c_char;

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineSpeechDenoiserGtcrnModelConfig {
    pub model: *const c_char,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineSpeechDenoiserDpdfNetModelConfig {
    pub model: *const c_char,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineSpeechDenoiserModelConfig {
    pub gtcrn: OfflineSpeechDenoiserGtcrnModelConfig,
    pub num_threads: i32,
    pub debug: i32,
    pub provider: *const c_char,
    pub dpdfnet: OfflineSpeechDenoiserDpdfNetModelConfig,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineSpeechDenoiserConfig {
    pub model: OfflineSpeechDenoiserModelConfig,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OnlineSpeechDenoiserConfig {
    pub model: OfflineSpeechDenoiserModelConfig,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct DenoisedAudio {
    pub samples: *const f32,
    pub n: i32,
    pub sample_rate: i32,
}

#[repr(C)]
pub struct OfflineSpeechDenoiser {
    _private: [u8; 0],
}

#[repr(C)]
pub struct OnlineSpeechDenoiser {
    _private: [u8; 0],
}

extern "C" {
    pub fn SherpaOnnxCreateOfflineSpeechDenoiser(
        config: *const OfflineSpeechDenoiserConfig,
    ) -> *const OfflineSpeechDenoiser;
    pub fn SherpaOnnxDestroyOfflineSpeechDenoiser(p: *const OfflineSpeechDenoiser);
    pub fn SherpaOnnxOfflineSpeechDenoiserGetSampleRate(p: *const OfflineSpeechDenoiser) -> i32;
    pub fn SherpaOnnxOfflineSpeechDenoiserRun(
        p: *const OfflineSpeechDenoiser,
        samples: *const f32,
        n: i32,
        sample_rate: i32,
    ) -> *const DenoisedAudio;

    pub fn SherpaOnnxCreateOnlineSpeechDenoiser(
        config: *const OnlineSpeechDenoiserConfig,
    ) -> *const OnlineSpeechDenoiser;
    pub fn SherpaOnnxDestroyOnlineSpeechDenoiser(p: *const OnlineSpeechDenoiser);
    pub fn SherpaOnnxOnlineSpeechDenoiserGetSampleRate(p: *const OnlineSpeechDenoiser) -> i32;
    pub fn SherpaOnnxOnlineSpeechDenoiserGetFrameShiftInSamples(
        p: *const OnlineSpeechDenoiser,
    ) -> i32;
    pub fn SherpaOnnxOnlineSpeechDenoiserRun(
        p: *const OnlineSpeechDenoiser,
        samples: *const f32,
        n: i32,
        sample_rate: i32,
    ) -> *const DenoisedAudio;
    pub fn SherpaOnnxOnlineSpeechDenoiserFlush(
        p: *const OnlineSpeechDenoiser,
    ) -> *const DenoisedAudio;
    pub fn SherpaOnnxOnlineSpeechDenoiserReset(p: *const OnlineSpeechDenoiser);

    pub fn SherpaOnnxDestroyDenoisedAudio(audio: *const DenoisedAudio);
}


================================================
FILE: sherpa-onnx/rust/sherpa-onnx-sys/src/spoken_language_identification.rs
================================================
#![allow(non_camel_case_types)]
#![allow(non_snake_case)]
#![allow(non_upper_case_globals)]

use std::os::raw::c_char;

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct SpokenLanguageIdentificationWhisperConfig {
    pub encoder: *const c_char,
    pub decoder: *const c_char,
    pub tail_paddings: i32,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct SpokenLanguageIdentificationConfig {
    pub whisper: SpokenLanguageIdentificationWhisperConfig,
    pub num_threads: i32,
    pub debug: i32,
    pub provider: *const c_char,
}

#[repr(C)]
pub struct SpokenLanguageIdentification {
    _private: [u8; 0],
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct SpokenLanguageIdentificationResult {
    pub lang: *const c_char,
}

extern "C" {
    pub fn SherpaOnnxCreateSpokenLanguageIdentification(
        config: *const SpokenLanguageIdentificationConfig,
    ) -> *const SpokenLanguageIdentification;

    pub fn SherpaOnnxDestroySpokenLanguageIdentification(
        slid: *const SpokenLanguageIdentification,
    );

    pub fn SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(
        slid: *const SpokenLanguageIdentification,
    ) -> *const super::offline_asr::OfflineStream;

    pub fn SherpaOnnxSpokenLanguageIdentificationCompute(
        slid: *const SpokenLanguageIdentification,
        stream: *const super::offline_asr::OfflineStream,
    ) -> *const SpokenLanguageIdentificationResult;

    pub fn SherpaOnnxDestroySpokenLanguageIdentificationResult(
        r: *const SpokenLanguageIdentificationResult,
    );
}


================================================
FILE: sherpa-onnx/rust/sherpa-onnx-sys/src/tts.rs
================================================
#![allow(non_camel_case_types)]
#![allow(non_snake_case)]
#![allow(non_upper_case_globals)]

use std::os::raw::{c_char, c_float, c_void};

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineTtsVitsModelConfig {
    pub model: *const c_char,
    pub lexicon: *const c_char,
    pub tokens: *const c_char,
    pub data_dir: *const c_char,
    pub noise_scale: c_float,
    pub noise_scale_w: c_float,
    pub length_scale: c_float,
    pub dict_dir: *const c_char,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineTtsMatchaModelConfig {
    pub acoustic_model: *const c_char,
    pub vocoder: *const c_char,
    pub lexicon: *const c_char,
    pub tokens: *const c_char,
    pub data_dir: *const c_char,
    pub noise_scale: c_float,
    pub length_scale: c_float,
    pub dict_dir: *const c_char,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineTtsKokoroModelConfig {
    pub model: *const c_char,
    pub voices: *const c_char,
    pub tokens: *const c_char,
    pub data_dir: *const c_char,
    pub length_scale: c_float,
    pub dict_dir: *const c_char,
    pub lexicon: *const c_char,
    pub lang: *const c_char,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineTtsKittenModelConfig {
    pub model: *const c_char,
    pub voices: *const c_char,
    pub tokens: *const c_char,
    pub data_dir: *const c_char,
    pub length_scale: c_float,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineTtsZipvoiceModelConfig {
    pub tokens: *const c_char,
    pub encoder: *const c_char,
    pub decoder: *const c_char,
    pub vocoder: *const c_char,
    pub data_dir: *const c_char,
    pub lexicon: *const c_char,
    pub feat_scale: c_float,
    pub t_shift: c_float,
    pub target_rms: c_float,
    pub guidance_scale: c_float,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineTtsPocketModelConfig {
    pub lm_flow: *const c_char,
    pub lm_main: *const c_char,
    pub encoder: *const c_char,
    pub decoder: *const c_char,
    pub text_conditioner: *const c_char,
    pub vocab_json: *const c_char,
    pub token_scores_json: *const c_char,
    pub voice_embedding_cache_capacity: i32,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineTtsSupertonicModelConfig {
    pub duration_predictor: *const c_char,
    pub text_encoder: *const c_char,
    pub vector_estimator: *const c_char,
    pub vocoder: *const c_char,
    pub tts_json: *const c_char,
    pub unicode_indexer: *const c_char,
    pub voice_style: *const c_char,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineTtsModelConfig {
    pub vits: OfflineTtsVitsModelConfig,
    pub num_threads: i32,
    pub debug: i32,
    pub provider: *const c_char,
    pub matcha: OfflineTtsMatchaModelConfig,
    pub kokoro: OfflineTtsKokoroModelConfig,
    pub kitten: OfflineTtsKittenModelConfig,
    pub zipvoice: OfflineTtsZipvoiceModelConfig,
    pub pocket: OfflineTtsPocketModelConfig,
    pub supertonic: OfflineTtsSupertonicModelConfig,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct OfflineTtsConfig {
    pub model: OfflineTtsModelConfig,
    pub rule_fsts: *const c_char,
    pub max_num_sentences: i32,
    pub rule_fars: *const c_char,
    pub silence_scale: c_float,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct SherpaOnnxGeneratedAudio {
    pub samples: *const f32,
    pub n: i32,
    pub sample_rate: i32,
}

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct SherpaOnnxGenerationConfig {
    pub silence_scale: c_float,
    pub speed: c_float,
    pub sid: i32,
    pub reference_audio: *const f32,
    pub reference_audio_len: i32,
    pub reference_sample_rate: i32,
    pub reference_text: *const c_char,
    pub num_steps: i32,
    pub extra: *const c_char,
}

pub type SherpaOnnxGeneratedAudioProgressCallbackWithArg =
    Option<unsafe extern "C" fn(samples: *const f32, n: i32, progress: c_float, arg: *mut c_void) -> i32>;

#[repr(C)]
pub struct SherpaOnnxOfflineTts {
    _private: [u8; 0],
}

extern "C" {
    pub fn SherpaOnnxCreateOfflineTts(
        config: *const OfflineTtsConfig,
    ) -> *const SherpaOnnxOfflineTts;

    pub fn SherpaOnnxDestroyOfflineTts(tts: *const SherpaOnnxOfflineTts);

    pub fn SherpaOnnxOfflineTtsSampleRate(tts: *const SherpaOnnxOfflineTts) -> i32;

    pub fn SherpaOnnxOfflineTtsNumSpeakers(tts: *const SherpaOnnxOfflineTts) -> i32;

    pub fn SherpaOnnxOfflineTtsGenerateWithConfig(
        tts: *const SherpaOnnxOfflineTts,
        text: *const c_char,
        config: *const SherpaOnnxGenerationConfig,
        callback: SherpaOnnxGeneratedAudioProgressCallbackWithArg,
        arg: *mut c_void,
    ) -> *const SherpaOnnxGeneratedAudio;

    pub fn SherpaOnnxDestroyOfflineTtsGeneratedAudio(p: *const SherpaOnnxGeneratedAudio);
}


================================================
FILE: sherpa-onnx/rust/sherpa-onnx-sys/src/vad.rs
================================================
use std::os::raw::{c_char, c_float};

#[repr(C)]
pub struct SileroVadModelConfig {
    pub model: *const c_char,
    pub threshold: c_float,
    pub min_silence_duration: c_float,
    pub min_speech_duration: c_float,
    pub window_size: i32,
    pub max_speech_duration: c_float,
}

#[repr(C)]
pub struct TenVadModelConfig {
    pub model: *const c_char,
    pub threshold: c_float,
    pub min_silence_duration: c_float,
    pub min_speech_duration: c_float,
    pub window_size: i32,
    pub max_speech_duration: c_float,
}

#[repr(C)]
pub struct VadModelConfig {
    pub silero_vad: SileroVadModelConfig,
    pub sample_rate: i32,
    pub num_threads: i32,
    pub provider: *const c_char,
    pub debug: i32,
    pub ten_vad: TenVadModelConfig,
}

#[repr(C)]
pub struct CircularBuffer {
    _private: [u8; 0],
}

#[repr(C)]
pub struct SpeechSegment {
    pub start: i32,
    pub samples: *mut f32,
    pub n: i32,
}

#[repr(C)]
pub struct VoiceActivityDetector {
    _private: [u8; 0],
}

extern "C" {
    pub fn SherpaOnnxCreateCircularBuffer(capacity: i32) -> *const CircularBuffer;
    pub fn SherpaOnnxDestroyCircularBuffer(buffer: *const CircularBuffer);
    pub fn SherpaOnnxCircularBufferPush(buffer: *const CircularBuffer, p: *const f32, n: i32);
    pub fn SherpaOnnxCircularBufferGet(
        buffer: *const CircularBuffer,
        start_index: i32,
        n: i32,
    ) -> *const f32;
    pub fn SherpaOnnxCircularBufferFree(p: *const f32);
    pub fn SherpaOnnxCircularBufferPop(buffer: *const CircularBuffer, n: i32);
    pub fn SherpaOnnxCircularBufferSize(buffer: *const CircularBuffer) -> i32;
    pub fn SherpaOnnxCircularBufferHead(buffer: *const CircularBuffer) -> i32;
    pub fn SherpaOnnxCircularBufferReset(buffer: *const CircularBuffer);

    pub fn SherpaOnnxCreateVoiceActivityDetector(
        config: *const VadModelConfig,
        buffer_size_in_seconds: c_float,
    ) -> *const VoiceActivityDetector;
    pub fn SherpaOnnxDestroyVoiceActivityDetector(p: *const VoiceActivityDetector);
    pub fn SherpaOnnxVoiceActivityDetectorAcceptWaveform(
        p: *const VoiceActivityDetector,
        samples: *const f32,
        n: i32,
    );
    pub fn SherpaOnnxVoiceActivityDetectorEmpty(p: *const VoiceActivityDetector) -> i32;
    pub fn SherpaOnnxVoiceActivityDetectorDetected(p: *const VoiceActivityDetector) -> i32;
    pub fn SherpaOnnxVoiceActivityDetectorPop(p: *const VoiceActivityDetector);
    pub fn SherpaOnnxVoiceActivityDetectorClear(p: *const VoiceActivityDetector);
    pub fn SherpaOnnxVoiceActivityDetectorFront(
        p: *const VoiceActivityDetector,
    ) -> *const SpeechSegment;
    pub fn SherpaOnnxDestroySpeechSegment(p: *const SpeechSegment);
    pub fn SherpaOnnxVoiceActivityDetectorReset(p: *const VoiceActivityDetector);
    pub fn SherpaOnnxVoiceActivityDetectorFlush(p: *const VoiceActivityDetector);
}


================================================
FILE: sherpa-onnx/rust/sherpa-onnx-sys/src/wave.rs
================================================
#![allow(non_camel_case_types)]
#![allow(non_snake_case)]
#![allow(non_upper_case_globals)]

use std::os::raw::c_char;

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct SherpaOnnxWave {
    /// Samples normalized to [-1, 1]
    pub samples: *const f32,
    pub sample_rate: i32,
    pub num_samples: i32,
}

extern "C" {
    /// Read a WAV file. Returns NULL on error.
    pub fn SherpaOnnxReadWave(filename: *const c_char) -> *const SherpaOnnxWave;

    /// Free memory allocated by SherpaOnnxReadWave
    pub fn SherpaOnnxFreeWave(wave: *const SherpaOnnxWave);

    /// Write a WAV file. Returns 1 on success, 0 on failure.
    pub fn SherpaOnnxWriteWave(
        samples: *const f32,
        n: i32,
        sample_rate: i32,
        filename: *const c_char,
    ) -> i32;
}


================================================
FILE: swift-api-examples/.gitignore
================================================
decode-file
decode-file-non-streaming
generate-subtitles
generate-subtitles-ten-vad
spoken-language-identification
tts-vits
vits-vctk
sherpa-onnx-paraformer-zh-2023-09-14
!*.sh
*.bak
streaming-hlg-decode-file
keyword-spotting-from-file
add-punctuations
tts-matcha-zh
tts-matcha-en
tts-kokoro-en
tts-kokoro-zh-en
speech-enhancement-gtcrn
speech-enhancement-dpdfnet
online-speech-enhancement-gtcrn
online-speech-enhancement-dpdfnet
decode-file-sense-voice-with-hr
test-version
zipformer-ctc-asr
wenet-ctc-asr
dolphin-ctc-asr
tts-kitten-en
tts-pocket-en
compute-speaker-embeddings
decode-file-t-one-streaming
omnilingual-asr-ctc
medasr-ctc
funasr-nano
fire-red-asr-ctc
moonshine-v2-asr
tts-supertonic-en
tts-zipvoice


================================================
FILE: swift-api-examples/SherpaOnnx-Bridging-Header.h
================================================
// swfit-api-examples/SherpaOnnx-Bridging-Header.h
//
// Copyright (c)  2023  Xiaomi Corporation
#ifndef SWIFT_API_EXAMPLES_SHERPAONNX_BRIDGING_HEADER_H_
#define SWIFT_API_EXAMPLES_SHERPAONNX_BRIDGING_HEADER_H_

#import "sherpa-onnx/c-api/c-api.h"

#endif  // SWIFT_API_EXAMPLES_SHERPAONNX_BRIDGING_HEADER_H_


================================================
FILE: swift-api-examples/SherpaOnnx.swift
================================================
/// swift-api-examples/SherpaOnnx.swift
/// Copyright (c)  2023  Xiaomi Corporation

import Foundation  // For NSString

/// Convert a String from swift to a `const char*` so that we can pass it to
/// the C language.
///
/// - Parameters:
///   - s: The String to convert.
/// - Returns: A pointer that can be passed to C as `const char*`

func toCPointer(_ s: String) -> UnsafePointer<Int8>! {
  let cs = (s as NSString).utf8String
  return UnsafePointer<Int8>(cs)
}

/// Return an instance of SherpaOnnxOnlineTransducerModelConfig.
///
/// Please refer to
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html
/// to download the required `.onnx` files.
///
/// - Parameters:
///   - encoder: Path to encoder.onnx
///   - decoder: Path to decoder.onnx
///   - joiner: Path to joiner.onnx
///
/// - Returns: Return an instance of SherpaOnnxOnlineTransducerModelConfig
func sherpaOnnxOnlineTransducerModelConfig(
  encoder: String = "",
  decoder: String = "",
  joiner: String = ""
) -> SherpaOnnxOnlineTransducerModelConfig {
  return SherpaOnnxOnlineTransducerModelConfig(
    encoder: toCPointer(encoder),
    decoder: toCPointer(decoder),
    joiner: toCPointer(joiner)
  )
}

/// Return an instance of SherpaOnnxOnlineParaformerModelConfig.
///
/// Please refer to
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html
/// to download the required `.onnx` files.
///
/// - Parameters:
///   - encoder: Path to encoder.onnx
///   - decoder: Path to decoder.onnx
///
/// - Returns: Return an instance of SherpaOnnxOnlineParaformerModelConfig
func sherpaOnnxOnlineParaformerModelConfig(
  encoder: String = "",
  decoder: String = ""
) -> SherpaOnnxOnlineParaformerModelConfig {
  return SherpaOnnxOnlineParaformerModelConfig(
    encoder: toCPointer(encoder),
    decoder: toCPointer(decoder)
  )
}

func sherpaOnnxOnlineZipformer2CtcModelConfig(
  model: String = ""
) -> SherpaOnnxOnlineZipformer2CtcModelConfig {
  return SherpaOnnxOnlineZipformer2CtcModelConfig(
    model: toCPointer(model)
  )
}

func sherpaOnnxOnlineNemoCtcModelConfig(
  model: String = ""
) -> SherpaOnnxOnlineNemoCtcModelConfig {
  return SherpaOnnxOnlineNemoCtcModelConfig(
    model: toCPointer(model)
  )
}

func sherpaOnnxOnlineToneCtcModelConfig(
  model: String = ""
) -> SherpaOnnxOnlineToneCtcModelConfig {
  return SherpaOnnxOnlineToneCtcModelConfig(
    model: toCPointer(model)
  )
}

/// Return an instance of SherpaOnnxOnlineModelConfig.
///
/// Please refer to
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
/// to download the required `.onnx` files.
///
/// - Parameters:
///   - tokens: Path to tokens.txt
///   - numThreads:  Number of threads to use for neural network computation.
///
/// - Returns: Return an instance of SherpaOnnxOnlineTransducerModelConfig
func sherpaOnnxOnlineModelConfig(
  tokens: String,
  transducer: SherpaOnnxOnlineTransducerModelConfig = sherpaOnnxOnlineTransducerModelConfig(),
  paraformer: SherpaOnnxOnlineParaformerModelConfig = sherpaOnnxOnlineParaformerModelConfig(),
  zipformer2Ctc: SherpaOnnxOnlineZipformer2CtcModelConfig =
    sherpaOnnxOnlineZipformer2CtcModelConfig(),
  numThreads: Int = 1,
  provider: String = "cpu",
  debug: Int = 0,
  modelType: String = "",
  modelingUnit: String = "cjkchar",
  bpeVocab: String = "",
  tokensBuf: String = "",
  tokensBufSize: Int = 0,
  nemoCtc: SherpaOnnxOnlineNemoCtcModelConfig = sherpaOnnxOnlineNemoCtcModelConfig(),
  toneCtc: SherpaOnnxOnlineToneCtcModelConfig = sherpaOnnxOnlineToneCtcModelConfig()
) -> SherpaOnnxOnlineModelConfig {
  return SherpaOnnxOnlineModelConfig(
    transducer: transducer,
    paraformer: paraformer,
    zipformer2_ctc: zipformer2Ctc,
    tokens: toCPointer(tokens),
    num_threads: Int32(numThreads),
    provider: toCPointer(provider),
    debug: Int32(debug),
    model_type: toCPointer(modelType),
    modeling_unit: toCPointer(modelingUnit),
    bpe_vocab: toCPointer(bpeVocab),
    tokens_buf: toCPointer(tokensBuf),
    tokens_buf_size: Int32(tokensBufSize),
    nemo_ctc: nemoCtc,
    t_one_ctc: toneCtc
  )
}

func sherpaOnnxFeatureConfig(
  sampleRate: Int = 16000,
  featureDim: Int = 80
) -> SherpaOnnxFeatureConfig {
  return SherpaOnnxFeatureConfig(
    sample_rate: Int32(sampleRate),
    feature_dim: Int32(featureDim))
}

func sherpaOnnxOnlineCtcFstDecoderConfig(
  graph: String = "",
  maxActive: Int = 3000
) -> SherpaOnnxOnlineCtcFstDecoderConfig {
  return SherpaOnnxOnlineCtcFstDecoderConfig(
    graph: toCPointer(graph),
    max_active: Int32(maxActive))
}

func sherpaOnnxHomophoneReplacerConfig(
  dictDir: String = "",
  lexicon: String = "",
  ruleFsts: String = ""
) -> SherpaOnnxHomophoneReplacerConfig {
  return SherpaOnnxHomophoneReplacerConfig(
    dict_dir: toCPointer(dictDir),
    lexicon: toCPointer(lexicon),
    rule_fsts: toCPointer(ruleFsts))
}

func sherpaOnnxOnlineRecognizerConfig(
  featConfig: SherpaOnnxFeatureConfig,
  modelConfig: SherpaOnnxOnlineModelConfig,
  enableEndpoint: Bool = false,
  rule1MinTrailingSilence: Float = 2.4,
  rule2MinTrailingSilence: Float = 1.2,
  rule3MinUtteranceLength: Float = 30,
  decodingMethod: String = "greedy_search",
  maxActivePaths: Int = 4,
  hotwordsFile: String = "",
  hotwordsScore: Float = 1.5,
  ctcFstDecoderConfig: SherpaOnnxOnlineCtcFstDecoderConfig = sherpaOnnxOnlineCtcFstDecoderConfig(),
  ruleFsts: String = "",
  ruleFars: String = "",
  blankPenalty: Float = 0.0,
  hotwordsBuf: String = "",
  hotwordsBufSize: Int = 0,
  hr: SherpaOnnxHomophoneReplacerConfig = sherpaOnnxHomophoneReplacerConfig()
) -> SherpaOnnxOnlineRecognizerConfig {
  return SherpaOnnxOnlineRecognizerConfig(
    feat_config: featConfig,
    model_config: modelConfig,
    decoding_method: toCPointer(decodingMethod),
    max_active_paths: Int32(maxActivePaths),
    enable_endpoint: enableEndpoint ? 1 : 0,
    rule1_min_trailing_silence: rule1MinTrailingSilence,
    rule2_min_trailing_silence: rule2MinTrailingSilence,
    rule3_min_utterance_length: rule3MinUtteranceLength,
    hotwords_file: toCPointer(hotwordsFile),
    hotwords_score: hotwordsScore,
    ctc_fst_decoder_config: ctcFstDecoderConfig,
    rule_fsts: toCPointer(ruleFsts),
    rule_fars: toCPointer(ruleFars),
    blank_penalty: blankPenalty,
    hotwords_buf: toCPointer(hotwordsBuf),
    hotwords_buf_size: Int32(hotwordsBufSize),
    hr: hr
  )
}

/// Wrapper for recognition result.
///
/// Usage:
///
///  let result = recognizer.getResult()
///  print("text: \(result.text)")
///
class SherpaOnnxOnlineRecongitionResult {
  /// A pointer to the underlying counterpart in C
  private let result: UnsafePointer<SherpaOnnxOnlineRecognizerResult>

  private lazy var _text: String = {
    guard let cstr = result.pointee.text else { return "" }
    return String(cString: cstr)
  }()

  private lazy var _tokens: [String] = {
    guard let tokensPointer = result.pointee.tokens_arr else { return [] }
    return (0..<count).compactMap { index in
      guard let ptr = tokensPointer[index] else { return nil }
      return String(cString: ptr)
    }
  }()

  private lazy var _timestamps: [Float] = {
    guard let timestampsPointer = result.pointee.timestamps else { return [] }
    return (0..<count).map { index in timestampsPointer[index] }
  }()

  init(result: UnsafePointer<SherpaOnnxOnlineRecognizerResult>) {
    self.result = result
  }

  deinit {
    SherpaOnnxDestroyOnlineRecognizerResult(result)
  }

  /// Return the actual recognition result.
  /// For English models, it contains words separated by spaces.
  /// For Chinese models, it contains Chinese words.
  var text: String { _text }

  var count: Int { Int(result.pointee.count) }

  var tokens: [String] { _tokens }

  var timestamps: [Float] { _timestamps }
}

class SherpaOnnxRecognizer {
  /// A pointer to the underlying counterpart in C
  private let recognizer: OpaquePointer
  private var stream: OpaquePointer
  private let lock = NSLock()  // for thread-safe stream replacement

  /// Constructor taking a model config
  init(
    config: UnsafePointer<SherpaOnnxOnlineRecognizerConfig>
  ) {
    self.recognizer = SherpaOnnxCreateOnlineRecognizer(config)
    self.stream = SherpaOnnxCreateOnlineStream(recognizer)
  }

  deinit {
    SherpaOnnxDestroyOnlineStream(stream)
    SherpaOnnxDestroyOnlineRecognizer(recognizer)
  }

  /// Decode wave samples.
  ///
  /// - Parameters:
  ///   - samples: Audio samples normalized to the range [-1, 1]
  ///   - sampleRate: Sample rate of the input audio samples. Must match
  ///                 the one expected by the model.
  func acceptWaveform(samples: [Float], sampleRate: Int = 16_000) {
    SherpaOnnxOnlineStreamAcceptWaveform(stream, Int32(sampleRate), samples, Int32(samples.count))
  }

  func isReady() -> Bool {
    return SherpaOnnxIsOnlineStreamReady(recognizer, stream) != 0
  }

  /// If there are enough number of feature frames, it invokes the neural
  /// network computation and decoding. Otherwise, it is a no-op.
  func decode() {
    SherpaOnnxDecodeOnlineStream(recognizer, stream)
  }

  /// Get the decoding results so far
  func getResult() -> SherpaOnnxOnlineRecongitionResult {
    guard let result = SherpaOnnxGetOnlineStreamResult(recognizer, stream) else {
      fatalError("SherpaOnnxGetOnlineStreamResult returned nil")
    }
    return SherpaOnnxOnlineRecongitionResult(result: result)
  }

  /// Reset the recognizer, which clears the neural network model state
  /// and the state for decoding.
  /// If hotwords is an empty string, it just recreates the decoding stream
  /// If hotwords is not empty, it will create a new decoding stream with
  /// the given hotWords appended to the default hotwords.
  func reset(hotwords: String? = nil) {
    guard let words = hotwords, !words.isEmpty else {
      SherpaOnnxOnlineStreamReset(recognizer, stream)
      return
    }

    words.withCString { cString in
      guard let newStream = SherpaOnnxCreateOnlineStreamWithHotwords(recognizer, cString) else {
        fatalError("SherpaOnnxCreateOnlineStreamWithHotwords returned nil")
      }
      lock.lock()
      // lock while release and replace stream
      SherpaOnnxDestroyOnlineStream(stream)
      stream = newStream
      lock.unlock()
    }
  }

  /// Signal that no more audio samples would be available.
  /// After this call, you cannot call acceptWaveform() any more.
  func inputFinished() {
    SherpaOnnxOnlineStreamInputFinished(stream)
  }

  /// Return true is an endpoint has been detected.
  func isEndpoint() -> Bool {
    return SherpaOnnxOnlineStreamIsEndpoint(recognizer, stream) != 0
  }
}

// For offline APIs

func sherpaOnnxOfflineTransducerModelConfig(
  encoder: String = "",
  decoder: String = "",
  joiner: String = ""
) -> SherpaOnnxOfflineTransducerModelConfig {
  return SherpaOnnxOfflineTransducerModelConfig(
    encoder: toCPointer(encoder),
    decoder: toCPointer(decoder),
    joiner: toCPointer(joiner)
  )
}

func sherpaOnnxOfflineParaformerModelConfig(
  model: String = ""
) -> SherpaOnnxOfflineParaformerModelConfig {
  return SherpaOnnxOfflineParaformerModelConfig(
    model: toCPointer(model)
  )
}

func sherpaOnnxOfflineZipformerCtcModelConfig(
  model: String = ""
) -> SherpaOnnxOfflineZipformerCtcModelConfig {
  return SherpaOnnxOfflineZipformerCtcModelConfig(
    model: toCPointer(model)
  )
}

func sherpaOnnxOfflineWenetCtcModelConfig(
  model: String = ""
) -> SherpaOnnxOfflineWenetCtcModelConfig {
  return SherpaOnnxOfflineWenetCtcModelConfig(
    model: toCPointer(model)
  )
}

func sherpaOnnxOfflineOmnilingualAsrCtcModelConfig(
  model: String = ""
) -> SherpaOnnxOfflineOmnilingualAsrCtcModelConfig {
  return SherpaOnnxOfflineOmnilingualAsrCtcModelConfig(
    model: toCPointer(model)
  )
}

func sherpaOnnxOfflineMedAsrCtcModelConfig(
  model: String = ""
) -> SherpaOnnxOfflineMedAsrCtcModelConfig {
  return SherpaOnnxOfflineMedAsrCtcModelConfig(
    model: toCPointer(model)
  )
}

func sherpaOnnxOfflineFireRedAsrCtcModelConfig(
  model: String = ""
) -> SherpaOnnxOfflineFireRedAsrCtcModelConfig {
  return SherpaOnnxOfflineFireRedAsrCtcModelConfig(
    model: toCPointer(model)
  )
}

func sherpaOnnxOfflineNemoEncDecCtcModelConfig(
  model: String = ""
) -> SherpaOnnxOfflineNemoEncDecCtcModelConfig {
  return SherpaOnnxOfflineNemoEncDecCtcModelConfig(
    model: toCPointer(model)
  )
}

func sherpaOnnxOfflineDolphinModelConfig(
  model: String = ""
) -> SherpaOnnxOfflineDolphinModelConfig {
  return SherpaOnnxOfflineDolphinModelConfig(
    model: toCPointer(model)
  )
}

func sherpaOnnxOfflineWhisperModelConfig(
  encoder: String = "",
  decoder: String = "",
  language: String = "",
  task: String = "transcribe",
  tailPaddings: Int = -1,
  enableTokenTimestamps: Bool = false,
  enableSegmentTimestamps: Bool = false
) -> SherpaOnnxOfflineWhisperModelConfig {
  return SherpaOnnxOfflineWhisperModelConfig(
    encoder: toCPointer(encoder),
    decoder: toCPointer(decoder),
    language: toCPointer(language),
    task: toCPointer(task),
    tail_paddings: Int32(tailPaddings),
    enable_token_timestamps: enableTokenTimestamps ? 1 : 0,
    enable_segment_timestamps: enableSegmentTimestamps ? 1 : 0
  )
}

func sherpaOnnxOfflineCanaryModelConfig(
  encoder: String = "",
  decoder: String = "",
  srcLang: String = "en",
  tgtLang: String = "en",
  usePnc: Bool = true
) -> SherpaOnnxOfflineCanaryModelConfig {
  return SherpaOnnxOfflineCanaryModelConfig(
    encoder: toCPointer(encoder),
    decoder: toCPointer(decoder),
    src_lang: toCPointer(srcLang),
    tgt_lang: toCPointer(tgtLang),
    use_pnc: usePnc ? 1 : 0
  )
}

func sherpaOnnxOfflineFireRedAsrModelConfig(
  encoder: String = "",
  decoder: String = ""
) -> SherpaOnnxOfflineFireRedAsrModelConfig {
  return SherpaOnnxOfflineFireRedAsrModelConfig(
    encoder: toCPointer(encoder),
    decoder: toCPointer(decoder)
  )
}

// there are two versions of Moonshine
// For v1, you need four models: preprocessor, encoder, uncachedDecoder, cachedDecoder
// For v2, you need two models: encoder, mergedDecoder
func sherpaOnnxOfflineMoonshineModelConfig(
  preprocessor: String = "",
  encoder: String = "",
  uncachedDecoder: String = "",
  cachedDecoder: String = "",
  mergedDecoder: String = ""
) -> SherpaOnnxOfflineMoonshineModelConfig {
  return SherpaOnnxOfflineMoonshineModelConfig(
    preprocessor: toCPointer(preprocessor),
    encoder: toCPointer(encoder),
    uncached_decoder: toCPointer(uncachedDecoder),
    cached_decoder: toCPointer(cachedDecoder),
    merged_decoder: toCPointer(mergedDecoder)
  )
}

func sherpaOnnxOfflineTdnnModelConfig(
  model: String = ""
) -> SherpaOnnxOfflineTdnnModelConfig {
  return SherpaOnnxOfflineTdnnModelConfig(
    model: toCPointer(model)
  )
}

func sherpaOnnxOfflineSenseVoiceModelConfig(
  model: String = "",
  language: String = "",
  useInverseTextNormalization: Bool = false
) -> SherpaOnnxOfflineSenseVoiceModelConfig {
  return SherpaOnnxOfflineSenseVoiceModelConfig(
    model: toCPointer(model),
    language: toCPointer(language),
    use_itn: useInverseTextNormalization ? 1 : 0
  )
}

func sherpaOnnxOfflineLMConfig(
  model: String = "",
  scale: Float = 1.0
) -> SherpaOnnxOfflineLMConfig {
  return SherpaOnnxOfflineLMConfig(
    model: toCPointer(model),
    scale: scale
  )
}

func sherpaOnnxOfflineFunASRNanoModelConfig(
  encoderAdaptor: String = "",
  llm: String = "",
  embedding: String = "",
  tokenizer: String = "",
  systemPrompt: String = "You are a helpful assistant.",
  userPrompt: String = "语音转写：",
  maxNewTokens: Int = 512,
  temperature: Float = 1e-6,
  topP: Float = 0.8,
  seed: Int = 42,
  language: String = "",
  itn: Bool = true,
  hotwords: String = ""
) -> SherpaOnnxOfflineFunASRNanoModelConfig {
  return SherpaOnnxOfflineFunASRNanoModelConfig(
    encoder_adaptor: toCPointer(encoderAdaptor),
    llm: toCPointer(llm),
    embedding: toCPointer(embedding),
    tokenizer: toCPointer(tokenizer),
    system_prompt: toCPointer(systemPrompt),
    user_prompt: toCPointer(userPrompt),
    max_new_tokens: Int32(maxNewTokens),
    temperature: temperature,
    top_p: topP,
    seed: Int32(seed),
    language: toCPointer(language),
    itn: itn ? 1 : 0,
    hotwords: toCPointer(hotwords)
  )
}

func sherpaOnnxOfflineModelConfig(
  tokens: String,
  transducer: SherpaOnnxOfflineTransducerModelConfig = sherpaOnnxOfflineTransducerModelConfig(),
  paraformer: SherpaOnnxOfflineParaformerModelConfig = sherpaOnnxOfflineParaformerModelConfig(),
  nemoCtc: SherpaOnnxOfflineNemoEncDecCtcModelConfig = sherpaOnnxOfflineNemoEncDecCtcModelConfig(),
  whisper: SherpaOnnxOfflineWhisperModelConfig = sherpaOnnxOfflineWhisperModelConfig(),
  tdnn: SherpaOnnxOfflineTdnnModelConfig = sherpaOnnxOfflineTdnnModelConfig(),
  numThreads: Int = 1,
  provider: String = "cpu",
  debug: Int = 0,
  modelType: String = "",
  modelingUnit: String = "cjkchar",
  bpeVocab: String = "",
  teleSpeechCtc: String = "",
  senseVoice: SherpaOnnxOfflineSenseVoiceModelConfig = sherpaOnnxOfflineSenseVoiceModelConfig(),
  moonshine: SherpaOnnxOfflineMoonshineModelConfig = sherpaOnnxOfflineMoonshineModelConfig(),
  fireRedAsr: SherpaOnnxOfflineFireRedAsrModelConfig = sherpaOnnxOfflineFireRedAsrModelConfig(),
  dolphin: SherpaOnnxOfflineDolphinModelConfig = sherpaOnnxOfflineDolphinModelConfig(),
  zipformerCtc: SherpaOnnxOfflineZipformerCtcModelConfig =
    sherpaOnnxOfflineZipformerCtcModelConfig(),
  canary: SherpaOnnxOfflineCanaryModelConfig = sherpaOnnxOfflineCanaryModelConfig(),
  wenetCtc: SherpaOnnxOfflineWenetCtcModelConfig =
    sherpaOnnxOfflineWenetCtcModelConfig(),
  omnilingual: SherpaOnnxOfflineOmnilingualAsrCtcModelConfig =
    sherpaOnnxOfflineOmnilingualAsrCtcModelConfig(),
  medasr: SherpaOnnxOfflineMedAsrCtcModelConfig =
    sherpaOnnxOfflineMedAsrCtcModelConfig(),
  funasrNano: SherpaOnnxOfflineFunASRNanoModelConfig =
    sherpaOnnxOfflineFunASRNanoModelConfig(),
  fireRedAsrCtc: SherpaOnnxOfflineFireRedAsrCtcModelConfig =
    sherpaOnnxOfflineFireRedAsrCtcModelConfig()
) -> SherpaOnnxOfflineModelConfig {
  return SherpaOnnxOfflineModelConfig(
    transducer: transducer,
    paraformer: paraformer,
    nemo_ctc: nemoCtc,
    whisper: whisper,
    tdnn: tdnn,
    tokens: toCPointer(tokens),
    num_threads: Int32(numThreads),
    debug: Int32(debug),
    provider: toCPointer(provider),
    model_type: toCPointer(modelType),
    modeling_unit: toCPointer(modelingUnit),
    bpe_vocab: toCPointer(bpeVocab),
    telespeech_ctc: toCPointer(teleSpeechCtc),
    sense_voice: senseVoice,
    moonshine: moonshine,
    fire_red_asr: fireRedAsr,
    dolphin: dolphin,
    zipformer_ctc: zipformerCtc,
    canary: canary,
    wenet_ctc: wenetCtc,
    omnilingual: omnilingual,
    medasr: medasr,
    funasr_nano: funasrNano,
    fire_red_asr_ctc: fireRedAsrCtc
  )
}

func sherpaOnnxOfflineRecognizerConfig(
  featConfig: SherpaOnnxFeatureConfig,
  modelConfig: SherpaOnnxOfflineModelConfig,
  lmConfig: SherpaOnnxOfflineLMConfig = sherpaOnnxOfflineLMConfig(),
  decodingMethod: String = "greedy_search",
  maxActivePaths: Int = 4,
  hotwordsFile: String = "",
  hotwordsScore: Float = 1.5,
  ruleFsts: String = "",
  ruleFars: String = "",
  blankPenalty: Float = 0.0,
  hr: SherpaOnnxHomophoneReplacerConfig = sherpaOnnxHomophoneReplacerConfig()
) -> SherpaOnnxOfflineRecognizerConfig {
  return SherpaOnnxOfflineRecognizerConfig(
    feat_config: featConfig,
    model_config: modelConfig,
    lm_config: lmConfig,
    decoding_method: toCPointer(decodingMethod),
    max_active_paths: Int32(maxActivePaths),
    hotwords_file: toCPointer(hotwordsFile),
    hotwords_score: hotwordsScore,
    rule_fsts: toCPointer(ruleFsts),
    rule_fars: toCPointer(ruleFars),
    blank_penalty: blankPenalty,
    hr: hr
  )
}

class SherpaOnnxOfflineRecongitionResult {
  /// A pointer to the underlying counterpart in C
  let result: UnsafePointer<SherpaOnnxOfflineRecognizerResult>

  private lazy var _text: String = {
    guard let cstr = result.pointee.text else { return "" }
    return String(cString: cstr)
  }()

  private lazy var _timestamps: [Float] = {
    guard let p = result.pointee.timestamps else { return [] }
    return (0..<result.pointee.count).map { p[Int($0)] }
  }()

  private lazy var _durations: [Float] = {
    guard let p = result.pointee.durations else { return [] }
    return (0..<result.pointee.count).map { p[Int($0)] }
  }()

  private lazy var _lang: String = {
    guard let cstr = result.pointee.lang else { return "" }
    return String(cString: cstr)
  }()

  private lazy var _emotion: String = {
    guard let cstr = result.pointee.emotion else { return "" }
    return String(cString: cstr)
  }()

  private lazy var _event: String = {
    guard let cstr = result.pointee.event else { return "" }
    return String(cString: cstr)
  }()

  private lazy var _segmentTimestamps: [Float] = {
    guard let p = result.pointee.segment_timestamps else { return [] }
    return (0..<result.pointee.segment_count).map { p[Int($0)] }
  }()

  private lazy var _segmentDurations: [Float] = {
    guard let p = result.pointee.segment_durations else { return [] }
    return (0..<result.pointee.segment_count).map { p[Int($0)] }
  }()

  private lazy var _segmentTexts: [String] = {
    guard let arr = result.pointee.segment_texts_arr else { return [] }
    return (0..<result.pointee.segment_count).compactMap { idx -> String? in
      guard let ptr = arr[Int(idx)] else { return nil }
      return String(cString: ptr)
    }
  }()

  /// Return the actual recognition result.
  /// For English models, it contains words separated by spaces.
  /// For Chinese models, it contains Chinese words.
  var text: String { _text }
  var count: Int { Int(result.pointee.count) }
  var timestamps: [Float] { _timestamps }

  // Non-empty for TDT models. Empty for all other non-TDT models
  var durations: [Float] { _durations }

  // For SenseVoice models, it can be zh, en, ja, yue, ko
  // where zh is for Chinese
  // en is for English
  // ja is for Japanese
  // yue is for Cantonese
  // ko is for Korean
  var lang: String { _lang }

  // for SenseVoice models
  var emotion: String { _emotion }

  // for SenseVoice models
  var event: String { _event }

  // Segment-level timestamps (for Whisper with segment timestamps enabled)
  var segmentCount: Int { Int(result.pointee.segment_count) }
  var segmentTimestamps: [Float] { _segmentTimestamps }
  var segmentDurations: [Float] { _segmentDurations }
  var segmentTexts: [String] { _segmentTexts }

  init(result: UnsafePointer<SherpaOnnxOfflineRecognizerResult>) {
    self.result = result
  }

  deinit {
    SherpaOnnxDestroyOfflineRecognizerResult(result)
  }
}

class SherpaOnnxOfflineRecognizer {
  /// A pointer to the underlying counterpart in C
  private let recognizer: OpaquePointer

  init(
    config: UnsafePointer<SherpaOnnxOfflineRecognizerConfig>
  ) {
    guard let ptr = SherpaOnnxCreateOfflineRecognizer(config) else {
      fatalError("Failed to create SherpaOnnxOfflineRecognizer")
    }
    self.recognizer = ptr
  }

  deinit {
    SherpaOnnxDestroyOfflineRecognizer(recognizer)
  }

  /// Decode wave samples.
  ///
  /// - Parameters:
  ///   - samples: Audio samples normalized to the range [-1, 1]
  ///   - sampleRate: Sample rate of the input audio samples. Must match
  ///                 the one expected by the model.
  func decode(samples: [Float], sampleRate: Int = 16_000) -> SherpaOnnxOfflineRecongitionResult {
    guard let stream = SherpaOnnxCreateOfflineStream(recognizer) else {
      fatalError("Failed to create offline stream")
    }

    defer { SherpaOnnxDestroyOfflineStream(stream) }

    SherpaOnnxAcceptWaveformOffline(stream, Int32(sampleRate), samples, Int32(samples.count))

    SherpaOnnxDecodeOfflineStream(recognizer, stream)

    guard let resultPtr = SherpaOnnxGetOfflineStreamResult(stream) else {
      fatalError("Failed to get offline recognition result")
    }

    return SherpaOnnxOfflineRecongitionResult(result: resultPtr)
  }

  func setConfig(config: UnsafePointer<SherpaOnnxOfflineRecognizerConfig>) {
    SherpaOnnxOfflineRecognizerSetConfig(recognizer, config)
  }
}

func sherpaOnnxSileroVadModelConfig(
  model: String = "",
  threshold: Float = 0.5,
  minSilenceDuration: Float = 0.25,
  minSpeechDuration: Float = 0.5,
  windowSize: Int = 512,
  maxSpeechDuration: Float = 5.0
) -> SherpaOnnxSileroVadModelConfig {
  return SherpaOnnxSileroVadModelConfig(
    model: toCPointer(model),
    threshold: threshold,
    min_silence_duration: minSilenceDuration,
    min_speech_duration: minSpeechDuration,
    window_size: Int32(windowSize),
    max_speech_duration: maxSpeechDuration
  )
}

func sherpaOnnxTenVadModelConfig(
  model: String = "",
  threshold: Float = 0.5,
  minSilenceDuration: Float = 0.25,
  minSpeechDuration: Float = 0.5,
  windowSize: Int = 256,
  maxSpeechDuration: Float = 5.0
) -> SherpaOnnxTenVadModelConfig {
  return SherpaOnnxTenVadModelConfig(
    model: toCPointer(model),
    threshold: threshold,
    min_silence_duration: minSilenceDuration,
    min_speech_duration: minSpeechDuration,
    window_size: Int32(windowSize),
    max_speech_duration: maxSpeechDuration
  )
}

func sherpaOnnxVadModelConfig(
  sileroVad: SherpaOnnxSileroVadModelConfig = sherpaOnnxSileroVadModelConfig(),
  sampleRate: Int32 = 16000,
  numThreads: Int = 1,
  provider: String = "cpu",
  debug: Int = 0,
  tenVad: SherpaOnnxTenVadModelConfig = sherpaOnnxTenVadModelConfig()
) -> SherpaOnnxVadModelConfig {
  return SherpaOnnxVadModelConfig(
    silero_vad: sileroVad,
    sample_rate: sampleRate,
    num_threads: Int32(numThreads),
    provider: toCPointer(provider),
    debug: Int32(debug),
    ten_vad: tenVad
  )
}

class SherpaOnnxCircularBufferWrapper {
  private let buffer: OpaquePointer

  init(capacity: Int) {
    guard let ptr = SherpaOnnxCreateCircularBuffer(Int32(capacity)) else {
      fatalError("Failed to create SherpaOnnxCircularBuffer")
    }
    self.buffer = ptr
  }

  deinit {
    SherpaOnnxDestroyCircularBuffer(buffer)
  }

  func push(samples: [Float]) {
    guard !samples.isEmpty else { return }
    SherpaOnnxCircularBufferPush(buffer, samples, Int32(samples.count))
  }

  func get(startIndex: Int, n: Int) -> [Float] {
    guard startIndex >= 0 else { return [] }
    guard n > 0 else { return [] }

    guard let ptr = SherpaOnnxCircularBufferGet(buffer, Int32(startIndex), Int32(n)) else {
      return []
    }
    defer { SherpaOnnxCircularBufferFree(ptr) }

    return Array(UnsafeBufferPointer(start: ptr, count: n))
  }

  func pop(n: Int) {
    guard n > 0 else { return }
    SherpaOnnxCircularBufferPop(buffer, Int32(n))
  }

  func size() -> Int {
    return Int(SherpaOnnxCircularBufferSize(buffer))
  }

  func reset() {
    SherpaOnnxCircularBufferReset(buffer)
  }
}

class SherpaOnnxSpeechSegmentWrapper {
  private let p: UnsafePointer<SherpaOnnxSpeechSegment>

  init(p: UnsafePointer<SherpaOnnxSpeechSegment>) {
    self.p = p
  }

  deinit {
    SherpaOnnxDestroySpeechSegment(p)
  }

  var start: Int {
    Int(p.pointee.start)
  }

  var n: Int {
    Int(p.pointee.n)
  }

  lazy var samples: [Float] = {
    Array(UnsafeBufferPointer(start: p.pointee.samples, count: n))
  }()
}

class SherpaOnnxVoiceActivityDetectorWrapper {
  /// A pointer to the underlying counterpart in C
  private let vad: OpaquePointer

  init(config: UnsafePointer<SherpaOnnxVadModelConfig>, buffer_size_in_seconds: Float) {
    guard let vad = SherpaOnnxCreateVoiceActivityDetector(config, buffer_size_in_seconds) else {
      fatalError("SherpaOnnxCreateVoiceActivityDetector returned nil")
    }
    self.vad = vad
  }

  deinit {
    SherpaOnnxDestroyVoiceActivityDetector(vad)
  }

  func acceptWaveform(samples: [Float]) {
    SherpaOnnxVoiceActivityDetectorAcceptWaveform(vad, samples, Int32(samples.count))
  }

  func isEmpty() -> Bool {
    return SherpaOnnxVoiceActivityDetectorEmpty(vad) == 1
  }

  func isSpeechDetected() -> Bool {
    return SherpaOnnxVoiceActivityDetectorDetected(vad) == 1
  }

  func pop() {
    SherpaOnnxVoiceActivityDetectorPop(vad)
  }

  func clear() {
    SherpaOnnxVoiceActivityDetectorClear(vad)
  }

  func front() -> SherpaOnnxSpeechSegmentWrapper {
    guard let p = SherpaOnnxVoiceActivityDetectorFront(vad) else {
      fatalError("SherpaOnnxVoiceActivityDetectorFront returned nil")
    }
    return SherpaOnnxSpeechSegmentWrapper(p: p)
  }

  func reset() {
    SherpaOnnxVoiceActivityDetectorReset(vad)
  }

  func flush() {
    SherpaOnnxVoiceActivityDetectorFlush(vad)
  }
}

// offline tts
func sherpaOnnxOfflineTtsVitsModelConfig(
  model: String = "",
  lexicon: String = "",
  tokens: String = "",
  dataDir: String = "",
  noiseScale: Float = 0.667,
  noiseScaleW: Float = 0.8,
  lengthScale: Float = 1.0,
  dictDir: String = ""
) -> SherpaOnnxOfflineTtsVitsModelConfig {
  return SherpaOnnxOfflineTtsVitsModelConfig(
    model: toCPointer(model),
    lexicon: toCPointer(lexicon),
    tokens: toCPointer(tokens),
    data_dir: toCPointer(dataDir),
    noise_scale: noiseScale,
    noise_scale_w: noiseScaleW,
    length_scale: lengthScale,
    dict_dir: toCPointer(dictDir)
  )
}

func sherpaOnnxOfflineTtsMatchaModelConfig(
  acousticModel: String = "",
  vocoder: String = "",
  lexicon: String = "",
  tokens: String = "",
  dataDir: String = "",
  noiseScale: Float = 0.667,
  lengthScale: Float = 1.0,
  dictDir: String = ""
) -> SherpaOnnxOfflineTtsMatchaModelConfig {
  return SherpaOnnxOfflineTtsMatchaModelConfig(
    acoustic_model: toCPointer(acousticModel),
    vocoder: toCPointer(vocoder),
    lexicon: toCPointer(lexicon),
    tokens: toCPointer(tokens),
    data_dir: toCPointer(dataDir),
    noise_scale: noiseScale,
    length_scale: lengthScale,
    dict_dir: toCPointer(dictDir)
  )
}

func sherpaOnnxOfflineTtsKokoroModelConfig(
  model: String = "",
  voices: String = "",
  tokens: String = "",
  dataDir: String = "",
  lengthScale: Float = 1.0,
  dictDir: String = "",
  lexicon: String = "",
  lang: String = ""
) -> SherpaOnnxOfflineTtsKokoroModelConfig {
  return SherpaOnnxOfflineTtsKokoroModelConfig(
    model: toCPointer(model),
    voices: toCPointer(voices),
    tokens: toCPointer(tokens),
    data_dir: toCPointer(dataDir),
    length_scale: lengthScale,
    dict_dir: toCPointer(dictDir),
    lexicon: toCPointer(lexicon),
    lang: toCPointer(lang)
  )
}

func sherpaOnnxOfflineTtsKittenModelConfig(
  model: String = "",
  voices: String = "",
  tokens: String = "",
  dataDir: String = "",
  lengthScale: Float = 1.0
) -> SherpaOnnxOfflineTtsKittenModelConfig {
  return SherpaOnnxOfflineTtsKittenModelConfig(
    model: toCPointer(model),
    voices: toCPointer(voices),
    tokens: toCPointer(tokens),
    data_dir: toCPointer(dataDir),
    length_scale: lengthScale
  )
}

func sherpaOnnxOfflineTtsZipvoiceModelConfig(
  tokens: String = "",
  encoder: String = "",
  decoder: String = "",
  vocoder: String = "",
  dataDir: String = "",
  lexicon: String = "",
  featScale: Float = 0.1,
  tShift: Float = 0.5,
  targetRms: Float = 0.1,
  guidanceScale: Float = 1.0
) -> SherpaOnnxOfflineTtsZipvoiceModelConfig {
  return SherpaOnnxOfflineTtsZipvoiceModelConfig(
    tokens: toCPointer(tokens),
    encoder: toCPointer(encoder),
    decoder: toCPointer(decoder),
    vocoder: toCPointer(vocoder),
    data_dir: toCPointer(dataDir),
    lexicon: toCPointer(lexicon),
    feat_scale: featScale,
    t_shift: tShift,
    target_rms: targetRms,
    guidance_scale: guidanceScale
  )
}

func sherpaOnnxOfflineTtsPocketModelConfig(
  lmFlow: String = "",
  lmMain: String = "",
  encoder: String = "",
  decoder: String = "",
  textConditioner: String = "",
  vocabJson: String = "",
  tokenScoresJson: String = "",
  voiceEmbeddingCacheCapacity: Int = 50
) -> SherpaOnnxOfflineTtsPocketModelConfig {
  return SherpaOnnxOfflineTtsPocketModelConfig(
    lm_flow: toCPointer(lmFlow),
    lm_main: toCPointer(lmMain),
    encoder: toCPointer(encoder),
    decoder: toCPointer(decoder),
    text_conditioner: toCPointer(textConditioner),
    vocab_json: toCPointer(vocabJson),
    token_scores_json: toCPointer(tokenScoresJson),
    voice_embedding_cache_capacity: Int32(voiceEmbeddingCacheCapacity)
  )
}

func sherpaOnnxOfflineTtsSupertonicModelConfig(
  durationPredictor: String = "",
  textEncoder: String = "",
  vectorEstimator: String = "",
  vocoder: String = "",
  ttsJson: String = "",
  unicodeIndexer: String = "",
  voiceStyle: String = ""
) -> SherpaOnnxOfflineTtsSupertonicModelConfig {
  return SherpaOnnxOfflineTtsSupertonicModelConfig(
    duration_predictor: toCPointer(durationPredictor),
    text_encoder: toCPointer(textEncoder),
    vector_estimator: toCPointer(vectorEstimator),
    vocoder: toCPointer(vocoder),
    tts_json: toCPointer(ttsJson),
    unicode_indexer: toCPointer(unicodeIndexer),
    voice_style: toCPointer(voiceStyle)
  )
}

func sherpaOnnxOfflineTtsModelConfig(
  vits: SherpaOnnxOfflineTtsVitsModelConfig = sherpaOnnxOfflineTtsVitsModelConfig(),
  matcha: SherpaOnnxOfflineTtsMatchaModelConfig = sherpaOnnxOfflineTtsMatchaModelConfig(),
  kokoro: SherpaOnnxOfflineTtsKokoroModelConfig = sherpaOnnxOfflineTtsKokoroModelConfig(),
  numThreads: Int = 1,
  debug: Int = 0,
  provider: String = "cpu",
  kitten: SherpaOnnxOfflineTtsKittenModelConfig = sherpaOnnxOfflineTtsKittenModelConfig(),
  zipvoice: SherpaOnnxOfflineTtsZipvoiceModelConfig = sherpaOnnxOfflineTtsZipvoiceModelConfig(),
  pocket: SherpaOnnxOfflineTtsPocketModelConfig = sherpaOnnxOfflineTtsPocketModelConfig(),
  supertonic: SherpaOnnxOfflineTtsSupertonicModelConfig = sherpaOnnxOfflineTtsSupertonicModelConfig()
) -> SherpaOnnxOfflineTtsModelConfig {
  return SherpaOnnxOfflineTtsModelConfig(
    vits: vits,
    num_threads: Int32(numThreads),
    debug: Int32(debug),
    provider: toCPointer(provider),
    matcha: matcha,
    kokoro: kokoro,
    kitten: kitten,
    zipvoice: zipvoice,
    pocket: pocket,
    supertonic: supertonic
  )
}

func sherpaOnnxOfflineTtsConfig(
  model: SherpaOnnxOfflineTtsModelConfig,
  ruleFsts: String = "",
  ruleFars: String = "",
  maxNumSentences: Int = 1,
  silenceScale: Float = 0.2
) -> SherpaOnnxOfflineTtsConfig {
  return SherpaOnnxOfflineTtsConfig(
    model: model,
    rule_fsts: toCPointer(ruleFsts),
    max_num_sentences: Int32(maxNumSentences),
    rule_fars: toCPointer(ruleFars),
    silence_scale: silenceScale
  )
}

class SherpaOnnxWaveWrapper {
  let wave: UnsafePointer<SherpaOnnxWave>!

  class func readWave(filename: String) -> SherpaOnnxWaveWrapper {
    let wave = SherpaOnnxReadWave(toCPointer(filename))
    return SherpaOnnxWaveWrapper(wave: wave)
  }

  init(wave: UnsafePointer<SherpaOnnxWave>!) {
    self.wave = wave
  }

  deinit {
    if let wave {
      SherpaOnnxFreeWave(wave)
    }
  }

  var numSamples: Int {
    return Int(wave.pointee.num_samples)
  }

  var sampleRate: Int {
    return Int(wave.pointee.sample_rate)
  }

  var samples: [Float] {
    if numSamples == 0 {
      return []
    } else {
      return [Float](UnsafeBufferPointer(start: wave.pointee.samples, count: numSamples))
    }
  }
}

class SherpaOnnxGeneratedAudioWrapper {
  /// A pointer to the underlying counterpart in C
  let audio: UnsafePointer<SherpaOnnxGeneratedAudio>!

  init(audio: UnsafePointer<SherpaOnnxGeneratedAudio>!) {
    self.audio = audio
  }

  deinit {
    if let audio {
      SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio)
    }
  }

  var n: Int32 {
    return audio.pointee.n
  }

  var sampleRate: Int32 {
    return audio.pointee.sample_rate
  }

  var samples: [Float] {
    if let p = audio.pointee.samples {
      return [Float](UnsafeBufferPointer(start: p, count: Int(n)))
    } else {
      return []
    }
  }

  func save(filename: String) -> Int32 {
    return SherpaOnnxWriteWave(audio.pointee.samples, n, sampleRate, toCPointer(filename))
  }
}

typealias TtsCallbackWithArg = (
  @convention(c) (
    UnsafePointer<Float>?,  // const float* samples
    Int32,  // int32_t n
    UnsafeMutableRawPointer?  // void *arg
  ) -> Int32
)?

typealias TtsProgressCallbackWithArg =
  @convention(c) (
    UnsafePointer<Float>?, Int32, Float, UnsafeMutableRawPointer?
  ) -> Int32

struct SherpaOnnxGenerationConfigSwift {
  var silenceScale: Float = 0.2
  var speed: Float = 1.0
  var sid: Int = 0
  var referenceAudio: [Float] = []
  var referenceSampleRate: Int = 16000
  var referenceText: String = ""
  var numSteps: Int = 1
  var extra: [String: Any] = [:]  // Any can be String, Int, Float, Double

  /// Convert the extra dictionary into a JSON string
  func extraJsonString() -> String {
    var jsonCompatible: [String: Any] = [:]

    for (key, value) in extra {
      switch value {
      case let v as String:
        jsonCompatible[key] = v
      case let v as Int:
        jsonCompatible[key] = v
      case let v as Float:
        jsonCompatible[key] = v
      case let v as Double:
        jsonCompatible[key] = v
      default:
        // ignore unsupported types
        print("Warning: unsupported type for key '\(key)' in extra")
      }
    }

    guard let data = try? JSONSerialization.data(withJSONObject: jsonCompatible, options: []),
      let json = String(data: data, encoding: .utf8)
    else {
      return "{}"
    }

    return json
  }
}
final class SherpaOnnxGenerationConfigC {
  /// The underlying C struct
  var cConfig: SherpaOnnxGenerationConfig

  /// Storage for reference audio so the pointer stays valid during the C call
  private let referenceAudioStorage: [Float]

  /// Extra JSON string for C API
  let extraJson: String

  init(_ swiftConfig: SherpaOnnxGenerationConfigSwift) {
    let referenceAudio = swiftConfig.referenceAudio

    let extraJson = swiftConfig.extraJsonString()
    self.extraJson = extraJson

    self.referenceAudioStorage = referenceAudio

    self.cConfig = self.referenceAudioStorage.withUnsafeBufferPointer { buffer in
      SherpaOnnxGenerationConfig(
        silence_scale: swiftConfig.silenceScale,
        speed: swiftConfig.speed,
        sid: Int32(swiftConfig.sid),
        reference_audio: buffer.count > 0 ? buffer.baseAddress : nil,
        reference_audio_len: Int32(buffer.count),
        reference_sample_rate: Int32(swiftConfig.referenceSampleRate),
        reference_text: toCPointer(swiftConfig.referenceText),
        num_steps: Int32(swiftConfig.numSteps),
        extra: toCPointer(extraJson)
      )
    }
  }
}

class SherpaOnnxOfflineTtsWrapper {
  /// A pointer to the underlying counterpart in C
  let tts: OpaquePointer!

  /// Constructor taking a model config
  init(
    config: UnsafePointer<SherpaOnnxOfflineTtsConfig>!
  ) {
    tts = SherpaOnnxCreateOfflineTts(config)
  }

  deinit {
    if let tts {
      SherpaOnnxDestroyOfflineTts(tts)
    }
  }

  func generate(text: String, sid: Int = 0, speed: Float = 1.0) -> SherpaOnnxGeneratedAudioWrapper {
    let audio: UnsafePointer<SherpaOnnxGeneratedAudio>? = SherpaOnnxOfflineTtsGenerate(
      tts, toCPointer(text), Int32(sid), speed)

    return SherpaOnnxGeneratedAudioWrapper(audio: audio)
  }

  func generateWithCallbackWithArg(
    text: String, callback: TtsCallbackWithArg, arg: UnsafeMutableRawPointer, sid: Int = 0,
    speed: Float = 1.0
  ) -> SherpaOnnxGeneratedAudioWrapper {
    let audio: UnsafePointer<SherpaOnnxGeneratedAudio>? =
      SherpaOnnxOfflineTtsGenerateWithCallbackWithArg(
        tts, toCPointer(text), Int32(sid), speed, callback, arg)

    return SherpaOnnxGeneratedAudioWrapper(audio: audio)
  }

  func generateWithConfig(
    text: String,
    config: SherpaOnnxGenerationConfigSwift,
    callback: TtsProgressCallbackWithArg?,
    arg: UnsafeMutableRawPointer?
  ) -> SherpaOnnxGeneratedAudioWrapper {
    let bridge = SherpaOnnxGenerationConfigC(config)

    let audio: UnsafePointer<SherpaOnnxGeneratedAudio>? =
      withUnsafePointer(to: &bridge.cConfig) { configPtr in
        SherpaOnnxOfflineTtsGenerateWithConfig(
          tts,
          toCPointer(text),
          configPtr,
          callback,
          arg
        )
      }

    return SherpaOnnxGeneratedAudioWrapper(audio: audio)
  }

}

// spoken language identification

func sherpaOnnxSpokenLanguageIdentificationWhisperConfig(
  encoder: String,
  decoder: String,
  tailPaddings: Int = -1
) -> SherpaOnnxSpokenLanguageIdentificationWhisperConfig {
  return SherpaOnnxSpokenLanguageIdentificationWhisperConfig(
    encoder: toCPointer(encoder),
    decoder: toCPointer(decoder),
    tail_paddings: Int32(tailPaddings))
}

func sherpaOnnxSpokenLanguageIdentificationConfig(
  whisper: SherpaOnnxSpokenLanguageIdentificationWhisperConfig,
  numThreads: Int = 1,
  debug: Int = 0,
  provider: String = "cpu"
) -> SherpaOnnxSpokenLanguageIdentificationConfig {
  return SherpaOnnxSpokenLanguageIdentificationConfig(
    whisper: whisper,
    num_threads: Int32(numThreads),
    debug: Int32(debug),
    provider: toCPointer(provider))
}

class SherpaOnnxSpokenLanguageIdentificationResultWrapper {
  /// A pointer to the underlying counterpart in C
  let result: UnsafePointer<SherpaOnnxSpokenLanguageIdentificationResult>!

  /// Return the detected language.
  /// en for English
  /// zh for Chinese
  /// es for Spanish
  /// de for German
  /// etc.
  var lang: String {
    return String(cString: result.pointee.lang)
  }

  init(result: UnsafePointer<SherpaOnnxSpokenLanguageIdentificationResult>!) {
    self.result = result
  }

  deinit {
    if let result {
      SherpaOnnxDestroySpokenLanguageIdentificationResult(result)
    }
  }
}

class SherpaOnnxSpokenLanguageIdentificationWrapper {
  /// A pointer to the underlying counterpart in C
  let slid: OpaquePointer!

  init(
    config: UnsafePointer<SherpaOnnxSpokenLanguageIdentificationConfig>!
  ) {
    slid = SherpaOnnxCreateSpokenLanguageIdentification(config)
  }

  deinit {
    if let slid {
      SherpaOnnxDestroySpokenLanguageIdentification(slid)
    }
  }

  func decode(samples: [Float], sampleRate: Int = 16000)
    -> SherpaOnnxSpokenLanguageIdentificationResultWrapper
  {
    let stream: OpaquePointer! = SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(slid)
    SherpaOnnxAcceptWaveformOffline(stream, Int32(sampleRate), samples, Int32(samples.count))

    let result: UnsafePointer<SherpaOnnxSpokenLanguageIdentificationResult>? =
      SherpaOnnxSpokenLanguageIdentificationCompute(
        slid,
        stream)

    SherpaOnnxDestroyOfflineStream(stream)
    return SherpaOnnxSpokenLanguageIdentificationResultWrapper(result: result)
  }
}

// keyword spotting

class SherpaOnnxKeywordResultWrapper {
  /// A pointer to the underlying counterpart in C
  let result: UnsafePointer<SherpaOnnxKeywordResult>!

  var keyword: String {
    return String(cString: result.pointee.keyword)
  }

  var count: Int32 {
    return result.pointee.count
  }

  var tokens: [String] {
    if let tokensPointer = result.pointee.tokens_arr {
      var tokens: [String] = []
      for index in 0..<count {
        if let tokenPointer = tokensPointer[Int(index)] {
          let token = String(cString: tokenPointer)
          tokens.append(token)
        }
      }
      return tokens
    } else {
      let tokens: [String] = []
      return tokens
    }
  }

  init(result: UnsafePointer<SherpaOnnxKeywordResult>!) {
    self.result = result
  }

  deinit {
    if let result {
      SherpaOnnxDestroyKeywordResult(result)
    }
  }
}

func sherpaOnnxKeywordSpotterConfig(
  featConfig: SherpaOnnxFeatureConfig,
  modelConfig: SherpaOnnxOnlineModelConfig,
  keywordsFile: String,
  maxActivePaths: Int = 4,
  numTrailingBlanks: Int = 1,
  keywordsScore: Float = 1.0,
  keywordsThreshold: Float = 0.25,
  keywordsBuf: String = "",
  keywordsBufSize: Int = 0
) -> SherpaOnnxKeywordSpotterConfig {
  return SherpaOnnxKeywordSpotterConfig(
    feat_config: featConfig,
    model_config: modelConfig,
    max_active_paths: Int32(maxActivePaths),
    num_trailing_blanks: Int32(numTrailingBlanks),
    keywords_score: keywordsScore,
    keywords_threshold: keywordsThreshold,
    keywords_file: toCPointer(keywordsFile),
    keywords_buf: toCPointer(keywordsBuf),
    keywords_buf_size: Int32(keywordsBufSize)
  )
}

class SherpaOnnxKeywordSpotterWrapper {
  /// A pointer to the underlying counterpart in C
  let spotter: OpaquePointer!
  var stream: OpaquePointer!

  init(
    config: UnsafePointer<SherpaOnnxKeywordSpotterConfig>!
  ) {
    spotter = SherpaOnnxCreateKeywordSpotter(config)
    stream = SherpaOnnxCreateKeywordStream(spotter)
  }

  deinit {
    if let stream {
      SherpaOnnxDestroyOnlineStream(stream)
    }

    if let spotter {
      SherpaOnnxDestroyKeywordSpotter(spotter)
    }
  }

  func acceptWaveform(samples: [Float], sampleRate: Int = 16000) {
    SherpaOnnxOnlineStreamAcceptWaveform(stream, Int32(sampleRate), samples, Int32(samples.count))
  }

  func isReady() -> Bool {
    return SherpaOnnxIsKeywordStreamReady(spotter, stream) == 1 ? true : false
  }

  func decode() {
    SherpaOnnxDecodeKeywordStream(spotter, stream)
  }

  func reset() {
    SherpaOnnxResetKeywordStream(spotter, stream)
  }

  func getResult() -> SherpaOnnxKeywordResultWrapper {
    let result: UnsafePointer<SherpaOnnxKeywordResult>? = SherpaOnnxGetKeywordResult(
      spotter, stream)
    return SherpaOnnxKeywordResultWrapper(result: result)
  }

  /// Signal that no more audio samples would be available.
  /// After this call, you cannot call acceptWaveform() any more.
  func inputFinished() {
    SherpaOnnxOnlineStreamInputFinished(stream)
  }
}

// Punctuation

func sherpaOnnxOfflinePunctuationModelConfig(
  ctTransformer: String,
  numThreads: Int = 1,
  debug: Int = 0,
  provider: String = "cpu"
) -> SherpaOnnxOfflinePunctuationModelConfig {
  return SherpaOnnxOfflinePunctuationModelConfig(
    ct_transformer: toCPointer(ctTransformer),
    num_threads: Int32(numThreads),
    debug: Int32(debug),
    provider: toCPointer(provider)
  )
}

func sherpaOnnxOfflinePunctuationConfig(
  model: SherpaOnnxOfflinePunctuationModelConfig
) -> SherpaOnnxOfflinePunctuationConfig {
  return SherpaOnnxOfflinePunctuationConfig(
    model: model
  )
}

class SherpaOnnxOfflinePunctuationWrapper {
  /// A pointer to the underlying counterpart in C
  let ptr: OpaquePointer!

  /// Constructor taking a model config
  init(
    config: UnsafePointer<SherpaOnnxOfflinePunctuationConfig>!
  ) {
    ptr = SherpaOnnxCreateOfflinePunctuation(config)
  }

  deinit {
    if let ptr {
      SherpaOnnxDestroyOfflinePunctuation(ptr)
    }
  }

  func addPunct(text: String) -> String {
    let cText = SherpaOfflinePunctuationAddPunct(ptr, toCPointer(text))
    let ans = String(cString: cText!)
    SherpaOfflinePunctuationFreeText(cText)
    return ans
  }
}

func sherpaOnnxOnlinePunctuationModelConfig(
  cnnBiLstm: String,
  bpeVocab: String,
  numThreads: Int = 1,
  debug: Int = 0,
  provider: String = "cpu"
) -> SherpaOnnxOnlinePunctuationModelConfig {
  return SherpaOnnxOnlinePunctuationModelConfig(
    cnn_bilstm: toCPointer(cnnBiLstm),
    bpe_vocab: toCPointer(bpeVocab),
    num_threads: Int32(numThreads),
    debug: Int32(debug),
    provider: toCPointer(provider))
}

func sherpaOnnxOnlinePunctuationConfig(
  model: SherpaOnnxOnlinePunctuationModelConfig
) -> SherpaOnnxOnlinePunctuationConfig {
  return SherpaOnnxOnlinePunctuationConfig(model: model)
}

class SherpaOnnxOnlinePunctuationWrapper {
  /// A pointer to the underlying counterpart in C
  let ptr: OpaquePointer!

  /// Constructor taking a model config
  init(
    config: UnsafePointer<SherpaOnnxOnlinePunctuationConfig>!
  ) {
    ptr = SherpaOnnxCreateOnlinePunctuation(config)
  }

  deinit {
    if let ptr {
      SherpaOnnxDestroyOnlinePunctuation(ptr)
    }
  }

  func addPunct(text: String) -> String {
    let cText = SherpaOnnxOnlinePunctuationAddPunct(ptr, toCPointer(text))
    let ans = String(cString: cText!)
    SherpaOnnxOnlinePunctuationFreeText(cText)
    return ans
  }
}

func sherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig(model: String)
  -> SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig
{
  return SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig(model: toCPointer(model))
}

func sherpaOnnxOfflineSpeakerSegmentationModelConfig(
  pyannote: SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig,
  numThreads: Int = 1,
  debug: Int = 0,
  provider: String = "cpu"
) -> SherpaOnnxOfflineSpeakerSegmentationModelConfig {
  return SherpaOnnxOfflineSpeakerSegmentationModelConfig(
    pyannote: pyannote,
    num_threads: Int32(numThreads),
    debug: Int32(debug),
    provider: toCPointer(provider)
  )
}

func sherpaOnnxFastClusteringConfig(numClusters: Int = -1, threshold: Float = 0.5)
  -> SherpaOnnxFastClusteringConfig
{
  return SherpaOnnxFastClusteringConfig(num_clusters: Int32(numClusters), threshold: threshold)
}

func sherpaOnnxSpeakerEmbeddingExtractorConfig(
  model: String,
  numThreads: Int = 1,
  debug: Int = 0,
  provider: String = "cpu"
) -> SherpaOnnxSpeakerEmbeddingExtractorConfig {
  return SherpaOnnxSpeakerEmbeddingExtractorConfig(
    model: toCPointer(model),
    num_threads: Int32(numThreads),
    debug: Int32(debug),
    provider: toCPointer(provider)
  )
}

func sherpaOnnxOfflineSpeakerDiarizationConfig(
  segmentation: SherpaOnnxOfflineSpeakerSegmentationModelConfig,
  embedding: SherpaOnnxSpeakerEmbeddingExtractorConfig,
  clustering: SherpaOnnxFastClusteringConfig,
  minDurationOn: Float = 0.3,
  minDurationOff: Float = 0.5
) -> SherpaOnnxOfflineSpeakerDiarizationConfig {
  return SherpaOnnxOfflineSpeakerDiarizationConfig(
    segmentation: segmentation,
    embedding: embedding,
    clustering: clustering,
    min_duration_on: minDurationOn,
    min_duration_off: minDurationOff
  )
}

struct SherpaOnnxOfflineSpeakerDiarizationSegmentWrapper {
  var start: Float = 0
  var end: Float = 0
  var speaker: Int = 0
}

class SherpaOnnxOfflineSpeakerDiarizationWrapper {
  /// A pointer to the underlying counterpart in C
  let impl: OpaquePointer!

  init(
    config: UnsafePointer<SherpaOnnxOfflineSpeakerDiarizationConfig>!
  ) {
    impl = SherpaOnnxCreateOfflineSpeakerDiarization(config)
  }

  deinit {
    if let impl {
      SherpaOnnxDestroyOfflineSpeakerDiarization(impl)
    }
  }

  var sampleRate: Int {
    return Int(SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(impl))
  }

  // only config.clustering is used. All other fields are ignored
  func setConfig(config: UnsafePointer<SherpaOnnxOfflineSpeakerDiarizationConfig>!) {
    SherpaOnnxOfflineSpeakerDiarizationSetConfig(impl, config)
  }

  func process(samples: [Float]) -> [SherpaOnnxOfflineSpeakerDiarizationSegmentWrapper] {
    let result = SherpaOnnxOfflineSpeakerDiarizationProcess(
      impl, samples, Int32(samples.count))

    if result == nil {
      return []
    }

    let numSegments = Int(SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(result))

    let p: UnsafePointer<SherpaOnnxOfflineSpeakerDiarizationSegment>? =
      SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(result)

    if p == nil {
      return []
    }

    var ans: [SherpaOnnxOfflineSpeakerDiarizationSegmentWrapper] = []
    for i in 0..<numSegments {
      ans.append(
        SherpaOnnxOfflineSpeakerDiarizationSegmentWrapper(
          start: p![i].start, end: p![i].end, speaker: Int(p![i].speaker)))
    }

    SherpaOnnxOfflineSpeakerDiarizationDestroySegment(p)
    SherpaOnnxOfflineSpeakerDiarizationDestroyResult(result)

    return ans
  }
}

class SherpaOnnxOnlineStreamWrapper {
  /// A pointer to the underlying counterpart in C
  let impl: OpaquePointer!
  init(impl: OpaquePointer!) {
    self.impl = impl
  }

  deinit {
    if let impl {
      SherpaOnnxDestroyOnlineStream(impl)
    }
  }

  func acceptWaveform(samples: [Float], sampleRate: Int = 16000) {
    SherpaOnnxOnlineStreamAcceptWaveform(impl, Int32(sampleRate), samples, Int32(samples.count))
  }

  func inputFinished() {
    SherpaOnnxOnlineStreamInputFinished(impl)
  }
}

class SherpaOnnxSpeakerEmbeddingExtractorWrapper {
  /// A pointer to the underlying counterpart in C
  let impl: OpaquePointer!

  init(
    config: UnsafePointer<SherpaOnnxSpeakerEmbeddingExtractorConfig>!
  ) {
    impl = SherpaOnnxCreateSpeakerEmbeddingExtractor(config)
  }

  deinit {
    if let impl {
      SherpaOnnxDestroySpeakerEmbeddingExtractor(impl)
    }
  }

  var dim: Int {
    return Int(SherpaOnnxSpeakerEmbeddingExtractorDim(impl))
  }

  func createStream() -> SherpaOnnxOnlineStreamWrapper {
    let newStream = SherpaOnnxSpeakerEmbeddingExtractorCreateStream(impl)
    return SherpaOnnxOnlineStreamWrapper(impl: newStream)
  }

  func isReady(stream: SherpaOnnxOnlineStreamWrapper) -> Bool {
    return SherpaOnnxSpeakerEmbeddingExtractorIsReady(impl, stream.impl) == 1 ? true : false
  }

  func compute(stream: SherpaOnnxOnlineStreamWrapper) -> [Float] {
    if !isReady(stream: stream) {
      return []
    }

    let p = SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding(impl, stream.impl)

    defer {
      SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(p)
    }

    return [Float](UnsafeBufferPointer(start: p, count: dim))
  }
}

func sherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig(model: String = "")
  -> SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig
{
  return SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig(model: toCPointer(model))
}

func sherpaOnnxOfflineSpeechDenoiserDpdfNetModelConfig(model: String = "")
  -> SherpaOnnxOfflineSpeechDenoiserDpdfNetModelConfig
{
  return SherpaOnnxOfflineSpeechDenoiserDpdfNetModelConfig(model: toCPointer(model))
}

func sherpaOnnxOfflineSpeechDenoiserModelConfig(
  gtcrn: SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig =
    sherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig(),
  dpdfnet: SherpaOnnxOfflineSpeechDenoiserDpdfNetModelConfig =
    sherpaOnnxOfflineSpeechDenoiserDpdfNetModelConfig(),
  numThreads: Int = 1,
  provider: String = "cpu",
  debug: Int = 0
) -> SherpaOnnxOfflineSpeechDenoiserModelConfig {
  return SherpaOnnxOfflineSpeechDenoiserModelConfig(
    gtcrn: gtcrn,
    num_threads: Int32(numThreads),
    debug: Int32(debug),
    provider: toCPointer(provider),
    dpdfnet: dpdfnet
  )
}

func sherpaOnnxOfflineSpeechDenoiserConfig(
  model: SherpaOnnxOfflineSpeechDenoiserModelConfig =
    sherpaOnnxOfflineSpeechDenoiserModelConfig()
) -> SherpaOnnxOfflineSpeechDenoiserConfig {
  return SherpaOnnxOfflineSpeechDenoiserConfig(
    model: model)
}

class SherpaOnnxDenoisedAudioWrapper {
  /// A pointer to the underlying counterpart in C
  let audio: UnsafePointer<SherpaOnnxDenoisedAudio>!

  init(audio: UnsafePointer<SherpaOnnxDenoisedAudio>!) {
    self.audio = audio
  }

  deinit {
    if let audio {
      SherpaOnnxDestroyDenoisedAudio(audio)
    }
  }

  var n: Int32 {
    guard let audio else {
      return 0
    }
    return audio.pointee.n
  }

  var sampleRate: Int32 {
    guard let audio else {
      return 0
    }
    return audio.pointee.sample_rate
  }

  var samples: [Float] {
    guard let audio else {
      return []
    }

    if let p = audio.pointee.samples {
      var samples: [Float] = []
      for index in 0..<n {
        samples.append(p[Int(index)])
      }
      return samples
    } else {
      let samples: [Float] = []
      return samples
    }
  }

  func save(filename: String) -> Int32 {
    guard let audio else {
      return 0
    }
    return SherpaOnnxWriteWave(audio.pointee.samples, n, sampleRate, toCPointer(filename))
  }
}

class SherpaOnnxOfflineSpeechDenoiserWrapper {
  /// A pointer to the underlying counterpart in C
  let impl: OpaquePointer!

  /// Constructor taking a model config
  init(
    config: UnsafePointer<SherpaOnnxOfflineSpeechDenoiserConfig>!
  ) {
    impl = SherpaOnnxCreateOfflineSpeechDenoiser(config)
  }

  deinit {
    if let impl {
      SherpaOnnxDestroyOfflineSpeechDenoiser(impl)
    }
  }

  func run(samples: [Float], sampleRate: Int) -> SherpaOnnxDenoisedAudioWrapper {
    let audio: UnsafePointer<SherpaOnnxDenoisedAudio>? = SherpaOnnxOfflineSpeechDenoiserRun(
      impl, samples, Int32(samples.count), Int32(sampleRate))

    return SherpaOnnxDenoisedAudioWrapper(audio: audio)
  }

  var sampleRate: Int {
    return Int(SherpaOnnxOfflineSpeechDenoiserGetSampleRate(impl))
  }
}

func sherpaOnnxOnlineSpeechDenoiserConfig(
  model: SherpaOnnxOfflineSpeechDenoiserModelConfig =
    sherpaOnnxOfflineSpeechDenoiserModelConfig()
) -> SherpaOnnxOnlineSpeechDenoiserConfig {
  return SherpaOnnxOnlineSpeechDenoiserConfig(model: model)
}

class SherpaOnnxOnlineSpeechDenoiserWrapper {
  let impl: OpaquePointer!

  init(
    config: UnsafePointer<SherpaOnnxOnlineSpeechDenoiserConfig>!
  ) {
    impl = SherpaOnnxCreateOnlineSpeechDenoiser(config)
  }

  deinit {
    if let impl {
      SherpaOnnxDestroyOnlineSpeechDenoiser(impl)
    }
  }

  func run(samples: [Float], sampleRate: Int) -> SherpaOnnxDenoisedAudioWrapper {
    let audio: UnsafePointer<SherpaOnnxDenoisedAudio>? = SherpaOnnxOnlineSpeechDenoiserRun(
      impl, samples, Int32(samples.count), Int32(sampleRate))
    return SherpaOnnxDenoisedAudioWrapper(audio: audio)
  }

  func flush() -> SherpaOnnxDenoisedAudioWrapper {
    let audio: UnsafePointer<SherpaOnnxDenoisedAudio>? = SherpaOnnxOnlineSpeechDenoiserFlush(impl)
    return SherpaOnnxDenoisedAudioWrapper(audio: audio)
  }

  func reset() {
    SherpaOnnxOnlineSpeechDenoiserReset(impl)
  }

  var sampleRate: Int {
    return Int(SherpaOnnxOnlineSpeechDenoiserGetSampleRate(impl))
  }

  var frameShiftInSamples: Int {
    return Int(SherpaOnnxOnlineSpeechDenoiserGetFrameShiftInSamples(impl))
  }
}

func getSherpaOnnxVersion() -> String {
  return String(cString: SherpaOnnxGetVersionStr())
}

func getSherpaOnnxGitSha1() -> String {
  return String(cString: SherpaOnnxGetGitSha1())
}

func getSherpaOnnxGitDate() -> String {
  return String(cString: SherpaOnnxGetGitDate())
}


================================================
FILE: swift-api-examples/add-punctuation-online.swift
================================================
func run() {
    let model = "./sherpa-onnx-online-punct-en-2024-08-06/model.onnx"
    let bpe = "./sherpa-onnx-online-punct-en-2024-08-06/bpe.vocab"
    
    // Create model config
    let modelConfig = sherpaOnnxOnlinePunctuationModelConfig(
        cnnBiLstm: model,
        bpeVocab: bpe
    )
    
    // Create punctuation config
    var config = sherpaOnnxOnlinePunctuationConfig(model: modelConfig)
    
    // Create punctuation instance
    let punct = SherpaOnnxOnlinePunctuationWrapper(config: &config)
    
    // Test texts
    let textList = [
        "how are you i am fine thank you",
        "The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry"
    ]
    
    // Process each text
  for i in 0..<textList.count {
    let t = punct.addPunct(text: textList[i])
    print("\nresult is:\n\(t)")
  }
}

@main
struct App {
  static func main() {
    run()
  }
}


================================================
FILE: swift-api-examples/add-punctuations.swift
================================================
func run() {
  let model = "./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx"
  let modelConfig = sherpaOnnxOfflinePunctuationModelConfig(
    ctTransformer: model,
    numThreads: 1,
    debug: 1,
    provider: "cpu"
  )
  var config = sherpaOnnxOfflinePunctuationConfig(model: modelConfig)

  let punct = SherpaOnnxOfflinePunctuationWrapper(config: &config)

  let textList = [
    "这是一个测试你好吗How are you我很好thank you are you ok谢谢你",
    "我们都是木头人不会说话不会动",
    "The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry",
  ]

  for i in 0..<textList.count {
    let t = punct.addPunct(text: textList[i])
    print("\nresult is:\n\(t)")
  }

}

@main
struct App {
  static func main() {
    run()
  }
}


================================================
FILE: swift-api-examples/compute-speaker-embeddings.swift
================================================
/// swift-api-examples/compute-speaker-embeddings.swift
/// Copyright (c)  2025  Xiaomi Corporation
/*
Please download test files used in this script from

https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
*/
import Foundation

func cosineSimilarity(_ a: [Float], _ b: [Float]) -> Float {
  precondition(a.count == b.count, "Vectors must have the same length")

  var dot: Float = 0
  var sumA: Float = 0
  var sumB: Float = 0

  for i in 0..<a.count {
    let x = a[i]
    let y = b[i]
    dot += x * y
    sumA += x * x
    sumB += y * y
  }

  let magA = sqrt(sumA)
  let magB = sqrt(sumB)

  guard magA > 0 && magB > 0 else { return 0 }
  return dot / (magA * magB)
}

func computeEmbedding(extractor: SherpaOnnxSpeakerEmbeddingExtractorWrapper, waveFilename: String)
  -> [Float]
{
  let audio = SherpaOnnxWaveWrapper.readWave(filename: waveFilename)
  let stream = extractor.createStream()
  stream.acceptWaveform(samples: audio.samples, sampleRate: audio.sampleRate)
  stream.inputFinished()
  return extractor.compute(stream: stream)
}

func run() {
  let model = "./wespeaker_zh_cnceleb_resnet34.onnx"
  var config = sherpaOnnxSpeakerEmbeddingExtractorConfig(model: model)
  let extractor = SherpaOnnxSpeakerEmbeddingExtractorWrapper(config: &config)
  let embedding1 = computeEmbedding(extractor: extractor, waveFilename: "./fangjun-sr-1.wav")
  let embedding2 = computeEmbedding(extractor: extractor, waveFilename: "./fangjun-sr-2.wav")
  let embedding3 = computeEmbedding(extractor: extractor, waveFilename: "./leijun-sr-1.wav")

  let score12 = cosineSimilarity(embedding1, embedding2)
  let score13 = cosineSimilarity(embedding1, embedding3)
  let score23 = cosineSimilarity(embedding2, embedding3)

  print("Score between spk1 and spk2: \(score12)")
  print("Score between spk1 and spk3: \(score13)")
  print("Score between spk2 and spk3: \(score23)")
}

@main
struct App {
  static func main() {
    run()
  }
}


================================================
FILE: swift-api-examples/decode-file-non-streaming.swift
================================================
import AVFoundation

extension AudioBuffer {
  func array() -> [Float] {
    return Array(UnsafeBufferPointer(self))
  }
}

extension AVAudioPCMBuffer {
  func array() -> [Float] {
    return self.audioBufferList.pointee.mBuffers.array()
  }
}

func run() {
  var recognizer: SherpaOnnxOfflineRecognizer
  var modelConfig: SherpaOnnxOfflineModelConfig
  var modelType = "whisper"
  // modelType = "paraformer"
  // modelType = "sense_voice"
  // modelType = "moonshine"

  if modelType == "whisper" {
    let encoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx"
    let decoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx"
    let tokens = "./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt"

    let whisperConfig = sherpaOnnxOfflineWhisperModelConfig(
      encoder: encoder,
      decoder: decoder
    )

    modelConfig = sherpaOnnxOfflineModelConfig(
      tokens: tokens,
      whisper: whisperConfig,
      debug: 0,
      modelType: "whisper"
    )
  } else if modelType == "paraformer" {
    let model = "./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx"
    let tokens = "./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt"
    let paraformerConfig = sherpaOnnxOfflineParaformerModelConfig(
      model: model
    )

    modelConfig = sherpaOnnxOfflineModelConfig(
      tokens: tokens,
      paraformer: paraformerConfig,
      debug: 0,
      modelType: "paraformer"
    )
  } else if modelType == "sense_voice" {
    let model = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx"
    let tokens = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt"
    let senseVoiceConfig = sherpaOnnxOfflineSenseVoiceModelConfig(
      model: model,
      useInverseTextNormalization: true
    )

    modelConfig = sherpaOnnxOfflineModelConfig(
      tokens: tokens,
      debug: 0,
      senseVoice: senseVoiceConfig
    )
  } else if modelType == "moonshine" {
    let preprocessor = "./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx"
    let encoder = "./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx"
    let uncachedDecoder = "./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx"
    let cachedDecoder = "./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx"
    let tokens = "./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt"
    let moonshine = sherpaOnnxOfflineMoonshineModelConfig(
      preprocessor: preprocessor,
      encoder: encoder,
      uncachedDecoder: uncachedDecoder,
      cachedDecoder: cachedDecoder
    )

    modelConfig = sherpaOnnxOfflineModelConfig(
      tokens: tokens,
      debug: 0,
      moonshine: moonshine
    )
  } else {
    print("Please specify a supported modelType \(modelType)")
    return
  }

  let featConfig = sherpaOnnxFeatureConfig(
    sampleRate: 16000,
    featureDim: 80
  )
  var config = sherpaOnnxOfflineRecognizerConfig(
    featConfig: featConfig,
    modelConfig: modelConfig
  )

  recognizer = SherpaOnnxOfflineRecognizer(config: &config)

  var filePath = "./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav"
  if modelType == "sense_voice" {
    filePath = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav"
  } else if modelType == "moonshine" {
    filePath = "./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav"
  }
  let fileURL: NSURL = NSURL(fileURLWithPath: filePath)
  let audioFile = try! AVAudioFile(forReading: fileURL as URL)

  let audioFormat = audioFile.processingFormat
  assert(audioFormat.channelCount == 1)
  assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)

  let audioFrameCount = UInt32(audioFile.length)
  let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)

  try! audioFile.read(into: audioFileBuffer!)
  let array: [Float]! = audioFileBuffer?.array()
  let result = recognizer.decode(samples: array, sampleRate: Int(audioFormat.sampleRate))
  print("\nresult is:\n\(result.text)")
  if result.timestamps.count != 0 {
    print("\ntimestamps is:\n\(result.timestamps)")
  }

}

@main
struct App {
  static func main() {
    run()
  }
}


================================================
FILE: swift-api-examples/decode-file-sense-voice-with-hr.swift
================================================
import AVFoundation

extension AudioBuffer {
  func array() -> [Float] {
    return Array(UnsafeBufferPointer(self))
  }
}

extension AVAudioPCMBuffer {
  func array() -> [Float] {
    return self.audioBufferList.pointee.mBuffers.array()
  }
}

func run() {
  var recognizer: SherpaOnnxOfflineRecognizer
  let model = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx"
  let tokens = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt"
  let senseVoiceConfig = sherpaOnnxOfflineSenseVoiceModelConfig(
    model: model,
    useInverseTextNormalization: true
  )

  let modelConfig = sherpaOnnxOfflineModelConfig(
    tokens: tokens,
    debug: 0,
    senseVoice: senseVoiceConfig
  )

  let featConfig = sherpaOnnxFeatureConfig(
    sampleRate: 16000,
    featureDim: 80
  )

  let hrConfig = sherpaOnnxHomophoneReplacerConfig(
    lexicon: "./lexicon.txt",
    ruleFsts: "./replace.fst"
  )
  var config = sherpaOnnxOfflineRecognizerConfig(
    featConfig: featConfig,
    modelConfig: modelConfig,
    hr: hrConfig
  )

  recognizer = SherpaOnnxOfflineRecognizer(config: &config)

  let filePath = "./test-hr.wav"
  let fileURL: NSURL = NSURL(fileURLWithPath: filePath)
  let audioFile = try! AVAudioFile(forReading: fileURL as URL)

  let audioFormat = audioFile.processingFormat
  assert(audioFormat.channelCount == 1)
  assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)

  let audioFrameCount = UInt32(audioFile.length)
  let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)

  try! audioFile.read(into: audioFileBuffer!)
  let array: [Float]! = audioFileBuffer?.array()
  let result = recognizer.decode(samples: array, sampleRate: Int(audioFormat.sampleRate))
  print("\nresult is:\n\(result.text)")
  if result.timestamps.count != 0 {
    print("\ntimestamps is:\n\(result.timestamps)")
  }

}

@main
struct App {
  static func main() {
    run()
  }
}


================================================
FILE: swift-api-examples/decode-file-t-one-streaming.swift
================================================
import AVFoundation

extension AudioBuffer {
  func array() -> [Float] {
    return Array(UnsafeBufferPointer(self))
  }
}

extension AVAudioPCMBuffer {
  func array() -> [Float] {
    return self.audioBufferList.pointee.mBuffers.array()
  }
}

func run() {
  let filePath = "./sherpa-onnx-streaming-t-one-russian-2025-09-08/0.wav"
  let model =
    "./sherpa-onnx-streaming-t-one-russian-2025-09-08/model.onnx"
  let tokens = "./sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt"

  let toneCtcConfig = sherpaOnnxOnlineToneCtcModelConfig(
    model: model)

  let modelConfig = sherpaOnnxOnlineModelConfig(
    tokens: tokens,
    toneCtc: toneCtcConfig
  )

  let featConfig = sherpaOnnxFeatureConfig(
    sampleRate: 8000,
    featureDim: 80
  )
  var config = sherpaOnnxOnlineRecognizerConfig(
    featConfig: featConfig,  // not used
    modelConfig: modelConfig
  )

  let recognizer = SherpaOnnxRecognizer(config: &config)

  let fileURL: NSURL = NSURL(fileURLWithPath: filePath)
  let audioFile = try! AVAudioFile(forReading: fileURL as URL)

  let audioFormat = audioFile.processingFormat
  assert(audioFormat.sampleRate == 8000)
  assert(audioFormat.channelCount == 1)
  assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)

  let audioFrameCount = UInt32(audioFile.length)
  let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)

  try! audioFile.read(into: audioFileBuffer!)
  let array: [Float]! = audioFileBuffer?.array()

  let leftPadding = [Float](repeating: 0.0, count: 2400)
  recognizer.acceptWaveform(samples: leftPadding, sampleRate: Int(audioFormat.sampleRate))

  recognizer.acceptWaveform(samples: array, sampleRate: Int(audioFormat.sampleRate))

  let tailPadding = [Float](repeating: 0.0, count: 4800)
  recognizer.acceptWaveform(samples: tailPadding, sampleRate: Int(audioFormat.sampleRate))

  recognizer.inputFinished()
  while recognizer.isReady() {
    recognizer.decode()
  }

  let result = recognizer.getResult()
  print("\nresult is:\n\(result.text)")
  print("\nresult is:\n\(result.timestamps)")
}

@main
struct App {
  static func main() {
    run()
  }
}


================================================
FILE: swift-api-examples/decode-file.swift
================================================
import AVFoundation

extension AudioBuffer {
  func array() -> [Float] {
    return Array(UnsafeBufferPointer(self))
  }
}

extension AVAudioPCMBuffer {
  func array() -> [Float] {
    return self.audioBufferList.pointee.mBuffers.array()
  }
}

func run() {
  var modelConfig: SherpaOnnxOnlineModelConfig
  var modelType = "zipformer2-ctc"
  var filePath: String

  modelType = "transducer"

  if modelType == "transducer" {
    filePath = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav"
    let encoder =
      "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx"
    let decoder =
      "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx"
    let joiner =
      "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx"
    let tokens = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt"

    let transducerConfig = sherpaOnnxOnlineTransducerModelConfig(
      encoder: encoder,
      decoder: decoder,
      joiner: joiner
    )

    modelConfig = sherpaOnnxOnlineModelConfig(
      tokens: tokens,
      transducer: transducerConfig
    )
  } else {
    filePath =
      "./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000000.wav"
    let model =
      "./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx"
    let tokens = "./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt"
    let zipfomer2CtcModelConfig = sherpaOnnxOnlineZipformer2CtcModelConfig(
      model: model
    )

    modelConfig = sherpaOnnxOnlineModelConfig(
      tokens: tokens,
      zipformer2Ctc: zipfomer2CtcModelConfig
    )
  }

  let featConfig = sherpaOnnxFeatureConfig(
    sampleRate: 16000,
    featureDim: 80
  )
  var config = sherpaOnnxOnlineRecognizerConfig(
    featConfig: featConfig,
    modelConfig: modelConfig
  )

  let recognizer = SherpaOnnxRecognizer(config: &config)

  let fileURL: NSURL = NSURL(fileURLWithPath: filePath)
  let audioFile = try! AVAudioFile(forReading: fileURL as URL)

  let audioFormat = audioFile.processingFormat
  assert(audioFormat.sampleRate == 16000)
  assert(audioFormat.channelCount == 1)
  assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)

  let audioFrameCount = UInt32(audioFile.length)
  let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)

  try! audioFile.read(into: audioFileBuffer!)
  let array: [Float]! = audioFileBuffer?.array()
  recognizer.acceptWaveform(samples: array)

  let tailPadding = [Float](repeating: 0.0, count: 3200)
  recognizer.acceptWaveform(samples: tailPadding)

  recognizer.inputFinished()
  while recognizer.isReady() {
    recognizer.decode()
  }

  let result = recognizer.getResult()
  print("\nresult is:\n\(result.text)")
  print("\nresult is:\n\(result.timestamps)")
}

@main
struct App {
  static func main() {
    run()
  }
}


================================================
FILE: swift-api-examples/dolphin-ctc-asr.swift
================================================
import AVFoundation

extension AudioBuffer {
  func array() -> [Float] {
    return Array(UnsafeBufferPointer(self))
  }
}

extension AVAudioPCMBuffer {
  func array() -> [Float] {
    return self.audioBufferList.pointee.mBuffers.array()
  }
}

func run() {
  let model = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx"
  let tokens = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt"

  let dolphin = sherpaOnnxOfflineDolphinModelConfig(
    model: model
  )

  let modelConfig = sherpaOnnxOfflineModelConfig(
    tokens: tokens,
    debug: 0,
    dolphin: dolphin
  )

  let featConfig = sherpaOnnxFeatureConfig(
    sampleRate: 16000,
    featureDim: 80
  )
  var config = sherpaOnnxOfflineRecognizerConfig(
    featConfig: featConfig,
    modelConfig: modelConfig
  )

  let recognizer = SherpaOnnxOfflineRecognizer(config: &config)

  let filePath = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav"
  let fileURL: NSURL = NSURL(fileURLWithPath: filePath)
  let audioFile = try! AVAudioFile(forReading: fileURL as URL)

  let audioFormat = audioFile.processingFormat
  assert(audioFormat.channelCount == 1)
  assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)

  let audioFrameCount = UInt32(audioFile.length)
  let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)

  try! audioFile.read(into: audioFileBuffer!)
  let array: [Float]! = audioFileBuffer?.array()
  let result = recognizer.decode(samples: array, sampleRate: Int(audioFormat.sampleRate))
  print("\nresult is:\n\(result.text)")
  if result.timestamps.count != 0 {
    print("\ntimestamps is:\n\(result.timestamps)")
  }

}

@main
struct App {
  static func main() {
    run()
  }
}


================================================
FILE: swift-api-examples/fire-red-asr-ctc.swift
================================================
func run() {
  let model =
    "./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/model.int8.onnx"
  let tokens =
    "./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/tokens.txt"

  let fireRedAsrCtc = sherpaOnnxOfflineFireRedAsrCtcModelConfig(
    model: model
  )

  let modelConfig = sherpaOnnxOfflineModelConfig(
    tokens: tokens,
    debug: 1,
    fireRedAsrCtc: fireRedAsrCtc
  )

  let featConfig = sherpaOnnxFeatureConfig()
  var config = sherpaOnnxOfflineRecognizerConfig(
    featConfig: featConfig,
    modelConfig: modelConfig
  )

  let recognizer = SherpaOnnxOfflineRecognizer(config: &config)

  let filePath = "./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/test_wavs/1.wav"
  let audio = SherpaOnnxWaveWrapper.readWave(filename: filePath)

  let result = recognizer.decode(samples: audio.samples, sampleRate: audio.sampleRate)
  print("decode done")

  print("\nresult is:\n\(result.text)")
  if result.timestamps.count != 0 {
    print("\ntimestamps is:\n\(result.timestamps)")
  }
}

@main
struct App {
  static func main() {
    run()
  }
}


================================================
FILE: swift-api-examples/fire-red-asr.swift
================================================
import AVFoundation

extension AudioBuffer {
  func array() -> [Float] {
    return Array(UnsafeBufferPointer(self))
  }
}

extension AVAudioPCMBuffer {
  func array() -> [Float] {
    return self.audioBufferList.pointee.mBuffers.array()
  }
}

func run() {
  let encoder = "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx"
  let decoder = "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/decoder.int8.onnx"
  let tokens = "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/tokens.txt"

  let fireRedAsr = sherpaOnnxOfflineFireRedAsrModelConfig(
    encoder: encoder,
    decoder: decoder
  )

  let modelConfig = sherpaOnnxOfflineModelConfig(
    tokens: tokens,
    debug: 0,
    fireRedAsr: fireRedAsr
  )

  let featConfig = sherpaOnnxFeatureConfig(
    sampleRate: 16000,
    featureDim: 80
  )
  var config = sherpaOnnxOfflineRecognizerConfig(
    featConfig: featConfig,
    modelConfig: modelConfig
  )

  let recognizer = SherpaOnnxOfflineRecognizer(config: &config)

  let filePath = "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/0.wav"
  let fileURL: NSURL = NSURL(fileURLWithPath: filePath)
  let audioFile = try! AVAudioFile(forReading: fileURL as URL)

  let audioFormat = audioFile.processingFormat
  assert(audioFormat.channelCount == 1)
  assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)

  let audioFrameCount = UInt32(audioFile.length)
  let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)

  try! audioFile.read(into: audioFileBuffer!)
  let array: [Float]! = audioFileBuffer?.array()
  let result = recognizer.decode(samples: array, sampleRate: Int(audioFormat.sampleRate))
  print("\nresult is:\n\(result.text)")
  if result.timestamps.count != 0 {
    print("\ntimestamps is:\n\(result.timestamps)")
  }

}

@main
struct App {
  static func main() {
    run()
  }
}


================================================
FILE: swift-api-examples/funasr-nano.swift
================================================
func run() {
  let encoderAdaptor =
    "./sherpa-onnx-funasr-nano-int8-2025-12-30/encoder_adaptor.int8.onnx"
  let llm =
    "./sherpa-onnx-funasr-nano-int8-2025-12-30/llm.int8.onnx"
  let embedding =
    "./sherpa-onnx-funasr-nano-int8-2025-12-30/embedding.int8.onnx"
  let tokenizer =
    "./sherpa-onnx-funasr-nano-int8-2025-12-30/Qwen3-0.6B"

  let funasrNano = sherpaOnnxOfflineFunASRNanoModelConfig(
    encoderAdaptor: encoderAdaptor,
    llm: llm,
    embedding: embedding,
    tokenizer: tokenizer
  )

  let modelConfig = sherpaOnnxOfflineModelConfig(
    tokens: "",
    debug: 1,
    funasrNano: funasrNano
  )

  let featConfig = sherpaOnnxFeatureConfig()
  var config = sherpaOnnxOfflineRecognizerConfig(
    featConfig: featConfig,
    modelConfig: modelConfig
  )

  let recognizer = SherpaOnnxOfflineRecognizer(config: &config)

  let filePath = "./sherpa-onnx-funasr-nano-int8-2025-12-30/test_wavs/lyrics.wav"
  let audio = SherpaOnnxWaveWrapper.readWave(filename: filePath)

  let result = recognizer.decode(samples: audio.samples, sampleRate: audio.sampleRate)
  print("decode done")

  print("\nresult is:\n\(result.text)")
  if !result.timestamps.isEmpty {
    print("\ntimestamps is:\n\(result.timestamps)")
  }
}

@main
struct App {
  static func main() {
    run()
  }
}


================================================
FILE: swift-api-examples/generate-subtitles.swift
================================================
/*
This file shows how to use Swift API to generate subtitles.

You can use the files from
https://huggingface.co/csukuangfj/vad/tree/main
for testing.

For instance, to generate subtitles for Obama.mov, please first
use

ffmpeg -i ./Obama.mov -acodec pcm_s16le -ac 1 -ar 16000 Obama.wav

to extract the audio part from the video.

This file supports only processing WAV sound files, so you have to first
extract audios from videos.

Please see
./run-generate-subtitles.sh
for usages.
*/

import AVFoundation

extension AudioBuffer {
  func array() -> [Float] {
    return Array(UnsafeBufferPointer(self))
  }
}

extension AVAudioPCMBuffer {
  func array() -> [Float] {
    return self.audioBufferList.pointee.mBuffers.array()
  }
}

extension TimeInterval {
  var hourMinuteSecondMS: String {
    String(format: "%d:%02d:%02d,%03d", hour, minute, second, millisecond)
  }

  var hour: Int {
    Int((self / 3600).truncatingRemainder(dividingBy: 3600))
  }
  var minute: Int {
    Int((self / 60).truncatingRemainder(dividingBy: 60))
  }
  var second: Int {
    Int(truncatingRemainder(dividingBy: 60))
  }
  var millisecond: Int {
    Int((self * 1000).truncatingRemainder(dividingBy: 1000))
  }
}

extension String {
  var fileURL: URL {
    return URL(fileURLWithPath: self)
  }
  var pathExtension: String {
    return fileURL.pathExtension
  }
  var lastPathComponent: String {
    return fileURL.lastPathComponent
  }
  var stringByDeletingPathExtension: String {
    return fileURL.deletingPathExtension().path
  }
}

class SpeechSegment: CustomStringConvertible {

  let start: Float
  let end: Float
  let text: String

  init(start: Float, duration: Float, text: String) {
    self.start = start
    self.end = start + duration
    self.text = text
  }
  public var description: String {
    var s: String
    s = TimeInterval(self.start).hourMinuteSecondMS
    s += " --> "
    s += TimeInterval(self.end).hourMinuteSecondMS
    s += "\n"
    s += self.text

    return s
  }
}

func run() {
  var recognizer: SherpaOnnxOfflineRecognizer
  var modelConfig: SherpaOnnxOfflineModelConfig
  let modelType = "whisper"
  // modelType = "paraformer"
  let filePath = "/Users/fangjun/Desktop/Obama.wav"  // English
  // filePath = "/Users/fangjun/Desktop/lei-jun.wav"  // Chinese
  // please go to https://huggingface.co/csukuangfj/vad
  // to download the above two files

  if modelType == "whisper" {
    // for English
    let encoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx"
    let decoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx"
    let tokens = "./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt"

    let whisperConfig = sherpaOnnxOfflineWhisperModelConfig(
      encoder: encoder,
      decoder: decoder
    )

    modelConfig = sherpaOnnxOfflineModelConfig(
      tokens: tokens,
      whisper: whisperConfig,
      debug: 0,
      modelType: "whisper"
    )
  } else if modelType == "paraformer" {
    // for Chinese
    let model = "./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx"
    let tokens = "./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt"
    let paraformerConfig = sherpaOnnxOfflineParaformerModelConfig(
      model: model
    )

    modelConfig = sherpaOnnxOfflineModelConfig(
      tokens: tokens,
      paraformer: paraformerConfig,
      debug: 0,
      modelType: "paraformer"
    )
  } else {
    print("Please specify a supported modelType \(modelType)")
    return
  }

  let sampleRate = 16000
  let featConfig = sherpaOnnxFeatureConfig(
    sampleRate: sampleRate,
    featureDim: 80
  )
  var config = sherpaOnnxOfflineRecognizerConfig(
    featConfig: featConfig,
    modelConfig: modelConfig
  )

  recognizer = SherpaOnnxOfflineRecognizer(config: &config)

  let audioFile = try! AVAudioFile(forReading: filePath.fileURL)

  let audioFormat = audioFile.processingFormat
  assert(audioFormat.sampleRate == Double(sampleRate))
  assert(audioFormat.channelCount == 1)
  assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)

  var sileroVadConfig = sherpaOnnxSileroVadModelConfig()
  var tenVadConfig = sherpaOnnxTenVadModelConfig()

  var windowSize = 0

  if FileManager.default.fileExists(atPath: "./silero_vad.onnx") {
    sileroVadConfig = sherpaOnnxSileroVadModelConfig(
      model: "./silero_vad.onnx",
      threshold: 0.25,
      windowSize: 512
    )
    windowSize = 512
    print("Use silero-vad")
  } else if FileManager.default.fileExists(atPath: "./ten-vad.onnx") {
    tenVadConfig = sherpaOnnxTenVadModelConfig(
      model: "./ten-vad.onnx",
      threshold: 0.25,
      windowSize: 256
    )
    windowSize = 256
    print("Use ten-vad")
  } else {
    print("Please provide ./silero_vad.onnx or ./ten-vad.onnx")
    return
  }

  var vadModelConfig = sherpaOnnxVadModelConfig(
    sileroVad: sileroVadConfig, tenVad: tenVadConfig)

  let vad = SherpaOnnxVoiceActivityDetectorWrapper(
    config: &vadModelConfig, buffer_size_in_seconds: 120)

  let audioFrameCount = UInt32(audioFile.length)
  let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)

  try! audioFile.read(into: audioFileBuffer!)
  let array: [Float]! = audioFileBuffer?.array()

  var segments: [SpeechSegment] = []

  for offset in stride(from: 0, to: array.count, by: windowSize) {
    let end = min(offset + windowSize, array.count)
    vad.acceptWaveform(samples: [Float](array[offset..<end]))
  }

  vad.flush()
  while !vad.isEmpty() {
    let s = vad.front()
    vad.pop()
    let result = recognizer.decode(samples: s.samples)

    segments.append(
      SpeechSegment(
        start: Float(s.start) / Float(sampleRate),
        duration: Float(s.samples.count) / Float(sampleRate),
        text: result.text))

    print(segments.last!)
  }

  let srt: String = zip(segments.indices, segments).map { (index, element) in
    return "\(index+1)\n\(element)"
  }.joined(separator: "\n\n")

  let srtFilename: String = filePath.stringByDeletingPathExtension + ".srt"
  do {
    try srt.write(to: srtFilename.fileURL, atomically: true, encoding: .utf8)
  } catch {
    print("Error writing: \(error.localizedDescription)")
  }

  print("Saved to \(srtFilename)")
}

@main
struct App {
  static func main() {
    run()
  }
}


================================================
FILE: swift-api-examples/keyword-spotting-from-file.swift
================================================
import AVFoundation

extension AudioBuffer {
  func array() -> [Float] {
    return Array(UnsafeBufferPointer(self))
  }
}

extension AVAudioPCMBuffer {
  func array() -> [Float] {
    return self.audioBufferList.pointee.mBuffers.array()
  }
}

func run() {
  let filePath = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav"
  let encoder =
    "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx"
  let decoder =
    "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx"
  let joiner =
    "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx"
  let tokens =
    "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt"
  let keywordsFile =
    "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/test_keywords.txt"
  let transducerConfig = sherpaOnnxOnlineTransducerModelConfig(
    encoder: encoder,
    decoder: decoder,
    joiner: joiner
  )

  let modelConfig = sherpaOnnxOnlineModelConfig(
    tokens: tokens,
    transducer: transducerConfig
  )

  let featConfig = sherpaOnnxFeatureConfig(
    sampleRate: 16000,
    featureDim: 80
  )
  var config = sherpaOnnxKeywordSpotterConfig(
    featConfig: featConfig,
    modelConfig: modelConfig,
    keywordsFile: keywordsFile
  )

  let spotter = SherpaOnnxKeywordSpotterWrapper(config: &config)

  let fileURL: NSURL = NSURL(fileURLWithPath: filePath)
  let audioFile = try! AVAudioFile(forReading: fileURL as URL)

  let audioFormat = audioFile.processingFormat
  assert(audioFormat.sampleRate == 16000)
  assert(audioFormat.channelCount == 1)
  assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)

  let audioFrameCount = UInt32(audioFile.length)
  let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)

  try! audioFile.read(into: audioFileBuffer!)
  let array: [Float]! = audioFileBuffer?.array()
  spotter.acceptWaveform(samples: array)

  let tailPadding = [Float](repeating: 0.0, count: 3200)
  spotter.acceptWaveform(samples: tailPadding)

  spotter.inputFinished()
  while spotter.isReady() {
    spotter.decode()
    let keyword = spotter.getResult().keyword
    if keyword != "" {
      // Remember to call reset() right after detecting a keyword
      spotter.reset()

      print("Detected: \(keyword)")
    }
  }
}

@main
struct App {
  static func main() {
    run()
  }
}


================================================
FILE: swift-api-examples/medasr-ctc.swift
================================================
func run() {
  let model =
    "./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/model.int8.onnx"
  let tokens =
    "./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/tokens.txt"

  let medasr = sherpaOnnxOfflineMedAsrCtcModelConfig(
    model: model
  )

  let modelConfig = sherpaOnnxOfflineModelConfig(
    tokens: tokens,
    debug: 1,
    medasr: medasr
  )

  let featConfig = sherpaOnnxFeatureConfig()
  var config = sherpaOnnxOfflineRecognizerConfig(
    featConfig: featConfig,
    modelConfig: modelConfig
  )

  let recognizer = SherpaOnnxOfflineRecognizer(config: &config)

  let filePath = "./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/test_wavs/0.wav"
  let audio = SherpaOnnxWaveWrapper.readWave(filename: filePath)

  let result = recognizer.decode(samples: audio.samples, sampleRate: audio.sampleRate)
  print("decode done")

  print("\nresult is:\n\(result.text)")
  if result.timestamps.count != 0 {
    print("\ntimestamps is:\n\(result.timestamps)")
  }
}

@main
struct App {
  static func main() {
    run()
  }
}


================================================
FILE: swift-api-examples/moonshine-v2-asr.swift
================================================
func run() {
  let encoder =
    "./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/encoder_model.ort"
  let decoder =
    "./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/decoder_model_merged.ort"
  let tokens =
    "./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/tokens.txt"

  let moonshine = sherpaOnnxOfflineMoonshineModelConfig(
    encoder: encoder,
    mergedDecoder: decoder
  )

  let modelConfig = sherpaOnnxOfflineModelConfig(
    tokens: tokens,
    debug: 1,
    moonshine: moonshine
  )

  let featConfig = sherpaOnnxFeatureConfig()
  var config = sherpaOnnxOfflineRecognizerConfig(
    featConfig: featConfig,
    modelConfig: modelConfig
  )

  let recognizer = SherpaOnnxOfflineRecognizer(config: &config)

  let filePath = "./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/test_wavs/0.wav"
  let audio = SherpaOnnxWaveWrapper.readWave(filename: filePath)

  let result = recognizer.decode(samples: audio.samples, sampleRate: audio.sampleRate)
  print("decode done")

  print("\nresult is:\n\(result.text)")
  if result.timestamps.count != 0 {
    print("\ntimestamps is:\n\(result.timestamps)")
  }
}

@main
struct App {
  static func main() {
    run()
  }
}


================================================
FILE: swift-api-examples/omnilingual-asr-ctc.swift
================================================
func run() {
  let model =
    "./sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/model.int8.onnx"
  let tokens =
    "./sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/tokens.txt"

  let omnilingual = sherpaOnnxOfflineOmnilingualAsrCtcModelConfig(
    model: model
  )

  let modelConfig = sherpaOnnxOfflineModelConfig(
    tokens: tokens,
    debug: 0,
    omnilingual: omnilingual
  )

  let featConfig = sherpaOnnxFeatureConfig()
  var config = sherpaOnnxOfflineRecognizerConfig(
    featConfig: featConfig,
    modelConfig: modelConfig
  )

  let recognizer = SherpaOnnxOfflineRecognizer(config: &config)

  let filePath = "./sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/test_wavs/en.wav"
  let audio = SherpaOnnxWaveWrapper.readWave(filename: filePath)

  let result = recognizer.decode(samples: audio.samples, sampleRate: audio.sampleRate)

  print("\nresult is:\n\(result.text)")
  if result.timestamps.count != 0 {
    print("\ntimestamps is:\n\(result.timestamps)")
  }
}

@main
struct App {
  static func main() {
    run()
  }
}


================================================
FILE: swift-api-examples/online-speech-enhancement-dpdfnet.swift
================================================
import AVFoundation

extension AudioBuffer {
  func array() -> [Float] {
    return Array(UnsafeBufferPointer(self))
  }
}

extension AVAudioPCMBuffer {
  func array() -> [Float] {
    return self.audioBufferList.pointee.mBuffers.array()
  }
}

func run() {
  let model = "./dpdfnet_baseline.onnx"
  // Please refer to
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
  // to download files used in this script
  var config = sherpaOnnxOnlineSpeechDenoiserConfig(
    model: sherpaOnnxOfflineSpeechDenoiserModelConfig(
      dpdfnet: sherpaOnnxOfflineSpeechDenoiserDpdfNetModelConfig(model: model))
  )

  let sd = SherpaOnnxOnlineSpeechDenoiserWrapper(config: &config)

  let fileURL: NSURL = NSURL(fileURLWithPath: "./inp_16k.wav")
  let audioFile = try! AVAudioFile(forReading: fileURL as URL)

  let audioFormat = audioFile.processingFormat
  assert(audioFormat.sampleRate == 16000)
  assert(audioFormat.channelCount == 1)
  assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)

  let audioFrameCount = UInt32(audioFile.length)
  let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)

  try! audioFile.read(into: audioFileBuffer!)
  let samples: [Float]! = audioFileBuffer?.array()

  var enhanced: [Float] = []
  let frameShift = sd.frameShiftInSamples

  var start = 0
  while start < samples.count {
    let end = min(start + frameShift, samples.count)
    let audio = sd.run(samples: Array(samples[start..<end]), sampleRate: Int(audioFormat.sampleRate))
    enhanced.append(contentsOf: audio.samples)
    start = end
  }

  enhanced.append(contentsOf: sd.flush().samples)

  let filename = "enhanced-online-dpdfnet.wav"
  _ = enhanced.withUnsafeBufferPointer { p in
    SherpaOnnxWriteWave(
      p.baseAddress,
      Int32(enhanced.count),
      Int32(sd.sampleRate),
      toCPointer(filename))
  }
  print("\nSaved to:\(filename)")
}

@main
struct App {
  static func main() {
    run()
  }
}


================================================
FILE: swift-api-examples/online-speech-enhancement-gtcrn.swift
================================================
import AVFoundation

extension AudioBuffer {
  func array() -> [Float] {
    return Array(UnsafeBufferPointer(self))
  }
}

extension AVAudioPCMBuffer {
  func array() -> [Float] {
    return self.audioBufferList.pointee.mBuffers.array()
  }
}

func run() {
  let model = "./gtcrn_simple.onnx"
  // Please refer to
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
  // to download files used in this script
  var config = sherpaOnnxOnlineSpeechDenoiserConfig(
    model: sherpaOnnxOfflineSpeechDenoiserModelConfig(
      gtcrn: sherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig(model: model))
  )

  let sd = SherpaOnnxOnlineSpeechDenoiserWrapper(config: &config)

  let fileURL: NSURL = NSURL(fileURLWithPath: "./inp_16k.wav")
  let audioFile = try! AVAudioFile(forReading: fileURL as URL)

  let audioFormat = audioFile.processingFormat
  assert(audioFormat.sampleRate == 16000)
  assert(audioFormat.channelCount == 1)
  assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)

  let audioFrameCount = UInt32(audioFile.length)
  let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)

  try! audioFile.read(into: audioFileBuffer!)
  let samples: [Float]! = audioFileBuffer?.array()

  var enhanced: [Float] = []
  let frameShift = sd.frameShiftInSamples

  var start = 0
  while start < samples.count {
    let end = min(start + frameShift, samples.count)
    let audio = sd.run(samples: Array(samples[start..<end]), sampleRate: Int(audioFormat.sampleRate))
    enhanced.append(contentsOf: audio.samples)
    start = end
  }

  enhanced.append(contentsOf: sd.flush().samples)

  let filename = "enhanced-online-gtcrn.wav"
  _ = enhanced.withUnsafeBufferPointer { p in
    SherpaOnnxWriteWave(
      p.baseAddress,
      Int32(enhanced.count),
      Int32(sd.sampleRate),
      toCPointer(filename))
  }
  print("\nSaved to:\(filename)")
}

@main
struct App {
  static func main() {
    run()
  }
}


================================================
FILE: swift-api-examples/run-add-punctuations-online.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# Download and extract the online punctuation model if not exists
if [ ! -d ./sherpa-onnx-online-punct-en-2024-08-06 ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
  tar xvf sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
  rm sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
fi

if [ ! -e ./add-punctuation-online ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./add-punctuation-online.swift  ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o ./add-punctuation-online

  strip ./add-punctuation-online
else
  echo "./add-punctuation-online exists - skip building"
fi

# Set library path and run the executable
export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./add-punctuation-online 

================================================
FILE: swift-api-examples/run-add-punctuations.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [ ! -d ./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12 ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
  tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
  rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
fi

if [ ! -e ./add-punctuations ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./add-punctuations.swift  ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o ./add-punctuations

  strip ./add-punctuations
else
  echo "./add-punctuations exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./add-punctuations


================================================
FILE: swift-api-examples/run-compute-speaker-embeddings.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [ ! -f ./wespeaker_zh_cnceleb_resnet34.onnx ]; then
  echo "Please download the pre-trained model for testing."
  echo "You can refer to"
  echo ""
  echo "https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models"
  echo ""
  echo "for help"

  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/wespeaker_zh_cnceleb_resnet34.onnx
fi

if [ ! -f ./fangjun-sr-1.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/fangjun-sr-1.wav
fi

if [ ! -f ./fangjun-sr-2.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/fangjun-sr-2.wav
fi

if [ ! -f ./leijun-sr-1.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/leijun-sr-1.wav
fi

if [ ! -e ./compute-speaker-embeddings ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./compute-speaker-embeddings.swift  ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o compute-speaker-embeddings

  strip compute-speaker-embeddings
else
  echo "./compute-speaker-embeddings exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./compute-speaker-embeddings


================================================
FILE: swift-api-examples/run-decode-file-non-streaming.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [ ! -d ./sherpa-onnx-whisper-tiny.en ]; then
  echo "Please download the pre-trained model for testing."
  echo "You can refer to"
  echo ""
  echo "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html"
  echo ""
  echo "for help"

  wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
  tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
  rm sherpa-onnx-whisper-tiny.en.tar.bz2
fi

if [ ! -e ./decode-file-non-streaming ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./decode-file-non-streaming.swift  ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o decode-file-non-streaming

  strip decode-file-non-streaming
else
  echo "./decode-file-non-streaming exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./decode-file-non-streaming


================================================
FILE: swift-api-examples/run-decode-file-sense-voice-with-hr.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [ ! -d ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17 ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
fi

if [ ! -d dict ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
  tar xf dict.tar.bz2
  rm -rf dict.tar.bz2

  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
fi

if [ ! -e ./decode-file-sense-voice-with-hr ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./decode-file-sense-voice-with-hr.swift  ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o decode-file-sense-voice-with-hr

  strip decode-file-sense-voice-with-hr
else
  echo "./decode-file-sense-voice-with-hr exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./decode-file-sense-voice-with-hr


================================================
FILE: swift-api-examples/run-decode-file-t-one-streaming.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [ ! -d ./sherpa-onnx-streaming-t-one-russian-2025-09-08 ]; then
  echo "Downloading the pre-trained model for testing."

  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
fi

if [ ! -e ./decode-file-t-one-streaming ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./decode-file-t-one-streaming.swift  ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o decode-file-t-one-streaming

  strip decode-file-t-one-streaming
else
  echo "./decode-file-t-one-streaming exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./decode-file-t-one-streaming


================================================
FILE: swift-api-examples/run-decode-file.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [ ! -d ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 ]; then
  echo "Please download the pre-trained model for testing."
  echo "You can refer to"
  echo ""
  echo "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english"
  echo ""
  echo "for help"

  wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
fi

if [ ! -d ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13 ]; then
  wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
  rm sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
fi

if [ ! -e ./decode-file ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./decode-file.swift  ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o decode-file

  strip decode-file
else
  echo "./decode-file exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./decode-file


================================================
FILE: swift-api-examples/run-dolphin-ctc-asr.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [ ! -f ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx ]; then
  echo "Please download the pre-trained model for testing."
  echo "You can refer to"
  echo ""
  echo "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/dolphin/index.html"
  echo ""
  echo "for help"

  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  ls -lh sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02
fi

if [ ! -e ./dolphin-ctc-asr ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./dolphin-ctc-asr.swift  ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o dolphin-ctc-asr

  strip dolphin-ctc-asr
else
  echo "./dolphin-ctc-asr exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./dolphin-ctc-asr


================================================
FILE: swift-api-examples/run-fire-red-asr-ctc.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [ ! -f ./sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25/model.int8.onnx ]; then
  echo "Please download the pre-trained model for testing."
  echo "You can refer to"
  echo ""
  echo "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/FireRedAsr/index.html"
  echo ""
  echo "for help"

  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
  tar xvf sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2
  rm sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25.tar.bz2

  ls -lh sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25
fi

if [ ! -e ./fire-red-asr-ctc ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./fire-red-asr-ctc.swift  ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o fire-red-asr-ctc

  strip fire-red-asr-ctc
else
  echo "./fire-red-asr-ctc exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./fire-red-asr-ctc


================================================
FILE: swift-api-examples/run-fire-red-asr.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [ ! -f ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx ]; then
  echo "Please download the pre-trained model for testing."
  echo "You can refer to"
  echo ""
  echo "https://k2-fsa.github.io/sherpa/onnx/FireRedAsr/index.html"
  echo ""
  echo "for help"

  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  ls -lh sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16
fi

if [ ! -e ./fire-red-asr ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./fire-red-asr.swift  ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o fire-red-asr

  strip fire-red-asr
else
  echo "./fire-red-asr exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./fire-red-asr


================================================
FILE: swift-api-examples/run-funasr-nano-asr.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [ ! -f ./sherpa-onnx-funasr-nano-int8-2025-12-30/encoder_adaptor.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
  tar xvf sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
  rm sherpa-onnx-funasr-nano-int8-2025-12-30.tar.bz2
fi

if [ ! -e ./funasr-nano ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./funasr-nano.swift  ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o funasr-nano

  strip funasr-nano
else
  echo "./funasr-nano exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./funasr-nano


================================================
FILE: swift-api-examples/run-generate-subtitles-ten-vad.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [ ! -d ./sherpa-onnx-whisper-tiny.en ]; then
  echo "Please download the pre-trained model for testing."
  echo "You can refer to"
  echo ""
  echo "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html"
  echo ""
  echo "for help"

  wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
  tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
  rm sherpa-onnx-whisper-tiny.en.tar.bz2
  ls -lh sherpa-onnx-whisper-tiny.en
fi
if [ ! -f ./ten-vad.onnx ]; then
  echo "downloading ten-vad"
  wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
fi

if [ ! -e ./generate-subtitles-ten-vad ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./generate-subtitles.swift  ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o generate-subtitles-ten-vad

  strip generate-subtitles-ten-vad
else
  echo "./generate-subtitles-ten-vad exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./generate-subtitles-ten-vad


================================================
FILE: swift-api-examples/run-generate-subtitles.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [ ! -d ./sherpa-onnx-whisper-tiny.en ]; then
  echo "Please download the pre-trained model for testing."
  echo "You can refer to"
  echo ""
  echo "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html"
  echo ""
  echo "for help"

  wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
  tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
  rm sherpa-onnx-whisper-tiny.en.tar.bz2
  ls -lh sherpa-onnx-whisper-tiny.en
fi
if [ ! -f ./silero_vad.onnx ]; then
  echo "downloading silero_vad"
  wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [ ! -e ./generate-subtitles ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./generate-subtitles.swift  ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o generate-subtitles

  strip generate-subtitles
else
  echo "./generate-subtitles exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./generate-subtitles


================================================
FILE: swift-api-examples/run-keyword-spotting-from-file.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [ ! -d ./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01 ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  tar xf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
fi

if [ ! -e ./keyword-spotting-from-file ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./keyword-spotting-from-file.swift  ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o keyword-spotting-from-file

  strip keyword-spotting-from-file
else
  echo "./keyword-spotting-from-file exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./keyword-spotting-from-file


================================================
FILE: swift-api-examples/run-medasr-ctc-asr.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [ ! -f ./sherpa-onnx-medasr-ctc-en-int8-2025-12-25/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
  tar xvf sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
  rm sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
fi

if [ ! -e ./medasr-ctc ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./medasr-ctc.swift  ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o medasr-ctc

  strip medasr-ctc
else
  echo "./medasr-ctc exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./medasr-ctc


================================================
FILE: swift-api-examples/run-moonshine-v2-asr.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [ ! -f ./sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27/encoder_model.ort ]; then
  echo "Please download the pre-trained model for testing."
  echo "You can refer to"
  echo ""
  echo "https://k2-fsa.github.io/sherpa/onnx/moonshine/index.html"
  echo ""
  echo "for help"

  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
  tar xvf sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
  rm sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
  ls -lh sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27
fi

if [ ! -e ./moonshine-v2-asr ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./moonshine-v2-asr.swift  ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o moonshine-v2-asr

  strip moonshine-v2-asr
else
  echo "./moonshine-v2-asr exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./moonshine-v2-asr


================================================
FILE: swift-api-examples/run-omnilingual-asr-ctc-asr.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [ ! -f sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
  tar xvf sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
  rm sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
fi

if [ ! -e ./omnilingual-asr-ctc ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./omnilingual-asr-ctc.swift  ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o omnilingual-asr-ctc

  strip omnilingual-asr-ctc
else
  echo "./omnilingual-asr-ctc exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./omnilingual-asr-ctc


================================================
FILE: swift-api-examples/run-online-speech-enhancement-dpdfnet.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [ ! -f ./dpdfnet_baseline.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet_baseline.onnx
fi

if [ ! -f ./inp_16k.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
fi

if [ ! -e ./online-speech-enhancement-dpdfnet ]; then
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./online-speech-enhancement-dpdfnet.swift ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o online-speech-enhancement-dpdfnet

  strip online-speech-enhancement-dpdfnet
else
  echo "./online-speech-enhancement-dpdfnet exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./online-speech-enhancement-dpdfnet


================================================
FILE: swift-api-examples/run-online-speech-enhancement-gtcrn.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [ ! -f ./gtcrn_simple.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
fi

if [ ! -f ./inp_16k.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
fi

if [ ! -e ./online-speech-enhancement-gtcrn ]; then
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./online-speech-enhancement-gtcrn.swift ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o online-speech-enhancement-gtcrn

  strip online-speech-enhancement-gtcrn
else
  echo "./online-speech-enhancement-gtcrn exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./online-speech-enhancement-gtcrn


================================================
FILE: swift-api-examples/run-speaker-diarization.sh
================================================
#!/usr/bin/env bash

if [ ! -f ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
fi

if [ ! -f ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
fi

if [ ! -f ./0-four-speakers-zh.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
fi

if [ ! -e ./speaker-diarization ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./speaker-diarization.swift  ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o speaker-diarization

  strip speaker-diarization
else
  echo "./speaker-diarization exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./speaker-diarization


================================================
FILE: swift-api-examples/run-speech-enhancement-dpdfnet.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [ ! -f ./dpdfnet_baseline.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/dpdfnet_baseline.onnx
fi

if [ ! -f ./inp_16k.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
fi

if [ ! -e ./speech-enhancement-dpdfnet ]; then
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./speech-enhancement-dpdfnet.swift ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o speech-enhancement-dpdfnet

  strip speech-enhancement-dpdfnet
else
  echo "./speech-enhancement-dpdfnet exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./speech-enhancement-dpdfnet


================================================
FILE: swift-api-examples/run-speech-enhancement-gtcrn.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [ ! -f ./gtcrn_simple.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
fi

if [ ! -f ./inp_16k.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
fi

if [ ! -e ./speech-enhancement-gtcrn ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./speech-enhancement-gtcrn.swift  ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o speech-enhancement-gtcrn

  strip speech-enhancement-gtcrn
else
  echo "./speech-enhancement-gtcrn  exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./speech-enhancement-gtcrn


================================================
FILE: swift-api-examples/run-spoken-language-identification.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [ ! -d ./sherpa-onnx-whisper-tiny ]; then
  echo "Download a pre-trained model for testing."

  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
  tar xvf sherpa-onnx-whisper-tiny.tar.bz2
  rm sherpa-onnx-whisper-tiny.tar.bz2
fi

if [ ! -e ./spoken-language-identification ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./spoken-language-identification.swift  ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o spoken-language-identification

  strip spoken-language-identification
else
  echo "./spoken-language-identification exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./spoken-language-identification


================================================
FILE: swift-api-examples/run-streaming-hlg-decode-file.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [ ! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst ]; then
  echo "Downloading the pre-trained model for testing."

  wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
fi

if [ ! -e ./streaming-hlg-decode-file ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./streaming-hlg-decode-file.swift  ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o streaming-hlg-decode-file

  strip ./streaming-hlg-decode-file
else
  echo "./streaming-hlg-decode-file exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./streaming-hlg-decode-file


================================================
FILE: swift-api-examples/run-test-version.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [ ! -e ./test-version ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./test-version.swift  ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o ./test-version

  strip ./test-version
else
  echo "./test-version exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./test-version


================================================
FILE: swift-api-examples/run-tts-kitten-en.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kitten.html
# to download more models
if [ ! -f ./kitten-nano-en-v0_1-fp16/model.fp16.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2
  tar xf kitten-nano-en-v0_1-fp16.tar.bz2
  rm kitten-nano-en-v0_1-fp16.tar.bz2
fi

if [ ! -e ./tts-kitten-en ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./tts-kitten-en.swift  ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o tts-kitten-en

  strip tts-kitten-en
else
  echo "./tts-kitten-en exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./tts-kitten-en


================================================
FILE: swift-api-examples/run-tts-kokoro-en.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
# to download more models
if [ ! -f ./kokoro-en-v0_19/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
  tar xf kokoro-en-v0_19.tar.bz2
  rm kokoro-en-v0_19.tar.bz2
fi

if [ ! -e ./tts-kokoro-en ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./tts-kokoro-en.swift  ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o tts-kokoro-en

  strip tts-kokoro-en
else
  echo "./tts-kokoro-en exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./tts-kokoro-en


================================================
FILE: swift-api-examples/run-tts-kokoro-zh-en.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
# to download more models
if [ ! -f ./kokoro-multi-lang-v1_0/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
  tar xf kokoro-multi-lang-v1_0.tar.bz2
  rm kokoro-multi-lang-v1_0.tar.bz2
fi

if [ ! -e ./tts-kokoro-zh-en ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./tts-kokoro-zh-en.swift  ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o tts-kokoro-zh-en

  strip tts-kokoro-zh-en
else
  echo "./tts-kokoro-zh-en exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./tts-kokoro-zh-en


================================================
FILE: swift-api-examples/run-tts-matcha-en.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
# matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
  tar xf matcha-icefall-en_US-ljspeech.tar.bz2
  rm matcha-icefall-en_US-ljspeech.tar.bz2
fi

if [ ! -f ./vocos-22khz-univ.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
fi

if [ ! -e ./tts-matcha-en ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./tts-matcha-en.swift  ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o tts-matcha-en

  strip tts-matcha-en
else
  echo "./tts-matcha-en exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./tts-matcha-en


================================================
FILE: swift-api-examples/run-tts-matcha-zh.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
  tar xvf matcha-icefall-zh-baker.tar.bz2
  rm matcha-icefall-zh-baker.tar.bz2
fi

if [ ! -f ./vocos-22khz-univ.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
fi

if [ ! -e ./tts-matcha-zh ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./tts-matcha-zh.swift  ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o tts-matcha-zh

  strip tts-matcha-zh
else
  echo "./tts-matcha-zh exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./tts-matcha-zh


================================================
FILE: swift-api-examples/run-tts-pocket-en.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pocket.html
# to download more models
if [ ! -f ./sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
  tar xf sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
  rm sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
fi

if [ ! -e ./tts-pocket-en ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./tts-pocket-en.swift  ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o tts-pocket-en

  strip tts-pocket-en
else
  echo "./tts-pocket-en exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./tts-pocket-en


================================================
FILE: swift-api-examples/run-tts-supertonic-en.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/supertonic.html
# to download more models
if [ ! -f ./sherpa-onnx-supertonic-tts-int8-2026-03-06/duration_predictor.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
  tar xf sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
  rm sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
fi

if [ ! -e ./tts-supertonic-en ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./tts-supertonic-en.swift  ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o tts-supertonic-en

  strip tts-supertonic-en
else
  echo "./tts-supertonic-en exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./tts-supertonic-en


================================================
FILE: swift-api-examples/run-tts-vits.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [ ! -d ./vits-piper-en_US-amy-low ]; then
  echo "Download a pre-trained model for testing."

  wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
  tar xf vits-piper-en_US-amy-low.tar.bz2
  rm vits-piper-en_US-amy-low.tar.bz2
fi

if [ ! -e ./tts-vits ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./tts-vits.swift  ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o tts-vits

  strip tts-vits
else
  echo "./tts-vits exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./tts-vits


================================================
FILE: swift-api-examples/run-tts-zipvoice.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/zipvoice.html
# to download more models
if [ ! -f ./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
  tar xf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
  rm sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
fi

if [ ! -f ./vocos_24khz.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos_24khz.onnx
fi

if [ ! -e ./tts-zipvoice ]; then
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./tts-zipvoice.swift ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o tts-zipvoice

  strip tts-zipvoice
else
  echo "./tts-zipvoice exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./tts-zipvoice


================================================
FILE: swift-api-examples/run-wenet-ctc-asr.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [ ! -f sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
fi

if [ ! -e ./wenet-ctc-asr ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./wenet-ctc-asr.swift  ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o wenet-ctc-asr

  strip wenet-ctc-asr
else
  echo "./wenet-ctc-asr exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./wenet-ctc-asr


================================================
FILE: swift-api-examples/run-zipformer-ctc-asr.sh
================================================
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [ ! -f ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx ]; then
  echo "Please download the pre-trained model for testing."
  echo "You can refer to"
  echo ""
  echo "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/icefall/zipformer.html#sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03-chinese"
  echo ""
  echo "for help"

  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2

  tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
  rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
  ls -lh sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03
fi

if [ ! -e ./zipformer-ctc-asr ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./zipformer-ctc-asr.swift  ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o zipformer-ctc-asr

  strip zipformer-ctc-asr
else
  echo "./zipformer-ctc-asr exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./zipformer-ctc-asr


================================================
FILE: swift-api-examples/speaker-diarization.swift
================================================
import AVFoundation

extension AudioBuffer {
  func array() -> [Float] {
    return Array(UnsafeBufferPointer(self))
  }
}

extension AVAudioPCMBuffer {
  func array() -> [Float] {
    return self.audioBufferList.pointee.mBuffers.array()
  }
}

func run() {
  let segmentationModel = "./sherpa-onnx-pyannote-segmentation-3-0/model.onnx"
  let embeddingExtractorModel = "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx"
  let waveFilename = "./0-four-speakers-zh.wav"

  // There are 4 speakers in ./0-four-speakers-zh.wav, so we use 4 here
  let numSpeakers = 4
  var config = sherpaOnnxOfflineSpeakerDiarizationConfig(
    segmentation: sherpaOnnxOfflineSpeakerSegmentationModelConfig(
      pyannote: sherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig(model: segmentationModel)),
    embedding: sherpaOnnxSpeakerEmbeddingExtractorConfig(model: embeddingExtractorModel),
    clustering: sherpaOnnxFastClusteringConfig(numClusters: numSpeakers)
  )

  let sd = SherpaOnnxOfflineSpeakerDiarizationWrapper(config: &config)

  let fileURL: NSURL = NSURL(fileURLWithPath: waveFilename)
  let audioFile = try! AVAudioFile(forReading: fileURL as URL)

  let audioFormat = audioFile.processingFormat
  assert(Int(audioFormat.sampleRate) == sd.sampleRate)
  assert(audioFormat.channelCount == 1)
  assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)

  let audioFrameCount = UInt32(audioFile.length)
  let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)

  try! audioFile.read(into: audioFileBuffer!)
  let array: [Float]! = audioFileBuffer?.array()
  print("Started!")
  let segments = sd.process(samples: array)
  for i in 0..<segments.count {
    print("\(segments[i].start) -- \(segments[i].end) speaker_\(segments[i].speaker)")
  }
}

@main
struct App {
  static func main() {
    run()
  }
}


================================================
FILE: swift-api-examples/speech-enhancement-dpdfnet.swift
================================================
import AVFoundation

extension AudioBuffer {
  func array() -> [Float] {
    return Array(UnsafeBufferPointer(self))
  }
}

extension AVAudioPCMBuffer {
  func array() -> [Float] {
    return self.audioBufferList.pointee.mBuffers.array()
  }
}

func run() {
  let model = "./dpdfnet_baseline.onnx"
  // Please refer to
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
  // to download files used in this script
  // Use dpdfnet_baseline.onnx, dpdfnet2.onnx, dpdfnet4.onnx, or dpdfnet8.onnx
  // for 16 kHz downstream ASR or speech recognition.
  // Use dpdfnet2_48khz_hr.onnx for 48 kHz enhancement output.
  var config = sherpaOnnxOfflineSpeechDenoiserConfig(
    model: sherpaOnnxOfflineSpeechDenoiserModelConfig(
      dpdfnet: sherpaOnnxOfflineSpeechDenoiserDpdfNetModelConfig(model: model))
  )

  let sd = SherpaOnnxOfflineSpeechDenoiserWrapper(config: &config)

  let fileURL: NSURL = NSURL(fileURLWithPath: "./inp_16k.wav")
  let audioFile = try! AVAudioFile(forReading: fileURL as URL)

  let audioFormat = audioFile.processingFormat
  assert(audioFormat.sampleRate == 16000)
  assert(audioFormat.channelCount == 1)
  assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)

  let audioFrameCount = UInt32(audioFile.length)
  let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)

  try! audioFile.read(into: audioFileBuffer!)
  let array: [Float]! = audioFileBuffer?.array()
  let audio = sd.run(samples: array, sampleRate: Int(audioFormat.sampleRate))

  let filename = "enhanced.wav"
  let ok = audio.save(filename: filename)
  if ok == 1 {
    print("\nSaved to:\(filename)")
  } else {
    print("Failed to save to \(filename)")
  }
}

@main
struct App {
  static func main() {
    run()
  }
}


================================================
FILE: swift-api-examples/speech-enhancement-gtcrn.swift
================================================
import AVFoundation

extension AudioBuffer {
  func array() -> [Float] {
    return Array(UnsafeBufferPointer(self))
  }
}

extension AVAudioPCMBuffer {
  func array() -> [Float] {
    return self.audioBufferList.pointee.mBuffers.array()
  }
}

func run() {
  let model = "./gtcrn_simple.onnx"
  // Please refer to
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
  // to download files used in this script
  var config = sherpaOnnxOfflineSpeechDenoiserConfig(
    model: sherpaOnnxOfflineSpeechDenoiserModelConfig(
      gtcrn: sherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig(model: model))
  )

  let sd = SherpaOnnxOfflineSpeechDenoiserWrapper(config: &config)

  let fileURL: NSURL = NSURL(fileURLWithPath: "./inp_16k.wav")
  let audioFile = try! AVAudioFile(forReading: fileURL as URL)

  let audioFormat = audioFile.processingFormat
  assert(audioFormat.sampleRate == 16000)
  assert(audioFormat.channelCount == 1)
  assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)

  let audioFrameCount = UInt32(audioFile.length)
  let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)

  try! audioFile.read(into: audioFileBuffer!)
  let array: [Float]! = audioFileBuffer?.array()
  let audio = sd.run(samples: array, sampleRate: Int(audioFormat.sampleRate))

  let filename = "enhanced.wav"
  let ok = audio.save(filename: filename)
  if ok == 1 {
    print("\nSaved to:\(filename)")
  } else {
    print("Failed to save to \(filename)")
  }
}

@main
struct App {
  static func main() {
    run()
  }
}


================================================
FILE: swift-api-examples/spoken-language-identification.swift
================================================
import AVFoundation

extension AudioBuffer {
  func array() -> [Float] {
    return Array(UnsafeBufferPointer(self))
  }
}

extension AVAudioPCMBuffer {
  func array() -> [Float] {
    return self.audioBufferList.pointee.mBuffers.array()
  }
}

func run() {
  let encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx"
  let decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx"

  let whisperConfig = sherpaOnnxSpokenLanguageIdentificationWhisperConfig(
    encoder: encoder,
    decoder: decoder
  )

  var config = sherpaOnnxSpokenLanguageIdentificationConfig(
    whisper: whisperConfig,
    numThreads: 1,
    debug: 1,
    provider: "cpu"
  )
  let filePath = "./sherpa-onnx-whisper-tiny/test_wavs/0.wav"

  let slid = SherpaOnnxSpokenLanguageIdentificationWrapper(config: &config)

  let fileURL: NSURL = NSURL(fileURLWithPath: filePath)
  let audioFile = try! AVAudioFile(forReading: fileURL as URL)

  let audioFormat = audioFile.processingFormat
  assert(audioFormat.sampleRate == 16000)
  assert(audioFormat.channelCount == 1)
  assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)

  let audioFrameCount = UInt32(audioFile.length)
  let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)

  try! audioFile.read(into: audioFileBuffer!)
  let array: [Float]! = audioFileBuffer?.array()
  let result = slid.decode(samples: array)

  print("\nDetectedllanguage is:\n\(result.lang)")
}

@main
struct App {
  static func main() {
    run()
  }
}


================================================
FILE: swift-api-examples/streaming-hlg-decode-file.swift
================================================
import AVFoundation

extension AudioBuffer {
  func array() -> [Float] {
    return Array(UnsafeBufferPointer(self))
  }
}

extension AVAudioPCMBuffer {
  func array() -> [Float] {
    return self.audioBufferList.pointee.mBuffers.array()
  }
}

func run() {
  let filePath =
    "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav"
  let model =
    "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx"
  let tokens = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt"
  let zipfomer2CtcModelConfig = sherpaOnnxOnlineZipformer2CtcModelConfig(
    model: model
  )

  let modelConfig = sherpaOnnxOnlineModelConfig(
    tokens: tokens,
    zipformer2Ctc: zipfomer2CtcModelConfig
  )

  let featConfig = sherpaOnnxFeatureConfig(
    sampleRate: 16000,
    featureDim: 80
  )

  let ctcFstDecoderConfig = sherpaOnnxOnlineCtcFstDecoderConfig(
    graph: "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst",
    maxActive: 3000
  )

  var config = sherpaOnnxOnlineRecognizerConfig(
    featConfig: featConfig,
    modelConfig: modelConfig,
    ctcFstDecoderConfig: ctcFstDecoderConfig
  )

  let recognizer = SherpaOnnxRecognizer(config: &config)

  let fileURL: NSURL = NSURL(fileURLWithPath: filePath)
  let audioFile = try! AVAudioFile(forReading: fileURL as URL)

  let audioFormat = audioFile.processingFormat
  assert(audioFormat.channelCount == 1)
  assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)

  let audioFrameCount = UInt32(audioFile.length)
  let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)

  try! audioFile.read(into: audioFileBuffer!)
  let array: [Float]! = audioFileBuffer?.array()
  recognizer.acceptWaveform(samples: array, sampleRate: Int(audioFormat.sampleRate))

  let tailPadding = [Float](repeating: 0.0, count: 3200)
  recognizer.acceptWaveform(samples: tailPadding, sampleRate: Int(audioFormat.sampleRate))

  recognizer.inputFinished()
  while recognizer.isReady() {
    recognizer.decode()
  }

  let result = recognizer.getResult()
  print("\nresult is:\n\(result.text)")
}

@main
struct App {
  static func main() {
    run()
  }
}


================================================
FILE: swift-api-examples/test-version.swift
================================================
func run() {
  let version = getSherpaOnnxVersion()
  let gitSha1 = getSherpaOnnxGitSha1()
  let gitDate = getSherpaOnnxGitDate()
  print("sherpa-onnx version: \(version)")
  print("sherpa-onnx gitSha1: \(gitSha1)")
  print("sherpa-onnx gitDate: \(gitDate)")
}

@main
struct App {
  static func main() {
    run()
  }
}


================================================
FILE: swift-api-examples/tts-kitten-en.swift
================================================
class MyClass {
  func playSamples(samples: [Float]) {
    print("Play \(samples.count) samples")
  }
}

func run() {
  let model = "./kitten-nano-en-v0_1-fp16/model.fp16.onnx"
  let voices = "./kitten-nano-en-v0_1-fp16/voices.bin"
  let tokens = "./kitten-nano-en-v0_1-fp16/tokens.txt"
  let dataDir = "./kitten-nano-en-v0_1-fp16/espeak-ng-data"
  let kitten = sherpaOnnxOfflineTtsKittenModelConfig(
    model: model,
    voices: voices,
    tokens: tokens,
    dataDir: dataDir
  )
  let modelConfig = sherpaOnnxOfflineTtsModelConfig(debug: 0, kitten: kitten)
  var ttsConfig = sherpaOnnxOfflineTtsConfig(model: modelConfig)

  let myClass = MyClass()

  // We use Unretained here so myClass must be kept alive as the callback is invoked
  //
  // See also
  // https://medium.com/codex/swift-c-callback-interoperability-6d57da6c8ee6
  let arg = Unmanaged<MyClass>.passUnretained(myClass).toOpaque()

  let callback: TtsProgressCallbackWithArg = { samples, n, progress, arg in
    let o = Unmanaged<MyClass>.fromOpaque(arg!).takeUnretainedValue()
    var savedSamples: [Float] = []
    for index in 0..<n {
      savedSamples.append(samples![Int(index)])
    }

    o.playSamples(samples: savedSamples)

    // return 1 so that it continues generating
    return 1
  }

  let tts = SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)

  let text =
    "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
  var genConfig = SherpaOnnxGenerationConfigSwift()
  genConfig.sid = 0
  genConfig.speed = 1.0
  genConfig.silenceScale = 0.2

  let audio = tts.generateWithConfig(
    text: text, config: genConfig, callback: callback, arg: arg)
  let filename = "test-kitten-en.wav"
  let ok = audio.save(filename: filename)
  if ok == 1 {
    print("\nSaved to:\(filename)")
  } else {
    print("Failed to save to \(filename)")
  }
}

@main
struct App {
  static func main() {
    run()
  }
}


================================================
FILE: swift-api-examples/tts-kokoro-en.swift
================================================
class MyClass {
  func playSamples(samples: [Float]) {
    print("Play \(samples.count) samples")
  }
}

func run() {
  let model = "./kokoro-en-v0_19/model.onnx"
  let voices = "./kokoro-en-v0_19/voices.bin"
  let tokens = "./kokoro-en-v0_19/tokens.txt"
  let dataDir = "./kokoro-en-v0_19/espeak-ng-data"
  let kokoro = sherpaOnnxOfflineTtsKokoroModelConfig(
    model: model,
    voices: voices,
    tokens: tokens,
    dataDir: dataDir
  )
  let modelConfig = sherpaOnnxOfflineTtsModelConfig(kokoro: kokoro, debug: 0)
  var ttsConfig = sherpaOnnxOfflineTtsConfig(model: modelConfig)

  let myClass = MyClass()

  // We use Unretained here so myClass must be kept alive as the callback is invoked
  //
  // See also
  // https://medium.com/codex/swift-c-callback-interoperability-6d57da6c8ee6
  let arg = Unmanaged<MyClass>.passUnretained(myClass).toOpaque()

  let callback: TtsProgressCallbackWithArg = { samples, n, progress, arg in
    let o = Unmanaged<MyClass>.fromOpaque(arg!).takeUnretainedValue()
    var savedSamples: [Float] = []
    for index in 0..<n {
      savedSamples.append(samples![Int(index)])
    }

    o.playSamples(samples: savedSamples)

    // return 1 so that it continues generating
    return 1
  }

  let tts = SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)

  let text =
    "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
  var genConfig = SherpaOnnxGenerationConfigSwift()
  genConfig.sid = 0
  genConfig.speed = 1.0
  genConfig.silenceScale = 0.2

  let audio = tts.generateWithConfig(
    text: text, config: genConfig, callback: callback, arg: arg)
  let filename = "test-kokoro-en.wav"
  let ok = audio.save(filename: filename)
  if ok == 1 {
    print("\nSaved to:\(filename)")
  } else {
    print("Failed to save to \(filename)")
  }
}

@main
struct App {
  static func main() {
    run()
  }
}


================================================
FILE: swift-api-examples/tts-kokoro-zh-en.swift
================================================
class MyClass {
  func playSamples(samples: [Float]) {
    print("Play \(samples.count) samples")
  }
}

func run() {
  let model = "./kokoro-multi-lang-v1_0/model.onnx"
  let voices = "./kokoro-multi-lang-v1_0/voices.bin"
  let tokens = "./kokoro-multi-lang-v1_0/tokens.txt"
  let dataDir = "./kokoro-multi-lang-v1_0/espeak-ng-data"
  let lexicon = "./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt"
  let kokoro = sherpaOnnxOfflineTtsKokoroModelConfig(
    model: model,
    voices: voices,
    tokens: tokens,
    dataDir: dataDir,
    lexicon: lexicon
  )
  let modelConfig = sherpaOnnxOfflineTtsModelConfig(kokoro: kokoro, debug: 0)
  var ttsConfig = sherpaOnnxOfflineTtsConfig(model: modelConfig)

  let myClass = MyClass()

  // We use Unretained here so myClass must be kept alive as the callback is invoked
  //
  // See also
  // https://medium.com/codex/swift-c-callback-interoperability-6d57da6c8ee6
  let arg = Unmanaged<MyClass>.passUnretained(myClass).toOpaque()

  let callback: TtsProgressCallbackWithArg = { samples, n, progress, arg in
    let o = Unmanaged<MyClass>.fromOpaque(arg!).takeUnretainedValue()
    var savedSamples: [Float] = []
    for index in 0..<n {
      savedSamples.append(samples![Int(index)])
    }

    o.playSamples(samples: savedSamples)

    // return 1 so that it continues generating
    return 1
  }

  let tts = SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)

  let text =
    "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢？"
  var genConfig = SherpaOnnxGenerationConfigSwift()
  genConfig.sid = 0
  genConfig.speed = 1.0
  genConfig.silenceScale = 0.2

  let audio = tts.generateWithConfig(
    text: text, config: genConfig, callback: callback, arg: arg)
  let filename = "test-kokoro-zh-en.wav"
  let ok = audio.save(filename: filename)
  if ok == 1 {
    print("\nSaved to:\(filename)")
  } else {
    print("Failed to save to \(filename)")
  }
}

@main
struct App {
  static func main() {
    run()
  }
}


================================================
FILE: swift-api-examples/tts-matcha-en.swift
================================================
class MyClass {
  func playSamples(samples: [Float]) {
    print("Play \(samples.count) samples")
  }
}

func run() {
  let acousticModel = "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx"
  let vocoder = "./vocos-22khz-univ.onnx"
  let tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt"
  let dataDir = "./matcha-icefall-en_US-ljspeech/espeak-ng-data"
  let matcha = sherpaOnnxOfflineTtsMatchaModelConfig(
    acousticModel: acousticModel,
    vocoder: vocoder,
    tokens: tokens,
    dataDir: dataDir
  )
  let modelConfig = sherpaOnnxOfflineTtsModelConfig(matcha: matcha, debug: 0)
  var ttsConfig = sherpaOnnxOfflineTtsConfig(model: modelConfig)

  let myClass = MyClass()

  // We use Unretained here so myClass must be kept alive as the callback is invoked
  //
  // See also
  // https://medium.com/codex/swift-c-callback-interoperability-6d57da6c8ee6
  let arg = Unmanaged<MyClass>.passUnretained(myClass).toOpaque()

  let callback: TtsProgressCallbackWithArg = { samples, n, progress, arg in
    let o = Unmanaged<MyClass>.fromOpaque(arg!).takeUnretainedValue()
    var savedSamples: [Float] = []
    for index in 0..<n {
      savedSamples.append(samples![Int(index)])
    }

    o.playSamples(samples: savedSamples)

    // return 1 so that it continues generating
    return 1
  }

  let tts = SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)

  let text =
    "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
  var genConfig = SherpaOnnxGenerationConfigSwift()
  genConfig.sid = 0
  genConfig.speed = 1.0
  genConfig.silenceScale = 0.2

  let audio = tts.generateWithConfig(
    text: text, config: genConfig, callback: callback, arg: arg)
  let filename = "test-matcha-en.wav"
  let ok = audio.save(filename: filename)
  if ok == 1 {
    print("\nSaved to:\(filename)")
  } else {
    print("Failed to save to \(filename)")
  }
}

@main
struct App {
  static func main() {
    run()
  }
}


================================================
FILE: swift-api-examples/tts-matcha-zh.swift
================================================
class MyClass {
  func playSamples(samples: [Float]) {
    print("Play \(samples.count) samples")
  }
}

func run() {
  let acousticModel = "./matcha-icefall-zh-baker/model-steps-3.onnx"
  let vocoder = "./vocos-22khz-univ.onnx"
  let lexicon = "./matcha-icefall-zh-baker/lexicon.txt"
  let tokens = "./matcha-icefall-zh-baker/tokens.txt"
  let ruleFsts =
    "./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst"
  let matcha = sherpaOnnxOfflineTtsMatchaModelConfig(
    acousticModel: acousticModel,
    vocoder: vocoder,
    lexicon: lexicon,
    tokens: tokens
  )
  let modelConfig = sherpaOnnxOfflineTtsModelConfig(matcha: matcha, debug: 0)
  var ttsConfig = sherpaOnnxOfflineTtsConfig(model: modelConfig, ruleFsts: ruleFsts)

  let myClass = MyClass()

  // We use Unretained here so myClass must be kept alive as the callback is invoked
  //
  // See also
  // https://medium.com/codex/swift-c-callback-interoperability-6d57da6c8ee6
  let arg = Unmanaged<MyClass>.passUnretained(myClass).toOpaque()

  let callback: TtsProgressCallbackWithArg = { samples, n, progress, arg in
    let o = Unmanaged<MyClass>.fromOpaque(arg!).takeUnretainedValue()
    var savedSamples: [Float] = []
    for index in 0..<n {
      savedSamples.append(samples![Int(index)])
    }

    o.playSamples(samples: savedSamples)

    // return 1 so that it continues generating
    return 1
  }

  let tts = SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)

  let text = "某某银行的副行长和一些行政领导表示，他们去过长江和长白山; 经济不断增长。2024年12月31号，拨打110或者18920240511。123456块钱。"
  var genConfig = SherpaOnnxGenerationConfigSwift()
  genConfig.sid = 0
  genConfig.speed = 1.0
  genConfig.silenceScale = 0.2

  let audio = tts.generateWithConfig(
    text: text, config: genConfig, callback: callback, arg: arg)
  let filename = "test-matcha-zh.wav"
  let ok = audio.save(filename: filename)
  if ok == 1 {
    print("\nSaved to:\(filename)")
  } else {
    print("Failed to save to \(filename)")
  }
}

@main
struct App {
  static func main() {
    run()
  }
}


================================================
FILE: swift-api-examples/tts-pocket-en.swift
================================================
import Foundation

class PocketTtsProgressHandler {
  func progress(samples: [Float], progress: Float) {
    print(String(format: "Received %d samples, Progress: %.2f%%", samples.count, progress * 100))
  }
}

func runPocketTtsDemo() {
  let pocket = sherpaOnnxOfflineTtsPocketModelConfig(
    lmFlow: "./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_flow.int8.onnx",
    lmMain: "./sherpa-onnx-pocket-tts-int8-2026-01-26/lm_main.int8.onnx",
    encoder: "./sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx",
    decoder: "./sherpa-onnx-pocket-tts-int8-2026-01-26/decoder.int8.onnx",
    textConditioner: "./sherpa-onnx-pocket-tts-int8-2026-01-26/text_conditioner.onnx",
    vocabJson: "./sherpa-onnx-pocket-tts-int8-2026-01-26/vocab.json",
    tokenScoresJson: "./sherpa-onnx-pocket-tts-int8-2026-01-26/token_scores.json"
  )

  let modelConfig = sherpaOnnxOfflineTtsModelConfig(numThreads: 2, pocket: pocket)
  var ttsConfig = sherpaOnnxOfflineTtsConfig(model: modelConfig)
  ttsConfig.model.debug = 1

  let tts = SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)

  let referenceAudioFile = "./sherpa-onnx-pocket-tts-int8-2026-01-26/test_wavs/bria.wav"
  let referenceWave = SherpaOnnxWaveWrapper.readWave(filename: referenceAudioFile)

  var genConfig = SherpaOnnxGenerationConfigSwift()
  genConfig.speed = 1.0
  genConfig.referenceAudio = referenceWave.samples
  genConfig.referenceSampleRate = referenceWave.sampleRate
  genConfig.extra = ["max_reference_audio_len": 15.0]

  let text = """
    Today as always, men fall into two groups: slaves and free men. Whoever \
    does not have two-thirds of his day for himself, is a slave, whatever \
    he may be: a statesman, a businessman, an official, or a scholar. \
    Friends fell out often because life was changing so fast. \
    The easiest thing in the world was to lose touch with someone.
    """

  func generateAndSave(
    outputFile: String, callback: TtsProgressCallbackWithArg? = nil,
    arg: UnsafeMutableRawPointer? = nil
  ) {
    let audio = tts.generateWithConfig(
      text: text,
      config: genConfig,
      callback: callback,
      arg: arg
    )

    if audio.save(filename: outputFile) == 1 {
      print("Saved to: \(outputFile)")
    } else {
      print("Failed to save to \(outputFile)")
    }
  }

  // -------------------------
  // Option 1: with callback
  // -------------------------
  let useCallback = true
  if useCallback {
    let progressHandler = PocketTtsProgressHandler()
    let arg = Unmanaged.passUnretained(progressHandler).toOpaque()

    let callback: TtsProgressCallbackWithArg = { samples, n, progress, arg in
      let handler = Unmanaged<PocketTtsProgressHandler>.fromOpaque(arg!).takeUnretainedValue()

      let buffer: [Float] =
        samples != nil ? Array(UnsafeBufferPointer(start: samples, count: Int(n))) : []
      handler.progress(samples: buffer, progress: progress)
      return 1  // continue generating
    }

    generateAndSave(outputFile: "generated-pocket-callback.wav", callback: callback, arg: arg)
  } else {
    // -------------------------
    // Option 2: direct generation
    // -------------------------
    generateAndSave(outputFile: "generated-pocket-direct.wav")
  }
}

// -------------------------
// Run demo
// -------------------------
@main
struct App {
  static func main() {
    runPocketTtsDemo()
  }
}


================================================
FILE: swift-api-examples/tts-supertonic-en.swift
================================================
import Foundation

class SupertonicTtsProgressHandler {
  func progress(samples: [Float], progress: Float) {
    print(String(format: "Received %d samples, Progress: %.2f%%", samples.count, progress * 100))
  }
}

func runSupertonicTtsDemo() {
  let supertonic = sherpaOnnxOfflineTtsSupertonicModelConfig(
    durationPredictor: "./sherpa-onnx-supertonic-tts-int8-2026-03-06/duration_predictor.int8.onnx",
    textEncoder: "./sherpa-onnx-supertonic-tts-int8-2026-03-06/text_encoder.int8.onnx",
    vectorEstimator: "./sherpa-onnx-supertonic-tts-int8-2026-03-06/vector_estimator.int8.onnx",
    vocoder: "./sherpa-onnx-supertonic-tts-int8-2026-03-06/vocoder.int8.onnx",
    ttsJson: "./sherpa-onnx-supertonic-tts-int8-2026-03-06/tts.json",
    unicodeIndexer: "./sherpa-onnx-supertonic-tts-int8-2026-03-06/unicode_indexer.bin",
    voiceStyle: "./sherpa-onnx-supertonic-tts-int8-2026-03-06/voice.bin"
  )

  let modelConfig = sherpaOnnxOfflineTtsModelConfig(numThreads: 2, supertonic: supertonic)
  var ttsConfig = sherpaOnnxOfflineTtsConfig(model: modelConfig)
  ttsConfig.model.debug = 1

  let tts = SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)

  var genConfig = SherpaOnnxGenerationConfigSwift()
  genConfig.sid = 6
  genConfig.numSteps = 5
  genConfig.speed = 1.25
  genConfig.extra = ["lang": "en"]

  let text =
    "Today as always, men fall into two groups: slaves and free men. Whoever "
    + "does not have two-thirds of his day for himself, is a slave, whatever "
    + "he may be: a statesman, a businessman, an official, or a scholar."

  func generateAndSave(
    outputFile: String, callback: TtsProgressCallbackWithArg? = nil,
    arg: UnsafeMutableRawPointer? = nil
  ) {
    let audio = tts.generateWithConfig(
      text: text,
      config: genConfig,
      callback: callback,
      arg: arg
    )

    if audio.save(filename: outputFile) == 1 {
      print("Saved to: \(outputFile)")
    } else {
      print("Failed to save to \(outputFile)")
    }
  }

  // -------------------------
  // Option 1: with callback
  // -------------------------
  let useCallback = true
  if useCallback {
    let progressHandler = SupertonicTtsProgressHandler()
    let arg = Unmanaged.passUnretained(progressHandler).toOpaque()

    let callback: TtsProgressCallbackWithArg = { samples, n, progress, arg in
      let handler = Unmanaged<SupertonicTtsProgressHandler>.fromOpaque(arg!).takeUnretainedValue()

      let buffer: [Float] =
        samples != nil ? Array(UnsafeBufferPointer(start: samples, count: Int(n))) : []
      handler.progress(samples: buffer, progress: progress)
      return 1  // continue generating
    }

    generateAndSave(outputFile: "generated-supertonic-callback.wav", callback: callback, arg: arg)
  } else {
    // -------------------------
    // Option 2: direct generation
    // -------------------------
    generateAndSave(outputFile: "generated-supertonic-direct.wav")
  }
}

// -------------------------
// Run demo
// -------------------------
@main
struct App {
  static func main() {
    runSupertonicTtsDemo()
  }
}


================================================
FILE: swift-api-examples/tts-vits.swift
================================================
class MyClass {
  func playSamples(samples: [Float]) {
    print("Play \(samples.count) samples")
  }
}

func run() {
  let model = "./vits-piper-en_US-amy-low/en_US-amy-low.onnx"
  let tokens = "./vits-piper-en_US-amy-low/tokens.txt"
  let dataDir = "./vits-piper-en_US-amy-low/espeak-ng-data"
  let vits = sherpaOnnxOfflineTtsVitsModelConfig(
    model: model,
    lexicon: "",
    tokens: tokens,
    dataDir: dataDir
  )
  let modelConfig = sherpaOnnxOfflineTtsModelConfig(vits: vits)
  var ttsConfig = sherpaOnnxOfflineTtsConfig(model: modelConfig)

  let myClass = MyClass()

  // We use Unretained here so myClass must be kept alive as the callback is invoked
  //
  // See also
  // https://medium.com/codex/swift-c-callback-interoperability-6d57da6c8ee6
  let arg = Unmanaged<MyClass>.passUnretained(myClass).toOpaque()

  let callback: TtsProgressCallbackWithArg = { samples, n, progress, arg in
    let o = Unmanaged<MyClass>.fromOpaque(arg!).takeUnretainedValue()
    var savedSamples: [Float] = []
    for index in 0..<n {
      savedSamples.append(samples![Int(index)])
    }

    o.playSamples(samples: savedSamples)

    // return 1 so that it continues generating
    return 1
  }

  let tts = SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)

  let text =
    "“Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.”"
  var genConfig = SherpaOnnxGenerationConfigSwift()
  genConfig.sid = 99
  genConfig.speed = 1.0
  genConfig.silenceScale = 0.2

  let audio = tts.generateWithConfig(
    text: text, config: genConfig, callback: callback, arg: arg)
  let filename = "test-vits-en.wav"
  let ok = audio.save(filename: filename)
  if ok == 1 {
    print("\nSaved to:\(filename)")
  } else {
    print("Failed to save to \(filename)")
  }
}

@main
struct App {
  static func main() {
    run()
  }
}


================================================
FILE: swift-api-examples/tts-zipvoice.swift
================================================
import Foundation

class ZipVoiceTtsProgressHandler {
  func progress(samples: [Float], progress: Float) {
    print(String(format: "Received %d samples, Progress: %.2f%%", samples.count, progress * 100))
  }
}

func runZipVoiceTtsDemo() {
  let zipvoice = sherpaOnnxOfflineTtsZipvoiceModelConfig(
    tokens: "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/tokens.txt",
    encoder: "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/encoder.int8.onnx",
    decoder: "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/decoder.int8.onnx",
    vocoder: "./vocos_24khz.onnx",
    dataDir: "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/espeak-ng-data",
    lexicon: "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/lexicon.txt"
  )

  let modelConfig = sherpaOnnxOfflineTtsModelConfig(numThreads: 2, zipvoice: zipvoice)
  var ttsConfig = sherpaOnnxOfflineTtsConfig(model: modelConfig)
  ttsConfig.model.debug = 1

  let tts = SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)

  let referenceAudioFile = "./sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/test_wavs/leijun-1.wav"
  let referenceWave = SherpaOnnxWaveWrapper.readWave(filename: referenceAudioFile)

  var genConfig = SherpaOnnxGenerationConfigSwift()
  genConfig.speed = 1.0
  genConfig.referenceAudio = referenceWave.samples
  genConfig.referenceSampleRate = referenceWave.sampleRate
  genConfig.referenceText = "那还是三十六年前, 一九八七年. 我呢考上了武汉大学的计算机系."
  genConfig.numSteps = 4
  genConfig.extra = ["min_char_in_sentence": "10"]

  let text = "小米的价值观是真诚, 热爱. 真诚，就是不欺人也不自欺. 热爱, 就是全心投入并享受其中."

  func generateAndSave(
    outputFile: String, callback: TtsProgressCallbackWithArg? = nil,
    arg: UnsafeMutableRawPointer? = nil
  ) {
    let audio = tts.generateWithConfig(
      text: text,
      config: genConfig,
      callback: callback,
      arg: arg
    )

    if audio.save(filename: outputFile) == 1 {
      print("Saved to: \(outputFile)")
    } else {
      print("Failed to save to \(outputFile)")
    }
  }

  let useCallback = true
  if useCallback {
    let progressHandler = ZipVoiceTtsProgressHandler()
    let arg = Unmanaged.passUnretained(progressHandler).toOpaque()

    let callback: TtsProgressCallbackWithArg = { samples, n, progress, arg in
      let handler = Unmanaged<ZipVoiceTtsProgressHandler>.fromOpaque(arg!).takeUnretainedValue()

      let buffer: [Float] =
        samples != nil ? Array(UnsafeBufferPointer(start: samples, count: Int(n))) : []
      handler.progress(samples: buffer, progress: progress)
      return 1
    }

    generateAndSave(outputFile: "generated-zipvoice-callback.wav", callback: callback, arg: arg)
  } else {
    generateAndSave(outputFile: "generated-zipvoice-direct.wav")
  }
}

@main
struct App {
  static func main() {
    runZipVoiceTtsDemo()
  }
}


================================================
FILE: swift-api-examples/wenet-ctc-asr.swift
================================================
import AVFoundation

extension AudioBuffer {
  func array() -> [Float] {
    return Array(UnsafeBufferPointer(self))
  }
}

extension AVAudioPCMBuffer {
  func array() -> [Float] {
    return self.audioBufferList.pointee.mBuffers.array()
  }
}

func run() {
  let model =
    "./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx"
  let tokens =
    "./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt"

  let wenetCtc = sherpaOnnxOfflineWenetCtcModelConfig(
    model: model
  )

  let modelConfig = sherpaOnnxOfflineModelConfig(
    tokens: tokens,
    debug: 0,
    wenetCtc: wenetCtc
  )

  let featConfig = sherpaOnnxFeatureConfig(
    sampleRate: 16000,
    featureDim: 80
  )
  var config = sherpaOnnxOfflineRecognizerConfig(
    featConfig: featConfig,
    modelConfig: modelConfig
  )

  let recognizer = SherpaOnnxOfflineRecognizer(config: &config)

  let filePath =
    "./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav"
  let fileURL: NSURL = NSURL(fileURLWithPath: filePath)
  let audioFile = try! AVAudioFile(forReading: fileURL as URL)

  let audioFormat = audioFile.processingFormat
  assert(audioFormat.channelCount == 1)
  assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)

  let audioFrameCount = UInt32(audioFile.length)
  let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)

  try! audioFile.read(into: audioFileBuffer!)
  let array: [Float]! = audioFileBuffer?.array()
  let result = recognizer.decode(samples: array, sampleRate: Int(audioFormat.sampleRate))
  print("\nresult is:\n\(result.text)")
  if result.timestamps.count != 0 {
    print("\ntimestamps is:\n\(result.timestamps)")
  }
}

@main
struct App {
  static func main() {
    run()
  }
}


================================================
FILE: swift-api-examples/zipformer-ctc-asr.swift
================================================
import AVFoundation

extension AudioBuffer {
  func array() -> [Float] {
    return Array(UnsafeBufferPointer(self))
  }
}

extension AVAudioPCMBuffer {
  func array() -> [Float] {
    return self.audioBufferList.pointee.mBuffers.array()
  }
}

func run() {
  let model = "./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx"
  let tokens = "./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt"

  let zipformerCtc = sherpaOnnxOfflineZipformerCtcModelConfig(
    model: model
  )

  let modelConfig = sherpaOnnxOfflineModelConfig(
    tokens: tokens,
    debug: 0,
    zipformerCtc: zipformerCtc
  )

  let featConfig = sherpaOnnxFeatureConfig(
    sampleRate: 16000,
    featureDim: 80
  )
  var config = sherpaOnnxOfflineRecognizerConfig(
    featConfig: featConfig,
    modelConfig: modelConfig
  )

  let recognizer = SherpaOnnxOfflineRecognizer(config: &config)

  let filePath = "./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/test_wavs/0.wav"
  let fileURL: NSURL = NSURL(fileURLWithPath: filePath)
  let audioFile = try! AVAudioFile(forReading: fileURL as URL)

  let audioFormat = audioFile.processingFormat
  assert(audioFormat.channelCount == 1)
  assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)

  let audioFrameCount = UInt32(audioFile.length)
  let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)

  try! audioFile.read(into: audioFileBuffer!)
  let array: [Float]! = audioFileBuffer?.array()
  let result = recognizer.decode(samples: array, sampleRate: Int(audioFormat.sampleRate))
  print("\nresult is:\n\(result.text)")
  if result.timestamps.count != 0 {
    print("\ntimestamps is:\n\(result.timestamps)")
  }

}

@main
struct App {
  static func main() {
    run()
  }
}


================================================
FILE: toolchains/aarch64-linux-gnu.toolchain.cmake
================================================
# Copied from https://github.com/Tencent/ncnn/blob/master/toolchains/aarch64-linux-gnu.toolchain.cmake

set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR aarch64)

set(CMAKE_C_COMPILER "aarch64-linux-gnu-gcc")
set(CMAKE_CXX_COMPILER "aarch64-linux-gnu-g++")

set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)

set(CMAKE_C_FLAGS "-march=armv8-a")
set(CMAKE_CXX_FLAGS "-march=armv8-a")

# cache flags
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "c flags")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "c++ flags")


================================================
FILE: toolchains/arm-linux-gnueabihf.toolchain.cmake
================================================
# Copied from https://github.com/Tencent/ncnn/blob/master/toolchains/arm-linux-gnueabihf.toolchain.cmake
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR arm)

set(CMAKE_C_COMPILER "arm-linux-gnueabihf-gcc")
set(CMAKE_CXX_COMPILER "arm-linux-gnueabihf-g++")

set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)

set(CMAKE_C_FLAGS "-march=armv7-a -mfloat-abi=hard -mfpu=neon")
set(CMAKE_CXX_FLAGS "-march=armv7-a -mfloat-abi=hard -mfpu=neon")

# cache flags
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "c flags")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "c++ flags")


================================================
FILE: toolchains/ios.toolchain.cmake
================================================
# This file is part of the ios-cmake project. It was retrieved from
# https://github.com/leetal/ios-cmake.git, which is a fork of
# https://github.com/gerstrong/ios-cmake.git, which is a fork of
# https://github.com/cristeab/ios-cmake.git, which is a fork of
# https://code.google.com/p/ios-cmake/. Which in turn is based off of
# the Platform/Darwin.cmake and Platform/UnixPaths.cmake files which
# are included with CMake 2.8.4
#
# The ios-cmake project is licensed under the new BSD license.
#
# Copyright (c) 2014, Bogdan Cristea and LTE Engineering Software,
# Kitware, Inc., Insight Software Consortium.  All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# This file is based off of the Platform/Darwin.cmake and
# Platform/UnixPaths.cmake files which are included with CMake 2.8.4
# It has been altered for iOS development.
#
# Updated by Alex Stewart (alexs.mac@gmail.com)
#
# *****************************************************************************
#      Now maintained by Alexander Widerberg (widerbergaren [at] gmail.com)
#                      under the BSD-3-Clause license
#                   https://github.com/leetal/ios-cmake
# *****************************************************************************
#
#                           INFORMATION / HELP
#
# The following options control the behaviour of this toolchain:
#
# PLATFORM: (default "OS64")
#    OS = Build for iPhoneOS.
#    OS64 = Build for arm64 iphoneOS.
#    OS64COMBINED = Build for arm64 x86_64 iphoneOS. Combined into FAT STATIC lib (supported on 3.14+ of CMakewith "-G Xcode" argument ONLY)
#    SIMULATOR = Build for x86 i386 iphoneOS Simulator.
#    SIMULATOR64 = Build for x86_64 iphoneOS Simulator.
#    SIMULATORARM64 = Build for arm64 iphoneOS Simulator.
#    TVOS = Build for arm64 tvOS.
#    TVOSCOMBINED = Build for arm64 x86_64 tvOS. Combined into FAT STATIC lib (supported on 3.14+ of CMake with "-G Xcode" argument ONLY)
#    SIMULATOR_TVOS = Build for x86_64 tvOS Simulator.
#    WATCHOS = Build for armv7k arm64_32 for watchOS.
#    WATCHOSCOMBINED = Build for armv7k arm64_32 x86_64 watchOS. Combined into FAT STATIC lib (supported on 3.14+ of CMake with "-G Xcode" argument ONLY)
#    SIMULATOR_WATCHOS = Build for x86_64 for watchOS Simulator.
#    MAC = Build for x86_64 macOS.
#    MAC_ARM64 = Build for Apple Silicon macOS.
#    MAC_CATALYST = Build for x86_64 macOS with Catalyst support (iOS toolchain on macOS).
#                   Note: The build argument "MACOSX_DEPLOYMENT_TARGET" can be used to control min-version of macOS
#    MAC_CATALYST_ARM64 = Build for Apple Silicon macOS with Catalyst support (iOS toolchain on macOS).
#                         Note: The build argument "MACOSX_DEPLOYMENT_TARGET" can be used to control min-version of macOS
#
# CMAKE_OSX_SYSROOT: Path to the SDK to use.  By default this is
#    automatically determined from PLATFORM and xcodebuild, but
#    can also be manually specified (although this should not be required).
#
# CMAKE_DEVELOPER_ROOT: Path to the Developer directory for the platform
#    being compiled for.  By default this is automatically determined from
#    CMAKE_OSX_SYSROOT, but can also be manually specified (although this should
#    not be required).
#
# DEPLOYMENT_TARGET: Minimum SDK version to target. Default 2.0 on watchOS and 9.0 on tvOS+iOS
#
# ENABLE_BITCODE: (1|0) Enables or disables bitcode support. Default 1 (true)
#
# ENABLE_ARC: (1|0) Enables or disables ARC support. Default 1 (true, ARC enabled by default)
#
# ENABLE_VISIBILITY: (1|0) Enables or disables symbol visibility support. Default 0 (false, visibility hidden by default)
#
# ENABLE_STRICT_TRY_COMPILE: (1|0) Enables or disables strict try_compile() on all Check* directives (will run linker
#    to actually check if linking is possible). Default 0 (false, will set CMAKE_TRY_COMPILE_TARGET_TYPE to STATIC_LIBRARY)
#
# ARCHS: (armv7 armv7s armv7k arm64 arm64_32 i386 x86_64) If specified, will override the default architectures for the given PLATFORM
#    OS = armv7 armv7s arm64 (if applicable)
#    OS64 = arm64 (if applicable)
#    SIMULATOR = i386
#    SIMULATOR64 = x86_64
#    SIMULATORARM64 = arm64
#    TVOS = arm64
#    SIMULATOR_TVOS = x86_64 (i386 has since long been deprecated)
#    WATCHOS = armv7k arm64_32 (if applicable)
#    SIMULATOR_WATCHOS = x86_64 (i386 has since long been deprecated)
#    MAC = x86_64
#    MAC_ARM64 = arm64
#    MAC_CATALYST = x86_64
#    MAC_CATALYST_ARM64 = arm64
#
# This toolchain defines the following properties (available via get_property()) for use externally:
#
# PLATFORM: The currently targeted platform.
# XCODE_VERSION: Version number (not including Build version) of Xcode detected.
# SDK_VERSION: Version of SDK being used.
# OSX_ARCHITECTURES: Architectures being compiled for (generated from PLATFORM).
# APPLE_TARGET_TRIPLE: Used by autoconf build systems. NOTE: If "ARCHS" are overridden, this will *NOT* be set!
#
# This toolchain defines the following macros for use externally:
#
# set_xcode_property (TARGET XCODE_PROPERTY XCODE_VALUE XCODE_VARIANT)
#   A convenience macro for setting xcode specific properties on targets.
#   Available variants are: All, Release, RelWithDebInfo, Debug, MinSizeRel
#   example: set_xcode_property (myioslib IPHONEOS_DEPLOYMENT_TARGET "3.1" "all").
#
# find_host_package (PROGRAM ARGS)
#   A macro used to find executable programs on the host system, not within the
#   environment. Thanks to the android-cmake project for providing the
#   command.
#

cmake_minimum_required(VERSION 3.8.0)

# CMake invokes the toolchain file twice during the first build, but only once during subsequent rebuilds.
if(IOS_TOOLCHAIN_HAS_RUN)
  return()
endif(IOS_TOOLCHAIN_HAS_RUN)
set(IOS_TOOLCHAIN_HAS_RUN true)

###############################################################################
#                                  OPTIONS                                    #
###############################################################################

option(DROP_32_BIT "Drops the 32-bit targets universally." YES)

###############################################################################
#                                END OPTIONS                                  #
###############################################################################

# List of supported platform values
list(APPEND _supported_platforms
        "OS" "OS64" "OS64COMBINED" "SIMULATOR" "SIMULATOR64" "SIMULATORARM64"
        "TVOS" "TVOSCOMBINED" "SIMULATOR_TVOS"
        "WATCHOS" "WATCHOSCOMBINED" "SIMULATOR_WATCHOS"
        "MAC" "MAC_ARM64"
        "MAC_CATALYST" "MAC_CATALYST_ARM64")

# Cache what generator is used
set(USED_CMAKE_GENERATOR "${CMAKE_GENERATOR}")

# Check if using a CMake version capable of building combined FAT builds (simulator and target slices combined in one static lib)
if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.14")
  set(MODERN_CMAKE YES)
endif()

# Get the Xcode version being used.
# Problem: CMake runs toolchain files multiple times, but can't read cache variables on some runs.
# Workaround: On first run (in which cache variables are always accessible), set an intermediary environment variable.
#
# NOTE: This pattern is used i many places in this toolchain to speed up checks of all sorts
if(DEFINED XCODE_VERSION_INT)
  # Environment variables are always preserved.
  set(ENV{_XCODE_VERSION_INT} "${XCODE_VERSION_INT}")
elseif(DEFINED ENV{_XCODE_VERSION_INT})
  set(XCODE_VERSION_INT "$ENV{_XCODE_VERSION_INT}")
elseif(NOT DEFINED XCODE_VERSION_INT)
  find_program(XCODEBUILD_EXECUTABLE xcodebuild)
  if(NOT XCODEBUILD_EXECUTABLE)
    message(FATAL_ERROR "xcodebuild not found. Please install either the standalone commandline tools or Xcode.")
  endif()
  execute_process(COMMAND ${XCODEBUILD_EXECUTABLE} -version
          OUTPUT_VARIABLE XCODE_VERSION_INT
          ERROR_QUIET
          OUTPUT_STRIP_TRAILING_WHITESPACE)
  string(REGEX MATCH "Xcode [0-9\\.]+" XCODE_VERSION_INT "${XCODE_VERSION_INT}")
  string(REGEX REPLACE "Xcode ([0-9\\.]+)" "\\1" XCODE_VERSION_INT "${XCODE_VERSION_INT}")
  set(XCODE_VERSION_INT "${XCODE_VERSION_INT}" CACHE INTERNAL "")
endif()

# Assuming that xcode 12.0 is installed you most probably have ios sdk 14.0 or later installed (tested on Big Sur)
# if you don't set a deployment target it will be set the way you only get 64-bit builds
if(NOT DEFINED DEPLOYMENT_TARGET AND XCODE_VERSION_INT VERSION_GREATER 12.0)
  # Temporarily fix the arm64 issues in CMake install-combined by excluding arm64 for simulator builds (needed for Apple Silicon...)
  set(CMAKE_XCODE_ATTRIBUTE_EXCLUDED_ARCHS[sdk=iphonesimulator*] "arm64")
endif()

# Check if the platform variable is set
if(DEFINED PLATFORM)
  # Environment variables are always preserved.
  set(ENV{_PLATFORM} "${PLATFORM}")
elseif(DEFINED ENV{_PLATFORM})
  set(PLATFORM "$ENV{_PLATFORM}")
elseif(NOT DEFINED PLATFORM)
  message(FATAL_ERROR "PLATFORM argument not set. Bailing configure since I don't know what target you want to build for!")
endif ()

# Safeguard that the platform value is set and is one of the supported values
list(FIND _supported_platforms ${PLATFORM} contains_PLATFORM)
if("${contains_PLATFORM}" EQUAL "-1")
  string(REPLACE ";"  "\n * " _supported_platforms_formatted "${_supported_platforms}")
  message(FATAL_ERROR " Invalid PLATFORM specified! Current value: ${PLATFORM}.\n"
          " Supported PLATFORM values: \n * ${_supported_platforms_formatted}")
endif()

# Check if Apple Silicon is supported
if(PLATFORM MATCHES "^(MAC_ARM64)$|^(MAC_CATALYST_ARM64)$" AND ${CMAKE_VERSION} VERSION_LESS "3.19.5")
  message(FATAL_ERROR "Apple Silicon builds requires a minimum of CMake 3.19.5")
endif()

# Touch toolchain variable to suppress "unused variable" warning.
# This happens if CMake is invoked with the same command line the second time.
if(CMAKE_TOOLCHAIN_FILE)
endif()

# Fix for PThread library not in path
set(CMAKE_THREAD_LIBS_INIT "-lpthread")
set(CMAKE_HAVE_THREADS_LIBRARY 1)
set(CMAKE_USE_WIN32_THREADS_INIT 0)
set(CMAKE_USE_PTHREADS_INIT 1)

# Specify minimum version of deployment target.
if(NOT DEFINED DEPLOYMENT_TARGET)
  if (PLATFORM MATCHES "WATCHOS")
    # Unless specified, SDK version 4.0 is used by default as minimum target version (watchOS).
    set(DEPLOYMENT_TARGET "4.0")
  elseif(PLATFORM STREQUAL "MAC")
    # Unless specified, SDK version 10.13 (High sierra) is used by default as minimum target version (macos).
    set(DEPLOYMENT_TARGET "10.13")
  elseif(PLATFORM STREQUAL "MAC_ARM64")
    # Unless specified, SDK version 11.0 (Big Sur) is used by default as minimum target version (macos on arm).
    set(DEPLOYMENT_TARGET "11.0")
  elseif(PLATFORM STREQUAL "MAC_CATALYST" OR PLATFORM STREQUAL "MAC_CATALYST_ARM64")
    # Unless specified, SDK version 13.0 is used by default as minimum target version (mac catalyst minimum requirement).
    set(DEPLOYMENT_TARGET "13.0")
  else()
    # Unless specified, SDK version 11.0 is used by default as minimum target version (iOS, tvOS).
    set(DEPLOYMENT_TARGET "11.0")
  endif()
  message(STATUS "[DEFAULTS] Using the default min-version since DEPLOYMENT_TARGET not provided!")
elseif(DEFINED DEPLOYMENT_TARGET AND PLATFORM STREQUAL "MAC_CATALYST" AND ${DEPLOYMENT_TARGET} VERSION_LESS "13.0")
  message(FATAL_ERROR "Mac Catalyst builds requires a minimum deployment target of 13.0!")
endif()

# Store the DEPLOYMENT_TARGET in the cache
set(DEPLOYMENT_TARGET "${DEPLOYMENT_TARGET}" CACHE INTERNAL "")

# Handle the case where we are targeting iOS and a version above 10.3.4 (32-bit support dropped officially)
if(PLATFORM STREQUAL "OS" AND DEPLOYMENT_TARGET VERSION_GREATER_EQUAL 10.3.4)
  set(PLATFORM "OS64")
  message(STATUS "Targeting minimum SDK version ${DEPLOYMENT_TARGET}. Dropping 32-bit support.")
elseif(PLATFORM STREQUAL "SIMULATOR" AND DEPLOYMENT_TARGET VERSION_GREATER_EQUAL 10.3.4)
  set(PLATFORM "SIMULATOR64")
  message(STATUS "Targeting minimum SDK version ${DEPLOYMENT_TARGET}. Dropping 32-bit support.")
endif()

set(PLATFORM_INT "${PLATFORM}")

if(DEFINED ARCHS)
  string(REPLACE ";" "-" ARCHS_SPLIT "${ARCHS}")
endif()

# Determine the platform name and architectures for use in xcodebuild commands
# from the specified PLATFORM_INT name.
if(PLATFORM_INT STREQUAL "OS")
  set(SDK_NAME iphoneos)
  if(NOT ARCHS)
    set(ARCHS armv7 armv7s arm64)
    set(APPLE_TARGET_TRIPLE_INT arm-apple-ios)
  endif()
elseif(PLATFORM_INT STREQUAL "OS64")
  set(SDK_NAME iphoneos)
  if(NOT ARCHS)
    if (XCODE_VERSION_INT VERSION_GREATER 10.0)
      set(ARCHS arm64) # Add arm64e when Apple have fixed the integration issues with it, libarclite_iphoneos.a is currently missung bitcode markers for example
    else()
      set(ARCHS arm64)
    endif()
    set(APPLE_TARGET_TRIPLE_INT aarch64-apple-ios)
  else()
    set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios)
  endif()
elseif(PLATFORM_INT STREQUAL "OS64COMBINED")
  set(SDK_NAME iphoneos)
  if(MODERN_CMAKE)
    if(NOT ARCHS)
      if (XCODE_VERSION_INT VERSION_GREATER 10.0)
        set(ARCHS arm64 x86_64) # Add arm64e when Apple have fixed the integration issues with it, libarclite_iphoneos.a is currently missung bitcode markers for example
        set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphoneos*] "arm64")
        set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphonesimulator*] "x86_64")
        set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphoneos*] "arm64")
        set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphonesimulator*] "x86_64")
      else()
        set(ARCHS arm64 x86_64)
        set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphoneos*] "arm64")
        set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphonesimulator*] "x86_64")
        set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphoneos*] "arm64")
        set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphonesimulator*] "x86_64")
      endif()
      set(APPLE_TARGET_TRIPLE_INT aarch64-x86_64-apple-ios)
    else()
      set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios)
    endif()
  else()
    message(FATAL_ERROR "Please make sure that you are running CMake 3.14+ to make the OS64COMBINED setting work")
  endif()
elseif(PLATFORM_INT STREQUAL "SIMULATOR")
  set(SDK_NAME iphonesimulator)
  if(NOT ARCHS)
    set(ARCHS i386)
    set(APPLE_TARGET_TRIPLE_INT i386-apple-ios)
  else()
    set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios)
  endif()
  message(DEPRECATION "SIMULATOR IS DEPRECATED. Consider using SIMULATOR64 instead.")
elseif(PLATFORM_INT STREQUAL "SIMULATOR64")
  set(SDK_NAME iphonesimulator)
  if(NOT ARCHS)
    set(ARCHS x86_64)
    set(APPLE_TARGET_TRIPLE_INT x86_64-apple-ios)
  else()
    set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios)
  endif()
elseif(PLATFORM_INT STREQUAL "SIMULATORARM64")
  set(SDK_NAME iphonesimulator)
  if(NOT ARCHS)
    set(ARCHS arm64)
    set(APPLE_TARGET_TRIPLE_INT aarch64-apple-ios)
  else()
    set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios)
  endif()
elseif(PLATFORM_INT STREQUAL "TVOS")
  set(SDK_NAME appletvos)
  if(NOT ARCHS)
    set(ARCHS arm64)
    set(APPLE_TARGET_TRIPLE_INT aarch64-apple-tvos)
  else()
    set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-tvos)
  endif()
elseif (PLATFORM_INT STREQUAL "TVOSCOMBINED")
  set(SDK_NAME appletvos)
  if(MODERN_CMAKE)
    if(NOT ARCHS)
      set(ARCHS arm64 x86_64)
      set(APPLE_TARGET_TRIPLE_INT aarch64-x86_64-apple-tvos)
      set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=appletvos*] "arm64")
      set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=appletvsimulator*] "x86_64")
      set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=appletvos*] "arm64")
      set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=appletvsimulator*] "x86_64")
    else()
      set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-tvos)
    endif()
  else()
    message(FATAL_ERROR "Please make sure that you are running CMake 3.14+ to make the TVOSCOMBINED setting work")
  endif()
elseif(PLATFORM_INT STREQUAL "SIMULATOR_TVOS")
  set(SDK_NAME appletvsimulator)
  if(NOT ARCHS)
    set(ARCHS x86_64)
    set(APPLE_TARGET_TRIPLE_INT x86_64-apple-tvos)
  else()
    set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-tvos)
  endif()
elseif(PLATFORM_INT STREQUAL "WATCHOS")
  set(SDK_NAME watchos)
  if(NOT ARCHS)
    if (XCODE_VERSION_INT VERSION_GREATER 10.0)
      set(ARCHS armv7k arm64_32)
      set(APPLE_TARGET_TRIPLE_INT aarch64_32-apple-watchos)
    else()
      set(ARCHS armv7k)
      set(APPLE_TARGET_TRIPLE_INT arm-apple-watchos)
    endif()
  else()
    set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-watchos)
  endif()
elseif(PLATFORM_INT STREQUAL "WATCHOSCOMBINED")
  set(SDK_NAME watchos)
  if(MODERN_CMAKE)
    if(NOT ARCHS)
      if (XCODE_VERSION_INT VERSION_GREATER 10.0)
        set(ARCHS armv7k arm64_32 i386)
        set(APPLE_TARGET_TRIPLE_INT aarch64_32-i386-apple-watchos)
        set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=watchos*] "armv7k arm64_32")
        set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=watchsimulator*] "i386")
        set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=watchos*] "armv7k arm64_32")
        set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=watchsimulator*] "i386")
      else()
        set(ARCHS armv7k i386)
        set(APPLE_TARGET_TRIPLE_INT arm-i386-apple-watchos)
        set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=watchos*] "armv7k")
        set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=watchsimulator*] "i386")
        set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=watchos*] "armv7k")
        set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=watchsimulator*] "i386")
      endif()
    else()
      set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-watchos)
    endif()
  else()
    message(FATAL_ERROR "Please make sure that you are running CMake 3.14+ to make the WATCHOSCOMBINED setting work")
  endif()
elseif(PLATFORM_INT STREQUAL "SIMULATOR_WATCHOS")
  set(SDK_NAME watchsimulator)
  if(NOT ARCHS)
    set(ARCHS i386)
    set(APPLE_TARGET_TRIPLE_INT i386-apple-watchos)
  else()
    set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-watchos)
  endif()
elseif(PLATFORM_INT STREQUAL "MAC" OR PLATFORM_INT STREQUAL "MAC_CATALYST")
  set(SDK_NAME macosx)
  if(NOT ARCHS)
    set(ARCHS x86_64)
  endif()
  string(REPLACE ";" "-" ARCHS_SPLIT "${ARCHS}")
  if(PLATFORM_INT STREQUAL "MAC")
    set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-macosx)
  elseif(PLATFORM_INT STREQUAL "MAC_CATALYST")
    set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-macabi)
  endif()
elseif(PLATFORM_INT MATCHES "^(MAC_ARM64)$|^(MAC_CATALYST_ARM64)$")
  set(SDK_NAME macosx)
  if(NOT ARCHS)
    set(ARCHS arm64)
  endif()
  string(REPLACE ";" "-" ARCHS_SPLIT "${ARCHS}")
  if(PLATFORM_INT STREQUAL "MAC_ARM64")
    set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-macosx)
  elseif(PLATFORM_INT STREQUAL "MAC_CATALYST_ARM64")
    set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-macabi)
  endif()
else()
  message(FATAL_ERROR "Invalid PLATFORM: ${PLATFORM_INT}")
endif()

if(MODERN_CMAKE AND PLATFORM_INT MATCHES ".*COMBINED" AND NOT CMAKE_GENERATOR MATCHES "Xcode")
  message(FATAL_ERROR "The COMBINED options only work with Xcode generator, -G Xcode")
endif()

if(CMAKE_GENERATOR MATCHES "Xcode" AND PLATFORM_INT MATCHES "MAC_CATALYST_.*")
  set(CMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY "libc++")
  set(CMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS "macosx")
  set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-maccatalyst")
  if(NOT DEFINED MACOSX_DEPLOYMENT_TARGET)
    set(CMAKE_XCODE_ATTRIBUTE_MACOSX_DEPLOYMENT_TARGET "10.15")
  else()
    set(CMAKE_XCODE_ATTRIBUTE_MACOSX_DEPLOYMENT_TARGET "${MACOSX_DEPLOYMENT_TARGET}")
  endif()
elseif(CMAKE_GENERATOR MATCHES "Xcode")
  set(CMAKE_XCODE_ATTRIBUTE_IPHONEOS_DEPLOYMENT_TARGET "${DEPLOYMENT_TARGET}")
  if(NOT PLATFORM_INT MATCHES ".*COMBINED")
    set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=${SDK_NAME}*] "${ARCHS}")
    set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=${SDK_NAME}*] "${ARCHS}")
  endif()
endif()

# If user did not specify the SDK root to use, then query xcodebuild for it.
if(DEFINED CMAKE_OSX_SYSROOT_INT)
  # Environment variables are always preserved.
  set(ENV{_CMAKE_OSX_SYSROOT_INT} "${CMAKE_OSX_SYSROOT_INT}")
elseif(DEFINED ENV{_CMAKE_OSX_SYSROOT_INT})
  set(CMAKE_OSX_SYSROOT_INT "$ENV{_CMAKE_OSX_SYSROOT_INT}")
elseif(NOT DEFINED CMAKE_OSX_SYSROOT_INT)
  execute_process(COMMAND ${XCODEBUILD_EXECUTABLE} -version -sdk ${SDK_NAME} Path
          OUTPUT_VARIABLE CMAKE_OSX_SYSROOT_INT
          ERROR_QUIET
          OUTPUT_STRIP_TRAILING_WHITESPACE)
endif()

if (NOT DEFINED CMAKE_OSX_SYSROOT_INT AND NOT DEFINED CMAKE_OSX_SYSROOT)
  message(SEND_ERROR "Please make sure that Xcode is installed and that the toolchain"
          "is pointing to the correct path. Please run:"
          "sudo xcode-select -s /Applications/Xcode.app/Contents/Developer"
          "and see if that fixes the problem for you.")
  message(FATAL_ERROR "Invalid CMAKE_OSX_SYSROOT: ${CMAKE_OSX_SYSROOT} "
          "does not exist.")
elseif(DEFINED CMAKE_OSX_SYSROOT_INT)
  set(CMAKE_OSX_SYSROOT_INT "${CMAKE_OSX_SYSROOT_INT}" CACHE INTERNAL "")
  # Specify the location or name of the platform SDK to be used in CMAKE_OSX_SYSROOT.
  set(CMAKE_OSX_SYSROOT "${CMAKE_OSX_SYSROOT_INT}" CACHE INTERNAL "")
endif()

# Use bitcode or not
if(NOT DEFINED ENABLE_BITCODE AND NOT ARCHS MATCHES "((^|;|, )(i386|x86_64))+")
  # Unless specified, enable bitcode support by default
  message(STATUS "[DEFAULTS] Enabling bitcode support by default. ENABLE_BITCODE not provided!")
  set(ENABLE_BITCODE TRUE)
elseif(NOT DEFINED ENABLE_BITCODE)
  message(STATUS "[DEFAULTS] Disabling bitcode support by default on simulators. ENABLE_BITCODE not provided for override!")
  set(ENABLE_BITCODE FALSE)
endif()
set(ENABLE_BITCODE_INT ${ENABLE_BITCODE} CACHE BOOL
        "Whether or not to enable bitcode" FORCE)
# Use ARC or not
if(NOT DEFINED ENABLE_ARC)
  # Unless specified, enable ARC support by default
  set(ENABLE_ARC TRUE)
  message(STATUS "[DEFAULTS] Enabling ARC support by default. ENABLE_ARC not provided!")
endif()
set(ENABLE_ARC_INT ${ENABLE_ARC} CACHE BOOL "Whether or not to enable ARC" FORCE)
# Use hidden visibility or not
if(NOT DEFINED ENABLE_VISIBILITY)
  # Unless specified, disable symbols visibility by default
  set(ENABLE_VISIBILITY FALSE)
  message(STATUS "[DEFAULTS] Hiding symbols visibility by default. ENABLE_VISIBILITY not provided!")
endif()
set(ENABLE_VISIBILITY_INT ${ENABLE_VISIBILITY} CACHE BOOL "Whether or not to hide symbols from the dynamic linker (-fvisibility=hidden)" FORCE)
# Set strict compiler checks or not
if(NOT DEFINED ENABLE_STRICT_TRY_COMPILE)
  # Unless specified, disable strict try_compile()
  set(ENABLE_STRICT_TRY_COMPILE FALSE)
  message(STATUS "[DEFAULTS] Using NON-strict compiler checks by default. ENABLE_STRICT_TRY_COMPILE not provided!")
endif()
set(ENABLE_STRICT_TRY_COMPILE_INT ${ENABLE_STRICT_TRY_COMPILE} CACHE BOOL
        "Whether or not to use strict compiler checks" FORCE)

# Get the SDK version information.
if(DEFINED SDK_VERSION)
  # Environment variables are always preserved.
  set(ENV{_SDK_VERSION} "${SDK_VERSION}")
elseif(DEFINED ENV{_SDK_VERSION})
  set(SDK_VERSION "$ENV{_SDK_VERSION}")
elseif(NOT DEFINED SDK_VERSION)
  execute_process(COMMAND ${XCODEBUILD_EXECUTABLE} -sdk ${CMAKE_OSX_SYSROOT_INT} -version SDKVersion
          OUTPUT_VARIABLE SDK_VERSION
          ERROR_QUIET
          OUTPUT_STRIP_TRAILING_WHITESPACE)
endif()

# Find the Developer root for the specific iOS platform being compiled for
# from CMAKE_OSX_SYSROOT.  Should be ../../ from SDK specified in
# CMAKE_OSX_SYSROOT. There does not appear to be a direct way to obtain
# this information from xcrun or xcodebuild.
if (NOT DEFINED CMAKE_DEVELOPER_ROOT AND NOT CMAKE_GENERATOR MATCHES "Xcode")
  get_filename_component(PLATFORM_SDK_DIR ${CMAKE_OSX_SYSROOT_INT} PATH)
  get_filename_component(CMAKE_DEVELOPER_ROOT ${PLATFORM_SDK_DIR} PATH)
  if (NOT EXISTS "${CMAKE_DEVELOPER_ROOT}")
    message(FATAL_ERROR "Invalid CMAKE_DEVELOPER_ROOT: ${CMAKE_DEVELOPER_ROOT} does not exist.")
  endif()
endif()

# Find the C & C++ compilers for the specified SDK.
if(DEFINED CMAKE_C_COMPILER)
  # Environment variables are always preserved.
  set(ENV{_CMAKE_C_COMPILER} "${CMAKE_C_COMPILER}")
elseif(DEFINED ENV{_CMAKE_C_COMPILER})
  set(CMAKE_C_COMPILER "$ENV{_CMAKE_C_COMPILER}")
elseif(NOT DEFINED CMAKE_C_COMPILER)
  execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT_INT} -find clang
          OUTPUT_VARIABLE CMAKE_C_COMPILER
          ERROR_QUIET
          OUTPUT_STRIP_TRAILING_WHITESPACE)
endif()
if(DEFINED CMAKE_CXX_COMPILER)
  # Environment variables are always preserved.
  set(ENV{_CMAKE_CXX_COMPILER} "${CMAKE_CXX_COMPILER}")
elseif(DEFINED ENV{_CMAKE_CXX_COMPILER})
  set(CMAKE_CXX_COMPILER "$ENV{_CMAKE_CXX_COMPILER}")
elseif(NOT DEFINED CMAKE_CXX_COMPILER)
  execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT_INT} -find clang++
          OUTPUT_VARIABLE CMAKE_CXX_COMPILER
          ERROR_QUIET
          OUTPUT_STRIP_TRAILING_WHITESPACE)
endif()
# Find (Apple's) libtool.
if(DEFINED BUILD_LIBTOOL)
  # Environment variables are always preserved.
  set(ENV{_BUILD_LIBTOOL} "${BUILD_LIBTOOL}")
elseif(DEFINED ENV{_BUILD_LIBTOOL})
  set(BUILD_LIBTOOL "$ENV{_BUILD_LIBTOOL}")
elseif(NOT DEFINED BUILD_LIBTOOL)
  execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT_INT} -find libtool
          OUTPUT_VARIABLE BUILD_LIBTOOL
          ERROR_QUIET
          OUTPUT_STRIP_TRAILING_WHITESPACE)
endif()
# Find the toolchain's provided install_name_tool if none is found on the host
if(DEFINED CMAKE_INSTALL_NAME_TOOL)
  # Environment variables are always preserved.
  set(ENV{_CMAKE_INSTALL_NAME_TOOL} "${CMAKE_INSTALL_NAME_TOOL}")
elseif(DEFINED ENV{_CMAKE_INSTALL_NAME_TOOL})
  set(CMAKE_INSTALL_NAME_TOOL "$ENV{_CMAKE_INSTALL_NAME_TOOL}")
elseif(NOT DEFINED CMAKE_INSTALL_NAME_TOOL)
  execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT_INT} -find install_name_tool
          OUTPUT_VARIABLE CMAKE_INSTALL_NAME_TOOL_INT
          ERROR_QUIET
          OUTPUT_STRIP_TRAILING_WHITESPACE)
  set(CMAKE_INSTALL_NAME_TOOL ${CMAKE_INSTALL_NAME_TOOL_INT} CACHE INTERNAL "")
endif()

# Configure libtool to be used instead of ar + ranlib to build static libraries.
# This is required on Xcode 7+, but should also work on previous versions of
# Xcode.
get_property(languages GLOBAL PROPERTY ENABLED_LANGUAGES)
foreach(lang ${languages})
  set(CMAKE_${lang}_CREATE_STATIC_LIBRARY "${BUILD_LIBTOOL} -static -o <TARGET> <LINK_FLAGS> <OBJECTS> " CACHE INTERNAL "")
endforeach()

# CMake 3.14+ support building for iOS, watchOS and tvOS out of the box.
if(MODERN_CMAKE)
  if(SDK_NAME MATCHES "iphone")
    set(CMAKE_SYSTEM_NAME iOS)
  elseif(SDK_NAME MATCHES "macosx")
    set(CMAKE_SYSTEM_NAME Darwin)
  elseif(SDK_NAME MATCHES "appletv")
    set(CMAKE_SYSTEM_NAME tvOS)
  elseif(SDK_NAME MATCHES "watch")
    set(CMAKE_SYSTEM_NAME watchOS)
  endif()
  # Provide flags for a combined FAT library build on newer CMake versions
  if(PLATFORM_INT MATCHES ".*COMBINED")
    set(CMAKE_XCODE_ATTRIBUTE_ONLY_ACTIVE_ARCH "NO")
    set(CMAKE_IOS_INSTALL_COMBINED YES)
    message(STATUS "Will combine built (static) artifacts into FAT lib...")
  endif()
elseif(NOT DEFINED CMAKE_SYSTEM_NAME AND ${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.10")
  # Legacy code path prior to CMake 3.14 or fallback if no CMAKE_SYSTEM_NAME specified
  set(CMAKE_SYSTEM_NAME iOS)
elseif(NOT DEFINED CMAKE_SYSTEM_NAME)
  # Legacy code path prior to CMake 3.14 or fallback if no CMAKE_SYSTEM_NAME specified
  set(CMAKE_SYSTEM_NAME Darwin)
endif()
# Standard settings.
set(CMAKE_SYSTEM_VERSION ${SDK_VERSION} CACHE INTERNAL "")
set(UNIX TRUE CACHE BOOL "")
set(APPLE TRUE CACHE BOOL "")
if(PLATFORM STREQUAL "MAC" OR PLATFORM STREQUAL "MAC_ARM64")
  set(IOS FALSE CACHE BOOL "")
  set(MACOS TRUE CACHE BOOL "")
elseif(PLATFORM STREQUAL "MAC_CATALYST" OR PLATFORM STREQUAL "MAC_CATALYST_ARM64")
  set(IOS TRUE CACHE BOOL "")
  set(MACOS TRUE CACHE BOOL "")
else()
  set(IOS TRUE CACHE BOOL "")
endif()
set(CMAKE_AR ar CACHE FILEPATH "" FORCE)
set(CMAKE_RANLIB ranlib CACHE FILEPATH "" FORCE)
set(CMAKE_STRIP strip CACHE FILEPATH "" FORCE)
# Set the architectures for which to build.
set(CMAKE_OSX_ARCHITECTURES ${ARCHS} CACHE INTERNAL "")
# Change the type of target generated for try_compile() so it'll work when cross-compiling, weak compiler checks
if(NOT ENABLE_STRICT_TRY_COMPILE_INT)
  set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
endif()
# All iOS/Darwin specific settings - some may be redundant.
set(CMAKE_MACOSX_BUNDLE YES)
set(CMAKE_XCODE_ATTRIBUTE_CODE_SIGNING_REQUIRED "NO")
set(CMAKE_SHARED_LIBRARY_PREFIX "lib")
set(CMAKE_SHARED_LIBRARY_SUFFIX ".dylib")
set(CMAKE_SHARED_MODULE_PREFIX "lib")
set(CMAKE_SHARED_MODULE_SUFFIX ".so")
set(CMAKE_C_COMPILER_ABI ELF)
set(CMAKE_CXX_COMPILER_ABI ELF)
set(CMAKE_C_HAS_ISYSROOT 1)
set(CMAKE_CXX_HAS_ISYSROOT 1)
set(CMAKE_MODULE_EXISTS 1)
set(CMAKE_DL_LIBS "")
set(CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG "-compatibility_version ")
set(CMAKE_C_OSX_CURRENT_VERSION_FLAG "-current_version ")
set(CMAKE_CXX_OSX_COMPATIBILITY_VERSION_FLAG "${CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG}")
set(CMAKE_CXX_OSX_CURRENT_VERSION_FLAG "${CMAKE_C_OSX_CURRENT_VERSION_FLAG}")

if(ARCHS MATCHES "((^|;|, )(arm64|arm64e|x86_64))+")
  set(CMAKE_C_SIZEOF_DATA_PTR 8)
  set(CMAKE_CXX_SIZEOF_DATA_PTR 8)
  if(ARCHS MATCHES "((^|;|, )(arm64|arm64e))+")
    set(CMAKE_SYSTEM_PROCESSOR "aarch64")
  else()
    set(CMAKE_SYSTEM_PROCESSOR "x86_64")
  endif()
else()
  set(CMAKE_C_SIZEOF_DATA_PTR 4)
  set(CMAKE_CXX_SIZEOF_DATA_PTR 4)
  set(CMAKE_SYSTEM_PROCESSOR "arm")
endif()

# Note that only Xcode 7+ supports the newer more specific:
# -m${SDK_NAME}-version-min flags, older versions of Xcode use:
# -m(ios/ios-simulator)-version-min instead.
if(${CMAKE_VERSION} VERSION_LESS "3.11")
  if(PLATFORM_INT STREQUAL "OS" OR PLATFORM_INT STREQUAL "OS64")
    if(XCODE_VERSION_INT VERSION_LESS 7.0)
      set(SDK_NAME_VERSION_FLAGS
              "-mios-version-min=${DEPLOYMENT_TARGET}")
    else()
      # Xcode 7.0+ uses flags we can build directly from SDK_NAME.
      set(SDK_NAME_VERSION_FLAGS
              "-m${SDK_NAME}-version-min=${DEPLOYMENT_TARGET}")
    endif()
  elseif(PLATFORM_INT STREQUAL "TVOS")
    set(SDK_NAME_VERSION_FLAGS
            "-mtvos-version-min=${DEPLOYMENT_TARGET}")
  elseif(PLATFORM_INT STREQUAL "SIMULATOR_TVOS")
    set(SDK_NAME_VERSION_FLAGS
            "-mtvos-simulator-version-min=${DEPLOYMENT_TARGET}")
  elseif(PLATFORM_INT STREQUAL "WATCHOS")
    set(SDK_NAME_VERSION_FLAGS
            "-mwatchos-version-min=${DEPLOYMENT_TARGET}")
  elseif(PLATFORM_INT STREQUAL "SIMULATOR_WATCHOS")
    set(SDK_NAME_VERSION_FLAGS
            "-mwatchos-simulator-version-min=${DEPLOYMENT_TARGET}")
  elseif(PLATFORM_INT STREQUAL "MAC")
    set(SDK_NAME_VERSION_FLAGS
            "-mmacosx-version-min=${DEPLOYMENT_TARGET}")
  else()
    # SIMULATOR or SIMULATOR64 both use -mios-simulator-version-min.
    set(SDK_NAME_VERSION_FLAGS
            "-mios-simulator-version-min=${DEPLOYMENT_TARGET}")
  endif()
elseif(NOT PLATFORM_INT STREQUAL "MAC_CATALYST")
  # Newer versions of CMake sets the version min flags correctly, skip this for Mac Catalyst targets
  set(CMAKE_OSX_DEPLOYMENT_TARGET ${DEPLOYMENT_TARGET})
endif()

if(DEFINED APPLE_TARGET_TRIPLE_INT)
  set(APPLE_TARGET_TRIPLE ${APPLE_TARGET_TRIPLE_INT} CACHE INTERNAL "")
endif()

if(PLATFORM_INT STREQUAL "MAC_CATALYST")
  set(C_TARGET_FLAGS "-target ${APPLE_TARGET_TRIPLE_INT} -isystem ${CMAKE_OSX_SYSROOT_INT}/System/iOSSupport/usr/include")
endif()

if(ENABLE_BITCODE_INT)
  set(BITCODE "-fembed-bitcode")
  set(CMAKE_XCODE_ATTRIBUTE_BITCODE_GENERATION_MODE "bitcode")
  set(CMAKE_XCODE_ATTRIBUTE_ENABLE_BITCODE "YES")
else()
  set(BITCODE "")
  set(CMAKE_XCODE_ATTRIBUTE_ENABLE_BITCODE "NO")
endif()

if(ENABLE_ARC_INT)
  set(FOBJC_ARC "-fobjc-arc")
  set(CMAKE_XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC "YES")
else()
  set(FOBJC_ARC "-fno-objc-arc")
  set(CMAKE_XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC "NO")
endif()

if(NOT ENABLE_VISIBILITY_INT)
  foreach(lang ${languages})
    set(CMAKE_${lang}_VISIBILITY_PRESET "hidden" CACHE INTERNAL "")
  endforeach()
  set(CMAKE_XCODE_ATTRIBUTE_GCC_SYMBOLS_PRIVATE_EXTERN "YES")
  set(VISIBILITY "-fvisibility=hidden -fvisibility-inlines-hidden")
else()
  foreach(lang ${languages})
    set(CMAKE_${lang}_VISIBILITY_PRESET "default" CACHE INTERNAL "")
  endforeach()
  set(CMAKE_XCODE_ATTRIBUTE_GCC_SYMBOLS_PRIVATE_EXTERN "NO")
  set(VISIBILITY "-fvisibility=default")
endif()

#Check if Xcode generator is used, since that will handle these flags automagically
if(CMAKE_GENERATOR MATCHES "Xcode")
  message(STATUS "Not setting any manual command-line buildflags, since Xcode is selected as generator.")
else()
  # Hidden visibility is required for C++ on iOS.
  set(CMAKE_C_FLAGS "${C_TARGET_FLAGS} ${SDK_NAME_VERSION_FLAGS} ${BITCODE} -fobjc-abi-version=2 ${FOBJC_ARC} ${CMAKE_C_FLAGS}")
  set(CMAKE_CXX_FLAGS "${C_TARGET_FLAGS} ${SDK_NAME_VERSION_FLAGS} ${BITCODE} ${VISIBILITY} -fobjc-abi-version=2 ${FOBJC_ARC} ${CMAKE_CXX_FLAGS}")
  set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} -O0 -g ${CMAKE_CXX_FLAGS_DEBUG}")
  set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS} -DNDEBUG -Os -ffast-math ${CMAKE_CXX_FLAGS_MINSIZEREL}")
  set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS} -DNDEBUG -O2 -g -ffast-math ${CMAKE_CXX_FLAGS_RELWITHDEBINFO}")
  set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -DNDEBUG -O3 -ffast-math ${CMAKE_CXX_FLAGS_RELEASE}")
  set(CMAKE_C_LINK_FLAGS "${C_TARGET_FLAGS} ${SDK_NAME_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_C_LINK_FLAGS}")
  set(CMAKE_CXX_LINK_FLAGS "${C_TARGET_FLAGS} ${SDK_NAME_VERSION_FLAGS}  -Wl,-search_paths_first ${CMAKE_CXX_LINK_FLAGS}")
  set(CMAKE_ASM_FLAGS "${CMAKE_C_FLAGS} -x assembler-with-cpp -arch ${CMAKE_OSX_ARCHITECTURES}")
endif()

## Print status messages to inform of the current state
message(STATUS "Configuring ${SDK_NAME} build for platform: ${PLATFORM_INT}, architecture(s): ${ARCHS}")
message(STATUS "Using SDK: ${CMAKE_OSX_SYSROOT_INT}")
message(STATUS "Using C compiler: ${CMAKE_C_COMPILER}")
message(STATUS "Using CXX compiler: ${CMAKE_CXX_COMPILER}")
message(STATUS "Using libtool: ${BUILD_LIBTOOL}")
message(STATUS "Using install name tool: ${CMAKE_INSTALL_NAME_TOOL}")
if(DEFINED APPLE_TARGET_TRIPLE)
  message(STATUS "Autoconf target triple: ${APPLE_TARGET_TRIPLE}")
endif()
message(STATUS "Using minimum deployment version: ${DEPLOYMENT_TARGET}"
        " (SDK version: ${SDK_VERSION})")
if(MODERN_CMAKE)
  message(STATUS "Merging integrated CMake 3.14+ iOS,tvOS,watchOS,macOS toolchain(s) with this toolchain!")
endif()
if(CMAKE_GENERATOR MATCHES "Xcode")
  message(STATUS "Using Xcode version: ${XCODE_VERSION_INT}")
endif()
message(STATUS "CMake version: ${CMAKE_VERSION}")
if(DEFINED SDK_NAME_VERSION_FLAGS)
  message(STATUS "Using version flags: ${SDK_NAME_VERSION_FLAGS}")
endif()
message(STATUS "Using a data_ptr size of: ${CMAKE_CXX_SIZEOF_DATA_PTR}")
if(ENABLE_BITCODE_INT)
  message(STATUS "Bitcode: Enabled")
else()
  message(STATUS "Bitcode: Disabled")
endif()

if(ENABLE_ARC_INT)
  message(STATUS "ARC: Enabled")
else()
  message(STATUS "ARC: Disabled")
endif()

if(ENABLE_VISIBILITY_INT)
  message(STATUS "Hiding symbols: Disabled")
else()
  message(STATUS "Hiding symbols: Enabled")
endif()

# Set global properties
set_property(GLOBAL PROPERTY PLATFORM "${PLATFORM}")
set_property(GLOBAL PROPERTY APPLE_TARGET_TRIPLE "${APPLE_TARGET_TRIPLE_INT}")
set_property(GLOBAL PROPERTY SDK_VERSION "${SDK_VERSION}")
set_property(GLOBAL PROPERTY XCODE_VERSION "${XCODE_VERSION_INT}")
set_property(GLOBAL PROPERTY OSX_ARCHITECTURES "${CMAKE_OSX_ARCHITECTURES}")

# Export configurable variables for the try_compile() command.
set(CMAKE_TRY_COMPILE_PLATFORM_VARIABLES
        PLATFORM
        XCODE_VERSION_INT
        SDK_VERSION
        DEPLOYMENT_TARGET
        CMAKE_DEVELOPER_ROOT
        CMAKE_OSX_SYSROOT_INT
        ENABLE_BITCODE
        ENABLE_ARC
        CMAKE_C_COMPILER
        CMAKE_CXX_COMPILER
        BUILD_LIBTOOL
        CMAKE_INSTALL_NAME_TOOL
        CMAKE_C_FLAGS
        CMAKE_CXX_FLAGS
        CMAKE_CXX_FLAGS_DEBUG
        CMAKE_CXX_FLAGS_MINSIZEREL
        CMAKE_CXX_FLAGS_RELWITHDEBINFO
        CMAKE_CXX_FLAGS_RELEASE
        CMAKE_C_LINK_FLAGS
        CMAKE_CXX_LINK_FLAGS
        CMAKE_ASM_FLAGS
        )

set(CMAKE_PLATFORM_HAS_INSTALLNAME 1)
set(CMAKE_SHARED_LINKER_FLAGS "-rpath @executable_path/Frameworks -rpath @loader_path/Frameworks")
set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-dynamiclib -Wl,-headerpad_max_install_names")
set(CMAKE_SHARED_MODULE_CREATE_C_FLAGS "-bundle -Wl,-headerpad_max_install_names")
set(CMAKE_SHARED_MODULE_LOADER_C_FLAG "-Wl,-bundle_loader,")
set(CMAKE_SHARED_MODULE_LOADER_CXX_FLAG "-Wl,-bundle_loader,")
set(CMAKE_FIND_LIBRARY_SUFFIXES ".tbd" ".dylib" ".so" ".a")
set(CMAKE_SHARED_LIBRARY_SONAME_C_FLAG "-install_name")

# Set the find root to the SDK developer roots.
# Note: CMAKE_FIND_ROOT_PATH is only useful when cross-compiling. Thus, do not set on macOS builds.
if(NOT PLATFORM_INT STREQUAL "MAC" AND NOT PLATFORM_INT STREQUAL "MAC_ARM64")
  list(APPEND CMAKE_FIND_ROOT_PATH "${CMAKE_OSX_SYSROOT_INT}" CACHE INTERNAL "")
  set(CMAKE_IGNORE_PATH "/System/Library/Frameworks;/usr/local/lib" CACHE INTERNAL "")
endif()

# Default to searching for frameworks first.
set(CMAKE_FIND_FRAMEWORK FIRST)

# Set up the default search directories for frameworks.
if(PLATFORM_INT MATCHES "MAC_CATALYST.*")
  set(CMAKE_FRAMEWORK_PATH
          ${CMAKE_DEVELOPER_ROOT}/Library/PrivateFrameworks
          ${CMAKE_OSX_SYSROOT_INT}/System/Library/Frameworks
          ${CMAKE_OSX_SYSROOT_INT}/System/iOSSupport/System/Library/Frameworks
          ${CMAKE_FRAMEWORK_PATH} CACHE INTERNAL "")
else()
  set(CMAKE_FRAMEWORK_PATH
          ${CMAKE_DEVELOPER_ROOT}/Library/PrivateFrameworks
          ${CMAKE_OSX_SYSROOT_INT}/System/Library/Frameworks
          ${CMAKE_FRAMEWORK_PATH} CACHE INTERNAL "")
endif()

# By default, search both the specified iOS SDK and the remainder of the host filesystem.
if(NOT CMAKE_FIND_ROOT_PATH_MODE_PROGRAM)
  set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM BOTH CACHE INTERNAL "")
endif()
if(NOT CMAKE_FIND_ROOT_PATH_MODE_LIBRARY)
  set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH CACHE INTERNAL "")
endif()
if(NOT CMAKE_FIND_ROOT_PATH_MODE_INCLUDE)
  set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH CACHE INTERNAL "")
endif()
if(NOT CMAKE_FIND_ROOT_PATH_MODE_PACKAGE)
  set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE BOTH CACHE INTERNAL "")
endif()

#
# Some helper-macros below to simplify and beautify the CMakeFile
#

# This little macro lets you set any Xcode specific property.
macro(set_xcode_property TARGET XCODE_PROPERTY XCODE_VALUE XCODE_RELVERSION)
  set(XCODE_RELVERSION_I "${XCODE_RELVERSION}")
  if(XCODE_RELVERSION_I STREQUAL "All")
    set_property(TARGET ${TARGET} PROPERTY XCODE_ATTRIBUTE_${XCODE_PROPERTY} "${XCODE_VALUE}")
  else()
    set_property(TARGET ${TARGET} PROPERTY XCODE_ATTRIBUTE_${XCODE_PROPERTY}[variant=${XCODE_RELVERSION_I}] "${XCODE_VALUE}")
  endif()
endmacro(set_xcode_property)

# This macro lets you find executable programs on the host system.
macro(find_host_package)
  set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
  set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER)
  set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER)
  set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE NEVER)
  set(_TOOLCHAIN_IOS ${IOS})
  set(IOS FALSE)
  find_package(${ARGN})
  set(IOS ${_TOOLCHAIN_IOS})
  set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM BOTH)
  set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH)
  set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH)
  set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE BOTH)
endmacro(find_host_package)


================================================
FILE: toolchains/riscv64-linux-gnu-spacemit.toolchain.cmake
================================================
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR riscv64)
set(CMAKE_SYSTEM_VERSION 1)

if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(riscv)")
    message(STATUS "HOST SYSTEM ${CMAKE_HOST_SYSTEM_PROCESSOR}")
else()
    set(GNU_MACHINE riscv64-unknown-linux-gnu CACHE STRING "GNU compiler triple")
    if(DEFINED ENV{RISCV_ROOT_PATH})
        file(TO_CMAKE_PATH $ENV{RISCV_ROOT_PATH} RISCV_ROOT_PATH)
    else()
        message(FATAL_ERROR "RISCV_ROOT_PATH env must be defined")
    endif()

    set(RISCV_ROOT_PATH ${RISCV_ROOT_PATH} CACHE STRING "root path to riscv toolchain")
    set(CMAKE_C_COMPILER ${RISCV_ROOT_PATH}/bin/riscv64-unknown-linux-gnu-gcc)
    set(CMAKE_CXX_COMPILER ${RISCV_ROOT_PATH}/bin/riscv64-unknown-linux-gnu-g++)
    set(CMAKE_STRIP ${RISCV_ROOT_PATH}/bin/riscv64-unknown-linux-gnu-strip)
    set(CMAKE_FIND_ROOT_PATH "${RISCV_ROOT_PATH}/sysroot")
    set(CMAKE_SYSROOT "${RISCV_ROOT_PATH}/sysroot")
endif()

set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
set(CMAKE_C_FLAGS "-march=rv64gcv_zfh_zvfh_zba_zicbop_zihintpause -mabi=lp64d -ftree-vectorize ${CMAKE_C_FLAGS}")
set(CMAKE_CXX_FLAGS "-march=rv64gcv_zfh_zvfh_zba_zicbop_zihintpause -mabi=lp64d  -ftree-vectorize ${CXX_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -latomic -lrt -lpthread")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --sysroot=${CMAKE_SYSROOT}")


================================================
FILE: toolchains/riscv64-linux-gnu.toolchain.cmake
================================================
# Copied from https://github.com/Tencent/ncnn/blob/master/toolchains/riscv64-linux-gnu.toolchain.cmake
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR riscv64)

set(CMAKE_C_COMPILER "riscv64-unknown-linux-gnu-gcc")
set(CMAKE_CXX_COMPILER "riscv64-unknown-linux-gnu-g++")

set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)

set(CMAKE_C_FLAGS "-march=rv64gc")
set(CMAKE_CXX_FLAGS "-march=rv64gc")

# cache flags
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "c flags")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "c++ flags")


================================================
FILE: wasm/CMakeLists.txt
================================================
if(SHERPA_ONNX_ENABLE_WASM_TTS)
  add_subdirectory(tts)
endif()

if(SHERPA_ONNX_ENABLE_WASM_ASR)
  add_subdirectory(asr)
endif()

if(SHERPA_ONNX_ENABLE_WASM_KWS)
  add_subdirectory(kws)
endif()

if(SHERPA_ONNX_ENABLE_WASM_VAD)
  add_subdirectory(vad)
endif()

if(SHERPA_ONNX_ENABLE_WASM_VAD_ASR)
  add_subdirectory(vad-asr)
endif()

if(SHERPA_ONNX_ENABLE_WASM_SPEECH_ENHANCEMENT)
  add_subdirectory(speech-enhancement)
endif()

if(SHERPA_ONNX_ENABLE_WASM_SPEAKER_DIARIZATION)
  add_subdirectory(speaker-diarization)
endif()

if(SHERPA_ONNX_ENABLE_WASM_NODEJS)
  add_subdirectory(nodejs)
endif()


================================================
FILE: wasm/asr/.gitignore
================================================
*.bak


================================================
FILE: wasm/asr/CMakeLists.txt
================================================
if(NOT $ENV{SHERPA_ONNX_IS_USING_BUILD_WASM_SH})
  message(FATAL_ERROR "Please use ./build-wasm-simd-asr.sh to build for wasm ASR")
endif()

if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/assets/encoder.onnx")
  message(FATAL_ERROR "Please read ${CMAKE_CURRENT_SOURCE_DIR}/assets/README.md before you continue")
endif()

set(exported_functions
  MyPrint
  # online ASR
  SherpaOnnxCreateOnlineRecognizer
  SherpaOnnxCreateOnlineStream
  SherpaOnnxDecodeOnlineStream
  SherpaOnnxDestroyOnlineRecognizer
  SherpaOnnxDestroyOnlineRecognizerResult
  SherpaOnnxDestroyOnlineStream
  SherpaOnnxDestroyOnlineStreamResultJson
  SherpaOnnxGetOfflineStreamResultAsJson
  SherpaOnnxGetOnlineStreamResult
  SherpaOnnxGetOnlineStreamResultAsJson
  SherpaOnnxIsOnlineStreamReady
  SherpaOnnxOnlineStreamAcceptWaveform
  SherpaOnnxOnlineStreamGetOption
  SherpaOnnxOnlineStreamInputFinished
  SherpaOnnxOnlineStreamIsEndpoint
  SherpaOnnxOnlineStreamReset
  SherpaOnnxOnlineStreamSetOption
  SherpaOnnxOfflineStreamGetOption
  SherpaOnnxOfflineStreamSetOption
  #
)
set(mangled_exported_functions)
foreach(x IN LISTS exported_functions)
  list(APPEND mangled_exported_functions "_${x}")
endforeach()
list(JOIN mangled_exported_functions "," all_exported_functions)

include_directories(${CMAKE_SOURCE_DIR})
set(MY_FLAGS " -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=512MB -s ALLOW_MEMORY_GROWTH=1")
string(APPEND MY_FLAGS " -sSTACK_SIZE=10485760 ") # 10MB
string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ")
string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ")
string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString','HEAPU8','HEAP16','HEAP32','HEAPU32','HEAPF32','HEAPF64'] ")

message(STATUS "MY_FLAGS: ${MY_FLAGS}")

set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}")
set(CMAKE_EXECUTABLE_LINKER_FLAGS "${CMAKE_EXECUTABLE_LINKER_FLAGS} ${MY_FLAGS}")

if (NOT CMAKE_EXECUTABLE_SUFFIX STREQUAL ".js")
  message(FATAL_ERROR "The default suffix for building executables should be .js!")
endif()
# set(CMAKE_EXECUTABLE_SUFFIX ".html")

add_executable(sherpa-onnx-wasm-main-asr sherpa-onnx-wasm-main-asr.cc)
target_link_libraries(sherpa-onnx-wasm-main-asr sherpa-onnx-c-api)
install(TARGETS sherpa-onnx-wasm-main-asr DESTINATION bin/wasm/asr)

install(
  FILES
    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-asr>/sherpa-onnx-wasm-main-asr.js"
    "index.html"
    "sherpa-onnx-asr.js"
    "app-asr.js"
    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-asr>/sherpa-onnx-wasm-main-asr.wasm"
    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-asr>/sherpa-onnx-wasm-main-asr.data"
  DESTINATION
    bin/wasm/asr
)


================================================
FILE: wasm/asr/app-asr.js
================================================
// This file copies and modifies code
// from https://mdn.github.io/web-dictaphone/scripts/app.js
// and https://gist.github.com/meziantou/edb7217fddfbb70e899e

const startBtn = document.getElementById('startBtn');
const stopBtn = document.getElementById('stopBtn');
const clearBtn = document.getElementById('clearBtn');
const soundClips = document.getElementById('sound-clips');

let textArea = document.getElementById('results');

let lastResult = '';
let resultList = [];

clearBtn.onclick = function() {
  resultList = [];
  textArea.value = getDisplayResult();
  textArea.scrollTop = textArea.scrollHeight;  // auto scroll
};

function getDisplayResult() {
  let i = 0;
  let ans = '';
  for (let s in resultList) {
    if (resultList[s] == '') {
      continue;
    }

    ans += '' + i + ': ' + resultList[s] + '\n';
    i += 1;
  }

  if (lastResult.length > 0) {
    ans += '' + i + ': ' + lastResult + '\n';
  }
  return ans;
}

Module = {};

// https://emscripten.org/docs/api_reference/module.html#Module.locateFile
Module.locateFile = function(path, scriptDirectory = '') {
  console.log(`path: ${path}, scriptDirectory: ${scriptDirectory}`);
  return scriptDirectory + path;
};

// https://emscripten.org/docs/api_reference/module.html#Module.locateFile
Module.setStatus = function(status) {
  console.log(`status ${status}`);
  const statusElement = document.getElementById('status');
  if (status == 'Running...') {
    status = 'Model downloaded. Initializing recognizer...'
  }

  const downloadMatch = status.match(/Downloading data... \((\d+)\/(\d+)\)/);
  if (downloadMatch) {
    const downloaded = BigInt(downloadMatch[1]);
    const total = BigInt(downloadMatch[2]);
    const percent =
        total === 0 ? 0.00 : Number((downloaded * 10000n) / total) / 100;
    const downloadedMB = Number(downloaded) / (1024 * 1024);
    const totalMB = Number(total) / (1024 * 1024);
    status = `Downloading data... ${percent.toFixed(2)}% (${downloadedMB.toFixed(2)} MB/${
        totalMB.toFixed(2)} MB)`;
    console.log(`here ${status}`)
  }

  statusElement.textContent = status;
  if (status === '') {
    statusElement.style.display = 'none';
    // statusElement.parentNode.removeChild(statusElement);

    document.querySelectorAll('.tab-content').forEach((tabContentElement) => {
      tabContentElement.classList.remove('loading');
    });
  } else {
    statusElement.style.display = 'block';
    document.querySelectorAll('.tab-content').forEach((tabContentElement) => {
      tabContentElement.classList.add('loading');
    });
  }
};

Module.onRuntimeInitialized = function() {
  console.log('inited!');

  startBtn.disabled = false;

  recognizer = createOnlineRecognizer(Module);
  console.log('recognizer is created!', recognizer);
};

let audioCtx;
let mediaStream;

let expectedSampleRate = 16000;
let recordSampleRate;  // the sampleRate of the microphone
let recorder = null;   // the microphone
let leftchannel = [];  // TODO: Use a single channel

let recordingLength = 0;  // number of samples so far

let recognizer = null;
let recognizer_stream = null;

if (navigator.mediaDevices.getUserMedia) {
  console.log('getUserMedia supported.');

  // see https://w3c.github.io/mediacapture-main/#dom-mediadevices-getusermedia
  const constraints = {audio: true};

  let onSuccess = function(stream) {
    if (!audioCtx) {
      audioCtx = new AudioContext({sampleRate: 16000});
    }
    console.log(audioCtx);
    recordSampleRate = audioCtx.sampleRate;
    console.log('sample rate ' + recordSampleRate);

    // creates an audio node from the microphone incoming stream
    mediaStream = audioCtx.createMediaStreamSource(stream);
    console.log('media stream', mediaStream);

    // https://developer.mozilla.org/en-US/docs/Web/API/AudioContext/createScriptProcessor
    // bufferSize: the onaudioprocess event is called when the buffer is full
    var bufferSize = 4096;
    var numberOfInputChannels = 1;
    var numberOfOutputChannels = 2;
    if (audioCtx.createScriptProcessor) {
      recorder = audioCtx.createScriptProcessor(
          bufferSize, numberOfInputChannels, numberOfOutputChannels);
    } else {
      recorder = audioCtx.createJavaScriptNode(
          bufferSize, numberOfInputChannels, numberOfOutputChannels);
    }
    console.log('recorder', recorder);

    recorder.onaudioprocess = function(e) {
      let samples = new Float32Array(e.inputBuffer.getChannelData(0))
      samples = downsampleBuffer(samples, expectedSampleRate);

      if (recognizer_stream == null) {
        recognizer_stream = recognizer.createStream();
      }

      recognizer_stream.acceptWaveform(expectedSampleRate, samples);
      while (recognizer.isReady(recognizer_stream)) {
        recognizer.decode(recognizer_stream);
      }

      let isEndpoint = recognizer.isEndpoint(recognizer_stream);

      let result = recognizer.getResult(recognizer_stream).text;

      if (recognizer.config.modelConfig.paraformer.encoder != '') {
        let tailPaddings = new Float32Array(expectedSampleRate);
        recognizer_stream.acceptWaveform(expectedSampleRate, tailPaddings);
        while (recognizer.isReady(recognizer_stream)) {
          recognizer.decode(recognizer_stream);
        }
        result = recognizer.getResult(recognizer_stream).text;
      }

      if (result.length > 0 && lastResult != result) {
        lastResult = result;
      }

      if (isEndpoint) {
        if (lastResult.length > 0) {
          resultList.push(lastResult);
          lastResult = '';
        }
        recognizer.reset(recognizer_stream);
      }

      textArea.value = getDisplayResult();
      textArea.scrollTop = textArea.scrollHeight;  // auto scroll

      let buf = new Int16Array(samples.length);
      for (var i = 0; i < samples.length; ++i) {
        let s = samples[i];
        if (s >= 1)
          s = 1;
        else if (s <= -1)
          s = -1;

        samples[i] = s;
        buf[i] = s * 32767;
      }

      leftchannel.push(buf);
      recordingLength += bufferSize;
    };

    startBtn.onclick = function() {
      mediaStream.connect(recorder);
      recorder.connect(audioCtx.destination);

      console.log('recorder started');

      stopBtn.disabled = false;
      startBtn.disabled = true;
    };

    stopBtn.onclick = function() {
      console.log('recorder stopped');

      // stopBtn recording
      recorder.disconnect(audioCtx.destination);
      mediaStream.disconnect(recorder);

      startBtn.style.background = '';
      startBtn.style.color = '';
      // mediaRecorder.requestData();

      stopBtn.disabled = true;
      startBtn.disabled = false;

      var clipName = new Date().toISOString();

      const clipContainer = document.createElement('article');
      const clipLabel = document.createElement('p');
      const audio = document.createElement('audio');
      const deleteButton = document.createElement('button');
      clipContainer.classList.add('clip');
      audio.setAttribute('controls', '');
      deleteButton.textContent = 'Delete';
      deleteButton.className = 'delete';

      clipLabel.textContent = clipName;

      clipContainer.appendChild(audio);

      clipContainer.appendChild(clipLabel);
      clipContainer.appendChild(deleteButton);
      soundClips.appendChild(clipContainer);

      audio.controls = true;
      let samples = flatten(leftchannel);
      const blob = toWav(samples);

      leftchannel = [];
      const audioURL = window.URL.createObjectURL(blob);
      audio.src = audioURL;
      console.log('recorder stopped');

      deleteButton.onclick = function(e) {
        let evtTgt = e.target;
        evtTgt.parentNode.parentNode.removeChild(evtTgt.parentNode);
      };

      clipLabel.onclick = function() {
        const existingName = clipLabel.textContent;
        const newClipName = prompt('Enter a new name for your sound clip?');
        if (newClipName === null) {
          clipLabel.textContent = existingName;
        } else {
          clipLabel.textContent = newClipName;
        }
      };
    };
  };

  let onError = function(err) {
    console.log('The following error occurred: ' + err);
  };

  navigator.mediaDevices.getUserMedia(constraints).then(onSuccess, onError);
} else {
  console.log('getUserMedia not supported on your browser!');
  alert('getUserMedia not supported on your browser!');
}

// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
function flatten(listOfSamples) {
  let n = 0;
  for (let i = 0; i < listOfSamples.length; ++i) {
    n += listOfSamples[i].length;
  }
  let ans = new Int16Array(n);

  let offset = 0;
  for (let i = 0; i < listOfSamples.length; ++i) {
    ans.set(listOfSamples[i], offset);
    offset += listOfSamples[i].length;
  }
  return ans;
}

// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
function toWav(samples) {
  let buf = new ArrayBuffer(44 + samples.length * 2);
  var view = new DataView(buf);

  // http://soundfile.sapp.org/doc/WaveFormat/
  //                   F F I R
  view.setUint32(0, 0x46464952, true);               // chunkID
  view.setUint32(4, 36 + samples.length * 2, true);  // chunkSize
  //                   E V A W
  view.setUint32(8, 0x45564157, true);  // format
                                        //
  //                      t m f
  view.setUint32(12, 0x20746d66, true);          // subchunk1ID
  view.setUint32(16, 16, true);                  // subchunk1Size, 16 for PCM
  view.setUint32(20, 1, true);                   // audioFormat, 1 for PCM
  view.setUint16(22, 1, true);                   // numChannels: 1 channel
  view.setUint32(24, expectedSampleRate, true);  // sampleRate
  view.setUint32(28, expectedSampleRate * 2, true);  // byteRate
  view.setUint16(32, 2, true);                       // blockAlign
  view.setUint16(34, 16, true);                      // bitsPerSample
  view.setUint32(36, 0x61746164, true);              // Subchunk2ID
  view.setUint32(40, samples.length * 2, true);      // subchunk2Size

  let offset = 44;
  for (let i = 0; i < samples.length; ++i) {
    view.setInt16(offset, samples[i], true);
    offset += 2;
  }

  return new Blob([view], {type: 'audio/wav'});
}

// this function is copied from
// https://github.com/awslabs/aws-lex-browser-audio-capture/blob/master/lib/worker.js#L46
function downsampleBuffer(buffer, exportSampleRate) {
  if (exportSampleRate === recordSampleRate) {
    return buffer;
  }
  var sampleRateRatio = recordSampleRate / exportSampleRate;
  var newLength = Math.round(buffer.length / sampleRateRatio);
  var result = new Float32Array(newLength);
  var offsetResult = 0;
  var offsetBuffer = 0;
  while (offsetResult < result.length) {
    var nextOffsetBuffer = Math.round((offsetResult + 1) * sampleRateRatio);
    var accum = 0, count = 0;
    for (var i = offsetBuffer; i < nextOffsetBuffer && i < buffer.length; i++) {
      accum += buffer[i];
      count++;
    }
    result[offsetResult] = accum / count;
    offsetResult++;
    offsetBuffer = nextOffsetBuffer;
  }
  return result;
};


================================================
FILE: wasm/asr/assets/.gitignore
================================================


================================================
FILE: wasm/asr/assets/README.md
================================================
# Introduction

Please refer to
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
or
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
to download a model.

# Streaming ASR

## Transducer
```bash
cd sherpa-onnx/wasm/asr/assets

wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2

# Note it is not an error that we rename encoder.int8.onnx to encoder.onnx

mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx encoder.onnx
mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx decoder.onnx
mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx joiner.onnx
mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ./
rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/

cd ../../..

./build-wasm-simd-asr.sh
```

You should have the following files in `assets` before you can run
`build-wasm-simd-asr.sh`

```
assets fangjun$ tree -L 1
.
├── README.md
├── decoder.onnx
├── encoder.onnx
├── joiner.onnx
└── tokens.txt

0 directories, 5 files
```

## Paraformer

```
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2

mv sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx encoder.onnx
mv sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx decoder.onnx
mv sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt ./

rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en

cd ../

sed -i.bak s/"type = 0"/"type = 1"/g ./sherpa-onnx-asr.js
sed -i.bak s/Zipformer/Paraformer/g ./index.html

cd ../..

./build-wasm-simd-asr.sh
```

You should have the following files in `assets` before you can run
`build-wasm-simd-asr.sh`

```
assets fangjun$ tree -L 1
.
├── README.md
├── decoder.onnx
├── encoder.onnx
└── tokens.txt

0 directories, 4 files
```

You can find example build scripts at:

  - Streaming Zipformer (English + Chinese): https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/ wasm-simd-hf-space-zh-en-asr-zipformer.yaml
  - Streaming Zipformer (English): https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml
  - Streaming Paraformer (English + Chinese): https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml
  - Streaming Paraformer (English + Chinese + Cantonese): https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml


================================================
FILE: wasm/asr/index.html
================================================
<html lang="en">

<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width" />
  <title>Next-gen Kaldi WebAssembly with sherpa-onnx for ASR</title>
  <style>
    h1,div {
      text-align: center;
    }
    textarea {
      width:100%;
    }
    .loading {
      display: none !important;
    }
  </style>
</head>

<body style="font-family: 'Source Sans Pro', sans-serif; background-color: #f9fafb; color: #333; display: flex; flex-direction: column; align-items: center; height: 100vh; margin: 0;">
  <h1>
    Next-gen Kaldi + WebAssembly<br/>
    ASR Demo with <a href="https://github.com/k2-fsa/sherpa-onnx">sherpa-onnx</a><br/>
    (with Zipformer)
  </h1>

  <div style="width: 100%; max-width: 900px; background: #fff; padding: 1.5rem; border-radius: 8px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1); flex: 1;">
    <div id="status">Loading...</div>

    <div id="singleAudioContent" class="tab-content loading">
      <div style="display: flex; gap: 1.5rem;">
        <div style="flex: 1; display: flex; flex-direction: row; align-items: center; gap: 1rem;">
          <button id="startBtn" disabled>Start</button>
          <button id="stopBtn" disabled>Stop</button>
          <button id="clearBtn">Clear</button>
        </div>
      </div>

      <div style="flex: 1; display: flex; flex-direction: column; gap: 1rem;">
          <div style="font-size: 1rem; font-weight: bold; padding: 0.5rem 1rem; background-color: #f8f9fa; border-radius: 8px; color: #6c757d;">Transcript</div>
          <textarea id="results" rows="10" placeholder="Output will appear here..." readonly style="flex: 1; padding: 0.75rem; font-size: 1rem; border: 1px solid #ced4da; border-radius: 8px; resize: none; background-color: #f8f9fa;"></textarea>
      </div>
    </div>

    <section flex="1" overflow="auto" id="sound-clips">
    </section>

  </div>

  <!-- Footer Section -->
  <div style="width: 100%; max-width: 900px; margin-top: 1.5rem; background: #fff; padding: 1.5rem; border-radius: 8px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1); text-align: left; font-size: 0.9rem; color: #6c757d;">
    <h3>Description</h3>
    <ul>
      <li>Everything is <strong>open-sourced.</strong> <a href="https://github.com/k2-fsa/sherpa-onnx">code</a></li>
      <li>If you have any issues, please either <a href="https://github.com/k2-fsa/sherpa-onnx/issues">file a ticket</a> or contact us via</li>
        <ul>
          <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#wechat">WeChat group</a></li>
          <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#qq">QQ group</a></li>
          <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#bilibili-b">Bilibili</a></li>
        </ul>
    </ul>
    <h3>About This Demo</h3>
    <ul>
      <li><strong>Private and Secure:</strong> All processing is done locally on your device (CPU) within your browser with a single thread. No server is involved, ensuring privacy and security. You can disconnect from the Internet once this page is loaded.</li>
      <li><strong>Efficient Resource Usage:</strong> No GPU is required, leaving system resources available for webLLM analysis.</li>
    </ul>
    <h3>Latest Update</h3>
    <ul>
      <li>Update UI.</li>
      <li>First working version.</li>
    </ul>

    <h3>Acknowledgement</h3>
    <ul>
      <li>We refer to <a href="https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Wasm">https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Wasm</a> for the UI part.</li>
    </ul>
  </div>

  <script src="sherpa-onnx-asr.js"></script>
  <script src="app-asr.js"></script>
  <script src="sherpa-onnx-wasm-main-asr.js"></script>
</body>


================================================
FILE: wasm/asr/sherpa-onnx-asr.js
================================================
function freeConfig(config, Module) {
  if ('buffer' in config) {
    Module._free(config.buffer);
  }

  if ('config' in config) {
    freeConfig(config.config, Module)
  }

  if ('transducer' in config) {
    freeConfig(config.transducer, Module)
  }

  if ('paraformer' in config) {
    freeConfig(config.paraformer, Module)
  }

  if ('zipformer2Ctc' in config) {
    freeConfig(config.zipformer2Ctc, Module)
  }

  if ('feat' in config) {
    freeConfig(config.feat, Module)
  }

  if ('model' in config) {
    freeConfig(config.model, Module)
  }

  if ('nemoCtc' in config) {
    freeConfig(config.nemoCtc, Module)
  }

  if ('toneCtc' in config) {
    freeConfig(config.toneCtc, Module)
  }

  if ('whisper' in config) {
    freeConfig(config.whisper, Module)
  }

  if ('fireRedAsr' in config) {
    freeConfig(config.fireRedAsr, Module)
  }

  if ('dolphin' in config) {
    freeConfig(config.dolphin, Module)
  }

  if ('zipformerCtc' in config) {
    freeConfig(config.zipformerCtc, Module)
  }

  if ('wenetCtc' in config) {
    freeConfig(config.wenetCtc, Module)
  }

  if ('omnilingual' in config) {
    freeConfig(config.omnilingual, Module)
  }

  if ('medasr' in config) {
    freeConfig(config.medasr, Module)
  }

  if ('fireRedAsrCtc' in config) {
    freeConfig(config.fireRedAsrCtc, Module)
  }

  if ('funasrNano' in config) {
    freeConfig(config.funasrNano, Module)
  }

  if ('moonshine' in config) {
    freeConfig(config.moonshine, Module)
  }

  if ('tdnn' in config) {
    freeConfig(config.tdnn, Module)
  }

  if ('senseVoice' in config) {
    freeConfig(config.senseVoice, Module)
  }

  if ('canary' in config) {
    freeConfig(config.canary, Module)
  }

  if ('lm' in config) {
    freeConfig(config.lm, Module)
  }

  if ('ctcFstDecoder' in config) {
    freeConfig(config.ctcFstDecoder, Module)
  }

  if ('hr' in config) {
    freeConfig(config.hr, Module)
  }

  Module._free(config.ptr);
}

// The user should free the returned pointers
function initSherpaOnnxOnlineTransducerModelConfig(config, Module) {
  const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1;
  const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1;
  const joinerLen = Module.lengthBytesUTF8(config.joiner || '') + 1;

  const n = encoderLen + decoderLen + joinerLen;

  const buffer = Module._malloc(n);

  const len = 3 * 4;  // 3 pointers
  const ptr = Module._malloc(len);

  let offset = 0;
  Module.stringToUTF8(config.encoder || '', buffer + offset, encoderLen);
  offset += encoderLen;

  Module.stringToUTF8(config.decoder || '', buffer + offset, decoderLen);
  offset += decoderLen;

  Module.stringToUTF8(config.joiner || '', buffer + offset, joinerLen);

  offset = 0;
  Module.setValue(ptr, buffer + offset, 'i8*');
  offset += encoderLen;

  Module.setValue(ptr + 4, buffer + offset, 'i8*');
  offset += decoderLen;

  Module.setValue(ptr + 8, buffer + offset, 'i8*');

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxOnlineParaformerModelConfig(config, Module) {
  const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1;
  const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1;

  const n = encoderLen + decoderLen;
  const buffer = Module._malloc(n);

  const len = 2 * 4;  // 2 pointers
  const ptr = Module._malloc(len);

  let offset = 0;
  Module.stringToUTF8(config.encoder || '', buffer + offset, encoderLen);
  offset += encoderLen;

  Module.stringToUTF8(config.decoder || '', buffer + offset, decoderLen);

  offset = 0;
  Module.setValue(ptr, buffer + offset, 'i8*');
  offset += encoderLen;

  Module.setValue(ptr + 4, buffer + offset, 'i8*');

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxOnlineZipformer2CtcModelConfig(config, Module) {
  const n = Module.lengthBytesUTF8(config.model || '') + 1;
  const buffer = Module._malloc(n);

  const len = 1 * 4;  // 1 pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(config.model || '', buffer, n);

  Module.setValue(ptr, buffer, 'i8*');

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxOnlineNemoCtcModelConfig(config, Module) {
  const n = Module.lengthBytesUTF8(config.model || '') + 1;
  const buffer = Module._malloc(n);

  const len = 1 * 4;  // 1 pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(config.model || '', buffer, n);

  Module.setValue(ptr, buffer, 'i8*');

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxOnlineToneCtcModelConfig(config, Module) {
  const n = Module.lengthBytesUTF8(config.model || '') + 1;
  const buffer = Module._malloc(n);

  const len = 1 * 4;  // 1 pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(config.model || '', buffer, n);

  Module.setValue(ptr, buffer, 'i8*');

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxOnlineModelConfig(config, Module) {
  if (!('transducer' in config)) {
    config.transducer = {
      encoder: '',
      decoder: '',
      joiner: '',
    };
  }

  if (!('paraformer' in config)) {
    config.paraformer = {
      encoder: '',
      decoder: '',
    };
  }

  if (!('zipformer2Ctc' in config)) {
    config.zipformer2Ctc = {
      model: '',
    };
  }

  if (!('nemoCtc' in config)) {
    config.nemoCtc = {
      model: '',
    };
  }

  if (!('toneCtc' in config)) {
    config.toneCtc = {
      model: '',
    };
  }

  if (!('tokensBuf' in config)) {
    config.tokensBuf = '';
  }

  if (!('tokensBufSize' in config)) {
    config.tokensBufSize = 0;
  }

  const transducer =
      initSherpaOnnxOnlineTransducerModelConfig(config.transducer, Module);

  const paraformer =
      initSherpaOnnxOnlineParaformerModelConfig(config.paraformer, Module);

  const zipformer2Ctc = initSherpaOnnxOnlineZipformer2CtcModelConfig(
      config.zipformer2Ctc, Module);

  const nemoCtc =
      initSherpaOnnxOnlineNemoCtcModelConfig(config.nemoCtc, Module);

  const toneCtc =
      initSherpaOnnxOnlineToneCtcModelConfig(config.toneCtc, Module);

  const len = transducer.len + paraformer.len + zipformer2Ctc.len + 9 * 4 +
      nemoCtc.len + toneCtc.len;

  const ptr = Module._malloc(len);

  let offset = 0;
  Module._CopyHeap(transducer.ptr, transducer.len, ptr + offset);
  offset += transducer.len;

  Module._CopyHeap(paraformer.ptr, paraformer.len, ptr + offset);
  offset += paraformer.len;

  Module._CopyHeap(zipformer2Ctc.ptr, zipformer2Ctc.len, ptr + offset);
  offset += zipformer2Ctc.len;

  const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1;
  const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1;
  const modelTypeLen = Module.lengthBytesUTF8(config.modelType || '') + 1;
  const modelingUnitLen = Module.lengthBytesUTF8(config.modelingUnit || '') + 1;
  const bpeVocabLen = Module.lengthBytesUTF8(config.bpeVocab || '') + 1;
  const tokensBufLen = Module.lengthBytesUTF8(config.tokensBuf || '') + 1;

  const bufferLen = tokensLen + providerLen + modelTypeLen + modelingUnitLen +
      bpeVocabLen + tokensBufLen;
  const buffer = Module._malloc(bufferLen);

  offset = 0;
  Module.stringToUTF8(config.tokens || '', buffer, tokensLen);
  offset += tokensLen;

  Module.stringToUTF8(config.provider || 'cpu', buffer + offset, providerLen);
  offset += providerLen;

  Module.stringToUTF8(config.modelType || '', buffer + offset, modelTypeLen);
  offset += modelTypeLen;

  Module.stringToUTF8(
      config.modelingUnit || '', buffer + offset, modelingUnitLen);
  offset += modelingUnitLen;

  Module.stringToUTF8(config.bpeVocab || '', buffer + offset, bpeVocabLen);
  offset += bpeVocabLen;

  Module.stringToUTF8(config.tokensBuf || '', buffer + offset, tokensBufLen);
  offset += tokensBufLen;

  offset = transducer.len + paraformer.len + zipformer2Ctc.len;
  Module.setValue(ptr + offset, buffer, 'i8*');  // tokens
  offset += 4;

  Module.setValue(ptr + offset, config.numThreads || 1, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, buffer + tokensLen, 'i8*');  // provider
  offset += 4;

  Module.setValue(ptr + offset, config.debug ?? 1, 'i32');
  offset += 4;

  Module.setValue(
      ptr + offset, buffer + tokensLen + providerLen, 'i8*');  // modelType
  offset += 4;

  Module.setValue(
      ptr + offset, buffer + tokensLen + providerLen + modelTypeLen,
      'i8*');  // modelingUnit
  offset += 4;

  Module.setValue(
      ptr + offset,
      buffer + tokensLen + providerLen + modelTypeLen + modelingUnitLen,
      'i8*');  // bpeVocab
  offset += 4;

  Module.setValue(
      ptr + offset,
      buffer + tokensLen + providerLen + modelTypeLen + modelingUnitLen +
          bpeVocabLen,
      'i8*');  // tokens_buf
  offset += 4;

  Module.setValue(ptr + offset, config.tokensBufSize || 0, 'i32');
  offset += 4;

  Module._CopyHeap(nemoCtc.ptr, nemoCtc.len, ptr + offset);
  offset += nemoCtc.len;

  Module._CopyHeap(toneCtc.ptr, toneCtc.len, ptr + offset);
  offset += toneCtc.len;

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
    transducer: transducer,
    paraformer: paraformer,
    zipformer2Ctc: zipformer2Ctc,
    nemoCtc: nemoCtc,
    toneCtc: toneCtc,
  };
}

function initSherpaOnnxFeatureConfig(config, Module) {
  const len = 2 * 4;  // 2 pointers
  const ptr = Module._malloc(len);

  Module.setValue(ptr, config.sampleRate || 16000, 'i32');
  Module.setValue(ptr + 4, config.featureDim || 80, 'i32');
  return {ptr: ptr, len: len};
}

function initSherpaOnnxHomophoneReplacerConfig(config, Module) {
  const len = 3 * 4;
  const ptr = Module._malloc(len);

  const dictDir = '';

  const dictDirLen = Module.lengthBytesUTF8(dictDir) + 1;
  const lexiconLen = Module.lengthBytesUTF8(config.lexicon || '') + 1;
  const ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts || '') + 1;

  const bufferLen = dictDirLen + lexiconLen + ruleFstsLen;

  const buffer = Module._malloc(bufferLen);
  let offset = 0
  Module.stringToUTF8(dictDir, buffer + offset, dictDirLen);
  offset += dictDirLen;

  Module.stringToUTF8(config.lexicon || '', buffer + offset, lexiconLen);
  offset += lexiconLen;

  Module.stringToUTF8(config.ruleFsts || '', buffer + offset, ruleFstsLen);
  offset += ruleFstsLen;

  Module.setValue(ptr, buffer, 'i8*');
  Module.setValue(ptr + 4, buffer + dictDirLen, 'i8*');
  Module.setValue(ptr + 8, buffer + dictDirLen + lexiconLen, 'i8*');

  return {ptr: ptr, len: len, buffer: buffer};
}

function initSherpaOnnxOnlineCtcFstDecoderConfig(config, Module) {
  const len = 2 * 4;
  const ptr = Module._malloc(len);

  const graphLen = Module.lengthBytesUTF8(config.graph || '') + 1;
  const buffer = Module._malloc(graphLen);
  Module.stringToUTF8(config.graph, buffer, graphLen);

  Module.setValue(ptr, buffer, 'i8*');
  Module.setValue(ptr + 4, config.maxActive || 3000, 'i32');
  return {ptr: ptr, len: len, buffer: buffer};
}

function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
  if (!('featConfig' in config)) {
    config.featConfig = {
      sampleRate: 16000,
      featureDim: 80,
    };
  }

  if (!('ctcFstDecoderConfig' in config)) {
    config.ctcFstDecoderConfig = {
      graph: '',
      maxActive: 3000,
    };
  }

  if (!('hotwordsBuf' in config)) {
    config.hotwordsBuf = '';
  }

  if (!('hotwordsBufSize' in config)) {
    config.hotwordsBufSize = 0;
  }

  if (!('hr' in config)) {
    config.hr = {
      lexicon: '',
      ruleFsts: '',
    };
  }

  const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module);
  const model = initSherpaOnnxOnlineModelConfig(config.modelConfig, Module);
  const ctcFstDecoder = initSherpaOnnxOnlineCtcFstDecoderConfig(
      config.ctcFstDecoderConfig, Module)
  const hr = initSherpaOnnxHomophoneReplacerConfig(config.hr, Module);

  const len = feat.len + model.len + 8 * 4 + ctcFstDecoder.len + 5 * 4 + hr.len;
  const ptr = Module._malloc(len);

  let offset = 0;
  Module._CopyHeap(feat.ptr, feat.len, ptr + offset);
  offset += feat.len;

  Module._CopyHeap(model.ptr, model.len, ptr + offset);
  offset += model.len;

  const decodingMethodLen =
      Module.lengthBytesUTF8(config.decodingMethod || 'greedy_search') + 1;
  const hotwordsFileLen = Module.lengthBytesUTF8(config.hotwordsFile || '') + 1;
  const ruleFstsFileLen = Module.lengthBytesUTF8(config.ruleFsts || '') + 1;
  const ruleFarsFileLen = Module.lengthBytesUTF8(config.ruleFars || '') + 1;
  const hotwordsBufLen = Module.lengthBytesUTF8(config.hotwordsBuf || '') + 1;
  const bufferLen = decodingMethodLen + hotwordsFileLen + ruleFstsFileLen +
      ruleFarsFileLen + hotwordsBufLen;
  const buffer = Module._malloc(bufferLen);

  offset = 0;
  Module.stringToUTF8(
      config.decodingMethod || 'greedy_search', buffer, decodingMethodLen);
  offset += decodingMethodLen;

  Module.stringToUTF8(
      config.hotwordsFile || '', buffer + offset, hotwordsFileLen);
  offset += hotwordsFileLen;

  Module.stringToUTF8(config.ruleFsts || '', buffer + offset, ruleFstsFileLen);
  offset += ruleFstsFileLen;

  Module.stringToUTF8(config.ruleFars || '', buffer + offset, ruleFarsFileLen);
  offset += ruleFarsFileLen;

  Module.stringToUTF8(
      config.hotwordsBuf || '', buffer + offset, hotwordsBufLen);
  offset += hotwordsBufLen;

  offset = feat.len + model.len;
  Module.setValue(ptr + offset, buffer, 'i8*');  // decoding method
  offset += 4;

  Module.setValue(ptr + offset, config.maxActivePaths || 4, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, config.enableEndpoint || 0, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, config.rule1MinTrailingSilence || 2.4, 'float');
  offset += 4;

  Module.setValue(ptr + offset, config.rule2MinTrailingSilence || 1.2, 'float');
  offset += 4;

  Module.setValue(ptr + offset, config.rule3MinUtteranceLength || 20, 'float');
  offset += 4;

  Module.setValue(ptr + offset, buffer + decodingMethodLen, 'i8*');
  offset += 4;

  Module.setValue(ptr + offset, config.hotwordsScore || 1.5, 'float');
  offset += 4;

  Module._CopyHeap(ctcFstDecoder.ptr, ctcFstDecoder.len, ptr + offset);
  offset += ctcFstDecoder.len;

  Module.setValue(
      ptr + offset, buffer + decodingMethodLen + hotwordsFileLen, 'i8*');
  offset += 4;

  Module.setValue(
      ptr + offset,
      buffer + decodingMethodLen + hotwordsFileLen + ruleFstsFileLen, 'i8*');
  offset += 4;

  Module.setValue(ptr + offset, config.blankPenalty || 0, 'float');
  offset += 4;

  Module.setValue(
      ptr + offset,
      buffer + decodingMethodLen + hotwordsFileLen + ruleFstsFileLen +
          ruleFarsFileLen,
      'i8*');
  offset += 4;

  Module.setValue(ptr + offset, config.hotwordsBufSize || 0, 'i32');
  offset += 4;

  Module._CopyHeap(hr.ptr, hr.len, ptr + offset);
  offset += hr.len;

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
    feat: feat,
    model: model,
    ctcFstDecoder: ctcFstDecoder,
    hr: hr,
  };
}

function createOnlineRecognizer(Module, myConfig) {
  const onlineTransducerModelConfig = {
    encoder: '',
    decoder: '',
    joiner: '',
  };

  const onlineParaformerModelConfig = {
    encoder: '',
    decoder: '',
  };

  const onlineZipformer2CtcModelConfig = {
    model: '',
  };

  const onlineNemoCtcModelConfig = {
    model: '',
  };

  const onlineToneCtcModelConfig = {
    model: '',
  };

  let type = 0;

  switch (type) {
    case 0:
      // transducer
      onlineTransducerModelConfig.encoder = './encoder.onnx';
      onlineTransducerModelConfig.decoder = './decoder.onnx';
      onlineTransducerModelConfig.joiner = './joiner.onnx';
      break;
    case 1:
      // paraformer
      onlineParaformerModelConfig.encoder = './encoder.onnx';
      onlineParaformerModelConfig.decoder = './decoder.onnx';
      break;
    case 2:
      // zipformer2Ctc
      onlineZipformer2CtcModelConfig.model = './encoder.onnx';
      break;
    case 3:
      // nemoCtc
      onlineNemoCtcModelConfig.model = './nemo-ctc.onnx';
      break;
    case 4:
      // toneCtc
      onlineToneCtcModelConfig.model = './tone-ctc.onnx';
      break;
  }


  const onlineModelConfig = {
    transducer: onlineTransducerModelConfig,
    paraformer: onlineParaformerModelConfig,
    zipformer2Ctc: onlineZipformer2CtcModelConfig,
    nemoCtc: onlineNemoCtcModelConfig,
    toneCtc: onlineToneCtcModelConfig,
    tokens: './tokens.txt',
    numThreads: 1,
    provider: 'cpu',
    debug: 1,
    modelType: '',
    modelingUnit: 'cjkchar',
    bpeVocab: '',
  };

  const featureConfig = {
    sampleRate: 16000,  // it is ignored when toneCtc is used
    featureDim: 80,     // it is ignored when toneCtc is used
  };

  let recognizerConfig = {
    featConfig: featureConfig,
    modelConfig: onlineModelConfig,
    decodingMethod: 'greedy_search',
    maxActivePaths: 4,
    enableEndpoint: 1,
    rule1MinTrailingSilence: 2.4,
    rule2MinTrailingSilence: 1.2,
    rule3MinUtteranceLength: 20,
    hotwordsFile: '',
    hotwordsScore: 1.5,
    ctcFstDecoderConfig: {
      graph: '',
      maxActive: 3000,
    },
    ruleFsts: '',
    ruleFars: '',
  };
  if (myConfig) {
    recognizerConfig = myConfig;
  }

  return new OnlineRecognizer(recognizerConfig, Module);
}

function initSherpaOnnxOfflineTransducerModelConfig(config, Module) {
  const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1;
  const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1;
  const joinerLen = Module.lengthBytesUTF8(config.joiner || '') + 1;

  const n = encoderLen + decoderLen + joinerLen;

  const buffer = Module._malloc(n);

  const len = 3 * 4;  // 3 pointers
  const ptr = Module._malloc(len);

  let offset = 0;
  Module.stringToUTF8(config.encoder || '', buffer + offset, encoderLen);
  offset += encoderLen;

  Module.stringToUTF8(config.decoder || '', buffer + offset, decoderLen);
  offset += decoderLen;

  Module.stringToUTF8(config.joiner || '', buffer + offset, joinerLen);

  offset = 0;
  Module.setValue(ptr, buffer + offset, 'i8*');
  offset += encoderLen;

  Module.setValue(ptr + 4, buffer + offset, 'i8*');
  offset += decoderLen;

  Module.setValue(ptr + 8, buffer + offset, 'i8*');

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxOfflineParaformerModelConfig(config, Module) {
  const n = Module.lengthBytesUTF8(config.model || '') + 1;

  const buffer = Module._malloc(n);

  const len = 1 * 4;  // 1 pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(config.model || '', buffer, n);

  Module.setValue(ptr, buffer, 'i8*');

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxOfflineNemoEncDecCtcModelConfig(config, Module) {
  const n = Module.lengthBytesUTF8(config.model || '') + 1;

  const buffer = Module._malloc(n);

  const len = 1 * 4;  // 1 pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(config.model || '', buffer, n);

  Module.setValue(ptr, buffer, 'i8*');

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxOfflineDolphinModelConfig(config, Module) {
  const n = Module.lengthBytesUTF8(config.model || '') + 1;

  const buffer = Module._malloc(n);

  const len = 1 * 4;  // 1 pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(config.model || '', buffer, n);

  Module.setValue(ptr, buffer, 'i8*');

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxOfflineZipformerCtcModelConfig(config, Module) {
  const n = Module.lengthBytesUTF8(config.model || '') + 1;

  const buffer = Module._malloc(n);

  const len = 1 * 4;  // 1 pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(config.model || '', buffer, n);

  Module.setValue(ptr, buffer, 'i8*');

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxOfflineWenetCtcModelConfig(config, Module) {
  const n = Module.lengthBytesUTF8(config.model || '') + 1;

  const buffer = Module._malloc(n);

  const len = 1 * 4;  // 1 pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(config.model || '', buffer, n);

  Module.setValue(ptr, buffer, 'i8*');

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxOfflineOmnilingualAsrCtcModelConfig(config, Module) {
  const n = Module.lengthBytesUTF8(config.model || '') + 1;

  const buffer = Module._malloc(n);

  const len = 1 * 4;  // 1 pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(config.model || '', buffer, n);

  Module.setValue(ptr, buffer, 'i8*');

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxOfflineMedAsrCtcModelConfig(config, Module) {
  const n = Module.lengthBytesUTF8(config.model || '') + 1;

  const buffer = Module._malloc(n);

  const len = 1 * 4;  // 1 pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(config.model || '', buffer, n);

  Module.setValue(ptr, buffer, 'i8*');

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxOfflineFireRedAsrCtcModelConfig(config, Module) {
  const n = Module.lengthBytesUTF8(config.model || '') + 1;

  const buffer = Module._malloc(n);

  const len = 1 * 4;  // 1 pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(config.model || '', buffer, n);

  Module.setValue(ptr, buffer, 'i8*');

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxOfflineFunAsrNanoModelConfig(config, Module) {
  const encoderAdaptorLen =
      Module.lengthBytesUTF8(config.encoderAdaptor || '') + 1;
  const llmLen = Module.lengthBytesUTF8(config.llm || '') + 1;
  const embeddingLen = Module.lengthBytesUTF8(config.embedding || '') + 1;
  const tokenizerLen = Module.lengthBytesUTF8(config.tokenizer || '') + 1;
  const systemPromptLen =
      Module.lengthBytesUTF8(
          config.systemPrompt || 'You are a helpful assistant.') +
      1;
  const userPromptLen =
      Module.lengthBytesUTF8(config.userPrompt || '语音转写：') + 1;
  const languageLen = Module.lengthBytesUTF8(config.language || '') + 1;
  const hotwordsLen = Module.lengthBytesUTF8(config.hotwords || '') + 1;

  const n = encoderAdaptorLen + llmLen + embeddingLen + tokenizerLen +
      systemPromptLen + userPromptLen + languageLen + hotwordsLen;

  const buffer = Module._malloc(n);

  const len = 13 * 4;  // 8 pointers + 3 int + 2 float
  const ptr = Module._malloc(len);

  let offset = 0;
  Module.stringToUTF8(
      config.encoderAdaptor || '', buffer + offset, encoderAdaptorLen);
  offset += encoderAdaptorLen;

  Module.stringToUTF8(config.llm || '', buffer + offset, llmLen);
  offset += llmLen;

  Module.stringToUTF8(config.embedding || '', buffer + offset, embeddingLen);
  offset += embeddingLen;

  Module.stringToUTF8(config.tokenizer || '', buffer + offset, tokenizerLen);
  offset += tokenizerLen;

  Module.stringToUTF8(
      config.systemPrompt || 'You are a helpful assistant.', buffer + offset,
      systemPromptLen);
  offset += systemPromptLen;

  Module.stringToUTF8(
      config.userPrompt || '语音转写：', buffer + offset, userPromptLen);
  offset += userPromptLen;

  Module.stringToUTF8(config.language || '', buffer + offset, languageLen);
  offset += languageLen;

  Module.stringToUTF8(config.hotwords || '', buffer + offset, hotwordsLen);
  offset += hotwordsLen;

  offset = 0;
  Module.setValue(ptr + 0 * 4, buffer + offset, 'i8*');
  offset += encoderAdaptorLen;

  Module.setValue(ptr + 1 * 4, buffer + offset, 'i8*');
  offset += llmLen;

  Module.setValue(ptr + 2 * 4, buffer + offset, 'i8*');
  offset += embeddingLen;

  Module.setValue(ptr + 3 * 4, buffer + offset, 'i8*');
  offset += tokenizerLen;

  Module.setValue(ptr + 4 * 4, buffer + offset, 'i8*');
  offset += systemPromptLen;

  Module.setValue(ptr + 5 * 4, buffer + offset, 'i8*');
  offset += userPromptLen;

  Module.setValue(ptr + 6 * 4, config.maxNewTokens || 512, 'i32');
  Module.setValue(ptr + 7 * 4, config.temperature || 1e-6, 'float');
  Module.setValue(ptr + 8 * 4, config.topP || 0.8, 'float');
  Module.setValue(ptr + 9 * 4, config.seed || 42, 'i32');
  Module.setValue(ptr + 10 * 4, buffer + offset, 'i8*');
  offset += languageLen;
  Module.setValue(ptr + 11 * 4, config.itn || 0, 'i32');
  Module.setValue(ptr + 12 * 4, buffer + offset, 'i8*');
  offset += hotwordsLen;

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
  const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1;
  const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1;
  const languageLen = Module.lengthBytesUTF8(config.language || '') + 1;
  const taskLen = Module.lengthBytesUTF8(config.task || '') + 1;

  const n = encoderLen + decoderLen + languageLen + taskLen;
  const buffer = Module._malloc(n);

  const len = 7 * 4;  // 4 pointers + 3 int32
  const ptr = Module._malloc(len);

  let offset = 0;
  Module.stringToUTF8(config.encoder || '', buffer + offset, encoderLen);
  offset += encoderLen;

  Module.stringToUTF8(config.decoder || '', buffer + offset, decoderLen);
  offset += decoderLen;

  Module.stringToUTF8(config.language || '', buffer + offset, languageLen);
  offset += languageLen;

  Module.stringToUTF8(config.task || '', buffer + offset, taskLen);

  offset = 0;
  Module.setValue(ptr, buffer + offset, 'i8*');
  offset += encoderLen;

  Module.setValue(ptr + 4, buffer + offset, 'i8*');
  offset += decoderLen;

  Module.setValue(ptr + 8, buffer + offset, 'i8*');
  offset += languageLen;

  Module.setValue(ptr + 12, buffer + offset, 'i8*');
  offset += taskLen;

  Module.setValue(ptr + 16, config.tailPaddings || 2000, 'i32');
  Module.setValue(ptr + 20, config.enableTokenTimestamps || 0, 'i32');
  Module.setValue(ptr + 24, config.enableSegmentTimestamps || 0, 'i32');

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxOfflineCanaryModelConfig(config, Module) {
  const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1;
  const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1;
  const srcLangLen = Module.lengthBytesUTF8(config.srcLang || '') + 1;
  const tgtLangLen = Module.lengthBytesUTF8(config.tgtLang || '') + 1;

  const n = encoderLen + decoderLen + srcLangLen + tgtLangLen;
  const buffer = Module._malloc(n);

  const len = 5 * 4;  // 4 pointers + 1 int32
  const ptr = Module._malloc(len);

  let offset = 0;
  Module.stringToUTF8(config.encoder || '', buffer + offset, encoderLen);
  offset += encoderLen;

  Module.stringToUTF8(config.decoder || '', buffer + offset, decoderLen);
  offset += decoderLen;

  Module.stringToUTF8(config.srcLang || '', buffer + offset, srcLangLen);
  offset += srcLangLen;

  Module.stringToUTF8(config.tgtLang || '', buffer + offset, tgtLangLen);
  offset += tgtLangLen;

  offset = 0;
  Module.setValue(ptr, buffer + offset, 'i8*');
  offset += encoderLen;

  Module.setValue(ptr + 4, buffer + offset, 'i8*');
  offset += decoderLen;

  Module.setValue(ptr + 8, buffer + offset, 'i8*');
  offset += srcLangLen;

  Module.setValue(ptr + 12, buffer + offset, 'i8*');
  offset += tgtLangLen;

  Module.setValue(ptr + 16, config.usePnc ?? 1, 'i32');

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxOfflineMoonshineModelConfig(config, Module) {
  const preprocessorLen = Module.lengthBytesUTF8(config.preprocessor || '') + 1;
  const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1;
  const uncachedDecoderLen =
      Module.lengthBytesUTF8(config.uncachedDecoder || '') + 1;
  const cachedDecoderLen =
      Module.lengthBytesUTF8(config.cachedDecoder || '') + 1;
  const mergedDecoderLen =
      Module.lengthBytesUTF8(config.mergedDecoder || '') + 1;

  const n = preprocessorLen + encoderLen + uncachedDecoderLen +
      cachedDecoderLen + mergedDecoderLen;
  const buffer = Module._malloc(n);

  const len = 5 * 4;  // 5 pointers
  const ptr = Module._malloc(len);

  let offset = 0;
  Module.stringToUTF8(
      config.preprocessor || '', buffer + offset, preprocessorLen);
  offset += preprocessorLen;

  Module.stringToUTF8(config.encoder || '', buffer + offset, encoderLen);
  offset += encoderLen;

  Module.stringToUTF8(
      config.uncachedDecoder || '', buffer + offset, uncachedDecoderLen);
  offset += uncachedDecoderLen;

  Module.stringToUTF8(
      config.cachedDecoder || '', buffer + offset, cachedDecoderLen);
  offset += cachedDecoderLen;

  Module.stringToUTF8(
      config.mergedDecoder || '', buffer + offset, mergedDecoderLen);
  offset += mergedDecoderLen;

  offset = 0;
  Module.setValue(ptr, buffer + offset, 'i8*');
  offset += preprocessorLen;

  Module.setValue(ptr + 4, buffer + offset, 'i8*');
  offset += encoderLen;

  Module.setValue(ptr + 8, buffer + offset, 'i8*');
  offset += uncachedDecoderLen;

  Module.setValue(ptr + 12, buffer + offset, 'i8*');
  offset += cachedDecoderLen;

  Module.setValue(ptr + 16, buffer + offset, 'i8*');
  offset += mergedDecoderLen;

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxOfflineFireRedAsrModelConfig(config, Module) {
  const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1;
  const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1;

  const n = encoderLen + decoderLen;
  const buffer = Module._malloc(n);

  const len = 2 * 4;  // 2 pointers
  const ptr = Module._malloc(len);

  let offset = 0;
  Module.stringToUTF8(config.encoder || '', buffer + offset, encoderLen);
  offset += encoderLen;

  Module.stringToUTF8(config.decoder || '', buffer + offset, decoderLen);
  offset += decoderLen;

  offset = 0;
  Module.setValue(ptr, buffer + offset, 'i8*');
  offset += encoderLen;

  Module.setValue(ptr + 4, buffer + offset, 'i8*');
  offset += decoderLen;

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxOfflineTdnnModelConfig(config, Module) {
  const n = Module.lengthBytesUTF8(config.model || '') + 1;
  const buffer = Module._malloc(n);

  const len = 1 * 4;  // 1 pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(config.model || '', buffer, n);

  Module.setValue(ptr, buffer, 'i8*');

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxOfflineSenseVoiceModelConfig(config, Module) {
  const modelLen = Module.lengthBytesUTF8(config.model || '') + 1;
  const languageLen = Module.lengthBytesUTF8(config.language || '') + 1;

  // useItn is a integer with 4 bytes
  const n = modelLen + languageLen;
  const buffer = Module._malloc(n);

  const len = 3 * 4;  // 2 pointers + 1 int
  const ptr = Module._malloc(len);

  let offset = 0;
  Module.stringToUTF8(config.model || '', buffer + offset, modelLen);
  offset += modelLen;

  Module.stringToUTF8(config.language || '', buffer + offset, languageLen);
  offset += languageLen;

  offset = 0;
  Module.setValue(ptr, buffer + offset, 'i8*');
  offset += modelLen;

  Module.setValue(ptr + 4, buffer + offset, 'i8*');
  offset += languageLen;

  Module.setValue(ptr + 8, config.useInverseTextNormalization ?? 0, 'i32');

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxOfflineLMConfig(config, Module) {
  const n = Module.lengthBytesUTF8(config.model || '') + 1;
  const buffer = Module._malloc(n);

  const len = 2 * 4;
  const ptr = Module._malloc(len);

  Module.stringToUTF8(config.model || '', buffer, n);
  Module.setValue(ptr, buffer, 'i8*');
  Module.setValue(ptr + 4, config.scale || 1, 'float');

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxOfflineModelConfig(config, Module) {
  if (!('transducer' in config)) {
    config.transducer = {
      encoder: '',
      decoder: '',
      joiner: '',
    };
  }

  if (!('paraformer' in config)) {
    config.paraformer = {
      model: '',
    };
  }

  if (!('nemoCtc' in config)) {
    config.nemoCtc = {
      model: '',
    };
  }

  if (!('dolphin' in config)) {
    config.dolphin = {
      model: '',
    };
  }

  if (!('zipformerCtc' in config)) {
    config.zipformerCtc = {
      model: '',
    };
  }

  if (!('wenetCtc' in config)) {
    config.wenetCtc = {
      model: '',
    };
  }

  if (!('omnilingual' in config)) {
    config.omnilingual = {
      model: '',
    };
  }

  if (!('medasr' in config)) {
    config.medasr = {
      model: '',
    };
  }

  if (!('fireRedAsrCtc' in config)) {
    config.fireRedAsrCtc = {
      model: '',
    };
  }

  if (!('funasrNano' in config)) {
    config.funasrNano = {
      encoderAdaptor: '',
      llm: '',
      embedding: '',
      tokenizer: '',
      systemPrompt: 'You are a helpful assistant.',
      userPrompt: '语音转写：',
      maxNewTokens: 512,
      temperature: 1e-6,
      topP: 0.8,
      seed: 42,
      language: '',
      itn: 0,
      hotwords: '',
    };
  }

  if (!('whisper' in config)) {
    config.whisper = {
      encoder: '',
      decoder: '',
      language: '',
      task: '',
      tailPaddings: -1,
      enableTokenTimestamps: 0,
      enableSegmentTimestamps: 0,
    };
  }

  if (!('moonshine' in config)) {
    config.moonshine = {
      preprocessor: '',
      encoder: '',
      uncachedDecoder: '',
      cachedDecoder: '',
      mergedDecoder: '',
    };
  }

  if (!('fireRedAsr' in config)) {
    config.fireRedAsr = {
      encoder: '',
      decoder: '',
    };
  }

  if (!('tdnn' in config)) {
    config.tdnn = {
      model: '',
    };
  }

  if (!('senseVoice' in config)) {
    config.senseVoice = {
      model: '',
      language: '',
      useInverseTextNormalization: 0,
    };
  }

  if (!('canary' in config)) {
    config.canary = {
      encoder: '',
      decoder: '',
      srcLang: '',
      tgtLang: '',
      usePnc: 1,
    };
  }

  const transducer =
      initSherpaOnnxOfflineTransducerModelConfig(config.transducer, Module);

  const paraformer =
      initSherpaOnnxOfflineParaformerModelConfig(config.paraformer, Module);

  const nemoCtc =
      initSherpaOnnxOfflineNemoEncDecCtcModelConfig(config.nemoCtc, Module);

  const whisper =
      initSherpaOnnxOfflineWhisperModelConfig(config.whisper, Module);

  const tdnn = initSherpaOnnxOfflineTdnnModelConfig(config.tdnn, Module);

  const senseVoice =
      initSherpaOnnxOfflineSenseVoiceModelConfig(config.senseVoice, Module);

  const moonshine =
      initSherpaOnnxOfflineMoonshineModelConfig(config.moonshine, Module);

  const fireRedAsr =
      initSherpaOnnxOfflineFireRedAsrModelConfig(config.fireRedAsr, Module);

  const dolphin =
      initSherpaOnnxOfflineDolphinModelConfig(config.dolphin, Module);

  const zipformerCtc =
      initSherpaOnnxOfflineZipformerCtcModelConfig(config.zipformerCtc, Module);

  const canary = initSherpaOnnxOfflineCanaryModelConfig(config.canary, Module);

  const wenetCtc =
      initSherpaOnnxOfflineWenetCtcModelConfig(config.wenetCtc, Module);

  const omnilingual = initSherpaOnnxOfflineOmnilingualAsrCtcModelConfig(
      config.omnilingual, Module);

  const medasr =
      initSherpaOnnxOfflineMedAsrCtcModelConfig(config.medasr, Module);

  const funasrNano =
      initSherpaOnnxOfflineFunAsrNanoModelConfig(config.funasrNano, Module);

  const fireRedAsrCtc = initSherpaOnnxOfflineFireRedAsrCtcModelConfig(
      config.fireRedAsrCtc, Module);

  const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len +
      tdnn.len + 8 * 4 + senseVoice.len + moonshine.len + fireRedAsr.len +
      dolphin.len + zipformerCtc.len + canary.len + wenetCtc.len +
      omnilingual.len + medasr.len + funasrNano.len + fireRedAsrCtc.len;

  const ptr = Module._malloc(len);

  let offset = 0;
  Module._CopyHeap(transducer.ptr, transducer.len, ptr + offset);
  offset += transducer.len;

  Module._CopyHeap(paraformer.ptr, paraformer.len, ptr + offset);
  offset += paraformer.len;

  Module._CopyHeap(nemoCtc.ptr, nemoCtc.len, ptr + offset);
  offset += nemoCtc.len;

  Module._CopyHeap(whisper.ptr, whisper.len, ptr + offset);
  offset += whisper.len;

  Module._CopyHeap(tdnn.ptr, tdnn.len, ptr + offset);
  offset += tdnn.len;

  const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1;
  const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1;
  const modelTypeLen = Module.lengthBytesUTF8(config.modelType || '') + 1;
  const modelingUnitLen = Module.lengthBytesUTF8(config.modelingUnit || '') + 1;
  const bpeVocabLen = Module.lengthBytesUTF8(config.bpeVocab || '') + 1;
  const teleSpeechCtcLen =
      Module.lengthBytesUTF8(config.teleSpeechCtc || '') + 1;

  const bufferLen = tokensLen + providerLen + modelTypeLen + modelingUnitLen +
      bpeVocabLen + teleSpeechCtcLen;

  const buffer = Module._malloc(bufferLen);

  offset = 0;
  Module.stringToUTF8(config.tokens, buffer, tokensLen);
  offset += tokensLen;

  Module.stringToUTF8(config.provider || 'cpu', buffer + offset, providerLen);
  offset += providerLen;

  Module.stringToUTF8(config.modelType || '', buffer + offset, modelTypeLen);
  offset += modelTypeLen;

  Module.stringToUTF8(
      config.modelingUnit || '', buffer + offset, modelingUnitLen);
  offset += modelingUnitLen;

  Module.stringToUTF8(config.bpeVocab || '', buffer + offset, bpeVocabLen);
  offset += bpeVocabLen;

  Module.stringToUTF8(
      config.teleSpeechCtc || '', buffer + offset, teleSpeechCtcLen);
  offset += teleSpeechCtcLen;

  offset =
      transducer.len + paraformer.len + nemoCtc.len + whisper.len + tdnn.len;
  Module.setValue(ptr + offset, buffer, 'i8*');  // tokens
  offset += 4;

  Module.setValue(ptr + offset, config.numThreads || 1, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, config.debug ?? 1, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, buffer + tokensLen, 'i8*');  // provider
  offset += 4;

  Module.setValue(
      ptr + offset, buffer + tokensLen + providerLen, 'i8*');  // modelType
  offset += 4;

  Module.setValue(
      ptr + offset, buffer + tokensLen + providerLen + modelTypeLen,
      'i8*');  // modelingUnit
  offset += 4;

  Module.setValue(
      ptr + offset,
      buffer + tokensLen + providerLen + modelTypeLen + modelingUnitLen,
      'i8*');  // bpeVocab
  offset += 4;

  Module.setValue(
      ptr + offset,
      buffer + tokensLen + providerLen + modelTypeLen + modelingUnitLen +
          bpeVocabLen,
      'i8*');  // teleSpeechCtc
  offset += 4;

  Module._CopyHeap(senseVoice.ptr, senseVoice.len, ptr + offset);
  offset += senseVoice.len;

  Module._CopyHeap(moonshine.ptr, moonshine.len, ptr + offset);
  offset += moonshine.len;

  Module._CopyHeap(fireRedAsr.ptr, fireRedAsr.len, ptr + offset);
  offset += fireRedAsr.len;

  Module._CopyHeap(dolphin.ptr, dolphin.len, ptr + offset);
  offset += dolphin.len;

  Module._CopyHeap(zipformerCtc.ptr, zipformerCtc.len, ptr + offset);
  offset += zipformerCtc.len;

  Module._CopyHeap(canary.ptr, canary.len, ptr + offset);
  offset += canary.len;

  Module._CopyHeap(wenetCtc.ptr, wenetCtc.len, ptr + offset);
  offset += wenetCtc.len;

  Module._CopyHeap(omnilingual.ptr, omnilingual.len, ptr + offset);
  offset += omnilingual.len;

  Module._CopyHeap(medasr.ptr, medasr.len, ptr + offset);
  offset += medasr.len;

  Module._CopyHeap(funasrNano.ptr, funasrNano.len, ptr + offset);
  offset += funasrNano.len;

  Module._CopyHeap(fireRedAsrCtc.ptr, fireRedAsrCtc.len, ptr + offset);
  offset += fireRedAsrCtc.len;

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
    transducer: transducer,
    paraformer: paraformer,
    nemoCtc: nemoCtc,
    whisper: whisper,
    tdnn: tdnn,
    senseVoice: senseVoice,
    moonshine: moonshine,
    fireRedAsr: fireRedAsr,
    dolphin: dolphin,
    zipformerCtc: zipformerCtc,
    canary: canary,
    wenetCtc: wenetCtc,
    omnilingual: omnilingual,
    medasr: medasr,
    funasrNano: funasrNano,
    fireRedAsrCtc: fireRedAsrCtc
  };
}

function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
  if (!('featConfig' in config)) {
    config.featConfig = {
      sampleRate: 16000,
      featureDim: 80,
    };
  }

  if (!('lmConfig' in config)) {
    config.lmConfig = {
      model: '',
      scale: 1.0,
    };
  }

  if (!('hr' in config)) {
    config.hr = {
      lexicon: '',
      ruleFsts: '',
    };
  }

  const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module);
  const model = initSherpaOnnxOfflineModelConfig(config.modelConfig, Module);
  const lm = initSherpaOnnxOfflineLMConfig(config.lmConfig, Module);
  const hr = initSherpaOnnxHomophoneReplacerConfig(config.hr, Module);

  const len = feat.len + model.len + lm.len + 7 * 4 + hr.len;
  const ptr = Module._malloc(len);

  let offset = 0;
  Module._CopyHeap(feat.ptr, feat.len, ptr + offset);
  offset += feat.len;

  Module._CopyHeap(model.ptr, model.len, ptr + offset);
  offset += model.len;

  Module._CopyHeap(lm.ptr, lm.len, ptr + offset);
  offset += lm.len;

  const decodingMethodLen =
      Module.lengthBytesUTF8(config.decodingMethod || 'greedy_search') + 1;
  const hotwordsFileLen = Module.lengthBytesUTF8(config.hotwordsFile || '') + 1;
  const ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts || '') + 1;
  const ruleFarsLen = Module.lengthBytesUTF8(config.ruleFars || '') + 1;
  const bufferLen =
      decodingMethodLen + hotwordsFileLen + ruleFstsLen + ruleFarsLen;
  const buffer = Module._malloc(bufferLen);

  offset = 0;
  Module.stringToUTF8(
      config.decodingMethod || 'greedy_search', buffer, decodingMethodLen);
  offset += decodingMethodLen;

  Module.stringToUTF8(
      config.hotwordsFile || '', buffer + offset, hotwordsFileLen);
  offset += hotwordsFileLen;

  Module.stringToUTF8(config.ruleFsts || '', buffer + offset, ruleFstsLen);
  offset += ruleFstsLen;

  Module.stringToUTF8(config.ruleFars || '', buffer + offset, ruleFarsLen);
  offset += ruleFarsLen;

  offset = feat.len + model.len + lm.len;

  Module.setValue(ptr + offset, buffer, 'i8*');  // decoding method
  offset += 4;

  Module.setValue(ptr + offset, config.maxActivePaths || 4, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, buffer + decodingMethodLen, 'i8*');
  offset += 4;

  Module.setValue(ptr + offset, config.hotwordsScore || 1.5, 'float');
  offset += 4;

  Module.setValue(
      ptr + offset, buffer + decodingMethodLen + hotwordsFileLen, 'i8*');
  offset += 4;

  Module.setValue(
      ptr + offset, buffer + decodingMethodLen + hotwordsFileLen + ruleFstsLen,
      'i8*');
  offset += 4;

  Module.setValue(ptr + offset, config.blankPenalty || 0, 'float');
  offset += 4;

  Module._CopyHeap(hr.ptr, hr.len, ptr + offset);
  offset += hr.len;

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
    feat: feat,
    model: model,
    lm: lm,
    hr: hr,
  };
}

class OfflineStream {
  constructor(handle, Module) {
    this.handle = handle;
    this.Module = Module;
  }

  free() {
    if (this.handle) {
      this.Module._SherpaOnnxDestroyOfflineStream(this.handle);
      this.handle = null;
    }
  }

  /**
   * @param sampleRate {Number}
   * @param samples {Float32Array} Containing samples in the range [-1, 1]
   */
  acceptWaveform(sampleRate, samples) {
    const pointer =
        this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT);
    this.Module.HEAPF32.set(samples, pointer / samples.BYTES_PER_ELEMENT);
    this.Module._SherpaOnnxAcceptWaveformOffline(
        this.handle, sampleRate, pointer, samples.length);
    this.Module._free(pointer);
  }

  /**
   * @param key {String} The option name
   * @param value {String} The option value
   */
  setOption(key, value) {
    const keyLen = this.Module.lengthBytesUTF8(key) + 1;
    const valueLen = this.Module.lengthBytesUTF8(value) + 1;
    const pKey = this.Module._malloc(keyLen);
    const pValue = this.Module._malloc(valueLen);
    this.Module.stringToUTF8(key, pKey, keyLen);
    this.Module.stringToUTF8(value, pValue, valueLen);
    this.Module._SherpaOnnxOfflineStreamSetOption(this.handle, pKey, pValue);
    this.Module._free(pKey);
    this.Module._free(pValue);
  }

  /**
   * @param key {String} The option name
   * @returns {String} The option value, or empty string if not set
   */
  getOption(key) {
    const keyLen = this.Module.lengthBytesUTF8(key) + 1;
    const pKey = this.Module._malloc(keyLen);
    this.Module.stringToUTF8(key, pKey, keyLen);
    const pValue = this.Module._SherpaOnnxOfflineStreamGetOption(this.handle, pKey);
    const value = this.Module.UTF8ToString(pValue);
    this.Module._free(pKey);
    return value;
  }
};

class OfflineRecognizer {
  constructor(configObj, Module) {
    this.config = configObj;
    const config = initSherpaOnnxOfflineRecognizerConfig(configObj, Module);
    const handle = Module._SherpaOnnxCreateOfflineRecognizer(config.ptr);
    freeConfig(config, Module);

    this.handle = handle;
    this.Module = Module;
  }

  setConfig(configObj) {
    const config =
        initSherpaOnnxOfflineRecognizerConfig(configObj, this.Module);
    this.Module._SherpaOnnxOfflineRecognizerSetConfig(this.handle, config.ptr);
    freeConfig(config, this.Module);
  }

  free() {
    this.Module._SherpaOnnxDestroyOfflineRecognizer(this.handle);
    this.handle = 0
  }

  createStream() {
    const handle = this.Module._SherpaOnnxCreateOfflineStream(this.handle);
    return new OfflineStream(handle, this.Module);
  }

  decode(stream) {
    this.Module._SherpaOnnxDecodeOfflineStream(this.handle, stream.handle);
  }

  getResult(stream) {
    const r =
        this.Module._SherpaOnnxGetOfflineStreamResultAsJson(stream.handle);
    const jsonStr = this.Module.UTF8ToString(r);
    const ans = JSON.parse(jsonStr);
    this.Module._SherpaOnnxDestroyOfflineStreamResultJson(r);

    return ans;
  }
};

class OnlineStream {
  constructor(handle, Module) {
    this.handle = handle;
    this.pointer = null;  // buffer
    this.n = 0;           // buffer size
    this.Module = Module;
  }

  free() {
    if (this.handle) {
      this.Module._SherpaOnnxDestroyOnlineStream(this.handle);
      this.handle = null;
      this.Module._free(this.pointer);
      this.pointer = null;
      this.n = 0;
    }
  }

  /**
   * @param sampleRate {Number}
   * @param samples {Float32Array} Containing samples in the range [-1, 1]
   */
  acceptWaveform(sampleRate, samples) {
    if (this.n < samples.length) {
      this.Module._free(this.pointer);
      this.pointer =
          this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT);
      this.n = samples.length
    }

    this.Module.HEAPF32.set(samples, this.pointer / samples.BYTES_PER_ELEMENT);
    this.Module._SherpaOnnxOnlineStreamAcceptWaveform(
        this.handle, sampleRate, this.pointer, samples.length);
  }

  inputFinished() {
    this.Module._SherpaOnnxOnlineStreamInputFinished(this.handle);
  }

  /**
   * @param key {String} The option name
   * @param value {String} The option value
   */
  setOption(key, value) {
    const keyLen = this.Module.lengthBytesUTF8(key) + 1;
    const valueLen = this.Module.lengthBytesUTF8(value) + 1;
    const pKey = this.Module._malloc(keyLen);
    const pValue = this.Module._malloc(valueLen);
    this.Module.stringToUTF8(key, pKey, keyLen);
    this.Module.stringToUTF8(value, pValue, valueLen);
    this.Module._SherpaOnnxOnlineStreamSetOption(this.handle, pKey, pValue);
    this.Module._free(pKey);
    this.Module._free(pValue);
  }

  /**
   * @param key {String} The option name
   * @returns {String} The option value, or empty string if not set
   */
  getOption(key) {
    const keyLen = this.Module.lengthBytesUTF8(key) + 1;
    const pKey = this.Module._malloc(keyLen);
    this.Module.stringToUTF8(key, pKey, keyLen);
    const pValue = this.Module._SherpaOnnxOnlineStreamGetOption(this.handle, pKey);
    const value = this.Module.UTF8ToString(pValue);
    this.Module._free(pKey);
    return value;
  }
};

class OnlineRecognizer {
  constructor(configObj, Module) {
    this.config = configObj;
    const config = initSherpaOnnxOnlineRecognizerConfig(configObj, Module)
    const handle = Module._SherpaOnnxCreateOnlineRecognizer(config.ptr);

    freeConfig(config, Module);

    this.handle = handle;
    this.Module = Module;
  }

  free() {
    this.Module._SherpaOnnxDestroyOnlineRecognizer(this.handle);
    this.handle = 0
  }

  createStream() {
    const handle = this.Module._SherpaOnnxCreateOnlineStream(this.handle);
    return new OnlineStream(handle, this.Module);
  }

  isReady(stream) {
    return this.Module._SherpaOnnxIsOnlineStreamReady(
               this.handle, stream.handle) == 1;
  }

  decode(stream) {
    this.Module._SherpaOnnxDecodeOnlineStream(this.handle, stream.handle);
  }

  isEndpoint(stream) {
    return this.Module._SherpaOnnxOnlineStreamIsEndpoint(
               this.handle, stream.handle) == 1;
  }

  reset(stream) {
    this.Module._SherpaOnnxOnlineStreamReset(this.handle, stream.handle);
  }

  getResult(stream) {
    const r = this.Module._SherpaOnnxGetOnlineStreamResultAsJson(
        this.handle, stream.handle);
    const jsonStr = this.Module.UTF8ToString(r);
    const ans = JSON.parse(jsonStr);
    this.Module._SherpaOnnxDestroyOnlineStreamResultJson(r);

    return ans;
  }
}

if (typeof process == 'object' && typeof process.versions == 'object' &&
    typeof process.versions.node == 'string') {
  module.exports = {
    createOnlineRecognizer,
    OfflineRecognizer,
  };
}


================================================
FILE: wasm/asr/sherpa-onnx-wasm-main-asr.cc
================================================
// wasm/sherpa-onnx-wasm-main-asr.cc
//
// Copyright (c)  2024  Xiaomi Corporation
#include <stdio.h>

#include <algorithm>
#include <memory>

#include "sherpa-onnx/c-api/c-api.h"

// see also
// https://emscripten.org/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.html

extern "C" {

static_assert(sizeof(SherpaOnnxOnlineTransducerModelConfig) == 3 * 4, "");
static_assert(sizeof(SherpaOnnxOnlineParaformerModelConfig) == 2 * 4, "");
static_assert(sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) == 1 * 4, "");
static_assert(sizeof(SherpaOnnxOnlineNemoCtcModelConfig) == 1 * 4, "");
static_assert(sizeof(SherpaOnnxOnlineModelConfig) ==
                  sizeof(SherpaOnnxOnlineTransducerModelConfig) +
                      sizeof(SherpaOnnxOnlineParaformerModelConfig) +
                      sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) + 9 * 4 +
                      sizeof(SherpaOnnxOnlineNemoCtcModelConfig) +
                      sizeof(SherpaOnnxOnlineToneCtcModelConfig),
              "");
static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
static_assert(sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) == 2 * 4, "");
static_assert(sizeof(SherpaOnnxOnlineRecognizerConfig) ==
                  sizeof(SherpaOnnxFeatureConfig) +
                      sizeof(SherpaOnnxOnlineModelConfig) + 8 * 4 +
                      sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) + 5 * 4 +
                      sizeof(SherpaOnnxHomophoneReplacerConfig),
              "");

void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) {
  auto model_config = &config->model_config;
  auto feat = &config->feat_config;
  auto transducer_model_config = &model_config->transducer;
  auto paraformer_model_config = &model_config->paraformer;
  auto ctc_model_config = &model_config->zipformer2_ctc;
  auto nemo_ctc = &model_config->nemo_ctc;
  auto t_one_ctc = &model_config->t_one_ctc;

  fprintf(stdout, "----------online transducer model config----------\n");
  fprintf(stdout, "encoder: %s\n", transducer_model_config->encoder);
  fprintf(stdout, "decoder: %s\n", transducer_model_config->decoder);
  fprintf(stdout, "joiner: %s\n", transducer_model_config->joiner);

  fprintf(stdout, "----------online parformer model config----------\n");
  fprintf(stdout, "encoder: %s\n", paraformer_model_config->encoder);
  fprintf(stdout, "decoder: %s\n", paraformer_model_config->decoder);

  fprintf(stdout, "----------online zipformer2 ctc model config----------\n");
  fprintf(stdout, "model: %s\n", ctc_model_config->model);

  fprintf(stdout, "----------online nemo ctc model config----------\n");
  fprintf(stdout, "model: %s\n", nemo_ctc->model);

  fprintf(stdout, "----------online t-one ctc model config----------\n");
  fprintf(stdout, "model: %s\n", t_one_ctc->model);

  fprintf(stdout, "tokens: %s\n", model_config->tokens);
  fprintf(stdout, "num_threads: %d\n", model_config->num_threads);
  fprintf(stdout, "provider: %s\n", model_config->provider);
  fprintf(stdout, "debug: %d\n", model_config->debug);
  fprintf(stdout, "model type: %s\n", model_config->model_type);
  fprintf(stdout, "modeling unit: %s\n", model_config->modeling_unit);
  fprintf(stdout, "bpe vocab: %s\n", model_config->bpe_vocab);
  fprintf(stdout, "tokens_buf: %s\n",
          model_config->tokens_buf ? model_config->tokens_buf : "");
  fprintf(stdout, "tokens_buf_size: %d\n", model_config->tokens_buf_size);

  fprintf(stdout, "----------feat config----------\n");
  fprintf(stdout, "sample rate: %d\n", feat->sample_rate);
  fprintf(stdout, "feat dim: %d\n", feat->feature_dim);

  fprintf(stdout, "----------recognizer config----------\n");
  fprintf(stdout, "decoding method: %s\n", config->decoding_method);
  fprintf(stdout, "max active paths: %d\n", config->max_active_paths);
  fprintf(stdout, "enable_endpoint: %d\n", config->enable_endpoint);
  fprintf(stdout, "rule1_min_trailing_silence: %.2f\n",
          config->rule1_min_trailing_silence);
  fprintf(stdout, "rule2_min_trailing_silence: %.2f\n",
          config->rule2_min_trailing_silence);
  fprintf(stdout, "rule3_min_utterance_length: %.2f\n",
          config->rule3_min_utterance_length);
  fprintf(stdout, "hotwords_file: %s\n", config->hotwords_file);
  fprintf(stdout, "hotwords_score: %.2f\n", config->hotwords_score);
  fprintf(stdout, "rule_fsts: %s\n", config->rule_fsts);
  fprintf(stdout, "rule_fars: %s\n", config->rule_fars);
  fprintf(stdout, "blank_penalty: %f\n", config->blank_penalty);

  fprintf(stdout, "----------ctc fst decoder config----------\n");
  fprintf(stdout, "graph: %s\n", config->ctc_fst_decoder_config.graph);
  fprintf(stdout, "max_active: %d\n",
          config->ctc_fst_decoder_config.max_active);

  fprintf(stdout, "----------hr config----------\n");
  fprintf(stdout, "dict_dir: %s\n", config->hr.dict_dir);
  fprintf(stdout, "lexicon: %s\n", config->hr.lexicon);
  fprintf(stdout, "rule_fsts: %s\n", config->hr.rule_fsts);
}

void CopyHeap(const char *src, int32_t num_bytes, char *dst) {
  std::copy(src, src + num_bytes, dst);
}
}


================================================
FILE: wasm/kws/CMakeLists.txt
================================================
if(NOT $ENV{SHERPA_ONNX_IS_USING_BUILD_WASM_SH})
    message(FATAL_ERROR "Please use ./build-wasm-simd-kws.sh to build for wasm KWS")
endif()

if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/assets/decoder-epoch-12-avg-2-chunk-16-left-64.onnx")
    message(WARNING "${CMAKE_CURRENT_SOURCE_DIR}/assets/decoder-epoch-12-avg-2-chunk-16-left-64.onnx does not exist")
    message(FATAL_ERROR "Please read ${CMAKE_CURRENT_SOURCE_DIR}/assets/README.md before you continue")
endif()

set(exported_functions
  SherpaOnnxCreateKeywordSpotter
  SherpaOnnxCreateKeywordStream
  SherpaOnnxDecodeKeywordStream
  SherpaOnnxDestroyKeywordResult
  SherpaOnnxDestroyKeywordSpotter
  SherpaOnnxGetKeywordResult
  SherpaOnnxIsKeywordStreamReady
  SherpaOnnxOnlineStreamAcceptWaveform
  SherpaOnnxOnlineStreamGetOption
  SherpaOnnxOnlineStreamInputFinished
  SherpaOnnxOnlineStreamSetOption
  SherpaOnnxResetKeywordStream
)
set(mangled_exported_functions)
foreach(x IN LISTS exported_functions)
    list(APPEND mangled_exported_functions "_${x}")
endforeach()

list(JOIN mangled_exported_functions "," all_exported_functions)

include_directories(${CMAKE_SOURCE_DIR})
set(MY_FLAGS "-s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=512MB -s ALLOW_MEMORY_GROWTH=1")
string(APPEND MY_FLAGS " -sSTACK_SIZE=10485760 ")
string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ")
string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ")
string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString','HEAPU8','HEAP16','HEAP32','HEAPU32','HEAPF32','HEAPF64'] ")

message(STATUS "MY_FLAGS: ${MY_FLAGS}")

set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}")
set(CMAKE_EXECUTABLE_LINKER_FLAGS "${CMAKE_EXECUTABLE_LINKER_FLAGS} ${MY_FLAGS}")

add_executable(sherpa-onnx-wasm-kws-main sherpa-onnx-wasm-main-kws.cc)
target_link_libraries(sherpa-onnx-wasm-kws-main sherpa-onnx-c-api)
install(TARGETS sherpa-onnx-wasm-kws-main DESTINATION bin/wasm)

install(
        FILES
        "sherpa-onnx-kws.js"
        "app.js"
        "index.html"
        "$<TARGET_FILE_DIR:sherpa-onnx-wasm-kws-main>/sherpa-onnx-wasm-kws-main.js"
        "$<TARGET_FILE_DIR:sherpa-onnx-wasm-kws-main>/sherpa-onnx-wasm-kws-main.wasm"
        "$<TARGET_FILE_DIR:sherpa-onnx-wasm-kws-main>/sherpa-onnx-wasm-kws-main.data"
        DESTINATION
        bin/wasm
)


================================================
FILE: wasm/kws/app.js
================================================
// This file copies and modifies code
// from https://mdn.github.io/web-dictaphone/scripts/app.js
// and https://gist.github.com/meziantou/edb7217fddfbb70e899e

const startBtn = document.getElementById('startBtn');
const stopBtn = document.getElementById('stopBtn');
const clearBtn = document.getElementById('clearBtn');
const hint = document.getElementById('hint');
const soundClips = document.getElementById('sound-clips');

let textArea = document.getElementById('results');

let lastResult = '';
let resultList = [];

clearBtn.onclick = function() {
  resultList = [];
  textArea.value = getDisplayResult();
  textArea.scrollTop = textArea.scrollHeight;  // auto scroll
};

function getDisplayResult() {
  let i = 0;
  let ans = '';
  for (let s in resultList) {
    if (resultList[s] == '') {
      continue;
    }

    ans += '' + i + ': ' + resultList[s] + '\n';
    i += 1;
  }

  return ans;
}


Module = {};
Module.onRuntimeInitialized = function() {
  console.log('inited!');
  hint.innerText = 'Model loaded! Please click start';

  startBtn.disabled = false;

  recognizer = createKws(Module);
  console.log('recognizer is created!', recognizer);
};

let audioCtx;
let mediaStream;

let expectedSampleRate = 16000;
let recordSampleRate;  // the sampleRate of the microphone
let recorder = null;   // the microphone
let leftchannel = [];  // TODO: Use a single channel

let recordingLength = 0;  // number of samples so far

let recognizer = null;
let recognizer_stream = null;

if (navigator.mediaDevices.getUserMedia) {
  console.log('getUserMedia supported.');

  // see https://w3c.github.io/mediacapture-main/#dom-mediadevices-getusermedia
  const constraints = {audio: true};

  let onSuccess = function(stream) {
    if (!audioCtx) {
      audioCtx = new AudioContext({sampleRate: 16000});
    }
    console.log(audioCtx);
    recordSampleRate = audioCtx.sampleRate;
    console.log('sample rate ' + recordSampleRate);

    // creates an audio node from the microphone incoming stream
    mediaStream = audioCtx.createMediaStreamSource(stream);
    console.log('media stream', mediaStream);

    // https://developer.mozilla.org/en-US/docs/Web/API/AudioContext/createScriptProcessor
    // bufferSize: the onaudioprocess event is called when the buffer is full
    var bufferSize = 4096;
    var numberOfInputChannels = 1;
    var numberOfOutputChannels = 2;
    if (audioCtx.createScriptProcessor) {
      recorder = audioCtx.createScriptProcessor(
          bufferSize, numberOfInputChannels, numberOfOutputChannels);
    } else {
      recorder = audioCtx.createJavaScriptNode(
          bufferSize, numberOfInputChannels, numberOfOutputChannels);
    }
    console.log('recorder', recorder);

    recorder.onaudioprocess = function(e) {
      let samples = new Float32Array(e.inputBuffer.getChannelData(0))
      samples = downsampleBuffer(samples, expectedSampleRate);

      if (recognizer_stream == null) {
        recognizer_stream = recognizer.createStream();
      }

      recognizer_stream.acceptWaveform(expectedSampleRate, samples);
      while (recognizer.isReady(recognizer_stream)) {
        recognizer.decode(recognizer_stream);

        let result = recognizer.getResult(recognizer_stream);

        if (result.keyword.length > 0) {
          console.log(result)
          lastResult = result;
          resultList.push(JSON.stringify(result));

          // remember to reset the stream right after detecting a keyword
          recognizer.reset(recognizer_stream);
        }
      }


      textArea.value = getDisplayResult();
      textArea.scrollTop = textArea.scrollHeight;  // auto scroll

      let buf = new Int16Array(samples.length);
      for (var i = 0; i < samples.length; ++i) {
        let s = samples[i];
        if (s >= 1)
          s = 1;
        else if (s <= -1)
          s = -1;

        samples[i] = s;
        buf[i] = s * 32767;
      }

      leftchannel.push(buf);
      recordingLength += bufferSize;
    };

    startBtn.onclick = function() {
      mediaStream.connect(recorder);
      recorder.connect(audioCtx.destination);

      console.log('recorder started');

      stopBtn.disabled = false;
      startBtn.disabled = true;
    };

    stopBtn.onclick = function() {
      console.log('recorder stopped');

      // stopBtn recording
      recorder.disconnect(audioCtx.destination);
      mediaStream.disconnect(recorder);

      startBtn.style.background = '';
      startBtn.style.color = '';
      // mediaRecorder.requestData();

      stopBtn.disabled = true;
      startBtn.disabled = false;

      var clipName = new Date().toISOString();

      const clipContainer = document.createElement('article');
      const clipLabel = document.createElement('p');
      const audio = document.createElement('audio');
      const deleteButton = document.createElement('button');
      clipContainer.classList.add('clip');
      audio.setAttribute('controls', '');
      deleteButton.textContent = 'Delete';
      deleteButton.className = 'delete';

      clipLabel.textContent = clipName;

      clipContainer.appendChild(audio);

      clipContainer.appendChild(clipLabel);
      clipContainer.appendChild(deleteButton);
      soundClips.appendChild(clipContainer);

      audio.controls = true;
      let samples = flatten(leftchannel);
      const blob = toWav(samples);

      leftchannel = [];
      const audioURL = window.URL.createObjectURL(blob);
      audio.src = audioURL;
      console.log('recorder stopped');

      deleteButton.onclick = function(e) {
        let evtTgt = e.target;
        evtTgt.parentNode.parentNode.removeChild(evtTgt.parentNode);
      };

      clipLabel.onclick = function() {
        const existingName = clipLabel.textContent;
        const newClipName = prompt('Enter a new name for your sound clip?');
        if (newClipName === null) {
          clipLabel.textContent = existingName;
        } else {
          clipLabel.textContent = newClipName;
        }
      };
    };
  };

  let onError = function(err) {
    console.log('The following error occurred: ' + err);
  };

  navigator.mediaDevices.getUserMedia(constraints).then(onSuccess, onError);
} else {
  console.log('getUserMedia not supported on your browser!');
  alert('getUserMedia not supported on your browser!');
}


// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
function flatten(listOfSamples) {
  let n = 0;
  for (let i = 0; i < listOfSamples.length; ++i) {
    n += listOfSamples[i].length;
  }
  let ans = new Int16Array(n);

  let offset = 0;
  for (let i = 0; i < listOfSamples.length; ++i) {
    ans.set(listOfSamples[i], offset);
    offset += listOfSamples[i].length;
  }
  return ans;
}

// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
function toWav(samples) {
  let buf = new ArrayBuffer(44 + samples.length * 2);
  var view = new DataView(buf);

  // http://soundfile.sapp.org/doc/WaveFormat/
  //                   F F I R
  view.setUint32(0, 0x46464952, true);               // chunkID
  view.setUint32(4, 36 + samples.length * 2, true);  // chunkSize
  //                   E V A W
  view.setUint32(8, 0x45564157, true);  // format
                                        //
  //                      t m f
  view.setUint32(12, 0x20746d66, true);          // subchunk1ID
  view.setUint32(16, 16, true);                  // subchunk1Size, 16 for PCM
  view.setUint32(20, 1, true);                   // audioFormat, 1 for PCM
  view.setUint16(22, 1, true);                   // numChannels: 1 channel
  view.setUint32(24, expectedSampleRate, true);  // sampleRate
  view.setUint32(28, expectedSampleRate * 2, true);  // byteRate
  view.setUint16(32, 2, true);                       // blockAlign
  view.setUint16(34, 16, true);                      // bitsPerSample
  view.setUint32(36, 0x61746164, true);              // Subchunk2ID
  view.setUint32(40, samples.length * 2, true);      // subchunk2Size

  let offset = 44;
  for (let i = 0; i < samples.length; ++i) {
    view.setInt16(offset, samples[i], true);
    offset += 2;
  }

  return new Blob([view], {type: 'audio/wav'});
}

// this function is copied from
// https://github.com/awslabs/aws-lex-browser-audio-capture/blob/master/lib/worker.js#L46
function downsampleBuffer(buffer, exportSampleRate) {
  if (exportSampleRate === recordSampleRate) {
    return buffer;
  }
  var sampleRateRatio = recordSampleRate / exportSampleRate;
  var newLength = Math.round(buffer.length / sampleRateRatio);
  var result = new Float32Array(newLength);
  var offsetResult = 0;
  var offsetBuffer = 0;
  while (offsetResult < result.length) {
    var nextOffsetBuffer = Math.round((offsetResult + 1) * sampleRateRatio);
    var accum = 0, count = 0;
    for (var i = offsetBuffer; i < nextOffsetBuffer && i < buffer.length; i++) {
      accum += buffer[i];
      count++;
    }
    result[offsetResult] = accum / count;
    offsetResult++;
    offsetBuffer = nextOffsetBuffer;
  }
  return result;
};


================================================
FILE: wasm/kws/assets/README.md
================================================
# Introduction

Please refer to
https://www.modelscope.cn/models/pkufool/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/summary
to download a model.

# Kws

The following is an example:
```bash
cd sherpa-onnx/wasm/kws/assets
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2

mv sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx ./
mv sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx ./
mv sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx ./
mv sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt ./
rm -rf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01
```

You should have the following files in `assets` before you can run
`build-wasm-simd-kws.sh`

```bash
fangjuns-MacBook-Pro:assets fangjun$ pwd
/Users/fangjun/open-source/sherpa-onnx/wasm/kws/assets

fangjuns-MacBook-Pro:assets fangjun$ ls -lh
total 25616
-rw-r--r--  1 fangjun  staff   692B Oct 29 16:53 README.md
-rw-r--r--  1 fangjun  staff   660K Aug 14 15:21 decoder-epoch-12-avg-2-chunk-16-left-64.onnx
-rw-r--r--  1 fangjun  staff    12M Aug 14 15:21 encoder-epoch-12-avg-2-chunk-16-left-64.onnx
-rw-r--r--  1 fangjun  staff   247K Aug 14 15:21 joiner-epoch-12-avg-2-chunk-16-left-64.onnx
-rw-r--r--  1 fangjun  staff   1.6K Aug 14 15:08 tokens.txt
```

**Hint**: Remember to remove extra files from ``assets``. For instance, please remember to remove
the file `sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2`.


================================================
FILE: wasm/kws/index.html
================================================
<html lang="en">

<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width" />
  <title>Next-gen Kaldi WebAssembly with sherpa-onnx for kws</title>
  <style>
    h1,div {
      text-align: center;
    }
    textarea {
      width:100%;
    }
  </style>
</head>

<body>
  <h1>
    WebAssembly<br/>
    Kws Demo with <a href="https://github.com/k2-fsa/sherpa-onnx">sherpa-onnx</a>
  </h1>
  <div>
    <span id="hint">Loading model ... ...</span>
    <br/>
    <br/>
    <button id="startBtn" disabled>Start</button>
    <button id="stopBtn" disabled>Stop</button>
    <button id="clearBtn">Clear</button>
    <br/>
    <br/>
    <textarea id="results" rows="10" readonly></textarea>
  </div>

  <section flex="1" overflow="auto" id="sound-clips">
  </section>

  <script src="sherpa-onnx-kws.js"></script>
  <script src="app.js"></script>
  <script src="sherpa-onnx-wasm-kws-main.js"></script>
</body>

================================================
FILE: wasm/kws/sherpa-onnx-kws.js
================================================


function freeConfig(config, Module) {
  if ('buffer' in config) {
    Module._free(config.buffer);
  }

  if ('transducer' in config) {
    freeConfig(config.transducer, Module);
  }

  if ('featConfig' in config) {
    freeConfig(config.featConfig, Module);
  }

  if ('modelConfig' in config) {
    freeConfig(config.modelConfig, Module);
  }

  if ('keywordsBuffer' in config) {
    Module._free(config.keywordsBuffer);
  }

  Module._free(config.ptr);
}


function initSherpaOnnxOnlineTransducerModelConfig(config, Module) {
  const encoderLen = Module.lengthBytesUTF8(config.encoder) + 1;
  const decoderLen = Module.lengthBytesUTF8(config.decoder) + 1;
  const joinerLen = Module.lengthBytesUTF8(config.joiner) + 1;

  const n = encoderLen + decoderLen + joinerLen;

  const buffer = Module._malloc(n);

  const len = 3 * 4;  // 3 pointers
  const ptr = Module._malloc(len);

  let offset = 0;
  Module.stringToUTF8(config.encoder, buffer + offset, encoderLen);
  offset += encoderLen;

  Module.stringToUTF8(config.decoder, buffer + offset, decoderLen);
  offset += decoderLen;

  Module.stringToUTF8(config.joiner, buffer + offset, joinerLen);

  offset = 0;
  Module.setValue(ptr, buffer + offset, 'i8*');
  offset += encoderLen;

  Module.setValue(ptr + 4, buffer + offset, 'i8*');
  offset += decoderLen;

  Module.setValue(ptr + 8, buffer + offset, 'i8*');

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

// The user should free the returned pointers
function initModelConfig(config, Module) {
  if (!('tokensBuf' in config)) {
    config.tokensBuf = '';
  }

  if (!('tokensBufSize' in config)) {
    config.tokensBufSize = 0;
  }

  const transducer =
      initSherpaOnnxOnlineTransducerModelConfig(config.transducer, Module);
  const paraformer_len = 2 * 4
  const zipfomer2_ctc_len = 1 * 4
  const nemo_ctc_len = 1 * 4
  const t_one_ctc_len = 1 * 4

  const len = transducer.len + paraformer_len + zipfomer2_ctc_len + 9 * 4 +
      nemo_ctc_len + t_one_ctc_len;

  const ptr = Module._malloc(len);
  Module.HEAPU8.fill(0, ptr, ptr + len);

  let offset = 0;
  Module._CopyHeap(transducer.ptr, transducer.len, ptr + offset);

  const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1;
  const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1;
  const modelTypeLen = Module.lengthBytesUTF8(config.modelType || '') + 1;
  const modelingUnitLen = Module.lengthBytesUTF8(config.modelingUnit || '') + 1;
  const bpeVocabLen = Module.lengthBytesUTF8(config.bpeVocab || '') + 1;
  const tokensBufLen = Module.lengthBytesUTF8(config.tokensBuf || '') + 1;
  const bufferLen = tokensLen + providerLen + modelTypeLen + modelingUnitLen +
      bpeVocabLen + tokensBufLen;
  const buffer = Module._malloc(bufferLen);

  offset = 0;
  Module.stringToUTF8(config.tokens, buffer, tokensLen);
  offset += tokensLen;

  Module.stringToUTF8(config.provider || 'cpu', buffer + offset, providerLen);
  offset += providerLen;

  Module.stringToUTF8(config.modelType || '', buffer + offset, modelTypeLen);
  offset += modelTypeLen;

  Module.stringToUTF8(
      config.modelingUnit || '', buffer + offset, modelingUnitLen);
  offset += modelingUnitLen;

  Module.stringToUTF8(config.bpeVocab || '', buffer + offset, bpeVocabLen);
  offset += bpeVocabLen;

  Module.stringToUTF8(config.tokensBuf || '', buffer + offset, tokensBufLen);
  offset += tokensBufLen;

  offset = transducer.len + paraformer_len + zipfomer2_ctc_len;
  Module.setValue(ptr + offset, buffer, 'i8*');  // tokens
  offset += 4;

  Module.setValue(ptr + offset, config.numThreads || 1, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, buffer + tokensLen, 'i8*');  // provider
  offset += 4;

  Module.setValue(ptr + offset, config.debug, 'i32');
  offset += 4;

  Module.setValue(
      ptr + offset, buffer + tokensLen + providerLen, 'i8*');  // modelType
  offset += 4;

  Module.setValue(
      ptr + offset, buffer + tokensLen + providerLen + modelTypeLen,
      'i8*');  // modelingUnit
  offset += 4;

  Module.setValue(
      ptr + offset,
      buffer + tokensLen + providerLen + modelTypeLen + modelingUnitLen,
      'i8*');  // bpeVocab
  offset += 4;

  Module.setValue(
      ptr + offset,
      buffer + tokensLen + providerLen + modelTypeLen + modelingUnitLen +
          bpeVocabLen,
      'i8*');  // tokens_buf
  offset += 4;

  Module.setValue(ptr + offset, config.tokensBufSize || 0, 'i32');
  offset += 4;
  // skip nemo_ctc and t_one_ctc

  return {buffer: buffer, ptr: ptr, len: len, transducer: transducer};
}

function initFeatureExtractorConfig(config, Module) {
  let ptr = Module._malloc(4 * 2);
  Module.setValue(ptr, config.samplingRate || 16000, 'i32');
  Module.setValue(ptr + 4, config.featureDim || 80, 'i32');
  return {
    ptr: ptr,
    len: 8,
  };
}

function initKwsConfig(config, Module) {
  if (!('featConfig' in config)) {
    config.featConfig = {
      sampleRate: 16000,
      featureDim: 80,
    };
  }

  if (!('keywordsBuf' in config)) {
    config.keywordsBuf = '';
  }

  if (!('keywordsBufSize' in config)) {
    config.keywordsBufSize = 0;
  }

  let featConfig = initFeatureExtractorConfig(config.featConfig, Module);

  let modelConfig = initModelConfig(config.modelConfig, Module);
  let numBytes = featConfig.len + modelConfig.len + 4 * 7;

  let ptr = Module._malloc(numBytes);
  let offset = 0;
  Module._CopyHeap(featConfig.ptr, featConfig.len, ptr + offset);
  offset += featConfig.len;

  Module._CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset)
  offset += modelConfig.len;

  Module.setValue(ptr + offset, config.maxActivePaths || 4, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, config.numTrailingBlanks || 1, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, config.keywordsScore || 1.0, 'float');
  offset += 4;

  Module.setValue(ptr + offset, config.keywordsThreshold || 0.25, 'float');
  offset += 4;

  let keywordsLen = Module.lengthBytesUTF8(config.keywords) + 1;
  let keywordsBufLen = Module.lengthBytesUTF8(config.keywordsBuf) + 1;

  let keywordsBuffer = Module._malloc(keywordsLen + keywordsBufLen);
  Module.stringToUTF8(config.keywords, keywordsBuffer, keywordsLen);
  Module.stringToUTF8(
      config.keywordsBuf, keywordsBuffer + keywordsLen, keywordsBufLen);

  Module.setValue(ptr + offset, keywordsBuffer, 'i8*');
  offset += 4;

  Module.setValue(ptr + offset, keywordsBuffer + keywordsLen, 'i8*');
  offset += 4;

  Module.setValue(ptr + offset, config.keywordsBufSize, 'i32');
  offset += 4;

  return {
    ptr: ptr,
    len: numBytes,
    featConfig: featConfig,
    modelConfig: modelConfig,
    keywordsBuffer: keywordsBuffer
  };
}

class Stream {
  constructor(handle, Module) {
    this.handle = handle;
    this.pointer = null;
    this.n = 0;
    this.Module = Module;
  }

  free() {
    if (this.handle) {
      this.Module._SherpaOnnxDestroyOnlineStream(this.handle);
      this.handle = null;
      this.Module._free(this.pointer);
      this.pointer = null;
      this.n = 0;
    }
  }

  /**
   * @param sampleRate {Number}
   * @param samples {Float32Array} Containing samples in the range [-1, 1]
   */
  acceptWaveform(sampleRate, samples) {
    if (this.n < samples.length) {
      this.Module._free(this.pointer);
      this.pointer =
          this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT);
      this.n = samples.length
    }

    this.Module.HEAPF32.set(samples, this.pointer / samples.BYTES_PER_ELEMENT);
    this.Module._SherpaOnnxOnlineStreamAcceptWaveform(
        this.handle, sampleRate, this.pointer, samples.length);
  }

  inputFinished() {
    this.Module._SherpaOnnxOnlineStreamInputFinished(this.handle);
  }
};

class Kws {
  constructor(configObj, Module) {
    this.config = configObj;
    let config = initKwsConfig(configObj, Module)
    let handle = Module._SherpaOnnxCreateKeywordSpotter(config.ptr);

    freeConfig(config, Module);

    this.handle = handle;
    this.Module = Module;
  }

  free() {
    this.Module._SherpaOnnxDestroyKeywordSpotter(this.handle);
    this.handle = 0
  }

  createStream() {
    let handle = this.Module._SherpaOnnxCreateKeywordStream(this.handle);
    return new Stream(handle, this.Module);
  }

  isReady(stream) {
    return this.Module._SherpaOnnxIsKeywordStreamReady(
               this.handle, stream.handle) == 1;
  }

  decode(stream) {
    this.Module._SherpaOnnxDecodeKeywordStream(this.handle, stream.handle);
  }

  reset(stream) {
    this.Module._SherpaOnnxResetKeywordStream(this.handle, stream.handle);
  }

  getResult(stream) {
    let r = this.Module._SherpaOnnxGetKeywordResult(this.handle, stream.handle);
    let jsonPtr = this.Module.getValue(r + 24, 'i8*');
    let json = this.Module.UTF8ToString(jsonPtr);
    this.Module._SherpaOnnxDestroyKeywordResult(r);
    return JSON.parse(json);
  }
}

function createKws(Module, myConfig) {
  let transducerConfig = {
    encoder: './encoder-epoch-12-avg-2-chunk-16-left-64.onnx',
    decoder: './decoder-epoch-12-avg-2-chunk-16-left-64.onnx',
    joiner: './joiner-epoch-12-avg-2-chunk-16-left-64.onnx',
  };
  let modelConfig = {
    transducer: transducerConfig,
    tokens: './tokens.txt',
    provider: 'cpu',
    modelType: '',
    numThreads: 1,
    debug: 1,
    modelingUnit: 'cjkchar',
    bpeVocab: '',
  };

  let featConfig = {
    samplingRate: 16000,
    featureDim: 80,
  };

  let configObj = {
    featConfig: featConfig,
    modelConfig: modelConfig,
    maxActivePaths: 4,
    numTrailingBlanks: 1,
    keywordsScore: 1.0,
    keywordsThreshold: 0.25,
    keywords: 'x iǎo ài t óng x ué @小爱同学\n' +
        'j ūn g ē n iú b ī @军哥牛逼'
  };

  if (myConfig) {
    configObj = myConfig;
  }
  return new Kws(configObj, Module);
}

if (typeof process == 'object' && typeof process.versions == 'object' &&
    typeof process.versions.node == 'string') {
  module.exports = {
    createKws,
  };
}


================================================
FILE: wasm/kws/sherpa-onnx-wasm-main-kws.cc
================================================
// wasm/sherpa-onnx-wasm-main-kws.cc
//
// Copyright (c)  2024  lovemefan
#include <stdio.h>

#include <algorithm>
#include <memory>

#include "sherpa-onnx/c-api/c-api.h"

// see also
// https://emscripten.org/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.html

extern "C" {

static_assert(sizeof(SherpaOnnxOnlineTransducerModelConfig) == 3 * 4, "");
static_assert(sizeof(SherpaOnnxOnlineParaformerModelConfig) == 2 * 4, "");
static_assert(sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) == 1 * 4, "");
static_assert(sizeof(SherpaOnnxOnlineModelConfig) ==
                  sizeof(SherpaOnnxOnlineTransducerModelConfig) +
                      sizeof(SherpaOnnxOnlineParaformerModelConfig) +
                      sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) + 9 * 4 +
                      sizeof(SherpaOnnxOnlineNemoCtcModelConfig) +
                      sizeof(SherpaOnnxOnlineToneCtcModelConfig),
              "");
static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
static_assert(sizeof(SherpaOnnxKeywordSpotterConfig) ==
                  sizeof(SherpaOnnxFeatureConfig) +
                      sizeof(SherpaOnnxOnlineModelConfig) + 7 * 4,
              "");

void CopyHeap(const char *src, int32_t num_bytes, char *dst) {
  std::copy(src, src + num_bytes, dst);
}
}


================================================
FILE: wasm/nodejs/CMakeLists.txt
================================================
if(NOT $ENV{SHERPA_ONNX_IS_USING_BUILD_WASM_SH})
  message(FATAL_ERROR "Please use ./build-wasm-simd-nodejs.sh to build for wasm NodeJS")
endif()

set(exported_functions
  #tts
  PrintOfflineTtsConfig
  SherpaOnnxCreateOfflineTts
  SherpaOnnxDestroyOfflineTts
  SherpaOnnxDestroyOfflineTtsGeneratedAudio
  SherpaOnnxOfflineTtsGenerate
  SherpaOnnxOfflineTtsGenerateWithCallback
  SherpaOnnxOfflineTtsGenerateWithConfig
  SherpaOnnxOfflineTtsNumSpeakers
  SherpaOnnxOfflineTtsSampleRate
  SherpaOnnxWriteWave
  # streaming asr
  SherpaOnnxCreateOnlineRecognizer
  SherpaOnnxCreateOnlineStream
  SherpaOnnxDecodeOnlineStream
  SherpaOnnxDestroyOnlineRecognizer
  SherpaOnnxDestroyOnlineRecognizerResult
  SherpaOnnxDestroyOnlineStream
  SherpaOnnxDestroyOnlineStreamResultJson
  SherpaOnnxGetOnlineStreamResult
  SherpaOnnxGetOnlineStreamResultAsJson
  SherpaOnnxIsOnlineStreamReady
  SherpaOnnxOnlineStreamAcceptWaveform
  SherpaOnnxOnlineStreamGetOption
  SherpaOnnxOnlineStreamInputFinished
  SherpaOnnxOnlineStreamIsEndpoint
  SherpaOnnxOnlineStreamReset
  SherpaOnnxOnlineStreamSetOption
  # non-streaming ASR
  PrintOfflineRecognizerConfig
  SherpaOnnxAcceptWaveformOffline
  SherpaOnnxCreateOfflineRecognizer
  SherpaOnnxCreateOfflineStream
  SherpaOnnxDecodeMultipleOfflineStreams
  SherpaOnnxDecodeOfflineStream
  SherpaOnnxDestroyOfflineRecognizer
  SherpaOnnxDestroyOfflineRecognizerResult
  SherpaOnnxDestroyOfflineStream
  SherpaOnnxDestroyOfflineStreamResultJson
  SherpaOnnxGetOfflineStreamResult
  SherpaOnnxGetOfflineStreamResultAsJson
  SherpaOnnxOfflineStreamGetOption
  SherpaOnnxOfflineStreamSetOption
  SherpaOnnxOfflineRecognizerSetConfig
  # online kws
  SherpaOnnxCreateKeywordSpotter
  SherpaOnnxCreateKeywordStream
  SherpaOnnxDecodeKeywordStream
  SherpaOnnxDestroyKeywordResult
  SherpaOnnxDestroyKeywordSpotter
  SherpaOnnxGetKeywordResult
  SherpaOnnxIsKeywordStreamReady
  SherpaOnnxResetKeywordStream
  # VAD
  SherpaOnnxCreateCircularBuffer
  SherpaOnnxDestroyCircularBuffer
  SherpaOnnxCircularBufferPush
  SherpaOnnxCircularBufferGet
  SherpaOnnxCircularBufferFree
  SherpaOnnxCircularBufferPop
  SherpaOnnxCircularBufferSize
  SherpaOnnxCircularBufferHead
  SherpaOnnxCircularBufferReset
  SherpaOnnxCreateVoiceActivityDetector
  SherpaOnnxDestroyVoiceActivityDetector
  SherpaOnnxVoiceActivityDetectorAcceptWaveform
  SherpaOnnxVoiceActivityDetectorEmpty
  SherpaOnnxVoiceActivityDetectorDetected
  SherpaOnnxVoiceActivityDetectorPop
  SherpaOnnxVoiceActivityDetectorClear
  SherpaOnnxVoiceActivityDetectorFront
  SherpaOnnxDestroySpeechSegment
  SherpaOnnxVoiceActivityDetectorReset
  SherpaOnnxVoiceActivityDetectorFlush
  # Speaker diarization
  SherpaOnnxCreateOfflineSpeakerDiarization
  SherpaOnnxDestroyOfflineSpeakerDiarization
  SherpaOnnxOfflineSpeakerDiarizationDestroyResult
  SherpaOnnxOfflineSpeakerDiarizationDestroySegment
  SherpaOnnxOfflineSpeakerDiarizationGetSampleRate
  SherpaOnnxOfflineSpeakerDiarizationProcess
  SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback
  SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments
  SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime
  SherpaOnnxOfflineSpeakerDiarizationSetConfig
  #
  SherpaOnnxFileExists
  SherpaOnnxReadWave
  SherpaOnnxReadWaveFromBinaryData
  SherpaOnnxFreeWave
  SherpaOnnxWriteWave
  # speech enhancement
  SherpaOnnxCreateOfflineSpeechDenoiser
  SherpaOnnxCreateOnlineSpeechDenoiser
  SherpaOnnxDestroyDenoisedAudio
  SherpaOnnxDestroyOfflineSpeechDenoiser
  SherpaOnnxDestroyOnlineSpeechDenoiser
  SherpaOnnxOfflineSpeechDenoiserGetSampleRate
  SherpaOnnxOfflineSpeechDenoiserRun
  SherpaOnnxOnlineSpeechDenoiserGetFrameShiftInSamples
  SherpaOnnxOnlineSpeechDenoiserGetSampleRate
  SherpaOnnxOnlineSpeechDenoiserRun
  SherpaOnnxOnlineSpeechDenoiserFlush
  SherpaOnnxOnlineSpeechDenoiserReset
  # version
  SherpaOnnxGetGitDate
  SherpaOnnxGetGitSha1
  SherpaOnnxGetVersionStr
)


set(mangled_exported_functions)
foreach(x IN LISTS exported_functions)
  list(APPEND mangled_exported_functions "_${x}")
endforeach()
list(JOIN mangled_exported_functions "," all_exported_functions)

include_directories(${CMAKE_SOURCE_DIR})
set(MY_FLAGS " -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=512MB -s ALLOW_MEMORY_GROWTH=1")
string(APPEND MY_FLAGS " -sSTACK_SIZE=10485760 ") # 10MB
string(APPEND MY_FLAGS " -sALLOW_TABLE_GROWTH ")
string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ")
string(APPEND MY_FLAGS " -sNODERAWFS=1 ")
string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString','HEAPU8','HEAP16','HEAP32','HEAPU32','HEAPF32','HEAPF64','addFunction','removeFunction'] ")

string(APPEND MY_FLAGS " -sMODULARIZE=1 -sWASM_ASYNC_COMPILATION=0 ")

message(STATUS "MY_FLAGS: ${MY_FLAGS}")

set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}")
set(CMAKE_EXECUTABLE_LINKER_FLAGS "${CMAKE_EXECUTABLE_LINKER_FLAGS} ${MY_FLAGS}")

add_executable(sherpa-onnx-wasm-nodejs sherpa-onnx-wasm-nodejs.cc)
target_link_libraries(sherpa-onnx-wasm-nodejs sherpa-onnx-core sherpa-onnx-c-api)
install(TARGETS sherpa-onnx-wasm-nodejs DESTINATION bin/wasm/nodejs)

install(
  FILES
  ${CMAKE_SOURCE_DIR}/wasm/asr/sherpa-onnx-asr.js
  ${CMAKE_SOURCE_DIR}/wasm/tts/sherpa-onnx-tts.js
  ${CMAKE_SOURCE_DIR}/wasm/kws/sherpa-onnx-kws.js
  ${CMAKE_SOURCE_DIR}/wasm/vad/sherpa-onnx-vad.js
  ${CMAKE_SOURCE_DIR}/wasm/speaker-diarization/sherpa-onnx-speaker-diarization.js
  ${CMAKE_SOURCE_DIR}/wasm/speech-enhancement/sherpa-onnx-speech-enhancement.js
  ${CMAKE_SOURCE_DIR}/wasm/nodejs/sherpa-onnx-wave.js
    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.js"
    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.wasm"
  DESTINATION
    bin/wasm/nodejs
)


================================================
FILE: wasm/nodejs/sherpa-onnx-wasm-nodejs.cc
================================================
// wasm/sherpa-onnx-wasm-main-nodejs.cc
//
// Copyright (c)  2024  Xiaomi Corporation
#include <stdio.h>

#include <algorithm>
#include <memory>

#include "sherpa-onnx/c-api/c-api.h"

extern "C" {

static_assert(sizeof(SherpaOnnxOfflineTransducerModelConfig) == 3 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineParaformerModelConfig) == 4, "");

static_assert(sizeof(SherpaOnnxOfflineZipformerCtcModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineWenetCtcModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineOmnilingualAsrCtcModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineMedAsrCtcModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineFireRedAsrCtcModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineFunASRNanoModelConfig) == 13 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineDolphinModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineWhisperModelConfig) == 7 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineFireRedAsrModelConfig) == 2 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineMoonshineModelConfig) == 5 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineTdnnModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineSenseVoiceModelConfig) == 3 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineCanaryModelConfig) == 5 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineLMConfig) == 2 * 4, "");

static_assert(sizeof(SherpaOnnxOfflineModelConfig) ==
                  sizeof(SherpaOnnxOfflineTransducerModelConfig) +
                      sizeof(SherpaOnnxOfflineParaformerModelConfig) +
                      sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) +
                      sizeof(SherpaOnnxOfflineWhisperModelConfig) +
                      sizeof(SherpaOnnxOfflineTdnnModelConfig) + 8 * 4 +
                      sizeof(SherpaOnnxOfflineSenseVoiceModelConfig) +
                      sizeof(SherpaOnnxOfflineMoonshineModelConfig) +
                      sizeof(SherpaOnnxOfflineFireRedAsrModelConfig) +
                      sizeof(SherpaOnnxOfflineDolphinModelConfig) +
                      sizeof(SherpaOnnxOfflineZipformerCtcModelConfig) +
                      sizeof(SherpaOnnxOfflineCanaryModelConfig) +
                      sizeof(SherpaOnnxOfflineWenetCtcModelConfig) +
                      sizeof(SherpaOnnxOfflineOmnilingualAsrCtcModelConfig) +
                      sizeof(SherpaOnnxOfflineMedAsrCtcModelConfig) +
                      sizeof(SherpaOnnxOfflineFunASRNanoModelConfig) +
                      sizeof(SherpaOnnxOfflineFireRedAsrCtcModelConfig),

              "");
static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineRecognizerConfig) ==
                  sizeof(SherpaOnnxFeatureConfig) +
                      sizeof(SherpaOnnxOfflineLMConfig) +
                      sizeof(SherpaOnnxOfflineModelConfig) + 7 * 4 +
                      sizeof(SherpaOnnxHomophoneReplacerConfig),
              "");

void PrintOfflineTtsConfig(SherpaOnnxOfflineTtsConfig *tts_config) {
  auto tts_model_config = &tts_config->model;
  auto vits_model_config = &tts_model_config->vits;
  auto matcha_model_config = &tts_model_config->matcha;
  auto kokoro = &tts_model_config->kokoro;
  auto kitten = &tts_model_config->kitten;
  auto zipvoice = &tts_model_config->zipvoice;
  auto pocket = &tts_model_config->pocket;
  auto supertonic = &tts_model_config->supertonic;

  fprintf(stdout, "----------vits model config----------\n");
  fprintf(stdout, "model: %s\n", vits_model_config->model);
  fprintf(stdout, "lexicon: %s\n", vits_model_config->lexicon);
  fprintf(stdout, "tokens: %s\n", vits_model_config->tokens);
  fprintf(stdout, "data_dir: %s\n", vits_model_config->data_dir);
  fprintf(stdout, "noise scale: %.3f\n", vits_model_config->noise_scale);
  fprintf(stdout, "noise scale w: %.3f\n", vits_model_config->noise_scale_w);
  fprintf(stdout, "length scale: %.3f\n", vits_model_config->length_scale);
  fprintf(stdout, "dict_dir: %s\n", vits_model_config->dict_dir);

  fprintf(stdout, "----------matcha model config----------\n");
  fprintf(stdout, "acoustic_model: %s\n", matcha_model_config->acoustic_model);
  fprintf(stdout, "vocoder: %s\n", matcha_model_config->vocoder);
  fprintf(stdout, "lexicon: %s\n", matcha_model_config->lexicon);
  fprintf(stdout, "tokens: %s\n", matcha_model_config->tokens);
  fprintf(stdout, "data_dir: %s\n", matcha_model_config->data_dir);
  fprintf(stdout, "noise scale: %.3f\n", matcha_model_config->noise_scale);
  fprintf(stdout, "length scale: %.3f\n", matcha_model_config->length_scale);
  fprintf(stdout, "dict_dir: %s\n", matcha_model_config->dict_dir);

  fprintf(stdout, "----------kokoro model config----------\n");
  fprintf(stdout, "model: %s\n", kokoro->model);
  fprintf(stdout, "voices: %s\n", kokoro->voices);
  fprintf(stdout, "tokens: %s\n", kokoro->tokens);
  fprintf(stdout, "data_dir: %s\n", kokoro->data_dir);
  fprintf(stdout, "length scale: %.3f\n", kokoro->length_scale);
  fprintf(stdout, "dict_dir: %s\n", kokoro->dict_dir);
  fprintf(stdout, "lexicon: %s\n", kokoro->lexicon);
  fprintf(stdout, "lang: %s\n", kokoro->lang);

  fprintf(stdout, "----------kitten model config----------\n");
  fprintf(stdout, "model: %s\n", kitten->model);
  fprintf(stdout, "voices: %s\n", kitten->voices);
  fprintf(stdout, "tokens: %s\n", kitten->tokens);
  fprintf(stdout, "data_dir: %s\n", kitten->data_dir);
  fprintf(stdout, "length scale: %.3f\n", kitten->length_scale);

  fprintf(stdout, "----------zipvoice model config----------\n");
  fprintf(stdout, "tokens: %s\n", zipvoice->tokens);
  fprintf(stdout, "encoder: %s\n", zipvoice->encoder);
  fprintf(stdout, "decoder: %s\n", zipvoice->decoder);
  fprintf(stdout, "vocoder: %s\n", zipvoice->vocoder);
  fprintf(stdout, "data_dir: %s\n", zipvoice->data_dir);
  fprintf(stdout, "lexicon: %s\n", zipvoice->lexicon);
  fprintf(stdout, "feat scale: %.3f\n", zipvoice->feat_scale);
  fprintf(stdout, "t_shift: %.3f\n", zipvoice->t_shift);
  fprintf(stdout, "target_rms: %.3f\n", zipvoice->target_rms);
  fprintf(stdout, "guidance_scale: %.3f\n", zipvoice->guidance_scale);

  fprintf(stdout, "----------pocketTTS model config----------\n");
  fprintf(stdout, "lm_flow: %s\n", pocket->lm_flow);
  fprintf(stdout, "lm_main: %s\n", pocket->lm_main);
  fprintf(stdout, "encoder: %s\n", pocket->encoder);
  fprintf(stdout, "decoder: %s\n", pocket->decoder);
  fprintf(stdout, "text_conditioner: %s\n", pocket->text_conditioner);
  fprintf(stdout, "vocab_json: %s\n", pocket->vocab_json);
  fprintf(stdout, "token_scores_json: %s\n", pocket->token_scores_json);
  fprintf(stdout, "voice_embedding_cache_capacity: %d\n",
          pocket->voice_embedding_cache_capacity);

  fprintf(stdout, "----------supertonic model config----------\n");
  fprintf(stdout, "duration_predictor: %s\n", supertonic->duration_predictor);
  fprintf(stdout, "text_encoder: %s\n", supertonic->text_encoder);
  fprintf(stdout, "vector_estimator: %s\n", supertonic->vector_estimator);
  fprintf(stdout, "vocoder: %s\n", supertonic->vocoder);
  fprintf(stdout, "tts_json: %s\n", supertonic->tts_json);
  fprintf(stdout, "unicode_indexer: %s\n", supertonic->unicode_indexer);
  fprintf(stdout, "voice_style: %s\n", supertonic->voice_style);

  fprintf(stdout, "----------tts model config----------\n");
  fprintf(stdout, "num threads: %d\n", tts_model_config->num_threads);
  fprintf(stdout, "debug: %d\n", tts_model_config->debug);
  fprintf(stdout, "provider: %s\n", tts_model_config->provider);

  fprintf(stdout, "----------tts config----------\n");
  fprintf(stdout, "rule_fsts: %s\n", tts_config->rule_fsts);
  fprintf(stdout, "rule_fars: %s\n", tts_config->rule_fars);
  fprintf(stdout, "max num sentences: %d\n", tts_config->max_num_sentences);
  fprintf(stdout, "silence scale: %.3f\n", tts_config->silence_scale);
}

void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
  auto model_config = &config->model_config;
  auto feat = &config->feat_config;
  auto transducer = &model_config->transducer;
  auto paraformer = &model_config->paraformer;
  auto nemo_ctc = &model_config->nemo_ctc;
  auto whisper = &model_config->whisper;
  auto tdnn = &model_config->tdnn;
  auto sense_voice = &model_config->sense_voice;
  auto moonshine = &model_config->moonshine;
  auto fire_red_asr = &model_config->fire_red_asr;
  auto dolphin = &model_config->dolphin;
  auto zipformer_ctc = &model_config->zipformer_ctc;
  auto canary = &model_config->canary;
  auto wenet_ctc = &model_config->wenet_ctc;
  auto omnilingual = &model_config->omnilingual;
  auto medasr = &model_config->medasr;
  auto funasr_nano = &model_config->funasr_nano;
  auto fire_red_asr_ctc = &model_config->fire_red_asr_ctc;

  fprintf(stdout, "----------offline transducer model config----------\n");
  fprintf(stdout, "encoder: %s\n", transducer->encoder);
  fprintf(stdout, "decoder: %s\n", transducer->decoder);
  fprintf(stdout, "joiner: %s\n", transducer->joiner);

  fprintf(stdout, "----------offline paraformer model config----------\n");
  fprintf(stdout, "model: %s\n", paraformer->model);

  fprintf(stdout, "----------offline nemo_ctc model config----------\n");
  fprintf(stdout, "model: %s\n", nemo_ctc->model);

  fprintf(stdout, "----------offline whisper model config----------\n");
  fprintf(stdout, "encoder: %s\n", whisper->encoder);
  fprintf(stdout, "decoder: %s\n", whisper->decoder);
  fprintf(stdout, "language: %s\n", whisper->language);
  fprintf(stdout, "task: %s\n", whisper->task);
  fprintf(stdout, "tail_paddings: %d\n", whisper->tail_paddings);

  fprintf(stdout, "----------offline tdnn model config----------\n");
  fprintf(stdout, "model: %s\n", tdnn->model);

  fprintf(stdout, "----------offline sense_voice model config----------\n");
  fprintf(stdout, "model: %s\n", sense_voice->model);
  fprintf(stdout, "language: %s\n", sense_voice->language);
  fprintf(stdout, "use_itn: %d\n", sense_voice->use_itn);

  fprintf(stdout, "----------offline moonshine model config----------\n");
  fprintf(stdout, "preprocessor: %s\n", moonshine->preprocessor);
  fprintf(stdout, "encoder: %s\n", moonshine->encoder);
  fprintf(stdout, "uncached_decoder: %s\n", moonshine->uncached_decoder);
  fprintf(stdout, "cached_decoder: %s\n", moonshine->cached_decoder);
  fprintf(stdout, "merged_decoder: %s\n", moonshine->merged_decoder);

  fprintf(stdout, "----------offline FireRedAsr model config----------\n");
  fprintf(stdout, "encoder: %s\n", fire_red_asr->encoder);
  fprintf(stdout, "decoder: %s\n", fire_red_asr->decoder);

  fprintf(stdout, "----------offline Dolphin model config----------\n");
  fprintf(stdout, "model: %s\n", dolphin->model);

  fprintf(stdout, "----------offline zipformer ctc model config----------\n");
  fprintf(stdout, "model: %s\n", zipformer_ctc->model);

  fprintf(stdout, "----------offline NeMo Canary model config----------\n");
  fprintf(stdout, "encoder: %s\n", canary->encoder);
  fprintf(stdout, "decoder: %s\n", canary->decoder);
  fprintf(stdout, "src_lang: %s\n", canary->src_lang);
  fprintf(stdout, "tgt_lang: %s\n", canary->tgt_lang);
  fprintf(stdout, "use_pnc: %d\n", canary->use_pnc);

  fprintf(stdout, "----------offline wenet ctc model config----------\n");
  fprintf(stdout, "model: %s\n", wenet_ctc->model);

  fprintf(stdout, "----------offline Omnilingual ASR model config----------\n");
  fprintf(stdout, "model: %s\n", omnilingual->model);

  fprintf(stdout, "----------offline MedASR model config----------\n");
  fprintf(stdout, "model: %s\n", medasr->model);

  fprintf(stdout, "----------offline FunASR Nano config----------\n");
  fprintf(stdout, "encoder_adaptor: %s\n", funasr_nano->encoder_adaptor);
  fprintf(stdout, "llm: %s\n", funasr_nano->llm);
  fprintf(stdout, "embedding: %s\n", funasr_nano->embedding);
  fprintf(stdout, "tokenizer: %s\n", funasr_nano->tokenizer);
  fprintf(stdout, "system_prompt: %s\n", funasr_nano->system_prompt);
  fprintf(stdout, "user_prompt: %s\n", funasr_nano->user_prompt);
  fprintf(stdout, "max_new_tokens: %d\n", funasr_nano->max_new_tokens);
  fprintf(stdout, "temperature: %f\n", funasr_nano->temperature);
  fprintf(stdout, "top_p: %f\n", funasr_nano->top_p);
  fprintf(stdout, "seed: %d\n", funasr_nano->seed);

  fprintf(stdout, "----------offline FireRedASR CTC model config----------\n");
  fprintf(stdout, "model: %s\n", fire_red_asr_ctc->model);

  fprintf(stdout, "tokens: %s\n", model_config->tokens);
  fprintf(stdout, "num_threads: %d\n", model_config->num_threads);
  fprintf(stdout, "provider: %s\n", model_config->provider);
  fprintf(stdout, "debug: %d\n", model_config->debug);
  fprintf(stdout, "model type: %s\n", model_config->model_type);
  fprintf(stdout, "modeling unit: %s\n", model_config->modeling_unit);
  fprintf(stdout, "bpe vocab: %s\n", model_config->bpe_vocab);
  fprintf(stdout, "telespeech_ctc: %s\n", model_config->telespeech_ctc);

  fprintf(stdout, "----------feat config----------\n");
  fprintf(stdout, "sample rate: %d\n", feat->sample_rate);
  fprintf(stdout, "feat dim: %d\n", feat->feature_dim);

  fprintf(stdout, "----------recognizer config----------\n");
  fprintf(stdout, "decoding method: %s\n", config->decoding_method);
  fprintf(stdout, "max active paths: %d\n", config->max_active_paths);
  fprintf(stdout, "hotwords_file: %s\n", config->hotwords_file);
  fprintf(stdout, "hotwords_score: %.2f\n", config->hotwords_score);
  fprintf(stdout, "rule_fsts: %s\n", config->rule_fsts);
  fprintf(stdout, "rule_fars: %s\n", config->rule_fars);
  fprintf(stdout, "blank_penalty: %f\n", config->blank_penalty);
  fprintf(stdout, "----------hr config----------\n");
  fprintf(stdout, "dict_dir: %s\n", config->hr.dict_dir);
  fprintf(stdout, "lexicon: %s\n", config->hr.lexicon);
  fprintf(stdout, "rule_fsts: %s\n", config->hr.rule_fsts);
}

void CopyHeap(const char *src, int32_t num_bytes, char *dst) {
  std::copy(src, src + num_bytes, dst);
}
}


================================================
FILE: wasm/nodejs/sherpa-onnx-wave.js
================================================
// return an object
// {
//   samples: a float32 array
//   sampleRate: an integer
// }
function readWave(filename, Module) {
  const filenameLen = Module.lengthBytesUTF8(filename) + 1;
  const pFilename = Module._malloc(filenameLen);
  Module.stringToUTF8(filename, pFilename, filenameLen);

  const w = Module._SherpaOnnxReadWave(pFilename);
  Module._free(pFilename);


  const samplesPtr = Module.HEAP32[w / 4] / 4;
  const sampleRate = Module.HEAP32[w / 4 + 1];
  const numSamples = Module.HEAP32[w / 4 + 2];

  const samples = new Float32Array(numSamples);
  for (let i = 0; i < numSamples; i++) {
    samples[i] = Module.HEAPF32[samplesPtr + i];
  }

  Module._SherpaOnnxFreeWave(w);

  return {samples: samples, sampleRate: sampleRate};
}

function readWaveFromBinaryData(uint8Array, Module) {
  const numBytes = uint8Array.length * uint8Array.BYTES_PER_ELEMENT;
  const pointer = Module._malloc(numBytes);

  const dataOnHeap = new Uint8Array(Module.HEAPU8.buffer, pointer, numBytes);
  dataOnHeap.set(uint8Array);

  const w =
      Module._SherpaOnnxReadWaveFromBinaryData(dataOnHeap.byteOffset, numBytes);
  if (w == 0) {
    console.log('Failed to read wave from binary data');
    return null;
  }

  Module._free(pointer);

  const samplesPtr = Module.HEAP32[w / 4] / 4;
  const sampleRate = Module.HEAP32[w / 4 + 1];
  const numSamples = Module.HEAP32[w / 4 + 2];

  const samples = new Float32Array(numSamples);
  for (let i = 0; i < numSamples; i++) {
    samples[i] = Module.HEAPF32[samplesPtr + i];
  }

  Module._SherpaOnnxFreeWave(w);


  return {samples: samples, sampleRate: sampleRate};
}

// data is an object
// {
//   samples: a float32 array
//   sampleRate: an integer
// }
function writeWave(filename, data, Module) {
  const pSamples =
      Module._malloc(data.samples.length * data.samples.BYTES_PER_ELEMENT);
  Module.HEAPF32.set(data.samples, pSamples / data.samples.BYTES_PER_ELEMENT);

  const filenameLen = Module.lengthBytesUTF8(filename) + 1;
  const pFilename = Module._malloc(filenameLen);
  Module.stringToUTF8(filename, pFilename, filenameLen);

  Module._SherpaOnnxWriteWave(
      pSamples, data.samples.length, data.sampleRate, pFilename);

  Module._free(pFilename);
  Module._free(pSamples);
}

if (typeof process == 'object' && typeof process.versions == 'object' &&
    typeof process.versions.node == 'string') {
  module.exports = {
    readWave,
    writeWave,
    readWaveFromBinaryData,
  };
}


================================================
FILE: wasm/speaker-diarization/CMakeLists.txt
================================================
if(NOT $ENV{SHERPA_ONNX_IS_USING_BUILD_WASM_SH})
  message(FATAL_ERROR "Please use ./build-wasm-simd-speaker-diarization.sh to build for WASM for speaker diarization")
endif()

if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/assets/segmentation.onnx" OR NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/assets/embedding.onnx")
  message(FATAL_ERROR "Please read ${CMAKE_CURRENT_SOURCE_DIR}/assets/README.md before you continue")
endif()

set(exported_functions
  MyPrint
  SherpaOnnxCreateOfflineSpeakerDiarization
  SherpaOnnxDestroyOfflineSpeakerDiarization
  SherpaOnnxOfflineSpeakerDiarizationDestroyResult
  SherpaOnnxOfflineSpeakerDiarizationDestroySegment
  SherpaOnnxOfflineSpeakerDiarizationGetSampleRate
  SherpaOnnxOfflineSpeakerDiarizationProcess
  SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback
  SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments
  SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime
  SherpaOnnxOfflineSpeakerDiarizationSetConfig
)
set(mangled_exported_functions)
foreach(x IN LISTS exported_functions)
  list(APPEND mangled_exported_functions "_${x}")
endforeach()
list(JOIN mangled_exported_functions "," all_exported_functions)


include_directories(${CMAKE_SOURCE_DIR})
set(MY_FLAGS " -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=512MB -s ALLOW_MEMORY_GROWTH=1")
string(APPEND MY_FLAGS " -sSTACK_SIZE=10485760 ") # 10MB
string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ")
string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ")
string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString','HEAPU8','HEAP16','HEAP32','HEAPU32','HEAPF32','HEAPF64'] ")

message(STATUS "MY_FLAGS: ${MY_FLAGS}")

set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}")
set(CMAKE_EXECUTABLE_LINKER_FLAGS "${CMAKE_EXECUTABLE_LINKER_FLAGS} ${MY_FLAGS}")

if (NOT CMAKE_EXECUTABLE_SUFFIX STREQUAL ".js")
  message(FATAL_ERROR "The default suffix for building executables should be .js!")
endif()
# set(CMAKE_EXECUTABLE_SUFFIX ".html")

add_executable(sherpa-onnx-wasm-main-speaker-diarization sherpa-onnx-wasm-main-speaker-diarization.cc)
target_link_libraries(sherpa-onnx-wasm-main-speaker-diarization sherpa-onnx-c-api)
install(TARGETS sherpa-onnx-wasm-main-speaker-diarization DESTINATION bin/wasm/speaker-diarization)

install(
  FILES
    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-speaker-diarization>/sherpa-onnx-wasm-main-speaker-diarization.js"
    "index.html"
    "sherpa-onnx-speaker-diarization.js"
    "app-speaker-diarization.js"
    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-speaker-diarization>/sherpa-onnx-wasm-main-speaker-diarization.wasm"
    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-speaker-diarization>/sherpa-onnx-wasm-main-speaker-diarization.data"
  DESTINATION
    bin/wasm/speaker-diarization
)


================================================
FILE: wasm/speaker-diarization/app-speaker-diarization.js
================================================
const startBtn = document.getElementById('startBtn');
const hint = document.getElementById('hint');
const numClustersInput = document.getElementById('numClustersInputID');
const thresholdInput = document.getElementById('thresholdInputID');
const textArea = document.getElementById('text');

const fileSelectCtrl = document.getElementById('file');

let sd = null;
let float32Samples = null;

Module = {};
Module.onRuntimeInitialized = function() {
  console.log('Model files downloaded!');

  console.log('Initializing speaker diarization ......');
  sd = createOfflineSpeakerDiarization(Module)
  console.log('sampleRate', sd.sampleRate);

  hint.innerText =
      'Initialized! Please select a wave file and click the Start button.';

  fileSelectCtrl.disabled = false;
};

function onFileChange() {
  var files = document.getElementById('file').files;

  if (files.length == 0) {
    console.log('No file selected');
    float32Samples = null;
    startBtn.disabled = true;
    return;
  }
  textArea.value = '';

  console.log('files: ' + files);

  const file = files[0];
  console.log(file);
  console.log('file.name ' + file.name);
  console.log('file.type ' + file.type);
  console.log('file.size ' + file.size);

  let audioCtx = new AudioContext({sampleRate: sd.sampleRate});

  let reader = new FileReader();
  reader.onload = function() {
    console.log('reading file!');
    audioCtx.decodeAudioData(reader.result, decodedDone);
  };

  function decodedDone(decoded) {
    let typedArray = new Float32Array(decoded.length);
    float32Samples = decoded.getChannelData(0);

    startBtn.disabled = false;
  }

  reader.readAsArrayBuffer(file);
}

startBtn.onclick = function() {
  textArea.value = '';
  if (float32Samples == null) {
    alert('Empty audio samples!');

    startBtn.disabled = true;
    return;
  }

  let numClusters = numClustersInput.value;
  if (numClusters.trim().length == 0) {
    alert(
        'Please provide numClusters. Use -1 if you are not sure how many speakers are there');
    return;
  }

  if (!numClusters.match(/^\d+$/)) {
    alert(`number of clusters ${
        numClusters} is not an integer .\nPlease enter an integer`);
    return;
  }
  numClusters = parseInt(numClusters, 10);
  if (numClusters < -1) {
    alert(`Number of clusters should be >= -1`);
    return;
  }

  let threshold = 0.5;
  if (numClusters <= 0) {
    threshold = thresholdInput.value;
    if (threshold.trim().length == 0) {
      alert('Please provide a threshold.');
      return;
    }

    threshold = parseFloat(threshold);
    if (threshold < 0) {
      alert(`Pleaser enter a positive threshold`);
      return;
    }
  }

  let config = sd.config
  config.clustering = {numClusters: numClusters, threshold: threshold};
  sd.setConfig(config);
  let segments = sd.process(float32Samples);
  if (segments == null) {
    textArea.value = 'No speakers detected';
    return
  }

  let s = '';
  let sep = '';

  for (seg of segments) {
    // clang-format off
    s += sep + `${seg.start.toFixed(2)} -- ${seg.end.toFixed(2)} speaker_${seg.speaker}`
    // clang-format on
    sep = '\n';
  }
  textArea.value = s;
}


================================================
FILE: wasm/speaker-diarization/assets/README.md
================================================
# Introduction

Please refer to
https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
to download a speaker segmentation model
and
refer to
https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
to download a speaker embedding extraction model.

Remember to rename the downloaded files.

The following is an example.

```bash
cd wasm/speaker-diarization/assets/

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
cp sherpa-onnx-pyannote-segmentation-3-0/model.onnx ./segmentation.onnx
rm -rf sherpa-onnx-pyannote-segmentation-3-0

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
mv 3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ./embedding.onnx
```


================================================
FILE: wasm/speaker-diarization/index.html
================================================
<html lang="en">

<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width" />
  <title>Next-gen Kaldi WebAssembly with sherpa-onnx for Speaker Diarization</title>
  <style>
    h1,div {
      text-align: center;
    }
    textarea {
      width:100%;
    }
  </style>
</head>

<body>
  <h1>
    Next-gen Kaldi + WebAssembly<br/>
    Speaker Diarization <br> with <a href="https://github.com/k2-fsa/sherpa-onnx">sherpa-onnx</a>
  </h1>
  <div>
    <span id="hint">Loading model ... ...</span>
    <br/>
    <br/>
    <label for="avatar">Choose a wav file:</label>
    <input type="file" id="file" accept=".wav" onchange="onFileChange()" disabled></input>
    <br/>
    <br/>
    <label for="numClusters" id="numClustersID">Number of speakers: </label>
    <input type="text" id="numClustersInputID" name="numClusters" value="-1" />
    <br/>
    <br/>
    <label for="clusteringThreshold" id="thresholdID">Clustering threshold: </label>
    <input type="text" id="thresholdInputID" name="clusteringThreshold" value="0.5" />
    <br/>
    <br/>

    <textarea id="text" rows="10" placeholder="If you know the actual number of speakers in the input wave file, please provide it via Number of speakers. Otherwise, please leave Number of speakers to -1 and provide Clustering threshold instead. A larger threshold leads to fewer clusters, i.e., fewer speakers; a smaller threshold leads to more clusters, i.e., more speakers."></textarea>
    <br/>
    <br/>
    <button id="startBtn" disabled>Start</button>
  </div>

  <script src="app-speaker-diarization.js"></script>
  <script src="sherpa-onnx-speaker-diarization.js"></script>
  <script src="sherpa-onnx-wasm-main-speaker-diarization.js"></script>
</body>


================================================
FILE: wasm/speaker-diarization/sherpa-onnx-speaker-diarization.js
================================================

function freeConfig(config, Module) {
  if ('buffer' in config) {
    Module._free(config.buffer);
  }

  if ('config' in config) {
    freeConfig(config.config, Module)
  }

  if ('segmentation' in config) {
    freeConfig(config.segmentation, Module)
  }

  if ('embedding' in config) {
    freeConfig(config.embedding, Module)
  }

  if ('clustering' in config) {
    freeConfig(config.clustering, Module)
  }

  Module._free(config.ptr);
}

function initSherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig(
    config, Module) {
  const modelLen = Module.lengthBytesUTF8(config.model || '') + 1;
  const n = modelLen;
  const buffer = Module._malloc(n);

  const len = 1 * 4;
  const ptr = Module._malloc(len);

  let offset = 0;
  Module.stringToUTF8(config.model || '', buffer + offset, modelLen);
  offset += modelLen;

  offset = 0;
  Module.setValue(ptr, buffer + offset, 'i8*');

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxOfflineSpeakerSegmentationModelConfig(config, Module) {
  if (!('pyannote' in config)) {
    config.pyannote = {
      model: '',
    };
  }

  const pyannote = initSherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig(
      config.pyannote, Module);

  const len = pyannote.len + 3 * 4;
  const ptr = Module._malloc(len);

  let offset = 0;
  Module._CopyHeap(pyannote.ptr, pyannote.len, ptr + offset);
  offset += pyannote.len;

  Module.setValue(ptr + offset, config.numThreads || 1, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, config.debug || 0, 'i32');
  offset += 4;

  const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1;
  const buffer = Module._malloc(providerLen);
  Module.stringToUTF8(config.provider || 'cpu', buffer, providerLen);
  Module.setValue(ptr + offset, buffer, 'i8*');

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
    config: pyannote,
  };
}

function initSherpaOnnxSpeakerEmbeddingExtractorConfig(config, Module) {
  const modelLen = Module.lengthBytesUTF8(config.model || '') + 1;
  const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1;
  const n = modelLen + providerLen;
  const buffer = Module._malloc(n);

  const len = 4 * 4;
  const ptr = Module._malloc(len);

  let offset = 0;
  Module.stringToUTF8(config.model || '', buffer + offset, modelLen);
  offset += modelLen;

  Module.stringToUTF8(config.provider || 'cpu', buffer + offset, providerLen);
  offset += providerLen;

  offset = 0
  Module.setValue(ptr + offset, buffer, 'i8*');
  offset += 4;

  Module.setValue(ptr + offset, config.numThreads || 1, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, config.debug || 0, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, buffer + modelLen, 'i8*');
  offset += 4;

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxFastClusteringConfig(config, Module) {
  const len = 2 * 4;
  const ptr = Module._malloc(len);

  let offset = 0;
  Module.setValue(ptr + offset, config.numClusters || -1, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, config.threshold || 0.5, 'float');
  offset += 4;

  return {
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxOfflineSpeakerDiarizationConfig(config, Module) {
  if (!('segmentation' in config)) {
    config.segmentation = {
      pyannote: {model: ''},
      numThreads: 1,
      debug: 0,
      provider: 'cpu',
    };
  }

  if (!('embedding' in config)) {
    config.embedding = {
      model: '',
      numThreads: 1,
      debug: 0,
      provider: 'cpu',
    };
  }

  if (!('clustering' in config)) {
    config.clustering = {
      numClusters: -1,
      threshold: 0.5,
    };
  }

  const segmentation = initSherpaOnnxOfflineSpeakerSegmentationModelConfig(
      config.segmentation, Module);

  const embedding =
      initSherpaOnnxSpeakerEmbeddingExtractorConfig(config.embedding, Module);

  const clustering =
      initSherpaOnnxFastClusteringConfig(config.clustering, Module);

  const len = segmentation.len + embedding.len + clustering.len + 2 * 4;
  const ptr = Module._malloc(len);

  let offset = 0;
  Module._CopyHeap(segmentation.ptr, segmentation.len, ptr + offset);
  offset += segmentation.len;

  Module._CopyHeap(embedding.ptr, embedding.len, ptr + offset);
  offset += embedding.len;

  Module._CopyHeap(clustering.ptr, clustering.len, ptr + offset);
  offset += clustering.len;

  Module.setValue(ptr + offset, config.minDurationOn || 0.2, 'float');
  offset += 4;

  Module.setValue(ptr + offset, config.minDurationOff || 0.5, 'float');
  offset += 4;

  return {
    ptr: ptr,
    len: len,
    segmentation: segmentation,
    embedding: embedding,
    clustering: clustering,
  };
}

class OfflineSpeakerDiarization {
  constructor(configObj, Module) {
    const config =
        initSherpaOnnxOfflineSpeakerDiarizationConfig(configObj, Module)
    // Module._MyPrint(config.ptr);

    const handle =
        Module._SherpaOnnxCreateOfflineSpeakerDiarization(config.ptr);

    freeConfig(config, Module);

    this.handle = handle;
    this.sampleRate =
        Module._SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(this.handle);
    this.Module = Module

                  this.config = configObj;
  }

  free() {
    this.Module._SherpaOnnxDestroyOfflineSpeakerDiarization(this.handle);
    this.handle = 0
  }

  setConfig(configObj) {
    if (!('clustering' in configObj)) {
      return;
    }

    const config =
        initSherpaOnnxOfflineSpeakerDiarizationConfig(configObj, this.Module);

    this.Module._SherpaOnnxOfflineSpeakerDiarizationSetConfig(
        this.handle, config.ptr);

    freeConfig(config, this.Module);

    this.config.clustering = configObj.clustering;
  }

  process(samples) {
    const pointer =
        this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT);
    this.Module.HEAPF32.set(samples, pointer / samples.BYTES_PER_ELEMENT);

    let r = this.Module._SherpaOnnxOfflineSpeakerDiarizationProcess(
        this.handle, pointer, samples.length);
    this.Module._free(pointer);

    let numSegments =
        this.Module._SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(r);

    let segments =
        this.Module._SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(
            r);

    let ans = [];

    let sizeOfSegment = 3 * 4;
    for (let i = 0; i < numSegments; ++i) {
      let p = segments + i * sizeOfSegment

      let start = this.Module.HEAPF32[p / 4 + 0];
      let end = this.Module.HEAPF32[p / 4 + 1];
      let speaker = this.Module.HEAP32[p / 4 + 2];

      ans.push({start: start, end: end, speaker: speaker});
    }

    this.Module._SherpaOnnxOfflineSpeakerDiarizationDestroySegment(segments);
    this.Module._SherpaOnnxOfflineSpeakerDiarizationDestroyResult(r);

    return ans;
  }
}

function createOfflineSpeakerDiarization(Module, myConfig) {
  let config = {
    segmentation: {
      pyannote: {model: './segmentation.onnx'},
      debug: 1,
    },
    embedding: {
      model: './embedding.onnx',
      debug: 1,
    },
    clustering: {numClusters: -1, threshold: 0.5},
    minDurationOn: 0.3,
    minDurationOff: 0.5,
  };

  if (myConfig) {
    config = myConfig;
  }

  return new OfflineSpeakerDiarization(config, Module);
}

if (typeof process == 'object' && typeof process.versions == 'object' &&
    typeof process.versions.node == 'string') {
  module.exports = {
    createOfflineSpeakerDiarization,
  };
}


================================================
FILE: wasm/speaker-diarization/sherpa-onnx-wasm-main-speaker-diarization.cc
================================================
// wasm/sherpa-onnx-wasm-main-speaker-diarization.cc
//
// Copyright (c)  2024  Xiaomi Corporation
#include <stdio.h>

#include <algorithm>
#include <memory>

#include "sherpa-onnx/c-api/c-api.h"

// see also
// https://emscripten.org/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.html

extern "C" {

static_assert(sizeof(SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig) ==
                  1 * 4,
              "");

static_assert(
    sizeof(SherpaOnnxOfflineSpeakerSegmentationModelConfig) ==
        sizeof(SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig) + 3 * 4,
    "");

static_assert(sizeof(SherpaOnnxFastClusteringConfig) == 2 * 4, "");

static_assert(sizeof(SherpaOnnxSpeakerEmbeddingExtractorConfig) == 4 * 4, "");

static_assert(sizeof(SherpaOnnxOfflineSpeakerDiarizationConfig) ==
                  sizeof(SherpaOnnxOfflineSpeakerSegmentationModelConfig) +
                      sizeof(SherpaOnnxSpeakerEmbeddingExtractorConfig) +
                      sizeof(SherpaOnnxFastClusteringConfig) + 2 * 4,
              "");

void MyPrint(const SherpaOnnxOfflineSpeakerDiarizationConfig *sd_config) {
  const auto &segmentation = sd_config->segmentation;
  const auto &embedding = sd_config->embedding;
  const auto &clustering = sd_config->clustering;

  fprintf(stdout, "----------segmentation config----------\n");
  fprintf(stdout, "pyannote model: %s\n", segmentation.pyannote.model);
  fprintf(stdout, "num threads: %d\n", segmentation.num_threads);
  fprintf(stdout, "debug: %d\n", segmentation.debug);
  fprintf(stdout, "provider: %s\n", segmentation.provider);

  fprintf(stdout, "----------embedding config----------\n");
  fprintf(stdout, "model: %s\n", embedding.model);
  fprintf(stdout, "num threads: %d\n", embedding.num_threads);
  fprintf(stdout, "debug: %d\n", embedding.debug);
  fprintf(stdout, "provider: %s\n", embedding.provider);

  fprintf(stdout, "----------clustering config----------\n");
  fprintf(stdout, "num_clusters: %d\n", clustering.num_clusters);
  fprintf(stdout, "threshold: %.3f\n", clustering.threshold);

  fprintf(stdout, "min_duration_on: %.3f\n", sd_config->min_duration_on);
  fprintf(stdout, "min_duration_off: %.3f\n", sd_config->min_duration_off);
}

void CopyHeap(const char *src, int32_t num_bytes, char *dst) {
  std::copy(src, src + num_bytes, dst);
}
}


================================================
FILE: wasm/speech-enhancement/CMakeLists.txt
================================================
if(NOT $ENV{SHERPA_ONNX_IS_USING_BUILD_WASM_SH})
  message(FATAL_ERROR "Please use ./build-wasm-simd-speech-enhancement.sh to build for wasm speech enhancement")
endif()

if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/assets/gtcrn.onnx")
  message(FATAL_ERROR "Please read ${CMAKE_CURRENT_SOURCE_DIR}/assets/README.md before you continue")
endif()

set(exported_functions
  MyPrint
  SherpaOnnxCreateOfflineSpeechDenoiser
  SherpaOnnxCreateOnlineSpeechDenoiser
  SherpaOnnxDestroyDenoisedAudio
  SherpaOnnxDestroyOfflineSpeechDenoiser
  SherpaOnnxDestroyOnlineSpeechDenoiser
  SherpaOnnxFreeWave
  SherpaOnnxOfflineSpeechDenoiserGetSampleRate
  SherpaOnnxOfflineSpeechDenoiserRun
  SherpaOnnxOnlineSpeechDenoiserGetFrameShiftInSamples
  SherpaOnnxOnlineSpeechDenoiserGetSampleRate
  SherpaOnnxOnlineSpeechDenoiserRun
  SherpaOnnxOnlineSpeechDenoiserFlush
  SherpaOnnxOnlineSpeechDenoiserReset
  SherpaOnnxReadWave
  SherpaOnnxReadWaveFromBinaryData
  SherpaOnnxWriteWave
)
set(mangled_exported_functions)
foreach(x IN LISTS exported_functions)
  list(APPEND mangled_exported_functions "_${x}")
endforeach()
list(JOIN mangled_exported_functions "," all_exported_functions)


include_directories(${CMAKE_SOURCE_DIR})
set(MY_FLAGS " -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=128MB -s ALLOW_MEMORY_GROWTH=1")
string(APPEND MY_FLAGS " -sSTACK_SIZE=10485760 ") # 10MB
string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ")
string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ")
string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString','HEAPU8','HEAP16','HEAP32','HEAPU32','HEAPF32','HEAPF64'] ")

message(STATUS "MY_FLAGS: ${MY_FLAGS}")

set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}")
set(CMAKE_EXECUTABLE_LINKER_FLAGS "${CMAKE_EXECUTABLE_LINKER_FLAGS} ${MY_FLAGS}")

if (NOT CMAKE_EXECUTABLE_SUFFIX STREQUAL ".js")
  message(FATAL_ERROR "The default suffix for building executables should be .js!")
endif()
# set(CMAKE_EXECUTABLE_SUFFIX ".html")

add_executable(sherpa-onnx-wasm-main-speech-enhancement sherpa-onnx-wasm-main-speech-enhancement.cc)
target_link_libraries(sherpa-onnx-wasm-main-speech-enhancement sherpa-onnx-c-api)
install(TARGETS sherpa-onnx-wasm-main-speech-enhancement DESTINATION bin/wasm/speech-enhancement)

install(
  FILES
    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-speech-enhancement>/sherpa-onnx-wasm-main-speech-enhancement.js"
    "index.html"
    "sherpa-onnx-speech-enhancement.js"
    "../nodejs/sherpa-onnx-wave.js"
    "app-speech-enhancement.js"
    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-speech-enhancement>/sherpa-onnx-wasm-main-speech-enhancement.wasm"
    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-speech-enhancement>/sherpa-onnx-wasm-main-speech-enhancement.data"
  DESTINATION
    bin/wasm/speech-enhancement
)


================================================
FILE: wasm/speech-enhancement/app-speech-enhancement.js
================================================

const fileInput = document.getElementById('fileInput');

let speech_denoiser = null;
const inAudioPlayback = document.getElementById('inAudioPlayback');
const outAudioPlayback = document.getElementById('outAudioPlayback');

Module = {};

// https://emscripten.org/docs/api_reference/module.html#Module.locateFile
Module.locateFile = function(path, scriptDirectory = '') {
  console.log(`path: ${path}, scriptDirectory: ${scriptDirectory}`);
  return scriptDirectory + path;
};

// https://emscripten.org/docs/api_reference/module.html#Module.locateFile
Module.setStatus = function(status) {
  console.log(`status ${status}`);
  const statusElement = document.getElementById('status');
  statusElement.textContent = status;
  if (status === '') {
    statusElement.style.display = 'none';
    document.querySelectorAll('.tab-content').forEach((tabContentElement) => {
      tabContentElement.classList.remove('loading');
    });
  } else {
    statusElement.style.display = 'block';
    document.querySelectorAll('.tab-content').forEach((tabContentElement) => {
      tabContentElement.classList.add('loading');
    });
  }
};

Module.onRuntimeInitialized = function() {
  console.log('Model files downloaded!');

  console.log('Initializing speech denoiser ......');
  speech_denoiser = createOfflineSpeechDenoiser(Module)
};

async function process(wave) {
  let denoised = speech_denoiser.run(wave.samples, wave.sampleRate);
  console.log(denoised);

  let int16Samples = new Int16Array(denoised.samples.length);
  for (var i = 0; i < denoised.samples.length; ++i) {
    let s = denoised.samples[i];
    if (s >= 1)
      s = 1;
    else if (s <= -1)
      s = -1;

    int16Samples[i] = s * 32767;
  }

  let blob = toWav(int16Samples, denoised.sampleRate);
  const objectUrl = URL.createObjectURL(blob);
  console.log(objectUrl);

  outAudioPlayback.src = objectUrl;
  outAudioPlayback.controls = true;
  outAudioPlayback.style.display = 'block';
}

fileInput.addEventListener('change', function(event) {
  if (!event.target.files || !event.target.files[0]) {
    console.log('No file selected.');
    return;
  }

  const file = event.target.files[0];
  console.log('Selected file:', file.name, file.type, file.size, 'bytes');
  const reader = new FileReader();
  reader.onload = function(ev) {
    console.log('FileReader onload called.');
    const arrayBuffer = ev.target.result;
    console.log('ArrayBuffer length:', arrayBuffer.byteLength);

    const uint8Array = new Uint8Array(arrayBuffer);
    const wave = readWaveFromBinaryData(uint8Array, Module);
    if (wave == null) {
      alert(
          `${file.name} is not a valid .wav file. Please select a *.wav file`);
      return;
    }


    var url = URL.createObjectURL(file);
    console.log(`url: ${url}`);
    inAudioPlayback.src = url;
    inAudioPlayback.style.display = 'block';

    process(wave)
    console.log('process done')
  };
  reader.onerror = function(err) {
    console.error('FileReader error:', err);
  };
  console.log('Starting FileReader.readAsArrayBuffer...');
  reader.readAsArrayBuffer(file);
});

// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
function toWav(samples, sampleRate) {
  let buf = new ArrayBuffer(44 + samples.length * 2);
  var view = new DataView(buf);

  // http://soundfile.sapp.org/doc/WaveFormat/
  //                   F F I R
  view.setUint32(0, 0x46464952, true);               // chunkID
  view.setUint32(4, 36 + samples.length * 2, true);  // chunkSize
  //                   E V A W
  view.setUint32(8, 0x45564157, true);  // format
                                        //
  //                      t m f
  view.setUint32(12, 0x20746d66, true);          // subchunk1ID
  view.setUint32(16, 16, true);                  // subchunk1Size, 16 for PCM
  view.setUint32(20, 1, true);                   // audioFormat, 1 for PCM
  view.setUint16(22, 1, true);                   // numChannels: 1 channel
  view.setUint32(24, sampleRate, true);          // sampleRate
  view.setUint32(28, sampleRate * 2, true);      // byteRate
  view.setUint16(32, 2, true);                   // blockAlign
  view.setUint16(34, 16, true);                  // bitsPerSample
  view.setUint32(36, 0x61746164, true);          // Subchunk2ID
  view.setUint32(40, samples.length * 2, true);  // subchunk2Size

  let offset = 44;
  for (let i = 0; i < samples.length; ++i) {
    view.setInt16(offset, samples[i], true);
    offset += 2;
  }

  return new Blob([view], {type: 'audio/wav'});
}


================================================
FILE: wasm/speech-enhancement/assets/README.md
================================================
# Introduction

## Huggingface space

You can visit https://huggingface.co/spaces/k2-fsa/wasm-speech-enhancement-gtcrn
to try it in your browser without building or installing anything.

You can also visit
https://modelscope.cn/studios/csukuangfj/wasm-speech-enhancement-gtcrn

## Usage

Please refer to
https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
to download a model.

The following is an example:

```bash
cd sherpa-onnx/wasm/speech-enhancement/assets
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx

mv gtcrn_simple.onnx gtcrn.onnx
```

You should have the following files in `assets` before you can run
`build-wasm-simd-speech-enhancement.sh`

```
(py38) fangjuns-MacBook-Pro:assets fangjun$ tree .
.
├── README.md
└── gtcrn.onnx

0 directories, 2 files
(py38) fangjuns-MacBook-Pro:assets fangjun$ ls -lh
total 1056
-rw-r--r--  1 fangjun  staff   466B Mar 12 16:13 README.md
-rw-r--r--  1 fangjun  staff   523K Mar 12 16:14 gtcrn.onnx
```


================================================
FILE: wasm/speech-enhancement/index.html
================================================
<html lang="en">

<!--
The UI code is modified from
https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Wasm
-->

<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width" />
  <title>Next-gen Kaldi WebAssembly with sherpa-onnx for speech enhancement</title>
  <style>
    h1,div {
      text-align: center;
    }
    textarea {
      width:100%;
    }
    .loading {
      display: none !important;
    }
  </style>
</head>

<body>
  <h1>
    Next-gen Kaldi + WebAssembly<br/>
    Speech Enhancement with <a href="https://github.com/k2-fsa/sherpa-onnx">sherpa-onnx</a><br/>
    using <a href="https://github.com/Xiaobin-Rong/gtcrn">GTCRN</a>
  </h1>

  <div id="status">Loading...</div>

  <div id="singleAudioContent" class="tab-content loading">
    <div style="display: flex; gap: 1.5rem;">
      <!-- Input Section -->
      <div style="flex: 1; display: flex; flex-direction: column; gap: 1rem;">
        <div style="font-size: 1rem; font-weight: bold; padding: 0.5rem 1rem; background-color: #f8f9fa; border-radius: 8px; display: flex; align-items: center; gap: 0.5rem; color: #6c757d;">
          <span style="line-height: 1;">🎵</span> Input
        </div>

        <!-- Drag and Drop / File Upload -->
        <div id="dropzone" style="border: 2px dashed #ced4da; border-radius: 8px; padding: 2rem; text-align: center; color: #6c757d; cursor: pointer; background-color: #f8f9fa; transition: background-color 0.3s, border-color 0.3s; position: relative;">
          <input type="file" id="fileInput" accept=".wav" style="position: absolute; top: 0; left: 0; opacity: 0; width: 100%; height: 100%; cursor: pointer;" />
          <p style="margin: 0;">Drop Audio Here (*.wav)<br>- or -<br>Click to Upload</p>
        </div>
        <audio id="inAudioPlayback" controls style="display: none; margin-top: 1rem; width: 100%;"></audio>
      </div>
    </div>

    <div style="display: flex; gap: 1.5rem;">
      <!-- Output Section -->
      <div style="flex: 1; display: flex; flex-direction: column; gap: 1rem;">
        <div style="font-size: 1rem; font-weight: bold; padding: 0.5rem 1rem; background-color: #f8f9fa; border-radius: 8px; display: flex; align-items: center; gap: 0.5rem; color: #6c757d;">
        <span style="line-height: 1;">🎵</span> Output
      </div>
        <audio id="outAudioPlayback" controls style="display: none; margin-top: 1rem; width: 100%;"></audio>
    </div>
  </div>

  <!-- Footer Section -->
  <div style="width: 100%; max-width: 900px; margin-top: 1.5rem; background: #fff; padding: 1.5rem; border-radius: 8px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1); text-align: left; font-size: 0.9rem; color: #6c757d;">
    <h3>Description</h3>
    <ul>
      <li>Everything is <strong>open-sourced.</strong> <a href="https://github.com/k2-fsa/sherpa-onnx">code</a></li>
      <li>The model is from <a href="https://github.com/Xiaobin-Rong/gtcrn">GTCRN</a></li>
      <li>Please upload .wav files</li>
        <ul>
          <li>You can download noisy test wave files from <a href="https://htmlpreview.github.io/?https://github.com/Xiaobin-Rong/gtcrn_demo/blob/main/index.html">https://htmlpreview.github.io/?https://github.com/Xiaobin-Rong/gtcrn_demo/blob/main/index.html</a></li>
        </ul>
      <li>If you have any issues, please either <a href="https://github.com/k2-fsa/sherpa-onnx/issues">file a ticket</a> or contact us via</li>
        <ul>
          <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#wechat">WeChat group</a></li>
          <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#qq">QQ group</a></li>
          <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#bilibili-b">Bilibili</a></li>
        </ul>
    </ul>
    <h3>About This Demo</h3>
    <ul>
      <li><strong>Private and Secure:</strong> All processing is done locally on your device (CPU) within your browser with a single thread. No server is involved, ensuring privacy and security. You can disconnect from the Internet once this page is loaded.</li>
      <li><strong>Efficient Resource Usage:</strong> No GPU is required, leaving system resources available for webLLM analysis.</li>
    </ul>
    <h3>Latest Update</h3>
    <ul>
      <li>First working version.</li>
    </ul>

    <h3>Acknowledgement</h3>
    <ul>
      <li>We refer to <a href="https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Wasm">https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Wasm</a> for the UI part.</li>
    </ul>
  </div>

  <script src="app-speech-enhancement.js"></script>
  <script src="sherpa-onnx-wave.js"></script>
  <script src="sherpa-onnx-speech-enhancement.js"></script>
  <script src="sherpa-onnx-wasm-main-speech-enhancement.js"></script>
</body>


================================================
FILE: wasm/speech-enhancement/sherpa-onnx-speech-enhancement.js
================================================
function freeConfig(config, Module) {
  if ('buffer' in config) {
    Module._free(config.buffer);
  }

  if ('config' in config) {
    freeConfig(config.config, Module)
  }

  if ('gtcrn' in config) {
    freeConfig(config.gtcrn, Module)
  }

  if ('dpdfnet' in config) {
    freeConfig(config.dpdfnet, Module)
  }

  Module._free(config.ptr);
}

function initSherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig(config, Module) {
  if (!('model' in config)) {
    config.model = '';
  }

  const modelLen = Module.lengthBytesUTF8(config.model) + 1;

  const n = modelLen;

  const buffer = Module._malloc(n);

  const len = 1 * 4;
  const ptr = Module._malloc(len);

  let offset = 0;
  Module.stringToUTF8(config.model, buffer + offset, modelLen);
  offset += modelLen;

  offset = 0;
  Module.setValue(ptr, buffer + offset, 'i8*');
  offset += modelLen;

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxOfflineSpeechDenoiserDpdfNetModelConfig(config, Module) {
  if (!('model' in config)) {
    config.model = '';
  }

  const modelLen = Module.lengthBytesUTF8(config.model) + 1;
  const n = modelLen;
  const buffer = Module._malloc(n);
  const len = 1 * 4;
  const ptr = Module._malloc(len);

  Module.stringToUTF8(config.model, buffer, modelLen);
  Module.setValue(ptr, buffer, 'i8*');

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxOfflineSpeechDenoiserModelConfig(config, Module) {
  if (!('gtcrn' in config)) {
    config.gtcrn = {model: ''};
  }

  if (!('dpdfnet' in config)) {
    config.dpdfnet = {model: ''};
  }

  const gtcrn =
      initSherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig(config.gtcrn, Module);
  const dpdfnet =
      initSherpaOnnxOfflineSpeechDenoiserDpdfNetModelConfig(
          config.dpdfnet, Module);

  const len = gtcrn.len + 3 * 4 + dpdfnet.len;
  const ptr = Module._malloc(len);

  let offset = 0;
  Module._CopyHeap(gtcrn.ptr, gtcrn.len, ptr + offset);
  offset += gtcrn.len;

  Module.setValue(ptr + offset, config.numThreads || 1, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, config.debug || 0, 'i32');
  offset += 4;

  const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1;
  const buffer = Module._malloc(providerLen);
  Module.stringToUTF8(config.provider || 'cpu', buffer, providerLen);
  Module.setValue(ptr + offset, buffer, 'i8*');
  offset += 4;

  Module._CopyHeap(dpdfnet.ptr, dpdfnet.len, ptr + offset);
  offset += dpdfnet.len;

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
    gtcrn: gtcrn,
    dpdfnet: dpdfnet,
  };
}

function initSherpaOnnxOfflineSpeechDenoiserConfig(config, Module) {
  if (!('model' in config)) {
    config.model = {
      gtcrn: {model: ''},
      dpdfnet: {model: ''},
      provider: 'cpu',
      debug: 1,
      numThreads: 1,
    };
  }

  const modelConfig =
      initSherpaOnnxOfflineSpeechDenoiserModelConfig(config.model, Module);
  const len = modelConfig.len;
  const ptr = Module._malloc(len);

  let offset = 0;
  Module._CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset);
  offset += modelConfig.len;

  return {
    ptr: ptr,
    len: len,
    config: modelConfig,
  };
}

function copyDenoisedAudio(handle, Module) {
  const numSamples = Module.HEAP32[handle / 4 + 1];
  const denoisedSampleRate = Module.HEAP32[handle / 4 + 2];
  const samplesPtr = Module.HEAP32[handle / 4] / 4;
  const denoisedSamples = new Float32Array(numSamples);
  for (let i = 0; i < numSamples; i++) {
    denoisedSamples[i] = Module.HEAPF32[samplesPtr + i];
  }

  Module._SherpaOnnxDestroyDenoisedAudio(handle);
  return {samples: denoisedSamples, sampleRate: denoisedSampleRate};
}

class SpeechDenoiserBase {
  constructor(Module) {
    this.Module = Module;
  }

  save(filename, audio) {
    const samples = audio.samples;
    const sampleRate = audio.sampleRate;
    const ptr = this.Module._malloc(samples.length * 4);
    for (let i = 0; i < samples.length; i++) {
      this.Module.HEAPF32[ptr / 4 + i] = samples[i];
    }

    const filenameLen = this.Module.lengthBytesUTF8(filename) + 1;
    const buffer = this.Module._malloc(filenameLen);
    this.Module.stringToUTF8(filename, buffer, filenameLen);
    this.Module._SherpaOnnxWriteWave(ptr, samples.length, sampleRate, buffer);
    this.Module._free(buffer);
    this.Module._free(ptr);
  }
}

class OfflineSpeechDenoiser extends SpeechDenoiserBase {
  constructor(configObj, Module) {
    super(Module);
    const config = initSherpaOnnxOfflineSpeechDenoiserConfig(configObj, Module);
    const handle = Module._SherpaOnnxCreateOfflineSpeechDenoiser(config.ptr);

    freeConfig(config, Module);

    this.handle = handle;
    this.sampleRate =
        Module._SherpaOnnxOfflineSpeechDenoiserGetSampleRate(this.handle);
  }

  free() {
    this.Module._SherpaOnnxDestroyOfflineSpeechDenoiser(this.handle);
    this.handle = 0;
  }

  run(samples, sampleRate) {
    const pointer =
        this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT);
    this.Module.HEAPF32.set(samples, pointer / samples.BYTES_PER_ELEMENT);
    const h = this.Module._SherpaOnnxOfflineSpeechDenoiserRun(
        this.handle, pointer, samples.length, sampleRate);
    this.Module._free(pointer);

    return copyDenoisedAudio(h, this.Module);
  }
}

class OnlineSpeechDenoiser extends SpeechDenoiserBase {
  constructor(configObj, Module) {
    super(Module);
    const config = initSherpaOnnxOfflineSpeechDenoiserConfig(configObj, Module);
    const handle = Module._SherpaOnnxCreateOnlineSpeechDenoiser(config.ptr);

    freeConfig(config, Module);

    this.handle = handle;
    this.sampleRate =
        Module._SherpaOnnxOnlineSpeechDenoiserGetSampleRate(this.handle);
    this.frameShiftInSamples =
        Module._SherpaOnnxOnlineSpeechDenoiserGetFrameShiftInSamples(
            this.handle);
  }

  free() {
    this.Module._SherpaOnnxDestroyOnlineSpeechDenoiser(this.handle);
    this.handle = 0;
  }

  run(samples, sampleRate) {
    const pointer =
        this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT);
    this.Module.HEAPF32.set(samples, pointer / samples.BYTES_PER_ELEMENT);
    const h = this.Module._SherpaOnnxOnlineSpeechDenoiserRun(
        this.handle, pointer, samples.length, sampleRate);
    this.Module._free(pointer);

    return copyDenoisedAudio(h, this.Module);
  }

  flush() {
    const h = this.Module._SherpaOnnxOnlineSpeechDenoiserFlush(this.handle);
    return copyDenoisedAudio(h, this.Module);
  }

  reset() {
    this.Module._SherpaOnnxOnlineSpeechDenoiserReset(this.handle);
  }
}

function createOfflineSpeechDenoiser(Module, myConfig) {
  let config = {
    model: {
      gtcrn: {model: './gtcrn.onnx'},
      debug: 0,
    },
  };

  if (myConfig) {
    config = myConfig;
  }

  return new OfflineSpeechDenoiser(config, Module);
}

function createOnlineSpeechDenoiser(Module, myConfig) {
  let config = {
    model: {
      gtcrn: {model: './gtcrn.onnx'},
      debug: 0,
    },
  };

  if (myConfig) {
    config = myConfig;
  }

  return new OnlineSpeechDenoiser(config, Module);
}

if (typeof process == 'object' && typeof process.versions == 'object' &&
    typeof process.versions.node == 'string') {
  module.exports = {
    createOfflineSpeechDenoiser,
    createOnlineSpeechDenoiser,
  };
}


================================================
FILE: wasm/speech-enhancement/sherpa-onnx-wasm-main-speech-enhancement.cc
================================================
// wasm/sherpa-onnx-wasm-main-speech-enhancement.cc
//
// Copyright (c)  2025  Xiaomi Corporation
#include <stdio.h>

#include <algorithm>
#include <memory>

#include "sherpa-onnx/c-api/c-api.h"

// see also
// https://emscripten.org/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.html

extern "C" {

static_assert(sizeof(SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig) == 1 * 4,
              "");
static_assert(sizeof(SherpaOnnxOfflineSpeechDenoiserDpdfNetModelConfig) ==
                  1 * 4,
              "");
static_assert(
    sizeof(SherpaOnnxOfflineSpeechDenoiserModelConfig) ==
        sizeof(SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig) +
            sizeof(SherpaOnnxOfflineSpeechDenoiserDpdfNetModelConfig) + 3 * 4,
    "");
static_assert(sizeof(SherpaOnnxOfflineSpeechDenoiserConfig) ==
                  sizeof(SherpaOnnxOfflineSpeechDenoiserModelConfig),
              "");

void MyPrint(SherpaOnnxOfflineSpeechDenoiserConfig *config) {
  auto model = &config->model;
  auto gtcrn = &model->gtcrn;
  auto dpdfnet = &model->dpdfnet;
  fprintf(stdout, "----------offline speech denoiser model config----------\n");
  fprintf(stdout, "gtcrn: %s\n", gtcrn->model);
  fprintf(stdout, "dpdfnet: %s\n", dpdfnet->model);
  fprintf(stdout, "num threads: %d\n", model->num_threads);
  fprintf(stdout, "debug: %d\n", model->debug);
  fprintf(stdout, "provider: %s\n", model->provider);
}

void CopyHeap(const char *src, int32_t num_bytes, char *dst) {
  std::copy(src, src + num_bytes, dst);
}
}


================================================
FILE: wasm/tts/CMakeLists.txt
================================================
if(NOT $ENV{SHERPA_ONNX_IS_USING_BUILD_WASM_SH})
  message(FATAL_ERROR "Please use ./build-wasm-simd-tts.sh to build for wasm TTS")
endif()

if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/assets/tokens.txt" AND
   NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/assets/lm_flow.int8.onnx")
  message(FATAL_ERROR "Please read ${CMAKE_CURRENT_SOURCE_DIR}/assets/README.md before you continue")
endif()

set(exported_functions
  MyPrint
  SherpaOnnxCreateOfflineTts
  SherpaOnnxDestroyOfflineTts
  SherpaOnnxDestroyOfflineTtsGeneratedAudio
  SherpaOnnxOfflineTtsGenerate
  SherpaOnnxOfflineTtsGenerateWithCallback
  SherpaOnnxOfflineTtsGenerateWithConfig
  SherpaOnnxOfflineTtsNumSpeakers
  SherpaOnnxOfflineTtsSampleRate
  SherpaOnnxWriteWave
)
set(mangled_exported_functions)
foreach(x IN LISTS exported_functions)
  list(APPEND mangled_exported_functions "_${x}")
endforeach()
list(JOIN mangled_exported_functions "," all_exported_functions)


include_directories(${CMAKE_SOURCE_DIR})
set(MY_FLAGS " -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=512MB -s ALLOW_MEMORY_GROWTH=1")
string(APPEND MY_FLAGS " -sSTACK_SIZE=10485760 ") # 10MB
string(APPEND MY_FLAGS " -sALLOW_TABLE_GROWTH ")
string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ")
string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ")
string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString','HEAPU8','HEAP16','HEAP32','HEAPU32','HEAPF32','HEAPF64','addFunction','removeFunction'] ")

message(STATUS "MY_FLAGS: ${MY_FLAGS}")

set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}")
set(CMAKE_EXECUTABLE_LINKER_FLAGS "${CMAKE_EXECUTABLE_LINKER_FLAGS} ${MY_FLAGS}")

if (NOT CMAKE_EXECUTABLE_SUFFIX STREQUAL ".js")
  message(FATAL_ERROR "The default suffix for building executables should be .js!")
endif()
# set(CMAKE_EXECUTABLE_SUFFIX ".html")

add_executable(sherpa-onnx-wasm-main-tts sherpa-onnx-wasm-main-tts.cc)
target_link_libraries(sherpa-onnx-wasm-main-tts sherpa-onnx-c-api)
install(TARGETS sherpa-onnx-wasm-main-tts DESTINATION bin/wasm/tts)

install(
  FILES
    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-tts>/sherpa-onnx-wasm-main-tts.js"
    "index.html"
    "sherpa-onnx-tts.js"
    "sherpa-onnx-tts.worker.js"
    "app-tts.js"
    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-tts>/sherpa-onnx-wasm-main-tts.wasm"
    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-tts>/sherpa-onnx-wasm-main-tts.data"
  DESTINATION
    bin/wasm/tts
)


================================================
FILE: wasm/tts/app-tts.js
================================================
const generateBtn = document.getElementById('generateBtn');
const speakerIdLabel = document.getElementById('speakerIdLabel');
const speakerIdInput = document.getElementById('speakerId');
const speakerIdSection = document.getElementById('speakerIdSection');
const referenceAudioSection = document.getElementById('referenceAudioSection');
const referenceTextSection = document.getElementById('referenceTextSection');
const referenceAudioInput = document.getElementById('referenceAudio');
const referenceTextInput = document.getElementById('referenceText');
const speedInput = document.getElementById('speed');
const speedValue = document.getElementById('speedValue');
const textArea = document.getElementById('text');
const soundClips = document.getElementById('sound-clips');
const statusElement = document.getElementById('status');
const generationStatusElement = document.getElementById('generationStatus');

speedValue.innerHTML = speedInput.value;

let index = 0;

let audioCtx = null;
const worker = new Worker("sherpa-onnx-tts.worker.js");
let ttsInstanceInfo = {
  modelType: null,
  numSpeakers: 0,
  isReady: false,
};
worker.onmessage = (e) => {
  if (e.data.type === "sherpa-onnx-tts-progress") {
    Module.setStatus(e.data.status);
    return;
  }
  if (e.data.type === "sherpa-onnx-tts-generation-progress") {
    const percent = Math.max(0, Math.min(100, (e.data.progress || 0) * 100));
    setGenerationStatus(`Generating audio... ${percent.toFixed(2)}%`);
    return;
  }
  if (e.data.type === "sherpa-onnx-tts-ready") {
    ttsInstanceInfo.modelType = e.data.modelType;
    ttsInstanceInfo.numSpeakers = e.data.numSpeakers;
    ttsInstanceInfo.isReady = true;
    generateBtn.disabled = false;
    speakerIdLabel.innerHTML = `Speaker ID (0 - ${e.data.numSpeakers - 1}):`;
    updateUiForModelType();
    Module.setStatus('');
    return;
  }
  if (e.data.type === "error") {
    generateBtn.disabled = false;
    if (ttsInstanceInfo.isReady) {
      setGenerationStatus(e.data.message);
    } else {
      Module.setStatus(e.data.message);
    }
    return;
  }
  if (e.data.type === "sherpa-onnx-tts-result") {
    let audio = e.data;
    generateBtn.disabled = false;
    setGenerationStatus('');

    console.log(audio.samples.length, audio.sampleRate);

    if (!audioCtx) {
      audioCtx = new AudioContext({ sampleRate: audio.sampleRate });
    }

    const buffer = audioCtx.createBuffer(
      1,
      audio.samples.length,
      audio.sampleRate,
    );

    buffer.getChannelData(0).set(audio.samples); // 使用 .set() 比 for 循环快得多
    const source = audioCtx.createBufferSource();
    source.buffer = buffer;
    source.connect(audioCtx.destination);
    source.start();

    createAudioTag(audio);
  }
};

Module = {};

// https://emscripten.org/docs/api_reference/module.html#Module.locateFile
Module.setStatus = function(status) {
  console.log(`status ${status}`);
  if (status == 'Running...') {
    status = 'Model downloaded. Initializing text to speech model...'
  }

  const downloadMatch = status.match(/Downloading data... \((\d+)\/(\d+)\)/);
  if (downloadMatch) {
    const downloaded = BigInt(downloadMatch[1]);
    const total = BigInt(downloadMatch[2]);
    const percent =
        total === 0 ? 0.00 : Number((downloaded * 10000n) / total) / 100;
    const downloadedMB = Number(downloaded) / (1024 * 1024);
    const totalMB = Number(total) / (1024 * 1024);
    status = `Downloading data... ${percent.toFixed(2)}% (${downloadedMB.toFixed(2)} MB/${
        totalMB.toFixed(2)} MB)`;
    console.log(`here ${status}`)
  }

  statusElement.textContent = status;
  if (status === '') {
    statusElement.style.display = 'none';
    // statusElement.parentNode.removeChild(statusElement);

    document.querySelectorAll('.tab-content').forEach((tabContentElement) => {
      tabContentElement.classList.remove('loading');
    });
  } else {
    statusElement.style.display = 'block';
    document.querySelectorAll('.tab-content').forEach((tabContentElement) => {
      tabContentElement.classList.add('loading');
    });
  }
};
speedInput.oninput = function() {
  speedValue.innerHTML = this.value;
};

function updateUiForModelType() {
  const isZipVoice = ttsInstanceInfo.modelType === 4;
  const isPocketTts = ttsInstanceInfo.modelType === 5;
  const useGenerationConfig = isZipVoice || isPocketTts;
  speakerIdSection.classList.toggle('hidden', useGenerationConfig);
  referenceAudioSection.classList.toggle('hidden', !useGenerationConfig);
  referenceTextSection.classList.toggle('hidden', !isZipVoice);
}

function setGenerationStatus(status) {
  if (!generationStatusElement) {
    return;
  }

  generationStatusElement.textContent = status;
  generationStatusElement.style.display = status ? 'block' : 'none';
}

function getMonoSamples(audioBuffer) {
  if (audioBuffer.numberOfChannels === 1) {
    return new Float32Array(audioBuffer.getChannelData(0));
  }

  const samples = new Float32Array(audioBuffer.length);
  for (let c = 0; c < audioBuffer.numberOfChannels; ++c) {
    const channel = audioBuffer.getChannelData(c);
    for (let i = 0; i < channel.length; ++i) {
      samples[i] += channel[i];
    }
  }

  for (let i = 0; i < samples.length; ++i) {
    samples[i] /= audioBuffer.numberOfChannels;
  }

  return samples;
}

async function readReferenceAudio(file) {
  const arrayBuffer = await file.arrayBuffer();
  const ctx = new AudioContext();
  try {
    const audioBuffer = await ctx.decodeAudioData(arrayBuffer.slice(0));
    return {
      samples: getMonoSamples(audioBuffer),
      sampleRate: audioBuffer.sampleRate,
    };
  } finally {
    await ctx.close();
  }
}

function isWaveFile(file) {
  const name = file.name || '';
  return name.toLowerCase().endsWith('.wav');
}

function sanitizeFilename(name) {
  return name.replace(/[^a-zA-Z0-9._-]+/g, '-');
}

function downloadBlob(blob, filename) {
  const url = window.URL.createObjectURL(blob);
  const link = document.createElement('a');
  link.href = url;
  link.download = filename;
  document.body.appendChild(link);
  link.click();
  document.body.removeChild(link);
  window.URL.revokeObjectURL(url);
}

generateBtn.onclick = async function() {
  const isZipVoice = ttsInstanceInfo.modelType === 4;
  const isPocketTts = ttsInstanceInfo.modelType === 5;
  const useGenerationConfig = isZipVoice || isPocketTts;

  let speakerId = speakerIdInput.value;
  if (!useGenerationConfig) {
    if (speakerId.trim().length == 0) {
      alert('Please input a speakerId');
      return;
    }

    if (!speakerId.match(/^\d+$/)) {
      alert(`Input speakerID ${
          speakerId} is not a number.\nPlease enter a number between 0 and ${
          ttsInstanceInfo.numSpeakers - 1}`);
      return;
    }
    speakerId = parseInt(speakerId, 10);
    if (speakerId > ttsInstanceInfo.numSpeakers - 1) {
      alert(`Pleaser enter a number between 0 and ${ttsInstanceInfo.numSpeakers - 1}`);
      return;
    }
  }

  let text = textArea.value.trim();
  if (text.length == 0) {
    alert('Please input a non-blank text');
    return;
  }

  console.log('speakerId', speakerId);
  console.log('speed', speedInput.value);
  console.log('text', text);

  if (useGenerationConfig) {
    if (!referenceAudioInput.files || referenceAudioInput.files.length === 0) {
      alert('Please select a reference audio file');
      return;
    }

    const referenceFile = referenceAudioInput.files[0];
    if (!isWaveFile(referenceFile)) {
      alert('Please select a .wav reference audio file');
      return;
    }

    const referenceAudio = await readReferenceAudio(referenceFile);
    const genConfig = {
      speed: parseFloat(speedInput.value),
      referenceAudio: referenceAudio.samples,
      referenceSampleRate: referenceAudio.sampleRate,
      numSteps: isPocketTts ? 5 : 4,
    };

    if (isZipVoice) {
      const referenceText = referenceTextInput.value.trim();
      if (referenceText.length === 0) {
        alert('Please input the transcript of the reference audio');
        return;
      }

      genConfig.referenceText = referenceText;
      genConfig.extra = {
        min_char_in_sentence: 10,
      };
    }

    generateBtn.disabled = true;
    setGenerationStatus('Generating audio...');

    worker.postMessage({
      text,
      genConfig,
      type: "generateWithConfig",
    }, [genConfig.referenceAudio.buffer]);
    return;
  }

  worker.postMessage({
    text,
    sid: speakerId,
    speed: parseFloat(speedInput.value),
    type: "generate",
  });
};

function createAudioTag(generateAudio) {
  const blob = toWav(generateAudio.samples, generateAudio.sampleRate);

  const text = textArea.value.trim().substring(0, 100);
  const clipName = `${index} ${text} ...`;
  const filename = `${sanitizeFilename(clipName)}.wav`;
  index += 1;

  const clipContainer = document.createElement('article');
  const clipLabel = document.createElement('p');
  const audio = document.createElement('audio');
  const saveButton = document.createElement('button');
  const deleteButton = document.createElement('button');
  clipContainer.classList.add('clip');
  audio.setAttribute('controls', '');
  saveButton.textContent = 'Save';
  saveButton.className = 'save';
  deleteButton.textContent = 'Delete';
  deleteButton.className = 'delete';

  clipLabel.textContent = clipName;

  clipContainer.appendChild(audio);

  clipContainer.appendChild(clipLabel);
  clipContainer.appendChild(saveButton);
  clipContainer.appendChild(deleteButton);
  soundClips.appendChild(clipContainer);

  audio.controls = true;

  const audioURL = window.URL.createObjectURL(blob);
  audio.src = audioURL;

  saveButton.onclick = function() {
    downloadBlob(blob, filename);
  };

  deleteButton.onclick = function(e) {
    let evtTgt = e.target;
    evtTgt.parentNode.parentNode.removeChild(evtTgt.parentNode);
  };

  clipLabel.onclick = function() {
    const existingName = clipLabel.textContent;
    const newClipName = prompt('Enter a new name for your sound clip?');
    if (newClipName === null) {
      clipLabel.textContent = existingName;
    } else {
      clipLabel.textContent = newClipName;
    }
  };
}

// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
function toWav(floatSamples, sampleRate) {
  let samples = new Int16Array(floatSamples.length);
  for (let i = 0; i < samples.length; ++i) {
    let s = floatSamples[i];
    if (s >= 1)
      s = 1;
    else if (s <= -1)
      s = -1;

    samples[i] = s * 32767;
  }

  let buf = new ArrayBuffer(44 + samples.length * 2);
  var view = new DataView(buf);

  // http://soundfile.sapp.org/doc/WaveFormat/
  //                   F F I R
  view.setUint32(0, 0x46464952, true);               // chunkID
  view.setUint32(4, 36 + samples.length * 2, true);  // chunkSize
  //                   E V A W
  view.setUint32(8, 0x45564157, true);  // format
                                        //
  //                      t m f
  view.setUint32(12, 0x20746d66, true);          // subchunk1ID
  view.setUint32(16, 16, true);                  // subchunk1Size, 16 for PCM
  view.setUint32(20, 1, true);                   // audioFormat, 1 for PCM
  view.setUint16(22, 1, true);                   // numChannels: 1 channel
  view.setUint32(24, sampleRate, true);          // sampleRate
  view.setUint32(28, sampleRate * 2, true);      // byteRate
  view.setUint16(32, 2, true);                   // blockAlign
  view.setUint16(34, 16, true);                  // bitsPerSample
  view.setUint32(36, 0x61746164, true);          // Subchunk2ID
  view.setUint32(40, samples.length * 2, true);  // subchunk2Size

  let offset = 44;
  for (let i = 0; i < samples.length; ++i) {
    view.setInt16(offset, samples[i], true);
    offset += 2;
  }

  return new Blob([view], {type: 'audio/wav'});
}


================================================
FILE: wasm/tts/assets/.gitignore
================================================
*.onnx
*.txt
espeak-ng-data


================================================
FILE: wasm/tts/assets/README.md
================================================
# Introduction

Please refer to
https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
to download a model.

The following is an example:
```bash
cd sherpa-onnx/wasm/tts/assets

wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-libritts_r-medium.tar.bz2
tar xf vits-piper-en_US-libritts_r-medium.tar.bz2
rm vits-piper-en_US-libritts_r-medium.tar.bz2
mv vits-piper-en_US-libritts_r-medium/en_US-libritts_r-medium.onnx ./model.onnx
mv vits-piper-en_US-libritts_r-medium/tokens.txt ./
mv vits-piper-en_US-libritts_r-medium/espeak-ng-data ./
rm -rf vits-piper-en_US-libritts_r-medium
```

ZipVoice example:

```bash
cd sherpa-onnx/wasm/tts/assets

wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
tar xf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2
rm sherpa-onnx-zipvoice-distill-int8-zh-en-emilia.tar.bz2

mv sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/encoder.int8.onnx ./
mv sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/decoder.int8.onnx ./
mv sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/tokens.txt ./
mv sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/lexicon.txt ./
mv sherpa-onnx-zipvoice-distill-int8-zh-en-emilia/espeak-ng-data ./
rm -rf sherpa-onnx-zipvoice-distill-int8-zh-en-emilia

wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos_24khz.onnx
```

PocketTTS example:

```bash
cd sherpa-onnx/wasm/tts/assets

wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
tar xf sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2
rm sherpa-onnx-pocket-tts-int8-2026-01-26.tar.bz2

mv sherpa-onnx-pocket-tts-int8-2026-01-26/lm_flow.int8.onnx ./
mv sherpa-onnx-pocket-tts-int8-2026-01-26/lm_main.int8.onnx ./
mv sherpa-onnx-pocket-tts-int8-2026-01-26/encoder.onnx ./
mv sherpa-onnx-pocket-tts-int8-2026-01-26/decoder.int8.onnx ./
mv sherpa-onnx-pocket-tts-int8-2026-01-26/text_conditioner.onnx ./
mv sherpa-onnx-pocket-tts-int8-2026-01-26/vocab.json ./
mv sherpa-onnx-pocket-tts-int8-2026-01-26/token_scores.json ./
rm -rf sherpa-onnx-pocket-tts-int8-2026-01-26
```

You should have the following files in `assets` before you can run
`build-wasm-simd-tts.sh`

```
assets fangjun$ tree -L 1
.
├── README.md
├── espeak-ng-data
├── mode.onnx
└── tokens.txt

1 directory, 3 files
```

You can find example build scripts at:

  - English TTS: https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-en-tts.yaml
  - German TTS: https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-de-tts.yaml


================================================
FILE: wasm/tts/index.html
================================================
<html lang="en">

<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width" />
  <title>Next-gen Kaldi WebAssembly with sherpa-onnx for Text-to-speech</title>
  <style>
    h1,div {
      text-align: center;
    }
    textarea {
      width:100%;
    }
    .loading {
      display: none !important;
    }
    .hidden {
      display: none !important;
    }
  </style>
</head>

<body style="font-family: 'Source Sans Pro', sans-serif; background-color: #f9fafb; color: #333; display: flex; flex-direction: column; align-items: center; height: 100vh; margin: 0;">
  <h1>
    Next-gen Kaldi + WebAssembly<br/>
    Text-to-speech Demo with <a href="https://github.com/k2-fsa/sherpa-onnx">sherpa-onnx</a>
  </h1>

  <div style="width: 100%; max-width: 900px; background: #fff; padding: 1.5rem; border-radius: 8px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1); flex: 1;">
    <div id="status">Loading...</div>

    <div id="singleAudioContent" class="tab-content loading">
      <div id="speakerIdSection">
        <label for="speakerId" id="speakerIdLabel">Speaker ID: </label>
        <input type="text" id="speakerId" name="speakerId" value="0" />
        <br/>
        <br/>
      </div>
      <div id="referenceAudioSection" class="hidden">
        <label for="referenceAudio">Reference audio (.wav): </label>
        <input type="file" id="referenceAudio" name="referenceAudio" accept=".wav,audio/wav" />
        <div style="font-size: 0.9rem; color: #6c757d;">Only `.wav` files are supported.</div>
        <br/>
        <br/>
      </div>
      <div id="referenceTextSection" class="hidden">
        <label for="referenceText">Reference transcript (must match the reference audio): </label>
        <br/>
        <textarea id="referenceText" rows="3" placeholder="Please enter the transcript of the reference audio exactly"></textarea>
        <br/>
        <br/>
      </div>
      <label for="speed" id="speedLabel">Speed: </label>
      <input type="range" id="speed" name="speed" min="0.4" max="3.5" step="0.1" value="1.0" />
      <span id="speedValue"></span>
      <br/>
      <br/>
      <textarea id="text" rows="10" placeholder="Please enter your text here and click the Generate button"></textarea>
      <br/>
      <br/>
      <button id="generateBtn" disabled>Generate</button>
      <div id="generationStatus" style="display: none; margin-top: 0.75rem; font-size: 0.95rem; color: #6c757d;"></div>
    </div>

    <section flex="1" overflow="auto" id="sound-clips">
    </section>
  </div>

  <!-- Footer Section -->
  <div style="width: 100%; max-width: 900px; margin-top: 1.5rem; background: #fff; padding: 1.5rem; border-radius: 8px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1); text-align: left; font-size: 0.9rem; color: #6c757d;">
    <h3>Description</h3>
    <ul>
      <li>Everything is <strong>open-sourced.</strong> <a href="https://github.com/k2-fsa/sherpa-onnx">code</a></li>
      <li>If you have any issues, please either <a href="https://github.com/k2-fsa/sherpa-onnx/issues">file a ticket</a> or contact us via</li>
        <ul>
          <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#wechat">WeChat group</a></li>
          <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#qq">QQ group</a></li>
          <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#bilibili-b">Bilibili</a></li>
        </ul>
    </ul>
    <h3>About This Demo</h3>
    <ul>
      <li><strong>Private and Secure:</strong> All processing is done locally on your device (CPU) within your browser with a single thread. No server is involved, ensuring privacy and security. You can disconnect from the Internet once this page is loaded.</li>
      <li><strong>Efficient Resource Usage:</strong> No GPU is required, leaving system resources available for webLLM analysis.</li>
    </ul>
    <h3>Latest Update</h3>
    <ul>
      <li>Update UI.</li>
      <li>First working version.</li>
    </ul>

    <h3>Acknowledgement</h3>
    <ul>
      <li>We refer to <a href="https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Wasm">https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Wasm</a> for the UI part.</li>
    </ul>
  </div>


  <script src="app-tts.js"></script>
</body>


================================================
FILE: wasm/tts/sherpa-onnx-tts.js
================================================

function freeConfig(config, Module) {
  if ('buffer' in config) {
    Module._free(config.buffer);
  }

  if ('config' in config) {
    freeConfig(config.config, Module)
  }

  if ('matcha' in config) {
    freeConfig(config.matcha, Module)
  }

  if ('kokoro' in config) {
    freeConfig(config.kokoro, Module)
  }

  if ('kitten' in config) {
    freeConfig(config.kitten, Module)
  }

  if ('zipvoice' in config) {
    freeConfig(config.zipvoice, Module)
  }

  if ('pocket' in config) {
    freeConfig(config.pocket, Module)
  }

  if ('supertonic' in config) {
    freeConfig(config.supertonic, Module)
  }

  if (config.ptr) {
    Module._free(config.ptr);
  }
}

// The user should free the returned pointers
function initSherpaOnnxOfflineTtsVitsModelConfig(config, Module) {
  const modelLen = Module.lengthBytesUTF8(config.model || '') + 1;
  const lexiconLen = Module.lengthBytesUTF8(config.lexicon || '') + 1;
  const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1;
  const dataDirLen = Module.lengthBytesUTF8(config.dataDir || '') + 1;
  const dictDir = ''
  const dictDirLen = Module.lengthBytesUTF8(dictDir) + 1;

  const n = modelLen + lexiconLen + tokensLen + dataDirLen + dictDirLen;

  const buffer = Module._malloc(n);

  const len = 8 * 4;
  const ptr = Module._malloc(len);

  let offset = 0;
  Module.stringToUTF8(config.model || '', buffer + offset, modelLen);
  offset += modelLen;

  Module.stringToUTF8(config.lexicon || '', buffer + offset, lexiconLen);
  offset += lexiconLen;

  Module.stringToUTF8(config.tokens || '', buffer + offset, tokensLen);
  offset += tokensLen;

  Module.stringToUTF8(config.dataDir || '', buffer + offset, dataDirLen);
  offset += dataDirLen;

  Module.stringToUTF8(dictDir, buffer + offset, dictDirLen);
  offset += dictDirLen;

  offset = 0;
  Module.setValue(ptr, buffer + offset, 'i8*');
  offset += modelLen;

  Module.setValue(ptr + 4, buffer + offset, 'i8*');
  offset += lexiconLen;

  Module.setValue(ptr + 8, buffer + offset, 'i8*');
  offset += tokensLen;

  Module.setValue(ptr + 12, buffer + offset, 'i8*');
  offset += dataDirLen;

  Module.setValue(ptr + 16, config.noiseScale || 0.667, 'float');
  Module.setValue(ptr + 20, config.noiseScaleW || 0.8, 'float');
  Module.setValue(ptr + 24, config.lengthScale || 1.0, 'float');
  Module.setValue(ptr + 28, buffer + offset, 'i8*');
  offset += dictDirLen;

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxOfflineTtsMatchaModelConfig(config, Module) {
  const acousticModelLen = Module.lengthBytesUTF8(config.acousticModel) + 1;
  const vocoderLen = Module.lengthBytesUTF8(config.vocoder) + 1;
  const lexiconLen = Module.lengthBytesUTF8(config.lexicon || '') + 1;
  const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1;
  const dataDirLen = Module.lengthBytesUTF8(config.dataDir || '') + 1;

  const dictDir = '';
  const dictDirLen = Module.lengthBytesUTF8(dictDir) + 1;

  const n = acousticModelLen + vocoderLen + lexiconLen + tokensLen +
      dataDirLen + dictDirLen;

  const buffer = Module._malloc(n);

  const len = 8 * 4;
  const ptr = Module._malloc(len);

  let offset = 0;
  Module.stringToUTF8(
      config.acousticModel || '', buffer + offset, acousticModelLen);
  offset += acousticModelLen;

  Module.stringToUTF8(config.vocoder || '', buffer + offset, vocoderLen);
  offset += vocoderLen;

  Module.stringToUTF8(config.lexicon || '', buffer + offset, lexiconLen);
  offset += lexiconLen;

  Module.stringToUTF8(config.tokens || '', buffer + offset, tokensLen);
  offset += tokensLen;

  Module.stringToUTF8(config.dataDir || '', buffer + offset, dataDirLen);
  offset += dataDirLen;

  Module.stringToUTF8(dictDir, buffer + offset, dictDirLen);
  offset += dictDirLen;

  offset = 0;
  Module.setValue(ptr, buffer + offset, 'i8*');
  offset += acousticModelLen;

  Module.setValue(ptr + 4, buffer + offset, 'i8*');
  offset += vocoderLen;

  Module.setValue(ptr + 8, buffer + offset, 'i8*');
  offset += lexiconLen;

  Module.setValue(ptr + 12, buffer + offset, 'i8*');
  offset += tokensLen;

  Module.setValue(ptr + 16, buffer + offset, 'i8*');
  offset += dataDirLen;

  Module.setValue(ptr + 20, config.noiseScale || 0.667, 'float');
  Module.setValue(ptr + 24, config.lengthScale || 1.0, 'float');
  Module.setValue(ptr + 28, buffer + offset, 'i8*');
  offset += dictDirLen;

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxOfflineTtsKokoroModelConfig(config, Module) {
  const modelLen = Module.lengthBytesUTF8(config.model) + 1;
  const voicesLen = Module.lengthBytesUTF8(config.voices) + 1;
  const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1;
  const dataDirLen = Module.lengthBytesUTF8(config.dataDir || '') + 1;
  const dictDir = '';
  const dictDirLen = Module.lengthBytesUTF8(dictDir) + 1;
  const lexiconLen = Module.lengthBytesUTF8(config.lexicon || '') + 1;
  const langLen = Module.lengthBytesUTF8(config.lang || '') + 1;

  const n = modelLen + voicesLen + tokensLen + dataDirLen + dictDirLen +
      lexiconLen + langLen;

  const buffer = Module._malloc(n);

  const len = 8 * 4;
  const ptr = Module._malloc(len);

  let offset = 0;
  Module.stringToUTF8(config.model || '', buffer + offset, modelLen);
  offset += modelLen;

  Module.stringToUTF8(config.voices || '', buffer + offset, voicesLen);
  offset += voicesLen;

  Module.stringToUTF8(config.tokens || '', buffer + offset, tokensLen);
  offset += tokensLen;

  Module.stringToUTF8(config.dataDir || '', buffer + offset, dataDirLen);
  offset += dataDirLen;

  Module.stringToUTF8(dictDir, buffer + offset, dictDirLen);
  offset += dictDirLen;

  Module.stringToUTF8(config.lexicon || '', buffer + offset, lexiconLen);
  offset += lexiconLen;

  Module.stringToUTF8(config.lang || '', buffer + offset, langLen);
  offset += langLen;

  offset = 0;
  Module.setValue(ptr, buffer + offset, 'i8*');
  offset += modelLen;

  Module.setValue(ptr + 4, buffer + offset, 'i8*');
  offset += voicesLen;

  Module.setValue(ptr + 8, buffer + offset, 'i8*');
  offset += tokensLen;

  Module.setValue(ptr + 12, buffer + offset, 'i8*');
  offset += dataDirLen;

  Module.setValue(ptr + 16, config.lengthScale || 1.0, 'float');

  Module.setValue(ptr + 20, buffer + offset, 'i8*');
  offset += dictDirLen;

  Module.setValue(ptr + 24, buffer + offset, 'i8*');
  offset += lexiconLen;

  Module.setValue(ptr + 28, buffer + offset, 'i8*');
  offset += langLen;

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxOfflineTtsKittenModelConfig(config, Module) {
  const modelLen = Module.lengthBytesUTF8(config.model) + 1;
  const voicesLen = Module.lengthBytesUTF8(config.voices) + 1;
  const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1;
  const dataDirLen = Module.lengthBytesUTF8(config.dataDir || '') + 1;

  const n = modelLen + voicesLen + tokensLen + dataDirLen;

  const buffer = Module._malloc(n);

  const len = 5 * 4;
  const ptr = Module._malloc(len);

  let offset = 0;
  Module.stringToUTF8(config.model || '', buffer + offset, modelLen);
  offset += modelLen;

  Module.stringToUTF8(config.voices || '', buffer + offset, voicesLen);
  offset += voicesLen;

  Module.stringToUTF8(config.tokens || '', buffer + offset, tokensLen);
  offset += tokensLen;

  Module.stringToUTF8(config.dataDir || '', buffer + offset, dataDirLen);
  offset += dataDirLen;

  offset = 0;
  Module.setValue(ptr, buffer + offset, 'i8*');
  offset += modelLen;

  Module.setValue(ptr + 4, buffer + offset, 'i8*');
  offset += voicesLen;

  Module.setValue(ptr + 8, buffer + offset, 'i8*');
  offset += tokensLen;

  Module.setValue(ptr + 12, buffer + offset, 'i8*');
  offset += dataDirLen;

  Module.setValue(ptr + 16, config.lengthScale || 1.0, 'float');

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxOfflineTtsZipVoiceModelConfig(config, Module) {
  const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1;
  const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1;
  const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1;
  const vocoderLen = Module.lengthBytesUTF8(config.vocoder || '') + 1;
  const dataDirLen = Module.lengthBytesUTF8(config.dataDir || '') + 1;
  const lexiconLen = Module.lengthBytesUTF8(config.lexicon || '') + 1;

  const n = tokensLen + encoderLen + decoderLen + vocoderLen + dataDirLen +
      lexiconLen;

  const buffer = Module._malloc(n);

  const len = 10 * 4;
  const ptr = Module._malloc(len);

  let offset = 0;
  Module.stringToUTF8(config.tokens || '', buffer + offset, tokensLen);
  offset += tokensLen;

  Module.stringToUTF8(config.encoder || '', buffer + offset, encoderLen);
  offset += encoderLen;

  Module.stringToUTF8(config.decoder || '', buffer + offset, decoderLen);
  offset += decoderLen;

  Module.stringToUTF8(config.vocoder || '', buffer + offset, vocoderLen);
  offset += vocoderLen;

  Module.stringToUTF8(config.dataDir || '', buffer + offset, dataDirLen);
  offset += dataDirLen;

  Module.stringToUTF8(config.lexicon || '', buffer + offset, lexiconLen);
  offset += lexiconLen;

  offset = 0;
  Module.setValue(ptr, buffer + offset, 'i8*');
  offset += tokensLen;

  Module.setValue(ptr + 4, buffer + offset, 'i8*');
  offset += encoderLen;

  Module.setValue(ptr + 8, buffer + offset, 'i8*');
  offset += decoderLen;

  Module.setValue(ptr + 12, buffer + offset, 'i8*');
  offset += vocoderLen;

  Module.setValue(ptr + 16, buffer + offset, 'i8*');
  offset += dataDirLen;

  Module.setValue(ptr + 20, buffer + offset, 'i8*');
  offset += lexiconLen;

  Module.setValue(ptr + 24, config.featScale || 0.1, 'float');
  Module.setValue(ptr + 28, config.tShift || 0.5, 'float');
  Module.setValue(ptr + 32, config.targetRMS || 0.1, 'float');
  Module.setValue(ptr + 36, config.guidanceScale || 1.0, 'float');

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxOfflineTtsPocketModelConfig(config, Module) {
  const lmFlowLen = Module.lengthBytesUTF8(config.lmFlow || '') + 1;
  const lmMainLen = Module.lengthBytesUTF8(config.lmMain || '') + 1;
  const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1;
  const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1;
  const textConditionerLen =
      Module.lengthBytesUTF8(config.textConditioner || '') + 1;
  const vocabJsonLen = Module.lengthBytesUTF8(config.vocabJson || '') + 1;
  const tokenScoresJsonLen =
      Module.lengthBytesUTF8(config.tokenScoresJson || '') + 1;


  const n = lmFlowLen + lmMainLen + encoderLen + decoderLen +
      textConditionerLen + vocabJsonLen + tokenScoresJsonLen;

  const buffer = Module._malloc(n);

  const len = 8 * 4;
  const ptr = Module._malloc(len);

  let offset = 0;
  Module.stringToUTF8(config.lmFlow || '', buffer + offset, lmFlowLen);
  offset += lmFlowLen;

  Module.stringToUTF8(config.lmMain || '', buffer + offset, lmMainLen);
  offset += lmMainLen;

  Module.stringToUTF8(config.encoder || '', buffer + offset, encoderLen);
  offset += encoderLen;

  Module.stringToUTF8(config.decoder || '', buffer + offset, decoderLen);
  offset += decoderLen;

  Module.stringToUTF8(
      config.textConditioner || '', buffer + offset, textConditionerLen);
  offset += textConditionerLen;

  Module.stringToUTF8(config.vocabJson || '', buffer + offset, vocabJsonLen);
  offset += vocabJsonLen;

  Module.stringToUTF8(
      config.tokenScoresJson || '', buffer + offset, tokenScoresJsonLen);
  offset += tokenScoresJsonLen;

  offset = 0;
  Module.setValue(ptr + 0 * 4, buffer + offset, 'i8*');
  offset += lmFlowLen;

  Module.setValue(ptr + 1 * 4, buffer + offset, 'i8*');
  offset += lmMainLen;

  Module.setValue(ptr + 2 * 4, buffer + offset, 'i8*');
  offset += encoderLen;

  Module.setValue(ptr + 3 * 4, buffer + offset, 'i8*');
  offset += decoderLen;

  Module.setValue(ptr + 4 * 4, buffer + offset, 'i8*');
  offset += textConditionerLen;

  Module.setValue(ptr + 5 * 4, buffer + offset, 'i8*');
  offset += vocabJsonLen;

  Module.setValue(ptr + 6 * 4, buffer + offset, 'i8*');
  offset += tokenScoresJsonLen;

  Module.setValue(
      ptr + 7 * 4,
      config.voiceEmbeddingCacheCapacity !== undefined ?
          config.voiceEmbeddingCacheCapacity :
          50,
      'i32');

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxOfflineTtsSupertonicModelConfig(config, Module) {
  const durationPredictorLen =
      Module.lengthBytesUTF8(config.durationPredictor || '') + 1;
  const textEncoderLen = Module.lengthBytesUTF8(config.textEncoder || '') + 1;
  const vectorEstimatorLen =
      Module.lengthBytesUTF8(config.vectorEstimator || '') + 1;
  const vocoderLen = Module.lengthBytesUTF8(config.vocoder || '') + 1;
  const ttsJsonLen = Module.lengthBytesUTF8(config.ttsJson || '') + 1;
  const unicodeIndexerLen =
      Module.lengthBytesUTF8(config.unicodeIndexer || '') + 1;
  const voiceStyleLen = Module.lengthBytesUTF8(config.voiceStyle || '') + 1;

  const n = durationPredictorLen + textEncoderLen + vectorEstimatorLen +
      vocoderLen + ttsJsonLen + unicodeIndexerLen + voiceStyleLen;

  const buffer = Module._malloc(n);

  const len = 7 * 4;
  const ptr = Module._malloc(len);

  let offset = 0;
  Module.stringToUTF8(
      config.durationPredictor || '', buffer + offset, durationPredictorLen);
  offset += durationPredictorLen;

  Module.stringToUTF8(
      config.textEncoder || '', buffer + offset, textEncoderLen);
  offset += textEncoderLen;

  Module.stringToUTF8(
      config.vectorEstimator || '', buffer + offset, vectorEstimatorLen);
  offset += vectorEstimatorLen;

  Module.stringToUTF8(config.vocoder || '', buffer + offset, vocoderLen);
  offset += vocoderLen;

  Module.stringToUTF8(config.ttsJson || '', buffer + offset, ttsJsonLen);
  offset += ttsJsonLen;

  Module.stringToUTF8(
      config.unicodeIndexer || '', buffer + offset, unicodeIndexerLen);
  offset += unicodeIndexerLen;

  Module.stringToUTF8(config.voiceStyle || '', buffer + offset, voiceStyleLen);
  offset += voiceStyleLen;

  offset = 0;
  Module.setValue(ptr + 0 * 4, buffer + offset, 'i8*');
  offset += durationPredictorLen;

  Module.setValue(ptr + 1 * 4, buffer + offset, 'i8*');
  offset += textEncoderLen;

  Module.setValue(ptr + 2 * 4, buffer + offset, 'i8*');
  offset += vectorEstimatorLen;

  Module.setValue(ptr + 3 * 4, buffer + offset, 'i8*');
  offset += vocoderLen;

  Module.setValue(ptr + 4 * 4, buffer + offset, 'i8*');
  offset += ttsJsonLen;

  Module.setValue(ptr + 5 * 4, buffer + offset, 'i8*');
  offset += unicodeIndexerLen;

  Module.setValue(ptr + 6 * 4, buffer + offset, 'i8*');
  offset += voiceStyleLen;

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxOfflineTtsModelConfig(config, Module) {
  if (!('offlineTtsVitsModelConfig' in config)) {
    config.offlineTtsVitsModelConfig = {
      model: '',
      lexicon: '',
      tokens: '',
      noiseScale: 0.667,
      noiseScaleW: 0.8,
      lengthScale: 1.0,
      dataDir: '',
    };
  }

  if (!('offlineTtsMatchaModelConfig' in config)) {
    config.offlineTtsMatchaModelConfig = {
      acousticModel: '',
      vocoder: '',
      lexicon: '',
      tokens: '',
      noiseScale: 0.667,
      lengthScale: 1.0,
      dataDir: '',
    };
  }

  if (!('offlineTtsKokoroModelConfig' in config)) {
    config.offlineTtsKokoroModelConfig = {
      model: '',
      voices: '',
      tokens: '',
      lengthScale: 1.0,
      dataDir: '',
      lexicon: '',
      lang: '',
    };
  }

  if (!('offlineTtsKittenModelConfig' in config)) {
    config.offlineTtsKittenModelConfig = {
      model: '',
      voices: '',
      tokens: '',
      lengthScale: 1.0,
    };
  }

  if (!('offlineTtsZipVoiceModelConfig' in config)) {
    config.offlineTtsZipVoiceModelConfig = {
      tokens: '',
      encoder: '',
      decoder: '',
      vocoder: '',
      dataDir: '',
      lexicon: '',
      featScale: 0.1,
      tShift: 0.5,
      targetRMS: 0.1,
      guidanceScale: 1.0,
    };
  }

  if (!('offlineTtsPocketModelConfig' in config)) {
    config.offlineTtsPocketModelConfig = {
      lmFlow: '',
      lmMain: '',
      encoder: '',
      decoder: '',
      textConditioner: '',
      vocabJson: '',
      tokenScoresJson: '',
      voiceEmbeddingCacheCapacity: 50,
    };
  }

  if (!('offlineTtsSupertonicModelConfig' in config)) {
    config.offlineTtsSupertonicModelConfig = {
      durationPredictor: '',
      textEncoder: '',
      vectorEstimator: '',
      vocoder: '',
      ttsJson: '',
      unicodeIndexer: '',
      voiceStyle: '',
    };
  }

  const vitsModelConfig = initSherpaOnnxOfflineTtsVitsModelConfig(
      config.offlineTtsVitsModelConfig, Module);

  const matchaModelConfig = initSherpaOnnxOfflineTtsMatchaModelConfig(
      config.offlineTtsMatchaModelConfig, Module);

  const kokoroModelConfig = initSherpaOnnxOfflineTtsKokoroModelConfig(
      config.offlineTtsKokoroModelConfig, Module);

  const kittenModelConfig = initSherpaOnnxOfflineTtsKittenModelConfig(
      config.offlineTtsKittenModelConfig, Module);

  const zipVoiceModelConfig = initSherpaOnnxOfflineTtsZipVoiceModelConfig(
      config.offlineTtsZipVoiceModelConfig, Module);

  const pocketModelConfig = initSherpaOnnxOfflineTtsPocketModelConfig(
      config.offlineTtsPocketModelConfig, Module);

  const supertonicModelConfig = initSherpaOnnxOfflineTtsSupertonicModelConfig(
      config.offlineTtsSupertonicModelConfig, Module);

  const len = vitsModelConfig.len + matchaModelConfig.len +
      kokoroModelConfig.len + kittenModelConfig.len + zipVoiceModelConfig.len +
      pocketModelConfig.len + supertonicModelConfig.len + 3 * 4;

  const ptr = Module._malloc(len);

  let offset = 0;
  Module._CopyHeap(vitsModelConfig.ptr, vitsModelConfig.len, ptr + offset);
  offset += vitsModelConfig.len;

  Module.setValue(ptr + offset, config.numThreads || 1, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, config.debug || 0, 'i32');
  offset += 4;

  const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1;
  const buffer = Module._malloc(providerLen);
  Module.stringToUTF8(config.provider || 'cpu', buffer, providerLen);
  Module.setValue(ptr + offset, buffer, 'i8*');
  offset += 4;

  Module._CopyHeap(matchaModelConfig.ptr, matchaModelConfig.len, ptr + offset);
  offset += matchaModelConfig.len;

  Module._CopyHeap(kokoroModelConfig.ptr, kokoroModelConfig.len, ptr + offset);
  offset += kokoroModelConfig.len;

  Module._CopyHeap(kittenModelConfig.ptr, kittenModelConfig.len, ptr + offset);
  offset += kittenModelConfig.len;

  Module._CopyHeap(
      zipVoiceModelConfig.ptr, zipVoiceModelConfig.len, ptr + offset);
  offset += zipVoiceModelConfig.len;

  Module._CopyHeap(pocketModelConfig.ptr, pocketModelConfig.len, ptr + offset);
  offset += pocketModelConfig.len;

  Module._CopyHeap(
      supertonicModelConfig.ptr, supertonicModelConfig.len, ptr + offset);
  offset += supertonicModelConfig.len;

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
    config: vitsModelConfig,
    matcha: matchaModelConfig,
    kokoro: kokoroModelConfig,
    kitten: kittenModelConfig,
    zipvoice: zipVoiceModelConfig,
    pocket: pocketModelConfig,
    supertonic: supertonicModelConfig,
  };
}

function initSherpaOnnxOfflineTtsConfig(config, Module) {
  const modelConfig =
      initSherpaOnnxOfflineTtsModelConfig(config.offlineTtsModelConfig, Module);
  const len = modelConfig.len + 4 * 4;
  const ptr = Module._malloc(len);

  let offset = 0;
  Module._CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset);
  offset += modelConfig.len;

  const ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts || '') + 1;
  const ruleFarsLen = Module.lengthBytesUTF8(config.ruleFars || '') + 1;

  const buffer = Module._malloc(ruleFstsLen + ruleFarsLen);
  Module.stringToUTF8(config.ruleFsts || '', buffer, ruleFstsLen);
  Module.stringToUTF8(config.ruleFars || '', buffer + ruleFstsLen, ruleFarsLen);

  Module.setValue(ptr + offset, buffer, 'i8*');
  offset += 4;

  Module.setValue(ptr + offset, config.maxNumSentences || 1, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, buffer + ruleFstsLen, 'i8*');
  offset += 4;

  Module.setValue(ptr + offset, config.silenceScale || 0.2, 'float');
  offset += 4;

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
    config: modelConfig,
  };
}

/*
const genConfig = {
  silenceScale: 0.2,
  speed: 1.0,
  sid: 1,
  referenceAudio: myFloat32Array, // optional
  referenceSampleRate: 16000, // used if referenceAudio is required
  referenceText: "Hello world", // optional
  numSteps: 5, // optional
  extra: { bar: "ok", foo: 0.8, foobar: 10}
};

 */

// Allocate a SherpaOnnxGenerationConfig in WASM
function initSherpaOnnxGenerationConfig(config, Module) {
  const len = 9 * 4;
  const ptr = Module._malloc(len);

  // float silence_scale
  Module.setValue(ptr + 0 * 4, config.silenceScale || 0.2, 'float');

  // float speed
  Module.setValue(ptr + 1 * 4, config.speed || 1.0, 'float');

  // int32_t sid
  Module.setValue(ptr + 2 * 4, config.sid || 0, 'i32');

  // const float* reference_audio
  let referenceAudioPtr = 0;
  if (config.referenceAudio && config.referenceAudio.length > 0) {
    referenceAudioPtr = Module._malloc(config.referenceAudio.length * 4);
    Module.HEAPF32.set(config.referenceAudio, referenceAudioPtr / 4);
  }
  Module.setValue(ptr + 3 * 4, referenceAudioPtr, 'i8*');

  // int32_t reference_audio_len
  Module.setValue(
      ptr + 4 * 4, config.referenceAudio ? config.referenceAudio.length : 0,
      'i32');

  // int32_t reference_sample_rate
  Module.setValue(ptr + 5 * 4, config.referenceSampleRate || 0, 'i32');

  // const char* reference_text
  let referenceTextPtr = 0;
  if (config.referenceText) {
    const textLen = Module.lengthBytesUTF8(config.referenceText) + 1;
    referenceTextPtr = Module._malloc(textLen);
    Module.stringToUTF8(config.referenceText, referenceTextPtr, textLen);
  }
  Module.setValue(ptr + 6 * 4, referenceTextPtr, 'i8*');

  // int32_t num_steps
  Module.setValue(ptr + 7 * 4, config.numSteps || 5, 'i32');

  // const char* extra (JSON string)
  let extraPtr = 0;
  let extraStr = null;

  if (config.extra) {
    if (typeof config.extra === 'object') {
      extraStr = JSON.stringify(config.extra);
    } else if (typeof config.extra === 'string') {
      extraStr = config.extra;
    }
  }

  if (extraStr !== null) {
    const extraLen = Module.lengthBytesUTF8(extraStr) + 1;
    extraPtr = Module._malloc(extraLen);
    Module.stringToUTF8(extraStr, extraPtr, extraLen);
  }

  Module.setValue(ptr + 8 * 4, extraPtr, 'i8*');

  return {
    ptr,
    referenceAudioPtr,
    referenceTextPtr,
    extraPtr,
  };
}


// Free the memory allocated for a SherpaOnnxGenerationConfig
function freeSherpaOnnxGenerationConfig(cfg, Module) {
  if (!cfg) return;

  if (cfg.referenceAudioPtr) Module._free(cfg.referenceAudioPtr);
  if (cfg.referenceTextPtr) Module._free(cfg.referenceTextPtr);
  if (cfg.extraPtr) Module._free(cfg.extraPtr);
  if (cfg.ptr) Module._free(cfg.ptr);
}


class OfflineTts {
  constructor(configObj, Module) {
    const config = initSherpaOnnxOfflineTtsConfig(configObj, Module)
    const handle = Module._SherpaOnnxCreateOfflineTts(config.ptr);

    freeConfig(config, Module);

    this.handle = handle;
    this.sampleRate = Module._SherpaOnnxOfflineTtsSampleRate(this.handle);
    this.numSpeakers = Module._SherpaOnnxOfflineTtsNumSpeakers(this.handle);
    this.Module = Module
  }

  free() {
    if (!this.handle) return;

    this.Module._SherpaOnnxDestroyOfflineTts(this.handle);
    this.handle = 0
  }

  // {
  //   text: "hello",
  //   sid: 1,
  //   speed: 1.0
  // }
  generate(config) {
    if (!this.handle) {
      throw new Error('OfflineTts has been freed');
    }

    if (!config || !config.text) {
      throw new Error('config.text is required');
    }

    const textLen = this.Module.lengthBytesUTF8(config.text) + 1;
    const textPtr = this.Module._malloc(textLen);
    this.Module.stringToUTF8(config.text, textPtr, textLen);

    const h = this.Module._SherpaOnnxOfflineTtsGenerate(
        this.handle, textPtr, config.sid ?? 0, config.speed ?? 1.0);

    this.Module._free(textPtr);

    if (!h) {
      throw new Error('TTS generation failed');
    }

    const base = h / 4;

    const samplesPtr = this.Module.HEAPU32[base];
    const numSamples = this.Module.HEAP32[base + 1];
    const sampleRate = this.Module.HEAP32[base + 2];

    const heapSamples = this.Module.HEAPF32.subarray(
        samplesPtr / 4, samplesPtr / 4 + numSamples);

    const samples = new Float32Array(heapSamples);

    this.Module._SherpaOnnxDestroyOfflineTtsGeneratedAudio(h);
    return {samples: samples, sampleRate: sampleRate};
  }

  generateWithConfig(text, genConfig) {
    if (!this.handle) {
      throw new Error('OfflineTts has been freed');
    }

    const cfgWasm = initSherpaOnnxGenerationConfig(genConfig, this.Module);

    const textLen = this.Module.lengthBytesUTF8(text) + 1;
    const textPtr = this.Module._malloc(textLen);
    this.Module.stringToUTF8(text, textPtr, textLen);

    let callbackPtr = 0;
    if (genConfig.callback) {
      callbackPtr = this.Module.addFunction((samplesPtr, n, progress, arg) => {
        const heapSamples =
            this.Module.HEAPF32.subarray(samplesPtr / 4, samplesPtr / 4 + n);
        const samples = new Float32Array(heapSamples);
        return genConfig.callback(samples, n, progress, arg);
      }, 'iiifi');
    }

    let audioPtr = 0;
    try {
      audioPtr = this.Module._SherpaOnnxOfflineTtsGenerateWithConfig(
          this.handle, textPtr, cfgWasm.ptr, callbackPtr, 0);
    } finally {
      this.Module._free(textPtr);
      freeSherpaOnnxGenerationConfig(cfgWasm, this.Module);
      if (callbackPtr) {
        this.Module.removeFunction(callbackPtr);
      }
    }

    if (!audioPtr) {
      throw new Error('Failed to generate audio');
    }

    const base = audioPtr / 4;

    const samplesPtr = this.Module.HEAPU32[base];     // float* samples
    const numSamples = this.Module.HEAP32[base + 1];  // int32 num_samples
    const sampleRate = this.Module.HEAP32[base + 2];  // int32 sample_rate

    const heapSamples = this.Module.HEAPF32.subarray(
        samplesPtr / 4, samplesPtr / 4 + numSamples);
    const samples = new Float32Array(heapSamples);

    this.Module._SherpaOnnxDestroyOfflineTtsGeneratedAudio(audioPtr);

    return {samples, sampleRate};
  }

  save(filename, audio) {
    const samples = audio.samples;
    const sampleRate = audio.sampleRate;
    const ptr = this.Module._malloc(samples.length * 4);

    this.Module.HEAPF32.set(samples, ptr / 4);

    const filenameLen = this.Module.lengthBytesUTF8(filename) + 1;
    const buffer = this.Module._malloc(filenameLen);
    this.Module.stringToUTF8(filename, buffer, filenameLen);
    this.Module._SherpaOnnxWriteWave(ptr, samples.length, sampleRate, buffer);
    this.Module._free(buffer);
    this.Module._free(ptr);
  }
}

let modelType = 0;

function getDefaultOfflineTtsModelType() {
  return modelType;
}

function createOfflineTts(Module, myConfig) {
  const vits = {
    model: '',
    lexicon: '',
    tokens: '',
    dataDir: '',
    noiseScale: 0.667,
    noiseScaleW: 0.8,
    lengthScale: 1.0,
  };

  const matcha = {
    acousticModel: '',
    vocoder: '',
    lexicon: '',
    tokens: '',
    dataDir: '',
    noiseScale: 0.667,
    lengthScale: 1.0,
  };

  const offlineTtsKokoroModelConfig = {
    model: '',
    voices: '',
    tokens: '',
    dataDir: '',
    lengthScale: 1.0,
    lexicon: '',
    lang: '',
  };

  const offlineTtsKittenModelConfig = {
    model: '',
    voices: '',
    tokens: '',
    dataDir: '',
    lengthScale: 1.0,
  };

  const offlineTtsZipVoiceModelConfig = {
    tokens: '',
    encoder: '',
    decoder: '',
    vocoder: '',
    dataDir: '',
    lexicon: '',
    featScale: 0.1,
    tShift: 0.5,
    targetRMS: 0.1,
    guidanceScale: 1.0,
  };

  const offlineTtsPocketModelConfig = {
    lmFlow: '',
    lmMain: '',
    encoder: '',
    decoder: '',
    textConditioner: '',
    vocabJson: '',
    tokenScoresJson: '',
    voiceEmbeddingCacheCapacity: 50,
  };

  let ruleFsts = '';

  switch (modelType) {
    case 0:
      // vits
      vits.model = './model.onnx';
      vits.tokens = './tokens.txt';
      vits.dataDir = './espeak-ng-data';
      break;
    case 1:
      // matcha zh-en
      // https://k2-fsa.github.io/sherpa/onnx/tts/all/Chinese-English/matcha-icefall-zh-en.html
      matcha.acousticModel = './model-steps-3.onnx';
      matcha.vocoder = './vocos-16khz-univ.onnx';
      matcha.lexicon = './lexicon.txt';
      matcha.tokens = './tokens.txt';
      matcha.dataDir = './espeak-ng-data';
      ruleFsts = './phone-zh.fst,./date-zh.fst,./number-zh.fst';
      break;
    case 2:
      // matcha zh
      // https://k2-fsa.github.io/sherpa/onnx/tts/all/Chinese/matcha-icefall-zh-baker.html
      matcha.acousticModel = './model-steps-3.onnx';
      matcha.vocoder = './vocos-22khz-univ.onnx';
      matcha.lexicon = './lexicon.txt';
      matcha.tokens = './tokens.txt';
      ruleFsts = './phone.fst,./date.fst,./number.fst';
      break;
    case 3:
      // matcha en
      // https://k2-fsa.github.io/sherpa/onnx/tts/all/English/matcha-icefall-en_US-ljspeech.html
      matcha.acousticModel = './model-steps-3.onnx';
      matcha.vocoder = './vocos-22khz-univ.onnx';
      matcha.tokens = './tokens.txt';
      matcha.dataDir = './espeak-ng-data';
      break;
    case 4:
      // zipvoice zh-en
      // https://k2-fsa.github.io/sherpa/onnx/tts/zipvoice.html
      offlineTtsZipVoiceModelConfig.tokens = './tokens.txt';
      offlineTtsZipVoiceModelConfig.encoder = './encoder.int8.onnx';
      offlineTtsZipVoiceModelConfig.decoder = './decoder.int8.onnx';
      offlineTtsZipVoiceModelConfig.vocoder = './vocos_24khz.onnx';
      offlineTtsZipVoiceModelConfig.dataDir = './espeak-ng-data';
      offlineTtsZipVoiceModelConfig.lexicon = './lexicon.txt';
      break;
    case 5:
      // pocket tts
      // https://k2-fsa.github.io/sherpa/onnx/tts/pocket.html
      offlineTtsPocketModelConfig.lmFlow = './lm_flow.int8.onnx';
      offlineTtsPocketModelConfig.lmMain = './lm_main.int8.onnx';
      offlineTtsPocketModelConfig.encoder = './encoder.onnx';
      offlineTtsPocketModelConfig.decoder = './decoder.int8.onnx';
      offlineTtsPocketModelConfig.textConditioner = './text_conditioner.onnx';
      offlineTtsPocketModelConfig.vocabJson = './vocab.json';
      offlineTtsPocketModelConfig.tokenScoresJson = './token_scores.json';
      break;
  }

  const offlineTtsModelConfig = {
    offlineTtsVitsModelConfig: vits,
    offlineTtsMatchaModelConfig: matcha,
    offlineTtsKokoroModelConfig: offlineTtsKokoroModelConfig,
    offlineTtsKittenModelConfig: offlineTtsKittenModelConfig,
    offlineTtsZipVoiceModelConfig: offlineTtsZipVoiceModelConfig,
    offlineTtsPocketModelConfig: offlineTtsPocketModelConfig,
    numThreads: 1,
    debug: 1,
    provider: 'cpu',
  };

  let offlineTtsConfig = {
    offlineTtsModelConfig: offlineTtsModelConfig,
    ruleFsts: ruleFsts,
    ruleFars: '',
    maxNumSentences: 1,
  }

  if (myConfig) {
    offlineTtsConfig = myConfig;
  }

  return new OfflineTts(offlineTtsConfig, Module);
}

if (typeof process == 'object' && typeof process.versions == 'object' &&
    typeof process.versions.node == 'string') {
  module.exports = {
    createOfflineTts,
    getDefaultOfflineTtsModelType,
  };
}


================================================
FILE: wasm/tts/sherpa-onnx-tts.worker.js
================================================
let tts = null;
self.Module = {
  // https://emscripten.org/docs/api_reference/module.html#Module.locateFile
  locateFile: function (path, scriptDirectory = "") {
    return scriptDirectory + path;
  },
  // https://emscripten.org/docs/api_reference/module.html#Module.locateFile
  setStatus: function (status) {
    self.postMessage({ type: "sherpa-onnx-tts-progress", status });
  },
  onRuntimeInitialized: function () {
    console.log("Model files downloaded!");
    console.log("Initializing tts ......");
    try {
      tts = createOfflineTts(self.Module);
      self.postMessage({
        type: "sherpa-onnx-tts-ready",
        modelType: getDefaultOfflineTtsModelType(),
        numSpeakers: tts.numSpeakers,
      });
    } catch (e) {
      self.postMessage({
        type: "error",
        message: "TTS Initialization failed: " + e.message,
      });
    }
  },
};
importScripts("sherpa-onnx-wasm-main-tts.js");
importScripts("sherpa-onnx-tts.js");

function getErrorMessage(err) {
  if (err instanceof Error) {
    if (err.stack) {
      return `${err.message}\n${err.stack}`;
    }
    return err.message;
  }

  return `${err}`;
}

self.onmessage = async (e) => {
  const { type, text, sid, speed, genConfig } = e.data;
  if (type === "generate") {
    if (!tts) {
      return;
    }
    try {
      const audio = tts.generate({
        text: text,
        sid: sid || 0,
        speed: speed || 1.0,
      });
      const samples = audio.samples;
      const sampleRate = tts.sampleRate;
      self.postMessage(
        {
          type: "sherpa-onnx-tts-result",
          samples: samples,
          sampleRate: sampleRate,
        },
        [samples.buffer],
      );
    } catch (err) {
      self.postMessage({
        type: "error",
        message: "Generation failed: " + getErrorMessage(err),
      });
    }
  } else if (type === "generateWithConfig") {
    if (!tts) {
      return;
    }
    try {
      const config = Object.assign({}, genConfig || {});
      config.callback = (samples, n, progress) => {
        self.postMessage({
          type: "sherpa-onnx-tts-generation-progress",
          progress: progress,
        });
        return 1;
      };

      const audio = tts.generateWithConfig(text, config);
      const samples = audio.samples;
      const sampleRate = audio.sampleRate;
      self.postMessage(
          {
            type: "sherpa-onnx-tts-result",
            samples: samples,
            sampleRate: sampleRate,
          },
          [samples.buffer],
      );
    } catch (err) {
      self.postMessage({
        type: "error",
        message: "Generation failed: " + getErrorMessage(err),
      });
    }
  }
};


================================================
FILE: wasm/tts/sherpa-onnx-wasm-main-tts.cc
================================================
// wasm/sherpa-onnx-wasm-main-tts.cc
//
// Copyright (c)  2024  Xiaomi Corporation
#include <stdio.h>

#include <algorithm>
#include <memory>

#include "sherpa-onnx/c-api/c-api.h"

// see also
// https://emscripten.org/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.html

extern "C" {

static_assert(sizeof(SherpaOnnxOfflineTtsVitsModelConfig) == 8 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineTtsMatchaModelConfig) == 8 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineTtsKokoroModelConfig) == 8 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineTtsKittenModelConfig) == 5 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineTtsZipvoiceModelConfig) == 10 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineTtsPocketModelConfig) == 8 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineTtsSupertonicModelConfig) == 7 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineTtsModelConfig) ==
                  sizeof(SherpaOnnxOfflineTtsVitsModelConfig) +
                      sizeof(SherpaOnnxOfflineTtsMatchaModelConfig) +
                      sizeof(SherpaOnnxOfflineTtsKokoroModelConfig) + 3 * 4 +
                      sizeof(SherpaOnnxOfflineTtsKittenModelConfig) +
                      sizeof(SherpaOnnxOfflineTtsZipvoiceModelConfig) +
                      sizeof(SherpaOnnxOfflineTtsPocketModelConfig) +
                      sizeof(SherpaOnnxOfflineTtsSupertonicModelConfig),
              "");

static_assert(sizeof(SherpaOnnxOfflineTtsConfig) ==
                  sizeof(SherpaOnnxOfflineTtsModelConfig) + 4 * 4,
              "");

static_assert(sizeof(SherpaOnnxGenerationConfig) == 9 * 4, "");

void MyPrint(SherpaOnnxOfflineTtsConfig *tts_config) {
  auto tts_model_config = &tts_config->model;
  auto vits_model_config = &tts_model_config->vits;
  auto matcha_model_config = &tts_model_config->matcha;
  auto kokoro = &tts_model_config->kokoro;
  auto kitten = &tts_model_config->kitten;
  auto zipvoice = &tts_model_config->zipvoice;
  auto pocket = &tts_model_config->pocket;
  fprintf(stdout, "----------vits model config----------\n");
  fprintf(stdout, "model: %s\n", vits_model_config->model);
  fprintf(stdout, "lexicon: %s\n", vits_model_config->lexicon);
  fprintf(stdout, "tokens: %s\n", vits_model_config->tokens);
  fprintf(stdout, "data_dir: %s\n", vits_model_config->data_dir);
  fprintf(stdout, "noise scale: %.3f\n", vits_model_config->noise_scale);
  fprintf(stdout, "noise scale w: %.3f\n", vits_model_config->noise_scale_w);
  fprintf(stdout, "length scale: %.3f\n", vits_model_config->length_scale);
  fprintf(stdout, "dict_dir: %s\n", vits_model_config->dict_dir);

  fprintf(stdout, "----------matcha model config----------\n");
  fprintf(stdout, "acoustic_model: %s\n", matcha_model_config->acoustic_model);
  fprintf(stdout, "vocoder: %s\n", matcha_model_config->vocoder);
  fprintf(stdout, "lexicon: %s\n", matcha_model_config->lexicon);
  fprintf(stdout, "tokens: %s\n", matcha_model_config->tokens);
  fprintf(stdout, "data_dir: %s\n", matcha_model_config->data_dir);
  fprintf(stdout, "noise scale: %.3f\n", matcha_model_config->noise_scale);
  fprintf(stdout, "length scale: %.3f\n", matcha_model_config->length_scale);
  fprintf(stdout, "dict_dir: %s\n", matcha_model_config->dict_dir);

  fprintf(stdout, "----------kokoro model config----------\n");
  fprintf(stdout, "model: %s\n", kokoro->model);
  fprintf(stdout, "voices: %s\n", kokoro->voices);
  fprintf(stdout, "tokens: %s\n", kokoro->tokens);
  fprintf(stdout, "data_dir: %s\n", kokoro->data_dir);
  fprintf(stdout, "length scale: %.3f\n", kokoro->length_scale);
  fprintf(stdout, "dict_dir: %s\n", kokoro->dict_dir);
  fprintf(stdout, "lexicon: %s\n", kokoro->lexicon);
  fprintf(stdout, "lang: %s\n", kokoro->lang);

  fprintf(stdout, "----------kitten model config----------\n");
  fprintf(stdout, "model: %s\n", kitten->model);
  fprintf(stdout, "voices: %s\n", kitten->voices);
  fprintf(stdout, "tokens: %s\n", kitten->tokens);
  fprintf(stdout, "data_dir: %s\n", kitten->data_dir);
  fprintf(stdout, "length scale: %.3f\n", kitten->length_scale);

  fprintf(stdout, "----------zipvoice model config----------\n");
  fprintf(stdout, "tokens: %s\n", zipvoice->tokens);
  fprintf(stdout, "encoder: %s\n", zipvoice->encoder);
  fprintf(stdout, "decoder: %s\n", zipvoice->decoder);
  fprintf(stdout, "vocoder: %s\n", zipvoice->vocoder);
  fprintf(stdout, "data_dir: %s\n", zipvoice->data_dir);
  fprintf(stdout, "lexicon: %s\n", zipvoice->lexicon);
  fprintf(stdout, "feat scale: %.3f\n", zipvoice->feat_scale);
  fprintf(stdout, "t_shift: %.3f\n", zipvoice->t_shift);
  fprintf(stdout, "target_rms: %.3f\n", zipvoice->target_rms);
  fprintf(stdout, "guidance_scale: %.3f\n", zipvoice->guidance_scale);

  fprintf(stdout, "----------pocketTTS model config----------\n");
  fprintf(stdout, "lm_flow: %s\n", pocket->lm_flow);
  fprintf(stdout, "lm_main: %s\n", pocket->lm_main);
  fprintf(stdout, "encoder: %s\n", pocket->encoder);
  fprintf(stdout, "decoder: %s\n", pocket->decoder);
  fprintf(stdout, "text_conditioner: %s\n", pocket->text_conditioner);
  fprintf(stdout, "vocab_json: %s\n", pocket->vocab_json);
  fprintf(stdout, "token_scores_json: %s\n", pocket->token_scores_json);
  fprintf(stdout, "voice_embedding_cache_capacity: %d\n",
          pocket->voice_embedding_cache_capacity);

  auto supertonic = &tts_model_config->supertonic;
  fprintf(stdout, "----------supertonic model config----------\n");
  fprintf(stdout, "duration_predictor: %s\n", supertonic->duration_predictor);
  fprintf(stdout, "text_encoder: %s\n", supertonic->text_encoder);
  fprintf(stdout, "vector_estimator: %s\n", supertonic->vector_estimator);
  fprintf(stdout, "vocoder: %s\n", supertonic->vocoder);
  fprintf(stdout, "tts_json: %s\n", supertonic->tts_json);
  fprintf(stdout, "unicode_indexer: %s\n", supertonic->unicode_indexer);
  fprintf(stdout, "voice_style: %s\n", supertonic->voice_style);

  fprintf(stdout, "----------tts model config----------\n");
  fprintf(stdout, "num threads: %d\n", tts_model_config->num_threads);
  fprintf(stdout, "debug: %d\n", tts_model_config->debug);
  fprintf(stdout, "provider: %s\n", tts_model_config->provider);

  fprintf(stdout, "----------tts config----------\n");
  fprintf(stdout, "rule_fsts: %s\n", tts_config->rule_fsts);
  fprintf(stdout, "rule_fars: %s\n", tts_config->rule_fars);
  fprintf(stdout, "max num sentences: %d\n", tts_config->max_num_sentences);
  fprintf(stdout, "silence scale: %.3f\n", tts_config->silence_scale);
}

void CopyHeap(const char *src, int32_t num_bytes, char *dst) {
  std::copy(src, src + num_bytes, dst);
}
}


================================================
FILE: wasm/vad/CMakeLists.txt
================================================
if(NOT $ENV{SHERPA_ONNX_IS_USING_BUILD_WASM_SH})
  message(FATAL_ERROR "Please use ./build-wasm-simd-vad.sh to build for wasm VAD")
endif()

if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/assets/silero_vad.onnx" AND NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/assets/ten-vad.onnx" )
  message(FATAL_ERROR "Please read ${CMAKE_CURRENT_SOURCE_DIR}/assets/README.md before you continue")
endif()

set(exported_functions
  MyPrint
  # VAD
  SherpaOnnxCreateCircularBuffer
  SherpaOnnxDestroyCircularBuffer
  SherpaOnnxCircularBufferPush
  SherpaOnnxCircularBufferGet
  SherpaOnnxCircularBufferFree
  SherpaOnnxCircularBufferPop
  SherpaOnnxCircularBufferSize
  SherpaOnnxCircularBufferHead
  SherpaOnnxCircularBufferReset
  SherpaOnnxCreateVoiceActivityDetector
  SherpaOnnxDestroyVoiceActivityDetector
  SherpaOnnxVoiceActivityDetectorAcceptWaveform
  SherpaOnnxVoiceActivityDetectorEmpty
  SherpaOnnxVoiceActivityDetectorDetected
  SherpaOnnxVoiceActivityDetectorPop
  SherpaOnnxVoiceActivityDetectorClear
  SherpaOnnxVoiceActivityDetectorFront
  SherpaOnnxDestroySpeechSegment
  SherpaOnnxVoiceActivityDetectorReset
  SherpaOnnxVoiceActivityDetectorFlush
  #
  SherpaOnnxFileExists
)
set(mangled_exported_functions)
foreach(x IN LISTS exported_functions)
  list(APPEND mangled_exported_functions "_${x}")
endforeach()
list(JOIN mangled_exported_functions "," all_exported_functions)

include_directories(${CMAKE_SOURCE_DIR})
set(MY_FLAGS " -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=64MB -s ALLOW_MEMORY_GROWTH=1")
string(APPEND MY_FLAGS " -sSTACK_SIZE=10485760 ") # 10MB
string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ")
string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ")
string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString','HEAPU8','HEAP16','HEAP32','HEAPU32','HEAPF32','HEAPF64'] ")

message(STATUS "MY_FLAGS: ${MY_FLAGS}")

set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}")
set(CMAKE_EXECUTABLE_LINKER_FLAGS "${CMAKE_EXECUTABLE_LINKER_FLAGS} ${MY_FLAGS}")

if (NOT CMAKE_EXECUTABLE_SUFFIX STREQUAL ".js")
  message(FATAL_ERROR "The default suffix for building executables should be .js!")
endif()
# set(CMAKE_EXECUTABLE_SUFFIX ".html")

add_executable(sherpa-onnx-wasm-main-vad sherpa-onnx-wasm-main-vad.cc)
target_link_libraries(sherpa-onnx-wasm-main-vad sherpa-onnx-c-api)
install(TARGETS sherpa-onnx-wasm-main-vad DESTINATION bin/wasm/vad)

install(
  FILES
    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-vad>/sherpa-onnx-wasm-main-vad.js"
    "index.html"
    "sherpa-onnx-vad.js"
    "app-vad.js"
    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-vad>/sherpa-onnx-wasm-main-vad.wasm"
    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-vad>/sherpa-onnx-wasm-main-vad.data"
  DESTINATION
    bin/wasm/vad
)


================================================
FILE: wasm/vad/app-vad.js
================================================
// This file copies and modifies code
// from https://mdn.github.io/web-dictaphone/scripts/app.js
// and https://gist.github.com/meziantou/edb7217fddfbb70e899e

const startBtn = document.getElementById('startBtn');
const stopBtn = document.getElementById('stopBtn');
const clearBtn = document.getElementById('clearBtn');
const soundClips = document.getElementById('sound-clips');

let textArea = document.getElementById('results');

let lastResult = '';
let resultList = [];

clearBtn.onclick = function() {
  resultList = [];
  textArea.value = getDisplayResult();
  textArea.scrollTop = textArea.scrollHeight;  // auto scroll
};

function getDisplayResult() {
  let i = 0;
  let ans = '';
  for (let s in resultList) {
    if (resultList[s] == '') {
      continue;
    }

    if (resultList[s] == 'Speech detected') {
      ans += '' + i + ': ' + resultList[s];
      i += 1;
    } else {
      ans += ', ' + resultList[s] + '\n';
    }
  }

  if (lastResult.length > 0) {
    ans += '' + i + ': ' + lastResult + '\n';
  }
  return ans;
}


Module = {};

// https://emscripten.org/docs/api_reference/module.html#Module.locateFile
Module.locateFile = function(path, scriptDirectory = '') {
  console.log(`path: ${path}, scriptDirectory: ${scriptDirectory}`);
  return scriptDirectory + path;
};

// https://emscripten.org/docs/api_reference/module.html#Module.locateFile
Module.setStatus = function(status) {
  console.log(`status ${status}`);
  const statusElement = document.getElementById('status');
  if (status == 'Running...') {
    status = 'Model downloaded. Initializing vad...'
  }

  const downloadMatch = status.match(/Downloading data... \((\d+)\/(\d+)\)/);
  if (downloadMatch) {
    const downloaded = BigInt(downloadMatch[1]);
    const total = BigInt(downloadMatch[2]);
    const percent =
        total === 0 ? 0.00 : Number((downloaded * 10000n) / total) / 100;
    const downloadedMB = Number(downloaded) / (1024 * 1024);
    const totalMB = Number(total) / (1024 * 1024);
    status = `Downloading data... ${percent.toFixed(2)}% (${downloadedMB.toFixed(2)} MB/${
        totalMB.toFixed(2)} MB)`;
    console.log(`here ${status}`)
  }

  statusElement.textContent = status;
  if (status === '') {
    statusElement.style.display = 'none';
    // statusElement.parentNode.removeChild(statusElement);

    document.querySelectorAll('.tab-content').forEach((tabContentElement) => {
      tabContentElement.classList.remove('loading');
    });
  } else {
    statusElement.style.display = 'block';
    document.querySelectorAll('.tab-content').forEach((tabContentElement) => {
      tabContentElement.classList.add('loading');
    });
  }
};

Module.onRuntimeInitialized = function() {
  console.log('inited!');

  startBtn.disabled = false;

  initVad();
  console.log('vad is created!', vad);

  buffer = new CircularBuffer(30 * 16000, Module);
  console.log('CircularBuffer is created!', buffer);
};

function fileExists(filename) {
  const filenameLen = Module.lengthBytesUTF8(filename) + 1;
  const buffer = Module._malloc(filenameLen);
  Module.stringToUTF8(filename, buffer, filenameLen);

  let exists = Module._SherpaOnnxFileExists(buffer);

  Module._free(buffer);

  return exists;
}

function initVad() {
  const sileroVad = {
    model: '',
    threshold: 0.50,
    minSilenceDuration: 0.50,
    minSpeechDuration: 0.25,
    maxSpeechDuration: 20,
    windowSize: 512,
  };

  const tenVad = {
    model: '',
    threshold: 0.50,
    minSilenceDuration: 0.50,
    minSpeechDuration: 0.25,
    maxSpeechDuration: 20,
    windowSize: 256,
  };

  let config = {
    sileroVad: sileroVad,
    tenVad: tenVad,
    sampleRate: 16000,
    numThreads: 1,
    provider: 'cpu',
    debug: 1,
    bufferSizeInSeconds: 30,
  };

  if (fileExists('silero_vad.onnx') == 1) {
    config.sileroVad.model = 'silero_vad.onnx'
  } else if (fileExists('ten-vad.onnx') == 1) {
    config.tenVad.model = 'ten-vad.onnx'
  }

  vad = createVad(Module, config);
}

let audioCtx;
let mediaStream;

let expectedSampleRate = 16000;
let recordSampleRate;  // the sampleRate of the microphone
let recorder = null;   // the microphone
let leftchannel = [];  // TODO: Use a single channel

let recordingLength = 0;  // number of samples so far

let vad = null;
let buffer = null;
let printed = false;

if (navigator.mediaDevices.getUserMedia) {
  console.log('getUserMedia supported.');

  // see https://w3c.github.io/mediacapture-main/#dom-mediadevices-getusermedia
  const constraints = {audio: true};

  let onSuccess = function(stream) {
    if (!audioCtx) {
      audioCtx = new AudioContext({sampleRate: expectedSampleRate});
    }
    console.log(audioCtx);
    recordSampleRate = audioCtx.sampleRate;
    console.log('sample rate ' + recordSampleRate);

    // creates an audio node from the microphone incoming stream
    mediaStream = audioCtx.createMediaStreamSource(stream);
    console.log('media stream', mediaStream);

    // https://developer.mozilla.org/en-US/docs/Web/API/AudioContext/createScriptProcessor
    // bufferSize: the onaudioprocess event is called when the buffer is full
    var bufferSize = 4096;
    var numberOfInputChannels = 1;
    var numberOfOutputChannels = 2;
    if (audioCtx.createScriptProcessor) {
      recorder = audioCtx.createScriptProcessor(
          bufferSize, numberOfInputChannels, numberOfOutputChannels);
    } else {
      recorder = audioCtx.createJavaScriptNode(
          bufferSize, numberOfInputChannels, numberOfOutputChannels);
    }
    console.log('recorder', recorder);

    recorder.onaudioprocess = function(e) {
      let samples = new Float32Array(e.inputBuffer.getChannelData(0))
      samples = downsampleBuffer(samples, expectedSampleRate);
      buffer.push(samples);
      while (buffer.size() > vad.config.sileroVad.windowSize) {
        const s = buffer.get(buffer.head(), vad.config.sileroVad.windowSize);
        vad.acceptWaveform(s);
        buffer.pop(vad.config.sileroVad.windowSize);

        if (vad.isDetected() && !printed) {
          printed = true;
          lastResult = 'Speech detected';
        }

        if (!vad.isDetected()) {
          printed = false;
          if (lastResult != '') {
            resultList.push(lastResult);
          }
          lastResult = '';
        }

        while (!vad.isEmpty()) {
          const segment = vad.front();
          const duration = segment.samples.length / expectedSampleRate;
          const durationStr = `Duration: ${duration.toFixed(3)} seconds`;
          resultList.push(durationStr);
          vad.pop();

          // now save the segment to a wav file
          let buf = new Int16Array(segment.samples.length);
          for (var i = 0; i < segment.samples.length; ++i) {
            let s = segment.samples[i];
            if (s >= 1)
              s = 1;
            else if (s <= -1)
              s = -1;

            buf[i] = s * 32767;
          }

          let clipName = new Date().toISOString() + '--' + durationStr;

          const clipContainer = document.createElement('article');
          const clipLabel = document.createElement('p');
          const audio = document.createElement('audio');
          const deleteButton = document.createElement('button');

          clipContainer.classList.add('clip');
          audio.setAttribute('controls', '');
          deleteButton.textContent = 'Delete';
          deleteButton.className = 'delete';

          clipLabel.textContent = clipName;

          clipContainer.appendChild(audio);

          clipContainer.appendChild(clipLabel);
          clipContainer.appendChild(deleteButton);
          soundClips.appendChild(clipContainer);

          audio.controls = true;
          const blob = toWav(buf);

          leftchannel = [];
          const audioURL = window.URL.createObjectURL(blob);
          audio.src = audioURL;

          deleteButton.onclick = function(e) {
            let evtTgt = e.target;
            evtTgt.parentNode.parentNode.removeChild(evtTgt.parentNode);
          };

          clipLabel.onclick = function() {
            const existingName = clipLabel.textContent;
            const newClipName = prompt('Enter a new name for your sound clip?');
            if (newClipName === null) {
              clipLabel.textContent = existingName;
            } else {
              clipLabel.textContent = newClipName;
            }
          };
        }
      }

      textArea.value = getDisplayResult();
      textArea.scrollTop = textArea.scrollHeight;  // auto scroll
    };

    startBtn.onclick = function() {
      mediaStream.connect(recorder);
      recorder.connect(audioCtx.destination);

      console.log('recorder started');

      stopBtn.disabled = false;
      startBtn.disabled = true;
    };

    stopBtn.onclick = function() {
      vad.reset();
      buffer.reset();
      console.log('recorder stopped');

      // stopBtn recording
      recorder.disconnect(audioCtx.destination);
      mediaStream.disconnect(recorder);

      startBtn.style.background = '';
      startBtn.style.color = '';
      // mediaRecorder.requestData();

      stopBtn.disabled = true;
      startBtn.disabled = false;
    };
  };

  let onError = function(err) {
    console.log('The following error occurred: ' + err);
  };

  navigator.mediaDevices.getUserMedia(constraints).then(onSuccess, onError);
} else {
  console.log('getUserMedia not supported on your browser!');
  alert('getUserMedia not supported on your browser!');
}


// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
function flatten(listOfSamples) {
  let n = 0;
  for (let i = 0; i < listOfSamples.length; ++i) {
    n += listOfSamples[i].length;
  }
  let ans = new Int16Array(n);

  let offset = 0;
  for (let i = 0; i < listOfSamples.length; ++i) {
    ans.set(listOfSamples[i], offset);
    offset += listOfSamples[i].length;
  }
  return ans;
}

// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
function toWav(samples) {
  let buf = new ArrayBuffer(44 + samples.length * 2);
  var view = new DataView(buf);

  // http://soundfile.sapp.org/doc/WaveFormat/
  //                   F F I R
  view.setUint32(0, 0x46464952, true);               // chunkID
  view.setUint32(4, 36 + samples.length * 2, true);  // chunkSize
  //                   E V A W
  view.setUint32(8, 0x45564157, true);  // format
                                        //
  //                      t m f
  view.setUint32(12, 0x20746d66, true);          // subchunk1ID
  view.setUint32(16, 16, true);                  // subchunk1Size, 16 for PCM
  view.setUint32(20, 1, true);                   // audioFormat, 1 for PCM
  view.setUint16(22, 1, true);                   // numChannels: 1 channel
  view.setUint32(24, expectedSampleRate, true);  // sampleRate
  view.setUint32(28, expectedSampleRate * 2, true);  // byteRate
  view.setUint16(32, 2, true);                       // blockAlign
  view.setUint16(34, 16, true);                      // bitsPerSample
  view.setUint32(36, 0x61746164, true);              // Subchunk2ID
  view.setUint32(40, samples.length * 2, true);      // subchunk2Size

  let offset = 44;
  for (let i = 0; i < samples.length; ++i) {
    view.setInt16(offset, samples[i], true);
    offset += 2;
  }

  return new Blob([view], {type: 'audio/wav'});
}

// this function is copied from
// https://github.com/awslabs/aws-lex-browser-audio-capture/blob/master/lib/worker.js#L46
function downsampleBuffer(buffer, exportSampleRate) {
  if (exportSampleRate === recordSampleRate) {
    return buffer;
  }
  var sampleRateRatio = recordSampleRate / exportSampleRate;
  var newLength = Math.round(buffer.length / sampleRateRatio);
  var result = new Float32Array(newLength);
  var offsetResult = 0;
  var offsetBuffer = 0;
  while (offsetResult < result.length) {
    var nextOffsetBuffer = Math.round((offsetResult + 1) * sampleRateRatio);
    var accum = 0, count = 0;
    for (var i = offsetBuffer; i < nextOffsetBuffer && i < buffer.length; i++) {
      accum += buffer[i];
      count++;
    }
    result[offsetResult] = accum / count;
    offsetResult++;
    offsetBuffer = nextOffsetBuffer;
  }
  return result;
};


================================================
FILE: wasm/vad/assets/README.md
================================================
# Introduction

## Use silero-vad

Please download
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
and put `silero_vad.onnx` into the current directory, i.e., `wasm/vad/assets`.

You can find example build script at
https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-silero-vad.yaml

```
cd /path/to/sherpa-onnx/wasm/vad/assets
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
```

## Use ten-vad

Please download
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
and put `ten-vad.onnx` into the current directory, i.e., `wasm/vad/assets`.

You can find example build script at
https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-ten-vad.yaml

```
cd /path/to/sherpa-onnx/wasm/vad/assets
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
cd ..
sed -i.bak "s|.*(with <a .*|    (with <a href="https://github.com/TEN-framework/ten-vad">ten-vad</a>)|" ./index.html

```


================================================
FILE: wasm/vad/index.html
================================================
<html lang="en">

<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width" />
  <title>Next-gen Kaldi WebAssembly with sherpa-onnx for VAD</title>
  <style>
    h1,div {
      text-align: center;
    }
    textarea {
      width:100%;
    }
    .loading {
      display: none !important;
    }
  </style>
</head>

<body style="font-family: 'Source Sans Pro', sans-serif; background-color: #f9fafb; color: #333; display: flex; flex-direction: column; align-items: center; height: 100vh; margin: 0;">
  <h1>
    Next-gen Kaldi + WebAssembly<br/>
    VAD Demo using <a href="https://github.com/k2-fsa/sherpa-onnx">sherpa-onnx</a><br/>
    (with <a href="https://github.com/snakers4/silero-vad">silero-vad</a>)
  </h1>

  <div style="width: 100%; max-width: 900px; background: #fff; padding: 1.5rem; border-radius: 8px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1); flex: 1;">
    <div id="status">Loading...</div>

    <div id="singleAudioContent" class="tab-content loading">
      <div style="display: flex; gap: 1.5rem;">
        <div style="flex: 1; display: flex; flex-direction: row; align-items: center; gap: 1rem;">
          <button id="startBtn" disabled>Start</button>
          <button id="stopBtn" disabled>Stop</button>
          <button id="clearBtn">Clear</button>
        </div>
      </div>

      <div style="flex: 1; display: flex; flex-direction: column; gap: 1rem;">
          <textarea id="results" rows="10" placeholder="Please click start and speak. Output will appear here..." readonly style="flex: 1; padding: 0.75rem; font-size: 1rem; border: 1px solid #ced4da; border-radius: 8px; resize: none; background-color: #f8f9fa;"></textarea>
      </div>

      <section flex="1" overflow="auto" id="sound-clips">
      </section>
  </div>

  <!-- Footer Section -->
  <div style="width: 100%; max-width: 900px; margin-top: 1.5rem; background: #fff; padding: 1.5rem; border-radius: 8px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1); text-align: left; font-size: 0.9rem; color: #6c757d;">
    <h3>Description</h3>
    <ul>
      <li>Everything is <strong>open-sourced.</strong> <a href="https://github.com/k2-fsa/sherpa-onnx">code</a></li>
      <li>If you have any issues, please either <a href="https://github.com/k2-fsa/sherpa-onnx/issues">file a ticket</a> or contact us via</li>
        <ul>
          <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#wechat">WeChat group</a></li>
          <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#qq">QQ group</a></li>
          <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#bilibili-b">Bilibili</a></li>
        </ul>
    </ul>
    <h3>About This Demo</h3>
    <ul>
      <li><strong>Private and Secure:</strong> All processing is done locally on your device (CPU) within your browser with a single thread. No server is involved, ensuring privacy and security. You can disconnect from the Internet once this page is loaded.</li>
      <li><strong>Efficient Resource Usage:</strong> No GPU is required, leaving system resources available for webLLM analysis.</li>
    </ul>
    <h3>Latest Update</h3>
    <ul>
      <li>Update UI.</li>
      <li>First working version.</li>
    </ul>

    <h3>Acknowledgement</h3>
    <ul>
      <li>We refer to <a href="https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Wasm">https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Wasm</a> for the UI part.</li>
    </ul>
  </div>

  <script src="sherpa-onnx-vad.js"></script>
  <script src="app-vad.js"></script>
  <script src="sherpa-onnx-wasm-main-vad.js"></script>
</body>


================================================
FILE: wasm/vad/sherpa-onnx-vad.js
================================================
function freeConfig(config, Module) {
  if ('buffer' in config) {
    Module._free(config.buffer);
  }

  if ('sileroVad' in config) {
    freeConfig(config.sileroVad, Module)
  }

  if ('tenVad' in config) {
    freeConfig(config.tenVad, Module)
  }


  Module._free(config.ptr);
}

// The user should free the returned pointers
function initSherpaOnnxSileroVadModelConfig(config, Module) {
  const modelLen = Module.lengthBytesUTF8(config.model || '') + 1;

  const n = modelLen;

  const buffer = Module._malloc(n);

  const len = 6 * 4;
  const ptr = Module._malloc(len);

  Module.stringToUTF8(config.model || '', buffer, modelLen);

  let offset = 0;
  Module.setValue(ptr, buffer, 'i8*');
  offset += 4;

  Module.setValue(ptr + offset, config.threshold || 0.5, 'float');
  offset += 4;

  Module.setValue(ptr + offset, config.minSilenceDuration || 0.5, 'float');
  offset += 4;

  Module.setValue(ptr + offset, config.minSpeechDuration || 0.25, 'float');
  offset += 4;

  Module.setValue(ptr + offset, config.windowSize || 512, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, config.maxSpeechDuration || 20, 'float');
  offset += 4;

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxTenVadModelConfig(config, Module) {
  const modelLen = Module.lengthBytesUTF8(config.model || '') + 1;

  const n = modelLen;

  const buffer = Module._malloc(n);

  const len = 6 * 4;
  const ptr = Module._malloc(len);

  Module.stringToUTF8(config.model || '', buffer, modelLen);

  let offset = 0;
  Module.setValue(ptr, buffer, 'i8*');
  offset += 4;

  Module.setValue(ptr + offset, config.threshold || 0.5, 'float');
  offset += 4;

  Module.setValue(ptr + offset, config.minSilenceDuration || 0.5, 'float');
  offset += 4;

  Module.setValue(ptr + offset, config.minSpeechDuration || 0.25, 'float');
  offset += 4;

  Module.setValue(ptr + offset, config.windowSize || 256, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, config.maxSpeechDuration || 20, 'float');
  offset += 4;

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

function initSherpaOnnxVadModelConfig(config, Module) {
  if (!('sileroVad' in config)) {
    config.sileroVad = {
      model: '',
      threshold: 0.50,
      minSilenceDuration: 0.50,
      minSpeechDuration: 0.25,
      windowSize: 512,
      maxSpeechDuration: 20,
    };
  }

  if (!('tenVad' in config)) {
    config.tenVad = {
      model: '',
      threshold: 0.50,
      minSilenceDuration: 0.50,
      minSpeechDuration: 0.25,
      windowSize: 256,
      maxSpeechDuration: 20,
    };
  }

  const sileroVad =
      initSherpaOnnxSileroVadModelConfig(config.sileroVad, Module);

  const tenVad = initSherpaOnnxTenVadModelConfig(config.tenVad, Module);

  const len = sileroVad.len + 4 * 4 + tenVad.len;
  const ptr = Module._malloc(len);

  const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1;
  const buffer = Module._malloc(providerLen);
  Module.stringToUTF8(config.provider || 'cpu', buffer, providerLen);

  let offset = 0;
  Module._CopyHeap(sileroVad.ptr, sileroVad.len, ptr + offset);
  offset += sileroVad.len;

  Module.setValue(ptr + offset, config.sampleRate || 16000, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, config.numThreads || 1, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, buffer, 'i8*');  // provider
  offset += 4;

  Module.setValue(ptr + offset, config.debug || 0, 'i32');
  offset += 4;

  Module._CopyHeap(tenVad.ptr, tenVad.len, ptr + offset);
  offset += tenVad.len;

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
    sileroVad: sileroVad,
    tenVad: tenVad
  };
}

function createVad(Module, myConfig) {
  const sileroVad = {
    model: './silero_vad.onnx',
    threshold: 0.50,
    minSilenceDuration: 0.50,
    minSpeechDuration: 0.25,
    maxSpeechDuration: 20,
    windowSize: 512,
  };

  const tenVad = {
    model: '',
    threshold: 0.50,
    minSilenceDuration: 0.50,
    minSpeechDuration: 0.25,
    maxSpeechDuration: 20,
    windowSize: 256,
  };

  let config = {
    sileroVad: sileroVad,
    tenVad: tenVad,
    sampleRate: 16000,
    numThreads: 1,
    provider: 'cpu',
    debug: 1,
    bufferSizeInSeconds: 30,
  };

  if (myConfig) {
    config = myConfig;
  }

  return new Vad(config, Module);
}


class CircularBuffer {
  constructor(capacity, Module) {
    this.handle = Module._SherpaOnnxCreateCircularBuffer(capacity);
    this.Module = Module;
  }

  free() {
    this.Module._SherpaOnnxDestroyCircularBuffer(this.handle);
    this.handle = 0
  }

  /**
   * @param samples {Float32Array}
   */
  push(samples) {
    const pointer =
        this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT);
    this.Module.HEAPF32.set(samples, pointer / samples.BYTES_PER_ELEMENT);
    this.Module._SherpaOnnxCircularBufferPush(
        this.handle, pointer, samples.length);
    this.Module._free(pointer);
  }

  get(startIndex, n) {
    const p =
        this.Module._SherpaOnnxCircularBufferGet(this.handle, startIndex, n);

    const samplesPtr = p / 4;
    const samples = new Float32Array(n);
    for (let i = 0; i < n; i++) {
      samples[i] = this.Module.HEAPF32[samplesPtr + i];
    }

    this.Module._SherpaOnnxCircularBufferFree(p);

    return samples;
  }

  pop(n) {
    this.Module._SherpaOnnxCircularBufferPop(this.handle, n);
  }

  size() {
    return this.Module._SherpaOnnxCircularBufferSize(this.handle);
  }

  head() {
    return this.Module._SherpaOnnxCircularBufferHead(this.handle);
  }

  reset() {
    this.Module._SherpaOnnxCircularBufferReset(this.handle);
  }
}

class Vad {
  constructor(configObj, Module) {
    this.config = configObj;
    const config = initSherpaOnnxVadModelConfig(configObj, Module);
    const handle = Module._SherpaOnnxCreateVoiceActivityDetector(
        config.ptr, configObj.bufferSizeInSeconds || 30);
    freeConfig(config, Module);

    this.handle = handle;
    this.Module = Module;
  }

  free() {
    this.Module._SherpaOnnxDestroyVoiceActivityDetector(this.handle);
    this.handle = 0
  }

  // samples is a float32 array
  acceptWaveform(samples) {
    const pointer =
        this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT);
    this.Module.HEAPF32.set(samples, pointer / samples.BYTES_PER_ELEMENT);
    this.Module._SherpaOnnxVoiceActivityDetectorAcceptWaveform(
        this.handle, pointer, samples.length);
    this.Module._free(pointer);
  }

  isEmpty() {
    return this.Module._SherpaOnnxVoiceActivityDetectorEmpty(this.handle) == 1;
  }

  isDetected() {
    return this.Module._SherpaOnnxVoiceActivityDetectorDetected(this.handle) ==
        1;
  }

  pop() {
    this.Module._SherpaOnnxVoiceActivityDetectorPop(this.handle);
  }

  clear() {
    this.Module._SherpaOnnxVoiceActivityDetectorClear(this.handle);
  }

  /*
{
  samples: a 1-d float32 array,
  start: an int32
}
   */
  front() {
    const h = this.Module._SherpaOnnxVoiceActivityDetectorFront(this.handle);

    const start = this.Module.HEAP32[h / 4];
    const samplesPtr = this.Module.HEAP32[h / 4 + 1] / 4;
    const numSamples = this.Module.HEAP32[h / 4 + 2];

    const samples = new Float32Array(numSamples);
    for (let i = 0; i < numSamples; i++) {
      samples[i] = this.Module.HEAPF32[samplesPtr + i];
    }

    this.Module._SherpaOnnxDestroySpeechSegment(h);
    return {samples: samples, start: start};
  }

  reset() {
    this.Module._SherpaOnnxVoiceActivityDetectorReset(this.handle);
  }

  flush() {
    this.Module._SherpaOnnxVoiceActivityDetectorFlush(this.handle);
  }
};

if (typeof process == 'object' && typeof process.versions == 'object' &&
    typeof process.versions.node == 'string') {
  module.exports = {
    createVad,
    CircularBuffer,
  };
}


================================================
FILE: wasm/vad/sherpa-onnx-wasm-main-vad.cc
================================================
// wasm/sherpa-onnx-wasm-main-vad.cc
//
// Copyright (c)  2024  Xiaomi Corporation
#include <stdio.h>

#include <algorithm>
#include <memory>

#include "sherpa-onnx/c-api/c-api.h"

// see also
// https://emscripten.org/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.html

extern "C" {

static_assert(sizeof(SherpaOnnxSileroVadModelConfig) == 6 * 4, "");
static_assert(sizeof(SherpaOnnxTenVadModelConfig) == 6 * 4, "");

static_assert(sizeof(SherpaOnnxVadModelConfig) ==
                  sizeof(SherpaOnnxSileroVadModelConfig) + 4 * 4 +
                      sizeof(SherpaOnnxTenVadModelConfig),
              "");
void MyPrint(SherpaOnnxVadModelConfig *config) {
  auto silero_vad = &config->silero_vad;
  auto ten_vad = &config->ten_vad;

  fprintf(stdout, "----------silero_vad config----------\n");
  fprintf(stdout, "model: %s\n", silero_vad->model);
  fprintf(stdout, "threshold: %.3f\n", silero_vad->threshold);
  fprintf(stdout, "min_silence_duration: %.3f\n",
          silero_vad->min_silence_duration);
  fprintf(stdout, "min_speech_duration: %.3f\n",
          silero_vad->min_speech_duration);
  fprintf(stdout, "window_size: %d\n", silero_vad->window_size);
  fprintf(stdout, "max_speech_duration: %.3f\n",
          silero_vad->max_speech_duration);

  fprintf(stdout, "----------ten_vad config----------\n");
  fprintf(stdout, "model: %s\n", ten_vad->model);
  fprintf(stdout, "threshold: %.3f\n", ten_vad->threshold);
  fprintf(stdout, "min_silence_duration: %.3f\n",
          ten_vad->min_silence_duration);
  fprintf(stdout, "min_speech_duration: %.3f\n", ten_vad->min_speech_duration);
  fprintf(stdout, "window_size: %d\n", ten_vad->window_size);
  fprintf(stdout, "max_speech_duration: %.3f\n", ten_vad->max_speech_duration);

  fprintf(stdout, "----------config----------\n");

  fprintf(stdout, "sample_rate: %d\n", config->sample_rate);
  fprintf(stdout, "num_threads: %d\n", config->num_threads);

  fprintf(stdout, "provider: %s\n", config->provider);
  fprintf(stdout, "debug: %d\n", config->debug);
}

void CopyHeap(const char *src, int32_t num_bytes, char *dst) {
  std::copy(src, src + num_bytes, dst);
}
}


================================================
FILE: wasm/vad-asr/CMakeLists.txt
================================================
if(NOT $ENV{SHERPA_ONNX_IS_USING_BUILD_WASM_SH})
  message(FATAL_ERROR "Please use ./build-wasm-simd-vad.sh to build for wasm VAD")
endif()

if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/assets/silero_vad.onnx" OR NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/assets/tokens.txt")
  message(FATAL_ERROR "Please read ${CMAKE_CURRENT_SOURCE_DIR}/assets/README.md before you continue")
endif()

set(exported_functions
  # VAD
  SherpaOnnxCreateCircularBuffer
  SherpaOnnxDestroyCircularBuffer
  SherpaOnnxCircularBufferPush
  SherpaOnnxCircularBufferGet
  SherpaOnnxCircularBufferFree
  SherpaOnnxCircularBufferPop
  SherpaOnnxCircularBufferSize
  SherpaOnnxCircularBufferHead
  SherpaOnnxCircularBufferReset
  SherpaOnnxCreateVoiceActivityDetector
  SherpaOnnxDestroyVoiceActivityDetector
  SherpaOnnxVoiceActivityDetectorAcceptWaveform
  SherpaOnnxVoiceActivityDetectorEmpty
  SherpaOnnxVoiceActivityDetectorDetected
  SherpaOnnxVoiceActivityDetectorPop
  SherpaOnnxVoiceActivityDetectorClear
  SherpaOnnxVoiceActivityDetectorFront
  SherpaOnnxDestroySpeechSegment
  SherpaOnnxVoiceActivityDetectorReset
  SherpaOnnxVoiceActivityDetectorFlush
  # non-streaming ASR
  SherpaOnnxAcceptWaveformOffline
  SherpaOnnxCreateOfflineRecognizer
  SherpaOnnxCreateOfflineStream
  SherpaOnnxDecodeMultipleOfflineStreams
  SherpaOnnxDecodeOfflineStream
  SherpaOnnxDestroyOfflineRecognizer
  SherpaOnnxDestroyOfflineRecognizerResult
  SherpaOnnxDestroyOfflineStream
  SherpaOnnxDestroyOfflineStreamResultJson
  SherpaOnnxGetOfflineStreamResult
  SherpaOnnxGetOfflineStreamResultAsJson
  #
  SherpaOnnxFileExists
)
set(mangled_exported_functions)
foreach(x IN LISTS exported_functions)
  list(APPEND mangled_exported_functions "_${x}")
endforeach()
list(JOIN mangled_exported_functions "," all_exported_functions)

include_directories(${CMAKE_SOURCE_DIR})
set(MY_FLAGS " -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=512MB -s ALLOW_MEMORY_GROWTH=1")
string(APPEND MY_FLAGS " -sSTACK_SIZE=10485760 ") # 10MB
string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ")
string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ")
string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString','HEAPU8','HEAP16','HEAP32','HEAPU32','HEAPF32','HEAPF64'] ")

message(STATUS "MY_FLAGS: ${MY_FLAGS}")

set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}")
set(CMAKE_EXECUTABLE_LINKER_FLAGS "${CMAKE_EXECUTABLE_LINKER_FLAGS} ${MY_FLAGS}")

if (NOT CMAKE_EXECUTABLE_SUFFIX STREQUAL ".js")
  message(FATAL_ERROR "The default suffix for building executables should be .js!")
endif()
# set(CMAKE_EXECUTABLE_SUFFIX ".html")

add_executable(sherpa-onnx-wasm-main-vad-asr sherpa-onnx-wasm-main-vad-asr.cc)
target_link_libraries(sherpa-onnx-wasm-main-vad-asr sherpa-onnx-c-api)
install(TARGETS sherpa-onnx-wasm-main-vad-asr DESTINATION bin/wasm/vad-asr)

install(
  FILES
    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-vad-asr>/sherpa-onnx-wasm-main-vad-asr.js"
    "index.html"
    "app-vad-asr.js"
    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-vad-asr>/sherpa-onnx-wasm-main-vad-asr.wasm"
    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-vad-asr>/sherpa-onnx-wasm-main-vad-asr.data"
  DESTINATION
    bin/wasm/vad-asr
)


================================================
FILE: wasm/vad-asr/app-vad-asr.js
================================================
// This file copies and modifies code
// from https://mdn.github.io/web-dictaphone/scripts/app.js
// and https://gist.github.com/meziantou/edb7217fddfbb70e899e

const startBtn = document.getElementById('startBtn');
const stopBtn = document.getElementById('stopBtn');
const clearBtn = document.getElementById('clearBtn');
const soundClips = document.getElementById('sound-clips');

let textArea = document.getElementById('results');

let lastResult = '';
let resultList = [];

clearBtn.onclick = function() {
  resultList = [];
  textArea.value = getDisplayResult();
  textArea.scrollTop = textArea.scrollHeight;  // auto scroll
};

function getDisplayResult() {
  let i = 0;
  let ans = '';
  for (let s in resultList) {
    if (resultList[s] == '') {
      continue;
    }

    if (resultList[s] == 'Speech detected') {
      ans += '' + i + ': ' + resultList[s];
      i += 1;
    } else {
      ans += ', ' + resultList[s] + '\n';
    }
  }

  if (lastResult.length > 0) {
    ans += '' + i + ': ' + lastResult + '\n';
  }
  return ans;
}

Module = {};

let audioCtx;
let mediaStream;

let expectedSampleRate = 16000;
let recordSampleRate;  // the sampleRate of the microphone
let recorder = null;   // the microphone
let leftchannel = [];  // TODO: Use a single channel

let recordingLength = 0;  // number of samples so far

let vad = null;
let buffer = null;
let recognizer = null;
let printed = false;

function fileExists(filename) {
  const filenameLen = Module.lengthBytesUTF8(filename) + 1;
  const buffer = Module._malloc(filenameLen);
  Module.stringToUTF8(filename, buffer, filenameLen);

  let exists = Module._SherpaOnnxFileExists(buffer);

  Module._free(buffer);

  return exists;
}

function initOfflineRecognizer() {
  let config = {
    modelConfig: {
      debug: 1,
      tokens: './tokens.txt',
    },
  };
  if (fileExists('sense-voice.onnx') == 1) {
    config.modelConfig.senseVoice = {
      model: './sense-voice.onnx',
      useInverseTextNormalization: 1,
    };
  } else if (fileExists('whisper-encoder.onnx')) {
    config.modelConfig.whisper = {
      encoder: './whisper-encoder.onnx',
      decoder: './whisper-decoder.onnx',
    };
  } else if (fileExists('transducer-encoder.onnx')) {
    config.modelConfig.transducer = {
      encoder: './transducer-encoder.onnx',
      decoder: './transducer-decoder.onnx',
      joiner: './transducer-joiner.onnx',
    };
    config.modelConfig.modelType = 'transducer';
  } else if (fileExists('nemo-transducer-encoder.onnx')) {
    config.modelConfig.transducer = {
      encoder: './nemo-transducer-encoder.onnx',
      decoder: './nemo-transducer-decoder.onnx',
      joiner: './nemo-transducer-joiner.onnx',
    };
    config.modelConfig.modelType = 'nemo_transducer';
  } else if (fileExists('paraformer.onnx')) {
    config.modelConfig.paraformer = {
      model: './paraformer.onnx',
    };
  } else if (fileExists('telespeech.onnx')) {
    config.modelConfig.telespeechCtc = './telespeech.onnx';
  } else if (fileExists('moonshine-preprocessor.onnx')) {
    // moonshine v1
    config.modelConfig.moonshine = {
      preprocessor: './moonshine-preprocessor.onnx',
      encoder: './moonshine-encoder.onnx',
      uncachedDecoder: './moonshine-uncached-decoder.onnx',
      cachedDecoder: './moonshine-cached-decoder.onnx'
    };
  } else if (fileExists('moonshine-merged-decoder.ort')) {
    // moonshine v2
    config.modelConfig.moonshine = {
      encoder: './moonshine-encoder.ort',
      mergedDecoder: './moonshine-merged-decoder.ort'
    };
  } else if (fileExists('dolphin.onnx')) {
    config.modelConfig.dolphin = {model: './dolphin.onnx'};
  } else if (fileExists('zipformer-ctc.onnx')) {
    // you need to rename model.int8.onnx from zipformer CTC to
    // zipformer-ctc.onnx
    config.modelConfig.zipformerCtc = {model: './zipformer-ctc.onnx'};
  } else {
    console.log('Please specify a model.');
    alert('Please specify a model.');
  }

  recognizer = new OfflineRecognizer(config, Module);
}

// https://emscripten.org/docs/api_reference/module.html#Module.locateFile
Module.locateFile = function(path, scriptDirectory = '') {
  console.log(`path: ${path}, scriptDirectory: ${scriptDirectory}`);
  return scriptDirectory + path;
};

// https://emscripten.org/docs/api_reference/module.html#Module.locateFile
Module.setStatus = function(status) {
  console.log(`status ${status}`);
  const statusElement = document.getElementById('status');
  if (status == 'Running...') {
    status = 'Model downloaded. Initializing recognizer...'
  }

  const downloadMatch = status.match(/Downloading data... \((\d+)\/(\d+)\)/);
  if (downloadMatch) {
    const downloaded = BigInt(downloadMatch[1]);
    const total = BigInt(downloadMatch[2]);
    const percent =
        total === 0 ? 0.00 : Number((downloaded * 10000n) / total) / 100;
    const downloadedMB = Number(downloaded) / (1024 * 1024);
    const totalMB = Number(total) / (1024 * 1024);
    status = `Downloading data... ${percent.toFixed(2)}% (${downloadedMB.toFixed(2)} MB/${
        totalMB.toFixed(2)} MB)`;
    console.log(`here ${status}`)
  }

  statusElement.textContent = status;
  if (status === '') {
    statusElement.style.display = 'none';
    // statusElement.parentNode.removeChild(statusElement);

    document.querySelectorAll('.tab-content').forEach((tabContentElement) => {
      tabContentElement.classList.remove('loading');
    });
  } else {
    statusElement.style.display = 'block';
    document.querySelectorAll('.tab-content').forEach((tabContentElement) => {
      tabContentElement.classList.add('loading');
    });
  }
};

Module.onRuntimeInitialized = function() {
  console.log('inited!');

  startBtn.disabled = false;

  vad = createVad(Module);
  console.log('vad is created!', vad);

  buffer = new CircularBuffer(30 * 16000, Module);
  console.log('CircularBuffer is created!', buffer);

  initOfflineRecognizer();
};

if (navigator.mediaDevices.getUserMedia) {
  console.log('getUserMedia supported.');

  // see https://w3c.github.io/mediacapture-main/#dom-mediadevices-getusermedia
  const constraints = {audio: true};

  let onSuccess = function(stream) {
    if (!audioCtx) {
      audioCtx = new AudioContext({sampleRate: expectedSampleRate});
    }
    console.log(audioCtx);
    recordSampleRate = audioCtx.sampleRate;
    console.log('sample rate ' + recordSampleRate);

    // creates an audio node from the microphone incoming stream
    mediaStream = audioCtx.createMediaStreamSource(stream);
    console.log('media stream', mediaStream);

    // https://developer.mozilla.org/en-US/docs/Web/API/AudioContext/createScriptProcessor
    // bufferSize: the onaudioprocess event is called when the buffer is full
    var bufferSize = 4096;
    var numberOfInputChannels = 1;
    var numberOfOutputChannels = 2;
    if (audioCtx.createScriptProcessor) {
      recorder = audioCtx.createScriptProcessor(
          bufferSize, numberOfInputChannels, numberOfOutputChannels);
    } else {
      recorder = audioCtx.createJavaScriptNode(
          bufferSize, numberOfInputChannels, numberOfOutputChannels);
    }
    console.log('recorder', recorder);

    recorder.onaudioprocess = function(e) {
      let samples = new Float32Array(e.inputBuffer.getChannelData(0))
      samples = downsampleBuffer(samples, expectedSampleRate);
      buffer.push(samples);
      while (buffer.size() > vad.config.sileroVad.windowSize) {
        const s = buffer.get(buffer.head(), vad.config.sileroVad.windowSize);
        vad.acceptWaveform(s);
        buffer.pop(vad.config.sileroVad.windowSize);

        if (vad.isDetected() && !printed) {
          printed = true;
          lastResult = 'Speech detected';
        }

        if (!vad.isDetected()) {
          printed = false;
          if (lastResult != '') {
            resultList.push(lastResult);
          }
          lastResult = '';
        }

        while (!vad.isEmpty()) {
          const segment = vad.front();
          const duration = segment.samples.length / expectedSampleRate;
          let durationStr = `Duration: ${duration.toFixed(3)} seconds`;
          vad.pop();

          // non-streaming asr
          const stream = recognizer.createStream();
          stream.acceptWaveform(expectedSampleRate, segment.samples);
          recognizer.decode(stream);
          let recognitionResult = recognizer.getResult(stream);
          console.log(recognitionResult);
          let text = recognitionResult.text;
          stream.free();
          console.log(text);

          if (text != '') {
            durationStr += `. Result: ${text}`;
          }

          resultList.push(durationStr);

          // now save the segment to a wav file
          let buf = new Int16Array(segment.samples.length);
          for (var i = 0; i < segment.samples.length; ++i) {
            let s = segment.samples[i];
            if (s >= 1)
              s = 1;
            else if (s <= -1)
              s = -1;

            buf[i] = s * 32767;
          }

          let clipName = new Date().toISOString() + '--' + durationStr;

          const clipContainer = document.createElement('article');
          const clipLabel = document.createElement('p');
          const audio = document.createElement('audio');
          const deleteButton = document.createElement('button');

          clipContainer.classList.add('clip');
          audio.setAttribute('controls', '');
          deleteButton.textContent = 'Delete';
          deleteButton.className = 'delete';

          clipLabel.textContent = clipName;

          clipContainer.appendChild(audio);

          clipContainer.appendChild(clipLabel);
          clipContainer.appendChild(deleteButton);
          soundClips.appendChild(clipContainer);

          audio.controls = true;
          const blob = toWav(buf);

          leftchannel = [];
          const audioURL = window.URL.createObjectURL(blob);
          audio.src = audioURL;

          deleteButton.onclick = function(e) {
            let evtTgt = e.target;
            evtTgt.parentNode.parentNode.removeChild(evtTgt.parentNode);
          };

          clipLabel.onclick = function() {
            const existingName = clipLabel.textContent;
            const newClipName = prompt('Enter a new name for your sound clip?');
            if (newClipName === null) {
              clipLabel.textContent = existingName;
            } else {
              clipLabel.textContent = newClipName;
            }
          };
        }
      }

      textArea.value = getDisplayResult();
      textArea.scrollTop = textArea.scrollHeight;  // auto scroll
    };

    startBtn.onclick = function() {
      mediaStream.connect(recorder);
      recorder.connect(audioCtx.destination);

      console.log('recorder started');

      stopBtn.disabled = false;
      startBtn.disabled = true;
    };

    stopBtn.onclick = function() {
      vad.reset();
      buffer.reset();
      console.log('recorder stopped');

      // stopBtn recording
      recorder.disconnect(audioCtx.destination);
      mediaStream.disconnect(recorder);

      startBtn.style.background = '';
      startBtn.style.color = '';
      // mediaRecorder.requestData();

      stopBtn.disabled = true;
      startBtn.disabled = false;
    };
  };

  let onError = function(err) {
    console.log('The following error occurred: ' + err);
  };

  navigator.mediaDevices.getUserMedia(constraints).then(onSuccess, onError);
} else {
  console.log('getUserMedia not supported on your browser!');
  alert('getUserMedia not supported on your browser!');
}

// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
function flatten(listOfSamples) {
  let n = 0;
  for (let i = 0; i < listOfSamples.length; ++i) {
    n += listOfSamples[i].length;
  }
  let ans = new Int16Array(n);

  let offset = 0;
  for (let i = 0; i < listOfSamples.length; ++i) {
    ans.set(listOfSamples[i], offset);
    offset += listOfSamples[i].length;
  }
  return ans;
}

// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
function toWav(samples) {
  let buf = new ArrayBuffer(44 + samples.length * 2);
  var view = new DataView(buf);

  // http://soundfile.sapp.org/doc/WaveFormat/
  //                   F F I R
  view.setUint32(0, 0x46464952, true);               // chunkID
  view.setUint32(4, 36 + samples.length * 2, true);  // chunkSize
  //                   E V A W
  view.setUint32(8, 0x45564157, true);  // format
                                        //
  //                      t m f
  view.setUint32(12, 0x20746d66, true);          // subchunk1ID
  view.setUint32(16, 16, true);                  // subchunk1Size, 16 for PCM
  view.setUint32(20, 1, true);                   // audioFormat, 1 for PCM
  view.setUint16(22, 1, true);                   // numChannels: 1 channel
  view.setUint32(24, expectedSampleRate, true);  // sampleRate
  view.setUint32(28, expectedSampleRate * 2, true);  // byteRate
  view.setUint16(32, 2, true);                       // blockAlign
  view.setUint16(34, 16, true);                      // bitsPerSample
  view.setUint32(36, 0x61746164, true);              // Subchunk2ID
  view.setUint32(40, samples.length * 2, true);      // subchunk2Size

  let offset = 44;
  for (let i = 0; i < samples.length; ++i) {
    view.setInt16(offset, samples[i], true);
    offset += 2;
  }

  return new Blob([view], {type: 'audio/wav'});
}

// this function is copied from
// https://github.com/awslabs/aws-lex-browser-audio-capture/blob/master/lib/worker.js#L46
function downsampleBuffer(buffer, exportSampleRate) {
  if (exportSampleRate === recordSampleRate) {
    return buffer;
  }
  var sampleRateRatio = recordSampleRate / exportSampleRate;
  var newLength = Math.round(buffer.length / sampleRateRatio);
  var result = new Float32Array(newLength);
  var offsetResult = 0;
  var offsetBuffer = 0;
  while (offsetResult < result.length) {
    var nextOffsetBuffer = Math.round((offsetResult + 1) * sampleRateRatio);
    var accum = 0, count = 0;
    for (var i = offsetBuffer; i < nextOffsetBuffer && i < buffer.length; i++) {
      accum += buffer[i];
      count++;
    }
    result[offsetResult] = accum / count;
    offsetResult++;
    offsetBuffer = nextOffsetBuffer;
  }
  return result;
};


================================================
FILE: wasm/vad-asr/assets/README.md
================================================
# Introduction

## Download VAD models

Please download
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
and put `silero_vad.onnx` into the current directory, i.e., `wasm/vad/assets`.

## Download non-streaming ASR models

Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
to download a non-streaming ASR model, i.e., an offline ASR model.

After downloading, you should rename the model files.

Please refer to
https://k2-fsa.github.io/sherpa/onnx/lazarus/generate-subtitles.html#download-a-speech-recognition-model
for how to rename.

The renamed file shoud put in current folder( ```/<repo>/wasm/vad-asr/assets```)
Example after download sense-voice model
```
tree ~/work/github/sherpa-onnx/wasm/vad-asr/assets
/home/gxy/work/github/sherpa-onnx/wasm/vad-asr/assets
├── README.md
├── sense-voice.onnx
├── silero_vad.onnx
└── tokens.txt

1 directory, 4 files
```


You can find example build scripts at the following address:

  https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-vad-asr.yaml


================================================
FILE: wasm/vad-asr/index.html
================================================
<html lang="en">

<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width" />
  <title>Next-gen Kaldi WebAssembly with sherpa-onnx for VAD + ASR</title>
  <style>
    h1,div {
      text-align: center;
    }
    textarea {
      width:100%;
    }
    .loading {
      display: none !important;
    }
  </style>
</head>

<body style="font-family: 'Source Sans Pro', sans-serif; background-color: #f9fafb; color: #333; display: flex; flex-direction: column; align-items: center; height: 100vh; margin: 0;">
  <h1>
    Next-gen Kaldi + WebAssembly<br/>
    VAD+ASR Demo with <a href="https://github.com/k2-fsa/sherpa-onnx">sherpa-onnx</a><br/>
    (with Zipformer)
  </h1>

  <div style="width: 100%; max-width: 900px; background: #fff; padding: 1.5rem; border-radius: 8px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1); flex: 1;">
    <div id="status">Loading...</div>

    <div id="singleAudioContent" class="tab-content loading">
      <div style="display: flex; gap: 1.5rem;">
        <div style="flex: 1; display: flex; flex-direction: row; align-items: center; gap: 1rem;">
          <button id="startBtn" disabled>Start</button>
          <button id="stopBtn" disabled>Stop</button>
          <button id="clearBtn">Clear</button>
        </div>
      </div>

      <div style="flex: 1; display: flex; flex-direction: column; gap: 1rem;">
          <div style="font-size: 1rem; font-weight: bold; padding: 0.5rem 1rem; background-color: #f8f9fa; border-radius: 8px; color: #6c757d;">Transcript</div>
          <textarea id="results" rows="10" placeholder="Output will appear here..." readonly style="flex: 1; padding: 0.75rem; font-size: 1rem; border: 1px solid #ced4da; border-radius: 8px; resize: none; background-color: #f8f9fa;"></textarea>
      </div>

      <section flex="1" overflow="auto" id="sound-clips">
      </section>
  </div>

  <!-- Footer Section -->
  <div style="width: 100%; max-width: 900px; margin-top: 1.5rem; background: #fff; padding: 1.5rem; border-radius: 8px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1); text-align: left; font-size: 0.9rem; color: #6c757d;">
    <h3>Description</h3>
    <ul>
      <li>Everything is <strong>open-sourced.</strong> <a href="https://github.com/k2-fsa/sherpa-onnx">code</a></li>
      <li>If you have any issues, please either <a href="https://github.com/k2-fsa/sherpa-onnx/issues">file a ticket</a> or contact us via</li>
        <ul>
          <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#wechat">WeChat group</a></li>
          <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#qq">QQ group</a></li>
          <li><a href="https://k2-fsa.github.io/sherpa/social-groups.html#bilibili-b">Bilibili</a></li>
        </ul>
    </ul>
    <h3>About This Demo</h3>
    <ul>
      <li><strong>Private and Secure:</strong> All processing is done locally on your device (CPU) within your browser with a single thread. No server is involved, ensuring privacy and security. You can disconnect from the Internet once this page is loaded.</li>
      <li><strong>Efficient Resource Usage:</strong> No GPU is required, leaving system resources available for webLLM analysis.</li>
    </ul>
    <h3>Latest Update</h3>
    <ul>
      <li>Update UI.</li>
      <li>First working version.</li>
    </ul>

    <h3>Acknowledgement</h3>
    <ul>
      <li>We refer to <a href="https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Wasm">https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Wasm</a> for the UI part.</li>
    </ul>
  </div>

  <script src="sherpa-onnx-asr.js"></script>
  <script src="sherpa-onnx-vad.js"></script>
  <script src="app-vad-asr.js"></script>
  <script src="sherpa-onnx-wasm-main-vad-asr.js"></script>
</body>


================================================
FILE: wasm/vad-asr/sherpa-onnx-wasm-main-vad-asr.cc
================================================
// wasm/sherpa-onnx-wasm-main-vad-asr.cc
//
// Copyright (c)  2024  Xiaomi Corporation
#include <stdio.h>

#include <algorithm>
#include <memory>

#include "sherpa-onnx/c-api/c-api.h"

// see also
// https://emscripten.org/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.html

extern "C" {

void CopyHeap(const char *src, int32_t num_bytes, char *dst) {
  std::copy(src, src + num_bytes, dst);
}
}